# HG changeset patch # User Chris Cannam # Date 1444133319 -3600 # Node ID 26056e866c29ad8e4a89654f884ef3d3e50e8fc8 # Parent 8db794ca3e0b9010fb6f15ead90f2fea3016cc9b Add FFTW to comparison table diff -r 8db794ca3e0b -r 26056e866c29 fft/cross/FFT.js --- a/fft/cross/FFT.js Mon Oct 05 15:51:10 2015 +0100 +++ b/fft/cross/FFT.js Tue Oct 06 13:08:39 2015 +0100 @@ -22,7 +22,7 @@ ptr, ptr + n, ptr + n * 2, ptr + n * 3); var ro = new Float64Array(crossModule.HEAPU8.buffer, ptr + n * 2, this.size); var io = new Float64Array(crossModule.HEAPU8.buffer, ptr + n * 3, this.size); - return { real: ro, imag: io }; + return { real: ro.slice(0), imag: io.slice(0) }; } this.transformReal = function(real, inverse) { var ptr = this.ptr; @@ -32,9 +32,9 @@ ptr, 0, ptr + n * 2, ptr + n * 3); var ro = new Float64Array(crossModule.HEAPU8.buffer, ptr + n * 2, this.size); var io = new Float64Array(crossModule.HEAPU8.buffer, ptr + n * 3, this.size); - return { real: ro, imag: io }; + return { real: ro.slice(0), imag: io.slice(0) }; } - this.discard = function() { + this.dispose = function() { crossModule._free(this.ptr); } } diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/FFT.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/FFT.js Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,55 @@ +"use strict"; + +var fftwModule = FFTWModule({}); + +var fftwf_plan_dft_r2c_1d = fftwModule.cwrap( + 'fftwf_plan_dft_r2c_1d', 'number', ['number', 'number', 'number', 'number'] +); + +var fftwf_plan_dft_c2r_1d = fftwModule.cwrap( + 'fftwf_plan_dft_c2r_1d', 'number', ['number', 'number', 'number', 'number'] +); + +var fftwf_execute = fftwModule.cwrap( + 'fftwf_execute', 'void', ['number'] +); + +var fftwf_destroy_plan = fftwModule.cwrap( + 'fftwf_destroy_plan', 'void', ['number'] +); + +function FFTW(size) { + + this.size = size; + this.rptr = fftwModule._malloc(size*4 + (size+2)*4); + this.cptr = this.rptr + size*4; + this.r = new Float32Array(fftwModule.HEAPU8.buffer, this.rptr, size); + this.c = new 
Float32Array(fftwModule.HEAPU8.buffer, this.cptr, size+2); + + var FFTW_ESTIMATE = (1 << 6); + this.fplan = fftwf_plan_dft_r2c_1d(size, this.rptr, this.cptr, FFTW_ESTIMATE); + this.iplan = fftwf_plan_dft_c2r_1d(size, this.cptr, this.rptr, FFTW_ESTIMATE); + + this.forward = function(real) { + this.r.set(real); + fftwf_execute(this.fplan); + return (new Float32Array + (fftwModule.HEAPU8.buffer, this.cptr, this.size+2)) + .slice(0); + } + + this.inverse = function(cpx) { + this.c.set(cpx); + fftwf_execute(this.iplan); + return (new Float32Array + (fftwModule.HEAPU8.buffer, this.rptr, this.size)) + .slice(0); + } + + this.dispose = function() { + fftwf_destroy_plan(this.fplan); + fftwf_destroy_plan(this.iplan); + fftwModule._free(this.rptr); + } +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/FFTW.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/FFTW.js Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,34 @@ +var FFTWModule = function(Module) { + Module = Module || {}; + +var Module;if(!Module)Module=(typeof FFTWModule!=="undefined"?FFTWModule:null)||{};var moduleOverrides={};for(var key in Module){if(Module.hasOwnProperty(key)){moduleOverrides[key]=Module[key]}}var ENVIRONMENT_IS_WEB=typeof window==="object";var ENVIRONMENT_IS_WORKER=typeof importScripts==="function";var ENVIRONMENT_IS_NODE=typeof process==="object"&&typeof require==="function"&&!ENVIRONMENT_IS_WEB&&!ENVIRONMENT_IS_WORKER;var ENVIRONMENT_IS_SHELL=!ENVIRONMENT_IS_WEB&&!ENVIRONMENT_IS_NODE&&!ENVIRONMENT_IS_WORKER;if(ENVIRONMENT_IS_NODE){if(!Module["print"])Module["print"]=function print(x){process["stdout"].write(x+"\n")};if(!Module["printErr"])Module["printErr"]=function printErr(x){process["stderr"].write(x+"\n")};var nodeFS=require("fs");var nodePath=require("path");Module["read"]=function read(filename,binary){filename=nodePath["normalize"](filename);var 
ret=nodeFS["readFileSync"](filename);if(!ret&&filename!=nodePath["resolve"](filename)){filename=path.join(__dirname,"..","src",filename);ret=nodeFS["readFileSync"](filename)}if(ret&&!binary)ret=ret.toString();return ret};Module["readBinary"]=function readBinary(filename){var ret=Module["read"](filename,true);if(!ret.buffer){ret=new Uint8Array(ret)}assert(ret.buffer);return ret};Module["load"]=function load(f){globalEval(read(f))};if(!Module["thisProgram"]){if(process["argv"].length>1){Module["thisProgram"]=process["argv"][1].replace(/\\/g,"/")}else{Module["thisProgram"]="unknown-program"}}Module["arguments"]=process["argv"].slice(2);if(typeof module!=="undefined"){module["exports"]=Module}process["on"]("uncaughtException",(function(ex){if(!(ex instanceof ExitStatus)){throw ex}}));Module["inspect"]=(function(){return"[Emscripten Module object]"})}else if(ENVIRONMENT_IS_SHELL){if(!Module["print"])Module["print"]=print;if(typeof printErr!="undefined")Module["printErr"]=printErr;if(typeof read!="undefined"){Module["read"]=read}else{Module["read"]=function read(){throw"no read() available (jsc?)"}}Module["readBinary"]=function readBinary(f){if(typeof readbuffer==="function"){return new Uint8Array(readbuffer(f))}var data=read(f,"binary");assert(typeof data==="object");return data};if(typeof scriptArgs!="undefined"){Module["arguments"]=scriptArgs}else if(typeof arguments!="undefined"){Module["arguments"]=arguments}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){Module["read"]=function read(url){var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.send(null);return xhr.responseText};if(typeof arguments!="undefined"){Module["arguments"]=arguments}if(typeof console!=="undefined"){if(!Module["print"])Module["print"]=function print(x){console.log(x)};if(!Module["printErr"])Module["printErr"]=function printErr(x){console.log(x)}}else{var TRY_USE_DUMP=false;if(!Module["print"])Module["print"]=TRY_USE_DUMP&&typeof 
dump!=="undefined"?(function(x){dump(x)}):(function(x){})}if(ENVIRONMENT_IS_WORKER){Module["load"]=importScripts}if(typeof Module["setWindowTitle"]==="undefined"){Module["setWindowTitle"]=(function(title){document.title=title})}}else{throw"Unknown runtime environment. Where are we?"}function globalEval(x){eval.call(null,x)}if(!Module["load"]&&Module["read"]){Module["load"]=function load(f){globalEval(Module["read"](f))}}if(!Module["print"]){Module["print"]=(function(){})}if(!Module["printErr"]){Module["printErr"]=Module["print"]}if(!Module["arguments"]){Module["arguments"]=[]}if(!Module["thisProgram"]){Module["thisProgram"]="./this.program"}Module.print=Module["print"];Module.printErr=Module["printErr"];Module["preRun"]=[];Module["postRun"]=[];for(var key in moduleOverrides){if(moduleOverrides.hasOwnProperty(key)){Module[key]=moduleOverrides[key]}}var Runtime={setTempRet0:(function(value){tempRet0=value}),getTempRet0:(function(){return tempRet0}),stackSave:(function(){return STACKTOP}),stackRestore:(function(stackTop){STACKTOP=stackTop}),getNativeTypeSize:(function(type){switch(type){case"i1":case"i8":return 1;case"i16":return 2;case"i32":return 4;case"i64":return 8;case"float":return 4;case"double":return 8;default:{if(type[type.length-1]==="*"){return Runtime.QUANTUM_SIZE}else if(type[0]==="i"){var bits=parseInt(type.substr(1));assert(bits%8===0);return bits/8}else{return 0}}}}),getNativeFieldSize:(function(type){return Math.max(Runtime.getNativeTypeSize(type),Runtime.QUANTUM_SIZE)}),STACK_ALIGN:16,prepVararg:(function(ptr,type){if(type==="double"||type==="i64"){if(ptr&7){assert((ptr&7)===4);ptr+=4}}else{assert((ptr&3)===0)}return ptr}),getAlignSize:(function(type,size,vararg){if(!vararg&&(type=="i64"||type=="double"))return 8;if(!type)return Math.min(size,8);return 
Math.min(size||(type?Runtime.getNativeFieldSize(type):0),Runtime.QUANTUM_SIZE)}),dynCall:(function(sig,ptr,args){if(args&&args.length){if(!args.splice)args=Array.prototype.slice.call(args);args.splice(0,0,ptr);return Module["dynCall_"+sig].apply(null,args)}else{return Module["dynCall_"+sig].call(null,ptr)}}),functionPointers:[],addFunction:(function(func){for(var i=0;i=TOTAL_MEMORY){var success=enlargeMemory();if(!success){DYNAMICTOP=ret;return 0}}return ret}),alignMemory:(function(size,quantum){var ret=size=Math.ceil(size/(quantum?quantum:16))*(quantum?quantum:16);return ret}),makeBigInt:(function(low,high,unsigned){var ret=unsigned?+(low>>>0)+ +(high>>>0)*+4294967296:+(low>>>0)+ +(high|0)*+4294967296;return ret}),GLOBAL_BASE:8,QUANTUM_SIZE:4,__dummy__:0};Module["Runtime"]=Runtime;var __THREW__=0;var ABORT=false;var EXITSTATUS=0;var undef=0;var tempValue,tempInt,tempBigInt,tempInt2,tempBigInt2,tempPair,tempBigIntI,tempBigIntR,tempBigIntS,tempBigIntP,tempBigIntD,tempDouble,tempFloat;var tempI64,tempI64b;var tempRet0,tempRet1,tempRet2,tempRet3,tempRet4,tempRet5,tempRet6,tempRet7,tempRet8,tempRet9;function assert(condition,text){if(!condition){abort("Assertion failed: "+text)}}var globalScope=this;function getCFunc(ident){var func=Module["_"+ident];if(!func){try{func=eval("_"+ident)}catch(e){}}assert(func,"Cannot call unknown function "+ident+" (perhaps LLVM optimizations or closure removed it?)");return func}var cwrap,ccall;((function(){var JSfuncs={"stackSave":(function(){Runtime.stackSave()}),"stackRestore":(function(){Runtime.stackRestore()}),"arrayToC":(function(arr){var ret=Runtime.stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}),"stringToC":(function(str){var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=Runtime.stackAlloc((str.length<<2)+1);writeStringToMemory(str,ret)}return ret})};var toC={"string":JSfuncs["stringToC"],"array":JSfuncs["arrayToC"]};ccall=function ccallFunc(ident,returnType,argTypes,args,opts){var func=getCFunc(ident);var 
cArgs=[];var stack=0;if(args){for(var i=0;i>0]=value;break;case"i8":HEAP8[ptr>>0]=value;break;case"i16":HEAP16[ptr>>1]=value;break;case"i32":HEAP32[ptr>>2]=value;break;case"i64":tempI64=[value>>>0,(tempDouble=value,+Math_abs(tempDouble)>=+1?tempDouble>+0?(Math_min(+Math_floor(tempDouble/+4294967296),+4294967295)|0)>>>0:~~+Math_ceil((tempDouble- +(~~tempDouble>>>0))/+4294967296)>>>0:0)],HEAP32[ptr>>2]=tempI64[0],HEAP32[ptr+4>>2]=tempI64[1];break;case"float":HEAPF32[ptr>>2]=value;break;case"double":HEAPF64[ptr>>3]=value;break;default:abort("invalid type for setValue: "+type)}}Module["setValue"]=setValue;function getValue(ptr,type,noSafe){type=type||"i8";if(type.charAt(type.length-1)==="*")type="i32";switch(type){case"i1":return HEAP8[ptr>>0];case"i8":return HEAP8[ptr>>0];case"i16":return HEAP16[ptr>>1];case"i32":return HEAP32[ptr>>2];case"i64":return HEAP32[ptr>>2];case"float":return HEAPF32[ptr>>2];case"double":return HEAPF64[ptr>>3];default:abort("invalid type for setValue: "+type)}return null}Module["getValue"]=getValue;var ALLOC_NORMAL=0;var ALLOC_STACK=1;var ALLOC_STATIC=2;var ALLOC_DYNAMIC=3;var ALLOC_NONE=4;Module["ALLOC_NORMAL"]=ALLOC_NORMAL;Module["ALLOC_STACK"]=ALLOC_STACK;Module["ALLOC_STATIC"]=ALLOC_STATIC;Module["ALLOC_DYNAMIC"]=ALLOC_DYNAMIC;Module["ALLOC_NONE"]=ALLOC_NONE;function allocate(slab,types,allocator,ptr){var zeroinit,size;if(typeof slab==="number"){zeroinit=true;size=slab}else{zeroinit=false;size=slab.length}var singleType=typeof types==="string"?types:null;var ret;if(allocator==ALLOC_NONE){ret=ptr}else{ret=[_malloc,Runtime.stackAlloc,Runtime.staticAlloc,Runtime.dynamicAlloc][allocator===undefined?ALLOC_STATIC:allocator](Math.max(size,singleType?1:types.length))}if(zeroinit){var ptr=ret,stop;assert((ret&3)==0);stop=ret+(size&~3);for(;ptr>2]=0}stop=ret+size;while(ptr>0]=0}return ret}if(singleType==="i8"){if(slab.subarray||slab.slice){HEAPU8.set(slab,ret)}else{HEAPU8.set(new Uint8Array(slab),ret)}return ret}var 
i=0,type,typeSize,previousType;while(i>0];hasUtf|=t;if(t==0&&!length)break;i++;if(length&&i==length)break}if(!length)length=i;var ret="";if(hasUtf<128){var MAX_CHUNK=1024;var curr;while(length>0){curr=String.fromCharCode.apply(String,HEAPU8.subarray(ptr,ptr+Math.min(length,MAX_CHUNK)));ret=ret?ret+curr:curr;ptr+=MAX_CHUNK;length-=MAX_CHUNK}return ret}return Module["UTF8ToString"](ptr)}Module["Pointer_stringify"]=Pointer_stringify;function AsciiToString(ptr){var str="";while(1){var ch=HEAP8[ptr++>>0];if(!ch)return str;str+=String.fromCharCode(ch)}}Module["AsciiToString"]=AsciiToString;function stringToAscii(str,outPtr){return writeAsciiToMemory(str,outPtr,false)}Module["stringToAscii"]=stringToAscii;function UTF8ArrayToString(u8Array,idx){var u0,u1,u2,u3,u4,u5;var str="";while(1){u0=u8Array[idx++];if(!u0)return str;if(!(u0&128)){str+=String.fromCharCode(u0);continue}u1=u8Array[idx++]&63;if((u0&224)==192){str+=String.fromCharCode((u0&31)<<6|u1);continue}u2=u8Array[idx++]&63;if((u0&240)==224){u0=(u0&15)<<12|u1<<6|u2}else{u3=u8Array[idx++]&63;if((u0&248)==240){u0=(u0&7)<<18|u1<<12|u2<<6|u3}else{u4=u8Array[idx++]&63;if((u0&252)==248){u0=(u0&3)<<24|u1<<18|u2<<12|u3<<6|u4}else{u5=u8Array[idx++]&63;u0=(u0&1)<<30|u1<<24|u2<<18|u3<<12|u4<<6|u5}}}if(u0<65536){str+=String.fromCharCode(u0)}else{var ch=u0-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}}}Module["UTF8ArrayToString"]=UTF8ArrayToString;function UTF8ToString(ptr){return UTF8ArrayToString(HEAPU8,ptr)}Module["UTF8ToString"]=UTF8ToString;function stringToUTF8Array(str,outU8Array,outIdx,maxBytesToWrite){if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=55296&&u<=57343)u=65536+((u&1023)<<10)|str.charCodeAt(++i)&1023;if(u<=127){if(outIdx>=endIdx)break;outU8Array[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;outU8Array[outIdx++]=192|u>>6;outU8Array[outIdx++]=128|u&63}else 
if(u<=65535){if(outIdx+2>=endIdx)break;outU8Array[outIdx++]=224|u>>12;outU8Array[outIdx++]=128|u>>6&63;outU8Array[outIdx++]=128|u&63}else if(u<=2097151){if(outIdx+3>=endIdx)break;outU8Array[outIdx++]=240|u>>18;outU8Array[outIdx++]=128|u>>12&63;outU8Array[outIdx++]=128|u>>6&63;outU8Array[outIdx++]=128|u&63}else if(u<=67108863){if(outIdx+4>=endIdx)break;outU8Array[outIdx++]=248|u>>24;outU8Array[outIdx++]=128|u>>18&63;outU8Array[outIdx++]=128|u>>12&63;outU8Array[outIdx++]=128|u>>6&63;outU8Array[outIdx++]=128|u&63}else{if(outIdx+5>=endIdx)break;outU8Array[outIdx++]=252|u>>30;outU8Array[outIdx++]=128|u>>24&63;outU8Array[outIdx++]=128|u>>18&63;outU8Array[outIdx++]=128|u>>12&63;outU8Array[outIdx++]=128|u>>6&63;outU8Array[outIdx++]=128|u&63}}outU8Array[outIdx]=0;return outIdx-startIdx}Module["stringToUTF8Array"]=stringToUTF8Array;function stringToUTF8(str,outPtr,maxBytesToWrite){return stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite)}Module["stringToUTF8"]=stringToUTF8;function lengthBytesUTF8(str){var len=0;for(var i=0;i=55296&&u<=57343)u=65536+((u&1023)<<10)|str.charCodeAt(++i)&1023;if(u<=127){++len}else if(u<=2047){len+=2}else if(u<=65535){len+=3}else if(u<=2097151){len+=4}else if(u<=67108863){len+=5}else{len+=6}}return len}Module["lengthBytesUTF8"]=lengthBytesUTF8;function UTF16ToString(ptr){var i=0;var str="";while(1){var codeUnit=HEAP16[ptr+i*2>>1];if(codeUnit==0)return str;++i;str+=String.fromCharCode(codeUnit)}}Module["UTF16ToString"]=UTF16ToString;function stringToUTF16(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr}Module["stringToUTF16"]=stringToUTF16;function lengthBytesUTF16(str){return str.length*2}Module["lengthBytesUTF16"]=lengthBytesUTF16;function UTF32ToString(ptr){var i=0;var str="";while(1){var utf32=HEAP32[ptr+i*4>>2];if(utf32==0)return 
str;++i;if(utf32>=65536){var ch=utf32-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}else{str+=String.fromCharCode(utf32)}}}Module["UTF32ToString"]=UTF32ToString;function stringToUTF32(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i=55296&&codeUnit<=57343){var trailSurrogate=str.charCodeAt(++i);codeUnit=65536+((codeUnit&1023)<<10)|trailSurrogate&1023}HEAP32[outPtr>>2]=codeUnit;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr}Module["stringToUTF32"]=stringToUTF32;function lengthBytesUTF32(str){var len=0;for(var i=0;i=55296&&codeUnit<=57343)++i;len+=4}return len}Module["lengthBytesUTF32"]=lengthBytesUTF32;function demangle(func){var hasLibcxxabi=!!Module["___cxa_demangle"];if(hasLibcxxabi){try{var buf=_malloc(func.length);writeStringToMemory(func.substr(1),buf);var status=_malloc(4);var ret=Module["___cxa_demangle"](buf,0,0,status);if(getValue(status,"i32")===0&&ret){return Pointer_stringify(ret)}}catch(e){}finally{if(buf)_free(buf);if(status)_free(status);if(ret)_free(ret)}}var i=3;var basicTypes={"v":"void","b":"bool","c":"char","s":"short","i":"int","l":"long","f":"float","d":"double","w":"wchar_t","a":"signed char","h":"unsigned char","t":"unsigned short","j":"unsigned int","m":"unsigned long","x":"long long","y":"unsigned long long","z":"..."};var subs=[];var first=true;function dump(x){if(x)Module.print(x);Module.print(func);var pre="";for(var a=0;a"}else{ret=name}paramLoop:while(i0){var c=func[i++];if(c in basicTypes){list.push(basicTypes[c])}else{switch(c){case"P":list.push(parse(true,1,true)[0]+"*");break;case"R":list.push(parse(true,1,true)[0]+"&");break;case"L":{i++;var end=func.indexOf("E",i);var size=end-i;list.push(func.substr(i,size));i+=size+2;break};case"A":{var 
size=parseInt(func.substr(i));i+=size.toString().length;if(func[i]!=="_")throw"?";i++;list.push(parse(true,1,true)[0]+" ["+size+"]");break};case"E":break paramLoop;default:ret+="?"+c;break paramLoop}}}if(!allowVoid&&list.length===1&&list[0]==="void")list=[];if(rawList){if(ret){list.push(ret+"?")}return list}else{return ret+flushList()}}var parsed=func;try{if(func=="Object._main"||func=="_main"){return"main()"}if(typeof func==="number")func=Pointer_stringify(func);if(func[0]!=="_")return func;if(func[1]!=="_")return func;if(func[2]!=="Z")return func;switch(func[3]){case"n":return"operator new()";case"d":return"operator delete()"}parsed=parse()}catch(e){parsed+="?"}if(parsed.indexOf("?")>=0&&!hasLibcxxabi){Runtime.warnOnce("warning: a problem occurred in builtin C++ name demangling; build with -s DEMANGLE_SUPPORT=1 to link in libcxxabi demangling")}return parsed}function demangleAll(text){return text.replace(/__Z[\w\d_]+/g,(function(x){var y=demangle(x);return x===y?x:x+" ["+y+"]"}))}function jsStackTrace(){var err=new Error;if(!err.stack){try{throw new Error(0)}catch(e){err=e}if(!err.stack){return"(no stack trace available)"}}return err.stack.toString()}function stackTrace(){return demangleAll(jsStackTrace())}Module["stackTrace"]=stackTrace;var PAGE_SIZE=4096;function alignMemoryPage(x){if(x%4096>0){x+=4096-x%4096}return x}var HEAP;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var STATIC_BASE=0,STATICTOP=0,staticSealed=false;var STACK_BASE=0,STACKTOP=0,STACK_MAX=0;var DYNAMIC_BASE=0,DYNAMICTOP=0;function enlargeMemory(){abort("Cannot enlarge memory arrays. 
Either (1) compile with -s TOTAL_MEMORY=X with X higher than the current value "+TOTAL_MEMORY+", (2) compile with ALLOW_MEMORY_GROWTH which adjusts the size at runtime but prevents some optimizations, or (3) set Module.TOTAL_MEMORY before the program runs.")}var TOTAL_STACK=Module["TOTAL_STACK"]||5242880;var TOTAL_MEMORY=Module["TOTAL_MEMORY"]||16777216;var totalMemory=64*1024;while(totalMemory0){var callback=callbacks.shift();if(typeof callback=="function"){callback();continue}var func=callback.func;if(typeof func==="number"){if(callback.arg===undefined){Runtime.dynCall("v",func)}else{Runtime.dynCall("vi",func,[callback.arg])}}else{func(callback.arg===undefined?null:callback.arg)}}}var __ATPRERUN__=[];var __ATINIT__=[];var __ATMAIN__=[];var __ATEXIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;var runtimeExited=false;function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function ensureInitRuntime(){if(runtimeInitialized)return;runtimeInitialized=true;callRuntimeCallbacks(__ATINIT__)}function preMain(){callRuntimeCallbacks(__ATMAIN__)}function exitRuntime(){callRuntimeCallbacks(__ATEXIT__);runtimeExited=true}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}Module["addOnPreRun"]=addOnPreRun;function addOnInit(cb){__ATINIT__.unshift(cb)}Module["addOnInit"]=addOnInit;function addOnPreMain(cb){__ATMAIN__.unshift(cb)}Module["addOnPreMain"]=addOnPreMain;function addOnExit(cb){__ATEXIT__.unshift(cb)}Module["addOnExit"]=addOnExit;function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}Module["addOnPostRun"]=addOnPostRun;function intArrayFromString(stringy,dontAddNull,length){var 
len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array}Module["intArrayFromString"]=intArrayFromString;function intArrayToString(array){var ret=[];for(var i=0;i255){chr&=255}ret.push(String.fromCharCode(chr))}return ret.join("")}Module["intArrayToString"]=intArrayToString;function writeStringToMemory(string,buffer,dontAddNull){var array=intArrayFromString(string,dontAddNull);var i=0;while(i>0]=chr;i=i+1}}Module["writeStringToMemory"]=writeStringToMemory;function writeArrayToMemory(array,buffer){for(var i=0;i>0]=array[i]}}Module["writeArrayToMemory"]=writeArrayToMemory;function writeAsciiToMemory(str,buffer,dontAddNull){for(var i=0;i>0]=str.charCodeAt(i)}if(!dontAddNull)HEAP8[buffer>>0]=0}Module["writeAsciiToMemory"]=writeAsciiToMemory;function unSign(value,bits,ignore){if(value>=0){return value}return bits<=32?2*Math.abs(1<=half&&(bits<=32||value>half)){value=-2*half+value}return value}if(!Math["imul"]||Math["imul"](4294967295,5)!==-5)Math["imul"]=function imul(a,b){var ah=a>>>16;var al=a&65535;var bh=b>>>16;var bl=b&65535;return al*bl+(ah*bl+al*bh<<16)|0};Math.imul=Math["imul"];if(!Math["clz32"])Math["clz32"]=(function(x){x=x>>>0;for(var i=0;i<32;i++){if(x&1<<31-i)return i}return 32});Math.clz32=Math["clz32"];var Math_abs=Math.abs;var Math_cos=Math.cos;var Math_sin=Math.sin;var Math_tan=Math.tan;var Math_acos=Math.acos;var Math_asin=Math.asin;var Math_atan=Math.atan;var Math_atan2=Math.atan2;var Math_exp=Math.exp;var Math_log=Math.log;var Math_sqrt=Math.sqrt;var Math_ceil=Math.ceil;var Math_floor=Math.floor;var Math_pow=Math.pow;var Math_imul=Math.imul;var Math_fround=Math.fround;var Math_min=Math.min;var Math_clz32=Math.clz32;var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;function getUniqueRunDependency(id){return id}function 
addRunDependency(id){runDependencies++;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}}Module["addRunDependency"]=addRunDependency;function removeRunDependency(id){runDependencies--;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}Module["removeRunDependency"]=removeRunDependency;Module["preloadedImages"]={};Module["preloadedAudios"]={};var memoryInitializer=null;var ASM_CONSTS=[];STATIC_BASE=8;STATICTOP=STATIC_BASE+33120;__ATINIT__.push();allocate([10,0,0,0,91,84,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,97,84,0,0,0,0,0,0,0,0,78,64,0,0,0,0,0,0,52,64,0,0,0,0,0,0,84,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,103,84,0,0,0,0,0,0,0,0,86,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,109,84,0,0,0,0,0,0,0,64,97,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,115,84,0,0,0,0,0,0,0,0,89,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,72,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,121,84,0,0,0,0,0,0,0,0,96,64,0,0,0,0,0,0,60,64,0,0,0,0,0,0,60,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,127,84,0,0,0,0,0,0,0,0,97,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0,0,133,84,0,0,0,0,0,0,0,0,103,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,0,0,0,139,84,0,0,0,0,0,0,0,64,112,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,2,0,0,0,145,84,0,0,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,150,84,0,0,0,0,0,0,0,64,117,64,0,0,0,0,0,0,74,64,0,0,0,0,0,0,64,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,156,84,0,0,0,0,0,0,0,0,36,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,161,84,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,166,84,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,171,84,0,0,0,0,0,0,0,64,137,64,0,0,0,0,0,0,98,64,0,0,0,0,0,0,90,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,177,84,0,0,0,0,0,0,0,0,64,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,182,84,0,0,0,0,0,0,0,0,66,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,187,84,0,0,0,0,0,0,0,0,74,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,192,84,0,0,0,0,0,0,0,0,78,64,0,0,0,0,0,0,52,64,0,0,0,0,0,0,52,64,0,0,0,0,0,0,0,0,236,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,197,84,0,0,172,69,0,0,244,50,0,0,0,0,0,0,0,0,32,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,202,84,0,0,180,69,0,0,244,50,0,0,0,0,0,0,0,0,62,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,207,84,0,0,188,69,0,0,244,50,0,0,0,0,0,0,0,0,80,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,212,84,0,0,196,69,0,0,244,50,0,0,0,0,0,0,0,64,96,64,0,0,0,0,0,128,81,64,0,0,0,0,0,128,81,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
217,84,0,0,204,69,0,0,244,50,0,0,0,0,0,0,0,0,104,64,0,0,0,0,0,0,85,64,0,0,0,0,0,0,85,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,222,84,0,0,212,69,0,0,244,50,0,0,0,0,0,0,0,0,122,64,0,0,0,0,0,0,98,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,227,84,0,0,220,69,0,0,244,50,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,233,84,0,0,228,69,0,0,244,50,0,0,0,0,0,0,0,0,86,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,239,84,0,0,236,69,0,0,244,50,0,0,0,0,0,0,0,0,96,64,0,0,0,0,0,0,76,64,0,0,0,0,0,0,76,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,245,84,0,0,244,69,0,0,244,50,0,0,0,0,0,0,0,0,97,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0,0,251,84,0,0,252,69,0,0,244,50,0,0,0,0,0,0,0,0,103,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,0,0,0,1,85,0,0,4,70,0,0,244,50,0,0,0,0,0,0,0,64,112,64,0,0,0,0,0,128,97,64,0,0,0,0,0,128,97,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,7,85,0,0,12,70,0,0,244,50,0,0,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,12,85,0,0,20,70,0,0,244,50,0,0,0,0,0,0,0,64,117,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,18,85,0,0,28,70,0,0,244,50,0,0,0,0,0,0,0,0,36,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,23,85,0,0,36,70,0,0,244,50,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,28,85,0,0,44,70,0,0,244,50,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,33,85,0,0,52,70,0,0,244,50,0,0,0,0,0,0,0,64,137,64,0,0,0,0,0,224,112,64,0,0,0,0,0,192,108,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,6,0,0,0,39,85,0,0,60,70,0,0,244,50,0,0,0,0,0,0,0,0,64,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,44,85,0,0,68,70,0,0,244,50,0,0,0,0,0,0,0,0,66,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,49,85,0,0,76,70,0,0,244,50,0,0,0,0,0,0,0,0,74,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,54,85,0,0,84,70,0,0,244,50,0,0,0,0,0,0,0,0,78,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,59,85,0,0,92,70,0,0,244,50,0,0,0,0,0,0,0,0,83,64,0,0,0,0,0,0,69,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,65,85,0,0,108,70,0,0,244,50,0,0,0,0,0,0,0,128,99,64,0,0,0,0,0,0,81,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0,0,71,85,0,0,128,70,0,0,244,50,0,0,0,0,0,0,0,128,105,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,0,0,0,77,85,0,0,148,70,0,0,244,50,0,0,0,0,0,0,0,128,113,64,0,0,0,0,0,128,102,64,0,0,0,0,0,0,100,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,83,85,0,0,168,70,0,0,244,50,0,0,0,0,0,0,0,128,119,64,0,0,0,0,0,0,101,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,89,85,0,0,188,70,0,0,244,50,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,94,85,0,0,200,70,0,0,244,50,0,0,0,0,0,0,0,0,62,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,99,85,0,0,212,70,0,0,244,50,0,0,0,0,0,0,0,128,139,64,0,0,0,0,0,32,120,64,0,0,0,0,0,32,113,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,105,85,0,0,236,70,0,0,244,50,0,0,0,0,0,0,0,0,76,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,119,100,0,0,12,71,0,0,40,57,0,0,0,0,0,0,0,128,99,64,0,0,0,0,0,0,81,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,20,0,0,0,1
29,100,0,0,32,71,0,0,40,57,0,0,0,0,0,0,0,128,105,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,0,0,32,0,0,0,139,100,0,0,52,71,0,0,40,57,0,0,0,0,0,0,0,128,119,64,0,0,0,0,0,0,101,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,4,0,0,0,149,100,0,0,72,71,0,0,40,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,8,0,0,0,158,100,0,0,84,71,0,0,40,57,0,0,0,0,0,0,0,0,76,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,10,0,0,0,167,100,0,0,100,71,0,0,40,57,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,12,0,0,0,176,100,0,0,108,71,0,0,40,57,0,0,0,0,0,0,0,0,86,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,16,0,0,0,185,100,0,0,116,71,0,0,40,57,0,0,0,0,0,0,0,0,97,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,194,100,0,0,124,71,0,0,40,57,0,0,0,0,0,0,0,0,103,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,2,0,0,0,203,100,0,0,132,71,0,0,40,57,0,0,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,32,0,0,0,211,100,0,0,140,71,0,0,40,57,0,0,0,0,0,0,0,64,117,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,4,0,0,0,220,100,0,0,148,71,0,0,40,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,6,0,0,0,228,100,0,0,156,71,0,0,40,57,0,0,0,0,0,0,0,0,64,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,8,0,0,0,236,100,0,0,164,71,0,0,40,57,0,0,0,0,0,0,0,0,74,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,16,0,0,0,244,100,0,0,172,71,0,0,40,57,0,0,0,0,0,0,0,128,103,64,0,0,0,0,0,0,85,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,20,0,0,0,1,101,0,0,192,71,0,0,40,57,0,0,0,0,0,0,0,128,110,64,0,0,0,0,0,0,91,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,0,0,32,0,0,0,14,101,0,0,212,71,0,0,40,57,0,0,0,0,0,0,0,128,123,64,0,0,0,0,0,128,103,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,4,0,0,0,27,101,0,0,232,71,0,0,40,57,0,0,0,0,0,0,0,0,56,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,8,0,0,0,39,101,0,0,244,71,0,0,40,57,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,50,64,0,0
,0,0,0,0,0,0,10,0,0,0,51,101,0,0,4,72,0,0,40,57,0,0,0,0,0,0,0,0,87,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,12,0,0,0,63,101,0,0,12,72,0,0,40,57,0,0,0,0,0,0,0,0,92,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,16,0,0,0,75,101,0,0,20,72,0,0,40,57,0,0,0,0,0,0,0,0,101,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,87,101,0,0,28,72,0,0,40,57,0,0,0,0,0,0,0,0,108,64,0,0,0,0,0,128,83,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,2,0,0,0,99,101,0,0,36,72,0,0,40,57,0,0,0,0,0,0,0,0,32,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,32,0,0,0,110,101,0,0,44,72,0,0,40,57,0,0,0,0,0,0,0,64,121,64,0,0,0,0,0,192,96,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,4,0,0,0,122,101,0,0,52,72,0,0,40,57,0,0,0,0,0,0,0,0,56,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,6,0,0,0,133,101,0,0,60,72,0,0,40,57,0,0,0,0,0,0,0,0,70,64,0,0,0,0,0,0,54,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,8,0,0,0,144,101,0,0,68,72,0,0,40,57,0,0,0,0,0,0,0,0,81,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,16,0,0,0,155,101,0,0,76,72,0,0,64,57,0,0,0,0,0,0,0,128,99,64,0,0,0,0,0,0,81,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,20,0,0,0,162,101,0,0,96,72,0,0,64,57,0,0,0,0,0,0,0,128,105,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,0,0,25,0,0,0,169,101,0,0,116,72,0,0,64,57,0,0,0,0,0,0,0,128,113,64,0,0,0,0,0,128,102,64,0,0,0,0,0,0,100,64,0,0,0,0,0,0,0,0,32,0,0,0,176,101,0,0,136,72,0,0,64,57,0,0,0,0,0,0,0,128,119,64,0,0,0,0,0,0,101,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,4,0,0,0,183,101,0,0,156,72,0,0,64,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,5,0,0,0,189,101,0,0,168,72,0,0,64,57,0,0,0,0,0,0,0,0,62,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,8,0,0,0,195,101,0,0,180,72,0,0,64,57,0,0,0,0,0,0,0,0,76,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,10,0,0,0,201,101,0,0,196,72,0,0,64,57,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,12,0,0,0,207,101,0,0,204,72,0,0,64,57,0,0,0,0,0,0,0,0,86,64,0,0,0,0,0,0,62,64,0,0,
0,0,0,0,62,64,0,0,0,0,0,0,0,0,15,0,0,0,213,101,0,0,212,72,0,0,64,57,0,0,0,0,0,0,0,0,96,64,0,0,0,0,0,0,76,64,0,0,0,0,0,0,76,64,0,0,0,0,0,0,0,0,16,0,0,0,219,101,0,0,220,72,0,0,64,57,0,0,0,0,0,0,0,0,97,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,225,101,0,0,228,72,0,0,64,57,0,0,0,0,0,0,0,0,103,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,25,0,0,0,231,101,0,0,236,72,0,0,64,57,0,0,0,0,0,0,0,64,112,64,0,0,0,0,0,128,97,64,0,0,0,0,0,128,97,64,0,0,0,0,0,0,0,0,2,0,0,0,237,101,0,0,244,72,0,0,64,57,0,0,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,32,0,0,0,242,101,0,0,252,72,0,0,64,57,0,0,0,0,0,0,0,64,117,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,3,0,0,0,248,101,0,0,4,73,0,0,64,57,0,0,0,0,0,0,0,0,36,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,4,0,0,0,253,101,0,0,12,73,0,0,64,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,5,0,0,0,2,102,0,0,20,73,0,0,64,57,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,64,0,0,0,7,102,0,0,28,73,0,0,64,57,0,0,0,0,0,0,0,64,137,64,0,0,0,0,0,224,112,64,0,0,0,0,0,192,108,64,0,0,0,0,0,0,0,0,6,0,0,0,13,102,0,0,36,73,0,0,64,57,0,0,0,0,0,0,0,0,64,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,7,0,0,0,18,102,0,0,44,73,0,0,64,57,0,0,0,0,0,0,0,0,66,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,8,0,0,0,23,102,0,0,52,73,0,0,64,57,0,0,0,0,0,0,0,0,74,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,9,0,0,0,28,102,0,0,60,73,0,0,64,57,0,0,0,0,0,0,0,0,78,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,10,0,0,0,33,102,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,12,0,0,0,43,102,0,0,0,0,0,0,0,128,67,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,15,0,0,0,53,102,0,0,0,0,0,0,0,0,75,64,0,0,0,0,0,0,46,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,16,0,0,0,63,102,0,0,0,0,0,0,0,0,75,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,40,64,0,0,0,0,
0,0,0,0,88,57,0,0,0,0,0,0,20,0,0,0,73,102,0,0,0,0,0,0,0,128,85,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,25,0,0,0,83,102,0,0,0,0,0,0,0,128,95,64,0,0,0,0,0,128,78,64,0,0,0,0,0,192,85,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,2,0,0,0,93,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,32,0,0,0,102,102,0,0,0,0,0,0,0,64,97,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,3,0,0,0,112,102,0,0,0,0,0,0,0,0,8,64,0,0,0,0,0,0,240,63,0,0,0,0,0,0,240,63,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,4,0,0,0,121,102,0,0,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,5,0,0,0,130,102,0,0,0,0,0,0,0,0,34,64,0,0,0,0,0,0,8,64,0,0,0,0,0,0,8,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,64,0,0,0,139,102,0,0,0,0,0,0,0,96,117,64,0,0,0,0,0,128,92,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,6,0,0,0,149,102,0,0,0,0,0,0,0,0,38,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,7,0,0,0,158,102,0,0,0,0,0,0,0,0,40,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,8,0,0,0,167,102,0,0,0,0,0,0,0,0,50,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,9,0,0,0,176,102,0,0,0,0,0,0,0,0,57,64,0,0,0,0,0,0,42,64,0,0,0,0,0,0,49,64,0,0,0,0,0,0,0,0,88,57,0,0,0,0,0,0,10,0,0,0,185,102,0,0,0,0,0,0,0,0,60,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,11,0,0,0,193,102,0,0,0,0,0,0,0,0,52,64,0,0,0,0,0,0,36,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,128,0,0,0,201,102,0,0,0,0,0,0,0,96,137,64,0,0,0,0,0,64,103,64,0,0,0,0,0,0,98,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,12,0,0,0,210,102,0,0,0,0,0,0,0,0,65,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,13,0,0,0,218,102,0,0,0,0,0,0,0,128,76,64,0,0,0,0,0,0,46,64,0,0,0,0,0,0,51,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,14,0,0,0,226,102,0,0,0,0,0,0,0,0,67,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,15,0,0,0,234,10
2,0,0,0,0,0,0,0,0,73,64,0,0,0,0,0,0,38,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,16,0,0,0,242,102,0,0,0,0,0,0,0,0,75,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,20,0,0,0,250,102,0,0,0,0,0,0,0,128,82,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,25,0,0,0,2,103,0,0,0,0,0,0,0,64,93,64,0,0,0,0,0,128,76,64,0,0,0,0,0,192,84,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,2,0,0,0,10,103,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,32,0,0,0,17,103,0,0,0,0,0,0,0,128,97,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,3,0,0,0,25,103,0,0,0,0,0,0,0,0,8,64,0,0,0,0,0,0,240,63,0,0,0,0,0,0,240,63,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,4,0,0,0,32,103,0,0,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,5,0,0,0,39,103,0,0,0,0,0,0,0,0,34,64,0,0,0,0,0,0,8,64,0,0,0,0,0,0,8,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,64,0,0,0,46,103,0,0,0,0,0,0,0,96,117,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,74,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,6,0,0,0,54,103,0,0,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,7,0,0,0,61,103,0,0,0,0,0,0,0,0,40,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,8,0,0,0,68,103,0,0,0,0,0,0,0,0,52,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,9,0,0,0,75,103,0,0,0,0,0,0,0,0,53,64,0,0,0,0,0,0,34,64,0,0,0,0,0,0,49,64,0,0,0,0,0,0,0,0,80,57,0,0,0,0,0,0,16,0,0,0,255,110,0,0,68,73,0,0,72,57,0,0,0,0,0,0,0,128,99,64,0,0,0,0,0,0,81,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,20,0,0,0,6,111,0,0,88,73,0,0,72,57,0,0,0,0,0,0,0,128,105,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,0,0,25,0,0,0,13,111,0,0,108,73,0,0,72,57,0,0,0,0,0,0,0,128,113,64,0,0,0,0,0,128,102,64,0,0,0,0,0,0,100,64,0,0,0,0,0,0,0,0,32,0,0,0,20,111,0,0,128,73,0,0,72,57,0,0,0,0,0,0,0,128,119,64,0,0,0,0,0,0,101,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,4,0,0,0,27,111,0,0,148,73,0,0,72,57,0,0,0,0,0,0,0,0,48,
64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,5,0,0,0,33,111,0,0,160,73,0,0,72,57,0,0,0,0,0,0,0,0,62,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,8,0,0,0,39,111,0,0,172,73,0,0,72,57,0,0,0,0,0,0,0,0,76,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,10,0,0,0,45,111,0,0,188,73,0,0,72,57,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,12,0,0,0,51,111,0,0,196,73,0,0,72,57,0,0,0,0,0,0,0,0,86,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,15,0,0,0,57,111,0,0,204,73,0,0,72,57,0,0,0,0,0,0,0,0,96,64,0,0,0,0,0,0,76,64,0,0,0,0,0,0,76,64,0,0,0,0,0,0,0,0,16,0,0,0,63,111,0,0,212,73,0,0,72,57,0,0,0,0,0,0,0,0,97,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,69,111,0,0,220,73,0,0,72,57,0,0,0,0,0,0,0,0,103,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,25,0,0,0,75,111,0,0,228,73,0,0,72,57,0,0,0,0,0,0,0,64,112,64,0,0,0,0,0,128,97,64,0,0,0,0,0,128,97,64,0,0,0,0,0,0,0,0,2,0,0,0,81,111,0,0,236,73,0,0,72,57,0,0,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,32,0,0,0,86,111,0,0,244,73,0,0,72,57,0,0,0,0,0,0,0,64,117,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,3,0,0,0,92,111,0,0,252,73,0,0,72,57,0,0,0,0,0,0,0,0,36,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,4,0,0,0,97,111,0,0,4,74,0,0,72,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,5,0,0,0,102,111,0,0,12,74,0,0,72,57,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,64,0,0,0,107,111,0,0,20,74,0,0,72,57,0,0,0,0,0,0,0,64,137,64,0,0,0,0,0,224,112,64,0,0,0,0,0,192,108,64,0,0,0,0,0,0,0,0,6,0,0,0,113,111,0,0,28,74,0,0,72,57,0,0,0,0,0,0,0,0,64,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,7,0,0,0,118,111,0,0,36,74,0,0,72,57,0,0,0,0,0,0,0,0,66,64,0,0,0,0,0,0,56,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,8,0,0,0,123,111,0,0,44,74,0,0,72,57,0,0,0,0,0,0,0,0,74,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,9,0,0,0,128,111,0,0,52,74,0,0,72,57,0,0,0,0,0,0,0,
0,78,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,16,0,0,0,133,111,0,0,60,74,0,0,52,57,0,0,0,0,0,0,0,128,99,64,0,0,0,0,0,0,81,64,0,0,0,0,0,0,68,64,0,0,0,0,0,0,0,0,20,0,0,0,143,111,0,0,80,74,0,0,52,57,0,0,0,0,0,0,0,128,105,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,82,64,0,0,0,0,0,0,0,0,32,0,0,0,153,111,0,0,100,74,0,0,52,57,0,0,0,0,0,0,0,128,119,64,0,0,0,0,0,0,101,64,0,0,0,0,0,0,92,64,0,0,0,0,0,0,0,0,4,0,0,0,163,111,0,0,120,74,0,0,52,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,8,0,0,0,172,111,0,0,132,74,0,0,52,57,0,0,0,0,0,0,0,0,76,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,0,0,10,0,0,0,181,111,0,0,148,74,0,0,52,57,0,0,0,0,0,0,0,0,82,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,12,0,0,0,190,111,0,0,156,74,0,0,52,57,0,0,0,0,0,0,0,0,86,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,16,0,0,0,199,111,0,0,164,74,0,0,52,57,0,0,0,0,0,0,0,0,97,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,208,111,0,0,172,74,0,0,52,57,0,0,0,0,0,0,0,0,103,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,2,0,0,0,217,111,0,0,180,74,0,0,52,57,0,0,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,32,0,0,0,225,111,0,0,188,74,0,0,52,57,0,0,0,0,0,0,0,64,117,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,4,0,0,0,234,111,0,0,196,74,0,0,52,57,0,0,0,0,0,0,0,0,48,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,6,0,0,0,242,111,0,0,204,74,0,0,52,57,0,0,0,0,0,0,0,0,64,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,8,0,0,0,250,111,0,0,212,74,0,0,52,57,0,0,0,0,0,0,0,0,74,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,16,0,0,0,2,112,0,0,220,74,0,0,52,57,0,0,0,0,0,0,0,0,101,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,15,112,0,0,228,74,0,0,52,57,0,0,0,0,0,0,0,0,108,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,32,0,0,0,28,112,0,0,236,74,0,0,52,57,0,0,0,0,0,0,0,64,121,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,4,0,0,0,41,112,0,0,244,74,
0,0,52,57,0,0,0,0,0,0,0,0,56,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,8,0,0,0,53,112,0,0,252,74,0,0,52,57,0,0,0,0,0,0,0,0,81,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,10,0,0,0,65,112,0,0,4,75,0,0,52,57,0,0,0,0,0,0,0,0,87,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,12,0,0,0,77,112,0,0,12,75,0,0,52,57,0,0,0,0,0,0,0,0,92,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,62,64,0,0,0,0,0,0,0,0,16,0,0,0,89,112,0,0,20,75,0,0,52,57,0,0,0,0,0,0,0,0,101,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,67,64,0,0,0,0,0,0,0,0,20,0,0,0,101,112,0,0,28,75,0,0,52,57,0,0,0,0,0,0,0,0,108,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,79,64,0,0,0,0,0,0,0,0,2,0,0,0,113,112,0,0,36,75,0,0,52,57,0,0,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,32,0,0,0,124,112,0,0,44,75,0,0,52,57,0,0,0,0,0,0,0,64,121,64,0,0,0,0,0,128,92,64,0,0,0,0,0,128,87,64,0,0,0,0,0,0,0,0,4,0,0,0,136,112,0,0,52,75,0,0,52,57,0,0,0,0,0,0,0,0,56,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,6,0,0,0,147,112,0,0,60,75,0,0,52,57,0,0,0,0,0,0,0,0,70,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,8,0,0,0,158,112,0,0,68,75,0,0,52,57,0,0,0,0,0,0,0,0,81,64,0,0,0,0,0,0,50,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,0,0,10,0,0,0,169,112,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,36,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,12,0,0,0,180,112,0,0,0,0,0,0,0,0,67,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,15,0,0,0,191,112,0,0,0,0,0,0,0,128,72,64,0,0,0,0,0,0,38,64,0,0,0,0,0,0,46,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,16,0,0,0,202,112,0,0,0,0,0,0,0,0,75,64,0,0,0,0,0,0,52,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,20,0,0,0,213,112,0,0,0,0,0,0,0,128,84,64,0,0,0,0,0,0,64,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,25,0,0,0,224,112,0,0,0,0,0,0,0,0,89,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,74,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,2,0,0,0,235,112,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,32,0,0,0,245,112,0,0,0,0,0,0,0,64,97,64,0
,0,0,0,0,0,72,64,0,0,0,0,0,0,66,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,3,0,0,0,0,113,0,0,0,0,0,0,0,0,8,64,0,0,0,0,0,0,240,63,0,0,0,0,0,0,240,63],"i8",ALLOC_NONE,Runtime.GLOBAL_BASE);allocate([0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,4,0,0,0,10,113,0,0,0,0,0,0,0,0,24,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,5,0,0,0,20,113,0,0,0,0,0,0,0,0,32,64,0,0,0,0,0,0,8,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,64,0,0,0,30,113,0,0,0,0,0,0,0,96,117,64,0,0,0,0,0,0,93,64,0,0,0,0,0,0,87,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,6,0,0,0,41,113,0,0,0,0,0,0,0,0,36,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,7,0,0,0,51,113,0,0,0,0,0,0,0,0,34,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,46,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,8,0,0,0,61,113,0,0,0,0,0,0,0,0,50,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,9,0,0,0,71,113,0,0,0,0,0,0,0,0,54,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,36,64,0,0,0,0,0,0,0,0,104,57,0,0,0,0,0,0,10,0,0,0,81,113,0,0,0,0,0,0,0,0,58,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,11,0,0,0,89,113,0,0,0,0,0,0,0,0,51,64,0,0,0,0,0,0,36,64,0,0,0,0,0,128,68,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,128,0,0,0,97,113,0,0,0,0,0,0,0,96,137,64,0,0,0,0,0,192,104,64,0,0,0,0,0,0,98,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,12,0,0,0,106,113,0,0,0,0,0,0,0,0,65,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,13,0,0,0,114,113,0,0,0,0,0,0,0,0,76,64,0,0,0,0,0,0,46,64,0,0,0,0,0,0,52,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,14,0,0,0,122,113,0,0,0,0,0,0,0,0,66,64,0,0,0,0,0,0,40,64,0,0,0,0,0,0,58,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,15,0,0,0,130,113,0,0,0,0,0,0,0,128,71,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,49,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,16,0,0,0,138,113,0,0,0,0,0,0,0,0,75,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,20,0,0,0,146,113,0,0,0,0,0,0,0,128,81,64,0,0,0,0,0,0,44,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,25,0,0,0,154,113,0,0,0,0,0,0,
0,0,89,64,0,0,0,0,0,0,71,64,0,0,0,0,0,0,74,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,2,0,0,0,162,113,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,32,0,0,0,169,113,0,0,0,0,0,0,0,128,97,64,0,0,0,0,0,0,65,64,0,0,0,0,0,0,48,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,3,0,0,0,177,113,0,0,0,0,0,0,0,0,8,64,0,0,0,0,0,0,240,63,0,0,0,0,0,0,240,63,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,4,0,0,0,184,113,0,0,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,5,0,0,0,191,113,0,0,0,0,0,0,0,0,32,64,0,0,0,0,0,0,8,64,0,0,0,0,0,0,16,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,64,0,0,0,198,113,0,0,0,0,0,0,0,96,117,64,0,0,0,0,0,128,84,64,0,0,0,0,0,0,74,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,6,0,0,0,206,113,0,0,0,0,0,0,0,0,40,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,7,0,0,0,213,113,0,0,0,0,0,0,0,0,38,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,42,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,8,0,0,0,220,113,0,0,0,0,0,0,0,0,52,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,9,0,0,0,227,113,0,0,0,0,0,0,0,0,54,64,0,0,0,0,0,0,32,64,0,0,0,0,0,0,36,64,0,0,0,0,0,0,0,0,96,57,0,0,0,0,0,0,8,0,0,0,18,114,0,0,0,0,0,0,0,0,52,64,0,0,0,0,0,0,34,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,112,57,0,0,10,0,0,0,8,0,0,0,24,114,0,0,0,0,0,0,0,0,52,64,0,0,0,0,0,0,36,64,0,0,0,0,0,0,24,64,0,0,0,0,0,0,0,0,112,57,0,0,11,0,0,0,120,164,106,215,86,183,199,232,219,112,32,36,238,206,189,193,175,15,124,245,42,198,135,71,19,70,48,168,1,149,70,253,216,152,128,105,175,247,68,139,177,91,255,255,190,215,92,137,34,17,144,107,147,113,152,253,142,67,121,166,33,8,180,73,98,37,30,246,64,179,64,192,81,90,94,38,170,199,182,233,93,16,47,214,83,20,68,2,129,230,161,216,200,251,211,231,230,205,225,33,214,7,55,195,135,13,213,244,237,20,90,69,5,233,227,169,248,163,239,252,217,2,111,103,138,76,42,141,66,57,250,255,129,246,113,135,34,97,157,109,12,56,229,253,68,234,190,164,169,207,222,75,96,75,187,246,112,188,191,190,198,126,155,40,250,39,161,234,133,48,239,212,5,29
,136,4,57,208,212,217,229,153,219,230,248,124,162,31,101,86,172,196,68,34,41,244,151,255,42,67,167,35,148,171,57,160,147,252,195,89,91,101,146,204,12,143,125,244,239,255,209,93,132,133,79,126,168,111,224,230,44,254,20,67,1,163,161,17,8,78,130,126,83,247,53,242,58,189,187,210,215,42,145,211,134,235,1,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,16,0,0,0,0,8,0,0,8,0,0,0,0,0,1,0,2,0,0,0,3,0,0,0,5,0,0,0,0,0,0,0,196,46,0,0,0,0,0,0,4,0,0,0,1,0,0,0,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,8,0,0,0,3,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,8,0,0,0,0,1,0,0,6,0,0,0,9,0,0,0,10,0,0,0,4,0,0,0,5,0,0,0,138,77,0,0,6,0,0,0,166,77,0,0,7,0,0,0,204,77,0,0,8,0,0,0,233,77,0,0,9,0,0,0,7,78,0,0,10,0,0,0,35,78,0,0,11,0,0,0,62,78,0,0,12,0,0,0,87,78,0,0,13,0,0,0,116,78,0,0,14,0,0,0,139,78,0,0,15,0,0,0,165,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,6,0,0,0,11,0,0,0,12,0,0,0,16,0,0,0,0,0,0,0,13,0,0,0,14,0,0,0,17,0,0,0,0,0,0,0,15,0,0,0,16,0,0,0,18,0,0,0,0,0,0,0,17,0,0,0,18,0,0,0,19,0,0,0,255,255,255,255,254,255,255,255,252,255,255,255,248,255,255,255,240,255,255,255,224,255,255,255,192,255,255,255,4,0,0,0,8,0,0,0,16
,0,0,0,32,0,0,0,64,0,0,0,0,0,0,0,19,0,0,0,20,0,0,0,20,0,0,0,1,0,0,0,4,0,0,0,0,0,0,0,6,0,0,0,21,0,0,0,22,0,0,0,21,0,0,0,1,0,0,0,5,0,0,0,0,0,0,0,6,0,0,0,23,0,0,0,24,0,0,0,22,0,0,0,1,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,25,0,0,0,26,0,0,0,23,0,0,0,56,50,0,0,68,50,0,0,1,0,0,0,7,0,0,0,0,0,0,0,6,0,0,0,27,0,0,0,28,0,0,0,24,0,0,0,1,0,0,0,1,0,0,0,248,79,0,0,2,0,0,0,2,0,0,0,229,79,0,0,1,0,0,0,8,0,0,0,0,0,0,0,6,0,0,0,21,0,0,0,29,0,0,0,22,0,0,0,1,0,0,0,30,0,0,0,25,0,0,0,31,0,0,0,26,0,0,0,1,0,0,0,9,0,0,0,0,0,0,0,6,0,0,0,32,0,0,0,33,0,0,0,27,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,254,255,255,255,1,0,0,0,10,0,0,0,0,0,0,0,6,0,0,0,34,0,0,0,35,0,0,0,28,0,0,0,1,0,0,0,255,255,255,255,1,0,0,0,11,0,0,0,0,0,0,0,6,0,0,0,36,0,0,0,37,0,0,0,29,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,30,0,0,0,140,80,0,0,31,0,0,0,159,80,0,0,32,0,0,0,178,80,0,0,33,0,0,0,197,80,0,0,34,0,0,0,216,80,0,0,35,0,0,0,235,80,0,0,36,0,0,0,254,80,0,0,37,0,0,0,17,81,0,0,38,0,0,0,36,81,0,0,39,0,0,0,56,81,0,0,40,0,0,0,76,81,0,0,41,0,0,0,96,81,0,0,42,0,0,0,116,81,0,0,43,0,0,0,136,81,0,0,44,0,0,0,156,81,0,0,45,0,0,0,176,81,0,0,46,0,0,0,196,81,0,0,47,0,0,0,216,81,0,0,48,0,0,0,236,81,0,0,49,0,0,0,0,82,0,0,50,0,0,0,19,82,0,0,51,0,0,0,38,82,0,0,52,0,0,0,57,82,0,0,53,0,0,0,76,82,0,0,54,0,0,0,95,82,0,0,55,0,0,0,114,82,0,0,56,0,0,0,133,82,0,0,57,0,0,0,152,82,0,0,58,0,0,0,172,82,0,0,59,0,0,0,192,82,0,0,60,0,0,0,212,82,0,0,61,0,0,0,232,82,0,0,62,0,0,0,252,82,0,0,63,0,0,0,16,83,0,0,64,0,0,0,36,83,0,0,65,0,0,0,56,83,0,0,66,0,0,0,75,83,0,0,67,0,0,0,94,83,0,0,68,0,0,0,114,83,0,0,69,0,0,0,134,83,0,0,70,0,0,0,154,83,0,0,71,0,0,0,173,83,0,0,72,0,0,0,193,83,0,0,73,0,0,0,213,83,0,0,74,0,0,0,233,83,0,0,75,0,0,0,252,83,0,0,76,0,0,0,15,84,0,0,77,0,0,0,34,84,0,0,78,0,0,0,53,84,0,0,79,0,0,0,72,84,0,0,0,0,0,0,0,0,0,0,3,0,0,0,12,0,0,0,0,0,0,0,8,0,0,0,0,1,0,0,38,0,0,0,39,0,0,0,40,0,0,0,80,0,0,0,2,0,0,0,13,0,0,0,0,0,0,0,8,0,0,0,0,1,0,0,41,0,0,0,42,0,0,0,43,0,0,0,81,0,0,0,82,0,0,0,201,85,0,0,83,0,0,0,230,85,0,0,84,0,0,0,0,86,0,0,85,0,0,0,37,86,0,0,86,0,0,0,68,86,0,0,87,0,0,
0,92,86,0,0,88,0,0,0,121,86,0,0,89,0,0,0,149,86,0,0,90,0,0,0,179,86,0,0,91,0,0,0,203,86,0,0,92,0,0,0,227,86,0,0,93,0,0,0,251,86,0,0,94,0,0,0,20,87,0,0,95,0,0,0,52,87,0,0,96,0,0,0,77,87,0,0,97,0,0,0,104,87,0,0,98,0,0,0,134,87,0,0,99,0,0,0,165,87,0,0,100,0,0,0,191,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44,0,0,0,45,0,0,0,101,0,0,0,3,0,0,0,14,0,0,0,0,0,0,0,38,0,0,0,46,0,0,0,47,0,0,0,102,0,0,0,1,0,0,0,15,0,0,0,0,0,0,0,6,0,0,0,48,0,0,0,49,0,0,0,103,0,0,0,2,0,0,0,16,0,0,0,0,0,0,0,41,0,0,0,50,0,0,0,51,0,0,0,104,0,0,0,2,0,0,0,17,0,0,0,0,0,0,0,41,0,0,0,52,0,0,0,53,0,0,0,105,0,0,0,0,0,0,0,2,0,0,0,3,0,0,0,5,0,0,0,0,0,0,0,2,0,0,0,18,0,0,0,0,0,0,0,41,0,0,0,21,0,0,0,54,0,0,0,106,0,0,0,2,0,0,0,19,0,0,0,0,0,0,0,41,0,0,0,21,0,0,0,55,0,0,0,107,0,0,0,3,0,0,0,20,0,0,0,0,0,0,0,38,0,0,0,21,0,0,0,56,0,0,0,108,0,0,0,2,0,0,0,21,0,0,0,0,0,0,0,41,0,0,0,57,0,0,0,58,0,0,0,22,0,0,0,0,0,0,0,59,0,0,0,60,0,0,0,109,0,0,0,0,0,0,0,61,0,0,0,62,0,0,0,110,0,0,0,0,0,0,0,2,0,0,0,22,0,0,0,0,0,0,0,41,0,0,0,63,0,0,0,64,0,0,0,111,0,0,0,228,54,0,0,240,54,0,0,2,0,0,0,23,0,0,0,0,0,0,0,41,0,0,0,65,0,0,0,66,0,0,0,112,0,0,0,1,0,0,0,3,0,0,0,246,89,0,0,2,0,0,0,4,0,0,0,226,89,0,0,3,0,0,0,24,0,0,0,0,0,0,0,38,0,0,0,21,0,0,0,67,0,0,0,22,0,0,0,2,0,0,0,25,0,0,0,0,0,0,0,41,0,0,0,21,0,0,0,68,0,0,0,22,0,0,0,3,0,0,0,69,0,0,0,113,0,0,0,70,0,0,0,114,0,0,0,2,0,0,0,71,0,0,0,115,0,0,0,72,0,0,0,116,0,0,0,1,0,0,0,0,0,0,0,254,255,255,255,3,0,0,0,26,0,0,0,0,0,0,0,38,0,0,0,73,0,0,0,74,0,0,0,117,0,0,0,1,0,0,0,0,0,0,0,254,255,255,255,2,0,0,0,27,0,0,0,0,0,0,0,41,0,0,0,75,0,0,0,76,0,0,0,118,0,0,0,3,0,0,0,28,0,0,0,0,0,0,0,38,0,0,0,77,0,0,0,78,0,0,0,119,0,0,0,2,0,0,0,29,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,74,91,0,0,4,0,0,0,4,0,0,0,92,91,0,0,5,0,0,0,5,0,0,0,115,91,0,0,6,0,0,0,6,0,0,0,134,91,0,0,7,0,0,0,7,0,0,0,153,91,0,0,8,0,0,0,7,0,0,0,170,91,0,0,9,0,0,0,8,0,0,0,190,91,0,0,10,0,0,0,9,0,0,0,207,91,0,0,11,0,0,0,9,0,0,0,230,91,0,0,41,0,0,0,21,0,0,0,79,0,0,0,22,0,0,0,2,0,0,0,30,0,0,0,0,0,0,0,41,0,0,0,80,0,0,0,81,0,0,0,120,0,0,0,3,0,0,0,31,0,0,0,0,0,0,0,38,0,0,0,
82,0,0,0,83,0,0,0,121,0,0,0,1,0,0,0,255,255,255,255,3,0,0,0,32,0,0,0,0,0,0,0,38,0,0,0,84,0,0,0,85,0,0,0,122,0,0,0,1,0,0,0,255,255,255,255,2,0,0,0,33,0,0,0,0,0,0,0,41,0,0,0,86,0,0,0,87,0,0,0,123,0,0,0,248,56,0,0,8,57,0,0,24,57,0,0,2,0,0,0,34,0,0,0,0,0,0,0,41,0,0,0,88,0,0,0,89,0,0,0,124,0,0,0,12,0,0,0,1,0,0,0,35,0,0,0,199,92,0,0,13,0,0,0,2,0,0,0,36,0,0,0,180,92,0,0,14,0,0,0,3,0,0,0,37,0,0,0,157,92,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,4,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,4,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,4,0,0,0,1,0,0,0,6,0,0,0,1,0,0,0,1,0,0,0,125,0,0,0,218,92,0,0,126,0,0,0,239,92,0,0,127,0,0,0,4,93,0,0,128,0,0,0,25,93,0,0,129,0,0,0,46,93,0,0,130,0,0,0,67,93,0,0,131,0,0,0,88,93,0,0,132,0,0,0,109,93,0,0,133,0,0,0,130,93,0,0,134,0,0,0,152,93,0,0,135,0,0,0,174,93,0,0,136,0,0,0,196,93,0,0,137,0,0,0,218,93,0,0,138,0,0,0,240,93,0,0,139,0,0,0,6,94,0,0,140,0,0,0,28,94,0,0,141,0,0,0,50,94,0,0,142,0,0,0,72,94,0,0,143,0,0,0,95,94,0,0,144,0,0,0,117,94,0,0,145,0,0,0,139,94,0,0,146,0,0,0,158,94,0,0,147,0,0,0,177,94,0,0,148,0,0,0,196,94,0,0,149,0,0,0,215,94,0,0,150,0,0,0,234,94,0,0,151,0,0,0,253,94,0,0,152,0,0,0,16,95,0,0,153,0,0,0,35,95,0,0,154,0,0,0,55,95,0,0,155,0,0,0,75,95,0,0,156,0,0,0,95,95,0,0,157,0,0,0,115,95,0,0,158,0,0,0,135,95,0,0,159,0,0,0,155,95,0,0,160,0,0,0,175,95,0,0,161,0,0,0,195,95,0,0,162,0,0,0,215,95,0,0,163,0,0,0,235,95,0,0,164,0,0,0,0,96,0,0,165,0,0,0,21,96,0,0,166,0,0,0,41,96,0,0,167,0,0,0,62,96,0,0,168,0,0,0,83,96,0,0,169,0,0,0,106,96,0,0,170,0,0,0,129,96,0,0,171,0,0,0,152,96,0,0,172,0,0,0,175,96,0,0,173,0,0,0,198,96,0,0,174,0,0,0,221,96,0,0,175,0,0,0,244,96,0,0,176,0,0,0,11,97,0,0,177,0,0,0,35,97,0,0,178,0,0,0,59,97,0,0,179,0,0,0,83,97,0,0,180,0,0,0,107,97,0,0,181,0,0,0,131,97,0,0,182,0,0,0,155,97,0,0,183,0,0,0,179,97,0,0,184,0,0,0,203,97,0,0,185,0,0,0,225,97,0,0,186,0,0,0,247,97,0,0,187,0,0,0,13,98,0,0,188,0,0,0,35,98,0,0,189,0,0,0,58,98,0,0,190,0,0,0,81,98,0,0,191,0,0,0,104,98,0,0,192,0,0,0,127,98,0,0,193,0,0,0,150,98,0,0,194,0,0,0,173,98,0,0,195,0,
0,0,196,98,0,0,196,0,0,0,220,98,0,0,197,0,0,0,244,98,0,0,198,0,0,0,12,99,0,0,199,0,0,0,37,99,0,0,200,0,0,0,62,99,0,0,201,0,0,0,87,99,0,0,202,0,0,0,112,99,0,0,203,0,0,0,138,99,0,0,204,0,0,0,164,99,0,0,205,0,0,0,190,99,0,0,206,0,0,0,216,99,0,0,207,0,0,0,242,99,0,0,208,0,0,0,12,100,0,0,209,0,0,0,38,100,0,0,210,0,0,0,65,100,0,0,211,0,0,0,92,100,0,0,0,0,0,0,0,0,0,0,212,0,0,0,82,103,0,0,213,0,0,0,103,103,0,0,214,0,0,0,124,103,0,0,215,0,0,0,145,103,0,0,216,0,0,0,166,103,0,0,217,0,0,0,187,103,0,0,218,0,0,0,208,103,0,0,219,0,0,0,229,103,0,0,220,0,0,0,250,103,0,0,221,0,0,0,16,104,0,0,222,0,0,0,38,104,0,0,223,0,0,0,60,104,0,0,224,0,0,0,82,104,0,0,225,0,0,0,104,104,0,0,226,0,0,0,126,104,0,0,227,0,0,0,148,104,0,0,228,0,0,0,170,104,0,0,229,0,0,0,192,104,0,0,230,0,0,0,215,104,0,0,231,0,0,0,237,104,0,0,232,0,0,0,3,105,0,0,233,0,0,0,22,105,0,0,234,0,0,0,41,105,0,0,235,0,0,0,60,105,0,0,236,0,0,0,79,105,0,0,237,0,0,0,98,105,0,0,238,0,0,0,117,105,0,0,239,0,0,0,136,105,0,0,240,0,0,0,155,105,0,0,241,0,0,0,175,105,0,0,242,0,0,0,195,105,0,0,243,0,0,0,215,105,0,0,244,0,0,0,235,105,0,0,245,0,0,0,255,105,0,0,246,0,0,0,19,106,0,0,247,0,0,0,39,106,0,0,248,0,0,0,59,106,0,0,249,0,0,0,79,106,0,0,250,0,0,0,99,106,0,0,251,0,0,0,120,106,0,0,252,0,0,0,141,106,0,0,253,0,0,0,161,106,0,0,254,0,0,0,182,106,0,0,255,0,0,0,203,106,0,0,0,1,0,0,227,106,0,0,1,1,0,0,251,106,0,0,2,1,0,0,19,107,0,0,3,1,0,0,43,107,0,0,4,1,0,0,67,107,0,0,5,1,0,0,91,107,0,0,6,1,0,0,115,107,0,0,7,1,0,0,139,107,0,0,8,1,0,0,164,107,0,0,9,1,0,0,189,107,0,0,10,1,0,0,214,107,0,0,11,1,0,0,239,107,0,0,12,1,0,0,8,108,0,0,13,1,0,0,33,108,0,0,14,1,0,0,58,108,0,0,15,1,0,0,83,108,0,0,16,1,0,0,105,108,0,0,17,1,0,0,127,108,0,0,18,1,0,0,149,108,0,0,19,1,0,0,171,108,0,0,20,1,0,0,194,108,0,0,21,1,0,0,217,108,0,0,22,1,0,0,240,108,0,0,23,1,0,0,7,109,0,0,24,1,0,0,30,109,0,0,25,1,0,0,53,109,0,0,26,1,0,0,76,109,0,0,27,1,0,0,100,109,0,0,28,1,0,0,124,109,0,0,29,1,0,0,148,109,0,0,30,1,0,0,173,109,0,0,31,1,0,0,198,109,0,0,32,1,0,0,223,109,0,0,33,1,0,0,248,109,
0,0,34,1,0,0,18,110,0,0,35,1,0,0,44,110,0,0,36,1,0,0,70,110,0,0,37,1,0,0,96,110,0,0,38,1,0,0,122,110,0,0,39,1,0,0,148,110,0,0,40,1,0,0,174,110,0,0,41,1,0,0,201,110,0,0,42,1,0,0,228,110,0,0,0,0,0,0,0,0,0,0,43,1,0,0,234,113,0,0,44,1,0,0,254,113,0,0,0,0,0,0,0,0,0,0,45,1,0,0,30,114,0,0,46,1,0,0,63,114,0,0,47,1,0,0,96,114,0,0,48,1,0,0,132,114,0,0,49,1,0,0,163,114,0,0,50,1,0,0,200,114,0,0,0,0,0,0,0,0,0,0,2,0,0,0,38,0,0,0,0,0,0,0,41,0,0,0,90,0,0,0,91,0,0,0,51,1,0,0,2,0,0,0,39,0,0,0,0,0,0,0,41,0,0,0,92,0,0,0,93,0,0,0,52,1,0,0,2,0,0,0,40,0,0,0,0,0,0,0,41,0,0,0,94,0,0,0,95,0,0,0,53,1,0,0,2,0,0,0,41,0,0,0,0,0,0,0,41,0,0,0,96,0,0,0,97,0,0,0,54,1,0,0,2,0,0,0,42,0,0,0,0,0,0,0,41,0,0,0,98,0,0,0,99,0,0,0,55,1,0,0,2,0,0,0,43,0,0,0,0,0,0,0,41,0,0,0,100,0,0,0,101,0,0,0,56,1,0,0,64,0,0,0,0,0,0,0,32,0,0,0,8,0,0,0,16,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,16,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,32,0,0,0,32,0,0,0,64,0,0,0,0,0,0,0,128,16,16,0,0,0,0,0,8,0,0,0,8,0,0,0,0,0,4,0,0,0,0,0,32,0,0,0,32,0,0,0,0,199,9,0,0,0,0,0,16,0,0,0,0,0,0,0,0,16,0,0,0,0,0,0,16,0,0,0,16,0,0,0,0,16,0,0,0,16,0,0,0,0,2,0,0,0,0,0,0,32,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,32,0,0,0,32,0,0,4,0,0,0,0,0,0,0,0,64,0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,64,0,0,0,64,0,0,0,8,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,8,0,0,0,8,0,0,0,4,0,0,0,4,0,0,0,32,0,0,0,0,0,0,64,0,0,0,64,0,0,0,0,32,0,0,0,32,0,0,64,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,8,0,0,0,8,0,0,0,0,0,1,0,0,0,0,0,128,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,2,0,0,0,2,0,0,0,0,0,16,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,16,0,0,0,16,0,0,0,2,0,0,0,2,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,4,0,0,0,4,0,0,0,0,4,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,2,0,0,0,2,0,0,0,16,0,0,0,0,0,0,32,0,0,0,0,0,0,0,0,16,0,0,0,16,0,0,32,0,0,0,32,0,0,0,0,64,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,64,0,0,0,64,0,0,128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,1,0,0,0,1,0
,0,0,0,1,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,16,0,0,0,16,0,0,0,0,0,4,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,4,0,0,0,4,0,8,0,0,0,8,0,0,0,0,0,8,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,8,0,0,0,8,0,0,8,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,204,66,0,0,60,67,0,0,60,67,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44,0,0,0,45,0,0,0,67,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46,0,0,0,45,0,0,0,59,123,0,0,0,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,3,1,0,0,5,1,0,0,3,1,0,0,4,0,2,0,3,1,0,0,4,0,3,0,3,1,0,0,4,0,4,0,3,1,0,0,4,0,5,0,3,1,0,0,4,0,6,0,3,1,0,0,4,0,8,0,3,1,0,0,4,0,10,0,3,1,0,0,4,0,12,0,3,1,0,0,4,0,15,0,3,1,0,0,4,0,16,0,3,1,0,0,4,
0,20,0,3,1,0,0,4,0,25,0,3,1,0,0,4,0,2,0,3,1,0,0,4,0,32,0,3,1,0,0,4,0,3,0,3,1,0,0,4,0,4,0,3,1,0,0,4,0,5,0,3,1,0,0,4,0,64,0,3,1,0,0,4,0,6,0,3,1,0,0,4,0,7,0,3,1,0,0,4,0,8,0,3,1,0,0,4,0,9,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,9,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,9,0,2,0,15,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,9,0,2,0,19,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,9,0,2,0,24,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,9,0,2,0,27,0,3,1,0,0,2,0,1,0,2,0,3,0,3,1,0,0,2,0,1,0,2,0,3,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,9,0,2,0,27,0,2,0,63,0,3,1,0,0,2,0,1,0,2,0,3,0,2,0,7,0,3,1,0,0,5,1,0,0,3,1,0,0,5,0,0,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,15,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,19,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,27,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,7,0,3,1,0,0,4,1,10,0,3,1,0,0,4,1,12,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,2,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,6,0,3,1,0,0,4,1,8,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,15,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,19,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,27,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,7,0,3,1,0,0,4,1,10,0,3,1,0,0,4,1,12,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,2,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,6,0,3,1,0,0,4,1,8,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,15,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,19,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,24,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,27,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,7,0,3,1,0,0,4,1,10,0,3,1,0,0,4,1,12,0,3,1,0,0,4,1,15,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,25,0,3,1,0,0,4,1,2,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,3,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,5,0,3,1,0,0,4,1,64,0,3,1,0,0,4,1,6,0,3,1,0,0,4,1,7,0,3,1,0,0,4,1,8,0,3,1,0,0,4,1,9,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,15,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,19,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,24,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,27,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,7,0,3,1,0,0,4,1,10,0,3,1,0,0,4,1,12,0,3,1,0,0,4,1,15
,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,25,0,3,1,0,0,4,1,2,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,3,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,5,0,3,1,0,0,4,1,64,0,3,1,0,0,4,1,6,0,3,1,0,0,4,1,7,0,3,1,0,0,4,1,8,0,3,1,0,0,4,1,9,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,15,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,19,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,9,0,2,1,27,0,3,1,0,0,2,1,1,0,2,1,3,0,3,1,0,0,2,1,1,0,2,1,3,0,2,1,7,0,3,1,0,0,4,1,10,0,3,1,0,0,4,1,12,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,2,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,6,0,3,1,0,0,4,1,8,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,8,0,3,1,0,0,4,1,10,0,3,1,0,0,4,1,12,0,3,1,0,0,4,1,16,0,3,1,0,0,4,1,20,0,3,1,0,0,4,1,2,0,3,1,0,0,4,1,32,0,3,1,0,0,4,1,4,0,3,1,0,0,4,1,6,0,3,1,0,0,4,1,8,0,3,1,0,0,0,1,1,0,1,1,1,0,3,1,0,0,0,0,1,0,1,0,1,0,3,1,0,0,0,0,1,0,1,0,1,0,3,1,0,0,0,1,1,0,1,1,1,0,3,2,0,0,112,0,97,108,108,111,99,46,99,0,102,102,116,119,58,32,37,115,58,37,100,58,32,97,115,115,101,114,116,105,111,110,32,102,97,105,108,101,100,58,32,37,115,10,0,0,7,1,12,2,17,3,22,4,7,5,12,6,17,7,22,8,7,9,12,10,17,11,22,12,7,13,12,14,17,15,22,1,5,6,9,11,14,0,20,5,5,10,9,15,14,4,20,9,5,14,9,3,14,8,20,13,5,2,9,7,14,12,20,5,4,8,11,11,16,14,23,1,4,4,11,7,16,10,23,13,4,0,11,3,16,6,23,9,4,12,11,15,16,2,23,0,6,7,10,14,15,5,21,12,6,3,10,10,15,1,21,8,6,15,10,6,15,13,21,4,6,11,10,2,15,9,21,83,76,86,78,68,88,40,115,108,111,116,41,32,61,61,32,115,108,118,110,100,120,0,112,108,97,110,110,101,114,46,99,0,40,102,102,116,119,45,51,46,51,46,52,32,102,102,116,119,102,95,119,105,115,100,111,109,32,35,120,37,77,32,35,120,37,77,32,35,120,37,77,32,35,120,37,77,10,0,40,37,42,115,32,37,100,32,35,120,37,120,32,35,120,37,120,32,35,120,37,120,32,35,120,37,77,32,35,120,37,77,32,35,120,37,77,32,35,120,37,77,41,0,84,73,77,69,79,85,84,0,102,108,97,103,115,46,108,32,61,61,32,108,0,102,108,97,103,115,46,117,32,61,61,32,117,0,102,108,97,103,115,46,116,105,109,101,108,105,109,105,116,95,105,109,112,97,116,105,101,110,99,101,32,61,61,32,116,10
5,109,101,108,105,109,105,116,95,105,109,112,97,116,105,101,110,99,101,0,32,32,40,37,115,32,37,100,32,35,120,37,120,32,35,120,37,120,32,35,120,37,120,32,35,120,37,77,32,35,120,37,77,32,35,120,37,77,32,35,120,37,77,41,10,0,41,10,0,40,117,110,115,111,108,118,97,98,108,101,41,0,117,110,115,111,108,118,97,98,108,101,0,40,100,102,116,45,98,108,117,101,115,116,101,105,110,45,37,68,47,37,68,37,40,37,112,37,41,41,0,40,100,102,116,45,98,117,102,102,101,114,101,100,45,37,68,37,118,47,37,68,45,37,68,37,40,37,112,37,41,37,40,37,112,37,41,37,40,37,112,37,41,41,0,102,102,116,119,102,95,100,102,116,95,105,110,100,105,114,101,99,116,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,105,110,100,105,114,101,99,116,95,116,114,97,110,115,112,111,115,101,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,114,97,110,107,95,103,101,113,50,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,118,114,97,110,107,95,103,101,113,49,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,98,117,102,102,101,114,101,100,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,103,101,110,101,114,105,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,114,97,100,101,114,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,98,108,117,101,115,116,101,105,110,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,110,111,112,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,99,116,95,103,101,110,101,114,105,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,99,116,95,103,101,110,101,114,105,99,98,117,102,95,114,101,103,105,115,116,101,114,0,40,100,102,116,45,99,116,45,37,115,47,37,68,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,100,102,116,119,45,100,105,114,101,99,116,98,117,102,47,37,68,45,37,68,47,37,68,37,118,32,34,37,115,34,41,0,40,100,102,116,119,45,100,105,114,101,99,116,45,37,68,47,37,68,37,118,32
,34,37,115,34,41,0,40,100,102,116,119,45,100,105,114,101,99,116,115,113,45,37,68,47,37,68,37,118,32,34,37,115,34,41,0,40,100,102,116,119,45,103,101,110,101,114,105,99,45,37,115,45,37,68,45,37,68,37,118,37,40,37,112,37,41,41,0,40,100,102,116,119,45,103,101,110,101,114,105,99,98,117,102,47,37,68,45,37,68,45,37,68,37,40,37,112,37,41,41,0,40,100,102,116,45,100,105,114,101,99,116,98,117,102,47,37,68,45,37,68,37,118,32,34,37,115,34,41,0,40,100,102,116,45,100,105,114,101,99,116,45,37,68,37,118,32,34,37,115,34,41,0,40,100,102,116,45,103,101,110,101,114,105,99,45,37,68,41,0,40,105,110,100,105,114,101,99,116,45,116,114,97,110,115,112,111,115,101,37,118,37,40,37,112,37,41,37,40,37,112,37,41,37,40,37,112,37,41,41,0,100,102,116,45,105,110,100,105,114,101,99,116,45,97,102,116,101,114,0,100,102,116,45,105,110,100,105,114,101,99,116,45,98,101,102],"i8",ALLOC_NONE,Runtime.GLOBAL_BASE+10240);allocate([111,114,101,0,40,100,102,116,45,110,111,112,41,0,40,100,102,116,32,37,100,32,37,100,32,37,100,32,37,68,32,37,68,32,37,84,32,37,84,41,0,100,102,116,0,40,100,102,116,45,114,97,100,101,114,45,37,68,37,111,105,115,61,37,111,111,115,61,37,40,37,112,37,41,0,40,100,102,116,45,114,97,110,107,62,61,50,47,37,100,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,100,102,116,45,118,114,97,110,107,62,61,49,45,120,37,68,47,37,100,37,40,37,112,37,41,41,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,11
6,95,110,49,95,49,49,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,49,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,49,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,110,49,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,49,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,49,54,0,102,102,116,119,102
,95,99,111,100,101,108,101,116,95,116,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,116,50,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,113,49,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,113,49,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,113,49,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,113,49,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,113,49,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,113,49,95,54,0,110,49,95,49,48,0,110,49,95,49,49,0,110,49,95,49,50,0,110,49,95,49,51,0,110,49,95,49,52,0,110,49,95,49,53,0,110,49,95,49,54,0,110,49,95,50,48,0,110,49,95,50,53,0,110,49,95,50,0,110,49,95,51,50,0,110,49,95,51,0,110,49,95,52,0,110,49,95,53,0,110,49,95,54,52,0,110,49,95,54,0,110,49,95,55,0,110,49,95,56,0,110,49,95,57,0,113,49,95,50,0,113,49,95,51,0,113,49,95,52,0,113,49,95,53,0,113,49,95,54,0,113,49,95,56,0,116,49,95,49,48,0,116,49,95,49,50,0,116,49,95,49,53,0,116,49,95,49,54,0,116,49,95,50,48,0,116,49,95,50,53,0,116,49,95,50,0,116,49,95,51,50,0,116,49,95,51,0,116,49,95,52,0,116,49,95,53,0,116,49,95,54,52,0,116,49,95,54,0,116,49,95,55,0,116,49,95,56,0,116,49,95,57,0,116,50,95,49,48,0,116,50,95,49,54,0,116,50,95,50,48,0,116,50,95,50,53,0,116,50,95,51,50,0,116,50,95,52,0,116,50,95,53,0,116,50,95,54,52,0,116,50,95,56,0,40,114,100,102,116,50,45,98,117,102,102,101,114,101,100,45,37,68,37,118,47,37,68,45,37,68,37,40,37,112,37,41,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,100,102,116,45,98,117,102,102,101,114,101,100,45,37,68,37,118,47,37,68,45,37,68,37,40,37,112,37,41,37,40,37,112,37,41,37,40,37,112,37,41,41,0,102,102,116,119,102,95,114,100,102,116,95,105,110,100,105,114,
101,99,116,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,114,97,110,107,48,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,118,114,97,110,107,51,95,116,114,97,110,115,112,111,115,101,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,118,114,97,110,107,95,103,101,113,49,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,110,111,112,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,98,117,102,102,101,114,101,100,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,103,101,110,101,114,105,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,114,97,110,107,95,103,101,113,50,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,102,116,95,114,50,104,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,95,100,104,116,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,104,116,95,114,50,104,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,100,104,116,95,114,97,100,101,114,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,50,95,118,114,97,110,107,95,103,101,113,49,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,50,95,110,111,112,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,50,95,114,97,110,107,48,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,50,95,98,117,102,102,101,114,101,100,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,50,95,114,97,110,107,95,103,101,113,50,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,100,102,116,50,95,114,100,102,116,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,104,99,50,104,99,95,103,101,110,101,114,105,99,95,114,101,103,105,115,116,101,114,0,40,104,99,50,99,45,100,105,114,101,99,116,98,117,102,47,37,68,45,37,68,4
7,37,68,47,37,68,37,118,32,34,37,115,34,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,104,99,50,99,45,100,105,114,101,99,116,45,37,68,47,37,68,47,37,68,37,118,32,34,37,115,34,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,100,102,116,50,45,99,116,45,37,115,47,37,68,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,100,102,116,45,114,50,104,99,45,37,68,37,40,37,112,37,41,41,0,40,100,104,116,45,114,50,104,99,45,37,68,37,40,37,112,37,41,41,0,40,100,104,116,45,114,97,100,101,114,45,37,68,47,37,68,37,111,105,115,61,37,111,111,115,61,37,40,37,112,37,41,0,40,114,100,102,116,45,37,115,45,100,105,114,101,99,116,98,117,102,47,37,68,45,114,50,99,45,37,68,37,118,32,34,37,115,34,41,0,40,114,100,102,116,45,37,115,45,100,105,114,101,99,116,45,114,50,99,45,37,68,37,118,32,34,37,115,34,41,0,40,114,100,102,116,45,37,115,45,100,105,114,101,99,116,45,114,50,114,45,37,68,37,118,32,34,37,115,34,41,0,40,114,100,102,116,50,45,37,115,45,100,105,114,101,99,116,45,37,68,37,118,32,34,37,115,34,41,0,40,114,100,102,116,45,103,101,110,101,114,105,99,45,37,115,45,37,68,41,0,40,104,99,50,104,99,45,100,105,114,101,99,116,98,117,102,47,37,68,45,37,68,47,37,68,37,118,32,34,37,115,34,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,104,99,50,104,99,45,100,105,114,101,99,116,45,37,68,47,37,68,37,118,32,34,37,115,34,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,104,99,50,104,99,45,103,101,110,101,114,105,99,45,37,115,45,37,68,45,37,68,37,118,37,40,37,112,37,41,37,40,37,112,37,41,41,0,100,105,116,0,100,105,102,0,40,114,100,102,116,45,99,116,45,37,115,47,37,68,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,37,115,37,40,37,112,37,41,37,40,37,112,37,41,41,0,114,100,102,116,45,105,110,100,105,114,101,99,116,45,97,102,116,101,114,0,114,100,102,116,45,105,110,100,105,114,101,99,116,45,98,101,102,111,114,101,0,40,114,100,102,116,50,45,110,111,112,41,0,40,114,100,102,116,45,110,111,112,41,0,40,114,100,102,116,50,32,37,100,32,37,100,32,37,84,32,37,84,41,0,114,100,102,116,50,0,114,50,104,99,0,0,0,0,114,50,104,99,48,49,0
,0,114,50,104,99,49,48,0,0,114,50,104,99,49,49,0,0,104,99,50,114,0,0,0,0,104,99,50,114,48,49,0,0,104,99,50,114,49,48,0,0,104,99,50,114,49,49,0,0,100,104,116,0,0,0,0,0,114,101,100,102,116,48,48,0,114,101,100,102,116,48,49,0,114,101,100,102,116,49,48,0,114,101,100,102,116,49,49,0,114,111,100,102,116,48,48,0,114,111,100,102,116,48,49,0,114,111,100,102,116,49,48,0,114,111,100,102,116,49,49,0,40,114,100,102,116,32,37,100,32,37,68,32,37,84,32,37,84,0,32,37,100,0,114,100,102,116,0,40,114,100,102,116,50,45,114,97,110,107,62,61,50,47,37,100,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,100,102,116,45,114,97,110,107,62,61,50,47,37,100,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,100,102,116,50,45,104,99,50,114,45,114,97,110,107,48,37,40,37,112,37,41,41,0,40,114,100,102,116,50,45,114,50,104,99,45,114,97,110,107,48,37,118,41,0,114,100,102,116,45,114,97,110,107,48,45,109,101,109,99,112,121,0,114,100,102,116,45,114,97,110,107,48,45,109,101,109,99,112,121,45,108,111,111,112,0,114,100,102,116,45,114,97,110,107,48,45,105,116,101,114,45,99,105,0,114,100,102,116,45,114,97,110,107,48,45,105,116,101,114,45,99,111,0,114,100,102,116,45,114,97,110,107,48,45,116,105,108,101,100,0,114,100,102,116,45,114,97,110,107,48,45,116,105,108,101,100,98,117,102,0,114,100,102,116,45,114,97,110,107,48,45,105,112,45,115,113,0,114,100,102,116,45,114,97,110,107,48,45,105,112,45,115,113,45,116,105,108,101,100,0,114,100,102,116,45,114,97,110,107,48,45,105,112,45,115,113,45,116,105,108,101,100,98,117,102,0,40,37,115,47,37,68,0,37,118,0,40,37,115,45,100,104,116,45,37,68,37,40,37,112,37,41,41,0,114,50,104,99,0,104,99,50,114,0,40,114,100,102,116,50,45,114,100,102,116,45,37,115,45,37,68,37,118,47,37,68,45,37,68,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,100,102,116,50,45,118,114,97,110,107,62,61,49,45,120,37,68,47,37,100,37,40,37,112,37,41,41,0,40,114,100,102,116,45,118,114,97,110,107,62,61,49,45,120,37,68,47,37,100,37,40,37,112,37,41,41,0,40,37,115,45,37,68,120,37,68,37,118,0,37,40,37,112,37,41,0,
41,0,114,100,102,116,45,116,114,97,110,115,112,111,115,101,45,116,111,109,115,53,49,51,0,114,100,102,116,45,116,114,97,110,115,112,111,115,101,45,99,117,116,0,114,100,102,116,45,116,114,97,110,115,112,111,115,101,45,103,99,100,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,49,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,49,50,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,52,0,102,102,116,119,102,95,99
,111,100,101,108,101,116,95,104,102,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,102,50,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,56,0,102,102,116,119,102,9
5,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,102,73,73,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,50,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,50,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,50,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,1
16,95,104,99,50,99,102,100,102,116,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,50,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,50,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,102,100,102,116,50,95,50,48,0,104,99,50,99,102,50,95,49,54,0,104,99,50,99,102,50,95,50,48,0,104,99,50,99,102,50,95,51,50,0,104,99,50,99,102,50,95,52,0,104,99,50,99,102,50,95,56,0,104,99,50,99,102,95,49,48,0,104,99,50,99,102,95,49,50,0,104,99,50,99,102,95,49,54,0,104,99,50,99,102,95,50,48,0,104,99,50,99,102,95,50,0,104,99,50,99,102,95,51,50,0,104,99,50,99,102,95,52,0,104,99,50,99,102,95,54,0,104,99,50,99,102,95,56,0,104,99,50,99,102,100,102,116,50,95,49,54,0,104,99,50,99,102,100,102,116,50,95,50,48,0,104,99,50,99,102,100,102,116,50,95,51,50,0,104,99,50,99,102,100,102,116,50,95,52,0,104,99,50,99,102,100,102,116,50,95,56,0,104,99,50,99,102,100,102,116,95,49,48,0,104,99,50,99,102,100,102,116,95,49,50,0,104,99,50,99,102,100,102,116,95,49,54,0,104,99,50,99,102,100,102,116,95,50,48,0,104,99,50,99,102,100,102,116,95,50,0,104,99,50,99,102,100,102,116,95,51,50,0,104
,99,50,99,102,100,102,116,95,52,0,104,99,50,99,102,100,102,116,95,54,0,104,99,50,99,102,100,102,116,95,56,0,104,102,50,95,49,54,0,104,102,50,95,50,48,0,104,102,50,95,50,53,0,104,102,50,95,51,50,0,104,102,50,95,52,0,104,102,50,95,53,0,104,102,50,95,56,0,104,102,95,49,48,0,104,102,95,49,50,0,104,102,95,49,53,0,104,102,95,49,54,0,104,102,95,50,48,0,104,102,95,50,53,0,104,102,95,50,0,104,102,95,51,50,0,104,102,95,51,0,104,102,95,52,0,104,102,95,53,0,104,102,95,54,52,0,104,102,95,54,0,104,102,95,55,0,104,102,95,56,0,104,102,95,57,0,114,50,99,102,73,73,95,49,48,0,114,50,99,102,73,73,95,49,50,0,114,50,99,102,73,73,95,49,53,0,114,50,99,102,73,73,95,49,54,0,114,50,99,102,73,73,95,50,48,0,114,50,99,102,73,73,95,50,53,0,114,50,99,102,73,73,95,50,0,114,50,99,102,73,73,95,51,50,0,114,50,99,102,73,73,95,51,0,114,50,99,102,73,73,95,52,0,114,50,99,102,73,73,95,53,0,114,50,99,102,73,73,95,54,52,0,114,50,99,102,73,73,95,54,0,114,50,99,102,73,73,95,55,0,114,50,99,102,73,73,95,56,0,114,50,99,102,73,73,95,57,0,114,50,99,102,95,49,48,0,114,50,99,102,95,49,49,0,114,50,99,102,95,49,50,56,0,114,50,99,102,95,49,50,0,114,50,99,102,95,49,51,0,114,50,99,102,95,49,52,0,114,50,99,102,95,49,53,0,114,50,99,102,95,49,54,0,114,50,99,102,95,50,48,0,114,50,99,102,95,50,53,0,114,50,99,102,95,50,0,114,50,99,102,95,51,50,0,114,50,99,102,95,51,0,114,50,99,102,95,52,0,114,50,99,102,95,53,0,114,50,99,102,95,54,52,0,114,50,99,102,95,54,0,114,50,99,102,95,55,0,114,50,99,102,95,56,0,114,50,99,102,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,56,
0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,49,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,49,50,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,54,52,0,102,102,116,119,102,95,99,1
11,100,101,108,101,116,95,104,98,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,98,50,95,50,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,51,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,55,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,49,53,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,54,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,114,50,99,98,73,73,73,95,50,53,0,102,102,116,119,102,95,9
9,111,100,101,108,101,116,95,104,99,50,99,98,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,50,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,50,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,50,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,52,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,49,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,49,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,95,50,48,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,50,95,52,0
,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,50,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,50,95,49,54,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,50,95,51,50,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,104,99,50,99,98,100,102,116,50,95,50,48,0,104,98,50,95,49,54,0,104,98,50,95,50,48,0,104,98,50,95,50,53,0,104,98,50,95,51,50,0,104,98,50,95,52,0,104,98,50,95,53,0,104,98,50,95,56,0,104,98,95,49,48,0,104,98,95,49,50,0,104,98,95,49,53,0,104,98,95,49,54,0,104,98,95,50,48,0,104,98,95,50,53,0,104,98,95,50,0,104,98,95,51,50,0,104,98,95,51,0,104,98,95,52,0,104,98,95,53,0,104,98,95,54,52,0,104,98,95,54,0,104,98,95,55,0,104,98,95,56,0,104,98,95,57,0,104,99,50,99,98,50,95,49,54,0,104,99,50,99,98,50,95,50,48,0,104,99,50,99,98,50,95,51,50,0,104,99,50,99,98,50,95,52,0,104,99,50,99,98,50,95,56,0,104,99,50,99,98,95,49,48,0,104,99,50,99,98,95,49,50,0,104,99,50,99,98,95,49,54,0,104,99,50,99,98,95,50,48,0,104,99,50,99,98,95,50,0,104,99,50,99,98,95,51,50,0,104,99,50,99,98,95,52,0,104,99,50,99,98,95,54,0,104,99,50,99,98,95,56,0,104,99,50,99,98,100,102,116,50,95,49,54,0,104,99,50,99,98,100,102,116,50,95,50,48,0,104,99,50,99,98,100,102,116,50,95,51,50,0,104,99,50,99,98,100,102,116,50,95,52,0,104,99,50,99,98,100,102,116,50,95,56,0,104,99,50,99,98,100,102,116,95,49,48,0,104,99,50,99,98,100,102,116,95,49,50,0,104,99,50,99,98,100,102,116,95,49,54,0,104,99,50,99,98,100,102,116,95,50,48,0,104,99,50,99,98,100,102,116,95,50,0,104,99,50,99,98,100,102,116,95,51,50,0,104,99,50,99,98,100,102,116,95,52,0,104,99,50,99,98,100,102,116,95,54,0,104,99,50,99,98,100,102,116,95,56,0,114,50,99,98,73,73,73,95,49,48,0,114,50,99,98,73,73,73,95,49,50,0,114,50,99,98,73,73,73,95,49,53,0,114,50,99,98,73,73,73,95,49,54,0,114,50,99,98,73,73,73,95,50,48,0,114,50,99,98,73,73,73,95,50,53,0,114,50,99,98,73,73,73,95,50,0,114,50,99,98,73,73,73,95,51,50,0,114,50,99,98,73,73,73,95,51,0,114,50,99,9
8,73,73,73,95,52,0,114,50,99,98,73,73,73,95,53,0,114,50,99,98,73,73,73,95,54,52,0,114,50,99,98,73,73,73,95,54,0,114,50,99,98,73,73,73,95,55,0,114,50,99,98,73,73,73,95,56,0,114,50,99,98,73,73,73,95,57,0,114,50,99,98,95,49,48,0,114,50,99,98,95,49,49,0,114,50,99,98,95,49,50,56,0,114,50,99,98,95,49,50,0,114,50,99,98,95,49,51,0,114,50,99,98,95,49,52,0,114,50,99,98,95,49,53,0,114,50,99,98,95,49,54,0,114,50,99,98,95,50,48,0,114,50,99,98,95,50,53,0,114,50,99,98,95,50,0,114,50,99,98,95,51,50,0,114,50,99,98,95,51,0,114,50,99,98,95,52,0,114,50,99,98,95,53,0,114,50,99,98,95,54,52,0,114,50,99,98,95,54,0,114,50,99,98,95,55,0,114,50,99,98,95,56,0,114,50,99,98,95,57,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,101,48,49,95,56,0,102,102,116,119,102,95,99,111,100,101,108,101,116,95,101,49,48,95,56,0,101,48,49,95,56,0,101,49,48,95,56,0,102,102,116,119,102,95,114,101,100,102,116,48,48,101,95,114,50,104,99,95,112,97,100,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,111,100,102,116,48,48,101,95,114,50,104,99,95,112,97,100,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,101,111,100,102,116,48,48,101,95,115,112,108,105,116,114,97,100,105,120,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,101,111,100,102,116,48,49,48,101,95,114,50,104,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,101,111,100,102,116,49,49,101,95,114,97,100,105,120,50,95,114,50,104,99,95,114,101,103,105,115,116,101,114,0,102,102,116,119,102,95,114,101,111,100,102,116,49,49,101,95,114,50,104,99,95,111,100,100,95,114,101,103,105,115,116,101,114,0,40,114,101,100,102,116,48,48,101,45,114,50,104,99,45,112,97,100,45,37,68,37,118,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,101,100,102,116,48,48,101,45,115,112,108,105,116,114,97,100,105,120,45,37,68,37,118,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,114,111,100,102,116,48,48,101,45,115,112,108,105,116,114,97,100,105,120,45,37,68,37,118,37,40,37,112,37,41,37,40,37,112,37,41,41,0,40,37,115,101,45
,114,50,104,99,45,37,68,37,118,37,40,37,112,37,41,41,0,40,37,115,101,45,114,50,104,99,45,111,100,100,45,37,68,37,118,37,40,37,112,37,41,41,0,40,37,115,101,45,114,97,100,105,120,50,45,114,50,104,99,45,37,68,37,118,37,40,37,112,37,41,41,0,40,114,111,100,102,116,48,48,101,45,114,50,104,99,45,112,97,100,45,37,68,37,118,37,40,37,112,37,41,37,40,37,112,37,41,41,0,84,33,34,25,13,1,2,3,17,75,28,12,16,4,11,29,18,30,39,104,110,111,112,113,98,32,5,6,15,19,20,21,26,8,22,7,40,36,23,24,9,10,14,27,31,37,35,131,130,125,38,42,43,60,61,62,63,67,71,74,77,88,89,90,91,92,93,94,95,96,97,99,100,101,102,103,105,106,107,108,114,115,116,121,122,123,124,0,73,108,108,101,103,97,108,32,98,121,116,101,32,115,101,113,117,101,110,99,101,0,68,111,109,97,105,110,32,101,114,114,111,114,0,82,101,115,117,108,116,32,110,111,116,32,114,101,112,114,101,115,101,110,116,97,98,108,101,0,78,111,116,32,97,32,116,116,121,0,80,101,114,109,105,115,115,105,111,110,32,100,101,110,105,101,100,0,79,112,101,114,97,116,105,111,110,32,110,111,116,32,112,101,114,109,105,116,116,101,100,0,78,111,32,115,117,99,104,32,102,105,108,101,32,111,114,32,100,105,114,101,99,116,111,114,121,0,78,111,32,115,117,99,104,32,112,114,111,99,101,115,115,0,70,105,108,101,32,101,120,105,115,116,115,0,86,97,108,117,101,32,116,111,111,32,108,97,114,103,101,32,102,111,114,32,100,97,116,97,32,116,121,112,101,0,78,111,32,115,112,97,99,101,32,108,101,102,116,32,111,110,32,100,101,118,105,99,101,0,79,117,116,32,111,102,32,109,101,109,111,114,121,0,82,101,115,111,117,114,99,101,32,98,117,115,121,0,73,110,116,101,114,114,117,112,116,101,100,32,115,121,115,116,101,109,32,99,97,108,108,0,82,101,115,111,117,114,99,101,32,116,101,109,112,111,114,97,114,105,108,121,32,117,110,97,118,97,105,108,97,98,108,101,0,73,110,118,97,108,105,100,32,115,101,101,107,0,67,114,111,115,115,45,100,101,118,105,99,101,32,108,105,110,107,0,82,101,97,100,45,111,110,108,121,32,102,105,108,101,32,115,121,115,116,101,109,0,68,105,114,101,99,116,111,114,121,32,110,111,116,32,101,
109,112,116,121,0,67,111,110,110,101,99,116,105,111,110,32,114,101,115,101,116,32,98,121,32,112,101,101,114,0,79,112,101,114,97,116,105,111,110,32,116,105,109,101,100,32,111,117,116,0,67,111,110,110,101,99,116,105,111,110,32,114,101,102,117,115,101,100,0,72,111,115,116,32,105,115,32,100,111,119,110,0,72,111,115,116,32,105,115,32,117,110,114,101,97,99,104,97,98,108,101,0,65,100,100,114,101,115,115,32,105,110,32,117,115,101,0,66,114,111,107,101,110,32,112,105,112,101,0,73,47,79,32,101,114,114,111,114,0,78,111,32,115,117,99,104,32,100,101,118,105,99,101,32,111,114,32,97,100,100,114,101,115,115,0,66,108,111,99,107,32,100,101,118,105,99,101,32,114,101,113,117,105,114,101,100,0,78,111,32,115,117,99,104,32,100,101,118,105,99,101,0,78,111,116,32,97,32,100,105,114,101,99,116,111,114,121,0,73,115,32,97,32,100,105,114,101,99,116,111,114,121,0,84,101,120,116,32,102,105,108,101,32,98,117,115,121,0,69,120,101,99,32,102,111,114,109,97,116,32,101,114,114,111,114,0,73,110,118,97,108,105,100,32,97,114,103,117,109,101,110,116,0,65,114,103,117,109,101,110,116,32,108,105,115,116,32,116,111,111,32,108,111,110,103,0,83,121,109,98,111,108,105,99,32,108,105,110,107,32,108,111,111,112,0,70,105,108,101,110,97,109,101,32,116,111,111,32,108,111,110,103,0,84,111,111,32,109,97,110,121,32,111,112,101,110,32,102,105,108,101,115,32,105,110,32,115,121,115,116,101,109,0,78,111,32,102,105,108,101,32,100,101,115,99,114,105,112,116,111,114,115,32,97,118,97,105,108,97,98,108,101,0,66,97,100,32,102,105,108,101,32,100,101,115,99,114,105,112,116,111,114,0,78,111,32,99,104,105,108,100,32,112,114,111,99,101,115,115,0,66,97,100,32,97,100,100,114,101,115,115,0,70,105,108,101,32,116,111,111,32,108,97,114,103,101,0,84,111,111,32,109,97,110,121,32,108,105,110,107,115,0,78,111,32,108,111,99,107,115,32,97,118,97,105,108,97,98,108,101,0,82,101,115,111,117,114,99,101,32,100,101,97,100,108,111,99,107,32,119,111,117,108,100,32,111,99,99,117,114,0,83,116,97,116,101,32,110,111,116,32,114,101,99,111,118,101,114,97,98,108,10
1,0,80,114,101,118,105,111,117,115,32,111,119,110,101,114,32,100,105,101,100,0,79,112,101,114,97,116,105,111,110,32,99,97,110,99,101,108,101,100,0,70,117,110,99,116,105,111,110,32,110,111,116,32,105,109,112,108,101,109,101,110,116,101,100,0,78,111,32],"i8",ALLOC_NONE,Runtime.GLOBAL_BASE+20480);allocate([109,101,115,115,97,103,101,32,111,102,32,100,101,115,105,114,101,100,32,116,121,112,101,0,73,100,101,110,116,105,102,105,101,114,32,114,101,109,111,118,101,100,0,68,101,118,105,99,101,32,110,111,116,32,97,32,115,116,114,101,97,109,0,78,111,32,100,97,116,97,32,97,118,97,105,108,97,98,108,101,0,68,101,118,105,99,101,32,116,105,109,101,111,117,116,0,79,117,116,32,111,102,32,115,116,114,101,97,109,115,32,114,101,115,111,117,114,99,101,115,0,76,105,110,107,32,104,97,115,32,98,101,101,110,32,115,101,118,101,114,101,100,0,80,114,111,116,111,99,111,108,32,101,114,114,111,114,0,66,97,100,32,109,101,115,115,97,103,101,0,70,105,108,101,32,100,101,115,99,114,105,112,116,111,114,32,105,110,32,98,97,100,32,115,116,97,116,101,0,78,111,116,32,97,32,115,111,99,107,101,116,0,68,101,115,116,105,110,97,116,105,111,110,32,97,100,100,114,101,115,115,32,114,101,113,117,105,114,101,100,0,77,101,115,115,97,103,101,32,116,111,111,32,108,97,114,103,101,0,80,114,111,116,111,99,111,108,32,119,114,111,110,103,32,116,121,112,101,32,102,111,114,32,115,111,99,107,101,116,0,80,114,111,116,111,99,111,108,32,110,111,116,32,97,118,97,105,108,97,98,108,101,0,80,114,111,116,111,99,111,108,32,110,111,116,32,115,117,112,112,111,114,116,101,100,0,83,111,99,107,101,116,32,116,121,112,101,32,110,111,116,32,115,117,112,112,111,114,116,101,100,0,78,111,116,32,115,117,112,112,111,114,116,101,100,0,80,114,111,116,111,99,111,108,32,102,97,109,105,108,121,32,110,111,116,32,115,117,112,112,111,114,116,101,100,0,65,100,100,114,101,115,115,32,102,97,109,105,108,121,32,110,111,116,32,115,117,112,112,111,114,116,101,100,32,98,121,32,112,114,111,116,111,99,111,108,0,65,100,100,114,101,115,115,32,110,111,116,32,97,118,97,1
05,108,97,98,108,101,0,78,101,116,119,111,114,107,32,105,115,32,100,111,119,110,0,78,101,116,119,111,114,107,32,117,110,114,101,97,99,104,97,98,108,101,0,67,111,110,110,101,99,116,105,111,110,32,114,101,115,101,116,32,98,121,32,110,101,116,119,111,114,107,0,67,111,110,110,101,99,116,105,111,110,32,97,98,111,114,116,101,100,0,78,111,32,98,117,102,102,101,114,32,115,112,97,99,101,32,97,118,97,105,108,97,98,108,101,0,83,111,99,107,101,116,32,105,115,32,99,111,110,110,101,99,116,101,100,0,83,111,99,107,101,116,32,110,111,116,32,99,111,110,110,101,99,116,101,100,0,67,97,110,110,111,116,32,115,101,110,100,32,97,102,116,101,114,32,115,111,99,107,101,116,32,115,104,117,116,100,111,119,110,0,79,112,101,114,97,116,105,111,110,32,97,108,114,101,97,100,121,32,105,110,32,112,114,111,103,114,101,115,115,0,79,112,101,114,97,116,105,111,110,32,105,110,32,112,114,111,103,114,101,115,115,0,83,116,97,108,101,32,102,105,108,101,32,104,97,110,100,108,101,0,82,101,109,111,116,101,32,73,47,79,32,101,114,114,111,114,0,81,117,111,116,97,32,101,120,99,101,101,100,101,100,0,78,111,32,109,101,100,105,117,109,32,102,111,117,110,100,0,87,114,111,110,103,32,109,101,100,105,117,109,32,116,121,112,101,0,78,111,32,101,114,114,111,114,32,105,110,102,111,114,109,97,116,105,111,110],"i8",ALLOC_NONE,Runtime.GLOBAL_BASE+30720);allocate([17,0,10,0,17,17,17,0,0,0,0,5,0,0,0,0,0,0,9,0,0,0,0,11,0,0,0,0,0,0,0,0,17,0,15,10,17,17,17,3,10,7,0,1,19,9,11,11,0,0,9,6,11,0,0,11,0,6,17,0,0,0,17,17,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,17,0,10,10,17,17,17,0,10,0,0,2,0,9,11,0,0,0,9,0,11,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,12,0,0,0,0,9,12,0,0,0,0,0,12,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,4,13,0,0,0,0,9,14,0,0,0,0,0,14,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,15,0,0,0,0,9,16,0,0,0,0,0,16,0,0,16,0,0,18,0,0,0,18,18,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,18,0,0,0,18,18,18,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,10,0,0,0,0,9,11,0,0,0,0,0,11,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,12,0,0,0,0,9,12,0,0,0,0,0,12,0,0,12,0,0,48,49,50,51,52,53,54,55,56,57,65,66,67,68,69,70,45,43,32,32,32,48,88,48,120,0,40,110,117,108,108,41,0,45,48,88,43,48,88,32,48,88,45,48,120,43,48,120,32,48,120,0,105,110,102,0,73,78,70,0,110,97,110,0,78,65,78,0,46,0],"i8",ALLOC_NONE,Runtime.GLOBAL_BASE+32571);var tempDoublePtr=Runtime.alignMemory(allocate(12,"i8",ALLOC_STATIC),8);assert(tempDoublePtr%8==0);function copyTempFloat(ptr){HEAP8[tempDoublePtr]=HEAP8[ptr];HEAP8[tempDoublePtr+1]=HEAP8[ptr+1];HEAP8[tempDoublePtr+2]=HEAP8[ptr+2];HEAP8[tempDoublePtr+3]=HEAP8[ptr+3]}function copyTempDouble(ptr){HEAP8[tempDoublePtr]=HEAP8[ptr];HEAP8[tempDoublePtr+1]=HEAP8[ptr+1];HEAP8[tempDoublePtr+2]=HEAP8[ptr+2];HEAP8[tempDoublePtr+3]=HEAP8[ptr+3];HEAP8[tempDoublePtr+4]=HEAP8[ptr+4];HEAP8[tempDoublePtr+5]=HEAP8[ptr+5];HEAP8[tempDoublePtr+6]=HEAP8[ptr+6];HEAP8[tempDoublePtr+7]=HEAP8[ptr+7]}function _emscripten_get_now(){if(!_emscripten_get_now.actual){if(ENVIRONMENT_IS_NODE){_emscripten_get_now.actual=function _emscripten_get_now_actual(){var t=process["hrtime"]();return t[0]*1e3+t[1]/1e6}}else if(typeof dateNow!=="undefined"){_emscripten_get_now.actual=dateNow}else if(typeof self==="object"&&self["performance"]&&typeof self["performance"]["now"]==="function"){_emscripten_get_now.actual=function _emscripten_get_now_actual(){return self["performance"]["now"]()}}else if(typeof performance==="object"&&typeof performance["now"]==="function"){_emscripten_get_now.actual=function _emscripten_get_now_actual(){return performance["now"]()}}else{_emscripten_get_now.actual=Date.now}}return _emscripten_get_now.actual()}function _emscripten_get_now_is_monotonic(){return ENVIRONMENT_IS_NODE||typeof 
dateNow!=="undefined"||(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER)&&self["performance"]&&self["performance"]["now"]}var ERRNO_CODES={EPERM:1,ENOENT:2,ESRCH:3,EINTR:4,EIO:5,ENXIO:6,E2BIG:7,ENOEXEC:8,EBADF:9,ECHILD:10,EAGAIN:11,EWOULDBLOCK:11,ENOMEM:12,EACCES:13,EFAULT:14,ENOTBLK:15,EBUSY:16,EEXIST:17,EXDEV:18,ENODEV:19,ENOTDIR:20,EISDIR:21,EINVAL:22,ENFILE:23,EMFILE:24,ENOTTY:25,ETXTBSY:26,EFBIG:27,ENOSPC:28,ESPIPE:29,EROFS:30,EMLINK:31,EPIPE:32,EDOM:33,ERANGE:34,ENOMSG:42,EIDRM:43,ECHRNG:44,EL2NSYNC:45,EL3HLT:46,EL3RST:47,ELNRNG:48,EUNATCH:49,ENOCSI:50,EL2HLT:51,EDEADLK:35,ENOLCK:37,EBADE:52,EBADR:53,EXFULL:54,ENOANO:55,EBADRQC:56,EBADSLT:57,EDEADLOCK:35,EBFONT:59,ENOSTR:60,ENODATA:61,ETIME:62,ENOSR:63,ENONET:64,ENOPKG:65,EREMOTE:66,ENOLINK:67,EADV:68,ESRMNT:69,ECOMM:70,EPROTO:71,EMULTIHOP:72,EDOTDOT:73,EBADMSG:74,ENOTUNIQ:76,EBADFD:77,EREMCHG:78,ELIBACC:79,ELIBBAD:80,ELIBSCN:81,ELIBMAX:82,ELIBEXEC:83,ENOSYS:38,ENOTEMPTY:39,ENAMETOOLONG:36,ELOOP:40,EOPNOTSUPP:95,EPFNOSUPPORT:96,ECONNRESET:104,ENOBUFS:105,EAFNOSUPPORT:97,EPROTOTYPE:91,ENOTSOCK:88,ENOPROTOOPT:92,ESHUTDOWN:108,ECONNREFUSED:111,EADDRINUSE:98,ECONNABORTED:103,ENETUNREACH:101,ENETDOWN:100,ETIMEDOUT:110,EHOSTDOWN:112,EHOSTUNREACH:113,EINPROGRESS:115,EALREADY:114,EDESTADDRREQ:89,EMSGSIZE:90,EPROTONOSUPPORT:93,ESOCKTNOSUPPORT:94,EADDRNOTAVAIL:99,ENETRESET:102,EISCONN:106,ENOTCONN:107,ETOOMANYREFS:109,EUSERS:87,EDQUOT:122,ESTALE:116,ENOTSUP:95,ENOMEDIUM:123,EILSEQ:84,EOVERFLOW:75,ECANCELED:125,ENOTRECOVERABLE:131,EOWNERDEAD:130,ESTRPIPE:86};function ___setErrNo(value){if(Module["___errno_location"])HEAP32[Module["___errno_location"]()>>2]=value;return value}function _clock_gettime(clk_id,tp){var now;if(clk_id===0){now=Date.now()}else if(clk_id===1&&_emscripten_get_now_is_monotonic()){now=_emscripten_get_now()}else{___setErrNo(ERRNO_CODES.EINVAL);return-1}HEAP32[tp>>2]=now/1e3|0;HEAP32[tp+4>>2]=now%1e3*1e3*1e3|0;return 0}var _BDtoIHigh=true;Module["_i64Subtract"]=_i64Subtract;function 
_sysconf(name){switch(name){case 30:return PAGE_SIZE;case 85:return totalMemory/PAGE_SIZE;case 132:case 133:case 12:case 137:case 138:case 15:case 235:case 16:case 17:case 18:case 19:case 20:case 149:case 13:case 10:case 236:case 153:case 9:case 21:case 22:case 159:case 154:case 14:case 77:case 78:case 139:case 80:case 81:case 82:case 68:case 67:case 164:case 11:case 29:case 47:case 48:case 95:case 52:case 51:case 46:return 200809;case 79:return 0;case 27:case 246:case 127:case 128:case 23:case 24:case 160:case 161:case 181:case 182:case 242:case 183:case 184:case 243:case 244:case 245:case 165:case 178:case 179:case 49:case 50:case 168:case 169:case 175:case 170:case 171:case 172:case 97:case 76:case 32:case 173:case 35:return-1;case 176:case 177:case 7:case 155:case 8:case 157:case 125:case 126:case 92:case 93:case 129:case 130:case 131:case 94:case 91:return 1;case 74:case 60:case 69:case 70:case 4:return 1024;case 31:case 42:case 72:return 32;case 87:case 26:case 33:return 2147483647;case 34:case 1:return 47839;case 38:case 36:return 99;case 43:case 37:return 2048;case 0:return 2097152;case 3:return 65536;case 28:return 32768;case 44:return 32767;case 75:return 16384;case 39:return 1e3;case 89:return 700;case 71:return 256;case 40:return 255;case 2:return 100;case 180:return 64;case 25:return 20;case 5:return 16;case 6:return 6;case 73:return 4;case 84:{if(typeof navigator==="object")return navigator["hardwareConcurrency"]||1;return 1}}___setErrNo(ERRNO_CODES.EINVAL);return-1}function _pthread_cleanup_push(routine,arg){__ATEXIT__.push((function(){Runtime.dynCall("vi",routine,[arg])}));_pthread_cleanup_push.level=__ATEXIT__.length}Module["_memset"]=_memset;var _BDtoILow=true;Module["_bitshift64Lshr"]=_bitshift64Lshr;Module["_bitshift64Shl"]=_bitshift64Shl;function _pthread_cleanup_pop(){assert(_pthread_cleanup_push.level==__ATEXIT__.length,"cannot pop if something else added meanwhile!");__ATEXIT__.pop();_pthread_cleanup_push.level=__ATEXIT__.length}function 
_abort(){Module["abort"]()}function _pthread_self(){return 0}function ___lock(){}function ___unlock(){}var _log=Math_log;var _cos=Math_cos;Module["_i64Add"]=_i64Add;function _sbrk(bytes){var self=_sbrk;if(!self.called){DYNAMICTOP=alignMemoryPage(DYNAMICTOP);self.called=true;assert(Runtime.dynamicAlloc);self.alloc=Runtime.dynamicAlloc;Runtime.dynamicAlloc=(function(){abort("cannot dynamically allocate, sbrk now has control")})}var ret=DYNAMICTOP;if(bytes!=0){var success=self.alloc(bytes);if(!success)return-1>>>0}return ret}function _emscripten_memcpy_big(dest,src,num){HEAPU8.set(HEAPU8.subarray(src,src+num),dest);return dest}Module["_memcpy"]=_memcpy;Module["_memmove"]=_memmove;function _gettimeofday(ptr){var now=Date.now();HEAP32[ptr>>2]=now/1e3|0;HEAP32[ptr+4>>2]=now%1e3*1e3|0;return 0}var _BItoD=true;function _time(ptr){var ret=Date.now()/1e3|0;if(ptr){HEAP32[ptr>>2]=ret}return ret}var _sin=Math_sin;var SYSCALLS={varargs:0,get:(function(varargs){SYSCALLS.varargs+=4;var ret=HEAP32[SYSCALLS.varargs-4>>2];return ret}),getStr:(function(){var ret=Pointer_stringify(SYSCALLS.get());return ret}),get64:(function(){var low=SYSCALLS.get(),high=SYSCALLS.get();if(low>=0)assert(high===0);else assert(high===-1);return low}),getZero:(function(){assert(SYSCALLS.get()===0)})};function ___syscall140(which,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(),offset_high=SYSCALLS.get(),offset_low=SYSCALLS.get(),result=SYSCALLS.get(),whence=SYSCALLS.get();var offset=offset_low;assert(offset_high===0);FS.llseek(stream,offset,whence);HEAP32[result>>2]=stream.position;if(stream.getdents&&offset===0&&whence===0)stream.getdents=null;return 0}catch(e){if(typeof FS==="undefined"||!(e instanceof FS.ErrnoError))abort(e);return-e.errno}}function ___syscall6(which,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD();FS.close(stream);return 0}catch(e){if(typeof FS==="undefined"||!(e instanceof FS.ErrnoError))abort(e);return-e.errno}}function 
___syscall146(which,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.get(),iov=SYSCALLS.get(),iovcnt=SYSCALLS.get();var ret=0;if(!___syscall146.buffer)___syscall146.buffer=[];var buffer=___syscall146.buffer;for(var i=0;i>2];var len=HEAP32[iov+(i*8+4)>>2];for(var j=0;j>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Aa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[za+272>>2]=.22252093255519867;g[za+268>>2]=.9009688496589661;g[za+264>>2]=.6234897971153259;g[za+260>>2]=.4338837265968323;g[za+256>>2]=.9749279022216797;g[za+252>>2]=.7818315029144287;c[ya>>2]=c[Aa>>2];c[m>>2]=(c[m>>2]|0)+(((c[Aa>>2]|0)-1|0)*12<<2);while(1){if((c[ya>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[T>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[c[m>>2]>>2];g[u>>2]=+g[(c[m>>2]|0)+4>>2];g[w>>2]=+g[s>>2]*+g[t>>2]+ +g[u>>2]*+g[v>>2];g[A>>2]=+g[s>>2]*+g[v>>2]-+g[u>>2]*+g[t>>2];g[y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[x>>2]=+g[(c[m>>2]|0)+40>>2];g[z>>2]=+g[(c[m>>2]|0)+44>>2];g[$>>2]=+g[x>>2]*+g[y>>2]+ +g[z>>2]*+g[_>>2];g[B>>2]=+g[x>>2]*+g[_>>2]-+g[z>>2]*+g[y>>2];g[aa>>2]=+g[w>>2]+ +g[$>>2];g[V>>2]=+g[A>>2]+ +g[B>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[O>>2]=+g[$>>2]-+g[w>>2];g[ca>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ba>>2]=+g[(c[m>>2]|0)+8>>2];g[da>>2]=+g[(c[m>>2]|0)+12>>2];g[fa>>2]=+g[ba>>2]*+g[ca>>2]+ +g[da>>2]*+g[ea>>2];g[G>>2]=+g[ba>>2]*+g[ea>>2]-+g[da>>2]*+g[ca>>2];g[ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ga>>2]=+g[(c[m>>2]|0)+32>>2];g[ia>>2]=+g[(c[m>>2]|0)+36>>2];g[ka>>2]=+g[ga>>2]*+g[ha>>2]+ +g[ia>>2]*+g[ja>>2];g[H>>2]=+g[ga>>2]*+g[ja>>2]-+g[ia>>2]*+g[ha>>2];g[la>>2]=+g[fa>>2]+ +g[ka>>2];g[S>>2]=+g[G>>2]+ 
+g[H>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[P>>2]=+g[fa>>2]-+g[ka>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ma>>2]=+g[(c[m>>2]|0)+16>>2];g[oa>>2]=+g[(c[m>>2]|0)+20>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[D>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[sa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ra>>2]=+g[(c[m>>2]|0)+24>>2];g[ta>>2]=+g[(c[m>>2]|0)+28>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[E>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[wa>>2]=+g[qa>>2]+ +g[va>>2];g[U>>2]=+g[D>>2]+ +g[E>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[Q>>2]=+g[va>>2]-+g[qa>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[aa>>2]+ +g[la>>2]+ +g[wa>>2];g[L>>2]=+g[C>>2]*.7818315029144287+ +g[I>>2]*.9749279022216797+ +g[F>>2]*.4338837265968323;g[K>>2]=+g[aa>>2]*.6234897971153259+ +g[q>>2]+-(+g[wa>>2]*.9009688496589661+ +g[la>>2]*.22252093255519867);g[c[l>>2]>>2]=+g[K>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[K>>2]+ +g[L>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[V>>2]+ +g[S>>2]+ +g[U>>2]+ +g[T>>2];g[Z>>2]=+g[O>>2]*.7818315029144287+ +g[Q>>2]*.4338837265968323-+g[P>>2]*.9749279022216797;g[r>>2]=+g[V>>2]*.6234897971153259+ +g[T>>2]+-(+g[U>>2]*.9009688496589661+ +g[S>>2]*.22252093255519867);g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Z>>2]-+g[r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Z>>2]+ +g[r>>2];g[X>>2]=+g[O>>2]*.9749279022216797+ +g[P>>2]*.4338837265968323-+g[Q>>2]*.7818315029144287;g[Y>>2]=+g[U>>2]*.6234897971153259+ +g[T>>2]+-(+g[S>>2]*.9009688496589661+ +g[V>>2]*.22252093255519867);g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[X>>2]-+g[Y>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[X>>2]+ +g[Y>>2];g[R>>2]=+g[O>>2]*.4338837265968323+ +g[P>>2]*.7818315029144287+ +g[Q>>2]*.9749279022216797;g[W>>2]=+g[S>>2]*.6234897971153259+ +g[T>>2]+-(+g[U>>2]*.22252093255519867+ +g[V>>2]*.9009688496589661);g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[R>>2]-+g[W>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[R>>2]+ 
+g[W>>2];g[N>>2]=+g[C>>2]*.4338837265968323+ +g[F>>2]*.9749279022216797-+g[I>>2]*.7818315029144287;g[M>>2]=+g[la>>2]*.6234897971153259+ +g[q>>2]+-(+g[wa>>2]*.22252093255519867+ +g[aa>>2]*.9009688496589661);g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[M>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[N>>2];g[J>>2]=+g[C>>2]*.9749279022216797-+g[F>>2]*.7818315029144287-+g[I>>2]*.4338837265968323;g[xa>>2]=+g[wa>>2]*.6234897971153259+ +g[q>>2]+-(+g[la>>2]*.9009688496589661+ +g[aa>>2]*.22252093255519867);g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[xa>>2]-+g[J>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[xa>>2]+ +g[J>>2];c[ya>>2]=(c[ya>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+48}i=za;return}function bs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,47,5560);i=b;return}function cs(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0;Ra=i;i=i+368|0;k=Ra+352|0;l=Ra+348|0;m=Ra+344|0;n=Ra+340|0;Sa=Ra+336|0;o=Ra+332|0;p=Ra+328|0;Qa=Ra+320|0;P=Ra+316|0;G=Ra+312|0;Z=Ra+308|0;A=Ra+304|0;X=Ra+300|0;t=Ra+296|0;ha=Ra+292|0;ka=Ra+288|0;ya=Ra+284|0;F=Ra+280|0;aa=Ra+276|0;x=Ra+272|0;Ka=Ra+268|0;u=Ra+264|0;ca=Ra+260|0;fa=Ra+256|0;q=Ra+252|0;z=Ra+248|0;O=Ra+244|0;y=Ra+240|0;L=Ra+236|0;N=Ra+232|0;K=Ra+228|0;M=Ra+224|0;Pa=Ra+220|0;ia=Ra+216|0;W=Ra+212|0;ja=Ra+208|0;Ma=Ra+204|0;Oa=Ra+200|0;La=Ra+196|0;Na=Ra+192|0;T=Ra+188|0;V=Ra+184|0;S=Ra+180|0;U=Ra+176|0;sa=Ra+172|0;_=Ra+168|0;xa=Ra+164|0;$=Ra+160|0;R=Ra+156|0;ra=Ra+152|0;Q=Ra+148|0;qa=Ra+144|0;ua=Ra+140|0;wa=Ra+136|0;ta=Ra+132|0;va=Ra+128|0;Ea=Ra+124|0;da=Ra+120|0;Ja=Ra+11
6|0;ea=Ra+112|0;Ba=Ra+108|0;Da=Ra+104|0;Aa=Ra+100|0;Ca=Ra+96|0;Ga=Ra+92|0;Ia=Ra+88|0;Fa=Ra+84|0;Ha=Ra+80|0;za=Ra+76|0;Y=Ra+72|0;C=Ra+68|0;D=Ra+64|0;na=Ra+60|0;J=Ra+56|0;r=Ra+52|0;I=Ra+48|0;oa=Ra+44|0;pa=Ra+40|0;w=Ra+36|0;B=Ra+32|0;s=Ra+28|0;v=Ra+24|0;ba=Ra+20|0;H=Ra+16|0;ma=Ra+12|0;E=Ra+8|0;ga=Ra+4|0;la=Ra;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Sa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ra+324>>2]=.7071067690849304;c[Qa>>2]=c[Sa>>2];c[m>>2]=(c[m>>2]|0)+(((c[Sa>>2]|0)-1|0)*14<<2);while(1){if((c[Qa>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[z>>2]=+g[c[l>>2]>>2];g[L>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[N>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[K>>2]=+g[(c[m>>2]|0)+24>>2];g[M>>2]=+g[(c[m>>2]|0)+28>>2];g[O>>2]=+g[K>>2]*+g[L>>2]+ +g[M>>2]*+g[N>>2];g[y>>2]=+g[K>>2]*+g[N>>2]-+g[M>>2]*+g[L>>2];g[P>>2]=+g[q>>2]+ +g[O>>2];g[G>>2]=+g[z>>2]-+g[y>>2];g[Z>>2]=+g[q>>2]-+g[O>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[Ma>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Oa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[La>>2]=+g[(c[m>>2]|0)+48>>2];g[Na>>2]=+g[(c[m>>2]|0)+52>>2];g[Pa>>2]=+g[La>>2]*+g[Ma>>2]+ +g[Na>>2]*+g[Oa>>2];g[ia>>2]=+g[La>>2]*+g[Oa>>2]-+g[Na>>2]*+g[Ma>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+16>>2];g[U>>2]=+g[(c[m>>2]|0)+20>>2];g[W>>2]=+g[S>>2]*+g[T>>2]+ +g[U>>2]*+g[V>>2];g[ja>>2]=+g[S>>2]*+g[V>>2]-+g[U>>2]*+g[T>>2];g[X>>2]=+g[Pa>>2]+ +g[W>>2];g[t>>2]=+g[ia>>2]+ +g[ja>>2];g[ha>>2]=+g[Pa>>2]-+g[W>>2];g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[R>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ra>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Q>>2]=+g[(c[m>>2]|0)+8>>2];g[qa>>2]=+g[(c[m>>2]|0)+12>>2];g[sa>>2]=+g[Q>>2]*+g[R>>2]+ +g[qa>>2]*+g[ra>>2];g[_>>2]=+g[Q>>2]*+g[ra>>2]-+g[qa>>2]*+g[R>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ta>>2]=+g[(c[m>>2]|0)+40>>2];g[va>>2]=+g[(c[m>>2]|0)+44>>2];g[xa>>2]=+g[ta>>2]*+g[ua>>2]+ 
+g[va>>2]*+g[wa>>2];g[$>>2]=+g[ta>>2]*+g[wa>>2]-+g[va>>2]*+g[ua>>2];g[ya>>2]=+g[sa>>2]+ +g[xa>>2];g[F>>2]=+g[sa>>2]-+g[xa>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[x>>2]=+g[_>>2]+ +g[$>>2];g[Ba>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Da>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Aa>>2]=+g[c[m>>2]>>2];g[Ca>>2]=+g[(c[m>>2]|0)+4>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[da>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Fa>>2]=+g[(c[m>>2]|0)+32>>2];g[Ha>>2]=+g[(c[m>>2]|0)+36>>2];g[Ja>>2]=+g[Fa>>2]*+g[Ga>>2]+ +g[Ha>>2]*+g[Ia>>2];g[ea>>2]=+g[Fa>>2]*+g[Ia>>2]-+g[Ha>>2]*+g[Ga>>2];g[Ka>>2]=+g[Ea>>2]+ +g[Ja>>2];g[u>>2]=+g[da>>2]+ +g[ea>>2];g[ca>>2]=+g[Ea>>2]-+g[Ja>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[za>>2]=+g[P>>2]+ +g[ya>>2];g[Y>>2]=+g[Ka>>2]+ +g[X>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[za>>2]-+g[Y>>2];g[c[k>>2]>>2]=+g[za>>2]+ +g[Y>>2];g[C>>2]=+g[X>>2]-+g[Ka>>2];g[D>>2]=+g[A>>2]-+g[x>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[C>>2]-+g[D>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[C>>2]+ +g[D>>2];g[na>>2]=+g[Z>>2]-+g[aa>>2];g[J>>2]=+g[G>>2]-+g[F>>2];g[oa>>2]=+g[ca>>2]-+g[fa>>2];g[pa>>2]=+g[ha>>2]+ +g[ka>>2];g[r>>2]=(+g[oa>>2]+ +g[pa>>2])*.7071067690849304;g[I>>2]=(+g[pa>>2]-+g[oa>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[na>>2]-+g[r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[I>>2]+ +g[J>>2];g[c[l>>2]>>2]=+g[na>>2]+ +g[r>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[I>>2]-+g[J>>2];g[w>>2]=+g[u>>2]+ +g[t>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[w>>2]-+g[B>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[w>>2]+ +g[B>>2];g[s>>2]=+g[P>>2]-+g[ya>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[s>>2]-+g[v>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[s>>2]+ +g[v>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[ga>>2]=+g[ca>>2]+ +g[fa>>2];g[la>>2]=+g[ha>>2]-+g[ka>>2];g[ma>>2]=(+g[ga>>2]+ 
+g[la>>2])*.7071067690849304;g[E>>2]=(+g[la>>2]-+g[ga>>2])*.7071067690849304;g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ba>>2]-+g[ma>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[E>>2]+ +g[H>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ba>>2]+ +g[ma>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[E>>2]-+g[H>>2];c[Qa>>2]=(c[Qa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+56}i=Ra;return}function ds(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,48,5608);i=b;return}function es(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0;vb=i;i=i+512|0;k=vb+508|0;l=vb+504|0;m=vb+500|0;n=vb+496|0;wb=vb+492|0;o=vb+488|0;p=vb+484|0;ub=vb+448|0;q=vb+444|0;C=vb+440|0;Ma=vb+436|0;B=vb+432|0;Ya=vb+428|0;Ja=vb+424|0;D=vb+420|0;E=vb+416|0;Ha=vb+412|0;la=vb+408|0;A=vb+404|0;ca=vb+400|0;S=vb+396|0;ba=vb+392|0;ob=vb+388|0;ka=vb+384|0;Sa=vb+380|0;_=vb+376|0;s=vb+372|0;$=vb+368|0;sa=vb+364|0;Ka=vb+360|0;Xa=vb+356|0;La=vb+352|0;pa=vb+348|0;ra=vb+344|0;oa=vb+340|0;qa=vb+336|0;ua=vb+332|0;Wa=vb+328|0;ta=vb+324|0;va=vb+320|0;tb=vb+316|0;u=vb+312|0;Aa=vb+308|0;v=vb+304|0;Fa=vb+300|0;w=vb+296|0;Ga=vb+292|0;x=vb+288|0;qb=vb+284|0;sb=vb+280|0;pb=vb+276|0;rb=vb+272|0;xa=vb+268|0;za=vb+264|0;wa=vb+260|0;ya=vb+256|0;Ca=vb+252|0;Ea=vb+248|0;Ba=vb+244|0;Da=vb+240|0;y=vb+236|0;z=vb+232|0;Q=vb+228|0;R=vb+224|0;cb=vb+220|0;Ta=vb+216|0;hb=vb+212|0;Pa=vb+208|0;mb=vb+204|0;Qa=vb+200|0;nb=vb+196|0;Ua=vb+19
2|0;$a=vb+188|0;bb=vb+184|0;_a=vb+180|0;ab=vb+176|0;eb=vb+172|0;gb=vb+168|0;db=vb+164|0;fb=vb+160|0;jb=vb+156|0;lb=vb+152|0;ib=vb+148|0;kb=vb+144|0;Oa=vb+140|0;Ra=vb+136|0;Va=vb+132|0;r=vb+128|0;ma=vb+124|0;Za=vb+120|0;Ia=vb+116|0;ja=vb+112|0;Na=vb+108|0;J=vb+104|0;U=vb+100|0;L=vb+96|0;Y=vb+92|0;I=vb+88|0;V=vb+84|0;K=vb+80|0;t=vb+76|0;T=vb+72|0;W=vb+68|0;X=vb+64|0;M=vb+60|0;N=vb+56|0;O=vb+52|0;P=vb+48|0;Z=vb+44|0;F=vb+40|0;ea=vb+36|0;na=vb+32|0;ia=vb+28|0;G=vb+24|0;fa=vb+20|0;H=vb+16|0;aa=vb+12|0;da=vb+8|0;ga=vb+4|0;ha=vb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[wb>>2]=f;c[o>>2]=h;c[p>>2]=j;g[vb+480>>2]=.6427876353263855;g[vb+476>>2]=.7660444378852844;g[vb+472>>2]=.9396926164627075;g[vb+468>>2]=.3420201539993286;g[vb+464>>2]=.9848077297210693;g[vb+460>>2]=.1736481785774231;g[vb+456>>2]=.5;g[vb+452>>2]=.8660253882408142;c[ub>>2]=c[wb>>2];c[m>>2]=(c[m>>2]|0)+((c[wb>>2]|0)-1<<4<<2);while(1){if((c[ub>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[C>>2]=+g[c[l>>2]>>2];g[pa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[oa>>2]=+g[(c[m>>2]|0)+16>>2];g[qa>>2]=+g[(c[m>>2]|0)+20>>2];g[sa>>2]=+g[oa>>2]*+g[pa>>2]+ +g[qa>>2]*+g[ra>>2];g[Ka>>2]=+g[oa>>2]*+g[ra>>2]-+g[qa>>2]*+g[pa>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ta>>2]=+g[(c[m>>2]|0)+40>>2];g[va>>2]=+g[(c[m>>2]|0)+44>>2];g[Xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[Wa>>2];g[La>>2]=+g[ta>>2]*+g[Wa>>2]-+g[va>>2]*+g[ua>>2];g[Ma>>2]=(+g[Ka>>2]-+g[La>>2])*.8660253882408142;g[B>>2]=(+g[Xa>>2]-+g[sa>>2])*.8660253882408142;g[Ya>>2]=+g[sa>>2]+ +g[Xa>>2];g[Ja>>2]=+g[q>>2]-+g[Ya>>2]*.5;g[D>>2]=+g[Ka>>2]+ +g[La>>2];g[E>>2]=+g[C>>2]-+g[D>>2]*.5;g[qb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[sb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[pb>>2]=+g[(c[m>>2]|0)+8>>2];g[rb>>2]=+g[(c[m>>2]|0)+12>>2];g[tb>>2]=+g[pb>>2]*+g[qb>>2]+ 
+g[rb>>2]*+g[sb>>2];g[u>>2]=+g[pb>>2]*+g[sb>>2]-+g[rb>>2]*+g[qb>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+32>>2];g[ya>>2]=+g[(c[m>>2]|0)+36>>2];g[Aa>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[v>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[Ca>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ba>>2]=+g[(c[m>>2]|0)+56>>2];g[Da>>2]=+g[(c[m>>2]|0)+60>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[Da>>2]*+g[Ea>>2];g[w>>2]=+g[Ba>>2]*+g[Ea>>2]-+g[Da>>2]*+g[Ca>>2];g[Ga>>2]=+g[Aa>>2]+ +g[Fa>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[Ha>>2]=+g[tb>>2]+ +g[Ga>>2];g[la>>2]=+g[u>>2]+ +g[x>>2];g[y>>2]=+g[u>>2]-+g[x>>2]*.5;g[z>>2]=(+g[Fa>>2]-+g[Aa>>2])*.8660253882408142;g[A>>2]=+g[y>>2]-+g[z>>2];g[ca>>2]=+g[z>>2]+ +g[y>>2];g[Q>>2]=+g[tb>>2]-+g[Ga>>2]*.5;g[R>>2]=(+g[v>>2]-+g[w>>2])*.8660253882408142;g[S>>2]=+g[Q>>2]-+g[R>>2];g[ba>>2]=+g[Q>>2]+ +g[R>>2];g[$a>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[bb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[_a>>2]=+g[c[m>>2]>>2];g[ab>>2]=+g[(c[m>>2]|0)+4>>2];g[cb>>2]=+g[_a>>2]*+g[$a>>2]+ +g[ab>>2]*+g[bb>>2];g[Ta>>2]=+g[_a>>2]*+g[bb>>2]-+g[ab>>2]*+g[$a>>2];g[eb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[gb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[db>>2]=+g[(c[m>>2]|0)+24>>2];g[fb>>2]=+g[(c[m>>2]|0)+28>>2];g[hb>>2]=+g[db>>2]*+g[eb>>2]+ +g[fb>>2]*+g[gb>>2];g[Pa>>2]=+g[db>>2]*+g[gb>>2]-+g[fb>>2]*+g[eb>>2];g[jb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[lb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ib>>2]=+g[(c[m>>2]|0)+48>>2];g[kb>>2]=+g[(c[m>>2]|0)+52>>2];g[mb>>2]=+g[ib>>2]*+g[jb>>2]+ +g[kb>>2]*+g[lb>>2];g[Qa>>2]=+g[ib>>2]*+g[lb>>2]-+g[kb>>2]*+g[jb>>2];g[nb>>2]=+g[hb>>2]+ +g[mb>>2];g[Ua>>2]=+g[Pa>>2]+ +g[Qa>>2];g[ob>>2]=+g[cb>>2]+ +g[nb>>2];g[ka>>2]=+g[Ta>>2]+ +g[Ua>>2];g[Oa>>2]=+g[cb>>2]-+g[nb>>2]*.5;g[Ra>>2]=(+g[Pa>>2]-+g[Qa>>2])*.8660253882408142;g[Sa>>2]=+g[Oa>>2]-+g[Ra>>2];g[_>>2]=+g[Oa>>2]+ 
+g[Ra>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2]*.5;g[r>>2]=(+g[mb>>2]-+g[hb>>2])*.8660253882408142;g[s>>2]=+g[Va>>2]-+g[r>>2];g[$>>2]=+g[r>>2]+ +g[Va>>2];g[ma>>2]=(+g[ka>>2]-+g[la>>2])*.8660253882408142;g[Za>>2]=+g[q>>2]+ +g[Ya>>2];g[Ia>>2]=+g[ob>>2]+ +g[Ha>>2];g[ja>>2]=+g[Za>>2]-+g[Ia>>2]*.5;g[c[k>>2]>>2]=+g[Za>>2]+ +g[Ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ja>>2]+ +g[ma>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]-+g[ma>>2];g[Na>>2]=+g[Ja>>2]-+g[Ma>>2];g[J>>2]=+g[E>>2]-+g[B>>2];g[t>>2]=+g[Sa>>2]*.1736481785774231+ +g[s>>2]*.9848077297210693;g[T>>2]=+g[A>>2]*.3420201539993286-+g[S>>2]*.9396926164627075;g[U>>2]=+g[t>>2]+ +g[T>>2];g[L>>2]=(+g[T>>2]-+g[t>>2])*.8660253882408142;g[W>>2]=+g[s>>2]*.1736481785774231-+g[Sa>>2]*.9848077297210693;g[X>>2]=+g[S>>2]*.3420201539993286+ +g[A>>2]*.9396926164627075;g[Y>>2]=(+g[W>>2]+ +g[X>>2])*.8660253882408142;g[I>>2]=+g[W>>2]-+g[X>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Na>>2]+ +g[U>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[I>>2]+ +g[J>>2];g[V>>2]=+g[Na>>2]-+g[U>>2]*.5;g[c[l>>2]>>2]=+g[V>>2]-+g[Y>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[V>>2]+ +g[Y>>2];g[K>>2]=+g[I>>2]*.5-+g[J>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[L>>2]+ +g[K>>2];g[M>>2]=(+g[Ha>>2]-+g[ob>>2])*.8660253882408142;g[N>>2]=+g[D>>2]+ +g[C>>2];g[O>>2]=+g[ka>>2]+ +g[la>>2];g[P>>2]=+g[N>>2]-+g[O>>2]*.5;g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[M>>2]-+g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[O>>2]+ +g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[M>>2]+ +g[P>>2];g[Z>>2]=+g[Ja>>2]+ +g[Ma>>2];g[F>>2]=+g[B>>2]+ +g[E>>2];g[aa>>2]=+g[_>>2]*.7660444378852844+ +g[$>>2]*.6427876353263855;g[da>>2]=+g[ba>>2]*.1736481785774231+ +g[ca>>2]*.9848077297210693;g[ea>>2]=+g[aa>>2]+ 
+g[da>>2];g[na>>2]=(+g[da>>2]-+g[aa>>2])*.8660253882408142;g[ga>>2]=+g[$>>2]*.7660444378852844-+g[_>>2]*.6427876353263855;g[ha>>2]=+g[ca>>2]*.1736481785774231-+g[ba>>2]*.9848077297210693;g[ia>>2]=(+g[ga>>2]-+g[ha>>2])*.8660253882408142;g[G>>2]=+g[ga>>2]+ +g[ha>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Z>>2]+ +g[ea>>2];g[fa>>2]=+g[Z>>2]-+g[ea>>2]*.5;g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[fa>>2]-+g[ia>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[fa>>2]+ +g[ia>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[G>>2]+ +g[F>>2];g[H>>2]=+g[F>>2]-+g[G>>2]*.5;g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[na>>2]-+g[H>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[na>>2]+ +g[H>>2];c[ub>>2]=(c[ub>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+64;c[n>>2]=c[n>>2]^c[2998]}i=vb;return}function fs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,1,5656);i=b;return}function gs(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0;ba=i;i=i+192|0;n=ba+184|0;o=ba+180|0;p=ba+176|0;q=ba+172|0;r=ba+168|0;s=ba+164|0;t=ba+160|0;ca=ba+156|0;u=ba+152|0;v=ba+148|0;aa=ba+128|0;w=ba+124|0;T=ba+120|0;D=ba+116|0;V=ba+112|0;E=ba+108|0;U=ba+104|0;J=ba+100|0;X=ba+96|0;M=ba+92|0;S=ba+88|0;x=ba+84|0;y=ba+80|0;z=ba+76|0;A=ba+72|0;B=ba+68|0;C=ba+64|0;H=ba+60|0;I=ba+56|0;R=ba+52|0;K=ba+48|0;L=ba+44|0;Q=ba+40|0;N=ba+36|0;P=ba+32|0;G=ba+28|0;O=ba+24|0;F=ba+20|0;W=ba+16|0;$=ba+12|0;Z=ba+8|0;_=ba+4|0;Y=ba;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ca>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ba+144>>2]=.25;g[ba+140>>2]=.5877852439880371;g[ba+136>>2]=.9510565400123596;g[ba+132>>2]=.55901700258255;c[aa>>2]=c[ca>>2];while(1){if((c[aa>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[T>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g
[y>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[B>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[D>>2]=+g[z>>2]+ +g[C>>2];g[V>>2]=+g[A>>2]+ +g[B>>2];g[E>>2]=(+g[z>>2]-+g[C>>2])*.55901700258255;g[U>>2]=+g[x>>2]+ +g[y>>2];g[H>>2]=+g[c[o>>2]>>2];g[I>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[R>>2]=+g[H>>2]+ +g[I>>2];g[K>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[L>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Q>>2]=+g[K>>2]+ +g[L>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[X>>2]=(+g[R>>2]+ +g[Q>>2])*.55901700258255;g[M>>2]=+g[K>>2]-+g[L>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[w>>2]+ +g[D>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[S>>2]-+g[T>>2];g[N>>2]=+g[J>>2]*.9510565400123596+ +g[M>>2]*.5877852439880371;g[P>>2]=+g[M>>2]*.9510565400123596-+g[J>>2]*.5877852439880371;g[F>>2]=+g[w>>2]-+g[D>>2]*.25;g[G>>2]=+g[E>>2]+ +g[F>>2];g[O>>2]=+g[F>>2]-+g[E>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[G>>2]-+g[N>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[O>>2]+ +g[P>>2];g[c[p>>2]>>2]=+g[G>>2]+ +g[N>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[O>>2]-+g[P>>2];g[W>>2]=+g[U>>2]*.9510565400123596+ +g[V>>2]*.5877852439880371;g[$>>2]=+g[V>>2]*.9510565400123596-+g[U>>2]*.5877852439880371;g[Y>>2]=+g[S>>2]*.25+ +g[T>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[_>>2]=+g[Y>>2]-+g[X>>2];g[c[q>>2]>>2]=-(+g[W>>2]+ +g[Z>>2]);g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[$>>2]+ +g[_>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[W>>2]-+g[Z>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[_>>2]-+g[$>>2];c[aa>>2]=(c[aa>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ba;return}function hs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,2,5704);i=b;return}function is(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0;ka=i;i=i+240|0;n=ka+224|0;o=ka+220|0;p=ka+216|0;q=ka+212|0;r=ka+208|0;s=ka+204|0;t=ka+200|0;la=ka+196|0;u=ka+192|0;v=ka+188|0;ja=ka+164|0;ga=ka+160|0;R=ka+156|0;z=ka+152|0;ia=ka+148|0;ha=ka+144|0;U=ka+140|0;F=ka+136|0;E=ka+132|0;X=ka+128|0;W=ka+124|0;P=ka+120|0;_=ka+116|0;Z=ka+112|0;Q=ka+108|0;J=ka+104|0;K=ka+100|0;w=ka+96|0;y=ka+92|0;x=ka+88|0;S=ka+84|0;T=ka+80|0;A=ka+76|0;B=ka+72|0;C=ka+68|0;D=ka+64|0;O=ka+60|0;L=ka+56|0;M=ka+52|0;N=ka+48|0;V=ka+44|0;G=ka+40|0;aa=ka+36|0;fa=ka+32|0;Y=ka+28|0;$=ka+24|0;ba=ka+20|0;I=ka+16|0;ea=ka+12|0;H=ka+8|0;ca=ka+4|0;da=ka;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[la>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ka+184>>2]=.3535533845424652;g[ka+180>>2]=.7071067690849304;g[ka+176>>2]=.6123724579811096;g[ka+172>>2]=.5;g[ka+168>>2]=.8660253882408142;c[ja>>2]=c[la>>2];while(1){if((c[ja>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[y>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ga>>2]=(+g[x>>2]+ +g[y>>2])*.8660253882408142;g[R>>2]=(+g[y>>2]-+g[x>>2])*.5+ +g[w>>2];g[z>>2]=+g[w>>2]+ +g[x>>2]-+g[y>>2];g[ia>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[S>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[T>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ha>>2]=+g[S>>2]+ +g[T>>2];g[U>>2]=(+g[S>>2]-+g[T>>2])*.8660253882408142;g[F>>2]=+g[ha>>2]*.5+ +g[ia>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[C>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[E>>2]=+g[A>>2]+ +g[D>>2];g[X>>2]=(+g[B>>2]+ 
+g[C>>2])*.6123724579811096;g[W>>2]=+g[A>>2]*.7071067690849304-+g[D>>2]*.3535533845424652;g[O>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[L>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[M>>2]=+g[c[o>>2]>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[_>>2]=+g[N>>2]*.3535533845424652+ +g[O>>2]*.7071067690849304;g[Z>>2]=(+g[L>>2]+ +g[M>>2])*.6123724579811096;g[Q>>2]=(+g[E>>2]+ +g[P>>2])*.7071067690849304;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[z>>2]-+g[Q>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[z>>2]+ +g[Q>>2];g[J>>2]=(+g[P>>2]-+g[E>>2])*.7071067690849304;g[K>>2]=+g[ia>>2]-+g[ha>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[J>>2]-+g[K>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[J>>2]+ +g[K>>2];g[V>>2]=+g[R>>2]-+g[U>>2];g[G>>2]=+g[ga>>2]-+g[F>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[aa>>2]=+g[Y>>2]+ +g[$>>2];g[fa>>2]=+g[Y>>2]-+g[$>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[V>>2]-+g[aa>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[fa>>2]+ +g[G>>2];g[c[p>>2]>>2]=+g[V>>2]+ +g[aa>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[fa>>2]-+g[G>>2];g[ba>>2]=+g[R>>2]+ +g[U>>2];g[I>>2]=+g[ga>>2]+ +g[F>>2];g[ca>>2]=+g[Z>>2]+ +g[_>>2];g[da>>2]=+g[X>>2]+ +g[W>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[H>>2]=+g[da>>2]+ +g[ca>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[ba>>2]-+g[ea>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[I>>2]-+g[H>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[ba>>2]+ +g[ea>>2];g[c[q>>2]>>2]=-(+g[H>>2]+ +g[I>>2]);c[ja>>2]=(c[ja>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ka;return}function js(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,3,5752);i=b;return}function ks(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0;Ca=i;i=i+320|0;n=Ca+308|0;o=Ca+304|0;p=Ca+300|0;q=Ca+296|0;r=Ca+292|0;s=Ca+288|0;t=Ca+284|0;Da=Ca+280|0;u=Ca+276|0;v=Ca+272|0;Ba=Ca+236|0;w=Ca+232|0;x=Ca+228|0;ya=Ca+224|0;W=Ca+220|0;J=Ca+216|0;C=Ca+212|0;I=Ca+208|0;ia=Ca+204|0;na=Ca+200|0;sa=Ca+196|0;V=Ca+192|0;F=Ca+188|0;G=Ca+184|0;ga=Ca+180|0;fa=Ca+176|0;va=Ca+172|0;X=Ca+168|0;ea=Ca+164|0;M=Ca+160|0;T=Ca+156|0;y=Ca+152|0;wa=Ca+148|0;B=Ca+144|0;xa=Ca+140|0;z=Ca+136|0;A=Ca+132|0;ja=Ca+128|0;ma=Ca+124|0;ka=Ca+120|0;la=Ca+116|0;qa=Ca+112|0;ra=Ca+108|0;E=Ca+104|0;ua=Ca+100|0;da=Ca+96|0;ta=Ca+92|0;ba=Ca+88|0;ca=Ca+84|0;L=Ca+80|0;oa=Ca+76|0;D=Ca+72|0;ha=Ca+68|0;pa=Ca+64|0;R=Ca+60|0;aa=Ca+56|0;Y=Ca+52|0;_=Ca+48|0;U=Ca+44|0;Z=Ca+40|0;S=Ca+36|0;$=Ca+32|0;Aa=Ca+28|0;H=Ca+24|0;za=Ca+20|0;P=Ca+16|0;N=Ca+12|0;Q=Ca+8|0;K=Ca+4|0;O=Ca;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Da>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ca+268>>2]=.5;g[Ca+264>>2]=.8660253882408142;g[Ca+260>>2]=.80901700258255;g[Ca+256>>2]=.30901700258255005;g[Ca+252>>2]=.25;g[Ca+248>>2]=.55901700258255;g[Ca+244>>2]=.5877852439880371;g[Ca+240>>2]=.9510565400123596;c[Ba>>2]=c[Da>>2];while(1){if((c[Ba>>2]|0)<=0)break;g[w>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[y>>2]=+g[c[o>>2]>>2];g[wa>>2]=+g[x>>2]+ +g[y>>2];g[z>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[xa>>2]=+g[z>>2]-+g[A>>2];g[ya>>2]=+g[wa>>2]*.9510565400123596+ +g[xa>>2]*.5877852439880371;g[W>>2]=+g[xa>>2]*.9510565400123596-+g[wa>>2]*.5877852439880371;g[J>>2]=(+g[y>>2]-+g[B>>2])*.55901700258255;g[C>>2]=+g[y>>2]+ 
+g[B>>2];g[I>>2]=+g[C>>2]*.25;g[ia>>2]=+g[c[n>>2]>>2];g[ja>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ka>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[la>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[qa>>2]=+g[la>>2]+ +g[ja>>2];g[ra>>2]=+g[ma>>2]+ +g[ka>>2];g[na>>2]=+g[ja>>2]+ +g[ka>>2]-(+g[la>>2]+ +g[ma>>2]);g[sa>>2]=+g[qa>>2]*.9510565400123596+ +g[ra>>2]*.5877852439880371;g[V>>2]=+g[qa>>2]*.5877852439880371-+g[ra>>2]*.9510565400123596;g[F>>2]=+g[na>>2]*.25+ +g[ia>>2];g[G>>2]=(+g[ma>>2]+ +g[ja>>2]-(+g[la>>2]+ +g[ka>>2]))*.55901700258255;g[ga>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[fa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ua>>2]=+g[E>>2]+ +g[fa>>2];g[ba>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ca>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[ta>>2]=+g[ba>>2]-+g[ca>>2];g[va>>2]=+g[ta>>2]*.5877852439880371-+g[ua>>2]*.9510565400123596;g[X>>2]=+g[ta>>2]*.9510565400123596+ +g[ua>>2]*.5877852439880371;g[ea>>2]=+g[E>>2]+ +g[da>>2];g[L>>2]=(+g[E>>2]-+g[da>>2])*.55901700258255;g[M>>2]=+g[L>>2]-+g[fa>>2]*.30901700258255005+-(+g[ea>>2]*.25+ +g[ga>>2]);g[T>>2]=+g[fa>>2]*.80901700258255-+g[ga>>2]+-(+g[ea>>2]*.25+ +g[L>>2]);g[oa>>2]=+g[ia>>2]-+g[na>>2];g[D>>2]=+g[w>>2]+ +g[x>>2]-+g[C>>2];g[ha>>2]=+g[ea>>2]-+g[fa>>2]-+g[ga>>2];g[pa>>2]=+g[D>>2]+ +g[ha>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=(+g[D>>2]-+g[ha>>2])*.8660253882408142;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[oa>>2]-+g[pa>>2]*.5;g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[oa>>2]+ +g[pa>>2];g[R>>2]=+g[G>>2]+ +g[F>>2];g[aa>>2]=(+g[W>>2]+ +g[X>>2])*.8660253882408142;g[Y>>2]=+g[W>>2]-+g[X>>2];g[_>>2]=+g[Y>>2]*.5-+g[V>>2];g[S>>2]=+g[w>>2]+ +g[J>>2]+(+g[I>>2]-+g[x>>2]*.80901700258255);g[U>>2]=+g[S>>2]+ +g[T>>2];g[Z>>2]=(+g[T>>2]-+g[S>>2])*.8660253882408142;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[R>>2]+ +g[U>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[V>>2]+ 
+g[Y>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[Z>>2]-+g[_>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[Z>>2]+ +g[_>>2];g[$>>2]=+g[R>>2]-+g[U>>2]*.5;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[$>>2]-+g[aa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[$>>2]+ +g[aa>>2];g[Aa>>2]=(+g[ya>>2]+ +g[va>>2])*.8660253882408142;g[H>>2]=+g[F>>2]-+g[G>>2];g[za>>2]=+g[va>>2]-+g[ya>>2];g[P>>2]=+g[za>>2]*.5-+g[sa>>2];g[K>>2]=+g[x>>2]*.30901700258255005+ +g[w>>2]+ +g[I>>2]-+g[J>>2];g[N>>2]=+g[K>>2]+ +g[M>>2];g[Q>>2]=(+g[M>>2]-+g[K>>2])*.8660253882408142;g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[sa>>2]+ +g[za>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[H>>2]+ +g[N>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[P>>2]-+g[Q>>2];g[c[q>>2]>>2]=+g[P>>2]+ +g[Q>>2];g[O>>2]=+g[H>>2]-+g[N>>2]*.5;g[c[p>>2]>>2]=+g[Aa>>2]+ +g[O>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[O>>2]-+g[Aa>>2];c[Ba>>2]=(c[Ba>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ca;return}function ls(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,4,5800);i=b;return}function ms(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0;Ja=i;i=i+336|0;n=Ja+332|0;o=Ja+328|0;p=Ja+324|0;q=Ja+320|0;r=Ja+316|0;s=Ja+312|0;t=Ja+308|0;Ka=Ja+304|0;u=Ja+300|0;v=Ja+296|0;Ia=Ja+264|0;F=Ja+260|0;y=Ja+256|0;L=Ja+252|0;da=Ja+248|0;za=Ja+244|0;U=Ja+240|0;Ca=Ja+236|0;T=Ja+232|0;qa=Ja+228|0;R=Ja+224|0;ta=Ja+220|0;Q=Ja+216|0;ka=Ja+212|0;x=Ja+208|0;O=Ja+204|0;aa=Ja+200|0;w=Ja+196|0;ca=Ja+192|0;E=Ja+188|0;ba=Ja+184|0;C=Ja+180|0;D=Ja+176|0;ya=Ja+172|0;Ba=Ja+168|0;xa=Ja+164|0;Aa=Ja+160|0;va=Ja+156|0;wa=Ja+152|0;ma=Ja+148|0;sa=Ja+144|0;pa=Ja+140|0;ra=Ja+136|0;na=Ja+132|0;oa=Ja+128|0;I=Ja+124|0;M=Ja+120|0;ja=Ja+116|0;N=Ja+112|0;G=Ja+108|0;H=Ja+104|0;J=Ja+100|0;ia=Ja+96|0;la=Ja+92|0;ea=Ja+88|0;Ea=Ja+84|0;$=Ja+80|0;ua=Ja+76|0;Da=Ja+72|0;Fa=Ja+68|0;ga=Ja+64|0;K=Ja+60|0;fa=Ja+56|0;Ga=Ja+52|0;Ha=Ja+48|0;P=Ja+44|0;z=Ja+40|0;W=Ja+36|0;ha=Ja+32|0;S=Ja+28|0;V=Ja+24|0;X=Ja+20|0;B=Ja+16|0;_=Ja+12|0;A=Ja+8|0;Y=Ja+4|0;Z=Ja;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ka>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ja+292>>2]=.5555702447891235;g[Ja+288>>2]=.8314695954322815;g[Ja+284>>2]=.9807852506637573;g[Ja+280>>2]=.19509032368659973;g[Ja+276>>2]=.3826834261417389;g[Ja+272>>2]=.9238795042037964;g[Ja+268>>2]=.7071067690849304;c[Ia>>2]=c[Ka>>2];while(1){if((c[Ia>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[ca>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[C>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[E>>2]=(+g[C>>2]-+g[D>>2])*.7071067690849304;g[ba>>2]=(+g[C>>2]+ +g[D>>2])*.7071067690849304;g[F>>2]=+g[w>>2]+ +g[E>>2];g[y>>2]=+g[ca>>2]-+g[ba>>2];g[L>>2]=+g[w>>2]-+g[E>>2];g[da>>2]=+g[ba>>2]+ 
+g[ca>>2];g[ya>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ba>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[va>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[wa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[xa>>2]=(+g[va>>2]-+g[wa>>2])*.7071067690849304;g[Aa>>2]=(+g[va>>2]+ +g[wa>>2])*.7071067690849304;g[za>>2]=+g[xa>>2]-+g[ya>>2];g[U>>2]=+g[Ba>>2]-+g[Aa>>2];g[Ca>>2]=+g[Aa>>2]+ +g[Ba>>2];g[T>>2]=+g[xa>>2]+ +g[ya>>2];g[ma>>2]=+g[c[o>>2]>>2];g[sa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[na>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[oa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[pa>>2]=(+g[na>>2]-+g[oa>>2])*.7071067690849304;g[ra>>2]=(+g[na>>2]+ +g[oa>>2])*.7071067690849304;g[qa>>2]=+g[ma>>2]+ +g[pa>>2];g[R>>2]=+g[sa>>2]-+g[ra>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[Q>>2]=+g[ma>>2]-+g[pa>>2];g[G>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[H>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[I>>2]=+g[G>>2]*.9238795042037964-+g[H>>2]*.3826834261417389;g[M>>2]=+g[G>>2]*.3826834261417389+ +g[H>>2]*.9238795042037964;g[J>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ia>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ja>>2]=+g[J>>2]*.3826834261417389-+g[ia>>2]*.9238795042037964;g[N>>2]=+g[J>>2]*.9238795042037964+ +g[ia>>2]*.3826834261417389;g[ka>>2]=+g[I>>2]+ +g[ja>>2];g[x>>2]=+g[ja>>2]-+g[I>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[aa>>2]=+g[M>>2]+ +g[N>>2];g[la>>2]=+g[F>>2]-+g[ka>>2];g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[ua>>2]=+g[qa>>2]*.19509032368659973+ +g[ta>>2]*.9807852506637573;g[Da>>2]=+g[za>>2]*.19509032368659973-+g[Ca>>2]*.9807852506637573;g[Ea>>2]=+g[ua>>2]+ +g[Da>>2];g[$>>2]=+g[Da>>2]-+g[ua>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[la>>2]-+g[Ea>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[$>>2]+ +g[ea>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[la>>2]+ +g[Ea>>2];g[c[q>>2]>>2]=+g[$>>2]-+g[ea>>2];g[Fa>>2]=+g[F>>2]+ +g[ka>>2];g[ga>>2]=+g[da>>2]-+g[aa>>2];g[Ga>>2]=+g[qa>>2]*.9807852506637573-+g[ta>>2]*.19509032368659973;g[Ha>>2]=+g[za>>2]*.9807852506637573+ +g[Ca>>2]*.19509032368659973;g[K>>2]=+g[Ga>>2]+ 
+g[Ha>>2];g[fa>>2]=+g[Ha>>2]-+g[Ga>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[Fa>>2]-+g[K>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[fa>>2]+ +g[ga>>2];g[c[p>>2]>>2]=+g[Fa>>2]+ +g[K>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[fa>>2]-+g[ga>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[S>>2]=+g[Q>>2]*.8314695954322815+ +g[R>>2]*.5555702447891235;g[V>>2]=+g[T>>2]*.8314695954322815+ +g[U>>2]*.5555702447891235;g[W>>2]=+g[S>>2]-+g[V>>2];g[ha>>2]=+g[S>>2]+ +g[V>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[P>>2]-+g[W>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[z>>2]-+g[ha>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[P>>2]+ +g[W>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=-(+g[ha>>2]+ +g[z>>2]);g[X>>2]=+g[L>>2]-+g[O>>2];g[B>>2]=+g[x>>2]+ +g[y>>2];g[Y>>2]=+g[U>>2]*.8314695954322815-+g[T>>2]*.5555702447891235;g[Z>>2]=+g[R>>2]*.8314695954322815-+g[Q>>2]*.5555702447891235;g[_>>2]=+g[Y>>2]-+g[Z>>2];g[A>>2]=+g[Z>>2]+ +g[Y>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[X>>2]-+g[_>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[A>>2]+ +g[B>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[X>>2]+ +g[_>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[A>>2]-+g[B>>2];c[Ia>>2]=(c[Ia>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ja;return}function ns(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,5,5848);i=b;return}function os(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0;eb=i;i=i+464|0;n=eb+448|0;o=eb+444|0;p=eb+440|0;q=eb+436|0;r=eb+432|0;s=eb+428|0;t=eb+424|0;fb=eb+420|0;u=eb+416|0;v=eb+412|0;db=eb+356|0;da=eb+352|0;ia=eb+348|0;Ra=eb+344|0;sa=eb+340|0;ea=eb+336|0;ha=eb+332|0;Da=eb+328|0;ja=eb+324|0;Ja=eb+320|0;ka=eb+316|0;Qa=eb+312|0;pa=eb+308|0;z=eb+304|0;qa=eb+300|0;Pa=eb+296|0;ra=eb+292|0;w=eb+288|0;ba=eb+284|0;Va=eb+280|0;S=eb+276|0;J=eb+272|0;Ua=eb+268|0;M=eb+264|0;L=eb+260|0;K=eb+256|0;bb=eb+252|0;Ba=eb+248|0;N=eb+244|0;T=eb+240|0;ab=eb+236|0;Za=eb+232|0;Fa=eb+228|0;fa=eb+224|0;Ia=eb+220|0;ga=eb+216|0;Ga=eb+212|0;Ha=eb+208|0;La=eb+204|0;oa=eb+200|0;Oa=eb+196|0;na=eb+192|0;Ma=eb+188|0;Na=eb+184|0;Z=eb+180|0;aa=eb+176|0;_=eb+172|0;$=eb+168|0;H=eb+164|0;I=eb+160|0;_a=eb+156|0;$a=eb+152|0;Xa=eb+148|0;Ya=eb+144|0;ca=eb+140|0;Y=eb+136|0;Ta=eb+132|0;X=eb+128|0;Ka=eb+124|0;Sa=eb+120|0;O=eb+116|0;Q=eb+112|0;Ca=eb+108|0;C=eb+104|0;x=eb+100|0;D=eb+96|0;A=eb+92|0;E=eb+88|0;Aa=eb+84|0;Ea=eb+80|0;y=eb+76|0;B=eb+72|0;G=eb+68|0;F=eb+64|0;P=eb+60|0;U=eb+56|0;W=eb+52|0;cb=eb+48|0;wa=eb+44|0;ma=eb+40|0;xa=eb+36|0;ua=eb+32|0;ya=eb+28|0;Wa=eb+24|0;la=eb+20|0;ta=eb+16|0;va=eb+12|0;V=eb+8|0;za=eb+4|0;R=eb;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[fb>>2]=k;c[u>>2]=l;c[v>>2]=m;g[eb+408>>2]=.5720614194869995;g[eb+404>>2]=.21850800514221191;g[eb+400>>2]=.30901700258255005;g[eb+396>>2]=.80901700258255;g[eb+392>>2]=.55901700258255;g[eb+388>>2]=.9510565400123596;g[eb+384>>2]=.5877852439880371;g[eb+380>>2]=.25;g[eb+376>>2]=.1767766922712326;g[eb+372>>2]=.3952847123146057
;g[eb+368>>2]=.6724985241889954;g[eb+364>>2]=.4156269431114197;g[eb+360>>2]=.7071067690849304;c[db>>2]=c[fb>>2];while(1){if((c[db>>2]|0)<=0)break;g[da>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ia>>2]=+g[da>>2]*.7071067690849304;g[Ra>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[sa>>2]=+g[Ra>>2]*.7071067690849304;g[ea>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Fa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[fa>>2]=+g[ea>>2]+ +g[Fa>>2];g[Ga>>2]=+g[c[o>>2]>>2];g[Ha>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[ga>>2]=+g[Ga>>2]-+g[Ha>>2];g[ha>>2]=+g[fa>>2]*.4156269431114197+ +g[ga>>2]*.6724985241889954;g[Da>>2]=+g[fa>>2]*.6724985241889954-+g[ga>>2]*.4156269431114197;g[ja>>2]=(+g[Fa>>2]-+g[Ia>>2])*.3952847123146057;g[Ja>>2]=+g[Fa>>2]+ +g[Ia>>2];g[ka>>2]=+g[Ja>>2]*.1767766922712326;g[La>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Qa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[oa>>2]=+g[La>>2]+ +g[Qa>>2];g[Ma>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Na>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Oa>>2]=+g[Ma>>2]+ +g[Na>>2];g[na>>2]=+g[Ma>>2]-+g[Na>>2];g[pa>>2]=+g[na>>2]*.6724985241889954-+g[oa>>2]*.4156269431114197;g[z>>2]=+g[na>>2]*.4156269431114197+ +g[oa>>2]*.6724985241889954;g[qa>>2]=(+g[La>>2]-+g[Oa>>2])*.3952847123146057;g[Pa>>2]=+g[La>>2]+ +g[Oa>>2];g[ra>>2]=+g[Pa>>2]*.1767766922712326;g[w>>2]=+g[c[n>>2]>>2];g[Z>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[aa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[_>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[H>>2]=+g[$>>2]+ +g[Z>>2];g[I>>2]=+g[aa>>2]+ +g[_>>2];g[ba>>2]=+g[Z>>2]+ +g[_>>2]-(+g[$>>2]+ +g[aa>>2]);g[Va>>2]=+g[ba>>2]*.25+ +g[w>>2];g[S>>2]=+g[H>>2]*.5877852439880371-+g[I>>2]*.9510565400123596;g[J>>2]=+g[H>>2]*.9510565400123596+ +g[I>>2]*.5877852439880371;g[Ua>>2]=(+g[aa>>2]+ +g[Z>>2]-(+g[$>>2]+ 
+g[_>>2]))*.55901700258255;g[M>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[_a>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[$a>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[L>>2]=+g[_a>>2]+ +g[$a>>2];g[Xa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ya>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Za>>2]=+g[Xa>>2]-+g[Ya>>2];g[K>>2]=+g[Xa>>2]+ +g[Ya>>2];g[bb>>2]=+g[Za>>2]*.9510565400123596+ +g[ab>>2]*.5877852439880371;g[Ba>>2]=+g[Za>>2]*.5877852439880371-+g[ab>>2]*.9510565400123596;g[N>>2]=+g[K>>2]*.80901700258255+ +g[L>>2]*.30901700258255005+ +g[M>>2];g[T>>2]=+g[M>>2]-+g[L>>2]*.80901700258255-+g[K>>2]*.30901700258255005;g[ca>>2]=+g[w>>2]-+g[ba>>2];g[Y>>2]=+g[L>>2]+ +g[M>>2]-+g[K>>2];g[Ka>>2]=+g[da>>2]+(+g[ea>>2]-+g[Ja>>2]);g[Sa>>2]=+g[Pa>>2]-+g[Qa>>2]-+g[Ra>>2];g[Ta>>2]=(+g[Ka>>2]+ +g[Sa>>2])*.7071067690849304;g[X>>2]=(+g[Ka>>2]-+g[Sa>>2])*.7071067690849304;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[ca>>2]-+g[Ta>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[X>>2]-+g[Y>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[ca>>2]+ +g[Ta>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[X>>2]+ +g[Y>>2];g[O>>2]=+g[J>>2]-+g[N>>2];g[Q>>2]=+g[J>>2]+ +g[N>>2];g[Aa>>2]=+g[Va>>2]-+g[Ua>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[C>>2]=+g[Aa>>2]+ +g[Ba>>2];g[Ea>>2]=+g[ea>>2]*.21850800514221191+ +g[ia>>2]+ +g[ka>>2]-+g[ja>>2];g[x>>2]=+g[Da>>2]+ +g[Ea>>2];g[D>>2]=+g[Ea>>2]-+g[Da>>2];g[y>>2]=+g[qa>>2]-+g[Qa>>2]*.21850800514221191-(+g[ra>>2]+ +g[sa>>2]);g[A>>2]=+g[y>>2]-+g[z>>2];g[E>>2]=+g[y>>2]+ +g[z>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[Ca>>2]-+g[B>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[Ca>>2]+ +g[B>>2];g[G>>2]=+g[E>>2]-+g[D>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[G>>2]-+g[O>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[G>>2]+ +g[O>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[C>>2]-+g[F>>2];g[c[p>>2]>>2]=+g[C>>2]+ +g[F>>2];g[P>>2]=+g[A>>2]-+g[x>>2];g[c[q>>2]>>2]=+g[P>>2]-+g[Q>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[P>>2]+ 
+g[Q>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[W>>2]=+g[T>>2]-+g[S>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[cb>>2]=+g[Wa>>2]+ +g[bb>>2];g[wa>>2]=+g[Wa>>2]-+g[bb>>2];g[la>>2]=+g[ia>>2]+ +g[ja>>2]+(+g[ka>>2]-+g[ea>>2]*.5720614194869995);g[ma>>2]=+g[ha>>2]+ +g[la>>2];g[xa>>2]=+g[ha>>2]-+g[la>>2];g[ta>>2]=+g[qa>>2]+ +g[ra>>2]+(+g[sa>>2]-+g[Qa>>2]*.5720614194869995);g[ua>>2]=+g[pa>>2]-+g[ta>>2];g[ya>>2]=+g[pa>>2]+ +g[ta>>2];g[va>>2]=+g[ma>>2]+ +g[ua>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[cb>>2]-+g[va>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[cb>>2]+ +g[va>>2];g[V>>2]=+g[ya>>2]-+g[xa>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[V>>2]-+g[W>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[V>>2]+ +g[W>>2];g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[wa>>2]-+g[za>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[wa>>2]+ +g[za>>2];g[R>>2]=+g[ua>>2]-+g[ma>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[R>>2]-+g[U>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[R>>2]+ +g[U>>2];c[db>>2]=(c[db>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=eb;return}function ps(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,6,5896);i=b;return}function qs(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0;Lc=i;i=i+912|0;n=Lc+896|0;o=Lc+892|0;p=Lc+888|0;q=Lc+884|0;r=Lc+880|0;s=Lc+876|0;t=Lc+872|0;Mc=Lc+868|0;u=Lc+864|0;v=Lc+860|0;Kc=Lc+704|0;Qb=Lc+700|0;bc=Lc+696|0;nb=Lc+692|0;Ea=Lc+688|0;Xb=Lc+684|0;cc=Lc+680|0;Nb=Lc+676|0;ib=Lc+672|0;qa=Lc+668|0;na=Lc+664|0;hb=Lc+660|0;Lb=Lc+656|0;Ja=Lc+652|0;jc=Lc+648|0;gc=Lc+644|0;Ia=Lc+640|0;sc=Lc+636|0;Ma=Lc+632|0;D=Lc+628|0;A=Lc+624|0;La=Lc+620|0;Cc=Lc+616|0;fb=Lc+612|0;ja=Lc+608|0;ga=Lc+604|0;Pa=Lc+600|0;$b=Lc+596|0;ac=Lc+592|0;Wb=Lc+588|0;Da=Lc+584|0;Tb=Lc+580|0;Ca=Lc+576|0;Ub=Lc+572|0;Vb=Lc+568|0;Rb=Lc+564|0;Sb=Lc+560|0;Dc=Lc+556|0;Gc=Lc+552|0;Jc=Lc+548|0;Mb=Lc+544|0;pa=Lc+540|0;oa=Lc+536|0;la=Lc+532|0;ma=Lc+528|0;Ec=Lc+524|0;Fc=Lc+520|0;Hc=Lc+516|0;Ic=Lc+512|0;w=Lc+508|0;Gb=Lc+504|0;Jb=Lc+500|0;Kb=Lc+496|0;ic=Lc+492|0;hc=Lc+488|0;ec=Lc+484|0;fc=Lc+480|0;Fa=Lc+476|0;Fb=Lc+472|0;Hb=Lc+468|0;Ib=Lc+464|0;kc=Lc+460|0;nc=Lc+456|0;qc=Lc+452|0;rc=Lc+448|0;C=Lc+444|0;B=Lc+440|0;y=Lc+436|0;z=Lc+432|0;lc=Lc+428|0;mc=Lc+424|0;oc=Lc+420|0;pc=Lc+416|0;uc=Lc+412|0;xc=Lc+408|0;Ac=Lc+404|0;Bc=Lc+400|0;ia=Lc+396|0;ha=Lc+392|0;G=Lc+388|0;fa=Lc+384|0;vc=Lc+380|0
;wc=Lc+376|0;yc=Lc+372|0;zc=Lc+368|0;Yb=Lc+364|0;Pb=Lc+360|0;Zb=Lc+356|0;tc=Lc+352|0;Ob=Lc+348|0;_b=Lc+344|0;dc=Lc+340|0;Q=Lc+336|0;da=Lc+332|0;$=Lc+328|0;ca=Lc+324|0;W=Lc+320|0;aa=Lc+316|0;T=Lc+312|0;ba=Lc+308|0;H=Lc+304|0;I=Lc+300|0;wa=Lc+296|0;J=Lc+292|0;Aa=Lc+288|0;F=Lc+284|0;sa=Lc+280|0;ta=Lc+276|0;N=Lc+272|0;K=Lc+268|0;O=Lc+264|0;P=Lc+260|0;Z=Lc+256|0;_=Lc+252|0;U=Lc+248|0;V=Lc+244|0;R=Lc+240|0;S=Lc+236|0;ua=Lc+232|0;va=Lc+228|0;ya=Lc+224|0;za=Lc+220|0;x=Lc+216|0;E=Lc+212|0;ka=Lc+208|0;ra=Lc+204|0;Y=Lc+200|0;X=Lc+196|0;Ga=Lc+192|0;ea=Lc+188|0;L=Lc+184|0;M=Lc+180|0;xa=Lc+176|0;Ba=Lc+172|0;Ha=Lc+168|0;Eb=Lc+164|0;$a=Lc+160|0;_a=Lc+156|0;ab=Lc+152|0;Va=Lc+148|0;db=Lc+144|0;Sa=Lc+140|0;eb=Lc+136|0;qb=Lc+132|0;tb=Lc+128|0;ub=Lc+124|0;Bb=Lc+120|0;zb=Lc+116|0;Oa=Lc+112|0;kb=Lc+108|0;lb=Lc+104|0;xb=Lc+100|0;mb=Lc+96|0;Cb=Lc+92|0;Db=Lc+88|0;Ya=Lc+84|0;Za=Lc+80|0;Ta=Lc+76|0;Ua=Lc+72|0;Qa=Lc+68|0;Ra=Lc+64|0;ob=Lc+60|0;pb=Lc+56|0;rb=Lc+52|0;sb=Lc+48|0;Ka=Lc+44|0;Na=Lc+40|0;gb=Lc+36|0;jb=Lc+32|0;Xa=Lc+28|0;Wa=Lc+24|0;cb=Lc+20|0;bb=Lc+16|0;yb=Lc+12|0;Ab=Lc+8|0;vb=Lc+4|0;wb=Lc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Mc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Lc+856>>2]=1.9960534572601318;g[Lc+852>>2]=.06279052048921585;g[Lc+848>>2]=.1255810409784317;g[Lc+844>>2]=.9980267286300659;g[Lc+840>>2]=1.3690942525863647;g[Lc+836>>2]=.728968620300293;g[Lc+832>>2]=.963507354259491;g[Lc+828>>2]=.8763066530227661;g[Lc+824>>2]=.49737977981567383;g[Lc+820>>2]=.9685831665992737;g[Lc+816>>2]=1.457937240600586;g[Lc+812>>2]=.6845471262931824;g[Lc+808>>2]=1.7526133060455322;g[Lc+804>>2]=.4817536771297455;g[Lc+800>>2]=1.9371663331985474;g[Lc+796>>2]=.24868988990783691;g[Lc+792>>2]=.9921147227287292;g[Lc+788>>2]=.25066646933555603;g[Lc+784>>2]=1.8096541166305542;g[Lc+780>>2]=.4257792830467224;g[Lc+776>>2]=1.5410264730453491;g[Lc+772>>2]=.6374239921569824;g[Lc+768>>2]=1.6886558532714844;g[Lc+764>>2]=.5358268022537231;g[Lc+760>>2]=.8515585660934448;g[Lc+756>>2]=.9048270583152771
;g[Lc+752>>2]=1.9842294454574585;g[Lc+748>>2]=.12533323466777802;g[Lc+744>>2]=1.2748479843139648;g[Lc+740>>2]=.7705132365226746;g[Lc+736>>2]=.8443279266357422;g[Lc+732>>2]=1.0716536045074463;g[Lc+728>>2]=.29389262199401855;g[Lc+724>>2]=.4755282700061798;g[Lc+720>>2]=.25;g[Lc+716>>2]=.5877852439880371;g[Lc+712>>2]=.9510565400123596;g[Lc+708>>2]=.55901700258255;c[Kc>>2]=c[Mc>>2];while(1){if((c[Kc>>2]|0)<=0)break;g[Qb>>2]=+g[c[n>>2]>>2];g[Ub>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Vb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Wb>>2]=+g[Ub>>2]-+g[Vb>>2];g[Da>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Rb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Sb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Tb>>2]=+g[Rb>>2]-+g[Sb>>2];g[Ca>>2]=+g[Rb>>2]+ +g[Sb>>2];g[bc>>2]=(+g[Tb>>2]-+g[Wb>>2])*.55901700258255;g[nb>>2]=+g[Da>>2]*.9510565400123596-+g[Ca>>2]*.5877852439880371;g[Ea>>2]=+g[Ca>>2]*.9510565400123596+ +g[Da>>2]*.5877852439880371;g[Xb>>2]=+g[Tb>>2]+ +g[Wb>>2];g[cc>>2]=+g[Qb>>2]-+g[Xb>>2]*.25;g[Dc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ec>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Fc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Hc>>2]=+g[c[o>>2]>>2];g[Ic>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Jc>>2]=+g[Hc>>2]+ +g[Ic>>2];g[Mb>>2]=+g[Gc>>2]-+g[Jc>>2];g[pa>>2]=+g[Ic>>2]-+g[Hc>>2];g[oa>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Nb>>2]=+g[Dc>>2]+ +g[Mb>>2];g[ib>>2]=+g[pa>>2]*.4755282700061798-+g[oa>>2]*.29389262199401855;g[qa>>2]=+g[oa>>2]*.4755282700061798+ +g[pa>>2]*.29389262199401855;g[la>>2]=+g[Dc>>2]-+g[Mb>>2]*.25;g[ma>>2]=(+g[Gc>>2]+ +g[Jc>>2])*.55901700258255;g[na>>2]=+g[la>>2]+ +g[ma>>2];g[hb>>2]=+g[la>>2]-+g[ma>>2];g[w>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Fa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Fb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Gb>>2]=+g[Fa>>2]-+g[Fb>>2];g[Hb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Ib>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Jb>>2]=+g[Hb>>2]-+g[Ib>>2];g[Kb>>2]=+g[Gb>>2]+ +g[Jb>>2];g[ic>>2]=+g[Hb>>2]+ 
+g[Ib>>2];g[hc>>2]=+g[Fa>>2]+ +g[Fb>>2];g[Lb>>2]=+g[w>>2]+ +g[Kb>>2];g[Ja>>2]=+g[ic>>2]*.4755282700061798-+g[hc>>2]*.29389262199401855;g[jc>>2]=+g[hc>>2]*.4755282700061798+ +g[ic>>2]*.29389262199401855;g[ec>>2]=(+g[Gb>>2]-+g[Jb>>2])*.55901700258255;g[fc>>2]=+g[w>>2]-+g[Kb>>2]*.25;g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[Ia>>2]=+g[fc>>2]-+g[ec>>2];g[kc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[lc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[mc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[oc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[pc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[qc>>2]=+g[oc>>2]+ +g[pc>>2];g[rc>>2]=+g[nc>>2]-+g[qc>>2];g[C>>2]=+g[pc>>2]-+g[oc>>2];g[B>>2]=+g[lc>>2]+ +g[mc>>2];g[sc>>2]=+g[kc>>2]+ +g[rc>>2];g[Ma>>2]=+g[C>>2]*.4755282700061798-+g[B>>2]*.29389262199401855;g[D>>2]=+g[B>>2]*.4755282700061798+ +g[C>>2]*.29389262199401855;g[y>>2]=+g[kc>>2]-+g[rc>>2]*.25;g[z>>2]=(+g[nc>>2]+ +g[qc>>2])*.55901700258255;g[A>>2]=+g[y>>2]+ +g[z>>2];g[La>>2]=+g[y>>2]-+g[z>>2];g[uc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[vc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[wc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[xc>>2]=+g[vc>>2]-+g[wc>>2];g[yc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[zc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[Bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[ia>>2]=+g[yc>>2]+ +g[zc>>2];g[ha>>2]=+g[vc>>2]+ +g[wc>>2];g[Cc>>2]=+g[uc>>2]+ +g[Bc>>2];g[fb>>2]=+g[ia>>2]*.4755282700061798-+g[ha>>2]*.29389262199401855;g[ja>>2]=+g[ha>>2]*.4755282700061798+ +g[ia>>2]*.29389262199401855;g[G>>2]=(+g[xc>>2]-+g[Ac>>2])*.55901700258255;g[fa>>2]=+g[uc>>2]-+g[Bc>>2]*.25;g[ga>>2]=+g[G>>2]+ +g[fa>>2];g[Pa>>2]=+g[fa>>2]-+g[G>>2];g[$b>>2]=+g[Nb>>2]-+g[Cc>>2];g[ac>>2]=+g[Lb>>2]-+g[sc>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[$b>>2]*.5877852439880371-+g[ac>>2]*.9510565400123596;g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[ac>>2]*.5877852439880371+ +g[$b>>2]*.9510565400123596;g[Yb>>2]=+g[Qb>>2]+ +g[Xb>>2];g[tc>>2]=+g[Lb>>2]+ 
+g[sc>>2];g[Ob>>2]=+g[Cc>>2]+ +g[Nb>>2];g[Pb>>2]=(+g[tc>>2]-+g[Ob>>2])*.55901700258255;g[Zb>>2]=+g[tc>>2]+ +g[Ob>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[_b>>2]=+g[Yb>>2]-+g[Zb>>2]*.25;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[Pb>>2]+ +g[_b>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[_b>>2]-+g[Pb>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[O>>2]=+g[jc>>2]*1.0716536045074463-+g[gc>>2]*.8443279266357422;g[P>>2]=+g[A>>2]*.7705132365226746-+g[D>>2]*1.2748479843139648;g[Q>>2]=+g[O>>2]-+g[P>>2];g[da>>2]=+g[O>>2]+ +g[P>>2];g[Z>>2]=+g[na>>2]*.12533323466777802+ +g[qa>>2]*1.9842294454574585;g[_>>2]=+g[ga>>2]*.9048270583152771+ +g[ja>>2]*.8515585660934448;g[$>>2]=+g[Z>>2]-+g[_>>2];g[ca>>2]=+g[_>>2]+ +g[Z>>2];g[U>>2]=+g[gc>>2]*.5358268022537231+ +g[jc>>2]*1.6886558532714844;g[V>>2]=+g[A>>2]*.6374239921569824+ +g[D>>2]*1.5410264730453491;g[W>>2]=+g[U>>2]-+g[V>>2];g[aa>>2]=+g[U>>2]+ +g[V>>2];g[R>>2]=+g[ga>>2]*.4257792830467224-+g[ja>>2]*1.8096541166305542;g[S>>2]=+g[qa>>2]*.25066646933555603-+g[na>>2]*.9921147227287292;g[T>>2]=+g[R>>2]-+g[S>>2];g[ba>>2]=+g[R>>2]+ +g[S>>2];g[ua>>2]=+g[A>>2]*.8443279266357422+ +g[D>>2]*1.0716536045074463;g[va>>2]=+g[gc>>2]*.24868988990783691+ +g[jc>>2]*1.9371663331985474;g[H>>2]=+g[va>>2]+ +g[ua>>2];g[ya>>2]=+g[ga>>2]*.4817536771297455+ +g[ja>>2]*1.7526133060455322;g[za>>2]=+g[na>>2]*.6845471262931824+ +g[qa>>2]*1.457937240600586;g[I>>2]=+g[ya>>2]+ +g[za>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[x>>2]=+g[gc>>2]*.9685831665992737-+g[jc>>2]*.49737977981567383;g[E>>2]=+g[A>>2]*.5358268022537231-+g[D>>2]*1.6886558532714844;g[F>>2]=+g[x>>2]+ +g[E>>2];g[ka>>2]=+g[ga>>2]*.8763066530227661-+g[ja>>2]*.963507354259491;g[ra>>2]=+g[na>>2]*.728968620300293-+g[qa>>2]*1.3690942525863647;g[sa>>2]=+g[ka>>2]+ +g[ra>>2];g[ta>>2]=+g[F>>2]+ +g[sa>>2];g[N>>2]=+g[x>>2]-+g[E>>2];g[K>>2]=+g[ka>>2]-+g[ra>>2];g[c[p>>2]>>2]=+g[dc>>2]+ +g[ta>>2];g[c[q>>2]>>2]=-(+g[Ea>>2]+ +g[J>>2]);g[Y>>2]=(+g[W>>2]+ 
+g[T>>2])*.55901700258255;g[X>>2]=(+g[T>>2]-+g[W>>2])*.25+ +g[dc>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[Q>>2]*.9510565400123596+ +g[X>>2]+(+g[$>>2]*.5877852439880371+ +g[Y>>2]);g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[$>>2]*.9510565400123596+ +g[X>>2]+-(+g[Q>>2]*.5877852439880371+ +g[Y>>2]);g[Ga>>2]=(+g[da>>2]+ +g[ca>>2])*.55901700258255;g[ea>>2]=(+g[ca>>2]-+g[da>>2])*.25+ +g[Ea>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[aa>>2]*.5877852439880371+ +g[ba>>2]*.9510565400123596+ +g[ea>>2]-+g[Ga>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[ba>>2]*.5877852439880371+ +g[ea>>2]+(+g[Ga>>2]-+g[aa>>2]*.9510565400123596);g[L>>2]=+g[J>>2]*.25-+g[Ea>>2];g[M>>2]=(+g[I>>2]-+g[H>>2])*.55901700258255;g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[K>>2]*.9510565400123596+ +g[L>>2]+-(+g[N>>2]*.5877852439880371+ +g[M>>2]);g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[N>>2]*.9510565400123596+ +g[K>>2]*.5877852439880371+ +g[L>>2]+ +g[M>>2];g[xa>>2]=+g[dc>>2]-+g[ta>>2]*.25;g[Ba>>2]=(+g[F>>2]-+g[sa>>2])*.55901700258255;g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[wa>>2]*.5877852439880371+ +g[xa>>2]+(+g[Aa>>2]*.9510565400123596-+g[Ba>>2]);g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[Ba>>2]+(+g[Aa>>2]*.5877852439880371+ +g[xa>>2])-+g[wa>>2]*.9510565400123596;g[Ha>>2]=+g[cc>>2]-+g[bc>>2];g[Cb>>2]=+g[fb>>2]*1.0716536045074463-+g[Pa>>2]*.8443279266357422;g[Db>>2]=+g[hb>>2]*.9980267286300659-+g[ib>>2]*.1255810409784317;g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[$a>>2]=+g[Db>>2]-+g[Cb>>2];g[Ya>>2]=+g[Ja>>2]*1.7526133060455322-+g[Ia>>2]*.4817536771297455;g[Za>>2]=+g[La>>2]*.9048270583152771+ +g[Ma>>2]*.8515585660934448;g[_a>>2]=+g[Ya>>2]+ +g[Za>>2];g[ab>>2]=+g[Ya>>2]-+g[Za>>2];g[Ta>>2]=+g[Pa>>2]*.5358268022537231+ +g[fb>>2]*1.6886558532714844;g[Ua>>2]=+g[hb>>2]*.06279052048921585+ +g[ib>>2]*1.9960534572601318;g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[db>>2]=+g[Ua>>2]-+g[Ta>>2];g[Qa>>2]=+g[Ia>>2]*.8763066530227661+ +g[Ja>>2]*.963507354259491;g[Ra>>2]=+g[Ma>>2]*1.8096541166305542-+g[La>>2]*.4257792830467224;g[Sa>>2]=+g[Qa>>2]+ 
+g[Ra>>2];g[eb>>2]=+g[Qa>>2]-+g[Ra>>2];g[ob>>2]=+g[Ma>>2]*1.9842294454574585-+g[La>>2]*.12533323466777802;g[pb>>2]=+g[Ia>>2]*.6845471262931824+ +g[Ja>>2]*1.457937240600586;g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[rb>>2]=+g[ib>>2]*1.2748479843139648-+g[hb>>2]*.7705132365226746;g[sb>>2]=+g[Pa>>2]*.9980267286300659+ +g[fb>>2]*.1255810409784317;g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[Bb>>2]=+g[pb>>2]+ +g[ob>>2];g[zb>>2]=+g[sb>>2]+ +g[rb>>2];g[Ka>>2]=+g[Ia>>2]*.728968620300293-+g[Ja>>2]*1.3690942525863647;g[Na>>2]=+g[La>>2]*.9921147227287292+ +g[Ma>>2]*.25066646933555603;g[Oa>>2]=+g[Ka>>2]-+g[Na>>2];g[gb>>2]=+g[Pa>>2]*.06279052048921585-+g[fb>>2]*1.9960534572601318;g[jb>>2]=+g[hb>>2]*.6374239921569824+ +g[ib>>2]*1.5410264730453491;g[kb>>2]=+g[gb>>2]-+g[jb>>2];g[lb>>2]=+g[Oa>>2]+ +g[kb>>2];g[xb>>2]=+g[Ka>>2]+ +g[Na>>2];g[mb>>2]=+g[gb>>2]+ +g[jb>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Ha>>2]+ +g[lb>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[ub>>2]-+g[nb>>2];g[Xa>>2]=(+g[Sa>>2]-+g[Va>>2])*.55901700258255;g[Wa>>2]=+g[Ha>>2]-(+g[Sa>>2]+ +g[Va>>2])*.25;g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[Eb>>2]*.9510565400123596+ +g[Wa>>2]+-(+g[_a>>2]*.5877852439880371+ +g[Xa>>2]);g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[_a>>2]*.9510565400123596+ +g[Eb>>2]*.5877852439880371+ +g[Wa>>2]+ +g[Xa>>2];g[cb>>2]=(+g[ab>>2]+ +g[$a>>2])*.55901700258255;g[bb>>2]=(+g[$a>>2]-+g[ab>>2])*.25+ +g[nb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[bb>>2]+(+g[db>>2]*.5877852439880371+ +g[cb>>2])-+g[eb>>2]*.9510565400123596;g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[eb>>2]*.5877852439880371+ +g[bb>>2]+(+g[db>>2]*.9510565400123596-+g[cb>>2]);g[yb>>2]=+g[Ha>>2]-+g[lb>>2]*.25;g[Ab>>2]=(+g[Oa>>2]-+g[kb>>2])*.55901700258255;g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[zb>>2]*.9510565400123596+ +g[yb>>2]+-(+g[Bb>>2]*.5877852439880371+ +g[Ab>>2]);g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[Bb>>2]*.9510565400123596+ +g[Ab>>2]+(+g[zb>>2]*.5877852439880371+ 
+g[yb>>2]);g[vb>>2]=+g[ub>>2]*.25;g[wb>>2]=(+g[qb>>2]-+g[tb>>2])*.55901700258255;g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[mb>>2]*.9510565400123596-(+g[nb>>2]+ +g[vb>>2])+-(+g[xb>>2]*.5877852439880371+ +g[wb>>2]);g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[xb>>2]*.9510565400123596+ +g[mb>>2]*.5877852439880371+ +g[wb>>2]-(+g[nb>>2]+ +g[vb>>2]);c[Kc>>2]=(c[Kc>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Lc;return}function rs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,7,5944);i=b;return}function ss(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0;w=i;i=i+64|0;n=w+48|0;o=w+44|0;p=w+40|0;q=w+36|0;x=w+20|0;r=w+16|0;s=w+12|0;v=w+8|0;t=w+4|0;u=w;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[w+32>>2]=f;c[w+28>>2]=h;c[w+24>>2]=j;c[x>>2]=k;c[r>>2]=l;c[s>>2]=m;c[v>>2]=c[x>>2];while(1){if((c[v>>2]|0)<=0)break;g[t>>2]=+g[c[n>>2]>>2];g[u>>2]=+g[c[o>>2]>>2];g[c[p>>2]>>2]=+g[t>>2];g[c[q>>2]>>2]=-+g[u>>2];c[v>>2]=(c[v>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[r>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[r>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[s>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[s>>2]<<2)}i=w;return}function ts(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,8,5992);i=b;return}function us(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0;Jc=i;i=i+800|0;n=Jc+796|0;o=Jc+792|0;p=Jc+788|0;q=Jc+784|0;r=Jc+780|0;s=Jc+776|0;t=Jc+772|0;Kc=Jc+768|0;u=Jc+764|0;v=Jc+760|0;Ic=Jc+696|0;Fb=Jc+692|0;Ta=Jc+688|0;Ea=Jc+684|0;tb=Jc+680|0;kc=Jc+676|0;Sa=Jc+672|0;J=Jc+668|0;qb=Jc+664|0;uc=Jc+660|0;pa=Jc+656|0;Q=Jc+652|0;Ma=Jc+648|0;Dc=Jc+644|0;qa=Jc+640|0;N=Jc+636|0;Na=Jc+632|0;C=Jc+628|0;wa=Jc+624|0;da=Jc+620|0;hb=Jc+616|0;ha=Jc+612|0;xa=Jc+608|0;aa=Jc+604|0;gb=Jc+600|0;Ub=Jc+596|0;ta=Jc+592|0;Y=Jc+588|0;eb=Jc+584|0;$b=Jc+580|0;ua=Jc+576|0;V=Jc+572|0;db=Jc+568|0;w=Jc+564|0;sb=Jc+560|0;Eb=Jc+556|0;rb=Jc+552|0;Fa=Jc+548|0;Db=Jc+544|0;Ib=Jc+540|0;H=Jc+536|0;jc=Jc+532|0;I=Jc+528|0;Gb=Jc+524|0;Hb=Jc+520|0;Jb=Jc+516|0;ic=Jc+512|0;mc=Jc+508|0;sc=Jc+504|0;pc=Jc+500|0;rc=Jc+496|0;nc=Jc+492|0;oc=Jc+488|0;qc=Jc+484|0;tc=Jc+480|0;O=Jc+476|0;P=Jc+472|0;yc=Jc+468|0;Bc=Jc+464|0;xc=Jc+460|0;Ac=Jc+456|0;vc=Jc+452|0;wc=Jc+448|0;zc=Jc+444|0;Cc=Jc+440|0;L=Jc+436|0;M=Jc+432|0;ec=Jc+428|0;fa=Jc+424|0;dc=Jc+420|0;G=Jc+416|0;x=Jc+412|0;D=Jc+408|0;A=Jc+404|0;E=Jc+400|0;bc=Jc+396|0;cc=Jc+392|0;gc=Jc+388|0;hc=Jc+384|0;y=Jc+380|0;z=Jc+376|0;fc=Jc+372|0;B=Jc+368|0;ba=Jc+364|
0;ca=Jc+360|0;F=Jc+356|0;ga=Jc+352|0;_=Jc+348|0;$=Jc+344|0;Gc=Jc+340|0;Zb=Jc+336|0;Lb=Jc+332|0;Yb=Jc+328|0;Pb=Jc+324|0;Vb=Jc+320|0;Sb=Jc+316|0;Wb=Jc+312|0;Hc=Jc+308|0;Kb=Jc+304|0;Nb=Jc+300|0;Ob=Jc+296|0;Qb=Jc+292|0;Rb=Jc+288|0;Mb=Jc+284|0;Tb=Jc+280|0;W=Jc+276|0;X=Jc+272|0;Xb=Jc+268|0;_b=Jc+264|0;T=Jc+260|0;U=Jc+256|0;Fc=Jc+252|0;ka=Jc+248|0;vb=Jc+244|0;xb=Jc+240|0;ja=Jc+236|0;ob=Jc+232|0;na=Jc+228|0;wb=Jc+224|0;lc=Jc+220|0;Ec=Jc+216|0;pb=Jc+212|0;ub=Jc+208|0;ac=Jc+204|0;ia=Jc+200|0;la=Jc+196|0;ma=Jc+192|0;Pa=Jc+188|0;kb=Jc+184|0;ab=Jc+180|0;cb=Jc+176|0;jb=Jc+172|0;Za=Jc+168|0;nb=Jc+164|0;bb=Jc+160|0;La=Jc+156|0;Oa=Jc+152|0;_a=Jc+148|0;$a=Jc+144|0;fb=Jc+140|0;ib=Jc+136|0;lb=Jc+132|0;mb=Jc+128|0;sa=Jc+124|0;Aa=Jc+120|0;Bb=Jc+116|0;Qa=Jc+112|0;za=Jc+108|0;yb=Jc+104|0;Da=Jc+100|0;Cb=Jc+96|0;oa=Jc+92|0;ra=Jc+88|0;zb=Jc+84|0;Ab=Jc+80|0;va=Jc+76|0;ya=Jc+72|0;Ba=Jc+68|0;Ca=Jc+64|0;S=Jc+60|0;Ha=Jc+56|0;Wa=Jc+52|0;Ya=Jc+48|0;Ga=Jc+44|0;Xa=Jc+40|0;Ka=Jc+36|0;Ra=Jc+32|0;K=Jc+28|0;R=Jc+24|0;Ua=Jc+20|0;Va=Jc+16|0;Z=Jc+12|0;ea=Jc+8|0;Ia=Jc+4|0;Ja=Jc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Kc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Jc+756>>2]=.4713967442512512;g[Jc+752>>2]=.8819212913513184;g[Jc+748>>2]=.6343932747840881;g[Jc+744>>2]=.7730104327201843;g[Jc+740>>2]=.290284663438797;g[Jc+736>>2]=.9569403529167175;g[Jc+732>>2]=.9951847195625305;g[Jc+728>>2]=.0980171412229538;g[Jc+724>>2]=.5555702447891235;g[Jc+720>>2]=.8314695954322815;g[Jc+716>>2]=.19509032368659973;g[Jc+712>>2]=.9807852506637573;g[Jc+708>>2]=.3826834261417389;g[Jc+704>>2]=.9238795042037964;g[Jc+700>>2]=.7071067690849304;c[Ic>>2]=c[Kc>>2];while(1){if((c[Ic>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[sb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Db>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Eb>>2]=(+g[Fa>>2]-+g[Db>>2])*.7071067690849304;g[rb>>2]=(+g[Fa>>2]+ +g[Db>>2])*.7071067690849304;g[Fb>>2]=+g[w>>2]+ 
+g[Eb>>2];g[Ta>>2]=+g[sb>>2]-+g[rb>>2];g[Ea>>2]=+g[w>>2]-+g[Eb>>2];g[tb>>2]=+g[rb>>2]+ +g[sb>>2];g[Gb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Hb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Ib>>2]=+g[Gb>>2]*.9238795042037964-+g[Hb>>2]*.3826834261417389;g[H>>2]=+g[Gb>>2]*.3826834261417389+ +g[Hb>>2]*.9238795042037964;g[Jb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ic>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[jc>>2]=+g[Jb>>2]*.3826834261417389-+g[ic>>2]*.9238795042037964;g[I>>2]=+g[Jb>>2]*.9238795042037964+ +g[ic>>2]*.3826834261417389;g[kc>>2]=+g[Ib>>2]+ +g[jc>>2];g[Sa>>2]=+g[jc>>2]-+g[Ib>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[qb>>2]=+g[H>>2]+ +g[I>>2];g[mc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[sc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[nc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[oc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[pc>>2]=(+g[nc>>2]-+g[oc>>2])*.7071067690849304;g[rc>>2]=(+g[nc>>2]+ +g[oc>>2])*.7071067690849304;g[qc>>2]=+g[mc>>2]+ +g[pc>>2];g[tc>>2]=+g[rc>>2]+ +g[sc>>2];g[uc>>2]=+g[qc>>2]*.9807852506637573-+g[tc>>2]*.19509032368659973;g[pa>>2]=+g[qc>>2]*.19509032368659973+ +g[tc>>2]*.9807852506637573;g[O>>2]=+g[sc>>2]-+g[rc>>2];g[P>>2]=+g[mc>>2]-+g[pc>>2];g[Q>>2]=+g[O>>2]*.8314695954322815-+g[P>>2]*.5555702447891235;g[Ma>>2]=+g[P>>2]*.8314695954322815+ +g[O>>2]*.5555702447891235;g[yc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Bc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[vc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[wc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[xc>>2]=(+g[vc>>2]-+g[wc>>2])*.7071067690849304;g[Ac>>2]=(+g[vc>>2]+ +g[wc>>2])*.7071067690849304;g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Cc>>2]=+g[Ac>>2]+ +g[Bc>>2];g[Dc>>2]=+g[zc>>2]*.9807852506637573+ +g[Cc>>2]*.19509032368659973;g[qa>>2]=+g[zc>>2]*.19509032368659973-+g[Cc>>2]*.9807852506637573;g[L>>2]=+g[Bc>>2]-+g[Ac>>2];g[M>>2]=+g[xc>>2]+ +g[yc>>2];g[N>>2]=+g[L>>2]*.8314695954322815-+g[M>>2]*.5555702447891235;g[Na>>2]=+g[M>>2]*.8314695954322815+ 
+g[L>>2]*.5555702447891235;g[ec>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[fa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[bc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[cc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[dc>>2]=(+g[bc>>2]-+g[cc>>2])*.7071067690849304;g[G>>2]=(+g[bc>>2]+ +g[cc>>2])*.7071067690849304;g[gc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[hc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[x>>2]=+g[gc>>2]*.9238795042037964-+g[hc>>2]*.3826834261417389;g[D>>2]=+g[gc>>2]*.3826834261417389+ +g[hc>>2]*.9238795042037964;g[y>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[z>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[A>>2]=+g[y>>2]*.3826834261417389-+g[z>>2]*.9238795042037964;g[E>>2]=+g[y>>2]*.9238795042037964+ +g[z>>2]*.3826834261417389;g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[C>>2]=+g[fc>>2]+ +g[B>>2];g[wa>>2]=+g[fc>>2]-+g[B>>2];g[ba>>2]=+g[A>>2]-+g[x>>2];g[ca>>2]=+g[fa>>2]-+g[G>>2];g[da>>2]=+g[ba>>2]-+g[ca>>2];g[hb>>2]=+g[ba>>2]+ +g[ca>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[ga>>2]=+g[G>>2]+ +g[fa>>2];g[ha>>2]=+g[F>>2]+ +g[ga>>2];g[xa>>2]=+g[ga>>2]-+g[F>>2];g[_>>2]=+g[dc>>2]+ +g[ec>>2];g[$>>2]=+g[D>>2]-+g[E>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[gb>>2]=+g[$>>2]-+g[_>>2];g[Gc>>2]=+g[c[o>>2]>>2];g[Zb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Hc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Kb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Lb>>2]=(+g[Hc>>2]-+g[Kb>>2])*.7071067690849304;g[Yb>>2]=(+g[Hc>>2]+ +g[Kb>>2])*.7071067690849304;g[Nb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ob>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Pb>>2]=+g[Nb>>2]*.9238795042037964-+g[Ob>>2]*.3826834261417389;g[Vb>>2]=+g[Nb>>2]*.3826834261417389+ +g[Ob>>2]*.9238795042037964;g[Qb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Rb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Sb>>2]=+g[Qb>>2]*.3826834261417389-+g[Rb>>2]*.9238795042037964;g[Wb>>2]=+g[Qb>>2]*.9238795042037964+ +g[Rb>>2]*.3826834261417389;g[Mb>>2]=+g[Gc>>2]+ +g[Lb>>2];g[Tb>>2]=+g[Pb>>2]+ +g[Sb>>2];g[Ub>>2]=+g[Mb>>2]+ 
+g[Tb>>2];g[ta>>2]=+g[Mb>>2]-+g[Tb>>2];g[W>>2]=+g[Sb>>2]-+g[Pb>>2];g[X>>2]=+g[Zb>>2]-+g[Yb>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[eb>>2]=+g[W>>2]+ +g[X>>2];g[Xb>>2]=+g[Vb>>2]+ +g[Wb>>2];g[_b>>2]=+g[Yb>>2]+ +g[Zb>>2];g[$b>>2]=+g[Xb>>2]+ +g[_b>>2];g[ua>>2]=+g[_b>>2]-+g[Xb>>2];g[T>>2]=+g[Gc>>2]-+g[Lb>>2];g[U>>2]=+g[Vb>>2]-+g[Wb>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[db>>2]=+g[T>>2]+ +g[U>>2];g[lc>>2]=+g[Fb>>2]+ +g[kc>>2];g[Ec>>2]=+g[uc>>2]+ +g[Dc>>2];g[Fc>>2]=+g[lc>>2]-+g[Ec>>2];g[ka>>2]=+g[lc>>2]+ +g[Ec>>2];g[pb>>2]=+g[qa>>2]-+g[pa>>2];g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[vb>>2]=+g[pb>>2]-+g[ub>>2];g[xb>>2]=+g[pb>>2]+ +g[ub>>2];g[ac>>2]=+g[Ub>>2]*.0980171412229538+ +g[$b>>2]*.9951847195625305;g[ia>>2]=+g[C>>2]*.0980171412229538-+g[ha>>2]*.9951847195625305;g[ja>>2]=+g[ac>>2]+ +g[ia>>2];g[ob>>2]=+g[ia>>2]-+g[ac>>2];g[la>>2]=+g[Ub>>2]*.9951847195625305-+g[$b>>2]*.0980171412229538;g[ma>>2]=+g[C>>2]*.9951847195625305+ +g[ha>>2]*.0980171412229538;g[na>>2]=+g[la>>2]+ +g[ma>>2];g[wb>>2]=+g[ma>>2]-+g[la>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[Fc>>2]-+g[ja>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[wb>>2]-+g[xb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[Fc>>2]+ +g[ja>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[wb>>2]+ +g[xb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2]=+g[ka>>2]-+g[na>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2]=+g[ob>>2]-+g[vb>>2];g[c[p>>2]>>2]=+g[ka>>2]+ +g[na>>2];g[c[q>>2]>>2]=+g[ob>>2]+ +g[vb>>2];g[La>>2]=+g[Ea>>2]+ +g[J>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Pa>>2]=+g[La>>2]+ +g[Oa>>2];g[kb>>2]=+g[La>>2]-+g[Oa>>2];g[_a>>2]=+g[Q>>2]+ +g[N>>2];g[$a>>2]=+g[Sa>>2]+ +g[Ta>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[cb>>2]=+g[_a>>2]+ +g[$a>>2];g[fb>>2]=+g[db>>2]*.9569403529167175+ +g[eb>>2]*.290284663438797;g[ib>>2]=+g[gb>>2]*.9569403529167175-+g[hb>>2]*.290284663438797;g[jb>>2]=+g[fb>>2]+ +g[ib>>2];g[Za>>2]=+g[ib>>2]-+g[fb>>2];g[lb>>2]=+g[gb>>2]*.290284663438797+ 
+g[hb>>2]*.9569403529167175;g[mb>>2]=+g[eb>>2]*.9569403529167175-+g[db>>2]*.290284663438797;g[nb>>2]=+g[lb>>2]-+g[mb>>2];g[bb>>2]=+g[mb>>2]+ +g[lb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2]=+g[Pa>>2]-+g[jb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2]=+g[bb>>2]-+g[cb>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Pa>>2]+ +g[jb>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[bb>>2]+ +g[cb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[kb>>2]-+g[nb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[Za>>2]-+g[ab>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[kb>>2]+ +g[nb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[Za>>2]+ +g[ab>>2];g[oa>>2]=+g[Fb>>2]-+g[kc>>2];g[ra>>2]=+g[pa>>2]+ +g[qa>>2];g[sa>>2]=+g[oa>>2]+ +g[ra>>2];g[Aa>>2]=+g[oa>>2]-+g[ra>>2];g[zb>>2]=+g[Dc>>2]-+g[uc>>2];g[Ab>>2]=+g[tb>>2]-+g[qb>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Qa>>2]=+g[zb>>2]+ +g[Ab>>2];g[va>>2]=+g[ta>>2]*.7730104327201843+ +g[ua>>2]*.6343932747840881;g[ya>>2]=+g[wa>>2]*.7730104327201843-+g[xa>>2]*.6343932747840881;g[za>>2]=+g[va>>2]+ +g[ya>>2];g[yb>>2]=+g[ya>>2]-+g[va>>2];g[Ba>>2]=+g[wa>>2]*.6343932747840881+ +g[xa>>2]*.7730104327201843;g[Ca>>2]=+g[ua>>2]*.7730104327201843-+g[ta>>2]*.6343932747840881;g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[Cb>>2]=+g[Ca>>2]+ +g[Ba>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[sa>>2]-+g[za>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2]=+g[Cb>>2]-+g[Qa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[sa>>2]+ +g[za>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[Cb>>2]+ +g[Qa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[Aa>>2]-+g[Da>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[yb>>2]-+g[Bb>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[Aa>>2]+ +g[Da>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[yb>>2]+ +g[Bb>>2];g[K>>2]=+g[Ea>>2]-+g[J>>2];g[R>>2]=+g[N>>2]-+g[Q>>2];g[S>>2]=+g[K>>2]+ +g[R>>2];g[Ha>>2]=+g[K>>2]-+g[R>>2];g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[Va>>2]=+g[Ma>>2]+ +g[Na>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[Ya>>2]=+g[Va>>2]+ +g[Ua>>2];g[Z>>2]=+g[V>>2]*.8819212913513184+ +g[Y>>2]*.4713967442512512;g[ea>>2]=+g[aa>>2]*.8819212913513184+ 
+g[da>>2]*.4713967442512512;g[Ga>>2]=+g[Z>>2]-+g[ea>>2];g[Xa>>2]=+g[Z>>2]+ +g[ea>>2];g[Ia>>2]=+g[da>>2]*.8819212913513184-+g[aa>>2]*.4713967442512512;g[Ja>>2]=+g[Y>>2]*.8819212913513184-+g[V>>2]*.4713967442512512;g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[Ra>>2]=+g[Ja>>2]+ +g[Ia>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2]=+g[S>>2]-+g[Ga>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2]=+g[Ra>>2]-+g[Wa>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[S>>2]+ +g[Ga>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[Ra>>2]+ +g[Wa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[Ha>>2]-+g[Ka>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[Ya>>2]-+g[Xa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[Ha>>2]+ +g[Ka>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=-(+g[Xa>>2]+ +g[Ya>>2]);c[Ic>>2]=(c[Ic>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Jc;return}function vs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,9,6040);i=b;return}function ws(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0;A=i;i=i+80|0;n=A+64|0;o=A+60|0;p=A+56|0;q=A+52|0;r=A+48|0;s=A+44|0;B=A+36|0;t=A+32|0;u=A+28|0;z=A+16|0;v=A+12|0;w=A+8|0;x=A+4|0;y=A;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[A+40>>2]=j;c[B>>2]=k;c[t>>2]=l;c[u>>2]=m;g[A+24>>2]=.5;g[A+20>>2]=.8660253882408142;c[z>>2]=c[B>>2];while(1){if((c[z>>2]|0)<=0)break;g[v>>2]=+g[c[n>>2]>>2];g[w>>2]=+g[c[o>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[v>>2]-+g[y>>2];g[c[q>>2]>>2]=-((+g[w>>2]+ +g[x>>2])*.8660253882408142);g[c[p>>2]>>2]=+g[y>>2]*.5+ +g[v>>2];c[z>>2]=(c[z>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2)}i=A;return}function xs(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,10,6088);i=b;return}function ys(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0;D=i;i=i+80|0;n=D+68|0;o=D+64|0;p=D+60|0;q=D+56|0;r=D+52|0;s=D+48|0;t=D+44|0;E=D+40|0;u=D+36|0;v=D+32|0;C=D+24|0;w=D+20|0;B=D+16|0;z=D+12|0;A=D+8|0;x=D+4|0;y=D;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[E>>2]=k;c[u>>2]=l;c[v>>2]=m;g[D+28>>2]=.7071067690849304;c[C>>2]=c[E>>2];while(1){if((c[C>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[B>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[x>>2]=+g[c[o>>2]>>2];g[y>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[z>>2]=(+g[x>>2]-+g[y>>2])*.7071067690849304;g[A>>2]=(+g[x>>2]+ +g[y>>2])*.7071067690849304;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[w>>2]-+g[z>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[B>>2]-+g[A>>2];g[c[p>>2]>>2]=+g[w>>2]+ +g[z>>2];g[c[q>>2]>>2]=-(+g[A>>2]+ +g[B>>2]);c[C>>2]=(c[C>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2)}i=D;return}function zs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,11,6136);i=b;return}function As(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;n=J+104|0;o=J+100|0;p=J+96|0;q=J+92|0;r=J+88|0;s=J+84|0;t=J+80|0;K=J+76|0;u=J+72|0;v=J+68|0;I=J+48|0;D=J+44|0;y=J+40|0;B=J+36|0;E=J+32|0;H=J+28|0;G=J+24|0;C=J+20|0;F=J+16|0;w=J+12|0;x=J+8|0;z=J+4|0;A=J;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[K>>2]=k;c[u>>2]=l;c[v>>2]=m;g[J+64>>2]=.25;g[J+60>>2]=.55901700258255;g[J+56>>2]=.9510565400123596;g[J+52>>2]=.5877852439880371;c[I>>2]=c[K>>2];while(1){if((c[I>>2]|0)<=0)break;g[D>>2]=+g[c[n>>2]>>2];g[w>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[x>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[A>>2]=+g[c[o>>2]>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[E>>2]=+g[y>>2]+ +g[B>>2];g[H>>2]=+g[z>>2]+ +g[A>>2];g[G>>2]=+g[w>>2]+ +g[x>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[D>>2]+ +g[E>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[G>>2]*.5877852439880371-+g[H>>2]*.9510565400123596;g[c[q>>2]>>2]=-(+g[G>>2]*.9510565400123596+ +g[H>>2]*.5877852439880371);g[C>>2]=(+g[y>>2]-+g[B>>2])*.55901700258255;g[F>>2]=+g[D>>2]-+g[E>>2]*.25;g[c[p>>2]>>2]=+g[C>>2]+ +g[F>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[F>>2]-+g[C>>2];c[I>>2]=(c[I>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=J;return}function Bs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,12,6184);i=b;return}function Cs(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,S
g=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0;Ah=i;i=i+1904|0;n=Ah+1900|0;o=Ah+1896|0;p=Ah+1892|0;q=Ah+1888|0;r=Ah+1884|0;s=Ah+1880|0;t=Ah+1876|0;Bh=Ah+1872|0;u=Ah+1868|0;v=Ah+1864|0;zh=Ah+1736|0;lh=Ah+1732|0;Tb=Ah+1728|0;wc=Ah+1724|0;Ve=Ah+1720|0;uh=Ah+1716|0;Ub=Ah+1712|0;tc=Ah+1708|0;We=Ah+1704|0;ch=Ah+1700|0;Sb=Ah+1696|0;Af=Ah+1692|0;Pf=Ah+1688|0;qc=Ah+1684|0;Ue=Ah+1680|0;xf=Ah+1676|0;jg=Ah+1672|0;pb=Ah+1668|0;Kc=Ah+1664|0;ud=Ah+1660|0;se=Ah+1656|0;Va=Ah+1652|0;Oc=Ah+1648|0;qd=Ah+1644|0;nf=Ah+1640|0;Lg=Ah+1636|0;zc=Ah+1632|0;dd=Ah+1628|0;af=Ah+1624|0;Sg=Ah+1620|0;yc=Ah+1616|0;ad=Ah+1612|0;bf=Ah+1608|0;C=Ah+1604|0;Xb=Ah+1600|0;Kd=Ah+1596|0;_e=Ah+1592|0;ha=Ah+1588|0;Yb=Ah+1584|0;Hd=Ah+1580|0;Ze=Ah+1576|0;xa=Ah+1572|0;Dc=Ah+1568|0;$d=Ah+1564|0;kf=Ah+1560|0;ba=Ah+1556|0;Hc=Ah+1552|0;Qd=Ah+1548|0;ff=Ah+1544|0;Ib=Ah+1540|0;Nc=Ah+1536|0;xd=Ah+1532|0;pe=Ah+1528|0;Mb=Ah+1524|0;Lc=Ah+1520|0;nd=Ah+1516|0;re=Ah+1512|0;S=Ah+1508|0;Gc=Ah+1504|0;ce=Ah+1500|0;gf=Ah+1496|0;W=Ah+1492|0;Ec=Ah+1488|0;Xd=Ah+1484|0;jf=Ah+1480|0;dh=Ah+1476|0;jh=Ah+1472|0;gh=Ah+1468|0;ih=Ah+1464|0;eh=Ah+1460|0;fh=Ah+1456|0;hh=Ah+1452|0;kh=Ah+1448|0;uc=Ah+1444|0;vc=Ah+1440|0;ph=Ah+1436|0;sh=Ah+1432|0;oh=Ah+1428|0;rh=Ah+1424|0;mh=Ah+1420|0;nh=Ah+1416|0;qh=Ah+1412|0;th=Ah+1408|0;rc=Ah+1404|0;sc=Ah+1400|0;w=Ah+1396|0;vf=Ah+1392|0;Xc=Ah+1388|0;uf=Ah+1384|0;zg=Ah+1380|0;nc=Ah+1376|0;ah=Ah+1372|0;oc=Ah+1368|0;Fa=Ah+1364|0;Ob=Ah+1360|0;of=Ah+1356|0;xg=Ah+1352|0;Ag=Ah+1348|0;$g=Ah+1344|0;ee=Ah+1340|0;bh=Ah+1336|0;wg=Ah+1332|0;zf=Ah+1328|0;mc=Ah+1324|0;pc=Ah+1320|0;tf=Ah+1316|0;wf=Ah+1312|0;Ia=Ah+1308|0;Ta=Ah+1304|0;Ha=Ah+1300|0;Sa=Ah+1296|0;Ma=Ah+1292|0;Nb=Ah+1288|0;Pa=Ah+1284|0;Qa=Ah+1280|0;ea=Ah+1276|0;Ga=Ah+1272|0;Ka=Ah+1268|0;La=Ah+1264|0;Na=Ah+1260|0;Oa=Ah+1256|0;Ja=Ah+1252|0;ob=Ah+1248|0;sd=Ah+1244|0;td=Ah+1240|0;Ra=Ah+1236|0;Ua=Ah+1232|0;od=Ah+1228|0;pd=Ah+1224|0;x
h=Ah+1220|0;Qg=Ah+1216|0;Cg=Ah+1212|0;Pg=Ah+1208|0;Gg=Ah+1204|0;Mg=Ah+1200|0;Jg=Ah+1196|0;Ng=Ah+1192|0;yh=Ah+1188|0;Bg=Ah+1184|0;Eg=Ah+1180|0;Fg=Ah+1176|0;Hg=Ah+1172|0;Ig=Ah+1168|0;Dg=Ah+1164|0;Kg=Ah+1160|0;bd=Ah+1156|0;cd=Ah+1152|0;Og=Ah+1148|0;Rg=Ah+1144|0;_c=Ah+1140|0;$c=Ah+1136|0;Xg=Ah+1132|0;fa=Ah+1128|0;Wg=Ah+1124|0;G=Ah+1120|0;x=Ah+1116|0;D=Ah+1112|0;A=Ah+1108|0;E=Ah+1104|0;Ug=Ah+1100|0;Vg=Ah+1096|0;Zg=Ah+1092|0;_g=Ah+1088|0;y=Ah+1084|0;z=Ah+1080|0;Yg=Ah+1076|0;B=Ah+1072|0;Id=Ah+1068|0;Jd=Ah+1064|0;F=Ah+1060|0;ga=Ah+1056|0;fd=Ah+1052|0;Gd=Ah+1048|0;la=Ah+1044|0;$=Ah+1040|0;oa=Ah+1036|0;_=Ah+1032|0;sa=Ah+1028|0;X=Ah+1024|0;va=Ah+1020|0;Y=Ah+1016|0;ma=Ah+1012|0;na=Ah+1008|0;qa=Ah+1004|0;ra=Ah+1e3|0;ta=Ah+996|0;ua=Ah+992|0;pa=Ah+988|0;wa=Ah+984|0;Zd=Ah+980|0;_d=Ah+976|0;Z=Ah+972|0;aa=Ah+968|0;Od=Ah+964|0;Pd=Ah+960|0;ub=Ah+956|0;ld=Ah+952|0;xb=Ah+948|0;kd=Ah+944|0;Db=Ah+940|0;id=Ah+936|0;Gb=Ah+932|0;hd=Ah+928|0;qb=Ah+924|0;wb=Ah+920|0;tb=Ah+916|0;vb=Ah+912|0;rb=Ah+908|0;sb=Ah+904|0;Cb=Ah+900|0;Fb=Ah+896|0;Bb=Ah+892|0;Eb=Ah+888|0;zb=Ah+884|0;Ab=Ah+880|0;yb=Ah+876|0;Hb=Ah+872|0;vd=Ah+868|0;wd=Ah+864|0;Kb=Ah+860|0;Lb=Ah+856|0;jd=Ah+852|0;md=Ah+848|0;Ca=Ah+844|0;Vd=Ah+840|0;H=Ah+836|0;Ud=Ah+832|0;N=Ah+828|0;Sd=Ah+824|0;Q=Ah+820|0;Rd=Ah+816|0;ya=Ah+812|0;Ea=Ah+808|0;Ba=Ah+804|0;Da=Ah+800|0;za=Ah+796|0;Aa=Ah+792|0;M=Ah+788|0;P=Ah+784|0;L=Ah+780|0;O=Ah+776|0;J=Ah+772|0;K=Ah+768|0;I=Ah+764|0;R=Ah+760|0;ae=Ah+756|0;be=Ah+752|0;U=Ah+748|0;V=Ah+744|0;Td=Ah+740|0;Wd=Ah+736|0;wh=Ah+732|0;bb=Ah+728|0;yf=Ah+724|0;cg=Ah+720|0;ja=Ah+716|0;bg=Ah+712|0;lb=Ah+708|0;Pb=Ah+704|0;da=Ah+700|0;$a=Ah+696|0;eb=Ah+692|0;rf=Ah+688|0;ib=Ah+684|0;Qb=Ah+680|0;Xa=Ah+676|0;_a=Ah+672|0;vh=Ah+668|0;sf=Ah+664|0;Tg=Ah+660|0;ia=Ah+656|0;jb=Ah+652|0;kb=Ah+648|0;T=Ah+644|0;ca=Ah+640|0;cb=Ah+636|0;db=Ah+632|0;gb=Ah+628|0;hb=Ah+624|0;Jb=Ah+620|0;Wa=Ah+616|0;ka=Ah+612|0;Ya=Ah+608|0;_f=Ah+604|0;$f=Ah+600|0;Za=Ah+596|0;ab=Ah+592|0;qf=Ah+588|0;Zf=Ah+584|0;fb=Ah+580|0;mb=Ah+576|0;eg=Ah+572|0;fg=Ah+568|0;nb=Ah+
564|0;Rb=Ah+560|0;ag=Ah+556|0;dg=Ah+552|0;Ye=Ah+548|0;Ae=Ah+544|0;Qf=Ah+540|0;Wf=Ah+536|0;df=Ah+532|0;Vf=Ah+528|0;Ke=Ah+524|0;Ne=Ah+520|0;mf=Ah+516|0;ye=Ah+512|0;De=Ah+508|0;Nf=Ah+504|0;He=Ah+500|0;Oe=Ah+496|0;ue=Ah+492|0;xe=Ah+488|0;Xe=Ah+484|0;Of=Ah+480|0;$e=Ah+476|0;cf=Ah+472|0;Ie=Ah+468|0;Je=Ah+464|0;hf=Ah+460|0;lf=Ah+456|0;Be=Ah+452|0;Ce=Ah+448|0;Fe=Ah+444|0;Ge=Ah+440|0;qe=Ah+436|0;te=Ah+432|0;ef=Ah+428|0;ve=Ah+424|0;Sf=Ah+420|0;Tf=Ah+416|0;we=Ah+412|0;ze=Ah+408|0;Mf=Ah+404|0;Rf=Ah+400|0;Ee=Ah+396|0;Le=Ah+392|0;Yf=Ah+388|0;yg=Ah+384|0;Me=Ah+380|0;pf=Ah+376|0;Uf=Ah+372|0;Xf=Ah+368|0;Wb=Ah+364|0;Wc=Ah+360|0;kg=Ah+356|0;qg=Ah+352|0;Bc=Ah+348|0;pg=Ah+344|0;gc=Ah+340|0;jc=Ah+336|0;Jc=Ah+332|0;Uc=Ah+328|0;$b=Ah+324|0;hg=Ah+320|0;dc=Ah+316|0;kc=Ah+312|0;Qc=Ah+308|0;Tc=Ah+304|0;Vb=Ah+300|0;ig=Ah+296|0;xc=Ah+292|0;Ac=Ah+288|0;ec=Ah+284|0;fc=Ah+280|0;Fc=Ah+276|0;Ic=Ah+272|0;Zb=Ah+268|0;_b=Ah+264|0;bc=Ah+260|0;cc=Ah+256|0;Mc=Ah+252|0;Pc=Ah+248|0;Cc=Ah+244|0;Rc=Ah+240|0;mg=Ah+236|0;ng=Ah+232|0;Sc=Ah+228|0;Vc=Ah+224|0;gg=Ah+220|0;lg=Ah+216|0;ac=Ah+212|0;hc=Ah+208|0;sg=Ah+204|0;tg=Ah+200|0;ic=Ah+196|0;lc=Ah+192|0;og=Ah+188|0;rg=Ah+184|0;Zc=Ah+180|0;Fd=Ah+176|0;Cf=Ah+172|0;If=Ah+168|0;Md=Ah+164|0;Hf=Ah+160|0;oe=Ah+156|0;Se=Ah+152|0;gd=Ah+148|0;Dd=Ah+144|0;he=Ah+140|0;vg=Ah+136|0;le=Ah+132|0;Re=Ah+128|0;zd=Ah+124|0;Cd=Ah+120|0;Yc=Ah+116|0;Bf=Ah+112|0;ed=Ah+108|0;Ld=Ah+104|0;me=Ah+100|0;ne=Ah+96|0;Yd=Ah+92|0;de=Ah+88|0;fe=Ah+84|0;ge=Ah+80|0;je=Ah+76|0;ke=Ah+72|0;rd=Ah+68|0;yd=Ah+64|0;Nd=Ah+60|0;Ad=Ah+56|0;Ef=Ah+52|0;Ff=Ah+48|0;Bd=Ah+44|0;Ed=Ah+40|0;ug=Ah+36|0;Df=Ah+32|0;ie=Ah+28|0;Pe=Ah+24|0;Kf=Ah+20|0;Lf=Ah+16|0;Qe=Ah+12|0;Te=Ah+8|0;Gf=Ah+4|0;Jf=Ah;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Bh>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ah+1860>>2]=.24298018217086792;g[Ah+1856>>2]=.9700312614440918;g[Ah+1852>>2]=.8577286005020142;g[Ah+1848>>2]=.5141027569770813;g[Ah+1844>>2]=.4713967442512512;g[Ah+1840>>2]=.8819212913513184;g[Ah+1836>>2]=.4275550842285156;g[Ah+183
2>>2]=.903989315032959;g[Ah+1828>>2]=.3368898630142212;g[Ah+1824>>2]=.9415440559387207;g[Ah+1820>>2]=.7730104327201843;g[Ah+1816>>2]=.6343932747840881;g[Ah+1812>>2]=.5956993103027344;g[Ah+1808>>2]=.803207516670227;g[Ah+1804>>2]=.1467304676771164;g[Ah+1800>>2]=.9891765117645264;g[Ah+1796>>2]=.9569403529167175;g[Ah+1792>>2]=.290284663438797;g[Ah+1788>>2]=.049067676067352295;g[Ah+1784>>2]=.9987954497337341;g[Ah+1780>>2]=.6715589761734009;g[Ah+1776>>2]=.7409511208534241;g[Ah+1772>>2]=.0980171412229538;g[Ah+1768>>2]=.9951847195625305;g[Ah+1764>>2]=.3826834261417389;g[Ah+1760>>2]=.9238795042037964;g[Ah+1756>>2]=.5555702447891235;g[Ah+1752>>2]=.8314695954322815;g[Ah+1748>>2]=.19509032368659973;g[Ah+1744>>2]=.9807852506637573;g[Ah+1740>>2]=.7071067690849304;c[zh>>2]=c[Bh>>2];while(1){if((c[zh>>2]|0)<=0)break;g[dh>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[jh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*18<<2)>>2];g[eh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[fh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*26<<2)>>2];g[gh>>2]=(+g[eh>>2]-+g[fh>>2])*.7071067690849304;g[ih>>2]=(+g[eh>>2]+ +g[fh>>2])*.7071067690849304;g[hh>>2]=+g[dh>>2]+ +g[gh>>2];g[kh>>2]=+g[ih>>2]+ +g[jh>>2];g[lh>>2]=+g[hh>>2]*.9807852506637573-+g[kh>>2]*.19509032368659973;g[Tb>>2]=+g[hh>>2]*.19509032368659973+ +g[kh>>2]*.9807852506637573;g[uc>>2]=+g[jh>>2]-+g[ih>>2];g[vc>>2]=+g[dh>>2]-+g[gh>>2];g[wc>>2]=+g[uc>>2]*.8314695954322815-+g[vc>>2]*.5555702447891235;g[Ve>>2]=+g[vc>>2]*.8314695954322815+ +g[uc>>2]*.5555702447891235;g[ph>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*30<<2)>>2];g[sh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[mh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[nh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*22<<2)>>2];g[oh>>2]=(+g[mh>>2]-+g[nh>>2])*.7071067690849304;g[rh>>2]=(+g[mh>>2]+ +g[nh>>2])*.7071067690849304;g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[th>>2]=+g[rh>>2]+ +g[sh>>2];g[uh>>2]=+g[qh>>2]*.9807852506637573+ 
+g[th>>2]*.19509032368659973;g[Ub>>2]=+g[qh>>2]*.19509032368659973-+g[th>>2]*.9807852506637573;g[rc>>2]=+g[sh>>2]-+g[rh>>2];g[sc>>2]=+g[oh>>2]+ +g[ph>>2];g[tc>>2]=+g[rc>>2]*.8314695954322815-+g[sc>>2]*.5555702447891235;g[We>>2]=+g[sc>>2]*.8314695954322815+ +g[rc>>2]*.5555702447891235;g[w>>2]=+g[c[n>>2]>>2];g[vf>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<4<<2)>>2];g[Fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Ob>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*24<<2)>>2];g[Xc>>2]=(+g[Fa>>2]-+g[Ob>>2])*.7071067690849304;g[uf>>2]=(+g[Fa>>2]+ +g[Ob>>2])*.7071067690849304;g[of>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[xg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*20<<2)>>2];g[zg>>2]=+g[of>>2]*.9238795042037964-+g[xg>>2]*.3826834261417389;g[nc>>2]=+g[of>>2]*.3826834261417389+ +g[xg>>2]*.9238795042037964;g[Ag>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[$g>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*28<<2)>>2];g[ah>>2]=+g[Ag>>2]*.3826834261417389-+g[$g>>2]*.9238795042037964;g[oc>>2]=+g[Ag>>2]*.9238795042037964+ +g[$g>>2]*.3826834261417389;g[ee>>2]=+g[w>>2]+ +g[Xc>>2];g[bh>>2]=+g[zg>>2]+ +g[ah>>2];g[ch>>2]=+g[ee>>2]+ +g[bh>>2];g[Sb>>2]=+g[ee>>2]-+g[bh>>2];g[wg>>2]=+g[ah>>2]-+g[zg>>2];g[zf>>2]=+g[vf>>2]-+g[uf>>2];g[Af>>2]=+g[wg>>2]-+g[zf>>2];g[Pf>>2]=+g[wg>>2]+ +g[zf>>2];g[mc>>2]=+g[w>>2]-+g[Xc>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[qc>>2]=+g[mc>>2]-+g[pc>>2];g[Ue>>2]=+g[mc>>2]+ +g[pc>>2];g[tf>>2]=+g[nc>>2]+ +g[oc>>2];g[wf>>2]=+g[uf>>2]+ +g[vf>>2];g[xf>>2]=+g[tf>>2]+ +g[wf>>2];g[jg>>2]=+g[wf>>2]-+g[tf>>2];g[Ia>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2];g[Ta>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[ea>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ga>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2];g[Ha>>2]=(+g[ea>>2]-+g[Ga>>2])*.7071067690849304;g[Sa>>2]=(+g[ea>>2]+ +g[Ga>>2])*.7071067690849304;g[Ka>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[La>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2];g[Ma>>2]=+g[Ka>>2]*.9238795042037964-+g[La>>2]*.3826834261417389;g[Nb>>2]=+g[Ka>>2]*.3826834261417389+ 
+g[La>>2]*.9238795042037964;g[Na>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Oa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2];g[Pa>>2]=+g[Na>>2]*.3826834261417389-+g[Oa>>2]*.9238795042037964;g[Qa>>2]=+g[Na>>2]*.9238795042037964+ +g[Oa>>2]*.3826834261417389;g[Ja>>2]=+g[Ha>>2]-+g[Ia>>2];g[ob>>2]=+g[Ma>>2]+ +g[Pa>>2];g[pb>>2]=+g[Ja>>2]+ +g[ob>>2];g[Kc>>2]=+g[Ja>>2]-+g[ob>>2];g[sd>>2]=+g[Pa>>2]-+g[Ma>>2];g[td>>2]=+g[Ta>>2]-+g[Sa>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[se>>2]=+g[sd>>2]+ +g[td>>2];g[Ra>>2]=+g[Nb>>2]+ +g[Qa>>2];g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Va>>2]=+g[Ra>>2]+ +g[Ua>>2];g[Oc>>2]=+g[Ua>>2]-+g[Ra>>2];g[od>>2]=+g[Ha>>2]+ +g[Ia>>2];g[pd>>2]=+g[Nb>>2]-+g[Qa>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[nf>>2]=+g[pd>>2]-+g[od>>2];g[xh>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Qg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*17<<2)>>2];g[yh>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Bg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*25<<2)>>2];g[Cg>>2]=(+g[yh>>2]-+g[Bg>>2])*.7071067690849304;g[Pg>>2]=(+g[yh>>2]+ +g[Bg>>2])*.7071067690849304;g[Eg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Fg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*21<<2)>>2];g[Gg>>2]=+g[Eg>>2]*.9238795042037964-+g[Fg>>2]*.3826834261417389;g[Mg>>2]=+g[Eg>>2]*.3826834261417389+ +g[Fg>>2]*.9238795042037964;g[Hg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Ig>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*29<<2)>>2];g[Jg>>2]=+g[Hg>>2]*.3826834261417389-+g[Ig>>2]*.9238795042037964;g[Ng>>2]=+g[Hg>>2]*.9238795042037964+ +g[Ig>>2]*.3826834261417389;g[Dg>>2]=+g[xh>>2]+ +g[Cg>>2];g[Kg>>2]=+g[Gg>>2]+ +g[Jg>>2];g[Lg>>2]=+g[Dg>>2]+ +g[Kg>>2];g[zc>>2]=+g[Dg>>2]-+g[Kg>>2];g[bd>>2]=+g[Jg>>2]-+g[Gg>>2];g[cd>>2]=+g[Qg>>2]-+g[Pg>>2];g[dd>>2]=+g[bd>>2]-+g[cd>>2];g[af>>2]=+g[bd>>2]+ +g[cd>>2];g[Og>>2]=+g[Mg>>2]+ +g[Ng>>2];g[Rg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[Sg>>2]=+g[Og>>2]+ +g[Rg>>2];g[yc>>2]=+g[Rg>>2]-+g[Og>>2];g[_c>>2]=+g[xh>>2]-+g[Cg>>2];g[$c>>2]=+g[Mg>>2]-+g[Ng>>2];g[ad>>2]=+g[_c>>2]-+g[$c>>2];g[bf>>2]=+g[_c>>2]+ 
+g[$c>>2];g[Xg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*31<<2)>>2];g[fa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Ug>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Vg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*23<<2)>>2];g[Wg>>2]=(+g[Ug>>2]-+g[Vg>>2])*.7071067690849304;g[G>>2]=(+g[Ug>>2]+ +g[Vg>>2])*.7071067690849304;g[Zg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[_g>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*19<<2)>>2];g[x>>2]=+g[Zg>>2]*.9238795042037964-+g[_g>>2]*.3826834261417389;g[D>>2]=+g[Zg>>2]*.3826834261417389+ +g[_g>>2]*.9238795042037964;g[y>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[z>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*27<<2)>>2];g[A>>2]=+g[y>>2]*.3826834261417389-+g[z>>2]*.9238795042037964;g[E>>2]=+g[y>>2]*.9238795042037964+ +g[z>>2]*.3826834261417389;g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[C>>2]=+g[Yg>>2]+ +g[B>>2];g[Xb>>2]=+g[Yg>>2]-+g[B>>2];g[Id>>2]=+g[A>>2]-+g[x>>2];g[Jd>>2]=+g[fa>>2]-+g[G>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[_e>>2]=+g[Id>>2]+ +g[Jd>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[ga>>2]=+g[G>>2]+ +g[fa>>2];g[ha>>2]=+g[F>>2]+ +g[ga>>2];g[Yb>>2]=+g[ga>>2]-+g[F>>2];g[fd>>2]=+g[Wg>>2]+ +g[Xg>>2];g[Gd>>2]=+g[D>>2]-+g[E>>2];g[Hd>>2]=+g[fd>>2]+ +g[Gd>>2];g[Ze>>2]=+g[Gd>>2]-+g[fd>>2];g[la>>2]=+g[c[o>>2]>>2];g[$>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2];g[ma>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[na>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2];g[oa>>2]=(+g[ma>>2]-+g[na>>2])*.7071067690849304;g[_>>2]=(+g[ma>>2]+ +g[na>>2])*.7071067690849304;g[qa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ra>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2];g[sa>>2]=+g[qa>>2]*.9238795042037964-+g[ra>>2]*.3826834261417389;g[X>>2]=+g[qa>>2]*.3826834261417389+ +g[ra>>2]*.9238795042037964;g[ta>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ua>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2];g[va>>2]=+g[ta>>2]*.3826834261417389-+g[ua>>2]*.9238795042037964;g[Y>>2]=+g[ta>>2]*.9238795042037964+ +g[ua>>2]*.3826834261417389;g[pa>>2]=+g[la>>2]+ +g[oa>>2];g[wa>>2]=+g[sa>>2]+ +g[va>>2];g[xa>>2]=+g[pa>>2]+ 
+g[wa>>2];g[Dc>>2]=+g[pa>>2]-+g[wa>>2];g[Zd>>2]=+g[va>>2]-+g[sa>>2];g[_d>>2]=+g[$>>2]-+g[_>>2];g[$d>>2]=+g[Zd>>2]-+g[_d>>2];g[kf>>2]=+g[Zd>>2]+ +g[_d>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[Hc>>2]=+g[aa>>2]-+g[Z>>2];g[Od>>2]=+g[la>>2]-+g[oa>>2];g[Pd>>2]=+g[X>>2]-+g[Y>>2];g[Qd>>2]=+g[Od>>2]-+g[Pd>>2];g[ff>>2]=+g[Od>>2]+ +g[Pd>>2];g[qb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[wb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2];g[rb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[sb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2];g[tb>>2]=(+g[rb>>2]-+g[sb>>2])*.7071067690849304;g[vb>>2]=(+g[rb>>2]+ +g[sb>>2])*.7071067690849304;g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[ld>>2]=+g[qb>>2]-+g[tb>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[kd>>2]=+g[wb>>2]-+g[vb>>2];g[Cb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2];g[Fb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[zb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ab>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2];g[Bb>>2]=(+g[zb>>2]-+g[Ab>>2])*.7071067690849304;g[Eb>>2]=(+g[zb>>2]+ +g[Ab>>2])*.7071067690849304;g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[id>>2]=+g[Bb>>2]+ +g[Cb>>2];g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[hd>>2]=+g[Fb>>2]-+g[Eb>>2];g[yb>>2]=+g[ub>>2]*.9807852506637573-+g[xb>>2]*.19509032368659973;g[Hb>>2]=+g[Db>>2]*.9807852506637573+ +g[Gb>>2]*.19509032368659973;g[Ib>>2]=+g[yb>>2]+ +g[Hb>>2];g[Nc>>2]=+g[Hb>>2]-+g[yb>>2];g[vd>>2]=+g[ld>>2]*.8314695954322815+ +g[kd>>2]*.5555702447891235;g[wd>>2]=+g[id>>2]*.8314695954322815+ +g[hd>>2]*.5555702447891235;g[xd>>2]=+g[vd>>2]+ +g[wd>>2];g[pe>>2]=+g[vd>>2]-+g[wd>>2];g[Kb>>2]=+g[Db>>2]*.19509032368659973-+g[Gb>>2]*.9807852506637573;g[Lb>>2]=+g[ub>>2]*.19509032368659973+ +g[xb>>2]*.9807852506637573;g[Mb>>2]=+g[Kb>>2]-+g[Lb>>2];g[Lc>>2]=+g[Lb>>2]+ +g[Kb>>2];g[jd>>2]=+g[hd>>2]*.8314695954322815-+g[id>>2]*.5555702447891235;g[md>>2]=+g[kd>>2]*.8314695954322815-+g[ld>>2]*.5555702447891235;g[nd>>2]=+g[jd>>2]-+g[md>>2];g[re>>2]=+g[md>>2]+ 
+g[jd>>2];g[ya>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ea>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2];g[za>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Aa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2];g[Ba>>2]=(+g[za>>2]-+g[Aa>>2])*.7071067690849304;g[Da>>2]=(+g[za>>2]+ +g[Aa>>2])*.7071067690849304;g[Ca>>2]=+g[ya>>2]+ +g[Ba>>2];g[Vd>>2]=+g[ya>>2]-+g[Ba>>2];g[H>>2]=+g[Da>>2]+ +g[Ea>>2];g[Ud>>2]=+g[Ea>>2]-+g[Da>>2];g[M>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2];g[P>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[J>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[K>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2];g[L>>2]=(+g[J>>2]-+g[K>>2])*.7071067690849304;g[O>>2]=(+g[J>>2]+ +g[K>>2])*.7071067690849304;g[N>>2]=+g[L>>2]-+g[M>>2];g[Sd>>2]=+g[L>>2]+ +g[M>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Rd>>2]=+g[P>>2]-+g[O>>2];g[I>>2]=+g[Ca>>2]*.9807852506637573-+g[H>>2]*.19509032368659973;g[R>>2]=+g[N>>2]*.9807852506637573+ +g[Q>>2]*.19509032368659973;g[S>>2]=+g[I>>2]+ +g[R>>2];g[Gc>>2]=+g[R>>2]-+g[I>>2];g[ae>>2]=+g[Vd>>2]*.8314695954322815+ +g[Ud>>2]*.5555702447891235;g[be>>2]=+g[Sd>>2]*.8314695954322815+ +g[Rd>>2]*.5555702447891235;g[ce>>2]=+g[ae>>2]+ +g[be>>2];g[gf>>2]=+g[ae>>2]-+g[be>>2];g[U>>2]=+g[N>>2]*.19509032368659973-+g[Q>>2]*.9807852506637573;g[V>>2]=+g[Ca>>2]*.19509032368659973+ +g[H>>2]*.9807852506637573;g[W>>2]=+g[U>>2]-+g[V>>2];g[Ec>>2]=+g[V>>2]+ +g[U>>2];g[Td>>2]=+g[Rd>>2]*.8314695954322815-+g[Sd>>2]*.5555702447891235;g[Wd>>2]=+g[Ud>>2]*.8314695954322815-+g[Vd>>2]*.5555702447891235;g[Xd>>2]=+g[Td>>2]-+g[Wd>>2];g[jf>>2]=+g[Wd>>2]+ +g[Td>>2];g[vh>>2]=+g[lh>>2]+ +g[uh>>2];g[wh>>2]=+g[ch>>2]+ +g[vh>>2];g[bb>>2]=+g[ch>>2]-+g[vh>>2];g[sf>>2]=+g[Ub>>2]-+g[Tb>>2];g[yf>>2]=+g[sf>>2]-+g[xf>>2];g[cg>>2]=+g[sf>>2]+ +g[xf>>2];g[Tg>>2]=+g[Lg>>2]*.9951847195625305-+g[Sg>>2]*.0980171412229538;g[ia>>2]=+g[C>>2]*.9951847195625305+ +g[ha>>2]*.0980171412229538;g[ja>>2]=+g[Tg>>2]+ +g[ia>>2];g[bg>>2]=+g[ia>>2]-+g[Tg>>2];g[jb>>2]=+g[pb>>2]-+g[Ib>>2];g[kb>>2]=+g[Mb>>2]+ 
+g[Va>>2];g[lb>>2]=+g[jb>>2]*.7409511208534241-+g[kb>>2]*.6715589761734009;g[Pb>>2]=+g[jb>>2]*.6715589761734009+ +g[kb>>2]*.7409511208534241;g[T>>2]=+g[xa>>2]+ +g[S>>2];g[ca>>2]=+g[W>>2]-+g[ba>>2];g[da>>2]=+g[T>>2]*.9987954497337341+ +g[ca>>2]*.049067676067352295;g[$a>>2]=+g[ca>>2]*.9987954497337341-+g[T>>2]*.049067676067352295;g[cb>>2]=+g[Lg>>2]*.0980171412229538+ +g[Sg>>2]*.9951847195625305;g[db>>2]=+g[C>>2]*.0980171412229538-+g[ha>>2]*.9951847195625305;g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[rf>>2]=+g[db>>2]-+g[cb>>2];g[gb>>2]=+g[xa>>2]-+g[S>>2];g[hb>>2]=+g[W>>2]+ +g[ba>>2];g[ib>>2]=+g[gb>>2]*.7409511208534241+ +g[hb>>2]*.6715589761734009;g[Qb>>2]=+g[hb>>2]*.7409511208534241-+g[gb>>2]*.6715589761734009;g[Jb>>2]=+g[pb>>2]+ +g[Ib>>2];g[Wa>>2]=+g[Mb>>2]-+g[Va>>2];g[Xa>>2]=+g[Jb>>2]*.9987954497337341-+g[Wa>>2]*.049067676067352295;g[_a>>2]=+g[Jb>>2]*.049067676067352295+ +g[Wa>>2]*.9987954497337341;g[ka>>2]=+g[wh>>2]+ +g[ja>>2];g[Ya>>2]=+g[da>>2]+ +g[Xa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*31<<2)>>2]=+g[ka>>2]-+g[Ya>>2];g[c[p>>2]>>2]=+g[ka>>2]+ +g[Ya>>2];g[_f>>2]=+g[$a>>2]+ +g[_a>>2];g[$f>>2]=+g[rf>>2]+ +g[yf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*31<<2)>>2]=+g[_f>>2]-+g[$f>>2];g[c[q>>2]>>2]=+g[_f>>2]+ +g[$f>>2];g[Za>>2]=+g[wh>>2]-+g[ja>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2]=+g[Za>>2]-+g[ab>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2]=+g[Za>>2]+ +g[ab>>2];g[qf>>2]=+g[Xa>>2]-+g[da>>2];g[Zf>>2]=+g[rf>>2]-+g[yf>>2];g[(c[q>>2]|0)+(c[t>>2]<<4<<2)>>2]=+g[qf>>2]-+g[Zf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2]=+g[qf>>2]+ +g[Zf>>2];g[fb>>2]=+g[bb>>2]+ +g[eb>>2];g[mb>>2]=+g[ib>>2]+ +g[lb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*24<<2)>>2]=+g[fb>>2]-+g[mb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[fb>>2]+ +g[mb>>2];g[eg>>2]=+g[Qb>>2]+ +g[Pb>>2];g[fg>>2]=+g[bg>>2]+ +g[cg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*24<<2)>>2]=+g[eg>>2]-+g[fg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[eg>>2]+ 
+g[fg>>2];g[nb>>2]=+g[bb>>2]-+g[eb>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*23<<2)>>2]=+g[nb>>2]-+g[Rb>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[nb>>2]+ +g[Rb>>2];g[ag>>2]=+g[lb>>2]-+g[ib>>2];g[dg>>2]=+g[bg>>2]-+g[cg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*23<<2)>>2]=+g[ag>>2]-+g[dg>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[ag>>2]+ +g[dg>>2];g[Xe>>2]=+g[Ve>>2]-+g[We>>2];g[Ye>>2]=+g[Ue>>2]-+g[Xe>>2];g[Ae>>2]=+g[Ue>>2]+ +g[Xe>>2];g[Of>>2]=+g[wc>>2]+ +g[tc>>2];g[Qf>>2]=+g[Of>>2]-+g[Pf>>2];g[Wf>>2]=+g[Of>>2]+ +g[Pf>>2];g[$e>>2]=+g[Ze>>2]*.290284663438797+ +g[_e>>2]*.9569403529167175;g[cf>>2]=+g[af>>2]*.9569403529167175-+g[bf>>2]*.290284663438797;g[df>>2]=+g[$e>>2]-+g[cf>>2];g[Vf>>2]=+g[cf>>2]+ +g[$e>>2];g[Ie>>2]=+g[nf>>2]+ +g[pe>>2];g[Je>>2]=+g[re>>2]+ +g[se>>2];g[Ke>>2]=+g[Ie>>2]*.9891765117645264-+g[Je>>2]*.1467304676771164;g[Ne>>2]=+g[Ie>>2]*.1467304676771164+ +g[Je>>2]*.9891765117645264;g[hf>>2]=+g[ff>>2]-+g[gf>>2];g[lf>>2]=+g[jf>>2]-+g[kf>>2];g[mf>>2]=+g[hf>>2]*.803207516670227+ +g[lf>>2]*.5956993103027344;g[ye>>2]=+g[lf>>2]*.803207516670227-+g[hf>>2]*.5956993103027344;g[Be>>2]=+g[bf>>2]*.9569403529167175+ +g[af>>2]*.290284663438797;g[Ce>>2]=+g[Ze>>2]*.9569403529167175-+g[_e>>2]*.290284663438797;g[De>>2]=+g[Be>>2]+ +g[Ce>>2];g[Nf>>2]=+g[Ce>>2]-+g[Be>>2];g[Fe>>2]=+g[ff>>2]+ +g[gf>>2];g[Ge>>2]=+g[jf>>2]+ +g[kf>>2];g[He>>2]=+g[Fe>>2]*.9891765117645264+ +g[Ge>>2]*.1467304676771164;g[Oe>>2]=+g[Ge>>2]*.9891765117645264-+g[Fe>>2]*.1467304676771164;g[qe>>2]=+g[nf>>2]-+g[pe>>2];g[te>>2]=+g[re>>2]-+g[se>>2];g[ue>>2]=+g[qe>>2]*.803207516670227-+g[te>>2]*.5956993103027344;g[xe>>2]=+g[qe>>2]*.5956993103027344+ +g[te>>2]*.803207516670227;g[ef>>2]=+g[Ye>>2]+ +g[df>>2];g[ve>>2]=+g[mf>>2]+ +g[ue>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*25<<2)>>2]=+g[ef>>2]-+g[ve>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[ef>>2]+ +g[ve>>2];g[Sf>>2]=+g[ye>>2]+ +g[xe>>2];g[Tf>>2]=+g[Nf>>2]+ 
+g[Qf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*25<<2)>>2]=+g[Sf>>2]-+g[Tf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[Sf>>2]+ +g[Tf>>2];g[we>>2]=+g[Ye>>2]-+g[df>>2];g[ze>>2]=+g[xe>>2]-+g[ye>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*22<<2)>>2]=+g[we>>2]-+g[ze>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[we>>2]+ +g[ze>>2];g[Mf>>2]=+g[ue>>2]-+g[mf>>2];g[Rf>>2]=+g[Nf>>2]-+g[Qf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*22<<2)>>2]=+g[Mf>>2]-+g[Rf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[Mf>>2]+ +g[Rf>>2];g[Ee>>2]=+g[Ae>>2]+ +g[De>>2];g[Le>>2]=+g[He>>2]+ +g[Ke>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*30<<2)>>2]=+g[Ee>>2]-+g[Le>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Ee>>2]+ +g[Le>>2];g[Yf>>2]=+g[Oe>>2]+ +g[Ne>>2];g[yg>>2]=+g[Vf>>2]+ +g[Wf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*30<<2)>>2]=+g[Yf>>2]-+g[yg>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[Yf>>2]+ +g[yg>>2];g[Me>>2]=+g[Ae>>2]-+g[De>>2];g[pf>>2]=+g[Ne>>2]-+g[Oe>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*17<<2)>>2]=+g[Me>>2]-+g[pf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2]=+g[Me>>2]+ +g[pf>>2];g[Uf>>2]=+g[Ke>>2]-+g[He>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*17<<2)>>2]=+g[Uf>>2]-+g[Xf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2]=+g[Uf>>2]+ +g[Xf>>2];g[Vb>>2]=+g[Tb>>2]+ +g[Ub>>2];g[Wb>>2]=+g[Sb>>2]-+g[Vb>>2];g[Wc>>2]=+g[Sb>>2]+ +g[Vb>>2];g[ig>>2]=+g[uh>>2]-+g[lh>>2];g[kg>>2]=+g[ig>>2]-+g[jg>>2];g[qg>>2]=+g[ig>>2]+ +g[jg>>2];g[xc>>2]=+g[Xb>>2]*.6343932747840881+ +g[Yb>>2]*.7730104327201843;g[Ac>>2]=+g[yc>>2]*.7730104327201843-+g[zc>>2]*.6343932747840881;g[Bc>>2]=+g[xc>>2]-+g[Ac>>2];g[pg>>2]=+g[Ac>>2]+ +g[xc>>2];g[ec>>2]=+g[Kc>>2]+ +g[Lc>>2];g[fc>>2]=+g[Nc>>2]+ +g[Oc>>2];g[gc>>2]=+g[ec>>2]*.9415440559387207-+g[fc>>2]*.3368898630142212;g[jc>>2]=+g[ec>>2]*.3368898630142212+ +g[fc>>2]*.9415440559387207;g[Fc>>2]=+g[Dc>>2]-+g[Ec>>2];g[Ic>>2]=+g[Gc>>2]-+g[Hc>>2];g[Jc>>2]=+g[Fc>>2]*.903989315032959+ +g[Ic>>2]*.4275550842285156;g[Uc>>2]=+g[Ic>>2]*.903989315032959-+g[Fc>>2]*.4275550842285156;g[Zb>>2]=+g[zc>>2]*.7730104327201843+ 
+g[yc>>2]*.6343932747840881;g[_b>>2]=+g[Xb>>2]*.7730104327201843-+g[Yb>>2]*.6343932747840881;g[$b>>2]=+g[Zb>>2]+ +g[_b>>2];g[hg>>2]=+g[_b>>2]-+g[Zb>>2];g[bc>>2]=+g[Dc>>2]+ +g[Ec>>2];g[cc>>2]=+g[Gc>>2]+ +g[Hc>>2];g[dc>>2]=+g[bc>>2]*.9415440559387207+ +g[cc>>2]*.3368898630142212;g[kc>>2]=+g[cc>>2]*.9415440559387207-+g[bc>>2]*.3368898630142212;g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Pc>>2]=+g[Nc>>2]-+g[Oc>>2];g[Qc>>2]=+g[Mc>>2]*.903989315032959-+g[Pc>>2]*.4275550842285156;g[Tc>>2]=+g[Mc>>2]*.4275550842285156+ +g[Pc>>2]*.903989315032959;g[Cc>>2]=+g[Wb>>2]+ +g[Bc>>2];g[Rc>>2]=+g[Jc>>2]+ +g[Qc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*27<<2)>>2]=+g[Cc>>2]-+g[Rc>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[Cc>>2]+ +g[Rc>>2];g[mg>>2]=+g[Uc>>2]+ +g[Tc>>2];g[ng>>2]=+g[hg>>2]+ +g[kg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*27<<2)>>2]=+g[mg>>2]-+g[ng>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[mg>>2]+ +g[ng>>2];g[Sc>>2]=+g[Wb>>2]-+g[Bc>>2];g[Vc>>2]=+g[Tc>>2]-+g[Uc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*20<<2)>>2]=+g[Sc>>2]-+g[Vc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[Sc>>2]+ +g[Vc>>2];g[gg>>2]=+g[Qc>>2]-+g[Jc>>2];g[lg>>2]=+g[hg>>2]-+g[kg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*20<<2)>>2]=+g[gg>>2]-+g[lg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[gg>>2]+ +g[lg>>2];g[ac>>2]=+g[Wc>>2]+ +g[$b>>2];g[hc>>2]=+g[dc>>2]+ +g[gc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*28<<2)>>2]=+g[ac>>2]-+g[hc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[ac>>2]+ +g[hc>>2];g[sg>>2]=+g[kc>>2]+ +g[jc>>2];g[tg>>2]=+g[pg>>2]+ +g[qg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*28<<2)>>2]=+g[sg>>2]-+g[tg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[sg>>2]+ +g[tg>>2];g[ic>>2]=+g[Wc>>2]-+g[$b>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*19<<2)>>2]=+g[ic>>2]-+g[lc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[ic>>2]+ +g[lc>>2];g[og>>2]=+g[gc>>2]-+g[dc>>2];g[rg>>2]=+g[pg>>2]-+g[qg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*19<<2)>>2]=+g[og>>2]-+g[rg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2]=+g[og>>2]+ +g[rg>>2];g[Yc>>2]=+g[tc>>2]-+g[wc>>2];g[Zc>>2]=+g[qc>>2]+ 
+g[Yc>>2];g[Fd>>2]=+g[qc>>2]-+g[Yc>>2];g[Bf>>2]=+g[Ve>>2]+ +g[We>>2];g[Cf>>2]=+g[Af>>2]-+g[Bf>>2];g[If>>2]=+g[Bf>>2]+ +g[Af>>2];g[ed>>2]=+g[ad>>2]*.8819212913513184+ +g[dd>>2]*.4713967442512512;g[Ld>>2]=+g[Hd>>2]*.8819212913513184+ +g[Kd>>2]*.4713967442512512;g[Md>>2]=+g[ed>>2]-+g[Ld>>2];g[Hf>>2]=+g[ed>>2]+ +g[Ld>>2];g[me>>2]=+g[qd>>2]+ +g[nd>>2];g[ne>>2]=+g[xd>>2]+ +g[ud>>2];g[oe>>2]=+g[me>>2]*.5141027569770813+ +g[ne>>2]*.8577286005020142;g[Se>>2]=+g[ne>>2]*.5141027569770813-+g[me>>2]*.8577286005020142;g[Yd>>2]=+g[Qd>>2]+ +g[Xd>>2];g[de>>2]=+g[$d>>2]-+g[ce>>2];g[gd>>2]=+g[Yd>>2]*.9700312614440918+ +g[de>>2]*.24298018217086792;g[Dd>>2]=+g[de>>2]*.9700312614440918-+g[Yd>>2]*.24298018217086792;g[fe>>2]=+g[Kd>>2]*.8819212913513184-+g[Hd>>2]*.4713967442512512;g[ge>>2]=+g[dd>>2]*.8819212913513184-+g[ad>>2]*.4713967442512512;g[he>>2]=+g[fe>>2]-+g[ge>>2];g[vg>>2]=+g[ge>>2]+ +g[fe>>2];g[je>>2]=+g[Qd>>2]-+g[Xd>>2];g[ke>>2]=+g[ce>>2]+ +g[$d>>2];g[le>>2]=+g[je>>2]*.5141027569770813+ +g[ke>>2]*.8577286005020142;g[Re>>2]=+g[je>>2]*.8577286005020142-+g[ke>>2]*.5141027569770813;g[rd>>2]=+g[nd>>2]-+g[qd>>2];g[yd>>2]=+g[ud>>2]-+g[xd>>2];g[zd>>2]=+g[rd>>2]*.9700312614440918-+g[yd>>2]*.24298018217086792;g[Cd>>2]=+g[rd>>2]*.24298018217086792+ +g[yd>>2]*.9700312614440918;g[Nd>>2]=+g[Zc>>2]+ +g[Md>>2];g[Ad>>2]=+g[gd>>2]+ +g[zd>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*29<<2)>>2]=+g[Nd>>2]-+g[Ad>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[Nd>>2]+ +g[Ad>>2];g[Ef>>2]=+g[Dd>>2]+ +g[Cd>>2];g[Ff>>2]=+g[vg>>2]+ +g[Cf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*29<<2)>>2]=+g[Ef>>2]-+g[Ff>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[Ef>>2]+ +g[Ff>>2];g[Bd>>2]=+g[Zc>>2]-+g[Md>>2];g[Ed>>2]=+g[Cd>>2]-+g[Dd>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*18<<2)>>2]=+g[Bd>>2]-+g[Ed>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2]=+g[Bd>>2]+ +g[Ed>>2];g[ug>>2]=+g[zd>>2]-+g[gd>>2];g[Df>>2]=+g[vg>>2]-+g[Cf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*18<<2)>>2]=+g[ug>>2]-+g[Df>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2]=+g[ug>>2]+ 
+g[Df>>2];g[ie>>2]=+g[Fd>>2]-+g[he>>2];g[Pe>>2]=+g[le>>2]-+g[oe>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*21<<2)>>2]=+g[ie>>2]-+g[Pe>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[ie>>2]+ +g[Pe>>2];g[Kf>>2]=+g[Se>>2]-+g[Re>>2];g[Lf>>2]=+g[If>>2]-+g[Hf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*21<<2)>>2]=+g[Kf>>2]-+g[Lf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[Kf>>2]+ +g[Lf>>2];g[Qe>>2]=+g[Fd>>2]+ +g[he>>2];g[Te>>2]=+g[Re>>2]+ +g[Se>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*26<<2)>>2]=+g[Qe>>2]-+g[Te>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[Qe>>2]+ +g[Te>>2];g[Gf>>2]=+g[le>>2]+ +g[oe>>2];g[Jf>>2]=+g[Hf>>2]+ +g[If>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=-(+g[Gf>>2]+ +g[Jf>>2]);g[(c[q>>2]|0)+((c[t>>2]|0)*26<<2)>>2]=+g[Jf>>2]-+g[Gf>>2];c[zh>>2]=(c[zh>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ah;return}function Ds(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,13,6232);i=b;return}function Es(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0;I=i;i=i+96|0;n=I+92|0;o=I+88|0;p=I+84|0;q=I+80|0;r=I+76|0;s=I+72|0;t=I+68|0;J=I+64|0;u=I+60|0;v=I+56|0;H=I+44|0;F=I+40|0;C=I+36|0;E=I+32|0;w=I+28|0;y=I+24|0;x=I+20|0;D=I+16|0;z=I+12|0;A=I+8|0;B=I+4|0;G=I;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[J>>2]=k;c[u>>2]=l;c[v>>2]=m;g[I+52>>2]=.5;g[I+48>>2]=.8660253882408142;c[H>>2]=c[J>>2];while(1){if((c[H>>2]|0)<=0)break;g[F>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[B>>2]=+g[c[o>>2]>>2];g[C>>2]=(+g[A>>2]-+g[B>>2])*.8660253882408142;g[E>>2]=+g[A>>2]+ +g[B>>2];g[w>>2]=+g[c[n>>2]>>2];g[y>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=(+g[x>>2]+ +g[y>>2])*.8660253882408142;g[z>>2]=(+g[y>>2]-+g[x>>2])*.5+ 
+g[w>>2];g[c[p>>2]>>2]=+g[z>>2]-+g[C>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[z>>2]+ +g[C>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[F>>2]-+g[E>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[w>>2]+ +g[x>>2]-+g[y>>2];g[G>>2]=+g[E>>2]*.5+ +g[F>>2];g[c[q>>2]>>2]=-(+g[D>>2]+ +g[G>>2]);g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[D>>2]-+g[G>>2];c[H>>2]=(c[H>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=I;return}function Fs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,14,6280);i=b;return}function Gs(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0;K=i;i=i+128|0;n=K+116|0;o=K+112|0;p=K+108|0;q=K+104|0;r=K+100|0;s=K+96|0;t=K+92|0;L=K+88|0;u=K+84|0;v=K+80|0;J=K+52|0;w=K+48|0;F=K+44|0;I=K+40|0;z=K+36|0;G=K+32|0;C=K+28|0;H=K+24|0;D=K+20|0;E=K+16|0;x=K+12|0;y=K+8|0;A=K+4|0;B=K;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[L>>2]=k;c[u>>2]=l;c[v>>2]=m;g[K+76>>2]=.9009688496589661;g[K+72>>2]=.22252093255519867;g[K+68>>2]=.6234897971153259;g[K+64>>2]=.4338837265968323;g[K+60>>2]=.9749279022216797;g[K+56>>2]=.7818315029144287;c[J>>2]=c[L>>2];while(1){if((c[J>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[D>>2]=+g[c[o>>2]>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[I>>2]=+g[D>>2]+ +g[E>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[G>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[H>>2]=+g[A>>2]+ +g[B>>2];g[c[q>>2]>>2]=-(+g[G>>2]*.7818315029144287+ +g[H>>2]*.9749279022216797+ 
+g[I>>2]*.4338837265968323);g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[H>>2]*.7818315029144287-+g[I>>2]*.9749279022216797-+g[G>>2]*.4338837265968323;g[c[p>>2]>>2]=+g[z>>2]*.6234897971153259+ +g[w>>2]+(+g[C>>2]*.22252093255519867+ +g[F>>2]*.9009688496589661);g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[G>>2]*.9749279022216797-+g[I>>2]*.7818315029144287-+g[H>>2]*.4338837265968323;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[C>>2]*.9009688496589661+ +g[w>>2]+-(+g[F>>2]*.6234897971153259+ +g[z>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[F>>2]*.22252093255519867+ +g[w>>2]+-(+g[C>>2]*.6234897971153259+ +g[z>>2]*.9009688496589661);g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[w>>2]+ +g[z>>2]-(+g[C>>2]+ +g[F>>2]);c[J>>2]=(c[J>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=K;return}function Hs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,15,6328);i=b;return}function Is(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;n=T+140|0;o=T+136|0;p=T+132|0;q=T+128|0;r=T+124|0;s=T+120|0;t=T+116|0;U=T+112|0;u=T+108|0;v=T+104|0;S=T+88|0;w=T+84|0;O=T+80|0;z=T+76|0;N=T+72|0;D=T+68|0;J=T+64|0;G=T+60|0;K=T+56|0;x=T+52|0;y=T+48|0;B=T+44|0;C=T+40|0;E=T+36|0;F=T+32|0;A=T+28|0;H=T+24|0;M=T+20|0;P=T+16|0;I=T+12|0;L=T+8|0;Q=T+4|0;R=T;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[U>>2]=k;c[u>>2]=l;c[v>>2]=m;g[T+100>>2]=.3826834261417389;g[T+96>>2]=.9238795042037964;g[T+92>>2]=.7071067690849304;c[S>>2]=c[U>>2];while(1){if((c[S>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[z>>2]=(+g[x>>2]-+g[y>>2])*.7071067690849304;g[N>>2]=(+g[x>>2]+ +g[y>>2])*.7071067690849304;g[B>>2]=+g[c[o>>2]>>2];g[C>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=+g[B>>2]*.9238795042037964-+g[C>>2]*.3826834261417389;g[J>>2]=+g[B>>2]*.3826834261417389+ +g[C>>2]*.9238795042037964;g[E>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[F>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[G>>2]=+g[E>>2]*.3826834261417389-+g[F>>2]*.9238795042037964;g[K>>2]=+g[E>>2]*.9238795042037964+ +g[F>>2]*.3826834261417389;g[A>>2]=+g[w>>2]+ +g[z>>2];g[H>>2]=+g[D>>2]+ +g[G>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[A>>2]-+g[H>>2];g[c[p>>2]>>2]=+g[A>>2]+ +g[H>>2];g[M>>2]=+g[J>>2]+ +g[K>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[c[q>>2]>>2]=-(+g[M>>2]+ +g[P>>2]);g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[P>>2]-+g[M>>2];g[I>>2]=+g[w>>2]-+g[z>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[I>>2]-+g[L>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[Q>>2]=+g[G>>2]-+g[D>>2];g[R>>2]=+g[O>>2]-+g[N>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[Q>>2]+ 
+g[R>>2];c[S>>2]=(c[S>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=T;return}function Js(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,16,6376);i=b;return}function Ks(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0;ba=i;i=i+240|0;n=ba+224|0;o=ba+220|0;p=ba+216|0;q=ba+212|0;r=ba+208|0;s=ba+204|0;t=ba+200|0;ca=ba+196|0;u=ba+192|0;v=ba+188|0;aa=ba+128|0;w=ba+124|0;z=ba+120|0;T=ba+116|0;F=ba+112|0;Q=ba+108|0;P=ba+104|0;K=ba+100|0;N=ba+96|0;M=ba+92|0;x=ba+88|0;y=ba+84|0;A=ba+80|0;L=ba+76|0;B=ba+72|0;C=ba+68|0;D=ba+64|0;E=ba+60|0;G=ba+56|0;H=ba+52|0;I=ba+48|0;J=ba+44|0;W=ba+40|0;Y=ba+36|0;$=ba+32|0;_=ba+28|0;Z=ba+24|0;U=ba+20|0;V=ba+16|0;X=ba+12|0;O=ba+8|0;R=ba+4|0;S=ba;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ca>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ba+184>>2]=.663413941860199;g[ba+180>>2]=.6427876353263855;g[ba+176>>2]=.5566704273223877;g[ba+172>>2]=.7660444378852844;g[ba+168>>2]=.8528685569763184;g[ba+164>>2]=.1736481785774231;g[ba+160>>2]=.9848077297210693;g[ba+156>>2]=.15038372576236725;g[ba+152>>2]=.813797652721405;g[ba+148>>2]=.3420201539993286;g[ba+144>>2]=.9396926164627075;g[ba+140>>2]=.29619812965393066;g[ba+136>>2]=.8660253882408142;g[ba+132>>2]=.5;c[aa>>2]=c[ca>>2];while(1){if((c[aa>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[T>>2]=+g[x>>2]+ 
+g[y>>2];g[B>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[C>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[F>>2]=+g[B>>2]-+g[E>>2];g[Q>>2]=+g[C>>2]+ +g[D>>2];g[P>>2]=+g[E>>2]*.5+ +g[B>>2];g[G>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[H>>2]=+g[c[o>>2]>>2];g[I>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[K>>2]=+g[G>>2]-+g[J>>2];g[N>>2]=+g[J>>2]*.5+ +g[G>>2];g[M>>2]=+g[H>>2]-+g[I>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=(+g[K>>2]-+g[F>>2])*.8660253882408142;g[A>>2]=+g[w>>2]-+g[z>>2];g[L>>2]=+g[F>>2]+ +g[K>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[A>>2]-+g[L>>2]*.5;g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[A>>2]+ +g[L>>2];g[W>>2]=+g[z>>2]*.5+ +g[w>>2];g[Y>>2]=+g[M>>2]*.29619812965393066+ +g[N>>2]*.9396926164627075;g[$>>2]=+g[N>>2]*.3420201539993286-+g[M>>2]*.813797652721405;g[_>>2]=+g[Q>>2]*.15038372576236725-+g[P>>2]*.9848077297210693;g[Z>>2]=+g[P>>2]*.1736481785774231+ +g[Q>>2]*.8528685569763184;g[U>>2]=+g[P>>2]*.7660444378852844-+g[Q>>2]*.5566704273223877;g[V>>2]=+g[M>>2]*.8528685569763184+ +g[N>>2]*.1736481785774231;g[X>>2]=+g[U>>2]+ +g[V>>2];g[O>>2]=+g[M>>2]*.15038372576236725-+g[N>>2]*.9848077297210693;g[R>>2]=+g[P>>2]*.6427876353263855+ +g[Q>>2]*.663413941860199;g[S>>2]=+g[O>>2]-+g[R>>2];g[c[q>>2]>>2]=+g[S>>2]-+g[T>>2]*.8660253882408142;g[c[p>>2]>>2]=+g[W>>2]+ +g[X>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=(+g[U>>2]-+g[V>>2]-+g[T>>2])*.8660253882408142-+g[S>>2]*.5;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=(+g[R>>2]+ +g[O>>2])*.8660253882408142+ +g[W>>2]-+g[X>>2]*.5;g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=(+g[T>>2]-(+g[Z>>2]+ +g[Y>>2]))*.8660253882408142+(+g[$>>2]-+g[_>>2])*.5;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=(+g[Y>>2]-+g[Z>>2])*.5+ +g[W>>2]+(+g[_>>2]+ 
+g[$>>2])*.8660253882408142;c[aa>>2]=(c[aa>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ba;return}function Ls(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,17,6424);i=b;return}function Ms(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+208|0;n=da+192|0;o=da+188|0;p=da+184|0;q=da+180|0;r=da+176|0;s=da+172|0;t=da+168|0;ea=da+164|0;u=da+160|0;v=da+156|0;ca=da+136|0;N=da+132|0;Y=da+128|0;F=da+124|0;S=da+120|0;I=da+116|0;T=da+112|0;J=da+108|0;_=da+104|0;y=da+100|0;V=da+96|0;B=da+92|0;W=da+88|0;C=da+84|0;Z=da+80|0;L=da+76|0;M=da+72|0;D=da+68|0;E=da+64|0;G=da+60|0;H=da+56|0;w=da+52|0;x=da+48|0;z=da+44|0;A=da+40|0;Q=da+36|0;R=da+32|0;K=da+28|0;O=da+24|0;P=da+20|0;U=da+16|0;X=da+12|0;ba=da+8|0;$=da+4|0;aa=da;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ea>>2]=k;c[u>>2]=l;c[v>>2]=m;g[da+152>>2]=.25;g[da+148>>2]=.55901700258255;g[da+144>>2]=.9510565400123596;g[da+140>>2]=.5877852439880371;c[ca>>2]=c[ea>>2];while(1){if((c[ca>>2]|0)<=0)break;g[L>>2]=+g[c[n>>2]>>2];g[M>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Y>>2]=+g[L>>2]+ +g[M>>2];g[D>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[E>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[S>>2]=+g[D>>2]+ +g[E>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[H>>2]=+g[c[o>>2]>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[T>>2]=+g[G>>2]+ +g[H>>2];g[J>>2]=+g[F>>2]+ +g[I>>2];g[_>>2]=+g[S>>2]+ +g[T>>2];g[w>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[x>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[V>>2]=+g[w>>2]+ 
+g[x>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[W>>2]=+g[z>>2]+ +g[A>>2];g[C>>2]=+g[y>>2]+ +g[B>>2];g[Z>>2]=+g[V>>2]+ +g[W>>2];g[Q>>2]=+g[I>>2]-+g[F>>2];g[R>>2]=+g[y>>2]-+g[B>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[Q>>2]*.5877852439880371-+g[R>>2]*.9510565400123596;g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[R>>2]*.5877852439880371+ +g[Q>>2]*.9510565400123596;g[K>>2]=(+g[C>>2]-+g[J>>2])*.55901700258255;g[O>>2]=+g[C>>2]+ +g[J>>2];g[P>>2]=+g[N>>2]-+g[O>>2]*.25;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[K>>2]+ +g[P>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[N>>2]+ +g[O>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[P>>2]-+g[K>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[U>>2]*.9510565400123596-+g[X>>2]*.5877852439880371;g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[X>>2]*.9510565400123596+ +g[U>>2]*.5877852439880371;g[ba>>2]=(+g[Z>>2]-+g[_>>2])*.55901700258255;g[$>>2]=+g[Z>>2]+ +g[_>>2];g[aa>>2]=+g[Y>>2]-+g[$>>2]*.25;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[aa>>2]-+g[ba>>2];g[c[p>>2]>>2]=+g[Y>>2]+ +g[$>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[ba>>2]+ +g[aa>>2];c[ca>>2]=(c[ca>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=da;return}function Ns(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,18,6472);i=b;return}function Os(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0;S=i;i=i+176|0;n=S+164|0;o=S+160|0;p=S+156|0;q=S+152|0;r=S+148|0;s=S+144|0;t=S+140|0;T=S+136|0;u=S+132|0;v=S+128|0;R=S+84|0;w=S+80|0;z=S+76|0;Q=S+72|0;L=S+68|0;M=S+64|0;I=S+60|0;N=S+56|0;F=S+52|0;P=S+48|0;C=S+44|0;O=S+40|0;G=S+36|0;H=S+32|0;x=S+28|0;y=S+24|0;J=S+20|0;K=S+16|0;D=S+12|0;E=S+8|0;A=S+4|0;B=S;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[T>>2]=k;c[u>>2]=l;c[v>>2]=m;g[S+124>>2]=.6548607349395752;g[S+120>>2]=.1423148363828659;g[S+116>>2]=.9594929814338684;g[S+112>>2]=.4154150187969208;g[S+108>>2]=.8412535190582275;g[S+104>>2]=.9898214340209961;g[S+100>>2]=.9096319675445557;g[S+96>>2]=.28173255920410156;g[S+92>>2]=.5406408309936523;g[S+88>>2]=.7557495832443237;c[R>>2]=c[T>>2];while(1){if((c[R>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[Q>>2]=+g[y>>2]-+g[x>>2];g[J>>2]=+g[c[o>>2]>>2];g[K>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[M>>2]=+g[K>>2]-+g[J>>2];g[G>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[H>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[N>>2]=+g[H>>2]-+g[G>>2];g[D>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[P>>2]=+g[E>>2]-+g[D>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[B>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[O>>2]=+g[B>>2]-+g[A>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[M>>2]*.7557495832443237+ +g[N>>2]*.5406408309936523+(+g[O>>2]*.28173255920410156-+g[P>>2]*.9096319675445557)-+g[Q>>2]*.9898214340209961;g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[I>>2]*.8412535190582275+ +g[w>>2]+(+g[F>>2]*.4154150187969208-+g[C>>2]*.9594929814338684)+-(+g[z>>2]*.1423148363828659+ 
+g[L>>2]*.6548607349395752);g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[M>>2]*.9096319675445557+ +g[Q>>2]*.7557495832443237+-(+g[P>>2]*.5406408309936523+ +g[O>>2]*.9898214340209961)-+g[N>>2]*.28173255920410156;g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[M>>2]*.28173255920410156+ +g[N>>2]*.7557495832443237+(+g[P>>2]*.9898214340209961-+g[O>>2]*.9096319675445557)-+g[Q>>2]*.5406408309936523;g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[M>>2]*.5406408309936523+ +g[Q>>2]*.9096319675445557+(+g[N>>2]*.9898214340209961+ +g[O>>2]*.7557495832443237)+ +g[P>>2]*.28173255920410156;g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[M>>2]*.9898214340209961+ +g[O>>2]*.5406408309936523+(+g[P>>2]*.7557495832443237-+g[N>>2]*.9096319675445557)-+g[Q>>2]*.28173255920410156;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[I>>2]*.4154150187969208+ +g[w>>2]+(+g[C>>2]*.8412535190582275-+g[F>>2]*.6548607349395752)+-(+g[z>>2]*.9594929814338684+ +g[L>>2]*.1423148363828659);g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[L>>2]*.8412535190582275+ +g[w>>2]+(+g[z>>2]*.4154150187969208-+g[F>>2]*.9594929814338684)+-(+g[C>>2]*.6548607349395752+ +g[I>>2]*.1423148363828659);g[c[p>>2]>>2]=+g[w>>2]+ +g[L>>2]+ +g[z>>2]+ +g[I>>2]+ +g[C>>2]+ +g[F>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[L>>2]*.4154150187969208+ +g[w>>2]+(+g[F>>2]*.8412535190582275-+g[C>>2]*.1423148363828659)+-(+g[I>>2]*.9594929814338684+ +g[z>>2]*.6548607349395752);g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[z>>2]*.8412535190582275+ +g[w>>2]+(+g[C>>2]*.4154150187969208-+g[F>>2]*.1423148363828659)+-(+g[I>>2]*.6548607349395752+ +g[L>>2]*.9594929814338684);c[R>>2]=(c[R>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=S;return}function Ps(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,19,6520);i=b;return}function Qs(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,S
g=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,Tm=0,Um=0,Vm=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,o
o=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0,uq=0,vq=0,wq=0,xq=0,yq=0,zq=0,Aq=0,Bq=0,Cq=0,Dq=0,Eq=0,Fq=0,Gq=0,Hq=0,Iq=0,Jq=0,Kq=0,Lq=0,Mq=0,Nq=0,Oq=0,Pq=0,Qq=0,Rq=0,Sq=0,Tq=0,Uq=0,Vq=0,Wq=0,Xq=0,Yq=0,Zq=0,_q=0,$q=0,ar=0,br=0,cr=0,dr=0,er=0,fr=0,gr=0,hr=0,ir=0,jr=0,kr=0,lr=0;kr=i;i=i+4e3|0;n=kr+3988|0;o=kr+3984|0;p=kr+3980|0;q=kr+3976|0;r=kr+3972|0;s=kr+3968|0;t=kr+3964|0;lr=kr+3960|0;u=kr+3956|0;v=kr+3952|0;jr=kr+3824|0;an=kr+3820|0;Co=kr+3816|0;Na=kr+3812|0;xh=kr+3808|0;He=kr+3804|0;Ni=kr+3800|0;pq=kr+3796|0;Ck=kr+3792|0;dr=kr+3788|0;Em=kr+3784|0;dn=kr+3780|0;Do=kr+3776|0;sb=kr+3772|0;Oi=kr+3768|0;Ke=kr+3764|0;yh=kr+3760|0;gk=kr+3756|0;Dk=kr+3752|0;hn=kr+3748|0;Fo=kr+3744|0;Cb=kr+3740|0;Me=kr+3736|0;Ch=kr+3732|0;Sh=kr+3728|0;vk=kr+3724|0;Ek=kr+3720|0;ln=kr+3716|0;Go=kr+3712|0;Lb=kr+3708|0;Ne=kr+3704|0;Fh=kr+3700|0;Th=kr+3696|0;la=kr+3692|0;fl=kr+3688|0;sn=kr+3684|0;rp=kr+3680|0;vn=kr+3676|0;oq=kr+3672|0;Aa=kr+3668|0;gl=kr+3664|0;ab=kr+3660|0;wg=kr+3656|0;Mg=kr+3652|0;Cj=kr+3648|0;Pg=kr+3644|0;Bj=kr+3640|0;hb=kr+3636|0;zf=kr+3632|0;Dd=kr+3628|0;Kf=kr+3624|0;fp=kr+3620|0;Cp=kr+3616|0;so=kr+3612|0;Gp=kr+3608|0;ve=kr+3604|0;Of=kr+3600|0;si=kr+3596|0;Nj=kr+3592|0;al=kr+3588|0;Sm=kr+3584|0;wm=kr+3580|0;Tm=kr+3576|0;Di=kr+3572|0;Rj=kr+3568|0;fc=kr+3564|0;Gf=kr+3560|0;Mo=kr+3556|0;yp=kr+3552|0;Xo=kr+3548|0;wp=kr+3544|0;Xd=kr+3540|0;Ef=kr+3536|0;ah=kr+3532|0;Jj=kr+3528|0;tl=kr+3524|0;km=kr+3520|0;Sk=kr+3516|0;lm=kr+3512|0;Mh=kr+3508|0;Hj=kr+3504|0;S=kr+3500|0;
il=kr+3496|0;ao=kr+3492|0;lq=kr+3488|0;eo=kr+3484|0;mq=kr+3480|0;Ga=kr+3476|0;jl=kr+3472|0;Wb=kr+3468|0;ug=kr+3464|0;Tg=kr+3460|0;yj=kr+3456|0;Wg=kr+3452|0;zj=kr+3448|0;Bc=kr+3444|0;tg=kr+3440|0;nm=kr+3436|0;Pm=kr+3432|0;wc=kr+3428|0;Yd=kr+3424|0;_o=kr+3420|0;zp=kr+3416|0;Md=kr+3412|0;Zd=kr+3408|0;Hh=kr+3404|0;Oh=kr+3400|0;To=kr+3396|0;vp=kr+3392|0;dh=kr+3388|0;Nh=kr+3384|0;Kk=kr+3380|0;Tk=kr+3376|0;Vm=kr+3372|0;Wm=kr+3368|0;Te=kr+3364|0;we=kr+3360|0;vo=kr+3356|0;Dp=kr+3352|0;jf=kr+3348|0;xe=kr+3344|0;yi=kr+3340|0;Fi=kr+3336|0;oo=kr+3332|0;Fp=kr+3328|0;vi=kr+3324|0;Ei=kr+3320|0;om=kr+3316|0;xm=kr+3312|0;Ob=kr+3308|0;Ja=kr+3304|0;Yn=kr+3300|0;La=kr+3296|0;of=kr+3292|0;Ge=kr+3288|0;wk=kr+3284|0;Ka=kr+3280|0;w=kr+3276|0;Fa=kr+3272|0;Fl=kr+3268|0;Om=kr+3264|0;Xc=kr+3260|0;ee=kr+3256|0;Gh=kr+3252|0;Pi=kr+3248|0;Ma=kr+3244|0;Fe=kr+3240|0;xg=kr+3236|0;gp=kr+3232|0;Tq=kr+3228|0;Oa=kr+3224|0;br=kr+3220|0;qb=kr+3216|0;Wq=kr+3212|0;Pa=kr+3208|0;_q=kr+3204|0;pb=kr+3200|0;Rq=kr+3196|0;Sq=kr+3192|0;$q=kr+3188|0;ar=kr+3184|0;Uq=kr+3180|0;Vq=kr+3176|0;Yq=kr+3172|0;Zq=kr+3168|0;Xq=kr+3164|0;cr=kr+3160|0;bn=kr+3156|0;cn=kr+3152|0;ob=kr+3148|0;rb=kr+3144|0;Ie=kr+3140|0;Je=kr+3136|0;hr=kr+3132|0;ub=kr+3128|0;Zj=kr+3124|0;Ab=kr+3120|0;bk=kr+3116|0;zb=kr+3112|0;ek=kr+3108|0;xb=kr+3104|0;fr=kr+3100|0;gr=kr+3096|0;ir=kr+3092|0;Yj=kr+3088|0;$j=kr+3084|0;ak=kr+3080|0;vb=kr+3076|0;ck=kr+3072|0;dk=kr+3068|0;wb=kr+3064|0;_j=kr+3060|0;fk=kr+3056|0;fn=kr+3052|0;gn=kr+3048|0;yb=kr+3044|0;Bb=kr+3040|0;Ah=kr+3036|0;Bh=kr+3032|0;jk=kr+3028|0;Db=kr+3024|0;mk=kr+3020|0;Jb=kr+3016|0;qk=kr+3012|0;Ib=kr+3008|0;tk=kr+3004|0;Gb=kr+3e3|0;hk=kr+2996|0;ik=kr+2992|0;kk=kr+2988|0;lk=kr+2984|0;ok=kr+2980|0;pk=kr+2976|0;Eb=kr+2972|0;rk=kr+2968|0;sk=kr+2964|0;Fb=kr+2960|0;nk=kr+2956|0;uk=kr+2952|0;jn=kr+2948|0;kn=kr+2944|0;Hb=kr+2940|0;Kb=kr+2936|0;Dh=kr+2932|0;Eh=kr+2928|0;B=kr+2924|0;bb=kr+2920|0;E=kr+2916|0;Ta=kr+2912|0;ga=kr+2908|0;cb=kr+2904|0;ja=kr+2900|0;Sa=kr+2896|0;oa=kr+2892|0;ra=kr+2888|0;_a=kr+2884|0;pn
=kr+2880|0;eb=kr+2876|0;va=kr+2872|0;ya=kr+2868|0;Xa=kr+2864|0;qn=kr+2860|0;fb=kr+2856|0;z=kr+2852|0;A=kr+2848|0;C=kr+2844|0;D=kr+2840|0;G=kr+2836|0;fa=kr+2832|0;Ra=kr+2828|0;ha=kr+2824|0;ia=kr+2820|0;Qa=kr+2816|0;Ya=kr+2812|0;Za=kr+2808|0;Va=kr+2804|0;Wa=kr+2800|0;ma=kr+2796|0;na=kr+2792|0;pa=kr+2788|0;qa=kr+2784|0;ta=kr+2780|0;ua=kr+2776|0;wa=kr+2772|0;xa=kr+2768|0;F=kr+2764|0;ka=kr+2760|0;on=kr+2756|0;rn=kr+2752|0;tn=kr+2748|0;un=kr+2744|0;sa=kr+2740|0;za=kr+2736|0;Ua=kr+2732|0;$a=kr+2728|0;Kg=kr+2724|0;Lg=kr+2720|0;Ng=kr+2716|0;Og=kr+2712|0;db=kr+2708|0;gb=kr+2704|0;de=kr+2700|0;Wk=kr+2696|0;qe=kr+2692|0;Xk=kr+2688|0;md=kr+2684|0;_k=kr+2680|0;mf=kr+2676|0;Zk=kr+2672|0;tm=kr+2668|0;um=kr+2664|0;ud=kr+2660|0;cp=kr+2656|0;te=kr+2652|0;qm=kr+2648|0;rm=kr+2644|0;Bd=kr+2640|0;dp=kr+2636|0;se=kr+2632|0;be=kr+2628|0;ce=kr+2624|0;nf=kr+2620|0;pe=kr+2616|0;gd=kr+2612|0;hd=kr+2608|0;id=kr+2604|0;jd=kr+2600|0;kd=kr+2596|0;ld=kr+2592|0;qd=kr+2588|0;td=kr+2584|0;xd=kr+2580|0;Ad=kr+2576|0;od=kr+2572|0;pd=kr+2568|0;rd=kr+2564|0;sd=kr+2560|0;vd=kr+2556|0;wd=kr+2552|0;yd=kr+2548|0;zd=kr+2544|0;nd=kr+2540|0;Cd=kr+2536|0;bp=kr+2532|0;ep=kr+2528|0;qo=kr+2524|0;ro=kr+2520|0;re=kr+2516|0;ue=kr+2512|0;qi=kr+2508|0;ri=kr+2504|0;Yk=kr+2500|0;$k=kr+2496|0;sm=kr+2492|0;vm=kr+2488|0;Bi=kr+2484|0;Ci=kr+2480|0;Hc=kr+2476|0;nl=kr+2472|0;Sd=kr+2468|0;ol=kr+2464|0;Oc=kr+2460|0;rl=kr+2456|0;Pd=kr+2452|0;ql=kr+2448|0;Pk=kr+2444|0;Qk=kr+2440|0;Wc=kr+2436|0;Jo=kr+2432|0;Vd=kr+2428|0;Mk=kr+2424|0;Nk=kr+2420|0;dc=kr+2416|0;Ko=kr+2412|0;Ud=kr+2408|0;Fc=kr+2404|0;Gc=kr+2400|0;Qd=kr+2396|0;Rd=kr+2392|0;Ic=kr+2388|0;Jc=kr+2384|0;Kc=kr+2380|0;Lc=kr+2376|0;Mc=kr+2372|0;Nc=kr+2368|0;Sc=kr+2364|0;Vc=kr+2360|0;$b=kr+2356|0;cc=kr+2352|0;Qc=kr+2348|0;Rc=kr+2344|0;Tc=kr+2340|0;Uc=kr+2336|0;Zb=kr+2332|0;_b=kr+2328|0;ac=kr+2324|0;bc=kr+2320|0;Pc=kr+2316|0;ec=kr+2312|0;Io=kr+2308|0;Lo=kr+2304|0;Vo=kr+2300|0;Wo=kr+2296|0;Td=kr+2292|0;Wd=kr+2288|0;_g=kr+2284|0;$g=kr+2280|0;pl=kr+2276|0;sl=kr+2272|0;Ok=kr+2268|0;Rk=kr+
2264|0;Kh=kr+2260|0;Lh=kr+2256|0;Ea=kr+2252|0;jb=kr+2248|0;J=kr+2244|0;Yb=kr+2240|0;N=kr+2236|0;Xb=kr+2232|0;Q=kr+2228|0;mb=kr+2224|0;V=kr+2220|0;Y=kr+2216|0;Rb=kr+2212|0;Zn=kr+2208|0;zc=kr+2204|0;aa=kr+2200|0;da=kr+2196|0;Ub=kr+2192|0;_n=kr+2188|0;yc=kr+2184|0;Ca=kr+2180|0;Da=kr+2176|0;H=kr+2172|0;I=kr+2168|0;L=kr+2164|0;M=kr+2160|0;kb=kr+2156|0;O=kr+2152|0;P=kr+2148|0;lb=kr+2144|0;Pb=kr+2140|0;Qb=kr+2136|0;Sb=kr+2132|0;Tb=kr+2128|0;T=kr+2124|0;U=kr+2120|0;W=kr+2116|0;X=kr+2112|0;_=kr+2108|0;$=kr+2104|0;ba=kr+2100|0;ca=kr+2096|0;K=kr+2092|0;R=kr+2088|0;xn=kr+2084|0;$n=kr+2080|0;bo=kr+2076|0;co=kr+2072|0;Z=kr+2068|0;ea=kr+2064|0;nb=kr+2060|0;Vb=kr+2056|0;Rg=kr+2052|0;Sg=kr+2048|0;Ug=kr+2044|0;Vg=kr+2040|0;xc=kr+2036|0;Ac=kr+2032|0;ic=kr+2028|0;No=kr+2024|0;uc=kr+2020|0;wl=kr+2016|0;_c=kr+2012|0;Qo=kr+2008|0;Kd=kr+2004|0;Dl=kr+2e3|0;pc=kr+1996|0;Oo=kr+1992|0;rc=kr+1988|0;zl=kr+1984|0;fd=kr+1980|0;Ro=kr+1976|0;Hd=kr+1972|0;Ik=kr+1968|0;Po=kr+1964|0;So=kr+1960|0;gc=kr+1956|0;hc=kr+1952|0;ul=kr+1948|0;sc=kr+1944|0;tc=kr+1940|0;vl=kr+1936|0;Yc=kr+1932|0;Zc=kr+1928|0;Bl=kr+1924|0;Id=kr+1920|0;Jd=kr+1916|0;Cl=kr+1912|0;lc=kr+1908|0;xl=kr+1904|0;oc=kr+1900|0;yl=kr+1896|0;jc=kr+1892|0;kc=kr+1888|0;mc=kr+1884|0;nc=kr+1880|0;bd=kr+1876|0;El=kr+1872|0;ed=kr+1868|0;Hk=kr+1864|0;$c=kr+1860|0;ad=kr+1856|0;cd=kr+1852|0;dd=kr+1848|0;qc=kr+1844|0;vc=kr+1840|0;Yo=kr+1836|0;Zo=kr+1832|0;Gd=kr+1828|0;Ld=kr+1824|0;eh=kr+1820|0;fh=kr+1816|0;bh=kr+1812|0;ch=kr+1808|0;Al=kr+1804|0;Jk=kr+1800|0;fe=kr+1796|0;io=kr+1792|0;Re=kr+1788|0;dl=kr+1784|0;We=kr+1780|0;lo=kr+1776|0;gf=kr+1772|0;Ll=kr+1768|0;me=kr+1764|0;jo=kr+1760|0;oe=kr+1756|0;Hl=kr+1752|0;bf=kr+1748|0;mo=kr+1744|0;df=kr+1740|0;Ol=kr+1736|0;ko=kr+1732|0;no=kr+1728|0;Ed=kr+1724|0;Fd=kr+1720|0;bl=kr+1716|0;Pe=kr+1712|0;Qe=kr+1708|0;cl=kr+1704|0;Ue=kr+1700|0;Ve=kr+1696|0;Jl=kr+1692|0;ef=kr+1688|0;ff=kr+1684|0;Kl=kr+1680|0;ie=kr+1676|0;el=kr+1672|0;le=kr+1668|0;Gl=kr+1664|0;ge=kr+1660|0;he=kr+1656|0;je=kr+1652|0;ke=kr+1648|0;Ze=kr+1644|0
;Ml=kr+1640|0;af=kr+1636|0;Nl=kr+1632|0;Xe=kr+1628|0;Ye=kr+1624|0;_e=kr+1620|0;$e=kr+1616|0;ne=kr+1612|0;Se=kr+1608|0;to=kr+1604|0;uo=kr+1600|0;cf=kr+1596|0;hf=kr+1592|0;wi=kr+1588|0;xi=kr+1584|0;ti=kr+1580|0;ui=kr+1576|0;Il=kr+1572|0;Pl=kr+1568|0;y=kr+1564|0;jm=kr+1560|0;Ym=kr+1556|0;Dn=kr+1552|0;Ia=kr+1548|0;zn=kr+1544|0;Rm=kr+1540|0;Cn=kr+1536|0;er=kr+1532|0;x=kr+1528|0;Um=kr+1524|0;Xm=kr+1520|0;Ba=kr+1516|0;Ha=kr+1512|0;mm=kr+1508|0;Qm=kr+1504|0;yn=kr+1500|0;An=kr+1496|0;Bn=kr+1492|0;En=kr+1488|0;Fn=kr+1484|0;Sn=kr+1480|0;In=kr+1476|0;Rn=kr+1472|0;Mn=kr+1468|0;Wn=kr+1464|0;Pn=kr+1460|0;Xn=kr+1456|0;Gn=kr+1452|0;Hn=kr+1448|0;Kn=kr+1444|0;Ln=kr+1440|0;Nn=kr+1436|0;On=kr+1432|0;Jn=kr+1428|0;Qn=kr+1424|0;Vn=kr+1420|0;Zm=kr+1416|0;Tn=kr+1412|0;Un=kr+1408|0;_m=kr+1404|0;$m=kr+1400|0;Gk=kr+1396|0;Rl=kr+1392|0;Gm=kr+1388|0;am=kr+1384|0;ll=kr+1380|0;$l=kr+1376|0;Zl=kr+1372|0;fm=kr+1368|0;Vk=kr+1364|0;Km=kr+1360|0;Dm=kr+1356|0;Sl=kr+1352|0;Wl=kr+1348|0;em=kr+1344|0;zm=kr+1340|0;Lm=kr+1336|0;Fk=kr+1332|0;Fm=kr+1328|0;hl=kr+1324|0;kl=kr+1320|0;Xl=kr+1316|0;Yl=kr+1312|0;Lk=kr+1308|0;Uk=kr+1304|0;Bm=kr+1300|0;Cm=kr+1296|0;Ul=kr+1292|0;Vl=kr+1288|0;pm=kr+1284|0;ym=kr+1280|0;ml=kr+1276|0;Am=kr+1272|0;Jm=kr+1268|0;Mm=kr+1264|0;Hm=kr+1260|0;Im=kr+1256|0;Nm=kr+1252|0;Ql=kr+1248|0;Tl=kr+1244|0;_l=kr+1240|0;dm=kr+1236|0;gm=kr+1232|0;bm=kr+1228|0;cm=kr+1224|0;hm=kr+1220|0;im=kr+1216|0;kq=kr+1212|0;xq=kr+1208|0;Cq=kr+1204|0;Mq=kr+1200|0;Fq=kr+1196|0;Nq=kr+1192|0;tp=kr+1188|0;Hq=kr+1184|0;Bp=kr+1180|0;sq=kr+1176|0;Mp=kr+1172|0;yq=kr+1168|0;Pp=kr+1164|0;Iq=kr+1160|0;Ip=kr+1156|0;tq=kr+1152|0;iq=kr+1148|0;jq=kr+1144|0;Aq=kr+1140|0;Bq=kr+1136|0;Dq=kr+1132|0;Eq=kr+1128|0;nq=kr+1124|0;sp=kr+1120|0;xp=kr+1116|0;Ap=kr+1112|0;Kp=kr+1108|0;Lp=kr+1104|0;Np=kr+1100|0;Op=kr+1096|0;Ep=kr+1092|0;Hp=kr+1088|0;up=kr+1084|0;Jp=kr+1080|0;rq=kr+1076|0;uq=kr+1072|0;Qp=kr+1068|0;qq=kr+1064|0;vq=kr+1060|0;wq=kr+1056|0;zq=kr+1052|0;Gq=kr+1048|0;Lq=kr+1044|0;Oq=kr+1040|0;Jq=kr+1036|0;Kq=kr+1032|0;Pq=kr+1028|0
;Qq=kr+1024|0;nn=kr+1020|0;qp=kr+1016|0;Vp=kr+1012|0;dq=kr+1008|0;Yp=kr+1004|0;eq=kr+1e3|0;go=kr+996|0;_p=kr+992|0;ap=kr+988|0;lp=kr+984|0;Bo=kr+980|0;Rp=kr+976|0;hp=kr+972|0;$p=kr+968|0;xo=kr+964|0;mp=kr+960|0;en=kr+956|0;mn=kr+952|0;Tp=kr+948|0;Up=kr+944|0;Wp=kr+940|0;Xp=kr+936|0;wn=kr+932|0;fo=kr+928|0;Uo=kr+924|0;$o=kr+920|0;zo=kr+916|0;Ao=kr+912|0;Eo=kr+908|0;Ho=kr+904|0;po=kr+900|0;wo=kr+896|0;ho=kr+892|0;yo=kr+888|0;kp=kr+884|0;np=kr+880|0;ip=kr+876|0;jp=kr+872|0;op=kr+868|0;pp=kr+864|0;Sp=kr+860|0;Zp=kr+856|0;cq=kr+852|0;fq=kr+848|0;aq=kr+844|0;bq=kr+840|0;gq=kr+836|0;hq=kr+832|0;sg=kr+828|0;Fg=kr+824|0;Bf=kr+820|0;nh=kr+816|0;Xf=kr+812|0;oh=kr+808|0;Uf=kr+804|0;Gg=kr+800|0;Qf=kr+796|0;th=kr+792|0;Bg=kr+788|0;lh=kr+784|0;Jf=kr+780|0;sh=kr+776|0;Ag=kr+772|0;ih=kr+768|0;qg=kr+764|0;rg=kr+760|0;Sf=kr+756|0;Tf=kr+752|0;vg=kr+748|0;Af=kr+744|0;Vf=kr+740|0;Wf=kr+736|0;Mf=kr+732|0;jh=kr+728|0;Pf=kr+724|0;kh=kr+720|0;Lf=kr+716|0;Nf=kr+712|0;Ff=kr+708|0;gh=kr+704|0;If=kr+700|0;hh=kr+696|0;Df=kr+692|0;Hf=kr+688|0;Cf=kr+684|0;Rf=kr+680|0;zg=kr+676|0;Cg=kr+672|0;Yf=kr+668|0;yg=kr+664|0;Dg=kr+660|0;Eg=kr+656|0;Hg=kr+652|0;mh=kr+648|0;rh=kr+644|0;uh=kr+640|0;ph=kr+636|0;qh=kr+632|0;vh=kr+628|0;wh=kr+624|0;Zi=kr+620|0;jj=kr+616|0;Ej=kr+612|0;tj=kr+608|0;aj=kr+604|0;uj=kr+600|0;Xj=kr+596|0;kj=kr+592|0;Tj=kr+588|0;yk=kr+584|0;fj=kr+580|0;rj=kr+576|0;Mj=kr+572|0;xk=kr+568|0;ej=kr+564|0;oj=kr+560|0;Xi=kr+556|0;Yi=kr+552|0;Vj=kr+548|0;Wj=kr+544|0;Aj=kr+540|0;Dj=kr+536|0;_i=kr+532|0;$i=kr+528|0;Pj=kr+524|0;pj=kr+520|0;Sj=kr+516|0;qj=kr+512|0;Oj=kr+508|0;Qj=kr+504|0;Ij=kr+500|0;mj=kr+496|0;Lj=kr+492|0;nj=kr+488|0;Gj=kr+484|0;Kj=kr+480|0;Fj=kr+476|0;Uj=kr+472|0;dj=kr+468|0;gj=kr+464|0;bj=kr+460|0;cj=kr+456|0;hj=kr+452|0;ij=kr+448|0;lj=kr+444|0;sj=kr+440|0;xj=kr+436|0;zk=kr+432|0;vj=kr+428|0;wj=kr+424|0;Ak=kr+420|0;Bk=kr+416|0;Nb=kr+412|0;yf=kr+408|0;Dc=kr+404|0;gg=kr+400|0;pf=kr+396|0;hg=kr+392|0;Ee=kr+388|0;Zf=kr+384|0;Ae=kr+380|0;mg=kr+376|0;uf=kr+372|0;eg=kr+368|0;ae=kr+364|0;l
g=kr+360|0;tf=kr+356|0;bg=kr+352|0;tb=kr+348|0;Mb=kr+344|0;Ce=kr+340|0;De=kr+336|0;ib=kr+332|0;Cc=kr+328|0;Le=kr+324|0;Oe=kr+320|0;lf=kr+316|0;cg=kr+312|0;ze=kr+308|0;dg=kr+304|0;kf=kr+300|0;ye=kr+296|0;Od=kr+292|0;$f=kr+288|0;$d=kr+284|0;ag=kr+280|0;Nd=kr+276|0;_d=kr+272|0;Ec=kr+268|0;Be=kr+264|0;sf=kr+260|0;vf=kr+256|0;qf=kr+252|0;rf=kr+248|0;wf=kr+244|0;xf=kr+240|0;_f=kr+236|0;fg=kr+232|0;kg=kr+228|0;ng=kr+224|0;ig=kr+220|0;jg=kr+216|0;og=kr+212|0;pg=kr+208|0;Jg=kr+204|0;ci=kr+200|0;Yg=kr+196|0;mi=kr+192|0;Vh=kr+188|0;ni=kr+184|0;Mi=kr+180|0;di=kr+176|0;Ii=kr+172|0;Ti=kr+168|0;_h=kr+164|0;ki=kr+160|0;pi=kr+156|0;Si=kr+152|0;Zh=kr+148|0;hi=kr+144|0;zh=kr+140|0;Ig=kr+136|0;Ki=kr+132|0;Li=kr+128|0;Qg=kr+124|0;Xg=kr+120|0;Rh=kr+116|0;Uh=kr+112|0;Ai=kr+108|0;ii=kr+104|0;Hi=kr+100|0;ji=kr+96|0;zi=kr+92|0;Gi=kr+88|0;Jh=kr+84|0;fi=kr+80|0;Qh=kr+76|0;gi=kr+72|0;Ih=kr+68|0;Ph=kr+64|0;Zg=kr+60|0;Ji=kr+56|0;Yh=kr+52|0;$h=kr+48|0;Wh=kr+44|0;Xh=kr+40|0;ai=kr+36|0;bi=kr+32|0;ei=kr+28|0;li=kr+24|0;Ri=kr+20|0;Ui=kr+16|0;oi=kr+12|0;Qi=kr+8|0;Vi=kr+4|0;Wi=kr;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[lr>>2]=k;c[u>>2]=l;c[v>>2]=m;g[kr+3948>>2]=.803207516670227;g[kr+3944>>2]=.5956993103027344;g[kr+3940>>2]=.1467304676771164;g[kr+3936>>2]=.9891765117645264;g[kr+3932>>2]=.7409511208534241;g[kr+3928>>2]=.6715589761734009;g[kr+3924>>2]=.049067676067352295;g[kr+3920>>2]=.9987954497337341;g[kr+3916>>2]=.24298018217086792;g[kr+3912>>2]=.9700312614440918;g[kr+3908>>2]=.5141027569770813;g[kr+3904>>2]=.8577286005020142;g[kr+3900>>2]=.3368898630142212;g[kr+3896>>2]=.9415440559387207;g[kr+3892>>2]=.4275550842285156;g[kr+3888>>2]=.903989315032959;g[kr+3884>>2]=.0980171412229538;g[kr+3880>>2]=.9951847195625305;g[kr+3876>>2]=.6343932747840881;g[kr+3872>>2]=.7730104327201843;g[kr+3868>>2]=.8819212913513184;g[kr+3864>>2]=.4713967442512512;g[kr+3860>>2]=.9569403529167175;g[kr+3856>>2]=.290284663438797;g[kr+3852>>2]=.5555702447891235;g[kr+3848>>2]=.8314695954322815;g[kr+384
4>>2]=.19509032368659973;g[kr+3840>>2]=.9807852506637573;g[kr+3836>>2]=.3826834261417389;g[kr+3832>>2]=.9238795042037964;g[kr+3828>>2]=.7071067690849304;c[jr>>2]=c[lr>>2];while(1){if((c[jr>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[Fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<5<<2)>>2];g[Ob>>2]=+g[w>>2]+ +g[Fa>>2];g[Ja>>2]=+g[w>>2]-+g[Fa>>2];g[Fl>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*56<<2)>>2];g[Om>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*24<<2)>>2];g[Yn>>2]=+g[Fl>>2]+ +g[Om>>2];g[La>>2]=+g[Fl>>2]-+g[Om>>2];g[Xc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<4<<2)>>2];g[ee>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*48<<2)>>2];g[of>>2]=+g[Xc>>2]+ +g[ee>>2];g[Ge>>2]=+g[Xc>>2]-+g[ee>>2];g[Gh>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Pi>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*40<<2)>>2];g[wk>>2]=+g[Gh>>2]+ +g[Pi>>2];g[Ka>>2]=+g[Gh>>2]-+g[Pi>>2];g[an>>2]=+g[Ob>>2]-+g[of>>2];g[Co>>2]=+g[Yn>>2]-+g[wk>>2];g[Ma>>2]=(+g[Ka>>2]+ +g[La>>2])*.7071067690849304;g[Na>>2]=+g[Ja>>2]+ +g[Ma>>2];g[xh>>2]=+g[Ja>>2]-+g[Ma>>2];g[Fe>>2]=(+g[La>>2]-+g[Ka>>2])*.7071067690849304;g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[Ni>>2]=+g[Ge>>2]+ +g[Fe>>2];g[xg>>2]=+g[Ob>>2]+ +g[of>>2];g[gp>>2]=+g[wk>>2]+ +g[Yn>>2];g[pq>>2]=+g[xg>>2]+ +g[gp>>2];g[Ck>>2]=+g[xg>>2]-+g[gp>>2];g[Rq>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Sq>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*36<<2)>>2];g[Tq>>2]=+g[Rq>>2]+ +g[Sq>>2];g[Oa>>2]=+g[Rq>>2]-+g[Sq>>2];g[$q>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ar>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*44<<2)>>2];g[br>>2]=+g[$q>>2]+ +g[ar>>2];g[qb>>2]=+g[$q>>2]-+g[ar>>2];g[Uq>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*20<<2)>>2];g[Vq>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*52<<2)>>2];g[Wq>>2]=+g[Uq>>2]+ +g[Vq>>2];g[Pa>>2]=+g[Uq>>2]-+g[Vq>>2];g[Yq>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*60<<2)>>2];g[Zq>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*28<<2)>>2];g[_q>>2]=+g[Yq>>2]+ +g[Zq>>2];g[pb>>2]=+g[Yq>>2]-+g[Zq>>2];g[Xq>>2]=+g[Tq>>2]+ +g[Wq>>2];g[cr>>2]=+g[_q>>2]+ +g[br>>2];g[dr>>2]=+g[Xq>>2]+ +g[cr>>2];g[Em>>2]=+g[cr>>2]-+g[Xq>>2];g[bn>>2]=+g[Tq>>2]-+g[Wq>>2];g[cn>>2]=+g[_q>>2]-+g[br>>2];g[dn>>2]=(+g[bn>>2]+ 
+g[cn>>2])*.7071067690849304;g[Do>>2]=(+g[cn>>2]-+g[bn>>2])*.7071067690849304;g[ob>>2]=+g[Oa>>2]*.9238795042037964-+g[Pa>>2]*.3826834261417389;g[rb>>2]=+g[pb>>2]*.9238795042037964+ +g[qb>>2]*.3826834261417389;g[sb>>2]=+g[ob>>2]+ +g[rb>>2];g[Oi>>2]=+g[rb>>2]-+g[ob>>2];g[Ie>>2]=+g[pb>>2]*.3826834261417389-+g[qb>>2]*.9238795042037964;g[Je>>2]=+g[Oa>>2]*.3826834261417389+ +g[Pa>>2]*.9238795042037964;g[Ke>>2]=+g[Ie>>2]-+g[Je>>2];g[yh>>2]=+g[Je>>2]+ +g[Ie>>2];g[fr>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[gr>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*34<<2)>>2];g[hr>>2]=+g[fr>>2]+ +g[gr>>2];g[ub>>2]=+g[fr>>2]-+g[gr>>2];g[ir>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*18<<2)>>2];g[Yj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*50<<2)>>2];g[Zj>>2]=+g[ir>>2]+ +g[Yj>>2];g[Ab>>2]=+g[ir>>2]-+g[Yj>>2];g[$j>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ak>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*42<<2)>>2];g[vb>>2]=+g[$j>>2]-+g[ak>>2];g[ck>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*58<<2)>>2];g[dk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*26<<2)>>2];g[wb>>2]=+g[ck>>2]-+g[dk>>2];g[bk>>2]=+g[$j>>2]+ +g[ak>>2];g[zb>>2]=(+g[wb>>2]-+g[vb>>2])*.7071067690849304;g[ek>>2]=+g[ck>>2]+ +g[dk>>2];g[xb>>2]=(+g[vb>>2]+ +g[wb>>2])*.7071067690849304;g[_j>>2]=+g[hr>>2]+ +g[Zj>>2];g[fk>>2]=+g[bk>>2]+ +g[ek>>2];g[gk>>2]=+g[_j>>2]+ +g[fk>>2];g[Dk>>2]=+g[_j>>2]-+g[fk>>2];g[fn>>2]=+g[hr>>2]-+g[Zj>>2];g[gn>>2]=+g[ek>>2]-+g[bk>>2];g[hn>>2]=+g[fn>>2]*.9238795042037964+ +g[gn>>2]*.3826834261417389;g[Fo>>2]=+g[gn>>2]*.9238795042037964-+g[fn>>2]*.3826834261417389;g[yb>>2]=+g[ub>>2]+ +g[xb>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Cb>>2]=+g[yb>>2]*.9807852506637573+ +g[Bb>>2]*.19509032368659973;g[Me>>2]=+g[Bb>>2]*.9807852506637573-+g[yb>>2]*.19509032368659973;g[Ah>>2]=+g[ub>>2]-+g[xb>>2];g[Bh>>2]=+g[Ab>>2]+ +g[zb>>2];g[Ch>>2]=+g[Ah>>2]*.8314695954322815+ +g[Bh>>2]*.5555702447891235;g[Sh>>2]=+g[Bh>>2]*.8314695954322815-+g[Ah>>2]*.5555702447891235;g[hk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*62<<2)>>2];g[ik>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*30<<2)>>2];g[jk>>2]=+g[hk>>2]+ 
+g[ik>>2];g[Db>>2]=+g[hk>>2]-+g[ik>>2];g[kk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[lk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*46<<2)>>2];g[mk>>2]=+g[kk>>2]+ +g[lk>>2];g[Jb>>2]=+g[kk>>2]-+g[lk>>2];g[ok>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[pk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*38<<2)>>2];g[Eb>>2]=+g[ok>>2]-+g[pk>>2];g[rk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*54<<2)>>2];g[sk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*22<<2)>>2];g[Fb>>2]=+g[rk>>2]-+g[sk>>2];g[qk>>2]=+g[ok>>2]+ +g[pk>>2];g[Ib>>2]=(+g[Fb>>2]-+g[Eb>>2])*.7071067690849304;g[tk>>2]=+g[rk>>2]+ +g[sk>>2];g[Gb>>2]=(+g[Eb>>2]+ +g[Fb>>2])*.7071067690849304;g[nk>>2]=+g[jk>>2]+ +g[mk>>2];g[uk>>2]=+g[qk>>2]+ +g[tk>>2];g[vk>>2]=+g[nk>>2]+ +g[uk>>2];g[Ek>>2]=+g[nk>>2]-+g[uk>>2];g[jn>>2]=+g[jk>>2]-+g[mk>>2];g[kn>>2]=+g[tk>>2]-+g[qk>>2];g[ln>>2]=+g[jn>>2]*.9238795042037964-+g[kn>>2]*.3826834261417389;g[Go>>2]=+g[jn>>2]*.3826834261417389+ +g[kn>>2]*.9238795042037964;g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[Lb>>2]=+g[Hb>>2]*.9807852506637573-+g[Kb>>2]*.19509032368659973;g[Ne>>2]=+g[Hb>>2]*.19509032368659973+ +g[Kb>>2]*.9807852506637573;g[Dh>>2]=+g[Db>>2]-+g[Gb>>2];g[Eh>>2]=+g[Jb>>2]+ +g[Ib>>2];g[Fh>>2]=+g[Dh>>2]*.8314695954322815-+g[Eh>>2]*.5555702447891235;g[Th>>2]=+g[Dh>>2]*.5555702447891235+ +g[Eh>>2]*.8314695954322815;g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*33<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[bb>>2]=+g[z>>2]-+g[A>>2];g[C>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*17<<2)>>2];g[D>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*49<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Ta>>2]=+g[C>>2]-+g[D>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[fa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*41<<2)>>2];g[Ra>>2]=+g[G>>2]-+g[fa>>2];g[ha>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*57<<2)>>2];g[ia>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*25<<2)>>2];g[Qa>>2]=+g[ha>>2]-+g[ia>>2];g[ga>>2]=+g[G>>2]+ +g[fa>>2];g[cb>>2]=(+g[Ra>>2]+ +g[Qa>>2])*.7071067690849304;g[ja>>2]=+g[ha>>2]+ 
+g[ia>>2];g[Sa>>2]=(+g[Qa>>2]-+g[Ra>>2])*.7071067690849304;g[ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*37<<2)>>2];g[oa>>2]=+g[ma>>2]+ +g[na>>2];g[Ya>>2]=+g[ma>>2]-+g[na>>2];g[pa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*21<<2)>>2];g[qa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*53<<2)>>2];g[ra>>2]=+g[pa>>2]+ +g[qa>>2];g[Za>>2]=+g[pa>>2]-+g[qa>>2];g[_a>>2]=+g[Ya>>2]*.3826834261417389+ +g[Za>>2]*.9238795042037964;g[pn>>2]=+g[oa>>2]-+g[ra>>2];g[eb>>2]=+g[Ya>>2]*.9238795042037964-+g[Za>>2]*.3826834261417389;g[ta>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*61<<2)>>2];g[ua>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*29<<2)>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[Va>>2]=+g[ta>>2]-+g[ua>>2];g[wa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[xa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*45<<2)>>2];g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[Wa>>2]=+g[wa>>2]-+g[xa>>2];g[Xa>>2]=+g[Va>>2]*.3826834261417389-+g[Wa>>2]*.9238795042037964;g[qn>>2]=+g[va>>2]-+g[ya>>2];g[fb>>2]=+g[Va>>2]*.9238795042037964+ +g[Wa>>2]*.3826834261417389;g[F>>2]=+g[B>>2]+ +g[E>>2];g[ka>>2]=+g[ga>>2]+ +g[ja>>2];g[la>>2]=+g[F>>2]+ +g[ka>>2];g[fl>>2]=+g[F>>2]-+g[ka>>2];g[on>>2]=+g[B>>2]-+g[E>>2];g[rn>>2]=(+g[pn>>2]+ +g[qn>>2])*.7071067690849304;g[sn>>2]=+g[on>>2]+ +g[rn>>2];g[rp>>2]=+g[on>>2]-+g[rn>>2];g[tn>>2]=+g[ja>>2]-+g[ga>>2];g[un>>2]=(+g[qn>>2]-+g[pn>>2])*.7071067690849304;g[vn>>2]=+g[tn>>2]+ +g[un>>2];g[oq>>2]=+g[un>>2]-+g[tn>>2];g[sa>>2]=+g[oa>>2]+ +g[ra>>2];g[za>>2]=+g[va>>2]+ +g[ya>>2];g[Aa>>2]=+g[sa>>2]+ +g[za>>2];g[gl>>2]=+g[za>>2]-+g[sa>>2];g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[$a>>2]=+g[Xa>>2]-+g[_a>>2];g[ab>>2]=+g[Ua>>2]+ +g[$a>>2];g[wg>>2]=+g[$a>>2]-+g[Ua>>2];g[Kg>>2]=+g[bb>>2]-+g[cb>>2];g[Lg>>2]=+g[_a>>2]+ +g[Xa>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[Cj>>2]=+g[Kg>>2]-+g[Lg>>2];g[Ng>>2]=+g[Ta>>2]+ +g[Sa>>2];g[Og>>2]=+g[fb>>2]-+g[eb>>2];g[Pg>>2]=+g[Ng>>2]+ +g[Og>>2];g[Bj>>2]=+g[Og>>2]-+g[Ng>>2];g[db>>2]=+g[bb>>2]+ +g[cb>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[hb>>2]=+g[db>>2]+ 
+g[gb>>2];g[zf>>2]=+g[db>>2]-+g[gb>>2];g[be>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*63<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2];g[de>>2]=+g[be>>2]-+g[ce>>2];g[Wk>>2]=+g[be>>2]+ +g[ce>>2];g[nf>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[pe>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*47<<2)>>2];g[qe>>2]=+g[nf>>2]-+g[pe>>2];g[Xk>>2]=+g[nf>>2]+ +g[pe>>2];g[gd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[hd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*39<<2)>>2];g[id>>2]=+g[gd>>2]-+g[hd>>2];g[jd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*55<<2)>>2];g[kd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2];g[ld>>2]=+g[jd>>2]-+g[kd>>2];g[md>>2]=(+g[id>>2]+ +g[ld>>2])*.7071067690849304;g[_k>>2]=+g[jd>>2]+ +g[kd>>2];g[mf>>2]=(+g[ld>>2]-+g[id>>2])*.7071067690849304;g[Zk>>2]=+g[gd>>2]+ +g[hd>>2];g[od>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[pd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*35<<2)>>2];g[qd>>2]=+g[od>>2]-+g[pd>>2];g[tm>>2]=+g[od>>2]+ +g[pd>>2];g[rd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2];g[sd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*51<<2)>>2];g[td>>2]=+g[rd>>2]-+g[sd>>2];g[um>>2]=+g[rd>>2]+ +g[sd>>2];g[ud>>2]=+g[qd>>2]*.9238795042037964-+g[td>>2]*.3826834261417389;g[cp>>2]=+g[tm>>2]-+g[um>>2];g[te>>2]=+g[qd>>2]*.3826834261417389+ +g[td>>2]*.9238795042037964;g[vd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*59<<2)>>2];g[wd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2];g[xd>>2]=+g[vd>>2]-+g[wd>>2];g[qm>>2]=+g[vd>>2]+ +g[wd>>2];g[yd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[zd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*43<<2)>>2];g[Ad>>2]=+g[yd>>2]-+g[zd>>2];g[rm>>2]=+g[yd>>2]+ +g[zd>>2];g[Bd>>2]=+g[xd>>2]*.9238795042037964+ +g[Ad>>2]*.3826834261417389;g[dp>>2]=+g[qm>>2]-+g[rm>>2];g[se>>2]=+g[xd>>2]*.3826834261417389-+g[Ad>>2]*.9238795042037964;g[nd>>2]=+g[de>>2]+ +g[md>>2];g[Cd>>2]=+g[ud>>2]+ +g[Bd>>2];g[Dd>>2]=+g[nd>>2]+ +g[Cd>>2];g[Kf>>2]=+g[nd>>2]-+g[Cd>>2];g[bp>>2]=+g[Wk>>2]-+g[Xk>>2];g[ep>>2]=(+g[cp>>2]+ +g[dp>>2])*.7071067690849304;g[fp>>2]=+g[bp>>2]+ 
+g[ep>>2];g[Cp>>2]=+g[bp>>2]-+g[ep>>2];g[qo>>2]=+g[_k>>2]-+g[Zk>>2];g[ro>>2]=(+g[dp>>2]-+g[cp>>2])*.7071067690849304;g[so>>2]=+g[qo>>2]+ +g[ro>>2];g[Gp>>2]=+g[ro>>2]-+g[qo>>2];g[re>>2]=+g[mf>>2]-+g[qe>>2];g[ue>>2]=+g[se>>2]-+g[te>>2];g[ve>>2]=+g[re>>2]+ +g[ue>>2];g[Of>>2]=+g[ue>>2]-+g[re>>2];g[qi>>2]=+g[de>>2]-+g[md>>2];g[ri>>2]=+g[te>>2]+ +g[se>>2];g[si>>2]=+g[qi>>2]+ +g[ri>>2];g[Nj>>2]=+g[qi>>2]-+g[ri>>2];g[Yk>>2]=+g[Wk>>2]+ +g[Xk>>2];g[$k>>2]=+g[Zk>>2]+ +g[_k>>2];g[al>>2]=+g[Yk>>2]-+g[$k>>2];g[Sm>>2]=+g[Yk>>2]+ +g[$k>>2];g[sm>>2]=+g[qm>>2]+ +g[rm>>2];g[vm>>2]=+g[tm>>2]+ +g[um>>2];g[wm>>2]=+g[sm>>2]-+g[vm>>2];g[Tm>>2]=+g[vm>>2]+ +g[sm>>2];g[Bi>>2]=+g[qe>>2]+ +g[mf>>2];g[Ci>>2]=+g[Bd>>2]-+g[ud>>2];g[Di>>2]=+g[Bi>>2]+ +g[Ci>>2];g[Rj>>2]=+g[Ci>>2]-+g[Bi>>2];g[Fc>>2]=+g[c[o>>2]>>2];g[Gc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<5<<2)>>2];g[Hc>>2]=+g[Fc>>2]-+g[Gc>>2];g[nl>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Qd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2];g[Rd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*48<<2)>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[ol>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Ic>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Jc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*40<<2)>>2];g[Kc>>2]=+g[Ic>>2]-+g[Jc>>2];g[Lc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*56<<2)>>2];g[Mc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[Oc>>2]=(+g[Kc>>2]+ +g[Nc>>2])*.7071067690849304;g[rl>>2]=+g[Lc>>2]+ +g[Mc>>2];g[Pd>>2]=(+g[Nc>>2]-+g[Kc>>2])*.7071067690849304;g[ql>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Qc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Rc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*36<<2)>>2];g[Sc>>2]=+g[Qc>>2]-+g[Rc>>2];g[Pk>>2]=+g[Qc>>2]+ +g[Rc>>2];g[Tc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2];g[Uc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*52<<2)>>2];g[Vc>>2]=+g[Tc>>2]-+g[Uc>>2];g[Qk>>2]=+g[Tc>>2]+ +g[Uc>>2];g[Wc>>2]=+g[Sc>>2]*.9238795042037964-+g[Vc>>2]*.3826834261417389;g[Jo>>2]=+g[Pk>>2]-+g[Qk>>2];g[Vd>>2]=+g[Sc>>2]*.3826834261417389+ 
+g[Vc>>2]*.9238795042037964;g[Zb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*60<<2)>>2];g[_b>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[Mk>>2]=+g[Zb>>2]+ +g[_b>>2];g[ac>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[bc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*44<<2)>>2];g[cc>>2]=+g[ac>>2]-+g[bc>>2];g[Nk>>2]=+g[ac>>2]+ +g[bc>>2];g[dc>>2]=+g[$b>>2]*.9238795042037964+ +g[cc>>2]*.3826834261417389;g[Ko>>2]=+g[Mk>>2]-+g[Nk>>2];g[Ud>>2]=+g[$b>>2]*.3826834261417389-+g[cc>>2]*.9238795042037964;g[Pc>>2]=+g[Hc>>2]+ +g[Oc>>2];g[ec>>2]=+g[Wc>>2]+ +g[dc>>2];g[fc>>2]=+g[Pc>>2]+ +g[ec>>2];g[Gf>>2]=+g[Pc>>2]-+g[ec>>2];g[Io>>2]=+g[nl>>2]-+g[ol>>2];g[Lo>>2]=(+g[Jo>>2]+ +g[Ko>>2])*.7071067690849304;g[Mo>>2]=+g[Io>>2]+ +g[Lo>>2];g[yp>>2]=+g[Io>>2]-+g[Lo>>2];g[Vo>>2]=+g[rl>>2]-+g[ql>>2];g[Wo>>2]=(+g[Ko>>2]-+g[Jo>>2])*.7071067690849304;g[Xo>>2]=+g[Vo>>2]+ +g[Wo>>2];g[wp>>2]=+g[Wo>>2]-+g[Vo>>2];g[Td>>2]=+g[Pd>>2]-+g[Sd>>2];g[Wd>>2]=+g[Ud>>2]-+g[Vd>>2];g[Xd>>2]=+g[Td>>2]+ +g[Wd>>2];g[Ef>>2]=+g[Wd>>2]-+g[Td>>2];g[_g>>2]=+g[Hc>>2]-+g[Oc>>2];g[$g>>2]=+g[Vd>>2]+ +g[Ud>>2];g[ah>>2]=+g[_g>>2]+ +g[$g>>2];g[Jj>>2]=+g[_g>>2]-+g[$g>>2];g[pl>>2]=+g[nl>>2]+ +g[ol>>2];g[sl>>2]=+g[ql>>2]+ +g[rl>>2];g[tl>>2]=+g[pl>>2]-+g[sl>>2];g[km>>2]=+g[pl>>2]+ +g[sl>>2];g[Ok>>2]=+g[Mk>>2]+ +g[Nk>>2];g[Rk>>2]=+g[Pk>>2]+ +g[Qk>>2];g[Sk>>2]=+g[Ok>>2]-+g[Rk>>2];g[lm>>2]=+g[Rk>>2]+ +g[Ok>>2];g[Kh>>2]=+g[Sd>>2]+ +g[Pd>>2];g[Lh>>2]=+g[dc>>2]-+g[Wc>>2];g[Mh>>2]=+g[Kh>>2]+ +g[Lh>>2];g[Hj>>2]=+g[Lh>>2]-+g[Kh>>2];g[Ca>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*63<<2)>>2];g[Da>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*31<<2)>>2];g[Ea>>2]=+g[Ca>>2]+ +g[Da>>2];g[jb>>2]=+g[Ca>>2]-+g[Da>>2];g[H>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[I>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*47<<2)>>2];g[J>>2]=+g[H>>2]+ 
+g[I>>2];g[Yb>>2]=+g[H>>2]-+g[I>>2];g[L>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[M>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*39<<2)>>2];g[kb>>2]=+g[L>>2]-+g[M>>2];g[O>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*55<<2)>>2];g[P>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*23<<2)>>2];g[lb>>2]=+g[O>>2]-+g[P>>2];g[N>>2]=+g[L>>2]+ +g[M>>2];g[Xb>>2]=(+g[lb>>2]-+g[kb>>2])*.7071067690849304;g[Q>>2]=+g[O>>2]+ +g[P>>2];g[mb>>2]=(+g[kb>>2]+ +g[lb>>2])*.7071067690849304;g[T>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[U>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*35<<2)>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Pb>>2]=+g[T>>2]-+g[U>>2];g[W>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*19<<2)>>2];g[X>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*51<<2)>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[Qb>>2]=+g[W>>2]-+g[X>>2];g[Rb>>2]=+g[Pb>>2]*.9238795042037964-+g[Qb>>2]*.3826834261417389;g[Zn>>2]=+g[V>>2]-+g[Y>>2];g[zc>>2]=+g[Pb>>2]*.3826834261417389+ +g[Qb>>2]*.9238795042037964;g[_>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*59<<2)>>2];g[$>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*27<<2)>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[Sb>>2]=+g[_>>2]-+g[$>>2];g[ba>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ca>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*43<<2)>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[Tb>>2]=+g[ba>>2]-+g[ca>>2];g[Ub>>2]=+g[Sb>>2]*.9238795042037964+ +g[Tb>>2]*.3826834261417389;g[_n>>2]=+g[aa>>2]-+g[da>>2];g[yc>>2]=+g[Sb>>2]*.3826834261417389-+g[Tb>>2]*.9238795042037964;g[K>>2]=+g[Ea>>2]+ +g[J>>2];g[R>>2]=+g[N>>2]+ +g[Q>>2];g[S>>2]=+g[K>>2]+ +g[R>>2];g[il>>2]=+g[K>>2]-+g[R>>2];g[xn>>2]=+g[Ea>>2]-+g[J>>2];g[$n>>2]=(+g[Zn>>2]+ +g[_n>>2])*.7071067690849304;g[ao>>2]=+g[xn>>2]+ +g[$n>>2];g[lq>>2]=+g[xn>>2]-+g[$n>>2];g[bo>>2]=+g[Q>>2]-+g[N>>2];g[co>>2]=(+g[_n>>2]-+g[Zn>>2])*.7071067690849304;g[eo>>2]=+g[bo>>2]+ +g[co>>2];g[mq>>2]=+g[co>>2]-+g[bo>>2];g[Z>>2]=+g[V>>2]+ +g[Y>>2];g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[Ga>>2]=+g[Z>>2]+ +g[ea>>2];g[jl>>2]=+g[ea>>2]-+g[Z>>2];g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[Vb>>2]=+g[Rb>>2]+ +g[Ub>>2];g[Wb>>2]=+g[nb>>2]+ 
+g[Vb>>2];g[ug>>2]=+g[nb>>2]-+g[Vb>>2];g[Rg>>2]=+g[jb>>2]-+g[mb>>2];g[Sg>>2]=+g[zc>>2]+ +g[yc>>2];g[Tg>>2]=+g[Rg>>2]+ +g[Sg>>2];g[yj>>2]=+g[Rg>>2]-+g[Sg>>2];g[Ug>>2]=+g[Yb>>2]+ +g[Xb>>2];g[Vg>>2]=+g[Ub>>2]-+g[Rb>>2];g[Wg>>2]=+g[Ug>>2]+ +g[Vg>>2];g[zj>>2]=+g[Vg>>2]-+g[Ug>>2];g[xc>>2]=+g[Xb>>2]-+g[Yb>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[Bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[tg>>2]=+g[Ac>>2]-+g[xc>>2];g[gc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[hc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*34<<2)>>2];g[ul>>2]=+g[gc>>2]+ +g[hc>>2];g[sc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2];g[tc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*50<<2)>>2];g[vl>>2]=+g[sc>>2]+ +g[tc>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[No>>2]=+g[ul>>2]-+g[vl>>2];g[uc>>2]=+g[sc>>2]-+g[tc>>2];g[wl>>2]=+g[ul>>2]+ +g[vl>>2];g[Yc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*62<<2)>>2];g[Zc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2];g[Bl>>2]=+g[Yc>>2]+ +g[Zc>>2];g[Id>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Jd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*46<<2)>>2];g[Cl>>2]=+g[Id>>2]+ +g[Jd>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[Qo>>2]=+g[Bl>>2]-+g[Cl>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[Dl>>2]=+g[Bl>>2]+ +g[Cl>>2];g[jc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[kc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*42<<2)>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[xl>>2]=+g[jc>>2]+ +g[kc>>2];g[mc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*58<<2)>>2];g[nc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[yl>>2]=+g[mc>>2]+ +g[nc>>2];g[pc>>2]=(+g[lc>>2]+ +g[oc>>2])*.7071067690849304;g[Oo>>2]=+g[yl>>2]-+g[xl>>2];g[rc>>2]=(+g[oc>>2]-+g[lc>>2])*.7071067690849304;g[zl>>2]=+g[xl>>2]+ +g[yl>>2];g[$c>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ad>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*38<<2)>>2];g[bd>>2]=+g[$c>>2]-+g[ad>>2];g[El>>2]=+g[$c>>2]+ +g[ad>>2];g[cd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*54<<2)>>2];g[dd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2];g[ed>>2]=+g[cd>>2]-+g[dd>>2];g[Hk>>2]=+g[cd>>2]+ +g[dd>>2];g[fd>>2]=(+g[bd>>2]+ 
+g[ed>>2])*.7071067690849304;g[Ro>>2]=+g[Hk>>2]-+g[El>>2];g[Hd>>2]=(+g[ed>>2]-+g[bd>>2])*.7071067690849304;g[Ik>>2]=+g[El>>2]+ +g[Hk>>2];g[nm>>2]=+g[wl>>2]+ +g[zl>>2];g[Pm>>2]=+g[Dl>>2]+ +g[Ik>>2];g[qc>>2]=+g[ic>>2]+ +g[pc>>2];g[vc>>2]=+g[rc>>2]-+g[uc>>2];g[wc>>2]=+g[qc>>2]*.9807852506637573+ +g[vc>>2]*.19509032368659973;g[Yd>>2]=+g[vc>>2]*.9807852506637573-+g[qc>>2]*.19509032368659973;g[Yo>>2]=+g[Oo>>2]*.9238795042037964-+g[No>>2]*.3826834261417389;g[Zo>>2]=+g[Qo>>2]*.3826834261417389+ +g[Ro>>2]*.9238795042037964;g[_o>>2]=+g[Yo>>2]+ +g[Zo>>2];g[zp>>2]=+g[Zo>>2]-+g[Yo>>2];g[Gd>>2]=+g[_c>>2]+ +g[fd>>2];g[Ld>>2]=+g[Hd>>2]-+g[Kd>>2];g[Md>>2]=+g[Gd>>2]*.9807852506637573-+g[Ld>>2]*.19509032368659973;g[Zd>>2]=+g[Gd>>2]*.19509032368659973+ +g[Ld>>2]*.9807852506637573;g[eh>>2]=+g[_c>>2]-+g[fd>>2];g[fh>>2]=+g[Kd>>2]+ +g[Hd>>2];g[Hh>>2]=+g[eh>>2]*.8314695954322815-+g[fh>>2]*.5555702447891235;g[Oh>>2]=+g[eh>>2]*.5555702447891235+ +g[fh>>2]*.8314695954322815;g[Po>>2]=+g[No>>2]*.9238795042037964+ +g[Oo>>2]*.3826834261417389;g[So>>2]=+g[Qo>>2]*.9238795042037964-+g[Ro>>2]*.3826834261417389;g[To>>2]=+g[Po>>2]+ +g[So>>2];g[vp>>2]=+g[So>>2]-+g[Po>>2];g[bh>>2]=+g[ic>>2]-+g[pc>>2];g[ch>>2]=+g[uc>>2]+ +g[rc>>2];g[dh>>2]=+g[bh>>2]*.8314695954322815+ +g[ch>>2]*.5555702447891235;g[Nh>>2]=+g[ch>>2]*.8314695954322815-+g[bh>>2]*.5555702447891235;g[Al>>2]=+g[wl>>2]-+g[zl>>2];g[Jk>>2]=+g[Dl>>2]-+g[Ik>>2];g[Kk>>2]=(+g[Al>>2]+ +g[Jk>>2])*.7071067690849304;g[Tk>>2]=(+g[Jk>>2]-+g[Al>>2])*.7071067690849304;g[Ed>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Fd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*33<<2)>>2];g[bl>>2]=+g[Ed>>2]+ +g[Fd>>2];g[Pe>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2];g[Qe>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*49<<2)>>2];g[cl>>2]=+g[Pe>>2]+ +g[Qe>>2];g[fe>>2]=+g[Ed>>2]-+g[Fd>>2];g[io>>2]=+g[bl>>2]-+g[cl>>2];g[Re>>2]=+g[Pe>>2]-+g[Qe>>2];g[dl>>2]=+g[bl>>2]+ +g[cl>>2];g[Ue>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*61<<2)>>2];g[Ve>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2];g[Jl>>2]=+g[Ue>>2]+ 
+g[Ve>>2];g[ef>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[ff>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*45<<2)>>2];g[Kl>>2]=+g[ef>>2]+ +g[ff>>2];g[We>>2]=+g[Ue>>2]-+g[Ve>>2];g[lo>>2]=+g[Jl>>2]-+g[Kl>>2];g[gf>>2]=+g[ef>>2]-+g[ff>>2];g[Ll>>2]=+g[Jl>>2]+ +g[Kl>>2];g[ge>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[he>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*41<<2)>>2];g[ie>>2]=+g[ge>>2]-+g[he>>2];g[el>>2]=+g[ge>>2]+ +g[he>>2];g[je>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*57<<2)>>2];g[ke>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2];g[le>>2]=+g[je>>2]-+g[ke>>2];g[Gl>>2]=+g[je>>2]+ +g[ke>>2];g[me>>2]=(+g[ie>>2]+ +g[le>>2])*.7071067690849304;g[jo>>2]=+g[Gl>>2]-+g[el>>2];g[oe>>2]=(+g[le>>2]-+g[ie>>2])*.7071067690849304;g[Hl>>2]=+g[el>>2]+ +g[Gl>>2];g[Xe>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ye>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*37<<2)>>2];g[Ze>>2]=+g[Xe>>2]-+g[Ye>>2];g[Ml>>2]=+g[Xe>>2]+ +g[Ye>>2];g[_e>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*53<<2)>>2];g[$e>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2];g[af>>2]=+g[_e>>2]-+g[$e>>2];g[Nl>>2]=+g[_e>>2]+ +g[$e>>2];g[bf>>2]=(+g[Ze>>2]+ +g[af>>2])*.7071067690849304;g[mo>>2]=+g[Nl>>2]-+g[Ml>>2];g[df>>2]=(+g[af>>2]-+g[Ze>>2])*.7071067690849304;g[Ol>>2]=+g[Ml>>2]+ +g[Nl>>2];g[Vm>>2]=+g[dl>>2]+ +g[Hl>>2];g[Wm>>2]=+g[Ll>>2]+ +g[Ol>>2];g[ne>>2]=+g[fe>>2]+ +g[me>>2];g[Se>>2]=+g[oe>>2]-+g[Re>>2];g[Te>>2]=+g[ne>>2]*.9807852506637573+ +g[Se>>2]*.19509032368659973;g[we>>2]=+g[Se>>2]*.9807852506637573-+g[ne>>2]*.19509032368659973;g[to>>2]=+g[jo>>2]*.9238795042037964-+g[io>>2]*.3826834261417389;g[uo>>2]=+g[lo>>2]*.3826834261417389+ +g[mo>>2]*.9238795042037964;g[vo>>2]=+g[to>>2]+ +g[uo>>2];g[Dp>>2]=+g[uo>>2]-+g[to>>2];g[cf>>2]=+g[We>>2]+ +g[bf>>2];g[hf>>2]=+g[df>>2]-+g[gf>>2];g[jf>>2]=+g[cf>>2]*.9807852506637573-+g[hf>>2]*.19509032368659973;g[xe>>2]=+g[cf>>2]*.19509032368659973+ +g[hf>>2]*.9807852506637573;g[wi>>2]=+g[We>>2]-+g[bf>>2];g[xi>>2]=+g[gf>>2]+ +g[df>>2];g[yi>>2]=+g[wi>>2]*.8314695954322815-+g[xi>>2]*.5555702447891235;g[Fi>>2]=+g[wi>>2]*.5555702447891235+ 
+g[xi>>2]*.8314695954322815;g[ko>>2]=+g[io>>2]*.9238795042037964+ +g[jo>>2]*.3826834261417389;g[no>>2]=+g[lo>>2]*.9238795042037964-+g[mo>>2]*.3826834261417389;g[oo>>2]=+g[ko>>2]+ +g[no>>2];g[Fp>>2]=+g[no>>2]-+g[ko>>2];g[ti>>2]=+g[fe>>2]-+g[me>>2];g[ui>>2]=+g[Re>>2]+ +g[oe>>2];g[vi>>2]=+g[ti>>2]*.8314695954322815+ +g[ui>>2]*.5555702447891235;g[Ei>>2]=+g[ui>>2]*.8314695954322815-+g[ti>>2]*.5555702447891235;g[Il>>2]=+g[dl>>2]-+g[Hl>>2];g[Pl>>2]=+g[Ll>>2]-+g[Ol>>2];g[om>>2]=(+g[Il>>2]+ +g[Pl>>2])*.7071067690849304;g[xm>>2]=(+g[Pl>>2]-+g[Il>>2])*.7071067690849304;g[er>>2]=+g[pq>>2]+ +g[dr>>2];g[x>>2]=+g[gk>>2]+ +g[vk>>2];g[y>>2]=+g[er>>2]+ +g[x>>2];g[jm>>2]=+g[er>>2]-+g[x>>2];g[Um>>2]=+g[Sm>>2]+ +g[Tm>>2];g[Xm>>2]=+g[Vm>>2]+ +g[Wm>>2];g[Ym>>2]=+g[Um>>2]-+g[Xm>>2];g[Dn>>2]=+g[Um>>2]+ +g[Xm>>2];g[Ba>>2]=+g[la>>2]+ +g[Aa>>2];g[Ha>>2]=+g[S>>2]+ +g[Ga>>2];g[Ia>>2]=+g[Ba>>2]+ +g[Ha>>2];g[zn>>2]=+g[Ha>>2]-+g[Ba>>2];g[mm>>2]=+g[km>>2]+ +g[lm>>2];g[Qm>>2]=+g[nm>>2]+ +g[Pm>>2];g[Rm>>2]=+g[mm>>2]-+g[Qm>>2];g[Cn>>2]=+g[mm>>2]+ +g[Qm>>2];g[(c[p>>2]|0)+(c[s>>2]<<5<<2)>>2]=+g[y>>2]-+g[Ia>>2];g[(c[q>>2]|0)+(c[t>>2]<<5<<2)>>2]=+g[Dn>>2]-+g[Cn>>2];g[yn>>2]=(+g[Rm>>2]+ +g[Ym>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[s>>2]|0)*48<<2)>>2]=+g[jm>>2]-+g[yn>>2];g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2]=+g[jm>>2]+ +g[yn>>2];g[An>>2]=(+g[Ym>>2]-+g[Rm>>2])*.7071067690849304;g[(c[q>>2]|0)+(c[t>>2]<<4<<2)>>2]=+g[zn>>2]+ +g[An>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*48<<2)>>2]=+g[An>>2]-+g[zn>>2];g[Bn>>2]=+g[y>>2]+ +g[Ia>>2];g[En>>2]=+g[Cn>>2]+ +g[Dn>>2];g[(c[p>>2]|0)+(c[s>>2]<<6<<2)>>2]=+g[Bn>>2]-+g[En>>2];g[c[p>>2]>>2]=+g[Bn>>2]+ +g[En>>2];g[Fn>>2]=+g[pq>>2]-+g[dr>>2];g[Sn>>2]=+g[vk>>2]-+g[gk>>2];g[Gn>>2]=+g[la>>2]-+g[Aa>>2];g[Hn>>2]=+g[S>>2]-+g[Ga>>2];g[In>>2]=(+g[Gn>>2]+ +g[Hn>>2])*.7071067690849304;g[Rn>>2]=(+g[Hn>>2]-+g[Gn>>2])*.7071067690849304;g[Kn>>2]=+g[km>>2]-+g[lm>>2];g[Ln>>2]=+g[Pm>>2]-+g[nm>>2];g[Mn>>2]=+g[Kn>>2]*.9238795042037964+ 
+g[Ln>>2]*.3826834261417389;g[Wn>>2]=+g[Ln>>2]*.9238795042037964-+g[Kn>>2]*.3826834261417389;g[Nn>>2]=+g[Sm>>2]-+g[Tm>>2];g[On>>2]=+g[Wm>>2]-+g[Vm>>2];g[Pn>>2]=+g[Nn>>2]*.9238795042037964-+g[On>>2]*.3826834261417389;g[Xn>>2]=+g[Nn>>2]*.3826834261417389+ +g[On>>2]*.9238795042037964;g[Jn>>2]=+g[Fn>>2]+ +g[In>>2];g[Qn>>2]=+g[Mn>>2]+ +g[Pn>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*56<<2)>>2]=+g[Jn>>2]-+g[Qn>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[Jn>>2]+ +g[Qn>>2];g[Vn>>2]=+g[Sn>>2]+ +g[Rn>>2];g[Zm>>2]=+g[Wn>>2]+ +g[Xn>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[Vn>>2]+ +g[Zm>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*56<<2)>>2]=+g[Zm>>2]-+g[Vn>>2];g[Tn>>2]=+g[Rn>>2]-+g[Sn>>2];g[Un>>2]=+g[Pn>>2]-+g[Mn>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*24<<2)>>2]=+g[Tn>>2]+ +g[Un>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*40<<2)>>2]=+g[Un>>2]-+g[Tn>>2];g[_m>>2]=+g[Fn>>2]-+g[In>>2];g[$m>>2]=+g[Xn>>2]-+g[Wn>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*40<<2)>>2]=+g[_m>>2]-+g[$m>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*24<<2)>>2]=+g[_m>>2]+ +g[$m>>2];g[Fk>>2]=(+g[Dk>>2]+ +g[Ek>>2])*.7071067690849304;g[Gk>>2]=+g[Ck>>2]+ +g[Fk>>2];g[Rl>>2]=+g[Ck>>2]-+g[Fk>>2];g[Fm>>2]=(+g[Ek>>2]-+g[Dk>>2])*.7071067690849304;g[Gm>>2]=+g[Em>>2]+ +g[Fm>>2];g[am>>2]=+g[Fm>>2]-+g[Em>>2];g[hl>>2]=+g[fl>>2]*.9238795042037964+ +g[gl>>2]*.3826834261417389;g[kl>>2]=+g[il>>2]*.9238795042037964-+g[jl>>2]*.3826834261417389;g[ll>>2]=+g[hl>>2]+ +g[kl>>2];g[$l>>2]=+g[kl>>2]-+g[hl>>2];g[Xl>>2]=+g[al>>2]-+g[om>>2];g[Yl>>2]=+g[xm>>2]-+g[wm>>2];g[Zl>>2]=+g[Xl>>2]*.8314695954322815-+g[Yl>>2]*.5555702447891235;g[fm>>2]=+g[Xl>>2]*.5555702447891235+ +g[Yl>>2]*.8314695954322815;g[Lk>>2]=+g[tl>>2]+ +g[Kk>>2];g[Uk>>2]=+g[Sk>>2]+ +g[Tk>>2];g[Vk>>2]=+g[Lk>>2]*.9807852506637573+ +g[Uk>>2]*.19509032368659973;g[Km>>2]=+g[Uk>>2]*.9807852506637573-+g[Lk>>2]*.19509032368659973;g[Bm>>2]=+g[gl>>2]*.9238795042037964-+g[fl>>2]*.3826834261417389;g[Cm>>2]=+g[il>>2]*.3826834261417389+ +g[jl>>2]*.9238795042037964;g[Dm>>2]=+g[Bm>>2]+ 
+g[Cm>>2];g[Sl>>2]=+g[Cm>>2]-+g[Bm>>2];g[Ul>>2]=+g[tl>>2]-+g[Kk>>2];g[Vl>>2]=+g[Tk>>2]-+g[Sk>>2];g[Wl>>2]=+g[Ul>>2]*.8314695954322815+ +g[Vl>>2]*.5555702447891235;g[em>>2]=+g[Vl>>2]*.8314695954322815-+g[Ul>>2]*.5555702447891235;g[pm>>2]=+g[al>>2]+ +g[om>>2];g[ym>>2]=+g[wm>>2]+ +g[xm>>2];g[zm>>2]=+g[pm>>2]*.9807852506637573-+g[ym>>2]*.19509032368659973;g[Lm>>2]=+g[pm>>2]*.19509032368659973+ +g[ym>>2]*.9807852506637573;g[ml>>2]=+g[Gk>>2]+ +g[ll>>2];g[Am>>2]=+g[Vk>>2]+ +g[zm>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*60<<2)>>2]=+g[ml>>2]-+g[Am>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[ml>>2]+ +g[Am>>2];g[Jm>>2]=+g[Gm>>2]+ +g[Dm>>2];g[Mm>>2]=+g[Km>>2]+ +g[Lm>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[Jm>>2]+ +g[Mm>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*60<<2)>>2]=+g[Mm>>2]-+g[Jm>>2];g[Hm>>2]=+g[Dm>>2]-+g[Gm>>2];g[Im>>2]=+g[zm>>2]-+g[Vk>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*28<<2)>>2]=+g[Hm>>2]+ +g[Im>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*36<<2)>>2]=+g[Im>>2]-+g[Hm>>2];g[Nm>>2]=+g[Gk>>2]-+g[ll>>2];g[Ql>>2]=+g[Lm>>2]-+g[Km>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*36<<2)>>2]=+g[Nm>>2]-+g[Ql>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*28<<2)>>2]=+g[Nm>>2]+ +g[Ql>>2];g[Tl>>2]=+g[Rl>>2]+ +g[Sl>>2];g[_l>>2]=+g[Wl>>2]+ +g[Zl>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*52<<2)>>2]=+g[Tl>>2]-+g[_l>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[Tl>>2]+ +g[_l>>2];g[dm>>2]=+g[am>>2]+ +g[$l>>2];g[gm>>2]=+g[em>>2]+ +g[fm>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2]=+g[dm>>2]+ +g[gm>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*52<<2)>>2]=+g[gm>>2]-+g[dm>>2];g[bm>>2]=+g[$l>>2]-+g[am>>2];g[cm>>2]=+g[Zl>>2]-+g[Wl>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*20<<2)>>2]=+g[bm>>2]+ +g[cm>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*44<<2)>>2]=+g[cm>>2]-+g[bm>>2];g[hm>>2]=+g[Rl>>2]-+g[Sl>>2];g[im>>2]=+g[fm>>2]-+g[em>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*44<<2)>>2]=+g[hm>>2]-+g[im>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*20<<2)>>2]=+g[hm>>2]+ +g[im>>2];g[iq>>2]=+g[an>>2]-+g[dn>>2];g[jq>>2]=+g[Go>>2]-+g[Fo>>2];g[kq>>2]=+g[iq>>2]-+g[jq>>2];g[xq>>2]=+g[iq>>2]+ +g[jq>>2];g[Aq>>2]=+g[wp>>2]+ +g[vp>>2];g[Bq>>2]=+g[yp>>2]+ 
+g[zp>>2];g[Cq>>2]=+g[Aq>>2]*.290284663438797+ +g[Bq>>2]*.9569403529167175;g[Mq>>2]=+g[Aq>>2]*.9569403529167175-+g[Bq>>2]*.290284663438797;g[Dq>>2]=+g[Cp>>2]+ +g[Dp>>2];g[Eq>>2]=+g[Gp>>2]+ +g[Fp>>2];g[Fq>>2]=+g[Dq>>2]*.9569403529167175-+g[Eq>>2]*.290284663438797;g[Nq>>2]=+g[Eq>>2]*.9569403529167175+ +g[Dq>>2]*.290284663438797;g[nq>>2]=+g[lq>>2]*.5555702447891235+ +g[mq>>2]*.8314695954322815;g[sp>>2]=+g[oq>>2]*.8314695954322815-+g[rp>>2]*.5555702447891235;g[tp>>2]=+g[nq>>2]-+g[sp>>2];g[Hq>>2]=+g[sp>>2]+ +g[nq>>2];g[xp>>2]=+g[vp>>2]-+g[wp>>2];g[Ap>>2]=+g[yp>>2]-+g[zp>>2];g[Bp>>2]=+g[xp>>2]*.4713967442512512+ +g[Ap>>2]*.8819212913513184;g[sq>>2]=+g[xp>>2]*.8819212913513184-+g[Ap>>2]*.4713967442512512;g[Kp>>2]=+g[lq>>2]*.8314695954322815-+g[mq>>2]*.5555702447891235;g[Lp>>2]=+g[rp>>2]*.8314695954322815+ +g[oq>>2]*.5555702447891235;g[Mp>>2]=+g[Kp>>2]-+g[Lp>>2];g[yq>>2]=+g[Lp>>2]+ +g[Kp>>2];g[Np>>2]=+g[ln>>2]-+g[hn>>2];g[Op>>2]=+g[Do>>2]-+g[Co>>2];g[Pp>>2]=+g[Np>>2]-+g[Op>>2];g[Iq>>2]=+g[Op>>2]+ +g[Np>>2];g[Ep>>2]=+g[Cp>>2]-+g[Dp>>2];g[Hp>>2]=+g[Fp>>2]-+g[Gp>>2];g[Ip>>2]=+g[Ep>>2]*.8819212913513184-+g[Hp>>2]*.4713967442512512;g[tq>>2]=+g[Hp>>2]*.8819212913513184+ +g[Ep>>2]*.4713967442512512;g[up>>2]=+g[kq>>2]+ +g[tp>>2];g[Jp>>2]=+g[Bp>>2]+ +g[Ip>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*54<<2)>>2]=+g[up>>2]-+g[Jp>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[up>>2]+ +g[Jp>>2];g[rq>>2]=+g[Pp>>2]+ +g[Mp>>2];g[uq>>2]=+g[sq>>2]+ +g[tq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[rq>>2]+ +g[uq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*54<<2)>>2]=+g[uq>>2]-+g[rq>>2];g[Qp>>2]=+g[Mp>>2]-+g[Pp>>2];g[qq>>2]=+g[Ip>>2]-+g[Bp>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*22<<2)>>2]=+g[Qp>>2]+ +g[qq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*42<<2)>>2]=+g[qq>>2]-+g[Qp>>2];g[vq>>2]=+g[kq>>2]-+g[tp>>2];g[wq>>2]=+g[tq>>2]-+g[sq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*42<<2)>>2]=+g[vq>>2]-+g[wq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*22<<2)>>2]=+g[vq>>2]+ +g[wq>>2];g[zq>>2]=+g[xq>>2]+ +g[yq>>2];g[Gq>>2]=+g[Cq>>2]+ 
+g[Fq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*58<<2)>>2]=+g[zq>>2]-+g[Gq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[zq>>2]+ +g[Gq>>2];g[Lq>>2]=+g[Iq>>2]+ +g[Hq>>2];g[Oq>>2]=+g[Mq>>2]+ +g[Nq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[Lq>>2]+ +g[Oq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*58<<2)>>2]=+g[Oq>>2]-+g[Lq>>2];g[Jq>>2]=+g[Hq>>2]-+g[Iq>>2];g[Kq>>2]=+g[Fq>>2]-+g[Cq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*26<<2)>>2]=+g[Jq>>2]+ +g[Kq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*38<<2)>>2]=+g[Kq>>2]-+g[Jq>>2];g[Pq>>2]=+g[xq>>2]-+g[yq>>2];g[Qq>>2]=+g[Nq>>2]-+g[Mq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*38<<2)>>2]=+g[Pq>>2]-+g[Qq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*26<<2)>>2]=+g[Pq>>2]+ +g[Qq>>2];g[en>>2]=+g[an>>2]+ +g[dn>>2];g[mn>>2]=+g[hn>>2]+ +g[ln>>2];g[nn>>2]=+g[en>>2]+ +g[mn>>2];g[qp>>2]=+g[en>>2]-+g[mn>>2];g[Tp>>2]=+g[Mo>>2]-+g[To>>2];g[Up>>2]=+g[_o>>2]-+g[Xo>>2];g[Vp>>2]=+g[Tp>>2]*.7730104327201843+ +g[Up>>2]*.6343932747840881;g[dq>>2]=+g[Up>>2]*.7730104327201843-+g[Tp>>2]*.6343932747840881;g[Wp>>2]=+g[fp>>2]-+g[oo>>2];g[Xp>>2]=+g[vo>>2]-+g[so>>2];g[Yp>>2]=+g[Wp>>2]*.7730104327201843-+g[Xp>>2]*.6343932747840881;g[eq>>2]=+g[Wp>>2]*.6343932747840881+ +g[Xp>>2]*.7730104327201843;g[wn>>2]=+g[sn>>2]*.9807852506637573+ +g[vn>>2]*.19509032368659973;g[fo>>2]=+g[ao>>2]*.9807852506637573-+g[eo>>2]*.19509032368659973;g[go>>2]=+g[wn>>2]+ +g[fo>>2];g[_p>>2]=+g[fo>>2]-+g[wn>>2];g[Uo>>2]=+g[Mo>>2]+ +g[To>>2];g[$o>>2]=+g[Xo>>2]+ +g[_o>>2];g[ap>>2]=+g[Uo>>2]*.9951847195625305+ +g[$o>>2]*.0980171412229538;g[lp>>2]=+g[$o>>2]*.9951847195625305-+g[Uo>>2]*.0980171412229538;g[zo>>2]=+g[vn>>2]*.9807852506637573-+g[sn>>2]*.19509032368659973;g[Ao>>2]=+g[ao>>2]*.19509032368659973+ +g[eo>>2]*.9807852506637573;g[Bo>>2]=+g[zo>>2]+ +g[Ao>>2];g[Rp>>2]=+g[Ao>>2]-+g[zo>>2];g[Eo>>2]=+g[Co>>2]+ +g[Do>>2];g[Ho>>2]=+g[Fo>>2]+ +g[Go>>2];g[hp>>2]=+g[Eo>>2]+ +g[Ho>>2];g[$p>>2]=+g[Ho>>2]-+g[Eo>>2];g[po>>2]=+g[fp>>2]+ +g[oo>>2];g[wo>>2]=+g[so>>2]+ 
+g[vo>>2];g[xo>>2]=+g[po>>2]*.9951847195625305-+g[wo>>2]*.0980171412229538;g[mp>>2]=+g[po>>2]*.0980171412229538+ +g[wo>>2]*.9951847195625305;g[ho>>2]=+g[nn>>2]+ +g[go>>2];g[yo>>2]=+g[ap>>2]+ +g[xo>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*62<<2)>>2]=+g[ho>>2]-+g[yo>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[ho>>2]+ +g[yo>>2];g[kp>>2]=+g[hp>>2]+ +g[Bo>>2];g[np>>2]=+g[lp>>2]+ +g[mp>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[kp>>2]+ +g[np>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*62<<2)>>2]=+g[np>>2]-+g[kp>>2];g[ip>>2]=+g[Bo>>2]-+g[hp>>2];g[jp>>2]=+g[xo>>2]-+g[ap>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*30<<2)>>2]=+g[ip>>2]+ +g[jp>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*34<<2)>>2]=+g[jp>>2]-+g[ip>>2];g[op>>2]=+g[nn>>2]-+g[go>>2];g[pp>>2]=+g[mp>>2]-+g[lp>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*34<<2)>>2]=+g[op>>2]-+g[pp>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*30<<2)>>2]=+g[op>>2]+ +g[pp>>2];g[Sp>>2]=+g[qp>>2]+ +g[Rp>>2];g[Zp>>2]=+g[Vp>>2]+ +g[Yp>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*50<<2)>>2]=+g[Sp>>2]-+g[Zp>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2]=+g[Sp>>2]+ +g[Zp>>2];g[cq>>2]=+g[$p>>2]+ +g[_p>>2];g[fq>>2]=+g[dq>>2]+ +g[eq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2]=+g[cq>>2]+ +g[fq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*50<<2)>>2]=+g[fq>>2]-+g[cq>>2];g[aq>>2]=+g[_p>>2]-+g[$p>>2];g[bq>>2]=+g[Yp>>2]-+g[Vp>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*18<<2)>>2]=+g[aq>>2]+ +g[bq>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*46<<2)>>2]=+g[bq>>2]-+g[aq>>2];g[gq>>2]=+g[qp>>2]-+g[Rp>>2];g[hq>>2]=+g[eq>>2]-+g[dq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*46<<2)>>2]=+g[gq>>2]-+g[hq>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*18<<2)>>2]=+g[gq>>2]+ +g[hq>>2];g[qg>>2]=+g[Na>>2]-+g[sb>>2];g[rg>>2]=+g[Ne>>2]-+g[Me>>2];g[sg>>2]=+g[qg>>2]-+g[rg>>2];g[Fg>>2]=+g[qg>>2]+ +g[rg>>2];g[vg>>2]=+g[tg>>2]*.7730104327201843+ +g[ug>>2]*.6343932747840881;g[Af>>2]=+g[wg>>2]*.7730104327201843-+g[zf>>2]*.6343932747840881;g[Bf>>2]=+g[vg>>2]-+g[Af>>2];g[nh>>2]=+g[Af>>2]+ +g[vg>>2];g[Vf>>2]=+g[Lb>>2]-+g[Cb>>2];g[Wf>>2]=+g[Ke>>2]-+g[He>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[oh>>2]=+g[Wf>>2]+ 
+g[Vf>>2];g[Sf>>2]=+g[ug>>2]*.7730104327201843-+g[tg>>2]*.6343932747840881;g[Tf>>2]=+g[wg>>2]*.6343932747840881+ +g[zf>>2]*.7730104327201843;g[Uf>>2]=+g[Sf>>2]-+g[Tf>>2];g[Gg>>2]=+g[Tf>>2]+ +g[Sf>>2];g[Lf>>2]=+g[xe>>2]-+g[we>>2];g[Mf>>2]=+g[Kf>>2]-+g[Lf>>2];g[jh>>2]=+g[Kf>>2]+ +g[Lf>>2];g[Nf>>2]=+g[jf>>2]-+g[Te>>2];g[Pf>>2]=+g[Nf>>2]-+g[Of>>2];g[kh>>2]=+g[Of>>2]+ +g[Nf>>2];g[Qf>>2]=+g[Mf>>2]*.903989315032959-+g[Pf>>2]*.4275550842285156;g[th>>2]=+g[kh>>2]*.9415440559387207+ +g[jh>>2]*.3368898630142212;g[Bg>>2]=+g[Pf>>2]*.903989315032959+ +g[Mf>>2]*.4275550842285156;g[lh>>2]=+g[jh>>2]*.9415440559387207-+g[kh>>2]*.3368898630142212;g[Df>>2]=+g[Md>>2]-+g[wc>>2];g[Ff>>2]=+g[Df>>2]-+g[Ef>>2];g[gh>>2]=+g[Ef>>2]+ +g[Df>>2];g[Hf>>2]=+g[Zd>>2]-+g[Yd>>2];g[If>>2]=+g[Gf>>2]-+g[Hf>>2];g[hh>>2]=+g[Gf>>2]+ +g[Hf>>2];g[Jf>>2]=+g[Ff>>2]*.4275550842285156+ +g[If>>2]*.903989315032959;g[sh>>2]=+g[gh>>2]*.9415440559387207-+g[hh>>2]*.3368898630142212;g[Ag>>2]=+g[Ff>>2]*.903989315032959-+g[If>>2]*.4275550842285156;g[ih>>2]=+g[gh>>2]*.3368898630142212+ +g[hh>>2]*.9415440559387207;g[Cf>>2]=+g[sg>>2]+ +g[Bf>>2];g[Rf>>2]=+g[Jf>>2]+ +g[Qf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*55<<2)>>2]=+g[Cf>>2]-+g[Rf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[Cf>>2]+ +g[Rf>>2];g[zg>>2]=+g[Xf>>2]+ +g[Uf>>2];g[Cg>>2]=+g[Ag>>2]+ +g[Bg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[zg>>2]+ +g[Cg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*55<<2)>>2]=+g[Cg>>2]-+g[zg>>2];g[Yf>>2]=+g[Uf>>2]-+g[Xf>>2];g[yg>>2]=+g[Qf>>2]-+g[Jf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*23<<2)>>2]=+g[Yf>>2]+ +g[yg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*41<<2)>>2]=+g[yg>>2]-+g[Yf>>2];g[Dg>>2]=+g[sg>>2]-+g[Bf>>2];g[Eg>>2]=+g[Bg>>2]-+g[Ag>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*41<<2)>>2]=+g[Dg>>2]-+g[Eg>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*23<<2)>>2]=+g[Dg>>2]+ +g[Eg>>2];g[Hg>>2]=+g[Fg>>2]+ +g[Gg>>2];g[mh>>2]=+g[ih>>2]+ +g[lh>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*57<<2)>>2]=+g[Hg>>2]-+g[mh>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[Hg>>2]+ +g[mh>>2];g[rh>>2]=+g[oh>>2]+ +g[nh>>2];g[uh>>2]=+g[sh>>2]+ 
+g[th>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[rh>>2]+ +g[uh>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*57<<2)>>2]=+g[uh>>2]-+g[rh>>2];g[ph>>2]=+g[nh>>2]-+g[oh>>2];g[qh>>2]=+g[lh>>2]-+g[ih>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*25<<2)>>2]=+g[ph>>2]+ +g[qh>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*39<<2)>>2]=+g[qh>>2]-+g[ph>>2];g[vh>>2]=+g[Fg>>2]-+g[Gg>>2];g[wh>>2]=+g[th>>2]-+g[sh>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*39<<2)>>2]=+g[vh>>2]-+g[wh>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*25<<2)>>2]=+g[vh>>2]+ +g[wh>>2];g[Xi>>2]=+g[xh>>2]-+g[yh>>2];g[Yi>>2]=+g[Th>>2]-+g[Sh>>2];g[Zi>>2]=+g[Xi>>2]-+g[Yi>>2];g[jj>>2]=+g[Xi>>2]+ +g[Yi>>2];g[Aj>>2]=+g[yj>>2]*.4713967442512512+ +g[zj>>2]*.8819212913513184;g[Dj>>2]=+g[Bj>>2]*.8819212913513184-+g[Cj>>2]*.4713967442512512;g[Ej>>2]=+g[Aj>>2]-+g[Dj>>2];g[tj>>2]=+g[Dj>>2]+ +g[Aj>>2];g[_i>>2]=+g[Fh>>2]-+g[Ch>>2];g[$i>>2]=+g[Oi>>2]-+g[Ni>>2];g[aj>>2]=+g[_i>>2]-+g[$i>>2];g[uj>>2]=+g[$i>>2]+ +g[_i>>2];g[Vj>>2]=+g[yj>>2]*.8819212913513184-+g[zj>>2]*.4713967442512512;g[Wj>>2]=+g[Cj>>2]*.8819212913513184+ +g[Bj>>2]*.4713967442512512;g[Xj>>2]=+g[Vj>>2]-+g[Wj>>2];g[kj>>2]=+g[Wj>>2]+ +g[Vj>>2];g[Oj>>2]=+g[Fi>>2]-+g[Ei>>2];g[Pj>>2]=+g[Nj>>2]-+g[Oj>>2];g[pj>>2]=+g[Nj>>2]+ +g[Oj>>2];g[Qj>>2]=+g[yi>>2]-+g[vi>>2];g[Sj>>2]=+g[Qj>>2]-+g[Rj>>2];g[qj>>2]=+g[Rj>>2]+ +g[Qj>>2];g[Tj>>2]=+g[Pj>>2]*.8577286005020142-+g[Sj>>2]*.5141027569770813;g[yk>>2]=+g[qj>>2]*.9700312614440918+ +g[pj>>2]*.24298018217086792;g[fj>>2]=+g[Sj>>2]*.8577286005020142+ +g[Pj>>2]*.5141027569770813;g[rj>>2]=+g[pj>>2]*.9700312614440918-+g[qj>>2]*.24298018217086792;g[Gj>>2]=+g[Hh>>2]-+g[dh>>2];g[Ij>>2]=+g[Gj>>2]-+g[Hj>>2];g[mj>>2]=+g[Hj>>2]+ +g[Gj>>2];g[Kj>>2]=+g[Oh>>2]-+g[Nh>>2];g[Lj>>2]=+g[Jj>>2]-+g[Kj>>2];g[nj>>2]=+g[Jj>>2]+ +g[Kj>>2];g[Mj>>2]=+g[Ij>>2]*.5141027569770813+ +g[Lj>>2]*.8577286005020142;g[xk>>2]=+g[mj>>2]*.9700312614440918-+g[nj>>2]*.24298018217086792;g[ej>>2]=+g[Ij>>2]*.8577286005020142-+g[Lj>>2]*.5141027569770813;g[oj>>2]=+g[mj>>2]*.24298018217086792+ +g[nj>>2]*.9700312614440918;g[Fj>>2]=+g[Zi>>2]+ 
+g[Ej>>2];g[Uj>>2]=+g[Mj>>2]+ +g[Tj>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*53<<2)>>2]=+g[Fj>>2]-+g[Uj>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[Fj>>2]+ +g[Uj>>2];g[dj>>2]=+g[aj>>2]+ +g[Xj>>2];g[gj>>2]=+g[ej>>2]+ +g[fj>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[dj>>2]+ +g[gj>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*53<<2)>>2]=+g[gj>>2]-+g[dj>>2];g[bj>>2]=+g[Xj>>2]-+g[aj>>2];g[cj>>2]=+g[Tj>>2]-+g[Mj>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*21<<2)>>2]=+g[bj>>2]+ +g[cj>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*43<<2)>>2]=+g[cj>>2]-+g[bj>>2];g[hj>>2]=+g[Zi>>2]-+g[Ej>>2];g[ij>>2]=+g[fj>>2]-+g[ej>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*43<<2)>>2]=+g[hj>>2]-+g[ij>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*21<<2)>>2]=+g[hj>>2]+ +g[ij>>2];g[lj>>2]=+g[jj>>2]+ +g[kj>>2];g[sj>>2]=+g[oj>>2]+ +g[rj>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*59<<2)>>2]=+g[lj>>2]-+g[sj>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[lj>>2]+ +g[sj>>2];g[xj>>2]=+g[uj>>2]+ +g[tj>>2];g[zk>>2]=+g[xk>>2]+ +g[yk>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[xj>>2]+ +g[zk>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*59<<2)>>2]=+g[zk>>2]-+g[xj>>2];g[vj>>2]=+g[tj>>2]-+g[uj>>2];g[wj>>2]=+g[rj>>2]-+g[oj>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*27<<2)>>2]=+g[vj>>2]+ +g[wj>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*37<<2)>>2]=+g[wj>>2]-+g[vj>>2];g[Ak>>2]=+g[jj>>2]-+g[kj>>2];g[Bk>>2]=+g[yk>>2]-+g[xk>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*37<<2)>>2]=+g[Ak>>2]-+g[Bk>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*27<<2)>>2]=+g[Ak>>2]+ +g[Bk>>2];g[tb>>2]=+g[Na>>2]+ +g[sb>>2];g[Mb>>2]=+g[Cb>>2]+ +g[Lb>>2];g[Nb>>2]=+g[tb>>2]+ +g[Mb>>2];g[yf>>2]=+g[tb>>2]-+g[Mb>>2];g[ib>>2]=+g[ab>>2]*.0980171412229538+ +g[hb>>2]*.9951847195625305;g[Cc>>2]=+g[Wb>>2]*.9951847195625305-+g[Bc>>2]*.0980171412229538;g[Dc>>2]=+g[ib>>2]+ +g[Cc>>2];g[gg>>2]=+g[Cc>>2]-+g[ib>>2];g[Le>>2]=+g[He>>2]+ +g[Ke>>2];g[Oe>>2]=+g[Me>>2]+ +g[Ne>>2];g[pf>>2]=+g[Le>>2]+ +g[Oe>>2];g[hg>>2]=+g[Oe>>2]-+g[Le>>2];g[Ce>>2]=+g[ab>>2]*.9951847195625305-+g[hb>>2]*.0980171412229538;g[De>>2]=+g[Bc>>2]*.9951847195625305+ +g[Wb>>2]*.0980171412229538;g[Ee>>2]=+g[Ce>>2]+ 
+g[De>>2];g[Zf>>2]=+g[De>>2]-+g[Ce>>2];g[kf>>2]=+g[Te>>2]+ +g[jf>>2];g[lf>>2]=+g[Dd>>2]+ +g[kf>>2];g[cg>>2]=+g[Dd>>2]-+g[kf>>2];g[ye>>2]=+g[we>>2]+ +g[xe>>2];g[ze>>2]=+g[ve>>2]+ +g[ye>>2];g[dg>>2]=+g[ye>>2]-+g[ve>>2];g[Ae>>2]=+g[lf>>2]*.9987954497337341-+g[ze>>2]*.049067676067352295;g[mg>>2]=+g[cg>>2]*.6715589761734009+ +g[dg>>2]*.7409511208534241;g[uf>>2]=+g[lf>>2]*.049067676067352295+ +g[ze>>2]*.9987954497337341;g[eg>>2]=+g[cg>>2]*.7409511208534241-+g[dg>>2]*.6715589761734009;g[Nd>>2]=+g[wc>>2]+ +g[Md>>2];g[Od>>2]=+g[fc>>2]+ +g[Nd>>2];g[$f>>2]=+g[fc>>2]-+g[Nd>>2];g[_d>>2]=+g[Yd>>2]+ +g[Zd>>2];g[$d>>2]=+g[Xd>>2]+ +g[_d>>2];g[ag>>2]=+g[_d>>2]-+g[Xd>>2];g[ae>>2]=+g[Od>>2]*.9987954497337341+ +g[$d>>2]*.049067676067352295;g[lg>>2]=+g[ag>>2]*.7409511208534241-+g[$f>>2]*.6715589761734009;g[tf>>2]=+g[$d>>2]*.9987954497337341-+g[Od>>2]*.049067676067352295;g[bg>>2]=+g[$f>>2]*.7409511208534241+ +g[ag>>2]*.6715589761734009;g[Ec>>2]=+g[Nb>>2]+ +g[Dc>>2];g[Be>>2]=+g[ae>>2]+ +g[Ae>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*63<<2)>>2]=+g[Ec>>2]-+g[Be>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Ec>>2]+ +g[Be>>2];g[sf>>2]=+g[pf>>2]+ +g[Ee>>2];g[vf>>2]=+g[tf>>2]+ +g[uf>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[sf>>2]+ +g[vf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*63<<2)>>2]=+g[vf>>2]-+g[sf>>2];g[qf>>2]=+g[Ee>>2]-+g[pf>>2];g[rf>>2]=+g[Ae>>2]-+g[ae>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*31<<2)>>2]=+g[qf>>2]+ +g[rf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*33<<2)>>2]=+g[rf>>2]-+g[qf>>2];g[wf>>2]=+g[Nb>>2]-+g[Dc>>2];g[xf>>2]=+g[uf>>2]-+g[tf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*33<<2)>>2]=+g[wf>>2]-+g[xf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*31<<2)>>2]=+g[wf>>2]+ +g[xf>>2];g[_f>>2]=+g[yf>>2]+ +g[Zf>>2];g[fg>>2]=+g[bg>>2]+ +g[eg>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*49<<2)>>2]=+g[_f>>2]-+g[fg>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2]=+g[_f>>2]+ +g[fg>>2];g[kg>>2]=+g[hg>>2]+ +g[gg>>2];g[ng>>2]=+g[lg>>2]+ +g[mg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2]=+g[kg>>2]+ 
+g[ng>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*49<<2)>>2]=+g[ng>>2]-+g[kg>>2];g[ig>>2]=+g[gg>>2]-+g[hg>>2];g[jg>>2]=+g[eg>>2]-+g[bg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*17<<2)>>2]=+g[ig>>2]+ +g[jg>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*47<<2)>>2]=+g[jg>>2]-+g[ig>>2];g[og>>2]=+g[yf>>2]-+g[Zf>>2];g[pg>>2]=+g[mg>>2]-+g[lg>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*47<<2)>>2]=+g[og>>2]-+g[pg>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*17<<2)>>2]=+g[og>>2]+ +g[pg>>2];g[zh>>2]=+g[xh>>2]+ +g[yh>>2];g[Ig>>2]=+g[Ch>>2]+ +g[Fh>>2];g[Jg>>2]=+g[zh>>2]+ +g[Ig>>2];g[ci>>2]=+g[zh>>2]-+g[Ig>>2];g[Qg>>2]=+g[Mg>>2]*.9569403529167175+ +g[Pg>>2]*.290284663438797;g[Xg>>2]=+g[Tg>>2]*.9569403529167175-+g[Wg>>2]*.290284663438797;g[Yg>>2]=+g[Qg>>2]+ +g[Xg>>2];g[mi>>2]=+g[Xg>>2]-+g[Qg>>2];g[Rh>>2]=+g[Ni>>2]+ +g[Oi>>2];g[Uh>>2]=+g[Sh>>2]+ +g[Th>>2];g[Vh>>2]=+g[Rh>>2]+ +g[Uh>>2];g[ni>>2]=+g[Uh>>2]-+g[Rh>>2];g[Ki>>2]=+g[Pg>>2]*.9569403529167175-+g[Mg>>2]*.290284663438797;g[Li>>2]=+g[Tg>>2]*.290284663438797+ +g[Wg>>2]*.9569403529167175;g[Mi>>2]=+g[Ki>>2]+ +g[Li>>2];g[di>>2]=+g[Li>>2]-+g[Ki>>2];g[zi>>2]=+g[vi>>2]+ +g[yi>>2];g[Ai>>2]=+g[si>>2]+ +g[zi>>2];g[ii>>2]=+g[si>>2]-+g[zi>>2];g[Gi>>2]=+g[Ei>>2]+ +g[Fi>>2];g[Hi>>2]=+g[Di>>2]+ +g[Gi>>2];g[ji>>2]=+g[Gi>>2]-+g[Di>>2];g[Ii>>2]=+g[Ai>>2]*.9891765117645264-+g[Hi>>2]*.1467304676771164;g[Ti>>2]=+g[ii>>2]*.5956993103027344+ +g[ji>>2]*.803207516670227;g[_h>>2]=+g[Ai>>2]*.1467304676771164+ +g[Hi>>2]*.9891765117645264;g[ki>>2]=+g[ii>>2]*.803207516670227-+g[ji>>2]*.5956993103027344;g[Ih>>2]=+g[dh>>2]+ +g[Hh>>2];g[Jh>>2]=+g[ah>>2]+ +g[Ih>>2];g[fi>>2]=+g[ah>>2]-+g[Ih>>2];g[Ph>>2]=+g[Nh>>2]+ +g[Oh>>2];g[Qh>>2]=+g[Mh>>2]+ +g[Ph>>2];g[gi>>2]=+g[Ph>>2]-+g[Mh>>2];g[pi>>2]=+g[Jh>>2]*.9891765117645264+ +g[Qh>>2]*.1467304676771164;g[Si>>2]=+g[gi>>2]*.803207516670227-+g[fi>>2]*.5956993103027344;g[Zh>>2]=+g[Qh>>2]*.9891765117645264-+g[Jh>>2]*.1467304676771164;g[hi>>2]=+g[fi>>2]*.803207516670227+ +g[gi>>2]*.5956993103027344;g[Zg>>2]=+g[Jg>>2]+ +g[Yg>>2];g[Ji>>2]=+g[pi>>2]+ 
+g[Ii>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*61<<2)>>2]=+g[Zg>>2]-+g[Ji>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[Zg>>2]+ +g[Ji>>2];g[Yh>>2]=+g[Vh>>2]+ +g[Mi>>2];g[$h>>2]=+g[Zh>>2]+ +g[_h>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[Yh>>2]+ +g[$h>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*61<<2)>>2]=+g[$h>>2]-+g[Yh>>2];g[Wh>>2]=+g[Mi>>2]-+g[Vh>>2];g[Xh>>2]=+g[Ii>>2]-+g[pi>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*29<<2)>>2]=+g[Wh>>2]+ +g[Xh>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*35<<2)>>2]=+g[Xh>>2]-+g[Wh>>2];g[ai>>2]=+g[Jg>>2]-+g[Yg>>2];g[bi>>2]=+g[_h>>2]-+g[Zh>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*35<<2)>>2]=+g[ai>>2]-+g[bi>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*29<<2)>>2]=+g[ai>>2]+ +g[bi>>2];g[ei>>2]=+g[ci>>2]+ +g[di>>2];g[li>>2]=+g[hi>>2]+ +g[ki>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*51<<2)>>2]=+g[ei>>2]-+g[li>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2]=+g[ei>>2]+ +g[li>>2];g[Ri>>2]=+g[ni>>2]+ +g[mi>>2];g[Ui>>2]=+g[Si>>2]+ +g[Ti>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2]=+g[Ri>>2]+ +g[Ui>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*51<<2)>>2]=+g[Ui>>2]-+g[Ri>>2];g[oi>>2]=+g[mi>>2]-+g[ni>>2];g[Qi>>2]=+g[ki>>2]-+g[hi>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*19<<2)>>2]=+g[oi>>2]+ +g[Qi>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*45<<2)>>2]=+g[Qi>>2]-+g[oi>>2];g[Vi>>2]=+g[ci>>2]-+g[di>>2];g[Wi>>2]=+g[Ti>>2]-+g[Si>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*45<<2)>>2]=+g[Vi>>2]-+g[Wi>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*19<<2)>>2]=+g[Vi>>2]+ +g[Wi>>2];c[jr>>2]=(c[jr>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=kr;return}function Rs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,20,6568);i=b;return}function Ss(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0;ha=i;i=i+208|0;n=ha+200|0;o=ha+196|0;p=ha+192|0;q=ha+188|0;r=ha+184|0;s=ha+180|0;t=ha+176|0;ia=ha+172|0;u=ha+168|0;v=ha+164|0;ga=ha+152|0;A=ha+148|0;X=ha+144|0;J=ha+140|0;V=ha+136|0;ea=ha+132|0;$=ha+128|0;I=ha+124|0;Y=ha+120|0;K=ha+116|0;Q=ha+112|0;fa=ha+108|0;aa=ha+104|0;L=ha+100|0;W=ha+96|0;w=ha+92|0;x=ha+88|0;y=ha+84|0;z=ha+80|0;R=ha+76|0;S=ha+72|0;T=ha+68|0;U=ha+64|0;B=ha+60|0;C=ha+56|0;D=ha+52|0;E=ha+48|0;M=ha+44|0;N=ha+40|0;O=ha+36|0;P=ha+32|0;da=ha+28|0;F=ha+24|0;ba=ha+20|0;ca=ha+16|0;Z=ha+12|0;_=ha+8|0;G=ha+4|0;H=ha;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ia>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ha+160>>2]=.8660253882408142;g[ha+156>>2]=.5;c[ga>>2]=c[ia>>2];while(1){if((c[ga>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[y>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[w>>2]+ +g[z>>2];g[X>>2]=+g[w>>2]-+g[z>>2]*.5;g[J>>2]=+g[y>>2]-+g[x>>2];g[R>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[S>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[T>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[V>>2]=+g[R>>2]-+g[U>>2]*.5;g[ea>>2]=+g[T>>2]-+g[S>>2];g[$>>2]=+g[R>>2]+ +g[U>>2];g[B>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[C>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[D>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[I>>2]=+g[B>>2]+ +g[E>>2];g[Y>>2]=+g[B>>2]-+g[E>>2]*.5;g[K>>2]=+g[D>>2]-+g[C>>2];g[M>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[N>>2]=+g[c[o>>2]>>2];g[O>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[Q>>2]=+g[M>>2]-+g[P>>2]*.5;g[fa>>2]=+g[O>>2]-+g[N>>2];g[aa>>2]=+g[M>>2]+ 
+g[P>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[A>>2]-+g[I>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[$>>2]-+g[aa>>2];g[L>>2]=(+g[J>>2]-+g[K>>2])*.8660253882408142;g[W>>2]=+g[Q>>2]-+g[V>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[L>>2]+ +g[W>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[W>>2]-+g[L>>2];g[da>>2]=+g[X>>2]-+g[Y>>2];g[F>>2]=(+g[ea>>2]-+g[fa>>2])*.8660253882408142;g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[da>>2]-+g[F>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[da>>2]+ +g[F>>2];g[ba>>2]=+g[A>>2]+ +g[I>>2];g[ca>>2]=+g[$>>2]+ +g[aa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[ba>>2]-+g[ca>>2];g[c[p>>2]>>2]=+g[ba>>2]+ +g[ca>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[_>>2]=+g[V>>2]+ +g[Q>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[Z>>2]-+g[_>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[Z>>2]+ +g[_>>2];g[G>>2]=+g[ea>>2]+ +g[fa>>2];g[H>>2]=+g[J>>2]+ +g[K>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=(+g[G>>2]-+g[H>>2])*.8660253882408142;g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=(+g[H>>2]+ +g[G>>2])*.8660253882408142;c[ga>>2]=(c[ga>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ha;return}function Ts(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,21,6616);i=b;return}function Us(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0;Ta=i;i=i+432|0;n=Ta+424|0;o=Ta+420|0;p=Ta+416|0;q=Ta+412|0;r=Ta+408|0;s=Ta+404|0;t=Ta+400|0;Ua=Ta+396|0;u=Ta+392|0;v=Ta+388|0;Sa=Ta+304|0;A=Ta+300|0;ta=Ta+296|0;Ea=Ta+292|0;oa=Ta+288|0;pa=Ta+284|0;B=Ta+280|0;ma=Ta+276|0;x=Ta+272|0;Ra=Ta+268|0;V=Ta+264|0;Ma=Ta+260|0;W=Ta+256|0;ja=Ta+252|0;y=Ta+248|0;wa=Ta+244|0;ga=Ta+240|0;sa=Ta+236|0;Na=Ta+232|0;Ga=Ta+228|0;P=Ta+224|0;Oa=Ta+220|0;Ha=Ta+216|0;za=Ta+212|0;Ja=Ta+208|0;Ca=Ta+204|0;Ka=Ta+200|0;Da=Ta+196|0;ha=Ta+192|0;ua=Ta+188|0;va=Ta+184|0;Q=Ta+180|0;R=Ta+176|0;S=Ta+172|0;T=Ta+168|0;w=Ta+164|0;M=Ta+160|0;N=Ta+156|0;O=Ta+152|0;xa=Ta+148|0;ya=Ta+144|0;Aa=Ta+140|0;Ba=Ta+136|0;ka=Ta+132|0;la=Ta+128|0;Pa=Ta+124|0;Qa=Ta+120|0;Ia=Ta+116|0;La=Ta+112|0;fa=Ta+108|0;ia=Ta+104|0;Fa=Ta+100|0;_=Ta+96|0;Y=Ta+92|0;Z=Ta+88|0;ba=Ta+84|0;ea=Ta+80|0;ca=Ta+76|0;da=Ta+72|0;U=Ta+68|0;X=Ta+64|0;$=Ta+60|0;aa=Ta+56|0;ra=Ta+52|0;I=Ta+48|0;G=Ta+44|0;L=Ta+40|0;D=Ta+36|0;H=Ta+32|0;na=Ta+28|0;qa=Ta+24|0;J=Ta+20|0;K=Ta+16|0;E=Ta+12|0;F=Ta+8|0;z=Ta+4|0;C=Ta;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ua>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ta+384>>2]=.0833333358168602;g[Ta+380>>2]=.07590298354625702;g[Ta+376>>2]=.2517685294151306;g[Ta+372>>2]=.5035370588302612;g[Ta+368>>2]=.11385448276996613;g[Ta+364>>2]=.2659662365913391;g[Ta+360>>2]=.3873905837535858;g[Ta+356>>2]=.30046260356903076;g[Ta+352>>2]=.13298311829566956;g[Ta+348>>2]=.2582603991031647;g[Ta+344>>2]=2.0;g[Ta+340>>2]=1.7320507764816284;g[Ta+336>>2]=.30023863911628723;g[Ta+332>>2]=.011599105782806873;g[Ta+328>>2]=.15689139068126678;g[Ta+324>>2]=.2562476694583893;g[Ta+320>>2]=.174138605
594635;g[Ta+316>>2]=.5751407146453857;g[Ta+312>>2]=.8660253882408142;g[Ta+308>>2]=.5;c[Sa>>2]=c[Ua>>2];while(1){if((c[Sa>>2]|0)<=0)break;g[A>>2]=+g[c[n>>2]>>2];g[ua>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[va>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[ga>>2]=+g[ua>>2]+ +g[va>>2];g[Q>>2]=+g[c[o>>2]>>2];g[R>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[S>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[sa>>2]=+g[Q>>2]+ +g[T>>2];g[Na>>2]=+g[R>>2]-+g[S>>2];g[Ga>>2]=+g[Q>>2]-+g[T>>2]*.5;g[w>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[M>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[N>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[O>>2]=+g[M>>2]+ +g[N>>2];g[P>>2]=+g[w>>2]+ +g[O>>2];g[Oa>>2]=+g[M>>2]-+g[N>>2];g[Ha>>2]=+g[w>>2]-+g[O>>2]*.5;g[xa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ya>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[Ja>>2]=+g[xa>>2]+ +g[ya>>2];g[Aa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ba>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[Ka>>2]=+g[Aa>>2]+ +g[Ba>>2];g[Da>>2]=+g[za>>2]+ +g[Ca>>2];g[ha>>2]=+g[Ja>>2]+ +g[Ka>>2];g[ta>>2]=+g[P>>2]-+g[sa>>2];g[Ea>>2]=+g[wa>>2]+ +g[Da>>2];g[oa>>2]=+g[sa>>2]+ +g[P>>2];g[pa>>2]=+g[ga>>2]+ +g[ha>>2];g[B>>2]=+g[oa>>2]+ +g[pa>>2];g[ka>>2]=+g[Na>>2]+ +g[Oa>>2];g[la>>2]=+g[za>>2]-+g[Ca>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[x>>2]=+g[ka>>2]+ +g[la>>2];g[Pa>>2]=(+g[Na>>2]-+g[Oa>>2])*.8660253882408142;g[Qa>>2]=+g[wa>>2]-+g[Da>>2]*.5;g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[V>>2]=+g[Qa>>2]-+g[Pa>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[La>>2]=(+g[Ja>>2]-+g[Ka>>2])*.8660253882408142;g[Ma>>2]=+g[Ia>>2]-+g[La>>2];g[W>>2]=+g[Ia>>2]+ +g[La>>2];g[fa>>2]=+g[Ga>>2]+ +g[Ha>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2]*.5;g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[y>>2]=+g[fa>>2]+ +g[ia>>2];g[c[p>>2]>>2]=+g[A>>2]+ +g[B>>2];g[Fa>>2]=+g[ta>>2]*.5751407146453857-+g[Ea>>2]*.174138605594635;g[_>>2]=+g[ta>>2]*.174138605594635+ 
+g[Ea>>2]*.5751407146453857;g[U>>2]=+g[Ma>>2]*.2562476694583893-+g[Ra>>2]*.15689139068126678;g[X>>2]=+g[V>>2]*.011599105782806873-+g[W>>2]*.30023863911628723;g[Y>>2]=+g[U>>2]+ +g[X>>2];g[Z>>2]=(+g[X>>2]-+g[U>>2])*1.7320507764816284;g[$>>2]=+g[V>>2]*.30023863911628723+ +g[W>>2]*.011599105782806873;g[aa>>2]=+g[Ra>>2]*.2562476694583893+ +g[Ma>>2]*.15689139068126678;g[ba>>2]=+g[$>>2]-+g[aa>>2];g[ea>>2]=(+g[aa>>2]+ +g[$>>2])*1.7320507764816284;g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[Y>>2]*2.0+ +g[Fa>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[ba>>2]*2.0+ +g[_>>2];g[ca>>2]=+g[_>>2]-+g[ba>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[Z>>2]-+g[ca>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[Z>>2]+ +g[ca>>2];g[da>>2]=+g[Fa>>2]-+g[Y>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[da>>2]-+g[ea>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[da>>2]+ +g[ea>>2];g[na>>2]=+g[ja>>2]*.2582603991031647-+g[ma>>2]*.13298311829566956;g[qa>>2]=(+g[oa>>2]-+g[pa>>2])*.30046260356903076;g[ra>>2]=+g[na>>2]*2.0+ +g[qa>>2];g[I>>2]=+g[qa>>2]-+g[na>>2];g[E>>2]=+g[ma>>2]*.3873905837535858+ +g[ja>>2]*.2659662365913391;g[F>>2]=+g[x>>2]*.11385448276996613-+g[y>>2]*.5035370588302612;g[G>>2]=+g[E>>2]-+g[F>>2];g[L>>2]=+g[E>>2]+ +g[F>>2];g[z>>2]=+g[x>>2]*.2517685294151306+ +g[y>>2]*.07590298354625702;g[C>>2]=+g[A>>2]-+g[B>>2]*.0833333358168602;g[D>>2]=+g[z>>2]*2.0+ +g[C>>2];g[H>>2]=+g[C>>2]-+g[z>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[ra>>2]+ +g[D>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[D>>2]-+g[ra>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[G>>2]+ +g[J>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[J>>2]-+g[G>>2];g[K>>2]=+g[I>>2]+ +g[H>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[L>>2]+ +g[K>>2];c[Sa>>2]=(c[Sa>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ta;return}function 
Vs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,22,6664);i=b;return}function Ws(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0;ja=i;i=i+240|0;n=ja+224|0;o=ja+220|0;p=ja+216|0;q=ja+212|0;r=ja+208|0;s=ja+204|0;t=ja+200|0;ka=ja+196|0;u=ja+192|0;v=ja+188|0;ia=ja+160|0;y=ja+156|0;G=ja+152|0;B=ja+148|0;da=ja+144|0;X=ja+140|0;aa=ja+136|0;U=ja+132|0;ba=ja+128|0;N=ja+124|0;ga=ja+120|0;E=ja+116|0;ea=ja+112|0;Q=ja+108|0;ha=ja+104|0;w=ja+100|0;x=ja+96|0;z=ja+92|0;A=ja+88|0;V=ja+84|0;W=ja+80|0;S=ja+76|0;T=ja+72|0;L=ja+68|0;M=ja+64|0;C=ja+60|0;D=ja+56|0;O=ja+52|0;P=ja+48|0;Z=ja+44|0;$=ja+40|0;_=ja+36|0;K=ja+32|0;Y=ja+28|0;R=ja+24|0;ca=ja+20|0;F=ja+16|0;fa=ja+12|0;H=ja+8|0;J=ja+4|0;I=ja;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ka>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ja+184>>2]=.9009688496589661;g[ja+180>>2]=.22252093255519867;g[ja+176>>2]=.6234897971153259;g[ja+172>>2]=.4338837265968323;g[ja+168>>2]=.9749279022216797;g[ja+164>>2]=.7818315029144287;c[ia>>2]=c[ka>>2];while(1){if((c[ia>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[G>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[da>>2]=+g[z>>2]+ +g[A>>2];g[V>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[W>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[aa>>2]=+g[V>>2]+ +g[W>>2];g[S>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[T>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[ba>>2]=+g[S>>2]+ +g[T>>2];g[L>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[M>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[ga>>2]=+g[L>>2]+ 
+g[M>>2];g[C>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[D>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[ea>>2]=+g[C>>2]+ +g[D>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[P>>2]=+g[c[o>>2]>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[ha>>2]=+g[O>>2]+ +g[P>>2];g[Z>>2]=+g[X>>2]-+g[U>>2];g[$>>2]=+g[Q>>2]-+g[N>>2];g[_>>2]=+g[E>>2]-+g[B>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[Z>>2]*.7818315029144287+ +g[_>>2]*.9749279022216797+ +g[$>>2]*.4338837265968323;g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[_>>2]*.4338837265968323+ +g[$>>2]*.7818315029144287-+g[Z>>2]*.9749279022216797;g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[Z>>2]*.4338837265968323+ +g[$>>2]*.9749279022216797-+g[_>>2]*.7818315029144287;g[K>>2]=+g[B>>2]+ +g[E>>2];g[Y>>2]=+g[U>>2]+ +g[X>>2];g[R>>2]=+g[N>>2]+ +g[Q>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[K>>2]*.6234897971153259+ +g[y>>2]+-(+g[R>>2]*.22252093255519867+ +g[Y>>2]*.9009688496589661);g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[y>>2]+ +g[Y>>2]+ +g[K>>2]+ +g[R>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Y>>2]*.6234897971153259+ +g[y>>2]+-(+g[R>>2]*.9009688496589661+ +g[K>>2]*.22252093255519867);g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[R>>2]*.6234897971153259+ +g[y>>2]+-(+g[K>>2]*.9009688496589661+ +g[Y>>2]*.22252093255519867);g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[F>>2]=+g[ga>>2]-+g[ha>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[ca>>2]*.9749279022216797+ +g[fa>>2]*.4338837265968323+ +g[F>>2]*.7818315029144287;g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[fa>>2]*.9749279022216797+ +g[F>>2]*.4338837265968323-+g[ca>>2]*.7818315029144287;g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[F>>2]*.9749279022216797-+g[fa>>2]*.7818315029144287-+g[ca>>2]*.4338837265968323;g[H>>2]=+g[ba>>2]+ +g[aa>>2];g[J>>2]=+g[da>>2]+ +g[ea>>2];g[I>>2]=+g[ga>>2]+ +g[ha>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[H>>2]*.6234897971153259+ +g[G>>2]+-(+g[I>>2]*.9009688496589661+ +g[J>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[I>>2]*.6234897971153259+ 
+g[G>>2]+-(+g[J>>2]*.9009688496589661+ +g[H>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[J>>2]*.6234897971153259+ +g[G>>2]+-(+g[I>>2]*.22252093255519867+ +g[H>>2]*.9009688496589661);g[c[p>>2]>>2]=+g[G>>2]+ +g[H>>2]+ +g[J>>2]+ +g[I>>2];c[ia>>2]=(c[ia>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ja;return}function Xs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,23,6712);i=b;return}function Ys(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0;Ha=i;i=i+352|0;n=Ha+336|0;o=Ha+332|0;p=Ha+328|0;q=Ha+324|0;r=Ha+320|0;s=Ha+316|0;t=Ha+312|0;Ia=Ha+308|0;u=Ha+304|0;v=Ha+300|0;Ga=Ha+256|0;oa=Ha+252|0;Z=Ha+248|0;T=Ha+244|0;L=Ha+240|0;M=Ha+236|0;F=Ha+232|0;ka=Ha+228|0;la=Ha+224|0;ba=Ha+220|0;ca=Ha+216|0;da=Ha+212|0;Ba=Ha+208|0;Ea=Ha+204|0;P=Ha+200|0;ua=Ha+196|0;xa=Ha+192|0;O=Ha+188|0;_=Ha+184|0;$=Ha+180|0;aa=Ha+176|0;R=Ha+172|0;ma=Ha+168|0;na=Ha+164|0;S=Ha+160|0;sa=Ha+156|0;za=Ha+152|0;Ca=Ha+148|0;va=Ha+144|0;B=Ha+140|0;Da=Ha+136|0;ga=Ha+132|0;ta=Ha+128|0;ja=Ha+124|0;wa=Ha+120|0;E=Ha+116|0;Aa=Ha+112|0;w=Ha+108|0;A=Ha+104|0;G=Ha+100|0;H=Ha+96|0;ha=Ha+92|0;ia=Ha+88|0;C=Ha+84|0;D=Ha+80|0;N=Ha+76|0;X=Ha+72|0;Q=Ha+68|0;U=Ha+64|0;V=Ha+60|0;Y=Ha+56|0;W=Ha+52|0;y=Ha+48|0;z=Ha+44|0;x=Ha+40|0;ea=Ha+36|0;fa=Ha+32|0;ra=Ha+28|0;J=Ha+24|0;I=Ha+20|0;K=Ha+16|0;pa=Ha+12|0;qa=Ha+8|0;ya=Ha+4|0;Fa=Ha;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ia>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ha+296>>2]=.4841229319572449;g[Ha+2
92>>2]=.21650634706020355;g[Ha+288>>2]=.9510565400123596;g[Ha+284>>2]=.5877852439880371;g[Ha+280>>2]=.25;g[Ha+276>>2]=.55901700258255;g[Ha+272>>2]=.5090369582176208;g[Ha+268>>2]=.8236390948295593;g[Ha+264>>2]=.8660253882408142;g[Ha+260>>2]=.5;c[Ga>>2]=c[Ia>>2];while(1){if((c[Ga>>2]|0)<=0)break;g[R>>2]=+g[c[n>>2]>>2];g[ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[na>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[S>>2]=+g[na>>2]+ +g[ma>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[Z>>2]=+g[R>>2]+ +g[S>>2];g[T>>2]=+g[R>>2]-+g[S>>2]*.5;g[sa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[za>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ca>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[w>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[B>>2]=+g[w>>2]-+g[A>>2];g[Da>>2]=+g[w>>2]+ +g[A>>2];g[G>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[H>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ga>>2]=+g[G>>2]-+g[H>>2];g[ta>>2]=+g[H>>2]+ +g[G>>2];g[ha>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ia>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[wa>>2]=+g[ia>>2]+ +g[ha>>2];g[C>>2]=+g[c[o>>2]>>2];g[D>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[Aa>>2]=+g[D>>2]+ +g[C>>2];g[L>>2]=+g[ga>>2]-+g[ja>>2];g[M>>2]=+g[E>>2]+ +g[B>>2];g[F>>2]=+g[B>>2]-+g[E>>2];g[ka>>2]=+g[ga>>2]+ +g[ja>>2];g[la>>2]=+g[F>>2]-+g[ka>>2];g[ba>>2]=+g[za>>2]+ +g[Aa>>2];g[ca>>2]=+g[Ca>>2]+ +g[Da>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2]*.5;g[Ea>>2]=+g[Ca>>2]-+g[Da>>2]*.5;g[P>>2]=+g[Ba>>2]+ +g[Ea>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2]*.5;g[xa>>2]=+g[va>>2]-+g[wa>>2]*.5;g[O>>2]=+g[ua>>2]+ +g[xa>>2];g[_>>2]=+g[sa>>2]+ +g[ta>>2];g[$>>2]=+g[va>>2]+ +g[wa>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=(+g[la>>2]-+g[oa>>2])*.8660253882408142;g[N>>2]=+g[L>>2]*.8236390948295593+ 
+g[M>>2]*.5090369582176208;g[X>>2]=+g[M>>2]*.8236390948295593-+g[L>>2]*.5090369582176208;g[Q>>2]=(+g[O>>2]-+g[P>>2])*.55901700258255;g[U>>2]=+g[O>>2]+ +g[P>>2];g[V>>2]=+g[T>>2]-+g[U>>2]*.25;g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[T>>2]+ +g[U>>2];g[Y>>2]=+g[V>>2]-+g[Q>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[X>>2]+ +g[Y>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[Y>>2]-+g[X>>2];g[W>>2]=+g[Q>>2]+ +g[V>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[N>>2]+ +g[W>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[W>>2]-+g[N>>2];g[y>>2]=+g[_>>2]-+g[$>>2];g[z>>2]=+g[ca>>2]-+g[ba>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[y>>2]*.5877852439880371+ +g[z>>2]*.9510565400123596;g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[z>>2]*.5877852439880371-+g[y>>2]*.9510565400123596;g[x>>2]=(+g[aa>>2]-+g[da>>2])*.55901700258255;g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[fa>>2]=+g[Z>>2]-+g[ea>>2]*.25;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[fa>>2]-+g[x>>2];g[c[p>>2]>>2]=+g[Z>>2]+ +g[ea>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[x>>2]+ +g[fa>>2];g[pa>>2]=+g[oa>>2]*.8660253882408142+ +g[la>>2]*.21650634706020355;g[qa>>2]=(+g[ka>>2]+ +g[F>>2])*.4841229319572449;g[ra>>2]=+g[pa>>2]+ +g[qa>>2];g[J>>2]=+g[qa>>2]-+g[pa>>2];g[ya>>2]=+g[ua>>2]-+g[xa>>2];g[Fa>>2]=+g[Ba>>2]-+g[Ea>>2];g[I>>2]=+g[ya>>2]*.9510565400123596+ +g[Fa>>2]*.5877852439880371;g[K>>2]=+g[Fa>>2]*.9510565400123596-+g[ya>>2]*.5877852439880371;g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[ra>>2]-+g[I>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[K>>2]-+g[J>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[ra>>2]+ +g[I>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[J>>2]+ +g[K>>2];c[Ga>>2]=(c[Ga>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ha;return}function Zs(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,24,6760);i=b;return}function 
_s(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0;Ba=i;i=i+288|0;n=Ba+284|0;o=Ba+280|0;p=Ba+276|0;q=Ba+272|0;r=Ba+268|0;s=Ba+264|0;t=Ba+260|0;Ca=Ba+256|0;u=Ba+252|0;v=Ba+248|0;Aa=Ba+232|0;y=Ba+228|0;B=Ba+224|0;C=Ba+220|0;za=Ba+216|0;ia=Ba+212|0;aa=Ba+208|0;da=Ba+204|0;ea=Ba+200|0;F=Ba+196|0;ha=Ba+192|0;qa=Ba+188|0;_=Ba+184|0;K=Ba+180|0;U=Ba+176|0;xa=Ba+172|0;Z=Ba+168|0;J=Ba+164|0;R=Ba+160|0;ga=Ba+156|0;fa=Ba+152|0;O=Ba+148|0;V=Ba+144|0;w=Ba+140|0;x=Ba+136|0;z=Ba+132|0;A=Ba+128|0;D=Ba+124|0;E=Ba+120|0;ba=Ba+116|0;ca=Ba+112|0;ma=Ba+108|0;S=Ba+104|0;pa=Ba+100|0;T=Ba+96|0;ka=Ba+92|0;la=Ba+88|0;na=Ba+84|0;oa=Ba+80|0;ta=Ba+76|0;P=Ba+72|0;wa=Ba+68|0;Q=Ba+64|0;ra=Ba+60|0;sa=Ba+56|0;ua=Ba+52|0;va=Ba+48|0;ja=Ba+44|0;ya=Ba+40|0;I=Ba+36|0;L=Ba+32|0;G=Ba+28|0;H=Ba+24|0;M=Ba+20|0;N=Ba+16|0;W=Ba+12|0;X=Ba+8|0;Y=Ba+4|0;$=Ba;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ca>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ba+244>>2]=.9238795042037964;g[Ba+240>>2]=.3826834261417389;g[Ba+236>>2]=.7071067690849304;c[Aa>>2]=c[Ca>>2];while(1){if((c[Aa>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[A>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[C>>2]=+g[y>>2]+ +g[B>>2];g[za>>2]=+g[w>>2]-+g[x>>2];g[ia>>2]=+g[z>>2]-+g[A>>2];g[D>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[aa>>2]=+g[D>>2]+ +g[E>>2];g[ga>>2]=+g[D>>2]-+g[E>>2];g[ba>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ca>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[da>>2]=+g[ba>>2]+ 
+g[ca>>2];g[fa>>2]=+g[ba>>2]-+g[ca>>2];g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[F>>2]=(+g[ga>>2]+ +g[fa>>2])*.7071067690849304;g[ha>>2]=(+g[fa>>2]-+g[ga>>2])*.7071067690849304;g[ka>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[la>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[S>>2]=+g[ka>>2]+ +g[la>>2];g[na>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[oa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[T>>2]=+g[na>>2]+ +g[oa>>2];g[qa>>2]=+g[ma>>2]*.3826834261417389-+g[pa>>2]*.9238795042037964;g[_>>2]=+g[S>>2]+ +g[T>>2];g[K>>2]=+g[ma>>2]*.9238795042037964+ +g[pa>>2]*.3826834261417389;g[U>>2]=+g[S>>2]-+g[T>>2];g[ra>>2]=+g[c[o>>2]>>2];g[sa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[P>>2]=+g[ra>>2]+ +g[sa>>2];g[ua>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[va>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[Q>>2]=+g[ua>>2]+ +g[va>>2];g[xa>>2]=+g[ta>>2]*.3826834261417389+ +g[wa>>2]*.9238795042037964;g[Z>>2]=+g[P>>2]+ +g[Q>>2];g[J>>2]=+g[ta>>2]*.9238795042037964-+g[wa>>2]*.3826834261417389;g[R>>2]=+g[P>>2]-+g[Q>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[C>>2]-+g[ea>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[_>>2]-+g[Z>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[ya>>2]=+g[qa>>2]-+g[xa>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[ja>>2]+ +g[ya>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[ya>>2]-+g[ja>>2];g[I>>2]=+g[za>>2]+ +g[F>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[I>>2]-+g[L>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[G>>2]=+g[za>>2]-+g[F>>2];g[H>>2]=+g[xa>>2]+ +g[qa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[G>>2]-+g[H>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[G>>2]+ +g[H>>2];g[M>>2]=+g[ia>>2]+ +g[ha>>2];g[N>>2]=+g[K>>2]-+g[J>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[N>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[N>>2]-+g[M>>2];g[O>>2]=+g[y>>2]-+g[B>>2];g[V>>2]=(+g[R>>2]+ 
+g[U>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[O>>2]-+g[V>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[O>>2]+ +g[V>>2];g[W>>2]=+g[da>>2]-+g[aa>>2];g[X>>2]=(+g[U>>2]-+g[R>>2])*.7071067690849304;g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[W>>2]+ +g[X>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[X>>2]-+g[W>>2];g[Y>>2]=+g[C>>2]+ +g[ea>>2];g[$>>2]=+g[Z>>2]+ +g[_>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[Y>>2]-+g[$>>2];g[c[p>>2]>>2]=+g[Y>>2]+ +g[$>>2];c[Aa>>2]=(c[Aa>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ba;return}function $s(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,25,6808);i=b;return}function at(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0;bb=i;i=i+416|0;n=bb+400|0;o=bb+396|0;p=bb+392|0;q=bb+388|0;r=bb+384|0;s=bb+380|0;t=bb+376|0;cb=bb+372|0;u=bb+368|0;v=bb+364|0;ab=bb+344|0;X=bb+340|0;T=bb+336|0;ha=bb+332|0;E=bb+328|0;Ua=bb+324|0;oa=bb+320|0;pa=bb+316|0;$a=bb+312|0;Ca=bb+308|0;Ja=bb+304|0;Ka=bb+300|0;N=bb+296|0;O=bb+292|0;R=bb+288|0;x=bb+284|0;A=bb+280|0;G=bb+276|0;ia=bb+272|0;ja=bb+268|0;ka=bb+264|0;K=bb+260|0;L=bb+256|0;Q=bb+252|0;va=bb+248|0;ya=bb+244|0;F=bb+240|0;w=bb+236|0;W=bb+232|0;C=bb+228|0;fa=bb+224|0;ga=bb+220|0;D=bb+216|0;_=bb+212|0;wa=bb+208|0;Xa=bb+204|0;z=bb+200|0;_a=bb+196|0;Ba=bb+192|0;ba=bb+188|0;ta=bb+184|0;Fa=bb+180|0;Aa=bb+176|0;Qa=bb+172|0;ua=bb+168|0;Ta=bb+164|0;xa=b
b+160|0;Ia=bb+156|0;y=bb+152|0;Y=bb+148|0;Z=bb+144|0;Va=bb+140|0;Wa=bb+136|0;Ya=bb+132|0;Za=bb+128|0;$=bb+124|0;aa=bb+120|0;Da=bb+116|0;Ea=bb+112|0;Oa=bb+108|0;Pa=bb+104|0;Ra=bb+100|0;Sa=bb+96|0;Ga=bb+92|0;Ha=bb+88|0;za=bb+84|0;B=bb+80|0;M=bb+76|0;P=bb+72|0;S=bb+68|0;U=bb+64|0;V=bb+60|0;J=bb+56|0;H=bb+52|0;I=bb+48|0;ca=bb+44|0;ea=bb+40|0;Na=bb+36|0;da=bb+32|0;La=bb+28|0;Ma=bb+24|0;qa=bb+20|0;sa=bb+16|0;na=bb+12|0;ra=bb+8|0;la=bb+4|0;ma=bb;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[cb>>2]=k;c[u>>2]=l;c[v>>2]=m;g[bb+360>>2]=.25;g[bb+356>>2]=.55901700258255;g[bb+352>>2]=.5877852439880371;g[bb+348>>2]=.9510565400123596;c[ab>>2]=c[cb>>2];while(1){if((c[ab>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[W>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[C>>2]=+g[w>>2]+ +g[W>>2];g[fa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ga>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=+g[ga>>2]+ +g[fa>>2];g[X>>2]=+g[w>>2]-+g[W>>2];g[T>>2]=+g[C>>2]+ +g[D>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[Y>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Z>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[wa>>2]=+g[Y>>2]+ +g[Z>>2];g[Va>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Wa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[z>>2]=+g[Va>>2]+ +g[Wa>>2];g[Ya>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Za>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[Ba>>2]=+g[Ya>>2]+ +g[Za>>2];g[$>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[aa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[ta>>2]=+g[$>>2]+ +g[aa>>2];g[Da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ea>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Fa>>2]=+g[Da>>2]-+g[Ea>>2];g[Aa>>2]=+g[Da>>2]+ +g[Ea>>2];g[Oa>>2]=+g[c[o>>2]>>2];g[Pa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[ua>>2]=+g[Oa>>2]+ 
+g[Pa>>2];g[Ra>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Sa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[xa>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Ga>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ha>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[y>>2]=+g[Ga>>2]+ +g[Ha>>2];g[Ua>>2]=+g[Qa>>2]-+g[Ta>>2];g[oa>>2]=+g[_>>2]-+g[ba>>2];g[pa>>2]=+g[Fa>>2]-+g[Ia>>2];g[$a>>2]=+g[Xa>>2]-+g[_a>>2];g[Ca>>2]=+g[_>>2]+ +g[ba>>2];g[Ja>>2]=+g[Fa>>2]+ +g[Ia>>2];g[Ka>>2]=+g[Ca>>2]+ +g[Ja>>2];g[N>>2]=+g[Aa>>2]+ +g[Ba>>2];g[O>>2]=+g[y>>2]+ +g[z>>2];g[R>>2]=+g[N>>2]+ +g[O>>2];g[x>>2]=+g[Aa>>2]-+g[Ba>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[G>>2]=+g[x>>2]+ +g[A>>2];g[ia>>2]=+g[Ta>>2]+ +g[Qa>>2];g[ja>>2]=+g[_a>>2]+ +g[Xa>>2];g[ka>>2]=+g[ia>>2]+ +g[ja>>2];g[K>>2]=+g[wa>>2]+ +g[xa>>2];g[L>>2]=+g[ta>>2]+ +g[ua>>2];g[Q>>2]=+g[K>>2]+ +g[L>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2];g[F>>2]=+g[ya>>2]+ +g[va>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[X>>2]+ +g[Ka>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[ha>>2]-+g[ka>>2];g[za>>2]=+g[va>>2]-+g[ya>>2];g[B>>2]=+g[x>>2]-+g[A>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[za>>2]*.9510565400123596-+g[B>>2]*.5877852439880371;g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[za>>2]*.5877852439880371+ +g[B>>2]*.9510565400123596;g[M>>2]=+g[K>>2]-+g[L>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[M>>2]*.5877852439880371-+g[P>>2]*.9510565400123596;g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[M>>2]*.9510565400123596+ +g[P>>2]*.5877852439880371;g[S>>2]=(+g[Q>>2]-+g[R>>2])*.55901700258255;g[U>>2]=+g[Q>>2]+ +g[R>>2];g[V>>2]=+g[T>>2]-+g[U>>2]*.25;g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[S>>2]+ +g[V>>2];g[c[p>>2]>>2]=+g[T>>2]+ +g[U>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[V>>2]-+g[S>>2];g[J>>2]=(+g[F>>2]-+g[G>>2])*.55901700258255;g[H>>2]=+g[F>>2]+ +g[G>>2];g[I>>2]=+g[E>>2]-+g[H>>2]*.25;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[I>>2]-+g[J>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[E>>2]+ 
+g[H>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[J>>2]+ +g[I>>2];g[ca>>2]=+g[Ua>>2]*.9510565400123596+ +g[$a>>2]*.5877852439880371;g[ea>>2]=+g[$a>>2]*.9510565400123596-+g[Ua>>2]*.5877852439880371;g[La>>2]=(+g[Ca>>2]-+g[Ja>>2])*.55901700258255;g[Ma>>2]=+g[X>>2]-+g[Ka>>2]*.25;g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[da>>2]=+g[Ma>>2]-+g[La>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[Na>>2]-+g[ca>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[da>>2]+ +g[ea>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Na>>2]+ +g[ca>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[da>>2]-+g[ea>>2];g[qa>>2]=+g[oa>>2]*.9510565400123596+ +g[pa>>2]*.5877852439880371;g[sa>>2]=+g[pa>>2]*.9510565400123596-+g[oa>>2]*.5877852439880371;g[la>>2]=+g[ka>>2]*.25+ +g[ha>>2];g[ma>>2]=(+g[ja>>2]-+g[ia>>2])*.55901700258255;g[na>>2]=+g[la>>2]+ +g[ma>>2];g[ra>>2]=+g[ma>>2]-+g[la>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[na>>2]-+g[qa>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[sa>>2]+ +g[ra>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[qa>>2]+ +g[na>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[ra>>2]-+g[sa>>2];c[ab>>2]=(c[ab>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=bb;return}function bt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,26,6856);i=b;return}function ct(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0;wc=i;i=i+848|0;n=wc+844|0;o=wc+840|0;p=wc+836|0;q=wc+832|0;r=wc+828|0;s=wc+824|0;t=wc+820|0;xc=wc+816|0;u=wc+812|0;v=wc+808|0;vc=wc+644|0;vb=wc+640|0;oa=wc+636|0;aa=wc+632|0;qa=wc+628|0;ub=wc+624|0;wb=wc+620|0;Xb=wc+616|0;z=wc+612|0;kb=wc+608|0;V=wc+604|0;G=wc+600|0;W=wc+596|0;dc=wc+592|0;hb=wc+588|0;R=wc+584|0;kc=wc+580|0;S=wc+576|0;sc=wc+572|0;gb=wc+568|0;O=wc+564|0;Bb=wc+560|0;P=wc+556|0;Kb=wc+552|0;jb=wc+548|0;Y=wc+544|0;Rb=wc+540|0;Z=wc+536|0;tb=wc+532|0;pa=wc+528|0;qb=wc+524|0;ib=wc+520|0;lb=wc+516|0;rb=wc+512|0;sb=wc+508|0;w=wc+504|0;Fa=wc+500|0;D=wc+496|0;A=wc+492|0;B=wc+488|0;Vb=wc+484|0;E=wc+480|0;y=wc+476|0;C=wc+472|0;F=wc+468|0;Tb=wc+464|0;Ub=wc+460|0;Wb=wc+456|0;x=wc+452|0;hc=wc+448|0;ec=wc+444|0;fc=wc+440|0;$b=wc+436|0;ic=wc+432|0;cc=wc+428|0;gc=wc+424|0;jc=wc+420|0;Zb=wc+416|0;_b=wc+412|0;ac=wc+408|0;bc=wc+404|0;yb=wc+400|0;tc=wc+396|0;uc=wc+392|0;oc=wc+388|0;zb=wc+384|0;rc=wc+380|0;xb=wc+376|0;Ab=wc+372|0;mc=wc+368|0;nc=wc+364|0;pc=wc+360|0;qc=wc+356|0;Ob=wc+352|0;Lb=wc+348|0;Mb=wc+344|0;Gb=wc+340|0;Pb=wc+336|0;Jb=wc+332|0;Nb=wc+328|0;Qb=wc+324|0;Eb=wc+320|0;Fb=wc+316|0;Hb=wc+312|0;Ib=wc+308|0;pb=wc+304|0;ob=wc+300|0;Qa=wc+296|0;mb=wc+292|0;n
b=wc+288|0;Ra=wc+284|0;Yb=wc+280|0;lc=wc+276|0;Cb=wc+272|0;Db=wc+268|0;L=wc+264|0;M=wc+260|0;N=wc+256|0;I=wc+252|0;J=wc+248|0;K=wc+244|0;Sb=wc+240|0;fa=wc+236|0;ga=wc+232|0;ta=wc+228|0;wa=wc+224|0;xa=wc+220|0;Ea=wc+216|0;Ca=wc+212|0;ja=wc+208|0;ma=wc+204|0;na=wc+200|0;za=wc+196|0;ya=wc+192|0;ra=wc+188|0;sa=wc+184|0;ua=wc+180|0;va=wc+176|0;ha=wc+172|0;ia=wc+168|0;ka=wc+164|0;la=wc+160|0;Aa=wc+156|0;Ba=wc+152|0;Da=wc+148|0;H=wc+144|0;ba=wc+140|0;ca=wc+136|0;Q=wc+132|0;T=wc+128|0;U=wc+124|0;Ha=wc+120|0;Ia=wc+116|0;Ja=wc+112|0;X=wc+108|0;_=wc+104|0;$=wc+100|0;da=wc+96|0;ea=wc+92|0;Ga=wc+88|0;Ma=wc+84|0;Pa=wc+80|0;Sa=wc+76|0;ab=wc+72|0;$a=wc+68|0;bb=wc+64|0;cb=wc+60|0;Va=wc+56|0;eb=wc+52|0;_a=wc+48|0;Ka=wc+44|0;La=wc+40|0;Na=wc+36|0;Oa=wc+32|0;Ta=wc+28|0;Ua=wc+24|0;Ya=wc+20|0;Za=wc+16|0;Wa=wc+12|0;Xa=wc+8|0;db=wc+4|0;fb=wc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[xc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[wc+804>>2]=.9980267286300659;g[wc+800>>2]=.1255810409784317;g[wc+796>>2]=1.9960534572601318;g[wc+792>>2]=.06279052048921585;g[wc+788>>2]=.80901700258255;g[wc+784>>2]=.30901700258255005;g[wc+780>>2]=1.3690942525863647;g[wc+776>>2]=.728968620300293;g[wc+772>>2]=.963507354259491;g[wc+768>>2]=.8763066530227661;g[wc+764>>2]=.49737977981567383;g[wc+760>>2]=.9685831665992737;g[wc+756>>2]=.6845471262931824;g[wc+752>>2]=1.457937240600586;g[wc+748>>2]=.4817536771297455;g[wc+744>>2]=1.7526133060455322;g[wc+740>>2]=.24868988990783691;g[wc+736>>2]=1.9371663331985474;g[wc+732>>2]=.9921147227287292;g[wc+728>>2]=.25066646933555603;g[wc+724>>2]=.4257792830467224;g[wc+720>>2]=1.8096541166305542;g[wc+716>>2]=1.2748479843139648;g[wc+712>>2]=.7705132365226746;g[wc+708>>2]=.8443279266357422;g[wc+704>>2]=1.0716536045074463;g[wc+700>>2]=.12533323466777802;g[wc+696>>2]=1.9842294454574585;g[wc+692>>2]=.9048270583152771;g[wc+688>>2]=.8515585660934448;g[wc+684>>2]=.6374239921569824;g[wc+680>>2]=1.5410264730453491;g[wc+676>>2]=.5358268022537231;g[wc+672>>2]=1.6886558532714844;g[w
c+668>>2]=.29389262199401855;g[wc+664>>2]=.4755282700061798;g[wc+660>>2]=.25;g[wc+656>>2]=.55901700258255;g[wc+652>>2]=.5877852439880371;g[wc+648>>2]=.9510565400123596;c[vc>>2]=c[xc>>2];while(1){if((c[vc>>2]|0)<=0)break;g[vb>>2]=+g[c[n>>2]>>2];g[rb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[sb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[tb>>2]=+g[rb>>2]+ +g[sb>>2];g[pa>>2]=+g[rb>>2]-+g[sb>>2];g[w>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Fa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[qb>>2]=+g[w>>2]+ +g[Fa>>2];g[oa>>2]=+g[w>>2]-+g[Fa>>2];g[aa>>2]=+g[pa>>2]*.9510565400123596;g[qa>>2]=+g[oa>>2]*.9510565400123596+ +g[pa>>2]*.5877852439880371;g[ub>>2]=(+g[qb>>2]-+g[tb>>2])*.55901700258255;g[wb>>2]=+g[qb>>2]+ +g[tb>>2];g[Xb>>2]=+g[vb>>2]-+g[wb>>2]*.25;g[D>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Tb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ub>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[A>>2]=+g[Tb>>2]+ +g[Ub>>2];g[Wb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[B>>2]=+g[Wb>>2]+ +g[x>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[E>>2]=+g[A>>2]+ +g[B>>2];g[y>>2]=+g[Wb>>2]-+g[x>>2];g[z>>2]=+g[Vb>>2]*.4755282700061798+ +g[y>>2]*.29389262199401855;g[kb>>2]=+g[D>>2]+ +g[E>>2];g[V>>2]=+g[y>>2]*.4755282700061798-+g[Vb>>2]*.29389262199401855;g[C>>2]=(+g[A>>2]-+g[B>>2])*.55901700258255;g[F>>2]=+g[D>>2]-+g[E>>2]*.25;g[G>>2]=+g[C>>2]+ +g[F>>2];g[W>>2]=+g[F>>2]-+g[C>>2];g[hc>>2]=+g[c[o>>2]>>2];g[Zb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[_b>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ec>>2]=+g[Zb>>2]+ +g[_b>>2];g[ac>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[bc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[fc>>2]=+g[ac>>2]+ +g[bc>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[ic>>2]=+g[ec>>2]+ +g[fc>>2];g[cc>>2]=+g[ac>>2]-+g[bc>>2];g[dc>>2]=+g[$b>>2]*.4755282700061798+ +g[cc>>2]*.29389262199401855;g[hb>>2]=+g[hc>>2]+ 
+g[ic>>2];g[R>>2]=+g[cc>>2]*.4755282700061798-+g[$b>>2]*.29389262199401855;g[gc>>2]=(+g[ec>>2]-+g[fc>>2])*.55901700258255;g[jc>>2]=+g[hc>>2]-+g[ic>>2]*.25;g[kc>>2]=+g[gc>>2]+ +g[jc>>2];g[S>>2]=+g[jc>>2]-+g[gc>>2];g[yb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[mc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[nc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[tc>>2]=+g[mc>>2]+ +g[nc>>2];g[pc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[qc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[uc>>2]=+g[pc>>2]+ +g[qc>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[zb>>2]=+g[tc>>2]+ +g[uc>>2];g[rc>>2]=+g[pc>>2]-+g[qc>>2];g[sc>>2]=+g[oc>>2]*.4755282700061798+ +g[rc>>2]*.29389262199401855;g[gb>>2]=+g[yb>>2]+ +g[zb>>2];g[O>>2]=+g[rc>>2]*.4755282700061798-+g[oc>>2]*.29389262199401855;g[xb>>2]=(+g[tc>>2]-+g[uc>>2])*.55901700258255;g[Ab>>2]=+g[yb>>2]-+g[zb>>2]*.25;g[Bb>>2]=+g[xb>>2]+ +g[Ab>>2];g[P>>2]=+g[Ab>>2]-+g[xb>>2];g[Ob>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Eb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Fb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Lb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[Hb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ib>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Mb>>2]=+g[Hb>>2]+ +g[Ib>>2];g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[Pb>>2]=+g[Lb>>2]+ +g[Mb>>2];g[Jb>>2]=+g[Hb>>2]-+g[Ib>>2];g[Kb>>2]=+g[Gb>>2]*.4755282700061798+ +g[Jb>>2]*.29389262199401855;g[jb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Y>>2]=+g[Jb>>2]*.4755282700061798-+g[Gb>>2]*.29389262199401855;g[Nb>>2]=(+g[Lb>>2]-+g[Mb>>2])*.55901700258255;g[Qb>>2]=+g[Ob>>2]-+g[Pb>>2]*.25;g[Rb>>2]=+g[Nb>>2]+ +g[Qb>>2];g[Z>>2]=+g[Qb>>2]-+g[Nb>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[ib>>2]*.9510565400123596-+g[lb>>2]*.5877852439880371;g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[ib>>2]*.5877852439880371+ +g[lb>>2]*.9510565400123596;g[pb>>2]=+g[vb>>2]+ +g[wb>>2];g[mb>>2]=+g[hb>>2]+ +g[gb>>2];g[nb>>2]=+g[jb>>2]+ +g[kb>>2];g[ob>>2]=(+g[mb>>2]-+g[nb>>2])*.55901700258255;g[Qa>>2]=+g[mb>>2]+ 
+g[nb>>2];g[c[p>>2]>>2]=+g[pb>>2]+ +g[Qa>>2];g[Ra>>2]=+g[pb>>2]-+g[Qa>>2]*.25;g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[ob>>2]+ +g[Ra>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[Ra>>2]-+g[ob>>2];g[Yb>>2]=+g[ub>>2]+ +g[Xb>>2];g[lc>>2]=+g[dc>>2]*1.6886558532714844+ +g[kc>>2]*.5358268022537231;g[Cb>>2]=+g[sc>>2]*1.5410264730453491+ +g[Bb>>2]*.6374239921569824;g[Db>>2]=+g[lc>>2]-+g[Cb>>2];g[L>>2]=+g[Kb>>2]*.8515585660934448+ +g[Rb>>2]*.9048270583152771;g[M>>2]=+g[z>>2]*1.9842294454574585+ +g[G>>2]*.12533323466777802;g[N>>2]=+g[L>>2]+ +g[M>>2];g[I>>2]=+g[dc>>2]*1.0716536045074463-+g[kc>>2]*.8443279266357422;g[J>>2]=+g[Bb>>2]*.7705132365226746-+g[sc>>2]*1.2748479843139648;g[K>>2]=+g[I>>2]+ +g[J>>2];g[Sb>>2]=+g[Kb>>2]*1.8096541166305542-+g[Rb>>2]*.4257792830467224;g[fa>>2]=+g[z>>2]*.25066646933555603-+g[G>>2]*.9921147227287292;g[ga>>2]=+g[Sb>>2]+ +g[fa>>2];g[ra>>2]=+g[dc>>2]*1.9371663331985474+ +g[kc>>2]*.24868988990783691;g[sa>>2]=+g[sc>>2]*1.0716536045074463+ +g[Bb>>2]*.8443279266357422;g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[ua>>2]=+g[Kb>>2]*1.7526133060455322+ +g[Rb>>2]*.4817536771297455;g[va>>2]=+g[z>>2]*1.457937240600586+ +g[G>>2]*.6845471262931824;g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[Ea>>2]=+g[va>>2]-+g[ua>>2];g[Ca>>2]=+g[sa>>2]-+g[ra>>2];g[ha>>2]=+g[kc>>2]*.9685831665992737-+g[dc>>2]*.49737977981567383;g[ia>>2]=+g[Bb>>2]*.5358268022537231-+g[sc>>2]*1.6886558532714844;g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[ka>>2]=+g[Rb>>2]*.8763066530227661-+g[Kb>>2]*.963507354259491;g[la>>2]=+g[G>>2]*.728968620300293-+g[z>>2]*1.3690942525863647;g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[na>>2]=+g[ja>>2]+ +g[ma>>2];g[za>>2]=+g[ka>>2]-+g[la>>2];g[ya>>2]=+g[ia>>2]-+g[ha>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Yb>>2]+ +g[na>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=-(+g[qa>>2]+ +g[xa>>2]);g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[Yb>>2]+ +g[Db>>2]+ +g[ga>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[qa>>2]+ +g[K>>2]-+g[N>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[K>>2]*.30901700258255005+ 
+g[qa>>2]+((+g[fa>>2]-+g[Sb>>2])*.5877852439880371+ +g[N>>2]*.80901700258255)-(+g[lc>>2]+ +g[Cb>>2])*.9510565400123596;g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[Db>>2]*.30901700258255005+ +g[Yb>>2]+((+g[I>>2]-+g[J>>2])*.9510565400123596+(+g[M>>2]-+g[L>>2])*.5877852439880371)-+g[ga>>2]*.80901700258255;g[Aa>>2]=+g[xa>>2]*.25-+g[qa>>2];g[Ba>>2]=(+g[wa>>2]-+g[ta>>2])*.55901700258255;g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[ya>>2]*.5877852439880371+ +g[za>>2]*.9510565400123596+ +g[Aa>>2]-+g[Ba>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[ya>>2]*.9510565400123596+ +g[Aa>>2]+(+g[Ba>>2]-+g[za>>2]*.5877852439880371);g[Da>>2]=+g[Yb>>2]-+g[na>>2]*.25;g[H>>2]=(+g[ja>>2]-+g[ma>>2])*.55901700258255;g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[Ca>>2]*.5877852439880371+ +g[Da>>2]+-(+g[Ea>>2]*.9510565400123596+ +g[H>>2]);g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[Ca>>2]*.9510565400123596+ +g[H>>2]+(+g[Ea>>2]*.5877852439880371+ +g[Da>>2]);g[ba>>2]=+g[aa>>2]-+g[oa>>2]*.5877852439880371;g[ca>>2]=+g[Xb>>2]-+g[ub>>2];g[Q>>2]=+g[O>>2]*1.9842294454574585-+g[P>>2]*.12533323466777802;g[T>>2]=+g[R>>2]*1.457937240600586+ +g[S>>2]*.6845471262931824;g[U>>2]=+g[Q>>2]-+g[T>>2];g[Ha>>2]=+g[Z>>2]*.06279052048921585-+g[Y>>2]*1.9960534572601318;g[Ia>>2]=+g[V>>2]*1.5410264730453491+ +g[W>>2]*.6374239921569824;g[Ja>>2]=+g[Ha>>2]-+g[Ia>>2];g[X>>2]=+g[V>>2]*1.2748479843139648-+g[W>>2]*.7705132365226746;g[_>>2]=+g[Y>>2]*.1255810409784317+ +g[Z>>2]*.9980267286300659;g[$>>2]=+g[X>>2]-+g[_>>2];g[da>>2]=+g[S>>2]*.728968620300293-+g[R>>2]*1.3690942525863647;g[ea>>2]=+g[O>>2]*.25066646933555603+ +g[P>>2]*.9921147227287292;g[Ga>>2]=+g[da>>2]-+g[ea>>2];g[Ka>>2]=+g[R>>2]*1.7526133060455322-+g[S>>2]*.4817536771297455;g[La>>2]=+g[O>>2]*.8515585660934448+ +g[P>>2]*.9048270583152771;g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[Na>>2]=+g[Y>>2]*1.0716536045074463-+g[Z>>2]*.8443279266357422;g[Oa>>2]=+g[V>>2]*.1255810409784317-+g[W>>2]*.9980267286300659;g[Pa>>2]=+g[Na>>2]+ +g[Oa>>2];g[Sa>>2]=+g[Ma>>2]+ 
+g[Pa>>2];g[ab>>2]=+g[Na>>2]-+g[Oa>>2];g[$a>>2]=+g[Ka>>2]+ +g[La>>2];g[Ta>>2]=+g[O>>2]*1.8096541166305542-+g[P>>2]*.4257792830467224;g[Ua>>2]=+g[R>>2]*.963507354259491+ +g[S>>2]*.8763066530227661;g[bb>>2]=+g[Ua>>2]+ +g[Ta>>2];g[Ya>>2]=+g[Y>>2]*1.6886558532714844+ +g[Z>>2]*.5358268022537231;g[Za>>2]=+g[V>>2]*1.9960534572601318+ +g[W>>2]*.06279052048921585;g[cb>>2]=+g[Ya>>2]+ +g[Za>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2];g[eb>>2]=+g[bb>>2]+ +g[cb>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[ba>>2]+ +g[Sa>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[ca>>2]+ +g[eb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[U>>2]+ +g[$>>2]-+g[ba>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[ca>>2]+ +g[Ga>>2]+ +g[Ja>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[Ga>>2]*.30901700258255005+ +g[ca>>2]+-(+g[Ja>>2]*.80901700258255+(+g[_>>2]+ +g[X>>2])*.5877852439880371)-(+g[T>>2]+ +g[Q>>2])*.9510565400123596;g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[U>>2]*.30901700258255005-(+g[Ha>>2]+ +g[Ia>>2])*.5877852439880371+-(+g[$>>2]*.80901700258255+(+g[da>>2]+ +g[ea>>2])*.9510565400123596)-+g[ba>>2];g[Wa>>2]=(+g[Ma>>2]-+g[Pa>>2])*.55901700258255;g[Xa>>2]=+g[ba>>2]-+g[Sa>>2]*.25;g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[Va>>2]*.9510565400123596+ +g[Wa>>2]+(+g[Xa>>2]-+g[_a>>2]*.5877852439880371);g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2]=+g[Va>>2]*.5877852439880371+ +g[Xa>>2]+(+g[_a>>2]*.9510565400123596-+g[Wa>>2]);g[db>>2]=(+g[bb>>2]-+g[cb>>2])*.55901700258255;g[fb>>2]=+g[ca>>2]-+g[eb>>2]*.25;g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[$a>>2]*.9510565400123596+ +g[ab>>2]*.5877852439880371+ +g[db>>2]+ +g[fb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[$a>>2]*.5877852439880371+ +g[fb>>2]+-(+g[ab>>2]*.9510565400123596+ +g[db>>2]);c[vc>>2]=(c[vc>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=wc;return}function dt(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,27,6904);i=b;return}function et(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+64|0;n=x+48|0;o=x+44|0;p=x+40|0;q=x+36|0;r=x+28|0;y=x+20|0;s=x+16|0;t=x+12|0;w=x+8|0;u=x+4|0;v=x;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[x+32>>2]=f;c[r>>2]=h;c[x+24>>2]=j;c[y>>2]=k;c[s>>2]=l;c[t>>2]=m;c[w>>2]=c[y>>2];while(1){if((c[w>>2]|0)<=0)break;g[u>>2]=+g[c[n>>2]>>2];g[v>>2]=+g[c[o>>2]>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[u>>2]-+g[v>>2];g[c[p>>2]>>2]=+g[u>>2]+ +g[v>>2];c[w>>2]=(c[w>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[s>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[t>>2]<<2)}i=x;return}function ft(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,28,6952);i=b;return}function gt(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0;rc=i;i=i+704|0;n=rc+692|0;o=rc+688|0;p=rc+684|0;q=rc+680|0;r=rc+676|0;s=rc+672|0;t=rc+668|0;sc=rc+664|0;u=rc+660|0;v=rc+656|0;qc=rc+624|0;pb=rc+620|0;Ra=rc+616|0;lc=rc+612|0;qa=rc+608|0;Wb=rc+604|0;cb=rc+600|0;oc=rc+596|0;pa=rc+592|0;jc=rc+588|0
;Ta=rc+584|0;xb=rc+580|0;ma=rc+576|0;cc=rc+572|0;Sa=rc+568|0;ub=rc+564|0;na=rc+560|0;ea=rc+556|0;Ia=rc+552|0;_a=rc+548|0;Za=rc+544|0;ja=rc+540|0;J=rc+536|0;G=rc+532|0;I=rc+528|0;Z=rc+524|0;aa=rc+520|0;Xa=rc+516|0;Wa=rc+512|0;Pb=rc+508|0;Ea=rc+504|0;Kb=rc+500|0;Da=rc+496|0;w=rc+492|0;Fa=rc+488|0;lb=rc+484|0;mb=rc+480|0;nb=rc+476|0;ob=rc+472|0;Sb=rc+468|0;mc=rc+464|0;Vb=rc+460|0;nc=rc+456|0;qb=rc+452|0;rb=rc+448|0;Tb=rc+444|0;Ub=rc+440|0;fc=rc+436|0;vb=rc+432|0;ic=rc+428|0;wb=rc+424|0;dc=rc+420|0;ec=rc+416|0;gc=rc+412|0;hc=rc+408|0;_b=rc+404|0;sb=rc+400|0;bc=rc+396|0;tb=rc+392|0;Yb=rc+388|0;Zb=rc+384|0;$b=rc+380|0;ac=rc+376|0;y=rc+372|0;ca=rc+368|0;ia=rc+364|0;da=rc+360|0;B=rc+356|0;Ga=rc+352|0;E=rc+348|0;Ha=rc+344|0;fa=rc+340|0;F=rc+336|0;Rb=rc+332|0;x=rc+328|0;ga=rc+324|0;ha=rc+320|0;z=rc+316|0;A=rc+312|0;C=rc+308|0;D=rc+304|0;Cb=rc+300|0;X=rc+296|0;Ob=rc+292|0;Y=rc+288|0;Fb=rc+284|0;_=rc+280|0;Ib=rc+276|0;$=rc+272|0;Lb=rc+268|0;Jb=rc+264|0;Ab=rc+260|0;Bb=rc+256|0;Mb=rc+252|0;Nb=rc+248|0;Db=rc+244|0;Eb=rc+240|0;Gb=rc+236|0;Hb=rc+232|0;Xb=rc+228|0;kc=rc+224|0;Na=rc+220|0;Oa=rc+216|0;Pa=rc+212|0;Qa=rc+208|0;W=rc+204|0;La=rc+200|0;Ka=rc+196|0;Ma=rc+192|0;ba=rc+188|0;Ja=rc+184|0;Va=rc+180|0;jb=rc+176|0;db=rc+172|0;fb=rc+168|0;ab=rc+164|0;eb=rc+160|0;ib=rc+156|0;kb=rc+152|0;Ua=rc+148|0;bb=rc+144|0;Ya=rc+140|0;$a=rc+136|0;gb=rc+132|0;hb=rc+128|0;zb=rc+124|0;ya=rc+120|0;xa=rc+116|0;za=rc+112|0;la=rc+108|0;ta=rc+104|0;sa=rc+100|0;ua=rc+96|0;pc=rc+92|0;yb=rc+88|0;va=rc+84|0;wa=rc+80|0;Qb=rc+76|0;ka=rc+72|0;oa=rc+68|0;ra=rc+64|0;Ca=rc+60|0;U=rc+56|0;T=rc+52|0;V=rc+48|0;L=rc+44|0;P=rc+40|0;O=rc+36|0;Q=rc+32|0;Aa=rc+28|0;Ba=rc+24|0;R=rc+20|0;S=rc+16|0;H=rc+12|0;K=rc+8|0;M=rc+4|0;N=rc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[sc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[rc+652>>2]=.5555702447891235;g[rc+648>>2]=.8314695954322815;g[rc+644>>2]=.19509032368659973;g[rc+640>>2]=.9807852506637573;g[rc+636>>2]=.3826834261417389;g[rc+632>>2]=.9238795042037964;g[rc+628>>2
]=.7071067690849304;c[qc>>2]=c[sc>>2];while(1){if((c[qc>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[Fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[lb>>2]=+g[w>>2]+ +g[Fa>>2];g[mb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[nb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ob>>2]=+g[mb>>2]+ +g[nb>>2];g[pb>>2]=+g[lb>>2]+ +g[ob>>2];g[Ra>>2]=+g[lb>>2]-+g[ob>>2];g[lc>>2]=+g[w>>2]-+g[Fa>>2];g[qa>>2]=+g[mb>>2]-+g[nb>>2];g[qb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[rb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Sb>>2]=+g[qb>>2]+ +g[rb>>2];g[mc>>2]=+g[qb>>2]-+g[rb>>2];g[Tb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ub>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Vb>>2]=+g[Tb>>2]+ +g[Ub>>2];g[nc>>2]=+g[Tb>>2]-+g[Ub>>2];g[Wb>>2]=+g[Sb>>2]+ +g[Vb>>2];g[cb>>2]=+g[Vb>>2]-+g[Sb>>2];g[oc>>2]=(+g[mc>>2]+ +g[nc>>2])*.7071067690849304;g[pa>>2]=(+g[nc>>2]-+g[mc>>2])*.7071067690849304;g[dc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[ec>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[fc>>2]=+g[dc>>2]+ +g[ec>>2];g[vb>>2]=+g[dc>>2]-+g[ec>>2];g[gc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[hc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ic>>2]=+g[gc>>2]+ +g[hc>>2];g[wb>>2]=+g[gc>>2]-+g[hc>>2];g[jc>>2]=+g[fc>>2]+ +g[ic>>2];g[Ta>>2]=+g[fc>>2]-+g[ic>>2];g[xb>>2]=+g[vb>>2]*.9238795042037964+ +g[wb>>2]*.3826834261417389;g[ma>>2]=+g[vb>>2]*.3826834261417389-+g[wb>>2]*.9238795042037964;g[Yb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Zb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[_b>>2]=+g[Yb>>2]+ +g[Zb>>2];g[sb>>2]=+g[Yb>>2]-+g[Zb>>2];g[$b>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ac>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[tb>>2]=+g[$b>>2]-+g[ac>>2];g[cc>>2]=+g[_b>>2]+ +g[bc>>2];g[Sa>>2]=+g[_b>>2]-+g[bc>>2];g[ub>>2]=+g[sb>>2]*.9238795042037964-+g[tb>>2]*.3826834261417389;g[na>>2]=+g[sb>>2]*.3826834261417389+ +g[tb>>2]*.9238795042037964;g[Rb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[x>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[y>>2]=+g[Rb>>2]-+g[x>>2];g[ca>>2]=+g[Rb>>2]+ 
+g[x>>2];g[ga>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ha>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[da>>2]=+g[ga>>2]+ +g[ha>>2];g[z>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[Ga>>2]=+g[z>>2]+ +g[A>>2];g[C>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[D>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[Ha>>2]=+g[C>>2]+ +g[D>>2];g[ea>>2]=+g[ca>>2]+ +g[da>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[_a>>2]=+g[Ha>>2]-+g[Ga>>2];g[Za>>2]=+g[ca>>2]-+g[da>>2];g[fa>>2]=(+g[E>>2]-+g[B>>2])*.7071067690849304;g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[J>>2]=+g[ia>>2]+ +g[fa>>2];g[F>>2]=(+g[B>>2]+ +g[E>>2])*.7071067690849304;g[G>>2]=+g[y>>2]+ +g[F>>2];g[I>>2]=+g[y>>2]-+g[F>>2];g[Ab>>2]=+g[c[o>>2]>>2];g[Bb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[X>>2]=+g[Ab>>2]+ +g[Bb>>2];g[Mb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Nb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Ob>>2]=+g[Mb>>2]-+g[Nb>>2];g[Y>>2]=+g[Mb>>2]+ +g[Nb>>2];g[Db>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Eb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[_>>2]=+g[Db>>2]+ +g[Eb>>2];g[Gb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Hb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ib>>2]=+g[Gb>>2]-+g[Hb>>2];g[$>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[Xa>>2]=+g[$>>2]-+g[_>>2];g[Wa>>2]=+g[X>>2]-+g[Y>>2];g[Lb>>2]=(+g[Ib>>2]-+g[Fb>>2])*.7071067690849304;g[Pb>>2]=+g[Lb>>2]-+g[Ob>>2];g[Ea>>2]=+g[Ob>>2]+ +g[Lb>>2];g[Jb>>2]=(+g[Fb>>2]+ +g[Ib>>2])*.7071067690849304;g[Kb>>2]=+g[Cb>>2]+ +g[Jb>>2];g[Da>>2]=+g[Cb>>2]-+g[Jb>>2];g[Xb>>2]=+g[pb>>2]+ +g[Wb>>2];g[kc>>2]=+g[cc>>2]+ +g[jc>>2];g[Na>>2]=+g[Xb>>2]+ +g[kc>>2];g[Oa>>2]=+g[Z>>2]+ +g[aa>>2];g[Pa>>2]=+g[ea>>2]+ +g[Ia>>2];g[Qa>>2]=+g[Oa>>2]+ 
+g[Pa>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[Xb>>2]-+g[kc>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[Pa>>2]-+g[Oa>>2];g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2]=+g[Na>>2]-+g[Qa>>2];g[c[p>>2]>>2]=+g[Na>>2]+ +g[Qa>>2];g[W>>2]=+g[pb>>2]-+g[Wb>>2];g[La>>2]=+g[jc>>2]-+g[cc>>2];g[ba>>2]=+g[Z>>2]-+g[aa>>2];g[Ja>>2]=+g[ea>>2]-+g[Ia>>2];g[Ka>>2]=(+g[ba>>2]+ +g[Ja>>2])*.7071067690849304;g[Ma>>2]=(+g[Ja>>2]-+g[ba>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[W>>2]-+g[Ka>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2]=+g[Ma>>2]-+g[La>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[W>>2]+ +g[Ka>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[La>>2]+ +g[Ma>>2];g[Ua>>2]=(+g[Sa>>2]+ +g[Ta>>2])*.7071067690849304;g[Va>>2]=+g[Ra>>2]+ +g[Ua>>2];g[jb>>2]=+g[Ra>>2]-+g[Ua>>2];g[bb>>2]=(+g[Ta>>2]-+g[Sa>>2])*.7071067690849304;g[db>>2]=+g[bb>>2]-+g[cb>>2];g[fb>>2]=+g[cb>>2]+ +g[bb>>2];g[Ya>>2]=+g[Wa>>2]*.9238795042037964+ +g[Xa>>2]*.3826834261417389;g[$a>>2]=+g[Za>>2]*.9238795042037964-+g[_a>>2]*.3826834261417389;g[ab>>2]=+g[Ya>>2]+ +g[$a>>2];g[eb>>2]=+g[$a>>2]-+g[Ya>>2];g[gb>>2]=+g[Xa>>2]*.9238795042037964-+g[Wa>>2]*.3826834261417389;g[hb>>2]=+g[Za>>2]*.3826834261417389+ +g[_a>>2]*.9238795042037964;g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[kb>>2]=+g[hb>>2]-+g[gb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2]=+g[Va>>2]-+g[ab>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2]=+g[ib>>2]-+g[fb>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[Va>>2]+ +g[ab>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[fb>>2]+ +g[ib>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[db>>2]+ +g[eb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[jb>>2]+ +g[kb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[eb>>2]-+g[db>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[jb>>2]-+g[kb>>2];g[pc>>2]=+g[lc>>2]+ +g[oc>>2];g[yb>>2]=+g[ub>>2]+ +g[xb>>2];g[zb>>2]=+g[pc>>2]+ +g[yb>>2];g[ya>>2]=+g[pc>>2]-+g[yb>>2];g[va>>2]=+g[Pb>>2]*.9807852506637573-+g[Kb>>2]*.19509032368659973;g[wa>>2]=+g[G>>2]*.19509032368659973+ +g[ja>>2]*.9807852506637573;g[xa>>2]=+g[va>>2]+ 
+g[wa>>2];g[za>>2]=+g[wa>>2]-+g[va>>2];g[Qb>>2]=+g[Kb>>2]*.9807852506637573+ +g[Pb>>2]*.19509032368659973;g[ka>>2]=+g[G>>2]*.9807852506637573-+g[ja>>2]*.19509032368659973;g[la>>2]=+g[Qb>>2]+ +g[ka>>2];g[ta>>2]=+g[ka>>2]-+g[Qb>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[sa>>2]=+g[oa>>2]-+g[ra>>2];g[ua>>2]=+g[ra>>2]+ +g[oa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2]=+g[zb>>2]-+g[la>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2]=+g[xa>>2]-+g[ua>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[zb>>2]+ +g[la>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[ua>>2]+ +g[xa>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[sa>>2]+ +g[ta>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[ya>>2]+ +g[za>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[ta>>2]-+g[sa>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[ya>>2]-+g[za>>2];g[Aa>>2]=+g[lc>>2]-+g[oc>>2];g[Ba>>2]=+g[na>>2]+ +g[ma>>2];g[Ca>>2]=+g[Aa>>2]+ +g[Ba>>2];g[U>>2]=+g[Aa>>2]-+g[Ba>>2];g[R>>2]=+g[Ea>>2]*.8314695954322815-+g[Da>>2]*.5555702447891235;g[S>>2]=+g[I>>2]*.5555702447891235+ +g[J>>2]*.8314695954322815;g[T>>2]=+g[R>>2]+ +g[S>>2];g[V>>2]=+g[S>>2]-+g[R>>2];g[H>>2]=+g[Da>>2]*.8314695954322815+ +g[Ea>>2]*.5555702447891235;g[K>>2]=+g[I>>2]*.8314695954322815-+g[J>>2]*.5555702447891235;g[L>>2]=+g[H>>2]+ +g[K>>2];g[P>>2]=+g[K>>2]-+g[H>>2];g[M>>2]=+g[xb>>2]-+g[ub>>2];g[N>>2]=+g[qa>>2]+ +g[pa>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[Q>>2]=+g[N>>2]+ +g[M>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2]=+g[Ca>>2]-+g[L>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2]=+g[T>>2]-+g[Q>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[Ca>>2]+ +g[L>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[Q>>2]+ +g[T>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[U>>2]+ 
+g[V>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[P>>2]-+g[O>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[U>>2]-+g[V>>2];c[qc>>2]=(c[qc>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=rc;return}function ht(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,29,7e3);i=b;return}function it(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0;B=i;i=i+80|0;n=B+64|0;o=B+60|0;p=B+56|0;q=B+52|0;r=B+48|0;s=B+44|0;t=B+40|0;C=B+36|0;u=B+32|0;v=B+28|0;A=B+16|0;w=B+12|0;x=B+8|0;y=B+4|0;z=B;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[C>>2]=k;c[u>>2]=l;c[v>>2]=m;g[B+24>>2]=.8660253882408142;g[B+20>>2]=.5;c[A>>2]=c[C>>2];while(1){if((c[A>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[c[o>>2]>>2];g[y>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[w>>2]-+g[z>>2]*.5;g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=(+g[y>>2]-+g[x>>2])*.8660253882408142;g[c[p>>2]>>2]=+g[w>>2]+ +g[z>>2];c[A>>2]=(c[A>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2)}i=B;return}function jt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,30,7048);i=b;return}function kt(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0;D=i;i=i+80|0;n=D+64|0;o=D+60|0;p=D+56|0;q=D+52|0;r=D+48|0;s=D+44|0;t=D+40|0;E=D+36|0;u=D+32|0;v=D+28|0;C=D+24|0;w=D+20|0;x=D+16|0;y=D+12|0;z=D+8|0;A=D+4|0;B=D;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[E>>2]=k;c[u>>2]=l;c[v>>2]=m;c[C>>2]=c[E>>2];while(1){if((c[C>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[c[o>>2]>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[w>>2]-+g[x>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[A>>2]-+g[z>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[y>>2]-+g[B>>2];g[c[p>>2]>>2]=+g[y>>2]+ +g[B>>2];c[C>>2]=(c[C>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2)}i=D;return}function lt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,31,7096);i=b;return}function mt(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;n=J+104|0;o=J+100|0;p=J+96|0;q=J+92|0;r=J+88|0;s=J+84|0;t=J+80|0;K=J+76|0;u=J+72|0;v=J+68|0;I=J+48|0;F=J+44|0;C=J+40|0;D=J+36|0;y=J+32|0;G=J+28|0;B=J+24|0;E=J+20|0;H=J+16|0;w=J+12|0;x=J+8|0;z=J+4|0;A=J;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[K>>2]=k;c[u>>2]=l;c[v>>2]=m;g[J+64>>2]=.25;g[J+60>>2]=.55901700258255;g[J+56>>2]=.5877852439880371;g[J+52>>2]=.9510565400123596;c[I>>2]=c[K>>2];while(1){if((c[I>>2]|0)<=0)break;g[F>>2]=+g[c[n>>2]>>2];g[w>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[x>>2]=+g[c[o>>2]>>2];g[C>>2]=+g[x>>2]+ +g[w>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[D>>2]=+g[z>>2]+ +g[A>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[G>>2]=+g[C>>2]+ 
+g[D>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[y>>2]*.9510565400123596-+g[B>>2]*.5877852439880371;g[c[p>>2]>>2]=+g[F>>2]+ +g[G>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[y>>2]*.5877852439880371+ +g[B>>2]*.9510565400123596;g[E>>2]=(+g[C>>2]-+g[D>>2])*.55901700258255;g[H>>2]=+g[F>>2]-+g[G>>2]*.25;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[E>>2]+ +g[H>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[H>>2]-+g[E>>2];c[I>>2]=(c[I>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=J;return}function nt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,32,7144);i=b;return}function ot(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,K
d=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0;Og=i;i=i+1680|0;n=Og+1676|0;o=Og+1672|0;p=Og+1668|0;q=Og+1664|0;r=Og+1660|0;s=Og+1656|0;t=Og+1652|0;Pg=Og+1648|0;u=Og+1644|0;v=Og+1640|0;Ng=Og+1576|0;Rd=Og+1572|0;Pe=Og+1568|0;C=Og+1564|0;Kc=Og+1560|0;hb=Og+1556|0;nc=Og+1552|0;Mf=Og+1548|0;rg=Og+1544|0;sg=Og+1540|0;gd=Og+1536|0;rd=Og+1532|0;ca=Og+1528|0;_b=Og+1524|0;Ja=Og+1520|0;Vc=Og+1516|0;xe=Og+1512|0;vf=Og+1508|0;xd=Og+1504|0;he=Og+1500|0;xb=Og+1496|0;cc=Og+1492|0;Xa=Og+1488|0;fc=Og+1484|0;Ee=Og+1480|0;yf=Og+1476|0;na=Og+1472|0;Qc=Og+1468|0;qa=Og+1464|0;Rc=Og+1460|0;mg=Og+1456|0;sf=Og+1452|0;$d=Og+1448|0;ne=Og+1444|0;wa=Og+1440|0;Oc=Og+1436|0;za=Og+1432|0;Nc=Og+1428|0;Zf=Og+1424|0;rf=Og+1420|0;Yd=Og+1416|0;me=Og+1412|0;zg=Og+1408|0;Gg=Og+1404|0;Hg=Og+1400|0;Ud=Og+1396|0;Qe=Og+1392|0;ha=Og+1388|0;oc=Og+1384|0;eb=Og+1380|0;Lc=Og+1376|0;T=Og+1372|0;Wc=Og+1368|0;Ae=Og+1364|0;wf=Og+1360|0;Ma=Og+1356|0;$b=Og+1352|0;nd=Og+1348|0;sd=Og+1344|0;Mb=Og+1340|0;gc=Og+1336|0;He=Og+1332|0;zf=Og+1328|0;Sa=Og+1324|0;dc=Og+1320|0;Ed=Og+1316|0;ie=Og+1312|0;Ob=Og+1308|0;y=Og+1304|0;qg=Og+1300|0;A=Og+1296|0;of=Og+1292|0;gb=Og+1288|0;ng=Og+1284|0;z=Og+1280|0;B=Og+1276|0;fb=Og+1272|0;w=Og+1268|0;Fa=Og+1264|0;og=Og+126
0|0;pg=Og+1256|0;Xc=Og+1252|0;ee=Og+1248|0;Nf=Og+1244|0;Of=Og+1240|0;W=Og+1236|0;pd=Og+1232|0;Ha=Og+1228|0;ce=Og+1224|0;Z=Og+1220|0;qd=Og+1216|0;ba=Og+1212|0;de=Og+1208|0;U=Og+1204|0;V=Og+1200|0;ea=Og+1196|0;Ga=Og+1192|0;X=Og+1188|0;Y=Og+1184|0;$=Og+1180|0;aa=Og+1176|0;_=Og+1172|0;Ia=Og+1168|0;ve=Og+1164|0;we=Og+1160|0;pb=Og+1156|0;vd=Og+1152|0;Wa=Og+1148|0;wd=Og+1144|0;sb=Og+1140|0;ge=Og+1136|0;vb=Og+1132|0;fe=Og+1128|0;Pa=Og+1124|0;ob=Og+1120|0;Ua=Og+1116|0;Va=Og+1112|0;qb=Og+1108|0;rb=Og+1104|0;tb=Og+1100|0;ub=Og+1096|0;wb=Og+1092|0;Ta=Og+1088|0;Ce=Og+1084|0;De=Og+1080|0;ag=Og+1076|0;ja=Og+1072|0;kg=Og+1068|0;la=Og+1064|0;dg=Og+1060|0;pa=Og+1056|0;hg=Og+1052|0;ka=Og+1048|0;ma=Og+1044|0;oa=Og+1040|0;_f=Og+1036|0;$f=Og+1032|0;ig=Og+1028|0;jg=Og+1024|0;bg=Og+1020|0;cg=Og+1016|0;fg=Og+1012|0;gg=Og+1008|0;eg=Og+1004|0;lg=Og+1e3|0;Zd=Og+996|0;_d=Og+992|0;Lg=Og+988|0;xa=Og+984|0;Xf=Og+980|0;sa=Og+976|0;Qf=Og+972|0;va=Og+968|0;Uf=Og+964|0;ta=Og+960|0;ua=Og+956|0;ya=Og+952|0;Jg=Og+948|0;Kg=Og+944|0;Vf=Og+940|0;Wf=Og+936|0;Mg=Og+932|0;Pf=Og+928|0;Sf=Og+924|0;Tf=Og+920|0;Rf=Og+916|0;Yf=Og+912|0;Wd=Og+908|0;Xd=Og+904|0;vg=Og+900|0;D=Og+896|0;Fg=Og+892|0;fa=Og+888|0;yg=Og+884|0;E=Og+880|0;Cg=Og+876|0;G=Og+872|0;Sd=Og+868|0;Td=Og+864|0;tg=Og+860|0;ug=Og+856|0;Dg=Og+852|0;Eg=Og+848|0;wg=Og+844|0;xg=Og+840|0;Ag=Og+836|0;Bg=Og+832|0;F=Og+828|0;ga=Og+824|0;cb=Og+820|0;db=Og+816|0;H=Og+812|0;kd=Og+808|0;R=Og+804|0;id=Og+800|0;K=Og+796|0;ld=Og+792|0;O=Og+788|0;hd=Og+784|0;Da=Og+780|0;Ea=Og+776|0;P=Og+772|0;Q=Og+768|0;I=Og+764|0;J=Og+760|0;M=Og+756|0;N=Og+752|0;L=Og+748|0;S=Og+744|0;ye=Og+740|0;ze=Og+736|0;Ka=Og+732|0;La=Og+728|0;jd=Og+724|0;md=Og+720|0;Ab=Og+716|0;yd=Og+712|0;Kb=Og+708|0;Cd=Og+704|0;Db=Og+700|0;zd=Og+696|0;Hb=Og+692|0;Bd=Og+688|0;yb=Og+684|0;zb=Og+680|0;Ib=Og+676|0;Jb=Og+672|0;Bb=Og+668|0;Cb=Og+664|0;Fb=Og+660|0;Gb=Og+656|0;Eb=Og+652|0;Lb=Og+648|0;Fe=Og+644|0;Ge=Og+640|0;Qa=Og+636|0;Ra=Og+632|0;Ad=Og+628|0;Dd=Og+624|0;Ig=Og+620|0;x=Og+616|0;Me=Og+612|0;Ne=Og+608|0;O
e=Og+604|0;pf=Og+600|0;ue=Og+596|0;Ke=Og+592|0;Je=Og+588|0;Le=Og+584|0;Be=Og+580|0;Ie=Og+576|0;uf=Og+572|0;Kf=Og+568|0;Jf=Og+564|0;Lf=Og+560|0;Bf=Og+556|0;Ff=Og+552|0;Ef=Og+548|0;Gf=Og+544|0;qf=Og+540|0;tf=Og+536|0;Hf=Og+532|0;If=Og+528|0;xf=Og+524|0;Af=Og+520|0;Cf=Og+516|0;Df=Og+512|0;af=Og+508|0;se=Og+504|0;lf=Og+500|0;nf=Og+496|0;df=Og+492|0;pe=Og+488|0;gf=Og+484|0;qe=Og+480|0;_e=Og+476|0;$e=Og+472|0;jf=Og+468|0;kf=Og+464|0;bf=Og+460|0;cf=Og+456|0;ef=Og+452|0;ff=Og+448|0;hf=Og+444|0;re=Og+440|0;mf=Og+436|0;te=Og+432|0;be=Og+428|0;Ye=Og+424|0;Se=Og+420|0;Ue=Og+416|0;ud=Og+412|0;Ve=Og+408|0;ke=Og+404|0;We=Og+400|0;Vd=Og+396|0;ae=Og+392|0;oe=Og+388|0;Re=Og+384|0;od=Og+380|0;td=Og+376|0;Fd=Og+372|0;je=Og+368|0;le=Og+364|0;Xe=Og+360|0;Te=Og+356|0;Ze=Og+352|0;Mc=Og+348|0;Zc=Og+344|0;Id=Og+340|0;pc=Og+336|0;Tc=Og+332|0;Hd=Og+328|0;fd=Og+324|0;Nd=Og+320|0;mc=Og+316|0;_c=Og+312|0;bc=Og+308|0;tc=Og+304|0;cd=Og+300|0;Md=Og+296|0;ic=Og+292|0;uc=Og+288|0;Pc=Og+284|0;Sc=Og+280|0;Zb=Og+276|0;ac=Og+272|0;dd=Og+268|0;ed=Og+264|0;kc=Og+260|0;lc=Og+256|0;ad=Og+252|0;bd=Og+248|0;ec=Og+244|0;hc=Og+240|0;Uc=Og+236|0;jc=Og+232|0;sc=Og+228|0;vc=Og+224|0;qc=Og+220|0;rc=Og+216|0;wc=Og+212|0;Yc=Og+208|0;$c=Og+204|0;Gd=Og+200|0;Ld=Og+196|0;Od=Og+192|0;Jd=Og+188|0;Kd=Og+184|0;Pd=Og+180|0;Qd=Og+176|0;ia=Og+172|0;Sb=Og+168|0;Bc=Og+164|0;ib=Og+160|0;Ba=Og+156|0;Ac=Og+152|0;yc=Og+148|0;Gc=Og+144|0;bb=Og+140|0;Tb=Og+136|0;Oa=Og+132|0;mb=Og+128|0;Xb=Og+124|0;Fc=Og+120|0;Za=Og+116|0;nb=Og+112|0;ra=Og+108|0;Aa=Og+104|0;da=Og+100|0;Na=Og+96|0;Yb=Og+92|0;xc=Og+88|0;$a=Og+84|0;ab=Og+80|0;Vb=Og+76|0;Wb=Og+72|0;Nb=Og+68|0;Ya=Og+64|0;Ca=Og+60|0;_a=Og+56|0;lb=Og+52|0;Pb=Og+48|0;jb=Og+44|0;kb=Og+40|0;Qb=Og+36|0;Rb=Og+32|0;Ub=Og+28|0;zc=Og+24|0;Ec=Og+20|0;Hc=Og+16|0;Cc=Og+12|0;Dc=Og+8|0;Ic=Og+4|0;Jc=Og;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Pg>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Og+1636>>2]=.7730104327201843;g[Og+1632>>2]=.6343932747840881;g[Og+1628>>2]=.0980171412229538;g[Og+1624>>2
]=.9951847195625305;g[Og+1620>>2]=.290284663438797;g[Og+1616>>2]=.9569403529167175;g[Og+1612>>2]=.4713967442512512;g[Og+1608>>2]=.8819212913513184;g[Og+1604>>2]=.19509032368659973;g[Og+1600>>2]=.9807852506637573;g[Og+1596>>2]=.5555702447891235;g[Og+1592>>2]=.8314695954322815;g[Og+1588>>2]=.3826834261417389;g[Og+1584>>2]=.9238795042037964;g[Og+1580>>2]=.7071067690849304;c[Ng>>2]=c[Pg>>2];while(1){if((c[Ng>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[Fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<4<<2)>>2];g[Ob>>2]=+g[w>>2]+ +g[Fa>>2];g[y>>2]=+g[w>>2]-+g[Fa>>2];g[og>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*28<<2)>>2];g[pg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[qg>>2]=+g[og>>2]+ +g[pg>>2];g[A>>2]=+g[og>>2]-+g[pg>>2];g[Xc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ee>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*24<<2)>>2];g[of>>2]=+g[Xc>>2]+ +g[ee>>2];g[gb>>2]=+g[Xc>>2]-+g[ee>>2];g[Nf>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Of>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*20<<2)>>2];g[ng>>2]=+g[Nf>>2]+ +g[Of>>2];g[z>>2]=+g[Nf>>2]-+g[Of>>2];g[Rd>>2]=+g[Ob>>2]-+g[of>>2];g[Pe>>2]=+g[qg>>2]-+g[ng>>2];g[B>>2]=(+g[z>>2]+ +g[A>>2])*.7071067690849304;g[C>>2]=+g[y>>2]+ +g[B>>2];g[Kc>>2]=+g[y>>2]-+g[B>>2];g[fb>>2]=(+g[A>>2]-+g[z>>2])*.7071067690849304;g[hb>>2]=+g[fb>>2]-+g[gb>>2];g[nc>>2]=+g[gb>>2]+ +g[fb>>2];g[Mf>>2]=+g[Ob>>2]+ +g[of>>2];g[rg>>2]=+g[ng>>2]+ +g[qg>>2];g[sg>>2]=+g[Mf>>2]+ +g[rg>>2];g[U>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2];g[V>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[pd>>2]=+g[U>>2]+ +g[V>>2];g[ea>>2]=+g[c[o>>2]>>2];g[Ga>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2];g[Ha>>2]=+g[ea>>2]-+g[Ga>>2];g[ce>>2]=+g[ea>>2]+ +g[Ga>>2];g[X>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Y>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[qd>>2]=+g[X>>2]+ +g[Y>>2];g[$>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[aa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[de>>2]=+g[$>>2]+ 
+g[aa>>2];g[gd>>2]=+g[ce>>2]-+g[de>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[_>>2]=(+g[W>>2]-+g[Z>>2])*.7071067690849304;g[ca>>2]=+g[_>>2]-+g[ba>>2];g[_b>>2]=+g[ba>>2]+ +g[_>>2];g[Ia>>2]=(+g[Z>>2]+ +g[W>>2])*.7071067690849304;g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[Vc>>2]=+g[Ha>>2]-+g[Ia>>2];g[ve>>2]=+g[ce>>2]+ +g[de>>2];g[we>>2]=+g[qd>>2]+ +g[pd>>2];g[xe>>2]=+g[ve>>2]+ +g[we>>2];g[vf>>2]=+g[ve>>2]-+g[we>>2];g[Pa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2];g[ob>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[pb>>2]=+g[Pa>>2]-+g[ob>>2];g[vd>>2]=+g[Pa>>2]+ +g[ob>>2];g[Ua>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Va>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[wd>>2]=+g[Ua>>2]+ +g[Va>>2];g[qb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[rb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2];g[sb>>2]=+g[qb>>2]-+g[rb>>2];g[ge>>2]=+g[qb>>2]+ +g[rb>>2];g[tb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2];g[ub>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[vb>>2]=+g[tb>>2]-+g[ub>>2];g[fe>>2]=+g[tb>>2]+ +g[ub>>2];g[xd>>2]=+g[vd>>2]-+g[wd>>2];g[he>>2]=+g[fe>>2]-+g[ge>>2];g[wb>>2]=(+g[sb>>2]+ +g[vb>>2])*.7071067690849304;g[xb>>2]=+g[pb>>2]+ +g[wb>>2];g[cc>>2]=+g[pb>>2]-+g[wb>>2];g[Ta>>2]=(+g[vb>>2]-+g[sb>>2])*.7071067690849304;g[Xa>>2]=+g[Ta>>2]-+g[Wa>>2];g[fc>>2]=+g[Wa>>2]+ +g[Ta>>2];g[Ce>>2]=+g[vd>>2]+ +g[wd>>2];g[De>>2]=+g[ge>>2]+ +g[fe>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[yf>>2]=+g[Ce>>2]-+g[De>>2];g[_f>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*31<<2)>>2];g[$f>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[ag>>2]=+g[_f>>2]+ +g[$f>>2];g[ja>>2]=+g[_f>>2]-+g[$f>>2];g[ig>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*27<<2)>>2];g[jg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[kg>>2]=+g[ig>>2]+ +g[jg>>2];g[la>>2]=+g[ig>>2]-+g[jg>>2];g[bg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[cg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*23<<2)>>2];g[dg>>2]=+g[bg>>2]+ +g[cg>>2];g[pa>>2]=+g[bg>>2]-+g[cg>>2];g[fg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[gg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*19<<2)>>2];g[hg>>2]=+g[fg>>2]+ 
+g[gg>>2];g[ka>>2]=+g[fg>>2]-+g[gg>>2];g[ma>>2]=(+g[ka>>2]+ +g[la>>2])*.7071067690849304;g[na>>2]=+g[ja>>2]+ +g[ma>>2];g[Qc>>2]=+g[ja>>2]-+g[ma>>2];g[oa>>2]=(+g[la>>2]-+g[ka>>2])*.7071067690849304;g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[Rc>>2]=+g[pa>>2]+ +g[oa>>2];g[eg>>2]=+g[ag>>2]+ +g[dg>>2];g[lg>>2]=+g[hg>>2]+ +g[kg>>2];g[mg>>2]=+g[eg>>2]+ +g[lg>>2];g[sf>>2]=+g[eg>>2]-+g[lg>>2];g[Zd>>2]=+g[ag>>2]-+g[dg>>2];g[_d>>2]=+g[kg>>2]-+g[hg>>2];g[$d>>2]=+g[Zd>>2]*.9238795042037964-+g[_d>>2]*.3826834261417389;g[ne>>2]=+g[Zd>>2]*.3826834261417389+ +g[_d>>2]*.9238795042037964;g[Jg>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Kg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*17<<2)>>2];g[Lg>>2]=+g[Jg>>2]+ +g[Kg>>2];g[xa>>2]=+g[Jg>>2]-+g[Kg>>2];g[Vf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*29<<2)>>2];g[Wf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Xf>>2]=+g[Vf>>2]+ +g[Wf>>2];g[sa>>2]=+g[Vf>>2]-+g[Wf>>2];g[Mg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Pf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*25<<2)>>2];g[Qf>>2]=+g[Mg>>2]+ +g[Pf>>2];g[va>>2]=+g[Mg>>2]-+g[Pf>>2];g[Sf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Tf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*21<<2)>>2];g[Uf>>2]=+g[Sf>>2]+ +g[Tf>>2];g[ta>>2]=+g[Sf>>2]-+g[Tf>>2];g[ua>>2]=(+g[sa>>2]-+g[ta>>2])*.7071067690849304;g[wa>>2]=+g[ua>>2]-+g[va>>2];g[Oc>>2]=+g[va>>2]+ +g[ua>>2];g[ya>>2]=(+g[ta>>2]+ +g[sa>>2])*.7071067690849304;g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[Nc>>2]=+g[xa>>2]-+g[ya>>2];g[Rf>>2]=+g[Lg>>2]+ +g[Qf>>2];g[Yf>>2]=+g[Uf>>2]+ +g[Xf>>2];g[Zf>>2]=+g[Rf>>2]+ +g[Yf>>2];g[rf>>2]=+g[Rf>>2]-+g[Yf>>2];g[Wd>>2]=+g[Lg>>2]-+g[Qf>>2];g[Xd>>2]=+g[Xf>>2]-+g[Uf>>2];g[Yd>>2]=+g[Wd>>2]*.9238795042037964+ +g[Xd>>2]*.3826834261417389;g[me>>2]=+g[Xd>>2]*.9238795042037964-+g[Wd>>2]*.3826834261417389;g[tg>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ug>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*18<<2)>>2];g[vg>>2]=+g[tg>>2]+ +g[ug>>2];g[D>>2]=+g[tg>>2]-+g[ug>>2];g[Dg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Eg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*22<<2)>>2];g[Fg>>2]=+g[Dg>>2]+ 
+g[Eg>>2];g[fa>>2]=+g[Dg>>2]-+g[Eg>>2];g[wg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[xg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*26<<2)>>2];g[yg>>2]=+g[wg>>2]+ +g[xg>>2];g[E>>2]=+g[wg>>2]-+g[xg>>2];g[Ag>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*30<<2)>>2];g[Bg>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Cg>>2]=+g[Ag>>2]+ +g[Bg>>2];g[G>>2]=+g[Ag>>2]-+g[Bg>>2];g[zg>>2]=+g[vg>>2]+ +g[yg>>2];g[Gg>>2]=+g[Cg>>2]+ +g[Fg>>2];g[Hg>>2]=+g[zg>>2]+ +g[Gg>>2];g[Sd>>2]=+g[vg>>2]-+g[yg>>2];g[Td>>2]=+g[Cg>>2]-+g[Fg>>2];g[Ud>>2]=(+g[Sd>>2]+ +g[Td>>2])*.7071067690849304;g[Qe>>2]=(+g[Td>>2]-+g[Sd>>2])*.7071067690849304;g[F>>2]=+g[D>>2]*.9238795042037964-+g[E>>2]*.3826834261417389;g[ga>>2]=+g[G>>2]*.9238795042037964+ +g[fa>>2]*.3826834261417389;g[ha>>2]=+g[F>>2]+ +g[ga>>2];g[oc>>2]=+g[ga>>2]-+g[F>>2];g[cb>>2]=+g[G>>2]*.3826834261417389-+g[fa>>2]*.9238795042037964;g[db>>2]=+g[D>>2]*.3826834261417389+ +g[E>>2]*.9238795042037964;g[eb>>2]=+g[cb>>2]-+g[db>>2];g[Lc>>2]=+g[db>>2]+ +g[cb>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2];g[Ea>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[H>>2]=+g[Da>>2]-+g[Ea>>2];g[kd>>2]=+g[Da>>2]+ +g[Ea>>2];g[P>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Q>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[id>>2]=+g[P>>2]+ +g[Q>>2];g[I>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[J>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[ld>>2]=+g[I>>2]+ +g[J>>2];g[M>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[N>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[hd>>2]=+g[M>>2]+ +g[N>>2];g[L>>2]=+g[H>>2]*.3826834261417389-+g[K>>2]*.9238795042037964;g[S>>2]=+g[O>>2]*.3826834261417389+ +g[R>>2]*.9238795042037964;g[T>>2]=+g[L>>2]-+g[S>>2];g[Wc>>2]=+g[S>>2]+ +g[L>>2];g[ye>>2]=+g[hd>>2]+ +g[id>>2];g[ze>>2]=+g[kd>>2]+ +g[ld>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[wf>>2]=+g[ze>>2]-+g[ye>>2];g[Ka>>2]=+g[O>>2]*.9238795042037964-+g[R>>2]*.3826834261417389;g[La>>2]=+g[H>>2]*.9238795042037964+ 
+g[K>>2]*.3826834261417389;g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[$b>>2]=+g[La>>2]-+g[Ka>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[md>>2]=+g[kd>>2]-+g[ld>>2];g[nd>>2]=(+g[jd>>2]+ +g[md>>2])*.7071067690849304;g[sd>>2]=(+g[md>>2]-+g[jd>>2])*.7071067690849304;g[yb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[zb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2];g[yd>>2]=+g[yb>>2]+ +g[zb>>2];g[Ib>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Jb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[Cd>>2]=+g[Ib>>2]+ +g[Jb>>2];g[Bb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Cb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2];g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[zd>>2]=+g[Bb>>2]+ +g[Cb>>2];g[Fb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2];g[Gb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[Bd>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Eb>>2]=+g[Ab>>2]*.9238795042037964-+g[Db>>2]*.3826834261417389;g[Lb>>2]=+g[Hb>>2]*.9238795042037964+ +g[Kb>>2]*.3826834261417389;g[Mb>>2]=+g[Eb>>2]+ +g[Lb>>2];g[gc>>2]=+g[Lb>>2]-+g[Eb>>2];g[Fe>>2]=+g[yd>>2]+ +g[zd>>2];g[Ge>>2]=+g[Bd>>2]+ +g[Cd>>2];g[He>>2]=+g[Fe>>2]+ +g[Ge>>2];g[zf>>2]=+g[Ge>>2]-+g[Fe>>2];g[Qa>>2]=+g[Hb>>2]*.3826834261417389-+g[Kb>>2]*.9238795042037964;g[Ra>>2]=+g[Ab>>2]*.3826834261417389+ +g[Db>>2]*.9238795042037964;g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[dc>>2]=+g[Ra>>2]+ +g[Qa>>2];g[Ad>>2]=+g[yd>>2]-+g[zd>>2];g[Dd>>2]=+g[Bd>>2]-+g[Cd>>2];g[Ed>>2]=(+g[Ad>>2]+ +g[Dd>>2])*.7071067690849304;g[ie>>2]=(+g[Dd>>2]-+g[Ad>>2])*.7071067690849304;g[Ig>>2]=+g[sg>>2]+ +g[Hg>>2];g[x>>2]=+g[Zf>>2]+ +g[mg>>2];g[Me>>2]=+g[Ig>>2]+ +g[x>>2];g[Ne>>2]=+g[xe>>2]+ +g[Ae>>2];g[Oe>>2]=+g[Ee>>2]+ +g[He>>2];g[pf>>2]=+g[Ne>>2]+ +g[Oe>>2];g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2]=+g[Ig>>2]-+g[x>>2];g[(c[q>>2]|0)+(c[t>>2]<<4<<2)>>2]=+g[Oe>>2]-+g[Ne>>2];g[(c[p>>2]|0)+(c[s>>2]<<5<<2)>>2]=+g[Me>>2]-+g[pf>>2];g[c[p>>2]>>2]=+g[Me>>2]+ 
+g[pf>>2];g[ue>>2]=+g[sg>>2]-+g[Hg>>2];g[Ke>>2]=+g[mg>>2]-+g[Zf>>2];g[Be>>2]=+g[xe>>2]-+g[Ae>>2];g[Ie>>2]=+g[Ee>>2]-+g[He>>2];g[Je>>2]=(+g[Be>>2]+ +g[Ie>>2])*.7071067690849304;g[Le>>2]=(+g[Ie>>2]-+g[Be>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[s>>2]|0)*24<<2)>>2]=+g[ue>>2]-+g[Je>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*24<<2)>>2]=+g[Le>>2]-+g[Ke>>2];g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2]=+g[ue>>2]+ +g[Je>>2];g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2]=+g[Ke>>2]+ +g[Le>>2];g[qf>>2]=+g[Mf>>2]-+g[rg>>2];g[tf>>2]=(+g[rf>>2]+ +g[sf>>2])*.7071067690849304;g[uf>>2]=+g[qf>>2]+ +g[tf>>2];g[Kf>>2]=+g[qf>>2]-+g[tf>>2];g[Hf>>2]=+g[wf>>2]*.9238795042037964-+g[vf>>2]*.3826834261417389;g[If>>2]=+g[yf>>2]*.3826834261417389+ +g[zf>>2]*.9238795042037964;g[Jf>>2]=+g[Hf>>2]+ +g[If>>2];g[Lf>>2]=+g[If>>2]-+g[Hf>>2];g[xf>>2]=+g[vf>>2]*.9238795042037964+ +g[wf>>2]*.3826834261417389;g[Af>>2]=+g[yf>>2]*.9238795042037964-+g[zf>>2]*.3826834261417389;g[Bf>>2]=+g[xf>>2]+ +g[Af>>2];g[Ff>>2]=+g[Af>>2]-+g[xf>>2];g[Cf>>2]=(+g[sf>>2]-+g[rf>>2])*.7071067690849304;g[Df>>2]=+g[Gg>>2]-+g[zg>>2];g[Ef>>2]=+g[Cf>>2]-+g[Df>>2];g[Gf>>2]=+g[Df>>2]+ +g[Cf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*28<<2)>>2]=+g[uf>>2]-+g[Bf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*28<<2)>>2]=+g[Jf>>2]-+g[Gf>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[uf>>2]+ +g[Bf>>2];g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=+g[Gf>>2]+ +g[Jf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2]=+g[Ef>>2]+ +g[Ff>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2]=+g[Kf>>2]+ +g[Lf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*20<<2)>>2]=+g[Ff>>2]-+g[Ef>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*20<<2)>>2]=+g[Kf>>2]-+g[Lf>>2];g[_e>>2]=+g[Rd>>2]-+g[Ud>>2];g[$e>>2]=+g[ne>>2]-+g[me>>2];g[af>>2]=+g[_e>>2]+ +g[$e>>2];g[se>>2]=+g[_e>>2]-+g[$e>>2];g[jf>>2]=+g[$d>>2]-+g[Yd>>2];g[kf>>2]=+g[Qe>>2]-+g[Pe>>2];g[lf>>2]=+g[jf>>2]-+g[kf>>2];g[nf>>2]=+g[kf>>2]+ +g[jf>>2];g[bf>>2]=+g[gd>>2]-+g[nd>>2];g[cf>>2]=+g[sd>>2]-+g[rd>>2];g[df>>2]=+g[bf>>2]*.8314695954322815+ 
+g[cf>>2]*.5555702447891235;g[pe>>2]=+g[cf>>2]*.8314695954322815-+g[bf>>2]*.5555702447891235;g[ef>>2]=+g[xd>>2]-+g[Ed>>2];g[ff>>2]=+g[ie>>2]-+g[he>>2];g[gf>>2]=+g[ef>>2]*.8314695954322815-+g[ff>>2]*.5555702447891235;g[qe>>2]=+g[ef>>2]*.5555702447891235+ +g[ff>>2]*.8314695954322815;g[hf>>2]=+g[df>>2]+ +g[gf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*26<<2)>>2]=+g[af>>2]-+g[hf>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2]=+g[af>>2]+ +g[hf>>2];g[re>>2]=+g[pe>>2]+ +g[qe>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2]=+g[nf>>2]+ +g[re>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*26<<2)>>2]=+g[re>>2]-+g[nf>>2];g[mf>>2]=+g[gf>>2]-+g[df>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2]=+g[lf>>2]+ +g[mf>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*22<<2)>>2]=+g[mf>>2]-+g[lf>>2];g[te>>2]=+g[qe>>2]-+g[pe>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*22<<2)>>2]=+g[se>>2]-+g[te>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2]=+g[se>>2]+ +g[te>>2];g[Vd>>2]=+g[Rd>>2]+ +g[Ud>>2];g[ae>>2]=+g[Yd>>2]+ +g[$d>>2];g[be>>2]=+g[Vd>>2]+ +g[ae>>2];g[Ye>>2]=+g[Vd>>2]-+g[ae>>2];g[oe>>2]=+g[me>>2]+ +g[ne>>2];g[Re>>2]=+g[Pe>>2]+ +g[Qe>>2];g[Se>>2]=+g[oe>>2]-+g[Re>>2];g[Ue>>2]=+g[Re>>2]+ +g[oe>>2];g[od>>2]=+g[gd>>2]+ +g[nd>>2];g[td>>2]=+g[rd>>2]+ +g[sd>>2];g[ud>>2]=+g[od>>2]*.9807852506637573+ +g[td>>2]*.19509032368659973;g[Ve>>2]=+g[td>>2]*.9807852506637573-+g[od>>2]*.19509032368659973;g[Fd>>2]=+g[xd>>2]+ +g[Ed>>2];g[je>>2]=+g[he>>2]+ +g[ie>>2];g[ke>>2]=+g[Fd>>2]*.9807852506637573-+g[je>>2]*.19509032368659973;g[We>>2]=+g[Fd>>2]*.19509032368659973+ +g[je>>2]*.9807852506637573;g[le>>2]=+g[ud>>2]+ +g[ke>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*30<<2)>>2]=+g[be>>2]-+g[le>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[be>>2]+ +g[le>>2];g[Xe>>2]=+g[Ve>>2]+ +g[We>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[Ue>>2]+ +g[Xe>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*30<<2)>>2]=+g[Xe>>2]-+g[Ue>>2];g[Te>>2]=+g[ke>>2]-+g[ud>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2]=+g[Se>>2]+ 
+g[Te>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*18<<2)>>2]=+g[Te>>2]-+g[Se>>2];g[Ze>>2]=+g[We>>2]-+g[Ve>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*18<<2)>>2]=+g[Ye>>2]-+g[Ze>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2]=+g[Ye>>2]+ +g[Ze>>2];g[Mc>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Zc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Id>>2]=+g[oc>>2]-+g[nc>>2];g[pc>>2]=+g[nc>>2]+ +g[oc>>2];g[Pc>>2]=+g[Nc>>2]*.8314695954322815+ +g[Oc>>2]*.5555702447891235;g[Sc>>2]=+g[Qc>>2]*.8314695954322815-+g[Rc>>2]*.5555702447891235;g[Tc>>2]=+g[Pc>>2]+ +g[Sc>>2];g[Hd>>2]=+g[Sc>>2]-+g[Pc>>2];g[dd>>2]=+g[cc>>2]-+g[dc>>2];g[ed>>2]=+g[gc>>2]-+g[fc>>2];g[fd>>2]=+g[dd>>2]*.8819212913513184-+g[ed>>2]*.4713967442512512;g[Nd>>2]=+g[dd>>2]*.4713967442512512+ +g[ed>>2]*.8819212913513184;g[kc>>2]=+g[Oc>>2]*.8314695954322815-+g[Nc>>2]*.5555702447891235;g[lc>>2]=+g[Qc>>2]*.5555702447891235+ +g[Rc>>2]*.8314695954322815;g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[_c>>2]=+g[lc>>2]-+g[kc>>2];g[Zb>>2]=+g[Vc>>2]+ +g[Wc>>2];g[ac>>2]=+g[_b>>2]+ +g[$b>>2];g[bc>>2]=+g[Zb>>2]*.9569403529167175+ +g[ac>>2]*.290284663438797;g[tc>>2]=+g[ac>>2]*.9569403529167175-+g[Zb>>2]*.290284663438797;g[ad>>2]=+g[Vc>>2]-+g[Wc>>2];g[bd>>2]=+g[$b>>2]-+g[_b>>2];g[cd>>2]=+g[ad>>2]*.8819212913513184+ +g[bd>>2]*.4713967442512512;g[Md>>2]=+g[bd>>2]*.8819212913513184-+g[ad>>2]*.4713967442512512;g[ec>>2]=+g[cc>>2]+ +g[dc>>2];g[hc>>2]=+g[fc>>2]+ +g[gc>>2];g[ic>>2]=+g[ec>>2]*.9569403529167175-+g[hc>>2]*.290284663438797;g[uc>>2]=+g[ec>>2]*.290284663438797+ +g[hc>>2]*.9569403529167175;g[Uc>>2]=+g[Mc>>2]+ +g[Tc>>2];g[jc>>2]=+g[bc>>2]+ +g[ic>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*29<<2)>>2]=+g[Uc>>2]-+g[jc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[Uc>>2]+ +g[jc>>2];g[sc>>2]=+g[pc>>2]+ +g[mc>>2];g[vc>>2]=+g[tc>>2]+ +g[uc>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[sc>>2]+ +g[vc>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*29<<2)>>2]=+g[vc>>2]-+g[sc>>2];g[qc>>2]=+g[mc>>2]-+g[pc>>2];g[rc>>2]=+g[ic>>2]-+g[bc>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2]=+g[qc>>2]+ 
+g[rc>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*19<<2)>>2]=+g[rc>>2]-+g[qc>>2];g[wc>>2]=+g[Mc>>2]-+g[Tc>>2];g[Yc>>2]=+g[uc>>2]-+g[tc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*19<<2)>>2]=+g[wc>>2]-+g[Yc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2]=+g[wc>>2]+ +g[Yc>>2];g[$c>>2]=+g[Zc>>2]+ +g[_c>>2];g[Gd>>2]=+g[cd>>2]+ +g[fd>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*27<<2)>>2]=+g[$c>>2]-+g[Gd>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2]=+g[$c>>2]+ +g[Gd>>2];g[Ld>>2]=+g[Id>>2]+ +g[Hd>>2];g[Od>>2]=+g[Md>>2]+ +g[Nd>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2]=+g[Ld>>2]+ +g[Od>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*27<<2)>>2]=+g[Od>>2]-+g[Ld>>2];g[Jd>>2]=+g[Hd>>2]-+g[Id>>2];g[Kd>>2]=+g[fd>>2]-+g[cd>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2]=+g[Jd>>2]+ +g[Kd>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*21<<2)>>2]=+g[Kd>>2]-+g[Jd>>2];g[Pd>>2]=+g[Zc>>2]-+g[_c>>2];g[Qd>>2]=+g[Nd>>2]-+g[Md>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*21<<2)>>2]=+g[Pd>>2]-+g[Qd>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2]=+g[Pd>>2]+ +g[Qd>>2];g[ia>>2]=+g[C>>2]-+g[ha>>2];g[Sb>>2]=+g[C>>2]+ +g[ha>>2];g[Bc>>2]=+g[hb>>2]+ +g[eb>>2];g[ib>>2]=+g[eb>>2]-+g[hb>>2];g[ra>>2]=+g[na>>2]*.19509032368659973+ +g[qa>>2]*.9807852506637573;g[Aa>>2]=+g[wa>>2]*.9807852506637573-+g[za>>2]*.19509032368659973;g[Ba>>2]=+g[ra>>2]-+g[Aa>>2];g[Ac>>2]=+g[Aa>>2]+ +g[ra>>2];g[Yb>>2]=+g[xb>>2]+ +g[Mb>>2];g[xc>>2]=+g[Xa>>2]+ +g[Sa>>2];g[yc>>2]=+g[Yb>>2]*.9951847195625305-+g[xc>>2]*.0980171412229538;g[Gc>>2]=+g[xc>>2]*.9951847195625305+ +g[Yb>>2]*.0980171412229538;g[$a>>2]=+g[na>>2]*.9807852506637573-+g[qa>>2]*.19509032368659973;g[ab>>2]=+g[za>>2]*.9807852506637573+ +g[wa>>2]*.19509032368659973;g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[Tb>>2]=+g[ab>>2]+ +g[$a>>2];g[da>>2]=+g[T>>2]-+g[ca>>2];g[Na>>2]=+g[Ja>>2]-+g[Ma>>2];g[Oa>>2]=+g[da>>2]*.6343932747840881+ +g[Na>>2]*.7730104327201843;g[mb>>2]=+g[da>>2]*.7730104327201843-+g[Na>>2]*.6343932747840881;g[Vb>>2]=+g[ca>>2]+ +g[T>>2];g[Wb>>2]=+g[Ja>>2]+ +g[Ma>>2];g[Xb>>2]=+g[Vb>>2]*.0980171412229538+ 
+g[Wb>>2]*.9951847195625305;g[Fc>>2]=+g[Vb>>2]*.9951847195625305-+g[Wb>>2]*.0980171412229538;g[Nb>>2]=+g[xb>>2]-+g[Mb>>2];g[Ya>>2]=+g[Sa>>2]-+g[Xa>>2];g[Za>>2]=+g[Nb>>2]*.7730104327201843-+g[Ya>>2]*.6343932747840881;g[nb>>2]=+g[Ya>>2]*.7730104327201843+ +g[Nb>>2]*.6343932747840881;g[Ca>>2]=+g[ia>>2]+ +g[Ba>>2];g[_a>>2]=+g[Oa>>2]+ +g[Za>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*25<<2)>>2]=+g[Ca>>2]-+g[_a>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2]=+g[Ca>>2]+ +g[_a>>2];g[lb>>2]=+g[ib>>2]+ +g[bb>>2];g[Pb>>2]=+g[mb>>2]+ +g[nb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2]=+g[lb>>2]+ +g[Pb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*25<<2)>>2]=+g[Pb>>2]-+g[lb>>2];g[jb>>2]=+g[bb>>2]-+g[ib>>2];g[kb>>2]=+g[Za>>2]-+g[Oa>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2]=+g[jb>>2]+ +g[kb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*23<<2)>>2]=+g[kb>>2]-+g[jb>>2];g[Qb>>2]=+g[ia>>2]-+g[Ba>>2];g[Rb>>2]=+g[nb>>2]-+g[mb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*23<<2)>>2]=+g[Qb>>2]-+g[Rb>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[zc>>2]=+g[Xb>>2]+ +g[yc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*31<<2)>>2]=+g[Ub>>2]-+g[zc>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[Ub>>2]+ +g[zc>>2];g[Ec>>2]=+g[Bc>>2]+ +g[Ac>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[Ec>>2]+ +g[Hc>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*31<<2)>>2]=+g[Hc>>2]-+g[Ec>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[Dc>>2]=+g[yc>>2]-+g[Xb>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2]=+g[Cc>>2]+ +g[Dc>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*17<<2)>>2]=+g[Dc>>2]-+g[Cc>>2];g[Ic>>2]=+g[Sb>>2]-+g[Tb>>2];g[Jc>>2]=+g[Gc>>2]-+g[Fc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*17<<2)>>2]=+g[Ic>>2]-+g[Jc>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2]=+g[Ic>>2]+ +g[Jc>>2];c[Ng>>2]=(c[Ng>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Og;return}function pt(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,33,7192);i=b;return}function qt(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0;L=i;i=i+112|0;n=L+104|0;o=L+100|0;p=L+96|0;q=L+92|0;r=L+88|0;s=L+84|0;t=L+80|0;M=L+76|0;u=L+72|0;v=L+68|0;K=L+56|0;y=L+52|0;I=L+48|0;E=L+44|0;H=L+40|0;B=L+36|0;G=L+32|0;w=L+28|0;x=L+24|0;F=L+20|0;J=L+16|0;C=L+12|0;D=L+8|0;z=L+4|0;A=L;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[M>>2]=k;c[u>>2]=l;c[v>>2]=m;g[L+64>>2]=.5;g[L+60>>2]=.8660253882408142;c[K>>2]=c[M>>2];while(1){if((c[K>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[I>>2]=+g[w>>2]+ +g[x>>2];g[C>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=+g[c[o>>2]>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[H>>2]=+g[C>>2]+ +g[D>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[G>>2]=+g[z>>2]+ +g[A>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=(+g[E>>2]-+g[B>>2])*.8660253882408142;g[F>>2]=+g[B>>2]+ +g[E>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[y>>2]-+g[F>>2]*.5;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[y>>2]+ +g[F>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=(+g[G>>2]-+g[H>>2])*.8660253882408142;g[J>>2]=+g[G>>2]+ +g[H>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[I>>2]-+g[J>>2]*.5;g[c[p>>2]>>2]=+g[I>>2]+ +g[J>>2];c[K>>2]=(c[K>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=L;return}function rt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,34,7240);i=b;return} +function Ov(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,S
g=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0;Ah=i;i=i+1936|0;n=Ah+1932|0;o=Ah+1928|0;p=Ah+1924|0;q=Ah+1920|0;r=Ah+1916|0;s=Ah+1912|0;t=Ah+1908|0;Bh=Ah+1904|0;u=Ah+1900|0;v=Ah+1896|0;zh=Ah+1736|0;C=Ah+1732|0;Qc=Ah+1728|0;rc=Ah+1724|0;bb=Ah+1720|0;eh=Ah+1716|0;_f=Ah+1712|0;rg=Ah+1708|0;Qf=Ah+1704|0;rd=Ah+1700|0;ye=Ah+1696|0;la=Ah+1692|0;sc=Ah+1688|0;df=Ah+1684|0;Je=Ah+1680|0;Ya=Ah+1676|0;Rc=Ah+1672|0;th=Ah+1668|0;sg=Ah+1664|0;Bd=Ah+1660|0;ff=Ah+1656|0;bg=Ah+1652|0;Pf=Ah+1648|0;xa=Ah+1644|0;Ta=Ah+1640|0;K=Ah+1636|0;Ua=Ah+1632|0;_b=Ah+1628|0;vc=Ah+1624|0;wd=Ah+1620|0;ef=Ah+1616|0;Vc=Ah+1612|0;uc=Ah+1608|0;Lg=Ah+1604|0;dg=Ah+1600|0;me=Ah+1596|0;Ce=Ah+1592|0;gg=Ah+1588|0;Mf=Ah+1584|0;ba=Ah+1580|0;Tb=Ah+1576|0;La=Ah+1572|0;Ub=Ah+1568|0;gc=Ah+1564|0;Pd=Ah+1560|0;he=Ah+1556|0;Be=Ah+1552|0;dc=Ah+1548|0;Od=Ah+1544|0;_g=Ah+1540|0;ig=Ah+1536|0;Xe=Ah+1532|0;Fe=Ah+1528|0;lg=Ah+1524|0;Lf=Ah+1520|0;Bb=Ah+1516|0;Wb=Ah+1512|0;Kb=Ah+1508|0;Xb=Ah+1504|0;nc=Ah+1500|0;Sd=Ah+1496|0;Se=Ah+1492|0;Ee=Ah+1488|0;kc=Ah+1484|0;Rd=Ah+1480|0;Ob=Ah+1476|0;y=Ah+1472|0;ab=Ah+1468|0;af=Ah+1464|0;of=Ah+1460|0;Za=Ah+1456|0;B=Ah+1452|0;$e=Ah+1448|0;$g=Ah+1444|0;D=Ah+1440|0;G=Ah+1436|0;pd=Ah+1432|0;ch=Ah+1428|0;ga=Ah+1424|0;ja=Ah+1420|0;od=Ah+1416|0;w=Ah+1412|0;Fa=Ah+1408|0;_a=Ah+1404|0;$a=Ah+1400|0;Xc=Ah+1396|0;ee=Ah+1392|0;z=Ah+1388|0;A=Ah+1384|0;zg=Ah+1380|0;Ag=Ah+1376|0;E=Ah+1372|0;F=Ah+1368|0;ah=Ah+1364|0;bh=Ah+1360|0;ha=Ah+1356|0;ia=Ah+1352|0;xg=Ah+1348|0;dh=Ah+1344|0;fa=Ah+1340|0;ka=Ah+1336|0;pg=Ah+1332|0;qg=Ah+1328|0;nd=Ah+1324|0;qd=Ah+1320|0;bf=Ah+1316|0;cf=Ah+1312|0;Wa=Ah+1308|0;Xa=Ah+1304|0;hh=Ah+1300|0;na=Ah+1296|0;va=Ah+1292|0;td=Ah+1288|0;kh=Ah+1284|0;sa=Ah+1280|0;qa=Ah+1276|0;ud=Ah+1272|0;oh=Ah+1268|0;ya=Ah+1264|0;I=Ah+1260|0;yd=Ah+1256|0;rh=Ah+1252|0;Da=Ah+1248|0;Ba=Ah+1244|0;zd=Ah+1240|0;fh=Ah+1236|0;gh=Ah+1232|0;ta=Ah+1228|0;ua=Ah+1224|0;ih=Ah+122
0|0;jh=Ah+1216|0;oa=Ah+1212|0;pa=Ah+1208|0;mh=Ah+1204|0;nh=Ah+1200|0;Ea=Ah+1196|0;H=Ah+1192|0;ph=Ah+1188|0;qh=Ah+1184|0;za=Ah+1180|0;Aa=Ah+1176|0;lh=Ah+1172|0;sh=Ah+1168|0;xd=Ah+1164|0;Ad=Ah+1160|0;$f=Ah+1156|0;ag=Ah+1152|0;ra=Ah+1148|0;wa=Ah+1144|0;Ca=Ah+1140|0;J=Ah+1136|0;Wc=Ah+1132|0;Zb=Ah+1128|0;sd=Ah+1124|0;vd=Ah+1120|0;Tc=Ah+1116|0;Uc=Ah+1112|0;xh=Ah+1108|0;N=Ah+1104|0;Ja=Ah+1100|0;je=Ah+1096|0;Cg=Ah+1092|0;Ga=Ah+1088|0;Q=Ah+1084|0;ke=Ah+1080|0;Jg=Ah+1076|0;Fd=Ah+1072|0;$=Ah+1068|0;da=Ah+1064|0;Gg=Ah+1060|0;fe=Ah+1056|0;W=Ah+1052|0;ca=Ah+1048|0;vh=Ah+1044|0;wh=Ah+1040|0;O=Ah+1036|0;P=Ah+1032|0;Ha=Ah+1028|0;Ia=Ah+1024|0;yh=Ah+1020|0;Bg=Ah+1016|0;Hg=Ah+1012|0;Ig=Ah+1008|0;X=Ah+1004|0;Y=Ah+1e3|0;Z=Ah+996|0;_=Ah+992|0;Eg=Ah+988|0;Fg=Ah+984|0;S=Ah+980|0;T=Ah+976|0;U=Ah+972|0;V=Ah+968|0;Dg=Ah+964|0;Kg=Ah+960|0;ie=Ah+956|0;le=Ah+952|0;eg=Ah+948|0;fg=Ah+944|0;R=Ah+940|0;aa=Ah+936|0;ea=Ah+932|0;Ka=Ah+928|0;ec=Ah+924|0;fc=Ah+920|0;Ed=Ah+916|0;ge=Ah+912|0;bc=Ah+908|0;cc=Ah+904|0;Og=Ah+900|0;Na=Ah+896|0;Ib=Ah+892|0;Ue=Ah+888|0;Rg=Ah+884|0;Fb=Ah+880|0;ob=Ah+876|0;Ve=Ah+872|0;Yg=Ah+868|0;Pe=Ah+864|0;zb=Ah+860|0;Db=Ah+856|0;Vg=Ah+852|0;Qe=Ah+848|0;ub=Ah+844|0;Cb=Ah+840|0;Mg=Ah+836|0;Ng=Ah+832|0;Oa=Ah+828|0;Pa=Ah+824|0;Gb=Ah+820|0;Hb=Ah+816|0;Pg=Ah+812|0;Qg=Ah+808|0;Wg=Ah+804|0;Xg=Ah+800|0;vb=Ah+796|0;wb=Ah+792|0;xb=Ah+788|0;yb=Ah+784|0;Tg=Ah+780|0;Ug=Ah+776|0;qb=Ah+772|0;rb=Ah+768|0;sb=Ah+764|0;tb=Ah+760|0;Sg=Ah+756|0;Zg=Ah+752|0;Te=Ah+748|0;We=Ah+744|0;jg=Ah+740|0;kg=Ah+736|0;pb=Ah+732|0;Ab=Ah+728|0;Eb=Ah+724|0;Jb=Ah+720|0;lc=Ah+716|0;mc=Ah+712|0;oe=Ah+708|0;Re=Ah+704|0;ic=Ah+700|0;jc=Ah+696|0;uh=Ah+692|0;x=Ah+688|0;Wf=Ah+684|0;Xf=Ah+680|0;Yf=Ah+676|0;yg=Ah+672|0;Of=Ah+668|0;Uf=Ah+664|0;Tf=Ah+660|0;Vf=Ah+656|0;Kf=Ah+652|0;Nf=Ah+648|0;Rf=Ah+644|0;Sf=Ah+640|0;cg=Ah+636|0;tg=Ah+632|0;Ff=Ah+628|0;Cf=Ah+624|0;wg=Ah+620|0;Df=Ah+616|0;ng=Ah+612|0;Gf=Ah+608|0;ug=Ah+604|0;vg=Ah+600|0;hg=Ah+596|0;mg=Ah+592|0;og=Ah+588|0;zf=Ah+584|0;If=Ah+580|0;Jf=Ah+576|0;Af=Ah+572|0;Bf=Ah+568|0;Ef=A
h+564|0;Hf=Ah+560|0;Ae=Ah+556|0;sf=Ah+552|0;Le=Ah+548|0;vf=Ah+544|0;He=Ah+540|0;wf=Ah+536|0;Oe=Ah+532|0;tf=Ah+528|0;ze=Ah+524|0;Ke=Ah+520|0;De=Ah+516|0;Ge=Ah+512|0;Me=Ah+508|0;Ne=Ah+504|0;Ie=Ah+500|0;pf=Ah+496|0;yf=Ah+492|0;Zf=Ah+488|0;qf=Ah+484|0;rf=Ah+480|0;uf=Ah+476|0;xf=Ah+472|0;Dd=Ah+468|0;qe=Ah+464|0;hf=Ah+460|0;te=Ah+456|0;Ze=Ah+452|0;ue=Ah+448|0;lf=Ah+444|0;re=Ah+440|0;Cd=Ah+436|0;gf=Ah+432|0;ne=Ah+428|0;Ye=Ah+424|0;jf=Ah+420|0;kf=Ah+416|0;_e=Ah+412|0;mf=Ah+408|0;we=Ah+404|0;xe=Ah+400|0;nf=Ah+396|0;pe=Ah+392|0;se=Ah+388|0;ve=Ah+384|0;Sb=Ah+380|0;Ic=Ah+376|0;Ec=Ah+372|0;Jc=Ah+368|0;xc=Ah+364|0;Mc=Ah+360|0;Bc=Ah+356|0;Lc=Ah+352|0;Qb=Ah+348|0;Rb=Ah+344|0;Cc=Ah+340|0;Dc=Ah+336|0;Vb=Ah+332|0;Yb=Ah+328|0;zc=Ah+324|0;Ac=Ah+320|0;yc=Ah+316|0;Fc=Ah+312|0;Oc=Ah+308|0;Pc=Ah+304|0;Gc=Ah+300|0;Hc=Ah+296|0;Kc=Ah+292|0;Nc=Ah+288|0;Nd=Ah+284|0;de=Ah+280|0;$d=Ah+276|0;gd=Ah+272|0;Ud=Ah+268|0;jd=Ah+264|0;Yd=Ah+260|0;id=Ah+256|0;Ld=Ah+252|0;Md=Ah+248|0;Zd=Ah+244|0;_d=Ah+240|0;Qd=Ah+236|0;Td=Ah+232|0;Wd=Ah+228|0;Xd=Ah+224|0;Vd=Ah+220|0;ae=Ah+216|0;ld=Ah+212|0;md=Ah+208|0;be=Ah+204|0;ce=Ah+200|0;hd=Ah+196|0;kd=Ah+192|0;M=Ah+188|0;hb=Ah+184|0;db=Ah+180|0;kb=Ah+176|0;Mb=Ah+172|0;lb=Ah+168|0;Sa=Ah+164|0;ib=Ah+160|0;ma=Ah+156|0;L=Ah+152|0;Va=Ah+148|0;cb=Ah+144|0;Ma=Ah+140|0;Lb=Ah+136|0;Qa=Ah+132|0;Ra=Ah+128|0;Nb=Ah+124|0;eb=Ah+120|0;nb=Ah+116|0;Pb=Ah+112|0;fb=Ah+108|0;gb=Ah+104|0;jb=Ah+100|0;mb=Ah+96|0;ac=Ah+92|0;dd=Ah+88|0;$c=Ah+84|0;ed=Ah+80|0;pc=Ah+76|0;Hd=Ah+72|0;Yc=Ah+68|0;Gd=Ah+64|0;Sc=Ah+60|0;$b=Ah+56|0;Zc=Ah+52|0;_c=Ah+48|0;hc=Ah+44|0;oc=Ah+40|0;tc=Ah+36|0;wc=Ah+32|0;qc=Ah+28|0;ad=Ah+24|0;Jd=Ah+20|0;Kd=Ah+16|0;bd=Ah+12|0;cd=Ah+8|0;fd=Ah+4|0;Id=Ah;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Bh>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ah+1892>>2]=1.3431179523468018;g[Ah+1888>>2]=1.4819022417068481;g[Ah+1884>>2]=1.807978630065918;g[Ah+1880>>2]=.8551101684570312;g[Ah+1876>>2]=1.9975908994674683;g[Ah+1872>>2]=.09813535213470459;g[Ah+1868>>2]=.6737797260284424;g[Ah+
1864>>2]=1.8830881118774414;g[Ah+1860>>2]=.19509032368659973;g[Ah+1856>>2]=.9807852506637573;g[Ah+1852>>2]=1.1913986206054688;g[Ah+1848>>2]=1.606415033340454;g[Ah+1844>>2]=1.7154572010040283;g[Ah+1840>>2]=1.0282055139541626;g[Ah+1836>>2]=1.9783530235290527;g[Ah+1832>>2]=.2934609353542328;g[Ah+1828>>2]=.48596036434173584;g[Ah+1824>>2]=1.9400625228881836;g[Ah+1820>>2]=.5555702447891235;g[Ah+1816>>2]=.8314695954322815;g[Ah+1812>>2]=1.2687865495681763;g[Ah+1808>>2]=1.5460208654403687;g[Ah+1804>>2]=1.7638425827026367;g[Ah+1800>>2]=.9427934885025024;g[Ah+1796>>2]=1.990369439125061;g[Ah+1792>>2]=.1960342824459076;g[Ah+1788>>2]=.580569326877594;g[Ah+1784>>2]=1.913880705833435;g[Ah+1780>>2]=1.662939190864563;g[Ah+1776>>2]=1.111140489578247;g[Ah+1772>>2]=.39018064737319946;g[Ah+1768>>2]=1.9615705013275146;g[Ah+1764>>2]=.7653668522834778;g[Ah+1760>>2]=1.8477590084075928;g[Ah+1756>>2]=1.4142135381698608;g[Ah+1752>>2]=2.0;g[Ah+1748>>2]=.3826834261417389;g[Ah+1744>>2]=.9238795042037964;g[Ah+1740>>2]=.7071067690849304;c[zh>>2]=c[Bh>>2];while(1){if((c[zh>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[Fa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*31<<2)>>2];g[Ob>>2]=+g[w>>2]+ +g[Fa>>2];g[y>>2]=+g[w>>2]-+g[Fa>>2];g[_a>>2]=+g[c[q>>2]>>2];g[$a>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*31<<2)>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[af>>2]=+g[$a>>2]-+g[_a>>2];g[Xc>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2];g[ee>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2];g[of>>2]=+g[Xc>>2]+ +g[ee>>2];g[Za>>2]=+g[Xc>>2]-+g[ee>>2];g[z>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<4<<2)>>2];g[A>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[$e>>2]=+g[z>>2]-+g[A>>2];g[zg>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Ag>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*23<<2)>>2];g[$g>>2]=+g[zg>>2]+ +g[Ag>>2];g[D>>2]=+g[zg>>2]-+g[Ag>>2];g[E>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[F>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*23<<2)>>2];g[G>>2]=+g[E>>2]+ 
+g[F>>2];g[pd>>2]=+g[E>>2]-+g[F>>2];g[ah>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[bh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*24<<2)>>2];g[ch>>2]=+g[ah>>2]+ +g[bh>>2];g[ga>>2]=+g[ah>>2]-+g[bh>>2];g[ha>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[ia>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*24<<2)>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[od>>2]=+g[ia>>2]-+g[ha>>2];g[C>>2]=+g[y>>2]-+g[B>>2];g[Qc>>2]=+g[y>>2]+ +g[B>>2];g[rc>>2]=+g[Za>>2]-+g[ab>>2];g[bb>>2]=+g[Za>>2]+ +g[ab>>2];g[xg>>2]=+g[Ob>>2]+ +g[of>>2];g[dh>>2]=+g[$g>>2]+ +g[ch>>2];g[eh>>2]=+g[xg>>2]+ +g[dh>>2];g[_f>>2]=+g[xg>>2]-+g[dh>>2];g[pg>>2]=+g[pd>>2]+ +g[od>>2];g[qg>>2]=+g[af>>2]-+g[$e>>2];g[rg>>2]=+g[pg>>2]+ +g[qg>>2];g[Qf>>2]=+g[qg>>2]-+g[pg>>2];g[nd>>2]=+g[Ob>>2]-+g[of>>2];g[qd>>2]=+g[od>>2]-+g[pd>>2];g[rd>>2]=+g[nd>>2]+ +g[qd>>2];g[ye>>2]=+g[nd>>2]-+g[qd>>2];g[fa>>2]=+g[D>>2]-+g[G>>2];g[ka>>2]=+g[ga>>2]-+g[ja>>2];g[la>>2]=(+g[fa>>2]+ +g[ka>>2])*.7071067690849304;g[sc>>2]=(+g[fa>>2]-+g[ka>>2])*.7071067690849304;g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[cf>>2]=+g[$g>>2]-+g[ch>>2];g[df>>2]=+g[bf>>2]-+g[cf>>2];g[Je>>2]=+g[cf>>2]+ +g[bf>>2];g[Wa>>2]=+g[D>>2]+ +g[G>>2];g[Xa>>2]=+g[ga>>2]+ +g[ja>>2];g[Ya>>2]=(+g[Wa>>2]-+g[Xa>>2])*.7071067690849304;g[Rc>>2]=(+g[Wa>>2]+ +g[Xa>>2])*.7071067690849304;g[fh>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[gh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*27<<2)>>2];g[hh>>2]=+g[fh>>2]+ +g[gh>>2];g[na>>2]=+g[fh>>2]-+g[gh>>2];g[ta>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[ua>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*27<<2)>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[td>>2]=+g[ta>>2]-+g[ua>>2];g[ih>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*20<<2)>>2];g[jh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[kh>>2]=+g[ih>>2]+ +g[jh>>2];g[sa>>2]=+g[ih>>2]-+g[jh>>2];g[oa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*20<<2)>>2];g[pa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[ud>>2]=+g[oa>>2]-+g[pa>>2];g[mh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[nh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*28<<2)>>2];g[oh>>2]=+g[mh>>2]+ 
+g[nh>>2];g[ya>>2]=+g[mh>>2]-+g[nh>>2];g[Ea>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[H>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*28<<2)>>2];g[I>>2]=+g[Ea>>2]+ +g[H>>2];g[yd>>2]=+g[H>>2]-+g[Ea>>2];g[ph>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[qh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*19<<2)>>2];g[rh>>2]=+g[ph>>2]+ +g[qh>>2];g[Da>>2]=+g[ph>>2]-+g[qh>>2];g[za>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2];g[Aa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*19<<2)>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[zd>>2]=+g[za>>2]-+g[Aa>>2];g[lh>>2]=+g[hh>>2]+ +g[kh>>2];g[sh>>2]=+g[oh>>2]+ +g[rh>>2];g[th>>2]=+g[lh>>2]+ +g[sh>>2];g[sg>>2]=+g[lh>>2]-+g[sh>>2];g[xd>>2]=+g[oh>>2]-+g[rh>>2];g[Ad>>2]=+g[yd>>2]-+g[zd>>2];g[Bd>>2]=+g[xd>>2]+ +g[Ad>>2];g[ff>>2]=+g[Ad>>2]-+g[xd>>2];g[$f>>2]=+g[zd>>2]+ +g[yd>>2];g[ag>>2]=+g[ud>>2]+ +g[td>>2];g[bg>>2]=+g[$f>>2]-+g[ag>>2];g[Pf>>2]=+g[ag>>2]+ +g[$f>>2];g[ra>>2]=+g[na>>2]-+g[qa>>2];g[wa>>2]=+g[sa>>2]+ +g[va>>2];g[xa>>2]=+g[ra>>2]*.9238795042037964-+g[wa>>2]*.3826834261417389;g[Ta>>2]=+g[ra>>2]*.3826834261417389+ +g[wa>>2]*.9238795042037964;g[Ca>>2]=+g[ya>>2]-+g[Ba>>2];g[J>>2]=+g[Da>>2]-+g[I>>2];g[K>>2]=+g[Ca>>2]*.9238795042037964+ +g[J>>2]*.3826834261417389;g[Ua>>2]=+g[J>>2]*.9238795042037964-+g[Ca>>2]*.3826834261417389;g[Wc>>2]=+g[ya>>2]+ +g[Ba>>2];g[Zb>>2]=+g[Da>>2]+ +g[I>>2];g[_b>>2]=+g[Wc>>2]*.3826834261417389-+g[Zb>>2]*.9238795042037964;g[vc>>2]=+g[Wc>>2]*.9238795042037964+ +g[Zb>>2]*.3826834261417389;g[sd>>2]=+g[hh>>2]-+g[kh>>2];g[vd>>2]=+g[td>>2]-+g[ud>>2];g[wd>>2]=+g[sd>>2]-+g[vd>>2];g[ef>>2]=+g[sd>>2]+ +g[vd>>2];g[Tc>>2]=+g[na>>2]+ +g[qa>>2];g[Uc>>2]=+g[va>>2]-+g[sa>>2];g[Vc>>2]=+g[Tc>>2]*.3826834261417389-+g[Uc>>2]*.9238795042037964;g[uc>>2]=+g[Tc>>2]*.9238795042037964+ +g[Uc>>2]*.3826834261417389;g[vh>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[wh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*29<<2)>>2];g[xh>>2]=+g[vh>>2]+ +g[wh>>2];g[N>>2]=+g[vh>>2]-+g[wh>>2];g[Ha>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[Ia>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*29<<2)>>2];g[Ja>>2]=+g[Ha>>2]+ 
+g[Ia>>2];g[je>>2]=+g[Ha>>2]-+g[Ia>>2];g[yh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*18<<2)>>2];g[Bg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2];g[Cg>>2]=+g[yh>>2]+ +g[Bg>>2];g[Ga>>2]=+g[yh>>2]-+g[Bg>>2];g[O>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*18<<2)>>2];g[P>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[ke>>2]=+g[O>>2]-+g[P>>2];g[Hg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[Ig>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*26<<2)>>2];g[X>>2]=+g[Hg>>2]-+g[Ig>>2];g[Y>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Z>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*26<<2)>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[Jg>>2]=+g[Hg>>2]+ +g[Ig>>2];g[Fd>>2]=+g[Z>>2]-+g[Y>>2];g[$>>2]=+g[X>>2]-+g[_>>2];g[da>>2]=+g[X>>2]+ +g[_>>2];g[Eg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[Fg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*21<<2)>>2];g[S>>2]=+g[Eg>>2]-+g[Fg>>2];g[T>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[U>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*21<<2)>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Gg>>2]=+g[Eg>>2]+ +g[Fg>>2];g[fe>>2]=+g[T>>2]-+g[U>>2];g[W>>2]=+g[S>>2]-+g[V>>2];g[ca>>2]=+g[S>>2]+ +g[V>>2];g[Dg>>2]=+g[xh>>2]+ +g[Cg>>2];g[Kg>>2]=+g[Gg>>2]+ +g[Jg>>2];g[Lg>>2]=+g[Dg>>2]+ +g[Kg>>2];g[dg>>2]=+g[Dg>>2]-+g[Kg>>2];g[ie>>2]=+g[Gg>>2]-+g[Jg>>2];g[le>>2]=+g[je>>2]-+g[ke>>2];g[me>>2]=+g[ie>>2]+ +g[le>>2];g[Ce>>2]=+g[le>>2]-+g[ie>>2];g[eg>>2]=+g[ke>>2]+ +g[je>>2];g[fg>>2]=+g[fe>>2]+ +g[Fd>>2];g[gg>>2]=+g[eg>>2]-+g[fg>>2];g[Mf>>2]=+g[fg>>2]+ +g[eg>>2];g[R>>2]=+g[N>>2]-+g[Q>>2];g[aa>>2]=(+g[W>>2]+ +g[$>>2])*.7071067690849304;g[ba>>2]=+g[R>>2]+ +g[aa>>2];g[Tb>>2]=+g[R>>2]-+g[aa>>2];g[ea>>2]=(+g[ca>>2]-+g[da>>2])*.7071067690849304;g[Ka>>2]=+g[Ga>>2]+ +g[Ja>>2];g[La>>2]=+g[ea>>2]+ +g[Ka>>2];g[Ub>>2]=+g[Ka>>2]-+g[ea>>2];g[ec>>2]=(+g[W>>2]-+g[$>>2])*.7071067690849304;g[fc>>2]=+g[Ja>>2]-+g[Ga>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[Pd>>2]=+g[fc>>2]-+g[ec>>2];g[Ed>>2]=+g[xh>>2]-+g[Cg>>2];g[ge>>2]=+g[Fd>>2]-+g[fe>>2];g[he>>2]=+g[Ed>>2]+ +g[ge>>2];g[Be>>2]=+g[Ed>>2]-+g[ge>>2];g[bc>>2]=+g[N>>2]+ +g[Q>>2];g[cc>>2]=(+g[ca>>2]+ 
+g[da>>2])*.7071067690849304;g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[Od>>2]=+g[bc>>2]+ +g[cc>>2];g[Mg>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[Ng>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*30<<2)>>2];g[Og>>2]=+g[Mg>>2]+ +g[Ng>>2];g[Na>>2]=+g[Mg>>2]-+g[Ng>>2];g[Gb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[Hb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*30<<2)>>2];g[Ib>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Ue>>2]=+g[Hb>>2]-+g[Gb>>2];g[Pg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2];g[Qg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*17<<2)>>2];g[Rg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[Fb>>2]=+g[Pg>>2]-+g[Qg>>2];g[Oa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2];g[Pa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*17<<2)>>2];g[ob>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Ve>>2]=+g[Oa>>2]-+g[Pa>>2];g[Wg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[Xg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*22<<2)>>2];g[vb>>2]=+g[Wg>>2]-+g[Xg>>2];g[wb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[xb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*22<<2)>>2];g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Yg>>2]=+g[Wg>>2]+ +g[Xg>>2];g[Pe>>2]=+g[xb>>2]-+g[wb>>2];g[zb>>2]=+g[vb>>2]-+g[yb>>2];g[Db>>2]=+g[vb>>2]+ +g[yb>>2];g[Tg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[Ug>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*25<<2)>>2];g[qb>>2]=+g[Tg>>2]-+g[Ug>>2];g[rb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[sb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*25<<2)>>2];g[tb>>2]=+g[rb>>2]+ +g[sb>>2];g[Vg>>2]=+g[Tg>>2]+ +g[Ug>>2];g[Qe>>2]=+g[rb>>2]-+g[sb>>2];g[ub>>2]=+g[qb>>2]-+g[tb>>2];g[Cb>>2]=+g[qb>>2]+ +g[tb>>2];g[Sg>>2]=+g[Og>>2]+ +g[Rg>>2];g[Zg>>2]=+g[Vg>>2]+ +g[Yg>>2];g[_g>>2]=+g[Sg>>2]+ +g[Zg>>2];g[ig>>2]=+g[Sg>>2]-+g[Zg>>2];g[Te>>2]=+g[Vg>>2]-+g[Yg>>2];g[We>>2]=+g[Ue>>2]-+g[Ve>>2];g[Xe>>2]=+g[Te>>2]+ +g[We>>2];g[Fe>>2]=+g[We>>2]-+g[Te>>2];g[jg>>2]=+g[Ve>>2]+ +g[Ue>>2];g[kg>>2]=+g[Qe>>2]+ +g[Pe>>2];g[lg>>2]=+g[jg>>2]-+g[kg>>2];g[Lf>>2]=+g[kg>>2]+ +g[jg>>2];g[pb>>2]=+g[Na>>2]-+g[ob>>2];g[Ab>>2]=(+g[ub>>2]+ +g[zb>>2])*.7071067690849304;g[Bb>>2]=+g[pb>>2]+ 
+g[Ab>>2];g[Wb>>2]=+g[pb>>2]-+g[Ab>>2];g[Eb>>2]=(+g[Cb>>2]-+g[Db>>2])*.7071067690849304;g[Jb>>2]=+g[Fb>>2]-+g[Ib>>2];g[Kb>>2]=+g[Eb>>2]+ +g[Jb>>2];g[Xb>>2]=+g[Jb>>2]-+g[Eb>>2];g[lc>>2]=(+g[ub>>2]-+g[zb>>2])*.7071067690849304;g[mc>>2]=+g[Fb>>2]+ +g[Ib>>2];g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[Sd>>2]=+g[lc>>2]+ +g[mc>>2];g[oe>>2]=+g[Og>>2]-+g[Rg>>2];g[Re>>2]=+g[Pe>>2]-+g[Qe>>2];g[Se>>2]=+g[oe>>2]+ +g[Re>>2];g[Ee>>2]=+g[oe>>2]-+g[Re>>2];g[ic>>2]=+g[Na>>2]+ +g[ob>>2];g[jc>>2]=(+g[Cb>>2]+ +g[Db>>2])*.7071067690849304;g[kc>>2]=+g[ic>>2]-+g[jc>>2];g[Rd>>2]=+g[ic>>2]+ +g[jc>>2];g[uh>>2]=+g[eh>>2]+ +g[th>>2];g[x>>2]=+g[Lg>>2]+ +g[_g>>2];g[Wf>>2]=+g[uh>>2]-+g[x>>2];g[Xf>>2]=+g[Mf>>2]+ +g[Lf>>2];g[Yf>>2]=+g[Qf>>2]-+g[Pf>>2];g[yg>>2]=+g[Xf>>2]+ +g[Yf>>2];g[c[n>>2]>>2]=(+g[uh>>2]+ +g[x>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<4<<2)>>2]=(+g[Yf>>2]-+g[Xf>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[Wf>>2]+ +g[yg>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=(+g[yg>>2]-+g[Wf>>2])*1.4142135381698608;g[Kf>>2]=+g[eh>>2]-+g[th>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[Of>>2]=+g[Kf>>2]+ +g[Nf>>2];g[Uf>>2]=+g[Kf>>2]-+g[Nf>>2];g[Rf>>2]=+g[Pf>>2]+ +g[Qf>>2];g[Sf>>2]=+g[Lg>>2]-+g[_g>>2];g[Tf>>2]=+g[Rf>>2]-+g[Sf>>2];g[Vf>>2]=+g[Sf>>2]+ +g[Rf>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Of>>2]*1.8477590084075928+ +g[Tf>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[Vf>>2]*.7653668522834778-+g[Uf>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[Tf>>2]*1.8477590084075928-+g[Of>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Uf>>2]*.7653668522834778+ +g[Vf>>2]*1.8477590084075928;g[cg>>2]=+g[_f>>2]+ +g[bg>>2];g[tg>>2]=+g[rg>>2]-+g[sg>>2];g[Ff>>2]=+g[sg>>2]+ +g[rg>>2];g[Cf>>2]=+g[_f>>2]-+g[bg>>2];g[ug>>2]=+g[dg>>2]+ +g[gg>>2];g[vg>>2]=+g[lg>>2]-+g[ig>>2];g[wg>>2]=(+g[ug>>2]+ +g[vg>>2])*.7071067690849304;g[Df>>2]=(+g[vg>>2]-+g[ug>>2])*.7071067690849304;g[hg>>2]=+g[dg>>2]-+g[gg>>2];g[mg>>2]=+g[ig>>2]+ +g[lg>>2];g[ng>>2]=(+g[hg>>2]+ 
+g[mg>>2])*.7071067690849304;g[Gf>>2]=(+g[hg>>2]-+g[mg>>2])*.7071067690849304;g[og>>2]=+g[cg>>2]+ +g[ng>>2];g[zf>>2]=+g[tg>>2]-+g[wg>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[og>>2]*1.9615705013275146+ +g[zf>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[zf>>2]*1.9615705013275146-+g[og>>2]*.39018064737319946;g[If>>2]=+g[Cf>>2]-+g[Df>>2];g[Jf>>2]=+g[Gf>>2]+ +g[Ff>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[If>>2]*.39018064737319946+ +g[Jf>>2]*1.9615705013275146;g[(c[n>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[Jf>>2]*.39018064737319946-+g[If>>2]*1.9615705013275146;g[Af>>2]=+g[cg>>2]-+g[ng>>2];g[Bf>>2]=+g[wg>>2]+ +g[tg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Af>>2]*1.111140489578247+ +g[Bf>>2]*1.662939190864563;g[(c[n>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[Bf>>2]*1.111140489578247-+g[Af>>2]*1.662939190864563;g[Ef>>2]=+g[Cf>>2]+ +g[Df>>2];g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ef>>2]*1.662939190864563+ +g[Hf>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Hf>>2]*1.662939190864563-+g[Ef>>2]*1.111140489578247;g[ze>>2]=(+g[ff>>2]-+g[ef>>2])*.7071067690849304;g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[sf>>2]=+g[ye>>2]-+g[ze>>2];g[Ke>>2]=(+g[wd>>2]-+g[Bd>>2])*.7071067690849304;g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[vf>>2]=+g[Ke>>2]+ +g[Je>>2];g[De>>2]=+g[Be>>2]*.3826834261417389-+g[Ce>>2]*.9238795042037964;g[Ge>>2]=+g[Ee>>2]*.3826834261417389+ +g[Fe>>2]*.9238795042037964;g[He>>2]=+g[De>>2]+ +g[Ge>>2];g[wf>>2]=+g[De>>2]-+g[Ge>>2];g[Me>>2]=+g[Be>>2]*.9238795042037964+ +g[Ce>>2]*.3826834261417389;g[Ne>>2]=+g[Fe>>2]*.3826834261417389-+g[Ee>>2]*.9238795042037964;g[Oe>>2]=+g[Me>>2]+ +g[Ne>>2];g[tf>>2]=+g[Ne>>2]-+g[Me>>2];g[Ie>>2]=+g[Ae>>2]+ +g[He>>2];g[pf>>2]=+g[Le>>2]-+g[Oe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ie>>2]*1.913880705833435+ +g[pf>>2]*.580569326877594;g[(c[n>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[pf>>2]*1.913880705833435-+g[Ie>>2]*.580569326877594;g[yf>>2]=+g[sf>>2]-+g[tf>>2];g[Zf>>2]=+g[wf>>2]+ 
+g[vf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[yf>>2]*.1960342824459076+ +g[Zf>>2]*1.990369439125061;g[(c[n>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[Zf>>2]*.1960342824459076-+g[yf>>2]*1.990369439125061;g[qf>>2]=+g[Ae>>2]-+g[He>>2];g[rf>>2]=+g[Oe>>2]+ +g[Le>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[qf>>2]*.9427934885025024+ +g[rf>>2]*1.7638425827026367;g[(c[n>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[rf>>2]*.9427934885025024-+g[qf>>2]*1.7638425827026367;g[uf>>2]=+g[sf>>2]+ +g[tf>>2];g[xf>>2]=+g[vf>>2]-+g[wf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[uf>>2]*1.5460208654403687+ +g[xf>>2]*1.2687865495681763;g[(c[n>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[xf>>2]*1.5460208654403687-+g[uf>>2]*1.2687865495681763;g[Cd>>2]=(+g[wd>>2]+ +g[Bd>>2])*.7071067690849304;g[Dd>>2]=+g[rd>>2]+ +g[Cd>>2];g[qe>>2]=+g[rd>>2]-+g[Cd>>2];g[gf>>2]=(+g[ef>>2]+ +g[ff>>2])*.7071067690849304;g[hf>>2]=+g[df>>2]-+g[gf>>2];g[te>>2]=+g[gf>>2]+ +g[df>>2];g[ne>>2]=+g[he>>2]*.9238795042037964-+g[me>>2]*.3826834261417389;g[Ye>>2]=+g[Se>>2]*.9238795042037964+ +g[Xe>>2]*.3826834261417389;g[Ze>>2]=+g[ne>>2]+ +g[Ye>>2];g[ue>>2]=+g[ne>>2]-+g[Ye>>2];g[jf>>2]=+g[he>>2]*.3826834261417389+ +g[me>>2]*.9238795042037964;g[kf>>2]=+g[Xe>>2]*.9238795042037964-+g[Se>>2]*.3826834261417389;g[lf>>2]=+g[jf>>2]+ +g[kf>>2];g[re>>2]=+g[kf>>2]-+g[jf>>2];g[_e>>2]=+g[Dd>>2]+ +g[Ze>>2];g[mf>>2]=+g[hf>>2]-+g[lf>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[_e>>2]*1.990369439125061+ +g[mf>>2]*.1960342824459076;g[(c[n>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[mf>>2]*1.990369439125061-+g[_e>>2]*.1960342824459076;g[we>>2]=+g[qe>>2]-+g[re>>2];g[xe>>2]=+g[ue>>2]+ +g[te>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[we>>2]*.580569326877594+ +g[xe>>2]*1.913880705833435;g[(c[n>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[xe>>2]*.580569326877594-+g[we>>2]*1.913880705833435;g[nf>>2]=+g[Dd>>2]-+g[Ze>>2];g[pe>>2]=+g[lf>>2]+ +g[hf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[nf>>2]*1.2687865495681763+ 
+g[pe>>2]*1.5460208654403687;g[(c[n>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[pe>>2]*1.2687865495681763-+g[nf>>2]*1.5460208654403687;g[se>>2]=+g[qe>>2]+ +g[re>>2];g[ve>>2]=+g[te>>2]-+g[ue>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[se>>2]*1.7638425827026367+ +g[ve>>2]*.9427934885025024;g[(c[n>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[ve>>2]*1.7638425827026367-+g[se>>2]*.9427934885025024;g[Qb>>2]=+g[C>>2]-+g[la>>2];g[Rb>>2]=+g[Ua>>2]-+g[Ta>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Ic>>2]=+g[Qb>>2]-+g[Rb>>2];g[Cc>>2]=+g[Tb>>2]*.8314695954322815+ +g[Ub>>2]*.5555702447891235;g[Dc>>2]=+g[Xb>>2]*.5555702447891235-+g[Wb>>2]*.8314695954322815;g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Jc>>2]=+g[Dc>>2]-+g[Cc>>2];g[Vb>>2]=+g[Tb>>2]*.5555702447891235-+g[Ub>>2]*.8314695954322815;g[Yb>>2]=+g[Wb>>2]*.5555702447891235+ +g[Xb>>2]*.8314695954322815;g[xc>>2]=+g[Vb>>2]+ +g[Yb>>2];g[Mc>>2]=+g[Vb>>2]-+g[Yb>>2];g[zc>>2]=+g[Ya>>2]-+g[bb>>2];g[Ac>>2]=+g[xa>>2]-+g[K>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[Lc>>2]=+g[Ac>>2]+ +g[zc>>2];g[yc>>2]=+g[Sb>>2]+ +g[xc>>2];g[Fc>>2]=+g[Bc>>2]-+g[Ec>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[yc>>2]*1.9400625228881836+ +g[Fc>>2]*.48596036434173584;g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Fc>>2]*1.9400625228881836-+g[yc>>2]*.48596036434173584;g[Oc>>2]=+g[Ic>>2]-+g[Jc>>2];g[Pc>>2]=+g[Mc>>2]+ +g[Lc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Oc>>2]*.2934609353542328+ +g[Pc>>2]*1.9783530235290527;g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[Pc>>2]*.2934609353542328-+g[Oc>>2]*1.9783530235290527;g[Gc>>2]=+g[Sb>>2]-+g[xc>>2];g[Hc>>2]=+g[Ec>>2]+ +g[Bc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Gc>>2]*1.0282055139541626+ +g[Hc>>2]*1.7154572010040283;g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[Hc>>2]*1.0282055139541626-+g[Gc>>2]*1.7154572010040283;g[Kc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Kc>>2]*1.606415033340454+ 
+g[Nc>>2]*1.1913986206054688;g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Nc>>2]*1.606415033340454-+g[Kc>>2]*1.1913986206054688;g[Ld>>2]=+g[Qc>>2]+ +g[Rc>>2];g[Md>>2]=+g[uc>>2]+ +g[vc>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[de>>2]=+g[Ld>>2]+ +g[Md>>2];g[Zd>>2]=+g[Od>>2]*.9807852506637573+ +g[Pd>>2]*.19509032368659973;g[_d>>2]=+g[Rd>>2]*.9807852506637573+ +g[Sd>>2]*.19509032368659973;g[$d>>2]=+g[Zd>>2]-+g[_d>>2];g[gd>>2]=+g[Zd>>2]+ +g[_d>>2];g[Qd>>2]=+g[Od>>2]*.19509032368659973-+g[Pd>>2]*.9807852506637573;g[Td>>2]=+g[Rd>>2]*.19509032368659973-+g[Sd>>2]*.9807852506637573;g[Ud>>2]=+g[Qd>>2]+ +g[Td>>2];g[jd>>2]=+g[Qd>>2]-+g[Td>>2];g[Wd>>2]=+g[sc>>2]+ +g[rc>>2];g[Xd>>2]=+g[Vc>>2]-+g[_b>>2];g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2];g[id>>2]=+g[Xd>>2]+ +g[Wd>>2];g[Vd>>2]=+g[Nd>>2]+ +g[Ud>>2];g[ae>>2]=+g[Yd>>2]-+g[$d>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Vd>>2]*1.8830881118774414+ +g[ae>>2]*.6737797260284424;g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[ae>>2]*1.8830881118774414-+g[Vd>>2]*.6737797260284424;g[ld>>2]=+g[de>>2]+ +g[gd>>2];g[md>>2]=+g[jd>>2]+ +g[id>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ld>>2]*.09813535213470459+ +g[md>>2]*1.9975908994674683;g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[md>>2]*.09813535213470459-+g[ld>>2]*1.9975908994674683;g[be>>2]=+g[Nd>>2]-+g[Ud>>2];g[ce>>2]=+g[$d>>2]+ +g[Yd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[be>>2]*.8551101684570312+ +g[ce>>2]*1.807978630065918;g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[ce>>2]*.8551101684570312-+g[be>>2]*1.807978630065918;g[hd>>2]=+g[de>>2]-+g[gd>>2];g[kd>>2]=+g[id>>2]-+g[jd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[hd>>2]*1.4819022417068481+ +g[kd>>2]*1.3431179523468018;g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[kd>>2]*1.4819022417068481-+g[hd>>2]*1.3431179523468018;g[ma>>2]=+g[C>>2]+ +g[la>>2];g[L>>2]=+g[xa>>2]+ +g[K>>2];g[M>>2]=+g[ma>>2]+ +g[L>>2];g[hb>>2]=+g[ma>>2]-+g[L>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[cb>>2]=+g[Ya>>2]+ +g[bb>>2];g[db>>2]=+g[Va>>2]+ 
+g[cb>>2];g[kb>>2]=+g[Va>>2]-+g[cb>>2];g[Ma>>2]=+g[ba>>2]*.9807852506637573-+g[La>>2]*.19509032368659973;g[Lb>>2]=+g[Bb>>2]*.9807852506637573+ +g[Kb>>2]*.19509032368659973;g[Mb>>2]=+g[Ma>>2]+ +g[Lb>>2];g[lb>>2]=+g[Ma>>2]-+g[Lb>>2];g[Qa>>2]=+g[ba>>2]*.19509032368659973+ +g[La>>2]*.9807852506637573;g[Ra>>2]=+g[Kb>>2]*.9807852506637573-+g[Bb>>2]*.19509032368659973;g[Sa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[ib>>2]=+g[Ra>>2]-+g[Qa>>2];g[Nb>>2]=+g[M>>2]+ +g[Mb>>2];g[eb>>2]=+g[Sa>>2]+ +g[db>>2];g[c[o>>2]>>2]=+g[Nb>>2]*1.9975908994674683-+g[eb>>2]*.09813535213470459;g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=-(+g[Nb>>2]*.09813535213470459+ +g[eb>>2]*1.9975908994674683);g[nb>>2]=+g[hb>>2]-+g[ib>>2];g[Pb>>2]=+g[lb>>2]+ +g[kb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[nb>>2]*.6737797260284424+ +g[Pb>>2]*1.8830881118774414;g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[Pb>>2]*.6737797260284424-+g[nb>>2]*1.8830881118774414;g[fb>>2]=+g[M>>2]-+g[Mb>>2];g[gb>>2]=+g[Sa>>2]-+g[db>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[fb>>2]*1.3431179523468018+ +g[gb>>2]*1.4819022417068481;g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[gb>>2]*1.3431179523468018-+g[fb>>2]*1.4819022417068481;g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[mb>>2]=+g[kb>>2]-+g[lb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[jb>>2]*1.807978630065918+ +g[mb>>2]*.8551101684570312;g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[mb>>2]*1.807978630065918-+g[jb>>2]*.8551101684570312;g[Sc>>2]=+g[Qc>>2]-+g[Rc>>2];g[$b>>2]=+g[Vc>>2]+ +g[_b>>2];g[ac>>2]=+g[Sc>>2]+ +g[$b>>2];g[dd>>2]=+g[Sc>>2]-+g[$b>>2];g[Zc>>2]=+g[dc>>2]*.5555702447891235+ +g[gc>>2]*.8314695954322815;g[_c>>2]=+g[nc>>2]*.8314695954322815-+g[kc>>2]*.5555702447891235;g[$c>>2]=+g[Zc>>2]+ +g[_c>>2];g[ed>>2]=+g[_c>>2]-+g[Zc>>2];g[hc>>2]=+g[dc>>2]*.8314695954322815-+g[gc>>2]*.5555702447891235;g[oc>>2]=+g[kc>>2]*.8314695954322815+ +g[nc>>2]*.5555702447891235;g[pc>>2]=+g[hc>>2]+ +g[oc>>2];g[Hd>>2]=+g[hc>>2]-+g[oc>>2];g[tc>>2]=+g[rc>>2]-+g[sc>>2];g[wc>>2]=+g[uc>>2]-+g[vc>>2];g[Yc>>2]=+g[tc>>2]-+g[wc>>2];g[Gd>>2]=+g[wc>>2]+ 
+g[tc>>2];g[qc>>2]=+g[ac>>2]+ +g[pc>>2];g[ad>>2]=+g[Yc>>2]-+g[$c>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[qc>>2]*1.9783530235290527+ +g[ad>>2]*.2934609353542328;g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[ad>>2]*1.9783530235290527-+g[qc>>2]*.2934609353542328;g[Jd>>2]=+g[dd>>2]-+g[ed>>2];g[Kd>>2]=+g[Hd>>2]+ +g[Gd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Jd>>2]*.48596036434173584+ +g[Kd>>2]*1.9400625228881836;g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[Kd>>2]*.48596036434173584-+g[Jd>>2]*1.9400625228881836;g[bd>>2]=+g[ac>>2]-+g[pc>>2];g[cd>>2]=+g[$c>>2]+ +g[Yc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[bd>>2]*1.1913986206054688+ +g[cd>>2]*1.606415033340454;g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[cd>>2]*1.1913986206054688-+g[bd>>2]*1.606415033340454;g[fd>>2]=+g[dd>>2]+ +g[ed>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[fd>>2]*1.7154572010040283+ +g[Id>>2]*1.0282055139541626;g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Id>>2]*1.7154572010040283-+g[fd>>2]*1.0282055139541626;c[zh>>2]=(c[zh>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ah;return}function Pv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,49,10408);i=b;return}function Qv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;n=J+96|0;o=J+92|0;p=J+88|0;q=J+84|0;r=J+80|0;s=J+76|0;t=J+72|0;K=J+68|0;u=J+64|0;v=J+60|0;I=J+48|0;w=J+44|0;B=J+40|0;z=J+36|0;A=J+32|0;E=J+28|0;G=J+24|0;F=J+20|0;H=J+16|0;x=J+12|0;y=J+8|0;C=J+4|0;D=J;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[K>>2]=k;c[u>>2]=l;c[v>>2]=m;g[J+56>>2]=2.0;g[J+52>>2]=1.7320507764816284;c[I>>2]=c[K>>2];while(1){if((c[I>>2]|0)<=0)break;g[w>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[B>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[y>>2]=+g[c[p>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=(+g[x>>2]-+g[y>>2])*1.7320507764816284;g[C>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[D>>2]=+g[c[q>>2]>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[G>>2]=(+g[C>>2]-+g[D>>2])*1.7320507764816284;g[c[n>>2]>>2]=(+g[w>>2]+ +g[z>>2])*2.0;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[B>>2]-+g[E>>2])*2.0;g[F>>2]=+g[B>>2]*2.0+ +g[E>>2];g[c[o>>2]>>2]=-(+g[A>>2]+ +g[F>>2]);g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]-+g[F>>2];g[H>>2]=+g[w>>2]*2.0-+g[z>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]-+g[H>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[H>>2]+ +g[G>>2];c[I>>2]=(c[I>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=J;return}function Rv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,50,10456);i=b;return}function Sv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0;K=i;i=i+128|0;n=K+120|0;o=K+116|0;p=K+112|0;q=K+108|0;r=K+104|0;s=K+100|0;t=K+96|0;L=K+92|0;u=K+88|0;v=K+84|0;J=K+52|0;E=K+48|0;I=K+44|0;G=K+40|0;w=K+36|0;z=K+32|0;x=K+28|0;y=K+24|0;A=K+20|0;H=K+16|0;F=K+12|0;B=K+8|0;D=K+4|0;C=K;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[L>>2]=k;c[u>>2]=l;c[v>>2]=m;g[K+80>>2]=2.0;g[K+76>>2]=1.2469795942306519;g[K+72>>2]=1.8019376993179321;g[K+68>>2]=.44504186511039734;g[K+64>>2]=.8677674531936646;g[K+60>>2]=1.9498558044433594;g[K+56>>2]=1.5636630058288574;c[J>>2]=c[L>>2];while(1){if((c[J>>2]|0)<=0)break;g[B>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[D>>2]=+g[c[q>>2]>>2];g[C>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[E>>2]=+g[B>>2]*1.5636630058288574+ +g[C>>2]*1.9498558044433594+ +g[D>>2]*.8677674531936646;g[I>>2]=+g[C>>2]*1.5636630058288574-+g[D>>2]*1.9498558044433594-+g[B>>2]*.8677674531936646;g[G>>2]=+g[B>>2]*1.9498558044433594-+g[D>>2]*1.5636630058288574-+g[C>>2]*.8677674531936646;g[w>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[z>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[A>>2]=+g[y>>2]*.44504186511039734+ +g[z>>2]*1.8019376993179321+-(+g[x>>2]*1.2469795942306519+ +g[w>>2]);g[H>>2]=+g[x>>2]*1.8019376993179321+ +g[z>>2]*.44504186511039734+-(+g[y>>2]*1.2469795942306519+ +g[w>>2]);g[F>>2]=+g[z>>2]*1.2469795942306519+ +g[w>>2]+-(+g[y>>2]*1.8019376993179321+ +g[x>>2]*.44504186511039734);g[c[o>>2]>>2]=+g[A>>2]-+g[E>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=-(+g[A>>2]+ +g[E>>2]);g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]-+g[H>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[H>>2]+ +g[I>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[G>>2]-+g[F>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[F>>2]+ +g[G>>2];g[c[n>>2]>>2]=(+g[x>>2]+ +g[y>>2]+ +g[z>>2])*2.0+ 
+g[w>>2];c[J>>2]=(c[J>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=K;return}function Tv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,51,10504);i=b;return}function Uv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+160|0;n=T+144|0;o=T+140|0;p=T+136|0;q=T+132|0;r=T+128|0;s=T+124|0;t=T+120|0;U=T+116|0;u=T+112|0;v=T+108|0;S=T+88|0;y=T+84|0;C=T+80|0;K=T+76|0;Q=T+72|0;B=T+68|0;H=T+64|0;F=T+60|0;P=T+56|0;G=T+52|0;L=T+48|0;w=T+44|0;x=T+40|0;I=T+36|0;J=T+32|0;z=T+28|0;A=T+24|0;D=T+20|0;E=T+16|0;M=T+12|0;N=T+8|0;O=T+4|0;R=T;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[U>>2]=k;c[u>>2]=l;c[v>>2]=m;g[T+104>>2]=1.4142135381698608;g[T+100>>2]=.7653668522834778;g[T+96>>2]=1.8477590084075928;g[T+92>>2]=2.0;c[S>>2]=c[U>>2];while(1){if((c[S>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[C>>2]=+g[w>>2]-+g[x>>2];g[I>>2]=+g[c[q>>2]>>2];g[J>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[Q>>2]=+g[J>>2]-+g[I>>2];g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[H>>2]=+g[z>>2]-+g[A>>2];g[D>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[E>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[P>>2]=+g[D>>2]-+g[E>>2];g[c[n>>2]>>2]=(+g[y>>2]+ +g[B>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[Q>>2]-+g[P>>2])*2.0;g[G>>2]=+g[C>>2]-+g[F>>2];g[L>>2]=+g[H>>2]+ +g[K>>2];g[c[o>>2]>>2]=+g[G>>2]*1.8477590084075928-+g[L>>2]*.7653668522834778;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=-(+g[G>>2]*.7653668522834778+ +g[L>>2]*1.8477590084075928);g[M>>2]=+g[C>>2]+ 
+g[F>>2];g[N>>2]=+g[H>>2]-+g[K>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]*.7653668522834778+ +g[N>>2]*1.8477590084075928;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[N>>2]*.7653668522834778-+g[M>>2]*1.8477590084075928;g[O>>2]=+g[y>>2]-+g[B>>2];g[R>>2]=+g[P>>2]+ +g[Q>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=(+g[O>>2]+ +g[R>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[R>>2]-+g[O>>2])*1.4142135381698608;c[S>>2]=(c[S>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=T;return}function Vv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,52,10552);i=b;return}function Wv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0;ca=i;i=i+224|0;n=ca+220|0;o=ca+216|0;p=ca+212|0;q=ca+208|0;r=ca+204|0;s=ca+200|0;t=ca+196|0;da=ca+192|0;u=ca+188|0;v=ca+184|0;ba=ca+132|0;y=ca+128|0;X=ca+124|0;N=ca+120|0;I=ca+116|0;H=ca+112|0;D=ca+108|0;T=ca+104|0;Z=ca+100|0;Q=ca+96|0;Y=ca+92|0;E=ca+88|0;J=ca+84|0;M=ca+80|0;w=ca+76|0;x=ca+72|0;K=ca+68|0;L=ca+64|0;z=ca+60|0;C=ca+56|0;R=ca+52|0;P=ca+48|0;S=ca+44|0;O=ca+40|0;A=ca+36|0;B=ca+32|0;F=ca+28|0;G=ca+24|0;W=ca+20|0;U=ca+16|0;V=ca+12|0;aa=ca+8|0;_=ca+4|0;$=ca;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[da>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ca+180>>2]=.6427876353263855;g[ca+176>>2]=.7660444378852844;g[ca+172>>2]=1.326827883720398;g[ca+168>>2]=1.1133408546447754;g[ca+164>>2]=.9848077297210693;g[ca+160>>2]=.1736481785774231;g[ca+156>>2]=1.7057371139526367;g[ca+152>>2]=.3007674515247345;g[ca+148>>2]=.5;g[ca+144>>2]=.8660253882408142;g[ca+140>>2]=2.0;g[ca+136>>2]=1.7320507764816284;c[ba>>2]=c[da>>2];while(1){if((c[ba>>2]|0)<=0)break;g[L>>
2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[M>>2]=+g[L>>2]*1.7320507764816284;g[w>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[K>>2]=+g[x>>2]-+g[w>>2];g[y>>2]=+g[x>>2]*2.0+ +g[w>>2];g[X>>2]=+g[K>>2]-+g[M>>2];g[N>>2]=+g[K>>2]+ +g[M>>2];g[z>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[I>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[A>>2]=+g[c[p>>2]>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[R>>2]=(+g[B>>2]-+g[A>>2])*.8660253882408142;g[F>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[G>>2]=+g[c[q>>2]>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[P>>2]=(+g[G>>2]+ +g[F>>2])*.8660253882408142;g[D>>2]=+g[z>>2]+ +g[C>>2];g[S>>2]=+g[H>>2]*.5+ +g[I>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[Z>>2]=+g[R>>2]+ +g[S>>2];g[O>>2]=+g[C>>2]*.5-+g[z>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Y>>2]=+g[O>>2]-+g[P>>2];g[c[n>>2]>>2]=+g[D>>2]*2.0+ +g[y>>2];g[E>>2]=+g[D>>2]-+g[y>>2];g[J>>2]=(+g[H>>2]-+g[I>>2])*1.7320507764816284;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]+ +g[J>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[J>>2]-+g[E>>2];g[W>>2]=+g[T>>2]*.3007674515247345-+g[Q>>2]*1.7057371139526367;g[U>>2]=+g[Q>>2]*.1736481785774231+ +g[T>>2]*.9848077297210693;g[V>>2]=+g[N>>2]-+g[U>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=-(+g[U>>2]*2.0+ +g[N>>2]);g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[W>>2]-+g[V>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[V>>2]+ +g[W>>2];g[aa>>2]=+g[Y>>2]*1.1133408546447754+ +g[Z>>2]*1.326827883720398;g[_>>2]=+g[Y>>2]*.7660444378852844-+g[Z>>2]*.6427876353263855;g[$>>2]=+g[_>>2]-+g[X>>2];g[c[o>>2]>>2]=+g[_>>2]*2.0+ +g[X>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[aa>>2]-+g[$>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$>>2]+ +g[aa>>2];c[ba>>2]=(c[ba>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ca;return}function Xv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,53,10600);i=b;return}function 
Yv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+208|0;n=da+196|0;o=da+192|0;p=da+188|0;q=da+184|0;r=da+180|0;s=da+176|0;t=da+172|0;ea=da+168|0;u=da+164|0;v=da+160|0;ca=da+136|0;y=da+132|0;G=da+128|0;S=da+124|0;_=da+120|0;P=da+116|0;Z=da+112|0;F=da+108|0;X=da+104|0;J=da+100|0;L=da+96|0;N=da+92|0;O=da+88|0;w=da+84|0;x=da+80|0;Q=da+76|0;R=da+72|0;B=da+68|0;H=da+64|0;E=da+60|0;I=da+56|0;z=da+52|0;A=da+48|0;C=da+44|0;D=da+40|0;T=da+36|0;V=da+32|0;M=da+28|0;U=da+24|0;K=da+20|0;$=da+16|0;ba=da+12|0;Y=da+8|0;aa=da+4|0;W=da;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ea>>2]=k;c[u>>2]=l;c[v>>2]=m;g[da+156>>2]=.5;g[da+152>>2]=1.9021130800247192;g[da+148>>2]=1.1755704879760742;g[da+144>>2]=2.0;g[da+140>>2]=1.1180340051651;c[ca>>2]=c[ea>>2];while(1){if((c[ca>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[G>>2]=+g[w>>2]+ +g[x>>2];g[Q>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[R>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[_>>2]=+g[Q>>2]+ +g[R>>2];g[N>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[O>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[Z>>2]=+g[N>>2]+ +g[O>>2];g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[H>>2]=+g[z>>2]+ +g[A>>2];g[C>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[I>>2]=+g[C>>2]+ +g[D>>2];g[F>>2]=+g[B>>2]+ +g[E>>2];g[X>>2]=(+g[B>>2]-+g[E>>2])*1.1180340051651;g[J>>2]=+g[H>>2]+ +g[I>>2];g[L>>2]=(+g[H>>2]-+g[I>>2])*1.1180340051651;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[F>>2]*2.0+ +g[y>>2];g[c[n>>2]>>2]=+g[J>>2]*2.0+ 
+g[G>>2];g[T>>2]=+g[P>>2]*1.1755704879760742-+g[S>>2]*1.9021130800247192;g[V>>2]=+g[P>>2]*1.9021130800247192+ +g[S>>2]*1.1755704879760742;g[K>>2]=+g[G>>2]-+g[J>>2]*.5;g[M>>2]=+g[K>>2]-+g[L>>2];g[U>>2]=+g[L>>2]+ +g[K>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]-+g[T>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[U>>2]+ +g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[M>>2]+ +g[T>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[U>>2]-+g[V>>2];g[$>>2]=+g[Z>>2]*1.1755704879760742-+g[_>>2]*1.9021130800247192;g[ba>>2]=+g[Z>>2]*1.9021130800247192+ +g[_>>2]*1.1755704879760742;g[W>>2]=+g[y>>2]-+g[F>>2]*.5;g[Y>>2]=+g[W>>2]-+g[X>>2];g[aa>>2]=+g[X>>2]+ +g[W>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Y>>2]-+g[$>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Y>>2]+ +g[$>>2];g[c[o>>2]>>2]=+g[aa>>2]-+g[ba>>2];c[ca>>2]=(c[ca>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=da;return}function Zv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,54,10648);i=b;return}function _v(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0;S=i;i=i+176|0;n=S+168|0;o=S+164|0;p=S+160|0;q=S+156|0;r=S+152|0;s=S+148|0;t=S+144|0;T=S+140|0;u=S+136|0;v=S+132|0;R=S+84|0;I=S+80|0;Q=S+76|0;K=S+72|0;M=S+68|0;O=S+64|0;w=S+60|0;x=S+56|0;B=S+52|0;A=S+48|0;z=S+44|0;y=S+40|0;C=S+36|0;P=S+32|0;J=S+28|0;L=S+24|0;N=S+20|0;D=S+16|0;H=S+12|0;E=S+8|0;F=S+4|0;G=S;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[T>>2]=k;c[u>>2]=l;c[v>>2]=m;g[S+128>>2]=2.0;g[S+124>>2]=1.9189859628677368;g[S+120>>2]=1.3097214698791504;g[S+116>>2]=.2846296727657318;g[S+112>>2]=.8308300375938416;g[S+108>>2]=1.682507038116455;g[S+104>>2]=.5634651184082031;g[S+100>>2]=1.5114991664886475;g[S+96>>2]=1.9796428680419922;g[S+92>>2]=1.8192639350891113;g[S+88>>2]=1.0812816619873047;c[R>>2]=c[T>>2];while(1){if((c[R>>2]|0)<=0)break;g[D>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[H>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[E>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[F>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[G>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[I>>2]=+g[D>>2]*1.0812816619873047+ +g[E>>2]*1.8192639350891113+-(+g[F>>2]*1.9796428680419922+ +g[G>>2]*1.5114991664886475)-+g[H>>2]*.5634651184082031;g[Q>>2]=+g[D>>2]*1.9796428680419922+ +g[F>>2]*1.8192639350891113+-(+g[E>>2]*.5634651184082031+ +g[G>>2]*1.0812816619873047)-+g[H>>2]*1.5114991664886475;g[K>>2]=+g[D>>2]*.5634651184082031+ +g[G>>2]*1.8192639350891113+-(+g[F>>2]*1.5114991664886475+ +g[E>>2]*1.0812816619873047)-+g[H>>2]*1.9796428680419922;g[M>>2]=+g[H>>2]*1.0812816619873047+ +g[D>>2]*1.8192639350891113+(+g[G>>2]*1.9796428680419922+ +g[E>>2]*1.5114991664886475)+ +g[F>>2]*.5634651184082031;g[O>>2]=+g[G>>2]*.5634651184082031+ 
+g[E>>2]*1.9796428680419922+(+g[F>>2]*1.0812816619873047-+g[D>>2]*1.5114991664886475)-+g[H>>2]*1.8192639350891113;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[z>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[C>>2]=+g[y>>2]*1.682507038116455+ +g[w>>2]+(+g[A>>2]*.8308300375938416-+g[B>>2]*.2846296727657318)+-(+g[z>>2]*1.3097214698791504+ +g[x>>2]*1.9189859628677368);g[P>>2]=+g[z>>2]*1.682507038116455+ +g[w>>2]+(+g[B>>2]*.8308300375938416-+g[A>>2]*1.9189859628677368)+-(+g[y>>2]*.2846296727657318+ +g[x>>2]*1.3097214698791504);g[J>>2]=+g[z>>2]*.8308300375938416+ +g[w>>2]+(+g[A>>2]*1.682507038116455-+g[B>>2]*1.3097214698791504)+-(+g[y>>2]*1.9189859628677368+ +g[x>>2]*.2846296727657318);g[L>>2]=+g[x>>2]*1.682507038116455+ +g[w>>2]+(+g[y>>2]*.8308300375938416-+g[B>>2]*1.9189859628677368)+-(+g[A>>2]*1.3097214698791504+ +g[z>>2]*.2846296727657318);g[N>>2]=+g[x>>2]*.8308300375938416+ +g[w>>2]+(+g[B>>2]*1.682507038116455-+g[A>>2]*.2846296727657318)+-(+g[z>>2]*1.9189859628677368+ +g[y>>2]*1.3097214698791504);g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[C>>2]-+g[I>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[J>>2]-+g[K>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[P>>2]+ +g[Q>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[C>>2]+ +g[I>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[P>>2]-+g[Q>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[N>>2]+ +g[O>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[J>>2]+ +g[K>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[L>>2]+ +g[M>>2];g[c[o>>2]>>2]=+g[L>>2]-+g[M>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[N>>2]-+g[O>>2];g[c[n>>2]>>2]=(+g[x>>2]+ +g[y>>2]+ +g[z>>2]+ +g[A>>2]+ +g[B>>2])*2.0+ +g[w>>2];c[R>>2]=(c[R>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=S;return}function 
$v(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,55,10696);i=b;return}function aw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0
,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,Tm=0,Um=0,Vm=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0
,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,oo=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0,uq=0,vq=0,wq=0,xq=0,yq=0,zq=0,Aq=0,Bq=0,Cq=0,Dq=0,Eq=0,Fq=0,Gq=0,Hq=0,Iq=0,Jq=0,Kq=0,Lq=0,Mq=0,Nq=0,Oq=0,Pq=0,Qq=0,Rq=0,Sq=0,Tq=0,Uq=0,Vq=0,Wq=0,Xq=0,Yq=0,Zq=0,_q=0,$q=0,ar=0,br=0,cr=0,dr=0,er=0,fr=0,gr=0,hr=0,ir=0,jr=0,kr=0,lr=0,mr=0,nr=0;mr=i;i=i+4032|0;n=mr+4028|0;o=mr+4024|0;p=mr+4020|0;q=mr+4016|0;r=mr+4012|0;s=mr+4008|0;t=mr+4004|0;nr=mr+4e3|0;u=mr+3996|0;v=mr+3992|0;lr=mr+3832|0;wk=mr+3828|0;ng=mr+3824|0;ob=mr+3820|0;Ze=mr+3816|0;Mi=mr+3812|0;Lm=mr+3808|0;cn=mr+3804|0;cq=mr+3800|0;Uq=mr+3796|0;og=mr+3792|0;xb=mr+3788|0;_e=mr+3784|0;Vh=mr+3780|0;Mm=mr+3776|0;fn=mr+3772|0;dq=mr+3768|0;ir=mr+3764|0;qg=mr+3760|0;kn=mr+3756|0;fq=mr+3752|0;nn=mr+3748|0;gq=mr+3744|0;Hb=mr+3740|0;af=mr+3736|0;Sa=mr+3732|0;bf=mr+3728|0;fi=mr+3724|0;Ql=mr+3720|0;mi=mr+3716|0;Rl=mr+3712|0;tg=mr+3708|0;Tg=mr+3704|0;oa=mr+3700|0;Kf=mr+3696|0;Mo=mr+3692|0;tp=mr+3688|0;to=mr+3684|0;Ip=mr+3680|0;Sc=mr+3676|0;nf=mr+3672|0;od=mr+3668|0;De=mr+3664|0;mj=mr+3660|0;am=mr+3656|0;al=mr+3652|0;Qm=mr+3648|0;Gg=mr+3644|0;fh=mr+3640|0;jk=mr+3636|0;wg=mr+3632|0;sn=mr+3628|0;jq=mr+3624|0;go=mr+3620|0;nq=mr+3616|0;bb=mr+3612|0;ef=mr+3608|0;Gc=mr+3604|0;jf=mr+3600|0;yj=mr+3596|0;Ul=mr+3592|0;_i=mr+3588|0;Yl=mr+3584|0;Gf=mr+3580|0;Wg=mr+3576|0;V=mr+3572|0;bh=mr+3568|0;ed=
mr+3564|0;_d=mr+3560|0;_o=mr+3556|0;xp=mr+3552|0;te=mr+3548|0;ze=mr+3544|0;kl=mr+3540|0;dm=mr+3536|0;Xo=mr+3532|0;wp=mr+3528|0;Tf=mr+3524|0;Ag=mr+3520|0;rl=mr+3516|0;em=mr+3512|0;Ja=mr+3508|0;ah=mr+3504|0;Xd=mr+3500|0;$d=mr+3496|0;fp=mr+3492|0;Ap=mr+3488|0;we=mr+3484|0;Ae=mr+3480|0;Dl=mr+3476|0;gm=mr+3472|0;cp=mr+3468|0;zp=mr+3464|0;Yf=mr+3460|0;Bg=mr+3456|0;Mk=mr+3452|0;hm=mr+3448|0;Da=mr+3444|0;Dg=mr+3440|0;To=mr+3436|0;Hp=mr+3432|0;qo=mr+3428|0;up=mr+3424|0;lc=mr+3420|0;Ce=mr+3416|0;de=mr+3412|0;pe=mr+3408|0;Ak=mr+3404|0;Pm=mr+3400|0;Vk=mr+3396|0;bm=mr+3392|0;Nf=mr+3388|0;Hh=mr+3384|0;z=mr+3380|0;Df=mr+3376|0;_n=mr+3372|0;mq=mr+3368|0;co=mr+3364|0;kq=mr+3360|0;Vb=mr+3356|0;hf=mr+3352|0;xc=mr+3348|0;ff=mr+3344|0;Nj=mr+3340|0;Xl=mr+3336|0;Rj=mr+3332|0;Vl=mr+3328|0;Bf=mr+3324|0;Xg=mr+3320|0;ee=mr+3316|0;Hi=mr+3312|0;Ob=mr+3308|0;Fi=mr+3304|0;Pi=mr+3300|0;Ji=mr+3296|0;Pa=mr+3292|0;Ki=mr+3288|0;of=mr+3284|0;Ma=mr+3280|0;Xc=mr+3276|0;Gi=mr+3272|0;w=mr+3268|0;Fa=mr+3264|0;xg=mr+3260|0;Gh=mr+3256|0;Na=mr+3252|0;Oa=mr+3248|0;Ii=mr+3244|0;Li=mr+3240|0;an=mr+3236|0;bn=mr+3232|0;Yn=mr+3228|0;Ni=mr+3224|0;sb=mr+3220|0;Th=mr+3216|0;Tq=mr+3212|0;Sh=mr+3208|0;vb=mr+3204|0;Oi=mr+3200|0;pb=mr+3196|0;wb=mr+3192|0;Fl=mr+3188|0;Om=mr+3184|0;qb=mr+3180|0;rb=mr+3176|0;gp=mr+3172|0;pq=mr+3168|0;tb=mr+3164|0;ub=mr+3160|0;Rh=mr+3156|0;Uh=mr+3152|0;dn=mr+3148|0;en=mr+3144|0;Yq=mr+3140|0;Xh=mr+3136|0;Lb=mr+3132|0;ki=mr+3128|0;$q=mr+3124|0;ji=mr+3120|0;Qa=mr+3116|0;Yh=mr+3112|0;gr=mr+3108|0;hi=mr+3104|0;Cb=mr+3100|0;di=mr+3096|0;dr=mr+3092|0;gi=mr+3088|0;Fb=mr+3084|0;ai=mr+3080|0;Wq=mr+3076|0;Xq=mr+3072|0;Mb=mr+3068|0;Nb=mr+3064|0;Jb=mr+3060|0;Kb=mr+3056|0;Zq=mr+3052|0;_q=mr+3048|0;er=mr+3044|0;fr=mr+3040|0;bi=mr+3036|0;Ab=mr+3032|0;Bb=mr+3028|0;ci=mr+3024|0;br=mr+3020|0;cr=mr+3016|0;_h=mr+3012|0;Db=mr+3008|0;Eb=mr+3004|0;$h=mr+3e3|0;ar=mr+2996|0;hr=mr+2992|0;hn=mr+2988|0;jn=mr+2984|0;ln=mr+2980|0;mn=mr+2976|0;zb=mr+2972|0;Gb=mr+2968|0;Ib=mr+2964|0;Ra=mr+2960|0;Zh=mr+2956|0;ei=mr+2952|0;ii=m
r+2948|0;li=mr+2944|0;rg=mr+2940|0;sg=mr+2936|0;E=mr+2932|0;cj=mr+2928|0;jd=mr+2924|0;_k=mr+2920|0;fa=mr+2916|0;Zk=mr+2912|0;md=mr+2908|0;dj=mr+2904|0;ma=mr+2900|0;Xk=mr+2896|0;Nc=mr+2892|0;kj=mr+2888|0;ja=mr+2884|0;Wk=mr+2880|0;Qc=mr+2876|0;hj=mr+2872|0;C=mr+2868|0;D=mr+2864|0;kd=mr+2860|0;ld=mr+2856|0;hd=mr+2852|0;id=mr+2848|0;F=mr+2844|0;G=mr+2840|0;ka=mr+2836|0;la=mr+2832|0;ij=mr+2828|0;Lc=mr+2824|0;Mc=mr+2820|0;jj=mr+2816|0;ha=mr+2812|0;ia=mr+2808|0;fj=mr+2804|0;Oc=mr+2800|0;Pc=mr+2796|0;gj=mr+2792|0;ga=mr+2788|0;na=mr+2784|0;Ko=mr+2780|0;Lo=mr+2776|0;ro=mr+2772|0;so=mr+2768|0;Kc=mr+2764|0;Rc=mr+2760|0;gd=mr+2756|0;nd=mr+2752|0;ej=mr+2748|0;lj=mr+2744|0;Yk=mr+2740|0;$k=mr+2736|0;Eg=mr+2732|0;Fg=mr+2728|0;Zj=mr+2724|0;Qi=mr+2720|0;Bc=mr+2716|0;Wj=mr+2712|0;ak=mr+2708|0;Vj=mr+2704|0;Ec=mr+2700|0;Ri=mr+2696|0;hk=mr+2692|0;Tj=mr+2688|0;Ya=mr+2684|0;Yi=mr+2680|0;ek=mr+2676|0;Sj=mr+2672|0;$a=mr+2668|0;Vi=mr+2664|0;kr=mr+2660|0;Yj=mr+2656|0;Cc=mr+2652|0;Dc=mr+2648|0;zc=mr+2644|0;Ac=mr+2640|0;_j=mr+2636|0;$j=mr+2632|0;fk=mr+2628|0;gk=mr+2624|0;Wi=mr+2620|0;Wa=mr+2616|0;Xa=mr+2612|0;Xi=mr+2608|0;ck=mr+2604|0;dk=mr+2600|0;Ti=mr+2596|0;Za=mr+2592|0;_a=mr+2588|0;Ui=mr+2584|0;bk=mr+2580|0;ik=mr+2576|0;qn=mr+2572|0;rn=mr+2568|0;eo=mr+2564|0;fo=mr+2560|0;Va=mr+2556|0;ab=mr+2552|0;yc=mr+2548|0;Fc=mr+2544|0;Si=mr+2540|0;Zi=mr+2536|0;Uj=mr+2532|0;Xj=mr+2528|0;Ef=mr+2524|0;Ff=mr+2520|0;N=mr+2516|0;ol=mr+2512|0;nc=mr+2508|0;Ck=mr+2504|0;cd=mr+2500|0;pl=mr+2496|0;Qf=mr+2492|0;Dk=mr+2488|0;U=mr+2484|0;ll=mr+2480|0;ml=mr+2476|0;uc=mr+2472|0;wc=mr+2468|0;fl=mr+2464|0;il=mr+2460|0;Rf=mr+2456|0;Vo=mr+2452|0;Wo=mr+2448|0;H=mr+2444|0;I=mr+2440|0;J=mr+2436|0;K=mr+2432|0;L=mr+2428|0;M=mr+2424|0;Yc=mr+2420|0;Zc=mr+2416|0;_c=mr+2412|0;$c=mr+2408|0;ad=mr+2404|0;bd=mr+2400|0;Q=mr+2396|0;Fk=mr+2392|0;tc=mr+2388|0;Gk=mr+2384|0;T=mr+2380|0;gl=mr+2376|0;qc=mr+2372|0;hl=mr+2368|0;O=mr+2364|0;P=mr+2360|0;rc=mr+2356|0;sc=mr+2352|0;R=mr+2348|0;S=mr+2344|0;oc=mr+2340|0;pc=mr+2336|0;vc=mr+2332|0;dd=mr+23
28|0;Yo=mr+2324|0;Zo=mr+2320|0;re=mr+2316|0;se=mr+2312|0;Ek=mr+2308|0;jl=mr+2304|0;Pf=mr+2300|0;Sf=mr+2296|0;nl=mr+2292|0;ql=mr+2288|0;aa=mr+2284|0;Jk=mr+2280|0;fd=mr+2276|0;tl=mr+2272|0;Vd=mr+2268|0;Kk=mr+2264|0;Vf=mr+2260|0;ul=mr+2256|0;Ia=mr+2252|0;El=mr+2248|0;Hk=mr+2244|0;Md=mr+2240|0;Od=mr+2236|0;yl=mr+2232|0;Bl=mr+2228|0;Wf=mr+2224|0;ap=mr+2220|0;bp=mr+2216|0;W=mr+2212|0;X=mr+2208|0;Y=mr+2204|0;Z=mr+2200|0;_=mr+2196|0;$=mr+2192|0;Pd=mr+2188|0;Qd=mr+2184|0;Rd=mr+2180|0;Sd=mr+2176|0;Td=mr+2172|0;Ud=mr+2168|0;da=mr+2164|0;wl=mr+2160|0;Ld=mr+2156|0;xl=mr+2152|0;Ha=mr+2148|0;zl=mr+2144|0;Id=mr+2140|0;Al=mr+2136|0;ba=mr+2132|0;ca=mr+2128|0;Jd=mr+2124|0;Kd=mr+2120|0;ea=mr+2116|0;Ga=mr+2112|0;Gd=mr+2108|0;Hd=mr+2104|0;Nd=mr+2100|0;Wd=mr+2096|0;dp=mr+2092|0;ep=mr+2088|0;ue=mr+2084|0;ve=mr+2080|0;vl=mr+2076|0;Cl=mr+2072|0;Uf=mr+2068|0;Xf=mr+2064|0;Ik=mr+2060|0;Lk=mr+2056|0;ra=mr+2052|0;Wc=mr+2048|0;ua=mr+2044|0;$b=mr+2040|0;Tc=mr+2036|0;ac=mr+2032|0;Oo=mr+2028|0;No=mr+2024|0;sj=mr+2020|0;pj=mr+2016|0;ya=mr+2012|0;fc=mr+2008|0;Ba=mr+2004|0;ic=mr+2e3|0;cc=mr+1996|0;jc=mr+1992|0;Ro=mr+1988|0;Qo=mr+1984|0;yk=mr+1980|0;wj=mr+1976|0;nj=mr+1972|0;rj=mr+1968|0;qj=mr+1964|0;oj=mr+1960|0;pa=mr+1956|0;qa=mr+1952|0;Uc=mr+1948|0;Vc=mr+1944|0;sa=mr+1940|0;ta=mr+1936|0;Zb=mr+1932|0;_b=mr+1928|0;uj=mr+1924|0;xk=mr+1920|0;xj=mr+1916|0;vj=mr+1912|0;wa=mr+1908|0;xa=mr+1904|0;dc=mr+1900|0;ec=mr+1896|0;za=mr+1892|0;Aa=mr+1888|0;gc=mr+1884|0;hc=mr+1880|0;va=mr+1876|0;Ca=mr+1872|0;Po=mr+1868|0;So=mr+1864|0;oo=mr+1860|0;po=mr+1856|0;bc=mr+1852|0;kc=mr+1848|0;be=mr+1844|0;ce=mr+1840|0;tj=mr+1836|0;zk=mr+1832|0;Tk=mr+1828|0;Uk=mr+1824|0;Lf=mr+1820|0;Mf=mr+1816|0;mk=mr+1812|0;fb=mr+1808|0;pk=mr+1804|0;ib=mr+1800|0;cb=mr+1796|0;jb=mr+1792|0;un=mr+1788|0;tn=mr+1784|0;Ej=mr+1780|0;Bj=mr+1776|0;tk=mr+1772|0;Pb=mr+1768|0;x=mr+1764|0;Sb=mr+1760|0;lb=mr+1756|0;Tb=mr+1752|0;xn=mr+1748|0;wn=mr+1744|0;Lj=mr+1740|0;Ij=mr+1736|0;zj=mr+1732|0;Dj=mr+1728|0;Cj=mr+1724|0;Aj=mr+1720|0;kk=mr+1716|0;lk=mr+1712|0;d
b=mr+1708|0;eb=mr+1704|0;nk=mr+1700|0;ok=mr+1696|0;gb=mr+1692|0;hb=mr+1688|0;Gj=mr+1684|0;Kj=mr+1680|0;Jj=mr+1676|0;Hj=mr+1672|0;rk=mr+1668|0;sk=mr+1664|0;mb=mr+1660|0;nb=mr+1656|0;uk=mr+1652|0;vk=mr+1648|0;Qb=mr+1644|0;Rb=mr+1640|0;qk=mr+1636|0;y=mr+1632|0;vn=mr+1628|0;Zn=mr+1624|0;ao=mr+1620|0;bo=mr+1616|0;kb=mr+1612|0;Ub=mr+1608|0;Xb=mr+1604|0;Yb=mr+1600|0;Fj=mr+1596|0;Mj=mr+1592|0;Pj=mr+1588|0;Qj=mr+1584|0;zf=mr+1580|0;Af=mr+1576|0;A=mr+1572|0;ui=mr+1568|0;jr=mr+1564|0;ti=mr+1560|0;La=mr+1556|0;wi=mr+1552|0;zi=mr+1548|0;Ei=mr+1544|0;Vq=mr+1540|0;B=mr+1536|0;Di=mr+1532|0;Ea=mr+1528|0;Ka=mr+1524|0;xi=mr+1520|0;yi=mr+1516|0;vi=mr+1512|0;Ai=mr+1508|0;Bi=mr+1504|0;Ci=mr+1500|0;Ug=mr+1496|0;Nh=mr+1492|0;Jh=mr+1488|0;pi=mr+1484|0;Zg=mr+1480|0;Oh=mr+1476|0;dh=mr+1472|0;Qh=mr+1468|0;Sg=mr+1464|0;eh=mr+1460|0;Ih=mr+1456|0;Vg=mr+1452|0;Yg=mr+1448|0;$g=mr+1444|0;ch=mr+1440|0;_g=mr+1436|0;Kh=mr+1432|0;ri=mr+1428|0;si=mr+1424|0;Lh=mr+1420|0;Mh=mr+1416|0;Ph=mr+1412|0;qi=mr+1408|0;Ua=mr+1404|0;ud=mr+1400|0;qd=mr+1396|0;yd=mr+1392|0;Ic=mr+1388|0;vd=mr+1384|0;Zd=mr+1380|0;xd=mr+1376|0;yb=mr+1372|0;Ta=mr+1368|0;ae=mr+1364|0;pd=mr+1360|0;Wb=mr+1356|0;Hc=mr+1352|0;mc=mr+1348|0;Yd=mr+1344|0;Jc=mr+1340|0;rd=mr+1336|0;Ad=mr+1332|0;Bd=mr+1328|0;sd=mr+1324|0;td=mr+1320|0;wd=mr+1316|0;zd=mr+1312|0;vg=mr+1308|0;kh=mr+1304|0;gh=mr+1300|0;oh=mr+1296|0;If=mr+1292|0;lh=mr+1288|0;zg=mr+1284|0;nh=mr+1280|0;pg=mr+1276|0;ug=mr+1272|0;Cg=mr+1268|0;Hg=mr+1264|0;Cf=mr+1260|0;Hf=mr+1256|0;Of=mr+1252|0;yg=mr+1248|0;Jf=mr+1244|0;hh=mr+1240|0;qh=mr+1236|0;rh=mr+1232|0;ih=mr+1228|0;jh=mr+1224|0;mh=mr+1220|0;ph=mr+1216|0;Ed=mr+1212|0;Re=mr+1208|0;ne=mr+1204|0;Ve=mr+1200|0;ge=mr+1196|0;Se=mr+1192|0;ke=mr+1188|0;Ue=mr+1184|0;Cd=mr+1180|0;Dd=mr+1176|0;le=mr+1172|0;me=mr+1168|0;Fd=mr+1164|0;fe=mr+1160|0;ie=mr+1156|0;je=mr+1152|0;he=mr+1148|0;oe=mr+1144|0;Xe=mr+1140|0;Ye=mr+1136|0;Pe=mr+1132|0;Qe=mr+1128|0;Te=mr+1124|0;We=mr+1120|0;uh=mr+1116|0;Kg=mr+1112|0;Eh=mr+1108|0;Og=mr+1104|0;xh=mr+1100|0;Lg=mr+1096|0;Bh
=mr+1092|0;Ng=mr+1088|0;sh=mr+1084|0;th=mr+1080|0;Ch=mr+1076|0;Dh=mr+1072|0;vh=mr+1068|0;wh=mr+1064|0;zh=mr+1060|0;Ah=mr+1056|0;yh=mr+1052|0;Fh=mr+1048|0;Qg=mr+1044|0;Rg=mr+1040|0;Ig=mr+1036|0;Jg=mr+1032|0;Mg=mr+1028|0;Pg=mr+1024|0;df=mr+1020|0;Je=mr+1016|0;Fe=mr+1012|0;Ne=mr+1008|0;lf=mr+1004|0;Ke=mr+1e3|0;ye=mr+996|0;Me=mr+992|0;$e=mr+988|0;cf=mr+984|0;Be=mr+980|0;Ee=mr+976|0;gf=mr+972|0;kf=mr+968|0;qe=mr+964|0;xe=mr+960|0;mf=mr+956|0;Ge=mr+952|0;pf=mr+948|0;qf=mr+944|0;He=mr+940|0;Ie=mr+936|0;Le=mr+932|0;Oe=mr+928|0;tf=mr+924|0;fg=mr+920|0;bg=mr+916|0;jg=mr+912|0;wf=mr+908|0;gg=mr+904|0;_f=mr+900|0;ig=mr+896|0;rf=mr+892|0;sf=mr+888|0;$f=mr+884|0;ag=mr+880|0;uf=mr+876|0;vf=mr+872|0;yf=mr+868|0;Zf=mr+864|0;xf=mr+860|0;cg=mr+856|0;lg=mr+852|0;mg=mr+848|0;dg=mr+844|0;eg=mr+840|0;hg=mr+836|0;kg=mr+832|0;oi=mr+828|0;Hl=mr+824|0;aj=mr+820|0;Il=mr+816|0;sm=mr+812|0;Em=mr+808|0;pm=mr+804|0;Dm=mr+800|0;cl=mr+796|0;Gm=mr+792|0;Ll=mr+788|0;wm=mr+784|0;Pk=mr+780|0;Hm=mr+776|0;Kl=mr+772|0;zm=mr+768|0;Wh=mr+764|0;ni=mr+760|0;Pl=mr+756|0;om=mr+752|0;Oj=mr+748|0;$i=mr+744|0;qm=mr+740|0;rm=mr+736|0;bl=mr+732|0;um=mr+728|0;Sk=mr+724|0;vm=mr+720|0;Qk=mr+716|0;Rk=mr+712|0;Bk=mr+708|0;ym=mr+704|0;Ok=mr+700|0;xm=mr+696|0;sl=mr+692|0;Nk=mr+688|0;bj=mr+684|0;dl=mr+680|0;Fm=mr+676|0;Im=mr+672|0;Jm=mr+668|0;Km=mr+664|0;el=mr+660|0;Gl=mr+656|0;Jl=mr+652|0;Ml=mr+648|0;tm=mr+644|0;Am=mr+640|0;Bm=mr+636|0;Cm=mr+632|0;Nl=mr+628|0;Ol=mr+624|0;pn=mr+620|0;zo=mr+616|0;Io=mr+612|0;Ao=mr+608|0;lp=mr+604|0;Xp=mr+600|0;ip=mr+596|0;Wp=mr+592|0;vo=mr+588|0;Zp=mr+584|0;Do=mr+580|0;pp=mr+576|0;ko=mr+572|0;_p=mr+568|0;Co=mr+564|0;Sp=mr+560|0;gn=mr+556|0;on=mr+552|0;Ho=mr+548|0;hp=mr+544|0;$n=mr+540|0;ho=mr+536|0;jp=mr+532|0;kp=mr+528|0;uo=mr+524|0;np=mr+520|0;no=mr+516|0;op=mr+512|0;lo=mr+508|0;mo=mr+504|0;Uo=mr+500|0;Rp=mr+496|0;jo=mr+492|0;qp=mr+488|0;$o=mr+484|0;io=mr+480|0;Jo=mr+476|0;wo=mr+472|0;Yp=mr+468|0;$p=mr+464|0;aq=mr+460|0;bq=mr+456|0;xo=mr+452|0;yo=mr+448|0;Bo=mr+444|0;Eo=mr+440|0;mp=mr+436|0
;Tp=mr+432|0;Up=mr+428|0;Vp=mr+424|0;Fo=mr+420|0;Go=mr+416|0;Tl=mr+412|0;Wm=mr+408|0;_l=mr+404|0;Xm=mr+400|0;In=mr+396|0;Un=mr+392|0;Fn=mr+388|0;Tn=mr+384|0;Sm=mr+380|0;Wn=mr+376|0;zn=mr+372|0;Mn=mr+368|0;km=mr+364|0;Xn=mr+360|0;yn=mr+356|0;Pn=mr+352|0;Nm=mr+348|0;Sl=mr+344|0;Dn=mr+340|0;En=mr+336|0;Wl=mr+332|0;Zl=mr+328|0;Gn=mr+324|0;Hn=mr+320|0;Rm=mr+316|0;Kn=mr+312|0;nm=mr+308|0;Ln=mr+304|0;lm=mr+300|0;mm=mr+296|0;cm=mr+292|0;On=mr+288|0;jm=mr+284|0;Nn=mr+280|0;fm=mr+276|0;im=mr+272|0;$l=mr+268|0;Tm=mr+264|0;Vn=mr+260|0;Zm=mr+256|0;_m=mr+252|0;$m=mr+248|0;Um=mr+244|0;Vm=mr+240|0;Ym=mr+236|0;An=mr+232|0;Jn=mr+228|0;Qn=mr+224|0;Rn=mr+220|0;Sn=mr+216|0;Bn=mr+212|0;Cn=mr+208|0;iq=mr+204|0;Op=mr+200|0;rp=mr+196|0;Pp=mr+192|0;Aq=mr+188|0;Mq=mr+184|0;xq=mr+180|0;Lq=mr+176|0;Kp=mr+172|0;Oq=mr+168|0;rq=mr+164|0;Eq=mr+160|0;Dp=mr+156|0;Pq=mr+152|0;qq=mr+148|0;Hq=mr+144|0;eq=mr+140|0;hq=mr+136|0;vq=mr+132|0;wq=mr+128|0;lq=mr+124|0;oq=mr+120|0;yq=mr+116|0;zq=mr+112|0;Jp=mr+108|0;Cq=mr+104|0;Gp=mr+100|0;Dq=mr+96|0;Ep=mr+92|0;Fp=mr+88|0;vp=mr+84|0;Gq=mr+80|0;Cp=mr+76|0;Fq=mr+72|0;yp=mr+68|0;Bp=mr+64|0;sp=mr+60|0;Lp=mr+56|0;Nq=mr+52|0;Qq=mr+48|0;Rq=mr+44|0;Sq=mr+40|0;Mp=mr+36|0;Np=mr+32|0;Qp=mr+28|0;sq=mr+24|0;Bq=mr+20|0;Iq=mr+16|0;Jq=mr+12|0;Kq=mr+8|0;tq=mr+4|0;uq=mr;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[nr>>2]=k;c[u>>2]=l;c[v>>2]=m;g[mr+3988>>2]=1.0282055139541626;g[mr+3984>>2]=1.7154572010040283;g[mr+3980>>2]=1.606415033340454;g[mr+3976>>2]=1.1913986206054688;g[mr+3972>>2]=1.9400625228881836;g[mr+3968>>2]=.48596036434173584;g[mr+3964>>2]=.2934609353542328;g[mr+3960>>2]=1.9783530235290527;g[mr+3956>>2]=.8314695954322815;g[mr+3952>>2]=.5555702447891235;g[mr+3948>>2]=.8551101684570312;g[mr+3944>>2]=1.807978630065918;g[mr+3940>>2]=1.4819022417068481;g[mr+3936>>2]=1.3431179523468018;g[mr+3932>>2]=1.8830881118774414;g[mr+3928>>2]=.6737797260284424;g[mr+3924>>2]=.09813535213470459;g[mr+3920>>2]=1.9975908994674683;g[mr+3916>>2]=.9807852506637573;g[m
r+3912>>2]=.19509032368659973;g[mr+3908>>2]=.580569326877594;g[mr+3904>>2]=1.913880705833435;g[mr+3900>>2]=.9427934885025024;g[mr+3896>>2]=1.7638425827026367;g[mr+3892>>2]=1.111140489578247;g[mr+3888>>2]=1.662939190864563;g[mr+3884>>2]=1.2687865495681763;g[mr+3880>>2]=1.5460208654403687;g[mr+3876>>2]=.1960342824459076;g[mr+3872>>2]=1.990369439125061;g[mr+3868>>2]=.39018064737319946;g[mr+3864>>2]=1.9615705013275146;g[mr+3860>>2]=.3826834261417389;g[mr+3856>>2]=.9238795042037964;g[mr+3852>>2]=.7071067690849304;g[mr+3848>>2]=.7653668522834778;g[mr+3844>>2]=1.8477590084075928;g[mr+3840>>2]=1.4142135381698608;g[mr+3836>>2]=2.0;c[lr>>2]=c[nr>>2];while(1){if((c[lr>>2]|0)<=0)break;g[Xc>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<5<<2)>>2];g[ee>>2]=+g[Xc>>2]*2.0;g[Gi>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<5<<2)>>2];g[Hi>>2]=+g[Gi>>2]*2.0;g[w>>2]=+g[c[p>>2]>>2];g[Fa>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<6<<2)>>2];g[Ob>>2]=+g[w>>2]+ +g[Fa>>2];g[Fi>>2]=+g[w>>2]-+g[Fa>>2];g[xg>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2];g[Gh>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*48<<2)>>2];g[Pi>>2]=(+g[xg>>2]+ +g[Gh>>2])*2.0;g[Ji>>2]=+g[xg>>2]-+g[Gh>>2];g[Na>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<4<<2)>>2];g[Oa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*48<<2)>>2];g[Pa>>2]=(+g[Na>>2]-+g[Oa>>2])*2.0;g[Ki>>2]=+g[Na>>2]+ +g[Oa>>2];g[of>>2]=+g[Ob>>2]+ +g[ee>>2];g[wk>>2]=+g[of>>2]+ +g[Pi>>2];g[ng>>2]=+g[of>>2]-+g[Pi>>2];g[Ma>>2]=+g[Ob>>2]-+g[ee>>2];g[ob>>2]=+g[Ma>>2]-+g[Pa>>2];g[Ze>>2]=+g[Ma>>2]+ +g[Pa>>2];g[Ii>>2]=+g[Fi>>2]-+g[Hi>>2];g[Li>>2]=(+g[Ji>>2]-+g[Ki>>2])*1.4142135381698608;g[Mi>>2]=+g[Ii>>2]+ +g[Li>>2];g[Lm>>2]=+g[Ii>>2]-+g[Li>>2];g[an>>2]=+g[Fi>>2]+ +g[Hi>>2];g[bn>>2]=(+g[Ji>>2]+ +g[Ki>>2])*1.4142135381698608;g[cn>>2]=+g[an>>2]-+g[bn>>2];g[cq>>2]=+g[an>>2]+ +g[bn>>2];g[Fl>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Om>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*56<<2)>>2];g[Yn>>2]=+g[Fl>>2]+ 
+g[Om>>2];g[Ni>>2]=+g[Fl>>2]-+g[Om>>2];g[qb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[rb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*56<<2)>>2];g[sb>>2]=+g[qb>>2]-+g[rb>>2];g[Th>>2]=+g[qb>>2]+ +g[rb>>2];g[gp>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*40<<2)>>2];g[pq>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*24<<2)>>2];g[Tq>>2]=+g[gp>>2]+ +g[pq>>2];g[Sh>>2]=+g[gp>>2]-+g[pq>>2];g[tb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*40<<2)>>2];g[ub>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*24<<2)>>2];g[vb>>2]=+g[tb>>2]-+g[ub>>2];g[Oi>>2]=+g[tb>>2]+ +g[ub>>2];g[Uq>>2]=(+g[Yn>>2]+ +g[Tq>>2])*2.0;g[og>>2]=(+g[vb>>2]+ +g[sb>>2])*2.0;g[pb>>2]=+g[Yn>>2]-+g[Tq>>2];g[wb>>2]=+g[sb>>2]-+g[vb>>2];g[xb>>2]=(+g[pb>>2]-+g[wb>>2])*1.4142135381698608;g[_e>>2]=(+g[pb>>2]+ +g[wb>>2])*1.4142135381698608;g[Rh>>2]=+g[Ni>>2]-+g[Oi>>2];g[Uh>>2]=+g[Sh>>2]+ +g[Th>>2];g[Vh>>2]=+g[Rh>>2]*1.8477590084075928-+g[Uh>>2]*.7653668522834778;g[Mm>>2]=+g[Rh>>2]*.7653668522834778+ +g[Uh>>2]*1.8477590084075928;g[dn>>2]=+g[Ni>>2]+ +g[Oi>>2];g[en>>2]=+g[Th>>2]-+g[Sh>>2];g[fn>>2]=+g[dn>>2]*.7653668522834778-+g[en>>2]*1.8477590084075928;g[dq>>2]=+g[dn>>2]*1.8477590084075928+ +g[en>>2]*.7653668522834778;g[Wq>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[Xq>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*60<<2)>>2];g[Yq>>2]=+g[Wq>>2]+ +g[Xq>>2];g[Xh>>2]=+g[Wq>>2]-+g[Xq>>2];g[Jb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[Kb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*60<<2)>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[ki>>2]=+g[Jb>>2]+ +g[Kb>>2];g[Zq>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*36<<2)>>2];g[_q>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*28<<2)>>2];g[$q>>2]=+g[Zq>>2]+ +g[_q>>2];g[ji>>2]=+g[Zq>>2]-+g[_q>>2];g[Mb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*36<<2)>>2];g[Nb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*28<<2)>>2];g[Qa>>2]=+g[Mb>>2]-+g[Nb>>2];g[Yh>>2]=+g[Mb>>2]+ +g[Nb>>2];g[er>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[fr>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*52<<2)>>2];g[bi>>2]=+g[er>>2]-+g[fr>>2];g[Ab>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*52<<2)>>2];g[Bb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2];g[ci>>2]=+g[Bb>>2]+ +g[Ab>>2];g[gr>>2]=+g[er>>2]+ 
+g[fr>>2];g[hi>>2]=+g[bi>>2]+ +g[ci>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[di>>2]=+g[bi>>2]-+g[ci>>2];g[br>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*20<<2)>>2];g[cr>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*44<<2)>>2];g[_h>>2]=+g[br>>2]-+g[cr>>2];g[Db>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*20<<2)>>2];g[Eb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*44<<2)>>2];g[$h>>2]=+g[Db>>2]+ +g[Eb>>2];g[dr>>2]=+g[br>>2]+ +g[cr>>2];g[gi>>2]=+g[_h>>2]+ +g[$h>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[ai>>2]=+g[_h>>2]-+g[$h>>2];g[ar>>2]=+g[Yq>>2]+ +g[$q>>2];g[hr>>2]=+g[dr>>2]+ +g[gr>>2];g[ir>>2]=(+g[ar>>2]+ +g[hr>>2])*2.0;g[qg>>2]=+g[ar>>2]-+g[hr>>2];g[hn>>2]=+g[Xh>>2]+ +g[Yh>>2];g[jn>>2]=(+g[gi>>2]+ +g[hi>>2])*.7071067690849304;g[kn>>2]=+g[hn>>2]-+g[jn>>2];g[fq>>2]=+g[hn>>2]+ +g[jn>>2];g[ln>>2]=(+g[ai>>2]-+g[di>>2])*.7071067690849304;g[mn>>2]=+g[ki>>2]-+g[ji>>2];g[nn>>2]=+g[ln>>2]+ +g[mn>>2];g[gq>>2]=+g[mn>>2]-+g[ln>>2];g[zb>>2]=+g[Yq>>2]-+g[$q>>2];g[Gb>>2]=+g[Cb>>2]-+g[Fb>>2];g[Hb>>2]=+g[zb>>2]+ +g[Gb>>2];g[af>>2]=+g[zb>>2]-+g[Gb>>2];g[Ib>>2]=+g[dr>>2]-+g[gr>>2];g[Ra>>2]=+g[Lb>>2]-+g[Qa>>2];g[Sa>>2]=+g[Ib>>2]+ +g[Ra>>2];g[bf>>2]=+g[Ra>>2]-+g[Ib>>2];g[Zh>>2]=+g[Xh>>2]-+g[Yh>>2];g[ei>>2]=(+g[ai>>2]+ +g[di>>2])*.7071067690849304;g[fi>>2]=+g[Zh>>2]+ +g[ei>>2];g[Ql>>2]=+g[Zh>>2]-+g[ei>>2];g[ii>>2]=(+g[gi>>2]-+g[hi>>2])*.7071067690849304;g[li>>2]=+g[ji>>2]+ +g[ki>>2];g[mi>>2]=+g[ii>>2]+ +g[li>>2];g[Rl>>2]=+g[li>>2]-+g[ii>>2];g[rg>>2]=+g[Qa>>2]+ +g[Lb>>2];g[sg>>2]=+g[Fb>>2]+ +g[Cb>>2];g[tg>>2]=+g[rg>>2]-+g[sg>>2];g[Tg>>2]=(+g[sg>>2]+ +g[rg>>2])*2.0;g[C>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*63<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[cj>>2]=+g[C>>2]-+g[D>>2];g[hd>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[id>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*63<<2)>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[_k>>2]=+g[hd>>2]+ +g[id>>2];g[F>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*33<<2)>>2];g[G>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*31<<2)>>2];g[fa>>2]=+g[F>>2]+ 
+g[G>>2];g[Zk>>2]=+g[F>>2]-+g[G>>2];g[kd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*33<<2)>>2];g[ld>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*31<<2)>>2];g[md>>2]=+g[kd>>2]-+g[ld>>2];g[dj>>2]=+g[kd>>2]+ +g[ld>>2];g[ka>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2];g[la>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*49<<2)>>2];g[ij>>2]=+g[ka>>2]-+g[la>>2];g[Lc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*49<<2)>>2];g[Mc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2];g[jj>>2]=+g[Mc>>2]+ +g[Lc>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[Xk>>2]=+g[ij>>2]+ +g[jj>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[kj>>2]=+g[ij>>2]-+g[jj>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*17<<2)>>2];g[ia>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*47<<2)>>2];g[fj>>2]=+g[ha>>2]-+g[ia>>2];g[Oc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*17<<2)>>2];g[Pc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*47<<2)>>2];g[gj>>2]=+g[Oc>>2]+ +g[Pc>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[Wk>>2]=+g[fj>>2]+ +g[gj>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[hj>>2]=+g[fj>>2]-+g[gj>>2];g[ga>>2]=+g[E>>2]+ +g[fa>>2];g[na>>2]=+g[ja>>2]+ +g[ma>>2];g[oa>>2]=+g[ga>>2]+ +g[na>>2];g[Kf>>2]=+g[ga>>2]-+g[na>>2];g[Ko>>2]=+g[cj>>2]+ +g[dj>>2];g[Lo>>2]=(+g[Wk>>2]+ +g[Xk>>2])*.7071067690849304;g[Mo>>2]=+g[Ko>>2]-+g[Lo>>2];g[tp>>2]=+g[Ko>>2]+ +g[Lo>>2];g[ro>>2]=(+g[hj>>2]-+g[kj>>2])*.7071067690849304;g[so>>2]=+g[_k>>2]-+g[Zk>>2];g[to>>2]=+g[ro>>2]+ +g[so>>2];g[Ip>>2]=+g[so>>2]-+g[ro>>2];g[Kc>>2]=+g[E>>2]-+g[fa>>2];g[Rc>>2]=+g[Nc>>2]-+g[Qc>>2];g[Sc>>2]=+g[Kc>>2]+ +g[Rc>>2];g[nf>>2]=+g[Kc>>2]-+g[Rc>>2];g[gd>>2]=+g[ja>>2]-+g[ma>>2];g[nd>>2]=+g[jd>>2]-+g[md>>2];g[od>>2]=+g[gd>>2]+ +g[nd>>2];g[De>>2]=+g[nd>>2]-+g[gd>>2];g[ej>>2]=+g[cj>>2]-+g[dj>>2];g[lj>>2]=(+g[hj>>2]+ +g[kj>>2])*.7071067690849304;g[mj>>2]=+g[ej>>2]+ +g[lj>>2];g[am>>2]=+g[ej>>2]-+g[lj>>2];g[Yk>>2]=(+g[Wk>>2]-+g[Xk>>2])*.7071067690849304;g[$k>>2]=+g[Zk>>2]+ +g[_k>>2];g[al>>2]=+g[Yk>>2]+ +g[$k>>2];g[Qm>>2]=+g[$k>>2]-+g[Yk>>2];g[Eg>>2]=+g[md>>2]+ +g[jd>>2];g[Fg>>2]=+g[Qc>>2]+ +g[Nc>>2];g[Gg>>2]=+g[Eg>>2]-+g[Fg>>2];g[fh>>2]=+g[Fg>>2]+ 
+g[Eg>>2];g[kr>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[Yj>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*62<<2)>>2];g[Zj>>2]=+g[kr>>2]+ +g[Yj>>2];g[Qi>>2]=+g[kr>>2]-+g[Yj>>2];g[zc>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[Ac>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*62<<2)>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[Wj>>2]=+g[zc>>2]+ +g[Ac>>2];g[_j>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*34<<2)>>2];g[$j>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*30<<2)>>2];g[ak>>2]=+g[_j>>2]+ +g[$j>>2];g[Vj>>2]=+g[_j>>2]-+g[$j>>2];g[Cc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*34<<2)>>2];g[Dc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*30<<2)>>2];g[Ec>>2]=+g[Cc>>2]-+g[Dc>>2];g[Ri>>2]=+g[Cc>>2]+ +g[Dc>>2];g[fk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2];g[gk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*50<<2)>>2];g[Wi>>2]=+g[fk>>2]-+g[gk>>2];g[Wa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*50<<2)>>2];g[Xa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2];g[Xi>>2]=+g[Xa>>2]+ +g[Wa>>2];g[hk>>2]=+g[fk>>2]+ +g[gk>>2];g[Tj>>2]=+g[Wi>>2]+ +g[Xi>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[Yi>>2]=+g[Wi>>2]-+g[Xi>>2];g[ck>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*18<<2)>>2];g[dk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*46<<2)>>2];g[Ti>>2]=+g[ck>>2]-+g[dk>>2];g[Za>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*18<<2)>>2];g[_a>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*46<<2)>>2];g[Ui>>2]=+g[Za>>2]+ +g[_a>>2];g[ek>>2]=+g[ck>>2]+ +g[dk>>2];g[Sj>>2]=+g[Ti>>2]+ +g[Ui>>2];g[$a>>2]=+g[Za>>2]-+g[_a>>2];g[Vi>>2]=+g[Ti>>2]-+g[Ui>>2];g[bk>>2]=+g[Zj>>2]+ +g[ak>>2];g[ik>>2]=+g[ek>>2]+ +g[hk>>2];g[jk>>2]=+g[bk>>2]+ +g[ik>>2];g[wg>>2]=+g[bk>>2]-+g[ik>>2];g[qn>>2]=+g[Qi>>2]+ +g[Ri>>2];g[rn>>2]=(+g[Sj>>2]+ +g[Tj>>2])*.7071067690849304;g[sn>>2]=+g[qn>>2]-+g[rn>>2];g[jq>>2]=+g[qn>>2]+ +g[rn>>2];g[eo>>2]=(+g[Vi>>2]-+g[Yi>>2])*.7071067690849304;g[fo>>2]=+g[Wj>>2]-+g[Vj>>2];g[go>>2]=+g[eo>>2]+ +g[fo>>2];g[nq>>2]=+g[fo>>2]-+g[eo>>2];g[Va>>2]=+g[Zj>>2]-+g[ak>>2];g[ab>>2]=+g[Ya>>2]-+g[$a>>2];g[bb>>2]=+g[Va>>2]+ +g[ab>>2];g[ef>>2]=+g[Va>>2]-+g[ab>>2];g[yc>>2]=+g[ek>>2]-+g[hk>>2];g[Fc>>2]=+g[Bc>>2]-+g[Ec>>2];g[Gc>>2]=+g[yc>>2]+ 
+g[Fc>>2];g[jf>>2]=+g[Fc>>2]-+g[yc>>2];g[Si>>2]=+g[Qi>>2]-+g[Ri>>2];g[Zi>>2]=(+g[Vi>>2]+ +g[Yi>>2])*.7071067690849304;g[yj>>2]=+g[Si>>2]+ +g[Zi>>2];g[Ul>>2]=+g[Si>>2]-+g[Zi>>2];g[Uj>>2]=(+g[Sj>>2]-+g[Tj>>2])*.7071067690849304;g[Xj>>2]=+g[Vj>>2]+ +g[Wj>>2];g[_i>>2]=+g[Uj>>2]+ +g[Xj>>2];g[Yl>>2]=+g[Xj>>2]-+g[Uj>>2];g[Ef>>2]=+g[Ec>>2]+ +g[Bc>>2];g[Ff>>2]=+g[$a>>2]+ +g[Ya>>2];g[Gf>>2]=+g[Ef>>2]-+g[Ff>>2];g[Wg>>2]=+g[Ff>>2]+ +g[Ef>>2];g[H>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[I>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*59<<2)>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[K>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*37<<2)>>2];g[L>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*27<<2)>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[N>>2]=+g[J>>2]+ +g[M>>2];g[ol>>2]=+g[K>>2]-+g[L>>2];g[nc>>2]=+g[J>>2]-+g[M>>2];g[Ck>>2]=+g[H>>2]-+g[I>>2];g[Yc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Zc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*59<<2)>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[$c>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*37<<2)>>2];g[ad>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*27<<2)>>2];g[bd>>2]=+g[$c>>2]-+g[ad>>2];g[cd>>2]=+g[_c>>2]-+g[bd>>2];g[pl>>2]=+g[Yc>>2]+ +g[Zc>>2];g[Qf>>2]=+g[bd>>2]+ +g[_c>>2];g[Dk>>2]=+g[$c>>2]+ +g[ad>>2];g[O>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*21<<2)>>2];g[P>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*43<<2)>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Fk>>2]=+g[O>>2]-+g[P>>2];g[rc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*21<<2)>>2];g[sc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*43<<2)>>2];g[tc>>2]=+g[rc>>2]-+g[sc>>2];g[Gk>>2]=+g[rc>>2]+ +g[sc>>2];g[R>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[S>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*53<<2)>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[gl>>2]=+g[R>>2]-+g[S>>2];g[oc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*53<<2)>>2];g[pc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[hl>>2]=+g[pc>>2]+ +g[oc>>2];g[U>>2]=+g[Q>>2]+ +g[T>>2];g[ll>>2]=+g[Fk>>2]+ +g[Gk>>2];g[ml>>2]=+g[gl>>2]+ +g[hl>>2];g[uc>>2]=+g[qc>>2]-+g[tc>>2];g[wc>>2]=+g[Q>>2]-+g[T>>2];g[fl>>2]=+g[Fk>>2]-+g[Gk>>2];g[il>>2]=+g[gl>>2]-+g[hl>>2];g[Rf>>2]=+g[tc>>2]+ 
+g[qc>>2];g[V>>2]=+g[N>>2]+ +g[U>>2];g[bh>>2]=+g[Rf>>2]+ +g[Qf>>2];g[vc>>2]=+g[nc>>2]+ +g[uc>>2];g[dd>>2]=+g[wc>>2]+ +g[cd>>2];g[ed>>2]=+g[vc>>2]*.9238795042037964-+g[dd>>2]*.3826834261417389;g[_d>>2]=+g[vc>>2]*.3826834261417389+ +g[dd>>2]*.9238795042037964;g[Yo>>2]=(+g[fl>>2]-+g[il>>2])*.7071067690849304;g[Zo>>2]=+g[pl>>2]-+g[ol>>2];g[_o>>2]=+g[Yo>>2]+ +g[Zo>>2];g[xp>>2]=+g[Zo>>2]-+g[Yo>>2];g[re>>2]=+g[nc>>2]-+g[uc>>2];g[se>>2]=+g[cd>>2]-+g[wc>>2];g[te>>2]=+g[re>>2]*.3826834261417389-+g[se>>2]*.9238795042037964;g[ze>>2]=+g[re>>2]*.9238795042037964+ +g[se>>2]*.3826834261417389;g[Ek>>2]=+g[Ck>>2]-+g[Dk>>2];g[jl>>2]=(+g[fl>>2]+ +g[il>>2])*.7071067690849304;g[kl>>2]=+g[Ek>>2]+ +g[jl>>2];g[dm>>2]=+g[Ek>>2]-+g[jl>>2];g[Vo>>2]=+g[Ck>>2]+ +g[Dk>>2];g[Wo>>2]=(+g[ll>>2]+ +g[ml>>2])*.7071067690849304;g[Xo>>2]=+g[Vo>>2]-+g[Wo>>2];g[wp>>2]=+g[Vo>>2]+ +g[Wo>>2];g[Pf>>2]=+g[N>>2]-+g[U>>2];g[Sf>>2]=+g[Qf>>2]-+g[Rf>>2];g[Tf>>2]=+g[Pf>>2]-+g[Sf>>2];g[Ag>>2]=+g[Pf>>2]+ +g[Sf>>2];g[nl>>2]=(+g[ll>>2]-+g[ml>>2])*.7071067690849304;g[ql>>2]=+g[ol>>2]+ +g[pl>>2];g[rl>>2]=+g[nl>>2]+ +g[ql>>2];g[em>>2]=+g[ql>>2]-+g[nl>>2];g[W>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[X>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*61<<2)>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[Z>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*29<<2)>>2];g[_>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*35<<2)>>2];g[$>>2]=+g[Z>>2]+ +g[_>>2];g[aa>>2]=+g[Y>>2]+ +g[$>>2];g[Jk>>2]=+g[Z>>2]-+g[_>>2];g[fd>>2]=+g[Y>>2]-+g[$>>2];g[tl>>2]=+g[W>>2]-+g[X>>2];g[Pd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*61<<2)>>2];g[Qd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Sd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*29<<2)>>2];g[Td>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*35<<2)>>2];g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[Vd>>2]=+g[Rd>>2]-+g[Ud>>2];g[Kk>>2]=+g[Qd>>2]+ +g[Pd>>2];g[Vf>>2]=+g[Ud>>2]+ +g[Rd>>2];g[ul>>2]=+g[Sd>>2]+ +g[Td>>2];g[ba>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*51<<2)>>2];g[da>>2]=+g[ba>>2]+ 
+g[ca>>2];g[wl>>2]=+g[ba>>2]-+g[ca>>2];g[Jd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2];g[Kd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*51<<2)>>2];g[Ld>>2]=+g[Jd>>2]-+g[Kd>>2];g[xl>>2]=+g[Jd>>2]+ +g[Kd>>2];g[ea>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*19<<2)>>2];g[Ga>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*45<<2)>>2];g[Ha>>2]=+g[ea>>2]+ +g[Ga>>2];g[zl>>2]=+g[ea>>2]-+g[Ga>>2];g[Gd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*45<<2)>>2];g[Hd>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*19<<2)>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[Al>>2]=+g[Hd>>2]+ +g[Gd>>2];g[Ia>>2]=+g[da>>2]+ +g[Ha>>2];g[El>>2]=+g[wl>>2]+ +g[xl>>2];g[Hk>>2]=+g[zl>>2]+ +g[Al>>2];g[Md>>2]=+g[Id>>2]-+g[Ld>>2];g[Od>>2]=+g[da>>2]-+g[Ha>>2];g[yl>>2]=+g[wl>>2]-+g[xl>>2];g[Bl>>2]=+g[zl>>2]-+g[Al>>2];g[Wf>>2]=+g[Ld>>2]+ +g[Id>>2];g[Ja>>2]=+g[aa>>2]+ +g[Ia>>2];g[ah>>2]=+g[Wf>>2]+ +g[Vf>>2];g[Nd>>2]=+g[fd>>2]+ +g[Md>>2];g[Wd>>2]=+g[Od>>2]+ +g[Vd>>2];g[Xd>>2]=+g[Nd>>2]*.9238795042037964+ +g[Wd>>2]*.3826834261417389;g[$d>>2]=+g[Wd>>2]*.9238795042037964-+g[Nd>>2]*.3826834261417389;g[dp>>2]=(+g[yl>>2]-+g[Bl>>2])*.7071067690849304;g[ep>>2]=+g[Jk>>2]+ +g[Kk>>2];g[fp>>2]=+g[dp>>2]-+g[ep>>2];g[Ap>>2]=+g[dp>>2]+ +g[ep>>2];g[ue>>2]=+g[fd>>2]-+g[Md>>2];g[ve>>2]=+g[Vd>>2]-+g[Od>>2];g[we>>2]=+g[ue>>2]*.3826834261417389+ +g[ve>>2]*.9238795042037964;g[Ae>>2]=+g[ve>>2]*.3826834261417389-+g[ue>>2]*.9238795042037964;g[vl>>2]=+g[tl>>2]-+g[ul>>2];g[Cl>>2]=(+g[yl>>2]+ +g[Bl>>2])*.7071067690849304;g[Dl>>2]=+g[vl>>2]+ +g[Cl>>2];g[gm>>2]=+g[vl>>2]-+g[Cl>>2];g[ap>>2]=+g[tl>>2]+ +g[ul>>2];g[bp>>2]=(+g[El>>2]+ +g[Hk>>2])*.7071067690849304;g[cp>>2]=+g[ap>>2]-+g[bp>>2];g[zp>>2]=+g[ap>>2]+ +g[bp>>2];g[Uf>>2]=+g[aa>>2]-+g[Ia>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[Yf>>2]=+g[Uf>>2]+ +g[Xf>>2];g[Bg>>2]=+g[Xf>>2]-+g[Uf>>2];g[Ik>>2]=(+g[El>>2]-+g[Hk>>2])*.7071067690849304;g[Lk>>2]=+g[Jk>>2]-+g[Kk>>2];g[Mk>>2]=+g[Ik>>2]+ +g[Lk>>2];g[hm>>2]=+g[Lk>>2]-+g[Ik>>2];g[pa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[qa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*55<<2)>>2];g[ra>>2]=+g[pa>>2]+ 
+g[qa>>2];g[nj>>2]=+g[pa>>2]-+g[qa>>2];g[Uc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[Vc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*55<<2)>>2];g[Wc>>2]=+g[Uc>>2]-+g[Vc>>2];g[rj>>2]=+g[Uc>>2]+ +g[Vc>>2];g[sa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*41<<2)>>2];g[ta>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*23<<2)>>2];g[ua>>2]=+g[sa>>2]+ +g[ta>>2];g[qj>>2]=+g[sa>>2]-+g[ta>>2];g[Zb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*41<<2)>>2];g[_b>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*23<<2)>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[oj>>2]=+g[Zb>>2]+ +g[_b>>2];g[Tc>>2]=+g[ra>>2]-+g[ua>>2];g[ac>>2]=+g[Wc>>2]-+g[$b>>2];g[Oo>>2]=+g[rj>>2]-+g[qj>>2];g[No>>2]=+g[nj>>2]+ +g[oj>>2];g[sj>>2]=+g[qj>>2]+ +g[rj>>2];g[pj>>2]=+g[nj>>2]-+g[oj>>2];g[wa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[xa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*57<<2)>>2];g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[uj>>2]=+g[wa>>2]-+g[xa>>2];g[dc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*57<<2)>>2];g[ec>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[xk>>2]=+g[ec>>2]+ +g[dc>>2];g[za>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*25<<2)>>2];g[Aa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*39<<2)>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[xj>>2]=+g[za>>2]-+g[Aa>>2];g[gc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*25<<2)>>2];g[hc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*39<<2)>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[vj>>2]=+g[gc>>2]+ +g[hc>>2];g[cc>>2]=+g[ya>>2]-+g[Ba>>2];g[jc>>2]=+g[fc>>2]-+g[ic>>2];g[Ro>>2]=+g[xj>>2]+ +g[xk>>2];g[Qo>>2]=+g[uj>>2]+ +g[vj>>2];g[yk>>2]=+g[xj>>2]-+g[xk>>2];g[wj>>2]=+g[uj>>2]-+g[vj>>2];g[va>>2]=+g[ra>>2]+ +g[ua>>2];g[Ca>>2]=+g[ya>>2]+ +g[Ba>>2];g[Da>>2]=+g[va>>2]+ +g[Ca>>2];g[Dg>>2]=+g[va>>2]-+g[Ca>>2];g[Po>>2]=+g[No>>2]*.3826834261417389-+g[Oo>>2]*.9238795042037964;g[So>>2]=+g[Qo>>2]*.3826834261417389-+g[Ro>>2]*.9238795042037964;g[To>>2]=+g[Po>>2]+ +g[So>>2];g[Hp>>2]=+g[Po>>2]-+g[So>>2];g[oo>>2]=+g[No>>2]*.9238795042037964+ +g[Oo>>2]*.3826834261417389;g[po>>2]=+g[Qo>>2]*.9238795042037964+ +g[Ro>>2]*.3826834261417389;g[qo>>2]=+g[oo>>2]-+g[po>>2];g[up>>2]=+g[oo>>2]+ 
+g[po>>2];g[bc>>2]=+g[Tc>>2]-+g[ac>>2];g[kc>>2]=+g[cc>>2]+ +g[jc>>2];g[lc>>2]=(+g[bc>>2]+ +g[kc>>2])*.7071067690849304;g[Ce>>2]=(+g[bc>>2]-+g[kc>>2])*.7071067690849304;g[be>>2]=+g[Tc>>2]+ +g[ac>>2];g[ce>>2]=+g[jc>>2]-+g[cc>>2];g[de>>2]=(+g[be>>2]+ +g[ce>>2])*.7071067690849304;g[pe>>2]=(+g[ce>>2]-+g[be>>2])*.7071067690849304;g[tj>>2]=+g[pj>>2]*.9238795042037964-+g[sj>>2]*.3826834261417389;g[zk>>2]=+g[wj>>2]*.9238795042037964+ +g[yk>>2]*.3826834261417389;g[Ak>>2]=+g[tj>>2]+ +g[zk>>2];g[Pm>>2]=+g[tj>>2]-+g[zk>>2];g[Tk>>2]=+g[pj>>2]*.3826834261417389+ +g[sj>>2]*.9238795042037964;g[Uk>>2]=+g[yk>>2]*.9238795042037964-+g[wj>>2]*.3826834261417389;g[Vk>>2]=+g[Tk>>2]+ +g[Uk>>2];g[bm>>2]=+g[Uk>>2]-+g[Tk>>2];g[Lf>>2]=+g[ic>>2]+ +g[fc>>2];g[Mf>>2]=+g[$b>>2]+ +g[Wc>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[Hh>>2]=+g[Mf>>2]+ +g[Lf>>2];g[kk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[lk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*54<<2)>>2];g[mk>>2]=+g[kk>>2]+ +g[lk>>2];g[zj>>2]=+g[kk>>2]-+g[lk>>2];g[db>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[eb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*54<<2)>>2];g[fb>>2]=+g[db>>2]-+g[eb>>2];g[Dj>>2]=+g[db>>2]+ +g[eb>>2];g[nk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*42<<2)>>2];g[ok>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*22<<2)>>2];g[pk>>2]=+g[nk>>2]+ +g[ok>>2];g[Cj>>2]=+g[nk>>2]-+g[ok>>2];g[gb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*42<<2)>>2];g[hb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*22<<2)>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[Aj>>2]=+g[gb>>2]+ +g[hb>>2];g[cb>>2]=+g[mk>>2]-+g[pk>>2];g[jb>>2]=+g[fb>>2]-+g[ib>>2];g[un>>2]=+g[Dj>>2]-+g[Cj>>2];g[tn>>2]=+g[zj>>2]+ +g[Aj>>2];g[Ej>>2]=+g[Cj>>2]+ +g[Dj>>2];g[Bj>>2]=+g[zj>>2]-+g[Aj>>2];g[rk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[sk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*58<<2)>>2];g[tk>>2]=+g[rk>>2]+ +g[sk>>2];g[Gj>>2]=+g[rk>>2]-+g[sk>>2];g[mb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*58<<2)>>2];g[nb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[Pb>>2]=+g[mb>>2]-+g[nb>>2];g[Kj>>2]=+g[nb>>2]+ 
+g[mb>>2];g[uk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*26<<2)>>2];g[vk>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*38<<2)>>2];g[x>>2]=+g[uk>>2]+ +g[vk>>2];g[Jj>>2]=+g[uk>>2]-+g[vk>>2];g[Qb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*26<<2)>>2];g[Rb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*38<<2)>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[Hj>>2]=+g[Qb>>2]+ +g[Rb>>2];g[lb>>2]=+g[tk>>2]-+g[x>>2];g[Tb>>2]=+g[Pb>>2]-+g[Sb>>2];g[xn>>2]=+g[Jj>>2]+ +g[Kj>>2];g[wn>>2]=+g[Gj>>2]+ +g[Hj>>2];g[Lj>>2]=+g[Jj>>2]-+g[Kj>>2];g[Ij>>2]=+g[Gj>>2]-+g[Hj>>2];g[qk>>2]=+g[mk>>2]+ +g[pk>>2];g[y>>2]=+g[tk>>2]+ +g[x>>2];g[z>>2]=+g[qk>>2]+ +g[y>>2];g[Df>>2]=+g[qk>>2]-+g[y>>2];g[vn>>2]=+g[tn>>2]*.3826834261417389-+g[un>>2]*.9238795042037964;g[Zn>>2]=+g[wn>>2]*.3826834261417389-+g[xn>>2]*.9238795042037964;g[_n>>2]=+g[vn>>2]+ +g[Zn>>2];g[mq>>2]=+g[vn>>2]-+g[Zn>>2];g[ao>>2]=+g[tn>>2]*.9238795042037964+ +g[un>>2]*.3826834261417389;g[bo>>2]=+g[wn>>2]*.9238795042037964+ +g[xn>>2]*.3826834261417389;g[co>>2]=+g[ao>>2]-+g[bo>>2];g[kq>>2]=+g[ao>>2]+ +g[bo>>2];g[kb>>2]=+g[cb>>2]-+g[jb>>2];g[Ub>>2]=+g[lb>>2]+ +g[Tb>>2];g[Vb>>2]=(+g[kb>>2]+ +g[Ub>>2])*.7071067690849304;g[hf>>2]=(+g[kb>>2]-+g[Ub>>2])*.7071067690849304;g[Xb>>2]=+g[cb>>2]+ +g[jb>>2];g[Yb>>2]=+g[Tb>>2]-+g[lb>>2];g[xc>>2]=(+g[Xb>>2]+ +g[Yb>>2])*.7071067690849304;g[ff>>2]=(+g[Yb>>2]-+g[Xb>>2])*.7071067690849304;g[Fj>>2]=+g[Bj>>2]*.9238795042037964-+g[Ej>>2]*.3826834261417389;g[Mj>>2]=+g[Ij>>2]*.9238795042037964+ +g[Lj>>2]*.3826834261417389;g[Nj>>2]=+g[Fj>>2]+ +g[Mj>>2];g[Xl>>2]=+g[Fj>>2]-+g[Mj>>2];g[Pj>>2]=+g[Bj>>2]*.3826834261417389+ +g[Ej>>2]*.9238795042037964;g[Qj>>2]=+g[Lj>>2]*.9238795042037964-+g[Ij>>2]*.3826834261417389;g[Rj>>2]=+g[Pj>>2]+ +g[Qj>>2];g[Vl>>2]=+g[Qj>>2]-+g[Pj>>2];g[zf>>2]=+g[Sb>>2]+ +g[Pb>>2];g[Af>>2]=+g[ib>>2]+ +g[fb>>2];g[Bf>>2]=+g[zf>>2]-+g[Af>>2];g[Xg>>2]=+g[Af>>2]+ +g[zf>>2];g[A>>2]=(+g[jk>>2]+ +g[z>>2])*2.0;g[ui>>2]=(+g[Xg>>2]+ +g[Wg>>2])*2.0;g[Vq>>2]=+g[wk>>2]+ +g[Uq>>2];g[jr>>2]=+g[Vq>>2]+ +g[ir>>2];g[ti>>2]=+g[Vq>>2]-+g[ir>>2];g[Ea>>2]=+g[oa>>2]+ 
+g[Da>>2];g[Ka>>2]=+g[V>>2]+ +g[Ja>>2];g[La>>2]=(+g[Ea>>2]+ +g[Ka>>2])*2.0;g[wi>>2]=+g[Ea>>2]-+g[Ka>>2];g[xi>>2]=+g[Hh>>2]+ +g[fh>>2];g[yi>>2]=+g[bh>>2]+ +g[ah>>2];g[zi>>2]=+g[xi>>2]-+g[yi>>2];g[Ei>>2]=(+g[yi>>2]+ +g[xi>>2])*2.0;g[B>>2]=+g[jr>>2]+ +g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<5<<2)>>2]=+g[B>>2]-+g[La>>2];g[c[n>>2]>>2]=+g[B>>2]+ +g[La>>2];g[Di>>2]=+g[jr>>2]-+g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Di>>2]-+g[Ei>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*48<<2)>>2]=+g[Di>>2]+ +g[Ei>>2];g[vi>>2]=+g[ti>>2]-+g[ui>>2];g[Ai>>2]=(+g[wi>>2]-+g[zi>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*40<<2)>>2]=+g[vi>>2]-+g[Ai>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[vi>>2]+ +g[Ai>>2];g[Bi>>2]=+g[ti>>2]+ +g[ui>>2];g[Ci>>2]=(+g[wi>>2]+ +g[zi>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Bi>>2]-+g[Ci>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*56<<2)>>2]=+g[Bi>>2]+ +g[Ci>>2];g[Sg>>2]=+g[wk>>2]-+g[Uq>>2];g[Ug>>2]=+g[Sg>>2]-+g[Tg>>2];g[Nh>>2]=+g[Sg>>2]+ +g[Tg>>2];g[eh>>2]=+g[V>>2]-+g[Ja>>2];g[Ih>>2]=+g[fh>>2]-+g[Hh>>2];g[Jh>>2]=+g[eh>>2]+ +g[Ih>>2];g[pi>>2]=+g[Ih>>2]-+g[eh>>2];g[Vg>>2]=+g[jk>>2]-+g[z>>2];g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[Zg>>2]=(+g[Vg>>2]-+g[Yg>>2])*1.4142135381698608;g[Oh>>2]=(+g[Vg>>2]+ +g[Yg>>2])*1.4142135381698608;g[$g>>2]=+g[oa>>2]-+g[Da>>2];g[ch>>2]=+g[ah>>2]-+g[bh>>2];g[dh>>2]=+g[$g>>2]+ +g[ch>>2];g[Qh>>2]=+g[$g>>2]-+g[ch>>2];g[_g>>2]=+g[Ug>>2]+ +g[Zg>>2];g[Kh>>2]=+g[dh>>2]*1.8477590084075928-+g[Jh>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*36<<2)>>2]=+g[_g>>2]-+g[Kh>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[_g>>2]+ +g[Kh>>2];g[ri>>2]=+g[Nh>>2]+ +g[Oh>>2];g[si>>2]=+g[Qh>>2]*1.8477590084075928+ +g[pi>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[ri>>2]-+g[si>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*60<<2)>>2]=+g[ri>>2]+ +g[si>>2];g[Lh>>2]=+g[Ug>>2]-+g[Zg>>2];g[Mh>>2]=+g[dh>>2]*.7653668522834778+ +g[Jh>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[Lh>>2]-+g[Mh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*52<<2)>>2]=+g[Lh>>2]+ 
+g[Mh>>2];g[Ph>>2]=+g[Nh>>2]-+g[Oh>>2];g[qi>>2]=+g[Qh>>2]*.7653668522834778-+g[pi>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*44<<2)>>2]=+g[Ph>>2]-+g[qi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ph>>2]+ +g[qi>>2];g[yb>>2]=+g[ob>>2]+ +g[xb>>2];g[Ta>>2]=+g[Hb>>2]*1.8477590084075928-+g[Sa>>2]*.7653668522834778;g[Ua>>2]=+g[yb>>2]+ +g[Ta>>2];g[ud>>2]=+g[yb>>2]-+g[Ta>>2];g[ae>>2]=+g[_d>>2]+ +g[$d>>2];g[pd>>2]=+g[de>>2]+ +g[od>>2];g[qd>>2]=+g[ae>>2]+ +g[pd>>2];g[yd>>2]=+g[pd>>2]-+g[ae>>2];g[Wb>>2]=+g[bb>>2]+ +g[Vb>>2];g[Hc>>2]=+g[xc>>2]+ +g[Gc>>2];g[Ic>>2]=+g[Wb>>2]*1.9615705013275146-+g[Hc>>2]*.39018064737319946;g[vd>>2]=+g[Wb>>2]*.39018064737319946+ +g[Hc>>2]*1.9615705013275146;g[mc>>2]=+g[Sc>>2]+ +g[lc>>2];g[Yd>>2]=+g[ed>>2]+ +g[Xd>>2];g[Zd>>2]=+g[mc>>2]+ +g[Yd>>2];g[xd>>2]=+g[mc>>2]-+g[Yd>>2];g[Jc>>2]=+g[Ua>>2]+ +g[Ic>>2];g[rd>>2]=+g[Zd>>2]*1.990369439125061-+g[qd>>2]*.1960342824459076;g[(c[n>>2]|0)+((c[r>>2]|0)*33<<2)>>2]=+g[Jc>>2]-+g[rd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Jc>>2]+ +g[rd>>2];g[Ad>>2]=+g[ud>>2]+ +g[vd>>2];g[Bd>>2]=+g[xd>>2]*1.5460208654403687+ +g[yd>>2]*1.2687865495681763;g[(c[n>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[Ad>>2]-+g[Bd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*57<<2)>>2]=+g[Ad>>2]+ +g[Bd>>2];g[sd>>2]=+g[Ua>>2]-+g[Ic>>2];g[td>>2]=+g[Zd>>2]*.1960342824459076+ +g[qd>>2]*1.990369439125061;g[(c[n>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[sd>>2]-+g[td>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*49<<2)>>2]=+g[sd>>2]+ +g[td>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[zd>>2]=+g[xd>>2]*1.2687865495681763-+g[yd>>2]*1.5460208654403687;g[(c[n>>2]|0)+((c[r>>2]|0)*41<<2)>>2]=+g[wd>>2]-+g[zd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[wd>>2]+ +g[zd>>2];g[pg>>2]=+g[ng>>2]-+g[og>>2];g[ug>>2]=(+g[qg>>2]-+g[tg>>2])*1.4142135381698608;g[vg>>2]=+g[pg>>2]+ +g[ug>>2];g[kh>>2]=+g[pg>>2]-+g[ug>>2];g[Cg>>2]=(+g[Ag>>2]+ +g[Bg>>2])*.7071067690849304;g[Hg>>2]=+g[Dg>>2]+ +g[Gg>>2];g[gh>>2]=+g[Cg>>2]+ +g[Hg>>2];g[oh>>2]=+g[Hg>>2]-+g[Cg>>2];g[Cf>>2]=+g[wg>>2]+ +g[Bf>>2];g[Hf>>2]=+g[Df>>2]+ 
+g[Gf>>2];g[If>>2]=+g[Cf>>2]*1.8477590084075928-+g[Hf>>2]*.7653668522834778;g[lh>>2]=+g[Cf>>2]*.7653668522834778+ +g[Hf>>2]*1.8477590084075928;g[Of>>2]=+g[Kf>>2]+ +g[Nf>>2];g[yg>>2]=(+g[Tf>>2]+ +g[Yf>>2])*.7071067690849304;g[zg>>2]=+g[Of>>2]+ +g[yg>>2];g[nh>>2]=+g[Of>>2]-+g[yg>>2];g[Jf>>2]=+g[vg>>2]+ +g[If>>2];g[hh>>2]=+g[zg>>2]*1.9615705013275146-+g[gh>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*34<<2)>>2]=+g[Jf>>2]-+g[hh>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Jf>>2]+ +g[hh>>2];g[qh>>2]=+g[kh>>2]+ +g[lh>>2];g[rh>>2]=+g[nh>>2]*1.662939190864563+ +g[oh>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[qh>>2]-+g[rh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*58<<2)>>2]=+g[qh>>2]+ +g[rh>>2];g[ih>>2]=+g[vg>>2]-+g[If>>2];g[jh>>2]=+g[zg>>2]*.39018064737319946+ +g[gh>>2]*1.9615705013275146;g[(c[n>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[ih>>2]-+g[jh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*50<<2)>>2]=+g[ih>>2]+ +g[jh>>2];g[mh>>2]=+g[kh>>2]-+g[lh>>2];g[ph>>2]=+g[nh>>2]*1.111140489578247-+g[oh>>2]*1.662939190864563;g[(c[n>>2]|0)+((c[r>>2]|0)*42<<2)>>2]=+g[mh>>2]-+g[ph>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[mh>>2]+ +g[ph>>2];g[Cd>>2]=+g[ob>>2]-+g[xb>>2];g[Dd>>2]=+g[Hb>>2]*.7653668522834778+ +g[Sa>>2]*1.8477590084075928;g[Ed>>2]=+g[Cd>>2]-+g[Dd>>2];g[Re>>2]=+g[Cd>>2]+ +g[Dd>>2];g[le>>2]=+g[ed>>2]-+g[Xd>>2];g[me>>2]=+g[od>>2]-+g[de>>2];g[ne>>2]=+g[le>>2]+ +g[me>>2];g[Ve>>2]=+g[me>>2]-+g[le>>2];g[Fd>>2]=+g[bb>>2]-+g[Vb>>2];g[fe>>2]=+g[Gc>>2]-+g[xc>>2];g[ge>>2]=+g[Fd>>2]*1.111140489578247-+g[fe>>2]*1.662939190864563;g[Se>>2]=+g[Fd>>2]*1.662939190864563+ +g[fe>>2]*1.111140489578247;g[ie>>2]=+g[Sc>>2]-+g[lc>>2];g[je>>2]=+g[$d>>2]-+g[_d>>2];g[ke>>2]=+g[ie>>2]+ +g[je>>2];g[Ue>>2]=+g[ie>>2]-+g[je>>2];g[he>>2]=+g[Ed>>2]+ +g[ge>>2];g[oe>>2]=+g[ke>>2]*1.7638425827026367-+g[ne>>2]*.9427934885025024;g[(c[n>>2]|0)+((c[r>>2]|0)*37<<2)>>2]=+g[he>>2]-+g[oe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[he>>2]+ +g[oe>>2];g[Xe>>2]=+g[Re>>2]+ +g[Se>>2];g[Ye>>2]=+g[Ue>>2]*1.913880705833435+ 
+g[Ve>>2]*.580569326877594;g[(c[n>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[Xe>>2]-+g[Ye>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*61<<2)>>2]=+g[Xe>>2]+ +g[Ye>>2];g[Pe>>2]=+g[Ed>>2]-+g[ge>>2];g[Qe>>2]=+g[ke>>2]*.9427934885025024+ +g[ne>>2]*1.7638425827026367;g[(c[n>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Pe>>2]-+g[Qe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*53<<2)>>2]=+g[Pe>>2]+ +g[Qe>>2];g[Te>>2]=+g[Re>>2]-+g[Se>>2];g[We>>2]=+g[Ue>>2]*.580569326877594-+g[Ve>>2]*1.913880705833435;g[(c[n>>2]|0)+((c[r>>2]|0)*45<<2)>>2]=+g[Te>>2]-+g[We>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Te>>2]+ +g[We>>2];g[sh>>2]=+g[ng>>2]+ +g[og>>2];g[th>>2]=(+g[qg>>2]+ +g[tg>>2])*1.4142135381698608;g[uh>>2]=+g[sh>>2]-+g[th>>2];g[Kg>>2]=+g[sh>>2]+ +g[th>>2];g[Ch>>2]=(+g[Tf>>2]-+g[Yf>>2])*.7071067690849304;g[Dh>>2]=+g[Gg>>2]-+g[Dg>>2];g[Eh>>2]=+g[Ch>>2]+ +g[Dh>>2];g[Og>>2]=+g[Dh>>2]-+g[Ch>>2];g[vh>>2]=+g[wg>>2]-+g[Bf>>2];g[wh>>2]=+g[Gf>>2]-+g[Df>>2];g[xh>>2]=+g[vh>>2]*.7653668522834778-+g[wh>>2]*1.8477590084075928;g[Lg>>2]=+g[vh>>2]*1.8477590084075928+ +g[wh>>2]*.7653668522834778;g[zh>>2]=+g[Kf>>2]-+g[Nf>>2];g[Ah>>2]=(+g[Bg>>2]-+g[Ag>>2])*.7071067690849304;g[Bh>>2]=+g[zh>>2]+ +g[Ah>>2];g[Ng>>2]=+g[zh>>2]-+g[Ah>>2];g[yh>>2]=+g[uh>>2]+ +g[xh>>2];g[Fh>>2]=+g[Bh>>2]*1.662939190864563-+g[Eh>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*38<<2)>>2]=+g[yh>>2]-+g[Fh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[yh>>2]+ +g[Fh>>2];g[Qg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[Rg>>2]=+g[Ng>>2]*1.9615705013275146+ +g[Og>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[Qg>>2]-+g[Rg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*62<<2)>>2]=+g[Qg>>2]+ +g[Rg>>2];g[Ig>>2]=+g[uh>>2]-+g[xh>>2];g[Jg>>2]=+g[Bh>>2]*1.111140489578247+ +g[Eh>>2]*1.662939190864563;g[(c[n>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Ig>>2]-+g[Jg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*54<<2)>>2]=+g[Ig>>2]+ 
+g[Jg>>2];g[Mg>>2]=+g[Kg>>2]-+g[Lg>>2];g[Pg>>2]=+g[Ng>>2]*.39018064737319946-+g[Og>>2]*1.9615705013275146;g[(c[n>>2]|0)+((c[r>>2]|0)*46<<2)>>2]=+g[Mg>>2]-+g[Pg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Mg>>2]+ +g[Pg>>2];g[$e>>2]=+g[Ze>>2]-+g[_e>>2];g[cf>>2]=+g[af>>2]*.7653668522834778-+g[bf>>2]*1.8477590084075928;g[df>>2]=+g[$e>>2]+ +g[cf>>2];g[Je>>2]=+g[$e>>2]-+g[cf>>2];g[Be>>2]=+g[ze>>2]+ +g[Ae>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[Fe>>2]=+g[Be>>2]+ +g[Ee>>2];g[Ne>>2]=+g[Ee>>2]-+g[Be>>2];g[gf>>2]=+g[ef>>2]+ +g[ff>>2];g[kf>>2]=+g[hf>>2]+ +g[jf>>2];g[lf>>2]=+g[gf>>2]*1.662939190864563-+g[kf>>2]*1.111140489578247;g[Ke>>2]=+g[gf>>2]*1.111140489578247+ +g[kf>>2]*1.662939190864563;g[qe>>2]=+g[nf>>2]+ +g[pe>>2];g[xe>>2]=+g[te>>2]+ +g[we>>2];g[ye>>2]=+g[qe>>2]+ +g[xe>>2];g[Me>>2]=+g[qe>>2]-+g[xe>>2];g[mf>>2]=+g[df>>2]+ +g[lf>>2];g[Ge>>2]=+g[ye>>2]*1.913880705833435-+g[Fe>>2]*.580569326877594;g[(c[n>>2]|0)+((c[r>>2]|0)*35<<2)>>2]=+g[mf>>2]-+g[Ge>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[mf>>2]+ +g[Ge>>2];g[pf>>2]=+g[Je>>2]+ +g[Ke>>2];g[qf>>2]=+g[Me>>2]*1.7638425827026367+ +g[Ne>>2]*.9427934885025024;g[(c[n>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[pf>>2]-+g[qf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*59<<2)>>2]=+g[pf>>2]+ +g[qf>>2];g[He>>2]=+g[df>>2]-+g[lf>>2];g[Ie>>2]=+g[ye>>2]*.580569326877594+ +g[Fe>>2]*1.913880705833435;g[(c[n>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[He>>2]-+g[Ie>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*51<<2)>>2]=+g[He>>2]+ +g[Ie>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[Oe>>2]=+g[Me>>2]*.9427934885025024-+g[Ne>>2]*1.7638425827026367;g[(c[n>>2]|0)+((c[r>>2]|0)*43<<2)>>2]=+g[Le>>2]-+g[Oe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Le>>2]+ +g[Oe>>2];g[rf>>2]=+g[Ze>>2]+ +g[_e>>2];g[sf>>2]=+g[af>>2]*1.8477590084075928+ +g[bf>>2]*.7653668522834778;g[tf>>2]=+g[rf>>2]-+g[sf>>2];g[fg>>2]=+g[rf>>2]+ +g[sf>>2];g[$f>>2]=+g[te>>2]-+g[we>>2];g[ag>>2]=+g[De>>2]-+g[Ce>>2];g[bg>>2]=+g[$f>>2]+ 
+g[ag>>2];g[jg>>2]=+g[ag>>2]-+g[$f>>2];g[uf>>2]=+g[ef>>2]-+g[ff>>2];g[vf>>2]=+g[jf>>2]-+g[hf>>2];g[wf>>2]=+g[uf>>2]*.39018064737319946-+g[vf>>2]*1.9615705013275146;g[gg>>2]=+g[uf>>2]*1.9615705013275146+ +g[vf>>2]*.39018064737319946;g[yf>>2]=+g[nf>>2]-+g[pe>>2];g[Zf>>2]=+g[Ae>>2]-+g[ze>>2];g[_f>>2]=+g[yf>>2]+ +g[Zf>>2];g[ig>>2]=+g[yf>>2]-+g[Zf>>2];g[xf>>2]=+g[tf>>2]+ +g[wf>>2];g[cg>>2]=+g[_f>>2]*1.5460208654403687-+g[bg>>2]*1.2687865495681763;g[(c[n>>2]|0)+((c[r>>2]|0)*39<<2)>>2]=+g[xf>>2]-+g[cg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[xf>>2]+ +g[cg>>2];g[lg>>2]=+g[fg>>2]+ +g[gg>>2];g[mg>>2]=+g[ig>>2]*1.990369439125061+ +g[jg>>2]*.1960342824459076;g[(c[n>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[lg>>2]-+g[mg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*63<<2)>>2]=+g[lg>>2]+ +g[mg>>2];g[dg>>2]=+g[tf>>2]-+g[wf>>2];g[eg>>2]=+g[_f>>2]*1.2687865495681763+ +g[bg>>2]*1.5460208654403687;g[(c[n>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[dg>>2]-+g[eg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*55<<2)>>2]=+g[dg>>2]+ +g[eg>>2];g[hg>>2]=+g[fg>>2]-+g[gg>>2];g[kg>>2]=+g[ig>>2]*.1960342824459076-+g[jg>>2]*1.990369439125061;g[(c[n>>2]|0)+((c[r>>2]|0)*47<<2)>>2]=+g[hg>>2]-+g[kg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[hg>>2]+ +g[kg>>2];g[Wh>>2]=+g[Mi>>2]+ +g[Vh>>2];g[ni>>2]=+g[fi>>2]*1.9615705013275146-+g[mi>>2]*.39018064737319946;g[oi>>2]=+g[Wh>>2]+ +g[ni>>2];g[Hl>>2]=+g[Wh>>2]-+g[ni>>2];g[Oj>>2]=+g[yj>>2]+ +g[Nj>>2];g[$i>>2]=+g[Rj>>2]+ +g[_i>>2];g[aj>>2]=+g[Oj>>2]*1.990369439125061-+g[$i>>2]*.1960342824459076;g[Il>>2]=+g[Oj>>2]*.1960342824459076+ +g[$i>>2]*1.990369439125061;g[qm>>2]=+g[yj>>2]-+g[Nj>>2];g[rm>>2]=+g[_i>>2]-+g[Rj>>2];g[sm>>2]=+g[qm>>2]*1.2687865495681763-+g[rm>>2]*1.5460208654403687;g[Em>>2]=+g[qm>>2]*1.5460208654403687+ +g[rm>>2]*1.2687865495681763;g[Pl>>2]=+g[Mi>>2]-+g[Vh>>2];g[om>>2]=+g[fi>>2]*.39018064737319946+ +g[mi>>2]*1.9615705013275146;g[pm>>2]=+g[Pl>>2]-+g[om>>2];g[Dm>>2]=+g[Pl>>2]+ +g[om>>2];g[bl>>2]=+g[Vk>>2]+ +g[al>>2];g[um>>2]=+g[mj>>2]-+g[Ak>>2];g[Qk>>2]=+g[kl>>2]*.19509032368659973+ 
+g[rl>>2]*.9807852506637573;g[Rk>>2]=+g[Mk>>2]*.9807852506637573-+g[Dl>>2]*.19509032368659973;g[Sk>>2]=+g[Qk>>2]+ +g[Rk>>2];g[vm>>2]=+g[Rk>>2]-+g[Qk>>2];g[cl>>2]=+g[Sk>>2]+ +g[bl>>2];g[Gm>>2]=+g[um>>2]-+g[vm>>2];g[Ll>>2]=+g[bl>>2]-+g[Sk>>2];g[wm>>2]=+g[um>>2]+ +g[vm>>2];g[Bk>>2]=+g[mj>>2]+ +g[Ak>>2];g[ym>>2]=+g[al>>2]-+g[Vk>>2];g[sl>>2]=+g[kl>>2]*.9807852506637573-+g[rl>>2]*.19509032368659973;g[Nk>>2]=+g[Dl>>2]*.9807852506637573+ +g[Mk>>2]*.19509032368659973;g[Ok>>2]=+g[sl>>2]+ +g[Nk>>2];g[xm>>2]=+g[sl>>2]-+g[Nk>>2];g[Pk>>2]=+g[Bk>>2]+ +g[Ok>>2];g[Hm>>2]=+g[ym>>2]-+g[xm>>2];g[Kl>>2]=+g[Bk>>2]-+g[Ok>>2];g[zm>>2]=+g[xm>>2]+ +g[ym>>2];g[bj>>2]=+g[oi>>2]+ +g[aj>>2];g[dl>>2]=+g[Pk>>2]*1.9975908994674683-+g[cl>>2]*.09813535213470459;g[(c[o>>2]|0)+(c[r>>2]<<5<<2)>>2]=+g[bj>>2]-+g[dl>>2];g[c[o>>2]>>2]=+g[bj>>2]+ +g[dl>>2];g[Fm>>2]=+g[Dm>>2]-+g[Em>>2];g[Im>>2]=+g[Gm>>2]*.6737797260284424-+g[Hm>>2]*1.8830881118774414;g[(c[o>>2]|0)+((c[r>>2]|0)*44<<2)>>2]=+g[Fm>>2]-+g[Im>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Fm>>2]+ +g[Im>>2];g[Jm>>2]=+g[Dm>>2]+ +g[Em>>2];g[Km>>2]=+g[Gm>>2]*1.8830881118774414+ +g[Hm>>2]*.6737797260284424;g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[Jm>>2]-+g[Km>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*60<<2)>>2]=+g[Jm>>2]+ +g[Km>>2];g[el>>2]=+g[oi>>2]-+g[aj>>2];g[Gl>>2]=+g[Pk>>2]*.09813535213470459+ +g[cl>>2]*1.9975908994674683;g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[el>>2]-+g[Gl>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*48<<2)>>2]=+g[el>>2]+ +g[Gl>>2];g[Jl>>2]=+g[Hl>>2]-+g[Il>>2];g[Ml>>2]=+g[Kl>>2]*1.3431179523468018-+g[Ll>>2]*1.4819022417068481;g[(c[o>>2]|0)+((c[r>>2]|0)*40<<2)>>2]=+g[Jl>>2]-+g[Ml>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Jl>>2]+ +g[Ml>>2];g[tm>>2]=+g[pm>>2]+ +g[sm>>2];g[Am>>2]=+g[wm>>2]*1.807978630065918-+g[zm>>2]*.8551101684570312;g[(c[o>>2]|0)+((c[r>>2]|0)*36<<2)>>2]=+g[tm>>2]-+g[Am>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[tm>>2]+ +g[Am>>2];g[Bm>>2]=+g[pm>>2]-+g[sm>>2];g[Cm>>2]=+g[wm>>2]*.8551101684570312+ 
+g[zm>>2]*1.807978630065918;g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[Bm>>2]-+g[Cm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*52<<2)>>2]=+g[Bm>>2]+ +g[Cm>>2];g[Nl>>2]=+g[Hl>>2]+ +g[Il>>2];g[Ol>>2]=+g[Kl>>2]*1.4819022417068481+ +g[Ll>>2]*1.3431179523468018;g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Nl>>2]-+g[Ol>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*56<<2)>>2]=+g[Nl>>2]+ +g[Ol>>2];g[gn>>2]=+g[cn>>2]+ +g[fn>>2];g[on>>2]=+g[kn>>2]*1.662939190864563-+g[nn>>2]*1.111140489578247;g[pn>>2]=+g[gn>>2]+ +g[on>>2];g[zo>>2]=+g[gn>>2]-+g[on>>2];g[$n>>2]=+g[sn>>2]+ +g[_n>>2];g[ho>>2]=+g[co>>2]+ +g[go>>2];g[Io>>2]=+g[$n>>2]*1.913880705833435-+g[ho>>2]*.580569326877594;g[Ao>>2]=+g[$n>>2]*.580569326877594+ +g[ho>>2]*1.913880705833435;g[jp>>2]=+g[sn>>2]-+g[_n>>2];g[kp>>2]=+g[go>>2]-+g[co>>2];g[lp>>2]=+g[jp>>2]*.9427934885025024-+g[kp>>2]*1.7638425827026367;g[Xp>>2]=+g[jp>>2]*1.7638425827026367+ +g[kp>>2]*.9427934885025024;g[Ho>>2]=+g[cn>>2]-+g[fn>>2];g[hp>>2]=+g[kn>>2]*1.111140489578247+ +g[nn>>2]*1.662939190864563;g[ip>>2]=+g[Ho>>2]-+g[hp>>2];g[Wp>>2]=+g[Ho>>2]+ +g[hp>>2];g[uo>>2]=+g[qo>>2]+ +g[to>>2];g[np>>2]=+g[Mo>>2]-+g[To>>2];g[lo>>2]=+g[Xo>>2]*.5555702447891235+ +g[_o>>2]*.8314695954322815;g[mo>>2]=+g[fp>>2]*.8314695954322815-+g[cp>>2]*.5555702447891235;g[no>>2]=+g[lo>>2]+ +g[mo>>2];g[op>>2]=+g[mo>>2]-+g[lo>>2];g[vo>>2]=+g[no>>2]+ +g[uo>>2];g[Zp>>2]=+g[np>>2]-+g[op>>2];g[Do>>2]=+g[uo>>2]-+g[no>>2];g[pp>>2]=+g[np>>2]+ +g[op>>2];g[Uo>>2]=+g[Mo>>2]+ +g[To>>2];g[Rp>>2]=+g[to>>2]-+g[qo>>2];g[$o>>2]=+g[Xo>>2]*.8314695954322815-+g[_o>>2]*.5555702447891235;g[io>>2]=+g[cp>>2]*.8314695954322815+ +g[fp>>2]*.5555702447891235;g[jo>>2]=+g[$o>>2]+ +g[io>>2];g[qp>>2]=+g[$o>>2]-+g[io>>2];g[ko>>2]=+g[Uo>>2]+ +g[jo>>2];g[_p>>2]=+g[Rp>>2]-+g[qp>>2];g[Co>>2]=+g[Uo>>2]-+g[jo>>2];g[Sp>>2]=+g[qp>>2]+ +g[Rp>>2];g[Jo>>2]=+g[pn>>2]+ +g[Io>>2];g[wo>>2]=+g[ko>>2]*1.9783530235290527-+g[vo>>2]*.2934609353542328;g[(c[o>>2]|0)+((c[r>>2]|0)*33<<2)>>2]=+g[Jo>>2]-+g[wo>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Jo>>2]+ 
+g[wo>>2];g[Yp>>2]=+g[Wp>>2]-+g[Xp>>2];g[$p>>2]=+g[Zp>>2]*.48596036434173584-+g[_p>>2]*1.9400625228881836;g[(c[o>>2]|0)+((c[r>>2]|0)*45<<2)>>2]=+g[Yp>>2]-+g[$p>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Yp>>2]+ +g[$p>>2];g[aq>>2]=+g[Wp>>2]+ +g[Xp>>2];g[bq>>2]=+g[Zp>>2]*1.9400625228881836+ +g[_p>>2]*.48596036434173584;g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[aq>>2]-+g[bq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*61<<2)>>2]=+g[aq>>2]+ +g[bq>>2];g[xo>>2]=+g[pn>>2]-+g[Io>>2];g[yo>>2]=+g[ko>>2]*.2934609353542328+ +g[vo>>2]*1.9783530235290527;g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[xo>>2]-+g[yo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*49<<2)>>2]=+g[xo>>2]+ +g[yo>>2];g[Bo>>2]=+g[zo>>2]-+g[Ao>>2];g[Eo>>2]=+g[Co>>2]*1.1913986206054688-+g[Do>>2]*1.606415033340454;g[(c[o>>2]|0)+((c[r>>2]|0)*41<<2)>>2]=+g[Bo>>2]-+g[Eo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Bo>>2]+ +g[Eo>>2];g[mp>>2]=+g[ip>>2]+ +g[lp>>2];g[Tp>>2]=+g[pp>>2]*1.7154572010040283-+g[Sp>>2]*1.0282055139541626;g[(c[o>>2]|0)+((c[r>>2]|0)*37<<2)>>2]=+g[mp>>2]-+g[Tp>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[mp>>2]+ +g[Tp>>2];g[Up>>2]=+g[ip>>2]-+g[lp>>2];g[Vp>>2]=+g[pp>>2]*1.0282055139541626+ +g[Sp>>2]*1.7154572010040283;g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Up>>2]-+g[Vp>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*53<<2)>>2]=+g[Up>>2]+ +g[Vp>>2];g[Fo>>2]=+g[zo>>2]+ +g[Ao>>2];g[Go>>2]=+g[Co>>2]*1.606415033340454+ +g[Do>>2]*1.1913986206054688;g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[Fo>>2]-+g[Go>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*57<<2)>>2]=+g[Fo>>2]+ +g[Go>>2];g[Nm>>2]=+g[Lm>>2]-+g[Mm>>2];g[Sl>>2]=+g[Ql>>2]*1.111140489578247-+g[Rl>>2]*1.662939190864563;g[Tl>>2]=+g[Nm>>2]+ +g[Sl>>2];g[Wm>>2]=+g[Nm>>2]-+g[Sl>>2];g[Wl>>2]=+g[Ul>>2]+ +g[Vl>>2];g[Zl>>2]=+g[Xl>>2]+ +g[Yl>>2];g[_l>>2]=+g[Wl>>2]*1.7638425827026367-+g[Zl>>2]*.9427934885025024;g[Xm>>2]=+g[Wl>>2]*.9427934885025024+ 
+g[Zl>>2]*1.7638425827026367;g[Gn>>2]=+g[Ul>>2]-+g[Vl>>2];g[Hn>>2]=+g[Yl>>2]-+g[Xl>>2];g[In>>2]=+g[Gn>>2]*.580569326877594-+g[Hn>>2]*1.913880705833435;g[Un>>2]=+g[Gn>>2]*1.913880705833435+ +g[Hn>>2]*.580569326877594;g[Dn>>2]=+g[Lm>>2]+ +g[Mm>>2];g[En>>2]=+g[Ql>>2]*1.662939190864563+ +g[Rl>>2]*1.111140489578247;g[Fn>>2]=+g[Dn>>2]-+g[En>>2];g[Tn>>2]=+g[Dn>>2]+ +g[En>>2];g[Rm>>2]=+g[Pm>>2]+ +g[Qm>>2];g[Kn>>2]=+g[am>>2]-+g[bm>>2];g[lm>>2]=+g[dm>>2]*.8314695954322815+ +g[em>>2]*.5555702447891235;g[mm>>2]=+g[hm>>2]*.5555702447891235-+g[gm>>2]*.8314695954322815;g[nm>>2]=+g[lm>>2]+ +g[mm>>2];g[Ln>>2]=+g[mm>>2]-+g[lm>>2];g[Sm>>2]=+g[nm>>2]+ +g[Rm>>2];g[Wn>>2]=+g[Kn>>2]-+g[Ln>>2];g[zn>>2]=+g[Rm>>2]-+g[nm>>2];g[Mn>>2]=+g[Kn>>2]+ +g[Ln>>2];g[cm>>2]=+g[am>>2]+ +g[bm>>2];g[On>>2]=+g[Qm>>2]-+g[Pm>>2];g[fm>>2]=+g[dm>>2]*.5555702447891235-+g[em>>2]*.8314695954322815;g[im>>2]=+g[gm>>2]*.5555702447891235+ +g[hm>>2]*.8314695954322815;g[jm>>2]=+g[fm>>2]+ +g[im>>2];g[Nn>>2]=+g[fm>>2]-+g[im>>2];g[km>>2]=+g[cm>>2]+ +g[jm>>2];g[Xn>>2]=+g[On>>2]-+g[Nn>>2];g[yn>>2]=+g[cm>>2]-+g[jm>>2];g[Pn>>2]=+g[Nn>>2]+ +g[On>>2];g[$l>>2]=+g[Tl>>2]+ +g[_l>>2];g[Tm>>2]=+g[km>>2]*1.9400625228881836-+g[Sm>>2]*.48596036434173584;g[(c[o>>2]|0)+((c[r>>2]|0)*34<<2)>>2]=+g[$l>>2]-+g[Tm>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$l>>2]+ +g[Tm>>2];g[Vn>>2]=+g[Tn>>2]-+g[Un>>2];g[Zm>>2]=+g[Wn>>2]*.2934609353542328-+g[Xn>>2]*1.9783530235290527;g[(c[o>>2]|0)+((c[r>>2]|0)*46<<2)>>2]=+g[Vn>>2]-+g[Zm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Vn>>2]+ +g[Zm>>2];g[_m>>2]=+g[Tn>>2]+ +g[Un>>2];g[$m>>2]=+g[Wn>>2]*1.9783530235290527+ +g[Xn>>2]*.2934609353542328;g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[_m>>2]-+g[$m>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*62<<2)>>2]=+g[_m>>2]+ +g[$m>>2];g[Um>>2]=+g[Tl>>2]-+g[_l>>2];g[Vm>>2]=+g[km>>2]*.48596036434173584+ +g[Sm>>2]*1.9400625228881836;g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Um>>2]-+g[Vm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*50<<2)>>2]=+g[Um>>2]+ 
+g[Vm>>2];g[Ym>>2]=+g[Wm>>2]-+g[Xm>>2];g[An>>2]=+g[yn>>2]*1.0282055139541626-+g[zn>>2]*1.7154572010040283;g[(c[o>>2]|0)+((c[r>>2]|0)*42<<2)>>2]=+g[Ym>>2]-+g[An>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ym>>2]+ +g[An>>2];g[Jn>>2]=+g[Fn>>2]+ +g[In>>2];g[Qn>>2]=+g[Mn>>2]*1.606415033340454-+g[Pn>>2]*1.1913986206054688;g[(c[o>>2]|0)+((c[r>>2]|0)*38<<2)>>2]=+g[Jn>>2]-+g[Qn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Jn>>2]+ +g[Qn>>2];g[Rn>>2]=+g[Fn>>2]-+g[In>>2];g[Sn>>2]=+g[Mn>>2]*1.1913986206054688+ +g[Pn>>2]*1.606415033340454;g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Rn>>2]-+g[Sn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*54<<2)>>2]=+g[Rn>>2]+ +g[Sn>>2];g[Bn>>2]=+g[Wm>>2]+ +g[Xm>>2];g[Cn>>2]=+g[yn>>2]*1.7154572010040283+ +g[zn>>2]*1.0282055139541626;g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[Bn>>2]-+g[Cn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*58<<2)>>2]=+g[Bn>>2]+ +g[Cn>>2];g[eq>>2]=+g[cq>>2]-+g[dq>>2];g[hq>>2]=+g[fq>>2]*.39018064737319946-+g[gq>>2]*1.9615705013275146;g[iq>>2]=+g[eq>>2]+ +g[hq>>2];g[Op>>2]=+g[eq>>2]-+g[hq>>2];g[lq>>2]=+g[jq>>2]-+g[kq>>2];g[oq>>2]=+g[mq>>2]+ +g[nq>>2];g[rp>>2]=+g[lq>>2]*1.5460208654403687-+g[oq>>2]*1.2687865495681763;g[Pp>>2]=+g[lq>>2]*1.2687865495681763+ +g[oq>>2]*1.5460208654403687;g[yq>>2]=+g[jq>>2]+ +g[kq>>2];g[zq>>2]=+g[nq>>2]-+g[mq>>2];g[Aq>>2]=+g[yq>>2]*.1960342824459076-+g[zq>>2]*1.990369439125061;g[Mq>>2]=+g[yq>>2]*1.990369439125061+ +g[zq>>2]*.1960342824459076;g[vq>>2]=+g[cq>>2]+ +g[dq>>2];g[wq>>2]=+g[fq>>2]*1.9615705013275146+ +g[gq>>2]*.39018064737319946;g[xq>>2]=+g[vq>>2]-+g[wq>>2];g[Lq>>2]=+g[vq>>2]+ +g[wq>>2];g[Jp>>2]=+g[Hp>>2]+ +g[Ip>>2];g[Cq>>2]=+g[tp>>2]+ +g[up>>2];g[Ep>>2]=+g[wp>>2]*.9807852506637573+ +g[xp>>2]*.19509032368659973;g[Fp>>2]=+g[zp>>2]*.9807852506637573+ +g[Ap>>2]*.19509032368659973;g[Gp>>2]=+g[Ep>>2]-+g[Fp>>2];g[Dq>>2]=+g[Ep>>2]+ +g[Fp>>2];g[Kp>>2]=+g[Gp>>2]+ +g[Jp>>2];g[Oq>>2]=+g[Cq>>2]+ 
+g[Dq>>2];g[rq>>2]=+g[Jp>>2]-+g[Gp>>2];g[Eq>>2]=+g[Cq>>2]-+g[Dq>>2];g[vp>>2]=+g[tp>>2]-+g[up>>2];g[Gq>>2]=+g[Ip>>2]-+g[Hp>>2];g[yp>>2]=+g[wp>>2]*.19509032368659973-+g[xp>>2]*.9807852506637573;g[Bp>>2]=+g[zp>>2]*.19509032368659973-+g[Ap>>2]*.9807852506637573;g[Cp>>2]=+g[yp>>2]+ +g[Bp>>2];g[Fq>>2]=+g[yp>>2]-+g[Bp>>2];g[Dp>>2]=+g[vp>>2]+ +g[Cp>>2];g[Pq>>2]=+g[Gq>>2]-+g[Fq>>2];g[qq>>2]=+g[vp>>2]-+g[Cp>>2];g[Hq>>2]=+g[Fq>>2]+ +g[Gq>>2];g[sp>>2]=+g[iq>>2]+ +g[rp>>2];g[Lp>>2]=+g[Dp>>2]*1.8830881118774414-+g[Kp>>2]*.6737797260284424;g[(c[o>>2]|0)+((c[r>>2]|0)*35<<2)>>2]=+g[sp>>2]-+g[Lp>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[sp>>2]+ +g[Lp>>2];g[Nq>>2]=+g[Lq>>2]-+g[Mq>>2];g[Qq>>2]=+g[Oq>>2]*.09813535213470459-+g[Pq>>2]*1.9975908994674683;g[(c[o>>2]|0)+((c[r>>2]|0)*47<<2)>>2]=+g[Nq>>2]-+g[Qq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Nq>>2]+ +g[Qq>>2];g[Rq>>2]=+g[Lq>>2]+ +g[Mq>>2];g[Sq>>2]=+g[Oq>>2]*1.9975908994674683+ +g[Pq>>2]*.09813535213470459;g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[Rq>>2]-+g[Sq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*63<<2)>>2]=+g[Rq>>2]+ +g[Sq>>2];g[Mp>>2]=+g[iq>>2]-+g[rp>>2];g[Np>>2]=+g[Dp>>2]*.6737797260284424+ +g[Kp>>2]*1.8830881118774414;g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[Mp>>2]-+g[Np>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*51<<2)>>2]=+g[Mp>>2]+ +g[Np>>2];g[Qp>>2]=+g[Op>>2]-+g[Pp>>2];g[sq>>2]=+g[qq>>2]*.8551101684570312-+g[rq>>2]*1.807978630065918;g[(c[o>>2]|0)+((c[r>>2]|0)*43<<2)>>2]=+g[Qp>>2]-+g[sq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Qp>>2]+ +g[sq>>2];g[Bq>>2]=+g[xq>>2]+ +g[Aq>>2];g[Iq>>2]=+g[Eq>>2]*1.4819022417068481-+g[Hq>>2]*1.3431179523468018;g[(c[o>>2]|0)+((c[r>>2]|0)*39<<2)>>2]=+g[Bq>>2]-+g[Iq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Bq>>2]+ +g[Iq>>2];g[Jq>>2]=+g[xq>>2]-+g[Aq>>2];g[Kq>>2]=+g[Eq>>2]*1.3431179523468018+ +g[Hq>>2]*1.4819022417068481;g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[Jq>>2]-+g[Kq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*55<<2)>>2]=+g[Jq>>2]+ +g[Kq>>2];g[tq>>2]=+g[Op>>2]+ +g[Pp>>2];g[uq>>2]=+g[qq>>2]*1.807978630065918+ 
+g[rq>>2]*.8551101684570312;g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[tq>>2]-+g[uq>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*59<<2)>>2]=+g[tq>>2]+ +g[uq>>2];c[lr>>2]=(c[lr>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=mr;return}function bw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,56,10744);i=b;return}function cw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0;ja=i;i=i+224|0;n=ja+208|0;o=ja+204|0;p=ja+200|0;q=ja+196|0;r=ja+192|0;s=ja+188|0;t=ja+184|0;ka=ja+180|0;u=ja+176|0;v=ja+172|0;ia=ja+160|0;D=ja+156|0;L=ja+152|0;W=ja+148|0;F=ja+144|0;ea=ja+140|0;fa=ja+136|0;Z=ja+132|0;G=ja+128|0;y=ja+124|0;$=ja+120|0;Q=ja+116|0;B=ja+112|0;aa=ja+108|0;U=ja+104|0;E=ja+100|0;K=ja+96|0;X=ja+92|0;Y=ja+88|0;P=ja+84|0;w=ja+80|0;x=ja+76|0;N=ja+72|0;O=ja+68|0;T=ja+64|0;z=ja+60|0;A=ja+56|0;R=ja+52|0;S=ja+48|0;C=ja+44|0;M=ja+40|0;ha=ja+36|0;H=ja+32|0;V=ja+28|0;_=ja+24|0;I=ja+20|0;J=ja+16|0;da=ja+12|0;ga=ja+8|0;ba=ja+4|0;ca=ja;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ka>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ja+168>>2]=1.7320507764816284;g[ja+164>>2]=2.0;c[ia>>2]=c[ka>>2];while(1){if((c[ia>>2]|0)<=0)break;g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[E>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[K>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[L>>2]=+g[E>>2]+ +g[K>>2];g[W>>2]=+g[D>>2]*2.0-+g[L>>2];g[F>>2]=(+g[E>>2]-+g[K>>2])*1.7320507764816284;g[ea>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[X>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Y>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[fa>>2]=+g[X>>2]+ 
+g[Y>>2];g[Z>>2]=(+g[X>>2]-+g[Y>>2])*1.7320507764816284;g[G>>2]=+g[ea>>2]*2.0+ +g[fa>>2];g[O>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[P>>2]=+g[O>>2]*1.7320507764816284;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[N>>2]=+g[w>>2]-+g[x>>2];g[y>>2]=+g[x>>2]*2.0+ +g[w>>2];g[$>>2]=+g[N>>2]-+g[P>>2];g[Q>>2]=+g[N>>2]+ +g[P>>2];g[S>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[T>>2]=+g[S>>2]*1.7320507764816284;g[z>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[R>>2]=+g[z>>2]-+g[A>>2];g[B>>2]=+g[A>>2]*2.0+ +g[z>>2];g[aa>>2]=+g[R>>2]+ +g[T>>2];g[U>>2]=+g[R>>2]-+g[T>>2];g[C>>2]=+g[y>>2]+ +g[B>>2];g[M>>2]=(+g[D>>2]+ +g[L>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[C>>2]-+g[M>>2];g[c[n>>2]>>2]=+g[C>>2]+ +g[M>>2];g[V>>2]=+g[Q>>2]+ +g[U>>2];g[_>>2]=+g[W>>2]-+g[Z>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[V>>2]-+g[_>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[V>>2]+ +g[_>>2];g[I>>2]=+g[Q>>2]-+g[U>>2];g[J>>2]=+g[G>>2]-+g[F>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]-+g[J>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[I>>2]+ +g[J>>2];g[ha>>2]=+g[$>>2]-+g[aa>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[c[o>>2]>>2]=+g[ha>>2]-+g[H>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ha>>2]+ +g[H>>2];g[da>>2]=+g[y>>2]-+g[B>>2];g[ga>>2]=(+g[ea>>2]-+g[fa>>2])*2.0;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[da>>2]-+g[ga>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[da>>2]+ +g[ga>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[ca>>2]=+g[W>>2]+ +g[Z>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ba>>2]-+g[ca>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ba>>2]+ +g[ca>>2];c[ia>>2]=(c[ia>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ja;return}function dw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,57,10792);i=b;return}function ew(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0;Ta=i;i=i+432|0;n=Ta+420|0;o=Ta+416|0;p=Ta+412|0;q=Ta+408|0;r=Ta+404|0;s=Ta+400|0;t=Ta+396|0;Ua=Ta+392|0;u=Ta+388|0;v=Ta+384|0;Sa=Ta+304|0;_=Ta+300|0;ka=Ta+296|0;ja=Ta+292|0;C=Ta+288|0;ba=Ta+284|0;la=Ta+280|0;w=Ta+276|0;Ea=Ta+272|0;ua=Ta+268|0;va=Ta+264|0;ya=Ta+260|0;Ba=Ta+256|0;Ca=Ta+252|0;Fa=Ta+248|0;Ga=Ta+244|0;Ha=Ta+240|0;Ka=Ta+236|0;Na=Ta+232|0;Oa=Ta+228|0;Y=Ta+224|0;W=Ta+220|0;V=Ta+216|0;Ra=Ta+212|0;X=Ta+208|0;U=Ta+204|0;Z=Ta+200|0;La=Ta+196|0;Ma=Ta+192|0;Pa=Ta+188|0;Qa=Ta+184|0;ha=Ta+180|0;ia=Ta+176|0;$=Ta+172|0;aa=Ta+168|0;ta=Ta+164|0;Aa=Ta+160|0;xa=Ta+156|0;Q=Ta+152|0;za=Ta+148|0;wa=Ta+144|0;R=Ta+140|0;S=Ta+136|0;T=Ta+132|0;sa=Ta+128|0;M=Ta+124|0;N=Ta+120|0;O=Ta+116|0;P=Ta+112|0;ca=Ta+108|0;I=Ta+104|0;na=Ta+100|0;z=Ta+96|0;D=Ta+92|0;F=Ta+88|0;ga=Ta+84|0;H=Ta+80|0;Ja=Ta+76|0;E=Ta+72|0;y=Ta+68|0;A=Ta+64|0;ma=Ta+60|0;B=Ta+56|0;ea=Ta+52|0;fa=Ta+48|0;Da=Ta+44|0;Ia=Ta+40|0;ra=Ta+36|0;x=Ta+32|0;da=Ta+28|0;oa=Ta+24|0;G=Ta+20|0;J=Ta+16|0;K=Ta+12|0;L=Ta+8|0;pa=Ta+4|0;qa=Ta;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ua>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ta+380>>2]=1.0070741176605225;g[Ta+376>>2]=.22770896553993225;g[Ta+372>>2]=.5319324731826782;g[Ta+368>>2]=.7747811675071716;g[Ta+364>>2]=.2659662365913391;g[Ta+360>>2]=.5165207982063293;g[Ta+356>>2]=.15180596709251404;g[Ta+352>>2]=.5035370588302612;g[Ta+348>>2]=.1666666716337204;g[Ta+344>>2]=.6009252071380615;g[Ta+340>>2]=.5;g[Ta+336>>2]=.2562476694583893;g[Ta+332>>2]=.15689139068126678;g[Ta+328>>2]=.34827721118927;g[Ta+324>>2]=1.1502814292907715;g[Ta+320>>2]=.30023863911628723;g[Ta+316>>2]=.011599105782806
873;g[Ta+312>>2]=1.7320507764816284;g[Ta+308>>2]=2.0;c[Sa>>2]=c[Ua>>2];while(1){if((c[Sa>>2]|0)<=0)break;g[Ka>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[La>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[Ma>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[Oa>>2]=+g[Ka>>2]*2.0-+g[Na>>2];g[Y>>2]=(+g[La>>2]+ +g[Ma>>2])*1.7320507764816284;g[W>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Pa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[Qa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[V>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Ra>>2]=(+g[Pa>>2]-+g[Qa>>2])*1.7320507764816284;g[X>>2]=+g[V>>2]-+g[W>>2]*2.0;g[U>>2]=+g[Oa>>2]+ +g[Ra>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=+g[U>>2]*.011599105782806873+ +g[Z>>2]*.30023863911628723;g[ka>>2]=+g[U>>2]*.30023863911628723-+g[Z>>2]*.011599105782806873;g[ha>>2]=+g[Ka>>2]+ +g[Na>>2];g[ia>>2]=+g[V>>2]+ +g[W>>2];g[ja>>2]=+g[ha>>2]*1.1502814292907715-+g[ia>>2]*.34827721118927;g[C>>2]=+g[ha>>2]*.34827721118927+ +g[ia>>2]*1.1502814292907715;g[$>>2]=+g[Oa>>2]-+g[Ra>>2];g[aa>>2]=+g[Y>>2]+ +g[X>>2];g[ba>>2]=+g[$>>2]*.15689139068126678+ +g[aa>>2]*.2562476694583893;g[la>>2]=+g[aa>>2]*.15689139068126678-+g[$>>2]*.2562476694583893;g[w>>2]=+g[c[p>>2]>>2];g[R>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[S>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[T>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[sa>>2]=+g[S>>2]+ +g[T>>2];g[ta>>2]=+g[R>>2]+ +g[sa>>2];g[Aa>>2]=+g[R>>2]-+g[sa>>2]*.5;g[xa>>2]=+g[S>>2]-+g[T>>2];g[M>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[N>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[O>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[Q>>2]=+g[M>>2]+ +g[P>>2];g[za>>2]=+g[M>>2]-+g[P>>2]*.5;g[wa>>2]=+g[N>>2]-+g[O>>2];g[Ea>>2]=(+g[Q>>2]-+g[ta>>2])*.6009252071380615;g[ua>>2]=+g[Q>>2]+ +g[ta>>2];g[va>>2]=+g[w>>2]-+g[ua>>2]*.1666666716337204;g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[Ca>>2]=+g[ya>>2]*.5035370588302612+ 
+g[Ba>>2]*.15180596709251404;g[Fa>>2]=+g[za>>2]-+g[Aa>>2];g[Ga>>2]=+g[wa>>2]-+g[xa>>2];g[Ha>>2]=+g[Fa>>2]*.5165207982063293-+g[Ga>>2]*.2659662365913391;g[c[n>>2]>>2]=+g[ua>>2]*2.0+ +g[w>>2];g[ca>>2]=(+g[_>>2]+ +g[ba>>2])*1.7320507764816284;g[I>>2]=(+g[ka>>2]-+g[la>>2])*1.7320507764816284;g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[na>>2]=+g[ja>>2]-+g[ma>>2];g[z>>2]=+g[ma>>2]*2.0+ +g[ja>>2];g[B>>2]=+g[_>>2]-+g[ba>>2];g[D>>2]=+g[B>>2]*2.0-+g[C>>2];g[F>>2]=+g[B>>2]+ +g[C>>2];g[ea>>2]=+g[Ga>>2]*.7747811675071716+ +g[Fa>>2]*.5319324731826782;g[fa>>2]=+g[ya>>2]*.22770896553993225-+g[Ba>>2]*1.0070741176605225;g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[H>>2]=+g[ea>>2]+ +g[fa>>2];g[Da>>2]=+g[va>>2]-+g[Ca>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2];g[Ja>>2]=+g[Da>>2]-+g[Ia>>2];g[E>>2]=+g[Ia>>2]+ +g[Da>>2];g[ra>>2]=+g[Ca>>2]*2.0+ +g[va>>2];g[x>>2]=+g[Ha>>2]*2.0+ +g[Ea>>2];g[y>>2]=+g[ra>>2]-+g[x>>2];g[A>>2]=+g[x>>2]+ +g[ra>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[y>>2]-+g[z>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[A>>2]-+g[D>>2];g[c[o>>2]>>2]=+g[A>>2]+ +g[D>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[y>>2]+ +g[z>>2];g[da>>2]=+g[Ja>>2]-+g[ca>>2];g[oa>>2]=+g[ga>>2]-+g[na>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[da>>2]-+g[oa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[da>>2]+ +g[oa>>2];g[G>>2]=+g[E>>2]-+g[F>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]-+g[J>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[J>>2]+ +g[G>>2];g[K>>2]=+g[H>>2]-+g[I>>2];g[L>>2]=+g[E>>2]+ +g[F>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[K>>2]+ +g[L>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[L>>2]-+g[K>>2];g[pa>>2]=+g[Ja>>2]+ +g[ca>>2];g[qa>>2]=+g[ga>>2]+ +g[na>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[pa>>2]-+g[qa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[pa>>2]+ +g[qa>>2];c[Sa>>2]=(c[Sa>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ta;return}function 
fw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,58,10840);i=b;return}function gw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0;ja=i;i=i+240|0;n=ja+228|0;o=ja+224|0;p=ja+220|0;q=ja+216|0;r=ja+212|0;s=ja+208|0;t=ja+204|0;ka=ja+200|0;u=ja+196|0;v=ja+192|0;ia=ja+160|0;y=ja+156|0;N=ja+152|0;B=ja+148|0;O=ja+144|0;_=ja+140|0;ha=ja+136|0;X=ja+132|0;ga=ja+128|0;M=ja+124|0;Q=ja+120|0;U=ja+116|0;fa=ja+112|0;E=ja+108|0;P=ja+104|0;w=ja+100|0;x=ja+96|0;z=ja+92|0;A=ja+88|0;Y=ja+84|0;Z=ja+80|0;V=ja+76|0;W=ja+72|0;K=ja+68|0;L=ja+64|0;S=ja+60|0;T=ja+56|0;C=ja+52|0;D=ja+48|0;$=ja+44|0;R=ja+40|0;J=ja+36|0;I=ja+32|0;ba=ja+28|0;aa=ja+24|0;F=ja+20|0;ea=ja+16|0;H=ja+12|0;G=ja+8|0;da=ja+4|0;ca=ja;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ka>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ja+188>>2]=1.8019376993179321;g[ja+184>>2]=.44504186511039734;g[ja+180>>2]=1.2469795942306519;g[ja+176>>2]=.8677674531936646;g[ja+172>>2]=1.9498558044433594;g[ja+168>>2]=1.5636630058288574;g[ja+164>>2]=2.0;c[ia>>2]=c[ka>>2];while(1){if((c[ia>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[N>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[O>>2]=+g[z>>2]+ +g[A>>2];g[Y>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[Z>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[ha>>2]=+g[Y>>2]+ +g[Z>>2];g[V>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[W>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[ga>>2]=+g[V>>2]+ +g[W>>2];g[K>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[L>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[Q>>2]=+g[K>>2]+ 
+g[L>>2];g[S>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[T>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[fa>>2]=+g[S>>2]+ +g[T>>2];g[C>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[P>>2]=+g[C>>2]+ +g[D>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[B>>2]+ +g[E>>2]+ +g[M>>2])*2.0+ +g[y>>2];g[c[n>>2]>>2]=(+g[O>>2]+ +g[P>>2]+ +g[Q>>2])*2.0+ +g[N>>2];g[$>>2]=+g[U>>2]*1.5636630058288574-+g[X>>2]*1.9498558044433594-+g[_>>2]*.8677674531936646;g[R>>2]=+g[P>>2]*1.2469795942306519+ +g[N>>2]+-(+g[Q>>2]*.44504186511039734+ +g[O>>2]*1.8019376993179321);g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[R>>2]-+g[$>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[R>>2]+ +g[$>>2];g[J>>2]=+g[fa>>2]*.8677674531936646+ +g[ga>>2]*1.5636630058288574-+g[ha>>2]*1.9498558044433594;g[I>>2]=+g[M>>2]*1.2469795942306519+ +g[y>>2]+-(+g[E>>2]*1.8019376993179321+ +g[B>>2]*.44504186511039734);g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]-+g[J>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[I>>2]+ +g[J>>2];g[ba>>2]=+g[U>>2]*.8677674531936646+ +g[X>>2]*1.5636630058288574-+g[_>>2]*1.9498558044433594;g[aa>>2]=+g[Q>>2]*1.2469795942306519+ +g[N>>2]+-(+g[P>>2]*1.8019376993179321+ +g[O>>2]*.44504186511039734);g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[aa>>2]-+g[ba>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[F>>2]=+g[fa>>2]*1.5636630058288574-+g[ga>>2]*1.9498558044433594-+g[ha>>2]*.8677674531936646;g[ea>>2]=+g[E>>2]*1.2469795942306519+ +g[y>>2]+-(+g[M>>2]*.44504186511039734+ +g[B>>2]*1.8019376993179321);g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ea>>2]-+g[F>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[ea>>2]+ +g[F>>2];g[H>>2]=+g[ha>>2]*1.5636630058288574+ +g[fa>>2]*1.9498558044433594+ +g[ga>>2]*.8677674531936646;g[G>>2]=+g[B>>2]*1.2469795942306519+ +g[y>>2]+-(+g[M>>2]*1.8019376993179321+ +g[E>>2]*.44504186511039734);g[c[o>>2]>>2]=+g[G>>2]-+g[H>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[G>>2]+ +g[H>>2];g[da>>2]=+g[_>>2]*1.5636630058288574+ 
+g[U>>2]*1.9498558044433594+ +g[X>>2]*.8677674531936646;g[ca>>2]=+g[O>>2]*1.2469795942306519+ +g[N>>2]+-(+g[Q>>2]*1.8019376993179321+ +g[P>>2]*.44504186511039734);g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ca>>2]-+g[da>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ca>>2]+ +g[da>>2];c[ia>>2]=(c[ia>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ja;return}function hw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,59,10888);i=b;return}function iw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0;Ia=i;i=i+336|0;n=Ia+328|0;o=Ia+324|0;p=Ia+320|0;q=Ia+316|0;r=Ia+312|0;s=Ia+308|0;t=Ia+304|0;Ja=Ia+300|0;u=Ia+296|0;v=Ia+292|0;Ha=Ia+260|0;C=Ia+256|0;Ba=Ia+252|0;pa=Ia+248|0;K=Ia+244|0;ga=Ia+240|0;x=Ia+236|0;N=Ia+232|0;P=Ia+228|0;S=Ia+224|0;ua=Ia+220|0;Ca=Ia+216|0;za=Ia+212|0;Da=Ia+208|0;H=Ia+204|0;ka=Ia+200|0;la=Ia+196|0;oa=Ia+192|0;w=Ia+188|0;B=Ia+184|0;ma=Ia+180|0;na=Ia+176|0;D=Ia+172|0;M=Ia+168|0;I=Ia+164|0;R=Ia+160|0;E=Ia+156|0;F=Ia+152|0;G=Ia+148|0;ha=Ia+144|0;ia=Ia+140|0;ja=Ia+136|0;ya=Ia+132|0;Q=Ia+128|0;ta=Ia+124|0;L=Ia+120|0;qa=Ia+116|0;va=Ia+112|0;wa=Ia+108|0;xa=Ia+104|0;ra=Ia+100|0;sa=Ia+96|0;y=Ia+92|0;A=Ia+88|0;fa=Ia+84|0;z=Ia+80|0;da=Ia+76|0;ea=Ia+72|0;Y=Ia+68|0;Aa=Ia+64|0;X=Ia+60|0;aa=Ia+56|0;ca=Ia+52|0;_=Ia+48|0;$=Ia+44|0;ba=Ia+40|0;Z=Ia+36|0;Ga=Ia+32|0;Ea=Ia+28|0;Fa=Ia+24|0;U=Ia+20|0;W=Ia+16|0;O=Ia+12|0;T=Ia+8|0;V=Ia+4|0;J=Ia;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ja>>2]=k;c[u>>2]=l;c[v>>
2]=m;g[Ia+288>>2]=1.1180340051651;g[Ia+284>>2]=1.9021130800247192;g[Ia+280>>2]=1.1755704879760742;g[Ia+276>>2]=.5;g[Ia+272>>2]=.8660253882408142;g[Ia+268>>2]=2.0;g[Ia+264>>2]=1.7320507764816284;c[Ha>>2]=c[Ja>>2];while(1){if((c[Ha>>2]|0)<=0)break;g[na>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[oa>>2]=+g[na>>2]*1.7320507764816284;g[w>>2]=+g[c[p>>2]>>2];g[B>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[ma>>2]=+g[w>>2]-+g[B>>2];g[C>>2]=+g[B>>2]*2.0+ +g[w>>2];g[Ba>>2]=+g[ma>>2]-+g[oa>>2];g[pa>>2]=+g[ma>>2]+ +g[oa>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[M>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[I>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[R>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[E>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[F>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[ha>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[ia>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[wa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[xa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[ya>>2]=(+g[wa>>2]+ +g[xa>>2])*.8660253882408142;g[Q>>2]=+g[wa>>2]-+g[xa>>2];g[ra>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[sa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[ta>>2]=(+g[ra>>2]-+g[sa>>2])*.8660253882408142;g[L>>2]=+g[ra>>2]+ +g[sa>>2];g[K>>2]=(+g[E>>2]-+g[F>>2])*.8660253882408142;g[ga>>2]=+g[M>>2]-+g[L>>2];g[x>>2]=+g[R>>2]-+g[Q>>2];g[N>>2]=+g[L>>2]*.5+ +g[M>>2];g[P>>2]=(+g[ha>>2]-+g[ia>>2])*.8660253882408142;g[S>>2]=+g[Q>>2]*.5+ +g[R>>2];g[qa>>2]=+g[D>>2]-+g[G>>2]*.5;g[ua>>2]=+g[qa>>2]-+g[ta>>2];g[Ca>>2]=+g[qa>>2]+ +g[ta>>2];g[va>>2]=+g[I>>2]-+g[ja>>2]*.5;g[za>>2]=+g[va>>2]-+g[ya>>2];g[Da>>2]=+g[va>>2]+ +g[ya>>2];g[H>>2]=+g[D>>2]+ +g[G>>2];g[ka>>2]=+g[I>>2]+ +g[ja>>2];g[la>>2]=+g[H>>2]+ +g[ka>>2];g[c[n>>2]>>2]=+g[la>>2]*2.0+ +g[C>>2];g[y>>2]=+g[ga>>2]*1.1755704879760742-+g[x>>2]*1.9021130800247192;g[A>>2]=+g[ga>>2]*1.9021130800247192+ 
+g[x>>2]*1.1755704879760742;g[da>>2]=+g[C>>2]-+g[la>>2]*.5;g[ea>>2]=(+g[H>>2]-+g[ka>>2])*1.1180340051651;g[fa>>2]=+g[da>>2]-+g[ea>>2];g[z>>2]=+g[ea>>2]+ +g[da>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[fa>>2]-+g[y>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[z>>2]+ +g[A>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[fa>>2]+ +g[y>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[z>>2]-+g[A>>2];g[Y>>2]=(+g[ua>>2]-+g[za>>2])*1.1180340051651;g[Aa>>2]=+g[ua>>2]+ +g[za>>2];g[X>>2]=+g[pa>>2]-+g[Aa>>2]*.5;g[_>>2]=+g[N>>2]-+g[K>>2];g[$>>2]=+g[S>>2]-+g[P>>2];g[aa>>2]=+g[_>>2]*1.1755704879760742-+g[$>>2]*1.9021130800247192;g[ca>>2]=+g[_>>2]*1.9021130800247192+ +g[$>>2]*1.1755704879760742;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Aa>>2]*2.0+ +g[pa>>2];g[ba>>2]=+g[Y>>2]+ +g[X>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ba>>2]-+g[ca>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ba>>2]+ +g[ca>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Z>>2]-+g[aa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Z>>2]+ +g[aa>>2];g[Ga>>2]=(+g[Ca>>2]-+g[Da>>2])*1.1180340051651;g[Ea>>2]=+g[Ca>>2]+ +g[Da>>2];g[Fa>>2]=+g[Ba>>2]-+g[Ea>>2]*.5;g[O>>2]=+g[K>>2]+ +g[N>>2];g[T>>2]=+g[P>>2]+ +g[S>>2];g[U>>2]=+g[O>>2]*1.1755704879760742-+g[T>>2]*1.9021130800247192;g[W>>2]=+g[O>>2]*1.9021130800247192+ +g[T>>2]*1.1755704879760742;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ea>>2]*2.0+ +g[Ba>>2];g[V>>2]=+g[Ga>>2]+ +g[Fa>>2];g[c[o>>2]>>2]=+g[V>>2]-+g[W>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[V>>2]+ +g[W>>2];g[J>>2]=+g[Fa>>2]-+g[Ga>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[J>>2]-+g[U>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[J>>2]+ +g[U>>2];c[Ha>>2]=(c[Ha>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ia;return}function jw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,60,10936);i=b;return}function 
kw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0;Da=i;i=i+304|0;n=Da+296|0;o=Da+292|0;p=Da+288|0;q=Da+284|0;r=Da+280|0;s=Da+276|0;t=Da+272|0;Ea=Da+268|0;u=Da+264|0;v=Da+260|0;Ca=Da+240|0;E=Da+236|0;X=Da+232|0;na=Da+228|0;L=Da+224|0;B=Da+220|0;W=Da+216|0;ka=Da+212|0;I=Da+208|0;fa=Da+204|0;sa=Da+200|0;ia=Da+196|0;va=Da+192|0;pa=Da+188|0;wa=Da+184|0;_=Da+180|0;Z=Da+176|0;S=Da+172|0;P=Da+168|0;C=Da+164|0;D=Da+160|0;J=Da+156|0;la=Da+152|0;ma=Da+148|0;K=Da+144|0;A=Da+140|0;H=Da+136|0;y=Da+132|0;F=Da+128|0;z=Da+124|0;G=Da+120|0;w=Da+116|0;x=Da+112|0;N=Da+108|0;R=Da+104|0;Q=Da+100|0;O=Da+96|0;da=Da+92|0;ea=Da+88|0;qa=Da+84|0;ra=Da+80|0;ga=Da+76|0;ha=Da+72|0;ta=Da+68|0;ua=Da+64|0;ca=Da+60|0;ja=Da+56|0;Y=Da+52|0;$=Da+48|0;aa=Da+44|0;ba=Da+40|0;oa=Da+36|0;xa=Da+32|0;ya=Da+28|0;za=Da+24|0;M=Da+20|0;T=Da+16|0;U=Da+12|0;V=Da+8|0;Aa=Da+4|0;Ba=Da;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ea>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Da+256>>2]=1.8477590084075928;g[Da+252>>2]=.7653668522834778;g[Da+248>>2]=1.4142135381698608;g[Da+244>>2]=2.0;c[Ca>>2]=c[Ea>>2];while(1){if((c[Ca>>2]|0)<=0)break;g[C>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[J>>2]=+g[C>>2]-+g[D>>2];g[la>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[ma>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[K>>2]=+g[la>>2]+ +g[ma>>2];g[E>>2]=(+g[C>>2]+ +g[D>>2])*2.0;g[X>>2]=(+g[J>>2]+ 
+g[K>>2])*1.4142135381698608;g[na>>2]=(+g[la>>2]-+g[ma>>2])*2.0;g[L>>2]=(+g[J>>2]-+g[K>>2])*1.4142135381698608;g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[A>>2]=+g[z>>2]*2.0;g[G>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[H>>2]=+g[G>>2]*2.0;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[F>>2]=+g[w>>2]-+g[x>>2];g[B>>2]=+g[y>>2]+ +g[A>>2];g[W>>2]=+g[F>>2]+ +g[H>>2];g[ka>>2]=+g[y>>2]-+g[A>>2];g[I>>2]=+g[F>>2]-+g[H>>2];g[da>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[ea>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[fa>>2]=+g[da>>2]+ +g[ea>>2];g[N>>2]=+g[da>>2]-+g[ea>>2];g[qa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[ra>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[R>>2]=+g[qa>>2]+ +g[ra>>2];g[ga>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[Q>>2]=+g[ga>>2]-+g[ha>>2];g[ta>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[ua>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[O>>2]=+g[ta>>2]+ +g[ua>>2];g[pa>>2]=+g[fa>>2]-+g[ia>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[_>>2]=+g[R>>2]-+g[Q>>2];g[Z>>2]=+g[N>>2]+ +g[O>>2];g[S>>2]=+g[Q>>2]+ +g[R>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[ca>>2]=+g[B>>2]+ +g[E>>2];g[ja>>2]=(+g[fa>>2]+ +g[ia>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ca>>2]-+g[ja>>2];g[c[n>>2]>>2]=+g[ca>>2]+ +g[ja>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[$>>2]=+g[Z>>2]*.7653668522834778-+g[_>>2]*1.8477590084075928;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Y>>2]-+g[$>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Y>>2]+ +g[$>>2];g[aa>>2]=+g[W>>2]+ +g[X>>2];g[ba>>2]=+g[Z>>2]*1.8477590084075928+ +g[_>>2]*.7653668522834778;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[aa>>2]-+g[ba>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[oa>>2]=+g[ka>>2]-+g[na>>2];g[xa>>2]=(+g[pa>>2]-+g[wa>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[oa>>2]-+g[xa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[oa>>2]+ +g[xa>>2];g[ya>>2]=+g[ka>>2]+ 
+g[na>>2];g[za>>2]=(+g[pa>>2]+ +g[wa>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ya>>2]-+g[za>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ya>>2]+ +g[za>>2];g[M>>2]=+g[I>>2]+ +g[L>>2];g[T>>2]=+g[P>>2]*1.8477590084075928-+g[S>>2]*.7653668522834778;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[M>>2]-+g[T>>2];g[c[o>>2]>>2]=+g[M>>2]+ +g[T>>2];g[U>>2]=+g[I>>2]-+g[L>>2];g[V>>2]=+g[P>>2]*.7653668522834778+ +g[S>>2]*1.8477590084075928;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[U>>2]-+g[V>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[U>>2]+ +g[V>>2];g[Aa>>2]=+g[B>>2]-+g[E>>2];g[Ba>>2]=(+g[va>>2]+ +g[sa>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Aa>>2]-+g[Ba>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Aa>>2]+ +g[Ba>>2];c[Ca>>2]=(c[Ca>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Da;return}function lw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,61,10984);i=b;return}function mw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0;db=i;i=i+416|0;n=db+412|0;o=db+408|0;p=db+404|0;q=db+400|0;r=db+396|0;s=db+392|0;t=db+388|0;eb=db+384|0;u=db+380|0;v=db+376|0;cb=db+352|0;aa=db+348|0;ja=db+344|0;Qa=db+340|0;Xa=db+336|0;ua=db+332|0;U=db+328|0;M=db+324|0;z=db+320|0;T=db+316|0;za=db+312|0;A=db+308|0;J=db+304|0;Ha=db+300|0;Oa=db+296|0;Pa=db+292|0;ab=db+288|0;ha=db+284|0;ia=db+280|0;Ra=db+276|0;Sa=db+272|0;Ta=db+268|0;ka=db+264|0;la=db+260|0;ma=db+256|0;$=db+252|0;Wa=db+248|0;Z=db+244|0;Ua=db+240|0;_=db+236|0;Va=db+232|0;w=db+228|0;Y=db+224|0;da=db+220|0;Ya=db+216|0;sa=db+212|0;I=db+208|0;Ga=db+204|0;H=db+200|0;$a=db+196|0;ta=db+192|0;Ka=db+188|0;bb=db+184|0;xa=db+180|0;L=db+176|0;Na=db+172|0;K=db+168|0;ga=db+164|0;ya=db+160|0;ba=db+156|0;ca=db+152|0;qa=db+148|0;ra=db+144|0;Ea=db+140|0;Fa=db+136|0;Za=db+132|0;_a=db+128|0;Ia=db+124|0;Ja=db+120|0;va=db+116|0;wa=db+112|0;La=db+108|0;Ma=db+104|0;ea=db+100|0;fa=db+96|0;Aa=db+92|0;Ca=db+88|0;pa=db+84|0;Ba=db+80|0;na=db+76|0;oa=db+72|0;N=db+68|0;P=db+64|0;G=db+60|0;O=db+56|0;E=db+52|0;F=db+48|0;V=db+44|0;X=db+40|0;S=db+36|0;W=db+32|0;Q=db+28|0;R=db+24|0;B=db+20|0;D=db+16|0;y=db+12|0;C=db+8|0;Da=db+4|0;x=db;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[eb>>2]=k;c[u>>2]=l;c[v>>2]=m;g[db+372>>2]=1.1180340051651;g[db+368>>2]=.5;g[db+364>>2]=1.9021130800247192;g[db+360>>2]=1.1755704879760742;g[db+356>>2]=2.0;c[cb>>2]=c[eb>>2];while(1){if((c[cb>>2]|0)<=0)break;g[_>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[$>>2]=+g[_>>2]*2.0;g[Va>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Wa>>2]=+g[Va>>2]
*2.0;g[w>>2]=+g[c[p>>2]>>2];g[Y>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[Z>>2]=+g[w>>2]+ +g[Y>>2];g[Ua>>2]=+g[w>>2]-+g[Y>>2];g[aa>>2]=+g[Z>>2]-+g[$>>2];g[ja>>2]=+g[Ua>>2]-+g[Wa>>2];g[Qa>>2]=+g[Z>>2]+ +g[$>>2];g[Xa>>2]=+g[Ua>>2]+ +g[Wa>>2];g[ba>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[Ya>>2]=+g[ba>>2]-+g[ca>>2];g[qa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[ra>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[I>>2]=+g[qa>>2]+ +g[ra>>2];g[Ea>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[Fa>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[Ga>>2]=+g[Ea>>2]+ +g[Fa>>2];g[H>>2]=+g[Ea>>2]-+g[Fa>>2];g[Za>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[_a>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[$a>>2]=+g[Za>>2]+ +g[_a>>2];g[ta>>2]=+g[Za>>2]-+g[_a>>2];g[Ia>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Ja>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[Ka>>2]=+g[Ia>>2]+ +g[Ja>>2];g[bb>>2]=+g[Ia>>2]-+g[Ja>>2];g[va>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[wa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[L>>2]=+g[va>>2]+ +g[wa>>2];g[La>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[Ma>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[K>>2]=+g[La>>2]-+g[Ma>>2];g[ea>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[fa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[ya>>2]=+g[fa>>2]-+g[ea>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[U>>2]=+g[L>>2]-+g[K>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[z>>2]=+g[ta>>2]+ +g[sa>>2];g[T>>2]=+g[I>>2]-+g[H>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[A>>2]=+g[ya>>2]+ +g[xa>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[Ha>>2]=+g[da>>2]-+g[Ga>>2];g[Oa>>2]=+g[Ka>>2]-+g[Na>>2];g[Pa>>2]=+g[Ha>>2]+ +g[Oa>>2];g[ab>>2]=+g[Ya>>2]+ +g[$a>>2];g[ha>>2]=+g[bb>>2]-+g[ga>>2];g[ia>>2]=+g[ab>>2]+ +g[ha>>2];g[Ra>>2]=+g[da>>2]+ +g[Ga>>2];g[Sa>>2]=+g[Ka>>2]+ +g[Na>>2];g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[ka>>2]=+g[Ya>>2]-+g[$a>>2];g[la>>2]=+g[bb>>2]+ 
+g[ga>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Pa>>2]*2.0+ +g[aa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ia>>2]*2.0+ +g[Xa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ma>>2]*2.0+ +g[ja>>2];g[c[n>>2]>>2]=+g[Ta>>2]*2.0+ +g[Qa>>2];g[Aa>>2]=+g[ua>>2]*1.1755704879760742-+g[za>>2]*1.9021130800247192;g[Ca>>2]=+g[ua>>2]*1.9021130800247192+ +g[za>>2]*1.1755704879760742;g[na>>2]=+g[aa>>2]-+g[Pa>>2]*.5;g[oa>>2]=(+g[Ha>>2]-+g[Oa>>2])*1.1180340051651;g[pa>>2]=+g[na>>2]-+g[oa>>2];g[Ba>>2]=+g[oa>>2]+ +g[na>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[pa>>2]-+g[Aa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ba>>2]+ +g[Ca>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[pa>>2]+ +g[Aa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ba>>2]-+g[Ca>>2];g[N>>2]=+g[J>>2]*1.1755704879760742-+g[M>>2]*1.9021130800247192;g[P>>2]=+g[J>>2]*1.9021130800247192+ +g[M>>2]*1.1755704879760742;g[E>>2]=+g[ja>>2]-+g[ma>>2]*.5;g[F>>2]=(+g[ka>>2]-+g[la>>2])*1.1180340051651;g[G>>2]=+g[E>>2]-+g[F>>2];g[O>>2]=+g[F>>2]+ +g[E>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[G>>2]-+g[N>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[G>>2]+ +g[N>>2];g[c[o>>2]>>2]=+g[O>>2]-+g[P>>2];g[V>>2]=+g[T>>2]*1.1755704879760742-+g[U>>2]*1.9021130800247192;g[X>>2]=+g[T>>2]*1.9021130800247192+ +g[U>>2]*1.1755704879760742;g[Q>>2]=+g[Xa>>2]-+g[ia>>2]*.5;g[R>>2]=(+g[ab>>2]-+g[ha>>2])*1.1180340051651;g[S>>2]=+g[Q>>2]-+g[R>>2];g[W>>2]=+g[R>>2]+ +g[Q>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[S>>2]-+g[V>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[W>>2]+ +g[X>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[S>>2]+ +g[V>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[W>>2]-+g[X>>2];g[B>>2]=+g[z>>2]*1.1755704879760742-+g[A>>2]*1.9021130800247192;g[D>>2]=+g[z>>2]*1.9021130800247192+ +g[A>>2]*1.1755704879760742;g[Da>>2]=+g[Qa>>2]-+g[Ta>>2]*.5;g[x>>2]=(+g[Ra>>2]-+g[Sa>>2])*1.1180340051651;g[y>>2]=+g[Da>>2]-+g[x>>2];g[C>>2]=+g[x>>2]+ 
+g[Da>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[y>>2]-+g[B>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[C>>2]+ +g[D>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[y>>2]+ +g[B>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[C>>2]-+g[D>>2];c[cb>>2]=(c[cb>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=db;return}function nw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,62,11032);i=b;return}function ow(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0;nc=i;i=i+736|0;n=nc+732|0;o=nc+728|0;p=nc+724|0;q=nc+720|0;r=nc+716|0;s=nc+712|0;t=nc+708|0;oc=nc+704|0;u=nc+700|0;v=nc+696|0;mc=nc+608|0;gc=nc+604|0;N=nc+600|0;jb=nc+596|0;dc=nc+592|0;M=nc+588|0;Bb=nc+584|0;Cb=nc+580|0;Sb=nc+576|0;Fb=nc+572|0;Na=nc+568|0;wa=nc+564|0;U=nc+560|0;ub=nc+556|0;Ma=nc+552|0;va=nc+548|0;R=nc+544|0;fa=nc+540|0;ga=nc+536|0;$b=nc+532|0;ja=nc+528|0;Qa=nc+524|0;za=nc+520|0;$=nc+516|0;A=nc+512|0;Pa=nc+508|0;ya=nc+504|0;Y=nc+500|0;ec=nc+496|0;fc=nc+492|0;w=nc+488|0;ib=nc+484|0;bc=nc+480|0;Fa=nc+476|0;hb=nc+472|0;cc=nc+468|0;kb=nc+464|0;Rb=nc
+460|0;wb=nc+456|0;ic=nc+452|0;vb=nc+448|0;pb=nc+444|0;sb=nc+440|0;Ab=nc+436|0;lb=nc+432|0;mb=nc+428|0;nb=nc+424|0;Ob=nc+420|0;Pb=nc+416|0;Qb=nc+412|0;lc=nc+408|0;ob=nc+404|0;yb=nc+400|0;qb=nc+396|0;rb=nc+392|0;zb=nc+388|0;xb=nc+384|0;S=nc+380|0;Eb=nc+376|0;T=nc+372|0;Db=nc+368|0;tb=nc+364|0;Q=nc+360|0;kc=nc+356|0;P=nc+352|0;jc=nc+348|0;Tb=nc+344|0;_b=nc+340|0;C=nc+336|0;Hb=nc+332|0;B=nc+328|0;Mb=nc+324|0;y=nc+320|0;G=nc+316|0;Ub=nc+312|0;Vb=nc+308|0;Wb=nc+304|0;Xb=nc+300|0;Yb=nc+296|0;Zb=nc+292|0;Kb=nc+288|0;Lb=nc+284|0;E=nc+280|0;Nb=nc+276|0;x=nc+272|0;F=nc+268|0;D=nc+264|0;Z=nc+260|0;ia=nc+256|0;_=nc+252|0;ha=nc+248|0;z=nc+244|0;X=nc+240|0;Jb=nc+236|0;W=nc+232|0;Ib=nc+228|0;ab=nc+224|0;ac=nc+220|0;$a=nc+216|0;eb=nc+212|0;gb=nc+208|0;cb=nc+204|0;db=nc+200|0;fb=nc+196|0;bb=nc+192|0;Ya=nc+188|0;_a=nc+184|0;La=nc+180|0;Sa=nc+176|0;Ta=nc+172|0;Ua=nc+168|0;Za=nc+164|0;Va=nc+160|0;Wa=nc+156|0;Xa=nc+152|0;Oa=nc+148|0;Ra=nc+144|0;ra=nc+140|0;ta=nc+136|0;hc=nc+132|0;la=nc+128|0;ma=nc+124|0;na=nc+120|0;sa=nc+116|0;oa=nc+112|0;pa=nc+108|0;qa=nc+104|0;Gb=nc+100|0;ka=nc+96|0;J=nc+92|0;L=nc+88|0;ua=nc+84|0;Ba=nc+80|0;Ca=nc+76|0;Da=nc+72|0;K=nc+68|0;Ea=nc+64|0;H=nc+60|0;I=nc+56|0;xa=nc+52|0;Aa=nc+48|0;Ia=nc+44|0;Ka=nc+40|0;O=nc+36|0;ba=nc+32|0;ca=nc+28|0;da=nc+24|0;Ja=nc+20|0;ea=nc+16|0;Ga=nc+12|0;Ha=nc+8|0;V=nc+4|0;aa=nc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[oc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[nc+692>>2]=.4257792830467224;g[nc+688>>2]=.9048270583152771;g[nc+684>>2]=.5358268022537231;g[nc+680>>2]=.8443279266357422;g[nc+676>>2]=.8763066530227661;g[nc+672>>2]=.4817536771297455;g[nc+668>>2]=.9685831665992737;g[nc+664>>2]=.24868988990783691;g[nc+660>>2]=.06279052048921585;g[nc+656>>2]=.9980267286300659;g[nc+652>>2]=.728968620300293;g[nc+648>>2]=.6845471262931824;g[nc+644>>2]=.25;g[nc+640>>2]=.5877852439880371;g[nc+636>>2]=.9510565400123596;g[nc+632>>2]=.55901700258255;g[nc+628>>2]=.5;g[nc+624>>2]=2.0;g[nc+620>>2]=1.1180340051651;g[nc+616>>2]=1.1755704879
760742;g[nc+612>>2]=1.9021130800247192;c[mc>>2]=c[oc>>2];while(1){if((c[mc>>2]|0)<=0)break;g[ec>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[fc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[gc>>2]=+g[ec>>2]*1.9021130800247192+ +g[fc>>2]*1.1755704879760742;g[N>>2]=+g[ec>>2]*1.1755704879760742-+g[fc>>2]*1.9021130800247192;g[w>>2]=+g[c[p>>2]>>2];g[Fa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[hb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[ib>>2]=+g[Fa>>2]+ +g[hb>>2];g[bc>>2]=(+g[Fa>>2]-+g[hb>>2])*1.1180340051651;g[jb>>2]=+g[ib>>2]*2.0+ +g[w>>2];g[cc>>2]=+g[w>>2]-+g[ib>>2]*.5;g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[M>>2]=+g[cc>>2]-+g[bc>>2];g[kb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[Bb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[lb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[mb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[nb>>2]=+g[lb>>2]+ +g[mb>>2];g[Ob>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[Pb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[Qb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Rb>>2]=+g[nb>>2]+ +g[Qb>>2];g[wb>>2]=+g[Ob>>2]-+g[Pb>>2];g[ic>>2]=(+g[nb>>2]-+g[Qb>>2])*.55901700258255;g[vb>>2]=+g[lb>>2]-+g[mb>>2];g[lc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[ob>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[yb>>2]=+g[lc>>2]-+g[ob>>2];g[qb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[rb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[zb>>2]=+g[qb>>2]-+g[rb>>2];g[pb>>2]=+g[lc>>2]+ +g[ob>>2];g[Cb>>2]=+g[yb>>2]+ +g[zb>>2];g[sb>>2]=+g[qb>>2]+ +g[rb>>2];g[Ab>>2]=(+g[yb>>2]-+g[zb>>2])*.55901700258255;g[Sb>>2]=+g[kb>>2]+ +g[Rb>>2];g[xb>>2]=+g[vb>>2]*.9510565400123596+ +g[wb>>2]*.5877852439880371;g[S>>2]=+g[vb>>2]*.5877852439880371-+g[wb>>2]*.9510565400123596;g[Db>>2]=+g[Bb>>2]-+g[Cb>>2]*.25;g[Eb>>2]=+g[Ab>>2]+ +g[Db>>2];g[T>>2]=+g[Db>>2]-+g[Ab>>2];g[Fb>>2]=+g[xb>>2]+ +g[Eb>>2];g[Na>>2]=+g[T>>2]-+g[S>>2];g[wa>>2]=+g[Eb>>2]-+g[xb>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[tb>>2]=+g[pb>>2]*.9510565400123596+ 
+g[sb>>2]*.5877852439880371;g[Q>>2]=+g[pb>>2]*.5877852439880371-+g[sb>>2]*.9510565400123596;g[jc>>2]=+g[kb>>2]-+g[Rb>>2]*.25;g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[P>>2]=+g[jc>>2]-+g[ic>>2];g[ub>>2]=+g[kc>>2]-+g[tb>>2];g[Ma>>2]=+g[P>>2]+ +g[Q>>2];g[va>>2]=+g[kc>>2]+ +g[tb>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[Tb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[fa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[Ub>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[Vb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[Wb>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Xb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[Yb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[_b>>2]=+g[Wb>>2]+ +g[Zb>>2];g[C>>2]=+g[Xb>>2]-+g[Yb>>2];g[Hb>>2]=(+g[Wb>>2]-+g[Zb>>2])*.55901700258255;g[B>>2]=+g[Ub>>2]-+g[Vb>>2];g[Kb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[Lb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[E>>2]=+g[Kb>>2]-+g[Lb>>2];g[Nb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2];g[x>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[F>>2]=+g[Nb>>2]-+g[x>>2];g[Mb>>2]=+g[Kb>>2]+ +g[Lb>>2];g[ga>>2]=+g[E>>2]+ +g[F>>2];g[y>>2]=+g[Nb>>2]+ +g[x>>2];g[G>>2]=(+g[E>>2]-+g[F>>2])*.55901700258255;g[$b>>2]=+g[Tb>>2]+ +g[_b>>2];g[D>>2]=+g[B>>2]*.9510565400123596+ +g[C>>2]*.5877852439880371;g[Z>>2]=+g[B>>2]*.5877852439880371-+g[C>>2]*.9510565400123596;g[ha>>2]=+g[fa>>2]-+g[ga>>2]*.25;g[ia>>2]=+g[G>>2]+ +g[ha>>2];g[_>>2]=+g[ha>>2]-+g[G>>2];g[ja>>2]=+g[D>>2]+ +g[ia>>2];g[Qa>>2]=+g[_>>2]-+g[Z>>2];g[za>>2]=+g[ia>>2]-+g[D>>2];g[$>>2]=+g[Z>>2]+ +g[_>>2];g[z>>2]=+g[Mb>>2]*.9510565400123596+ +g[y>>2]*.5877852439880371;g[X>>2]=+g[Mb>>2]*.5877852439880371-+g[y>>2]*.9510565400123596;g[Ib>>2]=+g[Tb>>2]-+g[_b>>2]*.25;g[Jb>>2]=+g[Hb>>2]+ +g[Ib>>2];g[W>>2]=+g[Ib>>2]-+g[Hb>>2];g[A>>2]=+g[Jb>>2]-+g[z>>2];g[Pa>>2]=+g[W>>2]+ +g[X>>2];g[ya>>2]=+g[Jb>>2]+ +g[z>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[ab>>2]=(+g[Sb>>2]-+g[$b>>2])*1.1180340051651;g[ac>>2]=+g[Sb>>2]+ +g[$b>>2];g[$a>>2]=+g[jb>>2]-+g[ac>>2]*.5;g[cb>>2]=+g[Cb>>2]+ +g[Bb>>2];g[db>>2]=+g[ga>>2]+ 
+g[fa>>2];g[eb>>2]=+g[cb>>2]*1.1755704879760742-+g[db>>2]*1.9021130800247192;g[gb>>2]=+g[cb>>2]*1.9021130800247192+ +g[db>>2]*1.1755704879760742;g[c[n>>2]>>2]=+g[ac>>2]*2.0+ +g[jb>>2];g[fb>>2]=+g[ab>>2]+ +g[$a>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[fb>>2]-+g[gb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[fb>>2]+ +g[gb>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[bb>>2]-+g[eb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[bb>>2]+ +g[eb>>2];g[Wa>>2]=+g[Ma>>2]*.6845471262931824+ +g[Na>>2]*.728968620300293;g[Xa>>2]=+g[Pa>>2]*.9980267286300659+ +g[Qa>>2]*.06279052048921585;g[Ya>>2]=+g[Wa>>2]*1.1755704879760742-+g[Xa>>2]*1.9021130800247192;g[_a>>2]=+g[Wa>>2]*1.9021130800247192+ +g[Xa>>2]*1.1755704879760742;g[La>>2]=+g[M>>2]+ +g[N>>2];g[Oa>>2]=+g[Ma>>2]*.728968620300293-+g[Na>>2]*.6845471262931824;g[Ra>>2]=+g[Pa>>2]*.06279052048921585-+g[Qa>>2]*.9980267286300659;g[Sa>>2]=+g[Oa>>2]+ +g[Ra>>2];g[Ta>>2]=+g[La>>2]-+g[Sa>>2]*.5;g[Ua>>2]=(+g[Oa>>2]-+g[Ra>>2])*1.1180340051651;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sa>>2]*2.0+ +g[La>>2];g[Za>>2]=+g[Ua>>2]+ +g[Ta>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Za>>2]-+g[_a>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Za>>2]+ +g[_a>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Va>>2]-+g[Ya>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Va>>2]+ +g[Ya>>2];g[pa>>2]=+g[ub>>2]*.24868988990783691+ +g[Fb>>2]*.9685831665992737;g[qa>>2]=+g[A>>2]*.4817536771297455+ +g[ja>>2]*.8763066530227661;g[ra>>2]=+g[pa>>2]*1.1755704879760742-+g[qa>>2]*1.9021130800247192;g[ta>>2]=+g[pa>>2]*1.9021130800247192+ +g[qa>>2]*1.1755704879760742;g[hc>>2]=+g[dc>>2]-+g[gc>>2];g[Gb>>2]=+g[ub>>2]*.9685831665992737-+g[Fb>>2]*.24868988990783691;g[ka>>2]=+g[A>>2]*.8763066530227661-+g[ja>>2]*.4817536771297455;g[la>>2]=+g[Gb>>2]+ +g[ka>>2];g[ma>>2]=+g[hc>>2]-+g[la>>2]*.5;g[na>>2]=(+g[Gb>>2]-+g[ka>>2])*1.1180340051651;g[c[o>>2]>>2]=+g[la>>2]*2.0+ +g[hc>>2];g[sa>>2]=+g[na>>2]+ 
+g[ma>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[sa>>2]-+g[ta>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[sa>>2]+ +g[ta>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[oa>>2]-+g[ra>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[oa>>2]+ +g[ra>>2];g[H>>2]=+g[va>>2]*.8443279266357422+ +g[wa>>2]*.5358268022537231;g[I>>2]=+g[ya>>2]*.9048270583152771-+g[za>>2]*.4257792830467224;g[J>>2]=+g[H>>2]*1.1755704879760742-+g[I>>2]*1.9021130800247192;g[L>>2]=+g[H>>2]*1.9021130800247192+ +g[I>>2]*1.1755704879760742;g[ua>>2]=+g[dc>>2]+ +g[gc>>2];g[xa>>2]=+g[va>>2]*.5358268022537231-+g[wa>>2]*.8443279266357422;g[Aa>>2]=+g[ya>>2]*.4257792830467224+ +g[za>>2]*.9048270583152771;g[Ba>>2]=+g[xa>>2]-+g[Aa>>2];g[Ca>>2]=+g[ua>>2]-+g[Ba>>2]*.5;g[Da>>2]=(+g[xa>>2]+ +g[Aa>>2])*1.1180340051651;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ba>>2]*2.0+ +g[ua>>2];g[K>>2]=+g[Ca>>2]+ +g[Da>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[L>>2]+ +g[K>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ea>>2]-+g[J>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[J>>2]+ +g[Ea>>2];g[Ga>>2]=+g[R>>2]*.4817536771297455+ +g[U>>2]*.8763066530227661;g[Ha>>2]=+g[Y>>2]*.8443279266357422+ +g[$>>2]*.5358268022537231;g[Ia>>2]=+g[Ga>>2]*1.1755704879760742-+g[Ha>>2]*1.9021130800247192;g[Ka>>2]=+g[Ga>>2]*1.9021130800247192+ +g[Ha>>2]*1.1755704879760742;g[O>>2]=+g[M>>2]-+g[N>>2];g[V>>2]=+g[R>>2]*.8763066530227661-+g[U>>2]*.4817536771297455;g[aa>>2]=+g[Y>>2]*.5358268022537231-+g[$>>2]*.8443279266357422;g[ba>>2]=+g[V>>2]+ +g[aa>>2];g[ca>>2]=+g[O>>2]-+g[ba>>2]*.5;g[da>>2]=(+g[V>>2]-+g[aa>>2])*1.1180340051651;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ba>>2]*2.0+ +g[O>>2];g[Ja>>2]=+g[da>>2]+ +g[ca>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ja>>2]-+g[Ka>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ja>>2]+ +g[Ka>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ea>>2]-+g[Ia>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[ea>>2]+ 
+g[Ia>>2];c[mc>>2]=(c[mc>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=nc;return}function pw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,63,11080);i=b;return}function qw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+64|0;n=x+48|0;o=x+44|0;p=x+40|0;q=x+36|0;r=x+28|0;y=x+20|0;s=x+16|0;t=x+12|0;w=x+8|0;u=x+4|0;v=x;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[x+32>>2]=f;c[r>>2]=h;c[x+24>>2]=j;c[y>>2]=k;c[s>>2]=l;c[t>>2]=m;c[w>>2]=c[y>>2];while(1){if((c[w>>2]|0)<=0)break;g[u>>2]=+g[c[p>>2]>>2];g[v>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[c[o>>2]>>2]=+g[u>>2]-+g[v>>2];g[c[n>>2]>>2]=+g[u>>2]+ +g[v>>2];c[w>>2]=(c[w>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[s>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[s>>2]<<2)}i=x;return}function rw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,64,11128);i=b;return}function sw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0;tc=i;i=i+720|0;n=tc+708|0;o=tc+704|0;p=tc+700|0;q=tc+696|0;r=tc+692|0;s=tc+688|0;t=tc+684|0;uc=tc+680|0;u=tc+676|0;v=tc+672|0;sc=tc+632|0;tb=tc+628|0;Sa=tc+624|0;vb=tc+620|0;Da=tc+616|0;qb=tc+612|0;Ra=tc+608|0;qc=tc+604|0;Aa=tc+600|0;$b=tc+596|0;Ua=tc+592|0;Va=tc+588|0;xb=tc+584|0;Eb=tc+580|0;J=tc+576|0;M=tc+572|0;ma=tc+568|0;hc=tc+564|0;Ya=tc+560|0;ab=tc+556|0;Hb=tc+552|0;A=tc+548|0;R=tc+544|0;da=tc+540|0;pa=tc+536|0;oc=tc+532|0;Ob=tc+528|0;qa=tc+524|0;Qb=tc+520|0;aa=tc+516|0;Za=tc+512|0;Y=tc+508|0;$a=tc+504|0;rb=tc+500|0;sb=tc+496|0;Ba=tc+492|0;rc=tc+488|0;ub=tc+484|0;Ca=tc+480|0;pb=tc+476|0;za=tc+472|0;nb=tc+468|0;xa=tc+464|0;ob=tc+460|0;ya=tc+456|0;w=tc+452|0;Fa=tc+448|0;Xb=tc+444|0;H=tc+440|0;Ab=tc+436|0;L=tc+432|0;_b=tc+428|0;K=tc+424|0;Db=tc+420|0;I=tc+416|0;Vb=tc+412|0;Wb=tc+408|0;yb=tc+404|0;zb=tc+400|0;Yb=tc+396|0;Zb=tc+392|0;Bb=tc+388|0;Cb=tc+384|0;dc=tc+380|0;P=tc+376|0;Tb=tc+372|0;ca=tc+368|0;gc=tc+364|0;ba=tc+360|0;z=tc+356|0;Q=tc+352|0;bc=tc+348|0;cc=tc+344|0;Rb=tc+340|0;Sb=tc+336|0;ec=tc+332|0;fc=tc+328|0;x=tc+324|0;y=tc+320|0;kc=tc+316|0;S=tc+312|0;Nb=tc+308|0;T=tc+304|0;nc=tc+300|0;V=tc+296|0;Kb=tc+292|0;W=tc+288|0;ic=tc+284|0;jc=tc+280|0;Lb=tc+276|0;Mb=t
c+272|0;lc=tc+268|0;mc=tc+264|0;Ib=tc+260|0;Jb=tc+256|0;_=tc+252|0;$=tc+248|0;U=tc+244|0;X=tc+240|0;pc=tc+236|0;wa=tc+232|0;ac=tc+228|0;va=tc+224|0;Ub=tc+220|0;na=tc+216|0;ta=tc+212|0;sa=tc+208|0;ua=tc+204|0;la=tc+200|0;oa=tc+196|0;ra=tc+192|0;Gb=tc+188|0;D=tc+184|0;C=tc+180|0;E=tc+176|0;wb=tc+172|0;Fb=tc+168|0;Pb=tc+164|0;B=tc+160|0;hb=tc+156|0;lb=tc+152|0;kb=tc+148|0;mb=tc+144|0;fb=tc+140|0;gb=tc+136|0;ib=tc+132|0;jb=tc+128|0;fa=tc+124|0;ja=tc+120|0;ia=tc+116|0;ka=tc+112|0;F=tc+108|0;G=tc+104|0;ga=tc+100|0;ha=tc+96|0;La=tc+92|0;Pa=tc+88|0;Oa=tc+84|0;Qa=tc+80|0;Ja=tc+76|0;Ka=tc+72|0;Ma=tc+68|0;Na=tc+64|0;Xa=tc+60|0;db=tc+56|0;cb=tc+52|0;eb=tc+48|0;Ta=tc+44|0;Wa=tc+40|0;_a=tc+36|0;bb=tc+32|0;O=tc+28|0;Ha=tc+24|0;Ga=tc+20|0;Ia=tc+16|0;Ea=tc+12|0;N=tc+8|0;Z=tc+4|0;ea=tc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[uc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[tc+668>>2]=1.662939190864563;g[tc+664>>2]=1.111140489578247;g[tc+660>>2]=1.9615705013275146;g[tc+656>>2]=.39018064737319946;g[tc+652>>2]=.7653668522834778;g[tc+648>>2]=1.8477590084075928;g[tc+644>>2]=.7071067690849304;g[tc+640>>2]=1.4142135381698608;g[tc+636>>2]=2.0;c[sc>>2]=c[uc>>2];while(1){if((c[sc>>2]|0)<=0)break;g[rb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[sb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[Ba>>2]=+g[rb>>2]-+g[sb>>2];g[rc>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[ub>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2];g[Ca>>2]=+g[rc>>2]+ +g[ub>>2];g[tb>>2]=(+g[rb>>2]+ +g[sb>>2])*2.0;g[Sa>>2]=(+g[Ba>>2]+ +g[Ca>>2])*1.4142135381698608;g[vb>>2]=(+g[rc>>2]-+g[ub>>2])*2.0;g[Da>>2]=(+g[Ba>>2]-+g[Ca>>2])*1.4142135381698608;g[ob>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[pb>>2]=+g[ob>>2]*2.0;g[ya>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[za>>2]=+g[ya>>2]*2.0;g[w>>2]=+g[c[p>>2]>>2];g[Fa>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2];g[nb>>2]=+g[w>>2]+ +g[Fa>>2];g[xa>>2]=+g[w>>2]-+g[Fa>>2];g[qb>>2]=+g[nb>>2]+ +g[pb>>2];g[Ra>>2]=+g[xa>>2]+ 
+g[za>>2];g[qc>>2]=+g[nb>>2]-+g[pb>>2];g[Aa>>2]=+g[xa>>2]-+g[za>>2];g[Vb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[Wb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2];g[Xb>>2]=+g[Vb>>2]+ +g[Wb>>2];g[H>>2]=+g[Vb>>2]-+g[Wb>>2];g[yb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[zb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2];g[L>>2]=+g[yb>>2]+ +g[zb>>2];g[Yb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[Zb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[_b>>2]=+g[Yb>>2]+ +g[Zb>>2];g[K>>2]=+g[Yb>>2]-+g[Zb>>2];g[Bb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[Cb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[I>>2]=+g[Bb>>2]+ +g[Cb>>2];g[$b>>2]=(+g[Xb>>2]+ +g[_b>>2])*2.0;g[Ua>>2]=+g[H>>2]+ +g[I>>2];g[Va>>2]=+g[L>>2]-+g[K>>2];g[xb>>2]=+g[Xb>>2]-+g[_b>>2];g[Eb>>2]=+g[Ab>>2]-+g[Db>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[ma>>2]=(+g[Db>>2]+ +g[Ab>>2])*2.0;g[bc>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[cc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[P>>2]=+g[bc>>2]-+g[cc>>2];g[Rb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[Sb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2];g[Tb>>2]=+g[Rb>>2]-+g[Sb>>2];g[ca>>2]=+g[Rb>>2]+ +g[Sb>>2];g[ec>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[fc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[ba>>2]=+g[ec>>2]-+g[fc>>2];g[x>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[y>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[Q>>2]=+g[x>>2]+ +g[y>>2];g[hc>>2]=+g[dc>>2]+ +g[gc>>2];g[Ya>>2]=+g[P>>2]+ +g[Q>>2];g[ab>>2]=+g[ca>>2]-+g[ba>>2];g[Hb>>2]=+g[dc>>2]-+g[gc>>2];g[A>>2]=+g[Tb>>2]-+g[z>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[pa>>2]=+g[z>>2]+ +g[Tb>>2];g[ic>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[jc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[kc>>2]=+g[ic>>2]+ 
+g[jc>>2];g[S>>2]=+g[ic>>2]-+g[jc>>2];g[Lb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Mb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[Nb>>2]=+g[Lb>>2]-+g[Mb>>2];g[T>>2]=+g[Lb>>2]+ +g[Mb>>2];g[lc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[mc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2];g[nc>>2]=+g[lc>>2]+ +g[mc>>2];g[V>>2]=+g[lc>>2]-+g[mc>>2];g[Ib>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2];g[Jb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[W>>2]=+g[Jb>>2]+ +g[Ib>>2];g[oc>>2]=+g[kc>>2]+ +g[nc>>2];g[Ob>>2]=+g[Kb>>2]-+g[Nb>>2];g[qa>>2]=+g[Nb>>2]+ +g[Kb>>2];g[Qb>>2]=+g[kc>>2]-+g[nc>>2];g[_>>2]=+g[S>>2]+ +g[T>>2];g[$>>2]=+g[V>>2]+ +g[W>>2];g[aa>>2]=(+g[_>>2]-+g[$>>2])*.7071067690849304;g[Za>>2]=(+g[_>>2]+ +g[$>>2])*.7071067690849304;g[U>>2]=+g[S>>2]-+g[T>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[Y>>2]=(+g[U>>2]+ +g[X>>2])*.7071067690849304;g[$a>>2]=(+g[U>>2]-+g[X>>2])*.7071067690849304;g[pc>>2]=(+g[hc>>2]+ +g[oc>>2])*2.0;g[wa>>2]=(+g[qa>>2]+ +g[pa>>2])*2.0;g[Ub>>2]=+g[qb>>2]+ +g[tb>>2];g[ac>>2]=+g[Ub>>2]+ +g[$b>>2];g[va>>2]=+g[Ub>>2]-+g[$b>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[ac>>2]-+g[pc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[va>>2]+ +g[wa>>2];g[c[n>>2]>>2]=+g[ac>>2]+ +g[pc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[va>>2]-+g[wa>>2];g[la>>2]=+g[qb>>2]-+g[tb>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[ta>>2]=+g[la>>2]+ +g[ma>>2];g[oa>>2]=+g[hc>>2]-+g[oc>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[sa>>2]=(+g[oa>>2]-+g[ra>>2])*1.4142135381698608;g[ua>>2]=(+g[oa>>2]+ +g[ra>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[na>>2]-+g[sa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ta>>2]+ +g[ua>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[na>>2]+ +g[sa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ta>>2]-+g[ua>>2];g[wb>>2]=+g[qc>>2]-+g[vb>>2];g[Fb>>2]=(+g[xb>>2]-+g[Eb>>2])*1.4142135381698608;g[Gb>>2]=+g[wb>>2]+ +g[Fb>>2];g[D>>2]=+g[wb>>2]-+g[Fb>>2];g[Pb>>2]=+g[Hb>>2]+ +g[Ob>>2];g[B>>2]=+g[Qb>>2]+ 
+g[A>>2];g[C>>2]=+g[Pb>>2]*1.8477590084075928-+g[B>>2]*.7653668522834778;g[E>>2]=+g[Pb>>2]*.7653668522834778+ +g[B>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Gb>>2]-+g[C>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[D>>2]+ +g[E>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Gb>>2]+ +g[C>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[D>>2]-+g[E>>2];g[fb>>2]=+g[Ra>>2]+ +g[Sa>>2];g[gb>>2]=+g[Ua>>2]*1.8477590084075928+ +g[Va>>2]*.7653668522834778;g[hb>>2]=+g[fb>>2]-+g[gb>>2];g[lb>>2]=+g[fb>>2]+ +g[gb>>2];g[ib>>2]=+g[Ya>>2]+ +g[Za>>2];g[jb>>2]=+g[ab>>2]-+g[$a>>2];g[kb>>2]=+g[ib>>2]*.39018064737319946-+g[jb>>2]*1.9615705013275146;g[mb>>2]=+g[ib>>2]*1.9615705013275146+ +g[jb>>2]*.39018064737319946;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[hb>>2]-+g[kb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[lb>>2]+ +g[mb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[hb>>2]+ +g[kb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[lb>>2]-+g[mb>>2];g[F>>2]=+g[qc>>2]+ +g[vb>>2];g[G>>2]=(+g[xb>>2]+ +g[Eb>>2])*1.4142135381698608;g[fa>>2]=+g[F>>2]-+g[G>>2];g[ja>>2]=+g[F>>2]+ +g[G>>2];g[ga>>2]=+g[Hb>>2]-+g[Ob>>2];g[ha>>2]=+g[A>>2]-+g[Qb>>2];g[ia>>2]=+g[ga>>2]*.7653668522834778-+g[ha>>2]*1.8477590084075928;g[ka>>2]=+g[ga>>2]*1.8477590084075928+ +g[ha>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[fa>>2]-+g[ia>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ja>>2]+ +g[ka>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[fa>>2]+ +g[ia>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ja>>2]-+g[ka>>2];g[Ja>>2]=+g[Aa>>2]-+g[Da>>2];g[Ka>>2]=+g[J>>2]*.7653668522834778+ +g[M>>2]*1.8477590084075928;g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[Pa>>2]=+g[Ja>>2]+ +g[Ka>>2];g[Ma>>2]=+g[R>>2]-+g[Y>>2];g[Na>>2]=+g[da>>2]-+g[aa>>2];g[Oa>>2]=+g[Ma>>2]*1.111140489578247-+g[Na>>2]*1.662939190864563;g[Qa>>2]=+g[Ma>>2]*1.662939190864563+ +g[Na>>2]*1.111140489578247;g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[La>>2]-+g[Oa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Pa>>2]+ +g[Qa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[La>>2]+ 
+g[Oa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Pa>>2]-+g[Qa>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Wa>>2]=+g[Ua>>2]*.7653668522834778-+g[Va>>2]*1.8477590084075928;g[Xa>>2]=+g[Ta>>2]+ +g[Wa>>2];g[db>>2]=+g[Ta>>2]-+g[Wa>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[cb>>2]=+g[_a>>2]*1.662939190864563-+g[bb>>2]*1.111140489578247;g[eb>>2]=+g[_a>>2]*1.111140489578247+ +g[bb>>2]*1.662939190864563;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Xa>>2]-+g[cb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[db>>2]+ +g[eb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xa>>2]+ +g[cb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[db>>2]-+g[eb>>2];g[Ea>>2]=+g[Aa>>2]+ +g[Da>>2];g[N>>2]=+g[J>>2]*1.8477590084075928-+g[M>>2]*.7653668522834778;g[O>>2]=+g[Ea>>2]+ +g[N>>2];g[Ha>>2]=+g[Ea>>2]-+g[N>>2];g[Z>>2]=+g[R>>2]+ +g[Y>>2];g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[Ga>>2]=+g[Z>>2]*1.9615705013275146-+g[ea>>2]*.39018064737319946;g[Ia>>2]=+g[Z>>2]*.39018064737319946+ +g[ea>>2]*1.9615705013275146;g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[O>>2]-+g[Ga>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ha>>2]+ +g[Ia>>2];g[c[o>>2]>>2]=+g[O>>2]+ +g[Ga>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ha>>2]-+g[Ia>>2];c[sc>>2]=(c[sc>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=tc;return}function tw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,65,11176);i=b;return}function uw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0;C=i;i=i+80|0;n=C+68|0;o=C+64|0;p=C+60|0;q=C+56|0;r=C+52|0;s=C+48|0;t=C+44|0;D=C+40|0;u=C+36|0;v=C+32|0;B=C+20|0;A=C+16|0;w=C+12|0;x=C+8|0;y=C+4|0;z=C;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[D>>2]=k;c[u>>2]=l;c[v>>2]=m;g[C+28>>2]=2.0;g[C+24>>2]=1.7320507764816284;c[B>>2]=c[D>>2];while(1){if((c[B>>2]|0)<=0)break;g[z>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[A>>2]=+g[z>>2]*1.7320507764816284;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[c[n>>2]>>2]=+g[x>>2]*2.0+ +g[w>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[y>>2]+ +g[A>>2];g[c[o>>2]>>2]=+g[y>>2]-+g[A>>2];c[B>>2]=(c[B>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2)}i=C;return}function vw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,66,11224);i=b;return}function ww(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0;F=i;i=i+80|0;n=F+76|0;o=F+72|0;p=F+68|0;q=F+64|0;r=F+60|0;s=F+56|0;t=F+52|0;G=F+48|0;u=F+44|0;v=F+40|0;E=F+32|0;A=F+28|0;D=F+24|0;y=F+20|0;B=F+16|0;z=F+12|0;C=F+8|0;w=F+4|0;x=F;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[G>>2]=k;c[u>>2]=l;c[v>>2]=m;g[F+36>>2]=2.0;c[E>>2]=c[G>>2];while(1){if((c[E>>2]|0)<=0)break;g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[A>>2]=+g[z>>2]*2.0;g[C>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[D>>2]=+g[C>>2]*2.0;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[B>>2]=+g[w>>2]-+g[x>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[y>>2]-+g[A>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[B>>2]+ +g[D>>2];g[c[n>>2]>>2]=+g[y>>2]+ 
+g[A>>2];g[c[o>>2]>>2]=+g[B>>2]-+g[D>>2];c[E>>2]=(c[E>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2)}i=F;return}function xw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,67,11272);i=b;return}function yw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;n=J+108|0;o=J+104|0;p=J+100|0;q=J+96|0;r=J+92|0;s=J+88|0;t=J+84|0;K=J+80|0;u=J+76|0;v=J+72|0;I=J+48|0;F=J+44|0;H=J+40|0;w=J+36|0;z=J+32|0;A=J+28|0;B=J+24|0;G=J+20|0;C=J+16|0;D=J+12|0;E=J+8|0;x=J+4|0;y=J;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[K>>2]=k;c[u>>2]=l;c[v>>2]=m;g[J+68>>2]=2.0;g[J+64>>2]=1.1180340051651;g[J+60>>2]=.5;g[J+56>>2]=1.9021130800247192;g[J+52>>2]=1.1755704879760742;c[I>>2]=c[K>>2];while(1){if((c[I>>2]|0)<=0)break;g[D>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[E>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[F>>2]=+g[D>>2]*1.1755704879760742-+g[E>>2]*1.9021130800247192;g[H>>2]=+g[D>>2]*1.9021130800247192+ +g[E>>2]*1.1755704879760742;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[w>>2]-+g[z>>2]*.5;g[B>>2]=(+g[x>>2]-+g[y>>2])*1.1180340051651;g[c[n>>2]>>2]=+g[z>>2]*2.0+ +g[w>>2];g[G>>2]=+g[B>>2]+ +g[A>>2];g[c[o>>2]>>2]=+g[G>>2]-+g[H>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[G>>2]+ +g[H>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]-+g[F>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]+ +g[F>>2];c[I>>2]=(c[I>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=J;return}function zw(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,68,11320);i=b;return}function Aw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,
lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0;Qg=i;i=i+1712|0;n=Qg+1700|0;o=Qg+1696|0;p=Qg+1692|0;q=Qg+1688|0;r=Qg+1684|0;s=Qg+1680|0;t=Qg+1676|0;Rg=Qg+1672|0;u=Qg+1668|0;v=Qg+1664|0;Pg=Qg+1584|0;pg=Qg+1580|0;gb=Qg+1576|0;F=Qg+1572|0;Ib=Qg+1568|0;cc=Qg+1564|0;Bd=Qg+1560|0;_e=Qg+1556|0;qf=Qg+1552|0;wg=Qg+1548|0;hb=Qg+1544|0;ma=Qg+1540|0;Jb=Qg+1536|0;jc=Qg+1532|0;Cd=Qg+1528|0;bf=Qg+1524|0;rf=Qg+1520|0;nc=Qg+1516|0;df=Qg+1512|0;hf=Qg+1508|0;ad=Qg+1504|0;Eg=Qg+1500|0;Lg=Qg+1496|0;jb=Qg+1492|0;kb=Qg+1488|0;lb=Qg+1484|0;mb=Qg+1480|0;uc=Qg+1476|0;gf=Qg+1472|0;wa=Qg+1468|0;Lb=Qg+1464|0;Zc=Qg+1460|0;ef=Qg+1456|0;H=Qg+1452|0;Mb=Qg+1448|0;Gd=Qg+1444|0;mf=Qg+1440|0;Ce=Qg+1436|0;nd=Qg+1432|0;cg=Qg+1428|0;Qb=Qg+1424|0;kd=Qg+1420|0;nf=Qg+1416|0;Yb=Qg+1412|0;Nc=Qg+1408|0;S=Qg+1404|0;Ra=Qg+1400|0;Nd=Qg+1396|0;Be=Qg+1392|0;vb=Qg+1388|0;Va=Qg+1384|0;z=Qg+1380|0;Vb=Qg+1376|0;se=Qg+1372|0;ye=Qg+1368|0;ve=Qg+1364|0;ze=Qg+1360|0;$=Qg+1356|0;Ma=Qg+1352|0;Ja=Qg+1348|0;Na=Qg+1344|0;Vd=Qg+1340|0;de=Qg+1336|0;ae=Qg+1332|0;gd=Qg+1328|0;Tb=Qg+1324|0;Oc=Qg+1320|0;ee=Qg+1316|0;Zb=Qg+1312|0;Ob=Qg+1308|0;Vc=Qg+1304|0;Qf=Qg+1300|0;$b=Qg+1296|0;E=Qg+1292|0;ac=Qg+1288|0;of=Qg+1284|0;B=Qg+1280|0;Xc=Qg+1276|0;Wc=Qg+1272|0;w=Qg+1268|0;Fa=Qg+1264|0;Of=Qg+1260|0;Pf=Qg+1256|0;C=Qg+1252|0;D=Qg+1248|0;_b=Qg+1244|0;bc=Qg+1240|0;Ye=Qg+1236|0;Ze=Qg+1232|0;sg=Qg+1228|0;dc=Qg+1224|0;ha=Qg+1220|0;hc=Qg+1216|0;vg=Qg+1212|0;gc=Qg+1208|0;ka=Qg+1204|0;ec=Qg+1200|0;G=Qg+1196|0;la=Qg+1192|0;qg=Qg+1188|0;rg=Qg+1184|0;fa=Qg+1180|0;ga=Qg+1176|0;tg=Qg+1172|0;ug=Qg+1168|0;ia=Qg+1164|0;ja=Qg+1160|0;fc=Qg+1156|0;ic=Qg+1152|0;$e=Qg+1148|0;af=Qg+1144|0;Ag=Qg+1140|0;lc=Qg+1136|0;Aa=Qg+1132|0;$c=Qg+1128|0;Dg=Qg+1124|0;_c=Qg+1120|0;Da=Qg+1116|0;mc=Qg+1112|0;Hg=Qg+1108|0;oc=Qg+1104|0;ua=Qg+1100|0;pc=Qg+1096|0;Kg=Qg+1092|0;rc=Qg+1088|0;ra=Qg+1084|0;sc=Qg+1080|0;yg=Qg+1076|0;zg=Qg+1072|0;ya=Qg+1068|0;za=Qg+1064|0;Bg=Qg+
1060|0;Cg=Qg+1056|0;Ba=Qg+1052|0;Ca=Qg+1048|0;Fg=Qg+1044|0;Gg=Qg+1040|0;sa=Qg+1036|0;ta=Qg+1032|0;Ig=Qg+1028|0;Jg=Qg+1024|0;pa=Qg+1020|0;qa=Qg+1016|0;qc=Qg+1012|0;tc=Qg+1008|0;oa=Qg+1004|0;va=Qg+1e3|0;wc=Qg+996|0;Yc=Qg+992|0;xa=Qg+988|0;Ea=Qg+984|0;Sf=Qg+980|0;ed=Qg+976|0;qb=Qg+972|0;md=Qg+968|0;Vf=Qg+964|0;ld=Qg+960|0;tb=Qg+956|0;fd=Qg+952|0;Zf=Qg+948|0;Hd=Qg+944|0;Q=Qg+940|0;Id=Qg+936|0;ag=Qg+932|0;Kd=Qg+928|0;N=Qg+924|0;Ld=Qg+920|0;Og=Qg+916|0;Rf=Qg+912|0;ob=Qg+908|0;pb=Qg+904|0;Tf=Qg+900|0;Uf=Qg+896|0;rb=Qg+892|0;sb=Qg+888|0;Xf=Qg+884|0;Yf=Qg+880|0;O=Qg+876|0;P=Qg+872|0;_f=Qg+868|0;$f=Qg+864|0;L=Qg+860|0;M=Qg+856|0;Wf=Qg+852|0;bg=Qg+848|0;K=Qg+844|0;R=Qg+840|0;id=Qg+836|0;jd=Qg+832|0;Wb=Qg+828|0;Xb=Qg+824|0;Jd=Qg+820|0;Md=Qg+816|0;Pa=Qg+812|0;ub=Qg+808|0;fg=Qg+804|0;Pd=Qg+800|0;W=Qg+796|0;Td=Qg+792|0;ig=Qg+788|0;Sd=Qg+784|0;Z=Qg+780|0;Qd=Qg+776|0;mg=Qg+772|0;Wd=Qg+768|0;da=Qg+764|0;_d=Qg+760|0;x=Qg+756|0;Zd=Qg+752|0;Ha=Qg+748|0;Xd=Qg+744|0;dg=Qg+740|0;eg=Qg+736|0;U=Qg+732|0;V=Qg+728|0;gg=Qg+724|0;hg=Qg+720|0;X=Qg+716|0;Y=Qg+712|0;kg=Qg+708|0;lg=Qg+704|0;ba=Qg+700|0;ca=Qg+696|0;ng=Qg+692|0;og=Qg+688|0;ea=Qg+684|0;Ga=Qg+680|0;jg=Qg+676|0;y=Qg+672|0;qe=Qg+668|0;re=Qg+664|0;te=Qg+660|0;ue=Qg+656|0;T=Qg+652|0;_=Qg+648|0;aa=Qg+644|0;Ia=Qg+640|0;Rd=Qg+636|0;Ud=Qg+632|0;Yd=Qg+628|0;$d=Qg+624|0;Rb=Qg+620|0;Sb=Qg+616|0;A=Qg+612|0;Uc=Qg+608|0;Ng=Qg+604|0;Tc=Qg+600|0;xg=Qg+596|0;Mg=Qg+592|0;Dc=Qg+588|0;Hc=Qg+584|0;Gc=Qg+580|0;Ic=Qg+576|0;Bc=Qg+572|0;Cc=Qg+568|0;Ec=Qg+564|0;Fc=Qg+560|0;Lc=Qg+556|0;Rc=Qg+552|0;Qc=Qg+548|0;Sc=Qg+544|0;Jc=Qg+540|0;Kc=Qg+536|0;Mc=Qg+532|0;Pc=Qg+528|0;Pb=Qg+524|0;zc=Qg+520|0;yc=Qg+516|0;Ac=Qg+512|0;ib=Qg+508|0;nb=Qg+504|0;Ub=Qg+500|0;xc=Qg+496|0;La=Qg+492|0;Db=Qg+488|0;wb=Qg+484|0;Eb=Qg+480|0;J=Qg+476|0;yb=Qg+472|0;Cb=Qg+468|0;Gb=Qg+464|0;Ka=Qg+460|0;Oa=Qg+456|0;na=Qg+452|0;I=Qg+448|0;Ab=Qg+444|0;Bb=Qg+440|0;xb=Qg+436|0;Hb=Qg+432|0;zb=Qg+428|0;Fb=Qg+424|0;Ta=Qg+420|0;bb=Qg+416|0;Wa=Qg+412|0;cb=Qg+408|0;Qa=Qg+404|0;Ya=Qg+400|0;ab=Qg+396|0;eb=Qg+39
2|0;Sa=Qg+388|0;Ua=Qg+384|0;Kb=Qg+380|0;Nb=Qg+376|0;_a=Qg+372|0;$a=Qg+368|0;Xa=Qg+364|0;fb=Qg+360|0;Za=Qg+356|0;db=Qg+352|0;cf=Qg+348|0;Ie=Qg+344|0;kf=Qg+340|0;Je=Qg+336|0;xe=Qg+332|0;Le=Qg+328|0;Ee=Qg+324|0;Me=Qg+320|0;ff=Qg+316|0;jf=Qg+312|0;pe=Qg+308|0;we=Qg+304|0;Ae=Qg+300|0;De=Qg+296|0;lf=Qg+292|0;Fe=Qg+288|0;Oe=Qg+284|0;pf=Qg+280|0;Ge=Qg+276|0;He=Qg+272|0;Ke=Qg+268|0;Ne=Qg+264|0;kc=Qg+260|0;td=Qg+256|0;cd=Qg+252|0;ud=Qg+248|0;ce=Qg+244|0;wd=Qg+240|0;pd=Qg+236|0;xd=Qg+232|0;vc=Qg+228|0;bd=Qg+224|0;Od=Qg+220|0;be=Qg+216|0;hd=Qg+212|0;od=Qg+208|0;dd=Qg+204|0;qd=Qg+200|0;zd=Qg+196|0;Ad=Qg+192|0;rd=Qg+188|0;sd=Qg+184|0;vd=Qg+180|0;yd=Qg+176|0;sf=Qg+172|0;Gf=Qg+168|0;vf=Qg+164|0;Hf=Qg+160|0;zf=Qg+156|0;Jf=Qg+152|0;Cf=Qg+148|0;Kf=Qg+144|0;tf=Qg+140|0;uf=Qg+136|0;xf=Qg+132|0;yf=Qg+128|0;Af=Qg+124|0;Bf=Qg+120|0;wf=Qg+116|0;Df=Qg+112|0;Mf=Qg+108|0;Nf=Qg+104|0;Ef=Qg+100|0;Ff=Qg+96|0;If=Qg+92|0;Lf=Qg+88|0;Dd=Qg+84|0;Qe=Qg+80|0;fe=Qg+76|0;Re=Qg+72|0;je=Qg+68|0;Te=Qg+64|0;me=Qg+60|0;Ue=Qg+56|0;Ed=Qg+52|0;Fd=Qg+48|0;he=Qg+44|0;ie=Qg+40|0;ke=Qg+36|0;le=Qg+32|0;ge=Qg+28|0;ne=Qg+24|0;We=Qg+20|0;Xe=Qg+16|0;oe=Qg+12|0;Pe=Qg+8|0;Se=Qg+4|0;Ve=Qg;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Rg>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Qg+1660>>2]=1.2687865495681763;g[Qg+1656>>2]=1.5460208654403687;g[Qg+1652>>2]=.1960342824459076;g[Qg+1648>>2]=1.990369439125061;g[Qg+1644>>2]=.9427934885025024;g[Qg+1640>>2]=1.7638425827026367;g[Qg+1636>>2]=.580569326877594;g[Qg+1632>>2]=1.913880705833435;g[Qg+1628>>2]=1.111140489578247;g[Qg+1624>>2]=1.662939190864563;g[Qg+1620>>2]=.39018064737319946;g[Qg+1616>>2]=1.9615705013275146;g[Qg+1612>>2]=.9238795042037964;g[Qg+1608>>2]=.3826834261417389;g[Qg+1604>>2]=.7071067690849304;g[Qg+1600>>2]=.7653668522834778;g[Qg+1596>>2]=1.8477590084075928;g[Qg+1592>>2]=1.4142135381698608;g[Qg+1588>>2]=2.0;c[Pg>>2]=c[Rg>>2];while(1){if((c[Pg>>2]|0)<=0)break;g[Xc>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<4<<2)>>2];g[ee>>2]=+g[Xc>>2]*2.0;g[Wc>>2]=+g[(c[q>>2]|0)+(c[t>>2]
<<4<<2)>>2];g[Zb>>2]=+g[Wc>>2]*2.0;g[w>>2]=+g[c[p>>2]>>2];g[Fa>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<5<<2)>>2];g[Ob>>2]=+g[w>>2]+ +g[Fa>>2];g[Vc>>2]=+g[w>>2]-+g[Fa>>2];g[Of>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Pf>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*24<<2)>>2];g[Qf>>2]=(+g[Of>>2]+ +g[Pf>>2])*2.0;g[$b>>2]=+g[Of>>2]-+g[Pf>>2];g[C>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[D>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*24<<2)>>2];g[E>>2]=(+g[C>>2]-+g[D>>2])*2.0;g[ac>>2]=+g[C>>2]+ +g[D>>2];g[of>>2]=+g[Ob>>2]+ +g[ee>>2];g[pg>>2]=+g[of>>2]+ +g[Qf>>2];g[gb>>2]=+g[of>>2]-+g[Qf>>2];g[B>>2]=+g[Ob>>2]-+g[ee>>2];g[F>>2]=+g[B>>2]-+g[E>>2];g[Ib>>2]=+g[B>>2]+ +g[E>>2];g[_b>>2]=+g[Vc>>2]-+g[Zb>>2];g[bc>>2]=(+g[$b>>2]-+g[ac>>2])*1.4142135381698608;g[cc>>2]=+g[_b>>2]+ +g[bc>>2];g[Bd>>2]=+g[_b>>2]-+g[bc>>2];g[Ye>>2]=+g[Vc>>2]+ +g[Zb>>2];g[Ze>>2]=(+g[$b>>2]+ +g[ac>>2])*1.4142135381698608;g[_e>>2]=+g[Ye>>2]-+g[Ze>>2];g[qf>>2]=+g[Ye>>2]+ +g[Ze>>2];g[qg>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[rg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*28<<2)>>2];g[sg>>2]=+g[qg>>2]+ +g[rg>>2];g[dc>>2]=+g[qg>>2]-+g[rg>>2];g[fa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[ga>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*28<<2)>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[hc>>2]=+g[fa>>2]+ +g[ga>>2];g[tg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*20<<2)>>2];g[ug>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[vg>>2]=+g[tg>>2]+ +g[ug>>2];g[gc>>2]=+g[tg>>2]-+g[ug>>2];g[ia>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*20<<2)>>2];g[ja>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2];g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[ec>>2]=+g[ia>>2]+ +g[ja>>2];g[wg>>2]=(+g[sg>>2]+ +g[vg>>2])*2.0;g[hb>>2]=(+g[ka>>2]+ +g[ha>>2])*2.0;g[G>>2]=+g[sg>>2]-+g[vg>>2];g[la>>2]=+g[ha>>2]-+g[ka>>2];g[ma>>2]=(+g[G>>2]-+g[la>>2])*1.4142135381698608;g[Jb>>2]=(+g[G>>2]+ +g[la>>2])*1.4142135381698608;g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[ic>>2]=+g[gc>>2]+ +g[hc>>2];g[jc>>2]=+g[fc>>2]*1.8477590084075928-+g[ic>>2]*.7653668522834778;g[Cd>>2]=+g[fc>>2]*.7653668522834778+ +g[ic>>2]*1.8477590084075928;g[$e>>2]=+g[dc>>2]+ 
+g[ec>>2];g[af>>2]=+g[hc>>2]-+g[gc>>2];g[bf>>2]=+g[$e>>2]*.7653668522834778-+g[af>>2]*1.8477590084075928;g[rf>>2]=+g[$e>>2]*1.8477590084075928+ +g[af>>2]*.7653668522834778;g[yg>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[zg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*30<<2)>>2];g[Ag>>2]=+g[yg>>2]+ +g[zg>>2];g[lc>>2]=+g[yg>>2]-+g[zg>>2];g[ya>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[za>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*30<<2)>>2];g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[$c>>2]=+g[ya>>2]+ +g[za>>2];g[Bg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*18<<2)>>2];g[Cg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2];g[Dg>>2]=+g[Bg>>2]+ +g[Cg>>2];g[_c>>2]=+g[Bg>>2]-+g[Cg>>2];g[Ba>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*18<<2)>>2];g[Ca>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2];g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[mc>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Fg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[Gg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*22<<2)>>2];g[Hg>>2]=+g[Fg>>2]+ +g[Gg>>2];g[oc>>2]=+g[Fg>>2]-+g[Gg>>2];g[sa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[ta>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*22<<2)>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[pc>>2]=+g[sa>>2]+ +g[ta>>2];g[Ig>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[Jg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*26<<2)>>2];g[Kg>>2]=+g[Ig>>2]+ +g[Jg>>2];g[rc>>2]=+g[Ig>>2]-+g[Jg>>2];g[pa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*26<<2)>>2];g[qa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[sc>>2]=+g[qa>>2]+ +g[pa>>2];g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[df>>2]=+g[lc>>2]+ +g[mc>>2];g[hf>>2]=+g[$c>>2]-+g[_c>>2];g[ad>>2]=+g[_c>>2]+ +g[$c>>2];g[Eg>>2]=+g[Ag>>2]+ +g[Dg>>2];g[Lg>>2]=+g[Hg>>2]+ +g[Kg>>2];g[jb>>2]=+g[Eg>>2]-+g[Lg>>2];g[kb>>2]=+g[Da>>2]+ +g[Aa>>2];g[lb>>2]=+g[ua>>2]+ +g[ra>>2];g[mb>>2]=+g[kb>>2]-+g[lb>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[tc>>2]=+g[rc>>2]-+g[sc>>2];g[uc>>2]=(+g[qc>>2]+ +g[tc>>2])*.7071067690849304;g[gf>>2]=(+g[qc>>2]-+g[tc>>2])*.7071067690849304;g[oa>>2]=+g[Ag>>2]-+g[Dg>>2];g[va>>2]=+g[ra>>2]-+g[ua>>2];g[wa>>2]=+g[oa>>2]+ +g[va>>2];g[Lb>>2]=+g[oa>>2]-+g[va>>2];g[wc>>2]=+g[oc>>2]+ 
+g[pc>>2];g[Yc>>2]=+g[rc>>2]+ +g[sc>>2];g[Zc>>2]=(+g[wc>>2]-+g[Yc>>2])*.7071067690849304;g[ef>>2]=(+g[wc>>2]+ +g[Yc>>2])*.7071067690849304;g[xa>>2]=+g[Hg>>2]-+g[Kg>>2];g[Ea>>2]=+g[Aa>>2]-+g[Da>>2];g[H>>2]=+g[xa>>2]+ +g[Ea>>2];g[Mb>>2]=+g[Ea>>2]-+g[xa>>2];g[Og>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[Rf>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*31<<2)>>2];g[Sf>>2]=+g[Og>>2]+ +g[Rf>>2];g[ed>>2]=+g[Og>>2]-+g[Rf>>2];g[ob>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[pb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*31<<2)>>2];g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[md>>2]=+g[ob>>2]+ +g[pb>>2];g[Tf>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*17<<2)>>2];g[Uf>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2];g[Vf>>2]=+g[Tf>>2]+ +g[Uf>>2];g[ld>>2]=+g[Tf>>2]-+g[Uf>>2];g[rb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*17<<2)>>2];g[sb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2];g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[fd>>2]=+g[rb>>2]+ +g[sb>>2];g[Xf>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[Yf>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*23<<2)>>2];g[Zf>>2]=+g[Xf>>2]+ +g[Yf>>2];g[Hd>>2]=+g[Xf>>2]-+g[Yf>>2];g[O>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[P>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*23<<2)>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[Id>>2]=+g[O>>2]+ +g[P>>2];g[_f>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[$f>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*25<<2)>>2];g[ag>>2]=+g[_f>>2]+ +g[$f>>2];g[Kd>>2]=+g[_f>>2]-+g[$f>>2];g[L>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*25<<2)>>2];g[M>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Ld>>2]=+g[M>>2]+ +g[L>>2];g[Gd>>2]=+g[ed>>2]-+g[fd>>2];g[mf>>2]=+g[ed>>2]+ +g[fd>>2];g[Ce>>2]=+g[md>>2]-+g[ld>>2];g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[Wf>>2]=+g[Sf>>2]+ +g[Vf>>2];g[bg>>2]=+g[Zf>>2]+ +g[ag>>2];g[cg>>2]=+g[Wf>>2]+ +g[bg>>2];g[Qb>>2]=+g[Wf>>2]-+g[bg>>2];g[id>>2]=+g[Hd>>2]+ +g[Id>>2];g[jd>>2]=+g[Kd>>2]+ +g[Ld>>2];g[kd>>2]=(+g[id>>2]-+g[jd>>2])*.7071067690849304;g[nf>>2]=(+g[id>>2]+ +g[jd>>2])*.7071067690849304;g[Wb>>2]=+g[tb>>2]+ +g[qb>>2];g[Xb>>2]=+g[Q>>2]+ +g[N>>2];g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2];g[Nc>>2]=+g[Xb>>2]+ 
+g[Wb>>2];g[K>>2]=+g[Sf>>2]-+g[Vf>>2];g[R>>2]=+g[N>>2]-+g[Q>>2];g[S>>2]=+g[K>>2]+ +g[R>>2];g[Ra>>2]=+g[K>>2]-+g[R>>2];g[Jd>>2]=+g[Hd>>2]-+g[Id>>2];g[Md>>2]=+g[Kd>>2]-+g[Ld>>2];g[Nd>>2]=(+g[Jd>>2]+ +g[Md>>2])*.7071067690849304;g[Be>>2]=(+g[Jd>>2]-+g[Md>>2])*.7071067690849304;g[Pa>>2]=+g[Zf>>2]-+g[ag>>2];g[ub>>2]=+g[qb>>2]-+g[tb>>2];g[vb>>2]=+g[Pa>>2]+ +g[ub>>2];g[Va>>2]=+g[ub>>2]-+g[Pa>>2];g[dg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[eg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*27<<2)>>2];g[fg>>2]=+g[dg>>2]+ +g[eg>>2];g[Pd>>2]=+g[dg>>2]-+g[eg>>2];g[U>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[V>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*27<<2)>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[Td>>2]=+g[U>>2]+ +g[V>>2];g[gg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*21<<2)>>2];g[hg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[ig>>2]=+g[gg>>2]+ +g[hg>>2];g[Sd>>2]=+g[gg>>2]-+g[hg>>2];g[X>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*21<<2)>>2];g[Y>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[Qd>>2]=+g[X>>2]+ +g[Y>>2];g[kg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[lg>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*29<<2)>>2];g[mg>>2]=+g[kg>>2]+ +g[lg>>2];g[Wd>>2]=+g[kg>>2]-+g[lg>>2];g[ba>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*29<<2)>>2];g[ca>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[da>>2]=+g[ba>>2]-+g[ca>>2];g[_d>>2]=+g[ca>>2]+ +g[ba>>2];g[ng>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2];g[og>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*19<<2)>>2];g[x>>2]=+g[ng>>2]+ +g[og>>2];g[Zd>>2]=+g[ng>>2]-+g[og>>2];g[ea>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2];g[Ga>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*19<<2)>>2];g[Ha>>2]=+g[ea>>2]-+g[Ga>>2];g[Xd>>2]=+g[ea>>2]+ +g[Ga>>2];g[jg>>2]=+g[fg>>2]+ +g[ig>>2];g[y>>2]=+g[mg>>2]+ +g[x>>2];g[z>>2]=+g[jg>>2]+ +g[y>>2];g[Vb>>2]=+g[jg>>2]-+g[y>>2];g[qe>>2]=+g[Pd>>2]+ +g[Qd>>2];g[re>>2]=+g[Td>>2]-+g[Sd>>2];g[se>>2]=+g[qe>>2]*.3826834261417389-+g[re>>2]*.9238795042037964;g[ye>>2]=+g[qe>>2]*.9238795042037964+ +g[re>>2]*.3826834261417389;g[te>>2]=+g[Wd>>2]+ +g[Xd>>2];g[ue>>2]=+g[Zd>>2]+ 
+g[_d>>2];g[ve>>2]=+g[te>>2]*.3826834261417389-+g[ue>>2]*.9238795042037964;g[ze>>2]=+g[te>>2]*.9238795042037964+ +g[ue>>2]*.3826834261417389;g[T>>2]=+g[fg>>2]-+g[ig>>2];g[_>>2]=+g[W>>2]-+g[Z>>2];g[$>>2]=+g[T>>2]-+g[_>>2];g[Ma>>2]=+g[T>>2]+ +g[_>>2];g[aa>>2]=+g[mg>>2]-+g[x>>2];g[Ia>>2]=+g[da>>2]-+g[Ha>>2];g[Ja>>2]=+g[aa>>2]+ +g[Ia>>2];g[Na>>2]=+g[Ia>>2]-+g[aa>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Ud>>2]=+g[Sd>>2]+ +g[Td>>2];g[Vd>>2]=+g[Rd>>2]*.9238795042037964-+g[Ud>>2]*.3826834261417389;g[de>>2]=+g[Rd>>2]*.3826834261417389+ +g[Ud>>2]*.9238795042037964;g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2];g[$d>>2]=+g[Zd>>2]-+g[_d>>2];g[ae>>2]=+g[Yd>>2]*.9238795042037964+ +g[$d>>2]*.3826834261417389;g[gd>>2]=+g[$d>>2]*.9238795042037964-+g[Yd>>2]*.3826834261417389;g[Rb>>2]=+g[Ha>>2]+ +g[da>>2];g[Sb>>2]=+g[Z>>2]+ +g[W>>2];g[Tb>>2]=+g[Rb>>2]-+g[Sb>>2];g[Oc>>2]=+g[Sb>>2]+ +g[Rb>>2];g[A>>2]=(+g[cg>>2]+ +g[z>>2])*2.0;g[Uc>>2]=(+g[Oc>>2]+ +g[Nc>>2])*2.0;g[xg>>2]=+g[pg>>2]+ +g[wg>>2];g[Mg>>2]=(+g[Eg>>2]+ +g[Lg>>2])*2.0;g[Ng>>2]=+g[xg>>2]+ +g[Mg>>2];g[Tc>>2]=+g[xg>>2]-+g[Mg>>2];g[(c[n>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Ng>>2]-+g[A>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Tc>>2]+ +g[Uc>>2];g[c[n>>2]>>2]=+g[Ng>>2]+ +g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Tc>>2]-+g[Uc>>2];g[Bc>>2]=+g[gb>>2]+ +g[hb>>2];g[Cc>>2]=(+g[jb>>2]+ +g[mb>>2])*1.4142135381698608;g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Hc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Ec>>2]=+g[Qb>>2]-+g[Tb>>2];g[Fc>>2]=+g[Yb>>2]-+g[Vb>>2];g[Gc>>2]=+g[Ec>>2]*.7653668522834778-+g[Fc>>2]*1.8477590084075928;g[Ic>>2]=+g[Ec>>2]*1.8477590084075928+ +g[Fc>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Dc>>2]-+g[Gc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[Hc>>2]+ +g[Ic>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Dc>>2]+ +g[Gc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Hc>>2]-+g[Ic>>2];g[Jc>>2]=+g[pg>>2]-+g[wg>>2];g[Kc>>2]=(+g[lb>>2]+ +g[kb>>2])*2.0;g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[Rc>>2]=+g[Jc>>2]+ 
+g[Kc>>2];g[Mc>>2]=+g[cg>>2]-+g[z>>2];g[Pc>>2]=+g[Nc>>2]-+g[Oc>>2];g[Qc>>2]=(+g[Mc>>2]-+g[Pc>>2])*1.4142135381698608;g[Sc>>2]=(+g[Mc>>2]+ +g[Pc>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[Lc>>2]-+g[Qc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[Rc>>2]+ +g[Sc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Lc>>2]+ +g[Qc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Rc>>2]-+g[Sc>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[nb>>2]=(+g[jb>>2]-+g[mb>>2])*1.4142135381698608;g[Pb>>2]=+g[ib>>2]+ +g[nb>>2];g[zc>>2]=+g[ib>>2]-+g[nb>>2];g[Ub>>2]=+g[Qb>>2]+ +g[Tb>>2];g[xc>>2]=+g[Vb>>2]+ +g[Yb>>2];g[yc>>2]=+g[Ub>>2]*1.8477590084075928-+g[xc>>2]*.7653668522834778;g[Ac>>2]=+g[Ub>>2]*.7653668522834778+ +g[xc>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Pb>>2]-+g[yc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[zc>>2]+ +g[Ac>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Pb>>2]+ +g[yc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[zc>>2]-+g[Ac>>2];g[Ka>>2]=(+g[$>>2]+ +g[Ja>>2])*.7071067690849304;g[La>>2]=+g[S>>2]+ +g[Ka>>2];g[Db>>2]=+g[S>>2]-+g[Ka>>2];g[Oa>>2]=(+g[Ma>>2]+ +g[Na>>2])*.7071067690849304;g[wb>>2]=+g[Oa>>2]+ +g[vb>>2];g[Eb>>2]=+g[vb>>2]-+g[Oa>>2];g[na>>2]=+g[F>>2]+ +g[ma>>2];g[I>>2]=+g[wa>>2]*1.8477590084075928-+g[H>>2]*.7653668522834778;g[J>>2]=+g[na>>2]+ +g[I>>2];g[yb>>2]=+g[na>>2]-+g[I>>2];g[Ab>>2]=+g[F>>2]-+g[ma>>2];g[Bb>>2]=+g[wa>>2]*.7653668522834778+ +g[H>>2]*1.8477590084075928;g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Gb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[xb>>2]=+g[La>>2]*1.9615705013275146-+g[wb>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[J>>2]-+g[xb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[J>>2]+ +g[xb>>2];g[Hb>>2]=+g[Db>>2]*1.662939190864563+ +g[Eb>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Gb>>2]-+g[Hb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[Gb>>2]+ +g[Hb>>2];g[zb>>2]=+g[La>>2]*.39018064737319946+ 
+g[wb>>2]*1.9615705013275146;g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[yb>>2]-+g[zb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[yb>>2]+ +g[zb>>2];g[Fb>>2]=+g[Db>>2]*1.111140489578247-+g[Eb>>2]*1.662939190864563;g[(c[n>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Cb>>2]-+g[Fb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Cb>>2]+ +g[Fb>>2];g[Sa>>2]=(+g[Na>>2]-+g[Ma>>2])*.7071067690849304;g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[bb>>2]=+g[Ra>>2]-+g[Sa>>2];g[Ua>>2]=(+g[$>>2]-+g[Ja>>2])*.7071067690849304;g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[cb>>2]=+g[Va>>2]-+g[Ua>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[Nb>>2]=+g[Lb>>2]*.7653668522834778-+g[Mb>>2]*1.8477590084075928;g[Qa>>2]=+g[Kb>>2]+ +g[Nb>>2];g[Ya>>2]=+g[Kb>>2]-+g[Nb>>2];g[_a>>2]=+g[Ib>>2]+ +g[Jb>>2];g[$a>>2]=+g[Lb>>2]*1.8477590084075928+ +g[Mb>>2]*.7653668522834778;g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[eb>>2]=+g[_a>>2]+ +g[$a>>2];g[Xa>>2]=+g[Ta>>2]*1.662939190864563-+g[Wa>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[Qa>>2]-+g[Xa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Qa>>2]+ +g[Xa>>2];g[fb>>2]=+g[bb>>2]*1.9615705013275146+ +g[cb>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[eb>>2]-+g[fb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[eb>>2]+ +g[fb>>2];g[Za>>2]=+g[Ta>>2]*1.111140489578247+ +g[Wa>>2]*1.662939190864563;g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ya>>2]-+g[Za>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[Ya>>2]+ +g[Za>>2];g[db>>2]=+g[bb>>2]*.39018064737319946-+g[cb>>2]*1.9615705013275146;g[(c[n>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[ab>>2]-+g[db>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ab>>2]+ +g[db>>2];g[cf>>2]=+g[_e>>2]+ +g[bf>>2];g[Ie>>2]=+g[_e>>2]-+g[bf>>2];g[ff>>2]=+g[df>>2]-+g[ef>>2];g[jf>>2]=+g[gf>>2]+ +g[hf>>2];g[kf>>2]=+g[ff>>2]*1.662939190864563-+g[jf>>2]*1.111140489578247;g[Je>>2]=+g[ff>>2]*1.111140489578247+ +g[jf>>2]*1.662939190864563;g[pe>>2]=+g[mf>>2]-+g[nf>>2];g[we>>2]=+g[se>>2]+ +g[ve>>2];g[xe>>2]=+g[pe>>2]+ +g[we>>2];g[Le>>2]=+g[pe>>2]-+g[we>>2];g[Ae>>2]=+g[ye>>2]-+g[ze>>2];g[De>>2]=+g[Be>>2]+ 
+g[Ce>>2];g[Ee>>2]=+g[Ae>>2]+ +g[De>>2];g[Me>>2]=+g[De>>2]-+g[Ae>>2];g[lf>>2]=+g[cf>>2]+ +g[kf>>2];g[Fe>>2]=+g[xe>>2]*1.913880705833435-+g[Ee>>2]*.580569326877594;g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[lf>>2]-+g[Fe>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[lf>>2]+ +g[Fe>>2];g[Oe>>2]=+g[Ie>>2]+ +g[Je>>2];g[pf>>2]=+g[Le>>2]*1.7638425827026367+ +g[Me>>2]*.9427934885025024;g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Oe>>2]-+g[pf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[Oe>>2]+ +g[pf>>2];g[Ge>>2]=+g[cf>>2]-+g[kf>>2];g[He>>2]=+g[xe>>2]*.580569326877594+ +g[Ee>>2]*1.913880705833435;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ge>>2]-+g[He>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[Ge>>2]+ +g[He>>2];g[Ke>>2]=+g[Ie>>2]-+g[Je>>2];g[Ne>>2]=+g[Le>>2]*.9427934885025024-+g[Me>>2]*1.7638425827026367;g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Ke>>2]-+g[Ne>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ke>>2]+ +g[Ne>>2];g[kc>>2]=+g[cc>>2]+ +g[jc>>2];g[td>>2]=+g[cc>>2]-+g[jc>>2];g[vc>>2]=+g[nc>>2]+ +g[uc>>2];g[bd>>2]=+g[Zc>>2]+ +g[ad>>2];g[cd>>2]=+g[vc>>2]*1.9615705013275146-+g[bd>>2]*.39018064737319946;g[ud>>2]=+g[vc>>2]*.39018064737319946+ +g[bd>>2]*1.9615705013275146;g[Od>>2]=+g[Gd>>2]+ +g[Nd>>2];g[be>>2]=+g[Vd>>2]+ +g[ae>>2];g[ce>>2]=+g[Od>>2]+ +g[be>>2];g[wd>>2]=+g[Od>>2]-+g[be>>2];g[hd>>2]=+g[de>>2]+ +g[gd>>2];g[od>>2]=+g[kd>>2]+ +g[nd>>2];g[pd>>2]=+g[hd>>2]+ +g[od>>2];g[xd>>2]=+g[od>>2]-+g[hd>>2];g[dd>>2]=+g[kc>>2]+ +g[cd>>2];g[qd>>2]=+g[ce>>2]*1.990369439125061-+g[pd>>2]*.1960342824459076;g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[dd>>2]-+g[qd>>2];g[c[o>>2]>>2]=+g[dd>>2]+ +g[qd>>2];g[zd>>2]=+g[td>>2]+ +g[ud>>2];g[Ad>>2]=+g[wd>>2]*1.5460208654403687+ +g[xd>>2]*1.2687865495681763;g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[zd>>2]-+g[Ad>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[zd>>2]+ +g[Ad>>2];g[rd>>2]=+g[kc>>2]-+g[cd>>2];g[sd>>2]=+g[ce>>2]*.1960342824459076+ +g[pd>>2]*1.990369439125061;g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[rd>>2]-+g[sd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[rd>>2]+ 
+g[sd>>2];g[vd>>2]=+g[td>>2]-+g[ud>>2];g[yd>>2]=+g[wd>>2]*1.2687865495681763-+g[xd>>2]*1.5460208654403687;g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[vd>>2]-+g[yd>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[vd>>2]+ +g[yd>>2];g[sf>>2]=+g[qf>>2]-+g[rf>>2];g[Gf>>2]=+g[qf>>2]+ +g[rf>>2];g[tf>>2]=+g[df>>2]+ +g[ef>>2];g[uf>>2]=+g[hf>>2]-+g[gf>>2];g[vf>>2]=+g[tf>>2]*.39018064737319946-+g[uf>>2]*1.9615705013275146;g[Hf>>2]=+g[tf>>2]*1.9615705013275146+ +g[uf>>2]*.39018064737319946;g[xf>>2]=+g[mf>>2]+ +g[nf>>2];g[yf>>2]=+g[ye>>2]+ +g[ze>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[Jf>>2]=+g[xf>>2]+ +g[yf>>2];g[Af>>2]=+g[se>>2]-+g[ve>>2];g[Bf>>2]=+g[Ce>>2]-+g[Be>>2];g[Cf>>2]=+g[Af>>2]+ +g[Bf>>2];g[Kf>>2]=+g[Bf>>2]-+g[Af>>2];g[wf>>2]=+g[sf>>2]+ +g[vf>>2];g[Df>>2]=+g[zf>>2]*1.5460208654403687-+g[Cf>>2]*1.2687865495681763;g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[wf>>2]-+g[Df>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wf>>2]+ +g[Df>>2];g[Mf>>2]=+g[Gf>>2]+ +g[Hf>>2];g[Nf>>2]=+g[Jf>>2]*1.990369439125061+ +g[Kf>>2]*.1960342824459076;g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Mf>>2]-+g[Nf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[Mf>>2]+ +g[Nf>>2];g[Ef>>2]=+g[sf>>2]-+g[vf>>2];g[Ff>>2]=+g[zf>>2]*1.2687865495681763+ +g[Cf>>2]*1.5460208654403687;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ef>>2]-+g[Ff>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[Ef>>2]+ +g[Ff>>2];g[If>>2]=+g[Gf>>2]-+g[Hf>>2];g[Lf>>2]=+g[Jf>>2]*.1960342824459076-+g[Kf>>2]*1.990369439125061;g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[If>>2]-+g[Lf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[If>>2]+ +g[Lf>>2];g[Dd>>2]=+g[Bd>>2]-+g[Cd>>2];g[Qe>>2]=+g[Bd>>2]+ +g[Cd>>2];g[Ed>>2]=+g[nc>>2]-+g[uc>>2];g[Fd>>2]=+g[ad>>2]-+g[Zc>>2];g[fe>>2]=+g[Ed>>2]*1.111140489578247-+g[Fd>>2]*1.662939190864563;g[Re>>2]=+g[Ed>>2]*1.662939190864563+ +g[Fd>>2]*1.111140489578247;g[he>>2]=+g[Gd>>2]-+g[Nd>>2];g[ie>>2]=+g[gd>>2]-+g[de>>2];g[je>>2]=+g[he>>2]+ 
+g[ie>>2];g[Te>>2]=+g[he>>2]-+g[ie>>2];g[ke>>2]=+g[Vd>>2]-+g[ae>>2];g[le>>2]=+g[nd>>2]-+g[kd>>2];g[me>>2]=+g[ke>>2]+ +g[le>>2];g[Ue>>2]=+g[le>>2]-+g[ke>>2];g[ge>>2]=+g[Dd>>2]+ +g[fe>>2];g[ne>>2]=+g[je>>2]*1.7638425827026367-+g[me>>2]*.9427934885025024;g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[ge>>2]-+g[ne>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ge>>2]+ +g[ne>>2];g[We>>2]=+g[Qe>>2]+ +g[Re>>2];g[Xe>>2]=+g[Te>>2]*1.913880705833435+ +g[Ue>>2]*.580569326877594;g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[We>>2]-+g[Xe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[We>>2]+ +g[Xe>>2];g[oe>>2]=+g[Dd>>2]-+g[fe>>2];g[Pe>>2]=+g[je>>2]*.9427934885025024+ +g[me>>2]*1.7638425827026367;g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[oe>>2]-+g[Pe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[oe>>2]+ +g[Pe>>2];g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[Ve>>2]=+g[Te>>2]*.580569326877594-+g[Ue>>2]*1.913880705833435;g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Se>>2]-+g[Ve>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Se>>2]+ +g[Ve>>2];c[Pg>>2]=(c[Pg>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Qg;return}function Bw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,69,11368);i=b;return}function Cw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0;L=i;i=i+112|0;n=L+104|0;o=L+100|0;p=L+96|0;q=L+92|0;r=L+88|0;s=L+84|0;t=L+80|0;M=L+76|0;u=L+72|0;v=L+68|0;K=L+56|0;y=L+52|0;C=L+48|0;H=L+44|0;J=L+40|0;B=L+36|0;D=L+32|0;w=L+28|0;x=L+24|0;E=L+20|0;I=L+16|0;F=L+12|0;G=L+8|0;z=L+4|0;A=L;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[M>>2]=k;c[u>>2]=l;c[v>>2]=m;g[L+64>>2]=2.0;g[L+60>>2]=1.7320507764816284;c[K>>2]=c[M>>2];while(1){if((c[K>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[C>>2]=+g[w>>2]+ +g[x>>2];g[F>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[G>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[H>>2]=(+g[F>>2]-+g[G>>2])*1.7320507764816284;g[J>>2]=(+g[F>>2]+ +g[G>>2])*1.7320507764816284;g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[D>>2]=+g[z>>2]+ +g[A>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[B>>2]*2.0+ +g[y>>2];g[c[n>>2]>>2]=+g[D>>2]*2.0+ +g[C>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[E>>2]-+g[H>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]+ +g[H>>2];g[I>>2]=+g[y>>2]-+g[B>>2];g[c[o>>2]>>2]=+g[I>>2]-+g[J>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]+ +g[J>>2];c[K>>2]=(c[K>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=L;return}function Dw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,70,11416);i=b;return}function Ew(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0;K=i;i=i+128|0;n=K+120|0;o=K+116|0;p=K+112|0;q=K+108|0;r=K+104|0;s=K+100|0;t=K+96|0;L=K+92|0;u=K+88|0;v=K+84|0;J=K+52|0;E=K+48|0;I=K+44|0;G=K+40|0;w=K+36|0;z=K+32|0;x=K+28|0;y=K+24|0;A=K+20|0;H=K+16|0;F=K+12|0;B=K+8|0;D=K+4|0;C=K;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[L>>2]=k;c[u>>2]=l;c[v>>2]=m;g[K+80>>2]=2.0;g[K+76>>2]=1.8019376993179321;g[K+72>>2]=.44504186511039734;g[K+68>>2]=1.2469795942306519;g[K+64>>2]=.8677674531936646;g[K+60>>2]=1.9498558044433594;g[K+56>>2]=1.5636630058288574;c[J>>2]=c[L>>2];while(1){if((c[J>>2]|0)<=0)break;g[B>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[D>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[C>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[E>>2]=+g[B>>2]*1.5636630058288574-+g[C>>2]*1.9498558044433594-+g[D>>2]*.8677674531936646;g[I>>2]=+g[B>>2]*.8677674531936646+ +g[C>>2]*1.5636630058288574-+g[D>>2]*1.9498558044433594;g[G>>2]=+g[D>>2]*1.5636630058288574+ +g[B>>2]*1.9498558044433594+ +g[C>>2]*.8677674531936646;g[w>>2]=+g[c[p>>2]>>2];g[z>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[A>>2]=+g[y>>2]*1.2469795942306519+ +g[w>>2]+-(+g[z>>2]*.44504186511039734+ +g[x>>2]*1.8019376993179321);g[H>>2]=+g[z>>2]*1.2469795942306519+ +g[w>>2]+-(+g[y>>2]*1.8019376993179321+ +g[x>>2]*.44504186511039734);g[F>>2]=+g[x>>2]*1.2469795942306519+ +g[w>>2]+-(+g[z>>2]*1.8019376993179321+ +g[y>>2]*.44504186511039734);g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]-+g[E>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[A>>2]+ +g[E>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[H>>2]+ +g[I>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[H>>2]-+g[I>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[F>>2]+ +g[G>>2];g[c[o>>2]>>2]=+g[F>>2]-+g[G>>2];g[c[n>>2]>>2]=(+g[x>>2]+ +g[y>>2]+ +g[z>>2])*2.0+ 
+g[w>>2];c[J>>2]=(c[J>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=K;return}function Fw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,71,11464);i=b;return}function Gw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;n=T+136|0;o=T+132|0;p=T+128|0;q=T+124|0;r=T+120|0;s=T+116|0;t=T+112|0;U=T+108|0;u=T+104|0;v=T+100|0;S=T+88|0;A=T+84|0;L=T+80|0;y=T+76|0;J=T+72|0;E=T+68|0;N=T+64|0;I=T+60|0;O=T+56|0;B=T+52|0;F=T+48|0;z=T+44|0;K=T+40|0;w=T+36|0;x=T+32|0;C=T+28|0;D=T+24|0;G=T+20|0;H=T+16|0;M=T+12|0;P=T+8|0;Q=T+4|0;R=T;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[U>>2]=k;c[u>>2]=l;c[v>>2]=m;g[T+96>>2]=1.4142135381698608;g[T+92>>2]=2.0;c[S>>2]=c[U>>2];while(1){if((c[S>>2]|0)<=0)break;g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[A>>2]=+g[z>>2]*2.0;g[K>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[L>>2]=+g[K>>2]*2.0;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[J>>2]=+g[w>>2]-+g[x>>2];g[C>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[E>>2]=(+g[C>>2]+ +g[D>>2])*2.0;g[N>>2]=+g[C>>2]-+g[D>>2];g[G>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[H>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[I>>2]=(+g[G>>2]-+g[H>>2])*2.0;g[O>>2]=+g[G>>2]+ +g[H>>2];g[B>>2]=+g[y>>2]+ +g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[B>>2]-+g[E>>2];g[c[n>>2]>>2]=+g[B>>2]+ +g[E>>2];g[F>>2]=+g[y>>2]-+g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[F>>2]-+g[I>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[F>>2]+ 
+g[I>>2];g[M>>2]=+g[J>>2]-+g[L>>2];g[P>>2]=(+g[N>>2]-+g[O>>2])*1.4142135381698608;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[M>>2]-+g[P>>2];g[c[o>>2]>>2]=+g[M>>2]+ +g[P>>2];g[Q>>2]=+g[J>>2]+ +g[L>>2];g[R>>2]=(+g[N>>2]+ +g[O>>2])*1.4142135381698608;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Q>>2]+ +g[R>>2];c[S>>2]=(c[S>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=T;return}function Hw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,72,11512);i=b;return}function Iw(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0;ca=i;i=i+224|0;n=ca+220|0;o=ca+216|0;p=ca+212|0;q=ca+208|0;r=ca+204|0;s=ca+200|0;t=ca+196|0;da=ca+192|0;u=ca+188|0;v=ca+184|0;ba=ca+132|0;y=ca+128|0;V=ca+124|0;H=ca+120|0;P=ca+116|0;O=ca+112|0;D=ca+108|0;R=ca+104|0;X=ca+100|0;M=ca+96|0;W=ca+92|0;$=ca+88|0;aa=ca+84|0;G=ca+80|0;w=ca+76|0;x=ca+72|0;E=ca+68|0;F=ca+64|0;z=ca+60|0;C=ca+56|0;N=ca+52|0;L=ca+48|0;Q=ca+44|0;I=ca+40|0;A=ca+36|0;B=ca+32|0;J=ca+28|0;K=ca+24|0;U=ca+20|0;S=ca+16|0;T=ca+12|0;_=ca+8|0;Y=ca+4|0;Z=ca;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[da>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ca+180>>2]=.9848077297210693;g[ca+176>>2]=.1736481785774231;g[ca+172>>2]=.3007674515247345;g[ca+168>>2]=1.7057371139526367;g[ca+164>>2]=.6427876353263855;g[ca+160>>2]=.7660444378852844;g[ca+156>>2]=1.326827883720398;g[ca+152>>2]=1.1133408546447754;g[ca+148>>2]=.5;g[ca+144>>2]=.8660253882408142;g[ca+140>>2]=2.0;g[ca+136>>2]=1.7320507764816284;c[ba>>2]=c[da>>2];while(1){if((c[ba>>2]|0)<=0)break;g[F>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[G>>2]=+g[F>>2]*1.7320507
764816284;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[E>>2]=+g[w>>2]-+g[x>>2];g[y>>2]=+g[x>>2]*2.0+ +g[w>>2];g[V>>2]=+g[E>>2]+ +g[G>>2];g[H>>2]=+g[E>>2]-+g[G>>2];g[z>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[P>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[N>>2]=(+g[A>>2]-+g[B>>2])*.8660253882408142;g[J>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[K>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[L>>2]=(+g[J>>2]+ +g[K>>2])*.8660253882408142;g[O>>2]=+g[K>>2]-+g[J>>2];g[D>>2]=+g[z>>2]+ +g[C>>2];g[Q>>2]=+g[O>>2]*.5+ +g[P>>2];g[R>>2]=+g[N>>2]+ +g[Q>>2];g[X>>2]=+g[Q>>2]-+g[N>>2];g[I>>2]=+g[z>>2]-+g[C>>2]*.5;g[M>>2]=+g[I>>2]-+g[L>>2];g[W>>2]=+g[I>>2]+ +g[L>>2];g[c[n>>2]>>2]=+g[D>>2]*2.0+ +g[y>>2];g[$>>2]=+g[y>>2]-+g[D>>2];g[aa>>2]=(+g[P>>2]-+g[O>>2])*1.7320507764816284;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[$>>2]-+g[aa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[$>>2]+ +g[aa>>2];g[U>>2]=+g[M>>2]*1.1133408546447754+ +g[R>>2]*1.326827883720398;g[S>>2]=+g[M>>2]*.7660444378852844-+g[R>>2]*.6427876353263855;g[T>>2]=+g[H>>2]-+g[S>>2];g[c[o>>2]>>2]=+g[S>>2]*2.0+ +g[H>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[T>>2]+ +g[U>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[T>>2]-+g[U>>2];g[_>>2]=+g[W>>2]*1.7057371139526367+ +g[X>>2]*.3007674515247345;g[Y>>2]=+g[W>>2]*.1736481785774231-+g[X>>2]*.9848077297210693;g[Z>>2]=+g[V>>2]-+g[Y>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Y>>2]*2.0+ +g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Z>>2]+ +g[_>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Z>>2]-+g[_>>2];c[ba>>2]=(c[ba>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ca;return}function Jw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;dn(c[d>>2]|0,72,11560);i=b;return}function 
Kw(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0;S=i;i=i+176|0;k=S+168|0;l=S+164|0;m=S+160|0;n=S+156|0;T=S+152|0;o=S+148|0;p=S+144|0;R=S+108|0;w=S+104|0;K=S+100|0;t=S+96|0;J=S+92|0;C=S+88|0;N=S+84|0;F=S+80|0;M=S+76|0;u=S+72|0;v=S+68|0;q=S+64|0;s=S+60|0;r=S+56|0;y=S+52|0;E=S+48|0;B=S+44|0;D=S+40|0;z=S+36|0;A=S+32|0;x=S+28|0;G=S+24|0;P=S+20|0;Q=S+16|0;H=S+12|0;I=S+8|0;L=S+4|0;O=S;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[T>>2]=f;c[o>>2]=h;c[p>>2]=j;g[S+140>>2]=1.662939190864563;g[S+136>>2]=1.111140489578247;g[S+132>>2]=.39018064737319946;g[S+128>>2]=1.9615705013275146;g[S+124>>2]=.7071067690849304;g[S+120>>2]=1.4142135381698608;g[S+116>>2]=.7653668522834778;g[S+112>>2]=1.8477590084075928;c[R>>2]=c[T>>2];while(1){if((c[R>>2]|0)<=0)break;g[u>>2]=+g[(c[k>>2]|0)+(c[m>>2]<<1<<2)>>2];g[v>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*6<<2)>>2];g[w>>2]=+g[u>>2]*1.8477590084075928+ +g[v>>2]*.7653668522834778;g[K>>2]=+g[u>>2]*.7653668522834778-+g[v>>2]*1.8477590084075928;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[k>>2]|0)+(c[m>>2]<<2<<2)>>2];g[s>>2]=+g[r>>2]*1.4142135381698608;g[t>>2]=+g[q>>2]+ +g[s>>2];g[J>>2]=+g[q>>2]-+g[s>>2];g[y>>2]=+g[(c[k>>2]|0)+(c[m>>2]<<2)>>2];g[E>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*7<<2)>>2];g[z>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*5<<2)>>2];g[A>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*3<<2)>>2];g[B>>2]=(+g[z>>2]+ +g[A>>2])*.7071067690849304;g[D>>2]=(+g[z>>2]-+g[A>>2])*.7071067690849304;g[C>>2]=+g[y>>2]+ +g[B>>2];g[N>>2]=+g[D>>2]+ +g[E>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[M>>2]=+g[y>>2]-+g[B>>2];g[x>>2]=+g[t>>2]+ +g[w>>2];g[G>>2]=+g[C>>2]*1.9615705013275146-+g[F>>2]*.39018064737319946;g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[x>>2]-+g[G>>2];g[c[l>>2]>>2]=+g[x>>2]+ +g[G>>2];g[P>>2]=+g[J>>2]-+g[K>>2];g[Q>>2]=+g[M>>2]*1.111140489578247+ 
+g[N>>2]*1.662939190864563;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[P>>2]-+g[Q>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[P>>2]+ +g[Q>>2];g[H>>2]=+g[t>>2]-+g[w>>2];g[I>>2]=+g[C>>2]*.39018064737319946+ +g[F>>2]*1.9615705013275146;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[H>>2]-+g[I>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[H>>2]+ +g[I>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[O>>2]=+g[M>>2]*1.662939190864563-+g[N>>2]*1.111140489578247;g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[L>>2]-+g[O>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[L>>2]+ +g[O>>2];c[R>>2]=(c[R>>2]|0)-1;c[k>>2]=(c[k>>2]|0)+(c[o>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2)}i=S;return}function Lw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;dn(c[d>>2]|0,73,11608);i=b;return}function Mw(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0;R=i;i=i+176|0;k=R+168|0;l=R+164|0;m=R+160|0;n=R+156|0;S=R+152|0;o=R+148|0;p=R+144|0;Q=R+104|0;s=R+100|0;I=R+96|0;E=R+92|0;J=R+88|0;z=R+84|0;M=R+80|0;B=R+76|0;L=R+72|0;q=R+68|0;r=R+64|0;C=R+60|0;D=R+56|0;t=R+52|0;u=R+48|0;v=R+44|0;w=R+40|0;x=R+36|0;y=R+32|0;A=R+28|0;F=R+24|0;O=R+20|0;P=R+16|0;G=R+12|0;H=R+8|0;K=R+4|0;N=R;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[S>>2]=f;c[o>>2]=h;c[p>>2]=j;g[R+140>>2]=.7653668522834778;g[R+136>>2]=1.8477590084075928;g[R+132>>2]=.39018064737319946;g[R+128>>2]=1.9615705013275146;g[R+124>>2]=2.0;g[R+120>>2]=1.4142135381698608;g[R+116>>2]=1.111140489578247;g[R+112>>2]=1.662939190864563;g[R+108>>2]=.7071067690849304;c[Q>>2]=c[S>>2];while(1){if((c[Q>>2]|0)<=0)break;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*7<<2)>>2];g[s>>2]=+g[q>>2]-+g[r>>2];g[I>>2]=+g[q>>2]+ +g[r>>2];g[C>>2]=+g[(c[k>>2]|0)+(c[m>>2]<<2<<2)>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*3<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[J>>2]=+g[C>>2]+ 
+g[D>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[m>>2]<<1<<2)>>2];g[u>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*5<<2)>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[w>>2]=+g[(c[k>>2]|0)+(c[m>>2]<<2)>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[m>>2]|0)*6<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[z>>2]=(+g[v>>2]+ +g[y>>2])*.7071067690849304;g[M>>2]=+g[w>>2]+ +g[x>>2];g[B>>2]=(+g[v>>2]-+g[y>>2])*.7071067690849304;g[L>>2]=+g[t>>2]+ +g[u>>2];g[A>>2]=+g[s>>2]-+g[z>>2];g[F>>2]=+g[B>>2]-+g[E>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[A>>2]*1.662939190864563-+g[F>>2]*1.111140489578247;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[F>>2]*1.662939190864563+ +g[A>>2]*1.111140489578247;g[O>>2]=+g[I>>2]+ +g[J>>2];g[P>>2]=+g[L>>2]+ +g[M>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=(+g[O>>2]-+g[P>>2])*1.4142135381698608;g[c[l>>2]>>2]=(+g[O>>2]+ +g[P>>2])*2.0;g[G>>2]=+g[s>>2]+ +g[z>>2];g[H>>2]=+g[E>>2]+ +g[B>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]*1.9615705013275146-+g[H>>2]*.39018064737319946;g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[H>>2]*1.9615705013275146+ +g[G>>2]*.39018064737319946;g[K>>2]=+g[I>>2]-+g[J>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[K>>2]*1.8477590084075928-+g[N>>2]*.7653668522834778;g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[K>>2]*.7653668522834778+ +g[N>>2]*1.8477590084075928;c[Q>>2]=(c[Q>>2]|0)-1;c[k>>2]=(c[k>>2]|0)+(c[o>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2)}i=R;return}function Nw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;Cd(16140,c[d>>2]|0);i=b;return}function Ow(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Pw()|0);i=b;return}function Pw(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,16196)|0;i=b;return c[a>>2]|0}function Qw(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;t=i;i=i+96|0;e=t+80|0;u=t+76|0;f=t+72|0;g=t+68|0;r=t+64|0;q=t+60|0;k=t+56|0;l=t+52|0;j=t+48|0;n=t+44|0;s=t+40|0;m=t+36|0;p=t+32|0;o=t;c[u>>2]=a;c[f>>2]=b;c[g>>2]=d;c[k>>2]=0;c[j>>2]=0;if(((Rw(c[u>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)!=0?(c[q>>2]=c[f>>2],c[n>>2]=(c[(c[(c[q>>2]|0)+4>>2]|0)+4>>2]|0)-1,c[j>>2]=wb(c[n>>2]<<1<<2)|0,b=c[g>>2]|0,f=Ed(c[n>>2]<<1,1,1)|0,a=Dd()|0,c[k>>2]=uc(b,In(f,a,c[j>>2]|0,c[j>>2]|0,0)|0)|0,(c[k>>2]|0)!=0):0)?(ke(c[(c[q>>2]|0)+8>>2]|0,s,m,p)|0,g=c[g>>2]|0,f=Dd()|0,a=Ed((c[n>>2]|0)+1|0,1,c[(c[(c[q>>2]|0)+4>>2]|0)+4+8>>2]|0)|0,c[l>>2]=uc(g,In(f,a,c[j>>2]|0,c[(c[q>>2]|0)+16>>2]|0,0)|0)|0,(c[l>>2]|0)!=0):0){xb(c[j>>2]|0);c[r>>2]=sn(96,16208,49)|0;c[(c[r>>2]|0)+76>>2]=c[n>>2];c[(c[r>>2]|0)+72>>2]=c[(c[(c[q>>2]|0)+4>>2]|0)+4+4>>2];c[(c[r>>2]|0)+64>>2]=c[k>>2];c[(c[r>>2]|0)+68>>2]=c[l>>2];c[(c[r>>2]|0)+80>>2]=c[s>>2];c[(c[r>>2]|0)+84>>2]=c[m>>2];c[(c[r>>2]|0)+88>>2]=c[p>>2];fc(o);h[o+24>>3]=+((c[n>>2]|0)+(c[n>>2]<<1)|0);fc((c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+80>>2]|0,o,(c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+80>>2]|0,(c[k>>2]|0)+8|0,(c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+80>>2]|0,(c[l>>2]|0)+8|0,(c[r>>2]|0)+8|0);c[e>>2]=c[r>>2];o=c[e>>2]|0;i=t;return o|0}yb(c[j>>2]|0);if(c[k>>2]|0)pc(c[k>>2]|0);c[e>>2]=0;o=c[e>>2]|0;i=t;return o|0}function Rw(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(Ww(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function Sw(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0.0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;v=i;i=i+64|0;w=v+56|0;f=v+52|0;h=v+48|0;n=v+44|0;p=v+40|0;o=v+36|0;s=v+32|0;q=v+28|0;u=v+24|0;r=v+20|0;t=v+16|0;k=v+12|0;j=v+8|0;l=v+4|0;m=v;c[w>>2]=a;c[f>>2]=b;c[h>>2]=d;c[n>>2]=c[w>>2];c[p>>2]=c[(c[n>>2]|0)+72>>2];c[s>>2]=c[(c[n>>2]|0)+76>>2];c[u>>2]=c[(c[n>>2]|0)+80>>2];c[r>>2]=c[(c[n>>2]|0)+84>>2];c[t>>2]=c[(c[n>>2]|0)+88>>2];c[k>>2]=wb(c[s>>2]<<1<<2)|0;c[q>>2]=0;while(1){if((c[q>>2]|0)>=(c[u>>2]|0))break;g[c[k>>2]>>2]=+g[c[f>>2]>>2];c[o>>2]=1;while(1){b=_(c[o>>2]|0,c[p>>2]|0)|0;e=+g[(c[f>>2]|0)+(b<<2)>>2];if((c[o>>2]|0)>=(c[s>>2]|0))break;g[j>>2]=e;g[(c[k>>2]|0)+(c[o>>2]<<2)>>2]=+g[j>>2];g[(c[k>>2]|0)+((c[s>>2]<<1)-(c[o>>2]|0)<<2)>>2]=+g[j>>2];c[o>>2]=(c[o>>2]|0)+1}g[(c[k>>2]|0)+(c[o>>2]<<2)>>2]=e;c[l>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[l>>2]|0)+56>>2]&63](c[l>>2]|0,c[k>>2]|0,c[k>>2]|0);c[m>>2]=c[(c[n>>2]|0)+68>>2];eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[k>>2]|0,c[h>>2]|0);c[q>>2]=(c[q>>2]|0)+1;c[f>>2]=(c[f>>2]|0)+(c[r>>2]<<2);c[h>>2]=(c[h>>2]|0)+(c[t>>2]<<2)}xb(c[k>>2]|0);i=v;return}function Tw(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Uw(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+80>>2]|0;g=c[(c[j>>2]|0)+64>>2]|0;f=c[(c[j>>2]|0)+68>>2]|0;c[e>>2]=(c[(c[j>>2]|0)+76>>2]|0)+1;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,29418,e);i=d;return}function Vw(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Ww(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)==1?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)<=1:0)?(c[(c[d>>2]|0)+20>>2]|0)==9:0)a=(c[(c[(c[d>>2]|0)+4>>2]|0)+4>>2]|0)>1;else a=0;i=e;return a&1|0}function Xw(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Yw()|0);i=b;return}function Yw(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,16224)|0;i=b;return c[a>>2]|0}function Zw(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;s=i;i=i+80|0;f=s+76|0;t=s+72|0;e=s+68|0;g=s+64|0;r=s+60|0;q=s+56|0;k=s+52|0;l=s+48|0;j=s+44|0;n=s+40|0;o=s+36|0;p=s;m=s+32|0;c[t>>2]=a;c[e>>2]=b;c[g>>2]=d;if(!(_w(c[t>>2]|0,c[e>>2]|0,c[g>>2]|0)|0)){c[f>>2]=0;m=c[f>>2]|0;i=s;return m|0}c[q>>2]=c[e>>2];a=c[(c[(c[q>>2]|0)+4>>2]|0)+4>>2]|0;c[o>>2]=a;c[n>>2]=a+((c[(c[q>>2]|0)+20>>2]|0)==9?-1:1);c[j>>2]=wb(((c[n>>2]|0)/2|0)<<2)|0;if((c[(c[q>>2]|0)+20>>2]|0)==13)a=(c[(c[q>>2]|0)+12>>2]|0)==(c[(c[q>>2]|0)+16>>2]|0);else a=0;c[m>>2]=a&1;e=c[g>>2]|0;a=(c[(c[q>>2]|0)+4>>2]|0)+4|0;if(c[m>>2]|0)a=c[a+4>>2]|0;else a=c[a+8>>2]|0;d=Ed((c[o>>2]|0)-((c[n>>2]|0)/2|0)|0,c[(c[(c[q>>2]|0)+4>>2]|0)+4+4>>2]<<1,a)|0;b=Dd()|0;a=(c[(c[q>>2]|0)+12>>2]|0)+((_(c[(c[(c[q>>2]|0)+4>>2]|0)+4+4>>2]|0,(c[(c[q>>2]|0)+20>>2]|0)==13&1)|0)<<2)|0;m=(c[(c[q>>2]|0)+16>>2]|0)+((_(c[(c[(c[q>>2]|0)+4>>2]|0)+4+4>>2]|0,c[m>>2]|0)|0)<<2)|0;c[k>>2]=uc(e,In(d,b,a,m,c[(c[q>>2]|0)+20>>2]|0)|0)|0;if(!(c[k>>2]|0)){xb(c[j>>2]|0);c[f>>2]=0;m=c[f>>2]|0;i=s;return 
m|0}a=c[g>>2]|0;g=Ed((c[n>>2]|0)/2|0,1,1)|0;m=Dd()|0;c[l>>2]=uc(a,In(g,m,c[j>>2]|0,c[j>>2]|0,0)|0)|0;xb(c[j>>2]|0);if(c[l>>2]|0){c[r>>2]=sn(104,16236,(c[(c[q>>2]|0)+20>>2]|0)==9?51:50)|0;c[(c[r>>2]|0)+84>>2]=c[n>>2];c[(c[r>>2]|0)+76>>2]=c[(c[(c[q>>2]|0)+4>>2]|0)+4+4>>2];c[(c[r>>2]|0)+80>>2]=c[(c[(c[q>>2]|0)+4>>2]|0)+4+8>>2];c[(c[r>>2]|0)+64>>2]=c[k>>2];c[(c[r>>2]|0)+68>>2]=c[l>>2];c[(c[r>>2]|0)+72>>2]=0;ke(c[(c[q>>2]|0)+8>>2]|0,(c[r>>2]|0)+88|0,(c[r>>2]|0)+92|0,(c[r>>2]|0)+96|0)|0;fc(p);h[p+24>>3]=+((c[n>>2]|0)/2|0|0);h[p>>3]=+(((c[(c[q>>2]|0)+20>>2]|0)==9?2:0)+(((((c[n>>2]|0)/2|0)-1|0)/2|0)*6|0)+(((((c[n>>2]|0)/2|0|0)%2|0|0)==0&1)<<1)|0);h[p+8>>3]=+(1+(((((c[n>>2]|0)/2|0)-1|0)/2|0)*6|0)+(((((c[n>>2]|0)/2|0|0)%2|0|0)==0&1)<<1)|0);m=p+24|0;h[m>>3]=+h[m>>3]+256.0;fc((c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+88>>2]|0,p,(c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+88>>2]|0,(c[k>>2]|0)+8|0,(c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+88>>2]|0,(c[l>>2]|0)+8|0,(c[r>>2]|0)+8|0);c[f>>2]=c[r>>2];m=c[f>>2]|0;i=s;return m|0}else{c[f>>2]=0;m=c[f>>2]|0;i=s;return m|0}return 0}function _w(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(ex(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function $w(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0.0;J=i;i=i+128|0;K=J+116|0;e=J+112|0;f=J+108|0;t=J+104|0;v=J+100|0;B=J+96|0;u=J+92|0;y=J+88|0;z=J+84|0;A=J+80|0;w=J+76|0;D=J+72|0;x=J+68|0;C=J+64|0;h=J+60|0;q=J+56|0;r=J+52|0;s=J+48|0;n=J+44|0;m=J+40|0;k=J+36|0;j=J+32|0;p=J+28|0;o=J+24|0;I=J+20|0;H=J+16|0;F=J+12|0;E=J+8|0;l=J+4|0;G=J;c[K>>2]=a;c[e>>2]=b;c[f>>2]=d;c[t>>2]=c[K>>2];c[v>>2]=c[(c[t>>2]|0)+76>>2];c[B>>2]=c[(c[t>>2]|0)+80>>2];c[z>>2]=(c[(c[t>>2]|0)+84>>2]|0)+1;c[A>>2]=((c[z>>2]|0)-1|0)/2|0;c[D>>2]=c[(c[t>>2]|0)+88>>2];c[x>>2]=c[(c[t>>2]|0)+92>>2];c[C>>2]=c[(c[t>>2]|0)+96>>2];c[h>>2]=(c[c[(c[t>>2]|0)+72>>2]>>2]|0)+-8;c[q>>2]=wb(c[A>>2]<<2)|0;c[w>>2]=0;while(1){if((c[w>>2]|0)>=(c[D>>2]|0))break;c[y>>2]=0;c[u>>2]=1;while(1){if((c[u>>2]|0)>=(c[z>>2]|0))break;a=_(c[v>>2]|0,c[u>>2]|0)|0;L=+g[(c[e>>2]|0)+(a<<2)>>2];a=c[y>>2]|0;c[y>>2]=a+1;g[(c[q>>2]|0)+(a<<2)>>2]=L;c[u>>2]=(c[u>>2]|0)+4}c[u>>2]=(c[z>>2]<<1)-2-(c[u>>2]|0);while(1){if((c[u>>2]|0)<=0)break;a=_(c[v>>2]|0,c[u>>2]|0)|0;L=+g[(c[e>>2]|0)+(a<<2)>>2];a=c[y>>2]|0;c[y>>2]=a+1;g[(c[q>>2]|0)+(a<<2)>>2]=L;c[u>>2]=(c[u>>2]|0)-4}c[r>>2]=c[(c[t>>2]|0)+68>>2];eb[c[(c[r>>2]|0)+56>>2]&63](c[r>>2]|0,c[q>>2]|0,c[q>>2]|0);c[s>>2]=c[(c[t>>2]|0)+64>>2];eb[c[(c[s>>2]|0)+56>>2]&63](c[s>>2]|0,c[e>>2]|0,c[f>>2]|0);g[n>>2]=+g[c[f>>2]>>2];g[m>>2]=+g[c[q>>2]>>2]*2.0;g[c[f>>2]>>2]=+g[n>>2]+ +g[m>>2];a=(_(c[A>>2]|0,c[B>>2]|0)|0)<<1;g[(c[f>>2]|0)+(a<<2)>>2]=+g[n>>2]-+g[m>>2];c[u>>2]=1;while(1){b=c[u>>2]|0;if((c[u>>2]|0)>=((c[A>>2]|0)-(c[u>>2]|0)|0))break;g[p>>2]=+g[(c[q>>2]|0)+(b<<2)>>2];g[o>>2]=+g[(c[q>>2]|0)+((c[A>>2]|0)-(c[u>>2]|0)<<2)>>2];g[I>>2]=+g[(c[h>>2]|0)+(c[u>>2]<<1<<2)>>2];g[H>>2]=+g[(c[h>>2]|0)+((c[u>>2]<<1)+1<<2)>>2];g[F>>2]=(+g[I>>2]*+g[p>>2]+ +g[H>>2]*+g[o>>2])*2.0;g[E>>2]=(+g[I>>2]*+g[o>>2]-+g[H>>2]*+g[p>>2])*2.0;a=_(c[u>>2]|0,c[B>>2]|0)|0;g[k>>2]=+g[(c[f>>2]|0)+(a<<2)>>2];a=_(c[u>>2]|0,c[B>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[k>>2]+ 
+g[F>>2];a=_((c[A>>2]<<1)-(c[u>>2]|0)|0,c[B>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[k>>2]-+g[F>>2];a=_((c[A>>2]|0)-(c[u>>2]|0)|0,c[B>>2]|0)|0;g[j>>2]=+g[(c[f>>2]|0)+(a<<2)>>2];a=_((c[A>>2]|0)-(c[u>>2]|0)|0,c[B>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[j>>2]-+g[E>>2];a=_((c[A>>2]|0)+(c[u>>2]|0)|0,c[B>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[j>>2]+ +g[E>>2];c[u>>2]=(c[u>>2]|0)+1}if((b|0)==((c[A>>2]|0)-(c[u>>2]|0)|0)){g[G>>2]=+g[(c[h>>2]|0)+(c[u>>2]<<1<<2)>>2]*+g[(c[q>>2]|0)+(c[u>>2]<<2)>>2]*2.0;a=_(c[u>>2]|0,c[B>>2]|0)|0;g[l>>2]=+g[(c[f>>2]|0)+(a<<2)>>2];a=_(c[u>>2]|0,c[B>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[l>>2]+ +g[G>>2];a=_((c[A>>2]<<1)-(c[u>>2]|0)|0,c[B>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[l>>2]-+g[G>>2]}c[w>>2]=(c[w>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[x>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[C>>2]<<2)}xb(c[q>>2]|0);i=J;return}function ax(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0.0;H=i;i=i+112|0;I=H+108|0;e=H+104|0;f=H+100|0;r=H+96|0;t=H+92|0;z=H+88|0;s=H+84|0;w=H+80|0;x=H+76|0;y=H+72|0;u=H+68|0;B=H+64|0;v=H+60|0;A=H+56|0;h=H+52|0;o=H+48|0;p=H+44|0;q=H+40|0;k=H+36|0;j=H+32|0;n=H+28|0;m=H+24|0;G=H+20|0;F=H+16|0;E=H+12|0;C=H+8|0;l=H+4|0;D=H;c[I>>2]=a;c[e>>2]=b;c[f>>2]=d;c[r>>2]=c[I>>2];c[t>>2]=c[(c[r>>2]|0)+76>>2];c[z>>2]=c[(c[r>>2]|0)+80>>2];c[x>>2]=(c[(c[r>>2]|0)+84>>2]|0)-1;c[y>>2]=((c[x>>2]|0)+1|0)/2|0;c[B>>2]=c[(c[r>>2]|0)+88>>2];c[v>>2]=c[(c[r>>2]|0)+92>>2];c[A>>2]=c[(c[r>>2]|0)+96>>2];c[h>>2]=(c[c[(c[r>>2]|0)+72>>2]>>2]|0)+-8;c[o>>2]=wb(c[y>>2]<<2)|0;c[u>>2]=0;while(1){if((c[u>>2]|0)>=(c[B>>2]|0))break;c[w>>2]=0;c[s>>2]=0;while(1){if((c[s>>2]|0)>=(c[x>>2]|0))break;b=_(c[t>>2]|0,c[s>>2]|0)|0;J=+g[(c[e>>2]|0)+(b<<2)>>2];b=c[w>>2]|0;c[w>>2]=b+1;g[(c[o>>2]|0)+(b<<2)>>2]=J;c[s>>2]=(c[s>>2]|0)+4}c[s>>2]=(c[x>>2]<<1)-(c[s>>2]|0);while(1){if((c[s>>2]|0)<=0)break;b=_(c[t>>2]|0,c[s>>2]|0)|0;J=-+g[(c[e>>2]|0)+(b<<2)>>2];b=c[w>>2]|0;c[w>>2]=b+1;g[(c[o>>2]|0)+(b<<2)>>2]=J;c[s>>2]=(c[s>>2]|
0)-4}c[p>>2]=c[(c[r>>2]|0)+68>>2];eb[c[(c[p>>2]|0)+56>>2]&63](c[p>>2]|0,c[o>>2]|0,c[o>>2]|0);c[q>>2]=c[(c[r>>2]|0)+64>>2];a=c[(c[q>>2]|0)+56>>2]|0;d=c[q>>2]|0;b=(c[e>>2]|0)+(c[t>>2]<<2)|0;a:do if((c[e>>2]|0)==(c[f>>2]|0)){eb[a&63](d,b,(c[e>>2]|0)+(c[t>>2]<<2)|0);c[s>>2]=0;while(1){if((c[s>>2]|0)>=((c[y>>2]|0)-1|0))break a;d=_(c[t>>2]|0,(c[s>>2]|0)+1|0)|0;b=_(c[z>>2]|0,c[s>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[(c[e>>2]|0)+(d<<2)>>2];c[s>>2]=(c[s>>2]|0)+1}}else eb[a&63](d,b,c[f>>2]|0);while(0);b=_((c[y>>2]|0)-1|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[c[o>>2]>>2]*2.0;c[s>>2]=1;while(1){a=c[s>>2]|0;if((c[s>>2]|0)>=((c[y>>2]|0)-(c[s>>2]|0)|0))break;g[n>>2]=+g[(c[o>>2]|0)+(a<<2)>>2];g[m>>2]=+g[(c[o>>2]|0)+((c[y>>2]|0)-(c[s>>2]|0)<<2)>>2];g[G>>2]=+g[(c[h>>2]|0)+(c[s>>2]<<1<<2)>>2];g[F>>2]=+g[(c[h>>2]|0)+((c[s>>2]<<1)+1<<2)>>2];g[E>>2]=(+g[G>>2]*+g[n>>2]+ +g[F>>2]*+g[m>>2])*2.0;g[C>>2]=(+g[F>>2]*+g[n>>2]-+g[G>>2]*+g[m>>2])*2.0;b=_((c[s>>2]|0)-1|0,c[z>>2]|0)|0;g[k>>2]=+g[(c[f>>2]|0)+(b<<2)>>2];b=_((c[s>>2]|0)-1|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[C>>2]+ +g[k>>2];b=_((c[y>>2]<<1)-1-(c[s>>2]|0)|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[C>>2]-+g[k>>2];b=_((c[y>>2]|0)-1-(c[s>>2]|0)|0,c[z>>2]|0)|0;g[j>>2]=+g[(c[f>>2]|0)+(b<<2)>>2];b=_((c[y>>2]|0)-1-(c[s>>2]|0)|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[E>>2]+ +g[j>>2];b=_((c[y>>2]|0)-1+(c[s>>2]|0)|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[E>>2]-+g[j>>2];c[s>>2]=(c[s>>2]|0)+1}if((a|0)==((c[y>>2]|0)-(c[s>>2]|0)|0)){g[D>>2]=+g[(c[h>>2]|0)+((c[s>>2]<<1)+1<<2)>>2]*+g[(c[o>>2]|0)+(c[s>>2]<<2)>>2]*2.0;b=_((c[s>>2]|0)-1|0,c[z>>2]|0)|0;g[l>>2]=+g[(c[f>>2]|0)+(b<<2)>>2];b=_((c[s>>2]|0)-1|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[D>>2]+ +g[l>>2];b=_((c[y>>2]<<1)-1-(c[s>>2]|0)|0,c[z>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[D>>2]-+g[l>>2]}c[u>>2]=(c[u>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[v>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[A>>2]<<2)}xb(c[o>>2]|0);i=H;return}function bx(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+64>>2]|0,c[f>>2]|0);rc(c[(c[e>>2]|0)+68>>2]|0,c[f>>2]|0);Me(c[f>>2]|0,(c[e>>2]|0)+72|0,19276,c[(c[e>>2]|0)+84>>2]<<1,1,(c[(c[e>>2]|0)+84>>2]|0)/4|0);i=d;return}function cx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;h=i;i=i+48|0;g=h+16|0;f=h;j=h+40|0;d=h+36|0;e=h+32|0;c[j>>2]=a;c[d>>2]=b;c[e>>2]=c[j>>2];a=c[c[d>>2]>>2]|0;d=c[d>>2]|0;b=c[(c[e>>2]|0)+84>>2]|0;if((c[(c[e>>2]|0)+56>>2]|0)==51){j=c[(c[e>>2]|0)+88>>2]|0;g=c[(c[e>>2]|0)+64>>2]|0;e=c[(c[e>>2]|0)+68>>2]|0;c[f>>2]=b+1;c[f+4>>2]=j;c[f+8>>2]=g;c[f+12>>2]=e;eb[a&63](d,29455,f);i=h;return}else{j=c[(c[e>>2]|0)+88>>2]|0;f=c[(c[e>>2]|0)+64>>2]|0;e=c[(c[e>>2]|0)+68>>2]|0;c[g>>2]=b-1;c[g+4>>2]=j;c[g+8>>2]=f;c[g+12>>2]=e;eb[a&63](d,29494,g);i=h;return}}function dx(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function ex(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=e;return b|0}if((c[c[(c[d>>2]|0)+8>>2]>>2]|0)>1){b=0;b=b&1;i=e;return b|0}if((c[(c[d>>2]|0)+20>>2]|0)!=9?(c[(c[d>>2]|0)+20>>2]|0)!=13:0){b=0;b=b&1;i=e;return b|0}if((c[(c[(c[d>>2]|0)+4>>2]|0)+4>>2]|0)<=1){b=0;b=b&1;i=e;return b|0}if(!((c[(c[(c[d>>2]|0)+4>>2]|0)+4>>2]|0)%2|0)){b=0;b=b&1;i=e;return b|0}if(((c[(c[d>>2]|0)+12>>2]|0)==(c[(c[d>>2]|0)+16>>2]|0)?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=0:0)?(c[(c[(c[d>>2]|0)+8>>2]|0)+4+4>>2]|0)!=(c[(c[(c[d>>2]|0)+8>>2]|0)+4+8>>2]|0):0){b=0;b=b&1;i=e;return b|0}if((c[(c[d>>2]|0)+20>>2]|0)!=13){b=1;b=b&1;i=e;return b|0}if((c[(c[d>>2]|0)+12>>2]|0)!=(c[(c[d>>2]|0)+16>>2]|0)){b=1;b=b&1;i=e;return b|0}b=(c[(c[(c[d>>2]|0)+4>>2]|0)+4+4>>2]|0)>=(c[(c[(c[d>>2]|0)+4>>2]|0)+4+8>>2]|0);b=b&1;i=e;return b|0}function fx(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,gx()|0);i=b;return}function gx(){var 
a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,16252)|0;i=b;return c[a>>2]|0}function hx(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+80|0;e=p+64|0;q=p+60|0;f=p+56|0;g=p+52|0;o=p+48|0;n=p+44|0;k=p+40|0;j=p+36|0;l=p+32|0;m=p;c[q>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(ix(c[q>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;b=c[e>>2]|0;i=p;return b|0}c[n>>2]=c[f>>2];c[l>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4>>2];c[j>>2]=wb(c[l>>2]<<2)|0;f=c[g>>2]|0;a=Ed(c[l>>2]|0,1,1)|0;b=Dd()|0;c[k>>2]=uc(f,In(a,b,c[j>>2]|0,c[j>>2]|0,0)|0)|0;xb(c[j>>2]|0);if(!(c[k>>2]|0)){c[e>>2]=0;b=c[e>>2]|0;i=p;return b|0}switch(c[(c[n>>2]|0)+20>>2]|0){case 10:{c[o>>2]=sn(104,16264,52)|0;break}case 11:{c[o>>2]=sn(104,16264,53)|0;break}case 14:{c[o>>2]=sn(104,16264,54)|0;break}case 15:{c[o>>2]=sn(104,16264,55)|0;break}default:{c[e>>2]=0;b=c[e>>2]|0;i=p;return b|0}}c[(c[o>>2]|0)+80>>2]=c[l>>2];c[(c[o>>2]|0)+72>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+4>>2];c[(c[o>>2]|0)+76>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+8>>2];c[(c[o>>2]|0)+64>>2]=c[k>>2];c[(c[o>>2]|0)+68>>2]=0;c[(c[o>>2]|0)+96>>2]=c[(c[n>>2]|0)+20>>2];ke(c[(c[n>>2]|0)+8>>2]|0,(c[o>>2]|0)+84|0,(c[o>>2]|0)+88|0,(c[o>>2]|0)+92|0)|0;fc(m);h[m+24>>3]=+(4+((((c[l>>2]|0)-1|0)/2|0)*10|0)+((1-((c[l>>2]|0)%2|0)|0)*5|0)|0);if((c[(c[n>>2]|0)+20>>2]|0)!=10?(c[(c[n>>2]|0)+20>>2]|0)!=14:0){h[m>>3]=+((((c[l>>2]|0)-1|0)/2|0)<<1|0);h[m+8>>3]=+(1+((((c[l>>2]|0)-1|0)/2|0)*6|0)+(1-((c[l>>2]|0)%2|0)<<1)|0)}else{h[m>>3]=+((((c[l>>2]|0)-1|0)/2|0)*6|0);h[m+8>>3]=+(((((c[l>>2]|0)-1|0)/2|0)<<2)+(1-((c[l>>2]|0)%2|0)<<1)|0)}fc((c[o>>2]|0)+8|0);lc(c[(c[o>>2]|0)+84>>2]|0,m,(c[o>>2]|0)+8|0);lc(c[(c[o>>2]|0)+84>>2]|0,(c[k>>2]|0)+8|0,(c[o>>2]|0)+8|0);c[e>>2]=c[o>>2];b=c[e>>2]|0;i=p;return b|0}function ix(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(qx(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function jx(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0;D=i;i=i+96|0;E=D+92|0;e=D+88|0;f=D+84|0;r=D+80|0;t=D+76|0;y=D+72|0;s=D+68|0;x=D+64|0;u=D+60|0;A=D+56|0;v=D+52|0;z=D+48|0;h=D+44|0;p=D+40|0;j=D+36|0;n=D+32|0;m=D+28|0;l=D+24|0;B=D+20|0;C=D+16|0;q=D+12|0;k=D+8|0;o=D+4|0;w=D;c[E>>2]=a;c[e>>2]=b;c[f>>2]=d;c[r>>2]=c[E>>2];c[t>>2]=c[(c[r>>2]|0)+72>>2];c[y>>2]=c[(c[r>>2]|0)+76>>2];c[x>>2]=c[(c[r>>2]|0)+80>>2];c[A>>2]=c[(c[r>>2]|0)+84>>2];c[v>>2]=c[(c[r>>2]|0)+88>>2];c[z>>2]=c[(c[r>>2]|0)+92>>2];c[h>>2]=c[c[(c[r>>2]|0)+68>>2]>>2];c[p>>2]=wb(c[x>>2]<<2)|0;c[u>>2]=0;while(1){if((c[u>>2]|0)>=(c[A>>2]|0))break;g[c[p>>2]>>2]=+g[c[e>>2]>>2];c[s>>2]=1;while(1){if((c[s>>2]|0)>=((c[x>>2]|0)-(c[s>>2]|0)|0))break;a=_(c[t>>2]|0,c[s>>2]|0)|0;g[j>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[t>>2]|0,(c[x>>2]|0)-(c[s>>2]|0)|0)|0;g[n>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];g[m>>2]=+g[j>>2]+ +g[n>>2];g[l>>2]=+g[j>>2]-+g[n>>2];g[B>>2]=+g[(c[h>>2]|0)+(c[s>>2]<<1<<2)>>2];g[C>>2]=+g[(c[h>>2]|0)+((c[s>>2]<<1)+1<<2)>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[B>>2]*+g[l>>2]+ +g[C>>2]*+g[m>>2];g[(c[p>>2]|0)+((c[x>>2]|0)-(c[s>>2]|0)<<2)>>2]=+g[B>>2]*+g[m>>2]-+g[C>>2]*+g[l>>2];c[s>>2]=(c[s>>2]|0)+1}if((c[s>>2]|0)==((c[x>>2]|0)-(c[s>>2]|0)|0)){a=_(c[t>>2]|0,c[s>>2]|0)|0;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2]*2.0*+g[(c[h>>2]|0)+(c[s>>2]<<1<<2)>>2]}c[q>>2]=c[(c[r>>2]|0)+64>>2];eb[c[(c[q>>2]|0)+56>>2]&63](c[q>>2]|0,c[p>>2]|0,c[p>>2]|0);g[c[f>>2]>>2]=+g[c[p>>2]>>2];c[s>>2]=1;while(1){b=c[s>>2]|0;if((c[s>>2]|0)>=((c[x>>2]|0)-(c[s>>2]|0)|0))break;g[k>>2]=+g[(c[p>>2]|0)+(b<<2)>>2];g[o>>2]=+g[(c[p>>2]|0)+((c[x>>2]|0)-(c[s>>2]|0)<<2)>>2];c[w>>2]=(c[s>>2]|0)+(c[s>>2]|0);a=_(c[y>>2]|0,(c[w>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[k>>2]-+g[o>>2];a=_(c[y>>2]|0,c[w>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[k>>2]+ 
+g[o>>2];c[s>>2]=(c[s>>2]|0)+1}if((b|0)==((c[x>>2]|0)-(c[s>>2]|0)|0)){a=_(c[y>>2]|0,(c[x>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]}c[u>>2]=(c[u>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[v>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[z>>2]<<2)}xb(c[p>>2]|0);i=D;return}function kx(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0;B=i;i=i+96|0;C=B+84|0;e=B+80|0;f=B+76|0;n=B+72|0;p=B+68|0;u=B+64|0;o=B+60|0;t=B+56|0;q=B+52|0;y=B+48|0;r=B+44|0;v=B+40|0;h=B+36|0;l=B+32|0;w=B+28|0;x=B+24|0;s=B+20|0;m=B+16|0;j=B+12|0;k=B+8|0;z=B+4|0;A=B;c[C>>2]=a;c[e>>2]=b;c[f>>2]=d;c[n>>2]=c[C>>2];c[p>>2]=c[(c[n>>2]|0)+72>>2];c[u>>2]=c[(c[n>>2]|0)+76>>2];c[t>>2]=c[(c[n>>2]|0)+80>>2];c[y>>2]=c[(c[n>>2]|0)+84>>2];c[r>>2]=c[(c[n>>2]|0)+88>>2];c[v>>2]=c[(c[n>>2]|0)+92>>2];c[h>>2]=c[c[(c[n>>2]|0)+68>>2]>>2];c[l>>2]=wb(c[t>>2]<<2)|0;c[q>>2]=0;while(1){if((c[q>>2]|0)>=(c[y>>2]|0))break;g[c[l>>2]>>2]=+g[c[e>>2]>>2];c[o>>2]=1;while(1){b=c[o>>2]|0;if((c[o>>2]|0)>=((c[t>>2]|0)-(c[o>>2]|0)|0))break;c[s>>2]=b+(c[o>>2]|0);a=_(c[p>>2]|0,(c[s>>2]|0)-1|0)|0;g[w>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[p>>2]|0,c[s>>2]|0)|0;g[x>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];g[(c[l>>2]|0)+((c[t>>2]|0)-(c[o>>2]|0)<<2)>>2]=+g[w>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[x>>2];c[o>>2]=(c[o>>2]|0)+1}if((b|0)==((c[t>>2]|0)-(c[o>>2]|0)|0)){a=_(c[p>>2]|0,(c[t>>2]|0)-1|0)|0;g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2]}c[m>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[l>>2]|0,c[l>>2]|0);g[c[f>>2]>>2]=+g[c[l>>2]>>2]*2.0;c[o>>2]=1;while(1){b=c[o>>2]|0;if((c[o>>2]|0)>=((c[t>>2]|0)-(c[o>>2]|0)|0))break;g[j>>2]=+g[(c[l>>2]|0)+(b<<2)>>2]*2.0;g[k>>2]=+g[(c[l>>2]|0)+((c[t>>2]|0)-(c[o>>2]|0)<<2)>>2]*2.0;g[z>>2]=+g[(c[h>>2]|0)+(c[o>>2]<<1<<2)>>2];g[A>>2]=+g[(c[h>>2]|0)+((c[o>>2]<<1)+1<<2)>>2];a=_(c[u>>2]|0,c[o>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[z>>2]*+g[j>>2]+ 
+g[A>>2]*+g[k>>2];a=_(c[u>>2]|0,(c[t>>2]|0)-(c[o>>2]|0)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[A>>2]*+g[j>>2]-+g[z>>2]*+g[k>>2];c[o>>2]=(c[o>>2]|0)+1}if((b|0)==((c[t>>2]|0)-(c[o>>2]|0)|0)){a=_(c[u>>2]|0,c[o>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]*2.0*+g[(c[h>>2]|0)+(c[o>>2]<<1<<2)>>2]}c[q>>2]=(c[q>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[r>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[v>>2]<<2)}xb(c[l>>2]|0);i=B;return}function lx(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0;D=i;i=i+96|0;E=D+92|0;e=D+88|0;f=D+84|0;r=D+80|0;t=D+76|0;y=D+72|0;s=D+68|0;x=D+64|0;u=D+60|0;A=D+56|0;v=D+52|0;z=D+48|0;h=D+44|0;p=D+40|0;j=D+36|0;n=D+32|0;m=D+28|0;l=D+24|0;B=D+20|0;C=D+16|0;q=D+12|0;k=D+8|0;o=D+4|0;w=D;c[E>>2]=a;c[e>>2]=b;c[f>>2]=d;c[r>>2]=c[E>>2];c[t>>2]=c[(c[r>>2]|0)+72>>2];c[y>>2]=c[(c[r>>2]|0)+76>>2];c[x>>2]=c[(c[r>>2]|0)+80>>2];c[A>>2]=c[(c[r>>2]|0)+84>>2];c[v>>2]=c[(c[r>>2]|0)+88>>2];c[z>>2]=c[(c[r>>2]|0)+92>>2];c[h>>2]=c[c[(c[r>>2]|0)+68>>2]>>2];c[p>>2]=wb(c[x>>2]<<2)|0;c[u>>2]=0;while(1){if((c[u>>2]|0)>=(c[A>>2]|0))break;a=_(c[t>>2]|0,(c[x>>2]|0)-1|0)|0;g[c[p>>2]>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[s>>2]=1;while(1){if((c[s>>2]|0)>=((c[x>>2]|0)-(c[s>>2]|0)|0))break;a=_(c[t>>2]|0,(c[x>>2]|0)-1-(c[s>>2]|0)|0)|0;g[j>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[t>>2]|0,(c[s>>2]|0)-1|0)|0;g[n>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];g[m>>2]=+g[j>>2]+ +g[n>>2];g[l>>2]=+g[j>>2]-+g[n>>2];g[B>>2]=+g[(c[h>>2]|0)+(c[s>>2]<<1<<2)>>2];g[C>>2]=+g[(c[h>>2]|0)+((c[s>>2]<<1)+1<<2)>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[B>>2]*+g[l>>2]+ 
+g[C>>2]*+g[m>>2];g[(c[p>>2]|0)+((c[x>>2]|0)-(c[s>>2]|0)<<2)>>2]=+g[B>>2]*+g[m>>2]-+g[C>>2]*+g[l>>2];c[s>>2]=(c[s>>2]|0)+1}if((c[s>>2]|0)==((c[x>>2]|0)-(c[s>>2]|0)|0)){a=_(c[t>>2]|0,(c[s>>2]|0)-1|0)|0;g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2]*2.0*+g[(c[h>>2]|0)+(c[s>>2]<<1<<2)>>2]}c[q>>2]=c[(c[r>>2]|0)+64>>2];eb[c[(c[q>>2]|0)+56>>2]&63](c[q>>2]|0,c[p>>2]|0,c[p>>2]|0);g[c[f>>2]>>2]=+g[c[p>>2]>>2];c[s>>2]=1;while(1){b=c[s>>2]|0;if((c[s>>2]|0)>=((c[x>>2]|0)-(c[s>>2]|0)|0))break;g[k>>2]=+g[(c[p>>2]|0)+(b<<2)>>2];g[o>>2]=+g[(c[p>>2]|0)+((c[x>>2]|0)-(c[s>>2]|0)<<2)>>2];c[w>>2]=(c[s>>2]|0)+(c[s>>2]|0);a=_(c[y>>2]|0,(c[w>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[o>>2]-+g[k>>2];a=_(c[y>>2]|0,c[w>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[k>>2]+ +g[o>>2];c[s>>2]=(c[s>>2]|0)+1}if((b|0)==((c[x>>2]|0)-(c[s>>2]|0)|0)){a=_(c[y>>2]|0,(c[x>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=-+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]}c[u>>2]=(c[u>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[v>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[z>>2]<<2)}xb(c[p>>2]|0);i=D;return}function mx(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0;B=i;i=i+96|0;C=B+84|0;e=B+80|0;f=B+76|0;n=B+72|0;p=B+68|0;u=B+64|0;o=B+60|0;t=B+56|0;q=B+52|0;y=B+48|0;r=B+44|0;v=B+40|0;h=B+36|0;l=B+32|0;w=B+28|0;x=B+24|0;s=B+20|0;m=B+16|0;j=B+12|0;k=B+8|0;z=B+4|0;A=B;c[C>>2]=a;c[e>>2]=b;c[f>>2]=d;c[n>>2]=c[C>>2];c[p>>2]=c[(c[n>>2]|0)+72>>2];c[u>>2]=c[(c[n>>2]|0)+76>>2];c[t>>2]=c[(c[n>>2]|0)+80>>2];c[y>>2]=c[(c[n>>2]|0)+84>>2];c[r>>2]=c[(c[n>>2]|0)+88>>2];c[v>>2]=c[(c[n>>2]|0)+92>>2];c[h>>2]=c[c[(c[n>>2]|0)+68>>2]>>2];c[l>>2]=wb(c[t>>2]<<2)|0;c[q>>2]=0;while(1){if((c[q>>2]|0)>=(c[y>>2]|0))break;g[c[l>>2]>>2]=+g[c[e>>2]>>2];c[o>>2]=1;while(1){b=c[o>>2]|0;if((c[o>>2]|0)>=((c[t>>2]|0)-(c[o>>2]|0)|0))break;c[s>>2]=b+(c[o>>2]|0);a=_(c[p>>2]|0,(c[s>>2]|0)-1|0)|0;g[w>>2]=-+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[p>>2]|0,c[s>>2]|0)|0;g[x>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];g[(c[l>>2]|0)+((c[t>>2]|0)-(c[o>>2]|0)<<2)>>2]=+g[w>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[x>>2];c[o>>2]=(c[o>>2]|0)+1}if((b|0)==((c[t>>2]|0)-(c[o>>2]|0)|0)){a=_(c[p>>2]|0,(c[t>>2]|0)-1|0)|0;g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=-+g[(c[e>>2]|0)+(a<<2)>>2]}c[m>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[l>>2]|0,c[l>>2]|0);a=_(c[u>>2]|0,(c[t>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[c[l>>2]>>2]*2.0;c[o>>2]=1;while(1){b=c[o>>2]|0;if((c[o>>2]|0)>=((c[t>>2]|0)-(c[o>>2]|0)|0))break;g[j>>2]=+g[(c[l>>2]|0)+(b<<2)>>2]*2.0;g[k>>2]=+g[(c[l>>2]|0)+((c[t>>2]|0)-(c[o>>2]|0)<<2)>>2]*2.0;g[z>>2]=+g[(c[h>>2]|0)+(c[o>>2]<<1<<2)>>2];g[A>>2]=+g[(c[h>>2]|0)+((c[o>>2]<<1)+1<<2)>>2];a=_(c[u>>2]|0,(c[t>>2]|0)-1-(c[o>>2]|0)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[z>>2]*+g[j>>2]+ 
+g[A>>2]*+g[k>>2];a=_(c[u>>2]|0,(c[o>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[A>>2]*+g[j>>2]-+g[z>>2]*+g[k>>2];c[o>>2]=(c[o>>2]|0)+1}if((b|0)==((c[t>>2]|0)-(c[o>>2]|0)|0)){a=_(c[u>>2]|0,(c[o>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]*2.0*+g[(c[h>>2]|0)+(c[o>>2]<<1<<2)>>2]}c[q>>2]=(c[q>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[r>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[v>>2]<<2)}xb(c[l>>2]|0);i=B;return}function nx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+64>>2]|0,c[f>>2]|0);Me(c[f>>2]|0,(c[e>>2]|0)+68|0,19288,c[(c[e>>2]|0)+80>>2]<<2,1,((c[(c[e>>2]|0)+80>>2]|0)/2|0)+1|0);i=d;return}function ox(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;h=d+24|0;j=d+20|0;f=d+16|0;c[h>>2]=a;c[j>>2]=b;c[f>>2]=c[h>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=En(c[(c[f>>2]|0)+96>>2]|0)|0;h=c[(c[f>>2]|0)+80>>2]|0;g=c[(c[f>>2]|0)+84>>2]|0;f=c[(c[f>>2]|0)+64>>2]|0;c[e>>2]=j;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,29533,e);i=d;return}function px(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function qx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+4>>2]>>2]|0)==1?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)<=1:0)if(((c[(c[d>>2]|0)+20>>2]|0)!=10?(c[(c[d>>2]|0)+20>>2]|0)!=11:0)?(c[(c[d>>2]|0)+20>>2]|0)!=14:0)a=(c[(c[d>>2]|0)+20>>2]|0)==15;else a=1;else a=0;i=e;return a&1|0}function rx(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,sx()|0);i=b;return}function sx(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,16280)|0;i=b;return c[a>>2]|0}function tx(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+80|0;e=p+64|0;q=p+60|0;f=p+56|0;g=p+52|0;o=p+48|0;n=p+44|0;k=p+40|0;j=p+36|0;l=p+32|0;m=p;c[q>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(ux(c[q>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;b=c[e>>2]|0;i=p;return 
b|0}c[n>>2]=c[f>>2];c[l>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4>>2];c[j>>2]=wb(c[l>>2]<<2)|0;f=c[g>>2]|0;a=Ed(c[l>>2]|0,1,1)|0;b=Dd()|0;c[k>>2]=uc(f,In(a,b,c[j>>2]|0,c[j>>2]|0,0)|0)|0;xb(c[j>>2]|0);if(c[k>>2]|0){c[o>>2]=sn(96,16292,(c[(c[n>>2]|0)+20>>2]|0)==12?57:56)|0;c[(c[o>>2]|0)+76>>2]=c[l>>2];c[(c[o>>2]|0)+68>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+4>>2];c[(c[o>>2]|0)+72>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+8>>2];c[(c[o>>2]|0)+64>>2]=c[k>>2];c[(c[o>>2]|0)+92>>2]=c[(c[n>>2]|0)+20>>2];ke(c[(c[n>>2]|0)+8>>2]|0,(c[o>>2]|0)+80|0,(c[o>>2]|0)+84|0,(c[o>>2]|0)+88|0)|0;fc(m);h[m>>3]=+((c[l>>2]|0)-1|0);h[m+8>>3]=+(c[l>>2]|0);h[m+24>>3]=+(c[l>>2]<<2|0);fc((c[o>>2]|0)+8|0);lc(c[(c[o>>2]|0)+80>>2]|0,m,(c[o>>2]|0)+8|0);lc(c[(c[o>>2]|0)+80>>2]|0,(c[k>>2]|0)+8|0,(c[o>>2]|0)+8|0);c[e>>2]=c[o>>2];b=c[e>>2]|0;i=p;return b|0}else{c[e>>2]=0;b=c[e>>2]|0;i=p;return b|0}return 0}function ux(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(Ax(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function vx(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0.0,F=0.0;C=i;i=i+96|0;D=C+88|0;e=C+84|0;f=C+80|0;n=C+76|0;p=C+72|0;w=C+68|0;o=C+64|0;u=C+60|0;v=C+56|0;q=C+52|0;B=C+48|0;r=C+44|0;x=C+40|0;h=C+36|0;t=C+32|0;m=C+28|0;s=C+24|0;k=C+20|0;z=C+16|0;l=C+12|0;A=C+8|0;j=C+4|0;y=C;c[D>>2]=a;c[e>>2]=b;c[f>>2]=d;c[n>>2]=c[D>>2];c[p>>2]=c[(c[n>>2]|0)+68>>2];c[w>>2]=c[(c[n>>2]|0)+72>>2];c[u>>2]=c[(c[n>>2]|0)+76>>2];c[v>>2]=(c[u>>2]|0)/2|0;c[B>>2]=c[(c[n>>2]|0)+80>>2];c[r>>2]=c[(c[n>>2]|0)+84>>2];c[x>>2]=c[(c[n>>2]|0)+88>>2];c[h>>2]=wb(c[u>>2]<<2)|0;c[q>>2]=0;while(1){if((c[q>>2]|0)>=(c[B>>2]|0))break;c[o>>2]=0;c[t>>2]=c[v>>2];while(1){if((c[t>>2]|0)>=(c[u>>2]|0))break;a=_(c[p>>2]|0,c[t>>2]|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[t>>2]=(c[t>>2]|0)+4}while(1){if((c[t>>2]|0)>=(c[u>>2]<<1|0))break;a=_(c[p>>2]|0,(c[u>>2]<<1)-(c[t>>2]|0)-1|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=-+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[t>>2]=(c[t>>2]|0)+4}while(1){if((c[t>>2]|0)>=((c[u>>2]|0)*3|0))break;a=_(c[p>>2]|0,(c[t>>2]|0)-(c[u>>2]<<1)|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=-+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[t>>2]=(c[t>>2]|0)+4}while(1){if((c[t>>2]|0)>=(c[u>>2]<<2|0))break;a=_(c[p>>2]|0,(c[u>>2]<<2)-(c[t>>2]|0)-1|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[t>>2]=(c[t>>2]|0)+4}c[t>>2]=(c[t>>2]|0)-(c[u>>2]<<2);while(1){if((c[o>>2]|0)>=(c[u>>2]|0))break;a=_(c[p>>2]|0,c[t>>2]|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[t>>2]=(c[t>>2]|0)+4}c[m>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[h>>2]|0,c[h>>2]|0);c[o>>2]=0;while(1){b=(c[o>>2]|0)+(c[o>>2]|0)+1|0;if(((c[o>>2]|0)+(c[o>>2]|0)+1|0)>=(c[v>>2]|0))break;c[s>>2]=b;g[k>>2]=+g[(c[h>>2]|0)+(c[s>>2]<<2)>>2];g[l>>2]=+g[(c[h>>2]|0)+((c[s>>2]|0)+1<<2)>>2];g[A>>2]=+g[(c[h>>2]|0)+((c[u>>2]|0)-((c[s>>2]|0)+1)<<2)>>2];g[z>>2]=+g[(c[h>>2]|0)+((c[u>
>2]|0)-(c[s>>2]|0)<<2)>>2];E=+g[k>>2];F=+g[z>>2];a=_(c[w>>2]|0,c[o>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[o>>2]|0)+1|0)/2|0|0)%2|0|0)!=0?-E:E)+((((c[o>>2]|0)/2|0|0)%2|0|0)!=0?-F:F))*1.4142135381698608;F=+g[k>>2];E=+g[z>>2];a=_(c[w>>2]|0,(c[u>>2]|0)-((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[u>>2]|0)-(c[o>>2]|0)|0)/2|0|0)%2|0|0)!=0?-F:F)-(((((c[u>>2]|0)-((c[o>>2]|0)+1)|0)/2|0|0)%2|0|0)!=0?-E:E))*1.4142135381698608;E=+g[l>>2];F=+g[A>>2];a=_(c[w>>2]|0,(c[v>>2]|0)-((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[v>>2]|0)-(c[o>>2]|0)|0)/2|0|0)%2|0|0)!=0?-E:E)-(((((c[v>>2]|0)-((c[o>>2]|0)+1)|0)/2|0|0)%2|0|0)!=0?-F:F))*1.4142135381698608;F=+g[l>>2];E=+g[A>>2];a=_(c[w>>2]|0,(c[v>>2]|0)+((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[v>>2]|0)+(c[o>>2]|0)+2|0)/2|0|0)%2|0|0)!=0?-F:F)+(((((c[v>>2]|0)+((c[o>>2]|0)+1)|0)/2|0|0)%2|0|0)!=0?-E:E))*1.4142135381698608;c[o>>2]=(c[o>>2]|0)+1}if((b|0)==(c[v>>2]|0)){g[j>>2]=+g[(c[h>>2]|0)+(c[v>>2]<<2)>>2];g[y>>2]=+g[(c[h>>2]|0)+((c[u>>2]|0)-(c[v>>2]|0)<<2)>>2];E=+g[j>>2];F=+g[y>>2];a=_(c[w>>2]|0,c[o>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[o>>2]|0)+1|0)/2|0|0)%2|0|0)!=0?-E:E)+((((c[o>>2]|0)/2|0|0)%2|0|0)!=0?-F:F))*1.4142135381698608;F=+g[j>>2];E=+g[y>>2];a=_(c[w>>2]|0,(c[u>>2]|0)-((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[o>>2]|0)+2|0)/2|0|0)%2|0|0)!=0?-F:F)+(((((c[o>>2]|0)+1|0)/2|0|0)%2|0|0)!=0?-E:E))*1.4142135381698608}E=+g[c[h>>2]>>2];a=_(c[w>>2]|0,c[v>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((c[v>>2]|0)+1|0)/2|0|0)%2|0|0)!=0?-E:E)*1.4142135381698608;c[q>>2]=(c[q>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[r>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[x>>2]<<2)}xb(c[h>>2]|0);i=C;return}function wx(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0.0,G=0.0;D=i;i=i+96|0;E=D+92|0;e=D+88|0;f=D+84|0;n=D+80|0;p=D+76|0;x=D+72|0;o=D+68|0;v=D+64|0;w=D+60|0;q=D+56|0;C=D+52|0;r=D+48|0;y=D+44|0;h=D+40|0;u=D+36|0;m=D+32|0;t=D+28|0;s=D+24|0;k=D+20|0;A=D+16|0;l=D+12|0;B=D+8|0;j=D+4|0;z=D;c[E>>2]=a;c[e>>2]=b;c[f>>2]=d;c[n>>2]=c[E>>2];c[p>>2]=c[(c[n>>2]|0)+68>>2];c[x>>2]=c[(c[n>>2]|0)+72>>2];c[v>>2]=c[(c[n>>2]|0)+76>>2];c[w>>2]=(c[v>>2]|0)/2|0;c[C>>2]=c[(c[n>>2]|0)+80>>2];c[r>>2]=c[(c[n>>2]|0)+84>>2];c[y>>2]=c[(c[n>>2]|0)+88>>2];c[h>>2]=wb(c[v>>2]<<2)|0;c[q>>2]=0;while(1){if((c[q>>2]|0)>=(c[C>>2]|0))break;c[o>>2]=0;c[u>>2]=c[w>>2];while(1){if((c[u>>2]|0)>=(c[v>>2]|0))break;a=_(c[p>>2]|0,(c[v>>2]|0)-1-(c[u>>2]|0)|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[u>>2]=(c[u>>2]|0)+4}while(1){if((c[u>>2]|0)>=(c[v>>2]<<1|0))break;a=_(c[p>>2]|0,(c[u>>2]|0)-(c[v>>2]|0)|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=-+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[u>>2]=(c[u>>2]|0)+4}while(1){if((c[u>>2]|0)>=((c[v>>2]|0)*3|0))break;a=_(c[p>>2]|0,((c[v>>2]|0)*3|0)-1-(c[u>>2]|0)|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=-+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[u>>2]=(c[u>>2]|0)+4}while(1){if((c[u>>2]|0)>=(c[v>>2]<<2|0))break;a=_(c[p>>2]|0,(c[u>>2]|0)-((c[v>>2]|0)*3|0)|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[u>>2]=(c[u>>2]|0)+4}c[u>>2]=(c[u>>2]|0)-(c[v>>2]<<2);while(1){if((c[o>>2]|0)>=(c[v>>2]|0))break;a=_(c[p>>2]|0,(c[v>>2]|0)-1-(c[u>>2]|0)|0)|0;g[(c[h>>2]|0)+(c[o>>2]<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];c[o>>2]=(c[o>>2]|0)+1;c[u>>2]=(c[u>>2]|0)+4}c[m>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[h>>2]|0,c[h>>2]|0);c[o>>2]=0;while(1){b=(c[o>>2]|0)+(c[o>>2]|0)+1|0;if(((c[o>>2]|0)+(c[o>>2]|0)+1|0)>=(c[w>>2]|0))break;c[t>>2]=b;g[k>>2]=+g[(c[h>>2]|0)+(c[t>>2]<<2)>>2];g[l>>2]=+g[(c[h>>2]|0)+((c[t>>2]|0)+1<<2)>>2];g[B>>2]=+g[(c[h>>2]|0)+((c[v>>2]|
0)-((c[t>>2]|0)+1)<<2)>>2];g[A>>2]=+g[(c[h>>2]|0)+((c[v>>2]|0)-(c[t>>2]|0)<<2)>>2];F=+g[k>>2];G=+g[A>>2];a=_(c[x>>2]|0,c[o>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((((c[o>>2]|0)+1|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-F:F)+(((((c[o>>2]|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-G:G))*1.4142135381698608;G=+g[k>>2];F=+g[A>>2];a=_(c[x>>2]|0,(c[v>>2]|0)-((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((((c[v>>2]|0)-(c[o>>2]|0)|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-G:G)-((((((c[v>>2]|0)-((c[o>>2]|0)+1)|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-F:F))*1.4142135381698608;c[s>>2]=(c[w>>2]|0)-((c[o>>2]|0)+1);F=+g[l>>2];G=+g[B>>2];a=_(c[x>>2]|0,c[s>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((((c[w>>2]|0)-(c[o>>2]|0)|0)/2|0)+(c[s>>2]|0)|0)%2|0|0)!=0?-F:F)-((((((c[w>>2]|0)-((c[o>>2]|0)+1)|0)/2|0)+(c[s>>2]|0)|0)%2|0|0)!=0?-G:G))*1.4142135381698608;G=+g[l>>2];F=+g[B>>2];a=_(c[x>>2]|0,(c[w>>2]|0)+((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((((c[w>>2]|0)+(c[o>>2]|0)+2|0)/2|0)+(c[s>>2]|0)|0)%2|0|0)!=0?-G:G)+((((((c[w>>2]|0)+((c[o>>2]|0)+1)|0)/2|0)+(c[s>>2]|0)|0)%2|0|0)!=0?-F:F))*1.4142135381698608;c[o>>2]=(c[o>>2]|0)+1}if((b|0)==(c[w>>2]|0)){g[j>>2]=+g[(c[h>>2]|0)+(c[w>>2]<<2)>>2];g[z>>2]=+g[(c[h>>2]|0)+((c[v>>2]|0)-(c[w>>2]|0)<<2)>>2];F=+g[j>>2];G=+g[z>>2];a=_(c[x>>2]|0,c[o>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((((c[o>>2]|0)+1|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-F:F)+(((((c[o>>2]|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-G:G))*1.4142135381698608;G=+g[j>>2];F=+g[z>>2];a=_(c[x>>2]|0,(c[v>>2]|0)-((c[o>>2]|0)+1)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=(((((((c[o>>2]|0)+2|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-G:G)+((((((c[o>>2]|0)+1|0)/2|0)+(c[o>>2]|0)|0)%2|0|0)!=0?-F:F))*1.4142135381698608}F=+g[c[h>>2]>>2];a=_(c[x>>2]|0,c[w>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=((((((c[w>>2]|0)+1|0)/2|0)+(c[w>>2]|0)|0)%2|0|0)!=0?-F:F)*1.4142135381698608;c[q>>2]=(c[q>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[r>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[y>>2]<<2)}xb(c[h>>2]|0);i=D;return}function xx(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function yx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;h=d+24|0;j=d+20|0;f=d+16|0;c[h>>2]=a;c[j>>2]=b;c[f>>2]=c[h>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=En(c[(c[f>>2]|0)+92>>2]|0)|0;h=c[(c[f>>2]|0)+76>>2]|0;g=c[(c[f>>2]|0)+80>>2]|0;f=c[(c[f>>2]|0)+64>>2]|0;c[e>>2]=j;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,29555,e);i=d;return}function zx(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Ax(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)==1?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)<=1:0)?((c[(c[(c[d>>2]|0)+4>>2]|0)+4>>2]|0)%2|0|0)==1:0)if((c[(c[d>>2]|0)+20>>2]|0)==12)a=1;else a=(c[(c[d>>2]|0)+20>>2]|0)==16;else a=0;i=e;return a&1|0}function Bx(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Cx()|0);i=b;return}function Cx(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,16308)|0;i=b;return c[a>>2]|0}function Dx(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+80|0;e=p+64|0;q=p+60|0;f=p+56|0;g=p+52|0;o=p+48|0;n=p+44|0;k=p+40|0;j=p+36|0;l=p+32|0;m=p;c[q>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(Ex(c[q>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;b=c[e>>2]|0;i=p;return b|0}c[n>>2]=c[f>>2];c[l>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4>>2];c[j>>2]=wb(c[l>>2]<<2)|0;f=c[g>>2]|0;a=Ed((c[l>>2]|0)/2|0,1,1)|0;b=Ed(2,(c[l>>2]|0)/2|0,(c[l>>2]|0)/2|0)|0;c[k>>2]=uc(f,In(a,b,c[j>>2]|0,c[j>>2]|0,0)|0)|0;xb(c[j>>2]|0);if(!(c[k>>2]|0)){c[e>>2]=0;b=c[e>>2]|0;i=p;return 
b|0}c[o>>2]=sn(104,16320,(c[(c[n>>2]|0)+20>>2]|0)==12?59:58)|0;c[(c[o>>2]|0)+84>>2]=c[l>>2];c[(c[o>>2]|0)+76>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+4>>2];c[(c[o>>2]|0)+80>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+8>>2];c[(c[o>>2]|0)+64>>2]=c[k>>2];c[(c[o>>2]|0)+72>>2]=0;c[(c[o>>2]|0)+68>>2]=0;c[(c[o>>2]|0)+100>>2]=c[(c[n>>2]|0)+20>>2];ke(c[(c[n>>2]|0)+8>>2]|0,(c[o>>2]|0)+88|0,(c[o>>2]|0)+92|0,(c[o>>2]|0)+96|0)|0;fc(m);h[m>>3]=+(2+(((((c[l>>2]|0)/2|0)-1|0)/2|0)*20|0)|0);h[m+8>>3]=+(6+(((((c[l>>2]|0)/2|0)-1|0)/2|0)<<4)|0);h[m+24>>3]=+((c[l>>2]<<2)+2+(((((c[l>>2]|0)/2|0)-1|0)/2|0)*6|0)|0);if(!(((c[l>>2]|0)/2|0|0)%2|0)){h[m>>3]=+h[m>>3]+4.0;b=m+8|0;h[b>>3]=+h[b>>3]+8.0;b=m+24|0;h[b>>3]=+h[b>>3]+4.0}fc((c[o>>2]|0)+8|0);lc(c[(c[o>>2]|0)+88>>2]|0,m,(c[o>>2]|0)+8|0);lc(c[(c[o>>2]|0)+88>>2]|0,(c[k>>2]|0)+8|0,(c[o>>2]|0)+8|0);c[e>>2]=c[o>>2];b=c[e>>2]|0;i=p;return b|0}function Ex(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(Kx(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function Fx(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0;ia=i;i=i+224|0;ja=ia+220|0;e=ia+216|0;f=ia+212|0;C=ia+208|0;E=ia+204|0;M=ia+200|0;D=ia+196|0;K=ia+192|0;L=ia+188|0;F=ia+184|0;Y=ia+180|0;G=ia+176|0;N=ia+172|0;h=ia+168|0;j=ia+164|0;A=ia+160|0;H=ia+156|0;k=ia+152|0;u=ia+148|0;m=ia+144|0;x=ia+140|0;O=ia+136|0;T=ia+132|0;P=ia+128|0;V=ia+124|0;Z=ia+120|0;da=ia+116|0;s=ia+112|0;q=ia+108|0;t=ia+104|0;r=ia+100|0;S=ia+96|0;X=ia+92|0;B=ia+88|0;ca=ia+84|0;ha=ia+80|0;p=ia+76|0;v=ia+72|0;I=ia+68|0;Q=ia+64|0;U=ia+60|0;R=ia+56|0;W=ia+52|0;$=ia+48|0;ea=ia+44|0;l=ia+40|0;w=ia+36|0;aa=ia+32|0;fa=ia+28|0;n=ia+24|0;y=ia+20|0;J=ia+16|0;ba=ia+12|0;ga=ia+8|0;o=ia+4|0;z=ia;c[ja>>2]=a;c[e>>2]=b;c[f>>2]=d;c[C>>2]=c[ja>>2];c[E>>2]=c[(c[C>>2]|0)+76>>2];c[M>>2]=c[(c[C>>2]|0)+80>>2];c[K>>2]=c[(c[C>>2]|0)+84>>2];c[L>>2]=(c[K>>2]|0)/2|0;c[Y>>2]=c[(c[C>>2]|0)+88>>2];c[G>>2]=c[(c[C>>2]|0)+92>>2];c[N>>2]=c[(c[C>>2]|0)+96>>2];c[h>>2]=c[c[(c[C>>2]|0)+68>>2]>>2];c[A>>2]=wb(c[K>>2]<<2)|0;c[F>>2]=0;while(1){if((c[F>>2]|0)>=(c[Y>>2]|0))break;g[c[A>>2]>>2]=+g[c[e>>2]>>2]*2.0;b=_(c[E>>2]|0,(c[K>>2]|0)-1|0)|0;g[(c[A>>2]|0)+(c[L>>2]<<2)>>2]=+g[(c[e>>2]|0)+(b<<2)>>2]*2.0;c[D>>2]=1;while(1){b=(c[D>>2]|0)+(c[D>>2]|0)|0;if(((c[D>>2]|0)+(c[D>>2]|0)|0)>=(c[L>>2]|0))break;c[H>>2]=b;b=_(c[E>>2]|0,(c[H>>2]|0)-1|0)|0;g[O>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];b=_(c[E>>2]|0,c[H>>2]|0)|0;g[T>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[k>>2]=+g[O>>2]+ +g[T>>2];g[x>>2]=+g[O>>2]-+g[T>>2];b=_(c[E>>2]|0,(c[K>>2]|0)-(c[H>>2]|0)-1|0)|0;g[P>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];b=_(c[E>>2]|0,(c[K>>2]|0)-(c[H>>2]|0)|0)|0;g[V>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[u>>2]=+g[P>>2]+ +g[V>>2];g[m>>2]=+g[P>>2]-+g[V>>2];g[Z>>2]=+g[(c[h>>2]|0)+(c[D>>2]<<1<<2)>>2];g[da>>2]=+g[(c[h>>2]|0)+((c[D>>2]<<1)+1<<2)>>2];g[s>>2]=+g[k>>2]+ 
+g[u>>2];g[q>>2]=+g[k>>2]-+g[u>>2];g[(c[A>>2]|0)+(c[D>>2]<<2)>>2]=+g[Z>>2]*+g[q>>2]+ +g[da>>2]*+g[s>>2];g[(c[A>>2]|0)+((c[L>>2]|0)-(c[D>>2]|0)<<2)>>2]=+g[Z>>2]*+g[s>>2]-+g[da>>2]*+g[q>>2];g[t>>2]=+g[m>>2]+ +g[x>>2];g[r>>2]=+g[m>>2]-+g[x>>2];g[(c[A>>2]|0)+((c[L>>2]|0)+(c[D>>2]|0)<<2)>>2]=+g[Z>>2]*+g[r>>2]+ +g[da>>2]*+g[t>>2];g[(c[A>>2]|0)+((c[K>>2]|0)-(c[D>>2]|0)<<2)>>2]=+g[Z>>2]*+g[t>>2]-+g[da>>2]*+g[r>>2];c[D>>2]=(c[D>>2]|0)+1}if((b|0)==(c[L>>2]|0)){b=_(c[E>>2]|0,(c[L>>2]|0)-1|0)|0;g[S>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];b=_(c[E>>2]|0,c[L>>2]|0)|0;g[X>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[(c[A>>2]|0)+(c[D>>2]<<2)>>2]=(+g[S>>2]+ +g[X>>2])*(+g[(c[h>>2]|0)+(c[D>>2]<<1<<2)>>2]*2.0);g[(c[A>>2]|0)+((c[K>>2]|0)-(c[D>>2]|0)<<2)>>2]=(+g[S>>2]-+g[X>>2])*(+g[(c[h>>2]|0)+(c[D>>2]<<1<<2)>>2]*2.0)}c[B>>2]=c[(c[C>>2]|0)+64>>2];eb[c[(c[B>>2]|0)+56>>2]&63](c[B>>2]|0,c[A>>2]|0,c[A>>2]|0);c[j>>2]=c[c[(c[C>>2]|0)+72>>2]>>2];g[ca>>2]=+g[c[j>>2]>>2];g[ha>>2]=+g[(c[j>>2]|0)+4>>2];g[p>>2]=+g[c[A>>2]>>2];g[v>>2]=+g[(c[A>>2]|0)+(c[L>>2]<<2)>>2];g[c[f>>2]>>2]=+g[ca>>2]*+g[p>>2]+ +g[ha>>2]*+g[v>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ha>>2]*+g[p>>2]-+g[ca>>2]*+g[v>>2];c[j>>2]=(c[j>>2]|0)+8;c[D>>2]=1;while(1){b=c[D>>2]|0;if(((c[D>>2]|0)+(c[D>>2]|0)|0)>=(c[L>>2]|0))break;g[Q>>2]=+g[(c[A>>2]|0)+(b<<2)>>2];g[U>>2]=+g[(c[A>>2]|0)+((c[L>>2]|0)-(c[D>>2]|0)<<2)>>2];g[R>>2]=+g[(c[A>>2]|0)+((c[L>>2]|0)+(c[D>>2]|0)<<2)>>2];g[W>>2]=+g[(c[A>>2]|0)+((c[K>>2]|0)-(c[D>>2]|0)<<2)>>2];c[I>>2]=(c[D>>2]|0)+(c[D>>2]|0)-1;g[$>>2]=+g[c[j>>2]>>2];g[ea>>2]=+g[(c[j>>2]|0)+4>>2];g[l>>2]=+g[Q>>2]-+g[U>>2];g[w>>2]=+g[W>>2]-+g[R>>2];b=_(c[M>>2]|0,c[I>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[$>>2]*+g[l>>2]+ +g[ea>>2]*+g[w>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1-(c[I>>2]|0)|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ea>>2]*+g[l>>2]-+g[$>>2]*+g[w>>2];c[I>>2]=(c[I>>2]|0)+1;c[j>>2]=(c[j>>2]|0)+8;g[aa>>2]=+g[c[j>>2]>>2];g[fa>>2]=+g[(c[j>>2]|0)+4>>2];g[n>>2]=+g[Q>>2]+ +g[U>>2];g[y>>2]=+g[R>>2]+ 
+g[W>>2];b=_(c[M>>2]|0,c[I>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[aa>>2]*+g[n>>2]+ +g[fa>>2]*+g[y>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1-(c[I>>2]|0)|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[fa>>2]*+g[n>>2]-+g[aa>>2]*+g[y>>2];c[D>>2]=(c[D>>2]|0)+1;c[j>>2]=(c[j>>2]|0)+8}if((b+(c[D>>2]|0)|0)==(c[L>>2]|0)){c[J>>2]=(c[D>>2]|0)+(c[D>>2]|0)-1;g[ba>>2]=+g[c[j>>2]>>2];g[ga>>2]=+g[(c[j>>2]|0)+4>>2];g[o>>2]=+g[(c[A>>2]|0)+(c[D>>2]<<2)>>2];g[z>>2]=+g[(c[A>>2]|0)+((c[L>>2]|0)+(c[D>>2]|0)<<2)>>2];b=_(c[M>>2]|0,c[J>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ba>>2]*+g[o>>2]-+g[ga>>2]*+g[z>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1-(c[J>>2]|0)|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ga>>2]*+g[o>>2]+ +g[ba>>2]*+g[z>>2]}c[F>>2]=(c[F>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[G>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[N>>2]<<2)}xb(c[A>>2]|0);i=ia;return}function Gx(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0;ia=i;i=i+224|0;ja=ia+220|0;e=ia+216|0;f=ia+212|0;C=ia+208|0;E=ia+204|0;M=ia+200|0;D=ia+196|0;K=ia+192|0;L=ia+188|0;F=ia+184|0;Y=ia+180|0;G=ia+176|0;N=ia+172|0;h=ia+168|0;j=ia+164|0;A=ia+160|0;H=ia+156|0;k=ia+152|0;u=ia+148|0;m=ia+144|0;x=ia+140|0;O=ia+136|0;T=ia+132|0;P=ia+128|0;V=ia+124|0;Z=ia+120|0;da=ia+116|0;s=ia+112|0;q=ia+108|0;t=ia+104|0;r=ia+100|0;S=ia+96|0;X=ia+92|0;B=ia+88|0;ca=ia+84|0;ha=ia+80|0;p=ia+76|0;v=ia+72|0;I=ia+68|0;Q=ia+64|0;U=ia+60|0;R=ia+56|0;W=ia+52|0;$=ia+48|0;ea=ia+44|0;l=ia+40|0;w=ia+36|0;aa=ia+32|0;fa=ia+28|0;n=ia+24|0;y=ia+20|0;J=ia+16|0;ba=ia+12|0;ga=ia+8|0;o=ia+4|0;z=ia;c[ja>>2]=a;c[e>>2]=b;c[f>>2]=d;c[C>>2]=c[ja>>2];c[E>>2]=c[(c[C>>2]|0)+76>>2];c[M>>2]=c[(c[C>>2]|0)+80>>2];c[K>>2]=c[(c[C>>2]|0)+84>>2];c[L>>2]=(c[K>>2]|0)/2|0;c[Y>>2]=c[(c[C>>2]|0)+88>>2];c[G>>2]=c[(c[C>>2]|0)+92>>2];c[N>>2]=c[(c[C>>2]|0)+96>>2];c[h>>2]=c[c[(c[C>>2]|0)+68>>2]>>2];c[A>>2]=wb(c[K>>2]<<2)|0;c[F>>2]=0;while(1){if((
c[F>>2]|0)>=(c[Y>>2]|0))break;b=_(c[E>>2]|0,(c[K>>2]|0)-1|0)|0;g[c[A>>2]>>2]=+g[(c[e>>2]|0)+(b<<2)>>2]*2.0;g[(c[A>>2]|0)+(c[L>>2]<<2)>>2]=+g[c[e>>2]>>2]*2.0;c[D>>2]=1;while(1){b=(c[D>>2]|0)+(c[D>>2]|0)|0;if(((c[D>>2]|0)+(c[D>>2]|0)|0)>=(c[L>>2]|0))break;c[H>>2]=b;b=_(c[E>>2]|0,(c[K>>2]|0)-(c[H>>2]|0)|0)|0;g[O>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];b=_(c[E>>2]|0,(c[K>>2]|0)-1-(c[H>>2]|0)|0)|0;g[T>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[k>>2]=+g[O>>2]+ +g[T>>2];g[x>>2]=+g[O>>2]-+g[T>>2];b=_(c[E>>2]|0,c[H>>2]|0)|0;g[P>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];b=_(c[E>>2]|0,(c[H>>2]|0)-1|0)|0;g[V>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[u>>2]=+g[P>>2]+ +g[V>>2];g[m>>2]=+g[P>>2]-+g[V>>2];g[Z>>2]=+g[(c[h>>2]|0)+(c[D>>2]<<1<<2)>>2];g[da>>2]=+g[(c[h>>2]|0)+((c[D>>2]<<1)+1<<2)>>2];g[s>>2]=+g[k>>2]+ +g[u>>2];g[q>>2]=+g[k>>2]-+g[u>>2];g[(c[A>>2]|0)+(c[D>>2]<<2)>>2]=+g[Z>>2]*+g[q>>2]+ +g[da>>2]*+g[s>>2];g[(c[A>>2]|0)+((c[L>>2]|0)-(c[D>>2]|0)<<2)>>2]=+g[Z>>2]*+g[s>>2]-+g[da>>2]*+g[q>>2];g[t>>2]=+g[m>>2]+ +g[x>>2];g[r>>2]=+g[m>>2]-+g[x>>2];g[(c[A>>2]|0)+((c[L>>2]|0)+(c[D>>2]|0)<<2)>>2]=+g[Z>>2]*+g[r>>2]+ +g[da>>2]*+g[t>>2];g[(c[A>>2]|0)+((c[K>>2]|0)-(c[D>>2]|0)<<2)>>2]=+g[Z>>2]*+g[t>>2]-+g[da>>2]*+g[r>>2];c[D>>2]=(c[D>>2]|0)+1}if((b|0)==(c[L>>2]|0)){b=_(c[E>>2]|0,c[L>>2]|0)|0;g[S>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];b=_(c[E>>2]|0,(c[L>>2]|0)-1|0)|0;g[X>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[(c[A>>2]|0)+(c[D>>2]<<2)>>2]=(+g[S>>2]+ +g[X>>2])*(+g[(c[h>>2]|0)+(c[D>>2]<<1<<2)>>2]*2.0);g[(c[A>>2]|0)+((c[K>>2]|0)-(c[D>>2]|0)<<2)>>2]=(+g[S>>2]-+g[X>>2])*(+g[(c[h>>2]|0)+(c[D>>2]<<1<<2)>>2]*2.0)}c[B>>2]=c[(c[C>>2]|0)+64>>2];eb[c[(c[B>>2]|0)+56>>2]&63](c[B>>2]|0,c[A>>2]|0,c[A>>2]|0);c[j>>2]=c[c[(c[C>>2]|0)+72>>2]>>2];g[ca>>2]=+g[c[j>>2]>>2];g[ha>>2]=+g[(c[j>>2]|0)+4>>2];g[p>>2]=+g[c[A>>2]>>2];g[v>>2]=+g[(c[A>>2]|0)+(c[L>>2]<<2)>>2];g[c[f>>2]>>2]=+g[ca>>2]*+g[p>>2]+ 
+g[ha>>2]*+g[v>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ca>>2]*+g[v>>2]-+g[ha>>2]*+g[p>>2];c[j>>2]=(c[j>>2]|0)+8;c[D>>2]=1;while(1){b=c[D>>2]|0;if(((c[D>>2]|0)+(c[D>>2]|0)|0)>=(c[L>>2]|0))break;g[Q>>2]=+g[(c[A>>2]|0)+(b<<2)>>2];g[U>>2]=+g[(c[A>>2]|0)+((c[L>>2]|0)-(c[D>>2]|0)<<2)>>2];g[R>>2]=+g[(c[A>>2]|0)+((c[L>>2]|0)+(c[D>>2]|0)<<2)>>2];g[W>>2]=+g[(c[A>>2]|0)+((c[K>>2]|0)-(c[D>>2]|0)<<2)>>2];c[I>>2]=(c[D>>2]|0)+(c[D>>2]|0)-1;g[$>>2]=+g[c[j>>2]>>2];g[ea>>2]=+g[(c[j>>2]|0)+4>>2];g[l>>2]=+g[U>>2]-+g[Q>>2];g[w>>2]=+g[R>>2]-+g[W>>2];b=_(c[M>>2]|0,c[I>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[$>>2]*+g[l>>2]+ +g[ea>>2]*+g[w>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1-(c[I>>2]|0)|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[$>>2]*+g[w>>2]-+g[ea>>2]*+g[l>>2];c[I>>2]=(c[I>>2]|0)+1;c[j>>2]=(c[j>>2]|0)+8;g[aa>>2]=+g[c[j>>2]>>2];g[fa>>2]=+g[(c[j>>2]|0)+4>>2];g[n>>2]=+g[Q>>2]+ +g[U>>2];g[y>>2]=+g[R>>2]+ +g[W>>2];b=_(c[M>>2]|0,c[I>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[aa>>2]*+g[n>>2]+ +g[fa>>2]*+g[y>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1-(c[I>>2]|0)|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[aa>>2]*+g[y>>2]-+g[fa>>2]*+g[n>>2];c[D>>2]=(c[D>>2]|0)+1;c[j>>2]=(c[j>>2]|0)+8}if((b+(c[D>>2]|0)|0)==(c[L>>2]|0)){c[J>>2]=(c[D>>2]|0)+(c[D>>2]|0)-1;g[ba>>2]=+g[c[j>>2]>>2];g[ga>>2]=+g[(c[j>>2]|0)+4>>2];g[o>>2]=+g[(c[A>>2]|0)+(c[D>>2]<<2)>>2];g[z>>2]=+g[(c[A>>2]|0)+((c[L>>2]|0)+(c[D>>2]|0)<<2)>>2];b=_(c[M>>2]|0,c[J>>2]|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ga>>2]*+g[z>>2]-+g[ba>>2]*+g[o>>2];b=_(c[M>>2]|0,(c[K>>2]|0)-1-(c[J>>2]|0)|0)|0;g[(c[f>>2]|0)+(b<<2)>>2]=+g[ba>>2]*+g[z>>2]+ +g[ga>>2]*+g[o>>2]}c[F>>2]=(c[F>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[G>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[N>>2]<<2)}xb(c[A>>2]|0);i=ia;return}function Hx(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+64>>2]|0,c[f>>2]|0);Me(c[f>>2]|0,(c[e>>2]|0)+68|0,19300,c[(c[e>>2]|0)+84>>2]<<1,1,((c[(c[e>>2]|0)+84>>2]|0)/4|0)+1|0);Me(c[f>>2]|0,(c[e>>2]|0)+72|0,19312,c[(c[e>>2]|0)+84>>2]<<3,1,c[(c[e>>2]|0)+84>>2]|0);i=d;return}function Ix(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;h=d+24|0;j=d+20|0;f=d+16|0;c[h>>2]=a;c[j>>2]=b;c[f>>2]=c[h>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=En(c[(c[f>>2]|0)+100>>2]|0)|0;h=c[(c[f>>2]|0)+84>>2]|0;g=c[(c[f>>2]|0)+88>>2]|0;f=c[(c[f>>2]|0)+64>>2]|0;c[e>>2]=j;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,29581,e);i=d;return}function Jx(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Kx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)==1?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)<=1:0)?((c[(c[(c[d>>2]|0)+4>>2]|0)+4>>2]|0)%2|0|0)==0:0)if((c[(c[d>>2]|0)+20>>2]|0)==12)a=1;else a=(c[(c[d>>2]|0)+20>>2]|0)==16;else a=0;i=e;return a&1|0}function Lx(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Mx()|0);i=b;return}function Mx(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,16336)|0;i=b;return c[a>>2]|0}function Nx(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;t=i;i=i+96|0;e=t+80|0;u=t+76|0;f=t+72|0;g=t+68|0;r=t+64|0;q=t+60|0;k=t+56|0;l=t+52|0;j=t+48|0;n=t+44|0;s=t+40|0;m=t+36|0;p=t+32|0;o=t;c[u>>2]=a;c[f>>2]=b;c[g>>2]=d;c[k>>2]=0;c[j>>2]=0;if(((Ox(c[u>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)!=0?(c[q>>2]=c[f>>2],c[n>>2]=(c[(c[(c[q>>2]|0)+4>>2]|0)+4>>2]|0)+1,c[j>>2]=wb(c[n>>2]<<1<<2)|0,b=c[g>>2]|0,f=Ed(c[n>>2]<<1,1,1)|0,a=Dd()|0,c[k>>2]=uc(b,In(f,a,c[j>>2]|0,c[j>>2]|0,0)|0)|0,(c[k>>2]|0)!=0):0)?(ke(c[(c[q>>2]|0)+8>>2]|0,s,m,p)|0,g=c[g>>2]|0,f=Dd()|0,a=Ed((c[n>>2]|0)-1|0,-1,c[(c[(c[q>>2]|0)+4>>2]|0)+4+8>>2]|0)|0,c[l>>2]=uc(g,In(f,a,(c[j>>2]|0)+(c[n>>2]<<1<<2)+-4|0,c[(c[q>>2]|0)+16>>2]|0,0)|0)|0,(c[l>>2]|0)!=0):0){xb(c[j>>2]|0);c[r>>2]=sn(96,16348,60)|0;c[(c[r>>2]|0)+76>>2]=c[n>>2];c[(c[r>>2]|0)+72>>2]=c[(c[(c[q>>2]|0)+4>>2]|0)+4+4>>2];c[(c[r>>2]|0)+64>>2]=c[k>>2];c[(c[r>>2]|0)+68>>2]=c[l>>2];c[(c[r>>2]|0)+80>>2]=c[s>>2];c[(c[r>>2]|0)+84>>2]=c[m>>2];c[(c[r>>2]|0)+88>>2]=c[p>>2];fc(o);h[o+24>>3]=+((c[n>>2]|0)-1+(c[n>>2]<<1)|0);fc((c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+80>>2]|0,o,(c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+80>>2]|0,(c[k>>2]|0)+8|0,(c[r>>2]|0)+8|0);lc(c[(c[r>>2]|0)+80>>2]|0,(c[l>>2]|0)+8|0,(c[r>>2]|0)+8|0);c[e>>2]=c[r>>2];o=c[e>>2]|0;i=t;return o|0}yb(c[j>>2]|0);if(c[k>>2]|0)pc(c[k>>2]|0);c[e>>2]=0;o=c[e>>2]|0;i=t;return o|0}function Ox(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(Tx(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function Px(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;u=i;i=i+64|0;v=u+56|0;e=u+52|0;f=u+48|0;m=u+44|0;o=u+40|0;n=u+36|0;r=u+32|0;p=u+28|0;t=u+24|0;q=u+20|0;s=u+16|0;j=u+12|0;h=u+8|0;k=u+4|0;l=u;c[v>>2]=a;c[e>>2]=b;c[f>>2]=d;c[m>>2]=c[v>>2];c[o>>2]=c[(c[m>>2]|0)+72>>2];c[r>>2]=c[(c[m>>2]|0)+76>>2];c[t>>2]=c[(c[m>>2]|0)+80>>2];c[q>>2]=c[(c[m>>2]|0)+84>>2];c[s>>2]=c[(c[m>>2]|0)+88>>2];c[j>>2]=wb(c[r>>2]<<1<<2)|0;c[p>>2]=0;while(1){b=c[j>>2]|0;if((c[p>>2]|0)>=(c[t>>2]|0))break;g[b>>2]=0.0;c[n>>2]=1;while(1){b=c[n>>2]|0;if((c[n>>2]|0)>=(c[r>>2]|0))break;b=_(b-1|0,c[o>>2]|0)|0;g[h>>2]=+g[(c[e>>2]|0)+(b<<2)>>2];g[(c[j>>2]|0)+(c[n>>2]<<2)>>2]=-+g[h>>2];g[(c[j>>2]|0)+((c[r>>2]<<1)-(c[n>>2]|0)<<2)>>2]=+g[h>>2];c[n>>2]=(c[n>>2]|0)+1}g[(c[j>>2]|0)+(b<<2)>>2]=0.0;c[k>>2]=c[(c[m>>2]|0)+64>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[k>>2]|0,c[j>>2]|0,c[j>>2]|0);c[l>>2]=c[(c[m>>2]|0)+68>>2];eb[c[(c[l>>2]|0)+56>>2]&63](c[l>>2]|0,(c[j>>2]|0)+(c[r>>2]<<1<<2)+-4|0,c[f>>2]|0);c[p>>2]=(c[p>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[q>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[s>>2]<<2)}xb(b);i=u;return}function Qx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Rx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+80>>2]|0;g=c[(c[j>>2]|0)+64>>2]|0;f=c[(c[j>>2]|0)+68>>2]|0;c[e>>2]=(c[(c[j>>2]|0)+76>>2]|0)-1;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,29610,e);i=d;return}function Sx(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Tx(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=e;return 
b|0}if((c[c[(c[d>>2]|0)+8>>2]>>2]|0)>1){b=0;b=b&1;i=e;return b|0}b=(c[(c[d>>2]|0)+20>>2]|0)==13;b=b&1;i=e;return b|0}function Ux(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;u=i;i=i+80|0;e=u+72|0;f=u+68|0;g=u+64|0;l=u+60|0;q=u+56|0;k=u+52|0;s=u+48|0;o=u+32|0;m=u+28|0;n=u+24|0;p=u;j=u+16|0;r=u+12|0;t=u+8|0;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[l>>2]=0;c[s>>2]=my()|0;c[o>>2]=c[4091];c[o+4>>2]=c[4092];c[o+8>>2]=c[4093];c[o+12>>2]=c[4094];h[p>>3]=0.0;b=c[f>>2]|0;a:do if(c[f>>2]&2097152){c[k>>2]=b;c[q>>2]=Wx(c[s>>2]|0,c[f>>2]|0,c[g>>2]|0,0,1)|0}else{if(!(b&64))if(c[f>>2]&8)b=3;else b=(c[f>>2]&32|0)!=0?2:1;else b=0;c[n>>2]=b;c[m>>2]=+h[(c[s>>2]|0)+184>>3]>=0.0?0:c[n>>2]|0;c[f>>2]=c[f>>2]&-105;b=(c[s>>2]|0)+172|0;ne(j);c[b>>2]=c[j>>2];c[b+4>>2]=c[j+4>>2];c[q>>2]=0;c[k>>2]=0;while(1){if((c[m>>2]|0)>(c[n>>2]|0))break a;c[t>>2]=c[f>>2]|c[o+(c[m>>2]<<2)>>2];c[r>>2]=Xx(c[s>>2]|0,c[t>>2]|0,c[g>>2]|0,0)|0;if(!(c[r>>2]|0))break a;pc(c[q>>2]|0);c[q>>2]=c[r>>2];c[k>>2]=c[t>>2];h[p>>3]=+h[(c[q>>2]|0)+40>>3];c[m>>2]=(c[m>>2]|0)+1}}while(0);if(c[q>>2]|0){c[l>>2]=wb(12)|0;c[(c[l>>2]|0)+4>>2]=c[g>>2];c[(c[l>>2]|0)+8>>2]=c[e>>2];k=Xx(c[s>>2]|0,c[k>>2]|0,c[g>>2]|0,1)|0;c[c[l>>2]>>2]=k;h[(c[c[l>>2]>>2]|0)+40>>3]=+h[p>>3];rc(c[c[l>>2]>>2]|0,2);pc(c[q>>2]|0);k=c[s>>2]|0;k=c[k>>2]|0;k=k+8|0;k=c[k>>2]|0;m=c[s>>2]|0;$a[k&127](m,0);m=c[l>>2]|0;i=u;return m|0}else{qd(c[g>>2]|0);k=c[s>>2]|0;k=c[k>>2]|0;k=k+8|0;k=c[k>>2]|0;m=c[s>>2]|0;$a[k&127](m,0);m=c[l>>2]|0;i=u;return m|0}return 0}function Vx(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;if(!(c[b>>2]|0)){i=d;return}rc(c[c[b>>2]>>2]|0,0);pc(c[c[b>>2]>>2]|0);qd(c[(c[b>>2]|0)+4>>2]|0);xb(c[b>>2]|0);i=d;return}function Wx(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0;g=i;i=i+32|0;j=g+16|0;m=g+12|0;h=g+8|0;l=g+4|0;k=g;c[j>>2]=a;c[m>>2]=b;c[h>>2]=d;c[l>>2]=e;c[k>>2]=f;$x(c[j>>2]|0,c[m>>2]|0);d=(c[j>>2]|0)+164|0;a=d;b=c[a>>2]|0;a=c[a+4>>2]|0;e=Xy(c[l>>2]&7|0,0,20)|0;c[d>>2]=b&-7340033|e;c[d+4>>2]=a|C;c[(c[j>>2]|0)+76>>2]=c[k>>2];d=jb[c[(c[c[j>>2]>>2]|0)+4>>2]&15](c[j>>2]|0,c[h>>2]|0)|0;i=g;return d|0}function Xx(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=Wx(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,0)|0;if(!((c[k>>2]|0)!=0?1:(c[(c[f>>2]|0)+76>>2]|0)!=0)){e=c[f>>2]|0;b=Yx(c[g>>2]|0)|0;c[k>>2]=Wx(e,b,c[h>>2]|0,c[j>>2]|0,3)|0}if((c[(c[f>>2]|0)+76>>2]|0)!=2){j=c[k>>2]|0;i=l;return j|0}$a[c[(c[c[f>>2]>>2]|0)+8>>2]&127](c[f>>2]|0,1);c[k>>2]=Wx(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,0)|0;if((c[(c[f>>2]|0)+76>>2]|0)!=2){j=c[k>>2]|0;i=l;return j|0}$a[c[(c[c[f>>2]>>2]|0)+8>>2]&127](c[f>>2]|0,1);f=c[f>>2]|0;g=Yx(c[g>>2]|0)|0;c[k>>2]=Wx(f,g,c[h>>2]|0,c[j>>2]|0,4)|0;j=c[k>>2]|0;i=l;return j|0}function Yx(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;c[b>>2]=c[b>>2]&-41;i=d;return c[b>>2]|64|0}function Zx(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;of(c[d>>2]|0);pk(c[d>>2]|0);Nw(c[d>>2]|0);i=b;return}function _x(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b+4|0;e=b;c[d>>2]=a;c[e>>2]=c[c[d>>2]>>2];$a[c[c[c[e>>2]>>2]>>2]&127](c[e>>2]|0,c[(c[d>>2]|0)+4>>2]|0);i=b;return}function $x(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,j=0,k=0;d=i;i=i+32|0;f=d+16|0;g=d+12|0;j=d+8|0;k=d+4|0;e=d;c[f>>2]=a;c[g>>2]=b;ay(g,g,16380,7);c[k>>2]=0;c[j>>2]=0;ay(g,j,16492,10);ay(g,k,16652,24);b=(c[f>>2]|0)+164|0;a=b;g=c[a+4>>2]|0;c[b>>2]=c[a>>2]&-1048576|c[j>>2]&1048575;c[b+4>>2]=g;b=(c[f>>2]|0)+164|0;g=b;a=c[g+4>>2]&-1048576|(c[k>>2]|c[j>>2])&1048575;c[b>>2]=c[g>>2];c[b+4>>2]=a;c[e>>2]=by(+h[(c[f>>2]|0)+184>>3])|0;b=(c[f>>2]|0)+164|0;a=b;f=c[a>>2]|0;a=c[a+4>>2]|0;e=Xy(c[e>>2]&511|0,0,23)|0;c[b>>2]=f&8388607|e;c[b+4>>2]=a|C;i=d;return}function ay(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=0;while(1){if((c[k>>2]|0)>=(c[j>>2]|0))break;if(c[c[f>>2]>>2]&c[(c[h>>2]|0)+(c[k>>2]<<4)>>2]^c[(c[h>>2]|0)+(c[k>>2]<<4)+4>>2])c[c[g>>2]>>2]=(c[c[g>>2]>>2]|c[(c[h>>2]|0)+(c[k>>2]<<4)+8>>2])^c[(c[h>>2]|0)+(c[k>>2]<<4)+8+4>>2];c[k>>2]=(c[k>>2]|0)+1}i=l;return}function by(a){a=+a;var b=0,d=0,e=0,f=0;f=i;i=i+48|0;b=f+32|0;d=f+16|0;e=f+24|0;h[d>>3]=a;h[f+8>>3]=31536.0e3;h[f>>3]=1.05;c[f+28>>2]=512;if(+h[d>>3]<0.0|+h[d>>3]>=31536.0e3){c[b>>2]=0;b=c[b>>2]|0;i=f;return b|0}if(+h[d>>3]<=1.0e-10){c[b>>2]=511;b=c[b>>2]|0;i=f;return b|0}else{d=~~(+Y(+(31536.0e3/+h[d>>3]))/+Y(1.05)+.5);c[e>>2]=d;d=(c[e>>2]|0)<0?0:d;c[e>>2]=d;c[e>>2]=(c[e>>2]|0)>=512?511:d;c[b>>2]=c[e>>2];b=c[b>>2]|0;i=f;return b|0}return 0}function cy(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+32|0;h=q+28|0;j=q+24|0;k=q+20|0;l=q+16|0;m=q+12|0;n=q+8|0;p=q+4|0;o=q;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;c[p>>2]=ge(c[h>>2]|0)|0;if(!((c[h>>2]|0)!=2147483647&(c[h>>2]|0)>0)){d=c[p>>2]|0;i=q;return 
d|0}c[(c[p>>2]|0)+4+(((c[h>>2]|0)-1|0)*12|0)+4>>2]=c[m>>2];c[(c[p>>2]|0)+4+(((c[h>>2]|0)-1|0)*12|0)+8>>2]=c[n>>2];c[(c[p>>2]|0)+4+(((c[h>>2]|0)-1|0)*12|0)>>2]=c[(c[j>>2]|0)+((c[h>>2]|0)-1<<2)>>2];c[o>>2]=(c[h>>2]|0)-1;while(1){if((c[o>>2]|0)<=0)break;d=_(c[(c[p>>2]|0)+4+((c[o>>2]|0)*12|0)+4>>2]|0,c[(c[k>>2]|0)+(c[o>>2]<<2)>>2]|0)|0;c[(c[p>>2]|0)+4+(((c[o>>2]|0)-1|0)*12|0)+4>>2]=d;d=_(c[(c[p>>2]|0)+4+((c[o>>2]|0)*12|0)+8>>2]|0,c[(c[l>>2]|0)+(c[o>>2]<<2)>>2]|0)|0;c[(c[p>>2]|0)+4+(((c[o>>2]|0)-1|0)*12|0)+8>>2]=d;c[(c[p>>2]|0)+4+(((c[o>>2]|0)-1|0)*12|0)>>2]=c[(c[j>>2]|0)+((c[o>>2]|0)-1<<2)>>2];c[o>>2]=(c[o>>2]|0)+-1}d=c[p>>2]|0;i=q;return d|0}function dy(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if((c[h>>2]|0)<0){b=0;b=b&1;i=g;return b|0}b=(ey(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function ey(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;g=h+12|0;d=h+8|0;e=h+4|0;f=h;c[d>>2]=a;c[e>>2]=b;do if((c[d>>2]|0)!=2147483647){if((c[d>>2]|0)<0){c[g>>2]=0;break}c[f>>2]=0;while(1){if((c[f>>2]|0)>=(c[d>>2]|0)){d=10;break}if((c[(c[e>>2]|0)+(c[f>>2]<<2)>>2]|0)<=0){d=8;break}c[f>>2]=(c[f>>2]|0)+1}if((d|0)==8){c[g>>2]=0;break}else if((d|0)==10){c[g>>2]=1;break}}else c[g>>2]=0;while(0);i=h;return c[g>>2]|0}function fy(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;e=gy(1,k,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0)|0;i=f;return e|0}function gy(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;g=i;i=i+32|0;m=g+16|0;l=g+12|0;k=g+8|0;j=g+4|0;h=g;c[m>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;e=jy(c[m>>2]|0,c[l>>2]|0,1,c[k>>2]|0,0,1,1,c[j>>2]|0,0,1,1,c[h>>2]|0)|0;i=g;return e|0}function hy(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;e=iy(1,k,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0)|0;i=f;return e|0} +function mb(a){a=a|0;var 
b=0;b=i;i=i+a|0;i=i+15&-16;return b|0}function nb(){return i|0}function ob(a){a=a|0;i=a}function pb(a,b){a=a|0;b=b|0;i=a;j=b}function qb(a,b){a=a|0;b=b|0;if(!n){n=a;o=b}}function rb(b){b=b|0;a[k>>0]=a[b>>0];a[k+1>>0]=a[b+1>>0];a[k+2>>0]=a[b+2>>0];a[k+3>>0]=a[b+3>>0]}function sb(b){b=b|0;a[k>>0]=a[b>>0];a[k+1>>0]=a[b+1>>0];a[k+2>>0]=a[b+2>>0];a[k+3>>0]=a[b+3>>0];a[k+4>>0]=a[b+4>>0];a[k+5>>0]=a[b+5>>0];a[k+6>>0]=a[b+6>>0];a[k+7>>0]=a[b+7>>0]}function tb(a){a=a|0;C=a}function ub(){return C|0}function vb(a){a=a|0;var b=0;b=i;i=i+16|0;c[b>>2]=a;i=b;return 0}function wb(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[e>>2]=(c[e>>2]|0)==0?1:a;c[b>>2]=Ub(c[e>>2]|0)|0;if(c[b>>2]|0){a=c[b>>2]|0;i=d;return a|0}zb(19324,269,19326);a=c[b>>2]|0;i=d;return a|0}function xb(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;Vb(c[d>>2]|0);i=b;return}function yb(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;if(!(c[b>>2]|0)){i=d;return}xb(c[b>>2]|0);i=d;return}function zb(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;f=i;i=i+32|0;e=f;g=f+20|0;h=f+16|0;f=f+12|0;c[g>>2]=a;c[h>>2]=b;c[f>>2]=d;yy(c[4272]|0)|0;d=c[4271]|0;a=c[h>>2]|0;b=c[g>>2]|0;c[e>>2]=c[f>>2];c[e+4>>2]=a;c[e+8>>2]=b;wy(d,19334,e)|0;Ba()}function Ab(a,b){a=a|0;b=b|0;var d=0;d=i;i=i+16|0;c[d+4>>2]=a;c[d>>2]=b;i=d;return}function Bb(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;k=i;i=i+32|0;h=k+24|0;l=k+20|0;e=k+16|0;m=k+12|0;f=k+8|0;j=k+4|0;g=k;c[l>>2]=a;c[e>>2]=b;c[m>>2]=d;c[m>>2]=(c[m>>2]|0)!=0?d:256;d=c[m>>2]|0;b=c[e>>2]|0;c[j>>2]=ec(d,ec(b,dc(1,65536/(c[l>>2]|0)|0)|0)|0)|0;c[g>>2]=dc(1,(c[j>>2]|0)/4|0)|0;c[f>>2]=c[j>>2];while(1){if((c[f>>2]|0)<(c[g>>2]|0)){a=6;break}d=c[f>>2]|0;if(!((c[e>>2]|0)%(c[f>>2]|0)|0)){a=4;break}c[f>>2]=d+-1}if((a|0)==4){c[h>>2]=d;l=c[h>>2]|0;i=k;return l|0}else if((a|0)==6){c[h>>2]=c[j>>2];l=c[h>>2]|0;i=k;return l|0}return 0}function Cb(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;f=i;i=i+16|0;d=f+8|0;e=f+4|0;g=f;c[e>>2]=a;c[g>>2]=b;a=c[e>>2]|0;if((c[g>>2]|0)==1){c[d>>2]=a;e=c[d>>2]|0;i=f;return e|0}else{c[d>>2]=a+(ld(6-(c[e>>2]|0)|0,8)|0);e=c[d>>2]|0;i=f;return e|0}return 0}function Db(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;i=d;return (c[b>>2]|0)>65536|0}function Eb(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;m=n+24|0;g=n+20|0;h=n+16|0;j=n+12|0;k=n+8|0;l=n;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[n+4>>2]=f;c[l>>2]=0;while(1){if((c[l>>2]|0)>=(c[j>>2]|0)){g=6;break}d=Bb(c[g>>2]|0,c[h>>2]|0,c[(c[k>>2]|0)+(c[l>>2]<<2)>>2]|0)|0;if((d|0)==(Bb(c[g>>2]|0,c[h>>2]|0,c[(c[k>>2]|0)+(c[j>>2]<<2)>>2]|0)|0)){g=4;break}c[l>>2]=(c[l>>2]|0)+1}if((g|0)==4){c[m>>2]=1;d=c[m>>2]|0;i=n;return d|0}else if((g|0)==6){c[m>>2]=0;d=c[m>>2]|0;i=n;return d|0}return 0}function Fb(a,b,d,e,f,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0;z=i;i=i+64|0;q=z+56|0;r=z+52|0;s=z+48|0;t=z+44|0;u=z+40|0;v=z+36|0;w=z+32|0;x=z+28|0;j=z+24|0;l=z+20|0;k=z+16|0;m=z+12|0;n=z+8|0;o=z+4|0;y=z;c[q>>2]=a;c[r>>2]=b;c[s>>2]=d;c[t>>2]=e;c[u>>2]=f;c[v>>2]=h;a:do switch(c[v>>2]|0){case 1:{if(!((c[s>>2]&1|0)!=0|(c[t>>2]|0)!=1|(c[u>>2]|0)!=1)){c[s>>2]=(c[s>>2]|0)/2|0;c[t>>2]=2;c[u>>2]=2;p=6;break a}while(1){if((c[s>>2]|0)<=0)break;g[c[r>>2]>>2]=+g[c[q>>2]>>2];c[s>>2]=(c[s>>2]|0)+-1;c[q>>2]=(c[q>>2]|0)+(c[t>>2]<<2);c[r>>2]=(c[r>>2]|0)+(c[u>>2]<<2)}i=z;return}case 2:{p=6;break}case 4:break;default:{c[w>>2]=0;while(1){if((c[w>>2]|0)>=(c[s>>2]|0))break;c[x>>2]=0;while(1){a=c[w>>2]|0;if((c[x>>2]|0)>=(c[v>>2]|0))break;p=_(a,c[t>>2]|0)|0;g[y>>2]=+g[(c[q>>2]|0)+(p+(c[x>>2]|0)<<2)>>2];p=_(c[w>>2]|0,c[u>>2]|0)|0;g[(c[r>>2]|0)+(p+(c[x>>2]|0)<<2)>>2]=+g[y>>2];c[x>>2]=(c[x>>2]|0)+1}c[w>>2]=a+1}i=z;return}}while(0);do 
if((p|0)==6){if(!((c[s>>2]&1|0)!=0|(c[t>>2]|0)!=2|(c[u>>2]|0)!=2)){c[s>>2]=(c[s>>2]|0)/2|0;c[t>>2]=4;c[u>>2]=4;break}while(1){if((c[s>>2]|0)<=0)break;g[j>>2]=+g[c[q>>2]>>2];g[l>>2]=+g[(c[q>>2]|0)+4>>2];g[c[r>>2]>>2]=+g[j>>2];g[(c[r>>2]|0)+4>>2]=+g[l>>2];c[s>>2]=(c[s>>2]|0)+-1;c[q>>2]=(c[q>>2]|0)+(c[t>>2]<<2);c[r>>2]=(c[r>>2]|0)+(c[u>>2]<<2)}i=z;return}while(0);while(1){if((c[s>>2]|0)<=0)break;g[k>>2]=+g[c[q>>2]>>2];g[m>>2]=+g[(c[q>>2]|0)+4>>2];g[n>>2]=+g[(c[q>>2]|0)+8>>2];g[o>>2]=+g[(c[q>>2]|0)+12>>2];g[c[r>>2]>>2]=+g[k>>2];g[(c[r>>2]|0)+4>>2]=+g[m>>2];g[(c[r>>2]|0)+8>>2]=+g[n>>2];g[(c[r>>2]|0)+12>>2]=+g[o>>2];c[s>>2]=(c[s>>2]|0)+-1;c[q>>2]=(c[q>>2]|0)+(c[t>>2]<<2);c[r>>2]=(c[r>>2]|0)+(c[u>>2]<<2)}i=z;return}function Gb(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0;B=i;i=i+64|0;n=B+52|0;o=B+48|0;p=B+44|0;q=B+40|0;r=B+36|0;s=B+32|0;t=B+28|0;u=B+24|0;v=B+20|0;w=B+16|0;x=B+12|0;y=B+8|0;z=B+4|0;A=B;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[u>>2]=k;c[v>>2]=l;c[w>>2]=m;c[y>>2]=0;while(1){if((c[y>>2]|0)>=(c[u>>2]|0))break;c[x>>2]=0;while(1){if((c[x>>2]|0)>=(c[r>>2]|0))break;m=_(c[x>>2]|0,c[s>>2]|0)|0;m=m+(_(c[y>>2]|0,c[v>>2]|0)|0)|0;g[z>>2]=+g[(c[n>>2]|0)+(m<<2)>>2];m=_(c[x>>2]|0,c[s>>2]|0)|0;m=m+(_(c[y>>2]|0,c[v>>2]|0)|0)|0;g[A>>2]=+g[(c[o>>2]|0)+(m<<2)>>2];m=_(c[x>>2]|0,c[t>>2]|0)|0;m=m+(_(c[y>>2]|0,c[w>>2]|0)|0)|0;g[(c[p>>2]|0)+(m<<2)>>2]=+g[z>>2];m=_(c[x>>2]|0,c[t>>2]|0)|0;m=m+(_(c[y>>2]|0,c[w>>2]|0)|0)|0;g[(c[q>>2]|0)+(m<<2)>>2]=+g[A>>2];c[x>>2]=(c[x>>2]|0)+1}c[y>>2]=(c[y>>2]|0)+1}i=B;return}function Hb(a,b,d,e,f,g,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;s=i;i=i+48|0;w=s+36|0;v=s+32|0;u=s+28|0;t=s+24|0;m=s+20|0;n=s+16|0;o=s+12|0;p=s+8|0;q=s+4|0;r=s;c[w>>2]=a;c[v>>2]=b;c[u>>2]=d;c[t>>2]=e;c[m>>2]=f;c[n>>2]=g;c[o>>2]=h;c[p>>2]=j;c[q>>2]=k;c[r>>2]=l;h=c[n>>2]|0;l=c[q>>2]|0;e=c[w>>2]|0;d=c[v>>2]|0;b=c[u>>2]|0;a=c[t>>2]|0;if((((c[n>>2]|0)<0?0-h|0:h)|0)<(((c[q>>2]|0)<0?0-l|0:l)|0)){Gb(e,d,b,a,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0,c[p>>2]|0,c[q>>2]|0,c[r>>2]|0);i=s;return}else{Gb(e,d,b,a,c[p>>2]|0,c[q>>2]|0,c[r>>2]|0,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0);i=s;return}}function Ib(a,b,d,e,f,g,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;s=i;i=i+48|0;w=s+36|0;v=s+32|0;u=s+28|0;t=s+24|0;m=s+20|0;n=s+16|0;o=s+12|0;p=s+8|0;q=s+4|0;r=s;c[w>>2]=a;c[v>>2]=b;c[u>>2]=d;c[t>>2]=e;c[m>>2]=f;c[n>>2]=g;c[o>>2]=h;c[p>>2]=j;c[q>>2]=k;c[r>>2]=l;h=c[o>>2]|0;l=c[r>>2]|0;e=c[w>>2]|0;d=c[v>>2]|0;b=c[u>>2]|0;a=c[t>>2]|0;if((((c[o>>2]|0)<0?0-h|0:h)|0)<(((c[r>>2]|0)<0?0-l|0:l)|0)){Gb(e,d,b,a,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0,c[p>>2]|0,c[q>>2]|0,c[r>>2]|0);i=s;return}else{Gb(e,d,b,a,c[p>>2]|0,c[q>>2]|0,c[r>>2]|0,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0);i=s;return}}function Jb(a,b,d,e,f,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0;D=i;i=i+64|0;q=D+60|0;r=D+56|0;s=D+52|0;t=D+48|0;u=D+44|0;v=D+40|0;w=D+36|0;x=D+32|0;y=D+28|0;z=D+24|0;A=D+20|0;B=D+16|0;n=D+12|0;o=D+8|0;p=D+4|0;C=D;c[q>>2]=a;c[r>>2]=b;c[s>>2]=d;c[t>>2]=e;c[u>>2]=f;c[v>>2]=j;c[w>>2]=k;c[x>>2]=l;c[y>>2]=m;switch(c[y>>2]|0){case 1:{c[A>>2]=0;while(1){if((c[A>>2]|0)>=(c[v>>2]|0))break;c[z>>2]=0;while(1){if((c[z>>2]|0)>=(c[s>>2]|0))break;y=_(c[z>>2]|0,c[t>>2]|0)|0;y=y+(_(c[A>>2]|0,c[w>>2]|0)|0)|0;g[n>>2]=+g[(c[q>>2]|0)+(y<<2)>>2];y=_(c[z>>2]|0,c[u>>2]|0)|0;y=y+(_(c[A>>2]|0,c[x>>2]|0)|0)|0;g[(c[r>>2]|0)+(y<<2)>>2]=+g[n>>2];c[z>>2]=(c[z>>2]|0)+1}c[A>>2]=(c[A>>2]|0)+1}i=D;return}case 
2:{if((((((((c[q>>2]|0)>>>0)%8|0|0)==0?(((c[r>>2]|0)>>>0)%8|0|0)==0:0)?(c[t>>2]&1|0)==0:0)?(c[w>>2]&1|0)==0:0)?(c[u>>2]&1|0)==0:0)?(c[x>>2]&1|0)==0:0){c[A>>2]=0;while(1){if((c[A>>2]|0)>=(c[v>>2]|0))break;c[z>>2]=0;while(1){if((c[z>>2]|0)>=(c[s>>2]|0))break;p=_(c[z>>2]|0,c[t>>2]|0)|0;p=p+(_(c[A>>2]|0,c[w>>2]|0)|0)|0;y=_(c[z>>2]|0,c[u>>2]|0)|0;y=y+(_(c[A>>2]|0,c[x>>2]|0)|0)|0;h[(c[r>>2]|0)+(y<<2)>>3]=+h[(c[q>>2]|0)+(p<<2)>>3];c[z>>2]=(c[z>>2]|0)+1}c[A>>2]=(c[A>>2]|0)+1}i=D;return}c[A>>2]=0;while(1){if((c[A>>2]|0)>=(c[v>>2]|0))break;c[z>>2]=0;while(1){if((c[z>>2]|0)>=(c[s>>2]|0))break;y=_(c[z>>2]|0,c[t>>2]|0)|0;y=y+(_(c[A>>2]|0,c[w>>2]|0)|0)|0;g[o>>2]=+g[(c[q>>2]|0)+(y<<2)>>2];y=_(c[z>>2]|0,c[t>>2]|0)|0;y=y+(_(c[A>>2]|0,c[w>>2]|0)|0)+1|0;g[p>>2]=+g[(c[q>>2]|0)+(y<<2)>>2];y=_(c[z>>2]|0,c[u>>2]|0)|0;y=y+(_(c[A>>2]|0,c[x>>2]|0)|0)|0;g[(c[r>>2]|0)+(y<<2)>>2]=+g[o>>2];y=_(c[z>>2]|0,c[u>>2]|0)|0;y=y+(_(c[A>>2]|0,c[x>>2]|0)|0)+1|0;g[(c[r>>2]|0)+(y<<2)>>2]=+g[p>>2];c[z>>2]=(c[z>>2]|0)+1}c[A>>2]=(c[A>>2]|0)+1}i=D;return}default:{c[A>>2]=0;while(1){if((c[A>>2]|0)>=(c[v>>2]|0))break;c[z>>2]=0;while(1){if((c[z>>2]|0)>=(c[s>>2]|0))break;c[B>>2]=0;while(1){a=c[z>>2]|0;if((c[B>>2]|0)>=(c[y>>2]|0))break;p=_(a,c[t>>2]|0)|0;p=p+(_(c[A>>2]|0,c[w>>2]|0)|0)|0;g[C>>2]=+g[(c[q>>2]|0)+(p+(c[B>>2]|0)<<2)>>2];p=_(c[z>>2]|0,c[u>>2]|0)|0;p=p+(_(c[A>>2]|0,c[x>>2]|0)|0)|0;g[(c[r>>2]|0)+(p+(c[B>>2]|0)<<2)>>2]=+g[C>>2];c[B>>2]=(c[B>>2]|0)+1}c[z>>2]=a+1}c[A>>2]=(c[A>>2]|0)+1}i=D;return}}}function Kb(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;s=i;i=i+48|0;u=s+32|0;t=s+28|0;l=s+24|0;m=s+20|0;n=s+16|0;o=s+12|0;p=s+8|0;q=s+4|0;r=s;c[u>>2]=a;c[t>>2]=b;c[l>>2]=d;c[m>>2]=e;c[n>>2]=f;c[o>>2]=g;c[p>>2]=h;c[q>>2]=j;c[r>>2]=k;j=c[m>>2]|0;k=c[p>>2]|0;b=c[u>>2]|0;a=c[t>>2]|0;if((((c[m>>2]|0)<0?0-j|0:j)|0)<(((c[p>>2]|0)<0?0-k|0:k)|0)){Jb(b,a,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0,c[p>>2]|0,c[q>>2]|0,c[r>>2]|0);i=s;return}else{Jb(b,a,c[o>>2]|0,c[p>>2]|0,c[q>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[r>>2]|0);i=s;return}}function Lb(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;s=i;i=i+48|0;u=s+32|0;t=s+28|0;l=s+24|0;m=s+20|0;n=s+16|0;o=s+12|0;p=s+8|0;q=s+4|0;r=s;c[u>>2]=a;c[t>>2]=b;c[l>>2]=d;c[m>>2]=e;c[n>>2]=f;c[o>>2]=g;c[p>>2]=h;c[q>>2]=j;c[r>>2]=k;j=c[n>>2]|0;k=c[q>>2]|0;b=c[u>>2]|0;a=c[t>>2]|0;if((((c[n>>2]|0)<0?0-j|0:j)|0)<(((c[q>>2]|0)<0?0-k|0:k)|0)){Jb(b,a,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0,c[p>>2]|0,c[q>>2]|0,c[r>>2]|0);i=s;return}else{Jb(b,a,c[o>>2]|0,c[p>>2]|0,c[q>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[r>>2]|0);i=s;return}}function Mb(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;l=i;i=i+80|0;w=l+68|0;v=l+64|0;p=l+60|0;u=l+56|0;t=l+52|0;o=l+48|0;s=l+44|0;r=l+40|0;q=l+36|0;n=l+32|0;m=l;c[w>>2]=a;c[v>>2]=b;c[p>>2]=d;c[u>>2]=e;c[t>>2]=f;c[o>>2]=g;c[s>>2]=h;c[r>>2]=j;c[q>>2]=k;c[n>>2]=me(c[q>>2]|0,2)|0;c[m>>2]=c[w>>2];c[m+4>>2]=c[v>>2];c[m+8>>2]=c[u>>2];c[m+12>>2]=c[t>>2];c[m+16>>2]=c[s>>2];c[m+20>>2]=c[r>>2];c[m+24>>2]=c[q>>2];c[m+28>>2]=0;le(0,c[p>>2]|0,0,c[o>>2]|0,c[n>>2]|0,3,m);i=l;return}function Nb(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;l=i;i=i+4176|0;w=l+4168|0;v=l+4164|0;p=l+4160|0;u=l+4156|0;t=l+4152|0;o=l+4148|0;s=l+4144|0;r=l+4140|0;q=l+4136|0;n=l+32|0;m=l;c[w>>2]=a;c[v>>2]=b;c[p>>2]=d;c[u>>2]=e;c[t>>2]=f;c[o>>2]=g;c[s>>2]=h;c[r>>2]=j;c[q>>2]=k;c[n>>2]=me(c[q>>2]|0,2)|0;c[m>>2]=c[w>>2];c[m+4>>2]=c[v>>2];c[m+8>>2]=c[u>>2];c[m+12>>2]=c[t>>2];c[m+16>>2]=c[s>>2];c[m+20>>2]=c[r>>2];c[m+24>>2]=c[q>>2];c[m+28>>2]=l+40;le(0,c[p>>2]|0,0,c[o>>2]|0,c[n>>2]|0,4,m);i=l;return}function Ob(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;l=g+20|0;m=g+16|0;j=g+12|0;k=g+8|0;n=g+4|0;h=g;c[l>>2]=a;c[m>>2]=b;c[j>>2]=d;c[k>>2]=e;c[n>>2]=f;c[h>>2]=c[n>>2];b=(c[c[h>>2]>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+8>>2]|0)|0)<<2)|0;b=b+((_(c[j>>2]|0,c[(c[h>>2]|0)+16>>2]|0)|0)<<2)|0;d=(c[(c[h>>2]|0)+4>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+12>>2]|0)|0)<<2)|0;d=d+((_(c[j>>2]|0,c[(c[h>>2]|0)+20>>2]|0)|0)<<2)|0;Jb(b,d,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+8>>2]|0,c[(c[h>>2]|0)+12>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,c[(c[h>>2]|0)+16>>2]|0,c[(c[h>>2]|0)+20>>2]|0,c[(c[h>>2]|0)+24>>2]|0);i=g;return}function Pb(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;l=g+20|0;m=g+16|0;j=g+12|0;k=g+8|0;n=g+4|0;h=g;c[l>>2]=a;c[m>>2]=b;c[j>>2]=d;c[k>>2]=e;c[n>>2]=f;c[h>>2]=c[n>>2];d=(c[c[h>>2]>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+8>>2]|0)|0)<<2)|0;d=d+((_(c[j>>2]|0,c[(c[h>>2]|0)+16>>2]|0)|0)<<2)|0;b=_(c[(c[h>>2]|0)+24>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0)|0;Kb(d,c[(c[h>>2]|0)+28>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+8>>2]|0,c[(c[h>>2]|0)+24>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,c[(c[h>>2]|0)+16>>2]|0,b,c[(c[h>>2]|0)+24>>2]|0);b=(c[(c[h>>2]|0)+4>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+12>>2]|0)|0)<<2)|0;b=b+((_(c[j>>2]|0,c[(c[h>>2]|0)+20>>2]|0)|0)<<2)|0;d=_(c[(c[h>>2]|0)+24>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0)|0;Lb(c[(c[h>>2]|0)+28>>2]|0,b,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+24>>2]|0,c[(c[h>>2]|0)+12>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,d,c[(c[h>>2]|0)+20>>2]|0,c[(c[h>>2]|0)+24>>2]|0);i=g;return}function Qb(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;j=i;i=i+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[k>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=e;if((c[g>>2]|0)>(c[k>>2]|0))if((c[g>>2]|0)>0?(c[g>>2]&(c[g>>2]|0)-1|0)==0:0)f=(_(c[f>>2]|0,(c[g>>2]|0)/(c[h>>2]|0)|0)|0)<=4;else f=0;else f=1;i=j;return f&1|0}function Rb(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;j=i;i=i+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[k>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=e;b=c[f>>2]|0;if((c[k>>2]|0)==-1){c[c[g>>2]>>2]=b;c[c[h>>2]>>2]=(c[f>>2]|0)+4;i=j;return}else{c[c[g>>2]>>2]=b+4;c[c[h>>2]>>2]=c[f>>2];i=j;return}}function Sb(b){b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;d=f+4|0;e=f;c[d>>2]=b;c[e>>2]=-559038737;do{c[e>>2]=((c[e>>2]|0)*17|0)+(a[c[d>>2]>>0]|0);b=c[d>>2]|0;c[d>>2]=b+1}while((a[b>>0]|0)!=0);i=f;return c[e>>2]|0}function Tb(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;a=c[b>>2]|0;i=d;return ((c[b>>2]|0)<0?0-a|0:a)|0}function Ub(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=Ry(c[e>>2]|0)|0;i=d;return c[b>>2]|0}function Vb(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;Sy(c[d>>2]|0);i=b;return}function 
Wb(b,d,e){b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;k=i;i=i+32|0;f=k+16|0;l=k+12|0;g=k+8|0;j=k+4|0;h=k;c[f>>2]=b;c[l>>2]=d;c[g>>2]=e;c[h>>2]=c[l>>2];c[j>>2]=0;while(1){if((c[j>>2]|0)>>>0>=(c[g>>2]|0)>>>0)break;ac(c[f>>2]|0,a[(c[h>>2]|0)+(c[j>>2]|0)>>0]|0);c[j>>2]=(c[j>>2]|0)+1}i=k;return}function Xb(b,d){b=b|0;d=d|0;var e=0,f=0,g=0;g=i;i=i+16|0;e=g+4|0;f=g;c[e>>2]=b;c[f>>2]=d;do{ac(c[e>>2]|0,a[c[f>>2]>>0]|0);d=c[f>>2]|0;c[f>>2]=d+1}while((a[d>>0]|0)!=0);i=g;return}function Yb(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;Wb(c[f>>2]|0,e,4);i=d;return}function Zb(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;Wb(c[f>>2]|0,e,4);i=d;return}function _b(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;Wb(c[f>>2]|0,e,4);i=d;return}function $b(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;c[c[d>>2]>>2]=1732584193;c[(c[d>>2]|0)+4>>2]=-271733879;c[(c[d>>2]|0)+8>>2]=-1732584194;c[(c[d>>2]|0)+12>>2]=271733878;c[(c[d>>2]|0)+80>>2]=0;i=b;return}function ac(b,d){b=b|0;d=d|0;var e=0,f=0,g=0;f=i;i=i+16|0;e=f;g=f+4|0;c[e>>2]=b;a[g>>0]=d;a[(c[e>>2]|0)+16+(((c[(c[e>>2]|0)+80>>2]|0)>>>0)%64|0)>>0]=a[g>>0]|0;b=(c[e>>2]|0)+80|0;d=(c[b>>2]|0)+1|0;c[b>>2]=d;if((d>>>0)%64|0){i=f;return}cc(c[e>>2]|0,(c[e>>2]|0)+16|0);i=f;return}function bc(a){a=a|0;var b=0,d=0,e=0,f=0;f=i;i=i+16|0;b=f+8|0;e=f+4|0;d=f;c[b>>2]=a;c[e>>2]=c[(c[b>>2]|0)+80>>2]<<3;ac(c[b>>2]|0,-128);while(1){if((((c[(c[b>>2]|0)+80>>2]|0)>>>0)%64|0|0)==56)break;ac(c[b>>2]|0,0)}c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=8)break;ac(c[b>>2]|0,c[e>>2]&255);c[e>>2]=(c[e>>2]|0)>>>8;c[d>>2]=(c[d>>2]|0)+1}i=f;return}function cc(b,e){b=b|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+112|0;f=r+104|0;g=r+100|0;h=r+96|0;j=r+92|0;k=r+88|0;l=r+84|0;p=r+80|0;q=r+16|0;m=r+8|0;n=r+4|0;o=r;c[f>>2]=b;c[g>>2]=e;c[r+12>>2]=-1;c[m>>2]=0;while(1){if((c[m>>2]|0)>=16)break;c[n>>2]=(c[g>>2]|0)+(c[m>>2]<<2);c[q+(c[m>>2]<<2)>>2]=d[c[n>>2]>>0]|d[(c[n>>2]|0)+1>>0]<<8|d[(c[n>>2]|0)+2>>0]<<16|d[(c[n>>2]|0)+3>>0]<<24;c[m>>2]=(c[m>>2]|0)+1}c[h>>2]=c[c[f>>2]>>2];c[j>>2]=c[(c[f>>2]|0)+4>>2];c[k>>2]=c[(c[f>>2]|0)+8>>2];c[l>>2]=c[(c[f>>2]|0)+12>>2];c[m>>2]=0;while(1){if((c[m>>2]|0)>=64)break;c[o>>2]=19369+(c[m>>2]<<1);switch(c[m>>2]>>4|0){case 0:{c[h>>2]=(c[h>>2]|0)+(c[j>>2]&c[k>>2]|~c[j>>2]&c[l>>2]);break}case 1:{c[h>>2]=(c[h>>2]|0)+(c[j>>2]&c[l>>2]|c[k>>2]&~c[l>>2]);break}case 2:{c[h>>2]=(c[h>>2]|0)+(c[j>>2]^c[k>>2]^c[l>>2]);break}case 3:{c[h>>2]=(c[h>>2]|0)+(c[k>>2]^(c[j>>2]|~c[l>>2]));break}default:{}}c[h>>2]=(c[h>>2]|0)+(c[11656+(c[m>>2]<<2)>>2]|0);c[h>>2]=(c[h>>2]|0)+(c[q+(a[c[o>>2]>>0]<<2)>>2]|0);c[h>>2]=c[h>>2];c[p>>2]=(c[j>>2]|0)+(c[h>>2]<>2]|0)+1>>0]|(c[h>>2]|0)>>>(32-(a[(c[o>>2]|0)+1>>0]|0)|0));c[h>>2]=c[l>>2];c[l>>2]=c[k>>2];c[k>>2]=c[j>>2];c[j>>2]=c[p>>2];c[m>>2]=(c[m>>2]|0)+1}c[c[f>>2]>>2]=(c[c[f>>2]>>2]|0)+(c[h>>2]|0);c[(c[f>>2]|0)+4>>2]=(c[(c[f>>2]|0)+4>>2]|0)+(c[j>>2]|0);c[(c[f>>2]|0)+8>>2]=(c[(c[f>>2]|0)+8>>2]|0)+(c[k>>2]|0);c[(c[f>>2]|0)+12>>2]=(c[(c[f>>2]|0)+12>>2]|0)+(c[l>>2]|0);i=r;return}function dc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;e=f+4|0;d=f;c[e>>2]=a;c[d>>2]=b;i=f;return ((c[e>>2]|0)>(c[d>>2]|0)?c[e>>2]|0:c[d>>2]|0)|0}function ec(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;e=f+4|0;d=f;c[e>>2]=a;c[d>>2]=b;i=f;return ((c[e>>2]|0)<(c[d>>2]|0)?c[e>>2]|0:c[d>>2]|0)|0}function fc(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;h[(c[d>>2]|0)+24>>3]=0.0;h[(c[d>>2]|0)+16>>3]=0.0;h[(c[d>>2]|0)+8>>3]=0.0;h[c[d>>2]>>3]=0.0;i=b;return}function gc(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;d=i;i=i+16|0;e=d+4|0;f=d;c[e>>2]=a;c[f>>2]=b;b=c[f>>2]|0;a=c[e>>2]|0;c[b>>2]=c[a>>2];c[b+4>>2]=c[a+4>>2];c[b+8>>2]=c[a+8>>2];c[b+12>>2]=c[a+12>>2];c[b+16>>2]=c[a+16>>2];c[b+20>>2]=c[a+20>>2];c[b+24>>2]=c[a+24>>2];c[b+28>>2]=c[a+28>>2];i=d;return}function hc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;fc(c[e>>2]|0);h[(c[e>>2]|0)+24>>3]=+(c[f>>2]|0);i=d;return}function ic(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,j=0,k=0,l=0;f=i;i=i+16|0;l=f+12|0;k=f+8|0;j=f+4|0;g=f;c[l>>2]=a;c[k>>2]=b;c[j>>2]=d;c[g>>2]=e;h[c[g>>2]>>3]=+(c[l>>2]|0)*+h[c[k>>2]>>3]+ +h[c[j>>2]>>3];h[(c[g>>2]|0)+8>>3]=+(c[l>>2]|0)*+h[(c[k>>2]|0)+8>>3]+ +h[(c[j>>2]|0)+8>>3];h[(c[g>>2]|0)+16>>3]=+(c[l>>2]|0)*+h[(c[k>>2]|0)+16>>3]+ +h[(c[j>>2]|0)+16>>3];h[(c[g>>2]|0)+24>>3]=+(c[l>>2]|0)*+h[(c[k>>2]|0)+24>>3]+ +h[(c[j>>2]|0)+24>>3];i=f;return}function jc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;ic(1,c[h>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return}function kc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;jc(c[f>>2]|0,c[e>>2]|0,c[e>>2]|0);i=d;return}function lc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;ic(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,c[f>>2]|0);i=e;return}function mc(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+48|0;q=r+32|0;h=r+28|0;j=r+24|0;k=r+20|0;l=r+16|0;m=r+12|0;n=r+8|0;p=r+4|0;o=r;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;if(!(nc(c[h>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0)|0)){c[q>>2]=0;p=c[q>>2]|0;i=r;return p|0}c[p>>2]=0;while(1){if((c[p>>2]|0)>=(c[k>>2]|0)){h=10;break}if((c[(c[j>>2]|0)+(c[p>>2]<<2)>>2]|0)==(c[h>>2]|0)){h=10;break}if((nc(c[(c[j>>2]|0)+(c[p>>2]<<2)>>2]|0,c[l>>2]|0,c[m>>2]|0,o)|0)!=0?(c[c[n>>2]>>2]|0)==(c[o>>2]|0):0){h=8;break}c[p>>2]=(c[p>>2]|0)+1}if((h|0)==8){c[q>>2]=0;p=c[q>>2]|0;i=r;return p|0}else 
if((h|0)==10){c[q>>2]=1;p=c[q>>2]|0;i=r;return p|0}return 0}function nc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;f=o+24|0;g=o+20|0;h=o+16|0;j=o+12|0;k=o+8|0;m=o+4|0;l=o;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=0;a:do if((c[g>>2]|0)>0){c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[c[h>>2]>>2]|0))break a;if(!((c[j>>2]|0)==0?(c[(c[h>>2]|0)+4+((c[m>>2]|0)*12|0)+4>>2]|0)!=(c[(c[h>>2]|0)+4+((c[m>>2]|0)*12|0)+8>>2]|0):0))n=6;if((n|0)==6?(n=0,e=(c[l>>2]|0)+1|0,c[l>>2]=e,(e|0)==(c[g>>2]|0)):0)break;c[m>>2]=(c[m>>2]|0)+1}c[c[k>>2]>>2]=c[m>>2];c[f>>2]=1;l=c[f>>2]|0;i=o;return l|0}else{e=(c[c[h>>2]>>2]|0)-1|0;if((c[g>>2]|0)>=0){c[m>>2]=(e|0)/2|0;if((c[m>>2]|0)<0)break;if((c[j>>2]|0)==0?(c[(c[h>>2]|0)+4+((c[m>>2]|0)*12|0)+4>>2]|0)!=(c[(c[h>>2]|0)+4+((c[m>>2]|0)*12|0)+8>>2]|0):0)break;c[c[k>>2]>>2]=c[m>>2];c[f>>2]=1;l=c[f>>2]|0;i=o;return l|0}c[m>>2]=e;while(1){if((c[m>>2]|0)<0)break a;if(!((c[j>>2]|0)==0?(c[(c[h>>2]|0)+4+((c[m>>2]|0)*12|0)+4>>2]|0)!=(c[(c[h>>2]|0)+4+((c[m>>2]|0)*12|0)+8>>2]|0):0))n=14;if((n|0)==14?(n=0,e=(c[l>>2]|0)+1|0,c[l>>2]=e,(e|0)==(0-(c[g>>2]|0)|0)):0)break;c[m>>2]=(c[m>>2]|0)+-1}c[c[k>>2]>>2]=c[m>>2];c[f>>2]=1;l=c[f>>2]|0;i=o;return l|0}while(0);c[f>>2]=0;l=c[f>>2]|0;i=o;return l|0}function oc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=wb(c[g>>2]|0)|0;c[c[d>>2]>>2]=c[f>>2];fc((c[d>>2]|0)+8|0);h[(c[d>>2]|0)+40>>3]=0.0;c[(c[d>>2]|0)+48>>2]=0;c[(c[d>>2]|0)+52>>2]=0;i=e;return c[d>>2]|0}function pc(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;if(!(c[b>>2]|0)){i=d;return}Ua[c[(c[c[b>>2]>>2]|0)+12>>2]&511](c[b>>2]|0);xb(c[b>>2]|0);i=d;return}function qc(a){a=a|0;var b=0;b=i;i=i+16|0;c[b>>2]=a;i=b;return}function rc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;d=f+4|0;e=f;c[d>>2]=a;c[e>>2]=b;if(!(c[d>>2]|0)){i=f;return}$a[c[(c[c[d>>2]>>2]|0)+4>>2]&127](c[d>>2]|0,c[e>>2]|0);c[(c[d>>2]|0)+48>>2]=c[e>>2];i=f;return}function sc(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0.0;j=i;i=i+32|0;e=j+16|0;k=j+12|0;f=j+8|0;g=j;c[e>>2]=a;c[k>>2]=b;c[f>>2]=d;h[g>>3]=+h[(c[k>>2]|0)+8>>3]+ +h[(c[k>>2]|0)+8+8>>3]+ +h[(c[k>>2]|0)+8+16>>3]*2.0+ +h[(c[k>>2]|0)+8+24>>3];if(!(c[(c[e>>2]|0)+8>>2]|0)){l=+h[g>>3];i=j;return +l}h[g>>3]=+ab[c[(c[e>>2]|0)+8>>2]&0](c[f>>2]|0,+h[g>>3],1);l=+h[g>>3];i=j;return +l}function tc(){var a=0,b=0,d=0,e=0,f=0,g=0;d=i;i=i+16|0;a=d+4|0;b=d;c[b>>2]=wb(232)|0;c[c[b>>2]>>2]=11912;c[(c[b>>2]|0)+224>>2]=0;c[(c[b>>2]|0)+200>>2]=0;h[(c[b>>2]|0)+216>>3]=0.0;h[(c[b>>2]|0)+208>>3]=0.0;c[(c[b>>2]|0)+4>>2]=0;c[(c[b>>2]|0)+8>>2]=0;c[(c[b>>2]|0)+12>>2]=0;c[(c[b>>2]|0)+16>>2]=0;c[(c[b>>2]|0)+20>>2]=0;c[(c[b>>2]|0)+36>>2]=0;c[(c[b>>2]|0)+76>>2]=0;c[(c[b>>2]|0)+24>>2]=0;c[(c[b>>2]|0)+32>>2]=0;c[(c[b>>2]|0)+28>>2]=0;e=(c[b>>2]|0)+164|0;f=e;g=c[f+4>>2]|0;c[e>>2]=c[f>>2]&-1048576;c[e+4>>2]=g;e=(c[b>>2]|0)+164|0;g=e;f=c[g+4>>2]&-1048576;c[e>>2]=c[g>>2];c[e+4>>2]=f;e=(c[b>>2]|0)+164|0;f=e;g=c[f+4>>2]|0;c[e>>2]=c[f>>2]&8388607;c[e+4>>2]=g;e=(c[b>>2]|0)+164|0;g=e;f=c[g+4>>2]|0;c[e>>2]=c[g>>2]&-7340033;c[e+4>>2]=f;c[(c[b>>2]|0)+160>>2]=1;c[(c[b>>2]|0)+196>>2]=1;h[(c[b>>2]|0)+184>>3]=-1.0;wc((c[b>>2]|0)+80|0);wc((c[b>>2]|0)+120|0);c[a>>2]=0;while(1){if((c[a>>2]|0)>=8)break;c[(c[b>>2]|0)+44+(c[a>>2]<<2)>>2]=-1;c[a>>2]=(c[a>>2]|0)+1}i=d;return c[b>>2]|0}function uc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=jb[c[(c[c[g>>2]>>2]|0)+4>>2]&15](c[g>>2]|0,c[f>>2]|0)|0;qd(c[f>>2]|0);i=e;return c[d>>2]|0}function vc(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;h=i;i=i+48|0;k=h+32|0;l=h+28|0;m=h+24|0;n=h+20|0;o=h+16|0;j=h+8|0;g=h;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[o>>2]=f;d=(c[k>>2]|0)+164|0;c[j>>2]=c[d>>2];c[j+4>>2]=c[d+4>>2];d=(c[k>>2]|0)+164|0;a=d;f=c[a+4>>2]&-1048576|c[d+4>>2]&1048575&~c[o>>2];c[d>>2]=c[a>>2];c[d+4>>2]=f;d=(c[k>>2]|0)+164|0;f=d;a=c[f+4>>2]|0;b=d;c[b>>2]=c[f>>2]&-1048576|c[d>>2]&1048575&~c[o>>2];c[b+4>>2]=a;b=(c[k>>2]|0)+164|0;a=b;d=c[a+4>>2]|0;f=b;c[f>>2]=c[a>>2]&-1048576|(c[b>>2]&1048575|c[m>>2])&1048575;c[f+4>>2]=d;f=(c[k>>2]|0)+164|0;d=f;b=c[d+4>>2]&-1048576|(c[f+4>>2]&1048575|(c[n>>2]|c[m>>2]))&1048575;c[f>>2]=c[d>>2];c[f+4>>2]=b;c[g>>2]=uc(c[k>>2]|0,c[l>>2]|0)|0;f=(c[k>>2]|0)+164|0;c[f>>2]=c[j>>2];c[f+4>>2]=c[j+4>>2];i=h;return c[g>>2]|0}function wc(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;c[(c[d>>2]|0)+36>>2]=0;c[(c[d>>2]|0)+20>>2]=0;c[(c[d>>2]|0)+12>>2]=0;c[(c[d>>2]|0)+16>>2]=0;c[(c[d>>2]|0)+32>>2]=0;c[(c[d>>2]|0)+28>>2]=0;c[(c[d>>2]|0)+24>>2]=0;c[c[d>>2]>>2]=0;c[(c[d>>2]|0)+8>>2]=0;c[(c[d>>2]|0)+4>>2]=0;yc(c[d>>2]|0);i=b;return}function xc(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;xb(c[c[d>>2]>>2]|0);c[c[d>>2]>>2]=0;c[(c[d>>2]|0)+8>>2]=0;i=b;return}function yc(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=c[(c[b>>2]|0)+8>>2];a=zc(c[d>>2]|0)|0;if(a>>>0<(c[(c[b>>2]|0)+4>>2]|0)>>>0){i=e;return}b=c[b>>2]|0;Bc(b,Ac(c[d>>2]|0)|0);i=e;return}function zc(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;i=d;return 1+(c[b>>2]|0)+(((c[b>>2]|0)>>>0)/8|0)|0}function Ac(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=zc(zc(c[d>>2]|0)|0)|0;i=b;return a|0}function Bc(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;l=i;i=i+32|0;f=l+24|0;d=l+20|0;j=l+16|0;g=l+12|0;k=l+8|0;e=l+4|0;h=l;c[f>>2]=a;c[d>>2]=b;c[j>>2]=c[(c[f>>2]|0)+4>>2];c[k>>2]=c[c[f>>2]>>2];c[d>>2]=hd(c[d>>2]|0)|0;c[e>>2]=wb((c[d>>2]|0)*24|0)|0;b=(c[f>>2]|0)+36|0;c[b>>2]=(c[b>>2]|0)+1;c[g>>2]=0;while(1){if((c[g>>2]|0)>>>0>=(c[d>>2]|0)>>>0)break;b=(c[e>>2]|0)+((c[g>>2]|0)*24|0)+16|0;m=b;a=c[m+4>>2]|0;c[b>>2]=c[m>>2]&-7340033;c[b+4>>2]=a;c[g>>2]=(c[g>>2]|0)+1}c[(c[f>>2]|0)+4>>2]=c[d>>2];c[c[f>>2]>>2]=c[e>>2];c[(c[f>>2]|0)+8>>2]=0;c[g>>2]=0;while(1){d=c[k>>2]|0;if((c[g>>2]|0)>>>0>=(c[j>>2]|0)>>>0)break;c[h>>2]=d+((c[g>>2]|0)*24|0);d=(c[h>>2]|0)+16|0;d=Wy(c[d>>2]|0,c[d+4>>2]|0,20)|0;if(d&4){e=c[f>>2]|0;b=c[h>>2]|0;a=(c[h>>2]|0)+16|0;d=(c[h>>2]|0)+16|0;d=Wy(c[d>>2]|0,c[d+4>>2]|0,52)|0;Cc(e,b,a,d)}c[g>>2]=(c[g>>2]|0)+1}yb(d);i=l;return}function Cc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+32|0;f=n+28|0;g=n+24|0;h=n+20|0;j=n+16|0;m=n+12|0;l=n+8|0;o=n+4|0;k=n;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[o>>2]=Dc(c[f>>2]|0,c[g>>2]|0)|0;c[k>>2]=Ec(c[f>>2]|0,c[g>>2]|0)|0;b=(c[f>>2]|0)+32|0;c[b>>2]=(c[b>>2]|0)+1;c[l>>2]=c[o>>2];while(1){b=(c[f>>2]|0)+28|0;c[b>>2]=(c[b>>2]|0)+1;c[m>>2]=(c[c[f>>2]>>2]|0)+((c[l>>2]|0)*24|0);b=(c[m>>2]|0)+16|0;b=Wy(c[b>>2]|0,c[b+4>>2]|0,20)|0;if(!(b&4))break;c[l>>2]=Fc(c[l>>2]|0,c[k>>2]|0,c[(c[f>>2]|0)+4>>2]|0)|0}Gc(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[m>>2]|0);i=n;return}function Dc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;f=e+8|0;g=e+4|0;d=e;c[f>>2]=a;c[g>>2]=b;c[d>>2]=((c[c[g>>2]>>2]|0)>>>0)%((c[(c[f>>2]|0)+4>>2]|0)>>>0)|0;i=e;return c[d>>2]|0}function Ec(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;f=e+8|0;g=e+4|0;d=e;c[f>>2]=a;c[g>>2]=b;c[d>>2]=1+(((c[(c[g>>2]|0)+4>>2]|0)>>>0)%(((c[(c[f>>2]|0)+4>>2]|0)-1|0)>>>0)|0);i=e;return c[d>>2]|0}function Fc(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0;g=i;i=i+16|0;j=g+12|0;h=g+8|0;e=g+4|0;f=g;c[j>>2]=a;c[h>>2]=b;c[e>>2]=d;c[f>>2]=(c[j>>2]|0)+(c[h>>2]|0);a=c[f>>2]|0;i=g;return ((c[f>>2]|0)>>>0>=(c[e>>2]|0)>>>0?a-(c[e>>2]|0)|0:a)|0}function Gc(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;j=i;i=i+32|0;m=j+16|0;g=j+12|0;l=j+8|0;k=j+4|0;h=j;c[m>>2]=a;c[g>>2]=b;c[l>>2]=d;c[k>>2]=e;c[h>>2]=f;d=(c[m>>2]|0)+24|0;c[d>>2]=(c[d>>2]|0)+1;d=(c[m>>2]|0)+8|0;c[d>>2]=(c[d>>2]|0)+1;d=(c[h>>2]|0)+16|0;a=d;f=c[a+4>>2]&-1048576|c[(c[l>>2]|0)+4>>2]&1048575;c[d>>2]=c[a>>2];c[d+4>>2]=f;d=(c[h>>2]|0)+16|0;f=d;a=c[f+4>>2]|0;c[d>>2]=c[f>>2]&-1048576|c[c[l>>2]>>2]&1048575;c[d+4>>2]=a;d=c[l>>2]|0;d=Wy(c[d>>2]|0,c[d+4>>2]|0,23)|0;a=(c[h>>2]|0)+16|0;f=a;b=c[f>>2]|0;f=c[f+4>>2]|0;d=Xy(d&511|0,0,23)|0;c[a>>2]=b&8388607|d;c[a+4>>2]=f|C;a=(c[h>>2]|0)+16|0;f=a;f=Wy(c[f>>2]|0,c[f+4>>2]|0,20)|0;d=a;b=c[d>>2]|0;d=c[d+4>>2]|0;f=Xy(f&7|6|0,0,20)|0;c[a>>2]=b&-7340033|f;c[a+4>>2]=d|C;a=(c[h>>2]|0)+16|0;d=a;f=c[d>>2]|0;d=c[d+4>>2]|0;b=Xy(c[k>>2]&4095|0,0,52)|0;c[a>>2]=f|b;c[a+4>>2]=d&1048575|C;a=(c[h>>2]|0)+16|0;a=Wy(c[a>>2]|0,c[a+4>>2]|0,52)|0;if((a|0)==(c[k>>2]|0)){k=c[g>>2]|0;l=c[h>>2]|0;Hc(k,l);i=j;return}zb(19497,261,19520);k=c[g>>2]|0;l=c[h>>2]|0;Hc(k,l);i=j;return}function Hc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;c[c[e>>2]>>2]=c[c[f>>2]>>2];c[(c[e>>2]|0)+4>>2]=c[(c[f>>2]|0)+4>>2];c[(c[e>>2]|0)+8>>2]=c[(c[f>>2]|0)+8>>2];c[(c[e>>2]|0)+12>>2]=c[(c[f>>2]|0)+12>>2];i=d;return}function Ic(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;g=h+4|0;f=h;c[d>>2]=a;c[e>>2]=b;if(!(c[e>>2]|0)){i=h;return}Ad(c[e>>2]|0);if((c[(c[d>>2]|0)+28>>2]|0)>>>0>=(c[(c[d>>2]|0)+32>>2]|0)>>>0)bd(c[d>>2]|0);c[g>>2]=(c[(c[d>>2]|0)+24>>2]|0)+((c[(c[d>>2]|0)+28>>2]|0)*20|0);c[c[g>>2]>>2]=c[e>>2];c[(c[g>>2]|0)+4>>2]=c[(c[d>>2]|0)+36>>2];b=(c[d>>2]|0)+40|0;a=c[b>>2]|0;c[b>>2]=a+1;c[(c[g>>2]|0)+12>>2]=a;a=Sb(c[(c[g>>2]|0)+4>>2]|0)|0;c[(c[g>>2]|0)+8>>2]=a;c[f>>2]=c[c[c[e>>2]>>2]>>2];c[(c[g>>2]|0)+16>>2]=c[(c[d>>2]|0)+44+(c[f>>2]<<2)>>2];c[(c[d>>2]|0)+44+(c[f>>2]<<2)>>2]=c[(c[d>>2]|0)+28>>2];f=(c[d>>2]|0)+28|0;c[f>>2]=(c[f>>2]|0)+1;i=h;return}function Jc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;q=i;i=i+144|0;d=q;j=q+132|0;k=q+128|0;l=q+124|0;o=q+120|0;n=q+36|0;p=q+32|0;m=q+24|0;g=q+16|0;e=q+12|0;f=q+8|0;c[k>>2]=a;c[l>>2]=b;if(c[(c[k>>2]|0)+164+4>>2]&2){a=(c[k>>2]|0)+164|0;r=a;b=c[r+4>>2]|0;c[a>>2]=c[r>>2]&8388607;c[a+4>>2]=b}c[o>>2]=0;a=c[k>>2]|0;if(c[(c[k>>2]|0)+20>>2]|0){a=jb[c[a+20>>2]&15](c[(c[k>>2]|0)+76>>2]|0,c[l>>2]|0)|0;c[(c[k>>2]|0)+76>>2]=a}else a=c[a+76>>2]|0;a:do if((a|0)!=2){c[(c[k>>2]|0)+192>>2]=0;a=(c[k>>2]|0)+224|0;c[a>>2]=(c[a>>2]|0)+1;Wc(n,c[l>>2]|0,c[k>>2]|0);a=(c[k>>2]|0)+164|0;c[m>>2]=c[a>>2];c[m+4>>2]=c[a+4>>2];do if((c[(c[k>>2]|0)+76>>2]|0)!=4){b=Pc(c[k>>2]|0,n,m)|0;c[g>>2]=b;a=c[k>>2]|0;if(!b){if(!(c[a+16>>2]|0)){h=23;break}Ua[c[(c[k>>2]|0)+16>>2]&511](c[l>>2]|0);h=23;break}c[f>>2]=c[a+76>>2];if((c[(c[k>>2]|0)+12>>2]|0)!=0?(b=c[(c[k>>2]|0)+12>>2]|0,a=c[l>>2]|0,r=(c[g>>2]|0)+16|0,c[d>>2]=c[r>>2],c[d+4>>2]=c[r+4>>2],(jb[b&15](a,d)|0)==0):0){h=23;break}a=(c[g>>2]|0)+16|0;a=Wy(c[a>>2]|0,c[a+4>>2]|0,52)|0;c[p>>2]=a;if((c[p>>2]|0)==4095){if((c[(c[k>>2]|0)+76>>2]|0)==3){h=23;break}c[j>>2]=0;l=c[j>>2]|0;i=q;return 
l|0}d=(c[g>>2]|0)+16|0;c[m>>2]=c[d>>2];c[m+4>>2]=c[d+4>>2];d=(c[k>>2]|0)+164|0;d=Wy(c[d>>2]|0,c[d+4>>2]|0,20)|0;a=m;a=Wy(c[a>>2]|0,c[a+4>>2]|0,20)|0;b=m;r=c[b>>2]|0;b=c[b+4>>2]|0;d=Xy(a&7|d&1|0,0,20)|0;a=m;c[a>>2]=r&-7340033|d;c[a+4>>2]=b|C;c[(c[k>>2]|0)+76>>2]=1;c[e>>2]=c[(c[(c[k>>2]|0)+24>>2]|0)+((c[p>>2]|0)*20|0)>>2];if((c[c[c[l>>2]>>2]>>2]|0)!=(c[c[c[e>>2]>>2]>>2]|0))break a;c[o>>2]=Xc(c[k>>2]|0,c[l>>2]|0,c[e>>2]|0,m)|0;a=c[k>>2]|0;if(c[(c[k>>2]|0)+20>>2]|0){a=jb[c[a+20>>2]&15](c[(c[k>>2]|0)+76>>2]|0,c[l>>2]|0)|0;c[(c[k>>2]|0)+76>>2]=a}else a=c[a+76>>2]|0;if((a|0)==2)break a;c[g>>2]=0;if(!(c[o>>2]|0))break a;c[(c[k>>2]|0)+76>>2]=c[f>>2]}else h=23;while(0);do if((h|0)==23){if((c[(c[k>>2]|0)+76>>2]|0)==1)break a;a=(c[k>>2]|0)+164|0;c[m>>2]=c[a>>2];c[m+4>>2]=c[a+4>>2];c[o>>2]=Yc(c[k>>2]|0,c[l>>2]|0,p,m)|0;a=c[k>>2]|0;if(c[(c[k>>2]|0)+20>>2]|0){a=jb[c[a+20>>2]&15](c[(c[k>>2]|0)+76>>2]|0,c[l>>2]|0)|0;c[(c[k>>2]|0)+76>>2]=a}else a=c[a+76>>2]|0;if((a|0)==2)break a;if(!(c[(c[k>>2]|0)+192>>2]|0)){a=m;f=c[a+4>>2]|0;g=m;c[g>>2]=c[a>>2]&8388607;c[g+4>>2]=f;break}g=(c[k>>2]|0)+164|0;g=Wy(c[g>>2]|0,c[g+4>>2]|0,23)|0;if(g&511){a=m;a=Wy(c[a>>2]|0,c[a+4>>2]|0,20)|0;f=m;b=c[f>>2]|0;f=c[f+4>>2]|0;a=Xy(a&7|1|0,0,20)|0;g=m;c[g>>2]=b&-7340033|a;c[g+4>>2]=f|C;break}c[j>>2]=0;l=c[j>>2]|0;i=q;return l|0}while(0);if(!((c[(c[k>>2]|0)+76>>2]|0)!=0?(c[(c[k>>2]|0)+76>>2]|0)!=1:0))h=35;do if((h|0)==35){a=c[k>>2]|0;if(c[o>>2]|0){Qc(a,n,m,c[p>>2]|0);Zc(c[k>>2]|0,c[o>>2]|0,c[l>>2]|0,1);break}else{Qc(a,n,m,4095);break}}while(0);c[j>>2]=c[o>>2];l=c[j>>2]|0;i=q;return l|0}while(0);pc(c[o>>2]|0);c[(c[k>>2]|0)+76>>2]=2;c[j>>2]=0;l=c[j>>2]|0;i=q;return l|0}function Kc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;d=e+4|0;f=e;c[d>>2]=a;c[f>>2]=b;switch(c[f>>2]|0){case 1:{xc((c[d>>2]|0)+80|0);wc((c[d>>2]|0)+80|0);break}case 0:break;default:{i=e;return}}xc((c[d>>2]|0)+120|0);wc((c[d>>2]|0)+120|0);i=e;return}function Lc(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0;o=i;i=i+176|0;m=o+56|0;n=o+16|0;p=o;d=o+172|0;e=o+168|0;f=o+164|0;g=o+160|0;t=o+76|0;h=o+72|0;k=o+68|0;j=o+64|0;l=o+60|0;c[d>>2]=a;c[e>>2]=b;c[g>>2]=(c[d>>2]|0)+80;Nc(t,c[d>>2]|0);a=c[c[e>>2]>>2]|0;b=c[e>>2]|0;s=c[t+4>>2]|0;r=c[t+8>>2]|0;q=c[t+12>>2]|0;c[p>>2]=c[t>>2];c[p+4>>2]=s;c[p+8>>2]=r;c[p+12>>2]=q;eb[a&63](b,19530,p);c[f>>2]=0;while(1){if((c[f>>2]|0)>>>0>=(c[(c[g>>2]|0)+4>>2]|0)>>>0)break;c[h>>2]=(c[c[g>>2]>>2]|0)+((c[f>>2]|0)*24|0);b=(c[h>>2]|0)+16|0;b=Wy(c[b>>2]|0,c[b+4>>2]|0,20)|0;if(b&4){b=(c[h>>2]|0)+16|0;b=Wy(c[b>>2]|0,c[b+4>>2]|0,52)|0;if((b|0)==4095){c[k>>2]=19620;c[j>>2]=0}else{a=c[(c[d>>2]|0)+24>>2]|0;b=(c[h>>2]|0)+16|0;b=Wy(c[b>>2]|0,c[b+4>>2]|0,52)|0;c[l>>2]=a+(b*20|0);c[k>>2]=c[(c[l>>2]|0)+4>>2];c[j>>2]=c[(c[l>>2]|0)+12>>2]}a=c[c[e>>2]>>2]|0;b=c[e>>2]|0;x=c[k>>2]|0;w=c[j>>2]|0;v=c[(c[h>>2]|0)+16>>2]&1048575;u=c[(c[h>>2]|0)+16+4>>2]&1048575;t=(c[h>>2]|0)+16|0;t=Wy(c[t>>2]|0,c[t+4>>2]|0,23)|0;s=c[c[h>>2]>>2]|0;r=c[(c[h>>2]|0)+4>>2]|0;q=c[(c[h>>2]|0)+8>>2]|0;p=c[(c[h>>2]|0)+12>>2]|0;c[n>>2]=x;c[n+4>>2]=w;c[n+8>>2]=v;c[n+12>>2]=u;c[n+16>>2]=t&511;c[n+20>>2]=s;c[n+24>>2]=r;c[n+28>>2]=q;c[n+32>>2]=p;eb[a&63](b,19705,n)}c[f>>2]=(c[f>>2]|0)+1}eb[c[c[e>>2]>>2]&63](c[e>>2]|0,19751,m);i=o;return}function Mc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0;w=i;i=i+336|0;s=w+24|0;r=w+16|0;x=w;t=w+264|0;e=w+260|0;f=w+256|0;g=w+268|0;n=w+240|0;l=w+232|0;q=w+228|0;p=w+224|0;h=w+216|0;m=w+208|0;o=w+204|0;u=w+200|0;v=w+160|0;d=w+72|0;j=w+68|0;k=w+64|0;c[e>>2]=a;c[f>>2]=b;c[u>>2]=(c[e>>2]|0)+80;b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;c[x>>2]=n;c[x+4>>2]=n+4;c[x+8>>2]=n+8;c[x+12>>2]=n+12;if(!(Va[b&63](a,19530,x)|0)){c[t>>2]=0;l=c[t>>2]|0;i=w;return 
l|0}Nc(d,c[e>>2]|0);if((((c[d>>2]|0)==(c[n>>2]|0)?(c[d+4>>2]|0)==(c[n+4>>2]|0):0)?(c[d+8>>2]|0)==(c[n+8>>2]|0):0)?(c[d+12>>2]|0)==(c[n+12>>2]|0):0){c[k>>2]=c[(c[u>>2]|0)+4>>2];b=v;d=c[u>>2]|0;a=b+40|0;do{c[b>>2]=c[d>>2];b=b+4|0;d=d+4|0}while((b|0)<(a|0));c[v>>2]=wb((c[k>>2]|0)*24|0)|0;c[j>>2]=0;while(1){if((c[j>>2]|0)>>>0>=(c[k>>2]|0)>>>0)break;a=(c[v>>2]|0)+((c[j>>2]|0)*24|0)|0;d=(c[c[u>>2]>>2]|0)+((c[j>>2]|0)*24|0)|0;c[a>>2]=c[d>>2];c[a+4>>2]=c[d+4>>2];c[a+8>>2]=c[d+8>>2];c[a+12>>2]=c[d+12>>2];c[a+16>>2]=c[d+16>>2];c[a+20>>2]=c[d+20>>2];c[j>>2]=(c[j>>2]|0)+1}while(1){if(Va[c[c[f>>2]>>2]&63](c[f>>2]|0,23707,r)|0){a=25;break}j=c[c[f>>2]>>2]|0;k=c[f>>2]|0;c[s>>2]=64;c[s+4>>2]=g;c[s+8>>2]=m;c[s+12>>2]=l;c[s+16>>2]=q;c[s+20>>2]=p;c[s+24>>2]=n;c[s+28>>2]=n+4;c[s+32>>2]=n+8;c[s+36>>2]=n+12;if(!(Va[j&63](k,19576,s)|0)){a=26;break}k=(Gy(g,19620)|0)==0;if(!(k&(c[m>>2]|0)==0)){if(c[p>>2]|0){a=26;break}c[o>>2]=Oc(c[e>>2]|0,g,c[m>>2]|0)|0;if((c[o>>2]|0)==4095){a=26;break}}else c[o>>2]=4095;j=h;d=c[j+4>>2]|0;k=h;c[k>>2]=c[j>>2]&-1048576|c[l>>2]&1048575;c[k+4>>2]=d;k=h;d=c[k+4>>2]&-1048576|c[q>>2]&1048575;j=h;c[j>>2]=c[k>>2];c[j+4>>2]=d;j=h;d=c[j>>2]|0;j=c[j+4>>2]|0;k=Xy(c[p>>2]&511|0,0,23)|0;a=h;c[a>>2]=d&8388607|k;c[a+4>>2]=j|C;a=h;j=c[a+4>>2]|0;k=h;c[k>>2]=c[a>>2]&-7340033|1048576;c[k+4>>2]=j;if((c[h>>2]&1048575|0)!=(c[l>>2]|0))zb(19628,890,19520);if((c[h+4>>2]&1048575|0)!=(c[q>>2]|0))zb(19641,891,19520);k=h;k=Wy(c[k>>2]|0,c[k+4>>2]|0,23)|0;if((k&511|0)!=(c[p>>2]|0))zb(19654,892,19520);if(Pc(c[e>>2]|0,n,h)|0)continue;Qc(c[e>>2]|0,n,h,c[o>>2]|0)}if((a|0)==25){yb(c[v>>2]|0);c[t>>2]=1;l=c[t>>2]|0;i=w;return l|0}else if((a|0)==26){yb(c[c[u>>2]>>2]|0);b=c[u>>2]|0;d=v;a=b+40|0;do{c[b>>2]=c[d>>2];b=b+4|0;d=d+4|0}while((b|0)<(a|0));c[t>>2]=0;l=c[t>>2]|0;i=w;return l|0}}c[t>>2]=0;l=c[t>>2]|0;i=w;return l|0}function Nc(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0;j=i;i=i+32|0;d=j+16|0;e=j+12|0;f=j+8|0;h=j+4|0;g=j;c[d>>2]=a;c[e>>2]=b;$b(c[d>>2]|0);_b(c[d>>2]|0,4);c[f>>2]=0;while(1){if((c[f>>2]|0)>>>0>=(c[(c[e>>2]|0)+28>>2]|0)>>>0)break;c[h>>2]=(c[(c[e>>2]|0)+24>>2]|0)+((c[f>>2]|0)*20|0);c[g>>2]=c[c[h>>2]>>2];Yb(c[d>>2]|0,c[(c[h>>2]|0)+12>>2]|0);Xb(c[d>>2]|0,c[(c[h>>2]|0)+4>>2]|0);c[f>>2]=(c[f>>2]|0)+1}bc(c[d>>2]|0);i=j;return}function Oc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;k=n+28|0;l=n+24|0;e=n+20|0;f=n+16|0;h=n+12|0;g=n+8|0;m=n+4|0;j=n;c[l>>2]=a;c[e>>2]=b;c[f>>2]=d;c[h>>2]=Sb(c[e>>2]|0)|0;c[g>>2]=0;while(1){if((c[g>>2]|0)>>>0>=(c[(c[l>>2]|0)+28>>2]|0)>>>0){e=8;break}c[m>>2]=(c[(c[l>>2]|0)+24>>2]|0)+((c[g>>2]|0)*20|0);c[j>>2]=c[c[m>>2]>>2];if(((c[(c[m>>2]|0)+12>>2]|0)==(c[f>>2]|0)?(c[(c[m>>2]|0)+8>>2]|0)==(c[h>>2]|0):0)?(Gy(c[(c[m>>2]|0)+4>>2]|0,c[e>>2]|0)|0)==0:0){e=6;break}c[g>>2]=(c[g>>2]|0)+1}if((e|0)==6){c[k>>2]=((c[m>>2]|0)-(c[(c[l>>2]|0)+24>>2]|0)|0)/20|0;l=c[k>>2]|0;i=n;return l|0}else if((e|0)==8){c[k>>2]=4095;l=c[k>>2]|0;i=n;return l|0}return 0}function Pc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;j=i;i=i+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=Vc((c[e>>2]|0)+80|0,c[f>>2]|0,c[g>>2]|0)|0;if(c[h>>2]|0){a=c[h>>2]|0;i=j;return a|0}c[h>>2]=Vc((c[e>>2]|0)+120|0,c[f>>2]|0,c[g>>2]|0)|0;a=c[h>>2]|0;i=j;return a|0}function Qc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;d=c[h>>2]|0;d=Wy(c[d>>2]|0,c[d+4>>2]|0,20)|0;a=c[k>>2]|0;Rc((d&1|0)!=0?a+80|0:a+120|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0);i=f;return}function Rc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;p=i;i=i+48|0;k=p+32|0;l=p+28|0;m=p+24|0;n=p+20|0;g=p+16|0;h=p+12|0;f=p+8|0;o=p+4|0;j=p;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[h>>2]=Dc(c[k>>2]|0,c[l>>2]|0)|0;c[f>>2]=Ec(c[k>>2]|0,c[l>>2]|0)|0;c[o>>2]=0;c[g>>2]=c[h>>2];do{c[j>>2]=(c[c[k>>2]>>2]|0)+((c[g>>2]|0)*24|0);e=(c[k>>2]|0)+28|0;c[e>>2]=(c[e>>2]|0)+1;e=(c[j>>2]|0)+16|0;e=Wy(c[e>>2]|0,c[e+4>>2]|0,20)|0;if(!(e&2))break;e=(c[j>>2]|0)+16|0;e=Wy(c[e>>2]|0,c[e+4>>2]|0,20)|0;if(((e&4|0)!=0?(Sc(c[l>>2]|0,c[j>>2]|0)|0)!=0:0)?(Tc(c[m>>2]|0,c[n>>2]|0,(c[j>>2]|0)+16|0)|0)!=0:0){if(!(c[o>>2]|0))c[o>>2]=c[j>>2];Uc(c[k>>2]|0,c[j>>2]|0)}c[g>>2]=Fc(c[g>>2]|0,c[f>>2]|0,c[(c[k>>2]|0)+4>>2]|0)|0}while((c[g>>2]|0)!=(c[h>>2]|0));f=c[k>>2]|0;if(c[o>>2]|0){Gc(f,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0);i=p;return}else{yc(f);Cc(c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0);i=p;return}}function Sc(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;d=f+4|0;e=f;c[d>>2]=a;c[e>>2]=b;if((c[c[d>>2]>>2]|0)!=(c[c[e>>2]>>2]|0)){b=0;b=b&1;i=f;return b|0}if((c[(c[d>>2]|0)+4>>2]|0)!=(c[(c[e>>2]|0)+4>>2]|0)){b=0;b=b&1;i=f;return b|0}if((c[(c[d>>2]|0)+8>>2]|0)!=(c[(c[e>>2]|0)+8>>2]|0)){b=0;b=b&1;i=f;return b|0}b=(c[(c[d>>2]|0)+12>>2]|0)==(c[(c[e>>2]|0)+12>>2]|0);b=b&1;i=f;return b|0}function Tc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;h=i;i=i+16|0;g=h+12|0;e=h+8|0;j=h+4|0;f=h;c[e>>2]=a;c[j>>2]=b;c[f>>2]=d;d=c[e>>2]|0;a=c[d>>2]|0;if((c[j>>2]|0)!=4095){if((c[d+4>>2]&1048575&(c[(c[f>>2]|0)+4>>2]&1048575)|0)==(c[(c[e>>2]|0)+4>>2]&1048575|0))d=(c[c[f>>2]>>2]&1048575&(c[c[e>>2]>>2]&1048575)|0)==(c[c[f>>2]>>2]&1048575|0);else d=0;c[g>>2]=d&1;e=c[g>>2]|0;i=h;return e|0}else{if((a&1048575&(c[c[f>>2]>>2]&1048575)|0)==(c[c[e>>2]>>2]&1048575|0)){e=c[e>>2]|0;e=Wy(c[e>>2]|0,c[e+4>>2]|0,23)|0;d=c[f>>2]|0;d=Wy(c[d>>2]|0,c[d+4>>2]|0,23)|0;d=(e&511|0)<=(d&511|0)}else d=0;c[g>>2]=d&1;e=c[g>>2]|0;i=h;return e|0}return 0}function Uc(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;a=(c[f>>2]|0)+8|0;c[a>>2]=(c[a>>2]|0)+-1;a=(c[e>>2]|0)+16|0;e=a;b=c[e+4>>2]|0;c[a>>2]=c[e>>2]&-7340033|2097152;c[a+4>>2]=b;i=d;return}function Vc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;e=n+28|0;f=n+24|0;g=n+20|0;k=n+16|0;l=n+12|0;j=n+8|0;h=n+4|0;m=n;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[l>>2]=Dc(c[e>>2]|0,c[f>>2]|0)|0;c[j>>2]=Ec(c[e>>2]|0,c[f>>2]|0)|0;c[h>>2]=0;a=(c[e>>2]|0)+12|0;c[a>>2]=(c[a>>2]|0)+1;c[k>>2]=c[l>>2];do{c[m>>2]=(c[c[e>>2]>>2]|0)+((c[k>>2]|0)*24|0);a=(c[e>>2]|0)+20|0;c[a>>2]=(c[a>>2]|0)+1;a=(c[m>>2]|0)+16|0;a=Wy(c[a>>2]|0,c[a+4>>2]|0,20)|0;if(!(a&2))break;a=(c[m>>2]|0)+16|0;a=Wy(c[a>>2]|0,c[a+4>>2]|0,20)|0;do if(((a&4|0)!=0?(Sc(c[f>>2]|0,c[m>>2]|0)|0)!=0:0)?(d=(c[m>>2]|0)+16|0,a=(c[m>>2]|0)+16|0,a=Wy(c[a>>2]|0,c[a+4>>2]|0,52)|0,(Tc(d,a,c[g>>2]|0)|0)!=0):0){if((c[h>>2]|0)!=0?(c[(c[m>>2]|0)+16+4>>2]&1048575&(c[(c[h>>2]|0)+16+4>>2]&1048575)|0)!=(c[(c[m>>2]|0)+16+4>>2]&1048575|0):0)break;c[h>>2]=c[m>>2]}while(0);c[k>>2]=Fc(c[k>>2]|0,c[j>>2]|0,c[(c[e>>2]|0)+4>>2]|0)|0}while((c[k>>2]|0)!=(c[l>>2]|0));if(!(c[h>>2]|0)){m=c[h>>2]|0;i=n;return m|0}m=(c[e>>2]|0)+16|0;c[m>>2]=(c[m>>2]|0)+1;m=c[h>>2]|0;i=n;return m|0}function Wc(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;f=e+8|0;g=e+4|0;h=e;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;$b(c[f>>2]|0);_b(c[f>>2]|0,4);Yb(c[f>>2]|0,c[(c[h>>2]|0)+160>>2]|0);$a[c[(c[c[g>>2]>>2]|0)+4>>2]&127](c[g>>2]|0,c[f>>2]|0);bc(c[f>>2]|0);i=e;return}function Xc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;j=g+28|0;l=g+24|0;m=g+20|0;n=g+16|0;h=g+8|0;k=g+4|0;f=g;c[j>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;a=(c[j>>2]|0)+164|0;c[h>>2]=c[a>>2];c[h+4>>2]=c[a+4>>2];c[k>>2]=c[(c[j>>2]|0)+160>>2];a=(c[j>>2]|0)+164|0;b=c[n>>2]|0;c[a>>2]=c[b>>2];c[a+4>>2]=c[b+4>>2];a=(c[j>>2]|0)+164|0;b=a;e=c[b+4>>2]|0;c[a>>2]=c[b>>2]&8388607;c[a+4>>2]=e;c[f>>2]=Va[c[(c[c[m>>2]>>2]|0)+4>>2]&63](c[m>>2]|0,c[l>>2]|0,c[j>>2]|0)|0;c[(c[j>>2]|0)+160>>2]=c[k>>2];a=(c[j>>2]|0)+164|0;c[a>>2]=c[h>>2];c[a+4>>2]=c[h+4>>2];i=g;return c[f>>2]|0}function Yc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;p=i;i=i+48|0;f=p+32|0;g=p+28|0;h=p+24|0;j=p+20|0;n=p+16|0;k=p+12|0;l=p+8|0;o=p+4|0;m=p;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[n>>2]=0;c[l>>2]=c[c[j>>2]>>2]&1048575;c[o>>2]=c[(c[j>>2]|0)+4>>2]&1048575;c[m>>2]=~c[o>>2];c[k>>2]=0;while(1){if((c[k>>2]|0)>>>0>=5)break;if((c[l>>2]&(c[o>>2]&~c[11932+(c[k>>2]<<2)>>2])|0)==(c[l>>2]|0))c[o>>2]=c[o>>2]&~c[11932+(c[k>>2]<<2)>>2];if((c[o>>2]|0)!=(c[m>>2]|0)?(c[m>>2]=c[o>>2],b=c[j>>2]|0,a=b,e=c[a+4>>2]|0,b,c[b>>2]=c[a>>2]&-1048576|c[o>>2]&1048575,c[b+4>>2]=e,c[n>>2]=_c(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0)|0,(c[n>>2]|0)!=0):0)break;c[k>>2]=(c[k>>2]|0)+1}if(c[n>>2]|0){b=c[n>>2]|0;i=p;return b|0}if((c[l>>2]|0)==(c[m>>2]|0)){b=c[n>>2]|0;i=p;return b|0}c[m>>2]=c[l>>2];b=c[j>>2]|0;k=b;m=c[k+4>>2]|0;c[b>>2]=c[k>>2]&-1048576|c[l>>2]&1048575;c[b+4>>2]=m;c[n>>2]=_c(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0)|0;b=c[n>>2]|0;i=p;return b|0}function Zc(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;k=i;i=i+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;if(!(c[(c[f>>2]|0)+4>>2]|0)){i=k;return}ib[c[(c[f>>2]|0)+4>>2]&0](c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0);i=k;return}function _c(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;t=i;i=i+48|0;q=t+44|0;f=t+40|0;g=t+36|0;j=t+32|0;k=t+28|0;r=t+24|0;m=t+20|0;l=t+16|0;p=t+12|0;o=t+8|0;s=t+4|0;n=t;c[f>>2]=a;c[g>>2]=b;c[j>>2]=d;c[k>>2]=e;c[r>>2]=0;c[m>>2]=1;if($c(c[f>>2]|0,c[g>>2]|0)|0){c[q>>2]=0;b=c[q>>2]|0;i=t;return b|0}c[l>>2]=c[(c[f>>2]|0)+44+(c[c[c[g>>2]>>2]>>2]<<2)>>2];while(1){if((c[l>>2]|0)<0){f=18;break}c[p>>2]=(c[(c[f>>2]|0)+24>>2]|0)+((c[l>>2]|0)*20|0);c[o>>2]=c[c[p>>2]>>2];c[s>>2]=Xc(c[f>>2]|0,c[g>>2]|0,c[o>>2]|0,c[k>>2]|0)|0;if((c[(c[f>>2]|0)+196>>2]|0)!=0?($c(c[f>>2]|0,c[g>>2]|0)|0)!=0:0){f=7;break}if(c[s>>2]|0){c[n>>2]=c[(c[s>>2]|0)+52>>2];do if(c[r>>2]|0){if(c[m>>2]|0){ad(c[f>>2]|0,c[r>>2]|0,c[g>>2]|0);c[m>>2]=0}ad(c[f>>2]|0,c[s>>2]|0,c[g>>2]|0);if(+h[(c[s>>2]|0)+40>>3]<+h[(c[r>>2]|0)+40>>3]){pc(c[r>>2]|0);c[r>>2]=c[s>>2];c[c[j>>2]>>2]=((c[p>>2]|0)-(c[(c[f>>2]|0)+24>>2]|0)|0)/20|0;break}else{pc(c[s>>2]|0);break}}else{c[r>>2]=c[s>>2];c[c[j>>2]>>2]=((c[p>>2]|0)-(c[(c[f>>2]|0)+24>>2]|0)|0)/20|0}while(0);if((c[n>>2]|0)!=0?(c[(c[f>>2]|0)+164+4>>2]&131072|0)!=0:0){f=18;break}}c[l>>2]=c[(c[p>>2]|0)+16>>2]}if((f|0)==7){pc(c[s>>2]|0);pc(c[r>>2]|0);c[q>>2]=0;b=c[q>>2]|0;i=t;return b|0}else if((f|0)==18){c[q>>2]=c[r>>2];b=c[q>>2]|0;i=t;return b|0}return 0}function $c(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,j=0,k=0.0;j=i;i=i+32|0;d=j+16|0;e=j+8|0;f=j+4|0;g=j;c[f>>2]=a;c[g>>2]=b;if(!(c[(c[f>>2]|0)+164+4>>2]&2)){if(c[(c[f>>2]|0)+192>>2]|0){c[e>>2]=1;g=c[e>>2]|0;i=j;return g|0}if(+h[(c[f>>2]|0)+184>>3]>=0.0?(a=c[f>>2]|0,g=c[g>>2]|0,b=(c[f>>2]|0)+172|0,c[d>>2]=c[b>>2],c[d+4>>2]=c[b+4>>2],k=+oe(a,g,d),k>=+h[(c[f>>2]|0)+184>>3]):0){c[(c[f>>2]|0)+192>>2]=1;c[(c[f>>2]|0)+196>>2]=1;c[e>>2]=1;g=c[e>>2]|0;i=j;return g|0}}c[(c[f>>2]|0)+196>>2]=0;c[e>>2]=0;g=c[e>>2]|0;i=j;return g|0}function ad(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0.0;k=i;i=i+32|0;e=k+16|0;f=k+12|0;g=k+8|0;j=k;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;if(((c[(c[e>>2]|0)+164+4>>2]&2|0)==0?(c[(c[e>>2]|0)+164+4>>2]&1|0)!=0:0)?!(+h[(c[f>>2]|0)+40>>3]==0.0):0){e=c[e>>2]|0;f=c[f>>2]|0;a=c[g>>2]|0;Zc(e,f,a,0);i=k;return}a=(c[e>>2]|0)+200|0;c[a>>2]=(c[a>>2]|0)+1;if((c[(c[e>>2]|0)+164+4>>2]&2|0)==0?(h[j>>3]=+pe(c[e>>2]|0,c[f>>2]|0,c[g>>2]|0),!(+h[j>>3]<0.0)):0){h[(c[f>>2]|0)+40>>3]=+h[j>>3];a=(c[e>>2]|0)+208|0;h[a>>3]=+h[a>>3]+ +h[j>>3];c[(c[e>>2]|0)+196>>2]=1;e=c[e>>2]|0;f=c[f>>2]|0;a=c[g>>2]|0;Zc(e,f,a,0);i=k;return}l=+sc(c[e>>2]|0,c[f>>2]|0,c[g>>2]|0);h[(c[f>>2]|0)+40>>3]=l;a=(c[e>>2]|0)+216|0;h[a>>3]=+h[a>>3]+ +h[(c[f>>2]|0)+40>>3];e=c[e>>2]|0;f=c[f>>2]|0;a=c[g>>2]|0;Zc(e,f,a,0);i=k;return}function bd(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;h=g+20|0;e=g+16|0;j=g+12|0;d=g+8|0;f=g+4|0;b=g;c[h>>2]=a;c[e>>2]=c[(c[h>>2]|0)+32>>2];c[j>>2]=1+(c[e>>2]|0)+(((c[e>>2]|0)>>>0)/4|0);c[d>>2]=wb((c[j>>2]|0)*20|0)|0;c[f>>2]=c[(c[h>>2]|0)+24>>2];c[(c[h>>2]|0)+24>>2]=c[d>>2];c[(c[h>>2]|0)+32>>2]=c[j>>2];c[b>>2]=0;while(1){if((c[b>>2]|0)>>>0>=(c[e>>2]|0)>>>0)break;a=(c[d>>2]|0)+((c[b>>2]|0)*20|0)|0;h=(c[f>>2]|0)+((c[b>>2]|0)*20|0)|0;c[a>>2]=c[h>>2];c[a+4>>2]=c[h+4>>2];c[a+8>>2]=c[h+8>>2];c[a+12>>2]=c[h+12>>2];c[a+16>>2]=c[h+16>>2];c[b>>2]=(c[b>>2]|0)+1}yb(c[f>>2]|0);i=g;return}function cd(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;e=k+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;if((c[g>>2]|0)>(c[f>>2]|0)){c[e>>2]=cd(c[g>>2]|0,c[f>>2]|0,c[h>>2]|0)|0;e=c[e>>2]|0;i=k;return e|0}c[j>>2]=0;while(1){d=c[j>>2]|0;if(!(c[g>>2]|0))break;a=(d|0)>=((c[h>>2]|0)-(_(c[f>>2]|0,c[g>>2]&1)|0)|0);b=_(c[f>>2]|0,c[g>>2]&1)|0;c[j>>2]=(c[j>>2]|0)+(a?b-(c[h>>2]|0)|0:b);c[g>>2]=c[g>>2]>>1;b=c[f>>2]|0;c[f>>2]=(c[f>>2]|0)+((c[f>>2]|0)>=((c[h>>2]|0)-(c[f>>2]|0)|0)?b-(c[h>>2]|0)|0:b)}c[e>>2]=d;e=c[e>>2]|0;i=k;return e|0}function dd(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;j=k+16|0;e=k+12|0;f=k+8|0;h=k+4|0;g=k;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(!(c[f>>2]|0)){c[j>>2]=1;e=c[j>>2]|0;i=k;return e|0}b=c[e>>2]|0;if(!((c[f>>2]|0)%2|0)){c[g>>2]=dd(b,(c[f>>2]|0)/2|0,c[h>>2]|0)|0;a=c[g>>2]|0;b=c[g>>2]|0;if((c[g>>2]|0)<=(92681-(c[g>>2]|0)|0)){a=_(a,b)|0;a=(a|0)%(c[h>>2]|0)|0}else a=cd(a,b,c[h>>2]|0)|0;c[j>>2]=a;e=c[j>>2]|0;i=k;return e|0}else{d=(b|0)<=(92681-(dd(c[e>>2]|0,(c[f>>2]|0)-1|0,c[h>>2]|0)|0)|0);b=c[e>>2]|0;a=dd(c[e>>2]|0,(c[f>>2]|0)-1|0,c[h>>2]|0)|0;if(d){a=_(b,a)|0;a=(a|0)%(c[h>>2]|0)|0}else a=cd(b,a,c[h>>2]|0)|0;c[j>>2]=a;e=c[j>>2]|0;i=k;return e|0}return 0}function ed(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+96|0;b=k+88|0;d=k+84|0;f=k+80|0;e=k+76|0;j=k+72|0;h=k+8|0;g=k;c[d>>2]=a;c[g>>2]=(c[d>>2]|0)-1;if((c[d>>2]|0)==2){c[b>>2]=1;e=c[b>>2]|0;i=k;return e|0}c[j>>2]=nd(c[g>>2]|0,h)|0;c[f>>2]=2;c[e>>2]=0;while(1){a=c[f>>2]|0;if((c[e>>2]|0)>=(c[j>>2]|0))break;if((dd(a,(c[g>>2]|0)/(c[h+(c[e>>2]<<2)>>2]|0)|0,c[d>>2]|0)|0)==1){c[e>>2]=-1;c[f>>2]=(c[f>>2]|0)+1}c[e>>2]=(c[e>>2]|0)+1}c[b>>2]=a;e=c[b>>2]|0;i=k;return e|0}function fd(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;g=h+8|0;e=h+4|0;f=h;c[e>>2]=a;a=c[e>>2]|0;do if((c[e>>2]|0)>1){if(!((a|0)%2|0)){c[g>>2]=2;break}c[f>>2]=3;while(1){d=_(c[f>>2]|0,c[f>>2]|0)|0;b=c[e>>2]|0;if((d|0)>(c[e>>2]|0)){a=10;break}d=c[f>>2]|0;if(!((b|0)%(c[f>>2]|0)|0)){a=8;break}c[f>>2]=d+2}if((a|0)==8){c[g>>2]=d;break}else if((a|0)==10){c[g>>2]=b;break}}else c[g>>2]=a;while(0);i=h;return c[g>>2]|0}function gd(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;if((c[b>>2]|0)<=1){b=0;b=b&1;i=d;return b|0}a=fd(c[b>>2]|0)|0;b=(a|0)==(c[b>>2]|0);b=b&1;i=d;return b|0}function hd(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;b=d;c[b>>2]=a;while(1){e=(gd(c[b>>2]|0)|0)!=0^1;a=c[b>>2]|0;if(!e)break;c[b>>2]=a+1}i=d;return a|0}function id(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;f=i;i=i+16|0;d=f+4|0;e=f;c[d>>2]=a;c[e>>2]=b;while(1){if(!(c[c[e>>2]>>2]|0))break;while(1){a=c[e>>2]|0;if((c[d>>2]|0)%(c[c[e>>2]>>2]|0)|0)break;c[d>>2]=(c[d>>2]|0)/(c[a>>2]|0)|0}c[e>>2]=a+4}i=f;return (c[d>>2]|0)==1|0}function jd(a){a=a|0;var b=0,d=0,e=0,f=0,g=0;g=i;i=i+16|0;b=g+12|0;d=g+8|0;e=g+4|0;f=g;c[d>>2]=a;if(!(c[d>>2]|0)){c[b>>2]=0;f=c[b>>2]|0;i=g;return f|0}c[e>>2]=c[d>>2];c[f>>2]=1;do{c[e>>2]=((c[e>>2]|0)+(c[f>>2]|0)|0)/2|0;c[f>>2]=(c[d>>2]|0)/(c[e>>2]|0)|0}while((c[e>>2]|0)>(c[f>>2]|0));c[b>>2]=c[e>>2];f=c[b>>2]|0;i=g;return f|0}function kd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;g=i;i=i+16|0;f=g+8|0;d=g+4|0;e=g;c[d>>2]=a;c[e>>2]=b;if((c[d>>2]|0)>0)if(!((c[e>>2]|0)%(c[d>>2]|0)|0)){c[f>>2]=c[d>>2];a=c[f>>2]|0;i=g;return a|0}else{c[f>>2]=0;a=c[f>>2]|0;i=g;return a|0}if(!(c[d>>2]|0)){c[f>>2]=fd(c[e>>2]|0)|0;a=c[f>>2]|0;i=g;return a|0}c[d>>2]=0-(c[d>>2]|0);if((c[e>>2]|0)>(c[d>>2]|0)?((c[e>>2]|0)%(c[d>>2]|0)|0|0)==0:0)d=od((c[e>>2]|0)/(c[d>>2]|0)|0)|0;else d=0;c[f>>2]=d;a=c[f>>2]|0;i=g;return a|0}function ld(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;g=i;i=i+16|0;d=g+8|0;e=g+4|0;f=g;c[e>>2]=a;c[f>>2]=b;if((c[e>>2]|0)>=0){c[d>>2]=(c[e>>2]|0)%(c[f>>2]|0)|0;a=c[d>>2]|0;i=g;return a|0}else{c[d>>2]=(c[f>>2]|0)-1-((0-((c[e>>2]|0)+1)|0)%(c[f>>2]|0)|0);a=c[d>>2]|0;i=g;return a|0}return 0}function md(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=id(c[d>>2]|0,11952)|0;i=b;return a|0}function nd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;j=i;i=i+32|0;d=j+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[f>>2]=b;c[h>>2]=0;b=c[h>>2]|0;c[h>>2]=b+1;c[(c[f>>2]|0)+(b<<2)>>2]=2;do c[e>>2]=c[e>>2]>>1;while((c[e>>2]&1|0)==0);if((c[e>>2]|0)==1){c[d>>2]=c[h>>2];e=c[d>>2]|0;i=j;return e|0}c[g>>2]=3;while(1){b=_(c[g>>2]|0,c[g>>2]|0)|0;a=c[e>>2]|0;if((b|0)>(c[e>>2]|0))break;if(!((a|0)%(c[g>>2]|0)|0)){a=c[g>>2]|0;b=c[h>>2]|0;c[h>>2]=b+1;c[(c[f>>2]|0)+(b<<2)>>2]=a;do 
c[e>>2]=(c[e>>2]|0)/(c[g>>2]|0)|0;while(((c[e>>2]|0)%(c[g>>2]|0)|0|0)!=0^1)}c[g>>2]=(c[g>>2]|0)+2}if((a|0)==1){c[d>>2]=c[h>>2];e=c[d>>2]|0;i=j;return e|0}else{b=c[e>>2]|0;e=c[h>>2]|0;c[h>>2]=e+1;c[(c[f>>2]|0)+(e<<2)>>2]=b;c[d>>2]=c[h>>2];e=c[d>>2]|0;i=j;return e|0}return 0}function od(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;d=e+4|0;b=e;c[d>>2]=a;c[b>>2]=jd(c[d>>2]|0)|0;a=_(c[b>>2]|0,c[b>>2]|0)|0;i=e;return ((a|0)==(c[d>>2]|0)?c[b>>2]|0:0)|0}function pd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=wb(c[g>>2]|0)|0;c[c[d>>2]>>2]=c[f>>2];i=e;return c[d>>2]|0}function qd(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;if(!(c[b>>2]|0)){i=d;return}Ua[c[(c[c[b>>2]>>2]|0)+16>>2]&511](c[b>>2]|0);i=d;return}function rd(){return 11968}function sd(a,b){a=a|0;b=b|0;var d=0,e=0;d=i;i=i+16|0;e=d;c[d+4>>2]=a;c[e>>2]=b;Xb(c[e>>2]|0,19767);i=d;return}function td(a){a=a|0;var b=0;b=i;i=i+16|0;c[b>>2]=a;i=b;return}function ud(a,b){a=a|0;b=b|0;var d=0,e=0;d=i;i=i+16|0;e=d+4|0;c[d+8>>2]=a;c[e>>2]=b;eb[c[c[e>>2]>>2]&63](c[e>>2]|0,19754,d);i=d;return}function vd(a){a=a|0;var b=0;b=i;i=i+16|0;c[b>>2]=a;i=b;return}function wd(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;n=g+20|0;m=g+16|0;l=g+12|0;k=g+8|0;h=g+4|0;j=g;c[n>>2]=a;c[m>>2]=b;c[l>>2]=d;c[k>>2]=e;c[h>>2]=f;c[j>>2]=wb(24)|0;c[c[j>>2]>>2]=c[n>>2];c[(c[j>>2]|0)+4>>2]=c[m>>2];c[(c[j>>2]|0)+8>>2]=c[l>>2];c[(c[j>>2]|0)+12>>2]=c[k>>2];c[(c[j>>2]|0)+16>>2]=1;c[(c[j>>2]|0)+20>>2]=c[c[h>>2]>>2];c[c[h>>2]>>2]=c[j>>2];i=g;return}function xd(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;while(1){if(c[k>>2]|0)if((c[c[k>>2]>>2]|0)==(c[g>>2]|0)?(c[(c[k>>2]|0)+4>>2]|0)==(c[h>>2]|0):0)a=(c[(c[k>>2]|0)+8>>2]|0)!=(c[j>>2]|0);else a=1;else 
a=0;b=c[k>>2]|0;if(!a)break;c[k>>2]=c[b+20>>2]}if(b){e=(c[k>>2]|0)+16|0;c[e>>2]=(c[e>>2]|0)+1;c[f>>2]=c[(c[k>>2]|0)+12>>2];e=c[f>>2]|0;i=l;return e|0}else{c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}return 0}function yd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;g=h+4|0;f=h;c[d>>2]=a;c[e>>2]=b;if(!(c[d>>2]|0)){i=h;return}c[g>>2]=c[e>>2];while(1){a=c[c[g>>2]>>2]|0;c[f>>2]=a;if(!a)break;if((c[(c[f>>2]|0)+12>>2]|0)==(c[d>>2]|0))break;c[g>>2]=(c[f>>2]|0)+20}if(!(c[f>>2]|0)){i=h;return}e=(c[f>>2]|0)+16|0;a=(c[e>>2]|0)+-1|0;c[e>>2]=a;if((a|0)>0){i=h;return}c[c[g>>2]>>2]=c[(c[f>>2]|0)+20>>2];xb(c[(c[f>>2]|0)+12>>2]|0);xb(c[f>>2]|0);i=h;return}function zd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=wb(c[g>>2]|0)|0;c[c[d>>2]>>2]=c[f>>2];c[(c[d>>2]|0)+4>>2]=0;i=e;return c[d>>2]|0}function Ad(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=(c[d>>2]|0)+4|0;c[a>>2]=(c[a>>2]|0)+1;i=b;return}function Bd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;$a[c[c[c[f>>2]>>2]>>2]&127](c[f>>2]|0,c[e>>2]|0);i=d;return}function Cd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;d=f+4|0;e=f;c[d>>2]=a;c[e>>2]=b;while(1){if(!(c[(c[d>>2]|0)+4>>2]|0))break;c[(c[e>>2]|0)+36>>2]=c[(c[d>>2]|0)+4>>2];c[(c[e>>2]|0)+40>>2]=0;Ua[c[c[d>>2]>>2]&511](c[e>>2]|0);c[d>>2]=(c[d>>2]|0)+8}c[(c[e>>2]|0)+36>>2]=0;i=f;return}function Dd(){return ge(0)|0}function Ed(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=ge(1)|0;c[(c[e>>2]|0)+4>>2]=c[j>>2];c[(c[e>>2]|0)+4+4>>2]=c[h>>2];c[(c[e>>2]|0)+4+8>>2]=c[g>>2];i=f;return c[e>>2]|0}function Fd(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;j=i;i=i+32|0;p=j+24|0;o=j+20|0;n=j+16|0;m=j+12|0;l=j+8|0;k=j+4|0;h=j;c[p>>2]=a;c[o>>2]=b;c[n>>2]=d;c[m>>2]=e;c[l>>2]=f;c[k>>2]=g;c[h>>2]=ge(2)|0;c[(c[h>>2]|0)+4>>2]=c[p>>2];c[(c[h>>2]|0)+4+4>>2]=c[o>>2];c[(c[h>>2]|0)+4+8>>2]=c[n>>2];c[(c[h>>2]|0)+4+12>>2]=c[m>>2];c[(c[h>>2]|0)+4+12+4>>2]=c[l>>2];c[(c[h>>2]|0)+4+12+8>>2]=c[k>>2];i=j;return c[h>>2]|0}function Gd(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;m=i;i=i+48|0;v=m+36|0;u=m+32|0;t=m+28|0;s=m+24|0;r=m+20|0;q=m+16|0;p=m+12|0;o=m+8|0;n=m+4|0;l=m;c[v>>2]=a;c[u>>2]=b;c[t>>2]=d;c[s>>2]=e;c[r>>2]=f;c[q>>2]=g;c[p>>2]=h;c[o>>2]=j;c[n>>2]=k;c[l>>2]=ge(3)|0;c[(c[l>>2]|0)+4>>2]=c[v>>2];c[(c[l>>2]|0)+4+4>>2]=c[u>>2];c[(c[l>>2]|0)+4+8>>2]=c[t>>2];c[(c[l>>2]|0)+4+12>>2]=c[s>>2];c[(c[l>>2]|0)+4+12+4>>2]=c[r>>2];c[(c[l>>2]|0)+4+12+8>>2]=c[q>>2];c[(c[l>>2]|0)+4+24>>2]=c[p>>2];c[(c[l>>2]|0)+4+24+4>>2]=c[o>>2];c[(c[l>>2]|0)+4+24+8>>2]=c[n>>2];i=m;return c[l>>2]|0}function Hd(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0;h=i;i=i+32|0;b=h+16|0;d=h+12|0;e=h+8|0;f=h+4|0;g=h;c[b>>2]=a;c[e>>2]=0;c[f>>2]=0;c[d>>2]=0;while(1){if((c[d>>2]|0)>=(c[c[b>>2]>>2]|0))break;c[g>>2]=(c[b>>2]|0)+4+((c[d>>2]|0)*12|0);a=(c[c[g>>2]>>2]|0)-1|0;a=_(a,Tb(c[(c[g>>2]|0)+4>>2]|0)|0)|0;c[e>>2]=(c[e>>2]|0)+a;a=(c[c[g>>2]>>2]|0)-1|0;a=_(a,Tb(c[(c[g>>2]|0)+8>>2]|0)|0)|0;c[f>>2]=(c[f>>2]|0)+a;c[d>>2]=(c[d>>2]|0)+1}f=dc(c[e>>2]|0,c[f>>2]|0)|0;i=h;return f|0}function Id(a){a=a|0;var b=0,d=0,e=0,f=0,g=0;g=i;i=i+16|0;b=g+12|0;d=g+8|0;e=g+4|0;f=g;c[d>>2]=a;if(!(c[c[d>>2]>>2]|0)){c[b>>2]=0;e=c[b>>2]|0;i=g;return e|0}c[f>>2]=Tb(c[(c[d>>2]|0)+4+4>>2]|0)|0;c[e>>2]=1;while(1){a=c[f>>2]|0;if((c[e>>2]|0)>=(c[c[d>>2]>>2]|0))break;c[f>>2]=ec(a,Tb(c[(c[d>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2]|0)|0)|0;c[e>>2]=(c[e>>2]|0)+1}c[b>>2]=a;e=c[b>>2]|0;i=g;return e|0}function Jd(a){a=a|0;var 
b=0,d=0,e=0,f=0,g=0;g=i;i=i+16|0;b=g+12|0;d=g+8|0;e=g+4|0;f=g;c[d>>2]=a;if(!(c[c[d>>2]>>2]|0)){c[b>>2]=0;e=c[b>>2]|0;i=g;return e|0}c[f>>2]=Tb(c[(c[d>>2]|0)+4+8>>2]|0)|0;c[e>>2]=1;while(1){a=c[f>>2]|0;if((c[e>>2]|0)>=(c[c[d>>2]>>2]|0))break;c[f>>2]=ec(a,Tb(c[(c[d>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]|0)|0)|0;c[e>>2]=(c[e>>2]|0)+1}c[b>>2]=a;e=c[b>>2]|0;i=g;return e|0}function Kd(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=Id(c[d>>2]|0)|0;a=ec(a,Jd(c[d>>2]|0)|0)|0;i=b;return a|0}function Ld(a){a=a|0;var b=0,d=0,e=0,f=0,g=0;g=i;i=i+16|0;f=g+12|0;b=g+8|0;d=g+4|0;e=g;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>=(c[c[b>>2]>>2]|0)){b=6;break}c[e>>2]=(c[b>>2]|0)+4+((c[d>>2]|0)*12|0);if((c[(c[e>>2]|0)+4>>2]|0)!=(c[(c[e>>2]|0)+8>>2]|0)){b=4;break}c[d>>2]=(c[d>>2]|0)+1}if((b|0)==4){c[f>>2]=0;a=c[f>>2]|0;i=g;return a|0}else if((b|0)==6){c[f>>2]=1;a=c[f>>2]|0;i=g;return a|0}return 0}function Md(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[f>>2]=a;c[d>>2]=b;if(!(Ld(c[f>>2]|0)|0)){b=0;b=b&1;i=e;return b|0}b=(Ld(c[d>>2]|0)|0)!=0;b=b&1;i=e;return b|0}function Nd(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;h=i;i=i+16|0;e=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;if(Od(c[e>>2]|0,c[g>>2]|0)|0){d=1;d=d&1;i=h;return d|0}if(!(Ld(c[e>>2]|0)|0)){d=0;d=d&1;i=h;return d|0}d=(Od(c[f>>2]|0,c[g>>2]|0)|0)!=0;d=d&1;i=h;return d|0}function Od(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[f>>2]=b;a:do if((c[c[e>>2]>>2]|0)!=2147483647){c[g>>2]=0;while(1){if((c[g>>2]|0)>=(c[c[e>>2]>>2]|0))break a;if((_((c[(c[e>>2]|0)+4+((c[g>>2]|0)*12|0)+8>>2]|0)-(c[(c[e>>2]|0)+4+((c[g>>2]|0)*12|0)+4>>2]|0)|0,(c[f>>2]|0)==1?1:-1)|0)<0)break;c[g>>2]=(c[g>>2]|0)+1}c[d>>2]=1;g=c[d>>2]|0;i=h;return g|0}while(0);c[d>>2]=0;g=c[d>>2]|0;i=h;return g|0}function Pd(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=ge(c[c[e>>2]>>2]|0)|0;Ud((c[b>>2]|0)+4|0,(c[e>>2]|0)+4|0,c[c[e>>2]>>2]|0);i=d;return c[b>>2]|0}function 
Qd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;g=i;i=i+16|0;h=g+12|0;d=g+8|0;f=g+4|0;e=g;c[h>>2]=a;c[d>>2]=b;c[f>>2]=Pd(c[h>>2]|0)|0;if((c[c[f>>2]>>2]|0)==2147483647){a=c[f>>2]|0;i=g;return a|0}a=(c[d>>2]|0)==1;c[e>>2]=0;if(a){while(1){if((c[e>>2]|0)>=(c[c[f>>2]>>2]|0))break;c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2]=c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2];c[e>>2]=(c[e>>2]|0)+1}a=c[f>>2]|0;i=g;return a|0}else{while(1){if((c[e>>2]|0)>=(c[c[f>>2]>>2]|0))break;c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]=c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2];c[e>>2]=(c[e>>2]|0)+1}a=c[f>>2]|0;i=g;return a|0}return 0}function Rd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=ge((c[c[g>>2]>>2]|0)-1|0)|0;Ud((c[d>>2]|0)+4|0,(c[g>>2]|0)+4|0,c[f>>2]|0);Ud((c[d>>2]|0)+4+((c[f>>2]|0)*12|0)|0,(c[g>>2]|0)+4+((c[f>>2]|0)*12|0)+12|0,(c[c[d>>2]>>2]|0)-(c[f>>2]|0)|0);i=e;return c[d>>2]|0}function Sd(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=ge(c[g>>2]|0)|0;Ud((c[e>>2]|0)+4|0,(c[j>>2]|0)+4+((c[h>>2]|0)*12|0)|0,c[g>>2]|0);i=f;return c[e>>2]|0}function Td(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[f>>2]=b;if((c[c[e>>2]>>2]|0)!=2147483647?(c[c[f>>2]>>2]|0)!=2147483647:0){c[g>>2]=ge((c[c[e>>2]>>2]|0)+(c[c[f>>2]>>2]|0)|0)|0;Ud((c[g>>2]|0)+4|0,(c[e>>2]|0)+4|0,c[c[e>>2]>>2]|0);Ud((c[g>>2]|0)+4+((c[c[e>>2]>>2]|0)*12|0)|0,(c[f>>2]|0)+4|0,c[c[f>>2]>>2]|0);c[d>>2]=c[g>>2];a=c[d>>2]|0;i=h;return a|0}c[d>>2]=ge(2147483647)|0;a=c[d>>2]|0;i=h;return a|0}function Ud(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0;j=i;i=i+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;if((c[g>>2]|0)==2147483647){i=j;return}c[h>>2]=0;while(1){if((c[h>>2]|0)>=(c[g>>2]|0))break;a=(c[e>>2]|0)+((c[h>>2]|0)*12|0)|0;d=(c[f>>2]|0)+((c[h>>2]|0)*12|0)|0;c[a>>2]=c[d>>2];c[a+4>>2]=c[d+4>>2];c[a+8>>2]=c[d+8>>2];c[h>>2]=(c[h>>2]|0)+1}i=j;return}function Vd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+48|0;d=n+32|0;e=n+28|0;f=n+24|0;g=n+20|0;k=n+16|0;j=n+12|0;m=n+8|0;h=n+4|0;l=n;c[e>>2]=a;c[f>>2]=b;c[g>>2]=Tb(c[(c[e>>2]|0)+4>>2]|0)|0;c[k>>2]=Tb(c[(c[f>>2]|0)+4>>2]|0)|0;c[j>>2]=Tb(c[(c[e>>2]|0)+8>>2]|0)|0;c[m>>2]=Tb(c[(c[f>>2]|0)+8>>2]|0)|0;c[h>>2]=ec(c[g>>2]|0,c[j>>2]|0)|0;c[l>>2]=ec(c[k>>2]|0,c[m>>2]|0)|0;if((c[h>>2]|0)!=(c[l>>2]|0)){c[d>>2]=$d((c[l>>2]|0)-(c[h>>2]|0)|0)|0;l=c[d>>2]|0;i=n;return l|0}if((c[k>>2]|0)!=(c[g>>2]|0)){c[d>>2]=$d((c[k>>2]|0)-(c[g>>2]|0)|0)|0;l=c[d>>2]|0;i=n;return l|0}if((c[m>>2]|0)!=(c[j>>2]|0)){c[d>>2]=$d((c[m>>2]|0)-(c[j>>2]|0)|0)|0;l=c[d>>2]|0;i=n;return l|0}else{c[d>>2]=$d((c[c[e>>2]>>2]|0)-(c[c[f>>2]>>2]|0)|0)|0;l=c[d>>2]|0;i=n;return l|0}return 0}function Wd(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=ae(c[e>>2]|0)|0;be(c[b>>2]|0);i=d;return c[b>>2]|0}function Xd(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0,j=0;j=i;i=i+32|0;d=j+20|0;b=j+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[b>>2]=a;if(!(ie(c[b>>2]|0)|0)){c[d>>2]=ge(2147483647)|0;e=c[d>>2]|0;i=j;return e|0}c[g>>2]=ae(c[b>>2]|0)|0;b=c[g>>2]|0;if((c[c[g>>2]>>2]|0)<=1){c[d>>2]=b;e=c[d>>2]|0;i=j;return 
e|0}Fy(b+4|0,c[c[g>>2]>>2]|0,12,10);c[f>>2]=1;c[e>>2]=1;while(1){if((c[e>>2]|0)>=(c[c[g>>2]>>2]|0))break;if(!(de((c[g>>2]|0)+4+((c[e>>2]|0)*12|0)+-12|0,(c[g>>2]|0)+4+((c[e>>2]|0)*12|0)|0)|0))c[f>>2]=(c[f>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+1}c[h>>2]=ge(c[f>>2]|0)|0;a=(c[h>>2]|0)+4|0;b=(c[g>>2]|0)+4|0;c[a>>2]=c[b>>2];c[a+4>>2]=c[b+4>>2];c[a+8>>2]=c[b+8>>2];c[f>>2]=1;c[e>>2]=1;while(1){b=c[g>>2]|0;if((c[e>>2]|0)>=(c[c[g>>2]>>2]|0))break;if(de(b+4+((c[e>>2]|0)*12|0)+-12|0,(c[g>>2]|0)+4+((c[e>>2]|0)*12|0)|0)|0){a=(c[h>>2]|0)+4+(((c[f>>2]|0)-1|0)*12|0)|0;c[a>>2]=_(c[a>>2]|0,c[(c[g>>2]|0)+4+((c[e>>2]|0)*12|0)>>2]|0)|0;c[(c[h>>2]|0)+4+(((c[f>>2]|0)-1|0)*12|0)+4>>2]=c[(c[g>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2];c[(c[h>>2]|0)+4+(((c[f>>2]|0)-1|0)*12|0)+8>>2]=c[(c[g>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]}else{a=c[f>>2]|0;c[f>>2]=a+1;a=(c[h>>2]|0)+4+(a*12|0)|0;b=(c[g>>2]|0)+4+((c[e>>2]|0)*12|0)|0;c[a>>2]=c[b>>2];c[a+4>>2]=c[b+4>>2];c[a+8>>2]=c[b+8>>2]}c[e>>2]=(c[e>>2]|0)+1}he(b);be(c[h>>2]|0);c[d>>2]=c[h>>2];e=c[d>>2]|0;i=j;return e|0}function Yd(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;j=f+12|0;k=f+8|0;h=f+4|0;g=f;c[j>>2]=a;c[k>>2]=b;c[h>>2]=d;c[g>>2]=e;b=Sd(c[j>>2]|0,0,c[h>>2]|0)|0;c[c[k>>2]>>2]=b;b=Sd(c[j>>2]|0,c[h>>2]|0,(c[c[j>>2]>>2]|0)-(c[h>>2]|0)|0)|0;c[c[g>>2]>>2]=b;i=f;return}function Zd(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[f>>2]=b;if((c[c[e>>2]>>2]|0)!=(c[c[f>>2]>>2]|0)){c[d>>2]=0;a=c[d>>2]|0;i=h;return a|0}a:do if((c[c[e>>2]>>2]|0)!=2147483647){c[g>>2]=0;while(1){if((c[g>>2]|0)>=(c[c[e>>2]>>2]|0))break a;if((c[(c[e>>2]|0)+4+((c[g>>2]|0)*12|0)>>2]|0)!=(c[(c[f>>2]|0)+4+((c[g>>2]|0)*12|0)>>2]|0))break;if((c[(c[e>>2]|0)+4+((c[g>>2]|0)*12|0)+4>>2]|0)!=(c[(c[f>>2]|0)+4+((c[g>>2]|0)*12|0)+4>>2]|0))break;if((c[(c[e>>2]|0)+4+((c[g>>2]|0)*12|0)+8>>2]|0)!=(c[(c[f>>2]|0)+4+((c[g>>2]|0)*12|0)+8>>2]|0))break;c[g>>2]=(c[g>>2]|0)+1}c[d>>2]=0;a=c[d>>2]|0;i=h;return a|0}while(0);c[d>>2]=1;a=c[d>>2]|0;i=h;return 
a|0}function _d(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;e=i;i=i+32|0;m=e+28|0;l=e+24|0;k=e+20|0;j=e+16|0;h=e+12|0;g=e+8|0;f=e+4|0;d=e;c[m>>2]=a;c[l>>2]=b;c[k>>2]=Td(c[m>>2]|0,c[l>>2]|0)|0;c[j>>2]=Qd(c[k>>2]|0,0)|0;c[h>>2]=Qd(c[k>>2]|0,1)|0;c[g>>2]=Xd(c[j>>2]|0)|0;c[f>>2]=Xd(c[h>>2]|0)|0;c[d>>2]=Zd(c[g>>2]|0,c[f>>2]|0)|0;he(c[k>>2]|0);fe(c[j>>2]|0,c[h>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return c[d>>2]|0}function $d(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[d>>2]=a;do if((c[d>>2]|0)>=0)if(!(c[d>>2]|0)){c[b>>2]=0;break}else{c[b>>2]=1;break}else c[b>>2]=-1;while(0);i=e;return c[b>>2]|0}function ae(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0;g=i;i=i+16|0;b=g+12|0;d=g+8|0;e=g+4|0;f=g;c[b>>2]=a;c[e>>2]=0;c[d>>2]=0;while(1){if((c[d>>2]|0)>=(c[c[b>>2]>>2]|0))break;if((c[(c[b>>2]|0)+4+((c[d>>2]|0)*12|0)>>2]|0)!=1)c[e>>2]=(c[e>>2]|0)+1;c[d>>2]=(c[d>>2]|0)+1}c[f>>2]=ge(c[e>>2]|0)|0;c[e>>2]=0;c[d>>2]=0;while(1){if((c[d>>2]|0)>=(c[c[b>>2]>>2]|0))break;if((c[(c[b>>2]|0)+4+((c[d>>2]|0)*12|0)>>2]|0)!=1){a=c[e>>2]|0;c[e>>2]=a+1;a=(c[f>>2]|0)+4+(a*12|0)|0;h=(c[b>>2]|0)+4+((c[d>>2]|0)*12|0)|0;c[a>>2]=c[h>>2];c[a+4>>2]=c[h+4>>2];c[a+8>>2]=c[h+8>>2]}c[d>>2]=(c[d>>2]|0)+1}i=g;return c[f>>2]|0}function be(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;if((c[c[b>>2]>>2]|0)<=1){i=d;return}Fy((c[b>>2]|0)+4|0,c[c[b>>2]>>2]|0,12,11);i=d;return}function ce(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+16|0;h=d+12|0;g=d+8|0;e=d+4|0;f=d;c[h>>2]=a;c[g>>2]=b;c[e>>2]=Tb(c[(c[h>>2]|0)+4>>2]|0)|0;c[f>>2]=Tb(c[(c[g>>2]|0)+4>>2]|0)|0;a=$d((c[f>>2]|0)-(c[e>>2]|0)|0)|0;i=d;return a|0}function de(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;f=i;i=i+16|0;d=f+4|0;e=f;c[d>>2]=a;c[e>>2]=b;if((c[(c[d>>2]|0)+4>>2]|0)!=(_(c[(c[e>>2]|0)+4>>2]|0,c[c[e>>2]>>2]|0)|0)){b=0;b=b&1;i=f;return b|0}b=(c[(c[d>>2]|0)+8>>2]|0)==(_(c[(c[e>>2]|0)+8>>2]|0,c[c[e>>2]>>2]|0)|0);b=b&1;i=f;return b|0}function ee(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;he(c[f>>2]|0);he(c[e>>2]|0);i=d;return}function fe(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;ee(c[k>>2]|0,c[j>>2]|0);ee(c[h>>2]|0,c[g>>2]|0);i=f;return}function ge(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;if((c[b>>2]|0)!=2147483647&(c[b>>2]|0)>1)c[d>>2]=wb(16+(((c[b>>2]|0)-1|0)*12|0)|0)|0;else c[d>>2]=wb(16)|0;c[c[d>>2]>>2]=c[b>>2];i=e;return c[d>>2]|0}function he(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;xb(c[d>>2]|0);i=b;return}function ie(a){a=a|0;var b=0,d=0,e=0,f=0,g=0;g=i;i=i+16|0;b=g+12|0;d=g+8|0;e=g+4|0;f=g;c[d>>2]=a;c[f>>2]=1;if((c[c[d>>2]>>2]|0)==2147483647){c[b>>2]=0;f=c[b>>2]|0;i=g;return f|0}c[e>>2]=0;while(1){if((c[e>>2]|0)>=(c[c[d>>2]>>2]|0))break;c[f>>2]=_(c[f>>2]|0,c[(c[d>>2]|0)+4+((c[e>>2]|0)*12|0)>>2]|0)|0;c[e>>2]=(c[e>>2]|0)+1}c[b>>2]=c[f>>2];f=c[b>>2]|0;i=g;return f|0}function je(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;f=h+4|0;g=h;c[d>>2]=a;c[e>>2]=b;Yb(c[d>>2]|0,c[c[e>>2]>>2]|0);if((c[c[e>>2]>>2]|0)==2147483647){i=h;return}c[f>>2]=0;while(1){if((c[f>>2]|0)>=(c[c[e>>2]>>2]|0))break;c[g>>2]=(c[e>>2]|0)+4+((c[f>>2]|0)*12|0);Zb(c[d>>2]|0,c[c[g>>2]>>2]|0);Zb(c[d>>2]|0,c[(c[g>>2]|0)+4>>2]|0);Zb(c[d>>2]|0,c[(c[g>>2]|0)+8>>2]|0);c[f>>2]=(c[f>>2]|0)+1}i=h;return}function ke(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;if((c[c[f>>2]>>2]|0)==1){c[k>>2]=(c[f>>2]|0)+4;c[c[g>>2]>>2]=c[c[k>>2]>>2];c[c[h>>2]>>2]=c[(c[k>>2]|0)+4>>2];c[c[j>>2]>>2]=c[(c[k>>2]|0)+8>>2];i=l;return 1}else{c[c[g>>2]>>2]=1;c[c[j>>2]>>2]=0;c[c[h>>2]>>2]=0;i=l;return 1}return 0}function le(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var 
j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;u=i;i=i+48|0;j=u+40|0;k=u+36|0;l=u+32|0;m=u+28|0;n=u+24|0;o=u+20|0;p=u+16|0;q=u+12|0;r=u+8|0;s=u+4|0;t=u;c[j>>2]=a;c[k>>2]=b;c[l>>2]=d;c[m>>2]=e;c[n>>2]=f;c[o>>2]=g;c[p>>2]=h;while(1){c[q>>2]=(c[k>>2]|0)-(c[j>>2]|0);c[r>>2]=(c[m>>2]|0)-(c[l>>2]|0);if((c[q>>2]|0)>=(c[r>>2]|0)?(c[q>>2]|0)>(c[n>>2]|0):0){c[s>>2]=((c[k>>2]|0)+(c[j>>2]|0)|0)/2|0;le(c[j>>2]|0,c[s>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[o>>2]|0,c[p>>2]|0);c[j>>2]=c[s>>2];continue}if((c[r>>2]|0)<=(c[n>>2]|0))break;c[t>>2]=((c[m>>2]|0)+(c[l>>2]|0)|0)/2|0;le(c[j>>2]|0,c[k>>2]|0,c[l>>2]|0,c[t>>2]|0,c[n>>2]|0,c[o>>2]|0,c[p>>2]|0);c[l>>2]=c[t>>2]}Ya[c[o>>2]&63](c[j>>2]|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[p>>2]|0);i=u;return}function me(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;b=jd(8192/(_(c[f>>2]<<2,c[e>>2]|0)|0)|0)|0;i=d;return b|0}function ne(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;Qa(d|0,0)|0;c[a>>2]=c[d>>2];c[a+4>>2]=c[d+4>>2];i=b;return}function oe(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0.0;j=i;i=i+32|0;k=j+16|0;e=j+12|0;f=j+8|0;g=j;c[e>>2]=a;c[f>>2]=b;c[k>>2]=c[d>>2];c[k+4>>2]=c[d+4>>2];h[g>>3]=+qe(k);if(!(c[(c[e>>2]|0)+8>>2]|0)){l=+h[g>>3];i=j;return +l}h[g>>3]=+ab[c[(c[e>>2]|0)+8>>2]&0](c[f>>2]|0,+h[g>>3],1);l=+h[g>>3];i=j;return +l}function pe(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+64|0;k=q+48|0;e=q+44|0;f=q+40|0;g=q+36|0;m=q+32|0;n=q+28|0;p=q+8|0;l=q+24|0;j=q+16|0;o=q;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;rc(c[f>>2]|0,1);Ua[c[(c[c[g>>2]>>2]|0)+8>>2]&511](c[g>>2]|0);a:while(1){c[m>>2]=1;while(1){if(!(c[m>>2]|0))continue a;h[p>>3]=0.0;c[l>>2]=1;ne(j);c[n>>2]=0;while(1){if((c[n>>2]|0)>=8)break;h[o>>3]=+re(c[f>>2]|0,c[g>>2]|0,c[m>>2]|0);if(c[(c[e>>2]|0)+8>>2]|0)h[o>>3]=+ab[c[(c[e>>2]|0)+8>>2]&0](c[g>>2]|0,+h[o>>3],1);if(+h[o>>3]<0.0)continue 
a;if(!((c[l>>2]|0)==0?!(+h[o>>3]<+h[p>>3]):0))h[p>>3]=+h[o>>3];c[l>>2]=0;b=c[e>>2]|0;a=c[g>>2]|0;c[k>>2]=c[j>>2];c[k+4>>2]=c[j+4>>2];if(+oe(b,a,k)>2.0)break;c[n>>2]=(c[n>>2]|0)+1}if(+h[p>>3]>=100.0)break a;c[m>>2]=c[m>>2]<<1}}rc(c[f>>2]|0,0);i=q;return +(+h[p>>3]/+(c[m>>2]|0))}function qe(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;Qa(b|0,0)|0;i=d;return +(+((c[b>>2]|0)-(c[a>>2]|0)|0)+ +((c[b+4>>2]|0)-(c[a+4>>2]|0)|0)*1.0e-06)}function re(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0.0;o=i;i=i+80|0;l=o+64|0;n=o+56|0;e=o+48|0;f=o+44|0;g=o+40|0;k=o+32|0;m=o+24|0;j=o+16|0;p=o+8|0;h=o;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;se(p);c[k>>2]=c[p>>2];c[k+4>>2]=c[p+4>>2];c[j>>2]=0;while(1){if((c[j>>2]|0)>=(c[g>>2]|0))break;$a[c[c[c[e>>2]>>2]>>2]&127](c[e>>2]|0,c[f>>2]|0);c[j>>2]=(c[j>>2]|0)+1}se(h);c[m>>2]=c[h>>2];c[m+4>>2]=c[h+4>>2];c[n>>2]=c[m>>2];c[n+4>>2]=c[m+4>>2];c[l>>2]=c[k>>2];c[l+4>>2]=c[k+4>>2];q=+te(n,l);i=o;return +q}function se(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;Ca(10,d|0)|0;c[a>>2]=c[d>>2];c[a+4>>2]=c[d+4>>2];i=b;return}function te(a,b){a=a|0;b=b|0;return +((+(c[a>>2]|0)-+(c[b>>2]|0))*1.0e9+(+(c[a+4>>2]|0)-+(c[b+4>>2]|0)))}function ue(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;y=i;i=i+64|0;s=y+60|0;t=y+56|0;u=y+52|0;v=y+48|0;h=y+44|0;w=y+40|0;x=y+36|0;j=y+32|0;m=y+28|0;p=y+24|0;n=y+20|0;o=y+16|0;q=y+12|0;r=y+8|0;k=y+4|0;l=y;c[s>>2]=a;c[t>>2]=b;c[u>>2]=d;c[v>>2]=e;c[h>>2]=f;switch(c[h>>2]|0){case 
1:{c[x>>2]=1;while(1){if((c[x>>2]|0)>=(c[t>>2]|0))break;c[w>>2]=0;while(1){h=c[x>>2]|0;if((c[w>>2]|0)>=(c[x>>2]|0))break;l=_(h,c[u>>2]|0)|0;l=l+(_(c[w>>2]|0,c[v>>2]|0)|0)|0;g[m>>2]=+g[(c[s>>2]|0)+(l<<2)>>2];l=_(c[x>>2]|0,c[v>>2]|0)|0;l=l+(_(c[w>>2]|0,c[u>>2]|0)|0)|0;g[p>>2]=+g[(c[s>>2]|0)+(l<<2)>>2];l=_(c[x>>2]|0,c[v>>2]|0)|0;l=l+(_(c[w>>2]|0,c[u>>2]|0)|0)|0;g[(c[s>>2]|0)+(l<<2)>>2]=+g[m>>2];l=_(c[x>>2]|0,c[u>>2]|0)|0;l=l+(_(c[w>>2]|0,c[v>>2]|0)|0)|0;g[(c[s>>2]|0)+(l<<2)>>2]=+g[p>>2];c[w>>2]=(c[w>>2]|0)+1}c[x>>2]=h+1}i=y;return}case 2:{c[x>>2]=1;while(1){if((c[x>>2]|0)>=(c[t>>2]|0))break;c[w>>2]=0;while(1){h=c[x>>2]|0;if((c[w>>2]|0)>=(c[x>>2]|0))break;m=_(h,c[u>>2]|0)|0;m=m+(_(c[w>>2]|0,c[v>>2]|0)|0)|0;g[n>>2]=+g[(c[s>>2]|0)+(m<<2)>>2];m=_(c[x>>2]|0,c[u>>2]|0)|0;m=m+(_(c[w>>2]|0,c[v>>2]|0)|0)+1|0;g[o>>2]=+g[(c[s>>2]|0)+(m<<2)>>2];m=_(c[x>>2]|0,c[v>>2]|0)|0;m=m+(_(c[w>>2]|0,c[u>>2]|0)|0)|0;g[q>>2]=+g[(c[s>>2]|0)+(m<<2)>>2];m=_(c[x>>2]|0,c[v>>2]|0)|0;m=m+(_(c[w>>2]|0,c[u>>2]|0)|0)+1|0;g[r>>2]=+g[(c[s>>2]|0)+(m<<2)>>2];m=_(c[x>>2]|0,c[v>>2]|0)|0;m=m+(_(c[w>>2]|0,c[u>>2]|0)|0)|0;g[(c[s>>2]|0)+(m<<2)>>2]=+g[n>>2];m=_(c[x>>2]|0,c[v>>2]|0)|0;m=m+(_(c[w>>2]|0,c[u>>2]|0)|0)+1|0;g[(c[s>>2]|0)+(m<<2)>>2]=+g[o>>2];m=_(c[x>>2]|0,c[u>>2]|0)|0;m=m+(_(c[w>>2]|0,c[v>>2]|0)|0)|0;g[(c[s>>2]|0)+(m<<2)>>2]=+g[q>>2];m=_(c[x>>2]|0,c[u>>2]|0)|0;m=m+(_(c[w>>2]|0,c[v>>2]|0)|0)+1|0;g[(c[s>>2]|0)+(m<<2)>>2]=+g[r>>2];c[w>>2]=(c[w>>2]|0)+1}c[x>>2]=h+1}i=y;return}default:{c[x>>2]=1;while(1){if((c[x>>2]|0)>=(c[t>>2]|0))break;c[w>>2]=0;while(1){if((c[w>>2]|0)>=(c[x>>2]|0))break;c[j>>2]=0;while(1){if((c[j>>2]|0)>=(c[h>>2]|0))break;m=_(c[x>>2]|0,c[u>>2]|0)|0;m=m+(_(c[w>>2]|0,c[v>>2]|0)|0)|0;g[k>>2]=+g[(c[s>>2]|0)+(m+(c[j>>2]|0)<<2)>>2];m=_(c[x>>2]|0,c[v>>2]|0)|0;m=m+(_(c[w>>2]|0,c[u>>2]|0)|0)|0;g[l>>2]=+g[(c[s>>2]|0)+(m+(c[j>>2]|0)<<2)>>2];m=_(c[x>>2]|0,c[v>>2]|0)|0;m=m+(_(c[w>>2]|0,c[u>>2]|0)|0)|0;g[(c[s>>2]|0)+(m+(c[j>>2]|0)<<2)>>2]=+g[k>>2];m=_(c[x>>2]|0,c[u>>2]|0)|0;m=m+(_(c[w>>2]|0,c[v>>2]|0)|0)
|0;g[(c[s>>2]|0)+(m+(c[j>>2]|0)<<2)>>2]=+g[l>>2];c[j>>2]=(c[j>>2]|0)+1}c[w>>2]=(c[w>>2]|0)+1}c[x>>2]=(c[x>>2]|0)+1}i=y;return}}}function ve(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+48|0;k=g+44|0;j=g+40|0;n=g+36|0;m=g+32|0;l=g+28|0;h=g;c[k>>2]=a;c[j>>2]=b;c[n>>2]=d;c[m>>2]=e;c[l>>2]=f;c[h+4>>2]=c[n>>2];c[h+8>>2]=c[m>>2];c[h+12>>2]=c[l>>2];c[h+16>>2]=me(c[l>>2]|0,2)|0;c[h+24>>2]=0;c[h+20>>2]=0;ye(c[k>>2]|0,c[j>>2]|0,5,h);i=g;return}function we(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+8240|0;k=g+8236|0;j=g+8232|0;n=g+8228|0;m=g+8224|0;l=g+8220|0;h=g+8192|0;c[k>>2]=a;c[j>>2]=b;c[n>>2]=d;c[m>>2]=e;c[l>>2]=f;c[h+4>>2]=c[n>>2];c[h+8>>2]=c[m>>2];c[h+12>>2]=c[l>>2];c[h+16>>2]=me(c[l>>2]|0,2)|0;c[h+20>>2]=g+4096;c[h+24>>2]=g;ye(c[k>>2]|0,c[j>>2]|0,6,h);i=g;return}function xe(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0;B=i;i=i+96|0;t=B+80|0;u=B+76|0;h=B+72|0;v=B+68|0;D=B+64|0;C=B+60|0;w=B+56|0;z=B+52|0;A=B+48|0;k=B+44|0;x=B+40|0;y=B+36|0;j=B+32|0;n=B+28|0;q=B+24|0;o=B+20|0;p=B+16|0;r=B+12|0;s=B+8|0;l=B+4|0;m=B;c[t>>2]=a;c[u>>2]=b;c[h>>2]=d;c[v>>2]=e;c[D>>2]=f;c[C>>2]=c[D>>2];c[w>>2]=c[c[C>>2]>>2];c[z>>2]=c[(c[C>>2]|0)+4>>2];c[A>>2]=c[(c[C>>2]|0)+8>>2];c[k>>2]=c[(c[C>>2]|0)+12>>2];switch(c[k>>2]|0){case 1:{c[y>>2]=c[h>>2];while(1){if((c[y>>2]|0)>=(c[v>>2]|0))break;c[x>>2]=c[t>>2];while(1){h=c[y>>2]|0;if((c[x>>2]|0)>=(c[u>>2]|0))break;s=_(h,c[z>>2]|0)|0;s=s+(_(c[x>>2]|0,c[A>>2]|0)|0)|0;g[n>>2]=+g[(c[w>>2]|0)+(s<<2)>>2];s=_(c[y>>2]|0,c[A>>2]|0)|0;s=s+(_(c[x>>2]|0,c[z>>2]|0)|0)|0;g[q>>2]=+g[(c[w>>2]|0)+(s<<2)>>2];s=_(c[y>>2]|0,c[A>>2]|0)|0;s=s+(_(c[x>>2]|0,c[z>>2]|0)|0)|0;g[(c[w>>2]|0)+(s<<2)>>2]=+g[n>>2];s=_(c[y>>2]|0,c[z>>2]|0)|0;s=s+(_(c[x>>2]|0,c[A>>2]|0)|0)|0;g[(c[w>>2]|0)+(s<<2)>>2]=+g[q>>2];c[x>>2]=(c[x>>2]|0)+1}c[y>>2]=h+1}i=B;return}case 
2:{c[y>>2]=c[h>>2];while(1){if((c[y>>2]|0)>=(c[v>>2]|0))break;c[x>>2]=c[t>>2];while(1){h=c[y>>2]|0;if((c[x>>2]|0)>=(c[u>>2]|0))break;q=_(h,c[z>>2]|0)|0;q=q+(_(c[x>>2]|0,c[A>>2]|0)|0)|0;g[o>>2]=+g[(c[w>>2]|0)+(q<<2)>>2];q=_(c[y>>2]|0,c[z>>2]|0)|0;q=q+(_(c[x>>2]|0,c[A>>2]|0)|0)+1|0;g[p>>2]=+g[(c[w>>2]|0)+(q<<2)>>2];q=_(c[y>>2]|0,c[A>>2]|0)|0;q=q+(_(c[x>>2]|0,c[z>>2]|0)|0)|0;g[r>>2]=+g[(c[w>>2]|0)+(q<<2)>>2];q=_(c[y>>2]|0,c[A>>2]|0)|0;q=q+(_(c[x>>2]|0,c[z>>2]|0)|0)+1|0;g[s>>2]=+g[(c[w>>2]|0)+(q<<2)>>2];q=_(c[y>>2]|0,c[A>>2]|0)|0;q=q+(_(c[x>>2]|0,c[z>>2]|0)|0)|0;g[(c[w>>2]|0)+(q<<2)>>2]=+g[o>>2];q=_(c[y>>2]|0,c[A>>2]|0)|0;q=q+(_(c[x>>2]|0,c[z>>2]|0)|0)+1|0;g[(c[w>>2]|0)+(q<<2)>>2]=+g[p>>2];q=_(c[y>>2]|0,c[z>>2]|0)|0;q=q+(_(c[x>>2]|0,c[A>>2]|0)|0)|0;g[(c[w>>2]|0)+(q<<2)>>2]=+g[r>>2];q=_(c[y>>2]|0,c[z>>2]|0)|0;q=q+(_(c[x>>2]|0,c[A>>2]|0)|0)+1|0;g[(c[w>>2]|0)+(q<<2)>>2]=+g[s>>2];c[x>>2]=(c[x>>2]|0)+1}c[y>>2]=h+1}i=B;return}default:{c[y>>2]=c[h>>2];while(1){if((c[y>>2]|0)>=(c[v>>2]|0))break;c[x>>2]=c[t>>2];while(1){if((c[x>>2]|0)>=(c[u>>2]|0))break;c[j>>2]=0;while(1){if((c[j>>2]|0)>=(c[k>>2]|0))break;s=_(c[y>>2]|0,c[z>>2]|0)|0;s=s+(_(c[x>>2]|0,c[A>>2]|0)|0)|0;g[l>>2]=+g[(c[w>>2]|0)+(s+(c[j>>2]|0)<<2)>>2];s=_(c[y>>2]|0,c[A>>2]|0)|0;s=s+(_(c[x>>2]|0,c[z>>2]|0)|0)|0;g[m>>2]=+g[(c[w>>2]|0)+(s+(c[j>>2]|0)<<2)>>2];s=_(c[y>>2]|0,c[A>>2]|0)|0;s=s+(_(c[x>>2]|0,c[z>>2]|0)|0)|0;g[(c[w>>2]|0)+(s+(c[j>>2]|0)<<2)>>2]=+g[l>>2];s=_(c[y>>2]|0,c[z>>2]|0)|0;s=s+(_(c[x>>2]|0,c[A>>2]|0)|0)|0;g[(c[w>>2]|0)+(s+(c[j>>2]|0)<<2)>>2]=+g[m>>2];c[j>>2]=(c[j>>2]|0)+1}c[x>>2]=(c[x>>2]|0)+1}c[y>>2]=(c[y>>2]|0)+1}i=B;return}}}function ye(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;while(1){if((c[g>>2]|0)<=1)break;c[k>>2]=(c[g>>2]|0)/2|0;c[c[j>>2]>>2]=c[f>>2];le(0,c[k>>2]|0,c[k>>2]|0,c[g>>2]|0,c[(c[j>>2]|0)+16>>2]|0,c[h>>2]|0,c[j>>2]|0);ye(c[f>>2]|0,c[k>>2]|0,c[h>>2]|0,c[j>>2]|0);a=_(c[k>>2]|0,(c[(c[j>>2]|0)+4>>2]|0)+(c[(c[j>>2]|0)+8>>2]|0)|0)|0;c[f>>2]=(c[f>>2]|0)+(a<<2);c[g>>2]=(c[g>>2]|0)-(c[k>>2]|0)}i=l;return}function ze(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;l=g+20|0;m=g+16|0;j=g+12|0;k=g+8|0;n=g+4|0;h=g;c[l>>2]=a;c[m>>2]=b;c[j>>2]=d;c[k>>2]=e;c[n>>2]=f;c[h>>2]=c[n>>2];d=(c[c[h>>2]>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+4>>2]|0)|0)<<2)|0;d=d+((_(c[j>>2]|0,c[(c[h>>2]|0)+8>>2]|0)|0)<<2)|0;b=_(c[(c[h>>2]|0)+12>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0)|0;Kb(d,c[(c[h>>2]|0)+20>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+4>>2]|0,c[(c[h>>2]|0)+12>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,c[(c[h>>2]|0)+8>>2]|0,b,c[(c[h>>2]|0)+12>>2]|0);b=(c[c[h>>2]>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+8>>2]|0)|0)<<2)|0;b=b+((_(c[j>>2]|0,c[(c[h>>2]|0)+4>>2]|0)|0)<<2)|0;d=_(c[(c[h>>2]|0)+12>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0)|0;Kb(b,c[(c[h>>2]|0)+24>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+8>>2]|0,c[(c[h>>2]|0)+12>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,c[(c[h>>2]|0)+4>>2]|0,d,c[(c[h>>2]|0)+12>>2]|0);d=(c[c[h>>2]>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+4>>2]|0)|0)<<2)|0;d=d+((_(c[j>>2]|0,c[(c[h>>2]|0)+8>>2]|0)|0)<<2)|0;b=_(c[(c[h>>2]|0)+12>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0)|0;Lb(c[(c[h>>2]|0)+24>>2]|0,d,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+12>>2]|0,c[(c[h>>2]|0)+4>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,b,c[(c[h>>2]|0)+8>>2]|0,c[(c[h>>2]|0)+12>>2]|0);b=(c[c[h>>2]>>2]|0)+((_(c[l>>2]|0,c[(c[h>>2]|0)+8>>2]|0)|0)<<2)|0;b=b+((_(c[j>>2]|0,c[(c[h>>2]|0)+4>>2]|0)|0)<<2)|0;d=_(c[(c[h>>2]|0)+12>>2]|0,(c[m>>2]|0)-(c[l>>2]|0)|0)|0;Lb(c[(c[h>>2]|0)+20>>2]|0,b,(c[m>>2]|0)-(c[l>>2]|0)|0,c[(c[h>>2]|0)+12>>2]|0,c[(c[h>>2]|0)+8>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0,d,c
[(c[h>>2]|0)+4>>2]|0,c[(c[h>>2]|0)+12>>2]|0);i=g;return}function Ae(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;k=i;i=i+32|0;l=k+24|0;d=k+20|0;e=k+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[l>>2]=a;c[d>>2]=b;c[h>>2]=wb(36)|0;c[(c[h>>2]|0)+32>>2]=c[d>>2];c[(c[h>>2]|0)+28>>2]=0;c[(c[h>>2]|0)+24>>2]=0;c[c[h>>2]>>2]=0;c[(c[h>>2]|0)+8>>2]=0;switch(c[l>>2]|0){case 1:{c[c[h>>2]>>2]=17;c[(c[h>>2]|0)+4>>2]=18;break}case 2:{c[j>>2]=Ce(c[d>>2]|0)|0;c[(c[h>>2]|0)+12>>2]=c[j>>2];c[(c[h>>2]|0)+16>>2]=1<>2];c[(c[h>>2]|0)+20>>2]=(c[(c[h>>2]|0)+16>>2]|0)-1;c[f>>2]=c[(c[h>>2]|0)+16>>2];c[g>>2]=((c[d>>2]|0)+(c[f>>2]|0)-1|0)/(c[f>>2]|0)|0;b=wb(c[f>>2]<<1<<3)|0;c[(c[h>>2]|0)+24>>2]=b;b=wb(c[g>>2]<<1<<3)|0;c[(c[h>>2]|0)+28>>2]=b;c[e>>2]=0;while(1){if((c[e>>2]|0)>=(c[f>>2]|0))break;De(c[e>>2]|0,c[d>>2]|0,(c[(c[h>>2]|0)+24>>2]|0)+(c[e>>2]<<1<<3)|0);c[e>>2]=(c[e>>2]|0)+1}c[e>>2]=0;while(1){if((c[e>>2]|0)>=(c[g>>2]|0))break;f=_(c[e>>2]|0,c[(c[h>>2]|0)+16>>2]|0)|0;De(f,c[d>>2]|0,(c[(c[h>>2]|0)+28>>2]|0)+(c[e>>2]<<1<<3)|0);c[e>>2]=(c[e>>2]|0)+1}c[(c[h>>2]|0)+4>>2]=15;c[(c[h>>2]|0)+8>>2]=1;break}case 3:{c[(c[h>>2]|0)+4>>2]=16;break}default:{}}if(!(c[c[h>>2]>>2]|0))c[c[h>>2]>>2]=19;if(c[(c[h>>2]|0)+8>>2]|0){f=c[h>>2]|0;i=k;return f|0}c[(c[h>>2]|0)+8>>2]=2;f=c[h>>2]|0;i=k;return f|0}function Be(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;yb(c[(c[d>>2]|0)+24>>2]|0);yb(c[(c[d>>2]|0)+28>>2]|0);xb(c[d>>2]|0);i=b;return}function Ce(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){a=c[d>>2]|0;if((c[b>>2]|0)<=0)break;c[d>>2]=a+1;c[b>>2]=(c[b>>2]|0)/4|0}i=e;return a|0}function De(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0.0;p=i;i=i+64|0;e=p+48|0;f=p+44|0;g=p+40|0;o=p+24|0;j=p+16|0;m=p+8|0;n=p;k=p+36|0;l=p+32|0;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[k>>2]=0;c[l>>2]=c[f>>2];c[f>>2]=(c[f>>2]|0)+(c[f>>2]|0);c[f>>2]=(c[f>>2]|0)+(c[f>>2]|0);c[e>>2]=(c[e>>2]|0)+(c[e>>2]|0);c[e>>2]=(c[e>>2]|0)+(c[e>>2]|0);if((c[e>>2]|0)<0)c[e>>2]=(c[e>>2]|0)+(c[f>>2]|0);if((c[e>>2]|0)>((c[f>>2]|0)-(c[e>>2]|0)|0)){c[e>>2]=(c[f>>2]|0)-(c[e>>2]|0);c[k>>2]=c[k>>2]|4}if(((c[e>>2]|0)-(c[l>>2]|0)|0)>0){c[e>>2]=(c[e>>2]|0)-(c[l>>2]|0);c[k>>2]=c[k>>2]|2}if((c[e>>2]|0)>((c[l>>2]|0)-(c[e>>2]|0)|0)){c[e>>2]=(c[l>>2]|0)-(c[e>>2]|0);c[k>>2]=c[k>>2]|1}h[o>>3]=+(c[e>>2]|0)*6.283185307179586/+(c[f>>2]|0);h[j>>3]=+Q(+(+h[o>>3]));h[m>>3]=+R(+(+h[o>>3]));if(c[k>>2]&1){h[n>>3]=+h[j>>3];h[j>>3]=+h[m>>3];h[m>>3]=+h[n>>3]}if(c[k>>2]&2){h[n>>3]=+h[j>>3];h[j>>3]=-+h[m>>3];h[m>>3]=+h[n>>3]}if(!(c[k>>2]&4)){q=+h[j>>3];l=c[g>>2]|0;h[l>>3]=q;q=+h[m>>3];m=c[g>>2]|0;m=m+8|0;h[m>>3]=q;i=p;return}h[m>>3]=-+h[m>>3];q=+h[j>>3];l=c[g>>2]|0;h[l>>3]=q;q=+h[m>>3];m=c[g>>2]|0;m=m+8|0;h[m>>3]=q;i=p;return}function Ee(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;e=i;i=i+64|0;n=e+48|0;p=e+44|0;f=e+40|0;o=e+36|0;m=e+32|0;k=e+24|0;g=e+16|0;j=e+8|0;l=e;c[n>>2]=a;c[p>>2]=b;c[f>>2]=d;b=_(c[(c[n>>2]|0)+32>>2]|0,(c[p>>2]|0)<0&1)|0;c[p>>2]=(c[p>>2]|0)+b;c[o>>2]=c[p>>2]&c[(c[n>>2]|0)+20>>2];c[m>>2]=c[p>>2]>>c[(c[n>>2]|0)+12>>2];h[k>>3]=+h[(c[(c[n>>2]|0)+24>>2]|0)+(c[o>>2]<<1<<3)>>3];h[g>>3]=+h[(c[(c[n>>2]|0)+24>>2]|0)+((c[o>>2]<<1)+1<<3)>>3];h[j>>3]=+h[(c[(c[n>>2]|0)+28>>2]|0)+(c[m>>2]<<1<<3)>>3];h[l>>3]=+h[(c[(c[n>>2]|0)+28>>2]|0)+((c[m>>2]<<1)+1<<3)>>3];h[c[f>>2]>>3]=+h[j>>3]*+h[k>>3]-+h[l>>3]*+h[g>>3];h[(c[f>>2]|0)+8>>3]=+h[l>>3]*+h[k>>3]+ +h[j>>3]*+h[g>>3];i=e;return}function Fe(a,b,d,e,f){a=a|0;b=b|0;d=+d;e=+e;f=f|0;var 
j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;j=i;i=i+80|0;u=j+72|0;w=j+68|0;m=j+64|0;o=j+60|0;k=j+56|0;v=j+52|0;t=j+48|0;r=j+40|0;p=j+32|0;q=j+24|0;s=j+16|0;n=j+8|0;l=j;c[u>>2]=a;c[w>>2]=b;g[m>>2]=d;g[o>>2]=e;c[k>>2]=f;b=_(c[(c[u>>2]|0)+32>>2]|0,(c[w>>2]|0)<0&1)|0;c[w>>2]=(c[w>>2]|0)+b;c[v>>2]=c[w>>2]&c[(c[u>>2]|0)+20>>2];c[t>>2]=c[w>>2]>>c[(c[u>>2]|0)+12>>2];h[r>>3]=+h[(c[(c[u>>2]|0)+24>>2]|0)+(c[v>>2]<<1<<3)>>3];h[p>>3]=+h[(c[(c[u>>2]|0)+24>>2]|0)+((c[v>>2]<<1)+1<<3)>>3];h[q>>3]=+h[(c[(c[u>>2]|0)+28>>2]|0)+(c[t>>2]<<1<<3)>>3];h[s>>3]=+h[(c[(c[u>>2]|0)+28>>2]|0)+((c[t>>2]<<1)+1<<3)>>3];h[n>>3]=+h[q>>3]*+h[r>>3]-+h[s>>3]*+h[p>>3];h[l>>3]=+h[s>>3]*+h[r>>3]+ +h[q>>3]*+h[p>>3];g[c[k>>2]>>2]=+g[m>>2]*+h[n>>3]+ +g[o>>2]*+h[l>>3];g[(c[k>>2]|0)+4>>2]=+g[o>>2]*+h[n>>3]-+g[m>>2]*+h[l>>3];i=j;return}function Ge(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;g=e+8|0;h=e+4|0;f=e;c[g>>2]=a;c[h>>2]=b;c[f>>2]=d;De(c[h>>2]|0,c[(c[g>>2]|0)+32>>2]|0,c[f>>2]|0);i=e;return}function He(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0;e=i;i=i+16|0;f=e;c[e+8>>2]=a;c[e+4>>2]=b;c[f>>2]=d;g[c[f>>2]>>2]=0.0;g[(c[f>>2]|0)+4>>2]=0.0;i=e;return}function Ie(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0;e=i;i=i+16|0;f=e;c[e+8>>2]=a;c[e+4>>2]=b;c[f>>2]=d;h[c[f>>2]>>3]=0.0;h[(c[f>>2]|0)+8>>3]=0.0;i=e;return}function Je(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+24|0;k=e+20|0;f=e+16|0;j=e;c[l>>2]=a;c[k>>2]=b;c[f>>2]=d;eb[c[(c[l>>2]|0)+4>>2]&63](c[l>>2]|0,c[k>>2]|0,j);g[c[f>>2]>>2]=+h[j>>3];g[(c[f>>2]|0)+4>>2]=+h[j+8>>3];i=e;return}function Ke(a,b,d,e,f){a=a|0;b=b|0;d=+d;e=+e;f=f|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0;j=i;i=i+48|0;p=j+32|0;o=j+28|0;m=j+24|0;n=j+20|0;k=j+16|0;l=j;c[p>>2]=a;c[o>>2]=b;g[m>>2]=d;g[n>>2]=e;c[k>>2]=f;eb[c[(c[p>>2]|0)+4>>2]&63](c[p>>2]|0,c[o>>2]|0,l);g[c[k>>2]>>2]=+g[m>>2]*+h[l>>3]-+g[n>>2]*(+h[l+8>>3]*-1.0);g[(c[k>>2]|0)+4>>2]=+g[n>>2]*+h[l>>3]+ +g[m>>2]*(+h[l+8>>3]*-1.0);i=j;return}function Le(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;d=i;i=i+16|0;f=d+8|0;e=d+4|0;c[f>>2]=a;c[e>>2]=b;b=Ne(c[f>>2]|0,c[e>>2]|0,d)|0;i=d;return b|0}function Me(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;h=o+20|0;j=o+16|0;k=o+12|0;l=o+8|0;m=o+4|0;n=o;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;if(!(c[h>>2]|0)){Oe(c[j>>2]|0);i=o;return}else{Pe(c[h>>2]|0,c[j>>2]|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0);i=o;return}}function Ne(b,e,f){b=b|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0;l=i;i=i+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[g>>2]=b;c[h>>2]=e;c[j>>2]=f;c[k>>2]=0;while(1){e=c[h>>2]|0;if((d[c[h>>2]>>0]|0)==3)break;switch(d[e>>0]|0){case 4:{c[k>>2]=(c[k>>2]|0)+((c[g>>2]|0)-1<<1);break}case 5:{c[k>>2]=(c[k>>2]|0)+((c[g>>2]|0)-1);break}case 2:{c[k>>2]=(c[k>>2]|0)+2;break}case 1:case 0:{c[k>>2]=(c[k>>2]|0)+1;break}default:{}}c[h>>2]=(c[h>>2]|0)+4}c[c[j>>2]>>2]=a[e+1>>0];i=l;return c[k>>2]|0}function Oe(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0;g=i;i=i+16|0;b=g+8|0;d=g+4|0;e=g;c[b>>2]=a;c[d>>2]=c[c[b>>2]>>2];h=(c[d>>2]|0)+16|0;a=(c[h>>2]|0)+-1|0;c[h>>2]=a;if(a){i=g;return}c[e>>2]=11996+((Se(c[(c[d>>2]|0)+4>>2]|0,c[(c[d>>2]|0)+8>>2]|0)|0)<<2);while(1){if(!(c[c[e>>2]>>2]|0)){f=7;break}if((c[c[e>>2]>>2]|0)==(c[d>>2]|0))break;c[e>>2]=(c[c[e>>2]>>2]|0)+24}if((f|0)==7){i=g;return}c[c[e>>2]>>2]=c[(c[d>>2]|0)+24>>2];xb(c[c[d>>2]>>2]|0);xb(c[d>>2]|0);c[c[b>>2]>>2]=0;i=g;return}function Pe(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+32|0;h=q+28|0;j=q+24|0;k=q+20|0;l=q+16|0;m=q+12|0;n=q+8|0;p=q+4|0;o=q;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;e=Qe(c[h>>2]|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0)|0;c[p>>2]=e;if(e){g=(c[p>>2]|0)+16|0;c[g>>2]=(c[g>>2]|0)+1;g=c[p>>2]|0;e=c[j>>2]|0;c[e>>2]=g;i=q;return}else{c[p>>2]=wb(32)|0;c[(c[p>>2]|0)+4>>2]=c[l>>2];c[(c[p>>2]|0)+8>>2]=c[m>>2];c[(c[p>>2]|0)+12>>2]=c[n>>2];c[(c[p>>2]|0)+20>>2]=c[k>>2];c[(c[p>>2]|0)+16>>2]=1;c[(c[p>>2]|0)+28>>2]=c[h>>2];g=Re(c[h>>2]|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0)|0;c[c[p>>2]>>2]=g;c[o>>2]=Se(c[l>>2]|0,c[m>>2]|0)|0;c[(c[p>>2]|0)+24>>2]=c[11996+(c[o>>2]<<2)>>2];c[11996+(c[o>>2]<<2)>>2]=c[p>>2];g=c[p>>2]|0;e=c[j>>2]|0;c[e>>2]=g;i=q;return}}function Qe(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;g=n+20|0;h=n+16|0;j=n+12|0;k=n+8|0;l=n+4|0;m=n;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[m>>2]=c[11996+((Se(c[j>>2]|0,c[k>>2]|0)|0)<<2)>>2];while(1){if(!(c[m>>2]|0)){g=5;break}if(!((Te(c[m>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0,c[l>>2]|0)|0)!=0^1)){g=5;break}c[m>>2]=c[(c[m>>2]|0)+24>>2]}if((g|0)==5){i=n;return c[m>>2]|0}return 0}function Re(e,f,h,j,k){e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0.0;z=i;i=i+80|0;B=z+68|0;l=z+64|0;m=z+60|0;n=z+56|0;o=z+52|0;A=z+48|0;v=z+44|0;y=z+40|0;p=z+36|0;q=z+32|0;w=z+28|0;x=z+24|0;t=z+20|0;u=z+16|0;r=z+8|0;s=z;c[B>>2]=e;c[l>>2]=f;c[m>>2]=h;c[n>>2]=j;c[o>>2]=k;c[x>>2]=Ae(c[B>>2]|0,c[m>>2]|0)|0;c[w>>2]=c[l>>2];c[A>>2]=Ne(c[n>>2]|0,c[w>>2]|0,y)|0;e=wb((_(c[A>>2]|0,(c[o>>2]|0)/(c[y>>2]|0)|0)|0)<<2)|0;c[p>>2]=e;c[q>>2]=e;c[v>>2]=0;while(1){if((c[v>>2]|0)>=(c[o>>2]|0))break;c[w>>2]=c[l>>2];while(1){if((d[c[w>>2]>>0]|0)==3)break;a:do switch(d[c[w>>2]>>0]|0){case 4:{c[t>>2]=1;while(1){if((c[t>>2]|0)>=(c[n>>2]|0))break 
a;e=_((c[v>>2]|0)+(a[(c[w>>2]|0)+1>>0]|0)|0,c[t>>2]|0)|0;eb[c[c[x>>2]>>2]&63](c[x>>2]|0,e,c[p>>2]|0);c[p>>2]=(c[p>>2]|0)+8;c[t>>2]=(c[t>>2]|0)+1}}case 5:{c[u>>2]=1;while(1){if(((c[u>>2]|0)+(c[u>>2]|0)|0)>=(c[n>>2]|0))break a;k=c[c[x>>2]>>2]|0;f=c[x>>2]|0;j=c[u>>2]|0;h=(c[v>>2]|0)+(a[(c[w>>2]|0)+1>>0]|0)|0;if((c[u>>2]|0)<=(92681-((c[v>>2]|0)+(a[(c[w>>2]|0)+1>>0]|0))|0)){j=_(j,h)|0;j=(j|0)%(c[m>>2]|0)|0}else j=cd(j,h,c[m>>2]|0)|0;eb[k&63](f,j,c[p>>2]|0);c[p>>2]=(c[p>>2]|0)+8;c[u>>2]=(c[u>>2]|0)+1}}case 0:{e=_((c[v>>2]|0)+(a[(c[w>>2]|0)+1>>0]|0)|0,b[(c[w>>2]|0)+2>>1]|0)|0;eb[c[c[x>>2]>>2]&63](c[x>>2]|0,e,r);C=+g[r>>2];e=c[p>>2]|0;c[p>>2]=e+4;g[e>>2]=C;break}case 1:{e=_((c[v>>2]|0)+(a[(c[w>>2]|0)+1>>0]|0)|0,b[(c[w>>2]|0)+2>>1]|0)|0;eb[c[c[x>>2]>>2]&63](c[x>>2]|0,e,s);C=+g[s+4>>2];e=c[p>>2]|0;c[p>>2]=e+4;g[e>>2]=C;break}case 2:{e=_((c[v>>2]|0)+(a[(c[w>>2]|0)+1>>0]|0)|0,b[(c[w>>2]|0)+2>>1]|0)|0;eb[c[c[x>>2]>>2]&63](c[x>>2]|0,e,c[p>>2]|0);c[p>>2]=(c[p>>2]|0)+8;break}default:{}}while(0);c[w>>2]=(c[w>>2]|0)+4}c[v>>2]=(c[v>>2]|0)+(c[y>>2]|0)}Be(c[x>>2]|0);i=z;return c[q>>2]|0}function Se(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=((c[g>>2]|0)*17|0)+(c[f>>2]|0);if((c[d>>2]|0)<0)c[d>>2]=0-(c[d>>2]|0);i=e;return (c[d>>2]|0)%109|0|0}function Te(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+32|0;h=n+20|0;o=n+16|0;j=n+12|0;k=n+8|0;l=n+4|0;m=n;c[h>>2]=a;c[o>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[m>>2]=g;if((c[o>>2]|0)!=(c[(c[h>>2]|0)+28>>2]|0)){a=0;a=a&1;i=n;return a|0}if((c[k>>2]|0)!=(c[(c[h>>2]|0)+4>>2]|0)){a=0;a=a&1;i=n;return a|0}if((c[l>>2]|0)!=(c[(c[h>>2]|0)+8>>2]|0)){a=0;a=a&1;i=n;return a|0}if((c[m>>2]|0)>(c[(c[h>>2]|0)+12>>2]|0)){a=0;a=a&1;i=n;return a|0}a=(Ue(c[(c[h>>2]|0)+20>>2]|0,c[j>>2]|0)|0)!=0;a=a&1;i=n;return a|0}function Ue(e,f){e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0;k=i;i=i+16|0;g=k+8|0;h=k+4|0;j=k;c[h>>2]=e;c[j>>2]=f;if((c[h>>2]|0)==(c[j>>2]|0)){c[g>>2]=1;h=c[g>>2]|0;i=k;return h|0}a:while(1){if((d[c[h>>2]>>0]|0)!=(d[c[j>>2]>>0]|0)){e=4;break}switch(d[c[h>>2]>>0]|0){case 3:{e=6;break a}case 5:case 4:{if((a[(c[h>>2]|0)+1>>0]|0)!=(a[(c[j>>2]|0)+1>>0]|0)){e=8;break a}break}default:{if((a[(c[h>>2]|0)+1>>0]|0)!=(a[(c[j>>2]|0)+1>>0]|0)){e=11;break a}if((b[(c[h>>2]|0)+2>>1]|0)!=(b[(c[j>>2]|0)+2>>1]|0)){e=11;break a}}}c[h>>2]=(c[h>>2]|0)+4;c[j>>2]=(c[j>>2]|0)+4}if((e|0)==4){c[g>>2]=0;h=c[g>>2]|0;i=k;return h|0}else if((e|0)==6){c[g>>2]=(a[(c[h>>2]|0)+1>>0]|0)==(a[(c[j>>2]|0)+1>>0]|0)&1;h=c[g>>2]|0;i=k;return h|0}else if((e|0)==8){c[g>>2]=0;h=c[g>>2]|0;i=k;return h|0}else if((e|0)==11){c[g>>2]=0;h=c[g>>2]|0;i=k;return h|0}return 0}function Ve(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,We()|0);i=b;return}function We(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,12432)|0;i=b;return c[a>>2]|0}function Xe(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;o=i;i=i+48|0;g=o+36|0;q=o+32|0;p=o+28|0;e=o+24|0;m=o+20|0;n=o+16|0;k=o+12|0;l=o+8|0;j=o+4|0;f=o;c[q>>2]=a;c[p>>2]=b;c[e>>2]=d;c[m>>2]=c[p>>2];c[j>>2]=0;c[f>>2]=0;if(!(Ye(c[q>>2]|0,c[p>>2]|0,c[e>>2]|0)|0)){c[g>>2]=0;m=c[g>>2]|0;i=o;return m|0}c[k>>2]=c[(c[(c[m>>2]|0)+4>>2]|0)+4>>2];c[l>>2]=Ze((c[k>>2]<<1)-1|0)|0;c[f>>2]=wb(c[l>>2]<<1<<2)|0;a=c[e>>2]|0;d=Ed(c[l>>2]|0,2,2)|0;e=Ed(1,0,0)|0;c[j>>2]=vc(a,qh(d,e,c[f>>2]|0,(c[f>>2]|0)+4|0,c[f>>2]|0,(c[f>>2]|0)+4|0)|0,8,0,0)|0;e=c[f>>2]|0;if(c[j>>2]|0){xb(e);c[n>>2]=oh(96,12444,7)|0;c[(c[n>>2]|0)+64>>2]=c[k>>2];c[(c[n>>2]|0)+68>>2]=c[l>>2];c[(c[n>>2]|0)+72>>2]=0;c[(c[n>>2]|0)+76>>2]=0;c[(c[n>>2]|0)+80>>2]=c[j>>2];c[(c[n>>2]|0)+84>>2]=c[(c[(c[m>>2]|0)+4>>2]|0)+4+4>>2];c[(c[n>>2]|0)+88>>2]=c[(c[(c[m>>2]|0)+4>>2]|0)+4+8>>2];jc((c[j>>2]|0)+8|0,(c[j>>2]|0)+8|0,(c[n>>2]|0)+8|0);m=(c[n>>2]|0)+8|0;h[m>>3]=+h[m>>3]+ +((c[k>>2]<<2)+(c[l>>2]<<1)|0);m=(c[n>>2]|0)+8+8|0;h[m>>3]=+h[m>>3]+ 
+((c[k>>2]<<3)+(c[l>>2]<<2)|0);m=(c[n>>2]|0)+8+24|0;h[m>>3]=+h[m>>3]+ +(((c[k>>2]|0)+(c[l>>2]|0)|0)*6|0);c[g>>2]=c[n>>2];m=c[g>>2]|0;i=o;return m|0}else{yb(e);pc(c[j>>2]|0);c[g>>2]=0;m=c[g>>2]|0;i=o;return m|0}return 0}function Ye(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;h=g+8|0;e=g+4|0;f=g;c[g+12>>2]=a;c[h>>2]=b;c[e>>2]=d;c[f>>2]=c[h>>2];if((((c[c[(c[f>>2]|0)+4>>2]>>2]|0)==1?(c[c[(c[f>>2]|0)+8>>2]>>2]|0)==0:0)?(gd(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)|0)!=0:0)?(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)>16:0)if(c[(c[e>>2]|0)+164>>2]&8)e=(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)>24;else e=1;else e=0;i=g;return e&1|0}function Ze(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;b=d;c[b>>2]=a;while(1){e=(md(c[b>>2]|0)|0)!=0^1;a=c[b>>2]|0;if(!e)break;c[b>>2]=a+1}i=d;return a|0}function _e(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;K=J+108|0;h=J+104|0;j=J+100|0;k=J+96|0;l=J+92|0;q=J+88|0;r=J+84|0;t=J+80|0;u=J+76|0;s=J+72|0;v=J+68|0;w=J+64|0;m=J+60|0;n=J+56|0;G=J+52|0;D=J+48|0;A=J+44|0;x=J+40|0;o=J+36|0;H=J+32|0;E=J+28|0;B=J+24|0;y=J+20|0;p=J+16|0;F=J+12|0;I=J+8|0;C=J+4|0;z=J;c[K>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[q>>2]=c[K>>2];c[t>>2]=c[(c[q>>2]|0)+64>>2];c[u>>2]=c[(c[q>>2]|0)+68>>2];c[s>>2]=c[(c[q>>2]|0)+84>>2];c[v>>2]=c[(c[q>>2]|0)+88>>2];c[w>>2]=c[(c[q>>2]|0)+72>>2];c[m>>2]=c[(c[q>>2]|0)+76>>2];c[n>>2]=wb(c[u>>2]<<1<<2)|0;c[r>>2]=0;while(1){if((c[r>>2]|0)>=(c[t>>2]|0))break;e=_(c[r>>2]|0,c[s>>2]|0)|0;g[G>>2]=+g[(c[h>>2]|0)+(e<<2)>>2];e=_(c[r>>2]|0,c[s>>2]|0)|0;g[D>>2]=+g[(c[j>>2]|0)+(e<<2)>>2];g[A>>2]=+g[(c[w>>2]|0)+(c[r>>2]<<1<<2)>>2];g[x>>2]=+g[(c[w>>2]|0)+((c[r>>2]<<1)+1<<2)>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[G>>2]*+g[A>>2]+ 
+g[D>>2]*+g[x>>2];g[(c[n>>2]|0)+((c[r>>2]<<1)+1<<2)>>2]=+g[D>>2]*+g[A>>2]-+g[G>>2]*+g[x>>2];c[r>>2]=(c[r>>2]|0)+1}while(1){if((c[r>>2]|0)>=(c[u>>2]|0))break;g[(c[n>>2]|0)+((c[r>>2]<<1)+1<<2)>>2]=0.0;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=0.0;c[r>>2]=(c[r>>2]|0)+1}c[o>>2]=c[(c[q>>2]|0)+80>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[(c[q>>2]|0)+80>>2]|0,c[n>>2]|0,(c[n>>2]|0)+4|0,c[n>>2]|0,(c[n>>2]|0)+4|0);c[r>>2]=0;while(1){if((c[r>>2]|0)>=(c[u>>2]|0))break;g[H>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[r>>2]<<1)+1<<2)>>2];g[B>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[y>>2]=+g[(c[m>>2]|0)+((c[r>>2]<<1)+1<<2)>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[E>>2]*+g[B>>2]+ +g[H>>2]*+g[y>>2];g[(c[n>>2]|0)+((c[r>>2]<<1)+1<<2)>>2]=+g[H>>2]*+g[B>>2]-+g[E>>2]*+g[y>>2];c[r>>2]=(c[r>>2]|0)+1}c[p>>2]=c[(c[q>>2]|0)+80>>2];Ya[c[(c[p>>2]|0)+56>>2]&63](c[(c[q>>2]|0)+80>>2]|0,c[n>>2]|0,(c[n>>2]|0)+4|0,c[n>>2]|0,(c[n>>2]|0)+4|0);c[r>>2]=0;while(1){if((c[r>>2]|0)>=(c[t>>2]|0))break;g[F>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[I>>2]=+g[(c[n>>2]|0)+((c[r>>2]<<1)+1<<2)>>2];g[C>>2]=+g[(c[w>>2]|0)+(c[r>>2]<<1<<2)>>2];g[z>>2]=+g[(c[w>>2]|0)+((c[r>>2]<<1)+1<<2)>>2];y=_(c[r>>2]|0,c[v>>2]|0)|0;g[(c[k>>2]|0)+(y<<2)>>2]=+g[I>>2]*+g[C>>2]+ +g[F>>2]*+g[z>>2];y=_(c[r>>2]|0,c[v>>2]|0)|0;g[(c[l>>2]|0)+(y<<2)>>2]=+g[F>>2]*+g[C>>2]-+g[I>>2]*+g[z>>2];c[r>>2]=(c[r>>2]|0)+1}xb(c[n>>2]|0);i=J;return}function $e(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;f=i;i=i+16|0;g=f+8|0;d=f+4|0;e=f;c[g>>2]=a;c[d>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+80>>2]|0,c[d>>2]|0);if(!(c[d>>2]|0)){yb(c[(c[e>>2]|0)+72>>2]|0);c[(c[e>>2]|0)+72>>2]=0;yb(c[(c[e>>2]|0)+76>>2]|0);c[(c[e>>2]|0)+76>>2]=0;i=f;return}else{cf(c[d>>2]|0,c[e>>2]|0);i=f;return}}function af(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0;d=i;i=i+32|0;e=d;f=d+20|0;g=d+16|0;h=d+12|0;c[f>>2]=a;c[g>>2]=b;c[h>>2]=c[f>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[h>>2]|0)+68>>2]|0;f=c[(c[h>>2]|0)+80>>2]|0;c[e>>2]=c[(c[h>>2]|0)+64>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,19778,e);i=d;return}function bf(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+80>>2]|0);i=b;return}function cf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0.0;n=i;i=i+48|0;o=n+32|0;d=n+28|0;h=n+24|0;j=n+20|0;k=n+16|0;m=n+12|0;e=n+8|0;l=n+4|0;f=n;c[o>>2]=a;c[d>>2]=b;c[j>>2]=c[(c[d>>2]|0)+64>>2];c[k>>2]=c[(c[d>>2]|0)+68>>2];g[l>>2]=+(c[k>>2]|0);a=wb(c[j>>2]<<1<<2)|0;c[m>>2]=a;c[(c[d>>2]|0)+72>>2]=a;a=wb(c[k>>2]<<1<<2)|0;c[e>>2]=a;c[(c[d>>2]|0)+76>>2]=a;df(c[o>>2]|0,c[j>>2]|0,c[m>>2]|0);c[h>>2]=0;while(1){if((c[h>>2]|0)>=(c[k>>2]|0))break;g[(c[e>>2]|0)+((c[h>>2]<<1)+1<<2)>>2]=0.0;g[(c[e>>2]|0)+(c[h>>2]<<1<<2)>>2]=0.0;c[h>>2]=(c[h>>2]|0)+1}g[c[e>>2]>>2]=+g[c[m>>2]>>2]/+g[l>>2];g[(c[e>>2]|0)+4>>2]=+g[(c[m>>2]|0)+4>>2]/+g[l>>2];c[h>>2]=1;while(1){if((c[h>>2]|0)>=(c[j>>2]|0))break;p=+g[(c[m>>2]|0)+(c[h>>2]<<1<<2)>>2]/+g[l>>2];g[(c[e>>2]|0)+((c[k>>2]|0)-(c[h>>2]|0)<<1<<2)>>2]=p;g[(c[e>>2]|0)+(c[h>>2]<<1<<2)>>2]=p;p=+g[(c[m>>2]|0)+((c[h>>2]<<1)+1<<2)>>2]/+g[l>>2];g[(c[e>>2]|0)+(((c[k>>2]|0)-(c[h>>2]|0)<<1)+1<<2)>>2]=p;g[(c[e>>2]|0)+((c[h>>2]<<1)+1<<2)>>2]=p;c[h>>2]=(c[h>>2]|0)+1}c[f>>2]=c[(c[d>>2]|0)+80>>2];Ya[c[(c[f>>2]|0)+56>>2]&63](c[(c[d>>2]|0)+80>>2]|0,c[e>>2]|0,(c[e>>2]|0)+4|0,c[e>>2]|0,(c[e>>2]|0)+4|0);i=n;return}function df(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;l=i;i=i+32|0;m=l+24|0;e=l+20|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[m>>2]=a;c[e>>2]=b;c[f>>2]=d;c[j>>2]=c[e>>2]<<1;c[k>>2]=Ae(c[m>>2]|0,c[j>>2]|0)|0;c[h>>2]=0;c[g>>2]=0;while(1){b=c[k>>2]|0;if((c[g>>2]|0)>=(c[e>>2]|0))break;eb[c[b>>2]&63](c[k>>2]|0,c[h>>2]|0,(c[f>>2]|0)+(c[g>>2]<<1<<2)|0);c[h>>2]=(c[h>>2]|0)+((c[g>>2]<<1)+1);while(1){if((c[h>>2]|0)<=(c[j>>2]|0))break;c[h>>2]=(c[h>>2]|0)-(c[j>>2]|0)}c[g>>2]=(c[g>>2]|0)+1}Be(b);i=l;return}function ef(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=2)break;a=c[b>>2]|0;Bd(a,ff(c[d>>2]|0)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function ff(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,12460)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}function gf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0;z=i;i=i+128|0;e=z+112|0;B=z+108|0;A=z+104|0;f=z+100|0;v=z+96|0;m=z+92|0;j=z+88|0;k=z+84|0;l=z+80|0;u=z+76|0;h=z+72|0;r=z+68|0;g=z+64|0;q=z+60|0;y=z+56|0;p=z+52|0;t=z+48|0;w=z+44|0;o=z+40|0;n=z+36|0;s=z+32|0;x=z;c[B>>2]=a;c[A>>2]=b;c[f>>2]=d;c[m>>2]=c[B>>2];c[j>>2]=0;c[k>>2]=0;c[l>>2]=0;c[u>>2]=c[A>>2];c[h>>2]=0;c[r>>2]=0;if((((hf(c[m>>2]|0,c[A>>2]|0,c[f>>2]|0)|0)!=0?(c[q>>2]=ie(c[(c[u>>2]|0)+4>>2]|0)|0,ke(c[(c[u>>2]|0)+8>>2]|0,y,p,t)|0,c[r>>2]=Bb(c[q>>2]|0,c[y>>2]|0,c[12472+(c[(c[m>>2]|0)+8>>2]<<2)>>2]|0)|0,c[g>>2]=Cb(c[q>>2]|0,c[y>>2]|0)|0,c[w>>2]=(((c[(c[u>>2]|0)+12>>2]|0)-(c[(c[u>>2]|0)+16>>2]|0)|0)/4|0|0)>0?1:0,c[o>>2]=1-(c[w>>2]|0),c[h>>2]=wb((_(c[r>>2]<<2,c[g>>2]|0)|0)<<1)|0,b=c[f>>2]|0,m=Ed(c[q>>2]|0,c[(c[(c[u>>2]|0)+4>>2]|0)+4+4>>2]|0,2)|0,d=Ed(c[r>>2]|0,c[p>>2]|0,c[g>>2]<<1)|0,d=qh(m,d,c[(c[u>>2]|0)+12>>2]|0,c[(c[u>>2]|0)+16>>2]|0,(c[h>>2]|0)+(c[w>>2]<<2)|0,(c[h>>2]|0)+(c[o>>2]<<2)|0)|0,c[j>>2]=vc(b,d,0,0,(c[(c[u>>2]|0)+12>>2]|0)==(c[(c[u>>2]|0)+20>>2]|0)?4096:0)|0,(c[j>>2]|0)!=0):0)?(m=c[f>>2]|0,b=Dd()|0,d=Fd(c[r>>2]|0,c[g>>2]<<1,c[t>>
2]|0,c[q>>2]|0,2,c[(c[(c[u>>2]|0)+4>>2]|0)+4+8>>2]|0)|0,c[k>>2]=uc(m,qh(b,d,(c[h>>2]|0)+(c[w>>2]<<2)|0,(c[h>>2]|0)+(c[o>>2]<<2)|0,c[(c[u>>2]|0)+20>>2]|0,c[(c[u>>2]|0)+24>>2]|0)|0)|0,(c[k>>2]|0)!=0):0)?(xb(c[h>>2]|0),c[h>>2]=0,c[n>>2]=_(c[p>>2]|0,_(c[r>>2]|0,(c[y>>2]|0)/(c[r>>2]|0)|0)|0)|0,c[s>>2]=_(c[t>>2]|0,_(c[r>>2]|0,(c[y>>2]|0)/(c[r>>2]|0)|0)|0)|0,f=c[f>>2]|0,b=Pd(c[(c[u>>2]|0)+4>>2]|0)|0,d=Ed((c[y>>2]|0)%(c[r>>2]|0)|0,c[p>>2]|0,c[t>>2]|0)|0,c[l>>2]=uc(f,qh(b,d,(c[(c[u>>2]|0)+12>>2]|0)+(c[n>>2]<<2)|0,(c[(c[u>>2]|0)+16>>2]|0)+(c[n>>2]<<2)|0,(c[(c[u>>2]|0)+20>>2]|0)+(c[s>>2]<<2)|0,(c[(c[u>>2]|0)+24>>2]|0)+(c[s>>2]<<2)|0)|0)|0,(c[l>>2]|0)!=0):0){c[v>>2]=oh(112,12480,8)|0;c[(c[v>>2]|0)+64>>2]=c[j>>2];c[(c[v>>2]|0)+68>>2]=c[k>>2];c[(c[v>>2]|0)+72>>2]=c[l>>2];c[(c[v>>2]|0)+76>>2]=c[q>>2];c[(c[v>>2]|0)+80>>2]=c[y>>2];d=_(c[p>>2]|0,c[r>>2]|0)|0;c[(c[v>>2]|0)+92>>2]=d;d=_(c[t>>2]|0,c[r>>2]|0)|0;c[(c[v>>2]|0)+96>>2]=d;c[(c[v>>2]|0)+100>>2]=c[w>>2];c[(c[v>>2]|0)+104>>2]=c[o>>2];c[(c[v>>2]|0)+84>>2]=c[r>>2];c[(c[v>>2]|0)+88>>2]=c[g>>2];jc((c[j>>2]|0)+8|0,(c[k>>2]|0)+8|0,x);ic((c[y>>2]|0)/(c[r>>2]|0)|0,x,(c[l>>2]|0)+8|0,(c[v>>2]|0)+8|0);c[e>>2]=c[v>>2];x=c[e>>2]|0;i=z;return x|0}yb(c[h>>2]|0);pc(c[l>>2]|0);pc(c[k>>2]|0);pc(c[j>>2]|0);c[e>>2]=0;x=c[e>>2]|0;i=z;return x|0}function hf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;e=k+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&1024){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}if(!(nf(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0)|0)){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}if(c[(c[h>>2]|0)+164>>2]&65536){c[j>>2]=c[g>>2];if((c[(c[j>>2]|0)+12>>2]|0)!=(c[(c[j>>2]|0)+20>>2]|0)){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}if(Db(c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0)|0){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}}c[e>>2]=1;h=c[e>>2]|0;i=k;return h|0}function jf(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+80|0;y=x+64|0;g=x+60|0;h=x+56|0;j=x+52|0;k=x+48|0;p=x+44|0;t=x+40|0;l=x+36|0;m=x+32|0;n=x+28|0;o=x+24|0;q=x+20|0;w=x+16|0;s=x+12|0;u=x+8|0;v=x+4|0;r=x;c[y>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[p>>2]=c[y>>2];c[t>>2]=c[(c[p>>2]|0)+84>>2];c[l>>2]=wb((_(c[t>>2]<<2,c[(c[p>>2]|0)+88>>2]|0)|0)<<1)|0;c[m>>2]=c[(c[p>>2]|0)+64>>2];c[n>>2]=c[(c[p>>2]|0)+68>>2];c[w>>2]=c[(c[p>>2]|0)+80>>2];c[s>>2]=c[(c[p>>2]|0)+92>>2];c[u>>2]=c[(c[p>>2]|0)+96>>2];c[v>>2]=c[(c[p>>2]|0)+100>>2];c[r>>2]=c[(c[p>>2]|0)+104>>2];c[q>>2]=c[t>>2];while(1){if((c[q>>2]|0)>(c[w>>2]|0))break;Ya[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[g>>2]|0,c[h>>2]|0,(c[l>>2]|0)+(c[v>>2]<<2)|0,(c[l>>2]|0)+(c[r>>2]<<2)|0);c[g>>2]=(c[g>>2]|0)+(c[s>>2]<<2);c[h>>2]=(c[h>>2]|0)+(c[s>>2]<<2);Ya[c[(c[n>>2]|0)+56>>2]&63](c[n>>2]|0,(c[l>>2]|0)+(c[v>>2]<<2)|0,(c[l>>2]|0)+(c[r>>2]<<2)|0,c[j>>2]|0,c[k>>2]|0);c[j>>2]=(c[j>>2]|0)+(c[u>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[t>>2]|0)}xb(c[l>>2]|0);c[o>>2]=c[(c[p>>2]|0)+72>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);i=x;return}function kf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+72>>2]|0,c[e>>2]|0);i=d;return}function lf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;d=i;i=i+48|0;e=d;k=d+36|0;l=d+32|0;m=d+28|0;c[k>>2]=a;c[l>>2]=b;c[m>>2]=c[k>>2];b=c[c[l>>2]>>2]|0;a=c[l>>2]|0;l=c[(c[m>>2]|0)+84>>2]|0;k=c[(c[m>>2]|0)+80>>2]|0;j=(c[(c[m>>2]|0)+88>>2]|0)%(c[(c[m>>2]|0)+76>>2]|0)|0;h=c[(c[m>>2]|0)+64>>2]|0;g=c[(c[m>>2]|0)+68>>2]|0;f=c[(c[m>>2]|0)+72>>2]|0;c[e>>2]=c[(c[m>>2]|0)+76>>2];c[e+4>>2]=l;c[e+8>>2]=k;c[e+12>>2]=j;c[e+16>>2]=h;c[e+20>>2]=g;c[e+24>>2]=f;eb[b&63](a,19806,e);i=d;return}function mf(a){a=a|0;var 
b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+72>>2]|0);pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function nf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+48|0;e=n+32|0;f=n+28|0;o=n+24|0;g=n+20|0;l=n+16|0;h=n+12|0;m=n+8|0;j=n+4|0;k=n;c[f>>2]=a;c[o>>2]=b;c[g>>2]=d;c[l>>2]=c[o>>2];c[h>>2]=(c[(c[l>>2]|0)+4>>2]|0)+4;do if((c[c[(c[l>>2]|0)+8>>2]>>2]|0)<=1?(c[c[(c[l>>2]|0)+4>>2]>>2]|0)==1:0){ke(c[(c[l>>2]|0)+8>>2]|0,m,j,k)|0;if((Db(c[(c[(c[l>>2]|0)+4>>2]|0)+4>>2]|0)|0)!=0?(c[(c[g>>2]|0)+164>>2]&16384|0)!=0:0){c[e>>2]=0;d=c[e>>2]|0;i=n;return d|0}if(Eb(c[c[h>>2]>>2]|0,c[m>>2]|0,c[(c[f>>2]|0)+8>>2]|0,12472,2)|0){c[e>>2]=0;d=c[e>>2]|0;i=n;return d|0}if((c[(c[l>>2]|0)+12>>2]|0)!=(c[(c[l>>2]|0)+20>>2]|0)){c[e>>2]=(c[(c[h>>2]|0)+8>>2]|0)>2&1;d=c[e>>2]|0;i=n;return d|0}if(Md(c[(c[l>>2]|0)+4>>2]|0,c[(c[l>>2]|0)+8>>2]|0)|0){c[e>>2]=1;d=c[e>>2]|0;i=n;return d|0}if((c[c[(c[l>>2]|0)+8>>2]>>2]|0)!=0?(d=Bb(c[c[h>>2]>>2]|0,c[(c[(c[l>>2]|0)+8>>2]|0)+4>>2]|0,c[12472+(c[(c[f>>2]|0)+8>>2]<<2)>>2]|0)|0,(d|0)!=(c[(c[(c[l>>2]|0)+8>>2]|0)+4>>2]|0)):0)break;c[e>>2]=1;d=c[e>>2]|0;i=n;return d|0}while(0);c[e>>2]=0;d=c[e>>2]|0;i=n;return d|0}function of(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;Cd(12496,c[d>>2]|0);Cd(13052,c[d>>2]|0);i=b;return}function pf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;j=k+16|0;e=k+12|0;f=k+8|0;g=k+4|0;h=k;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(sf(c[e>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[j>>2]=0;a=c[j>>2]|0;i=k;return a|0}c[h>>2]=c[f>>2];if(((c[(c[e>>2]|0)+12>>2]|0)!=2?(c[c[(c[h>>2]|0)+8>>2]>>2]|0)!=0:0)?(c[(c[g>>2]|0)+164>>2]&16|0)!=0:0)if(c[(c[e>>2]|0)+20>>2]|0)e=(jb[c[(c[e>>2]|0)+20>>2]&15](c[e>>2]|0,c[h>>2]|0)|0)!=0;else e=0;else e=1;c[j>>2]=e&1;a=c[j>>2]|0;i=k;return a|0}function qf(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0;h=i;i=i+32|0;n=h+20|0;m=h+16|0;l=h+12|0;k=h+8|0;j=h+4|0;g=h;c[n>>2]=a;c[m>>2]=b;c[l>>2]=d;c[k>>2]=e;c[j>>2]=f;c[g>>2]=zd(c[n>>2]|0,12596)|0;c[(c[g>>2]|0)+8>>2]=c[m>>2];c[(c[g>>2]|0)+12>>2]=c[l>>2];c[(c[g>>2]|0)+16>>2]=c[k>>2];c[(c[g>>2]|0)+20>>2]=c[j>>2];i=h;return c[g>>2]|0}function rf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=oc(c[j>>2]|0,c[h>>2]|0)|0;c[(c[e>>2]|0)+56>>2]=c[g>>2];i=f;return c[e>>2]|0}function sf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;e=j+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[k>>2]=b;c[f>>2]=d;c[g>>2]=c[k>>2];if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)!=1){g=0;g=g&1;i=j;return g|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)>1){g=0;g=g&1;i=j;return g|0}if(((c[(c[e>>2]|0)+12>>2]|0)!=1?(c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+20>>2]|0):0)?(c[(c[f>>2]|0)+164>>2]&4096|0)!=0:0){g=0;g=g&1;i=j;return g|0}a=kd(c[(c[e>>2]|0)+8>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)|0;c[h>>2]=a;if((a|0)<=1){g=0;g=g&1;i=j;return g|0}g=(c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)>(c[h>>2]|0);g=g&1;i=j;return g|0}function tf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+80|0;e=x+68|0;y=x+64|0;f=x+60|0;g=x+56|0;n=x+52|0;s=x+48|0;t=x+44|0;h=x+40|0;j=x+36|0;q=x+32|0;u=x+28|0;p=x+24|0;v=x+20|0;o=x+16|0;r=x+12|0;m=x+8|0;k=x+4|0;l=x;c[y>>2]=a;c[f>>2]=b;c[g>>2]=d;c[n>>2]=c[y>>2];c[t>>2]=0;c[h>>2]=0;c[j>>2]=0;if(!((c[(c[g>>2]|0)+164>>2]&512|0)!=0?(c[(c[g>>2]|0)+160>>2]|0)>1:0))w=3;if((w|0)==3?(pf(c[n>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)!=0:0){c[s>>2]=c[f>>2];c[m>>2]=(c[(c[s>>2]|0)+4>>2]|0)+4;c[q>>2]=c[c[m>>2]>>2];c[u>>2]=kd(c[(c[n>>2]|0)+8>>2]|0,c[q>>2]|0)|0;c[p>>2]=(c[q>>2]|0)/(c[u>>2]|0)|0;ke(c[(c[s>>2]|0)+8>>2]|0,v,o,r)|0;a:do switch(c[(c[n>>2]|0)+12>>2]|0){case 
1:{d=_(c[p>>2]|0,c[(c[m>>2]|0)+8>>2]|0)|0;w=_(c[p>>2]|0,c[(c[m>>2]|0)+8>>2]|0)|0;c[j>>2]=db[c[(c[n>>2]|0)+16>>2]&7](c[n>>2]|0,c[u>>2]|0,d,w,c[p>>2]|0,c[(c[m>>2]|0)+8>>2]|0,c[v>>2]|0,c[r>>2]|0,c[r>>2]|0,0,c[p>>2]|0,c[(c[s>>2]|0)+20>>2]|0,c[(c[s>>2]|0)+24>>2]|0,c[g>>2]|0)|0;if((c[j>>2]|0)!=0?(b=c[g>>2]|0,d=_(c[u>>2]|0,c[(c[m>>2]|0)+4>>2]|0)|0,d=Ed(c[p>>2]|0,d,c[(c[m>>2]|0)+8>>2]|0)|0,w=_(c[p>>2]|0,c[(c[m>>2]|0)+8>>2]|0)|0,w=Fd(c[u>>2]|0,c[(c[m>>2]|0)+4>>2]|0,w,c[v>>2]|0,c[o>>2]|0,c[r>>2]|0)|0,c[h>>2]=uc(b,qh(d,w,c[(c[s>>2]|0)+12>>2]|0,c[(c[s>>2]|0)+16>>2]|0,c[(c[s>>2]|0)+20>>2]|0,c[(c[s>>2]|0)+24>>2]|0)|0)|0,(c[h>>2]|0)!=0):0){c[t>>2]=oh(80,12608,9)|0;w=20}else w=21;break}case 2:case 0:{if((c[(c[n>>2]|0)+12>>2]|0)==2){c[k>>2]=c[o>>2];c[l>>2]=_(c[p>>2]|0,c[(c[m>>2]|0)+4>>2]|0)|0;if((c[u>>2]|0)!=(c[v>>2]|0)){w=21;break a}if((c[(c[m>>2]|0)+4>>2]|0)!=(_(c[u>>2]|0,c[k>>2]|0)|0)){w=21;break a}if((c[(c[s>>2]|0)+12>>2]|0)!=(c[(c[s>>2]|0)+20>>2]|0)){w=21;break a}if((c[(c[m>>2]|0)+4>>2]|0)!=(_(c[u>>2]|0,c[(c[m>>2]|0)+8>>2]|0)|0)){w=21;break a}if((c[k>>2]|0)!=(c[(c[m>>2]|0)+8>>2]|0)){w=21;break a}if((c[l>>2]|0)!=(c[r>>2]|0)){w=21;break a}}else{c[k>>2]=_(c[p>>2]|0,c[(c[m>>2]|0)+4>>2]|0)|0;c[l>>2]=c[o>>2]}w=_(c[p>>2]|0,c[(c[m>>2]|0)+4>>2]|0)|0;c[j>>2]=db[c[(c[n>>2]|0)+16>>2]&7](c[n>>2]|0,c[u>>2]|0,w,c[k>>2]|0,c[p>>2]|0,c[(c[m>>2]|0)+4>>2]|0,c[v>>2]|0,c[o>>2]|0,c[l>>2]|0,0,c[p>>2]|0,c[(c[s>>2]|0)+12>>2]|0,c[(c[s>>2]|0)+16>>2]|0,c[g>>2]|0)|0;if((c[j>>2]|0)!=0?(b=c[g>>2]|0,d=Ed(c[p>>2]|0,c[(c[m>>2]|0)+4>>2]|0,_(c[u>>2]|0,c[(c[m>>2]|0)+8>>2]|0)|0)|0,w=Fd(c[u>>2]|0,c[k>>2]|0,c[(c[m>>2]|0)+8>>2]|0,c[v>>2]|0,c[l>>2]|0,c[r>>2]|0)|0,c[h>>2]=uc(b,qh(d,w,c[(c[s>>2]|0)+12>>2]|0,c[(c[s>>2]|0)+16>>2]|0,c[(c[s>>2]|0)+20>>2]|0,c[(c[s>>2]|0)+24>>2]|0)|0)|0,(c[h>>2]|0)!=0):0){c[t>>2]=oh(80,12608,10)|0;w=20}else 
w=21;break}default:w=20}while(0);if((w|0)==20){c[(c[t>>2]|0)+64>>2]=c[h>>2];c[(c[t>>2]|0)+68>>2]=c[j>>2];c[(c[t>>2]|0)+72>>2]=c[u>>2];jc((c[h>>2]|0)+8|0,(c[j>>2]|0)+8|0,(c[t>>2]|0)+8|0);c[(c[t>>2]|0)+52>>2]=c[(c[j>>2]|0)+52>>2];c[e>>2]=c[t>>2];w=c[e>>2]|0;i=x;return w|0}else if((w|0)==21){pc(c[j>>2]|0);pc(c[h>>2]|0);c[e>>2]=0;w=c[e>>2]|0;i=x;return w|0}}c[e>>2]=0;w=c[e>>2]|0;i=x;return w|0}function uf(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;n=g+24|0;m=g+20|0;j=g+16|0;h=g+12|0;k=g+8|0;o=g+4|0;l=g;c[p>>2]=a;c[n>>2]=b;c[m>>2]=d;c[j>>2]=e;c[h>>2]=f;c[k>>2]=c[p>>2];c[o>>2]=c[(c[k>>2]|0)+64>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+64>>2]|0,c[n>>2]|0,c[m>>2]|0,c[j>>2]|0,c[h>>2]|0);c[l>>2]=c[(c[k>>2]|0)+68>>2];eb[c[(c[l>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+68>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function vf(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;l=g+24|0;k=g+20|0;j=g+16|0;h=g+12|0;m=g+8|0;n=g+4|0;o=g;c[p>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;c[m>>2]=c[p>>2];c[o>>2]=c[(c[m>>2]|0)+68>>2];eb[c[(c[o>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+68>>2]|0,c[l>>2]|0,c[k>>2]|0);c[n>>2]=c[(c[m>>2]|0)+64>>2];Ya[c[(c[n>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+64>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function wf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function xf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+72>>2]|0;g=c[(c[j>>2]|0)+68>>2]|0;f=c[(c[j>>2]|0)+64>>2]|0;c[e>>2]=(c[(c[j>>2]|0)+56>>2]|0)==9?22957:22961;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,20162,e);i=d;return}function yf(a){a=a|0;var 
b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function zf(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;Af(c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0,0);Af(c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0,1);i=f;return}function Af(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;g=n+20|0;h=n+16|0;j=n+12|0;k=n+8|0;l=n+4|0;m=n;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[m>>2]=qf(36,c[c[j>>2]>>2]|0,c[k>>2]|0,1,0)|0;c[(c[m>>2]|0)+32>>2]=c[h>>2];c[(c[m>>2]|0)+24>>2]=c[j>>2];c[(c[m>>2]|0)+28>>2]=c[l>>2];Bd(c[g>>2]|0,c[m>>2]|0);if(!(c[3148]|0)){i=n;return}c[m>>2]=kb[c[12592>>2]&0](36,c[c[j>>2]>>2]|0,c[k>>2]|0,1,0)|0;c[(c[m>>2]|0)+32>>2]=c[h>>2];c[(c[m>>2]|0)+24>>2]=c[j>>2];c[(c[m>>2]|0)+28>>2]=c[l>>2];Bd(c[g>>2]|0,c[m>>2]|0);i=n;return}function Bf(a,b,d,e,f,g,j,k,l,m,n,o,p,q){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;q=q|0;var r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0.0;E=i;i=i+80|0;C=E+72|0;K=E+68|0;t=E+64|0;u=E+60|0;J=E+56|0;v=E+52|0;w=E+48|0;x=E+44|0;y=E+40|0;I=E+36|0;r=E+32|0;s=E+28|0;H=E+24|0;G=E+20|0;F=E+16|0;A=E+12|0;D=E+8|0;z=E+4|0;B=E;c[K>>2]=a;c[t>>2]=b;c[u>>2]=d;c[J>>2]=e;c[v>>2]=f;c[w>>2]=g;c[x>>2]=j;c[y>>2]=k;c[I>>2]=l;c[r>>2]=m;c[s>>2]=n;c[H>>2]=o;c[G>>2]=p;c[F>>2]=q;c[A>>2]=c[K>>2];c[z>>2]=c[(c[A>>2]|0)+24>>2];if(!(Cf(c[A>>2]|0,c[t>>2]|0,c[u>>2]|0,c[J>>2]|0,c[v>>2]|0,c[w>>2]|0,c[x>>2]|0,c[y>>2]|0,c[I>>2]|0,c[r>>2]|0,(c[r>>2]|0)+(c[s>>2]|0)|0,c[H>>2]|0,c[G>>2]|0,c[F>>2]|0,B)|0)){c[C>>2]=0;q=c[C>>2]|0;i=E;return q|0}if(c[(c[A>>2]|0)+28>>2]|0)c[D>>2]=rf(120,12624,20)|0;else 
c[D>>2]=rf(120,12624,(c[B>>2]|0)!=0?22:21)|0;c[(c[D>>2]|0)+64>>2]=c[(c[A>>2]|0)+32>>2];c[(c[D>>2]|0)+72>>2]=c[u>>2];c[(c[D>>2]|0)+108>>2]=0;c[(c[D>>2]|0)+68>>2]=c[t>>2];c[(c[D>>2]|0)+76>>2]=c[v>>2];c[(c[D>>2]|0)+80>>2]=c[w>>2];c[(c[D>>2]|0)+84>>2]=c[x>>2];c[(c[D>>2]|0)+88>>2]=c[y>>2];c[(c[D>>2]|0)+92>>2]=c[r>>2];c[(c[D>>2]|0)+96>>2]=(c[r>>2]|0)+(c[s>>2]|0);c[(c[D>>2]|0)+112>>2]=c[A>>2];q=(Gf(c[t>>2]|0)|0)<<1;c[(c[D>>2]|0)+104>>2]=q;c[(c[D>>2]|0)+100>>2]=c[B>>2];fc((c[D>>2]|0)+8|0);q=_(c[x>>2]|0,(c[s>>2]|0)/(c[(c[(c[z>>2]|0)+12>>2]|0)+4>>2]|0)|0)|0;lc(q,(c[z>>2]|0)+16|0,(c[D>>2]|0)+8|0);if(c[(c[A>>2]|0)+28>>2]|0){q=_(c[t>>2]<<3,c[s>>2]|0)|0;L=+(_(q,c[x>>2]|0)|0);q=(c[D>>2]|0)+8+24|0;h[q>>3]=+h[q>>3]+L}if(((c[t>>2]|0)>=5?(c[(c[A>>2]|0)+28>>2]|0)==0:0)&(c[t>>2]|0)<64)r=(c[v>>2]|0)>=(c[t>>2]|0);else r=0;c[(c[D>>2]|0)+52>>2]=r&1;c[C>>2]=c[D>>2];q=c[C>>2]|0;i=E;return q|0}function Cf(a,b,d,e,f,g,h,j,k,l,m,n,o,p,q){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;q=q|0;var r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;H=i;i=i+64|0;r=H+60|0;s=H+56|0;z=H+52|0;A=H+48|0;B=H+44|0;C=H+40|0;D=H+36|0;E=H+32|0;F=H+28|0;G=H+24|0;t=H+20|0;u=H+16|0;v=H+12|0;w=H+8|0;x=H+4|0;y=H;c[s>>2]=a;c[z>>2]=b;c[A>>2]=d;c[B>>2]=e;c[C>>2]=f;c[D>>2]=g;c[E>>2]=h;c[F>>2]=j;c[G>>2]=k;c[t>>2]=l;c[u>>2]=m;c[v>>2]=n;c[w>>2]=o;c[x>>2]=p;c[y>>2]=q;if(c[(c[s>>2]|0)+28>>2]|0){c[c[y>>2]>>2]=0;if(!(Lf(c[s>>2]|0,c[z>>2]|0,c[A>>2]|0,c[B>>2]|0,c[C>>2]|0,c[D>>2]|0,c[E>>2]|0,c[F>>2]|0,c[G>>2]|0,c[t>>2]|0,c[u>>2]|0,c[v>>2]|0,c[w>>2]|0,c[x>>2]|0)|0)){c[r>>2]=0;f=c[r>>2]|0;i=H;return f|0}}else if(!(Mf(c[s>>2]|0,c[z>>2]|0,c[A>>2]|0,c[B>>2]|0,c[C>>2]|0,c[D>>2]|0,c[E>>2]|0,c[F>>2]|0,c[G>>2]|0,c[t>>2]|0,c[u>>2]|0,c[v>>2]|0,c[w>>2]|0,c[x>>2]|0,c[y>>2]|0)|0)){c[r>>2]=0;f=c[r>>2]|0;i=H;return f|0}if((c[(c[x>>2]|0)+164>>2]&65536|0)!=0?(f=_(c[C>>2]|0,c[z>>2]|0)|0,(Qb((c[(c[s>>2]|0)+28>>2]|0)!=0?512:16,c[E>>2]|0,f,c[z>>2]|0)|0)!=0):0){c[r>>2]=0;f=c[r>>2]|0;i=H;return 
f|0}if((_(c[C>>2]|0,c[z>>2]|0)|0)>262144?(c[(c[x>>2]|0)+164>>2]&2048|0)!=0:0){c[r>>2]=0;f=c[r>>2]|0;i=H;return f|0}c[r>>2]=1;f=c[r>>2]|0;i=H;return f|0}function Df(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+64|0;s=r+48|0;f=r+44|0;g=r+40|0;l=r+36|0;m=r+32|0;n=r+28|0;q=r+24|0;e=r+20|0;h=r+16|0;j=r+12|0;o=r+8|0;p=r+4|0;k=r;c[s>>2]=a;c[f>>2]=b;c[g>>2]=d;c[l>>2]=c[s>>2];c[q>>2]=c[(c[l>>2]|0)+84>>2];c[e>>2]=c[(c[l>>2]|0)+68>>2];c[h>>2]=Gf(c[e>>2]|0)|0;c[o>>2]=c[(c[l>>2]|0)+92>>2];c[p>>2]=c[(c[l>>2]|0)+96>>2];c[k>>2]=(_(c[e>>2]|0,c[h>>2]|0)|0)<<1<<2;a=c[k>>2]|0;if((c[k>>2]|0)>>>0<65536){d=i;i=i+((1*a|0)+15&-16)|0;c[j>>2]=d}else c[j>>2]=wb(a)|0;c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[q>>2]|0))break;c[n>>2]=c[o>>2];while(1){b=c[l>>2]|0;d=c[f>>2]|0;a=c[g>>2]|0;e=c[n>>2]|0;if(((c[n>>2]|0)+(c[h>>2]|0)|0)>=(c[p>>2]|0))break;Hf(b,d,a,e,(c[n>>2]|0)+(c[h>>2]|0)|0,c[j>>2]|0);c[n>>2]=(c[n>>2]|0)+(c[h>>2]|0)}Hf(b,d,a,e,c[p>>2]|0,c[j>>2]|0);c[m>>2]=(c[m>>2]|0)+1;c[f>>2]=(c[f>>2]|0)+(c[(c[l>>2]|0)+88>>2]<<2);c[g>>2]=(c[g>>2]|0)+(c[(c[l>>2]|0)+88>>2]<<2)}if((c[k>>2]|0)>>>0<65536){i=r;return}xb(c[j>>2]|0);i=r;return}function Ef(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;o=i;i=i+48|0;q=o+40|0;e=o+36|0;f=o+32|0;g=o+28|0;h=o+24|0;m=o+20|0;n=o+16|0;j=o+12|0;p=o+8|0;k=o+4|0;l=o;c[q>>2]=a;c[e>>2]=b;c[f>>2]=d;c[g>>2]=c[q>>2];c[m>>2]=c[(c[g>>2]|0)+84>>2];c[n>>2]=c[(c[g>>2]|0)+88>>2];c[j>>2]=c[(c[g>>2]|0)+92>>2];c[p>>2]=c[(c[g>>2]|0)+96>>2];c[k>>2]=(c[p>>2]|0)-1;c[l>>2]=c[(c[g>>2]|0)+80>>2];c[h>>2]=0;while(1){if((c[h>>2]|0)>=(c[m>>2]|0))break;b=(c[e>>2]|0)+((_(c[j>>2]|0,c[l>>2]|0)|0)<<2)|0;d=(c[f>>2]|0)+((_(c[j>>2]|0,c[l>>2]|0)|0)<<2)|0;Xa[c[(c[g>>2]|0)+64>>2]&127](b,d,c[c[(c[g>>2]|0)+108>>2]>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[j>>2]|0,c[k>>2]|0,c[l>>2]|0);d=(c[e>>2]|0)+((_(c[k>>2]|0,c[l>>2]|0)|0)<<2)|0;b=(c[f>>2]|0)+((_(c[k>>2]|0,c[l>>2]|0)|0)<<2)|0;Xa[c[(c[g>>2]|0)+64>>2]&127](d,b,c[c[(c[g>>2]|0)+108>>2]>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[k>>2]|0,(c[k>>2]|0)+2|0,0);c[h>>2]=(c[h>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[n>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[n>>2]<<2)}i=o;return}function Ff(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;l=i;i=i+32|0;m=l+24|0;e=l+20|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[m>>2]=a;c[e>>2]=b;c[f>>2]=d;c[g>>2]=c[m>>2];c[h>>2]=0;while(1){if((c[h>>2]|0)>=(c[(c[g>>2]|0)+84>>2]|0))break;c[j>>2]=c[(c[g>>2]|0)+92>>2];c[k>>2]=c[(c[g>>2]|0)+80>>2];d=(c[e>>2]|0)+((_(c[j>>2]|0,c[k>>2]|0)|0)<<2)|0;b=(c[f>>2]|0)+((_(c[j>>2]|0,c[k>>2]|0)|0)<<2)|0;Xa[c[(c[g>>2]|0)+64>>2]&127](d,b,c[c[(c[g>>2]|0)+108>>2]>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[j>>2]|0,c[(c[g>>2]|0)+96>>2]|0,c[k>>2]|0);c[h>>2]=(c[h>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[(c[g>>2]|0)+88>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[(c[g>>2]|0)+88>>2]<<2)}i=l;return}function Gf(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;c[b>>2]=(c[b>>2]|0)+3;c[b>>2]=c[b>>2]&-4;i=d;return (c[b>>2]|0)+2|0}function Hf(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;h=i;i=i+48|0;o=h+32|0;r=h+28|0;q=h+24|0;k=h+20|0;l=h+16|0;p=h+12|0;n=h+8|0;m=h+4|0;j=h;c[o>>2]=a;c[r>>2]=b;c[q>>2]=d;c[k>>2]=e;c[l>>2]=f;c[p>>2]=g;c[n>>2]=c[(c[o>>2]|0)+104>>2];c[m>>2]=c[(c[o>>2]|0)+72>>2];c[j>>2]=c[(c[o>>2]|0)+80>>2];e=(c[r>>2]|0)+((_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;d=(c[q>>2]|0)+((_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;Hb(e,d,c[p>>2]|0,(c[p>>2]|0)+4|0,c[(c[o>>2]|0)+68>>2]|0,c[m>>2]|0,c[n>>2]|0,(c[l>>2]|0)-(c[k>>2]|0)|0,c[j>>2]|0,2);Xa[c[(c[o>>2]|0)+64>>2]&127](c[p>>2]|0,(c[p>>2]|0)+4|0,c[c[(c[o>>2]|0)+108>>2]>>2]|0,c[(c[o>>2]|0)+104>>2]|0,c[k>>2]|0,c[l>>2]|0,2);d=(c[r>>2]|0)+((_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;e=(c[q>>2]|0)+((_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;Ib(c[p>>2]|0,(c[p>>2]|0)+4|0,d,e,c[(c[o>>2]|0)+68>>2]|0,c[n>>2]|0,c[m>>2]|0,(c[l>>2]|0)-(c[k>>2]|0)|0,2,c[j>>2]|0);i=h;return}function If(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];a=_(c[(c[e>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+76>>2]|0)|0;Me(c[f>>2]|0,(c[e>>2]|0)+108|0,c[(c[(c[(c[e>>2]|0)+112>>2]|0)+24>>2]|0)+8>>2]|0,a,c[(c[e>>2]|0)+68>>2]|0,(c[(c[e>>2]|0)+76>>2]|0)+(c[(c[e>>2]|0)+100>>2]|0)|0);i=d;return}function Jf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;j=i;i=i+64|0;h=j+24|0;g=j;l=j+56|0;d=j+52|0;f=j+48|0;k=j+44|0;e=j+40|0;c[l>>2]=a;c[d>>2]=b;c[f>>2]=c[l>>2];c[k>>2]=c[(c[f>>2]|0)+112>>2];c[e>>2]=c[(c[k>>2]|0)+24>>2];b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;d=c[(c[f>>2]|0)+68>>2]|0;if(c[(c[k>>2]|0)+28>>2]|0){l=Gf(d)|0;k=c[(c[f>>2]|0)+68>>2]|0;h=Le(c[(c[f>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0;d=c[(c[f>>2]|0)+84>>2]|0;e=c[(c[e>>2]|0)+4>>2]|0;c[g>>2]=l;c[g+4>>2]=k;c[g+8>>2]=h;c[g+12>>2]=d;c[g+16>>2]=e;eb[b&63](a,20189,g);i=j;return}else{g=Le(c[(c[f>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0;f=c[(c[f>>2]|0)+84>>2]|0;e=c[(c[e>>2]|0)+4>>2]|0;c[h>>2]=d;c[h+4>>2]=g;c[h+8>>2]=f;c[h+12>>2]=e;eb[b&63](a,20222,h);i=j;return}}function Kf(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b+4|0;c[d>>2]=a;c[b>>2]=c[d>>2];i=b;return}function Lf(a,b,d,e,f,g,h,j,k,l,m,n,o,p){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;var q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0;B=i;i=i+64|0;C=B+60|0;q=B+56|0;t=B+52|0;u=B+48|0;v=B+44|0;w=B+32|0;x=B+28|0;y=B+24|0;r=B+20|0;s=B+8|0;A=B+4|0;z=B;c[C>>2]=a;c[q>>2]=b;c[t>>2]=d;c[u>>2]=e;c[v>>2]=f;c[B+40>>2]=g;c[B+36>>2]=h;c[w>>2]=j;c[x>>2]=k;c[y>>2]=l;c[r>>2]=m;c[B+16>>2]=n;c[B+12>>2]=o;c[s>>2]=p;c[A>>2]=c[(c[C>>2]|0)+24>>2];if((c[q>>2]|0)!=(c[c[A>>2]>>2]|0)){p=0;p=p&1;i=B;return p|0}if((c[t>>2]|0)!=(c[u>>2]|0)){p=0;p=p&1;i=B;return p|0}if((c[w>>2]|0)!=(c[x>>2]|0)){p=0;p=p&1;i=B;return p|0}c[z>>2]=Gf(c[q>>2]|0)|0;if(!(_a[c[c[(c[A>>2]|0)+12>>2]>>2]&1](c[A>>2]|0,0,4|0,c[z>>2]<<1,0,c[v>>2]|0,c[y>>2]|0,(c[y>>2]|0)+(c[z>>2]|0)|0,2,c[s>>2]|0)|0)){p=0;p=p&1;i=B;return p|0}p=(_a[c[c[(c[A>>2]|0)+12>>2]>>2]&1](c[A>>2]|0,0,4|0,c[z>>2]<<1,0,c[v>>2]|0,c[y>>2]|0,c[r>>2]|0,2,c[s>>2]|0)|0)!=0;p=p&1;i=B;return p|0}function Mf(a,b,d,e,f,g,h,j,k,l,m,n,o,p,q){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;q=q|0;var r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0;E=i;i=i+64|0;G=E+60|0;F=E+56|0;w=E+52|0;x=E+48|0;y=E+44|0;z=E+40|0;A=E+32|0;B=E+28|0;C=E+24|0;r=E+20|0;s=E+16|0;t=E+12|0;u=E+8|0;v=E+4|0;D=E;c[G>>2]=a;c[F>>2]=b;c[w>>2]=d;c[x>>2]=e;c[y>>2]=f;c[z>>2]=g;c[E+36>>2]=h;c[A>>2]=j;c[B>>2]=k;c[C>>2]=l;c[r>>2]=m;c[s>>2]=n;c[t>>2]=o;c[u>>2]=p;c[v>>2]=q;c[D>>2]=c[(c[G>>2]|0)+24>>2];if((c[F>>2]|0)!=(c[c[D>>2]>>2]|0)){g=0;g=g&1;i=E;return g|0}if((c[w>>2]|0)!=(c[x>>2]|0)){g=0;g=g&1;i=E;return g|0}if((c[A>>2]|0)!=(c[B>>2]|0)){g=0;g=g&1;i=E;return g|0}c[c[v>>2]>>2]=0;if(!(_a[c[c[(c[D>>2]|0)+12>>2]>>2]&1](c[D>>2]|0,c[s>>2]|0,c[t>>2]|0,c[w>>2]|0,c[A>>2]|0,c[y>>2]|0,c[C>>2]|0,c[r>>2]|0,c[z>>2]|0,c[u>>2]|0)|0)){c[c[v>>2]>>2]=1;if(c[C>>2]|0){g=0;g=g&1;i=E;return g|0}if((c[r>>2]|0)!=(c[y>>2]|0)){g=0;g=g&1;i=E;return 
g|0}if(!(_a[c[c[(c[D>>2]|0)+12>>2]>>2]&1](c[D>>2]|0,c[s>>2]|0,c[t>>2]|0,c[w>>2]|0,c[A>>2]|0,c[y>>2]|0,c[C>>2]|0,(c[r>>2]|0)-1|0,c[z>>2]|0,c[u>>2]|0)|0)){g=0;g=g&1;i=E;return g|0}if(!(_a[c[c[(c[D>>2]|0)+12>>2]>>2]&1](c[D>>2]|0,c[s>>2]|0,c[t>>2]|0,c[w>>2]|0,c[A>>2]|0,c[y>>2]|0,(c[r>>2]|0)-1|0,(c[r>>2]|0)+1|0,c[z>>2]|0,c[u>>2]|0)|0)){g=0;g=g&1;i=E;return g|0}}g=(_a[c[c[(c[D>>2]|0)+12>>2]>>2]&1](c[D>>2]|0,(c[s>>2]|0)+(c[A>>2]<<2)|0,(c[t>>2]|0)+(c[A>>2]<<2)|0,c[w>>2]|0,c[A>>2]|0,c[y>>2]|0,c[C>>2]|0,(c[r>>2]|0)-(c[c[v>>2]>>2]|0)|0,c[z>>2]|0,c[u>>2]|0)|0)!=0;g=g&1;i=E;return g|0}function Nf(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;Of(c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,(c[g>>2]|0)+2|0);i=f;return}function Of(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=qf(32,c[c[h>>2]>>2]|0,c[j>>2]|0,2,0)|0;c[(c[k>>2]|0)+28>>2]=c[g>>2];c[(c[k>>2]|0)+24>>2]=c[h>>2];Bd(c[f>>2]|0,c[k>>2]|0);if(!(c[3148]|0)){i=l;return}c[k>>2]=kb[c[12592>>2]&0](32,c[c[h>>2]>>2]|0,c[j>>2]|0,2,0)|0;c[(c[k>>2]|0)+28>>2]=c[g>>2];c[(c[k>>2]|0)+24>>2]=c[h>>2];Bd(c[f>>2]|0,c[k>>2]|0);i=l;return}function Pf(a,b,d,e,f,g,h,j,k,l,m,n,o,p){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;var 
q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0;C=i;i=i+80|0;q=C+68|0;I=C+64|0;t=C+60|0;u=C+56|0;H=C+52|0;v=C+48|0;w=C+44|0;x=C+40|0;y=C+36|0;G=C+32|0;r=C+28|0;s=C+24|0;F=C+20|0;E=C+16|0;D=C+12|0;A=C+8|0;B=C+4|0;z=C;c[I>>2]=a;c[t>>2]=b;c[u>>2]=d;c[H>>2]=e;c[v>>2]=f;c[w>>2]=g;c[x>>2]=h;c[y>>2]=j;c[G>>2]=k;c[r>>2]=l;c[s>>2]=m;c[F>>2]=n;c[E>>2]=o;c[D>>2]=p;c[A>>2]=c[I>>2];c[z>>2]=c[(c[A>>2]|0)+24>>2];if(Qf(c[A>>2]|0,c[t>>2]|0,c[u>>2]|0,c[H>>2]|0,c[v>>2]|0,c[w>>2]|0,c[x>>2]|0,c[y>>2]|0,c[G>>2]|0,c[r>>2]|0,(c[r>>2]|0)+(c[s>>2]|0)|0,c[F>>2]|0,c[E>>2]|0,c[D>>2]|0)|0){c[B>>2]=rf(112,12640,23)|0;c[(c[B>>2]|0)+64>>2]=c[(c[A>>2]|0)+28>>2];c[(c[B>>2]|0)+72>>2]=c[u>>2];c[(c[B>>2]|0)+76>>2]=c[y>>2];c[(c[B>>2]|0)+100>>2]=0;c[(c[B>>2]|0)+68>>2]=c[t>>2];c[(c[B>>2]|0)+80>>2]=c[v>>2];c[(c[B>>2]|0)+84>>2]=c[w>>2];c[(c[B>>2]|0)+88>>2]=c[x>>2];c[(c[B>>2]|0)+92>>2]=c[r>>2];c[(c[B>>2]|0)+96>>2]=(c[r>>2]|0)+(c[s>>2]|0);c[(c[B>>2]|0)+104>>2]=c[A>>2];fc((c[B>>2]|0)+8|0);lc((c[s>>2]|0)/(c[(c[(c[z>>2]|0)+12>>2]|0)+4>>2]|0)|0,(c[z>>2]|0)+16|0,(c[B>>2]|0)+8|0);c[q>>2]=c[B>>2];B=c[q>>2]|0;i=C;return B|0}else{c[q>>2]=0;B=c[q>>2]|0;i=C;return B|0}return 0}function Qf(a,b,d,e,f,g,h,j,k,l,m,n,o,p){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;var q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0;E=i;i=i+64|0;F=E+56|0;q=E+52|0;v=E+48|0;w=E+44|0;x=E+40|0;y=E+36|0;z=E+32|0;A=E+28|0;B=E+24|0;C=E+20|0;r=E+16|0;s=E+12|0;t=E+8|0;u=E+4|0;D=E;c[F>>2]=a;c[q>>2]=b;c[v>>2]=d;c[w>>2]=e;c[x>>2]=f;c[y>>2]=g;c[z>>2]=h;c[A>>2]=j;c[B>>2]=k;c[C>>2]=l;c[r>>2]=m;c[s>>2]=n;c[t>>2]=o;c[u>>2]=p;c[D>>2]=c[(c[F>>2]|0)+24>>2];if((c[q>>2]|0)!=(c[c[D>>2]>>2]|0)){g=0;g=g&1;i=E;return g|0}if((c[q>>2]|0)!=(c[z>>2]|0)){g=0;g=g&1;i=E;return g|0}if((c[v>>2]|0)!=(c[B>>2]|0)){g=0;g=g&1;i=E;return g|0}if((c[A>>2]|0)!=(c[w>>2]|0)){g=0;g=g&1;i=E;return 
g|0}g=(_a[c[c[(c[D>>2]|0)+12>>2]>>2]&1](c[D>>2]|0,c[s>>2]|0,c[t>>2]|0,c[v>>2]|0,c[A>>2]|0,c[x>>2]|0,c[C>>2]|0,c[r>>2]|0,c[y>>2]|0,c[u>>2]|0)|0)!=0;g=g&1;i=E;return g|0}function Rf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;k=e+16|0;j=e+12|0;g=e+8|0;h=e+4|0;f=e;c[l>>2]=a;c[k>>2]=b;c[j>>2]=d;c[g>>2]=c[l>>2];c[h>>2]=c[(c[g>>2]|0)+92>>2];c[f>>2]=c[(c[g>>2]|0)+84>>2];d=(c[k>>2]|0)+((_(c[h>>2]|0,c[f>>2]|0)|0)<<2)|0;a=(c[j>>2]|0)+((_(c[h>>2]|0,c[f>>2]|0)|0)<<2)|0;fb[c[(c[g>>2]|0)+64>>2]&7](d,a,c[c[(c[g>>2]|0)+100>>2]>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[(c[g>>2]|0)+76>>2]|0,c[h>>2]|0,c[(c[g>>2]|0)+96>>2]|0,c[f>>2]|0);i=e;return}function Sf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];a=_(c[(c[e>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+80>>2]|0)|0;Me(c[f>>2]|0,(c[e>>2]|0)+100|0,c[(c[(c[(c[e>>2]|0)+104>>2]|0)+24>>2]|0)+8>>2]|0,a,c[(c[e>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+80>>2]|0);i=d;return}function Tf(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0;d=i;i=i+48|0;e=d;k=d+32|0;j=d+28|0;g=d+24|0;h=d+20|0;f=d+16|0;c[k>>2]=a;c[j>>2]=b;c[g>>2]=c[k>>2];c[h>>2]=c[(c[g>>2]|0)+104>>2];c[f>>2]=c[(c[h>>2]|0)+24>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=c[(c[g>>2]|0)+68>>2]|0;h=Le(c[(c[g>>2]|0)+68>>2]|0,c[(c[f>>2]|0)+8>>2]|0)|0;g=c[(c[g>>2]|0)+88>>2]|0;f=c[(c[f>>2]|0)+4>>2]|0;c[e>>2]=j;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,20249,e);i=d;return}function Uf(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b+4|0;c[d>>2]=a;c[b>>2]=c[d>>2];i=b;return}function Vf(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;Wf(c[d>>2]|0,0,1);Wf(c[d>>2]|0,0,0);i=b;return}function Wf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;j=i;i=i+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=qf(24,c[f>>2]|0,c[g>>2]|0,3,0)|0;Bd(c[e>>2]|0,c[h>>2]|0);if(!(c[3148]|0)){i=j;return}c[h>>2]=kb[c[12592>>2]&0](24,c[f>>2]|0,c[g>>2]|0,3,0)|0;Bd(c[e>>2]|0,c[h>>2]|0);i=j;return}function 
Xf(a,b,d,e,f,g,j,k,l,m,n,o,p,q){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;q=q|0;var r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0;I=i;i=i+96|0;r=I+80|0;L=I+76|0;x=I+72|0;y=I+68|0;K=I+64|0;z=I+60|0;A=I+56|0;B=I+52|0;C=I+48|0;J=I+44|0;s=I+40|0;t=I+36|0;u=I+32|0;v=I+28|0;w=I+24|0;F=I+20|0;H=I+16|0;D=I+12|0;E=I+8|0;G=I;c[L>>2]=a;c[x>>2]=b;c[y>>2]=d;c[K>>2]=e;c[z>>2]=f;c[A>>2]=g;c[B>>2]=j;c[C>>2]=k;c[J>>2]=l;c[s>>2]=m;c[t>>2]=n;c[u>>2]=o;c[v>>2]=p;c[w>>2]=q;c[F>>2]=c[L>>2];c[D>>2]=0;c[E>>2]=_(c[A>>2]|0,c[s>>2]|0)|0;if(!(Yf(c[y>>2]|0,c[K>>2]|0,c[C>>2]|0,c[J>>2]|0,c[w>>2]|0)|0)){c[r>>2]=0;g=c[r>>2]|0;i=I;return g|0}f=c[w>>2]|0;n=Ed(c[x>>2]|0,c[y>>2]|0,c[y>>2]|0)|0;g=Fd(c[t>>2]|0,c[A>>2]|0,c[A>>2]|0,c[B>>2]|0,c[C>>2]|0,c[C>>2]|0)|0;c[D>>2]=uc(f,qh(n,g,(c[u>>2]|0)+(c[E>>2]<<2)|0,(c[v>>2]|0)+(c[E>>2]<<2)|0,(c[u>>2]|0)+(c[E>>2]<<2)|0,(c[v>>2]|0)+(c[E>>2]<<2)|0)|0)|0;if(c[D>>2]|0){c[H>>2]=rf(112,12656,(c[(c[F>>2]|0)+12>>2]|0)==1?25:24)|0;c[(c[H>>2]|0)+104>>2]=c[F>>2];c[(c[H>>2]|0)+96>>2]=c[D>>2];c[(c[H>>2]|0)+64>>2]=c[x>>2];c[(c[H>>2]|0)+68>>2]=c[y>>2];c[(c[H>>2]|0)+72>>2]=c[z>>2];c[(c[H>>2]|0)+84>>2]=c[A>>2];c[(c[H>>2]|0)+88>>2]=c[B>>2];c[(c[H>>2]|0)+92>>2]=c[C>>2];c[(c[H>>2]|0)+76>>2]=c[s>>2];c[(c[H>>2]|0)+80>>2]=(c[s>>2]|0)+(c[t>>2]|0);c[(c[H>>2]|0)+108>>2]=c[(c[F>>2]|0)+12>>2];c[(c[H>>2]|0)+100>>2]=0;g=_((c[x>>2]|0)-1|0,(c[t>>2]|0)-1|0)|0;h[G>>3]=+(_(g,c[B>>2]|0)|0);g=(c[H>>2]|0)+8|0;n=(c[D>>2]|0)+8|0;c[g>>2]=c[n>>2];c[g+4>>2]=c[n+4>>2];c[g+8>>2]=c[n+8>>2];c[g+12>>2]=c[n+12>>2];c[g+16>>2]=c[n+16>>2];c[g+20>>2]=c[n+20>>2];c[g+24>>2]=c[n+24>>2];c[g+28>>2]=c[n+28>>2];g=(c[H>>2]|0)+8+8|0;h[g>>3]=+h[g>>3]+ +h[G>>3]*8.0;g=(c[H>>2]|0)+8|0;h[g>>3]=+h[g>>3]+ +h[G>>3]*4.0;g=(c[H>>2]|0)+8+24|0;h[g>>3]=+h[g>>3]+ +h[G>>3]*8.0;c[r>>2]=c[H>>2];g=c[r>>2]|0;i=I;return g|0}else{pc(c[D>>2]|0);c[r>>2]=0;g=c[r>>2]|0;i=I;return g|0}return 0}function Yf(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0;k=i;i=i+32|0;m=k+16|0;l=k+12|0;g=k+8|0;h=k+4|0;j=k;c[m>>2]=a;c[l>>2]=b;c[g>>2]=d;c[h>>2]=e;c[j>>2]=f;if((c[m>>2]|0)!=(c[l>>2]|0)){l=0;l=l&1;i=k;return l|0}if((c[g>>2]|0)!=(c[h>>2]|0)){l=0;l=l&1;i=k;return l|0}l=(c[(c[j>>2]|0)+164>>2]&8|0)!=0^1;l=l&1;i=k;return l|0}function Zf(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;h=e+16|0;g=e+12|0;j=e+8|0;k=e+4|0;f=e;c[l>>2]=a;c[h>>2]=b;c[g>>2]=d;c[j>>2]=c[l>>2];c[f>>2]=_(c[(c[j>>2]|0)+84>>2]|0,c[(c[j>>2]|0)+76>>2]|0)|0;dg(c[j>>2]|0,c[h>>2]|0,c[g>>2]|0);c[k>>2]=c[(c[j>>2]|0)+96>>2];Ya[c[(c[k>>2]|0)+56>>2]&63](c[(c[j>>2]|0)+96>>2]|0,(c[h>>2]|0)+(c[f>>2]<<2)|0,(c[g>>2]|0)+(c[f>>2]<<2)|0,(c[h>>2]|0)+(c[f>>2]<<2)|0,(c[g>>2]|0)+(c[f>>2]<<2)|0);i=e;return}function _f(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;g=e+16|0;f=e+12|0;h=e+8|0;k=e+4|0;j=e;c[l>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[l>>2];c[j>>2]=_(c[(c[h>>2]|0)+84>>2]|0,c[(c[h>>2]|0)+76>>2]|0)|0;c[k>>2]=c[(c[h>>2]|0)+96>>2];Ya[c[(c[k>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+96>>2]|0,(c[g>>2]|0)+(c[j>>2]<<2)|0,(c[f>>2]|0)+(c[j>>2]<<2)|0,(c[g>>2]|0)+(c[j>>2]<<2)|0,(c[f>>2]|0)+(c[j>>2]<<2)|0);dg(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return}function $f(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+96>>2]|0,c[e>>2]|0);cg(c[f>>2]|0,c[e>>2]|0);i=d;return}function ag(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0;d=i;i=i+32|0;e=d;h=d+28|0;j=d+24|0;k=d+20|0;c[h>>2]=a;c[j>>2]=b;c[k>>2]=c[h>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=c[(c[k>>2]|0)+64>>2]|0;h=c[(c[k>>2]|0)+72>>2]|0;g=c[(c[k>>2]|0)+88>>2]|0;f=c[(c[k>>2]|0)+96>>2]|0;c[e>>2]=(c[(c[k>>2]|0)+108>>2]|0)==1?22957:22961;c[e+4>>2]=j;c[e+8>>2]=h;c[e+12>>2]=g;c[e+16>>2]=f;eb[b&63](a,20278,e);i=d;return}function bg(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+96>>2]|0);i=b;return}function cg(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;d=i;i=i+16|0;e=d+4|0;f=d;c[e>>2]=a;c[f>>2]=b;a=_(c[(c[e>>2]|0)+64>>2]|0,c[(c[e>>2]|0)+72>>2]|0)|0;Me(c[f>>2]|0,(c[e>>2]|0)+100|0,17820,a,c[(c[e>>2]|0)+72>>2]|0,c[(c[e>>2]|0)+64>>2]|0);i=d;return}function dg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0;A=i;i=i+96|0;B=A+80|0;e=A+76|0;f=A+72|0;l=A+68|0;k=A+64|0;j=A+60|0;s=A+56|0;t=A+52|0;m=A+48|0;n=A+44|0;o=A+40|0;p=A+36|0;u=A+32|0;v=A+28|0;h=A+24|0;r=A+20|0;q=A+16|0;z=A+12|0;y=A+8|0;x=A+4|0;w=A;c[B>>2]=a;c[e>>2]=b;c[f>>2]=d;c[s>>2]=c[(c[B>>2]|0)+64>>2];c[t>>2]=c[(c[B>>2]|0)+68>>2];c[m>>2]=c[(c[B>>2]|0)+72>>2];c[n>>2]=c[(c[B>>2]|0)+76>>2];c[o>>2]=c[(c[B>>2]|0)+80>>2];c[p>>2]=c[(c[B>>2]|0)+84>>2];c[u>>2]=c[(c[B>>2]|0)+88>>2];c[v>>2]=c[(c[B>>2]|0)+92>>2];c[h>>2]=c[c[(c[B>>2]|0)+100>>2]>>2];c[n>>2]=(c[n>>2]|0)+((c[n>>2]|0)==0&1);c[l>>2]=0;while(1){if((c[l>>2]|0)>=(c[u>>2]|0))break;c[k>>2]=1;while(1){if((c[k>>2]|0)>=(c[s>>2]|0))break;c[j>>2]=c[n>>2];while(1){if((c[j>>2]|0)>=(c[o>>2]|0))break;b=(c[e>>2]|0)+((_(c[p>>2]|0,c[j>>2]|0)|0)<<2)|0;c[r>>2]=b+((_(c[t>>2]|0,c[k>>2]|0)|0)<<2);b=(c[f>>2]|0)+((_(c[p>>2]|0,c[j>>2]|0)|0)<<2)|0;c[q>>2]=b+((_(c[t>>2]|0,c[k>>2]|0)|0)<<2);g[z>>2]=+g[c[r>>2]>>2];g[y>>2]=+g[c[q>>2]>>2];b=(c[j>>2]<<1)+(_((c[m>>2]|0)-1<<1,c[k>>2]|0)|0)-2|0;g[x>>2]=+g[(c[h>>2]|0)+(b<<2)>>2];b=(c[j>>2]<<1)+(_((c[m>>2]|0)-1<<1,c[k>>2]|0)|0)-1|0;g[w>>2]=+g[(c[h>>2]|0)+(b<<2)>>2];g[c[r>>2]>>2]=+g[z>>2]*+g[x>>2]+ +g[y>>2]*+g[w>>2];g[c[q>>2]>>2]=+g[y>>2]*+g[x>>2]-+g[z>>2]*+g[w>>2];c[j>>2]=(c[j>>2]|0)+1}c[k>>2]=(c[k>>2]|0)+1}c[e>>2]=(c[e>>2]|0)+(c[v>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[v>>2]<<2);c[l>>2]=(c[l>>2]|0)+1}i=A;return}function eg(a){a=a|0;var b=0,d=0,e=0,f=0;f=i;i=i+16|0;b=f+8|0;d=f+4|0;e=f;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=7)break;c[e>>2]=0;while(1){if((c[e>>2]|0)>>>0>=5)break;fg(c[b>>2]|0,c[12672+(c[d>>2]<<2)>>2]|0,c[12700+(c[e>>2]<<2)>>2]|0);c[e>>2]=(c[e>>2]|0)+1}c[d>>2]=(c[d>>2]|0)+1}i=f;return}function 
fg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;j=i;i=i+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=qf(28,c[f>>2]|0,1,4,0)|0;c[(c[h>>2]|0)+24>>2]=c[g>>2];Bd(c[e>>2]|0,c[h>>2]|0);if(!(c[3148]|0)){i=j;return}c[h>>2]=kb[c[12592>>2]&0](28,c[f>>2]|0,1,4,0)|0;c[(c[h>>2]|0)+24>>2]=c[g>>2];Bd(c[e>>2]|0,c[h>>2]|0);i=j;return}function gg(a,b,d,e,f,g,j,k,l,m,n,o,p,q){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;p=p|0;q=q|0;var r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;E=i;i=i+96|0;r=E+80|0;H=E+76|0;v=E+72|0;w=E+68|0;G=E+64|0;x=E+60|0;y=E+56|0;F=E+52|0;s=E+40|0;t=E+36|0;u=E+24|0;B=E+20|0;D=E+16|0;A=E+12|0;z=E+8|0;C=E;c[H>>2]=a;c[v>>2]=b;c[w>>2]=d;c[G>>2]=e;c[x>>2]=f;c[y>>2]=g;c[F>>2]=j;c[E+48>>2]=k;c[E+44>>2]=l;c[s>>2]=m;c[t>>2]=n;c[E+32>>2]=o;c[E+28>>2]=p;c[u>>2]=q;c[B>>2]=c[H>>2];c[A>>2]=0;if(!(hg(c[B>>2]|0,c[v>>2]|0,c[w>>2]|0,c[G>>2]|0,c[x>>2]|0,c[F>>2]|0,c[t>>2]|0,c[u>>2]|0)|0)){c[r>>2]=0;l=c[r>>2]|0;i=E;return l|0}c[z>>2]=wb(_((c[v>>2]|0)+16<<3,c[(c[B>>2]|0)+24>>2]|0)|0)|0;m=c[u>>2]|0;e=Ed(c[v>>2]|0,2,2)|0;l=Ed(c[(c[B>>2]|0)+24>>2]|0,(c[v>>2]|0)+16<<1,(c[v>>2]|0)+16<<1)|0;c[A>>2]=uc(m,qh(e,l,c[z>>2]|0,(c[z>>2]|0)+4|0,c[z>>2]|0,(c[z>>2]|0)+4|0)|0)|0;xb(c[z>>2]|0);if(c[A>>2]|0){c[D>>2]=rf(112,12720,26)|0;c[(c[D>>2]|0)+108>>2]=c[B>>2];c[(c[D>>2]|0)+100>>2]=c[A>>2];c[(c[D>>2]|0)+64>>2]=c[v>>2];c[(c[D>>2]|0)+72>>2]=c[x>>2];c[(c[D>>2]|0)+76>>2]=c[y>>2];c[(c[D>>2]|0)+68>>2]=c[w>>2];c[(c[D>>2]|0)+96>>2]=c[(c[B>>2]|0)+24>>2];c[(c[D>>2]|0)+88>>2]=c[s>>2];c[(c[D>>2]|0)+92>>2]=(c[s>>2]|0)+(c[t>>2]|0);h[C>>3]=+(_((c[v>>2]|0)-1|0,(c[t>>2]|0)-1|0)|0);l=(c[D>>2]|0)+8|0;e=(c[A>>2]|0)+8|0;c[l>>2]=c[e>>2];c[l+4>>2]=c[e+4>>2];c[l+8>>2]=c[e+8>>2];c[l+12>>2]=c[e+12>>2];c[l+16>>2]=c[e+16>>2];c[l+20>>2]=c[e+20>>2];c[l+24>>2]=c[e+24>>2];c[l+28>>2]=c[e+28>>2];l=(c[D>>2]|0)+8+8|0;h[l>>3]=+h[l>>3]+ +h[C>>3]*8.0;l=(c[D>>2]|0)+8|0;h[l>>3]=+h[l>>3]+ +h[C>>3]*4.0;l=(c[D>>2]|0)+8+24|0;h[l>>3]=+h[l>>3]+ 
+h[C>>3]*8.0;c[r>>2]=c[D>>2];l=c[r>>2]|0;i=E;return l|0}else{pc(c[A>>2]|0);c[r>>2]=0;l=c[r>>2]|0;i=E;return l|0}return 0}function hg(a,b,d,e,f,g,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;o=i;i=i+48|0;k=o+32|0;t=o+28|0;l=o+24|0;s=o+20|0;r=o+16|0;m=o+12|0;q=o+8|0;p=o+4|0;n=o;c[t>>2]=a;c[l>>2]=b;c[s>>2]=d;c[r>>2]=e;c[m>>2]=f;c[q>>2]=g;c[p>>2]=h;c[n>>2]=j;if(!(og(c[t>>2]|0,c[l>>2]|0,c[s>>2]|0,c[r>>2]|0,c[m>>2]|0,c[q>>2]|0,c[p>>2]|0)|0)){c[k>>2]=0;j=c[k>>2]|0;i=o;return j|0}if((c[(c[n>>2]|0)+164>>2]&65536|0)!=0?(_(c[m>>2]|0,c[l>>2]|0)|0)<65536:0){c[k>>2]=0;j=c[k>>2]|0;i=o;return j|0}c[k>>2]=1;j=c[k>>2]|0;i=o;return j|0}function ig(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;k=i;i=i+32|0;l=k+20|0;e=k+16|0;f=k+12|0;h=k+8|0;g=k+4|0;j=k;c[l>>2]=a;c[e>>2]=b;c[f>>2]=d;c[h>>2]=c[l>>2];c[g>>2]=wb(_((c[(c[h>>2]|0)+64>>2]|0)+16<<3,c[(c[h>>2]|0)+96>>2]|0)|0)|0;c[j>>2]=c[(c[h>>2]|0)+88>>2];while(1){if((c[j>>2]|0)>=(c[(c[h>>2]|0)+92>>2]|0))break;jg(c[h>>2]|0,c[j>>2]|0,(c[j>>2]|0)+(c[(c[h>>2]|0)+96>>2]|0)|0,c[g>>2]|0,c[e>>2]|0,c[f>>2]|0);c[j>>2]=(c[j>>2]|0)+(c[(c[h>>2]|0)+96>>2]|0)}xb(c[g>>2]|0);i=k;return}function jg(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;h=i;i=i+32|0;j=h+28|0;l=h+24|0;m=h+20|0;n=h+16|0;p=h+12|0;o=h+8|0;q=h+4|0;k=h;c[j>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[p>>2]=f;c[o>>2]=g;c[k>>2]=c[(c[j>>2]|0)+76>>2];kg(c[j>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0,c[p>>2]|0,c[o>>2]|0);c[q>>2]=c[(c[j>>2]|0)+100>>2];Ya[c[(c[q>>2]|0)+56>>2]&63](c[(c[j>>2]|0)+100>>2]|0,c[n>>2]|0,(c[n>>2]|0)+4|0,c[n>>2]|0,(c[n>>2]|0)+4|0);g=(c[p>>2]|0)+((_(c[k>>2]|0,c[l>>2]|0)|0)<<2)|0;b=(c[o>>2]|0)+((_(c[k>>2]|0,c[l>>2]|0)|0)<<2)|0;Ib(c[n>>2]|0,(c[n>>2]|0)+4|0,g,b,(c[m>>2]|0)-(c[l>>2]|0)|0,(c[(c[j>>2]|0)+64>>2]|0)+16<<1,c[k>>2]|0,c[(c[j>>2]|0)+64>>2]|0,2,c[(c[j>>2]|0)+68>>2]|0);i=h;return}function kg(a,b,d,e,f,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;var 
j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;u=i;i=i+48|0;v=u+44|0;j=u+40|0;k=u+36|0;l=u+32|0;m=u+28|0;n=u+24|0;o=u+20|0;p=u+16|0;r=u+12|0;s=u+8|0;q=u+4|0;t=u;c[v>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=h;c[r>>2]=c[(c[v>>2]|0)+64>>2];c[s>>2]=c[(c[v>>2]|0)+68>>2];c[q>>2]=c[(c[v>>2]|0)+76>>2];c[t>>2]=c[(c[v>>2]|0)+104>>2];c[o>>2]=0;while(1){if((c[o>>2]|0)>=(c[r>>2]|0))break;c[p>>2]=c[j>>2];while(1){if((c[p>>2]|0)>=(c[k>>2]|0))break;b=_(c[o>>2]|0,c[p>>2]|0)|0;e=_(c[o>>2]|0,c[s>>2]|0)|0;e=e+(_(c[p>>2]|0,c[q>>2]|0)|0)|0;a=_(c[o>>2]|0,c[s>>2]|0)|0;a=a+(_(c[p>>2]|0,c[q>>2]|0)|0)|0;h=(c[o>>2]<<1)+(_((c[r>>2]|0)+16<<1,(c[p>>2]|0)-(c[j>>2]|0)|0)|0)+0|0;lb[c[(c[t>>2]|0)+8>>2]&3](c[t>>2]|0,b,+g[(c[m>>2]|0)+(e<<2)>>2],+g[(c[n>>2]|0)+(a<<2)>>2],(c[l>>2]|0)+(h<<2)|0);c[p>>2]=(c[p>>2]|0)+1}c[o>>2]=(c[o>>2]|0)+1}i=u;return}function lg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=c[g>>2];rc(c[(c[d>>2]|0)+100>>2]|0,c[f>>2]|0);a=c[d>>2]|0;if(!(c[f>>2]|0)){Be(c[a+104>>2]|0);c[(c[d>>2]|0)+104>>2]=0;i=e;return}else{b=Ae(2,_(c[a+64>>2]|0,c[(c[d>>2]|0)+72>>2]|0)|0)|0;c[(c[d>>2]|0)+104>>2]=b;i=e;return}}function mg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+64>>2]|0;g=c[(c[j>>2]|0)+72>>2]|0;f=c[(c[j>>2]|0)+100>>2]|0;c[e>>2]=c[(c[j>>2]|0)+96>>2];c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,20310,e);i=d;return}function ng(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+100>>2]|0);i=b;return}function og(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var 
j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;l=i;i=i+32|0;m=l+24|0;j=l+20|0;p=l+16|0;o=l+12|0;k=l+8|0;q=l+4|0;n=l;c[m>>2]=a;c[j>>2]=b;c[p>>2]=d;c[o>>2]=e;c[k>>2]=f;c[q>>2]=g;c[n>>2]=h;if((((c[q>>2]|0)==1?(c[p>>2]|0)==(c[o>>2]|0):0)?(c[n>>2]|0)>=(c[(c[m>>2]|0)+24>>2]|0):0)?((c[j>>2]|0)>=64?((c[n>>2]|0)%(c[(c[m>>2]|0)+24>>2]|0)|0|0)==0:0):0)j=(c[k>>2]|0)>=(c[j>>2]|0);else j=0;i=l;return j&1|0}function pg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;b=rg(c[f>>2]|0,c[e>>2]|0,0)|0;i=d;return b|0}function qg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;b=rg(c[f>>2]|0,c[e>>2]|0,1)|0;i=d;return b|0}function rg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,12736)|0;c[(c[e>>2]|0)+12>>2]=c[j>>2];c[(c[e>>2]|0)+8>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function sg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0.0;q=i;i=i+48|0;e=q+36|0;f=q+32|0;g=q+28|0;j=q+24|0;m=q+20|0;p=q+16|0;o=q+12|0;k=q+8|0;l=q+4|0;n=q;c[f>>2]=a;c[g>>2]=b;c[j>>2]=d;c[m>>2]=c[f>>2];c[l>>2]=c[(c[m>>2]|0)+8>>2];do if(c[(c[m>>2]|0)+16>>2]|0){if(tg(c[f>>2]|0,c[g>>2]|0,c[j>>2]|0)|0){c[p>>2]=oh(104,12748,11)|0;break}c[e>>2]=0;o=c[e>>2]|0;i=q;return o|0}else{c[n>>2]=0;if(vg(c[f>>2]|0,c[g>>2]|0,c[j>>2]|0,n)|0){c[p>>2]=oh(104,12748,(c[n>>2]|0)!=0?13:12)|0;break}c[e>>2]=0;o=c[e>>2]|0;i=q;return 
o|0}while(0);c[o>>2]=c[g>>2];c[k>>2]=(c[(c[o>>2]|0)+4>>2]|0)+4;c[(c[p>>2]|0)+92>>2]=c[(c[m>>2]|0)+12>>2];c[(c[p>>2]|0)+76>>2]=c[c[k>>2]>>2];c[(c[p>>2]|0)+64>>2]=c[(c[k>>2]|0)+4>>2];c[(c[p>>2]|0)+68>>2]=c[(c[k>>2]|0)+8>>2];n=(yg(c[(c[p>>2]|0)+76>>2]|0)|0)<<1;c[(c[p>>2]|0)+72>>2]=n;ke(c[(c[o>>2]|0)+8>>2]|0,(c[p>>2]|0)+80|0,(c[p>>2]|0)+84|0,(c[p>>2]|0)+88|0)|0;c[(c[p>>2]|0)+96>>2]=c[m>>2];fc((c[p>>2]|0)+8|0);lc((c[(c[p>>2]|0)+80>>2]|0)/(c[(c[(c[l>>2]|0)+40>>2]|0)+4>>2]|0)|0,(c[l>>2]|0)+8|0,(c[p>>2]|0)+8|0);if(c[(c[m>>2]|0)+16>>2]|0){r=+(_(c[(c[p>>2]|0)+76>>2]<<2,c[(c[p>>2]|0)+80>>2]|0)|0);o=(c[p>>2]|0)+8+24|0;h[o>>3]=+h[o>>3]+r}c[(c[p>>2]|0)+52>>2]=((c[(c[m>>2]|0)+16>>2]|0)!=0^1)&1;c[e>>2]=c[p>>2];o=c[e>>2]|0;i=q;return o|0}function tg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;m=i;i=i+48|0;p=m+36|0;o=m+32|0;e=m+28|0;n=m+24|0;k=m+20|0;g=m+16|0;l=m+12|0;h=m+8|0;j=m+4|0;f=m;c[p>>2]=a;c[o>>2]=b;c[e>>2]=d;c[n>>2]=c[p>>2];c[k>>2]=c[o>>2];c[g>>2]=c[(c[n>>2]|0)+8>>2];if((c[c[(c[k>>2]|0)+4>>2]>>2]|0)!=1){l=0;l=l&1;i=m;return l|0}if((c[c[(c[k>>2]|0)+8>>2]>>2]|0)!=1){l=0;l=l&1;i=m;return l|0}if((c[(c[(c[k>>2]|0)+4>>2]|0)+4>>2]|0)!=(c[c[g>>2]>>2]|0)){l=0;l=l&1;i=m;return l|0}if(!(ke(c[(c[k>>2]|0)+8>>2]|0,l,h,j)|0)){l=0;l=l&1;i=m;return l|0}if((c[(c[e>>2]|0)+164>>2]&65536|0)!=0?(d=Tb(c[(c[(c[k>>2]|0)+4>>2]|0)+4+4>>2]|0)|0,(d|0)<=(Tb(c[h>>2]|0)|0)):0){l=0;l=l&1;i=m;return l|0}c[f>>2]=yg(c[c[g>>2]>>2]|0)|0;if(!(gb[c[c[(c[g>>2]|0)+40>>2]>>2]&7](c[g>>2]|0,0,4|0,c[(c[k>>2]|0)+20>>2]|0,c[(c[k>>2]|0)+24>>2]|0,c[f>>2]<<1,c[(c[(c[k>>2]|0)+4>>2]|0)+4+8>>2]|0,c[f>>2]|0,2,c[j>>2]|0,c[e>>2]|0)|0)){l=0;l=l&1;i=m;return l|0}if(!(gb[c[c[(c[g>>2]|0)+40>>2]>>2]&7](c[g>>2]|0,0,4|0,c[(c[k>>2]|0)+20>>2]|0,c[(c[k>>2]|0)+24>>2]|0,c[f>>2]<<1,c[(c[(c[k>>2]|0)+4>>2]|0)+4+8>>2]|0,(c[l>>2]|0)%(c[f>>2]|0)|0,2,c[j>>2]|0,c[e>>2]|0)|0)){l=0;l=l&1;i=m;return l|0}if((c[(c[k>>2]|0)+12>>2]|0)!=(c[(c[k>>2]|0)+20>>2]|0)){l=1;l=l&1;i=m;return 
l|0}if(Md(c[(c[k>>2]|0)+4>>2]|0,c[(c[k>>2]|0)+8>>2]|0)|0){l=1;l=l&1;i=m;return l|0}l=(c[l>>2]|0)<=(c[f>>2]|0);l=l&1;i=m;return l|0}function ug(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;s=i;i=i+48|0;t=s+44|0;h=s+40|0;j=s+36|0;k=s+32|0;l=s+28|0;p=s+24|0;n=s+20|0;r=s+16|0;g=s+12|0;m=s+8|0;q=s+4|0;o=s;c[t>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[p>>2]=c[t>>2];c[r>>2]=c[(c[p>>2]|0)+80>>2];c[g>>2]=c[(c[p>>2]|0)+76>>2];c[m>>2]=yg(c[g>>2]|0)|0;c[o>>2]=(_(c[g>>2]|0,c[m>>2]|0)|0)<<1<<2;a=c[o>>2]|0;if((c[o>>2]|0)>>>0<65536){g=i;i=i+((1*a|0)+15&-16)|0;c[n>>2]=g}else c[n>>2]=wb(a)|0;c[q>>2]=0;while(1){e=c[p>>2]|0;a=c[h>>2]|0;d=c[j>>2]|0;f=c[k>>2]|0;b=c[l>>2]|0;g=c[n>>2]|0;if((c[q>>2]|0)>=((c[r>>2]|0)-(c[m>>2]|0)|0))break;zg(e,a,d,f,b,g,c[m>>2]|0);g=_(c[m>>2]|0,c[(c[p>>2]|0)+84>>2]|0)|0;c[h>>2]=(c[h>>2]|0)+(g<<2);g=_(c[m>>2]|0,c[(c[p>>2]|0)+84>>2]|0)|0;c[j>>2]=(c[j>>2]|0)+(g<<2);g=_(c[m>>2]|0,c[(c[p>>2]|0)+88>>2]|0)|0;c[k>>2]=(c[k>>2]|0)+(g<<2);g=_(c[m>>2]|0,c[(c[p>>2]|0)+88>>2]|0)|0;c[l>>2]=(c[l>>2]|0)+(g<<2);c[q>>2]=(c[q>>2]|0)+(c[m>>2]|0)}zg(e,a,d,f,b,g,(c[r>>2]|0)-(c[q>>2]|0)|0);if((c[o>>2]|0)>>>0<65536){i=s;return}xb(c[n>>2]|0);i=s;return}function vg(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;n=i;i=i+48|0;q=n+36|0;p=n+32|0;f=n+28|0;g=n+24|0;o=n+20|0;l=n+16|0;h=n+12|0;m=n+8|0;j=n+4|0;k=n;c[q>>2]=a;c[p>>2]=b;c[f>>2]=d;c[g>>2]=e;c[o>>2]=c[q>>2];c[l>>2]=c[p>>2];c[h>>2]=c[(c[o>>2]|0)+8>>2];if((c[c[(c[l>>2]|0)+4>>2]>>2]|0)!=1){d=0;d=d&1;i=n;return d|0}if((c[c[(c[l>>2]|0)+8>>2]>>2]|0)>1){d=0;d=d&1;i=n;return d|0}if((c[(c[(c[l>>2]|0)+4>>2]|0)+4>>2]|0)!=(c[c[h>>2]>>2]|0)){d=0;d=d&1;i=n;return d|0}if(!(ke(c[(c[l>>2]|0)+8>>2]|0,m,j,k)|0)){d=0;d=d&1;i=n;return 
d|0}c[c[g>>2]>>2]=0;if(!(gb[c[c[(c[h>>2]|0)+40>>2]>>2]&7](c[h>>2]|0,c[(c[l>>2]|0)+12>>2]|0,c[(c[l>>2]|0)+16>>2]|0,c[(c[l>>2]|0)+20>>2]|0,c[(c[l>>2]|0)+24>>2]|0,c[(c[(c[l>>2]|0)+4>>2]|0)+4+4>>2]|0,c[(c[(c[l>>2]|0)+4>>2]|0)+4+8>>2]|0,c[m>>2]|0,c[j>>2]|0,c[k>>2]|0,c[f>>2]|0)|0)){c[c[g>>2]>>2]=1;if(!(gb[c[c[(c[h>>2]|0)+40>>2]>>2]&7](c[h>>2]|0,c[(c[l>>2]|0)+12>>2]|0,c[(c[l>>2]|0)+16>>2]|0,c[(c[l>>2]|0)+20>>2]|0,c[(c[l>>2]|0)+24>>2]|0,c[(c[(c[l>>2]|0)+4>>2]|0)+4+4>>2]|0,c[(c[(c[l>>2]|0)+4>>2]|0)+4+8>>2]|0,(c[m>>2]|0)-1|0,c[j>>2]|0,c[k>>2]|0,c[f>>2]|0)|0)){d=0;d=d&1;i=n;return d|0}if(!(gb[c[c[(c[h>>2]|0)+40>>2]>>2]&7](c[h>>2]|0,c[(c[l>>2]|0)+12>>2]|0,c[(c[l>>2]|0)+16>>2]|0,c[(c[l>>2]|0)+20>>2]|0,c[(c[l>>2]|0)+24>>2]|0,c[(c[(c[l>>2]|0)+4>>2]|0)+4+4>>2]|0,c[(c[(c[l>>2]|0)+4>>2]|0)+4+8>>2]|0,2,0,0,c[f>>2]|0)|0)){d=0;d=d&1;i=n;return d|0}}if((c[m>>2]|0)==1?1:(c[(c[l>>2]|0)+12>>2]|0)!=(c[(c[l>>2]|0)+20>>2]|0)){d=1;d=d&1;i=n;return d|0}d=(Md(c[(c[l>>2]|0)+4>>2]|0,c[(c[l>>2]|0)+8>>2]|0)|0)!=0;d=d&1;i=n;return d|0}function wg(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;g=i;i=i+32|0;o=g+24|0;n=g+20|0;m=g+16|0;l=g+12|0;k=g+8|0;h=g+4|0;j=g;c[o>>2]=a;c[n>>2]=b;c[m>>2]=d;c[l>>2]=e;c[k>>2]=f;c[h>>2]=c[o>>2];c[j>>2]=c[(c[h>>2]|0)+80>>2];hb[c[(c[h>>2]|0)+92>>2]&127](c[n>>2]|0,c[m>>2]|0,c[l>>2]|0,c[k>>2]|0,c[(c[h>>2]|0)+64>>2]|0,c[(c[h>>2]|0)+68>>2]|0,(c[j>>2]|0)-1|0,c[(c[h>>2]|0)+84>>2]|0,c[(c[h>>2]|0)+88>>2]|0);a=(c[n>>2]|0)+((_((c[j>>2]|0)-1|0,c[(c[h>>2]|0)+84>>2]|0)|0)<<2)|0;d=(c[m>>2]|0)+((_((c[j>>2]|0)-1|0,c[(c[h>>2]|0)+84>>2]|0)|0)<<2)|0;f=(c[l>>2]|0)+((_((c[j>>2]|0)-1|0,c[(c[h>>2]|0)+88>>2]|0)|0)<<2)|0;b=(c[k>>2]|0)+((_((c[j>>2]|0)-1|0,c[(c[h>>2]|0)+88>>2]|0)|0)<<2)|0;hb[c[(c[h>>2]|0)+92>>2]&127](a,d,f,b,c[(c[h>>2]|0)+64>>2]|0,c[(c[h>>2]|0)+68>>2]|0,1,0,0);i=g;return}function xg(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;n=g+20|0;m=g+16|0;l=g+12|0;k=g+8|0;j=g+4|0;h=g;c[n>>2]=a;c[m>>2]=b;c[l>>2]=d;c[k>>2]=e;c[j>>2]=f;c[h>>2]=c[n>>2];hb[c[(c[h>>2]|0)+92>>2]&127](c[m>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,c[(c[h>>2]|0)+64>>2]|0,c[(c[h>>2]|0)+68>>2]|0,c[(c[h>>2]|0)+80>>2]|0,c[(c[h>>2]|0)+84>>2]|0,c[(c[h>>2]|0)+88>>2]|0);i=g;return}function yg(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;c[b>>2]=(c[b>>2]|0)+3;c[b>>2]=c[b>>2]&-4;i=d;return (c[b>>2]|0)+2|0}function zg(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;o=i;i=i+32|0;j=o+24|0;q=o+20|0;p=o+16|0;k=o+12|0;l=o+8|0;m=o+4|0;n=o;c[j>>2]=a;c[q>>2]=b;c[p>>2]=d;c[k>>2]=e;c[l>>2]=f;c[m>>2]=g;c[n>>2]=h;Hb(c[q>>2]|0,c[p>>2]|0,c[m>>2]|0,(c[m>>2]|0)+4|0,c[(c[j>>2]|0)+76>>2]|0,c[(c[j>>2]|0)+64>>2]|0,c[(c[j>>2]|0)+72>>2]|0,c[n>>2]|0,c[(c[j>>2]|0)+84>>2]|0,2);b=c[(c[j>>2]|0)+68>>2]|0;e=c[(c[j>>2]|0)+88>>2]|0;a=c[(c[j>>2]|0)+92>>2]|0;g=c[m>>2]|0;h=(c[m>>2]|0)+4|0;if((((c[(c[j>>2]|0)+68>>2]|0)<0?0-b|0:b)|0)<(((c[(c[j>>2]|0)+88>>2]|0)<0?0-e|0:e)|0)){hb[a&127](g,h,c[k>>2]|0,c[l>>2]|0,c[(c[j>>2]|0)+72>>2]|0,c[(c[j>>2]|0)+68>>2]|0,c[n>>2]|0,2,c[(c[j>>2]|0)+88>>2]|0);i=o;return}else{hb[a&127](g,h,c[m>>2]|0,(c[m>>2]|0)+4|0,c[(c[j>>2]|0)+72>>2]|0,c[(c[j>>2]|0)+72>>2]|0,c[n>>2]|0,2,2);Ib(c[m>>2]|0,(c[m>>2]|0)+4|0,c[k>>2]|0,c[l>>2]|0,c[(c[j>>2]|0)+76>>2]|0,c[(c[j>>2]|0)+72>>2]|0,c[(c[j>>2]|0)+68>>2]|0,c[n>>2]|0,2,c[(c[j>>2]|0)+88>>2]|0);i=o;return}}function Ag(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;j=i;i=i+48|0;h=j+16|0;g=j;l=j+44|0;d=j+40|0;f=j+36|0;k=j+32|0;e=j+28|0;c[l>>2]=a;c[d>>2]=b;c[f>>2]=c[l>>2];c[k>>2]=c[(c[f>>2]|0)+96>>2];c[e>>2]=c[(c[k>>2]|0)+8>>2];b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;d=c[c[e>>2]>>2]|0;if(c[(c[(c[f>>2]|0)+96>>2]|0)+16>>2]|0){k=yg(d)|0;h=c[c[e>>2]>>2]|0;d=c[(c[f>>2]|0)+80>>2]|0;e=c[(c[e>>2]|0)+4>>2]|0;c[g>>2]=k;c[g+4>>2]=h;c[g+8>>2]=d;c[g+12>>2]=e;eb[b&63](a,20343,g);i=j;return}else{f=c[(c[f>>2]|0)+80>>2]|0;e=c[(c[e>>2]|0)+4>>2]|0;c[h>>2]=d;c[h+4>>2]=f;c[h+8>>2]=e;eb[b&63](a,20372,h);i=j;return}}function Bg(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b+4|0;c[d>>2]=a;c[b>>2]=c[d>>2];i=b;return}function Cg(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Dg()|0);i=b;return}function Dg(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,12764)|0;i=b;return c[a>>2]|0}function Eg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0.0;l=i;i=i+32|0;e=l+24|0;n=l+20|0;f=l+16|0;m=l+12|0;j=l+8|0;k=l+4|0;g=l;c[n>>2]=a;c[f>>2]=b;c[m>>2]=d;if(Fg(c[n>>2]|0,c[f>>2]|0,c[m>>2]|0)|0){c[k>>2]=oh(80,12776,14)|0;c[j>>2]=c[f>>2];m=c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0;c[g>>2]=m;c[(c[k>>2]|0)+68>>2]=m;c[(c[k>>2]|0)+72>>2]=c[(c[(c[j>>2]|0)+4>>2]|0)+4+4>>2];c[(c[k>>2]|0)+76>>2]=c[(c[(c[j>>2]|0)+4>>2]|0)+4+8>>2];c[(c[k>>2]|0)+64>>2]=0;h[(c[k>>2]|0)+8>>3]=+(((c[g>>2]|0)-1|0)*5|0);h[(c[k>>2]|0)+8+8>>3]=0.0;o=+(_((c[g>>2]|0)-1|0,(c[g>>2]|0)-1|0)|0);h[(c[k>>2]|0)+8+16>>3]=o;c[e>>2]=c[k>>2];m=c[e>>2]|0;i=l;return m|0}else{c[e>>2]=0;m=c[e>>2]|0;i=l;return m|0}return 0}function Fg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;h=g+8|0;e=g+4|0;f=g;c[g+12>>2]=a;c[h>>2]=b;c[e>>2]=d;c[f>>2]=c[h>>2];if((c[c[(c[f>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=g;return b|0}if(c[c[(c[f>>2]|0)+8>>2]>>2]|0){b=0;b=b&1;i=g;return b|0}if(((c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)%2|0|0)!=1){b=0;b=b&1;i=g;return b|0}if((c[(c[e>>2]|0)+164>>2]&64|0)!=0?(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)>=173:0){b=0;b=b&1;i=g;return 
b|0}if((c[(c[e>>2]|0)+164>>2]&8|0)!=0?(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)<=16:0){b=0;b=b&1;i=g;return b|0}b=(gd(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function Gg(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;s=i;i=i+64|0;u=s+48|0;g=s+44|0;h=s+40|0;j=s+36|0;k=s+32|0;t=s+28|0;o=s+24|0;q=s+20|0;p=s+16|0;r=s+12|0;l=s+8|0;m=s+4|0;n=s;c[u>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[t>>2]=c[u>>2];c[q>>2]=c[(c[t>>2]|0)+68>>2];c[p>>2]=c[(c[t>>2]|0)+72>>2];c[r>>2]=c[(c[t>>2]|0)+76>>2];c[l>>2]=c[c[(c[t>>2]|0)+64>>2]>>2];c[n>>2]=c[q>>2]<<1<<2;a=c[n>>2]|0;if((c[n>>2]|0)>>>0<65536){e=i;i=i+((1*a|0)+15&-16)|0;c[m>>2]=e}else c[m>>2]=wb(a)|0;Hg(c[q>>2]|0,c[g>>2]|0,c[h>>2]|0,c[p>>2]|0,c[m>>2]|0,c[j>>2]|0,c[k>>2]|0);c[o>>2]=1;while(1){if(((c[o>>2]|0)+(c[o>>2]|0)|0)>=(c[q>>2]|0))break;e=(c[j>>2]|0)+((_(c[o>>2]|0,c[r>>2]|0)|0)<<2)|0;g=(c[k>>2]|0)+((_(c[o>>2]|0,c[r>>2]|0)|0)<<2)|0;h=(c[j>>2]|0)+((_((c[q>>2]|0)-(c[o>>2]|0)|0,c[r>>2]|0)|0)<<2)|0;Ig(c[q>>2]|0,c[m>>2]|0,c[l>>2]|0,e,g,h,(c[k>>2]|0)+((_((c[q>>2]|0)-(c[o>>2]|0)|0,c[r>>2]|0)|0)<<2)|0);c[l>>2]=(c[l>>2]|0)+((c[q>>2]|0)-1<<2);c[o>>2]=(c[o>>2]|0)+1}if((c[n>>2]|0)>>>0<65536){i=s;return}xb(c[m>>2]|0);i=s;return}function Hg(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0.0;u=i;i=i+48|0;k=u+36|0;l=u+32|0;m=u+28|0;n=u+24|0;o=u+20|0;p=u+16|0;q=u+12|0;r=u+8|0;t=u+4|0;s=u;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[o>>2]=f;c[p>>2]=h;c[q>>2]=j;v=+g[c[l>>2]>>2];g[t>>2]=v;g[c[o>>2]>>2]=v;v=+g[c[m>>2]>>2];g[s>>2]=v;g[(c[o>>2]|0)+4>>2]=v;c[o>>2]=(c[o>>2]|0)+8;c[r>>2]=1;while(1){if(((c[r>>2]|0)+(c[r>>2]|0)|0)>=(c[k>>2]|0))break;f=_(c[r>>2]|0,c[n>>2]|0)|0;a=_((c[k>>2]|0)-(c[r>>2]|0)|0,c[n>>2]|0)|0;v=+g[(c[l>>2]|0)+(f<<2)>>2]+ +g[(c[l>>2]|0)+(a<<2)>>2];g[c[o>>2]>>2]=v;g[t>>2]=+g[t>>2]+v;a=_(c[r>>2]|0,c[n>>2]|0)|0;f=_((c[k>>2]|0)-(c[r>>2]|0)|0,c[n>>2]|0)|0;v=+g[(c[m>>2]|0)+(a<<2)>>2]+ 
+g[(c[m>>2]|0)+(f<<2)>>2];g[(c[o>>2]|0)+4>>2]=v;g[s>>2]=+g[s>>2]+v;f=_(c[r>>2]|0,c[n>>2]|0)|0;a=_((c[k>>2]|0)-(c[r>>2]|0)|0,c[n>>2]|0)|0;g[(c[o>>2]|0)+8>>2]=+g[(c[l>>2]|0)+(f<<2)>>2]-+g[(c[l>>2]|0)+(a<<2)>>2];a=_(c[r>>2]|0,c[n>>2]|0)|0;f=_((c[k>>2]|0)-(c[r>>2]|0)|0,c[n>>2]|0)|0;g[(c[o>>2]|0)+12>>2]=+g[(c[m>>2]|0)+(a<<2)>>2]-+g[(c[m>>2]|0)+(f<<2)>>2];c[o>>2]=(c[o>>2]|0)+16;c[r>>2]=(c[r>>2]|0)+1}g[c[p>>2]>>2]=+g[t>>2];g[c[q>>2]>>2]=+g[s>>2];i=u;return}function Ig(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;w=i;i=i+48|0;k=w+44|0;l=w+40|0;m=w+36|0;n=w+32|0;o=w+28|0;p=w+24|0;q=w+20|0;r=w+16|0;v=w+12|0;u=w+8|0;t=w+4|0;s=w;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[o>>2]=f;c[p>>2]=h;c[q>>2]=j;g[v>>2]=+g[c[l>>2]>>2];g[u>>2]=0.0;g[t>>2]=+g[(c[l>>2]|0)+4>>2];g[s>>2]=0.0;c[l>>2]=(c[l>>2]|0)+8;c[r>>2]=1;while(1){if(((c[r>>2]|0)+(c[r>>2]|0)|0)>=(c[k>>2]|0))break;g[v>>2]=+g[v>>2]+ +g[c[l>>2]>>2]*+g[c[m>>2]>>2];g[t>>2]=+g[t>>2]+ +g[(c[l>>2]|0)+4>>2]*+g[c[m>>2]>>2];g[u>>2]=+g[u>>2]+ +g[(c[l>>2]|0)+8>>2]*+g[(c[m>>2]|0)+4>>2];g[s>>2]=+g[s>>2]+ +g[(c[l>>2]|0)+12>>2]*+g[(c[m>>2]|0)+4>>2];c[l>>2]=(c[l>>2]|0)+16;c[m>>2]=(c[m>>2]|0)+8;c[r>>2]=(c[r>>2]|0)+1}g[c[n>>2]>>2]=+g[v>>2]+ +g[s>>2];g[c[o>>2]>>2]=+g[t>>2]-+g[u>>2];g[c[p>>2]>>2]=+g[v>>2]-+g[s>>2];g[c[q>>2]>>2]=+g[t>>2]+ +g[u>>2];i=w;return}function Jg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];Me(c[f>>2]|0,(c[e>>2]|0)+64|0,17828,c[(c[e>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+68>>2]|0,((c[(c[e>>2]|0)+68>>2]|0)-1|0)/2|0);i=d;return}function Kg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+16|0;e=d;h=d+12|0;g=d+8|0;f=d+4|0;c[h>>2]=a;c[g>>2]=b;c[f>>2]=c[h>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;c[e>>2]=c[(c[f>>2]|0)+68>>2];eb[b&63](a,20395,e);i=d;return}function Lg(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Mg()|0);i=b;return}function Mg(){var 
a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,12792)|0;i=b;return c[a>>2]|0}function Ng(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0;x=i;i=i+80|0;e=x+76|0;z=x+72|0;y=x+68|0;f=x+64|0;o=x+60|0;r=x+56|0;g=x+52|0;j=x+48|0;h=x+44|0;p=x+40|0;q=x+36|0;u=x+32|0;v=x+28|0;w=x+24|0;m=x+20|0;n=x+16|0;s=x+12|0;k=x+8|0;t=x+4|0;l=x;c[z>>2]=a;c[y>>2]=b;c[f>>2]=d;c[o>>2]=c[y>>2];c[g>>2]=0;c[j>>2]=0;c[h>>2]=0;if(!(Og(c[z>>2]|0,c[y>>2]|0,c[f>>2]|0,p,q)|0)){c[e>>2]=0;w=c[e>>2]|0;i=x;return w|0}c[w>>2]=(c[(c[(c[o>>2]|0)+8>>2]|0)+4+((c[p>>2]|0)*12|0)>>2]|0)/(c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)>>2]|0)|0;c[m>>2]=_(c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)>>2]|0,c[(c[(c[o>>2]|0)+8>>2]|0)+4+((c[p>>2]|0)*12|0)+4>>2]|0)|0;c[n>>2]=_(c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)>>2]|0,c[(c[(c[o>>2]|0)+8>>2]|0)+4+((c[p>>2]|0)*12|0)+8>>2]|0)|0;c[s>>2]=c[(c[o>>2]|0)+12>>2];c[k>>2]=c[(c[o>>2]|0)+16>>2];c[t>>2]=c[(c[o>>2]|0)+20>>2];c[l>>2]=c[(c[o>>2]|0)+24>>2];c[u>>2]=Qd(c[(c[o>>2]|0)+4>>2]|0,0)|0;c[(c[u>>2]|0)+4+((c[q>>2]|0)*12|0)+8>>2]=c[(c[(c[o>>2]|0)+8>>2]|0)+4+((c[p>>2]|0)*12|0)+4>>2];c[v>>2]=Qd(c[(c[o>>2]|0)+8>>2]|0,0)|0;c[(c[v>>2]|0)+4+((c[p>>2]|0)*12|0)+8>>2]=c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)+4>>2];c[(c[v>>2]|0)+4+((c[p>>2]|0)*12|0)>>2]=c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)>>2];a=c[f>>2]|0;b=Dd()|0;d=Td(c[v>>2]|0,c[u>>2]|0)|0;c[j>>2]=uc(a,qh(b,d,c[s>>2]|0,c[k>>2]|0,c[t>>2]|0,c[l>>2]|0)|0)|0;ee(c[u>>2]|0,c[v>>2]|0);if(((c[j>>2]|0)!=0?(c[u>>2]=Pd(c[(c[o>>2]|0)+4>>2]|0)|0,c[(c[u>>2]|0)+4+((c[q>>2]|0)*12|0)+4>>2]=c[(c[(c[o>>2]|0)+8>>2]|0)+4+((c[p>>2]|0)*12|0)+4>>2],c[v>>2]=Pd(c[(c[o>>2]|0)+8>>2]|0)|0,c[(c[v>>2]|0)+4+((c[p>>2]|0)*12|0)+4>>2]=c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)+4>>2],c[(c[v>>2]|0)+4+((c[p>>2]|0)*12|0)>>2]=c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)>>2],s=c[f>>2]|0,c[g>>2]=uc(s,qh(c[u>>2]|0,c[v>>2]|0,c[t>>2]|0,c[l>>2]|0,c[t>>2]|0,c[l>>2]|0)|0)|0,(c[g>>2]|0)!=0):0)?(c[v>>2
]=Pd(c[(c[o>>2]|0)+8>>2]|0)|0,d=_(c[w>>2]|0,c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)>>2]|0)|0,q=(c[v>>2]|0)+4+((c[p>>2]|0)*12|0)|0,c[q>>2]=(c[q>>2]|0)-d,q=c[f>>2]|0,d=Pd(c[(c[o>>2]|0)+4>>2]|0)|0,s=(c[(c[o>>2]|0)+12>>2]|0)+((_(c[m>>2]|0,c[w>>2]|0)|0)<<2)|0,t=(c[(c[o>>2]|0)+16>>2]|0)+((_(c[m>>2]|0,c[w>>2]|0)|0)<<2)|0,u=(c[(c[o>>2]|0)+20>>2]|0)+((_(c[n>>2]|0,c[w>>2]|0)|0)<<2)|0,c[h>>2]=uc(q,qh(d,c[v>>2]|0,s,t,u,(c[(c[o>>2]|0)+24>>2]|0)+((_(c[n>>2]|0,c[w>>2]|0)|0)<<2)|0)|0)|0,(c[h>>2]|0)!=0):0){c[r>>2]=oh(88,12804,15)|0;c[(c[r>>2]|0)+76>>2]=c[j>>2];c[(c[r>>2]|0)+80>>2]=c[g>>2];c[(c[r>>2]|0)+84>>2]=c[h>>2];c[(c[r>>2]|0)+64>>2]=c[w>>2];c[(c[r>>2]|0)+68>>2]=c[m>>2];c[(c[r>>2]|0)+72>>2]=c[n>>2];gc((c[h>>2]|0)+8|0,(c[r>>2]|0)+8|0);lc(c[w>>2]|0,(c[g>>2]|0)+8|0,(c[r>>2]|0)+8|0);lc(c[w>>2]|0,(c[j>>2]|0)+8|0,(c[r>>2]|0)+8|0);c[e>>2]=c[r>>2];w=c[e>>2]|0;i=x;return w|0}pc(c[h>>2]|0);pc(c[g>>2]|0);pc(c[j>>2]|0);c[e>>2]=0;w=c[e>>2]|0;i=x;return w|0}function Og(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;n=i;i=i+32|0;h=n+28|0;p=n+24|0;g=n+20|0;j=n+16|0;k=n+12|0;o=n+8|0;l=n+4|0;m=n;c[p>>2]=a;c[g>>2]=b;c[j>>2]=d;c[k>>2]=e;c[o>>2]=f;if(!(Tg(c[p>>2]|0,c[g>>2]|0,c[j>>2]|0,c[k>>2]|0,c[o>>2]|0)|0)){c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}c[l>>2]=c[g>>2];if((c[(c[l>>2]|0)+12>>2]|0)==((c[(c[l>>2]|0)+16>>2]|0)+4|0))g=1;else g=(c[(c[l>>2]|0)+16>>2]|0)==((c[(c[l>>2]|0)+12>>2]|0)+4|0);c[m>>2]=g?2:1;do if((c[(c[j>>2]|0)+164>>2]&65536|0)!=0?(c[(c[(c[l>>2]|0)+8>>2]|0)+4+((c[c[k>>2]>>2]|0)*12|0)+4>>2]|0)!=(c[m>>2]|0):0){if(((c[c[(c[l>>2]|0)+8>>2]>>2]|0)==2?(c[(c[(c[l>>2]|0)+8>>2]|0)+4+((1-(c[c[k>>2]>>2]|0)|0)*12|0)+4>>2]|0)==(c[m>>2]|0):0)?(c[(c[(c[l>>2]|0)+8>>2]|0)+4+((c[c[k>>2]>>2]|0)*12|0)+4>>2]|0)==(_(c[m>>2]|0,c[(c[(c[l>>2]|0)+8>>2]|0)+4+((1-(c[c[k>>2]>>2]|0)|0)*12|0)>>2]|0)|0):0)break;c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}while(0);if((c[(c[j>>2]|0)+164>>2]&32|0)!=0?(c[(c[l>>2]|0)+12>>2]|0)!=(c[(c[l>>2]|0)+20>>2]|0):0){c[h>>2]=0;l=c[h>>2]|0;i=n;return 
l|0}c[h>>2]=1;l=c[h>>2]|0;i=n;return l|0}function Pg(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;t=i;i=i+64|0;u=t+48|0;g=t+44|0;h=t+40|0;j=t+36|0;k=t+32|0;o=t+28|0;s=t+24|0;q=t+20|0;r=t+16|0;p=t+12|0;n=t+8|0;l=t+4|0;m=t;c[u>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[o>>2]=c[u>>2];c[s>>2]=c[(c[o>>2]|0)+64>>2];c[q>>2]=c[(c[o>>2]|0)+68>>2];c[r>>2]=c[(c[o>>2]|0)+72>>2];c[p>>2]=0;while(1){a=c[o>>2]|0;if((c[p>>2]|0)>=(c[s>>2]|0))break;c[n>>2]=c[a+76>>2];Ya[c[(c[n>>2]|0)+56>>2]&63](c[(c[o>>2]|0)+76>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);c[l>>2]=c[(c[o>>2]|0)+80>>2];Ya[c[(c[l>>2]|0)+56>>2]&63](c[(c[o>>2]|0)+80>>2]|0,c[j>>2]|0,c[k>>2]|0,c[j>>2]|0,c[k>>2]|0);c[g>>2]=(c[g>>2]|0)+(c[q>>2]<<2);c[h>>2]=(c[h>>2]|0)+(c[q>>2]<<2);c[j>>2]=(c[j>>2]|0)+(c[r>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[r>>2]<<2);c[p>>2]=(c[p>>2]|0)+1}c[m>>2]=c[a+84>>2];Ya[c[(c[m>>2]|0)+56>>2]&63](c[(c[o>>2]|0)+84>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);i=t;return}function Qg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+76>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+80>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+84>>2]|0,c[e>>2]|0);i=d;return}function Rg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+76>>2]|0;g=c[(c[j>>2]|0)+80>>2]|0;f=c[(c[j>>2]|0)+84>>2]|0;c[e>>2]=c[(c[j>>2]|0)+64>>2];c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,20412,e);i=d;return}function Sg(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+84>>2]|0);pc(c[(c[d>>2]|0)+80>>2]|0);pc(c[(c[d>>2]|0)+76>>2]|0);i=b;return}function Tg(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0;k=i;i=i+32|0;l=k+16|0;g=k+8|0;h=k+4|0;j=k;c[k+20>>2]=a;c[l>>2]=b;c[k+12>>2]=d;c[g>>2]=e;c[h>>2]=f;c[j>>2]=c[l>>2];if((c[c[(c[j>>2]|0)+8>>2]>>2]|0)==2147483647){j=0;j=j&1;i=k;return j|0}if((c[c[(c[j>>2]|0)+4>>2]>>2]|0)==2147483647){j=0;j=j&1;i=k;return j|0}if(!(Md(c[(c[j>>2]|0)+8>>2]|0,c[(c[j>>2]|0)+4>>2]|0)|0)){j=0;j=j&1;i=k;return j|0}if(!(Ug(c[(c[j>>2]|0)+8>>2]|0,c[(c[j>>2]|0)+4>>2]|0,c[g>>2]|0,c[h>>2]|0)|0)){j=0;j=j&1;i=k;return j|0}j=(c[(c[(c[j>>2]|0)+4>>2]|0)+4+((c[c[h>>2]>>2]|0)*12|0)+8>>2]|0)!=(c[(c[(c[j>>2]|0)+8>>2]|0)+4+((c[c[g>>2]>>2]|0)*12|0)+4>>2]|0);j=j&1;i=k;return j|0}function Ug(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0;m=i;i=i+32|0;f=m+20|0;g=m+16|0;h=m+12|0;j=m+8|0;k=m+4|0;l=m;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[c[j>>2]>>2]=-1;c[c[h>>2]>>2]=-1;c[k>>2]=0;while(1){if((c[k>>2]|0)>=(c[c[f>>2]>>2]|0))break;c[l>>2]=0;while(1){d=c[k>>2]|0;if((c[l>>2]|0)>=(c[c[g>>2]>>2]|0))break;d=c[(c[f>>2]|0)+4+(d*12|0)>>2]|0;d=_(d,Tb(c[(c[f>>2]|0)+4+((c[k>>2]|0)*12|0)+4>>2]|0)|0)|0;do if((d|0)<=(Tb(c[(c[g>>2]|0)+4+((c[l>>2]|0)*12|0)+4>>2]|0)|0)?(c[(c[f>>2]|0)+4+((c[k>>2]|0)*12|0)>>2]|0)>=(c[(c[g>>2]|0)+4+((c[l>>2]|0)*12|0)>>2]|0):0){if((c[c[h>>2]>>2]|0)!=-1){d=Tb(c[(c[f>>2]|0)+4+((c[k>>2]|0)*12|0)+4>>2]|0)|0;if((d|0)>(Tb(c[(c[f>>2]|0)+4+((c[c[h>>2]>>2]|0)*12|0)+4>>2]|0)|0))break;d=Tb(c[(c[g>>2]|0)+4+((c[l>>2]|0)*12|0)+4>>2]|0)|0;if((d|0)<(Tb(c[(c[g>>2]|0)+4+((c[c[j>>2]>>2]|0)*12|0)+4>>2]|0)|0))break}c[c[h>>2]>>2]=c[k>>2];c[c[j>>2]>>2]=c[l>>2]}while(0);c[l>>2]=(c[l>>2]|0)+1}c[k>>2]=d+1}if((c[c[h>>2]>>2]|0)==-1){l=0;l=l&1;i=m;return l|0}l=(c[c[j>>2]>>2]|0)!=-1;l=l&1;i=m;return l|0}function Vg(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=2)break;a=c[b>>2]|0;Bd(a,Wg(c[12820+(c[d>>2]<<2)>>2]|0)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Wg(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,12828)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return 
c[b>>2]|0}function Xg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;m=i;i=i+48|0;e=m+32|0;o=m+28|0;n=m+24|0;f=m+20|0;k=m+16|0;j=m+12|0;l=m+8|0;g=m+4|0;h=m;c[o>>2]=a;c[n>>2]=b;c[f>>2]=d;c[k>>2]=c[n>>2];c[j>>2]=c[o>>2];c[g>>2]=0;c[h>>2]=0;if(!(Yg(c[o>>2]|0,c[n>>2]|0,c[f>>2]|0)|0)){c[e>>2]=0;d=c[e>>2]|0;i=m;return d|0}a=c[f>>2]|0;b=Dd()|0;d=Td(c[(c[k>>2]|0)+8>>2]|0,c[(c[k>>2]|0)+4>>2]|0)|0;c[h>>2]=uc(a,qh(b,d,c[(c[k>>2]|0)+12>>2]|0,c[(c[k>>2]|0)+16>>2]|0,c[(c[k>>2]|0)+20>>2]|0,c[(c[k>>2]|0)+24>>2]|0)|0)|0;if((c[h>>2]|0)!=0?(d=c[f>>2]|0,c[g>>2]=vc(d,bb[c[(c[(c[j>>2]|0)+8>>2]|0)+4>>2]&7](c[k>>2]|0)|0,1024,0,0)|0,(c[g>>2]|0)!=0):0){c[l>>2]=oh(80,12840,c[c[(c[j>>2]|0)+8>>2]>>2]|0)|0;c[(c[l>>2]|0)+68>>2]=c[g>>2];c[(c[l>>2]|0)+64>>2]=c[h>>2];c[(c[l>>2]|0)+72>>2]=c[j>>2];jc((c[g>>2]|0)+8|0,(c[h>>2]|0)+8|0,(c[l>>2]|0)+8|0);c[e>>2]=c[l>>2];d=c[e>>2]|0;i=m;return d|0}pc(c[g>>2]|0);pc(c[h>>2]|0);c[e>>2]=0;d=c[e>>2]|0;i=m;return d|0}function Yg(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;e=j+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[k>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(ah(c[k>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;a=c[e>>2]|0;i=j;return a|0}c[h>>2]=c[f>>2];if((c[(c[g>>2]|0)+164>>2]&32|0)!=0?(c[(c[h>>2]|0)+12>>2]|0)!=(c[(c[h>>2]|0)+20>>2]|0):0){c[e>>2]=0;a=c[e>>2]|0;i=j;return a|0}c[e>>2]=1;a=c[e>>2]|0;i=j;return a|0}function Zg(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function _g(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;g=d+20|0;f=d+16|0;h=d+12|0;c[j>>2]=a;c[g>>2]=b;c[f>>2]=c[j>>2];c[h>>2]=c[(c[f>>2]|0)+72>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[f>>2]|0)+68>>2]|0;f=c[(c[f>>2]|0)+64>>2]|0;c[e>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+8>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,22993,e);i=d;return}function $g(a){a=a|0;var 
b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function ah(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;h=i;i=i+32|0;k=h+16|0;j=h+12|0;e=h+8|0;f=h+4|0;g=h;c[k>>2]=a;c[j>>2]=b;c[e>>2]=d;c[f>>2]=c[k>>2];c[g>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)==2147483647){g=0;g=g&1;i=h;return g|0}if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)<=0){g=0;g=g&1;i=h;return g|0}if(((c[(c[g>>2]|0)+12>>2]|0)==(c[(c[g>>2]|0)+20>>2]|0)?(Md(c[(c[g>>2]|0)+4>>2]|0,c[(c[g>>2]|0)+8>>2]|0)|0)==0:0)?(Nd(c[(c[g>>2]|0)+4>>2]|0,c[(c[g>>2]|0)+8>>2]|0,(c[c[(c[f>>2]|0)+8>>2]>>2]|0)==2?0:1)|0)!=0:0){g=1;g=g&1;i=h;return g|0}if(((((c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+20>>2]|0)?(c[c[(c[f>>2]|0)+8>>2]>>2]|0)==2:0)?(c[(c[e>>2]|0)+164>>2]&4096|0)==0:0)?(Id(c[(c[g>>2]|0)+4>>2]|0)|0)<=2:0)?(Jd(c[(c[g>>2]|0)+4>>2]|0)|0)>2:0){g=1;g=g&1;i=h;return g|0}if((c[(c[g>>2]|0)+12>>2]|0)==(c[(c[g>>2]|0)+20>>2]|0)){g=0;g=g&1;i=h;return g|0}if((c[c[(c[f>>2]|0)+8>>2]>>2]|0)!=1){g=0;g=g&1;i=h;return g|0}if((Jd(c[(c[g>>2]|0)+4>>2]|0)|0)>2){g=0;g=g&1;i=h;return g|0}g=(Id(c[(c[g>>2]|0)+4>>2]|0)|0)>2;g=g&1;i=h;return g|0}function bh(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;l=g+24|0;k=g+20|0;j=g+16|0;h=g+12|0;m=g+8|0;o=g+4|0;n=g;c[p>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;c[m>>2]=c[p>>2];c[o>>2]=c[(c[m>>2]|0)+68>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+68>>2]|0,c[l>>2]|0,c[k>>2]|0,c[l>>2]|0,c[k>>2]|0);c[n>>2]=c[(c[m>>2]|0)+64>>2];Ya[c[(c[n>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+64>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function ch(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;n=g+24|0;m=g+20|0;j=g+16|0;h=g+12|0;k=g+8|0;o=g+4|0;l=g;c[p>>2]=a;c[n>>2]=b;c[m>>2]=d;c[j>>2]=e;c[h>>2]=f;c[k>>2]=c[p>>2];c[o>>2]=c[(c[k>>2]|0)+64>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+64>>2]|0,c[n>>2]|0,c[m>>2]|0,c[j>>2]|0,c[h>>2]|0);c[l>>2]=c[(c[k>>2]|0)+68>>2];Ya[c[(c[l>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+68>>2]|0,c[j>>2]|0,c[h>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function dh(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b;c[d>>2]=a;e=Qd(c[(c[d>>2]|0)+4>>2]|0,0)|0;a=Qd(c[(c[d>>2]|0)+8>>2]|0,0)|0;a=qh(e,a,c[(c[d>>2]|0)+12>>2]|0,c[(c[d>>2]|0)+16>>2]|0,c[(c[d>>2]|0)+12>>2]|0,c[(c[d>>2]|0)+16>>2]|0)|0;i=b;return a|0}function eh(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b;c[d>>2]=a;e=Qd(c[(c[d>>2]|0)+4>>2]|0,1)|0;a=Qd(c[(c[d>>2]|0)+8>>2]|0,1)|0;a=qh(e,a,c[(c[d>>2]|0)+20>>2]|0,c[(c[d>>2]|0)+24>>2]|0,c[(c[d>>2]|0)+20>>2]|0,c[(c[d>>2]|0)+24>>2]|0)|0;i=b;return a|0}function fh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;Nf(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,0);i=e;return}function gh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;zf(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,1);i=e;return}function hh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;b=c[h>>2]|0;Bd(b,pg(c[g>>2]|0,c[f>>2]|0)|0);b=c[h>>2]|0;Bd(b,qg(c[g>>2]|0,c[f>>2]|0)|0);i=e;return}function ih(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,jh()|0);i=b;return}function jh(){return zd(8,12880)|0}function kh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;e=g+16|0;j=g+12|0;h=g+8|0;f=g;c[j>>2]=a;c[h>>2]=b;c[g+4>>2]=d;if(lh(c[j>>2]|0,c[h>>2]|0)|0){c[f>>2]=oh(64,12892,16)|0;fc((c[f>>2]|0)+8|0);c[e>>2]=c[f>>2];f=c[e>>2]|0;i=g;return f|0}else{c[e>>2]=0;f=c[e>>2]|0;i=g;return f|0}return 0}function lh(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647)if(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)==0?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647:0)?(c[(c[d>>2]|0)+20>>2]|0)==(c[(c[d>>2]|0)+12>>2]|0):0)a=(Ld(c[(c[d>>2]|0)+8>>2]|0)|0)!=0;else a=0;else a=1;i=e;return a&1|0}function mh(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0;g=i;i=i+32|0;c[g+16>>2]=a;c[g+12>>2]=b;c[g+8>>2]=d;c[g+4>>2]=e;c[g>>2]=f;i=g;return}function nh(a,b){a=a|0;b=b|0;var d=0,e=0;d=i;i=i+16|0;e=d+4|0;c[d+8>>2]=a;c[e>>2]=b;eb[c[c[e>>2]>>2]&63](c[e>>2]|0,20492,d);i=d;return}function oh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=oc(c[j>>2]|0,c[h>>2]|0)|0;c[(c[e>>2]|0)+56>>2]=c[g>>2];i=f;return c[e>>2]|0}function ph(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+32|0;h=r+28|0;j=r+24|0;k=r+20|0;l=r+16|0;m=r+12|0;n=r+8|0;o=r+4|0;p=r;c[j>>2]=a;c[k>>2]=b;c[l>>2]=d;c[m>>2]=e;c[n>>2]=f;c[o>>2]=g;if((c[l>>2]|0)==(c[n>>2]|0)){g=c[l>>2]|0;c[n>>2]=g;c[l>>2]=g}if((c[m>>2]|0)==(c[o>>2]|0)){g=c[m>>2]|0;c[o>>2]=g;c[m>>2]=g}if(!((c[l>>2]|0)!=(c[n>>2]|0)?(c[m>>2]|0)!=(c[o>>2]|0):0))q=7;do if((q|0)==7){if(((c[l>>2]|0)==(c[n>>2]|0)?(c[m>>2]|0)==(c[o>>2]|0):0)?(_d(c[j>>2]|0,c[k>>2]|0)|0)!=0:0)break;c[h>>2]=rd()|0;g=c[h>>2]|0;i=r;return g|0}while(0);c[p>>2]=pd(28,12908)|0;g=Wd(c[j>>2]|0)|0;c[(c[p>>2]|0)+4>>2]=g;g=Xd(c[k>>2]|0)|0;c[(c[p>>2]|0)+8>>2]=g;c[(c[p>>2]|0)+12>>2]=c[l>>2];c[(c[p>>2]|0)+16>>2]=c[m>>2];c[(c[p>>2]|0)+20>>2]=c[n>>2];c[(c[p>>2]|0)+24>>2]=c[o>>2];c[h>>2]=c[p>>2];g=c[h>>2]|0;i=r;return g|0}function qh(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;j=i;i=i+32|0;k=j+24|0;l=j+20|0;p=j+16|0;o=j+12|0;n=j+8|0;m=j+4|0;h=j;c[k>>2]=a;c[l>>2]=b;c[p>>2]=d;c[o>>2]=e;c[n>>2]=f;c[m>>2]=g;c[h>>2]=ph(c[k>>2]|0,c[l>>2]|0,c[p>>2]|0,c[o>>2]|0,c[n>>2]|0,c[m>>2]|0)|0;ee(c[l>>2]|0,c[k>>2]|0);i=j;return c[h>>2]|0}function rh(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];Xb(c[f>>2]|0,20529);Yb(c[f>>2]|0,(c[(c[e>>2]|0)+12>>2]|0)==(c[(c[e>>2]|0)+20>>2]|0)&1);Zb(c[f>>2]|0,((c[(c[e>>2]|0)+16>>2]|0)-(c[(c[e>>2]|0)+12>>2]|0)|0)/4|0);Zb(c[f>>2]|0,((c[(c[e>>2]|0)+24>>2]|0)-(c[(c[e>>2]|0)+20>>2]|0)|0)/4|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+12>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+16>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+20>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+24>>2]|0)|0);je(c[f>>2]|0,c[(c[e>>2]|0)+4>>2]|0);je(c[f>>2]|0,c[(c[e>>2]|0)+8>>2]|0);i=d;return}function sh(a){a=a|0;var b=0,d=0,e=0,f=0;b=i;i=i+16|0;f=b+8|0;e=b+4|0;d=b;c[f>>2]=a;c[e>>2]=c[f>>2];c[d>>2]=Td(c[(c[e>>2]|0)+8>>2]|0,c[(c[e>>2]|0)+4>>2]|0)|0;$h(c[d>>2]|0,c[(c[e>>2]|0)+12>>2]|0,c[(c[e>>2]|0)+16>>2]|0);he(c[d>>2]|0);i=b;return}function th(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;d=i;i=i+48|0;e=d;l=d+36|0;m=d+32|0;f=d+28|0;c[l>>2]=a;c[m>>2]=b;c[f>>2]=c[l>>2];b=c[c[m>>2]>>2]|0;a=c[m>>2]|0;m=(c[(c[f>>2]|0)+12>>2]|0)==(c[(c[f>>2]|0)+20>>2]|0)&1;l=vb(c[(c[f>>2]|0)+12>>2]|0)|0;k=vb(c[(c[f>>2]|0)+20>>2]|0)|0;j=((c[(c[f>>2]|0)+16>>2]|0)-(c[(c[f>>2]|0)+12>>2]|0)|0)/4|0;h=((c[(c[f>>2]|0)+24>>2]|0)-(c[(c[f>>2]|0)+20>>2]|0)|0)/4|0;g=c[(c[f>>2]|0)+4>>2]|0;f=c[(c[f>>2]|0)+8>>2]|0;c[e>>2]=m;c[e+4>>2]=l;c[e+8>>2]=k;c[e+12>>2]=j;c[e+16>>2]=h;c[e+20>>2]=g;c[e+24>>2]=f;eb[b&63](a,20502,e);i=d;return}function uh(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b+4|0;e=b;c[d>>2]=a;c[e>>2]=c[d>>2];ee(c[(c[e>>2]|0)+8>>2]|0,c[(c[e>>2]|0)+4>>2]|0);xb(c[d>>2]|0);i=b;return}function vh(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,wh()|0);i=b;return}function wh(){var 
a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,12928)|0;i=b;return c[a>>2]|0}function xh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;m=i;i=i+48|0;l=m+32|0;o=m+28|0;n=m+24|0;e=m+20|0;j=m+16|0;k=m+12|0;g=m+8|0;f=m+4|0;h=m;c[o>>2]=a;c[n>>2]=b;c[e>>2]=d;c[j>>2]=c[n>>2];if(!(yh(c[o>>2]|0,c[n>>2]|0,c[e>>2]|0)|0)){c[l>>2]=0;l=c[l>>2]|0;i=m;return l|0}c[g>>2]=c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2];c[f>>2]=c[(c[(c[j>>2]|0)+4>>2]|0)+4+4>>2];c[h>>2]=c[(c[(c[j>>2]|0)+4>>2]|0)+4+8>>2];c[k>>2]=oh(104,12940,17)|0;d=(Ah(c[k>>2]|0,c[g>>2]|0,c[f>>2]|0,c[h>>2]|0,c[(c[j>>2]|0)+20>>2]|0,c[(c[j>>2]|0)+24>>2]|0,c[e>>2]|0)|0)!=0;e=c[k>>2]|0;if(d){c[l>>2]=e;l=c[l>>2]|0;i=m;return l|0}else{xb(e);c[l>>2]=0;l=c[l>>2]|0;i=m;return l|0}return 0}function yh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;h=g+8|0;e=g+4|0;f=g;c[g+12>>2]=a;c[h>>2]=b;c[e>>2]=d;c[f>>2]=c[h>>2];if((c[c[(c[f>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=g;return b|0}if(c[c[(c[f>>2]|0)+8>>2]>>2]|0){b=0;b=b&1;i=g;return b|0}if((c[(c[e>>2]|0)+164>>2]&8|0)!=0?(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)<=32:0){b=0;b=b&1;i=g;return b|0}if(!(gd(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)|0)){b=0;b=b&1;i=g;return b|0}if(!(c[(c[e>>2]|0)+164>>2]&8)){b=1;b=b&1;i=g;return b|0}b=(md((c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)-1|0)|0)!=0;b=b&1;i=g;return b|0}function zh(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;G=i;i=i+112|0;H=G+96|0;h=G+92|0;j=G+88|0;y=G+84|0;z=G+80|0;m=G+76|0;s=G+72|0;E=G+68|0;D=G+64|0;C=G+60|0;n=G+56|0;F=G+52|0;A=G+48|0;u=G+44|0;o=G+40|0;v=G+36|0;p=G+32|0;k=G+28|0;t=G+24|0;w=G+20|0;q=G+16|0;x=G+12|0;r=G+8|0;l=G+4|0;B=G;c[H>>2]=a;c[h>>2]=b;c[j>>2]=d;c[y>>2]=e;c[z>>2]=f;c[m>>2]=c[H>>2];g[u>>2]=+g[c[h>>2]>>2];g[o>>2]=+g[c[j>>2]>>2];c[F>>2]=c[(c[m>>2]|0)+76>>2];c[s>>2]=c[(c[m>>2]|0)+88>>2];c[E>>2]=c[(c[m>>2]|0)+92>>2];c[n>>2]=c[(c[m>>2]|0)+80>>2];c[A>>2]=wb((c[F>>2]|0)-1<<2<<1)|0;c[C>>2]=1;c[D>>2]=0;while(1){if((c[D>>2]|0)>=((c[F>>2]|0)-1|0))break;a=_(c[C>>2]|0,c[s>>2]|0)|0;g[v>>2]=+g[(c[h>>2]|0)+(a<<2)>>2];a=_(c[C>>2]|0,c[s>>2]|0)|0;g[p>>2]=+g[(c[j>>2]|0)+(a<<2)>>2];g[(c[A>>2]|0)+(c[D>>2]<<1<<2)>>2]=+g[v>>2];g[(c[A>>2]|0)+((c[D>>2]<<1)+1<<2)>>2]=+g[p>>2];c[D>>2]=(c[D>>2]|0)+1;a=c[C>>2]|0;d=c[n>>2]|0;if((c[C>>2]|0)<=(92681-(c[n>>2]|0)|0)){a=_(a,d)|0;a=(a|0)%(c[F>>2]|0)|0}else a=cd(a,d,c[F>>2]|0)|0;c[C>>2]=a}c[k>>2]=c[(c[m>>2]|0)+64>>2];Ya[c[(c[k>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+64>>2]|0,c[A>>2]|0,(c[A>>2]|0)+4|0,(c[y>>2]|0)+(c[E>>2]<<2)|0,(c[z>>2]|0)+(c[E>>2]<<2)|0);g[c[y>>2]>>2]=+g[u>>2]+ +g[(c[y>>2]|0)+(c[E>>2]<<2)>>2];g[c[z>>2]>>2]=+g[o>>2]+ +g[(c[z>>2]|0)+(c[E>>2]<<2)>>2];c[t>>2]=c[(c[m>>2]|0)+72>>2];c[D>>2]=0;while(1){if((c[D>>2]|0)>=((c[F>>2]|0)-1|0))break;g[x>>2]=+g[(c[t>>2]|0)+(c[D>>2]<<1<<2)>>2];g[r>>2]=+g[(c[t>>2]|0)+((c[D>>2]<<1)+1<<2)>>2];v=_((c[D>>2]|0)+1|0,c[E>>2]|0)|0;g[w>>2]=+g[(c[y>>2]|0)+(v<<2)>>2];v=_((c[D>>2]|0)+1|0,c[E>>2]|0)|0;g[q>>2]=+g[(c[z>>2]|0)+(v<<2)>>2];v=_((c[D>>2]|0)+1|0,c[E>>2]|0)|0;g[(c[y>>2]|0)+(v<<2)>>2]=+g[x>>2]*+g[w>>2]-+g[r>>2]*+g[q>>2];v=_((c[D>>2]|0)+1|0,c[E>>2]|0)|0;g[(c[z>>2]|0)+(v<<2)>>2]=-(+g[x>>2]*+g[q>>2]+ +g[r>>2]*+g[w>>2]);c[D>>2]=(c[D>>2]|0)+1}x=(c[y>>2]|0)+(c[E>>2]<<2)|0;g[x>>2]=+g[x>>2]+ 
+g[u>>2];x=(c[z>>2]|0)+(c[E>>2]<<2)|0;g[x>>2]=+g[x>>2]-+g[o>>2];c[l>>2]=c[(c[m>>2]|0)+68>>2];Ya[c[(c[l>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+68>>2]|0,(c[y>>2]|0)+(c[E>>2]<<2)|0,(c[z>>2]|0)+(c[E>>2]<<2)|0,c[A>>2]|0,(c[A>>2]|0)+4|0);c[B>>2]=c[(c[m>>2]|0)+84>>2];c[C>>2]=1;c[D>>2]=0;while(1){if((c[D>>2]|0)>=((c[F>>2]|0)-1|0))break;d=_(c[C>>2]|0,c[E>>2]|0)|0;g[(c[y>>2]|0)+(d<<2)>>2]=+g[(c[A>>2]|0)+(c[D>>2]<<1<<2)>>2];d=_(c[C>>2]|0,c[E>>2]|0)|0;g[(c[z>>2]|0)+(d<<2)>>2]=-+g[(c[A>>2]|0)+((c[D>>2]<<1)+1<<2)>>2];c[D>>2]=(c[D>>2]|0)+1;d=c[C>>2]|0;a=c[B>>2]|0;if((c[C>>2]|0)<=(92681-(c[B>>2]|0)|0)){d=_(d,a)|0;d=(d|0)%(c[F>>2]|0)|0}else d=cd(d,a,c[F>>2]|0)|0;c[C>>2]=d}xb(c[A>>2]|0);i=G;return}function Ah(a,b,d,e,f,g,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;w=i;i=i+48|0;k=w+44|0;l=w+40|0;m=w+36|0;n=w+32|0;o=w+28|0;p=w+24|0;q=w+20|0;r=w+16|0;t=w+12|0;u=w+8|0;v=w+4|0;s=w;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c[p>>2]=f;c[q>>2]=g;c[r>>2]=j;c[t>>2]=0;c[u>>2]=0;c[v>>2]=0;c[s>>2]=0;c[s>>2]=wb((c[m>>2]|0)-1<<2<<1)|0;b=c[r>>2]|0;d=Ed((c[m>>2]|0)-1|0,2,c[o>>2]|0)|0;g=Ed(1,0,0)|0;c[t>>2]=vc(b,qh(d,g,c[s>>2]|0,(c[s>>2]|0)+4|0,(c[p>>2]|0)+(c[o>>2]<<2)|0,(c[q>>2]|0)+(c[o>>2]<<2)|0)|0,8,0,0)|0;if(((c[t>>2]|0)!=0?(b=c[r>>2]|0,d=Ed((c[m>>2]|0)-1|0,c[o>>2]|0,2)|0,g=Ed(1,0,0)|0,c[u>>2]=vc(b,qh(d,g,(c[p>>2]|0)+(c[o>>2]<<2)|0,(c[q>>2]|0)+(c[o>>2]<<2)|0,c[s>>2]|0,(c[s>>2]|0)+4|0)|0,8,0,0)|0,(c[u>>2]|0)!=0):0)?(p=c[r>>2]|0,q=Ed((c[m>>2]|0)-1|0,2,2)|0,r=Ed(1,0,0)|0,c[v>>2]=vc(p,qh(q,r,c[s>>2]|0,(c[s>>2]|0)+4|0,c[s>>2]|0,(c[s>>2]|0)+4|0)|0,8,2,0)|0,(c[v>>2]|0)!=0):0){xb(c[s>>2]|0);c[s>>2]=0;c[(c[l>>2]|0)+64>>2]=c[t>>2];c[(c[l>>2]|0)+68>>2]=c[u>>2];c[(c[l>>2]|0)+96>>2]=c[v>>2];c[(c[l>>2]|0)+72>>2]=0;c[(c[l>>2]|0)+76>>2]=c[m>>2];c[(c[l>>2]|0)+88>>2]=c[n>>2];c[(c[l>>2]|0)+92>>2]=c[o>>2];jc((c[t>>2]|0)+8|0,(c[u>>2]|0)+8|0,(c[l>>2]|0)+8|0);t=(c[l>>2]|0)+8+24|0;h[t>>3]=+h[t>>3]+ +((((c[m>>2]|0)-1|0)*14|0)+6|0);t=(c[l>>2]|0)+8|0;h[t>>3]=+h[t>>3]+ 
+(((c[m>>2]|0)-1<<1)+4|0);t=(c[l>>2]|0)+8+8|0;h[t>>3]=+h[t>>3]+ +((c[m>>2]|0)-1<<2|0);c[k>>2]=1;t=c[k>>2]|0;i=w;return t|0}yb(c[s>>2]|0);pc(c[v>>2]|0);pc(c[u>>2]|0);pc(c[t>>2]|0);c[k>>2]=0;t=c[k>>2]|0;i=w;return t|0}function Bh(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;f=i;i=i+16|0;g=f+8|0;d=f+4|0;e=f;c[g>>2]=a;c[d>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+64>>2]|0,c[d>>2]|0);rc(c[(c[e>>2]|0)+68>>2]|0,c[d>>2]|0);rc(c[(c[e>>2]|0)+96>>2]|0,c[d>>2]|0);a=c[e>>2]|0;if(!(c[d>>2]|0)){Eh(c[a+72>>2]|0);c[(c[e>>2]|0)+72>>2]=0;i=f;return}else{b=ed(c[a+76>>2]|0)|0;c[(c[e>>2]|0)+80>>2]=b;b=dd(c[(c[e>>2]|0)+80>>2]|0,(c[(c[e>>2]|0)+76>>2]|0)-2|0,c[(c[e>>2]|0)+76>>2]|0)|0;c[(c[e>>2]|0)+84>>2]=b;d=Fh(c[d>>2]|0,c[(c[e>>2]|0)+96>>2]|0,c[(c[e>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+84>>2]|0)|0;c[(c[e>>2]|0)+72>>2]=d;i=f;return}}function Ch(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;h=i;i=i+48|0;g=h+24|0;f=h+16|0;j=h;m=h+36|0;d=h+32|0;e=h+28|0;c[m>>2]=a;c[d>>2]=b;c[e>>2]=c[m>>2];a=c[c[d>>2]>>2]|0;b=c[d>>2]|0;m=c[(c[e>>2]|0)+88>>2]|0;l=c[(c[e>>2]|0)+92>>2]|0;k=c[(c[e>>2]|0)+64>>2]|0;c[j>>2]=c[(c[e>>2]|0)+76>>2];c[j+4>>2]=m;c[j+8>>2]=l;c[j+12>>2]=k;eb[a&63](b,20533,j);if((c[(c[e>>2]|0)+68>>2]|0)!=(c[(c[e>>2]|0)+64>>2]|0)){k=c[c[d>>2]>>2]|0;l=c[d>>2]|0;c[f>>2]=c[(c[e>>2]|0)+68>>2];eb[k&63](l,23700,f)}if((c[(c[e>>2]|0)+96>>2]|0)==(c[(c[e>>2]|0)+64>>2]|0)){k=c[d>>2]|0;k=k+8|0;k=c[k>>2]|0;l=c[d>>2]|0;$a[k&127](l,41);i=h;return}if((c[(c[e>>2]|0)+96>>2]|0)==(c[(c[e>>2]|0)+68>>2]|0)){k=c[d>>2]|0;k=k+8|0;k=c[k>>2]|0;l=c[d>>2]|0;$a[k&127](l,41);i=h;return}l=c[c[d>>2]>>2]|0;k=c[d>>2]|0;c[g>>2]=c[(c[e>>2]|0)+96>>2];eb[l&63](k,23700,g);k=c[d>>2]|0;k=k+8|0;k=c[k>>2]|0;l=c[d>>2]|0;$a[k&127](l,41);i=h;return}function Dh(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+96>>2]|0);pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Eh(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;yd(c[d>>2]|0,12956);i=b;return}function 
Fh(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;u=i;i=i+64|0;j=u+60|0;f=u+56|0;k=u+52|0;l=u+48|0;m=u+44|0;q=u+40|0;p=u+36|0;o=u+32|0;n=u+28|0;r=u+16|0;s=u+24|0;t=u;c[f>>2]=a;c[k>>2]=b;c[l>>2]=d;c[m>>2]=e;c[q>>2]=c[k>>2];a=xd(c[l>>2]|0,c[l>>2]|0,c[m>>2]|0,c[3239]|0)|0;c[p>>2]=a;if(a){c[j>>2]=c[p>>2];n=c[j>>2]|0;i=u;return n|0}c[p>>2]=wb((c[l>>2]|0)-1<<2<<1)|0;h[r>>3]=+(c[l>>2]|0)-1.0;c[s>>2]=Ae(c[f>>2]|0,c[l>>2]|0)|0;c[o>>2]=0;c[n>>2]=1;while(1){e=c[s>>2]|0;if((c[o>>2]|0)>=((c[l>>2]|0)-1|0))break;eb[c[e+4>>2]&63](c[s>>2]|0,c[n>>2]|0,t);g[(c[p>>2]|0)+(c[o>>2]<<1<<2)>>2]=+h[t>>3]/+h[r>>3];g[(c[p>>2]|0)+((c[o>>2]<<1)+1<<2)>>2]=+h[t+8>>3]*-1.0/+h[r>>3];c[o>>2]=(c[o>>2]|0)+1;e=c[n>>2]|0;f=c[m>>2]|0;if((c[n>>2]|0)<=(92681-(c[m>>2]|0)|0)){e=_(e,f)|0;e=(e|0)%(c[l>>2]|0)|0}else e=cd(e,f,c[l>>2]|0)|0;c[n>>2]=e}Be(e);Ya[c[(c[q>>2]|0)+56>>2]&63](c[k>>2]|0,c[p>>2]|0,(c[p>>2]|0)+4|0,c[p>>2]|0,(c[p>>2]|0)+4|0);wd(c[l>>2]|0,c[l>>2]|0,c[m>>2]|0,c[p>>2]|0,12956);c[j>>2]=c[p>>2];n=c[j>>2]|0;i=u;return n|0}function Gh(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+8|0;d=e+4|0;c[b>>2]=a;c[e>>2]=3;c[d>>2]=0;while(1){if((c[d>>2]|0)>=3)break;a=c[b>>2]|0;Bd(a,Hh(c[12960+(c[d>>2]<<2)>>2]|0,12960,3)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Hh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,12972)|0;c[(c[e>>2]|0)+8>>2]=c[j>>2];c[(c[e>>2]|0)+12>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function Ih(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;s=i;i=i+64|0;e=s+52|0;t=s+48|0;f=s+44|0;g=s+40|0;k=s+36|0;l=s+32|0;m=s+28|0;h=s+24|0;j=s+20|0;o=s+16|0;p=s+12|0;r=s+8|0;q=s+4|0;n=s;c[t>>2]=a;c[f>>2]=b;c[g>>2]=d;c[k>>2]=c[t>>2];c[h>>2]=0;c[j>>2]=0;if(!(Jh(c[t>>2]|0,c[f>>2]|0,c[g>>2]|0,n)|0)){c[e>>2]=0;m=c[e>>2]|0;i=s;return 
m|0}c[l>>2]=c[f>>2];Yd(c[(c[l>>2]|0)+4>>2]|0,o,c[n>>2]|0,p);c[r>>2]=Qd(c[(c[l>>2]|0)+8>>2]|0,1)|0;c[q>>2]=Qd(c[p>>2]|0,1)|0;f=c[g>>2]|0;a=Pd(c[p>>2]|0)|0;b=Td(c[(c[l>>2]|0)+8>>2]|0,c[o>>2]|0)|0;c[h>>2]=uc(f,qh(a,b,c[(c[l>>2]|0)+12>>2]|0,c[(c[l>>2]|0)+16>>2]|0,c[(c[l>>2]|0)+20>>2]|0,c[(c[l>>2]|0)+24>>2]|0)|0)|0;if((c[h>>2]|0)!=0?(f=c[g>>2]|0,a=Qd(c[o>>2]|0,1)|0,b=Td(c[r>>2]|0,c[q>>2]|0)|0,c[j>>2]=uc(f,qh(a,b,c[(c[l>>2]|0)+20>>2]|0,c[(c[l>>2]|0)+24>>2]|0,c[(c[l>>2]|0)+20>>2]|0,c[(c[l>>2]|0)+24>>2]|0)|0)|0,(c[j>>2]|0)!=0):0){c[m>>2]=oh(80,12984,18)|0;c[(c[m>>2]|0)+64>>2]=c[h>>2];c[(c[m>>2]|0)+68>>2]=c[j>>2];c[(c[m>>2]|0)+72>>2]=c[k>>2];jc((c[h>>2]|0)+8|0,(c[j>>2]|0)+8|0,(c[m>>2]|0)+8|0);fe(c[o>>2]|0,c[p>>2]|0,c[r>>2]|0,c[q>>2]|0);c[e>>2]=c[m>>2];m=c[e>>2]|0;i=s;return m|0}pc(c[j>>2]|0);pc(c[h>>2]|0);fe(c[o>>2]|0,c[p>>2]|0,c[r>>2]|0,c[q>>2]|0);c[e>>2]=0;m=c[e>>2]|0;i=s;return m|0}function Jh(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;k=i;i=i+32|0;f=k+24|0;n=k+20|0;m=k+16|0;g=k+12|0;l=k+8|0;h=k+4|0;j=k;c[n>>2]=a;c[m>>2]=b;c[g>>2]=d;c[l>>2]=e;c[h>>2]=c[n>>2];c[j>>2]=c[m>>2];if(!(Oh(c[n>>2]|0,c[m>>2]|0,c[l>>2]|0)|0)){c[f>>2]=0;l=c[f>>2]|0;i=k;return l|0}if((c[(c[g>>2]|0)+164>>2]&128|0)!=0?(c[(c[h>>2]|0)+8>>2]|0)!=(c[c[(c[h>>2]|0)+12>>2]>>2]|0):0){c[f>>2]=0;l=c[f>>2]|0;i=k;return l|0}if(((c[(c[g>>2]|0)+164>>2]&65536|0)!=0?(c[c[(c[j>>2]|0)+8>>2]>>2]|0)>0:0)?(l=Kd(c[(c[j>>2]|0)+8>>2]|0)|0,(l|0)>(Hd(c[(c[j>>2]|0)+4>>2]|0)|0)):0){c[f>>2]=0;l=c[f>>2]|0;i=k;return l|0}c[f>>2]=1;l=c[f>>2]|0;i=k;return l|0}function Kh(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;n=g+24|0;m=g+20|0;j=g+16|0;h=g+12|0;k=g+8|0;o=g+4|0;l=g;c[p>>2]=a;c[n>>2]=b;c[m>>2]=d;c[j>>2]=e;c[h>>2]=f;c[k>>2]=c[p>>2];c[o>>2]=c[(c[k>>2]|0)+64>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+64>>2]|0,c[n>>2]|0,c[m>>2]|0,c[j>>2]|0,c[h>>2]|0);c[l>>2]=c[(c[k>>2]|0)+68>>2];Ya[c[(c[l>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+68>>2]|0,c[j>>2]|0,c[h>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function Lh(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Mh(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;g=d+20|0;f=d+16|0;h=d+12|0;c[j>>2]=a;c[g>>2]=b;c[f>>2]=c[j>>2];c[h>>2]=c[(c[f>>2]|0)+72>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[f>>2]|0)+64>>2]|0;f=c[(c[f>>2]|0)+68>>2]|0;c[e>>2]=c[(c[h>>2]|0)+8>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,20563,e);i=d;return}function Nh(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Oh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;h=i;i=i+32|0;j=h+16|0;k=h+12|0;e=h+8|0;g=h+4|0;f=h;c[j>>2]=a;c[k>>2]=b;c[e>>2]=d;c[g>>2]=c[k>>2];c[f>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)==2147483647){g=0;g=g&1;i=h;return g|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)==2147483647){g=0;g=g&1;i=h;return g|0}if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)<2){g=0;g=g&1;i=h;return g|0}g=(Ph(c[f>>2]|0,c[(c[g>>2]|0)+4>>2]|0,c[e>>2]|0)|0)!=0;g=g&1;i=h;return g|0}function Ph(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;h=i;i=i+16|0;e=h+12|0;j=h+8|0;f=h+4|0;g=h;c[j>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(mc(c[(c[j>>2]|0)+8>>2]|0,c[(c[j>>2]|0)+12>>2]|0,c[(c[j>>2]|0)+16>>2]|0,c[f>>2]|0,1,c[g>>2]|0)|0)){c[e>>2]=0;a=c[e>>2]|0;i=h;return a|0}a=c[g>>2]|0;c[a>>2]=(c[a>>2]|0)+1;if((c[c[g>>2]>>2]|0)>=(c[c[f>>2]>>2]|0)){c[e>>2]=0;a=c[e>>2]|0;i=h;return 
a|0}else{c[e>>2]=1;a=c[e>>2]|0;i=h;return a|0}return 0}function Qh(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+16|0;f=d+12|0;h=d+8|0;g=d+4|0;e=d;c[f>>2]=a;c[h>>2]=b;c[g>>2]=c[f>>2];c[e>>2]=c[h>>2];Ya[c[(c[g>>2]|0)+56>>2]&63](c[f>>2]|0,c[(c[e>>2]|0)+12>>2]|0,c[(c[e>>2]|0)+16>>2]|0,c[(c[e>>2]|0)+20>>2]|0,c[(c[e>>2]|0)+24>>2]|0);i=d;return}function Rh(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+8|0;d=e+4|0;c[b>>2]=a;c[e>>2]=2;c[d>>2]=0;while(1){if((c[d>>2]|0)>=2)break;a=c[b>>2]|0;Bd(a,Sh(c[13e3+(c[d>>2]<<2)>>2]|0,13e3,2)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Sh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,13008)|0;c[(c[e>>2]|0)+8>>2]=c[j>>2];c[(c[e>>2]|0)+12>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function Th(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;e=p+36|0;q=p+32|0;f=p+28|0;g=p+24|0;l=p+20|0;m=p+16|0;n=p+12|0;j=p+8|0;o=p+4|0;k=p;c[q>>2]=a;c[f>>2]=b;c[g>>2]=d;c[l>>2]=c[q>>2];if(!(Uh(c[q>>2]|0,c[f>>2]|0,c[g>>2]|0,o)|0)){c[e>>2]=0;n=c[e>>2]|0;i=p;return n|0}c[m>>2]=c[f>>2];c[k>>2]=(c[(c[m>>2]|0)+8>>2]|0)+4+((c[o>>2]|0)*12|0);f=c[g>>2]|0;a=Pd(c[(c[m>>2]|0)+4>>2]|0)|0;b=Rd(c[(c[m>>2]|0)+8>>2]|0,c[o>>2]|0)|0;c[j>>2]=uc(f,qh(a,b,c[(c[m>>2]|0)+12>>2]|0,c[(c[m>>2]|0)+16>>2]|0,c[(c[m>>2]|0)+20>>2]|0,c[(c[m>>2]|0)+24>>2]|0)|0)|0;if(!(c[j>>2]|0)){c[e>>2]=0;n=c[e>>2]|0;i=p;return n|0}c[n>>2]=oh(88,13020,19)|0;c[(c[n>>2]|0)+64>>2]=c[j>>2];c[(c[n>>2]|0)+68>>2]=c[c[k>>2]>>2];c[(c[n>>2]|0)+72>>2]=c[(c[k>>2]|0)+4>>2];c[(c[n>>2]|0)+76>>2]=c[(c[k>>2]|0)+8>>2];c[(c[n>>2]|0)+80>>2]=c[l>>2];fc((c[n>>2]|0)+8|0);h[(c[n>>2]|0)+8+24>>3]=3.14159;lc(c[(c[n>>2]|0)+68>>2]|0,(c[j>>2]|0)+8|0,(c[n>>2]|0)+8|0);if(!((c[c[(c[m>>2]|0)+4>>2]>>2]|0)==1?(c[(c[(c[m>>2]|0)+4>>2]|0)+4>>2]|0)<=64:0))h[(c[n>>2]|0)+40>>3]=+(c[(c[n>>2]|0)+68>>2]|0)*+h[(c[j>>2]|0)+40>>3];c[e>>2]=c[n>>2];n=c[e>>2]|0;i=p;return n|0}function 
Uh(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+32|0;f=n+28|0;o=n+24|0;g=n+20|0;h=n+16|0;j=n+12|0;l=n+8|0;m=n+4|0;k=n;c[o>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[l>>2]=c[o>>2];if(!(Zh(c[o>>2]|0,c[g>>2]|0,c[j>>2]|0)|0)){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((c[(c[h>>2]|0)+164>>2]&256|0)!=0?(c[(c[l>>2]|0)+8>>2]|0)!=(c[c[(c[l>>2]|0)+12>>2]>>2]|0):0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}c[m>>2]=c[g>>2];if(c[(c[h>>2]|0)+164>>2]&65536){c[k>>2]=(c[(c[m>>2]|0)+8>>2]|0)+4+((c[c[j>>2]>>2]|0)*12|0);if((c[c[(c[m>>2]|0)+4>>2]>>2]|0)>1?(b=Tb(c[(c[k>>2]|0)+4>>2]|0)|0,b=ec(b,Tb(c[(c[k>>2]|0)+8>>2]|0)|0)|0,(b|0)<(Hd(c[(c[m>>2]|0)+4>>2]|0)|0)):0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((c[(c[h>>2]|0)+164>>2]&512|0)!=0?(c[(c[h>>2]|0)+160>>2]|0)>1:0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}}c[f>>2]=1;b=c[f>>2]|0;i=n;return b|0}function Vh(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+48|0;s=r+40|0;g=r+36|0;h=r+32|0;j=r+28|0;k=r+24|0;m=r+20|0;n=r+16|0;q=r+12|0;o=r+8|0;p=r+4|0;l=r;c[s>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[m>>2]=c[s>>2];c[q>>2]=c[(c[m>>2]|0)+68>>2];c[o>>2]=c[(c[m>>2]|0)+72>>2];c[p>>2]=c[(c[m>>2]|0)+76>>2];c[l>>2]=c[(c[(c[m>>2]|0)+64>>2]|0)+56>>2];c[n>>2]=0;while(1){if((c[n>>2]|0)>=(c[q>>2]|0))break;b=(c[g>>2]|0)+((_(c[n>>2]|0,c[o>>2]|0)|0)<<2)|0;a=(c[h>>2]|0)+((_(c[n>>2]|0,c[o>>2]|0)|0)<<2)|0;d=(c[j>>2]|0)+((_(c[n>>2]|0,c[p>>2]|0)|0)<<2)|0;f=(c[k>>2]|0)+((_(c[n>>2]|0,c[p>>2]|0)|0)<<2)|0;Ya[c[l>>2]&63](c[(c[m>>2]|0)+64>>2]|0,b,a,d,f);c[n>>2]=(c[n>>2]|0)+1}i=r;return}function Wh(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function Xh(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;f=d+20|0;h=d+16|0;g=d+12|0;c[j>>2]=a;c[f>>2]=b;c[h>>2]=c[j>>2];c[g>>2]=c[(c[h>>2]|0)+80>>2];b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;g=c[(c[g>>2]|0)+8>>2]|0;f=c[(c[h>>2]|0)+64>>2]|0;c[e>>2]=c[(c[h>>2]|0)+68>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,20592,e);i=d;return}function Yh(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Zh(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;h=i;i=i+32|0;k=h+16|0;j=h+12|0;e=h+8|0;f=h+4|0;g=h;c[k>>2]=a;c[j>>2]=b;c[e>>2]=d;c[f>>2]=c[k>>2];c[g>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)==2147483647){a=0;a=a&1;i=h;return a|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)<=0){a=0;a=a&1;i=h;return a|0}if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)<=0){a=0;a=a&1;i=h;return a|0}a=(_h(c[f>>2]|0,c[(c[g>>2]|0)+8>>2]|0,(c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+20>>2]|0)&1,c[e>>2]|0)|0)!=0;a=a&1;i=h;return a|0}function _h(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;e=mc(c[(c[k>>2]|0)+8>>2]|0,c[(c[k>>2]|0)+12>>2]|0,c[(c[k>>2]|0)+16>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0)|0;i=f;return e|0}function $h(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;ai((c[h>>2]|0)+4|0,c[c[h>>2]>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return}function ai(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;f=o+24|0;h=o+20|0;j=o+16|0;k=o+12|0;l=o+8|0;n=o+4|0;m=o;c[f>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;if((c[h>>2]|0)==2147483647){i=o;return}if(!(c[h>>2]|0)){g[c[k>>2]>>2]=0.0;g[c[j>>2]>>2]=0.0;i=o;return}if((c[h>>2]|0)<=0){i=o;return}c[n>>2]=c[c[f>>2]>>2];c[m>>2]=c[(c[f>>2]|0)+4>>2];d=(c[h>>2]|0)==1;c[l>>2]=0;if(d){while(1){if((c[l>>2]|0)>=(c[n>>2]|0))break;d=_(c[l>>2]|0,c[m>>2]|0)|0;g[(c[k>>2]|0)+(d<<2)>>2]=0.0;d=_(c[l>>2]|0,c[m>>2]|0)|0;g[(c[j>>2]|0)+(d<<2)>>2]=0.0;c[l>>2]=(c[l>>2]|0)+1}i=o;return}else{while(1){if((c[l>>2]|0)>=(c[n>>2]|0))break;d=(c[j>>2]|0)+((_(c[l>>2]|0,c[m>>2]|0)|0)<<2)|0;ai((c[f>>2]|0)+12|0,(c[h>>2]|0)-1|0,d,(c[k>>2]|0)+((_(c[l>>2]|0,c[m>>2]|0)|0)<<2)|0);c[l>>2]=(c[l>>2]|0)+1}i=o;return}}function bi(a,b,d,e,f,g,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+48|0;n=r+40|0;s=r+20|0;o=r+16|0;p=r+8|0;q=r+4|0;c[n>>2]=a;c[r+36>>2]=b;c[r+32>>2]=d;c[r+28>>2]=e;c[r+24>>2]=f;c[s>>2]=g;c[o>>2]=h;c[r+12>>2]=j;c[p>>2]=k;c[q>>2]=l;c[r>>2]=m;if((c[(c[n>>2]|0)+44>>2]|0)!=0?(c[(c[n>>2]|0)+44>>2]|0)!=(c[s>>2]|0):0){h=0;h=h&1;i=r;return h|0}if((c[(c[n>>2]|0)+48>>2]|0)!=0?(c[(c[n>>2]|0)+48>>2]|0)!=(c[o>>2]|0):0){h=0;h=h&1;i=r;return h|0}if((c[(c[n>>2]|0)+52>>2]|0)!=0?(c[(c[n>>2]|0)+52>>2]|0)!=(c[p>>2]|0):0){h=0;h=h&1;i=r;return h|0}if(!(c[(c[n>>2]|0)+56>>2]|0)){h=1;h=h&1;i=r;return h|0}h=(c[(c[n>>2]|0)+56>>2]|0)==(c[q>>2]|0);h=h&1;i=r;return h|0}function ci(a,b,d,e,f,g,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;m=p+36|0;q=p+24|0;n=p+20|0;o=p+4|0;c[m>>2]=a;c[p+32>>2]=b;c[p+28>>2]=d;c[q>>2]=e;c[n>>2]=f;c[p+16>>2]=g;c[p+12>>2]=h;c[p+8>>2]=j;c[o>>2]=k;c[p>>2]=l;if((c[(c[m>>2]|0)+48>>2]|0)!=0?(c[(c[m>>2]|0)+48>>2]|0)!=(c[q>>2]|0):0){l=0;l=l&1;i=p;return l|0}if((c[(c[m>>2]|0)+52>>2]|0)!=0?(c[(c[m>>2]|0)+52>>2]|0)!=(c[n>>2]|0):0){l=0;l=l&1;i=p;return 
l|0}if(!(c[(c[m>>2]|0)+56>>2]|0)){l=1;l=l&1;i=p;return l|0}l=(c[(c[m>>2]|0)+56>>2]|0)==(c[o>>2]|0);l=l&1;i=p;return l|0}function di(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,1,8);i=b;return}function ei(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0;Za=i;i=i+400|0;m=Za+388|0;n=Za+384|0;o=Za+380|0;p=Za+376|0;q=Za+372|0;r=Za+368|0;_a=Za+364|0;s=Za+360|0;t=Za+356|0;Ya=Za+336|0;T=Za+332|0;Ha=Za+328|0;oa=Za+324|0;J=Za+320|0;sa=Za+316|0;ta=Za+312|0;H=Za+308|0;G=Za+304|0;Ka=Za+300|0;Na=Za+296|0;Oa=Za+292|0;ya=Za+288|0;Fa=Za+284|0;Ga=Za+280|0;_=Za+276|0;fa=Za+272|0;C=Za+268|0;z=Za+264|0;K=Za+260|0;L=Za+256|0;M=Za+252|0;ja=Za+248|0;ka=Za+244|0;pa=Za+240|0;u=Za+236|0;S=Za+232|0;ma=Za+228|0;na=Za+224|0;W=Za+220|0;Ia=Za+216|0;Ea=Za+212|0;Ma=Za+208|0;Z=Za+204|0;Ja=Za+200|0;Ba=Za+196|0;La=Za+192|0;U=Za+188|0;V=Za+184|0;Ca=Za+180|0;Da=Za+176|0;X=Za+172|0;Y=Za+168|0;za=Za+164|0;Aa=Za+160|0;Ua=Za+156|0;A=Za+152|0;ea=Za+148|0;y=Za+144|0;Xa=Za+140|0;B=Za+136|0;ba=Za+132|0;x=Za+128|0;Sa=Za+124|0;Ta=Za+120|0;ca=Za+116|0;da=Za+112|0;Va=Za+108|0;Wa=Za+104|0;$=Za+100|0;aa=Za+96|0;ga=Za+92|0;ia=Za+88|0;Ra=Za+84|0;ha=Za+80|0;Pa=Za+76|0;Qa=Za+72|0;ua=Za+68|0;wa=Za+64|0;ra=Za+60|0;va=Za+56|0;la=Za+52|0;qa=Za+48|0;D=Za+44|0;F=Za+40|0;w=Za+36|0;E=Za+32|0;xa=Za+28|0;v=Za+24|0;I=Za+20|0;Q=Za+16|0;P=Za+12|0;R=Za+8|0;N=Za+4|0;O=Za;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[_a>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Za+352>>2]=.25;g[Za+348>>2]=.55901700258255;g[Za+344>>2]=.5877852439880371;g[Za+340>>2]=.9510565400123596;c
[Ya>>2]=c[_a>>2];while(1){if((c[Ya>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[S>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[T>>2]=+g[u>>2]-+g[S>>2];g[Ha>>2]=+g[u>>2]+ +g[S>>2];g[ma>>2]=+g[c[n>>2]>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[J>>2]=+g[ma>>2]+ +g[na>>2];g[U>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[V>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[Ia>>2]=+g[U>>2]+ +g[V>>2];g[Ca>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[Ma>>2]=+g[Ca>>2]+ +g[Da>>2];g[X>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[Y>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[Ja>>2]=+g[X>>2]+ +g[Y>>2];g[za>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[Aa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2];g[La>>2]=+g[za>>2]+ +g[Aa>>2];g[sa>>2]=+g[W>>2]-+g[Z>>2];g[ta>>2]=+g[Ba>>2]-+g[Ea>>2];g[H>>2]=+g[Ia>>2]-+g[Ja>>2];g[G>>2]=+g[La>>2]-+g[Ma>>2];g[Ka>>2]=+g[Ia>>2]+ +g[Ja>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[Oa>>2]=+g[Ka>>2]+ +g[Na>>2];g[ya>>2]=+g[W>>2]+ +g[Z>>2];g[Fa>>2]=+g[Ba>>2]+ +g[Ea>>2];g[Ga>>2]=+g[ya>>2]+ +g[Fa>>2];g[Sa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[Ta>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[A>>2]=+g[Sa>>2]+ +g[Ta>>2];g[ca>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[da>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[y>>2]=+g[ca>>2]+ +g[da>>2];g[Va>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[Wa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[B>>2]=+g[Va>>2]+ +g[Wa>>2];g[$>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[aa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[x>>2]=+g[$>>2]+ +g[aa>>2];g[_>>2]=+g[Ua>>2]-+g[Xa>>2];g[fa>>2]=+g[ba>>2]-+g[ea>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[K>>2]=+g[A>>2]+ +g[B>>2];g[L>>2]=+g[x>>2]+ +g[y>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[ja>>2]=+g[Ua>>2]+ 
+g[Xa>>2];g[ka>>2]=+g[ba>>2]+ +g[ea>>2];g[pa>>2]=+g[ja>>2]+ +g[ka>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[T>>2]+ +g[Ga>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[oa>>2]+ +g[pa>>2];g[c[o>>2]>>2]=+g[Ha>>2]+ +g[Oa>>2];g[c[p>>2]>>2]=+g[J>>2]+ +g[M>>2];g[ga>>2]=+g[_>>2]*.9510565400123596+ +g[fa>>2]*.5877852439880371;g[ia>>2]=+g[fa>>2]*.9510565400123596-+g[_>>2]*.5877852439880371;g[Pa>>2]=(+g[ya>>2]-+g[Fa>>2])*.55901700258255;g[Qa>>2]=+g[T>>2]-+g[Ga>>2]*.25;g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[ha>>2]=+g[Qa>>2]-+g[Pa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ra>>2]-+g[ga>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ha>>2]+ +g[ia>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ra>>2]+ +g[ga>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ha>>2]-+g[ia>>2];g[ua>>2]=+g[sa>>2]*.9510565400123596+ +g[ta>>2]*.5877852439880371;g[wa>>2]=+g[ta>>2]*.9510565400123596-+g[sa>>2]*.5877852439880371;g[la>>2]=(+g[ja>>2]-+g[ka>>2])*.55901700258255;g[qa>>2]=+g[oa>>2]-+g[pa>>2]*.25;g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[va>>2]=+g[qa>>2]-+g[la>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[ra>>2]-+g[ua>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[wa>>2]+ +g[va>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[ua>>2]+ +g[ra>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[va>>2]-+g[wa>>2];g[D>>2]=+g[z>>2]*.9510565400123596-+g[C>>2]*.5877852439880371;g[F>>2]=+g[C>>2]*.9510565400123596+ +g[z>>2]*.5877852439880371;g[xa>>2]=+g[Ha>>2]-+g[Oa>>2]*.25;g[v>>2]=(+g[Ka>>2]-+g[Na>>2])*.55901700258255;g[w>>2]=+g[xa>>2]-+g[v>>2];g[E>>2]=+g[v>>2]+ +g[xa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[w>>2]-+g[D>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[E>>2]+ +g[F>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[w>>2]+ +g[D>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[E>>2]-+g[F>>2];g[I>>2]=+g[G>>2]*.9510565400123596-+g[H>>2]*.5877852439880371;g[Q>>2]=+g[H>>2]*.9510565400123596+ +g[G>>2]*.5877852439880371;g[N>>2]=+g[J>>2]-+g[M>>2]*.25;g[O>>2]=(+g[K>>2]-+g[L>>2])*.55901700258255;g[P>>2]=+g[N>>2]-+g[O>>2];g[R>>2]=+g[O>>2]+ 
+g[N>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]+ +g[P>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[R>>2]-+g[Q>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[P>>2]-+g[I>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]+ +g[R>>2];c[Ya>>2]=(c[Ya>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=Za;return}function fi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,2,72);i=b;return}function gi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0;Da=i;i=i+336|0;m=Da+324|0;n=Da+320|0;o=Da+316|0;p=Da+312|0;q=Da+308|0;r=Da+304|0;Ea=Da+300|0;s=Da+296|0;t=Da+292|0;Ca=Da+248|0;u=Da+244|0;Q=Da+240|0;y=Da+236|0;K=Da+232|0;ma=Da+228|0;V=Da+224|0;ya=Da+220|0;R=Da+216|0;B=Da+212|0;O=Da+208|0;ca=Da+204|0;L=Da+200|0;pa=Da+196|0;U=Da+192|0;fa=Da+188|0;N=Da+184|0;sa=Da+180|0;S=Da+176|0;va=Da+172|0;T=Da+168|0;ia=Da+164|0;M=Da+160|0;w=Da+156|0;x=Da+152|0;ka=Da+148|0;la=Da+144|0;wa=Da+140|0;xa=Da+136|0;z=Da+132|0;A=Da+128|0;C=Da+124|0;D=Da+120|0;qa=Da+116|0;ra=Da+112|0;na=Da+108|0;oa=Da+104|0;da=Da+100|0;ea=Da+96|0;ta=Da+92|0;ua=Da+88|0;ga=Da+84|0;ha=Da+80|0;za=Da+76|0;ja=Da+72|0;ba=Da+68|0;v=Da+64|0;$=Da+60|0;aa=Da+56|0;Ba=Da+52|0;Aa=Da+48|0;F=Da+44|0;E=Da+40|0;X=Da+36|0;Y=Da+32|0;Z=Da+28|0;_=Da+24|0;H=Da+20|0;G=Da+16|0;P=Da+12|0;W=Da+8|0;J=Da+4|0;I=Da;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ea>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Da+288>>2]=.6548607349395752;g[Da+284>>2]=.1423148363828659;g[Da+280>>2]=.9594929814338684;g[Da+276>>2]=.4154150187969208;g[Da+272>>2]=.8412535190582275;g[Da+268>
>2]=.9898214340209961;g[Da+264>>2]=.9096319675445557;g[Da+260>>2]=.28173255920410156;g[Da+256>>2]=.5406408309936523;g[Da+252>>2]=.7557495832443237;c[Ca>>2]=c[Ea>>2];while(1){if((c[Ca>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Q>>2]=+g[c[n>>2]>>2];g[w>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[x>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[K>>2]=+g[x>>2]-+g[w>>2];g[ka>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[la>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[V>>2]=+g[ka>>2]+ +g[la>>2];g[wa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[xa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2];g[R>>2]=+g[wa>>2]+ +g[xa>>2];g[z>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[A>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[O>>2]=+g[A>>2]-+g[z>>2];g[C>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[D>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ca>>2]=+g[C>>2]+ +g[D>>2];g[L>>2]=+g[D>>2]-+g[C>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[oa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[U>>2]=+g[na>>2]+ +g[oa>>2];g[da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[ea>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[fa>>2]=+g[da>>2]+ +g[ea>>2];g[N>>2]=+g[ea>>2]-+g[da>>2];g[qa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[ra>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[S>>2]=+g[qa>>2]+ +g[ra>>2];g[ta>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[ua>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[T>>2]=+g[ta>>2]+ +g[ua>>2];g[ga>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[ha>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[M>>2]=+g[ha>>2]-+g[ga>>2];g[c[o>>2]>>2]=+g[u>>2]+ +g[y>>2]+ +g[B>>2]+ +g[ca>>2]+ +g[fa>>2]+ +g[ia>>2];g[c[p>>2]>>2]=+g[Q>>2]+ +g[V>>2]+ +g[R>>2]+ +g[U>>2]+ +g[S>>2]+ +g[T>>2];g[za>>2]=+g[ma>>2]*.7557495832443237+ 
+g[pa>>2]*.5406408309936523+(+g[sa>>2]*.28173255920410156-+g[va>>2]*.9096319675445557)-+g[ya>>2]*.9898214340209961;g[ja>>2]=+g[ca>>2]*.8412535190582275+ +g[u>>2]+(+g[ia>>2]*.4154150187969208-+g[fa>>2]*.9594929814338684)+-(+g[B>>2]*.1423148363828659+ +g[y>>2]*.6548607349395752);g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ja>>2]-+g[za>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ja>>2]+ +g[za>>2];g[ba>>2]=+g[K>>2]*.7557495832443237+ +g[L>>2]*.5406408309936523+(+g[N>>2]*.28173255920410156-+g[M>>2]*.9096319675445557)-+g[O>>2]*.9898214340209961;g[v>>2]=+g[U>>2]*.8412535190582275+ +g[Q>>2]+(+g[T>>2]*.4154150187969208-+g[S>>2]*.9594929814338684)+-(+g[R>>2]*.1423148363828659+ +g[V>>2]*.6548607349395752);g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ba>>2]+ +g[v>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[v>>2]-+g[ba>>2];g[$>>2]=+g[K>>2]*.9096319675445557+ +g[O>>2]*.7557495832443237+-(+g[M>>2]*.5406408309936523+ +g[N>>2]*.9898214340209961)-+g[L>>2]*.28173255920410156;g[aa>>2]=+g[V>>2]*.4154150187969208+ +g[Q>>2]+(+g[T>>2]*.8412535190582275-+g[S>>2]*.1423148363828659)+-(+g[U>>2]*.9594929814338684+ +g[R>>2]*.6548607349395752);g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$>>2]+ +g[aa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[aa>>2]-+g[$>>2];g[Ba>>2]=+g[ma>>2]*.9096319675445557+ +g[ya>>2]*.7557495832443237+-(+g[va>>2]*.5406408309936523+ +g[sa>>2]*.9898214340209961)-+g[pa>>2]*.28173255920410156;g[Aa>>2]=+g[y>>2]*.4154150187969208+ +g[u>>2]+(+g[ia>>2]*.8412535190582275-+g[fa>>2]*.1423148363828659)+-(+g[ca>>2]*.9594929814338684+ +g[B>>2]*.6548607349395752);g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Aa>>2]-+g[Ba>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Aa>>2]+ +g[Ba>>2];g[F>>2]=+g[ma>>2]*.5406408309936523+ +g[ya>>2]*.9096319675445557+(+g[pa>>2]*.9898214340209961+ +g[sa>>2]*.7557495832443237)+ +g[va>>2]*.28173255920410156;g[E>>2]=+g[y>>2]*.8412535190582275+ +g[u>>2]+(+g[B>>2]*.4154150187969208-+g[ia>>2]*.9594929814338684)+-(+g[fa>>2]*.6548607349395752+ 
+g[ca>>2]*.1423148363828659);g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[E>>2]-+g[F>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]+ +g[F>>2];g[Z>>2]=+g[K>>2]*.5406408309936523+ +g[O>>2]*.9096319675445557+(+g[L>>2]*.9898214340209961+ +g[N>>2]*.7557495832443237)+ +g[M>>2]*.28173255920410156;g[_>>2]=+g[V>>2]*.8412535190582275+ +g[Q>>2]+(+g[R>>2]*.4154150187969208-+g[T>>2]*.9594929814338684)+-(+g[S>>2]*.6548607349395752+ +g[U>>2]*.1423148363828659);g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Z>>2]+ +g[_>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[_>>2]-+g[Z>>2];g[H>>2]=+g[ma>>2]*.9898214340209961+ +g[sa>>2]*.5406408309936523+(+g[va>>2]*.7557495832443237-+g[pa>>2]*.9096319675445557)-+g[ya>>2]*.28173255920410156;g[G>>2]=+g[ca>>2]*.4154150187969208+ +g[u>>2]+(+g[fa>>2]*.8412535190582275-+g[ia>>2]*.6548607349395752)+-(+g[B>>2]*.9594929814338684+ +g[y>>2]*.1423148363828659);g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[G>>2]-+g[H>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[G>>2]+ +g[H>>2];g[X>>2]=+g[K>>2]*.9898214340209961+ +g[N>>2]*.5406408309936523+(+g[M>>2]*.7557495832443237-+g[L>>2]*.9096319675445557)-+g[O>>2]*.28173255920410156;g[Y>>2]=+g[U>>2]*.4154150187969208+ +g[Q>>2]+(+g[S>>2]*.8412535190582275-+g[T>>2]*.6548607349395752)+-(+g[R>>2]*.9594929814338684+ +g[V>>2]*.1423148363828659);g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[X>>2]+ +g[Y>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Y>>2]-+g[X>>2];g[P>>2]=+g[K>>2]*.28173255920410156+ +g[L>>2]*.7557495832443237+(+g[M>>2]*.9898214340209961-+g[N>>2]*.9096319675445557)-+g[O>>2]*.5406408309936523;g[W>>2]=+g[R>>2]*.8412535190582275+ +g[Q>>2]+(+g[S>>2]*.4154150187969208-+g[T>>2]*.1423148363828659)+-(+g[U>>2]*.6548607349395752+ +g[V>>2]*.9594929814338684);g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[P>>2]+ +g[W>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[W>>2]-+g[P>>2];g[J>>2]=+g[ma>>2]*.28173255920410156+ +g[pa>>2]*.7557495832443237+(+g[va>>2]*.9898214340209961-+g[sa>>2]*.9096319675445557)-+g[ya>>2]*.5406408309936523;g[I>>2]=+g[B>>2]*.8412535190582275+ 
+g[u>>2]+(+g[fa>>2]*.4154150187969208-+g[ia>>2]*.1423148363828659)+-(+g[ca>>2]*.6548607349395752+ +g[y>>2]*.9594929814338684);g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[I>>2]-+g[J>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[I>>2]+ +g[J>>2];c[Ca>>2]=(c[Ca>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=Da;return}function hi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,3,136);i=b;return}function ii(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0;jb=i;i=i+432|0;m=jb+428|0;n=jb+424|0;o=jb+420|0;p=jb+416|0;q=jb+412|0;r=jb+408|0;kb=jb+404|0;s=jb+400|0;t=jb+396|0;ib=jb+384|0;fa=jb+380|0;Ba=jb+376|0;ka=jb+372|0;ab=jb+368|0;Ca=jb+364|0;hb=jb+360|0;Ka=jb+356|0;Ea=jb+352|0;na=jb+348|0;fb=jb+344|0;Fa=jb+340|0;ma=jb+336|0;Qa=jb+332|0;F=jb+328|0;qa=jb+324|0;ta=jb+320|0;Z=jb+316|0;I=jb+312|0;Va=jb+308|0;K=jb+304|0;va=jb+300|0;ya=jb+296|0;_=jb+292|0;N=jb+288|0;u=jb+284|0;ca=jb+280|0;da=jb+276|0;ea=jb+272|0;Ya=jb+268|0;Za=jb+264|0;_a=jb+260|0;$a=jb+256|0;ga=jb+252|0;ha=jb+248|0;ia=jb+244|0;ja=jb+240|0;bb=jb+236|0;cb=jb+232|0;db=jb+228|0;eb=jb+224|0;Ma=jb+220|0;Na=jb+216|0;Oa=jb+212|0;Pa=jb+208|0;G=jb+204|0;ra=jb+200|0;sa=jb+196|0;H=jb+192|0;Ra=jb+188|0;Sa=jb+184|0;Ta=jb+180|0;Ua=jb+176|0;L=jb+172|0;wa=jb+168|0;xa=jb+164|0;M=jb+160|0;La=jb+156|0;Wa=jb+152|0;Y=jb+148|0;$=jb+144|0;aa=jb+140|0;ba=jb+136|0;Xa=jb+132|0;gb=j
b+128|0;w=jb+124|0;Q=jb+120|0;P=jb+116|0;R=jb+112|0;z=jb+108|0;D=jb+104|0;C=jb+100|0;E=jb+96|0;Ja=jb+92|0;v=jb+88|0;J=jb+84|0;O=jb+80|0;x=jb+76|0;y=jb+72|0;A=jb+68|0;B=jb+64|0;pa=jb+60|0;W=jb+56|0;V=jb+52|0;X=jb+48|0;Aa=jb+44|0;Ia=jb+40|0;Ha=jb+36|0;S=jb+32|0;la=jb+28|0;oa=jb+24|0;T=jb+20|0;U=jb+16|0;ua=jb+12|0;za=jb+8|0;Da=jb+4|0;Ga=jb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[kb>>2]=j;c[s>>2]=k;c[t>>2]=l;g[jb+392>>2]=.8660253882408142;g[jb+388>>2]=.5;c[ib>>2]=c[kb>>2];while(1){if((c[ib>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[ca>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ea>>2]=+g[ca>>2]+ +g[da>>2];g[fa>>2]=+g[u>>2]+ +g[ea>>2];g[Ba>>2]=+g[u>>2]-+g[ea>>2]*.5;g[ka>>2]=(+g[da>>2]-+g[ca>>2])*.8660253882408142;g[Ya>>2]=+g[c[n>>2]>>2];g[Za>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[_a>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[$a>>2]=+g[Za>>2]+ +g[_a>>2];g[ab>>2]=+g[Ya>>2]+ +g[$a>>2];g[Ca>>2]=(+g[Za>>2]-+g[_a>>2])*.8660253882408142;g[hb>>2]=+g[Ya>>2]-+g[$a>>2]*.5;g[ga>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[ha>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[ia>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[Ka>>2]=+g[ga>>2]+ +g[ja>>2];g[Ea>>2]=+g[ga>>2]-+g[ja>>2]*.5;g[na>>2]=(+g[ia>>2]-+g[ha>>2])*.8660253882408142;g[bb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[cb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[db>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[fb>>2]=+g[bb>>2]+ +g[eb>>2];g[Fa>>2]=(+g[cb>>2]-+g[db>>2])*.8660253882408142;g[ma>>2]=+g[bb>>2]-+g[eb>>2]*.5;g[Ma>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Na>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Oa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Pa>>2]=+g[Na>>2]+ +g[Oa>>2];g[Qa>>2]=+g[Ma>>2]+ 
+g[Pa>>2];g[F>>2]=(+g[Oa>>2]-+g[Na>>2])*.8660253882408142;g[qa>>2]=+g[Ma>>2]-+g[Pa>>2]*.5;g[G>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[ra>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[sa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[H>>2]=+g[ra>>2]+ +g[sa>>2];g[ta>>2]=(+g[ra>>2]-+g[sa>>2])*.8660253882408142;g[Z>>2]=+g[G>>2]+ +g[H>>2];g[I>>2]=+g[G>>2]-+g[H>>2]*.5;g[Ra>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Sa>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[Ta>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Va>>2]=+g[Ra>>2]+ +g[Ua>>2];g[K>>2]=(+g[Ta>>2]-+g[Sa>>2])*.8660253882408142;g[va>>2]=+g[Ra>>2]-+g[Ua>>2]*.5;g[L>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[wa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[xa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[M>>2]=+g[wa>>2]+ +g[xa>>2];g[ya>>2]=(+g[wa>>2]-+g[xa>>2])*.8660253882408142;g[_>>2]=+g[L>>2]+ +g[M>>2];g[N>>2]=+g[L>>2]-+g[M>>2]*.5;g[La>>2]=+g[fa>>2]+ +g[Ka>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Va>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[La>>2]-+g[Wa>>2];g[c[o>>2]>>2]=+g[La>>2]+ +g[Wa>>2];g[aa>>2]=+g[ab>>2]+ +g[fb>>2];g[ba>>2]=+g[Z>>2]+ +g[_>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[aa>>2]-+g[ba>>2];g[c[p>>2]>>2]=+g[aa>>2]+ +g[ba>>2];g[Xa>>2]=+g[Qa>>2]-+g[Va>>2];g[gb>>2]=+g[ab>>2]-+g[fb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xa>>2]+ +g[gb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[gb>>2]-+g[Xa>>2];g[Y>>2]=+g[fa>>2]-+g[Ka>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Y>>2]-+g[$>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Y>>2]+ +g[$>>2];g[Ja>>2]=+g[ka>>2]+ +g[hb>>2];g[v>>2]=+g[na>>2]+ +g[ma>>2];g[w>>2]=+g[Ja>>2]-+g[v>>2];g[Q>>2]=+g[Ja>>2]+ +g[v>>2];g[J>>2]=+g[F>>2]+ +g[I>>2];g[O>>2]=+g[K>>2]+ +g[N>>2];g[P>>2]=+g[J>>2]-+g[O>>2];g[R>>2]=+g[J>>2]+ +g[O>>2];g[x>>2]=+g[qa>>2]+ +g[ta>>2];g[y>>2]=+g[va>>2]+ +g[ya>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[D>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[Ba>>2]+ +g[Ca>>2];g[B>>2]=+g[Ea>>2]+ +g[Fa>>2];g[C>>2]=+g[A>>2]+ 
+g[B>>2];g[E>>2]=+g[A>>2]-+g[B>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[w>>2]-+g[z>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]+ +g[P>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[w>>2]+ +g[z>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[E>>2]-+g[P>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[C>>2]-+g[D>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[C>>2]+ +g[D>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]+ +g[R>>2];g[la>>2]=+g[hb>>2]-+g[ka>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[pa>>2]=+g[la>>2]-+g[oa>>2];g[W>>2]=+g[la>>2]+ +g[oa>>2];g[T>>2]=+g[I>>2]-+g[F>>2];g[U>>2]=+g[N>>2]-+g[K>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[X>>2]=+g[T>>2]+ +g[U>>2];g[ua>>2]=+g[qa>>2]-+g[ta>>2];g[za>>2]=+g[va>>2]-+g[ya>>2];g[Aa>>2]=+g[ua>>2]-+g[za>>2];g[Ia>>2]=+g[ua>>2]+ +g[za>>2];g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Ha>>2]=+g[Da>>2]+ +g[Ga>>2];g[S>>2]=+g[Da>>2]-+g[Ga>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[pa>>2]-+g[Aa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[S>>2]+ +g[V>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[pa>>2]+ +g[Aa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[S>>2]-+g[V>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ha>>2]-+g[Ia>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[W>>2]-+g[X>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ha>>2]+ +g[Ia>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[W>>2]+ +g[X>>2];c[ib>>2]=(c[ib>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=jb;return}function ji(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,4,200);i=b;return}function ki(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0;Jc=i;i=i+832|0;m=Jc+820|0;n=Jc+816|0;o=Jc+812|0;p=Jc+808|0;q=Jc+804|0;r=Jc+800|0;Kc=Jc+796|0;s=Jc+792|0;t=Jc+788|0;Ic=Jc+704|0;u=Jc+700|0;ta=Jc+696|0;Bc=Jc+692|0;Cc=Jc+688|0;wc=Jc+684|0;Ga=Jc+680|0;Ea=Jc+676|0;Ia=Jc+672|0;Pb=Jc+668|0;Rb=Jc+664|0;Kb=Jc+660|0;Sb=Jc+656|0;aa=Jc+652|0;Ja=Jc+648|0;db=Jc+644|0;gb=Jc+640|0;D=Jc+636|0;qa=Jc+632|0;nb=Jc+628|0;qb=Jc+624|0;oa=Jc+620|0;ua=Jc+616|0;ia=Jc+612|0;ra=Jc+608|0;kb=Jc+604|0;pb=Jc+600|0;nc=Jc+596|0;xc=Jc+592|0;jc=Jc+588|0;Mb=Jc+584|0;Fc=Jc+580|0;Gb=Jc+576|0;Lb=Jc+572|0;Ec=Jc+568|0;qc=Jc+564|0;yc=Jc+560|0;tc=Jc+556|0;zc=Jc+552|0;uc=Jc+548|0;Ac=Jc+544|0;lc=Jc+540|0;mc=Jc+536|0;kc=Jc+532|0;vc=Jc+528|0;Hb=Jc+524|0;Ib=Jc+520|0;Jb=Jc+516|0;ic=Jc+512|0;Da=Jc+508|0;Db=Jc+504|0;Eb=Jc+500|0;Fb=Jc+496|0;oc=Jc+492|0;pc=Jc+488|0;rc=Jc+484|0;sc=Jc+480|0;ba=Jc+476|0;ca=Jc+472|0;Nb=Jc+468|0;Ob=Jc+464|0;Gc=Jc+460|0;Hc=Jc+456|0;_=Jc+452|0;$=Jc+448|0;hc=Jc+444|0;eb=Jc+440|0;dc=Jc+436|0;la=Jc+432|0;da=Jc+428|0;_b=Jc+424|0;ka=Jc+420|0;E=Jc+416|0;x=Jc+412|0;ga=Jc+408|0;A=Jc+404|0;fa=Jc+400|0;B=Jc+396|0;fb=Jc+392|0;fc=Jc+388|0;gc=Jc+384|0;ec=Jc+380|0;C=Jc+376|0;$b=Jc
+372|0;ac=Jc+368|0;bc=Jc+364|0;cc=Jc+360|0;Wb=Jc+356|0;Xb=Jc+352|0;Yb=Jc+348|0;Zb=Jc+344|0;v=Jc+340|0;w=Jc+336|0;y=Jc+332|0;z=Jc+328|0;lb=Jc+324|0;mb=Jc+320|0;ma=Jc+316|0;na=Jc+312|0;ea=Jc+308|0;ha=Jc+304|0;ib=Jc+300|0;jb=Jc+296|0;I=Jc+292|0;S=Jc+288|0;Ba=Jc+284|0;Aa=Jc+280|0;J=Jc+276|0;T=Jc+272|0;Dc=Jc+268|0;Ub=Jc+264|0;O=Jc+260|0;V=Jc+256|0;pa=Jc+252|0;W=Jc+248|0;wa=Jc+244|0;N=Jc+240|0;Qb=Jc+236|0;Tb=Jc+232|0;G=Jc+228|0;H=Jc+224|0;ya=Jc+220|0;za=Jc+216|0;ja=Jc+212|0;M=Jc+208|0;sa=Jc+204|0;va=Jc+200|0;L=Jc+196|0;Vb=Jc+192|0;xa=Jc+188|0;U=Jc+184|0;X=Jc+180|0;Ca=Jc+176|0;F=Jc+172|0;Y=Jc+168|0;Z=Jc+164|0;Q=Jc+160|0;R=Jc+156|0;K=Jc+152|0;P=Jc+148|0;Bb=Jc+144|0;Wa=Jc+140|0;Xa=Jc+136|0;Ya=Jc+132|0;Pa=Jc+128|0;Za=Jc+124|0;hb=Jc+120|0;sb=Jc+116|0;xb=Jc+112|0;Ua=Jc+108|0;Ha=Jc+104|0;Ta=Jc+100|0;Ma=Jc+96|0;wb=Jc+92|0;ob=Jc+88|0;rb=Jc+84|0;zb=Jc+80|0;Ab=Jc+76|0;Cb=Jc+72|0;Oa=Jc+68|0;Fa=Jc+64|0;ub=Jc+60|0;Ka=Jc+56|0;La=Jc+52|0;vb=Jc+48|0;Na=Jc+44|0;tb=Jc+40|0;$a=Jc+36|0;ab=Jc+32|0;yb=Jc+28|0;Qa=Jc+24|0;bb=Jc+20|0;cb=Jc+16|0;Va=Jc+12|0;_a=Jc+8|0;Ra=Jc+4|0;Sa=Jc;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Kc>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Jc+784>>2]=2.0;g[Jc+780>>2]=.0833333358168602;g[Jc+776>>2]=.2517685294151306;g[Jc+772>>2]=.07590298354625702;g[Jc+768>>2]=.13298311829566956;g[Jc+764>>2]=.2582603991031647;g[Jc+760>>2]=1.7320507764816284;g[Jc+756>>2]=.30023863911628723;g[Jc+752>>2]=.011599105782806873;g[Jc+748>>2]=.15689139068126678;g[Jc+744>>2]=.2562476694583893;g[Jc+740>>2]=.174138605594635;g[Jc+736>>2]=.5751407146453857;g[Jc+732>>2]=.5035370588302612;g[Jc+728>>2]=.11385448276996613;g[Jc+724>>2]=.2659662365913391;g[Jc+720>>2]=.3873905837535858;g[Jc+716>>2]=.8660253882408142;g[Jc+712>>2]=.30046260356903076;g[Jc+708>>2]=.5;c[Ic>>2]=c[Kc>>2];while(1){if((c[Ic>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[ta>>2]=+g[c[n>>2]>>2];g[lc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[mc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[nc>>2]=+g[lc>>2]+ 
+g[mc>>2];g[xc>>2]=+g[lc>>2]-+g[mc>>2];g[Hb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[Ib>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Jb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[ic>>2]=+g[Ib>>2]+ +g[Jb>>2];g[jc>>2]=+g[Hb>>2]+ +g[ic>>2];g[Mb>>2]=+g[Ib>>2]-+g[Jb>>2];g[Fc>>2]=+g[Hb>>2]-+g[ic>>2]*.5;g[Da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[Db>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Eb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Fb>>2]=+g[Db>>2]+ +g[Eb>>2];g[Gb>>2]=+g[Da>>2]+ +g[Fb>>2];g[Lb>>2]=+g[Db>>2]-+g[Eb>>2];g[Ec>>2]=+g[Da>>2]-+g[Fb>>2]*.5;g[oc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[pc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[qc>>2]=+g[oc>>2]+ +g[pc>>2];g[yc>>2]=+g[oc>>2]-+g[pc>>2];g[rc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[sc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[tc>>2]=+g[rc>>2]+ +g[sc>>2];g[zc>>2]=+g[rc>>2]-+g[sc>>2];g[uc>>2]=+g[qc>>2]+ +g[tc>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[Bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[Cc>>2]=+g[Gb>>2]-+g[jc>>2];g[kc>>2]=+g[Gb>>2]+ +g[jc>>2];g[vc>>2]=+g[nc>>2]+ +g[uc>>2];g[wc>>2]=+g[kc>>2]+ +g[vc>>2];g[Ga>>2]=(+g[kc>>2]-+g[vc>>2])*.30046260356903076;g[ba>>2]=+g[Lb>>2]+ +g[Mb>>2];g[ca>>2]=+g[yc>>2]-+g[zc>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[Ia>>2]=+g[ba>>2]+ +g[ca>>2];g[Nb>>2]=(+g[Lb>>2]-+g[Mb>>2])*.8660253882408142;g[Ob>>2]=+g[xc>>2]-+g[Ac>>2]*.5;g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2];g[Rb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Hc>>2]=(+g[qc>>2]-+g[tc>>2])*.8660253882408142;g[Kb>>2]=+g[Gc>>2]+ +g[Hc>>2];g[Sb>>2]=+g[Gc>>2]-+g[Hc>>2];g[_>>2]=+g[Ec>>2]+ +g[Fc>>2];g[$>>2]=+g[nc>>2]-+g[uc>>2]*.5;g[aa>>2]=+g[_>>2]-+g[$>>2];g[Ja>>2]=+g[_>>2]+ +g[$>>2];g[fc>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[gc>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[hc>>2]=+g[fc>>2]+ +g[gc>>2];g[eb>>2]=+g[fc>>2]-+g[gc>>2];g[$b>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[ac>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[bc>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[cc>>2]=+g[ac>>2]+ 
+g[bc>>2];g[dc>>2]=+g[$b>>2]-+g[cc>>2]*.5;g[la>>2]=+g[$b>>2]+ +g[cc>>2];g[da>>2]=+g[ac>>2]-+g[bc>>2];g[Wb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[Xb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Yb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[_b>>2]=+g[Wb>>2]-+g[Zb>>2]*.5;g[ka>>2]=+g[Wb>>2]+ +g[Zb>>2];g[E>>2]=+g[Xb>>2]-+g[Yb>>2];g[v>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[w>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[ga>>2]=+g[v>>2]-+g[w>>2];g[y>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[fa>>2]=+g[y>>2]-+g[z>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[fb>>2]=+g[ga>>2]+ +g[fa>>2];g[db>>2]=+g[ka>>2]-+g[la>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[ec>>2]=+g[_b>>2]+ +g[dc>>2];g[C>>2]=+g[hc>>2]-+g[B>>2]*.5;g[D>>2]=+g[ec>>2]-+g[C>>2];g[qa>>2]=+g[ec>>2]+ +g[C>>2];g[lb>>2]=+g[_b>>2]-+g[dc>>2];g[mb>>2]=(+g[A>>2]-+g[x>>2])*.8660253882408142;g[nb>>2]=+g[lb>>2]+ +g[mb>>2];g[qb>>2]=+g[lb>>2]-+g[mb>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[na>>2]=+g[hc>>2]+ +g[B>>2];g[oa>>2]=(+g[ma>>2]-+g[na>>2])*.30046260356903076;g[ua>>2]=+g[ma>>2]+ +g[na>>2];g[ea>>2]=+g[E>>2]+ +g[da>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[ia>>2]=+g[ea>>2]+ +g[ha>>2];g[ra>>2]=+g[ha>>2]-+g[ea>>2];g[ib>>2]=+g[eb>>2]-+g[fb>>2]*.5;g[jb>>2]=(+g[da>>2]-+g[E>>2])*.8660253882408142;g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[pb>>2]=+g[jb>>2]+ +g[ib>>2];g[c[o>>2]>>2]=+g[u>>2]+ +g[wc>>2];g[c[p>>2]>>2]=+g[ta>>2]+ +g[ua>>2];g[G>>2]=+g[ia>>2]*.3873905837535858+ +g[D>>2]*.2659662365913391;g[H>>2]=+g[ra>>2]*.11385448276996613+ +g[qa>>2]*.5035370588302612;g[I>>2]=+g[G>>2]+ +g[H>>2];g[S>>2]=+g[H>>2]-+g[G>>2];g[Ba>>2]=+g[Cc>>2]*.5751407146453857+ +g[Bc>>2]*.174138605594635;g[ya>>2]=+g[Sb>>2]*.2562476694583893-+g[Rb>>2]*.15689139068126678;g[za>>2]=+g[Pb>>2]*.011599105782806873+ +g[Kb>>2]*.30023863911628723;g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[J>>2]=+g[Ba>>2]+ +g[Aa>>2];g[T>>2]=(+g[ya>>2]+ 
+g[za>>2])*1.7320507764816284;g[Dc>>2]=+g[Bc>>2]*.5751407146453857-+g[Cc>>2]*.174138605594635;g[Qb>>2]=+g[Kb>>2]*.011599105782806873-+g[Pb>>2]*.30023863911628723;g[Tb>>2]=+g[Rb>>2]*.2562476694583893+ +g[Sb>>2]*.15689139068126678;g[Ub>>2]=+g[Qb>>2]-+g[Tb>>2];g[O>>2]=(+g[Tb>>2]+ +g[Qb>>2])*1.7320507764816284;g[V>>2]=+g[Dc>>2]-+g[Ub>>2];g[ja>>2]=+g[D>>2]*.2582603991031647-+g[ia>>2]*.13298311829566956;g[M>>2]=+g[oa>>2]-+g[ja>>2];g[sa>>2]=+g[qa>>2]*.07590298354625702-+g[ra>>2]*.2517685294151306;g[va>>2]=+g[ta>>2]-+g[ua>>2]*.0833333358168602;g[L>>2]=+g[va>>2]-+g[sa>>2];g[pa>>2]=+g[ja>>2]*2.0+ +g[oa>>2];g[W>>2]=+g[M>>2]+ +g[L>>2];g[wa>>2]=+g[sa>>2]*2.0+ +g[va>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Vb>>2]=+g[Ub>>2]*2.0+ +g[Dc>>2];g[xa>>2]=+g[pa>>2]+ +g[wa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Vb>>2]+ +g[xa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[xa>>2]-+g[Vb>>2];g[Ca>>2]=+g[Aa>>2]*2.0-+g[Ba>>2];g[F>>2]=+g[wa>>2]-+g[pa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ca>>2]+ +g[F>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[F>>2]-+g[Ca>>2];g[Y>>2]=+g[W>>2]-+g[V>>2];g[Z>>2]=+g[T>>2]+ +g[S>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Y>>2]-+g[Z>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Z>>2]+ +g[Y>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[U>>2]+ +g[X>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[X>>2]-+g[U>>2];g[Q>>2]=+g[O>>2]+ +g[N>>2];g[R>>2]=+g[J>>2]+ +g[I>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[R>>2]+ +g[Q>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[K>>2]+ +g[P>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[P>>2]-+g[K>>2];g[zb>>2]=+g[Ea>>2]*.3873905837535858+ +g[aa>>2]*.2659662365913391;g[Ab>>2]=+g[Ia>>2]*.11385448276996613-+g[Ja>>2]*.5035370588302612;g[Bb>>2]=+g[zb>>2]+ +g[Ab>>2];g[Wa>>2]=+g[zb>>2]-+g[Ab>>2];g[Xa>>2]=+g[db>>2]*.5751407146453857+ 
+g[gb>>2]*.174138605594635;g[Cb>>2]=+g[pb>>2]*.011599105782806873-+g[qb>>2]*.30023863911628723;g[Oa>>2]=+g[nb>>2]*.2562476694583893-+g[kb>>2]*.15689139068126678;g[Ya>>2]=+g[Cb>>2]+ +g[Oa>>2];g[Pa>>2]=(+g[Cb>>2]-+g[Oa>>2])*1.7320507764816284;g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[hb>>2]=+g[db>>2]*.174138605594635-+g[gb>>2]*.5751407146453857;g[ob>>2]=+g[kb>>2]*.2562476694583893+ +g[nb>>2]*.15689139068126678;g[rb>>2]=+g[pb>>2]*.30023863911628723+ +g[qb>>2]*.011599105782806873;g[sb>>2]=+g[ob>>2]-+g[rb>>2];g[xb>>2]=+g[hb>>2]-+g[sb>>2];g[Ua>>2]=(+g[rb>>2]+ +g[ob>>2])*1.7320507764816284;g[Fa>>2]=+g[aa>>2]*.2582603991031647-+g[Ea>>2]*.13298311829566956;g[ub>>2]=+g[Ga>>2]-+g[Fa>>2];g[Ka>>2]=+g[Ia>>2]*.2517685294151306+ +g[Ja>>2]*.07590298354625702;g[La>>2]=+g[u>>2]-+g[wc>>2]*.0833333358168602;g[vb>>2]=+g[La>>2]-+g[Ka>>2];g[Ha>>2]=+g[Fa>>2]*2.0+ +g[Ga>>2];g[Ta>>2]=+g[vb>>2]-+g[ub>>2];g[Ma>>2]=+g[Ka>>2]*2.0+ +g[La>>2];g[wb>>2]=+g[ub>>2]+ +g[vb>>2];g[Na>>2]=+g[Ha>>2]+ +g[Ma>>2];g[tb>>2]=+g[sb>>2]*2.0+ +g[hb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Na>>2]-+g[tb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Na>>2]+ +g[tb>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[Qa>>2]=+g[Bb>>2]-+g[Pa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[yb>>2]-+g[Qa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[yb>>2]+ +g[Qa>>2];g[bb>>2]=+g[Ma>>2]-+g[Ha>>2];g[cb>>2]=+g[Ya>>2]*2.0-+g[Xa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[bb>>2]-+g[cb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[bb>>2]+ +g[cb>>2];g[$a>>2]=+g[Ta>>2]-+g[Ua>>2];g[ab>>2]=+g[Za>>2]-+g[Wa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[$a>>2]-+g[ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[$a>>2]+ +g[ab>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Va>>2]-+g[_a>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Va>>2]+ +g[_a>>2];g[Ra>>2]=+g[wb>>2]+ +g[xb>>2];g[Sa>>2]=+g[Bb>>2]+ +g[Pa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ra>>2]-+g[Sa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ra>>2]+ 
+g[Sa>>2];c[Ic>>2]=(c[Ic>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=Jc;return}function li(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,5,264);i=b;return}function mi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0;rb=i;i=i+480|0;m=rb+476|0;n=rb+472|0;o=rb+468|0;p=rb+464|0;q=rb+460|0;r=rb+456|0;sb=rb+452|0;s=rb+448|0;t=rb+444|0;qb=rb+416|0;la=rb+412|0;fb=rb+408|0;B=rb+404|0;R=rb+400|0;Sa=rb+396|0;aa=rb+392|0;ib=rb+388|0;v=rb+384|0;ya=rb+380|0;ja=rb+376|0;E=rb+372|0;U=rb+368|0;Za=rb+364|0;ca=rb+360|0;lb=rb+356|0;x=rb+352|0;Ma=rb+348|0;G=rb+344|0;C=rb+340|0;_=rb+336|0;eb=rb+332|0;ba=rb+328|0;ob=rb+324|0;w=rb+320|0;Fa=rb+316|0;F=rb+312|0;D=rb+308|0;X=rb+304|0;u=rb+300|0;ka=rb+296|0;z=rb+292|0;A=rb+288|0;oa=rb+284|0;gb=rb+280|0;ra=rb+276|0;hb=rb+272|0;ma=rb+268|0;na=rb+264|0;pa=rb+260|0;qa=rb+256|0;ua=rb+252|0;S=rb+248|0;xa=rb+244|0;T=rb+240|0;sa=rb+236|0;ta=rb+232|0;va=rb+228|0;wa=rb+224|0;Va=rb+220|0;jb=rb+216|0;Ya=rb+212|0;kb=rb+208|0;Ta=rb+204|0;Ua=rb+200|0;Wa=rb+196|0;Xa=rb+192|0;Ia=rb+188|0;Y=rb+184|0;La=rb+180|0;Z=rb+176|0;Ga=rb+172|0;Ha=rb+168|0;Ja=rb+164|0;Ka=rb+160|0;ab=rb+156|0;mb=rb+152|0;db=rb+148|0;nb=rb+144|0;_a=rb+140|0;$a=rb+136|0;bb=rb+132|0;cb=rb+128|0;Ba=rb+124|0;V=rb+120|0;Ea=rb+116|0;W=rb+112|0;za=rb+108|0;Aa=rb+104|0;Ca=rb+100|0;Da=rb+96
|0;Na=rb+92|0;pb=rb+88|0;Q=rb+84|0;P=rb+80|0;Pa=rb+76|0;Oa=rb+72|0;N=rb+68|0;O=rb+64|0;Ra=rb+60|0;Qa=rb+56|0;y=rb+52|0;M=rb+48|0;da=rb+44|0;$=rb+40|0;H=rb+36|0;ia=rb+32|0;fa=rb+28|0;ea=rb+24|0;J=rb+20|0;I=rb+16|0;ga=rb+12|0;ha=rb+8|0;L=rb+4|0;K=rb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[sb>>2]=j;c[s>>2]=k;c[t>>2]=l;g[rb+440>>2]=.22252093255519867;g[rb+436>>2]=.9009688496589661;g[rb+432>>2]=.6234897971153259;g[rb+428>>2]=.4338837265968323;g[rb+424>>2]=.7818315029144287;g[rb+420>>2]=.9749279022216797;c[qb>>2]=c[sb>>2];while(1){if((c[qb>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[ka>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[la>>2]=+g[u>>2]-+g[ka>>2];g[fb>>2]=+g[u>>2]+ +g[ka>>2];g[z>>2]=+g[c[n>>2]>>2];g[A>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[R>>2]=+g[z>>2]+ +g[A>>2];g[ma>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[na>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[gb>>2]=+g[ma>>2]+ +g[na>>2];g[pa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[qa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[hb>>2]=+g[pa>>2]+ +g[qa>>2];g[Sa>>2]=+g[oa>>2]+ +g[ra>>2];g[aa>>2]=+g[hb>>2]-+g[gb>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[v>>2]=+g[ra>>2]-+g[oa>>2];g[sa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[ta>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[S>>2]=+g[sa>>2]+ +g[ta>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[wa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[T>>2]=+g[va>>2]+ +g[wa>>2];g[ya>>2]=+g[ua>>2]-+g[xa>>2];g[ja>>2]=+g[S>>2]-+g[T>>2];g[E>>2]=+g[ua>>2]+ +g[xa>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[Ta>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[Ua>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2];g[jb>>2]=+g[Ta>>2]+ +g[Ua>>2];g[Wa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Xa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[kb>>2]=+g[Wa>>2]+ +g[Xa>>2];g[Za>>2]=+g[Va>>2]+ 
+g[Ya>>2];g[ca>>2]=+g[jb>>2]-+g[kb>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[x>>2]=+g[Ya>>2]-+g[Va>>2];g[Ga>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[Ha>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[Y>>2]=+g[Ga>>2]+ +g[Ha>>2];g[Ja>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Ka>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[Z>>2]=+g[Ja>>2]+ +g[Ka>>2];g[Ma>>2]=+g[Ia>>2]-+g[La>>2];g[G>>2]=+g[Z>>2]-+g[Y>>2];g[C>>2]=+g[Ia>>2]+ +g[La>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[_a>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[$a>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[mb>>2]=+g[_a>>2]+ +g[$a>>2];g[bb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[cb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[nb>>2]=+g[bb>>2]+ +g[cb>>2];g[eb>>2]=+g[ab>>2]+ +g[db>>2];g[ba>>2]=+g[mb>>2]-+g[nb>>2];g[ob>>2]=+g[mb>>2]+ +g[nb>>2];g[w>>2]=+g[db>>2]-+g[ab>>2];g[za>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Aa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2];g[V>>2]=+g[za>>2]+ +g[Aa>>2];g[Ca>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[Da>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[W>>2]=+g[Ca>>2]+ +g[Da>>2];g[Fa>>2]=+g[Ba>>2]-+g[Ea>>2];g[F>>2]=+g[W>>2]-+g[V>>2];g[D>>2]=+g[Ba>>2]+ +g[Ea>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[la>>2]+ +g[Sa>>2]+ +g[Za>>2]+ +g[eb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[B>>2]+ +g[E>>2]+ +g[C>>2]+ +g[D>>2];g[c[o>>2]>>2]=+g[fb>>2]+ +g[ib>>2]+ +g[lb>>2]+ +g[ob>>2];g[c[p>>2]>>2]=+g[R>>2]+ +g[U>>2]+ +g[_>>2]+ +g[X>>2];g[Na>>2]=+g[ya>>2]*.9749279022216797-+g[Fa>>2]*.7818315029144287-+g[Ma>>2]*.4338837265968323;g[pb>>2]=+g[eb>>2]*.6234897971153259+ +g[la>>2]+-(+g[Za>>2]*.9009688496589661+ +g[Sa>>2]*.22252093255519867);g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[pb>>2]-+g[Na>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[pb>>2]+ 
+g[Na>>2];g[Q>>2]=+g[v>>2]*.9749279022216797-+g[w>>2]*.7818315029144287-+g[x>>2]*.4338837265968323;g[P>>2]=+g[D>>2]*.6234897971153259+ +g[B>>2]+-(+g[C>>2]*.9009688496589661+ +g[E>>2]*.22252093255519867);g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[P>>2]-+g[Q>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Q>>2]+ +g[P>>2];g[Pa>>2]=+g[ya>>2]*.7818315029144287+ +g[Ma>>2]*.9749279022216797+ +g[Fa>>2]*.4338837265968323;g[Oa>>2]=+g[Sa>>2]*.6234897971153259+ +g[la>>2]+-(+g[eb>>2]*.9009688496589661+ +g[Za>>2]*.22252093255519867);g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Oa>>2]-+g[Pa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Oa>>2]+ +g[Pa>>2];g[N>>2]=+g[v>>2]*.7818315029144287+ +g[x>>2]*.9749279022216797+ +g[w>>2]*.4338837265968323;g[O>>2]=+g[E>>2]*.6234897971153259+ +g[B>>2]+-(+g[D>>2]*.9009688496589661+ +g[C>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[N>>2]+ +g[O>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[O>>2]-+g[N>>2];g[Ra>>2]=+g[ya>>2]*.4338837265968323+ +g[Fa>>2]*.9749279022216797-+g[Ma>>2]*.7818315029144287;g[Qa>>2]=+g[Za>>2]*.6234897971153259+ +g[la>>2]+-(+g[eb>>2]*.22252093255519867+ +g[Sa>>2]*.9009688496589661);g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Qa>>2]-+g[Ra>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Qa>>2]+ +g[Ra>>2];g[y>>2]=+g[v>>2]*.4338837265968323+ +g[w>>2]*.9749279022216797-+g[x>>2]*.7818315029144287;g[M>>2]=+g[C>>2]*.6234897971153259+ +g[B>>2]+-(+g[D>>2]*.22252093255519867+ +g[E>>2]*.9009688496589661);g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[y>>2]+ +g[M>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[M>>2]-+g[y>>2];g[da>>2]=+g[aa>>2]*.7818315029144287-+g[ba>>2]*.4338837265968323-+g[ca>>2]*.9749279022216797;g[$>>2]=+g[U>>2]*.6234897971153259+ +g[R>>2]+-(+g[X>>2]*.9009688496589661+ +g[_>>2]*.22252093255519867);g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[$>>2]-+g[da>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[da>>2]+ +g[$>>2];g[H>>2]=+g[ja>>2]*.7818315029144287-+g[F>>2]*.4338837265968323-+g[G>>2]*.9749279022216797;g[ia>>2]=+g[ib>>2]*.6234897971153259+ 
+g[fb>>2]+-(+g[ob>>2]*.9009688496589661+ +g[lb>>2]*.22252093255519867);g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ia>>2]-+g[H>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[ia>>2]+ +g[H>>2];g[fa>>2]=+g[aa>>2]*.4338837265968323+ +g[ca>>2]*.7818315029144287-+g[ba>>2]*.9749279022216797;g[ea>>2]=+g[_>>2]*.6234897971153259+ +g[R>>2]+-(+g[X>>2]*.22252093255519867+ +g[U>>2]*.9009688496589661);g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ea>>2]-+g[fa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[fa>>2]+ +g[ea>>2];g[J>>2]=+g[ja>>2]*.4338837265968323+ +g[G>>2]*.7818315029144287-+g[F>>2]*.9749279022216797;g[I>>2]=+g[lb>>2]*.6234897971153259+ +g[fb>>2]+-(+g[ob>>2]*.22252093255519867+ +g[ib>>2]*.9009688496589661);g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[I>>2]-+g[J>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[I>>2]+ +g[J>>2];g[ga>>2]=+g[aa>>2]*.9749279022216797+ +g[ca>>2]*.4338837265968323+ +g[ba>>2]*.7818315029144287;g[ha>>2]=+g[X>>2]*.6234897971153259+ +g[R>>2]+-(+g[_>>2]*.9009688496589661+ +g[U>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ga>>2]+ +g[ha>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[ha>>2]-+g[ga>>2];g[L>>2]=+g[ja>>2]*.9749279022216797+ +g[G>>2]*.4338837265968323+ +g[F>>2]*.7818315029144287;g[K>>2]=+g[ob>>2]*.6234897971153259+ +g[fb>>2]+-(+g[lb>>2]*.9009688496589661+ +g[ib>>2]*.22252093255519867);g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[K>>2]+ +g[L>>2];c[qb>>2]=(c[qb>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=rb;return}function ni(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,6,328);i=b;return}function oi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0;pc=i;i=i+688|0;m=pc+684|0;n=pc+680|0;o=pc+676|0;p=pc+672|0;q=pc+668|0;r=pc+664|0;qc=pc+660|0;s=pc+656|0;t=pc+652|0;oc=pc+624|0;lb=pc+620|0;Za=pc+616|0;lc=pc+612|0;Lb=pc+608|0;H=pc+604|0;Ea=pc+600|0;$b=pc+596|0;ec=pc+592|0;fc=pc+588|0;Db=pc+584|0;Ib=pc+580|0;Jb=pc+576|0;Qa=pc+572|0;Ra=pc+568|0;$a=pc+564|0;T=pc+560|0;U=pc+556|0;Ga=pc+552|0;oa=pc+548|0;ta=pc+544|0;za=pc+540|0;Pb=pc+536|0;v=pc+532|0;w=pc+528|0;Qb=pc+524|0;Vb=pc+520|0;Wb=pc+516|0;sb=pc+512|0;xb=pc+508|0;yb=pc+504|0;Ta=pc+500|0;Ua=pc+496|0;_a=pc+492|0;W=pc+488|0;X=pc+484|0;Fa=pc+480|0;da=pc+476|0;ia=pc+472|0;ya=pc+468|0;Mb=pc+464|0;Nb=pc+460|0;Ob=pc+456|0;u=pc+452|0;Ca=pc+448|0;kb=pc+444|0;Ba=pc+440|0;kc=pc+436|0;F=pc+432|0;hc=pc+428|0;G=pc+424|0;Da=pc+420|0;jb=pc+416|0;ic=pc+412|0;jc=pc+408|0;Xb=pc+404|0;_b=pc+400|0;zb=pc+396|0;ka=pc+392|0;la=pc+388|0;ma=pc+384|0;Cb=pc+380|0;na=pc+376|0;ac=pc+372|0;dc=pc+368|0;Eb=pc+364|0;pa=pc+360|0;qa=pc+356|0;ra=pc+352|0;Hb=pc+348|0;sa=pc+344|0;Yb=pc+340|0;Zb=pc+336|0;Ab=pc+332|0;Bb=pc+328|0;bc=pc+324|0;cc=pc+320|0;Fb=pc+316|0;Gb=pc+312|0;mb=pc+308|0;pb=pc+304|0;mc=pc+300|0;B=pc+296|0;C=pc+292|0;D=pc+288|0;rb=pc+284|0;E=pc+280|0;Rb=pc+276|0;Ub=pc+272|0;tb=pc+268|0;ea=pc+264|0;fa=pc+260|0;ga
=pc+256|0;wb=pc+252|0;ha=pc+248|0;nb=pc+244|0;ob=pc+240|0;nc=pc+236|0;qb=pc+232|0;Sb=pc+228|0;Tb=pc+224|0;ub=pc+220|0;vb=pc+216|0;Oa=pc+212|0;gc=pc+208|0;Na=pc+204|0;Wa=pc+200|0;Ya=pc+196|0;Sa=pc+192|0;Va=pc+188|0;Xa=pc+184|0;Pa=pc+180|0;cb=pc+176|0;ab=pc+172|0;bb=pc+168|0;gb=pc+164|0;ib=pc+160|0;eb=pc+156|0;fb=pc+152|0;hb=pc+148|0;db=pc+144|0;R=pc+140|0;Kb=pc+136|0;Q=pc+132|0;Z=pc+128|0;$=pc+124|0;V=pc+120|0;Y=pc+116|0;_=pc+112|0;S=pc+108|0;Ja=pc+104|0;Ha=pc+100|0;Ia=pc+96|0;ca=pc+92|0;Ma=pc+88|0;aa=pc+84|0;ba=pc+80|0;La=pc+76|0;Ka=pc+72|0;Aa=pc+68|0;I=pc+64|0;J=pc+60|0;N=pc+56|0;O=pc+52|0;L=pc+48|0;M=pc+44|0;P=pc+40|0;K=pc+36|0;y=pc+32|0;x=pc+28|0;z=pc+24|0;va=pc+20|0;xa=pc+16|0;ja=pc+12|0;ua=pc+8|0;wa=pc+4|0;A=pc;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[qc>>2]=j;c[s>>2]=k;c[t>>2]=l;g[pc+648>>2]=.5877852439880371;g[pc+644>>2]=.9510565400123596;g[pc+640>>2]=.25;g[pc+636>>2]=.55901700258255;g[pc+632>>2]=.5;g[pc+628>>2]=.8660253882408142;c[oc>>2]=c[qc>>2];while(1){if((c[oc>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Ca>>2]=+g[c[n>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[jb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[kb>>2]=+g[Da>>2]+ +g[jb>>2];g[Ba>>2]=(+g[jb>>2]-+g[Da>>2])*.8660253882408142;g[ic>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[jc>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[kc>>2]=(+g[ic>>2]-+g[jc>>2])*.8660253882408142;g[F>>2]=+g[ic>>2]+ +g[jc>>2];g[lb>>2]=+g[u>>2]+ +g[kb>>2];g[Za>>2]=+g[Ca>>2]+ +g[F>>2];g[hc>>2]=+g[u>>2]-+g[kb>>2]*.5;g[lc>>2]=+g[hc>>2]-+g[kc>>2];g[Lb>>2]=+g[hc>>2]+ +g[kc>>2];g[G>>2]=+g[Ca>>2]-+g[F>>2]*.5;g[H>>2]=+g[Ba>>2]+ +g[G>>2];g[Ea>>2]=+g[G>>2]-+g[Ba>>2];g[Xb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Yb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Zb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[_b>>2]=+g[Yb>>2]+ 
+g[Zb>>2];g[zb>>2]=+g[Xb>>2]-+g[_b>>2]*.5;g[ka>>2]=(+g[Zb>>2]-+g[Yb>>2])*.8660253882408142;g[la>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Ab>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Bb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[ma>>2]=+g[Ab>>2]+ +g[Bb>>2];g[Cb>>2]=(+g[Ab>>2]-+g[Bb>>2])*.8660253882408142;g[na>>2]=+g[la>>2]-+g[ma>>2]*.5;g[ac>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[bc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[cc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[Eb>>2]=+g[ac>>2]-+g[dc>>2]*.5;g[pa>>2]=(+g[cc>>2]-+g[bc>>2])*.8660253882408142;g[qa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Fb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[Gb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[ra>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Hb>>2]=(+g[Fb>>2]-+g[Gb>>2])*.8660253882408142;g[sa>>2]=+g[qa>>2]-+g[ra>>2]*.5;g[$b>>2]=+g[Xb>>2]+ +g[_b>>2];g[ec>>2]=+g[ac>>2]+ +g[dc>>2];g[fc>>2]=+g[$b>>2]+ +g[ec>>2];g[Db>>2]=+g[zb>>2]-+g[Cb>>2];g[Ib>>2]=+g[Eb>>2]-+g[Hb>>2];g[Jb>>2]=+g[Db>>2]+ +g[Ib>>2];g[Qa>>2]=+g[la>>2]+ +g[ma>>2];g[Ra>>2]=+g[qa>>2]+ +g[ra>>2];g[$a>>2]=+g[Qa>>2]+ +g[Ra>>2];g[T>>2]=+g[na>>2]-+g[ka>>2];g[U>>2]=+g[sa>>2]-+g[pa>>2];g[Ga>>2]=+g[T>>2]+ +g[U>>2];g[oa>>2]=+g[ka>>2]+ +g[na>>2];g[ta>>2]=+g[pa>>2]+ +g[sa>>2];g[za>>2]=+g[oa>>2]+ +g[ta>>2];g[Pb>>2]=+g[zb>>2]+ +g[Cb>>2];g[v>>2]=+g[Eb>>2]+ +g[Hb>>2];g[w>>2]=+g[Pb>>2]+ +g[v>>2];g[mb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[nb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ob>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[pb>>2]=+g[nb>>2]+ +g[ob>>2];g[mc>>2]=+g[mb>>2]-+g[pb>>2]*.5;g[B>>2]=(+g[ob>>2]-+g[nb>>2])*.8660253882408142;g[C>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[nc>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[qb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[D>>2]=+g[nc>>2]+ 
+g[qb>>2];g[rb>>2]=(+g[nc>>2]-+g[qb>>2])*.8660253882408142;g[E>>2]=+g[C>>2]-+g[D>>2]*.5;g[Rb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[Sb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[Tb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[tb>>2]=+g[Rb>>2]-+g[Ub>>2]*.5;g[ea>>2]=(+g[Tb>>2]-+g[Sb>>2])*.8660253882408142;g[fa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[ub>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[vb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[ga>>2]=+g[ub>>2]+ +g[vb>>2];g[wb>>2]=(+g[ub>>2]-+g[vb>>2])*.8660253882408142;g[ha>>2]=+g[fa>>2]-+g[ga>>2]*.5;g[Qb>>2]=+g[mb>>2]+ +g[pb>>2];g[Vb>>2]=+g[Rb>>2]+ +g[Ub>>2];g[Wb>>2]=+g[Qb>>2]+ +g[Vb>>2];g[sb>>2]=+g[mc>>2]-+g[rb>>2];g[xb>>2]=+g[tb>>2]-+g[wb>>2];g[yb>>2]=+g[sb>>2]+ +g[xb>>2];g[Ta>>2]=+g[C>>2]+ +g[D>>2];g[Ua>>2]=+g[fa>>2]+ +g[ga>>2];g[_a>>2]=+g[Ta>>2]+ +g[Ua>>2];g[W>>2]=+g[E>>2]-+g[B>>2];g[X>>2]=+g[ha>>2]-+g[ea>>2];g[Fa>>2]=+g[W>>2]+ +g[X>>2];g[da>>2]=+g[B>>2]+ +g[E>>2];g[ia>>2]=+g[ea>>2]+ +g[ha>>2];g[ya>>2]=+g[da>>2]+ +g[ia>>2];g[Mb>>2]=+g[mc>>2]+ +g[rb>>2];g[Nb>>2]=+g[tb>>2]+ +g[wb>>2];g[Ob>>2]=+g[Mb>>2]+ +g[Nb>>2];g[Oa>>2]=(+g[Wb>>2]-+g[fc>>2])*.55901700258255;g[gc>>2]=+g[Wb>>2]+ +g[fc>>2];g[Na>>2]=+g[lb>>2]-+g[gc>>2]*.25;g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2];g[Wa>>2]=+g[Sa>>2]*.9510565400123596-+g[Va>>2]*.5877852439880371;g[Ya>>2]=+g[Va>>2]*.9510565400123596+ +g[Sa>>2]*.5877852439880371;g[c[o>>2]>>2]=+g[lb>>2]+ +g[gc>>2];g[Xa>>2]=+g[Oa>>2]+ +g[Na>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Xa>>2]-+g[Ya>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Pa>>2]=+g[Na>>2]-+g[Oa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Pa>>2]-+g[Wa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Pa>>2]+ +g[Wa>>2];g[cb>>2]=(+g[_a>>2]-+g[$a>>2])*.55901700258255;g[ab>>2]=+g[_a>>2]+ 
+g[$a>>2];g[bb>>2]=+g[Za>>2]-+g[ab>>2]*.25;g[eb>>2]=+g[$b>>2]-+g[ec>>2];g[fb>>2]=+g[Qb>>2]-+g[Vb>>2];g[gb>>2]=+g[eb>>2]*.9510565400123596-+g[fb>>2]*.5877852439880371;g[ib>>2]=+g[fb>>2]*.9510565400123596+ +g[eb>>2]*.5877852439880371;g[c[p>>2]>>2]=+g[Za>>2]+ +g[ab>>2];g[hb>>2]=+g[cb>>2]+ +g[bb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[hb>>2]-+g[ib>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[ib>>2]+ +g[hb>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[db>>2]-+g[gb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[gb>>2]+ +g[db>>2];g[R>>2]=(+g[yb>>2]-+g[Jb>>2])*.55901700258255;g[Kb>>2]=+g[yb>>2]+ +g[Jb>>2];g[Q>>2]=+g[lc>>2]-+g[Kb>>2]*.25;g[V>>2]=+g[T>>2]-+g[U>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[Z>>2]=+g[V>>2]*.9510565400123596-+g[Y>>2]*.5877852439880371;g[$>>2]=+g[Y>>2]*.9510565400123596+ +g[V>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[lc>>2]+ +g[Kb>>2];g[_>>2]=+g[R>>2]+ +g[Q>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[_>>2]-+g[$>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[_>>2]+ +g[$>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[S>>2]-+g[Z>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[S>>2]+ +g[Z>>2];g[Ja>>2]=(+g[Fa>>2]-+g[Ga>>2])*.55901700258255;g[Ha>>2]=+g[Fa>>2]+ +g[Ga>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2]*.25;g[aa>>2]=+g[Db>>2]-+g[Ib>>2];g[ba>>2]=+g[sb>>2]-+g[xb>>2];g[ca>>2]=+g[aa>>2]*.9510565400123596-+g[ba>>2]*.5877852439880371;g[Ma>>2]=+g[ba>>2]*.9510565400123596+ +g[aa>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ea>>2]+ +g[Ha>>2];g[La>>2]=+g[Ja>>2]+ +g[Ia>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[La>>2]-+g[Ma>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Ma>>2]+ +g[La>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ca>>2]+ +g[Ka>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ka>>2]-+g[ca>>2];g[Aa>>2]=(+g[ya>>2]-+g[za>>2])*.55901700258255;g[I>>2]=+g[ya>>2]+ 
+g[za>>2];g[J>>2]=+g[H>>2]-+g[I>>2]*.25;g[L>>2]=+g[Mb>>2]-+g[Nb>>2];g[M>>2]=+g[Pb>>2]-+g[v>>2];g[N>>2]=+g[L>>2]*.9510565400123596+ +g[M>>2]*.5877852439880371;g[O>>2]=+g[M>>2]*.9510565400123596-+g[L>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[H>>2]+ +g[I>>2];g[P>>2]=+g[J>>2]-+g[Aa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[P>>2]-+g[O>>2];g[K>>2]=+g[Aa>>2]+ +g[J>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[K>>2]-+g[N>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[N>>2]+ +g[K>>2];g[y>>2]=(+g[Ob>>2]-+g[w>>2])*.55901700258255;g[x>>2]=+g[Ob>>2]+ +g[w>>2];g[z>>2]=+g[Lb>>2]-+g[x>>2]*.25;g[ja>>2]=+g[da>>2]-+g[ia>>2];g[ua>>2]=+g[oa>>2]-+g[ta>>2];g[va>>2]=+g[ja>>2]*.9510565400123596+ +g[ua>>2]*.5877852439880371;g[xa>>2]=+g[ua>>2]*.9510565400123596-+g[ja>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Lb>>2]+ +g[x>>2];g[wa>>2]=+g[z>>2]-+g[y>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[wa>>2]-+g[xa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[wa>>2]+ +g[xa>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[A>>2]-+g[va>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[A>>2]+ +g[va>>2];c[oc>>2]=(c[oc>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=pc;return}function pi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,7,392);i=b;return}function qi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0;dc=i;i=i+640|0;m=dc+624|0;n=dc+620|0;o=dc+616|0;p=dc+612|0;q=dc+608|0;r=dc+604|0;ec=dc+600|0;s=dc+596|0;t=dc+592|0;cc=dc+576|0;bb=dc+572|0;W=dc+568|0;Ja=dc+564|0;gb=dc+560|0;rb=dc+556|0;Aa=dc+552|0;M=dc+548|0;oa=dc+544|0;Xb=dc+540|0;Ga=dc+536|0;Va=dc+532|0;ea=dc+528|0;ja=dc+524|0;J=dc+520|0;ca=dc+516|0;I=dc+512|0;Ib=dc+508|0;X=dc+504|0;Ka=dc+500|0;nb=dc+496|0;ub=dc+492|0;pa=dc+488|0;qa=dc+484|0;xb=dc+480|0;Qb=dc+476|0;aa=dc+472|0;Ua=dc+468|0;v=dc+464|0;A=dc+460|0;G=dc+456|0;Z=dc+452|0;F=dc+448|0;Za=dc+444|0;pb=dc+440|0;ac=dc+436|0;na=dc+432|0;ab=dc+428|0;ma=dc+424|0;fb=dc+420|0;qb=dc+416|0;u=dc+412|0;Da=dc+408|0;_b=dc+404|0;$b=dc+400|0;_a=dc+396|0;$a=dc+392|0;bc=dc+388|0;eb=dc+384|0;Tb=dc+380|0;C=dc+376|0;ia=dc+372|0;Ea=dc+368|0;Wb=dc+364|0;fa=dc+360|0;da=dc+356|0;Fa=dc+352|0;Rb=dc+348|0;Sb=dc+344|0;ga=dc+340|0;ha=dc+336|0;Ub=dc+332|0;Vb=dc+328|0;D=dc+324|0;E=dc+320|0;Eb=dc+316|0;tb=dc+312|0;jb=dc+308|0;sb=dc+304|0;Hb=dc+300|0;vb=dc+296|0;mb=dc+292|0;wb=dc+288|0;cb=dc+284|0;db=dc+280|0;hb=dc+276|0;ib=dc+272|0;Fb=dc+268|0;Gb=dc+264|0;kb=dc+260|0;lb=dc+256|0;Mb=dc+252|0;w=dc+248|0;Db=dc+244|0;_=dc+240|0;Pb=dc+236|0;Ab=dc+232|0;z=dc+228|0;$=dc+224|0;Kb=dc+220|0;Lb=dc+216|0;Bb=dc+212|0;Cb=dc+208|0;Nb=dc+204|0;Ob=dc+200|0;x=dc+196|0;y=dc+192|0;Jb=dc+18
8|0;Yb=dc+184|0;Xa=dc+180|0;Ya=dc+176|0;Zb=dc+172|0;ob=dc+168|0;Ta=dc+164|0;Wa=dc+160|0;Y=dc+156|0;La=dc+152|0;Ia=dc+148|0;Ma=dc+144|0;ba=dc+140|0;Ha=dc+136|0;Na=dc+132|0;Ra=dc+128|0;Qa=dc+124|0;Sa=dc+120|0;Oa=dc+116|0;Pa=dc+112|0;zb=dc+108|0;ua=dc+104|0;sa=dc+100|0;ya=dc+96|0;la=dc+92|0;ta=dc+88|0;xa=dc+84|0;za=dc+80|0;yb=dc+76|0;ra=dc+72|0;B=dc+68|0;ka=dc+64|0;va=dc+60|0;wa=dc+56|0;Ca=dc+52|0;Q=dc+48|0;O=dc+44|0;U=dc+40|0;L=dc+36|0;P=dc+32|0;T=dc+28|0;V=dc+24|0;Ba=dc+20|0;N=dc+16|0;H=dc+12|0;K=dc+8|0;R=dc+4|0;S=dc;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ec>>2]=j;c[s>>2]=k;c[t>>2]=l;g[dc+588>>2]=.3826834261417389;g[dc+584>>2]=.9238795042037964;g[dc+580>>2]=.7071067690849304;c[cc>>2]=c[ec>>2];while(1){if((c[cc>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[Za>>2]=+g[u>>2]+ +g[Da>>2];g[pb>>2]=+g[u>>2]-+g[Da>>2];g[_b>>2]=+g[c[n>>2]>>2];g[$b>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ac>>2]=+g[_b>>2]+ +g[$b>>2];g[na>>2]=+g[_b>>2]-+g[$b>>2];g[_a>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[$a>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[ma>>2]=+g[_a>>2]-+g[$a>>2];g[bc>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[eb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[fb>>2]=+g[bc>>2]+ +g[eb>>2];g[qb>>2]=+g[bc>>2]-+g[eb>>2];g[bb>>2]=+g[Za>>2]+ +g[ab>>2];g[W>>2]=+g[Za>>2]-+g[ab>>2];g[Ja>>2]=+g[ac>>2]-+g[fb>>2];g[gb>>2]=+g[ac>>2]+ +g[fb>>2];g[rb>>2]=+g[pb>>2]-+g[qb>>2];g[Aa>>2]=+g[pb>>2]+ +g[qb>>2];g[M>>2]=+g[na>>2]-+g[ma>>2];g[oa>>2]=+g[ma>>2]+ +g[na>>2];g[Rb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[Sb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Tb>>2]=+g[Rb>>2]+ +g[Sb>>2];g[C>>2]=+g[Rb>>2]-+g[Sb>>2];g[ga>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[ha>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[Ea>>2]=+g[ga>>2]+ +g[ha>>2];g[Ub>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Vb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Wb>>2]=+g[Ub>>2]+ 
+g[Vb>>2];g[fa>>2]=+g[Ub>>2]-+g[Vb>>2];g[D>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[da>>2]=+g[D>>2]-+g[E>>2];g[Fa>>2]=+g[D>>2]+ +g[E>>2];g[Xb>>2]=+g[Tb>>2]+ +g[Wb>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Va>>2]=+g[Ea>>2]+ +g[Fa>>2];g[ea>>2]=+g[C>>2]-+g[da>>2];g[ja>>2]=+g[fa>>2]+ +g[ia>>2];g[J>>2]=+g[ia>>2]-+g[fa>>2];g[ca>>2]=+g[Tb>>2]-+g[Wb>>2];g[I>>2]=+g[C>>2]+ +g[da>>2];g[cb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[db>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Eb>>2]=+g[cb>>2]+ +g[db>>2];g[tb>>2]=+g[cb>>2]-+g[db>>2];g[hb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[ib>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[sb>>2]=+g[hb>>2]-+g[ib>>2];g[Fb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[Gb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Hb>>2]=+g[Fb>>2]+ +g[Gb>>2];g[vb>>2]=+g[Fb>>2]-+g[Gb>>2];g[kb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[lb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[mb>>2]=+g[kb>>2]+ +g[lb>>2];g[wb>>2]=+g[kb>>2]-+g[lb>>2];g[Ib>>2]=+g[Eb>>2]+ +g[Hb>>2];g[X>>2]=+g[jb>>2]-+g[mb>>2];g[Ka>>2]=+g[Hb>>2]-+g[Eb>>2];g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[pa>>2]=+g[vb>>2]-+g[wb>>2];g[qa>>2]=+g[tb>>2]+ +g[sb>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[Kb>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[Lb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Mb>>2]=+g[Kb>>2]+ +g[Lb>>2];g[w>>2]=+g[Kb>>2]-+g[Lb>>2];g[Bb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[Cb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[_>>2]=+g[Bb>>2]+ +g[Cb>>2];g[Nb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[Ob>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[Pb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Ab>>2]=+g[Nb>>2]-+g[Ob>>2];g[x>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[y>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[$>>2]=+g[x>>2]+ +g[y>>2];g[Qb>>2]=+g[Mb>>2]+ +g[Pb>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[Ua>>2]=+g[_>>2]+ +g[$>>2];g[v>>2]=+g[Ab>>2]+ 
+g[Db>>2];g[A>>2]=+g[w>>2]-+g[z>>2];g[G>>2]=+g[w>>2]+ +g[z>>2];g[Z>>2]=+g[Mb>>2]-+g[Pb>>2];g[F>>2]=+g[Db>>2]-+g[Ab>>2];g[Jb>>2]=+g[bb>>2]+ +g[Ib>>2];g[Yb>>2]=+g[Qb>>2]+ +g[Xb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Jb>>2]-+g[Yb>>2];g[c[o>>2]>>2]=+g[Jb>>2]+ +g[Yb>>2];g[Xa>>2]=+g[gb>>2]+ +g[nb>>2];g[Ya>>2]=+g[Ua>>2]+ +g[Va>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Xa>>2]-+g[Ya>>2];g[c[p>>2]>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Zb>>2]=+g[Xb>>2]-+g[Qb>>2];g[ob>>2]=+g[gb>>2]-+g[nb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Zb>>2]+ +g[ob>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[ob>>2]-+g[Zb>>2];g[Ta>>2]=+g[bb>>2]-+g[Ib>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ta>>2]-+g[Wa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ta>>2]+ +g[Wa>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[Ha>>2]=+g[ca>>2]-+g[Ga>>2];g[Ia>>2]=(+g[ba>>2]+ +g[Ha>>2])*.7071067690849304;g[Ma>>2]=(+g[Ha>>2]-+g[ba>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Y>>2]-+g[Ia>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[La>>2]+ +g[Ma>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Y>>2]+ +g[Ia>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[La>>2]-+g[Ma>>2];g[Na>>2]=+g[W>>2]-+g[X>>2];g[Ra>>2]=+g[Ka>>2]+ +g[Ja>>2];g[Oa>>2]=+g[aa>>2]-+g[Z>>2];g[Pa>>2]=+g[ca>>2]+ +g[Ga>>2];g[Qa>>2]=(+g[Oa>>2]-+g[Pa>>2])*.7071067690849304;g[Sa>>2]=(+g[Oa>>2]+ +g[Pa>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Na>>2]-+g[Qa>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ra>>2]+ +g[Sa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Na>>2]+ +g[Qa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ra>>2]-+g[Sa>>2];g[yb>>2]=(+g[ub>>2]-+g[xb>>2])*.7071067690849304;g[zb>>2]=+g[rb>>2]+ +g[yb>>2];g[ua>>2]=+g[rb>>2]-+g[yb>>2];g[ra>>2]=(+g[pa>>2]-+g[qa>>2])*.7071067690849304;g[sa>>2]=+g[oa>>2]-+g[ra>>2];g[ya>>2]=+g[oa>>2]+ +g[ra>>2];g[B>>2]=+g[v>>2]*.9238795042037964+ 
+g[A>>2]*.3826834261417389;g[ka>>2]=+g[ea>>2]*.3826834261417389-+g[ja>>2]*.9238795042037964;g[la>>2]=+g[B>>2]+ +g[ka>>2];g[ta>>2]=+g[ka>>2]-+g[B>>2];g[va>>2]=+g[v>>2]*.3826834261417389-+g[A>>2]*.9238795042037964;g[wa>>2]=+g[ja>>2]*.3826834261417389+ +g[ea>>2]*.9238795042037964;g[xa>>2]=+g[va>>2]-+g[wa>>2];g[za>>2]=+g[va>>2]+ +g[wa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[zb>>2]-+g[la>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[ya>>2]-+g[za>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[zb>>2]+ +g[la>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ya>>2]+ +g[za>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[sa>>2]-+g[ta>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ua>>2]-+g[xa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[sa>>2]+ +g[ta>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ua>>2]+ +g[xa>>2];g[Ba>>2]=(+g[qa>>2]+ +g[pa>>2])*.7071067690849304;g[Ca>>2]=+g[Aa>>2]+ +g[Ba>>2];g[Q>>2]=+g[Aa>>2]-+g[Ba>>2];g[N>>2]=(+g[ub>>2]+ +g[xb>>2])*.7071067690849304;g[O>>2]=+g[M>>2]-+g[N>>2];g[U>>2]=+g[M>>2]+ +g[N>>2];g[H>>2]=+g[F>>2]*.3826834261417389+ +g[G>>2]*.9238795042037964;g[K>>2]=+g[I>>2]*.9238795042037964-+g[J>>2]*.3826834261417389;g[L>>2]=+g[H>>2]+ +g[K>>2];g[P>>2]=+g[K>>2]-+g[H>>2];g[R>>2]=+g[F>>2]*.9238795042037964-+g[G>>2]*.3826834261417389;g[S>>2]=+g[J>>2]*.9238795042037964+ +g[I>>2]*.3826834261417389;g[T>>2]=+g[R>>2]-+g[S>>2];g[V>>2]=+g[R>>2]+ +g[S>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ca>>2]-+g[L>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[U>>2]-+g[V>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ca>>2]+ +g[L>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[U>>2]+ +g[V>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[O>>2]-+g[P>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Q>>2]-+g[T>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Q>>2]+ 
+g[T>>2];c[cc>>2]=(c[cc>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=dc;return}function ri(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,8,456);i=b;return}function si(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0;nd=i;i=i+896|0;m=nd+884|0;n=nd+880|0;o=nd+876|0;p=nd+872|0;q=nd+868|0;r=nd+864|0;od=nd+860|0;s=nd+856|0;t=nd+852|0;md=nd+832|0;lc=nd+828|0;cb=nd+824|0;cc=nd+820|0;rc=nd+816|0;Dc=nd+812|0;Z=nd+808|0;xb=nd+804|0;ga=nd+800|0;fd=nd+796|0;oc=nd+792|0;pc=nd+788|0;Ib=nd+784|0;Lb=nd+780|0;eb=nd+776|0;Sb=nd+772|0;Tb=nd+768|0;ac=nd+764|0;vc=nd+760|0;wc=nd+756|0;xc=nd+752|0;A=nd+748|0;da=nd+744|0;ea=nd+740|0;va=nd+736|0;Aa=nd+732|0;$=nd+728|0;Na=nd+724|0;mb=nd+720|0;vb=nd+716|0;ka=nd+712|0;la=nd+708|0;ma=nd+704|0;Sc=nd+700|0;Zc=nd+696|0;_c=nd+692|
0;Ra=nd+688|0;Ua=nd+684|0;db=nd+680|0;Pb=nd+676|0;Qb=nd+672|0;$b=nd+668|0;sc=nd+664|0;tc=nd+660|0;uc=nd+656|0;Ic=nd+652|0;Nc=nd+648|0;v=nd+644|0;I=nd+640|0;N=nd+636|0;_=nd+632|0;Ka=nd+628|0;La=nd+624|0;ub=nd+620|0;ha=nd+616|0;ia=nd+612|0;ja=nd+608|0;Mb=nd+604|0;V=nd+600|0;Bc=nd+596|0;ab=nd+592|0;kc=nd+588|0;Cc=nd+584|0;Y=nd+580|0;bb=nd+576|0;u=nd+572|0;Da=nd+568|0;zc=nd+564|0;Ac=nd+560|0;ic=nd+556|0;jc=nd+552|0;W=nd+548|0;X=nd+544|0;bd=nd+540|0;ra=nd+536|0;y=nd+532|0;Gb=nd+528|0;ed=nd+524|0;z=nd+520|0;ua=nd+516|0;Hb=nd+512|0;id=nd+508|0;wa=nd+504|0;D=nd+500|0;Jb=nd+496|0;ld=nd+492|0;E=nd+488|0;za=nd+484|0;Kb=nd+480|0;$c=nd+476|0;ad=nd+472|0;w=nd+468|0;x=nd+464|0;cd=nd+460|0;dd=nd+456|0;sa=nd+452|0;ta=nd+448|0;gd=nd+444|0;hd=nd+440|0;B=nd+436|0;C=nd+432|0;jd=nd+428|0;kd=nd+424|0;xa=nd+420|0;ya=nd+416|0;Oc=nd+412|0;Ca=nd+408|0;Gc=nd+404|0;Pa=nd+400|0;Rc=nd+396|0;Hc=nd+392|0;H=nd+388|0;Qa=nd+384|0;Vc=nd+380|0;J=nd+376|0;Lc=nd+372|0;Sa=nd+368|0;Yc=nd+364|0;Mc=nd+360|0;M=nd+356|0;Ta=nd+352|0;mc=nd+348|0;nc=nd+344|0;Ec=nd+340|0;Fc=nd+336|0;Pc=nd+332|0;Qc=nd+328|0;F=nd+324|0;G=nd+320|0;Tc=nd+316|0;Uc=nd+312|0;Jc=nd+308|0;Kc=nd+304|0;Wc=nd+300|0;Xc=nd+296|0;K=nd+292|0;L=nd+288|0;Eb=nd+284|0;qc=nd+280|0;Db=nd+276|0;Wa=nd+272|0;Ya=nd+268|0;Oa=nd+264|0;Va=nd+260|0;Xa=nd+256|0;Fb=nd+252|0;hb=nd+248|0;fb=nd+244|0;gb=nd+240|0;$a=nd+236|0;kb=nd+232|0;Za=nd+228|0;_a=nd+224|0;jb=nd+220|0;ib=nd+216|0;lb=nd+212|0;yc=nd+208|0;Nb=nd+204|0;Vb=nd+200|0;Xb=nd+196|0;Rb=nd+192|0;Ub=nd+188|0;Wb=nd+184|0;Ob=nd+180|0;bc=nd+176|0;dc=nd+172|0;ec=nd+168|0;_b=nd+164|0;hc=nd+160|0;Yb=nd+156|0;Zb=nd+152|0;gc=nd+148|0;fc=nd+144|0;Ha=nd+140|0;fa=nd+136|0;Ia=nd+132|0;ob=nd+128|0;qb=nd+124|0;Ma=nd+120|0;nb=nd+116|0;pb=nd+112|0;Ja=nd+108|0;wb=nd+104|0;yb=nd+100|0;zb=nd+96|0;tb=nd+92|0;Bb=nd+88|0;rb=nd+84|0;sb=nd+80|0;Cb=nd+76|0;Ab=nd+72|0;pa=nd+68|0;na=nd+64|0;oa=nd+60|0;P=nd+56|0;R=nd+52|0;Ba=nd+48|0;O=nd+44|0;Q=nd+40|0;qa=nd+36|0;ca=nd+32|0;aa=nd+28|0;ba=nd+24|0;U=nd+20|0;Fa=nd+16|0;S=nd+12|0;T=nd+8|0;G
a=nd+4|0;Ea=nd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[od>>2]=j;c[s>>2]=k;c[t>>2]=l;g[nd+848>>2]=.5877852439880371;g[nd+844>>2]=.9510565400123596;g[nd+840>>2]=.25;g[nd+836>>2]=.55901700258255;c[md>>2]=c[od>>2];while(1){if((c[md>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[V>>2]=+g[u>>2]-+g[Da>>2];g[zc>>2]=+g[c[n>>2]>>2];g[Ac>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[ab>>2]=+g[zc>>2]+ +g[Ac>>2];g[ic>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[jc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[Cc>>2]=+g[ic>>2]-+g[jc>>2];g[W>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[X>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[bb>>2]=+g[W>>2]+ +g[X>>2];g[lc>>2]=+g[Mb>>2]-+g[kc>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[cc>>2]=+g[ab>>2]+ +g[bb>>2];g[rc>>2]=+g[Mb>>2]+ +g[kc>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Z>>2]=+g[V>>2]-+g[Y>>2];g[xb>>2]=+g[V>>2]+ +g[Y>>2];g[ga>>2]=+g[Cc>>2]+ +g[Bc>>2];g[$c>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ad>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[bd>>2]=+g[$c>>2]+ +g[ad>>2];g[ra>>2]=+g[$c>>2]-+g[ad>>2];g[w>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[Gb>>2]=+g[w>>2]+ +g[x>>2];g[cd>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[dd>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[ed>>2]=+g[cd>>2]+ +g[dd>>2];g[z>>2]=+g[cd>>2]-+g[dd>>2];g[sa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[ta>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[Hb>>2]=+g[sa>>2]+ +g[ta>>2];g[gd>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[hd>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[id>>2]=+g[gd>>2]+ +g[hd>>2];g[wa>>2]=+g[gd>>2]-+g[hd>>2];g[B>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[C>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[Jb>>2]=+g[B>>2]+ 
+g[C>>2];g[jd>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[kd>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[ld>>2]=+g[jd>>2]+ +g[kd>>2];g[E>>2]=+g[jd>>2]-+g[kd>>2];g[xa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[ya>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[Kb>>2]=+g[xa>>2]+ +g[ya>>2];g[fd>>2]=+g[bd>>2]-+g[ed>>2];g[oc>>2]=+g[id>>2]-+g[ld>>2];g[pc>>2]=+g[fd>>2]+ +g[oc>>2];g[Ib>>2]=+g[Gb>>2]-+g[Hb>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[eb>>2]=+g[Ib>>2]+ +g[Lb>>2];g[Sb>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Tb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[ac>>2]=+g[Sb>>2]+ +g[Tb>>2];g[vc>>2]=+g[bd>>2]+ +g[ed>>2];g[wc>>2]=+g[id>>2]+ +g[ld>>2];g[xc>>2]=+g[vc>>2]+ +g[wc>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[da>>2]=+g[D>>2]-+g[E>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[va>>2]=+g[ra>>2]-+g[ua>>2];g[Aa>>2]=+g[wa>>2]-+g[za>>2];g[$>>2]=+g[va>>2]+ +g[Aa>>2];g[Na>>2]=+g[ra>>2]+ +g[ua>>2];g[mb>>2]=+g[wa>>2]+ +g[za>>2];g[vb>>2]=+g[Na>>2]+ +g[mb>>2];g[ka>>2]=+g[z>>2]+ +g[y>>2];g[la>>2]=+g[E>>2]+ +g[D>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[mc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[nc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[Oc>>2]=+g[mc>>2]+ +g[nc>>2];g[Ca>>2]=+g[mc>>2]-+g[nc>>2];g[Ec>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[Fc>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Pa>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Pc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Qc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[Rc>>2]=+g[Pc>>2]+ +g[Qc>>2];g[Hc>>2]=+g[Pc>>2]-+g[Qc>>2];g[F>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[Qa>>2]=+g[F>>2]+ +g[G>>2];g[Tc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<4<<2)>>2];g[Uc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Vc>>2]=+g[Tc>>2]+ +g[Uc>>2];g[J>>2]=+g[Tc>>2]-+g[Uc>>2];g[Jc>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<4<<2)>>2];g[Kc>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[Sa>>2]=+g[Jc>>2]+ 
+g[Kc>>2];g[Wc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[Xc>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Yc>>2]=+g[Wc>>2]+ +g[Xc>>2];g[Mc>>2]=+g[Wc>>2]-+g[Xc>>2];g[K>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[L>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[Ta>>2]=+g[K>>2]+ +g[L>>2];g[Sc>>2]=+g[Oc>>2]-+g[Rc>>2];g[Zc>>2]=+g[Vc>>2]-+g[Yc>>2];g[_c>>2]=+g[Sc>>2]+ +g[Zc>>2];g[Ra>>2]=+g[Pa>>2]-+g[Qa>>2];g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[db>>2]=+g[Ra>>2]+ +g[Ua>>2];g[Pb>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Qb>>2]=+g[Sa>>2]+ +g[Ta>>2];g[$b>>2]=+g[Pb>>2]+ +g[Qb>>2];g[sc>>2]=+g[Oc>>2]+ +g[Rc>>2];g[tc>>2]=+g[Vc>>2]+ +g[Yc>>2];g[uc>>2]=+g[sc>>2]+ +g[tc>>2];g[Ic>>2]=+g[Gc>>2]-+g[Hc>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[v>>2]=+g[Ic>>2]+ +g[Nc>>2];g[I>>2]=+g[Ca>>2]-+g[H>>2];g[N>>2]=+g[J>>2]-+g[M>>2];g[_>>2]=+g[I>>2]+ +g[N>>2];g[Ka>>2]=+g[Ca>>2]+ +g[H>>2];g[La>>2]=+g[J>>2]+ +g[M>>2];g[ub>>2]=+g[Ka>>2]+ +g[La>>2];g[ha>>2]=+g[Hc>>2]+ +g[Gc>>2];g[ia>>2]=+g[Mc>>2]+ +g[Lc>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[Eb>>2]=(+g[_c>>2]-+g[pc>>2])*.55901700258255;g[qc>>2]=+g[_c>>2]+ +g[pc>>2];g[Db>>2]=+g[lc>>2]-+g[qc>>2]*.25;g[Oa>>2]=+g[Ib>>2]-+g[Lb>>2];g[Va>>2]=+g[Ra>>2]-+g[Ua>>2];g[Wa>>2]=+g[Oa>>2]*.9510565400123596-+g[Va>>2]*.5877852439880371;g[Ya>>2]=+g[Va>>2]*.9510565400123596+ +g[Oa>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[lc>>2]+ +g[qc>>2];g[Xa>>2]=+g[Eb>>2]+ +g[Db>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Xa>>2]-+g[Ya>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fb>>2]-+g[Wa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Fb>>2]+ +g[Wa>>2];g[hb>>2]=(+g[db>>2]-+g[eb>>2])*.55901700258255;g[fb>>2]=+g[db>>2]+ +g[eb>>2];g[gb>>2]=+g[cb>>2]-+g[fb>>2]*.25;g[Za>>2]=+g[fd>>2]-+g[oc>>2];g[_a>>2]=+g[Sc>>2]-+g[Zc>>2];g[$a>>2]=+g[Za>>2]*.9510565400123596-+g[_a>>2]*.5877852439880371;g[kb>>2]=+g[_a>>2]*.9510565400123596+ +g[Za>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[cb>>2]+ 
+g[fb>>2];g[jb>>2]=+g[hb>>2]+ +g[gb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[jb>>2]-+g[kb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[kb>>2]+ +g[jb>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$a>>2]+ +g[ib>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[ib>>2]-+g[$a>>2];g[lb>>2]=(+g[uc>>2]-+g[xc>>2])*.55901700258255;g[yc>>2]=+g[uc>>2]+ +g[xc>>2];g[Nb>>2]=+g[rc>>2]-+g[yc>>2]*.25;g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2];g[Vb>>2]=+g[Rb>>2]*.9510565400123596+ +g[Ub>>2]*.5877852439880371;g[Xb>>2]=+g[Ub>>2]*.9510565400123596-+g[Rb>>2]*.5877852439880371;g[c[o>>2]>>2]=+g[rc>>2]+ +g[yc>>2];g[Wb>>2]=+g[Nb>>2]-+g[lb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Wb>>2]-+g[Xb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Wb>>2]+ +g[Xb>>2];g[Ob>>2]=+g[lb>>2]+ +g[Nb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ob>>2]-+g[Vb>>2];g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Ob>>2]+ +g[Vb>>2];g[bc>>2]=(+g[$b>>2]-+g[ac>>2])*.55901700258255;g[dc>>2]=+g[$b>>2]+ +g[ac>>2];g[ec>>2]=+g[cc>>2]-+g[dc>>2]*.25;g[Yb>>2]=+g[sc>>2]-+g[tc>>2];g[Zb>>2]=+g[vc>>2]-+g[wc>>2];g[_b>>2]=+g[Yb>>2]*.9510565400123596+ +g[Zb>>2]*.5877852439880371;g[hc>>2]=+g[Zb>>2]*.9510565400123596-+g[Yb>>2]*.5877852439880371;g[c[p>>2]>>2]=+g[cc>>2]+ +g[dc>>2];g[gc>>2]=+g[ec>>2]-+g[bc>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[gc>>2]-+g[hc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[hc>>2]+ +g[gc>>2];g[fc>>2]=+g[bc>>2]+ +g[ec>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[_b>>2]+ +g[fc>>2];g[(c[p>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[fc>>2]-+g[_b>>2];g[Ha>>2]=(+g[v>>2]-+g[ea>>2])*.55901700258255;g[fa>>2]=+g[v>>2]+ +g[ea>>2];g[Ia>>2]=+g[Dc>>2]-+g[fa>>2]*.25;g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[nb>>2]=+g[Na>>2]-+g[mb>>2];g[ob>>2]=+g[Ma>>2]*.9510565400123596+ +g[nb>>2]*.5877852439880371;g[qb>>2]=+g[nb>>2]*.9510565400123596-+g[Ma>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Dc>>2]+ 
+g[fa>>2];g[pb>>2]=+g[Ia>>2]-+g[Ha>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[pb>>2]-+g[qb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[pb>>2]+ +g[qb>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ja>>2]-+g[ob>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ja>>2]+ +g[ob>>2];g[wb>>2]=(+g[ub>>2]-+g[vb>>2])*.55901700258255;g[yb>>2]=+g[ub>>2]+ +g[vb>>2];g[zb>>2]=+g[xb>>2]-+g[yb>>2]*.25;g[rb>>2]=+g[Ic>>2]-+g[Nc>>2];g[sb>>2]=+g[A>>2]-+g[da>>2];g[tb>>2]=+g[rb>>2]*.9510565400123596+ +g[sb>>2]*.5877852439880371;g[Bb>>2]=+g[sb>>2]*.9510565400123596-+g[rb>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[xb>>2]+ +g[yb>>2];g[Cb>>2]=+g[zb>>2]-+g[wb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Bb>>2]+ +g[Cb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[Cb>>2]-+g[Bb>>2];g[Ab>>2]=+g[wb>>2]+ +g[zb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[tb>>2]+ +g[Ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ab>>2]-+g[tb>>2];g[pa>>2]=(+g[ja>>2]-+g[ma>>2])*.55901700258255;g[na>>2]=+g[ja>>2]+ +g[ma>>2];g[oa>>2]=+g[ga>>2]-+g[na>>2]*.25;g[Ba>>2]=+g[va>>2]-+g[Aa>>2];g[O>>2]=+g[I>>2]-+g[N>>2];g[P>>2]=+g[Ba>>2]*.9510565400123596-+g[O>>2]*.5877852439880371;g[R>>2]=+g[O>>2]*.9510565400123596+ +g[Ba>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ga>>2]+ +g[na>>2];g[Q>>2]=+g[pa>>2]+ +g[oa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[Q>>2]+ +g[R>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[qa>>2]-+g[P>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[qa>>2]+ +g[P>>2];g[ca>>2]=(+g[_>>2]-+g[$>>2])*.55901700258255;g[aa>>2]=+g[_>>2]+ +g[$>>2];g[ba>>2]=+g[Z>>2]-+g[aa>>2]*.25;g[S>>2]=+g[ka>>2]-+g[la>>2];g[T>>2]=+g[ha>>2]-+g[ia>>2];g[U>>2]=+g[S>>2]*.9510565400123596-+g[T>>2]*.5877852439880371;g[Fa>>2]=+g[T>>2]*.9510565400123596+ +g[S>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Z>>2]+ +g[aa>>2];g[Ga>>2]=+g[ca>>2]+ +g[ba>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Fa>>2]+ 
+g[Ga>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[Ga>>2]-+g[Fa>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[U>>2]+ +g[Ea>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ea>>2]-+g[U>>2];c[md>>2]=(c[md>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=nd;return}/* ti(a): coerces a to an integer with |0, saves i in b and advances i by 16, stores a into c at index d>>2 (d is the saved value of i), calls hh(c[d>>2]|0, 9, 520), then restores i from b and returns no value. NOTE(review): i looks like the asm.js stack pointer and c like the 32-bit integer heap view - confirm against the module prologue; what hh does and what the constants 9 and 520 mean is not visible in this chunk. */function ti(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,9,520);i=b;return} +function Mj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee
=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0;ai=i;i=i+1984|0;k=ai+1980|0;l=ai+1976|0;m=ai+1972|0;n=ai+1968|0;bi=ai+1964|0;o=ai+1960|0;p=ai+1956|0;$h=ai+1872|0;za=ai+1868|0;_d=ai+1864|0;Ib=ai+1860|0;hf=ai+1856|0;$g=ai+1852|0;Eh=ai+1848|0;x=ai+1844|0;v=ai+1840|0;Fh=ai+1836|0;ah=ai+1832|0;Ba=ai+1828|0;Da=ai+1824|0;Yh=ai+1820|0;sh=ai+1816|0;fa=ai+1812|0;ch=ai+1808|0;ea=ai+1804|0;wh=ai+1800|0;R=ai+1796|0;Hh=ai+1792|0;T=ai+1788|0;Lh=ai+1784|0;z=ai+1780|0;Ab=ai+1776|0;ba=ai+1772|0;yb=ai+1768|0;Oh=ai+1764|0;Ph=ai+1760|0;Qh=ai+1756|0;Sh=ai+1752|0;Ia=ai+1748|0;Fb=ai+1744|0;zh=ai+1740|0;sb=ai+1736|0;ub=ai+1732|0;Db=ai+1728|0;hh=ai+1724|0;fh=ai+1720|0;r=ai+1716|0;ga=ai+1712|0;F=ai+1708|0;Ga=ai+1704|0;C=ai+1700|0;H=ai+1696|0;W=ai+1692|0;Y=ai+1688|0;ya=ai+1684|0;ia=ai+1680|0;Xh=ai+1676|0;uh=ai+1672|0;_h=ai+1668|0;rh=ai+1664|0;Wh=ai+1660|0;vh=ai+1656|0;bh=ai+1652|0;qh=ai+1648|0;Rc=ai+1644|0;Dh=ai+1640|0;rg=ai+1636|0;Ch=ai+1632|0;w=ai+1628|0;y=ai+1624|0;Bh=ai+1620|0;Gh=ai+1616|0;$=ai+1612|0;aa=ai+1608|0;Jh=ai+1604|0;Kh=ai+1600|0;q=ai+1596|0;mg=ai+1592|0;Pa=ai+1588|0;Bf=ai+1584|0;lh=ai+1580|0;Qa=ai+1576|0;lg=ai+
1572|0;Cf=ai+1568|0;Gf=ai+1564|0;Eg=ai+1560|0;Ya=ai+1556|0;be=ai+1552|0;Ma=ai+1548|0;bg=ai+1544|0;_c=ai+1540|0;Ge=ai+1536|0;ld=ai+1532|0;$e=ai+1528|0;Jd=ai+1524|0;Fe=ai+1520|0;kd=ai+1516|0;cf=ai+1512|0;na=ai+1508|0;Zf=ai+1504|0;Jb=ai+1500|0;ve=ai+1496|0;ad=ai+1492|0;fe=ai+1488|0;sc=ai+1484|0;we=ai+1480|0;bd=ai+1476|0;ie=ai+1472|0;M=ai+1468|0;_f=ai+1464|0;Ec=ai+1460|0;ze=ai+1456|0;dd=ai+1452|0;Pe=ai+1448|0;Pc=ai+1444|0;ye=ai+1440|0;ed=ai+1436|0;Me=ai+1432|0;lb=ai+1428|0;ag=ai+1424|0;cc=ai+1420|0;Ce=ai+1416|0;id=ai+1412|0;Ue=ai+1408|0;nc=ai+1404|0;De=ai+1400|0;hd=ai+1396|0;Xe=ai+1392|0;Nh=ai+1388|0;Sa=ai+1384|0;Uh=ai+1380|0;Ta=ai+1376|0;Vh=ai+1372|0;jg=ai+1368|0;eh=ai+1364|0;Va=ai+1360|0;jh=ai+1356|0;Wa=ai+1352|0;kh=ai+1348|0;kg=ai+1344|0;Ih=ai+1340|0;Mh=ai+1336|0;Rh=ai+1332|0;Th=ai+1328|0;Zh=ai+1324|0;dh=ai+1320|0;gh=ai+1316|0;ih=ai+1312|0;Ef=ai+1308|0;Ff=ai+1304|0;Ua=ai+1300|0;Xa=ai+1296|0;ob=ai+1292|0;Cd=ai+1288|0;Vc=ai+1284|0;Yc=ai+1280|0;Hd=ai+1276|0;Gd=ai+1272|0;$c=ai+1268|0;Ad=ai+1264|0;Dd=ai+1260|0;xb=ai+1256|0;Ka=ai+1252|0;La=ai+1248|0;mb=ai+1244|0;nb=ai+1240|0;rb=ai+1236|0;Tc=ai+1232|0;Hb=ai+1228|0;Xc=ai+1224|0;wb=ai+1220|0;Uc=ai+1216|0;Cb=ai+1212|0;Wc=ai+1208|0;pb=ai+1204|0;qb=ai+1200|0;Eb=ai+1196|0;Gb=ai+1192|0;tb=ai+1188|0;vb=ai+1184|0;zb=ai+1180|0;Bb=ai+1176|0;Zc=ai+1172|0;_e=ai+1168|0;Sc=ai+1164|0;Ze=ai+1160|0;pc=ai+1156|0;qc=ai+1152|0;Id=ai+1148|0;af=ai+1144|0;Fd=ai+1140|0;bf=ai+1136|0;Bd=ai+1132|0;Ed=ai+1128|0;ph=ai+1124|0;Nb=ai+1120|0;db=ai+1116|0;gb=ai+1112|0;Sb=ai+1108|0;Rb=ai+1104|0;Kb=ai+1100|0;Lb=ai+1096|0;Ob=ai+1092|0;u=ai+1088|0;la=ai+1084|0;ma=ai+1080|0;nh=ai+1076|0;oh=ai+1072|0;yh=ai+1068|0;bb=ai+1064|0;ka=ai+1060|0;fb=ai+1056|0;t=ai+1052|0;cb=ai+1048|0;da=ai+1044|0;eb=ai+1040|0;th=ai+1036|0;xh=ai+1032|0;ha=ai+1028|0;ja=ai+1024|0;Ah=ai+1020|0;s=ai+1016|0;A=ai+1012|0;ca=ai+1008|0;hb=ai+1004|0;ee=ai+1e3|0;ab=ai+996|0;de=ai+992|0;_a=ai+988|0;$a=ai+984|0;rc=ai+980|0;ge=ai+976|0;Qb=ai+972|0;he=ai+968|0;Mb=ai+964|0;Pb=ai+960|0;qa=ai+956|0;Ic=ai+95
2|0;zc=ai+948|0;Cc=ai+944|0;Nc=ai+940|0;Mc=ai+936|0;Fc=ai+932|0;Gc=ai+928|0;Jc=ai+924|0;xa=ai+920|0;K=ai+916|0;L=ai+912|0;oa=ai+908|0;pa=ai+904|0;ta=ai+900|0;xc=ai+896|0;J=ai+892|0;Bc=ai+888|0;wa=ai+884|0;yc=ai+880|0;E=ai+876|0;Ac=ai+872|0;ra=ai+868|0;sa=ai+864|0;G=ai+860|0;I=ai+856|0;ua=ai+852|0;va=ai+848|0;B=ai+844|0;D=ai+840|0;Dc=ai+836|0;Oe=ai+832|0;wc=ai+828|0;Ne=ai+824|0;uc=ai+820|0;vc=ai+816|0;Oc=ai+812|0;Ke=ai+808|0;Lc=ai+804|0;Le=ai+800|0;Hc=ai+796|0;Kc=ai+792|0;Q=ai+788|0;gc=ai+784|0;Zb=ai+780|0;ac=ai+776|0;lc=ai+772|0;kc=ai+768|0;dc=ai+764|0;ec=ai+760|0;hc=ai+756|0;Aa=ai+752|0;jb=ai+748|0;kb=ai+744|0;O=ai+740|0;P=ai+736|0;V=ai+732|0;Xb=ai+728|0;ib=ai+724|0;$b=ai+720|0;_=ai+716|0;Yb=ai+712|0;Fa=ai+708|0;_b=ai+704|0;S=ai+700|0;U=ai+696|0;Ha=ai+692|0;Ja=ai+688|0;X=ai+684|0;Z=ai+680|0;Ca=ai+676|0;Ea=ai+672|0;bc=ai+668|0;Te=ai+664|0;Wb=ai+660|0;Se=ai+656|0;Ub=ai+652|0;Vb=ai+648|0;mc=ai+644|0;Ve=ai+640|0;jc=ai+636|0;We=ai+632|0;fc=ai+628|0;ic=ai+624|0;dg=ai+620|0;fg=ai+616|0;mh=ai+612|0;Oa=ai+608|0;Wf=ai+604|0;Xf=ai+600|0;eg=ai+596|0;Yf=ai+592|0;$f=ai+588|0;cg=ai+584|0;N=ai+580|0;Na=ai+576|0;vf=ai+572|0;wf=ai+568|0;ng=ai+564|0;ig=ai+560|0;og=ai+556|0;pg=ai+552|0;xf=ai+548|0;qg=ai+544|0;tf=ai+540|0;uf=ai+536|0;gg=ai+532|0;hg=ai+528|0;Za=ai+524|0;Zd=ai+520|0;Hf=ai+516|0;sg=ai+512|0;Md=ai+508|0;Mf=ai+504|0;Nd=ai+500|0;Lf=ai+496|0;ud=ai+492|0;tg=ai+488|0;xd=ai+484|0;Sf=ai+480|0;od=ai+476|0;xg=ai+472|0;pd=ai+468|0;wg=ai+464|0;Sd=ai+460|0;If=ai+456|0;Vd=ai+452|0;Af=ai+448|0;Ra=ai+444|0;Df=ai+440|0;tc=ai+436|0;Qc=ai+432|0;Tb=ai+428|0;oc=ai+424|0;Kd=ai+420|0;Ld=ai+416|0;sd=ai+412|0;td=ai+408|0;Qf=ai+404|0;vd=ai+400|0;wd=ai+396|0;Rf=ai+392|0;cd=ai+388|0;fd=ai+384|0;gd=ai+380|0;jd=ai+376|0;md=ai+372|0;nd=ai+368|0;Qd=ai+364|0;Rd=ai+360|0;yf=ai+356|0;Td=ai+352|0;Ud=ai+348|0;zf=ai+344|0;Wd=ai+340|0;Yd=ai+336|0;Pd=ai+332|0;Xd=ai+328|0;Od=ai+324|0;Nf=ai+320|0;Of=ai+316|0;Kf=ai+312|0;Pf=ai+308|0;Jf=ai+304|0;yd=ai+300|0;$d=ai+296|0;rd=ai+292|0;zd=ai+288|0;qd=ai+284|0;yg=ai+280|
0;zg=ai+276|0;vg=ai+272|0;Ag=ai+268|0;ug=ai+264|0;ce=ai+260|0;ue=ai+256|0;Gg=ai+252|0;Sg=ai+248|0;ff=ai+244|0;Lg=ai+240|0;gf=ai+236|0;Kg=ai+232|0;pf=ai+228|0;Tg=ai+224|0;sf=ai+220|0;Rg=ai+216|0;jf=ai+212|0;Xg=ai+208|0;kf=ai+204|0;Wg=ai+200|0;ne=ai+196|0;Hg=ai+192|0;qe=ai+188|0;Dg=ai+184|0;ae=ai+180|0;Fg=ai+176|0;Je=ai+172|0;Qe=ai+168|0;Re=ai+164|0;Ye=ai+160|0;df=ai+156|0;ef=ai+152|0;nf=ai+148|0;of=ai+144|0;Pg=ai+140|0;qf=ai+136|0;rf=ai+132|0;Qg=ai+128|0;xe=ai+124|0;Ae=ai+120|0;Be=ai+116|0;Ee=ai+112|0;He=ai+108|0;Ie=ai+104|0;le=ai+100|0;me=ai+96|0;Bg=ai+92|0;oe=ai+88|0;pe=ai+84|0;Cg=ai+80|0;re=ai+76|0;te=ai+72|0;ke=ai+68|0;se=ai+64|0;je=ai+60|0;Mg=ai+56|0;Ng=ai+52|0;Jg=ai+48|0;Og=ai+44|0;Ig=ai+40|0;Tf=ai+36|0;Vf=ai+32|0;mf=ai+28|0;Uf=ai+24|0;lf=ai+20|0;Yg=ai+16|0;Zg=ai+12|0;Vg=ai+8|0;_g=ai+4|0;Ug=ai;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[bi>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ai+1952>>2]=.9980267286300659;g[ai+1948>>2]=.06279052048921585;g[ai+1944>>2]=.4257792830467224;g[ai+1940>>2]=.9048270583152771;g[ai+1936>>2]=.9921147227287292;g[ai+1932>>2]=.12533323466777802;g[ai+1928>>2]=.6374239921569824;g[ai+1924>>2]=.7705132365226746;g[ai+1920>>2]=.6845471262931824;g[ai+1916>>2]=.728968620300293;g[ai+1912>>2]=.4817536771297455;g[ai+1908>>2]=.8763066530227661;g[ai+1904>>2]=.8443279266357422;g[ai+1900>>2]=.5358268022537231;g[ai+1896>>2]=.24868988990783691;g[ai+1892>>2]=.9685831665992737;g[ai+1888>>2]=.5877852439880371;g[ai+1884>>2]=.9510565400123596;g[ai+1880>>2]=.25;g[ai+1876>>2]=.55901700258255;c[$h>>2]=c[bi>>2];c[m>>2]=(c[m>>2]|0)+(c[bi>>2]<<3<<2);while(1){if((c[$h>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[Ib>>2]=+g[(c[m>>2]|0)+8>>2];g[hf>>2]=+g[(c[m>>2]|0)+12>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[Dh>>2]=+g[_d>>2]*+g[Ib>>2];g[rg>>2]=+g[_d>>2]*+g[hf>>2];g[Ch>>2]=+g[za>>2]*+g[hf>>2];g[$g>>2]=+g[Rc>>2]-+g[rg>>2];g[Eh>>2]=+g[Ch>>2]+ +g[Dh>>2];g[x>>2]=+g[Ch>>2]-+g[Dh>>2];g[v>>2]=+g[Rc>>2]+ 
+g[rg>>2];g[Fh>>2]=+g[(c[m>>2]|0)+20>>2];g[Xh>>2]=+g[_d>>2]*+g[Fh>>2];g[uh>>2]=+g[Ib>>2]*+g[Fh>>2];g[_h>>2]=+g[za>>2]*+g[Fh>>2];g[rh>>2]=+g[hf>>2]*+g[Fh>>2];g[ah>>2]=+g[(c[m>>2]|0)+16>>2];g[Wh>>2]=+g[za>>2]*+g[ah>>2];g[vh>>2]=+g[hf>>2]*+g[ah>>2];g[bh>>2]=+g[_d>>2]*+g[ah>>2];g[qh>>2]=+g[Ib>>2]*+g[ah>>2];g[Ba>>2]=+g[qh>>2]-+g[rh>>2];g[Da>>2]=+g[uh>>2]+ +g[vh>>2];g[Yh>>2]=+g[Wh>>2]-+g[Xh>>2];g[sh>>2]=+g[qh>>2]+ +g[rh>>2];g[fa>>2]=+g[_h>>2]-+g[bh>>2];g[ch>>2]=+g[_h>>2]+ +g[bh>>2];g[ea>>2]=+g[Wh>>2]+ +g[Xh>>2];g[wh>>2]=+g[uh>>2]-+g[vh>>2];g[w>>2]=+g[v>>2]*+g[ah>>2];g[y>>2]=+g[x>>2]*+g[Fh>>2];g[R>>2]=+g[w>>2]+ +g[y>>2];g[Bh>>2]=+g[$g>>2]*+g[ah>>2];g[Gh>>2]=+g[Eh>>2]*+g[Fh>>2];g[Hh>>2]=+g[Bh>>2]+ +g[Gh>>2];g[$>>2]=+g[v>>2]*+g[Fh>>2];g[aa>>2]=+g[x>>2]*+g[ah>>2];g[T>>2]=+g[$>>2]-+g[aa>>2];g[Jh>>2]=+g[$g>>2]*+g[Fh>>2];g[Kh>>2]=+g[Eh>>2]*+g[ah>>2];g[Lh>>2]=+g[Jh>>2]-+g[Kh>>2];g[z>>2]=+g[w>>2]-+g[y>>2];g[Ab>>2]=+g[Jh>>2]+ +g[Kh>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[yb>>2]=+g[Bh>>2]-+g[Gh>>2];g[Oh>>2]=+g[(c[m>>2]|0)+24>>2];g[Ph>>2]=+g[(c[m>>2]|0)+28>>2];g[Qh>>2]=+g[$g>>2]*+g[Oh>>2]+ +g[Eh>>2]*+g[Ph>>2];g[Sh>>2]=+g[$g>>2]*+g[Ph>>2]-+g[Eh>>2]*+g[Oh>>2];g[Ia>>2]=+g[R>>2]*+g[Ph>>2]-+g[T>>2]*+g[Oh>>2];g[Fb>>2]=+g[sh>>2]*+g[Ph>>2]-+g[wh>>2]*+g[Oh>>2];g[zh>>2]=+g[Ib>>2]*+g[Oh>>2]+ +g[hf>>2]*+g[Ph>>2];g[sb>>2]=+g[za>>2]*+g[Oh>>2]+ +g[_d>>2]*+g[Ph>>2];g[ub>>2]=+g[za>>2]*+g[Ph>>2]-+g[_d>>2]*+g[Oh>>2];g[Db>>2]=+g[sh>>2]*+g[Oh>>2]+ +g[wh>>2]*+g[Ph>>2];g[hh>>2]=+g[ah>>2]*+g[Ph>>2]-+g[Fh>>2]*+g[Oh>>2];g[fh>>2]=+g[ah>>2]*+g[Oh>>2]+ +g[Fh>>2]*+g[Ph>>2];g[r>>2]=+g[Ib>>2]*+g[Ph>>2]-+g[hf>>2]*+g[Oh>>2];g[ga>>2]=+g[ea>>2]*+g[Oh>>2]+ +g[fa>>2]*+g[Ph>>2];g[F>>2]=+g[Hh>>2]*+g[Oh>>2]+ +g[Lh>>2]*+g[Ph>>2];g[Ga>>2]=+g[R>>2]*+g[Oh>>2]+ +g[T>>2]*+g[Ph>>2];g[C>>2]=+g[Yh>>2]*+g[Ph>>2]-+g[ch>>2]*+g[Oh>>2];g[H>>2]=+g[Hh>>2]*+g[Ph>>2]-+g[Lh>>2]*+g[Oh>>2];g[W>>2]=+g[v>>2]*+g[Oh>>2]+ +g[x>>2]*+g[Ph>>2];g[Y>>2]=+g[v>>2]*+g[Ph>>2]-+g[x>>2]*+g[Oh>>2];g[ya>>2]=+g[Yh>>2]*+g[Oh>>2]+ 
+g[ch>>2]*+g[Ph>>2];g[ia>>2]=+g[ea>>2]*+g[Ph>>2]-+g[fa>>2]*+g[Oh>>2];g[q>>2]=+g[c[k>>2]>>2];g[mg>>2]=+g[c[l>>2]>>2];g[Ih>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Mh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Nh>>2]=+g[Hh>>2]*+g[Ih>>2]+ +g[Lh>>2]*+g[Mh>>2];g[Sa>>2]=+g[Hh>>2]*+g[Mh>>2]-+g[Lh>>2]*+g[Ih>>2];g[Rh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Th>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Uh>>2]=+g[Qh>>2]*+g[Rh>>2]+ +g[Sh>>2]*+g[Th>>2];g[Ta>>2]=+g[Qh>>2]*+g[Th>>2]-+g[Sh>>2]*+g[Rh>>2];g[Vh>>2]=+g[Nh>>2]+ +g[Uh>>2];g[jg>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Zh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[dh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[eh>>2]=+g[Yh>>2]*+g[Zh>>2]+ +g[ch>>2]*+g[dh>>2];g[Va>>2]=+g[Yh>>2]*+g[dh>>2]-+g[ch>>2]*+g[Zh>>2];g[gh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ih>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[jh>>2]=+g[fh>>2]*+g[gh>>2]+ +g[hh>>2]*+g[ih>>2];g[Wa>>2]=+g[fh>>2]*+g[ih>>2]-+g[hh>>2]*+g[gh>>2];g[kh>>2]=+g[eh>>2]+ +g[jh>>2];g[kg>>2]=+g[Va>>2]+ +g[Wa>>2];g[Pa>>2]=(+g[Vh>>2]-+g[kh>>2])*.55901700258255;g[Bf>>2]=(+g[jg>>2]-+g[kg>>2])*.55901700258255;g[lh>>2]=+g[Vh>>2]+ +g[kh>>2];g[Qa>>2]=+g[q>>2]-+g[lh>>2]*.25;g[lg>>2]=+g[jg>>2]+ +g[kg>>2];g[Cf>>2]=+g[mg>>2]-+g[lg>>2]*.25;g[Ef>>2]=+g[Nh>>2]-+g[Uh>>2];g[Ff>>2]=+g[eh>>2]-+g[jh>>2];g[Gf>>2]=+g[Ef>>2]*.9510565400123596+ +g[Ff>>2]*.5877852439880371;g[Eg>>2]=+g[Ff>>2]*.9510565400123596-+g[Ef>>2]*.5877852439880371;g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[Ya>>2]=+g[Ua>>2]*.9510565400123596+ +g[Xa>>2]*.5877852439880371;g[be>>2]=+g[Xa>>2]*.9510565400123596-+g[Ua>>2]*.5877852439880371;g[mb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[nb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ob>>2]=+g[Ib>>2]*+g[mb>>2]+ +g[hf>>2]*+g[nb>>2];g[Cd>>2]=+g[Ib>>2]*+g[nb>>2]-+g[hf>>2]*+g[mb>>2];g[pb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[rb>>2]=+g[ea>>2]*+g[pb>>2]+ 
+g[fa>>2]*+g[qb>>2];g[Tc>>2]=+g[ea>>2]*+g[qb>>2]-+g[fa>>2]*+g[pb>>2];g[Eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Hb>>2]=+g[Db>>2]*+g[Eb>>2]+ +g[Fb>>2]*+g[Gb>>2];g[Xc>>2]=+g[Db>>2]*+g[Gb>>2]-+g[Fb>>2]*+g[Eb>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[Uc>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Bb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Ab>>2]*+g[Bb>>2];g[Wc>>2]=+g[yb>>2]*+g[Bb>>2]-+g[Ab>>2]*+g[zb>>2];g[Vc>>2]=+g[Tc>>2]-+g[Uc>>2];g[Yc>>2]=+g[Wc>>2]-+g[Xc>>2];g[Hd>>2]=+g[Cb>>2]-+g[Hb>>2];g[Gd>>2]=+g[rb>>2]-+g[wb>>2];g[$c>>2]=+g[Tc>>2]+ +g[Uc>>2];g[Ad>>2]=+g[Wc>>2]+ +g[Xc>>2];g[Dd>>2]=+g[$c>>2]+ +g[Ad>>2];g[xb>>2]=+g[rb>>2]+ +g[wb>>2];g[Ka>>2]=+g[Cb>>2]+ +g[Hb>>2];g[La>>2]=+g[xb>>2]+ +g[Ka>>2];g[Ma>>2]=+g[ob>>2]+ +g[La>>2];g[bg>>2]=+g[Cd>>2]+ +g[Dd>>2];g[Zc>>2]=+g[Vc>>2]*.9510565400123596+ +g[Yc>>2]*.5877852439880371;g[_e>>2]=+g[Yc>>2]*.9510565400123596-+g[Vc>>2]*.5877852439880371;g[pc>>2]=(+g[xb>>2]-+g[Ka>>2])*.55901700258255;g[qc>>2]=+g[ob>>2]-+g[La>>2]*.25;g[Sc>>2]=+g[pc>>2]+ +g[qc>>2];g[Ze>>2]=+g[qc>>2]-+g[pc>>2];g[_c>>2]=+g[Sc>>2]+ +g[Zc>>2];g[Ge>>2]=+g[Ze>>2]+ +g[_e>>2];g[ld>>2]=+g[Sc>>2]-+g[Zc>>2];g[$e>>2]=+g[Ze>>2]-+g[_e>>2];g[Id>>2]=+g[Gd>>2]*.9510565400123596+ +g[Hd>>2]*.5877852439880371;g[af>>2]=+g[Hd>>2]*.9510565400123596-+g[Gd>>2]*.5877852439880371;g[Bd>>2]=(+g[$c>>2]-+g[Ad>>2])*.55901700258255;g[Ed>>2]=+g[Cd>>2]-+g[Dd>>2]*.25;g[Fd>>2]=+g[Bd>>2]+ +g[Ed>>2];g[bf>>2]=+g[Ed>>2]-+g[Bd>>2];g[Jd>>2]=+g[Fd>>2]-+g[Id>>2];g[Fe>>2]=+g[bf>>2]-+g[af>>2];g[kd>>2]=+g[Id>>2]+ +g[Fd>>2];g[cf>>2]=+g[af>>2]+ +g[bf>>2];g[nh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[oh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ph>>2]=+g[za>>2]*+g[nh>>2]+ 
+g[_d>>2]*+g[oh>>2];g[Nb>>2]=+g[za>>2]*+g[oh>>2]-+g[_d>>2]*+g[nh>>2];g[th>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[xh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[yh>>2]=+g[sh>>2]*+g[th>>2]+ +g[wh>>2]*+g[xh>>2];g[bb>>2]=+g[sh>>2]*+g[xh>>2]-+g[wh>>2]*+g[th>>2];g[ha>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ja>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ka>>2]=+g[ga>>2]*+g[ha>>2]+ +g[ia>>2]*+g[ja>>2];g[fb>>2]=+g[ga>>2]*+g[ja>>2]-+g[ia>>2]*+g[ha>>2];g[Ah>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[t>>2]=+g[zh>>2]*+g[Ah>>2]+ +g[r>>2]*+g[s>>2];g[cb>>2]=+g[zh>>2]*+g[s>>2]-+g[r>>2]*+g[Ah>>2];g[A>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[da>>2]=+g[z>>2]*+g[A>>2]+ +g[ba>>2]*+g[ca>>2];g[eb>>2]=+g[z>>2]*+g[ca>>2]-+g[ba>>2]*+g[A>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[Sb>>2]=+g[da>>2]-+g[ka>>2];g[Rb>>2]=+g[yh>>2]-+g[t>>2];g[Kb>>2]=+g[bb>>2]+ +g[cb>>2];g[Lb>>2]=+g[eb>>2]+ +g[fb>>2];g[Ob>>2]=+g[Kb>>2]+ +g[Lb>>2];g[u>>2]=+g[yh>>2]+ +g[t>>2];g[la>>2]=+g[da>>2]+ +g[ka>>2];g[ma>>2]=+g[u>>2]+ +g[la>>2];g[na>>2]=+g[ph>>2]+ +g[ma>>2];g[Zf>>2]=+g[Nb>>2]+ +g[Ob>>2];g[hb>>2]=+g[db>>2]*.9510565400123596+ +g[gb>>2]*.5877852439880371;g[ee>>2]=+g[gb>>2]*.9510565400123596-+g[db>>2]*.5877852439880371;g[_a>>2]=(+g[u>>2]-+g[la>>2])*.55901700258255;g[$a>>2]=+g[ph>>2]-+g[ma>>2]*.25;g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[de>>2]=+g[$a>>2]-+g[_a>>2];g[Jb>>2]=+g[ab>>2]+ +g[hb>>2];g[ve>>2]=+g[de>>2]+ +g[ee>>2];g[ad>>2]=+g[ab>>2]-+g[hb>>2];g[fe>>2]=+g[de>>2]-+g[ee>>2];g[rc>>2]=+g[Rb>>2]*.9510565400123596+ +g[Sb>>2]*.5877852439880371;g[ge>>2]=+g[Sb>>2]*.9510565400123596-+g[Rb>>2]*.5877852439880371;g[Mb>>2]=(+g[Kb>>2]-+g[Lb>>2])*.55901700258255;g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2]*.25;g[Qb>>2]=+g[Mb>>2]+ +g[Pb>>2];g[he>>2]=+g[Pb>>2]-+g[Mb>>2];g[sc>>2]=+g[Qb>>2]-+g[rc>>2];g[we>>2]=+g[he>>2]-+g[ge>>2];g[bd>>2]=+g[rc>>2]+ +g[Qb>>2];g[ie>>2]=+g[ge>>2]+ 
+g[he>>2];g[oa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[qa>>2]=+g[$g>>2]*+g[oa>>2]+ +g[Eh>>2]*+g[pa>>2];g[Ic>>2]=+g[$g>>2]*+g[pa>>2]-+g[Eh>>2]*+g[oa>>2];g[ra>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[sa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ta>>2]=+g[ah>>2]*+g[ra>>2]+ +g[Fh>>2]*+g[sa>>2];g[xc>>2]=+g[ah>>2]*+g[sa>>2]-+g[Fh>>2]*+g[ra>>2];g[G>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[J>>2]=+g[F>>2]*+g[G>>2]+ +g[H>>2]*+g[I>>2];g[Bc>>2]=+g[F>>2]*+g[I>>2]-+g[H>>2]*+g[G>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[wa>>2]=+g[Oh>>2]*+g[ua>>2]+ +g[Ph>>2]*+g[va>>2];g[yc>>2]=+g[Oh>>2]*+g[va>>2]-+g[Ph>>2]*+g[ua>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[E>>2]=+g[ya>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[Ac>>2]=+g[ya>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[Nc>>2]=+g[E>>2]-+g[J>>2];g[Mc>>2]=+g[ta>>2]-+g[wa>>2];g[Fc>>2]=+g[xc>>2]+ +g[yc>>2];g[Gc>>2]=+g[Ac>>2]+ +g[Bc>>2];g[Jc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[K>>2]=+g[E>>2]+ +g[J>>2];g[L>>2]=+g[xa>>2]+ +g[K>>2];g[M>>2]=+g[qa>>2]+ +g[L>>2];g[_f>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Dc>>2]=+g[zc>>2]*.9510565400123596+ +g[Cc>>2]*.5877852439880371;g[Oe>>2]=+g[Cc>>2]*.9510565400123596-+g[zc>>2]*.5877852439880371;g[uc>>2]=(+g[xa>>2]-+g[K>>2])*.55901700258255;g[vc>>2]=+g[qa>>2]-+g[L>>2]*.25;g[wc>>2]=+g[uc>>2]+ +g[vc>>2];g[Ne>>2]=+g[vc>>2]-+g[uc>>2];g[Ec>>2]=+g[wc>>2]+ +g[Dc>>2];g[ze>>2]=+g[Ne>>2]+ +g[Oe>>2];g[dd>>2]=+g[wc>>2]-+g[Dc>>2];g[Pe>>2]=+g[Ne>>2]-+g[Oe>>2];g[Oc>>2]=+g[Mc>>2]*.9510565400123596+ +g[Nc>>2]*.5877852439880371;g[Ke>>2]=+g[Nc>>2]*.9510565400123596-+g[Mc>>2]*.5877852439880371;g[Hc>>2]=(+g[Fc>>2]-+g[Gc>>2])*.55901700258255;g[Kc>>2]=+g[Ic>>2]-+g[Jc>>2]*.25;g[Lc>>2]=+g[Hc>>2]+ 
+g[Kc>>2];g[Le>>2]=+g[Kc>>2]-+g[Hc>>2];g[Pc>>2]=+g[Lc>>2]-+g[Oc>>2];g[ye>>2]=+g[Le>>2]-+g[Ke>>2];g[ed>>2]=+g[Oc>>2]+ +g[Lc>>2];g[Me>>2]=+g[Ke>>2]+ +g[Le>>2];g[O>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[P>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Q>>2]=+g[v>>2]*+g[O>>2]+ +g[x>>2]*+g[P>>2];g[gc>>2]=+g[v>>2]*+g[P>>2]-+g[x>>2]*+g[O>>2];g[S>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[Xb>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[Ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[Ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ib>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[$b>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[X>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[Yb>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[Ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Ea>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[Da>>2]*+g[Ea>>2];g[_b>>2]=+g[Ba>>2]*+g[Ea>>2]-+g[Da>>2]*+g[Ca>>2];g[Zb>>2]=+g[Xb>>2]-+g[Yb>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[lc>>2]=+g[Fa>>2]-+g[ib>>2];g[kc>>2]=+g[V>>2]-+g[_>>2];g[dc>>2]=+g[Xb>>2]+ +g[Yb>>2];g[ec>>2]=+g[_b>>2]+ +g[$b>>2];g[hc>>2]=+g[dc>>2]+ +g[ec>>2];g[Aa>>2]=+g[V>>2]+ +g[_>>2];g[jb>>2]=+g[Fa>>2]+ +g[ib>>2];g[kb>>2]=+g[Aa>>2]+ +g[jb>>2];g[lb>>2]=+g[Q>>2]+ +g[kb>>2];g[ag>>2]=+g[gc>>2]+ +g[hc>>2];g[bc>>2]=+g[Zb>>2]*.9510565400123596+ +g[ac>>2]*.5877852439880371;g[Te>>2]=+g[ac>>2]*.9510565400123596-+g[Zb>>2]*.5877852439880371;g[Ub>>2]=(+g[Aa>>2]-+g[jb>>2])*.55901700258255;g[Vb>>2]=+g[Q>>2]-+g[kb>>2]*.25;g[Wb>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Se>>2]=+g[Vb>>2]-+g[Ub>>2];g[cc>>2]=+g[Wb>>2]+ +g[bc>>2];g[Ce>>2]=+g[Se>>2]+ +g[Te>>2];g[id>>2]=+g[Wb>>2]-+g[bc>>2];g[Ue>>2]=+g[Se>>2]-+g[Te>>2];g[mc>>2]=+g[kc>>2]*.9510565400123596+ 
+g[lc>>2]*.5877852439880371;g[Ve>>2]=+g[lc>>2]*.9510565400123596-+g[kc>>2]*.5877852439880371;g[fc>>2]=(+g[dc>>2]-+g[ec>>2])*.55901700258255;g[ic>>2]=+g[gc>>2]-+g[hc>>2]*.25;g[jc>>2]=+g[fc>>2]+ +g[ic>>2];g[We>>2]=+g[ic>>2]-+g[fc>>2];g[nc>>2]=+g[jc>>2]-+g[mc>>2];g[De>>2]=+g[We>>2]-+g[Ve>>2];g[hd>>2]=+g[mc>>2]+ +g[jc>>2];g[Xe>>2]=+g[Ve>>2]+ +g[We>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[cg>>2]=+g[ag>>2]-+g[bg>>2];g[dg>>2]=+g[$f>>2]*.9510565400123596+ +g[cg>>2]*.5877852439880371;g[fg>>2]=+g[cg>>2]*.9510565400123596-+g[$f>>2]*.5877852439880371;g[mh>>2]=+g[q>>2]+ +g[lh>>2];g[N>>2]=+g[na>>2]+ +g[M>>2];g[Na>>2]=+g[lb>>2]+ +g[Ma>>2];g[Oa>>2]=+g[N>>2]+ +g[Na>>2];g[Wf>>2]=(+g[N>>2]-+g[Na>>2])*.55901700258255;g[Xf>>2]=+g[mh>>2]-+g[Oa>>2]*.25;g[c[k>>2]>>2]=+g[mh>>2]+ +g[Oa>>2];g[eg>>2]=+g[Xf>>2]-+g[Wf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[eg>>2]-+g[fg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[eg>>2]+ +g[fg>>2];g[Yf>>2]=+g[Wf>>2]+ +g[Xf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Yf>>2]-+g[dg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Yf>>2]+ +g[dg>>2];g[tf>>2]=+g[na>>2]-+g[M>>2];g[uf>>2]=+g[lb>>2]-+g[Ma>>2];g[vf>>2]=+g[tf>>2]*.9510565400123596+ +g[uf>>2]*.5877852439880371;g[wf>>2]=+g[uf>>2]*.9510565400123596-+g[tf>>2]*.5877852439880371;g[ng>>2]=+g[lg>>2]+ +g[mg>>2];g[gg>>2]=+g[Zf>>2]+ +g[_f>>2];g[hg>>2]=+g[ag>>2]+ +g[bg>>2];g[ig>>2]=+g[gg>>2]+ +g[hg>>2];g[og>>2]=(+g[gg>>2]-+g[hg>>2])*.55901700258255;g[pg>>2]=+g[ng>>2]-+g[ig>>2]*.25;g[c[l>>2]>>2]=+g[ig>>2]+ +g[ng>>2];g[xf>>2]=+g[pg>>2]-+g[og>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[wf>>2]+ +g[xf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[xf>>2]-+g[wf>>2];g[qg>>2]=+g[og>>2]+ +g[pg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[qg>>2]-+g[vf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[vf>>2]+ +g[qg>>2];g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Za>>2]=+g[Ra>>2]+ +g[Ya>>2];g[Zd>>2]=+g[Ra>>2]-+g[Ya>>2];g[Df>>2]=+g[Bf>>2]+ +g[Cf>>2];g[Hf>>2]=+g[Df>>2]-+g[Gf>>2];g[sg>>2]=+g[Gf>>2]+ +g[Df>>2];g[tc>>2]=+g[Jb>>2]*.9685831665992737+ 
+g[sc>>2]*.24868988990783691;g[Qc>>2]=+g[Ec>>2]*.5358268022537231+ +g[Pc>>2]*.8443279266357422;g[Tb>>2]=+g[tc>>2]+ +g[Qc>>2];g[oc>>2]=+g[cc>>2]*.8763066530227661+ +g[nc>>2]*.4817536771297455;g[Kd>>2]=+g[_c>>2]*.728968620300293+ +g[Jd>>2]*.6845471262931824;g[Ld>>2]=+g[oc>>2]+ +g[Kd>>2];g[Md>>2]=+g[Tb>>2]+ +g[Ld>>2];g[Mf>>2]=+g[oc>>2]-+g[Kd>>2];g[Nd>>2]=(+g[Tb>>2]-+g[Ld>>2])*.55901700258255;g[Lf>>2]=+g[tc>>2]-+g[Qc>>2];g[sd>>2]=+g[bd>>2]*.5358268022537231-+g[ad>>2]*.8443279266357422;g[td>>2]=+g[dd>>2]*.7705132365226746-+g[ed>>2]*.6374239921569824;g[Qf>>2]=+g[sd>>2]+ +g[td>>2];g[vd>>2]=+g[ld>>2]*.12533323466777802+ +g[kd>>2]*.9921147227287292;g[wd>>2]=+g[id>>2]*.9048270583152771+ +g[hd>>2]*.4257792830467224;g[Rf>>2]=+g[wd>>2]+ +g[vd>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[tg>>2]=(+g[Qf>>2]+ +g[Rf>>2])*.55901700258255;g[xd>>2]=+g[vd>>2]-+g[wd>>2];g[Sf>>2]=+g[Qf>>2]-+g[Rf>>2];g[cd>>2]=+g[ad>>2]*.5358268022537231+ +g[bd>>2]*.8443279266357422;g[fd>>2]=+g[dd>>2]*.6374239921569824+ +g[ed>>2]*.7705132365226746;g[gd>>2]=+g[cd>>2]-+g[fd>>2];g[jd>>2]=+g[hd>>2]*.9048270583152771-+g[id>>2]*.4257792830467224;g[md>>2]=+g[kd>>2]*.12533323466777802-+g[ld>>2]*.9921147227287292;g[nd>>2]=+g[jd>>2]+ +g[md>>2];g[od>>2]=+g[gd>>2]+ +g[nd>>2];g[xg>>2]=+g[jd>>2]-+g[md>>2];g[pd>>2]=(+g[gd>>2]-+g[nd>>2])*.55901700258255;g[wg>>2]=+g[cd>>2]+ +g[fd>>2];g[Qd>>2]=+g[sc>>2]*.9685831665992737-+g[Jb>>2]*.24868988990783691;g[Rd>>2]=+g[Pc>>2]*.5358268022537231-+g[Ec>>2]*.8443279266357422;g[yf>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Td>>2]=+g[nc>>2]*.8763066530227661-+g[cc>>2]*.4817536771297455;g[Ud>>2]=+g[Jd>>2]*.728968620300293-+g[_c>>2]*.6845471262931824;g[zf>>2]=+g[Td>>2]+ +g[Ud>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[If>>2]=(+g[yf>>2]-+g[zf>>2])*.55901700258255;g[Vd>>2]=+g[Td>>2]-+g[Ud>>2];g[Af>>2]=+g[yf>>2]+ +g[zf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Za>>2]+ +g[Md>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Af>>2]+ +g[Hf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Zd>>2]+ +g[od>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Sf>>2]+ 
+g[sg>>2];g[Wd>>2]=+g[Sd>>2]*.9510565400123596+ +g[Vd>>2]*.5877852439880371;g[Yd>>2]=+g[Vd>>2]*.9510565400123596-+g[Sd>>2]*.5877852439880371;g[Od>>2]=+g[Za>>2]-+g[Md>>2]*.25;g[Pd>>2]=+g[Nd>>2]+ +g[Od>>2];g[Xd>>2]=+g[Od>>2]-+g[Nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Pd>>2]-+g[Wd>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Xd>>2]+ +g[Yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Pd>>2]+ +g[Wd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Xd>>2]-+g[Yd>>2];g[Nf>>2]=+g[Lf>>2]*.9510565400123596+ +g[Mf>>2]*.5877852439880371;g[Of>>2]=+g[Mf>>2]*.9510565400123596-+g[Lf>>2]*.5877852439880371;g[Jf>>2]=+g[Hf>>2]-+g[Af>>2]*.25;g[Kf>>2]=+g[If>>2]+ +g[Jf>>2];g[Pf>>2]=+g[Jf>>2]-+g[If>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Kf>>2]-+g[Nf>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Pf>>2]-+g[Of>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Nf>>2]+ +g[Kf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Of>>2]+ +g[Pf>>2];g[yd>>2]=+g[ud>>2]*.9510565400123596+ +g[xd>>2]*.5877852439880371;g[$d>>2]=+g[xd>>2]*.9510565400123596-+g[ud>>2]*.5877852439880371;g[qd>>2]=+g[Zd>>2]-+g[od>>2]*.25;g[rd>>2]=+g[pd>>2]+ +g[qd>>2];g[zd>>2]=+g[qd>>2]-+g[pd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[rd>>2]-+g[yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[zd>>2]+ +g[$d>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[rd>>2]+ +g[yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[zd>>2]-+g[$d>>2];g[yg>>2]=+g[wg>>2]*.9510565400123596+ +g[xg>>2]*.5877852439880371;g[zg>>2]=+g[xg>>2]*.9510565400123596-+g[wg>>2]*.5877852439880371;g[ug>>2]=+g[sg>>2]-+g[Sf>>2]*.25;g[vg>>2]=+g[tg>>2]+ +g[ug>>2];g[Ag>>2]=+g[ug>>2]-+g[tg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[vg>>2]-+g[yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Ag>>2]-+g[zg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[yg>>2]+ +g[vg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[zg>>2]+ +g[Ag>>2];g[ae>>2]=+g[Qa>>2]-+g[Pa>>2];g[ce>>2]=+g[ae>>2]-+g[be>>2];g[ue>>2]=+g[ae>>2]+ +g[be>>2];g[Fg>>2]=+g[Cf>>2]-+g[Bf>>2];g[Gg>>2]=+g[Eg>>2]+ 
+g[Fg>>2];g[Sg>>2]=+g[Fg>>2]-+g[Eg>>2];g[Je>>2]=+g[fe>>2]*.8763066530227661+ +g[ie>>2]*.4817536771297455;g[Qe>>2]=+g[Me>>2]*.9048270583152771-+g[Pe>>2]*.4257792830467224;g[Re>>2]=+g[Je>>2]+ +g[Qe>>2];g[Ye>>2]=+g[Ue>>2]*.5358268022537231+ +g[Xe>>2]*.8443279266357422;g[df>>2]=+g[$e>>2]*.06279052048921585+ +g[cf>>2]*.9980267286300659;g[ef>>2]=+g[Ye>>2]+ +g[df>>2];g[ff>>2]=+g[Re>>2]+ +g[ef>>2];g[Lg>>2]=+g[Ye>>2]-+g[df>>2];g[gf>>2]=(+g[Re>>2]-+g[ef>>2])*.55901700258255;g[Kg>>2]=+g[Je>>2]-+g[Qe>>2];g[nf>>2]=+g[we>>2]*.728968620300293-+g[ve>>2]*.6845471262931824;g[of>>2]=+g[ze>>2]*.12533323466777802+ +g[ye>>2]*.9921147227287292;g[Pg>>2]=+g[nf>>2]-+g[of>>2];g[qf>>2]=+g[De>>2]*.06279052048921585-+g[Ce>>2]*.9980267286300659;g[rf>>2]=+g[Ge>>2]*.7705132365226746+ +g[Fe>>2]*.6374239921569824;g[Qg>>2]=+g[qf>>2]-+g[rf>>2];g[pf>>2]=+g[nf>>2]+ +g[of>>2];g[Tg>>2]=(+g[Pg>>2]-+g[Qg>>2])*.55901700258255;g[sf>>2]=+g[qf>>2]+ +g[rf>>2];g[Rg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[xe>>2]=+g[ve>>2]*.728968620300293+ +g[we>>2]*.6845471262931824;g[Ae>>2]=+g[ye>>2]*.12533323466777802-+g[ze>>2]*.9921147227287292;g[Be>>2]=+g[xe>>2]+ +g[Ae>>2];g[Ee>>2]=+g[Ce>>2]*.06279052048921585+ +g[De>>2]*.9980267286300659;g[He>>2]=+g[Fe>>2]*.7705132365226746-+g[Ge>>2]*.6374239921569824;g[Ie>>2]=+g[Ee>>2]+ +g[He>>2];g[jf>>2]=+g[Be>>2]+ +g[Ie>>2];g[Xg>>2]=+g[Ee>>2]-+g[He>>2];g[kf>>2]=(+g[Be>>2]-+g[Ie>>2])*.55901700258255;g[Wg>>2]=+g[xe>>2]-+g[Ae>>2];g[le>>2]=+g[ie>>2]*.8763066530227661-+g[fe>>2]*.4817536771297455;g[me>>2]=+g[Pe>>2]*.9048270583152771+ +g[Me>>2]*.4257792830467224;g[Bg>>2]=+g[le>>2]-+g[me>>2];g[oe>>2]=+g[Xe>>2]*.5358268022537231-+g[Ue>>2]*.8443279266357422;g[pe>>2]=+g[cf>>2]*.06279052048921585-+g[$e>>2]*.9980267286300659;g[Cg>>2]=+g[oe>>2]+ +g[pe>>2];g[ne>>2]=+g[le>>2]+ +g[me>>2];g[Hg>>2]=(+g[Bg>>2]-+g[Cg>>2])*.55901700258255;g[qe>>2]=+g[oe>>2]-+g[pe>>2];g[Dg>>2]=+g[Bg>>2]+ +g[Cg>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ce>>2]+ +g[ff>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Dg>>2]+ 
+g[Gg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ue>>2]+ +g[jf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Rg>>2]+ +g[Sg>>2];g[re>>2]=+g[ne>>2]*.9510565400123596+ +g[qe>>2]*.5877852439880371;g[te>>2]=+g[qe>>2]*.9510565400123596-+g[ne>>2]*.5877852439880371;g[je>>2]=+g[ce>>2]-+g[ff>>2]*.25;g[ke>>2]=+g[gf>>2]+ +g[je>>2];g[se>>2]=+g[je>>2]-+g[gf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[ke>>2]-+g[re>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[se>>2]+ +g[te>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ke>>2]+ +g[re>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[se>>2]-+g[te>>2];g[Mg>>2]=+g[Kg>>2]*.9510565400123596+ +g[Lg>>2]*.5877852439880371;g[Ng>>2]=+g[Lg>>2]*.9510565400123596-+g[Kg>>2]*.5877852439880371;g[Ig>>2]=+g[Gg>>2]-+g[Dg>>2]*.25;g[Jg>>2]=+g[Hg>>2]+ +g[Ig>>2];g[Og>>2]=+g[Ig>>2]-+g[Hg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Jg>>2]-+g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Og>>2]-+g[Ng>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Mg>>2]+ +g[Jg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ng>>2]+ +g[Og>>2];g[Tf>>2]=+g[pf>>2]*.9510565400123596+ +g[sf>>2]*.5877852439880371;g[Vf>>2]=+g[sf>>2]*.9510565400123596-+g[pf>>2]*.5877852439880371;g[lf>>2]=+g[ue>>2]-+g[jf>>2]*.25;g[mf>>2]=+g[kf>>2]+ +g[lf>>2];g[Uf>>2]=+g[lf>>2]-+g[kf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[mf>>2]-+g[Tf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Uf>>2]+ +g[Vf>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[mf>>2]+ +g[Tf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Uf>>2]-+g[Vf>>2];g[Yg>>2]=+g[Wg>>2]*.9510565400123596+ +g[Xg>>2]*.5877852439880371;g[Zg>>2]=+g[Xg>>2]*.9510565400123596-+g[Wg>>2]*.5877852439880371;g[Ug>>2]=+g[Sg>>2]-+g[Rg>>2]*.25;g[Vg>>2]=+g[Tg>>2]+ +g[Ug>>2];g[_g>>2]=+g[Ug>>2]-+g[Tg>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Vg>>2]-+g[Yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[_g>>2]-+g[Zg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Yg>>2]+ +g[Vg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Zg>>2]+ 
+g[_g>>2];c[$h>>2]=(c[$h>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=ai;return}/* Nj(a): small forwarding helper. Coerces a to int32, saves the current value of i in b, advances i by 16, stores a at c[b>>2], then calls gh(<the stored a>, 21, 2888) and finally restores i to the saved value; it returns no value. NOTE(review): i is presumably the Emscripten stack pointer and c the Int32 heap view; gh and the meaning of the constants 21 and 2888 are defined outside this chunk - confirm against the full module. This looks like compiler-generated output, so prefer regenerating it over hand-editing. */function Nj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,21,2888);i=b;return}function Oj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0
,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0;kj=i;i=i+2192|0;k=kj+2184|0;l=kj+2180|0;m=kj+2176|0;n=kj+2172|0;lj=kj+2168|0;o=kj+2164|0;p=kj+2160|0;jj=kj+2128|0;za=kj+2124|0;_d=kj+2120|0;Ib=kj+2116|0;hf=kj+2112|0;Ah=kj+2108|0;xi=kj+2104|0;zi=kj+2100|0;Oi=kj+2096|0;ki=kj+2092|0;Pi=kj+2088|0;Si=kj+2084|0;Wi=kj+2080|0;oi=kj+2076|0;si=kj+2072|0;xa=kj+2068|0;I=kj+2064|0;w=kj+2060|0;B=kj+2056|0;s=kj+2052|0;G=kj+2048|0;ma=kj+2044|0;oa=kj+2040|0;yb=kj+2036|0;Sa=kj+2032|0;Cb=kj+2028|0;Ua=kj+2024|0;db=kj+2020|0;sc=kj+2016|0;hb=kj+2012|0;uc=kj+2008|0;dj=kj+2004|0;hj=kj+2e3|0;Cc=kj+1996|0;Ec=kj+1992|0;sa=kj+1988|0;ua=kj+1984|0;nb=kj+1980|0;pb=kj+1976|0;$=kj+1972|0;da=kj+1968|0;Lb=kj+1964|0;Nb=kj+1960|0;X=kj+1956|0;Z=kj+1952|0;Ma=kj+1948|0;Oa=kj+1944|0;Ri=kj+1940|0;Ci=kj+1936|0;Vi=kj+1932|0;Di=kj+1928|0;Xi=kj+1924|0;Gi=kj+1920|0;Zi=kj+1916|0;Ei=kj+1912|0;N=kj+1908|0;Ba=kj+1904|0;Q=kj+1900|0;Ca=kj+1896|0;R=kj+1892|0;Fa=kj+1888|0;T=kj+1884|0;Da=kj+1880|0;bj=kj+1876|0;ca=kj+1872|0;gj=kj+1868|0;z=kj+1864|0;cj=kj+1860|0;ba=kj+1856|0;fj=kj+1852|0;A=kj+1848|0;mi=kj
+1844|0;v=kj+1840|0;ri=kj+1836|0;Ki=kj+1832|0;ni=kj+1828|0;u=kj+1824|0;qi=kj+1820|0;r=kj+1816|0;Rc=kj+1812|0;Ni=kj+1808|0;rg=kj+1804|0;Mi=kj+1800|0;wb=kj+1796|0;xb=kj+1792|0;Ab=kj+1788|0;Bb=kj+1784|0;bb=kj+1780|0;cb=kj+1776|0;fb=kj+1772|0;gb=kj+1768|0;Li=kj+1764|0;Qi=kj+1760|0;Ti=kj+1756|0;Ui=kj+1752|0;L=kj+1748|0;M=kj+1744|0;O=kj+1740|0;P=kj+1736|0;wi=kj+1732|0;Yf=kj+1728|0;Oh=kj+1724|0;ai=kj+1720|0;Yb=kj+1716|0;af=kj+1712|0;Kg=kj+1708|0;Yg=kj+1704|0;Gb=kj+1700|0;uf=kj+1696|0;pg=kj+1692|0;jh=kj+1688|0;Kd=kj+1684|0;se=kj+1680|0;bd=kj+1676|0;pe=kj+1672|0;yc=kj+1668|0;Af=kj+1664|0;Hf=kj+1660|0;oh=kj+1656|0;ld=kj+1652|0;we=kj+1648|0;be=kj+1644|0;ze=kj+1640|0;ha=kj+1636|0;Xg=kj+1632|0;$f=kj+1628|0;Fg=kj+1624|0;bc=kj+1620|0;bf=kj+1616|0;gc=kj+1612|0;cf=kj+1608|0;F=kj+1604|0;eg=kj+1600|0;dg=kj+1596|0;eh=kj+1592|0;nc=kj+1588|0;ff=kj+1584|0;Tc=kj+1580|0;gf=kj+1576|0;Ja=kj+1572|0;gg=kj+1568|0;jg=kj+1564|0;fh=kj+1560|0;Zc=kj+1556|0;ke=kj+1552|0;Cd=kj+1548|0;le=kj+1544|0;$a=kj+1540|0;qg=kj+1536|0;xf=kj+1532|0;kh=kj+1528|0;Vd=kj+1524|0;qe=kj+1520|0;ed=kj+1516|0;te=kj+1512|0;Pc=kj+1508|0;If=kj+1504|0;Df=kj+1500|0;ph=kj+1496|0;wd=kj+1492|0;Ae=kj+1488|0;ee=kj+1484|0;xe=kj+1480|0;q=kj+1476|0;Ig=kj+1472|0;$i=kj+1468|0;Hg=kj+1464|0;li=kj+1460|0;Vb=kj+1456|0;ui=kj+1452|0;Wb=kj+1448|0;Yi=kj+1444|0;_i=kj+1440|0;ej=kj+1436|0;ij=kj+1432|0;pi=kj+1428|0;ti=kj+1424|0;aj=kj+1420|0;vi=kj+1416|0;Mh=kj+1412|0;Nh=kj+1408|0;Ub=kj+1404|0;Xb=kj+1400|0;Gg=kj+1396|0;Jg=kj+1392|0;mb=kj+1388|0;Gd=kj+1384|0;Eb=kj+1380|0;Zd=kj+1376|0;rb=kj+1372|0;Hd=kj+1368|0;vb=kj+1364|0;Yd=kj+1360|0;kb=kj+1356|0;lb=kj+1352|0;zb=kj+1348|0;Db=kj+1344|0;ob=kj+1340|0;qb=kj+1336|0;tb=kj+1332|0;ub=kj+1328|0;sb=kj+1324|0;Fb=kj+1320|0;ng=kj+1316|0;og=kj+1312|0;Id=kj+1308|0;Jd=kj+1304|0;Xd=kj+1300|0;ad=kj+1296|0;Kb=kj+1292|0;yd=kj+1288|0;wc=kj+1284|0;jd=kj+1280|0;Pb=kj+1276|0;zd=kj+1272|0;rc=kj+1268|0;id=kj+1264|0;eb=kj+1260|0;Jb=kj+1256|0;tc=kj+1252|0;vc=kj+1248|0;Mb=kj+1244|0;Ob=kj+1240|0;Rb=kj+1236|0;Sb=kj+1232|0;Qb=kj+1228|0
;xc=kj+1224|0;Ff=kj+1220|0;Gf=kj+1216|0;hd=kj+1212|0;kd=kj+1208|0;$d=kj+1204|0;ae=kj+1200|0;Bi=kj+1196|0;Zb=kj+1192|0;fa=kj+1188|0;ec=kj+1184|0;Ii=kj+1180|0;_b=kj+1176|0;y=kj+1172|0;dc=kj+1168|0;yi=kj+1164|0;Ai=kj+1160|0;aa=kj+1156|0;ea=kj+1152|0;Fi=kj+1148|0;Hi=kj+1144|0;t=kj+1140|0;x=kj+1136|0;Ji=kj+1132|0;ga=kj+1128|0;Zf=kj+1124|0;_f=kj+1120|0;$b=kj+1116|0;ac=kj+1112|0;cc=kj+1108|0;fc=kj+1104|0;la=kj+1100|0;jc=kj+1096|0;D=kj+1092|0;qc=kj+1088|0;qa=kj+1084|0;kc=kj+1080|0;wa=kj+1076|0;pc=kj+1072|0;ja=kj+1068|0;ka=kj+1064|0;ya=kj+1060|0;C=kj+1056|0;na=kj+1052|0;pa=kj+1048|0;ta=kj+1044|0;va=kj+1040|0;ra=kj+1036|0;E=kj+1032|0;bg=kj+1028|0;cg=kj+1024|0;lc=kj+1020|0;mc=kj+1016|0;oc=kj+1012|0;Sc=kj+1008|0;K=kj+1004|0;Vc=kj+1e3|0;Ha=kj+996|0;Ad=kj+992|0;V=kj+988|0;Wc=kj+984|0;Aa=kj+980|0;$c=kj+976|0;H=kj+972|0;J=kj+968|0;Ea=kj+964|0;Ga=kj+960|0;S=kj+956|0;U=kj+952|0;Y=kj+948|0;_=kj+944|0;W=kj+940|0;Ia=kj+936|0;hg=kj+932|0;ig=kj+928|0;Xc=kj+924|0;Yc=kj+920|0;_c=kj+916|0;Bd=kj+912|0;La=kj+908|0;Rd=kj+904|0;Qa=kj+900|0;Sd=kj+896|0;Qd=kj+892|0;Td=kj+888|0;Wa=kj+884|0;Md=kj+880|0;Za=kj+876|0;Nd=kj+872|0;Ld=kj+868|0;Od=kj+864|0;Hb=kj+860|0;Ka=kj+856|0;Na=kj+852|0;Pa=kj+848|0;Ta=kj+844|0;Va=kj+840|0;Xa=kj+836|0;Ya=kj+832|0;Ra=kj+828|0;_a=kj+824|0;vf=kj+820|0;wf=kj+816|0;Pd=kj+812|0;Ud=kj+808|0;cd=kj+804|0;dd=kj+800|0;Bc=kj+796|0;md=kj+792|0;Gc=kj+788|0;nd=kj+784|0;od=kj+780|0;pd=kj+776|0;Kc=kj+772|0;sd=kj+768|0;Nc=kj+764|0;td=kj+760|0;rd=kj+756|0;ud=kj+752|0;zc=kj+748|0;Ac=kj+744|0;Dc=kj+740|0;Fc=kj+736|0;Ic=kj+732|0;Jc=kj+728|0;Lc=kj+724|0;Mc=kj+720|0;Hc=kj+716|0;Oc=kj+712|0;Bf=kj+708|0;Cf=kj+704|0;qd=kj+700|0;vd=kj+696|0;ce=kj+692|0;de=kj+688|0;jb=kj+684|0;xh=kj+680|0;Mg=kj+676|0;Og=kj+672|0;Tb=kj+668|0;Ng=kj+664|0;Cg=kj+660|0;Dg=kj+656|0;ia=kj+652|0;ib=kj+648|0;Eg=kj+644|0;Lg=kj+640|0;ab=kj+636|0;Qc=kj+632|0;yh=kj+628|0;zh=kj+624|0;hh=kj+620|0;th=kj+616|0;Sg=kj+612|0;Ug=kj+608|0;mh=kj+604|0;uh=kj+600|0;rh=kj+596|0;vh=kj+592|0;dh=kj+588|0;gh=kj+584|0;Qg=kj+580|0;Rg=kj+576|0;ih
=kj+572|0;lh=kj+568|0;nh=kj+564|0;qh=kj+560|0;sh=kj+556|0;Pg=kj+552|0;wh=kj+548|0;Tg=kj+544|0;ag=kj+540|0;Zg=kj+536|0;Eh=kj+532|0;Qf=kj+528|0;lg=kj+524|0;Wg=kj+520|0;zg=kj+516|0;bh=kj+512|0;sg=kj+508|0;Dh=kj+504|0;zf=kj+500|0;Nf=kj+496|0;wg=kj+492|0;ah=kj+488|0;Kf=kj+484|0;Of=kj+480|0;fg=kj+476|0;kg=kj+472|0;tf=kj+468|0;yf=kj+464|0;xg=kj+460|0;yg=kj+456|0;Rf=kj+452|0;Sf=kj+448|0;ug=kj+444|0;vg=kj+440|0;Ef=kj+436|0;Jf=kj+432|0;mg=kj+428|0;Lf=kj+424|0;Ch=kj+420|0;Fh=kj+416|0;Mf=kj+412|0;Pf=kj+408|0;Gh=kj+404|0;Hh=kj+400|0;tg=kj+396|0;Ag=kj+392|0;Vg=kj+388|0;_g=kj+384|0;Bg=kj+380|0;ch=kj+376|0;$g=kj+372|0;Bh=kj+368|0;ic=kj+364|0;Me=kj+360|0;bi=kj+356|0;hi=kj+352|0;Ed=kj+348|0;_h=kj+344|0;We=kj+340|0;_e=kj+336|0;gd=kj+332|0;Je=kj+328|0;Pe=kj+324|0;gi=kj+320|0;Te=kj+316|0;Ze=kj+312|0;ge=kj+308|0;Ke=kj+304|0;hc=kj+300|0;$h=kj+296|0;Uc=kj+292|0;Dd=kj+288|0;Ue=kj+284|0;Ve=kj+280|0;Wd=kj+276|0;fd=kj+272|0;Ne=kj+268|0;Oe=kj+264|0;Re=kj+260|0;Se=kj+256|0;xd=kj+252|0;fe=kj+248|0;Fd=kj+244|0;he=kj+240|0;fi=kj+236|0;ii=kj+232|0;ie=kj+228|0;Le=kj+224|0;ji=kj+220|0;Lh=kj+216|0;Qe=kj+212|0;Xe=kj+208|0;Zh=kj+204|0;ci=kj+200|0;Ye=kj+196|0;$e=kj+192|0;di=kj+188|0;ei=kj+184|0;ef=kj+180|0;Ie=kj+176|0;Ph=kj+172|0;Vh=kj+168|0;ne=kj+164|0;Jh=kj+160|0;sf=kj+156|0;Wf=kj+152|0;ve=kj+148|0;Fe=kj+144|0;lf=kj+140|0;Uh=kj+136|0;pf=kj+132|0;Vf=kj+128|0;Ce=kj+124|0;Ge=kj+120|0;df=kj+116|0;Kh=kj+112|0;je=kj+108|0;me=kj+104|0;qf=kj+100|0;rf=kj+96|0;re=kj+92|0;ue=kj+88|0;jf=kj+84|0;kf=kj+80|0;nf=kj+76|0;of=kj+72|0;ye=kj+68|0;Be=kj+64|0;oe=kj+60|0;De=kj+56|0;Th=kj+52|0;Wh=kj+48|0;Ee=kj+44|0;He=kj+40|0;Xh=kj+36|0;Yh=kj+32|0;mf=kj+28|0;Tf=kj+24|0;Ih=kj+20|0;Qh=kj+16|0;Uf=kj+12|0;Xf=kj+8|0;Rh=kj+4|0;Sh=kj;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[lj>>2]=f;c[o>>2]=h;c[p>>2]=j;g[kj+2156>>2]=.19509032368659973;g[kj+2152>>2]=.9807852506637573;g[kj+2148>>2]=.5555702447891235;g[kj+2144>>2]=.8314695954322815;g[kj+2140>>2]=.3826834261417389;g[kj+2136>>2]=.9238795042037964;g[kj+2132>>2]=.7071067690849304;c[jj>>2]=c
[lj>>2];c[m>>2]=(c[m>>2]|0)+(c[lj>>2]<<3<<2);while(1){if((c[jj>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[Ib>>2]=+g[(c[m>>2]|0)+8>>2];g[hf>>2]=+g[(c[m>>2]|0)+12>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[Ni>>2]=+g[_d>>2]*+g[Ib>>2];g[rg>>2]=+g[_d>>2]*+g[hf>>2];g[Mi>>2]=+g[za>>2]*+g[hf>>2];g[Ah>>2]=+g[Rc>>2]+ +g[rg>>2];g[xi>>2]=+g[Rc>>2]-+g[rg>>2];g[zi>>2]=+g[Mi>>2]+ +g[Ni>>2];g[Oi>>2]=+g[Mi>>2]-+g[Ni>>2];g[ki>>2]=+g[(c[m>>2]|0)+16>>2];g[bj>>2]=+g[za>>2]*+g[ki>>2];g[ca>>2]=+g[hf>>2]*+g[ki>>2];g[gj>>2]=+g[_d>>2]*+g[ki>>2];g[z>>2]=+g[Ib>>2]*+g[ki>>2];g[Pi>>2]=+g[(c[m>>2]|0)+20>>2];g[cj>>2]=+g[_d>>2]*+g[Pi>>2];g[ba>>2]=+g[Ib>>2]*+g[Pi>>2];g[fj>>2]=+g[za>>2]*+g[Pi>>2];g[A>>2]=+g[hf>>2]*+g[Pi>>2];g[Si>>2]=+g[(c[m>>2]|0)+24>>2];g[mi>>2]=+g[Ib>>2]*+g[Si>>2];g[v>>2]=+g[_d>>2]*+g[Si>>2];g[ri>>2]=+g[hf>>2]*+g[Si>>2];g[Ki>>2]=+g[za>>2]*+g[Si>>2];g[Wi>>2]=+g[(c[m>>2]|0)+28>>2];g[ni>>2]=+g[hf>>2]*+g[Wi>>2];g[u>>2]=+g[za>>2]*+g[Wi>>2];g[qi>>2]=+g[Ib>>2]*+g[Wi>>2];g[r>>2]=+g[_d>>2]*+g[Wi>>2];g[oi>>2]=+g[mi>>2]+ +g[ni>>2];g[si>>2]=+g[qi>>2]-+g[ri>>2];g[xa>>2]=+g[Ki>>2]+ +g[r>>2];g[I>>2]=+g[qi>>2]+ +g[ri>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[B>>2]=+g[u>>2]-+g[v>>2];g[s>>2]=+g[Ki>>2]-+g[r>>2];g[G>>2]=+g[mi>>2]-+g[ni>>2];g[ma>>2]=+g[ki>>2]*+g[Si>>2]+ +g[Pi>>2]*+g[Wi>>2];g[oa>>2]=+g[ki>>2]*+g[Wi>>2]-+g[Pi>>2]*+g[Si>>2];g[wb>>2]=+g[Ah>>2]*+g[Si>>2];g[xb>>2]=+g[Oi>>2]*+g[Wi>>2];g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Sa>>2]=+g[wb>>2]-+g[xb>>2];g[Ab>>2]=+g[Ah>>2]*+g[Wi>>2];g[Bb>>2]=+g[Oi>>2]*+g[Si>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Ua>>2]=+g[Ab>>2]+ +g[Bb>>2];g[bb>>2]=+g[xi>>2]*+g[Si>>2];g[cb>>2]=+g[zi>>2]*+g[Wi>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[sc>>2]=+g[bb>>2]+ +g[cb>>2];g[fb>>2]=+g[xi>>2]*+g[Wi>>2];g[gb>>2]=+g[zi>>2]*+g[Si>>2];g[hb>>2]=+g[fb>>2]+ +g[gb>>2];g[uc>>2]=+g[fb>>2]-+g[gb>>2];g[dj>>2]=+g[bj>>2]+ +g[cj>>2];g[hj>>2]=+g[fj>>2]-+g[gj>>2];g[Cc>>2]=+g[dj>>2]*+g[Si>>2]+ 
+g[hj>>2]*+g[Wi>>2];g[Ec>>2]=+g[dj>>2]*+g[Wi>>2]-+g[hj>>2]*+g[Si>>2];g[sa>>2]=+g[bj>>2]-+g[cj>>2];g[ua>>2]=+g[fj>>2]+ +g[gj>>2];g[nb>>2]=+g[sa>>2]*+g[Si>>2]+ +g[ua>>2]*+g[Wi>>2];g[pb>>2]=+g[sa>>2]*+g[Wi>>2]-+g[ua>>2]*+g[Si>>2];g[$>>2]=+g[z>>2]-+g[A>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[Lb>>2]=+g[$>>2]*+g[Si>>2]+ +g[da>>2]*+g[Wi>>2];g[Nb>>2]=+g[$>>2]*+g[Wi>>2]-+g[da>>2]*+g[Si>>2];g[X>>2]=+g[z>>2]+ +g[A>>2];g[Z>>2]=+g[ba>>2]-+g[ca>>2];g[Ma>>2]=+g[X>>2]*+g[Si>>2]+ +g[Z>>2]*+g[Wi>>2];g[Oa>>2]=+g[X>>2]*+g[Wi>>2]-+g[Z>>2]*+g[Si>>2];g[Li>>2]=+g[Ah>>2]*+g[ki>>2];g[Qi>>2]=+g[Oi>>2]*+g[Pi>>2];g[Ri>>2]=+g[Li>>2]-+g[Qi>>2];g[Ci>>2]=+g[Li>>2]+ +g[Qi>>2];g[Ti>>2]=+g[Ah>>2]*+g[Pi>>2];g[Ui>>2]=+g[Oi>>2]*+g[ki>>2];g[Vi>>2]=+g[Ti>>2]+ +g[Ui>>2];g[Di>>2]=+g[Ti>>2]-+g[Ui>>2];g[Xi>>2]=+g[Ri>>2]*+g[Si>>2]+ +g[Vi>>2]*+g[Wi>>2];g[Gi>>2]=+g[Ci>>2]*+g[Wi>>2]-+g[Di>>2]*+g[Si>>2];g[Zi>>2]=+g[Ri>>2]*+g[Wi>>2]-+g[Vi>>2]*+g[Si>>2];g[Ei>>2]=+g[Ci>>2]*+g[Si>>2]+ +g[Di>>2]*+g[Wi>>2];g[L>>2]=+g[xi>>2]*+g[ki>>2];g[M>>2]=+g[zi>>2]*+g[Pi>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Ba>>2]=+g[L>>2]+ +g[M>>2];g[O>>2]=+g[xi>>2]*+g[Pi>>2];g[P>>2]=+g[zi>>2]*+g[ki>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Ca>>2]=+g[O>>2]-+g[P>>2];g[R>>2]=+g[N>>2]*+g[Si>>2]+ +g[Q>>2]*+g[Wi>>2];g[Fa>>2]=+g[Ba>>2]*+g[Wi>>2]-+g[Ca>>2]*+g[Si>>2];g[T>>2]=+g[N>>2]*+g[Wi>>2]-+g[Q>>2]*+g[Si>>2];g[Da>>2]=+g[Ba>>2]*+g[Si>>2]+ +g[Ca>>2]*+g[Wi>>2];g[q>>2]=+g[c[k>>2]>>2];g[Ig>>2]=+g[c[l>>2]>>2];g[Yi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[_i>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[$i>>2]=+g[Xi>>2]*+g[Yi>>2]+ +g[Zi>>2]*+g[_i>>2];g[Hg>>2]=+g[Xi>>2]*+g[_i>>2]-+g[Zi>>2]*+g[Yi>>2];g[ej>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ij>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[li>>2]=+g[dj>>2]*+g[ej>>2]+ +g[hj>>2]*+g[ij>>2];g[Vb>>2]=+g[dj>>2]*+g[ij>>2]-+g[hj>>2]*+g[ej>>2];g[pi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ti>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ui>>2]=+g[oi>>2]*+g[pi>>2]+ 
+g[si>>2]*+g[ti>>2];g[Wb>>2]=+g[oi>>2]*+g[ti>>2]-+g[si>>2]*+g[pi>>2];g[aj>>2]=+g[q>>2]+ +g[$i>>2];g[vi>>2]=+g[li>>2]+ +g[ui>>2];g[wi>>2]=+g[aj>>2]+ +g[vi>>2];g[Yf>>2]=+g[aj>>2]-+g[vi>>2];g[Mh>>2]=+g[Ig>>2]-+g[Hg>>2];g[Nh>>2]=+g[li>>2]-+g[ui>>2];g[Oh>>2]=+g[Mh>>2]-+g[Nh>>2];g[ai>>2]=+g[Nh>>2]+ +g[Mh>>2];g[Ub>>2]=+g[q>>2]-+g[$i>>2];g[Xb>>2]=+g[Vb>>2]-+g[Wb>>2];g[Yb>>2]=+g[Ub>>2]-+g[Xb>>2];g[af>>2]=+g[Ub>>2]+ +g[Xb>>2];g[Gg>>2]=+g[Vb>>2]+ +g[Wb>>2];g[Jg>>2]=+g[Hg>>2]+ +g[Ig>>2];g[Kg>>2]=+g[Gg>>2]+ +g[Jg>>2];g[Yg>>2]=+g[Jg>>2]-+g[Gg>>2];g[kb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[lb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[mb>>2]=+g[za>>2]*+g[kb>>2]+ +g[_d>>2]*+g[lb>>2];g[Gd>>2]=+g[za>>2]*+g[lb>>2]-+g[_d>>2]*+g[kb>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[Db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[Eb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Cb>>2]*+g[Db>>2];g[Zd>>2]=+g[yb>>2]*+g[Db>>2]-+g[Cb>>2]*+g[zb>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[Hd>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ub>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[vb>>2]=+g[ki>>2]*+g[tb>>2]+ +g[Pi>>2]*+g[ub>>2];g[Yd>>2]=+g[ki>>2]*+g[ub>>2]-+g[Pi>>2]*+g[tb>>2];g[sb>>2]=+g[mb>>2]+ +g[rb>>2];g[Fb>>2]=+g[vb>>2]+ +g[Eb>>2];g[Gb>>2]=+g[sb>>2]+ +g[Fb>>2];g[uf>>2]=+g[sb>>2]-+g[Fb>>2];g[ng>>2]=+g[Gd>>2]+ +g[Hd>>2];g[og>>2]=+g[Yd>>2]+ +g[Zd>>2];g[pg>>2]=+g[ng>>2]-+g[og>>2];g[jh>>2]=+g[ng>>2]+ +g[og>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[Jd>>2]=+g[vb>>2]-+g[Eb>>2];g[Kd>>2]=+g[Id>>2]+ +g[Jd>>2];g[se>>2]=+g[Id>>2]-+g[Jd>>2];g[Xd>>2]=+g[mb>>2]-+g[rb>>2];g[ad>>2]=+g[Yd>>2]-+g[Zd>>2];g[bd>>2]=+g[Xd>>2]-+g[ad>>2];g[pe>>2]=+g[Xd>>2]+ +g[ad>>2];g[eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[Jb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[Kb>>2]=+g[db>>2]*+g[eb>>2]+ 
+g[hb>>2]*+g[Jb>>2];g[yd>>2]=+g[db>>2]*+g[Jb>>2]-+g[hb>>2]*+g[eb>>2];g[tc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[vc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[wc>>2]=+g[sc>>2]*+g[tc>>2]+ +g[uc>>2]*+g[vc>>2];g[jd>>2]=+g[sc>>2]*+g[vc>>2]-+g[uc>>2]*+g[tc>>2];g[Mb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Ob>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Pb>>2]=+g[Lb>>2]*+g[Mb>>2]+ +g[Nb>>2]*+g[Ob>>2];g[zd>>2]=+g[Lb>>2]*+g[Ob>>2]-+g[Nb>>2]*+g[Mb>>2];g[Rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Sb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[rc>>2]=+g[Ci>>2]*+g[Rb>>2]+ +g[Di>>2]*+g[Sb>>2];g[id>>2]=+g[Ci>>2]*+g[Sb>>2]-+g[Di>>2]*+g[Rb>>2];g[Qb>>2]=+g[Kb>>2]+ +g[Pb>>2];g[xc>>2]=+g[rc>>2]+ +g[wc>>2];g[yc>>2]=+g[Qb>>2]+ +g[xc>>2];g[Af>>2]=+g[Qb>>2]-+g[xc>>2];g[Ff>>2]=+g[yd>>2]+ +g[zd>>2];g[Gf>>2]=+g[id>>2]+ +g[jd>>2];g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[oh>>2]=+g[Ff>>2]+ +g[Gf>>2];g[hd>>2]=+g[Kb>>2]-+g[Pb>>2];g[kd>>2]=+g[id>>2]-+g[jd>>2];g[ld>>2]=+g[hd>>2]-+g[kd>>2];g[we>>2]=+g[hd>>2]+ +g[kd>>2];g[$d>>2]=+g[yd>>2]-+g[zd>>2];g[ae>>2]=+g[rc>>2]-+g[wc>>2];g[be>>2]=+g[$d>>2]+ +g[ae>>2];g[ze>>2]=+g[$d>>2]-+g[ae>>2];g[yi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ai>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Bi>>2]=+g[xi>>2]*+g[yi>>2]+ +g[zi>>2]*+g[Ai>>2];g[Zb>>2]=+g[xi>>2]*+g[Ai>>2]-+g[zi>>2]*+g[yi>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[fa>>2]=+g[$>>2]*+g[aa>>2]+ +g[da>>2]*+g[ea>>2];g[ec>>2]=+g[$>>2]*+g[ea>>2]-+g[da>>2]*+g[aa>>2];g[Fi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Hi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Ii>>2]=+g[Ei>>2]*+g[Fi>>2]+ +g[Gi>>2]*+g[Hi>>2];g[_b>>2]=+g[Ei>>2]*+g[Hi>>2]-+g[Gi>>2]*+g[Fi>>2];g[t>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[y>>2]=+g[s>>2]*+g[t>>2]+ +g[w>>2]*+g[x>>2];g[dc>>2]=+g[s>>2]*+g[x>>2]-+g[w>>2]*+g[t>>2];g[Ji>>2]=+g[Bi>>2]+ +g[Ii>>2];g[ga>>2]=+g[y>>2]+ +g[fa>>2];g[ha>>2]=+g[Ji>>2]+ 
+g[ga>>2];g[Xg>>2]=+g[ga>>2]-+g[Ji>>2];g[Zf>>2]=+g[Zb>>2]+ +g[_b>>2];g[_f>>2]=+g[dc>>2]+ +g[ec>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[Fg>>2]=+g[Zf>>2]+ +g[_f>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[ac>>2]=+g[Bi>>2]-+g[Ii>>2];g[bc>>2]=+g[$b>>2]-+g[ac>>2];g[bf>>2]=+g[ac>>2]+ +g[$b>>2];g[cc>>2]=+g[y>>2]-+g[fa>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[gc>>2]=+g[cc>>2]+ +g[fc>>2];g[cf>>2]=+g[cc>>2]-+g[fc>>2];g[ja>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ka>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[la>>2]=+g[Ah>>2]*+g[ja>>2]+ +g[Oi>>2]*+g[ka>>2];g[jc>>2]=+g[Ah>>2]*+g[ka>>2]-+g[Oi>>2]*+g[ja>>2];g[ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[D>>2]=+g[xa>>2]*+g[ya>>2]+ +g[B>>2]*+g[C>>2];g[qc>>2]=+g[xa>>2]*+g[C>>2]-+g[B>>2]*+g[ya>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[kc>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[ta>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[wa>>2]=+g[sa>>2]*+g[ta>>2]+ +g[ua>>2]*+g[va>>2];g[pc>>2]=+g[sa>>2]*+g[va>>2]-+g[ua>>2]*+g[ta>>2];g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[E>>2]=+g[wa>>2]+ +g[D>>2];g[F>>2]=+g[ra>>2]+ +g[E>>2];g[eg>>2]=+g[ra>>2]-+g[E>>2];g[bg>>2]=+g[jc>>2]+ +g[kc>>2];g[cg>>2]=+g[pc>>2]+ +g[qc>>2];g[dg>>2]=+g[bg>>2]-+g[cg>>2];g[eh>>2]=+g[bg>>2]+ +g[cg>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[mc>>2]=+g[wa>>2]-+g[D>>2];g[nc>>2]=+g[lc>>2]+ +g[mc>>2];g[ff>>2]=+g[lc>>2]-+g[mc>>2];g[oc>>2]=+g[la>>2]-+g[qa>>2];g[Sc>>2]=+g[pc>>2]-+g[qc>>2];g[Tc>>2]=+g[oc>>2]-+g[Sc>>2];g[gf>>2]=+g[oc>>2]+ +g[Sc>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[Vc>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[Ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Ha>>2]=+g[Da>>2]*+g[Ea>>2]+ 
+g[Fa>>2]*+g[Ga>>2];g[Ad>>2]=+g[Da>>2]*+g[Ga>>2]-+g[Fa>>2]*+g[Ea>>2];g[S>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[Wc>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Aa>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[$c>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[W>>2]=+g[K>>2]+ +g[V>>2];g[Ia>>2]=+g[Aa>>2]+ +g[Ha>>2];g[Ja>>2]=+g[W>>2]+ +g[Ia>>2];g[gg>>2]=+g[W>>2]-+g[Ia>>2];g[hg>>2]=+g[Vc>>2]+ +g[Wc>>2];g[ig>>2]=+g[$c>>2]+ +g[Ad>>2];g[jg>>2]=+g[hg>>2]-+g[ig>>2];g[fh>>2]=+g[hg>>2]+ +g[ig>>2];g[Xc>>2]=+g[Vc>>2]-+g[Wc>>2];g[Yc>>2]=+g[Aa>>2]-+g[Ha>>2];g[Zc>>2]=+g[Xc>>2]+ +g[Yc>>2];g[ke>>2]=+g[Xc>>2]-+g[Yc>>2];g[_c>>2]=+g[K>>2]-+g[V>>2];g[Bd>>2]=+g[$c>>2]-+g[Ad>>2];g[Cd>>2]=+g[_c>>2]-+g[Bd>>2];g[le>>2]=+g[_c>>2]+ +g[Bd>>2];g[Hb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[La>>2]=+g[Ba>>2]*+g[Hb>>2]+ +g[Ca>>2]*+g[Ka>>2];g[Rd>>2]=+g[Ba>>2]*+g[Ka>>2]-+g[Ca>>2]*+g[Hb>>2];g[Na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[Sd>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[Qd>>2]=+g[La>>2]-+g[Qa>>2];g[Td>>2]=+g[Rd>>2]-+g[Sd>>2];g[Ta>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Va>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Wa>>2]=+g[Sa>>2]*+g[Ta>>2]+ +g[Ua>>2]*+g[Va>>2];g[Md>>2]=+g[Sa>>2]*+g[Va>>2]-+g[Ua>>2]*+g[Ta>>2];g[Xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ya>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Za>>2]=+g[N>>2]*+g[Xa>>2]+ +g[Q>>2]*+g[Ya>>2];g[Nd>>2]=+g[N>>2]*+g[Ya>>2]-+g[Q>>2]*+g[Xa>>2];g[Ld>>2]=+g[Wa>>2]-+g[Za>>2];g[Od>>2]=+g[Md>>2]-+g[Nd>>2];g[Ra>>2]=+g[La>>2]+ +g[Qa>>2];g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[$a>>2]=+g[Ra>>2]+ +g[_a>>2];g[qg>>2]=+g[_a>>2]-+g[Ra>>2];g[vf>>2]=+g[Rd>>2]+ +g[Sd>>2];g[wf>>2]=+g[Md>>2]+ 
+g[Nd>>2];g[xf>>2]=+g[vf>>2]-+g[wf>>2];g[kh>>2]=+g[vf>>2]+ +g[wf>>2];g[Pd>>2]=+g[Ld>>2]-+g[Od>>2];g[Ud>>2]=+g[Qd>>2]+ +g[Td>>2];g[Vd>>2]=(+g[Pd>>2]-+g[Ud>>2])*.7071067690849304;g[qe>>2]=(+g[Ud>>2]+ +g[Pd>>2])*.7071067690849304;g[cd>>2]=+g[Td>>2]-+g[Qd>>2];g[dd>>2]=+g[Ld>>2]+ +g[Od>>2];g[ed>>2]=(+g[cd>>2]-+g[dd>>2])*.7071067690849304;g[te>>2]=(+g[cd>>2]+ +g[dd>>2])*.7071067690849304;g[zc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ac>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Bc>>2]=+g[Ib>>2]*+g[zc>>2]+ +g[hf>>2]*+g[Ac>>2];g[md>>2]=+g[Ib>>2]*+g[Ac>>2]-+g[hf>>2]*+g[zc>>2];g[Dc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Fc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Gc>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[nd>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[od>>2]=+g[md>>2]-+g[nd>>2];g[pd>>2]=+g[Bc>>2]-+g[Gc>>2];g[Ic>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Kc>>2]=+g[Si>>2]*+g[Ic>>2]+ +g[Wi>>2]*+g[Jc>>2];g[sd>>2]=+g[Si>>2]*+g[Jc>>2]-+g[Wi>>2]*+g[Ic>>2];g[Lc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Nc>>2]=+g[Ri>>2]*+g[Lc>>2]+ +g[Vi>>2]*+g[Mc>>2];g[td>>2]=+g[Ri>>2]*+g[Mc>>2]-+g[Vi>>2]*+g[Lc>>2];g[rd>>2]=+g[Kc>>2]-+g[Nc>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[Hc>>2]=+g[Bc>>2]+ +g[Gc>>2];g[Oc>>2]=+g[Kc>>2]+ +g[Nc>>2];g[Pc>>2]=+g[Hc>>2]+ +g[Oc>>2];g[If>>2]=+g[Oc>>2]-+g[Hc>>2];g[Bf>>2]=+g[md>>2]+ +g[nd>>2];g[Cf>>2]=+g[sd>>2]+ +g[td>>2];g[Df>>2]=+g[Bf>>2]-+g[Cf>>2];g[ph>>2]=+g[Bf>>2]+ +g[Cf>>2];g[qd>>2]=+g[od>>2]-+g[pd>>2];g[vd>>2]=+g[rd>>2]+ +g[ud>>2];g[wd>>2]=(+g[qd>>2]-+g[vd>>2])*.7071067690849304;g[Ae>>2]=(+g[qd>>2]+ +g[vd>>2])*.7071067690849304;g[ce>>2]=+g[rd>>2]-+g[ud>>2];g[de>>2]=+g[pd>>2]+ +g[od>>2];g[ee>>2]=(+g[ce>>2]-+g[de>>2])*.7071067690849304;g[xe>>2]=(+g[de>>2]+ +g[ce>>2])*.7071067690849304;g[ia>>2]=+g[wi>>2]+ +g[ha>>2];g[ib>>2]=+g[F>>2]+ +g[Ja>>2];g[jb>>2]=+g[ia>>2]+ +g[ib>>2];g[xh>>2]=+g[ia>>2]-+g[ib>>2];g[Eg>>2]=+g[eh>>2]+ 
+g[fh>>2];g[Lg>>2]=+g[Fg>>2]+ +g[Kg>>2];g[Mg>>2]=+g[Eg>>2]+ +g[Lg>>2];g[Og>>2]=+g[Lg>>2]-+g[Eg>>2];g[ab>>2]=+g[Gb>>2]+ +g[$a>>2];g[Qc>>2]=+g[yc>>2]+ +g[Pc>>2];g[Tb>>2]=+g[ab>>2]+ +g[Qc>>2];g[Ng>>2]=+g[Qc>>2]-+g[ab>>2];g[yh>>2]=+g[jh>>2]+ +g[kh>>2];g[zh>>2]=+g[oh>>2]+ +g[ph>>2];g[Cg>>2]=+g[yh>>2]-+g[zh>>2];g[Dg>>2]=+g[yh>>2]+ +g[zh>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[jb>>2]-+g[Tb>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Mg>>2]-+g[Dg>>2];g[c[k>>2]>>2]=+g[jb>>2]+ +g[Tb>>2];g[c[l>>2]>>2]=+g[Dg>>2]+ +g[Mg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[xh>>2]-+g[Cg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Og>>2]-+g[Ng>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[xh>>2]+ +g[Cg>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ng>>2]+ +g[Og>>2];g[dh>>2]=+g[wi>>2]-+g[ha>>2];g[gh>>2]=+g[eh>>2]-+g[fh>>2];g[hh>>2]=+g[dh>>2]+ +g[gh>>2];g[th>>2]=+g[dh>>2]-+g[gh>>2];g[Qg>>2]=+g[Ja>>2]-+g[F>>2];g[Rg>>2]=+g[Kg>>2]-+g[Fg>>2];g[Sg>>2]=+g[Qg>>2]+ +g[Rg>>2];g[Ug>>2]=+g[Rg>>2]-+g[Qg>>2];g[ih>>2]=+g[Gb>>2]-+g[$a>>2];g[lh>>2]=+g[jh>>2]-+g[kh>>2];g[mh>>2]=+g[ih>>2]+ +g[lh>>2];g[uh>>2]=+g[lh>>2]-+g[ih>>2];g[nh>>2]=+g[yc>>2]-+g[Pc>>2];g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[rh>>2]=+g[nh>>2]-+g[qh>>2];g[vh>>2]=+g[nh>>2]+ +g[qh>>2];g[sh>>2]=(+g[mh>>2]+ +g[rh>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[hh>>2]-+g[sh>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[hh>>2]+ +g[sh>>2];g[Pg>>2]=(+g[uh>>2]+ +g[vh>>2])*.7071067690849304;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Pg>>2]+ +g[Sg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Sg>>2]-+g[Pg>>2];g[wh>>2]=(+g[uh>>2]-+g[vh>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[th>>2]-+g[wh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[th>>2]+ +g[wh>>2];g[Tg>>2]=(+g[rh>>2]-+g[mh>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Tg>>2]+ +g[Ug>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Ug>>2]-+g[Tg>>2];g[ag>>2]=+g[Yf>>2]-+g[$f>>2];g[Zg>>2]=+g[Xg>>2]+ +g[Yg>>2];g[Eh>>2]=+g[Yg>>2]-+g[Xg>>2];g[Qf>>2]=+g[Yf>>2]+ 
+g[$f>>2];g[fg>>2]=+g[dg>>2]-+g[eg>>2];g[kg>>2]=+g[gg>>2]+ +g[jg>>2];g[lg>>2]=(+g[fg>>2]-+g[kg>>2])*.7071067690849304;g[Wg>>2]=(+g[fg>>2]+ +g[kg>>2])*.7071067690849304;g[xg>>2]=+g[Af>>2]+ +g[Df>>2];g[yg>>2]=+g[Hf>>2]+ +g[If>>2];g[zg>>2]=+g[xg>>2]*.9238795042037964-+g[yg>>2]*.3826834261417389;g[bh>>2]=+g[yg>>2]*.9238795042037964+ +g[xg>>2]*.3826834261417389;g[Rf>>2]=+g[eg>>2]+ +g[dg>>2];g[Sf>>2]=+g[gg>>2]-+g[jg>>2];g[sg>>2]=(+g[Rf>>2]+ +g[Sf>>2])*.7071067690849304;g[Dh>>2]=(+g[Sf>>2]-+g[Rf>>2])*.7071067690849304;g[tf>>2]=+g[pg>>2]-+g[qg>>2];g[yf>>2]=+g[uf>>2]-+g[xf>>2];g[zf>>2]=+g[tf>>2]*.9238795042037964+ +g[yf>>2]*.3826834261417389;g[Nf>>2]=+g[tf>>2]*.3826834261417389-+g[yf>>2]*.9238795042037964;g[ug>>2]=+g[pg>>2]+ +g[qg>>2];g[vg>>2]=+g[uf>>2]+ +g[xf>>2];g[wg>>2]=+g[ug>>2]*.3826834261417389+ +g[vg>>2]*.9238795042037964;g[ah>>2]=+g[ug>>2]*.9238795042037964-+g[vg>>2]*.3826834261417389;g[Ef>>2]=+g[Af>>2]-+g[Df>>2];g[Jf>>2]=+g[Hf>>2]-+g[If>>2];g[Kf>>2]=+g[Ef>>2]*.3826834261417389-+g[Jf>>2]*.9238795042037964;g[Of>>2]=+g[Jf>>2]*.3826834261417389+ +g[Ef>>2]*.9238795042037964;g[mg>>2]=+g[ag>>2]+ +g[lg>>2];g[Lf>>2]=+g[zf>>2]+ +g[Kf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[mg>>2]-+g[Lf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[mg>>2]+ +g[Lf>>2];g[Ch>>2]=+g[Nf>>2]+ +g[Of>>2];g[Fh>>2]=+g[Dh>>2]+ +g[Eh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ch>>2]+ +g[Fh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Fh>>2]-+g[Ch>>2];g[Mf>>2]=+g[ag>>2]-+g[lg>>2];g[Pf>>2]=+g[Nf>>2]-+g[Of>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Mf>>2]-+g[Pf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Mf>>2]+ +g[Pf>>2];g[Gh>>2]=+g[Kf>>2]-+g[zf>>2];g[Hh>>2]=+g[Eh>>2]-+g[Dh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Gh>>2]+ +g[Hh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Hh>>2]-+g[Gh>>2];g[tg>>2]=+g[Qf>>2]+ +g[sg>>2];g[Ag>>2]=+g[wg>>2]+ +g[zg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[tg>>2]-+g[Ag>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[tg>>2]+ +g[Ag>>2];g[Vg>>2]=+g[ah>>2]+ 
+g[bh>>2];g[_g>>2]=+g[Wg>>2]+ +g[Zg>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Vg>>2]+ +g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[_g>>2]-+g[Vg>>2];g[Bg>>2]=+g[Qf>>2]-+g[sg>>2];g[ch>>2]=+g[ah>>2]-+g[bh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[Bg>>2]-+g[ch>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Bg>>2]+ +g[ch>>2];g[$g>>2]=+g[zg>>2]-+g[wg>>2];g[Bh>>2]=+g[Zg>>2]-+g[Wg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[$g>>2]+ +g[Bh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[Bh>>2]-+g[$g>>2];g[hc>>2]=(+g[bc>>2]-+g[gc>>2])*.7071067690849304;g[ic>>2]=+g[Yb>>2]-+g[hc>>2];g[Me>>2]=+g[Yb>>2]+ +g[hc>>2];g[$h>>2]=(+g[cf>>2]-+g[bf>>2])*.7071067690849304;g[bi>>2]=+g[$h>>2]+ +g[ai>>2];g[hi>>2]=+g[ai>>2]-+g[$h>>2];g[Uc>>2]=+g[nc>>2]*.3826834261417389-+g[Tc>>2]*.9238795042037964;g[Dd>>2]=+g[Zc>>2]*.3826834261417389+ +g[Cd>>2]*.9238795042037964;g[Ed>>2]=+g[Uc>>2]-+g[Dd>>2];g[_h>>2]=+g[Uc>>2]+ +g[Dd>>2];g[Ue>>2]=+g[ld>>2]+ +g[wd>>2];g[Ve>>2]=+g[be>>2]+ +g[ee>>2];g[We>>2]=+g[Ue>>2]*.8314695954322815-+g[Ve>>2]*.5555702447891235;g[_e>>2]=+g[Ve>>2]*.8314695954322815+ +g[Ue>>2]*.5555702447891235;g[Wd>>2]=+g[Kd>>2]-+g[Vd>>2];g[fd>>2]=+g[bd>>2]-+g[ed>>2];g[gd>>2]=+g[Wd>>2]*.9807852506637573+ +g[fd>>2]*.19509032368659973;g[Je>>2]=+g[Wd>>2]*.19509032368659973-+g[fd>>2]*.9807852506637573;g[Ne>>2]=+g[nc>>2]*.9238795042037964+ +g[Tc>>2]*.3826834261417389;g[Oe>>2]=+g[Cd>>2]*.3826834261417389-+g[Zc>>2]*.9238795042037964;g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[gi>>2]=+g[Oe>>2]-+g[Ne>>2];g[Re>>2]=+g[Kd>>2]+ +g[Vd>>2];g[Se>>2]=+g[bd>>2]+ +g[ed>>2];g[Te>>2]=+g[Re>>2]*.5555702447891235+ +g[Se>>2]*.8314695954322815;g[Ze>>2]=+g[Re>>2]*.8314695954322815-+g[Se>>2]*.5555702447891235;g[xd>>2]=+g[ld>>2]-+g[wd>>2];g[fe>>2]=+g[be>>2]-+g[ee>>2];g[ge>>2]=+g[xd>>2]*.19509032368659973-+g[fe>>2]*.9807852506637573;g[Ke>>2]=+g[fe>>2]*.19509032368659973+ +g[xd>>2]*.9807852506637573;g[Fd>>2]=+g[ic>>2]+ +g[Ed>>2];g[he>>2]=+g[gd>>2]+ 
+g[ge>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Fd>>2]-+g[he>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Fd>>2]+ +g[he>>2];g[fi>>2]=+g[Je>>2]+ +g[Ke>>2];g[ii>>2]=+g[gi>>2]+ +g[hi>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[fi>>2]+ +g[ii>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[ii>>2]-+g[fi>>2];g[ie>>2]=+g[ic>>2]-+g[Ed>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[ie>>2]-+g[Le>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[ie>>2]+ +g[Le>>2];g[ji>>2]=+g[ge>>2]-+g[gd>>2];g[Lh>>2]=+g[hi>>2]-+g[gi>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[ji>>2]+ +g[Lh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Lh>>2]-+g[ji>>2];g[Qe>>2]=+g[Me>>2]+ +g[Pe>>2];g[Xe>>2]=+g[Te>>2]+ +g[We>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Qe>>2]-+g[Xe>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Qe>>2]+ +g[Xe>>2];g[Zh>>2]=+g[Ze>>2]+ +g[_e>>2];g[ci>>2]=+g[_h>>2]+ +g[bi>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Zh>>2]+ +g[ci>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ci>>2]-+g[Zh>>2];g[Ye>>2]=+g[Me>>2]-+g[Pe>>2];g[$e>>2]=+g[Ze>>2]-+g[_e>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Ye>>2]-+g[$e>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ye>>2]+ +g[$e>>2];g[di>>2]=+g[We>>2]-+g[Te>>2];g[ei>>2]=+g[bi>>2]-+g[_h>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[di>>2]+ +g[ei>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[ei>>2]-+g[di>>2];g[df>>2]=(+g[bf>>2]+ +g[cf>>2])*.7071067690849304;g[ef>>2]=+g[af>>2]-+g[df>>2];g[Ie>>2]=+g[af>>2]+ +g[df>>2];g[Kh>>2]=(+g[bc>>2]+ +g[gc>>2])*.7071067690849304;g[Ph>>2]=+g[Kh>>2]+ +g[Oh>>2];g[Vh>>2]=+g[Oh>>2]-+g[Kh>>2];g[je>>2]=+g[ff>>2]*.9238795042037964-+g[gf>>2]*.3826834261417389;g[me>>2]=+g[ke>>2]*.9238795042037964+ +g[le>>2]*.3826834261417389;g[ne>>2]=+g[je>>2]-+g[me>>2];g[Jh>>2]=+g[je>>2]+ +g[me>>2];g[qf>>2]=+g[we>>2]+ +g[xe>>2];g[rf>>2]=+g[ze>>2]+ +g[Ae>>2];g[sf>>2]=+g[qf>>2]*.9807852506637573-+g[rf>>2]*.19509032368659973;g[Wf>>2]=+g[qf>>2]*.19509032368659973+ 
+g[rf>>2]*.9807852506637573;g[re>>2]=+g[pe>>2]-+g[qe>>2];g[ue>>2]=+g[se>>2]-+g[te>>2];g[ve>>2]=+g[re>>2]*.5555702447891235+ +g[ue>>2]*.8314695954322815;g[Fe>>2]=+g[ue>>2]*.5555702447891235-+g[re>>2]*.8314695954322815;g[jf>>2]=+g[ff>>2]*.3826834261417389+ +g[gf>>2]*.9238795042037964;g[kf>>2]=+g[le>>2]*.9238795042037964-+g[ke>>2]*.3826834261417389;g[lf>>2]=+g[jf>>2]+ +g[kf>>2];g[Uh>>2]=+g[kf>>2]-+g[jf>>2];g[nf>>2]=+g[pe>>2]+ +g[qe>>2];g[of>>2]=+g[se>>2]+ +g[te>>2];g[pf>>2]=+g[nf>>2]*.9807852506637573+ +g[of>>2]*.19509032368659973;g[Vf>>2]=+g[of>>2]*.9807852506637573-+g[nf>>2]*.19509032368659973;g[ye>>2]=+g[we>>2]-+g[xe>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[Ce>>2]=+g[ye>>2]*.5555702447891235-+g[Be>>2]*.8314695954322815;g[Ge>>2]=+g[ye>>2]*.8314695954322815+ +g[Be>>2]*.5555702447891235;g[oe>>2]=+g[ef>>2]+ +g[ne>>2];g[De>>2]=+g[ve>>2]+ +g[Ce>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[oe>>2]-+g[De>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[oe>>2]+ +g[De>>2];g[Th>>2]=+g[Fe>>2]+ +g[Ge>>2];g[Wh>>2]=+g[Uh>>2]+ +g[Vh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Th>>2]+ +g[Wh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Wh>>2]-+g[Th>>2];g[Ee>>2]=+g[ef>>2]-+g[ne>>2];g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Ee>>2]-+g[He>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ee>>2]+ +g[He>>2];g[Xh>>2]=+g[Ce>>2]-+g[ve>>2];g[Yh>>2]=+g[Vh>>2]-+g[Uh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Xh>>2]+ +g[Yh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Yh>>2]-+g[Xh>>2];g[mf>>2]=+g[Ie>>2]+ +g[lf>>2];g[Tf>>2]=+g[pf>>2]+ +g[sf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[mf>>2]-+g[Tf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[mf>>2]+ +g[Tf>>2];g[Ih>>2]=+g[Vf>>2]+ +g[Wf>>2];g[Qh>>2]=+g[Jh>>2]+ +g[Ph>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ih>>2]+ +g[Qh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Qh>>2]-+g[Ih>>2];g[Uf>>2]=+g[Ie>>2]-+g[lf>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Uf>>2]-+g[Xf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Uf>>2]+ 
+g[Xf>>2];g[Rh>>2]=+g[sf>>2]-+g[pf>>2];g[Sh>>2]=+g[Ph>>2]-+g[Jh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Rh>>2]+ +g[Sh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Sh>>2]-+g[Rh>>2];c[jj>>2]=(c[jj>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=kj;return}function Pj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,22,2952);i=b;return}function Qj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;k=T+140|0;l=T+136|0;m=T+132|0;n=T+128|0;U=T+124|0;o=T+120|0;p=T+116|0;S=T+112|0;r=T+108|0;t=T+104|0;s=T+100|0;u=T+96|0;v=T+92|0;x=T+88|0;q=T+84|0;O=T+80|0;z=T+76|0;N=T+72|0;D=T+68|0;J=T+64|0;G=T+60|0;K=T+56|0;w=T+52|0;y=T+48|0;B=T+44|0;C=T+40|0;E=T+36|0;F=T+32|0;A=T+28|0;H=T+24|0;M=T+20|0;P=T+16|0;I=T+12|0;L=T+8|0;Q=T+4|0;R=T;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[U>>2]=f;c[o>>2]=h;c[p>>2]=j;c[S>>2]=c[U>>2];c[m>>2]=(c[m>>2]|0)+(c[U>>2]<<2<<2);while(1){if((c[S>>2]|0)>=(c[o>>2]|0))break;g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[s>>2]=+g[(c[m>>2]|0)+8>>2];g[u>>2]=+g[(c[m>>2]|0)+12>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[x>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[q>>2]=+g[c[k>>2]>>2];g[O>>2]=+g[c[l>>2]>>2];g[w>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[y>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[N>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[B>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[D>>2]=+g[r>>2]*+g[B>>2]+ +g[t>>2]*+g[C>>2];g[J>>2]=+g[r>>2]*+g[C>>2]-+g[t>>2]*+g[B>>2];g[E>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[F>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[G>>2]=+g[s>>2]*+g[E>>2]+ +g[u>>2]*+g[F>>2];g[K>>2]=+g[s>>2]*+g[F>>2]-+g[u>>2]*+g[E>>2];g[A>>2]=+g[q>>2]+ +g[z>>2];g[H>>2]=+g[D>>2]+ 
+g[G>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[A>>2]-+g[H>>2];g[c[k>>2]>>2]=+g[A>>2]+ +g[H>>2];g[M>>2]=+g[J>>2]+ +g[K>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[c[l>>2]>>2]=+g[M>>2]+ +g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[P>>2]-+g[M>>2];g[I>>2]=+g[q>>2]-+g[z>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[I>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[Q>>2]=+g[O>>2]-+g[N>>2];g[R>>2]=+g[D>>2]-+g[G>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[R>>2]+ +g[Q>>2];c[S>>2]=(c[S>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+16}i=T;return}function Rj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,23,3016);i=b;return}function Sj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0;pa=i;i=i+256|0;k=pa+252|0;l=pa+248|0;m=pa+244|0;n=pa+240|0;qa=pa+236|0;o=pa+232|0;p=pa+228|0;oa=pa+208|0;r=pa+204|0;t=pa+200|0;w=pa+196|0;y=pa+192|0;R=pa+188|0;$=pa+184|0;V=pa+180|0;Z=pa+176|0;x=pa+172|0;U=pa+168|0;Q=pa+164|0;T=pa+160|0;q=pa+156|0;H=pa+152|0;ma=pa+148|0;A=pa+144|0;M=pa+140|0;L=pa+136|0;E=pa+132|0;F=pa+128|0;G=pa+124|0;Y=pa+120|0;fa=pa+116|0;ga=pa+112|0;v=pa+108|0;ka=pa+104|0;ea=pa+100|0;z=pa+96|0;X=pa+92|0;la=pa+88|0;ba=pa+84|0;na=pa+80|0;s=pa+76|0;u=pa+72|0;ca=pa+68|0;da=pa+64|0;S=pa+60|0;W=pa+56|0;_=pa+52|0;aa=pa+48|0;B=pa+44|0;D=pa+40|0;ja=pa+36|0;C=pa+32|0;ha=pa+28|0;ia=pa+24|0;N=pa+20|0;O=pa+16|0;K=pa+12|0;P=pa+8|0;I=pa+4|0;J=pa;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[qa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[pa+224>>2]=.25;g[pa+220>>2]=.55901700258255;g[pa+216>>2]=.5877852439880371;g[pa+212>>2]=.9510565400123596;c[oa>>2]=c[qa>>2];c[m>>2]=(c[m>>2]|0)+(c[qa>>2]<<2<<2);while(1){if
((c[oa>>2]|0)>=(c[o>>2]|0))break;g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[w>>2]=+g[(c[m>>2]|0)+8>>2];g[y>>2]=+g[(c[m>>2]|0)+12>>2];g[x>>2]=+g[r>>2]*+g[w>>2];g[U>>2]=+g[t>>2]*+g[w>>2];g[Q>>2]=+g[t>>2]*+g[y>>2];g[T>>2]=+g[r>>2]*+g[y>>2];g[R>>2]=+g[x>>2]-+g[Q>>2];g[$>>2]=+g[T>>2]-+g[U>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Z>>2]=+g[x>>2]+ +g[Q>>2];g[q>>2]=+g[c[k>>2]>>2];g[H>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[ka>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ea>>2]=+g[w>>2]*+g[ca>>2]+ +g[y>>2]*+g[da>>2];g[z>>2]=+g[w>>2]*+g[da>>2]-+g[y>>2]*+g[ca>>2];g[S>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[W>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[X>>2]=+g[R>>2]*+g[S>>2]+ +g[V>>2]*+g[W>>2];g[la>>2]=+g[R>>2]*+g[W>>2]-+g[V>>2]*+g[S>>2];g[_>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[aa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ba>>2]=+g[Z>>2]*+g[_>>2]+ +g[$>>2]*+g[aa>>2];g[na>>2]=+g[Z>>2]*+g[aa>>2]-+g[$>>2]*+g[_>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[A>>2]=+g[na>>2]-+g[z>>2];g[M>>2]=+g[ba>>2]-+g[ea>>2];g[L>>2]=+g[v>>2]-+g[X>>2];g[E>>2]=+g[ka>>2]+ +g[la>>2];g[F>>2]=+g[na>>2]+ +g[z>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[Y>>2]=+g[v>>2]+ +g[X>>2];g[fa>>2]=+g[ba>>2]+ +g[ea>>2];g[ga>>2]=+g[Y>>2]+ +g[fa>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[ga>>2];g[c[l>>2]>>2]=+g[G>>2]+ +g[H>>2];g[B>>2]=+g[ma>>2]*.9510565400123596+ +g[A>>2]*.5877852439880371;g[D>>2]=+g[A>>2]*.9510565400123596-+g[ma>>2]*.5877852439880371;g[ha>>2]=(+g[Y>>2]-+g[fa>>2])*.55901700258255;g[ia>>2]=+g[q>>2]-+g[ga>>2]*.25;g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[C>>2]=+g[ia>>2]-+g[ha>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ja>>2]-+g[B>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[C>>2]+ +g[D>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ja>>2]+ +g[B>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[C>>2]-+g[D>>2];g[N>>2]=+g[L>>2]*.9510565400123596+ 
+g[M>>2]*.5877852439880371;g[O>>2]=+g[M>>2]*.9510565400123596-+g[L>>2]*.5877852439880371;g[I>>2]=(+g[E>>2]-+g[F>>2])*.55901700258255;g[J>>2]=+g[H>>2]-+g[G>>2]*.25;g[K>>2]=+g[I>>2]+ +g[J>>2];g[P>>2]=+g[J>>2]-+g[I>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[K>>2]-+g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[P>>2]-+g[O>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[N>>2]+ +g[K>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[O>>2]+ +g[P>>2];c[oa>>2]=(c[oa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+16}i=pa;return}function Tj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,24,3080);i=b;return}function Uj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe
=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl
=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,Tm=0,Um=0,Vm=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,oo=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0,uq=0,vq=0,wq=0,xq=0,yq=0,zq=0,Aq=0,Bq=0,Cq=0,Dq=0,Eq=0,Fq=0,Gq=0,Hq=0,Iq=0,Jq=0,Kq=0,Lq=0,Mq=0,Nq=0,Oq=0,Pq=0,Qq=0,Rq=0,Sq=0,Tq=0,Uq=0,Vq=0,Wq=0,Xq=0,Yq=0,Zq=0,_q=0,$q=0,ar=0,br=0,cr=0,dr=0,er=0,fr=0,gr=0,hr=0,ir=0,jr=0,kr=0,lr=0,mr=0,nr=0,or=0,pr=0,qr=0,rr=0,sr=0,tr=0,ur=0,vr=0,wr=0,xr=0,yr=0,zr=0,Ar=0,Br=0,Cr=0,Dr=0,Er=0,Fr=0,Gr=0,Hr=0,Ir=0,Jr=0,Kr=0,Lr=0,Mr=0,Nr=0,Or=0,Pr=0,Qr=0,Rr=0,Sr=0,Tr=0,Ur=0,Vr=0,Wr=0,Xr=0,Yr=0,Zr=0,_r=0,$r=0,as=0,bs=0,cs=0,ds=0,es=0,fs=0,gs=0,hs=0,is=0,js=0,ks=0,ls=0,ms=0,ns=0,os=0,ps=0,qs=0,rs=0,ss=0,ts=0,us=0,vs=0,ws=0,xs=0,ys=0,zs=0,As=0,Bs=0,Cs=0,Ds=0,Es=0,Fs=0,Gs=0,Hs=0,Is=0,Js=0,Ks=0,Ls=0,Ms=0,Ns=0,Os=0,Ps=0,Qs=0,Rs=0,Ss=0,Ts=0,Us=0,Vs=0,Ws=0,Xs=0,Ys=0,Zs=0,_s
=0,$s=0,at=0,bt=0,ct=0,dt=0,et=0,ft=0,gt=0,ht=0,it=0,jt=0,kt=0,lt=0,mt=0,nt=0,ot=0,pt=0,qt=0,rt=0,st=0,tt=0,ut=0,vt=0,wt=0,xt=0,yt=0,zt=0,At=0,Bt=0,Ct=0,Dt=0,Et=0,Ft=0,Gt=0,Ht=0,It=0,Jt=0,Kt=0,Lt=0,Mt=0,Nt=0,Ot=0,Pt=0,Qt=0,Rt=0,St=0,Tt=0,Ut=0,Vt=0,Wt=0,Xt=0,Yt=0,Zt=0,_t=0,$t=0,au=0,bu=0,cu=0,du=0,eu=0,fu=0,gu=0,hu=0,iu=0,ju=0,ku=0,lu=0,mu=0,nu=0,ou=0,pu=0,qu=0,ru=0,su=0,tu=0,uu=0,vu=0,wu=0,xu=0,yu=0,zu=0,Au=0,Bu=0,Cu=0,Du=0,Eu=0,Fu=0,Gu=0,Hu=0,Iu=0,Ju=0,Ku=0,Lu=0,Mu=0,Nu=0,Ou=0,Pu=0,Qu=0,Ru=0,Su=0,Tu=0,Uu=0,Vu=0,Wu=0,Xu=0,Yu=0,Zu=0,_u=0,$u=0,av=0,bv=0,cv=0,dv=0,ev=0,fv=0,gv=0,hv=0,iv=0,jv=0,kv=0,lv=0,mv=0,nv=0,ov=0,pv=0,qv=0,rv=0,sv=0,tv=0,uv=0,vv=0,wv=0,xv=0,yv=0,zv=0,Av=0,Bv=0,Cv=0,Dv=0,Ev=0,Fv=0,Gv=0,Hv=0,Iv=0,Jv=0,Kv=0,Lv=0,Mv=0,Nv=0,Ov=0,Pv=0,Qv=0,Rv=0,Sv=0,Tv=0,Uv=0,Vv=0,Wv=0,Xv=0,Yv=0,Zv=0,_v=0,$v=0,aw=0,bw=0,cw=0,dw=0,ew=0,fw=0,gw=0,hw=0,iw=0,jw=0,kw=0,lw=0,mw=0,nw=0,ow=0,pw=0;ow=i;i=i+5040|0;k=ow+5032|0;l=ow+5028|0;m=ow+5024|0;n=ow+5020|0;pw=ow+5016|0;o=ow+5012|0;p=ow+5008|0;nw=ow+4944|0;za=ow+4940|0;_d=ow+4936|0;Ib=ow+4932|0;hf=ow+4928|0;ap=ow+4924|0;Ji=ow+4920|0;fk=ow+4916|0;vc=ow+4912|0;da=ow+4908|0;Sb=ow+4904|0;tc=ow+4900|0;jk=ow+4896|0;$=ow+4892|0;Qb=ow+4888|0;Ah=ow+4884|0;jw=ow+4880|0;Sn=ow+4876|0;lw=ow+4872|0;$v=ow+4868|0;Bs=ow+4864|0;D=ow+4860|0;bc=ow+4856|0;W=ow+4852|0;Oc=ow+4848|0;$b=ow+4844|0;H=ow+4840|0;S=ow+4836|0;Mc=ow+4832|0;ub=ow+4828|0;yb=ow+4824|0;He=ow+4820|0;lg=ow+4816|0;ce=ow+4812|0;ke=ow+4808|0;lf=ow+4804|0;ng=ow+4800|0;ge=ow+4796|0;me=ow+4792|0;db=ow+4788|0;fb=ow+4784|0;sr=ow+4780|0;Wj=ow+4776|0;pc=ow+4772|0;mc=ow+4768|0;Ac=ow+4764|0;_v=ow+4760|0;Sj=ow+4756|0;xc=ow+4752|0;ia=ow+4748|0;sa=ow+4744|0;lk=ow+4740|0;Ma=ow+4736|0;Hb=ow+4732|0;ta=ow+4728|0;pk=ow+4724|0;ga=ow+4720|0;bw=ow+4716|0;oa=ow+4712|0;ag=ow+4708|0;Af=ow+4704|0;fw=ow+4700|0;ka=ow+4696|0;cg=ow+4692|0;yf=ow+4688|0;Za=ow+4684|0;Bd=ow+4680|0;Xd=ow+4676|0;Re=ow+4672|0;$a=ow+4668|0;$c=ow+4664|0;Zd=ow+4660|0;Pe=ow+4656|0;Yj=ow+4652|0;Kd=ow+4648|0;Md=ow+4644|0;ak=ow+4640|0;Ba=ow
+4636|0;fc=ow+4632|0;hc=ow+4628|0;Fa=ow+4624|0;mk=ow+4620|0;r=ow+4616|0;s=ow+4612|0;Bg=ow+4608|0;Qf=ow+4604|0;ve=ow+4600|0;Uf=ow+4596|0;ua=ow+4592|0;Jb=ow+4588|0;sf=ow+4584|0;zg=ow+4580|0;u=ow+4576|0;Pa=ow+4572|0;Na=ow+4568|0;gg=ow+4564|0;Hf=ow+4560|0;wa=ow+4556|0;ib=ow+4552|0;nb=ow+4548|0;Of=ow+4544|0;Ia=ow+4540|0;ig=ow+4536|0;pb=ow+4532|0;Jf=ow+4528|0;Lb=ow+4524|0;te=ow+4520|0;df=ow+4516|0;gd=ow+4512|0;qd=ow+4508|0;Dc=ow+4504|0;Hd=ow+4500|0;id=ow+4496|0;ff=ow+4492|0;Bc=ow+4488|0;Fd=ow+4484|0;Tc=ow+4480|0;sd=ow+4476|0;yd=ow+4472|0;Hc=ow+4468|0;Wb=ow+4464|0;Yb=ow+4460|0;_e=ow+4456|0;Jc=ow+4452|0;wd=ow+4448|0;qc=ow+4444|0;Ye=ow+4440|0;qk=ow+4436|0;Tu=ow+4432|0;kw=ow+4428|0;Uj=ow+4424|0;jq=ow+4420|0;Zv=ow+4416|0;mw=ow+4412|0;Vj=ow+4408|0;C=ow+4404|0;F=ow+4400|0;wb=ow+4396|0;tb=ow+4392|0;U=ow+4388|0;R=ow+4384|0;B=ow+4380|0;G=ow+4376|0;xb=ow+4372|0;sb=ow+4368|0;V=ow+4364|0;Q=ow+4360|0;Rc=ow+4356|0;ca=ow+4352|0;A=ow+4348|0;zl=ow+4344|0;ba=ow+4340|0;rg=ow+4336|0;Im=ow+4332|0;z=ow+4328|0;hk=ow+4324|0;ek=ow+4320|0;ik=ow+4316|0;dk=ow+4312|0;Fe=ow+4308|0;Ge=ow+4304|0;ee=ow+4300|0;fe=ow+4296|0;ae=ow+4292|0;be=ow+4288|0;jf=ow+4284|0;kf=ow+4280|0;Kt=ow+4276|0;dw=ow+4272|0;Xj=ow+4268|0;$j=ow+4264|0;nc=ow+4260|0;oc=ow+4256|0;kc=ow+4252|0;lc=ow+4248|0;yc=ow+4244|0;zc=ow+4240|0;aw=ow+4236|0;ew=ow+4232|0;Tj=ow+4228|0;_j=ow+4224|0;uc=ow+4220|0;wc=ow+4216|0;ja=ow+4212|0;na=ow+4208|0;_=ow+4204|0;Da=ow+4200|0;gk=ow+4196|0;kk=ow+4192|0;Ka=ow+4188|0;La=ow+4184|0;Fb=ow+4180|0;Gb=ow+4176|0;Aa=ow+4172|0;Ea=ow+4168|0;nk=ow+4164|0;ok=ow+4160|0;ha=ow+4156|0;ma=ow+4152|0;y=ow+4148|0;xo=ow+4144|0;Xu=ow+4140|0;Wv=ow+4136|0;lh=ow+4132|0;Tl=ow+4128|0;Ot=ow+4124|0;Qu=ow+4120|0;L=ow+4116|0;Pu=ow+4112|0;Ao=ow+4108|0;it=ow+4104|0;wh=ow+4100|0;Uu=ow+4096|0;Wl=ow+4092|0;Vv=ow+4088|0;mb=ow+4084|0;Dr=ow+4080|0;Kg=ow+4076|0;Zk=ow+4072|0;_l=ow+4068|0;Co=ow+4064|0;fp=ow+4060|0;Iq=ow+4056|0;Ta=ow+4052|0;Er=ow+4048|0;Vg=ow+4044|0;_k=ow+4040|0;bm=ow+4036|0;Do=ow+4032|0;kp=ow+4028|0;Jq=ow+4024|0;Pb=ow+4020|0;Ub=ow+
4016|0;Kr=ow+4012|0;Hr=ow+4008|0;Ir=ow+4004|0;Jr=ow+4e3|0;Bh=ow+3996|0;em=ow+3992|0;Wp=ow+3988|0;Nq=ow+3984|0;ki=ow+3980|0;Jm=ow+3976|0;ti=ow+3972|0;fm=ow+3968|0;Rp=ow+3964|0;Mq=ow+3960|0;qi=ow+3956|0;hm=ow+3952|0;Xc=ow+3948|0;Qd=ow+3944|0;Mr=ow+3940|0;Nr=ow+3936|0;Or=ow+3932|0;Pr=ow+3928|0;Ai=ow+3924|0;Pm=ow+3920|0;fq=ow+3916|0;Qq=ow+3912|0;Nh=ow+3908|0;Nm=ow+3904|0;Wh=ow+3900|0;Qm=ow+3896|0;aq=ow+3892|0;Pq=ow+3888|0;Th=ow+3884|0;Mm=ow+3880|0;uf=ow+3876|0;Ds=ow+3872|0;er=ow+3868|0;bs=ow+3864|0;Ks=ow+3860|0;Us=ow+3856|0;fj=ow+3852|0;Nn=ow+3848|0;qj=ow+3844|0;_m=ow+3840|0;ul=ow+3836|0;On=ow+3832|0;pq=ow+3828|0;Ar=ow+3824|0;rl=ow+3820|0;Zm=ow+3816|0;Le=ow+3812|0;Yr=ow+3808|0;Gp=ow+3804|0;tr=ow+3800|0;Vr=ow+3796|0;Ps=ow+3792|0;di=ow+3788|0;un=ow+3784|0;Pi=ow+3780|0;Gn=ow+3776|0;Wi=ow+3772|0;vn=ow+3768|0;pp=ow+3764|0;wr=ow+3760|0;Rj=ow+3756|0;Fn=ow+3752|0;Ae=ow+3748|0;Wr=ow+3744|0;zp=ow+3740|0;Hp=ow+3736|0;$r=ow+3732|0;Qs=ow+3728|0;zj=ow+3724|0;Zi=ow+3720|0;Kj=ow+3716|0;Yi=ow+3712|0;Cn=ow+3708|0;In=ow+3704|0;up=ow+3700|0;Ip=ow+3696|0;zn=ow+3692|0;Jn=ow+3688|0;eh=ow+3684|0;Ls=ow+3680|0;Zq=ow+3676|0;fr=ow+3672|0;Gs=ow+3668|0;Vs=ow+3664|0;$k=ow+3660|0;xl=ow+3656|0;kl=ow+3652|0;wl=ow+3648|0;Wm=ow+3644|0;an=ow+3640|0;Uq=ow+3636|0;gr=ow+3632|0;Tm=ow+3628|0;bn=ow+3624|0;q=ow+3620|0;Mt=ow+3616|0;hw=ow+3612|0;Lt=ow+3608|0;ck=ow+3604|0;ih=ow+3600|0;w=ow+3596|0;jh=ow+3592|0;cw=ow+3588|0;gw=ow+3584|0;Zj=ow+3580|0;bk=ow+3576|0;t=ow+3572|0;v=ow+3568|0;iw=ow+3564|0;x=ow+3560|0;Vu=ow+3556|0;Wu=ow+3552|0;hh=ow+3548|0;kh=ow+3544|0;jt=ow+3540|0;Nt=ow+3536|0;fa=ow+3532|0;mh=ow+3528|0;qa=ow+3524|0;nh=ow+3520|0;oh=ow+3516|0;ph=ow+3512|0;ya=ow+3508|0;sh=ow+3504|0;J=ow+3500|0;th=ow+3496|0;rh=ow+3492|0;uh=ow+3488|0;aa=ow+3484|0;ea=ow+3480|0;la=ow+3476|0;pa=ow+3472|0;va=ow+3468|0;xa=ow+3464|0;E=ow+3460|0;I=ow+3456|0;ra=ow+3452|0;K=ow+3448|0;yo=ow+3444|0;zo=ow+3440|0;qh=ow+3436|0;vh=ow+3432|0;Ul=ow+3428|0;Vl=ow+3424|0;Z=ow+3420|0;bp=ow+3416|0;Cg=ow+3412|0;Fg=ow+3408|0;lb=ow+3404|0;cp=ow+3400|0;Dg
=ow+3396|0;Ig=ow+3392|0;Eg=ow+3388|0;Jg=ow+3384|0;P=ow+3380|0;yh=ow+3376|0;Y=ow+3372|0;zh=ow+3368|0;N=ow+3364|0;O=ow+3360|0;T=ow+3356|0;X=ow+3352|0;Ha=ow+3348|0;Gg=ow+3344|0;kb=ow+3340|0;Hg=ow+3336|0;Ca=ow+3332|0;Ga=ow+3328|0;Ja=ow+3324|0;jb=ow+3320|0;Yl=ow+3316|0;Zl=ow+3312|0;dp=ow+3308|0;ep=ow+3304|0;Bb=ow+3300|0;hp=ow+3296|0;Ng=ow+3292|0;Qg=ow+3288|0;Sa=ow+3284|0;ip=ow+3280|0;Og=ow+3276|0;Tg=ow+3272|0;Pg=ow+3268|0;Ug=ow+3264|0;rb=ow+3260|0;Lg=ow+3256|0;Ab=ow+3252|0;Mg=ow+3248|0;ob=ow+3244|0;qb=ow+3240|0;vb=ow+3236|0;zb=ow+3232|0;Eb=ow+3228|0;Rg=ow+3224|0;Ra=ow+3220|0;Sg=ow+3216|0;Cb=ow+3212|0;Db=ow+3208|0;Oa=ow+3204|0;Qa=ow+3200|0;$l=ow+3196|0;am=ow+3192|0;gp=ow+3188|0;jp=ow+3184|0;Ya=ow+3180|0;Yg=ow+3176|0;bb=ow+3172|0;Zg=ow+3168|0;cb=ow+3164|0;Np=ow+3160|0;hb=ow+3156|0;ni=ow+3152|0;Nb=ow+3148|0;oi=ow+3144|0;Ob=ow+3140|0;Op=ow+3136|0;Gc=ow+3132|0;Tp=ow+3128|0;Hh=ow+3124|0;Kh=ow+3120|0;Tb=ow+3116|0;Up=ow+3112|0;Ch=ow+3108|0;Fh=ow+3104|0;Wa=ow+3100|0;Xa=ow+3096|0;_a=ow+3092|0;ab=ow+3088|0;eb=ow+3084|0;gb=ow+3080|0;Kb=ow+3076|0;Mb=ow+3072|0;sc=ow+3068|0;Ih=ow+3064|0;Fc=ow+3060|0;Jh=ow+3056|0;Rb=ow+3052|0;rc=ow+3048|0;Cc=ow+3044|0;Ec=ow+3040|0;Lc=ow+3036|0;Dh=ow+3032|0;Qc=ow+3028|0;Eh=ow+3024|0;Ic=ow+3020|0;Kc=ow+3016|0;Nc=ow+3012|0;Pc=ow+3008|0;_g=ow+3004|0;$g=ow+3e3|0;Sp=ow+2996|0;Vp=ow+2992|0;Gh=ow+2988|0;ji=ow+2984|0;ri=ow+2980|0;si=ow+2976|0;Pp=ow+2972|0;Qp=ow+2968|0;mi=ow+2964|0;pi=ow+2960|0;_b=ow+2956|0;wi=ow+2952|0;dc=ow+2948|0;xi=ow+2944|0;ec=ow+2940|0;Yp=ow+2936|0;jc=ow+2932|0;Qh=ow+2928|0;Vc=ow+2924|0;Rh=ow+2920|0;Wc=ow+2916|0;Zp=ow+2912|0;Ed=ow+2908|0;cq=ow+2904|0;Gi=ow+2900|0;Lh=ow+2896|0;Pd=ow+2892|0;dq=ow+2888|0;Bi=ow+2884|0;Ei=ow+2880|0;Xb=ow+2876|0;Zb=ow+2872|0;ac=ow+2868|0;cc=ow+2864|0;gc=ow+2860|0;ic=ow+2856|0;Sc=ow+2852|0;Uc=ow+2848|0;_c=ow+2844|0;Hi=ow+2840|0;Dd=ow+2836|0;Ii=ow+2832|0;Yc=ow+2828|0;Zc=ow+2824|0;Ad=ow+2820|0;Cd=ow+2816|0;Jd=ow+2812|0;Ci=ow+2808|0;Od=ow+2804|0;Di=ow+2800|0;Gd=ow+2796|0;Id=ow+2792|0;Ld=ow+2788|0;Nd=ow+2784|0;yi=ow+2
780|0;zi=ow+2776|0;bq=ow+2772|0;eq=ow+2768|0;Fi=ow+2764|0;Mh=ow+2760|0;Uh=ow+2756|0;Vh=ow+2752|0;_p=ow+2748|0;$p=ow+2744|0;Ph=ow+2740|0;Sh=ow+2736|0;Ee=ow+2732|0;nl=ow+2728|0;nf=ow+2724|0;ol=ow+2720|0;of=ow+2716|0;ar=ow+2712|0;rf=ow+2708|0;cj=ow+2704|0;Wf=ow+2700|0;dj=ow+2696|0;Xf=ow+2692|0;br=ow+2688|0;fg=ow+2684|0;mq=ow+2680|0;ij=ow+2676|0;jj=ow+2672|0;qg=ow+2668|0;nq=ow+2664|0;lj=ow+2660|0;oj=ow+2656|0;Ce=ow+2652|0;De=ow+2648|0;Ie=ow+2644|0;mf=ow+2640|0;pf=ow+2636|0;qf=ow+2632|0;Tf=ow+2628|0;Vf=ow+2624|0;$f=ow+2620|0;gj=ow+2616|0;eg=ow+2612|0;hj=ow+2608|0;Zf=ow+2604|0;_f=ow+2600|0;bg=ow+2596|0;dg=ow+2592|0;kg=ow+2588|0;mj=ow+2584|0;pg=ow+2580|0;nj=ow+2576|0;hg=ow+2572|0;jg=ow+2568|0;mg=ow+2564|0;og=ow+2560|0;Yf=ow+2556|0;tf=ow+2552|0;cr=ow+2548|0;dr=ow+2544|0;Is=ow+2540|0;Js=ow+2536|0;bj=ow+2532|0;ej=ow+2528|0;kj=ow+2524|0;pj=ow+2520|0;sl=ow+2516|0;tl=ow+2512|0;lq=ow+2508|0;oq=ow+2504|0;pl=ow+2500|0;ql=ow+2496|0;Wd=ow+2492|0;$h=ow+2488|0;bd=ow+2484|0;ai=ow+2480|0;cd=ow+2476|0;lp=ow+2472|0;fd=ow+2468|0;Oj=ow+2464|0;kd=ow+2460|0;Pj=ow+2456|0;ld=ow+2452|0;mp=ow+2448|0;vd=ow+2444|0;Dp=ow+2440|0;Ki=ow+2436|0;Ni=ow+2432|0;Je=ow+2428|0;Ep=ow+2424|0;ei=ow+2420|0;hi=ow+2416|0;Ud=ow+2412|0;Vd=ow+2408|0;Yd=ow+2404|0;ad=ow+2400|0;dd=ow+2396|0;ed=ow+2392|0;hd=ow+2388|0;jd=ow+2384|0;pd=ow+2380|0;Li=ow+2376|0;ud=ow+2372|0;Mi=ow+2368|0;nd=ow+2364|0;od=ow+2360|0;rd=ow+2356|0;td=ow+2352|0;$d=ow+2348|0;fi=ow+2344|0;ie=ow+2340|0;gi=ow+2336|0;xd=ow+2332|0;zd=ow+2328|0;de=ow+2324|0;he=ow+2320|0;md=ow+2316|0;Ke=ow+2312|0;Cp=ow+2308|0;Fp=ow+2304|0;Tr=ow+2300|0;Ur=ow+2296|0;bi=ow+2292|0;ci=ow+2288|0;ii=ow+2284|0;Oi=ow+2280|0;Ui=ow+2276|0;Vi=ow+2272|0;np=ow+2268|0;op=ow+2264|0;Nj=ow+2260|0;Qj=ow+2256|0;Ue=ow+2252|0;wp=ow+2248|0;Cj=ow+2244|0;Fj=ow+2240|0;ye=ow+2236|0;sp=ow+2232|0;sj=ow+2228|0;xj=ow+2224|0;bf=ow+2220|0;xp=ow+2216|0;Dj=ow+2212|0;Ij=ow+2208|0;pe=ow+2204|0;rp=ow+2200|0;Ri=ow+2196|0;wj=ow+2192|0;Oe=ow+2188|0;Aj=ow+2184|0;Te=ow+2180|0;Bj=ow+2176|0;Me=ow+2172|0;Ne=ow+2168|0;Qe=ow
+2164|0;Se=ow+2160|0;se=ow+2156|0;Si=ow+2152|0;xe=ow+2148|0;Ti=ow+2144|0;qe=ow+2140|0;re=ow+2136|0;ue=ow+2132|0;we=ow+2128|0;Xe=ow+2124|0;Gj=ow+2120|0;af=ow+2116|0;Hj=ow+2112|0;Ve=ow+2108|0;We=ow+2104|0;Ze=ow+2100|0;$e=ow+2096|0;je=ow+2092|0;uj=ow+2088|0;oe=ow+2084|0;vj=ow+2080|0;ef=ow+2076|0;gf=ow+2072|0;le=ow+2068|0;ne=ow+2064|0;cf=ow+2060|0;ze=ow+2056|0;vp=ow+2052|0;yp=ow+2048|0;Zr=ow+2044|0;_r=ow+2040|0;tj=ow+2036|0;yj=ow+2032|0;Ej=ow+2028|0;Jj=ow+2024|0;An=ow+2020|0;Bn=ow+2016|0;qp=ow+2012|0;tp=ow+2008|0;xn=ow+2004|0;yn=ow+2e3|0;Df=ow+1996|0;qq=ow+1992|0;tk=ow+1988|0;wk=ow+1984|0;ch=ow+1980|0;Xq=ow+1976|0;dl=ow+1972|0;il=ow+1968|0;Mf=ow+1964|0;rq=ow+1960|0;uk=ow+1956|0;zk=ow+1952|0;vg=ow+1948|0;Wq=ow+1944|0;cl=ow+1940|0;fl=ow+1936|0;xf=ow+1932|0;rk=ow+1928|0;Cf=ow+1924|0;sk=ow+1920|0;vf=ow+1916|0;wf=ow+1912|0;zf=ow+1908|0;Bf=ow+1904|0;yg=ow+1900|0;gl=ow+1896|0;bh=ow+1892|0;hl=ow+1888|0;wg=ow+1884|0;xg=ow+1880|0;Ag=ow+1876|0;ah=ow+1872|0;Gf=ow+1868|0;xk=ow+1864|0;Lf=ow+1860|0;yk=ow+1856|0;Ef=ow+1852|0;Ff=ow+1848|0;If=ow+1844|0;Kf=ow+1840|0;Sf=ow+1836|0;al=ow+1832|0;ug=ow+1828|0;bl=ow+1824|0;Pf=ow+1820|0;Rf=ow+1816|0;sg=ow+1812|0;tg=ow+1808|0;Nf=ow+1804|0;dh=ow+1800|0;Vq=ow+1796|0;Yq=ow+1792|0;Es=ow+1788|0;Fs=ow+1784|0;vk=ow+1780|0;Ak=ow+1776|0;el=ow+1772|0;jl=ow+1768|0;Um=ow+1764|0;Vm=ow+1760|0;sq=ow+1756|0;tq=ow+1752|0;Qn=ow+1748|0;Rn=ow+1744|0;Va=ow+1740|0;Ht=ow+1736|0;et=ow+1732|0;ft=ow+1728|0;Qt=ow+1724|0;uu=ow+1720|0;Sd=ow+1716|0;tu=ow+1712|0;gh=ow+1708|0;St=ow+1704|0;Ss=ow+1700|0;_s=ow+1696|0;Xs=ow+1692|0;$s=ow+1688|0;Ms=ow+1684|0;gt=ow+1680|0;M=ow+1676|0;Ua=ow+1672|0;ct=ow+1668|0;dt=ow+1664|0;ht=ow+1660|0;Pt=ow+1656|0;Vb=ow+1652|0;Rd=ow+1648|0;Be=ow+1644|0;fh=ow+1640|0;Os=ow+1636|0;Rs=ow+1632|0;Ts=ow+1628|0;Ws=ow+1624|0;It=ow+1620|0;Jt=ow+1616|0;Td=ow+1612|0;Rt=ow+1608|0;bt=ow+1604|0;Tt=ow+1600|0;Ns=ow+1596|0;Ys=ow+1592|0;Ut=ow+1588|0;vu=ow+1584|0;Zs=ow+1580|0;at=ow+1576|0;wu=ow+1572|0;xu=ow+1568|0;Gr=ow+1564|0;rt=ow+1560|0;Bt=ow+1556|0;Ft=ow+1552|0;Cu=ow
+1548|0;Iu=ow+1544|0;Rr=ow+1540|0;zu=ow+1536|0;Cs=ow+1532|0;ot=ow+1528|0;ut=ow+1524|0;Hu=ow+1520|0;yt=ow+1516|0;Et=ow+1512|0;lt=ow+1508|0;pt=ow+1504|0;As=ow+1500|0;Fr=ow+1496|0;zt=ow+1492|0;At=ow+1488|0;Au=ow+1484|0;Bu=ow+1480|0;Lr=ow+1476|0;Qr=ow+1472|0;Xr=ow+1468|0;as=ow+1464|0;st=ow+1460|0;tt=ow+1456|0;wt=ow+1452|0;xt=ow+1448|0;Hs=ow+1444|0;kt=ow+1440|0;Sr=ow+1436|0;mt=ow+1432|0;Gu=ow+1428|0;Ju=ow+1424|0;nt=ow+1420|0;qt=ow+1416|0;Ku=ow+1412|0;Lu=ow+1408|0;vt=ow+1404|0;Ct=ow+1400|0;yu=ow+1396|0;Du=ow+1392|0;Dt=ow+1388|0;Gt=ow+1384|0;Eu=ow+1380|0;Fu=ow+1376|0;Mp=ow+1372|0;pr=ow+1368|0;hq=ow+1364|0;du=ow+1360|0;gu=ow+1356|0;mu=ow+1352|0;uq=ow+1348|0;lu=ow+1344|0;jr=ow+1340|0;Fq=ow+1336|0;nr=ow+1332|0;Bq=ow+1328|0;kq=ow+1324|0;Eq=ow+1320|0;mr=ow+1316|0;yq=ow+1312|0;Bo=ow+1308|0;Lp=ow+1304|0;qr=ow+1300|0;rr=ow+1296|0;Xp=ow+1292|0;gq=ow+1288|0;eu=ow+1284|0;fu=ow+1280|0;$q=ow+1276|0;zq=ow+1272|0;ir=ow+1268|0;Aq=ow+1264|0;_q=ow+1260|0;hr=ow+1256|0;Bp=ow+1252|0;wq=ow+1248|0;Kp=ow+1244|0;xq=ow+1240|0;Ap=ow+1236|0;Jp=ow+1232|0;iq=ow+1228|0;kr=ow+1224|0;ku=ow+1220|0;nu=ow+1216|0;lr=ow+1212|0;or=ow+1208|0;ou=ow+1204|0;pu=ow+1200|0;vq=ow+1196|0;Cq=ow+1192|0;cu=ow+1188|0;hu=ow+1184|0;Dq=ow+1180|0;Gq=ow+1176|0;iu=ow+1172|0;ju=ow+1168|0;Lq=ow+1164|0;ks=ow+1160|0;Sq=ow+1156|0;Nu=ow+1152|0;Su=ow+1148|0;_t=ow+1144|0;ns=ow+1140|0;Zt=ow+1136|0;es=ow+1132|0;ys=ow+1128|0;is=ow+1124|0;us=ow+1120|0;zr=ow+1116|0;xs=ow+1112|0;hs=ow+1108|0;rs=ow+1104|0;Hq=ow+1100|0;Kq=ow+1096|0;ls=ow+1092|0;ms=ow+1088|0;Oq=ow+1084|0;Rq=ow+1080|0;Ou=ow+1076|0;Ru=ow+1072|0;Cr=ow+1068|0;ss=ow+1064|0;ds=ow+1060|0;ts=ow+1056|0;Br=ow+1052|0;cs=ow+1048|0;vr=ow+1044|0;ps=ow+1040|0;yr=ow+1036|0;qs=ow+1032|0;ur=ow+1028|0;xr=ow+1024|0;Tq=ow+1020|0;fs=ow+1016|0;Yt=ow+1012|0;$t=ow+1008|0;gs=ow+1004|0;js=ow+1e3|0;au=ow+996|0;bu=ow+992|0;os=ow+988|0;vs=ow+984|0;Mu=ow+980|0;Vt=ow+976|0;ws=ow+972|0;zs=ow+968|0;Wt=ow+964|0;Xt=ow+960|0;Xg=ow+956|0;Ik=ow+952|0;pv=ow+948|0;vv=ow+944|0;Zh=ow+940|0;uv=ow+936|0;Lk=ow+932|0;mv=ow+92
8|0;aj=ow+924|0;Vk=ow+920|0;Fk=ow+916|0;Pk=ow+912|0;Ck=ow+908|0;Wk=ow+904|0;Gk=ow+900|0;Sk=ow+896|0;xh=ow+892|0;Wg=ow+888|0;nv=ow+884|0;ov=ow+880|0;vi=ow+876|0;Jk=ow+872|0;Yh=ow+868|0;Kk=ow+864|0;li=ow+860|0;ui=ow+856|0;Oh=ow+852|0;Xh=ow+848|0;Mj=ow+844|0;Nk=ow+840|0;$i=ow+836|0;Ok=ow+832|0;Qi=ow+828|0;Lj=ow+824|0;Xi=ow+820|0;_i=ow+816|0;ml=ow+812|0;Qk=ow+808|0;Bk=ow+804|0;Rk=ow+800|0;rj=ow+796|0;ll=ow+792|0;vl=ow+788|0;yl=ow+784|0;_h=ow+780|0;Dk=ow+776|0;tv=ow+772|0;wv=ow+768|0;Ek=ow+764|0;Hk=ow+760|0;xv=ow+756|0;yv=ow+752|0;Mk=ow+748|0;Tk=ow+744|0;lv=ow+740|0;qv=ow+736|0;Uk=ow+732|0;Xk=ow+728|0;rv=ow+724|0;sv=ow+720|0;dm=ow+716|0;ln=ow+712|0;Iv=ow+708|0;Ov=ow+704|0;sn=ow+700|0;Nv=ow+696|0;on=ow+692|0;Fv=ow+688|0;Mn=ow+684|0;Zn=ow+680|0;hn=ow+676|0;Tn=ow+672|0;en=ow+668|0;_n=ow+664|0;jn=ow+660|0;Wn=ow+656|0;Xl=ow+652|0;cm=ow+648|0;Gv=ow+644|0;Hv=ow+640|0;Lm=ow+636|0;mn=ow+632|0;Sm=ow+628|0;nn=ow+624|0;gm=ow+620|0;Km=ow+616|0;Om=ow+612|0;Rm=ow+608|0;En=ow+604|0;qn=ow+600|0;Ln=ow+596|0;rn=ow+592|0;wn=ow+588|0;Dn=ow+584|0;Hn=ow+580|0;Kn=ow+576|0;Ym=ow+572|0;Un=ow+568|0;dn=ow+564|0;Vn=ow+560|0;Pn=ow+556|0;Xm=ow+552|0;$m=ow+548|0;cn=ow+544|0;tn=ow+540|0;fn=ow+536|0;Mv=ow+532|0;Pv=ow+528|0;gn=ow+524|0;kn=ow+520|0;Qv=ow+516|0;Rv=ow+512|0;pn=ow+508|0;Xn=ow+504|0;Ev=ow+500|0;Jv=ow+496|0;Yn=ow+492|0;$n=ow+488|0;Kv=ow+484|0;Lv=ow+480|0;Fo=ow+476|0;ho=ow+472|0;Zu=ow+468|0;Av=ow+464|0;Mo=ow+460|0;zv=ow+456|0;ko=ow+452|0;ru=ow+448|0;Uo=ow+444|0;uo=ow+440|0;eo=ow+436|0;oo=ow+432|0;$o=ow+428|0;vo=ow+424|0;fo=ow+420|0;ro=ow+416|0;ao=ow+412|0;Eo=ow+408|0;su=ow+404|0;Yu=ow+400|0;Io=ow+396|0;io=ow+392|0;Lo=ow+388|0;jo=ow+384|0;Go=ow+380|0;Ho=ow+376|0;Jo=ow+372|0;Ko=ow+368|0;Qo=ow+364|0;mo=ow+360|0;To=ow+356|0;no=ow+352|0;Oo=ow+348|0;Po=ow+344|0;Ro=ow+340|0;So=ow+336|0;Xo=ow+332|0;po=ow+328|0;_o=ow+324|0;qo=ow+320|0;Vo=ow+316|0;Wo=ow+312|0;Yo=ow+308|0;Zo=ow+304|0;No=ow+300|0;bo=ow+296|0;bv=ow+292|0;Bv=ow+288|0;co=ow+284|0;go=ow+280|0;Cv=ow+276|0;Dv=ow+272|0;lo=ow+268|0;so=ow+264|0;qu=o
w+260|0;_u=ow+256|0;to=ow+252|0;wo=ow+248|0;$u=ow+244|0;av=ow+240|0;Bl=ow+236|0;Bm=ow+232|0;Yv=ow+228|0;hv=ow+224|0;Il=ow+220|0;gv=ow+216|0;Em=ow+212|0;Tv=ow+208|0;om=ow+204|0;Ql=ow+200|0;ym=ow+196|0;Kl=ow+192|0;vm=ow+188|0;Rl=ow+184|0;zm=ow+180|0;Nl=ow+176|0;Yk=ow+172|0;Al=ow+168|0;Uv=ow+164|0;Xv=ow+160|0;El=ow+156|0;Cm=ow+152|0;Hl=ow+148|0;Dm=ow+144|0;Cl=ow+140|0;Dl=ow+136|0;Fl=ow+132|0;Gl=ow+128|0;km=ow+124|0;Gm=ow+120|0;nm=ow+116|0;Hm=ow+112|0;im=ow+108|0;jm=ow+104|0;lm=ow+100|0;mm=ow+96|0;rm=ow+92|0;Ll=ow+88|0;um=ow+84|0;Ml=ow+80|0;pm=ow+76|0;qm=ow+72|0;sm=ow+68|0;tm=ow+64|0;Jl=ow+60|0;wm=ow+56|0;fv=ow+52|0;iv=ow+48|0;xm=ow+44|0;Am=ow+40|0;jv=ow+36|0;kv=ow+32|0;Fm=ow+28|0;Ol=ow+24|0;Sv=ow+20|0;cv=ow+16|0;Pl=ow+12|0;Sl=ow+8|0;dv=ow+4|0;ev=ow;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[pw>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ow+5004>>2]=.4713967442512512;g[ow+5e3>>2]=.8819212913513184;g[ow+4996>>2]=.290284663438797;g[ow+4992>>2]=.9569403529167175;g[ow+4988>>2]=.6343932747840881;g[ow+4984>>2]=.7730104327201843;g[ow+4980>>2]=.0980171412229538;g[ow+4976>>2]=.9951847195625305;g[ow+4972>>2]=.5555702447891235;g[ow+4968>>2]=.8314695954322815;g[ow+4964>>2]=.9807852506637573;g[ow+4960>>2]=.19509032368659973;g[ow+4956>>2]=.9238795042037964;g[ow+4952>>2]=.3826834261417389;g[ow+4948>>2]=.7071067690849304;c[nw>>2]=c[pw>>2];c[m>>2]=(c[m>>2]|0)+((c[pw>>2]|0)*10<<2);while(1){if((c[nw>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[Ib>>2]=+g[(c[m>>2]|0)+8>>2];g[hf>>2]=+g[(c[m>>2]|0)+12>>2];g[ap>>2]=+g[(c[m>>2]|0)+20>>2];g[Ji>>2]=+g[(c[m>>2]|0)+16>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[ca>>2]=+g[_d>>2]*+g[Ji>>2];g[A>>2]=+g[_d>>2]*+g[ap>>2];g[zl>>2]=+g[za>>2]*+g[hf>>2];g[ba>>2]=+g[za>>2]*+g[ap>>2];g[rg>>2]=+g[_d>>2]*+g[hf>>2];g[Im>>2]=+g[_d>>2]*+g[Ib>>2];g[z>>2]=+g[za>>2]*+g[Ji>>2];g[hk>>2]=+g[Ib>>2]*+g[ap>>2];g[ek>>2]=+g[hf>>2]*+g[ap>>2];g[ik>>2]=+g[hf>>2]*+g[Ji>>2];g[dk>>2]=+g[Ib>>2]*+g[Ji>>2];g[fk>>2]=+g[dk>>2]-+g[ek>>2];g[vc>>2]=+g[hk>>2]-+g[ik>>2];g[da>>2]=+
g[ba>>2]-+g[ca>>2];g[Sb>>2]=+g[ba>>2]+ +g[ca>>2];g[tc>>2]=+g[dk>>2]+ +g[ek>>2];g[jk>>2]=+g[hk>>2]+ +g[ik>>2];g[$>>2]=+g[z>>2]+ +g[A>>2];g[Qb>>2]=+g[z>>2]-+g[A>>2];g[Ah>>2]=+g[Rc>>2]-+g[rg>>2];g[qk>>2]=+g[Ah>>2]*+g[Ji>>2];g[Tu>>2]=+g[Ah>>2]*+g[ap>>2];g[jw>>2]=+g[Rc>>2]+ +g[rg>>2];g[kw>>2]=+g[jw>>2]*+g[Ji>>2];g[Uj>>2]=+g[jw>>2]*+g[ap>>2];g[Sn>>2]=+g[zl>>2]+ +g[Im>>2];g[jq>>2]=+g[Sn>>2]*+g[ap>>2];g[Zv>>2]=+g[Sn>>2]*+g[Ji>>2];g[lw>>2]=+g[zl>>2]-+g[Im>>2];g[mw>>2]=+g[lw>>2]*+g[ap>>2];g[Vj>>2]=+g[lw>>2]*+g[Ji>>2];g[$v>>2]=+g[(c[m>>2]|0)+28>>2];g[C>>2]=+g[hf>>2]*+g[$v>>2];g[F>>2]=+g[Ib>>2]*+g[$v>>2];g[wb>>2]=+g[za>>2]*+g[$v>>2];g[tb>>2]=+g[_d>>2]*+g[$v>>2];g[U>>2]=+g[Ji>>2]*+g[$v>>2];g[R>>2]=+g[ap>>2]*+g[$v>>2];g[Bs>>2]=+g[(c[m>>2]|0)+24>>2];g[B>>2]=+g[Ib>>2]*+g[Bs>>2];g[G>>2]=+g[hf>>2]*+g[Bs>>2];g[xb>>2]=+g[_d>>2]*+g[Bs>>2];g[sb>>2]=+g[za>>2]*+g[Bs>>2];g[V>>2]=+g[ap>>2]*+g[Bs>>2];g[Q>>2]=+g[Ji>>2]*+g[Bs>>2];g[D>>2]=+g[B>>2]+ +g[C>>2];g[bc>>2]=+g[F>>2]+ +g[G>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[Oc>>2]=+g[wb>>2]-+g[xb>>2];g[$b>>2]=+g[B>>2]-+g[C>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[Mc>>2]=+g[sb>>2]+ +g[tb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Fe>>2]=+g[Ah>>2]*+g[Bs>>2];g[Ge>>2]=+g[Sn>>2]*+g[$v>>2];g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[lg>>2]=+g[Fe>>2]+ +g[Ge>>2];g[ae>>2]=+g[jw>>2]*+g[Bs>>2];g[be>>2]=+g[lw>>2]*+g[$v>>2];g[ce>>2]=+g[ae>>2]+ +g[be>>2];g[ke>>2]=+g[ae>>2]-+g[be>>2];g[jf>>2]=+g[Ah>>2]*+g[$v>>2];g[kf>>2]=+g[Sn>>2]*+g[Bs>>2];g[lf>>2]=+g[jf>>2]+ +g[kf>>2];g[ng>>2]=+g[jf>>2]-+g[kf>>2];g[ee>>2]=+g[jw>>2]*+g[$v>>2];g[fe>>2]=+g[lw>>2]*+g[Bs>>2];g[ge>>2]=+g[ee>>2]-+g[fe>>2];g[me>>2]=+g[ee>>2]+ +g[fe>>2];g[db>>2]=+g[Q>>2]+ +g[R>>2];g[fb>>2]=+g[U>>2]-+g[V>>2];g[sr>>2]=+g[qk>>2]+ +g[jq>>2];g[Kt>>2]=+g[sr>>2]*+g[Bs>>2];g[dw>>2]=+g[sr>>2]*+g[$v>>2];g[Wj>>2]=+g[Uj>>2]+ 
+g[Vj>>2];g[Xj>>2]=+g[Wj>>2]*+g[$v>>2];g[$j>>2]=+g[Wj>>2]*+g[Bs>>2];g[nc>>2]=+g[Qb>>2]*+g[$v>>2];g[oc>>2]=+g[Sb>>2]*+g[Bs>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[kc>>2]=+g[Qb>>2]*+g[Bs>>2];g[lc>>2]=+g[Sb>>2]*+g[$v>>2];g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[yc>>2]=+g[tc>>2]*+g[$v>>2];g[zc>>2]=+g[vc>>2]*+g[Bs>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[_v>>2]=+g[Tu>>2]-+g[Zv>>2];g[aw>>2]=+g[_v>>2]*+g[$v>>2];g[ew>>2]=+g[_v>>2]*+g[Bs>>2];g[Sj>>2]=+g[kw>>2]-+g[mw>>2];g[Tj>>2]=+g[Sj>>2]*+g[Bs>>2];g[_j>>2]=+g[Sj>>2]*+g[$v>>2];g[uc>>2]=+g[tc>>2]*+g[Bs>>2];g[wc>>2]=+g[vc>>2]*+g[$v>>2];g[xc>>2]=+g[uc>>2]+ +g[wc>>2];g[ia>>2]=+g[Tu>>2]+ +g[Zv>>2];g[ja>>2]=+g[ia>>2]*+g[$v>>2];g[na>>2]=+g[ia>>2]*+g[Bs>>2];g[sa>>2]=+g[kw>>2]+ +g[mw>>2];g[_>>2]=+g[sa>>2]*+g[Bs>>2];g[Da>>2]=+g[sa>>2]*+g[$v>>2];g[gk>>2]=+g[fk>>2]*+g[Bs>>2];g[kk>>2]=+g[jk>>2]*+g[$v>>2];g[lk>>2]=+g[gk>>2]+ +g[kk>>2];g[Ka>>2]=+g[$>>2]*+g[$v>>2];g[La>>2]=+g[da>>2]*+g[Bs>>2];g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[Fb>>2]=+g[$>>2]*+g[Bs>>2];g[Gb>>2]=+g[da>>2]*+g[$v>>2];g[Hb>>2]=+g[Fb>>2]+ +g[Gb>>2];g[ta>>2]=+g[Uj>>2]-+g[Vj>>2];g[Aa>>2]=+g[ta>>2]*+g[$v>>2];g[Ea>>2]=+g[ta>>2]*+g[Bs>>2];g[nk>>2]=+g[fk>>2]*+g[$v>>2];g[ok>>2]=+g[jk>>2]*+g[Bs>>2];g[pk>>2]=+g[nk>>2]-+g[ok>>2];g[ga>>2]=+g[qk>>2]-+g[jq>>2];g[ha>>2]=+g[ga>>2]*+g[Bs>>2];g[ma>>2]=+g[ga>>2]*+g[$v>>2];g[bw>>2]=+g[Kt>>2]-+g[aw>>2];g[oa>>2]=+g[ma>>2]+ +g[na>>2];g[ag>>2]=+g[gk>>2]-+g[kk>>2];g[Af>>2]=+g[Ka>>2]+ +g[La>>2];g[fw>>2]=+g[dw>>2]+ +g[ew>>2];g[ka>>2]=+g[ha>>2]-+g[ja>>2];g[cg>>2]=+g[nk>>2]+ +g[ok>>2];g[yf>>2]=+g[Fb>>2]-+g[Gb>>2];g[Za>>2]=+g[_>>2]-+g[Aa>>2];g[Bd>>2]=+g[_j>>2]+ +g[$j>>2];g[Xd>>2]=+g[uc>>2]-+g[wc>>2];g[Re>>2]=+g[nc>>2]+ +g[oc>>2];g[$a>>2]=+g[Da>>2]+ +g[Ea>>2];g[$c>>2]=+g[Tj>>2]-+g[Xj>>2];g[Zd>>2]=+g[yc>>2]+ +g[zc>>2];g[Pe>>2]=+g[kc>>2]-+g[lc>>2];g[Yj>>2]=+g[Tj>>2]+ +g[Xj>>2];g[Kd>>2]=+g[Kt>>2]+ +g[aw>>2];g[Md>>2]=+g[dw>>2]-+g[ew>>2];g[ak>>2]=+g[_j>>2]-+g[$j>>2];g[Ba>>2]=+g[_>>2]+ +g[Aa>>2];g[fc>>2]=+g[ha>>2]+ 
+g[ja>>2];g[hc>>2]=+g[ma>>2]-+g[na>>2];g[Fa>>2]=+g[Da>>2]-+g[Ea>>2];g[mk>>2]=+g[(c[m>>2]|0)+32>>2];g[r>>2]=+g[(c[m>>2]|0)+36>>2];g[s>>2]=+g[lk>>2]*+g[mk>>2]+ +g[pk>>2]*+g[r>>2];g[Bg>>2]=+g[Ba>>2]*+g[r>>2]-+g[Fa>>2]*+g[mk>>2];g[Qf>>2]=+g[Ah>>2]*+g[r>>2]-+g[Sn>>2]*+g[mk>>2];g[ve>>2]=+g[db>>2]*+g[r>>2]-+g[fb>>2]*+g[mk>>2];g[Uf>>2]=+g[Yj>>2]*+g[r>>2]-+g[ak>>2]*+g[mk>>2];g[ua>>2]=+g[sa>>2]*+g[mk>>2]+ +g[ta>>2]*+g[r>>2];g[Jb>>2]=+g[ga>>2]*+g[mk>>2]+ +g[ia>>2]*+g[r>>2];g[sf>>2]=+g[Yj>>2]*+g[mk>>2]+ +g[ak>>2]*+g[r>>2];g[zg>>2]=+g[Ba>>2]*+g[mk>>2]+ +g[Fa>>2]*+g[r>>2];g[u>>2]=+g[lk>>2]*+g[r>>2]-+g[pk>>2]*+g[mk>>2];g[Pa>>2]=+g[Hb>>2]*+g[r>>2]-+g[Ma>>2]*+g[mk>>2];g[Na>>2]=+g[Hb>>2]*+g[mk>>2]+ +g[Ma>>2]*+g[r>>2];g[gg>>2]=+g[$>>2]*+g[mk>>2]+ +g[da>>2]*+g[r>>2];g[Hf>>2]=+g[fk>>2]*+g[mk>>2]+ +g[jk>>2]*+g[r>>2];g[wa>>2]=+g[sa>>2]*+g[r>>2]-+g[ta>>2]*+g[mk>>2];g[ib>>2]=+g[Sj>>2]*+g[r>>2]-+g[Wj>>2]*+g[mk>>2];g[nb>>2]=+g[Ib>>2]*+g[mk>>2]+ +g[hf>>2]*+g[r>>2];g[Of>>2]=+g[Ah>>2]*+g[mk>>2]+ +g[Sn>>2]*+g[r>>2];g[Ia>>2]=+g[Sj>>2]*+g[mk>>2]+ +g[Wj>>2]*+g[r>>2];g[ig>>2]=+g[$>>2]*+g[r>>2]-+g[da>>2]*+g[mk>>2];g[pb>>2]=+g[Ib>>2]*+g[r>>2]-+g[hf>>2]*+g[mk>>2];g[Jf>>2]=+g[fk>>2]*+g[r>>2]-+g[jk>>2]*+g[mk>>2];g[Lb>>2]=+g[ga>>2]*+g[r>>2]-+g[ia>>2]*+g[mk>>2];g[te>>2]=+g[db>>2]*+g[mk>>2]+ +g[fb>>2]*+g[r>>2];g[df>>2]=+g[jw>>2]*+g[mk>>2]+ +g[lw>>2]*+g[r>>2];g[gd>>2]=+g[fc>>2]*+g[mk>>2]+ +g[hc>>2]*+g[r>>2];g[qd>>2]=+g[Kd>>2]*+g[mk>>2]+ +g[Md>>2]*+g[r>>2];g[Dc>>2]=+g[xc>>2]*+g[r>>2]-+g[Ac>>2]*+g[mk>>2];g[Hd>>2]=+g[Ji>>2]*+g[r>>2]-+g[ap>>2]*+g[mk>>2];g[id>>2]=+g[fc>>2]*+g[r>>2]-+g[hc>>2]*+g[mk>>2];g[ff>>2]=+g[jw>>2]*+g[r>>2]-+g[lw>>2]*+g[mk>>2];g[Bc>>2]=+g[xc>>2]*+g[mk>>2]+ +g[Ac>>2]*+g[r>>2];g[Fd>>2]=+g[Ji>>2]*+g[mk>>2]+ +g[ap>>2]*+g[r>>2];g[Tc>>2]=+g[mc>>2]*+g[r>>2]-+g[pc>>2]*+g[mk>>2];g[sd>>2]=+g[Kd>>2]*+g[r>>2]-+g[Md>>2]*+g[mk>>2];g[yd>>2]=+g[tc>>2]*+g[r>>2]-+g[vc>>2]*+g[mk>>2];g[Hc>>2]=+g[sr>>2]*+g[mk>>2]+ +g[_v>>2]*+g[r>>2];g[Wb>>2]=+g[za>>2]*+g[mk>>2]+ 
+g[_d>>2]*+g[r>>2];g[Yb>>2]=+g[za>>2]*+g[r>>2]-+g[_d>>2]*+g[mk>>2];g[_e>>2]=+g[Qb>>2]*+g[r>>2]-+g[Sb>>2]*+g[mk>>2];g[Jc>>2]=+g[sr>>2]*+g[r>>2]-+g[_v>>2]*+g[mk>>2];g[wd>>2]=+g[tc>>2]*+g[mk>>2]+ +g[vc>>2]*+g[r>>2];g[qc>>2]=+g[mc>>2]*+g[mk>>2]+ +g[pc>>2]*+g[r>>2];g[Ye>>2]=+g[Qb>>2]*+g[mk>>2]+ +g[Sb>>2]*+g[r>>2];g[q>>2]=+g[c[k>>2]>>2];g[Mt>>2]=+g[c[l>>2]>>2];g[cw>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2];g[gw>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2];g[hw>>2]=+g[bw>>2]*+g[cw>>2]+ +g[fw>>2]*+g[gw>>2];g[Lt>>2]=+g[bw>>2]*+g[gw>>2]-+g[fw>>2]*+g[cw>>2];g[Zj>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[bk>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ck>>2]=+g[Yj>>2]*+g[Zj>>2]+ +g[ak>>2]*+g[bk>>2];g[ih>>2]=+g[Yj>>2]*+g[bk>>2]-+g[ak>>2]*+g[Zj>>2];g[t>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[w>>2]=+g[s>>2]*+g[t>>2]+ +g[u>>2]*+g[v>>2];g[jh>>2]=+g[s>>2]*+g[v>>2]-+g[u>>2]*+g[t>>2];g[iw>>2]=+g[q>>2]+ +g[hw>>2];g[x>>2]=+g[ck>>2]+ +g[w>>2];g[y>>2]=+g[iw>>2]+ +g[x>>2];g[xo>>2]=+g[iw>>2]-+g[x>>2];g[Vu>>2]=+g[Mt>>2]-+g[Lt>>2];g[Wu>>2]=+g[ck>>2]-+g[w>>2];g[Xu>>2]=+g[Vu>>2]-+g[Wu>>2];g[Wv>>2]=+g[Wu>>2]+ +g[Vu>>2];g[hh>>2]=+g[q>>2]-+g[hw>>2];g[kh>>2]=+g[ih>>2]-+g[jh>>2];g[lh>>2]=+g[hh>>2]-+g[kh>>2];g[Tl>>2]=+g[hh>>2]+ +g[kh>>2];g[jt>>2]=+g[ih>>2]+ +g[jh>>2];g[Nt>>2]=+g[Lt>>2]+ +g[Mt>>2];g[Ot>>2]=+g[jt>>2]+ +g[Nt>>2];g[Qu>>2]=+g[Nt>>2]-+g[jt>>2];g[aa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[fa>>2]=+g[$>>2]*+g[aa>>2]+ +g[da>>2]*+g[ea>>2];g[mh>>2]=+g[$>>2]*+g[ea>>2]-+g[da>>2]*+g[aa>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[qa>>2]=+g[ka>>2]*+g[la>>2]+ +g[oa>>2]*+g[pa>>2];g[nh>>2]=+g[ka>>2]*+g[pa>>2]-+g[oa>>2]*+g[la>>2];g[oh>>2]=+g[mh>>2]-+g[nh>>2];g[ph>>2]=+g[fa>>2]-+g[qa>>2];g[va>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[xa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[ya>>2]=+g[ua>>2]*+g[va>>2]+ 
+g[wa>>2]*+g[xa>>2];g[sh>>2]=+g[ua>>2]*+g[xa>>2]-+g[wa>>2]*+g[va>>2];g[E>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[J>>2]=+g[D>>2]*+g[E>>2]+ +g[H>>2]*+g[I>>2];g[th>>2]=+g[D>>2]*+g[I>>2]-+g[H>>2]*+g[E>>2];g[rh>>2]=+g[ya>>2]-+g[J>>2];g[uh>>2]=+g[sh>>2]-+g[th>>2];g[ra>>2]=+g[fa>>2]+ +g[qa>>2];g[K>>2]=+g[ya>>2]+ +g[J>>2];g[L>>2]=+g[ra>>2]+ +g[K>>2];g[Pu>>2]=+g[K>>2]-+g[ra>>2];g[yo>>2]=+g[mh>>2]+ +g[nh>>2];g[zo>>2]=+g[sh>>2]+ +g[th>>2];g[Ao>>2]=+g[yo>>2]-+g[zo>>2];g[it>>2]=+g[yo>>2]+ +g[zo>>2];g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[vh>>2]=+g[rh>>2]+ +g[uh>>2];g[wh>>2]=(+g[qh>>2]-+g[vh>>2])*.7071067690849304;g[Uu>>2]=(+g[qh>>2]+ +g[vh>>2])*.7071067690849304;g[Ul>>2]=+g[ph>>2]+ +g[oh>>2];g[Vl>>2]=+g[rh>>2]-+g[uh>>2];g[Wl>>2]=(+g[Ul>>2]+ +g[Vl>>2])*.7071067690849304;g[Vv>>2]=(+g[Vl>>2]-+g[Ul>>2])*.7071067690849304;g[N>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[O>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[P>>2]=+g[Ah>>2]*+g[N>>2]+ +g[Sn>>2]*+g[O>>2];g[yh>>2]=+g[Ah>>2]*+g[O>>2]-+g[Sn>>2]*+g[N>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[X>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[Y>>2]=+g[S>>2]*+g[T>>2]+ +g[W>>2]*+g[X>>2];g[zh>>2]=+g[S>>2]*+g[X>>2]-+g[W>>2]*+g[T>>2];g[Z>>2]=+g[P>>2]+ +g[Y>>2];g[bp>>2]=+g[yh>>2]+ +g[zh>>2];g[Cg>>2]=+g[yh>>2]-+g[zh>>2];g[Fg>>2]=+g[P>>2]-+g[Y>>2];g[Ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Ha>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[Fa>>2]*+g[Ga>>2];g[Gg>>2]=+g[Ba>>2]*+g[Ga>>2]-+g[Fa>>2]*+g[Ca>>2];g[Ja>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[jb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[kb>>2]=+g[Ia>>2]*+g[Ja>>2]+ +g[ib>>2]*+g[jb>>2];g[Hg>>2]=+g[Ia>>2]*+g[jb>>2]-+g[ib>>2]*+g[Ja>>2];g[lb>>2]=+g[Ha>>2]+ +g[kb>>2];g[cp>>2]=+g[Gg>>2]+ +g[Hg>>2];g[Dg>>2]=+g[Ha>>2]-+g[kb>>2];g[Ig>>2]=+g[Gg>>2]-+g[Hg>>2];g[mb>>2]=+g[Z>>2]+ +g[lb>>2];g[Dr>>2]=+g[bp>>2]+ +g[cp>>2];g[Eg>>2]=+g[Cg>>2]+ 
+g[Dg>>2];g[Jg>>2]=+g[Fg>>2]-+g[Ig>>2];g[Kg>>2]=+g[Eg>>2]*.3826834261417389-+g[Jg>>2]*.9238795042037964;g[Zk>>2]=+g[Eg>>2]*.9238795042037964+ +g[Jg>>2]*.3826834261417389;g[Yl>>2]=+g[Cg>>2]-+g[Dg>>2];g[Zl>>2]=+g[Fg>>2]+ +g[Ig>>2];g[_l>>2]=+g[Yl>>2]*.9238795042037964-+g[Zl>>2]*.3826834261417389;g[Co>>2]=+g[Yl>>2]*.3826834261417389+ +g[Zl>>2]*.9238795042037964;g[dp>>2]=+g[bp>>2]-+g[cp>>2];g[ep>>2]=+g[Z>>2]-+g[lb>>2];g[fp>>2]=+g[dp>>2]-+g[ep>>2];g[Iq>>2]=+g[ep>>2]+ +g[dp>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[Lg>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[vb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[zb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[Ab>>2]=+g[ub>>2]*+g[vb>>2]+ +g[yb>>2]*+g[zb>>2];g[Mg>>2]=+g[ub>>2]*+g[zb>>2]-+g[yb>>2]*+g[vb>>2];g[Bb>>2]=+g[rb>>2]+ +g[Ab>>2];g[hp>>2]=+g[Lg>>2]+ +g[Mg>>2];g[Ng>>2]=+g[Lg>>2]-+g[Mg>>2];g[Qg>>2]=+g[rb>>2]-+g[Ab>>2];g[Cb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Eb>>2]=+g[fk>>2]*+g[Cb>>2]+ +g[jk>>2]*+g[Db>>2];g[Rg>>2]=+g[fk>>2]*+g[Db>>2]-+g[jk>>2]*+g[Cb>>2];g[Oa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[Qa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[Ra>>2]=+g[Na>>2]*+g[Oa>>2]+ +g[Pa>>2]*+g[Qa>>2];g[Sg>>2]=+g[Na>>2]*+g[Qa>>2]-+g[Pa>>2]*+g[Oa>>2];g[Sa>>2]=+g[Eb>>2]+ +g[Ra>>2];g[ip>>2]=+g[Rg>>2]+ +g[Sg>>2];g[Og>>2]=+g[Eb>>2]-+g[Ra>>2];g[Tg>>2]=+g[Rg>>2]-+g[Sg>>2];g[Ta>>2]=+g[Bb>>2]+ +g[Sa>>2];g[Er>>2]=+g[hp>>2]+ +g[ip>>2];g[Pg>>2]=+g[Ng>>2]+ +g[Og>>2];g[Ug>>2]=+g[Qg>>2]-+g[Tg>>2];g[Vg>>2]=+g[Pg>>2]*.3826834261417389+ +g[Ug>>2]*.9238795042037964;g[_k>>2]=+g[Ug>>2]*.3826834261417389-+g[Pg>>2]*.9238795042037964;g[$l>>2]=+g[Ng>>2]-+g[Og>>2];g[am>>2]=+g[Qg>>2]+ +g[Tg>>2];g[bm>>2]=+g[$l>>2]*.9238795042037964+ 
+g[am>>2]*.3826834261417389;g[Do>>2]=+g[am>>2]*.9238795042037964-+g[$l>>2]*.3826834261417389;g[gp>>2]=+g[Bb>>2]-+g[Sa>>2];g[jp>>2]=+g[hp>>2]-+g[ip>>2];g[kp>>2]=+g[gp>>2]+ +g[jp>>2];g[Jq>>2]=+g[gp>>2]-+g[jp>>2];g[Wa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Xa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ya>>2]=+g[jw>>2]*+g[Wa>>2]+ +g[lw>>2]*+g[Xa>>2];g[Yg>>2]=+g[jw>>2]*+g[Xa>>2]-+g[lw>>2]*+g[Wa>>2];g[_a>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[ab>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[bb>>2]=+g[Za>>2]*+g[_a>>2]+ +g[$a>>2]*+g[ab>>2];g[Zg>>2]=+g[Za>>2]*+g[ab>>2]-+g[$a>>2]*+g[_a>>2];g[cb>>2]=+g[Ya>>2]+ +g[bb>>2];g[Np>>2]=+g[Yg>>2]+ +g[Zg>>2];g[eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[hb>>2]=+g[db>>2]*+g[eb>>2]+ +g[fb>>2]*+g[gb>>2];g[ni>>2]=+g[db>>2]*+g[gb>>2]-+g[fb>>2]*+g[eb>>2];g[Kb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[Mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[Nb>>2]=+g[Jb>>2]*+g[Kb>>2]+ +g[Lb>>2]*+g[Mb>>2];g[oi>>2]=+g[Jb>>2]*+g[Mb>>2]-+g[Lb>>2]*+g[Kb>>2];g[Ob>>2]=+g[hb>>2]+ +g[Nb>>2];g[Op>>2]=+g[ni>>2]+ +g[oi>>2];g[Rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[sc>>2]=+g[Qb>>2]*+g[Rb>>2]+ +g[Sb>>2]*+g[rc>>2];g[Ih>>2]=+g[Qb>>2]*+g[rc>>2]-+g[Sb>>2]*+g[Rb>>2];g[Cc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[Ec>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[Fc>>2]=+g[Bc>>2]*+g[Cc>>2]+ +g[Dc>>2]*+g[Ec>>2];g[Jh>>2]=+g[Bc>>2]*+g[Ec>>2]-+g[Dc>>2]*+g[Cc>>2];g[Gc>>2]=+g[sc>>2]+ +g[Fc>>2];g[Tp>>2]=+g[Ih>>2]+ +g[Jh>>2];g[Hh>>2]=+g[sc>>2]-+g[Fc>>2];g[Kh>>2]=+g[Ih>>2]-+g[Jh>>2];g[Ic>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Kc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Lc>>2]=+g[Hc>>2]*+g[Ic>>2]+ +g[Jc>>2]*+g[Kc>>2];g[Dh>>2]=+g[Hc>>2]*+g[Kc>>2]-+g[Jc>>2]*+g[Ic>>2];g[Nc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[Pc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[Qc>>2]=+g[Mc>>2]*+g[Nc>>2]+ 
+g[Oc>>2]*+g[Pc>>2];g[Eh>>2]=+g[Mc>>2]*+g[Pc>>2]-+g[Oc>>2]*+g[Nc>>2];g[Tb>>2]=+g[Lc>>2]+ +g[Qc>>2];g[Up>>2]=+g[Dh>>2]+ +g[Eh>>2];g[Ch>>2]=+g[Lc>>2]-+g[Qc>>2];g[Fh>>2]=+g[Dh>>2]-+g[Eh>>2];g[Pb>>2]=+g[cb>>2]+ +g[Ob>>2];g[Ub>>2]=+g[Gc>>2]+ +g[Tb>>2];g[Kr>>2]=+g[Pb>>2]-+g[Ub>>2];g[Hr>>2]=+g[Np>>2]+ +g[Op>>2];g[Ir>>2]=+g[Tp>>2]+ +g[Up>>2];g[Jr>>2]=+g[Hr>>2]-+g[Ir>>2];g[_g>>2]=+g[Yg>>2]-+g[Zg>>2];g[$g>>2]=+g[hb>>2]-+g[Nb>>2];g[Bh>>2]=+g[_g>>2]+ +g[$g>>2];g[em>>2]=+g[_g>>2]-+g[$g>>2];g[Sp>>2]=+g[cb>>2]-+g[Ob>>2];g[Vp>>2]=+g[Tp>>2]-+g[Up>>2];g[Wp>>2]=+g[Sp>>2]-+g[Vp>>2];g[Nq>>2]=+g[Sp>>2]+ +g[Vp>>2];g[Gh>>2]=+g[Ch>>2]-+g[Fh>>2];g[ji>>2]=+g[Hh>>2]+ +g[Kh>>2];g[ki>>2]=(+g[Gh>>2]-+g[ji>>2])*.7071067690849304;g[Jm>>2]=(+g[ji>>2]+ +g[Gh>>2])*.7071067690849304;g[ri>>2]=+g[Kh>>2]-+g[Hh>>2];g[si>>2]=+g[Ch>>2]+ +g[Fh>>2];g[ti>>2]=(+g[ri>>2]-+g[si>>2])*.7071067690849304;g[fm>>2]=(+g[ri>>2]+ +g[si>>2])*.7071067690849304;g[Pp>>2]=+g[Np>>2]-+g[Op>>2];g[Qp>>2]=+g[Tb>>2]-+g[Gc>>2];g[Rp>>2]=+g[Pp>>2]-+g[Qp>>2];g[Mq>>2]=+g[Pp>>2]+ +g[Qp>>2];g[mi>>2]=+g[Ya>>2]-+g[bb>>2];g[pi>>2]=+g[ni>>2]-+g[oi>>2];g[qi>>2]=+g[mi>>2]-+g[pi>>2];g[hm>>2]=+g[mi>>2]+ +g[pi>>2];g[Xb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[Zb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[_b>>2]=+g[Wb>>2]*+g[Xb>>2]+ +g[Yb>>2]*+g[Zb>>2];g[wi>>2]=+g[Wb>>2]*+g[Zb>>2]-+g[Yb>>2]*+g[Xb>>2];g[ac>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[cc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[dc>>2]=+g[$b>>2]*+g[ac>>2]+ +g[bc>>2]*+g[cc>>2];g[xi>>2]=+g[$b>>2]*+g[cc>>2]-+g[bc>>2]*+g[ac>>2];g[ec>>2]=+g[_b>>2]+ +g[dc>>2];g[Yp>>2]=+g[wi>>2]+ +g[xi>>2];g[gc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ic>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[jc>>2]=+g[fc>>2]*+g[gc>>2]+ +g[hc>>2]*+g[ic>>2];g[Qh>>2]=+g[fc>>2]*+g[ic>>2]-+g[hc>>2]*+g[gc>>2];g[Sc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Uc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Vc>>2]=+g[qc>>2]*+g[Sc>>2]+ 
+g[Tc>>2]*+g[Uc>>2];g[Rh>>2]=+g[qc>>2]*+g[Uc>>2]-+g[Tc>>2]*+g[Sc>>2];g[Wc>>2]=+g[jc>>2]+ +g[Vc>>2];g[Zp>>2]=+g[Qh>>2]+ +g[Rh>>2];g[Yc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Zc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[_c>>2]=+g[tc>>2]*+g[Yc>>2]+ +g[vc>>2]*+g[Zc>>2];g[Hi>>2]=+g[tc>>2]*+g[Zc>>2]-+g[vc>>2]*+g[Yc>>2];g[Ad>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Cd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Dd>>2]=+g[$c>>2]*+g[Ad>>2]+ +g[Bd>>2]*+g[Cd>>2];g[Ii>>2]=+g[$c>>2]*+g[Cd>>2]-+g[Bd>>2]*+g[Ad>>2];g[Ed>>2]=+g[_c>>2]+ +g[Dd>>2];g[cq>>2]=+g[Hi>>2]+ +g[Ii>>2];g[Gi>>2]=+g[_c>>2]-+g[Dd>>2];g[Lh>>2]=+g[Hi>>2]-+g[Ii>>2];g[Gd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[Id>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[Jd>>2]=+g[Fd>>2]*+g[Gd>>2]+ +g[Hd>>2]*+g[Id>>2];g[Ci>>2]=+g[Fd>>2]*+g[Id>>2]-+g[Hd>>2]*+g[Gd>>2];g[Ld>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Nd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Od>>2]=+g[Kd>>2]*+g[Ld>>2]+ +g[Md>>2]*+g[Nd>>2];g[Di>>2]=+g[Kd>>2]*+g[Nd>>2]-+g[Md>>2]*+g[Ld>>2];g[Pd>>2]=+g[Jd>>2]+ +g[Od>>2];g[dq>>2]=+g[Ci>>2]+ +g[Di>>2];g[Bi>>2]=+g[Jd>>2]-+g[Od>>2];g[Ei>>2]=+g[Ci>>2]-+g[Di>>2];g[Xc>>2]=+g[ec>>2]+ +g[Wc>>2];g[Qd>>2]=+g[Ed>>2]+ +g[Pd>>2];g[Mr>>2]=+g[Xc>>2]-+g[Qd>>2];g[Nr>>2]=+g[Yp>>2]+ +g[Zp>>2];g[Or>>2]=+g[cq>>2]+ +g[dq>>2];g[Pr>>2]=+g[Nr>>2]-+g[Or>>2];g[yi>>2]=+g[wi>>2]-+g[xi>>2];g[zi>>2]=+g[jc>>2]-+g[Vc>>2];g[Ai>>2]=+g[yi>>2]+ +g[zi>>2];g[Pm>>2]=+g[yi>>2]-+g[zi>>2];g[bq>>2]=+g[ec>>2]-+g[Wc>>2];g[eq>>2]=+g[cq>>2]-+g[dq>>2];g[fq>>2]=+g[bq>>2]-+g[eq>>2];g[Qq>>2]=+g[bq>>2]+ +g[eq>>2];g[Fi>>2]=+g[Bi>>2]-+g[Ei>>2];g[Mh>>2]=+g[Gi>>2]+ +g[Lh>>2];g[Nh>>2]=(+g[Fi>>2]-+g[Mh>>2])*.7071067690849304;g[Nm>>2]=(+g[Mh>>2]+ +g[Fi>>2])*.7071067690849304;g[Uh>>2]=+g[Lh>>2]-+g[Gi>>2];g[Vh>>2]=+g[Bi>>2]+ +g[Ei>>2];g[Wh>>2]=(+g[Uh>>2]-+g[Vh>>2])*.7071067690849304;g[Qm>>2]=(+g[Uh>>2]+ +g[Vh>>2])*.7071067690849304;g[_p>>2]=+g[Yp>>2]-+g[Zp>>2];g[$p>>2]=+g[Pd>>2]-+g[Ed>>2];g[aq>>2]=+g[_p>>2]-+g[$p>>2];g[Pq>>2]=+g[_p>>2]+ 
+g[$p>>2];g[Ph>>2]=+g[_b>>2]-+g[dc>>2];g[Sh>>2]=+g[Qh>>2]-+g[Rh>>2];g[Th>>2]=+g[Ph>>2]-+g[Sh>>2];g[Mm>>2]=+g[Ph>>2]+ +g[Sh>>2];g[Ce>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[De>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[Ee>>2]=+g[mk>>2]*+g[Ce>>2]+ +g[r>>2]*+g[De>>2];g[nl>>2]=+g[mk>>2]*+g[De>>2]-+g[r>>2]*+g[Ce>>2];g[Ie>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[mf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[nf>>2]=+g[He>>2]*+g[Ie>>2]+ +g[lf>>2]*+g[mf>>2];g[ol>>2]=+g[He>>2]*+g[mf>>2]-+g[lf>>2]*+g[Ie>>2];g[of>>2]=+g[Ee>>2]+ +g[nf>>2];g[ar>>2]=+g[nl>>2]+ +g[ol>>2];g[pf>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[qf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[rf>>2]=+g[lk>>2]*+g[pf>>2]+ +g[pk>>2]*+g[qf>>2];g[cj>>2]=+g[lk>>2]*+g[qf>>2]-+g[pk>>2]*+g[pf>>2];g[Tf>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[Vf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[Wf>>2]=+g[sf>>2]*+g[Tf>>2]+ +g[Uf>>2]*+g[Vf>>2];g[dj>>2]=+g[sf>>2]*+g[Vf>>2]-+g[Uf>>2]*+g[Tf>>2];g[Xf>>2]=+g[rf>>2]+ +g[Wf>>2];g[br>>2]=+g[cj>>2]+ +g[dj>>2];g[Zf>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[_f>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[$f>>2]=+g[sa>>2]*+g[Zf>>2]+ +g[ta>>2]*+g[_f>>2];g[gj>>2]=+g[sa>>2]*+g[_f>>2]-+g[ta>>2]*+g[Zf>>2];g[bg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[dg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[eg>>2]=+g[ag>>2]*+g[bg>>2]+ +g[cg>>2]*+g[dg>>2];g[hj>>2]=+g[ag>>2]*+g[dg>>2]-+g[cg>>2]*+g[bg>>2];g[fg>>2]=+g[$f>>2]+ +g[eg>>2];g[mq>>2]=+g[gj>>2]+ +g[hj>>2];g[ij>>2]=+g[gj>>2]-+g[hj>>2];g[jj>>2]=+g[$f>>2]-+g[eg>>2];g[hg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[jg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[kg>>2]=+g[gg>>2]*+g[hg>>2]+ +g[ig>>2]*+g[jg>>2];g[mj>>2]=+g[gg>>2]*+g[jg>>2]-+g[ig>>2]*+g[hg>>2];g[mg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[og>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[pg>>2]=+g[lg>>2]*+g[mg>>2]+ +g[ng>>2]*+g[og>>2];g[nj>>2]=+g[lg>>2]*+g[og>>2]-+g[ng>>2]*+g[mg>>2];g[qg>>2]=+g[kg>>2]+ +g[pg>>2];g[nq>>2]=+g[mj>>2]+ 
+g[nj>>2];g[lj>>2]=+g[kg>>2]-+g[pg>>2];g[oj>>2]=+g[mj>>2]-+g[nj>>2];g[Yf>>2]=+g[of>>2]+ +g[Xf>>2];g[tf>>2]=+g[fg>>2]+ +g[qg>>2];g[uf>>2]=+g[Yf>>2]+ +g[tf>>2];g[Ds>>2]=+g[Yf>>2]-+g[tf>>2];g[cr>>2]=+g[ar>>2]-+g[br>>2];g[dr>>2]=+g[qg>>2]-+g[fg>>2];g[er>>2]=+g[cr>>2]-+g[dr>>2];g[bs>>2]=+g[cr>>2]+ +g[dr>>2];g[Is>>2]=+g[ar>>2]+ +g[br>>2];g[Js>>2]=+g[mq>>2]+ +g[nq>>2];g[Ks>>2]=+g[Is>>2]-+g[Js>>2];g[Us>>2]=+g[Is>>2]+ +g[Js>>2];g[bj>>2]=+g[Ee>>2]-+g[nf>>2];g[ej>>2]=+g[cj>>2]-+g[dj>>2];g[fj>>2]=+g[bj>>2]-+g[ej>>2];g[Nn>>2]=+g[bj>>2]+ +g[ej>>2];g[kj>>2]=+g[ij>>2]-+g[jj>>2];g[pj>>2]=+g[lj>>2]+ +g[oj>>2];g[qj>>2]=(+g[kj>>2]-+g[pj>>2])*.7071067690849304;g[_m>>2]=(+g[kj>>2]+ +g[pj>>2])*.7071067690849304;g[sl>>2]=+g[lj>>2]-+g[oj>>2];g[tl>>2]=+g[jj>>2]+ +g[ij>>2];g[ul>>2]=(+g[sl>>2]-+g[tl>>2])*.7071067690849304;g[On>>2]=(+g[tl>>2]+ +g[sl>>2])*.7071067690849304;g[lq>>2]=+g[of>>2]-+g[Xf>>2];g[oq>>2]=+g[mq>>2]-+g[nq>>2];g[pq>>2]=+g[lq>>2]-+g[oq>>2];g[Ar>>2]=+g[lq>>2]+ +g[oq>>2];g[pl>>2]=+g[nl>>2]-+g[ol>>2];g[ql>>2]=+g[rf>>2]-+g[Wf>>2];g[rl>>2]=+g[pl>>2]+ +g[ql>>2];g[Zm>>2]=+g[pl>>2]-+g[ql>>2];g[Ud>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Vd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Wd>>2]=+g[za>>2]*+g[Ud>>2]+ +g[_d>>2]*+g[Vd>>2];g[$h>>2]=+g[za>>2]*+g[Vd>>2]-+g[_d>>2]*+g[Ud>>2];g[Yd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[ad>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[bd>>2]=+g[Xd>>2]*+g[Yd>>2]+ +g[Zd>>2]*+g[ad>>2];g[ai>>2]=+g[Xd>>2]*+g[ad>>2]-+g[Zd>>2]*+g[Yd>>2];g[cd>>2]=+g[Wd>>2]+ +g[bd>>2];g[lp>>2]=+g[$h>>2]+ +g[ai>>2];g[dd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ed>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[fd>>2]=+g[mc>>2]*+g[dd>>2]+ +g[pc>>2]*+g[ed>>2];g[Oj>>2]=+g[mc>>2]*+g[ed>>2]-+g[pc>>2]*+g[dd>>2];g[hd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[jd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[kd>>2]=+g[gd>>2]*+g[hd>>2]+ +g[id>>2]*+g[jd>>2];g[Pj>>2]=+g[gd>>2]*+g[jd>>2]-+g[id>>2]*+g[hd>>2];g[ld>>2]=+g[fd>>2]+ +g[kd>>2];g[mp>>2]=+g[Oj>>2]+ 
+g[Pj>>2];g[nd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[od>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[pd>>2]=+g[Ji>>2]*+g[nd>>2]+ +g[ap>>2]*+g[od>>2];g[Li>>2]=+g[Ji>>2]*+g[od>>2]-+g[ap>>2]*+g[nd>>2];g[rd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[td>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[ud>>2]=+g[qd>>2]*+g[rd>>2]+ +g[sd>>2]*+g[td>>2];g[Mi>>2]=+g[qd>>2]*+g[td>>2]-+g[sd>>2]*+g[rd>>2];g[vd>>2]=+g[pd>>2]+ +g[ud>>2];g[Dp>>2]=+g[Li>>2]+ +g[Mi>>2];g[Ki>>2]=+g[pd>>2]-+g[ud>>2];g[Ni>>2]=+g[Li>>2]-+g[Mi>>2];g[xd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[zd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[$d>>2]=+g[wd>>2]*+g[xd>>2]+ +g[yd>>2]*+g[zd>>2];g[fi>>2]=+g[wd>>2]*+g[zd>>2]-+g[yd>>2]*+g[xd>>2];g[de>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[he>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[ie>>2]=+g[ce>>2]*+g[de>>2]+ +g[ge>>2]*+g[he>>2];g[gi>>2]=+g[ce>>2]*+g[he>>2]-+g[ge>>2]*+g[de>>2];g[Je>>2]=+g[$d>>2]+ +g[ie>>2];g[Ep>>2]=+g[fi>>2]+ +g[gi>>2];g[ei>>2]=+g[$d>>2]-+g[ie>>2];g[hi>>2]=+g[fi>>2]-+g[gi>>2];g[md>>2]=+g[cd>>2]+ +g[ld>>2];g[Ke>>2]=+g[vd>>2]+ +g[Je>>2];g[Le>>2]=+g[md>>2]+ +g[Ke>>2];g[Yr>>2]=+g[md>>2]-+g[Ke>>2];g[Cp>>2]=+g[cd>>2]-+g[ld>>2];g[Fp>>2]=+g[Dp>>2]-+g[Ep>>2];g[Gp>>2]=+g[Cp>>2]-+g[Fp>>2];g[tr>>2]=+g[Cp>>2]+ +g[Fp>>2];g[Tr>>2]=+g[lp>>2]+ +g[mp>>2];g[Ur>>2]=+g[Dp>>2]+ +g[Ep>>2];g[Vr>>2]=+g[Tr>>2]-+g[Ur>>2];g[Ps>>2]=+g[Tr>>2]+ +g[Ur>>2];g[bi>>2]=+g[$h>>2]-+g[ai>>2];g[ci>>2]=+g[fd>>2]-+g[kd>>2];g[di>>2]=+g[bi>>2]+ +g[ci>>2];g[un>>2]=+g[bi>>2]-+g[ci>>2];g[ii>>2]=+g[ei>>2]-+g[hi>>2];g[Oi>>2]=+g[Ki>>2]+ +g[Ni>>2];g[Pi>>2]=(+g[ii>>2]-+g[Oi>>2])*.7071067690849304;g[Gn>>2]=(+g[Oi>>2]+ +g[ii>>2])*.7071067690849304;g[Ui>>2]=+g[Ni>>2]-+g[Ki>>2];g[Vi>>2]=+g[ei>>2]+ +g[hi>>2];g[Wi>>2]=(+g[Ui>>2]-+g[Vi>>2])*.7071067690849304;g[vn>>2]=(+g[Ui>>2]+ +g[Vi>>2])*.7071067690849304;g[np>>2]=+g[lp>>2]-+g[mp>>2];g[op>>2]=+g[Je>>2]-+g[vd>>2];g[pp>>2]=+g[np>>2]-+g[op>>2];g[wr>>2]=+g[np>>2]+ 
+g[op>>2];g[Nj>>2]=+g[Wd>>2]-+g[bd>>2];g[Qj>>2]=+g[Oj>>2]-+g[Pj>>2];g[Rj>>2]=+g[Nj>>2]-+g[Qj>>2];g[Fn>>2]=+g[Nj>>2]+ +g[Qj>>2];g[Me>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ne>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Oe>>2]=+g[sr>>2]*+g[Me>>2]+ +g[_v>>2]*+g[Ne>>2];g[Aj>>2]=+g[sr>>2]*+g[Ne>>2]-+g[_v>>2]*+g[Me>>2];g[Qe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[Se>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[Te>>2]=+g[Pe>>2]*+g[Qe>>2]+ +g[Re>>2]*+g[Se>>2];g[Bj>>2]=+g[Pe>>2]*+g[Se>>2]-+g[Re>>2]*+g[Qe>>2];g[Ue>>2]=+g[Oe>>2]+ +g[Te>>2];g[wp>>2]=+g[Aj>>2]+ +g[Bj>>2];g[Cj>>2]=+g[Aj>>2]-+g[Bj>>2];g[Fj>>2]=+g[Oe>>2]-+g[Te>>2];g[qe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[re>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[se>>2]=+g[ga>>2]*+g[qe>>2]+ +g[ia>>2]*+g[re>>2];g[Si>>2]=+g[ga>>2]*+g[re>>2]-+g[ia>>2]*+g[qe>>2];g[ue>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[we>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[xe>>2]=+g[te>>2]*+g[ue>>2]+ +g[ve>>2]*+g[we>>2];g[Ti>>2]=+g[te>>2]*+g[we>>2]-+g[ve>>2]*+g[ue>>2];g[ye>>2]=+g[se>>2]+ +g[xe>>2];g[sp>>2]=+g[Si>>2]+ +g[Ti>>2];g[sj>>2]=+g[Si>>2]-+g[Ti>>2];g[xj>>2]=+g[se>>2]-+g[xe>>2];g[Ve>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[We>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Xe>>2]=+g[xc>>2]*+g[Ve>>2]+ +g[Ac>>2]*+g[We>>2];g[Gj>>2]=+g[xc>>2]*+g[We>>2]-+g[Ac>>2]*+g[Ve>>2];g[Ze>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[$e>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[af>>2]=+g[Ye>>2]*+g[Ze>>2]+ +g[_e>>2]*+g[$e>>2];g[Hj>>2]=+g[Ye>>2]*+g[$e>>2]-+g[_e>>2]*+g[Ze>>2];g[bf>>2]=+g[Xe>>2]+ +g[af>>2];g[xp>>2]=+g[Gj>>2]+ +g[Hj>>2];g[Dj>>2]=+g[Xe>>2]-+g[af>>2];g[Ij>>2]=+g[Gj>>2]-+g[Hj>>2];g[ef>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[gf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[je>>2]=+g[df>>2]*+g[ef>>2]+ +g[ff>>2]*+g[gf>>2];g[uj>>2]=+g[df>>2]*+g[gf>>2]-+g[ff>>2]*+g[ef>>2];g[le>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[ne>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[oe>>2]=+g[ke>>2]*+g[le>>2]+ 
+g[me>>2]*+g[ne>>2];g[vj>>2]=+g[ke>>2]*+g[ne>>2]-+g[me>>2]*+g[le>>2];g[pe>>2]=+g[je>>2]+ +g[oe>>2];g[rp>>2]=+g[uj>>2]+ +g[vj>>2];g[Ri>>2]=+g[je>>2]-+g[oe>>2];g[wj>>2]=+g[uj>>2]-+g[vj>>2];g[cf>>2]=+g[Ue>>2]+ +g[bf>>2];g[ze>>2]=+g[pe>>2]+ +g[ye>>2];g[Ae>>2]=+g[cf>>2]+ +g[ze>>2];g[Wr>>2]=+g[ze>>2]-+g[cf>>2];g[vp>>2]=+g[Ue>>2]-+g[bf>>2];g[yp>>2]=+g[wp>>2]-+g[xp>>2];g[zp>>2]=+g[vp>>2]+ +g[yp>>2];g[Hp>>2]=+g[yp>>2]-+g[vp>>2];g[Zr>>2]=+g[wp>>2]+ +g[xp>>2];g[_r>>2]=+g[rp>>2]+ +g[sp>>2];g[$r>>2]=+g[Zr>>2]-+g[_r>>2];g[Qs>>2]=+g[Zr>>2]+ +g[_r>>2];g[tj>>2]=+g[Ri>>2]-+g[sj>>2];g[yj>>2]=+g[wj>>2]+ +g[xj>>2];g[zj>>2]=+g[tj>>2]*.3826834261417389-+g[yj>>2]*.9238795042037964;g[Zi>>2]=+g[yj>>2]*.3826834261417389+ +g[tj>>2]*.9238795042037964;g[Ej>>2]=+g[Cj>>2]+ +g[Dj>>2];g[Jj>>2]=+g[Fj>>2]-+g[Ij>>2];g[Kj>>2]=+g[Ej>>2]*.9238795042037964+ +g[Jj>>2]*.3826834261417389;g[Yi>>2]=+g[Ej>>2]*.3826834261417389-+g[Jj>>2]*.9238795042037964;g[An>>2]=+g[Cj>>2]-+g[Dj>>2];g[Bn>>2]=+g[Fj>>2]+ +g[Ij>>2];g[Cn>>2]=+g[An>>2]*.3826834261417389+ +g[Bn>>2]*.9238795042037964;g[In>>2]=+g[An>>2]*.9238795042037964-+g[Bn>>2]*.3826834261417389;g[qp>>2]=+g[pe>>2]-+g[ye>>2];g[tp>>2]=+g[rp>>2]-+g[sp>>2];g[up>>2]=+g[qp>>2]-+g[tp>>2];g[Ip>>2]=+g[qp>>2]+ +g[tp>>2];g[xn>>2]=+g[Ri>>2]+ +g[sj>>2];g[yn>>2]=+g[wj>>2]-+g[xj>>2];g[zn>>2]=+g[xn>>2]*.9238795042037964-+g[yn>>2]*.3826834261417389;g[Jn>>2]=+g[yn>>2]*.9238795042037964+ +g[xn>>2]*.3826834261417389;g[vf>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[wf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[xf>>2]=+g[Ib>>2]*+g[vf>>2]+ +g[hf>>2]*+g[wf>>2];g[rk>>2]=+g[Ib>>2]*+g[wf>>2]-+g[hf>>2]*+g[vf>>2];g[zf>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[Bf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[Cf>>2]=+g[yf>>2]*+g[zf>>2]+ +g[Af>>2]*+g[Bf>>2];g[sk>>2]=+g[yf>>2]*+g[Bf>>2]-+g[Af>>2]*+g[zf>>2];g[Df>>2]=+g[xf>>2]+ +g[Cf>>2];g[qq>>2]=+g[rk>>2]+ 
+g[sk>>2];g[tk>>2]=+g[rk>>2]-+g[sk>>2];g[wk>>2]=+g[xf>>2]-+g[Cf>>2];g[wg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[xg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[yg>>2]=+g[Sj>>2]*+g[wg>>2]+ +g[Wj>>2]*+g[xg>>2];g[gl>>2]=+g[Sj>>2]*+g[xg>>2]-+g[Wj>>2]*+g[wg>>2];g[Ag>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[ah>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[bh>>2]=+g[zg>>2]*+g[Ag>>2]+ +g[Bg>>2]*+g[ah>>2];g[hl>>2]=+g[zg>>2]*+g[ah>>2]-+g[Bg>>2]*+g[Ag>>2];g[ch>>2]=+g[yg>>2]+ +g[bh>>2];g[Xq>>2]=+g[gl>>2]+ +g[hl>>2];g[dl>>2]=+g[yg>>2]-+g[bh>>2];g[il>>2]=+g[gl>>2]-+g[hl>>2];g[Ef>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Ff>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Gf>>2]=+g[Hb>>2]*+g[Ef>>2]+ +g[Ma>>2]*+g[Ff>>2];g[xk>>2]=+g[Hb>>2]*+g[Ff>>2]-+g[Ma>>2]*+g[Ef>>2];g[If>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[Kf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[Lf>>2]=+g[Hf>>2]*+g[If>>2]+ +g[Jf>>2]*+g[Kf>>2];g[yk>>2]=+g[Hf>>2]*+g[Kf>>2]-+g[Jf>>2]*+g[If>>2];g[Mf>>2]=+g[Gf>>2]+ +g[Lf>>2];g[rq>>2]=+g[xk>>2]+ +g[yk>>2];g[uk>>2]=+g[Gf>>2]-+g[Lf>>2];g[zk>>2]=+g[xk>>2]-+g[yk>>2];g[Pf>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[Rf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[Sf>>2]=+g[Of>>2]*+g[Pf>>2]+ +g[Qf>>2]*+g[Rf>>2];g[al>>2]=+g[Of>>2]*+g[Rf>>2]-+g[Qf>>2]*+g[Pf>>2];g[sg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[tg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[ug>>2]=+g[Bs>>2]*+g[sg>>2]+ +g[$v>>2]*+g[tg>>2];g[bl>>2]=+g[Bs>>2]*+g[tg>>2]-+g[$v>>2]*+g[sg>>2];g[vg>>2]=+g[Sf>>2]+ +g[ug>>2];g[Wq>>2]=+g[al>>2]+ +g[bl>>2];g[cl>>2]=+g[al>>2]-+g[bl>>2];g[fl>>2]=+g[Sf>>2]-+g[ug>>2];g[Nf>>2]=+g[Df>>2]+ +g[Mf>>2];g[dh>>2]=+g[vg>>2]+ +g[ch>>2];g[eh>>2]=+g[Nf>>2]+ +g[dh>>2];g[Ls>>2]=+g[dh>>2]-+g[Nf>>2];g[Vq>>2]=+g[vg>>2]-+g[ch>>2];g[Yq>>2]=+g[Wq>>2]-+g[Xq>>2];g[Zq>>2]=+g[Vq>>2]+ +g[Yq>>2];g[fr>>2]=+g[Vq>>2]-+g[Yq>>2];g[Es>>2]=+g[qq>>2]+ +g[rq>>2];g[Fs>>2]=+g[Wq>>2]+ +g[Xq>>2];g[Gs>>2]=+g[Es>>2]-+g[Fs>>2];g[Vs>>2]=+g[Es>>2]+ +g[Fs>>2];g[vk>>2]=+g[tk>>2]+ 
+g[uk>>2];g[Ak>>2]=+g[wk>>2]-+g[zk>>2];g[$k>>2]=+g[vk>>2]*.3826834261417389-+g[Ak>>2]*.9238795042037964;g[xl>>2]=+g[vk>>2]*.9238795042037964+ +g[Ak>>2]*.3826834261417389;g[el>>2]=+g[cl>>2]+ +g[dl>>2];g[jl>>2]=+g[fl>>2]-+g[il>>2];g[kl>>2]=+g[el>>2]*.3826834261417389+ +g[jl>>2]*.9238795042037964;g[wl>>2]=+g[jl>>2]*.3826834261417389-+g[el>>2]*.9238795042037964;g[Um>>2]=+g[cl>>2]-+g[dl>>2];g[Vm>>2]=+g[fl>>2]+ +g[il>>2];g[Wm>>2]=+g[Um>>2]*.9238795042037964+ +g[Vm>>2]*.3826834261417389;g[an>>2]=+g[Vm>>2]*.9238795042037964-+g[Um>>2]*.3826834261417389;g[sq>>2]=+g[qq>>2]-+g[rq>>2];g[tq>>2]=+g[Df>>2]-+g[Mf>>2];g[Uq>>2]=+g[sq>>2]-+g[tq>>2];g[gr>>2]=+g[tq>>2]+ +g[sq>>2];g[Qn>>2]=+g[tk>>2]-+g[uk>>2];g[Rn>>2]=+g[wk>>2]+ +g[zk>>2];g[Tm>>2]=+g[Qn>>2]*.9238795042037964-+g[Rn>>2]*.3826834261417389;g[bn>>2]=+g[Qn>>2]*.3826834261417389+ +g[Rn>>2]*.9238795042037964;g[M>>2]=+g[y>>2]+ +g[L>>2];g[Ua>>2]=+g[mb>>2]+ +g[Ta>>2];g[Va>>2]=+g[M>>2]+ +g[Ua>>2];g[Ht>>2]=+g[M>>2]-+g[Ua>>2];g[ct>>2]=+g[Ps>>2]+ +g[Qs>>2];g[dt>>2]=+g[Us>>2]+ +g[Vs>>2];g[et>>2]=+g[ct>>2]-+g[dt>>2];g[ft>>2]=+g[ct>>2]+ +g[dt>>2];g[ht>>2]=+g[Dr>>2]+ +g[Er>>2];g[Pt>>2]=+g[it>>2]+ +g[Ot>>2];g[Qt>>2]=+g[ht>>2]+ +g[Pt>>2];g[uu>>2]=+g[Pt>>2]-+g[ht>>2];g[Vb>>2]=+g[Pb>>2]+ +g[Ub>>2];g[Rd>>2]=+g[Xc>>2]+ +g[Qd>>2];g[Sd>>2]=+g[Vb>>2]+ +g[Rd>>2];g[tu>>2]=+g[Rd>>2]-+g[Vb>>2];g[Be>>2]=+g[Le>>2]+ +g[Ae>>2];g[fh>>2]=+g[uf>>2]+ +g[eh>>2];g[gh>>2]=+g[Be>>2]+ +g[fh>>2];g[St>>2]=+g[fh>>2]-+g[Be>>2];g[Os>>2]=+g[Le>>2]-+g[Ae>>2];g[Rs>>2]=+g[Ps>>2]-+g[Qs>>2];g[Ss>>2]=+g[Os>>2]+ +g[Rs>>2];g[_s>>2]=+g[Rs>>2]-+g[Os>>2];g[Ts>>2]=+g[uf>>2]-+g[eh>>2];g[Ws>>2]=+g[Us>>2]-+g[Vs>>2];g[Xs>>2]=+g[Ts>>2]-+g[Ws>>2];g[$s>>2]=+g[Ts>>2]+ +g[Ws>>2];g[It>>2]=+g[Hr>>2]+ +g[Ir>>2];g[Jt>>2]=+g[Nr>>2]+ +g[Or>>2];g[Ms>>2]=+g[It>>2]-+g[Jt>>2];g[gt>>2]=+g[It>>2]+ +g[Jt>>2];g[Td>>2]=+g[Va>>2]+ +g[Sd>>2];g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[Td>>2]-+g[gh>>2];g[c[k>>2]>>2]=+g[Td>>2]+ +g[gh>>2];g[Rt>>2]=+g[gt>>2]+ +g[Qt>>2];g[c[l>>2]>>2]=+g[ft>>2]+ 
+g[Rt>>2];g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[Rt>>2]-+g[ft>>2];g[bt>>2]=+g[Va>>2]-+g[Sd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[bt>>2]-+g[et>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[bt>>2]+ +g[et>>2];g[Tt>>2]=+g[Qt>>2]-+g[gt>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[St>>2]+ +g[Tt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[Tt>>2]-+g[St>>2];g[Ns>>2]=+g[Ht>>2]+ +g[Ms>>2];g[Ys>>2]=(+g[Ss>>2]+ +g[Xs>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[Ns>>2]-+g[Ys>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ns>>2]+ +g[Ys>>2];g[Ut>>2]=(+g[_s>>2]+ +g[$s>>2])*.7071067690849304;g[vu>>2]=+g[tu>>2]+ +g[uu>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ut>>2]+ +g[vu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[vu>>2]-+g[Ut>>2];g[Zs>>2]=+g[Ht>>2]-+g[Ms>>2];g[at>>2]=(+g[_s>>2]-+g[$s>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[Zs>>2]-+g[at>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Zs>>2]+ +g[at>>2];g[wu>>2]=(+g[Xs>>2]-+g[Ss>>2])*.7071067690849304;g[xu>>2]=+g[uu>>2]-+g[tu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[wu>>2]+ +g[xu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[xu>>2]-+g[wu>>2];g[As>>2]=+g[y>>2]-+g[L>>2];g[Fr>>2]=+g[Dr>>2]-+g[Er>>2];g[Gr>>2]=+g[As>>2]-+g[Fr>>2];g[rt>>2]=+g[As>>2]+ +g[Fr>>2];g[zt>>2]=+g[Ds>>2]+ +g[Gs>>2];g[At>>2]=+g[Ks>>2]+ +g[Ls>>2];g[Bt>>2]=+g[zt>>2]*.9238795042037964-+g[At>>2]*.3826834261417389;g[Ft>>2]=+g[At>>2]*.9238795042037964+ +g[zt>>2]*.3826834261417389;g[Au>>2]=+g[Ta>>2]-+g[mb>>2];g[Bu>>2]=+g[Ot>>2]-+g[it>>2];g[Cu>>2]=+g[Au>>2]+ +g[Bu>>2];g[Iu>>2]=+g[Bu>>2]-+g[Au>>2];g[Lr>>2]=+g[Jr>>2]-+g[Kr>>2];g[Qr>>2]=+g[Mr>>2]+ +g[Pr>>2];g[Rr>>2]=(+g[Lr>>2]-+g[Qr>>2])*.7071067690849304;g[zu>>2]=(+g[Lr>>2]+ +g[Qr>>2])*.7071067690849304;g[Xr>>2]=+g[Vr>>2]-+g[Wr>>2];g[as>>2]=+g[Yr>>2]-+g[$r>>2];g[Cs>>2]=+g[Xr>>2]*.9238795042037964+ +g[as>>2]*.3826834261417389;g[ot>>2]=+g[Xr>>2]*.3826834261417389-+g[as>>2]*.9238795042037964;g[st>>2]=+g[Kr>>2]+ +g[Jr>>2];g[tt>>2]=+g[Mr>>2]-+g[Pr>>2];g[ut>>2]=(+g[st>>2]+ 
+g[tt>>2])*.7071067690849304;g[Hu>>2]=(+g[tt>>2]-+g[st>>2])*.7071067690849304;g[wt>>2]=+g[Vr>>2]+ +g[Wr>>2];g[xt>>2]=+g[Yr>>2]+ +g[$r>>2];g[yt>>2]=+g[wt>>2]*.3826834261417389+ +g[xt>>2]*.9238795042037964;g[Et>>2]=+g[wt>>2]*.9238795042037964-+g[xt>>2]*.3826834261417389;g[Hs>>2]=+g[Ds>>2]-+g[Gs>>2];g[kt>>2]=+g[Ks>>2]-+g[Ls>>2];g[lt>>2]=+g[Hs>>2]*.3826834261417389-+g[kt>>2]*.9238795042037964;g[pt>>2]=+g[kt>>2]*.3826834261417389+ +g[Hs>>2]*.9238795042037964;g[Sr>>2]=+g[Gr>>2]+ +g[Rr>>2];g[mt>>2]=+g[Cs>>2]+ +g[lt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[Sr>>2]-+g[mt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Sr>>2]+ +g[mt>>2];g[Gu>>2]=+g[ot>>2]+ +g[pt>>2];g[Ju>>2]=+g[Hu>>2]+ +g[Iu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Gu>>2]+ +g[Ju>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[Ju>>2]-+g[Gu>>2];g[nt>>2]=+g[Gr>>2]-+g[Rr>>2];g[qt>>2]=+g[ot>>2]-+g[pt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[nt>>2]-+g[qt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[nt>>2]+ +g[qt>>2];g[Ku>>2]=+g[lt>>2]-+g[Cs>>2];g[Lu>>2]=+g[Iu>>2]-+g[Hu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Ku>>2]+ +g[Lu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[Lu>>2]-+g[Ku>>2];g[vt>>2]=+g[rt>>2]+ +g[ut>>2];g[Ct>>2]=+g[yt>>2]+ +g[Bt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[vt>>2]-+g[Ct>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[vt>>2]+ +g[Ct>>2];g[yu>>2]=+g[Et>>2]+ +g[Ft>>2];g[Du>>2]=+g[zu>>2]+ +g[Cu>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[yu>>2]+ +g[Du>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[Du>>2]-+g[yu>>2];g[Dt>>2]=+g[rt>>2]-+g[ut>>2];g[Gt>>2]=+g[Et>>2]-+g[Ft>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[Dt>>2]-+g[Gt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Dt>>2]+ +g[Gt>>2];g[Eu>>2]=+g[Bt>>2]-+g[yt>>2];g[Fu>>2]=+g[Cu>>2]-+g[zu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Eu>>2]+ +g[Fu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[Fu>>2]-+g[Eu>>2];g[Bo>>2]=+g[xo>>2]-+g[Ao>>2];g[Lp>>2]=(+g[fp>>2]-+g[kp>>2])*.7071067690849304;g[Mp>>2]=+g[Bo>>2]-+g[Lp>>2];g[pr>>2]=+g[Bo>>2]+ 
+g[Lp>>2];g[Xp>>2]=+g[Rp>>2]*.3826834261417389-+g[Wp>>2]*.9238795042037964;g[gq>>2]=+g[aq>>2]*.3826834261417389+ +g[fq>>2]*.9238795042037964;g[hq>>2]=+g[Xp>>2]-+g[gq>>2];g[du>>2]=+g[Xp>>2]+ +g[gq>>2];g[eu>>2]=(+g[Jq>>2]-+g[Iq>>2])*.7071067690849304;g[fu>>2]=+g[Qu>>2]-+g[Pu>>2];g[gu>>2]=+g[eu>>2]+ +g[fu>>2];g[mu>>2]=+g[fu>>2]-+g[eu>>2];g[qr>>2]=+g[Rp>>2]*.9238795042037964+ +g[Wp>>2]*.3826834261417389;g[rr>>2]=+g[fq>>2]*.3826834261417389-+g[aq>>2]*.9238795042037964;g[uq>>2]=+g[qr>>2]+ +g[rr>>2];g[lu>>2]=+g[rr>>2]-+g[qr>>2];g[_q>>2]=(+g[Uq>>2]-+g[Zq>>2])*.7071067690849304;g[$q>>2]=+g[pq>>2]-+g[_q>>2];g[zq>>2]=+g[pq>>2]+ +g[_q>>2];g[hr>>2]=(+g[fr>>2]-+g[gr>>2])*.7071067690849304;g[ir>>2]=+g[er>>2]-+g[hr>>2];g[Aq>>2]=+g[er>>2]+ +g[hr>>2];g[jr>>2]=+g[$q>>2]*.19509032368659973-+g[ir>>2]*.9807852506637573;g[Fq>>2]=+g[Aq>>2]*.8314695954322815+ +g[zq>>2]*.5555702447891235;g[nr>>2]=+g[ir>>2]*.19509032368659973+ +g[$q>>2]*.9807852506637573;g[Bq>>2]=+g[zq>>2]*.8314695954322815-+g[Aq>>2]*.5555702447891235;g[Ap>>2]=(+g[up>>2]-+g[zp>>2])*.7071067690849304;g[Bp>>2]=+g[pp>>2]-+g[Ap>>2];g[wq>>2]=+g[pp>>2]+ +g[Ap>>2];g[Jp>>2]=(+g[Hp>>2]-+g[Ip>>2])*.7071067690849304;g[Kp>>2]=+g[Gp>>2]-+g[Jp>>2];g[xq>>2]=+g[Gp>>2]+ +g[Jp>>2];g[kq>>2]=+g[Bp>>2]*.9807852506637573+ +g[Kp>>2]*.19509032368659973;g[Eq>>2]=+g[wq>>2]*.8314695954322815-+g[xq>>2]*.5555702447891235;g[mr>>2]=+g[Bp>>2]*.19509032368659973-+g[Kp>>2]*.9807852506637573;g[yq>>2]=+g[wq>>2]*.5555702447891235+ +g[xq>>2]*.8314695954322815;g[iq>>2]=+g[Mp>>2]+ +g[hq>>2];g[kr>>2]=+g[kq>>2]+ +g[jr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[iq>>2]-+g[kr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[iq>>2]+ +g[kr>>2];g[ku>>2]=+g[mr>>2]+ +g[nr>>2];g[nu>>2]=+g[lu>>2]+ +g[mu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ku>>2]+ 
+g[nu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[nu>>2]-+g[ku>>2];g[lr>>2]=+g[Mp>>2]-+g[hq>>2];g[or>>2]=+g[mr>>2]-+g[nr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[lr>>2]-+g[or>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[lr>>2]+ +g[or>>2];g[ou>>2]=+g[jr>>2]-+g[kq>>2];g[pu>>2]=+g[mu>>2]-+g[lu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[ou>>2]+ +g[pu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[pu>>2]-+g[ou>>2];g[vq>>2]=+g[pr>>2]+ +g[uq>>2];g[Cq>>2]=+g[yq>>2]+ +g[Bq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[vq>>2]-+g[Cq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[vq>>2]+ +g[Cq>>2];g[cu>>2]=+g[Eq>>2]+ +g[Fq>>2];g[hu>>2]=+g[du>>2]+ +g[gu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[cu>>2]+ +g[hu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[hu>>2]-+g[cu>>2];g[Dq>>2]=+g[pr>>2]-+g[uq>>2];g[Gq>>2]=+g[Eq>>2]-+g[Fq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[Dq>>2]-+g[Gq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Dq>>2]+ +g[Gq>>2];g[iu>>2]=+g[Bq>>2]-+g[yq>>2];g[ju>>2]=+g[gu>>2]-+g[du>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[iu>>2]+ +g[ju>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[ju>>2]-+g[iu>>2];g[Hq>>2]=+g[xo>>2]+ +g[Ao>>2];g[Kq>>2]=(+g[Iq>>2]+ +g[Jq>>2])*.7071067690849304;g[Lq>>2]=+g[Hq>>2]-+g[Kq>>2];g[ks>>2]=+g[Hq>>2]+ +g[Kq>>2];g[Oq>>2]=+g[Mq>>2]*.9238795042037964-+g[Nq>>2]*.3826834261417389;g[Rq>>2]=+g[Pq>>2]*.9238795042037964+ +g[Qq>>2]*.3826834261417389;g[Sq>>2]=+g[Oq>>2]-+g[Rq>>2];g[Nu>>2]=+g[Oq>>2]+ +g[Rq>>2];g[Ou>>2]=(+g[fp>>2]+ +g[kp>>2])*.7071067690849304;g[Ru>>2]=+g[Pu>>2]+ +g[Qu>>2];g[Su>>2]=+g[Ou>>2]+ +g[Ru>>2];g[_t>>2]=+g[Ru>>2]-+g[Ou>>2];g[ls>>2]=+g[Mq>>2]*.3826834261417389+ +g[Nq>>2]*.9238795042037964;g[ms>>2]=+g[Qq>>2]*.9238795042037964-+g[Pq>>2]*.3826834261417389;g[ns>>2]=+g[ls>>2]+ +g[ms>>2];g[Zt>>2]=+g[ms>>2]-+g[ls>>2];g[Br>>2]=(+g[gr>>2]+ +g[fr>>2])*.7071067690849304;g[Cr>>2]=+g[Ar>>2]-+g[Br>>2];g[ss>>2]=+g[Ar>>2]+ +g[Br>>2];g[cs>>2]=(+g[Uq>>2]+ +g[Zq>>2])*.7071067690849304;g[ds>>2]=+g[bs>>2]-+g[cs>>2];g[ts>>2]=+g[bs>>2]+ 
+g[cs>>2];g[es>>2]=+g[Cr>>2]*.5555702447891235-+g[ds>>2]*.8314695954322815;g[ys>>2]=+g[ss>>2]*.19509032368659973+ +g[ts>>2]*.9807852506637573;g[is>>2]=+g[Cr>>2]*.8314695954322815+ +g[ds>>2]*.5555702447891235;g[us>>2]=+g[ss>>2]*.9807852506637573-+g[ts>>2]*.19509032368659973;g[ur>>2]=(+g[zp>>2]+ +g[up>>2])*.7071067690849304;g[vr>>2]=+g[tr>>2]-+g[ur>>2];g[ps>>2]=+g[tr>>2]+ +g[ur>>2];g[xr>>2]=(+g[Hp>>2]+ +g[Ip>>2])*.7071067690849304;g[yr>>2]=+g[wr>>2]-+g[xr>>2];g[qs>>2]=+g[wr>>2]+ +g[xr>>2];g[zr>>2]=+g[vr>>2]*.5555702447891235+ +g[yr>>2]*.8314695954322815;g[xs>>2]=+g[qs>>2]*.9807852506637573-+g[ps>>2]*.19509032368659973;g[hs>>2]=+g[yr>>2]*.5555702447891235-+g[vr>>2]*.8314695954322815;g[rs>>2]=+g[ps>>2]*.9807852506637573+ +g[qs>>2]*.19509032368659973;g[Tq>>2]=+g[Lq>>2]+ +g[Sq>>2];g[fs>>2]=+g[zr>>2]+ +g[es>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[Tq>>2]-+g[fs>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Tq>>2]+ +g[fs>>2];g[Yt>>2]=+g[hs>>2]+ +g[is>>2];g[$t>>2]=+g[Zt>>2]+ +g[_t>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Yt>>2]+ +g[$t>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[$t>>2]-+g[Yt>>2];g[gs>>2]=+g[Lq>>2]-+g[Sq>>2];g[js>>2]=+g[hs>>2]-+g[is>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[gs>>2]-+g[js>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[gs>>2]+ +g[js>>2];g[au>>2]=+g[es>>2]-+g[zr>>2];g[bu>>2]=+g[_t>>2]-+g[Zt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[au>>2]+ +g[bu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[bu>>2]-+g[au>>2];g[os>>2]=+g[ks>>2]+ +g[ns>>2];g[vs>>2]=+g[rs>>2]+ +g[us>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[os>>2]-+g[vs>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[os>>2]+ +g[vs>>2];g[Mu>>2]=+g[xs>>2]+ +g[ys>>2];g[Vt>>2]=+g[Nu>>2]+ +g[Su>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Mu>>2]+ +g[Vt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[Vt>>2]-+g[Mu>>2];g[ws>>2]=+g[ks>>2]-+g[ns>>2];g[zs>>2]=+g[xs>>2]-+g[ys>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[ws>>2]-+g[zs>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[ws>>2]+ 
+g[zs>>2];g[Wt>>2]=+g[us>>2]-+g[rs>>2];g[Xt>>2]=+g[Su>>2]-+g[Nu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Wt>>2]+ +g[Xt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[Xt>>2]-+g[Wt>>2];g[xh>>2]=+g[lh>>2]-+g[wh>>2];g[Wg>>2]=+g[Kg>>2]-+g[Vg>>2];g[Xg>>2]=+g[xh>>2]-+g[Wg>>2];g[Ik>>2]=+g[xh>>2]+ +g[Wg>>2];g[nv>>2]=+g[_k>>2]-+g[Zk>>2];g[ov>>2]=+g[Wv>>2]-+g[Vv>>2];g[pv>>2]=+g[nv>>2]+ +g[ov>>2];g[vv>>2]=+g[ov>>2]-+g[nv>>2];g[li>>2]=+g[Bh>>2]-+g[ki>>2];g[ui>>2]=+g[qi>>2]-+g[ti>>2];g[vi>>2]=+g[li>>2]*.19509032368659973-+g[ui>>2]*.9807852506637573;g[Jk>>2]=+g[li>>2]*.9807852506637573+ +g[ui>>2]*.19509032368659973;g[Oh>>2]=+g[Ai>>2]-+g[Nh>>2];g[Xh>>2]=+g[Th>>2]-+g[Wh>>2];g[Yh>>2]=+g[Oh>>2]*.19509032368659973+ +g[Xh>>2]*.9807852506637573;g[Kk>>2]=+g[Xh>>2]*.19509032368659973-+g[Oh>>2]*.9807852506637573;g[Zh>>2]=+g[vi>>2]-+g[Yh>>2];g[uv>>2]=+g[Kk>>2]-+g[Jk>>2];g[Lk>>2]=+g[Jk>>2]+ +g[Kk>>2];g[mv>>2]=+g[vi>>2]+ +g[Yh>>2];g[Qi>>2]=+g[di>>2]-+g[Pi>>2];g[Lj>>2]=+g[zj>>2]-+g[Kj>>2];g[Mj>>2]=+g[Qi>>2]-+g[Lj>>2];g[Nk>>2]=+g[Qi>>2]+ +g[Lj>>2];g[Xi>>2]=+g[Rj>>2]-+g[Wi>>2];g[_i>>2]=+g[Yi>>2]-+g[Zi>>2];g[$i>>2]=+g[Xi>>2]-+g[_i>>2];g[Ok>>2]=+g[Xi>>2]+ +g[_i>>2];g[aj>>2]=+g[Mj>>2]*.9951847195625305+ +g[$i>>2]*.0980171412229538;g[Vk>>2]=+g[Nk>>2]*.7730104327201843-+g[Ok>>2]*.6343932747840881;g[Fk>>2]=+g[Mj>>2]*.0980171412229538-+g[$i>>2]*.9951847195625305;g[Pk>>2]=+g[Nk>>2]*.6343932747840881+ +g[Ok>>2]*.7730104327201843;g[rj>>2]=+g[fj>>2]-+g[qj>>2];g[ll>>2]=+g[$k>>2]-+g[kl>>2];g[ml>>2]=+g[rj>>2]-+g[ll>>2];g[Qk>>2]=+g[rj>>2]+ +g[ll>>2];g[vl>>2]=+g[rl>>2]-+g[ul>>2];g[yl>>2]=+g[wl>>2]-+g[xl>>2];g[Bk>>2]=+g[vl>>2]-+g[yl>>2];g[Rk>>2]=+g[vl>>2]+ +g[yl>>2];g[Ck>>2]=+g[ml>>2]*.0980171412229538-+g[Bk>>2]*.9951847195625305;g[Wk>>2]=+g[Rk>>2]*.7730104327201843+ +g[Qk>>2]*.6343932747840881;g[Gk>>2]=+g[Bk>>2]*.0980171412229538+ +g[ml>>2]*.9951847195625305;g[Sk>>2]=+g[Qk>>2]*.7730104327201843-+g[Rk>>2]*.6343932747840881;g[_h>>2]=+g[Xg>>2]+ +g[Zh>>2];g[Dk>>2]=+g[aj>>2]+ 
+g[Ck>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[_h>>2]-+g[Dk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[_h>>2]+ +g[Dk>>2];g[tv>>2]=+g[Fk>>2]+ +g[Gk>>2];g[wv>>2]=+g[uv>>2]+ +g[vv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[tv>>2]+ +g[wv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[wv>>2]-+g[tv>>2];g[Ek>>2]=+g[Xg>>2]-+g[Zh>>2];g[Hk>>2]=+g[Fk>>2]-+g[Gk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[Ek>>2]-+g[Hk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Ek>>2]+ +g[Hk>>2];g[xv>>2]=+g[Ck>>2]-+g[aj>>2];g[yv>>2]=+g[vv>>2]-+g[uv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[xv>>2]+ +g[yv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[yv>>2]-+g[xv>>2];g[Mk>>2]=+g[Ik>>2]+ +g[Lk>>2];g[Tk>>2]=+g[Pk>>2]+ +g[Sk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[Mk>>2]-+g[Tk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Mk>>2]+ +g[Tk>>2];g[lv>>2]=+g[Vk>>2]+ +g[Wk>>2];g[qv>>2]=+g[mv>>2]+ +g[pv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[lv>>2]+ +g[qv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[qv>>2]-+g[lv>>2];g[Uk>>2]=+g[Ik>>2]-+g[Lk>>2];g[Xk>>2]=+g[Vk>>2]-+g[Wk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[Uk>>2]-+g[Xk>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Uk>>2]+ +g[Xk>>2];g[rv>>2]=+g[Sk>>2]-+g[Pk>>2];g[sv>>2]=+g[pv>>2]-+g[mv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[rv>>2]+ +g[sv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[sv>>2]-+g[rv>>2];g[Xl>>2]=+g[Tl>>2]-+g[Wl>>2];g[cm>>2]=+g[_l>>2]-+g[bm>>2];g[dm>>2]=+g[Xl>>2]-+g[cm>>2];g[ln>>2]=+g[Xl>>2]+ +g[cm>>2];g[Gv>>2]=+g[Do>>2]-+g[Co>>2];g[Hv>>2]=+g[Xu>>2]-+g[Uu>>2];g[Iv>>2]=+g[Gv>>2]+ +g[Hv>>2];g[Ov>>2]=+g[Hv>>2]-+g[Gv>>2];g[gm>>2]=+g[em>>2]-+g[fm>>2];g[Km>>2]=+g[hm>>2]-+g[Jm>>2];g[Lm>>2]=+g[gm>>2]*.5555702447891235-+g[Km>>2]*.8314695954322815;g[mn>>2]=+g[Km>>2]*.5555702447891235+ +g[gm>>2]*.8314695954322815;g[Om>>2]=+g[Mm>>2]-+g[Nm>>2];g[Rm>>2]=+g[Pm>>2]-+g[Qm>>2];g[Sm>>2]=+g[Om>>2]*.8314695954322815+ 
+g[Rm>>2]*.5555702447891235;g[nn>>2]=+g[Om>>2]*.5555702447891235-+g[Rm>>2]*.8314695954322815;g[sn>>2]=+g[Lm>>2]-+g[Sm>>2];g[Nv>>2]=+g[nn>>2]-+g[mn>>2];g[on>>2]=+g[mn>>2]+ +g[nn>>2];g[Fv>>2]=+g[Lm>>2]+ +g[Sm>>2];g[wn>>2]=+g[un>>2]-+g[vn>>2];g[Dn>>2]=+g[zn>>2]-+g[Cn>>2];g[En>>2]=+g[wn>>2]-+g[Dn>>2];g[qn>>2]=+g[wn>>2]+ +g[Dn>>2];g[Hn>>2]=+g[Fn>>2]-+g[Gn>>2];g[Kn>>2]=+g[In>>2]-+g[Jn>>2];g[Ln>>2]=+g[Hn>>2]-+g[Kn>>2];g[rn>>2]=+g[Hn>>2]+ +g[Kn>>2];g[Mn>>2]=+g[En>>2]*.9569403529167175+ +g[Ln>>2]*.290284663438797;g[Zn>>2]=+g[qn>>2]*.8819212913513184-+g[rn>>2]*.4713967442512512;g[hn>>2]=+g[En>>2]*.290284663438797-+g[Ln>>2]*.9569403529167175;g[Tn>>2]=+g[qn>>2]*.4713967442512512+ +g[rn>>2]*.8819212913513184;g[Pn>>2]=+g[Nn>>2]-+g[On>>2];g[Xm>>2]=+g[Tm>>2]-+g[Wm>>2];g[Ym>>2]=+g[Pn>>2]-+g[Xm>>2];g[Un>>2]=+g[Pn>>2]+ +g[Xm>>2];g[$m>>2]=+g[Zm>>2]-+g[_m>>2];g[cn>>2]=+g[an>>2]-+g[bn>>2];g[dn>>2]=+g[$m>>2]-+g[cn>>2];g[Vn>>2]=+g[$m>>2]+ +g[cn>>2];g[en>>2]=+g[Ym>>2]*.290284663438797-+g[dn>>2]*.9569403529167175;g[_n>>2]=+g[Vn>>2]*.8819212913513184+ +g[Un>>2]*.4713967442512512;g[jn>>2]=+g[dn>>2]*.290284663438797+ +g[Ym>>2]*.9569403529167175;g[Wn>>2]=+g[Un>>2]*.8819212913513184-+g[Vn>>2]*.4713967442512512;g[tn>>2]=+g[dm>>2]+ +g[sn>>2];g[fn>>2]=+g[Mn>>2]+ +g[en>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[tn>>2]-+g[fn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[tn>>2]+ +g[fn>>2];g[Mv>>2]=+g[hn>>2]+ +g[jn>>2];g[Pv>>2]=+g[Nv>>2]+ +g[Ov>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Mv>>2]+ +g[Pv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[Pv>>2]-+g[Mv>>2];g[gn>>2]=+g[dm>>2]-+g[sn>>2];g[kn>>2]=+g[hn>>2]-+g[jn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[gn>>2]-+g[kn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[gn>>2]+ +g[kn>>2];g[Qv>>2]=+g[en>>2]-+g[Mn>>2];g[Rv>>2]=+g[Ov>>2]-+g[Nv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Qv>>2]+ +g[Rv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[Rv>>2]-+g[Qv>>2];g[pn>>2]=+g[ln>>2]+ +g[on>>2];g[Xn>>2]=+g[Tn>>2]+ 
+g[Wn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[pn>>2]-+g[Xn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[pn>>2]+ +g[Xn>>2];g[Ev>>2]=+g[Zn>>2]+ +g[_n>>2];g[Jv>>2]=+g[Fv>>2]+ +g[Iv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ev>>2]+ +g[Jv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[Jv>>2]-+g[Ev>>2];g[Yn>>2]=+g[ln>>2]-+g[on>>2];g[$n>>2]=+g[Zn>>2]-+g[_n>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[Yn>>2]-+g[$n>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Yn>>2]+ +g[$n>>2];g[Kv>>2]=+g[Wn>>2]-+g[Tn>>2];g[Lv>>2]=+g[Iv>>2]-+g[Fv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Kv>>2]+ +g[Lv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[Lv>>2]-+g[Kv>>2];g[ao>>2]=+g[Tl>>2]+ +g[Wl>>2];g[Eo>>2]=+g[Co>>2]+ +g[Do>>2];g[Fo>>2]=+g[ao>>2]-+g[Eo>>2];g[ho>>2]=+g[ao>>2]+ +g[Eo>>2];g[su>>2]=+g[_l>>2]+ +g[bm>>2];g[Yu>>2]=+g[Uu>>2]+ +g[Xu>>2];g[Zu>>2]=+g[su>>2]+ +g[Yu>>2];g[Av>>2]=+g[Yu>>2]-+g[su>>2];g[Go>>2]=+g[em>>2]+ +g[fm>>2];g[Ho>>2]=+g[hm>>2]+ +g[Jm>>2];g[Io>>2]=+g[Go>>2]*.9807852506637573-+g[Ho>>2]*.19509032368659973;g[io>>2]=+g[Ho>>2]*.9807852506637573+ +g[Go>>2]*.19509032368659973;g[Jo>>2]=+g[Mm>>2]+ +g[Nm>>2];g[Ko>>2]=+g[Pm>>2]+ +g[Qm>>2];g[Lo>>2]=+g[Jo>>2]*.19509032368659973+ +g[Ko>>2]*.9807852506637573;g[jo>>2]=+g[Jo>>2]*.9807852506637573-+g[Ko>>2]*.19509032368659973;g[Mo>>2]=+g[Io>>2]-+g[Lo>>2];g[zv>>2]=+g[jo>>2]-+g[io>>2];g[ko>>2]=+g[io>>2]+ +g[jo>>2];g[ru>>2]=+g[Io>>2]+ +g[Lo>>2];g[Oo>>2]=+g[Fn>>2]+ +g[Gn>>2];g[Po>>2]=+g[Cn>>2]+ +g[zn>>2];g[Qo>>2]=+g[Oo>>2]-+g[Po>>2];g[mo>>2]=+g[Oo>>2]+ +g[Po>>2];g[Ro>>2]=+g[un>>2]+ +g[vn>>2];g[So>>2]=+g[In>>2]+ +g[Jn>>2];g[To>>2]=+g[Ro>>2]-+g[So>>2];g[no>>2]=+g[Ro>>2]+ +g[So>>2];g[Uo>>2]=+g[Qo>>2]*.6343932747840881+ +g[To>>2]*.7730104327201843;g[uo>>2]=+g[no>>2]*.9951847195625305-+g[mo>>2]*.0980171412229538;g[eo>>2]=+g[To>>2]*.6343932747840881-+g[Qo>>2]*.7730104327201843;g[oo>>2]=+g[mo>>2]*.9951847195625305+ +g[no>>2]*.0980171412229538;g[Vo>>2]=+g[Nn>>2]+ +g[On>>2];g[Wo>>2]=+g[bn>>2]+ 
+g[an>>2];g[Xo>>2]=+g[Vo>>2]-+g[Wo>>2];g[po>>2]=+g[Vo>>2]+ +g[Wo>>2];g[Yo>>2]=+g[Zm>>2]+ +g[_m>>2];g[Zo>>2]=+g[Tm>>2]+ +g[Wm>>2];g[_o>>2]=+g[Yo>>2]-+g[Zo>>2];g[qo>>2]=+g[Yo>>2]+ +g[Zo>>2];g[$o>>2]=+g[Xo>>2]*.6343932747840881-+g[_o>>2]*.7730104327201843;g[vo>>2]=+g[po>>2]*.0980171412229538+ +g[qo>>2]*.9951847195625305;g[fo>>2]=+g[Xo>>2]*.7730104327201843+ +g[_o>>2]*.6343932747840881;g[ro>>2]=+g[po>>2]*.9951847195625305-+g[qo>>2]*.0980171412229538;g[No>>2]=+g[Fo>>2]+ +g[Mo>>2];g[bo>>2]=+g[Uo>>2]+ +g[$o>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[No>>2]-+g[bo>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[No>>2]+ +g[bo>>2];g[bv>>2]=+g[eo>>2]+ +g[fo>>2];g[Bv>>2]=+g[zv>>2]+ +g[Av>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[bv>>2]+ +g[Bv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[Bv>>2]-+g[bv>>2];g[co>>2]=+g[Fo>>2]-+g[Mo>>2];g[go>>2]=+g[eo>>2]-+g[fo>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[co>>2]-+g[go>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[co>>2]+ +g[go>>2];g[Cv>>2]=+g[$o>>2]-+g[Uo>>2];g[Dv>>2]=+g[Av>>2]-+g[zv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Cv>>2]+ +g[Dv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[Dv>>2]-+g[Cv>>2];g[lo>>2]=+g[ho>>2]+ +g[ko>>2];g[so>>2]=+g[oo>>2]+ +g[ro>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[lo>>2]-+g[so>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[lo>>2]+ +g[so>>2];g[qu>>2]=+g[uo>>2]+ +g[vo>>2];g[_u>>2]=+g[ru>>2]+ +g[Zu>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[qu>>2]+ +g[_u>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[_u>>2]-+g[qu>>2];g[to>>2]=+g[ho>>2]-+g[ko>>2];g[wo>>2]=+g[uo>>2]-+g[vo>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[to>>2]-+g[wo>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[to>>2]+ +g[wo>>2];g[$u>>2]=+g[ro>>2]-+g[oo>>2];g[av>>2]=+g[Zu>>2]-+g[ru>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[$u>>2]+ +g[av>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[av>>2]-+g[$u>>2];g[Yk>>2]=+g[lh>>2]+ +g[wh>>2];g[Al>>2]=+g[Zk>>2]+ +g[_k>>2];g[Bl>>2]=+g[Yk>>2]-+g[Al>>2];g[Bm>>2]=+g[Yk>>2]+ +g[Al>>2];g[Uv>>2]=+g[Kg>>2]+ 
+g[Vg>>2];g[Xv>>2]=+g[Vv>>2]+ +g[Wv>>2];g[Yv>>2]=+g[Uv>>2]+ +g[Xv>>2];g[hv>>2]=+g[Xv>>2]-+g[Uv>>2];g[Cl>>2]=+g[Bh>>2]+ +g[ki>>2];g[Dl>>2]=+g[qi>>2]+ +g[ti>>2];g[El>>2]=+g[Cl>>2]*.8314695954322815-+g[Dl>>2]*.5555702447891235;g[Cm>>2]=+g[Cl>>2]*.5555702447891235+ +g[Dl>>2]*.8314695954322815;g[Fl>>2]=+g[Ai>>2]+ +g[Nh>>2];g[Gl>>2]=+g[Th>>2]+ +g[Wh>>2];g[Hl>>2]=+g[Fl>>2]*.8314695954322815+ +g[Gl>>2]*.5555702447891235;g[Dm>>2]=+g[Gl>>2]*.8314695954322815-+g[Fl>>2]*.5555702447891235;g[Il>>2]=+g[El>>2]-+g[Hl>>2];g[gv>>2]=+g[Dm>>2]-+g[Cm>>2];g[Em>>2]=+g[Cm>>2]+ +g[Dm>>2];g[Tv>>2]=+g[El>>2]+ +g[Hl>>2];g[im>>2]=+g[Rj>>2]+ +g[Wi>>2];g[jm>>2]=+g[Kj>>2]+ +g[zj>>2];g[km>>2]=+g[im>>2]-+g[jm>>2];g[Gm>>2]=+g[im>>2]+ +g[jm>>2];g[lm>>2]=+g[di>>2]+ +g[Pi>>2];g[mm>>2]=+g[Yi>>2]+ +g[Zi>>2];g[nm>>2]=+g[lm>>2]-+g[mm>>2];g[Hm>>2]=+g[lm>>2]+ +g[mm>>2];g[om>>2]=+g[km>>2]*.4713967442512512+ +g[nm>>2]*.8819212913513184;g[Ql>>2]=+g[Hm>>2]*.9569403529167175-+g[Gm>>2]*.290284663438797;g[ym>>2]=+g[nm>>2]*.4713967442512512-+g[km>>2]*.8819212913513184;g[Kl>>2]=+g[Gm>>2]*.9569403529167175+ +g[Hm>>2]*.290284663438797;g[pm>>2]=+g[fj>>2]+ +g[qj>>2];g[qm>>2]=+g[xl>>2]+ +g[wl>>2];g[rm>>2]=+g[pm>>2]-+g[qm>>2];g[Ll>>2]=+g[pm>>2]+ +g[qm>>2];g[sm>>2]=+g[rl>>2]+ +g[ul>>2];g[tm>>2]=+g[$k>>2]+ +g[kl>>2];g[um>>2]=+g[sm>>2]-+g[tm>>2];g[Ml>>2]=+g[sm>>2]+ +g[tm>>2];g[vm>>2]=+g[rm>>2]*.4713967442512512-+g[um>>2]*.8819212913513184;g[Rl>>2]=+g[Ll>>2]*.290284663438797+ +g[Ml>>2]*.9569403529167175;g[zm>>2]=+g[rm>>2]*.8819212913513184+ +g[um>>2]*.4713967442512512;g[Nl>>2]=+g[Ll>>2]*.9569403529167175-+g[Ml>>2]*.290284663438797;g[Jl>>2]=+g[Bl>>2]+ +g[Il>>2];g[wm>>2]=+g[om>>2]+ +g[vm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[Jl>>2]-+g[wm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Jl>>2]+ +g[wm>>2];g[fv>>2]=+g[ym>>2]+ +g[zm>>2];g[iv>>2]=+g[gv>>2]+ +g[hv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[fv>>2]+ 
+g[iv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[iv>>2]-+g[fv>>2];g[xm>>2]=+g[Bl>>2]-+g[Il>>2];g[Am>>2]=+g[ym>>2]-+g[zm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[xm>>2]-+g[Am>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[xm>>2]+ +g[Am>>2];g[jv>>2]=+g[vm>>2]-+g[om>>2];g[kv>>2]=+g[hv>>2]-+g[gv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[jv>>2]+ +g[kv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[kv>>2]-+g[jv>>2];g[Fm>>2]=+g[Bm>>2]+ +g[Em>>2];g[Ol>>2]=+g[Kl>>2]+ +g[Nl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[Fm>>2]-+g[Ol>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Fm>>2]+ +g[Ol>>2];g[Sv>>2]=+g[Ql>>2]+ +g[Rl>>2];g[cv>>2]=+g[Tv>>2]+ +g[Yv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Sv>>2]+ +g[cv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[cv>>2]-+g[Sv>>2];g[Pl>>2]=+g[Bm>>2]-+g[Em>>2];g[Sl>>2]=+g[Ql>>2]-+g[Rl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[Pl>>2]-+g[Sl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Pl>>2]+ +g[Sl>>2];g[dv>>2]=+g[Nl>>2]-+g[Kl>>2];g[ev>>2]=+g[Yv>>2]-+g[Tv>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[dv>>2]+ +g[ev>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[ev>>2]-+g[dv>>2];c[nw>>2]=(c[nw>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+40;c[n>>2]=c[n>>2]^c[2998]}i=ow;return}/* Vj(a): spills a into a 16-byte frame reserved by bumping i, calls gh(a, 25, 3144), then restores i and returns nothing. NOTE(review): 25 and 3144 are opaque compiler-emitted constants, presumably a function-table index and a static data address - confirm against gh. */function Vj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,25,3144);i=b;return}/* Wj(a,b,d,e,f,h,j): in-place single-precision butterfly loop; every operand is read and written through the float heap view g. a and b are byte addresses of the two arrays that are updated in place; d is the byte address of a coefficient table; e is the element stride between the eight points touched per pass (element offsets 0..7 times e in both arrays); the loop counter starts at f and the loop exits once it is >= h; j is the element step added to a and b after each pass. d is advanced by f*6 floats before the loop and by 24 bytes (6 floats) per pass. Four table values and their pairwise products, plus two further table values, form the multiplier pairs applied to points 1..7 (point 0 is used as is) before the sums and differences; the literal .7071067690849304 scales the intermediate terms that feed the odd-offset outputs. All locals are spilled to a 384-byte frame at i, which is restored on exit. NOTE(review): this looks like a compiler-generated FFTW radix-8 twiddle codelet - confirm against the C source it was built from. */function Wj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0;Va=i;i=i+384|0;k=Va+368|0;l=Va+364|0;m=Va+360|0;n=Va+356|0;Wa=Va+352|0;o=Va+348|0;p=Va+344|0;Ua=Va+336|0;O=Va+332|0;R=Va+328|0;P=Va+324|0;S=Va+320|0;U=Va+316|0;wa=Va+312|0;Aa=Va+308|0;Ca=Va+304|0;Fa=Va+300|0;Ga=Va+296|0;Ha=Va+292|0;Ta=Va+288|0;Ja=Va+284|0;Ra=Va+280|0;Q=Va+276|0;va=Va+272|0;T=Va+268|0;ua=Va+264|0;za=Va+260|0;J=Va+256|0;fa=Va+252|0;E=Va+248|0;da=Va+244|0;y=Va+240|0;pa=Va+236|0;sa=Va+232|0;Ma=Va+228|0;K=Va+224|0;ia=Va+220|0;B=Va+216|0;Y=Va+212|0;x=Va+208|0;ka=Va+204|0;na=Va+200|0;q=Va+196|0;D=Va+192|0;ya=Va+188|0;C=Va+184|0;V=Va+180|0;xa=Va+176|0;$=Va+172|0;qa=Va+168|0;ca=Va+164|0;ra=Va+160|0;Z=Va+156|0;_=Va+152|0;aa=Va+148|0;ba=Va+144|0;Ea=Va+140|0;ga=Va+136|0;La=Va+132|0;ha=Va+128|0;Ba=Va+124|0;Da=Va+120|0;Ia=Va+116|0;Ka=Va+112|0;Qa=Va+108|0;la=Va+104|0;X=Va+100|0;ma=Va+96|0;Oa=Va+92|0;Pa=Va+88|0;Sa=Va+84|0;W=Va+80|0;Na=Va+76|0;ea=Va+72|0;G=Va+68|0;H=Va+64|0;A=Va+60|0;F=Va+56|0;w=Va+52|0;z=Va+48|0;s=Va+44|0;L=Va+40|0;v=Va+36|0;I=Va+32|0;t=Va+28|0;u=Va+24|0;ja=Va+20|0;N=Va+16|0;r=Va+12|0;M=Va+8|0;oa=Va+4|0;ta=Va;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Wa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Va+340>>2]=.7071067690849304;c[Ua>>2]=c[Wa>>2];c[m>>2]=(c[m>>2]|0)+((c[Wa>>2]|0)*6<<2);while(1){if((c[Ua>>2]|0)>=(c[o>>2]|0))break;g[O>>2]=+g[c[m>>2]>>2];g[R>>2]=+g[(c[m>>2]|0)+4>>2];g[P>>2]=+g[(c[m>>2]|0)+8>>2];g[S>>2]=+g[(c[m>>2]|0)+12>>2];g[Q>>2]=+g[O>>2]*+g[P>>2];g[va>>2]=+g[R>>2]*+g[P>>2];g[T>>2]=+g[R>>2]*+g[S>>2];g[ua>>2]=+g[O>>2]*+g[S>>2];g[U>>2]=+g[Q>>2]-+g[T>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[Aa>>2]=+g[Q>>2]+ 
+g[T>>2];g[Ca>>2]=+g[ua>>2]-+g[va>>2];g[Fa>>2]=+g[(c[m>>2]|0)+16>>2];g[Ga>>2]=+g[(c[m>>2]|0)+20>>2];g[Ha>>2]=+g[O>>2]*+g[Fa>>2]+ +g[R>>2]*+g[Ga>>2];g[Ta>>2]=+g[Aa>>2]*+g[Ga>>2]-+g[Ca>>2]*+g[Fa>>2];g[Ja>>2]=+g[O>>2]*+g[Ga>>2]-+g[R>>2]*+g[Fa>>2];g[Ra>>2]=+g[Aa>>2]*+g[Fa>>2]+ +g[Ca>>2]*+g[Ga>>2];g[q>>2]=+g[c[k>>2]>>2];g[D>>2]=+g[c[l>>2]>>2];g[V>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[xa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ya>>2]=+g[U>>2]*+g[V>>2]+ +g[wa>>2]*+g[xa>>2];g[C>>2]=+g[U>>2]*+g[xa>>2]-+g[wa>>2]*+g[V>>2];g[za>>2]=+g[q>>2]+ +g[ya>>2];g[J>>2]=+g[D>>2]-+g[C>>2];g[fa>>2]=+g[q>>2]-+g[ya>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Z>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[$>>2]=+g[Fa>>2]*+g[Z>>2]+ +g[Ga>>2]*+g[_>>2];g[qa>>2]=+g[Fa>>2]*+g[_>>2]-+g[Ga>>2]*+g[Z>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ca>>2]=+g[P>>2]*+g[aa>>2]+ +g[S>>2]*+g[ba>>2];g[ra>>2]=+g[P>>2]*+g[ba>>2]-+g[S>>2]*+g[aa>>2];g[da>>2]=+g[$>>2]+ +g[ca>>2];g[y>>2]=+g[qa>>2]+ +g[ra>>2];g[pa>>2]=+g[$>>2]-+g[ca>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[Ba>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Da>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[ga>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[La>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[Ka>>2];g[ha>>2]=+g[Ha>>2]*+g[Ka>>2]-+g[Ja>>2]*+g[Ia>>2];g[Ma>>2]=+g[Ea>>2]+ +g[La>>2];g[K>>2]=+g[Ea>>2]-+g[La>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[B>>2]=+g[ga>>2]+ +g[ha>>2];g[Oa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Qa>>2]=+g[O>>2]*+g[Oa>>2]+ +g[R>>2]*+g[Pa>>2];g[la>>2]=+g[O>>2]*+g[Pa>>2]-+g[R>>2]*+g[Oa>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[W>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[X>>2]=+g[Ra>>2]*+g[Sa>>2]+ 
+g[Ta>>2]*+g[W>>2];g[ma>>2]=+g[Ra>>2]*+g[W>>2]-+g[Ta>>2]*+g[Sa>>2];g[Y>>2]=+g[Qa>>2]+ +g[X>>2];g[x>>2]=+g[la>>2]+ +g[ma>>2];g[ka>>2]=+g[Qa>>2]-+g[X>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[Na>>2]=+g[za>>2]+ +g[Ma>>2];g[ea>>2]=+g[Y>>2]+ +g[da>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Na>>2]-+g[ea>>2];g[c[k>>2]>>2]=+g[Na>>2]+ +g[ea>>2];g[A>>2]=+g[x>>2]+ +g[y>>2];g[F>>2]=+g[B>>2]+ +g[E>>2];g[c[l>>2]>>2]=+g[A>>2]+ +g[F>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[F>>2]-+g[A>>2];g[w>>2]=+g[za>>2]-+g[Ma>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[w>>2]-+g[z>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[w>>2]+ +g[z>>2];g[G>>2]=+g[da>>2]-+g[Y>>2];g[H>>2]=+g[E>>2]-+g[B>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]+ +g[H>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[H>>2]-+g[G>>2];g[s>>2]=+g[fa>>2]-+g[ia>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[t>>2]=+g[na>>2]-+g[ka>>2];g[u>>2]=+g[pa>>2]+ +g[sa>>2];g[v>>2]=(+g[t>>2]-+g[u>>2])*.7071067690849304;g[I>>2]=(+g[t>>2]+ +g[u>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[s>>2]-+g[v>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[L>>2]-+g[I>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[s>>2]+ +g[v>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[ja>>2]=+g[fa>>2]+ +g[ia>>2];g[N>>2]=+g[K>>2]+ +g[J>>2];g[oa>>2]=+g[ka>>2]+ +g[na>>2];g[ta>>2]=+g[pa>>2]-+g[sa>>2];g[r>>2]=(+g[oa>>2]+ +g[ta>>2])*.7071067690849304;g[M>>2]=(+g[ta>>2]-+g[oa>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ja>>2]-+g[r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[N>>2]-+g[M>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ja>>2]+ +g[r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[N>>2];c[Ua>>2]=(c[Ua>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+24}i=Va;return}/* Xj(a): for index 0 and then index 1, calls Yj(index) and hands the returned value to Bd(a, value). The loop ends once the index is >= 2 (unsigned compare). */function Xj(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=2)break;a=c[b>>2]|0;Bd(a,Yj(c[d>>2]|0)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}/* Yj(a): obtains a record address from zd(12, 13460), stores a in the 32-bit word at byte offset 8 of that record, and returns the record address. NOTE(review): 12 is presumably the record size in bytes and 13460 the address of a static descriptor - confirm against zd. */function Yj(a){a=a|0;var 
b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,13460)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}/* Zj(a,b,d): builder that returns the address of a new record created by rn(112, 13480, 20 or 21), or 0 on failure. Proceeds only if _j(a,b,d) is nonzero. It then reads sizes from b (q = ie(word at b+4); ke(word at b+8, ...) fills y, p and t), sets r = Bb(q, y, table entry at 13472 indexed by the word at a+8) and h = Cb(q+2, y), and sets w to 1 when (word at b+20 minus word at b+24)/4 is positive, else 0, with o = 1-w. A scratch area of (r*4)*h bytes is requested from wb, and three sub-objects k, l and m are created through vc/uc from descriptors built by vn and qh; the scratch area is passed to xb before m is created, and m's descriptor uses Ed(y % r, p, t). Whether the word at b+28 is zero selects one of two mirrored argument orders and the final rn index (20 when it is zero, 21 otherwise). On success k, l, m, q, y, r, h, p*r, t*r, w and o are stored at offsets 64..104 of the new record, and jc/ic are called on the +8 sub-records of k, l, m and the new record. NOTE(review): helper roles (wb allocator, xb/yb release, pc destructor, jc/ic cost accumulation) are inferred from how they are used here - confirm. */function Zj(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0;z=i;i=i+128|0;f=z+112|0;B=z+108|0;A=z+104|0;g=z+100|0;v=z+96|0;e=z+92|0;k=z+88|0;l=z+84|0;m=z+80|0;u=z+76|0;j=z+72|0;r=z+68|0;h=z+64|0;q=z+60|0;y=z+56|0;p=z+52|0;t=z+48|0;o=z+44|0;w=z+40|0;n=z+36|0;s=z+32|0;x=z;c[B>>2]=a;c[A>>2]=b;c[g>>2]=d;c[e>>2]=c[B>>2];c[k>>2]=0;c[l>>2]=0;c[m>>2]=0;c[u>>2]=c[A>>2];c[j>>2]=0;c[r>>2]=0;do if(_j(c[e>>2]|0,c[A>>2]|0,c[g>>2]|0)|0){c[q>>2]=ie(c[(c[u>>2]|0)+4>>2]|0)|0;ke(c[(c[u>>2]|0)+8>>2]|0,y,p,t)|0;c[r>>2]=Bb(c[q>>2]|0,c[y>>2]|0,c[13472+(c[(c[e>>2]|0)+8>>2]<<2)>>2]|0)|0;c[h>>2]=Cb((c[q>>2]|0)+2|0,c[y>>2]|0)|0;c[w>>2]=(((c[(c[u>>2]|0)+20>>2]|0)-(c[(c[u>>2]|0)+24>>2]|0)|0)/4|0|0)>0?1:0;c[o>>2]=1-(c[w>>2]|0);c[j>>2]=wb(_(c[r>>2]<<2,c[h>>2]|0)|0)|0;c[n>>2]=_(c[p>>2]|0,_(c[r>>2]|0,(c[y>>2]|0)/(c[r>>2]|0)|0)|0)|0;c[s>>2]=_(c[t>>2]|0,_(c[r>>2]|0,(c[y>>2]|0)/(c[r>>2]|0)|0)|0)|0;b=c[g>>2]|0;a=c[q>>2]|0;e=(c[(c[u>>2]|0)+4>>2]|0)+4|0;if(!(c[(c[u>>2]|0)+28>>2]|0)){e=Ed(a,c[e+4>>2]|0,2)|0;d=Ed(c[r>>2]|0,c[p>>2]|0,c[h>>2]|0)|0;d=vn(e,d,c[(c[u>>2]|0)+12>>2]|0,c[(c[u>>2]|0)+16>>2]|0,(c[j>>2]|0)+(c[w>>2]<<2)|0,(c[j>>2]|0)+(c[o>>2]<<2)|0,c[(c[u>>2]|0)+28>>2]|0)|0;c[k>>2]=vc(b,d,0,0,(c[(c[u>>2]|0)+12>>2]|0)==(c[(c[u>>2]|0)+20>>2]|0)?4096:0)|0;if(!(c[k>>2]|0))break;a=c[g>>2]|0;e=Dd()|0;d=Fd(c[r>>2]|0,c[h>>2]|0,c[t>>2]|0,((c[q>>2]|0)/2|0)+1|0,2,c[(c[(c[u>>2]|0)+4>>2]|0)+4+8>>2]|0)|0;c[l>>2]=uc(a,qh(e,d,(c[j>>2]|0)+(c[w>>2]<<2)|0,(c[j>>2]|0)+(c[o>>2]<<2)|0,c[(c[u>>2]|0)+20>>2]|0,c[(c[u>>2]|0)+24>>2]|0)|0)|0;if(!(c[l>>2]|0))break;xb(c[j>>2]|0);c[j>>2]=0;a=c[g>>2]|0;e=Pd(c[(c[u>>2]|0)+4>>2]|0)|0;d=Ed((c[y>>2]|0)%(c[r>>2]|0)|0,c[p>>2]|0,c[t>>2]|0)|0;c[m>>2]=uc(a,vn(e,d,(c[(c[u>>2]|0)+12>>2]|0)+(c[n>>2]<<2)|0,(c[(c[u>>2]|0)+16>>2]|0)+(c[n>>2]<<2)|0,(c[(c[u>>2]|0)+20>>2]|0)+(c[s>>2]<<2)|0,(c[(c[u>>2]|0)+24>>2]|0
)+(c[s>>2]<<2)|0,c[(c[u>>2]|0)+28>>2]|0)|0)|0;if(!(c[m>>2]|0))break;c[v>>2]=rn(112,13480,20)|0}else{e=Ed(a,2,c[e+8>>2]|0)|0;d=Ed(c[r>>2]|0,c[h>>2]|0,c[t>>2]|0)|0;c[k>>2]=vc(b,vn(e,d,c[(c[u>>2]|0)+12>>2]|0,c[(c[u>>2]|0)+16>>2]|0,(c[j>>2]|0)+(c[w>>2]<<2)|0,(c[j>>2]|0)+(c[o>>2]<<2)|0,c[(c[u>>2]|0)+28>>2]|0)|0,0,0,4096)|0;if(!(c[k>>2]|0))break;a=c[g>>2]|0;e=Dd()|0;d=Fd(c[r>>2]|0,c[p>>2]|0,c[h>>2]|0,((c[q>>2]|0)/2|0)+1|0,c[(c[(c[u>>2]|0)+4>>2]|0)+4+4>>2]|0,2)|0;c[l>>2]=uc(a,qh(e,d,c[(c[u>>2]|0)+20>>2]|0,c[(c[u>>2]|0)+24>>2]|0,(c[j>>2]|0)+(c[w>>2]<<2)|0,(c[j>>2]|0)+(c[o>>2]<<2)|0)|0)|0;if(!(c[l>>2]|0))break;xb(c[j>>2]|0);c[j>>2]=0;a=c[g>>2]|0;e=Pd(c[(c[u>>2]|0)+4>>2]|0)|0;d=Ed((c[y>>2]|0)%(c[r>>2]|0)|0,c[p>>2]|0,c[t>>2]|0)|0;c[m>>2]=uc(a,vn(e,d,(c[(c[u>>2]|0)+12>>2]|0)+(c[s>>2]<<2)|0,(c[(c[u>>2]|0)+16>>2]|0)+(c[s>>2]<<2)|0,(c[(c[u>>2]|0)+20>>2]|0)+(c[n>>2]<<2)|0,(c[(c[u>>2]|0)+24>>2]|0)+(c[n>>2]<<2)|0,c[(c[u>>2]|0)+28>>2]|0)|0)|0;if(!(c[m>>2]|0))break;c[v>>2]=rn(112,13480,21)|0}c[(c[v>>2]|0)+64>>2]=c[k>>2];c[(c[v>>2]|0)+68>>2]=c[l>>2];c[(c[v>>2]|0)+72>>2]=c[m>>2];c[(c[v>>2]|0)+76>>2]=c[q>>2];c[(c[v>>2]|0)+80>>2]=c[y>>2];u=_(c[p>>2]|0,c[r>>2]|0)|0;c[(c[v>>2]|0)+92>>2]=u;u=_(c[t>>2]|0,c[r>>2]|0)|0;c[(c[v>>2]|0)+96>>2]=u;c[(c[v>>2]|0)+104>>2]=c[w>>2];c[(c[v>>2]|0)+100>>2]=c[o>>2];c[(c[v>>2]|0)+84>>2]=c[r>>2];c[(c[v>>2]|0)+88>>2]=c[h>>2];jc((c[k>>2]|0)+8|0,(c[l>>2]|0)+8|0,x);ic((c[y>>2]|0)/(c[r>>2]|0)|0,x,(c[m>>2]|0)+8|0,(c[v>>2]|0)+8|0);c[f>>2]=c[v>>2];x=c[f>>2]|0;i=z;return x|0}while(0);/* reached when _j rejected the request or a creation step broke out: pass the scratch address and the three sub-object slots (possibly still 0) to yb/pc, then return 0 */yb(c[j>>2]|0);pc(c[m>>2]|0);pc(c[l>>2]|0);pc(c[k>>2]|0);c[f>>2]=0;x=c[f>>2]|0;i=z;return x|0}/* _j(a,b,d): predicate returning 1 or 0. Returns 0 when bit 1024 is set in the word at d+164, or when ek(a,b,d) returns 0. Otherwise let e be true when bit 65536 of the word at d+164 is set. If the word at b+28 equals 4: returns 0 only when e holds, the words at b+12 and b+20 are equal, and Db(word at offset 4 of the record addressed by the word at b+4) is nonzero. If the word at b+28 is not 4 and e holds: returns 1 only when the words at b+12 and b+20 are equal and that same Db call returns 0, otherwise 0. In every remaining case returns 1. */function _j(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;h=k+16|0;e=k+12|0;f=k+8|0;g=k+4|0;j=k;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;if(c[(c[g>>2]|0)+164>>2]&1024){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}if(!(ek(c[e>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}c[j>>2]=c[f>>2];e=(c[(c[g>>2]|0)+164>>2]&65536|0)!=0;do 
if((c[(c[j>>2]|0)+28>>2]|0)==4){if((e?(c[(c[j>>2]|0)+12>>2]|0)==(c[(c[j>>2]|0)+20>>2]|0):0)?(Db(c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0)|0)!=0:0){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}}else if(e){if((c[(c[j>>2]|0)+12>>2]|0)==(c[(c[j>>2]|0)+20>>2]|0)?(Db(c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0)|0)==0:0)break;c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}while(0);c[h>>2]=1;a=c[h>>2]|0;i=k;return a|0}/* $j(a,b,d,e,f): drives the three sub-objects stored in record a at offsets +64, +68 and +72; each is invoked through table Ya using the index held at offset +56 of the sub-object, masked with 63. Requests a scratch area of (word at a+84 * 4) * (word at a+88) bytes from wb and derives two addresses inside it at element offsets (word at a+104) and (word at a+100). With s starting at the word at a+84 and increasing by that same value while s <= word at a+80: calls the +64 object with (b, d, scratch1, scratch2), advances b and d by (word at a+92) elements, calls the +68 object with (scratch1, scratch2, e, f), and advances e and f by (word at a+96) elements. Afterwards passes the scratch address to xb and calls the +72 object with the advanced (b, d, e, f). NOTE(review): wb/xb are presumably allocate/free - confirm. */function $j(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+80|0;y=x+64|0;g=x+60|0;h=x+56|0;j=x+52|0;k=x+48|0;r=x+44|0;o=x+40|0;p=x+36|0;s=x+32|0;w=x+28|0;u=x+24|0;t=x+20|0;v=x+16|0;n=x+12|0;m=x+8|0;l=x+4|0;q=x;c[y>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[r>>2]=c[y>>2];c[o>>2]=c[(c[r>>2]|0)+64>>2];c[p>>2]=c[(c[r>>2]|0)+68>>2];c[w>>2]=c[(c[r>>2]|0)+80>>2];c[u>>2]=c[(c[r>>2]|0)+84>>2];c[t>>2]=c[(c[r>>2]|0)+92>>2];c[v>>2]=c[(c[r>>2]|0)+96>>2];c[n>>2]=wb(_(c[u>>2]<<2,c[(c[r>>2]|0)+88>>2]|0)|0)|0;c[m>>2]=(c[n>>2]|0)+(c[(c[r>>2]|0)+104>>2]<<2);c[l>>2]=(c[n>>2]|0)+(c[(c[r>>2]|0)+100>>2]<<2);c[s>>2]=c[u>>2];while(1){if((c[s>>2]|0)>(c[w>>2]|0))break;Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[g>>2]|0,c[h>>2]|0,c[m>>2]|0,c[l>>2]|0);c[g>>2]=(c[g>>2]|0)+(c[t>>2]<<2);c[h>>2]=(c[h>>2]|0)+(c[t>>2]<<2);Ya[c[(c[p>>2]|0)+56>>2]&63](c[p>>2]|0,c[m>>2]|0,c[l>>2]|0,c[j>>2]|0,c[k>>2]|0);c[j>>2]=(c[j>>2]|0)+(c[v>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[v>>2]<<2);c[s>>2]=(c[s>>2]|0)+(c[u>>2]|0)}xb(c[n>>2]|0);c[q>>2]=c[(c[r>>2]|0)+72>>2];Ya[c[(c[q>>2]|0)+56>>2]&63](c[q>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);i=x;return}/* ak(a,b,d,e,f): counterpart of $j with the two per-chunk calls in the opposite order. Uses the same record fields, scratch request and loop bounds, but each iteration first calls the +68 object with (e, f, scratch1, scratch2) and advances e and f by (word at a+92) elements, then calls the +64 object with (b, d, scratch1, scratch2) and advances b and d by (word at a+96) elements. Afterwards passes the scratch address to xb and calls the +72 object with the advanced (b, d, e, f). */function ak(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+80|0;y=x+64|0;g=x+60|0;h=x+56|0;j=x+52|0;k=x+48|0;r=x+44|0;o=x+40|0;p=x+36|0;s=x+32|0;w=x+28|0;u=x+24|0;t=x+20|0;v=x+16|0;n=x+12|0;m=x+8|0;l=x+4|0;q=x;c[y>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[r>>2]=c[y>>2];c[o>>2]=c[(c[r>>2]|0)+64>>2];c[p>>2]=c[(c[r>>2]|0)+68>>2];c[w>>2]=c[(c[r>>2]|0)+80>>2];c[u>>2]=c[(c[r>>2]|0)+84>>2];c[t>>2]=c[(c[r>>2]|0)+92>>2];c[v>>2]=c[(c[r>>2]|0)+96>>2];c[n>>2]=wb(_(c[u>>2]<<2,c[(c[r>>2]|0)+88>>2]|0)|0)|0;c[m>>2]=(c[n>>2]|0)+(c[(c[r>>2]|0)+104>>2]<<2);c[l>>2]=(c[n>>2]|0)+(c[(c[r>>2]|0)+100>>2]<<2);c[s>>2]=c[u>>2];while(1){if((c[s>>2]|0)>(c[w>>2]|0))break;Ya[c[(c[p>>2]|0)+56>>2]&63](c[p>>2]|0,c[j>>2]|0,c[k>>2]|0,c[m>>2]|0,c[l>>2]|0);c[j>>2]=(c[j>>2]|0)+(c[t>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[t>>2]<<2);Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[g>>2]|0,c[h>>2]|0,c[m>>2]|0,c[l>>2]|0);c[g>>2]=(c[g>>2]|0)+(c[v>>2]<<2);c[h>>2]=(c[h>>2]|0)+(c[v>>2]<<2);c[s>>2]=(c[s>>2]|0)+(c[u>>2]|0)}xb(c[n>>2]|0);c[q>>2]=c[(c[r>>2]|0)+72>>2];Ya[c[(c[q>>2]|0)+56>>2]&63](c[q>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);i=x;return}/* bk(a,b): calls rc(sub, b) for each of the three sub-objects stored at offsets +64, +68 and +72 of record a, in that order. */function bk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+72>>2]|0,c[e>>2]|0);i=d;return}/* ck(a,b): reads a function-table index from the first word of b and calls eb[index & 63](b, 21870, args). args is a 28-byte stack block holding, in order: the words of a at +76, +84 and +80, the remainder (word at a+88) % (word at a+76), and the three sub-objects at +64, +68 and +72. NOTE(review): 21870 is presumably the address of a format string consumed by the callback - confirm. */function ck(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;d=i;i=i+48|0;e=d;k=d+36|0;l=d+32|0;m=d+28|0;c[k>>2]=a;c[l>>2]=b;c[m>>2]=c[k>>2];b=c[c[l>>2]>>2]|0;a=c[l>>2]|0;l=c[(c[m>>2]|0)+84>>2]|0;k=c[(c[m>>2]|0)+80>>2]|0;j=(c[(c[m>>2]|0)+88>>2]|0)%(c[(c[m>>2]|0)+76>>2]|0)|0;h=c[(c[m>>2]|0)+64>>2]|0;g=c[(c[m>>2]|0)+68>>2]|0;f=c[(c[m>>2]|0)+72>>2]|0;c[e>>2]=c[(c[m>>2]|0)+76>>2];c[e+4>>2]=l;c[e+8>>2]=k;c[e+12>>2]=j;c[e+16>>2]=h;c[e+20>>2]=g;c[e+24>>2]=f;eb[b&63](a,21870,e);i=d;return}function dk(a){a=a|0;var 
b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+72>>2]|0);pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function ek(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+48|0;h=n+32|0;j=n+28|0;o=n+24|0;k=n+20|0;m=n+16|0;l=n+12|0;g=n+8|0;e=n+4|0;f=n;c[j>>2]=a;c[o>>2]=b;c[k>>2]=d;c[m>>2]=c[o>>2];c[l>>2]=(c[(c[m>>2]|0)+4>>2]|0)+4;do if(((c[c[(c[m>>2]|0)+8>>2]>>2]|0)<=1?(c[c[(c[m>>2]|0)+4>>2]>>2]|0)==1:0)?((c[c[l>>2]>>2]|0)%2|0|0)==0:0){if((c[(c[m>>2]|0)+28>>2]|0)!=0?(c[(c[m>>2]|0)+28>>2]|0)!=4:0)break;ke(c[(c[m>>2]|0)+8>>2]|0,g,e,f)|0;if((Db(c[c[l>>2]>>2]|0)|0)!=0?(c[(c[k>>2]|0)+164>>2]&16384|0)!=0:0){c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}if(Eb(c[c[l>>2]>>2]|0,c[g>>2]|0,c[(c[j>>2]|0)+8>>2]|0,13472,2)|0){c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}a=c[m>>2]|0;if((c[(c[m>>2]|0)+12>>2]|0)!=(c[(c[m>>2]|0)+20>>2]|0))if((c[a+28>>2]|0)==4){c[h>>2]=c[(c[k>>2]|0)+164>>2]&4096;l=c[h>>2]|0;i=n;return l|0}else{c[h>>2]=(c[(c[l>>2]|0)+8>>2]|0)>2&1;l=c[h>>2]|0;i=n;return l|0}if(dp(a,2147483647)|0){c[h>>2]=1;l=c[h>>2]|0;i=n;return l|0}if((c[c[(c[m>>2]|0)+8>>2]>>2]|0)!=0?(l=Bb(c[c[l>>2]>>2]|0,c[(c[(c[m>>2]|0)+8>>2]|0)+4>>2]|0,c[13472+(c[(c[j>>2]|0)+8>>2]<<2)>>2]|0)|0,(l|0)!=(c[(c[(c[m>>2]|0)+8>>2]|0)+4>>2]|0)):0)break;c[h>>2]=1;l=c[h>>2]|0;i=n;return l|0}while(0);c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}function fk(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=2)break;a=c[b>>2]|0;Bd(a,gk(c[d>>2]|0)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function gk(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,13496)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}function hk(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;y=i;i=i+112|0;f=y+108|0;A=y+104|0;z=y+100|0;g=y+96|0;v=y+92|0;e=y+88|0;k=y+84|0;l=y+80|0;m=y+76|0;u=y+72|0;j=y+68|0;r=y+64|0;h=y+60|0;q=y+56|0;x=y+52|0;p=y+48|0;t=y+44|0;n=y+40|0;o=y+36|0;s=y+32|0;w=y;c[A>>2]=a;c[z>>2]=b;c[g>>2]=d;c[e>>2]=c[A>>2];c[k>>2]=0;c[l>>2]=0;c[m>>2]=0;c[u>>2]=c[z>>2];c[j>>2]=0;c[r>>2]=0;do if(ik(c[e>>2]|0,c[z>>2]|0,c[g>>2]|0)|0){c[q>>2]=ie(c[(c[u>>2]|0)+4>>2]|0)|0;ke(c[(c[u>>2]|0)+8>>2]|0,x,p,t)|0;c[n>>2]=(c[(c[u>>2]|0)+20>>2]|0)==4&1;c[r>>2]=Bb(c[q>>2]|0,c[x>>2]|0,c[13508+(c[(c[e>>2]|0)+8>>2]<<2)>>2]|0)|0;c[h>>2]=Cb(c[q>>2]|0,c[x>>2]|0)|0;c[j>>2]=wb(_(c[r>>2]<<2,c[h>>2]|0)|0)|0;b=c[g>>2]|0;a=c[q>>2]|0;e=(c[(c[u>>2]|0)+4>>2]|0)+4|0;if(c[n>>2]|0){e=Ed(a,1,c[e+8>>2]|0)|0;d=Ed(c[r>>2]|0,c[h>>2]|0,c[t>>2]|0)|0;c[k>>2]=vc(b,Gn(e,d,c[j>>2]|0,c[(c[u>>2]|0)+16>>2]|0,(c[u>>2]|0)+20|0)|0,0,0,4096)|0;if(!(c[k>>2]|0))break;e=c[g>>2]|0;d=Fd(c[r>>2]|0,c[p>>2]|0,c[h>>2]|0,c[q>>2]|0,c[(c[(c[u>>2]|0)+4>>2]|0)+4+4>>2]|0,1)|0;c[l>>2]=uc(e,Jn(d,c[(c[u>>2]|0)+12>>2]|0,c[j>>2]|0)|0)|0;if(!(c[l>>2]|0))break}else{e=Ed(a,c[e+4>>2]|0,1)|0;d=Ed(c[r>>2]|0,c[p>>2]|0,c[h>>2]|0)|0;d=Gn(e,d,c[(c[u>>2]|0)+12>>2]|0,c[j>>2]|0,(c[u>>2]|0)+20|0)|0;c[k>>2]=vc(b,d,0,0,(c[(c[u>>2]|0)+12>>2]|0)==(c[(c[u>>2]|0)+16>>2]|0)?4096:0)|0;if(!(c[k>>2]|0))break;e=c[g>>2]|0;d=Fd(c[r>>2]|0,c[h>>2]|0,c[t>>2]|0,c[q>>2]|0,1,c[(c[(c[u>>2]|0)+4>>2]|0)+4+8>>2]|0)|0;c[l>>2]=uc(e,Jn(d,c[j>>2]|0,c[(c[u>>2]|0)+16>>2]|0)|0)|0;if(!(c[l>>2]|0))break}xb(c[j>>2]|0);c[j>>2]=0;c[o>>2]=_(c[p>>2]|0,_(c[r>>2]|0,(c[x>>2]|0)/(c[r>>2]|0)|0)|0)|0;c[s>>2]=_(c[t>>2]|0,_(c[r>>2]|0,(c[x>>2]|0)/(c[r>>2]|0)|0)|0)|0;a=c[g>>2]|0;e=Pd(c[(c[u>>2]|0)+4>>2]|0)|0;d=Ed((c[x>>2]|0)%(c[r>>2]|0)|0,c[p>>2]|0,c[t>>2]|0)|0;c[m>>2]=uc(a,Gn(e,d,(c[(c[u>>2]|0)+12>>2]|0)+(c[o>>2]<<2)|0,(c[(c[u>>2]|0)+16>>2]|0)+(c[s>>2]<<2)|0,(c[u>>2]|0)+20|0)|0)|0;if(c[m>>2]|0){c[v>>2]=sn(104,13516,(c[n>>2]|0)!=0?28:27)|0;c[(c[v>>2]|0)+64>>2]=c[k>>2];c[(c[v>>2]|0)+68>>2]
=c[l>>2];c[(c[v>>2]|0)+72>>2]=c[m>>2];c[(c[v>>2]|0)+76>>2]=c[q>>2];c[(c[v>>2]|0)+80>>2]=c[x>>2];u=_(c[p>>2]|0,c[r>>2]|0)|0;c[(c[v>>2]|0)+92>>2]=u;u=_(c[t>>2]|0,c[r>>2]|0)|0;c[(c[v>>2]|0)+96>>2]=u;c[(c[v>>2]|0)+84>>2]=c[r>>2];c[(c[v>>2]|0)+88>>2]=c[h>>2];jc((c[k>>2]|0)+8|0,(c[l>>2]|0)+8|0,w);ic((c[x>>2]|0)/(c[r>>2]|0)|0,w,(c[m>>2]|0)+8|0,(c[v>>2]|0)+8|0);c[f>>2]=c[v>>2];x=c[f>>2]|0;i=y;return x|0}}while(0);yb(c[j>>2]|0);pc(c[m>>2]|0);pc(c[l>>2]|0);pc(c[k>>2]|0);c[f>>2]=0;x=c[f>>2]|0;i=y;return x|0}function ik(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;h=k+16|0;e=k+12|0;f=k+8|0;g=k+4|0;j=k;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;if(c[(c[g>>2]|0)+164>>2]&1024){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}if(!(ok(c[e>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}c[j>>2]=c[f>>2];e=(c[(c[g>>2]|0)+164>>2]&65536|0)!=0;if((c[(c[j>>2]|0)+20>>2]|0)==4){if((e?(c[(c[j>>2]|0)+12>>2]|0)==(c[(c[j>>2]|0)+16>>2]|0):0)?(Db(c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0)|0)!=0:0){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}}else if(e){if((c[(c[j>>2]|0)+12>>2]|0)!=(c[(c[j>>2]|0)+16>>2]|0)){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}if(Db(c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0)|0){c[h>>2]=0;a=c[h>>2]|0;i=k;return a|0}}c[h>>2]=1;a=c[h>>2]|0;i=k;return a|0}function jk(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+64|0;s=r+48|0;e=r+44|0;f=r+40|0;l=r+36|0;h=r+32|0;j=r+28|0;k=r+24|0;m=r+20|0;q=r+16|0;o=r+12|0;n=r+8|0;p=r+4|0;g=r;c[s>>2]=a;c[e>>2]=b;c[f>>2]=d;c[l>>2]=c[s>>2];c[h>>2]=c[(c[l>>2]|0)+64>>2];c[j>>2]=c[(c[l>>2]|0)+68>>2];c[q>>2]=c[(c[l>>2]|0)+80>>2];c[o>>2]=c[(c[l>>2]|0)+84>>2];c[n>>2]=c[(c[l>>2]|0)+92>>2];c[p>>2]=c[(c[l>>2]|0)+96>>2];c[g>>2]=wb(_(c[o>>2]<<2,c[(c[l>>2]|0)+88>>2]|0)|0)|0;c[m>>2]=c[o>>2];while(1){if((c[m>>2]|0)>(c[q>>2]|0))break;eb[c[(c[j>>2]|0)+56>>2]&63](c[j>>2]|0,c[e>>2]|0,c[g>>2]|0);c[e>>2]=(c[e>>2]|0)+(c[n>>2]<<2);eb[c[(c[h>>2]|0)+56>>2]&63](c[h>>2]|0,c[g>>2]|0,c[f>>2]|0);c[f>>2]=(c[f>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[o>>2]|0)}xb(c[g>>2]|0);c[k>>2]=c[(c[l>>2]|0)+72>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[k>>2]|0,c[e>>2]|0,c[f>>2]|0);i=r;return}function kk(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+64|0;s=r+48|0;e=r+44|0;f=r+40|0;l=r+36|0;h=r+32|0;j=r+28|0;k=r+24|0;m=r+20|0;q=r+16|0;o=r+12|0;n=r+8|0;p=r+4|0;g=r;c[s>>2]=a;c[e>>2]=b;c[f>>2]=d;c[l>>2]=c[s>>2];c[h>>2]=c[(c[l>>2]|0)+64>>2];c[j>>2]=c[(c[l>>2]|0)+68>>2];c[q>>2]=c[(c[l>>2]|0)+80>>2];c[o>>2]=c[(c[l>>2]|0)+84>>2];c[n>>2]=c[(c[l>>2]|0)+92>>2];c[p>>2]=c[(c[l>>2]|0)+96>>2];c[g>>2]=wb(_(c[o>>2]<<2,c[(c[l>>2]|0)+88>>2]|0)|0)|0;c[m>>2]=c[o>>2];while(1){if((c[m>>2]|0)>(c[q>>2]|0))break;eb[c[(c[h>>2]|0)+56>>2]&63](c[h>>2]|0,c[e>>2]|0,c[g>>2]|0);c[e>>2]=(c[e>>2]|0)+(c[n>>2]<<2);eb[c[(c[j>>2]|0)+56>>2]&63](c[j>>2]|0,c[g>>2]|0,c[f>>2]|0);c[f>>2]=(c[f>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[o>>2]|0)}xb(c[g>>2]|0);c[k>>2]=c[(c[l>>2]|0)+72>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[k>>2]|0,c[e>>2]|0,c[f>>2]|0);i=r;return}function lk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+72>>2]|0,c[e>>2]|0);i=d;return}function mk(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;d=i;i=i+48|0;e=d;k=d+36|0;l=d+32|0;m=d+28|0;c[k>>2]=a;c[l>>2]=b;c[m>>2]=c[k>>2];b=c[c[l>>2]>>2]|0;a=c[l>>2]|0;l=c[(c[m>>2]|0)+84>>2]|0;k=c[(c[m>>2]|0)+80>>2]|0;j=(c[(c[m>>2]|0)+88>>2]|0)%(c[(c[m>>2]|0)+76>>2]|0)|0;h=c[(c[m>>2]|0)+64>>2]|0;g=c[(c[m>>2]|0)+68>>2]|0;f=c[(c[m>>2]|0)+72>>2]|0;c[e>>2]=c[(c[m>>2]|0)+76>>2];c[e+4>>2]=l;c[e+8>>2]=k;c[e+12>>2]=j;c[e+16>>2]=h;c[e+20>>2]=g;c[e+24>>2]=f;eb[b&63](a,21916,e);i=d;return}function nk(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+72>>2]|0);pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function ok(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+48|0;h=n+32|0;j=n+28|0;o=n+24|0;k=n+20|0;m=n+16|0;l=n+12|0;g=n+8|0;e=n+4|0;f=n;c[j>>2]=a;c[o>>2]=b;c[k>>2]=d;c[m>>2]=c[o>>2];c[l>>2]=(c[(c[m>>2]|0)+4>>2]|0)+4;do if((c[c[(c[m>>2]|0)+8>>2]>>2]|0)<=1?(c[c[(c[m>>2]|0)+4>>2]>>2]|0)==1:0){ke(c[(c[m>>2]|0)+8>>2]|0,g,e,f)|0;if((Db(c[c[l>>2]>>2]|0)|0)!=0?(c[(c[k>>2]|0)+164>>2]&16384|0)!=0:0){c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}if(Eb(c[c[l>>2]>>2]|0,c[g>>2]|0,c[(c[j>>2]|0)+8>>2]|0,13508,2)|0){c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}a=c[m>>2]|0;if((c[(c[m>>2]|0)+12>>2]|0)!=(c[(c[m>>2]|0)+16>>2]|0))if((c[a+20>>2]|0)==4){c[h>>2]=c[(c[k>>2]|0)+164>>2]&4096;l=c[h>>2]|0;i=n;return l|0}else{c[h>>2]=(c[(c[l>>2]|0)+8>>2]|0)>1&1;l=c[h>>2]|0;i=n;return l|0}if(Md(c[a+4>>2]|0,c[(c[m>>2]|0)+8>>2]|0)|0){c[h>>2]=1;l=c[h>>2]|0;i=n;return l|0}if((c[c[(c[m>>2]|0)+8>>2]>>2]|0)!=0?(l=Bb(c[c[l>>2]>>2]|0,c[(c[(c[m>>2]|0)+8>>2]|0)+4>>2]|0,c[13508+(c[(c[j>>2]|0)+8>>2]<<2)>>2]|0)|0,(l|0)!=(c[(c[(c[m>>2]|0)+8>>2]|0)+4>>2]|0)):0)break;c[h>>2]=1;l=c[h>>2]|0;i=n;return l|0}while(0);c[h>>2]=0;l=c[h>>2]|0;i=n;return l|0}function pk(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;Cd(13532,c[d>>2]|0);Cd(14708,c[d>>2]|0);Cd(15412,c[d>>2]|0);Cd(16116,c[d>>2]|0);i=b;return}function qk(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;rk(c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0,0);rk(c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0,1);i=f;return}function rk(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;j=g+20|0;m=g+16|0;l=g+12|0;n=g+8|0;k=g+4|0;h=g;c[j>>2]=a;c[m>>2]=b;c[l>>2]=d;c[n>>2]=e;c[k>>2]=f;c[h>>2]=Fk(32,c[c[l>>2]>>2]|0,c[n>>2]|0,2)|0;c[(c[h>>2]|0)+28>>2]=c[m>>2];c[(c[h>>2]|0)+20>>2]=c[l>>2];c[(c[h>>2]|0)+24>>2]=c[k>>2];Bd(c[j>>2]|0,c[h>>2]|0);i=g;return}function sk(a,b,d,e,f,g,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0.0;G=i;i=i+80|0;p=G+72|0;H=G+68|0;r=G+64|0;s=G+60|0;t=G+56|0;u=G+52|0;v=G+48|0;w=G+44|0;x=G+40|0;y=G+36|0;q=G+32|0;o=G+28|0;C=G+24|0;F=G+20|0;B=G+16|0;z=G+12|0;A=G+8|0;E=G+4|0;D=G;c[H>>2]=a;c[r>>2]=b;c[s>>2]=d;c[t>>2]=e;c[u>>2]=f;c[v>>2]=g;c[w>>2]=j;c[x>>2]=k;c[y>>2]=l;c[q>>2]=m;c[o>>2]=n;c[C>>2]=c[H>>2];c[B>>2]=c[(c[C>>2]|0)+20>>2];c[z>>2]=0;c[A>>2]=0;c[E>>2]=_((c[u>>2]|0)/2|0,c[v>>2]|0)|0;if(!(tk(c[C>>2]|0,c[r>>2]|0,c[s>>2]|0,c[t>>2]|0,c[u>>2]|0,c[v>>2]|0,c[w>>2]|0,c[x>>2]|0,c[y>>2]|0,c[q>>2]|0,c[o>>2]|0,D)|0)){c[p>>2]=0;y=c[p>>2]|0;i=G;return y|0}a=c[o>>2]|0;j=Ed(c[s>>2]|0,c[t>>2]|0,c[t>>2]|0)|0;k=Dd()|0;c[z>>2]=uc(a,vn(j,k,c[y>>2]|0,c[q>>2]|0,c[y>>2]|0,c[q>>2]|0,c[r>>2]|0)|0)|0;if(c[z>>2]|0){m=c[o>>2]|0;if((c[u>>2]|0)%2|0)o=Dd()|0;else o=Ed(c[s>>2]|0,c[t>>2]|0,c[t>>2]|0)|0;k=Dd()|0;c[A>>2]=uc(m,vn(o,k,(c[y>>2]|0)+(c[E>>2]<<2)|0,(c[q>>2]|0)+(c[E>>2]<<2)|0,(c[y>>2]|0)+(c[E>>2]<<2)|0,(c[q>>2]|0)+(c[E>>2]<<2)|0,(c[r>>2]|0)==0?1:6)|0)|0;if(c[A>>2]|0){if(c[(c[C>>2]|0)+24>>2]|0)c[F>>2]=Gk(120,13692,29)|0;else 
c[F>>2]=Gk(120,13692,(c[D>>2]|0)!=0?31:30)|0;c[(c[F>>2]|0)+64>>2]=c[(c[C>>2]|0)+28>>2];c[(c[F>>2]|0)+108>>2]=0;c[(c[F>>2]|0)+76>>2]=c[s>>2];c[(c[F>>2]|0)+100>>2]=c[t>>2];c[(c[F>>2]|0)+80>>2]=c[u>>2];c[(c[F>>2]|0)+92>>2]=c[v>>2];c[(c[F>>2]|0)+84>>2]=c[w>>2];c[(c[F>>2]|0)+96>>2]=c[x>>2];c[(c[F>>2]|0)+112>>2]=c[C>>2];y=(xk(c[s>>2]|0)|0)<<2;c[(c[F>>2]|0)+104>>2]=y;c[(c[F>>2]|0)+68>>2]=c[z>>2];c[(c[F>>2]|0)+72>>2]=c[A>>2];c[(c[F>>2]|0)+88>>2]=c[D>>2];fc((c[F>>2]|0)+8|0);y=_(c[w>>2]|0,(((c[u>>2]|0)-1|0)/2|0|0)/(c[(c[(c[B>>2]|0)+12>>2]|0)+8>>2]|0)|0)|0;lc(y,(c[B>>2]|0)+16|0,(c[F>>2]|0)+8|0);lc(c[w>>2]|0,(c[z>>2]|0)+8|0,(c[F>>2]|0)+8|0);lc(c[w>>2]|0,(c[A>>2]|0)+8|0,(c[F>>2]|0)+8|0);if(c[(c[C>>2]|0)+24>>2]|0){y=_(c[s>>2]<<2,c[u>>2]|0)|0;I=+(_(y,c[w>>2]|0)|0);y=(c[F>>2]|0)+8+24|0;h[y>>3]=+h[y>>3]+I}c[p>>2]=c[F>>2];y=c[p>>2]|0;i=G;return y|0}}pc(c[z>>2]|0);pc(c[A>>2]|0);c[p>>2]=0;y=c[p>>2]|0;i=G;return y|0}function tk(a,b,d,e,f,g,h,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0;u=i;i=i+64|0;o=u+48|0;p=u+44|0;B=u+40|0;r=u+36|0;A=u+32|0;s=u+28|0;z=u+24|0;t=u+20|0;y=u+16|0;x=u+12|0;w=u+8|0;q=u+4|0;v=u;c[p>>2]=a;c[B>>2]=b;c[r>>2]=d;c[A>>2]=e;c[s>>2]=f;c[z>>2]=g;c[t>>2]=h;c[y>>2]=j;c[x>>2]=k;c[w>>2]=l;c[q>>2]=m;c[v>>2]=n;f=c[p>>2]|0;b=c[B>>2]|0;n=c[r>>2]|0;a=c[A>>2]|0;g=c[s>>2]|0;m=c[z>>2]|0;d=c[t>>2]|0;e=c[y>>2]|0;h=c[x>>2]|0;j=c[w>>2]|0;k=c[q>>2]|0;l=c[v>>2]|0;if(c[(c[p>>2]|0)+24>>2]|0){if(!(Ck(f,b,n,a,g,m,d,e,h,j,k,l)|0)){c[o>>2]=0;x=c[o>>2]|0;i=u;return x|0}}else if(!(Dk(f,b,n,a,g,m,d,e,h,j,k,l)|0)){c[o>>2]=0;x=c[o>>2]|0;i=u;return x|0}if((c[(c[q>>2]|0)+164>>2]&65536|0)!=0?(x=_(c[s>>2]|0,c[r>>2]|0)|0,(Qb((c[(c[p>>2]|0)+24>>2]|0)!=0?512:16,c[t>>2]|0,x,c[r>>2]|0)|0)!=0):0){c[o>>2]=0;x=c[o>>2]|0;i=u;return x|0}c[o>>2]=1;x=c[o>>2]|0;i=u;return x|0}function uk(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;A=i;i=i+80|0;e=A+72|0;h=A+68|0;j=A+64|0;t=A+60|0;r=A+56|0;s=A+52|0;u=A+48|0;v=A+44|0;y=A+40|0;z=A+36|0;o=A+32|0;p=A+28|0;w=A+24|0;x=A+20|0;q=A+16|0;n=A+12|0;l=A+8|0;m=A+4|0;k=A;c[e>>2]=a;c[h>>2]=b;c[j>>2]=d;c[t>>2]=c[e>>2];c[r>>2]=c[(c[t>>2]|0)+68>>2];c[s>>2]=c[(c[t>>2]|0)+72>>2];c[y>>2]=c[(c[t>>2]|0)+92>>2];c[z>>2]=c[(c[t>>2]|0)+84>>2];c[o>>2]=xk(c[(c[t>>2]|0)+76>>2]|0)|0;c[w>>2]=1;c[x>>2]=((c[(c[t>>2]|0)+80>>2]|0)+1|0)/2|0;c[q>>2]=(_(c[(c[t>>2]|0)+76>>2]|0,c[o>>2]|0)|0)<<1<<2;d=c[q>>2]|0;if((c[q>>2]|0)>>>0<65536){e=i;i=i+((1*d|0)+15&-16)|0;c[p>>2]=e}else c[p>>2]=wb(d)|0;c[u>>2]=0;while(1){if((c[u>>2]|0)>=(c[z>>2]|0))break;c[n>>2]=c[h>>2];c[l>>2]=c[j>>2];c[m>>2]=(c[h>>2]|0)+((_(c[(c[t>>2]|0)+80>>2]|0,c[y>>2]|0)|0)<<2);c[k>>2]=(c[j>>2]|0)+((_(c[(c[t>>2]|0)+80>>2]|0,c[y>>2]|0)|0)<<2);Ya[c[(c[r>>2]|0)+56>>2]&63](c[r>>2]|0,c[n>>2]|0,c[l>>2]|0,c[n>>2]|0,c[l>>2]|0);c[v>>2]=c[w>>2];while(1){a=c[t>>2]|0;b=c[n>>2]|0;d=c[l>>2]|0;e=c[m>>2]|0;f=c[k>>2]|0;g=c[v>>2]|0;if(((c[v>>2]|0)+(c[o>>2]|0)|0)>=(c[x>>2]|0))break;yk(a,b,d,e,f,g,(c[v>>2]|0)+(c[o>>2]|0)|0,0,c[p>>2]|0);c[v>>2]=(c[v>>2]|0)+(c[o>>2]|0)}yk(a,b,d,e,f,g,c[x>>2]|0,c[(c[t>>2]|0)+88>>2]|0,c[p>>2]|0);d=(c[n>>2]|0)+((_(c[x>>2]|0,c[y>>2]|0)|0)<<2)|0;b=(c[l>>2]|0)+((_(c[x>>2]|0,c[y>>2]|0)|0)<<2)|0;a=(c[n>>2]|0)+((_(c[x>>2]|0,c[y>>2]|0)|0)<<2)|0;e=(c[l>>2]|0)+((_(c[x>>2]|0,c[y>>2]|0)|0)<<2)|0;Ya[c[(c[s>>2]|0)+56>>2]&63](c[s>>2]|0,d,b,a,e);c[u>>2]=(c[u>>2]|0)+1;c[h>>2]=(c[h>>2]|0)+(c[(c[t>>2]|0)+96>>2]<<2);c[j>>2]=(c[j>>2]|0)+(c[(c[t>>2]|0)+96>>2]<<2)}if((c[q>>2]|0)>>>0<65536){i=A;return}xb(c[p>>2]|0);i=A;return}function vk(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;q=i;i=i+48|0;r=q+44|0;e=q+40|0;f=q+36|0;j=q+32|0;g=q+28|0;h=q+24|0;k=q+20|0;l=q+16|0;o=q+12|0;n=q+8|0;p=q+4|0;m=q;c[r>>2]=a;c[e>>2]=b;c[f>>2]=d;c[j>>2]=c[r>>2];c[g>>2]=c[(c[j>>2]|0)+68>>2];c[h>>2]=c[(c[j>>2]|0)+72>>2];c[l>>2]=c[(c[j>>2]|0)+80>>2];c[o>>2]=c[(c[j>>2]|0)+84>>2];c[n>>2]=c[(c[j>>2]|0)+92>>2];c[p>>2]=c[(c[j>>2]|0)+96>>2];c[m>>2]=((c[l>>2]|0)-1|0)/2|0;c[k>>2]=0;while(1){if((c[k>>2]|0)>=(c[o>>2]|0))break;Ya[c[(c[g>>2]|0)+56>>2]&63](c[g>>2]|0,c[e>>2]|0,c[f>>2]|0,c[e>>2]|0,c[f>>2]|0);b=(c[e>>2]|0)+((_((c[l>>2]|0)-1|0,c[n>>2]|0)|0)<<2)|0;a=(c[f>>2]|0)+((_((c[l>>2]|0)-1|0,c[n>>2]|0)|0)<<2)|0;hb[c[(c[j>>2]|0)+64>>2]&127]((c[e>>2]|0)+(c[n>>2]<<2)|0,(c[f>>2]|0)+(c[n>>2]<<2)|0,b,a,c[c[(c[j>>2]|0)+108>>2]>>2]|0,c[(c[j>>2]|0)+100>>2]|0,1,c[m>>2]|0,c[n>>2]|0);a=(c[e>>2]|0)+((_(c[m>>2]|0,c[n>>2]|0)|0)<<2)|0;b=(c[f>>2]|0)+((_(c[m>>2]|0,c[n>>2]|0)|0)<<2)|0;d=(c[e>>2]|0)+((_((c[l>>2]|0)-(c[m>>2]|0)|0,c[n>>2]|0)|0)<<2)|0;r=(c[f>>2]|0)+((_((c[l>>2]|0)-(c[m>>2]|0)|0,c[n>>2]|0)|0)<<2)|0;hb[c[(c[j>>2]|0)+64>>2]&127](a,b,d,r,c[c[(c[j>>2]|0)+108>>2]>>2]|0,c[(c[j>>2]|0)+100>>2]|0,c[m>>2]|0,(c[m>>2]|0)+2|0,0);r=(c[e>>2]|0)+((_((c[l>>2]|0)/2|0,c[n>>2]|0)|0)<<2)|0;d=(c[f>>2]|0)+((_((c[l>>2]|0)/2|0,c[n>>2]|0)|0)<<2)|0;b=(c[e>>2]|0)+((_((c[l>>2]|0)/2|0,c[n>>2]|0)|0)<<2)|0;a=(c[f>>2]|0)+((_((c[l>>2]|0)/2|0,c[n>>2]|0)|0)<<2)|0;Ya[c[(c[h>>2]|0)+56>>2]&63](c[h>>2]|0,r,d,b,a);c[k>>2]=(c[k>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[p>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[p>>2]<<2)}i=q;return}function wk(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;q=p+40|0;e=p+36|0;f=p+32|0;j=p+28|0;g=p+24|0;h=p+20|0;k=p+16|0;l=p+12|0;n=p+8|0;m=p+4|0;o=p;c[q>>2]=a;c[e>>2]=b;c[f>>2]=d;c[j>>2]=c[q>>2];c[g>>2]=c[(c[j>>2]|0)+68>>2];c[h>>2]=c[(c[j>>2]|0)+72>>2];c[l>>2]=c[(c[j>>2]|0)+80>>2];c[n>>2]=c[(c[j>>2]|0)+84>>2];c[m>>2]=c[(c[j>>2]|0)+92>>2];c[o>>2]=c[(c[j>>2]|0)+96>>2];c[k>>2]=0;while(1){if((c[k>>2]|0)>=(c[n>>2]|0))break;Ya[c[(c[g>>2]|0)+56>>2]&63](c[g>>2]|0,c[e>>2]|0,c[f>>2]|0,c[e>>2]|0,c[f>>2]|0);d=(c[e>>2]|0)+((_((c[l>>2]|0)-1|0,c[m>>2]|0)|0)<<2)|0;q=(c[f>>2]|0)+((_((c[l>>2]|0)-1|0,c[m>>2]|0)|0)<<2)|0;hb[c[(c[j>>2]|0)+64>>2]&127]((c[e>>2]|0)+(c[m>>2]<<2)|0,(c[f>>2]|0)+(c[m>>2]<<2)|0,d,q,c[c[(c[j>>2]|0)+108>>2]>>2]|0,c[(c[j>>2]|0)+100>>2]|0,1,((c[l>>2]|0)+1|0)/2|0,c[m>>2]|0);q=(c[e>>2]|0)+((_((c[l>>2]|0)/2|0,c[m>>2]|0)|0)<<2)|0;d=(c[f>>2]|0)+((_((c[l>>2]|0)/2|0,c[m>>2]|0)|0)<<2)|0;b=(c[e>>2]|0)+((_((c[l>>2]|0)/2|0,c[m>>2]|0)|0)<<2)|0;a=(c[f>>2]|0)+((_((c[l>>2]|0)/2|0,c[m>>2]|0)|0)<<2)|0;Ya[c[(c[h>>2]|0)+56>>2]&63](c[h>>2]|0,q,d,b,a);c[k>>2]=(c[k>>2]|0)+1;c[e>>2]=(c[e>>2]|0)+(c[o>>2]<<2);c[f>>2]=(c[f>>2]|0)+(c[o>>2]<<2)}i=p;return}function xk(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;c[b>>2]=(c[b>>2]|0)+3;c[b>>2]=c[b>>2]&-4;i=d;return (c[b>>2]|0)+2|0}function yk(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;l=i;i=i+64|0;r=l+48|0;x=l+44|0;w=l+40|0;u=l+36|0;t=l+32|0;n=l+28|0;o=l+24|0;y=l+20|0;v=l+16|0;q=l+12|0;p=l+8|0;m=l+4|0;s=l;c[r>>2]=a;c[x>>2]=b;c[w>>2]=d;c[u>>2]=e;c[t>>2]=f;c[n>>2]=g;c[o>>2]=h;c[y>>2]=j;c[v>>2]=k;c[q>>2]=c[(c[r>>2]|0)+104>>2];c[p>>2]=c[(c[r>>2]|0)+100>>2];c[m>>2]=c[(c[r>>2]|0)+92>>2];c[s>>2]=(c[v>>2]|0)+(c[q>>2]<<2)+-8;b=(c[x>>2]|0)+((_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;e=(c[w>>2]|0)+((_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;Hb(b,e,c[v>>2]|0,(c[v>>2]|0)+4|0,(c[(c[r>>2]|0)+76>>2]|0)/2|0,c[p>>2]|0,c[q>>2]|0,(c[o>>2]|0)-(c[n>>2]|0)|0,c[m>>2]|0,2);e=(c[u>>2]|0)+(0-(_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;b=(c[t>>2]|0)+(0-(_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;Hb(e,b,c[s>>2]|0,(c[s>>2]|0)+4|0,(c[(c[r>>2]|0)+76>>2]|0)/2|0,c[p>>2]|0,c[q>>2]|0,(c[o>>2]|0)-(c[n>>2]|0)|0,0-(c[m>>2]|0)|0,-2);hb[c[(c[r>>2]|0)+64>>2]&127](c[v>>2]|0,(c[v>>2]|0)+4|0,c[s>>2]|0,(c[s>>2]|0)+4|0,c[c[(c[r>>2]|0)+108>>2]>>2]|0,c[(c[r>>2]|0)+104>>2]|0,c[n>>2]|0,(c[o>>2]|0)+(c[y>>2]|0)|0,2);b=(c[x>>2]|0)+((_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;e=(c[w>>2]|0)+((_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;Ib(c[v>>2]|0,(c[v>>2]|0)+4|0,b,e,(c[(c[r>>2]|0)+76>>2]|0)/2|0,c[q>>2]|0,c[p>>2]|0,(c[o>>2]|0)-(c[n>>2]|0)|0,2,c[m>>2]|0);e=(c[u>>2]|0)+(0-(_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;b=(c[t>>2]|0)+(0-(_(c[n>>2]|0,c[m>>2]|0)|0)<<2)|0;Ib(c[s>>2]|0,(c[s>>2]|0)+4|0,e,b,(c[(c[r>>2]|0)+76>>2]|0)/2|0,c[q>>2]|0,c[p>>2]|0,(c[o>>2]|0)-(c[n>>2]|0)|0,-2,0-(c[m>>2]|0)|0);i=l;return}function zk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+68>>2]|0,c[f>>2]|0);rc(c[(c[e>>2]|0)+72>>2]|0,c[f>>2]|0);a=_(c[(c[e>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+80>>2]|0)|0;Me(c[f>>2]|0,(c[e>>2]|0)+108|0,c[(c[(c[(c[e>>2]|0)+112>>2]|0)+20>>2]|0)+8>>2]|0,a,c[(c[e>>2]|0)+76>>2]|0,(((c[(c[e>>2]|0)+80>>2]|0)-1|0)/2|0)+(c[(c[e>>2]|0)+88>>2]|0)|0);i=d;return}function Ak(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;j=i;i=i+80|0;h=j+32|0;g=j;l=j+76|0;d=j+72|0;f=j+68|0;k=j+64|0;e=j+60|0;c[l>>2]=a;c[d>>2]=b;c[f>>2]=c[l>>2];c[k>>2]=c[(c[f>>2]|0)+112>>2];c[e>>2]=c[(c[k>>2]|0)+20>>2];b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;d=c[(c[f>>2]|0)+76>>2]|0;if(c[(c[k>>2]|0)+24>>2]|0){o=xk(d)|0;n=c[(c[f>>2]|0)+76>>2]|0;m=Le(c[(c[f>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0;l=c[(c[f>>2]|0)+88>>2]|0;k=c[(c[f>>2]|0)+84>>2]|0;h=c[(c[e>>2]|0)+4>>2]|0;d=c[(c[f>>2]|0)+68>>2]|0;e=c[(c[f>>2]|0)+72>>2]|0;c[g>>2]=o;c[g+4>>2]=n;c[g+8>>2]=m;c[g+12>>2]=l;c[g+16>>2]=k;c[g+20>>2]=h;c[g+24>>2]=d;c[g+28>>2]=e;eb[b&63](a,22492,g);i=j;return}else{o=Le(c[(c[f>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0;n=c[(c[f>>2]|0)+88>>2]|0;m=c[(c[f>>2]|0)+84>>2]|0;g=c[(c[e>>2]|0)+4>>2]|0;k=c[(c[f>>2]|0)+68>>2]|0;l=c[(c[f>>2]|0)+72>>2]|0;c[h>>2]=d;c[h+4>>2]=o;c[h+8>>2]=n;c[h+12>>2]=m;c[h+16>>2]=g;c[h+20>>2]=k;c[h+24>>2]=l;eb[b&63](a,22540,h);i=j;return}}function Bk(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+72>>2]|0);i=b;return}function Ck(a,b,d,e,f,g,h,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0;y=i;i=i+64|0;z=y+56|0;o=y+52|0;r=y+48|0;s=y+40|0;t=y+24|0;u=y+20|0;p=y+16|0;q=y+12|0;x=y+8|0;v=y+4|0;w=y;c[z>>2]=a;c[o>>2]=b;c[r>>2]=d;c[y+44>>2]=e;c[s>>2]=f;c[y+36>>2]=g;c[y+32>>2]=h;c[y+28>>2]=j;c[t>>2]=k;c[u>>2]=l;c[p>>2]=m;c[q>>2]=n;c[x>>2]=c[(c[z>>2]|0)+20>>2];if((c[r>>2]|0)!=(c[c[x>>2]>>2]|0)){j=0;j=j&1;i=y;return j|0}if((c[o>>2]|0)!=(c[(c[(c[x>>2]|0)+12>>2]|0)+4>>2]|0)){j=0;j=j&1;i=y;return j|0}c[t>>2]=0;c[u>>2]=(c[t>>2]|0)+4;c[v>>2]=xk(c[r>>2]|0)|0;c[w>>2]=c[v>>2]<<2;if(!(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1](c[t>>2]|0,c[u>>2]|0,(c[t>>2]|0)+(c[w>>2]<<2)+-8|0,(c[u>>2]|0)+(c[w>>2]<<2)+-8|0,c[w>>2]|0,1,1+(c[v>>2]|0)|0,2,c[p>>2]|0)|0)){j=0;j=j&1;i=y;return 
j|0}c[c[q>>2]>>2]=0;if(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1](c[t>>2]|0,c[u>>2]|0,(c[t>>2]|0)+(c[w>>2]<<2)+-8|0,(c[u>>2]|0)+(c[w>>2]<<2)+-8|0,c[w>>2]|0,1,1+((((c[s>>2]|0)-1|0)/2|0|0)%(c[v>>2]|0)|0)|0,2,c[p>>2]|0)|0){j=1;j=j&1;i=y;return j|0}c[c[q>>2]>>2]=1;j=(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1](c[t>>2]|0,c[u>>2]|0,(c[t>>2]|0)+(c[w>>2]<<2)+-8|0,(c[u>>2]|0)+(c[w>>2]<<2)+-8|0,c[w>>2]|0,1,2+((((c[s>>2]|0)-1|0)/2|0|0)%(c[v>>2]|0)|0)|0,2,c[p>>2]|0)|0)!=0;j=j&1;i=y;return j|0}function Dk(a,b,d,e,f,g,h,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;y=i;i=i+64|0;A=y+48|0;o=y+44|0;z=y+40|0;r=y+36|0;s=y+32|0;t=y+28|0;u=y+20|0;v=y+16|0;w=y+12|0;p=y+8|0;q=y+4|0;x=y;c[A>>2]=a;c[o>>2]=b;c[z>>2]=d;c[r>>2]=e;c[s>>2]=f;c[t>>2]=g;c[y+24>>2]=h;c[u>>2]=j;c[v>>2]=k;c[w>>2]=l;c[p>>2]=m;c[q>>2]=n;c[x>>2]=c[(c[A>>2]|0)+20>>2];if((c[z>>2]|0)!=(c[c[x>>2]>>2]|0)){j=0;j=j&1;i=y;return j|0}if((c[o>>2]|0)!=(c[(c[(c[x>>2]|0)+12>>2]|0)+4>>2]|0)){j=0;j=j&1;i=y;return j|0}c[c[q>>2]>>2]=0;h=(c[v>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;j=(c[w>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;if(!(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1]((c[v>>2]|0)+(c[t>>2]<<2)|0,(c[w>>2]|0)+(c[t>>2]<<2)|0,h,j,c[r>>2]|0,1,((c[s>>2]|0)+1|0)/2|0,c[t>>2]|0,c[p>>2]|0)|0)){c[c[q>>2]>>2]=1;h=(c[v>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;j=(c[w>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;if(!(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1]((c[v>>2]|0)+(c[t>>2]<<2)|0,(c[w>>2]|0)+(c[t>>2]<<2)|0,h,j,c[r>>2]|0,1,((c[s>>2]|0)-1|0)/2|0,c[t>>2]|0,c[p>>2]|0)|0)){j=0;j=j&1;i=y;return j|0}h=(c[v>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;j=(c[w>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;if(!(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1]((c[v>>2]|0)+(c[t>>2]<<2)|0,(c[w>>2]|0)+(c[t>>2]<<2)|0,h,j,c[r>>2]|0,((c[s>>2]|0)-1|0)/2|0,(((c[s>>2]|0)-1|0)/2|0)+2|0,0,c[p>>2]|0)|0)){j=0;j=j&1;i=y;return 
j|0}}c[v>>2]=(c[v>>2]|0)+(c[u>>2]<<2);c[w>>2]=(c[w>>2]|0)+(c[u>>2]<<2);h=(c[v>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;j=(c[w>>2]|0)+((_((c[s>>2]|0)-1|0,c[t>>2]|0)|0)<<2)|0;j=(cb[c[c[(c[x>>2]|0)+12>>2]>>2]&1]((c[v>>2]|0)+(c[t>>2]<<2)|0,(c[w>>2]|0)+(c[t>>2]<<2)|0,h,j,c[r>>2]|0,1,(((c[s>>2]|0)+1|0)/2|0)-(c[c[q>>2]>>2]|0)|0,c[t>>2]|0,c[p>>2]|0)|0)!=0;j=j&1;i=y;return j|0}function Ek(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;h=j+16|0;k=j+12|0;e=j+8|0;f=j+4|0;g=j;c[k>>2]=a;c[e>>2]=b;c[f>>2]=d;if(!(Hk(c[k>>2]|0,c[e>>2]|0,c[f>>2]|0)|0)){c[h>>2]=0;g=c[h>>2]|0;i=j;return g|0}c[g>>2]=c[e>>2];if(!(c[c[(c[g>>2]|0)+8>>2]>>2]|0))e=1;else e=(c[(c[f>>2]|0)+164>>2]&16|0)!=0^1;c[h>>2]=e&1;g=c[h>>2]|0;i=j;return g|0}function Fk(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;g=i;i=i+32|0;l=g+16|0;k=g+12|0;j=g+8|0;h=g+4|0;f=g;c[l>>2]=a;c[k>>2]=b;c[j>>2]=d;c[h>>2]=e;c[f>>2]=zd(c[l>>2]|0,13708)|0;c[(c[f>>2]|0)+8>>2]=c[k>>2];c[(c[f>>2]|0)+16>>2]=c[j>>2];c[(c[f>>2]|0)+12>>2]=c[h>>2];i=g;return c[f>>2]|0}function Gk(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=oc(c[j>>2]|0,c[h>>2]|0)|0;c[(c[e>>2]|0)+56>>2]=c[g>>2];i=f;return c[e>>2]|0}function Hk(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;e=j+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[k>>2]=b;c[f>>2]=d;c[g>>2]=c[k>>2];if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)!=1){g=0;g=g&1;i=j;return g|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)>1){g=0;g=g&1;i=j;return g|0}if(c[(c[g>>2]|0)+28>>2]|0){if((c[(c[g>>2]|0)+28>>2]|0)!=4){g=0;g=g&1;i=j;return g|0}if((c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+20>>2]|0)?(c[(c[f>>2]|0)+164>>2]&4096|0)!=0:0){g=0;g=g&1;i=j;return g|0}}a=kd(c[(c[e>>2]|0)+8>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)|0;c[h>>2]=a;if((a|0)<=0){g=0;g=g&1;i=j;return g|0}g=(c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)>(c[h>>2]|0);g=g&1;i=j;return g|0}function Ik(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;u=i;i=i+64|0;p=u+60|0;v=u+56|0;e=u+52|0;f=u+48|0;h=u+44|0;n=u+40|0;s=u+36|0;q=u+32|0;r=u+28|0;l=u+24|0;t=u+20|0;k=u+16|0;o=u+12|0;j=u+8|0;m=u+4|0;g=u;c[v>>2]=a;c[e>>2]=b;c[f>>2]=d;c[h>>2]=c[v>>2];c[s>>2]=0;c[q>>2]=0;c[r>>2]=0;if(!(Ek(c[h>>2]|0,c[e>>2]|0,c[f>>2]|0)|0)){c[p>>2]=0;n=c[p>>2]|0;i=u;return n|0}c[n>>2]=c[e>>2];c[g>>2]=(c[(c[n>>2]|0)+4>>2]|0)+4;c[l>>2]=c[c[g>>2]>>2];c[t>>2]=kd(c[(c[h>>2]|0)+8>>2]|0,c[l>>2]|0)|0;c[k>>2]=(c[l>>2]|0)/(c[t>>2]|0)|0;ke(c[(c[n>>2]|0)+8>>2]|0,o,j,m)|0;a:do switch(c[(c[n>>2]|0)+28>>2]|0){case 0:{l=_(c[k>>2]|0,c[(c[g>>2]|0)+8>>2]|0)|0;c[r>>2]=gb[c[(c[h>>2]|0)+12>>2]&7](c[h>>2]|0,0,c[t>>2]|0,l,c[k>>2]|0,c[(c[g>>2]|0)+8>>2]|0,c[o>>2]|0,c[m>>2]|0,c[(c[n>>2]|0)+20>>2]|0,c[(c[n>>2]|0)+24>>2]|0,c[f>>2]|0)|0;if(c[r>>2]|0)switch(c[(c[h>>2]|0)+16>>2]|0){case 0:{e=c[f>>2]|0;a=_((c[t>>2]|0)/2|0,c[(c[g>>2]|0)+4>>2]|0)|0;a=Ed(c[k>>2]|0,a,c[(c[g>>2]|0)+8>>2]|0)|0;l=_(c[k>>2]|0,c[(c[g>>2]|0)+8>>2]|0)|0;c[q>>2]=uc(e,In(a,Gd(2,((c[(c[n>>2]|0)+16>>2]|0)-(c[(c[n>>2]|0)+12>>2]|0)|0)/4|0,((c[(c[n>>2]|0)+24>>2]|0)-(c[(c[n>>2]|0)+20>>2]|0)|0)/4|0,(c[t>>2]|0)/2|0,c[(c[g>>2]|0)+4>>2]|0,l,c[o>>2]|0,c[j>>2]|0,c[m>>2]|0)|0,c[(c[n>>2]|0)+12>>2]|0,c[(c[n>>2]|0)+20>>2]|0,0)|0)|0;if(!(c[q>>2]|0)){e=17;break a}c[s>>2]=rn(80,13720,22)|0;e=16;break a}case 1:{e=c[f>>2]|0;a=_((c[t>>2]|0)/2|0,c[(c[g>>2]|0)+4>>2]|0)|0;a=Ed(c[k>>2]|0,a,c[(c[g>>2]|0)+8>>2]|0)|0;l=_(c[k>>2]|0,c[(c[g>>2]|0)+8>>2]|0)|0;c[q>>2]=uc(e,qh(a,Fd((c[t>>2]|0)/2|0,c[(c[g>>2]|0)+4>>2]|0,l,c[o>>2]|0,c[j>>2]|0,c[m>>2]|0)|0,c[(c[n>>2]|0)+12>>2]|0,c[(c[n>>2]|0)+16>>2]|0,c[(c[n>>2]|0)+20>>2]|0,c[(c[n>>2]|0)+24>>2]|0)|0)|0;if(!(c[q>>2]|0)){e=17;break a}c[s>>2]=rn(80,13720,23)|0;e=16;break a}default:{e=16;break a}}else e=17;break}case 
4:{l=_(c[k>>2]|0,c[(c[g>>2]|0)+4>>2]|0)|0;c[r>>2]=gb[c[(c[h>>2]|0)+12>>2]&7](c[h>>2]|0,4,c[t>>2]|0,l,c[k>>2]|0,c[(c[g>>2]|0)+4>>2]|0,c[o>>2]|0,c[j>>2]|0,c[(c[n>>2]|0)+20>>2]|0,c[(c[n>>2]|0)+24>>2]|0,c[f>>2]|0)|0;if(c[r>>2]|0)switch(c[(c[h>>2]|0)+16>>2]|0){case 0:{e=c[f>>2]|0;a=Ed(c[k>>2]|0,c[(c[g>>2]|0)+4>>2]|0,_((c[t>>2]|0)/2|0,c[(c[g>>2]|0)+8>>2]|0)|0)|0;l=_(c[k>>2]|0,c[(c[g>>2]|0)+4>>2]|0)|0;c[q>>2]=uc(e,In(a,Gd(2,((c[(c[n>>2]|0)+24>>2]|0)-(c[(c[n>>2]|0)+20>>2]|0)|0)/4|0,((c[(c[n>>2]|0)+16>>2]|0)-(c[(c[n>>2]|0)+12>>2]|0)|0)/4|0,(c[t>>2]|0)/2|0,l,c[(c[g>>2]|0)+8>>2]|0,c[o>>2]|0,c[j>>2]|0,c[m>>2]|0)|0,c[(c[n>>2]|0)+20>>2]|0,c[(c[n>>2]|0)+12>>2]|0,4)|0)|0;if(!(c[q>>2]|0)){e=17;break a}c[s>>2]=rn(80,13720,24)|0;e=16;break a}case 1:{e=c[f>>2]|0;a=Ed(c[k>>2]|0,c[(c[g>>2]|0)+4>>2]|0,_((c[t>>2]|0)/2|0,c[(c[g>>2]|0)+8>>2]|0)|0)|0;l=_(c[k>>2]|0,c[(c[g>>2]|0)+4>>2]|0)|0;c[q>>2]=uc(e,qh(a,Fd((c[t>>2]|0)/2|0,l,c[(c[g>>2]|0)+8>>2]|0,c[o>>2]|0,c[j>>2]|0,c[m>>2]|0)|0,c[(c[n>>2]|0)+24>>2]|0,c[(c[n>>2]|0)+20>>2]|0,c[(c[n>>2]|0)+16>>2]|0,c[(c[n>>2]|0)+12>>2]|0)|0)|0;if(!(c[q>>2]|0)){e=17;break a}c[s>>2]=rn(80,13720,25)|0;e=16;break a}default:{e=16;break a}}else e=17;break}default:e=16}while(0);if((e|0)==16){c[(c[s>>2]|0)+64>>2]=c[q>>2];c[(c[s>>2]|0)+68>>2]=c[r>>2];c[(c[s>>2]|0)+72>>2]=c[t>>2];jc((c[q>>2]|0)+8|0,(c[r>>2]|0)+8|0,(c[s>>2]|0)+8|0);c[(c[s>>2]|0)+52>>2]=c[(c[r>>2]|0)+52>>2];c[p>>2]=c[s>>2];n=c[p>>2]|0;i=u;return n|0}else if((e|0)==17){pc(c[r>>2]|0);pc(c[q>>2]|0);c[p>>2]=0;n=c[p>>2]|0;i=u;return n|0}return 0}function Jk(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;g=i;i=i+32|0;o=g+28|0;m=g+24|0;j=g+16|0;h=g+12|0;k=g+8|0;n=g+4|0;l=g;c[o>>2]=a;c[m>>2]=b;c[g+20>>2]=d;c[j>>2]=e;c[h>>2]=f;c[k>>2]=c[o>>2];c[n>>2]=c[(c[k>>2]|0)+64>>2];eb[c[(c[n>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+64>>2]|0,c[m>>2]|0,c[j>>2]|0);c[l>>2]=c[(c[k>>2]|0)+68>>2];eb[c[(c[l>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+68>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function 
Kk(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;n=g+24|0;m=g+20|0;j=g+16|0;h=g+12|0;k=g+8|0;o=g+4|0;l=g;c[p>>2]=a;c[n>>2]=b;c[m>>2]=d;c[j>>2]=e;c[h>>2]=f;c[k>>2]=c[p>>2];c[o>>2]=c[(c[k>>2]|0)+64>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+64>>2]|0,c[n>>2]|0,c[m>>2]|0,c[j>>2]|0,c[h>>2]|0);c[l>>2]=c[(c[k>>2]|0)+68>>2];eb[c[(c[l>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+68>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function Lk(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;g=i;i=i+32|0;o=g+28|0;h=g+24|0;j=g+16|0;m=g+12|0;k=g+8|0;l=g+4|0;n=g;c[o>>2]=a;c[h>>2]=b;c[g+20>>2]=d;c[j>>2]=e;c[m>>2]=f;c[k>>2]=c[o>>2];c[n>>2]=c[(c[k>>2]|0)+68>>2];eb[c[(c[n>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+68>>2]|0,c[j>>2]|0,c[m>>2]|0);c[l>>2]=c[(c[k>>2]|0)+64>>2];eb[c[(c[l>>2]|0)+56>>2]&63](c[(c[k>>2]|0)+64>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function Mk(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;h=g+24|0;j=g+20|0;k=g+16|0;l=g+12|0;m=g+8|0;n=g+4|0;o=g;c[p>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[m>>2]=c[p>>2];c[o>>2]=c[(c[m>>2]|0)+68>>2];eb[c[(c[o>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+68>>2]|0,c[k>>2]|0,c[l>>2]|0);c[n>>2]=c[(c[m>>2]|0)+64>>2];Ya[c[(c[n>>2]|0)+56>>2]&63](c[(c[m>>2]|0)+64>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function Nk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Ok(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;f=g;h=g+24|0;d=g+20|0;e=g+16|0;c[h>>2]=a;c[d>>2]=b;c[e>>2]=c[h>>2];a=c[c[d>>2]>>2]|0;d=c[d>>2]|0;if((c[(c[e>>2]|0)+56>>2]|0)==22)b=1;else b=(c[(c[e>>2]|0)+56>>2]|0)==23;j=c[(c[e>>2]|0)+72>>2]|0;h=c[(c[e>>2]|0)+68>>2]|0;e=c[(c[e>>2]|0)+64>>2]|0;c[f>>2]=b?22957:22961;c[f+4>>2]=j;c[f+8>>2]=h;c[f+12>>2]=e;eb[a&63](d,22582,f);i=g;return}function 
Pk(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Qk(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Rk()|0);i=b;return}function Rk(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,13736)|0;i=b;return c[a>>2]|0}function Sk(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;s=i;i=i+64|0;e=s+48|0;f=s+40|0;g=s+36|0;q=s+32|0;p=s+28|0;j=s+24|0;m=s+20|0;o=s+16|0;r=s+12|0;k=s+8|0;l=s+4|0;n=s;c[s+44>>2]=a;c[f>>2]=b;c[g>>2]=d;c[m>>2]=0;c[o>>2]=0;if(!(Tk(c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;o=c[e>>2]|0;i=s;return o|0}c[p>>2]=c[f>>2];c[r>>2]=Ed(2,((c[(c[p>>2]|0)+16>>2]|0)-(c[(c[p>>2]|0)+12>>2]|0)|0)/4|0,((c[(c[p>>2]|0)+24>>2]|0)-(c[(c[p>>2]|0)+20>>2]|0)|0)/4|0)|0;c[k>>2]=Td(c[r>>2]|0,c[(c[p>>2]|0)+8>>2]|0)|0;c[l>>2]=0;while(1){if((c[l>>2]|0)>=(c[c[k>>2]>>2]|0))break;if((c[(c[k>>2]|0)+4+((c[l>>2]|0)*12|0)+4>>2]|0)<0){c[n>>2]=(c[(c[k>>2]|0)+4+((c[l>>2]|0)*12|0)>>2]|0)-1;b=c[n>>2]|0;a=(c[k>>2]|0)+4+((c[l>>2]|0)*12|0)+4|0;f=_(c[a>>2]|0,-1)|0;c[a>>2]=f;f=_(b,f)|0;c[m>>2]=(c[m>>2]|0)-f;f=c[n>>2]|0;b=(c[k>>2]|0)+4+((c[l>>2]|0)*12|0)+8|0;a=_(c[b>>2]|0,-1)|0;c[b>>2]=a;a=_(f,a)|0;c[o>>2]=(c[o>>2]|0)-a}c[l>>2]=(c[l>>2]|0)+1}n=c[g>>2]|0;c[j>>2]=uc(n,Hn(c[(c[p>>2]|0)+4>>2]|0,c[k>>2]|0,(c[(c[p>>2]|0)+12>>2]|0)+(c[m>>2]<<2)|0,(c[(c[p>>2]|0)+20>>2]|0)+(c[o>>2]<<2)|0,0)|0)|0;ee(c[r>>2]|0,c[k>>2]|0);if(!(c[j>>2]|0)){c[e>>2]=0;o=c[e>>2]|0;i=s;return 
o|0}c[q>>2]=oh(88,13748,26)|0;if(!(c[c[(c[p>>2]|0)+4>>2]>>2]|0)){c[(c[q>>2]|0)+80>>2]=1;c[(c[q>>2]|0)+76>>2]=0}else{c[(c[q>>2]|0)+80>>2]=c[(c[(c[p>>2]|0)+4>>2]|0)+4>>2];c[(c[q>>2]|0)+76>>2]=c[(c[(c[p>>2]|0)+4>>2]|0)+4+8>>2]}c[(c[q>>2]|0)+68>>2]=c[m>>2];c[(c[q>>2]|0)+72>>2]=c[o>>2];c[(c[q>>2]|0)+64>>2]=c[j>>2];o=(c[q>>2]|0)+8|0;n=(c[j>>2]|0)+8|0;c[o>>2]=c[n>>2];c[o+4>>2]=c[n+4>>2];c[o+8>>2]=c[n+8>>2];c[o+12>>2]=c[n+12>>2];c[o+16>>2]=c[n+16>>2];c[o+20>>2]=c[n+20>>2];c[o+24>>2]=c[n+24>>2];c[o+28>>2]=c[n+28>>2];o=(c[q>>2]|0)+8+24|0;h[o>>3]=+h[o>>3]+ +((((c[(c[q>>2]|0)+80>>2]|0)-1|0)/2|0)<<3|0);o=(c[q>>2]|0)+8|0;h[o>>3]=+h[o>>3]+ +((((c[(c[q>>2]|0)+80>>2]|0)-1|0)/2|0)<<2|0);o=(c[q>>2]|0)+8+24|0;h[o>>3]=+h[o>>3]+1.0;c[e>>2]=c[q>>2];o=c[e>>2]|0;i=s;return o|0}function Tk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;h=i;i=i+16|0;d=h+12|0;e=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[f>>2]=b;if(!(Yk(c[e>>2]|0)|0)){c[d>>2]=0;g=c[d>>2]|0;i=h;return g|0}c[g>>2]=c[e>>2];if(!(c[c[(c[g>>2]|0)+4>>2]>>2]|0)){c[d>>2]=1;g=c[d>>2]|0;i=h;return g|0}if(((c[c[(c[g>>2]|0)+4>>2]>>2]|0)==1?(Zk(c[(c[g>>2]|0)+12>>2]|0,c[(c[g>>2]|0)+16>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4+4>>2]|0)|0)!=0:0)?(Zk(c[(c[g>>2]|0)+20>>2]|0,c[(c[g>>2]|0)+24>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4+8>>2]|0)|0)!=0:0){c[d>>2]=1;g=c[d>>2]|0;i=h;return g|0}c[d>>2]=((c[(c[f>>2]|0)+164>>2]&4|0)!=0^1)&1;g=c[d>>2]|0;i=h;return g|0}function Uk(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;s=i;i=i+64|0;v=s+52|0;t=s+48|0;h=s+40|0;j=s+36|0;k=s+32|0;o=s+28|0;u=s+24|0;l=s+20|0;p=s+16|0;r=s+12|0;n=s+8|0;m=s+4|0;q=s;c[v>>2]=a;c[t>>2]=b;c[s+44>>2]=d;c[h>>2]=e;c[j>>2]=f;c[k>>2]=c[v>>2];c[u>>2]=c[(c[k>>2]|0)+64>>2];eb[c[(c[u>>2]|0)+56>>2]&63](c[u>>2]|0,(c[t>>2]|0)+(c[(c[k>>2]|0)+68>>2]<<2)|0,(c[h>>2]|0)+(c[(c[k>>2]|0)+72>>2]<<2)|0);c[o>>2]=c[(c[k>>2]|0)+80>>2];if((c[o>>2]|0)<=1){i=s;return}c[p>>2]=c[(c[k>>2]|0)+76>>2];c[l>>2]=1;while(1){if((c[l>>2]|0)>=(((c[o>>2]|0)+1|0)/2|0|0))break;f=_(c[p>>2]|0,c[l>>2]|0)|0;g[r>>2]=+g[(c[h>>2]|0)+(f<<2)>>2];f=_(c[p>>2]|0,c[l>>2]|0)|0;g[n>>2]=+g[(c[j>>2]|0)+(f<<2)>>2];f=_(c[p>>2]|0,(c[o>>2]|0)-(c[l>>2]|0)|0)|0;g[q>>2]=+g[(c[h>>2]|0)+(f<<2)>>2];f=_(c[p>>2]|0,(c[o>>2]|0)-(c[l>>2]|0)|0)|0;g[m>>2]=+g[(c[j>>2]|0)+(f<<2)>>2];f=_(c[p>>2]|0,c[l>>2]|0)|0;g[(c[h>>2]|0)+(f<<2)>>2]=+g[r>>2]-+g[m>>2];f=_(c[p>>2]|0,c[l>>2]|0)|0;g[(c[j>>2]|0)+(f<<2)>>2]=+g[n>>2]+ +g[q>>2];f=_(c[p>>2]|0,(c[o>>2]|0)-(c[l>>2]|0)|0)|0;g[(c[h>>2]|0)+(f<<2)>>2]=+g[r>>2]+ +g[m>>2];f=_(c[p>>2]|0,(c[o>>2]|0)-(c[l>>2]|0)|0)|0;g[(c[j>>2]|0)+(f<<2)>>2]=+g[n>>2]-+g[q>>2];c[l>>2]=(c[l>>2]|0)+1}i=s;return}function Vk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function Wk(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+32|0;e=d;h=d+16|0;f=d+12|0;g=d+8|0;c[h>>2]=a;c[f>>2]=b;c[g>>2]=c[h>>2];b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;f=c[(c[g>>2]|0)+64>>2]|0;c[e>>2]=c[(c[g>>2]|0)+80>>2];c[e+4>>2]=f;eb[b&63](a,22611,e);i=d;return}function Xk(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Yk(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=c[e>>2];if((c[c[(c[b>>2]|0)+4>>2]>>2]|0)==1?(c[c[(c[b>>2]|0)+8>>2]>>2]|0)==0:0){b=1;b=b&1;i=d;return b|0}if(c[c[(c[b>>2]|0)+4>>2]>>2]|0){b=0;b=b&1;i=d;return 
b|0}b=(c[c[(c[b>>2]|0)+8>>2]>>2]|0)!=2147483647;b=b&1;i=d;return b|0}function Zk(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;k=i;i=i+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;if((c[f>>2]|0)>>>0>(c[g>>2]|0)>>>0)f=(c[f>>2]|0)-(c[g>>2]|0)|0;else f=(c[g>>2]|0)-(c[f>>2]|0)|0;d=c[j>>2]|0;d=((f|0)/4|0|0)>=(_(c[h>>2]|0,(c[j>>2]|0)>0?d:0-d|0)|0)&1;i=k;return d|0}function _k(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,$k()|0);i=b;return}function $k(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,13764)|0;i=b;return c[a>>2]|0}function al(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0;m=i;i=i+32|0;e=m+24|0;n=m+20|0;f=m+16|0;g=m+12|0;l=m+8|0;k=m+4|0;j=m;c[n>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(bl(c[n>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;d=c[e>>2]|0;i=m;return d|0}c[k>>2]=c[f>>2];d=c[g>>2]|0;c[j>>2]=vc(d,Hn(c[(c[k>>2]|0)+4>>2]|0,c[(c[k>>2]|0)+8>>2]|0,c[(c[k>>2]|0)+12>>2]|0,c[(c[k>>2]|0)+16>>2]|0,0)|0,32768,0,0)|0;if(c[j>>2]|0){c[l>>2]=sn(80,13776,32)|0;c[(c[l>>2]|0)+72>>2]=c[(c[(c[k>>2]|0)+4>>2]|0)+4>>2];c[(c[l>>2]|0)+68>>2]=c[(c[(c[k>>2]|0)+4>>2]|0)+4+8>>2];c[(c[l>>2]|0)+64>>2]=c[j>>2];d=(c[l>>2]|0)+8|0;b=(c[j>>2]|0)+8|0;c[d>>2]=c[b>>2];c[d+4>>2]=c[b+4>>2];c[d+8>>2]=c[b+8>>2];c[d+12>>2]=c[b+12>>2];c[d+16>>2]=c[b+16>>2];c[d+20>>2]=c[b+20>>2];c[d+24>>2]=c[b+24>>2];c[d+28>>2]=c[b+28>>2];d=(c[l>>2]|0)+8+24|0;h[d>>3]=+h[d>>3]+ +((((c[(c[l>>2]|0)+72>>2]|0)-1|0)/2|0)<<2|0);d=(c[l>>2]|0)+8|0;h[d>>3]=+h[d>>3]+ +((((c[(c[l>>2]|0)+72>>2]|0)-1|0)/2|0)<<1|0);c[e>>2]=c[l>>2];d=c[e>>2]|0;i=m;return d|0}else{c[e>>2]=0;d=c[e>>2]|0;i=m;return d|0}return 0}function bl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0;g=i;i=i+16|0;e=g+4|0;f=g;c[g+8>>2]=a;c[e>>2]=b;c[f>>2]=d;if(c[(c[f>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(gl(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function cl(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;m=i;i=i+48|0;q=m+36|0;n=m+32|0;e=m+28|0;p=m+24|0;l=m+20|0;j=m+16|0;k=m+12|0;o=m+8|0;f=m+4|0;h=m;c[q>>2]=a;c[n>>2]=b;c[e>>2]=d;c[p>>2]=c[q>>2];c[l>>2]=c[(c[p>>2]|0)+68>>2];c[k>>2]=c[(c[p>>2]|0)+72>>2];c[o>>2]=c[(c[p>>2]|0)+64>>2];eb[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[n>>2]|0,c[e>>2]|0);c[j>>2]=1;while(1){if((c[j>>2]|0)>=((c[k>>2]|0)-(c[j>>2]|0)|0))break;a=_(c[l>>2]|0,c[j>>2]|0)|0;g[f>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[l>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0)|0;g[h>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[l>>2]|0,c[j>>2]|0)|0;g[(c[e>>2]|0)+(a<<2)>>2]=+g[f>>2]-+g[h>>2];a=_(c[l>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0)|0;g[(c[e>>2]|0)+(a<<2)>>2]=+g[f>>2]+ +g[h>>2];c[j>>2]=(c[j>>2]|0)+1}i=m;return}function dl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function el(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+32|0;e=d;h=d+16|0;f=d+12|0;g=d+8|0;c[h>>2]=a;c[f>>2]=b;c[g>>2]=c[h>>2];b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;f=c[(c[g>>2]|0)+64>>2]|0;c[e>>2]=c[(c[g>>2]|0)+72>>2];c[e+4>>2]=f;eb[b&63](a,22631,e);i=d;return}function fl(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function gl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=c[g>>2];if(c[(c[f>>2]|0)+164>>2]&32768){b=0;b=b&1;i=e;return b|0}if((c[c[(c[d>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=e;return b|0}if(c[c[(c[d>>2]|0)+8>>2]>>2]|0){b=0;b=b&1;i=e;return b|0}b=(c[(c[d>>2]|0)+20>>2]|0)==8;b=b&1;i=e;return b|0}function hl(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,il(0)|0);a=c[d>>2]|0;Bd(a,il(1)|0);i=b;return}function il(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,13792)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}function jl(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0.0;u=i;i=i+64|0;f=u+60|0;w=u+56|0;v=u+52|0;g=u+48|0;o=u+44|0;e=u+40|0;t=u+36|0;q=u+32|0;r=u+28|0;p=u+24|0;s=u+20|0;k=u+16|0;l=u+12|0;m=u+8|0;j=u+4|0;n=u;c[w>>2]=a;c[v>>2]=b;c[g>>2]=d;c[o>>2]=c[w>>2];c[e>>2]=c[v>>2];c[k>>2]=0;c[l>>2]=0;c[m>>2]=0;c[j>>2]=0;if(!(kl(c[w>>2]|0,c[v>>2]|0,c[g>>2]|0)|0)){c[f>>2]=0;m=c[f>>2]|0;i=u;return m|0}c[q>>2]=c[(c[(c[e>>2]|0)+4>>2]|0)+4>>2];c[p>>2]=c[(c[(c[e>>2]|0)+4>>2]|0)+4+4>>2];c[s>>2]=c[(c[(c[e>>2]|0)+4>>2]|0)+4+8>>2];a=(c[q>>2]|0)-1|0;if(c[(c[o>>2]|0)+8>>2]|0)c[r>>2]=ll((a<<1)-1|0)|0;else c[r>>2]=a;c[j>>2]=wb(c[r>>2]<<2)|0;b=c[g>>2]|0;e=Ed(c[r>>2]|0,1,1)|0;a=Ed(1,0,0)|0;c[k>>2]=vc(b,In(e,a,c[j>>2]|0,c[j>>2]|0,0)|0,8,0,0)|0;if(((c[k>>2]|0)!=0?(e=Ed(c[r>>2]|0,1,1)|0,a=Ed(1,0,0)|0,c[n>>2]=In(e,a,c[j>>2]|0,c[j>>2]|0,0)|0,a=vc(c[g>>2]|0,c[n>>2]|0,8,0,0)|0,c[l>>2]=a,(a|0)!=0):0)?(e=c[g>>2]|0,a=Ed(c[r>>2]|0,1,1)|0,g=Ed(1,0,0)|0,c[m>>2]=vc(e,In(a,g,c[j>>2]|0,c[j>>2]|0,0)|0,8,2,0)|0,(c[m>>2]|0)!=0):0){xb(c[j>>2]|0);c[j>>2]=0;c[t>>2]=sn(104,13804,33)|0;c[(c[t>>2]|0)+64>>2]=c[k>>2];c[(c[t>>2]|0)+68>>2]=c[l>>2];c[(c[t>>2]|0)+100>>2]=c[m>>2];c[(c[t>>2]|0)+72>>2]=0;c[(c[t>>2]|0)+76>>2]=c[q>>2];c[(c[t>>2]|0)+80>>2]=c[r>>2];c[(c[t>>2]|0)+92>>2]=c[p>>2];c[(c[t>>2]|0)+96>>2]=c[s>>2];jc((c[k>>2]|0)+8|0,(c[l>>2]|0)+8|0,(c[t>>2]|0)+8|0);x=+(((((c[r>>2]|0)/2|0)-1|0)*6|0)+(c[r>>2]|0)+(c[q>>2]|0)+(_((c[q>>2]|0)-1|0,c[(c[o>>2]|0)+8>>2]|0)|0)|0);m=(c[t>>2]|0)+8+24|0;h[m>>3]=+h[m>>3]+x;x=+((((c[r>>2]|0)/2|0)-1<<1)+2+(_((c[q>>2]|0)-1|0,c[(c[o>>2]|0)+8>>2]|0)|0)|0);m=(c[t>>2]|0)+8|0;h[m>>3]=+h[m>>3]+x;m=(c[t>>2]|0)+8+8|0;h[m>>3]=+h[m>>3]+ +((((c[r>>2]|0)/2|0)-1<<2)+2+(c[(c[o>>2]|0)+8>>2]|0)|0);m=(c[t>>2]|0)+8+24|0;h[m>>3]=+h[m>>3]+ +((c[q>>2]|0)-2-(c[(c[o>>2]|0)+8>>2]|0)|0);m=(c[t>>2]|0)+8|0;h[m>>3]=+h[m>>3]+ +((((c[r>>2]|0)/2|0)-1<<1)+((c[q>>2]|0)-2)-(c[(c[o>>2]|0)+8>>2]|0)|0);c[f>>2]=c[t>>2];m=c[f>>2]|0;i=u;return 
m|0}yb(c[j>>2]|0);pc(c[m>>2]|0);pc(c[l>>2]|0);pc(c[k>>2]|0);c[f>>2]=0;m=c[f>>2]|0;i=u;return m|0}function kl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;h=g+8|0;e=g+4|0;f=g;c[g+12>>2]=a;c[h>>2]=b;c[e>>2]=d;c[f>>2]=c[h>>2];if((c[c[(c[f>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=g;return b|0}if(c[c[(c[f>>2]|0)+8>>2]>>2]|0){b=0;b=b&1;i=g;return b|0}if((c[(c[f>>2]|0)+20>>2]|0)!=8){b=0;b=b&1;i=g;return b|0}if(!(gd(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)|0)){b=0;b=b&1;i=g;return b|0}if((c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)<=2){b=0;b=b&1;i=g;return b|0}if((c[(c[e>>2]|0)+164>>2]&8|0)!=0?(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)<=32:0){b=0;b=b&1;i=g;return b|0}if(!(c[(c[e>>2]|0)+164>>2]&8)){b=1;b=b&1;i=g;return b|0}b=(md((c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)-1|0)|0)!=0;b=b&1;i=g;return b|0}function ll(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;d=e;c[d>>2]=a;while(1){if(id(c[d>>2]|0,13824)|0)b=((c[d>>2]|0)%2|0|0)!=0;else b=1;a=c[d>>2]|0;if(!b)break;c[d>>2]=a+1}i=e;return a|0}function ml(a,b,d){a=a|0;b=b|0;d=d|0;var e=0.0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0;C=i;i=i+96|0;D=C+84|0;f=C+80|0;u=C+76|0;n=C+72|0;z=C+68|0;A=C+64|0;j=C+60|0;B=C+56|0;y=C+52|0;x=C+48|0;w=C+44|0;v=C+40|0;q=C+36|0;r=C+32|0;h=C+28|0;s=C+24|0;o=C+20|0;t=C+16|0;p=C+12|0;k=C+8|0;l=C+4|0;m=C;c[D>>2]=a;c[f>>2]=b;c[u>>2]=d;c[n>>2]=c[D>>2];c[z>>2]=c[(c[n>>2]|0)+76>>2];c[A>>2]=c[(c[n>>2]|0)+80>>2];c[j>>2]=c[(c[n>>2]|0)+92>>2];c[v>>2]=wb(c[A>>2]<<2)|0;c[w>>2]=c[(c[n>>2]|0)+84>>2];c[x>>2]=1;c[y>>2]=0;while(1){if((c[y>>2]|0)>=((c[z>>2]|0)-1|0))break;b=_(c[x>>2]|0,c[j>>2]|0)|0;g[(c[v>>2]|0)+(c[y>>2]<<2)>>2]=+g[(c[f>>2]|0)+(b<<2)>>2];c[y>>2]=(c[y>>2]|0)+1;b=c[x>>2]|0;d=c[w>>2]|0;if((c[x>>2]|0)<=(92681-(c[w>>2]|0)|0)){b=_(b,d)|0;b=(b|0)%(c[z>>2]|0)|0}else 
b=cd(b,d,c[z>>2]|0)|0;c[x>>2]=b}c[y>>2]=(c[z>>2]|0)-1;while(1){if((c[y>>2]|0)>=(c[A>>2]|0))break;g[(c[v>>2]|0)+(c[y>>2]<<2)>>2]=0.0;c[y>>2]=(c[y>>2]|0)+1}c[B>>2]=c[(c[n>>2]|0)+96>>2];c[h>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[h>>2]|0)+56>>2]&63](c[h>>2]|0,c[v>>2]|0,c[v>>2]|0);e=+g[c[f>>2]>>2];g[r>>2]=e;g[c[u>>2]>>2]=e+ +g[c[v>>2]>>2];c[q>>2]=c[(c[n>>2]|0)+72>>2];f=c[v>>2]|0;g[f>>2]=+g[f>>2]*+g[c[q>>2]>>2];c[y>>2]=1;while(1){e=+g[(c[q>>2]|0)+(c[y>>2]<<2)>>2];if((c[y>>2]|0)>=((c[A>>2]|0)/2|0|0))break;g[t>>2]=e;g[p>>2]=+g[(c[q>>2]|0)+((c[A>>2]|0)-(c[y>>2]|0)<<2)>>2];g[s>>2]=+g[(c[v>>2]|0)+(c[y>>2]<<2)>>2];g[o>>2]=+g[(c[v>>2]|0)+((c[A>>2]|0)-(c[y>>2]|0)<<2)>>2];g[k>>2]=+g[t>>2]*+g[s>>2]-+g[p>>2]*+g[o>>2];g[l>>2]=+g[t>>2]*+g[o>>2]+ +g[p>>2]*+g[s>>2];g[(c[v>>2]|0)+(c[y>>2]<<2)>>2]=+g[k>>2]+ +g[l>>2];g[(c[v>>2]|0)+((c[A>>2]|0)-(c[y>>2]|0)<<2)>>2]=+g[k>>2]-+g[l>>2];c[y>>2]=(c[y>>2]|0)+1}t=(c[v>>2]|0)+(c[y>>2]<<2)|0;g[t>>2]=+g[t>>2]*e;t=c[v>>2]|0;g[t>>2]=+g[t>>2]+ +g[r>>2];c[m>>2]=c[(c[n>>2]|0)+68>>2];eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[v>>2]|0,c[v>>2]|0);g[(c[u>>2]|0)+(c[B>>2]<<2)>>2]=+g[c[v>>2]>>2];t=c[(c[n>>2]|0)+88>>2]|0;c[w>>2]=t;c[x>>2]=t;t=(c[A>>2]|0)==((c[z>>2]|0)-1|0);c[y>>2]=1;if(!t){while(1){if((c[y>>2]|0)>=((c[z>>2]|0)-1|0))break;b=_(c[x>>2]|0,c[B>>2]|0)|0;g[(c[u>>2]|0)+(b<<2)>>2]=+g[(c[v>>2]|0)+(c[y>>2]<<2)>>2]+ +g[(c[v>>2]|0)+((c[A>>2]|0)-(c[y>>2]|0)<<2)>>2];c[y>>2]=(c[y>>2]|0)+1;b=c[x>>2]|0;d=c[w>>2]|0;if((c[x>>2]|0)<=(92681-(c[w>>2]|0)|0)){d=_(b,d)|0;d=(d|0)%(c[z>>2]|0)|0}else d=cd(b,d,c[z>>2]|0)|0;c[x>>2]=d}z=c[v>>2]|0;xb(z);i=C;return}while(1){e=+g[(c[v>>2]|0)+(c[y>>2]<<2)>>2];if((c[y>>2]|0)>=((c[A>>2]|0)/2|0|0))break;d=_(c[x>>2]|0,c[B>>2]|0)|0;g[(c[u>>2]|0)+(d<<2)>>2]=e+ +g[(c[v>>2]|0)+((c[A>>2]|0)-(c[y>>2]|0)<<2)>>2];c[y>>2]=(c[y>>2]|0)+1;d=c[x>>2]|0;b=c[w>>2]|0;if((c[x>>2]|0)<=(92681-(c[w>>2]|0)|0)){d=_(d,b)|0;d=(d|0)%(c[z>>2]|0)|0}else 
d=cd(d,b,c[z>>2]|0)|0;c[x>>2]=d}b=_(c[x>>2]|0,c[B>>2]|0)|0;g[(c[u>>2]|0)+(b<<2)>>2]=e;c[y>>2]=(c[y>>2]|0)+1;b=c[x>>2]|0;d=c[w>>2]|0;if((c[x>>2]|0)<=(92681-(c[w>>2]|0)|0)){d=_(b,d)|0;d=(d|0)%(c[z>>2]|0)|0}else d=cd(b,d,c[z>>2]|0)|0;c[x>>2]=d;while(1){if((c[y>>2]|0)>=(c[A>>2]|0))break;b=_(c[x>>2]|0,c[B>>2]|0)|0;g[(c[u>>2]|0)+(b<<2)>>2]=+g[(c[v>>2]|0)+((c[A>>2]|0)-(c[y>>2]|0)<<2)>>2]-+g[(c[v>>2]|0)+(c[y>>2]<<2)>>2];c[y>>2]=(c[y>>2]|0)+1;b=c[x>>2]|0;d=c[w>>2]|0;if((c[x>>2]|0)<=(92681-(c[w>>2]|0)|0)){d=_(b,d)|0;d=(d|0)%(c[z>>2]|0)|0}else d=cd(b,d,c[z>>2]|0)|0;c[x>>2]=d}z=c[v>>2]|0;xb(z);i=C;return}function nl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;f=i;i=i+16|0;g=f+8|0;d=f+4|0;e=f;c[g>>2]=a;c[d>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+64>>2]|0,c[d>>2]|0);rc(c[(c[e>>2]|0)+68>>2]|0,c[d>>2]|0);rc(c[(c[e>>2]|0)+100>>2]|0,c[d>>2]|0);a=c[e>>2]|0;if(!(c[d>>2]|0)){ql(c[a+72>>2]|0);c[(c[e>>2]|0)+72>>2]=0;i=f;return}else{b=ed(c[a+76>>2]|0)|0;c[(c[e>>2]|0)+84>>2]=b;b=dd(c[(c[e>>2]|0)+84>>2]|0,(c[(c[e>>2]|0)+76>>2]|0)-2|0,c[(c[e>>2]|0)+76>>2]|0)|0;c[(c[e>>2]|0)+88>>2]=b;d=rl(c[d>>2]|0,c[(c[e>>2]|0)+100>>2]|0,c[(c[e>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+80>>2]|0,c[(c[e>>2]|0)+88>>2]|0)|0;c[(c[e>>2]|0)+72>>2]=d;i=f;return}}function ol(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;h=i;i=i+48|0;g=h+32|0;f=h+24|0;j=h;n=h+44|0;d=h+40|0;e=h+36|0;c[n>>2]=a;c[d>>2]=b;c[e>>2]=c[n>>2];a=c[c[d>>2]>>2]|0;b=c[d>>2]|0;n=c[(c[e>>2]|0)+80>>2]|0;m=c[(c[e>>2]|0)+92>>2]|0;l=c[(c[e>>2]|0)+96>>2]|0;k=c[(c[e>>2]|0)+64>>2]|0;c[j>>2]=c[(c[e>>2]|0)+76>>2];c[j+4>>2]=n;c[j+8>>2]=m;c[j+12>>2]=l;c[j+16>>2]=k;eb[a&63](b,22651,j);if((c[(c[e>>2]|0)+68>>2]|0)!=(c[(c[e>>2]|0)+64>>2]|0)){k=c[c[d>>2]>>2]|0;l=c[d>>2]|0;c[f>>2]=c[(c[e>>2]|0)+68>>2];eb[k&63](l,23700,f)}if((c[(c[e>>2]|0)+100>>2]|0)==(c[(c[e>>2]|0)+64>>2]|0)){k=c[d>>2]|0;k=k+8|0;k=c[k>>2]|0;l=c[d>>2]|0;$a[k&127](l,41);i=h;return}if((c[(c[e>>2]|0)+100>>2]|0)==(c[(c[e>>2]|0)+68>>2]|0)){k=c[d>>2]|0;k=k+8|0;k=c[k>>2]|0;l=c[d>>2]|0;$a[k&127](l,41);i=h;return}l=c[c[d>>2]>>2]|0;k=c[d>>2]|0;c[g>>2]=c[(c[e>>2]|0)+100>>2];eb[l&63](k,23700,g);k=c[d>>2]|0;k=k+8|0;k=c[k>>2]|0;l=c[d>>2]|0;$a[k&127](l,41);i=h;return}function pl(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+100>>2]|0);pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function ql(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;yd(c[d>>2]|0,13820);i=b;return}function rl(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;w=i;i=i+80|0;k=w+64|0;j=w+60|0;l=w+56|0;m=w+52|0;n=w+48|0;o=w+44|0;s=w+40|0;r=w+36|0;q=w+32|0;p=w+28|0;t=w+16|0;u=w+24|0;v=w;c[j>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[o>>2]=f;c[s>>2]=c[l>>2];a=xd(c[m>>2]|0,(c[n>>2]|0)+1|0,c[o>>2]|0,c[3455]|0)|0;c[r>>2]=a;if(a){c[k>>2]=c[r>>2];n=c[k>>2]|0;i=w;return n|0}c[r>>2]=wb(c[n>>2]<<2)|0;h[t>>3]=+(c[n>>2]|0);c[u>>2]=Ae(c[j>>2]|0,c[m>>2]|0)|0;c[q>>2]=0;c[p>>2]=1;while(1){f=c[u>>2]|0;if((c[q>>2]|0)>=((c[m>>2]|0)-1|0))break;eb[c[f+4>>2]&63](c[u>>2]|0,c[p>>2]|0,v);g[(c[r>>2]|0)+(c[q>>2]<<2)>>2]=(+h[v>>3]+ +h[v+8>>3])/+h[t>>3];c[q>>2]=(c[q>>2]|0)+1;f=c[p>>2]|0;j=c[o>>2]|0;if((c[p>>2]|0)<=(92681-(c[o>>2]|0)|0)){f=_(f,j)|0;f=(f|0)%(c[m>>2]|0)|0}else 
f=cd(f,j,c[m>>2]|0)|0;c[p>>2]=f}Be(f);while(1){if((c[q>>2]|0)>=(c[n>>2]|0))break;g[(c[r>>2]|0)+(c[q>>2]<<2)>>2]=0.0;c[q>>2]=(c[q>>2]|0)+1}a:do if((c[n>>2]|0)>((c[m>>2]|0)-1|0)){c[q>>2]=1;while(1){if((c[q>>2]|0)>=((c[m>>2]|0)-1|0))break a;g[(c[r>>2]|0)+((c[n>>2]|0)-(c[q>>2]|0)<<2)>>2]=+g[(c[r>>2]|0)+((c[m>>2]|0)-1-(c[q>>2]|0)<<2)>>2];c[q>>2]=(c[q>>2]|0)+1}}while(0);eb[c[(c[s>>2]|0)+56>>2]&63](c[l>>2]|0,c[r>>2]|0,c[r>>2]|0);wd(c[m>>2]|0,(c[n>>2]|0)+1|0,c[o>>2]|0,c[r>>2]|0,13820);c[k>>2]=c[r>>2];n=c[k>>2]|0;i=w;return n|0}function sl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;b=ul(c[f>>2]|0,c[e>>2]|0,0)|0;i=d;return b|0}function tl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;b=ul(c[f>>2]|0,c[e>>2]|0,1)|0;i=d;return b|0}function ul(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,13840)|0;c[(c[e>>2]|0)+12>>2]=c[j>>2];c[(c[e>>2]|0)+8>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function vl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0.0;q=i;i=i+48|0;e=q+44|0;r=q+40|0;f=q+36|0;l=q+28|0;o=q+24|0;n=q+20|0;k=q+16|0;p=q+12|0;j=q+8|0;g=q+4|0;m=q;c[r>>2]=a;c[f>>2]=b;c[q+32>>2]=d;c[l>>2]=c[r>>2];a=c[r>>2]|0;b=c[f>>2]|0;if(c[(c[l>>2]|0)+16>>2]|0){if(!(wl(a,b)|0)){c[e>>2]=0;m=c[e>>2]|0;i=q;return m|0}}else if(!(xl(a,b)|0)){c[e>>2]=0;m=c[e>>2]|0;i=q;return 
m|0}c[n>>2]=c[f>>2];if((c[(c[n>>2]|0)+20>>2]|0)>>>0>=0?(c[(c[n>>2]|0)+20>>2]|0)>>>0<=3:0){c[p>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+4>>2];c[j>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+8>>2];c[o>>2]=sn(128,13852,(c[(c[l>>2]|0)+16>>2]|0)!=0?35:34)|0}else{c[p>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+8>>2];c[j>>2]=c[(c[(c[n>>2]|0)+4>>2]|0)+4+4>>2];c[o>>2]=sn(128,13852,(c[(c[l>>2]|0)+16>>2]|0)!=0?37:36)|0}c[k>>2]=(c[(c[n>>2]|0)+4>>2]|0)+4;c[m>>2]=c[c[k>>2]>>2];c[(c[o>>2]|0)+116>>2]=c[(c[l>>2]|0)+12>>2];c[(c[o>>2]|0)+88>>2]=c[m>>2];c[(c[o>>2]|0)+96>>2]=c[p>>2];c[(c[o>>2]|0)+64>>2]=c[p>>2]<<1;c[(c[o>>2]|0)+68>>2]=c[j>>2];c[(c[o>>2]|0)+72>>2]=0-(c[j>>2]|0);k=Cl(c[(c[n>>2]|0)+20>>2]|0,c[m>>2]|0,c[j>>2]|0)|0;c[(c[o>>2]|0)+108>>2]=k;c[g>>2]=Dl(c[m>>2]|0)|0;c[(c[o>>2]|0)+76>>2]=c[g>>2]<<1;c[(c[o>>2]|0)+80>>2]=c[g>>2];c[(c[o>>2]|0)+84>>2]=0-(c[g>>2]|0);k=Cl(c[(c[n>>2]|0)+20>>2]|0,c[m>>2]|0,c[g>>2]|0)|0;c[(c[o>>2]|0)+112>>2]=k;ke(c[(c[n>>2]|0)+8>>2]|0,(c[o>>2]|0)+92|0,(c[o>>2]|0)+100|0,(c[o>>2]|0)+104|0)|0;c[(c[o>>2]|0)+120>>2]=c[l>>2];fc((c[o>>2]|0)+8|0);lc((c[(c[o>>2]|0)+92>>2]|0)/(c[(c[(c[(c[l>>2]|0)+8>>2]|0)+40>>2]|0)+4>>2]|0)|0,(c[(c[l>>2]|0)+8>>2]|0)+8|0,(c[o>>2]|0)+8|0);if(c[(c[l>>2]|0)+16>>2]|0){s=+(_(c[m>>2]<<1,c[(c[o>>2]|0)+92>>2]|0)|0);m=(c[o>>2]|0)+8+24|0;h[m>>3]=+h[m>>3]+s}c[(c[o>>2]|0)+52>>2]=((c[(c[l>>2]|0)+16>>2]|0)!=0^1)&1;c[e>>2]=c[o>>2];m=c[e>>2]|0;i=q;return m|0}function wl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;h=i;i=i+48|0;l=h+32|0;j=h+28|0;k=h+24|0;e=h+20|0;f=h+16|0;g=h+12|0;d=h;c[l>>2]=a;c[j>>2]=b;c[k>>2]=c[l>>2];c[e>>2]=c[(c[k>>2]|0)+8>>2];c[f>>2]=c[j>>2];if((c[c[(c[f>>2]|0)+4>>2]>>2]|0)!=1){f=0;f=f&1;i=h;return f|0}if((c[c[(c[f>>2]|0)+8>>2]>>2]|0)>1){f=0;f=f&1;i=h;return f|0}if((c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)!=(c[c[e>>2]>>2]|0)){f=0;f=f&1;i=h;return f|0}if((c[(c[f>>2]|0)+20>>2]|0)!=(c[c[(c[e>>2]|0)+40>>2]>>2]|0)){f=0;f=f&1;i=h;return f|0}if(!(ke(c[(c[f>>2]|0)+8>>2]|0,g,h+8|0,h+4|0)|0)){f=0;f=f&1;i=h;return 
f|0}c[d>>2]=Dl(c[c[e>>2]>>2]|0)|0;if((c[(c[f>>2]|0)+12>>2]|0)!=(c[(c[f>>2]|0)+16>>2]|0)){f=1;f=f&1;i=h;return f|0}if(Md(c[(c[f>>2]|0)+4>>2]|0,c[(c[f>>2]|0)+8>>2]|0)|0){f=1;f=f&1;i=h;return f|0}f=(c[g>>2]|0)<=(c[d>>2]|0);f=f&1;i=h;return f|0}function xl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0;g=i;i=i+32|0;k=g+28|0;h=g+24|0;j=g+20|0;d=g+16|0;e=g+12|0;f=g+8|0;c[k>>2]=a;c[h>>2]=b;c[j>>2]=c[k>>2];c[d>>2]=c[(c[j>>2]|0)+8>>2];c[e>>2]=c[h>>2];if((c[c[(c[e>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=g;return b|0}if((c[c[(c[e>>2]|0)+8>>2]>>2]|0)>1){b=0;b=b&1;i=g;return b|0}if((c[(c[(c[e>>2]|0)+4>>2]|0)+4>>2]|0)!=(c[c[d>>2]>>2]|0)){b=0;b=b&1;i=g;return b|0}if((c[(c[e>>2]|0)+20>>2]|0)!=(c[c[(c[d>>2]|0)+40>>2]>>2]|0)){b=0;b=b&1;i=g;return b|0}if(!(ke(c[(c[e>>2]|0)+8>>2]|0,f,g+4|0,g)|0)){b=0;b=b&1;i=g;return b|0}if((c[f>>2]|0)==1?1:(c[(c[e>>2]|0)+12>>2]|0)!=(c[(c[e>>2]|0)+16>>2]|0)){b=1;b=b&1;i=g;return b|0}b=(Md(c[(c[e>>2]|0)+4>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function yl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;Fl(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,27);i=e;return}function zl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;h=e+8|0;g=e+4|0;f=e;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[f>>2]=c[j>>2];Wa[c[(c[f>>2]|0)+116>>2]&127](c[h>>2]|0,(c[h>>2]|0)+(c[(c[f>>2]|0)+96>>2]<<2)|0,c[g>>2]|0,(c[g>>2]|0)+(c[(c[f>>2]|0)+108>>2]<<2)|0,c[(c[f>>2]|0)+64>>2]|0,c[(c[f>>2]|0)+68>>2]|0,c[(c[f>>2]|0)+72>>2]|0,c[(c[f>>2]|0)+92>>2]|0,c[(c[f>>2]|0)+100>>2]|0,c[(c[f>>2]|0)+104>>2]|0);i=e;return}function Al(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;Fl(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,28);i=e;return}function Bl(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;g=e+8|0;h=e+4|0;f=e;c[j>>2]=a;c[g>>2]=b;c[h>>2]=d;c[f>>2]=c[j>>2];Wa[c[(c[f>>2]|0)+116>>2]&127](c[h>>2]|0,(c[h>>2]|0)+(c[(c[f>>2]|0)+96>>2]<<2)|0,c[g>>2]|0,(c[g>>2]|0)+(c[(c[f>>2]|0)+108>>2]<<2)|0,c[(c[f>>2]|0)+64>>2]|0,c[(c[f>>2]|0)+68>>2]|0,c[(c[f>>2]|0)+72>>2]|0,c[(c[f>>2]|0)+92>>2]|0,c[(c[f>>2]|0)+100>>2]|0,c[(c[f>>2]|0)+104>>2]|0);i=e;return}function Cl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;h=e;c[g>>2]=a;c[f>>2]=b;c[h>>2]=d;a=_(c[h>>2]|0,(c[g>>2]|0)==0|(c[g>>2]|0)==4?c[f>>2]|0:(c[f>>2]|0)-1|0)|0;i=e;return a|0}function Dl(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;c[b>>2]=(c[b>>2]|0)+3;c[b>>2]=c[b>>2]&-4;i=d;return (c[b>>2]|0)+2|0}function El(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;m=i;i=i+32|0;g=m+16|0;h=m+12|0;j=m+8|0;k=m+4|0;l=m;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;e=c[(c[g>>2]|0)+68>>2]|0;a=c[(c[g>>2]|0)+100>>2]|0;if((((c[(c[g>>2]|0)+68>>2]|0)<0?0-e|0:e)|0)<(((c[(c[g>>2]|0)+100>>2]|0)<0?0-a|0:a)|0)){Wa[c[(c[g>>2]|0)+116>>2]&127](c[k>>2]|0,(c[k>>2]|0)+(c[(c[g>>2]|0)+80>>2]<<2)|0,c[h>>2]|0,(c[h>>2]|0)+(c[(c[g>>2]|0)+108>>2]<<2)|0,c[(c[g>>2]|0)+76>>2]|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[l>>2]|0,c[(c[g>>2]|0)+100>>2]|0,1);k=c[k>>2]|0;h=c[j>>2]|0;b=c[g>>2]|0;b=b+88|0;b=c[b>>2]|0;d=c[g>>2]|0;d=d+80|0;d=c[d>>2]|0;f=c[g>>2]|0;f=f+96|0;f=c[f>>2]|0;e=c[l>>2]|0;a=c[g>>2]|0;a=a+104|0;a=c[a>>2]|0;Lb(k,h,b,d,f,e,1,a,1);i=m;return}else{Kb(c[h>>2]|0,c[k>>2]|0,c[(c[g>>2]|0)+88>>2]|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+80>>2]|0,c[l>>2]|0,c[(c[g>>2]|0)+100>>2]|0,1,1);Wa[c[(c[g>>2]|0)+116>>2]&127](c[k>>2]|0,(c[k>>2]|0)+(c[(c[g>>2]|0)+80>>2]<<2)|0,c[k>>2]|0,(c[k>>2]|0)+(c[(c[g>>2]|0)+112>>2]<<2)|0,c[(c[g>>2]|0)+76>>2]|0,c[(c[g>>2]|0)+80>>2]|0,c[(c[g>>2]|0)+84>>2]|0,c[l>>2]|0,1,1);k=c[k>>2]|0;h=c[j>>2]|0;b=c[g>>2]|0;b=b+88|0;b=c[b>>2]|0;d=c[g>>2]|0;d=d+80|0;d=c[d>>2]|0;f=c[g>>2]|0;f=f+96|0;f=c[f>>2]|0;e=c[l>>2]|0;a=c[g>>2]|0;a=a+104|0;a=c[a>
>2]|0;Lb(k,h,b,d,f,e,1,a,1);i=m;return}}function Fl(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+48|0;g=q+36|0;h=q+32|0;j=q+28|0;k=q+24|0;m=q+20|0;p=q+16|0;f=q+12|0;o=q+8|0;l=q+4|0;n=q;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[p>>2]=c[(c[g>>2]|0)+92>>2];c[f>>2]=c[(c[g>>2]|0)+88>>2];c[l>>2]=Dl(c[f>>2]|0)|0;c[n>>2]=(_(c[f>>2]|0,c[l>>2]|0)|0)<<2;b=c[n>>2]|0;if((c[n>>2]|0)>>>0<65536){f=i;i=i+((1*b|0)+15&-16)|0;c[m>>2]=f}else c[m>>2]=wb(b)|0;c[o>>2]=0;while(1){a=c[k>>2]|0;e=c[g>>2]|0;d=c[h>>2]|0;b=c[j>>2]|0;f=c[m>>2]|0;if((c[o>>2]|0)>=((c[p>>2]|0)-(c[l>>2]|0)|0))break;Ya[a&63](e,d,b,f,c[l>>2]|0);f=_(c[l>>2]|0,c[(c[g>>2]|0)+100>>2]|0)|0;c[h>>2]=(c[h>>2]|0)+(f<<2);f=_(c[l>>2]|0,c[(c[g>>2]|0)+104>>2]|0)|0;c[j>>2]=(c[j>>2]|0)+(f<<2);c[o>>2]=(c[o>>2]|0)+(c[l>>2]|0)}Ya[a&63](e,d,b,f,(c[p>>2]|0)-(c[o>>2]|0)|0);if((c[n>>2]|0)>>>0<65536){i=q;return}xb(c[m>>2]|0);i=q;return}function Gl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;k=i;i=i+64|0;j=k+24|0;h=k;l=k+52|0;d=k+48|0;f=k+44|0;g=k+40|0;c[l>>2]=a;c[d>>2]=b;c[f>>2]=c[l>>2];c[g>>2]=c[(c[f>>2]|0)+120>>2];l=(c[(c[(c[f>>2]|0)+120>>2]|0)+16>>2]|0)!=0;b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;d=En(c[c[(c[(c[g>>2]|0)+8>>2]|0)+40>>2]>>2]|0)|0;e=c[f>>2]|0;if(l){l=c[e+80>>2]|0;j=c[(c[f>>2]|0)+88>>2]|0;f=c[(c[f>>2]|0)+92>>2]|0;e=c[(c[(c[g>>2]|0)+8>>2]|0)+4>>2]|0;c[h>>2]=d;c[h+4>>2]=l;c[h+8>>2]=j;c[h+12>>2]=f;c[h+16>>2]=e;eb[b&63](a,22684,h);i=k;return}else{h=c[e+88>>2]|0;f=c[(c[f>>2]|0)+92>>2]|0;e=c[(c[(c[g>>2]|0)+8>>2]|0)+4>>2]|0;c[j>>2]=d;c[j+4>>2]=h;c[j+8>>2]=f;c[j+12>>2]=e;eb[b&63](a,22721,j);i=k;return}}function Hl(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b+4|0;c[d>>2]=a;c[b>>2]=c[d>>2];i=b;return}function Il(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0;l=i;i=i+32|0;g=l+16|0;m=l+12|0;h=l+8|0;j=l+4|0;k=l;c[g>>2]=a;c[m>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;Kb(c[m>>2]|0,c[j>>2]|0,c[(c[g>>2]|0)+88>>2]|0,c[(c[g>>2]|0)+96>>2]|0,c[(c[g>>2]|0)+80>>2]|0,c[k>>2]|0,c[(c[g>>2]|0)+100>>2]|0,1,1);a=c[(c[g>>2]|0)+68>>2]|0;e=c[(c[g>>2]|0)+104>>2]|0;f=c[(c[g>>2]|0)+116>>2]|0;d=c[j>>2]|0;b=(c[j>>2]|0)+(c[(c[g>>2]|0)+80>>2]<<2)|0;if((((c[(c[g>>2]|0)+68>>2]|0)<0?0-a|0:a)|0)<(((c[(c[g>>2]|0)+104>>2]|0)<0?0-e|0:e)|0)){Wa[f&127](d,b,c[h>>2]|0,(c[h>>2]|0)+(c[(c[g>>2]|0)+108>>2]<<2)|0,c[(c[g>>2]|0)+76>>2]|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[k>>2]|0,1,c[(c[g>>2]|0)+104>>2]|0);i=l;return}else{Wa[f&127](d,b,c[j>>2]|0,(c[j>>2]|0)+(c[(c[g>>2]|0)+112>>2]<<2)|0,c[(c[g>>2]|0)+76>>2]|0,c[(c[g>>2]|0)+80>>2]|0,c[(c[g>>2]|0)+84>>2]|0,c[k>>2]|0,1,1);Lb(c[j>>2]|0,c[h>>2]|0,c[(c[g>>2]|0)+88>>2]|0,c[(c[g>>2]|0)+80>>2]|0,c[(c[g>>2]|0)+68>>2]|0,c[k>>2]|0,1,c[(c[g>>2]|0)+104>>2]|0,1);i=l;return}}function Jl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=zd(16,13868)|0;c[(c[d>>2]|0)+12>>2]=c[g>>2];c[(c[d>>2]|0)+8>>2]=c[f>>2];i=e;return c[d>>2]|0}function Kl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;l=i;i=i+32|0;e=l+28|0;m=l+24|0;f=l+20|0;h=l+12|0;k=l+8|0;j=l+4|0;g=l;c[m>>2]=a;c[f>>2]=b;c[l+16>>2]=d;c[h>>2]=c[m>>2];if(Ll(c[m>>2]|0,c[f>>2]|0)|0){c[j>>2]=c[f>>2];c[k>>2]=sn(96,13880,38)|0;c[g>>2]=(c[(c[j>>2]|0)+4>>2]|0)+4;c[(c[k>>2]|0)+84>>2]=c[(c[h>>2]|0)+12>>2];c[(c[k>>2]|0)+76>>2]=c[(c[g>>2]|0)+4>>2];c[(c[k>>2]|0)+80>>2]=c[(c[g>>2]|0)+8>>2];ke(c[(c[j>>2]|0)+8>>2]|0,(c[k>>2]|0)+64|0,(c[k>>2]|0)+68|0,(c[k>>2]|0)+72|0)|0;c[(c[k>>2]|0)+88>>2]=c[h>>2];fc((c[k>>2]|0)+8|0);lc((c[(c[k>>2]|0)+64>>2]|0)/(c[c[(c[(c[h>>2]|0)+8>>2]|0)+40>>2]>>2]|0)|0,(c[(c[h>>2]|0)+8>>2]|0)+8|0,(c[k>>2]|0)+8|0);c[(c[k>>2]|0)+52>>2]=1;c[e>>2]=c[k>>2];d=c[e>>2]|0;i=l;return d|0}else{c[e>>2]=0;d=c[e>>2]|0;i=l;return d|0}return 0}function Ll(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;j=g+24|0;h=g+20|0;d=g+16|0;e=g+12|0;f=g+8|0;c[j>>2]=a;c[h>>2]=b;c[d>>2]=c[j>>2];c[e>>2]=c[h>>2];if((c[c[(c[e>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=g;return b|0}if((c[c[(c[e>>2]|0)+8>>2]>>2]|0)>1){b=0;b=b&1;i=g;return b|0}if((c[(c[(c[e>>2]|0)+4>>2]|0)+4>>2]|0)!=(c[c[(c[d>>2]|0)+8>>2]>>2]|0)){b=0;b=b&1;i=g;return b|0}if((c[(c[e>>2]|0)+20>>2]|0)!=(c[(c[(c[d>>2]|0)+8>>2]|0)+44>>2]|0)){b=0;b=b&1;i=g;return b|0}if(!(ke(c[(c[e>>2]|0)+8>>2]|0,f,g+4|0,g)|0)){b=0;b=b&1;i=g;return b|0}if((c[f>>2]|0)==1?1:(c[(c[e>>2]|0)+12>>2]|0)!=(c[(c[e>>2]|0)+16>>2]|0)){b=1;b=b&1;i=g;return b|0}b=(Md(c[(c[e>>2]|0)+4>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function Ml(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;h=e+8|0;g=e+4|0;f=e;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[f>>2]=c[j>>2];Xa[c[(c[f>>2]|0)+84>>2]&127](c[h>>2]|0,c[g>>2]|0,c[(c[f>>2]|0)+76>>2]|0,c[(c[f>>2]|0)+80>>2]|0,c[(c[f>>2]|0)+64>>2]|0,c[(c[f>>2]|0)+68>>2]|0,c[(c[f>>2]|0)+72>>2]|0);i=e;return}function Nl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;h=d+28|0;j=d+24|0;g=d+20|0;f=d+16|0;c[h>>2]=a;c[j>>2]=b;c[g>>2]=c[h>>2];c[f>>2]=c[(c[g>>2]|0)+88>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=En(c[(c[(c[f>>2]|0)+8>>2]|0)+44>>2]|0)|0;h=c[c[(c[f>>2]|0)+8>>2]>>2]|0;g=c[(c[g>>2]|0)+64>>2]|0;f=c[(c[(c[f>>2]|0)+8>>2]|0)+4>>2]|0;c[e>>2]=j;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,22752,e);i=d;return}function Ol(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b+4|0;c[d>>2]=a;c[b>>2]=c[d>>2];i=b;return}function Pl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;e=i;i=i+16|0;g=e+8|0;f=e+4|0;d=e;c[g>>2]=a;c[f>>2]=b;c[d>>2]=zd(16,13896)|0;c[(c[d>>2]|0)+12>>2]=c[g>>2];c[(c[d>>2]|0)+8>>2]=c[f>>2];i=e;return c[d>>2]|0}function Ql(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+48|0;j=n+32|0;o=n+28|0;e=n+24|0;k=n+16|0;m=n+12|0;l=n+8|0;g=n+4|0;f=n;c[o>>2]=a;c[e>>2]=b;c[n+20>>2]=d;c[k>>2]=c[o>>2];if(!(Rl(c[o>>2]|0,c[e>>2]|0)|0)){c[j>>2]=0;m=c[j>>2]|0;i=n;return 
m|0}c[l>>2]=c[e>>2];if((c[(c[l>>2]|0)+28>>2]|0)>>>0>=0)e=(c[(c[l>>2]|0)+28>>2]|0)>>>0<=3;else e=0;c[f>>2]=e&1;c[m>>2]=rn(96,13908,(c[(c[l>>2]|0)+28>>2]|0)==0?30:29)|0;c[g>>2]=(c[(c[l>>2]|0)+4>>2]|0)+4;c[(c[m>>2]|0)+84>>2]=c[(c[k>>2]|0)+12>>2];e=c[g>>2]|0;if(c[f>>2]|0)e=c[e+4>>2]|0;else e=c[e+8>>2]|0;c[(c[m>>2]|0)+64>>2]=e;e=c[g>>2]|0;if(c[f>>2]|0)e=c[e+8>>2]|0;else e=c[e+4>>2]|0;c[(c[m>>2]|0)+68>>2]=e;ke(c[(c[l>>2]|0)+8>>2]|0,(c[m>>2]|0)+72|0,(c[m>>2]|0)+76|0,(c[m>>2]|0)+80|0)|0;if((c[c[g>>2]>>2]|0)%2|0)e=0;else e=_((c[c[g>>2]>>2]|0)/2|0,c[(c[g>>2]|0)+8>>2]|0)|0;c[(c[m>>2]|0)+92>>2]=e;c[(c[m>>2]|0)+88>>2]=c[k>>2];fc((c[m>>2]|0)+8|0);lc((c[(c[m>>2]|0)+72>>2]|0)/(c[(c[(c[(c[k>>2]|0)+8>>2]|0)+40>>2]|0)+4>>2]|0)|0,(c[(c[k>>2]|0)+8>>2]|0)+8|0,(c[m>>2]|0)+8|0);if(!(c[(c[l>>2]|0)+28>>2]|0)){l=(c[m>>2]|0)+8+24|0;h[l>>3]=+h[l>>3]+ +(c[(c[m>>2]|0)+72>>2]<<1|0)}c[(c[m>>2]|0)+52>>2]=1;c[j>>2]=c[m>>2];m=c[j>>2]|0;i=n;return m|0}function Rl(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;f=i;i=i+32|0;j=f+28|0;g=f+24|0;h=f+20|0;d=f+16|0;e=f+12|0;c[j>>2]=a;c[g>>2]=b;c[h>>2]=c[j>>2];c[d>>2]=c[(c[h>>2]|0)+8>>2];c[e>>2]=c[g>>2];if((c[c[(c[e>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=f;return b|0}if((c[c[(c[e>>2]|0)+8>>2]>>2]|0)>1){b=0;b=b&1;i=f;return b|0}if((c[(c[(c[e>>2]|0)+4>>2]|0)+4>>2]|0)!=(c[c[d>>2]>>2]|0)){b=0;b=b&1;i=f;return b|0}if((c[(c[e>>2]|0)+28>>2]|0)!=(c[c[(c[d>>2]|0)+40>>2]>>2]|0)){b=0;b=b&1;i=f;return b|0}if(!(ke(c[(c[e>>2]|0)+8>>2]|0,f+8|0,f+4|0,f)|0)){b=0;b=b&1;i=f;return b|0}if((c[(c[e>>2]|0)+12>>2]|0)!=(c[(c[e>>2]|0)+20>>2]|0)){b=1;b=b&1;i=f;return b|0}if(!(c[c[(c[e>>2]|0)+8>>2]>>2]|0)){b=1;b=b&1;i=f;return b|0}b=(dp(c[e>>2]|0,2147483647)|0)!=0;b=b&1;i=f;return b|0}function Sl(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;n=i;i=i+48|0;r=n+32|0;q=n+28|0;p=n+24|0;o=n+20|0;h=n+16|0;j=n+12|0;k=n+8|0;m=n+4|0;l=n;c[r>>2]=a;c[q>>2]=b;c[p>>2]=d;c[o>>2]=e;c[h>>2]=f;c[j>>2]=c[r>>2];c[m>>2]=c[(c[j>>2]|0)+72>>2];c[l>>2]=c[(c[j>>2]|0)+80>>2];Wa[c[(c[j>>2]|0)+84>>2]&127](c[q>>2]|0,c[p>>2]|0,c[o>>2]|0,c[h>>2]|0,c[(c[j>>2]|0)+64>>2]|0,c[(c[j>>2]|0)+68>>2]|0,c[(c[j>>2]|0)+68>>2]|0,c[m>>2]|0,c[(c[j>>2]|0)+76>>2]|0,c[l>>2]|0);c[k>>2]=0;while(1){if((c[k>>2]|0)>=(c[m>>2]|0))break;g[(c[h>>2]|0)+(c[(c[j>>2]|0)+92>>2]<<2)>>2]=0.0;g[c[h>>2]>>2]=0.0;c[k>>2]=(c[k>>2]|0)+1;c[h>>2]=(c[h>>2]|0)+(c[l>>2]<<2)}i=n;return}function Tl(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;g=i;i=i+32|0;n=g+20|0;m=g+16|0;l=g+12|0;k=g+8|0;j=g+4|0;h=g;c[n>>2]=a;c[m>>2]=b;c[l>>2]=d;c[k>>2]=e;c[j>>2]=f;c[h>>2]=c[n>>2];Wa[c[(c[h>>2]|0)+84>>2]&127](c[m>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,c[(c[h>>2]|0)+64>>2]|0,c[(c[h>>2]|0)+68>>2]|0,c[(c[h>>2]|0)+68>>2]|0,c[(c[h>>2]|0)+72>>2]|0,c[(c[h>>2]|0)+76>>2]|0,c[(c[h>>2]|0)+80>>2]|0);i=g;return}function Ul(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;h=d+28|0;j=d+24|0;g=d+20|0;f=d+16|0;c[h>>2]=a;c[j>>2]=b;c[g>>2]=c[h>>2];c[f>>2]=c[(c[g>>2]|0)+88>>2];b=c[c[j>>2]>>2]|0;a=c[j>>2]|0;j=En(c[c[(c[(c[f>>2]|0)+8>>2]|0)+40>>2]>>2]|0)|0;h=c[c[(c[f>>2]|0)+8>>2]>>2]|0;g=c[(c[g>>2]|0)+72>>2]|0;f=c[(c[(c[f>>2]|0)+8>>2]|0)+4>>2]|0;c[e>>2]=j;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,22783,e);i=d;return}function Vl(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b+4|0;c[d>>2]=a;c[b>>2]=c[d>>2];i=b;return}function Wl(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Xl(0)|0);a=c[d>>2]|0;Bd(a,Xl(4)|0);i=b;return}function Xl(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,13924)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}function Yl(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0;m=i;i=i+32|0;f=m+28|0;o=m+24|0;e=m+20|0;n=m+16|0;g=m+12|0;k=m+8|0;l=m+4|0;j=m;c[o>>2]=a;c[e>>2]=b;c[n>>2]=d;c[g>>2]=c[o>>2];if(!(Zl(c[g>>2]|0,c[e>>2]|0,c[n>>2]|0)|0)){c[f>>2]=0;l=c[f>>2]|0;i=m;return l|0}c[k>>2]=c[e>>2];if((c[(c[k>>2]|0)+20>>2]|0)>>>0>=0)e=(c[(c[k>>2]|0)+20>>2]|0)>>>0<=3;else e=0;c[l>>2]=sn(88,13936,e?40:39)|0;e=c[(c[(c[k>>2]|0)+4>>2]|0)+4>>2]|0;c[j>>2]=e;c[(c[l>>2]|0)+68>>2]=e;c[(c[l>>2]|0)+72>>2]=c[(c[(c[k>>2]|0)+4>>2]|0)+4+4>>2];c[(c[l>>2]|0)+76>>2]=c[(c[(c[k>>2]|0)+4>>2]|0)+4+8>>2];c[(c[l>>2]|0)+64>>2]=0;c[(c[l>>2]|0)+80>>2]=c[(c[g>>2]|0)+8>>2];h[(c[l>>2]|0)+8>>3]=+((c[j>>2]|0)-1|0)*2.5;h[(c[l>>2]|0)+8+8>>3]=0.0;h[(c[l>>2]|0)+8+16>>3]=+((c[j>>2]|0)-1|0)*.5*+((c[j>>2]|0)-1|0);c[f>>2]=c[l>>2];l=c[f>>2]|0;i=m;return l|0}function Zl(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;h=i;i=i+16|0;e=h+12|0;j=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[j>>2]=b;c[f>>2]=d;c[g>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)!=1){g=0;g=g&1;i=h;return g|0}if(c[c[(c[g>>2]|0)+8>>2]>>2]|0){g=0;g=g&1;i=h;return g|0}if(((c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)%2|0|0)!=1){g=0;g=g&1;i=h;return g|0}if((c[(c[f>>2]|0)+164>>2]&64|0)!=0?(c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)>=173:0){g=0;g=g&1;i=h;return g|0}if((c[(c[f>>2]|0)+164>>2]&8|0)!=0?(c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)<=16:0){g=0;g=g&1;i=h;return g|0}if(!(gd(c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)|0)){g=0;g=g&1;i=h;return g|0}g=(c[(c[g>>2]|0)+20>>2]|0)==(c[(c[e>>2]|0)+8>>2]|0);g=g&1;i=h;return g|0}function _l(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;o=i;i=i+48|0;q=o+40|0;e=o+36|0;f=o+32|0;p=o+28|0;k=o+24|0;m=o+20|0;l=o+16|0;n=o+12|0;g=o+8|0;h=o+4|0;j=o;c[q>>2]=a;c[e>>2]=b;c[f>>2]=d;c[p>>2]=c[q>>2];c[m>>2]=c[(c[p>>2]|0)+68>>2];c[l>>2]=c[(c[p>>2]|0)+72>>2];c[n>>2]=c[(c[p>>2]|0)+76>>2];c[g>>2]=c[c[(c[p>>2]|0)+64>>2]>>2];c[j>>2]=c[m>>2]<<2;b=c[j>>2]|0;if((c[j>>2]|0)>>>0<65536){d=i;i=i+((1*b|0)+15&-16)|0;c[h>>2]=d}else 
c[h>>2]=wb(b)|0;em(c[m>>2]|0,c[e>>2]|0,c[l>>2]|0,c[h>>2]|0,c[f>>2]|0);c[k>>2]=1;while(1){if(((c[k>>2]|0)+(c[k>>2]|0)|0)>=(c[m>>2]|0))break;l=(c[f>>2]|0)+((_(c[k>>2]|0,c[n>>2]|0)|0)<<2)|0;fm(c[m>>2]|0,c[h>>2]|0,c[g>>2]|0,l,(c[f>>2]|0)+((_((c[m>>2]|0)-(c[k>>2]|0)|0,c[n>>2]|0)|0)<<2)|0);c[g>>2]=(c[g>>2]|0)+((c[m>>2]|0)-1<<2);c[k>>2]=(c[k>>2]|0)+1}if((c[j>>2]|0)>>>0<65536){i=o;return}xb(c[h>>2]|0);i=o;return}function $l(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;o=i;i=i+48|0;q=o+40|0;e=o+36|0;f=o+32|0;p=o+28|0;k=o+24|0;m=o+20|0;l=o+16|0;n=o+12|0;g=o+8|0;h=o+4|0;j=o;c[q>>2]=a;c[e>>2]=b;c[f>>2]=d;c[p>>2]=c[q>>2];c[m>>2]=c[(c[p>>2]|0)+68>>2];c[l>>2]=c[(c[p>>2]|0)+72>>2];c[n>>2]=c[(c[p>>2]|0)+76>>2];c[g>>2]=c[c[(c[p>>2]|0)+64>>2]>>2];c[j>>2]=c[m>>2]<<2;b=c[j>>2]|0;if((c[j>>2]|0)>>>0<65536){d=i;i=i+((1*b|0)+15&-16)|0;c[h>>2]=d}else c[h>>2]=wb(b)|0;cm(c[m>>2]|0,c[e>>2]|0,c[l>>2]|0,c[h>>2]|0,c[f>>2]|0);c[k>>2]=1;while(1){if(((c[k>>2]|0)+(c[k>>2]|0)|0)>=(c[m>>2]|0))break;l=(c[f>>2]|0)+((_(c[k>>2]|0,c[n>>2]|0)|0)<<2)|0;dm(c[m>>2]|0,c[h>>2]|0,c[g>>2]|0,l,(c[f>>2]|0)+((_((c[m>>2]|0)-(c[k>>2]|0)|0,c[n>>2]|0)|0)<<2)|0);c[g>>2]=(c[g>>2]|0)+((c[m>>2]|0)-1<<2);c[k>>2]=(c[k>>2]|0)+1}if((c[j>>2]|0)>>>0<65536){i=o;return}xb(c[h>>2]|0);i=o;return}function am(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];Me(c[f>>2]|0,(c[e>>2]|0)+64|0,18172,c[(c[e>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+68>>2]|0,((c[(c[e>>2]|0)+68>>2]|0)-1|0)/2|0);i=d;return}function bm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+32|0;e=d;h=d+16|0;f=d+12|0;g=d+8|0;c[h>>2]=a;c[f>>2]=b;c[g>>2]=c[h>>2];b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;f=c[(c[g>>2]|0)+68>>2]|0;c[e>>2]=(c[(c[g>>2]|0)+80>>2]|0)==0?23580:23585;c[e+4>>2]=f;eb[b&63](a,22811,e);i=d;return}function cm(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0.0;p=i;i=i+32|0;h=p+24|0;j=p+20|0;k=p+16|0;l=p+12|0;m=p+8|0;n=p+4|0;o=p;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;q=+g[c[j>>2]>>2];g[o>>2]=q;g[c[l>>2]>>2]=q;c[l>>2]=(c[l>>2]|0)+4;c[n>>2]=1;while(1){if(((c[n>>2]|0)+(c[n>>2]|0)|0)>=(c[h>>2]|0))break;f=_(c[n>>2]|0,c[k>>2]|0)|0;e=_(c[n>>2]|0,c[k>>2]|0)|0;q=+g[(c[j>>2]|0)+(f<<2)>>2]+ +g[(c[j>>2]|0)+(e<<2)>>2];g[c[l>>2]>>2]=q;g[o>>2]=+g[o>>2]+q;e=_((c[h>>2]|0)-(c[n>>2]|0)|0,c[k>>2]|0)|0;f=_((c[h>>2]|0)-(c[n>>2]|0)|0,c[k>>2]|0)|0;g[(c[l>>2]|0)+4>>2]=+g[(c[j>>2]|0)+(e<<2)>>2]+ +g[(c[j>>2]|0)+(f<<2)>>2];c[l>>2]=(c[l>>2]|0)+8;c[n>>2]=(c[n>>2]|0)+1}g[c[m>>2]>>2]=+g[o>>2];i=p;return}function dm(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+32|0;h=q+28|0;j=q+24|0;k=q+20|0;l=q+16|0;m=q+12|0;n=q+8|0;p=q+4|0;o=q;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;g[p>>2]=+g[c[j>>2]>>2];g[o>>2]=0.0;c[j>>2]=(c[j>>2]|0)+4;c[n>>2]=1;while(1){if(((c[n>>2]|0)+(c[n>>2]|0)|0)>=(c[h>>2]|0))break;g[p>>2]=+g[p>>2]+ +g[c[j>>2]>>2]*+g[c[k>>2]>>2];g[o>>2]=+g[o>>2]+ +g[(c[j>>2]|0)+4>>2]*+g[(c[k>>2]|0)+4>>2];c[j>>2]=(c[j>>2]|0)+8;c[k>>2]=(c[k>>2]|0)+8;c[n>>2]=(c[n>>2]|0)+1}g[c[l>>2]>>2]=+g[p>>2]-+g[o>>2];g[c[m>>2]>>2]=+g[p>>2]+ +g[o>>2];i=q;return}function em(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0.0;r=i;i=i+48|0;h=r+32|0;j=r+28|0;k=r+24|0;l=r+20|0;m=r+16|0;p=r+12|0;q=r+8|0;n=r+4|0;o=r;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;s=+g[c[j>>2]>>2];g[q>>2]=s;g[c[l>>2]>>2]=s;c[l>>2]=(c[l>>2]|0)+4;c[p>>2]=1;while(1){if(((c[p>>2]|0)+(c[p>>2]|0)|0)>=(c[h>>2]|0))break;e=_(c[p>>2]|0,c[k>>2]|0)|0;g[n>>2]=+g[(c[j>>2]|0)+(e<<2)>>2];e=_((c[h>>2]|0)-(c[p>>2]|0)|0,c[k>>2]|0)|0;g[o>>2]=+g[(c[j>>2]|0)+(e<<2)>>2];s=+g[n>>2]+ +g[o>>2];g[c[l>>2]>>2]=s;g[q>>2]=+g[q>>2]+s;g[(c[l>>2]|0)+4>>2]=+g[o>>2]-+g[n>>2];c[l>>2]=(c[l>>2]|0)+8;c[p>>2]=(c[p>>2]|0)+1}g[c[m>>2]>>2]=+g[q>>2];i=r;return}function 
fm(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+32|0;h=q+28|0;j=q+24|0;k=q+20|0;l=q+16|0;m=q+12|0;n=q+8|0;p=q+4|0;o=q;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;g[p>>2]=+g[c[j>>2]>>2];g[o>>2]=0.0;c[j>>2]=(c[j>>2]|0)+4;c[n>>2]=1;while(1){if(((c[n>>2]|0)+(c[n>>2]|0)|0)>=(c[h>>2]|0))break;g[p>>2]=+g[p>>2]+ +g[c[j>>2]>>2]*+g[c[k>>2]>>2];g[o>>2]=+g[o>>2]+ +g[(c[j>>2]|0)+4>>2]*+g[(c[k>>2]|0)+4>>2];c[j>>2]=(c[j>>2]|0)+8;c[k>>2]=(c[k>>2]|0)+8;c[n>>2]=(c[n>>2]|0)+1}g[c[l>>2]>>2]=+g[p>>2];g[c[m>>2]>>2]=+g[o>>2];i=q;return}function gm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;hm(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,0);hm(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,1);i=e;return}function hm(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;f=l+16|0;g=l+12|0;h=l+8|0;j=l+4|0;k=l;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=Hm(28,c[c[h>>2]>>2]|0,3)|0;c[(c[k>>2]|0)+20>>2]=c[g>>2];c[(c[k>>2]|0)+16>>2]=c[h>>2];c[(c[k>>2]|0)+24>>2]=c[j>>2];Bd(c[f>>2]|0,c[k>>2]|0);if(!(c[3496]|0)){i=l;return}c[k>>2]=Va[c[13984>>2]&63](28,c[c[h>>2]>>2]|0,3)|0;c[(c[k>>2]|0)+20>>2]=c[g>>2];c[(c[k>>2]|0)+16>>2]=c[h>>2];c[(c[k>>2]|0)+24>>2]=c[j>>2];Bd(c[f>>2]|0,c[k>>2]|0);i=l;return}function im(a,b,d,e,f,g,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var 
o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0.0;G=i;i=i+80|0;E=G+72|0;H=G+68|0;q=G+64|0;r=G+60|0;s=G+56|0;t=G+52|0;u=G+48|0;v=G+44|0;w=G+40|0;x=G+36|0;p=G+32|0;o=G+28|0;B=G+24|0;F=G+20|0;A=G+16|0;y=G+12|0;z=G+8|0;C=G+4|0;D=G;c[H>>2]=a;c[q>>2]=b;c[r>>2]=d;c[s>>2]=e;c[t>>2]=f;c[u>>2]=g;c[v>>2]=j;c[w>>2]=k;c[x>>2]=l;c[p>>2]=m;c[o>>2]=n;c[B>>2]=c[H>>2];c[A>>2]=c[(c[B>>2]|0)+16>>2];c[y>>2]=0;c[z>>2]=0;c[C>>2]=_((c[s>>2]|0)/2|0,c[t>>2]|0)|0;c[D>>2]=_(c[s>>2]|0,c[t>>2]|0)|0;if(!(jm(c[B>>2]|0,c[q>>2]|0,c[r>>2]|0,c[s>>2]|0,c[u>>2]|0,c[o>>2]|0)|0)){c[E>>2]=0;y=c[E>>2]|0;i=G;return y|0}m=c[o>>2]|0;if(!(c[w>>2]|0))a=Ed(c[r>>2]|0,c[D>>2]|0,c[D>>2]|0)|0;else a=Dd()|0;j=Dd()|0;c[y>>2]=uc(m,In(a,j,c[p>>2]|0,c[p>>2]|0,c[q>>2]|0)|0)|0;if(c[y>>2]|0){a=c[o>>2]|0;if(((c[w>>2]|0)+(c[x>>2]|0)<<1|0)==((c[s>>2]|0)+2|0))m=Ed(c[r>>2]|0,c[D>>2]|0,c[D>>2]|0)|0;else m=Dd()|0;o=Dd()|0;c[z>>2]=uc(a,In(m,o,(c[p>>2]|0)+(c[C>>2]<<2)|0,(c[p>>2]|0)+(c[C>>2]<<2)|0,(c[q>>2]|0)==0?1:6)|0)|0;if(c[z>>2]|0){c[F>>2]=Im(120,13952,(c[(c[B>>2]|0)+24>>2]|0)!=0?103:102)|0;c[(c[F>>2]|0)+64>>2]=c[(c[B>>2]|0)+20>>2];c[(c[F>>2]|0)+112>>2]=0;c[(c[F>>2]|0)+76>>2]=c[r>>2];c[(c[F>>2]|0)+104>>2]=c[D>>2];c[(c[F>>2]|0)+80>>2]=c[s>>2];c[(c[F>>2]|0)+88>>2]=c[t>>2];c[(c[F>>2]|0)+84>>2]=c[u>>2];c[(c[F>>2]|0)+92>>2]=c[v>>2];c[(c[F>>2]|0)+116>>2]=c[B>>2];v=(mm(c[r>>2]|0)|0)<<1;c[(c[F>>2]|0)+108>>2]=v;c[(c[F>>2]|0)+68>>2]=c[y>>2];c[(c[F>>2]|0)+72>>2]=c[z>>2];c[(c[F>>2]|0)+96>>2]=(c[w>>2]|0)+((c[w>>2]|0)==0&1);c[(c[F>>2]|0)+100>>2]=(c[w>>2]|0)+(c[x>>2]|0)-(((c[w>>2]|0)+(c[x>>2]|0)<<1|0)==((c[s>>2]|0)+2|0)&1);fc((c[F>>2]|0)+8|0);x=_(c[u>>2]|0,((c[(c[F>>2]|0)+100>>2]|0)-(c[(c[F>>2]|0)+96>>2]|0)|0)/(c[(c[(c[A>>2]|0)+12>>2]|0)+4>>2]|0)|0)|0;lc(x,(c[A>>2]|0)+16|0,(c[F>>2]|0)+8|0);lc(c[u>>2]|0,(c[y>>2]|0)+8|0,(c[F>>2]|0)+8|0);lc(c[u>>2]|0,(c[z>>2]|0)+8|0,(c[F>>2]|0)+8|0);if(c[(c[B>>2]|0)+24>>2]|0){y=_(c[r>>2]<<2,(c[(c[F>>2]|0)+100>>2]|0)-(c[(c[F>>2]|0)+96>>2]|0)|0)|0;I=+(_(y,c[u>>2]|0)|0);y=(c[F>>2]|0
)+8+24|0;h[y>>3]=+h[y>>3]+I}if(((c[r>>2]|0)>=5?(c[(c[B>>2]|0)+24>>2]|0)==0:0)&(c[r>>2]|0)<64)a=(c[s>>2]|0)>=(c[r>>2]|0);else a=0;c[(c[F>>2]|0)+52>>2]=a&1;c[E>>2]=c[F>>2];y=c[E>>2]|0;i=G;return y|0}}pc(c[y>>2]|0);pc(c[z>>2]|0);c[E>>2]=0;y=c[E>>2]|0;i=G;return y|0}function jm(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;o=i;i=i+32|0;h=o+24|0;j=o+20|0;p=o+16|0;k=o+12|0;l=o+8|0;m=o+4|0;n=o;c[j>>2]=a;c[p>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;if(!(rm(c[j>>2]|0,c[p>>2]|0,c[k>>2]|0)|0)){c[h>>2]=0;g=c[h>>2]|0;i=o;return g|0}if((c[(c[n>>2]|0)+164>>2]&65536|0)!=0?(g=_(c[l>>2]|0,c[k>>2]|0)|0,(Qb((c[(c[j>>2]|0)+24>>2]|0)!=0?512:16,c[m>>2]|0,g,c[k>>2]|0)|0)!=0):0){c[h>>2]=0;g=c[h>>2]|0;i=o;return g|0}c[h>>2]=1;g=c[h>>2]|0;i=o;return g|0}function km(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;w=i;i=i+80|0;e=w+68|0;f=w+64|0;o=w+60|0;m=w+56|0;n=w+52|0;p=w+48|0;q=w+44|0;r=w+40|0;v=w+36|0;d=w+32|0;s=w+28|0;t=w+24|0;u=w+20|0;j=w+16|0;k=w+12|0;l=w+8|0;h=w+4|0;g=w;c[e>>2]=a;c[f>>2]=b;c[o>>2]=c[e>>2];c[m>>2]=c[(c[o>>2]|0)+68>>2];c[n>>2]=c[(c[o>>2]|0)+72>>2];c[r>>2]=c[(c[o>>2]|0)+80>>2];c[v>>2]=c[(c[o>>2]|0)+84>>2];c[d>>2]=c[(c[o>>2]|0)+76>>2];c[s>>2]=c[(c[o>>2]|0)+96>>2];c[t>>2]=c[(c[o>>2]|0)+100>>2];c[u>>2]=c[(c[o>>2]|0)+88>>2];c[j>>2]=mm(c[d>>2]|0)|0;c[l>>2]=(_(c[d>>2]|0,c[j>>2]|0)|0)<<1<<2;b=c[l>>2]|0;if((c[l>>2]|0)>>>0<65536){a=i;i=i+((1*b|0)+15&-16)|0;c[k>>2]=a}else 
c[k>>2]=wb(b)|0;c[p>>2]=0;while(1){if((c[p>>2]|0)>=(c[v>>2]|0))break;c[h>>2]=c[f>>2];c[g>>2]=(c[f>>2]|0)+((_(c[r>>2]|0,c[u>>2]|0)|0)<<2);eb[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[f>>2]|0,c[f>>2]|0);c[q>>2]=c[s>>2];while(1){a=c[o>>2]|0;b=c[h>>2]|0;d=c[g>>2]|0;e=c[q>>2]|0;if(((c[q>>2]|0)+(c[j>>2]|0)|0)>=(c[t>>2]|0))break;qm(a,b,d,e,(c[q>>2]|0)+(c[j>>2]|0)|0,c[k>>2]|0);c[q>>2]=(c[q>>2]|0)+(c[j>>2]|0)}qm(a,b,d,e,c[t>>2]|0,c[k>>2]|0);a=(c[f>>2]|0)+((_(c[u>>2]|0,(c[r>>2]|0)/2|0)|0)<<2)|0;b=(c[f>>2]|0)+((_(c[u>>2]|0,(c[r>>2]|0)/2|0)|0)<<2)|0;eb[c[(c[n>>2]|0)+56>>2]&63](c[n>>2]|0,a,b);c[p>>2]=(c[p>>2]|0)+1;c[f>>2]=(c[f>>2]|0)+(c[(c[o>>2]|0)+92>>2]<<2)}if((c[l>>2]|0)>>>0<65536){i=w;return}xb(c[k>>2]|0);i=w;return}function lm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;q=p+44|0;d=p+40|0;g=p+36|0;e=p+32|0;f=p+28|0;h=p+24|0;j=p+20|0;n=p+16|0;k=p+12|0;l=p+8|0;m=p+4|0;o=p;c[q>>2]=a;c[d>>2]=b;c[g>>2]=c[q>>2];c[e>>2]=c[(c[g>>2]|0)+68>>2];c[f>>2]=c[(c[g>>2]|0)+72>>2];c[j>>2]=c[(c[g>>2]|0)+80>>2];c[n>>2]=c[(c[g>>2]|0)+84>>2];c[k>>2]=c[(c[g>>2]|0)+96>>2];c[l>>2]=c[(c[g>>2]|0)+100>>2];c[m>>2]=c[(c[g>>2]|0)+88>>2];c[o>>2]=c[(c[g>>2]|0)+92>>2];c[h>>2]=0;while(1){if((c[h>>2]|0)>=(c[n>>2]|0))break;eb[c[(c[e>>2]|0)+56>>2]&63](c[e>>2]|0,c[d>>2]|0,c[d>>2]|0);a=(c[d>>2]|0)+((_(c[m>>2]|0,c[k>>2]|0)|0)<<2)|0;b=(c[d>>2]|0)+((_((c[j>>2]|0)-(c[k>>2]|0)|0,c[m>>2]|0)|0)<<2)|0;Xa[c[(c[g>>2]|0)+64>>2]&127](a,b,c[c[(c[g>>2]|0)+112>>2]>>2]|0,c[(c[g>>2]|0)+104>>2]|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0);b=(c[d>>2]|0)+((_((c[j>>2]|0)/2|0,c[m>>2]|0)|0)<<2)|0;a=(c[d>>2]|0)+((_((c[j>>2]|0)/2|0,c[m>>2]|0)|0)<<2)|0;eb[c[(c[f>>2]|0)+56>>2]&63](c[f>>2]|0,b,a);c[h>>2]=(c[h>>2]|0)+1;c[d>>2]=(c[d>>2]|0)+(c[o>>2]<<2)}i=p;return}function mm(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;c[b>>2]=(c[b>>2]|0)+3;c[b>>2]=c[b>>2]&-4;i=d;return (c[b>>2]|0)+2|0}function nm(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];rc(c[(c[e>>2]|0)+68>>2]|0,c[f>>2]|0);rc(c[(c[e>>2]|0)+72>>2]|0,c[f>>2]|0);a=_(c[(c[e>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+80>>2]|0)|0;Me(c[f>>2]|0,(c[e>>2]|0)+112|0,c[(c[(c[(c[e>>2]|0)+116>>2]|0)+16>>2]|0)+8>>2]|0,a,c[(c[e>>2]|0)+76>>2]|0,((c[(c[e>>2]|0)+80>>2]|0)-1|0)/2|0);i=d;return}function om(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;j=i;i=i+80|0;h=j+32|0;g=j;m=j+76|0;l=j+72|0;f=j+68|0;k=j+64|0;e=j+60|0;d=j+56|0;c[m>>2]=a;c[l>>2]=b;c[f>>2]=c[m>>2];c[k>>2]=c[(c[f>>2]|0)+116>>2];c[e>>2]=c[(c[k>>2]|0)+16>>2];c[d>>2]=mm(c[(c[f>>2]|0)+76>>2]|0)|0;b=c[c[l>>2]>>2]|0;a=c[l>>2]|0;if(c[(c[k>>2]|0)+24>>2]|0){o=c[d>>2]|0;n=c[(c[f>>2]|0)+76>>2]|0;m=Le(c[(c[f>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0;d=c[(c[f>>2]|0)+84>>2]|0;h=c[(c[e>>2]|0)+4>>2]|0;k=c[(c[f>>2]|0)+68>>2]|0;l=c[(c[f>>2]|0)+72>>2]|0;c[g>>2]=o;c[g+4>>2]=n;c[g+8>>2]=m;c[g+12>>2]=d;c[g+16>>2]=h;c[g+20>>2]=k;c[g+24>>2]=l;eb[b&63](a,22832,g);i=j;return}else{n=c[(c[f>>2]|0)+76>>2]|0;m=Le(c[(c[f>>2]|0)+76>>2]|0,c[(c[e>>2]|0)+8>>2]|0)|0;d=c[(c[f>>2]|0)+84>>2]|0;g=c[(c[e>>2]|0)+4>>2]|0;k=c[(c[f>>2]|0)+68>>2]|0;l=c[(c[f>>2]|0)+72>>2]|0;c[h>>2]=n;c[h+4>>2]=m;c[h+8>>2]=d;c[h+12>>2]=g;c[h+16>>2]=k;c[h+20>>2]=l;eb[b&63](a,22878,h);i=j;return}}function pm(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+72>>2]|0);i=b;return}function qm(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;h=i;i=i+48|0;t=h+40|0;s=h+36|0;q=h+32|0;k=h+28|0;l=h+24|0;r=h+20|0;n=h+16|0;m=h+12|0;o=h+8|0;j=h+4|0;p=h;c[t>>2]=a;c[s>>2]=b;c[q>>2]=d;c[k>>2]=e;c[l>>2]=f;c[r>>2]=g;c[n>>2]=c[(c[t>>2]|0)+108>>2];c[m>>2]=c[(c[t>>2]|0)+104>>2];c[o>>2]=c[(c[t>>2]|0)+76>>2];c[j>>2]=c[(c[t>>2]|0)+88>>2];c[p>>2]=(c[r>>2]|0)+(c[n>>2]<<2)+-4;a=(c[s>>2]|0)+((_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;Kb(a,c[r>>2]|0,c[o>>2]|0,c[m>>2]|0,c[n>>2]|0,(c[l>>2]|0)-(c[k>>2]|0)|0,c[j>>2]|0,1,1);a=(c[q>>2]|0)+(0-(_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;Kb(a,c[p>>2]|0,c[o>>2]|0,c[m>>2]|0,c[n>>2]|0,(c[l>>2]|0)-(c[k>>2]|0)|0,0-(c[j>>2]|0)|0,-1,1);Xa[c[(c[t>>2]|0)+64>>2]&127](c[r>>2]|0,c[p>>2]|0,c[c[(c[t>>2]|0)+112>>2]>>2]|0,c[(c[t>>2]|0)+108>>2]|0,c[k>>2]|0,c[l>>2]|0,1);a=(c[s>>2]|0)+((_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;Lb(c[r>>2]|0,a,c[o>>2]|0,c[n>>2]|0,c[m>>2]|0,(c[l>>2]|0)-(c[k>>2]|0)|0,1,c[j>>2]|0,1);a=(c[q>>2]|0)+(0-(_(c[k>>2]|0,c[j>>2]|0)|0)<<2)|0;Lb(c[p>>2]|0,a,c[o>>2]|0,c[n>>2]|0,c[m>>2]|0,(c[l>>2]|0)-(c[k>>2]|0)|0,-1,0-(c[j>>2]|0)|0,1);i=h;return}function rm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;g=i;i=i+16|0;j=g+12|0;e=g+8|0;h=g+4|0;f=g;c[j>>2]=a;c[e>>2]=b;c[h>>2]=d;c[f>>2]=c[(c[j>>2]|0)+16>>2];if((c[h>>2]|0)!=(c[c[f>>2]>>2]|0)){b=0;b=b&1;i=g;return b|0}b=(c[e>>2]|0)==(c[c[(c[f>>2]|0)+12>>2]>>2]|0);b=b&1;i=g;return b|0}function sm(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;tm(c[d>>2]|0,0);i=b;return}function tm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;g=i;i=i+16|0;d=g+8|0;e=g+4|0;f=g;c[d>>2]=a;c[e>>2]=b;c[f>>2]=Hm(16,c[e>>2]|0,4)|0;Bd(c[d>>2]|0,c[f>>2]|0);if(!(c[3496]|0)){i=g;return}c[f>>2]=Va[c[13984>>2]&63](16,c[e>>2]|0,4)|0;Bd(c[d>>2]|0,c[f>>2]|0);i=g;return}function um(a,b,d,e,f,g,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var 
o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0;G=i;i=i+80|0;q=G+76|0;t=G+68|0;u=G+64|0;v=G+60|0;w=G+56|0;x=G+52|0;y=G+48|0;p=G+44|0;o=G+40|0;r=G+36|0;s=G+32|0;F=G+28|0;A=G+24|0;z=G+20|0;C=G+16|0;B=G+12|0;D=G+8|0;E=G;c[G+72>>2]=a;c[t>>2]=b;c[u>>2]=d;c[v>>2]=e;c[w>>2]=f;c[x>>2]=g;c[y>>2]=j;c[p>>2]=k;c[o>>2]=l;c[r>>2]=m;c[s>>2]=n;c[A>>2]=0;c[z>>2]=0;if(!(vm(c[t>>2]|0,c[u>>2]|0,c[v>>2]|0,c[s>>2]|0)|0)){c[q>>2]=0;y=c[q>>2]|0;i=G;return y|0}c[C>>2]=(c[p>>2]|0)+((c[p>>2]|0)==0&1);c[B>>2]=(c[o>>2]|0)-((c[p>>2]|0)==0&1);c[D>>2]=(c[v>>2]|0)-((c[p>>2]|0)+(c[o>>2]|0)-1)-(c[C>>2]|0);o=c[s>>2]|0;if(!(c[p>>2]|0)){m=_(c[v>>2]|0,c[w>>2]|0)|0;m=Ed(c[u>>2]|0,m,_(c[v>>2]|0,c[w>>2]|0)|0)|0}else m=Dd()|0;p=Ed(c[x>>2]|0,c[y>>2]|0,c[y>>2]|0)|0;c[A>>2]=uc(o,In(m,p,c[r>>2]|0,c[r>>2]|0,c[t>>2]|0)|0)|0;if((c[A>>2]|0)!=0?(b=c[s>>2]|0,g=_(c[v>>2]|0,c[w>>2]|0)|0,g=Ed(c[u>>2]|0,g,_(c[v>>2]|0,c[w>>2]|0)|0)|0,p=_(c[D>>2]|0,c[w>>2]|0)|0,j=_(c[D>>2]|0,c[w>>2]|0)|0,j=Gd(2,p,j,c[B>>2]|0,c[w>>2]|0,c[w>>2]|0,c[x>>2]|0,c[y>>2]|0,c[y>>2]|0)|0,p=(c[r>>2]|0)+((_(c[w>>2]|0,c[C>>2]|0)|0)<<2)|0,s=(c[r>>2]|0)+((_(c[w>>2]|0,c[C>>2]|0)|0)<<2)|0,c[z>>2]=uc(b,In(g,j,p,s,c[t>>2]|0)|0)|0,(c[z>>2]|0)!=0):0){c[F>>2]=Im(104,13968,(c[t>>2]|0)==0?105:104)|0;c[(c[F>>2]|0)+96>>2]=c[z>>2];c[(c[F>>2]|0)+92>>2]=c[A>>2];c[(c[F>>2]|0)+64>>2]=c[u>>2];c[(c[F>>2]|0)+68>>2]=c[v>>2];c[(c[F>>2]|0)+72>>2]=c[w>>2];c[(c[F>>2]|0)+76>>2]=c[x>>2];c[(c[F>>2]|0)+80>>2]=c[y>>2];c[(c[F>>2]|0)+100>>2]=0;c[(c[F>>2]|0)+84>>2]=c[C>>2];c[(c[F>>2]|0)+88>>2]=c[B>>2];h[E>>3]=+((c[u>>2]|0)-1|0)*.5*+(c[B>>2]<<1|0)*+(c[x>>2]|0);y=(c[F>>2]|0)+8|0;x=(c[z>>2]|0)+8|0;c[y>>2]=c[x>>2];c[y+4>>2]=c[x+4>>2];c[y+8>>2]=c[x+8>>2];c[y+12>>2]=c[x+12>>2];c[y+16>>2]=c[x+16>>2];c[y+20>>2]=c[x+20>>2];c[y+24>>2]=c[x+24>>2];c[y+28>>2]=c[x+28>>2];y=(c[F>>2]|0)+8+8|0;h[y>>3]=+h[y>>3]+((c[t>>2]|0)==0?5.0:7.0)*+h[E>>3];y=(c[F>>2]|0)+8|0;h[y>>3]=+h[y>>3]+ +h[E>>3]*4.0;y=(c[F>>2]|0)+8+24|0;h[y>>3]=+h[y>>3]+ 
+h[E>>3]*11.0;c[q>>2]=c[F>>2];y=c[q>>2]|0;i=G;return y|0}pc(c[z>>2]|0);pc(c[A>>2]|0);c[q>>2]=0;y=c[q>>2]|0;i=G;return y|0}function vm(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;g=i;i=i+16|0;k=g+12|0;h=g+8|0;j=g+4|0;f=g;c[k>>2]=a;c[h>>2]=b;c[j>>2]=d;c[f>>2]=e;if(((c[k>>2]|0)==0|(c[k>>2]|0)==4?((c[j>>2]|0)%2|0|0)!=0:0)?((c[h>>2]|0)%2|0|0)!=0:0)f=(c[(c[f>>2]|0)+164>>2]&8|0)!=0^1;else f=0;i=g;return f&1|0}function wm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0;d=i;i=i+32|0;k=d+20|0;e=d+16|0;f=d+12|0;g=d+8|0;h=d+4|0;j=d;c[k>>2]=a;c[e>>2]=b;c[f>>2]=c[k>>2];Dm(c[f>>2]|0,c[e>>2]|0,-1.0);c[j>>2]=c[(c[f>>2]|0)+92>>2];eb[c[(c[j>>2]|0)+56>>2]&63](c[(c[f>>2]|0)+92>>2]|0,c[e>>2]|0,c[e>>2]|0);c[g>>2]=_(c[(c[f>>2]|0)+84>>2]|0,c[(c[f>>2]|0)+72>>2]|0)|0;c[h>>2]=c[(c[f>>2]|0)+96>>2];eb[c[(c[h>>2]|0)+56>>2]&63](c[(c[f>>2]|0)+96>>2]|0,(c[e>>2]|0)+(c[g>>2]<<2)|0,(c[e>>2]|0)+(c[g>>2]<<2)|0);Fm(c[f>>2]|0,c[e>>2]|0);i=d;return}function xm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0;d=i;i=i+32|0;k=d+20|0;e=d+16|0;f=d+12|0;g=d+8|0;h=d+4|0;j=d;c[k>>2]=a;c[e>>2]=b;c[f>>2]=c[k>>2];Cm(c[f>>2]|0,c[e>>2]|0);c[j>>2]=c[(c[f>>2]|0)+92>>2];eb[c[(c[j>>2]|0)+56>>2]&63](c[(c[f>>2]|0)+92>>2]|0,c[e>>2]|0,c[e>>2]|0);c[g>>2]=_(c[(c[f>>2]|0)+84>>2]|0,c[(c[f>>2]|0)+72>>2]|0)|0;c[h>>2]=c[(c[f>>2]|0)+96>>2];eb[c[(c[h>>2]|0)+56>>2]&63](c[(c[f>>2]|0)+96>>2]|0,(c[e>>2]|0)+(c[g>>2]<<2)|0,(c[e>>2]|0)+(c[g>>2]<<2)|0);Dm(c[f>>2]|0,c[e>>2]|0,1.0);i=d;return}function ym(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+92>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+96>>2]|0,c[e>>2]|0);Bm(c[f>>2]|0,c[e>>2]|0);i=d;return}function zm(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;d=i;i=i+48|0;e=d;j=d+32|0;k=d+28|0;l=d+24|0;c[j>>2]=a;c[k>>2]=b;c[l>>2]=c[j>>2];b=c[c[k>>2]>>2]|0;a=c[k>>2]|0;k=c[(c[l>>2]|0)+64>>2]|0;j=c[(c[l>>2]|0)+68>>2]|0;h=c[(c[l>>2]|0)+76>>2]|0;g=c[(c[l>>2]|0)+92>>2]|0;f=c[(c[l>>2]|0)+96>>2]|0;c[e>>2]=(c[(c[l>>2]|0)+56>>2]|0)==105?22957:22961;c[e+4>>2]=k;c[e+8>>2]=j;c[e+12>>2]=h;c[e+16>>2]=g;c[e+20>>2]=f;eb[b&63](a,22918,e);i=d;return}function Am(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+96>>2]|0);pc(c[(c[d>>2]|0)+92>>2]|0);i=b;return}function Bm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;e=d+4|0;f=d;c[e>>2]=a;c[f>>2]=b;a=_(c[(c[e>>2]|0)+64>>2]|0,c[(c[e>>2]|0)+68>>2]|0)|0;Me(c[f>>2]|0,(c[e>>2]|0)+100|0,18180,a,c[(c[e>>2]|0)+68>>2]|0,c[(c[e>>2]|0)+64>>2]|0);i=d;return}function Cm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0;y=i;i=i+80|0;z=y+76|0;d=y+72|0;f=y+68|0;l=y+64|0;s=y+60|0;m=y+56|0;v=y+52|0;w=y+48|0;x=y+44|0;o=y+40|0;p=y+36|0;n=y+32|0;q=y+28|0;r=y+24|0;e=y+20|0;k=y+16|0;u=y+12|0;j=y+8|0;h=y+4|0;t=y;c[z>>2]=a;c[d>>2]=b;c[s>>2]=c[(c[z>>2]|0)+64>>2];c[m>>2]=c[(c[z>>2]|0)+68>>2];c[v>>2]=c[(c[z>>2]|0)+72>>2];c[w>>2]=c[(c[z>>2]|0)+76>>2];c[x>>2]=c[(c[z>>2]|0)+80>>2];c[o>>2]=_(c[m>>2]|0,c[v>>2]|0)|0;c[p>>2]=c[(c[z>>2]|0)+84>>2];c[n>>2]=(c[p>>2]|0)+(c[(c[z>>2]|0)+88>>2]|0);c[f>>2]=0;while(1){if((c[f>>2]|0)>=(c[w>>2]|0))break;Em(c[d>>2]|0,c[s>>2]|0,c[m>>2]|0,c[v>>2]|0,c[p>>2]|0,c[n>>2]|0);c[l>>2]=1;while(1){if(((c[l>>2]|0)+(c[l>>2]|0)|0)>=(c[s>>2]|0))break;c[q>>2]=(c[d>>2]|0)+((_(c[l>>2]|0,c[o>>2]|0)|0)<<2);c[r>>2]=(c[d>>2]|0)+((_((c[s>>2]|0)-(c[l>>2]|0)|0,c[o>>2]|0)|0)<<2);g[e>>2]=.5;c[k>>2]=c[p>>2];while(1){if((c[k>>2]|0)>=(c[n>>2]|0))break;a=_(c[k>>2]|0,c[v>>2]|0)|0;g[u>>2]=+g[(c[q>>2]|0)+(a<<2)>>2]*.5;a=(c[o>>2]|0)-(_(c[k>>2]|0,c[v>>2]|0)|0)|0;g[h>>2]=+g[(c[r>>2]|0)+(a<<2)>>2]*.5;a=_(c[k>>2]|0,c[v>>2]|0)|0;g[t>>2]=+g[(c[r>>2]|0)+(a<<2)>>2]*.5;a=(c[o>>2]|0)-(_(c[k>>2]|0,c[v>>2]|0)
|0)|0;g[j>>2]=+g[(c[q>>2]|0)+(a<<2)>>2]*.5;a=_(c[k>>2]|0,c[v>>2]|0)|0;g[(c[q>>2]|0)+(a<<2)>>2]=+g[u>>2]+ +g[h>>2];a=(c[o>>2]|0)-(_(c[k>>2]|0,c[v>>2]|0)|0)|0;g[(c[r>>2]|0)+(a<<2)>>2]=+g[h>>2]-+g[u>>2];a=_(c[k>>2]|0,c[v>>2]|0)|0;g[(c[r>>2]|0)+(a<<2)>>2]=+g[t>>2]+ +g[j>>2];a=(c[o>>2]|0)-(_(c[k>>2]|0,c[v>>2]|0)|0)|0;g[(c[q>>2]|0)+(a<<2)>>2]=+g[j>>2]-+g[t>>2];c[k>>2]=(c[k>>2]|0)+1}c[l>>2]=(c[l>>2]|0)+1}c[f>>2]=(c[f>>2]|0)+1;c[d>>2]=(c[d>>2]|0)+(c[x>>2]<<2)}i=y;return}function Dm(a,b,d){a=a|0;b=b|0;d=+d;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0;C=i;i=i+96|0;e=C+84|0;f=C+80|0;h=C+76|0;k=C+72|0;l=C+68|0;m=C+64|0;t=C+60|0;n=C+56|0;u=C+52|0;v=C+48|0;w=C+44|0;p=C+40|0;q=C+36|0;o=C+32|0;z=C+28|0;j=C+24|0;s=C+20|0;r=C+16|0;B=C+12|0;A=C+8|0;y=C+4|0;x=C;c[e>>2]=a;c[f>>2]=b;g[h>>2]=d;c[t>>2]=c[(c[e>>2]|0)+64>>2];c[n>>2]=c[(c[e>>2]|0)+68>>2];c[u>>2]=c[(c[e>>2]|0)+72>>2];c[v>>2]=c[(c[e>>2]|0)+76>>2];c[w>>2]=c[(c[e>>2]|0)+80>>2];c[p>>2]=_(c[n>>2]|0,c[u>>2]|0)|0;c[q>>2]=c[(c[e>>2]|0)+84>>2];c[o>>2]=c[(c[e>>2]|0)+88>>2];c[z>>2]=(((c[n>>2]|0)-1|0)/2|0)-(c[o>>2]|0)<<1;c[k>>2]=0;while(1){if((c[k>>2]|0)>=(c[v>>2]|0))break;c[j>>2]=c[c[(c[e>>2]|0)+100>>2]>>2];c[m>>2]=1;c[j>>2]=(c[j>>2]|0)+((c[n>>2]|0)-1+((c[q>>2]|0)-1<<1)<<2);while(1){if((c[m>>2]|0)>=(c[t>>2]|0))break;b=(c[f>>2]|0)+((_(c[q>>2]|0,c[u>>2]|0)|0)<<2)|0;c[s>>2]=b+((_(c[m>>2]|0,c[p>>2]|0)|0)<<2);b=(c[f>>2]|0)+(0-(_(c[q>>2]|0,c[u>>2]|0)|0)<<2)|0;c[r>>2]=b+((_((c[m>>2]|0)+1|0,c[p>>2]|0)|0)<<2);c[l>>2]=0;while(1){if((c[l>>2]|0)>=(c[o>>2]|0))break;g[B>>2]=+g[c[s>>2]>>2];g[A>>2]=+g[c[r>>2]>>2];g[y>>2]=+g[c[j>>2]>>2];g[x>>2]=+g[h>>2]*+g[(c[j>>2]|0)+4>>2];g[c[s>>2]>>2]=+g[B>>2]*+g[y>>2]-+g[A>>2]*+g[x>>2];g[c[r>>2]>>2]=+g[A>>2]*+g[y>>2]+ 
+g[B>>2]*+g[x>>2];c[j>>2]=(c[j>>2]|0)+8;c[l>>2]=(c[l>>2]|0)+1;c[s>>2]=(c[s>>2]|0)+(c[u>>2]<<2);c[r>>2]=(c[r>>2]|0)+(0-(c[u>>2]|0)<<2)}c[j>>2]=(c[j>>2]|0)+(c[z>>2]<<2);c[m>>2]=(c[m>>2]|0)+1}c[k>>2]=(c[k>>2]|0)+1;c[f>>2]=(c[f>>2]|0)+(c[w>>2]<<2)}i=C;return}function Em(a,b,d,e,f,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;v=i;i=i+64|0;j=v+48|0;k=v+44|0;w=v+40|0;l=v+36|0;m=v+32|0;n=v+28|0;q=v+24|0;r=v+20|0;p=v+16|0;t=v+12|0;s=v+8|0;o=v+4|0;u=v;c[j>>2]=a;c[k>>2]=b;c[w>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=h;c[r>>2]=_(c[w>>2]|0,c[l>>2]|0)|0;c[p>>2]=_(c[m>>2]|0,c[l>>2]|0)|0;c[q>>2]=0;while(1){if(((c[q>>2]|0)+(c[q>>2]|0)|0)>=(c[k>>2]|0))break;f=(c[j>>2]|0)+((_((c[q>>2]|0)+1|0,c[r>>2]|0)|0)<<2)|0;c[t>>2]=f+(0-(c[p>>2]|0)<<2);f=(c[j>>2]|0)+((_((c[k>>2]|0)-(c[q>>2]|0)|0,c[r>>2]|0)|0)<<2)|0;c[s>>2]=f+(0-(c[p>>2]|0)<<2);c[o>>2]=c[m>>2];while(1){if((c[o>>2]|0)>=(c[n>>2]|0))break;g[u>>2]=+g[c[t>>2]>>2];g[c[t>>2]>>2]=+g[c[s>>2]>>2];g[c[s>>2]>>2]=+g[u>>2];c[o>>2]=(c[o>>2]|0)+1;c[t>>2]=(c[t>>2]|0)+(0-(c[l>>2]|0)<<2);c[s>>2]=(c[s>>2]|0)+(0-(c[l>>2]|0)<<2)}c[q>>2]=(c[q>>2]|0)+1}i=v;return}function Fm(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0;x=i;i=i+80|0;y=x+72|0;d=x+68|0;e=x+64|0;k=x+60|0;r=x+56|0;l=x+52|0;u=x+48|0;v=x+44|0;w=x+40|0;n=x+36|0;o=x+32|0;m=x+28|0;p=x+24|0;q=x+20|0;j=x+16|0;t=x+12|0;h=x+8|0;f=x+4|0;s=x;c[y>>2]=a;c[d>>2]=b;c[r>>2]=c[(c[y>>2]|0)+64>>2];c[l>>2]=c[(c[y>>2]|0)+68>>2];c[u>>2]=c[(c[y>>2]|0)+72>>2];c[v>>2]=c[(c[y>>2]|0)+76>>2];c[w>>2]=c[(c[y>>2]|0)+80>>2];c[n>>2]=_(c[l>>2]|0,c[u>>2]|0)|0;c[o>>2]=c[(c[y>>2]|0)+84>>2];c[m>>2]=(c[o>>2]|0)+(c[(c[y>>2]|0)+88>>2]|0);c[e>>2]=0;while(1){if((c[e>>2]|0)>=(c[v>>2]|0))break;c[k>>2]=1;while(1){b=c[d>>2]|0;if(((c[k>>2]|0)+(c[k>>2]|0)|0)>=(c[r>>2]|0))break;c[p>>2]=b+((_(c[k>>2]|0,c[n>>2]|0)|0)<<2);c[q>>2]=(c[d>>2]|0)+((_((c[r>>2]|0)-(c[k>>2]|0)|0,c[n>>2]|0)|0)<<2);c[j>>2]=c[o>>2];while(1){if((c[j>>2]|0)>=(c[m>>2]|0))break;b=_(c[j>>2]|0,c[u>>2]|0)|0;g[t>>2]=+g[(c[p>>2]|0)+(b<<2)>>2];b=(c[n>>2]|0)-(_(c[j>>2]|0,c[u>>2]|0)|0)|0;g[f>>2]=+g[(c[q>>2]|0)+(b<<2)>>2];b=_(c[j>>2]|0,c[u>>2]|0)|0;g[s>>2]=+g[(c[q>>2]|0)+(b<<2)>>2];b=(c[n>>2]|0)-(_(c[j>>2]|0,c[u>>2]|0)|0)|0;g[h>>2]=+g[(c[p>>2]|0)+(b<<2)>>2];b=_(c[j>>2]|0,c[u>>2]|0)|0;g[(c[p>>2]|0)+(b<<2)>>2]=+g[t>>2]-+g[f>>2];b=(c[n>>2]|0)-(_(c[j>>2]|0,c[u>>2]|0)|0)|0;g[(c[q>>2]|0)+(b<<2)>>2]=+g[t>>2]+ +g[f>>2];b=_(c[j>>2]|0,c[u>>2]|0)|0;g[(c[q>>2]|0)+(b<<2)>>2]=+g[s>>2]-+g[h>>2];b=(c[n>>2]|0)-(_(c[j>>2]|0,c[u>>2]|0)|0)|0;g[(c[p>>2]|0)+(b<<2)>>2]=+g[h>>2]+ +g[s>>2];c[j>>2]=(c[j>>2]|0)+1}c[k>>2]=(c[k>>2]|0)+1}Em(b,c[r>>2]|0,c[l>>2]|0,c[u>>2]|0,c[o>>2]|0,c[m>>2]|0);c[e>>2]=(c[e>>2]|0)+1;c[d>>2]=(c[d>>2]|0)+(c[w>>2]<<2)}i=x;return}function Gm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;h=j+16|0;k=j+12|0;e=j+8|0;f=j+4|0;g=j;c[k>>2]=a;c[e>>2]=b;c[f>>2]=d;if(!(Jm(c[k>>2]|0,c[e>>2]|0,c[f>>2]|0)|0)){c[h>>2]=0;g=c[h>>2]|0;i=j;return g|0}c[g>>2]=c[e>>2];if(!(c[c[(c[g>>2]|0)+8>>2]>>2]|0))e=1;else e=(c[(c[f>>2]|0)+164>>2]&16|0)!=0^1;c[h>>2]=e&1;g=c[h>>2]|0;i=j;return g|0}function Hm(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(c[j>>2]|0,13988)|0;c[(c[e>>2]|0)+8>>2]=c[h>>2];c[(c[e>>2]|0)+12>>2]=c[g>>2];i=f;return c[e>>2]|0}function Im(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=oc(c[j>>2]|0,c[h>>2]|0)|0;c[(c[e>>2]|0)+56>>2]=c[g>>2];i=f;return c[e>>2]|0}function Jm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;e=j+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[k>>2]=b;c[f>>2]=d;c[g>>2]=c[k>>2];if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)!=1){g=0;g=g&1;i=j;return g|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)>1){g=0;g=g&1;i=j;return g|0}if(c[(c[g>>2]|0)+20>>2]|0){if((c[(c[g>>2]|0)+20>>2]|0)!=4){g=0;g=g&1;i=j;return g|0}if((c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+16>>2]|0)?(c[(c[f>>2]|0)+164>>2]&4096|0)!=0:0){g=0;g=g&1;i=j;return g|0}}a=kd(c[(c[e>>2]|0)+8>>2]|0,c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)|0;c[h>>2]=a;if((a|0)<=0){g=0;g=g&1;i=j;return g|0}g=(c[(c[(c[g>>2]|0)+4>>2]|0)+4>>2]|0)>(c[h>>2]|0);g=g&1;i=j;return g|0}function Km(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;v=i;i=i+64|0;e=v+60|0;w=v+56|0;f=v+52|0;g=v+48|0;l=v+44|0;q=v+40|0;r=v+36|0;h=v+32|0;j=v+28|0;o=v+24|0;s=v+20|0;n=v+16|0;t=v+12|0;m=v+8|0;p=v+4|0;k=v;c[w>>2]=a;c[f>>2]=b;c[g>>2]=d;c[l>>2]=c[w>>2];c[r>>2]=0;c[h>>2]=0;c[j>>2]=0;if(!((c[(c[g>>2]|0)+164>>2]&512|0)!=0?(c[(c[g>>2]|0)+160>>2]|0)>1:0))u=3;if((u|0)==3?(Gm(c[l>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)!=0:0){c[q>>2]=c[f>>2];c[k>>2]=(c[(c[q>>2]|0)+4>>2]|0)+4;c[o>>2]=c[c[k>>2]>>2];c[s>>2]=kd(c[(c[l>>2]|0)+8>>2]|0,c[o>>2]|0)|0;c[n>>2]=(c[o>>2]|0)/(c[s>>2]|0)|0;ke(c[(c[q>>2]|0)+8>>2]|0,t,m,p)|0;switch(c[(c[q>>2]|0)+20>>2]|0){case 
0:{c[j>>2]=gb[c[(c[l>>2]|0)+12>>2]&7](c[l>>2]|0,0,c[s>>2]|0,c[n>>2]|0,c[(c[k>>2]|0)+8>>2]|0,c[t>>2]|0,c[p>>2]|0,0,((c[n>>2]|0)+2|0)/2|0,c[(c[q>>2]|0)+16>>2]|0,c[g>>2]|0)|0;if((c[j>>2]|0)!=0?(l=c[g>>2]|0,a=_(c[s>>2]|0,c[(c[k>>2]|0)+4>>2]|0)|0,a=Ed(c[n>>2]|0,a,c[(c[k>>2]|0)+8>>2]|0)|0,n=_(c[n>>2]|0,c[(c[k>>2]|0)+8>>2]|0)|0,n=Fd(c[s>>2]|0,c[(c[k>>2]|0)+4>>2]|0,n,c[t>>2]|0,c[m>>2]|0,c[p>>2]|0)|0,c[h>>2]=uc(l,Gn(a,n,c[(c[q>>2]|0)+12>>2]|0,c[(c[q>>2]|0)+16>>2]|0,(c[q>>2]|0)+20|0)|0)|0,(c[h>>2]|0)!=0):0){c[r>>2]=sn(80,14e3,41)|0;u=12}else u=13;break}case 4:{c[j>>2]=gb[c[(c[l>>2]|0)+12>>2]&7](c[l>>2]|0,4,c[s>>2]|0,c[n>>2]|0,c[(c[k>>2]|0)+4>>2]|0,c[t>>2]|0,c[m>>2]|0,0,((c[n>>2]|0)+2|0)/2|0,c[(c[q>>2]|0)+12>>2]|0,c[g>>2]|0)|0;if((c[j>>2]|0)!=0?(l=c[g>>2]|0,a=Ed(c[n>>2]|0,c[(c[k>>2]|0)+4>>2]|0,_(c[s>>2]|0,c[(c[k>>2]|0)+8>>2]|0)|0)|0,n=_(c[n>>2]|0,c[(c[k>>2]|0)+4>>2]|0)|0,n=Fd(c[s>>2]|0,n,c[(c[k>>2]|0)+8>>2]|0,c[t>>2]|0,c[m>>2]|0,c[p>>2]|0)|0,c[h>>2]=uc(l,Gn(a,n,c[(c[q>>2]|0)+12>>2]|0,c[(c[q>>2]|0)+16>>2]|0,(c[q>>2]|0)+20|0)|0)|0,(c[h>>2]|0)!=0):0){c[r>>2]=sn(80,14e3,42)|0;u=12}else u=13;break}default:u=12}if((u|0)==12){c[(c[r>>2]|0)+64>>2]=c[h>>2];c[(c[r>>2]|0)+68>>2]=c[j>>2];c[(c[r>>2]|0)+72>>2]=c[s>>2];jc((c[h>>2]|0)+8|0,(c[j>>2]|0)+8|0,(c[r>>2]|0)+8|0);c[(c[r>>2]|0)+52>>2]=c[(c[j>>2]|0)+52>>2];c[e>>2]=c[r>>2];n=c[e>>2]|0;i=v;return n|0}else if((u|0)==13){pc(c[j>>2]|0);pc(c[h>>2]|0);c[e>>2]=0;n=c[e>>2]|0;i=v;return n|0}}c[e>>2]=0;n=c[e>>2]|0;i=v;return n|0}function Lm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;j=e+16|0;f=e+12|0;g=e+8|0;k=e+4|0;h=e;c[l>>2]=a;c[j>>2]=b;c[f>>2]=d;c[g>>2]=c[l>>2];c[k>>2]=c[(c[g>>2]|0)+64>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[(c[g>>2]|0)+64>>2]|0,c[j>>2]|0,c[f>>2]|0);c[h>>2]=c[(c[g>>2]|0)+68>>2];$a[c[(c[h>>2]|0)+56>>2]&127](c[(c[g>>2]|0)+68>>2]|0,c[f>>2]|0);i=e;return}function Mm(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;g=e+16|0;f=e+12|0;h=e+8|0;j=e+4|0;k=e;c[l>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[l>>2];c[k>>2]=c[(c[h>>2]|0)+68>>2];$a[c[(c[k>>2]|0)+56>>2]&127](c[(c[h>>2]|0)+68>>2]|0,c[g>>2]|0);c[j>>2]=c[(c[h>>2]|0)+64>>2];eb[c[(c[j>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+64>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return}function Nm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Om(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+72>>2]|0;g=c[(c[j>>2]|0)+68>>2]|0;f=c[(c[j>>2]|0)+64>>2]|0;c[e>>2]=(c[(c[j>>2]|0)+56>>2]|0)==41?22957:22961;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,22965,e);i=d;return}function Pm(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Qm(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=2)break;a=c[b>>2]|0;Bd(a,Rm(c[14016+(c[d>>2]<<2)>>2]|0)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Rm(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,14024)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}function Sm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;m=i;i=i+48|0;e=m+32|0;o=m+28|0;n=m+24|0;f=m+20|0;k=m+16|0;j=m+12|0;l=m+8|0;g=m+4|0;h=m;c[o>>2]=a;c[n>>2]=b;c[f>>2]=d;c[k>>2]=c[n>>2];c[j>>2]=c[o>>2];c[g>>2]=0;c[h>>2]=0;if(!(Tm(c[o>>2]|0,c[n>>2]|0,c[f>>2]|0)|0)){c[e>>2]=0;d=c[e>>2]|0;i=m;return 
d|0}b=c[f>>2]|0;d=Td(c[(c[k>>2]|0)+8>>2]|0,c[(c[k>>2]|0)+4>>2]|0)|0;c[h>>2]=uc(b,Jn(d,c[(c[k>>2]|0)+12>>2]|0,c[(c[k>>2]|0)+16>>2]|0)|0)|0;if((c[h>>2]|0)!=0?(d=c[f>>2]|0,c[g>>2]=vc(d,bb[c[(c[(c[j>>2]|0)+8>>2]|0)+4>>2]&7](c[k>>2]|0)|0,1024,0,0)|0,(c[g>>2]|0)!=0):0){c[l>>2]=sn(80,14036,c[c[(c[j>>2]|0)+8>>2]>>2]|0)|0;c[(c[l>>2]|0)+68>>2]=c[g>>2];c[(c[l>>2]|0)+64>>2]=c[h>>2];c[(c[l>>2]|0)+72>>2]=c[j>>2];jc((c[g>>2]|0)+8|0,(c[h>>2]|0)+8|0,(c[l>>2]|0)+8|0);c[e>>2]=c[l>>2];d=c[e>>2]|0;i=m;return d|0}pc(c[g>>2]|0);pc(c[h>>2]|0);c[e>>2]=0;d=c[e>>2]|0;i=m;return d|0}function Tm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;j=i;i=i+32|0;e=j+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[k>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(Xm(c[k>>2]|0,c[f>>2]|0,c[g>>2]|0)|0)){c[e>>2]=0;a=c[e>>2]|0;i=j;return a|0}if((c[(c[g>>2]|0)+164>>2]&32|0)!=0?(c[h>>2]=c[f>>2],(c[(c[h>>2]|0)+12>>2]|0)!=(c[(c[h>>2]|0)+16>>2]|0)):0){c[e>>2]=0;a=c[e>>2]|0;i=j;return a|0}c[e>>2]=1;a=c[e>>2]|0;i=j;return a|0}function Um(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Vm(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;g=d+20|0;f=d+16|0;h=d+12|0;c[j>>2]=a;c[g>>2]=b;c[f>>2]=c[j>>2];c[h>>2]=c[(c[f>>2]|0)+72>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[f>>2]|0)+68>>2]|0;f=c[(c[f>>2]|0)+64>>2]|0;c[e>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+8>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,22993,e);i=d;return}function Wm(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Xm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;h=i;i=i+32|0;k=h+16|0;j=h+12|0;e=h+8|0;f=h+4|0;g=h;c[k>>2]=a;c[j>>2]=b;c[e>>2]=d;c[f>>2]=c[k>>2];c[g>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)==2147483647){g=0;g=g&1;i=h;return g|0}if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)<=0){g=0;g=g&1;i=h;return 
g|0}if((c[(c[g>>2]|0)+12>>2]|0)==(c[(c[g>>2]|0)+16>>2]|0)?(Md(c[(c[g>>2]|0)+4>>2]|0,c[(c[g>>2]|0)+8>>2]|0)|0)==0:0){g=1;g=g&1;i=h;return g|0}if(((((c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+16>>2]|0)?(c[c[(c[f>>2]|0)+8>>2]>>2]|0)==2:0)?(c[(c[e>>2]|0)+164>>2]&4096|0)==0:0)?(Id(c[(c[g>>2]|0)+4>>2]|0)|0)<=2:0)?(Jd(c[(c[g>>2]|0)+4>>2]|0)|0)>2:0){g=1;g=g&1;i=h;return g|0}if((c[(c[g>>2]|0)+12>>2]|0)==(c[(c[g>>2]|0)+16>>2]|0)){g=0;g=g&1;i=h;return g|0}if((c[c[(c[f>>2]|0)+8>>2]>>2]|0)!=1){g=0;g=g&1;i=h;return g|0}if((Jd(c[(c[g>>2]|0)+4>>2]|0)|0)>2){g=0;g=g&1;i=h;return g|0}g=(Id(c[(c[g>>2]|0)+4>>2]|0)|0)>2;g=g&1;i=h;return g|0}function Ym(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;g=e+16|0;f=e+12|0;h=e+8|0;k=e+4|0;j=e;c[l>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[l>>2];c[k>>2]=c[(c[h>>2]|0)+68>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+68>>2]|0,c[g>>2]|0,c[g>>2]|0);c[j>>2]=c[(c[h>>2]|0)+64>>2];eb[c[(c[j>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+64>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return}function Zm(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;j=e+16|0;f=e+12|0;g=e+8|0;k=e+4|0;h=e;c[l>>2]=a;c[j>>2]=b;c[f>>2]=d;c[g>>2]=c[l>>2];c[k>>2]=c[(c[g>>2]|0)+64>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[(c[g>>2]|0)+64>>2]|0,c[j>>2]|0,c[f>>2]|0);c[h>>2]=c[(c[g>>2]|0)+68>>2];eb[c[(c[h>>2]|0)+56>>2]&63](c[(c[g>>2]|0)+68>>2]|0,c[f>>2]|0,c[f>>2]|0);i=e;return}function _m(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b;c[d>>2]=a;e=Qd(c[(c[d>>2]|0)+4>>2]|0,0)|0;a=Qd(c[(c[d>>2]|0)+8>>2]|0,0)|0;a=Gn(e,a,c[(c[d>>2]|0)+12>>2]|0,c[(c[d>>2]|0)+12>>2]|0,(c[d>>2]|0)+20|0)|0;i=b;return a|0}function $m(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b;c[d>>2]=a;e=Qd(c[(c[d>>2]|0)+4>>2]|0,1)|0;a=Qd(c[(c[d>>2]|0)+8>>2]|0,1)|0;a=Gn(e,a,c[(c[d>>2]|0)+16>>2]|0,c[(c[d>>2]|0)+16>>2]|0,(c[d>>2]|0)+20|0)|0;i=b;return a|0}function an(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;qk(c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0);i=f;return}function bn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;gm(c[h>>2]|0,c[g>>2]|0,c[f>>2]|0);i=e;return} +function cn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;b=c[h>>2]|0;Bd(b,sl(c[g>>2]|0,c[f>>2]|0)|0);b=c[h>>2]|0;Bd(b,tl(c[g>>2]|0,c[f>>2]|0)|0);b=c[h>>2]|0;Bd(b,Pl(c[g>>2]|0,c[f>>2]|0)|0);i=e;return}function dn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;b=c[h>>2]|0;Bd(b,Jl(c[g>>2]|0,c[f>>2]|0)|0);i=e;return}function en(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,fn()|0);i=b;return}function fn(){return zd(8,14076)|0}function gn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;e=g+16|0;j=g+12|0;h=g+8|0;f=g;c[j>>2]=a;c[h>>2]=b;c[g+4>>2]=d;if(hn(c[j>>2]|0,c[h>>2]|0)|0){c[f>>2]=rn(64,14088,31)|0;fc((c[f>>2]|0)+8|0);c[e>>2]=c[f>>2];f=c[e>>2]|0;i=g;return f|0}else{c[e>>2]=0;f=c[e>>2]|0;i=g;return f|0}return 0}function hn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647)if((((c[(c[d>>2]|0)+28>>2]|0)!=0?(c[c[(c[d>>2]|0)+4>>2]>>2]|0)==0:0)?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647:0)?(c[(c[d>>2]|0)+12>>2]|0)==(c[(c[d>>2]|0)+20>>2]|0):0)a=(dp(c[d>>2]|0,2147483647)|0)!=0;else a=0;else a=1;i=e;return a&1|0}function jn(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0;g=i;i=i+32|0;c[g+16>>2]=a;c[g+12>>2]=b;c[g+8>>2]=d;c[g+4>>2]=e;c[g>>2]=f;i=g;return}function kn(a,b){a=a|0;b=b|0;var d=0,e=0;d=i;i=i+16|0;e=d+4|0;c[d+8>>2]=a;c[e>>2]=b;eb[c[c[e>>2]>>2]&63](c[e>>2]|0,23051,d);i=d;return}function ln(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,mn()|0);i=b;return}function mn(){return 
zd(8,14104)|0}function nn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;e=g+16|0;j=g+12|0;h=g+8|0;f=g;c[j>>2]=a;c[h>>2]=b;c[g+4>>2]=d;if(on(c[j>>2]|0,c[h>>2]|0)|0){c[f>>2]=sn(64,14116,43)|0;fc((c[f>>2]|0)+8|0);c[e>>2]=c[f>>2];f=c[e>>2]|0;i=g;return f|0}else{c[e>>2]=0;f=c[e>>2]|0;i=g;return f|0}return 0}function on(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647)if(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)==0?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647:0)?(c[(c[d>>2]|0)+16>>2]|0)==(c[(c[d>>2]|0)+12>>2]|0):0)a=(Ld(c[(c[d>>2]|0)+8>>2]|0)|0)!=0;else a=0;else a=1;i=e;return a&1|0}function pn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0;e=i;i=i+16|0;c[e+8>>2]=a;c[e+4>>2]=b;c[e>>2]=d;i=e;return}function qn(a,b){a=a|0;b=b|0;var d=0,e=0;d=i;i=i+16|0;e=d+4|0;c[d+8>>2]=a;c[e>>2]=b;eb[c[c[e>>2]>>2]&63](c[e>>2]|0,23063,d);i=d;return}function rn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=oc(c[j>>2]|0,c[h>>2]|0)|0;c[(c[e>>2]|0)+56>>2]=c[g>>2];i=f;return c[e>>2]|0}function sn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=oc(c[j>>2]|0,c[h>>2]|0)|0;c[(c[e>>2]|0)+56>>2]=c[g>>2];i=f;return c[e>>2]|0}function tn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;f=i;i=i+16|0;d=f+8|0;e=f+4|0;g=f;c[e>>2]=a;c[g>>2]=b;switch(c[g>>2]|0){case 4:case 0:{c[d>>2]=((c[e>>2]|0)/2|0)+1;break}case 6:case 1:{c[d>>2]=((c[e>>2]|0)+1|0)/2|0;break}default:c[d>>2]=0}i=f;return c[d>>2]|0}function un(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;v=i;i=i+48|0;j=v+44|0;k=v+40|0;l=v+36|0;m=v+32|0;n=v+28|0;o=v+24|0;p=v+20|0;q=v+16|0;r=v+12|0;s=v+8|0;u=v+4|0;t=v;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[o>>2]=f;c[p>>2]=g;c[q>>2]=h;if((c[m>>2]|0)==(c[p>>2]|0)){c[j>>2]=rd()|0;l=c[j>>2]|0;i=v;return 
l|0}if((c[m>>2]|0)==(c[o>>2]|0)){b=c[m>>2]|0;c[o>>2]=b;c[m>>2]=b}c[r>>2]=pd(32,14132)|0;g=c[k>>2]|0;if((c[c[k>>2]>>2]|0)>1){c[s>>2]=Rd(g,(c[c[k>>2]>>2]|0)-1|0)|0;c[u>>2]=Sd(c[k>>2]|0,(c[c[k>>2]>>2]|0)-1|0,1)|0;c[t>>2]=Wd(c[s>>2]|0)|0;if((c[c[t>>2]>>2]|0)>0){k=Td(c[t>>2]|0,c[u>>2]|0)|0;c[(c[r>>2]|0)+4>>2]=k}else{k=Wd(c[u>>2]|0)|0;c[(c[r>>2]|0)+4>>2]=k}ee(c[s>>2]|0,c[u>>2]|0);he(c[t>>2]|0)}else{k=Wd(g)|0;c[(c[r>>2]|0)+4>>2]=k}l=Xd(c[l>>2]|0)|0;c[(c[r>>2]|0)+8>>2]=l;c[(c[r>>2]|0)+12>>2]=c[m>>2];c[(c[r>>2]|0)+16>>2]=c[n>>2];c[(c[r>>2]|0)+20>>2]=c[o>>2];c[(c[r>>2]|0)+24>>2]=c[p>>2];c[(c[r>>2]|0)+28>>2]=c[q>>2];c[j>>2]=c[r>>2];l=c[j>>2]|0;i=v;return l|0}function vn(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;k=i;i=i+32|0;l=k+28|0;m=k+24|0;r=k+20|0;q=k+16|0;p=k+12|0;o=k+8|0;n=k+4|0;j=k;c[l>>2]=a;c[m>>2]=b;c[r>>2]=d;c[q>>2]=e;c[p>>2]=f;c[o>>2]=g;c[n>>2]=h;c[j>>2]=un(c[l>>2]|0,c[m>>2]|0,c[r>>2]|0,c[q>>2]|0,c[p>>2]|0,c[o>>2]|0,c[n>>2]|0)|0;ee(c[m>>2]|0,c[l>>2]|0);i=k;return c[j>>2]|0}function wn(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+48|0;h=r+32|0;j=r+28|0;k=r+24|0;l=r+20|0;m=r+16|0;n=r+12|0;o=r+8|0;q=r+4|0;p=r;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;c[q>>2]=c[c[h>>2]>>2];do if(c[q>>2]|0){e=c[k>>2]|0;f=(c[h>>2]|0)+4+(((c[q>>2]|0)-1|0)*12|0)|0;if((c[n>>2]|0)>>>0>=0&(c[n>>2]|0)>>>0<=3){c[p>>2]=e+(c[f+4>>2]<<2);b=(c[h>>2]|0)+4+(((c[q>>2]|0)-1|0)*12|0)+4|0;c[b>>2]=c[b>>2]<<1;break}else{c[p>>2]=e+(c[f+8>>2]<<2);b=(c[h>>2]|0)+4+(((c[q>>2]|0)-1|0)*12|0)+8|0;c[b>>2]=c[b>>2]<<1;break}}else c[p>>2]=c[k>>2];while(0);c[o>>2]=un(c[h>>2]|0,c[j>>2]|0,c[k>>2]|0,c[p>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0)|0;ee(c[j>>2]|0,c[h>>2]|0);i=r;return c[o>>2]|0}function xn(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];Xb(c[f>>2]|0,23094);Yb(c[f>>2]|0,(c[(c[e>>2]|0)+12>>2]|0)==(c[(c[e>>2]|0)+20>>2]|0)&1);Zb(c[f>>2]|0,((c[(c[e>>2]|0)+16>>2]|0)-(c[(c[e>>2]|0)+12>>2]|0)|0)/4|0);Zb(c[f>>2]|0,((c[(c[e>>2]|0)+24>>2]|0)-(c[(c[e>>2]|0)+20>>2]|0)|0)/4|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+12>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+16>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+20>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+24>>2]|0)|0);Yb(c[f>>2]|0,c[(c[e>>2]|0)+28>>2]|0);je(c[f>>2]|0,c[(c[e>>2]|0)+4>>2]|0);je(c[f>>2]|0,c[(c[e>>2]|0)+8>>2]|0);i=d;return}function yn(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0;g=i;i=i+32|0;h=g+16|0;b=g+12|0;e=g+8|0;f=g+4|0;d=g;c[h>>2]=a;c[b>>2]=c[h>>2];if((c[(c[b>>2]|0)+28>>2]|0)>>>0>=0?(c[(c[b>>2]|0)+28>>2]|0)>>>0<=3:0){Bn((c[(c[b>>2]|0)+8>>2]|0)+4|0,c[c[(c[b>>2]|0)+8>>2]>>2]|0,(c[(c[b>>2]|0)+4>>2]|0)+4|0,c[c[(c[b>>2]|0)+4>>2]>>2]|0,c[(c[b>>2]|0)+12>>2]|0,c[(c[b>>2]|0)+16>>2]|0);i=g;return}c[f>>2]=Pd(c[(c[b>>2]|0)+4>>2]|0)|0;c[d>>2]=c[c[f>>2]>>2];if((c[d>>2]|0)>0){a=tn(c[(c[f>>2]|0)+4+(((c[d>>2]|0)-1|0)*12|0)>>2]|0,c[(c[b>>2]|0)+28>>2]|0)|0;c[(c[f>>2]|0)+4+(((c[d>>2]|0)-1|0)*12|0)>>2]=a}c[e>>2]=Td(c[(c[b>>2]|0)+8>>2]|0,c[f>>2]|0)|0;he(c[f>>2]|0);$h(c[e>>2]|0,c[(c[b>>2]|0)+20>>2]|0,c[(c[b>>2]|0)+24>>2]|0);he(c[e>>2]|0);i=g;return}function zn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;g=d+24|0;h=d+20|0;j=d+16|0;c[g>>2]=a;c[h>>2]=b;c[j>>2]=c[g>>2];b=c[c[h>>2]>>2]|0;a=c[h>>2]|0;h=c[(c[j>>2]|0)+28>>2]|0;g=c[(c[j>>2]|0)+4>>2]|0;f=c[(c[j>>2]|0)+8>>2]|0;c[e>>2]=(c[(c[j>>2]|0)+20>>2]|0)==(c[(c[j>>2]|0)+12>>2]|0)&1;c[e+4>>2]=h;c[e+8>>2]=g;c[e+12>>2]=f;eb[b&63](a,23074,e);i=d;return}function An(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b+4|0;e=b;c[d>>2]=a;c[e>>2]=c[d>>2];ee(c[(c[e>>2]|0)+8>>2]|0,c[(c[e>>2]|0)+4>>2]|0);xb(c[d>>2]|0);i=b;return}function Bn(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+48|0;h=r+32|0;j=r+28|0;k=r+24|0;l=r+20|0;m=r+16|0;n=r+12|0;o=r+8|0;q=r+4|0;p=r;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;if((c[j>>2]|0)==2147483647){i=r;return}if(!(c[j>>2]|0)){Cn(c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0);i=r;return}if((c[j>>2]|0)<=0){i=r;return}c[q>>2]=c[c[h>>2]>>2];c[p>>2]=c[(c[h>>2]|0)+4>>2];c[o>>2]=0;while(1){if((c[o>>2]|0)>=(c[q>>2]|0))break;d=(c[m>>2]|0)+((_(c[o>>2]|0,c[p>>2]|0)|0)<<2)|0;Bn((c[h>>2]|0)+12|0,(c[j>>2]|0)-1|0,c[k>>2]|0,c[l>>2]|0,d,(c[n>>2]|0)+((_(c[o>>2]|0,c[p>>2]|0)|0)<<2)|0);c[o>>2]=(c[o>>2]|0)+1}i=r;return}function Cn(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;f=o+24|0;h=o+20|0;j=o+16|0;k=o+12|0;l=o+8|0;n=o+4|0;m=o;c[f>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;if((c[h>>2]|0)==2147483647){i=o;return}if(!(c[h>>2]|0)){g[c[j>>2]>>2]=0.0;i=o;return}if((c[h>>2]|0)<=0){i=o;return}c[n>>2]=c[c[f>>2]>>2];c[m>>2]=c[(c[f>>2]|0)+4>>2];a=(c[h>>2]|0)==1;c[l>>2]=0;if(!a){while(1){if((c[l>>2]|0)>=(c[n>>2]|0))break;a=(c[j>>2]|0)+((_(c[l>>2]|0,c[m>>2]|0)|0)<<2)|0;Cn((c[f>>2]|0)+12|0,(c[h>>2]|0)-1|0,a,(c[k>>2]|0)+((_(c[l>>2]|0,c[m>>2]|0)|0)<<2)|0);c[l>>2]=(c[l>>2]|0)+1}i=o;return}while(1){if((c[l>>2]|0)>=((c[n>>2]|0)-1|0))break;g[c[k>>2]>>2]=0.0;g[c[j>>2]>>2]=0.0;c[j>>2]=(c[j>>2]|0)+(c[m>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[m>>2]<<2);c[l>>2]=(c[l>>2]|0)+2}if((c[l>>2]|0)>=(c[n>>2]|0)){i=o;return}g[c[j>>2]>>2]=0.0;i=o;return}function Dn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;d=i;i=i+16|0;f=d+4|0;e=d;c[f>>2]=a;c[e>>2]=b;Kn((c[f>>2]|0)+4|0,c[c[f>>2]>>2]|0,c[e>>2]|0);i=d;return}function En(a){a=a|0;var b=0,d=0;d=i;i=i+16|0;b=d;c[b>>2]=a;i=d;return 23100+(c[b>>2]<<3)|0}function Fn(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;t=i;i=i+64|0;g=t+52|0;h=t+48|0;j=t+44|0;k=t+40|0;l=t+36|0;m=t+32|0;o=t+28|0;s=t+24|0;p=t+20|0;q=t+16|0;n=t+4|0;r=t;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[s>>2]=c[c[h>>2]>>2];if((c[k>>2]|0)==(c[l>>2]|0)){e=c[k>>2]|0;c[l>>2]=e;c[k>>2]=e}if((c[k>>2]|0)==(c[l>>2]|0)?(_d(c[h>>2]|0,c[j>>2]|0)|0)==0:0){c[g>>2]=rd()|0;o=c[g>>2]|0;i=t;return o|0}c[s>>2]=0;c[p>>2]=0;while(1){if((c[p>>2]|0)>=(c[c[h>>2]>>2]|0))break;if(Ln((c[h>>2]|0)+4+((c[p>>2]|0)*12|0)|0,c[(c[m>>2]|0)+(c[p>>2]<<2)>>2]|0)|0)c[s>>2]=(c[s>>2]|0)+1;c[p>>2]=(c[p>>2]|0)+1}c[o>>2]=pd(24+(((c[s>>2]|0)>0?(c[s>>2]|0)-1|0:0)<<2)|0,14152)|0;e=ge(c[s>>2]|0)|0;c[(c[o>>2]|0)+4>>2]=e;c[s>>2]=0;c[p>>2]=0;while(1){if((c[p>>2]|0)>=(c[c[h>>2]>>2]|0))break;if(Ln((c[h>>2]|0)+4+((c[p>>2]|0)*12|0)|0,c[(c[m>>2]|0)+(c[p>>2]<<2)>>2]|0)|0){c[(c[o>>2]|0)+20+(c[s>>2]<<2)>>2]=c[(c[m>>2]|0)+(c[p>>2]<<2)>>2];e=c[s>>2]|0;c[s>>2]=e+1;e=(c[(c[o>>2]|0)+4>>2]|0)+4+(e*12|0)|0;d=(c[h>>2]|0)+4+((c[p>>2]|0)*12|0)|0;c[e>>2]=c[d>>2];c[e+4>>2]=c[d+4>>2];c[e+8>>2]=c[d+8>>2]}c[p>>2]=(c[p>>2]|0)+1}c[p>>2]=0;while(1){if(((c[p>>2]|0)+1|0)>=(c[s>>2]|0))break;c[q>>2]=(c[p>>2]|0)+1;while(1){if((c[q>>2]|0)>=(c[s>>2]|0))break;if((Vd((c[(c[o>>2]|0)+4>>2]|0)+4+((c[p>>2]|0)*12|0)|0,(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)|0)|0)>0){e=(c[(c[o>>2]|0)+4>>2]|0)+4+((c[p>>2]|0)*12|0)|0;c[n>>2]=c[e>>2];c[n+4>>2]=c[e+4>>2];c[n+8>>2]=c[e+8>>2];e=(c[(c[o>>2]|0)+4>>2]|0)+4+((c[p>>2]|0)*12|0)|0;d=(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)|0;c[e>>2]=c[d>>2];c[e+4>>2]=c[d+4>>2];c[e+8>>2]=c[d+8>>2];e=(c[(c[o>>2]|0)+4>>2]|0)+4+((c[q>>2]|0)*12|0)|0;c[e>>2]=c[n>>2];c[e+4>>2]=c[n+4>>2];c[e+8>>2]=c[n+8>>2];c[r>>2]=c[(c[o>>2]|0)+20+(c[p>>2]<<2)>>2];c[(c[o>>2]|0)+20+(c[p>>2]<<2)>>2]=c[(c[o>>2]|0)+20+(c[q>>2]<<2)>>2];c[(c[o>>2]|0)+20+(c[q>>2]<<2)>>2]=c[r>>2]}c[q>>2]=(c[q>>2]|0)+1}c[p>>2]=(c[p>>2]|0)+1}c[p>>2]=0;while(1){if((c[p>>2]|0)>=(c[s>>2]|0))break;do 
if((c[(c[(c[o>>2]|0)+4>>2]|0)+4+((c[p>>2]|0)*12|0)>>2]|0)==2){if(((c[(c[o>>2]|0)+20+(c[p>>2]<<2)>>2]|0)!=9?(c[(c[o>>2]|0)+20+(c[p>>2]<<2)>>2]|0)!=8:0)?(c[(c[o>>2]|0)+20+(c[p>>2]<<2)>>2]|0)!=4:0)break;c[(c[o>>2]|0)+20+(c[p>>2]<<2)>>2]=0}while(0);c[p>>2]=(c[p>>2]|0)+1}n=Xd(c[j>>2]|0)|0;c[(c[o>>2]|0)+8>>2]=n;c[(c[o>>2]|0)+12>>2]=c[k>>2];c[(c[o>>2]|0)+16>>2]=c[l>>2];c[g>>2]=c[o>>2];o=c[g>>2]|0;i=t;return o|0}function Gn(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;h=i;i=i+32|0;j=h+20|0;k=h+16|0;n=h+12|0;m=h+8|0;l=h+4|0;g=h;c[j>>2]=a;c[k>>2]=b;c[n>>2]=d;c[m>>2]=e;c[l>>2]=f;c[g>>2]=Fn(c[j>>2]|0,c[k>>2]|0,c[n>>2]|0,c[m>>2]|0,c[l>>2]|0)|0;ee(c[k>>2]|0,c[j>>2]|0);i=h;return c[g>>2]|0}function Hn(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;g=i;i=i+32|0;m=g+16|0;l=g+12|0;k=g+8|0;j=g+4|0;h=g;c[m>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;a=Fn(c[m>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,h)|0;i=g;return a|0}function In(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;g=i;i=i+32|0;m=g+16|0;l=g+12|0;k=g+8|0;j=g+4|0;h=g;c[m>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;a=Gn(c[m>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,h)|0;i=g;return a|0}function Jn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+8|0;g=e+4|0;f=e;c[h>>2]=a;c[g>>2]=b;c[f>>2]=d;b=Dd()|0;b=Gn(b,c[h>>2]|0,c[g>>2]|0,c[f>>2]|0,0)|0;i=e;return b|0}function Kn(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0;m=i;i=i+32|0;e=m+20|0;f=m+16|0;h=m+12|0;j=m+8|0;l=m+4|0;k=m;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if((c[f>>2]|0)==2147483647){i=m;return}if(!(c[f>>2]|0)){g[c[h>>2]>>2]=0.0;i=m;return}if((c[f>>2]|0)<=0){i=m;return}c[l>>2]=c[c[e>>2]>>2];c[k>>2]=c[(c[e>>2]|0)+4>>2];d=(c[f>>2]|0)==1;c[j>>2]=0;if(d){while(1){if((c[j>>2]|0)>=(c[l>>2]|0))break;d=_(c[j>>2]|0,c[k>>2]|0)|0;g[(c[h>>2]|0)+(d<<2)>>2]=0.0;c[j>>2]=(c[j>>2]|0)+1}i=m;return}else{while(1){if((c[j>>2]|0)>=(c[l>>2]|0))break;Kn((c[e>>2]|0)+12|0,(c[f>>2]|0)-1|0,(c[h>>2]|0)+((_(c[j>>2]|0,c[k>>2]|0)|0)<<2)|0);c[j>>2]=(c[j>>2]|0)+1}i=m;return}}function Ln(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[f>>2]=a;c[d>>2]=b;if(((c[d>>2]|0)==3?1:(c[c[f>>2]>>2]|0)>1)|(c[d>>2]|0)==7){b=1;b=b&1;i=e;return b|0}if(!((c[d>>2]|0)>>>0>=9&(c[d>>2]|0)>>>0<=16&(c[d>>2]|0)!=10)){b=0;b=b&1;i=e;return b|0}b=(c[d>>2]|0)!=14;b=b&1;i=e;return b|0}function Mn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;f=d+4|0;e=d;c[g>>2]=a;c[f>>2]=b;c[e>>2]=c[g>>2];Xb(c[f>>2]|0,23258);Yb(c[f>>2]|0,(c[(c[e>>2]|0)+12>>2]|0)==(c[(c[e>>2]|0)+16>>2]|0)&1);Qn(c[f>>2]|0,(c[e>>2]|0)+20|0,c[c[(c[e>>2]|0)+4>>2]>>2]|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+12>>2]|0)|0);b=c[f>>2]|0;Yb(b,vb(c[(c[e>>2]|0)+16>>2]|0)|0);je(c[f>>2]|0,c[(c[e>>2]|0)+4>>2]|0);je(c[f>>2]|0,c[(c[e>>2]|0)+8>>2]|0);i=d;return}function Nn(a){a=a|0;var b=0,d=0,e=0,f=0;b=i;i=i+16|0;f=b+8|0;e=b+4|0;d=b;c[f>>2]=a;c[e>>2]=c[f>>2];c[d>>2]=Td(c[(c[e>>2]|0)+8>>2]|0,c[(c[e>>2]|0)+4>>2]|0)|0;Dn(c[d>>2]|0,c[(c[e>>2]|0)+12>>2]|0);he(c[d>>2]|0);i=b;return}function On(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;j=i;i=i+48|0;h=j+24|0;g=j+16|0;k=j;o=j+40|0;d=j+36|0;e=j+32|0;f=j+28|0;c[o>>2]=a;c[d>>2]=b;c[e>>2]=c[o>>2];b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;o=vb(c[(c[e>>2]|0)+12>>2]|0)|0;n=((c[(c[e>>2]|0)+16>>2]|0)-(c[(c[e>>2]|0)+12>>2]|0)|0)/4|0;m=c[(c[e>>2]|0)+4>>2]|0;l=c[(c[e>>2]|0)+8>>2]|0;c[k>>2]=o;c[k+4>>2]=n;c[k+8>>2]=m;c[k+12>>2]=l;eb[b&63](a,23236,k);c[f>>2]=0;while(1){b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;if((c[f>>2]|0)>=(c[c[(c[e>>2]|0)+4>>2]>>2]|0))break;c[g>>2]=c[(c[e>>2]|0)+20+(c[f>>2]<<2)>>2];eb[b&63](a,23254,g);c[f>>2]=(c[f>>2]|0)+1}eb[b&63](a,23707,h);i=j;return}function Pn(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;d=b+4|0;e=b;c[d>>2]=a;c[e>>2]=c[d>>2];ee(c[(c[e>>2]|0)+8>>2]|0,c[(c[e>>2]|0)+4>>2]|0);xb(c[d>>2]|0);i=b;return}function Qn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;j=i;i=i+16|0;e=j+12|0;f=j+8|0;g=j+4|0;h=j;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=0;while(1){if((c[h>>2]|0)>=(c[g>>2]|0))break;Yb(c[e>>2]|0,c[(c[f>>2]|0)+(c[h>>2]<<2)>>2]|0);c[h>>2]=(c[h>>2]|0)+1}i=j;return}function Rn(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+8|0;d=e+4|0;c[b>>2]=a;c[e>>2]=3;c[d>>2]=0;while(1){if((c[d>>2]|0)>=3)break;a=c[b>>2]|0;Bd(a,Sn(c[14172+(c[d>>2]<<2)>>2]|0,14172,3)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Sn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,14184)|0;c[(c[e>>2]|0)+8>>2]=c[j>>2];c[(c[e>>2]|0)+12>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function Tn(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0;u=i;i=i+64|0;h=u+60|0;v=u+56|0;f=u+52|0;j=u+48|0;n=u+44|0;o=u+40|0;p=u+36|0;m=u+32|0;k=u+28|0;q=u+24|0;r=u+20|0;t=u+16|0;s=u+12|0;g=u+8|0;e=u+4|0;l=u;c[v>>2]=a;c[f>>2]=b;c[j>>2]=d;c[n>>2]=c[v>>2];c[m>>2]=0;c[k>>2]=0;if(!(Un(c[v>>2]|0,c[f>>2]|0,c[j>>2]|0,g)|0)){c[h>>2]=0;l=c[h>>2]|0;i=u;return 
l|0}c[o>>2]=c[f>>2];Yd(c[(c[o>>2]|0)+4>>2]|0,q,c[g>>2]|0,r);c[e>>2]=(c[(c[o>>2]|0)+28>>2]|0)==0?1:0;c[t>>2]=Qd(c[(c[o>>2]|0)+8>>2]|0,c[e>>2]|0)|0;c[s>>2]=Qd(c[r>>2]|0,c[e>>2]|0)|0;c[(c[s>>2]|0)+4+(((c[c[s>>2]>>2]|0)-1|0)*12|0)>>2]=((c[(c[s>>2]|0)+4+(((c[c[s>>2]>>2]|0)-1|0)*12|0)>>2]|0)/2|0)+1;a=c[j>>2]|0;d=Pd(c[r>>2]|0)|0;g=Td(c[(c[o>>2]|0)+8>>2]|0,c[q>>2]|0)|0;c[m>>2]=uc(a,vn(d,g,c[(c[o>>2]|0)+12>>2]|0,c[(c[o>>2]|0)+16>>2]|0,c[(c[o>>2]|0)+20>>2]|0,c[(c[o>>2]|0)+24>>2]|0,c[(c[o>>2]|0)+28>>2]|0)|0)|0;if(c[m>>2]|0){g=(c[(c[o>>2]|0)+28>>2]|0)==0;e=Qd(c[q>>2]|0,c[e>>2]|0)|0;a=Td(c[t>>2]|0,c[s>>2]|0)|0;f=c[o>>2]|0;if(g)c[l>>2]=qh(e,a,c[f+20>>2]|0,c[(c[o>>2]|0)+24>>2]|0,c[(c[o>>2]|0)+20>>2]|0,c[(c[o>>2]|0)+24>>2]|0)|0;else c[l>>2]=qh(e,a,c[f+24>>2]|0,c[(c[o>>2]|0)+20>>2]|0,c[(c[o>>2]|0)+24>>2]|0,c[(c[o>>2]|0)+20>>2]|0)|0;c[k>>2]=uc(c[j>>2]|0,c[l>>2]|0)|0;if(c[k>>2]|0){c[p>>2]=rn(80,14196,(c[(c[o>>2]|0)+28>>2]|0)==0?33:32)|0;c[(c[p>>2]|0)+64>>2]=c[m>>2];c[(c[p>>2]|0)+68>>2]=c[k>>2];c[(c[p>>2]|0)+72>>2]=c[n>>2];jc((c[m>>2]|0)+8|0,(c[k>>2]|0)+8|0,(c[p>>2]|0)+8|0);fe(c[s>>2]|0,c[t>>2]|0,c[r>>2]|0,c[q>>2]|0);c[h>>2]=c[p>>2];l=c[h>>2]|0;i=u;return l|0}}pc(c[m>>2]|0);pc(c[k>>2]|0);fe(c[s>>2]|0,c[t>>2]|0,c[r>>2]|0,c[q>>2]|0);c[h>>2]=0;l=c[h>>2]|0;i=u;return l|0}function Un(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;l=i;i=i+32|0;f=l+24|0;n=l+20|0;g=l+16|0;h=l+12|0;m=l+8|0;j=l+4|0;k=l;c[n>>2]=a;c[g>>2]=b;c[h>>2]=d;c[m>>2]=e;c[j>>2]=c[n>>2];if(!(_n(c[n>>2]|0,c[g>>2]|0,c[m>>2]|0,c[h>>2]|0)|0)){c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}if((c[(c[h>>2]|0)+164>>2]&128|0)!=0?(c[(c[j>>2]|0)+8>>2]|0)!=(c[c[(c[j>>2]|0)+12>>2]>>2]|0):0){c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}if(((c[(c[h>>2]|0)+164>>2]&65536|0)!=0?(c[k>>2]=c[g>>2],(c[c[(c[k>>2]|0)+8>>2]>>2]|0)>0):0)?(e=Kd(c[(c[k>>2]|0)+8>>2]|0)|0,(e|0)>(sp(c[(c[k>>2]|0)+4>>2]|0,c[(c[k>>2]|0)+28>>2]|0)|0)):0){c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}c[f>>2]=1;e=c[f>>2]|0;i=l;return e|0}function 
Vn(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;n=g+24|0;m=g+20|0;j=g+16|0;h=g+12|0;l=g+8|0;o=g+4|0;k=g;c[p>>2]=a;c[n>>2]=b;c[m>>2]=d;c[j>>2]=e;c[h>>2]=f;c[l>>2]=c[p>>2];c[o>>2]=c[(c[l>>2]|0)+64>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[n>>2]|0,c[m>>2]|0,c[j>>2]|0,c[h>>2]|0);c[k>>2]=c[(c[l>>2]|0)+68>>2];Ya[c[(c[k>>2]|0)+56>>2]&63](c[k>>2]|0,c[j>>2]|0,c[h>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function Wn(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;g=i;i=i+32|0;p=g+28|0;l=g+24|0;k=g+20|0;j=g+16|0;h=g+12|0;n=g+8|0;o=g+4|0;m=g;c[p>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;c[n>>2]=c[p>>2];c[o>>2]=c[(c[n>>2]|0)+68>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[h>>2]|0,c[j>>2]|0,c[h>>2]|0,c[j>>2]|0);c[m>>2]=c[(c[n>>2]|0)+64>>2];Ya[c[(c[m>>2]|0)+56>>2]&63](c[m>>2]|0,c[l>>2]|0,c[k>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function Xn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function Yn(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;g=d+20|0;f=d+16|0;h=d+12|0;c[j>>2]=a;c[g>>2]=b;c[f>>2]=c[j>>2];c[h>>2]=c[(c[f>>2]|0)+72>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[f>>2]|0)+64>>2]|0;f=c[(c[f>>2]|0)+68>>2]|0;c[e>>2]=c[(c[h>>2]|0)+8>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,23263,e);i=d;return}function Zn(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);pc(c[(c[d>>2]|0)+68>>2]|0);i=b;return}function _n(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0;k=i;i=i+32|0;l=k+20|0;m=k+16|0;f=k+12|0;g=k+8|0;j=k+4|0;h=k;c[l>>2]=a;c[m>>2]=b;c[f>>2]=d;c[g>>2]=e;c[j>>2]=c[m>>2];c[h>>2]=c[l>>2];if((c[c[(c[j>>2]|0)+4>>2]>>2]|0)==2147483647){l=0;l=l&1;i=k;return l|0}if((c[c[(c[j>>2]|0)+8>>2]>>2]|0)==2147483647){l=0;l=l&1;i=k;return 
l|0}if((c[(c[j>>2]|0)+28>>2]|0)!=0?(c[(c[j>>2]|0)+28>>2]|0)!=4:0){l=0;l=l&1;i=k;return l|0}if((c[c[(c[j>>2]|0)+4>>2]>>2]|0)<2){l=0;l=l&1;i=k;return l|0}if(!($n(c[h>>2]|0,c[(c[j>>2]|0)+4>>2]|0,c[f>>2]|0)|0)){l=0;l=l&1;i=k;return l|0}if((c[(c[j>>2]|0)+12>>2]|0)!=(c[(c[j>>2]|0)+20>>2]|0)){if(!(c[(c[j>>2]|0)+28>>2]|0)){l=1;l=l&1;i=k;return l|0}if(!(c[(c[g>>2]|0)+164>>2]&4096)){l=1;l=l&1;i=k;return l|0}}l=(c[(c[j>>2]|0)+12>>2]|0)==(c[(c[j>>2]|0)+20>>2]|0);l=l&1;i=k;return l|0}function $n(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;h=i;i=i+16|0;e=h+12|0;j=h+8|0;f=h+4|0;g=h;c[j>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(mc(c[(c[j>>2]|0)+8>>2]|0,c[(c[j>>2]|0)+12>>2]|0,c[(c[j>>2]|0)+16>>2]|0,c[f>>2]|0,1,c[g>>2]|0)|0)){c[e>>2]=0;a=c[e>>2]|0;i=h;return a|0}a=c[g>>2]|0;c[a>>2]=(c[a>>2]|0)+1;if((c[c[g>>2]>>2]|0)>=(c[c[f>>2]>>2]|0)){c[e>>2]=0;a=c[e>>2]|0;i=h;return a|0}else{c[e>>2]=1;a=c[e>>2]|0;i=h;return a|0}return 0}function ao(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+8|0;d=e+4|0;c[b>>2]=a;c[e>>2]=3;c[d>>2]=0;while(1){if((c[d>>2]|0)>=3)break;a=c[b>>2]|0;Bd(a,bo(c[14212+(c[d>>2]<<2)>>2]|0,14212,3)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function bo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,14224)|0;c[(c[e>>2]|0)+8>>2]=c[j>>2];c[(c[e>>2]|0)+12>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function co(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;s=i;i=i+64|0;e=s+52|0;t=s+48|0;f=s+44|0;g=s+40|0;k=s+36|0;l=s+32|0;m=s+28|0;h=s+24|0;j=s+20|0;o=s+16|0;p=s+12|0;r=s+8|0;q=s+4|0;n=s;c[t>>2]=a;c[f>>2]=b;c[g>>2]=d;c[k>>2]=c[t>>2];c[h>>2]=0;c[j>>2]=0;if(!(eo(c[t>>2]|0,c[f>>2]|0,c[g>>2]|0,n)|0)){c[e>>2]=0;m=c[e>>2]|0;i=s;return 
m|0}c[l>>2]=c[f>>2];Yd(c[(c[l>>2]|0)+4>>2]|0,o,c[n>>2]|0,p);c[r>>2]=Qd(c[(c[l>>2]|0)+8>>2]|0,1)|0;c[q>>2]=Qd(c[p>>2]|0,1)|0;f=c[g>>2]|0;a=Pd(c[p>>2]|0)|0;b=Td(c[(c[l>>2]|0)+8>>2]|0,c[o>>2]|0)|0;c[h>>2]=uc(f,Gn(a,b,c[(c[l>>2]|0)+12>>2]|0,c[(c[l>>2]|0)+16>>2]|0,(c[l>>2]|0)+20+(c[n>>2]<<2)|0)|0)|0;if((c[h>>2]|0)!=0?(f=c[g>>2]|0,a=Qd(c[o>>2]|0,1)|0,b=Td(c[r>>2]|0,c[q>>2]|0)|0,c[j>>2]=uc(f,Gn(a,b,c[(c[l>>2]|0)+16>>2]|0,c[(c[l>>2]|0)+16>>2]|0,(c[l>>2]|0)+20|0)|0)|0,(c[j>>2]|0)!=0):0){c[m>>2]=sn(80,14236,44)|0;c[(c[m>>2]|0)+64>>2]=c[h>>2];c[(c[m>>2]|0)+68>>2]=c[j>>2];c[(c[m>>2]|0)+72>>2]=c[k>>2];jc((c[h>>2]|0)+8|0,(c[j>>2]|0)+8|0,(c[m>>2]|0)+8|0);fe(c[p>>2]|0,c[o>>2]|0,c[r>>2]|0,c[q>>2]|0);c[e>>2]=c[m>>2];m=c[e>>2]|0;i=s;return m|0}pc(c[j>>2]|0);pc(c[h>>2]|0);fe(c[p>>2]|0,c[o>>2]|0,c[r>>2]|0,c[q>>2]|0);c[e>>2]=0;m=c[e>>2]|0;i=s;return m|0}function eo(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;l=i;i=i+32|0;f=l+24|0;n=l+20|0;g=l+16|0;h=l+12|0;m=l+8|0;j=l+4|0;k=l;c[n>>2]=a;c[g>>2]=b;c[h>>2]=d;c[m>>2]=e;c[j>>2]=c[n>>2];if(!(jo(c[n>>2]|0,c[g>>2]|0,c[m>>2]|0)|0)){c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}if((c[(c[h>>2]|0)+164>>2]&128|0)!=0?(c[(c[j>>2]|0)+8>>2]|0)!=(c[c[(c[j>>2]|0)+12>>2]>>2]|0):0){c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}if(((c[(c[h>>2]|0)+164>>2]&65536|0)!=0?(c[k>>2]=c[g>>2],(c[c[(c[k>>2]|0)+8>>2]>>2]|0)>0):0)?(e=Kd(c[(c[k>>2]|0)+8>>2]|0)|0,(e|0)>(Hd(c[(c[k>>2]|0)+4>>2]|0)|0)):0){c[f>>2]=0;e=c[f>>2]|0;i=l;return e|0}c[f>>2]=1;e=c[f>>2]|0;i=l;return e|0}function fo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;e=i;i=i+32|0;l=e+20|0;j=e+16|0;f=e+12|0;g=e+8|0;k=e+4|0;h=e;c[l>>2]=a;c[j>>2]=b;c[f>>2]=d;c[g>>2]=c[l>>2];c[k>>2]=c[(c[g>>2]|0)+64>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[(c[g>>2]|0)+64>>2]|0,c[j>>2]|0,c[f>>2]|0);c[h>>2]=c[(c[g>>2]|0)+68>>2];eb[c[(c[h>>2]|0)+56>>2]&63](c[(c[g>>2]|0)+68>>2]|0,c[f>>2]|0,c[f>>2]|0);i=e;return}function go(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function ho(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;g=d+20|0;f=d+16|0;h=d+12|0;c[j>>2]=a;c[g>>2]=b;c[f>>2]=c[j>>2];c[h>>2]=c[(c[f>>2]|0)+72>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[f>>2]|0)+64>>2]|0;f=c[(c[f>>2]|0)+68>>2]|0;c[e>>2]=c[(c[h>>2]|0)+8>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,23294,e);i=d;return}function io(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function jo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;h=i;i=i+32|0;j=h+16|0;k=h+12|0;e=h+8|0;g=h+4|0;f=h;c[j>>2]=a;c[k>>2]=b;c[e>>2]=d;c[g>>2]=c[k>>2];c[f>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)==2147483647){g=0;g=g&1;i=h;return g|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)==2147483647){g=0;g=g&1;i=h;return g|0}if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)<2){g=0;g=g&1;i=h;return g|0}g=(ko(c[f>>2]|0,c[(c[g>>2]|0)+4>>2]|0,c[e>>2]|0)|0)!=0;g=g&1;i=h;return g|0}function ko(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;h=i;i=i+16|0;e=h+12|0;j=h+8|0;f=h+4|0;g=h;c[j>>2]=a;c[f>>2]=b;c[g>>2]=d;if(!(mc(c[(c[j>>2]|0)+8>>2]|0,c[(c[j>>2]|0)+12>>2]|0,c[(c[j>>2]|0)+16>>2]|0,c[f>>2]|0,1,c[g>>2]|0)|0)){c[e>>2]=0;a=c[e>>2]|0;i=h;return a|0}a=c[g>>2]|0;c[a>>2]=(c[a>>2]|0)+1;if((c[c[g>>2]>>2]|0)>=(c[c[f>>2]>>2]|0)){c[e>>2]=0;a=c[e>>2]|0;i=h;return a|0}else{c[e>>2]=1;a=c[e>>2]|0;i=h;return a|0}return 0}function lo(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,mo()|0);i=b;return}function mo(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,14252)|0;i=b;return c[a>>2]|0}function no(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;l=i;i=i+32|0;g=l+24|0;e=l+16|0;f=l+12|0;j=l+8|0;h=l+4|0;k=l;c[l+20>>2]=a;c[e>>2]=b;c[f>>2]=d;c[h>>2]=0;if(!(oo(c[e>>2]|0)|0)){c[g>>2]=0;b=c[g>>2]|0;i=l;return 
b|0}c[j>>2]=c[e>>2];if((c[(c[j>>2]|0)+28>>2]|0)==4?(a=c[f>>2]|0,b=Pd(c[(c[j>>2]|0)+8>>2]|0)|0,c[h>>2]=uc(a,Jn(b,c[(c[j>>2]|0)+20>>2]|0,c[(c[j>>2]|0)+12>>2]|0)|0)|0,(c[h>>2]|0)==0):0){c[g>>2]=0;b=c[g>>2]|0;i=l;return b|0}if(!(c[(c[j>>2]|0)+28>>2]|0))e=(c[(c[j>>2]|0)+12>>2]|0)==(c[(c[j>>2]|0)+20>>2]|0)?35:34;else e=36;c[k>>2]=rn(80,14264,e)|0;if(!(c[(c[j>>2]|0)+28>>2]|0))ke(c[(c[j>>2]|0)+8>>2]|0,(c[k>>2]|0)+64|0,(c[k>>2]|0)+68|0,(c[k>>2]|0)+72|0)|0;c[(c[k>>2]|0)+76>>2]=c[h>>2];e=c[k>>2]|0;if(!(c[(c[j>>2]|0)+28>>2]|0))hc((c[e+64>>2]|0)*3|0,(c[k>>2]|0)+8|0);else{b=e+8|0;a=(c[h>>2]|0)+8|0;c[b>>2]=c[a>>2];c[b+4>>2]=c[a+4>>2];c[b+8>>2]=c[a+8>>2];c[b+12>>2]=c[a+12>>2];c[b+16>>2]=c[a+16>>2];c[b+20>>2]=c[a+20>>2];c[b+24>>2]=c[a+24>>2];c[b+28>>2]=c[a+28>>2]}c[g>>2]=c[k>>2];b=c[g>>2]|0;i=l;return b|0}function oo(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=c[e>>2];if(!(c[c[(c[b>>2]|0)+4>>2]>>2]|0))if((c[(c[b>>2]|0)+28>>2]|0)!=4)if((c[(c[b>>2]|0)+28>>2]|0)==0?(c[c[(c[b>>2]|0)+8>>2]>>2]|0)<=1:0)if((c[(c[b>>2]|0)+12>>2]|0)!=(c[(c[b>>2]|0)+20>>2]|0))b=1;else b=(dp(c[b>>2]|0,2147483647)|0)!=0;else b=0;else b=1;else b=0;i=d;return b&1|0}function po(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0;m=i;i=i+48|0;o=m+32|0;h=m+16|0;n=m+12|0;j=m+8|0;l=m+4|0;k=m;c[o>>2]=a;c[m+28>>2]=b;c[m+24>>2]=d;c[m+20>>2]=e;c[h>>2]=f;c[n>>2]=c[o>>2];c[l>>2]=c[(c[n>>2]|0)+64>>2];c[k>>2]=c[(c[n>>2]|0)+72>>2];c[j>>2]=4;while(1){if((c[j>>2]|0)>(c[l>>2]|0))break;g[c[h>>2]>>2]=0.0;c[h>>2]=(c[h>>2]|0)+(c[k>>2]<<2);g[c[h>>2]>>2]=0.0;c[h>>2]=(c[h>>2]|0)+(c[k>>2]<<2);g[c[h>>2]>>2]=0.0;c[h>>2]=(c[h>>2]|0)+(c[k>>2]<<2);g[c[h>>2]>>2]=0.0;c[h>>2]=(c[h>>2]|0)+(c[k>>2]<<2);c[j>>2]=(c[j>>2]|0)+4}while(1){if((c[j>>2]|0)>=((c[l>>2]|0)+4|0))break;g[c[h>>2]>>2]=0.0;c[h>>2]=(c[h>>2]|0)+(c[k>>2]<<2);c[j>>2]=(c[j>>2]|0)+1}i=m;return}function qo(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0;u=i;i=i+64|0;w=u+56|0;h=u+52|0;j=u+44|0;k=u+40|0;v=u+36|0;l=u+32|0;o=u+28|0;m=u+24|0;n=u+20|0;p=u+16|0;r=u+12|0;s=u+8|0;t=u+4|0;q=u;c[w>>2]=a;c[h>>2]=b;c[u+48>>2]=d;c[j>>2]=e;c[k>>2]=f;c[v>>2]=c[w>>2];c[o>>2]=c[(c[v>>2]|0)+64>>2];c[m>>2]=c[(c[v>>2]|0)+68>>2];c[n>>2]=c[(c[v>>2]|0)+72>>2];c[l>>2]=4;while(1){if((c[l>>2]|0)>(c[o>>2]|0))break;g[p>>2]=+g[c[h>>2]>>2];c[h>>2]=(c[h>>2]|0)+(c[m>>2]<<2);g[r>>2]=+g[c[h>>2]>>2];c[h>>2]=(c[h>>2]|0)+(c[m>>2]<<2);g[s>>2]=+g[c[h>>2]>>2];c[h>>2]=(c[h>>2]|0)+(c[m>>2]<<2);g[t>>2]=+g[c[h>>2]>>2];c[h>>2]=(c[h>>2]|0)+(c[m>>2]<<2);g[c[j>>2]>>2]=+g[p>>2];c[j>>2]=(c[j>>2]|0)+(c[n>>2]<<2);g[c[k>>2]>>2]=0.0;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);g[c[j>>2]>>2]=+g[r>>2];c[j>>2]=(c[j>>2]|0)+(c[n>>2]<<2);g[c[k>>2]>>2]=0.0;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);g[c[j>>2]>>2]=+g[s>>2];c[j>>2]=(c[j>>2]|0)+(c[n>>2]<<2);g[c[k>>2]>>2]=0.0;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);g[c[j>>2]>>2]=+g[t>>2];c[j>>2]=(c[j>>2]|0)+(c[n>>2]<<2);g[c[k>>2]>>2]=0.0;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);c[l>>2]=(c[l>>2]|0)+4}while(1){if((c[l>>2]|0)>=((c[o>>2]|0)+4|0))break;g[q>>2]=+g[c[h>>2]>>2];c[h>>2]=(c[h>>2]|0)+(c[m>>2]<<2);g[c[j>>2]>>2]=+g[q>>2];c[j>>2]=(c[j>>2]|0)+(c[n>>2]<<2);g[c[k>>2]>>2]=0.0;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);c[l>>2]=(c[l>>2]|0)+1}i=u;return}function ro(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;g=i;i=i+32|0;m=g+24|0;h=g+20|0;j=g+12|0;l=g+4|0;k=g;c[m>>2]=a;c[h>>2]=b;c[g+16>>2]=d;c[j>>2]=e;c[g+8>>2]=f;c[l>>2]=c[m>>2];c[k>>2]=c[(c[l>>2]|0)+76>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[k>>2]|0,c[j>>2]|0,c[h>>2]|0);i=g;return}function so(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;f=i;i=i+16|0;g=f+8|0;d=f+4|0;e=f;c[g>>2]=a;c[d>>2]=b;c[e>>2]=c[g>>2];if(!(c[(c[e>>2]|0)+76>>2]|0)){i=f;return}rc(c[(c[e>>2]|0)+76>>2]|0,c[d>>2]|0);i=f;return}function to(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0;g=i;i=i+32|0;f=g+8|0;e=g;j=g+20|0;d=g+16|0;h=g+12|0;c[j>>2]=a;c[d>>2]=b;c[h>>2]=c[j>>2];a=c[c[d>>2]>>2]|0;d=c[d>>2]|0;b=c[h>>2]|0;if(c[(c[h>>2]|0)+76>>2]|0){c[e>>2]=c[b+76>>2];eb[a&63](d,23324,e);i=g;return}else{c[f>>2]=c[b+64>>2];eb[a&63](d,23349,f);i=g;return}}function uo(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=c[e>>2];if(!(c[(c[b>>2]|0)+76>>2]|0)){i=d;return}pc(c[(c[b>>2]|0)+76>>2]|0);i=d;return}function vo(a){a=a|0;var b=0,d=0,e=0,f=0;f=i;i=i+16|0;b=f+8|0;d=f+4|0;e=f;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=9)break;c[e>>2]=zd(20,14280)|0;c[(c[e>>2]|0)+8>>2]=c[14292+((c[d>>2]|0)*12|0)>>2];c[(c[e>>2]|0)+12>>2]=c[14292+((c[d>>2]|0)*12|0)+4>>2];c[(c[e>>2]|0)+16>>2]=c[14292+((c[d>>2]|0)*12|0)+8>>2];Bd(c[b>>2]|0,c[e>>2]|0);c[d>>2]=(c[d>>2]|0)+1}i=f;return}function wo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;g=e+8|0;h=e+4|0;f=e;c[j>>2]=a;c[g>>2]=b;c[h>>2]=d;c[f>>2]=c[j>>2];Zy(c[h>>2]|0,c[g>>2]|0,c[(c[f>>2]|0)+64>>2]<<2|0)|0;i=e;return}function xo(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;d=e+4|0;f=e;c[d>>2]=a;c[f>>2]=b;if((c[(c[f>>2]|0)+12>>2]|0)==(c[(c[f>>2]|0)+16>>2]|0)){a=0;a=a&1;i=e;return a|0}if(c[(c[d>>2]|0)+68>>2]|0){a=0;a=a&1;i=e;return a|0}a=(c[(c[d>>2]|0)+64>>2]|0)>2;a=a&1;i=e;return a|0}function yo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;g=e+8|0;f=e+4|0;h=e;c[j>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[j>>2];Po(c[(c[h>>2]|0)+64>>2]<<2,c[(c[h>>2]|0)+68>>2]|0,(c[h>>2]|0)+72|0,c[g>>2]|0,c[f>>2]|0);i=e;return}function zo(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;d=e+4|0;f=e;c[d>>2]=a;c[f>>2]=b;if((c[(c[f>>2]|0)+12>>2]|0)==(c[(c[f>>2]|0)+16>>2]|0)){a=0;a=a&1;i=e;return a|0}if((c[(c[d>>2]|0)+68>>2]|0)<=0){a=0;a=a&1;i=e;return a|0}a=(c[(c[d>>2]|0)+64>>2]|0)>2;a=a&1;i=e;return a|0}function Ao(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0;h=i;i=i+16|0;j=h+12|0;e=h+8|0;f=h+4|0;g=h;c[j>>2]=a;c[e>>2]=b;c[f>>2]=d;c[g>>2]=c[j>>2];switch(c[(c[g>>2]|0)+68>>2]|0){case 0:{Fb(c[e>>2]|0,c[f>>2]|0,c[(c[g>>2]|0)+64>>2]|0,1,1,1);i=h;return}case 1:{Fb(c[e>>2]|0,c[f>>2]|0,c[(c[g>>2]|0)+72>>2]|0,c[(c[g>>2]|0)+72+4>>2]|0,c[(c[g>>2]|0)+72+8>>2]|0,c[(c[g>>2]|0)+64>>2]|0);i=h;return}default:{Oo((c[g>>2]|0)+72|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+64>>2]|0,c[e>>2]|0,c[f>>2]|0,20);i=h;return}}}function Bo(a,b){a=a|0;b=b|0;var d=0,e=0;e=i;i=i+16|0;d=e;c[e+4>>2]=a;c[d>>2]=b;i=e;return (c[(c[d>>2]|0)+12>>2]|0)!=(c[(c[d>>2]|0)+16>>2]|0)|0}function Co(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;g=e+8|0;f=e+4|0;h=e;c[j>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[j>>2];Oo((c[h>>2]|0)+72|0,c[(c[h>>2]|0)+68>>2]|0,c[(c[h>>2]|0)+64>>2]|0,c[g>>2]|0,c[f>>2]|0,21);i=e;return}function Do(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;f=i;i=i+16|0;d=f+8|0;g=f+4|0;e=f;c[d>>2]=a;c[g>>2]=b;c[e>>2]=c[(c[d>>2]|0)+68>>2];if(!((c[e>>2]|0)>=2?(c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+16>>2]|0):0)){e=0;e=e&1;i=f;return e|0}a=Tb(c[(c[d>>2]|0)+72+(((c[e>>2]|0)-2|0)*12|0)+4>>2]|0)|0;if((a|0)<=(Tb(c[(c[d>>2]|0)+72+(((c[e>>2]|0)-1|0)*12|0)+4>>2]|0)|0)){e=1;e=e&1;i=f;return e|0}a=Tb(c[(c[d>>2]|0)+72+(((c[e>>2]|0)-2|0)*12|0)+8>>2]|0)|0;e=(a|0)<=(Tb(c[(c[d>>2]|0)+72+(((c[e>>2]|0)-1|0)*12|0)+8>>2]|0)|0);e=e&1;i=f;return e|0}function Eo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;g=e+8|0;f=e+4|0;h=e;c[j>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[j>>2];Oo((c[h>>2]|0)+72|0,c[(c[h>>2]|0)+68>>2]|0,c[(c[h>>2]|0)+64>>2]|0,c[g>>2]|0,c[f>>2]|0,22);i=e;return}function Fo(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;d=e+4|0;f=e;c[d>>2]=a;c[f>>2]=b;if((c[(c[f>>2]|0)+12>>2]|0)==(c[(c[f>>2]|0)+16>>2]|0)){a=0;a=a&1;i=e;return a|0}if((c[(c[d>>2]|0)+68>>2]|0)<2){a=0;a=a&1;i=e;return a|0}a=(me(c[(c[d>>2]|0)+64>>2]|0,1)|0)>4;a=a&1;i=e;return a|0}function Go(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0;e=i;i=i+16|0;j=e+12|0;g=e+8|0;f=e+4|0;h=e;c[j>>2]=a;c[g>>2]=b;c[f>>2]=d;c[h>>2]=c[j>>2];Oo((c[h>>2]|0)+72|0,c[(c[h>>2]|0)+68>>2]|0,c[(c[h>>2]|0)+64>>2]|0,c[g>>2]|0,c[f>>2]|0,23);i=e;return}function Ho(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+12|0;f=e+8|0;g=e;c[h>>2]=a;c[f>>2]=b;c[e+4>>2]=d;c[g>>2]=c[h>>2];Mo((c[g>>2]|0)+72|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+64>>2]|0,c[f>>2]|0,37);i=e;return}function Io(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;d=e+4|0;f=e;c[d>>2]=a;c[f>>2]=b;if((c[(c[f>>2]|0)+12>>2]|0)==(c[(c[f>>2]|0)+16>>2]|0)?(c[(c[d>>2]|0)+68>>2]|0)>=2:0)d=(No(c[d>>2]|0)|0)!=0;else d=0;i=e;return d&1|0}function Jo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+12|0;f=e+8|0;g=e;c[h>>2]=a;c[f>>2]=b;c[e+4>>2]=d;c[g>>2]=c[h>>2];Mo((c[g>>2]|0)+72|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+64>>2]|0,c[f>>2]|0,38);i=e;return}function Ko(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;d=e+4|0;f=e;c[d>>2]=a;c[f>>2]=b;if(!(Io(c[d>>2]|0,c[f>>2]|0)|0)){a=0;a=a&1;i=e;return a|0}a=(me(c[(c[d>>2]|0)+64>>2]|0,2)|0)>4;a=a&1;i=e;return a|0}function Lo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;e=i;i=i+16|0;h=e+12|0;f=e+8|0;g=e;c[h>>2]=a;c[f>>2]=b;c[e+4>>2]=d;c[g>>2]=c[h>>2];Mo((c[g>>2]|0)+72|0,c[(c[g>>2]|0)+68>>2]|0,c[(c[g>>2]|0)+64>>2]|0,c[f>>2]|0,39);i=e;return}function Mo(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;g=n+20|0;h=n+16|0;j=n+12|0;k=n+8|0;l=n+4|0;m=n;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;if((c[h>>2]|0)==2){Ya[c[l>>2]&63](c[k>>2]|0,c[c[g>>2]>>2]|0,c[(c[g>>2]|0)+4>>2]|0,c[(c[g>>2]|0)+8>>2]|0,c[j>>2]|0);i=n;return}c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[c[g>>2]>>2]|0))break;Mo((c[g>>2]|0)+12|0,(c[h>>2]|0)-1|0,c[j>>2]|0,c[k>>2]|0,c[l>>2]|0);c[m>>2]=(c[m>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[(c[g>>2]|0)+4>>2]<<2)}i=n;return}function No(a){a=a|0;var 
b=0,d=0,e=0,f=0,g=0;g=i;i=i+16|0;f=g+8|0;b=g+4|0;d=g;c[b>>2]=a;c[d>>2]=0;while(1){a=(c[b>>2]|0)+72+((c[d>>2]|0)*12|0)|0;if((c[d>>2]|0)>=((c[(c[b>>2]|0)+68>>2]|0)-2|0))break;if((c[a+4>>2]|0)!=(c[(c[b>>2]|0)+72+((c[d>>2]|0)*12|0)+8>>2]|0)){e=4;break}c[d>>2]=(c[d>>2]|0)+1}if((e|0)==4){c[f>>2]=0;e=c[f>>2]|0;i=g;return e|0}if((c[a>>2]|0)==(c[(c[b>>2]|0)+72+(((c[d>>2]|0)+1|0)*12|0)>>2]|0)?(c[(c[b>>2]|0)+72+((c[d>>2]|0)*12|0)+4>>2]|0)==(c[(c[b>>2]|0)+72+(((c[d>>2]|0)+1|0)*12|0)+8>>2]|0):0)a=(c[(c[b>>2]|0)+72+((c[d>>2]|0)*12|0)+8>>2]|0)==(c[(c[b>>2]|0)+72+(((c[d>>2]|0)+1|0)*12|0)+4>>2]|0);else a=0;c[f>>2]=a&1;e=c[f>>2]|0;i=g;return e|0}function Oo(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;p=i;i=i+32|0;h=p+24|0;j=p+20|0;k=p+16|0;l=p+12|0;m=p+8|0;n=p+4|0;o=p;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;if((c[j>>2]|0)==2){hb[c[n>>2]&127](c[l>>2]|0,c[m>>2]|0,c[c[h>>2]>>2]|0,c[(c[h>>2]|0)+4>>2]|0,c[(c[h>>2]|0)+8>>2]|0,c[(c[h>>2]|0)+12>>2]|0,c[(c[h>>2]|0)+12+4>>2]|0,c[(c[h>>2]|0)+12+8>>2]|0,c[k>>2]|0);i=p;return}c[o>>2]=0;while(1){if((c[o>>2]|0)>=(c[c[h>>2]>>2]|0))break;Oo((c[h>>2]|0)+12|0,(c[j>>2]|0)-1|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0,c[n>>2]|0);c[o>>2]=(c[o>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[(c[h>>2]|0)+4>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[(c[h>>2]|0)+8>>2]<<2)}i=p;return}function Po(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+48|0;g=q+32|0;h=q+28|0;j=q+24|0;k=q+20|0;l=q+16|0;m=q+12|0;o=q+8|0;n=q+4|0;p=q;c[g>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[o>>2]=c[c[j>>2]>>2];c[n>>2]=c[(c[j>>2]|0)+4>>2];c[p>>2]=c[(c[j>>2]|0)+8>>2];if((c[h>>2]|0)==1){c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[o>>2]|0))break;Zy(c[l>>2]|0,c[k>>2]|0,c[g>>2]|0)|0;c[m>>2]=(c[m>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2)}i=q;return}else{c[h>>2]=(c[h>>2]|0)+-1;c[j>>2]=(c[j>>2]|0)+12;c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[o>>2]|0))break;Po(c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0,c[l>>2]|0);c[m>>2]=(c[m>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[n>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2)}i=q;return}}function Qo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;k=i;i=i+32|0;e=k+28|0;l=k+24|0;f=k+20|0;h=k+12|0;g=k+8|0;j=k+4|0;c[l>>2]=a;c[f>>2]=b;c[k+16>>2]=d;c[g>>2]=c[l>>2];if(Ro(c[g>>2]|0,c[f>>2]|0)|0){c[h>>2]=c[f>>2];c[j>>2]=sn(464,14400,c[(c[g>>2]|0)+8>>2]|0)|0;c[k>>2]=So(c[j>>2]|0,c[h>>2]|0)|0;c[(c[j>>2]|0)+456>>2]=c[(c[g>>2]|0)+16>>2];a=(ie(c[(c[h>>2]|0)+8>>2]|0)|0)<<1;hc(a,(c[j>>2]|0)+8|0);c[e>>2]=c[j>>2];a=c[e>>2]|0;i=k;return a|0}else{c[e>>2]=0;a=c[e>>2]|0;i=k;return a|0}return 0}function Ro(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;g=i;i=i+480|0;d=g+472|0;h=g+468|0;e=g+464|0;f=g;c[d>>2]=a;c[h>>2]=b;c[e>>2]=c[h>>2];if(c[c[(c[e>>2]|0)+4>>2]>>2]|0){b=0;b=b&1;i=g;return b|0}if((c[c[(c[e>>2]|0)+8>>2]>>2]|0)==2147483647){b=0;b=b&1;i=g;return b|0}if(!(So(f,c[e>>2]|0)|0)){b=0;b=b&1;i=g;return b|0}b=(jb[c[(c[d>>2]|0)+12>>2]&15](f,c[e>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function So(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0;h=i;i=i+32|0;g=h+16|0;d=h+12|0;j=h+8|0;e=h+4|0;f=h;c[d>>2]=a;c[j>>2]=b;c[f>>2]=c[(c[j>>2]|0)+8>>2];c[(c[d>>2]|0)+64>>2]=1;c[(c[d>>2]|0)+68>>2]=0;c[e>>2]=0;while(1){if((c[e>>2]|0)>=(c[c[f>>2]>>2]|0)){d=11;break}if(((c[(c[d>>2]|0)+64>>2]|0)==1?(c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2]|0)==1:0)?(c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]|0)==1:0)c[(c[d>>2]|0)+64>>2]=c[(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)>>2];else{if((c[(c[d>>2]|0)+68>>2]|0)==32){d=8;break}b=(c[d>>2]|0)+68|0;a=c[b>>2]|0;c[b>>2]=a+1;a=(c[d>>2]|0)+72+(a*12|0)|0;b=(c[f>>2]|0)+4+((c[e>>2]|0)*12|0)|0;c[a>>2]=c[b>>2];c[a+4>>2]=c[b+4>>2];c[a+8>>2]=c[b+8>>2]}c[e>>2]=(c[e>>2]|0)+1}if((d|0)==8){c[g>>2]=0;a=c[g>>2]|0;i=h;return a|0}else if((d|0)==11){c[g>>2]=1;a=c[g>>2]|0;i=h;return a|0}return 0}function To(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0;j=i;i=i+48|0;h=j+16|0;g=j+8|0;k=j;l=j+32|0;d=j+28|0;e=j+24|0;f=j+20|0;c[l>>2]=a;c[d>>2]=b;c[e>>2]=c[l>>2];b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;l=c[(c[e>>2]|0)+64>>2]|0;c[k>>2]=c[(c[e>>2]|0)+456>>2];c[k+4>>2]=l;eb[b&63](a,23552,k);c[f>>2]=0;while(1){b=c[c[d>>2]>>2]|0;a=c[d>>2]|0;if((c[f>>2]|0)>=(c[(c[e>>2]|0)+68>>2]|0))break;c[g>>2]=c[(c[e>>2]|0)+72+((c[f>>2]|0)*12|0)>>2];eb[b&63](a,23559,g);c[f>>2]=(c[f>>2]|0)+1}eb[b&63](a,23707,h);i=j;return}function Uo(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,Vo()|0);i=b;return}function Vo(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,14416)|0;i=b;return c[a>>2]|0}function Wo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;o=i;i=i+48|0;k=o+32|0;p=o+28|0;e=o+24|0;f=o+20|0;n=o+16|0;m=o+12|0;g=o+8|0;l=o+4|0;j=o;c[p>>2]=a;c[e>>2]=b;c[f>>2]=d;if(!(Xo(c[p>>2]|0,c[e>>2]|0,c[f>>2]|0)|0)){c[k>>2]=0;m=c[k>>2]|0;i=o;return m|0}c[m>>2]=c[e>>2];if((c[(c[m>>2]|0)+20>>2]|0)!=0?(c[(c[f>>2]|0)+164>>2]&4096|0)!=0:0){c[j>>2]=Qd(c[(c[m>>2]|0)+4>>2]|0,1)|0;c[g>>2]=Hn(c[j>>2]|0,c[(c[m>>2]|0)+8>>2]|0,c[(c[m>>2]|0)+16>>2]|0,c[(c[m>>2]|0)+16>>2]|0,8)|0;he(c[j>>2]|0)}else 
c[g>>2]=Hn(c[(c[m>>2]|0)+4>>2]|0,c[(c[m>>2]|0)+8>>2]|0,c[(c[m>>2]|0)+12>>2]|0,c[(c[m>>2]|0)+16>>2]|0,8)|0;c[l>>2]=uc(c[f>>2]|0,c[g>>2]|0)|0;if(!(c[l>>2]|0)){c[k>>2]=0;m=c[k>>2]|0;i=o;return m|0}if(!(c[(c[m>>2]|0)+20>>2]|0))e=47;else e=(c[(c[f>>2]|0)+164>>2]&4096|0)!=0?46:45;c[n>>2]=sn(80,14428,e)|0;c[(c[n>>2]|0)+76>>2]=c[(c[(c[m>>2]|0)+4>>2]|0)+4>>2];c[(c[n>>2]|0)+68>>2]=c[(c[(c[m>>2]|0)+4>>2]|0)+4+4>>2];c[(c[n>>2]|0)+72>>2]=c[(c[(c[m>>2]|0)+4>>2]|0)+4+8>>2];c[(c[n>>2]|0)+64>>2]=c[l>>2];j=(c[n>>2]|0)+8|0;l=(c[l>>2]|0)+8|0;c[j>>2]=c[l>>2];c[j+4>>2]=c[l+4>>2];c[j+8>>2]=c[l+8>>2];c[j+12>>2]=c[l+12>>2];c[j+16>>2]=c[l+16>>2];c[j+20>>2]=c[l+20>>2];c[j+24>>2]=c[l+24>>2];c[j+28>>2]=c[l+28>>2];l=(c[n>>2]|0)+8+24|0;h[l>>3]=+h[l>>3]+ +((((c[(c[n>>2]|0)+76>>2]|0)-1|0)/2|0)<<2|0);l=(c[n>>2]|0)+8|0;h[l>>3]=+h[l>>3]+ +((((c[(c[n>>2]|0)+76>>2]|0)-1|0)/2|0)<<1|0);if(!(c[(c[m>>2]|0)+20>>2]|0)){m=(c[n>>2]|0)+8+8|0;h[m>>3]=+h[m>>3]+ +((((c[(c[n>>2]|0)+76>>2]|0)-1|0)/2|0)<<1|0)}if((c[(c[n>>2]|0)+56>>2]|0)==46){m=(c[n>>2]|0)+8+24|0;h[m>>3]=+h[m>>3]+ +(2+(((c[(c[n>>2]|0)+76>>2]|0)%2|0|0)!=0?0:2)|0)}c[k>>2]=c[n>>2];m=c[k>>2]|0;i=o;return m|0}function Xo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;e=g+8|0;f=g+4|0;h=g;c[e>>2]=a;c[f>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&8){b=0;b=b&1;i=g;return b|0}b=(cp(c[e>>2]|0,c[f>>2]|0)|0)!=0;b=b&1;i=g;return b|0}function Yo(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;q=i;i=i+48|0;r=q+40|0;e=q+36|0;f=q+32|0;l=q+28|0;n=q+24|0;p=q+20|0;m=q+16|0;o=q+12|0;h=q+8|0;j=q+4|0;k=q;c[r>>2]=a;c[e>>2]=b;c[f>>2]=d;c[l>>2]=c[r>>2];c[n>>2]=c[(c[l>>2]|0)+68>>2];c[p>>2]=c[(c[l>>2]|0)+72>>2];c[o>>2]=c[(c[l>>2]|0)+76>>2];g[c[f>>2]>>2]=+g[c[e>>2]>>2];c[m>>2]=1;while(1){if((c[m>>2]|0)>=((c[o>>2]|0)-(c[m>>2]|0)|0))break;a=_(c[n>>2]|0,c[m>>2]|0)|0;g[h>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[n>>2]|0,(c[o>>2]|0)-(c[m>>2]|0)|0)|0;g[j>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[p>>2]|0,c[m>>2]|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[h>>2]-+g[j>>2];a=_(c[p>>2]|0,(c[o>>2]|0)-(c[m>>2]|0)|0)|0;g[(c[f>>2]|0)+(a<<2)>>2]=+g[h>>2]+ +g[j>>2];c[m>>2]=(c[m>>2]|0)+1}if((c[m>>2]|0)!=((c[o>>2]|0)-(c[m>>2]|0)|0)){j=c[l>>2]|0;j=j+64|0;j=c[j>>2]|0;c[k>>2]=j;j=c[k>>2]|0;j=j+56|0;j=c[j>>2]|0;l=c[k>>2]|0;a=c[f>>2]|0;m=c[f>>2]|0;eb[j&63](l,a,m);i=q;return}a=_(c[n>>2]|0,c[m>>2]|0)|0;j=_(c[p>>2]|0,c[m>>2]|0)|0;g[(c[f>>2]|0)+(j<<2)>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];j=c[l>>2]|0;j=j+64|0;j=c[j>>2]|0;c[k>>2]=j;j=c[k>>2]|0;j=j+56|0;j=c[j>>2]|0;l=c[k>>2]|0;a=c[f>>2]|0;m=c[f>>2]|0;eb[j&63](l,a,m);i=q;return}function Zo(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;q=p+36|0;e=p+32|0;f=p+28|0;l=p+24|0;n=p+20|0;m=p+16|0;o=p+12|0;h=p+8|0;j=p+4|0;k=p;c[q>>2]=a;c[e>>2]=b;c[f>>2]=d;c[l>>2]=c[q>>2];c[n>>2]=c[(c[l>>2]|0)+68>>2];c[o>>2]=c[(c[l>>2]|0)+76>>2];c[m>>2]=1;while(1){if((c[m>>2]|0)>=((c[o>>2]|0)-(c[m>>2]|0)|0))break;a=_(c[n>>2]|0,c[m>>2]|0)|0;g[h>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[n>>2]|0,(c[o>>2]|0)-(c[m>>2]|0)|0)|0;g[j>>2]=+g[(c[e>>2]|0)+(a<<2)>>2];a=_(c[n>>2]|0,c[m>>2]|0)|0;g[(c[e>>2]|0)+(a<<2)>>2]=+g[h>>2]-+g[j>>2];a=_(c[n>>2]|0,(c[o>>2]|0)-(c[m>>2]|0)|0)|0;g[(c[e>>2]|0)+(a<<2)>>2]=+g[h>>2]+ +g[j>>2];c[m>>2]=(c[m>>2]|0)+1}c[k>>2]=c[(c[l>>2]|0)+64>>2];eb[c[(c[k>>2]|0)+56>>2]&63](c[k>>2]|0,c[e>>2]|0,c[f>>2]|0);i=p;return}function _o(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;m=i;i=i+48|0;q=m+36|0;o=m+32|0;e=m+28|0;n=m+24|0;l=m+20|0;j=m+16|0;k=m+12|0;p=m+8|0;f=m+4|0;h=m;c[q>>2]=a;c[o>>2]=b;c[e>>2]=d;c[n>>2]=c[q>>2];c[p>>2]=c[(c[n>>2]|0)+64>>2];eb[c[(c[p>>2]|0)+56>>2]&63](c[p>>2]|0,c[o>>2]|0,c[e>>2]|0);c[k>>2]=c[(c[n>>2]|0)+76>>2];c[l>>2]=c[(c[n>>2]|0)+72>>2];c[j>>2]=1;while(1){if((c[j>>2]|0)>=((c[k>>2]|0)-(c[j>>2]|0)|0))break;a=_(c[l>>2]|0,c[j>>2]|0)|0;g[f>>2]=+g[(c[e>>2]|0)+(a<<2)>>2]*.5;a=_(c[l>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0)|0;g[h>>2]=+g[(c[e>>2]|0)+(a<<2)>>2]*.5;a=_(c[l>>2]|0,c[j>>2]|0)|0;g[(c[e>>2]|0)+(a<<2)>>2]=+g[f>>2]+ +g[h>>2];a=_(c[l>>2]|0,(c[k>>2]|0)-(c[j>>2]|0)|0)|0;g[(c[e>>2]|0)+(a<<2)>>2]=+g[h>>2]-+g[f>>2];c[j>>2]=(c[j>>2]|0)+1}i=m;return}function $o(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function ap(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+32|0;e=d;f=d+20|0;g=d+16|0;h=d+12|0;c[f>>2]=a;c[g>>2]=b;c[h>>2]=c[f>>2];b=c[c[g>>2]>>2]|0;a=c[g>>2]|0;g=c[(c[h>>2]|0)+76>>2]|0;f=c[(c[h>>2]|0)+64>>2]|0;c[e>>2]=(c[(c[h>>2]|0)+56>>2]|0)==47?23580:23585;c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,23562,e);i=d;return}function bp(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function cp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0;e=i;i=i+16|0;f=e+4|0;d=e;c[e+8>>2]=a;c[f>>2]=b;c[d>>2]=c[f>>2];if((c[c[(c[d>>2]|0)+4>>2]>>2]|0)!=1){b=0;b=b&1;i=e;return b|0}if(c[c[(c[d>>2]|0)+8>>2]>>2]|0){b=0;b=b&1;i=e;return b|0}if((c[(c[d>>2]|0)+20>>2]|0)!=0?(c[(c[d>>2]|0)+20>>2]|0)!=4:0){b=0;b=b&1;i=e;return b|0}b=(c[(c[(c[d>>2]|0)+4>>2]|0)+4>>2]|0)>2;b=b&1;i=e;return b|0}function dp(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0;n=i;i=i+32|0;m=n+28|0;d=n+24|0;e=n+20|0;f=n+16|0;g=n+12|0;k=n+8|0;h=n+4|0;j=n;c[d>>2]=a;c[e>>2]=b;c[j>>2]=0;while(1){if(((c[j>>2]|0)+1|0)>=(c[c[(c[d>>2]|0)+4>>2]>>2]|0))break;if((c[(c[(c[d>>2]|0)+4>>2]|0)+4+((c[j>>2]|0)*12|0)+4>>2]|0)!=(c[(c[(c[d>>2]|0)+4>>2]|0)+4+((c[j>>2]|0)*12|0)+8>>2]|0)){l=4;break}c[j>>2]=(c[j>>2]|0)+1}if((l|0)==4){c[m>>2]=0;l=c[m>>2]|0;i=n;return l|0}if((c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=2147483647?(c[c[(c[d>>2]|0)+8>>2]>>2]|0)!=0:0){if((c[e>>2]|0)!=2147483647){if(!(c[c[(c[d>>2]|0)+4>>2]>>2]|0)){c[m>>2]=(c[(c[(c[d>>2]|0)+8>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2]|0)==(c[(c[(c[d>>2]|0)+8>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]|0)&1;l=c[m>>2]|0;i=n;return l|0}c[f>>2]=ie(c[(c[d>>2]|0)+4>>2]|0)|0;c[g>>2]=_((c[f>>2]|0)/(c[(c[(c[d>>2]|0)+4>>2]|0)+4+(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)-1|0)*12|0)>>2]|0)|0,((c[(c[(c[d>>2]|0)+4>>2]|0)+4+(((c[c[(c[d>>2]|0)+4>>2]>>2]|0)-1|0)*12|0)>>2]|0)/2|0)+1|0)|0;rp(c[(c[d>>2]|0)+28>>2]|0,(c[(c[d>>2]|0)+4>>2]|0)+4+((c[c[(c[d>>2]|0)+4>>2]>>2]|0)*12|0)+-12|0,k,h);if((c[(c[(c[d>>2]|0)+8>>2]|0)+4+((c[e>>2]|0)*12|0)+4>>2]|0)==(c[(c[(c[d>>2]|0)+8>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]|0)){b=Tb(c[(c[(c[d>>2]|0)+8>>2]|0)+4+((c[e>>2]|0)*12|0)+8>>2]<<1)|0;l=c[g>>2]<<1;l=_(l,Tb(c[h>>2]|0)|0)|0;d=c[f>>2]|0;d=(b|0)>=(dc(l,_(d,Tb(c[k>>2]|0)|0)|0)|0)}else d=0;c[m>>2]=d&1;l=c[m>>2]|0;i=n;return l|0}c[e>>2]=0;while(1){if((c[e>>2]|0)>=(c[c[(c[d>>2]|0)+8>>2]>>2]|0)){l=15;break}if(!(dp(c[d>>2]|0,c[e>>2]|0)|0)){l=13;break}c[e>>2]=(c[e>>2]|0)+1}if((l|0)==13){c[m>>2]=0;l=c[m>>2]|0;i=n;return l|0}else if((l|0)==15){c[m>>2]=1;l=c[m>>2]|0;i=n;return l|0}}c[m>>2]=1;l=c[m>>2]|0;i=n;return l|0}function ep(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;a=c[d>>2]|0;Bd(a,fp()|0);i=b;return}function fp(){var a=0,b=0;b=i;i=i+16|0;a=b;c[a>>2]=zd(8,14444)|0;i=b;return c[a>>2]|0}function gp(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0.0;w=i;i=i+80|0;e=w+72|0;z=w+68|0;y=w+64|0;f=w+60|0;x=w+56|0;t=w+52|0;k=w+48|0;l=w+44|0;s=w+40|0;j=w+36|0;p=w+32|0;g=w+28|0;o=w+24|0;v=w+20|0;n=w+16|0;r=w+12|0;u=w+8|0;m=w+4|0;q=w;c[z>>2]=a;c[y>>2]=b;c[f>>2]=d;c[x>>2]=c[z>>2];c[k>>2]=0;c[l>>2]=0;c[s>>2]=c[y>>2];c[j>>2]=0;c[p>>2]=0;do if(hp(c[y>>2]|0,c[x>>2]|0,c[f>>2]|0)|0){c[o>>2]=c[(c[(c[s>>2]|0)+4>>2]|0)+4>>2];ke(c[(c[s>>2]|0)+8>>2]|0,v,n,r)|0;d=Bb(c[o>>2]|0,c[v>>2]|0,0)|0;c[p>>2]=dc(d,ip(c[s>>2]|0,c[o>>2]|0,c[v>>2]|0)|0)|0;c[g>>2]=Cb(c[o>>2]|0,c[v>>2]|0)|0;c[j>>2]=wb(_(c[p>>2]<<2,c[g>>2]|0)|0)|0;c[m>>2]=_(c[n>>2]|0,_(c[p>>2]|0,(c[v>>2]|0)/(c[p>>2]|0)|0)|0)|0;c[q>>2]=_(c[r>>2]|0,_(c[p>>2]|0,(c[v>>2]|0)/(c[p>>2]|0)|0)|0)|0;d=c[f>>2]|0;b=c[o>>2]|0;a=(c[(c[s>>2]|0)+4>>2]|0)+4|0;if(!(c[(c[s>>2]|0)+28>>2]|0)){x=Ed(b,(c[a+4>>2]|0)/2|0,1)|0;y=Ed(c[p>>2]|0,c[n>>2]|0,c[g>>2]|0)|0;y=Gn(x,y,c[(c[s>>2]|0)+12>>2]|0,c[j>>2]|0,(c[s>>2]|0)+28|0)|0;c[k>>2]=vc(d,y,0,0,(c[(c[s>>2]|0)+12>>2]|0)==(c[(c[s>>2]|0)+20>>2]|0)?4096:0)|0;if(!(c[k>>2]|0))break;xb(c[j>>2]|0);c[j>>2]=0;a=c[f>>2]|0;x=Pd(c[(c[s>>2]|0)+4>>2]|0)|0;y=Ed((c[v>>2]|0)%(c[p>>2]|0)|0,c[n>>2]|0,c[r>>2]|0)|0;c[l>>2]=uc(a,vn(x,y,(c[(c[s>>2]|0)+12>>2]|0)+(c[m>>2]<<2)|0,(c[(c[s>>2]|0)+16>>2]|0)+(c[m>>2]<<2)|0,(c[(c[s>>2]|0)+20>>2]|0)+(c[q>>2]<<2)|0,(c[(c[s>>2]|0)+24>>2]|0)+(c[q>>2]<<2)|0,c[(c[s>>2]|0)+28>>2]|0)|0)|0;if(!(c[l>>2]|0))break;c[t>>2]=rn(104,14456,40)|0}else{x=Ed(b,1,(c[a+8>>2]|0)/2|0)|0;y=Ed(c[p>>2]|0,c[g>>2]|0,c[r>>2]|0)|0;c[k>>2]=vc(d,Gn(x,y,c[j>>2]|0,c[(c[s>>2]|0)+12>>2]|0,(c[s>>2]|0)+28|0)|0,0,0,4096)|0;if(!(c[k>>2]|0))break;xb(c[j>>2]|0);c[j>>2]=0;a=c[f>>2]|0;x=Pd(c[(c[s>>2]|0)+4>>2]|0)|0;y=Ed((c[v>>2]|0)%(c[p>>2]|0)|0,c[n>>2]|0,c[r>>2]|0)|0;c[l>>2]=uc(a,vn(x,y,(c[(c[s>>2]|0)+12>>2]|0)+(c[q>>2]<<2)|0,(c[(c[s>>2]|0)+16>>2]|0)+(c[q>>2]<<2)|0,(c[(c[s>>2]|0)+20>>2]|0)+(c[m>>2]<<2)|0,(c[(c[s>>2]|0)+24>>2]|0)+(c[m>>2]<<2)|0,c[(c[s>>2]|0)+28>>2]|0)|0)|0;if(!(c[l>>2]|0))break;c[t>>2]=r
n(104,14456,41)|0}c[(c[t>>2]|0)+64>>2]=c[k>>2];c[(c[t>>2]|0)+68>>2]=c[l>>2];c[(c[t>>2]|0)+72>>2]=c[o>>2];c[(c[t>>2]|0)+76>>2]=c[v>>2];c[(c[t>>2]|0)+92>>2]=c[n>>2];c[(c[t>>2]|0)+96>>2]=c[r>>2];rp(c[(c[s>>2]|0)+28>>2]|0,(c[(c[s>>2]|0)+4>>2]|0)+4|0,u,(c[t>>2]|0)+88|0);c[(c[t>>2]|0)+80>>2]=c[p>>2];c[(c[t>>2]|0)+84>>2]=c[g>>2];ic((c[v>>2]|0)/(c[p>>2]|0)|0,(c[k>>2]|0)+8|0,(c[l>>2]|0)+8|0,(c[t>>2]|0)+8|0);y=c[o>>2]|0;A=+(_((c[(c[s>>2]|0)+28>>2]|0)==0?y+2|0:y,c[v>>2]|0)|0);y=(c[t>>2]|0)+8+24|0;h[y>>3]=+h[y>>3]+A;c[e>>2]=c[t>>2];y=c[e>>2]|0;i=w;return y|0}while(0);yb(c[j>>2]|0);pc(c[l>>2]|0);pc(c[k>>2]|0);c[e>>2]=0;y=c[e>>2]|0;i=w;return y|0}function hp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;k=i;i=i+32|0;e=k+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;if(c[(c[h>>2]|0)+164>>2]&1024){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}if(!(qp(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0)|0)){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}c[j>>2]=c[f>>2];if(c[(c[h>>2]|0)+164>>2]&65536){if((c[(c[j>>2]|0)+12>>2]|0)!=(c[(c[j>>2]|0)+20>>2]|0)){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}if(Db(c[(c[(c[j>>2]|0)+4>>2]|0)+4>>2]|0)|0){c[e>>2]=0;h=c[e>>2]|0;i=k;return h|0}}c[e>>2]=1;h=c[e>>2]|0;i=k;return h|0}function ip(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;p=i;i=i+48|0;e=p+36|0;f=p+32|0;g=p+28|0;h=p+24|0;j=p+20|0;l=p+16|0;k=p+12|0;m=p+8|0;o=p+4|0;n=p;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;if((c[(c[f>>2]|0)+12>>2]|0)!=(c[(c[f>>2]|0)+20>>2]|0)){c[e>>2]=1;a=c[e>>2]|0;i=p;return a|0}if(dp(c[f>>2]|0,2147483647)|0){c[e>>2]=1;a=c[e>>2]|0;i=p;return a|0}rp(c[(c[f>>2]|0)+28>>2]|0,(c[(c[f>>2]|0)+4>>2]|0)+4|0,j,l);rp(c[(c[f>>2]|0)+28>>2]|0,(c[(c[f>>2]|0)+8>>2]|0)+4|0,k,m);a=c[g>>2]|0;a=_(a,Tb(c[j>>2]|0)|0)|0;do 
if((a|0)<=(Tb(c[k>>2]|0)|0)?(a=((c[g>>2]|0)/2|0)+1|0,a=_(a,Tb(c[l>>2]|0)|0)|0,(a|0)<=(Tb(c[m>>2]|0)|0)):0){a=((c[(c[f>>2]|0)+20>>2]|0)-(c[(c[f>>2]|0)+24>>2]|0)|0)/4|0;if((a|0)<=(Tb(c[l>>2]|0)|0)){if(!((c[k>>2]|0)>0&(c[m>>2]|0)>0))break}else{a=((c[(c[f>>2]|0)+24>>2]|0)-(c[(c[f>>2]|0)+20>>2]|0)|0)/4|0;a=(a|0)<=(Tb(c[l>>2]|0)|0);if(!(a&(c[k>>2]|0)>0&(c[m>>2]|0)>0))break}c[o>>2]=ec(c[k>>2]|0,c[m>>2]|0)|0;c[n>>2]=dc(c[k>>2]|0,c[m>>2]|0)|0;a=_((c[n>>2]|0)-(c[o>>2]|0)|0,c[h>>2]|0)|0;c[e>>2]=(a+(c[o>>2]|0)-1|0)/(c[o>>2]|0)|0;a=c[e>>2]|0;i=p;return a|0}while(0);c[e>>2]=c[h>>2];a=c[e>>2]|0;i=p;return a|0}function jp(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0;y=i;i=i+80|0;z=y+68|0;g=y+64|0;h=y+60|0;j=y+56|0;k=y+52|0;p=y+48|0;n=y+44|0;q=y+40|0;s=y+36|0;x=y+32|0;u=y+28|0;l=y+24|0;t=y+20|0;r=y+16|0;w=y+12|0;v=y+8|0;m=y+4|0;o=y;c[z>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[p>>2]=c[z>>2];c[n>>2]=c[(c[p>>2]|0)+64>>2];c[x>>2]=c[(c[p>>2]|0)+76>>2];c[u>>2]=c[(c[p>>2]|0)+80>>2];c[l>>2]=c[(c[p>>2]|0)+84>>2];c[t>>2]=c[(c[p>>2]|0)+72>>2];c[r>>2]=c[(c[p>>2]|0)+92>>2];c[w>>2]=c[(c[p>>2]|0)+96>>2];c[v>>2]=c[(c[p>>2]|0)+88>>2];c[m>>2]=wb(_(c[u>>2]<<2,c[l>>2]|0)|0)|0;c[q>>2]=c[u>>2];while(1){if((c[q>>2]|0)>(c[x>>2]|0))break;eb[c[(c[n>>2]|0)+56>>2]&63](c[n>>2]|0,c[g>>2]|0,c[m>>2]|0);d=_(c[r>>2]|0,c[u>>2]|0)|0;c[g>>2]=(c[g>>2]|0)+(d<<2);d=_(c[r>>2]|0,c[u>>2]|0)|0;c[h>>2]=(c[h>>2]|0)+(d<<2);c[s>>2]=0;while(1){if((c[s>>2]|0)>=(c[u>>2]|0))break;d=(c[m>>2]|0)+((_(c[s>>2]|0,c[l>>2]|0)|0)<<2)|0;mp(c[t>>2]|0,d,c[j>>2]|0,c[k>>2]|0,c[v>>2]|0);c[s>>2]=(c[s>>2]|0)+1;c[j>>2]=(c[j>>2]|0)+(c[w>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[w>>2]<<2)}c[q>>2]=(c[q>>2]|0)+(c[u>>2]|0)}xb(c[m>>2]|0);c[o>>2]=c[(c[p>>2]|0)+68>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);i=y;return}function kp(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0;y=i;i=i+80|0;z=y+68|0;g=y+64|0;h=y+60|0;j=y+56|0;k=y+52|0;p=y+48|0;n=y+44|0;q=y+40|0;t=y+36|0;x=y+32|0;v=y+28|0;l=y+24|0;u=y+20|0;s=y+16|0;w=y+12|0;r=y+8|0;m=y+4|0;o=y;c[z>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[p>>2]=c[z>>2];c[n>>2]=c[(c[p>>2]|0)+64>>2];c[x>>2]=c[(c[p>>2]|0)+76>>2];c[v>>2]=c[(c[p>>2]|0)+80>>2];c[l>>2]=c[(c[p>>2]|0)+84>>2];c[u>>2]=c[(c[p>>2]|0)+72>>2];c[s>>2]=c[(c[p>>2]|0)+92>>2];c[w>>2]=c[(c[p>>2]|0)+96>>2];c[r>>2]=c[(c[p>>2]|0)+88>>2];c[m>>2]=wb(_(c[v>>2]<<2,c[l>>2]|0)|0)|0;c[q>>2]=c[v>>2];while(1){if((c[q>>2]|0)>(c[x>>2]|0))break;c[t>>2]=0;while(1){if((c[t>>2]|0)>=(c[v>>2]|0))break;lp(c[u>>2]|0,c[j>>2]|0,c[k>>2]|0,c[r>>2]|0,(c[m>>2]|0)+((_(c[t>>2]|0,c[l>>2]|0)|0)<<2)|0);c[t>>2]=(c[t>>2]|0)+1;c[j>>2]=(c[j>>2]|0)+(c[s>>2]<<2);c[k>>2]=(c[k>>2]|0)+(c[s>>2]<<2)}eb[c[(c[n>>2]|0)+56>>2]&63](c[n>>2]|0,c[m>>2]|0,c[g>>2]|0);d=_(c[w>>2]|0,c[v>>2]|0)|0;c[g>>2]=(c[g>>2]|0)+(d<<2);d=_(c[w>>2]|0,c[v>>2]|0)|0;c[h>>2]=(c[h>>2]|0)+(d<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]|0)}xb(c[m>>2]|0);c[o>>2]=c[(c[p>>2]|0)+68>>2];Ya[c[(c[o>>2]|0)+56>>2]&63](c[o>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0,c[k>>2]|0);i=y;return}function lp(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;h=o+20|0;j=o+16|0;k=o+12|0;l=o+8|0;m=o+4|0;n=o;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;g[c[m>>2]>>2]=+g[c[j>>2]>>2];c[n>>2]=1;while(1){d=c[n>>2]|0;if(((c[n>>2]|0)+(c[n>>2]|0)|0)>=(c[h>>2]|0))break;b=_(d,c[l>>2]|0)|0;g[(c[m>>2]|0)+(c[n>>2]<<2)>>2]=+g[(c[j>>2]|0)+(b<<2)>>2];b=_(c[n>>2]|0,c[l>>2]|0)|0;g[(c[m>>2]|0)+((c[h>>2]|0)-(c[n>>2]|0)<<2)>>2]=+g[(c[k>>2]|0)+(b<<2)>>2];c[n>>2]=(c[n>>2]|0)+1}if((d+(c[n>>2]|0)|0)!=(c[h>>2]|0)){i=o;return}l=_(c[n>>2]|0,c[l>>2]|0)|0;g[(c[m>>2]|0)+(c[n>>2]<<2)>>2]=+g[(c[j>>2]|0)+(l<<2)>>2];i=o;return}function mp(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;h=o+20|0;j=o+16|0;k=o+12|0;l=o+8|0;m=o+4|0;n=o;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;g[c[k>>2]>>2]=+g[c[j>>2]>>2];g[c[l>>2]>>2]=0.0;c[n>>2]=1;while(1){e=c[n>>2]|0;if(((c[n>>2]|0)+(c[n>>2]|0)|0)>=(c[h>>2]|0))break;d=_(c[n>>2]|0,c[m>>2]|0)|0;g[(c[k>>2]|0)+(d<<2)>>2]=+g[(c[j>>2]|0)+(e<<2)>>2];d=_(c[n>>2]|0,c[m>>2]|0)|0;g[(c[l>>2]|0)+(d<<2)>>2]=+g[(c[j>>2]|0)+((c[h>>2]|0)-(c[n>>2]|0)<<2)>>2];c[n>>2]=(c[n>>2]|0)+1}if((e+(c[n>>2]|0)|0)!=(c[h>>2]|0)){i=o;return}h=_(c[n>>2]|0,c[m>>2]|0)|0;g[(c[k>>2]|0)+(h<<2)>>2]=+g[(c[j>>2]|0)+(c[n>>2]<<2)>>2];m=_(c[n>>2]|0,c[m>>2]|0)|0;g[(c[l>>2]|0)+(m<<2)>>2]=0.0;i=o;return}function np(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+68>>2]|0,c[e>>2]|0);i=d;return}function op(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;d=i;i=i+48|0;e=d;k=d+36|0;l=d+32|0;m=d+28|0;c[k>>2]=a;c[l>>2]=b;c[m>>2]=c[k>>2];b=c[c[l>>2]>>2]|0;a=c[l>>2]|0;l=c[(c[m>>2]|0)+72>>2]|0;k=c[(c[m>>2]|0)+80>>2]|0;j=c[(c[m>>2]|0)+76>>2]|0;h=(c[(c[m>>2]|0)+84>>2]|0)%(c[(c[m>>2]|0)+72>>2]|0)|0;g=c[(c[m>>2]|0)+64>>2]|0;f=c[(c[m>>2]|0)+68>>2]|0;c[e>>2]=(c[(c[m>>2]|0)+56>>2]|0)==40?23580:23585;c[e+4>>2]=l;c[e+8>>2]=k;c[e+12>>2]=j;c[e+16>>2]=h;c[e+20>>2]=g;c[e+24>>2]=f;eb[b&63](a,23590,e);i=d;return}function pp(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+68>>2]|0);pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function qp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;g=i;i=i+16|0;h=g+12|0;e=g+4|0;f=g;c[h>>2]=a;c[g+8>>2]=b;c[e>>2]=d;c[f>>2]=c[h>>2];if((c[c[(c[f>>2]|0)+8>>2]>>2]|0)>1){e=0;e=e&1;i=g;return e|0}if((c[c[(c[f>>2]|0)+4>>2]>>2]|0)!=1){e=0;e=e&1;i=g;return e|0}if((c[(c[f>>2]|0)+28>>2]|0)!=0?(c[(c[f>>2]|0)+28>>2]|0)!=4:0){e=0;e=e&1;i=g;return e|0}b=(c[(c[f>>2]|0)+4>>2]|0)+4|0;if(!(c[(c[f>>2]|0)+28>>2]|0))b=c[b+4>>2]|0;else 
b=c[b+8>>2]|0;if(((((c[(c[f>>2]|0)+16>>2]|0)-(c[(c[f>>2]|0)+12>>2]|0)|0)/4|0)<<1|0)!=(b|0)){e=0;e=e&1;i=g;return e|0}if(Db(c[(c[(c[f>>2]|0)+4>>2]|0)+4>>2]|0)|0)b=(c[(c[e>>2]|0)+164>>2]&16384|0)!=0;else b=0;e=b^1;e=e&1;i=g;return e|0}function rp(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;j=i;i=i+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[k>>2]=a;c[f>>2]=b;c[g>>2]=d;c[h>>2]=e;e=c[f>>2]|0;if(!(c[k>>2]|0)){c[c[g>>2]>>2]=c[e+4>>2];c[c[h>>2]>>2]=c[(c[f>>2]|0)+8>>2];i=j;return}else{c[c[g>>2]>>2]=c[e+8>>2];c[c[h>>2]>>2]=c[(c[f>>2]|0)+4>>2];i=j;return}}function sp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;m=i;i=i+32|0;d=m+28|0;e=m+24|0;f=m+20|0;h=m+16|0;k=m+12|0;l=m+8|0;g=m+4|0;j=m;c[d>>2]=a;c[e>>2]=b;c[h>>2]=0;c[f>>2]=0;while(1){if(((c[f>>2]|0)+1|0)>=(c[c[d>>2]>>2]|0))break;c[k>>2]=(c[d>>2]|0)+4+((c[f>>2]|0)*12|0);b=(c[c[k>>2]>>2]|0)-1|0;a=Tb(c[(c[k>>2]|0)+4>>2]|0)|0;a=_(b,dc(a,Tb(c[(c[k>>2]|0)+8>>2]|0)|0)|0)|0;c[h>>2]=(c[h>>2]|0)+a;c[f>>2]=(c[f>>2]|0)+1}if((c[f>>2]|0)>=(c[c[d>>2]>>2]|0)){a=c[h>>2]|0;i=m;return a|0}c[l>>2]=(c[d>>2]|0)+4+((c[f>>2]|0)*12|0);rp(c[e>>2]|0,c[l>>2]|0,g,j);k=(c[c[l>>2]>>2]|0)-1|0;k=_(k,Tb(c[g>>2]|0)|0)|0;a=(c[c[l>>2]>>2]|0)/2|0;a=dc(k,_(a,Tb(c[j>>2]|0)|0)|0)|0;c[h>>2]=(c[h>>2]|0)+a;a=c[h>>2]|0;i=m;return a|0}function tp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+16|0;f=d+12|0;h=d+8|0;g=d+4|0;e=d;c[f>>2]=a;c[h>>2]=b;c[g>>2]=c[f>>2];c[e>>2]=c[h>>2];Ya[c[(c[g>>2]|0)+56>>2]&63](c[f>>2]|0,c[(c[e>>2]|0)+12>>2]|0,c[(c[e>>2]|0)+16>>2]|0,c[(c[e>>2]|0)+20>>2]|0,c[(c[e>>2]|0)+24>>2]|0);i=d;return}function up(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0;d=i;i=i+16|0;f=d+12|0;h=d+8|0;g=d+4|0;e=d;c[f>>2]=a;c[h>>2]=b;c[g>>2]=c[f>>2];c[e>>2]=c[h>>2];eb[c[(c[g>>2]|0)+56>>2]&63](c[f>>2]|0,c[(c[e>>2]|0)+12>>2]|0,c[(c[e>>2]|0)+16>>2]|0);i=d;return}function vp(a){a=a|0;var 
b=0,d=0,e=0;e=i;i=i+16|0;b=e+8|0;d=e+4|0;c[b>>2]=a;c[e>>2]=2;c[d>>2]=0;while(1){if((c[d>>2]|0)>=2)break;a=c[b>>2]|0;Bd(a,wp(c[14472+(c[d>>2]<<2)>>2]|0,14472,2)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function wp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,14480)|0;c[(c[e>>2]|0)+8>>2]=c[j>>2];c[(c[e>>2]|0)+12>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function xp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+48|0;e=r+44|0;s=r+40|0;f=r+36|0;g=r+32|0;m=r+28|0;n=r+24|0;o=r+20|0;j=r+16|0;q=r+12|0;l=r+8|0;p=r+4|0;k=r;c[s>>2]=a;c[f>>2]=b;c[g>>2]=d;c[m>>2]=c[s>>2];if(!(yp(c[s>>2]|0,c[f>>2]|0,c[g>>2]|0,q)|0)){c[e>>2]=0;b=c[e>>2]|0;i=r;return b|0}c[n>>2]=c[f>>2];c[l>>2]=(c[(c[n>>2]|0)+8>>2]|0)+4+((c[q>>2]|0)*12|0);rp(c[(c[n>>2]|0)+28>>2]|0,c[l>>2]|0,p,k);f=c[g>>2]|0;a=Pd(c[(c[n>>2]|0)+4>>2]|0)|0;b=Rd(c[(c[n>>2]|0)+8>>2]|0,c[q>>2]|0)|0;c[j>>2]=uc(f,vn(a,b,c[(c[n>>2]|0)+12>>2]|0,c[(c[n>>2]|0)+16>>2]|0,c[(c[n>>2]|0)+20>>2]|0,c[(c[n>>2]|0)+24>>2]|0,c[(c[n>>2]|0)+28>>2]|0)|0)|0;if(!(c[j>>2]|0)){c[e>>2]=0;b=c[e>>2]|0;i=r;return b|0}c[o>>2]=rn(88,14492,42)|0;c[(c[o>>2]|0)+64>>2]=c[j>>2];c[(c[o>>2]|0)+68>>2]=c[c[l>>2]>>2];c[(c[o>>2]|0)+72>>2]=c[p>>2];c[(c[o>>2]|0)+76>>2]=c[k>>2];c[(c[o>>2]|0)+80>>2]=c[m>>2];fc((c[o>>2]|0)+8|0);h[(c[o>>2]|0)+8+24>>3]=3.14159;lc(c[(c[o>>2]|0)+68>>2]|0,(c[j>>2]|0)+8|0,(c[o>>2]|0)+8|0);if(!((c[c[(c[n>>2]|0)+4>>2]>>2]|0)==1?(c[(c[(c[n>>2]|0)+4>>2]|0)+4>>2]|0)<=128:0))h[(c[o>>2]|0)+40>>3]=+(c[(c[o>>2]|0)+68>>2]|0)*+h[(c[j>>2]|0)+40>>3];c[e>>2]=c[o>>2];b=c[e>>2]|0;i=r;return b|0}function yp(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+32|0;f=n+28|0;o=n+24|0;g=n+20|0;h=n+16|0;j=n+12|0;l=n+8|0;m=n+4|0;k=n;c[o>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[l>>2]=c[o>>2];if(!(Dp(c[o>>2]|0,c[g>>2]|0,c[j>>2]|0)|0)){c[f>>2]=0;b=c[f>>2]|0;i=n;return 
b|0}if((c[(c[h>>2]|0)+164>>2]&256|0)!=0?(c[(c[l>>2]|0)+8>>2]|0)!=(c[c[(c[l>>2]|0)+12>>2]>>2]|0):0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if(c[(c[h>>2]|0)+164>>2]&65536){c[m>>2]=c[g>>2];c[k>>2]=(c[(c[m>>2]|0)+8>>2]|0)+4+((c[c[j>>2]>>2]|0)*12|0);if((c[c[(c[m>>2]|0)+4>>2]>>2]|0)>1?(b=Tb(c[(c[k>>2]|0)+4>>2]|0)|0,b=ec(b,Tb(c[(c[k>>2]|0)+8>>2]|0)|0)|0,(b|0)<(sp(c[(c[m>>2]|0)+4>>2]|0,c[(c[m>>2]|0)+28>>2]|0)|0)):0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((c[c[(c[m>>2]|0)+4>>2]>>2]|0)==0?(c[c[(c[m>>2]|0)+8>>2]>>2]|0)==1:0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((c[(c[h>>2]|0)+164>>2]&512|0)!=0?(c[(c[h>>2]|0)+160>>2]|0)>1:0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}}c[f>>2]=1;b=c[f>>2]|0;i=n;return b|0}function zp(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;r=i;i=i+48|0;s=r+40|0;g=r+36|0;h=r+32|0;j=r+28|0;k=r+24|0;n=r+20|0;o=r+16|0;q=r+12|0;p=r+8|0;m=r+4|0;l=r;c[s>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[k>>2]=f;c[n>>2]=c[s>>2];c[q>>2]=c[(c[n>>2]|0)+68>>2];c[p>>2]=c[(c[n>>2]|0)+72>>2];c[m>>2]=c[(c[n>>2]|0)+76>>2];c[l>>2]=c[(c[(c[n>>2]|0)+64>>2]|0)+56>>2];c[o>>2]=0;while(1){if((c[o>>2]|0)>=(c[q>>2]|0))break;b=(c[g>>2]|0)+((_(c[o>>2]|0,c[p>>2]|0)|0)<<2)|0;f=(c[h>>2]|0)+((_(c[o>>2]|0,c[p>>2]|0)|0)<<2)|0;e=(c[j>>2]|0)+((_(c[o>>2]|0,c[m>>2]|0)|0)<<2)|0;a=(c[k>>2]|0)+((_(c[o>>2]|0,c[m>>2]|0)|0)<<2)|0;Ya[c[l>>2]&63](c[(c[n>>2]|0)+64>>2]|0,b,f,e,a);c[o>>2]=(c[o>>2]|0)+1}i=r;return}function Ap(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function Bp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;f=d+20|0;h=d+16|0;g=d+12|0;c[j>>2]=a;c[f>>2]=b;c[h>>2]=c[j>>2];c[g>>2]=c[(c[h>>2]|0)+80>>2];b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;g=c[(c[g>>2]|0)+8>>2]|0;f=c[(c[h>>2]|0)+64>>2]|0;c[e>>2]=c[(c[h>>2]|0)+68>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,23629,e);i=d;return}function Cp(a){a=a|0;var 
b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Dp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0;j=i;i=i+32|0;e=j+20|0;l=j+16|0;k=j+12|0;f=j+8|0;g=j+4|0;h=j;c[l>>2]=a;c[k>>2]=b;c[f>>2]=d;c[g>>2]=c[l>>2];c[h>>2]=c[k>>2];if(((c[c[(c[h>>2]|0)+8>>2]>>2]|0)!=2147483647?(c[c[(c[h>>2]|0)+8>>2]>>2]|0)>0:0)?(Ep(c[g>>2]|0,c[(c[h>>2]|0)+8>>2]|0,(c[(c[h>>2]|0)+12>>2]|0)!=(c[(c[h>>2]|0)+20>>2]|0)&1,c[f>>2]|0)|0)!=0:0)if((c[(c[h>>2]|0)+12>>2]|0)!=(c[(c[h>>2]|0)+20>>2]|0)){c[e>>2]=1;g=c[e>>2]|0;i=j;return g|0}else{c[e>>2]=dp(c[h>>2]|0,c[c[f>>2]>>2]|0)|0;g=c[e>>2]|0;i=j;return g|0}c[e>>2]=0;g=c[e>>2]|0;i=j;return g|0}function Ep(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;e=mc(c[(c[k>>2]|0)+8>>2]|0,c[(c[k>>2]|0)+12>>2]|0,c[(c[k>>2]|0)+16>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0)|0;i=f;return e|0}function Fp(a){a=a|0;var b=0,d=0,e=0;e=i;i=i+16|0;b=e+8|0;d=e+4|0;c[b>>2]=a;c[e>>2]=2;c[d>>2]=0;while(1){if((c[d>>2]|0)>=2)break;a=c[b>>2]|0;Bd(a,Gp(c[14508+(c[d>>2]<<2)>>2]|0,14508,2)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Gp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;f=i;i=i+16|0;j=f+12|0;h=f+8|0;g=f+4|0;e=f;c[j>>2]=a;c[h>>2]=b;c[g>>2]=d;c[e>>2]=zd(20,14516)|0;c[(c[e>>2]|0)+8>>2]=c[j>>2];c[(c[e>>2]|0)+12>>2]=c[h>>2];c[(c[e>>2]|0)+16>>2]=c[g>>2];i=f;return c[e>>2]|0}function Hp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;e=p+36|0;q=p+32|0;f=p+28|0;g=p+24|0;l=p+20|0;m=p+16|0;n=p+12|0;j=p+8|0;o=p+4|0;k=p;c[q>>2]=a;c[f>>2]=b;c[g>>2]=d;c[l>>2]=c[q>>2];if(!(Ip(c[q>>2]|0,c[f>>2]|0,c[g>>2]|0,o)|0)){c[e>>2]=0;n=c[e>>2]|0;i=p;return 
n|0}c[m>>2]=c[f>>2];c[k>>2]=(c[(c[m>>2]|0)+8>>2]|0)+4+((c[o>>2]|0)*12|0);f=c[g>>2]|0;a=Pd(c[(c[m>>2]|0)+4>>2]|0)|0;b=Rd(c[(c[m>>2]|0)+8>>2]|0,c[o>>2]|0)|0;c[j>>2]=uc(f,Gn(a,b,c[(c[m>>2]|0)+12>>2]|0,c[(c[m>>2]|0)+16>>2]|0,(c[m>>2]|0)+20|0)|0)|0;if(!(c[j>>2]|0)){c[e>>2]=0;n=c[e>>2]|0;i=p;return n|0}c[n>>2]=sn(88,14528,48)|0;c[(c[n>>2]|0)+64>>2]=c[j>>2];c[(c[n>>2]|0)+68>>2]=c[c[k>>2]>>2];c[(c[n>>2]|0)+72>>2]=c[(c[k>>2]|0)+4>>2];c[(c[n>>2]|0)+76>>2]=c[(c[k>>2]|0)+8>>2];c[(c[n>>2]|0)+80>>2]=c[l>>2];fc((c[n>>2]|0)+8|0);h[(c[n>>2]|0)+8+24>>3]=3.14159;lc(c[(c[n>>2]|0)+68>>2]|0,(c[j>>2]|0)+8|0,(c[n>>2]|0)+8|0);if(!((c[c[(c[m>>2]|0)+4>>2]>>2]|0)==1?(c[(c[(c[m>>2]|0)+4>>2]|0)+4>>2]|0)<=128:0))h[(c[n>>2]|0)+40>>3]=+(c[(c[n>>2]|0)+68>>2]|0)*+h[(c[j>>2]|0)+40>>3];c[e>>2]=c[n>>2];n=c[e>>2]|0;i=p;return n|0}function Ip(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+32|0;f=n+28|0;o=n+24|0;g=n+20|0;h=n+16|0;j=n+12|0;l=n+8|0;m=n+4|0;k=n;c[o>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;c[l>>2]=c[o>>2];if(!(Np(c[o>>2]|0,c[g>>2]|0,c[j>>2]|0)|0)){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((c[(c[h>>2]|0)+164>>2]&256|0)!=0?(c[(c[l>>2]|0)+8>>2]|0)!=(c[c[(c[l>>2]|0)+12>>2]>>2]|0):0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}c[m>>2]=c[g>>2];if(c[(c[h>>2]|0)+164>>2]&65536){if((c[(c[h>>2]|0)+164>>2]&8|0)!=0?(c[c[(c[m>>2]|0)+4>>2]>>2]|0)==0:0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}c[k>>2]=(c[(c[m>>2]|0)+8>>2]|0)+4+((c[c[j>>2]>>2]|0)*12|0);if((c[c[(c[m>>2]|0)+4>>2]>>2]|0)>1?(b=Tb(c[(c[k>>2]|0)+4>>2]|0)|0,b=ec(b,Tb(c[(c[k>>2]|0)+8>>2]|0)|0)|0,(b|0)<(Hd(c[(c[m>>2]|0)+4>>2]|0)|0)):0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((c[(c[h>>2]|0)+164>>2]&512|0)!=0?(c[(c[h>>2]|0)+160>>2]|0)>1:0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}if((((c[c[(c[m>>2]|0)+8>>2]>>2]|0)==1?(c[c[(c[m>>2]|0)+4>>2]>>2]|0)==1:0)?(c[(c[m>>2]|0)+20>>2]|0)>>>0>=9:0)?(c[(c[m>>2]|0)+20>>2]|0)>>>0<=16:0){c[f>>2]=0;b=c[f>>2]|0;i=n;return b|0}}c[f>>2]=1;b=c[f>>2]|0;i=n;return b|0}function Jp(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;n=i;i=i+48|0;o=n+32|0;e=n+28|0;f=n+24|0;h=n+20|0;j=n+16|0;m=n+12|0;k=n+8|0;l=n+4|0;g=n;c[o>>2]=a;c[e>>2]=b;c[f>>2]=d;c[h>>2]=c[o>>2];c[m>>2]=c[(c[h>>2]|0)+68>>2];c[k>>2]=c[(c[h>>2]|0)+72>>2];c[l>>2]=c[(c[h>>2]|0)+76>>2];c[g>>2]=c[(c[(c[h>>2]|0)+64>>2]|0)+56>>2];c[j>>2]=0;while(1){if((c[j>>2]|0)>=(c[m>>2]|0))break;b=(c[e>>2]|0)+((_(c[j>>2]|0,c[k>>2]|0)|0)<<2)|0;a=(c[f>>2]|0)+((_(c[j>>2]|0,c[l>>2]|0)|0)<<2)|0;eb[c[g>>2]&63](c[(c[h>>2]|0)+64>>2]|0,b,a);c[j>>2]=(c[j>>2]|0)+1}i=n;return}function Kp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+64>>2]|0,c[e>>2]|0);i=d;return}function Lp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0,h=0,j=0;d=i;i=i+32|0;e=d;j=d+24|0;f=d+20|0;h=d+16|0;g=d+12|0;c[j>>2]=a;c[f>>2]=b;c[h>>2]=c[j>>2];c[g>>2]=c[(c[h>>2]|0)+80>>2];b=c[c[f>>2]>>2]|0;a=c[f>>2]|0;g=c[(c[g>>2]|0)+8>>2]|0;f=c[(c[h>>2]|0)+64>>2]|0;c[e>>2]=c[(c[h>>2]|0)+68>>2];c[e+4>>2]=g;c[e+8>>2]=f;eb[b&63](a,23659,e);i=d;return}function Mp(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+64>>2]|0);i=b;return}function Np(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0;h=i;i=i+32|0;k=h+16|0;j=h+12|0;e=h+8|0;f=h+4|0;g=h;c[k>>2]=a;c[j>>2]=b;c[e>>2]=d;c[f>>2]=c[k>>2];c[g>>2]=c[j>>2];if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)==2147483647){a=0;a=a&1;i=h;return a|0}if((c[c[(c[g>>2]|0)+8>>2]>>2]|0)<=0){a=0;a=a&1;i=h;return a|0}if((c[c[(c[g>>2]|0)+4>>2]>>2]|0)<0){a=0;a=a&1;i=h;return a|0}a=(Op(c[f>>2]|0,c[(c[g>>2]|0)+8>>2]|0,(c[(c[g>>2]|0)+12>>2]|0)!=(c[(c[g>>2]|0)+16>>2]|0)&1,c[e>>2]|0)|0)!=0;a=a&1;i=h;return a|0}function Op(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;f=i;i=i+16|0;k=f+12|0;j=f+8|0;h=f+4|0;g=f;c[k>>2]=a;c[j>>2]=b;c[h>>2]=d;c[g>>2]=e;e=mc(c[(c[k>>2]|0)+8>>2]|0,c[(c[k>>2]|0)+12>>2]|0,c[(c[k>>2]|0)+16>>2]|0,c[j>>2]|0,c[h>>2]|0,c[g>>2]|0)|0;i=f;return e|0}function Pp(a){a=a|0;var 
b=0,d=0,e=0;e=i;i=i+16|0;b=e+4|0;d=e;c[b>>2]=a;c[d>>2]=0;while(1){if((c[d>>2]|0)>>>0>=3)break;a=c[b>>2]|0;Bd(a,Qp(c[14544+(c[d>>2]<<2)>>2]|0)|0);c[d>>2]=(c[d>>2]|0)+1}i=e;return}function Qp(a){a=a|0;var b=0,d=0,e=0;d=i;i=i+16|0;e=d+4|0;b=d;c[e>>2]=a;c[b>>2]=zd(12,14556)|0;c[(c[b>>2]|0)+8>>2]=c[e>>2];i=d;return c[b>>2]|0}function Rp(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;o=p+44|0;q=p+40|0;e=p+36|0;f=p+32|0;k=p+28|0;m=p+24|0;g=p+20|0;h=p+16|0;j=p+12|0;l=p+8|0;n=p;c[q>>2]=a;c[e>>2]=b;c[f>>2]=d;c[k>>2]=c[q>>2];if(!(Sp(c[q>>2]|0,c[e>>2]|0,c[f>>2]|0,g,h,j,l)|0)){c[o>>2]=0;b=c[o>>2]|0;i=p;return b|0}c[m>>2]=c[e>>2];c[n>>2]=sn(120,14568,c[c[(c[k>>2]|0)+8>>2]>>2]|0)|0;c[(c[n>>2]|0)+64>>2]=c[(c[(c[m>>2]|0)+8>>2]|0)+4+((c[g>>2]|0)*12|0)>>2];c[(c[n>>2]|0)+68>>2]=c[(c[(c[m>>2]|0)+8>>2]|0)+4+((c[h>>2]|0)*12|0)>>2];Tp(c[m>>2]|0,c[j>>2]|0,(c[n>>2]|0)+72|0,p+4|0);c[(c[n>>2]|0)+76>>2]=c[l>>2];b=Up(c[(c[n>>2]|0)+64>>2]|0,c[(c[n>>2]|0)+68>>2]|0)|0;c[(c[n>>2]|0)+88>>2]=b;c[(c[n>>2]|0)+80>>2]=(c[(c[n>>2]|0)+64>>2]|0)/(c[(c[n>>2]|0)+88>>2]|0)|0;c[(c[n>>2]|0)+84>>2]=(c[(c[n>>2]|0)+68>>2]|0)/(c[(c[n>>2]|0)+88>>2]|0)|0;c[(c[n>>2]|0)+112>>2]=c[k>>2];fc((c[n>>2]|0)+8|0);c[(c[n>>2]|0)+108>>2]=0;c[(c[n>>2]|0)+104>>2]=0;c[(c[n>>2]|0)+100>>2]=0;b=(Va[c[(c[(c[k>>2]|0)+8>>2]|0)+8>>2]&63](c[m>>2]|0,c[f>>2]|0,c[n>>2]|0)|0)!=0;e=c[n>>2]|0;if(b){c[o>>2]=e;b=c[o>>2]|0;i=p;return b|0}else{pc(e);c[o>>2]=0;b=c[o>>2]|0;i=p;return b|0}return 0}function Sp(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;q=i;i=i+48|0;s=q+32|0;r=q+28|0;j=q+24|0;k=q+20|0;l=q+16|0;m=q+12|0;n=q+8|0;o=q+4|0;p=q;c[s>>2]=a;c[r>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=f;c[m>>2]=g;c[n>>2]=h;c[o>>2]=c[s>>2];c[p>>2]=c[r>>2];if((c[(c[p>>2]|0)+12>>2]|0)!=(c[(c[p>>2]|0)+16>>2]|0)){a=0;a=a&1;i=q;return a|0}if(c[c[(c[p>>2]|0)+4>>2]>>2]|0){a=0;a=a&1;i=q;return 
a|0}if((c[c[(c[p>>2]|0)+8>>2]>>2]|0)!=2?(c[c[(c[p>>2]|0)+8>>2]>>2]|0)!=3:0){a=0;a=a&1;i=q;return a|0}if(!(Yp(c[(c[p>>2]|0)+8>>2]|0,c[k>>2]|0,c[l>>2]|0,c[m>>2]|0)|0)){a=0;a=a&1;i=q;return a|0}if(((c[(c[j>>2]|0)+164>>2]&65536|0)!=0?(c[c[(c[p>>2]|0)+8>>2]>>2]|0)!=2:0)?(g=Tb(c[(c[(c[p>>2]|0)+8>>2]|0)+4+((c[c[m>>2]>>2]|0)*12|0)+4>>2]|0)|0,a=Tb(c[(c[(c[p>>2]|0)+8>>2]|0)+4+((c[c[k>>2]>>2]|0)*12|0)+4>>2]|0)|0,(g|0)>=(dc(a,Tb(c[(c[(c[p>>2]|0)+8>>2]|0)+4+((c[c[k>>2]>>2]|0)*12|0)+8>>2]|0)|0)|0)):0){a=0;a=a&1;i=q;return a|0}if((c[(c[j>>2]|0)+164>>2]&8|0)!=0?(c[(c[(c[p>>2]|0)+8>>2]|0)+4+((c[c[k>>2]>>2]|0)*12|0)>>2]|0)!=(c[(c[(c[p>>2]|0)+8>>2]|0)+4+((c[c[l>>2]>>2]|0)*12|0)>>2]|0):0){a=0;a=a&1;i=q;return a|0}if(!(Za[c[(c[(c[o>>2]|0)+8>>2]|0)+4>>2]&3](c[p>>2]|0,c[j>>2]|0,c[c[k>>2]>>2]|0,c[c[l>>2]>>2]|0,c[c[m>>2]>>2]|0,c[n>>2]|0)|0)){a=0;a=a&1;i=q;return a|0}if((c[(c[j>>2]|0)+164>>2]&65536|0)==0?(c[(c[j>>2]|0)+164>>2]&16384|0)==0:0){a=1;a=a&1;i=q;return a|0}if((c[c[n>>2]>>2]|0)<=65536){a=1;a=a&1;i=q;return a|0}a=(c[c[n>>2]>>2]|0)*9|0;a=(a|0)<=(ie(c[(c[p>>2]|0)+8>>2]|0)|0);a=a&1;i=q;return a|0}function Tp(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;k=i;i=i+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;if((c[c[(c[f>>2]|0)+8>>2]>>2]|0)==2){c[c[h>>2]>>2]=1;c[c[j>>2]>>2]=1;i=k;return}else{c[c[h>>2]>>2]=c[(c[(c[f>>2]|0)+8>>2]|0)+4+((c[g>>2]|0)*12|0)>>2];c[c[j>>2]>>2]=c[(c[(c[f>>2]|0)+8>>2]|0)+4+((c[g>>2]|0)*12|0)+4>>2];i=k;return}}function Up(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;g=i;i=i+16|0;d=g+8|0;e=g+4|0;f=g;c[d>>2]=a;c[e>>2]=b;do{c[f>>2]=(c[d>>2]|0)%(c[e>>2]|0)|0;c[d>>2]=c[e>>2];c[e>>2]=c[f>>2]}while((c[f>>2]|0)!=0);i=g;return c[d>>2]|0}function Vp(a,b){a=a|0;b=b|0;var d=0,e=0,f=0,g=0;d=i;i=i+16|0;g=d+8|0;e=d+4|0;f=d;c[g>>2]=a;c[e>>2]=b;c[f>>2]=c[g>>2];rc(c[(c[f>>2]|0)+100>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+104>>2]|0,c[e>>2]|0);rc(c[(c[f>>2]|0)+108>>2]|0,c[e>>2]|0);i=d;return}function Wp(a,b){a=a|0;b=b|0;var 
d=0,e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;k=i;i=i+64|0;g=k+40|0;f=k+32|0;j=k+24|0;h=k+16|0;l=k;o=k+52|0;d=k+48|0;e=k+44|0;c[o>>2]=a;c[d>>2]=b;c[e>>2]=c[o>>2];a=c[c[d>>2]>>2]|0;b=c[d>>2]|0;o=c[(c[e>>2]|0)+64>>2]|0;n=c[(c[e>>2]|0)+68>>2]|0;m=c[(c[e>>2]|0)+72>>2]|0;c[l>>2]=c[(c[(c[(c[e>>2]|0)+112>>2]|0)+8>>2]|0)+12>>2];c[l+4>>2]=o;c[l+8>>2]=n;c[l+12>>2]=m;eb[a&63](b,23688,l);if(c[(c[e>>2]|0)+100>>2]|0){a=c[c[d>>2]>>2]|0;l=c[d>>2]|0;c[h>>2]=c[(c[e>>2]|0)+100>>2];eb[a&63](l,23700,h)}if(c[(c[e>>2]|0)+104>>2]|0){h=c[c[d>>2]>>2]|0;l=c[d>>2]|0;c[j>>2]=c[(c[e>>2]|0)+104>>2];eb[h&63](l,23700,j)}if(!(c[(c[e>>2]|0)+108>>2]|0)){j=c[d>>2]|0;j=c[j>>2]|0;l=c[d>>2]|0;eb[j&63](l,23707,g);i=k;return}l=c[c[d>>2]>>2]|0;j=c[d>>2]|0;c[f>>2]=c[(c[e>>2]|0)+108>>2];eb[l&63](j,23700,f);j=c[d>>2]|0;j=c[j>>2]|0;l=c[d>>2]|0;eb[j&63](l,23707,g);i=k;return}function Xp(a){a=a|0;var b=0,d=0,e=0;b=i;i=i+16|0;e=b+4|0;d=b;c[e>>2]=a;c[d>>2]=c[e>>2];pc(c[(c[d>>2]|0)+108>>2]|0);pc(c[(c[d>>2]|0)+104>>2]|0);pc(c[(c[d>>2]|0)+100>>2]|0);i=b;return}function Yp(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0;o=i;i=i+32|0;g=o+28|0;f=o+24|0;h=o+20|0;j=o+16|0;k=o+12|0;l=o+8|0;m=o+4|0;n=o;c[f>>2]=a;c[h>>2]=b;c[j>>2]=d;c[k>>2]=e;c[l>>2]=0;a:while(1){if((c[l>>2]|0)>=(c[c[f>>2]>>2]|0)){d=16;break}c[m>>2]=0;while(1){d=c[l>>2]|0;if((c[m>>2]|0)>=(c[c[f>>2]>>2]|0))break;c[n>>2]=3-d-(c[m>>2]|0);do if((c[l>>2]|0)!=(c[m>>2]|0)){if((c[c[f>>2]>>2]|0)!=2?(c[(c[f>>2]|0)+4+((c[n>>2]|0)*12|0)+4>>2]|0)!=(c[(c[f>>2]|0)+4+((c[n>>2]|0)*12|0)+8>>2]|0):0)break;if((c[c[f>>2]>>2]|0)==2)d=1;else d=c[(c[f>>2]|0)+4+((c[n>>2]|0)*12|0)>>2]|0;if((c[c[f>>2]>>2]|0)==2)b=1;else b=c[(c[f>>2]|0)+4+((c[n>>2]|0)*12|0)+4>>2]|0;if(Zp((c[f>>2]|0)+4+((c[l>>2]|0)*12|0)|0,(c[f>>2]|0)+4+((c[m>>2]|0)*12|0)|0,d,b)|0){d=13;break a}}while(0);c[m>>2]=(c[m>>2]|0)+1}c[l>>2]=d+1}if((d|0)==13){c[c[h>>2]>>2]=c[l>>2];c[c[j>>2]>>2]=c[m>>2];c[c[k>>2]>>2]=c[n>>2];c[g>>2]=1;l=c[g>>2]|0;i=o;return l|0}else 
if((d|0)==16){c[g>>2]=0;l=c[g>>2]|0;i=o;return l|0}return 0}function Zp(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;k=i;i=i+16|0;f=k+12|0;g=k+8|0;h=k+4|0;j=k;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[j>>2]=e;if(((c[c[f>>2]>>2]|0)==(c[c[g>>2]>>2]|0)?(c[(c[f>>2]|0)+8>>2]|0)==(c[(c[g>>2]|0)+4>>2]|0):0)?(c[(c[f>>2]|0)+4>>2]|0)==(c[(c[g>>2]|0)+8>>2]|0):0){j=1;j=j&1;i=k;return j|0}j=(_p(c[f>>2]|0,c[g>>2]|0,c[h>>2]|0,c[j>>2]|0)|0)!=0;j=j&1;i=k;return j|0}function _p(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0;j=i;i=i+16|0;f=j+12|0;g=j+8|0;h=j+4|0;k=j;c[f>>2]=a;c[g>>2]=b;c[h>>2]=d;c[k>>2]=e;if((c[k>>2]|0)!=1){a=0;a=a&1;i=j;return a|0}if((c[(c[g>>2]|0)+4>>2]|0)!=(c[h>>2]|0)){a=0;a=a&1;i=j;return a|0}if((c[(c[f>>2]|0)+8>>2]|0)!=(c[h>>2]|0)){a=0;a=a&1;i=j;return a|0}if((((c[c[f>>2]>>2]|0)==(c[c[g>>2]>>2]|0)?(c[(c[f>>2]|0)+4>>2]|0)==(c[(c[g>>2]|0)+8>>2]|0):0)?(c[(c[f>>2]|0)+4>>2]|0)>=(c[c[g>>2]>>2]|0):0)?((c[(c[f>>2]|0)+4>>2]|0)%(c[h>>2]|0)|0|0)==0:0){a=1;a=a&1;i=j;return a|0}if((c[(c[f>>2]|0)+4>>2]|0)!=(_(c[c[g>>2]>>2]|0,c[h>>2]|0)|0)){a=0;a=a&1;i=j;return a|0}a=(c[(c[g>>2]|0)+8>>2]|0)==(_(c[c[f>>2]>>2]|0,c[h>>2]|0)|0);a=a&1;i=j;return a|0}function $p(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0;e=i;i=i+32|0;m=e+28|0;k=e+24|0;l=e+16|0;h=e+12|0;g=e+8|0;j=e+4|0;f=e;c[m>>2]=a;c[k>>2]=b;c[e+20>>2]=d;c[l>>2]=c[m>>2];c[h>>2]=c[(c[l>>2]|0)+64>>2];c[g>>2]=c[(c[l>>2]|0)+68>>2];c[j>>2]=c[(c[l>>2]|0)+72>>2];c[f>>2]=wb(c[(c[l>>2]|0)+76>>2]<<2)|0;cq(c[k>>2]|0,c[h>>2]|0,c[g>>2]|0,c[j>>2]|0,(c[f>>2]|0)+(c[j>>2]<<1<<2)|0,((c[h>>2]|0)+(c[g>>2]|0)|0)/2|0,c[f>>2]|0);xb(c[f>>2]|0);i=e;return}function aq(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var 
h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;q=i;i=i+48|0;h=q+36|0;j=q+32|0;k=q+28|0;l=q+24|0;s=q+20|0;r=q+16|0;n=q+12|0;m=q+8|0;o=q+4|0;p=q;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[s>>2]=f;c[r>>2]=g;c[n>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+4+((c[k>>2]|0)*12|0)>>2];c[m>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+4+((c[l>>2]|0)*12|0)>>2];Tp(c[h>>2]|0,c[s>>2]|0,o,p);c[c[r>>2]>>2]=(c[o>>2]<<1)+((((((c[n>>2]|0)+(c[m>>2]|0)|0)/2|0)+4-1|0)>>>0)/4|0);if(c[(c[j>>2]|0)+164>>2]&8){g=0;g=g&1;i=q;return g|0}if((c[o>>2]|0)<=8?(c[(c[j>>2]|0)+164>>2]&65536|0)!=0:0){g=0;g=g&1;i=q;return g|0}if((c[n>>2]|0)==(c[m>>2]|0)){g=0;g=g&1;i=q;return g|0}g=(_p((c[(c[h>>2]|0)+8>>2]|0)+4+((c[k>>2]|0)*12|0)|0,(c[(c[h>>2]|0)+8>>2]|0)+4+((c[l>>2]|0)*12|0)|0,c[o>>2]|0,c[p>>2]|0)|0)!=0;g=g&1;i=q;return g|0}function bq(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0.0,g=0;e=i;i=i+16|0;g=e;c[e+8>>2]=a;c[e+4>>2]=b;c[g>>2]=d;d=(_(c[(c[g>>2]|0)+64>>2]|0,c[(c[g>>2]|0)+68>>2]|0)|0)<<1;f=+(_(d,(c[(c[g>>2]|0)+72>>2]|0)+30|0)|0);d=(c[g>>2]|0)+8+24|0;h[d>>3]=+h[d>>3]+f;i=e;return 1}function cq(b,d,e,f,h,j,k){b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;G=i;i=i+96|0;l=G+80|0;m=G+76|0;n=G+72|0;o=G+68|0;p=G+64|0;q=G+60|0;H=G+56|0;u=G+52|0;z=G+48|0;D=G+44|0;r=G+40|0;s=G+36|0;t=G+32|0;E=G+28|0;A=G+24|0;v=G+20|0;x=G+16|0;w=G+12|0;y=G+8|0;B=G+4|0;C=G;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[o>>2]=f;c[p>>2]=h;c[q>>2]=j;c[H>>2]=k;c[r>>2]=c[H>>2];c[s>>2]=(c[H>>2]|0)+(c[o>>2]<<2);c[E>>2]=2;h=_(c[n>>2]|0,c[m>>2]|0)|0;c[D>>2]=h;c[A>>2]=h-1;c[u>>2]=0;while(1){if((c[u>>2]|0)>=(c[q>>2]|0))break;a[(c[p>>2]|0)+(c[u>>2]|0)>>0]=0;c[u>>2]=(c[u>>2]|0)+1}if((c[n>>2]|0)>=3&(c[m>>2]|0)>=3){h=(Up((c[n>>2]|0)-1|0,(c[m>>2]|0)-1|0)|0)-1|0;c[E>>2]=(c[E>>2]|0)+h}c[u>>2]=1;c[z>>2]=c[n>>2];a:while(1){c[v>>2]=c[u>>2];c[B>>2]=(c[A>>2]|0)-(c[u>>2]|0);c[w>>2]=c[B>>2];switch(c[o>>2]|0){case 1:{g[c[r>>2]>>2]=+g[(c[l>>2]|0)+(c[v>>2]<<2)>>2];g[c[s>>2]>>2]=+g[(c[l>>2]|0)+(c[w>>2]<<2)>>2];break}case 
2:{g[c[r>>2]>>2]=+g[(c[l>>2]|0)+(c[v>>2]<<1<<2)>>2];g[(c[r>>2]|0)+4>>2]=+g[(c[l>>2]|0)+((c[v>>2]<<1)+1<<2)>>2];g[c[s>>2]>>2]=+g[(c[l>>2]|0)+(c[w>>2]<<1<<2)>>2];g[(c[s>>2]|0)+4>>2]=+g[(c[l>>2]|0)+((c[w>>2]<<1)+1<<2)>>2];break}default:{h=_(c[o>>2]|0,c[v>>2]|0)|0;Zy(c[r>>2]|0,(c[l>>2]|0)+(h<<2)|0,c[o>>2]<<2|0)|0;h=_(c[o>>2]|0,c[w>>2]|0)|0;Zy(c[s>>2]|0,(c[l>>2]|0)+(h<<2)|0,c[o>>2]<<2|0)|0}}while(1){h=_(c[n>>2]|0,c[v>>2]|0)|0;c[x>>2]=h-(_(c[A>>2]|0,(c[v>>2]|0)/(c[m>>2]|0)|0)|0);c[y>>2]=(c[A>>2]|0)-(c[x>>2]|0);if((c[v>>2]|0)<(c[q>>2]|0))a[(c[p>>2]|0)+(c[v>>2]|0)>>0]=1;if((c[w>>2]|0)<(c[q>>2]|0))a[(c[p>>2]|0)+(c[w>>2]|0)>>0]=1;c[E>>2]=(c[E>>2]|0)+2;if((c[x>>2]|0)==(c[u>>2]|0))break;if((c[x>>2]|0)==(c[B>>2]|0)){F=17;break}switch(c[o>>2]|0){case 1:{g[(c[l>>2]|0)+(c[v>>2]<<2)>>2]=+g[(c[l>>2]|0)+(c[x>>2]<<2)>>2];g[(c[l>>2]|0)+(c[w>>2]<<2)>>2]=+g[(c[l>>2]|0)+(c[y>>2]<<2)>>2];break}case 2:{g[(c[l>>2]|0)+(c[v>>2]<<1<<2)>>2]=+g[(c[l>>2]|0)+(c[x>>2]<<1<<2)>>2];g[(c[l>>2]|0)+((c[v>>2]<<1)+1<<2)>>2]=+g[(c[l>>2]|0)+((c[x>>2]<<1)+1<<2)>>2];g[(c[l>>2]|0)+(c[w>>2]<<1<<2)>>2]=+g[(c[l>>2]|0)+(c[y>>2]<<1<<2)>>2];g[(c[l>>2]|0)+((c[w>>2]<<1)+1<<2)>>2]=+g[(c[l>>2]|0)+((c[y>>2]<<1)+1<<2)>>2];break}default:{h=_(c[o>>2]|0,c[v>>2]|0)|0;k=_(c[o>>2]|0,c[x>>2]|0)|0;Zy((c[l>>2]|0)+(h<<2)|0,(c[l>>2]|0)+(k<<2)|0,c[o>>2]<<2|0)|0;k=_(c[o>>2]|0,c[w>>2]|0)|0;h=_(c[o>>2]|0,c[y>>2]|0)|0;Zy((c[l>>2]|0)+(k<<2)|0,(c[l>>2]|0)+(h<<2)|0,c[o>>2]<<2|0)|0}}c[v>>2]=c[x>>2];c[w>>2]=c[y>>2]}if((F|0)==17){F=0;c[t>>2]=c[r>>2];c[r>>2]=c[s>>2];c[s>>2]=c[t>>2]}switch(c[o>>2]|0){case 1:{g[(c[l>>2]|0)+(c[v>>2]<<2)>>2]=+g[c[r>>2]>>2];g[(c[l>>2]|0)+(c[w>>2]<<2)>>2]=+g[c[s>>2]>>2];break}case 
2:{g[(c[l>>2]|0)+(c[v>>2]<<1<<2)>>2]=+g[c[r>>2]>>2];g[(c[l>>2]|0)+((c[v>>2]<<1)+1<<2)>>2]=+g[(c[r>>2]|0)+4>>2];g[(c[l>>2]|0)+(c[w>>2]<<1<<2)>>2]=+g[c[s>>2]>>2];g[(c[l>>2]|0)+((c[w>>2]<<1)+1<<2)>>2]=+g[(c[s>>2]|0)+4>>2];break}default:{h=_(c[o>>2]|0,c[v>>2]|0)|0;Zy((c[l>>2]|0)+(h<<2)|0,c[r>>2]|0,c[o>>2]<<2|0)|0;h=_(c[o>>2]|0,c[w>>2]|0)|0;Zy((c[l>>2]|0)+(h<<2)|0,c[s>>2]|0,c[o>>2]<<2|0)|0}}if((c[E>>2]|0)>=(c[D>>2]|0))break;while(1){c[C>>2]=(c[A>>2]|0)-(c[u>>2]|0);c[u>>2]=(c[u>>2]|0)+1;c[z>>2]=(c[z>>2]|0)+(c[n>>2]|0);if((c[z>>2]|0)>(c[A>>2]|0))c[z>>2]=(c[z>>2]|0)-(c[A>>2]|0);c[x>>2]=c[z>>2];if((c[u>>2]|0)==(c[x>>2]|0))continue;if((c[u>>2]|0)<(c[q>>2]|0))if(a[(c[p>>2]|0)+(c[u>>2]|0)>>0]|0)continue;else continue a;while(1){if((c[x>>2]|0)>(c[u>>2]|0))f=(c[x>>2]|0)<(c[C>>2]|0);else f=0;b=c[x>>2]|0;if(!f)break;c[v>>2]=b;h=_(c[n>>2]|0,c[v>>2]|0)|0;c[x>>2]=h-(_(c[A>>2]|0,(c[v>>2]|0)/(c[m>>2]|0)|0)|0)}if((b|0)==(c[u>>2]|0))continue a}}i=G;return}function dq(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;p=i;i=i+48|0;q=p+44|0;e=p+40|0;h=p+32|0;m=p+28|0;k=p+24|0;n=p+20|0;l=p+16|0;o=p+12|0;j=p+8|0;f=p+4|0;g=p;c[q>>2]=a;c[e>>2]=b;c[p+36>>2]=d;c[h>>2]=c[q>>2];c[m>>2]=c[(c[h>>2]|0)+64>>2];c[k>>2]=c[(c[h>>2]|0)+68>>2];c[n>>2]=c[(c[h>>2]|0)+92>>2];c[l>>2]=c[(c[h>>2]|0)+96>>2];c[o>>2]=c[(c[h>>2]|0)+72>>2];c[f>>2]=wb(c[(c[h>>2]|0)+76>>2]<<2)|0;a:do if((c[k>>2]|0)>(c[l>>2]|0)){a=(c[e>>2]|0)+((_(c[l>>2]|0,c[o>>2]|0)|0)<<2)|0;eb[c[(c[(c[h>>2]|0)+100>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+100>>2]|0,a,c[f>>2]|0);c[j>>2]=0;while(1){if((c[j>>2]|0)>=(c[n>>2]|0))break 
a;b=_(c[l>>2]|0,c[o>>2]|0)|0;b=(c[e>>2]|0)+((_(b,c[j>>2]|0)|0)<<2)|0;a=_(c[k>>2]|0,c[o>>2]|0)|0;a=(c[e>>2]|0)+((_(a,c[j>>2]|0)|0)<<2)|0;_y(b|0,a|0,(_(c[l>>2]|0,c[o>>2]|0)|0)<<2|0)|0;c[j>>2]=(c[j>>2]|0)+1}}while(0);eb[c[(c[(c[h>>2]|0)+104>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+104>>2]|0,c[e>>2]|0,c[e>>2]|0);if((c[m>>2]|0)>(c[n>>2]|0)){c[g>>2]=(c[f>>2]|0)+((_((c[k>>2]|0)-(c[l>>2]|0)|0,_(c[n>>2]|0,c[o>>2]|0)|0)|0)<<2);a=(c[e>>2]|0)+((_(c[n>>2]|0,_(c[k>>2]|0,c[o>>2]|0)|0)|0)<<2)|0;Zy(c[g>>2]|0,a|0,(_((c[m>>2]|0)-(c[n>>2]|0)|0,_(c[k>>2]|0,c[o>>2]|0)|0)|0)<<2|0)|0;c[j>>2]=(c[l>>2]|0)-1;while(1){if((c[j>>2]|0)<0)break;b=_(c[m>>2]|0,c[o>>2]|0)|0;b=(c[e>>2]|0)+((_(b,c[j>>2]|0)|0)<<2)|0;a=_(c[n>>2]|0,c[o>>2]|0)|0;a=(c[e>>2]|0)+((_(a,c[j>>2]|0)|0)<<2)|0;_y(b|0,a|0,(_(c[m>>2]|0,c[o>>2]|0)|0)<<2|0)|0;c[j>>2]=(c[j>>2]|0)+-1}a=(c[e>>2]|0)+((_(c[n>>2]|0,c[o>>2]|0)|0)<<2)|0;eb[c[(c[(c[h>>2]|0)+108>>2]|0)+56>>2]&63](c[(c[h>>2]|0)+108>>2]|0,c[g>>2]|0,a)}if((c[k>>2]|0)<=(c[l>>2]|0)){l=c[f>>2]|0;xb(l);i=p;return}if((c[m>>2]|0)<=(c[n>>2]|0)){j=(c[e>>2]|0)+((_(c[l>>2]|0,_(c[m>>2]|0,c[o>>2]|0)|0)|0)<<2)|0;Zy(j|0,c[f>>2]|0,(_((c[k>>2]|0)-(c[l>>2]|0)|0,_(c[m>>2]|0,c[o>>2]|0)|0)|0)<<2|0)|0;l=c[f>>2]|0;xb(l);i=p;return}c[j>>2]=c[l>>2];while(1){if((c[j>>2]|0)>=(c[k>>2]|0))break;h=(c[e>>2]|0)+((_(c[j>>2]|0,_(c[m>>2]|0,c[o>>2]|0)|0)|0)<<2)|0;a=(c[f>>2]|0)+((_((c[j>>2]|0)-(c[l>>2]|0)|0,_(c[n>>2]|0,c[o>>2]|0)|0)|0)<<2)|0;Zy(h|0,a|0,(_(c[n>>2]|0,c[o>>2]|0)|0)<<2|0)|0;c[j>>2]=(c[j>>2]|0)+1}l=c[f>>2]|0;xb(l);i=p;return}function eq(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;p=i;i=i+48|0;h=p+36|0;q=p+32|0;j=p+28|0;k=p+24|0;s=p+20|0;r=p+16|0;m=p+12|0;l=p+8|0;n=p+4|0;o=p;c[h>>2]=a;c[q>>2]=b;c[j>>2]=d;c[k>>2]=e;c[s>>2]=f;c[r>>2]=g;c[m>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+4+((c[j>>2]|0)*12|0)>>2];c[l>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+4+((c[k>>2]|0)*12|0)>>2];Tp(c[h>>2]|0,c[s>>2]|0,n,o);c[c[r>>2]>>2]=0;if(c[(c[q>>2]|0)+164>>2]&8){a=0;a=a&1;i=p;return 
a|0}if((c[m>>2]|0)==(c[l>>2]|0)){a=0;a=a&1;i=p;return a|0}if((gq(c[m>>2]|0,c[l>>2]|0,c[n>>2]|0)|0)==0?(a=Up(c[m>>2]|0,c[l>>2]|0)|0,(a|0)>=(ec(9,ec(c[m>>2]|0,c[l>>2]|0)|0)|0)):0){a=0;a=a&1;i=p;return a|0}a=(_p((c[(c[h>>2]|0)+8>>2]|0)+4+((c[j>>2]|0)*12|0)|0,(c[(c[h>>2]|0)+8>>2]|0)+4+((c[k>>2]|0)*12|0)|0,c[n>>2]|0,c[o>>2]|0)|0)!=0;a=a&1;i=p;return a|0}function fq(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0.0;v=i;i=i+64|0;e=v+52|0;f=v+48|0;g=v+44|0;j=v+40|0;q=v+36|0;n=v+32|0;r=v+28|0;o=v+24|0;t=v+20|0;k=v+16|0;l=v+12|0;s=v+8|0;p=v+4|0;m=v;c[f>>2]=a;c[g>>2]=b;c[j>>2]=d;c[q>>2]=c[(c[j>>2]|0)+64>>2];c[n>>2]=c[(c[j>>2]|0)+68>>2];c[t>>2]=c[(c[j>>2]|0)+72>>2];a:do if(gq(c[q>>2]|0,c[n>>2]|0,c[t>>2]|0)|0){d=ec(c[q>>2]|0,c[n>>2]|0)|0;c[o>>2]=d;c[r>>2]=d}else{c[l>>2]=Up(c[n>>2]|0,c[q>>2]|0)|0;c[r>>2]=c[q>>2];c[o>>2]=c[n>>2];c[p>>2]=c[n>>2];while(1){if((c[p>>2]|0)<=0)break a;if((c[p>>2]|0)<=((c[n>>2]|0)-32|0))break a;c[s>>2]=c[q>>2];while(1){if((c[s>>2]|0)<=0)break;if((c[s>>2]|0)<=((c[q>>2]|0)-32|0))break;c[m>>2]=Up(c[p>>2]|0,c[s>>2]|0)|0;if((c[m>>2]|0)>(c[l>>2]|0)?(c[l>>2]=c[m>>2],c[r>>2]=c[s>>2],c[o>>2]=c[p>>2],d=c[l>>2]|0,(d|0)==(ec(c[s>>2]|0,c[p>>2]|0)|0)):0)break;c[s>>2]=(c[s>>2]|0)+-1}d=c[l>>2]|0;if((d|0)==(ec(c[q>>2]|0,c[p>>2]|0)|0))break a;c[p>>2]=(c[p>>2]|0)+-1}}while(0);c[(c[j>>2]|0)+92>>2]=c[r>>2];c[(c[j>>2]|0)+96>>2]=c[o>>2];d=_((c[n>>2]|0)-(c[o>>2]|0)|0,_(c[r>>2]|0,c[t>>2]|0)|0)|0;d=d+(_((c[q>>2]|0)-(c[r>>2]|0)|0,_(c[n>>2]|0,c[t>>2]|0)|0)|0)|0;c[(c[j>>2]|0)+76>>2]=d;c[k>>2]=wb(c[(c[j>>2]|0)+76>>2]<<2)|0;if((c[n>>2]|0)>(c[o>>2]|0)){l=c[g>>2]|0;d=_(c[n>>2]|0,c[t>>2]|0)|0;m=_(c[r>>2]|0,c[t>>2]|0)|0;m=Gd(c[r>>2]|0,d,c[t>>2]|0,(c[n>>2]|0)-(c[o>>2]|0)|0,c[t>>2]|0,m,c[t>>2]|0,1,1)|0;d=(c[(c[f>>2]|0)+12>>2]|0)+((_(c[o>>2]|0,c[t>>2]|0)|0)<<2)|0;d=uc(l,Jn(m,d,c[k>>2]|0)|0)|0;c[(c[j>>2]|0)+100>>2]=d;if(c[(c[j>>2]|0)+100>>2]|0){kc((c[(c[j>>2]|0)+100>>2]|0)+8|0,(c[j>>2]|0)+8|0);u=17}}else u=17;do 
if((u|0)==17?(m=c[g>>2]|0,l=_(c[o>>2]|0,c[t>>2]|0)|0,d=_(c[r>>2]|0,c[t>>2]|0)|0,d=Gd(c[r>>2]|0,l,c[t>>2]|0,c[o>>2]|0,c[t>>2]|0,d,c[t>>2]|0,1,1)|0,d=uc(m,Jn(d,c[(c[f>>2]|0)+12>>2]|0,c[(c[f>>2]|0)+12>>2]|0)|0)|0,c[(c[j>>2]|0)+104>>2]=d,(c[(c[j>>2]|0)+104>>2]|0)!=0):0){kc((c[(c[j>>2]|0)+104>>2]|0)+8|0,(c[j>>2]|0)+8|0);if((c[q>>2]|0)>(c[r>>2]|0)){l=c[g>>2]|0;d=_(c[n>>2]|0,c[t>>2]|0)|0;m=_(c[q>>2]|0,c[t>>2]|0)|0;m=Gd((c[q>>2]|0)-(c[r>>2]|0)|0,d,c[t>>2]|0,c[n>>2]|0,c[t>>2]|0,m,c[t>>2]|0,1,1)|0;d=(c[k>>2]|0)+((_((c[n>>2]|0)-(c[o>>2]|0)|0,_(c[r>>2]|0,c[t>>2]|0)|0)|0)<<2)|0;d=uc(l,Jn(m,d,(c[(c[f>>2]|0)+12>>2]|0)+((_(c[r>>2]|0,c[t>>2]|0)|0)<<2)|0)|0)|0;c[(c[j>>2]|0)+108>>2]=d;if(!(c[(c[j>>2]|0)+108>>2]|0))break;kc((c[(c[j>>2]|0)+108>>2]|0)+8|0,(c[j>>2]|0)+8|0)}d=_(c[r>>2]|0,c[o>>2]|0)|0;d=_(d,((c[n>>2]|0)>(c[o>>2]|0)&1)+((c[q>>2]|0)>(c[r>>2]|0)&1)|0)|0;d=d+(_((c[q>>2]|0)-(c[r>>2]|0)|0,c[n>>2]|0)|0)|0;w=+(_(c[t>>2]<<1,d+(_((c[n>>2]|0)-(c[o>>2]|0)|0,c[r>>2]|0)|0)|0)|0);o=(c[j>>2]|0)+8+24|0;h[o>>3]=+h[o>>3]+w;xb(c[k>>2]|0);c[e>>2]=1;o=c[e>>2]|0;i=v;return o|0}while(0);xb(c[k>>2]|0);c[e>>2]=0;o=c[e>>2]|0;i=v;return o|0}function gq(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0;h=i;i=i+16|0;e=h+8|0;f=h+4|0;g=h;c[e>>2]=a;c[f>>2]=b;c[g>>2]=d;b=dc(c[e>>2]|0,c[f>>2]|0)|0;if((b|0)>=((Tb((c[e>>2]|0)-(c[f>>2]|0)|0)|0)*9|0)){b=1;b=b&1;i=h;return b|0}b=ec(c[e>>2]|0,c[f>>2]|0)|0;b=_(b,Tb((c[e>>2]|0)-(c[f>>2]|0)|0)|0)|0;b=(_(b,c[g>>2]|0)|0)<=65536;b=b&1;i=h;return b|0}function hq(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;p=i;i=i+64|0;s=p+52|0;e=p+48|0;l=p+40|0;q=p+36|0;n=p+32|0;k=p+28|0;r=p+24|0;f=p+20|0;m=p+16|0;o=p+12|0;g=p+8|0;h=p+4|0;j=p;c[s>>2]=a;c[e>>2]=b;c[p+44>>2]=d;c[l>>2]=c[s>>2];c[q>>2]=c[(c[l>>2]|0)+80>>2];c[n>>2]=c[(c[l>>2]|0)+84>>2];c[k>>2]=c[(c[l>>2]|0)+88>>2];c[r>>2]=c[(c[l>>2]|0)+72>>2];c[f>>2]=wb(c[(c[l>>2]|0)+76>>2]<<2)|0;a=_(c[q>>2]|0,c[n>>2]|0)|0;a=_(a,c[k>>2]|0)|0;c[o>>2]=_(a,c[r>>2]|0)|0;a:do 
if((c[q>>2]|0)>1){c[g>>2]=c[(c[(c[l>>2]|0)+100>>2]|0)+56>>2];c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[k>>2]|0))break a;a=(c[e>>2]|0)+((_(c[m>>2]|0,c[o>>2]|0)|0)<<2)|0;eb[c[g>>2]&63](c[(c[l>>2]|0)+100>>2]|0,a,c[f>>2]|0);a=(c[e>>2]|0)+((_(c[m>>2]|0,c[o>>2]|0)|0)<<2)|0;Zy(a|0,c[f>>2]|0,c[o>>2]<<2|0)|0;c[m>>2]=(c[m>>2]|0)+1}}while(0);c[h>>2]=c[(c[(c[l>>2]|0)+104>>2]|0)+56>>2];eb[c[h>>2]&63](c[(c[l>>2]|0)+104>>2]|0,c[e>>2]|0,c[e>>2]|0);if((c[n>>2]|0)<=1){a=c[f>>2]|0;xb(a);i=p;return}c[j>>2]=c[(c[(c[l>>2]|0)+108>>2]|0)+56>>2];c[m>>2]=0;while(1){if((c[m>>2]|0)>=(c[k>>2]|0))break;a=(c[e>>2]|0)+((_(c[m>>2]|0,c[o>>2]|0)|0)<<2)|0;eb[c[j>>2]&63](c[(c[l>>2]|0)+108>>2]|0,a,c[f>>2]|0);a=(c[e>>2]|0)+((_(c[m>>2]|0,c[o>>2]|0)|0)<<2)|0;Zy(a|0,c[f>>2]|0,c[o>>2]<<2|0)|0;c[m>>2]=(c[m>>2]|0)+1}a=c[f>>2]|0;xb(a);i=p;return}function iq(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0;q=i;i=i+48|0;h=q+40|0;r=q+36|0;j=q+32|0;k=q+28|0;t=q+24|0;s=q+20|0;n=q+16|0;m=q+12|0;l=q+8|0;o=q+4|0;p=q;c[h>>2]=a;c[r>>2]=b;c[j>>2]=d;c[k>>2]=e;c[t>>2]=f;c[s>>2]=g;c[n>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+4+((c[j>>2]|0)*12|0)>>2];c[m>>2]=c[(c[(c[h>>2]|0)+8>>2]|0)+4+((c[k>>2]|0)*12|0)>>2];Tp(c[h>>2]|0,c[t>>2]|0,o,p);c[l>>2]=Up(c[n>>2]|0,c[m>>2]|0)|0;g=_(c[n>>2]|0,(c[m>>2]|0)/(c[l>>2]|0)|0)|0;g=_(g,c[o>>2]|0)|0;c[c[s>>2]>>2]=g;if(c[(c[r>>2]|0)+164>>2]&8){g=0;g=g&1;i=q;return g|0}if(!((c[l>>2]|0)>1?(c[n>>2]|0)!=(c[m>>2]|0):0)){g=0;g=g&1;i=q;return g|0}g=(_p((c[(c[h>>2]|0)+8>>2]|0)+4+((c[j>>2]|0)*12|0)|0,(c[(c[h>>2]|0)+8>>2]|0)+4+((c[k>>2]|0)*12|0)|0,c[o>>2]|0,c[p>>2]|0)|0)!=0;g=g&1;i=q;return g|0}function jq(a,b,d){a=a|0;b=b|0;d=d|0;var 
e=0,f=0,g=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0.0;r=i;i=i+48|0;e=r+36|0;f=r+32|0;g=r+28|0;j=r+24|0;n=r+20|0;m=r+16|0;l=r+12|0;p=r+8|0;k=r+4|0;o=r;c[f>>2]=a;c[g>>2]=b;c[j>>2]=d;c[n>>2]=c[(c[j>>2]|0)+80>>2];c[m>>2]=c[(c[j>>2]|0)+84>>2];c[l>>2]=c[(c[j>>2]|0)+88>>2];c[p>>2]=c[(c[j>>2]|0)+72>>2];c[k>>2]=wb(c[(c[j>>2]|0)+76>>2]<<2)|0;d=_(c[n>>2]|0,c[m>>2]|0)|0;d=_(d,c[l>>2]|0)|0;c[o>>2]=_(d,c[p>>2]|0)|0;if((c[n>>2]|0)>1){a=c[g>>2]|0;t=_(c[l>>2]|0,c[m>>2]|0)|0;t=_(t,c[p>>2]|0)|0;s=_(c[m>>2]|0,c[p>>2]|0)|0;b=_(c[m>>2]|0,c[p>>2]|0)|0;d=_(c[n>>2]|0,c[m>>2]|0)|0;d=_(d,c[p>>2]|0)|0;d=Gd(c[n>>2]|0,t,s,c[l>>2]|0,b,d,_(c[m>>2]|0,c[p>>2]|0)|0,1,1)|0;d=uc(a,Jn(d,c[(c[f>>2]|0)+12>>2]|0,c[k>>2]|0)|0)|0;c[(c[j>>2]|0)+100>>2]=d;if(c[(c[j>>2]|0)+100>>2]|0){ic(c[l>>2]|0,(c[(c[j>>2]|0)+100>>2]|0)+8|0,(c[j>>2]|0)+8|0,(c[j>>2]|0)+8|0);u=+((_(c[o>>2]|0,c[l>>2]|0)|0)<<1|0);q=(c[j>>2]|0)+8+24|0;h[q>>3]=+h[q>>3]+u;q=4}}else q=4;do if((q|0)==4?(a=c[g>>2]|0,t=_(c[l>>2]|0,c[n>>2]|0)|0,t=_(t,c[m>>2]|0)|0,t=_(t,c[p>>2]|0)|0,s=_(c[n>>2]|0,c[m>>2]|0)|0,s=_(s,c[p>>2]|0)|0,q=_(c[n>>2]|0,c[m>>2]|0)|0,q=_(q,c[p>>2]|0)|0,b=_(c[l>>2]|0,c[n>>2]|0)|0,b=_(b,c[m>>2]|0)|0,b=_(b,c[p>>2]|0)|0,d=_(c[n>>2]|0,c[m>>2]|0)|0,d=Gd(c[l>>2]|0,t,s,c[l>>2]|0,q,b,_(d,c[p>>2]|0)|0,1,1)|0,d=uc(a,Jn(d,c[(c[f>>2]|0)+12>>2]|0,c[(c[f>>2]|0)+12>>2]|0)|0)|0,c[(c[j>>2]|0)+104>>2]=d,(c[(c[j>>2]|0)+104>>2]|0)!=0):0){kc((c[(c[j>>2]|0)+104>>2]|0)+8|0,(c[j>>2]|0)+8|0);if((c[m>>2]|0)>1){d=c[g>>2]|0;a=_(c[l>>2]|0,c[n>>2]|0)|0;g=_(c[m>>2]|0,c[p>>2]|0)|0;n=_(c[l>>2]|0,c[n>>2]|0)|0;n=_(n,c[p>>2]|0)|0;n=Gd(a,g,c[p>>2]|0,c[m>>2]|0,c[p>>2]|0,n,c[p>>2]|0,1,1)|0;n=uc(d,Jn(n,c[(c[f>>2]|0)+12>>2]|0,c[k>>2]|0)|0)|0;c[(c[j>>2]|0)+108>>2]=n;if(!(c[(c[j>>2]|0)+108>>2]|0))break;lc(c[l>>2]|0,(c[(c[j>>2]|0)+108>>2]|0)+8|0,(c[j>>2]|0)+8|0);u=+((_(c[o>>2]|0,c[l>>2]|0)|0)<<1|0);o=(c[j>>2]|0)+8+24|0;h[o>>3]=+h[o>>3]+u}xb(c[k>>2]|0);c[e>>2]=1;o=c[e>>2]|0;i=r;return o|0}while(0);xb(c[k>>2]|0);c[e>>2]=0;o=c[e>>2]|0;i=r;return o|0}function 
kq(a,b,d,e,f,g,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;var l=0;l=i;i=i+48|0;c[l+32>>2]=a;c[l+28>>2]=b;c[l+24>>2]=d;c[l+20>>2]=e;c[l+16>>2]=f;c[l+12>>2]=g;c[l+8>>2]=h;c[l+4>>2]=j;c[l>>2]=k;i=l;return 1}function lq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,24,3208,0);i=b;return}function mq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0;Dd=i;i=i+960|0;m=Dd+944|0;n=Dd+940|0;o=Dd+936|0;p=Dd+932|0;q=Dd+928|0;r=Dd+924|0;Ed=Dd+920|0;s=Dd+916|0;t=Dd+912|0;Cd=Dd+896|0;Da=Dd+892|0;zc=Dd+888|0;id=Dd+884|0;kd=Dd+880|0;md=Dd+876|0;qd=Dd+872|0;Ic=Dd+868|0;Gc=Dd+864|0;Ac=Dd+860|0;Mb=Dd+856|0;Cc=Dd+852|0;_c=Dd+848|0;Nc=Dd+844|0;vd=Dd+840|0;Yc=Dd+836|0;ed=Dd+832|0;zd=Dd+828|0;Lc=Dd+824|0;Rc=Dd+820|0;Sc=Dd+816|0;Tc=Dd+812|0;Vc=Dd+808|0;ia=Dd+804|0;na=Dd+800|0;ea=Dd+796|0;la=Dd+792|0;Ba=Dd+788|0;M=Dd+784|0
;xa=Dd+780|0;K=Dd+776|0;Bc=Dd+772|0;xd=Dd+768|0;cd=Dd+764|0;ud=Dd+760|0;yc=Dd+756|0;yd=Dd+752|0;dd=Dd+748|0;td=Dd+744|0;jd=Dd+740|0;pd=Dd+736|0;ld=Dd+732|0;od=Dd+728|0;ga=Dd+724|0;ha=Dd+720|0;E=Dd+716|0;da=Dd+712|0;za=Dd+708|0;Aa=Dd+704|0;va=Dd+700|0;wa=Dd+696|0;hd=Dd+692|0;pc=Dd+688|0;S=Dd+684|0;cc=Dd+680|0;Ec=Dd+676|0;qc=Dd+672|0;V=Dd+668|0;$b=Dd+664|0;Qc=Dd+660|0;_a=Dd+656|0;$=Dd+652|0;Ib=Dd+648|0;bd=Dd+644|0;$a=Dd+640|0;Fa=Dd+636|0;Jb=Dd+632|0;G=Dd+628|0;P=Dd+624|0;hb=Dd+620|0;ib=Dd+616|0;jb=Dd+612|0;kb=Dd+608|0;vb=Dd+604|0;Ra=Dd+600|0;Ab=Dd+596|0;Sa=Dd+592|0;D=Dd+588|0;qa=Dd+584|0;cb=Dd+580|0;db=Dd+576|0;eb=Dd+572|0;fb=Dd+568|0;Ma=Dd+564|0;Oa=Dd+560|0;pb=Dd+556|0;Pa=Dd+552|0;u=Dd+548|0;bc=Dd+544|0;gd=Dd+540|0;ac=Dd+536|0;Dc=Dd+532|0;fd=Dd+528|0;sd=Dd+524|0;T=Dd+520|0;Bd=Dd+516|0;U=Dd+512|0;nd=Dd+508|0;rd=Dd+504|0;wd=Dd+500|0;Ad=Dd+496|0;Kc=Dd+492|0;X=Dd+488|0;Pc=Dd+484|0;Y=Dd+480|0;Z=Dd+476|0;_=Dd+472|0;Hc=Dd+468|0;Jc=Dd+464|0;Mc=Dd+460|0;Oc=Dd+456|0;Xc=Dd+452|0;ba=Dd+448|0;ad=Dd+444|0;ca=Dd+440|0;aa=Dd+436|0;Ea=Dd+432|0;Uc=Dd+428|0;Wc=Dd+424|0;Zc=Dd+420|0;$c=Dd+416|0;ua=Dd+412|0;wb=Dd+408|0;O=Dd+404|0;tb=Dd+400|0;F=Dd+396|0;xb=Dd+392|0;J=Dd+388|0;sb=Dd+384|0;sa=Dd+380|0;ta=Dd+376|0;L=Dd+372|0;N=Dd+368|0;ya=Dd+364|0;Ca=Dd+360|0;H=Dd+356|0;I=Dd+352|0;rb=Dd+348|0;ub=Dd+344|0;yb=Dd+340|0;zb=Dd+336|0;z=Dd+332|0;Ia=Dd+328|0;pa=Dd+324|0;nb=Dd+320|0;C=Dd+316|0;Ja=Dd+312|0;ka=Dd+308|0;mb=Dd+304|0;x=Dd+300|0;y=Dd+296|0;ma=Dd+292|0;oa=Dd+288|0;A=Dd+284|0;B=Dd+280|0;fa=Dd+276|0;ja=Dd+272|0;Ka=Dd+268|0;La=Dd+264|0;Na=Dd+260|0;ob=Dd+256|0;Ha=Dd+252|0;Db=Dd+248|0;Xb=Dd+244|0;Zb=Dd+240|0;Cb=Dd+236|0;Yb=Dd+232|0;Gb=Dd+228|0;vc=Dd+224|0;W=Dd+220|0;Ga=Dd+216|0;wc=Dd+212|0;xc=Dd+208|0;qb=Dd+204|0;Bb=Dd+200|0;Eb=Dd+196|0;Fb=Dd+192|0;bb=Dd+188|0;Ob=Dd+184|0;kc=Dd+180|0;mc=Dd+176|0;Nb=Dd+172|0;lc=Dd+168|0;Rb=Dd+164|0;hc=Dd+160|0;Za=Dd+156|0;ab=Dd+152|0;ic=Dd+148|0;jc=Dd+144|0;gb=Dd+140|0;lb=Dd+136|0;Pb=Dd+132|0;Qb=Dd+128|0;Lb=Dd+124|0;Va=Dd+120|0;sc=Dd+116|0;uc=Dd+112|0;Ua=Dd+108|0;
tc=Dd+104|0;Ya=Dd+100|0;nc=Dd+96|0;Hb=Dd+92|0;Kb=Dd+88|0;oc=Dd+84|0;rc=Dd+80|0;Qa=Dd+76|0;Ta=Dd+72|0;Wa=Dd+68|0;Xa=Dd+64|0;w=Dd+60|0;Sb=Dd+56|0;ec=Dd+52|0;gc=Dd+48|0;R=Dd+44|0;fc=Dd+40|0;Vb=Dd+36|0;Wb=Dd+32|0;Fc=Dd+28|0;v=Dd+24|0;_b=Dd+20|0;dc=Dd+16|0;ra=Dd+12|0;Q=Dd+8|0;Tb=Dd+4|0;Ub=Dd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ed>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Dd+908>>2]=.3826834261417389;g[Dd+904>>2]=.9238795042037964;g[Dd+900>>2]=.7071067690849304;c[Cd>>2]=c[Ed>>2];c[q>>2]=(c[q>>2]|0)+((c[Ed>>2]|0)-1<<3<<2);while(1){if((c[Cd>>2]|0)>=(c[s>>2]|0))break;g[Da>>2]=+g[c[q>>2]>>2];g[zc>>2]=+g[(c[q>>2]|0)+4>>2];g[id>>2]=+g[(c[q>>2]|0)+8>>2];g[kd>>2]=+g[(c[q>>2]|0)+12>>2];g[jd>>2]=+g[Da>>2]*+g[id>>2];g[pd>>2]=+g[zc>>2]*+g[id>>2];g[ld>>2]=+g[zc>>2]*+g[kd>>2];g[od>>2]=+g[Da>>2]*+g[kd>>2];g[md>>2]=+g[jd>>2]-+g[ld>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[Ic>>2]=+g[od>>2]-+g[pd>>2];g[Gc>>2]=+g[jd>>2]+ +g[ld>>2];g[Ac>>2]=+g[(c[q>>2]|0)+20>>2];g[Bc>>2]=+g[zc>>2]*+g[Ac>>2];g[xd>>2]=+g[id>>2]*+g[Ac>>2];g[cd>>2]=+g[Da>>2]*+g[Ac>>2];g[ud>>2]=+g[kd>>2]*+g[Ac>>2];g[Mb>>2]=+g[(c[q>>2]|0)+16>>2];g[yc>>2]=+g[Da>>2]*+g[Mb>>2];g[yd>>2]=+g[kd>>2]*+g[Mb>>2];g[dd>>2]=+g[zc>>2]*+g[Mb>>2];g[td>>2]=+g[id>>2]*+g[Mb>>2];g[Cc>>2]=+g[yc>>2]+ +g[Bc>>2];g[_c>>2]=+g[xd>>2]-+g[yd>>2];g[Nc>>2]=+g[cd>>2]+ +g[dd>>2];g[vd>>2]=+g[td>>2]-+g[ud>>2];g[Yc>>2]=+g[td>>2]+ +g[ud>>2];g[ed>>2]=+g[cd>>2]-+g[dd>>2];g[zd>>2]=+g[xd>>2]+ +g[yd>>2];g[Lc>>2]=+g[yc>>2]-+g[Bc>>2];g[Rc>>2]=+g[(c[q>>2]|0)+24>>2];g[Sc>>2]=+g[(c[q>>2]|0)+28>>2];g[Tc>>2]=+g[Da>>2]*+g[Rc>>2]+ +g[zc>>2]*+g[Sc>>2];g[Vc>>2]=+g[Da>>2]*+g[Sc>>2]-+g[zc>>2]*+g[Rc>>2];g[ga>>2]=+g[md>>2]*+g[Ac>>2];g[ha>>2]=+g[qd>>2]*+g[Mb>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[na>>2]=+g[ga>>2]+ +g[ha>>2];g[E>>2]=+g[md>>2]*+g[Mb>>2];g[da>>2]=+g[qd>>2]*+g[Ac>>2];g[ea>>2]=+g[E>>2]+ +g[da>>2];g[la>>2]=+g[E>>2]-+g[da>>2];g[za>>2]=+g[Gc>>2]*+g[Ac>>2];g[Aa>>2]=+g[Ic>>2]*+g[Mb>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2];g[M>>2]=+g[za>>2]+ 
+g[Aa>>2];g[va>>2]=+g[Gc>>2]*+g[Mb>>2];g[wa>>2]=+g[Ic>>2]*+g[Ac>>2];g[xa>>2]=+g[va>>2]+ +g[wa>>2];g[K>>2]=+g[va>>2]-+g[wa>>2];g[u>>2]=+g[c[m>>2]>>2];g[bc>>2]=+g[c[o>>2]>>2];g[Dc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[fd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[gd>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[ed>>2]*+g[fd>>2];g[ac>>2]=+g[Cc>>2]*+g[fd>>2]-+g[ed>>2]*+g[Dc>>2];g[hd>>2]=+g[u>>2]+ +g[gd>>2];g[pc>>2]=+g[bc>>2]-+g[ac>>2];g[S>>2]=+g[u>>2]-+g[gd>>2];g[cc>>2]=+g[ac>>2]+ +g[bc>>2];g[nd>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[rd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[sd>>2]=+g[md>>2]*+g[nd>>2]+ +g[qd>>2]*+g[rd>>2];g[T>>2]=+g[md>>2]*+g[rd>>2]-+g[qd>>2]*+g[nd>>2];g[wd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ad>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Bd>>2]=+g[vd>>2]*+g[wd>>2]+ +g[zd>>2]*+g[Ad>>2];g[U>>2]=+g[vd>>2]*+g[Ad>>2]-+g[zd>>2]*+g[wd>>2];g[Ec>>2]=+g[sd>>2]+ +g[Bd>>2];g[qc>>2]=+g[sd>>2]-+g[Bd>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[$b>>2]=+g[T>>2]+ +g[U>>2];g[Hc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Jc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Kc>>2]=+g[Gc>>2]*+g[Hc>>2]+ +g[Ic>>2]*+g[Jc>>2];g[X>>2]=+g[Gc>>2]*+g[Jc>>2]-+g[Ic>>2]*+g[Hc>>2];g[Mc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Oc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Pc>>2]=+g[Lc>>2]*+g[Mc>>2]+ +g[Nc>>2]*+g[Oc>>2];g[Y>>2]=+g[Lc>>2]*+g[Oc>>2]-+g[Nc>>2]*+g[Mc>>2];g[Qc>>2]=+g[Kc>>2]+ +g[Pc>>2];g[_a>>2]=+g[X>>2]+ +g[Y>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=+g[Kc>>2]-+g[Pc>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[Ib>>2]=+g[_>>2]+ +g[Z>>2];g[Uc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Wc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Xc>>2]=+g[Tc>>2]*+g[Uc>>2]+ +g[Vc>>2]*+g[Wc>>2];g[ba>>2]=+g[Tc>>2]*+g[Wc>>2]-+g[Vc>>2]*+g[Uc>>2];g[Zc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[$c>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ad>>2]=+g[Yc>>2]*+g[Zc>>2]+ +g[_c>>2]*+g[$c>>2];g[ca>>2]=+g[Yc>>2]*+g[$c>>2]-+g[_c>>2]*+g[Zc>>2];g[bd>>2]=+g[Xc>>2]+ +g[ad>>2];g[$a>>2]=+g[ba>>2]+ 
+g[ca>>2];g[aa>>2]=+g[Xc>>2]-+g[ad>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[Fa>>2]=+g[aa>>2]+ +g[Ea>>2];g[Jb>>2]=+g[aa>>2]-+g[Ea>>2];g[sa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ta>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ua>>2]=+g[Rc>>2]*+g[sa>>2]+ +g[Sc>>2]*+g[ta>>2];g[wb>>2]=+g[Rc>>2]*+g[ta>>2]-+g[Sc>>2]*+g[sa>>2];g[L>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[N>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[O>>2]=+g[K>>2]*+g[L>>2]+ +g[M>>2]*+g[N>>2];g[tb>>2]=+g[K>>2]*+g[N>>2]-+g[M>>2]*+g[L>>2];g[ya>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ca>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[F>>2]=+g[xa>>2]*+g[ya>>2]+ +g[Ba>>2]*+g[Ca>>2];g[xb>>2]=+g[xa>>2]*+g[Ca>>2]-+g[Ba>>2]*+g[ya>>2];g[H>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[I>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[J>>2]=+g[id>>2]*+g[H>>2]+ +g[kd>>2]*+g[I>>2];g[sb>>2]=+g[id>>2]*+g[I>>2]-+g[kd>>2]*+g[H>>2];g[G>>2]=+g[ua>>2]+ +g[F>>2];g[P>>2]=+g[J>>2]+ +g[O>>2];g[hb>>2]=+g[G>>2]-+g[P>>2];g[ib>>2]=+g[wb>>2]+ +g[xb>>2];g[jb>>2]=+g[sb>>2]+ +g[tb>>2];g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[rb>>2]=+g[ua>>2]-+g[F>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[vb>>2]=+g[rb>>2]-+g[ub>>2];g[Ra>>2]=+g[rb>>2]+ +g[ub>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[zb>>2]=+g[J>>2]-+g[O>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[Sa>>2]=+g[yb>>2]-+g[zb>>2];g[x>>2]=+g[c[n>>2]>>2];g[y>>2]=+g[c[p>>2]>>2];g[z>>2]=+g[Da>>2]*+g[x>>2]+ +g[zc>>2]*+g[y>>2];g[Ia>>2]=+g[Da>>2]*+g[y>>2]-+g[zc>>2]*+g[x>>2];g[ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[pa>>2]=+g[la>>2]*+g[ma>>2]+ +g[na>>2]*+g[oa>>2];g[nb>>2]=+g[la>>2]*+g[oa>>2]-+g[na>>2]*+g[ma>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[C>>2]=+g[Mb>>2]*+g[A>>2]+ +g[Ac>>2]*+g[B>>2];g[Ja>>2]=+g[Mb>>2]*+g[B>>2]-+g[Ac>>2]*+g[A>>2];g[fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ja>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ka>>2]=+g[ea>>2]*+g[fa>>2]+ +g[ia>>2]*+g[ja>>2];g[mb>>2]=+g[ea>>2]*+g[ja>>2]-+g[ia>>2]*+g[fa>>2];g[D>>2]=+g[z>>2]+ 
+g[C>>2];g[qa>>2]=+g[ka>>2]+ +g[pa>>2];g[cb>>2]=+g[D>>2]-+g[qa>>2];g[db>>2]=+g[Ia>>2]+ +g[Ja>>2];g[eb>>2]=+g[mb>>2]+ +g[nb>>2];g[fb>>2]=+g[db>>2]-+g[eb>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[La>>2]=+g[ka>>2]-+g[pa>>2];g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[Oa>>2]=+g[Ka>>2]-+g[La>>2];g[Na>>2]=+g[z>>2]-+g[C>>2];g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[pb>>2]=+g[Na>>2]-+g[ob>>2];g[Pa>>2]=+g[Na>>2]+ +g[ob>>2];g[W>>2]=+g[S>>2]-+g[V>>2];g[Ga>>2]=(+g[$>>2]-+g[Fa>>2])*.7071067690849304;g[Ha>>2]=+g[W>>2]+ +g[Ga>>2];g[Db>>2]=+g[W>>2]-+g[Ga>>2];g[wc>>2]=(+g[Jb>>2]-+g[Ib>>2])*.7071067690849304;g[xc>>2]=+g[qc>>2]+ +g[pc>>2];g[Xb>>2]=+g[wc>>2]+ +g[xc>>2];g[Zb>>2]=+g[xc>>2]-+g[wc>>2];g[qb>>2]=+g[Ma>>2]*.9238795042037964+ +g[pb>>2]*.3826834261417389;g[Bb>>2]=+g[vb>>2]*.3826834261417389-+g[Ab>>2]*.9238795042037964;g[Cb>>2]=+g[qb>>2]+ +g[Bb>>2];g[Yb>>2]=+g[Bb>>2]-+g[qb>>2];g[Eb>>2]=+g[Ma>>2]*.3826834261417389-+g[pb>>2]*.9238795042037964;g[Fb>>2]=+g[Ab>>2]*.3826834261417389+ +g[vb>>2]*.9238795042037964;g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[vc>>2]=+g[Eb>>2]+ +g[Fb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ha>>2]-+g[Cb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[vc>>2]-+g[Xb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ha>>2]+ +g[Cb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[vc>>2]+ +g[Xb>>2];g[c[o>>2]>>2]=+g[Db>>2]-+g[Gb>>2];g[c[p>>2]>>2]=+g[Yb>>2]-+g[Zb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Db>>2]+ +g[Gb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[Za>>2]=+g[hd>>2]-+g[Ec>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[bb>>2]=+g[Za>>2]+ +g[ab>>2];g[Ob>>2]=+g[Za>>2]-+g[ab>>2];g[ic>>2]=+g[bd>>2]-+g[Qc>>2];g[jc>>2]=+g[cc>>2]-+g[$b>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[mc>>2]=+g[jc>>2]-+g[ic>>2];g[gb>>2]=+g[cb>>2]+ +g[fb>>2];g[lb>>2]=+g[hb>>2]-+g[kb>>2];g[Nb>>2]=(+g[gb>>2]+ +g[lb>>2])*.7071067690849304;g[lc>>2]=(+g[lb>>2]-+g[gb>>2])*.7071067690849304;g[Pb>>2]=+g[fb>>2]-+g[cb>>2];g[Qb>>2]=+g[hb>>2]+ +g[kb>>2];g[Rb>>2]=(+g[Pb>>2]-+g[Qb>>2])*.7071067690849304;g[hc>>2]=(+g[Pb>>2]+ 
+g[Qb>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[bb>>2]-+g[Nb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[hc>>2]-+g[kc>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[bb>>2]+ +g[Nb>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[hc>>2]+ +g[kc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ob>>2]-+g[Rb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[lc>>2]-+g[mc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ob>>2]+ +g[Rb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[lc>>2]+ +g[mc>>2];g[Hb>>2]=+g[S>>2]+ +g[V>>2];g[Kb>>2]=(+g[Ib>>2]+ +g[Jb>>2])*.7071067690849304;g[Lb>>2]=+g[Hb>>2]+ +g[Kb>>2];g[Va>>2]=+g[Hb>>2]-+g[Kb>>2];g[oc>>2]=(+g[$>>2]+ +g[Fa>>2])*.7071067690849304;g[rc>>2]=+g[pc>>2]-+g[qc>>2];g[sc>>2]=+g[oc>>2]+ +g[rc>>2];g[uc>>2]=+g[rc>>2]-+g[oc>>2];g[Qa>>2]=+g[Oa>>2]*.3826834261417389+ +g[Pa>>2]*.9238795042037964;g[Ta>>2]=+g[Ra>>2]*.9238795042037964-+g[Sa>>2]*.3826834261417389;g[Ua>>2]=+g[Qa>>2]+ +g[Ta>>2];g[tc>>2]=+g[Ta>>2]-+g[Qa>>2];g[Wa>>2]=+g[Oa>>2]*.9238795042037964-+g[Pa>>2]*.3826834261417389;g[Xa>>2]=+g[Sa>>2]*.9238795042037964+ +g[Ra>>2]*.3826834261417389;g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[nc>>2]=+g[Wa>>2]+ +g[Xa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Lb>>2]-+g[Ua>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[nc>>2]-+g[sc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Lb>>2]+ +g[Ua>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[nc>>2]+ +g[sc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Va>>2]-+g[Ya>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[tc>>2]-+g[uc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Va>>2]+ +g[Ya>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[tc>>2]+ +g[uc>>2];g[Fc>>2]=+g[hd>>2]+ +g[Ec>>2];g[v>>2]=+g[Qc>>2]+ +g[bd>>2];g[w>>2]=+g[Fc>>2]+ +g[v>>2];g[Sb>>2]=+g[Fc>>2]-+g[v>>2];g[_b>>2]=+g[_a>>2]+ +g[$a>>2];g[dc>>2]=+g[$b>>2]+ +g[cc>>2];g[ec>>2]=+g[_b>>2]+ +g[dc>>2];g[gc>>2]=+g[dc>>2]-+g[_b>>2];g[ra>>2]=+g[D>>2]+ +g[qa>>2];g[Q>>2]=+g[G>>2]+ +g[P>>2];g[R>>2]=+g[ra>>2]+ +g[Q>>2];g[fc>>2]=+g[Q>>2]-+g[ra>>2];g[Tb>>2]=+g[db>>2]+ +g[eb>>2];g[Ub>>2]=+g[ib>>2]+ 
+g[jb>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Wb>>2]=+g[Tb>>2]+ +g[Ub>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[w>>2]-+g[R>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Wb>>2]-+g[ec>>2];g[c[m>>2]>>2]=+g[w>>2]+ +g[R>>2];g[c[n>>2]>>2]=+g[Wb>>2]+ +g[ec>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Sb>>2]-+g[Vb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[fc>>2]-+g[gc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Sb>>2]+ +g[Vb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[fc>>2]+ +g[gc>>2];c[Cd>>2]=(c[Cd>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=Dd;return}/* nq(a): thin wrapper that forwards a, together with the constants 25, 3256 and 0, to an(). a is coerced to int32; i is used as a stack pointer (saved in b, advanced by 16 bytes, restored before returning) and a is spilled into the reserved slot through the integer heap view c before the call. Returns nothing. NOTE(review): this looks like compiler output for an FFTW codelet-registration stub, with 25 presumably a function-table index and 3256 the address of a static descriptor - confirm against the C source this was generated from. */function nq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,25,3256,0);i=b;return}/* oq(a,b,d,e,f,h,j,k,l): straight-line float kernel run once per loop pass. All nine arguments are coerced to int32 and spilled into a 1280-byte stack frame. a, b, d, e and f are used as byte addresses into the heap view g (read with unary + and indexed in 4-byte units); h is a stride in 4-byte elements; the loop counter starts at j and the loop exits once it is no longer below k; l is the per-pass pointer step in elements. Before the loop the f pointer is advanced by (j-1)*8 floats. Each pass reads 8 consecutive floats from f, reads and then overwrites 10 floats (offsets 0..9 times h) in each of the a, b, d and e arrays, moves a and b forward and d and e backward by l floats, advances f by 32 bytes, and xors the stride with the integer stored at c[2998]. The long var list below holds only addresses of 4-byte temporaries inside the frame. Returns nothing. NOTE(review): the nine-argument signature, the constants 0.5877852..., 0.9510565..., 0.25 and 0.5590170..., and the access pattern look like a genfft-generated FFTW half-complex twiddle codelet of size 20, and c[2998] is presumably an always-zero value used to keep the stride opaque to the optimiser - confirm against the FFTW sources. */function oq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=
0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0;bf=i;i=i+1280|0;m=bf+1268|0;n=bf+1264|0;o=bf+1260|0;p=bf+1256|0;q=bf+1252|0;r=bf+1248|0;cf=bf+1244|0;s=bf+1240|0;t=bf+1236|0;af=bf+1216|0;Da=bf+1212|0;Zd=bf+1208|0;Ie=bf+1204|0;Ke=bf+1200|0;Me=bf+1196|0;Qe=bf+1192|0;ka=bf+1188|0;ia=bf+1184|0;_d=bf+1180|0;Mb=bf+1176|0;ae=bf+1172|0;z=bf+1168|0;V=bf+1164|0;Ee=bf+1160|0;T=bf+1156|0;ya=bf+1152|0;D=bf+1148|0;wa=bf+1144|0;qa=bf+1140|0;Ia=bf+1136|0;ma=bf+1132|0;Ga=bf+1128|0;Se=bf+1124|0;We=bf+1120|0;J=bf+1116|0;L=bf+1112|0;Ze=bf+1108|0;_e=bf+1104|0;$e=bf+1100|0;le=bf+1096|0;ca=bf+1092|0;de=bf+1088|0;aa=bf+1084|0;F=bf+1080|0;Be=bf+1076|0;ne=bf+1072|0;Ba=bf+1068|0;ze=bf+1064|0;$d=bf+1060|0;B=bf+1056|0;Ce=bf+1052|0;y=bf+1048|0;Vc=bf+1044|0;C=bf+1040|0;De=bf+1036|0;x=bf+1032|0;Je=bf+1028|0;Pe=bf+1024|0;Le=bf+1020|0;Oe=bf+1016|0;oa=bf+1012|0;pa=bf+1008|0;ja=bf+1004|0;la=bf+1e3|0;Ne=bf+996|0;Re=bf+992|0;Ue=bf+988|0;Ve=bf+984|0;he=bf+980|0;nb=bf+976|0;hd=bf+972|0;qd=bf+968|0;Ya=bf+964|0;Mc=bf+960|0;zd=bf+956|0;Ld=bf+952|0;S=bf+948|0;Ma=bf+944|0;Na=bf+940|0;ec=bf+936|0;hc=bf+932|0;_c=bf+928|0;pc=bf+924|0;qc=bf+920|0;Id=bf+916|0;rb=bf+912|0;sb=bf+908|0;tb=bf+904|0;zb=bf+900|0;Eb=bf+896|0;od=bf+892|0;Bc=bf+888|0;Cc=bf+884|0;Yd=bf+880|0;Ic=bf+876|0;Jc=bf+872|0;Kc=bf+868|0;Nb=bf+864|0;Sb=bf+860|0;Tb=bf+856|0;ye=bf+852|0;ua=bf+848|0;va=bf+844|0;Zb=bf+840|0;ac=bf+836|0;Zc=bf+832|0;sc=bf+828|0;tc=bf+824|0;Jd=bf+820|0;ob=bf+816|0;pb=bf+812|0;qb=bf+808|0;Kb=bf+804|0;Ra=bf+800|0;nd=bf+796|0;yc=bf+792|0;zc=bf+788|0;Xd=bf+784|0;Fc=bf+780|0;Gc=bf+776|0;Hc=bf+772|0;bb=bf+768|0;gb=bf+764|0;hb=bf+760|0;u=bf+756|0;cd=bf+752|0;Ge=bf+748|0;b
d=bf+744|0;Ye=bf+740|0;Va=bf+736|0;fe=bf+732|0;Wa=bf+728|0;be=bf+724|0;Fe=bf+720|0;Te=bf+716|0;Xe=bf+712|0;ce=bf+708|0;ee=bf+704|0;He=bf+700|0;ge=bf+696|0;fd=bf+692|0;gd=bf+688|0;Ua=bf+684|0;Xa=bf+680|0;ad=bf+676|0;dd=bf+672|0;I=bf+668|0;cc=bf+664|0;xb=bf+660|0;ib=bf+656|0;La=bf+652|0;gc=bf+648|0;Db=bf+644|0;Rb=bf+640|0;R=bf+636|0;dc=bf+632|0;yb=bf+628|0;lb=bf+624|0;$=bf+620|0;fc=bf+616|0;Cb=bf+612|0;Ob=bf+608|0;Aa=bf+604|0;vb=bf+600|0;H=bf+596|0;wb=bf+592|0;xa=bf+588|0;za=bf+584|0;Ca=bf+580|0;G=bf+576|0;Fa=bf+572|0;Pb=bf+568|0;Ka=bf+564|0;Qb=bf+560|0;ba=bf+556|0;Ea=bf+552|0;Ha=bf+548|0;Ja=bf+544|0;N=bf+540|0;jb=bf+536|0;Q=bf+532|0;kb=bf+528|0;K=bf+524|0;M=bf+520|0;O=bf+516|0;P=bf+512|0;X=bf+508|0;Ab=bf+504|0;_=bf+500|0;Bb=bf+496|0;U=bf+492|0;W=bf+488|0;Y=bf+484|0;Z=bf+480|0;qe=bf+476|0;Xb=bf+472|0;Ib=bf+468|0;Za=bf+464|0;ta=bf+460|0;$b=bf+456|0;Lb=bf+452|0;fb=bf+448|0;xe=bf+444|0;Yb=bf+440|0;Jb=bf+436|0;ab=bf+432|0;ea=bf+428|0;_b=bf+424|0;Qa=bf+420|0;cb=bf+416|0;ke=bf+412|0;Gb=bf+408|0;pe=bf+404|0;Hb=bf+400|0;ie=bf+396|0;je=bf+392|0;me=bf+388|0;oe=bf+384|0;ha=bf+380|0;db=bf+376|0;sa=bf+372|0;eb=bf+368|0;fa=bf+364|0;ga=bf+360|0;na=bf+356|0;ra=bf+352|0;te=bf+348|0;_a=bf+344|0;we=bf+340|0;$a=bf+336|0;re=bf+332|0;se=bf+328|0;ue=bf+324|0;ve=bf+320|0;w=bf+316|0;Oa=bf+312|0;da=bf+308|0;Pa=bf+304|0;Ae=bf+300|0;v=bf+296|0;A=bf+292|0;E=bf+288|0;nc=bf+284|0;mb=bf+280|0;mc=bf+276|0;Wc=bf+272|0;Yc=bf+268|0;rc=bf+264|0;uc=bf+260|0;Xc=bf+256|0;oc=bf+252|0;Qd=bf+248|0;Kd=bf+244|0;Pd=bf+240|0;Od=bf+236|0;Sd=bf+232|0;Md=bf+228|0;Nd=bf+224|0;Td=bf+220|0;Rd=bf+216|0;Sc=bf+212|0;ub=bf+208|0;Tc=bf+204|0;jc=bf+200|0;lc=bf+196|0;bc=bf+192|0;ic=bf+188|0;kc=bf+184|0;Uc=bf+180|0;Dd=bf+176|0;$c=bf+172|0;Ed=bf+168|0;Cd=bf+164|0;Gd=bf+160|0;Ad=bf+156|0;Bd=bf+152|0;Hd=bf+148|0;Fd=bf+144|0;Wb=bf+140|0;Ub=bf+136|0;Vb=bf+132|0;Ta=bf+128|0;wc=bf+124|0;Fb=bf+120|0;Sa=bf+116|0;xc=bf+112|0;vc=bf+108|0;ud=bf+104|0;pd=bf+100|0;vd=bf+96|0;td=bf+92|0;yd=bf+88|0;rd=bf+84|0;sd=bf+80|0;xd=bf+76|0;wd=bf+72|0;L
c=bf+68|0;Nc=bf+64|0;Oc=bf+60|0;Ec=bf+56|0;Qc=bf+52|0;Ac=bf+48|0;Dc=bf+44|0;Rc=bf+40|0;Pc=bf+36|0;ed=bf+32|0;id=bf+28|0;jd=bf+24|0;Wd=bf+20|0;md=bf+16|0;Ud=bf+12|0;Vd=bf+8|0;ld=bf+4|0;kd=bf;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[cf>>2]=j;c[s>>2]=k;c[t>>2]=l;g[bf+1232>>2]=.5877852439880371;g[bf+1228>>2]=.9510565400123596;g[bf+1224>>2]=.25;g[bf+1220>>2]=.55901700258255;c[af>>2]=c[cf>>2];c[q>>2]=(c[q>>2]|0)+((c[cf>>2]|0)-1<<3<<2);while(1){if((c[af>>2]|0)>=(c[s>>2]|0))break;g[Da>>2]=+g[c[q>>2]>>2];g[Zd>>2]=+g[(c[q>>2]|0)+4>>2];g[Ie>>2]=+g[(c[q>>2]|0)+8>>2];g[Ke>>2]=+g[(c[q>>2]|0)+12>>2];g[Je>>2]=+g[Da>>2]*+g[Ie>>2];g[Pe>>2]=+g[Zd>>2]*+g[Ie>>2];g[Le>>2]=+g[Zd>>2]*+g[Ke>>2];g[Oe>>2]=+g[Da>>2]*+g[Ke>>2];g[Me>>2]=+g[Je>>2]-+g[Le>>2];g[Qe>>2]=+g[Oe>>2]+ +g[Pe>>2];g[ka>>2]=+g[Oe>>2]-+g[Pe>>2];g[ia>>2]=+g[Je>>2]+ +g[Le>>2];g[_d>>2]=+g[(c[q>>2]|0)+20>>2];g[$d>>2]=+g[Zd>>2]*+g[_d>>2];g[B>>2]=+g[Ie>>2]*+g[_d>>2];g[Ce>>2]=+g[Da>>2]*+g[_d>>2];g[y>>2]=+g[Ke>>2]*+g[_d>>2];g[Mb>>2]=+g[(c[q>>2]|0)+16>>2];g[Vc>>2]=+g[Da>>2]*+g[Mb>>2];g[C>>2]=+g[Ke>>2]*+g[Mb>>2];g[De>>2]=+g[Zd>>2]*+g[Mb>>2];g[x>>2]=+g[Ie>>2]*+g[Mb>>2];g[ae>>2]=+g[Vc>>2]-+g[$d>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[V>>2]=+g[B>>2]+ +g[C>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[T>>2]=+g[x>>2]-+g[y>>2];g[ya>>2]=+g[Ce>>2]-+g[De>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[wa>>2]=+g[Vc>>2]+ +g[$d>>2];g[oa>>2]=+g[ia>>2]*+g[_d>>2];g[pa>>2]=+g[ka>>2]*+g[Mb>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[Ia>>2]=+g[oa>>2]-+g[pa>>2];g[ja>>2]=+g[ia>>2]*+g[Mb>>2];g[la>>2]=+g[ka>>2]*+g[_d>>2];g[ma>>2]=+g[ja>>2]-+g[la>>2];g[Ga>>2]=+g[ja>>2]+ +g[la>>2];g[Ne>>2]=+g[Me>>2]*+g[Mb>>2];g[Re>>2]=+g[Qe>>2]*+g[_d>>2];g[Se>>2]=+g[Ne>>2]+ +g[Re>>2];g[Ue>>2]=+g[Me>>2]*+g[_d>>2];g[Ve>>2]=+g[Qe>>2]*+g[Mb>>2];g[We>>2]=+g[Ue>>2]-+g[Ve>>2];g[J>>2]=+g[Ne>>2]-+g[Re>>2];g[L>>2]=+g[Ue>>2]+ +g[Ve>>2];g[Ze>>2]=+g[(c[q>>2]|0)+24>>2];g[_e>>2]=+g[(c[q>>2]|0)+28>>2];g[$e>>2]=+g[Me>>2]*+g[Ze>>2]+ +g[Qe>>2]*+g[_e>>2];g[le>>2]=+g[Se>>2]*+g[Ze>>2]+ 
+g[We>>2]*+g[_e>>2];g[ca>>2]=+g[ia>>2]*+g[_e>>2]-+g[ka>>2]*+g[Ze>>2];g[de>>2]=+g[Me>>2]*+g[_e>>2]-+g[Qe>>2]*+g[Ze>>2];g[aa>>2]=+g[ia>>2]*+g[Ze>>2]+ +g[ka>>2]*+g[_e>>2];g[F>>2]=+g[Da>>2]*+g[_e>>2]-+g[Zd>>2]*+g[Ze>>2];g[Be>>2]=+g[Ie>>2]*+g[_e>>2]-+g[Ke>>2]*+g[Ze>>2];g[ne>>2]=+g[Se>>2]*+g[_e>>2]-+g[We>>2]*+g[Ze>>2];g[Ba>>2]=+g[Da>>2]*+g[Ze>>2]+ +g[Zd>>2]*+g[_e>>2];g[ze>>2]=+g[Ie>>2]*+g[Ze>>2]+ +g[Ke>>2]*+g[_e>>2];g[u>>2]=+g[c[m>>2]>>2];g[cd>>2]=+g[c[o>>2]>>2];g[be>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Fe>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ge>>2]=+g[ae>>2]*+g[be>>2]+ +g[Ee>>2]*+g[Fe>>2];g[bd>>2]=+g[ae>>2]*+g[Fe>>2]-+g[Ee>>2]*+g[be>>2];g[Te>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xe>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ye>>2]=+g[Se>>2]*+g[Te>>2]+ +g[We>>2]*+g[Xe>>2];g[Va>>2]=+g[Se>>2]*+g[Xe>>2]-+g[We>>2]*+g[Te>>2];g[ce>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ee>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[fe>>2]=+g[$e>>2]*+g[ce>>2]+ +g[de>>2]*+g[ee>>2];g[Wa>>2]=+g[$e>>2]*+g[ee>>2]-+g[de>>2]*+g[ce>>2];g[He>>2]=+g[u>>2]+ +g[Ge>>2];g[ge>>2]=+g[Ye>>2]+ +g[fe>>2];g[he>>2]=+g[He>>2]-+g[ge>>2];g[nb>>2]=+g[He>>2]+ +g[ge>>2];g[fd>>2]=+g[cd>>2]-+g[bd>>2];g[gd>>2]=+g[Ye>>2]-+g[fe>>2];g[hd>>2]=+g[fd>>2]-+g[gd>>2];g[qd>>2]=+g[gd>>2]+ +g[fd>>2];g[Ua>>2]=+g[u>>2]-+g[Ge>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[Ya>>2]=+g[Ua>>2]-+g[Xa>>2];g[Mc>>2]=+g[Ua>>2]+ +g[Xa>>2];g[ad>>2]=+g[Va>>2]+ +g[Wa>>2];g[dd>>2]=+g[bd>>2]+ +g[cd>>2];g[zd>>2]=+g[ad>>2]+ +g[dd>>2];g[Ld>>2]=+g[dd>>2]-+g[ad>>2];g[xa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[za>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Aa>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[vb>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[Ca>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[G>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[H>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[F>>2]*+g[G>>2];g[wb>>2]=+g[Ba>>2]*+g[G>>2]-+g[F>>2]*+g[Ca>>2];g[I>>2]=+g[Aa>>2]+ +g[H>>2];g[cc>>2]=+g[vb>>2]+ 
+g[wb>>2];g[xb>>2]=+g[vb>>2]-+g[wb>>2];g[ib>>2]=+g[Aa>>2]-+g[H>>2];g[ba>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Ea>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Fa>>2]=+g[aa>>2]*+g[ba>>2]+ +g[ca>>2]*+g[Ea>>2];g[Pb>>2]=+g[aa>>2]*+g[Ea>>2]-+g[ca>>2]*+g[ba>>2];g[Ha>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ja>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ka>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[Qb>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[La>>2]=+g[Fa>>2]+ +g[Ka>>2];g[gc>>2]=+g[Pb>>2]+ +g[Qb>>2];g[Db>>2]=+g[Fa>>2]-+g[Ka>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[K>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[M>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[N>>2]=+g[J>>2]*+g[K>>2]+ +g[L>>2]*+g[M>>2];g[jb>>2]=+g[J>>2]*+g[M>>2]-+g[L>>2]*+g[K>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[P>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Q>>2]=+g[Ie>>2]*+g[O>>2]+ +g[Ke>>2]*+g[P>>2];g[kb>>2]=+g[Ie>>2]*+g[P>>2]-+g[Ke>>2]*+g[O>>2];g[R>>2]=+g[N>>2]+ +g[Q>>2];g[dc>>2]=+g[jb>>2]+ +g[kb>>2];g[yb>>2]=+g[N>>2]-+g[Q>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[U>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[W>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[X>>2]=+g[T>>2]*+g[U>>2]+ +g[V>>2]*+g[W>>2];g[Ab>>2]=+g[T>>2]*+g[W>>2]-+g[V>>2]*+g[U>>2];g[Y>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Z>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[_>>2]=+g[ia>>2]*+g[Y>>2]+ +g[ka>>2]*+g[Z>>2];g[Bb>>2]=+g[ia>>2]*+g[Z>>2]-+g[ka>>2]*+g[Y>>2];g[$>>2]=+g[X>>2]+ +g[_>>2];g[fc>>2]=+g[Ab>>2]+ +g[Bb>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Ob>>2]=+g[X>>2]-+g[_>>2];g[S>>2]=+g[I>>2]-+g[R>>2];g[Ma>>2]=+g[$>>2]-+g[La>>2];g[Na>>2]=+g[S>>2]+ +g[Ma>>2];g[ec>>2]=+g[cc>>2]+ +g[dc>>2];g[hc>>2]=+g[fc>>2]+ +g[gc>>2];g[_c>>2]=+g[ec>>2]+ +g[hc>>2];g[pc>>2]=+g[cc>>2]-+g[dc>>2];g[qc>>2]=+g[gc>>2]-+g[fc>>2];g[Id>>2]=+g[qc>>2]-+g[pc>>2];g[rb>>2]=+g[I>>2]+ +g[R>>2];g[sb>>2]=+g[$>>2]+ +g[La>>2];g[tb>>2]=+g[rb>>2]+ +g[sb>>2];g[zb>>2]=+g[xb>>2]+ +g[yb>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[od>>2]=+g[zb>>2]+ 
+g[Eb>>2];g[Bc>>2]=+g[xb>>2]-+g[yb>>2];g[Cc>>2]=+g[Cb>>2]-+g[Db>>2];g[Yd>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Ic>>2]=+g[ib>>2]+ +g[lb>>2];g[Jc>>2]=+g[Ob>>2]+ +g[Rb>>2];g[Kc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Nb>>2]=+g[ib>>2]-+g[lb>>2];g[Sb>>2]=+g[Ob>>2]-+g[Rb>>2];g[Tb>>2]=+g[Nb>>2]+ +g[Sb>>2];g[ie>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[je>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ke>>2]=+g[Me>>2]*+g[ie>>2]+ +g[Qe>>2]*+g[je>>2];g[Gb>>2]=+g[Me>>2]*+g[je>>2]-+g[Qe>>2]*+g[ie>>2];g[me>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[oe>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[pe>>2]=+g[le>>2]*+g[me>>2]+ +g[ne>>2]*+g[oe>>2];g[Hb>>2]=+g[le>>2]*+g[oe>>2]-+g[ne>>2]*+g[me>>2];g[qe>>2]=+g[ke>>2]+ +g[pe>>2];g[Xb>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Ib>>2]=+g[Gb>>2]-+g[Hb>>2];g[Za>>2]=+g[ke>>2]-+g[pe>>2];g[fa>>2]=+g[c[n>>2]>>2];g[ga>>2]=+g[c[p>>2]>>2];g[ha>>2]=+g[Da>>2]*+g[fa>>2]+ +g[Zd>>2]*+g[ga>>2];g[db>>2]=+g[Da>>2]*+g[ga>>2]-+g[Zd>>2]*+g[fa>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ra>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[sa>>2]=+g[ma>>2]*+g[na>>2]+ +g[qa>>2]*+g[ra>>2];g[eb>>2]=+g[ma>>2]*+g[ra>>2]-+g[qa>>2]*+g[na>>2];g[ta>>2]=+g[ha>>2]+ +g[sa>>2];g[$b>>2]=+g[db>>2]+ +g[eb>>2];g[Lb>>2]=+g[sa>>2]-+g[ha>>2];g[fb>>2]=+g[db>>2]-+g[eb>>2];g[re>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[se>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[te>>2]=+g[Mb>>2]*+g[re>>2]+ +g[_d>>2]*+g[se>>2];g[_a>>2]=+g[Mb>>2]*+g[se>>2]-+g[_d>>2]*+g[re>>2];g[ue>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ve>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[we>>2]=+g[Ze>>2]*+g[ue>>2]+ +g[_e>>2]*+g[ve>>2];g[$a>>2]=+g[Ze>>2]*+g[ve>>2]-+g[_e>>2]*+g[ue>>2];g[xe>>2]=+g[te>>2]+ +g[we>>2];g[Yb>>2]=+g[_a>>2]+ +g[$a>>2];g[Jb>>2]=+g[te>>2]-+g[we>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[Ae>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[v>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[w>>2]=+g[ze>>2]*+g[Ae>>2]+ 
+g[Be>>2]*+g[v>>2];g[Oa>>2]=+g[ze>>2]*+g[v>>2]-+g[Be>>2]*+g[Ae>>2];g[A>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[E>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[da>>2]=+g[z>>2]*+g[A>>2]+ +g[D>>2]*+g[E>>2];g[Pa>>2]=+g[z>>2]*+g[E>>2]-+g[D>>2]*+g[A>>2];g[ea>>2]=+g[w>>2]+ +g[da>>2];g[_b>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[cb>>2]=+g[w>>2]-+g[da>>2];g[ye>>2]=+g[qe>>2]-+g[xe>>2];g[ua>>2]=+g[ea>>2]-+g[ta>>2];g[va>>2]=+g[ye>>2]+ +g[ua>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[ac>>2]=+g[_b>>2]+ +g[$b>>2];g[Zc>>2]=+g[Zb>>2]+ +g[ac>>2];g[sc>>2]=+g[Xb>>2]-+g[Yb>>2];g[tc>>2]=+g[_b>>2]-+g[$b>>2];g[Jd>>2]=+g[sc>>2]+ +g[tc>>2];g[ob>>2]=+g[qe>>2]+ +g[xe>>2];g[pb>>2]=+g[ea>>2]+ +g[ta>>2];g[qb>>2]=+g[ob>>2]+ +g[pb>>2];g[Kb>>2]=+g[Ib>>2]+ +g[Jb>>2];g[Ra>>2]=+g[Lb>>2]-+g[Qa>>2];g[nd>>2]=+g[Ra>>2]-+g[Kb>>2];g[yc>>2]=+g[Ib>>2]-+g[Jb>>2];g[zc>>2]=+g[Qa>>2]+ +g[Lb>>2];g[Xd>>2]=+g[yc>>2]+ +g[zc>>2];g[Fc>>2]=+g[Za>>2]+ +g[ab>>2];g[Gc>>2]=+g[cb>>2]+ +g[fb>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[bb>>2]=+g[Za>>2]-+g[ab>>2];g[gb>>2]=+g[cb>>2]-+g[fb>>2];g[hb>>2]=+g[bb>>2]+ +g[gb>>2];g[nc>>2]=(+g[va>>2]-+g[Na>>2])*.55901700258255;g[mb>>2]=+g[va>>2]+ +g[Na>>2];g[mc>>2]=+g[he>>2]-+g[mb>>2]*.25;g[rc>>2]=+g[pc>>2]+ +g[qc>>2];g[uc>>2]=+g[sc>>2]-+g[tc>>2];g[Wc>>2]=+g[rc>>2]*.9510565400123596-+g[uc>>2]*.5877852439880371;g[Yc>>2]=+g[uc>>2]*.9510565400123596+ +g[rc>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[he>>2]+ +g[mb>>2];g[Xc>>2]=+g[nc>>2]+ +g[mc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Xc>>2]-+g[Yc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Xc>>2]+ +g[Yc>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[oc>>2]-+g[Wc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[oc>>2]+ +g[Wc>>2];g[Qd>>2]=(+g[Jd>>2]+ +g[Id>>2])*.55901700258255;g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[Pd>>2]=+g[Kd>>2]*.25+ +g[Ld>>2];g[Md>>2]=+g[ua>>2]-+g[ye>>2];g[Nd>>2]=+g[S>>2]-+g[Ma>>2];g[Od>>2]=+g[Md>>2]*.5877852439880371+ 
+g[Nd>>2]*.9510565400123596;g[Sd>>2]=+g[Md>>2]*.9510565400123596-+g[Nd>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Kd>>2]-+g[Ld>>2];g[Td>>2]=+g[Qd>>2]+ +g[Pd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Sd>>2]-+g[Td>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Sd>>2]+ +g[Td>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Od>>2]-+g[Rd>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Od>>2]+ +g[Rd>>2];g[Sc>>2]=(+g[qb>>2]-+g[tb>>2])*.55901700258255;g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[Tc>>2]=+g[nb>>2]-+g[ub>>2]*.25;g[bc>>2]=+g[Zb>>2]-+g[ac>>2];g[ic>>2]=+g[ec>>2]-+g[hc>>2];g[jc>>2]=+g[bc>>2]*.9510565400123596+ +g[ic>>2]*.5877852439880371;g[lc>>2]=+g[ic>>2]*.9510565400123596-+g[bc>>2]*.5877852439880371;g[c[m>>2]>>2]=+g[nb>>2]+ +g[ub>>2];g[kc>>2]=+g[Tc>>2]-+g[Sc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[kc>>2]-+g[lc>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[kc>>2]+ +g[lc>>2];g[Uc>>2]=+g[Sc>>2]+ +g[Tc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Uc>>2]-+g[jc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Uc>>2]+ +g[jc>>2];g[Dd>>2]=(+g[Zc>>2]-+g[_c>>2])*.55901700258255;g[$c>>2]=+g[Zc>>2]+ +g[_c>>2];g[Ed>>2]=+g[zd>>2]-+g[$c>>2]*.25;g[Ad>>2]=+g[ob>>2]-+g[pb>>2];g[Bd>>2]=+g[rb>>2]-+g[sb>>2];g[Cd>>2]=+g[Ad>>2]*.9510565400123596+ +g[Bd>>2]*.5877852439880371;g[Gd>>2]=+g[Ad>>2]*.5877852439880371-+g[Bd>>2]*.9510565400123596;g[c[n>>2]>>2]=+g[$c>>2]+ +g[zd>>2];g[Hd>>2]=+g[Ed>>2]-+g[Dd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Gd>>2]-+g[Hd>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Gd>>2]+ +g[Hd>>2];g[Fd>>2]=+g[Dd>>2]+ +g[Ed>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Cd>>2]-+g[Fd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Cd>>2]+ +g[Fd>>2];g[Wb>>2]=(+g[hb>>2]-+g[Tb>>2])*.55901700258255;g[Ub>>2]=+g[hb>>2]+ +g[Tb>>2];g[Vb>>2]=+g[Ya>>2]-+g[Ub>>2]*.25;g[Fb>>2]=+g[zb>>2]-+g[Eb>>2];g[Sa>>2]=+g[Kb>>2]+ +g[Ra>>2];g[Ta>>2]=+g[Fb>>2]*.9510565400123596-+g[Sa>>2]*.5877852439880371;g[wc>>2]=+g[Sa>>2]*.9510565400123596+ 
+g[Fb>>2]*.5877852439880371;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ya>>2]+ +g[Ub>>2];g[xc>>2]=+g[Wb>>2]+ +g[Vb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[wc>>2]+ +g[xc>>2];g[c[o>>2]>>2]=+g[xc>>2]-+g[wc>>2];g[vc>>2]=+g[Vb>>2]-+g[Wb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ta>>2]+ +g[vc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[vc>>2]-+g[Ta>>2];g[ud>>2]=(+g[nd>>2]+ +g[od>>2])*.55901700258255;g[pd>>2]=+g[nd>>2]-+g[od>>2];g[vd>>2]=+g[pd>>2]*.25+ +g[qd>>2];g[rd>>2]=+g[bb>>2]-+g[gb>>2];g[sd>>2]=+g[Nb>>2]-+g[Sb>>2];g[td>>2]=+g[rd>>2]*.9510565400123596+ +g[sd>>2]*.5877852439880371;g[yd>>2]=+g[sd>>2]*.9510565400123596-+g[rd>>2]*.5877852439880371;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[pd>>2]-+g[qd>>2];g[xd>>2]=+g[ud>>2]+ +g[vd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[xd>>2]-+g[yd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[yd>>2]+ +g[xd>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[td>>2]+ +g[wd>>2];g[c[p>>2]>>2]=+g[wd>>2]-+g[td>>2];g[Lc>>2]=(+g[Hc>>2]-+g[Kc>>2])*.55901700258255;g[Nc>>2]=+g[Hc>>2]+ +g[Kc>>2];g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2]*.25;g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Ec>>2]=+g[Ac>>2]*.9510565400123596+ +g[Dc>>2]*.5877852439880371;g[Qc>>2]=+g[Dc>>2]*.9510565400123596-+g[Ac>>2]*.5877852439880371;g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Mc>>2]+ +g[Nc>>2];g[Rc>>2]=+g[Oc>>2]-+g[Lc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Qc>>2]+ +g[Rc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Rc>>2]-+g[Qc>>2];g[Pc>>2]=+g[Lc>>2]+ +g[Oc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ec>>2]+ +g[Pc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Pc>>2]-+g[Ec>>2];g[ed>>2]=(+g[Xd>>2]-+g[Yd>>2])*.55901700258255;g[id>>2]=+g[Xd>>2]+ +g[Yd>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2]*.25;g[Ud>>2]=+g[Ic>>2]-+g[Jc>>2];g[Vd>>2]=+g[Fc>>2]-+g[Gc>>2];g[Wd>>2]=+g[Ud>>2]*.9510565400123596-+g[Vd>>2]*.5877852439880371;g[md>>2]=+g[Vd>>2]*.9510565400123596+ +g[Ud>>2]*.5877852439880371;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[id>>2]+ +g[hd>>2];g[ld>>2]=+g[ed>>2]+ 
+g[jd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ld>>2]-+g[md>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[md>>2]+ +g[ld>>2];g[kd>>2]=+g[ed>>2]-+g[jd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Wd>>2]+ +g[kd>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[kd>>2]-+g[Wd>>2];/* oq loop tail: bump the loop counter, step the a and b pointers forward and the d and e pointers backward by l floats, advance the f pointer by 32 bytes, and xor the stride with c[2998] */c[af>>2]=(c[af>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}/* restore the stack pointer saved on entry to oq, releasing its frame */i=bf;return}/* pq(a): thin wrapper that forwards a, together with the constants 26, 3304 and 0, to an(). a is coerced to int32; i is used as a stack pointer (saved in b, advanced by 16 bytes, restored before returning) and a is spilled into the reserved slot through the integer heap view c before the call. Returns nothing. NOTE(review): this looks like compiler output for an FFTW codelet-registration stub, with 26 presumably a function-table index and 3304 the address of a static descriptor - confirm against the C source this was generated from. */function pq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,26,3304,0);i=b;return}function qq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,q
e=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0;oj=i;i=i+2208|0;m=oj+2192|0;n=oj+2188|0;o=oj+2184|0;p=oj+2180|0;q=oj+2176|0;r=oj+2172|0;pj=oj+2168|0;s=oj+2164|0;t=oj+2160|0;nj=oj+2128|0;Da=oj+2124|0;ce=oj+2120|0;Mb=oj+2116|0;mf=oj+2112|0;Eh=oj+2108|0;Bi=oj+2104|0;Di=oj+2100|0;Si=oj+2096|0;oi=oj+2092|0;Ti=oj+2088|0;Wi=oj+2084|0;_i=oj+2080|0;si=oj+2076|0;wi=oj+2072|0;Ba=oj+2068|0;M=oj+2064|0;A=oj+2060|0;F=oj+2056|0;w=oj+2052|0;K=oj+2048|0;qa=oj+2044|0;sa=oj+2040|0;Cb=oj+2036|0;Wa=oj+2032|0;Gb=oj+2028|0;Ya=oj+2024|0;hb=oj+2020|0;wc=oj+2016|0;lb=oj+2012|0;yc=oj+2008|0;hj=oj+2004|0;lj=oj+2e3|0;Gc=oj+1996|0;Ic=oj+1992|0;wa=oj+1988|0;ya=oj+1984|0;rb=oj+1980|0;tb=oj+1976|0;da=oj+1972|0;ha=oj+1968|0;Pb=o
j+1964|0;Rb=oj+1960|0;$=oj+1956|0;ba=oj+1952|0;Qa=oj+1948|0;Sa=oj+1944|0;Vi=oj+1940|0;Gi=oj+1936|0;Zi=oj+1932|0;Hi=oj+1928|0;$i=oj+1924|0;Ki=oj+1920|0;bj=oj+1916|0;Ii=oj+1912|0;R=oj+1908|0;Fa=oj+1904|0;U=oj+1900|0;Ga=oj+1896|0;V=oj+1892|0;Ja=oj+1888|0;X=oj+1884|0;Ha=oj+1880|0;fj=oj+1876|0;ga=oj+1872|0;kj=oj+1868|0;D=oj+1864|0;gj=oj+1860|0;fa=oj+1856|0;jj=oj+1852|0;E=oj+1848|0;qi=oj+1844|0;z=oj+1840|0;vi=oj+1836|0;Oi=oj+1832|0;ri=oj+1828|0;y=oj+1824|0;ui=oj+1820|0;v=oj+1816|0;Vc=oj+1812|0;Ri=oj+1808|0;vg=oj+1804|0;Qi=oj+1800|0;Ab=oj+1796|0;Bb=oj+1792|0;Eb=oj+1788|0;Fb=oj+1784|0;fb=oj+1780|0;gb=oj+1776|0;jb=oj+1772|0;kb=oj+1768|0;Pi=oj+1764|0;Ui=oj+1760|0;Xi=oj+1756|0;Yi=oj+1752|0;P=oj+1748|0;Q=oj+1744|0;S=oj+1740|0;T=oj+1736|0;Ai=oj+1732|0;ag=oj+1728|0;Sh=oj+1724|0;ei=oj+1720|0;ac=oj+1716|0;ef=oj+1712|0;Og=oj+1708|0;ah=oj+1704|0;Kb=oj+1700|0;yf=oj+1696|0;tg=oj+1692|0;nh=oj+1688|0;Od=oj+1684|0;we=oj+1680|0;fd=oj+1676|0;te=oj+1672|0;Cc=oj+1668|0;Ef=oj+1664|0;Lf=oj+1660|0;sh=oj+1656|0;pd=oj+1652|0;Ae=oj+1648|0;fe=oj+1644|0;De=oj+1640|0;la=oj+1636|0;$g=oj+1632|0;dg=oj+1628|0;Jg=oj+1624|0;fc=oj+1620|0;ff=oj+1616|0;kc=oj+1612|0;gf=oj+1608|0;J=oj+1604|0;ig=oj+1600|0;hg=oj+1596|0;ih=oj+1592|0;rc=oj+1588|0;kf=oj+1584|0;Xc=oj+1580|0;lf=oj+1576|0;Na=oj+1572|0;kg=oj+1568|0;ng=oj+1564|0;jh=oj+1560|0;bd=oj+1556|0;oe=oj+1552|0;Gd=oj+1548|0;pe=oj+1544|0;db=oj+1540|0;ug=oj+1536|0;Bf=oj+1532|0;oh=oj+1528|0;Zd=oj+1524|0;ue=oj+1520|0;id=oj+1516|0;xe=oj+1512|0;Tc=oj+1508|0;Mf=oj+1504|0;Hf=oj+1500|0;th=oj+1496|0;Ad=oj+1492|0;Ee=oj+1488|0;ie=oj+1484|0;Be=oj+1480|0;u=oj+1476|0;Mg=oj+1472|0;dj=oj+1468|0;Lg=oj+1464|0;pi=oj+1460|0;Zb=oj+1456|0;yi=oj+1452|0;_b=oj+1448|0;aj=oj+1444|0;cj=oj+1440|0;ij=oj+1436|0;mj=oj+1432|0;ti=oj+1428|0;xi=oj+1424|0;ej=oj+1420|0;zi=oj+1416|0;Qh=oj+1412|0;Rh=oj+1408|0;Yb=oj+1404|0;$b=oj+1400|0;Kg=oj+1396|0;Ng=oj+1392|0;qb=oj+1388|0;Kd=oj+1384|0;Ib=oj+1380|0;be=oj+1376|0;vb=oj+1372|0;Ld=oj+1368|0;zb=oj+1364|0;ae=oj+1360|0;ob=oj+1356|0;pb=oj+1352|0;Db=oj+1348|0;Hb=oj
+1344|0;sb=oj+1340|0;ub=oj+1336|0;xb=oj+1332|0;yb=oj+1328|0;wb=oj+1324|0;Jb=oj+1320|0;rg=oj+1316|0;sg=oj+1312|0;Md=oj+1308|0;Nd=oj+1304|0;$d=oj+1300|0;ed=oj+1296|0;Ob=oj+1292|0;Cd=oj+1288|0;Ac=oj+1284|0;nd=oj+1280|0;Tb=oj+1276|0;Dd=oj+1272|0;vc=oj+1268|0;md=oj+1264|0;ib=oj+1260|0;Nb=oj+1256|0;xc=oj+1252|0;zc=oj+1248|0;Qb=oj+1244|0;Sb=oj+1240|0;Vb=oj+1236|0;Wb=oj+1232|0;Ub=oj+1228|0;Bc=oj+1224|0;Jf=oj+1220|0;Kf=oj+1216|0;ld=oj+1212|0;od=oj+1208|0;de=oj+1204|0;ee=oj+1200|0;Fi=oj+1196|0;bc=oj+1192|0;ja=oj+1188|0;ic=oj+1184|0;Mi=oj+1180|0;cc=oj+1176|0;C=oj+1172|0;hc=oj+1168|0;Ci=oj+1164|0;Ei=oj+1160|0;ea=oj+1156|0;ia=oj+1152|0;Ji=oj+1148|0;Li=oj+1144|0;x=oj+1140|0;B=oj+1136|0;Ni=oj+1132|0;ka=oj+1128|0;bg=oj+1124|0;cg=oj+1120|0;dc=oj+1116|0;ec=oj+1112|0;gc=oj+1108|0;jc=oj+1104|0;pa=oj+1100|0;nc=oj+1096|0;H=oj+1092|0;uc=oj+1088|0;ua=oj+1084|0;oc=oj+1080|0;Aa=oj+1076|0;tc=oj+1072|0;na=oj+1068|0;oa=oj+1064|0;Ca=oj+1060|0;G=oj+1056|0;ra=oj+1052|0;ta=oj+1048|0;xa=oj+1044|0;za=oj+1040|0;va=oj+1036|0;I=oj+1032|0;fg=oj+1028|0;gg=oj+1024|0;pc=oj+1020|0;qc=oj+1016|0;sc=oj+1012|0;Wc=oj+1008|0;O=oj+1004|0;Zc=oj+1e3|0;La=oj+996|0;Ed=oj+992|0;Z=oj+988|0;_c=oj+984|0;Ea=oj+980|0;dd=oj+976|0;L=oj+972|0;N=oj+968|0;Ia=oj+964|0;Ka=oj+960|0;W=oj+956|0;Y=oj+952|0;aa=oj+948|0;ca=oj+944|0;_=oj+940|0;Ma=oj+936|0;lg=oj+932|0;mg=oj+928|0;$c=oj+924|0;ad=oj+920|0;cd=oj+916|0;Fd=oj+912|0;Pa=oj+908|0;Vd=oj+904|0;Ua=oj+900|0;Wd=oj+896|0;Ud=oj+892|0;Xd=oj+888|0;_a=oj+884|0;Qd=oj+880|0;bb=oj+876|0;Rd=oj+872|0;Pd=oj+868|0;Sd=oj+864|0;Lb=oj+860|0;Oa=oj+856|0;Ra=oj+852|0;Ta=oj+848|0;Xa=oj+844|0;Za=oj+840|0;$a=oj+836|0;ab=oj+832|0;Va=oj+828|0;cb=oj+824|0;zf=oj+820|0;Af=oj+816|0;Td=oj+812|0;Yd=oj+808|0;gd=oj+804|0;hd=oj+800|0;Fc=oj+796|0;qd=oj+792|0;Kc=oj+788|0;rd=oj+784|0;sd=oj+780|0;td=oj+776|0;Oc=oj+772|0;wd=oj+768|0;Rc=oj+764|0;xd=oj+760|0;vd=oj+756|0;yd=oj+752|0;Dc=oj+748|0;Ec=oj+744|0;Hc=oj+740|0;Jc=oj+736|0;Mc=oj+732|0;Nc=oj+728|0;Pc=oj+724|0;Qc=oj+720|0;Lc=oj+716|0;Sc=oj+712|0;Ff=oj+708|0;Gf=oj+704|0;
ud=oj+700|0;zd=oj+696|0;ge=oj+692|0;he=oj+688|0;nb=oj+684|0;Bh=oj+680|0;Qg=oj+676|0;Sg=oj+672|0;Xb=oj+668|0;Rg=oj+664|0;Gg=oj+660|0;Hg=oj+656|0;ma=oj+652|0;mb=oj+648|0;Ig=oj+644|0;Pg=oj+640|0;eb=oj+636|0;Uc=oj+632|0;Ch=oj+628|0;Dh=oj+624|0;lh=oj+620|0;xh=oj+616|0;Wg=oj+612|0;Yg=oj+608|0;qh=oj+604|0;yh=oj+600|0;vh=oj+596|0;zh=oj+592|0;hh=oj+588|0;kh=oj+584|0;Ug=oj+580|0;Vg=oj+576|0;mh=oj+572|0;ph=oj+568|0;rh=oj+564|0;uh=oj+560|0;wh=oj+556|0;Tg=oj+552|0;Ah=oj+548|0;Xg=oj+544|0;eg=oj+540|0;bh=oj+536|0;Ih=oj+532|0;Uf=oj+528|0;pg=oj+524|0;_g=oj+520|0;Dg=oj+516|0;fh=oj+512|0;wg=oj+508|0;Hh=oj+504|0;Df=oj+500|0;Rf=oj+496|0;Ag=oj+492|0;eh=oj+488|0;Of=oj+484|0;Sf=oj+480|0;jg=oj+476|0;og=oj+472|0;xf=oj+468|0;Cf=oj+464|0;Bg=oj+460|0;Cg=oj+456|0;Vf=oj+452|0;Wf=oj+448|0;yg=oj+444|0;zg=oj+440|0;If=oj+436|0;Nf=oj+432|0;qg=oj+428|0;Pf=oj+424|0;Gh=oj+420|0;Jh=oj+416|0;Qf=oj+412|0;Tf=oj+408|0;Kh=oj+404|0;Lh=oj+400|0;xg=oj+396|0;Eg=oj+392|0;Zg=oj+388|0;ch=oj+384|0;Fg=oj+380|0;gh=oj+376|0;dh=oj+372|0;Fh=oj+368|0;mc=oj+364|0;Qe=oj+360|0;fi=oj+356|0;li=oj+352|0;Id=oj+348|0;ci=oj+344|0;_e=oj+340|0;cf=oj+336|0;kd=oj+332|0;Ne=oj+328|0;Te=oj+324|0;ki=oj+320|0;Xe=oj+316|0;bf=oj+312|0;ke=oj+308|0;Oe=oj+304|0;lc=oj+300|0;di=oj+296|0;Yc=oj+292|0;Hd=oj+288|0;Ye=oj+284|0;Ze=oj+280|0;_d=oj+276|0;jd=oj+272|0;Re=oj+268|0;Se=oj+264|0;Ve=oj+260|0;We=oj+256|0;Bd=oj+252|0;je=oj+248|0;Jd=oj+244|0;le=oj+240|0;ji=oj+236|0;mi=oj+232|0;me=oj+228|0;Pe=oj+224|0;ni=oj+220|0;Ph=oj+216|0;Ue=oj+212|0;$e=oj+208|0;bi=oj+204|0;gi=oj+200|0;af=oj+196|0;df=oj+192|0;hi=oj+188|0;ii=oj+184|0;jf=oj+180|0;Me=oj+176|0;Th=oj+172|0;Zh=oj+168|0;re=oj+164|0;Nh=oj+160|0;wf=oj+156|0;_f=oj+152|0;ze=oj+148|0;Je=oj+144|0;pf=oj+140|0;Yh=oj+136|0;tf=oj+132|0;Zf=oj+128|0;Ge=oj+124|0;Ke=oj+120|0;hf=oj+116|0;Oh=oj+112|0;ne=oj+108|0;qe=oj+104|0;uf=oj+100|0;vf=oj+96|0;ve=oj+92|0;ye=oj+88|0;nf=oj+84|0;of=oj+80|0;rf=oj+76|0;sf=oj+72|0;Ce=oj+68|0;Fe=oj+64|0;se=oj+60|0;He=oj+56|0;Xh=oj+52|0;_h=oj+48|0;Ie=oj+44|0;Le=oj+40|0;$h=oj+36|0;ai=oj+32|0;q
f=oj+28|0;Xf=oj+24|0;Mh=oj+20|0;Uh=oj+16|0;Yf=oj+12|0;$f=oj+8|0;Vh=oj+4|0;Wh=oj;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[pj>>2]=j;c[s>>2]=k;c[t>>2]=l;g[oj+2156>>2]=.19509032368659973;g[oj+2152>>2]=.9807852506637573;g[oj+2148>>2]=.5555702447891235;g[oj+2144>>2]=.8314695954322815;g[oj+2140>>2]=.3826834261417389;g[oj+2136>>2]=.9238795042037964;g[oj+2132>>2]=.7071067690849304;c[nj>>2]=c[pj>>2];c[q>>2]=(c[q>>2]|0)+((c[pj>>2]|0)-1<<3<<2);while(1){if((c[nj>>2]|0)>=(c[s>>2]|0))break;g[Da>>2]=+g[c[q>>2]>>2];g[ce>>2]=+g[(c[q>>2]|0)+4>>2];g[Mb>>2]=+g[(c[q>>2]|0)+8>>2];g[mf>>2]=+g[(c[q>>2]|0)+12>>2];g[Vc>>2]=+g[Da>>2]*+g[Mb>>2];g[Ri>>2]=+g[ce>>2]*+g[Mb>>2];g[vg>>2]=+g[ce>>2]*+g[mf>>2];g[Qi>>2]=+g[Da>>2]*+g[mf>>2];g[Eh>>2]=+g[Vc>>2]+ +g[vg>>2];g[Bi>>2]=+g[Vc>>2]-+g[vg>>2];g[Di>>2]=+g[Qi>>2]+ +g[Ri>>2];g[Si>>2]=+g[Qi>>2]-+g[Ri>>2];g[oi>>2]=+g[(c[q>>2]|0)+16>>2];g[fj>>2]=+g[Da>>2]*+g[oi>>2];g[ga>>2]=+g[mf>>2]*+g[oi>>2];g[kj>>2]=+g[ce>>2]*+g[oi>>2];g[D>>2]=+g[Mb>>2]*+g[oi>>2];g[Ti>>2]=+g[(c[q>>2]|0)+20>>2];g[gj>>2]=+g[ce>>2]*+g[Ti>>2];g[fa>>2]=+g[Mb>>2]*+g[Ti>>2];g[jj>>2]=+g[Da>>2]*+g[Ti>>2];g[E>>2]=+g[mf>>2]*+g[Ti>>2];g[Wi>>2]=+g[(c[q>>2]|0)+24>>2];g[qi>>2]=+g[Mb>>2]*+g[Wi>>2];g[z>>2]=+g[ce>>2]*+g[Wi>>2];g[vi>>2]=+g[mf>>2]*+g[Wi>>2];g[Oi>>2]=+g[Da>>2]*+g[Wi>>2];g[_i>>2]=+g[(c[q>>2]|0)+28>>2];g[ri>>2]=+g[mf>>2]*+g[_i>>2];g[y>>2]=+g[Da>>2]*+g[_i>>2];g[ui>>2]=+g[Mb>>2]*+g[_i>>2];g[v>>2]=+g[ce>>2]*+g[_i>>2];g[si>>2]=+g[qi>>2]+ +g[ri>>2];g[wi>>2]=+g[ui>>2]-+g[vi>>2];g[Ba>>2]=+g[Oi>>2]+ +g[v>>2];g[M>>2]=+g[ui>>2]+ +g[vi>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[F>>2]=+g[y>>2]-+g[z>>2];g[w>>2]=+g[Oi>>2]-+g[v>>2];g[K>>2]=+g[qi>>2]-+g[ri>>2];g[qa>>2]=+g[oi>>2]*+g[Wi>>2]+ +g[Ti>>2]*+g[_i>>2];g[sa>>2]=+g[oi>>2]*+g[_i>>2]-+g[Ti>>2]*+g[Wi>>2];g[Ab>>2]=+g[Eh>>2]*+g[Wi>>2];g[Bb>>2]=+g[Si>>2]*+g[_i>>2];g[Cb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[Wa>>2]=+g[Ab>>2]-+g[Bb>>2];g[Eb>>2]=+g[Eh>>2]*+g[_i>>2];g[Fb>>2]=+g[Si>>2]*+g[Wi>>2];g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[Ya>>2]=+g[Eb>>2]+ 
+g[Fb>>2];g[fb>>2]=+g[Bi>>2]*+g[Wi>>2];g[gb>>2]=+g[Di>>2]*+g[_i>>2];g[hb>>2]=+g[fb>>2]-+g[gb>>2];g[wc>>2]=+g[fb>>2]+ +g[gb>>2];g[jb>>2]=+g[Bi>>2]*+g[_i>>2];g[kb>>2]=+g[Di>>2]*+g[Wi>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[yc>>2]=+g[jb>>2]-+g[kb>>2];g[hj>>2]=+g[fj>>2]+ +g[gj>>2];g[lj>>2]=+g[jj>>2]-+g[kj>>2];g[Gc>>2]=+g[hj>>2]*+g[Wi>>2]+ +g[lj>>2]*+g[_i>>2];g[Ic>>2]=+g[hj>>2]*+g[_i>>2]-+g[lj>>2]*+g[Wi>>2];g[wa>>2]=+g[fj>>2]-+g[gj>>2];g[ya>>2]=+g[jj>>2]+ +g[kj>>2];g[rb>>2]=+g[wa>>2]*+g[Wi>>2]+ +g[ya>>2]*+g[_i>>2];g[tb>>2]=+g[wa>>2]*+g[_i>>2]-+g[ya>>2]*+g[Wi>>2];g[da>>2]=+g[D>>2]-+g[E>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[Pb>>2]=+g[da>>2]*+g[Wi>>2]+ +g[ha>>2]*+g[_i>>2];g[Rb>>2]=+g[da>>2]*+g[_i>>2]-+g[ha>>2]*+g[Wi>>2];g[$>>2]=+g[D>>2]+ +g[E>>2];g[ba>>2]=+g[fa>>2]-+g[ga>>2];g[Qa>>2]=+g[$>>2]*+g[Wi>>2]+ +g[ba>>2]*+g[_i>>2];g[Sa>>2]=+g[$>>2]*+g[_i>>2]-+g[ba>>2]*+g[Wi>>2];g[Pi>>2]=+g[Eh>>2]*+g[oi>>2];g[Ui>>2]=+g[Si>>2]*+g[Ti>>2];g[Vi>>2]=+g[Pi>>2]-+g[Ui>>2];g[Gi>>2]=+g[Pi>>2]+ +g[Ui>>2];g[Xi>>2]=+g[Eh>>2]*+g[Ti>>2];g[Yi>>2]=+g[Si>>2]*+g[oi>>2];g[Zi>>2]=+g[Xi>>2]+ +g[Yi>>2];g[Hi>>2]=+g[Xi>>2]-+g[Yi>>2];g[$i>>2]=+g[Vi>>2]*+g[Wi>>2]+ +g[Zi>>2]*+g[_i>>2];g[Ki>>2]=+g[Gi>>2]*+g[_i>>2]-+g[Hi>>2]*+g[Wi>>2];g[bj>>2]=+g[Vi>>2]*+g[_i>>2]-+g[Zi>>2]*+g[Wi>>2];g[Ii>>2]=+g[Gi>>2]*+g[Wi>>2]+ +g[Hi>>2]*+g[_i>>2];g[P>>2]=+g[Bi>>2]*+g[oi>>2];g[Q>>2]=+g[Di>>2]*+g[Ti>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[Fa>>2]=+g[P>>2]+ +g[Q>>2];g[S>>2]=+g[Bi>>2]*+g[Ti>>2];g[T>>2]=+g[Di>>2]*+g[oi>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[Ga>>2]=+g[S>>2]-+g[T>>2];g[V>>2]=+g[R>>2]*+g[Wi>>2]+ +g[U>>2]*+g[_i>>2];g[Ja>>2]=+g[Fa>>2]*+g[_i>>2]-+g[Ga>>2]*+g[Wi>>2];g[X>>2]=+g[R>>2]*+g[_i>>2]-+g[U>>2]*+g[Wi>>2];g[Ha>>2]=+g[Fa>>2]*+g[Wi>>2]+ +g[Ga>>2]*+g[_i>>2];g[u>>2]=+g[c[m>>2]>>2];g[Mg>>2]=+g[c[o>>2]>>2];g[aj>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[cj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[dj>>2]=+g[$i>>2]*+g[aj>>2]+ 
+g[bj>>2]*+g[cj>>2];g[Lg>>2]=+g[$i>>2]*+g[cj>>2]-+g[bj>>2]*+g[aj>>2];g[ij>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[mj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[pi>>2]=+g[hj>>2]*+g[ij>>2]+ +g[lj>>2]*+g[mj>>2];g[Zb>>2]=+g[hj>>2]*+g[mj>>2]-+g[lj>>2]*+g[ij>>2];g[ti>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[xi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[yi>>2]=+g[si>>2]*+g[ti>>2]+ +g[wi>>2]*+g[xi>>2];g[_b>>2]=+g[si>>2]*+g[xi>>2]-+g[wi>>2]*+g[ti>>2];g[ej>>2]=+g[u>>2]+ +g[dj>>2];g[zi>>2]=+g[pi>>2]+ +g[yi>>2];g[Ai>>2]=+g[ej>>2]+ +g[zi>>2];g[ag>>2]=+g[ej>>2]-+g[zi>>2];g[Qh>>2]=+g[Mg>>2]-+g[Lg>>2];g[Rh>>2]=+g[pi>>2]-+g[yi>>2];g[Sh>>2]=+g[Qh>>2]-+g[Rh>>2];g[ei>>2]=+g[Rh>>2]+ +g[Qh>>2];g[Yb>>2]=+g[u>>2]-+g[dj>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[ac>>2]=+g[Yb>>2]-+g[$b>>2];g[ef>>2]=+g[Yb>>2]+ +g[$b>>2];g[Kg>>2]=+g[Zb>>2]+ +g[_b>>2];g[Ng>>2]=+g[Lg>>2]+ +g[Mg>>2];g[Og>>2]=+g[Kg>>2]+ +g[Ng>>2];g[ah>>2]=+g[Ng>>2]-+g[Kg>>2];g[ob>>2]=+g[c[n>>2]>>2];g[pb>>2]=+g[c[p>>2]>>2];g[qb>>2]=+g[Da>>2]*+g[ob>>2]+ +g[ce>>2]*+g[pb>>2];g[Kd>>2]=+g[Da>>2]*+g[pb>>2]-+g[ce>>2]*+g[ob>>2];g[Db>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Hb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Ib>>2]=+g[Cb>>2]*+g[Db>>2]+ +g[Gb>>2]*+g[Hb>>2];g[be>>2]=+g[Cb>>2]*+g[Hb>>2]-+g[Gb>>2]*+g[Db>>2];g[sb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ub>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[vb>>2]=+g[rb>>2]*+g[sb>>2]+ +g[tb>>2]*+g[ub>>2];g[Ld>>2]=+g[rb>>2]*+g[ub>>2]-+g[tb>>2]*+g[sb>>2];g[xb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[yb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[zb>>2]=+g[oi>>2]*+g[xb>>2]+ +g[Ti>>2]*+g[yb>>2];g[ae>>2]=+g[oi>>2]*+g[yb>>2]-+g[Ti>>2]*+g[xb>>2];g[wb>>2]=+g[qb>>2]+ +g[vb>>2];g[Jb>>2]=+g[zb>>2]+ +g[Ib>>2];g[Kb>>2]=+g[wb>>2]+ +g[Jb>>2];g[yf>>2]=+g[wb>>2]-+g[Jb>>2];g[rg>>2]=+g[Kd>>2]+ +g[Ld>>2];g[sg>>2]=+g[ae>>2]+ +g[be>>2];g[tg>>2]=+g[rg>>2]-+g[sg>>2];g[nh>>2]=+g[rg>>2]+ +g[sg>>2];g[Md>>2]=+g[Kd>>2]-+g[Ld>>2];g[Nd>>2]=+g[zb>>2]-+g[Ib>>2];g[Od>>2]=+g[Md>>2]+ 
+g[Nd>>2];g[we>>2]=+g[Md>>2]-+g[Nd>>2];g[$d>>2]=+g[qb>>2]-+g[vb>>2];g[ed>>2]=+g[ae>>2]-+g[be>>2];g[fd>>2]=+g[$d>>2]-+g[ed>>2];g[te>>2]=+g[$d>>2]+ +g[ed>>2];g[ib>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Nb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Ob>>2]=+g[hb>>2]*+g[ib>>2]+ +g[lb>>2]*+g[Nb>>2];g[Cd>>2]=+g[hb>>2]*+g[Nb>>2]-+g[lb>>2]*+g[ib>>2];g[xc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[zc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Ac>>2]=+g[wc>>2]*+g[xc>>2]+ +g[yc>>2]*+g[zc>>2];g[nd>>2]=+g[wc>>2]*+g[zc>>2]-+g[yc>>2]*+g[xc>>2];g[Qb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Sb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Tb>>2]=+g[Pb>>2]*+g[Qb>>2]+ +g[Rb>>2]*+g[Sb>>2];g[Dd>>2]=+g[Pb>>2]*+g[Sb>>2]-+g[Rb>>2]*+g[Qb>>2];g[Vb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Wb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[vc>>2]=+g[Gi>>2]*+g[Vb>>2]+ +g[Hi>>2]*+g[Wb>>2];g[md>>2]=+g[Gi>>2]*+g[Wb>>2]-+g[Hi>>2]*+g[Vb>>2];g[Ub>>2]=+g[Ob>>2]+ +g[Tb>>2];g[Bc>>2]=+g[vc>>2]+ +g[Ac>>2];g[Cc>>2]=+g[Ub>>2]+ +g[Bc>>2];g[Ef>>2]=+g[Ub>>2]-+g[Bc>>2];g[Jf>>2]=+g[Cd>>2]+ +g[Dd>>2];g[Kf>>2]=+g[md>>2]+ +g[nd>>2];g[Lf>>2]=+g[Jf>>2]-+g[Kf>>2];g[sh>>2]=+g[Jf>>2]+ +g[Kf>>2];g[ld>>2]=+g[Ob>>2]-+g[Tb>>2];g[od>>2]=+g[md>>2]-+g[nd>>2];g[pd>>2]=+g[ld>>2]-+g[od>>2];g[Ae>>2]=+g[ld>>2]+ +g[od>>2];g[de>>2]=+g[Cd>>2]-+g[Dd>>2];g[ee>>2]=+g[vc>>2]-+g[Ac>>2];g[fe>>2]=+g[de>>2]+ +g[ee>>2];g[De>>2]=+g[de>>2]-+g[ee>>2];g[Ci>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ei>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Fi>>2]=+g[Bi>>2]*+g[Ci>>2]+ +g[Di>>2]*+g[Ei>>2];g[bc>>2]=+g[Bi>>2]*+g[Ei>>2]-+g[Di>>2]*+g[Ci>>2];g[ea>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ia>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ja>>2]=+g[da>>2]*+g[ea>>2]+ +g[ha>>2]*+g[ia>>2];g[ic>>2]=+g[da>>2]*+g[ia>>2]-+g[ha>>2]*+g[ea>>2];g[Ji>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Li>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Mi>>2]=+g[Ii>>2]*+g[Ji>>2]+ 
+g[Ki>>2]*+g[Li>>2];g[cc>>2]=+g[Ii>>2]*+g[Li>>2]-+g[Ki>>2]*+g[Ji>>2];g[x>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[B>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[C>>2]=+g[w>>2]*+g[x>>2]+ +g[A>>2]*+g[B>>2];g[hc>>2]=+g[w>>2]*+g[B>>2]-+g[A>>2]*+g[x>>2];g[Ni>>2]=+g[Fi>>2]+ +g[Mi>>2];g[ka>>2]=+g[C>>2]+ +g[ja>>2];g[la>>2]=+g[Ni>>2]+ +g[ka>>2];g[$g>>2]=+g[ka>>2]-+g[Ni>>2];g[bg>>2]=+g[bc>>2]+ +g[cc>>2];g[cg>>2]=+g[hc>>2]+ +g[ic>>2];g[dg>>2]=+g[bg>>2]-+g[cg>>2];g[Jg>>2]=+g[bg>>2]+ +g[cg>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[ec>>2]=+g[Fi>>2]-+g[Mi>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[ff>>2]=+g[ec>>2]+ +g[dc>>2];g[gc>>2]=+g[C>>2]-+g[ja>>2];g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[kc>>2]=+g[gc>>2]+ +g[jc>>2];g[gf>>2]=+g[gc>>2]-+g[jc>>2];g[na>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[oa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[pa>>2]=+g[Eh>>2]*+g[na>>2]+ +g[Si>>2]*+g[oa>>2];g[nc>>2]=+g[Eh>>2]*+g[oa>>2]-+g[Si>>2]*+g[na>>2];g[Ca>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[G>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[H>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[F>>2]*+g[G>>2];g[uc>>2]=+g[Ba>>2]*+g[G>>2]-+g[F>>2]*+g[Ca>>2];g[ra>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ta>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ua>>2]=+g[qa>>2]*+g[ra>>2]+ +g[sa>>2]*+g[ta>>2];g[oc>>2]=+g[qa>>2]*+g[ta>>2]-+g[sa>>2]*+g[ra>>2];g[xa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Aa>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[tc>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[va>>2]=+g[pa>>2]+ +g[ua>>2];g[I>>2]=+g[Aa>>2]+ +g[H>>2];g[J>>2]=+g[va>>2]+ +g[I>>2];g[ig>>2]=+g[va>>2]-+g[I>>2];g[fg>>2]=+g[nc>>2]+ +g[oc>>2];g[gg>>2]=+g[tc>>2]+ +g[uc>>2];g[hg>>2]=+g[fg>>2]-+g[gg>>2];g[ih>>2]=+g[fg>>2]+ +g[gg>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[qc>>2]=+g[Aa>>2]-+g[H>>2];g[rc>>2]=+g[pc>>2]+ +g[qc>>2];g[kf>>2]=+g[pc>>2]-+g[qc>>2];g[sc>>2]=+g[pa>>2]-+g[ua>>2];g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[Xc>>2]=+g[sc>>2]-+g[Wc>>2];g[lf>>2]=+g[sc>>2]+ 
+g[Wc>>2];g[L>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[N>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[O>>2]=+g[K>>2]*+g[L>>2]+ +g[M>>2]*+g[N>>2];g[Zc>>2]=+g[K>>2]*+g[N>>2]-+g[M>>2]*+g[L>>2];g[Ia>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Ka>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[La>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[Ka>>2];g[Ed>>2]=+g[Ha>>2]*+g[Ka>>2]-+g[Ja>>2]*+g[Ia>>2];g[W>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Y>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Z>>2]=+g[V>>2]*+g[W>>2]+ +g[X>>2]*+g[Y>>2];g[_c>>2]=+g[V>>2]*+g[Y>>2]-+g[X>>2]*+g[W>>2];g[aa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ca>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ea>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[dd>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[_>>2]=+g[O>>2]+ +g[Z>>2];g[Ma>>2]=+g[Ea>>2]+ +g[La>>2];g[Na>>2]=+g[_>>2]+ +g[Ma>>2];g[kg>>2]=+g[_>>2]-+g[Ma>>2];g[lg>>2]=+g[Zc>>2]+ +g[_c>>2];g[mg>>2]=+g[dd>>2]+ +g[Ed>>2];g[ng>>2]=+g[lg>>2]-+g[mg>>2];g[jh>>2]=+g[lg>>2]+ +g[mg>>2];g[$c>>2]=+g[Zc>>2]-+g[_c>>2];g[ad>>2]=+g[Ea>>2]-+g[La>>2];g[bd>>2]=+g[$c>>2]+ +g[ad>>2];g[oe>>2]=+g[$c>>2]-+g[ad>>2];g[cd>>2]=+g[O>>2]-+g[Z>>2];g[Fd>>2]=+g[dd>>2]-+g[Ed>>2];g[Gd>>2]=+g[cd>>2]-+g[Fd>>2];g[pe>>2]=+g[cd>>2]+ +g[Fd>>2];g[Lb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Oa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Pa>>2]=+g[Fa>>2]*+g[Lb>>2]+ +g[Ga>>2]*+g[Oa>>2];g[Vd>>2]=+g[Fa>>2]*+g[Oa>>2]-+g[Ga>>2]*+g[Lb>>2];g[Ra>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Ta>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Ua>>2]=+g[Qa>>2]*+g[Ra>>2]+ +g[Sa>>2]*+g[Ta>>2];g[Wd>>2]=+g[Qa>>2]*+g[Ta>>2]-+g[Sa>>2]*+g[Ra>>2];g[Ud>>2]=+g[Pa>>2]-+g[Ua>>2];g[Xd>>2]=+g[Vd>>2]-+g[Wd>>2];g[Xa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Za>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[_a>>2]=+g[Wa>>2]*+g[Xa>>2]+ +g[Ya>>2]*+g[Za>>2];g[Qd>>2]=+g[Wa>>2]*+g[Za>>2]-+g[Ya>>2]*+g[Xa>>2];g[$a>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ab>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[bb>>2]=+g[R>>2]*+g[$a>>2]+ 
+g[U>>2]*+g[ab>>2];g[Rd>>2]=+g[R>>2]*+g[ab>>2]-+g[U>>2]*+g[$a>>2];g[Pd>>2]=+g[_a>>2]-+g[bb>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[Va>>2]=+g[Pa>>2]+ +g[Ua>>2];g[cb>>2]=+g[_a>>2]+ +g[bb>>2];g[db>>2]=+g[Va>>2]+ +g[cb>>2];g[ug>>2]=+g[cb>>2]-+g[Va>>2];g[zf>>2]=+g[Vd>>2]+ +g[Wd>>2];g[Af>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Bf>>2]=+g[zf>>2]-+g[Af>>2];g[oh>>2]=+g[zf>>2]+ +g[Af>>2];g[Td>>2]=+g[Pd>>2]-+g[Sd>>2];g[Yd>>2]=+g[Ud>>2]+ +g[Xd>>2];g[Zd>>2]=(+g[Td>>2]-+g[Yd>>2])*.7071067690849304;g[ue>>2]=(+g[Yd>>2]+ +g[Td>>2])*.7071067690849304;g[gd>>2]=+g[Xd>>2]-+g[Ud>>2];g[hd>>2]=+g[Pd>>2]+ +g[Sd>>2];g[id>>2]=(+g[gd>>2]-+g[hd>>2])*.7071067690849304;g[xe>>2]=(+g[gd>>2]+ +g[hd>>2])*.7071067690849304;g[Dc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ec>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Fc>>2]=+g[Mb>>2]*+g[Dc>>2]+ +g[mf>>2]*+g[Ec>>2];g[qd>>2]=+g[Mb>>2]*+g[Ec>>2]-+g[mf>>2]*+g[Dc>>2];g[Hc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Jc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Kc>>2]=+g[Gc>>2]*+g[Hc>>2]+ +g[Ic>>2]*+g[Jc>>2];g[rd>>2]=+g[Gc>>2]*+g[Jc>>2]-+g[Ic>>2]*+g[Hc>>2];g[sd>>2]=+g[qd>>2]-+g[rd>>2];g[td>>2]=+g[Fc>>2]-+g[Kc>>2];g[Mc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Nc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Oc>>2]=+g[Wi>>2]*+g[Mc>>2]+ +g[_i>>2]*+g[Nc>>2];g[wd>>2]=+g[Wi>>2]*+g[Nc>>2]-+g[_i>>2]*+g[Mc>>2];g[Pc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Qc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Rc>>2]=+g[Vi>>2]*+g[Pc>>2]+ +g[Zi>>2]*+g[Qc>>2];g[xd>>2]=+g[Vi>>2]*+g[Qc>>2]-+g[Zi>>2]*+g[Pc>>2];g[vd>>2]=+g[Oc>>2]-+g[Rc>>2];g[yd>>2]=+g[wd>>2]-+g[xd>>2];g[Lc>>2]=+g[Fc>>2]+ +g[Kc>>2];g[Sc>>2]=+g[Oc>>2]+ +g[Rc>>2];g[Tc>>2]=+g[Lc>>2]+ +g[Sc>>2];g[Mf>>2]=+g[Sc>>2]-+g[Lc>>2];g[Ff>>2]=+g[qd>>2]+ +g[rd>>2];g[Gf>>2]=+g[wd>>2]+ +g[xd>>2];g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[th>>2]=+g[Ff>>2]+ +g[Gf>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[zd>>2]=+g[vd>>2]+ +g[yd>>2];g[Ad>>2]=(+g[ud>>2]-+g[zd>>2])*.7071067690849304;g[Ee>>2]=(+g[ud>>2]+ 
+g[zd>>2])*.7071067690849304;g[ge>>2]=+g[vd>>2]-+g[yd>>2];g[he>>2]=+g[td>>2]+ +g[sd>>2];g[ie>>2]=(+g[ge>>2]-+g[he>>2])*.7071067690849304;g[Be>>2]=(+g[he>>2]+ +g[ge>>2])*.7071067690849304;g[ma>>2]=+g[Ai>>2]+ +g[la>>2];g[mb>>2]=+g[J>>2]+ +g[Na>>2];g[nb>>2]=+g[ma>>2]+ +g[mb>>2];g[Bh>>2]=+g[ma>>2]-+g[mb>>2];g[Ig>>2]=+g[ih>>2]+ +g[jh>>2];g[Pg>>2]=+g[Jg>>2]+ +g[Og>>2];g[Qg>>2]=+g[Ig>>2]+ +g[Pg>>2];g[Sg>>2]=+g[Pg>>2]-+g[Ig>>2];g[eb>>2]=+g[Kb>>2]+ +g[db>>2];g[Uc>>2]=+g[Cc>>2]+ +g[Tc>>2];g[Xb>>2]=+g[eb>>2]+ +g[Uc>>2];g[Rg>>2]=+g[Uc>>2]-+g[eb>>2];g[Ch>>2]=+g[nh>>2]+ +g[oh>>2];g[Dh>>2]=+g[sh>>2]+ +g[th>>2];g[Gg>>2]=+g[Ch>>2]-+g[Dh>>2];g[Hg>>2]=+g[Ch>>2]+ +g[Dh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[nb>>2]-+g[Xb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Hg>>2]-+g[Qg>>2];g[c[m>>2]>>2]=+g[nb>>2]+ +g[Xb>>2];g[c[n>>2]>>2]=+g[Hg>>2]+ +g[Qg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Bh>>2]-+g[Gg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Rg>>2]-+g[Sg>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Bh>>2]+ +g[Gg>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Rg>>2]+ +g[Sg>>2];g[hh>>2]=+g[Ai>>2]-+g[la>>2];g[kh>>2]=+g[ih>>2]-+g[jh>>2];g[lh>>2]=+g[hh>>2]+ +g[kh>>2];g[xh>>2]=+g[hh>>2]-+g[kh>>2];g[Ug>>2]=+g[Na>>2]-+g[J>>2];g[Vg>>2]=+g[Og>>2]-+g[Jg>>2];g[Wg>>2]=+g[Ug>>2]+ +g[Vg>>2];g[Yg>>2]=+g[Vg>>2]-+g[Ug>>2];g[mh>>2]=+g[Kb>>2]-+g[db>>2];g[ph>>2]=+g[nh>>2]-+g[oh>>2];g[qh>>2]=+g[mh>>2]+ +g[ph>>2];g[yh>>2]=+g[ph>>2]-+g[mh>>2];g[rh>>2]=+g[Cc>>2]-+g[Tc>>2];g[uh>>2]=+g[sh>>2]-+g[th>>2];g[vh>>2]=+g[rh>>2]-+g[uh>>2];g[zh>>2]=+g[rh>>2]+ +g[uh>>2];g[wh>>2]=(+g[qh>>2]+ +g[vh>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[lh>>2]-+g[wh>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[lh>>2]+ +g[wh>>2];g[Tg>>2]=(+g[yh>>2]+ +g[zh>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Tg>>2]-+g[Wg>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Tg>>2]+ 
+g[Wg>>2];g[Ah>>2]=(+g[yh>>2]-+g[zh>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[xh>>2]-+g[Ah>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[xh>>2]+ +g[Ah>>2];g[Xg>>2]=(+g[vh>>2]-+g[qh>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xg>>2]-+g[Yg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Xg>>2]+ +g[Yg>>2];g[eg>>2]=+g[ag>>2]-+g[dg>>2];g[bh>>2]=+g[$g>>2]+ +g[ah>>2];g[Ih>>2]=+g[ah>>2]-+g[$g>>2];g[Uf>>2]=+g[ag>>2]+ +g[dg>>2];g[jg>>2]=+g[hg>>2]-+g[ig>>2];g[og>>2]=+g[kg>>2]+ +g[ng>>2];g[pg>>2]=(+g[jg>>2]-+g[og>>2])*.7071067690849304;g[_g>>2]=(+g[jg>>2]+ +g[og>>2])*.7071067690849304;g[Bg>>2]=+g[Ef>>2]+ +g[Hf>>2];g[Cg>>2]=+g[Lf>>2]+ +g[Mf>>2];g[Dg>>2]=+g[Bg>>2]*.9238795042037964-+g[Cg>>2]*.3826834261417389;g[fh>>2]=+g[Cg>>2]*.9238795042037964+ +g[Bg>>2]*.3826834261417389;g[Vf>>2]=+g[ig>>2]+ +g[hg>>2];g[Wf>>2]=+g[kg>>2]-+g[ng>>2];g[wg>>2]=(+g[Vf>>2]+ +g[Wf>>2])*.7071067690849304;g[Hh>>2]=(+g[Wf>>2]-+g[Vf>>2])*.7071067690849304;g[xf>>2]=+g[tg>>2]-+g[ug>>2];g[Cf>>2]=+g[yf>>2]-+g[Bf>>2];g[Df>>2]=+g[xf>>2]*.9238795042037964+ +g[Cf>>2]*.3826834261417389;g[Rf>>2]=+g[xf>>2]*.3826834261417389-+g[Cf>>2]*.9238795042037964;g[yg>>2]=+g[tg>>2]+ +g[ug>>2];g[zg>>2]=+g[yf>>2]+ +g[Bf>>2];g[Ag>>2]=+g[yg>>2]*.3826834261417389+ +g[zg>>2]*.9238795042037964;g[eh>>2]=+g[yg>>2]*.9238795042037964-+g[zg>>2]*.3826834261417389;g[If>>2]=+g[Ef>>2]-+g[Hf>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[Of>>2]=+g[If>>2]*.3826834261417389-+g[Nf>>2]*.9238795042037964;g[Sf>>2]=+g[Nf>>2]*.3826834261417389+ +g[If>>2]*.9238795042037964;g[qg>>2]=+g[eg>>2]+ +g[pg>>2];g[Pf>>2]=+g[Df>>2]+ +g[Of>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[qg>>2]-+g[Pf>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qg>>2]+ +g[Pf>>2];g[Gh>>2]=+g[Rf>>2]+ +g[Sf>>2];g[Jh>>2]=+g[Hh>>2]+ +g[Ih>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Gh>>2]-+g[Jh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Gh>>2]+ 
+g[Jh>>2];g[Qf>>2]=+g[eg>>2]-+g[pg>>2];g[Tf>>2]=+g[Rf>>2]-+g[Sf>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Qf>>2]-+g[Tf>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Qf>>2]+ +g[Tf>>2];g[Kh>>2]=+g[Of>>2]-+g[Df>>2];g[Lh>>2]=+g[Ih>>2]-+g[Hh>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Kh>>2]-+g[Lh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Kh>>2]+ +g[Lh>>2];g[xg>>2]=+g[Uf>>2]+ +g[wg>>2];g[Eg>>2]=+g[Ag>>2]+ +g[Dg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[xg>>2]-+g[Eg>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[xg>>2]+ +g[Eg>>2];g[Zg>>2]=+g[eh>>2]+ +g[fh>>2];g[ch>>2]=+g[_g>>2]+ +g[bh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Zg>>2]-+g[ch>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Zg>>2]+ +g[ch>>2];g[Fg>>2]=+g[Uf>>2]-+g[wg>>2];g[gh>>2]=+g[eh>>2]-+g[fh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Fg>>2]-+g[gh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Fg>>2]+ +g[gh>>2];g[dh>>2]=+g[Dg>>2]-+g[Ag>>2];g[Fh>>2]=+g[bh>>2]-+g[_g>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[dh>>2]-+g[Fh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[dh>>2]+ +g[Fh>>2];g[lc>>2]=(+g[fc>>2]-+g[kc>>2])*.7071067690849304;g[mc>>2]=+g[ac>>2]-+g[lc>>2];g[Qe>>2]=+g[ac>>2]+ +g[lc>>2];g[di>>2]=(+g[gf>>2]-+g[ff>>2])*.7071067690849304;g[fi>>2]=+g[di>>2]+ +g[ei>>2];g[li>>2]=+g[ei>>2]-+g[di>>2];g[Yc>>2]=+g[rc>>2]*.3826834261417389-+g[Xc>>2]*.9238795042037964;g[Hd>>2]=+g[bd>>2]*.3826834261417389+ +g[Gd>>2]*.9238795042037964;g[Id>>2]=+g[Yc>>2]-+g[Hd>>2];g[ci>>2]=+g[Yc>>2]+ +g[Hd>>2];g[Ye>>2]=+g[pd>>2]+ +g[Ad>>2];g[Ze>>2]=+g[fe>>2]+ +g[ie>>2];g[_e>>2]=+g[Ye>>2]*.8314695954322815-+g[Ze>>2]*.5555702447891235;g[cf>>2]=+g[Ze>>2]*.8314695954322815+ +g[Ye>>2]*.5555702447891235;g[_d>>2]=+g[Od>>2]-+g[Zd>>2];g[jd>>2]=+g[fd>>2]-+g[id>>2];g[kd>>2]=+g[_d>>2]*.9807852506637573+ +g[jd>>2]*.19509032368659973;g[Ne>>2]=+g[_d>>2]*.19509032368659973-+g[jd>>2]*.9807852506637573;g[Re>>2]=+g[rc>>2]*.9238795042037964+ +g[Xc>>2]*.3826834261417389;g[Se>>2]=+g[Gd>>2]*.3826834261417389-+g[bd>>2]*.9238795042037964;g[Te>>2]=+g[Re>>2]+ 
+g[Se>>2];g[ki>>2]=+g[Se>>2]-+g[Re>>2];g[Ve>>2]=+g[Od>>2]+ +g[Zd>>2];g[We>>2]=+g[fd>>2]+ +g[id>>2];g[Xe>>2]=+g[Ve>>2]*.5555702447891235+ +g[We>>2]*.8314695954322815;g[bf>>2]=+g[Ve>>2]*.8314695954322815-+g[We>>2]*.5555702447891235;g[Bd>>2]=+g[pd>>2]-+g[Ad>>2];g[je>>2]=+g[fe>>2]-+g[ie>>2];g[ke>>2]=+g[Bd>>2]*.19509032368659973-+g[je>>2]*.9807852506637573;g[Oe>>2]=+g[je>>2]*.19509032368659973+ +g[Bd>>2]*.9807852506637573;g[Jd>>2]=+g[mc>>2]+ +g[Id>>2];g[le>>2]=+g[kd>>2]+ +g[ke>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Jd>>2]-+g[le>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Jd>>2]+ +g[le>>2];g[ji>>2]=+g[Ne>>2]+ +g[Oe>>2];g[mi>>2]=+g[ki>>2]+ +g[li>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[ji>>2]-+g[mi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ji>>2]+ +g[mi>>2];g[me>>2]=+g[mc>>2]-+g[Id>>2];g[Pe>>2]=+g[Ne>>2]-+g[Oe>>2];g[c[o>>2]>>2]=+g[me>>2]-+g[Pe>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[me>>2]+ +g[Pe>>2];g[ni>>2]=+g[ke>>2]-+g[kd>>2];g[Ph>>2]=+g[li>>2]-+g[ki>>2];g[c[p>>2]>>2]=+g[ni>>2]-+g[Ph>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ni>>2]+ +g[Ph>>2];g[Ue>>2]=+g[Qe>>2]+ +g[Te>>2];g[$e>>2]=+g[Xe>>2]+ +g[_e>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ue>>2]-+g[$e>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ue>>2]+ +g[$e>>2];g[bi>>2]=+g[bf>>2]+ +g[cf>>2];g[gi>>2]=+g[ci>>2]+ +g[fi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[bi>>2]-+g[gi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[bi>>2]+ +g[gi>>2];g[af>>2]=+g[Qe>>2]-+g[Te>>2];g[df>>2]=+g[bf>>2]-+g[cf>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[af>>2]-+g[df>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[af>>2]+ +g[df>>2];g[hi>>2]=+g[_e>>2]-+g[Xe>>2];g[ii>>2]=+g[fi>>2]-+g[ci>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[hi>>2]-+g[ii>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[hi>>2]+ +g[ii>>2];g[hf>>2]=(+g[ff>>2]+ +g[gf>>2])*.7071067690849304;g[jf>>2]=+g[ef>>2]-+g[hf>>2];g[Me>>2]=+g[ef>>2]+ +g[hf>>2];g[Oh>>2]=(+g[fc>>2]+ +g[kc>>2])*.7071067690849304;g[Th>>2]=+g[Oh>>2]+ 
+g[Sh>>2];g[Zh>>2]=+g[Sh>>2]-+g[Oh>>2];g[ne>>2]=+g[kf>>2]*.9238795042037964-+g[lf>>2]*.3826834261417389;g[qe>>2]=+g[oe>>2]*.9238795042037964+ +g[pe>>2]*.3826834261417389;g[re>>2]=+g[ne>>2]-+g[qe>>2];g[Nh>>2]=+g[ne>>2]+ +g[qe>>2];g[uf>>2]=+g[Ae>>2]+ +g[Be>>2];g[vf>>2]=+g[De>>2]+ +g[Ee>>2];g[wf>>2]=+g[uf>>2]*.9807852506637573-+g[vf>>2]*.19509032368659973;g[_f>>2]=+g[uf>>2]*.19509032368659973+ +g[vf>>2]*.9807852506637573;g[ve>>2]=+g[te>>2]-+g[ue>>2];g[ye>>2]=+g[we>>2]-+g[xe>>2];g[ze>>2]=+g[ve>>2]*.5555702447891235+ +g[ye>>2]*.8314695954322815;g[Je>>2]=+g[ye>>2]*.5555702447891235-+g[ve>>2]*.8314695954322815;g[nf>>2]=+g[kf>>2]*.3826834261417389+ +g[lf>>2]*.9238795042037964;g[of>>2]=+g[pe>>2]*.9238795042037964-+g[oe>>2]*.3826834261417389;g[pf>>2]=+g[nf>>2]+ +g[of>>2];g[Yh>>2]=+g[of>>2]-+g[nf>>2];g[rf>>2]=+g[te>>2]+ +g[ue>>2];g[sf>>2]=+g[we>>2]+ +g[xe>>2];g[tf>>2]=+g[rf>>2]*.9807852506637573+ +g[sf>>2]*.19509032368659973;g[Zf>>2]=+g[sf>>2]*.9807852506637573-+g[rf>>2]*.19509032368659973;g[Ce>>2]=+g[Ae>>2]-+g[Be>>2];g[Fe>>2]=+g[De>>2]-+g[Ee>>2];g[Ge>>2]=+g[Ce>>2]*.5555702447891235-+g[Fe>>2]*.8314695954322815;g[Ke>>2]=+g[Ce>>2]*.8314695954322815+ +g[Fe>>2]*.5555702447891235;g[se>>2]=+g[jf>>2]+ +g[re>>2];g[He>>2]=+g[ze>>2]+ +g[Ge>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[se>>2]-+g[He>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[se>>2]+ +g[He>>2];g[Xh>>2]=+g[Je>>2]+ +g[Ke>>2];g[_h>>2]=+g[Yh>>2]+ +g[Zh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Xh>>2]-+g[_h>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Xh>>2]+ +g[_h>>2];g[Ie>>2]=+g[jf>>2]-+g[re>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ie>>2]-+g[Le>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Ie>>2]+ +g[Le>>2];g[$h>>2]=+g[Ge>>2]-+g[ze>>2];g[ai>>2]=+g[Zh>>2]-+g[Yh>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$h>>2]-+g[ai>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[$h>>2]+ +g[ai>>2];g[qf>>2]=+g[Me>>2]+ +g[pf>>2];g[Xf>>2]=+g[tf>>2]+ 
+g[wf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[qf>>2]-+g[Xf>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[qf>>2]+ +g[Xf>>2];g[Mh>>2]=+g[Zf>>2]+ +g[_f>>2];g[Uh>>2]=+g[Nh>>2]+ +g[Th>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Mh>>2]-+g[Uh>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Mh>>2]+ +g[Uh>>2];g[Yf>>2]=+g[Me>>2]-+g[pf>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Yf>>2]-+g[$f>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Yf>>2]+ +g[$f>>2];g[Vh>>2]=+g[wf>>2]-+g[tf>>2];g[Wh>>2]=+g[Th>>2]-+g[Nh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Vh>>2]-+g[Wh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Vh>>2]+ +g[Wh>>2];c[nj>>2]=(c[nj>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=oj;return}function rq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,27,3352,0);i=b;return}function sq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0;X=i;i=i+160|0;m=X+148|0;n=X+144|0;o=X+140|0;p=X+136|0;q=X+132|0;r=X+128|0;Y=X+124|0;s=X+120|0;t=X+116|0;W=X+112|0;v=X+108|0;x=X+104|0;w=X+100|0;y=X+96|0;z=X+92|0;B=X+88|0;u=X+84|0;S=X+80|0;D=X+76|0;R=X+72|0;H=X+68|0;N=X+64|0;K=X+60|0;O=X+56|0;A=X+52|0;C=X+48|0;F=X+44|0;G=X+40|0;I=X+36|0;J=X+32|0;E=X+28|0;L=X+24|0;Q=X+20|0;T=X+16|0;M=X+12|0;P=X+8|0;U=X+4|0;V=X;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Y>>2]=j;c[s>>2]=k;c[t>>2]=l;c[W>>2]=c[Y>>2];c[q>>2]=(c[q>>2]|0)+((c[Y>>2]|0)-1<<2<<2);while(1){if((c[W>>2]|0)>=(c[s>>2]|0))break;g[v>>2]=+g[c[q>>2]>>2];g[x>>2]=+g[(c[q>>2]|0)+4>>2];g[w>>2]=+g[(c[q>>2]|0)+8>>2];g[y>>2]=+g[(c[q>>2]|0)+12>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ 
+g[x>>2]*+g[y>>2];g[B>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[u>>2]=+g[c[m>>2]>>2];g[S>>2]=+g[c[o>>2]>>2];g[A>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[C>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[D>>2]=+g[z>>2]*+g[A>>2]+ +g[B>>2]*+g[C>>2];g[R>>2]=+g[z>>2]*+g[C>>2]-+g[B>>2]*+g[A>>2];g[F>>2]=+g[c[n>>2]>>2];g[G>>2]=+g[c[p>>2]>>2];g[H>>2]=+g[v>>2]*+g[F>>2]+ +g[x>>2]*+g[G>>2];g[N>>2]=+g[v>>2]*+g[G>>2]-+g[x>>2]*+g[F>>2];g[I>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[J>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[K>>2]=+g[w>>2]*+g[I>>2]+ +g[y>>2]*+g[J>>2];g[O>>2]=+g[w>>2]*+g[J>>2]-+g[y>>2]*+g[I>>2];g[E>>2]=+g[u>>2]+ +g[D>>2];g[L>>2]=+g[H>>2]+ +g[K>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]-+g[L>>2];g[c[m>>2]>>2]=+g[E>>2]+ +g[L>>2];g[Q>>2]=+g[N>>2]+ +g[O>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]-+g[T>>2];g[c[n>>2]>>2]=+g[Q>>2]+ +g[T>>2];g[M>>2]=+g[u>>2]-+g[D>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[c[o>>2]>>2]=+g[M>>2]-+g[P>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]+ +g[P>>2];g[U>>2]=+g[K>>2]-+g[H>>2];g[V>>2]=+g[S>>2]-+g[R>>2];g[c[p>>2]>>2]=+g[U>>2]-+g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[U>>2]+ +g[V>>2];c[W>>2]=(c[W>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+16}i=X;return}function tq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,28,3400,0);i=b;return}function uq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0;Za=i;i=i+384|0;m=Za+376|0;n=Za+372|0;o=Za+368|0;p=Za+364|0;q=Za+360|0;r=Za+356|0;_a=Za+352|0;s=Za+348|0;t=Za+344|0;Ya=Za+336|0;S=Za+332|0;V=Za+328|0;T=Za+324|0;W=Za+320|0;Y=Za+316|0;Aa=Za+312|0;Ea=Za+308|0;Ga=Za+304|0;Ja=Za+300|0;Ka=Za+296|0;La=Za+292|0;Xa=Za+288|0;Na=Za+284|0;Va=Za+280|0;U=Za+276|0;za=Za+272|0;X=Za+268|0;ya=Za+264|0;Da=Za+260|0;N=Za+256|0;ja=Za+252|0;I=Za+248|0;ha=Za+244|0;C=Za+240|0;ta=Za+236|0;wa=Za+232|0;Qa=Za+228|0;O=Za+224|0;ma=Za+220|0;F=Za+216|0;aa=Za+212|0;B=Za+208|0;oa=Za+204|0;ra=Za+200|0;u=Za+196|0;H=Za+192|0;Ca=Za+188|0;G=Za+184|0;Z=Za+180|0;Ba=Za+176|0;da=Za+172|0;ua=Za+168|0;ga=Za+164|0;va=Za+160|0;ba=Za+156|0;ca=Za+152|0;ea=Za+148|0;fa=Za+144|0;Ia=Za+140|0;ka=Za+136|0;Pa=Za+132|0;la=Za+128|0;Fa=Za+124|0;Ha=Za+120|0;Ma=Za+116|0;Oa=Za+112|0;Ua=Za+108|0;pa=Za+104|0;$=Za+100|0;qa=Za+96|0;Sa=Za+92|0;Ta=Za+88|0;Wa=Za+84|0;_=Za+80|0;Ra=Za+76|0;ia=Za+72|0;K=Za+68|0;L=Za+64|0;E=Za+60|0;J=Za+56|0;A=Za+52|0;D=Za+48|0;w=Za+44|0;P=Za+40|0;z=Za+36|0;M=Za+32|0;x=Za+28|0;y=Za+24|0;na=Za+20|0;R=Za+16|0;v=Za+12|0;Q=Za+8|0;sa=Za+4|0;xa=Za;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[_a>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Za+340>>2]=.7071067690849304;c[Ya>>2]=c[_a>>2];c[q>>2]=(c[q>>2]|0)+(((c[_a>>2]|0)-1|0)*6<<2);while(1){if((c[Ya>>2]|0)>=(c[s>>2]|0))break;g[S>>2]=+g[c[q>>2]>>2];g[V>>2]=+g[(c[q>>2]|0)+4>>2];g[T>>2]=+g[(c[q>>2]|0)+8>>2];g[W>>2]=+g[(c[q>>2]|0)+12>>2];g[U>>2]=+g[S>>2]*+g[T>>2];g[za>>2]=+g[V>>2]*+g[T>>2];g[X>>2]=+g[V>>2]*+g[W>>2];g[ya>>2]=+g[S>>2]*+g[W>>2];g[Y>>2]=+g[U>>2]-+g[X>>2];g[Aa>>2]=+g[ya>>2]+ 
+g[za>>2];g[Ea>>2]=+g[U>>2]+ +g[X>>2];g[Ga>>2]=+g[ya>>2]-+g[za>>2];g[Ja>>2]=+g[(c[q>>2]|0)+16>>2];g[Ka>>2]=+g[(c[q>>2]|0)+20>>2];g[La>>2]=+g[S>>2]*+g[Ja>>2]+ +g[V>>2]*+g[Ka>>2];g[Xa>>2]=+g[Ea>>2]*+g[Ka>>2]-+g[Ga>>2]*+g[Ja>>2];g[Na>>2]=+g[S>>2]*+g[Ka>>2]-+g[V>>2]*+g[Ja>>2];g[Va>>2]=+g[Ea>>2]*+g[Ja>>2]+ +g[Ga>>2]*+g[Ka>>2];g[u>>2]=+g[c[m>>2]>>2];g[H>>2]=+g[c[o>>2]>>2];g[Z>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ba>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ca>>2]=+g[Y>>2]*+g[Z>>2]+ +g[Aa>>2]*+g[Ba>>2];g[G>>2]=+g[Y>>2]*+g[Ba>>2]-+g[Aa>>2]*+g[Z>>2];g[Da>>2]=+g[u>>2]+ +g[Ca>>2];g[N>>2]=+g[H>>2]-+g[G>>2];g[ja>>2]=+g[u>>2]-+g[Ca>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[ba>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[da>>2]=+g[Ja>>2]*+g[ba>>2]+ +g[Ka>>2]*+g[ca>>2];g[ua>>2]=+g[Ja>>2]*+g[ca>>2]-+g[Ka>>2]*+g[ba>>2];g[ea>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[fa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[ga>>2]=+g[T>>2]*+g[ea>>2]+ +g[W>>2]*+g[fa>>2];g[va>>2]=+g[T>>2]*+g[fa>>2]-+g[W>>2]*+g[ea>>2];g[ha>>2]=+g[da>>2]+ +g[ga>>2];g[C>>2]=+g[ua>>2]+ +g[va>>2];g[ta>>2]=+g[da>>2]-+g[ga>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[Fa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Ha>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ia>>2]=+g[Ea>>2]*+g[Fa>>2]+ +g[Ga>>2]*+g[Ha>>2];g[ka>>2]=+g[Ea>>2]*+g[Ha>>2]-+g[Ga>>2]*+g[Fa>>2];g[Ma>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Oa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Pa>>2]=+g[La>>2]*+g[Ma>>2]+ +g[Na>>2]*+g[Oa>>2];g[la>>2]=+g[La>>2]*+g[Oa>>2]-+g[Na>>2]*+g[Ma>>2];g[Qa>>2]=+g[Ia>>2]+ +g[Pa>>2];g[O>>2]=+g[Ia>>2]-+g[Pa>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[F>>2]=+g[ka>>2]+ +g[la>>2];g[Sa>>2]=+g[c[n>>2]>>2];g[Ta>>2]=+g[c[p>>2]>>2];g[Ua>>2]=+g[S>>2]*+g[Sa>>2]+ +g[V>>2]*+g[Ta>>2];g[pa>>2]=+g[S>>2]*+g[Ta>>2]-+g[V>>2]*+g[Sa>>2];g[Wa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[_>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$>>2]=+g[Va>>2]*+g[Wa>>2]+ +g[Xa>>2]*+g[_>>2];g[qa>>2]=+g[Va>>2]*+g[_>>2]-+g[Xa>>2]*+g[Wa>>2];g[aa>>2]=+g[Ua>>2]+ 
+g[$>>2];g[B>>2]=+g[pa>>2]+ +g[qa>>2];g[oa>>2]=+g[Ua>>2]-+g[$>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[Ra>>2]=+g[Da>>2]+ +g[Qa>>2];g[ia>>2]=+g[aa>>2]+ +g[ha>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ra>>2]-+g[ia>>2];g[c[m>>2]>>2]=+g[Ra>>2]+ +g[ia>>2];g[E>>2]=+g[B>>2]+ +g[C>>2];g[J>>2]=+g[F>>2]+ +g[I>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[E>>2]-+g[J>>2];g[c[n>>2]>>2]=+g[E>>2]+ +g[J>>2];g[A>>2]=+g[Da>>2]-+g[Qa>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[A>>2]-+g[D>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]+ +g[D>>2];g[K>>2]=+g[ha>>2]-+g[aa>>2];g[L>>2]=+g[I>>2]-+g[F>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[K>>2]+ +g[L>>2];g[w>>2]=+g[ja>>2]-+g[ma>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[x>>2]=+g[ra>>2]-+g[oa>>2];g[y>>2]=+g[ta>>2]+ +g[wa>>2];g[z>>2]=(+g[x>>2]-+g[y>>2])*.7071067690849304;g[M>>2]=(+g[x>>2]+ +g[y>>2])*.7071067690849304;g[c[o>>2]>>2]=+g[w>>2]-+g[z>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]+ +g[P>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[w>>2]+ +g[z>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[M>>2]-+g[P>>2];g[na>>2]=+g[ja>>2]+ +g[ma>>2];g[R>>2]=+g[O>>2]+ +g[N>>2];g[sa>>2]=+g[oa>>2]+ +g[ra>>2];g[xa>>2]=+g[ta>>2]-+g[wa>>2];g[v>>2]=(+g[sa>>2]+ +g[xa>>2])*.7071067690849304;g[Q>>2]=(+g[xa>>2]-+g[sa>>2])*.7071067690849304;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[na>>2]-+g[v>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Q>>2]+ +g[R>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[na>>2]+ +g[v>>2];g[c[p>>2]>>2]=+g[Q>>2]-+g[R>>2];c[Ya>>2]=(c[Ya>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24;c[r>>2]=c[r>>2]^c[2998]}i=Za;return}function vq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,29,3448,0);i=b;return}function wq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0;Hb=i;i=i+544|0;m=Hb+532|0;n=Hb+528|0;o=Hb+524|0;p=Hb+520|0;q=Hb+516|0;r=Hb+512|0;Ib=Hb+508|0;s=Hb+504|0;t=Hb+500|0;Gb=Hb+480|0;Fa=Hb+476|0;T=Hb+472|0;$a=Hb+468|0;H=Hb+464|0;Na=Hb+460|0;Ya=Hb+456|0;Za=Hb+452|0;ra=Hb+448|0;sa=Hb+444|0;Q=Hb+440|0;db=Hb+436|0;eb=Hb+432|0;fb=Hb+428|0;B=Hb+424|0;E=Hb+420|0;ya=Hb+416|0;ob=Hb+412|0;zb=Hb+408|0;Ab=Hb+404|0;oa=Hb+400|0;pa=Hb+396|0;R=Hb+392|0;ab=Hb+388|0;bb=Hb+384|0;cb=Hb+380|0;da=Hb+376|0;ga=Hb+372|0;xa=Hb+368|0;u=Hb+364|0;G=Hb+360|0;Ea=Hb+356|0;F=Hb+352|0;Ba=Hb+348|0;Da=Hb+344|0;Aa=Hb+340|0;Ca=Hb+336|0;Fb=Hb+332|0;z=Hb+328|0;Xa=Hb+324|0;D=Hb+320|0;Ma=Hb+316|0;A=Hb+312|0;Sa=Hb+308|0;C=Hb+304|0;Cb=Hb+300|0;Eb=Hb+296|0;Bb=Hb+292|0;Db=Hb+288|0;Ua=Hb+284|0;Wa=Hb+280|0;Ta=Hb+276|0;Va=Hb+272|0;Ja=Hb+268|0;La=Hb+264|0;Ia=Hb+260|0;Ka=Hb+256|0;Pa=Hb+252|0;Ra=Hb+248|0;Oa=Hb+244|0;Qa=Hb+240|0;ib=Hb+236|0;ba=Hb+232|0;yb=Hb+228|0;fa=Hb+224|0;nb=Hb+220|0;ca=Hb+216|0;tb=Hb+212|0;ea=Hb+208|0;Ha=Hb+204|0;hb=Hb+200|0;Ga=Hb+196|0;gb=Hb+192|0;vb=Hb+188|0;xb=Hb+184|0;ub=Hb+180|0;wb=Hb+176|0;kb=Hb+172|0;mb=Hb+168|0;jb=Hb+164|0;lb=Hb+160|0;qb=Hb+156|0;sb=Hb+152|0;pb=Hb+148|0;rb=Hb+144|0;la=Hb+140|0;_a=Hb+136|0;ma=Hb+132|0;ua=Hb+128|0;wa=Hb+124|0;qa=Hb+120|0;ta=Hb+116|0;va=Hb+112|0;na=Hb+108|0;X=Hb+104|0;S=Hb+100|0;Y=Hb+96|0;W=Hb+92|0;_=Hb+88|0;U=Hb+84|0;V=Hb+80|0;$=Hb+76|0;Z=Hb+72|0;x=Hb+68|0;v=Hb+64|0;w=Hb+60|0;ia=Hb+56|0;ka=Hb+52|0;aa=Hb+48|0;ha
=Hb+44|0;ja=Hb+40|0;y=Hb+36|0;M=Hb+32|0;za=Hb+28|0;L=Hb+24|0;K=Hb+20|0;O=Hb+16|0;I=Hb+12|0;J=Hb+8|0;P=Hb+4|0;N=Hb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ib>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Hb+496>>2]=.5877852439880371;g[Hb+492>>2]=.9510565400123596;g[Hb+488>>2]=.25;g[Hb+484>>2]=.55901700258255;c[Gb>>2]=c[Ib>>2];c[q>>2]=(c[q>>2]|0)+(((c[Ib>>2]|0)-1|0)*18<<2);while(1){if((c[Gb>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[G>>2]=+g[c[o>>2]>>2];g[Ba>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Da>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Aa>>2]=+g[(c[q>>2]|0)+32>>2];g[Ca>>2]=+g[(c[q>>2]|0)+36>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[F>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Fa>>2]=+g[u>>2]-+g[Ea>>2];g[T>>2]=+g[G>>2]-+g[F>>2];g[$a>>2]=+g[u>>2]+ +g[Ea>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[Cb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Eb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Bb>>2]=+g[(c[q>>2]|0)+24>>2];g[Db>>2]=+g[(c[q>>2]|0)+28>>2];g[Fb>>2]=+g[Bb>>2]*+g[Cb>>2]+ +g[Db>>2]*+g[Eb>>2];g[z>>2]=+g[Bb>>2]*+g[Eb>>2]-+g[Db>>2]*+g[Cb>>2];g[Ua>>2]=+g[c[n>>2]>>2];g[Wa>>2]=+g[c[p>>2]>>2];g[Ta>>2]=+g[c[q>>2]>>2];g[Va>>2]=+g[(c[q>>2]|0)+4>>2];g[Xa>>2]=+g[Ta>>2]*+g[Ua>>2]+ +g[Va>>2]*+g[Wa>>2];g[D>>2]=+g[Ta>>2]*+g[Wa>>2]-+g[Va>>2]*+g[Ua>>2];g[Ja>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[La>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ia>>2]=+g[(c[q>>2]|0)+64>>2];g[Ka>>2]=+g[(c[q>>2]|0)+68>>2];g[Ma>>2]=+g[Ia>>2]*+g[Ja>>2]+ +g[Ka>>2]*+g[La>>2];g[A>>2]=+g[Ia>>2]*+g[La>>2]-+g[Ka>>2]*+g[Ja>>2];g[Pa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ra>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Oa>>2]=+g[(c[q>>2]|0)+40>>2];g[Qa>>2]=+g[(c[q>>2]|0)+44>>2];g[Sa>>2]=+g[Oa>>2]*+g[Pa>>2]+ +g[Qa>>2]*+g[Ra>>2];g[C>>2]=+g[Oa>>2]*+g[Ra>>2]-+g[Qa>>2]*+g[Pa>>2];g[Na>>2]=+g[Fb>>2]-+g[Ma>>2];g[Ya>>2]=+g[Sa>>2]-+g[Xa>>2];g[Za>>2]=+g[Na>>2]+ +g[Ya>>2];g[ra>>2]=+g[z>>2]-+g[A>>2];g[sa>>2]=+g[D>>2]-+g[C>>2];g[Q>>2]=+g[sa>>2]-+g[ra>>2];g[db>>2]=+g[Fb>>2]+ +g[Ma>>2];g[eb>>2]=+g[Sa>>2]+ 
+g[Xa>>2];g[fb>>2]=+g[db>>2]+ +g[eb>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[ya>>2]=+g[B>>2]+ +g[E>>2];g[Ha>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[hb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ga>>2]=+g[(c[q>>2]|0)+8>>2];g[gb>>2]=+g[(c[q>>2]|0)+12>>2];g[ib>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[gb>>2]*+g[hb>>2];g[ba>>2]=+g[Ga>>2]*+g[hb>>2]-+g[gb>>2]*+g[Ha>>2];g[vb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[xb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[ub>>2]=+g[(c[q>>2]|0)+16>>2];g[wb>>2]=+g[(c[q>>2]|0)+20>>2];g[yb>>2]=+g[ub>>2]*+g[vb>>2]+ +g[wb>>2]*+g[xb>>2];g[fa>>2]=+g[ub>>2]*+g[xb>>2]-+g[wb>>2]*+g[vb>>2];g[kb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[mb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[jb>>2]=+g[(c[q>>2]|0)+48>>2];g[lb>>2]=+g[(c[q>>2]|0)+52>>2];g[nb>>2]=+g[jb>>2]*+g[kb>>2]+ +g[lb>>2]*+g[mb>>2];g[ca>>2]=+g[jb>>2]*+g[mb>>2]-+g[lb>>2]*+g[kb>>2];g[qb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[sb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[pb>>2]=+g[(c[q>>2]|0)+56>>2];g[rb>>2]=+g[(c[q>>2]|0)+60>>2];g[tb>>2]=+g[pb>>2]*+g[qb>>2]+ +g[rb>>2]*+g[sb>>2];g[ea>>2]=+g[pb>>2]*+g[sb>>2]-+g[rb>>2]*+g[qb>>2];g[ob>>2]=+g[ib>>2]-+g[nb>>2];g[zb>>2]=+g[tb>>2]-+g[yb>>2];g[Ab>>2]=+g[ob>>2]+ +g[zb>>2];g[oa>>2]=+g[ba>>2]-+g[ca>>2];g[pa>>2]=+g[ea>>2]-+g[fa>>2];g[R>>2]=+g[oa>>2]+ +g[pa>>2];g[ab>>2]=+g[ib>>2]+ +g[nb>>2];g[bb>>2]=+g[tb>>2]+ +g[yb>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[xa>>2]=+g[da>>2]+ +g[ga>>2];g[la>>2]=(+g[Ab>>2]-+g[Za>>2])*.55901700258255;g[_a>>2]=+g[Ab>>2]+ +g[Za>>2];g[ma>>2]=+g[Fa>>2]-+g[_a>>2]*.25;g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[ua>>2]=+g[qa>>2]*.9510565400123596+ +g[ta>>2]*.5877852439880371;g[wa>>2]=+g[ta>>2]*.9510565400123596-+g[qa>>2]*.5877852439880371;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Fa>>2]+ +g[_a>>2];g[va>>2]=+g[ma>>2]-+g[la>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[va>>2]-+g[wa>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[va>>2]+ +g[wa>>2];g[na>>2]=+g[la>>2]+ 
+g[ma>>2];g[c[o>>2]>>2]=+g[na>>2]-+g[ua>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[na>>2]+ +g[ua>>2];g[X>>2]=(+g[R>>2]+ +g[Q>>2])*.55901700258255;g[S>>2]=+g[Q>>2]-+g[R>>2];g[Y>>2]=+g[S>>2]*.25+ +g[T>>2];g[U>>2]=+g[Ya>>2]-+g[Na>>2];g[V>>2]=+g[ob>>2]-+g[zb>>2];g[W>>2]=+g[U>>2]*.5877852439880371-+g[V>>2]*.9510565400123596;g[_>>2]=+g[V>>2]*.5877852439880371+ +g[U>>2]*.9510565400123596;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[S>>2]-+g[T>>2];g[$>>2]=+g[Y>>2]-+g[X>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_>>2]-+g[$>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[_>>2]+ +g[$>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[c[p>>2]>>2]=+g[W>>2]-+g[Z>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[W>>2]+ +g[Z>>2];g[x>>2]=(+g[cb>>2]-+g[fb>>2])*.55901700258255;g[v>>2]=+g[cb>>2]+ +g[fb>>2];g[w>>2]=+g[$a>>2]-+g[v>>2]*.25;g[aa>>2]=+g[B>>2]-+g[E>>2];g[ha>>2]=+g[da>>2]-+g[ga>>2];g[ia>>2]=+g[aa>>2]*.9510565400123596-+g[ha>>2]*.5877852439880371;g[ka>>2]=+g[ha>>2]*.9510565400123596+ +g[aa>>2]*.5877852439880371;g[c[m>>2]>>2]=+g[$a>>2]+ +g[v>>2];g[ja>>2]=+g[x>>2]+ +g[w>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ja>>2]-+g[ka>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ja>>2]+ +g[ka>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[y>>2]-+g[ia>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[y>>2]+ +g[ia>>2];g[M>>2]=(+g[xa>>2]-+g[ya>>2])*.55901700258255;g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[L>>2]=+g[H>>2]-+g[za>>2]*.25;g[I>>2]=+g[db>>2]-+g[eb>>2];g[J>>2]=+g[ab>>2]-+g[bb>>2];g[K>>2]=+g[I>>2]*.9510565400123596-+g[J>>2]*.5877852439880371;g[O>>2]=+g[J>>2]*.9510565400123596+ +g[I>>2]*.5877852439880371;g[c[n>>2]>>2]=+g[za>>2]+ +g[H>>2];g[P>>2]=+g[M>>2]+ +g[L>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[O>>2]-+g[P>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[O>>2]+ +g[P>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[K>>2]-+g[N>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[K>>2]+ 
+g[N>>2];c[Gb>>2]=(c[Gb>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+72;c[r>>2]=c[r>>2]^c[2998]}i=Hb;return}function xq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,30,3496,0);i=b;return}function yq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0;$b=i;i=i+608|0;m=$b+604|0;n=$b+600|0;o=$b+596|0;p=$b+592|0;q=$b+588|0;r=$b+584|0;ac=$b+580|0;s=$b+576|0;t=$b+572|0;_b=$b+560|0;u=$b+556|0;$=$b+552|0;D=$b+548|0;Ga=$b+544|0;Cb=$b+540|0;A=$b+536|0;_=$b+532|0;Ha=$b+528|0;rb=$b+524|0;J=$b+520|0;ra=$b+516|0;I=$b+512|0;x=$b+508|0;oa=$b+504|0;K=$b+500|0;L=$b+496|0;Ib=$b+492|0;X=$b+488|0;ga=$b+484|0;Ja=$b+480|0;Tb=$b+476|0;da=$b+472|0;Y=$b+468|0;Ka=$b+464|0;ab=$b+460|0;Ba=$b+456|0;ma=$b+452|0;G=$b+448|0;lb=$b+444|0;ja=$b+440|0;Ca=$b+436|0;F=$b+432|0;Ya=$b+428|0;B=$b+424|0;Bb=$b+420|0;C=$b+416|0;Va=$b+412|0;Xa=$b+408|0;Da=$b+404|0;Wa=$b+400|0;_a=$b+396|0;Ab=$b+392|0;Za=$b+388|0;$a=$b+384|0;w=$b+380|0;qa=$b+376|0;wb=$b+372|0;pa=$b+368|0;ob=$b+364|0;qb=$b+360|0;nb=$b+356|0;pb=$b+352|0;yb=$b+348|0;v=$b+344|0;xb=$b+340|0;zb=$b+336|0;tb=$b+332|0;vb=$b+328|0;sb=$b+324|0;ub=$b+320|0;Sb=$b+316|0;fa=$b+312|0;Nb=$b+308|0;ea
=$b+304|0;Fb=$b+300|0;Hb=$b+296|0;Eb=$b+292|0;Gb=$b+288|0;Pb=$b+284|0;Rb=$b+280|0;Ob=$b+276|0;Qb=$b+272|0;Kb=$b+268|0;Mb=$b+264|0;Jb=$b+260|0;Lb=$b+256|0;kb=$b+252|0;la=$b+248|0;fb=$b+244|0;ka=$b+240|0;Xb=$b+236|0;Zb=$b+232|0;Wb=$b+228|0;Yb=$b+224|0;hb=$b+220|0;jb=$b+216|0;gb=$b+212|0;ib=$b+208|0;cb=$b+204|0;eb=$b+200|0;bb=$b+196|0;db=$b+192|0;Vb=$b+188|0;S=$b+184|0;ba=$b+180|0;Ea=$b+176|0;z=$b+172|0;ca=$b+168|0;V=$b+164|0;W=$b+160|0;Db=$b+156|0;Ub=$b+152|0;Z=$b+148|0;aa=$b+144|0;mb=$b+140|0;y=$b+136|0;T=$b+132|0;U=$b+128|0;wa=$b+124|0;O=$b+120|0;Ma=$b+116|0;Oa=$b+112|0;za=$b+108|0;Fa=$b+104|0;R=$b+100|0;Na=$b+96|0;ua=$b+92|0;va=$b+88|0;Ia=$b+84|0;La=$b+80|0;xa=$b+76|0;ya=$b+72|0;P=$b+68|0;Q=$b+64|0;ia=$b+60|0;Aa=$b+56|0;Sa=$b+52|0;Ua=$b+48|0;ta=$b+44|0;Ta=$b+40|0;N=$b+36|0;Pa=$b+32|0;E=$b+28|0;ha=$b+24|0;Qa=$b+20|0;Ra=$b+16|0;na=$b+12|0;sa=$b+8|0;H=$b+4|0;M=$b;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ac>>2]=j;c[s>>2]=k;c[t>>2]=l;g[$b+568>>2]=.5;g[$b+564>>2]=.8660253882408142;c[_b>>2]=c[ac>>2];c[q>>2]=(c[q>>2]|0)+(((c[ac>>2]|0)-1|0)*22<<2);while(1){if((c[_b>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[$>>2]=+g[c[o>>2]>>2];g[Va>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Da>>2]=+g[(c[q>>2]|0)+24>>2];g[Wa>>2]=+g[(c[q>>2]|0)+28>>2];g[Ya>>2]=+g[Da>>2]*+g[Va>>2]+ +g[Wa>>2]*+g[Xa>>2];g[B>>2]=+g[Da>>2]*+g[Xa>>2]-+g[Wa>>2]*+g[Va>>2];g[_a>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ab>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Za>>2]=+g[(c[q>>2]|0)+56>>2];g[$a>>2]=+g[(c[q>>2]|0)+60>>2];g[Bb>>2]=+g[Za>>2]*+g[_a>>2]+ +g[$a>>2]*+g[Ab>>2];g[C>>2]=+g[Za>>2]*+g[Ab>>2]-+g[$a>>2]*+g[_a>>2];g[D>>2]=(+g[B>>2]-+g[C>>2])*.8660253882408142;g[Ga>>2]=(+g[Bb>>2]-+g[Ya>>2])*.8660253882408142;g[Cb>>2]=+g[Ya>>2]+ +g[Bb>>2];g[A>>2]=+g[u>>2]-+g[Cb>>2]*.5;g[_>>2]=+g[B>>2]+ 
+g[C>>2];g[Ha>>2]=+g[$>>2]-+g[_>>2]*.5;g[ob>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[qb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[nb>>2]=+g[(c[q>>2]|0)+64>>2];g[pb>>2]=+g[(c[q>>2]|0)+68>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[J>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[yb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[v>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[xb>>2]=+g[(c[q>>2]|0)+32>>2];g[zb>>2]=+g[(c[q>>2]|0)+36>>2];g[w>>2]=+g[xb>>2]*+g[yb>>2]+ +g[zb>>2]*+g[v>>2];g[qa>>2]=+g[xb>>2]*+g[v>>2]-+g[zb>>2]*+g[yb>>2];g[tb>>2]=+g[c[n>>2]>>2];g[vb>>2]=+g[c[p>>2]>>2];g[sb>>2]=+g[c[q>>2]>>2];g[ub>>2]=+g[(c[q>>2]|0)+4>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[pa>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[ra>>2]=(+g[pa>>2]-+g[qa>>2])*.8660253882408142;g[I>>2]=(+g[w>>2]-+g[wb>>2])*.8660253882408142;g[x>>2]=+g[wb>>2]+ +g[w>>2];g[oa>>2]=+g[rb>>2]-+g[x>>2]*.5;g[K>>2]=+g[pa>>2]+ +g[qa>>2];g[L>>2]=+g[J>>2]-+g[K>>2]*.5;g[Fb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Hb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Eb>>2]=+g[(c[q>>2]|0)+40>>2];g[Gb>>2]=+g[(c[q>>2]|0)+44>>2];g[Ib>>2]=+g[Eb>>2]*+g[Fb>>2]+ +g[Gb>>2]*+g[Hb>>2];g[X>>2]=+g[Eb>>2]*+g[Hb>>2]-+g[Gb>>2]*+g[Fb>>2];g[Pb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Rb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ob>>2]=+g[(c[q>>2]|0)+8>>2];g[Qb>>2]=+g[(c[q>>2]|0)+12>>2];g[Sb>>2]=+g[Ob>>2]*+g[Pb>>2]+ +g[Qb>>2]*+g[Rb>>2];g[fa>>2]=+g[Ob>>2]*+g[Rb>>2]-+g[Qb>>2]*+g[Pb>>2];g[Kb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Mb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Jb>>2]=+g[(c[q>>2]|0)+72>>2];g[Lb>>2]=+g[(c[q>>2]|0)+76>>2];g[Nb>>2]=+g[Jb>>2]*+g[Kb>>2]+ +g[Lb>>2]*+g[Mb>>2];g[ea>>2]=+g[Jb>>2]*+g[Mb>>2]-+g[Lb>>2]*+g[Kb>>2];g[ga>>2]=(+g[ea>>2]-+g[fa>>2])*.8660253882408142;g[Ja>>2]=(+g[Sb>>2]-+g[Nb>>2])*.8660253882408142;g[Tb>>2]=+g[Nb>>2]+ +g[Sb>>2];g[da>>2]=+g[Ib>>2]-+g[Tb>>2]*.5;g[Y>>2]=+g[ea>>2]+ 
+g[fa>>2];g[Ka>>2]=+g[X>>2]-+g[Y>>2]*.5;g[Xb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Zb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Wb>>2]=+g[(c[q>>2]|0)+16>>2];g[Yb>>2]=+g[(c[q>>2]|0)+20>>2];g[ab>>2]=+g[Wb>>2]*+g[Xb>>2]+ +g[Yb>>2]*+g[Zb>>2];g[Ba>>2]=+g[Wb>>2]*+g[Zb>>2]-+g[Yb>>2]*+g[Xb>>2];g[hb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[jb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[gb>>2]=+g[(c[q>>2]|0)+80>>2];g[ib>>2]=+g[(c[q>>2]|0)+84>>2];g[kb>>2]=+g[gb>>2]*+g[hb>>2]+ +g[ib>>2]*+g[jb>>2];g[la>>2]=+g[gb>>2]*+g[jb>>2]-+g[ib>>2]*+g[hb>>2];g[cb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[eb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[bb>>2]=+g[(c[q>>2]|0)+48>>2];g[db>>2]=+g[(c[q>>2]|0)+52>>2];g[fb>>2]=+g[bb>>2]*+g[cb>>2]+ +g[db>>2]*+g[eb>>2];g[ka>>2]=+g[bb>>2]*+g[eb>>2]-+g[db>>2]*+g[cb>>2];g[ma>>2]=(+g[ka>>2]-+g[la>>2])*.8660253882408142;g[G>>2]=(+g[kb>>2]-+g[fb>>2])*.8660253882408142;g[lb>>2]=+g[fb>>2]+ +g[kb>>2];g[ja>>2]=+g[ab>>2]-+g[lb>>2]*.5;g[Ca>>2]=+g[ka>>2]+ +g[la>>2];g[F>>2]=+g[Ba>>2]-+g[Ca>>2]*.5;g[Db>>2]=+g[u>>2]+ +g[Cb>>2];g[Ub>>2]=+g[Ib>>2]+ +g[Tb>>2];g[Vb>>2]=+g[Db>>2]+ +g[Ub>>2];g[S>>2]=+g[Db>>2]-+g[Ub>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[Ea>>2]=+g[aa>>2]-+g[Z>>2];g[mb>>2]=+g[ab>>2]+ +g[lb>>2];g[y>>2]=+g[rb>>2]+ +g[x>>2];g[z>>2]=+g[mb>>2]+ +g[y>>2];g[ca>>2]=+g[mb>>2]-+g[y>>2];g[T>>2]=+g[Ba>>2]+ +g[Ca>>2];g[U>>2]=+g[J>>2]+ +g[K>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[W>>2]=+g[T>>2]+ +g[U>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Vb>>2]-+g[z>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[W>>2]-+g[ba>>2];g[c[m>>2]>>2]=+g[Vb>>2]+ +g[z>>2];g[c[n>>2]>>2]=+g[W>>2]+ +g[ba>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[S>>2]-+g[V>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ca>>2]+ +g[Ea>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[S>>2]+ +g[V>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ca>>2]-+g[Ea>>2];g[ua>>2]=+g[A>>2]+ +g[D>>2];g[va>>2]=+g[da>>2]+ +g[ga>>2];g[wa>>2]=+g[ua>>2]+ 
+g[va>>2];g[O>>2]=+g[ua>>2]-+g[va>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[La>>2]=+g[Ja>>2]+ +g[Ka>>2];g[Ma>>2]=+g[Ia>>2]-+g[La>>2];g[Oa>>2]=+g[La>>2]+ +g[Ia>>2];g[xa>>2]=+g[ja>>2]+ +g[ma>>2];g[ya>>2]=+g[oa>>2]+ +g[ra>>2];g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[Fa>>2]=+g[ya>>2]-+g[xa>>2];g[P>>2]=+g[G>>2]+ +g[F>>2];g[Q>>2]=+g[I>>2]+ +g[L>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[Na>>2]=+g[P>>2]+ +g[Q>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[wa>>2]-+g[za>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Na>>2]-+g[Oa>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[wa>>2]+ +g[za>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Na>>2]+ +g[Oa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[O>>2]-+g[R>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Fa>>2]-+g[Ma>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[O>>2]+ +g[R>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Fa>>2]+ +g[Ma>>2];g[E>>2]=+g[A>>2]-+g[D>>2];g[ha>>2]=+g[da>>2]-+g[ga>>2];g[ia>>2]=+g[E>>2]+ +g[ha>>2];g[Aa>>2]=+g[E>>2]-+g[ha>>2];g[Qa>>2]=+g[Ka>>2]-+g[Ja>>2];g[Ra>>2]=+g[Ha>>2]-+g[Ga>>2];g[Sa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[Ua>>2]=+g[Ra>>2]-+g[Qa>>2];g[na>>2]=+g[ja>>2]-+g[ma>>2];g[sa>>2]=+g[oa>>2]-+g[ra>>2];g[ta>>2]=+g[na>>2]+ +g[sa>>2];g[Ta>>2]=+g[sa>>2]-+g[na>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[M>>2]=+g[I>>2]-+g[L>>2];g[N>>2]=+g[H>>2]+ +g[M>>2];g[Pa>>2]=+g[M>>2]-+g[H>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ia>>2]-+g[ta>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Pa>>2]+ +g[Sa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ia>>2]+ +g[ta>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Pa>>2]-+g[Sa>>2];g[c[o>>2]>>2]=+g[Aa>>2]-+g[N>>2];g[c[p>>2]>>2]=+g[Ta>>2]-+g[Ua>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Aa>>2]+ +g[N>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ta>>2]+ +g[Ua>>2];c[_b>>2]=(c[_b>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+88;c[r>>2]=c[r>>2]^c[2998]}i=$b;return}function zq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,31,3544,0);i=b;return}function 
Aq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0;jd=i;i=i+880|0;m=jd+864|0;n=jd+860|0;o=jd+856|0;p=jd+852|0;q=jd+848|0;r=jd+844|0;kd=jd+840|0;s=jd+836|0;t=jd+832|0;id=jd+816|0;hc=jd+812|0;Ub=jd+808|0;wa=jd+804|0;gb=jd+800|0;Sc=jd+796|0;Vb=jd+792|0;za=jd+788|0;db=jd+784|0;cd=jd+780|0;Eb=jd+776|0;H=jd+772|0;ob=jd+768|0;pc=jd+764|0;Fb=jd+760|0;M=jd+756|0;pb=jd+752|0;ia=jd+748|0;ta=jd+744|0;Pa=jd+740|0;Qa=jd+736|0;Ra=jd+732|0;Sa=jd+728|0;ca=jd+724|0;vb=jd+720|0;Ia=jd+716|0;wb=jd+712|0;Cc=jd+708|0;y=jd+704|0;Ib=jd+700|0;Jb=jd+696|0;Kb=jd+692|0;Lb=jd+688|0;T=jd+684|0;sb=jd+680|0;Y=jd+676|0;tb=jd+672|0;u=jd+668|0;fb=jd+664|0;gc=jd+660|0;eb=jd+656|0;Mb=jd+652|0;fc=jd+648|0;Da=jd+644|0;ec=jd+640|0;Mc=jd+636|0;xa=jd+632|0;Rc=jd+628|0;ya=jd+624|0;jc=jd+620|0;Lc=jd+616|0;ic=jd+612|0;Kc=jd+608|0;Oc=jd+604|0;Qc=jd+600|0;Nc=jd+596|0;Pc=jd+592|0;Yc=jd+588|0;Ba=jd+584|0;bd=jd+580|0;Ca=jd+576|0;F=jd+572|0;G=jd+568|0;Vc=jd+564|0;Xc=jd+560|0;
Uc=jd+556|0;Wc=jd+552|0;_c=jd+548|0;ad=jd+544|0;Zc=jd+540|0;$c=jd+536|0;hd=jd+532|0;J=jd+528|0;oc=jd+524|0;K=jd+520|0;I=jd+516|0;L=jd+512|0;ed=jd+508|0;gd=jd+504|0;dd=jd+500|0;fd=jd+496|0;lc=jd+492|0;nc=jd+488|0;kc=jd+484|0;mc=jd+480|0;E=jd+476|0;Ea=jd+472|0;sa=jd+468|0;aa=jd+464|0;ha=jd+460|0;Fa=jd+456|0;na=jd+452|0;$=jd+448|0;B=jd+444|0;D=jd+440|0;A=jd+436|0;C=jd+432|0;pa=jd+428|0;ra=jd+424|0;oa=jd+420|0;qa=jd+416|0;ea=jd+412|0;ga=jd+408|0;da=jd+404|0;fa=jd+400|0;ka=jd+396|0;ma=jd+392|0;ja=jd+388|0;la=jd+384|0;_=jd+380|0;ba=jd+376|0;Ga=jd+372|0;Ha=jd+368|0;wc=jd+364|0;P=jd+360|0;x=jd+356|0;W=jd+352|0;Bc=jd+348|0;Q=jd+344|0;Hc=jd+340|0;V=jd+336|0;tc=jd+332|0;vc=jd+328|0;sc=jd+324|0;uc=jd+320|0;Jc=jd+316|0;w=jd+312|0;Ic=jd+308|0;v=jd+304|0;yc=jd+300|0;Ac=jd+296|0;xc=jd+292|0;zc=jd+288|0;Ec=jd+284|0;Gc=jd+280|0;Dc=jd+276|0;Fc=jd+272|0;R=jd+268|0;S=jd+264|0;U=jd+260|0;X=jd+256|0;O=jd+252|0;La=jd+248|0;bc=jd+244|0;dc=jd+240|0;Ka=jd+236|0;cc=jd+232|0;mb=jd+228|0;_b=jd+224|0;Aa=jd+220|0;N=jd+216|0;$b=jd+212|0;ac=jd+208|0;Z=jd+204|0;Ja=jd+200|0;Ma=jd+196|0;Na=jd+192|0;Hb=jd+188|0;Va=jd+184|0;Pb=jd+180|0;Rb=jd+176|0;Ua=jd+172|0;Qb=jd+168|0;Ya=jd+164|0;lb=jd+160|0;Db=jd+156|0;Gb=jd+152|0;Nb=jd+148|0;Ob=jd+144|0;Oa=jd+140|0;Ta=jd+136|0;Wa=jd+132|0;Xa=jd+128|0;rb=jd+124|0;zb=jd+120|0;Xb=jd+116|0;Zb=jd+112|0;yb=jd+108|0;Yb=jd+104|0;Cb=jd+100|0;Sb=jd+96|0;nb=jd+92|0;qb=jd+88|0;Tb=jd+84|0;Wb=jd+80|0;ub=jd+76|0;xb=jd+72|0;Ab=jd+68|0;Bb=jd+64|0;rc=jd+60|0;Za=jd+56|0;ib=jd+52|0;kb=jd+48|0;va=jd+44|0;jb=jd+40|0;ab=jd+36|0;bb=jd+32|0;Tc=jd+28|0;qc=jd+24|0;cb=jd+20|0;hb=jd+16|0;z=jd+12|0;ua=jd+8|0;_a=jd+4|0;$a=jd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[kd>>2]=j;c[s>>2]=k;c[t>>2]=l;g[jd+828>>2]=.3826834261417389;g[jd+824>>2]=.9238795042037964;g[jd+820>>2]=.7071067690849304;c[id>>2]=c[kd>>2];c[q>>2]=(c[q>>2]|0)+(((c[kd>>2]|0)-1|0)*30<<2);while(1){if((c[id>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[fb>>2]=+g[c[o>>2]>>2];g[Mb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2
)>>2];g[fc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Da>>2]=+g[(c[q>>2]|0)+56>>2];g[ec>>2]=+g[(c[q>>2]|0)+60>>2];g[gc>>2]=+g[Da>>2]*+g[Mb>>2]+ +g[ec>>2]*+g[fc>>2];g[eb>>2]=+g[Da>>2]*+g[fc>>2]-+g[ec>>2]*+g[Mb>>2];g[hc>>2]=+g[u>>2]+ +g[gc>>2];g[Ub>>2]=+g[fb>>2]-+g[eb>>2];g[wa>>2]=+g[u>>2]-+g[gc>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[jc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Lc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ic>>2]=+g[(c[q>>2]|0)+24>>2];g[Kc>>2]=+g[(c[q>>2]|0)+28>>2];g[Mc>>2]=+g[ic>>2]*+g[jc>>2]+ +g[Kc>>2]*+g[Lc>>2];g[xa>>2]=+g[ic>>2]*+g[Lc>>2]-+g[Kc>>2]*+g[jc>>2];g[Oc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Qc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Nc>>2]=+g[(c[q>>2]|0)+88>>2];g[Pc>>2]=+g[(c[q>>2]|0)+92>>2];g[Rc>>2]=+g[Nc>>2]*+g[Oc>>2]+ +g[Pc>>2]*+g[Qc>>2];g[ya>>2]=+g[Nc>>2]*+g[Qc>>2]-+g[Pc>>2]*+g[Oc>>2];g[Sc>>2]=+g[Mc>>2]+ +g[Rc>>2];g[Vb>>2]=+g[Mc>>2]-+g[Rc>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[db>>2]=+g[xa>>2]+ +g[ya>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Xc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Uc>>2]=+g[(c[q>>2]|0)+8>>2];g[Wc>>2]=+g[(c[q>>2]|0)+12>>2];g[Yc>>2]=+g[Uc>>2]*+g[Vc>>2]+ +g[Wc>>2]*+g[Xc>>2];g[Ba>>2]=+g[Uc>>2]*+g[Xc>>2]-+g[Wc>>2]*+g[Vc>>2];g[_c>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ad>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Zc>>2]=+g[(c[q>>2]|0)+72>>2];g[$c>>2]=+g[(c[q>>2]|0)+76>>2];g[bd>>2]=+g[Zc>>2]*+g[_c>>2]+ +g[$c>>2]*+g[ad>>2];g[Ca>>2]=+g[Zc>>2]*+g[ad>>2]-+g[$c>>2]*+g[_c>>2];g[cd>>2]=+g[Yc>>2]+ +g[bd>>2];g[Eb>>2]=+g[Ba>>2]+ +g[Ca>>2];g[F>>2]=+g[Ba>>2]-+g[Ca>>2];g[G>>2]=+g[Yc>>2]-+g[bd>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[ob>>2]=+g[G>>2]+ +g[F>>2];g[ed>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[gd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[dd>>2]=+g[(c[q>>2]|0)+104>>2];g[fd>>2]=+g[(c[q>>2]|0)+108>>2];g[hd>>2]=+g[dd>>2]*+g[ed>>2]+ 
+g[fd>>2]*+g[gd>>2];g[J>>2]=+g[dd>>2]*+g[gd>>2]-+g[fd>>2]*+g[ed>>2];g[lc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[nc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[kc>>2]=+g[(c[q>>2]|0)+40>>2];g[mc>>2]=+g[(c[q>>2]|0)+44>>2];g[oc>>2]=+g[kc>>2]*+g[lc>>2]+ +g[mc>>2]*+g[nc>>2];g[K>>2]=+g[kc>>2]*+g[nc>>2]-+g[mc>>2]*+g[lc>>2];g[pc>>2]=+g[hd>>2]+ +g[oc>>2];g[Fb>>2]=+g[J>>2]+ +g[K>>2];g[I>>2]=+g[hd>>2]-+g[oc>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[M>>2]=+g[I>>2]+ +g[L>>2];g[pb>>2]=+g[I>>2]-+g[L>>2];g[B>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[A>>2]=+g[(c[q>>2]|0)+112>>2];g[C>>2]=+g[(c[q>>2]|0)+116>>2];g[E>>2]=+g[A>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[Ea>>2]=+g[A>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[pa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ra>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[oa>>2]=+g[(c[q>>2]|0)+80>>2];g[qa>>2]=+g[(c[q>>2]|0)+84>>2];g[sa>>2]=+g[oa>>2]*+g[pa>>2]+ +g[qa>>2]*+g[ra>>2];g[aa>>2]=+g[oa>>2]*+g[ra>>2]-+g[qa>>2]*+g[pa>>2];g[ea>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ga>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[da>>2]=+g[(c[q>>2]|0)+48>>2];g[fa>>2]=+g[(c[q>>2]|0)+52>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[Fa>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[ka>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ma>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[ja>>2]=+g[(c[q>>2]|0)+16>>2];g[la>>2]=+g[(c[q>>2]|0)+20>>2];g[na>>2]=+g[ja>>2]*+g[ka>>2]+ +g[la>>2]*+g[ma>>2];g[$>>2]=+g[ja>>2]*+g[ma>>2]-+g[la>>2]*+g[ka>>2];g[ia>>2]=+g[E>>2]+ +g[ha>>2];g[ta>>2]=+g[na>>2]+ +g[sa>>2];g[Pa>>2]=+g[ia>>2]-+g[ta>>2];g[Qa>>2]=+g[Ea>>2]+ +g[Fa>>2];g[Ra>>2]=+g[$>>2]+ +g[aa>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[_>>2]=+g[E>>2]-+g[ha>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[ca>>2]=+g[_>>2]-+g[ba>>2];g[vb>>2]=+g[_>>2]+ +g[ba>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Ha>>2]=+g[na>>2]-+g[sa>>2];g[Ia>>2]=+g[Ga>>2]+ 
+g[Ha>>2];g[wb>>2]=+g[Ga>>2]-+g[Ha>>2];g[tc>>2]=+g[c[n>>2]>>2];g[vc>>2]=+g[c[p>>2]>>2];g[sc>>2]=+g[c[q>>2]>>2];g[uc>>2]=+g[(c[q>>2]|0)+4>>2];g[wc>>2]=+g[sc>>2]*+g[tc>>2]+ +g[uc>>2]*+g[vc>>2];g[P>>2]=+g[sc>>2]*+g[vc>>2]-+g[uc>>2]*+g[tc>>2];g[Jc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[w>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ic>>2]=+g[(c[q>>2]|0)+96>>2];g[v>>2]=+g[(c[q>>2]|0)+100>>2];g[x>>2]=+g[Ic>>2]*+g[Jc>>2]+ +g[v>>2]*+g[w>>2];g[W>>2]=+g[Ic>>2]*+g[w>>2]-+g[v>>2]*+g[Jc>>2];g[yc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ac>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[xc>>2]=+g[(c[q>>2]|0)+64>>2];g[zc>>2]=+g[(c[q>>2]|0)+68>>2];g[Bc>>2]=+g[xc>>2]*+g[yc>>2]+ +g[zc>>2]*+g[Ac>>2];g[Q>>2]=+g[xc>>2]*+g[Ac>>2]-+g[zc>>2]*+g[yc>>2];g[Ec>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Gc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Dc>>2]=+g[(c[q>>2]|0)+32>>2];g[Fc>>2]=+g[(c[q>>2]|0)+36>>2];g[Hc>>2]=+g[Dc>>2]*+g[Ec>>2]+ +g[Fc>>2]*+g[Gc>>2];g[V>>2]=+g[Dc>>2]*+g[Gc>>2]-+g[Fc>>2]*+g[Ec>>2];g[Cc>>2]=+g[wc>>2]+ +g[Bc>>2];g[y>>2]=+g[Hc>>2]+ +g[x>>2];g[Ib>>2]=+g[Cc>>2]-+g[y>>2];g[Jb>>2]=+g[P>>2]+ +g[Q>>2];g[Kb>>2]=+g[V>>2]+ +g[W>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[S>>2]=+g[Hc>>2]-+g[x>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[sb>>2]=+g[R>>2]-+g[S>>2];g[U>>2]=+g[wc>>2]-+g[Bc>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[Y>>2]=+g[U>>2]-+g[X>>2];g[tb>>2]=+g[U>>2]+ +g[X>>2];g[Aa>>2]=+g[wa>>2]-+g[za>>2];g[N>>2]=(+g[H>>2]-+g[M>>2])*.7071067690849304;g[O>>2]=+g[Aa>>2]+ +g[N>>2];g[La>>2]=+g[Aa>>2]-+g[N>>2];g[$b>>2]=(+g[pb>>2]-+g[ob>>2])*.7071067690849304;g[ac>>2]=+g[Vb>>2]+ +g[Ub>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[dc>>2]=+g[ac>>2]-+g[$b>>2];g[Z>>2]=+g[T>>2]*.9238795042037964+ +g[Y>>2]*.3826834261417389;g[Ja>>2]=+g[ca>>2]*.3826834261417389-+g[Ia>>2]*.9238795042037964;g[Ka>>2]=+g[Z>>2]+ +g[Ja>>2];g[cc>>2]=+g[Ja>>2]-+g[Z>>2];g[Ma>>2]=+g[T>>2]*.3826834261417389-+g[Y>>2]*.9238795042037964;g[Na>>2]=+g[Ia>>2]*.3826834261417389+ 
+g[ca>>2]*.9238795042037964;g[mb>>2]=+g[Ma>>2]-+g[Na>>2];g[_b>>2]=+g[Ma>>2]+ +g[Na>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[O>>2]-+g[Ka>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[_b>>2]-+g[bc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[O>>2]+ +g[Ka>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[_b>>2]+ +g[bc>>2];g[c[o>>2]>>2]=+g[La>>2]-+g[mb>>2];g[c[p>>2]>>2]=+g[cc>>2]-+g[dc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[La>>2]+ +g[mb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]+ +g[dc>>2];g[Db>>2]=+g[hc>>2]-+g[Sc>>2];g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[Va>>2]=+g[Db>>2]-+g[Gb>>2];g[Nb>>2]=+g[pc>>2]-+g[cd>>2];g[Ob>>2]=+g[gb>>2]-+g[db>>2];g[Pb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Rb>>2]=+g[Ob>>2]-+g[Nb>>2];g[Oa>>2]=+g[Ib>>2]+ +g[Lb>>2];g[Ta>>2]=+g[Pa>>2]-+g[Sa>>2];g[Ua>>2]=(+g[Oa>>2]+ +g[Ta>>2])*.7071067690849304;g[Qb>>2]=(+g[Ta>>2]-+g[Oa>>2])*.7071067690849304;g[Wa>>2]=+g[Lb>>2]-+g[Ib>>2];g[Xa>>2]=+g[Pa>>2]+ +g[Sa>>2];g[Ya>>2]=(+g[Wa>>2]-+g[Xa>>2])*.7071067690849304;g[lb>>2]=(+g[Wa>>2]+ +g[Xa>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Hb>>2]-+g[Ua>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[lb>>2]-+g[Pb>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Hb>>2]+ +g[Ua>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[lb>>2]+ +g[Pb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Va>>2]-+g[Ya>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Qb>>2]-+g[Rb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Va>>2]+ +g[Ya>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Qb>>2]+ +g[Rb>>2];g[nb>>2]=+g[wa>>2]+ +g[za>>2];g[qb>>2]=(+g[ob>>2]+ +g[pb>>2])*.7071067690849304;g[rb>>2]=+g[nb>>2]+ +g[qb>>2];g[zb>>2]=+g[nb>>2]-+g[qb>>2];g[Tb>>2]=(+g[H>>2]+ +g[M>>2])*.7071067690849304;g[Wb>>2]=+g[Ub>>2]-+g[Vb>>2];g[Xb>>2]=+g[Tb>>2]+ +g[Wb>>2];g[Zb>>2]=+g[Wb>>2]-+g[Tb>>2];g[ub>>2]=+g[sb>>2]*.3826834261417389+ +g[tb>>2]*.9238795042037964;g[xb>>2]=+g[vb>>2]*.9238795042037964-+g[wb>>2]*.3826834261417389;g[yb>>2]=+g[ub>>2]+ 
+g[xb>>2];g[Yb>>2]=+g[xb>>2]-+g[ub>>2];g[Ab>>2]=+g[sb>>2]*.9238795042037964-+g[tb>>2]*.3826834261417389;g[Bb>>2]=+g[wb>>2]*.9238795042037964+ +g[vb>>2]*.3826834261417389;g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Sb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[rb>>2]-+g[yb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Sb>>2]-+g[Xb>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[rb>>2]+ +g[yb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sb>>2]+ +g[Xb>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[zb>>2]-+g[Cb>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Yb>>2]-+g[Zb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[zb>>2]+ +g[Cb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[Tc>>2]=+g[hc>>2]+ +g[Sc>>2];g[qc>>2]=+g[cd>>2]+ +g[pc>>2];g[rc>>2]=+g[Tc>>2]+ +g[qc>>2];g[Za>>2]=+g[Tc>>2]-+g[qc>>2];g[cb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[hb>>2]=+g[db>>2]+ +g[gb>>2];g[ib>>2]=+g[cb>>2]+ +g[hb>>2];g[kb>>2]=+g[hb>>2]-+g[cb>>2];g[z>>2]=+g[Cc>>2]+ +g[y>>2];g[ua>>2]=+g[ia>>2]+ +g[ta>>2];g[va>>2]=+g[z>>2]+ +g[ua>>2];g[jb>>2]=+g[ua>>2]-+g[z>>2];g[_a>>2]=+g[Jb>>2]+ +g[Kb>>2];g[$a>>2]=+g[Qa>>2]+ +g[Ra>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[bb>>2]=+g[_a>>2]+ +g[$a>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[rc>>2]-+g[va>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[bb>>2]-+g[ib>>2];g[c[m>>2]>>2]=+g[rc>>2]+ +g[va>>2];g[c[n>>2]>>2]=+g[bb>>2]+ +g[ib>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Za>>2]-+g[ab>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[jb>>2]-+g[kb>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Za>>2]+ +g[ab>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[jb>>2]+ +g[kb>>2];c[id>>2]=(c[id>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+120;c[r>>2]=c[r>>2]^c[2998]}i=jd;return}function Bq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,32,3592,0);i=b;return}function Cq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0;Je=i;i=i+1200|0;m=Je+1188|0;n=Je+1184|0;o=Je+1180|0;p=Je+1176|0;q=Je+1172|0;r=Je+1168|0;Ke=Je+1164|0;s=Je+1160|0;t=Je+1156|0;Ie=Je+1136|0;re=Je+1132|0;W=Je+1128|0;od=Je+1124|0;xd=Je+1120|0;Cb=Je+1116|0;Ub=Je+1112|0;lc=Je+1108|0;Yc=Je+1104|0;ua=Je+1100|0;T=Je+1096|0;U=Je+1092|0;Kc=Je+1088|0;Nc=Je+1084|0;fc=Je+1080|0;Xb=Je+1076|0;Yb=Je+1072|0;uc=Je+1068|0;_=Je+1064|0;$=Je+1060|0;aa=Je+1056|0;Ha=Je+1052|0;Ma=Je+1048|0;vd=Je+1044|0;ib=Je+1040|0;jb=Je+1036|0;kd=Je+1032|0;Qb=Je+1028|0;Rb=Je+1024|0;Sb=Je+1020|0;Ua=Je+1016|0;Za=Je+1012|0;_a=Je+1008|0;Qd=Je+1004|0;y=Je+
1e3|0;z=Je+996|0;Dc=Je+992|0;Gc=Je+988|0;ec=Je+984|0;_b=Je+980|0;$b=Je+976|0;Wc=Je+972|0;X=Je+968|0;Y=Je+964|0;Z=Je+960|0;qb=Je+956|0;vb=Je+952|0;ud=Je+948|0;fb=Je+944|0;gb=Je+940|0;jd=Je+936|0;Nb=Je+932|0;Ob=Je+928|0;Pb=Je+924|0;Hb=Je+920|0;Oa=Je+916|0;Pa=Je+912|0;u=Je+908|0;jc=Je+904|0;Gd=Je+900|0;ic=Je+896|0;ke=Je+892|0;zb=Je+888|0;pe=Je+884|0;Ab=Je+880|0;Mb=Je+876|0;Fd=Je+872|0;Da=Je+868|0;Vc=Je+864|0;Jd=Je+860|0;je=Je+856|0;Id=Je+852|0;ie=Je+848|0;me=Je+844|0;oe=Je+840|0;le=Je+836|0;ne=Je+832|0;Hd=Je+828|0;qe=Je+824|0;md=Je+820|0;nd=Je+816|0;yb=Je+812|0;Bb=Je+808|0;hc=Je+804|0;kc=Je+800|0;ia=Je+796|0;Ic=Je+792|0;Fa=Je+788|0;Qa=Je+784|0;S=Je+780|0;Mc=Je+776|0;La=Je+772|0;Ya=Je+768|0;ta=Je+764|0;Jc=Je+760|0;Ga=Je+756|0;Ta=Je+752|0;H=Je+748|0;Lc=Je+744|0;Ka=Je+740|0;Va=Je+736|0;E=Je+732|0;ca=Je+728|0;ha=Je+724|0;Ea=Je+720|0;B=Je+716|0;D=Je+712|0;A=Je+708|0;C=Je+704|0;ea=Je+700|0;ga=Je+696|0;da=Je+692|0;fa=Je+688|0;M=Je+684|0;Wa=Je+680|0;R=Je+676|0;Xa=Je+672|0;J=Je+668|0;L=Je+664|0;I=Je+660|0;K=Je+656|0;O=Je+652|0;Q=Je+648|0;N=Je+644|0;P=Je+640|0;na=Je+636|0;Ra=Je+632|0;sa=Je+628|0;Sa=Je+624|0;ka=Je+620|0;ma=Je+616|0;ja=Je+612|0;la=Je+608|0;pa=Je+604|0;ra=Je+600|0;oa=Je+596|0;qa=Je+592|0;za=Je+588|0;Ia=Je+584|0;G=Je+580|0;Ja=Je+576|0;wa=Je+572|0;ya=Je+568|0;va=Je+564|0;xa=Je+560|0;Ba=Je+556|0;F=Je+552|0;Aa=Je+548|0;Ca=Je+544|0;Ce=Je+540|0;Bc=Je+536|0;ob=Je+532|0;Db=Je+528|0;x=Je+524|0;Fc=Je+520|0;rb=Je+516|0;Lb=Je+512|0;Pd=Je+508|0;Cc=Je+504|0;pb=Je+500|0;Gb=Je+496|0;$d=Je+492|0;Ec=Je+488|0;ub=Je+484|0;Ib=Je+480|0;we=Je+476|0;mb=Je+472|0;Be=Je+468|0;nb=Je+464|0;te=Je+460|0;ve=Je+456|0;se=Je+452|0;ue=Je+448|0;ye=Je+444|0;Ae=Je+440|0;xe=Je+436|0;ze=Je+432|0;ee=Je+428|0;Jb=Je+424|0;w=Je+420|0;Kb=Je+416|0;be=Je+412|0;de=Je+408|0;ae=Je+404|0;ce=Je+400|0;ge=Je+396|0;v=Je+392|0;fe=Je+388|0;he=Je+384|0;He=Je+380|0;Eb=Je+376|0;Od=Je+372|0;Fb=Je+368|0;Ee=Je+364|0;Ge=Je+360|0;De=Je+356|0;Fe=Je+352|0;Ld=Je+348|0;Nd=Je+344|0;Kd=Je+340|0;Md=Je+336|0;Vd=Je+332|0;sb=Je+328|0;_d=Je
+324|0;tb=Je+320|0;Sd=Je+316|0;Ud=Je+312|0;Rd=Je+308|0;Td=Je+304|0;Xd=Je+300|0;Zd=Je+296|0;Wd=Je+292|0;Yd=Je+288|0;Tc=Je+284|0;V=Je+280|0;Sc=Je+276|0;bc=Je+272|0;dc=Je+268|0;Zb=Je+264|0;ac=Je+260|0;cc=Je+256|0;Uc=Je+252|0;bd=Je+248|0;Xc=Je+244|0;ad=Je+240|0;$c=Je+236|0;dd=Je+232|0;Zc=Je+228|0;_c=Je+224|0;fd=Je+220|0;cd=Je+216|0;yc=Je+212|0;ba=Je+208|0;zc=Je+204|0;Pc=Je+200|0;Rc=Je+196|0;Hc=Je+192|0;Oc=Je+188|0;Qc=Je+184|0;Ac=Je+180|0;pc=Je+176|0;gc=Je+172|0;qc=Je+168|0;oc=Je+164|0;sc=Je+160|0;mc=Je+156|0;nc=Je+152|0;tc=Je+148|0;rc=Je+144|0;bb=Je+140|0;$a=Je+136|0;ab=Je+132|0;xb=Je+128|0;db=Je+124|0;Na=Je+120|0;wb=Je+116|0;eb=Je+112|0;cb=Je+108|0;Bd=Je+104|0;wd=Je+100|0;Cd=Je+96|0;Ad=Je+92|0;ed=Je+88|0;yd=Je+84|0;zd=Je+80|0;Ed=Je+76|0;Dd=Je+72|0;Tb=Je+68|0;Vb=Je+64|0;Wb=Je+60|0;lb=Je+56|0;wc=Je+52|0;hb=Je+48|0;kb=Je+44|0;xc=Je+40|0;vc=Je+36|0;ld=Je+32|0;pd=Je+28|0;qd=Je+24|0;id=Je+20|0;td=Je+16|0;gd=Je+12|0;hd=Je+8|0;sd=Je+4|0;rd=Je;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ke>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Je+1152>>2]=.5877852439880371;g[Je+1148>>2]=.9510565400123596;g[Je+1144>>2]=.25;g[Je+1140>>2]=.55901700258255;c[Ie>>2]=c[Ke>>2];c[q>>2]=(c[q>>2]|0)+(((c[Ke>>2]|0)-1|0)*38<<2);while(1){if((c[Ie>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[jc>>2]=+g[c[o>>2]>>2];g[Mb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Fd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Da>>2]=+g[(c[q>>2]|0)+72>>2];g[Vc>>2]=+g[(c[q>>2]|0)+76>>2];g[Gd>>2]=+g[Da>>2]*+g[Mb>>2]+ +g[Vc>>2]*+g[Fd>>2];g[ic>>2]=+g[Da>>2]*+g[Fd>>2]-+g[Vc>>2]*+g[Mb>>2];g[Jd>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[je>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Id>>2]=+g[(c[q>>2]|0)+32>>2];g[ie>>2]=+g[(c[q>>2]|0)+36>>2];g[ke>>2]=+g[Id>>2]*+g[Jd>>2]+ +g[ie>>2]*+g[je>>2];g[zb>>2]=+g[Id>>2]*+g[je>>2]-+g[ie>>2]*+g[Jd>>2];g[me>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[oe>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[le>>2]=+g[(c[q>>2]|0)+112>>2];g[ne>>2]=+g[(c[q>>2]|0)+116>>2];g[pe>>2]=+g[le>>2]*+g[me>>2]+ 
+g[ne>>2]*+g[oe>>2];g[Ab>>2]=+g[le>>2]*+g[oe>>2]-+g[ne>>2]*+g[me>>2];g[Hd>>2]=+g[u>>2]+ +g[Gd>>2];g[qe>>2]=+g[ke>>2]+ +g[pe>>2];g[re>>2]=+g[Hd>>2]-+g[qe>>2];g[W>>2]=+g[Hd>>2]+ +g[qe>>2];g[md>>2]=+g[jc>>2]-+g[ic>>2];g[nd>>2]=+g[ke>>2]-+g[pe>>2];g[od>>2]=+g[md>>2]-+g[nd>>2];g[xd>>2]=+g[nd>>2]+ +g[md>>2];g[yb>>2]=+g[u>>2]-+g[Gd>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Cb>>2]=+g[yb>>2]-+g[Bb>>2];g[Ub>>2]=+g[yb>>2]+ +g[Bb>>2];g[hc>>2]=+g[zb>>2]+ +g[Ab>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[lc>>2]=+g[hc>>2]+ +g[kc>>2];g[Yc>>2]=+g[kc>>2]-+g[hc>>2];g[B>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[D>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[A>>2]=+g[(c[q>>2]|0)+56>>2];g[C>>2]=+g[(c[q>>2]|0)+60>>2];g[E>>2]=+g[A>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[ca>>2]=+g[A>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[ea>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ga>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[da>>2]=+g[(c[q>>2]|0)+136>>2];g[fa>>2]=+g[(c[q>>2]|0)+140>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[Ea>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[ia>>2]=+g[E>>2]+ +g[ha>>2];g[Ic>>2]=+g[ca>>2]+ +g[Ea>>2];g[Fa>>2]=+g[ca>>2]-+g[Ea>>2];g[Qa>>2]=+g[E>>2]-+g[ha>>2];g[J>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[L>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[I>>2]=+g[(c[q>>2]|0)+128>>2];g[K>>2]=+g[(c[q>>2]|0)+132>>2];g[M>>2]=+g[I>>2]*+g[J>>2]+ +g[K>>2]*+g[L>>2];g[Wa>>2]=+g[I>>2]*+g[L>>2]-+g[K>>2]*+g[J>>2];g[O>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Q>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[N>>2]=+g[(c[q>>2]|0)+48>>2];g[P>>2]=+g[(c[q>>2]|0)+52>>2];g[R>>2]=+g[N>>2]*+g[O>>2]+ +g[P>>2]*+g[Q>>2];g[Xa>>2]=+g[N>>2]*+g[Q>>2]-+g[P>>2]*+g[O>>2];g[S>>2]=+g[M>>2]+ +g[R>>2];g[Mc>>2]=+g[Wa>>2]+ +g[Xa>>2];g[La>>2]=+g[M>>2]-+g[R>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[ka>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ma>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ja>>2]=+g[(c[q>>2]|0)+96>>2];g[la>>2]=+g[(c[q>>2]|0)+100>>2];g[na>>2]=+g[ja>>2]*+g[ka>>2]+ 
+g[la>>2]*+g[ma>>2];g[Ra>>2]=+g[ja>>2]*+g[ma>>2]-+g[la>>2]*+g[ka>>2];g[pa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ra>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[oa>>2]=+g[(c[q>>2]|0)+16>>2];g[qa>>2]=+g[(c[q>>2]|0)+20>>2];g[sa>>2]=+g[oa>>2]*+g[pa>>2]+ +g[qa>>2]*+g[ra>>2];g[Sa>>2]=+g[oa>>2]*+g[ra>>2]-+g[qa>>2]*+g[pa>>2];g[ta>>2]=+g[na>>2]+ +g[sa>>2];g[Jc>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Ga>>2]=+g[na>>2]-+g[sa>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[wa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ya>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[va>>2]=+g[(c[q>>2]|0)+88>>2];g[xa>>2]=+g[(c[q>>2]|0)+92>>2];g[za>>2]=+g[va>>2]*+g[wa>>2]+ +g[xa>>2]*+g[ya>>2];g[Ia>>2]=+g[va>>2]*+g[ya>>2]-+g[xa>>2]*+g[wa>>2];g[Ba>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[F>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Aa>>2]=+g[(c[q>>2]|0)+8>>2];g[Ca>>2]=+g[(c[q>>2]|0)+12>>2];g[G>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[F>>2];g[Ja>>2]=+g[Aa>>2]*+g[F>>2]-+g[Ca>>2]*+g[Ba>>2];g[H>>2]=+g[za>>2]+ +g[G>>2];g[Lc>>2]=+g[Ia>>2]+ +g[Ja>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[Va>>2]=+g[za>>2]-+g[G>>2];g[ua>>2]=+g[ia>>2]-+g[ta>>2];g[T>>2]=+g[H>>2]-+g[S>>2];g[U>>2]=+g[ua>>2]+ +g[T>>2];g[Kc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Nc>>2]=+g[Lc>>2]+ +g[Mc>>2];g[fc>>2]=+g[Kc>>2]+ +g[Nc>>2];g[Xb>>2]=+g[Ic>>2]-+g[Jc>>2];g[Yb>>2]=+g[Mc>>2]-+g[Lc>>2];g[uc>>2]=+g[Yb>>2]-+g[Xb>>2];g[_>>2]=+g[ia>>2]+ +g[ta>>2];g[$>>2]=+g[H>>2]+ +g[S>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[Ha>>2]=+g[Fa>>2]+ +g[Ga>>2];g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[vd>>2]=+g[Ha>>2]+ +g[Ma>>2];g[ib>>2]=+g[Fa>>2]-+g[Ga>>2];g[jb>>2]=+g[Ka>>2]-+g[La>>2];g[kd>>2]=+g[ib>>2]+ +g[jb>>2];g[Qb>>2]=+g[Qa>>2]+ +g[Ta>>2];g[Rb>>2]=+g[Va>>2]+ +g[Ya>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Ua>>2]=+g[Qa>>2]-+g[Ta>>2];g[Za>>2]=+g[Va>>2]-+g[Ya>>2];g[_a>>2]=+g[Ua>>2]+ +g[Za>>2];g[te>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ve>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[se>>2]=+g[(c[q>>2]|0)+24>>2];g[ue>>2]=+g[(c[q>>2]|0)+28>>2];g[we>>2]=+g[se>>2]*+g[te>>2]+ 
+g[ue>>2]*+g[ve>>2];g[mb>>2]=+g[se>>2]*+g[ve>>2]-+g[ue>>2]*+g[te>>2];g[ye>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ae>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[xe>>2]=+g[(c[q>>2]|0)+104>>2];g[ze>>2]=+g[(c[q>>2]|0)+108>>2];g[Be>>2]=+g[xe>>2]*+g[ye>>2]+ +g[ze>>2]*+g[Ae>>2];g[nb>>2]=+g[xe>>2]*+g[Ae>>2]-+g[ze>>2]*+g[ye>>2];g[Ce>>2]=+g[we>>2]+ +g[Be>>2];g[Bc>>2]=+g[mb>>2]+ +g[nb>>2];g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[Db>>2]=+g[we>>2]-+g[Be>>2];g[be>>2]=+g[c[n>>2]>>2];g[de>>2]=+g[c[p>>2]>>2];g[ae>>2]=+g[c[q>>2]>>2];g[ce>>2]=+g[(c[q>>2]|0)+4>>2];g[ee>>2]=+g[ae>>2]*+g[be>>2]+ +g[ce>>2]*+g[de>>2];g[Jb>>2]=+g[ae>>2]*+g[de>>2]-+g[ce>>2]*+g[be>>2];g[ge>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[v>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[fe>>2]=+g[(c[q>>2]|0)+80>>2];g[he>>2]=+g[(c[q>>2]|0)+84>>2];g[w>>2]=+g[fe>>2]*+g[ge>>2]+ +g[he>>2]*+g[v>>2];g[Kb>>2]=+g[fe>>2]*+g[v>>2]-+g[he>>2]*+g[ge>>2];g[x>>2]=+g[ee>>2]+ +g[w>>2];g[Fc>>2]=+g[Jb>>2]+ +g[Kb>>2];g[rb>>2]=+g[w>>2]-+g[ee>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[Ee>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ge>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[De>>2]=+g[(c[q>>2]|0)+64>>2];g[Fe>>2]=+g[(c[q>>2]|0)+68>>2];g[He>>2]=+g[De>>2]*+g[Ee>>2]+ +g[Fe>>2]*+g[Ge>>2];g[Eb>>2]=+g[De>>2]*+g[Ge>>2]-+g[Fe>>2]*+g[Ee>>2];g[Ld>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Nd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Kd>>2]=+g[(c[q>>2]|0)+144>>2];g[Md>>2]=+g[(c[q>>2]|0)+148>>2];g[Od>>2]=+g[Kd>>2]*+g[Ld>>2]+ +g[Md>>2]*+g[Nd>>2];g[Fb>>2]=+g[Kd>>2]*+g[Nd>>2]-+g[Md>>2]*+g[Ld>>2];g[Pd>>2]=+g[He>>2]+ +g[Od>>2];g[Cc>>2]=+g[Eb>>2]+ +g[Fb>>2];g[pb>>2]=+g[He>>2]-+g[Od>>2];g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[Sd>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Ud>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Rd>>2]=+g[(c[q>>2]|0)+120>>2];g[Td>>2]=+g[(c[q>>2]|0)+124>>2];g[Vd>>2]=+g[Rd>>2]*+g[Sd>>2]+ 
+g[Td>>2]*+g[Ud>>2];g[sb>>2]=+g[Rd>>2]*+g[Ud>>2]-+g[Td>>2]*+g[Sd>>2];g[Xd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Zd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Wd>>2]=+g[(c[q>>2]|0)+40>>2];g[Yd>>2]=+g[(c[q>>2]|0)+44>>2];g[_d>>2]=+g[Wd>>2]*+g[Xd>>2]+ +g[Yd>>2]*+g[Zd>>2];g[tb>>2]=+g[Wd>>2]*+g[Zd>>2]-+g[Yd>>2]*+g[Xd>>2];g[$d>>2]=+g[Vd>>2]+ +g[_d>>2];g[Ec>>2]=+g[sb>>2]+ +g[tb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[Ib>>2]=+g[Vd>>2]-+g[_d>>2];g[Qd>>2]=+g[Ce>>2]-+g[Pd>>2];g[y>>2]=+g[$d>>2]-+g[x>>2];g[z>>2]=+g[Qd>>2]+ +g[y>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Gc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[ec>>2]=+g[Dc>>2]+ +g[Gc>>2];g[_b>>2]=+g[Bc>>2]-+g[Cc>>2];g[$b>>2]=+g[Ec>>2]-+g[Fc>>2];g[Wc>>2]=+g[_b>>2]+ +g[$b>>2];g[X>>2]=+g[Ce>>2]+ +g[Pd>>2];g[Y>>2]=+g[$d>>2]+ +g[x>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[qb>>2]=+g[ob>>2]+ +g[pb>>2];g[vb>>2]=+g[rb>>2]-+g[ub>>2];g[ud>>2]=+g[vb>>2]-+g[qb>>2];g[fb>>2]=+g[ob>>2]-+g[pb>>2];g[gb>>2]=+g[ub>>2]+ +g[rb>>2];g[jd>>2]=+g[fb>>2]+ +g[gb>>2];g[Nb>>2]=+g[Db>>2]+ +g[Gb>>2];g[Ob>>2]=+g[Ib>>2]+ +g[Lb>>2];g[Pb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Hb>>2]=+g[Db>>2]-+g[Gb>>2];g[Oa>>2]=+g[Ib>>2]-+g[Lb>>2];g[Pa>>2]=+g[Hb>>2]+ +g[Oa>>2];g[Tc>>2]=(+g[z>>2]-+g[U>>2])*.55901700258255;g[V>>2]=+g[z>>2]+ +g[U>>2];g[Sc>>2]=+g[re>>2]-+g[V>>2]*.25;g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[bc>>2]=+g[Zb>>2]*.9510565400123596-+g[ac>>2]*.5877852439880371;g[dc>>2]=+g[ac>>2]*.9510565400123596+ +g[Zb>>2]*.5877852439880371;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[re>>2]+ +g[V>>2];g[cc>>2]=+g[Tc>>2]+ +g[Sc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[cc>>2]-+g[dc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[cc>>2]+ +g[dc>>2];g[Uc>>2]=+g[Sc>>2]-+g[Tc>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Uc>>2]-+g[bc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Uc>>2]+ +g[bc>>2];g[bd>>2]=(+g[Wc>>2]+ +g[uc>>2])*.55901700258255;g[Xc>>2]=+g[uc>>2]-+g[Wc>>2];g[ad>>2]=+g[Xc>>2]*.25+ +g[Yc>>2];g[Zc>>2]=+g[y>>2]-+g[Qd>>2];g[_c>>2]=+g[ua>>2]-+g[T>>2];g[$c>>2]=+g[Zc>>2]*.5877852439880371+ 
+g[_c>>2]*.9510565400123596;g[dd>>2]=+g[Zc>>2]*.9510565400123596-+g[_c>>2]*.5877852439880371;g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Xc>>2]-+g[Yc>>2];g[fd>>2]=+g[bd>>2]+ +g[ad>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[dd>>2]-+g[fd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[dd>>2]+ +g[fd>>2];g[cd>>2]=+g[ad>>2]-+g[bd>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[$c>>2]-+g[cd>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$c>>2]+ +g[cd>>2];g[yc>>2]=(+g[Z>>2]-+g[aa>>2])*.55901700258255;g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[zc>>2]=+g[W>>2]-+g[ba>>2]*.25;g[Hc>>2]=+g[Dc>>2]-+g[Gc>>2];g[Oc>>2]=+g[Kc>>2]-+g[Nc>>2];g[Pc>>2]=+g[Hc>>2]*.9510565400123596+ +g[Oc>>2]*.5877852439880371;g[Rc>>2]=+g[Oc>>2]*.9510565400123596-+g[Hc>>2]*.5877852439880371;g[c[m>>2]>>2]=+g[W>>2]+ +g[ba>>2];g[Qc>>2]=+g[zc>>2]-+g[yc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Qc>>2]-+g[Rc>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Qc>>2]+ +g[Rc>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ac>>2]-+g[Pc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ac>>2]+ +g[Pc>>2];g[pc>>2]=(+g[ec>>2]-+g[fc>>2])*.55901700258255;g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[qc>>2]=+g[lc>>2]-+g[gc>>2]*.25;g[mc>>2]=+g[X>>2]-+g[Y>>2];g[nc>>2]=+g[_>>2]-+g[$>>2];g[oc>>2]=+g[mc>>2]*.9510565400123596+ +g[nc>>2]*.5877852439880371;g[sc>>2]=+g[mc>>2]*.5877852439880371-+g[nc>>2]*.9510565400123596;g[c[n>>2]>>2]=+g[gc>>2]+ +g[lc>>2];g[tc>>2]=+g[qc>>2]-+g[pc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[sc>>2]-+g[tc>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[sc>>2]+ +g[tc>>2];g[rc>>2]=+g[pc>>2]+ +g[qc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[oc>>2]-+g[rc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[oc>>2]+ +g[rc>>2];g[bb>>2]=(+g[Pa>>2]-+g[_a>>2])*.55901700258255;g[$a>>2]=+g[Pa>>2]+ +g[_a>>2];g[ab>>2]=+g[Cb>>2]-+g[$a>>2]*.25;g[Na>>2]=+g[Ha>>2]-+g[Ma>>2];g[wb>>2]=+g[qb>>2]+ +g[vb>>2];g[xb>>2]=+g[Na>>2]*.9510565400123596-+g[wb>>2]*.5877852439880371;g[db>>2]=+g[wb>>2]*.9510565400123596+ 
+g[Na>>2]*.5877852439880371;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Cb>>2]+ +g[$a>>2];g[eb>>2]=+g[bb>>2]+ +g[ab>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[db>>2]+ +g[eb>>2];g[c[o>>2]>>2]=+g[eb>>2]-+g[db>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[xb>>2]+ +g[cb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cb>>2]-+g[xb>>2];g[Bd>>2]=(+g[ud>>2]+ +g[vd>>2])*.55901700258255;g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[Cd>>2]=+g[wd>>2]*.25+ +g[xd>>2];g[yd>>2]=+g[Hb>>2]-+g[Oa>>2];g[zd>>2]=+g[Ua>>2]-+g[Za>>2];g[Ad>>2]=+g[yd>>2]*.9510565400123596+ +g[zd>>2]*.5877852439880371;g[ed>>2]=+g[zd>>2]*.9510565400123596-+g[yd>>2]*.5877852439880371;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[wd>>2]-+g[xd>>2];g[Ed>>2]=+g[Bd>>2]+ +g[Cd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ed>>2]-+g[ed>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ed>>2]+ +g[Ed>>2];g[Dd>>2]=+g[Bd>>2]-+g[Cd>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ad>>2]+ +g[Dd>>2];g[c[p>>2]>>2]=+g[Dd>>2]-+g[Ad>>2];g[Tb>>2]=(+g[Pb>>2]-+g[Sb>>2])*.55901700258255;g[Vb>>2]=+g[Pb>>2]+ +g[Sb>>2];g[Wb>>2]=+g[Ub>>2]-+g[Vb>>2]*.25;g[hb>>2]=+g[fb>>2]-+g[gb>>2];g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[lb>>2]=+g[hb>>2]*.9510565400123596+ +g[kb>>2]*.5877852439880371;g[wc>>2]=+g[kb>>2]*.9510565400123596-+g[hb>>2]*.5877852439880371;g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ub>>2]+ +g[Vb>>2];g[xc>>2]=+g[Wb>>2]-+g[Tb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[wc>>2]+ +g[xc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[xc>>2]-+g[wc>>2];g[vc>>2]=+g[Tb>>2]+ +g[Wb>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[lb>>2]+ +g[vc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[vc>>2]-+g[lb>>2];g[ld>>2]=(+g[jd>>2]-+g[kd>>2])*.55901700258255;g[pd>>2]=+g[jd>>2]+ +g[kd>>2];g[qd>>2]=+g[od>>2]-+g[pd>>2]*.25;g[gd>>2]=+g[Qb>>2]-+g[Rb>>2];g[hd>>2]=+g[Nb>>2]-+g[Ob>>2];g[id>>2]=+g[gd>>2]*.9510565400123596-+g[hd>>2]*.5877852439880371;g[td>>2]=+g[hd>>2]*.9510565400123596+ +g[gd>>2]*.5877852439880371;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[pd>>2]+ +g[od>>2];g[sd>>2]=+g[ld>>2]+ 
+g[qd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[sd>>2]-+g[td>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[td>>2]+ +g[sd>>2];g[rd>>2]=+g[ld>>2]-+g[qd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[id>>2]+ +g[rd>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[rd>>2]-+g[id>>2];c[Ie>>2]=(c[Ie>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+152;c[r>>2]=c[r>>2]^c[2998]}i=Je;return}function Dq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,33,3640,0);i=b;return}function Eq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0;C=i;i=i+80|0;m=C+68|0;n=C+64|0;o=C+60|0;p=C+56|0;q=C+52|0;D=C+44|0;r=C+40|0;s=C+36|0;B=C+32|0;t=C+28|0;A=C+24|0;y=C+20|0;z=C+16|0;v=C+12|0;x=C+8|0;u=C+4|0;w=C;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[C+48>>2]=h;c[D>>2]=j;c[r>>2]=k;c[s>>2]=l;c[B>>2]=c[D>>2];c[q>>2]=(c[q>>2]|0)+((c[D>>2]|0)-1<<1<<2);while(1){if((c[B>>2]|0)>=(c[r>>2]|0))break;g[t>>2]=+g[c[m>>2]>>2];g[A>>2]=+g[c[o>>2]>>2];g[v>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[c[p>>2]>>2];g[u>>2]=+g[c[q>>2]>>2];g[w>>2]=+g[(c[q>>2]|0)+4>>2];g[y>>2]=+g[u>>2]*+g[v>>2]+ +g[w>>2]*+g[x>>2];g[z>>2]=+g[u>>2]*+g[x>>2]-+g[w>>2]*+g[v>>2];g[c[o>>2]>>2]=+g[t>>2]-+g[y>>2];g[c[p>>2]>>2]=+g[z>>2]-+g[A>>2];g[c[m>>2]>>2]=+g[t>>2]+ +g[y>>2];g[c[n>>2]>>2]=+g[z>>2]+ +g[A>>2];c[B>>2]=(c[B>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[s>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[s>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+8}i=C;return}function Fq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,34,3688,0);i=b;return}function Gq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg
=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0;Gi=i;i=i+2064|0;m=Gi+2048|0;n=Gi+2044|0;o=Gi+2040|0;p=Gi+2036|0;q=Gi+2032|0;r=Gi+2028|0;Hi=Gi+2024|0;s=Gi+2020|0;t=Gi+2016|0;Fi=Gi+1984|0;oi=Gi+1980|0;se=Gi+1976|0;Ig=Gi+1972|0;Wg=Gi+1968|0;Sb=Gi+1964|0;xd=Gi+1960|0;Eg=Gi+1956|0;qh=Gi+1952|0;V=Gi+1948|0;of=Gi+1944|0;Le=Gi+1940|0;Gf=Gi+1936|0;fc=Gi+1932|0;me=Gi+1928|0;Xc=Gi+1924|0;je=Gi+1920|0;Pa=Gi+1916|0;uf=Gi+1912|0;$f=Gi+1908|0;Lf=Gi+1904|0;Fd=Gi+1900|0;Qe=Gi+1896|0;Wd=Gi+1892|0;Te=Gi+1888|0;Nh=Gi+1884|0;ph=Gi+1880|0;ve=Gi+1876|0;zg=Gi+1872|0;vc=Gi+1868|0;yd=Gi+1864|0;Ac=Gi+1860|0;zd=Gi+1856|0;z=Gi+1852|0;Ae=Gi+1848|0;ze=Gi+1844|0;Bf=Gi+1840|0;Hc=Gi+1836|0;Cd=Gi+1832|0;Mc=Gi+1828|0;Dd=Gi+1824|0;ua=Gi+1820|0;Ce=Gi+1816|0;Fe=Gi+1812|0;Cf=Gi+1808|0;Sc=Gi+1804|0;ee=Gi+1800|0;Zb=Gi+1796|0;fe=Gi+1792|0;pb=Gi+1788|0;Me=Gi+1784|0;rf=Gi+1780|0;Hf=Gi+1776|0;qc=Gi+1772|0;ke=Gi+1768|0;_c=Gi+1764|0;Ne=Gi+1760|0;kb=Gi+1756|0;ag=Gi+1752|0;Xf=Gi+1748|0;Mf=Gi+1744|0;Qd=Gi+1740|0;Ue=Gi+1736|0;Zd=Gi+1732|0;Re=Gi+1728|0;u=Gi+1724|0;Cg=Gi+1720|0;mf=Gi+1716|0;Bg=Gi+1712|0;hi=Gi+1708|0;Pb=Gi+1704|0;mi=Gi+1700|0;Qb=Gi+1696|0;Mb=Gi+1692|0;ce=Gi+1688|0;Da=Gi+1684|0;Vc=Gi+1680|0;Gh=Gi+1676|0;gi=Gi+1672|0;Eh=Gi+1668|0;fi=Gi+1664|0;ji=Gi+1660|0;li=Gi+1656|0;ii=Gi+1652|0;ki=Gi+1648|0;vg=Gi+1644|0;ni=Gi+1640|0;Gg=Gi+1636|0;Hg=Gi+1632|0;Ob=Gi+1628|0;Rb=Gi+1624|0;Ag=Gi+1620|0;Dg=Gi+1616|0;Ba=Gi+1612|0;bc=Gi+1608|0;T=Gi+1604|0;uc=Gi+1600|0;I=Gi+1596|0;cc=Gi+1592|0;O=Gi+1588|0;tc=Gi+1584|0;ya=Gi+1580|0;Aa=Gi+1576|0;xa=Gi+1572|0;za=Gi+1568|0;Q=Gi+1564|0;S=Gi+1560|0
;P=Gi+1556|0;R=Gi+1552|0;F=Gi+1548|0;H=Gi+1544|0;Ca=Gi+1540|0;G=Gi+1536|0;L=Gi+1532|0;N=Gi+1528|0;K=Gi+1524|0;M=Gi+1520|0;J=Gi+1516|0;U=Gi+1512|0;Je=Gi+1508|0;Ke=Gi+1504|0;dc=Gi+1500|0;ec=Gi+1496|0;sc=Gi+1492|0;Wc=Gi+1488|0;vb=Gi+1484|0;Sd=Gi+1480|0;Lb=Gi+1476|0;dd=Gi+1472|0;Ab=Gi+1468|0;Td=Gi+1464|0;Gb=Gi+1460|0;cd=Gi+1456|0;sb=Gi+1452|0;ub=Gi+1448|0;rb=Gi+1444|0;tb=Gi+1440|0;Ib=Gi+1436|0;Kb=Gi+1432|0;Hb=Gi+1428|0;Jb=Gi+1424|0;xb=Gi+1420|0;zb=Gi+1416|0;wb=Gi+1412|0;yb=Gi+1408|0;Db=Gi+1404|0;Fb=Gi+1400|0;Cb=Gi+1396|0;Eb=Gi+1392|0;Bb=Gi+1388|0;Oa=Gi+1384|0;Zf=Gi+1380|0;_f=Gi+1376|0;bd=Gi+1372|0;Ed=Gi+1368|0;Ud=Gi+1364|0;Vd=Gi+1360|0;ti=Gi+1356|0;Tb=Gi+1352|0;Lh=Gi+1348|0;yc=Gi+1344|0;yi=Gi+1340|0;Ub=Gi+1336|0;Ei=Gi+1332|0;xc=Gi+1328|0;qi=Gi+1324|0;si=Gi+1320|0;pi=Gi+1316|0;ri=Gi+1312|0;Ih=Gi+1308|0;Kh=Gi+1304|0;Hh=Gi+1300|0;Jh=Gi+1296|0;vi=Gi+1292|0;xi=Gi+1288|0;ui=Gi+1284|0;wi=Gi+1280|0;Bi=Gi+1276|0;Di=Gi+1272|0;Ai=Gi+1268|0;Ci=Gi+1264|0;zi=Gi+1260|0;Mh=Gi+1256|0;te=Gi+1252|0;ue=Gi+1248|0;Vb=Gi+1244|0;Wb=Gi+1240|0;wc=Gi+1236|0;zc=Gi+1232|0;Th=Gi+1228|0;Dc=Gi+1224|0;x=Gi+1220|0;Kc=Gi+1216|0;Yh=Gi+1212|0;Ec=Gi+1208|0;ci=Gi+1204|0;Jc=Gi+1200|0;Qh=Gi+1196|0;Sh=Gi+1192|0;Ph=Gi+1188|0;Rh=Gi+1184|0;ei=Gi+1180|0;w=Gi+1176|0;di=Gi+1172|0;v=Gi+1168|0;Vh=Gi+1164|0;Xh=Gi+1160|0;Uh=Gi+1156|0;Wh=Gi+1152|0;$h=Gi+1148|0;bi=Gi+1144|0;_h=Gi+1140|0;ai=Gi+1136|0;Zh=Gi+1132|0;y=Gi+1128|0;xe=Gi+1124|0;ye=Gi+1120|0;Fc=Gi+1116|0;Gc=Gi+1112|0;Ic=Gi+1108|0;Lc=Gi+1104|0;E=Gi+1100|0;Oc=Gi+1096|0;sa=Gi+1092|0;Xb=Gi+1088|0;ha=Gi+1084|0;Pc=Gi+1080|0;na=Gi+1076|0;Uc=Gi+1072|0;B=Gi+1068|0;D=Gi+1064|0;A=Gi+1060|0;C=Gi+1056|0;pa=Gi+1052|0;ra=Gi+1048|0;oa=Gi+1044|0;qa=Gi+1040|0;ea=Gi+1036|0;ga=Gi+1032|0;da=Gi+1028|0;fa=Gi+1024|0;ka=Gi+1020|0;ma=Gi+1016|0;ja=Gi+1012|0;la=Gi+1008|0;ia=Gi+1004|0;ta=Gi+1e3|0;De=Gi+996|0;Ee=Gi+992|0;Qc=Gi+988|0;Rc=Gi+984|0;Tc=Gi+980|0;Yb=Gi+976|0;_=Gi+972|0;mc=Gi+968|0;Ea=Gi+964|0;nc=Gi+960|0;lc=Gi+956|0;oc=Gi+952|0;Ka=Gi+948|0;hc=Gi+944|0;nb=Gi+940|0;ic=Gi+936|0;gc=Gi+932
|0;jc=Gi+928|0;X=Gi+924|0;Z=Gi+920|0;W=Gi+916|0;Y=Gi+912|0;aa=Gi+908|0;ca=Gi+904|0;$=Gi+900|0;ba=Gi+896|0;Ha=Gi+892|0;Ja=Gi+888|0;Ga=Gi+884|0;Ia=Gi+880|0;Ma=Gi+876|0;mb=Gi+872|0;La=Gi+868|0;Na=Gi+864|0;Fa=Gi+860|0;ob=Gi+856|0;pf=Gi+852|0;qf=Gi+848|0;kc=Gi+844|0;pc=Gi+840|0;Yc=Gi+836|0;Zc=Gi+832|0;Ua=Gi+828|0;Gd=Gi+824|0;Za=Gi+820|0;Hd=Gi+816|0;Id=Gi+812|0;Jd=Gi+808|0;db=Gi+804|0;Md=Gi+800|0;ib=Gi+796|0;Nd=Gi+792|0;Ld=Gi+788|0;Od=Gi+784|0;Ra=Gi+780|0;Ta=Gi+776|0;Qa=Gi+772|0;Sa=Gi+768|0;Wa=Gi+764|0;Ya=Gi+760|0;Va=Gi+756|0;Xa=Gi+752|0;ab=Gi+748|0;cb=Gi+744|0;$a=Gi+740|0;bb=Gi+736|0;fb=Gi+732|0;hb=Gi+728|0;eb=Gi+724|0;gb=Gi+720|0;_a=Gi+716|0;jb=Gi+712|0;vf=Gi+708|0;wf=Gi+704|0;Kd=Gi+700|0;Pd=Gi+696|0;Xd=Gi+692|0;Yd=Gi+688|0;wa=Gi+684|0;Uf=Gi+680|0;eh=Gi+676|0;gh=Gi+672|0;Nb=Gi+668|0;fh=Gi+664|0;wg=Gi+660|0;xg=Gi+656|0;Oh=Gi+652|0;va=Gi+648|0;yg=Gi+644|0;Fg=Gi+640|0;qb=Gi+636|0;lb=Gi+632|0;Vf=Gi+628|0;Wf=Gi+624|0;Ef=Gi+620|0;Qf=Gi+616|0;kh=Gi+612|0;mh=Gi+608|0;Jf=Gi+604|0;Rf=Gi+600|0;Of=Gi+596|0;Sf=Gi+592|0;Af=Gi+588|0;Df=Gi+584|0;ih=Gi+580|0;jh=Gi+576|0;Ff=Gi+572|0;If=Gi+568|0;Kf=Gi+564|0;Nf=Gi+560|0;Pf=Gi+556|0;hh=Gi+552|0;Tf=Gi+548|0;lh=Gi+544|0;we=Gi+540|0;rh=Gi+536|0;xh=Gi+532|0;ig=Gi+528|0;He=Gi+524|0;oh=Gi+520|0;sg=Gi+516|0;yf=Gi+512|0;lg=Gi+508|0;wh=Gi+504|0;tf=Gi+500|0;fg=Gi+496|0;pg=Gi+492|0;xf=Gi+488|0;cg=Gi+484|0;gg=Gi+480|0;Be=Gi+476|0;Ge=Gi+472|0;nf=Gi+468|0;sf=Gi+464|0;qg=Gi+460|0;rg=Gi+456|0;jg=Gi+452|0;kg=Gi+448|0;ng=Gi+444|0;og=Gi+440|0;Yf=Gi+436|0;bg=Gi+432|0;Ie=Gi+428|0;dg=Gi+424|0;vh=Gi+420|0;yh=Gi+416|0;eg=Gi+412|0;hg=Gi+408|0;zh=Gi+404|0;Ah=Gi+400|0;mg=Gi+396|0;tg=Gi+392|0;nh=Gi+388|0;sh=Gi+384|0;ug=Gi+380|0;zf=Gi+376|0;th=Gi+372|0;uh=Gi+368|0;Cc=Gi+364|0;hd=Gi+360|0;Xg=Gi+356|0;bh=Gi+352|0;$b=Gi+348|0;Ug=Gi+344|0;rd=Gi+340|0;vd=Gi+336|0;ad=Gi+332|0;ed=Gi+328|0;kd=Gi+324|0;ah=Gi+320|0;od=Gi+316|0;ud=Gi+312|0;$d=Gi+308|0;fd=Gi+304|0;Bc=Gi+300|0;Vg=Gi+296|0;Nc=Gi+292|0;_b=Gi+288|0;pd=Gi+284|0;qd=Gi+280|0;rc=Gi+276|0;$c=Gi+272|0;id=Gi+268|0;jd=Gi+264|
0;md=Gi+260|0;nd=Gi+256|0;Rd=Gi+252|0;_d=Gi+248|0;ac=Gi+244|0;ae=Gi+240|0;$g=Gi+236|0;ch=Gi+232|0;be=Gi+228|0;gd=Gi+224|0;dh=Gi+220|0;Fh=Gi+216|0;ld=Gi+212|0;sd=Gi+208|0;Tg=Gi+204|0;Yg=Gi+200|0;td=Gi+196|0;wd=Gi+192|0;Zg=Gi+188|0;_g=Gi+184|0;Bd=Gi+180|0;af=Gi+176|0;Jg=Gi+172|0;Pg=Gi+168|0;he=Gi+164|0;Ch=Gi+160|0;lf=Gi+156|0;qe=Gi+152|0;Pe=Gi+148|0;Ze=Gi+144|0;df=Gi+140|0;Og=Gi+136|0;hf=Gi+132|0;pe=Gi+128|0;We=Gi+124|0;_e=Gi+120|0;Ad=Gi+116|0;Dh=Gi+112|0;de=Gi+108|0;ge=Gi+104|0;jf=Gi+100|0;kf=Gi+96|0;le=Gi+92|0;Oe=Gi+88|0;bf=Gi+84|0;cf=Gi+80|0;ff=Gi+76|0;gf=Gi+72|0;Se=Gi+68|0;Ve=Gi+64|0;ie=Gi+60|0;Xe=Gi+56|0;Ng=Gi+52|0;Qg=Gi+48|0;Ye=Gi+44|0;$e=Gi+40|0;Rg=Gi+36|0;Sg=Gi+32|0;ef=Gi+28|0;ne=Gi+24|0;Bh=Gi+20|0;Kg=Gi+16|0;oe=Gi+12|0;re=Gi+8|0;Lg=Gi+4|0;Mg=Gi;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Hi>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Gi+2012>>2]=.19509032368659973;g[Gi+2008>>2]=.9807852506637573;g[Gi+2004>>2]=.5555702447891235;g[Gi+2e3>>2]=.8314695954322815;g[Gi+1996>>2]=.3826834261417389;g[Gi+1992>>2]=.9238795042037964;g[Gi+1988>>2]=.7071067690849304;c[Fi>>2]=c[Hi>>2];c[q>>2]=(c[q>>2]|0)+(((c[Hi>>2]|0)-1|0)*62<<2);while(1){if((c[Fi>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Cg>>2]=+g[c[o>>2]>>2];g[Mb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Da>>2]=+g[(c[q>>2]|0)+120>>2];g[Vc>>2]=+g[(c[q>>2]|0)+124>>2];g[mf>>2]=+g[Da>>2]*+g[Mb>>2]+ +g[Vc>>2]*+g[ce>>2];g[Bg>>2]=+g[Da>>2]*+g[ce>>2]-+g[Vc>>2]*+g[Mb>>2];g[Gh>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[gi>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Eh>>2]=+g[(c[q>>2]|0)+56>>2];g[fi>>2]=+g[(c[q>>2]|0)+60>>2];g[hi>>2]=+g[Eh>>2]*+g[Gh>>2]+ +g[fi>>2]*+g[gi>>2];g[Pb>>2]=+g[Eh>>2]*+g[gi>>2]-+g[fi>>2]*+g[Gh>>2];g[ji>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[li>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ii>>2]=+g[(c[q>>2]|0)+184>>2];g[ki>>2]=+g[(c[q>>2]|0)+188>>2];g[mi>>2]=+g[ii>>2]*+g[ji>>2]+ 
+g[ki>>2]*+g[li>>2];g[Qb>>2]=+g[ii>>2]*+g[li>>2]-+g[ki>>2]*+g[ji>>2];g[vg>>2]=+g[u>>2]+ +g[mf>>2];g[ni>>2]=+g[hi>>2]+ +g[mi>>2];g[oi>>2]=+g[vg>>2]+ +g[ni>>2];g[se>>2]=+g[vg>>2]-+g[ni>>2];g[Gg>>2]=+g[Cg>>2]-+g[Bg>>2];g[Hg>>2]=+g[hi>>2]-+g[mi>>2];g[Ig>>2]=+g[Gg>>2]-+g[Hg>>2];g[Wg>>2]=+g[Hg>>2]+ +g[Gg>>2];g[Ob>>2]=+g[u>>2]-+g[mf>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[Sb>>2]=+g[Ob>>2]-+g[Rb>>2];g[xd>>2]=+g[Ob>>2]+ +g[Rb>>2];g[Ag>>2]=+g[Pb>>2]+ +g[Qb>>2];g[Dg>>2]=+g[Bg>>2]+ +g[Cg>>2];g[Eg>>2]=+g[Ag>>2]+ +g[Dg>>2];g[qh>>2]=+g[Dg>>2]-+g[Ag>>2];g[ya>>2]=+g[c[n>>2]>>2];g[Aa>>2]=+g[c[p>>2]>>2];g[xa>>2]=+g[c[q>>2]>>2];g[za>>2]=+g[(c[q>>2]|0)+4>>2];g[Ba>>2]=+g[xa>>2]*+g[ya>>2]+ +g[za>>2]*+g[Aa>>2];g[bc>>2]=+g[xa>>2]*+g[Aa>>2]-+g[za>>2]*+g[ya>>2];g[Q>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[S>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[P>>2]=+g[(c[q>>2]|0)+192>>2];g[R>>2]=+g[(c[q>>2]|0)+196>>2];g[T>>2]=+g[P>>2]*+g[Q>>2]+ +g[R>>2]*+g[S>>2];g[uc>>2]=+g[P>>2]*+g[S>>2]-+g[R>>2]*+g[Q>>2];g[F>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[H>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Ca>>2]=+g[(c[q>>2]|0)+128>>2];g[G>>2]=+g[(c[q>>2]|0)+132>>2];g[I>>2]=+g[Ca>>2]*+g[F>>2]+ +g[G>>2]*+g[H>>2];g[cc>>2]=+g[Ca>>2]*+g[H>>2]-+g[G>>2]*+g[F>>2];g[L>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[N>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[K>>2]=+g[(c[q>>2]|0)+64>>2];g[M>>2]=+g[(c[q>>2]|0)+68>>2];g[O>>2]=+g[K>>2]*+g[L>>2]+ +g[M>>2]*+g[N>>2];g[tc>>2]=+g[K>>2]*+g[N>>2]-+g[M>>2]*+g[L>>2];g[J>>2]=+g[Ba>>2]+ +g[I>>2];g[U>>2]=+g[O>>2]+ +g[T>>2];g[V>>2]=+g[J>>2]+ +g[U>>2];g[of>>2]=+g[J>>2]-+g[U>>2];g[Je>>2]=+g[bc>>2]+ +g[cc>>2];g[Ke>>2]=+g[tc>>2]+ +g[uc>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[Gf>>2]=+g[Je>>2]+ +g[Ke>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[ec>>2]=+g[O>>2]-+g[T>>2];g[fc>>2]=+g[dc>>2]+ +g[ec>>2];g[me>>2]=+g[dc>>2]-+g[ec>>2];g[sc>>2]=+g[Ba>>2]-+g[I>>2];g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[Xc>>2]=+g[sc>>2]-+g[Wc>>2];g[je>>2]=+g[sc>>2]+ 
+g[Wc>>2];g[sb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[ub>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[rb>>2]=+g[(c[q>>2]|0)+240>>2];g[tb>>2]=+g[(c[q>>2]|0)+244>>2];g[vb>>2]=+g[rb>>2]*+g[sb>>2]+ +g[tb>>2]*+g[ub>>2];g[Sd>>2]=+g[rb>>2]*+g[ub>>2]-+g[tb>>2]*+g[sb>>2];g[Ib>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Kb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Hb>>2]=+g[(c[q>>2]|0)+176>>2];g[Jb>>2]=+g[(c[q>>2]|0)+180>>2];g[Lb>>2]=+g[Hb>>2]*+g[Ib>>2]+ +g[Jb>>2]*+g[Kb>>2];g[dd>>2]=+g[Hb>>2]*+g[Kb>>2]-+g[Jb>>2]*+g[Ib>>2];g[xb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[zb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[wb>>2]=+g[(c[q>>2]|0)+112>>2];g[yb>>2]=+g[(c[q>>2]|0)+116>>2];g[Ab>>2]=+g[wb>>2]*+g[xb>>2]+ +g[yb>>2]*+g[zb>>2];g[Td>>2]=+g[wb>>2]*+g[zb>>2]-+g[yb>>2]*+g[xb>>2];g[Db>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Fb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Cb>>2]=+g[(c[q>>2]|0)+48>>2];g[Eb>>2]=+g[(c[q>>2]|0)+52>>2];g[Gb>>2]=+g[Cb>>2]*+g[Db>>2]+ +g[Eb>>2]*+g[Fb>>2];g[cd>>2]=+g[Cb>>2]*+g[Fb>>2]-+g[Eb>>2]*+g[Db>>2];g[Bb>>2]=+g[vb>>2]+ +g[Ab>>2];g[Oa>>2]=+g[Gb>>2]+ +g[Lb>>2];g[Pa>>2]=+g[Bb>>2]+ +g[Oa>>2];g[uf>>2]=+g[Bb>>2]-+g[Oa>>2];g[Zf>>2]=+g[Sd>>2]+ +g[Td>>2];g[_f>>2]=+g[cd>>2]+ +g[dd>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[Lf>>2]=+g[Zf>>2]+ +g[_f>>2];g[bd>>2]=+g[vb>>2]-+g[Ab>>2];g[Ed>>2]=+g[cd>>2]-+g[dd>>2];g[Fd>>2]=+g[bd>>2]-+g[Ed>>2];g[Qe>>2]=+g[bd>>2]+ +g[Ed>>2];g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[Vd>>2]=+g[Gb>>2]-+g[Lb>>2];g[Wd>>2]=+g[Ud>>2]+ +g[Vd>>2];g[Te>>2]=+g[Ud>>2]-+g[Vd>>2];g[qi>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[si>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pi>>2]=+g[(c[q>>2]|0)+24>>2];g[ri>>2]=+g[(c[q>>2]|0)+28>>2];g[ti>>2]=+g[pi>>2]*+g[qi>>2]+ +g[ri>>2]*+g[si>>2];g[Tb>>2]=+g[pi>>2]*+g[si>>2]-+g[ri>>2]*+g[qi>>2];g[Ih>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Kh>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Hh>>2]=+g[(c[q>>2]|0)+88>>2];g[Jh>>2]=+g[(c[q>>2]|0)+92>>2];g[Lh>>2]=+g[Hh>>2]*+g[Ih>>2]+ 
+g[Jh>>2]*+g[Kh>>2];g[yc>>2]=+g[Hh>>2]*+g[Kh>>2]-+g[Jh>>2]*+g[Ih>>2];g[vi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[xi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ui>>2]=+g[(c[q>>2]|0)+152>>2];g[wi>>2]=+g[(c[q>>2]|0)+156>>2];g[yi>>2]=+g[ui>>2]*+g[vi>>2]+ +g[wi>>2]*+g[xi>>2];g[Ub>>2]=+g[ui>>2]*+g[xi>>2]-+g[wi>>2]*+g[vi>>2];g[Bi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Di>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ai>>2]=+g[(c[q>>2]|0)+216>>2];g[Ci>>2]=+g[(c[q>>2]|0)+220>>2];g[Ei>>2]=+g[Ai>>2]*+g[Bi>>2]+ +g[Ci>>2]*+g[Di>>2];g[xc>>2]=+g[Ai>>2]*+g[Di>>2]-+g[Ci>>2]*+g[Bi>>2];g[zi>>2]=+g[ti>>2]+ +g[yi>>2];g[Mh>>2]=+g[Ei>>2]+ +g[Lh>>2];g[Nh>>2]=+g[zi>>2]+ +g[Mh>>2];g[ph>>2]=+g[Mh>>2]-+g[zi>>2];g[te>>2]=+g[Tb>>2]+ +g[Ub>>2];g[ue>>2]=+g[xc>>2]+ +g[yc>>2];g[ve>>2]=+g[te>>2]-+g[ue>>2];g[zg>>2]=+g[te>>2]+ +g[ue>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Wb>>2]=+g[ti>>2]-+g[yi>>2];g[vc>>2]=+g[Vb>>2]-+g[Wb>>2];g[yd>>2]=+g[Wb>>2]+ +g[Vb>>2];g[wc>>2]=+g[Ei>>2]-+g[Lh>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Ac>>2]=+g[wc>>2]+ +g[zc>>2];g[zd>>2]=+g[wc>>2]-+g[zc>>2];g[Qh>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Sh>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ph>>2]=+g[(c[q>>2]|0)+8>>2];g[Rh>>2]=+g[(c[q>>2]|0)+12>>2];g[Th>>2]=+g[Ph>>2]*+g[Qh>>2]+ +g[Rh>>2]*+g[Sh>>2];g[Dc>>2]=+g[Ph>>2]*+g[Sh>>2]-+g[Rh>>2]*+g[Qh>>2];g[ei>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[w>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[di>>2]=+g[(c[q>>2]|0)+200>>2];g[v>>2]=+g[(c[q>>2]|0)+204>>2];g[x>>2]=+g[di>>2]*+g[ei>>2]+ +g[v>>2]*+g[w>>2];g[Kc>>2]=+g[di>>2]*+g[w>>2]-+g[v>>2]*+g[ei>>2];g[Vh>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Xh>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Uh>>2]=+g[(c[q>>2]|0)+136>>2];g[Wh>>2]=+g[(c[q>>2]|0)+140>>2];g[Yh>>2]=+g[Uh>>2]*+g[Vh>>2]+ +g[Wh>>2]*+g[Xh>>2];g[Ec>>2]=+g[Uh>>2]*+g[Xh>>2]-+g[Wh>>2]*+g[Vh>>2];g[$h>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[bi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[_h>>2]=+g[(c[q>>2]|0)+72>>2];g[ai>>2]=+g[(c[q>>2]|0)+76>>2];g[ci>>2]=+g[_h>>2]*+g[$h>>2]+ 
+g[ai>>2]*+g[bi>>2];g[Jc>>2]=+g[_h>>2]*+g[bi>>2]-+g[ai>>2]*+g[$h>>2];g[Zh>>2]=+g[Th>>2]+ +g[Yh>>2];g[y>>2]=+g[ci>>2]+ +g[x>>2];g[z>>2]=+g[Zh>>2]+ +g[y>>2];g[Ae>>2]=+g[Zh>>2]-+g[y>>2];g[xe>>2]=+g[Dc>>2]+ +g[Ec>>2];g[ye>>2]=+g[Jc>>2]+ +g[Kc>>2];g[ze>>2]=+g[xe>>2]-+g[ye>>2];g[Bf>>2]=+g[xe>>2]+ +g[ye>>2];g[Fc>>2]=+g[Dc>>2]-+g[Ec>>2];g[Gc>>2]=+g[ci>>2]-+g[x>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Cd>>2]=+g[Fc>>2]-+g[Gc>>2];g[Ic>>2]=+g[Th>>2]-+g[Yh>>2];g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[Mc>>2]=+g[Ic>>2]-+g[Lc>>2];g[Dd>>2]=+g[Ic>>2]+ +g[Lc>>2];g[B>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[D>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[A>>2]=+g[(c[q>>2]|0)+232>>2];g[C>>2]=+g[(c[q>>2]|0)+236>>2];g[E>>2]=+g[A>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[Oc>>2]=+g[A>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[pa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ra>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[oa>>2]=+g[(c[q>>2]|0)+168>>2];g[qa>>2]=+g[(c[q>>2]|0)+172>>2];g[sa>>2]=+g[oa>>2]*+g[pa>>2]+ +g[qa>>2]*+g[ra>>2];g[Xb>>2]=+g[oa>>2]*+g[ra>>2]-+g[qa>>2]*+g[pa>>2];g[ea>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ga>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[da>>2]=+g[(c[q>>2]|0)+104>>2];g[fa>>2]=+g[(c[q>>2]|0)+108>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[Pc>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[ka>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ma>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ja>>2]=+g[(c[q>>2]|0)+40>>2];g[la>>2]=+g[(c[q>>2]|0)+44>>2];g[na>>2]=+g[ja>>2]*+g[ka>>2]+ +g[la>>2]*+g[ma>>2];g[Uc>>2]=+g[ja>>2]*+g[ma>>2]-+g[la>>2]*+g[ka>>2];g[ia>>2]=+g[E>>2]+ +g[ha>>2];g[ta>>2]=+g[na>>2]+ +g[sa>>2];g[ua>>2]=+g[ia>>2]+ +g[ta>>2];g[Ce>>2]=+g[ia>>2]-+g[ta>>2];g[De>>2]=+g[Oc>>2]+ +g[Pc>>2];g[Ee>>2]=+g[Uc>>2]+ +g[Xb>>2];g[Fe>>2]=+g[De>>2]-+g[Ee>>2];g[Cf>>2]=+g[De>>2]+ +g[Ee>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Rc>>2]=+g[na>>2]-+g[sa>>2];g[Sc>>2]=+g[Qc>>2]+ 
+g[Rc>>2];g[ee>>2]=+g[Qc>>2]-+g[Rc>>2];g[Tc>>2]=+g[E>>2]-+g[ha>>2];g[Yb>>2]=+g[Uc>>2]-+g[Xb>>2];g[Zb>>2]=+g[Tc>>2]-+g[Yb>>2];g[fe>>2]=+g[Tc>>2]+ +g[Yb>>2];g[X>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Z>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[W>>2]=+g[(c[q>>2]|0)+32>>2];g[Y>>2]=+g[(c[q>>2]|0)+36>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[mc>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[aa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[$>>2]=+g[(c[q>>2]|0)+160>>2];g[ba>>2]=+g[(c[q>>2]|0)+164>>2];g[Ea>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[nc>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[lc>>2]=+g[_>>2]-+g[Ea>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[Ha>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ja>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ga>>2]=+g[(c[q>>2]|0)+224>>2];g[Ia>>2]=+g[(c[q>>2]|0)+228>>2];g[Ka>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[hc>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[Ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[mb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[La>>2]=+g[(c[q>>2]|0)+96>>2];g[Na>>2]=+g[(c[q>>2]|0)+100>>2];g[nb>>2]=+g[La>>2]*+g[Ma>>2]+ +g[Na>>2]*+g[mb>>2];g[ic>>2]=+g[La>>2]*+g[mb>>2]-+g[Na>>2]*+g[Ma>>2];g[gc>>2]=+g[Ka>>2]-+g[nb>>2];g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[Fa>>2]=+g[_>>2]+ +g[Ea>>2];g[ob>>2]=+g[Ka>>2]+ +g[nb>>2];g[pb>>2]=+g[Fa>>2]+ +g[ob>>2];g[Me>>2]=+g[ob>>2]-+g[Fa>>2];g[pf>>2]=+g[mc>>2]+ +g[nc>>2];g[qf>>2]=+g[hc>>2]+ +g[ic>>2];g[rf>>2]=+g[pf>>2]-+g[qf>>2];g[Hf>>2]=+g[pf>>2]+ +g[qf>>2];g[kc>>2]=+g[gc>>2]-+g[jc>>2];g[pc>>2]=+g[lc>>2]+ +g[oc>>2];g[qc>>2]=(+g[kc>>2]-+g[pc>>2])*.7071067690849304;g[ke>>2]=(+g[pc>>2]+ +g[kc>>2])*.7071067690849304;g[Yc>>2]=+g[oc>>2]-+g[lc>>2];g[Zc>>2]=+g[gc>>2]+ +g[jc>>2];g[_c>>2]=(+g[Yc>>2]-+g[Zc>>2])*.7071067690849304;g[Ne>>2]=(+g[Yc>>2]+ 
+g[Zc>>2])*.7071067690849304;g[Ra>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ta>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Qa>>2]=+g[(c[q>>2]|0)+16>>2];g[Sa>>2]=+g[(c[q>>2]|0)+20>>2];g[Ua>>2]=+g[Qa>>2]*+g[Ra>>2]+ +g[Sa>>2]*+g[Ta>>2];g[Gd>>2]=+g[Qa>>2]*+g[Ta>>2]-+g[Sa>>2]*+g[Ra>>2];g[Wa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ya>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Va>>2]=+g[(c[q>>2]|0)+144>>2];g[Xa>>2]=+g[(c[q>>2]|0)+148>>2];g[Za>>2]=+g[Va>>2]*+g[Wa>>2]+ +g[Xa>>2]*+g[Ya>>2];g[Hd>>2]=+g[Va>>2]*+g[Ya>>2]-+g[Xa>>2]*+g[Wa>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[Jd>>2]=+g[Ua>>2]-+g[Za>>2];g[ab>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[cb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[$a>>2]=+g[(c[q>>2]|0)+208>>2];g[bb>>2]=+g[(c[q>>2]|0)+212>>2];g[db>>2]=+g[$a>>2]*+g[ab>>2]+ +g[bb>>2]*+g[cb>>2];g[Md>>2]=+g[$a>>2]*+g[cb>>2]-+g[bb>>2]*+g[ab>>2];g[fb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[hb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[eb>>2]=+g[(c[q>>2]|0)+80>>2];g[gb>>2]=+g[(c[q>>2]|0)+84>>2];g[ib>>2]=+g[eb>>2]*+g[fb>>2]+ +g[gb>>2]*+g[hb>>2];g[Nd>>2]=+g[eb>>2]*+g[hb>>2]-+g[gb>>2]*+g[fb>>2];g[Ld>>2]=+g[db>>2]-+g[ib>>2];g[Od>>2]=+g[Md>>2]-+g[Nd>>2];g[_a>>2]=+g[Ua>>2]+ +g[Za>>2];g[jb>>2]=+g[db>>2]+ +g[ib>>2];g[kb>>2]=+g[_a>>2]+ +g[jb>>2];g[ag>>2]=+g[jb>>2]-+g[_a>>2];g[vf>>2]=+g[Gd>>2]+ +g[Hd>>2];g[wf>>2]=+g[Md>>2]+ +g[Nd>>2];g[Xf>>2]=+g[vf>>2]-+g[wf>>2];g[Mf>>2]=+g[vf>>2]+ +g[wf>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[Pd>>2]=+g[Ld>>2]+ +g[Od>>2];g[Qd>>2]=(+g[Kd>>2]-+g[Pd>>2])*.7071067690849304;g[Ue>>2]=(+g[Kd>>2]+ +g[Pd>>2])*.7071067690849304;g[Xd>>2]=+g[Ld>>2]-+g[Od>>2];g[Yd>>2]=+g[Jd>>2]+ +g[Id>>2];g[Zd>>2]=(+g[Xd>>2]-+g[Yd>>2])*.7071067690849304;g[Re>>2]=(+g[Yd>>2]+ +g[Xd>>2])*.7071067690849304;g[Oh>>2]=+g[oi>>2]+ +g[Nh>>2];g[va>>2]=+g[z>>2]+ +g[ua>>2];g[wa>>2]=+g[Oh>>2]+ +g[va>>2];g[Uf>>2]=+g[Oh>>2]-+g[va>>2];g[yg>>2]=+g[Bf>>2]+ +g[Cf>>2];g[Fg>>2]=+g[zg>>2]+ +g[Eg>>2];g[eh>>2]=+g[yg>>2]+ +g[Fg>>2];g[gh>>2]=+g[Fg>>2]-+g[yg>>2];g[qb>>2]=+g[V>>2]+ +g[pb>>2];g[lb>>2]=+g[Pa>>2]+ 
+g[kb>>2];g[Nb>>2]=+g[qb>>2]+ +g[lb>>2];g[fh>>2]=+g[lb>>2]-+g[qb>>2];g[Vf>>2]=+g[Gf>>2]+ +g[Hf>>2];g[Wf>>2]=+g[Lf>>2]+ +g[Mf>>2];g[wg>>2]=+g[Vf>>2]-+g[Wf>>2];g[xg>>2]=+g[Vf>>2]+ +g[Wf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[wa>>2]-+g[Nb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[xg>>2]-+g[eh>>2];g[c[m>>2]>>2]=+g[wa>>2]+ +g[Nb>>2];g[c[n>>2]>>2]=+g[xg>>2]+ +g[eh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Uf>>2]-+g[wg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[fh>>2]-+g[gh>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Uf>>2]+ +g[wg>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[fh>>2]+ +g[gh>>2];g[Af>>2]=+g[oi>>2]-+g[Nh>>2];g[Df>>2]=+g[Bf>>2]-+g[Cf>>2];g[Ef>>2]=+g[Af>>2]+ +g[Df>>2];g[Qf>>2]=+g[Af>>2]-+g[Df>>2];g[ih>>2]=+g[ua>>2]-+g[z>>2];g[jh>>2]=+g[Eg>>2]-+g[zg>>2];g[kh>>2]=+g[ih>>2]+ +g[jh>>2];g[mh>>2]=+g[jh>>2]-+g[ih>>2];g[Ff>>2]=+g[V>>2]-+g[pb>>2];g[If>>2]=+g[Gf>>2]-+g[Hf>>2];g[Jf>>2]=+g[Ff>>2]+ +g[If>>2];g[Rf>>2]=+g[If>>2]-+g[Ff>>2];g[Kf>>2]=+g[Pa>>2]-+g[kb>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[Of>>2]=+g[Kf>>2]-+g[Nf>>2];g[Sf>>2]=+g[Kf>>2]+ +g[Nf>>2];g[Pf>>2]=(+g[Jf>>2]+ +g[Of>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ef>>2]-+g[Pf>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ef>>2]+ +g[Pf>>2];g[hh>>2]=(+g[Rf>>2]+ +g[Sf>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[hh>>2]-+g[kh>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[hh>>2]+ +g[kh>>2];g[Tf>>2]=(+g[Rf>>2]-+g[Sf>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Qf>>2]-+g[Tf>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Qf>>2]+ +g[Tf>>2];g[lh>>2]=(+g[Of>>2]-+g[Jf>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[lh>>2]-+g[mh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[lh>>2]+ +g[mh>>2];g[we>>2]=+g[se>>2]-+g[ve>>2];g[rh>>2]=+g[ph>>2]+ +g[qh>>2];g[xh>>2]=+g[qh>>2]-+g[ph>>2];g[ig>>2]=+g[se>>2]+ +g[ve>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[Ge>>2]=+g[Ce>>2]+ +g[Fe>>2];g[He>>2]=(+g[Be>>2]-+g[Ge>>2])*.7071067690849304;g[oh>>2]=(+g[Be>>2]+ 
+g[Ge>>2])*.7071067690849304;g[qg>>2]=+g[uf>>2]+ +g[Xf>>2];g[rg>>2]=+g[$f>>2]+ +g[ag>>2];g[sg>>2]=+g[qg>>2]*.9238795042037964-+g[rg>>2]*.3826834261417389;g[yf>>2]=+g[rg>>2]*.9238795042037964+ +g[qg>>2]*.3826834261417389;g[jg>>2]=+g[Ae>>2]+ +g[ze>>2];g[kg>>2]=+g[Ce>>2]-+g[Fe>>2];g[lg>>2]=(+g[jg>>2]+ +g[kg>>2])*.7071067690849304;g[wh>>2]=(+g[kg>>2]-+g[jg>>2])*.7071067690849304;g[nf>>2]=+g[Le>>2]-+g[Me>>2];g[sf>>2]=+g[of>>2]-+g[rf>>2];g[tf>>2]=+g[nf>>2]*.9238795042037964+ +g[sf>>2]*.3826834261417389;g[fg>>2]=+g[nf>>2]*.3826834261417389-+g[sf>>2]*.9238795042037964;g[ng>>2]=+g[Le>>2]+ +g[Me>>2];g[og>>2]=+g[of>>2]+ +g[rf>>2];g[pg>>2]=+g[ng>>2]*.3826834261417389+ +g[og>>2]*.9238795042037964;g[xf>>2]=+g[ng>>2]*.9238795042037964-+g[og>>2]*.3826834261417389;g[Yf>>2]=+g[uf>>2]-+g[Xf>>2];g[bg>>2]=+g[$f>>2]-+g[ag>>2];g[cg>>2]=+g[Yf>>2]*.3826834261417389-+g[bg>>2]*.9238795042037964;g[gg>>2]=+g[bg>>2]*.3826834261417389+ +g[Yf>>2]*.9238795042037964;g[Ie>>2]=+g[we>>2]+ +g[He>>2];g[dg>>2]=+g[tf>>2]+ +g[cg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ie>>2]-+g[dg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ie>>2]+ +g[dg>>2];g[vh>>2]=+g[fg>>2]+ +g[gg>>2];g[yh>>2]=+g[wh>>2]+ +g[xh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[vh>>2]-+g[yh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[vh>>2]+ +g[yh>>2];g[eg>>2]=+g[we>>2]-+g[He>>2];g[hg>>2]=+g[fg>>2]-+g[gg>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[eg>>2]-+g[hg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[eg>>2]+ +g[hg>>2];g[zh>>2]=+g[cg>>2]-+g[tf>>2];g[Ah>>2]=+g[xh>>2]-+g[wh>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[zh>>2]-+g[Ah>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[zh>>2]+ +g[Ah>>2];g[mg>>2]=+g[ig>>2]+ +g[lg>>2];g[tg>>2]=+g[pg>>2]+ +g[sg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[mg>>2]-+g[tg>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[mg>>2]+ +g[tg>>2];g[nh>>2]=+g[xf>>2]+ +g[yf>>2];g[sh>>2]=+g[oh>>2]+ +g[rh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[nh>>2]-+g[sh>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[nh>>2]+ 
+g[sh>>2];g[ug>>2]=+g[ig>>2]-+g[lg>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ug>>2]-+g[zf>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[ug>>2]+ +g[zf>>2];g[th>>2]=+g[sg>>2]-+g[pg>>2];g[uh>>2]=+g[rh>>2]-+g[oh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[th>>2]-+g[uh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[th>>2]+ +g[uh>>2];g[Bc>>2]=(+g[vc>>2]-+g[Ac>>2])*.7071067690849304;g[Cc>>2]=+g[Sb>>2]-+g[Bc>>2];g[hd>>2]=+g[Sb>>2]+ +g[Bc>>2];g[Vg>>2]=(+g[zd>>2]-+g[yd>>2])*.7071067690849304;g[Xg>>2]=+g[Vg>>2]+ +g[Wg>>2];g[bh>>2]=+g[Wg>>2]-+g[Vg>>2];g[Nc>>2]=+g[Hc>>2]*.3826834261417389-+g[Mc>>2]*.9238795042037964;g[_b>>2]=+g[Sc>>2]*.3826834261417389+ +g[Zb>>2]*.9238795042037964;g[$b>>2]=+g[Nc>>2]-+g[_b>>2];g[Ug>>2]=+g[Nc>>2]+ +g[_b>>2];g[pd>>2]=+g[Fd>>2]+ +g[Qd>>2];g[qd>>2]=+g[Wd>>2]+ +g[Zd>>2];g[rd>>2]=+g[pd>>2]*.8314695954322815-+g[qd>>2]*.5555702447891235;g[vd>>2]=+g[qd>>2]*.8314695954322815+ +g[pd>>2]*.5555702447891235;g[rc>>2]=+g[fc>>2]-+g[qc>>2];g[$c>>2]=+g[Xc>>2]-+g[_c>>2];g[ad>>2]=+g[rc>>2]*.9807852506637573+ +g[$c>>2]*.19509032368659973;g[ed>>2]=+g[rc>>2]*.19509032368659973-+g[$c>>2]*.9807852506637573;g[id>>2]=+g[Hc>>2]*.9238795042037964+ +g[Mc>>2]*.3826834261417389;g[jd>>2]=+g[Zb>>2]*.3826834261417389-+g[Sc>>2]*.9238795042037964;g[kd>>2]=+g[id>>2]+ +g[jd>>2];g[ah>>2]=+g[jd>>2]-+g[id>>2];g[md>>2]=+g[fc>>2]+ +g[qc>>2];g[nd>>2]=+g[Xc>>2]+ +g[_c>>2];g[od>>2]=+g[md>>2]*.5555702447891235+ +g[nd>>2]*.8314695954322815;g[ud>>2]=+g[md>>2]*.8314695954322815-+g[nd>>2]*.5555702447891235;g[Rd>>2]=+g[Fd>>2]-+g[Qd>>2];g[_d>>2]=+g[Wd>>2]-+g[Zd>>2];g[$d>>2]=+g[Rd>>2]*.19509032368659973-+g[_d>>2]*.9807852506637573;g[fd>>2]=+g[_d>>2]*.19509032368659973+ +g[Rd>>2]*.9807852506637573;g[ac>>2]=+g[Cc>>2]+ +g[$b>>2];g[ae>>2]=+g[ad>>2]+ +g[$d>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[ac>>2]-+g[ae>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ac>>2]+ +g[ae>>2];g[$g>>2]=+g[ed>>2]+ +g[fd>>2];g[ch>>2]=+g[ah>>2]+ 
+g[bh>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[$g>>2]-+g[ch>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[$g>>2]+ +g[ch>>2];g[be>>2]=+g[Cc>>2]-+g[$b>>2];g[gd>>2]=+g[ed>>2]-+g[fd>>2];g[c[o>>2]>>2]=+g[be>>2]-+g[gd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[be>>2]+ +g[gd>>2];g[dh>>2]=+g[$d>>2]-+g[ad>>2];g[Fh>>2]=+g[bh>>2]-+g[ah>>2];g[c[p>>2]>>2]=+g[dh>>2]-+g[Fh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[dh>>2]+ +g[Fh>>2];g[ld>>2]=+g[hd>>2]+ +g[kd>>2];g[sd>>2]=+g[od>>2]+ +g[rd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[ld>>2]-+g[sd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ld>>2]+ +g[sd>>2];g[Tg>>2]=+g[ud>>2]+ +g[vd>>2];g[Yg>>2]=+g[Ug>>2]+ +g[Xg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Tg>>2]-+g[Yg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Tg>>2]+ +g[Yg>>2];g[td>>2]=+g[hd>>2]-+g[kd>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[td>>2]-+g[wd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[td>>2]+ +g[wd>>2];g[Zg>>2]=+g[rd>>2]-+g[od>>2];g[_g>>2]=+g[Xg>>2]-+g[Ug>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Zg>>2]-+g[_g>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Zg>>2]+ +g[_g>>2];g[Ad>>2]=(+g[yd>>2]+ +g[zd>>2])*.7071067690849304;g[Bd>>2]=+g[xd>>2]-+g[Ad>>2];g[af>>2]=+g[xd>>2]+ +g[Ad>>2];g[Dh>>2]=(+g[vc>>2]+ +g[Ac>>2])*.7071067690849304;g[Jg>>2]=+g[Dh>>2]+ +g[Ig>>2];g[Pg>>2]=+g[Ig>>2]-+g[Dh>>2];g[de>>2]=+g[Cd>>2]*.9238795042037964-+g[Dd>>2]*.3826834261417389;g[ge>>2]=+g[ee>>2]*.9238795042037964+ +g[fe>>2]*.3826834261417389;g[he>>2]=+g[de>>2]-+g[ge>>2];g[Ch>>2]=+g[de>>2]+ +g[ge>>2];g[jf>>2]=+g[Qe>>2]+ +g[Re>>2];g[kf>>2]=+g[Te>>2]+ +g[Ue>>2];g[lf>>2]=+g[jf>>2]*.9807852506637573-+g[kf>>2]*.19509032368659973;g[qe>>2]=+g[jf>>2]*.19509032368659973+ +g[kf>>2]*.9807852506637573;g[le>>2]=+g[je>>2]-+g[ke>>2];g[Oe>>2]=+g[me>>2]-+g[Ne>>2];g[Pe>>2]=+g[le>>2]*.5555702447891235+ +g[Oe>>2]*.8314695954322815;g[Ze>>2]=+g[Oe>>2]*.5555702447891235-+g[le>>2]*.8314695954322815;g[bf>>2]=+g[Cd>>2]*.3826834261417389+ 
+g[Dd>>2]*.9238795042037964;g[cf>>2]=+g[fe>>2]*.9238795042037964-+g[ee>>2]*.3826834261417389;g[df>>2]=+g[bf>>2]+ +g[cf>>2];g[Og>>2]=+g[cf>>2]-+g[bf>>2];g[ff>>2]=+g[je>>2]+ +g[ke>>2];g[gf>>2]=+g[me>>2]+ +g[Ne>>2];g[hf>>2]=+g[ff>>2]*.9807852506637573+ +g[gf>>2]*.19509032368659973;g[pe>>2]=+g[gf>>2]*.9807852506637573-+g[ff>>2]*.19509032368659973;g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[Ve>>2]=+g[Te>>2]-+g[Ue>>2];g[We>>2]=+g[Se>>2]*.5555702447891235-+g[Ve>>2]*.8314695954322815;g[_e>>2]=+g[Se>>2]*.8314695954322815+ +g[Ve>>2]*.5555702447891235;g[ie>>2]=+g[Bd>>2]+ +g[he>>2];g[Xe>>2]=+g[Pe>>2]+ +g[We>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[ie>>2]-+g[Xe>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ie>>2]+ +g[Xe>>2];g[Ng>>2]=+g[Ze>>2]+ +g[_e>>2];g[Qg>>2]=+g[Og>>2]+ +g[Pg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ng>>2]-+g[Qg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ng>>2]+ +g[Qg>>2];g[Ye>>2]=+g[Bd>>2]-+g[he>>2];g[$e>>2]=+g[Ze>>2]-+g[_e>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ye>>2]-+g[$e>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Ye>>2]+ +g[$e>>2];g[Rg>>2]=+g[We>>2]-+g[Pe>>2];g[Sg>>2]=+g[Pg>>2]-+g[Og>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Rg>>2]-+g[Sg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Rg>>2]+ +g[Sg>>2];g[ef>>2]=+g[af>>2]+ +g[df>>2];g[ne>>2]=+g[hf>>2]+ +g[lf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ef>>2]-+g[ne>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[ef>>2]+ +g[ne>>2];g[Bh>>2]=+g[pe>>2]+ +g[qe>>2];g[Kg>>2]=+g[Ch>>2]+ +g[Jg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Bh>>2]-+g[Kg>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Bh>>2]+ +g[Kg>>2];g[oe>>2]=+g[af>>2]-+g[df>>2];g[re>>2]=+g[pe>>2]-+g[qe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[oe>>2]-+g[re>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[oe>>2]+ +g[re>>2];g[Lg>>2]=+g[lf>>2]-+g[hf>>2];g[Mg>>2]=+g[Jg>>2]-+g[Ch>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Lg>>2]-+g[Mg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Lg>>2]+ 
+g[Mg>>2];c[Fi>>2]=(c[Fi>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+248;c[r>>2]=c[r>>2]^c[2998]}i=Gi;return}function Hq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,35,3736,0);i=b;return}function Iq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0;X=i;i=i+160|0;m=X+148|0;n=X+144|0;o=X+140|0;p=X+136|0;q=X+132|0;r=X+128|0;Y=X+124|0;s=X+120|0;t=X+116|0;W=X+112|0;u=X+108|0;S=X+104|0;z=X+100|0;R=X+96|0;F=X+92|0;N=X+88|0;K=X+84|0;O=X+80|0;w=X+76|0;y=X+72|0;v=X+68|0;x=X+64|0;C=X+60|0;E=X+56|0;B=X+52|0;D=X+48|0;H=X+44|0;J=X+40|0;G=X+36|0;I=X+32|0;A=X+28|0;L=X+24|0;Q=X+20|0;T=X+16|0;M=X+12|0;P=X+8|0;U=X+4|0;V=X;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Y>>2]=j;c[s>>2]=k;c[t>>2]=l;c[W>>2]=c[Y>>2];c[q>>2]=(c[q>>2]|0)+(((c[Y>>2]|0)-1|0)*6<<2);while(1){if((c[W>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[S>>2]=+g[c[o>>2]>>2];g[w>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[v>>2]=+g[(c[q>>2]|0)+8>>2];g[x>>2]=+g[(c[q>>2]|0)+12>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[R>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[C>>2]=+g[c[n>>2]>>2];g[E>>2]=+g[c[p>>2]>>2];g[B>>2]=+g[c[q>>2]>>2];g[D>>2]=+g[(c[q>>2]|0)+4>>2];g[F>>2]=+g[B>>2]*+g[C>>2]+ +g[D>>2]*+g[E>>2];g[N>>2]=+g[B>>2]*+g[E>>2]-+g[D>>2]*+g[C>>2];g[H>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[J>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[G>>2]=+g[(c[q>>2]|0)+16>>2];g[I>>2]=+g[(c[q>>2]|0)+20>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[O>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[A>>2]=+g[u>>2]+ +g[z>>2];g[L>>2]=+g[F>>2]+ +g[K>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[A>>2]-+g[L>>2];g[c[m>>2]>>2]=+g[A>>2]+ +g[L>>2];g[Q>>2]=+g[N>>2]+ 
+g[O>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]-+g[T>>2];g[c[n>>2]>>2]=+g[Q>>2]+ +g[T>>2];g[M>>2]=+g[u>>2]-+g[z>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[c[o>>2]>>2]=+g[M>>2]-+g[P>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]+ +g[P>>2];g[U>>2]=+g[K>>2]-+g[F>>2];g[V>>2]=+g[S>>2]-+g[R>>2];g[c[p>>2]>>2]=+g[U>>2]-+g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[U>>2]+ +g[V>>2];c[W>>2]=(c[W>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24}i=X;return}function Jq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,36,3784,0);i=b;return}function Kq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0;xa=i;i=i+272|0;m=xa+268|0;n=xa+264|0;o=xa+260|0;p=xa+256|0;q=xa+252|0;r=xa+248|0;ya=xa+244|0;s=xa+240|0;t=xa+236|0;wa=xa+224|0;A=xa+220|0;V=xa+216|0;ra=xa+212|0;R=xa+208|0;pa=xa+204|0;M=xa+200|0;ta=xa+196|0;I=xa+192|0;ea=xa+188|0;L=xa+184|0;sa=xa+180|0;F=xa+176|0;u=xa+172|0;Q=xa+168|0;z=xa+164|0;P=xa+160|0;w=xa+156|0;y=xa+152|0;v=xa+148|0;x=xa+144|0;ja=xa+140|0;G=xa+136|0;oa=xa+132|0;H=xa+128|0;ga=xa+124|0;ia=xa+120|0;fa=xa+116|0;ha=xa+112|0;la=xa+108|0;na=xa+104|0;ka=xa+100|0;ma=xa+96|0;_=xa+92|0;D=xa+88|0;da=xa+84|0;E=xa+80|0;C=xa+76|0;Z=xa+72|0;B=xa+68|0;Y=xa+64|0;aa=xa+60|0;ca=xa+56|0;$=xa+52|0;ba=xa+48|0;N=xa+44|0;qa=xa+40|0;K=xa+36|0;W=xa+32|0;U=xa+28|0;X=xa+24|0;J=xa+20|0;ua=xa+16|0;va=xa+12|0;S=xa+8|0;O=xa+4|0;T=xa;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ya>>2]=j;c[s>>2]=k;c[t>>2]=l;g[xa+232>>2]=.5;g[xa+228>>2]=.8660253882408142;c[wa>>2]=c[ya>>2];c[q>>2]=(c[q>>2]|0)+(((c[ya>>2]|0)-1|0)*10<<2);w
hile(1){if((c[wa>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Q>>2]=+g[c[o>>2]>>2];g[w>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[v>>2]=+g[(c[q>>2]|0)+16>>2];g[x>>2]=+g[(c[q>>2]|0)+20>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[P>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[A>>2]=+g[u>>2]-+g[z>>2];g[V>>2]=+g[Q>>2]-+g[P>>2];g[ra>>2]=+g[u>>2]+ +g[z>>2];g[R>>2]=+g[P>>2]+ +g[Q>>2];g[ga>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ia>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[fa>>2]=+g[(c[q>>2]|0)+24>>2];g[ha>>2]=+g[(c[q>>2]|0)+28>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[G>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[la>>2]=+g[c[n>>2]>>2];g[na>>2]=+g[c[p>>2]>>2];g[ka>>2]=+g[c[q>>2]>>2];g[ma>>2]=+g[(c[q>>2]|0)+4>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[H>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[pa>>2]=+g[ja>>2]-+g[oa>>2];g[M>>2]=+g[H>>2]-+g[G>>2];g[ta>>2]=+g[ja>>2]+ +g[oa>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[C>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Z>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[(c[q>>2]|0)+8>>2];g[Y>>2]=+g[(c[q>>2]|0)+12>>2];g[_>>2]=+g[B>>2]*+g[C>>2]+ +g[Y>>2]*+g[Z>>2];g[D>>2]=+g[B>>2]*+g[Z>>2]-+g[Y>>2]*+g[C>>2];g[aa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$>>2]=+g[(c[q>>2]|0)+32>>2];g[ba>>2]=+g[(c[q>>2]|0)+36>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[E>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ea>>2]=+g[_>>2]-+g[da>>2];g[L>>2]=+g[D>>2]-+g[E>>2];g[sa>>2]=+g[_>>2]+ +g[da>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[N>>2]=(+g[L>>2]+ +g[M>>2])*.8660253882408142;g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[K>>2]=+g[A>>2]-+g[qa>>2]*.5;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]+ +g[qa>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[K>>2]+ +g[N>>2];g[c[o>>2]>>2]=+g[K>>2]-+g[N>>2];g[W>>2]=(+g[pa>>2]-+g[ea>>2])*.8660253882408142;g[U>>2]=+g[M>>2]-+g[L>>2];g[X>>2]=+g[U>>2]*.5+ 
+g[V>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[U>>2]-+g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[W>>2]+ +g[X>>2];g[c[p>>2]>>2]=+g[W>>2]-+g[X>>2];g[J>>2]=(+g[F>>2]-+g[I>>2])*.8660253882408142;g[ua>>2]=+g[sa>>2]+ +g[ta>>2];g[va>>2]=+g[ra>>2]-+g[ua>>2]*.5;g[c[m>>2]>>2]=+g[ra>>2]+ +g[ua>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[va>>2]+ +g[J>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[va>>2]-+g[J>>2];g[S>>2]=(+g[sa>>2]-+g[ta>>2])*.8660253882408142;g[O>>2]=+g[F>>2]+ +g[I>>2];g[T>>2]=+g[R>>2]-+g[O>>2]*.5;g[c[n>>2]>>2]=+g[O>>2]+ +g[R>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[S>>2]+ +g[T>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[S>>2]-+g[T>>2];c[wa>>2]=(c[wa>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+40;c[r>>2]=c[r>>2]^c[2998]}i=xa;return}function Lq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,37,3832,0);i=b;return}function Mq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0;Va=i;i=i+368|0;m=Va+360|0;n=Va+356|0;o=Va+352|0;p=Va+348|0;q=Va+344|0;r=Va+340|0;Wa=Va+336|0;s=Va+332|0;t=Va+328|0;Ua=Va+320|0;T=Va+316|0;J=Va+312|0;ba=Va+308|0;E=Va+304|0;$=Va+300|0;y=Va+296|0;la=Va+292|0;oa=Va+288|0;Ca=Va+284|0;K=Va+280|0;ea=Va+276|0;B=Va+272|0;Oa=Va+268|0;x=Va+264|0;ga=Va+260|0;ja=Va+256|0;u=Va+252|0;D=Va+248|0;S=Va+244|0;C=Va+240|0;P=Va+236|0;R=Va+232|0;O=Va+228|0;Q=Va+224|0;Ta=Va+220|0;ma=Va+216|0;_=Va+212|0;na=Va+208|0;Qa=Va+204|0;Sa=Va+200|0;Pa=Va+196|0;Ra=Va+192|0;X=Va+188|0;Z=Va+184|0;W=Va+180|0;Y=Va+176|0;wa=Va+172|0;ca=Va+168|0;Ba=Va+164|0;da=Va+160|0;V=Va+156|0;va=Va+152|0;U=Va+148|0;ua=Va+144|0;ya=Va+140|0;Aa=Va+136|0;xa=Va+132|0;za=Va+128|0;Ia=Va+124|0;ha=Va+120|0;Na=Va+116|0;ia=Va+112|0;Fa=Va+108|0;Ha=Va+104|0;Ea=Va+100|0;Ga=Va+96|0;Ka=Va+92|0;Ma=Va+88|0;Ja=Va+84|0;La=Va+80|0;Da=Va+76|0;aa=Va+72|0;G=Va+68|0;H=Va+64|0;A=Va+60|0;F=Va+56|0;w=Va+52|0;z=Va+48|0;ra=Va+44|0;L=Va+40|0;v=Va+36|0;I=Va+32|0;sa=Va+28|0;ta=Va+24|0;fa=Va+20|0;N=Va+16|0;qa=Va+12|0;M=Va+8|0;ka=Va+4|0;pa=Va;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Wa>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Va+324>>2]=.7071067690849304;c[Ua>>2]=c[Wa>>2];c[q>>2]=(c[q>>2]|0)+(((c[Wa>>2]|0)-1|0)*14<<2);while(1){if((c[Ua>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[D>>2]=+g[c[o>>2]>>2];g[P>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[R>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[O>>2]=+g[(c[q>>2]|0)+24>>2];g[Q>>2]=+g[(c[q>>2]|0)+28>>2];g[S>>2]=+g[O>>2]*+g[P>>2]+ +g[Q>>2]*+g[R>>2];g[C>>2]=+g[O>>2]*+g[R>>2]-+g[Q>>2]*+g[P>>2];g[T>>2]=+g[u>>2]+ 
+g[S>>2];g[J>>2]=+g[D>>2]-+g[C>>2];g[ba>>2]=+g[u>>2]-+g[S>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Qa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Sa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Pa>>2]=+g[(c[q>>2]|0)+48>>2];g[Ra>>2]=+g[(c[q>>2]|0)+52>>2];g[Ta>>2]=+g[Pa>>2]*+g[Qa>>2]+ +g[Ra>>2]*+g[Sa>>2];g[ma>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Ra>>2]*+g[Qa>>2];g[X>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Z>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[W>>2]=+g[(c[q>>2]|0)+16>>2];g[Y>>2]=+g[(c[q>>2]|0)+20>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[na>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[$>>2]=+g[Ta>>2]+ +g[_>>2];g[y>>2]=+g[ma>>2]+ +g[na>>2];g[la>>2]=+g[Ta>>2]-+g[_>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[V>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[va>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[U>>2]=+g[(c[q>>2]|0)+8>>2];g[ua>>2]=+g[(c[q>>2]|0)+12>>2];g[wa>>2]=+g[U>>2]*+g[V>>2]+ +g[ua>>2]*+g[va>>2];g[ca>>2]=+g[U>>2]*+g[va>>2]-+g[ua>>2]*+g[V>>2];g[ya>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Aa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[xa>>2]=+g[(c[q>>2]|0)+40>>2];g[za>>2]=+g[(c[q>>2]|0)+44>>2];g[Ba>>2]=+g[xa>>2]*+g[ya>>2]+ +g[za>>2]*+g[Aa>>2];g[da>>2]=+g[xa>>2]*+g[Aa>>2]-+g[za>>2]*+g[ya>>2];g[Ca>>2]=+g[wa>>2]+ +g[Ba>>2];g[K>>2]=+g[wa>>2]-+g[Ba>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[B>>2]=+g[ca>>2]+ +g[da>>2];g[Fa>>2]=+g[c[n>>2]>>2];g[Ha>>2]=+g[c[p>>2]>>2];g[Ea>>2]=+g[c[q>>2]>>2];g[Ga>>2]=+g[(c[q>>2]|0)+4>>2];g[Ia>>2]=+g[Ea>>2]*+g[Fa>>2]+ +g[Ga>>2]*+g[Ha>>2];g[ha>>2]=+g[Ea>>2]*+g[Ha>>2]-+g[Ga>>2]*+g[Fa>>2];g[Ka>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ma>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ja>>2]=+g[(c[q>>2]|0)+32>>2];g[La>>2]=+g[(c[q>>2]|0)+36>>2];g[Na>>2]=+g[Ja>>2]*+g[Ka>>2]+ +g[La>>2]*+g[Ma>>2];g[ia>>2]=+g[Ja>>2]*+g[Ma>>2]-+g[La>>2]*+g[Ka>>2];g[Oa>>2]=+g[Ia>>2]+ +g[Na>>2];g[x>>2]=+g[ha>>2]+ +g[ia>>2];g[ga>>2]=+g[Ia>>2]-+g[Na>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[Da>>2]=+g[T>>2]+ +g[Ca>>2];g[aa>>2]=+g[Oa>>2]+ 
+g[$>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Da>>2]-+g[aa>>2];g[c[m>>2]>>2]=+g[Da>>2]+ +g[aa>>2];g[A>>2]=+g[x>>2]+ +g[y>>2];g[F>>2]=+g[B>>2]+ +g[E>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[A>>2]-+g[F>>2];g[c[n>>2]>>2]=+g[A>>2]+ +g[F>>2];g[w>>2]=+g[T>>2]-+g[Ca>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[w>>2]-+g[z>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[w>>2]+ +g[z>>2];g[G>>2]=+g[$>>2]-+g[Oa>>2];g[H>>2]=+g[E>>2]-+g[B>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]-+g[H>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[G>>2]+ +g[H>>2];g[ra>>2]=+g[ba>>2]-+g[ea>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[sa>>2]=+g[ja>>2]-+g[ga>>2];g[ta>>2]=+g[la>>2]+ +g[oa>>2];g[v>>2]=(+g[sa>>2]-+g[ta>>2])*.7071067690849304;g[I>>2]=(+g[sa>>2]+ +g[ta>>2])*.7071067690849304;g[c[o>>2]>>2]=+g[ra>>2]-+g[v>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ra>>2]+ +g[v>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]-+g[L>>2];g[fa>>2]=+g[ba>>2]+ +g[ea>>2];g[N>>2]=+g[K>>2]+ +g[J>>2];g[ka>>2]=+g[ga>>2]+ +g[ja>>2];g[pa>>2]=+g[la>>2]-+g[oa>>2];g[qa>>2]=(+g[ka>>2]+ +g[pa>>2])*.7071067690849304;g[M>>2]=(+g[pa>>2]-+g[ka>>2])*.7071067690849304;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[fa>>2]-+g[qa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[N>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[fa>>2]+ +g[qa>>2];g[c[p>>2]>>2]=+g[M>>2]-+g[N>>2];c[Ua>>2]=(c[Ua>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+56;c[r>>2]=c[r>>2]^c[2998]}i=Va;return}/* Nq(a): saves the stack pointer i, reserves a 16-byte frame, stores the int argument a in it, calls an(a, 38, 3880, 1) and restores i; returns nothing. NOTE(review): presumably a generated FFTW codelet-registration stub; the meaning of 38 and 3880 is not visible here - confirm against the C source. */function Nq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,38,3880,1);i=b;return} +/* ur(a,b,d,e,f,h,j): all arguments are coerced to int. Copies them into a 2192-byte stack frame, advances d by (f-1)*32 bytes, then loops a counter starting at f for as long as it is below h. Each pass reads 8 values from d (byte offsets 0..28), reads g[] at a and at b plus each multiple 0..31 of e (scaled by 4 bytes), combines them with sums, differences and multiplications by literal constants, and writes the results back through a and b. After each pass a advances by j*4 bytes, b moves back by j*4 bytes, d advances by 32 bytes and e is XORed with c[2998]. NOTE(review): looks like Emscripten output for a single-precision FFTW radix-32 twiddle codelet - confirm against the FFTW source; this is generated code and should be regenerated rather than hand-edited. */function ur(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0;kj=i;i=i+2192|0;k=kj+2184|0;l=kj+2180|0;m=kj+2176|0;n=kj+2172|0;lj=kj+2168|0;o=kj+2164|0;p=kj+2160|0;jj=kj+2128|0;za=kj+2124|0;_d=kj+2120|0;Ib=kj+2116|0;hf=kj+2112|0;Ah=kj+2108|0;xi=kj+2104|0;zi=kj+2100|0;Oi=kj+2096|0;ki=kj+2092|0;Pi=kj+2088|0;Si=kj+2084|0;Wi=kj+2080|0;oi=kj+2076|0;si=kj+2072|0;xa=kj+2068|0;I=kj+2064|0;w=kj+2060|0;B=kj+2056|0;s=kj+2052|0;G=kj+2048|0;ma=kj+2044|0;oa=kj+2040|0;yb=kj+2036|0;Sa=kj+2032|0;Cb=kj+2028|0;Ua=kj+2024|0;db=kj+2020|0;sc=kj+2016|0;hb=kj+2012|0;uc=kj+2008|0;dj=kj+2004|0;hj=kj+2e3|0;Cc=kj+1996|0;Ec=kj+1992|0;sa=kj+1988|0;ua=kj+1984|0;nb=kj+1980|0;pb=kj+1976|0;$=kj+1972|0;da=kj+1968|0;Lb=kj+1964|0;Nb=kj+1960|0;X=kj+1956|0;Z=kj+1952|0;Ma=kj+1948|0;Oa=kj+1944|0;Ri=kj+1940|0;Ci=kj+1936|0;Vi=kj+1932|0;Di=kj+1928|0;Xi=kj+1924|0;Gi=kj+1920|0;Zi=kj+1916|0;Ei=kj+1912|0;N=kj+1908|0;Ba=kj+1904|0;Q=kj+1900|0;Ca=kj+1896|0;R=kj+1892|0;Fa=kj+1888|0;T=kj+1884|0;Da=kj+1880|0;bj=kj+1876|0;ca=kj+1872|0;gj=kj+1868|0;z=kj+1864|0;cj=kj+1860|0;ba=kj+1856|0;fj=kj+1852|0;A=kj+1848|0;mi=kj+1844|0;v=kj+1840|0;ri=kj+1836|0;Ki=kj+1832|0;ni=kj+1828|0;u=kj+1824|0;qi=kj+1820|0;r=kj+1816|0;Rc=kj+1812|0;Ni=kj+1808|0;rg=kj+1804|0;Mi=kj+1800|0;wb=kj+1796|0;xb=kj+1792|0;Ab=kj+1788|0;Bb=kj+1784|0;bb=kj+1780|0;cb=kj+1776|0;fb=kj+1772|0;gb=kj+1768|0;Li=kj+1764|0;Qi=kj+1760|0;Ti=kj+1756|0;Ui=kj+1752|0;L=kj+1748|0;M=kj+1744
|0;O=kj+1740|0;P=kj+1736|0;wi=kj+1732|0;Yf=kj+1728|0;Oh=kj+1724|0;ai=kj+1720|0;Yb=kj+1716|0;af=kj+1712|0;Kg=kj+1708|0;Yg=kj+1704|0;Gb=kj+1700|0;uf=kj+1696|0;pg=kj+1692|0;jh=kj+1688|0;Kd=kj+1684|0;pe=kj+1680|0;bd=kj+1676|0;se=kj+1672|0;yc=kj+1668|0;Af=kj+1664|0;Hf=kj+1660|0;oh=kj+1656|0;ld=kj+1652|0;ze=kj+1648|0;be=kj+1644|0;we=kj+1640|0;ha=kj+1636|0;Xg=kj+1632|0;$f=kj+1628|0;Fg=kj+1624|0;bc=kj+1620|0;bf=kj+1616|0;gc=kj+1612|0;cf=kj+1608|0;F=kj+1604|0;bg=kj+1600|0;eg=kj+1596|0;fh=kj+1592|0;nc=kj+1588|0;gf=kj+1584|0;Tc=kj+1580|0;ff=kj+1576|0;Ja=kj+1572|0;gg=kj+1568|0;jg=kj+1564|0;eh=kj+1560|0;Zc=kj+1556|0;le=kj+1552|0;Cd=kj+1548|0;ke=kj+1544|0;$a=kj+1540|0;qg=kj+1536|0;xf=kj+1532|0;kh=kj+1528|0;Vd=kj+1524|0;te=kj+1520|0;ed=kj+1516|0;qe=kj+1512|0;Pc=kj+1508|0;If=kj+1504|0;Df=kj+1500|0;ph=kj+1496|0;wd=kj+1492|0;xe=kj+1488|0;ee=kj+1484|0;Ae=kj+1480|0;q=kj+1476|0;Ig=kj+1472|0;$i=kj+1468|0;Hg=kj+1464|0;li=kj+1460|0;Vb=kj+1456|0;ui=kj+1452|0;Wb=kj+1448|0;Yi=kj+1444|0;_i=kj+1440|0;ej=kj+1436|0;ij=kj+1432|0;pi=kj+1428|0;ti=kj+1424|0;aj=kj+1420|0;vi=kj+1416|0;Mh=kj+1412|0;Nh=kj+1408|0;Ub=kj+1404|0;Xb=kj+1400|0;Gg=kj+1396|0;Jg=kj+1392|0;mb=kj+1388|0;Xd=kj+1384|0;Eb=kj+1380|0;Id=kj+1376|0;rb=kj+1372|0;Yd=kj+1368|0;vb=kj+1364|0;Hd=kj+1360|0;kb=kj+1356|0;lb=kj+1352|0;zb=kj+1348|0;Db=kj+1344|0;ob=kj+1340|0;qb=kj+1336|0;tb=kj+1332|0;ub=kj+1328|0;sb=kj+1324|0;Fb=kj+1320|0;ng=kj+1316|0;og=kj+1312|0;Gd=kj+1308|0;Jd=kj+1304|0;Zd=kj+1300|0;ad=kj+1296|0;Kb=kj+1292|0;hd=kj+1288|0;wc=kj+1284|0;$d=kj+1280|0;Pb=kj+1276|0;id=kj+1272|0;rc=kj+1268|0;zd=kj+1264|0;eb=kj+1260|0;Jb=kj+1256|0;tc=kj+1252|0;vc=kj+1248|0;Mb=kj+1244|0;Ob=kj+1240|0;Rb=kj+1236|0;Sb=kj+1232|0;Qb=kj+1228|0;xc=kj+1224|0;Ff=kj+1220|0;Gf=kj+1216|0;jd=kj+1212|0;kd=kj+1208|0;yd=kj+1204|0;ae=kj+1200|0;Bi=kj+1196|0;_b=kj+1192|0;fa=kj+1188|0;ec=kj+1184|0;Ii=kj+1180|0;$b=kj+1176|0;y=kj+1172|0;dc=kj+1168|0;yi=kj+1164|0;Ai=kj+1160|0;aa=kj+1156|0;ea=kj+1152|0;Fi=kj+1148|0;Hi=kj+1144|0;t=kj+1140|0;x=kj+1136|0;Ji=kj+1132|0;ga=kj+1128|0;Zf
=kj+1124|0;_f=kj+1120|0;Zb=kj+1116|0;ac=kj+1112|0;cc=kj+1108|0;fc=kj+1104|0;la=kj+1100|0;oc=kj+1096|0;D=kj+1092|0;lc=kj+1088|0;qa=kj+1084|0;pc=kj+1080|0;wa=kj+1076|0;kc=kj+1072|0;ja=kj+1068|0;ka=kj+1064|0;ya=kj+1060|0;C=kj+1056|0;na=kj+1052|0;pa=kj+1048|0;ta=kj+1044|0;va=kj+1040|0;ra=kj+1036|0;E=kj+1032|0;cg=kj+1028|0;dg=kj+1024|0;jc=kj+1020|0;mc=kj+1016|0;qc=kj+1012|0;Sc=kj+1008|0;K=kj+1004|0;Vc=kj+1e3|0;Ha=kj+996|0;Ad=kj+992|0;V=kj+988|0;Wc=kj+984|0;Aa=kj+980|0;$c=kj+976|0;H=kj+972|0;J=kj+968|0;Ea=kj+964|0;Ga=kj+960|0;S=kj+956|0;U=kj+952|0;Y=kj+948|0;_=kj+944|0;W=kj+940|0;Ia=kj+936|0;hg=kj+932|0;ig=kj+928|0;Xc=kj+924|0;Yc=kj+920|0;_c=kj+916|0;Bd=kj+912|0;La=kj+908|0;Md=kj+904|0;Qa=kj+900|0;Nd=kj+896|0;Ld=kj+892|0;Od=kj+888|0;Wa=kj+884|0;Qd=kj+880|0;Za=kj+876|0;Rd=kj+872|0;Sd=kj+868|0;Td=kj+864|0;Hb=kj+860|0;Ka=kj+856|0;Na=kj+852|0;Pa=kj+848|0;Ta=kj+844|0;Va=kj+840|0;Xa=kj+836|0;Ya=kj+832|0;Ra=kj+828|0;_a=kj+824|0;vf=kj+820|0;wf=kj+816|0;Pd=kj+812|0;Ud=kj+808|0;cd=kj+804|0;dd=kj+800|0;Bc=kj+796|0;sd=kj+792|0;Gc=kj+788|0;td=kj+784|0;rd=kj+780|0;ud=kj+776|0;Kc=kj+772|0;nd=kj+768|0;Nc=kj+764|0;od=kj+760|0;md=kj+756|0;pd=kj+752|0;zc=kj+748|0;Ac=kj+744|0;Dc=kj+740|0;Fc=kj+736|0;Ic=kj+732|0;Jc=kj+728|0;Lc=kj+724|0;Mc=kj+720|0;Hc=kj+716|0;Oc=kj+712|0;Bf=kj+708|0;Cf=kj+704|0;qd=kj+700|0;vd=kj+696|0;ce=kj+692|0;de=kj+688|0;jb=kj+684|0;xh=kj+680|0;Mg=kj+676|0;Og=kj+672|0;Tb=kj+668|0;Ng=kj+664|0;Cg=kj+660|0;Dg=kj+656|0;ia=kj+652|0;ib=kj+648|0;Eg=kj+644|0;Lg=kj+640|0;ab=kj+636|0;Qc=kj+632|0;yh=kj+628|0;zh=kj+624|0;ef=kj+620|0;Ie=kj+616|0;Ph=kj+612|0;Vh=kj+608|0;ne=kj+604|0;Jh=kj+600|0;sf=kj+596|0;Wf=kj+592|0;ve=kj+588|0;Fe=kj+584|0;lf=kj+580|0;Uh=kj+576|0;pf=kj+572|0;Vf=kj+568|0;Ce=kj+564|0;Ge=kj+560|0;df=kj+556|0;Kh=kj+552|0;je=kj+548|0;me=kj+544|0;qf=kj+540|0;rf=kj+536|0;re=kj+532|0;ue=kj+528|0;jf=kj+524|0;kf=kj+520|0;nf=kj+516|0;of=kj+512|0;ye=kj+508|0;Be=kj+504|0;oe=kj+500|0;De=kj+496|0;Th=kj+492|0;Wh=kj+488|0;Xh=kj+484|0;Yh=kj+480|0;Ee=kj+476|0;He=kj+472|0;mf=kj+468|0;Tf=
kj+464|0;Ih=kj+460|0;Qh=kj+456|0;Rh=kj+452|0;Sh=kj+448|0;Uf=kj+444|0;Xf=kj+440|0;hh=kj+436|0;th=kj+432|0;Sg=kj+428|0;Ug=kj+424|0;mh=kj+420|0;uh=kj+416|0;rh=kj+412|0;vh=kj+408|0;dh=kj+404|0;gh=kj+400|0;Qg=kj+396|0;Rg=kj+392|0;ih=kj+388|0;lh=kj+384|0;nh=kj+380|0;qh=kj+376|0;sh=kj+372|0;Tg=kj+368|0;wh=kj+364|0;Pg=kj+360|0;ag=kj+356|0;Zg=kj+352|0;Eh=kj+348|0;Qf=kj+344|0;lg=kj+340|0;Wg=kj+336|0;zg=kj+332|0;bh=kj+328|0;sg=kj+324|0;Dh=kj+320|0;zf=kj+316|0;Nf=kj+312|0;wg=kj+308|0;ah=kj+304|0;Kf=kj+300|0;Of=kj+296|0;fg=kj+292|0;kg=kj+288|0;tf=kj+284|0;yf=kj+280|0;xg=kj+276|0;yg=kj+272|0;Rf=kj+268|0;Sf=kj+264|0;ug=kj+260|0;vg=kj+256|0;Ef=kj+252|0;Jf=kj+248|0;mg=kj+244|0;Lf=kj+240|0;Ch=kj+236|0;Fh=kj+232|0;Gh=kj+228|0;Hh=kj+224|0;Mf=kj+220|0;Pf=kj+216|0;tg=kj+212|0;Ag=kj+208|0;Vg=kj+204|0;_g=kj+200|0;$g=kj+196|0;Bh=kj+192|0;Bg=kj+188|0;ch=kj+184|0;ic=kj+180|0;Me=kj+176|0;bi=kj+172|0;hi=kj+168|0;Ed=kj+164|0;_h=kj+160|0;We=kj+156|0;_e=kj+152|0;gd=kj+148|0;Je=kj+144|0;Pe=kj+140|0;gi=kj+136|0;Te=kj+132|0;Ze=kj+128|0;ge=kj+124|0;Ke=kj+120|0;hc=kj+116|0;$h=kj+112|0;Uc=kj+108|0;Dd=kj+104|0;Ue=kj+100|0;Ve=kj+96|0;Wd=kj+92|0;fd=kj+88|0;Ne=kj+84|0;Oe=kj+80|0;Re=kj+76|0;Se=kj+72|0;xd=kj+68|0;fe=kj+64|0;Fd=kj+60|0;he=kj+56|0;fi=kj+52|0;ii=kj+48|0;ji=kj+44|0;Lh=kj+40|0;ie=kj+36|0;Le=kj+32|0;Qe=kj+28|0;Xe=kj+24|0;Zh=kj+20|0;ci=kj+16|0;di=kj+12|0;ei=kj+8|0;Ye=kj+4|0;$e=kj;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[lj>>2]=f;c[o>>2]=h;c[p>>2]=j;g[kj+2156>>2]=.5555702447891235;g[kj+2152>>2]=.8314695954322815;g[kj+2148>>2]=.9807852506637573;g[kj+2144>>2]=.19509032368659973;g[kj+2140>>2]=.3826834261417389;g[kj+2136>>2]=.9238795042037964;g[kj+2132>>2]=.7071067690849304;c[jj>>2]=c[lj>>2];c[m>>2]=(c[m>>2]|0)+((c[lj>>2]|0)-1<<3<<2);while(1){if((c[jj>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[Ib>>2]=+g[(c[m>>2]|0)+8>>2];g[hf>>2]=+g[(c[m>>2]|0)+12>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[Ni>>2]=+g[_d>>2]*+g[Ib>>2];g[rg>>2]=+g[_d>>2]*+g[hf>>2];g[Mi>>2]=+g[za>>2]*+g[hf>>2]
;g[Ah>>2]=+g[Rc>>2]+ +g[rg>>2];g[xi>>2]=+g[Rc>>2]-+g[rg>>2];g[zi>>2]=+g[Mi>>2]+ +g[Ni>>2];g[Oi>>2]=+g[Mi>>2]-+g[Ni>>2];g[ki>>2]=+g[(c[m>>2]|0)+16>>2];g[bj>>2]=+g[za>>2]*+g[ki>>2];g[ca>>2]=+g[hf>>2]*+g[ki>>2];g[gj>>2]=+g[_d>>2]*+g[ki>>2];g[z>>2]=+g[Ib>>2]*+g[ki>>2];g[Pi>>2]=+g[(c[m>>2]|0)+20>>2];g[cj>>2]=+g[_d>>2]*+g[Pi>>2];g[ba>>2]=+g[Ib>>2]*+g[Pi>>2];g[fj>>2]=+g[za>>2]*+g[Pi>>2];g[A>>2]=+g[hf>>2]*+g[Pi>>2];g[Si>>2]=+g[(c[m>>2]|0)+24>>2];g[mi>>2]=+g[Ib>>2]*+g[Si>>2];g[v>>2]=+g[_d>>2]*+g[Si>>2];g[ri>>2]=+g[hf>>2]*+g[Si>>2];g[Ki>>2]=+g[za>>2]*+g[Si>>2];g[Wi>>2]=+g[(c[m>>2]|0)+28>>2];g[ni>>2]=+g[hf>>2]*+g[Wi>>2];g[u>>2]=+g[za>>2]*+g[Wi>>2];g[qi>>2]=+g[Ib>>2]*+g[Wi>>2];g[r>>2]=+g[_d>>2]*+g[Wi>>2];g[oi>>2]=+g[mi>>2]+ +g[ni>>2];g[si>>2]=+g[qi>>2]-+g[ri>>2];g[xa>>2]=+g[Ki>>2]+ +g[r>>2];g[I>>2]=+g[qi>>2]+ +g[ri>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[B>>2]=+g[u>>2]-+g[v>>2];g[s>>2]=+g[Ki>>2]-+g[r>>2];g[G>>2]=+g[mi>>2]-+g[ni>>2];g[ma>>2]=+g[ki>>2]*+g[Si>>2]+ +g[Pi>>2]*+g[Wi>>2];g[oa>>2]=+g[ki>>2]*+g[Wi>>2]-+g[Pi>>2]*+g[Si>>2];g[wb>>2]=+g[Ah>>2]*+g[Si>>2];g[xb>>2]=+g[Oi>>2]*+g[Wi>>2];g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Sa>>2]=+g[wb>>2]-+g[xb>>2];g[Ab>>2]=+g[Ah>>2]*+g[Wi>>2];g[Bb>>2]=+g[Oi>>2]*+g[Si>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Ua>>2]=+g[Ab>>2]+ +g[Bb>>2];g[bb>>2]=+g[xi>>2]*+g[Si>>2];g[cb>>2]=+g[zi>>2]*+g[Wi>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[sc>>2]=+g[bb>>2]+ +g[cb>>2];g[fb>>2]=+g[xi>>2]*+g[Wi>>2];g[gb>>2]=+g[zi>>2]*+g[Si>>2];g[hb>>2]=+g[fb>>2]+ +g[gb>>2];g[uc>>2]=+g[fb>>2]-+g[gb>>2];g[dj>>2]=+g[bj>>2]+ +g[cj>>2];g[hj>>2]=+g[fj>>2]-+g[gj>>2];g[Cc>>2]=+g[dj>>2]*+g[Si>>2]+ +g[hj>>2]*+g[Wi>>2];g[Ec>>2]=+g[dj>>2]*+g[Wi>>2]-+g[hj>>2]*+g[Si>>2];g[sa>>2]=+g[bj>>2]-+g[cj>>2];g[ua>>2]=+g[fj>>2]+ +g[gj>>2];g[nb>>2]=+g[sa>>2]*+g[Si>>2]+ +g[ua>>2]*+g[Wi>>2];g[pb>>2]=+g[sa>>2]*+g[Wi>>2]-+g[ua>>2]*+g[Si>>2];g[$>>2]=+g[z>>2]-+g[A>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[Lb>>2]=+g[$>>2]*+g[Si>>2]+ +g[da>>2]*+g[Wi>>2];g[Nb>>2]=+g[$>>2]*+g[Wi>>2]-+g[da>>2]*+g[Si>>2];g[X>>2]=+g[z>>2]+ 
+g[A>>2];g[Z>>2]=+g[ba>>2]-+g[ca>>2];g[Ma>>2]=+g[X>>2]*+g[Si>>2]+ +g[Z>>2]*+g[Wi>>2];g[Oa>>2]=+g[X>>2]*+g[Wi>>2]-+g[Z>>2]*+g[Si>>2];g[Li>>2]=+g[Ah>>2]*+g[ki>>2];g[Qi>>2]=+g[Oi>>2]*+g[Pi>>2];g[Ri>>2]=+g[Li>>2]-+g[Qi>>2];g[Ci>>2]=+g[Li>>2]+ +g[Qi>>2];g[Ti>>2]=+g[Ah>>2]*+g[Pi>>2];g[Ui>>2]=+g[Oi>>2]*+g[ki>>2];g[Vi>>2]=+g[Ti>>2]+ +g[Ui>>2];g[Di>>2]=+g[Ti>>2]-+g[Ui>>2];g[Xi>>2]=+g[Ri>>2]*+g[Si>>2]+ +g[Vi>>2]*+g[Wi>>2];g[Gi>>2]=+g[Ci>>2]*+g[Wi>>2]-+g[Di>>2]*+g[Si>>2];g[Zi>>2]=+g[Ri>>2]*+g[Wi>>2]-+g[Vi>>2]*+g[Si>>2];g[Ei>>2]=+g[Ci>>2]*+g[Si>>2]+ +g[Di>>2]*+g[Wi>>2];g[L>>2]=+g[xi>>2]*+g[ki>>2];g[M>>2]=+g[zi>>2]*+g[Pi>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Ba>>2]=+g[L>>2]+ +g[M>>2];g[O>>2]=+g[xi>>2]*+g[Pi>>2];g[P>>2]=+g[zi>>2]*+g[ki>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Ca>>2]=+g[O>>2]-+g[P>>2];g[R>>2]=+g[N>>2]*+g[Si>>2]+ +g[Q>>2]*+g[Wi>>2];g[Fa>>2]=+g[Ba>>2]*+g[Wi>>2]-+g[Ca>>2]*+g[Si>>2];g[T>>2]=+g[N>>2]*+g[Wi>>2]-+g[Q>>2]*+g[Si>>2];g[Da>>2]=+g[Ba>>2]*+g[Si>>2]+ +g[Ca>>2]*+g[Wi>>2];g[q>>2]=+g[c[k>>2]>>2];g[Ig>>2]=+g[c[l>>2]>>2];g[Yi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[_i>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[$i>>2]=+g[Xi>>2]*+g[Yi>>2]+ +g[Zi>>2]*+g[_i>>2];g[Hg>>2]=+g[Xi>>2]*+g[_i>>2]-+g[Zi>>2]*+g[Yi>>2];g[ej>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ij>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[li>>2]=+g[dj>>2]*+g[ej>>2]+ +g[hj>>2]*+g[ij>>2];g[Vb>>2]=+g[dj>>2]*+g[ij>>2]-+g[hj>>2]*+g[ej>>2];g[pi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ti>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ui>>2]=+g[oi>>2]*+g[pi>>2]+ +g[si>>2]*+g[ti>>2];g[Wb>>2]=+g[oi>>2]*+g[ti>>2]-+g[si>>2]*+g[pi>>2];g[aj>>2]=+g[q>>2]+ +g[$i>>2];g[vi>>2]=+g[li>>2]+ +g[ui>>2];g[wi>>2]=+g[aj>>2]+ +g[vi>>2];g[Yf>>2]=+g[aj>>2]-+g[vi>>2];g[Mh>>2]=+g[li>>2]-+g[ui>>2];g[Nh>>2]=+g[Ig>>2]-+g[Hg>>2];g[Oh>>2]=+g[Mh>>2]+ +g[Nh>>2];g[ai>>2]=+g[Nh>>2]-+g[Mh>>2];g[Ub>>2]=+g[q>>2]-+g[$i>>2];g[Xb>>2]=+g[Vb>>2]-+g[Wb>>2];g[Yb>>2]=+g[Ub>>2]+ +g[Xb>>2];g[af>>2]=+g[Ub>>2]-+g[Xb>>2];g[Gg>>2]=+g[Vb>>2]+ +g[Wb>>2];g[Jg>>2]=+g[Hg>>2]+ 
+g[Ig>>2];g[Kg>>2]=+g[Gg>>2]+ +g[Jg>>2];g[Yg>>2]=+g[Jg>>2]-+g[Gg>>2];g[kb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[lb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[mb>>2]=+g[za>>2]*+g[kb>>2]+ +g[_d>>2]*+g[lb>>2];g[Xd>>2]=+g[za>>2]*+g[lb>>2]-+g[_d>>2]*+g[kb>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[Db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[Eb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Cb>>2]*+g[Db>>2];g[Id>>2]=+g[yb>>2]*+g[Db>>2]-+g[Cb>>2]*+g[zb>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[Yd>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ub>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[vb>>2]=+g[ki>>2]*+g[tb>>2]+ +g[Pi>>2]*+g[ub>>2];g[Hd>>2]=+g[ki>>2]*+g[ub>>2]-+g[Pi>>2]*+g[tb>>2];g[sb>>2]=+g[mb>>2]+ +g[rb>>2];g[Fb>>2]=+g[vb>>2]+ +g[Eb>>2];g[Gb>>2]=+g[sb>>2]+ +g[Fb>>2];g[uf>>2]=+g[sb>>2]-+g[Fb>>2];g[ng>>2]=+g[Xd>>2]+ +g[Yd>>2];g[og>>2]=+g[Hd>>2]+ +g[Id>>2];g[pg>>2]=+g[ng>>2]-+g[og>>2];g[jh>>2]=+g[ng>>2]+ +g[og>>2];g[Gd>>2]=+g[mb>>2]-+g[rb>>2];g[Jd>>2]=+g[Hd>>2]-+g[Id>>2];g[Kd>>2]=+g[Gd>>2]+ +g[Jd>>2];g[pe>>2]=+g[Gd>>2]-+g[Jd>>2];g[Zd>>2]=+g[Xd>>2]-+g[Yd>>2];g[ad>>2]=+g[vb>>2]-+g[Eb>>2];g[bd>>2]=+g[Zd>>2]-+g[ad>>2];g[se>>2]=+g[Zd>>2]+ +g[ad>>2];g[eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[Jb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[Kb>>2]=+g[db>>2]*+g[eb>>2]+ +g[hb>>2]*+g[Jb>>2];g[hd>>2]=+g[db>>2]*+g[Jb>>2]-+g[hb>>2]*+g[eb>>2];g[tc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[vc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[wc>>2]=+g[sc>>2]*+g[tc>>2]+ +g[uc>>2]*+g[vc>>2];g[$d>>2]=+g[sc>>2]*+g[vc>>2]-+g[uc>>2]*+g[tc>>2];g[Mb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Ob>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Pb>>2]=+g[Lb>>2]*+g[Mb>>2]+ 
+g[Nb>>2]*+g[Ob>>2];g[id>>2]=+g[Lb>>2]*+g[Ob>>2]-+g[Nb>>2]*+g[Mb>>2];g[Rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Sb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[rc>>2]=+g[Ci>>2]*+g[Rb>>2]+ +g[Di>>2]*+g[Sb>>2];g[zd>>2]=+g[Ci>>2]*+g[Sb>>2]-+g[Di>>2]*+g[Rb>>2];g[Qb>>2]=+g[Kb>>2]+ +g[Pb>>2];g[xc>>2]=+g[rc>>2]+ +g[wc>>2];g[yc>>2]=+g[Qb>>2]+ +g[xc>>2];g[Af>>2]=+g[Qb>>2]-+g[xc>>2];g[Ff>>2]=+g[hd>>2]+ +g[id>>2];g[Gf>>2]=+g[zd>>2]+ +g[$d>>2];g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[oh>>2]=+g[Ff>>2]+ +g[Gf>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[kd>>2]=+g[rc>>2]-+g[wc>>2];g[ld>>2]=+g[jd>>2]-+g[kd>>2];g[ze>>2]=+g[jd>>2]+ +g[kd>>2];g[yd>>2]=+g[Kb>>2]-+g[Pb>>2];g[ae>>2]=+g[zd>>2]-+g[$d>>2];g[be>>2]=+g[yd>>2]+ +g[ae>>2];g[we>>2]=+g[yd>>2]-+g[ae>>2];g[yi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ai>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Bi>>2]=+g[xi>>2]*+g[yi>>2]+ +g[zi>>2]*+g[Ai>>2];g[_b>>2]=+g[xi>>2]*+g[Ai>>2]-+g[zi>>2]*+g[yi>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[fa>>2]=+g[$>>2]*+g[aa>>2]+ +g[da>>2]*+g[ea>>2];g[ec>>2]=+g[$>>2]*+g[ea>>2]-+g[da>>2]*+g[aa>>2];g[Fi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Hi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Ii>>2]=+g[Ei>>2]*+g[Fi>>2]+ +g[Gi>>2]*+g[Hi>>2];g[$b>>2]=+g[Ei>>2]*+g[Hi>>2]-+g[Gi>>2]*+g[Fi>>2];g[t>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[y>>2]=+g[s>>2]*+g[t>>2]+ +g[w>>2]*+g[x>>2];g[dc>>2]=+g[s>>2]*+g[x>>2]-+g[w>>2]*+g[t>>2];g[Ji>>2]=+g[Bi>>2]+ +g[Ii>>2];g[ga>>2]=+g[y>>2]+ +g[fa>>2];g[ha>>2]=+g[Ji>>2]+ +g[ga>>2];g[Xg>>2]=+g[Ji>>2]-+g[ga>>2];g[Zf>>2]=+g[dc>>2]+ +g[ec>>2];g[_f>>2]=+g[_b>>2]+ +g[$b>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[Fg>>2]=+g[_f>>2]+ +g[Zf>>2];g[Zb>>2]=+g[Bi>>2]-+g[Ii>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[bc>>2]=+g[Zb>>2]+ +g[ac>>2];g[bf>>2]=+g[Zb>>2]-+g[ac>>2];g[cc>>2]=+g[y>>2]-+g[fa>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[gc>>2]=+g[cc>>2]-+g[fc>>2];g[cf>>2]=+g[cc>>2]+ 
+g[fc>>2];g[ja>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ka>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[la>>2]=+g[Ah>>2]*+g[ja>>2]+ +g[Oi>>2]*+g[ka>>2];g[oc>>2]=+g[Ah>>2]*+g[ka>>2]-+g[Oi>>2]*+g[ja>>2];g[ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[D>>2]=+g[xa>>2]*+g[ya>>2]+ +g[B>>2]*+g[C>>2];g[lc>>2]=+g[xa>>2]*+g[C>>2]-+g[B>>2]*+g[ya>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[pc>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[ta>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[wa>>2]=+g[sa>>2]*+g[ta>>2]+ +g[ua>>2]*+g[va>>2];g[kc>>2]=+g[sa>>2]*+g[va>>2]-+g[ua>>2]*+g[ta>>2];g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[E>>2]=+g[wa>>2]+ +g[D>>2];g[F>>2]=+g[ra>>2]+ +g[E>>2];g[bg>>2]=+g[ra>>2]-+g[E>>2];g[cg>>2]=+g[oc>>2]+ +g[pc>>2];g[dg>>2]=+g[kc>>2]+ +g[lc>>2];g[eg>>2]=+g[cg>>2]-+g[dg>>2];g[fh>>2]=+g[cg>>2]+ +g[dg>>2];g[jc>>2]=+g[la>>2]-+g[qa>>2];g[mc>>2]=+g[kc>>2]-+g[lc>>2];g[nc>>2]=+g[jc>>2]+ +g[mc>>2];g[gf>>2]=+g[jc>>2]-+g[mc>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[Sc>>2]=+g[wa>>2]-+g[D>>2];g[Tc>>2]=+g[qc>>2]-+g[Sc>>2];g[ff>>2]=+g[qc>>2]+ +g[Sc>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[Vc>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[Ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Ha>>2]=+g[Da>>2]*+g[Ea>>2]+ +g[Fa>>2]*+g[Ga>>2];g[Ad>>2]=+g[Da>>2]*+g[Ga>>2]-+g[Fa>>2]*+g[Ea>>2];g[S>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[Wc>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Aa>>2]=+g[X>>2]*+g[Y>>2]+ 
+g[Z>>2]*+g[_>>2];g[$c>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[W>>2]=+g[K>>2]+ +g[V>>2];g[Ia>>2]=+g[Aa>>2]+ +g[Ha>>2];g[Ja>>2]=+g[W>>2]+ +g[Ia>>2];g[gg>>2]=+g[W>>2]-+g[Ia>>2];g[hg>>2]=+g[Vc>>2]+ +g[Wc>>2];g[ig>>2]=+g[$c>>2]+ +g[Ad>>2];g[jg>>2]=+g[hg>>2]-+g[ig>>2];g[eh>>2]=+g[hg>>2]+ +g[ig>>2];g[Xc>>2]=+g[Vc>>2]-+g[Wc>>2];g[Yc>>2]=+g[Aa>>2]-+g[Ha>>2];g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[le>>2]=+g[Xc>>2]+ +g[Yc>>2];g[_c>>2]=+g[K>>2]-+g[V>>2];g[Bd>>2]=+g[$c>>2]-+g[Ad>>2];g[Cd>>2]=+g[_c>>2]+ +g[Bd>>2];g[ke>>2]=+g[_c>>2]-+g[Bd>>2];g[Hb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[La>>2]=+g[Ba>>2]*+g[Hb>>2]+ +g[Ca>>2]*+g[Ka>>2];g[Md>>2]=+g[Ba>>2]*+g[Ka>>2]-+g[Ca>>2]*+g[Hb>>2];g[Na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[Nd>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[Ld>>2]=+g[La>>2]-+g[Qa>>2];g[Od>>2]=+g[Md>>2]-+g[Nd>>2];g[Ta>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Va>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Wa>>2]=+g[Sa>>2]*+g[Ta>>2]+ +g[Ua>>2]*+g[Va>>2];g[Qd>>2]=+g[Sa>>2]*+g[Va>>2]-+g[Ua>>2]*+g[Ta>>2];g[Xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ya>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Za>>2]=+g[N>>2]*+g[Xa>>2]+ +g[Q>>2]*+g[Ya>>2];g[Rd>>2]=+g[N>>2]*+g[Ya>>2]-+g[Q>>2]*+g[Xa>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[Td>>2]=+g[Wa>>2]-+g[Za>>2];g[Ra>>2]=+g[La>>2]+ +g[Qa>>2];g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[$a>>2]=+g[Ra>>2]+ +g[_a>>2];g[qg>>2]=+g[Ra>>2]-+g[_a>>2];g[vf>>2]=+g[Qd>>2]+ +g[Rd>>2];g[wf>>2]=+g[Md>>2]+ +g[Nd>>2];g[xf>>2]=+g[vf>>2]-+g[wf>>2];g[kh>>2]=+g[wf>>2]+ +g[vf>>2];g[Pd>>2]=+g[Ld>>2]+ +g[Od>>2];g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[Vd>>2]=(+g[Pd>>2]-+g[Ud>>2])*.7071067690849304;g[te>>2]=(+g[Pd>>2]+ +g[Ud>>2])*.7071067690849304;g[cd>>2]=+g[Td>>2]+ +g[Sd>>2];g[dd>>2]=+g[Ld>>2]-+g[Od>>2];g[ed>>2]=(+g[cd>>2]-+g[dd>>2])*.7071067690849304;g[qe>>2]=(+g[dd>>2]+ 
+g[cd>>2])*.7071067690849304;g[zc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ac>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Bc>>2]=+g[Ib>>2]*+g[zc>>2]+ +g[hf>>2]*+g[Ac>>2];g[sd>>2]=+g[Ib>>2]*+g[Ac>>2]-+g[hf>>2]*+g[zc>>2];g[Dc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Fc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Gc>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[td>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[rd>>2]=+g[Bc>>2]-+g[Gc>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[Ic>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Kc>>2]=+g[Si>>2]*+g[Ic>>2]+ +g[Wi>>2]*+g[Jc>>2];g[nd>>2]=+g[Si>>2]*+g[Jc>>2]-+g[Wi>>2]*+g[Ic>>2];g[Lc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Nc>>2]=+g[Ri>>2]*+g[Lc>>2]+ +g[Vi>>2]*+g[Mc>>2];g[od>>2]=+g[Ri>>2]*+g[Mc>>2]-+g[Vi>>2]*+g[Lc>>2];g[md>>2]=+g[Kc>>2]-+g[Nc>>2];g[pd>>2]=+g[nd>>2]-+g[od>>2];g[Hc>>2]=+g[Bc>>2]+ +g[Gc>>2];g[Oc>>2]=+g[Kc>>2]+ +g[Nc>>2];g[Pc>>2]=+g[Hc>>2]+ +g[Oc>>2];g[If>>2]=+g[Hc>>2]-+g[Oc>>2];g[Bf>>2]=+g[nd>>2]+ +g[od>>2];g[Cf>>2]=+g[sd>>2]+ +g[td>>2];g[Df>>2]=+g[Bf>>2]-+g[Cf>>2];g[ph>>2]=+g[Cf>>2]+ +g[Bf>>2];g[qd>>2]=+g[md>>2]+ +g[pd>>2];g[vd>>2]=+g[rd>>2]-+g[ud>>2];g[wd>>2]=(+g[qd>>2]-+g[vd>>2])*.7071067690849304;g[xe>>2]=(+g[vd>>2]+ +g[qd>>2])*.7071067690849304;g[ce>>2]=+g[rd>>2]+ +g[ud>>2];g[de>>2]=+g[pd>>2]-+g[md>>2];g[ee>>2]=(+g[ce>>2]-+g[de>>2])*.7071067690849304;g[Ae>>2]=(+g[ce>>2]+ +g[de>>2])*.7071067690849304;g[ia>>2]=+g[wi>>2]+ +g[ha>>2];g[ib>>2]=+g[F>>2]+ +g[Ja>>2];g[jb>>2]=+g[ia>>2]+ +g[ib>>2];g[xh>>2]=+g[ia>>2]-+g[ib>>2];g[Eg>>2]=+g[fh>>2]+ +g[eh>>2];g[Lg>>2]=+g[Fg>>2]+ +g[Kg>>2];g[Mg>>2]=+g[Eg>>2]+ +g[Lg>>2];g[Og>>2]=+g[Lg>>2]-+g[Eg>>2];g[ab>>2]=+g[Gb>>2]+ +g[$a>>2];g[Qc>>2]=+g[yc>>2]+ +g[Pc>>2];g[Tb>>2]=+g[ab>>2]+ +g[Qc>>2];g[Ng>>2]=+g[Qc>>2]-+g[ab>>2];g[yh>>2]=+g[oh>>2]+ +g[ph>>2];g[zh>>2]=+g[jh>>2]+ +g[kh>>2];g[Cg>>2]=+g[yh>>2]-+g[zh>>2];g[Dg>>2]=+g[zh>>2]+ 
+g[yh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[jb>>2]-+g[Tb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Ng>>2]-+g[Og>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Ng>>2]+ +g[Og>>2];g[c[k>>2]>>2]=+g[jb>>2]+ +g[Tb>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[xh>>2]-+g[Cg>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Dg>>2]-+g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Dg>>2]+ +g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[xh>>2]+ +g[Cg>>2];g[df>>2]=(+g[bf>>2]+ +g[cf>>2])*.7071067690849304;g[ef>>2]=+g[af>>2]-+g[df>>2];g[Ie>>2]=+g[af>>2]+ +g[df>>2];g[Kh>>2]=(+g[bc>>2]-+g[gc>>2])*.7071067690849304;g[Ph>>2]=+g[Kh>>2]+ +g[Oh>>2];g[Vh>>2]=+g[Oh>>2]-+g[Kh>>2];g[je>>2]=+g[ff>>2]*.9238795042037964+ +g[gf>>2]*.3826834261417389;g[me>>2]=+g[ke>>2]*.3826834261417389-+g[le>>2]*.9238795042037964;g[ne>>2]=+g[je>>2]+ +g[me>>2];g[Jh>>2]=+g[je>>2]-+g[me>>2];g[qf>>2]=+g[we>>2]+ +g[xe>>2];g[rf>>2]=+g[ze>>2]+ +g[Ae>>2];g[sf>>2]=+g[qf>>2]*.19509032368659973-+g[rf>>2]*.9807852506637573;g[Wf>>2]=+g[qf>>2]*.9807852506637573+ +g[rf>>2]*.19509032368659973;g[re>>2]=+g[pe>>2]-+g[qe>>2];g[ue>>2]=+g[se>>2]-+g[te>>2];g[ve>>2]=+g[re>>2]*.8314695954322815+ +g[ue>>2]*.5555702447891235;g[Fe>>2]=+g[re>>2]*.5555702447891235-+g[ue>>2]*.8314695954322815;g[jf>>2]=+g[gf>>2]*.9238795042037964-+g[ff>>2]*.3826834261417389;g[kf>>2]=+g[le>>2]*.3826834261417389+ +g[ke>>2]*.9238795042037964;g[lf>>2]=+g[jf>>2]+ +g[kf>>2];g[Uh>>2]=+g[kf>>2]-+g[jf>>2];g[nf>>2]=+g[pe>>2]+ +g[qe>>2];g[of>>2]=+g[se>>2]+ +g[te>>2];g[pf>>2]=+g[nf>>2]*.19509032368659973+ +g[of>>2]*.9807852506637573;g[Vf>>2]=+g[nf>>2]*.9807852506637573-+g[of>>2]*.19509032368659973;g[ye>>2]=+g[we>>2]-+g[xe>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[Ce>>2]=+g[ye>>2]*.8314695954322815-+g[Be>>2]*.5555702447891235;g[Ge>>2]=+g[ye>>2]*.5555702447891235+ +g[Be>>2]*.8314695954322815;g[oe>>2]=+g[ef>>2]+ +g[ne>>2];g[De>>2]=+g[ve>>2]+ +g[Ce>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[oe>>2]-+g[De>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[oe>>2]+ 
+g[De>>2];g[Th>>2]=+g[Ge>>2]-+g[Fe>>2];g[Wh>>2]=+g[Uh>>2]+ +g[Vh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Th>>2]-+g[Wh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Th>>2]+ +g[Wh>>2];g[Xh>>2]=+g[Ce>>2]-+g[ve>>2];g[Yh>>2]=+g[Vh>>2]-+g[Uh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Xh>>2]-+g[Yh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Xh>>2]+ +g[Yh>>2];g[Ee>>2]=+g[ef>>2]-+g[ne>>2];g[He>>2]=+g[Fe>>2]+ +g[Ge>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ee>>2]-+g[He>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ee>>2]+ +g[He>>2];g[mf>>2]=+g[Ie>>2]-+g[lf>>2];g[Tf>>2]=+g[pf>>2]+ +g[sf>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[mf>>2]-+g[Tf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[mf>>2]+ +g[Tf>>2];g[Ih>>2]=+g[sf>>2]-+g[pf>>2];g[Qh>>2]=+g[Jh>>2]+ +g[Ph>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Ih>>2]-+g[Qh>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Ih>>2]+ +g[Qh>>2];g[Rh>>2]=+g[Wf>>2]-+g[Vf>>2];g[Sh>>2]=+g[Ph>>2]-+g[Jh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Rh>>2]-+g[Sh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Rh>>2]+ +g[Sh>>2];g[Uf>>2]=+g[Ie>>2]+ +g[lf>>2];g[Xf>>2]=+g[Vf>>2]+ +g[Wf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Uf>>2]-+g[Xf>>2];g[c[l>>2]>>2]=+g[Uf>>2]+ +g[Xf>>2];g[dh>>2]=+g[wi>>2]-+g[ha>>2];g[gh>>2]=+g[eh>>2]-+g[fh>>2];g[hh>>2]=+g[dh>>2]-+g[gh>>2];g[th>>2]=+g[dh>>2]+ +g[gh>>2];g[Qg>>2]=+g[F>>2]-+g[Ja>>2];g[Rg>>2]=+g[Kg>>2]-+g[Fg>>2];g[Sg>>2]=+g[Qg>>2]+ +g[Rg>>2];g[Ug>>2]=+g[Rg>>2]-+g[Qg>>2];g[ih>>2]=+g[Gb>>2]-+g[$a>>2];g[lh>>2]=+g[jh>>2]-+g[kh>>2];g[mh>>2]=+g[ih>>2]+ +g[lh>>2];g[uh>>2]=+g[ih>>2]-+g[lh>>2];g[nh>>2]=+g[yc>>2]-+g[Pc>>2];g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[rh>>2]=+g[nh>>2]-+g[qh>>2];g[vh>>2]=+g[nh>>2]+ +g[qh>>2];g[sh>>2]=(+g[mh>>2]+ +g[rh>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[hh>>2]-+g[sh>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[hh>>2]+ +g[sh>>2];g[Tg>>2]=(+g[vh>>2]-+g[uh>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Tg>>2]-+g[Ug>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Tg>>2]+ 
+g[Ug>>2];g[wh>>2]=(+g[uh>>2]+ +g[vh>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[th>>2]-+g[wh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[th>>2]+ +g[wh>>2];g[Pg>>2]=(+g[rh>>2]-+g[mh>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Pg>>2]-+g[Sg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Pg>>2]+ +g[Sg>>2];g[ag>>2]=+g[Yf>>2]-+g[$f>>2];g[Zg>>2]=+g[Xg>>2]+ +g[Yg>>2];g[Eh>>2]=+g[Yg>>2]-+g[Xg>>2];g[Qf>>2]=+g[Yf>>2]+ +g[$f>>2];g[fg>>2]=+g[bg>>2]+ +g[eg>>2];g[kg>>2]=+g[gg>>2]-+g[jg>>2];g[lg>>2]=(+g[fg>>2]+ +g[kg>>2])*.7071067690849304;g[Wg>>2]=(+g[fg>>2]-+g[kg>>2])*.7071067690849304;g[xg>>2]=+g[Hf>>2]+ +g[If>>2];g[yg>>2]=+g[Af>>2]+ +g[Df>>2];g[zg>>2]=+g[xg>>2]*.3826834261417389+ +g[yg>>2]*.9238795042037964;g[bh>>2]=+g[yg>>2]*.3826834261417389-+g[xg>>2]*.9238795042037964;g[Rf>>2]=+g[bg>>2]-+g[eg>>2];g[Sf>>2]=+g[gg>>2]+ +g[jg>>2];g[sg>>2]=(+g[Rf>>2]+ +g[Sf>>2])*.7071067690849304;g[Dh>>2]=(+g[Sf>>2]-+g[Rf>>2])*.7071067690849304;g[tf>>2]=+g[pg>>2]-+g[qg>>2];g[yf>>2]=+g[uf>>2]-+g[xf>>2];g[zf>>2]=+g[tf>>2]*.3826834261417389+ +g[yf>>2]*.9238795042037964;g[Nf>>2]=+g[yf>>2]*.3826834261417389-+g[tf>>2]*.9238795042037964;g[ug>>2]=+g[uf>>2]+ +g[xf>>2];g[vg>>2]=+g[pg>>2]+ +g[qg>>2];g[wg>>2]=+g[ug>>2]*.9238795042037964-+g[vg>>2]*.3826834261417389;g[ah>>2]=+g[vg>>2]*.9238795042037964+ +g[ug>>2]*.3826834261417389;g[Ef>>2]=+g[Af>>2]-+g[Df>>2];g[Jf>>2]=+g[Hf>>2]-+g[If>>2];g[Kf>>2]=+g[Ef>>2]*.9238795042037964-+g[Jf>>2]*.3826834261417389;g[Of>>2]=+g[Jf>>2]*.9238795042037964+ +g[Ef>>2]*.3826834261417389;g[mg>>2]=+g[ag>>2]+ +g[lg>>2];g[Lf>>2]=+g[zf>>2]+ +g[Kf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[mg>>2]-+g[Lf>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[mg>>2]+ +g[Lf>>2];g[Ch>>2]=+g[Of>>2]-+g[Nf>>2];g[Fh>>2]=+g[Dh>>2]+ +g[Eh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Ch>>2]-+g[Fh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Ch>>2]+ 
+g[Fh>>2];g[Gh>>2]=+g[Kf>>2]-+g[zf>>2];g[Hh>>2]=+g[Eh>>2]-+g[Dh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[Gh>>2]-+g[Hh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Gh>>2]+ +g[Hh>>2];g[Mf>>2]=+g[ag>>2]-+g[lg>>2];g[Pf>>2]=+g[Nf>>2]+ +g[Of>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Mf>>2]-+g[Pf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Mf>>2]+ +g[Pf>>2];g[tg>>2]=+g[Qf>>2]+ +g[sg>>2];g[Ag>>2]=+g[wg>>2]+ +g[zg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[tg>>2]-+g[Ag>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[tg>>2]+ +g[Ag>>2];g[Vg>>2]=+g[bh>>2]-+g[ah>>2];g[_g>>2]=+g[Wg>>2]+ +g[Zg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Vg>>2]-+g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Vg>>2]+ +g[_g>>2];g[$g>>2]=+g[zg>>2]-+g[wg>>2];g[Bh>>2]=+g[Zg>>2]-+g[Wg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[$g>>2]-+g[Bh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[$g>>2]+ +g[Bh>>2];g[Bg>>2]=+g[Qf>>2]-+g[sg>>2];g[ch>>2]=+g[ah>>2]+ +g[bh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Bg>>2]-+g[ch>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Bg>>2]+ +g[ch>>2];g[hc>>2]=(+g[bc>>2]+ +g[gc>>2])*.7071067690849304;g[ic>>2]=+g[Yb>>2]-+g[hc>>2];g[Me>>2]=+g[Yb>>2]+ +g[hc>>2];g[$h>>2]=(+g[cf>>2]-+g[bf>>2])*.7071067690849304;g[bi>>2]=+g[$h>>2]+ +g[ai>>2];g[hi>>2]=+g[ai>>2]-+g[$h>>2];g[Uc>>2]=+g[nc>>2]*.3826834261417389-+g[Tc>>2]*.9238795042037964;g[Dd>>2]=+g[Zc>>2]*.9238795042037964+ +g[Cd>>2]*.3826834261417389;g[Ed>>2]=+g[Uc>>2]+ +g[Dd>>2];g[_h>>2]=+g[Dd>>2]-+g[Uc>>2];g[Ue>>2]=+g[be>>2]+ +g[ee>>2];g[Ve>>2]=+g[ld>>2]+ +g[wd>>2];g[We>>2]=+g[Ue>>2]*.9807852506637573-+g[Ve>>2]*.19509032368659973;g[_e>>2]=+g[Ve>>2]*.9807852506637573+ +g[Ue>>2]*.19509032368659973;g[Wd>>2]=+g[Kd>>2]-+g[Vd>>2];g[fd>>2]=+g[bd>>2]-+g[ed>>2];g[gd>>2]=+g[Wd>>2]*.8314695954322815-+g[fd>>2]*.5555702447891235;g[Je>>2]=+g[fd>>2]*.8314695954322815+ +g[Wd>>2]*.5555702447891235;g[Ne>>2]=+g[Tc>>2]*.3826834261417389+ +g[nc>>2]*.9238795042037964;g[Oe>>2]=+g[Cd>>2]*.9238795042037964-+g[Zc>>2]*.3826834261417389;g[Pe>>2]=+g[Ne>>2]+ 
+g[Oe>>2];g[gi>>2]=+g[Ne>>2]-+g[Oe>>2];g[Re>>2]=+g[bd>>2]+ +g[ed>>2];g[Se>>2]=+g[Kd>>2]+ +g[Vd>>2];g[Te>>2]=+g[Re>>2]*.19509032368659973+ +g[Se>>2]*.9807852506637573;g[Ze>>2]=+g[Se>>2]*.19509032368659973-+g[Re>>2]*.9807852506637573;g[xd>>2]=+g[ld>>2]-+g[wd>>2];g[fe>>2]=+g[be>>2]-+g[ee>>2];g[ge>>2]=+g[xd>>2]*.5555702447891235+ +g[fe>>2]*.8314695954322815;g[Ke>>2]=+g[fe>>2]*.5555702447891235-+g[xd>>2]*.8314695954322815;g[Fd>>2]=+g[ic>>2]+ +g[Ed>>2];g[he>>2]=+g[gd>>2]+ +g[ge>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Fd>>2]-+g[he>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Fd>>2]+ +g[he>>2];g[fi>>2]=+g[Ke>>2]-+g[Je>>2];g[ii>>2]=+g[gi>>2]+ +g[hi>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[fi>>2]-+g[ii>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[fi>>2]+ +g[ii>>2];g[ji>>2]=+g[ge>>2]-+g[gd>>2];g[Lh>>2]=+g[hi>>2]-+g[gi>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[ji>>2]-+g[Lh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[ji>>2]+ +g[Lh>>2];g[ie>>2]=+g[ic>>2]-+g[Ed>>2];g[Le>>2]=+g[Je>>2]+ +g[Ke>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[ie>>2]-+g[Le>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ie>>2]+ +g[Le>>2];g[Qe>>2]=+g[Me>>2]+ +g[Pe>>2];g[Xe>>2]=+g[Te>>2]+ +g[We>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Qe>>2]-+g[Xe>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Qe>>2]+ +g[Xe>>2];g[Zh>>2]=+g[_e>>2]-+g[Ze>>2];g[ci>>2]=+g[_h>>2]+ +g[bi>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Zh>>2]-+g[ci>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Zh>>2]+ +g[ci>>2];g[di>>2]=+g[We>>2]-+g[Te>>2];g[ei>>2]=+g[bi>>2]-+g[_h>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[di>>2]-+g[ei>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[di>>2]+ +g[ei>>2];g[Ye>>2]=+g[Me>>2]-+g[Pe>>2];g[$e>>2]=+g[Ze>>2]+ +g[_e>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Ye>>2]-+g[$e>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ye>>2]+ +g[$e>>2];c[jj>>2]=(c[jj>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=kj;return}function vr(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,30,4744);i=b;return}function wr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;k=T+140|0;l=T+136|0;m=T+132|0;n=T+128|0;U=T+124|0;o=T+120|0;p=T+116|0;S=T+112|0;r=T+108|0;t=T+104|0;s=T+100|0;u=T+96|0;v=T+92|0;x=T+88|0;q=T+84|0;O=T+80|0;z=T+76|0;N=T+72|0;D=T+68|0;J=T+64|0;G=T+60|0;K=T+56|0;w=T+52|0;y=T+48|0;B=T+44|0;C=T+40|0;E=T+36|0;F=T+32|0;A=T+28|0;H=T+24|0;I=T+20|0;L=T+16|0;M=T+12|0;P=T+8|0;Q=T+4|0;R=T;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[U>>2]=f;c[o>>2]=h;c[p>>2]=j;c[S>>2]=c[U>>2];c[m>>2]=(c[m>>2]|0)+((c[U>>2]|0)-1<<2<<2);while(1){if((c[S>>2]|0)>=(c[o>>2]|0))break;g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[s>>2]=+g[(c[m>>2]|0)+8>>2];g[u>>2]=+g[(c[m>>2]|0)+12>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[x>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[q>>2]=+g[c[k>>2]>>2];g[O>>2]=+g[c[l>>2]>>2];g[w>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[y>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[N>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[B>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[D>>2]=+g[r>>2]*+g[B>>2]+ +g[t>>2]*+g[C>>2];g[J>>2]=+g[r>>2]*+g[C>>2]-+g[t>>2]*+g[B>>2];g[E>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[F>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[G>>2]=+g[s>>2]*+g[E>>2]+ +g[u>>2]*+g[F>>2];g[K>>2]=+g[s>>2]*+g[F>>2]-+g[u>>2]*+g[E>>2];g[A>>2]=+g[q>>2]+ +g[z>>2];g[H>>2]=+g[D>>2]+ +g[G>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[A>>2]-+g[H>>2];g[c[k>>2]>>2]=+g[A>>2]+ +g[H>>2];g[I>>2]=+g[q>>2]-+g[z>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[c[l>>2]>>2]=+g[I>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[M>>2]=+g[J>>2]+ +g[K>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[M>>2]-+g[P>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[M>>2]+ 
+g[P>>2];g[Q>>2]=+g[G>>2]-+g[D>>2];g[R>>2]=+g[O>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Q>>2]+ +g[R>>2];c[S>>2]=(c[S>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+16}i=T;return}function xr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,31,4792);i=b;return}function yr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0;pa=i;i=i+256|0;k=pa+252|0;l=pa+248|0;m=pa+244|0;n=pa+240|0;qa=pa+236|0;o=pa+232|0;p=pa+228|0;oa=pa+208|0;r=pa+204|0;t=pa+200|0;w=pa+196|0;y=pa+192|0;R=pa+188|0;$=pa+184|0;V=pa+180|0;Z=pa+176|0;x=pa+172|0;U=pa+168|0;Q=pa+164|0;T=pa+160|0;q=pa+156|0;H=pa+152|0;ma=pa+148|0;A=pa+144|0;F=pa+140|0;E=pa+136|0;I=pa+132|0;J=pa+128|0;K=pa+124|0;Y=pa+120|0;fa=pa+116|0;ga=pa+112|0;v=pa+108|0;ka=pa+104|0;ea=pa+100|0;z=pa+96|0;X=pa+92|0;la=pa+88|0;ba=pa+84|0;na=pa+80|0;s=pa+76|0;u=pa+72|0;ca=pa+68|0;da=pa+64|0;S=pa+60|0;W=pa+56|0;_=pa+52|0;aa=pa+48|0;B=pa+44|0;D=pa+40|0;ja=pa+36|0;C=pa+32|0;ha=pa+28|0;ia=pa+24|0;G=pa+20|0;O=pa+16|0;N=pa+12|0;P=pa+8|0;L=pa+4|0;M=pa;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[qa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[pa+224>>2]=.25;g[pa+220>>2]=.55901700258255;g[pa+216>>2]=.5877852439880371;g[pa+212>>2]=.9510565400123596;c[oa>>2]=c[qa>>2];c[m>>2]=(c[m>>2]|0)+((c[qa>>2]|0)-1<<2<<2);while(1){if((c[oa>>2]|0)>=(c[o>>2]|0))break;g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[w>>2]=+g[(c[m>>2]|0)+8>>2];g[y>>2]=+g[(c[m>>2]|0)+12>>2];g[x>>2]=+g[r>>2]*+g[w>>2];g[U>>2]=+g[t>>2]*+g[w>>2];g[Q>>2]=+g[t>>2]*+g[y>>2];g[T>>2]=+g[r>>2]*+g[y>>2];g[R>>2]=+g[x>>2]-+g[Q>>2];g[$>>2]=+g[T>>2]-+g[U>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Z>>2]=+g[x>>2]+ 
+g[Q>>2];g[q>>2]=+g[c[k>>2]>>2];g[H>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[ka>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ea>>2]=+g[w>>2]*+g[ca>>2]+ +g[y>>2]*+g[da>>2];g[z>>2]=+g[w>>2]*+g[da>>2]-+g[y>>2]*+g[ca>>2];g[S>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[W>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[X>>2]=+g[R>>2]*+g[S>>2]+ +g[V>>2]*+g[W>>2];g[la>>2]=+g[R>>2]*+g[W>>2]-+g[V>>2]*+g[S>>2];g[_>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[aa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ba>>2]=+g[Z>>2]*+g[_>>2]+ +g[$>>2]*+g[aa>>2];g[na>>2]=+g[Z>>2]*+g[aa>>2]-+g[$>>2]*+g[_>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[A>>2]=+g[na>>2]-+g[z>>2];g[F>>2]=+g[ba>>2]-+g[ea>>2];g[E>>2]=+g[X>>2]-+g[v>>2];g[I>>2]=+g[ka>>2]+ +g[la>>2];g[J>>2]=+g[na>>2]+ +g[z>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[Y>>2]=+g[v>>2]+ +g[X>>2];g[fa>>2]=+g[ba>>2]+ +g[ea>>2];g[ga>>2]=+g[Y>>2]+ +g[fa>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[ga>>2];g[B>>2]=+g[ma>>2]*.9510565400123596+ +g[A>>2]*.5877852439880371;g[D>>2]=+g[A>>2]*.9510565400123596-+g[ma>>2]*.5877852439880371;g[ha>>2]=(+g[Y>>2]-+g[fa>>2])*.55901700258255;g[ia>>2]=+g[q>>2]-+g[ga>>2]*.25;g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[C>>2]=+g[ia>>2]-+g[ha>>2];g[c[l>>2]>>2]=+g[ja>>2]-+g[B>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[C>>2]+ +g[D>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ja>>2]+ +g[B>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[C>>2]-+g[D>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[K>>2]+ +g[H>>2];g[G>>2]=+g[E>>2]*.5877852439880371+ +g[F>>2]*.9510565400123596;g[O>>2]=+g[E>>2]*.9510565400123596-+g[F>>2]*.5877852439880371;g[L>>2]=+g[H>>2]-+g[K>>2]*.25;g[M>>2]=(+g[I>>2]-+g[J>>2])*.55901700258255;g[N>>2]=+g[L>>2]-+g[M>>2];g[P>>2]=+g[M>>2]+ +g[L>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[G>>2]-+g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]+ 
+g[N>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[O>>2]-+g[P>>2];c[oa>>2]=(c[oa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+16}i=pa;return}function zr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,32,4840);i=b;return}function Ar(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0;Va=i;i=i+384|0;k=Va+368|0;l=Va+364|0;m=Va+360|0;n=Va+356|0;Wa=Va+352|0;o=Va+348|0;p=Va+344|0;Ua=Va+336|0;O=Va+332|0;R=Va+328|0;P=Va+324|0;S=Va+320|0;U=Va+316|0;wa=Va+312|0;Aa=Va+308|0;Ca=Va+304|0;Fa=Va+300|0;Ga=Va+296|0;Ha=Va+292|0;Ta=Va+288|0;Ja=Va+284|0;Ra=Va+280|0;Q=Va+276|0;va=Va+272|0;T=Va+268|0;ua=Va+264|0;za=Va+260|0;K=Va+256|0;fa=Va+252|0;E=Va+248|0;da=Va+244|0;x=Va+240|0;pa=Va+236|0;sa=Va+232|0;Ma=Va+228|0;J=Va+224|0;ia=Va+220|0;B=Va+216|0;Y=Va+212|0;y=Va+208|0;ka=Va+204|0;na=Va+200|0;q=Va+196|0;D=Va+192|0;ya=Va+188|0;C=Va+184|0;V=Va+180|0;xa=Va+176|0;$=Va+172|0;qa=Va+168|0;ca=Va+164|0;ra=Va+160|0;Z=Va+156|0;_=Va+152|0;aa=Va+148|0;ba=Va+144|0;Ea=Va+140|0;ga=Va+136|0;La=Va+132|0;ha=Va+128|0;Ba=Va+124|0;Da=Va+120|0;Ia=Va+116|0;Ka=Va+112|0;Qa=Va+108|0;la=Va+104|0;X=Va+100|0;ma=Va+96|0;Oa=Va+92|0;Pa=Va+88|0;Sa=Va+84|0;W=Va+80|0;Na=Va+76|0;ea=Va+72|0;G=Va+68|0;H=Va+64|0;s=Va+60|0;N=Va+56|0;v=Va+52|0;M=Va+48|0;t=Va+44|0;u=Va+40|0;A=Va+36|0;F=Va+32|0;w=Va+28|0;z=Va+24|0;ja=Va+20|0;L=Va+16|0;r=Va+12|0;I=Va+8|0;oa=Va+4|0;ta=Va;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Wa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Va+340>>2]=.7071067690849304;c[Ua>>2]=c[Wa>>2];c[m>>2]=(c[m>>2]|0)+(((c[Wa>>2]|0)-1|0)*6<<2);while(1){if(
(c[Ua>>2]|0)>=(c[o>>2]|0))break;g[O>>2]=+g[c[m>>2]>>2];g[R>>2]=+g[(c[m>>2]|0)+4>>2];g[P>>2]=+g[(c[m>>2]|0)+8>>2];g[S>>2]=+g[(c[m>>2]|0)+12>>2];g[Q>>2]=+g[O>>2]*+g[P>>2];g[va>>2]=+g[R>>2]*+g[P>>2];g[T>>2]=+g[R>>2]*+g[S>>2];g[ua>>2]=+g[O>>2]*+g[S>>2];g[U>>2]=+g[Q>>2]-+g[T>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[Aa>>2]=+g[Q>>2]+ +g[T>>2];g[Ca>>2]=+g[ua>>2]-+g[va>>2];g[Fa>>2]=+g[(c[m>>2]|0)+16>>2];g[Ga>>2]=+g[(c[m>>2]|0)+20>>2];g[Ha>>2]=+g[O>>2]*+g[Fa>>2]+ +g[R>>2]*+g[Ga>>2];g[Ta>>2]=+g[Aa>>2]*+g[Ga>>2]-+g[Ca>>2]*+g[Fa>>2];g[Ja>>2]=+g[O>>2]*+g[Ga>>2]-+g[R>>2]*+g[Fa>>2];g[Ra>>2]=+g[Aa>>2]*+g[Fa>>2]+ +g[Ca>>2]*+g[Ga>>2];g[q>>2]=+g[c[k>>2]>>2];g[D>>2]=+g[c[l>>2]>>2];g[V>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[xa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ya>>2]=+g[U>>2]*+g[V>>2]+ +g[wa>>2]*+g[xa>>2];g[C>>2]=+g[U>>2]*+g[xa>>2]-+g[wa>>2]*+g[V>>2];g[za>>2]=+g[q>>2]+ +g[ya>>2];g[K>>2]=+g[D>>2]-+g[C>>2];g[fa>>2]=+g[q>>2]-+g[ya>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Z>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[$>>2]=+g[Fa>>2]*+g[Z>>2]+ +g[Ga>>2]*+g[_>>2];g[qa>>2]=+g[Fa>>2]*+g[_>>2]-+g[Ga>>2]*+g[Z>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ca>>2]=+g[P>>2]*+g[aa>>2]+ +g[S>>2]*+g[ba>>2];g[ra>>2]=+g[P>>2]*+g[ba>>2]-+g[S>>2]*+g[aa>>2];g[da>>2]=+g[$>>2]+ +g[ca>>2];g[x>>2]=+g[qa>>2]+ +g[ra>>2];g[pa>>2]=+g[$>>2]-+g[ca>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[Ba>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Da>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[ga>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[La>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[Ka>>2];g[ha>>2]=+g[Ha>>2]*+g[Ka>>2]-+g[Ja>>2]*+g[Ia>>2];g[Ma>>2]=+g[Ea>>2]+ +g[La>>2];g[J>>2]=+g[Ea>>2]-+g[La>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[B>>2]=+g[ga>>2]+ 
+g[ha>>2];g[Oa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Qa>>2]=+g[O>>2]*+g[Oa>>2]+ +g[R>>2]*+g[Pa>>2];g[la>>2]=+g[O>>2]*+g[Pa>>2]-+g[R>>2]*+g[Oa>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[W>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[X>>2]=+g[Ra>>2]*+g[Sa>>2]+ +g[Ta>>2]*+g[W>>2];g[ma>>2]=+g[Ra>>2]*+g[W>>2]-+g[Ta>>2]*+g[Sa>>2];g[Y>>2]=+g[Qa>>2]+ +g[X>>2];g[y>>2]=+g[la>>2]+ +g[ma>>2];g[ka>>2]=+g[Qa>>2]-+g[X>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[Na>>2]=+g[za>>2]+ +g[Ma>>2];g[ea>>2]=+g[Y>>2]+ +g[da>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Na>>2]-+g[ea>>2];g[c[k>>2]>>2]=+g[Na>>2]+ +g[ea>>2];g[G>>2]=+g[da>>2]-+g[Y>>2];g[H>>2]=+g[E>>2]-+g[B>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[G>>2]-+g[H>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[G>>2]+ +g[H>>2];g[s>>2]=+g[fa>>2]-+g[ia>>2];g[N>>2]=+g[K>>2]-+g[J>>2];g[t>>2]=+g[ka>>2]-+g[na>>2];g[u>>2]=+g[pa>>2]+ +g[sa>>2];g[v>>2]=(+g[t>>2]+ +g[u>>2])*.7071067690849304;g[M>>2]=(+g[u>>2]-+g[t>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[s>>2]-+g[v>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[M>>2]+ +g[N>>2];g[c[l>>2]>>2]=+g[s>>2]+ +g[v>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[M>>2]-+g[N>>2];g[A>>2]=+g[y>>2]+ +g[x>>2];g[F>>2]=+g[B>>2]+ +g[E>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[A>>2]-+g[F>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[A>>2]+ +g[F>>2];g[w>>2]=+g[za>>2]-+g[Ma>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[w>>2]-+g[z>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]+ +g[z>>2];g[ja>>2]=+g[fa>>2]+ +g[ia>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[oa>>2]=+g[ka>>2]+ +g[na>>2];g[ta>>2]=+g[pa>>2]-+g[sa>>2];g[r>>2]=(+g[oa>>2]+ +g[ta>>2])*.7071067690849304;g[I>>2]=(+g[ta>>2]-+g[oa>>2])*.7071067690849304;g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]-+g[r>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[I>>2]+ +g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ja>>2]+ 
+g[r>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[I>>2]-+g[L>>2];c[Ua>>2]=(c[Ua>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+24}i=Va;return}function Br(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,33,4888);i=b;return}function Cr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0;Db=i;i=i+528|0;k=Db+524|0;l=Db+520|0;m=Db+516|0;n=Db+512|0;Eb=Db+508|0;o=Db+504|0;p=Db+500|0;Cb=Db+480|0;Ba=Db+476|0;S=Db+472|0;Xa=Db+468|0;D=Db+464|0;Ja=Db+460|0;Ua=Db+456|0;Va=Db+452|0;ka=Db+448|0;la=Db+444|0;Q=Db+440|0;$a=Db+436|0;ab=Db+432|0;bb=Db+428|0;$=Db+424|0;ca=Db+420|0;ta=Db+416|0;kb=Db+412|0;vb=Db+408|0;wb=Db+404|0;na=Db+400|0;oa=Db+396|0;P=Db+392|0;Ya=Db+388|0;Za=Db+384|0;_a=Db+380|0;x=Db+376|0;A=Db+372|0;ua=Db+368|0;q=Db+364|0;B=Db+360|0;Aa=Db+356|0;C=Db+352|0;xa=Db+348|0;za=Db+344|0;wa=Db+340|0;ya=Db+336|0;Bb=Db+332|0;Z=Db+328|0;Ta=Db+324|0;aa=Db+320|0;Ia=Db+316|0;_=Db+312|0;Oa=Db+308|0;ba=Db+304|0;yb=Db+300|0;Ab=Db+296|0;xb=Db+292|0;zb=Db+288|0;Qa=Db+284|0;Sa=Db+280|0;Pa=Db+276|0;Ra=Db+272|0;Fa=Db+268|0;Ha=Db+264|0;Ea=Db+260|0;Ga=Db+256|0;La=Db+252|0;Na=Db+248|0;Ka=Db+244|0;Ma=Db+240|0;eb=Db+236|0;v=Db+232|0;ub=Db+228|0;z=Db+224|0;jb=Db+220|0;w=Db+216|0;pb=Db+212|0;y=Db+208|0;Da=Db+204|0;db=Db+200|0;Ca=Db+196|0;cb=Db+192|0;rb=Db+188|0;tb=Db+184|0;qb=Db+180|0;sb=Db+176|0;gb=Db+172|0;ib=Db+168|0;fb=Db+164|0;h
b=Db+160|0;mb=Db+156|0;ob=Db+152|0;lb=Db+148|0;nb=Db+144|0;s=Db+140|0;Wa=Db+136|0;t=Db+132|0;ea=Db+128|0;ga=Db+124|0;Y=Db+120|0;da=Db+116|0;fa=Db+112|0;u=Db+108|0;ia=Db+104|0;r=Db+100|0;ha=Db+96|0;qa=Db+92|0;sa=Db+88|0;ma=Db+84|0;pa=Db+80|0;ra=Db+76|0;ja=Db+72|0;I=Db+68|0;va=Db+64|0;H=Db+60|0;G=Db+56|0;K=Db+52|0;E=Db+48|0;F=Db+44|0;L=Db+40|0;J=Db+36|0;R=Db+32|0;T=Db+28|0;U=Db+24|0;O=Db+20|0;W=Db+16|0;M=Db+12|0;N=Db+8|0;X=Db+4|0;V=Db;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Eb>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Db+496>>2]=.5877852439880371;g[Db+492>>2]=.9510565400123596;g[Db+488>>2]=.25;g[Db+484>>2]=.55901700258255;c[Cb>>2]=c[Eb>>2];c[m>>2]=(c[m>>2]|0)+(((c[Eb>>2]|0)-1|0)*18<<2);while(1){if((c[Cb>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[B>>2]=+g[c[l>>2]>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+32>>2];g[ya>>2]=+g[(c[m>>2]|0)+36>>2];g[Aa>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[C>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[Ba>>2]=+g[q>>2]-+g[Aa>>2];g[S>>2]=+g[C>>2]+ +g[B>>2];g[Xa>>2]=+g[q>>2]+ +g[Aa>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[yb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ab>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[xb>>2]=+g[(c[m>>2]|0)+24>>2];g[zb>>2]=+g[(c[m>>2]|0)+28>>2];g[Bb>>2]=+g[xb>>2]*+g[yb>>2]+ +g[zb>>2]*+g[Ab>>2];g[Z>>2]=+g[xb>>2]*+g[Ab>>2]-+g[zb>>2]*+g[yb>>2];g[Qa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Sa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Pa>>2]=+g[c[m>>2]>>2];g[Ra>>2]=+g[(c[m>>2]|0)+4>>2];g[Ta>>2]=+g[Pa>>2]*+g[Qa>>2]+ +g[Ra>>2]*+g[Sa>>2];g[aa>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Ra>>2]*+g[Qa>>2];g[Fa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ha>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ea>>2]=+g[(c[m>>2]|0)+64>>2];g[Ga>>2]=+g[(c[m>>2]|0)+68>>2];g[Ia>>2]=+g[Ea>>2]*+g[Fa>>2]+ 
+g[Ga>>2]*+g[Ha>>2];g[_>>2]=+g[Ea>>2]*+g[Ha>>2]-+g[Ga>>2]*+g[Fa>>2];g[La>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ka>>2]=+g[(c[m>>2]|0)+40>>2];g[Ma>>2]=+g[(c[m>>2]|0)+44>>2];g[Oa>>2]=+g[Ka>>2]*+g[La>>2]+ +g[Ma>>2]*+g[Na>>2];g[ba>>2]=+g[Ka>>2]*+g[Na>>2]-+g[Ma>>2]*+g[La>>2];g[Ja>>2]=+g[Bb>>2]-+g[Ia>>2];g[Ua>>2]=+g[Oa>>2]-+g[Ta>>2];g[Va>>2]=+g[Ja>>2]+ +g[Ua>>2];g[ka>>2]=+g[Z>>2]+ +g[_>>2];g[la>>2]=+g[ba>>2]+ +g[aa>>2];g[Q>>2]=+g[ka>>2]+ +g[la>>2];g[$a>>2]=+g[Bb>>2]+ +g[Ia>>2];g[ab>>2]=+g[Oa>>2]+ +g[Ta>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[ta>>2]=+g[ca>>2]-+g[$>>2];g[Da>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[db>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ca>>2]=+g[(c[m>>2]|0)+8>>2];g[cb>>2]=+g[(c[m>>2]|0)+12>>2];g[eb>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[cb>>2]*+g[db>>2];g[v>>2]=+g[Ca>>2]*+g[db>>2]-+g[cb>>2]*+g[Da>>2];g[rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[tb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[qb>>2]=+g[(c[m>>2]|0)+16>>2];g[sb>>2]=+g[(c[m>>2]|0)+20>>2];g[ub>>2]=+g[qb>>2]*+g[rb>>2]+ +g[sb>>2]*+g[tb>>2];g[z>>2]=+g[qb>>2]*+g[tb>>2]-+g[sb>>2]*+g[rb>>2];g[gb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[fb>>2]=+g[(c[m>>2]|0)+48>>2];g[hb>>2]=+g[(c[m>>2]|0)+52>>2];g[jb>>2]=+g[fb>>2]*+g[gb>>2]+ +g[hb>>2]*+g[ib>>2];g[w>>2]=+g[fb>>2]*+g[ib>>2]-+g[hb>>2]*+g[gb>>2];g[mb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ob>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[lb>>2]=+g[(c[m>>2]|0)+56>>2];g[nb>>2]=+g[(c[m>>2]|0)+60>>2];g[pb>>2]=+g[lb>>2]*+g[mb>>2]+ +g[nb>>2]*+g[ob>>2];g[y>>2]=+g[lb>>2]*+g[ob>>2]-+g[nb>>2]*+g[mb>>2];g[kb>>2]=+g[eb>>2]-+g[jb>>2];g[vb>>2]=+g[pb>>2]-+g[ub>>2];g[wb>>2]=+g[kb>>2]+ +g[vb>>2];g[na>>2]=+g[v>>2]+ +g[w>>2];g[oa>>2]=+g[y>>2]+ +g[z>>2];g[P>>2]=+g[na>>2]+ +g[oa>>2];g[Ya>>2]=+g[eb>>2]+ +g[jb>>2];g[Za>>2]=+g[pb>>2]+ +g[ub>>2];g[_a>>2]=+g[Ya>>2]+ 
+g[Za>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[ua>>2]=+g[x>>2]+ +g[A>>2];g[s>>2]=(+g[wb>>2]-+g[Va>>2])*.55901700258255;g[Wa>>2]=+g[wb>>2]+ +g[Va>>2];g[t>>2]=+g[Ba>>2]-+g[Wa>>2]*.25;g[Y>>2]=+g[x>>2]-+g[A>>2];g[da>>2]=+g[$>>2]+ +g[ca>>2];g[ea>>2]=+g[Y>>2]*.9510565400123596+ +g[da>>2]*.5877852439880371;g[ga>>2]=+g[da>>2]*.9510565400123596-+g[Y>>2]*.5877852439880371;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ba>>2]+ +g[Wa>>2];g[fa>>2]=+g[t>>2]-+g[s>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[fa>>2]-+g[ga>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[fa>>2]+ +g[ga>>2];g[u>>2]=+g[s>>2]+ +g[t>>2];g[c[l>>2]>>2]=+g[u>>2]-+g[ea>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[u>>2]+ +g[ea>>2];g[ia>>2]=(+g[_a>>2]-+g[bb>>2])*.55901700258255;g[r>>2]=+g[_a>>2]+ +g[bb>>2];g[ha>>2]=+g[Xa>>2]-+g[r>>2]*.25;g[ma>>2]=+g[ka>>2]-+g[la>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[qa>>2]=+g[ma>>2]*.9510565400123596-+g[pa>>2]*.5877852439880371;g[sa>>2]=+g[pa>>2]*.9510565400123596+ +g[ma>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[Xa>>2]+ +g[r>>2];g[ra>>2]=+g[ia>>2]+ +g[ha>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ra>>2]-+g[sa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ra>>2]+ +g[sa>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]-+g[qa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[ja>>2]+ +g[qa>>2];g[I>>2]=(+g[ua>>2]+ +g[ta>>2])*.55901700258255;g[va>>2]=+g[ta>>2]-+g[ua>>2];g[H>>2]=+g[va>>2]*.25+ +g[D>>2];g[E>>2]=+g[kb>>2]-+g[vb>>2];g[F>>2]=+g[Ua>>2]-+g[Ja>>2];g[G>>2]=+g[E>>2]*.5877852439880371+ +g[F>>2]*.9510565400123596;g[K>>2]=+g[F>>2]*.5877852439880371-+g[E>>2]*.9510565400123596;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[va>>2]-+g[D>>2];g[L>>2]=+g[I>>2]+ +g[H>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[K>>2]+ +g[L>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[G>>2]-+g[J>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[G>>2]+ +g[J>>2];g[R>>2]=(+g[P>>2]-+g[Q>>2])*.55901700258255;g[T>>2]=+g[P>>2]+ 
+g[Q>>2];g[U>>2]=+g[S>>2]-+g[T>>2]*.25;g[M>>2]=+g[Ya>>2]-+g[Za>>2];g[N>>2]=+g[$a>>2]-+g[ab>>2];g[O>>2]=+g[M>>2]*.9510565400123596+ +g[N>>2]*.5877852439880371;g[W>>2]=+g[N>>2]*.9510565400123596-+g[M>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[T>>2]+ +g[S>>2];g[X>>2]=+g[U>>2]-+g[R>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[W>>2]-+g[X>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[W>>2]+ +g[X>>2];g[V>>2]=+g[R>>2]+ +g[U>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[O>>2]-+g[V>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[O>>2]+ +g[V>>2];c[Cb>>2]=(c[Cb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+72;c[n>>2]=c[n>>2]^c[2998]}i=Db;return}function Dr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,34,4936);i=b;return}function Er(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0;Xb=i;i=i+608|0;k=Xb+596|0;l=Xb+592|0;m=Xb+588|0;n=Xb+584|0;Yb=Xb+580|0;o=Xb+576|0;p=Xb+572|0;Wb=Xb+560|0;q=Xb+556|0;X=Xb+552|0;z=Xb+548|0;Da=Xb+544|0;yb=Xb+540|0;w=Xb+536|0;W=Xb+532|0;Ca=Xb+528|0;nb=Xb+524|0;F=Xb+520|0;na=Xb+516|0;E=Xb+512|0;t=Xb+508|0;ka=Xb+504|0;G=Xb+500|0;H=Xb+496|0;Eb=Xb+492|0;T=Xb+488|0;ca=Xb+484|0;Ga=Xb+480|0;Pb=Xb+476|0;$=Xb+472|0;U=Xb+468|0;Fa=Xb+464|0;Ya=Xb+460|0;xa=Xb+456|0;ia=Xb+452|0;C=Xb+448|0;hb=
Xb+444|0;fa=Xb+440|0;ya=Xb+436|0;B=Xb+432|0;Ua=Xb+428|0;x=Xb+424|0;xb=Xb+420|0;y=Xb+416|0;Ra=Xb+412|0;Ta=Xb+408|0;za=Xb+404|0;Sa=Xb+400|0;Wa=Xb+396|0;wb=Xb+392|0;Va=Xb+388|0;Xa=Xb+384|0;s=Xb+380|0;ma=Xb+376|0;sb=Xb+372|0;la=Xb+368|0;kb=Xb+364|0;mb=Xb+360|0;jb=Xb+356|0;lb=Xb+352|0;ub=Xb+348|0;r=Xb+344|0;tb=Xb+340|0;vb=Xb+336|0;pb=Xb+332|0;rb=Xb+328|0;ob=Xb+324|0;qb=Xb+320|0;Ob=Xb+316|0;ba=Xb+312|0;Jb=Xb+308|0;aa=Xb+304|0;Bb=Xb+300|0;Db=Xb+296|0;Ab=Xb+292|0;Cb=Xb+288|0;Lb=Xb+284|0;Nb=Xb+280|0;Kb=Xb+276|0;Mb=Xb+272|0;Gb=Xb+268|0;Ib=Xb+264|0;Fb=Xb+260|0;Hb=Xb+256|0;gb=Xb+252|0;ha=Xb+248|0;bb=Xb+244|0;ga=Xb+240|0;Tb=Xb+236|0;Vb=Xb+232|0;Sb=Xb+228|0;Ub=Xb+224|0;db=Xb+220|0;fb=Xb+216|0;cb=Xb+212|0;eb=Xb+208|0;_a=Xb+204|0;ab=Xb+200|0;Za=Xb+196|0;$a=Xb+192|0;Rb=Xb+188|0;O=Xb+184|0;Z=Xb+180|0;Aa=Xb+176|0;v=Xb+172|0;_=Xb+168|0;R=Xb+164|0;S=Xb+160|0;zb=Xb+156|0;Qb=Xb+152|0;V=Xb+148|0;Y=Xb+144|0;ib=Xb+140|0;u=Xb+136|0;P=Xb+132|0;Q=Xb+128|0;ea=Xb+124|0;wa=Xb+120|0;Ia=Xb+116|0;Ka=Xb+112|0;pa=Xb+108|0;Ba=Xb+104|0;J=Xb+100|0;Ja=Xb+96|0;A=Xb+92|0;da=Xb+88|0;Ea=Xb+84|0;Ha=Xb+80|0;ja=Xb+76|0;oa=Xb+72|0;D=Xb+68|0;I=Xb+64|0;sa=Xb+60|0;K=Xb+56|0;Oa=Xb+52|0;Qa=Xb+48|0;va=Xb+44|0;La=Xb+40|0;N=Xb+36|0;Pa=Xb+32|0;qa=Xb+28|0;ra=Xb+24|0;Ma=Xb+20|0;Na=Xb+16|0;ta=Xb+12|0;ua=Xb+8|0;L=Xb+4|0;M=Xb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Yb>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Xb+568>>2]=.5;g[Xb+564>>2]=.8660253882408142;c[Wb>>2]=c[Yb>>2];c[m>>2]=(c[m>>2]|0)+(((c[Yb>>2]|0)-1|0)*22<<2);while(1){if((c[Wb>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[X>>2]=+g[c[l>>2]>>2];g[Ra>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ta>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+24>>2];g[Sa>>2]=+g[(c[m>>2]|0)+28>>2];g[Ua>>2]=+g[za>>2]*+g[Ra>>2]+ +g[Sa>>2]*+g[Ta>>2];g[x>>2]=+g[za>>2]*+g[Ta>>2]-+g[Sa>>2]*+g[Ra>>2];g[Wa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[wb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Va>>2]=+g[(c[m>>2]|0)+56>>2];g[Xa>>2]=+g[(c[m>>2]|0)+60>>2];g[xb>>2]=+g[Va>>2]*+g[Wa>>2]+ 
+g[Xa>>2]*+g[wb>>2];g[y>>2]=+g[Va>>2]*+g[wb>>2]-+g[Xa>>2]*+g[Wa>>2];g[z>>2]=(+g[x>>2]-+g[y>>2])*.8660253882408142;g[Da>>2]=(+g[xb>>2]-+g[Ua>>2])*.8660253882408142;g[yb>>2]=+g[Ua>>2]+ +g[xb>>2];g[w>>2]=+g[q>>2]-+g[yb>>2]*.5;g[W>>2]=+g[x>>2]+ +g[y>>2];g[Ca>>2]=+g[X>>2]-+g[W>>2]*.5;g[kb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[jb>>2]=+g[(c[m>>2]|0)+64>>2];g[lb>>2]=+g[(c[m>>2]|0)+68>>2];g[nb>>2]=+g[jb>>2]*+g[kb>>2]+ +g[lb>>2]*+g[mb>>2];g[F>>2]=+g[jb>>2]*+g[mb>>2]-+g[lb>>2]*+g[kb>>2];g[ub>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[tb>>2]=+g[(c[m>>2]|0)+32>>2];g[vb>>2]=+g[(c[m>>2]|0)+36>>2];g[s>>2]=+g[tb>>2]*+g[ub>>2]+ +g[vb>>2]*+g[r>>2];g[ma>>2]=+g[tb>>2]*+g[r>>2]-+g[vb>>2]*+g[ub>>2];g[pb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[rb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ob>>2]=+g[c[m>>2]>>2];g[qb>>2]=+g[(c[m>>2]|0)+4>>2];g[sb>>2]=+g[ob>>2]*+g[pb>>2]+ +g[qb>>2]*+g[rb>>2];g[la>>2]=+g[ob>>2]*+g[rb>>2]-+g[qb>>2]*+g[pb>>2];g[na>>2]=(+g[la>>2]-+g[ma>>2])*.8660253882408142;g[E>>2]=(+g[s>>2]-+g[sb>>2])*.8660253882408142;g[t>>2]=+g[sb>>2]+ +g[s>>2];g[ka>>2]=+g[nb>>2]-+g[t>>2]*.5;g[G>>2]=+g[la>>2]+ +g[ma>>2];g[H>>2]=+g[F>>2]-+g[G>>2]*.5;g[Bb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ab>>2]=+g[(c[m>>2]|0)+40>>2];g[Cb>>2]=+g[(c[m>>2]|0)+44>>2];g[Eb>>2]=+g[Ab>>2]*+g[Bb>>2]+ +g[Cb>>2]*+g[Db>>2];g[T>>2]=+g[Ab>>2]*+g[Db>>2]-+g[Cb>>2]*+g[Bb>>2];g[Lb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Nb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Kb>>2]=+g[(c[m>>2]|0)+8>>2];g[Mb>>2]=+g[(c[m>>2]|0)+12>>2];g[Ob>>2]=+g[Kb>>2]*+g[Lb>>2]+ +g[Mb>>2]*+g[Nb>>2];g[ba>>2]=+g[Kb>>2]*+g[Nb>>2]-+g[Mb>>2]*+g[Lb>>2];g[Gb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Fb>>2]=+g[(c[m>>2]|0)+72>>2];g[Hb>>2]=+g[(c[m>>2]|0)+76>>2];g[Jb>>2]=+g[Fb>>2]*+g[Gb>>2]+ 
+g[Hb>>2]*+g[Ib>>2];g[aa>>2]=+g[Fb>>2]*+g[Ib>>2]-+g[Hb>>2]*+g[Gb>>2];g[ca>>2]=(+g[aa>>2]-+g[ba>>2])*.8660253882408142;g[Ga>>2]=(+g[Ob>>2]-+g[Jb>>2])*.8660253882408142;g[Pb>>2]=+g[Jb>>2]+ +g[Ob>>2];g[$>>2]=+g[Eb>>2]-+g[Pb>>2]*.5;g[U>>2]=+g[aa>>2]+ +g[ba>>2];g[Fa>>2]=+g[T>>2]-+g[U>>2]*.5;g[Tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Sb>>2]=+g[(c[m>>2]|0)+16>>2];g[Ub>>2]=+g[(c[m>>2]|0)+20>>2];g[Ya>>2]=+g[Sb>>2]*+g[Tb>>2]+ +g[Ub>>2]*+g[Vb>>2];g[xa>>2]=+g[Sb>>2]*+g[Vb>>2]-+g[Ub>>2]*+g[Tb>>2];g[db>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[fb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[cb>>2]=+g[(c[m>>2]|0)+80>>2];g[eb>>2]=+g[(c[m>>2]|0)+84>>2];g[gb>>2]=+g[cb>>2]*+g[db>>2]+ +g[eb>>2]*+g[fb>>2];g[ha>>2]=+g[cb>>2]*+g[fb>>2]-+g[eb>>2]*+g[db>>2];g[_a>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ab>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Za>>2]=+g[(c[m>>2]|0)+48>>2];g[$a>>2]=+g[(c[m>>2]|0)+52>>2];g[bb>>2]=+g[Za>>2]*+g[_a>>2]+ +g[$a>>2]*+g[ab>>2];g[ga>>2]=+g[Za>>2]*+g[ab>>2]-+g[$a>>2]*+g[_a>>2];g[ia>>2]=(+g[ga>>2]-+g[ha>>2])*.8660253882408142;g[C>>2]=(+g[gb>>2]-+g[bb>>2])*.8660253882408142;g[hb>>2]=+g[bb>>2]+ +g[gb>>2];g[fa>>2]=+g[Ya>>2]-+g[hb>>2]*.5;g[ya>>2]=+g[ga>>2]+ +g[ha>>2];g[B>>2]=+g[xa>>2]-+g[ya>>2]*.5;g[zb>>2]=+g[q>>2]+ +g[yb>>2];g[Qb>>2]=+g[Eb>>2]+ +g[Pb>>2];g[Rb>>2]=+g[zb>>2]+ +g[Qb>>2];g[O>>2]=+g[zb>>2]-+g[Qb>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[Z>>2]=+g[V>>2]+ +g[Y>>2];g[Aa>>2]=+g[Y>>2]-+g[V>>2];g[ib>>2]=+g[Ya>>2]+ +g[hb>>2];g[u>>2]=+g[nb>>2]+ +g[t>>2];g[v>>2]=+g[ib>>2]+ +g[u>>2];g[_>>2]=+g[ib>>2]-+g[u>>2];g[P>>2]=+g[xa>>2]+ +g[ya>>2];g[Q>>2]=+g[F>>2]+ +g[G>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[S>>2]=+g[P>>2]+ +g[Q>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Rb>>2]-+g[v>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[_>>2]-+g[Aa>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[_>>2]+ +g[Aa>>2];g[c[k>>2]>>2]=+g[Rb>>2]+ 
+g[v>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[O>>2]-+g[R>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[S>>2]-+g[Z>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[S>>2]+ +g[Z>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[O>>2]+ +g[R>>2];g[A>>2]=+g[w>>2]-+g[z>>2];g[da>>2]=+g[$>>2]-+g[ca>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[wa>>2]=+g[A>>2]-+g[da>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2];g[Ka>>2]=+g[Ha>>2]+ +g[Ea>>2];g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[oa>>2]=+g[ka>>2]-+g[na>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[Ba>>2]=+g[oa>>2]-+g[ja>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[I>>2]=+g[E>>2]-+g[H>>2];g[J>>2]=+g[D>>2]+ +g[I>>2];g[Ja>>2]=+g[I>>2]-+g[D>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ea>>2]-+g[pa>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ja>>2]-+g[Ka>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Ja>>2]+ +g[Ka>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ea>>2]+ +g[pa>>2];g[c[l>>2]>>2]=+g[wa>>2]-+g[J>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ba>>2]-+g[Ia>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ba>>2]+ +g[Ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[wa>>2]+ +g[J>>2];g[qa>>2]=+g[w>>2]+ +g[z>>2];g[ra>>2]=+g[$>>2]+ +g[ca>>2];g[sa>>2]=+g[qa>>2]+ +g[ra>>2];g[K>>2]=+g[qa>>2]-+g[ra>>2];g[Ma>>2]=+g[Da>>2]+ +g[Ca>>2];g[Na>>2]=+g[Ga>>2]+ +g[Fa>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Qa>>2]=+g[Na>>2]+ +g[Ma>>2];g[ta>>2]=+g[fa>>2]+ +g[ia>>2];g[ua>>2]=+g[ka>>2]+ +g[na>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[La>>2]=+g[ua>>2]-+g[ta>>2];g[L>>2]=+g[C>>2]+ +g[B>>2];g[M>>2]=+g[E>>2]+ +g[H>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Pa>>2]=+g[L>>2]+ +g[M>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[sa>>2]-+g[va>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[K>>2]+ +g[N>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[sa>>2]+ +g[va>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[K>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[La>>2]-+g[Oa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Pa>>2]+ +g[Qa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[La>>2]+ 
+g[Oa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Pa>>2]-+g[Qa>>2];c[Wb>>2]=(c[Wb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+88;c[n>>2]=c[n>>2]^c[2998]}i=Xb;return}function Fr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,35,4984);i=b;return}function Gr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0;nd=i;i=i+912|0;k=nd+900|0;l=nd+896|0;m=nd+892|0;n=nd+888|0;od=nd+884|0;o=nd+880|0;p=nd+876|0;md=nd+848|0;pa=nd+844|0;_a=nd+840|0;Rc=nd+836|0;ma=nd+832|0;bb=nd+828|0;cc=nd+824|0;u=nd+820|0;ja=nd+816|0;ka=nd+812|0;Oa=nd+808|0;Pa=nd+804|0;ac=nd+800|0;I=nd+796|0;U=nd+792|0;sb=nd+788|0;Eb=nd+784|0;nb=nd+780|0;Db=nd+776|0;N=nd+772|0;V=nd+768|0;gd=nd+764|0;zc=nd+760|0;Ac=nd+756|0;Ra=nd+752|0;Sa=nd+748|0;$b=nd+744|0;va=nd+740|0;R=nd+736|0;Ja=nd+732|0;Bb=nd+728|0;Ea=nd+724|0;Ab=nd+720|0;C=nd+716|0;S=nd+712|0;q=nd+708|0;$a=nd+704|0;kc=nd+700|0;na=nd+696|
0;Pc=nd+692|0;oa=nd+688|0;Qc=nd+684|0;ab=nd+680|0;Ib=nd+676|0;jc=nd+672|0;za=nd+668|0;ic=nd+664|0;mc=nd+660|0;Oc=nd+656|0;lc=nd+652|0;nc=nd+648|0;Fc=nd+644|0;kb=nd+640|0;z=nd+636|0;pb=nd+632|0;Kc=nd+628|0;F=nd+624|0;s=nd+620|0;G=nd+616|0;t=nd+612|0;lb=nd+608|0;ca=nd+604|0;K=nd+600|0;ha=nd+596|0;L=nd+592|0;ia=nd+588|0;qb=nd+584|0;Cc=nd+580|0;Ec=nd+576|0;Bc=nd+572|0;Dc=nd+568|0;w=nd+564|0;y=nd+560|0;v=nd+556|0;x=nd+552|0;Hc=nd+548|0;Jc=nd+544|0;Gc=nd+540|0;Ic=nd+536|0;Mc=nd+532|0;r=nd+528|0;Lc=nd+524|0;Nc=nd+520|0;$=nd+516|0;ba=nd+512|0;A=nd+508|0;aa=nd+504|0;ea=nd+500|0;ga=nd+496|0;da=nd+492|0;fa=nd+488|0;E=nd+484|0;H=nd+480|0;ob=nd+476|0;rb=nd+472|0;jb=nd+468|0;mb=nd+464|0;J=nd+460|0;M=nd+456|0;Wc=nd+452|0;Ba=nd+448|0;ld=nd+444|0;Ga=nd+440|0;$c=nd+436|0;sa=nd+432|0;ed=nd+428|0;ta=nd+424|0;fd=nd+420|0;Ca=nd+416|0;sc=nd+412|0;xa=nd+408|0;xc=nd+404|0;ya=nd+400|0;yc=nd+396|0;Ha=nd+392|0;Tc=nd+388|0;Vc=nd+384|0;Sc=nd+380|0;Uc=nd+376|0;id=nd+372|0;kd=nd+368|0;hd=nd+364|0;jd=nd+360|0;Yc=nd+356|0;_c=nd+352|0;Xc=nd+348|0;Zc=nd+344|0;bd=nd+340|0;dd=nd+336|0;ad=nd+332|0;cd=nd+328|0;pc=nd+324|0;rc=nd+320|0;oc=nd+316|0;qc=nd+312|0;uc=nd+308|0;wc=nd+304|0;tc=nd+300|0;vc=nd+296|0;ra=nd+292|0;ua=nd+288|0;Fa=nd+284|0;Ia=nd+280|0;Aa=nd+276|0;Da=nd+272|0;wa=nd+268|0;B=nd+264|0;Ma=nd+260|0;la=nd+256|0;La=nd+252|0;Ua=nd+248|0;Wa=nd+244|0;Qa=nd+240|0;Ta=nd+236|0;Va=nd+232|0;Na=nd+228|0;bc=nd+224|0;dc=nd+220|0;ec=nd+216|0;_b=nd+212|0;gc=nd+208|0;Yb=nd+204|0;Zb=nd+200|0;hc=nd+196|0;fc=nd+192|0;Gb=nd+188|0;Ka=nd+184|0;qa=nd+180|0;P=nd+176|0;xb=nd+172|0;yb=nd+168|0;Hb=nd+164|0;zb=nd+160|0;Cb=nd+156|0;Fb=nd+152|0;D=nd+148|0;O=nd+144|0;Ob=nd+140|0;Wb=nd+136|0;Sb=nd+132|0;Tb=nd+128|0;Rb=nd+124|0;Ub=nd+120|0;Xb=nd+116|0;Vb=nd+112|0;Mb=nd+108|0;Nb=nd+104|0;Pb=nd+100|0;Qb=nd+96|0;fb=nd+92|0;Kb=nd+88|0;cb=nd+84|0;Za=nd+80|0;gb=nd+76|0;hb=nd+72|0;Lb=nd+68|0;Jb=nd+64|0;db=nd+60|0;eb=nd+56|0;Xa=nd+52|0;Ya=nd+48|0;ub=nd+44|0;wb=nd+40|0;Q=nd+36|0;X=nd+32|0;Y=nd+28|0;Z=nd+24|0;_=nd+20|0;vb=nd+16|0;ib=nd+
12|0;tb=nd+8|0;T=nd+4|0;W=nd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[od>>2]=f;c[o>>2]=h;c[p>>2]=j;g[nd+872>>2]=.5877852439880371;g[nd+868>>2]=.9510565400123596;g[nd+864>>2]=.25;g[nd+860>>2]=.55901700258255;g[nd+856>>2]=.5;g[nd+852>>2]=.8660253882408142;c[md>>2]=c[od>>2];c[m>>2]=(c[m>>2]|0)+(((c[od>>2]|0)-1|0)*28<<2);while(1){if((c[md>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[$a>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+32>>2];g[ic>>2]=+g[(c[m>>2]|0)+36>>2];g[kc>>2]=+g[za>>2]*+g[Ib>>2]+ +g[ic>>2]*+g[jc>>2];g[na>>2]=+g[za>>2]*+g[jc>>2]-+g[ic>>2]*+g[Ib>>2];g[mc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Oc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[lc>>2]=+g[(c[m>>2]|0)+72>>2];g[nc>>2]=+g[(c[m>>2]|0)+76>>2];g[Pc>>2]=+g[lc>>2]*+g[mc>>2]+ +g[nc>>2]*+g[Oc>>2];g[oa>>2]=+g[lc>>2]*+g[Oc>>2]-+g[nc>>2]*+g[mc>>2];g[pa>>2]=(+g[na>>2]-+g[oa>>2])*.8660253882408142;g[_a>>2]=(+g[Pc>>2]-+g[kc>>2])*.8660253882408142;g[Qc>>2]=+g[kc>>2]+ +g[Pc>>2];g[Rc>>2]=+g[q>>2]+ +g[Qc>>2];g[ma>>2]=+g[q>>2]-+g[Qc>>2]*.5;g[ab>>2]=+g[na>>2]+ +g[oa>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2]*.5;g[cc>>2]=+g[ab>>2]+ +g[$a>>2];g[Cc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ec>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Bc>>2]=+g[(c[m>>2]|0)+40>>2];g[Dc>>2]=+g[(c[m>>2]|0)+44>>2];g[Fc>>2]=+g[Bc>>2]*+g[Cc>>2]+ +g[Dc>>2]*+g[Ec>>2];g[kb>>2]=+g[Bc>>2]*+g[Ec>>2]-+g[Dc>>2]*+g[Cc>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[y>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[v>>2]=+g[(c[m>>2]|0)+64>>2];g[x>>2]=+g[(c[m>>2]|0)+68>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[pb>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[Hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Gc>>2]=+g[(c[m>>2]|0)+80>>2];g[Ic>>2]=+g[(c[m>>2]|0)+84>>2];g[Kc>>2]=+g[Gc>>2]*+g[Hc>>2]+ 
+g[Ic>>2]*+g[Jc>>2];g[F>>2]=+g[Gc>>2]*+g[Jc>>2]-+g[Ic>>2]*+g[Hc>>2];g[Mc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Lc>>2]=+g[c[m>>2]>>2];g[Nc>>2]=+g[(c[m>>2]|0)+4>>2];g[s>>2]=+g[Lc>>2]*+g[Mc>>2]+ +g[Nc>>2]*+g[r>>2];g[G>>2]=+g[Lc>>2]*+g[r>>2]-+g[Nc>>2]*+g[Mc>>2];g[t>>2]=+g[Kc>>2]+ +g[s>>2];g[lb>>2]=+g[F>>2]+ +g[G>>2];g[$>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[A>>2]=+g[(c[m>>2]|0)+104>>2];g[aa>>2]=+g[(c[m>>2]|0)+108>>2];g[ca>>2]=+g[A>>2]*+g[$>>2]+ +g[aa>>2]*+g[ba>>2];g[K>>2]=+g[A>>2]*+g[ba>>2]-+g[aa>>2]*+g[$>>2];g[ea>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ga>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+24>>2];g[fa>>2]=+g[(c[m>>2]|0)+28>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[L>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[ia>>2]=+g[ca>>2]+ +g[ha>>2];g[qb>>2]=+g[K>>2]+ +g[L>>2];g[u>>2]=+g[Fc>>2]+ +g[t>>2];g[ja>>2]=+g[z>>2]+ +g[ia>>2];g[ka>>2]=+g[u>>2]+ +g[ja>>2];g[Oa>>2]=+g[kb>>2]+ +g[lb>>2];g[Pa>>2]=+g[pb>>2]+ +g[qb>>2];g[ac>>2]=+g[Oa>>2]+ +g[Pa>>2];g[E>>2]=+g[Fc>>2]-+g[t>>2]*.5;g[H>>2]=(+g[F>>2]-+g[G>>2])*.8660253882408142;g[I>>2]=+g[E>>2]-+g[H>>2];g[U>>2]=+g[E>>2]+ +g[H>>2];g[ob>>2]=(+g[ca>>2]-+g[ha>>2])*.8660253882408142;g[rb>>2]=+g[pb>>2]-+g[qb>>2]*.5;g[sb>>2]=+g[ob>>2]-+g[rb>>2];g[Eb>>2]=+g[ob>>2]+ +g[rb>>2];g[jb>>2]=(+g[s>>2]-+g[Kc>>2])*.8660253882408142;g[mb>>2]=+g[kb>>2]-+g[lb>>2]*.5;g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[Db>>2]=+g[mb>>2]-+g[jb>>2];g[J>>2]=+g[z>>2]-+g[ia>>2]*.5;g[M>>2]=(+g[K>>2]-+g[L>>2])*.8660253882408142;g[N>>2]=+g[J>>2]-+g[M>>2];g[V>>2]=+g[J>>2]+ +g[M>>2];g[Tc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Vc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Sc>>2]=+g[(c[m>>2]|0)+16>>2];g[Uc>>2]=+g[(c[m>>2]|0)+20>>2];g[Wc>>2]=+g[Sc>>2]*+g[Tc>>2]+ 
+g[Uc>>2]*+g[Vc>>2];g[Ba>>2]=+g[Sc>>2]*+g[Vc>>2]-+g[Uc>>2]*+g[Tc>>2];g[id>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[kd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[hd>>2]=+g[(c[m>>2]|0)+88>>2];g[jd>>2]=+g[(c[m>>2]|0)+92>>2];g[ld>>2]=+g[hd>>2]*+g[id>>2]+ +g[jd>>2]*+g[kd>>2];g[Ga>>2]=+g[hd>>2]*+g[kd>>2]-+g[jd>>2]*+g[id>>2];g[Yc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[_c>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Xc>>2]=+g[(c[m>>2]|0)+56>>2];g[Zc>>2]=+g[(c[m>>2]|0)+60>>2];g[$c>>2]=+g[Xc>>2]*+g[Yc>>2]+ +g[Zc>>2]*+g[_c>>2];g[sa>>2]=+g[Xc>>2]*+g[_c>>2]-+g[Zc>>2]*+g[Yc>>2];g[bd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[dd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ad>>2]=+g[(c[m>>2]|0)+96>>2];g[cd>>2]=+g[(c[m>>2]|0)+100>>2];g[ed>>2]=+g[ad>>2]*+g[bd>>2]+ +g[cd>>2]*+g[dd>>2];g[ta>>2]=+g[ad>>2]*+g[dd>>2]-+g[cd>>2]*+g[bd>>2];g[fd>>2]=+g[$c>>2]+ +g[ed>>2];g[Ca>>2]=+g[sa>>2]+ +g[ta>>2];g[pc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[oc>>2]=+g[(c[m>>2]|0)+8>>2];g[qc>>2]=+g[(c[m>>2]|0)+12>>2];g[sc>>2]=+g[oc>>2]*+g[pc>>2]+ +g[qc>>2]*+g[rc>>2];g[xa>>2]=+g[oc>>2]*+g[rc>>2]-+g[qc>>2]*+g[pc>>2];g[uc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[wc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[tc>>2]=+g[(c[m>>2]|0)+48>>2];g[vc>>2]=+g[(c[m>>2]|0)+52>>2];g[xc>>2]=+g[tc>>2]*+g[uc>>2]+ +g[vc>>2]*+g[wc>>2];g[ya>>2]=+g[tc>>2]*+g[wc>>2]-+g[vc>>2]*+g[uc>>2];g[yc>>2]=+g[sc>>2]+ +g[xc>>2];g[Ha>>2]=+g[xa>>2]+ +g[ya>>2];g[gd>>2]=+g[Wc>>2]+ +g[fd>>2];g[zc>>2]=+g[ld>>2]+ +g[yc>>2];g[Ac>>2]=+g[gd>>2]+ +g[zc>>2];g[Ra>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Sa>>2]=+g[Ga>>2]+ +g[Ha>>2];g[$b>>2]=+g[Ra>>2]+ +g[Sa>>2];g[ra>>2]=+g[Wc>>2]-+g[fd>>2]*.5;g[ua>>2]=(+g[sa>>2]-+g[ta>>2])*.8660253882408142;g[va>>2]=+g[ra>>2]-+g[ua>>2];g[R>>2]=+g[ra>>2]+ +g[ua>>2];g[Fa>>2]=(+g[xc>>2]-+g[sc>>2])*.8660253882408142;g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2]*.5;g[Ja>>2]=+g[Fa>>2]+ 
+g[Ia>>2];g[Bb>>2]=+g[Ia>>2]-+g[Fa>>2];g[Aa>>2]=(+g[ed>>2]-+g[$c>>2])*.8660253882408142;g[Da>>2]=+g[Ba>>2]-+g[Ca>>2]*.5;g[Ea>>2]=+g[Aa>>2]+ +g[Da>>2];g[Ab>>2]=+g[Da>>2]-+g[Aa>>2];g[wa>>2]=+g[ld>>2]-+g[yc>>2]*.5;g[B>>2]=(+g[xa>>2]-+g[ya>>2])*.8660253882408142;g[C>>2]=+g[wa>>2]-+g[B>>2];g[S>>2]=+g[wa>>2]+ +g[B>>2];g[Ma>>2]=(+g[Ac>>2]-+g[ka>>2])*.55901700258255;g[la>>2]=+g[Ac>>2]+ +g[ka>>2];g[La>>2]=+g[Rc>>2]-+g[la>>2]*.25;g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Ua>>2]=+g[Qa>>2]*.9510565400123596-+g[Ta>>2]*.5877852439880371;g[Wa>>2]=+g[Ta>>2]*.9510565400123596+ +g[Qa>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[Rc>>2]+ +g[la>>2];g[Va>>2]=+g[Ma>>2]+ +g[La>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Va>>2]-+g[Wa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Va>>2]+ +g[Wa>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Na>>2]-+g[Ua>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Na>>2]+ +g[Ua>>2];g[bc>>2]=(+g[$b>>2]-+g[ac>>2])*.55901700258255;g[dc>>2]=+g[$b>>2]+ +g[ac>>2];g[ec>>2]=+g[cc>>2]-+g[dc>>2]*.25;g[Yb>>2]=+g[ja>>2]-+g[u>>2];g[Zb>>2]=+g[gd>>2]-+g[zc>>2];g[_b>>2]=+g[Yb>>2]*.5877852439880371-+g[Zb>>2]*.9510565400123596;g[gc>>2]=+g[Zb>>2]*.5877852439880371+ +g[Yb>>2]*.9510565400123596;g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[dc>>2]+ +g[cc>>2];g[hc>>2]=+g[ec>>2]-+g[bc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[gc>>2]-+g[hc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[gc>>2]+ +g[hc>>2];g[fc>>2]=+g[bc>>2]+ +g[ec>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[_b>>2]-+g[fc>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[_b>>2]+ +g[fc>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[Gb>>2]=+g[Cb>>2]*.9510565400123596+ +g[Fb>>2]*.5877852439880371;g[Ka>>2]=+g[Fb>>2]*.9510565400123596-+g[Cb>>2]*.5877852439880371;g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[D>>2]=+g[va>>2]+ +g[C>>2];g[O>>2]=+g[I>>2]+ +g[N>>2];g[P>>2]=+g[D>>2]+ 
+g[O>>2];g[xb>>2]=(+g[D>>2]-+g[O>>2])*.55901700258255;g[yb>>2]=+g[qa>>2]-+g[P>>2]*.25;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[qa>>2]+ +g[P>>2];g[Hb>>2]=+g[yb>>2]-+g[xb>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Hb>>2]-+g[Ka>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Hb>>2]+ +g[Ka>>2];g[zb>>2]=+g[xb>>2]+ +g[yb>>2];g[c[l>>2]>>2]=+g[zb>>2]-+g[Gb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[zb>>2]+ +g[Gb>>2];g[Mb>>2]=+g[va>>2]-+g[C>>2];g[Nb>>2]=+g[I>>2]-+g[N>>2];g[Ob>>2]=+g[Mb>>2]*.9510565400123596+ +g[Nb>>2]*.5877852439880371;g[Wb>>2]=+g[Nb>>2]*.9510565400123596-+g[Mb>>2]*.5877852439880371;g[Sb>>2]=+g[bb>>2]-+g[_a>>2];g[Pb>>2]=+g[Db>>2]+ +g[Eb>>2];g[Qb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[Tb>>2]=+g[Qb>>2]+ +g[Pb>>2];g[Rb>>2]=(+g[Pb>>2]-+g[Qb>>2])*.55901700258255;g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2]*.25;g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Tb>>2]+ +g[Sb>>2];g[Xb>>2]=+g[Rb>>2]+ +g[Ub>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Wb>>2]-+g[Xb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Wb>>2]+ +g[Xb>>2];g[Vb>>2]=+g[Rb>>2]-+g[Ub>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ob>>2]+ +g[Vb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Vb>>2]-+g[Ob>>2];g[db>>2]=+g[U>>2]-+g[V>>2];g[eb>>2]=+g[R>>2]-+g[S>>2];g[fb>>2]=+g[db>>2]*.9510565400123596-+g[eb>>2]*.5877852439880371;g[Kb>>2]=+g[eb>>2]*.9510565400123596+ +g[db>>2]*.5877852439880371;g[cb>>2]=+g[_a>>2]+ +g[bb>>2];g[Xa>>2]=+g[sb>>2]-+g[nb>>2];g[Ya>>2]=+g[Ea>>2]+ +g[Ja>>2];g[Za>>2]=+g[Xa>>2]-+g[Ya>>2];g[gb>>2]=+g[Za>>2]*.25+ +g[cb>>2];g[hb>>2]=(+g[Ya>>2]+ +g[Xa>>2])*.55901700258255;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Za>>2]-+g[cb>>2];g[Lb>>2]=+g[hb>>2]+ +g[gb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Kb>>2]+ +g[Lb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Lb>>2]-+g[Kb>>2];g[Jb>>2]=+g[gb>>2]-+g[hb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[fb>>2]-+g[Jb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[fb>>2]+ +g[Jb>>2];g[ib>>2]=+g[Ea>>2]-+g[Ja>>2];g[tb>>2]=+g[nb>>2]+ +g[sb>>2];g[ub>>2]=+g[ib>>2]*.9510565400123596+ 
+g[tb>>2]*.5877852439880371;g[wb>>2]=+g[tb>>2]*.9510565400123596-+g[ib>>2]*.5877852439880371;g[Q>>2]=+g[ma>>2]+ +g[pa>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[X>>2]=+g[T>>2]+ +g[W>>2];g[Y>>2]=(+g[T>>2]-+g[W>>2])*.55901700258255;g[Z>>2]=+g[Q>>2]-+g[X>>2]*.25;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Q>>2]+ +g[X>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[_>>2]-+g[ub>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[_>>2]+ +g[ub>>2];g[vb>>2]=+g[Z>>2]-+g[Y>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[vb>>2]-+g[wb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[vb>>2]+ +g[wb>>2];c[md>>2]=(c[md>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+112;c[n>>2]=c[n>>2]^c[2998]}i=nd;return}function Hr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,36,5032);i=b;return}function Ir(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0;fd=i;i=i+864|0;k=fd+856|0;l=fd+852|0;m=fd+848|0;n=fd+844|0;g
d=fd+840|0;o=fd+836|0;p=fd+832|0;ed=fd+816|0;dc=fd+812|0;Rb=fd+808|0;sa=fd+804|0;cb=fd+800|0;Oc=fd+796|0;Qb=fd+792|0;va=fd+788|0;$a=fd+784|0;_c=fd+780|0;Bb=fd+776|0;D=fd+772|0;kb=fd+768|0;lc=fd+764|0;Ab=fd+760|0;I=fd+756|0;lb=fd+752|0;ea=fd+748|0;pa=fd+744|0;La=fd+740|0;Ma=fd+736|0;Na=fd+732|0;Oa=fd+728|0;_=fd+724|0;sb=fd+720|0;Ea=fd+716|0;rb=fd+712|0;yc=fd+708|0;u=fd+704|0;Eb=fd+700|0;Fb=fd+696|0;Gb=fd+692|0;Hb=fd+688|0;P=fd+684|0;pb=fd+680|0;U=fd+676|0;ob=fd+672|0;q=fd+668|0;bb=fd+664|0;cc=fd+660|0;ab=fd+656|0;Ib=fd+652|0;bc=fd+648|0;za=fd+644|0;ac=fd+640|0;Ic=fd+636|0;ta=fd+632|0;Nc=fd+628|0;ua=fd+624|0;fc=fd+620|0;Hc=fd+616|0;ec=fd+612|0;Gc=fd+608|0;Kc=fd+604|0;Mc=fd+600|0;Jc=fd+596|0;Lc=fd+592|0;Uc=fd+588|0;ya=fd+584|0;Zc=fd+580|0;B=fd+576|0;xa=fd+572|0;C=fd+568|0;Rc=fd+564|0;Tc=fd+560|0;Qc=fd+556|0;Sc=fd+552|0;Wc=fd+548|0;Yc=fd+544|0;Vc=fd+540|0;Xc=fd+536|0;dd=fd+532|0;F=fd+528|0;kc=fd+524|0;G=fd+520|0;E=fd+516|0;H=fd+512|0;ad=fd+508|0;cd=fd+504|0;$c=fd+500|0;bd=fd+496|0;hc=fd+492|0;jc=fd+488|0;gc=fd+484|0;ic=fd+480|0;A=fd+476|0;W=fd+472|0;oa=fd+468|0;Ca=fd+464|0;da=fd+460|0;X=fd+456|0;ja=fd+452|0;Ba=fd+448|0;x=fd+444|0;z=fd+440|0;w=fd+436|0;y=fd+432|0;la=fd+428|0;na=fd+424|0;ka=fd+420|0;ma=fd+416|0;aa=fd+412|0;ca=fd+408|0;$=fd+404|0;ba=fd+400|0;ga=fd+396|0;ia=fd+392|0;fa=fd+388|0;ha=fd+384|0;Y=fd+380|0;Z=fd+376|0;Aa=fd+372|0;Da=fd+368|0;sc=fd+364|0;Q=fd+360|0;t=fd+356|0;N=fd+352|0;xc=fd+348|0;R=fd+344|0;Dc=fd+340|0;M=fd+336|0;pc=fd+332|0;rc=fd+328|0;oc=fd+324|0;qc=fd+320|0;Fc=fd+316|0;s=fd+312|0;Ec=fd+308|0;r=fd+304|0;uc=fd+300|0;wc=fd+296|0;tc=fd+292|0;vc=fd+288|0;Ac=fd+284|0;Cc=fd+280|0;zc=fd+276|0;Bc=fd+272|0;L=fd+268|0;O=fd+264|0;S=fd+260|0;T=fd+256|0;K=fd+252|0;Ha=fd+248|0;Tb=fd+244|0;Vb=fd+240|0;Ga=fd+236|0;Ub=fd+232|0;ib=fd+228|0;Ob=fd+224|0;wa=fd+220|0;J=fd+216|0;Pb=fd+212|0;Sb=fd+208|0;V=fd+204|0;Fa=fd+200|0;Ia=fd+196|0;Ja=fd+192|0;nc=fd+188|0;Va=fd+184|0;eb=fd+180|0;gb=fd+176|0;ra=fd+172|0;fb=fd+168|0;Ya=fd+164|0;Za=fd+160|0;Pc=fd+156|0;mc=fd+152|0;
_a=fd+148|0;db=fd+144|0;v=fd+140|0;qa=fd+136|0;Wa=fd+132|0;Xa=fd+128|0;nb=fd+124|0;vb=fd+120|0;Zb=fd+116|0;$b=fd+112|0;ub=fd+108|0;_b=fd+104|0;yb=fd+100|0;Wb=fd+96|0;jb=fd+92|0;mb=fd+88|0;Xb=fd+84|0;Yb=fd+80|0;qb=fd+76|0;tb=fd+72|0;wb=fd+68|0;xb=fd+64|0;Db=fd+60|0;Ra=fd+56|0;Lb=fd+52|0;Nb=fd+48|0;Qa=fd+44|0;hb=fd+40|0;Ua=fd+36|0;Mb=fd+32|0;zb=fd+28|0;Cb=fd+24|0;Jb=fd+20|0;Kb=fd+16|0;Ka=fd+12|0;Pa=fd+8|0;Sa=fd+4|0;Ta=fd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[gd>>2]=f;c[o>>2]=h;c[p>>2]=j;g[fd+828>>2]=.3826834261417389;g[fd+824>>2]=.9238795042037964;g[fd+820>>2]=.7071067690849304;c[ed>>2]=c[gd>>2];c[m>>2]=(c[m>>2]|0)+(((c[gd>>2]|0)-1|0)*30<<2);while(1){if((c[ed>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[bb>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[bc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+56>>2];g[ac>>2]=+g[(c[m>>2]|0)+60>>2];g[cc>>2]=+g[za>>2]*+g[Ib>>2]+ +g[ac>>2]*+g[bc>>2];g[ab>>2]=+g[za>>2]*+g[bc>>2]-+g[ac>>2]*+g[Ib>>2];g[dc>>2]=+g[q>>2]+ +g[cc>>2];g[Rb>>2]=+g[bb>>2]-+g[ab>>2];g[sa>>2]=+g[q>>2]-+g[cc>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[fc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Hc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ec>>2]=+g[(c[m>>2]|0)+24>>2];g[Gc>>2]=+g[(c[m>>2]|0)+28>>2];g[Ic>>2]=+g[ec>>2]*+g[fc>>2]+ +g[Gc>>2]*+g[Hc>>2];g[ta>>2]=+g[ec>>2]*+g[Hc>>2]-+g[Gc>>2]*+g[fc>>2];g[Kc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Jc>>2]=+g[(c[m>>2]|0)+88>>2];g[Lc>>2]=+g[(c[m>>2]|0)+92>>2];g[Nc>>2]=+g[Jc>>2]*+g[Kc>>2]+ +g[Lc>>2]*+g[Mc>>2];g[ua>>2]=+g[Jc>>2]*+g[Mc>>2]-+g[Lc>>2]*+g[Kc>>2];g[Oc>>2]=+g[Ic>>2]+ +g[Nc>>2];g[Qb>>2]=+g[Ic>>2]-+g[Nc>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[$a>>2]=+g[ta>>2]+ +g[ua>>2];g[Rc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Tc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Qc>>2]=+g[(c[m>>2]|0)+8>>2];g[Sc>>2]=+g[(c[m>>2]|0)+12>>2];g[Uc>>2]=+g[Qc>>2]*+g[Rc>>2]+ 
+g[Sc>>2]*+g[Tc>>2];g[ya>>2]=+g[Qc>>2]*+g[Tc>>2]-+g[Sc>>2]*+g[Rc>>2];g[Wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Yc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Vc>>2]=+g[(c[m>>2]|0)+72>>2];g[Xc>>2]=+g[(c[m>>2]|0)+76>>2];g[Zc>>2]=+g[Vc>>2]*+g[Wc>>2]+ +g[Xc>>2]*+g[Yc>>2];g[B>>2]=+g[Vc>>2]*+g[Yc>>2]-+g[Xc>>2]*+g[Wc>>2];g[_c>>2]=+g[Uc>>2]+ +g[Zc>>2];g[Bb>>2]=+g[ya>>2]+ +g[B>>2];g[xa>>2]=+g[Uc>>2]-+g[Zc>>2];g[C>>2]=+g[ya>>2]-+g[B>>2];g[D>>2]=+g[xa>>2]-+g[C>>2];g[kb>>2]=+g[xa>>2]+ +g[C>>2];g[ad>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[cd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[$c>>2]=+g[(c[m>>2]|0)+104>>2];g[bd>>2]=+g[(c[m>>2]|0)+108>>2];g[dd>>2]=+g[$c>>2]*+g[ad>>2]+ +g[bd>>2]*+g[cd>>2];g[F>>2]=+g[$c>>2]*+g[cd>>2]-+g[bd>>2]*+g[ad>>2];g[hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[gc>>2]=+g[(c[m>>2]|0)+40>>2];g[ic>>2]=+g[(c[m>>2]|0)+44>>2];g[kc>>2]=+g[gc>>2]*+g[hc>>2]+ +g[ic>>2]*+g[jc>>2];g[G>>2]=+g[gc>>2]*+g[jc>>2]-+g[ic>>2]*+g[hc>>2];g[lc>>2]=+g[dd>>2]+ +g[kc>>2];g[Ab>>2]=+g[F>>2]+ +g[G>>2];g[E>>2]=+g[dd>>2]-+g[kc>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[I>>2]=+g[E>>2]+ +g[H>>2];g[lb>>2]=+g[E>>2]-+g[H>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+112>>2];g[y>>2]=+g[(c[m>>2]|0)+116>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[W>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+80>>2];g[ma>>2]=+g[(c[m>>2]|0)+84>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[Ca>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+48>>2];g[ba>>2]=+g[(c[m>>2]|0)+52>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ 
+g[ba>>2]*+g[ca>>2];g[X>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+16>>2];g[ha>>2]=+g[(c[m>>2]|0)+20>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[Ba>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[La>>2]=+g[ea>>2]-+g[pa>>2];g[Ma>>2]=+g[W>>2]+ +g[X>>2];g[Na>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[Z>>2]=+g[ja>>2]-+g[oa>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[sb>>2]=+g[Y>>2]-+g[Z>>2];g[Aa>>2]=+g[A>>2]-+g[da>>2];g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[Ea>>2]=+g[Aa>>2]-+g[Da>>2];g[rb>>2]=+g[Aa>>2]+ +g[Da>>2];g[pc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[oc>>2]=+g[c[m>>2]>>2];g[qc>>2]=+g[(c[m>>2]|0)+4>>2];g[sc>>2]=+g[oc>>2]*+g[pc>>2]+ +g[qc>>2]*+g[rc>>2];g[Q>>2]=+g[oc>>2]*+g[rc>>2]-+g[qc>>2]*+g[pc>>2];g[Fc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ec>>2]=+g[(c[m>>2]|0)+96>>2];g[r>>2]=+g[(c[m>>2]|0)+100>>2];g[t>>2]=+g[Ec>>2]*+g[Fc>>2]+ +g[r>>2]*+g[s>>2];g[N>>2]=+g[Ec>>2]*+g[s>>2]-+g[r>>2]*+g[Fc>>2];g[uc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[wc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[tc>>2]=+g[(c[m>>2]|0)+64>>2];g[vc>>2]=+g[(c[m>>2]|0)+68>>2];g[xc>>2]=+g[tc>>2]*+g[uc>>2]+ +g[vc>>2]*+g[wc>>2];g[R>>2]=+g[tc>>2]*+g[wc>>2]-+g[vc>>2]*+g[uc>>2];g[Ac>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Cc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[zc>>2]=+g[(c[m>>2]|0)+32>>2];g[Bc>>2]=+g[(c[m>>2]|0)+36>>2];g[Dc>>2]=+g[zc>>2]*+g[Ac>>2]+ +g[Bc>>2]*+g[Cc>>2];g[M>>2]=+g[zc>>2]*+g[Cc>>2]-+g[Bc>>2]*+g[Ac>>2];g[yc>>2]=+g[sc>>2]+ +g[xc>>2];g[u>>2]=+g[Dc>>2]+ +g[t>>2];g[Eb>>2]=+g[yc>>2]-+g[u>>2];g[Fb>>2]=+g[Q>>2]+ +g[R>>2];g[Gb>>2]=+g[M>>2]+ +g[N>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[L>>2]=+g[sc>>2]-+g[xc>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[P>>2]=+g[L>>2]-+g[O>>2];g[pb>>2]=+g[L>>2]+ 
+g[O>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[T>>2]=+g[Dc>>2]-+g[t>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[ob>>2]=+g[S>>2]-+g[T>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[J>>2]=(+g[D>>2]+ +g[I>>2])*.7071067690849304;g[K>>2]=+g[wa>>2]+ +g[J>>2];g[Ha>>2]=+g[wa>>2]-+g[J>>2];g[Pb>>2]=(+g[kb>>2]-+g[lb>>2])*.7071067690849304;g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Tb>>2]=+g[Pb>>2]+ +g[Sb>>2];g[Vb>>2]=+g[Sb>>2]-+g[Pb>>2];g[V>>2]=+g[P>>2]*.9238795042037964-+g[U>>2]*.3826834261417389;g[Fa>>2]=+g[_>>2]*.3826834261417389+ +g[Ea>>2]*.9238795042037964;g[Ga>>2]=+g[V>>2]+ +g[Fa>>2];g[Ub>>2]=+g[Fa>>2]-+g[V>>2];g[Ia>>2]=+g[U>>2]*.9238795042037964+ +g[P>>2]*.3826834261417389;g[Ja>>2]=+g[Ea>>2]*.3826834261417389-+g[_>>2]*.9238795042037964;g[ib>>2]=+g[Ia>>2]+ +g[Ja>>2];g[Ob>>2]=+g[Ja>>2]-+g[Ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[K>>2]-+g[Ga>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ub>>2]-+g[Vb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ub>>2]+ +g[Vb>>2];g[c[l>>2]>>2]=+g[K>>2]+ +g[Ga>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ha>>2]-+g[ib>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Ob>>2]-+g[Tb>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ob>>2]+ +g[Tb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Ha>>2]+ +g[ib>>2];g[Pc>>2]=+g[dc>>2]+ +g[Oc>>2];g[mc>>2]=+g[_c>>2]+ +g[lc>>2];g[nc>>2]=+g[Pc>>2]+ +g[mc>>2];g[Va>>2]=+g[Pc>>2]-+g[mc>>2];g[_a>>2]=+g[Bb>>2]+ +g[Ab>>2];g[db>>2]=+g[$a>>2]+ +g[cb>>2];g[eb>>2]=+g[_a>>2]+ +g[db>>2];g[gb>>2]=+g[db>>2]-+g[_a>>2];g[v>>2]=+g[yc>>2]+ +g[u>>2];g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[ra>>2]=+g[v>>2]+ +g[qa>>2];g[fb>>2]=+g[qa>>2]-+g[v>>2];g[Wa>>2]=+g[Ma>>2]+ +g[Na>>2];g[Xa>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[Za>>2]=+g[Xa>>2]+ +g[Wa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[nc>>2]-+g[ra>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[fb>>2]-+g[gb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[fb>>2]+ +g[gb>>2];g[c[k>>2]>>2]=+g[nc>>2]+ 
+g[ra>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Va>>2]-+g[Ya>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Za>>2]-+g[eb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Za>>2]+ +g[eb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Va>>2]+ +g[Ya>>2];g[jb>>2]=+g[sa>>2]+ +g[va>>2];g[mb>>2]=(+g[kb>>2]+ +g[lb>>2])*.7071067690849304;g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[vb>>2]=+g[jb>>2]-+g[mb>>2];g[Xb>>2]=(+g[I>>2]-+g[D>>2])*.7071067690849304;g[Yb>>2]=+g[Rb>>2]-+g[Qb>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[$b>>2]=+g[Yb>>2]-+g[Xb>>2];g[qb>>2]=+g[ob>>2]*.3826834261417389+ +g[pb>>2]*.9238795042037964;g[tb>>2]=+g[rb>>2]*.9238795042037964-+g[sb>>2]*.3826834261417389;g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[_b>>2]=+g[tb>>2]-+g[qb>>2];g[wb>>2]=+g[pb>>2]*.3826834261417389-+g[ob>>2]*.9238795042037964;g[xb>>2]=+g[sb>>2]*.9238795042037964+ +g[rb>>2]*.3826834261417389;g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Wb>>2]=+g[xb>>2]-+g[wb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[nb>>2]-+g[ub>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[_b>>2]-+g[$b>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[_b>>2]+ +g[$b>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[nb>>2]+ +g[ub>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[vb>>2]-+g[yb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Wb>>2]-+g[Zb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Wb>>2]+ +g[Zb>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[vb>>2]+ +g[yb>>2];g[zb>>2]=+g[dc>>2]-+g[Oc>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Db>>2]=+g[zb>>2]-+g[Cb>>2];g[Ra>>2]=+g[zb>>2]+ +g[Cb>>2];g[Jb>>2]=+g[_c>>2]-+g[lc>>2];g[Kb>>2]=+g[cb>>2]-+g[$a>>2];g[Lb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[Nb>>2]=+g[Kb>>2]-+g[Jb>>2];g[Ka>>2]=+g[Eb>>2]+ +g[Hb>>2];g[Pa>>2]=+g[La>>2]-+g[Oa>>2];g[Qa>>2]=(+g[Ka>>2]+ +g[Pa>>2])*.7071067690849304;g[hb>>2]=(+g[Pa>>2]-+g[Ka>>2])*.7071067690849304;g[Sa>>2]=+g[Eb>>2]-+g[Hb>>2];g[Ta>>2]=+g[La>>2]+ +g[Oa>>2];g[Ua>>2]=(+g[Sa>>2]+ 
+g[Ta>>2])*.7071067690849304;g[Mb>>2]=(+g[Ta>>2]-+g[Sa>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Db>>2]-+g[Qa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Mb>>2]-+g[Nb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Mb>>2]+ +g[Nb>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Db>>2]+ +g[Qa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ra>>2]-+g[Ua>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[hb>>2]-+g[Lb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[hb>>2]+ +g[Lb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ra>>2]+ +g[Ua>>2];c[ed>>2]=(c[ed>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+120;c[n>>2]=c[n>>2]^c[2998]}i=fd;return}function Jr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,37,5080);i=b;return}function Kr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,
Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0;Fe=i;i=i+1184|0;k=Fe+1180|0;l=Fe+1176|0;m=Fe+1172|0;n=Fe+1168|0;Ge=Fe+1164|0;o=Fe+1160|0;p=Fe+1156|0;Ee=Fe+1136|0;ne=Fe+1132|0;S=Fe+1128|0;kd=Fe+1124|0;td=Fe+1120|0;yb=Fe+1116|0;Qb=Fe+1112|0;hc=Fe+1108|0;Uc=Fe+1104|0;qa=Fe+1100|0;P=Fe+1096|0;Q=Fe+1092|0;zc=Fe+1088|0;Cc=Fe+1084|0;ac=Fe+1080|0;Wb=Fe+1076|0;Xb=Fe+1072|0;Wc=Fe+1068|0;W=Fe+1064|0;X=Fe+1060|0;Y=Fe+1056|0;mb=Fe+1052|0;rb=Fe+1048|0;sb=Fe+1044|0;eb=Fe+1040|0;fb=Fe+1036|0;gd=Fe+1032|0;Mb=Fe+1028|0;Nb=Fe+1024|0;Ob=Fe+1020|0;Ta=Fe+1016|0;Ya=Fe+1012|0;rd=Fe+1008|0;Md=Fe+1004|0;u=Fe+1e3|0;v=Fe+996|0;Gc=Fe+992|0;Jc=Fe+988|0;bc=Fe+984|0;Tb=Fe+980|0;Ub=Fe+976|0;Vc=Fe+972|0;T=Fe+968|0;U=Fe+964|0;V=Fe+960|0;Da=Fe+956|0;Ia=Fe+952|0;Ja=Fe+948|0;bb=Fe+944|0;cb=Fe+940|0;fd=Fe+936|0;Jb=Fe+932|0;Kb=Fe+928|0;Lb=Fe+924|0;Gb=Fe+920|0;Na=Fe+916|0;qd=Fe+912|0;q=Fe+908|0;ec=Fe+904|0;Cd=Fe+900|0;dc=Fe+896|0;ge=Fe+892|0;vb=Fe+888|0;le=Fe+884|0;wb=Fe+880|0;Ib=Fe+876|0;Bd=Fe+872|0;za=Fe+868|0;Rc=Fe+864|0;Fd=Fe+860|0;fe=Fe+856|0;Ed=Fe+852|0;ee=Fe+848|0;ie=Fe+844|0;ke=Fe+840|0;he=Fe+836|0;je=Fe+832|0;Dd=Fe+828|0;me=Fe+824|0;id=Fe+820|0;jd=Fe+816|0;ub=Fe+812|0;xb=Fe+808|0;fc=Fe+804|0;gc=Fe+800|0;ea=Fe+796|0;xc=Fe+792|0;ib=Fe+788|0;Ra=Fe+784|0;O=Fe+780|0;Ac=Fe+776|0;qb=Fe+772|0;Xa=Fe+768|0;pa=Fe+764|0;yc=Fe+760|0;lb=Fe+756|0;Sa=Fe+752|0;D=Fe+748|0;Bc=Fe+744|0;nb=Fe+740|0;Wa=Fe+736|0;A=Fe+732|0;Pa=Fe+728|0;da=Fe+724|0;Qa=Fe+720|0;x=Fe+716|0;z=Fe+712|0;w=Fe+708|0;y=Fe+704|0;aa=Fe+700|0;ca=Fe+696|0;$=Fe+692|0;ba=Fe+688|0;I=Fe+684|0;ob=Fe+680|0;N=Fe+676|0;pb=Fe+672|0;F=Fe+668|0;H=Fe+664|0;E=Fe+660|0;G=Fe+656|0;K=Fe+652|0;M=Fe+648|0;J=Fe+644|0;L=Fe+640|0;ja=Fe+636|0;jb=Fe+632|0;oa=Fe+628|0;kb=Fe+624|0;ga=Fe+620|0;ia=Fe+616|0;fa=Fe+612|0;ha=Fe+608|0;la=Fe+604|0;na=Fe+600|0;
ka=Fe+596|0;ma=Fe+592|0;va=Fe+588|0;Ua=Fe+584|0;C=Fe+580|0;Va=Fe+576|0;sa=Fe+572|0;ua=Fe+568|0;ra=Fe+564|0;ta=Fe+560|0;xa=Fe+556|0;B=Fe+552|0;wa=Fe+548|0;ya=Fe+544|0;ye=Fe+540|0;Ec=Fe+536|0;_=Fe+532|0;Eb=Fe+528|0;t=Fe+524|0;Ic=Fe+520|0;Ha=Fe+516|0;Hb=Fe+512|0;Ld=Fe+508|0;Fc=Fe+504|0;Ca=Fe+500|0;Fb=Fe+496|0;Xd=Fe+492|0;Hc=Fe+488|0;Ea=Fe+484|0;Ma=Fe+480|0;se=Fe+476|0;Cb=Fe+472|0;xe=Fe+468|0;Db=Fe+464|0;pe=Fe+460|0;re=Fe+456|0;oe=Fe+452|0;qe=Fe+448|0;ue=Fe+444|0;we=Fe+440|0;te=Fe+436|0;ve=Fe+432|0;ae=Fe+428|0;Fa=Fe+424|0;s=Fe+420|0;Ga=Fe+416|0;Zd=Fe+412|0;$d=Fe+408|0;Yd=Fe+404|0;_d=Fe+400|0;ce=Fe+396|0;r=Fe+392|0;be=Fe+388|0;de=Fe+384|0;De=Fe+380|0;Aa=Fe+376|0;Kd=Fe+372|0;Ba=Fe+368|0;Ae=Fe+364|0;Ce=Fe+360|0;ze=Fe+356|0;Be=Fe+352|0;Hd=Fe+348|0;Jd=Fe+344|0;Gd=Fe+340|0;Id=Fe+336|0;Rd=Fe+332|0;Ka=Fe+328|0;Wd=Fe+324|0;La=Fe+320|0;Od=Fe+316|0;Qd=Fe+312|0;Nd=Fe+308|0;Pd=Fe+304|0;Td=Fe+300|0;Vd=Fe+296|0;Sd=Fe+292|0;Ud=Fe+288|0;vc=Fe+284|0;R=Fe+280|0;uc=Fe+276|0;Lc=Fe+272|0;Nc=Fe+268|0;Dc=Fe+264|0;Kc=Fe+260|0;Mc=Fe+256|0;wc=Fe+252|0;Pb=Fe+248|0;Rb=Fe+244|0;Sb=Fe+240|0;hb=Fe+236|0;tc=Fe+232|0;db=Fe+228|0;gb=Fe+224|0;sc=Fe+220|0;rc=Fe+216|0;Oc=Fe+212|0;Z=Fe+208|0;Pc=Fe+204|0;Zb=Fe+200|0;$b=Fe+196|0;Vb=Fe+192|0;Yb=Fe+188|0;_b=Fe+184|0;Qc=Fe+180|0;tb=Fe+176|0;zb=Fe+172|0;Ab=Fe+168|0;_a=Fe+164|0;$a=Fe+160|0;Oa=Fe+156|0;Za=Fe+152|0;ab=Fe+148|0;Bb=Fe+144|0;lc=Fe+140|0;cc=Fe+136|0;mc=Fe+132|0;kc=Fe+128|0;oc=Fe+124|0;ic=Fe+120|0;jc=Fe+116|0;pc=Fe+112|0;nc=Fe+108|0;hd=Fe+104|0;ld=Fe+100|0;md=Fe+96|0;ed=Fe+92|0;od=Fe+88|0;cd=Fe+84|0;dd=Fe+80|0;pd=Fe+76|0;nd=Fe+72|0;Zc=Fe+68|0;Xc=Fe+64|0;Yc=Fe+60|0;Tc=Fe+56|0;$c=Fe+52|0;qc=Fe+48|0;Sc=Fe+44|0;bd=Fe+40|0;_c=Fe+36|0;xd=Fe+32|0;sd=Fe+28|0;yd=Fe+24|0;wd=Fe+20|0;Ad=Fe+16|0;ud=Fe+12|0;vd=Fe+8|0;ad=Fe+4|0;zd=Fe;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ge>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Fe+1152>>2]=.5877852439880371;g[Fe+1148>>2]=.9510565400123596;g[Fe+1144>>2]=.25;g[Fe+1140>>2]=.55901700258255;c[Ee>>2]=c[Ge>>2];c[m>>2]=(c[m>>2]|0)+(((c[Ge>>2]|0)-1|0)
*38<<2);while(1){if((c[Ee>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[ec>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Bd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+72>>2];g[Rc>>2]=+g[(c[m>>2]|0)+76>>2];g[Cd>>2]=+g[za>>2]*+g[Ib>>2]+ +g[Rc>>2]*+g[Bd>>2];g[dc>>2]=+g[za>>2]*+g[Bd>>2]-+g[Rc>>2]*+g[Ib>>2];g[Fd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[fe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ed>>2]=+g[(c[m>>2]|0)+32>>2];g[ee>>2]=+g[(c[m>>2]|0)+36>>2];g[ge>>2]=+g[Ed>>2]*+g[Fd>>2]+ +g[ee>>2]*+g[fe>>2];g[vb>>2]=+g[Ed>>2]*+g[fe>>2]-+g[ee>>2]*+g[Fd>>2];g[ie>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ke>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[he>>2]=+g[(c[m>>2]|0)+112>>2];g[je>>2]=+g[(c[m>>2]|0)+116>>2];g[le>>2]=+g[he>>2]*+g[ie>>2]+ +g[je>>2]*+g[ke>>2];g[wb>>2]=+g[he>>2]*+g[ke>>2]-+g[je>>2]*+g[ie>>2];g[Dd>>2]=+g[q>>2]+ +g[Cd>>2];g[me>>2]=+g[ge>>2]+ +g[le>>2];g[ne>>2]=+g[Dd>>2]-+g[me>>2];g[S>>2]=+g[Dd>>2]+ +g[me>>2];g[id>>2]=+g[ec>>2]-+g[dc>>2];g[jd>>2]=+g[ge>>2]-+g[le>>2];g[kd>>2]=+g[id>>2]-+g[jd>>2];g[td>>2]=+g[jd>>2]+ +g[id>>2];g[ub>>2]=+g[q>>2]-+g[Cd>>2];g[xb>>2]=+g[vb>>2]-+g[wb>>2];g[yb>>2]=+g[ub>>2]-+g[xb>>2];g[Qb>>2]=+g[ub>>2]+ +g[xb>>2];g[fc>>2]=+g[dc>>2]+ +g[ec>>2];g[gc>>2]=+g[vb>>2]+ +g[wb>>2];g[hc>>2]=+g[fc>>2]-+g[gc>>2];g[Uc>>2]=+g[gc>>2]+ +g[fc>>2];g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+56>>2];g[y>>2]=+g[(c[m>>2]|0)+60>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[Pa>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+136>>2];g[ba>>2]=+g[(c[m>>2]|0)+140>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[Qa>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[xc>>2]=+g[Pa>>2]+ 
+g[Qa>>2];g[ib>>2]=+g[A>>2]-+g[da>>2];g[Ra>>2]=+g[Pa>>2]-+g[Qa>>2];g[F>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[E>>2]=+g[(c[m>>2]|0)+128>>2];g[G>>2]=+g[(c[m>>2]|0)+132>>2];g[I>>2]=+g[E>>2]*+g[F>>2]+ +g[G>>2]*+g[H>>2];g[ob>>2]=+g[E>>2]*+g[H>>2]-+g[G>>2]*+g[F>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[J>>2]=+g[(c[m>>2]|0)+48>>2];g[L>>2]=+g[(c[m>>2]|0)+52>>2];g[N>>2]=+g[J>>2]*+g[K>>2]+ +g[L>>2]*+g[M>>2];g[pb>>2]=+g[J>>2]*+g[M>>2]-+g[L>>2]*+g[K>>2];g[O>>2]=+g[I>>2]+ +g[N>>2];g[Ac>>2]=+g[ob>>2]+ +g[pb>>2];g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[Xa>>2]=+g[I>>2]-+g[N>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+96>>2];g[ha>>2]=+g[(c[m>>2]|0)+100>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[jb>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+16>>2];g[ma>>2]=+g[(c[m>>2]|0)+20>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[kb>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[yc>>2]=+g[jb>>2]+ +g[kb>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[Sa>>2]=+g[ja>>2]-+g[oa>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ra>>2]=+g[(c[m>>2]|0)+88>>2];g[ta>>2]=+g[(c[m>>2]|0)+92>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[Ua>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[xa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[B>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+8>>2];g[ya>>2]=+g[(c[m>>2]|0)+12>>2];g[C>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[B>>2];g[Va>>2]=+g[wa>>2]*+g[B>>2]-+g[ya>>2]*+g[xa>>2];g[D>>2]=+g[va>>2]+ +g[C>>2];g[Bc>>2]=+g[Ua>>2]+ +g[Va>>2];g[nb>>2]=+g[va>>2]-+g[C>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[qa>>2]=+g[ea>>2]-+g[pa>>2];g[P>>2]=+g[D>>2]-+g[O>>2];g[Q>>2]=+g[qa>>2]+ 
+g[P>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[ac>>2]=+g[Cc>>2]-+g[zc>>2];g[Wb>>2]=+g[xc>>2]+ +g[yc>>2];g[Xb>>2]=+g[Bc>>2]+ +g[Ac>>2];g[Wc>>2]=+g[Wb>>2]+ +g[Xb>>2];g[W>>2]=+g[ea>>2]+ +g[pa>>2];g[X>>2]=+g[D>>2]+ +g[O>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[mb>>2]=+g[ib>>2]-+g[lb>>2];g[rb>>2]=+g[nb>>2]-+g[qb>>2];g[sb>>2]=+g[mb>>2]+ +g[rb>>2];g[eb>>2]=+g[Ra>>2]-+g[Sa>>2];g[fb>>2]=+g[Wa>>2]-+g[Xa>>2];g[gd>>2]=+g[eb>>2]+ +g[fb>>2];g[Mb>>2]=+g[ib>>2]+ +g[lb>>2];g[Nb>>2]=+g[nb>>2]+ +g[qb>>2];g[Ob>>2]=+g[Mb>>2]+ +g[Nb>>2];g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Ya>>2]=+g[Wa>>2]+ +g[Xa>>2];g[rd>>2]=+g[Ta>>2]+ +g[Ya>>2];g[pe>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[re>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[oe>>2]=+g[(c[m>>2]|0)+24>>2];g[qe>>2]=+g[(c[m>>2]|0)+28>>2];g[se>>2]=+g[oe>>2]*+g[pe>>2]+ +g[qe>>2]*+g[re>>2];g[Cb>>2]=+g[oe>>2]*+g[re>>2]-+g[qe>>2]*+g[pe>>2];g[ue>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[we>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[te>>2]=+g[(c[m>>2]|0)+104>>2];g[ve>>2]=+g[(c[m>>2]|0)+108>>2];g[xe>>2]=+g[te>>2]*+g[ue>>2]+ +g[ve>>2]*+g[we>>2];g[Db>>2]=+g[te>>2]*+g[we>>2]-+g[ve>>2]*+g[ue>>2];g[ye>>2]=+g[se>>2]+ +g[xe>>2];g[Ec>>2]=+g[Cb>>2]+ +g[Db>>2];g[_>>2]=+g[se>>2]-+g[xe>>2];g[Eb>>2]=+g[Cb>>2]-+g[Db>>2];g[Zd>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[$d>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Yd>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[ae>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[_d>>2]*+g[$d>>2];g[Fa>>2]=+g[Yd>>2]*+g[$d>>2]-+g[_d>>2]*+g[Zd>>2];g[ce>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[be>>2]=+g[(c[m>>2]|0)+80>>2];g[de>>2]=+g[(c[m>>2]|0)+84>>2];g[s>>2]=+g[be>>2]*+g[ce>>2]+ +g[de>>2]*+g[r>>2];g[Ga>>2]=+g[be>>2]*+g[r>>2]-+g[de>>2]*+g[ce>>2];g[t>>2]=+g[ae>>2]+ +g[s>>2];g[Ic>>2]=+g[Fa>>2]+ 
+g[Ga>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Hb>>2]=+g[s>>2]-+g[ae>>2];g[Ae>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ce>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ze>>2]=+g[(c[m>>2]|0)+64>>2];g[Be>>2]=+g[(c[m>>2]|0)+68>>2];g[De>>2]=+g[ze>>2]*+g[Ae>>2]+ +g[Be>>2]*+g[Ce>>2];g[Aa>>2]=+g[ze>>2]*+g[Ce>>2]-+g[Be>>2]*+g[Ae>>2];g[Hd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Jd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Gd>>2]=+g[(c[m>>2]|0)+144>>2];g[Id>>2]=+g[(c[m>>2]|0)+148>>2];g[Kd>>2]=+g[Gd>>2]*+g[Hd>>2]+ +g[Id>>2]*+g[Jd>>2];g[Ba>>2]=+g[Gd>>2]*+g[Jd>>2]-+g[Id>>2]*+g[Hd>>2];g[Ld>>2]=+g[De>>2]+ +g[Kd>>2];g[Fc>>2]=+g[Aa>>2]+ +g[Ba>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[Fb>>2]=+g[De>>2]-+g[Kd>>2];g[Od>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Qd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Nd>>2]=+g[(c[m>>2]|0)+120>>2];g[Pd>>2]=+g[(c[m>>2]|0)+124>>2];g[Rd>>2]=+g[Nd>>2]*+g[Od>>2]+ +g[Pd>>2]*+g[Qd>>2];g[Ka>>2]=+g[Nd>>2]*+g[Qd>>2]-+g[Pd>>2]*+g[Od>>2];g[Td>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Vd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Sd>>2]=+g[(c[m>>2]|0)+40>>2];g[Ud>>2]=+g[(c[m>>2]|0)+44>>2];g[Wd>>2]=+g[Sd>>2]*+g[Td>>2]+ +g[Ud>>2]*+g[Vd>>2];g[La>>2]=+g[Sd>>2]*+g[Vd>>2]-+g[Ud>>2]*+g[Td>>2];g[Xd>>2]=+g[Rd>>2]+ +g[Wd>>2];g[Hc>>2]=+g[Ka>>2]+ +g[La>>2];g[Ea>>2]=+g[Rd>>2]-+g[Wd>>2];g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[Md>>2]=+g[ye>>2]-+g[Ld>>2];g[u>>2]=+g[Xd>>2]-+g[t>>2];g[v>>2]=+g[Md>>2]+ +g[u>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[bc>>2]=+g[Gc>>2]+ +g[Jc>>2];g[Tb>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Ub>>2]=+g[Hc>>2]+ +g[Ic>>2];g[Vc>>2]=+g[Tb>>2]+ +g[Ub>>2];g[T>>2]=+g[ye>>2]+ +g[Ld>>2];g[U>>2]=+g[Xd>>2]+ +g[t>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Da>>2]=+g[_>>2]-+g[Ca>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2];g[Ja>>2]=+g[Da>>2]+ +g[Ia>>2];g[bb>>2]=+g[Eb>>2]-+g[Fb>>2];g[cb>>2]=+g[Ma>>2]+ +g[Hb>>2];g[fd>>2]=+g[bb>>2]+ +g[cb>>2];g[Jb>>2]=+g[_>>2]+ +g[Ca>>2];g[Kb>>2]=+g[Ea>>2]+ +g[Ha>>2];g[Lb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[Gb>>2]=+g[Eb>>2]+ 
+g[Fb>>2];g[Na>>2]=+g[Hb>>2]-+g[Ma>>2];g[qd>>2]=+g[Na>>2]-+g[Gb>>2];g[vc>>2]=(+g[v>>2]-+g[Q>>2])*.55901700258255;g[R>>2]=+g[v>>2]+ +g[Q>>2];g[uc>>2]=+g[ne>>2]-+g[R>>2]*.25;g[Dc>>2]=+g[zc>>2]+ +g[Cc>>2];g[Kc>>2]=+g[Gc>>2]-+g[Jc>>2];g[Lc>>2]=+g[Dc>>2]*.9510565400123596-+g[Kc>>2]*.5877852439880371;g[Nc>>2]=+g[Kc>>2]*.9510565400123596+ +g[Dc>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[ne>>2]+ +g[R>>2];g[Mc>>2]=+g[vc>>2]+ +g[uc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Mc>>2]-+g[Nc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Mc>>2]+ +g[Nc>>2];g[wc>>2]=+g[uc>>2]-+g[vc>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[wc>>2]-+g[Lc>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[wc>>2]+ +g[Lc>>2];g[Pb>>2]=(+g[Lb>>2]-+g[Ob>>2])*.55901700258255;g[Rb>>2]=+g[Lb>>2]+ +g[Ob>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2]*.25;g[db>>2]=+g[bb>>2]-+g[cb>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[hb>>2]=+g[db>>2]*.9510565400123596+ +g[gb>>2]*.5877852439880371;g[tc>>2]=+g[gb>>2]*.9510565400123596-+g[db>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Qb>>2]+ +g[Rb>>2];g[sc>>2]=+g[Sb>>2]-+g[Pb>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[sc>>2]-+g[tc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[tc>>2]+ +g[sc>>2];g[rc>>2]=+g[Pb>>2]+ +g[Sb>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[hb>>2]+ +g[rc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[rc>>2]-+g[hb>>2];g[Oc>>2]=(+g[V>>2]-+g[Y>>2])*.55901700258255;g[Z>>2]=+g[V>>2]+ +g[Y>>2];g[Pc>>2]=+g[S>>2]-+g[Z>>2]*.25;g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2];g[Zb>>2]=+g[Vb>>2]*.9510565400123596+ +g[Yb>>2]*.5877852439880371;g[$b>>2]=+g[Yb>>2]*.9510565400123596-+g[Vb>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[S>>2]+ +g[Z>>2];g[_b>>2]=+g[Pc>>2]-+g[Oc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[_b>>2]-+g[$b>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[_b>>2]+ +g[$b>>2];g[Qc>>2]=+g[Oc>>2]+ +g[Pc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Qc>>2]-+g[Zb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Qc>>2]+ 
+g[Zb>>2];g[tb>>2]=(+g[Ja>>2]-+g[sb>>2])*.55901700258255;g[zb>>2]=+g[Ja>>2]+ +g[sb>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2]*.25;g[Oa>>2]=+g[Gb>>2]+ +g[Na>>2];g[Za>>2]=+g[Ta>>2]-+g[Ya>>2];g[_a>>2]=+g[Oa>>2]*.9510565400123596+ +g[Za>>2]*.5877852439880371;g[$a>>2]=+g[Za>>2]*.9510565400123596-+g[Oa>>2]*.5877852439880371;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[yb>>2]+ +g[zb>>2];g[ab>>2]=+g[Ab>>2]-+g[tb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[$a>>2]+ +g[ab>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ab>>2]-+g[$a>>2];g[Bb>>2]=+g[tb>>2]+ +g[Ab>>2];g[c[l>>2]>>2]=+g[Bb>>2]-+g[_a>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[_a>>2]+ +g[Bb>>2];g[lc>>2]=(+g[bc>>2]+ +g[ac>>2])*.55901700258255;g[cc>>2]=+g[ac>>2]-+g[bc>>2];g[mc>>2]=+g[cc>>2]*.25+ +g[hc>>2];g[ic>>2]=+g[u>>2]-+g[Md>>2];g[jc>>2]=+g[qa>>2]-+g[P>>2];g[kc>>2]=+g[ic>>2]*.9510565400123596-+g[jc>>2]*.5877852439880371;g[oc>>2]=+g[ic>>2]*.5877852439880371+ +g[jc>>2]*.9510565400123596;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[cc>>2]-+g[hc>>2];g[pc>>2]=+g[mc>>2]-+g[lc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[oc>>2]-+g[pc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[oc>>2]+ +g[pc>>2];g[nc>>2]=+g[lc>>2]+ +g[mc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[kc>>2]-+g[nc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[kc>>2]+ +g[nc>>2];g[hd>>2]=(+g[fd>>2]-+g[gd>>2])*.55901700258255;g[ld>>2]=+g[fd>>2]+ +g[gd>>2];g[md>>2]=+g[kd>>2]-+g[ld>>2]*.25;g[cd>>2]=+g[Mb>>2]-+g[Nb>>2];g[dd>>2]=+g[Jb>>2]-+g[Kb>>2];g[ed>>2]=+g[cd>>2]*.9510565400123596-+g[dd>>2]*.5877852439880371;g[od>>2]=+g[dd>>2]*.9510565400123596+ +g[cd>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ld>>2]+ +g[kd>>2];g[pd>>2]=+g[hd>>2]+ +g[md>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[od>>2]+ +g[pd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[pd>>2]-+g[od>>2];g[nd>>2]=+g[hd>>2]-+g[md>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[ed>>2]+ +g[nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[nd>>2]-+g[ed>>2];g[Zc>>2]=(+g[Vc>>2]-+g[Wc>>2])*.55901700258255;g[Xc>>2]=+g[Vc>>2]+ 
+g[Wc>>2];g[Yc>>2]=+g[Uc>>2]-+g[Xc>>2]*.25;g[qc>>2]=+g[T>>2]-+g[U>>2];g[Sc>>2]=+g[W>>2]-+g[X>>2];g[Tc>>2]=+g[qc>>2]*.5877852439880371-+g[Sc>>2]*.9510565400123596;g[$c>>2]=+g[qc>>2]*.9510565400123596+ +g[Sc>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Xc>>2]+ +g[Uc>>2];g[bd>>2]=+g[Zc>>2]+ +g[Yc>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[$c>>2]-+g[bd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[$c>>2]+ +g[bd>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Tc>>2]-+g[_c>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Tc>>2]+ +g[_c>>2];g[xd>>2]=(+g[qd>>2]+ +g[rd>>2])*.55901700258255;g[sd>>2]=+g[qd>>2]-+g[rd>>2];g[yd>>2]=+g[sd>>2]*.25+ +g[td>>2];g[ud>>2]=+g[Da>>2]-+g[Ia>>2];g[vd>>2]=+g[mb>>2]-+g[rb>>2];g[wd>>2]=+g[ud>>2]*.9510565400123596+ +g[vd>>2]*.5877852439880371;g[Ad>>2]=+g[vd>>2]*.9510565400123596-+g[ud>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[sd>>2]-+g[td>>2];g[ad>>2]=+g[xd>>2]+ +g[yd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ad>>2]+ +g[ad>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[ad>>2]-+g[Ad>>2];g[zd>>2]=+g[xd>>2]-+g[yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[wd>>2]+ +g[zd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[zd>>2]-+g[wd>>2];c[Ee>>2]=(c[Ee>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+152;c[n>>2]=c[n>>2]^c[2998]}i=Fe;return}/* Lr(a): coerces a to int32 via a|0, saves the current value of i in b and advances i by 16 for the duration of the call, stores a at c[d>>2] (d aliases b), calls bn(c[d>>2]|0, 38, 5128), then restores i from b and returns no value. NOTE(review): i looks like the emscripten stack pointer and c like the int32 heap view -- confirm against the module prologue. The meaning of the constants 38 and 5128 is not visible from here -- verify against bn. */function Lr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,38,5128);i=b;return}function Mr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0;Ih=i;i=i+1904|0;k=Ih+1900|0;l=Ih+1896|0;m=Ih+1892|0;n=Ih+1888|0;Jh=Ih+1884|0;o=Ih+1880|0;p=Ih+1876|0;Hh=Ih+1792|0;q=Ih+1788|0;Uf=Ih+1784|0;tb=Ih+1780|0;Zf=Ih+1776|0;vh=Ih+1772|0;ub=Ih+1768|0;Xf=Ih+1764|0;Yf=Ih+1760|0;Tf=Ih+1756|0;Af=Ih+1752|0;Cb=Ih+1748|0;id=Ih+1744|0;qb=Ih+1740|0;jf=Ih+1736|0;jc=Ih+1732|0;me=Ih+1728|0;Rd=Ih+1724|0;he=Ih+1720|0;qc=Ih+1716|0;le=Ih+1712|0;Qd=Ih+1708|0;Ke=Ih+1704|0;$g=Ih+1700|0;Fe=Ih+1696|0;Qa=Ih+1692|0;$e=Ih+1688|0;Gd=Ih+1684|0;md=Ih+1680|0;$a=Ih+1676|0;af=Ih+1672|0;Hd=Ih+1668|0;pd=Ih+1664|0;ka=Ih+1660|0;Ge=Ih+1656|0;Qb=Ih+1652|0;cf=Ih+1648|0;Kd=Ih+1644|0;td=Ih+1640|0;vc=Ih+1636|0;df=Ih+1632|0;Jd=Ih+1628|0;wd=Ih+1624|0;Q=Ih+1620|0;Ie=Ih+1616|0;Mc=Ih+1612|0;je=Ih+1608|0;Od=Ih+1604|0;de=Ih+1600|0;Vb=Ih+1596|0;gf=Ih+1592|0;Nd=Ih+1588|0;ae=Ih+1584|0;hf=Ih+1580|0;wb=Ih+1576|0;ih=Ih+1572|0;xb=Ih+1568|0;jh=Ih+1564|0;Vf=Ih+1560|0;oh=Ih+1556|0;zb=Ih+1552|0;th=Ih+1548|0;Ab=Ih+1544|0;uh=Ih+1540|0;Wf=Ih+1536|0;Ib=Ih+1532|0;_d=Ih+1528|0;za=Ih+1524|0;Rc=Ih+1520|0;Hg=Ih+1516|0;hh=Ih+1512|0;rg=Ih+1508|0;Ig=Ih+1504|0;lh=Ih+1500|0;nh=Ih+1496|0;kh=Ih+1492|0;mh=Ih+1488|0;qh=Ih+1484|0;sh=Ih+1480|0;ph=Ih+1476|0;rh=Ih+1472|0;rf=Ih+1468|0;sf=Ih+1464|0;yb=Ih+1460|0;Bb=Ih+1456|0;V=Ih+1452|0;fc=Ih+1448|0;Xb=Ih+1444|0;Yb=Ih+1440|0;oc=Ih+1436|0;nc=Ih+1432|0;ac=Ih+1428|0;dc=Ih+1424|0;gc=Ih+1420|0;Fa=Ih+1416|0;ob=Ih+1412|0;pb=Ih+1408|0;S=Ih+1404|0;U=Ih+1400|0;R=Ih+1396|0;T=Ih+1392|0;_=Ih+1388|0;_b=Ih+1384|0;nb=Ih+1380|0;cc=Ih+1376|0;Ea=Ih+1372|0;$b=Ih+1368|0;ib=Ih+1364|0;bc=Ih+1360|0;X=Ih+1356|0;Z=Ih+1352|0;W=Ih+1348|0;Y=Ih+1344|0;kb=Ih+1340|0;mb=Ih+1336|0;jb=Ih+1332|0;lb=Ih+1328|0;Ba=Ih+1324|0;Da=Ih+1320|0;Aa=Ih+1316|0;Ca=Ih+1312|0;Ha=Ih+1308|0;Ja=Ih+1304|0;Ga=Ih+1300|0;Ia=Ih+1296|0;Zb=Ih+1292|0;ge=Ih+1288|0;ic=Ih+1284|0;fe=Ih+
1280|0;ec=Ih+1276|0;hc=Ih+1272|0;pc=Ih+1268|0;Je=Ih+1264|0;mc=Ih+1260|0;ie=Ih+1256|0;kc=Ih+1252|0;lc=Ih+1248|0;Bh=Ih+1244|0;Xa=Ih+1240|0;La=Ih+1236|0;Oa=Ih+1232|0;Sa=Ih+1228|0;Ra=Ih+1224|0;Ua=Ih+1220|0;Va=Ih+1216|0;Ya=Ih+1212|0;Og=Ih+1208|0;Zg=Ih+1204|0;_g=Ih+1200|0;yh=Ih+1196|0;Ah=Ih+1192|0;xh=Ih+1188|0;zh=Ih+1184|0;Gh=Ih+1180|0;Hb=Ih+1176|0;Yg=Ih+1172|0;Na=Ih+1168|0;Ng=Ih+1164|0;Ka=Ih+1160|0;Tg=Ih+1156|0;Ma=Ih+1152|0;Dh=Ih+1148|0;Fh=Ih+1144|0;Ch=Ih+1140|0;Eh=Ih+1136|0;Vg=Ih+1132|0;Xg=Ih+1128|0;Ug=Ih+1124|0;Wg=Ih+1120|0;Kg=Ih+1116|0;Mg=Ih+1112|0;Jg=Ih+1108|0;Lg=Ih+1104|0;Qg=Ih+1100|0;Sg=Ih+1096|0;Pg=Ih+1092|0;Rg=Ih+1088|0;Pa=Ih+1084|0;ld=Ih+1080|0;Gb=Ih+1076|0;kd=Ih+1072|0;Eb=Ih+1068|0;Fb=Ih+1064|0;Ta=Ih+1060|0;od=Ih+1056|0;_a=Ih+1052|0;nd=Ih+1048|0;Wa=Ih+1044|0;Za=Ih+1040|0;eh=Ih+1036|0;Mb=Ih+1032|0;bb=Ih+1028|0;cb=Ih+1024|0;tc=Ih+1020|0;sc=Ih+1016|0;gb=Ih+1012|0;Kb=Ih+1008|0;Nb=Ih+1004|0;z=Ih+1e3|0;ia=Ih+996|0;ja=Ih+992|0;bh=Ih+988|0;dh=Ih+984|0;ah=Ih+980|0;ch=Ih+976|0;t=Ih+972|0;eb=Ih+968|0;ha=Ih+964|0;Jb=Ih+960|0;y=Ih+956|0;fb=Ih+952|0;ca=Ih+948|0;hb=Ih+944|0;gh=Ih+940|0;s=Ih+936|0;fh=Ih+932|0;r=Ih+928|0;ea=Ih+924|0;ga=Ih+920|0;da=Ih+916|0;fa=Ih+912|0;v=Ih+908|0;x=Ih+904|0;u=Ih+900|0;w=Ih+896|0;$=Ih+892|0;ba=Ih+888|0;A=Ih+884|0;aa=Ih+880|0;db=Ih+876|0;sd=Ih+872|0;Pb=Ih+868|0;rd=Ih+864|0;Lb=Ih+860|0;Ob=Ih+856|0;uc=Ih+852|0;vd=Ih+848|0;rc=Ih+844|0;ud=Ih+840|0;Rb=Ih+836|0;Sb=Ih+832|0;qa=Ih+828|0;Ic=Ih+824|0;yc=Ih+820|0;zc=Ih+816|0;Tb=Ih+812|0;Qc=Ih+808|0;Dc=Ih+804|0;Gc=Ih+800|0;Jc=Ih+796|0;D=Ih+792|0;O=Ih+788|0;P=Ih+784|0;na=Ih+780|0;pa=Ih+776|0;ma=Ih+772|0;oa=Ih+768|0;va=Ih+764|0;Bc=Ih+760|0;N=Ih+756|0;Fc=Ih+752|0;C=Ih+748|0;Cc=Ih+744|0;I=Ih+740|0;Ec=Ih+736|0;sa=Ih+732|0;ua=Ih+728|0;ra=Ih+724|0;ta=Ih+720|0;K=Ih+716|0;M=Ih+712|0;J=Ih+708|0;L=Ih+704|0;xa=Ih+700|0;B=Ih+696|0;wa=Ih+692|0;ya=Ih+688|0;F=Ih+684|0;H=Ih+680|0;E=Ih+676|0;G=Ih+672|0;Ac=Ih+668|0;ce=Ih+664|0;Lc=Ih+660|0;be=Ih+656|0;Hc=Ih+652|0;Kc=Ih+648|0;Ub=Ih+644|0;$d=Ih+640|0;Pc=Ih+636|0;zd=Ih+632|0;Nc=Ih+
628|0;Oc=Ih+624|0;lf=Ih+620|0;nf=Ih+616|0;wh=Ih+612|0;sb=Ih+608|0;Ce=Ih+604|0;De=Ih+600|0;mf=Ih+596|0;Ee=Ih+592|0;He=Ih+588|0;kf=Ih+584|0;la=Ih+580|0;rb=Ih+576|0;Db=Ih+572|0;Fd=Ih+568|0;Bf=Ih+564|0;Nf=Ih+560|0;Uc=Ih+556|0;Sf=Ih+552|0;Vc=Ih+548|0;Rf=Ih+544|0;ad=Ih+540|0;Gf=Ih+536|0;dd=Ih+532|0;Ef=Ih+528|0;Ud=Ih+524|0;xf=Ih+520|0;Vd=Ih+516|0;wf=Ih+512|0;_c=Ih+508|0;Of=Ih+504|0;Bd=Ih+500|0;Mf=Ih+496|0;vb=Ih+492|0;zf=Ih+488|0;ab=Ih+484|0;wc=Ih+480|0;xc=Ih+476|0;Wb=Ih+472|0;Sc=Ih+468|0;Tc=Ih+464|0;Yd=Ih+460|0;Zd=Ih+456|0;Cf=Ih+452|0;bd=Ih+448|0;cd=Ih+444|0;Df=Ih+440|0;Id=Ih+436|0;Ld=Ih+432|0;Md=Ih+428|0;Pd=Ih+424|0;Sd=Ih+420|0;Td=Ih+416|0;Yc=Ih+412|0;Zc=Ih+408|0;Kf=Ih+404|0;$c=Ih+400|0;Ad=Ih+396|0;Lf=Ih+392|0;Cd=Ih+388|0;Ed=Ih+384|0;Xc=Ih+380|0;Dd=Ih+376|0;Wc=Ih+372|0;yf=Ih+368|0;If=Ih+364|0;Hf=Ih+360|0;Jf=Ih+356|0;Ff=Ih+352|0;sg=Ih+348|0;tg=Ih+344|0;Qf=Ih+340|0;ug=Ih+336|0;Pf=Ih+332|0;ed=Ih+328|0;gd=Ih+324|0;Xd=Ih+320|0;fd=Ih+316|0;Wd=Ih+312|0;xg=Ih+308|0;Fg=Ih+304|0;yg=Ih+300|0;Bg=Ih+296|0;Cg=Ih+292|0;Dg=Ih+288|0;Gg=Ih+284|0;Eg=Ih+280|0;vg=Ih+276|0;wg=Ih+272|0;zg=Ih+268|0;Ag=Ih+264|0;jd=Ih+260|0;_e=Ih+256|0;$f=Ih+252|0;lg=Ih+248|0;Ne=Ih+244|0;qg=Ih+240|0;Oe=Ih+236|0;pg=Ih+232|0;ve=Ih+228|0;eg=Ih+224|0;ye=Ih+220|0;cg=Ih+216|0;pe=Ih+212|0;pf=Ih+208|0;qe=Ih+204|0;of=Ih+200|0;Te=Ih+196|0;mg=Ih+192|0;We=Ih+188|0;kg=Ih+184|0;hd=Ih+180|0;_f=Ih+176|0;qd=Ih+172|0;xd=Ih+168|0;yd=Ih+164|0;ee=Ih+160|0;Le=Ih+156|0;Me=Ih+152|0;te=Ih+148|0;ue=Ih+144|0;ag=Ih+140|0;we=Ih+136|0;xe=Ih+132|0;bg=Ih+128|0;bf=Ih+124|0;ef=Ih+120|0;ff=Ih+116|0;ke=Ih+112|0;ne=Ih+108|0;oe=Ih+104|0;Re=Ih+100|0;Se=Ih+96|0;ig=Ih+92|0;Ue=Ih+88|0;Ve=Ih+84|0;jg=Ih+80|0;qf=Ih+76|0;gg=Ih+72|0;fg=Ih+68|0;hg=Ih+64|0;dg=Ih+60|0;ze=Ih+56|0;Be=Ih+52|0;se=Ih+48|0;Ae=Ih+44|0;re=Ih+40|0;Xe=Ih+36|0;Ze=Ih+32|0;Qe=Ih+28|0;Ye=Ih+24|0;Pe=Ih+20|0;tf=Ih+16|0;uf=Ih+12|0;og=Ih+8|0;vf=Ih+4|0;ng=Ih;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Jh>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ih+1872>>2]=.9980267286300659;g[Ih+1868>>2]=.06279052048921585;g[Ih+1
864>>2]=.6845471262931824;g[Ih+1860>>2]=.728968620300293;g[Ih+1856>>2]=.4817536771297455;g[Ih+1852>>2]=.8763066530227661;g[Ih+1848>>2]=.24868988990783691;g[Ih+1844>>2]=.9685831665992737;g[Ih+1840>>2]=.9921147227287292;g[Ih+1836>>2]=.12533323466777802;g[Ih+1832>>2]=.4257792830467224;g[Ih+1828>>2]=.9048270583152771;g[Ih+1824>>2]=.6374239921569824;g[Ih+1820>>2]=.7705132365226746;g[Ih+1816>>2]=.8443279266357422;g[Ih+1812>>2]=.5358268022537231;g[Ih+1808>>2]=.5877852439880371;g[Ih+1804>>2]=.9510565400123596;g[Ih+1800>>2]=.25;g[Ih+1796>>2]=.55901700258255;c[Hh>>2]=c[Jh>>2];c[m>>2]=(c[m>>2]|0)+(((c[Jh>>2]|0)-1|0)*48<<2);while(1){if((c[Hh>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[Uf>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+32>>2];g[Rc>>2]=+g[(c[m>>2]|0)+36>>2];g[hf>>2]=+g[za>>2]*+g[Ib>>2]+ +g[Rc>>2]*+g[_d>>2];g[wb>>2]=+g[za>>2]*+g[_d>>2]-+g[Rc>>2]*+g[Ib>>2];g[Hg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[hh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[rg>>2]=+g[(c[m>>2]|0)+152>>2];g[Ig>>2]=+g[(c[m>>2]|0)+156>>2];g[ih>>2]=+g[rg>>2]*+g[Hg>>2]+ +g[Ig>>2]*+g[hh>>2];g[xb>>2]=+g[rg>>2]*+g[hh>>2]-+g[Ig>>2]*+g[Hg>>2];g[jh>>2]=+g[hf>>2]+ +g[ih>>2];g[Vf>>2]=+g[wb>>2]+ +g[xb>>2];g[lh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[nh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[kh>>2]=+g[(c[m>>2]|0)+72>>2];g[mh>>2]=+g[(c[m>>2]|0)+76>>2];g[oh>>2]=+g[kh>>2]*+g[lh>>2]+ +g[mh>>2]*+g[nh>>2];g[zb>>2]=+g[kh>>2]*+g[nh>>2]-+g[mh>>2]*+g[lh>>2];g[qh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[sh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ph>>2]=+g[(c[m>>2]|0)+112>>2];g[rh>>2]=+g[(c[m>>2]|0)+116>>2];g[th>>2]=+g[ph>>2]*+g[qh>>2]+ +g[rh>>2]*+g[sh>>2];g[Ab>>2]=+g[ph>>2]*+g[sh>>2]-+g[rh>>2]*+g[qh>>2];g[uh>>2]=+g[oh>>2]+ +g[th>>2];g[Wf>>2]=+g[zb>>2]+ +g[Ab>>2];g[tb>>2]=(+g[jh>>2]-+g[uh>>2])*.55901700258255;g[Zf>>2]=(+g[Vf>>2]-+g[Wf>>2])*.55901700258255;g[vh>>2]=+g[jh>>2]+ 
+g[uh>>2];g[ub>>2]=+g[q>>2]-+g[vh>>2]*.25;g[Xf>>2]=+g[Vf>>2]+ +g[Wf>>2];g[Yf>>2]=+g[Uf>>2]-+g[Xf>>2]*.25;g[rf>>2]=+g[oh>>2]-+g[th>>2];g[sf>>2]=+g[hf>>2]-+g[ih>>2];g[Tf>>2]=+g[rf>>2]*.9510565400123596-+g[sf>>2]*.5877852439880371;g[Af>>2]=+g[sf>>2]*.9510565400123596+ +g[rf>>2]*.5877852439880371;g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Cb>>2]=+g[yb>>2]*.9510565400123596+ +g[Bb>>2]*.5877852439880371;g[id>>2]=+g[Bb>>2]*.9510565400123596-+g[yb>>2]*.5877852439880371;g[S>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[R>>2]=+g[(c[m>>2]|0)+16>>2];g[T>>2]=+g[(c[m>>2]|0)+20>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[fc>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[X>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[W>>2]=+g[(c[m>>2]|0)+56>>2];g[Y>>2]=+g[(c[m>>2]|0)+60>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[_b>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[kb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[jb>>2]=+g[(c[m>>2]|0)+136>>2];g[lb>>2]=+g[(c[m>>2]|0)+140>>2];g[nb>>2]=+g[jb>>2]*+g[kb>>2]+ +g[lb>>2]*+g[mb>>2];g[cc>>2]=+g[jb>>2]*+g[mb>>2]-+g[lb>>2]*+g[kb>>2];g[Ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Aa>>2]=+g[(c[m>>2]|0)+176>>2];g[Ca>>2]=+g[(c[m>>2]|0)+180>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[$b>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ga>>2]=+g[(c[m>>2]|0)+96>>2];g[Ia>>2]=+g[(c[m>>2]|0)+100>>2];g[ib>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[bc>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[Xb>>2]=+g[_>>2]-+g[Ea>>2];g[Yb>>2]=+g[ib>>2]-+g[nb>>2];g[oc>>2]=+g[bc>>2]-+g[cc>>2];g[nc>>2]=+g[_b>>2]-+g[$b>>2];g[ac>>2]=+g[_b>>2]+ +g[$b>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[gc>>2]=+g[ac>>2]+ +g[dc>>2];g[Fa>>2]=+g[_>>2]+ +g[Ea>>2];g[ob>>2]=+g[ib>>2]+ 
+g[nb>>2];g[pb>>2]=+g[Fa>>2]+ +g[ob>>2];g[qb>>2]=+g[V>>2]+ +g[pb>>2];g[jf>>2]=+g[fc>>2]+ +g[gc>>2];g[Zb>>2]=+g[Xb>>2]*.9510565400123596+ +g[Yb>>2]*.5877852439880371;g[ge>>2]=+g[Yb>>2]*.9510565400123596-+g[Xb>>2]*.5877852439880371;g[ec>>2]=(+g[ac>>2]-+g[dc>>2])*.55901700258255;g[hc>>2]=+g[fc>>2]-+g[gc>>2]*.25;g[ic>>2]=+g[ec>>2]+ +g[hc>>2];g[fe>>2]=+g[hc>>2]-+g[ec>>2];g[jc>>2]=+g[Zb>>2]+ +g[ic>>2];g[me>>2]=+g[ge>>2]+ +g[fe>>2];g[Rd>>2]=+g[ic>>2]-+g[Zb>>2];g[he>>2]=+g[fe>>2]-+g[ge>>2];g[pc>>2]=+g[nc>>2]*.9510565400123596+ +g[oc>>2]*.5877852439880371;g[Je>>2]=+g[oc>>2]*.9510565400123596-+g[nc>>2]*.5877852439880371;g[kc>>2]=(+g[Fa>>2]-+g[ob>>2])*.55901700258255;g[lc>>2]=+g[V>>2]-+g[pb>>2]*.25;g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[ie>>2]=+g[lc>>2]-+g[kc>>2];g[qc>>2]=+g[mc>>2]-+g[pc>>2];g[le>>2]=+g[ie>>2]-+g[Je>>2];g[Qd>>2]=+g[mc>>2]+ +g[pc>>2];g[Ke>>2]=+g[ie>>2]+ +g[Je>>2];g[yh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Ah>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[xh>>2]=+g[c[m>>2]>>2];g[zh>>2]=+g[(c[m>>2]|0)+4>>2];g[Bh>>2]=+g[xh>>2]*+g[yh>>2]+ +g[zh>>2]*+g[Ah>>2];g[Xa>>2]=+g[xh>>2]*+g[Ah>>2]-+g[zh>>2]*+g[yh>>2];g[Dh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Fh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ch>>2]=+g[(c[m>>2]|0)+40>>2];g[Eh>>2]=+g[(c[m>>2]|0)+44>>2];g[Gh>>2]=+g[Ch>>2]*+g[Dh>>2]+ +g[Eh>>2]*+g[Fh>>2];g[Hb>>2]=+g[Ch>>2]*+g[Fh>>2]-+g[Eh>>2]*+g[Dh>>2];g[Vg>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Xg>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Ug>>2]=+g[(c[m>>2]|0)+120>>2];g[Wg>>2]=+g[(c[m>>2]|0)+124>>2];g[Yg>>2]=+g[Ug>>2]*+g[Vg>>2]+ +g[Wg>>2]*+g[Xg>>2];g[Na>>2]=+g[Ug>>2]*+g[Xg>>2]-+g[Wg>>2]*+g[Vg>>2];g[Kg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Mg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Jg>>2]=+g[(c[m>>2]|0)+160>>2];g[Lg>>2]=+g[(c[m>>2]|0)+164>>2];g[Ng>>2]=+g[Jg>>2]*+g[Kg>>2]+ 
+g[Lg>>2]*+g[Mg>>2];g[Ka>>2]=+g[Jg>>2]*+g[Mg>>2]-+g[Lg>>2]*+g[Kg>>2];g[Qg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Sg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Pg>>2]=+g[(c[m>>2]|0)+80>>2];g[Rg>>2]=+g[(c[m>>2]|0)+84>>2];g[Tg>>2]=+g[Pg>>2]*+g[Qg>>2]+ +g[Rg>>2]*+g[Sg>>2];g[Ma>>2]=+g[Pg>>2]*+g[Sg>>2]-+g[Rg>>2]*+g[Qg>>2];g[La>>2]=+g[Hb>>2]-+g[Ka>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Sa>>2]=+g[Tg>>2]-+g[Yg>>2];g[Ra>>2]=+g[Gh>>2]-+g[Ng>>2];g[Ua>>2]=+g[Hb>>2]+ +g[Ka>>2];g[Va>>2]=+g[Ma>>2]+ +g[Na>>2];g[Ya>>2]=+g[Ua>>2]+ +g[Va>>2];g[Og>>2]=+g[Gh>>2]+ +g[Ng>>2];g[Zg>>2]=+g[Tg>>2]+ +g[Yg>>2];g[_g>>2]=+g[Og>>2]+ +g[Zg>>2];g[$g>>2]=+g[Bh>>2]+ +g[_g>>2];g[Fe>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Pa>>2]=+g[La>>2]*.9510565400123596+ +g[Oa>>2]*.5877852439880371;g[ld>>2]=+g[Oa>>2]*.9510565400123596-+g[La>>2]*.5877852439880371;g[Eb>>2]=(+g[Og>>2]-+g[Zg>>2])*.55901700258255;g[Fb>>2]=+g[Bh>>2]-+g[_g>>2]*.25;g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[kd>>2]=+g[Fb>>2]-+g[Eb>>2];g[Qa>>2]=+g[Gb>>2]-+g[Pa>>2];g[$e>>2]=+g[kd>>2]-+g[ld>>2];g[Gd>>2]=+g[Gb>>2]+ +g[Pa>>2];g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[Ta>>2]=+g[Ra>>2]*.9510565400123596+ +g[Sa>>2]*.5877852439880371;g[od>>2]=+g[Sa>>2]*.9510565400123596-+g[Ra>>2]*.5877852439880371;g[Wa>>2]=(+g[Ua>>2]-+g[Va>>2])*.55901700258255;g[Za>>2]=+g[Xa>>2]-+g[Ya>>2]*.25;g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[nd>>2]=+g[Za>>2]-+g[Wa>>2];g[$a>>2]=+g[Ta>>2]+ +g[_a>>2];g[af>>2]=+g[od>>2]+ +g[nd>>2];g[Hd>>2]=+g[_a>>2]-+g[Ta>>2];g[pd>>2]=+g[nd>>2]-+g[od>>2];g[bh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[dh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ah>>2]=+g[(c[m>>2]|0)+24>>2];g[ch>>2]=+g[(c[m>>2]|0)+28>>2];g[eh>>2]=+g[ah>>2]*+g[bh>>2]+ +g[ch>>2]*+g[dh>>2];g[Mb>>2]=+g[ah>>2]*+g[dh>>2]-+g[ch>>2]*+g[bh>>2];g[gh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[fh>>2]=+g[(c[m>>2]|0)+64>>2];g[r>>2]=+g[(c[m>>2]|0)+68>>2];g[t>>2]=+g[fh>>2]*+g[gh>>2]+ 
+g[r>>2]*+g[s>>2];g[eb>>2]=+g[fh>>2]*+g[s>>2]-+g[r>>2]*+g[gh>>2];g[ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+144>>2];g[fa>>2]=+g[(c[m>>2]|0)+148>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[Jb>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[v>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[u>>2]=+g[(c[m>>2]|0)+184>>2];g[w>>2]=+g[(c[m>>2]|0)+188>>2];g[y>>2]=+g[u>>2]*+g[v>>2]+ +g[w>>2]*+g[x>>2];g[fb>>2]=+g[u>>2]*+g[x>>2]-+g[w>>2]*+g[v>>2];g[$>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[A>>2]=+g[(c[m>>2]|0)+104>>2];g[aa>>2]=+g[(c[m>>2]|0)+108>>2];g[ca>>2]=+g[A>>2]*+g[$>>2]+ +g[aa>>2]*+g[ba>>2];g[hb>>2]=+g[A>>2]*+g[ba>>2]-+g[aa>>2]*+g[$>>2];g[bb>>2]=+g[y>>2]-+g[t>>2];g[cb>>2]=+g[ca>>2]-+g[ha>>2];g[tc>>2]=+g[hb>>2]-+g[Jb>>2];g[sc>>2]=+g[eb>>2]-+g[fb>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[Kb>>2]=+g[hb>>2]+ +g[Jb>>2];g[Nb>>2]=+g[gb>>2]+ +g[Kb>>2];g[z>>2]=+g[t>>2]+ +g[y>>2];g[ia>>2]=+g[ca>>2]+ +g[ha>>2];g[ja>>2]=+g[z>>2]+ +g[ia>>2];g[ka>>2]=+g[eh>>2]+ +g[ja>>2];g[Ge>>2]=+g[Mb>>2]+ +g[Nb>>2];g[db>>2]=+g[bb>>2]*.9510565400123596-+g[cb>>2]*.5877852439880371;g[sd>>2]=+g[bb>>2]*.5877852439880371+ +g[cb>>2]*.9510565400123596;g[Lb>>2]=(+g[gb>>2]-+g[Kb>>2])*.55901700258255;g[Ob>>2]=+g[Mb>>2]-+g[Nb>>2]*.25;g[Pb>>2]=+g[Lb>>2]+ +g[Ob>>2];g[rd>>2]=+g[Ob>>2]-+g[Lb>>2];g[Qb>>2]=+g[db>>2]-+g[Pb>>2];g[cf>>2]=+g[sd>>2]+ +g[rd>>2];g[Kd>>2]=+g[db>>2]+ +g[Pb>>2];g[td>>2]=+g[rd>>2]-+g[sd>>2];g[uc>>2]=+g[sc>>2]*.9510565400123596+ +g[tc>>2]*.5877852439880371;g[vd>>2]=+g[tc>>2]*.9510565400123596-+g[sc>>2]*.5877852439880371;g[Rb>>2]=(+g[z>>2]-+g[ia>>2])*.55901700258255;g[Sb>>2]=+g[eh>>2]-+g[ja>>2]*.25;g[rc>>2]=+g[Rb>>2]+ +g[Sb>>2];g[ud>>2]=+g[Sb>>2]-+g[Rb>>2];g[vc>>2]=+g[rc>>2]-+g[uc>>2];g[df>>2]=+g[ud>>2]-+g[vd>>2];g[Jd>>2]=+g[rc>>2]+ +g[uc>>2];g[wd>>2]=+g[ud>>2]+ 
+g[vd>>2];g[na>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ma>>2]=+g[(c[m>>2]|0)+8>>2];g[oa>>2]=+g[(c[m>>2]|0)+12>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[Ic>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ra>>2]=+g[(c[m>>2]|0)+48>>2];g[ta>>2]=+g[(c[m>>2]|0)+52>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[Bc>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[J>>2]=+g[(c[m>>2]|0)+128>>2];g[L>>2]=+g[(c[m>>2]|0)+132>>2];g[N>>2]=+g[J>>2]*+g[K>>2]+ +g[L>>2]*+g[M>>2];g[Fc>>2]=+g[J>>2]*+g[M>>2]-+g[L>>2]*+g[K>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[B>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+168>>2];g[ya>>2]=+g[(c[m>>2]|0)+172>>2];g[C>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[B>>2];g[Cc>>2]=+g[wa>>2]*+g[B>>2]-+g[ya>>2]*+g[xa>>2];g[F>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[E>>2]=+g[(c[m>>2]|0)+88>>2];g[G>>2]=+g[(c[m>>2]|0)+92>>2];g[I>>2]=+g[E>>2]*+g[F>>2]+ +g[G>>2]*+g[H>>2];g[Ec>>2]=+g[E>>2]*+g[H>>2]-+g[G>>2]*+g[F>>2];g[yc>>2]=+g[va>>2]-+g[C>>2];g[zc>>2]=+g[I>>2]-+g[N>>2];g[Tb>>2]=+g[Ec>>2]-+g[Fc>>2];g[Qc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Gc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Jc>>2]=+g[Dc>>2]+ +g[Gc>>2];g[D>>2]=+g[va>>2]+ +g[C>>2];g[O>>2]=+g[I>>2]+ +g[N>>2];g[P>>2]=+g[D>>2]+ +g[O>>2];g[Q>>2]=+g[qa>>2]+ +g[P>>2];g[Ie>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Ac>>2]=+g[yc>>2]*.9510565400123596+ +g[zc>>2]*.5877852439880371;g[ce>>2]=+g[zc>>2]*.9510565400123596-+g[yc>>2]*.5877852439880371;g[Hc>>2]=(+g[Dc>>2]-+g[Gc>>2])*.55901700258255;g[Kc>>2]=+g[Ic>>2]-+g[Jc>>2]*.25;g[Lc>>2]=+g[Hc>>2]+ +g[Kc>>2];g[be>>2]=+g[Kc>>2]-+g[Hc>>2];g[Mc>>2]=+g[Ac>>2]+ +g[Lc>>2];g[je>>2]=+g[ce>>2]+ 
+g[be>>2];g[Od>>2]=+g[Lc>>2]-+g[Ac>>2];g[de>>2]=+g[be>>2]-+g[ce>>2];g[Ub>>2]=+g[Qc>>2]*.9510565400123596+ +g[Tb>>2]*.5877852439880371;g[$d>>2]=+g[Tb>>2]*.9510565400123596-+g[Qc>>2]*.5877852439880371;g[Nc>>2]=(+g[D>>2]-+g[O>>2])*.55901700258255;g[Oc>>2]=+g[qa>>2]-+g[P>>2]*.25;g[Pc>>2]=+g[Nc>>2]+ +g[Oc>>2];g[zd>>2]=+g[Oc>>2]-+g[Nc>>2];g[Vb>>2]=+g[Pc>>2]-+g[Ub>>2];g[gf>>2]=+g[zd>>2]-+g[$d>>2];g[Nd>>2]=+g[Pc>>2]+ +g[Ub>>2];g[ae>>2]=+g[zd>>2]+ +g[$d>>2];g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[kf>>2]=+g[Ie>>2]-+g[jf>>2];g[lf>>2]=+g[He>>2]*.9510565400123596+ +g[kf>>2]*.5877852439880371;g[nf>>2]=+g[kf>>2]*.9510565400123596-+g[He>>2]*.5877852439880371;g[wh>>2]=+g[q>>2]+ +g[vh>>2];g[la>>2]=+g[$g>>2]+ +g[ka>>2];g[rb>>2]=+g[Q>>2]+ +g[qb>>2];g[sb>>2]=+g[la>>2]+ +g[rb>>2];g[Ce>>2]=(+g[la>>2]-+g[rb>>2])*.55901700258255;g[De>>2]=+g[wh>>2]-+g[sb>>2]*.25;g[c[k>>2]>>2]=+g[wh>>2]+ +g[sb>>2];g[mf>>2]=+g[De>>2]-+g[Ce>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[mf>>2]-+g[nf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[mf>>2]+ +g[nf>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ee>>2]-+g[lf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ee>>2]+ +g[lf>>2];g[vb>>2]=+g[tb>>2]+ +g[ub>>2];g[Db>>2]=+g[vb>>2]-+g[Cb>>2];g[Fd>>2]=+g[vb>>2]+ +g[Cb>>2];g[zf>>2]=+g[Zf>>2]+ +g[Yf>>2];g[Bf>>2]=+g[zf>>2]-+g[Af>>2];g[Nf>>2]=+g[Af>>2]+ +g[zf>>2];g[ab>>2]=+g[Qa>>2]*.5358268022537231+ +g[$a>>2]*.8443279266357422;g[wc>>2]=+g[Qb>>2]*.7705132365226746-+g[vc>>2]*.6374239921569824;g[xc>>2]=+g[ab>>2]+ +g[wc>>2];g[Wb>>2]=+g[Mc>>2]*.9048270583152771-+g[Vb>>2]*.4257792830467224;g[Sc>>2]=+g[jc>>2]*.12533323466777802-+g[qc>>2]*.9921147227287292;g[Tc>>2]=+g[Wb>>2]+ +g[Sc>>2];g[Uc>>2]=+g[xc>>2]+ +g[Tc>>2];g[Sf>>2]=+g[Wb>>2]-+g[Sc>>2];g[Vc>>2]=(+g[xc>>2]-+g[Tc>>2])*.55901700258255;g[Rf>>2]=+g[wc>>2]-+g[ab>>2];g[Yd>>2]=+g[Hd>>2]*.9685831665992737-+g[Gd>>2]*.24868988990783691;g[Zd>>2]=+g[Kd>>2]*.5358268022537231-+g[Jd>>2]*.8443279266357422;g[Cf>>2]=+g[Yd>>2]+ 
+g[Zd>>2];g[bd>>2]=+g[Od>>2]*.8763066530227661-+g[Nd>>2]*.4817536771297455;g[cd>>2]=+g[Rd>>2]*.728968620300293-+g[Qd>>2]*.6845471262931824;g[Df>>2]=+g[bd>>2]+ +g[cd>>2];g[ad>>2]=+g[Yd>>2]-+g[Zd>>2];g[Gf>>2]=(+g[Cf>>2]-+g[Df>>2])*.55901700258255;g[dd>>2]=+g[bd>>2]-+g[cd>>2];g[Ef>>2]=+g[Cf>>2]+ +g[Df>>2];g[Id>>2]=+g[Gd>>2]*.9685831665992737+ +g[Hd>>2]*.24868988990783691;g[Ld>>2]=+g[Jd>>2]*.5358268022537231+ +g[Kd>>2]*.8443279266357422;g[Md>>2]=+g[Id>>2]+ +g[Ld>>2];g[Pd>>2]=+g[Nd>>2]*.8763066530227661+ +g[Od>>2]*.4817536771297455;g[Sd>>2]=+g[Qd>>2]*.728968620300293+ +g[Rd>>2]*.6845471262931824;g[Td>>2]=+g[Pd>>2]+ +g[Sd>>2];g[Ud>>2]=+g[Md>>2]+ +g[Td>>2];g[xf>>2]=+g[Pd>>2]-+g[Sd>>2];g[Vd>>2]=(+g[Md>>2]-+g[Td>>2])*.55901700258255;g[wf>>2]=+g[Ld>>2]-+g[Id>>2];g[Yc>>2]=+g[$a>>2]*.5358268022537231-+g[Qa>>2]*.8443279266357422;g[Zc>>2]=+g[vc>>2]*.7705132365226746+ +g[Qb>>2]*.6374239921569824;g[Kf>>2]=+g[Yc>>2]+ +g[Zc>>2];g[$c>>2]=+g[qc>>2]*.12533323466777802+ +g[jc>>2]*.9921147227287292;g[Ad>>2]=+g[Vb>>2]*.9048270583152771+ +g[Mc>>2]*.4257792830467224;g[Lf>>2]=+g[Ad>>2]+ +g[$c>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[Of>>2]=+g[Kf>>2]-+g[Lf>>2];g[Bd>>2]=+g[$c>>2]-+g[Ad>>2];g[Mf>>2]=(+g[Kf>>2]+ +g[Lf>>2])*.55901700258255;g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Db>>2]+ +g[Uc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Ef>>2]+ +g[Bf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Of>>2]+ +g[Nf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Fd>>2]+ +g[Ud>>2];g[Cd>>2]=+g[_c>>2]*.9510565400123596+ +g[Bd>>2]*.5877852439880371;g[Ed>>2]=+g[Bd>>2]*.9510565400123596-+g[_c>>2]*.5877852439880371;g[Wc>>2]=+g[Db>>2]-+g[Uc>>2]*.25;g[Xc>>2]=+g[Vc>>2]+ +g[Wc>>2];g[Dd>>2]=+g[Wc>>2]-+g[Vc>>2];g[c[l>>2]>>2]=+g[Xc>>2]-+g[Cd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Dd>>2]+ +g[Ed>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Xc>>2]+ +g[Cd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Dd>>2]-+g[Ed>>2];g[yf>>2]=+g[wf>>2]*.5877852439880371+ 
+g[xf>>2]*.9510565400123596;g[If>>2]=+g[wf>>2]*.9510565400123596-+g[xf>>2]*.5877852439880371;g[Ff>>2]=+g[Bf>>2]-+g[Ef>>2]*.25;g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[Jf>>2]=+g[Gf>>2]+ +g[Ff>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[yf>>2]-+g[Hf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[If>>2]+ +g[Jf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[yf>>2]+ +g[Hf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[If>>2]-+g[Jf>>2];g[sg>>2]=+g[Rf>>2]*.5877852439880371+ +g[Sf>>2]*.9510565400123596;g[tg>>2]=+g[Rf>>2]*.9510565400123596-+g[Sf>>2]*.5877852439880371;g[Pf>>2]=+g[Nf>>2]-+g[Of>>2]*.25;g[Qf>>2]=+g[Mf>>2]-+g[Pf>>2];g[ug>>2]=+g[Mf>>2]+ +g[Pf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Qf>>2]-+g[sg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[tg>>2]+ +g[ug>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[sg>>2]+ +g[Qf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[tg>>2]-+g[ug>>2];g[ed>>2]=+g[ad>>2]*.9510565400123596+ +g[dd>>2]*.5877852439880371;g[gd>>2]=+g[dd>>2]*.9510565400123596-+g[ad>>2]*.5877852439880371;g[Wd>>2]=+g[Fd>>2]-+g[Ud>>2]*.25;g[Xd>>2]=+g[Vd>>2]+ +g[Wd>>2];g[fd>>2]=+g[Wd>>2]-+g[Vd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Xd>>2]-+g[ed>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[fd>>2]+ +g[gd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Xd>>2]+ +g[ed>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[fd>>2]-+g[gd>>2];g[vg>>2]=+g[ka>>2]-+g[$g>>2];g[wg>>2]=+g[Q>>2]-+g[qb>>2];g[xg>>2]=+g[vg>>2]*.5877852439880371+ +g[wg>>2]*.9510565400123596;g[Fg>>2]=+g[vg>>2]*.9510565400123596-+g[wg>>2]*.5877852439880371;g[yg>>2]=+g[Xf>>2]+ +g[Uf>>2];g[zg>>2]=+g[Fe>>2]+ +g[Ge>>2];g[Ag>>2]=+g[Ie>>2]+ +g[jf>>2];g[Bg>>2]=+g[zg>>2]+ +g[Ag>>2];g[Cg>>2]=+g[yg>>2]-+g[Bg>>2]*.25;g[Dg>>2]=(+g[zg>>2]-+g[Ag>>2])*.55901700258255;g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Bg>>2]+ +g[yg>>2];g[Gg>>2]=+g[Dg>>2]+ +g[Cg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Fg>>2]-+g[Gg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Fg>>2]+ 
+g[Gg>>2];g[Eg>>2]=+g[Cg>>2]-+g[Dg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[xg>>2]-+g[Eg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[xg>>2]+ +g[Eg>>2];g[hd>>2]=+g[ub>>2]-+g[tb>>2];g[jd>>2]=+g[hd>>2]+ +g[id>>2];g[_e>>2]=+g[hd>>2]-+g[id>>2];g[_f>>2]=+g[Yf>>2]-+g[Zf>>2];g[$f>>2]=+g[Tf>>2]+ +g[_f>>2];g[lg>>2]=+g[_f>>2]-+g[Tf>>2];g[qd>>2]=+g[md>>2]*.728968620300293+ +g[pd>>2]*.6845471262931824;g[xd>>2]=+g[td>>2]*.12533323466777802-+g[wd>>2]*.9921147227287292;g[yd>>2]=+g[qd>>2]+ +g[xd>>2];g[ee>>2]=+g[ae>>2]*.06279052048921585+ +g[de>>2]*.9980267286300659;g[Le>>2]=+g[he>>2]*.7705132365226746-+g[Ke>>2]*.6374239921569824;g[Me>>2]=+g[ee>>2]+ +g[Le>>2];g[Ne>>2]=+g[yd>>2]+ +g[Me>>2];g[qg>>2]=+g[ee>>2]-+g[Le>>2];g[Oe>>2]=(+g[yd>>2]-+g[Me>>2])*.55901700258255;g[pg>>2]=+g[xd>>2]-+g[qd>>2];g[te>>2]=+g[af>>2]*.8763066530227661-+g[$e>>2]*.4817536771297455;g[ue>>2]=+g[df>>2]*.9048270583152771+ +g[cf>>2]*.4257792830467224;g[ag>>2]=+g[te>>2]-+g[ue>>2];g[we>>2]=+g[je>>2]*.5358268022537231-+g[gf>>2]*.8443279266357422;g[xe>>2]=+g[me>>2]*.06279052048921585-+g[le>>2]*.9980267286300659;g[bg>>2]=+g[we>>2]+ +g[xe>>2];g[ve>>2]=+g[te>>2]+ +g[ue>>2];g[eg>>2]=(+g[ag>>2]-+g[bg>>2])*.55901700258255;g[ye>>2]=+g[we>>2]-+g[xe>>2];g[cg>>2]=+g[ag>>2]+ +g[bg>>2];g[bf>>2]=+g[$e>>2]*.8763066530227661+ +g[af>>2]*.4817536771297455;g[ef>>2]=+g[cf>>2]*.9048270583152771-+g[df>>2]*.4257792830467224;g[ff>>2]=+g[bf>>2]+ +g[ef>>2];g[ke>>2]=+g[gf>>2]*.5358268022537231+ +g[je>>2]*.8443279266357422;g[ne>>2]=+g[le>>2]*.06279052048921585+ +g[me>>2]*.9980267286300659;g[oe>>2]=+g[ke>>2]+ +g[ne>>2];g[pe>>2]=+g[ff>>2]+ +g[oe>>2];g[pf>>2]=+g[ke>>2]-+g[ne>>2];g[qe>>2]=(+g[ff>>2]-+g[oe>>2])*.55901700258255;g[of>>2]=+g[ef>>2]-+g[bf>>2];g[Re>>2]=+g[pd>>2]*.728968620300293-+g[md>>2]*.6845471262931824;g[Se>>2]=+g[wd>>2]*.12533323466777802+ +g[td>>2]*.9921147227287292;g[ig>>2]=+g[Re>>2]-+g[Se>>2];g[Ue>>2]=+g[de>>2]*.06279052048921585-+g[ae>>2]*.9980267286300659;g[Ve>>2]=+g[Ke>>2]*.7705132365226746+ 
+g[he>>2]*.6374239921569824;g[jg>>2]=+g[Ue>>2]-+g[Ve>>2];g[Te>>2]=+g[Re>>2]+ +g[Se>>2];g[mg>>2]=+g[ig>>2]+ +g[jg>>2];g[We>>2]=+g[Ue>>2]+ +g[Ve>>2];g[kg>>2]=(+g[ig>>2]-+g[jg>>2])*.55901700258255;g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[jd>>2]+ +g[Ne>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[cg>>2]+ +g[$f>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[mg>>2]+ +g[lg>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[_e>>2]+ +g[pe>>2];g[qf>>2]=+g[of>>2]*.5877852439880371+ +g[pf>>2]*.9510565400123596;g[gg>>2]=+g[of>>2]*.9510565400123596-+g[pf>>2]*.5877852439880371;g[dg>>2]=+g[$f>>2]-+g[cg>>2]*.25;g[fg>>2]=+g[dg>>2]-+g[eg>>2];g[hg>>2]=+g[eg>>2]+ +g[dg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[qf>>2]-+g[fg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[gg>>2]+ +g[hg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[qf>>2]+ +g[fg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[gg>>2]-+g[hg>>2];g[ze>>2]=+g[ve>>2]*.9510565400123596+ +g[ye>>2]*.5877852439880371;g[Be>>2]=+g[ye>>2]*.9510565400123596-+g[ve>>2]*.5877852439880371;g[re>>2]=+g[_e>>2]-+g[pe>>2]*.25;g[se>>2]=+g[qe>>2]+ +g[re>>2];g[Ae>>2]=+g[re>>2]-+g[qe>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[se>>2]-+g[ze>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ae>>2]+ +g[Be>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[se>>2]+ +g[ze>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ae>>2]-+g[Be>>2];g[Xe>>2]=+g[Te>>2]*.9510565400123596+ +g[We>>2]*.5877852439880371;g[Ze>>2]=+g[We>>2]*.9510565400123596-+g[Te>>2]*.5877852439880371;g[Pe>>2]=+g[jd>>2]-+g[Ne>>2]*.25;g[Qe>>2]=+g[Oe>>2]+ +g[Pe>>2];g[Ye>>2]=+g[Pe>>2]-+g[Oe>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Qe>>2]-+g[Xe>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ye>>2]+ +g[Ze>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Qe>>2]+ +g[Xe>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ye>>2]-+g[Ze>>2];g[tf>>2]=+g[pg>>2]*.5877852439880371+ +g[qg>>2]*.9510565400123596;g[uf>>2]=+g[pg>>2]*.9510565400123596-+g[qg>>2]*.5877852439880371;g[ng>>2]=+g[lg>>2]-+g[mg>>2]*.25;g[og>>2]=+g[kg>>2]-+g[ng>>2];g[vf>>2]=+g[kg>>2]+ 
+g[ng>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[og>>2]-+g[tf>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[uf>>2]+ +g[vf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[tf>>2]+ +g[og>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[uf>>2]-+g[vf>>2];c[Hh>>2]=(c[Hh>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+192;c[n>>2]=c[n>>2]^c[2998]}i=Ih;return}/* Nr(a): thin call wrapper. Saves the stack pointer i, reserves a 16-byte frame, spills the int argument a into it, calls bn(a, 39, 5176) and restores i. Returns nothing. NOTE(review): presumably an FFTW codelet-registration stub, with 39 a function-table index and 5176 the address of a descriptor record; bn is defined outside this chunk, confirm against an unminified build. */function Nr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,39,5176);i=b;return}/* Or(a,b,d,e,f,h,j): in-place two-point butterfly loop over single-precision data; all seven arguments are coerced to int and a, b, d are used as byte addresses. The counter y runs from f up to, but not including, h. Each pass reads A0 = float at a, B0 = float at b, A1 = float at a+4*e, B1 = float at b+4*e, and the coefficient pair W0, W1 at d and d+4. It forms v = W0*A1 + W1*B1 and w = W0*B1 - W1*A1, then stores A0-v at b, A0+v at a, w-B0 at a+4*e and w+B0 at b+4*e. After each pass a advances by 4*j bytes, b moves back by 4*j bytes and d advances by 8 bytes. Arguments and temporaries live in a 64-byte frame carved from the stack pointer i, which is restored before returning. Returns nothing. NOTE(review): the shape matches FFTW's radix-2 half-complex twiddle codelet hf_2(cr, ci, W, rs, mb, me, ms); c and g are presumably the Int32 and Float32 heap views declared outside this chunk, confirm. */function Or(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;/* z keeps the caller's stack pointer; the 16 locals below are 4-byte slots at z+60 down to z */z=i;i=i+64|0;k=z+60|0;l=z+56|0;m=z+52|0;n=z+48|0;A=z+44|0;o=z+40|0;p=z+36|0;y=z+32|0;q=z+28|0;x=z+24|0;v=z+20|0;w=z+16|0;s=z+12|0;u=z+8|0;r=z+4|0;t=z;/* spill the arguments: k=a, l=b, m=d, n=e, A=f, o=h, p=j */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[A>>2]=f;c[o>>2]=h;c[p>>2]=j;/* y starts at f; the coefficient pointer is pre-offset by (f-1)*2 floats */c[y>>2]=c[A>>2];c[m>>2]=(c[m>>2]|0)+((c[A>>2]|0)-1<<1<<2);while(1){/* leave the loop once y >= h */if((c[y>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[x>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];/* v and w: products of the coefficient pair (r, t) with the offset pair (s, u) */g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[w>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[c[l>>2]>>2]=+g[q>>2]-+g[v>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[v>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]-+g[x>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]+ +g[x>>2];/* step: y+1, a forward by j floats, b back by j floats, coefficients forward by 2 floats */c[y>>2]=(c[y>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+8}i=z;return}/* Pr(a): thin call wrapper with the same shape as Nr. Saves the stack pointer i, reserves a 16-byte frame, spills the int argument a into it, calls bn(a, 40, 5224) and restores i. Returns nothing. NOTE(review): presumably the registration stub for the codelet Or directly above, with 40 a function-table index and 5224 a descriptor address; confirm against an unminified build. */function Pr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,40,5224);i=b;return}function Qr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0;Ci=i;i=i+2048|0;k=Ci+2040|0;l=Ci+2036|0;m=Ci+2032|0;n=Ci+2028|0;Di=Ci+2024|0;o=Ci+2020|0;p=Ci+2016|0;Bi=Ci+1984|0;ki=Ci+1980|0;oe=Ci+1976|0;Eg=Ci+1972|0;Sg=Ci+1968|0;Ob=Ci+1964|0;td=Ci+1960|0;Ag=Ci+1956|0;mh=Ci+1952|0;R=Ci+1948|0;kf=Ci+1944|0;He=Ci+1940|0;Cf=Ci+1936|0;bc=Ci+1932|0;fe=Ci+1928|0;Tc=Ci+1924|0;ie=Ci+1920|0;La=Ci+1916|0;qf=Ci+1912|0;Xf=Ci+1908|0;Hf=Ci+1904|0;Bd=Ci+1900|0;Pe=Ci+1896|0;Sd=Ci+1892|0;Me=Ci+1888|0;Jh=Ci+1884|0;lh=Ci+1880|0;re=Ci+1876|0;vg=Ci+1872|0;rc=Ci+1868|0;ud=Ci+1864|0;wc=Ci+1860|0;vd=Ci+1856|0;v=Ci+1852|0;te=Ci+1848|0;we=Ci+1844|0;yf=Ci+1840|0;Dc=Ci+1836|0;zd=Ci+1832|0;Ic=Ci+1828|0;yd=Ci+1824|0;qa=Ci+1820|0;ye=Ci+1816|0;Be=Ci+1812|0;xf=Ci+1808|0;Oc=Ci+1804|0;be=Ci+1800|0;Vb=Ci+1796|0;ae=Ci+1792|0;lb=Ci+1788|0;Ie=Ci+1784|0;nf=Ci+1780|0;Df=Ci+1776|0;mc=Ci+1772|0;Je=Ci+1768|0;Wc=Ci+1764|0;ge=Ci+1760|0;gb=Ci+1756|0;Yf=Ci+1752|0;Tf=Ci+1748|0;If=Ci+1744|0;Md=Ci+1740|0;Ne=Ci+1736|0;Vd=Ci+1732|0;Qe=Ci+1728|0;q=Ci+1724|0;yg=Ci+1720|0;hf=Ci+1716|0;xg=Ci+1712|0;di=Ci+1708|0;Lb=Ci+1704|0;ii=Ci+1700|0;Mb=Ci+1696|0;Ib=Ci+1692|0;_d=Ci+1688|0;za=Ci+1684|0;Rc=Ci+1680|0;Ch=Ci+1676|0;ci=Ci+1672|0;Ah=Ci+1668|0;bi=Ci+1664|0;fi=Ci+1660|0;hi=Ci+1656|0;ei=Ci+1652|0;gi=Ci+1648|0;rg=Ci+1644|0;ji=Ci+1640|0;Cg=Ci+1636|0;Dg=Ci+1632|0;Kb=Ci+1628|0;Nb=Ci+1624|0;wg=Ci+1620|0;zg=Ci+1616|0;xa=Ci+1612|0;oc=Ci+1608|0;P=Ci+1604|0;$b=Ci+1600|0;E=Ci+1596|0;pc=Ci+1592|0;K=Ci+1588|0;_b=Ci+1584|0;ua=Ci+1580|0;wa=Ci+1576|0;ta=Ci+1572|0;va=Ci+1568|0;M=Ci+1564|0;O=Ci+1560|0;L=Ci+1556|0;N=Ci+1552|0;B=Ci+1548|0
;D=Ci+1544|0;ya=Ci+1540|0;C=Ci+1536|0;H=Ci+1532|0;J=Ci+1528|0;G=Ci+1524|0;I=Ci+1520|0;F=Ci+1516|0;Q=Ci+1512|0;Fe=Ci+1508|0;Ge=Ci+1504|0;Zb=Ci+1500|0;ac=Ci+1496|0;qc=Ci+1492|0;Sc=Ci+1488|0;rb=Ci+1484|0;Zc=Ci+1480|0;Hb=Ci+1476|0;Qd=Ci+1472|0;wb=Ci+1468|0;_c=Ci+1464|0;Cb=Ci+1460|0;Pd=Ci+1456|0;ob=Ci+1452|0;qb=Ci+1448|0;nb=Ci+1444|0;pb=Ci+1440|0;Eb=Ci+1436|0;Gb=Ci+1432|0;Db=Ci+1428|0;Fb=Ci+1424|0;tb=Ci+1420|0;vb=Ci+1416|0;sb=Ci+1412|0;ub=Ci+1408|0;zb=Ci+1404|0;Bb=Ci+1400|0;yb=Ci+1396|0;Ab=Ci+1392|0;xb=Ci+1388|0;Ka=Ci+1384|0;Vf=Ci+1380|0;Wf=Ci+1376|0;$c=Ci+1372|0;Ad=Ci+1368|0;Od=Ci+1364|0;Rd=Ci+1360|0;pi=Ci+1356|0;Qb=Ci+1352|0;Hh=Ci+1348|0;uc=Ci+1344|0;ui=Ci+1340|0;Rb=Ci+1336|0;Ai=Ci+1332|0;tc=Ci+1328|0;mi=Ci+1324|0;oi=Ci+1320|0;li=Ci+1316|0;ni=Ci+1312|0;Eh=Ci+1308|0;Gh=Ci+1304|0;Dh=Ci+1300|0;Fh=Ci+1296|0;ri=Ci+1292|0;ti=Ci+1288|0;qi=Ci+1284|0;si=Ci+1280|0;xi=Ci+1276|0;zi=Ci+1272|0;wi=Ci+1268|0;yi=Ci+1264|0;vi=Ci+1260|0;Ih=Ci+1256|0;pe=Ci+1252|0;qe=Ci+1248|0;Pb=Ci+1244|0;Sb=Ci+1240|0;sc=Ci+1236|0;vc=Ci+1232|0;Ph=Ci+1228|0;Ec=Ci+1224|0;t=Ci+1220|0;Bc=Ci+1216|0;Uh=Ci+1212|0;Fc=Ci+1208|0;_h=Ci+1204|0;Ac=Ci+1200|0;Mh=Ci+1196|0;Oh=Ci+1192|0;Lh=Ci+1188|0;Nh=Ci+1184|0;ai=Ci+1180|0;s=Ci+1176|0;$h=Ci+1172|0;r=Ci+1168|0;Rh=Ci+1164|0;Th=Ci+1160|0;Qh=Ci+1156|0;Sh=Ci+1152|0;Xh=Ci+1148|0;Zh=Ci+1144|0;Wh=Ci+1140|0;Yh=Ci+1136|0;Vh=Ci+1132|0;u=Ci+1128|0;ue=Ci+1124|0;ve=Ci+1120|0;zc=Ci+1116|0;Cc=Ci+1112|0;Gc=Ci+1108|0;Hc=Ci+1104|0;A=Ci+1100|0;Kc=Ci+1096|0;oa=Ci+1092|0;Tb=Ci+1088|0;da=Ci+1084|0;Lc=Ci+1080|0;ja=Ci+1076|0;Qc=Ci+1072|0;x=Ci+1068|0;z=Ci+1064|0;w=Ci+1060|0;y=Ci+1056|0;la=Ci+1052|0;na=Ci+1048|0;ka=Ci+1044|0;ma=Ci+1040|0;aa=Ci+1036|0;ca=Ci+1032|0;$=Ci+1028|0;ba=Ci+1024|0;ga=Ci+1020|0;ia=Ci+1016|0;fa=Ci+1012|0;ha=Ci+1008|0;ea=Ci+1004|0;pa=Ci+1e3|0;ze=Ci+996|0;Ae=Ci+992|0;Mc=Ci+988|0;Nc=Ci+984|0;Pc=Ci+980|0;Ub=Ci+976|0;W=Ci+972|0;dc=Ci+968|0;Aa=Ci+964|0;ec=Ci+960|0;cc=Ci+956|0;fc=Ci+952|0;Ga=Ci+948|0;hc=Ci+944|0;jb=Ci+940|0;ic=Ci+936|0;jc=Ci+932|0;kc=Ci+928|0;T=Ci+924|0;V=Ci+920|0;
S=Ci+916|0;U=Ci+912|0;Y=Ci+908|0;_=Ci+904|0;X=Ci+900|0;Z=Ci+896|0;Da=Ci+892|0;Fa=Ci+888|0;Ca=Ci+884|0;Ea=Ci+880|0;Ia=Ci+876|0;ib=Ci+872|0;Ha=Ci+868|0;Ja=Ci+864|0;Ba=Ci+860|0;kb=Ci+856|0;lf=Ci+852|0;mf=Ci+848|0;gc=Ci+844|0;lc=Ci+840|0;Uc=Ci+836|0;Vc=Ci+832|0;Qa=Ci+828|0;Id=Ci+824|0;Va=Ci+820|0;Jd=Ci+816|0;Hd=Ci+812|0;Kd=Ci+808|0;$a=Ci+804|0;Dd=Ci+800|0;eb=Ci+796|0;Ed=Ci+792|0;Cd=Ci+788|0;Fd=Ci+784|0;Na=Ci+780|0;Pa=Ci+776|0;Ma=Ci+772|0;Oa=Ci+768|0;Sa=Ci+764|0;Ua=Ci+760|0;Ra=Ci+756|0;Ta=Ci+752|0;Ya=Ci+748|0;_a=Ci+744|0;Xa=Ci+740|0;Za=Ci+736|0;bb=Ci+732|0;db=Ci+728|0;ab=Ci+724|0;cb=Ci+720|0;Wa=Ci+716|0;fb=Ci+712|0;rf=Ci+708|0;sf=Ci+704|0;Gd=Ci+700|0;Ld=Ci+696|0;Td=Ci+692|0;Ud=Ci+688|0;sa=Ci+684|0;Qf=Ci+680|0;ah=Ci+676|0;ch=Ci+672|0;Jb=Ci+668|0;bh=Ci+664|0;sg=Ci+660|0;tg=Ci+656|0;Kh=Ci+652|0;ra=Ci+648|0;ug=Ci+644|0;Bg=Ci+640|0;mb=Ci+636|0;hb=Ci+632|0;Rf=Ci+628|0;Sf=Ci+624|0;xd=Ci+620|0;Ye=Ci+616|0;Fg=Ci+612|0;Lg=Ci+608|0;de=Ci+604|0;yh=Ci+600|0;gf=Ci+596|0;me=Ci+592|0;Le=Ci+588|0;Ve=Ci+584|0;$e=Ci+580|0;Kg=Ci+576|0;df=Ci+572|0;le=Ci+568|0;Se=Ci+564|0;We=Ci+560|0;wd=Ci+556|0;zh=Ci+552|0;$d=Ci+548|0;ce=Ci+544|0;ef=Ci+540|0;ff=Ci+536|0;he=Ci+532|0;Ke=Ci+528|0;Ze=Ci+524|0;_e=Ci+520|0;bf=Ci+516|0;cf=Ci+512|0;Oe=Ci+508|0;Re=Ci+504|0;ee=Ci+500|0;Te=Ci+496|0;Jg=Ci+492|0;Mg=Ci+488|0;Ng=Ci+484|0;Og=Ci+480|0;Ue=Ci+476|0;Xe=Ci+472|0;af=Ci+468|0;je=Ci+464|0;xh=Ci+460|0;Gg=Ci+456|0;Hg=Ci+452|0;Ig=Ci+448|0;ke=Ci+444|0;ne=Ci+440|0;Af=Ci+436|0;Mf=Ci+432|0;gh=Ci+428|0;ih=Ci+424|0;Ff=Ci+420|0;Nf=Ci+416|0;Kf=Ci+412|0;Of=Ci+408|0;wf=Ci+404|0;zf=Ci+400|0;eh=Ci+396|0;fh=Ci+392|0;Bf=Ci+388|0;Ef=Ci+384|0;Gf=Ci+380|0;Jf=Ci+376|0;Lf=Ci+372|0;hh=Ci+368|0;Pf=Ci+364|0;dh=Ci+360|0;se=Ci+356|0;nh=Ci+352|0;th=Ci+348|0;eg=Ci+344|0;De=Ci+340|0;kh=Ci+336|0;og=Ci+332|0;uf=Ci+328|0;hg=Ci+324|0;sh=Ci+320|0;pf=Ci+316|0;bg=Ci+312|0;lg=Ci+308|0;tf=Ci+304|0;_f=Ci+300|0;cg=Ci+296|0;xe=Ci+292|0;Ce=Ci+288|0;jf=Ci+284|0;of=Ci+280|0;mg=Ci+276|0;ng=Ci+272|0;fg=Ci+268|0;gg=Ci+264|0;jg=Ci+260|0;kg=Ci+256|0;Uf=Ci+252|0;Zf
=Ci+248|0;Ee=Ci+244|0;$f=Ci+240|0;rh=Ci+236|0;uh=Ci+232|0;vh=Ci+228|0;wh=Ci+224|0;ag=Ci+220|0;dg=Ci+216|0;ig=Ci+212|0;pg=Ci+208|0;jh=Ci+204|0;oh=Ci+200|0;ph=Ci+196|0;qh=Ci+192|0;qg=Ci+188|0;vf=Ci+184|0;yc=Ci+180|0;dd=Ci+176|0;Tg=Ci+172|0;Zg=Ci+168|0;Xb=Ci+164|0;Qg=Ci+160|0;nd=Ci+156|0;rd=Ci+152|0;Yc=Ci+148|0;ad=Ci+144|0;gd=Ci+140|0;Yg=Ci+136|0;kd=Ci+132|0;qd=Ci+128|0;Xd=Ci+124|0;bd=Ci+120|0;xc=Ci+116|0;Rg=Ci+112|0;Jc=Ci+108|0;Wb=Ci+104|0;ld=Ci+100|0;md=Ci+96|0;nc=Ci+92|0;Xc=Ci+88|0;ed=Ci+84|0;fd=Ci+80|0;id=Ci+76|0;jd=Ci+72|0;Nd=Ci+68|0;Wd=Ci+64|0;Yb=Ci+60|0;Yd=Ci+56|0;Xg=Ci+52|0;_g=Ci+48|0;$g=Ci+44|0;Bh=Ci+40|0;Zd=Ci+36|0;cd=Ci+32|0;hd=Ci+28|0;od=Ci+24|0;Pg=Ci+20|0;Ug=Ci+16|0;Vg=Ci+12|0;Wg=Ci+8|0;pd=Ci+4|0;sd=Ci;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Di>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ci+2012>>2]=.5555702447891235;g[Ci+2008>>2]=.8314695954322815;g[Ci+2004>>2]=.9807852506637573;g[Ci+2e3>>2]=.19509032368659973;g[Ci+1996>>2]=.3826834261417389;g[Ci+1992>>2]=.9238795042037964;g[Ci+1988>>2]=.7071067690849304;c[Bi>>2]=c[Di>>2];c[m>>2]=(c[m>>2]|0)+(((c[Di>>2]|0)-1|0)*62<<2);while(1){if((c[Bi>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[yg>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+120>>2];g[Rc>>2]=+g[(c[m>>2]|0)+124>>2];g[hf>>2]=+g[za>>2]*+g[Ib>>2]+ +g[Rc>>2]*+g[_d>>2];g[xg>>2]=+g[za>>2]*+g[_d>>2]-+g[Rc>>2]*+g[Ib>>2];g[Ch>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ci>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ah>>2]=+g[(c[m>>2]|0)+56>>2];g[bi>>2]=+g[(c[m>>2]|0)+60>>2];g[di>>2]=+g[Ah>>2]*+g[Ch>>2]+ +g[bi>>2]*+g[ci>>2];g[Lb>>2]=+g[Ah>>2]*+g[ci>>2]-+g[bi>>2]*+g[Ch>>2];g[fi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[hi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ei>>2]=+g[(c[m>>2]|0)+184>>2];g[gi>>2]=+g[(c[m>>2]|0)+188>>2];g[ii>>2]=+g[ei>>2]*+g[fi>>2]+ +g[gi>>2]*+g[hi>>2];g[Mb>>2]=+g[ei>>2]*+g[hi>>2]-+g[gi>>2]*+g[fi>>2];g[rg>>2]=+g[q>>2]+ +g[hf>>2];g[ji>>2]=+g[di>>2]+ 
+g[ii>>2];g[ki>>2]=+g[rg>>2]+ +g[ji>>2];g[oe>>2]=+g[rg>>2]-+g[ji>>2];g[Cg>>2]=+g[di>>2]-+g[ii>>2];g[Dg>>2]=+g[yg>>2]-+g[xg>>2];g[Eg>>2]=+g[Cg>>2]+ +g[Dg>>2];g[Sg>>2]=+g[Dg>>2]-+g[Cg>>2];g[Kb>>2]=+g[q>>2]-+g[hf>>2];g[Nb>>2]=+g[Lb>>2]-+g[Mb>>2];g[Ob>>2]=+g[Kb>>2]+ +g[Nb>>2];g[td>>2]=+g[Kb>>2]-+g[Nb>>2];g[wg>>2]=+g[Lb>>2]+ +g[Mb>>2];g[zg>>2]=+g[xg>>2]+ +g[yg>>2];g[Ag>>2]=+g[wg>>2]+ +g[zg>>2];g[mh>>2]=+g[zg>>2]-+g[wg>>2];g[ua>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[wa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ta>>2]=+g[c[m>>2]>>2];g[va>>2]=+g[(c[m>>2]|0)+4>>2];g[xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[wa>>2];g[oc>>2]=+g[ta>>2]*+g[wa>>2]-+g[va>>2]*+g[ua>>2];g[M>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[O>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[L>>2]=+g[(c[m>>2]|0)+192>>2];g[N>>2]=+g[(c[m>>2]|0)+196>>2];g[P>>2]=+g[L>>2]*+g[M>>2]+ +g[N>>2]*+g[O>>2];g[$b>>2]=+g[L>>2]*+g[O>>2]-+g[N>>2]*+g[M>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ya>>2]=+g[(c[m>>2]|0)+128>>2];g[C>>2]=+g[(c[m>>2]|0)+132>>2];g[E>>2]=+g[ya>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[pc>>2]=+g[ya>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[G>>2]=+g[(c[m>>2]|0)+64>>2];g[I>>2]=+g[(c[m>>2]|0)+68>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[_b>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[F>>2]=+g[xa>>2]+ +g[E>>2];g[Q>>2]=+g[K>>2]+ +g[P>>2];g[R>>2]=+g[F>>2]+ +g[Q>>2];g[kf>>2]=+g[F>>2]-+g[Q>>2];g[Fe>>2]=+g[oc>>2]+ +g[pc>>2];g[Ge>>2]=+g[_b>>2]+ +g[$b>>2];g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[Cf>>2]=+g[Fe>>2]+ +g[Ge>>2];g[Zb>>2]=+g[xa>>2]-+g[E>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[bc>>2]=+g[Zb>>2]+ +g[ac>>2];g[fe>>2]=+g[Zb>>2]-+g[ac>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[Sc>>2]=+g[K>>2]-+g[P>>2];g[Tc>>2]=+g[qc>>2]-+g[Sc>>2];g[ie>>2]=+g[qc>>2]+ 
+g[Sc>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[nb>>2]=+g[(c[m>>2]|0)+240>>2];g[pb>>2]=+g[(c[m>>2]|0)+244>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[Zc>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[Eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Db>>2]=+g[(c[m>>2]|0)+176>>2];g[Fb>>2]=+g[(c[m>>2]|0)+180>>2];g[Hb>>2]=+g[Db>>2]*+g[Eb>>2]+ +g[Fb>>2]*+g[Gb>>2];g[Qd>>2]=+g[Db>>2]*+g[Gb>>2]-+g[Fb>>2]*+g[Eb>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[sb>>2]=+g[(c[m>>2]|0)+112>>2];g[ub>>2]=+g[(c[m>>2]|0)+116>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[_c>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Bb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[yb>>2]=+g[(c[m>>2]|0)+48>>2];g[Ab>>2]=+g[(c[m>>2]|0)+52>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Ab>>2]*+g[Bb>>2];g[Pd>>2]=+g[yb>>2]*+g[Bb>>2]-+g[Ab>>2]*+g[zb>>2];g[xb>>2]=+g[rb>>2]+ +g[wb>>2];g[Ka>>2]=+g[Cb>>2]+ +g[Hb>>2];g[La>>2]=+g[xb>>2]+ +g[Ka>>2];g[qf>>2]=+g[xb>>2]-+g[Ka>>2];g[Vf>>2]=+g[Zc>>2]+ +g[_c>>2];g[Wf>>2]=+g[Pd>>2]+ +g[Qd>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[Hf>>2]=+g[Vf>>2]+ +g[Wf>>2];g[$c>>2]=+g[Zc>>2]-+g[_c>>2];g[Ad>>2]=+g[Cb>>2]-+g[Hb>>2];g[Bd>>2]=+g[$c>>2]-+g[Ad>>2];g[Pe>>2]=+g[$c>>2]+ +g[Ad>>2];g[Od>>2]=+g[rb>>2]-+g[wb>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Sd>>2]=+g[Od>>2]+ +g[Rd>>2];g[Me>>2]=+g[Od>>2]-+g[Rd>>2];g[mi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[oi>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[li>>2]=+g[(c[m>>2]|0)+24>>2];g[ni>>2]=+g[(c[m>>2]|0)+28>>2];g[pi>>2]=+g[li>>2]*+g[mi>>2]+ +g[ni>>2]*+g[oi>>2];g[Qb>>2]=+g[li>>2]*+g[oi>>2]-+g[ni>>2]*+g[mi>>2];g[Eh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Gh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Dh>>2]=+g[(c[m>>2]|0)+88>>2];g[Fh>>2]=+g[(c[m>>2]|0)+92>>2];g[Hh>>2]=+g[Dh>>2]*+g[Eh>>2]+ 
+g[Fh>>2]*+g[Gh>>2];g[uc>>2]=+g[Dh>>2]*+g[Gh>>2]-+g[Fh>>2]*+g[Eh>>2];g[ri>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[ti>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[qi>>2]=+g[(c[m>>2]|0)+152>>2];g[si>>2]=+g[(c[m>>2]|0)+156>>2];g[ui>>2]=+g[qi>>2]*+g[ri>>2]+ +g[si>>2]*+g[ti>>2];g[Rb>>2]=+g[qi>>2]*+g[ti>>2]-+g[si>>2]*+g[ri>>2];g[xi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[zi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[wi>>2]=+g[(c[m>>2]|0)+216>>2];g[yi>>2]=+g[(c[m>>2]|0)+220>>2];g[Ai>>2]=+g[wi>>2]*+g[xi>>2]+ +g[yi>>2]*+g[zi>>2];g[tc>>2]=+g[wi>>2]*+g[zi>>2]-+g[yi>>2]*+g[xi>>2];g[vi>>2]=+g[pi>>2]+ +g[ui>>2];g[Ih>>2]=+g[Ai>>2]+ +g[Hh>>2];g[Jh>>2]=+g[vi>>2]+ +g[Ih>>2];g[lh>>2]=+g[vi>>2]-+g[Ih>>2];g[pe>>2]=+g[tc>>2]+ +g[uc>>2];g[qe>>2]=+g[Qb>>2]+ +g[Rb>>2];g[re>>2]=+g[pe>>2]-+g[qe>>2];g[vg>>2]=+g[qe>>2]+ +g[pe>>2];g[Pb>>2]=+g[pi>>2]-+g[ui>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[rc>>2]=+g[Pb>>2]+ +g[Sb>>2];g[ud>>2]=+g[Pb>>2]-+g[Sb>>2];g[sc>>2]=+g[Ai>>2]-+g[Hh>>2];g[vc>>2]=+g[tc>>2]-+g[uc>>2];g[wc>>2]=+g[sc>>2]-+g[vc>>2];g[vd>>2]=+g[sc>>2]+ +g[vc>>2];g[Mh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Oh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Lh>>2]=+g[(c[m>>2]|0)+8>>2];g[Nh>>2]=+g[(c[m>>2]|0)+12>>2];g[Ph>>2]=+g[Lh>>2]*+g[Mh>>2]+ +g[Nh>>2]*+g[Oh>>2];g[Ec>>2]=+g[Lh>>2]*+g[Oh>>2]-+g[Nh>>2]*+g[Mh>>2];g[ai>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[$h>>2]=+g[(c[m>>2]|0)+200>>2];g[r>>2]=+g[(c[m>>2]|0)+204>>2];g[t>>2]=+g[$h>>2]*+g[ai>>2]+ +g[r>>2]*+g[s>>2];g[Bc>>2]=+g[$h>>2]*+g[s>>2]-+g[r>>2]*+g[ai>>2];g[Rh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Th>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Qh>>2]=+g[(c[m>>2]|0)+136>>2];g[Sh>>2]=+g[(c[m>>2]|0)+140>>2];g[Uh>>2]=+g[Qh>>2]*+g[Rh>>2]+ 
+g[Sh>>2]*+g[Th>>2];g[Fc>>2]=+g[Qh>>2]*+g[Th>>2]-+g[Sh>>2]*+g[Rh>>2];g[Xh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Zh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Wh>>2]=+g[(c[m>>2]|0)+72>>2];g[Yh>>2]=+g[(c[m>>2]|0)+76>>2];g[_h>>2]=+g[Wh>>2]*+g[Xh>>2]+ +g[Yh>>2]*+g[Zh>>2];g[Ac>>2]=+g[Wh>>2]*+g[Zh>>2]-+g[Yh>>2]*+g[Xh>>2];g[Vh>>2]=+g[Ph>>2]+ +g[Uh>>2];g[u>>2]=+g[_h>>2]+ +g[t>>2];g[v>>2]=+g[Vh>>2]+ +g[u>>2];g[te>>2]=+g[Vh>>2]-+g[u>>2];g[ue>>2]=+g[Ec>>2]+ +g[Fc>>2];g[ve>>2]=+g[Ac>>2]+ +g[Bc>>2];g[we>>2]=+g[ue>>2]-+g[ve>>2];g[yf>>2]=+g[ue>>2]+ +g[ve>>2];g[zc>>2]=+g[Ph>>2]-+g[Uh>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[Dc>>2]=+g[zc>>2]+ +g[Cc>>2];g[zd>>2]=+g[zc>>2]-+g[Cc>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Hc>>2]=+g[_h>>2]-+g[t>>2];g[Ic>>2]=+g[Gc>>2]-+g[Hc>>2];g[yd>>2]=+g[Gc>>2]+ +g[Hc>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+232>>2];g[y>>2]=+g[(c[m>>2]|0)+236>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[Kc>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+168>>2];g[ma>>2]=+g[(c[m>>2]|0)+172>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[Tb>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+104>>2];g[ba>>2]=+g[(c[m>>2]|0)+108>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[Lc>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+40>>2];g[ha>>2]=+g[(c[m>>2]|0)+44>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[Qc>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[ye>>2]=+g[ea>>2]-+g[pa>>2];g[ze>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Ae>>2]=+g[Qc>>2]+ 
+g[Tb>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[xf>>2]=+g[ze>>2]+ +g[Ae>>2];g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Nc>>2]=+g[ja>>2]-+g[oa>>2];g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2];g[be>>2]=+g[Mc>>2]+ +g[Nc>>2];g[Pc>>2]=+g[A>>2]-+g[da>>2];g[Ub>>2]=+g[Qc>>2]-+g[Tb>>2];g[Vb>>2]=+g[Pc>>2]+ +g[Ub>>2];g[ae>>2]=+g[Pc>>2]-+g[Ub>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+32>>2];g[U>>2]=+g[(c[m>>2]|0)+36>>2];g[W>>2]=+g[S>>2]*+g[T>>2]+ +g[U>>2]*+g[V>>2];g[dc>>2]=+g[S>>2]*+g[V>>2]-+g[U>>2]*+g[T>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[X>>2]=+g[(c[m>>2]|0)+160>>2];g[Z>>2]=+g[(c[m>>2]|0)+164>>2];g[Aa>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[ec>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[cc>>2]=+g[W>>2]-+g[Aa>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Ca>>2]=+g[(c[m>>2]|0)+224>>2];g[Ea>>2]=+g[(c[m>>2]|0)+228>>2];g[Ga>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[Ea>>2]*+g[Fa>>2];g[hc>>2]=+g[Ca>>2]*+g[Fa>>2]-+g[Ea>>2]*+g[Da>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ha>>2]=+g[(c[m>>2]|0)+96>>2];g[Ja>>2]=+g[(c[m>>2]|0)+100>>2];g[jb>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[ib>>2];g[ic>>2]=+g[Ha>>2]*+g[ib>>2]-+g[Ja>>2]*+g[Ia>>2];g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[kc>>2]=+g[Ga>>2]-+g[jb>>2];g[Ba>>2]=+g[W>>2]+ +g[Aa>>2];g[kb>>2]=+g[Ga>>2]+ +g[jb>>2];g[lb>>2]=+g[Ba>>2]+ +g[kb>>2];g[Ie>>2]=+g[Ba>>2]-+g[kb>>2];g[lf>>2]=+g[hc>>2]+ +g[ic>>2];g[mf>>2]=+g[dc>>2]+ +g[ec>>2];g[nf>>2]=+g[lf>>2]-+g[mf>>2];g[Df>>2]=+g[mf>>2]+ +g[lf>>2];g[gc>>2]=+g[cc>>2]+ +g[fc>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[mc>>2]=(+g[gc>>2]-+g[lc>>2])*.7071067690849304;g[Je>>2]=(+g[gc>>2]+ +g[lc>>2])*.7071067690849304;g[Uc>>2]=+g[kc>>2]+ +g[jc>>2];g[Vc>>2]=+g[cc>>2]-+g[fc>>2];g[Wc>>2]=(+g[Uc>>2]-+g[Vc>>2])*.7071067690849304;g[ge>>2]=(+g[Vc>>2]+ 
+g[Uc>>2])*.7071067690849304;g[Na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ma>>2]=+g[(c[m>>2]|0)+16>>2];g[Oa>>2]=+g[(c[m>>2]|0)+20>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[Id>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Ra>>2]=+g[(c[m>>2]|0)+144>>2];g[Ta>>2]=+g[(c[m>>2]|0)+148>>2];g[Va>>2]=+g[Ra>>2]*+g[Sa>>2]+ +g[Ta>>2]*+g[Ua>>2];g[Jd>>2]=+g[Ra>>2]*+g[Ua>>2]-+g[Ta>>2]*+g[Sa>>2];g[Hd>>2]=+g[Qa>>2]-+g[Va>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[Ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[_a>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Xa>>2]=+g[(c[m>>2]|0)+208>>2];g[Za>>2]=+g[(c[m>>2]|0)+212>>2];g[$a>>2]=+g[Xa>>2]*+g[Ya>>2]+ +g[Za>>2]*+g[_a>>2];g[Dd>>2]=+g[Xa>>2]*+g[_a>>2]-+g[Za>>2]*+g[Ya>>2];g[bb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ab>>2]=+g[(c[m>>2]|0)+80>>2];g[cb>>2]=+g[(c[m>>2]|0)+84>>2];g[eb>>2]=+g[ab>>2]*+g[bb>>2]+ +g[cb>>2]*+g[db>>2];g[Ed>>2]=+g[ab>>2]*+g[db>>2]-+g[cb>>2]*+g[bb>>2];g[Cd>>2]=+g[$a>>2]-+g[eb>>2];g[Fd>>2]=+g[Dd>>2]-+g[Ed>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Va>>2];g[fb>>2]=+g[$a>>2]+ +g[eb>>2];g[gb>>2]=+g[Wa>>2]+ +g[fb>>2];g[Yf>>2]=+g[Wa>>2]-+g[fb>>2];g[rf>>2]=+g[Dd>>2]+ +g[Ed>>2];g[sf>>2]=+g[Id>>2]+ +g[Jd>>2];g[Tf>>2]=+g[rf>>2]-+g[sf>>2];g[If>>2]=+g[sf>>2]+ +g[rf>>2];g[Gd>>2]=+g[Cd>>2]+ +g[Fd>>2];g[Ld>>2]=+g[Hd>>2]-+g[Kd>>2];g[Md>>2]=(+g[Gd>>2]-+g[Ld>>2])*.7071067690849304;g[Ne>>2]=(+g[Ld>>2]+ +g[Gd>>2])*.7071067690849304;g[Td>>2]=+g[Hd>>2]+ +g[Kd>>2];g[Ud>>2]=+g[Fd>>2]-+g[Cd>>2];g[Vd>>2]=(+g[Td>>2]-+g[Ud>>2])*.7071067690849304;g[Qe>>2]=(+g[Td>>2]+ +g[Ud>>2])*.7071067690849304;g[Kh>>2]=+g[ki>>2]+ +g[Jh>>2];g[ra>>2]=+g[v>>2]+ +g[qa>>2];g[sa>>2]=+g[Kh>>2]+ +g[ra>>2];g[Qf>>2]=+g[Kh>>2]-+g[ra>>2];g[ug>>2]=+g[yf>>2]+ +g[xf>>2];g[Bg>>2]=+g[vg>>2]+ +g[Ag>>2];g[ah>>2]=+g[ug>>2]+ +g[Bg>>2];g[ch>>2]=+g[Bg>>2]-+g[ug>>2];g[mb>>2]=+g[R>>2]+ 
+g[lb>>2];g[hb>>2]=+g[La>>2]+ +g[gb>>2];g[Jb>>2]=+g[mb>>2]+ +g[hb>>2];g[bh>>2]=+g[hb>>2]-+g[mb>>2];g[Rf>>2]=+g[Hf>>2]+ +g[If>>2];g[Sf>>2]=+g[Cf>>2]+ +g[Df>>2];g[sg>>2]=+g[Rf>>2]-+g[Sf>>2];g[tg>>2]=+g[Sf>>2]+ +g[Rf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[sa>>2]-+g[Jb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[bh>>2]-+g[ch>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[bh>>2]+ +g[ch>>2];g[c[k>>2]>>2]=+g[sa>>2]+ +g[Jb>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Qf>>2]-+g[sg>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[tg>>2]-+g[ah>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[tg>>2]+ +g[ah>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Qf>>2]+ +g[sg>>2];g[wd>>2]=(+g[ud>>2]+ +g[vd>>2])*.7071067690849304;g[xd>>2]=+g[td>>2]-+g[wd>>2];g[Ye>>2]=+g[td>>2]+ +g[wd>>2];g[zh>>2]=(+g[rc>>2]-+g[wc>>2])*.7071067690849304;g[Fg>>2]=+g[zh>>2]+ +g[Eg>>2];g[Lg>>2]=+g[Eg>>2]-+g[zh>>2];g[$d>>2]=+g[yd>>2]*.9238795042037964+ +g[zd>>2]*.3826834261417389;g[ce>>2]=+g[ae>>2]*.3826834261417389-+g[be>>2]*.9238795042037964;g[de>>2]=+g[$d>>2]+ +g[ce>>2];g[yh>>2]=+g[$d>>2]-+g[ce>>2];g[ef>>2]=+g[Me>>2]+ +g[Ne>>2];g[ff>>2]=+g[Pe>>2]+ +g[Qe>>2];g[gf>>2]=+g[ef>>2]*.19509032368659973-+g[ff>>2]*.9807852506637573;g[me>>2]=+g[ef>>2]*.9807852506637573+ +g[ff>>2]*.19509032368659973;g[he>>2]=+g[fe>>2]-+g[ge>>2];g[Ke>>2]=+g[ie>>2]-+g[Je>>2];g[Le>>2]=+g[he>>2]*.8314695954322815+ +g[Ke>>2]*.5555702447891235;g[Ve>>2]=+g[he>>2]*.5555702447891235-+g[Ke>>2]*.8314695954322815;g[Ze>>2]=+g[zd>>2]*.9238795042037964-+g[yd>>2]*.3826834261417389;g[_e>>2]=+g[be>>2]*.3826834261417389+ +g[ae>>2]*.9238795042037964;g[$e>>2]=+g[Ze>>2]+ +g[_e>>2];g[Kg>>2]=+g[_e>>2]-+g[Ze>>2];g[bf>>2]=+g[fe>>2]+ +g[ge>>2];g[cf>>2]=+g[ie>>2]+ +g[Je>>2];g[df>>2]=+g[bf>>2]*.19509032368659973+ +g[cf>>2]*.9807852506637573;g[le>>2]=+g[bf>>2]*.9807852506637573-+g[cf>>2]*.19509032368659973;g[Oe>>2]=+g[Me>>2]-+g[Ne>>2];g[Re>>2]=+g[Pe>>2]-+g[Qe>>2];g[Se>>2]=+g[Oe>>2]*.8314695954322815-+g[Re>>2]*.5555702447891235;g[We>>2]=+g[Oe>>2]*.5555702447891235+ 
+g[Re>>2]*.8314695954322815;g[ee>>2]=+g[xd>>2]+ +g[de>>2];g[Te>>2]=+g[Le>>2]+ +g[Se>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[ee>>2]-+g[Te>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ee>>2]+ +g[Te>>2];g[Jg>>2]=+g[We>>2]-+g[Ve>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Jg>>2]-+g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Jg>>2]+ +g[Mg>>2];g[Ng>>2]=+g[Se>>2]-+g[Le>>2];g[Og>>2]=+g[Lg>>2]-+g[Kg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Ng>>2]-+g[Og>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Ng>>2]+ +g[Og>>2];g[Ue>>2]=+g[xd>>2]-+g[de>>2];g[Xe>>2]=+g[Ve>>2]+ +g[We>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ue>>2]-+g[Xe>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ue>>2]+ +g[Xe>>2];g[af>>2]=+g[Ye>>2]-+g[$e>>2];g[je>>2]=+g[df>>2]+ +g[gf>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[af>>2]-+g[je>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[af>>2]+ +g[je>>2];g[xh>>2]=+g[gf>>2]-+g[df>>2];g[Gg>>2]=+g[yh>>2]+ +g[Fg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[xh>>2]-+g[Gg>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[xh>>2]+ +g[Gg>>2];g[Hg>>2]=+g[me>>2]-+g[le>>2];g[Ig>>2]=+g[Fg>>2]-+g[yh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Hg>>2]-+g[Ig>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Hg>>2]+ +g[Ig>>2];g[ke>>2]=+g[Ye>>2]+ +g[$e>>2];g[ne>>2]=+g[le>>2]+ +g[me>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[ke>>2]-+g[ne>>2];g[c[l>>2]>>2]=+g[ke>>2]+ +g[ne>>2];g[wf>>2]=+g[ki>>2]-+g[Jh>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[Af>>2]=+g[wf>>2]-+g[zf>>2];g[Mf>>2]=+g[wf>>2]+ +g[zf>>2];g[eh>>2]=+g[v>>2]-+g[qa>>2];g[fh>>2]=+g[Ag>>2]-+g[vg>>2];g[gh>>2]=+g[eh>>2]+ +g[fh>>2];g[ih>>2]=+g[fh>>2]-+g[eh>>2];g[Bf>>2]=+g[R>>2]-+g[lb>>2];g[Ef>>2]=+g[Cf>>2]-+g[Df>>2];g[Ff>>2]=+g[Bf>>2]+ +g[Ef>>2];g[Nf>>2]=+g[Bf>>2]-+g[Ef>>2];g[Gf>>2]=+g[La>>2]-+g[gb>>2];g[Jf>>2]=+g[Hf>>2]-+g[If>>2];g[Kf>>2]=+g[Gf>>2]-+g[Jf>>2];g[Of>>2]=+g[Gf>>2]+ +g[Jf>>2];g[Lf>>2]=(+g[Ff>>2]+ 
+g[Kf>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Af>>2]-+g[Lf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Af>>2]+ +g[Lf>>2];g[hh>>2]=(+g[Of>>2]-+g[Nf>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[hh>>2]-+g[ih>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[hh>>2]+ +g[ih>>2];g[Pf>>2]=(+g[Nf>>2]+ +g[Of>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Mf>>2]-+g[Pf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Mf>>2]+ +g[Pf>>2];g[dh>>2]=(+g[Kf>>2]-+g[Ff>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[dh>>2]-+g[gh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[dh>>2]+ +g[gh>>2];g[se>>2]=+g[oe>>2]-+g[re>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[th>>2]=+g[mh>>2]-+g[lh>>2];g[eg>>2]=+g[oe>>2]+ +g[re>>2];g[xe>>2]=+g[te>>2]+ +g[we>>2];g[Ce>>2]=+g[ye>>2]-+g[Be>>2];g[De>>2]=(+g[xe>>2]+ +g[Ce>>2])*.7071067690849304;g[kh>>2]=(+g[xe>>2]-+g[Ce>>2])*.7071067690849304;g[mg>>2]=+g[Xf>>2]+ +g[Yf>>2];g[ng>>2]=+g[qf>>2]+ +g[Tf>>2];g[og>>2]=+g[mg>>2]*.3826834261417389+ +g[ng>>2]*.9238795042037964;g[uf>>2]=+g[ng>>2]*.3826834261417389-+g[mg>>2]*.9238795042037964;g[fg>>2]=+g[te>>2]-+g[we>>2];g[gg>>2]=+g[ye>>2]+ +g[Be>>2];g[hg>>2]=(+g[fg>>2]+ +g[gg>>2])*.7071067690849304;g[sh>>2]=(+g[gg>>2]-+g[fg>>2])*.7071067690849304;g[jf>>2]=+g[He>>2]-+g[Ie>>2];g[of>>2]=+g[kf>>2]-+g[nf>>2];g[pf>>2]=+g[jf>>2]*.3826834261417389+ +g[of>>2]*.9238795042037964;g[bg>>2]=+g[of>>2]*.3826834261417389-+g[jf>>2]*.9238795042037964;g[jg>>2]=+g[kf>>2]+ +g[nf>>2];g[kg>>2]=+g[He>>2]+ +g[Ie>>2];g[lg>>2]=+g[jg>>2]*.9238795042037964-+g[kg>>2]*.3826834261417389;g[tf>>2]=+g[kg>>2]*.9238795042037964+ +g[jg>>2]*.3826834261417389;g[Uf>>2]=+g[qf>>2]-+g[Tf>>2];g[Zf>>2]=+g[Xf>>2]-+g[Yf>>2];g[_f>>2]=+g[Uf>>2]*.9238795042037964-+g[Zf>>2]*.3826834261417389;g[cg>>2]=+g[Zf>>2]*.9238795042037964+ +g[Uf>>2]*.3826834261417389;g[Ee>>2]=+g[se>>2]+ +g[De>>2];g[$f>>2]=+g[pf>>2]+ +g[_f>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ee>>2]-+g[$f>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Ee>>2]+ 
+g[$f>>2];g[rh>>2]=+g[cg>>2]-+g[bg>>2];g[uh>>2]=+g[sh>>2]+ +g[th>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[rh>>2]-+g[uh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[rh>>2]+ +g[uh>>2];g[vh>>2]=+g[_f>>2]-+g[pf>>2];g[wh>>2]=+g[th>>2]-+g[sh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[vh>>2]-+g[wh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[vh>>2]+ +g[wh>>2];g[ag>>2]=+g[se>>2]-+g[De>>2];g[dg>>2]=+g[bg>>2]+ +g[cg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[ag>>2]-+g[dg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ag>>2]+ +g[dg>>2];g[ig>>2]=+g[eg>>2]+ +g[hg>>2];g[pg>>2]=+g[lg>>2]+ +g[og>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ig>>2]-+g[pg>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[ig>>2]+ +g[pg>>2];g[jh>>2]=+g[uf>>2]-+g[tf>>2];g[oh>>2]=+g[kh>>2]+ +g[nh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[jh>>2]-+g[oh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[jh>>2]+ +g[oh>>2];g[ph>>2]=+g[og>>2]-+g[lg>>2];g[qh>>2]=+g[nh>>2]-+g[kh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[ph>>2]-+g[qh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[ph>>2]+ +g[qh>>2];g[qg>>2]=+g[eg>>2]-+g[hg>>2];g[vf>>2]=+g[tf>>2]+ +g[uf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[qg>>2]-+g[vf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[qg>>2]+ +g[vf>>2];g[xc>>2]=(+g[rc>>2]+ +g[wc>>2])*.7071067690849304;g[yc>>2]=+g[Ob>>2]-+g[xc>>2];g[dd>>2]=+g[Ob>>2]+ +g[xc>>2];g[Rg>>2]=(+g[vd>>2]-+g[ud>>2])*.7071067690849304;g[Tg>>2]=+g[Rg>>2]+ +g[Sg>>2];g[Zg>>2]=+g[Sg>>2]-+g[Rg>>2];g[Jc>>2]=+g[Dc>>2]*.3826834261417389-+g[Ic>>2]*.9238795042037964;g[Wb>>2]=+g[Oc>>2]*.9238795042037964+ +g[Vb>>2]*.3826834261417389;g[Xb>>2]=+g[Jc>>2]+ +g[Wb>>2];g[Qg>>2]=+g[Wb>>2]-+g[Jc>>2];g[ld>>2]=+g[Sd>>2]+ +g[Vd>>2];g[md>>2]=+g[Bd>>2]+ +g[Md>>2];g[nd>>2]=+g[ld>>2]*.9807852506637573-+g[md>>2]*.19509032368659973;g[rd>>2]=+g[md>>2]*.9807852506637573+ +g[ld>>2]*.19509032368659973;g[nc>>2]=+g[bc>>2]-+g[mc>>2];g[Xc>>2]=+g[Tc>>2]-+g[Wc>>2];g[Yc>>2]=+g[nc>>2]*.8314695954322815-+g[Xc>>2]*.5555702447891235;g[ad>>2]=+g[Xc>>2]*.8314695954322815+ 
+g[nc>>2]*.5555702447891235;g[ed>>2]=+g[Ic>>2]*.3826834261417389+ +g[Dc>>2]*.9238795042037964;g[fd>>2]=+g[Vb>>2]*.9238795042037964-+g[Oc>>2]*.3826834261417389;g[gd>>2]=+g[ed>>2]+ +g[fd>>2];g[Yg>>2]=+g[ed>>2]-+g[fd>>2];g[id>>2]=+g[Tc>>2]+ +g[Wc>>2];g[jd>>2]=+g[bc>>2]+ +g[mc>>2];g[kd>>2]=+g[id>>2]*.19509032368659973+ +g[jd>>2]*.9807852506637573;g[qd>>2]=+g[jd>>2]*.19509032368659973-+g[id>>2]*.9807852506637573;g[Nd>>2]=+g[Bd>>2]-+g[Md>>2];g[Wd>>2]=+g[Sd>>2]-+g[Vd>>2];g[Xd>>2]=+g[Nd>>2]*.5555702447891235+ +g[Wd>>2]*.8314695954322815;g[bd>>2]=+g[Wd>>2]*.5555702447891235-+g[Nd>>2]*.8314695954322815;g[Yb>>2]=+g[yc>>2]+ +g[Xb>>2];g[Yd>>2]=+g[Yc>>2]+ +g[Xd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Yb>>2]-+g[Yd>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Yb>>2]+ +g[Yd>>2];g[Xg>>2]=+g[bd>>2]-+g[ad>>2];g[_g>>2]=+g[Yg>>2]+ +g[Zg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Xg>>2]-+g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Xg>>2]+ +g[_g>>2];g[$g>>2]=+g[Xd>>2]-+g[Yc>>2];g[Bh>>2]=+g[Zg>>2]-+g[Yg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[$g>>2]-+g[Bh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[$g>>2]+ +g[Bh>>2];g[Zd>>2]=+g[yc>>2]-+g[Xb>>2];g[cd>>2]=+g[ad>>2]+ +g[bd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Zd>>2]-+g[cd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Zd>>2]+ +g[cd>>2];g[hd>>2]=+g[dd>>2]+ +g[gd>>2];g[od>>2]=+g[kd>>2]+ +g[nd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[hd>>2]-+g[od>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[hd>>2]+ +g[od>>2];g[Pg>>2]=+g[rd>>2]-+g[qd>>2];g[Ug>>2]=+g[Qg>>2]+ +g[Tg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Pg>>2]-+g[Ug>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Pg>>2]+ +g[Ug>>2];g[Vg>>2]=+g[nd>>2]-+g[kd>>2];g[Wg>>2]=+g[Tg>>2]-+g[Qg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Vg>>2]-+g[Wg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Vg>>2]+ +g[Wg>>2];g[pd>>2]=+g[dd>>2]-+g[gd>>2];g[sd>>2]=+g[qd>>2]+ +g[rd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[pd>>2]-+g[sd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[pd>>2]+ 
+g[sd>>2];c[Bi>>2]=(c[Bi>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+248;c[n>>2]=c[n>>2]^c[2998]}i=Ci;return}function Rr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,41,5272);i=b;return}function Sr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0;L=i;i=i+128|0;k=L+116|0;l=L+112|0;m=L+108|0;n=L+104|0;M=L+100|0;o=L+96|0;p=L+92|0;K=L+80|0;q=L+76|0;H=L+72|0;v=L+68|0;D=L+64|0;A=L+60|0;E=L+56|0;B=L+52|0;I=L+48|0;s=L+44|0;u=L+40|0;r=L+36|0;t=L+32|0;x=L+28|0;z=L+24|0;w=L+20|0;y=L+16|0;C=L+12|0;F=L+8|0;G=L+4|0;J=L;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[M>>2]=f;c[o>>2]=h;c[p>>2]=j;g[L+88>>2]=.8660253882408142;g[L+84>>2]=.5;c[K>>2]=c[M>>2];c[m>>2]=(c[m>>2]|0)+((c[M>>2]|0)-1<<2<<2);while(1){if((c[K>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[H>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[D>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+8>>2];g[y>>2]=+g[(c[m>>2]|0)+12>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[E>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[B>>2]=+g[v>>2]+ +g[A>>2];g[I>>2]=+g[D>>2]+ +g[E>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[B>>2];g[C>>2]=+g[q>>2]-+g[B>>2]*.5;g[F>>2]=(+g[D>>2]-+g[E>>2])*.8660253882408142;g[c[l>>2]>>2]=+g[C>>2]-+g[F>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[C>>2]+ +g[F>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[I>>2]+ +g[H>>2];g[G>>2]=(+g[A>>2]-+g[v>>2])*.8660253882408142;g[J>>2]=+g[H>>2]-+g[I>>2]*.5;g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]-+g[J>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]+ 
+g[J>>2];c[K>>2]=(c[K>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+16}i=L;return}function Tr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,42,5320);i=b;return}function Ur(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;k=T+140|0;l=T+136|0;m=T+132|0;n=T+128|0;U=T+124|0;o=T+120|0;p=T+116|0;S=T+112|0;q=T+108|0;O=T+104|0;v=T+100|0;N=T+96|0;B=T+92|0;J=T+88|0;G=T+84|0;K=T+80|0;s=T+76|0;u=T+72|0;r=T+68|0;t=T+64|0;y=T+60|0;A=T+56|0;x=T+52|0;z=T+48|0;D=T+44|0;F=T+40|0;C=T+36|0;E=T+32|0;w=T+28|0;H=T+24|0;I=T+20|0;L=T+16|0;M=T+12|0;P=T+8|0;Q=T+4|0;R=T;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[U>>2]=f;c[o>>2]=h;c[p>>2]=j;c[S>>2]=c[U>>2];c[m>>2]=(c[m>>2]|0)+(((c[U>>2]|0)-1|0)*6<<2);while(1){if((c[S>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[O>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[r>>2]=+g[(c[m>>2]|0)+8>>2];g[t>>2]=+g[(c[m>>2]|0)+12>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[N>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[y>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[A>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[x>>2]=+g[c[m>>2]>>2];g[z>>2]=+g[(c[m>>2]|0)+4>>2];g[B>>2]=+g[x>>2]*+g[y>>2]+ +g[z>>2]*+g[A>>2];g[J>>2]=+g[x>>2]*+g[A>>2]-+g[z>>2]*+g[y>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[F>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[C>>2]=+g[(c[m>>2]|0)+16>>2];g[E>>2]=+g[(c[m>>2]|0)+20>>2];g[G>>2]=+g[C>>2]*+g[D>>2]+ +g[E>>2]*+g[F>>2];g[K>>2]=+g[C>>2]*+g[F>>2]-+g[E>>2]*+g[D>>2];g[w>>2]=+g[q>>2]+ +g[v>>2];g[H>>2]=+g[B>>2]+ +g[G>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]-+g[H>>2];g[c[k>>2]>>2]=+g[w>>2]+ +g[H>>2];g[I>>2]=+g[q>>2]-+g[v>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[c[l>>2]>>2]=+g[I>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[M>>2]=+g[J>>2]+ 
+g[K>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[M>>2]-+g[P>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[P>>2];g[Q>>2]=+g[G>>2]-+g[B>>2];g[R>>2]=+g[O>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Q>>2]+ +g[R>>2];c[S>>2]=(c[S>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+24}i=T;return}function Vr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,43,5368);i=b;return}function Wr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0;la=i;i=i+240|0;k=la+236|0;l=la+232|0;m=la+228|0;n=la+224|0;ma=la+220|0;o=la+216|0;p=la+212|0;ka=la+192|0;q=la+188|0;D=la+184|0;ea=la+180|0;ha=la+176|0;B=la+172|0;A=la+168|0;E=la+164|0;F=la+160|0;G=la+156|0;O=la+152|0;Z=la+148|0;_=la+144|0;v=la+140|0;ca=la+136|0;Y=la+132|0;ga=la+128|0;N=la+124|0;da=la+120|0;T=la+116|0;fa=la+112|0;s=la+108|0;u=la+104|0;r=la+100|0;t=la+96|0;V=la+92|0;X=la+88|0;U=la+84|0;W=la+80|0;x=la+76|0;M=la+72|0;w=la+68|0;y=la+64|0;Q=la+60|0;S=la+56|0;P=la+52|0;R=la+48|0;ia=la+44|0;z=la+40|0;ba=la+36|0;ja=la+32|0;$=la+28|0;aa=la+24|0;C=la+20|0;K=la+16|0;J=la+12|0;L=la+8|0;H=la+4|0;I=la;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[ma>>2]=f;c[o>>2]=h;c[p>>2]=j;g[la+208>>2]=.25;g[la+204>>2]=.55901700258255;g[la+200>>2]=.5877852439880371;g[la+196>>2]=.9510565400123596;c[ka>>2]=c[ma>>2];c[m>>2]=(c[m>>2]|0)+((c[ma>>2]|0)-1<<3<<2);while(1){if((c[ka>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[D>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ 
+g[t>>2]*+g[u>>2];g[ca>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[V>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[X>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[U>>2]=+g[(c[m>>2]|0)+16>>2];g[W>>2]=+g[(c[m>>2]|0)+20>>2];g[Y>>2]=+g[U>>2]*+g[V>>2]+ +g[W>>2]*+g[X>>2];g[ga>>2]=+g[U>>2]*+g[X>>2]-+g[W>>2]*+g[V>>2];g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+24>>2];g[y>>2]=+g[(c[m>>2]|0)+28>>2];g[N>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[M>>2];g[da>>2]=+g[w>>2]*+g[M>>2]-+g[y>>2]*+g[x>>2];g[Q>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[S>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[P>>2]=+g[(c[m>>2]|0)+8>>2];g[R>>2]=+g[(c[m>>2]|0)+12>>2];g[T>>2]=+g[P>>2]*+g[Q>>2]+ +g[R>>2]*+g[S>>2];g[fa>>2]=+g[P>>2]*+g[S>>2]-+g[R>>2]*+g[Q>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[B>>2]=+g[T>>2]-+g[Y>>2];g[A>>2]=+g[N>>2]-+g[v>>2];g[E>>2]=+g[ca>>2]+ +g[da>>2];g[F>>2]=+g[fa>>2]+ +g[ga>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[O>>2]=+g[v>>2]+ +g[N>>2];g[Z>>2]=+g[T>>2]+ +g[Y>>2];g[_>>2]=+g[O>>2]+ +g[Z>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[_>>2];g[ia>>2]=+g[ea>>2]*.9510565400123596+ +g[ha>>2]*.5877852439880371;g[z>>2]=+g[ha>>2]*.9510565400123596-+g[ea>>2]*.5877852439880371;g[$>>2]=(+g[O>>2]-+g[Z>>2])*.55901700258255;g[aa>>2]=+g[q>>2]-+g[_>>2]*.25;g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[ja>>2]=+g[aa>>2]-+g[$>>2];g[c[l>>2]>>2]=+g[ba>>2]-+g[ia>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[ja>>2]+ +g[z>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ba>>2]+ +g[ia>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]-+g[z>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[G>>2]+ +g[D>>2];g[C>>2]=+g[A>>2]*.5877852439880371+ +g[B>>2]*.9510565400123596;g[K>>2]=+g[A>>2]*.9510565400123596-+g[B>>2]*.5877852439880371;g[H>>2]=+g[D>>2]-+g[G>>2]*.25;g[I>>2]=(+g[E>>2]-+g[F>>2])*.55901700258255;g[J>>2]=+g[H>>2]-+g[I>>2];g[L>>2]=+g[I>>2]+ +g[H>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[C>>2]-+g[J>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[K>>2]+ +g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[C>>2]+ 
+g[J>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[K>>2]-+g[L>>2];c[ka>>2]=(c[ka>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32}i=la;return}function Xr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,44,5416);i=b;return}function Yr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,y
f=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,Tm=0,U
m=0,Vm=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,oo=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0,uq=0,vq=0,wq=0,xq=0,yq=0,zq=0,Aq=0,Bq=0,Cq=0,Dq=0,Eq=0,Fq=0,Gq=0,Hq=0,Iq=0,Jq=0,Kq=0,Lq=0,Mq=0,Nq=0,Oq=0,Pq=0,Qq=0,Rq=0,Sq=0,Tq=0,Uq=0,Vq=0,Wq=0,Xq=0,Yq=0,Zq=0,_q=0,$q=0,ar=0,br=0,cr=0,dr=0,er=0,fr=0,gr=0,hr=0,ir=0,jr=0,kr=0,lr=0,mr=0,nr=0,or=0,pr=0,qr=0,rr=0,sr=0,tr=0,ur=0,vr=0,wr=0,xr=0,yr=0,zr=0,Ar=0,Br=0,Cr=0,Dr=0,Er=0,Fr=0,Gr=0,Hr=0,Ir=0,Jr=0,Kr=0,Lr=0,Mr=0,Nr=0,Or=0,Pr=0,Qr=0,Rr=0,Sr=0,Tr=0,Ur=0,Vr=0,Wr=0,Xr=0,Yr=0,Zr=0,_r=0,$r=0,as=0,bs=0,cs=0,ds=0,es=0,fs=0,gs=0,hs=0,is=0,js=0,ks=0,ls=0,ms=0,ns=0,os=0,ps=0,qs=0,rs=0,ss=0,ts=0,us=0,vs=0,ws=0,xs=0,ys=0,zs=0,As=0,Bs=0,Cs=0,Ds=0,Es=0,Fs=0,Gs=0,Hs=0,Is=0,Js=0,Ks=0,Ls=0,Ms=0,Ns=0,Os=0,Ps=0,Qs=0,Rs=0,Ss=0,Ts=0,Us=0,Vs=0,Ws=0,Xs=0,Ys=0,Zs=0,_s=0,$s=0,at=0,bt=0,ct=0,dt=0,et=0,ft=0,gt=0,ht=0,it=0,jt=0,kt=0,lt=0,mt=0,nt=0,ot=0,pt=0,qt=0,rt=0,st=0,tt=0,ut=0,vt=0,wt=0,xt=0,yt=0,zt=0,At=0,Bt=0,Ct=0,Dt=0,Et=0,Ft=0,Gt=0,Ht=0,It=0,Jt=0,Kt=0,Lt=0,Mt=0,Nt=0,Ot=0,Pt=0,Qt=0,Rt=0,St=0,Tt=0,Ut=0,Vt=0,Wt=0,Xt=0,Yt=0,Zt=0,_t=0,$t=0,au=0,bu=0,cu=0,du=0,eu=0,fu=0,gu=0,hu=0,iu=0,ju=0,ku=0,lu=0,mu=0,nu=0,ou=0,pu=0,q
u=0,ru=0,su=0,tu=0,uu=0,vu=0,wu=0,xu=0,yu=0,zu=0,Au=0,Bu=0,Cu=0,Du=0,Eu=0,Fu=0,Gu=0,Hu=0,Iu=0,Ju=0,Ku=0,Lu=0,Mu=0,Nu=0,Ou=0,Pu=0,Qu=0,Ru=0,Su=0,Tu=0,Uu=0,Vu=0,Wu=0,Xu=0,Yu=0,Zu=0;Yu=i;i=i+4752|0;k=Yu+4744|0;l=Yu+4740|0;m=Yu+4736|0;n=Yu+4732|0;Zu=Yu+4728|0;o=Yu+4724|0;p=Yu+4720|0;Xu=Yu+4656|0;Gu=Yu+4652|0;cn=Yu+4648|0;dt=Yu+4644|0;tu=Yu+4640|0;Uf=Yu+4636|0;yl=Yu+4632|0;Wr=Yu+4628|0;xt=Yu+4624|0;Yj=Yu+4620|0;wt=Yu+4616|0;fn=Yu+4612|0;Rr=Yu+4608|0;dg=Yu+4604|0;su=Yu+4600|0;Dk=Yu+4596|0;at=Yu+4592|0;v=Yu+4588|0;jr=Yu+4584|0;pg=Yu+4580|0;gj=Yu+4576|0;Hk=Yu+4572|0;Jm=Yu+4568|0;mn=Yu+4564|0;pp=Yu+4560|0;qa=Yu+4556|0;ir=Yu+4552|0;Cf=Yu+4548|0;hj=Yu+4544|0;Kk=Yu+4540|0;Km=Yu+4536|0;rn=Yu+4532|0;qp=Yu+4528|0;R=Yu+4524|0;lb=Yu+4520|0;mr=Yu+4516|0;nr=Yu+4512|0;or=Yu+4508|0;pr=Yu+4504|0;Jf=Yu+4500|0;Nk=Yu+4496|0;Do=Yu+4492|0;tp=Yu+4488|0;tg=Yu+4484|0;Rk=Yu+4480|0;ah=Yu+4476|0;Ok=Yu+4472|0;Zn=Yu+4468|0;up=Yu+4464|0;zg=Yu+4460|0;Qk=Yu+4456|0;La=Yu+4452|0;gb=Yu+4448|0;rr=Yu+4444|0;uq=Yu+4440|0;vq=Yu+4436|0;wq=Yu+4432|0;hh=Yu+4428|0;Xk=Yu+4424|0;Oo=Yu+4420|0;wp=Yu+4416|0;sh=Yu+4412|0;Vk=Yu+4408|0;Dg=Yu+4404|0;Yk=Yu+4400|0;Jo=Yu+4396|0;xp=Yu+4392|0;yh=Yu+4388|0;Uk=Yu+4384|0;Se=Yu+4380|0;Lq=Yu+4376|0;Np=Yu+4372|0;Ip=Yu+4368|0;Sq=Yu+4364|0;zs=Yu+4360|0;Oh=Yu+4356|0;tm=Yu+4352|0;Zh=Yu+4348|0;Fm=Yu+4344|0;Dj=Yu+4340|0;um=Yu+4336|0;xo=Yu+4332|0;kq=Yu+4328|0;Aj=Yu+4324|0;Em=Yu+4320|0;dc=Yu+4316|0;Fq=Yu+4312|0;no=Yu+4308|0;Ep=Yu+4304|0;Cq=Yu+4300|0;us=Yu+4296|0;Mg=Yu+4292|0;lm=Yu+4288|0;Xg=Yu+4284|0;Dl=Yu+4280|0;Bi=Yu+4276|0;mm=Yu+4272|0;Wo=Yu+4268|0;Bp=Yu+4264|0;yi=Yu+4260|0;Cl=Yu+4256|0;Xd=Yu+4252|0;Dq=Yu+4248|0;go=Yu+4244|0;oo=Yu+4240|0;Iq=Yu+4236|0;vs=Yu+4232|0;Ih=Yu+4228|0;Di=Yu+4224|0;ri=Yu+4220|0;Ei=Yu+4216|0;im=Yu+4212|0;pm=Yu+4208|0;$o=Yu+4204|0;po=Yu+4200|0;Hl=Yu+4196|0;om=Yu+4192|0;nf=Yu+4188|0;Tq=Yu+4184|0;gp=Yu+4180|0;Op=Yu+4176|0;Oq=Yu+4172|0;As=Yu+4168|0;Ki=Yu+4164|0;Fj=Yu+4160|0;tj=Yu+4156|0;Gj=Yu+4152|0;Bm=Yu+4148|0;Kl=Yu+4144|0;bp=Yu+4140|0;Pp=Yu+4136|0;ym=Yu+4132|0;Hm=Yu+
4128|0;q=Yu+4124|0;Ur=Yu+4120|0;hf=Yu+4116|0;Tr=Yu+4112|0;Im=Yu+4108|0;rf=Yu+4104|0;Bs=Yu+4100|0;sf=Yu+4096|0;Ib=Yu+4092|0;_d=Yu+4088|0;za=Yu+4084|0;Rc=Yu+4080|0;Ji=Yu+4076|0;zl=Yu+4072|0;Ah=Yu+4068|0;qk=Yu+4064|0;ap=Yu+4060|0;sr=Yu+4056|0;Sn=Yu+4052|0;jq=Yu+4048|0;rg=Yu+4044|0;Kt=Yu+4040|0;bt=Yu+4036|0;ct=Yu+4032|0;qf=Yu+4028|0;Tf=Yu+4024|0;Sr=Yu+4020|0;Vr=Yu+4016|0;Lu=Yu+4012|0;Wf=Yu+4008|0;Qu=Yu+4004|0;Xf=Yu+4e3|0;Vf=Yu+3996|0;Yf=Yu+3992|0;Wu=Yu+3988|0;$f=Yu+3984|0;Wj=Yu+3980|0;ag=Yu+3976|0;_f=Yu+3972|0;bg=Yu+3968|0;Iu=Yu+3964|0;Ku=Yu+3960|0;Hu=Yu+3956|0;Ju=Yu+3952|0;Nu=Yu+3948|0;Pu=Yu+3944|0;Mu=Yu+3940|0;Ou=Yu+3936|0;Tu=Yu+3932|0;Vu=Yu+3928|0;Su=Yu+3924|0;Uu=Yu+3920|0;Tj=Yu+3916|0;Vj=Yu+3912|0;Sj=Yu+3908|0;Uj=Yu+3904|0;Ru=Yu+3900|0;Xj=Yu+3896|0;dn=Yu+3892|0;en=Yu+3888|0;Zf=Yu+3884|0;cg=Yu+3880|0;Bk=Yu+3876|0;Ck=Yu+3872|0;ik=Yu+3868|0;jn=Yu+3864|0;fg=Yu+3860|0;mg=Yu+3856|0;u=Yu+3852|0;kn=Yu+3848|0;ig=Yu+3844|0;ng=Yu+3840|0;jg=Yu+3836|0;og=Yu+3832|0;ck=Yu+3828|0;kg=Yu+3824|0;hk=Yu+3820|0;lg=Yu+3816|0;$j=Yu+3812|0;bk=Yu+3808|0;_j=Yu+3804|0;ak=Yu+3800|0;ek=Yu+3796|0;gk=Yu+3792|0;dk=Yu+3788|0;fk=Yu+3784|0;nk=Yu+3780|0;gg=Yu+3776|0;t=Yu+3772|0;hg=Yu+3768|0;kk=Yu+3764|0;mk=Yu+3760|0;jk=Yu+3756|0;lk=Yu+3752|0;pk=Yu+3748|0;s=Yu+3744|0;ok=Yu+3740|0;r=Yu+3736|0;Fk=Yu+3732|0;Gk=Yu+3728|0;hn=Yu+3724|0;ln=Yu+3720|0;ea=Yu+3716|0;on=Yu+3712|0;uf=Yu+3708|0;xf=Yu+3704|0;pa=Yu+3700|0;pn=Yu+3696|0;vf=Yu+3692|0;Af=Yu+3688|0;wf=Yu+3684|0;Bf=Yu+3680|0;A=Yu+3676|0;qg=Yu+3672|0;da=Yu+3668|0;tf=Yu+3664|0;x=Yu+3660|0;z=Yu+3656|0;w=Yu+3652|0;y=Yu+3648|0;aa=Yu+3644|0;ca=Yu+3640|0;$=Yu+3636|0;ba=Yu+3632|0;ja=Yu+3628|0;yf=Yu+3624|0;oa=Yu+3620|0;zf=Yu+3616|0;ga=Yu+3612|0;ia=Yu+3608|0;fa=Yu+3604|0;ha=Yu+3600|0;la=Yu+3596|0;na=Yu+3592|0;ka=Yu+3588|0;ma=Yu+3584|0;Ik=Yu+3580|0;Jk=Yu+3576|0;nn=Yu+3572|0;qn=Yu+3568|0;xa=Yu+3564|0;vg=Yu+3560|0;E=Yu+3556|0;wg=Yu+3552|0;F=Yu+3548|0;_n=Yu+3544|0;K=Yu+3540|0;Gf=Yu+3536|0;P=Yu+3532|0;Hf=Yu+3528|0;Q=Yu+3524|0;$n=Yu+3520|0;Ba=Yu+3516|0;Xn=Yu+3512|0;Kf=Yu+35
08|0;Nf=Yu+3504|0;kb=Yu+3500|0;Wn=Yu+3496|0;Pf=Yu+3492|0;Sf=Yu+3488|0;ua=Yu+3484|0;wa=Yu+3480|0;ta=Yu+3476|0;va=Yu+3472|0;B=Yu+3468|0;D=Yu+3464|0;ya=Yu+3460|0;C=Yu+3456|0;H=Yu+3452|0;J=Yu+3448|0;G=Yu+3444|0;I=Yu+3440|0;M=Yu+3436|0;O=Yu+3432|0;L=Yu+3428|0;N=Yu+3424|0;W=Yu+3420|0;Lf=Yu+3416|0;Aa=Yu+3412|0;Mf=Yu+3408|0;T=Yu+3404|0;V=Yu+3400|0;S=Yu+3396|0;U=Yu+3392|0;Y=Yu+3388|0;_=Yu+3384|0;X=Yu+3380|0;Z=Yu+3376|0;Ga=Yu+3372|0;Qf=Yu+3368|0;jb=Yu+3364|0;Rf=Yu+3360|0;Da=Yu+3356|0;Fa=Yu+3352|0;Ca=Yu+3348|0;Ea=Yu+3344|0;Ia=Yu+3340|0;ib=Yu+3336|0;Ha=Yu+3332|0;Ja=Yu+3328|0;Ff=Yu+3324|0;If=Yu+3320|0;ao=Yu+3316|0;Co=Yu+3312|0;Of=Yu+3308|0;sg=Yu+3304|0;Ag=Yu+3300|0;Bg=Yu+3296|0;Vn=Yu+3292|0;Yn=Yu+3288|0;xg=Yu+3284|0;yg=Yu+3280|0;rb=Yu+3276|0;uh=Yu+3272|0;wb=Yu+3268|0;vh=Yu+3264|0;xb=Yu+3260|0;Fo=Yu+3256|0;Cb=Yu+3252|0;eh=Yu+3248|0;Hb=Yu+3244|0;fh=Yu+3240|0;Ka=Yu+3236|0;Go=Yu+3232|0;Wa=Yu+3228|0;Mo=Yu+3224|0;ih=Yu+3220|0;lh=Yu+3216|0;fb=Yu+3212|0;Lo=Yu+3208|0;nh=Yu+3204|0;qh=Yu+3200|0;ob=Yu+3196|0;qb=Yu+3192|0;nb=Yu+3188|0;pb=Yu+3184|0;tb=Yu+3180|0;vb=Yu+3176|0;sb=Yu+3172|0;ub=Yu+3168|0;zb=Yu+3164|0;Bb=Yu+3160|0;yb=Yu+3156|0;Ab=Yu+3152|0;Eb=Yu+3148|0;Gb=Yu+3144|0;Db=Yu+3140|0;Fb=Yu+3136|0;Qa=Yu+3132|0;jh=Yu+3128|0;Va=Yu+3124|0;kh=Yu+3120|0;Na=Yu+3116|0;Pa=Yu+3112|0;Ma=Yu+3108|0;Oa=Yu+3104|0;Sa=Yu+3100|0;Ua=Yu+3096|0;Ra=Yu+3092|0;Ta=Yu+3088|0;$a=Yu+3084|0;oh=Yu+3080|0;eb=Yu+3076|0;ph=Yu+3072|0;Ya=Yu+3068|0;_a=Yu+3064|0;Xa=Yu+3060|0;Za=Yu+3056|0;bb=Yu+3052|0;db=Yu+3048|0;ab=Yu+3044|0;cb=Yu+3040|0;dh=Yu+3036|0;gh=Yu+3032|0;Ko=Yu+3028|0;No=Yu+3024|0;mh=Yu+3020|0;rh=Yu+3016|0;zh=Yu+3012|0;Cg=Yu+3008|0;Ho=Yu+3004|0;Io=Yu+3e3|0;wh=Yu+2996|0;xh=Yu+2992|0;dd=Yu+2988|0;wj=Yu+2984|0;id=Yu+2980|0;xj=Yu+2976|0;jd=Yu+2972|0;to=Yu+2968|0;od=Yu+2964|0;Lh=Yu+2960|0;td=Yu+2956|0;Mh=Yu+2952|0;ud=Yu+2948|0;uo=Yu+2944|0;fe=Yu+2940|0;Lp=Yu+2936|0;Ph=Yu+2932|0;Sh=Yu+2928|0;Qe=Yu+2924|0;kp=Yu+2920|0;Uh=Yu+2916|0;Xh=Yu+2912|0;ad=Yu+2908|0;cd=Yu+2904|0;Zd=Yu+2900|0;bd=Yu+2896|0;fd=Yu+2892|0;hd=Yu+2888|0;e
d=Yu+2884|0;gd=Yu+2880|0;ld=Yu+2876|0;nd=Yu+2872|0;kd=Yu+2868|0;md=Yu+2864|0;qd=Yu+2860|0;sd=Yu+2856|0;pd=Yu+2852|0;rd=Yu+2848|0;$d=Yu+2844|0;Qh=Yu+2840|0;ee=Yu+2836|0;Rh=Yu+2832|0;xd=Yu+2828|0;zd=Yu+2824|0;wd=Yu+2820|0;yd=Yu+2816|0;be=Yu+2812|0;de=Yu+2808|0;ae=Yu+2804|0;ce=Yu+2800|0;Ke=Yu+2796|0;Vh=Yu+2792|0;Pe=Yu+2788|0;Wh=Yu+2784|0;he=Yu+2780|0;Je=Yu+2776|0;ge=Yu+2772|0;ie=Yu+2768|0;Me=Yu+2764|0;Oe=Yu+2760|0;Le=Yu+2756|0;Ne=Yu+2752|0;vd=Yu+2748|0;Re=Yu+2744|0;jp=Yu+2740|0;Mp=Yu+2736|0;Qq=Yu+2732|0;Rq=Yu+2728|0;Ii=Yu+2724|0;Nh=Yu+2720|0;Th=Yu+2716|0;Yh=Yu+2712|0;Bj=Yu+2708|0;Cj=Yu+2704|0;vo=Yu+2700|0;wo=Yu+2696|0;yj=Yu+2692|0;zj=Yu+2688|0;Pb=Yu+2684|0;Ig=Yu+2680|0;sc=Yu+2676|0;Jg=Yu+2672|0;tc=Yu+2668|0;jo=Yu+2664|0;yc=Yu+2660|0;vi=Yu+2656|0;Dc=Yu+2652|0;wi=Yu+2648|0;Ec=Yu+2644|0;ko=Yu+2640|0;Qc=Yu+2636|0;Uo=Yu+2632|0;Ng=Yu+2628|0;Qg=Yu+2624|0;bc=Yu+2620|0;To=Yu+2616|0;Ug=Yu+2612|0;Vg=Yu+2608|0;Mb=Yu+2604|0;Ob=Yu+2600|0;Lb=Yu+2596|0;Nb=Yu+2592|0;Rb=Yu+2588|0;rc=Yu+2584|0;Qb=Yu+2580|0;Sb=Yu+2576|0;vc=Yu+2572|0;xc=Yu+2568|0;uc=Yu+2564|0;wc=Yu+2560|0;Ac=Yu+2556|0;Cc=Yu+2552|0;zc=Yu+2548|0;Bc=Yu+2544|0;Kc=Yu+2540|0;Og=Yu+2536|0;Pc=Yu+2532|0;Pg=Yu+2528|0;Hc=Yu+2524|0;Jc=Yu+2520|0;Gc=Yu+2516|0;Ic=Yu+2512|0;Mc=Yu+2508|0;Oc=Yu+2504|0;Lc=Yu+2500|0;Nc=Yu+2496|0;Xb=Yu+2492|0;Sg=Yu+2488|0;ac=Yu+2484|0;Tg=Yu+2480|0;Ub=Yu+2476|0;Wb=Yu+2472|0;Tb=Yu+2468|0;Vb=Yu+2464|0;Zb=Yu+2460|0;$b=Yu+2456|0;Yb=Yu+2452|0;_b=Yu+2448|0;Fc=Yu+2444|0;cc=Yu+2440|0;lo=Yu+2436|0;mo=Yu+2432|0;Aq=Yu+2428|0;Bq=Yu+2424|0;Kg=Yu+2420|0;Lg=Yu+2416|0;Rg=Yu+2412|0;Wg=Yu+2408|0;zi=Yu+2404|0;Ai=Yu+2400|0;So=Yu+2396|0;Vo=Yu+2392|0;ui=Yu+2388|0;xi=Yu+2384|0;oc=Yu+2380|0;Yo=Yu+2376|0;$g=Yu+2372|0;Dh=Yu+2368|0;Vd=Yu+2364|0;co=Yu+2360|0;ki=Yu+2356|0;pi=Yu+2352|0;_c=Yu+2348|0;Zo=Yu+2344|0;Bh=Yu+2340|0;Gh=Yu+2336|0;Kd=Yu+2332|0;bo=Yu+2328|0;ji=Yu+2324|0;mi=Yu+2320|0;ic=Yu+2316|0;Zg=Yu+2312|0;nc=Yu+2308|0;_g=Yu+2304|0;fc=Yu+2300|0;hc=Yu+2296|0;ec=Yu+2292|0;gc=Yu+2288|0;kc=Yu+2284|0;mc=Yu+2280|0;jc=Yu+2276|0;lc=Yu+2272|0
;Pd=Yu+2268|0;ni=Yu+2264|0;Ud=Yu+2260|0;oi=Yu+2256|0;Md=Yu+2252|0;Od=Yu+2248|0;Ld=Yu+2244|0;Nd=Yu+2240|0;Rd=Yu+2236|0;Td=Yu+2232|0;Qd=Yu+2228|0;Sd=Yu+2224|0;Uc=Yu+2220|0;Eh=Yu+2216|0;Zc=Yu+2212|0;Fh=Yu+2208|0;qc=Yu+2204|0;Tc=Yu+2200|0;pc=Yu+2196|0;Sc=Yu+2192|0;Wc=Yu+2188|0;Yc=Yu+2184|0;Vc=Yu+2180|0;Xc=Yu+2176|0;Ed=Yu+2172|0;Jh=Yu+2168|0;Jd=Yu+2164|0;Kh=Yu+2160|0;Bd=Yu+2156|0;Dd=Yu+2152|0;Ad=Yu+2148|0;Cd=Yu+2144|0;Gd=Yu+2140|0;Id=Yu+2136|0;Fd=Yu+2132|0;Hd=Yu+2128|0;$c=Yu+2124|0;Wd=Yu+2120|0;eo=Yu+2116|0;fo=Yu+2112|0;Gq=Yu+2108|0;Hq=Yu+2104|0;Ch=Yu+2100|0;Hh=Yu+2096|0;li=Yu+2092|0;qi=Yu+2088|0;Il=Yu+2084|0;Jl=Yu+2080|0;Xo=Yu+2076|0;_o=Yu+2072|0;Fl=Yu+2068|0;Gl=Yu+2064|0;bf=Yu+2060|0;dp=Yu+2056|0;$h=Yu+2052|0;gi=Yu+2048|0;lf=Yu+2044|0;Ao=Yu+2040|0;Oi=Yu+2036|0;Ti=Yu+2032|0;oe=Yu+2028|0;ep=Yu+2024|0;ci=Yu+2020|0;hi=Yu+2016|0;Ae=Yu+2012|0;zo=Yu+2008|0;Ni=Yu+2004|0;Qi=Yu+2e3|0;Xe=Yu+1996|0;ei=Yu+1992|0;af=Yu+1988|0;fi=Yu+1984|0;Ue=Yu+1980|0;We=Yu+1976|0;Te=Yu+1972|0;Ve=Yu+1968|0;Ze=Yu+1964|0;$e=Yu+1960|0;Ye=Yu+1956|0;_e=Yu+1952|0;Fe=Yu+1948|0;Ri=Yu+1944|0;kf=Yu+1940|0;Si=Yu+1936|0;Ce=Yu+1932|0;Ee=Yu+1928|0;Be=Yu+1924|0;De=Yu+1920|0;He=Yu+1916|0;jf=Yu+1912|0;Ge=Yu+1908|0;Ie=Yu+1904|0;gf=Yu+1900|0;ai=Yu+1896|0;ne=Yu+1892|0;bi=Yu+1888|0;df=Yu+1884|0;ff=Yu+1880|0;cf=Yu+1876|0;ef=Yu+1872|0;ke=Yu+1868|0;me=Yu+1864|0;je=Yu+1860|0;le=Yu+1856|0;ue=Yu+1852|0;Li=Yu+1848|0;ze=Yu+1844|0;Mi=Yu+1840|0;re=Yu+1836|0;te=Yu+1832|0;qe=Yu+1828|0;se=Yu+1824|0;we=Yu+1820|0;ye=Yu+1816|0;ve=Yu+1812|0;xe=Yu+1808|0;pe=Yu+1804|0;mf=Yu+1800|0;cp=Yu+1796|0;fp=Yu+1792|0;Mq=Yu+1788|0;Nq=Yu+1784|0;di=Yu+1780|0;ii=Yu+1776|0;Pi=Yu+1772|0;sj=Yu+1768|0;zm=Yu+1764|0;Am=Yu+1760|0;yo=Yu+1756|0;Bo=Yu+1752|0;wm=Yu+1748|0;xm=Yu+1744|0;sa=Yu+1740|0;os=Yu+1736|0;Nr=Yu+1732|0;Or=Yu+1728|0;Yr=Yu+1724|0;Ds=Yu+1720|0;Jb=Yu+1716|0;Cs=Yu+1712|0;pf=Yu+1708|0;_r=Yu+1704|0;xs=Yu+1700|0;Hr=Yu+1696|0;Er=Yu+1692|0;Ir=Yu+1688|0;rs=Yu+1684|0;Pr=Yu+1680|0;Zj=Yu+1676|0;ra=Yu+1672|0;Lr=Yu+1668|0;Mr=Yu+1664|0;Qr=Yu+1660|0;Xr=Yu+1656|
0;mb=Yu+1652|0;hb=Yu+1648|0;Yd=Yu+1644|0;of=Yu+1640|0;ts=Yu+1636|0;ws=Yu+1632|0;ys=Yu+1628|0;Dr=Yu+1624|0;ps=Yu+1620|0;qs=Yu+1616|0;Kb=Yu+1612|0;Zr=Yu+1608|0;$r=Yu+1604|0;Kr=Yu+1600|0;ss=Yu+1596|0;Fr=Yu+1592|0;as=Yu+1588|0;Es=Yu+1584|0;Fs=Yu+1580|0;Gs=Yu+1576|0;Gr=Yu+1572|0;Jr=Yu+1568|0;Ef=Yu+1564|0;Pj=Yu+1560|0;ft=Yu+1556|0;Mt=Yu+1552|0;Gg=Yu+1548|0;Lt=Yu+1544|0;Ui=Yu+1540|0;_s=Yu+1536|0;Hi=Yu+1532|0;cj=Yu+1528|0;Mj=Yu+1524|0;Yi=Yu+1520|0;Jj=Yu+1516|0;dj=Yu+1512|0;Nj=Yu+1508|0;$i=Yu+1504|0;eg=Yu+1500|0;Df=Yu+1496|0;$s=Yu+1492|0;et=Yu+1488|0;ch=Yu+1484|0;Qj=Yu+1480|0;Fg=Yu+1476|0;Rj=Yu+1472|0;ug=Yu+1468|0;bh=Yu+1464|0;th=Yu+1460|0;Eg=Yu+1456|0;ti=Yu+1452|0;Xi=Yu+1448|0;Gi=Yu+1444|0;Wi=Yu+1440|0;Yg=Yu+1436|0;si=Yu+1432|0;Ci=Yu+1428|0;Fi=Yu+1424|0;vj=Yu+1420|0;_i=Yu+1416|0;Ij=Yu+1412|0;Zi=Yu+1408|0;_h=Yu+1404|0;uj=Yu+1400|0;Ej=Yu+1396|0;Hj=Yu+1392|0;Hg=Yu+1388|0;Kj=Yu+1384|0;jt=Yu+1380|0;Nt=Yu+1376|0;Ot=Yu+1372|0;Pt=Yu+1368|0;Lj=Yu+1364|0;Oj=Yu+1360|0;Vi=Yu+1356|0;aj=Yu+1352|0;Zs=Yu+1348|0;gt=Yu+1344|0;ht=Yu+1340|0;it=Yu+1336|0;bj=Yu+1332|0;ej=Yu+1328|0;lr=Yu+1324|0;Ar=Yu+1320|0;is=Yu+1316|0;ms=Yu+1312|0;Ls=Yu+1308|0;pt=Yu+1304|0;yq=Yu+1300|0;Is=Yu+1296|0;Kq=Yu+1292|0;xr=Yu+1288|0;bs=Yu+1284|0;ot=Yu+1280|0;fs=Yu+1276|0;ls=Yu+1272|0;ur=Yu+1268|0;yr=Yu+1264|0;hr=Yu+1260|0;kr=Yu+1256|0;gs=Yu+1252|0;hs=Yu+1248|0;Js=Yu+1244|0;Ks=Yu+1240|0;qr=Yu+1236|0;xq=Yu+1232|0;Eq=Yu+1228|0;Jq=Yu+1224|0;Br=Yu+1220|0;Cr=Yu+1216|0;ds=Yu+1212|0;es=Yu+1208|0;Pq=Yu+1204|0;tr=Yu+1200|0;zq=Yu+1196|0;vr=Yu+1192|0;nt=Yu+1188|0;qt=Yu+1184|0;rt=Yu+1180|0;st=Yu+1176|0;wr=Yu+1172|0;zr=Yu+1168|0;cs=Yu+1164|0;js=Yu+1160|0;Hs=Yu+1156|0;kt=Yu+1152|0;lt=Yu+1148|0;mt=Yu+1144|0;ks=Yu+1140|0;ns=Yu+1136|0;sp=Yu+1132|0;tq=Yu+1128|0;zp=Yu+1124|0;ut=Yu+1120|0;zt=Yu+1116|0;Ft=Yu+1112|0;Wq=Yu+1108|0;Et=Yu+1104|0;nq=Yu+1100|0;fr=Yu+1096|0;rq=Yu+1092|0;br=Yu+1088|0;Hp=Yu+1084|0;er=Yu+1080|0;qq=Yu+1076|0;_q=Yu+1072|0;op=Yu+1068|0;rp=Yu+1064|0;Uq=Yu+1060|0;Vq=Yu+1056|0;vp=Yu+1052|0;yp=Yu+1048|0;vt=Yu+1044|0;yt=Yu+104
0|0;Kp=Yu+1036|0;$q=Yu+1032|0;mq=Yu+1028|0;ar=Yu+1024|0;Jp=Yu+1020|0;lq=Yu+1016|0;Dp=Yu+1012|0;Yq=Yu+1008|0;Gp=Yu+1004|0;Zq=Yu+1e3|0;Cp=Yu+996|0;Fp=Yu+992|0;Ap=Yu+988|0;oq=Yu+984|0;Dt=Yu+980|0;Gt=Yu+976|0;Ht=Yu+972|0;It=Yu+968|0;pq=Yu+964|0;sq=Yu+960|0;Xq=Yu+956|0;cr=Yu+952|0;tt=Yu+948|0;At=Yu+944|0;Bt=Yu+940|0;Ct=Yu+936|0;dr=Yu+932|0;gr=Yu+928|0;Un=Yu+924|0;Yp=Yu+920|0;Qo=Yu+916|0;Ms=Yu+912|0;Ps=Yu+908|0;Vs=Yu+904|0;$p=Yu+900|0;Us=Yu+896|0;Sp=Yu+892|0;mp=Yu+888|0;Wp=Yu+884|0;gq=Yu+880|0;so=Yu+876|0;lp=Yu+872|0;Vp=Yu+868|0;dq=Yu+864|0;gn=Yu+860|0;Tn=Yu+856|0;Zp=Yu+852|0;_p=Yu+848|0;Eo=Yu+844|0;Po=Yu+840|0;Ns=Yu+836|0;Os=Yu+832|0;ip=Yu+828|0;fq=Yu+824|0;Rp=Yu+820|0;eq=Yu+816|0;hp=Yu+812|0;Qp=Yu+808|0;io=Yu+804|0;cq=Yu+800|0;ro=Yu+796|0;bq=Yu+792|0;ho=Yu+788|0;qo=Yu+784|0;Ro=Yu+780|0;Tp=Yu+776|0;Ts=Yu+772|0;Ws=Yu+768|0;Xs=Yu+764|0;Ys=Yu+760|0;Up=Yu+756|0;Xp=Yu+752|0;aq=Yu+748|0;hq=Yu+744|0;Jt=Yu+740|0;Qs=Yu+736|0;Rs=Yu+732|0;Ss=Yu+728|0;iq=Yu+724|0;np=Yu+720|0;Mm=Yu+716|0;Nn=Yu+712|0;Yt=Yu+708|0;cu=Yu+704|0;sn=Yu+700|0;bu=Yu+696|0;Qn=Yu+692|0;Vt=Yu+688|0;An=Yu+684|0;$m=Yu+680|0;Kn=Yu+676|0;Vm=Yu+672|0;Hn=Yu+668|0;an=Yu+664|0;Ln=Yu+660|0;Ym=Yu+656|0;hm=Yu+652|0;Lm=Yu+648|0;Wt=Yu+644|0;Xt=Yu+640|0;Pm=Yu+636|0;On=Yu+632|0;Sm=Yu+628|0;Pn=Yu+624|0;Nm=Yu+620|0;Om=Yu+616|0;Qm=Yu+612|0;Rm=Yu+608|0;wn=Yu+604|0;Um=Yu+600|0;zn=Yu+596|0;Tm=Yu+592|0;un=Yu+588|0;vn=Yu+584|0;xn=Yu+580|0;yn=Yu+576|0;Dn=Yu+572|0;Xm=Yu+568|0;Gn=Yu+564|0;Wm=Yu+560|0;Bn=Yu+556|0;Cn=Yu+552|0;En=Yu+548|0;Fn=Yu+544|0;tn=Yu+540|0;In=Yu+536|0;au=Yu+532|0;du=Yu+528|0;eu=Yu+524|0;fu=Yu+520|0;Jn=Yu+516|0;Mn=Yu+512|0;Rn=Yu+508|0;Zm=Yu+504|0;Fu=Yu+500|0;Zt=Yu+496|0;_t=Yu+492|0;$t=Yu+488|0;_m=Yu+484|0;bn=Yu+480|0;Mk=Yu+476|0;Tl=Yu+472|0;vu=Yu+468|0;Bu=Yu+464|0;Al=Yu+460|0;Au=Yu+456|0;Wl=Yu+452|0;qu=Yu+448|0;sm=Yu+444|0;em=Yu+440|0;Ql=Yu+436|0;_l=Yu+432|0;Nl=Yu+428|0;fm=Yu+424|0;Rl=Yu+420|0;bm=Yu+416|0;Ek=Yu+412|0;Lk=Yu+408|0;ru=Yu+404|0;uu=Yu+400|0;Tk=Yu+396|0;Ul=Yu+392|0;_k=Yu+388|0;Vl=Yu+384|0;Pk=Yu+380|0;Sk=Yu+3
76|0;Wk=Yu+372|0;Zk=Yu+368|0;km=Yu+364|0;Yl=Yu+360|0;rm=Yu+356|0;Zl=Yu+352|0;El=Yu+348|0;jm=Yu+344|0;nm=Yu+340|0;qm=Yu+336|0;Dm=Yu+332|0;$l=Yu+328|0;Ml=Yu+324|0;am=Yu+320|0;vm=Yu+316|0;Cm=Yu+312|0;Gm=Yu+308|0;Ll=Yu+304|0;Bl=Yu+300|0;Ol=Yu+296|0;zu=Yu+292|0;Cu=Yu+288|0;Du=Yu+284|0;Eu=Yu+280|0;Pl=Yu+276|0;Sl=Yu+272|0;Xl=Yu+268|0;cm=Yu+264|0;pu=Yu+260|0;wu=Yu+256|0;xu=Yu+252|0;yu=Yu+248|0;dm=Yu+244|0;gm=Yu+240|0;jj=Yu+236|0;il=Yu+232|0;Ut=Yu+228|0;lu=Yu+224|0;qj=Yu+220|0;ku=Yu+216|0;ll=Yu+212|0;Rt=Yu+208|0;xk=Yu+204|0;vl=Yu+200|0;fl=Yu+196|0;pl=Yu+192|0;cl=Yu+188|0;wl=Yu+184|0;gl=Yu+180|0;sl=Yu+176|0;fj=Yu+172|0;ij=Yu+168|0;St=Yu+164|0;Tt=Yu+160|0;mj=Yu+156|0;jl=Yu+152|0;pj=Yu+148|0;kl=Yu+144|0;kj=Yu+140|0;lj=Yu+136|0;nj=Yu+132|0;oj=Yu+128|0;tk=Yu+124|0;nl=Yu+120|0;wk=Yu+116|0;ol=Yu+112|0;rk=Yu+108|0;sk=Yu+104|0;uk=Yu+100|0;vk=Yu+96|0;Ak=Yu+92|0;ql=Yu+88|0;bl=Yu+84|0;rl=Yu+80|0;yk=Yu+76|0;zk=Yu+72|0;$k=Yu+68|0;al=Yu+64|0;rj=Yu+60|0;dl=Yu+56|0;ju=Yu+52|0;mu=Yu+48|0;nu=Yu+44|0;ou=Yu+40|0;el=Yu+36|0;hl=Yu+32|0;ml=Yu+28|0;tl=Yu+24|0;Qt=Yu+20|0;gu=Yu+16|0;hu=Yu+12|0;iu=Yu+8|0;ul=Yu+4|0;xl=Yu;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Zu>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Yu+4716>>2]=.290284663438797;g[Yu+4712>>2]=.9569403529167175;g[Yu+4708>>2]=.8819212913513184;g[Yu+4704>>2]=.4713967442512512;g[Yu+4700>>2]=.5555702447891235;g[Yu+4696>>2]=.8314695954322815;g[Yu+4692>>2]=.0980171412229538;g[Yu+4688>>2]=.9951847195625305;g[Yu+4684>>2]=.7730104327201843;g[Yu+4680>>2]=.6343932747840881;g[Yu+4676>>2]=.9807852506637573;g[Yu+4672>>2]=.19509032368659973;g[Yu+4668>>2]=.3826834261417389;g[Yu+4664>>2]=.9238795042037964;g[Yu+4660>>2]=.7071067690849304;c[Xu>>2]=c[Zu>>2];c[m>>2]=(c[m>>2]|0)+(((c[Zu>>2]|0)-1|0)*126<<2);while(1){if((c[Xu>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[Ur>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+248>>2];g[Rc>>2]=+g[(c[m>>2]|0)+252>>2];g[hf>>2]=+g[za>>2]*+g[Ib>>2]+ 
+g[Rc>>2]*+g[_d>>2];g[Tr>>2]=+g[za>>2]*+g[_d>>2]-+g[Rc>>2]*+g[Ib>>2];g[Ji>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[zl>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Ah>>2]=+g[(c[m>>2]|0)+120>>2];g[qk>>2]=+g[(c[m>>2]|0)+124>>2];g[Im>>2]=+g[Ah>>2]*+g[Ji>>2]+ +g[qk>>2]*+g[zl>>2];g[rf>>2]=+g[Ah>>2]*+g[zl>>2]-+g[qk>>2]*+g[Ji>>2];g[ap>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[sr>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[Sn>>2]=+g[(c[m>>2]|0)+376>>2];g[jq>>2]=+g[(c[m>>2]|0)+380>>2];g[Bs>>2]=+g[Sn>>2]*+g[ap>>2]+ +g[jq>>2]*+g[sr>>2];g[sf>>2]=+g[Sn>>2]*+g[sr>>2]-+g[jq>>2]*+g[ap>>2];g[rg>>2]=+g[q>>2]+ +g[hf>>2];g[Kt>>2]=+g[Im>>2]+ +g[Bs>>2];g[Gu>>2]=+g[rg>>2]+ +g[Kt>>2];g[cn>>2]=+g[rg>>2]-+g[Kt>>2];g[bt>>2]=+g[Im>>2]-+g[Bs>>2];g[ct>>2]=+g[Ur>>2]-+g[Tr>>2];g[dt>>2]=+g[bt>>2]+ +g[ct>>2];g[tu>>2]=+g[ct>>2]-+g[bt>>2];g[qf>>2]=+g[q>>2]-+g[hf>>2];g[Tf>>2]=+g[rf>>2]-+g[sf>>2];g[Uf>>2]=+g[qf>>2]-+g[Tf>>2];g[yl>>2]=+g[qf>>2]+ +g[Tf>>2];g[Sr>>2]=+g[rf>>2]+ +g[sf>>2];g[Vr>>2]=+g[Tr>>2]+ +g[Ur>>2];g[Wr>>2]=+g[Sr>>2]+ +g[Vr>>2];g[xt>>2]=+g[Vr>>2]-+g[Sr>>2];g[Iu>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ku>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Hu>>2]=+g[(c[m>>2]|0)+56>>2];g[Ju>>2]=+g[(c[m>>2]|0)+60>>2];g[Lu>>2]=+g[Hu>>2]*+g[Iu>>2]+ +g[Ju>>2]*+g[Ku>>2];g[Wf>>2]=+g[Hu>>2]*+g[Ku>>2]-+g[Ju>>2]*+g[Iu>>2];g[Nu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[Pu>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[Mu>>2]=+g[(c[m>>2]|0)+312>>2];g[Ou>>2]=+g[(c[m>>2]|0)+316>>2];g[Qu>>2]=+g[Mu>>2]*+g[Nu>>2]+ +g[Ou>>2]*+g[Pu>>2];g[Xf>>2]=+g[Mu>>2]*+g[Pu>>2]-+g[Ou>>2]*+g[Nu>>2];g[Vf>>2]=+g[Lu>>2]-+g[Qu>>2];g[Yf>>2]=+g[Wf>>2]-+g[Xf>>2];g[Tu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[Vu>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[Su>>2]=+g[(c[m>>2]|0)+440>>2];g[Uu>>2]=+g[(c[m>>2]|0)+444>>2];g[Wu>>2]=+g[Su>>2]*+g[Tu>>2]+ 
+g[Uu>>2]*+g[Vu>>2];g[$f>>2]=+g[Su>>2]*+g[Vu>>2]-+g[Uu>>2]*+g[Tu>>2];g[Tj>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[Vj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[Sj>>2]=+g[(c[m>>2]|0)+184>>2];g[Uj>>2]=+g[(c[m>>2]|0)+188>>2];g[Wj>>2]=+g[Sj>>2]*+g[Tj>>2]+ +g[Uj>>2]*+g[Vj>>2];g[ag>>2]=+g[Sj>>2]*+g[Vj>>2]-+g[Uj>>2]*+g[Tj>>2];g[_f>>2]=+g[Wu>>2]-+g[Wj>>2];g[bg>>2]=+g[$f>>2]-+g[ag>>2];g[Ru>>2]=+g[Lu>>2]+ +g[Qu>>2];g[Xj>>2]=+g[Wu>>2]+ +g[Wj>>2];g[Yj>>2]=+g[Ru>>2]+ +g[Xj>>2];g[wt>>2]=+g[Ru>>2]-+g[Xj>>2];g[dn>>2]=+g[$f>>2]+ +g[ag>>2];g[en>>2]=+g[Wf>>2]+ +g[Xf>>2];g[fn>>2]=+g[dn>>2]-+g[en>>2];g[Rr>>2]=+g[en>>2]+ +g[dn>>2];g[Zf>>2]=+g[Vf>>2]-+g[Yf>>2];g[cg>>2]=+g[_f>>2]+ +g[bg>>2];g[dg>>2]=(+g[Zf>>2]+ +g[cg>>2])*.7071067690849304;g[su>>2]=(+g[cg>>2]-+g[Zf>>2])*.7071067690849304;g[Bk>>2]=+g[Vf>>2]+ +g[Yf>>2];g[Ck>>2]=+g[_f>>2]-+g[bg>>2];g[Dk>>2]=(+g[Bk>>2]+ +g[Ck>>2])*.7071067690849304;g[at>>2]=(+g[Bk>>2]-+g[Ck>>2])*.7071067690849304;g[$j>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[bk>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[_j>>2]=+g[(c[m>>2]|0)+24>>2];g[ak>>2]=+g[(c[m>>2]|0)+28>>2];g[ck>>2]=+g[_j>>2]*+g[$j>>2]+ +g[ak>>2]*+g[bk>>2];g[kg>>2]=+g[_j>>2]*+g[bk>>2]-+g[ak>>2]*+g[$j>>2];g[ek>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[gk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[dk>>2]=+g[(c[m>>2]|0)+280>>2];g[fk>>2]=+g[(c[m>>2]|0)+284>>2];g[hk>>2]=+g[dk>>2]*+g[ek>>2]+ +g[fk>>2]*+g[gk>>2];g[lg>>2]=+g[dk>>2]*+g[gk>>2]-+g[fk>>2]*+g[ek>>2];g[ik>>2]=+g[ck>>2]+ +g[hk>>2];g[jn>>2]=+g[kg>>2]+ +g[lg>>2];g[fg>>2]=+g[ck>>2]-+g[hk>>2];g[mg>>2]=+g[kg>>2]-+g[lg>>2];g[kk>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[mk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[jk>>2]=+g[(c[m>>2]|0)+152>>2];g[lk>>2]=+g[(c[m>>2]|0)+156>>2];g[nk>>2]=+g[jk>>2]*+g[kk>>2]+ 
+g[lk>>2]*+g[mk>>2];g[gg>>2]=+g[jk>>2]*+g[mk>>2]-+g[lk>>2]*+g[kk>>2];g[pk>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[ok>>2]=+g[(c[m>>2]|0)+408>>2];g[r>>2]=+g[(c[m>>2]|0)+412>>2];g[t>>2]=+g[ok>>2]*+g[pk>>2]+ +g[r>>2]*+g[s>>2];g[hg>>2]=+g[ok>>2]*+g[s>>2]-+g[r>>2]*+g[pk>>2];g[u>>2]=+g[nk>>2]+ +g[t>>2];g[kn>>2]=+g[gg>>2]+ +g[hg>>2];g[ig>>2]=+g[gg>>2]-+g[hg>>2];g[ng>>2]=+g[nk>>2]-+g[t>>2];g[v>>2]=+g[ik>>2]+ +g[u>>2];g[jr>>2]=+g[jn>>2]+ +g[kn>>2];g[jg>>2]=+g[fg>>2]-+g[ig>>2];g[og>>2]=+g[mg>>2]+ +g[ng>>2];g[pg>>2]=+g[jg>>2]*.9238795042037964-+g[og>>2]*.3826834261417389;g[gj>>2]=+g[og>>2]*.9238795042037964+ +g[jg>>2]*.3826834261417389;g[Fk>>2]=+g[mg>>2]-+g[ng>>2];g[Gk>>2]=+g[fg>>2]+ +g[ig>>2];g[Hk>>2]=+g[Fk>>2]*.3826834261417389+ +g[Gk>>2]*.9238795042037964;g[Jm>>2]=+g[Gk>>2]*.3826834261417389-+g[Fk>>2]*.9238795042037964;g[hn>>2]=+g[ik>>2]-+g[u>>2];g[ln>>2]=+g[jn>>2]-+g[kn>>2];g[mn>>2]=+g[hn>>2]+ +g[ln>>2];g[pp>>2]=+g[hn>>2]-+g[ln>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+472>>2];g[y>>2]=+g[(c[m>>2]|0)+476>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[qg>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+216>>2];g[ba>>2]=+g[(c[m>>2]|0)+220>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[tf>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[on>>2]=+g[qg>>2]+ +g[tf>>2];g[uf>>2]=+g[qg>>2]-+g[tf>>2];g[xf>>2]=+g[A>>2]-+g[da>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+88>>2];g[ha>>2]=+g[(c[m>>2]|0)+92>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ 
+g[ha>>2]*+g[ia>>2];g[yf>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+344>>2];g[ma>>2]=+g[(c[m>>2]|0)+348>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[zf>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[pn>>2]=+g[yf>>2]+ +g[zf>>2];g[vf>>2]=+g[ja>>2]-+g[oa>>2];g[Af>>2]=+g[yf>>2]-+g[zf>>2];g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[ir>>2]=+g[on>>2]+ +g[pn>>2];g[wf>>2]=+g[uf>>2]+ +g[vf>>2];g[Bf>>2]=+g[xf>>2]-+g[Af>>2];g[Cf>>2]=+g[wf>>2]*.3826834261417389+ +g[Bf>>2]*.9238795042037964;g[hj>>2]=+g[Bf>>2]*.3826834261417389-+g[wf>>2]*.9238795042037964;g[Ik>>2]=+g[xf>>2]+ +g[Af>>2];g[Jk>>2]=+g[uf>>2]-+g[vf>>2];g[Kk>>2]=+g[Ik>>2]*.9238795042037964-+g[Jk>>2]*.3826834261417389;g[Km>>2]=+g[Jk>>2]*.9238795042037964+ +g[Ik>>2]*.3826834261417389;g[nn>>2]=+g[ea>>2]-+g[pa>>2];g[qn>>2]=+g[on>>2]-+g[pn>>2];g[rn>>2]=+g[nn>>2]-+g[qn>>2];g[qp>>2]=+g[nn>>2]+ +g[qn>>2];g[ua>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[wa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ta>>2]=+g[(c[m>>2]|0)+8>>2];g[va>>2]=+g[(c[m>>2]|0)+12>>2];g[xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[wa>>2];g[vg>>2]=+g[ta>>2]*+g[wa>>2]-+g[va>>2]*+g[ua>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[ya>>2]=+g[(c[m>>2]|0)+264>>2];g[C>>2]=+g[(c[m>>2]|0)+268>>2];g[E>>2]=+g[ya>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[wg>>2]=+g[ya>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[F>>2]=+g[xa>>2]+ +g[E>>2];g[_n>>2]=+g[vg>>2]+ +g[wg>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[G>>2]=+g[(c[m>>2]|0)+136>>2];g[I>>2]=+g[(c[m>>2]|0)+140>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[Gf>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[M>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[O>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[L>>2]=+g[(c[m>>2]|0)+392>>2];g[N>>2]=+g[(c[m>>2]|0)+396>>2];g[P>>2]=+g[L>>2]*+g[M>>2]+ 
+g[N>>2]*+g[O>>2];g[Hf>>2]=+g[L>>2]*+g[O>>2]-+g[N>>2]*+g[M>>2];g[Q>>2]=+g[K>>2]+ +g[P>>2];g[$n>>2]=+g[Gf>>2]+ +g[Hf>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+72>>2];g[U>>2]=+g[(c[m>>2]|0)+76>>2];g[W>>2]=+g[S>>2]*+g[T>>2]+ +g[U>>2]*+g[V>>2];g[Lf>>2]=+g[S>>2]*+g[V>>2]-+g[U>>2]*+g[T>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[X>>2]=+g[(c[m>>2]|0)+328>>2];g[Z>>2]=+g[(c[m>>2]|0)+332>>2];g[Aa>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[Mf>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[Ba>>2]=+g[W>>2]+ +g[Aa>>2];g[Xn>>2]=+g[Lf>>2]+ +g[Mf>>2];g[Kf>>2]=+g[W>>2]-+g[Aa>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Ca>>2]=+g[(c[m>>2]|0)+456>>2];g[Ea>>2]=+g[(c[m>>2]|0)+460>>2];g[Ga>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[Ea>>2]*+g[Fa>>2];g[Qf>>2]=+g[Ca>>2]*+g[Fa>>2]-+g[Ea>>2]*+g[Da>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[Ha>>2]=+g[(c[m>>2]|0)+200>>2];g[Ja>>2]=+g[(c[m>>2]|0)+204>>2];g[jb>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[ib>>2];g[Rf>>2]=+g[Ha>>2]*+g[ib>>2]-+g[Ja>>2]*+g[Ia>>2];g[kb>>2]=+g[Ga>>2]+ +g[jb>>2];g[Wn>>2]=+g[Qf>>2]+ +g[Rf>>2];g[Pf>>2]=+g[Ga>>2]-+g[jb>>2];g[Sf>>2]=+g[Qf>>2]-+g[Rf>>2];g[R>>2]=+g[F>>2]+ +g[Q>>2];g[lb>>2]=+g[Ba>>2]+ +g[kb>>2];g[mr>>2]=+g[R>>2]-+g[lb>>2];g[nr>>2]=+g[_n>>2]+ +g[$n>>2];g[or>>2]=+g[Xn>>2]+ +g[Wn>>2];g[pr>>2]=+g[nr>>2]-+g[or>>2];g[Ff>>2]=+g[xa>>2]-+g[E>>2];g[If>>2]=+g[Gf>>2]-+g[Hf>>2];g[Jf>>2]=+g[Ff>>2]-+g[If>>2];g[Nk>>2]=+g[Ff>>2]+ +g[If>>2];g[ao>>2]=+g[_n>>2]-+g[$n>>2];g[Co>>2]=+g[Ba>>2]-+g[kb>>2];g[Do>>2]=+g[ao>>2]-+g[Co>>2];g[tp>>2]=+g[ao>>2]+ +g[Co>>2];g[Of>>2]=+g[Kf>>2]-+g[Nf>>2];g[sg>>2]=+g[Pf>>2]+ +g[Sf>>2];g[tg>>2]=(+g[Of>>2]+ +g[sg>>2])*.7071067690849304;g[Rk>>2]=(+g[sg>>2]-+g[Of>>2])*.7071067690849304;g[Ag>>2]=+g[Kf>>2]+ 
+g[Nf>>2];g[Bg>>2]=+g[Sf>>2]-+g[Pf>>2];g[ah>>2]=(+g[Ag>>2]+ +g[Bg>>2])*.7071067690849304;g[Ok>>2]=(+g[Ag>>2]-+g[Bg>>2])*.7071067690849304;g[Vn>>2]=+g[F>>2]-+g[Q>>2];g[Yn>>2]=+g[Wn>>2]-+g[Xn>>2];g[Zn>>2]=+g[Vn>>2]-+g[Yn>>2];g[up>>2]=+g[Vn>>2]+ +g[Yn>>2];g[xg>>2]=+g[vg>>2]-+g[wg>>2];g[yg>>2]=+g[K>>2]-+g[P>>2];g[zg>>2]=+g[xg>>2]+ +g[yg>>2];g[Qk>>2]=+g[xg>>2]-+g[yg>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[nb>>2]=+g[(c[m>>2]|0)+488>>2];g[pb>>2]=+g[(c[m>>2]|0)+492>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[uh>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[sb>>2]=+g[(c[m>>2]|0)+232>>2];g[ub>>2]=+g[(c[m>>2]|0)+236>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[vh>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[xb>>2]=+g[rb>>2]+ +g[wb>>2];g[Fo>>2]=+g[uh>>2]+ +g[vh>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Bb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[yb>>2]=+g[(c[m>>2]|0)+104>>2];g[Ab>>2]=+g[(c[m>>2]|0)+108>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Ab>>2]*+g[Bb>>2];g[eh>>2]=+g[yb>>2]*+g[Bb>>2]-+g[Ab>>2]*+g[zb>>2];g[Eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Db>>2]=+g[(c[m>>2]|0)+360>>2];g[Fb>>2]=+g[(c[m>>2]|0)+364>>2];g[Hb>>2]=+g[Db>>2]*+g[Eb>>2]+ +g[Fb>>2]*+g[Gb>>2];g[fh>>2]=+g[Db>>2]*+g[Gb>>2]-+g[Fb>>2]*+g[Eb>>2];g[Ka>>2]=+g[Cb>>2]+ +g[Hb>>2];g[Go>>2]=+g[eh>>2]+ +g[fh>>2];g[Na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ma>>2]=+g[(c[m>>2]|0)+40>>2];g[Oa>>2]=+g[(c[m>>2]|0)+44>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[jh>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Ra>>2]=+g[(c[m>>2]|0)+296>>2];g[Ta>>2]=+g[(c[m>>2]|0)+300>>2];g[Va>>2]=+g[Ra>>2]*+g[Sa>>2]+ 
+g[Ta>>2]*+g[Ua>>2];g[kh>>2]=+g[Ra>>2]*+g[Ua>>2]-+g[Ta>>2]*+g[Sa>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Va>>2];g[Mo>>2]=+g[jh>>2]+ +g[kh>>2];g[ih>>2]=+g[Qa>>2]-+g[Va>>2];g[lh>>2]=+g[jh>>2]-+g[kh>>2];g[Ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[_a>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[Xa>>2]=+g[(c[m>>2]|0)+424>>2];g[Za>>2]=+g[(c[m>>2]|0)+428>>2];g[$a>>2]=+g[Xa>>2]*+g[Ya>>2]+ +g[Za>>2]*+g[_a>>2];g[oh>>2]=+g[Xa>>2]*+g[_a>>2]-+g[Za>>2]*+g[Ya>>2];g[bb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[ab>>2]=+g[(c[m>>2]|0)+168>>2];g[cb>>2]=+g[(c[m>>2]|0)+172>>2];g[eb>>2]=+g[ab>>2]*+g[bb>>2]+ +g[cb>>2]*+g[db>>2];g[ph>>2]=+g[ab>>2]*+g[db>>2]-+g[cb>>2]*+g[bb>>2];g[fb>>2]=+g[$a>>2]+ +g[eb>>2];g[Lo>>2]=+g[oh>>2]+ +g[ph>>2];g[nh>>2]=+g[$a>>2]-+g[eb>>2];g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[La>>2]=+g[xb>>2]+ +g[Ka>>2];g[gb>>2]=+g[Wa>>2]+ +g[fb>>2];g[rr>>2]=+g[La>>2]-+g[gb>>2];g[uq>>2]=+g[Fo>>2]+ +g[Go>>2];g[vq>>2]=+g[Mo>>2]+ +g[Lo>>2];g[wq>>2]=+g[uq>>2]-+g[vq>>2];g[dh>>2]=+g[rb>>2]-+g[wb>>2];g[gh>>2]=+g[eh>>2]-+g[fh>>2];g[hh>>2]=+g[dh>>2]-+g[gh>>2];g[Xk>>2]=+g[dh>>2]+ +g[gh>>2];g[Ko>>2]=+g[xb>>2]-+g[Ka>>2];g[No>>2]=+g[Lo>>2]-+g[Mo>>2];g[Oo>>2]=+g[Ko>>2]-+g[No>>2];g[wp>>2]=+g[Ko>>2]+ +g[No>>2];g[mh>>2]=+g[ih>>2]-+g[lh>>2];g[rh>>2]=+g[nh>>2]+ +g[qh>>2];g[sh>>2]=(+g[mh>>2]+ +g[rh>>2])*.7071067690849304;g[Vk>>2]=(+g[rh>>2]-+g[mh>>2])*.7071067690849304;g[zh>>2]=+g[ih>>2]+ +g[lh>>2];g[Cg>>2]=+g[qh>>2]-+g[nh>>2];g[Dg>>2]=(+g[zh>>2]+ +g[Cg>>2])*.7071067690849304;g[Yk>>2]=(+g[zh>>2]-+g[Cg>>2])*.7071067690849304;g[Ho>>2]=+g[Fo>>2]-+g[Go>>2];g[Io>>2]=+g[Wa>>2]-+g[fb>>2];g[Jo>>2]=+g[Ho>>2]-+g[Io>>2];g[xp>>2]=+g[Ho>>2]+ +g[Io>>2];g[wh>>2]=+g[uh>>2]-+g[vh>>2];g[xh>>2]=+g[Cb>>2]-+g[Hb>>2];g[yh>>2]=+g[wh>>2]+ +g[xh>>2];g[Uk>>2]=+g[wh>>2]-+g[xh>>2];g[ad>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[cd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[Zd>>2]=+g[(c[m>>2]|0)+496>>2];g[bd>>2]=+g[(c[m>>2]|0)+500>>2];g[dd>>2]=+g[Zd>>2]*+g[ad>>2]+ 
+g[bd>>2]*+g[cd>>2];g[wj>>2]=+g[Zd>>2]*+g[cd>>2]-+g[bd>>2]*+g[ad>>2];g[fd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[hd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[ed>>2]=+g[(c[m>>2]|0)+240>>2];g[gd>>2]=+g[(c[m>>2]|0)+244>>2];g[id>>2]=+g[ed>>2]*+g[fd>>2]+ +g[gd>>2]*+g[hd>>2];g[xj>>2]=+g[ed>>2]*+g[hd>>2]-+g[gd>>2]*+g[fd>>2];g[jd>>2]=+g[dd>>2]+ +g[id>>2];g[to>>2]=+g[wj>>2]+ +g[xj>>2];g[ld>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[nd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[kd>>2]=+g[(c[m>>2]|0)+112>>2];g[md>>2]=+g[(c[m>>2]|0)+116>>2];g[od>>2]=+g[kd>>2]*+g[ld>>2]+ +g[md>>2]*+g[nd>>2];g[Lh>>2]=+g[kd>>2]*+g[nd>>2]-+g[md>>2]*+g[ld>>2];g[qd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[sd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[pd>>2]=+g[(c[m>>2]|0)+368>>2];g[rd>>2]=+g[(c[m>>2]|0)+372>>2];g[td>>2]=+g[pd>>2]*+g[qd>>2]+ +g[rd>>2]*+g[sd>>2];g[Mh>>2]=+g[pd>>2]*+g[sd>>2]-+g[rd>>2]*+g[qd>>2];g[ud>>2]=+g[od>>2]+ +g[td>>2];g[uo>>2]=+g[Lh>>2]+ +g[Mh>>2];g[xd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[zd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[wd>>2]=+g[(c[m>>2]|0)+48>>2];g[yd>>2]=+g[(c[m>>2]|0)+52>>2];g[$d>>2]=+g[wd>>2]*+g[xd>>2]+ +g[yd>>2]*+g[zd>>2];g[Qh>>2]=+g[wd>>2]*+g[zd>>2]-+g[yd>>2]*+g[xd>>2];g[be>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[de>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[ae>>2]=+g[(c[m>>2]|0)+304>>2];g[ce>>2]=+g[(c[m>>2]|0)+308>>2];g[ee>>2]=+g[ae>>2]*+g[be>>2]+ +g[ce>>2]*+g[de>>2];g[Rh>>2]=+g[ae>>2]*+g[de>>2]-+g[ce>>2]*+g[be>>2];g[fe>>2]=+g[$d>>2]+ +g[ee>>2];g[Lp>>2]=+g[Qh>>2]+ +g[Rh>>2];g[Ph>>2]=+g[$d>>2]-+g[ee>>2];g[Sh>>2]=+g[Qh>>2]-+g[Rh>>2];g[he>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[Je>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[ge>>2]=+g[(c[m>>2]|0)+432>>2];g[ie>>2]=+g[(c[m>>2]|0)+436>>2];g[Ke>>2]=+g[ge>>2]*+g[he>>2]+ 
+g[ie>>2]*+g[Je>>2];g[Vh>>2]=+g[ge>>2]*+g[Je>>2]-+g[ie>>2]*+g[he>>2];g[Me>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Oe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Le>>2]=+g[(c[m>>2]|0)+176>>2];g[Ne>>2]=+g[(c[m>>2]|0)+180>>2];g[Pe>>2]=+g[Le>>2]*+g[Me>>2]+ +g[Ne>>2]*+g[Oe>>2];g[Wh>>2]=+g[Le>>2]*+g[Oe>>2]-+g[Ne>>2]*+g[Me>>2];g[Qe>>2]=+g[Ke>>2]+ +g[Pe>>2];g[kp>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Uh>>2]=+g[Ke>>2]-+g[Pe>>2];g[Xh>>2]=+g[Vh>>2]-+g[Wh>>2];g[vd>>2]=+g[jd>>2]+ +g[ud>>2];g[Re>>2]=+g[fe>>2]+ +g[Qe>>2];g[Se>>2]=+g[vd>>2]+ +g[Re>>2];g[Lq>>2]=+g[vd>>2]-+g[Re>>2];g[jp>>2]=+g[jd>>2]-+g[ud>>2];g[Mp>>2]=+g[kp>>2]-+g[Lp>>2];g[Np>>2]=+g[jp>>2]-+g[Mp>>2];g[Ip>>2]=+g[jp>>2]+ +g[Mp>>2];g[Qq>>2]=+g[to>>2]+ +g[uo>>2];g[Rq>>2]=+g[Lp>>2]+ +g[kp>>2];g[Sq>>2]=+g[Qq>>2]-+g[Rq>>2];g[zs>>2]=+g[Qq>>2]+ +g[Rq>>2];g[Ii>>2]=+g[dd>>2]-+g[id>>2];g[Nh>>2]=+g[Lh>>2]-+g[Mh>>2];g[Oh>>2]=+g[Ii>>2]-+g[Nh>>2];g[tm>>2]=+g[Ii>>2]+ +g[Nh>>2];g[Th>>2]=+g[Ph>>2]-+g[Sh>>2];g[Yh>>2]=+g[Uh>>2]+ +g[Xh>>2];g[Zh>>2]=(+g[Th>>2]+ +g[Yh>>2])*.7071067690849304;g[Fm>>2]=(+g[Yh>>2]-+g[Th>>2])*.7071067690849304;g[Bj>>2]=+g[Ph>>2]+ +g[Sh>>2];g[Cj>>2]=+g[Xh>>2]-+g[Uh>>2];g[Dj>>2]=(+g[Bj>>2]+ +g[Cj>>2])*.7071067690849304;g[um>>2]=(+g[Bj>>2]-+g[Cj>>2])*.7071067690849304;g[vo>>2]=+g[to>>2]-+g[uo>>2];g[wo>>2]=+g[fe>>2]-+g[Qe>>2];g[xo>>2]=+g[vo>>2]-+g[wo>>2];g[kq>>2]=+g[vo>>2]+ +g[wo>>2];g[yj>>2]=+g[wj>>2]-+g[xj>>2];g[zj>>2]=+g[od>>2]-+g[td>>2];g[Aj>>2]=+g[yj>>2]+ +g[zj>>2];g[Em>>2]=+g[yj>>2]-+g[zj>>2];g[Mb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Ob>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Lb>>2]=+g[c[m>>2]>>2];g[Nb>>2]=+g[(c[m>>2]|0)+4>>2];g[Pb>>2]=+g[Lb>>2]*+g[Mb>>2]+ +g[Nb>>2]*+g[Ob>>2];g[Ig>>2]=+g[Lb>>2]*+g[Ob>>2]-+g[Nb>>2]*+g[Mb>>2];g[Rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[Qb>>2]=+g[(c[m>>2]|0)+256>>2];g[Sb>>2]=+g[(c[m>>2]|0)+260>>2];g[sc>>2]=+g[Qb>>2]*+g[Rb>>2]+ +g[Sb>>2]*+g[rc>>2];g[Jg>>2]=+g[Qb>>2]*+g[rc>>2]-+g[Sb>>2]*+g[Rb>>2];g[tc>>2]=+g[Pb>>2]+ 
+g[sc>>2];g[jo>>2]=+g[Ig>>2]+ +g[Jg>>2];g[vc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[xc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[uc>>2]=+g[(c[m>>2]|0)+128>>2];g[wc>>2]=+g[(c[m>>2]|0)+132>>2];g[yc>>2]=+g[uc>>2]*+g[vc>>2]+ +g[wc>>2]*+g[xc>>2];g[vi>>2]=+g[uc>>2]*+g[xc>>2]-+g[wc>>2]*+g[vc>>2];g[Ac>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[Cc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[zc>>2]=+g[(c[m>>2]|0)+384>>2];g[Bc>>2]=+g[(c[m>>2]|0)+388>>2];g[Dc>>2]=+g[zc>>2]*+g[Ac>>2]+ +g[Bc>>2]*+g[Cc>>2];g[wi>>2]=+g[zc>>2]*+g[Cc>>2]-+g[Bc>>2]*+g[Ac>>2];g[Ec>>2]=+g[yc>>2]+ +g[Dc>>2];g[ko>>2]=+g[vi>>2]+ +g[wi>>2];g[Hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Gc>>2]=+g[(c[m>>2]|0)+64>>2];g[Ic>>2]=+g[(c[m>>2]|0)+68>>2];g[Kc>>2]=+g[Gc>>2]*+g[Hc>>2]+ +g[Ic>>2]*+g[Jc>>2];g[Og>>2]=+g[Gc>>2]*+g[Jc>>2]-+g[Ic>>2]*+g[Hc>>2];g[Mc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[Oc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[Lc>>2]=+g[(c[m>>2]|0)+320>>2];g[Nc>>2]=+g[(c[m>>2]|0)+324>>2];g[Pc>>2]=+g[Lc>>2]*+g[Mc>>2]+ +g[Nc>>2]*+g[Oc>>2];g[Pg>>2]=+g[Lc>>2]*+g[Oc>>2]-+g[Nc>>2]*+g[Mc>>2];g[Qc>>2]=+g[Kc>>2]+ +g[Pc>>2];g[Uo>>2]=+g[Og>>2]+ +g[Pg>>2];g[Ng>>2]=+g[Kc>>2]-+g[Pc>>2];g[Qg>>2]=+g[Og>>2]-+g[Pg>>2];g[Ub>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[Wb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[Tb>>2]=+g[(c[m>>2]|0)+448>>2];g[Vb>>2]=+g[(c[m>>2]|0)+452>>2];g[Xb>>2]=+g[Tb>>2]*+g[Ub>>2]+ +g[Vb>>2]*+g[Wb>>2];g[Sg>>2]=+g[Tb>>2]*+g[Wb>>2]-+g[Vb>>2]*+g[Ub>>2];g[Zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[$b>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[Yb>>2]=+g[(c[m>>2]|0)+192>>2];g[_b>>2]=+g[(c[m>>2]|0)+196>>2];g[ac>>2]=+g[Yb>>2]*+g[Zb>>2]+ +g[_b>>2]*+g[$b>>2];g[Tg>>2]=+g[Yb>>2]*+g[$b>>2]-+g[_b>>2]*+g[Zb>>2];g[bc>>2]=+g[Xb>>2]+ +g[ac>>2];g[To>>2]=+g[Sg>>2]+ +g[Tg>>2];g[Ug>>2]=+g[Sg>>2]-+g[Tg>>2];g[Vg>>2]=+g[Xb>>2]-+g[ac>>2];g[Fc>>2]=+g[tc>>2]+ +g[Ec>>2];g[cc>>2]=+g[Qc>>2]+ +g[bc>>2];g[dc>>2]=+g[Fc>>2]+ 
+g[cc>>2];g[Fq>>2]=+g[Fc>>2]-+g[cc>>2];g[lo>>2]=+g[jo>>2]-+g[ko>>2];g[mo>>2]=+g[Qc>>2]-+g[bc>>2];g[no>>2]=+g[lo>>2]-+g[mo>>2];g[Ep>>2]=+g[lo>>2]+ +g[mo>>2];g[Aq>>2]=+g[jo>>2]+ +g[ko>>2];g[Bq>>2]=+g[Uo>>2]+ +g[To>>2];g[Cq>>2]=+g[Aq>>2]-+g[Bq>>2];g[us>>2]=+g[Aq>>2]+ +g[Bq>>2];g[Kg>>2]=+g[Ig>>2]-+g[Jg>>2];g[Lg>>2]=+g[yc>>2]-+g[Dc>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[lm>>2]=+g[Kg>>2]-+g[Lg>>2];g[Rg>>2]=+g[Ng>>2]+ +g[Qg>>2];g[Wg>>2]=+g[Ug>>2]-+g[Vg>>2];g[Xg>>2]=(+g[Rg>>2]+ +g[Wg>>2])*.7071067690849304;g[Dl>>2]=(+g[Rg>>2]-+g[Wg>>2])*.7071067690849304;g[zi>>2]=+g[Ng>>2]-+g[Qg>>2];g[Ai>>2]=+g[Vg>>2]+ +g[Ug>>2];g[Bi>>2]=(+g[zi>>2]+ +g[Ai>>2])*.7071067690849304;g[mm>>2]=(+g[Ai>>2]-+g[zi>>2])*.7071067690849304;g[So>>2]=+g[tc>>2]-+g[Ec>>2];g[Vo>>2]=+g[To>>2]-+g[Uo>>2];g[Wo>>2]=+g[So>>2]-+g[Vo>>2];g[Bp>>2]=+g[So>>2]+ +g[Vo>>2];g[ui>>2]=+g[Pb>>2]-+g[sc>>2];g[xi>>2]=+g[vi>>2]-+g[wi>>2];g[yi>>2]=+g[ui>>2]-+g[xi>>2];g[Cl>>2]=+g[ui>>2]+ +g[xi>>2];g[fc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[hc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ec>>2]=+g[(c[m>>2]|0)+32>>2];g[gc>>2]=+g[(c[m>>2]|0)+36>>2];g[ic>>2]=+g[ec>>2]*+g[fc>>2]+ +g[gc>>2]*+g[hc>>2];g[Zg>>2]=+g[ec>>2]*+g[hc>>2]-+g[gc>>2]*+g[fc>>2];g[kc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[jc>>2]=+g[(c[m>>2]|0)+288>>2];g[lc>>2]=+g[(c[m>>2]|0)+292>>2];g[nc>>2]=+g[jc>>2]*+g[kc>>2]+ +g[lc>>2]*+g[mc>>2];g[_g>>2]=+g[jc>>2]*+g[mc>>2]-+g[lc>>2]*+g[kc>>2];g[oc>>2]=+g[ic>>2]+ +g[nc>>2];g[Yo>>2]=+g[Zg>>2]+ +g[_g>>2];g[$g>>2]=+g[Zg>>2]-+g[_g>>2];g[Dh>>2]=+g[ic>>2]-+g[nc>>2];g[Md>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Od>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ld>>2]=+g[(c[m>>2]|0)+96>>2];g[Nd>>2]=+g[(c[m>>2]|0)+100>>2];g[Pd>>2]=+g[Ld>>2]*+g[Md>>2]+ 
+g[Nd>>2]*+g[Od>>2];g[ni>>2]=+g[Ld>>2]*+g[Od>>2]-+g[Nd>>2]*+g[Md>>2];g[Rd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[Td>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[Qd>>2]=+g[(c[m>>2]|0)+352>>2];g[Sd>>2]=+g[(c[m>>2]|0)+356>>2];g[Ud>>2]=+g[Qd>>2]*+g[Rd>>2]+ +g[Sd>>2]*+g[Td>>2];g[oi>>2]=+g[Qd>>2]*+g[Td>>2]-+g[Sd>>2]*+g[Rd>>2];g[Vd>>2]=+g[Pd>>2]+ +g[Ud>>2];g[co>>2]=+g[ni>>2]+ +g[oi>>2];g[ki>>2]=+g[Pd>>2]-+g[Ud>>2];g[pi>>2]=+g[ni>>2]-+g[oi>>2];g[qc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Tc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[pc>>2]=+g[(c[m>>2]|0)+160>>2];g[Sc>>2]=+g[(c[m>>2]|0)+164>>2];g[Uc>>2]=+g[pc>>2]*+g[qc>>2]+ +g[Sc>>2]*+g[Tc>>2];g[Eh>>2]=+g[pc>>2]*+g[Tc>>2]-+g[Sc>>2]*+g[qc>>2];g[Wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[Yc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[Vc>>2]=+g[(c[m>>2]|0)+416>>2];g[Xc>>2]=+g[(c[m>>2]|0)+420>>2];g[Zc>>2]=+g[Vc>>2]*+g[Wc>>2]+ +g[Xc>>2]*+g[Yc>>2];g[Fh>>2]=+g[Vc>>2]*+g[Yc>>2]-+g[Xc>>2]*+g[Wc>>2];g[_c>>2]=+g[Uc>>2]+ +g[Zc>>2];g[Zo>>2]=+g[Eh>>2]+ +g[Fh>>2];g[Bh>>2]=+g[Uc>>2]-+g[Zc>>2];g[Gh>>2]=+g[Eh>>2]-+g[Fh>>2];g[Bd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[Dd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[Ad>>2]=+g[(c[m>>2]|0)+480>>2];g[Cd>>2]=+g[(c[m>>2]|0)+484>>2];g[Ed>>2]=+g[Ad>>2]*+g[Bd>>2]+ +g[Cd>>2]*+g[Dd>>2];g[Jh>>2]=+g[Ad>>2]*+g[Dd>>2]-+g[Cd>>2]*+g[Bd>>2];g[Gd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Id>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Fd>>2]=+g[(c[m>>2]|0)+224>>2];g[Hd>>2]=+g[(c[m>>2]|0)+228>>2];g[Jd>>2]=+g[Fd>>2]*+g[Gd>>2]+ +g[Hd>>2]*+g[Id>>2];g[Kh>>2]=+g[Fd>>2]*+g[Id>>2]-+g[Hd>>2]*+g[Gd>>2];g[Kd>>2]=+g[Ed>>2]+ +g[Jd>>2];g[bo>>2]=+g[Jh>>2]+ +g[Kh>>2];g[ji>>2]=+g[Jh>>2]-+g[Kh>>2];g[mi>>2]=+g[Ed>>2]-+g[Jd>>2];g[$c>>2]=+g[oc>>2]+ +g[_c>>2];g[Wd>>2]=+g[Kd>>2]+ +g[Vd>>2];g[Xd>>2]=+g[$c>>2]+ +g[Wd>>2];g[Dq>>2]=+g[$c>>2]-+g[Wd>>2];g[eo>>2]=+g[bo>>2]-+g[co>>2];g[fo>>2]=+g[Kd>>2]-+g[Vd>>2];g[go>>2]=+g[eo>>2]-+g[fo>>2];g[oo>>2]=+g[fo>>2]+ +g[eo>>2];g[Gq>>2]=+g[bo>>2]+ 
+g[co>>2];g[Hq>>2]=+g[Yo>>2]+ +g[Zo>>2];g[Iq>>2]=+g[Gq>>2]-+g[Hq>>2];g[vs>>2]=+g[Hq>>2]+ +g[Gq>>2];g[Ch>>2]=+g[$g>>2]+ +g[Bh>>2];g[Hh>>2]=+g[Dh>>2]-+g[Gh>>2];g[Ih>>2]=+g[Ch>>2]*.9238795042037964+ +g[Hh>>2]*.3826834261417389;g[Di>>2]=+g[Hh>>2]*.9238795042037964-+g[Ch>>2]*.3826834261417389;g[li>>2]=+g[ji>>2]+ +g[ki>>2];g[qi>>2]=+g[mi>>2]-+g[pi>>2];g[ri>>2]=+g[li>>2]*.9238795042037964-+g[qi>>2]*.3826834261417389;g[Ei>>2]=+g[li>>2]*.3826834261417389+ +g[qi>>2]*.9238795042037964;g[Il>>2]=+g[mi>>2]+ +g[pi>>2];g[Jl>>2]=+g[ji>>2]-+g[ki>>2];g[im>>2]=+g[Il>>2]*.9238795042037964-+g[Jl>>2]*.3826834261417389;g[pm>>2]=+g[Jl>>2]*.9238795042037964+ +g[Il>>2]*.3826834261417389;g[Xo>>2]=+g[oc>>2]-+g[_c>>2];g[_o>>2]=+g[Yo>>2]-+g[Zo>>2];g[$o>>2]=+g[Xo>>2]+ +g[_o>>2];g[po>>2]=+g[Xo>>2]-+g[_o>>2];g[Fl>>2]=+g[$g>>2]-+g[Bh>>2];g[Gl>>2]=+g[Dh>>2]+ +g[Gh>>2];g[Hl>>2]=+g[Fl>>2]*.3826834261417389+ +g[Gl>>2]*.9238795042037964;g[om>>2]=+g[Fl>>2]*.9238795042037964-+g[Gl>>2]*.3826834261417389;g[Ue>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[We>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Te>>2]=+g[(c[m>>2]|0)+16>>2];g[Ve>>2]=+g[(c[m>>2]|0)+20>>2];g[Xe>>2]=+g[Te>>2]*+g[Ue>>2]+ +g[Ve>>2]*+g[We>>2];g[ei>>2]=+g[Te>>2]*+g[We>>2]-+g[Ve>>2]*+g[Ue>>2];g[Ze>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[$e>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[Ye>>2]=+g[(c[m>>2]|0)+272>>2];g[_e>>2]=+g[(c[m>>2]|0)+276>>2];g[af>>2]=+g[Ye>>2]*+g[Ze>>2]+ +g[_e>>2]*+g[$e>>2];g[fi>>2]=+g[Ye>>2]*+g[$e>>2]-+g[_e>>2]*+g[Ze>>2];g[bf>>2]=+g[Xe>>2]+ +g[af>>2];g[dp>>2]=+g[ei>>2]+ +g[fi>>2];g[$h>>2]=+g[Xe>>2]-+g[af>>2];g[gi>>2]=+g[ei>>2]-+g[fi>>2];g[Ce>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Ee>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Be>>2]=+g[(c[m>>2]|0)+80>>2];g[De>>2]=+g[(c[m>>2]|0)+84>>2];g[Fe>>2]=+g[Be>>2]*+g[Ce>>2]+ 
+g[De>>2]*+g[Ee>>2];g[Ri>>2]=+g[Be>>2]*+g[Ee>>2]-+g[De>>2]*+g[Ce>>2];g[He>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[jf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[Ge>>2]=+g[(c[m>>2]|0)+336>>2];g[Ie>>2]=+g[(c[m>>2]|0)+340>>2];g[kf>>2]=+g[Ge>>2]*+g[He>>2]+ +g[Ie>>2]*+g[jf>>2];g[Si>>2]=+g[Ge>>2]*+g[jf>>2]-+g[Ie>>2]*+g[He>>2];g[lf>>2]=+g[Fe>>2]+ +g[kf>>2];g[Ao>>2]=+g[Ri>>2]+ +g[Si>>2];g[Oi>>2]=+g[Fe>>2]-+g[kf>>2];g[Ti>>2]=+g[Ri>>2]-+g[Si>>2];g[df>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[ff>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[cf>>2]=+g[(c[m>>2]|0)+144>>2];g[ef>>2]=+g[(c[m>>2]|0)+148>>2];g[gf>>2]=+g[cf>>2]*+g[df>>2]+ +g[ef>>2]*+g[ff>>2];g[ai>>2]=+g[cf>>2]*+g[ff>>2]-+g[ef>>2]*+g[df>>2];g[ke>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[me>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[je>>2]=+g[(c[m>>2]|0)+400>>2];g[le>>2]=+g[(c[m>>2]|0)+404>>2];g[ne>>2]=+g[je>>2]*+g[ke>>2]+ +g[le>>2]*+g[me>>2];g[bi>>2]=+g[je>>2]*+g[me>>2]-+g[le>>2]*+g[ke>>2];g[oe>>2]=+g[gf>>2]+ +g[ne>>2];g[ep>>2]=+g[ai>>2]+ +g[bi>>2];g[ci>>2]=+g[ai>>2]-+g[bi>>2];g[hi>>2]=+g[gf>>2]-+g[ne>>2];g[re>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[te>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[qe>>2]=+g[(c[m>>2]|0)+464>>2];g[se>>2]=+g[(c[m>>2]|0)+468>>2];g[ue>>2]=+g[qe>>2]*+g[re>>2]+ +g[se>>2]*+g[te>>2];g[Li>>2]=+g[qe>>2]*+g[te>>2]-+g[se>>2]*+g[re>>2];g[we>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[ye>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[ve>>2]=+g[(c[m>>2]|0)+208>>2];g[xe>>2]=+g[(c[m>>2]|0)+212>>2];g[ze>>2]=+g[ve>>2]*+g[we>>2]+ +g[xe>>2]*+g[ye>>2];g[Mi>>2]=+g[ve>>2]*+g[ye>>2]-+g[xe>>2]*+g[we>>2];g[Ae>>2]=+g[ue>>2]+ +g[ze>>2];g[zo>>2]=+g[Li>>2]+ +g[Mi>>2];g[Ni>>2]=+g[Li>>2]-+g[Mi>>2];g[Qi>>2]=+g[ue>>2]-+g[ze>>2];g[pe>>2]=+g[bf>>2]+ +g[oe>>2];g[mf>>2]=+g[Ae>>2]+ +g[lf>>2];g[nf>>2]=+g[pe>>2]+ +g[mf>>2];g[Tq>>2]=+g[pe>>2]-+g[mf>>2];g[cp>>2]=+g[bf>>2]-+g[oe>>2];g[fp>>2]=+g[dp>>2]-+g[ep>>2];g[gp>>2]=+g[cp>>2]-+g[fp>>2];g[Op>>2]=+g[cp>>2]+ +g[fp>>2];g[Mq>>2]=+g[zo>>2]+ 
+g[Ao>>2];g[Nq>>2]=+g[dp>>2]+ +g[ep>>2];g[Oq>>2]=+g[Mq>>2]-+g[Nq>>2];g[As>>2]=+g[Nq>>2]+ +g[Mq>>2];g[di>>2]=+g[$h>>2]-+g[ci>>2];g[ii>>2]=+g[gi>>2]+ +g[hi>>2];g[Ki>>2]=+g[di>>2]*.9238795042037964-+g[ii>>2]*.3826834261417389;g[Fj>>2]=+g[ii>>2]*.9238795042037964+ +g[di>>2]*.3826834261417389;g[Pi>>2]=+g[Ni>>2]+ +g[Oi>>2];g[sj>>2]=+g[Qi>>2]-+g[Ti>>2];g[tj>>2]=+g[Pi>>2]*.3826834261417389+ +g[sj>>2]*.9238795042037964;g[Gj>>2]=+g[Pi>>2]*.9238795042037964-+g[sj>>2]*.3826834261417389;g[zm>>2]=+g[Qi>>2]+ +g[Ti>>2];g[Am>>2]=+g[Ni>>2]-+g[Oi>>2];g[Bm>>2]=+g[zm>>2]*.9238795042037964-+g[Am>>2]*.3826834261417389;g[Kl>>2]=+g[Am>>2]*.9238795042037964+ +g[zm>>2]*.3826834261417389;g[yo>>2]=+g[Ae>>2]-+g[lf>>2];g[Bo>>2]=+g[zo>>2]-+g[Ao>>2];g[bp>>2]=+g[yo>>2]+ +g[Bo>>2];g[Pp>>2]=+g[Bo>>2]-+g[yo>>2];g[wm>>2]=+g[gi>>2]-+g[hi>>2];g[xm>>2]=+g[$h>>2]+ +g[ci>>2];g[ym>>2]=+g[wm>>2]*.3826834261417389+ +g[xm>>2]*.9238795042037964;g[Hm>>2]=+g[wm>>2]*.9238795042037964-+g[xm>>2]*.3826834261417389;g[Zj>>2]=+g[Gu>>2]+ +g[Yj>>2];g[ra>>2]=+g[v>>2]+ +g[qa>>2];g[sa>>2]=+g[Zj>>2]+ +g[ra>>2];g[os>>2]=+g[Zj>>2]-+g[ra>>2];g[Lr>>2]=+g[zs>>2]+ +g[As>>2];g[Mr>>2]=+g[us>>2]+ +g[vs>>2];g[Nr>>2]=+g[Lr>>2]-+g[Mr>>2];g[Or>>2]=+g[Mr>>2]+ +g[Lr>>2];g[Qr>>2]=+g[jr>>2]+ +g[ir>>2];g[Xr>>2]=+g[Rr>>2]+ +g[Wr>>2];g[Yr>>2]=+g[Qr>>2]+ +g[Xr>>2];g[Ds>>2]=+g[Xr>>2]-+g[Qr>>2];g[mb>>2]=+g[R>>2]+ +g[lb>>2];g[hb>>2]=+g[La>>2]+ +g[gb>>2];g[Jb>>2]=+g[mb>>2]+ +g[hb>>2];g[Cs>>2]=+g[mb>>2]-+g[hb>>2];g[Yd>>2]=+g[dc>>2]+ +g[Xd>>2];g[of>>2]=+g[Se>>2]+ +g[nf>>2];g[pf>>2]=+g[Yd>>2]+ +g[of>>2];g[_r>>2]=+g[of>>2]-+g[Yd>>2];g[ts>>2]=+g[dc>>2]-+g[Xd>>2];g[ws>>2]=+g[us>>2]-+g[vs>>2];g[xs>>2]=+g[ts>>2]+ +g[ws>>2];g[Hr>>2]=+g[ts>>2]-+g[ws>>2];g[ys>>2]=+g[Se>>2]-+g[nf>>2];g[Dr>>2]=+g[zs>>2]-+g[As>>2];g[Er>>2]=+g[ys>>2]-+g[Dr>>2];g[Ir>>2]=+g[ys>>2]+ +g[Dr>>2];g[ps>>2]=+g[uq>>2]+ +g[vq>>2];g[qs>>2]=+g[nr>>2]+ +g[or>>2];g[rs>>2]=+g[ps>>2]-+g[qs>>2];g[Pr>>2]=+g[qs>>2]+ +g[ps>>2];g[Kb>>2]=+g[sa>>2]+ 
+g[Jb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Kb>>2]-+g[pf>>2];g[c[k>>2]>>2]=+g[Kb>>2]+ +g[pf>>2];g[Zr>>2]=+g[Pr>>2]+ +g[Yr>>2];g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[Or>>2]-+g[Zr>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[Or>>2]+ +g[Zr>>2];g[$r>>2]=+g[Yr>>2]-+g[Pr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[_r>>2]-+g[$r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[_r>>2]+ +g[$r>>2];g[Kr>>2]=+g[sa>>2]-+g[Jb>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Kr>>2]-+g[Nr>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Kr>>2]+ +g[Nr>>2];g[ss>>2]=+g[os>>2]-+g[rs>>2];g[Fr>>2]=(+g[xs>>2]+ +g[Er>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[ss>>2]-+g[Fr>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ss>>2]+ +g[Fr>>2];g[as>>2]=(+g[Er>>2]-+g[xs>>2])*.7071067690849304;g[Es>>2]=+g[Cs>>2]+ +g[Ds>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[as>>2]-+g[Es>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[as>>2]+ +g[Es>>2];g[Fs>>2]=(+g[Ir>>2]-+g[Hr>>2])*.7071067690849304;g[Gs>>2]=+g[Ds>>2]-+g[Cs>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[Fs>>2]-+g[Gs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[Fs>>2]+ +g[Gs>>2];g[Gr>>2]=+g[os>>2]+ +g[rs>>2];g[Jr>>2]=(+g[Hr>>2]+ +g[Ir>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Gr>>2]-+g[Jr>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Gr>>2]+ +g[Jr>>2];g[eg>>2]=+g[Uf>>2]+ +g[dg>>2];g[Df>>2]=+g[pg>>2]+ +g[Cf>>2];g[Ef>>2]=+g[eg>>2]-+g[Df>>2];g[Pj>>2]=+g[eg>>2]+ +g[Df>>2];g[$s>>2]=+g[gj>>2]-+g[hj>>2];g[et>>2]=+g[at>>2]+ +g[dt>>2];g[ft>>2]=+g[$s>>2]+ +g[et>>2];g[Mt>>2]=+g[et>>2]-+g[$s>>2];g[ug>>2]=+g[Jf>>2]+ +g[tg>>2];g[bh>>2]=+g[zg>>2]+ +g[ah>>2];g[ch>>2]=+g[ug>>2]*.19509032368659973+ +g[bh>>2]*.9807852506637573;g[Qj>>2]=+g[ug>>2]*.9807852506637573-+g[bh>>2]*.19509032368659973;g[th>>2]=+g[hh>>2]+ +g[sh>>2];g[Eg>>2]=+g[yh>>2]+ +g[Dg>>2];g[Fg>>2]=+g[th>>2]*.19509032368659973-+g[Eg>>2]*.9807852506637573;g[Rj>>2]=+g[th>>2]*.9807852506637573+ +g[Eg>>2]*.19509032368659973;g[Gg>>2]=+g[ch>>2]+ 
+g[Fg>>2];g[Lt>>2]=+g[Rj>>2]-+g[Qj>>2];g[Ui>>2]=+g[Qj>>2]+ +g[Rj>>2];g[_s>>2]=+g[ch>>2]-+g[Fg>>2];g[Yg>>2]=+g[Mg>>2]+ +g[Xg>>2];g[si>>2]=+g[Ih>>2]+ +g[ri>>2];g[ti>>2]=+g[Yg>>2]-+g[si>>2];g[Xi>>2]=+g[Yg>>2]+ +g[si>>2];g[Ci>>2]=+g[yi>>2]+ +g[Bi>>2];g[Fi>>2]=+g[Di>>2]+ +g[Ei>>2];g[Gi>>2]=+g[Ci>>2]-+g[Fi>>2];g[Wi>>2]=+g[Ci>>2]+ +g[Fi>>2];g[Hi>>2]=+g[ti>>2]*.6343932747840881+ +g[Gi>>2]*.7730104327201843;g[cj>>2]=+g[Xi>>2]*.9951847195625305+ +g[Wi>>2]*.0980171412229538;g[Mj>>2]=+g[Gi>>2]*.6343932747840881-+g[ti>>2]*.7730104327201843;g[Yi>>2]=+g[Wi>>2]*.9951847195625305-+g[Xi>>2]*.0980171412229538;g[_h>>2]=+g[Oh>>2]+ +g[Zh>>2];g[uj>>2]=+g[Ki>>2]+ +g[tj>>2];g[vj>>2]=+g[_h>>2]-+g[uj>>2];g[_i>>2]=+g[_h>>2]+ +g[uj>>2];g[Ej>>2]=+g[Aj>>2]+ +g[Dj>>2];g[Hj>>2]=+g[Fj>>2]+ +g[Gj>>2];g[Ij>>2]=+g[Ej>>2]-+g[Hj>>2];g[Zi>>2]=+g[Ej>>2]+ +g[Hj>>2];g[Jj>>2]=+g[vj>>2]*.7730104327201843-+g[Ij>>2]*.6343932747840881;g[dj>>2]=+g[_i>>2]*.0980171412229538-+g[Zi>>2]*.9951847195625305;g[Nj>>2]=+g[Ij>>2]*.7730104327201843+ +g[vj>>2]*.6343932747840881;g[$i>>2]=+g[Zi>>2]*.0980171412229538+ +g[_i>>2]*.9951847195625305;g[Hg>>2]=+g[Ef>>2]+ +g[Gg>>2];g[Kj>>2]=+g[Hi>>2]+ +g[Jj>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Hg>>2]-+g[Kj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Hg>>2]+ +g[Kj>>2];g[jt>>2]=+g[Nj>>2]-+g[Mj>>2];g[Nt>>2]=+g[Lt>>2]+ +g[Mt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[jt>>2]-+g[Nt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[jt>>2]+ +g[Nt>>2];g[Ot>>2]=+g[Jj>>2]-+g[Hi>>2];g[Pt>>2]=+g[Mt>>2]-+g[Lt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[Ot>>2]-+g[Pt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[Ot>>2]+ +g[Pt>>2];g[Lj>>2]=+g[Ef>>2]-+g[Gg>>2];g[Oj>>2]=+g[Mj>>2]+ +g[Nj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Lj>>2]-+g[Oj>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Lj>>2]+ +g[Oj>>2];g[Vi>>2]=+g[Pj>>2]+ +g[Ui>>2];g[aj>>2]=+g[Yi>>2]+ +g[$i>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Vi>>2]-+g[aj>>2];g[c[l>>2]>>2]=+g[Vi>>2]+ +g[aj>>2];g[Zs>>2]=+g[dj>>2]-+g[cj>>2];g[gt>>2]=+g[_s>>2]+ 
+g[ft>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[Zs>>2]-+g[gt>>2];g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[Zs>>2]+ +g[gt>>2];g[ht>>2]=+g[$i>>2]-+g[Yi>>2];g[it>>2]=+g[ft>>2]-+g[_s>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[ht>>2]-+g[it>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[ht>>2]+ +g[it>>2];g[bj>>2]=+g[Pj>>2]-+g[Ui>>2];g[ej>>2]=+g[cj>>2]+ +g[dj>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[bj>>2]-+g[ej>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[bj>>2]+ +g[ej>>2];g[hr>>2]=+g[Gu>>2]-+g[Yj>>2];g[kr>>2]=+g[ir>>2]-+g[jr>>2];g[lr>>2]=+g[hr>>2]-+g[kr>>2];g[Ar>>2]=+g[hr>>2]+ +g[kr>>2];g[gs>>2]=+g[Sq>>2]+ +g[Tq>>2];g[hs>>2]=+g[Lq>>2]+ +g[Oq>>2];g[is>>2]=+g[gs>>2]*.3826834261417389+ +g[hs>>2]*.9238795042037964;g[ms>>2]=+g[hs>>2]*.3826834261417389-+g[gs>>2]*.9238795042037964;g[Js>>2]=+g[v>>2]-+g[qa>>2];g[Ks>>2]=+g[Wr>>2]-+g[Rr>>2];g[Ls>>2]=+g[Js>>2]+ +g[Ks>>2];g[pt>>2]=+g[Ks>>2]-+g[Js>>2];g[qr>>2]=+g[mr>>2]+ +g[pr>>2];g[xq>>2]=+g[rr>>2]-+g[wq>>2];g[yq>>2]=(+g[qr>>2]+ +g[xq>>2])*.7071067690849304;g[Is>>2]=(+g[qr>>2]-+g[xq>>2])*.7071067690849304;g[Eq>>2]=+g[Cq>>2]-+g[Dq>>2];g[Jq>>2]=+g[Fq>>2]-+g[Iq>>2];g[Kq>>2]=+g[Eq>>2]*.3826834261417389+ +g[Jq>>2]*.9238795042037964;g[xr>>2]=+g[Jq>>2]*.3826834261417389-+g[Eq>>2]*.9238795042037964;g[Br>>2]=+g[mr>>2]-+g[pr>>2];g[Cr>>2]=+g[rr>>2]+ +g[wq>>2];g[bs>>2]=(+g[Br>>2]+ +g[Cr>>2])*.7071067690849304;g[ot>>2]=(+g[Cr>>2]-+g[Br>>2])*.7071067690849304;g[ds>>2]=+g[Fq>>2]+ +g[Iq>>2];g[es>>2]=+g[Cq>>2]+ +g[Dq>>2];g[fs>>2]=+g[ds>>2]*.9238795042037964-+g[es>>2]*.3826834261417389;g[ls>>2]=+g[es>>2]*.9238795042037964+ +g[ds>>2]*.3826834261417389;g[Pq>>2]=+g[Lq>>2]-+g[Oq>>2];g[tr>>2]=+g[Sq>>2]-+g[Tq>>2];g[ur>>2]=+g[Pq>>2]*.9238795042037964-+g[tr>>2]*.3826834261417389;g[yr>>2]=+g[tr>>2]*.9238795042037964+ +g[Pq>>2]*.3826834261417389;g[zq>>2]=+g[lr>>2]+ +g[yq>>2];g[vr>>2]=+g[Kq>>2]+ +g[ur>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[zq>>2]-+g[vr>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[zq>>2]+ 
+g[vr>>2];g[nt>>2]=+g[yr>>2]-+g[xr>>2];g[qt>>2]=+g[ot>>2]+ +g[pt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[nt>>2]-+g[qt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[nt>>2]+ +g[qt>>2];g[rt>>2]=+g[ur>>2]-+g[Kq>>2];g[st>>2]=+g[pt>>2]-+g[ot>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[rt>>2]-+g[st>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[rt>>2]+ +g[st>>2];g[wr>>2]=+g[lr>>2]-+g[yq>>2];g[zr>>2]=+g[xr>>2]+ +g[yr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[wr>>2]-+g[zr>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[wr>>2]+ +g[zr>>2];g[cs>>2]=+g[Ar>>2]+ +g[bs>>2];g[js>>2]=+g[fs>>2]+ +g[is>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[cs>>2]-+g[js>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[cs>>2]+ +g[js>>2];g[Hs>>2]=+g[ms>>2]-+g[ls>>2];g[kt>>2]=+g[Is>>2]+ +g[Ls>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[Hs>>2]-+g[kt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[Hs>>2]+ +g[kt>>2];g[lt>>2]=+g[is>>2]-+g[fs>>2];g[mt>>2]=+g[Ls>>2]-+g[Is>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[lt>>2]-+g[mt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[lt>>2]+ +g[mt>>2];g[ks>>2]=+g[Ar>>2]-+g[bs>>2];g[ns>>2]=+g[ls>>2]+ +g[ms>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ks>>2]-+g[ns>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[ks>>2]+ +g[ns>>2];g[op>>2]=+g[cn>>2]+ +g[fn>>2];g[rp>>2]=(+g[pp>>2]+ +g[qp>>2])*.7071067690849304;g[sp>>2]=+g[op>>2]-+g[rp>>2];g[tq>>2]=+g[op>>2]+ +g[rp>>2];g[vp>>2]=+g[tp>>2]*.9238795042037964+ +g[up>>2]*.3826834261417389;g[yp>>2]=+g[wp>>2]*.3826834261417389-+g[xp>>2]*.9238795042037964;g[zp>>2]=+g[vp>>2]+ +g[yp>>2];g[ut>>2]=+g[vp>>2]-+g[yp>>2];g[vt>>2]=(+g[mn>>2]-+g[rn>>2])*.7071067690849304;g[yt>>2]=+g[wt>>2]+ +g[xt>>2];g[zt>>2]=+g[vt>>2]+ +g[yt>>2];g[Ft>>2]=+g[yt>>2]-+g[vt>>2];g[Uq>>2]=+g[up>>2]*.9238795042037964-+g[tp>>2]*.3826834261417389;g[Vq>>2]=+g[xp>>2]*.3826834261417389+ +g[wp>>2]*.9238795042037964;g[Wq>>2]=+g[Uq>>2]+ +g[Vq>>2];g[Et>>2]=+g[Vq>>2]-+g[Uq>>2];g[Jp>>2]=(+g[gp>>2]+ +g[bp>>2])*.7071067690849304;g[Kp>>2]=+g[Ip>>2]-+g[Jp>>2];g[$q>>2]=+g[Ip>>2]+ 
+g[Jp>>2];g[lq>>2]=(+g[Op>>2]+ +g[Pp>>2])*.7071067690849304;g[mq>>2]=+g[kq>>2]-+g[lq>>2];g[ar>>2]=+g[kq>>2]+ +g[lq>>2];g[nq>>2]=+g[Kp>>2]*.8314695954322815-+g[mq>>2]*.5555702447891235;g[fr>>2]=+g[$q>>2]*.9807852506637573+ +g[ar>>2]*.19509032368659973;g[rq>>2]=+g[Kp>>2]*.5555702447891235+ +g[mq>>2]*.8314695954322815;g[br>>2]=+g[$q>>2]*.19509032368659973-+g[ar>>2]*.9807852506637573;g[Cp>>2]=(+g[po>>2]+ +g[oo>>2])*.7071067690849304;g[Dp>>2]=+g[Bp>>2]-+g[Cp>>2];g[Yq>>2]=+g[Bp>>2]+ +g[Cp>>2];g[Fp>>2]=(+g[$o>>2]+ +g[go>>2])*.7071067690849304;g[Gp>>2]=+g[Ep>>2]-+g[Fp>>2];g[Zq>>2]=+g[Ep>>2]+ +g[Fp>>2];g[Hp>>2]=+g[Dp>>2]*.8314695954322815+ +g[Gp>>2]*.5555702447891235;g[er>>2]=+g[Yq>>2]*.9807852506637573-+g[Zq>>2]*.19509032368659973;g[qq>>2]=+g[Dp>>2]*.5555702447891235-+g[Gp>>2]*.8314695954322815;g[_q>>2]=+g[Yq>>2]*.19509032368659973+ +g[Zq>>2]*.9807852506637573;g[Ap>>2]=+g[sp>>2]+ +g[zp>>2];g[oq>>2]=+g[Hp>>2]+ +g[nq>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Ap>>2]-+g[oq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ap>>2]+ +g[oq>>2];g[Dt>>2]=+g[rq>>2]-+g[qq>>2];g[Gt>>2]=+g[Et>>2]+ +g[Ft>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[Dt>>2]-+g[Gt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[Dt>>2]+ +g[Gt>>2];g[Ht>>2]=+g[nq>>2]-+g[Hp>>2];g[It>>2]=+g[Ft>>2]-+g[Et>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[Ht>>2]-+g[It>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[Ht>>2]+ +g[It>>2];g[pq>>2]=+g[sp>>2]-+g[zp>>2];g[sq>>2]=+g[qq>>2]+ +g[rq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[pq>>2]-+g[sq>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[pq>>2]+ +g[sq>>2];g[Xq>>2]=+g[tq>>2]-+g[Wq>>2];g[cr>>2]=+g[_q>>2]+ +g[br>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Xq>>2]-+g[cr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Xq>>2]+ +g[cr>>2];g[tt>>2]=+g[br>>2]-+g[_q>>2];g[At>>2]=+g[ut>>2]+ +g[zt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[tt>>2]-+g[At>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[tt>>2]+ 
+g[At>>2];g[Bt>>2]=+g[fr>>2]-+g[er>>2];g[Ct>>2]=+g[zt>>2]-+g[ut>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[Bt>>2]-+g[Ct>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[Bt>>2]+ +g[Ct>>2];g[dr>>2]=+g[tq>>2]+ +g[Wq>>2];g[gr>>2]=+g[er>>2]+ +g[fr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[dr>>2]-+g[gr>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[dr>>2]+ +g[gr>>2];g[gn>>2]=+g[cn>>2]-+g[fn>>2];g[Tn>>2]=(+g[mn>>2]+ +g[rn>>2])*.7071067690849304;g[Un>>2]=+g[gn>>2]-+g[Tn>>2];g[Yp>>2]=+g[gn>>2]+ +g[Tn>>2];g[Eo>>2]=+g[Zn>>2]*.3826834261417389-+g[Do>>2]*.9238795042037964;g[Po>>2]=+g[Jo>>2]*.9238795042037964+ +g[Oo>>2]*.3826834261417389;g[Qo>>2]=+g[Eo>>2]+ +g[Po>>2];g[Ms>>2]=+g[Po>>2]-+g[Eo>>2];g[Ns>>2]=(+g[qp>>2]-+g[pp>>2])*.7071067690849304;g[Os>>2]=+g[xt>>2]-+g[wt>>2];g[Ps>>2]=+g[Ns>>2]+ +g[Os>>2];g[Vs>>2]=+g[Os>>2]-+g[Ns>>2];g[Zp>>2]=+g[Do>>2]*.3826834261417389+ +g[Zn>>2]*.9238795042037964;g[_p>>2]=+g[Oo>>2]*.9238795042037964-+g[Jo>>2]*.3826834261417389;g[$p>>2]=+g[Zp>>2]+ +g[_p>>2];g[Us>>2]=+g[Zp>>2]-+g[_p>>2];g[hp>>2]=(+g[bp>>2]-+g[gp>>2])*.7071067690849304;g[ip>>2]=+g[xo>>2]-+g[hp>>2];g[fq>>2]=+g[xo>>2]+ +g[hp>>2];g[Qp>>2]=(+g[Op>>2]-+g[Pp>>2])*.7071067690849304;g[Rp>>2]=+g[Np>>2]-+g[Qp>>2];g[eq>>2]=+g[Np>>2]+ +g[Qp>>2];g[Sp>>2]=+g[ip>>2]*.5555702447891235+ +g[Rp>>2]*.8314695954322815;g[mp>>2]=+g[fq>>2]*.9807852506637573+ +g[eq>>2]*.19509032368659973;g[Wp>>2]=+g[Rp>>2]*.5555702447891235-+g[ip>>2]*.8314695954322815;g[gq>>2]=+g[eq>>2]*.9807852506637573-+g[fq>>2]*.19509032368659973;g[ho>>2]=(+g[$o>>2]-+g[go>>2])*.7071067690849304;g[io>>2]=+g[Wo>>2]-+g[ho>>2];g[cq>>2]=+g[Wo>>2]+ +g[ho>>2];g[qo>>2]=(+g[oo>>2]-+g[po>>2])*.7071067690849304;g[ro>>2]=+g[no>>2]-+g[qo>>2];g[bq>>2]=+g[no>>2]+ +g[qo>>2];g[so>>2]=+g[io>>2]*.8314695954322815-+g[ro>>2]*.5555702447891235;g[lp>>2]=+g[cq>>2]*.19509032368659973-+g[bq>>2]*.9807852506637573;g[Vp>>2]=+g[ro>>2]*.8314695954322815+ +g[io>>2]*.5555702447891235;g[dq>>2]=+g[bq>>2]*.19509032368659973+ +g[cq>>2]*.9807852506637573;g[Ro>>2]=+g[Un>>2]+ 
+g[Qo>>2];g[Tp>>2]=+g[so>>2]+ +g[Sp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[Ro>>2]-+g[Tp>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ro>>2]+ +g[Tp>>2];g[Ts>>2]=+g[Wp>>2]-+g[Vp>>2];g[Ws>>2]=+g[Us>>2]+ +g[Vs>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[Ts>>2]-+g[Ws>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[Ts>>2]+ +g[Ws>>2];g[Xs>>2]=+g[Sp>>2]-+g[so>>2];g[Ys>>2]=+g[Vs>>2]-+g[Us>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[Xs>>2]-+g[Ys>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[Xs>>2]+ +g[Ys>>2];g[Up>>2]=+g[Un>>2]-+g[Qo>>2];g[Xp>>2]=+g[Vp>>2]+ +g[Wp>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Up>>2]-+g[Xp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Up>>2]+ +g[Xp>>2];g[aq>>2]=+g[Yp>>2]+ +g[$p>>2];g[hq>>2]=+g[dq>>2]+ +g[gq>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[aq>>2]-+g[hq>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[aq>>2]+ +g[hq>>2];g[Jt>>2]=+g[mp>>2]-+g[lp>>2];g[Qs>>2]=+g[Ms>>2]+ +g[Ps>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[Jt>>2]-+g[Qs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[Jt>>2]+ +g[Qs>>2];g[Rs>>2]=+g[gq>>2]-+g[dq>>2];g[Ss>>2]=+g[Ps>>2]-+g[Ms>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[Rs>>2]-+g[Ss>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[Rs>>2]+ +g[Ss>>2];g[iq>>2]=+g[Yp>>2]-+g[$p>>2];g[np>>2]=+g[lp>>2]+ +g[mp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[iq>>2]-+g[np>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[iq>>2]+ +g[np>>2];g[hm>>2]=+g[yl>>2]-+g[Dk>>2];g[Lm>>2]=+g[Jm>>2]+ +g[Km>>2];g[Mm>>2]=+g[hm>>2]-+g[Lm>>2];g[Nn>>2]=+g[hm>>2]+ +g[Lm>>2];g[Wt>>2]=+g[Hk>>2]-+g[Kk>>2];g[Xt>>2]=+g[tu>>2]-+g[su>>2];g[Yt>>2]=+g[Wt>>2]+ +g[Xt>>2];g[cu>>2]=+g[Xt>>2]-+g[Wt>>2];g[Nm>>2]=+g[Qk>>2]-+g[Rk>>2];g[Om>>2]=+g[Nk>>2]-+g[Ok>>2];g[Pm>>2]=+g[Nm>>2]*.8314695954322815+ +g[Om>>2]*.5555702447891235;g[On>>2]=+g[Om>>2]*.8314695954322815-+g[Nm>>2]*.5555702447891235;g[Qm>>2]=+g[Xk>>2]-+g[Yk>>2];g[Rm>>2]=+g[Uk>>2]-+g[Vk>>2];g[Sm>>2]=+g[Qm>>2]*.5555702447891235-+g[Rm>>2]*.8314695954322815;g[Pn>>2]=+g[Rm>>2]*.5555702447891235+ 
+g[Qm>>2]*.8314695954322815;g[sn>>2]=+g[Pm>>2]+ +g[Sm>>2];g[bu>>2]=+g[Pn>>2]-+g[On>>2];g[Qn>>2]=+g[On>>2]+ +g[Pn>>2];g[Vt>>2]=+g[Pm>>2]-+g[Sm>>2];g[un>>2]=+g[lm>>2]-+g[mm>>2];g[vn>>2]=+g[Hl>>2]-+g[im>>2];g[wn>>2]=+g[un>>2]-+g[vn>>2];g[Um>>2]=+g[un>>2]+ +g[vn>>2];g[xn>>2]=+g[Cl>>2]-+g[Dl>>2];g[yn>>2]=+g[pm>>2]-+g[om>>2];g[zn>>2]=+g[xn>>2]-+g[yn>>2];g[Tm>>2]=+g[xn>>2]+ +g[yn>>2];g[An>>2]=+g[wn>>2]*.4713967442512512+ +g[zn>>2]*.8819212913513184;g[$m>>2]=+g[Um>>2]*.9569403529167175+ +g[Tm>>2]*.290284663438797;g[Kn>>2]=+g[zn>>2]*.4713967442512512-+g[wn>>2]*.8819212913513184;g[Vm>>2]=+g[Tm>>2]*.9569403529167175-+g[Um>>2]*.290284663438797;g[Bn>>2]=+g[tm>>2]-+g[um>>2];g[Cn>>2]=+g[Kl>>2]-+g[Hm>>2];g[Dn>>2]=+g[Bn>>2]-+g[Cn>>2];g[Xm>>2]=+g[Bn>>2]+ +g[Cn>>2];g[En>>2]=+g[Em>>2]-+g[Fm>>2];g[Fn>>2]=+g[ym>>2]-+g[Bm>>2];g[Gn>>2]=+g[En>>2]-+g[Fn>>2];g[Wm>>2]=+g[En>>2]+ +g[Fn>>2];g[Hn>>2]=+g[Dn>>2]*.8819212913513184-+g[Gn>>2]*.4713967442512512;g[an>>2]=+g[Xm>>2]*.290284663438797-+g[Wm>>2]*.9569403529167175;g[Ln>>2]=+g[Gn>>2]*.8819212913513184+ +g[Dn>>2]*.4713967442512512;g[Ym>>2]=+g[Wm>>2]*.290284663438797+ +g[Xm>>2]*.9569403529167175;g[tn>>2]=+g[Mm>>2]+ +g[sn>>2];g[In>>2]=+g[An>>2]+ +g[Hn>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[tn>>2]-+g[In>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[tn>>2]+ +g[In>>2];g[au>>2]=+g[Ln>>2]-+g[Kn>>2];g[du>>2]=+g[bu>>2]+ +g[cu>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[au>>2]-+g[du>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[au>>2]+ +g[du>>2];g[eu>>2]=+g[Hn>>2]-+g[An>>2];g[fu>>2]=+g[cu>>2]-+g[bu>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[eu>>2]-+g[fu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[eu>>2]+ +g[fu>>2];g[Jn>>2]=+g[Mm>>2]-+g[sn>>2];g[Mn>>2]=+g[Kn>>2]+ +g[Ln>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Jn>>2]-+g[Mn>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Jn>>2]+ +g[Mn>>2];g[Rn>>2]=+g[Nn>>2]+ +g[Qn>>2];g[Zm>>2]=+g[Vm>>2]+ +g[Ym>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Rn>>2]-+g[Zm>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Rn>>2]+ 
+g[Zm>>2];g[Fu>>2]=+g[an>>2]-+g[$m>>2];g[Zt>>2]=+g[Vt>>2]+ +g[Yt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[Fu>>2]-+g[Zt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[Fu>>2]+ +g[Zt>>2];g[_t>>2]=+g[Ym>>2]-+g[Vm>>2];g[$t>>2]=+g[Yt>>2]-+g[Vt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[_t>>2]-+g[$t>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[_t>>2]+ +g[$t>>2];g[_m>>2]=+g[Nn>>2]-+g[Qn>>2];g[bn>>2]=+g[$m>>2]+ +g[an>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[_m>>2]-+g[bn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[_m>>2]+ +g[bn>>2];g[Ek>>2]=+g[yl>>2]+ +g[Dk>>2];g[Lk>>2]=+g[Hk>>2]+ +g[Kk>>2];g[Mk>>2]=+g[Ek>>2]-+g[Lk>>2];g[Tl>>2]=+g[Ek>>2]+ +g[Lk>>2];g[ru>>2]=+g[Km>>2]-+g[Jm>>2];g[uu>>2]=+g[su>>2]+ +g[tu>>2];g[vu>>2]=+g[ru>>2]+ +g[uu>>2];g[Bu>>2]=+g[uu>>2]-+g[ru>>2];g[Pk>>2]=+g[Nk>>2]+ +g[Ok>>2];g[Sk>>2]=+g[Qk>>2]+ +g[Rk>>2];g[Tk>>2]=+g[Pk>>2]*.19509032368659973-+g[Sk>>2]*.9807852506637573;g[Ul>>2]=+g[Sk>>2]*.19509032368659973+ +g[Pk>>2]*.9807852506637573;g[Wk>>2]=+g[Uk>>2]+ +g[Vk>>2];g[Zk>>2]=+g[Xk>>2]+ +g[Yk>>2];g[_k>>2]=+g[Wk>>2]*.9807852506637573+ +g[Zk>>2]*.19509032368659973;g[Vl>>2]=+g[Zk>>2]*.9807852506637573-+g[Wk>>2]*.19509032368659973;g[Al>>2]=+g[Tk>>2]+ +g[_k>>2];g[Au>>2]=+g[Ul>>2]-+g[Vl>>2];g[Wl>>2]=+g[Ul>>2]+ +g[Vl>>2];g[qu>>2]=+g[_k>>2]-+g[Tk>>2];g[El>>2]=+g[Cl>>2]+ +g[Dl>>2];g[jm>>2]=+g[Hl>>2]+ +g[im>>2];g[km>>2]=+g[El>>2]-+g[jm>>2];g[Yl>>2]=+g[El>>2]+ +g[jm>>2];g[nm>>2]=+g[lm>>2]+ +g[mm>>2];g[qm>>2]=+g[om>>2]+ +g[pm>>2];g[rm>>2]=+g[nm>>2]-+g[qm>>2];g[Zl>>2]=+g[nm>>2]+ +g[qm>>2];g[sm>>2]=+g[km>>2]*.6343932747840881+ +g[rm>>2]*.7730104327201843;g[em>>2]=+g[Yl>>2]*.0980171412229538-+g[Zl>>2]*.9951847195625305;g[Ql>>2]=+g[km>>2]*.7730104327201843-+g[rm>>2]*.6343932747840881;g[_l>>2]=+g[Yl>>2]*.9951847195625305+ +g[Zl>>2]*.0980171412229538;g[vm>>2]=+g[tm>>2]+ +g[um>>2];g[Cm>>2]=+g[ym>>2]+ +g[Bm>>2];g[Dm>>2]=+g[vm>>2]-+g[Cm>>2];g[$l>>2]=+g[vm>>2]+ +g[Cm>>2];g[Gm>>2]=+g[Em>>2]+ +g[Fm>>2];g[Ll>>2]=+g[Hm>>2]+ 
+g[Kl>>2];g[Ml>>2]=+g[Gm>>2]-+g[Ll>>2];g[am>>2]=+g[Gm>>2]+ +g[Ll>>2];g[Nl>>2]=+g[Dm>>2]*.6343932747840881-+g[Ml>>2]*.7730104327201843;g[fm>>2]=+g[$l>>2]*.0980171412229538+ +g[am>>2]*.9951847195625305;g[Rl>>2]=+g[Dm>>2]*.7730104327201843+ +g[Ml>>2]*.6343932747840881;g[bm>>2]=+g[$l>>2]*.9951847195625305-+g[am>>2]*.0980171412229538;g[Bl>>2]=+g[Mk>>2]-+g[Al>>2];g[Ol>>2]=+g[sm>>2]+ +g[Nl>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Bl>>2]-+g[Ol>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Bl>>2]+ +g[Ol>>2];g[zu>>2]=+g[Nl>>2]-+g[sm>>2];g[Cu>>2]=+g[Au>>2]+ +g[Bu>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[zu>>2]-+g[Cu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[zu>>2]+ +g[Cu>>2];g[Du>>2]=+g[Rl>>2]-+g[Ql>>2];g[Eu>>2]=+g[Bu>>2]-+g[Au>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[Du>>2]-+g[Eu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[Du>>2]+ +g[Eu>>2];g[Pl>>2]=+g[Mk>>2]+ +g[Al>>2];g[Sl>>2]=+g[Ql>>2]+ +g[Rl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Pl>>2]-+g[Sl>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Pl>>2]+ +g[Sl>>2];g[Xl>>2]=+g[Tl>>2]+ +g[Wl>>2];g[cm>>2]=+g[_l>>2]+ +g[bm>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Xl>>2]-+g[cm>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Xl>>2]+ +g[cm>>2];g[pu>>2]=+g[fm>>2]-+g[em>>2];g[wu>>2]=+g[qu>>2]+ +g[vu>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[pu>>2]-+g[wu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[pu>>2]+ +g[wu>>2];g[xu>>2]=+g[bm>>2]-+g[_l>>2];g[yu>>2]=+g[vu>>2]-+g[qu>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[xu>>2]-+g[yu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[xu>>2]+ +g[yu>>2];g[dm>>2]=+g[Tl>>2]-+g[Wl>>2];g[gm>>2]=+g[em>>2]+ +g[fm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[dm>>2]-+g[gm>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[dm>>2]+ +g[gm>>2];g[fj>>2]=+g[Uf>>2]-+g[dg>>2];g[ij>>2]=+g[gj>>2]+ +g[hj>>2];g[jj>>2]=+g[fj>>2]-+g[ij>>2];g[il>>2]=+g[fj>>2]+ +g[ij>>2];g[St>>2]=+g[Cf>>2]-+g[pg>>2];g[Tt>>2]=+g[dt>>2]-+g[at>>2];g[Ut>>2]=+g[St>>2]+ 
+g[Tt>>2];g[lu>>2]=+g[Tt>>2]-+g[St>>2];g[kj>>2]=+g[Jf>>2]-+g[tg>>2];g[lj>>2]=+g[zg>>2]-+g[ah>>2];g[mj>>2]=+g[kj>>2]*.5555702447891235-+g[lj>>2]*.8314695954322815;g[jl>>2]=+g[kj>>2]*.8314695954322815+ +g[lj>>2]*.5555702447891235;g[nj>>2]=+g[hh>>2]-+g[sh>>2];g[oj>>2]=+g[yh>>2]-+g[Dg>>2];g[pj>>2]=+g[nj>>2]*.5555702447891235+ +g[oj>>2]*.8314695954322815;g[kl>>2]=+g[nj>>2]*.8314695954322815-+g[oj>>2]*.5555702447891235;g[qj>>2]=+g[mj>>2]+ +g[pj>>2];g[ku>>2]=+g[jl>>2]-+g[kl>>2];g[ll>>2]=+g[jl>>2]+ +g[kl>>2];g[Rt>>2]=+g[pj>>2]-+g[mj>>2];g[rk>>2]=+g[yi>>2]-+g[Bi>>2];g[sk>>2]=+g[Ih>>2]-+g[ri>>2];g[tk>>2]=+g[rk>>2]-+g[sk>>2];g[nl>>2]=+g[rk>>2]+ +g[sk>>2];g[uk>>2]=+g[Mg>>2]-+g[Xg>>2];g[vk>>2]=+g[Ei>>2]-+g[Di>>2];g[wk>>2]=+g[uk>>2]-+g[vk>>2];g[ol>>2]=+g[uk>>2]+ +g[vk>>2];g[xk>>2]=+g[tk>>2]*.4713967442512512+ +g[wk>>2]*.8819212913513184;g[vl>>2]=+g[nl>>2]*.290284663438797-+g[ol>>2]*.9569403529167175;g[fl>>2]=+g[tk>>2]*.8819212913513184-+g[wk>>2]*.4713967442512512;g[pl>>2]=+g[nl>>2]*.9569403529167175+ +g[ol>>2]*.290284663438797;g[yk>>2]=+g[Oh>>2]-+g[Zh>>2];g[zk>>2]=+g[Fj>>2]-+g[Gj>>2];g[Ak>>2]=+g[yk>>2]-+g[zk>>2];g[ql>>2]=+g[yk>>2]+ +g[zk>>2];g[$k>>2]=+g[Aj>>2]-+g[Dj>>2];g[al>>2]=+g[tj>>2]-+g[Ki>>2];g[bl>>2]=+g[$k>>2]-+g[al>>2];g[rl>>2]=+g[$k>>2]+ +g[al>>2];g[cl>>2]=+g[Ak>>2]*.4713967442512512-+g[bl>>2]*.8819212913513184;g[wl>>2]=+g[ql>>2]*.290284663438797+ +g[rl>>2]*.9569403529167175;g[gl>>2]=+g[Ak>>2]*.8819212913513184+ +g[bl>>2]*.4713967442512512;g[sl>>2]=+g[ql>>2]*.9569403529167175-+g[rl>>2]*.290284663438797;g[rj>>2]=+g[jj>>2]-+g[qj>>2];g[dl>>2]=+g[xk>>2]+ +g[cl>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[rj>>2]-+g[dl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[rj>>2]+ +g[dl>>2];g[ju>>2]=+g[cl>>2]-+g[xk>>2];g[mu>>2]=+g[ku>>2]+ +g[lu>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[ju>>2]-+g[mu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[ju>>2]+ 
+g[mu>>2];g[nu>>2]=+g[gl>>2]-+g[fl>>2];g[ou>>2]=+g[lu>>2]-+g[ku>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[nu>>2]-+g[ou>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[nu>>2]+ +g[ou>>2];g[el>>2]=+g[jj>>2]+ +g[qj>>2];g[hl>>2]=+g[fl>>2]+ +g[gl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[el>>2]-+g[hl>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[el>>2]+ +g[hl>>2];g[ml>>2]=+g[il>>2]+ +g[ll>>2];g[tl>>2]=+g[pl>>2]+ +g[sl>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[ml>>2]-+g[tl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ml>>2]+ +g[tl>>2];g[Qt>>2]=+g[wl>>2]-+g[vl>>2];g[gu>>2]=+g[Rt>>2]+ +g[Ut>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[Qt>>2]-+g[gu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[Qt>>2]+ +g[gu>>2];g[hu>>2]=+g[sl>>2]-+g[pl>>2];g[iu>>2]=+g[Ut>>2]-+g[Rt>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[hu>>2]-+g[iu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[hu>>2]+ +g[iu>>2];g[ul>>2]=+g[il>>2]-+g[ll>>2];g[xl>>2]=+g[vl>>2]+ +g[wl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ul>>2]-+g[xl>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[ul>>2]+ +g[xl>>2];c[Xu>>2]=(c[Xu>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+504;c[n>>2]=c[n>>2]^c[2998]}i=Yu;return}/* Zr(a): reserves a 16-byte frame on the stack pointer i, stores a in it through heap view c, calls bn(a, 45, 5464) and restores i. What bn and the constants 45 and 5464 denote is not visible in this chunk. */function Zr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,45,5464);i=b;return}/* _r(a,b,d,e,f,h,j): every argument is coerced to int and spilled into a 272-byte stack frame. a and b are byte addresses read and written through heap view g (presumably the float32 view - confirm at the module header). d addresses a coefficient table, offset on entry by (f-1) times 40 bytes and read 10 entries per iteration. e is an element stride, used scaled by 0..5. The loop counter starts at f and the loop runs while it is below h. j is the per-iteration step: a advances by 4 times j bytes, b retreats by the same amount. NOTE(review): the literals .5 and .8660253882408142 (about sqrt(3)/2) together with the 5 coefficient pairs suggest a generated radix-6 FFT twiddle pass - confirm against the FFTW source. */function _r(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0;ta=i;i=i+272|0;k=ta+260|0;l=ta+256|0;m=ta+252|0;n=ta+248|0;ua=ta+244|0;o=ta+240|0;p=ta+236|0;sa=ta+224|0;w=ta+220|0;R=ta+216|0;na=ta+212|0;N=ta+208|0;la=ta+204|0;I=ta+200|0;pa=ta+196|0;E=ta+192|0;aa=ta+188|0;H=ta+184|0;oa=ta+180|0;B=ta+176|0;q=ta+172|0;L=ta+168|0;v=ta+164|0;M=ta+160|0;s=ta+156|0;u=ta+152|0;r=ta+148|0;t=ta+144|0;fa=ta+140|0;D=ta+136|0;ka=ta+132|0;C=ta+128|0;ca=ta+124|0;ea=ta+120|0;ba=ta+116|0;da=ta+112|0;ha=ta+108|0;ja=ta+104|0;ga=ta+100|0;ia=ta+96|0;W=ta+92|0;z=ta+88|0;$=ta+84|0;A=ta+80|0;y=ta+76|0;V=ta+72|0;x=ta+68|0;U=ta+64|0;Y=ta+60|0;_=ta+56|0;X=ta+52|0;Z=ta+48|0;F=ta+44|0;ma=ta+40|0;ra=ta+36|0;J=ta+32|0;qa=ta+28|0;G=ta+24|0;O=ta+20|0;K=ta+16|0;P=ta+12|0;Q=ta+8|0;S=ta+4|0;T=ta;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[ua>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ta+232>>2]=.5;g[ta+228>>2]=.8660253882408142;/* the two stack slots written just above (ta+232, ta+228) are never read back in this function; the arithmetic below uses the literals directly */c[sa>>2]=c[ua>>2];c[m>>2]=(c[m>>2]|0)+(((c[ua>>2]|0)-1|0)*10<<2);while(1){if((c[sa>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[L>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[r>>2]=+g[(c[m>>2]|0)+16>>2];g[t>>2]=+g[(c[m>>2]|0)+20>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[M>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[w>>2]=+g[q>>2]-+g[v>>2];g[R>>2]=+g[M>>2]+ +g[L>>2];g[na>>2]=+g[q>>2]+ +g[v>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[ca>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ba>>2]=+g[(c[m>>2]|0)+24>>2];g[da>>2]=+g[(c[m>>2]|0)+28>>2];g[fa>>2]=+g[ba>>2]*+g[ca>>2]+ 
+g[da>>2]*+g[ea>>2];g[D>>2]=+g[ba>>2]*+g[ea>>2]-+g[da>>2]*+g[ca>>2];g[ha>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ja>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ga>>2]=+g[c[m>>2]>>2];g[ia>>2]=+g[(c[m>>2]|0)+4>>2];g[ka>>2]=+g[ga>>2]*+g[ha>>2]+ +g[ia>>2]*+g[ja>>2];g[C>>2]=+g[ga>>2]*+g[ja>>2]-+g[ia>>2]*+g[ha>>2];g[la>>2]=+g[fa>>2]-+g[ka>>2];g[I>>2]=+g[D>>2]+ +g[C>>2];g[pa>>2]=+g[fa>>2]+ +g[ka>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[y>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[x>>2]=+g[(c[m>>2]|0)+8>>2];g[U>>2]=+g[(c[m>>2]|0)+12>>2];g[W>>2]=+g[x>>2]*+g[y>>2]+ +g[U>>2]*+g[V>>2];g[z>>2]=+g[x>>2]*+g[V>>2]-+g[U>>2]*+g[y>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[X>>2]=+g[(c[m>>2]|0)+32>>2];g[Z>>2]=+g[(c[m>>2]|0)+36>>2];g[$>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[A>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[aa>>2]=+g[W>>2]-+g[$>>2];g[H>>2]=+g[z>>2]+ +g[A>>2];g[oa>>2]=+g[W>>2]+ +g[$>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[F>>2]=(+g[B>>2]+ +g[E>>2])*.8660253882408142;g[ma>>2]=+g[aa>>2]+ +g[la>>2];g[ra>>2]=+g[w>>2]-+g[ma>>2]*.5;g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[w>>2]+ +g[ma>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ra>>2]+ +g[F>>2];g[c[l>>2]>>2]=+g[ra>>2]-+g[F>>2];g[J>>2]=(+g[H>>2]-+g[I>>2])*.8660253882408142;g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[G>>2]=+g[na>>2]-+g[qa>>2]*.5;g[c[k>>2]>>2]=+g[na>>2]+ +g[qa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]+ +g[J>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]-+g[J>>2];g[O>>2]=(+g[la>>2]-+g[aa>>2])*.8660253882408142;g[K>>2]=+g[E>>2]-+g[B>>2];g[P>>2]=+g[K>>2]*.5+ +g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[K>>2]-+g[N>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[O>>2]-+g[P>>2];g[Q>>2]=(+g[oa>>2]-+g[pa>>2])*.8660253882408142;g[S>>2]=+g[H>>2]+ +g[I>>2];g[T>>2]=+g[R>>2]-+g[S>>2]*.5;g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Q>>2]-+g[T>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[S>>2]+ 
+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Q>>2]+ +g[T>>2];/* loop step: counter + 1, a-pointer forward by 4 times j bytes, b-pointer backward by the same amount, coefficient pointer forward by 40 bytes */c[sa>>2]=(c[sa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+40}i=ta;return}/* $r(a): same shape as Zr - reserves a 16-byte stack frame, stores a in it, calls bn(a, 46, 5512) and restores i. What bn and the constants denote is not visible in this chunk. */function $r(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,46,5512);i=b;return} +function gu(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,
xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,
Tm=0,Um=0,Vm=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,oo=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0,uq=0,vq=0,wq=0,xq=0,yq=0,zq=0,Aq=0,Bq=0,Cq=0,Dq=0,Eq=0,Fq=0,Gq=0,Hq=0,Iq=0,Jq=0,Kq=0,Lq=0,Mq=0,Nq=0,Oq=0,Pq=0,Qq=0,Rq=0,Sq=0,Tq=0,Uq=0,Vq=0,Wq=0,Xq=0,Yq=0,Zq=0,_q=0,$q=0,ar=0,br=0,cr=0,dr=0,er=0,fr=0,gr=0,hr=0,ir=0,jr=0,kr=0,lr=0,mr=0,nr=0,or=0,pr=0,qr=0,rr=0,sr=0,tr=0,ur=0,vr=0,wr=0,xr=0,yr=0,zr=0,Ar=0,Br=0,Cr=0,Dr=0,Er=0,Fr=0,Gr=0,Hr=0,Ir=0,Jr=0,Kr=0,Lr=0,Mr=0,Nr=0,Or=0,Pr=0,Qr=0,Rr=0,Sr=0,Tr=0,Ur=0,Vr=0,Wr=0,Xr=0,Yr=0,Zr=0,_r=0,$r=0,as=0,bs=0,cs=0,ds=0,es=0,fs=0,gs=0,hs=0,is=0,js=0,ks=0,ls=0,ms=0,ns=0,os=0,ps=0,qs=0,rs=0,ss=0,ts=0,us=0,vs=0,ws=0,xs=0,ys=0,zs=0,As=0,Bs=0,Cs=0,Ds=0,Es=0,Fs=0,Gs=0,Hs=0,Is=0,Js=0,Ks=0,Ls=0,Ms=0,Ns=0,Os=0,Ps=0,Qs=0,Rs=0,Ss=0,Ts=0,Us=0,Vs=0,Ws=0,Xs=0,Ys=0,Zs=0,_s=0,$s=0,at=0,bt=0,ct=0,dt=0,et=0,ft=0,gt=0,ht=0,it=0,jt=0,kt=0,lt=0,mt=0,nt=0,ot=0,pt=0,qt=0,rt=0,st=0,tt=0,ut=0,vt=0,wt=0,xt=0,yt=0,zt=0,At=0,Bt=0,Ct=0,Dt=0,Et=0,Ft=0,Gt=0,Ht=0,It=0,Jt=0,Kt=0,Lt=0,Mt=0,Nt=0,Ot=0,Pt=0,Qt=0,Rt=0,St=0,Tt=0,Ut=0,Vt=0,Wt=0,Xt=0,Yt=0,Zt=0,_t=0,$t=0,au=0,bu=0,cu=0,du=0,eu=0,fu=0,gu=0,hu=0,iu=0,ju=0,ku=0,lu=0,mu=0,nu=0,ou=0,
pu=0,qu=0,ru=0,su=0,tu=0,uu=0,vu=0,wu=0,xu=0,yu=0,zu=0,Au=0,Bu=0,Cu=0,Du=0,Eu=0,Fu=0,Gu=0,Hu=0,Iu=0,Ju=0,Ku=0,Lu=0,Mu=0,Nu=0,Ou=0,Pu=0,Qu=0,Ru=0,Su=0,Tu=0,Uu=0,Vu=0,Wu=0,Xu=0,Yu=0,Zu=0;Yu=i;i=i+4752|0;k=Yu+4744|0;l=Yu+4740|0;m=Yu+4736|0;n=Yu+4732|0;Zu=Yu+4728|0;o=Yu+4724|0;p=Yu+4720|0;Xu=Yu+4656|0;jq=Yu+4652|0;Nh=Yu+4648|0;Uq=Yu+4644|0;ut=Yu+4640|0;hs=Yu+4636|0;Ys=Yu+4632|0;kb=Yu+4628|0;xe=Yu+4624|0;kd=Yu+4620|0;Zf=Yu+4616|0;fl=Yu+4612|0;go=Yu+4608|0;Tm=Yu+4604|0;hp=Yu+4600|0;Rg=Yu+4596|0;Oi=Yu+4592|0;ak=Yu+4588|0;oh=Yu+4584|0;Rb=Yu+4580|0;pd=Yu+4576|0;nr=Yu+4572|0;xt=Yu+4568|0;Fe=Yu+4564|0;ag=Yu+4560|0;Al=Yu+4556|0;mo=Yu+4552|0;kr=Yu+4548|0;yt=Yu+4544|0;Vh=Yu+4540|0;Ri=Yu+4536|0;Hl=Yu+4532|0;no=Yu+4528|0;pk=Yu+4524|0;lh=Yu+4520|0;Za=Yu+4516|0;qd=Yu+4512|0;gr=Yu+4508|0;At=Yu+4504|0;Ce=Yu+4500|0;bg=Yu+4496|0;Ik=Yu+4492|0;jo=Yu+4488|0;dr=Yu+4484|0;Bt=Yu+4480|0;Sh=Yu+4476|0;Si=Yu+4472|0;Pk=Yu+4468|0;ko=Yu+4464|0;Ru=Yu+4460|0;Pi=Yu+4456|0;$q=Yu+4452|0;Zs=Yu+4448|0;ks=Yu+4444|0;vt=Yu+4440|0;Db=Yu+4436|0;_f=Yu+4432|0;nd=Yu+4428|0;ye=Yu+4424|0;ul=Yu+4420|0;ip=Yu+4416|0;Wm=Yu+4412|0;ho=Yu+4408|0;Ug=Yu+4404|0;Oh=Yu+4400|0;M=Yu+4396|0;Aa=Yu+4392|0;Cg=Yu+4388|0;Fg=Yu+4384|0;Ig=Yu+4380|0;Jg=Yu+4376|0;Sc=Yu+4372|0;lf=Yu+4368|0;ur=Yu+4364|0;Ft=Yu+4360|0;Br=Yu+4356|0;Jt=Yu+4352|0;yr=Yu+4348|0;Gt=Yu+4344|0;Td=Yu+4340|0;Ie=Yu+4336|0;Jd=Yu+4332|0;jf=Yu+4328|0;Wd=Yu+4324|0;mf=Yu+4320|0;gm=Yu+4316|0;uo=Yu+4312|0;vn=Yu+4308|0;so=Yu+4304|0;Oq=Yu+4300|0;It=Yu+4296|0;_h=Yu+4292|0;Rj=Yu+4288|0;bi=Yu+4284|0;Ui=Yu+4280|0;Dn=Yu+4276|0;ro=Yu+4272|0;Gn=Yu+4268|0;vo=Yu+4264|0;fa=Yu+4260|0;ua=Yu+4256|0;rh=Yu+4252|0;uh=Yu+4248|0;xh=Yu+4244|0;yh=Yu+4240|0;Ac=Yu+4236|0;sf=Yu+4232|0;Cq=Yu+4228|0;Ps=Yu+4224|0;Jq=Yu+4220|0;Ss=Yu+4216|0;Gq=Yu+4212|0;Os=Yu+4208|0;dc=Yu+4204|0;pf=Yu+4200|0;Vb=Yu+4196|0;qf=Yu+4192|0;gc=Yu+4188|0;Tf=Yu+4184|0;tm=Yu+4180|0;Bo=Yu+4176|0;Kl=Yu+4172|0;zo=Yu+4168|0;vq=Yu+4164|0;Rs=Yu+4160|0;fi=Yu+4156|0;Wi=Yu+4152|0;ii=Yu+4148|0;Xi=Yu+4144|0;Sl=Yu+4140|0;yo=Yu+4136|0;Vl=Yu+4132|0
;bp=Yu+4128|0;Ib=Yu+4124|0;xk=Yu+4120|0;hf=Yu+4116|0;Mn=Yu+4112|0;id=Yu+4108|0;yk=Yu+4104|0;fd=Yu+4100|0;Nn=Yu+4096|0;Sn=Yu+4092|0;Qn=Yu+4088|0;Ha=Yu+4084|0;dl=Yu+4080|0;qk=Yu+4076|0;Pn=Yu+4072|0;ib=Yu+4068|0;al=Yu+4064|0;q=Yu+4060|0;za=Yu+4056|0;dd=Yu+4052|0;ed=Yu+4048|0;Rc=Yu+4044|0;_d=Yu+4040|0;gd=Yu+4036|0;hd=Yu+4032|0;zl=Yu+4028|0;Im=Yu+4024|0;bl=Yu+4020|0;Fa=Yu+4016|0;Ga=Yu+4012|0;cl=Yu+4008|0;Ah=Yu+4004|0;Ji=Yu+4e3|0;Ak=Yu+3996|0;Ia=Yu+3992|0;Ja=Yu+3988|0;$k=Yu+3984|0;rg=Yu+3980|0;ap=Yu+3976|0;sq=Yu+3972|0;tq=Yu+3968|0;fs=Yu+3964|0;gs=Yu+3960|0;Ea=Yu+3956|0;jb=Yu+3952|0;cd=Yu+3948|0;jd=Yu+3944|0;zk=Yu+3940|0;el=Yu+3936|0;On=Yu+3932|0;Rn=Yu+3928|0;Pg=Yu+3924|0;Qg=Yu+3920|0;Uj=Yu+3916|0;Bl=Yu+3912|0;hb=Yu+3908|0;Rk=Yu+3904|0;fb=Yu+3900|0;Cl=Yu+3896|0;mh=Yu+3892|0;Sk=Yu+3888|0;$j=Yu+3884|0;El=Yu+3880|0;Fl=Yu+3876|0;_a=Yu+3872|0;Pb=Yu+3868|0;Wk=Yu+3864|0;Zk=Yu+3860|0;nh=Yu+3856|0;ir=Yu+3852|0;jr=Yu+3848|0;Tu=Yu+3844|0;Uu=Yu+3840|0;Vu=Yu+3836|0;Wu=Yu+3832|0;Sj=Yu+3828|0;Tj=Yu+3824|0;$a=Yu+3820|0;ab=Yu+3816|0;bb=Yu+3812|0;cb=Yu+3808|0;db=Yu+3804|0;eb=Yu+3800|0;Xj=Yu+3796|0;Uk=Yu+3792|0;Ob=Yu+3788|0;Vk=Yu+3784|0;_j=Yu+3780|0;Xk=Yu+3776|0;Lb=Yu+3772|0;Yk=Yu+3768|0;Vj=Yu+3764|0;Wj=Yu+3760|0;Mb=Yu+3756|0;Nb=Yu+3752|0;Yj=Yu+3748|0;Zj=Yu+3744|0;Jb=Yu+3740|0;Kb=Yu+3736|0;gb=Yu+3732|0;Qb=Yu+3728|0;lr=Yu+3724|0;mr=Yu+3720|0;De=Yu+3716|0;Ee=Yu+3712|0;Tk=Yu+3708|0;_k=Yu+3704|0;Th=Yu+3700|0;Uh=Yu+3696|0;Dl=Yu+3692|0;Gl=Yu+3688|0;hk=Yu+3684|0;Jk=Yu+3680|0;Qa=Yu+3676|0;wl=Yu+3672|0;Oa=Yu+3668|0;Kk=Yu+3664|0;jh=Yu+3660|0;xl=Yu+3656|0;ok=Yu+3652|0;Mk=Yu+3648|0;Nk=Yu+3644|0;Fb=Yu+3640|0;Xa=Yu+3636|0;Dk=Yu+3632|0;Gk=Yu+3628|0;kh=Yu+3624|0;br=Yu+3620|0;cr=Yu+3616|0;bk=Yu+3612|0;ck=Yu+3608|0;dk=Yu+3604|0;ek=Yu+3600|0;fk=Yu+3596|0;gk=Yu+3592|0;Gb=Yu+3588|0;Hb=Yu+3584|0;Ka=Yu+3580|0;La=Yu+3576|0;Ma=Yu+3572|0;Na=Yu+3568|0;kk=Yu+3564|0;Bk=Yu+3560|0;Wa=Yu+3556|0;Ck=Yu+3552|0;nk=Yu+3548|0;Ek=Yu+3544|0;Ta=Yu+3540|0;Fk=Yu+3536|0;ik=Yu+3532|0;jk=Yu+3528|0;Ua=Yu+3524|0;Va=Yu+3520|0;lk=Yu+3516|0
;mk=Yu+3512|0;Ra=Yu+3508|0;Sa=Yu+3504|0;Pa=Yu+3500|0;Ya=Yu+3496|0;er=Yu+3492|0;fr=Yu+3488|0;Ae=Yu+3484|0;Be=Yu+3480|0;yl=Yu+3476|0;Hk=Yu+3472|0;Qh=Yu+3468|0;Rh=Yu+3464|0;Lk=Yu+3460|0;Ok=Yu+3456|0;Kt=Yu+3452|0;rb=Yu+3448|0;Iu=Yu+3444|0;ob=Yu+3440|0;lb=Yu+3436|0;sb=Yu+3432|0;Wq=Yu+3428|0;Vq=Yu+3424|0;sl=Yu+3420|0;pl=Yu+3416|0;Mu=Yu+3412|0;Ab=Yu+3408|0;Pu=Yu+3404|0;xb=Yu+3400|0;ub=Yu+3396|0;Bb=Yu+3392|0;Zq=Yu+3388|0;Yq=Yu+3384|0;ll=Yu+3380|0;il=Yu+3376|0;ql=Yu+3372|0;rl=Yu+3368|0;nl=Yu+3364|0;ol=Yu+3360|0;sr=Yu+3356|0;Bs=Yu+3352|0;pb=Yu+3348|0;qb=Yu+3344|0;Gu=Yu+3340|0;Hu=Yu+3336|0;mb=Yu+3332|0;nb=Yu+3328|0;jl=Yu+3324|0;kl=Yu+3320|0;gl=Yu+3316|0;hl=Yu+3312|0;Ku=Yu+3308|0;Lu=Yu+3304|0;yb=Yu+3300|0;zb=Yu+3296|0;Nu=Yu+3292|0;Ou=Yu+3288|0;vb=Yu+3284|0;wb=Yu+3280|0;Ju=Yu+3276|0;Qu=Yu+3272|0;Xq=Yu+3268|0;_q=Yu+3264|0;is=Yu+3260|0;js=Yu+3256|0;tb=Yu+3252|0;Cb=Yu+3248|0;ld=Yu+3244|0;md=Yu+3240|0;ml=Yu+3236|0;tl=Yu+3232|0;Um=Yu+3228|0;Vm=Yu+3224|0;Sg=Yu+3220|0;Tg=Yu+3216|0;ya=Yu+3212|0;D=Yu+3208|0;E=Yu+3204|0;xn=Yu+3200|0;Yl=Yu+3196|0;Od=Yu+3192|0;Rd=Yu+3188|0;Dg=Yu+3184|0;yn=Yu+3180|0;Zl=Yu+3176|0;H=Yu+3172|0;pc=Yu+3168|0;K=Yu+3164|0;mc=Yu+3160|0;L=Yu+3156|0;Eg=Yu+3152|0;Bn=Yu+3148|0;An=Yu+3144|0;em=Yu+3140|0;bm=Yu+3136|0;T=Yu+3132|0;Pq=Yu+3128|0;Qq=Yu+3124|0;Tc=Yu+3120|0;_c=Yu+3116|0;Rm=Yu+3112|0;tn=Yu+3108|0;Gg=Yu+3104|0;_=Yu+3100|0;Sq=Yu+3096|0;Tq=Yu+3092|0;Ad=Yu+3088|0;Hd=Yu+3084|0;Km=Yu+3080|0;Nm=Yu+3076|0;Hg=Yu+3072|0;wa=Yu+3068|0;xa=Yu+3064|0;B=Yu+3060|0;C=Yu+3056|0;Md=Yu+3052|0;Nd=Yu+3048|0;Pd=Yu+3044|0;Qd=Yu+3040|0;$l=Yu+3036|0;am=Yu+3032|0;cm=Yu+3028|0;dm=Yu+3024|0;F=Yu+3020|0;G=Yu+3016|0;nc=Yu+3012|0;oc=Yu+3008|0;I=Yu+3004|0;J=Yu+3e3|0;kc=Yu+2996|0;lc=Yu+2992|0;P=Yu+2988|0;Sm=Yu+2984|0;Zc=Yu+2980|0;sn=Yu+2976|0;S=Yu+2972|0;Pm=Yu+2968|0;Wc=Yu+2964|0;Qm=Yu+2960|0;N=Yu+2956|0;O=Yu+2952|0;Xc=Yu+2948|0;Yc=Yu+2944|0;Q=Yu+2940|0;R=Yu+2936|0;Uc=Yu+2932|0;Vc=Yu+2928|0;W=Yu+2924|0;Lm=Yu+2920|0;Gd=Yu+2916|0;Mm=Yu+2912|0;Z=Yu+2908|0;hm=Yu+2904|0;Dd=Yu+2900|0;Jm=Yu+2896|0;U=Yu+2
892|0;V=Yu+2888|0;Ed=Yu+2884|0;Fd=Yu+2880|0;X=Yu+2876|0;Y=Yu+2872|0;Bd=Yu+2868|0;Cd=Yu+2864|0;jc=Yu+2860|0;qc=Yu+2856|0;Rq=Yu+2852|0;tr=Yu+2848|0;zr=Yu+2844|0;Ar=Yu+2840|0;wr=Yu+2836|0;xr=Yu+2832|0;Ld=Yu+2828|0;Sd=Yu+2824|0;$c=Yu+2820|0;Id=Yu+2816|0;Ud=Yu+2812|0;Vd=Yu+2808|0;_l=Yu+2804|0;fm=Yu+2800|0;Om=Yu+2796|0;un=Yu+2792|0;Mq=Yu+2788|0;Nq=Yu+2784|0;Yh=Yu+2780|0;Zh=Yu+2776|0;$h=Yu+2772|0;ai=Yu+2768|0;zn=Yu+2764|0;Cn=Yu+2760|0;En=Yu+2756|0;Fn=Yu+2752|0;v=Yu+2748|0;y=Yu+2744|0;z=Yu+2740|0;Ml=Yu+2736|0;jm=Yu+2732|0;_b=Yu+2728|0;bc=Yu+2724|0;sh=Yu+2720|0;Nl=Yu+2716|0;km=Yu+2712|0;aa=Yu+2708|0;yc=Yu+2704|0;da=Yu+2700|0;vc=Yu+2696|0;ea=Yu+2692|0;th=Yu+2688|0;Ql=Yu+2684|0;Pl=Yu+2680|0;rm=Yu+2676|0;om=Yu+2672|0;ma=Yu+2668|0;wq=Yu+2664|0;xq=Yu+2660|0;Bc=Yu+2656|0;Ic=Yu+2652|0;Dm=Yu+2648|0;Gm=Yu+2644|0;vh=Yu+2640|0;ta=Yu+2636|0;zq=Yu+2632|0;Aq=Yu+2628|0;Kc=Yu+2624|0;Tb=Yu+2620|0;wm=Yu+2616|0;zm=Yu+2612|0;wh=Yu+2608|0;t=Yu+2604|0;u=Yu+2600|0;w=Yu+2596|0;x=Yu+2592|0;Yb=Yu+2588|0;Zb=Yu+2584|0;$b=Yu+2580|0;ac=Yu+2576|0;mm=Yu+2572|0;nm=Yu+2568|0;pm=Yu+2564|0;qm=Yu+2560|0;A=Yu+2556|0;$=Yu+2552|0;wc=Yu+2548|0;xc=Yu+2544|0;ba=Yu+2540|0;ca=Yu+2536|0;tc=Yu+2532|0;uc=Yu+2528|0;ia=Yu+2524|0;Em=Yu+2520|0;Hc=Yu+2516|0;Fm=Yu+2512|0;la=Yu+2508|0;Bm=Yu+2504|0;Ec=Yu+2500|0;Cm=Yu+2496|0;ga=Yu+2492|0;ha=Yu+2488|0;Fc=Yu+2484|0;Gc=Yu+2480|0;ja=Yu+2476|0;ka=Yu+2472|0;Cc=Yu+2468|0;Dc=Yu+2464|0;pa=Yu+2460|0;xm=Yu+2456|0;Qc=Yu+2452|0;ym=Yu+2448|0;sa=Yu+2444|0;um=Yu+2440|0;Nc=Yu+2436|0;vm=Yu+2432|0;na=Yu+2428|0;oa=Yu+2424|0;Oc=Yu+2420|0;Pc=Yu+2416|0;qa=Yu+2412|0;ra=Yu+2408|0;Lc=Yu+2404|0;Mc=Yu+2400|0;sc=Yu+2396|0;zc=Yu+2392|0;yq=Yu+2388|0;Bq=Yu+2384|0;Hq=Yu+2380|0;Iq=Yu+2376|0;Eq=Yu+2372|0;Fq=Yu+2368|0;Xb=Yu+2364|0;cc=Yu+2360|0;Jc=Yu+2356|0;Ub=Yu+2352|0;ec=Yu+2348|0;fc=Yu+2344|0;lm=Yu+2340|0;sm=Yu+2336|0;Am=Yu+2332|0;Hm=Yu+2328|0;rr=Yu+2324|0;uq=Yu+2320|0;di=Yu+2316|0;ei=Yu+2312|0;gi=Yu+2308|0;hi=Yu+2304|0;Ol=Yu+2300|0;Rl=Yu+2296|0;Tl=Yu+2292|0;Ul=Yu+2288|0;s=Yu+2284|0;zi=Yu+2280|0;wi=Yu+2276|0;Ai=Yu
+2272|0;Ca=Yu+2268|0;Ei=Yu+2264|0;ti=Yu+2260|0;Di=Yu+2256|0;Su=Yu+2252|0;r=Yu+2248|0;ui=Yu+2244|0;vi=Yu+2240|0;va=Yu+2236|0;Ba=Yu+2232|0;ri=Yu+2228|0;si=Yu+2224|0;pi=Yu+2220|0;xi=Yu+2216|0;oi=Yu+2212|0;qi=Yu+2208|0;Bi=Yu+2204|0;Fi=Yu+2200|0;yi=Yu+2196|0;Ci=Yu+2192|0;Hi=Yu+2188|0;Lh=Yu+2184|0;Gi=Yu+2180|0;Ii=Yu+2176|0;Qj=Yu+2172|0;mj=Yu+2168|0;fj=Yu+2164|0;nj=Yu+2160|0;Zi=Yu+2156|0;rj=Yu+2152|0;cj=Yu+2148|0;qj=Yu+2144|0;Oj=Yu+2140|0;Pj=Yu+2136|0;dj=Yu+2132|0;ej=Yu+2128|0;Vi=Yu+2124|0;Yi=Yu+2120|0;aj=Yu+2116|0;bj=Yu+2112|0;_i=Yu+2108|0;gj=Yu+2104|0;Nj=Yu+2100|0;$i=Yu+2096|0;tk=Yu+2092|0;vk=Yu+2088|0;sk=Yu+2084|0;uk=Yu+2080|0;ij=Yu+2076|0;kj=Yu+2072|0;hj=Yu+2068|0;jj=Yu+2064|0;oj=Yu+2060|0;rk=Yu+2056|0;lj=Yu+2052|0;pj=Yu+2048|0;Xh=Yu+2044|0;Cj=Yu+2040|0;vj=Yu+2036|0;Dj=Yu+2032|0;Li=Yu+2028|0;Hj=Yu+2024|0;sj=Yu+2020|0;Gj=Yu+2016|0;Ph=Yu+2012|0;Wh=Yu+2008|0;tj=Yu+2004|0;uj=Yu+2e3|0;ci=Yu+1996|0;Ki=Yu+1992|0;Qi=Yu+1988|0;Ti=Yu+1984|0;Mi=Yu+1980|0;wj=Yu+1976|0;Mh=Yu+1972|0;Ni=Yu+1968|0;Kj=Yu+1964|0;Mj=Yu+1960|0;Jj=Yu+1956|0;Lj=Yu+1952|0;yj=Yu+1948|0;Aj=Yu+1944|0;xj=Yu+1940|0;zj=Yu+1936|0;Ej=Yu+1932|0;Ij=Yu+1928|0;Bj=Yu+1924|0;Fj=Yu+1920|0;qh=Yu+1916|0;Fh=Yu+1912|0;Zg=Yu+1908|0;Gh=Yu+1904|0;Lg=Yu+1900|0;Kh=Yu+1896|0;Wg=Yu+1892|0;Jh=Yu+1888|0;ih=Yu+1884|0;ph=Yu+1880|0;Xg=Yu+1876|0;Yg=Yu+1872|0;zh=Yu+1868|0;Kg=Yu+1864|0;Og=Yu+1860|0;Vg=Yu+1856|0;Mg=Yu+1852|0;_g=Yu+1848|0;hh=Yu+1844|0;Ng=Yu+1840|0;li=Yu+1836|0;ni=Yu+1832|0;ki=Yu+1828|0;mi=Yu+1824|0;Bh=Yu+1820|0;Dh=Yu+1816|0;$g=Yu+1812|0;Ch=Yu+1808|0;Hh=Yu+1804|0;ji=Yu+1800|0;Eh=Yu+1796|0;Ih=Yu+1792|0;Df=Yu+1788|0;yg=Yu+1784|0;Pf=Yu+1780|0;ah=Yu+1776|0;Kf=Yu+1772|0;bh=Yu+1768|0;Sf=Yu+1764|0;zg=Yu+1760|0;Bf=Yu+1756|0;Cf=Yu+1752|0;Nf=Yu+1748|0;Of=Yu+1744|0;Gf=Yu+1740|0;Rf=Yu+1736|0;Jf=Yu+1732|0;Qf=Yu+1728|0;Ef=Yu+1724|0;Ff=Yu+1720|0;Hf=Yu+1716|0;If=Yu+1712|0;Lf=Yu+1708|0;sg=Yu+1704|0;Af=Yu+1700|0;Mf=Yu+1696|0;eh=Yu+1692|0;gh=Yu+1688|0;dh=Yu+1684|0;fh=Yu+1680|0;ug=Yu+1676|0;wg=Yu+1672|0;tg=Yu+1668|0;vg=Yu+1664|0;Ag=Yu+1660|0;ch=Yu
+1656|0;xg=Yu+1652|0;Bg=Yu+1648|0;rc=Yu+1644|0;be=Yu+1640|0;sd=Yu+1636|0;fe=Yu+1632|0;Zd=Yu+1628|0;ge=Yu+1624|0;vd=Yu+1620|0;ce=Yu+1616|0;Eb=Yu+1612|0;Sb=Yu+1608|0;od=Yu+1604|0;rd=Yu+1600|0;ic=Yu+1596|0;td=Yu+1592|0;Yd=Yu+1588|0;ud=Yu+1584|0;Wb=Yu+1580|0;hc=Yu+1576|0;Kd=Yu+1572|0;Xd=Yu+1568|0;ad=Yu+1564|0;wd=Yu+1560|0;Da=Yu+1556|0;bd=Yu+1552|0;Je=Yu+1548|0;Le=Yu+1544|0;ie=Yu+1540|0;Ke=Yu+1536|0;yd=Yu+1532|0;$d=Yu+1528|0;xd=Yu+1524|0;zd=Yu+1520|0;de=Yu+1516|0;he=Yu+1512|0;ae=Yu+1508|0;ee=Yu+1504|0;Pe=Yu+1500|0;le=Yu+1496|0;$e=Yu+1492|0;pe=Yu+1488|0;We=Yu+1484|0;qe=Yu+1480|0;cf=Yu+1476|0;me=Yu+1472|0;Ne=Yu+1468|0;Oe=Yu+1464|0;Ze=Yu+1460|0;_e=Yu+1456|0;Se=Yu+1452|0;af=Yu+1448|0;Ve=Yu+1444|0;bf=Yu+1440|0;Qe=Yu+1436|0;Re=Yu+1432|0;Te=Yu+1428|0;Ue=Yu+1424|0;Xe=Yu+1420|0;df=Yu+1416|0;Me=Yu+1412|0;Ye=Yu+1408|0;te=Yu+1404|0;ve=Yu+1400|0;se=Yu+1396|0;ue=Yu+1392|0;ff=Yu+1388|0;je=Yu+1384|0;ef=Yu+1380|0;gf=Yu+1376|0;ne=Yu+1372|0;re=Yu+1368|0;ke=Yu+1364|0;oe=Yu+1360|0;He=Yu+1356|0;ng=Yu+1352|0;dg=Yu+1348|0;tf=Yu+1344|0;Wf=Yu+1340|0;uf=Yu+1336|0;gg=Yu+1332|0;og=Yu+1328|0;ze=Yu+1324|0;Ge=Yu+1320|0;$f=Yu+1316|0;cg=Yu+1312|0;of=Yu+1308|0;fg=Yu+1304|0;Vf=Yu+1300|0;eg=Yu+1296|0;kf=Yu+1292|0;nf=Yu+1288|0;rf=Yu+1284|0;Uf=Yu+1280|0;Xf=Yu+1276|0;hg=Yu+1272|0;we=Yu+1268|0;Yf=Yu+1264|0;xf=Yu+1260|0;zf=Yu+1256|0;wf=Yu+1252|0;yf=Yu+1248|0;jg=Yu+1244|0;lg=Yu+1240|0;ig=Yu+1236|0;kg=Yu+1232|0;pg=Yu+1228|0;vf=Yu+1224|0;mg=Yu+1220|0;qg=Yu+1216|0;im=Yu+1212|0;Yo=Yu+1208|0;kn=Yu+1204|0;Ko=Yu+1200|0;$m=Yu+1196|0;Uo=Yu+1192|0;on=Yu+1188|0;Zn=Yu+1184|0;Jn=Yu+1180|0;pn=Yu+1176|0;cn=Yu+1172|0;ln=Yu+1168|0;Fo=Yu+1164|0;Zo=Yu+1160|0;No=Yu+1156|0;Vo=Yu+1152|0;vl=Yu+1148|0;Io=Yu+1144|0;Jl=Yu+1140|0;Jo=Yu+1136|0;Qk=Yu+1132|0;Il=Yu+1128|0;Xm=Yu+1124|0;Xn=Yu+1120|0;_m=Yu+1116|0;Yn=Yu+1112|0;Ym=Yu+1108|0;Zm=Yu+1104|0;Xl=Yu+1100|0;an=Yu+1096|0;In=Yu+1092|0;bn=Yu+1088|0;Ll=Yu+1084|0;Wl=Yu+1080|0;wn=Yu+1076|0;Hn=Yu+1072|0;ao=Yu+1068|0;Lo=Yu+1064|0;Eo=Yu+1060|0;Mo=Yu+1056|0;_n=Yu+1052|0;$n=Yu+1048|0;Co=Yu+1044|0;Do=
Yu+1040|0;Kn=Yu+1036|0;dn=Yu+1032|0;wk=Yu+1028|0;Ln=Yu+1024|0;Wo=Yu+1020|0;_o=Yu+1016|0;To=Yu+1012|0;Xo=Yu+1008|0;bo=Yu+1004|0;eo=Yu+1e3|0;$o=Yu+996|0;co=Yu+992|0;fn=Yu+988|0;hn=Yu+984|0;en=Yu+980|0;gn=Yu+976|0;mn=Yu+972|0;qn=Yu+968|0;jn=Yu+964|0;nn=Yu+960|0;Go=Yu+956|0;Oo=Yu+952|0;Wn=Yu+948|0;Ho=Yu+944|0;Qo=Yu+940|0;So=Yu+936|0;Po=Yu+932|0;Ro=Yu+928|0;Tn=Yu+924|0;Vn=Yu+920|0;rn=Yu+916|0;Un=Yu+912|0;qr=Yu+908|0;mt=Yu+904|0;zs=Yu+900|0;$r=Yu+896|0;ps=Yu+892|0;Ks=Yu+888|0;Fr=Yu+884|0;Pr=Yu+880|0;cs=Yu+876|0;Gr=Yu+872|0;ss=Yu+868|0;As=Yu+864|0;Wr=Yu+860|0;nt=Yu+856|0;Ds=Yu+852|0;Ls=Yu+848|0;ar=Yu+844|0;Zr=Yu+840|0;pr=Yu+836|0;_r=Yu+832|0;hr=Yu+828|0;or=Yu+824|0;ls=Yu+820|0;Nr=Yu+816|0;os=Yu+812|0;Or=Yu+808|0;ms=Yu+804|0;ns=Yu+800|0;Lq=Yu+796|0;qs=Yu+792|0;bs=Yu+788|0;rs=Yu+784|0;Dq=Yu+780|0;Kq=Yu+776|0;vr=Yu+772|0;Cr=Yu+768|0;Sr=Yu+764|0;as=Yu+760|0;Vr=Yu+756|0;Cs=Yu+752|0;Qr=Yu+748|0;Rr=Yu+744|0;Tr=Yu+740|0;Ur=Yu+736|0;ds=Yu+732|0;ts=Yu+728|0;rq=Yu+724|0;es=Yu+720|0;kt=Yu+716|0;ot=Yu+712|0;Js=Yu+708|0;lt=Yu+704|0;qt=Yu+700|0;st=Yu+696|0;pt=Yu+692|0;rt=Yu+688|0;vs=Yu+684|0;xs=Yu+680|0;us=Yu+676|0;ws=Yu+672|0;Dr=Yu+668|0;Hr=Yu+664|0;ys=Yu+660|0;Er=Yu+656|0;Xr=Yu+652|0;Es=Yu+648|0;Mr=Yu+644|0;Yr=Yu+640|0;Gs=Yu+636|0;Is=Yu+632|0;Fs=Yu+628|0;Hs=Yu+624|0;Jr=Yu+620|0;Lr=Yu+616|0;Ir=Yu+612|0;Kr=Yu+608|0;qo=Yu+604|0;kq=Yu+600|0;Xp=Yu+596|0;xp=Yu+592|0;Np=Yu+588|0;Hp=Yu+584|0;$p=Yu+580|0;lp=Yu+576|0;ep=Yu+572|0;aq=Yu+568|0;Qp=Yu+564|0;Yp=Yu+560|0;sp=Yu+556|0;lq=Yu+552|0;Ap=Yu+548|0;Ip=Yu+544|0;io=Yu+540|0;vp=Yu+536|0;po=Yu+532|0;wp=Yu+528|0;lo=Yu+524|0;oo=Yu+520|0;jp=Yu+516|0;hq=Yu+512|0;Mp=Yu+508|0;iq=Yu+504|0;kp=Yu+500|0;Lp=Yu+496|0;xo=Yu+492|0;Pp=Yu+488|0;dp=Yu+484|0;Op=Yu+480|0;to=Yu+476|0;wo=Yu+472|0;Ao=Yu+468|0;cp=Yu+464|0;op=Yu+460|0;zp=Yu+456|0;rp=Yu+452|0;yp=Yu+448|0;mp=Yu+444|0;np=Yu+440|0;pp=Yu+436|0;qp=Yu+432|0;fp=Yu+428|0;Rp=Yu+424|0;fo=Yu+420|0;gp=Yu+416|0;Jp=Yu+412|0;mq=Yu+408|0;Gp=Yu+404|0;Kp=Yu+400|0;oq=Yu+396|0;qq=Yu+392|0;nq=Yu+388|0;pq=Yu+384|0;Tp=Yu+380|0;V
p=Yu+376|0;Sp=Yu+372|0;Up=Yu+368|0;Zp=Yu+364|0;bq=Yu+360|0;Wp=Yu+356|0;_p=Yu+352|0;tp=Yu+348|0;Bp=Yu+344|0;gq=Yu+340|0;up=Yu+336|0;Dp=Yu+332|0;Fp=Yu+328|0;Cp=Yu+324|0;Ep=Yu+320|0;dq=Yu+316|0;fq=Yu+312|0;cq=Yu+308|0;eq=Yu+304|0;Et=Yu+300|0;$t=Yu+296|0;Nt=Yu+292|0;yu=Yu+288|0;ct=Yu+284|0;Xt=Yu+280|0;Rt=Yu+276|0;mu=Yu+272|0;Vs=Yu+268|0;St=Yu+264|0;ft=Yu+260|0;Ot=Yu+256|0;tu=Yu+252|0;au=Yu+248|0;Bu=Yu+244|0;Yt=Yu+240|0;wt=Yu+236|0;wu=Yu+232|0;Dt=Yu+228|0;xu=Yu+224|0;zt=Yu+220|0;Ct=Yu+216|0;_s=Yu+212|0;ku=Yu+208|0;bt=Yu+204|0;lu=Yu+200|0;$s=Yu+196|0;at=Yu+192|0;Ns=Yu+188|0;et=Yu+184|0;Us=Yu+180|0;dt=Yu+176|0;Ht=Yu+172|0;Ms=Yu+168|0;Qs=Yu+164|0;Ts=Yu+160|0;pu=Yu+156|0;zu=Yu+152|0;su=Yu+148|0;Au=Yu+144|0;nu=Yu+140|0;ou=Yu+136|0;qu=Yu+132|0;ru=Yu+128|0;Ws=Yu+124|0;gt=Yu+120|0;tt=Yu+116|0;Xs=Yu+112|0;Zt=Yu+108|0;bu=Yu+104|0;Wt=Yu+100|0;_t=Yu+96|0;du=Yu+92|0;fu=Yu+88|0;cu=Yu+84|0;eu=Yu+80|0;it=Yu+76|0;Lt=Yu+72|0;ht=Yu+68|0;jt=Yu+64|0;Pt=Yu+60|0;Tt=Yu+56|0;Mt=Yu+52|0;Qt=Yu+48|0;uu=Yu+44|0;Cu=Yu+40|0;ju=Yu+36|0;vu=Yu+32|0;Eu=Yu+28|0;Vt=Yu+24|0;Du=Yu+20|0;Fu=Yu+16|0;gu=Yu+12|0;iu=Yu+8|0;Ut=Yu+4|0;hu=Yu;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Zu>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Yu+4716>>2]=.0980171412229538;g[Yu+4712>>2]=.9951847195625305;g[Yu+4708>>2]=.7730104327201843;g[Yu+4704>>2]=.6343932747840881;g[Yu+4700>>2]=.4713967442512512;g[Yu+4696>>2]=.8819212913513184;g[Yu+4692>>2]=.9569403529167175;g[Yu+4688>>2]=.290284663438797;g[Yu+4684>>2]=.19509032368659973;g[Yu+4680>>2]=.9807852506637573;g[Yu+4676>>2]=.5555702447891235;g[Yu+4672>>2]=.8314695954322815;g[Yu+4668>>2]=.3826834261417389;g[Yu+4664>>2]=.9238795042037964;g[Yu+4660>>2]=.7071067690849304;c[Xu>>2]=c[Zu>>2];c[m>>2]=(c[m>>2]|0)+(((c[Zu>>2]|0)-1|0)*126<<2);while(1){if((c[Xu>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[Ib>>2]=+g[q>>2]+ 
+g[za>>2];g[xk>>2]=+g[q>>2]-+g[za>>2];g[Rc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[hf>>2]=+g[Rc>>2]+ +g[_d>>2];g[Mn>>2]=+g[Rc>>2]-+g[_d>>2];g[gd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[hd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[id>>2]=+g[gd>>2]-+g[hd>>2];g[yk>>2]=+g[gd>>2]+ +g[hd>>2];g[dd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[ed>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2];g[fd>>2]=+g[dd>>2]-+g[ed>>2];g[Nn>>2]=+g[dd>>2]+ +g[ed>>2];g[zl>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Im>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[bl>>2]=+g[zl>>2]-+g[Im>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[Ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[cl>>2]=+g[Fa>>2]+ +g[Ga>>2];g[Sn>>2]=+g[zl>>2]+ +g[Im>>2];g[Qn>>2]=+g[bl>>2]+ +g[cl>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[dl>>2]=+g[bl>>2]-+g[cl>>2];g[Ah>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ji>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Ak>>2]=+g[Ah>>2]-+g[Ji>>2];g[Ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[Ja>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[$k>>2]=+g[Ia>>2]+ +g[Ja>>2];g[qk>>2]=+g[Ah>>2]+ +g[Ji>>2];g[Pn>>2]=+g[Ak>>2]+ +g[$k>>2];g[ib>>2]=+g[Ia>>2]-+g[Ja>>2];g[al>>2]=+g[Ak>>2]-+g[$k>>2];g[rg>>2]=+g[Ib>>2]+ +g[hf>>2];g[ap>>2]=+g[qk>>2]+ +g[Sn>>2];g[jq>>2]=+g[rg>>2]+ +g[ap>>2];g[Nh>>2]=+g[rg>>2]-+g[ap>>2];g[sq>>2]=+g[xk>>2]+ +g[yk>>2];g[tq>>2]=(+g[Pn>>2]+ +g[Qn>>2])*.7071067690849304;g[Uq>>2]=+g[sq>>2]-+g[tq>>2];g[ut>>2]=+g[sq>>2]+ +g[tq>>2];g[fs>>2]=+g[Nn>>2]-+g[Mn>>2];g[gs>>2]=(+g[al>>2]-+g[dl>>2])*.7071067690849304;g[hs>>2]=+g[fs>>2]+ +g[gs>>2];g[Ys>>2]=+g[fs>>2]-+g[gs>>2];g[Ea>>2]=+g[Ib>>2]-+g[hf>>2];g[jb>>2]=+g[Ha>>2]-+g[ib>>2];g[kb>>2]=+g[Ea>>2]+ +g[jb>>2];g[xe>>2]=+g[Ea>>2]-+g[jb>>2];g[cd>>2]=+g[qk>>2]-+g[Sn>>2];g[jd>>2]=+g[fd>>2]-+g[id>>2];g[kd>>2]=+g[cd>>2]+ +g[jd>>2];g[Zf>>2]=+g[jd>>2]-+g[cd>>2];g[zk>>2]=+g[xk>>2]-+g[yk>>2];g[el>>2]=(+g[al>>2]+ +g[dl>>2])*.7071067690849304;g[fl>>2]=+g[zk>>2]-+g[el>>2];g[go>>2]=+g[zk>>2]+ 
+g[el>>2];g[On>>2]=+g[Mn>>2]+ +g[Nn>>2];g[Rn>>2]=(+g[Pn>>2]-+g[Qn>>2])*.7071067690849304;g[Tm>>2]=+g[On>>2]-+g[Rn>>2];g[hp>>2]=+g[On>>2]+ +g[Rn>>2];g[Pg>>2]=+g[fd>>2]+ +g[id>>2];g[Qg>>2]=+g[ib>>2]+ +g[Ha>>2];g[Rg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[Oi>>2]=+g[Pg>>2]-+g[Qg>>2];g[Tu>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Uu>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Vu>>2]=+g[Tu>>2]+ +g[Uu>>2];g[Wu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Sj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Tj>>2]=+g[Wu>>2]+ +g[Sj>>2];g[Uj>>2]=+g[Vu>>2]+ +g[Tj>>2];g[Bl>>2]=+g[Wu>>2]-+g[Sj>>2];g[hb>>2]=+g[Vu>>2]-+g[Tj>>2];g[Rk>>2]=+g[Tu>>2]-+g[Uu>>2];g[$a>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[ab>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[cb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[db>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[eb>>2]=+g[cb>>2]-+g[db>>2];g[fb>>2]=+g[bb>>2]-+g[eb>>2];g[Cl>>2]=+g[$a>>2]+ +g[ab>>2];g[mh>>2]=+g[bb>>2]+ +g[eb>>2];g[Sk>>2]=+g[cb>>2]+ +g[db>>2];g[Vj>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Wj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Xj>>2]=+g[Vj>>2]+ +g[Wj>>2];g[Uk>>2]=+g[Vj>>2]-+g[Wj>>2];g[Mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[Nb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[Ob>>2]=+g[Mb>>2]-+g[Nb>>2];g[Vk>>2]=+g[Mb>>2]+ +g[Nb>>2];g[Yj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Zj>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[_j>>2]=+g[Yj>>2]+ +g[Zj>>2];g[Xk>>2]=+g[Yj>>2]-+g[Zj>>2];g[Jb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[Kb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[Yk>>2]=+g[Jb>>2]+ +g[Kb>>2];g[$j>>2]=+g[Xj>>2]+ +g[_j>>2];g[El>>2]=+g[Uk>>2]+ +g[Vk>>2];g[Fl>>2]=+g[Xk>>2]+ +g[Yk>>2];g[_a>>2]=+g[Xj>>2]-+g[_j>>2];g[Pb>>2]=+g[Lb>>2]-+g[Ob>>2];g[Wk>>2]=+g[Uk>>2]-+g[Vk>>2];g[Zk>>2]=+g[Xk>>2]-+g[Yk>>2];g[nh>>2]=+g[Ob>>2]+ +g[Lb>>2];g[ak>>2]=+g[Uj>>2]+ +g[$j>>2];g[oh>>2]=+g[mh>>2]+ +g[nh>>2];g[gb>>2]=+g[_a>>2]+ +g[fb>>2];g[Qb>>2]=+g[hb>>2]+ 
+g[Pb>>2];g[Rb>>2]=+g[gb>>2]*.9238795042037964+ +g[Qb>>2]*.3826834261417389;g[pd>>2]=+g[Qb>>2]*.9238795042037964-+g[gb>>2]*.3826834261417389;g[lr>>2]=+g[Rk>>2]+ +g[Sk>>2];g[mr>>2]=(+g[El>>2]+ +g[Fl>>2])*.7071067690849304;g[nr>>2]=+g[lr>>2]-+g[mr>>2];g[xt>>2]=+g[lr>>2]+ +g[mr>>2];g[De>>2]=+g[fb>>2]-+g[_a>>2];g[Ee>>2]=+g[hb>>2]-+g[Pb>>2];g[Fe>>2]=+g[De>>2]*.3826834261417389+ +g[Ee>>2]*.9238795042037964;g[ag>>2]=+g[Ee>>2]*.3826834261417389-+g[De>>2]*.9238795042037964;g[Tk>>2]=+g[Rk>>2]-+g[Sk>>2];g[_k>>2]=(+g[Wk>>2]+ +g[Zk>>2])*.7071067690849304;g[Al>>2]=+g[Tk>>2]-+g[_k>>2];g[mo>>2]=+g[Tk>>2]+ +g[_k>>2];g[ir>>2]=+g[Cl>>2]-+g[Bl>>2];g[jr>>2]=(+g[Wk>>2]-+g[Zk>>2])*.7071067690849304;g[kr>>2]=+g[ir>>2]+ +g[jr>>2];g[yt>>2]=+g[ir>>2]-+g[jr>>2];g[Th>>2]=+g[Uj>>2]-+g[$j>>2];g[Uh>>2]=+g[mh>>2]-+g[nh>>2];g[Vh>>2]=+g[Th>>2]+ +g[Uh>>2];g[Ri>>2]=+g[Th>>2]-+g[Uh>>2];g[Dl>>2]=+g[Bl>>2]+ +g[Cl>>2];g[Gl>>2]=(+g[El>>2]-+g[Fl>>2])*.7071067690849304;g[Hl>>2]=+g[Dl>>2]-+g[Gl>>2];g[no>>2]=+g[Dl>>2]+ +g[Gl>>2];g[bk>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ck>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[dk>>2]=+g[bk>>2]+ +g[ck>>2];g[ek>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[fk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[gk>>2]=+g[ek>>2]+ +g[fk>>2];g[hk>>2]=+g[dk>>2]+ +g[gk>>2];g[Jk>>2]=+g[bk>>2]-+g[ck>>2];g[Qa>>2]=+g[dk>>2]-+g[gk>>2];g[wl>>2]=+g[ek>>2]-+g[fk>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[Hb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[Ka>>2]=+g[Gb>>2]-+g[Hb>>2];g[La>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[Ma>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[Oa>>2]=+g[Ka>>2]-+g[Na>>2];g[Kk>>2]=+g[La>>2]+ +g[Ma>>2];g[jh>>2]=+g[Ka>>2]+ +g[Na>>2];g[xl>>2]=+g[Gb>>2]+ +g[Hb>>2];g[ik>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[jk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[kk>>2]=+g[ik>>2]+ 
+g[jk>>2];g[Bk>>2]=+g[ik>>2]-+g[jk>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[Va>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[Ck>>2]=+g[Ua>>2]+ +g[Va>>2];g[lk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[mk>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[nk>>2]=+g[lk>>2]+ +g[mk>>2];g[Ek>>2]=+g[lk>>2]-+g[mk>>2];g[Ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Fk>>2]=+g[Ra>>2]+ +g[Sa>>2];g[ok>>2]=+g[kk>>2]+ +g[nk>>2];g[Mk>>2]=+g[Bk>>2]-+g[Ck>>2];g[Nk>>2]=+g[Ek>>2]-+g[Fk>>2];g[Fb>>2]=+g[kk>>2]-+g[nk>>2];g[Xa>>2]=+g[Ta>>2]-+g[Wa>>2];g[Dk>>2]=+g[Bk>>2]+ +g[Ck>>2];g[Gk>>2]=+g[Ek>>2]+ +g[Fk>>2];g[kh>>2]=+g[Wa>>2]+ +g[Ta>>2];g[pk>>2]=+g[hk>>2]+ +g[ok>>2];g[lh>>2]=+g[jh>>2]+ +g[kh>>2];g[Pa>>2]=+g[Fb>>2]+ +g[Oa>>2];g[Ya>>2]=+g[Qa>>2]+ +g[Xa>>2];g[Za>>2]=+g[Pa>>2]*.9238795042037964-+g[Ya>>2]*.3826834261417389;g[qd>>2]=+g[Pa>>2]*.3826834261417389+ +g[Ya>>2]*.9238795042037964;g[er>>2]=+g[Jk>>2]+ +g[Kk>>2];g[fr>>2]=(+g[Dk>>2]+ +g[Gk>>2])*.7071067690849304;g[gr>>2]=+g[er>>2]-+g[fr>>2];g[At>>2]=+g[er>>2]+ +g[fr>>2];g[Ae>>2]=+g[Oa>>2]-+g[Fb>>2];g[Be>>2]=+g[Qa>>2]-+g[Xa>>2];g[Ce>>2]=+g[Ae>>2]*.3826834261417389-+g[Be>>2]*.9238795042037964;g[bg>>2]=+g[Ae>>2]*.9238795042037964+ +g[Be>>2]*.3826834261417389;g[yl>>2]=+g[wl>>2]-+g[xl>>2];g[Hk>>2]=(+g[Dk>>2]-+g[Gk>>2])*.7071067690849304;g[Ik>>2]=+g[yl>>2]-+g[Hk>>2];g[jo>>2]=+g[yl>>2]+ +g[Hk>>2];g[br>>2]=(+g[Mk>>2]-+g[Nk>>2])*.7071067690849304;g[cr>>2]=+g[wl>>2]+ +g[xl>>2];g[dr>>2]=+g[br>>2]-+g[cr>>2];g[Bt>>2]=+g[cr>>2]+ +g[br>>2];g[Qh>>2]=+g[jh>>2]-+g[kh>>2];g[Rh>>2]=+g[hk>>2]-+g[ok>>2];g[Sh>>2]=+g[Qh>>2]-+g[Rh>>2];g[Si>>2]=+g[Rh>>2]+ +g[Qh>>2];g[Lk>>2]=+g[Jk>>2]-+g[Kk>>2];g[Ok>>2]=(+g[Mk>>2]+ +g[Nk>>2])*.7071067690849304;g[Pk>>2]=+g[Lk>>2]-+g[Ok>>2];g[ko>>2]=+g[Lk>>2]+ +g[Ok>>2];g[sr>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Bs>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Kt>>2]=+g[sr>>2]+ 
+g[Bs>>2];g[ql>>2]=+g[sr>>2]-+g[Bs>>2];g[pb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[qb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[rb>>2]=+g[pb>>2]-+g[qb>>2];g[rl>>2]=+g[pb>>2]+ +g[qb>>2];g[Gu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Hu>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Iu>>2]=+g[Gu>>2]+ +g[Hu>>2];g[nl>>2]=+g[Gu>>2]-+g[Hu>>2];g[mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[nb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[ol>>2]=+g[mb>>2]+ +g[nb>>2];g[lb>>2]=+g[Kt>>2]-+g[Iu>>2];g[sb>>2]=+g[ob>>2]-+g[rb>>2];g[Wq>>2]=+g[ol>>2]-+g[nl>>2];g[Vq>>2]=+g[ql>>2]+ +g[rl>>2];g[sl>>2]=+g[ql>>2]-+g[rl>>2];g[pl>>2]=+g[nl>>2]+ +g[ol>>2];g[Ku>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Lu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[Mu>>2]=+g[Ku>>2]+ +g[Lu>>2];g[jl>>2]=+g[Ku>>2]-+g[Lu>>2];g[yb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2];g[kl>>2]=+g[yb>>2]+ +g[zb>>2];g[Nu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Ou>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Pu>>2]=+g[Nu>>2]+ +g[Ou>>2];g[gl>>2]=+g[Nu>>2]-+g[Ou>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[wb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[xb>>2]=+g[vb>>2]-+g[wb>>2];g[hl>>2]=+g[vb>>2]+ +g[wb>>2];g[ub>>2]=+g[Mu>>2]-+g[Pu>>2];g[Bb>>2]=+g[xb>>2]-+g[Ab>>2];g[Zq>>2]=+g[gl>>2]+ +g[hl>>2];g[Yq>>2]=+g[jl>>2]+ +g[kl>>2];g[ll>>2]=+g[jl>>2]-+g[kl>>2];g[il>>2]=+g[gl>>2]-+g[hl>>2];g[Ju>>2]=+g[Kt>>2]+ +g[Iu>>2];g[Qu>>2]=+g[Mu>>2]+ +g[Pu>>2];g[Ru>>2]=+g[Ju>>2]+ +g[Qu>>2];g[Pi>>2]=+g[Ju>>2]-+g[Qu>>2];g[Xq>>2]=+g[Vq>>2]*.3826834261417389-+g[Wq>>2]*.9238795042037964;g[_q>>2]=+g[Yq>>2]*.3826834261417389-+g[Zq>>2]*.9238795042037964;g[$q>>2]=+g[Xq>>2]+ +g[_q>>2];g[Zs>>2]=+g[Xq>>2]-+g[_q>>2];g[is>>2]=+g[Wq>>2]*.3826834261417389+ +g[Vq>>2]*.9238795042037964;g[js>>2]=+g[Zq>>2]*.3826834261417389+ +g[Yq>>2]*.9238795042037964;g[ks>>2]=+g[is>>2]-+g[js>>2];g[vt>>2]=+g[is>>2]+ 
+g[js>>2];g[tb>>2]=+g[lb>>2]-+g[sb>>2];g[Cb>>2]=+g[ub>>2]+ +g[Bb>>2];g[Db>>2]=(+g[tb>>2]+ +g[Cb>>2])*.7071067690849304;g[_f>>2]=(+g[tb>>2]-+g[Cb>>2])*.7071067690849304;g[ld>>2]=+g[lb>>2]+ +g[sb>>2];g[md>>2]=+g[Bb>>2]-+g[ub>>2];g[nd>>2]=(+g[ld>>2]+ +g[md>>2])*.7071067690849304;g[ye>>2]=(+g[md>>2]-+g[ld>>2])*.7071067690849304;g[ml>>2]=+g[il>>2]*.9238795042037964-+g[ll>>2]*.3826834261417389;g[tl>>2]=+g[pl>>2]*.9238795042037964+ +g[sl>>2]*.3826834261417389;g[ul>>2]=+g[ml>>2]-+g[tl>>2];g[ip>>2]=+g[tl>>2]+ +g[ml>>2];g[Um>>2]=+g[sl>>2]*.9238795042037964-+g[pl>>2]*.3826834261417389;g[Vm>>2]=+g[il>>2]*.3826834261417389+ +g[ll>>2]*.9238795042037964;g[Wm>>2]=+g[Um>>2]-+g[Vm>>2];g[ho>>2]=+g[Um>>2]+ +g[Vm>>2];g[Sg>>2]=+g[ob>>2]+ +g[rb>>2];g[Tg>>2]=+g[xb>>2]+ +g[Ab>>2];g[Ug>>2]=+g[Sg>>2]+ +g[Tg>>2];g[Oh>>2]=+g[Tg>>2]-+g[Sg>>2];g[wa>>2]=+g[c[l>>2]>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[D>>2]=+g[B>>2]+ +g[C>>2];g[E>>2]=+g[ya>>2]+ +g[D>>2];g[xn>>2]=+g[B>>2]-+g[C>>2];g[Yl>>2]=+g[wa>>2]-+g[xa>>2];g[Md>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2];g[Nd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[Od>>2]=+g[Md>>2]-+g[Nd>>2];g[Pd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[Qd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Dg>>2]=+g[Od>>2]+ +g[Rd>>2];g[yn>>2]=+g[Md>>2]+ +g[Nd>>2];g[Zl>>2]=+g[Pd>>2]+ +g[Qd>>2];g[F>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[G>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[$l>>2]=+g[F>>2]-+g[G>>2];g[nc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[oc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[am>>2]=+g[nc>>2]+ +g[oc>>2];g[I>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[J>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[K>>2]=+g[I>>2]+ 
+g[J>>2];g[cm>>2]=+g[I>>2]-+g[J>>2];g[kc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[lc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[mc>>2]=+g[kc>>2]-+g[lc>>2];g[dm>>2]=+g[kc>>2]+ +g[lc>>2];g[L>>2]=+g[H>>2]+ +g[K>>2];g[Eg>>2]=+g[pc>>2]+ +g[mc>>2];g[Bn>>2]=+g[cm>>2]+ +g[dm>>2];g[An>>2]=+g[$l>>2]+ +g[am>>2];g[em>>2]=+g[cm>>2]-+g[dm>>2];g[bm>>2]=+g[$l>>2]-+g[am>>2];g[N>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[O>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[Sm>>2]=+g[N>>2]-+g[O>>2];g[Xc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[Yc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[sn>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Q>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[R>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[S>>2]=+g[Q>>2]+ +g[R>>2];g[Pm>>2]=+g[Q>>2]-+g[R>>2];g[Uc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[Vc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[Wc>>2]=+g[Uc>>2]-+g[Vc>>2];g[Qm>>2]=+g[Uc>>2]+ +g[Vc>>2];g[T>>2]=+g[P>>2]+ +g[S>>2];g[Pq>>2]=+g[Sm>>2]+ +g[sn>>2];g[Qq>>2]=+g[Qm>>2]-+g[Pm>>2];g[Tc>>2]=+g[P>>2]-+g[S>>2];g[_c>>2]=+g[Wc>>2]-+g[Zc>>2];g[Rm>>2]=+g[Pm>>2]+ +g[Qm>>2];g[tn>>2]=+g[Sm>>2]-+g[sn>>2];g[Gg>>2]=+g[Wc>>2]+ +g[Zc>>2];g[U>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[V>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[Lm>>2]=+g[U>>2]-+g[V>>2];g[Ed>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[Fd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[Gd>>2]=+g[Ed>>2]-+g[Fd>>2];g[Mm>>2]=+g[Ed>>2]+ +g[Fd>>2];g[X>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Y>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[hm>>2]=+g[X>>2]-+g[Y>>2];g[Bd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[Cd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[Dd>>2]=+g[Bd>>2]-+g[Cd>>2];g[Jm>>2]=+g[Bd>>2]+ +g[Cd>>2];g[_>>2]=+g[W>>2]+ +g[Z>>2];g[Sq>>2]=+g[Lm>>2]+ +g[Mm>>2];g[Tq>>2]=+g[hm>>2]+ 
+g[Jm>>2];g[Ad>>2]=+g[W>>2]-+g[Z>>2];g[Hd>>2]=+g[Dd>>2]-+g[Gd>>2];g[Km>>2]=+g[hm>>2]-+g[Jm>>2];g[Nm>>2]=+g[Lm>>2]-+g[Mm>>2];g[Hg>>2]=+g[Dd>>2]+ +g[Gd>>2];g[M>>2]=+g[E>>2]+ +g[L>>2];g[Aa>>2]=+g[T>>2]+ +g[_>>2];g[Cg>>2]=+g[M>>2]-+g[Aa>>2];g[Fg>>2]=+g[Dg>>2]+ +g[Eg>>2];g[Ig>>2]=+g[Gg>>2]+ +g[Hg>>2];g[Jg>>2]=+g[Fg>>2]-+g[Ig>>2];g[jc>>2]=+g[ya>>2]-+g[D>>2];g[qc>>2]=+g[mc>>2]-+g[pc>>2];g[Sc>>2]=+g[jc>>2]+ +g[qc>>2];g[lf>>2]=+g[jc>>2]-+g[qc>>2];g[Rq>>2]=+g[Pq>>2]*.3826834261417389-+g[Qq>>2]*.9238795042037964;g[tr>>2]=+g[Sq>>2]*.3826834261417389-+g[Tq>>2]*.9238795042037964;g[ur>>2]=+g[Rq>>2]+ +g[tr>>2];g[Ft>>2]=+g[Rq>>2]-+g[tr>>2];g[zr>>2]=+g[Qq>>2]*.3826834261417389+ +g[Pq>>2]*.9238795042037964;g[Ar>>2]=+g[Tq>>2]*.3826834261417389+ +g[Sq>>2]*.9238795042037964;g[Br>>2]=+g[zr>>2]-+g[Ar>>2];g[Jt>>2]=+g[zr>>2]+ +g[Ar>>2];g[wr>>2]=(+g[bm>>2]-+g[em>>2])*.7071067690849304;g[xr>>2]=+g[xn>>2]+ +g[yn>>2];g[yr>>2]=+g[wr>>2]-+g[xr>>2];g[Gt>>2]=+g[xr>>2]+ +g[wr>>2];g[Ld>>2]=+g[H>>2]-+g[K>>2];g[Sd>>2]=+g[Od>>2]-+g[Rd>>2];g[Td>>2]=+g[Ld>>2]+ +g[Sd>>2];g[Ie>>2]=+g[Sd>>2]-+g[Ld>>2];g[$c>>2]=+g[Tc>>2]-+g[_c>>2];g[Id>>2]=+g[Ad>>2]+ +g[Hd>>2];g[Jd>>2]=(+g[$c>>2]+ +g[Id>>2])*.7071067690849304;g[jf>>2]=(+g[$c>>2]-+g[Id>>2])*.7071067690849304;g[Ud>>2]=+g[Tc>>2]+ +g[_c>>2];g[Vd>>2]=+g[Hd>>2]-+g[Ad>>2];g[Wd>>2]=(+g[Ud>>2]+ +g[Vd>>2])*.7071067690849304;g[mf>>2]=(+g[Vd>>2]-+g[Ud>>2])*.7071067690849304;g[_l>>2]=+g[Yl>>2]-+g[Zl>>2];g[fm>>2]=(+g[bm>>2]+ +g[em>>2])*.7071067690849304;g[gm>>2]=+g[_l>>2]-+g[fm>>2];g[uo>>2]=+g[_l>>2]+ +g[fm>>2];g[Om>>2]=+g[Km>>2]*.9238795042037964-+g[Nm>>2]*.3826834261417389;g[un>>2]=+g[Rm>>2]*.9238795042037964+ +g[tn>>2]*.3826834261417389;g[vn>>2]=+g[Om>>2]-+g[un>>2];g[so>>2]=+g[un>>2]+ +g[Om>>2];g[Mq>>2]=+g[Yl>>2]+ +g[Zl>>2];g[Nq>>2]=(+g[An>>2]+ +g[Bn>>2])*.7071067690849304;g[Oq>>2]=+g[Mq>>2]-+g[Nq>>2];g[It>>2]=+g[Mq>>2]+ +g[Nq>>2];g[Yh>>2]=+g[Dg>>2]-+g[Eg>>2];g[Zh>>2]=+g[T>>2]-+g[_>>2];g[_h>>2]=+g[Yh>>2]-+g[Zh>>2];g[Rj>>2]=+g[Zh>>2]+ 
+g[Yh>>2];g[$h>>2]=+g[E>>2]-+g[L>>2];g[ai>>2]=+g[Hg>>2]-+g[Gg>>2];g[bi>>2]=+g[$h>>2]-+g[ai>>2];g[Ui>>2]=+g[$h>>2]+ +g[ai>>2];g[zn>>2]=+g[xn>>2]-+g[yn>>2];g[Cn>>2]=(+g[An>>2]-+g[Bn>>2])*.7071067690849304;g[Dn>>2]=+g[zn>>2]-+g[Cn>>2];g[ro>>2]=+g[zn>>2]+ +g[Cn>>2];g[En>>2]=+g[tn>>2]*.9238795042037964-+g[Rm>>2]*.3826834261417389;g[Fn>>2]=+g[Km>>2]*.3826834261417389+ +g[Nm>>2]*.9238795042037964;g[Gn>>2]=+g[En>>2]-+g[Fn>>2];g[vo>>2]=+g[En>>2]+ +g[Fn>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[v>>2]+ +g[y>>2];g[Ml>>2]=+g[w>>2]-+g[x>>2];g[jm>>2]=+g[t>>2]-+g[u>>2];g[Yb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[Zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[_b>>2]=+g[Yb>>2]-+g[Zb>>2];g[$b>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[ac>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[bc>>2]=+g[$b>>2]-+g[ac>>2];g[sh>>2]=+g[_b>>2]+ +g[bc>>2];g[Nl>>2]=+g[Yb>>2]+ +g[Zb>>2];g[km>>2]=+g[$b>>2]+ +g[ac>>2];g[A>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[$>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[aa>>2]=+g[A>>2]+ +g[$>>2];g[mm>>2]=+g[A>>2]-+g[$>>2];g[wc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[xc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[yc>>2]=+g[wc>>2]-+g[xc>>2];g[nm>>2]=+g[wc>>2]+ +g[xc>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[pm>>2]=+g[ba>>2]-+g[ca>>2];g[tc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[uc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[vc>>2]=+g[tc>>2]-+g[uc>>2];g[qm>>2]=+g[tc>>2]+ +g[uc>>2];g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[th>>2]=+g[yc>>2]+ +g[vc>>2];g[Ql>>2]=+g[pm>>2]+ +g[qm>>2];g[Pl>>2]=+g[mm>>2]+ 
+g[nm>>2];g[rm>>2]=+g[pm>>2]-+g[qm>>2];g[om>>2]=+g[mm>>2]-+g[nm>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ha>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[Em>>2]=+g[ga>>2]-+g[ha>>2];g[Fc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[Gc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[Hc>>2]=+g[Fc>>2]-+g[Gc>>2];g[Fm>>2]=+g[Fc>>2]+ +g[Gc>>2];g[ja>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[Bm>>2]=+g[ja>>2]-+g[ka>>2];g[Cc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Dc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[Ec>>2]=+g[Cc>>2]-+g[Dc>>2];g[Cm>>2]=+g[Cc>>2]+ +g[Dc>>2];g[ma>>2]=+g[ia>>2]+ +g[la>>2];g[wq>>2]=+g[Em>>2]+ +g[Fm>>2];g[xq>>2]=+g[Cm>>2]-+g[Bm>>2];g[Bc>>2]=+g[ia>>2]-+g[la>>2];g[Ic>>2]=+g[Ec>>2]-+g[Hc>>2];g[Dm>>2]=+g[Bm>>2]+ +g[Cm>>2];g[Gm>>2]=+g[Em>>2]-+g[Fm>>2];g[vh>>2]=+g[Ec>>2]+ +g[Hc>>2];g[na>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[oa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[xm>>2]=+g[na>>2]-+g[oa>>2];g[Oc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[Pc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[ym>>2]=+g[Oc>>2]+ +g[Pc>>2];g[qa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[sa>>2]=+g[qa>>2]+ +g[ra>>2];g[um>>2]=+g[qa>>2]-+g[ra>>2];g[Lc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[Mc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[vm>>2]=+g[Lc>>2]+ +g[Mc>>2];g[ta>>2]=+g[pa>>2]+ +g[sa>>2];g[zq>>2]=+g[xm>>2]+ +g[ym>>2];g[Aq>>2]=+g[um>>2]+ +g[vm>>2];g[Kc>>2]=+g[pa>>2]-+g[sa>>2];g[Tb>>2]=+g[Nc>>2]-+g[Qc>>2];g[wm>>2]=+g[um>>2]-+g[vm>>2];g[zm>>2]=+g[xm>>2]-+g[ym>>2];g[wh>>2]=+g[Nc>>2]+ +g[Qc>>2];g[fa>>2]=+g[z>>2]+ +g[ea>>2];g[ua>>2]=+g[ma>>2]+ +g[ta>>2];g[rh>>2]=+g[fa>>2]-+g[ua>>2];g[uh>>2]=+g[sh>>2]+ +g[th>>2];g[xh>>2]=+g[vh>>2]+ 
+g[wh>>2];g[yh>>2]=+g[uh>>2]-+g[xh>>2];g[sc>>2]=+g[v>>2]-+g[y>>2];g[zc>>2]=+g[vc>>2]-+g[yc>>2];g[Ac>>2]=+g[sc>>2]+ +g[zc>>2];g[sf>>2]=+g[sc>>2]-+g[zc>>2];g[yq>>2]=+g[wq>>2]*.3826834261417389-+g[xq>>2]*.9238795042037964;g[Bq>>2]=+g[zq>>2]*.3826834261417389-+g[Aq>>2]*.9238795042037964;g[Cq>>2]=+g[yq>>2]+ +g[Bq>>2];g[Ps>>2]=+g[yq>>2]-+g[Bq>>2];g[Hq>>2]=+g[xq>>2]*.3826834261417389+ +g[wq>>2]*.9238795042037964;g[Iq>>2]=+g[Aq>>2]*.3826834261417389+ +g[zq>>2]*.9238795042037964;g[Jq>>2]=+g[Hq>>2]-+g[Iq>>2];g[Ss>>2]=+g[Hq>>2]+ +g[Iq>>2];g[Eq>>2]=+g[Nl>>2]-+g[Ml>>2];g[Fq>>2]=(+g[om>>2]-+g[rm>>2])*.7071067690849304;g[Gq>>2]=+g[Eq>>2]+ +g[Fq>>2];g[Os>>2]=+g[Eq>>2]-+g[Fq>>2];g[Xb>>2]=+g[aa>>2]-+g[da>>2];g[cc>>2]=+g[_b>>2]-+g[bc>>2];g[dc>>2]=+g[Xb>>2]+ +g[cc>>2];g[pf>>2]=+g[cc>>2]-+g[Xb>>2];g[Jc>>2]=+g[Bc>>2]-+g[Ic>>2];g[Ub>>2]=+g[Kc>>2]+ +g[Tb>>2];g[Vb>>2]=(+g[Jc>>2]+ +g[Ub>>2])*.7071067690849304;g[qf>>2]=(+g[Jc>>2]-+g[Ub>>2])*.7071067690849304;g[ec>>2]=+g[Bc>>2]+ +g[Ic>>2];g[fc>>2]=+g[Tb>>2]-+g[Kc>>2];g[gc>>2]=(+g[ec>>2]+ +g[fc>>2])*.7071067690849304;g[Tf>>2]=(+g[fc>>2]-+g[ec>>2])*.7071067690849304;g[lm>>2]=+g[jm>>2]-+g[km>>2];g[sm>>2]=(+g[om>>2]+ +g[rm>>2])*.7071067690849304;g[tm>>2]=+g[lm>>2]-+g[sm>>2];g[Bo>>2]=+g[lm>>2]+ +g[sm>>2];g[Am>>2]=+g[wm>>2]*.9238795042037964-+g[zm>>2]*.3826834261417389;g[Hm>>2]=+g[Dm>>2]*.9238795042037964+ +g[Gm>>2]*.3826834261417389;g[Kl>>2]=+g[Am>>2]-+g[Hm>>2];g[zo>>2]=+g[Hm>>2]+ +g[Am>>2];g[rr>>2]=+g[jm>>2]+ +g[km>>2];g[uq>>2]=(+g[Pl>>2]+ +g[Ql>>2])*.7071067690849304;g[vq>>2]=+g[rr>>2]-+g[uq>>2];g[Rs>>2]=+g[rr>>2]+ +g[uq>>2];g[di>>2]=+g[sh>>2]-+g[th>>2];g[ei>>2]=+g[ma>>2]-+g[ta>>2];g[fi>>2]=+g[di>>2]-+g[ei>>2];g[Wi>>2]=+g[ei>>2]+ +g[di>>2];g[gi>>2]=+g[z>>2]-+g[ea>>2];g[hi>>2]=+g[wh>>2]-+g[vh>>2];g[ii>>2]=+g[gi>>2]-+g[hi>>2];g[Xi>>2]=+g[gi>>2]+ +g[hi>>2];g[Ol>>2]=+g[Ml>>2]+ +g[Nl>>2];g[Rl>>2]=(+g[Pl>>2]-+g[Ql>>2])*.7071067690849304;g[Sl>>2]=+g[Ol>>2]-+g[Rl>>2];g[yo>>2]=+g[Ol>>2]+ 
+g[Rl>>2];g[Tl>>2]=+g[Gm>>2]*.9238795042037964-+g[Dm>>2]*.3826834261417389;g[Ul>>2]=+g[wm>>2]*.3826834261417389+ +g[zm>>2]*.9238795042037964;g[Vl>>2]=+g[Tl>>2]-+g[Ul>>2];g[bp>>2]=+g[Tl>>2]+ +g[Ul>>2];g[Su>>2]=+g[jq>>2]+ +g[Ru>>2];g[r>>2]=+g[ak>>2]+ +g[pk>>2];g[s>>2]=+g[Su>>2]+ +g[r>>2];g[zi>>2]=+g[Su>>2]-+g[r>>2];g[ui>>2]=+g[uh>>2]+ +g[xh>>2];g[vi>>2]=+g[Fg>>2]+ +g[Ig>>2];g[wi>>2]=+g[ui>>2]+ +g[vi>>2];g[Ai>>2]=+g[vi>>2]-+g[ui>>2];g[va>>2]=+g[fa>>2]+ +g[ua>>2];g[Ba>>2]=+g[M>>2]+ +g[Aa>>2];g[Ca>>2]=+g[va>>2]+ +g[Ba>>2];g[Ei>>2]=+g[va>>2]-+g[Ba>>2];g[ri>>2]=+g[Rg>>2]+ +g[Ug>>2];g[si>>2]=+g[oh>>2]+ +g[lh>>2];g[ti>>2]=+g[ri>>2]+ +g[si>>2];g[Di>>2]=+g[ri>>2]-+g[si>>2];g[c[k>>2]>>2]=+g[s>>2]+ +g[Ca>>2];g[c[l>>2]>>2]=+g[ti>>2]+ +g[wi>>2];g[pi>>2]=+g[s>>2]-+g[Ca>>2];g[xi>>2]=+g[ti>>2]-+g[wi>>2];g[oi>>2]=+g[(c[m>>2]|0)+248>>2];g[qi>>2]=+g[(c[m>>2]|0)+252>>2];g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[oi>>2]*+g[pi>>2]-+g[qi>>2]*+g[xi>>2];g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[qi>>2]*+g[pi>>2]+ +g[oi>>2]*+g[xi>>2];g[Bi>>2]=+g[zi>>2]-+g[Ai>>2];g[Fi>>2]=+g[Di>>2]-+g[Ei>>2];g[yi>>2]=+g[(c[m>>2]|0)+376>>2];g[Ci>>2]=+g[(c[m>>2]|0)+380>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[yi>>2]*+g[Bi>>2]-+g[Ci>>2]*+g[Fi>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[yi>>2]*+g[Fi>>2]+ +g[Ci>>2]*+g[Bi>>2];g[Hi>>2]=+g[zi>>2]+ +g[Ai>>2];g[Lh>>2]=+g[Ei>>2]+ +g[Di>>2];g[Gi>>2]=+g[(c[m>>2]|0)+120>>2];g[Ii>>2]=+g[(c[m>>2]|0)+124>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Gi>>2]*+g[Hi>>2]-+g[Ii>>2]*+g[Lh>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Gi>>2]*+g[Lh>>2]+ +g[Ii>>2]*+g[Hi>>2];g[Oj>>2]=+g[Nh>>2]+ +g[Oh>>2];g[Pj>>2]=(+g[Ri>>2]+ +g[Si>>2])*.7071067690849304;g[Qj>>2]=+g[Oj>>2]-+g[Pj>>2];g[mj>>2]=+g[Oj>>2]+ +g[Pj>>2];g[dj>>2]=+g[Xi>>2]*.9238795042037964-+g[Wi>>2]*.3826834261417389;g[ej>>2]=+g[Rj>>2]*.3826834261417389+ +g[Ui>>2]*.9238795042037964;g[fj>>2]=+g[dj>>2]-+g[ej>>2];g[nj>>2]=+g[dj>>2]+ +g[ej>>2];g[Vi>>2]=+g[Rj>>2]*.9238795042037964-+g[Ui>>2]*.3826834261417389;g[Yi>>2]=+g[Wi>>2]*.9238795042037964+ 
+g[Xi>>2]*.3826834261417389;g[Zi>>2]=+g[Vi>>2]-+g[Yi>>2];g[rj>>2]=+g[Yi>>2]+ +g[Vi>>2];g[aj>>2]=+g[Pi>>2]+ +g[Oi>>2];g[bj>>2]=(+g[Vh>>2]+ +g[Sh>>2])*.7071067690849304;g[cj>>2]=+g[aj>>2]-+g[bj>>2];g[qj>>2]=+g[aj>>2]+ +g[bj>>2];g[_i>>2]=+g[Qj>>2]-+g[Zi>>2];g[gj>>2]=+g[cj>>2]-+g[fj>>2];g[Nj>>2]=+g[(c[m>>2]|0)+408>>2];g[$i>>2]=+g[(c[m>>2]|0)+412>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[Nj>>2]*+g[_i>>2]-+g[$i>>2]*+g[gj>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[$i>>2]*+g[_i>>2]+ +g[Nj>>2]*+g[gj>>2];g[tk>>2]=+g[mj>>2]+ +g[nj>>2];g[vk>>2]=+g[qj>>2]+ +g[rj>>2];g[sk>>2]=+g[(c[m>>2]|0)+24>>2];g[uk>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[sk>>2]*+g[tk>>2]-+g[uk>>2]*+g[vk>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[sk>>2]*+g[vk>>2]+ +g[uk>>2]*+g[tk>>2];g[ij>>2]=+g[Qj>>2]+ +g[Zi>>2];g[kj>>2]=+g[cj>>2]+ +g[fj>>2];g[hj>>2]=+g[(c[m>>2]|0)+152>>2];g[jj>>2]=+g[(c[m>>2]|0)+156>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[hj>>2]*+g[ij>>2]-+g[jj>>2]*+g[kj>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[jj>>2]*+g[ij>>2]+ +g[hj>>2]*+g[kj>>2];g[oj>>2]=+g[mj>>2]-+g[nj>>2];g[rk>>2]=+g[qj>>2]-+g[rj>>2];g[lj>>2]=+g[(c[m>>2]|0)+280>>2];g[pj>>2]=+g[(c[m>>2]|0)+284>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[lj>>2]*+g[oj>>2]-+g[pj>>2]*+g[rk>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[lj>>2]*+g[rk>>2]+ +g[pj>>2]*+g[oj>>2];g[Ph>>2]=+g[Nh>>2]-+g[Oh>>2];g[Wh>>2]=(+g[Sh>>2]-+g[Vh>>2])*.7071067690849304;g[Xh>>2]=+g[Ph>>2]-+g[Wh>>2];g[Cj>>2]=+g[Ph>>2]+ +g[Wh>>2];g[tj>>2]=+g[ii>>2]*.3826834261417389-+g[fi>>2]*.9238795042037964;g[uj>>2]=+g[_h>>2]*.9238795042037964+ +g[bi>>2]*.3826834261417389;g[vj>>2]=+g[tj>>2]-+g[uj>>2];g[Dj>>2]=+g[tj>>2]+ +g[uj>>2];g[ci>>2]=+g[_h>>2]*.3826834261417389-+g[bi>>2]*.9238795042037964;g[Ki>>2]=+g[fi>>2]*.3826834261417389+ +g[ii>>2]*.9238795042037964;g[Li>>2]=+g[ci>>2]-+g[Ki>>2];g[Hj>>2]=+g[Ki>>2]+ +g[ci>>2];g[Qi>>2]=+g[Oi>>2]-+g[Pi>>2];g[Ti>>2]=(+g[Ri>>2]-+g[Si>>2])*.7071067690849304;g[sj>>2]=+g[Qi>>2]-+g[Ti>>2];g[Gj>>2]=+g[Qi>>2]+ 
+g[Ti>>2];g[Mi>>2]=+g[Xh>>2]-+g[Li>>2];g[wj>>2]=+g[sj>>2]-+g[vj>>2];g[Mh>>2]=+g[(c[m>>2]|0)+472>>2];g[Ni>>2]=+g[(c[m>>2]|0)+476>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[Mh>>2]*+g[Mi>>2]-+g[Ni>>2]*+g[wj>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[Ni>>2]*+g[Mi>>2]+ +g[Mh>>2]*+g[wj>>2];g[Kj>>2]=+g[Cj>>2]+ +g[Dj>>2];g[Mj>>2]=+g[Gj>>2]+ +g[Hj>>2];g[Jj>>2]=+g[(c[m>>2]|0)+88>>2];g[Lj>>2]=+g[(c[m>>2]|0)+92>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Jj>>2]*+g[Kj>>2]-+g[Lj>>2]*+g[Mj>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Jj>>2]*+g[Mj>>2]+ +g[Lj>>2]*+g[Kj>>2];g[yj>>2]=+g[Xh>>2]+ +g[Li>>2];g[Aj>>2]=+g[sj>>2]+ +g[vj>>2];g[xj>>2]=+g[(c[m>>2]|0)+216>>2];g[zj>>2]=+g[(c[m>>2]|0)+220>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[xj>>2]*+g[yj>>2]-+g[zj>>2]*+g[Aj>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[zj>>2]*+g[yj>>2]+ +g[xj>>2]*+g[Aj>>2];g[Ej>>2]=+g[Cj>>2]-+g[Dj>>2];g[Ij>>2]=+g[Gj>>2]-+g[Hj>>2];g[Bj>>2]=+g[(c[m>>2]|0)+344>>2];g[Fj>>2]=+g[(c[m>>2]|0)+348>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[Bj>>2]*+g[Ej>>2]-+g[Fj>>2]*+g[Ij>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[Bj>>2]*+g[Ij>>2]+ +g[Fj>>2]*+g[Ej>>2];g[ih>>2]=+g[jq>>2]-+g[Ru>>2];g[ph>>2]=+g[lh>>2]-+g[oh>>2];g[qh>>2]=+g[ih>>2]+ +g[ph>>2];g[Fh>>2]=+g[ih>>2]-+g[ph>>2];g[Xg>>2]=+g[rh>>2]+ +g[yh>>2];g[Yg>>2]=+g[Jg>>2]-+g[Cg>>2];g[Zg>>2]=(+g[Xg>>2]+ +g[Yg>>2])*.7071067690849304;g[Gh>>2]=(+g[Yg>>2]-+g[Xg>>2])*.7071067690849304;g[zh>>2]=+g[rh>>2]-+g[yh>>2];g[Kg>>2]=+g[Cg>>2]+ +g[Jg>>2];g[Lg>>2]=(+g[zh>>2]+ +g[Kg>>2])*.7071067690849304;g[Kh>>2]=(+g[zh>>2]-+g[Kg>>2])*.7071067690849304;g[Og>>2]=+g[ak>>2]-+g[pk>>2];g[Vg>>2]=+g[Rg>>2]-+g[Ug>>2];g[Wg>>2]=+g[Og>>2]+ +g[Vg>>2];g[Jh>>2]=+g[Vg>>2]-+g[Og>>2];g[Mg>>2]=+g[qh>>2]-+g[Lg>>2];g[_g>>2]=+g[Wg>>2]-+g[Zg>>2];g[hh>>2]=+g[(c[m>>2]|0)+312>>2];g[Ng>>2]=+g[(c[m>>2]|0)+316>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[hh>>2]*+g[Mg>>2]-+g[Ng>>2]*+g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[Ng>>2]*+g[Mg>>2]+ +g[hh>>2]*+g[_g>>2];g[li>>2]=+g[Fh>>2]+ 
+g[Gh>>2];g[ni>>2]=+g[Jh>>2]+ +g[Kh>>2];g[ki>>2]=+g[(c[m>>2]|0)+184>>2];g[mi>>2]=+g[(c[m>>2]|0)+188>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[ki>>2]*+g[li>>2]-+g[mi>>2]*+g[ni>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[ki>>2]*+g[ni>>2]+ +g[mi>>2]*+g[li>>2];g[Bh>>2]=+g[qh>>2]+ +g[Lg>>2];g[Dh>>2]=+g[Wg>>2]+ +g[Zg>>2];g[$g>>2]=+g[(c[m>>2]|0)+56>>2];g[Ch>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[$g>>2]*+g[Bh>>2]-+g[Ch>>2]*+g[Dh>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ch>>2]*+g[Bh>>2]+ +g[$g>>2]*+g[Dh>>2];g[Hh>>2]=+g[Fh>>2]-+g[Gh>>2];g[ji>>2]=+g[Jh>>2]-+g[Kh>>2];g[Eh>>2]=+g[(c[m>>2]|0)+440>>2];g[Ih>>2]=+g[(c[m>>2]|0)+444>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[Eh>>2]*+g[Hh>>2]-+g[Ih>>2]*+g[ji>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[Eh>>2]*+g[ji>>2]+ +g[Ih>>2]*+g[Hh>>2];g[Bf>>2]=+g[xe>>2]+ +g[ye>>2];g[Cf>>2]=+g[ag>>2]+ +g[bg>>2];g[Df>>2]=+g[Bf>>2]-+g[Cf>>2];g[yg>>2]=+g[Bf>>2]+ +g[Cf>>2];g[Nf>>2]=+g[Zf>>2]+ +g[_f>>2];g[Of>>2]=+g[Fe>>2]+ +g[Ce>>2];g[Pf>>2]=+g[Nf>>2]-+g[Of>>2];g[ah>>2]=+g[Nf>>2]+ +g[Of>>2];g[Ef>>2]=+g[Ie>>2]+ +g[jf>>2];g[Ff>>2]=+g[lf>>2]+ +g[mf>>2];g[Gf>>2]=+g[Ef>>2]*.8314695954322815-+g[Ff>>2]*.5555702447891235;g[Rf>>2]=+g[Ef>>2]*.5555702447891235+ +g[Ff>>2]*.8314695954322815;g[Hf>>2]=+g[pf>>2]+ +g[qf>>2];g[If>>2]=+g[sf>>2]+ +g[Tf>>2];g[Jf>>2]=+g[Hf>>2]*.8314695954322815+ +g[If>>2]*.5555702447891235;g[Qf>>2]=+g[If>>2]*.8314695954322815-+g[Hf>>2]*.5555702447891235;g[Kf>>2]=+g[Gf>>2]-+g[Jf>>2];g[bh>>2]=+g[Jf>>2]+ +g[Gf>>2];g[Sf>>2]=+g[Qf>>2]-+g[Rf>>2];g[zg>>2]=+g[Qf>>2]+ +g[Rf>>2];g[Lf>>2]=+g[Df>>2]-+g[Kf>>2];g[sg>>2]=+g[Pf>>2]-+g[Sf>>2];g[Af>>2]=+g[(c[m>>2]|0)+424>>2];g[Mf>>2]=+g[(c[m>>2]|0)+428>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[Af>>2]*+g[Lf>>2]-+g[Mf>>2]*+g[sg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[Mf>>2]*+g[Lf>>2]+ +g[Af>>2]*+g[sg>>2];g[eh>>2]=+g[yg>>2]+ +g[zg>>2];g[gh>>2]=+g[ah>>2]+ 
+g[bh>>2];g[dh>>2]=+g[(c[m>>2]|0)+40>>2];g[fh>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[dh>>2]*+g[eh>>2]-+g[fh>>2]*+g[gh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[dh>>2]*+g[gh>>2]+ +g[fh>>2]*+g[eh>>2];g[ug>>2]=+g[Df>>2]+ +g[Kf>>2];g[wg>>2]=+g[Pf>>2]+ +g[Sf>>2];g[tg>>2]=+g[(c[m>>2]|0)+168>>2];g[vg>>2]=+g[(c[m>>2]|0)+172>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[tg>>2]*+g[ug>>2]-+g[vg>>2]*+g[wg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[vg>>2]*+g[ug>>2]+ +g[tg>>2]*+g[wg>>2];g[Ag>>2]=+g[yg>>2]-+g[zg>>2];g[ch>>2]=+g[ah>>2]-+g[bh>>2];g[xg>>2]=+g[(c[m>>2]|0)+296>>2];g[Bg>>2]=+g[(c[m>>2]|0)+300>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[xg>>2]*+g[Ag>>2]-+g[Bg>>2]*+g[ch>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[xg>>2]*+g[ch>>2]+ +g[Bg>>2]*+g[Ag>>2];g[Eb>>2]=+g[kb>>2]-+g[Db>>2];g[Sb>>2]=+g[Za>>2]-+g[Rb>>2];g[rc>>2]=+g[Eb>>2]+ +g[Sb>>2];g[be>>2]=+g[Eb>>2]-+g[Sb>>2];g[od>>2]=+g[kd>>2]-+g[nd>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[sd>>2]=+g[od>>2]+ +g[rd>>2];g[fe>>2]=+g[od>>2]-+g[rd>>2];g[Wb>>2]=+g[Ac>>2]-+g[Vb>>2];g[hc>>2]=+g[dc>>2]-+g[gc>>2];g[ic>>2]=+g[Wb>>2]*.5555702447891235-+g[hc>>2]*.8314695954322815;g[td>>2]=+g[Wb>>2]*.8314695954322815+ +g[hc>>2]*.5555702447891235;g[Kd>>2]=+g[Sc>>2]-+g[Jd>>2];g[Xd>>2]=+g[Td>>2]-+g[Wd>>2];g[Yd>>2]=+g[Kd>>2]*.5555702447891235+ +g[Xd>>2]*.8314695954322815;g[ud>>2]=+g[Xd>>2]*.5555702447891235-+g[Kd>>2]*.8314695954322815;g[Zd>>2]=+g[ic>>2]+ +g[Yd>>2];g[ge>>2]=+g[ic>>2]-+g[Yd>>2];g[vd>>2]=+g[td>>2]+ +g[ud>>2];g[ce>>2]=+g[ud>>2]-+g[td>>2];g[ad>>2]=+g[rc>>2]-+g[Zd>>2];g[wd>>2]=+g[sd>>2]-+g[vd>>2];g[Da>>2]=+g[(c[m>>2]|0)+328>>2];g[bd>>2]=+g[(c[m>>2]|0)+332>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[Da>>2]*+g[ad>>2]-+g[bd>>2]*+g[wd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[bd>>2]*+g[ad>>2]+ +g[Da>>2]*+g[wd>>2];g[Je>>2]=+g[be>>2]+ +g[ce>>2];g[Le>>2]=+g[fe>>2]+ 
+g[ge>>2];g[ie>>2]=+g[(c[m>>2]|0)+200>>2];g[Ke>>2]=+g[(c[m>>2]|0)+204>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[ie>>2]*+g[Je>>2]-+g[Ke>>2]*+g[Le>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[ie>>2]*+g[Le>>2]+ +g[Ke>>2]*+g[Je>>2];g[yd>>2]=+g[rc>>2]+ +g[Zd>>2];g[$d>>2]=+g[sd>>2]+ +g[vd>>2];g[xd>>2]=+g[(c[m>>2]|0)+72>>2];g[zd>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[xd>>2]*+g[yd>>2]-+g[zd>>2]*+g[$d>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[zd>>2]*+g[yd>>2]+ +g[xd>>2]*+g[$d>>2];g[de>>2]=+g[be>>2]-+g[ce>>2];g[he>>2]=+g[fe>>2]-+g[ge>>2];g[ae>>2]=+g[(c[m>>2]|0)+456>>2];g[ee>>2]=+g[(c[m>>2]|0)+460>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[ae>>2]*+g[de>>2]-+g[ee>>2]*+g[he>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[ae>>2]*+g[he>>2]+ +g[ee>>2]*+g[de>>2];g[Ne>>2]=+g[kb>>2]+ +g[Db>>2];g[Oe>>2]=+g[pd>>2]+ +g[qd>>2];g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[le>>2]=+g[Ne>>2]-+g[Oe>>2];g[Ze>>2]=+g[kd>>2]+ +g[nd>>2];g[_e>>2]=+g[Rb>>2]+ +g[Za>>2];g[$e>>2]=+g[Ze>>2]+ +g[_e>>2];g[pe>>2]=+g[Ze>>2]-+g[_e>>2];g[Qe>>2]=+g[Ac>>2]+ +g[Vb>>2];g[Re>>2]=+g[dc>>2]+ +g[gc>>2];g[Se>>2]=+g[Qe>>2]*.9807852506637573-+g[Re>>2]*.19509032368659973;g[af>>2]=+g[Qe>>2]*.19509032368659973+ +g[Re>>2]*.9807852506637573;g[Te>>2]=+g[Sc>>2]+ +g[Jd>>2];g[Ue>>2]=+g[Td>>2]+ +g[Wd>>2];g[Ve>>2]=+g[Te>>2]*.9807852506637573+ +g[Ue>>2]*.19509032368659973;g[bf>>2]=+g[Ue>>2]*.9807852506637573-+g[Te>>2]*.19509032368659973;g[We>>2]=+g[Se>>2]+ +g[Ve>>2];g[qe>>2]=+g[Se>>2]-+g[Ve>>2];g[cf>>2]=+g[af>>2]+ +g[bf>>2];g[me>>2]=+g[bf>>2]-+g[af>>2];g[Xe>>2]=+g[Pe>>2]-+g[We>>2];g[df>>2]=+g[$e>>2]-+g[cf>>2];g[Me>>2]=+g[(c[m>>2]|0)+264>>2];g[Ye>>2]=+g[(c[m>>2]|0)+268>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[Me>>2]*+g[Xe>>2]-+g[Ye>>2]*+g[df>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[Ye>>2]*+g[Xe>>2]+ +g[Me>>2]*+g[df>>2];g[te>>2]=+g[le>>2]+ +g[me>>2];g[ve>>2]=+g[pe>>2]+ 
+g[qe>>2];g[se>>2]=+g[(c[m>>2]|0)+136>>2];g[ue>>2]=+g[(c[m>>2]|0)+140>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[se>>2]*+g[te>>2]-+g[ue>>2]*+g[ve>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[se>>2]*+g[ve>>2]+ +g[ue>>2]*+g[te>>2];g[ff>>2]=+g[Pe>>2]+ +g[We>>2];g[je>>2]=+g[$e>>2]+ +g[cf>>2];g[ef>>2]=+g[(c[m>>2]|0)+8>>2];g[gf>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ef>>2]*+g[ff>>2]-+g[gf>>2]*+g[je>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[gf>>2]*+g[ff>>2]+ +g[ef>>2]*+g[je>>2];g[ne>>2]=+g[le>>2]-+g[me>>2];g[re>>2]=+g[pe>>2]-+g[qe>>2];g[ke>>2]=+g[(c[m>>2]|0)+392>>2];g[oe>>2]=+g[(c[m>>2]|0)+396>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[ke>>2]*+g[ne>>2]-+g[oe>>2]*+g[re>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[ke>>2]*+g[re>>2]+ +g[oe>>2]*+g[ne>>2];g[ze>>2]=+g[xe>>2]-+g[ye>>2];g[Ge>>2]=+g[Ce>>2]-+g[Fe>>2];g[He>>2]=+g[ze>>2]-+g[Ge>>2];g[ng>>2]=+g[ze>>2]+ +g[Ge>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[cg>>2]=+g[ag>>2]-+g[bg>>2];g[dg>>2]=+g[$f>>2]-+g[cg>>2];g[tf>>2]=+g[$f>>2]+ +g[cg>>2];g[kf>>2]=+g[Ie>>2]-+g[jf>>2];g[nf>>2]=+g[lf>>2]-+g[mf>>2];g[of>>2]=+g[kf>>2]*.19509032368659973-+g[nf>>2]*.9807852506637573;g[fg>>2]=+g[kf>>2]*.9807852506637573+ +g[nf>>2]*.19509032368659973;g[rf>>2]=+g[pf>>2]-+g[qf>>2];g[Uf>>2]=+g[sf>>2]-+g[Tf>>2];g[Vf>>2]=+g[rf>>2]*.19509032368659973+ +g[Uf>>2]*.9807852506637573;g[eg>>2]=+g[Uf>>2]*.19509032368659973-+g[rf>>2]*.9807852506637573;g[Wf>>2]=+g[of>>2]-+g[Vf>>2];g[uf>>2]=+g[Vf>>2]+ +g[of>>2];g[gg>>2]=+g[eg>>2]-+g[fg>>2];g[og>>2]=+g[eg>>2]+ +g[fg>>2];g[Xf>>2]=+g[He>>2]-+g[Wf>>2];g[hg>>2]=+g[dg>>2]-+g[gg>>2];g[we>>2]=+g[(c[m>>2]|0)+488>>2];g[Yf>>2]=+g[(c[m>>2]|0)+492>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[we>>2]*+g[Xf>>2]-+g[Yf>>2]*+g[hg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[Yf>>2]*+g[Xf>>2]+ +g[we>>2]*+g[hg>>2];g[xf>>2]=+g[ng>>2]+ +g[og>>2];g[zf>>2]=+g[tf>>2]+ 
+g[uf>>2];g[wf>>2]=+g[(c[m>>2]|0)+104>>2];g[yf>>2]=+g[(c[m>>2]|0)+108>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[wf>>2]*+g[xf>>2]-+g[yf>>2]*+g[zf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[wf>>2]*+g[zf>>2]+ +g[yf>>2]*+g[xf>>2];g[jg>>2]=+g[He>>2]+ +g[Wf>>2];g[lg>>2]=+g[dg>>2]+ +g[gg>>2];g[ig>>2]=+g[(c[m>>2]|0)+232>>2];g[kg>>2]=+g[(c[m>>2]|0)+236>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[ig>>2]*+g[jg>>2]-+g[kg>>2]*+g[lg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[kg>>2]*+g[jg>>2]+ +g[ig>>2]*+g[lg>>2];g[pg>>2]=+g[ng>>2]-+g[og>>2];g[vf>>2]=+g[tf>>2]-+g[uf>>2];g[mg>>2]=+g[(c[m>>2]|0)+360>>2];g[qg>>2]=+g[(c[m>>2]|0)+364>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[mg>>2]*+g[pg>>2]-+g[qg>>2]*+g[vf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[mg>>2]*+g[vf>>2]+ +g[qg>>2]*+g[pg>>2];g[vl>>2]=+g[fl>>2]-+g[ul>>2];g[Io>>2]=+g[Tm>>2]+ +g[Wm>>2];g[Qk>>2]=+g[Ik>>2]*.5555702447891235-+g[Pk>>2]*.8314695954322815;g[Il>>2]=+g[Al>>2]*.8314695954322815+ +g[Hl>>2]*.5555702447891235;g[Jl>>2]=+g[Qk>>2]-+g[Il>>2];g[Jo>>2]=+g[Il>>2]+ +g[Qk>>2];g[im>>2]=+g[vl>>2]+ +g[Jl>>2];g[Yo>>2]=+g[Io>>2]-+g[Jo>>2];g[kn>>2]=+g[vl>>2]-+g[Jl>>2];g[Ko>>2]=+g[Io>>2]+ +g[Jo>>2];g[Xm>>2]=+g[Tm>>2]-+g[Wm>>2];g[Xn>>2]=+g[fl>>2]+ +g[ul>>2];g[Ym>>2]=+g[Al>>2]*.5555702447891235-+g[Hl>>2]*.8314695954322815;g[Zm>>2]=+g[Pk>>2]*.5555702447891235+ +g[Ik>>2]*.8314695954322815;g[_m>>2]=+g[Ym>>2]-+g[Zm>>2];g[Yn>>2]=+g[Ym>>2]+ +g[Zm>>2];g[$m>>2]=+g[Xm>>2]+ +g[_m>>2];g[Uo>>2]=+g[Xn>>2]-+g[Yn>>2];g[on>>2]=+g[Xm>>2]-+g[_m>>2];g[Zn>>2]=+g[Xn>>2]+ +g[Yn>>2];g[Ll>>2]=+g[tm>>2]-+g[Kl>>2];g[Wl>>2]=+g[Sl>>2]-+g[Vl>>2];g[Xl>>2]=+g[Ll>>2]*.290284663438797-+g[Wl>>2]*.9569403529167175;g[an>>2]=+g[Ll>>2]*.9569403529167175+ +g[Wl>>2]*.290284663438797;g[wn>>2]=+g[gm>>2]-+g[vn>>2];g[Hn>>2]=+g[Dn>>2]-+g[Gn>>2];g[In>>2]=+g[wn>>2]*.290284663438797+ +g[Hn>>2]*.9569403529167175;g[bn>>2]=+g[Hn>>2]*.290284663438797-+g[wn>>2]*.9569403529167175;g[Jn>>2]=+g[Xl>>2]+ +g[In>>2];g[pn>>2]=+g[Xl>>2]-+g[In>>2];g[cn>>2]=+g[an>>2]+ 
+g[bn>>2];g[ln>>2]=+g[bn>>2]-+g[an>>2];g[_n>>2]=+g[tm>>2]+ +g[Kl>>2];g[$n>>2]=+g[Sl>>2]+ +g[Vl>>2];g[ao>>2]=+g[_n>>2]*.8819212913513184-+g[$n>>2]*.4713967442512512;g[Lo>>2]=+g[_n>>2]*.4713967442512512+ +g[$n>>2]*.8819212913513184;g[Co>>2]=+g[gm>>2]+ +g[vn>>2];g[Do>>2]=+g[Dn>>2]+ +g[Gn>>2];g[Eo>>2]=+g[Co>>2]*.8819212913513184+ +g[Do>>2]*.4713967442512512;g[Mo>>2]=+g[Do>>2]*.8819212913513184-+g[Co>>2]*.4713967442512512;g[Fo>>2]=+g[ao>>2]+ +g[Eo>>2];g[Zo>>2]=+g[ao>>2]-+g[Eo>>2];g[No>>2]=+g[Lo>>2]+ +g[Mo>>2];g[Vo>>2]=+g[Mo>>2]-+g[Lo>>2];g[Kn>>2]=+g[im>>2]-+g[Jn>>2];g[dn>>2]=+g[$m>>2]-+g[cn>>2];g[wk>>2]=+g[(c[m>>2]|0)+352>>2];g[Ln>>2]=+g[(c[m>>2]|0)+356>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[wk>>2]*+g[Kn>>2]-+g[Ln>>2]*+g[dn>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[Ln>>2]*+g[Kn>>2]+ +g[wk>>2]*+g[dn>>2];g[Wo>>2]=+g[Uo>>2]-+g[Vo>>2];g[_o>>2]=+g[Yo>>2]-+g[Zo>>2];g[To>>2]=+g[(c[m>>2]|0)+416>>2];g[Xo>>2]=+g[(c[m>>2]|0)+420>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[To>>2]*+g[Wo>>2]-+g[Xo>>2]*+g[_o>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[To>>2]*+g[_o>>2]+ +g[Xo>>2]*+g[Wo>>2];g[bo>>2]=+g[Uo>>2]+ +g[Vo>>2];g[eo>>2]=+g[Yo>>2]+ +g[Zo>>2];g[$o>>2]=+g[(c[m>>2]|0)+160>>2];g[co>>2]=+g[(c[m>>2]|0)+164>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[$o>>2]*+g[bo>>2]-+g[co>>2]*+g[eo>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[$o>>2]*+g[eo>>2]+ +g[co>>2]*+g[bo>>2];g[fn>>2]=+g[im>>2]+ +g[Jn>>2];g[hn>>2]=+g[$m>>2]+ +g[cn>>2];g[en>>2]=+g[(c[m>>2]|0)+96>>2];g[gn>>2]=+g[(c[m>>2]|0)+100>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[en>>2]*+g[fn>>2]-+g[gn>>2]*+g[hn>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[gn>>2]*+g[fn>>2]+ +g[en>>2]*+g[hn>>2];g[mn>>2]=+g[kn>>2]-+g[ln>>2];g[qn>>2]=+g[on>>2]-+g[pn>>2];g[jn>>2]=+g[(c[m>>2]|0)+480>>2];g[nn>>2]=+g[(c[m>>2]|0)+484>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[jn>>2]*+g[mn>>2]-+g[nn>>2]*+g[qn>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[jn>>2]*+g[qn>>2]+ 
+g[nn>>2]*+g[mn>>2];g[Go>>2]=+g[Zn>>2]-+g[Fo>>2];g[Oo>>2]=+g[Ko>>2]-+g[No>>2];g[Wn>>2]=+g[(c[m>>2]|0)+288>>2];g[Ho>>2]=+g[(c[m>>2]|0)+292>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[Wn>>2]*+g[Go>>2]-+g[Ho>>2]*+g[Oo>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[Ho>>2]*+g[Go>>2]+ +g[Wn>>2]*+g[Oo>>2];g[Qo>>2]=+g[Zn>>2]+ +g[Fo>>2];g[So>>2]=+g[Ko>>2]+ +g[No>>2];g[Po>>2]=+g[(c[m>>2]|0)+32>>2];g[Ro>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Po>>2]*+g[Qo>>2]-+g[Ro>>2]*+g[So>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ro>>2]*+g[Qo>>2]+ +g[Po>>2]*+g[So>>2];g[Tn>>2]=+g[kn>>2]+ +g[ln>>2];g[Vn>>2]=+g[on>>2]+ +g[pn>>2];g[rn>>2]=+g[(c[m>>2]|0)+224>>2];g[Un>>2]=+g[(c[m>>2]|0)+228>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[rn>>2]*+g[Tn>>2]-+g[Un>>2]*+g[Vn>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[rn>>2]*+g[Vn>>2]+ +g[Un>>2]*+g[Tn>>2];g[ar>>2]=+g[Uq>>2]-+g[$q>>2];g[Zr>>2]=+g[hs>>2]+ +g[ks>>2];g[hr>>2]=+g[dr>>2]*.8314695954322815-+g[gr>>2]*.5555702447891235;g[or>>2]=+g[kr>>2]*.8314695954322815+ +g[nr>>2]*.5555702447891235;g[pr>>2]=+g[hr>>2]-+g[or>>2];g[_r>>2]=+g[or>>2]+ +g[hr>>2];g[qr>>2]=+g[ar>>2]+ +g[pr>>2];g[mt>>2]=+g[Zr>>2]-+g[_r>>2];g[zs>>2]=+g[ar>>2]-+g[pr>>2];g[$r>>2]=+g[Zr>>2]+ +g[_r>>2];g[ls>>2]=+g[hs>>2]-+g[ks>>2];g[Nr>>2]=+g[Uq>>2]+ +g[$q>>2];g[ms>>2]=+g[nr>>2]*.8314695954322815-+g[kr>>2]*.5555702447891235;g[ns>>2]=+g[dr>>2]*.5555702447891235+ +g[gr>>2]*.8314695954322815;g[os>>2]=+g[ms>>2]-+g[ns>>2];g[Or>>2]=+g[ms>>2]+ +g[ns>>2];g[ps>>2]=+g[ls>>2]+ +g[os>>2];g[Ks>>2]=+g[Nr>>2]-+g[Or>>2];g[Fr>>2]=+g[ls>>2]-+g[os>>2];g[Pr>>2]=+g[Nr>>2]+ +g[Or>>2];g[Dq>>2]=+g[vq>>2]-+g[Cq>>2];g[Kq>>2]=+g[Gq>>2]-+g[Jq>>2];g[Lq>>2]=+g[Dq>>2]*.4713967442512512-+g[Kq>>2]*.8819212913513184;g[qs>>2]=+g[Dq>>2]*.8819212913513184+ +g[Kq>>2]*.4713967442512512;g[vr>>2]=+g[Oq>>2]-+g[ur>>2];g[Cr>>2]=+g[yr>>2]-+g[Br>>2];g[bs>>2]=+g[vr>>2]*.4713967442512512+ +g[Cr>>2]*.8819212913513184;g[rs>>2]=+g[Cr>>2]*.4713967442512512-+g[vr>>2]*.8819212913513184;g[cs>>2]=+g[Lq>>2]+ 
+g[bs>>2];g[Gr>>2]=+g[Lq>>2]-+g[bs>>2];g[ss>>2]=+g[qs>>2]+ +g[rs>>2];g[As>>2]=+g[rs>>2]-+g[qs>>2];g[Qr>>2]=+g[vq>>2]+ +g[Cq>>2];g[Rr>>2]=+g[Gq>>2]+ +g[Jq>>2];g[Sr>>2]=+g[Qr>>2]*.9569403529167175-+g[Rr>>2]*.290284663438797;g[as>>2]=+g[Qr>>2]*.290284663438797+ +g[Rr>>2]*.9569403529167175;g[Tr>>2]=+g[Oq>>2]+ +g[ur>>2];g[Ur>>2]=+g[yr>>2]+ +g[Br>>2];g[Vr>>2]=+g[Tr>>2]*.9569403529167175+ +g[Ur>>2]*.290284663438797;g[Cs>>2]=+g[Ur>>2]*.9569403529167175-+g[Tr>>2]*.290284663438797;g[Wr>>2]=+g[Sr>>2]+ +g[Vr>>2];g[nt>>2]=+g[Sr>>2]-+g[Vr>>2];g[Ds>>2]=+g[as>>2]+ +g[Cs>>2];g[Ls>>2]=+g[Cs>>2]-+g[as>>2];g[ds>>2]=+g[qr>>2]-+g[cs>>2];g[ts>>2]=+g[ps>>2]-+g[ss>>2];g[rq>>2]=+g[(c[m>>2]|0)+336>>2];g[es>>2]=+g[(c[m>>2]|0)+340>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[rq>>2]*+g[ds>>2]-+g[es>>2]*+g[ts>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[es>>2]*+g[ds>>2]+ +g[rq>>2]*+g[ts>>2];g[kt>>2]=+g[Ks>>2]-+g[Ls>>2];g[ot>>2]=+g[mt>>2]-+g[nt>>2];g[Js>>2]=+g[(c[m>>2]|0)+400>>2];g[lt>>2]=+g[(c[m>>2]|0)+404>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[Js>>2]*+g[kt>>2]-+g[lt>>2]*+g[ot>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[Js>>2]*+g[ot>>2]+ +g[lt>>2]*+g[kt>>2];g[qt>>2]=+g[Ks>>2]+ +g[Ls>>2];g[st>>2]=+g[mt>>2]+ +g[nt>>2];g[pt>>2]=+g[(c[m>>2]|0)+144>>2];g[rt>>2]=+g[(c[m>>2]|0)+148>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[pt>>2]*+g[qt>>2]-+g[rt>>2]*+g[st>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[pt>>2]*+g[st>>2]+ +g[rt>>2]*+g[qt>>2];g[vs>>2]=+g[qr>>2]+ +g[cs>>2];g[xs>>2]=+g[ps>>2]+ +g[ss>>2];g[us>>2]=+g[(c[m>>2]|0)+80>>2];g[ws>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[us>>2]*+g[vs>>2]-+g[ws>>2]*+g[xs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[ws>>2]*+g[vs>>2]+ +g[us>>2]*+g[xs>>2];g[Dr>>2]=+g[zs>>2]-+g[As>>2];g[Hr>>2]=+g[Fr>>2]-+g[Gr>>2];g[ys>>2]=+g[(c[m>>2]|0)+464>>2];g[Er>>2]=+g[(c[m>>2]|0)+468>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[ys>>2]*+g[Dr>>2]-+g[Er>>2]*+g[Hr>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[ys>>2]*+g[Hr>>2]+ 
+g[Er>>2]*+g[Dr>>2];g[Xr>>2]=+g[Pr>>2]-+g[Wr>>2];g[Es>>2]=+g[$r>>2]-+g[Ds>>2];g[Mr>>2]=+g[(c[m>>2]|0)+272>>2];g[Yr>>2]=+g[(c[m>>2]|0)+276>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[Mr>>2]*+g[Xr>>2]-+g[Yr>>2]*+g[Es>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[Yr>>2]*+g[Xr>>2]+ +g[Mr>>2]*+g[Es>>2];g[Gs>>2]=+g[Pr>>2]+ +g[Wr>>2];g[Is>>2]=+g[$r>>2]+ +g[Ds>>2];g[Fs>>2]=+g[(c[m>>2]|0)+16>>2];g[Hs>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Fs>>2]*+g[Gs>>2]-+g[Hs>>2]*+g[Is>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Hs>>2]*+g[Gs>>2]+ +g[Fs>>2]*+g[Is>>2];g[Jr>>2]=+g[zs>>2]+ +g[As>>2];g[Lr>>2]=+g[Fr>>2]+ +g[Gr>>2];g[Ir>>2]=+g[(c[m>>2]|0)+208>>2];g[Kr>>2]=+g[(c[m>>2]|0)+212>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Ir>>2]*+g[Jr>>2]-+g[Kr>>2]*+g[Lr>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Ir>>2]*+g[Lr>>2]+ +g[Kr>>2]*+g[Jr>>2];g[io>>2]=+g[go>>2]-+g[ho>>2];g[vp>>2]=+g[hp>>2]+ +g[ip>>2];g[lo>>2]=+g[jo>>2]*.9807852506637573-+g[ko>>2]*.19509032368659973;g[oo>>2]=+g[mo>>2]*.19509032368659973+ +g[no>>2]*.9807852506637573;g[po>>2]=+g[lo>>2]-+g[oo>>2];g[wp>>2]=+g[oo>>2]+ +g[lo>>2];g[qo>>2]=+g[io>>2]-+g[po>>2];g[kq>>2]=+g[vp>>2]+ +g[wp>>2];g[Xp>>2]=+g[io>>2]+ +g[po>>2];g[xp>>2]=+g[vp>>2]-+g[wp>>2];g[jp>>2]=+g[hp>>2]-+g[ip>>2];g[hq>>2]=+g[go>>2]+ +g[ho>>2];g[kp>>2]=+g[mo>>2]*.9807852506637573-+g[no>>2]*.19509032368659973;g[Lp>>2]=+g[ko>>2]*.9807852506637573+ +g[jo>>2]*.19509032368659973;g[Mp>>2]=+g[kp>>2]-+g[Lp>>2];g[iq>>2]=+g[kp>>2]+ +g[Lp>>2];g[Np>>2]=+g[jp>>2]-+g[Mp>>2];g[Hp>>2]=+g[hq>>2]+ +g[iq>>2];g[$p>>2]=+g[jp>>2]+ +g[Mp>>2];g[lp>>2]=+g[hq>>2]-+g[iq>>2];g[to>>2]=+g[ro>>2]-+g[so>>2];g[wo>>2]=+g[uo>>2]-+g[vo>>2];g[xo>>2]=+g[to>>2]*.6343932747840881-+g[wo>>2]*.7730104327201843;g[Pp>>2]=+g[to>>2]*.7730104327201843+ +g[wo>>2]*.6343932747840881;g[Ao>>2]=+g[yo>>2]-+g[zo>>2];g[cp>>2]=+g[Bo>>2]-+g[bp>>2];g[dp>>2]=+g[Ao>>2]*.6343932747840881+ 
+g[cp>>2]*.7730104327201843;g[Op>>2]=+g[cp>>2]*.6343932747840881-+g[Ao>>2]*.7730104327201843;g[ep>>2]=+g[xo>>2]-+g[dp>>2];g[aq>>2]=+g[dp>>2]+ +g[xo>>2];g[Qp>>2]=+g[Op>>2]-+g[Pp>>2];g[Yp>>2]=+g[Op>>2]+ +g[Pp>>2];g[mp>>2]=+g[ro>>2]+ +g[so>>2];g[np>>2]=+g[uo>>2]+ +g[vo>>2];g[op>>2]=+g[mp>>2]*.9951847195625305-+g[np>>2]*.0980171412229538;g[zp>>2]=+g[mp>>2]*.0980171412229538+ +g[np>>2]*.9951847195625305;g[pp>>2]=+g[yo>>2]+ +g[zo>>2];g[qp>>2]=+g[Bo>>2]+ +g[bp>>2];g[rp>>2]=+g[pp>>2]*.9951847195625305+ +g[qp>>2]*.0980171412229538;g[yp>>2]=+g[qp>>2]*.9951847195625305-+g[pp>>2]*.0980171412229538;g[sp>>2]=+g[op>>2]-+g[rp>>2];g[lq>>2]=+g[rp>>2]+ +g[op>>2];g[Ap>>2]=+g[yp>>2]-+g[zp>>2];g[Ip>>2]=+g[yp>>2]+ +g[zp>>2];g[fp>>2]=+g[qo>>2]-+g[ep>>2];g[Rp>>2]=+g[Np>>2]-+g[Qp>>2];g[fo>>2]=+g[(c[m>>2]|0)+448>>2];g[gp>>2]=+g[(c[m>>2]|0)+452>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[fo>>2]*+g[fp>>2]-+g[gp>>2]*+g[Rp>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[gp>>2]*+g[fp>>2]+ +g[fo>>2]*+g[Rp>>2];g[Jp>>2]=+g[Hp>>2]-+g[Ip>>2];g[mq>>2]=+g[kq>>2]-+g[lq>>2];g[Gp>>2]=+g[(c[m>>2]|0)+256>>2];g[Kp>>2]=+g[(c[m>>2]|0)+260>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[Gp>>2]*+g[Jp>>2]-+g[Kp>>2]*+g[mq>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[Gp>>2]*+g[mq>>2]+ +g[Kp>>2]*+g[Jp>>2];g[oq>>2]=+g[Hp>>2]+ +g[Ip>>2];g[qq>>2]=+g[kq>>2]+ +g[lq>>2];g[nq>>2]=+g[c[m>>2]>>2];g[pq>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[nq>>2]*+g[oq>>2]-+g[pq>>2]*+g[qq>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[nq>>2]*+g[qq>>2]+ +g[pq>>2]*+g[oq>>2];g[Tp>>2]=+g[qo>>2]+ +g[ep>>2];g[Vp>>2]=+g[Np>>2]+ +g[Qp>>2];g[Sp>>2]=+g[(c[m>>2]|0)+192>>2];g[Up>>2]=+g[(c[m>>2]|0)+196>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Sp>>2]*+g[Tp>>2]-+g[Up>>2]*+g[Vp>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Up>>2]*+g[Tp>>2]+ 
+g[Sp>>2]*+g[Vp>>2];g[Zp>>2]=+g[Xp>>2]-+g[Yp>>2];g[bq>>2]=+g[$p>>2]-+g[aq>>2];g[Wp>>2]=+g[(c[m>>2]|0)+320>>2];g[_p>>2]=+g[(c[m>>2]|0)+324>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[Wp>>2]*+g[Zp>>2]-+g[_p>>2]*+g[bq>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[Wp>>2]*+g[bq>>2]+ +g[_p>>2]*+g[Zp>>2];g[tp>>2]=+g[lp>>2]-+g[sp>>2];g[Bp>>2]=+g[xp>>2]-+g[Ap>>2];g[gq>>2]=+g[(c[m>>2]|0)+384>>2];g[up>>2]=+g[(c[m>>2]|0)+388>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[gq>>2]*+g[tp>>2]-+g[up>>2]*+g[Bp>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[up>>2]*+g[tp>>2]+ +g[gq>>2]*+g[Bp>>2];g[Dp>>2]=+g[lp>>2]+ +g[sp>>2];g[Fp>>2]=+g[xp>>2]+ +g[Ap>>2];g[Cp>>2]=+g[(c[m>>2]|0)+128>>2];g[Ep>>2]=+g[(c[m>>2]|0)+132>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Cp>>2]*+g[Dp>>2]-+g[Ep>>2]*+g[Fp>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Ep>>2]*+g[Dp>>2]+ +g[Cp>>2]*+g[Fp>>2];g[dq>>2]=+g[Xp>>2]+ +g[Yp>>2];g[fq>>2]=+g[$p>>2]+ +g[aq>>2];g[cq>>2]=+g[(c[m>>2]|0)+64>>2];g[eq>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[cq>>2]*+g[dq>>2]-+g[eq>>2]*+g[fq>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[cq>>2]*+g[fq>>2]+ +g[eq>>2]*+g[dq>>2];g[wt>>2]=+g[ut>>2]-+g[vt>>2];g[wu>>2]=+g[Ys>>2]-+g[Zs>>2];g[zt>>2]=+g[xt>>2]*.19509032368659973-+g[yt>>2]*.9807852506637573;g[Ct>>2]=+g[At>>2]*.19509032368659973-+g[Bt>>2]*.9807852506637573;g[Dt>>2]=+g[zt>>2]+ +g[Ct>>2];g[xu>>2]=+g[zt>>2]-+g[Ct>>2];g[Et>>2]=+g[wt>>2]-+g[Dt>>2];g[$t>>2]=+g[wu>>2]-+g[xu>>2];g[Nt>>2]=+g[wt>>2]+ +g[Dt>>2];g[yu>>2]=+g[wu>>2]+ +g[xu>>2];g[_s>>2]=+g[Ys>>2]+ +g[Zs>>2];g[ku>>2]=+g[ut>>2]+ +g[vt>>2];g[$s>>2]=+g[yt>>2]*.19509032368659973+ +g[xt>>2]*.9807852506637573;g[at>>2]=+g[Bt>>2]*.19509032368659973+ +g[At>>2]*.9807852506637573;g[bt>>2]=+g[$s>>2]-+g[at>>2];g[lu>>2]=+g[$s>>2]+ +g[at>>2];g[ct>>2]=+g[_s>>2]-+g[bt>>2];g[Xt>>2]=+g[ku>>2]+ +g[lu>>2];g[Rt>>2]=+g[_s>>2]+ 
+g[bt>>2];g[mu>>2]=+g[ku>>2]-+g[lu>>2];g[Ht>>2]=+g[Ft>>2]-+g[Gt>>2];g[Ms>>2]=+g[It>>2]-+g[Jt>>2];g[Ns>>2]=+g[Ht>>2]*.7730104327201843-+g[Ms>>2]*.6343932747840881;g[et>>2]=+g[Ht>>2]*.6343932747840881+ +g[Ms>>2]*.7730104327201843;g[Qs>>2]=+g[Os>>2]+ +g[Ps>>2];g[Ts>>2]=+g[Rs>>2]-+g[Ss>>2];g[Us>>2]=+g[Qs>>2]*.7730104327201843+ +g[Ts>>2]*.6343932747840881;g[dt>>2]=+g[Ts>>2]*.7730104327201843-+g[Qs>>2]*.6343932747840881;g[Vs>>2]=+g[Ns>>2]-+g[Us>>2];g[St>>2]=+g[Us>>2]+ +g[Ns>>2];g[ft>>2]=+g[dt>>2]-+g[et>>2];g[Ot>>2]=+g[dt>>2]+ +g[et>>2];g[nu>>2]=+g[Rs>>2]+ +g[Ss>>2];g[ou>>2]=+g[Os>>2]-+g[Ps>>2];g[pu>>2]=+g[nu>>2]*.0980171412229538-+g[ou>>2]*.9951847195625305;g[zu>>2]=+g[ou>>2]*.0980171412229538+ +g[nu>>2]*.9951847195625305;g[qu>>2]=+g[It>>2]+ +g[Jt>>2];g[ru>>2]=+g[Gt>>2]+ +g[Ft>>2];g[su>>2]=+g[qu>>2]*.0980171412229538-+g[ru>>2]*.9951847195625305;g[Au>>2]=+g[ru>>2]*.0980171412229538+ +g[qu>>2]*.9951847195625305;g[tu>>2]=+g[pu>>2]+ +g[su>>2];g[au>>2]=+g[pu>>2]-+g[su>>2];g[Bu>>2]=+g[zu>>2]-+g[Au>>2];g[Yt>>2]=+g[zu>>2]+ +g[Au>>2];g[Ws>>2]=+g[Et>>2]-+g[Vs>>2];g[gt>>2]=+g[ct>>2]-+g[ft>>2];g[tt>>2]=+g[(c[m>>2]|0)+432>>2];g[Xs>>2]=+g[(c[m>>2]|0)+436>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[tt>>2]*+g[Ws>>2]-+g[Xs>>2]*+g[gt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[Xs>>2]*+g[Ws>>2]+ +g[tt>>2]*+g[gt>>2];g[Zt>>2]=+g[Xt>>2]-+g[Yt>>2];g[bu>>2]=+g[$t>>2]+ +g[au>>2];g[Wt>>2]=+g[(c[m>>2]|0)+240>>2];g[_t>>2]=+g[(c[m>>2]|0)+244>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Wt>>2]*+g[Zt>>2]-+g[_t>>2]*+g[bu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Wt>>2]*+g[bu>>2]+ +g[_t>>2]*+g[Zt>>2];g[du>>2]=+g[Xt>>2]+ +g[Yt>>2];g[fu>>2]=+g[$t>>2]-+g[au>>2];g[cu>>2]=+g[(c[m>>2]|0)+496>>2];g[eu>>2]=+g[(c[m>>2]|0)+500>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[cu>>2]*+g[du>>2]-+g[eu>>2]*+g[fu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[cu>>2]*+g[fu>>2]+ +g[eu>>2]*+g[du>>2];g[it>>2]=+g[Et>>2]+ +g[Vs>>2];g[Lt>>2]=+g[ct>>2]+ 
+g[ft>>2];g[ht>>2]=+g[(c[m>>2]|0)+176>>2];g[jt>>2]=+g[(c[m>>2]|0)+180>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[ht>>2]*+g[it>>2]-+g[jt>>2]*+g[Lt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[jt>>2]*+g[it>>2]+ +g[ht>>2]*+g[Lt>>2];g[Pt>>2]=+g[Nt>>2]-+g[Ot>>2];g[Tt>>2]=+g[Rt>>2]-+g[St>>2];g[Mt>>2]=+g[(c[m>>2]|0)+304>>2];g[Qt>>2]=+g[(c[m>>2]|0)+308>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[Mt>>2]*+g[Pt>>2]-+g[Qt>>2]*+g[Tt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[Mt>>2]*+g[Tt>>2]+ +g[Qt>>2]*+g[Pt>>2];g[uu>>2]=+g[mu>>2]-+g[tu>>2];g[Cu>>2]=+g[yu>>2]-+g[Bu>>2];g[ju>>2]=+g[(c[m>>2]|0)+368>>2];g[vu>>2]=+g[(c[m>>2]|0)+372>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[ju>>2]*+g[uu>>2]-+g[vu>>2]*+g[Cu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[vu>>2]*+g[uu>>2]+ +g[ju>>2]*+g[Cu>>2];g[Eu>>2]=+g[mu>>2]+ +g[tu>>2];g[Vt>>2]=+g[yu>>2]+ +g[Bu>>2];g[Du>>2]=+g[(c[m>>2]|0)+112>>2];g[Fu>>2]=+g[(c[m>>2]|0)+116>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Du>>2]*+g[Eu>>2]-+g[Fu>>2]*+g[Vt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Fu>>2]*+g[Eu>>2]+ +g[Du>>2]*+g[Vt>>2];g[gu>>2]=+g[Nt>>2]+ +g[Ot>>2];g[iu>>2]=+g[Rt>>2]+ +g[St>>2];g[Ut>>2]=+g[(c[m>>2]|0)+48>>2];g[hu>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ut>>2]*+g[gu>>2]-+g[hu>>2]*+g[iu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ut>>2]*+g[iu>>2]+ +g[hu>>2]*+g[gu>>2];c[Xu>>2]=(c[Xu>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+504;c[n>>2]=c[n>>2]^c[2998]}i=Yu;return}/* hu(a): coerces a to int, takes a 16-byte frame by bumping i, stores a in that frame, calls bn(a, 68, 8296), then restores i; returns nothing. NOTE(review): bn is defined outside this chunk - presumably a codelet-registration helper, with 68 a function-table index and 8296 the address of a static descriptor; confirm against the unminified build. */function hu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,68,8296);i=b;return}/* iu(a,b,d,e,f,h,j): the seven arguments are coerced to int and spilled into a 272-byte frame taken from i. Before the loop the saved d is advanced by (f-1)*10 four-byte elements; a counter then starts at f and the loop exits once it is no longer below h. Each pass reads values through g at a and b plus 0, e, 2e, 3e, 4e and 5e elements, combines them with the literal factors .5 and .8660253882408142, multiplies by value pairs read at d+0 .. d+36 bytes and stores the results back to those a/b slots; then the counter is incremented, a moves forward by j elements, b moves back by j elements and d moves forward by 40 bytes. i is restored before returning. NOTE(review): the access pattern looks like an Emscripten build of an FFTW radix-6 half-complex twiddle codelet with arguments (cr, ci, W, rs, mb, me, ms) - inferred, not shown here; confirm against the FFTW sources. */function iu(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0;ta=i;i=i+272|0;k=ta+260|0;l=ta+256|0;m=ta+252|0;n=ta+248|0;ua=ta+244|0;o=ta+240|0;p=ta+236|0;sa=ta+224|0;s=ta+220|0;qa=ta+216|0;U=ta+212|0;N=ta+208|0;ja=ta+204|0;A=ta+200|0;X=ta+196|0;D=ta+192|0;ca=ta+188|0;K=ta+184|0;fa=ta+180|0;G=ta+176|0;q=ta+172|0;r=ta+168|0;V=ta+164|0;W=ta+160|0;v=ta+156|0;ra=ta+152|0;y=ta+148|0;z=ta+144|0;t=ta+140|0;u=ta+136|0;w=ta+132|0;x=ta+128|0;_=ta+124|0;F=ta+120|0;ba=ta+116|0;E=ta+112|0;Y=ta+108|0;Z=ta+104|0;$=ta+100|0;aa=ta+96|0;B=ta+92|0;H=ta+88|0;pa=ta+84|0;C=ta+80|0;ga=ta+76|0;ma=ta+72|0;ka=ta+68|0;oa=ta+64|0;ea=ta+60|0;ia=ta+56|0;da=ta+52|0;ha=ta+48|0;la=ta+44|0;na=ta+40|0;L=ta+36|0;R=ta+32|0;P=ta+28|0;T=ta+24|0;J=ta+20|0;O=ta+16|0;I=ta+12|0;M=ta+8|0;Q=ta+4|0;S=ta;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[ua>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ta+232>>2]=.5;g[ta+228>>2]=.8660253882408142;c[sa>>2]=c[ua>>2];c[m>>2]=(c[m>>2]|0)+(((c[ua>>2]|0)-1|0)*10<<2);while(1){if((c[sa>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[s>>2]=+g[q>>2]+ +g[r>>2];g[qa>>2]=+g[q>>2]-+g[r>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[u>>2]=+g[c[l>>2]>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[ra>>2]=+g[t>>2]-+g[u>>2];g[w>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[w>>2]-+g[x>>2];g[U>>2]=+g[v>>2]+ +g[y>>2];g[N>>2]=(+g[ra>>2]-+g[z>>2])*.8660253882408142;g[ja>>2]=(+g[v>>2]-+g[y>>2])*.8660253882408142;g[A>>2]=+g[ra>>2]+ +g[z>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[W>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[D>>2]=+g[V>>2]+ 
+g[W>>2];g[Y>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Z>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[F>>2]=+g[Y>>2]+ +g[Z>>2];g[$>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[aa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[E>>2]=+g[$>>2]+ +g[aa>>2];g[ca>>2]=+g[_>>2]+ +g[ba>>2];g[K>>2]=(+g[F>>2]+ +g[E>>2])*.8660253882408142;g[fa>>2]=(+g[ba>>2]-+g[_>>2])*.8660253882408142;g[G>>2]=+g[E>>2]-+g[F>>2];g[c[k>>2]>>2]=+g[s>>2]+ +g[U>>2];g[c[l>>2]>>2]=+g[X>>2]+ +g[ca>>2];g[B>>2]=+g[qa>>2]+ +g[A>>2];g[H>>2]=+g[D>>2]-+g[G>>2];g[pa>>2]=+g[(c[m>>2]|0)+16>>2];g[C>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[pa>>2]*+g[B>>2]-+g[C>>2]*+g[H>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[C>>2]*+g[B>>2]+ +g[pa>>2]*+g[H>>2];g[ea>>2]=+g[s>>2]-+g[U>>2]*.5;g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[ma>>2]=+g[ea>>2]+ +g[fa>>2];g[ia>>2]=+g[X>>2]-+g[ca>>2]*.5;g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[oa>>2]=+g[ja>>2]+ +g[ia>>2];g[da>>2]=+g[(c[m>>2]|0)+8>>2];g[ha>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[da>>2]*+g[ga>>2]-+g[ha>>2]*+g[ka>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[da>>2]*+g[ka>>2]+ +g[ha>>2]*+g[ga>>2];g[la>>2]=+g[(c[m>>2]|0)+24>>2];g[na>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[la>>2]*+g[ma>>2]-+g[na>>2]*+g[oa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[la>>2]*+g[oa>>2]+ +g[na>>2]*+g[ma>>2];g[J>>2]=+g[qa>>2]-+g[A>>2]*.5;g[L>>2]=+g[J>>2]-+g[K>>2];g[R>>2]=+g[J>>2]+ +g[K>>2];g[O>>2]=+g[G>>2]*.5+ +g[D>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[T>>2]=+g[O>>2]-+g[N>>2];g[I>>2]=+g[c[m>>2]>>2];g[M>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]*+g[L>>2]-+g[M>>2]*+g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[M>>2]*+g[L>>2]+ +g[I>>2]*+g[P>>2];g[Q>>2]=+g[(c[m>>2]|0)+32>>2];g[S>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Q>>2]*+g[R>>2]-+g[S>>2]*+g[T>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[S>>2]*+g[R>>2]+ 
+g[Q>>2]*+g[T>>2];c[sa>>2]=(c[sa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+40}i=ta;return}/* ju(a): coerces a to int, takes a 16-byte frame by bumping i, stores a in that frame, calls bn(a, 69, 8344), then restores i; returns nothing. NOTE(review): bn is defined outside this chunk - presumably a codelet-registration helper, with 69 a function-table index and 8344 the address of a static descriptor; confirm. */function ju(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,69,8344);i=b;return}/* ku(a,b,d,e,f,h,j): the seven arguments are coerced to int and spilled into a 304-byte frame taken from i. Before the loop the saved d is advanced by (f-1)*12 four-byte elements; a counter then starts at f and the loop exits once it is no longer below h. Each pass reads values through g at a and b plus 0 .. 6e elements, combines them with the literal factors .22252093255519867, .9009688496589661, .6234897971153259, .7818315029144287, .9749279022216797 and .4338837265968323, multiplies by value pairs read at d+0 .. d+44 bytes and stores the results back to those a/b slots; then the counter is incremented, a moves forward by j elements, b moves back by j elements and d moves forward by 48 bytes. i is restored before returning. NOTE(review): looks like an Emscripten build of an FFTW radix-7 half-complex twiddle codelet (cr, ci, W, rs, mb, me, ms) - inferred from the access pattern; confirm against the FFTW sources. */function ku(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0;za=i;i=i+304|0;k=za+300|0;l=za+296|0;m=za+292|0;n=za+288|0;Aa=za+284|0;o=za+280|0;p=za+276|0;ya=za+248|0;q=za+244|0;u=za+240|0;x=za+236|0;_=za+232|0;va=za+228|0;I=za+224|0;V=za+220|0;Q=za+216|0;E=za+212|0;ka=za+208|0;$=za+204|0;ca=za+200|0;fa=za+196|0;ia=za+192|0;oa=za+188|0;F=za+184|0;R=za+180|0;U=za+176|0;J=za+172|0;ra=za+168|0;sa=za+164|0;ua=za+160|0;ta=za+156|0;s=za+152|0;t=za+148|0;v=za+144|0;w=za+140|0;y=za+136|0;z=za+132|0;na=za+128|0;la=za+124|0;ma=za+120|0;aa=za+116|0;ba=za+112|0;da=za+108|0;ea=za+104|0;ga=za+100|0;ha=za+96|0;pa=za+92|0;wa=za+88|0;ja=za+84|0;qa=za+80|0;Y=za+76|0;r=za+72|0;X=za+68|0;Z=za+64|0;A=za+60|0;C=za+56|0;xa=za+52|0;B=za+48|0;M=za+44|0;O=za+40|0;L=za+36|0;N=za+32|0;S=za+28|0;W=za+24|0;P=za+20|0;T=za+16|0;G=za+12|0;K=za+8|0;D=za+4|0;H=za;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Aa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[za+272>>2]=.22252093255519867;g[za+268>>2]=.9009688496589661;g[za+264>>2]=.6234897971153259;g[za+260>>2]=.7818315029144287;g[za+256>>2]=.9749279022216797;g[za+252>>2]=.4338837265968323;c[ya>>2]=c[Aa>>2];c[m>>2]=(c[m>>2]|0)+(((c[Aa>>2]|0)-1|0)*12<<2);while(1){if((c[ya>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[t>>2]=+g[c[l>>2]>>2];g[u>>2]=+g[s>>2]+ 
+g[t>>2];g[sa>>2]=+g[s>>2]-+g[t>>2];g[v>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[w>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[ua>>2]=+g[v>>2]-+g[w>>2];g[y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[_>>2]=+g[y>>2]+ +g[z>>2];g[ta>>2]=+g[y>>2]-+g[z>>2];g[va>>2]=+g[sa>>2]*.4338837265968323+ +g[ta>>2]*.9749279022216797-+g[ua>>2]*.7818315029144287;g[I>>2]=+g[sa>>2]*.7818315029144287+ +g[ua>>2]*.9749279022216797+ +g[ta>>2]*.4338837265968323;g[V>>2]=+g[sa>>2]*.9749279022216797-+g[ta>>2]*.7818315029144287-+g[ua>>2]*.4338837265968323;g[Q>>2]=+g[_>>2]*.6234897971153259+ +g[q>>2]+-(+g[x>>2]*.9009688496589661+ +g[u>>2]*.22252093255519867);g[E>>2]=+g[u>>2]*.6234897971153259+ +g[q>>2]+-(+g[_>>2]*.9009688496589661+ +g[x>>2]*.22252093255519867);g[ka>>2]=+g[x>>2]*.6234897971153259+ +g[q>>2]+-(+g[_>>2]*.22252093255519867+ +g[u>>2]*.9009688496589661);g[$>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[aa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[na>>2]=+g[aa>>2]+ +g[ba>>2];g[da>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[la>>2]=+g[da>>2]+ +g[ea>>2];g[ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ha>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[ma>>2]=+g[ga>>2]+ +g[ha>>2];g[oa>>2]=+g[la>>2]*.7818315029144287-+g[ma>>2]*.9749279022216797-+g[na>>2]*.4338837265968323;g[F>>2]=+g[na>>2]*.7818315029144287+ +g[la>>2]*.9749279022216797+ +g[ma>>2]*.4338837265968323;g[R>>2]=+g[la>>2]*.4338837265968323+ +g[ma>>2]*.7818315029144287-+g[na>>2]*.9749279022216797;g[U>>2]=+g[ia>>2]*.6234897971153259+ +g[$>>2]+-(+g[fa>>2]*.9009688496589661+ +g[ca>>2]*.22252093255519867);g[J>>2]=+g[ca>>2]*.6234897971153259+ +g[$>>2]+-(+g[ia>>2]*.9009688496589661+ +g[fa>>2]*.22252093255519867);g[ra>>2]=+g[fa>>2]*.6234897971153259+ +g[$>>2]+-(+g[ia>>2]*.22252093255519867+ 
+g[ca>>2]*.9009688496589661);g[c[k>>2]>>2]=+g[q>>2]+ +g[u>>2]+ +g[x>>2]+ +g[_>>2];g[c[l>>2]>>2]=+g[$>>2]+ +g[ca>>2]+ +g[fa>>2]+ +g[ia>>2];g[pa>>2]=+g[ka>>2]-+g[oa>>2];g[wa>>2]=+g[ra>>2]-+g[va>>2];g[ja>>2]=+g[(c[m>>2]|0)+24>>2];g[qa>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ja>>2]*+g[pa>>2]-+g[qa>>2]*+g[wa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ja>>2]*+g[wa>>2]+ +g[qa>>2]*+g[pa>>2];g[Y>>2]=+g[Q>>2]+ +g[R>>2];g[r>>2]=+g[V>>2]+ +g[U>>2];g[X>>2]=+g[(c[m>>2]|0)+8>>2];g[Z>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[X>>2]*+g[Y>>2]-+g[Z>>2]*+g[r>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[X>>2]*+g[r>>2]+ +g[Z>>2]*+g[Y>>2];g[A>>2]=+g[ka>>2]+ +g[oa>>2];g[C>>2]=+g[va>>2]+ +g[ra>>2];g[xa>>2]=+g[(c[m>>2]|0)+16>>2];g[B>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[xa>>2]*+g[A>>2]-+g[B>>2]*+g[C>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[xa>>2]*+g[C>>2]+ +g[B>>2]*+g[A>>2];g[M>>2]=+g[E>>2]+ +g[F>>2];g[O>>2]=+g[J>>2]-+g[I>>2];g[L>>2]=+g[(c[m>>2]|0)+40>>2];g[N>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[L>>2]*+g[M>>2]-+g[N>>2]*+g[O>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[L>>2]*+g[O>>2]+ +g[N>>2]*+g[M>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[P>>2]=+g[(c[m>>2]|0)+32>>2];g[T>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[P>>2]*+g[S>>2]-+g[T>>2]*+g[W>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[P>>2]*+g[W>>2]+ +g[T>>2]*+g[S>>2];g[G>>2]=+g[E>>2]-+g[F>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[D>>2]=+g[c[m>>2]>>2];g[H>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[D>>2]*+g[G>>2]-+g[H>>2]*+g[K>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[D>>2]*+g[K>>2]+ +g[H>>2]*+g[G>>2];c[ya>>2]=(c[ya>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+48}i=za;return}/* lu(a): coerces a to int, takes a 16-byte frame by bumping i, stores a in that frame, calls bn(a, 70, 8392), then restores i; returns nothing. NOTE(review): bn is defined outside this chunk - presumably a codelet-registration helper, with 70 a function-table index and 8392 the address of a static descriptor; confirm. */function lu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,70,8392);i=b;return}/* mu(a,b,d,e,f,h,j): the seven arguments are coerced to int and spilled into a 368-byte frame taken from i. Before the loop the saved d is advanced by (f-1)*14 four-byte elements; a counter then starts at f and the loop exits once it is no longer below h. Each pass reads values through g at a and b plus 0 .. 7e elements, combines them by sums and differences and the literal factor .7071067690849304, multiplies by value pairs read at d+0 .. d+52 bytes and stores the results back to those a/b slots; then the counter is incremented, a moves forward by j elements, b moves back by j elements and d moves forward by 56 bytes. i is restored before returning. NOTE(review): looks like an Emscripten build of an FFTW radix-8 half-complex twiddle codelet (cr, ci, W, rs, mb, me, ms) - inferred from the access pattern; confirm against the FFTW sources. */function mu(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0;Ra=i;i=i+368|0;k=Ra+352|0;l=Ra+348|0;m=Ra+344|0;n=Ra+340|0;Sa=Ra+336|0;o=Ra+332|0;p=Ra+328|0;Qa=Ra+320|0;P=Ra+316|0;z=Ra+312|0;D=Ra+308|0;Ea=Ra+304|0;Oa=Ra+300|0;ca=Ra+296|0;oa=Ra+292|0;U=Ra+288|0;ua=Ra+284|0;pa=Ra+280|0;r=Ra+276|0;La=Ra+272|0;Pa=Ra+268|0;fa=Ra+264|0;ia=Ra+260|0;V=Ra+256|0;L=Ra+252|0;aa=Ra+248|0;Da=Ra+244|0;ba=Ra+240|0;O=Ra+236|0;ma=Ra+232|0;Aa=Ra+228|0;na=Ra+224|0;q=Ra+220|0;K=Ra+216|0;Ba=Ra+212|0;Ca=Ra+208|0;M=Ra+204|0;N=Ra+200|0;ya=Ra+196|0;za=Ra+192|0;qa=Ra+188|0;da=Ra+184|0;Ka=Ra+180|0;ha=Ra+176|0;ta=Ra+172|0;ga=Ra+168|0;Ha=Ra+164|0;ea=Ra+160|0;Q=Ra+156|0;R=Ra+152|0;Ia=Ra+148|0;Ja=Ra+144|0;ra=Ra+140|0;sa=Ra+136|0;Fa=Ra+132|0;Ga=Ra+128|0;wa=Ra+124|0;Ma=Ra+120|0;va=Ra+116|0;xa=Ra+112|0;Y=Ra+108|0;_=Ra+104|0;X=Ra+100|0;Z=Ra+96|0;S=Ra+92|0;W=Ra+88|0;Na=Ra+84|0;T=Ra+80|0;B=Ra+76|0;H=Ra+72|0;F=Ra+68|0;J=Ra+64|0;A=Ra+60|0;E=Ra+56|0;y=Ra+52|0;C=Ra+48|0;G=Ra+44|0;I=Ra+40|0;ka=Ra+36|0;v=Ra+32|0;t=Ra+28|0;x=Ra+24|0;ja=Ra+20|0;s=Ra+16|0;$=Ra+12|0;la=Ra+8|0;u=Ra+4|0;w=Ra;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Sa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ra+324>>2]=.7071067690849304;c[Qa>>2]=c[Sa>>2];c[m>>2]=(c[m>>2]|0)+(((c[Sa>>2]|0)-1|0)*14<<2);while(1){if((c[Qa>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[K>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[L>>2]=+g[q>>2]+ +g[K>>2];g[aa>>2]=+g[q>>2]-+g[K>>2];g[Ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[ba>>2]=+g[Ba>>2]+ +g[Ca>>2];g[M>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[N>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[O>>2]=+g[M>>2]+ 
+g[N>>2];g[ma>>2]=+g[M>>2]-+g[N>>2];g[ya>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[za>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[na>>2]=+g[ya>>2]+ +g[za>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[z>>2]=+g[aa>>2]+ +g[ba>>2];g[D>>2]=+g[na>>2]-+g[ma>>2];g[Ea>>2]=+g[Aa>>2]+ +g[Da>>2];g[Oa>>2]=+g[L>>2]-+g[O>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[oa>>2]=+g[ma>>2]+ +g[na>>2];g[U>>2]=+g[Aa>>2]-+g[Da>>2];g[Q>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[R>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[qa>>2]=+g[Q>>2]+ +g[R>>2];g[da>>2]=+g[Q>>2]-+g[R>>2];g[Ia>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ja>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[ha>>2]=+g[Ia>>2]+ +g[Ja>>2];g[ra>>2]=+g[c[l>>2]>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[ga>>2]=+g[ra>>2]-+g[sa>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[ea>>2]=+g[Fa>>2]+ +g[Ga>>2];g[ua>>2]=+g[qa>>2]+ +g[ta>>2];g[pa>>2]=+g[da>>2]+ +g[ea>>2];g[r>>2]=+g[ga>>2]+ +g[ha>>2];g[La>>2]=+g[Ha>>2]+ +g[Ka>>2];g[Pa>>2]=+g[Ka>>2]-+g[Ha>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[V>>2]=+g[qa>>2]-+g[ta>>2];g[c[k>>2]>>2]=+g[P>>2]+ +g[ua>>2];g[c[l>>2]>>2]=+g[Ea>>2]+ +g[La>>2];g[wa>>2]=+g[P>>2]-+g[ua>>2];g[Ma>>2]=+g[Ea>>2]-+g[La>>2];g[va>>2]=+g[(c[m>>2]|0)+24>>2];g[xa>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[va>>2]*+g[wa>>2]-+g[xa>>2]*+g[Ma>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[xa>>2]*+g[wa>>2]+ +g[va>>2]*+g[Ma>>2];g[Y>>2]=+g[Oa>>2]+ +g[Pa>>2];g[_>>2]=+g[V>>2]+ +g[U>>2];g[X>>2]=+g[(c[m>>2]|0)+8>>2];g[Z>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[X>>2]*+g[Y>>2]-+g[Z>>2]*+g[_>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[X>>2]*+g[_>>2]+ 
+g[Z>>2]*+g[Y>>2];g[S>>2]=+g[Oa>>2]-+g[Pa>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[Na>>2]=+g[(c[m>>2]|0)+40>>2];g[T>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Na>>2]*+g[S>>2]-+g[T>>2]*+g[W>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Na>>2]*+g[W>>2]+ +g[T>>2]*+g[S>>2];g[A>>2]=(+g[pa>>2]+ +g[r>>2])*.7071067690849304;g[B>>2]=+g[z>>2]-+g[A>>2];g[H>>2]=+g[z>>2]+ +g[A>>2];g[E>>2]=(+g[fa>>2]-+g[ia>>2])*.7071067690849304;g[F>>2]=+g[D>>2]+ +g[E>>2];g[J>>2]=+g[D>>2]-+g[E>>2];g[y>>2]=+g[(c[m>>2]|0)+16>>2];g[C>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[F>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[y>>2]*+g[F>>2]+ +g[C>>2]*+g[B>>2];g[G>>2]=+g[(c[m>>2]|0)+48>>2];g[I>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[G>>2]*+g[H>>2]-+g[I>>2]*+g[J>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[G>>2]*+g[J>>2]+ +g[I>>2]*+g[H>>2];g[ja>>2]=(+g[fa>>2]+ +g[ia>>2])*.7071067690849304;g[ka>>2]=+g[ca>>2]-+g[ja>>2];g[v>>2]=+g[ca>>2]+ +g[ja>>2];g[s>>2]=(+g[pa>>2]-+g[r>>2])*.7071067690849304;g[t>>2]=+g[oa>>2]-+g[s>>2];g[x>>2]=+g[oa>>2]+ +g[s>>2];g[$>>2]=+g[(c[m>>2]|0)+32>>2];g[la>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[$>>2]*+g[ka>>2]-+g[la>>2]*+g[t>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[la>>2]*+g[ka>>2]+ +g[$>>2]*+g[t>>2];g[u>>2]=+g[c[m>>2]>>2];g[w>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[u>>2]*+g[v>>2]-+g[w>>2]*+g[x>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]*+g[v>>2]+ +g[u>>2]*+g[x>>2];c[Qa>>2]=(c[Qa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+56}i=Ra;return}/* nu(a): coerces a to int, takes a 16-byte frame by bumping i, stores a in that frame, calls bn(a, 71, 8440), then restores i; returns nothing. NOTE(review): bn is defined outside this chunk - presumably a codelet-registration helper, with 71 a function-table index and 8440 the address of a static descriptor; confirm. */function nu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,71,8440);i=b;return}/* ou(a,b,d,e,f,h,j): the seven arguments are coerced to int and spilled into a 512-byte frame taken from i. Before the loop the saved d is advanced by (f-1)*16 four-byte elements; a counter then starts at f and the loop exits once it is no longer below h. Each pass reads values through g at a and b plus 0 .. 8e elements, combines them with the literal factors .9848077297210693, .1736481785774231, .3420201539993286, .9396926164627075, .6427876353263855, .7660444378852844, .5 and .8660253882408142, multiplies by value pairs read at d+0 .. d+60 bytes and stores the results back to those a/b slots; then the counter is incremented, a moves forward by j elements, b moves back by j elements, d moves forward by 64 bytes and the saved copy of e is XORed with c[2998]. i is restored before returning. NOTE(review): looks like an Emscripten build of an FFTW radix-9 half-complex twiddle codelet (cr, ci, W, rs, mb, me, ms), and the XOR resembles FFTW's volatile-stride trick with c[2998] normally zero - both inferred, not shown here; confirm against the FFTW sources. */function ou(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0;vb=i;i=i+512|0;k=vb+508|0;l=vb+504|0;m=vb+500|0;n=vb+496|0;wb=vb+492|0;o=vb+488|0;p=vb+484|0;ub=vb+448|0;ra=vb+444|0;fb=vb+440|0;Ia=vb+436|0;ca=vb+432|0;x=vb+428|0;ma=vb+424|0;Wa=vb+420|0;$a=vb+416|0;ab=vb+412|0;kb=vb+408|0;pb=vb+404|0;qb=vb+400|0;Pa=vb+396|0;y=vb+392|0;ia=vb+388|0;B=vb+384|0;fa=vb+380|0;na=vb+376|0;r=vb+372|0;z=vb+368|0;q=vb+364|0;bb=vb+360|0;qa=vb+356|0;v=vb+352|0;eb=vb+348|0;Ha=vb+344|0;Ga=vb+340|0;w=vb+336|0;oa=vb+332|0;pa=vb+328|0;cb=vb+324|0;db=vb+320|0;sa=vb+316|0;va=vb+312|0;Ja=vb+308|0;Ma=vb+304|0;gb=vb+300|0;jb=vb+296|0;Ka=vb+292|0;Na=vb+288|0;Xa=vb+284|0;_a=vb+280|0;Qa=vb+276|0;Ta=vb+272|0;lb=vb+268|0;ob=vb+264|0;Ra=vb+260|0;Ua=vb+256|0;ta=vb+252|0;ua=vb+248|0;hb=vb+244|0;ib=vb+240|0;Ya=vb+236|0;Za=vb+232|0;mb=vb+228|0;nb=vb+224|0;La=vb+220|0;Oa=vb+216|0;ga=vb+212|0;ha=vb+208|0;da=vb+204|0;ea=vb+200|0;Sa=vb+196|0;Va=vb+192|0;wa=vb+188|0;Ca=vb+184|0;Aa=vb+180|0;Ea=vb+176|0;sb=vb+172|0;tb=vb+168|0;ya=vb+164|0;za=vb+160|0;rb=vb+156|0;xa=vb+152|0;Ba=vb+148|0;Da=vb+144|0;T=vb+140|0;X=vb+136|0;t=vb+132|0;S=vb+128|0;Q=vb+124|0;W=vb+120|0;s=vb+116|0;A=vb+112|0;Fa=vb+108|0;u=vb+104|0;_=vb+100|0;aa=vb+96|0;Z=vb+92|0;$=vb+88|0;U=vb+84|0;Y=vb+80|0;R=vb+76|0;V=vb+72|0;G=vb+68|0;K=vb+64|0;ka=vb+60|0;F=vb+56|0;D=vb+52|0;J=vb+48|0;ja=vb+44|0;C=vb+40|0;ba=vb+36|0;la=vb+32|0;N=vb+28|0;P=vb+24|0;M=vb+20|0;O=vb+16|0;H=vb+12|0;L=vb+8|0;E=vb+4|0;I=vb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[wb>>2]=f;c[o>>2]=h
;c[p>>2]=j;g[vb+480>>2]=.9848077297210693;g[vb+476>>2]=.1736481785774231;g[vb+472>>2]=.3420201539993286;g[vb+468>>2]=.9396926164627075;g[vb+464>>2]=.6427876353263855;g[vb+460>>2]=.7660444378852844;g[vb+456>>2]=.5;g[vb+452>>2]=.8660253882408142;c[ub>>2]=c[wb>>2];c[m>>2]=(c[m>>2]|0)+((c[wb>>2]|0)-1<<4<<2);while(1){if((c[ub>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[bb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[oa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[v>>2]=(+g[oa>>2]-+g[pa>>2])*.8660253882408142;g[cb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[db>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[eb>>2]=+g[cb>>2]-+g[db>>2];g[Ha>>2]=(+g[cb>>2]+ +g[db>>2])*.8660253882408142;g[ra>>2]=+g[q>>2]+ +g[qa>>2];g[fb>>2]=+g[bb>>2]+ +g[eb>>2];g[Ga>>2]=+g[q>>2]-+g[qa>>2]*.5;g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[ca>>2]=+g[Ga>>2]+ +g[Ha>>2];g[w>>2]=+g[bb>>2]-+g[eb>>2]*.5;g[x>>2]=+g[v>>2]+ +g[w>>2];g[ma>>2]=+g[w>>2]-+g[v>>2];g[sa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ta>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[Ja>>2]=+g[sa>>2]-+g[va>>2]*.5;g[Ma>>2]=(+g[ta>>2]-+g[ua>>2])*.8660253882408142;g[gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[hb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ib>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[jb>>2]=+g[hb>>2]-+g[ib>>2];g[Ka>>2]=(+g[hb>>2]+ +g[ib>>2])*.8660253882408142;g[Na>>2]=+g[gb>>2]-+g[jb>>2]*.5;g[Xa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ya>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Za>>2]=+g[c[l>>2]>>2];g[_a>>2]=+g[Ya>>2]+ +g[Za>>2];g[Qa>>2]=+g[Xa>>2]-+g[_a>>2]*.5;g[Ta>>2]=(+g[Ya>>2]-+g[Za>>2])*.8660253882408142;g[lb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[mb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[nb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ob>>2]=+g[mb>>2]+ +g[nb>>2];g[Ra>>2]=(+g[mb>>2]-+g[nb>>2])*.8660253882408142;g[Ua>>2]=+g[ob>>2]*.5+ +g[lb>>2];g[Wa>>2]=+g[sa>>2]+ 
+g[va>>2];g[$a>>2]=+g[Xa>>2]+ +g[_a>>2];g[ab>>2]=+g[Wa>>2]+ +g[$a>>2];g[kb>>2]=+g[gb>>2]+ +g[jb>>2];g[pb>>2]=+g[lb>>2]-+g[ob>>2];g[qb>>2]=+g[kb>>2]+ +g[pb>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[Oa>>2]=+g[Ma>>2]+ +g[Na>>2];g[Pa>>2]=+g[La>>2]*.7660444378852844-+g[Oa>>2]*.6427876353263855;g[y>>2]=+g[Oa>>2]*.7660444378852844+ +g[La>>2]*.6427876353263855;g[ga>>2]=+g[Qa>>2]-+g[Ra>>2];g[ha>>2]=+g[Ua>>2]-+g[Ta>>2];g[ia>>2]=+g[ga>>2]*.9396926164627075+ +g[ha>>2]*.3420201539993286;g[B>>2]=+g[ga>>2]*.3420201539993286-+g[ha>>2]*.9396926164627075;g[da>>2]=+g[Ja>>2]+ +g[Ka>>2];g[ea>>2]=+g[Na>>2]-+g[Ma>>2];g[fa>>2]=+g[da>>2]*.1736481785774231-+g[ea>>2]*.9848077297210693;g[na>>2]=+g[ea>>2]*.1736481785774231+ +g[da>>2]*.9848077297210693;g[Sa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[r>>2]=+g[Sa>>2]*.1736481785774231-+g[Va>>2]*.9848077297210693;g[z>>2]=+g[Sa>>2]*.9848077297210693+ +g[Va>>2]*.1736481785774231;g[c[k>>2]>>2]=+g[ra>>2]+ +g[ab>>2];g[c[l>>2]>>2]=+g[fb>>2]+ +g[qb>>2];g[sb>>2]=+g[ra>>2]-+g[ab>>2]*.5;g[tb>>2]=(+g[pb>>2]-+g[kb>>2])*.8660253882408142;g[wa>>2]=+g[sb>>2]-+g[tb>>2];g[Ca>>2]=+g[sb>>2]+ +g[tb>>2];g[ya>>2]=+g[fb>>2]-+g[qb>>2]*.5;g[za>>2]=(+g[Wa>>2]-+g[$a>>2])*.8660253882408142;g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[Ea>>2]=+g[za>>2]+ +g[ya>>2];g[rb>>2]=+g[(c[m>>2]|0)+40>>2];g[xa>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[rb>>2]*+g[wa>>2]-+g[xa>>2]*+g[Aa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[rb>>2]*+g[Aa>>2]+ +g[xa>>2]*+g[wa>>2];g[Ba>>2]=+g[(c[m>>2]|0)+16>>2];g[Da>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Ba>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Ba>>2]*+g[Ea>>2]+ +g[Da>>2]*+g[Ca>>2];g[T>>2]=(+g[z>>2]-+g[y>>2])*.8660253882408142;g[X>>2]=(+g[Pa>>2]-+g[r>>2])*.8660253882408142;g[s>>2]=+g[Pa>>2]+ +g[r>>2];g[t>>2]=+g[Ia>>2]+ +g[s>>2];g[S>>2]=+g[Ia>>2]-+g[s>>2]*.5;g[A>>2]=+g[y>>2]+ +g[z>>2];g[Q>>2]=+g[x>>2]+ 
+g[A>>2];g[W>>2]=+g[x>>2]-+g[A>>2]*.5;g[Fa>>2]=+g[c[m>>2]>>2];g[u>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Fa>>2]*+g[t>>2]-+g[u>>2]*+g[Q>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[u>>2]*+g[t>>2]+ +g[Fa>>2]*+g[Q>>2];g[_>>2]=+g[S>>2]+ +g[T>>2];g[aa>>2]=+g[X>>2]+ +g[W>>2];g[Z>>2]=+g[(c[m>>2]|0)+24>>2];g[$>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Z>>2]*+g[_>>2]-+g[$>>2]*+g[aa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Z>>2]*+g[aa>>2]+ +g[$>>2]*+g[_>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[R>>2]=+g[(c[m>>2]|0)+48>>2];g[V>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[R>>2]*+g[U>>2]-+g[V>>2]*+g[Y>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[R>>2]*+g[Y>>2]+ +g[V>>2]*+g[U>>2];g[G>>2]=(+g[B>>2]-+g[na>>2])*.8660253882408142;g[K>>2]=(+g[fa>>2]+ +g[ia>>2])*.8660253882408142;g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[ka>>2]=+g[ca>>2]+ +g[ja>>2];g[F>>2]=+g[ca>>2]-+g[ja>>2]*.5;g[C>>2]=+g[na>>2]+ +g[B>>2];g[D>>2]=+g[ma>>2]+ +g[C>>2];g[J>>2]=+g[ma>>2]-+g[C>>2]*.5;g[ba>>2]=+g[(c[m>>2]|0)+8>>2];g[la>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ba>>2]*+g[ka>>2]-+g[la>>2]*+g[D>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ba>>2]*+g[D>>2]+ +g[la>>2]*+g[ka>>2];g[N>>2]=+g[G>>2]+ +g[F>>2];g[P>>2]=+g[J>>2]+ +g[K>>2];g[M>>2]=+g[(c[m>>2]|0)+32>>2];g[O>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[O>>2]*+g[N>>2]+ +g[M>>2]*+g[P>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[E>>2]=+g[(c[m>>2]|0)+56>>2];g[I>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[E>>2]*+g[H>>2]-+g[I>>2]*+g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[I>>2]*+g[H>>2]+ +g[E>>2]*+g[L>>2];c[ub>>2]=(c[ub>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+64;c[n>>2]=c[n>>2]^c[2998]}i=vb;return}/* pu(a): coerces a to int, takes a 16-byte frame by bumping i, stores a in that frame, calls an(a, 52, 8488, 0), then restores i; returns nothing. NOTE(review): an is defined outside this chunk and takes one more argument than the bn helper used by the neighbouring wrappers - presumably a different codelet-registration helper, with 52 a function-table index and 8488 the address of a static descriptor; confirm. */function pu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,52,8488,0);i=b;return}function 
qu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0;Dd=i;i=i+960|0;m=Dd+944|0;n=Dd+940|0;o=Dd+936|0;p=Dd+932|0;q=Dd+928|0;r=Dd+924|0;Ed=Dd+920|0;s=Dd+916|0;t=Dd+912|0;Cd=Dd+896|0;xd=Dd+892|0;Ad=Dd+888|0;oa=Dd+884|0;qa=Dd+880|0;sa=Dd+876|0;wa=Dd+872|0;La=Dd+868|0;Ja=Dd+864|0;Bd=Dd+860|0;yd=Dd+856|0;Fc=Dd+852|0;Fa=Dd+848|0;U=Dd+844|0;M=Dd+840|0;aa=Dd+836|0;C=Dd+832|0;Q=Dd+828|0;S=Dd+824|0;ya=Dd+820|0;za=Dd+816|0;Aa=Dd+812|0;G=Dd+808|0;Ta=Dd+804|0;fb=Dd+800|0;nb=Dd+796|0;db=Dd+792|0;hc=Dd+788|0;vc=Dd+784|0;Sb=Dd+780|0;rc=Dd+776|0;Ec=Dd+772|0;O=Dd+768|0;A=Dd+764|0;L=Dd+760|0;zd=Dd+756|0;P=Dd+752|0;B=Dd+748|0;K=Dd+744|0;pa=Dd+740|0;va=Dd+736|0;ra=Dd+732|0;ua=Dd+728|0;Ra=Dd+724|0;Sa=Dd+720|0;Na=Dd+716|0;mb=Dd+712|0;fc=Dd+708|0;gc=Dd+704|0;Qb=Dd+700|0;Rb=Dd+696|0;Bc=Dd+692|0;Tb=Dd+688|0;ic=Dd+684|0;Gc=Dd+680|0;ia=Dd+676|0;qb=Dd+672|0;
Wa=Dd+668|0;V=Dd+664|0;gd=Dd+660|0;Nc=Dd+656|0;W=Dd+652|0;D=Dd+648|0;Za=Dd+644|0;Ub=Dd+640|0;xb=Dd+636|0;jc=Dd+632|0;od=Dd+628|0;Y=Dd+624|0;Xc=Dd+620|0;ka=Dd+616|0;Oa=Dd+612|0;$a=Dd+608|0;$b=Dd+604|0;lc=Dd+600|0;vd=Dd+596|0;Z=Dd+592|0;x=Dd+588|0;la=Dd+584|0;Fb=Dd+580|0;ab=Dd+576|0;cc=Dd+572|0;mc=Dd+568|0;Mb=Dd+564|0;ob=Dd+560|0;ea=Dd+556|0;Va=Dd+552|0;Ac=Dd+548|0;Ua=Dd+544|0;ha=Dd+540|0;pb=Dd+536|0;u=Dd+532|0;Da=Dd+528|0;E=Dd+524|0;da=Dd+520|0;yc=Dd+516|0;zc=Dd+512|0;fa=Dd+508|0;ga=Dd+504|0;cd=Dd+500|0;rb=Dd+496|0;Mc=Dd+492|0;sb=Dd+488|0;fd=Dd+484|0;ub=Dd+480|0;Jc=Dd+476|0;vb=Dd+472|0;Cc=Dd+468|0;Dc=Dd+464|0;Kc=Dd+460|0;Lc=Dd+456|0;dd=Dd+452|0;ed=Dd+448|0;Hc=Dd+444|0;Ic=Dd+440|0;Xa=Dd+436|0;Ya=Dd+432|0;tb=Dd+428|0;wb=Dd+424|0;kd=Dd+420|0;Jb=Dd+416|0;Sc=Dd+412|0;Hb=Dd+408|0;nd=Dd+404|0;Gb=Dd+400|0;Vc=Dd+396|0;Kb=Dd+392|0;Pc=Dd+388|0;Wc=Dd+384|0;id=Dd+380|0;jd=Dd+376|0;Qc=Dd+372|0;Rc=Dd+368|0;ld=Dd+364|0;md=Dd+360|0;Tc=Dd+356|0;Uc=Dd+352|0;Ib=Dd+348|0;Lb=Dd+344|0;Wb=Dd+340|0;_b=Dd+336|0;rd=Dd+332|0;Cb=Dd+328|0;$c=Dd+324|0;Ab=Dd+320|0;ud=Dd+316|0;zb=Dd+312|0;v=Dd+308|0;Db=Dd+304|0;Yc=Dd+300|0;w=Dd+296|0;pd=Dd+292|0;qd=Dd+288|0;Zc=Dd+284|0;_c=Dd+280|0;sd=Dd+276|0;td=Dd+272|0;ad=Dd+268|0;bd=Dd+264|0;Bb=Dd+260|0;Eb=Dd+256|0;ac=Dd+252|0;bc=Dd+248|0;hd=Dd+244|0;wd=Dd+240|0;T=Dd+236|0;X=Dd+232|0;_=Dd+228|0;$=Dd+224|0;ec=Dd+220|0;pc=Dd+216|0;oc=Dd+212|0;qc=Dd+208|0;Vb=Dd+204|0;dc=Dd+200|0;kc=Dd+196|0;nc=Dd+192|0;uc=Dd+188|0;Yb=Dd+184|0;Xb=Dd+180|0;Zb=Dd+176|0;sc=Dd+172|0;tc=Dd+168|0;wc=Dd+164|0;xc=Dd+160|0;z=Dd+156|0;ta=Dd+152|0;na=Dd+148|0;xa=Dd+144|0;Oc=Dd+140|0;y=Dd+136|0;ja=Dd+132|0;ma=Dd+128|0;F=Dd+124|0;N=Dd+120|0;J=Dd+116|0;R=Dd+112|0;Ba=Dd+108|0;Ca=Dd+104|0;H=Dd+100|0;I=Dd+96|0;Qa=Dd+92|0;eb=Dd+88|0;cb=Dd+84|0;gb=Dd+80|0;yb=Dd+76|0;Pa=Dd+72|0;_a=Dd+68|0;bb=Dd+64|0;jb=Dd+60|0;Ob=Dd+56|0;Nb=Dd+52|0;Pb=Dd+48|0;hb=Dd+44|0;ib=Dd+40|0;kb=Dd+36|0;lb=Dd+32|0;Ea=Dd+28|0;Ka=Dd+24|0;Ia=Dd+20|0;Ma=Dd+16|0;ba=Dd+12|0;ca=Dd+8|0;Ga=Dd+4|0;Ha=Dd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=
e;c[q>>2]=f;c[r>>2]=h;c[Ed>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Dd+908>>2]=.3826834261417389;g[Dd+904>>2]=.9238795042037964;g[Dd+900>>2]=.7071067690849304;c[Cd>>2]=c[Ed>>2];c[q>>2]=(c[q>>2]|0)+((c[Ed>>2]|0)-1<<3<<2);while(1){if((c[Cd>>2]|0)>=(c[s>>2]|0))break;g[xd>>2]=+g[c[q>>2]>>2];g[Ad>>2]=+g[(c[q>>2]|0)+4>>2];g[oa>>2]=+g[(c[q>>2]|0)+8>>2];g[qa>>2]=+g[(c[q>>2]|0)+12>>2];g[pa>>2]=+g[xd>>2]*+g[oa>>2];g[va>>2]=+g[Ad>>2]*+g[oa>>2];g[ra>>2]=+g[Ad>>2]*+g[qa>>2];g[ua>>2]=+g[xd>>2]*+g[qa>>2];g[sa>>2]=+g[pa>>2]+ +g[ra>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[La>>2]=+g[ua>>2]+ +g[va>>2];g[Ja>>2]=+g[pa>>2]-+g[ra>>2];g[Bd>>2]=+g[(c[q>>2]|0)+20>>2];g[Ec>>2]=+g[Ad>>2]*+g[Bd>>2];g[O>>2]=+g[oa>>2]*+g[Bd>>2];g[A>>2]=+g[xd>>2]*+g[Bd>>2];g[L>>2]=+g[qa>>2]*+g[Bd>>2];g[yd>>2]=+g[(c[q>>2]|0)+16>>2];g[zd>>2]=+g[xd>>2]*+g[yd>>2];g[P>>2]=+g[qa>>2]*+g[yd>>2];g[B>>2]=+g[Ad>>2]*+g[yd>>2];g[K>>2]=+g[oa>>2]*+g[yd>>2];g[Fc>>2]=+g[zd>>2]-+g[Ec>>2];g[Fa>>2]=+g[O>>2]+ +g[P>>2];g[U>>2]=+g[A>>2]-+g[B>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[aa>>2]=+g[K>>2]-+g[L>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[S>>2]=+g[zd>>2]+ +g[Ec>>2];g[ya>>2]=+g[(c[q>>2]|0)+24>>2];g[za>>2]=+g[(c[q>>2]|0)+28>>2];g[Aa>>2]=+g[xd>>2]*+g[ya>>2]+ +g[Ad>>2]*+g[za>>2];g[G>>2]=+g[xd>>2]*+g[za>>2]-+g[Ad>>2]*+g[ya>>2];g[Ra>>2]=+g[Ja>>2]*+g[Bd>>2];g[Sa>>2]=+g[La>>2]*+g[yd>>2];g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[fb>>2]=+g[Ra>>2]-+g[Sa>>2];g[Na>>2]=+g[Ja>>2]*+g[yd>>2];g[mb>>2]=+g[La>>2]*+g[Bd>>2];g[nb>>2]=+g[Na>>2]-+g[mb>>2];g[db>>2]=+g[Na>>2]+ +g[mb>>2];g[fc>>2]=+g[sa>>2]*+g[Bd>>2];g[gc>>2]=+g[wa>>2]*+g[yd>>2];g[hc>>2]=+g[fc>>2]+ +g[gc>>2];g[vc>>2]=+g[fc>>2]-+g[gc>>2];g[Qb>>2]=+g[sa>>2]*+g[yd>>2];g[Rb>>2]=+g[wa>>2]*+g[Bd>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[rc>>2]=+g[Qb>>2]+ +g[Rb>>2];g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[ob>>2]=+g[u>>2]-+g[Da>>2];g[E>>2]=+g[c[n>>2]>>2];g[da>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ea>>2]=+g[E>>2]-+g[da>>2];g[Va>>2]=+g[E>>2]+ 
+g[da>>2];g[yc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[zc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[Ua>>2]=+g[yc>>2]-+g[zc>>2];g[fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ga>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[pb>>2]=+g[fa>>2]+ +g[ga>>2];g[Bc>>2]=+g[Mb>>2]+ +g[Ac>>2];g[Tb>>2]=+g[ob>>2]+ +g[pb>>2];g[ic>>2]=+g[Va>>2]-+g[Ua>>2];g[Gc>>2]=+g[Mb>>2]-+g[Ac>>2];g[ia>>2]=+g[ea>>2]-+g[ha>>2];g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[V>>2]=+g[ea>>2]+ +g[ha>>2];g[Cc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Dc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[cd>>2]=+g[Cc>>2]+ +g[Dc>>2];g[rb>>2]=+g[Cc>>2]-+g[Dc>>2];g[Kc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Lc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[sb>>2]=+g[Kc>>2]+ +g[Lc>>2];g[dd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[ed>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[fd>>2]=+g[dd>>2]+ +g[ed>>2];g[ub>>2]=+g[dd>>2]-+g[ed>>2];g[Hc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ic>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[vb>>2]=+g[Hc>>2]+ +g[Ic>>2];g[gd>>2]=+g[cd>>2]+ +g[fd>>2];g[Nc>>2]=+g[Jc>>2]-+g[Mc>>2];g[W>>2]=+g[Mc>>2]+ +g[Jc>>2];g[D>>2]=+g[cd>>2]-+g[fd>>2];g[Xa>>2]=+g[rb>>2]+ +g[sb>>2];g[Ya>>2]=+g[ub>>2]+ +g[vb>>2];g[Za>>2]=(+g[Xa>>2]-+g[Ya>>2])*.7071067690849304;g[Ub>>2]=(+g[Xa>>2]+ +g[Ya>>2])*.7071067690849304;g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[xb>>2]=(+g[tb>>2]+ +g[wb>>2])*.7071067690849304;g[jc>>2]=(+g[tb>>2]-+g[wb>>2])*.7071067690849304;g[id>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[jd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[kd>>2]=+g[id>>2]+ +g[jd>>2];g[Jb>>2]=+g[id>>2]-+g[jd>>2];g[Qc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Rc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Sc>>2]=+g[Qc>>2]-+g[Rc>>2];g[Hb>>2]=+g[Qc>>2]+ +g[Rc>>2];g[ld>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[md>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[nd>>2]=+g[ld>>2]+ 
+g[md>>2];g[Gb>>2]=+g[ld>>2]-+g[md>>2];g[Tc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Uc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Vc>>2]=+g[Tc>>2]-+g[Uc>>2];g[Kb>>2]=+g[Tc>>2]+ +g[Uc>>2];g[od>>2]=+g[kd>>2]+ +g[nd>>2];g[Y>>2]=+g[Sc>>2]+ +g[Vc>>2];g[Pc>>2]=+g[kd>>2]-+g[nd>>2];g[Wc>>2]=+g[Sc>>2]-+g[Vc>>2];g[Xc>>2]=+g[Pc>>2]-+g[Wc>>2];g[ka>>2]=+g[Pc>>2]+ +g[Wc>>2];g[Ib>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[Oa>>2]=+g[Ib>>2]*.9238795042037964+ +g[Lb>>2]*.3826834261417389;g[$a>>2]=+g[Lb>>2]*.9238795042037964-+g[Ib>>2]*.3826834261417389;g[Wb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[_b>>2]=+g[Hb>>2]-+g[Gb>>2];g[$b>>2]=+g[Wb>>2]*.3826834261417389-+g[_b>>2]*.9238795042037964;g[lc>>2]=+g[_b>>2]*.3826834261417389+ +g[Wb>>2]*.9238795042037964;g[pd>>2]=+g[c[o>>2]>>2];g[qd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[rd>>2]=+g[pd>>2]+ +g[qd>>2];g[Cb>>2]=+g[pd>>2]-+g[qd>>2];g[Zc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[_c>>2]=+g[c[p>>2]>>2];g[$c>>2]=+g[Zc>>2]-+g[_c>>2];g[Ab>>2]=+g[Zc>>2]+ +g[_c>>2];g[sd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[td>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ud>>2]=+g[sd>>2]+ +g[td>>2];g[zb>>2]=+g[sd>>2]-+g[td>>2];g[ad>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[bd>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[v>>2]=+g[ad>>2]-+g[bd>>2];g[Db>>2]=+g[ad>>2]+ +g[bd>>2];g[vd>>2]=+g[rd>>2]+ +g[ud>>2];g[Z>>2]=+g[$c>>2]+ +g[v>>2];g[Yc>>2]=+g[rd>>2]-+g[ud>>2];g[w>>2]=+g[$c>>2]-+g[v>>2];g[x>>2]=+g[Yc>>2]+ +g[w>>2];g[la>>2]=+g[w>>2]-+g[Yc>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Eb>>2]=+g[Cb>>2]-+g[Db>>2];g[Fb>>2]=+g[Bb>>2]*.9238795042037964-+g[Eb>>2]*.3826834261417389;g[ab>>2]=+g[Bb>>2]*.3826834261417389+ +g[Eb>>2]*.9238795042037964;g[ac>>2]=+g[Cb>>2]+ +g[Db>>2];g[bc>>2]=+g[zb>>2]+ +g[Ab>>2];g[cc>>2]=+g[ac>>2]*.3826834261417389-+g[bc>>2]*.9238795042037964;g[mc>>2]=+g[bc>>2]*.3826834261417389+ +g[ac>>2]*.9238795042037964;g[hd>>2]=+g[Bc>>2]+ +g[gd>>2];g[wd>>2]=+g[od>>2]+ +g[vd>>2];g[T>>2]=+g[hd>>2]-+g[wd>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[_>>2]=+g[Y>>2]+ 
+g[Z>>2];g[$>>2]=+g[X>>2]-+g[_>>2];g[c[m>>2]>>2]=+g[hd>>2]+ +g[wd>>2];g[c[o>>2]>>2]=+g[X>>2]+ +g[_>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[S>>2]*+g[T>>2]-+g[U>>2]*+g[$>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[U>>2]*+g[T>>2]+ +g[S>>2]*+g[$>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[dc>>2]=+g[$b>>2]+ +g[cc>>2];g[ec>>2]=+g[Vb>>2]-+g[dc>>2];g[pc>>2]=+g[Vb>>2]+ +g[dc>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[oc>>2]=+g[kc>>2]-+g[nc>>2];g[qc>>2]=+g[kc>>2]+ +g[nc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Sb>>2]*+g[ec>>2]-+g[hc>>2]*+g[oc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[hc>>2]*+g[ec>>2]+ +g[Sb>>2]*+g[oc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[oa>>2]*+g[pc>>2]-+g[qa>>2]*+g[qc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[qa>>2]*+g[pc>>2]+ +g[oa>>2]*+g[qc>>2];g[sc>>2]=+g[Tb>>2]+ +g[Ub>>2];g[tc>>2]=+g[lc>>2]+ +g[mc>>2];g[uc>>2]=+g[sc>>2]-+g[tc>>2];g[Yb>>2]=+g[sc>>2]+ +g[tc>>2];g[wc>>2]=+g[ic>>2]-+g[jc>>2];g[xc>>2]=+g[$b>>2]-+g[cc>>2];g[Xb>>2]=+g[wc>>2]+ +g[xc>>2];g[Zb>>2]=+g[wc>>2]-+g[xc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[rc>>2]*+g[uc>>2]-+g[vc>>2]*+g[Xb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[rc>>2]*+g[Xb>>2]+ +g[vc>>2]*+g[uc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ya>>2]*+g[Yb>>2]-+g[za>>2]*+g[Zb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ya>>2]*+g[Zb>>2]+ +g[za>>2]*+g[Yb>>2];g[Oc>>2]=+g[Gc>>2]+ +g[Nc>>2];g[y>>2]=(+g[Xc>>2]+ +g[x>>2])*.7071067690849304;g[z>>2]=+g[Oc>>2]-+g[y>>2];g[ta>>2]=+g[Oc>>2]+ +g[y>>2];g[ja>>2]=+g[D>>2]+ +g[ia>>2];g[ma>>2]=(+g[ka>>2]+ +g[la>>2])*.7071067690849304;g[na>>2]=+g[ja>>2]-+g[ma>>2];g[xa>>2]=+g[ja>>2]+ +g[ma>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Fc>>2]*+g[z>>2]-+g[C>>2]*+g[na>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[C>>2]*+g[z>>2]+ +g[Fc>>2]*+g[na>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[sa>>2]*+g[ta>>2]-+g[wa>>2]*+g[xa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[wa>>2]*+g[ta>>2]+ 
+g[sa>>2]*+g[xa>>2];g[Ba>>2]=+g[Gc>>2]-+g[Nc>>2];g[Ca>>2]=(+g[la>>2]-+g[ka>>2])*.7071067690849304;g[F>>2]=+g[Ba>>2]-+g[Ca>>2];g[N>>2]=+g[Ba>>2]+ +g[Ca>>2];g[H>>2]=+g[ia>>2]-+g[D>>2];g[I>>2]=(+g[Xc>>2]-+g[x>>2])*.7071067690849304;g[J>>2]=+g[H>>2]-+g[I>>2];g[R>>2]=+g[H>>2]+ +g[I>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Aa>>2]*+g[F>>2]-+g[G>>2]*+g[J>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Aa>>2]*+g[J>>2]+ +g[G>>2]*+g[F>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[M>>2]*+g[N>>2]-+g[Q>>2]*+g[R>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[M>>2]*+g[R>>2]+ +g[Q>>2]*+g[N>>2];g[yb>>2]=+g[qb>>2]-+g[xb>>2];g[Pa>>2]=+g[Fb>>2]-+g[Oa>>2];g[Qa>>2]=+g[yb>>2]-+g[Pa>>2];g[eb>>2]=+g[yb>>2]+ +g[Pa>>2];g[_a>>2]=+g[Wa>>2]-+g[Za>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[cb>>2]=+g[_a>>2]-+g[bb>>2];g[gb>>2]=+g[_a>>2]+ +g[bb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[nb>>2]*+g[Qa>>2]-+g[Ta>>2]*+g[cb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ta>>2]*+g[Qa>>2]+ +g[nb>>2]*+g[cb>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[db>>2]*+g[eb>>2]-+g[fb>>2]*+g[gb>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[fb>>2]*+g[eb>>2]+ +g[db>>2]*+g[gb>>2];g[hb>>2]=+g[qb>>2]+ +g[xb>>2];g[ib>>2]=+g[$a>>2]+ +g[ab>>2];g[jb>>2]=+g[hb>>2]-+g[ib>>2];g[Ob>>2]=+g[hb>>2]+ +g[ib>>2];g[kb>>2]=+g[Wa>>2]+ +g[Za>>2];g[lb>>2]=+g[Oa>>2]+ +g[Fb>>2];g[Nb>>2]=+g[kb>>2]-+g[lb>>2];g[Pb>>2]=+g[kb>>2]+ +g[lb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[yd>>2]*+g[jb>>2]-+g[Bd>>2]*+g[Nb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[yd>>2]*+g[Nb>>2]+ +g[Bd>>2]*+g[jb>>2];g[c[n>>2]>>2]=+g[xd>>2]*+g[Ob>>2]-+g[Ad>>2]*+g[Pb>>2];g[c[p>>2]>>2]=+g[xd>>2]*+g[Pb>>2]+ +g[Ad>>2]*+g[Ob>>2];g[ba>>2]=+g[Bc>>2]-+g[gd>>2];g[ca>>2]=+g[Z>>2]-+g[Y>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[Ka>>2]=+g[ba>>2]+ +g[ca>>2];g[Ga>>2]=+g[V>>2]-+g[W>>2];g[Ha>>2]=+g[od>>2]-+g[vd>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[Ma>>2]=+g[Ha>>2]+ +g[Ga>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[aa>>2]*+g[Ea>>2]-+g[Fa>>2]*+g[Ia>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[aa>>2]*+g[Ia>>2]+ 
+g[Fa>>2]*+g[Ea>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ja>>2]*+g[Ka>>2]-+g[La>>2]*+g[Ma>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ja>>2]*+g[Ma>>2]+ +g[La>>2]*+g[Ka>>2];c[Cd>>2]=(c[Cd>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=Dd;return}/*
 * ru(a): thin wrapper around an().
 * Saves the stack pointer `i`, reserves 16 bytes, spills the 32-bit integer
 * argument `a` into a stack slot, calls an(a, 53, 8536, 0), then restores the
 * stack pointer. Returns no value.
 * NOTE(review): `an` is defined outside this chunk; 53 and 8536 look like a
 * function-table index and a static-data address (presumably a codelet
 * registration call) -- confirm against the original C source.
 */function ru(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,53,8536,0);i=b;return}/*
 * su(a,b,d,e,f,h,j,k,l): in-place butterfly loop over four float arrays.
 * All nine arguments are coerced to 32-bit integers and spilled to stack
 * slots (m,n,o,p,q,r,cf,s,t). `bf` is the base of a 1280-byte frame taken
 * from the stack pointer `i`; every other local holds the address of one
 * 4-byte slot inside that frame.
 *   a, b - byte addresses of two arrays, stepped forward by l elements per pass
 *   d, e - byte addresses of two arrays, stepped backward by l elements per pass
 *   f    - byte address of a float table; 8 floats (32 bytes) are read per pass
 *   h    - element stride between the 10 points (h*0 .. h*9) used in each array
 *   j, k - loop counter start and exclusive end
 *   l    - per-pass element step (+l for a/b, -l for d/e)
 * Each pass reads 10 strided floats from each of the four arrays, combines
 * them with the 8 table floats, and stores 10 results back into each array.
 * `c` is accessed as 32-bit integer memory (index >>2, |0 coercion) and `g`
 * as 4-byte floating-point memory (index >>2, unary + coercion).
 * Returns no value; the stack pointer is restored on exit.
 * NOTE(review): the argument list and the constants 0.25, 0.559016994,
 * 0.587785252 and 0.951056516 suggest a generated FFTW half-complex twiddle
 * codelet built from radix-5 stages -- inferred, not shown in this chunk;
 * confirm against the original C source.
 */function su(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie
=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0;/* save the stack pointer in bf and reserve the 1280-byte frame; the assignments that follow give each local the address of its slot */bf=i;i=i+1280|0;m=bf+1268|0;n=bf+1264|0;o=bf+1260|0;p=bf+1256|0;q=bf+1252|0;r=bf+1248|0;cf=bf+1244|0;s=bf+1240|0;t=bf+1236|0;af=bf+1216|0;fe=bf+1212|0;ie=bf+1208|0;ge=bf+1204|0;je=bf+1200|0;le=bf+1196|0;wa=bf+1192|0;La=bf+1188|0;Ja=bf+1184|0;Y=bf+1180|0;W=bf+1176|0;_=bf+1172|0;vb=bf+1168|0;lb=bf+1164|0;Fa=bf+1160|0;jb=bf+1156|0;fb=bf+1152|0;zb=bf+1148|0;bb=bf+1144|0;qc=bf+1140|0;Xc=bf+1136|0;mc=bf+1132|0;sc=bf+1128|0;Na=bf+1124|0;ob=bf+1120|0;ld=bf+1116|0;pd=bf+1112|0;Q=bf+1108|0;R=bf+1104|0;S=bf+1100|0;pb=bf+1096|0;vd=bf+1092|0;U=bf+1088|0;td=bf+1084|0;dd=bf+1080|0;$a=bf+1076|0;rb=bf+1072|0;bd=bf+1068|0;Za=bf+1064|0;Z=bf+1060|0;xb=bf+1056|0;ca=bf+1052|0;ub=bf+1048|0;X=bf+1044|0;yb=bf+1040|0;Ea=bf+1036|0;tb=bf+1032|0;he=bf+1028|0;va=bf+1024|0;ke=bf+1020|0;ua=bf+1016|0;oc=bf+1012|0;pc=bf+1008|0;kc=bf+1004|0;lc=bf+1e3|0;Ka=bf+996|0;Ma=bf+992|0;mb=bf+988|0;nb=bf+984|0;$d=bf+980|0;Hd=bf+976|0;Wd=bf+972|0;me=bf+968|0;I=bf+964|0;Dc=bf+960|0;$b=bf+956|0;Ra=bf+952|0;E=bf+948|0;gc=bf+944|0;hc=bf+940|0;ra=bf+936|0;Jb=bf+932|0;gd=bf+928|0;fd=bf+924|0;Gb=bf+920|0;ya=bf+916|0;Qd=bf+912|0;Nd=bf+908|0;xa=bf+904|0;Va=bf+900|0;Ub=bf+896|0;Wa=bf+892|0;zc=bf+888|0;Kc=bf+884|0;Rc=bf+880|0;Sc=bf+876|0;Oe=bf+872|0;de=bf+868|0;ee=bf+864|0;Td=bf+860|0;Ud=bf+856|0;Xd=bf+852|0;Oa=bf+848|0;Pa=bf+844|0;Sa=bf+840|0;J=bf+836|0;K=bf+832|0;L=bf+828|0;Cd=bf+824|0;Fd=bf+820|0;Id=bf+816|0;ac=bf+812|0;bc=bf+808|0;cc=bf+804|0;pe=bf+800|0;se=bf+796|0;te=bf+792|0;cd=bf+788|0;zd=bf+784|0;Mb=bf+780|0;Bc=bf+776|0;Ca=bf+772|0;Zb=bf+768|0;_d=bf+764|0;_b=bf+760|0;H=bf+756|0;Cc=bf+752|0;u=bf+748|0;Da=bf+744|0;Aa=bf+740|0;Ba=bf+736|0;Vc=bf+732|0;Zd=bf+728|0;F=bf+724|0;G=bf+720|0;Ge=bf+716|0;Ad=bf+712|0;Ld=bf+708|0;ne=bf+704|0;w=bf+700|0;Qb=bf+696|0;Gc=bf+692|0;Eb=bf+688|0;ce=bf+684|0;Ed=bf+680|0;Pd=bf+676|0;re=bf+672|0;qa=bf+668|0;yc=bf+664|0;Qc=bf+660|0;Ib=bf+656|0;Ne=bf+652|0;Bd=bf+648|0;Md=bf+644|0
;oe=bf+640|0;D=bf+636|0;Tb=bf+632|0;Jc=bf+628|0;Fb=bf+624|0;Ve=bf+620|0;Dd=bf+616|0;Od=bf+612|0;qe=bf+608|0;ja=bf+604|0;vc=bf+600|0;Nc=bf+596|0;Hb=bf+592|0;Ce=bf+588|0;Ec=bf+584|0;ze=bf+580|0;Ob=bf+576|0;Fe=bf+572|0;Pb=bf+568|0;v=bf+564|0;Fc=bf+560|0;ae=bf+556|0;be=bf+552|0;xe=bf+548|0;ye=bf+544|0;De=bf+540|0;Ee=bf+536|0;Ae=bf+532|0;Be=bf+528|0;Ye=bf+524|0;Oc=bf+520|0;ma=bf+516|0;xc=bf+512|0;$e=bf+508|0;wc=bf+504|0;pa=bf+500|0;Pc=bf+496|0;We=bf+492|0;Xe=bf+488|0;ka=bf+484|0;la=bf+480|0;Ze=bf+476|0;_e=bf+472|0;na=bf+468|0;oa=bf+464|0;Je=bf+460|0;Hc=bf+456|0;z=bf+452|0;Sb=bf+448|0;Me=bf+444|0;Rb=bf+440|0;C=bf+436|0;Ic=bf+432|0;He=bf+428|0;Ie=bf+424|0;x=bf+420|0;y=bf+416|0;Ke=bf+412|0;Le=bf+408|0;A=bf+404|0;B=bf+400|0;Re=bf+396|0;Lc=bf+392|0;fa=bf+388|0;Vb=bf+384|0;Ue=bf+380|0;Wb=bf+376|0;ia=bf+372|0;Mc=bf+368|0;Pe=bf+364|0;Qe=bf+360|0;da=bf+356|0;ea=bf+352|0;Se=bf+348|0;Te=bf+344|0;ga=bf+340|0;ha=bf+336|0;wb=bf+332|0;Ab=bf+328|0;xd=bf+324|0;yd=bf+320|0;Kb=bf+316|0;Xa=bf+312|0;hb=bf+308|0;db=bf+304|0;Ua=bf+300|0;gb=bf+296|0;Db=bf+292|0;cb=bf+288|0;Qa=bf+284|0;Ta=bf+280|0;Bb=bf+276|0;Cb=bf+272|0;Lb=bf+268|0;Ya=bf+264|0;kb=bf+260|0;Nb=bf+256|0;_a=bf+252|0;ab=bf+248|0;eb=bf+244|0;ib=bf+240|0;Rd=bf+236|0;hd=bf+232|0;rd=bf+228|0;md=bf+224|0;ed=bf+220|0;qd=bf+216|0;Kd=bf+212|0;nd=bf+208|0;Vd=bf+204|0;Yd=bf+200|0;Gd=bf+196|0;Jd=bf+192|0;Sd=bf+188|0;id=bf+184|0;ud=bf+180|0;wd=bf+176|0;jd=bf+172|0;kd=bf+168|0;od=bf+164|0;sd=bf+160|0;sa=bf+156|0;za=bf+152|0;Ga=bf+148|0;aa=bf+144|0;O=bf+140|0;Ha=bf+136|0;we=bf+132|0;$=bf+128|0;M=bf+124|0;N=bf+120|0;ue=bf+116|0;ve=bf+112|0;ta=bf+108|0;P=bf+104|0;qb=bf+100|0;sb=bf+96|0;T=bf+92|0;V=bf+88|0;ba=bf+84|0;Ia=bf+80|0;Ac=bf+76|0;ic=bf+72|0;Zc=bf+68|0;uc=bf+64|0;fc=bf+60|0;Yc=bf+56|0;Xb=bf+52|0;tc=bf+48|0;dc=bf+44|0;ec=bf+40|0;Tc=bf+36|0;Uc=bf+32|0;Yb=bf+28|0;jc=bf+24|0;$c=bf+20|0;ad=bf+16|0;nc=bf+12|0;rc=bf+8|0;Wc=bf+4|0;_c=bf;/* spill the nine arguments into their frame slots */c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[cf>>2]=j;c[s>>2]=k;c[t>>2]=l;/* four constants are stored into the frame; the loop body below uses the same values as inline literals */g[bf+1232>>2]=.25;g[bf+
1228>>2]=.55901700258255;g[bf+1224>>2]=.5877852439880371;g[bf+1220>>2]=.9510565400123596;/* loop counter starts at j; the table pointer is pre-advanced by (j-1)*32 bytes */c[af>>2]=c[cf>>2];c[q>>2]=(c[q>>2]|0)+((c[cf>>2]|0)-1<<3<<2);while(1){if((c[af>>2]|0)>=(c[s>>2]|0))break;/* load the 8 table floats for this pass (byte offsets 0..28) and form products of them */g[fe>>2]=+g[c[q>>2]>>2];g[ie>>2]=+g[(c[q>>2]|0)+4>>2];g[ge>>2]=+g[(c[q>>2]|0)+8>>2];g[je>>2]=+g[(c[q>>2]|0)+12>>2];g[he>>2]=+g[fe>>2]*+g[ge>>2];g[va>>2]=+g[ie>>2]*+g[ge>>2];g[ke>>2]=+g[ie>>2]*+g[je>>2];g[ua>>2]=+g[fe>>2]*+g[je>>2];g[le>>2]=+g[he>>2]+ +g[ke>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[La>>2]=+g[ua>>2]+ +g[va>>2];g[Ja>>2]=+g[he>>2]-+g[ke>>2];g[Y>>2]=+g[(c[q>>2]|0)+20>>2];g[Z>>2]=+g[je>>2]*+g[Y>>2];g[xb>>2]=+g[fe>>2]*+g[Y>>2];g[ca>>2]=+g[ge>>2]*+g[Y>>2];g[ub>>2]=+g[ie>>2]*+g[Y>>2];g[W>>2]=+g[(c[q>>2]|0)+16>>2];g[X>>2]=+g[ge>>2]*+g[W>>2];g[yb>>2]=+g[ie>>2]*+g[W>>2];g[Ea>>2]=+g[je>>2]*+g[W>>2];g[tb>>2]=+g[fe>>2]*+g[W>>2];g[_>>2]=+g[X>>2]+ +g[Z>>2];g[vb>>2]=+g[tb>>2]-+g[ub>>2];g[lb>>2]=+g[ca>>2]+ +g[Ea>>2];g[Fa>>2]=+g[ca>>2]-+g[Ea>>2];g[jb>>2]=+g[X>>2]-+g[Z>>2];g[fb>>2]=+g[xb>>2]-+g[yb>>2];g[zb>>2]=+g[xb>>2]+ +g[yb>>2];g[bb>>2]=+g[tb>>2]+ +g[ub>>2];g[oc>>2]=+g[le>>2]*+g[Y>>2];g[pc>>2]=+g[wa>>2]*+g[W>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[Xc>>2]=+g[oc>>2]+ +g[pc>>2];g[kc>>2]=+g[le>>2]*+g[W>>2];g[lc>>2]=+g[wa>>2]*+g[Y>>2];g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[sc>>2]=+g[kc>>2]-+g[lc>>2];g[Ka>>2]=+g[Ja>>2]*+g[W>>2];g[Ma>>2]=+g[La>>2]*+g[Y>>2];g[Na>>2]=+g[Ka>>2]+ +g[Ma>>2];g[mb>>2]=+g[Ja>>2]*+g[Y>>2];g[nb>>2]=+g[La>>2]*+g[W>>2];g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[ld>>2]=+g[Ka>>2]-+g[Ma>>2];g[pd>>2]=+g[mb>>2]+ +g[nb>>2];g[Q>>2]=+g[(c[q>>2]|0)+24>>2];g[R>>2]=+g[(c[q>>2]|0)+28>>2];g[S>>2]=+g[fe>>2]*+g[Q>>2]+ +g[ie>>2]*+g[R>>2];g[pb>>2]=+g[Na>>2]*+g[Q>>2]+ +g[ob>>2]*+g[R>>2];g[vd>>2]=+g[le>>2]*+g[R>>2]-+g[wa>>2]*+g[Q>>2];g[U>>2]=+g[fe>>2]*+g[R>>2]-+g[ie>>2]*+g[Q>>2];g[td>>2]=+g[le>>2]*+g[Q>>2]+ +g[wa>>2]*+g[R>>2];g[dd>>2]=+g[Ja>>2]*+g[R>>2]-+g[La>>2]*+g[Q>>2];g[$a>>2]=+g[ge>>2]*+g[R>>2]-+g[je>>2]*+g[Q>>2];g[rb>>2]=+g[Na>>2]*+g[R>>2]-+g[ob>>2]*+g[Q>>2];g[bd>>2]=+g[Ja>>2]*+g[Q>>2]+ 
+g[La>>2]*+g[R>>2];g[Za>>2]=+g[ge>>2]*+g[Q>>2]+ +g[je>>2]*+g[R>>2];/* load the strided input points from the four arrays and form their sums and differences */g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[Bc>>2]=+g[u>>2]-+g[Da>>2];g[Aa>>2]=+g[c[n>>2]>>2];g[Ba>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[Zb>>2]=+g[Aa>>2]+ +g[Ba>>2];g[Vc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Zd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[_d>>2]=+g[Vc>>2]+ +g[Zd>>2];g[_b>>2]=+g[Vc>>2]-+g[Zd>>2];g[F>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[G>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[Cc>>2]=+g[F>>2]+ +g[G>>2];g[$d>>2]=+g[Mb>>2]+ +g[_d>>2];g[Hd>>2]=+g[Bc>>2]-+g[Cc>>2];g[Wd>>2]=+g[_b>>2]+ +g[Zb>>2];g[me>>2]=+g[Mb>>2]-+g[_d>>2];g[I>>2]=+g[Ca>>2]-+g[H>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[Ra>>2]=+g[Ca>>2]+ +g[H>>2];g[ae>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[be>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ce>>2]=+g[ae>>2]+ +g[be>>2];g[Ec>>2]=+g[ae>>2]-+g[be>>2];g[xe>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ye>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ze>>2]=+g[xe>>2]-+g[ye>>2];g[Ob>>2]=+g[xe>>2]+ +g[ye>>2];g[De>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ee>>2]=+g[c[o>>2]>>2];g[Fe>>2]=+g[De>>2]+ +g[Ee>>2];g[Pb>>2]=+g[De>>2]-+g[Ee>>2];g[Ae>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Be>>2]=+g[c[p>>2]>>2];g[v>>2]=+g[Ae>>2]-+g[Be>>2];g[Fc>>2]=+g[Ae>>2]+ +g[Be>>2];g[Ge>>2]=+g[Ce>>2]+ +g[Fe>>2];g[Ad>>2]=+g[Ec>>2]-+g[Fc>>2];g[Ld>>2]=+g[Pb>>2]+ +g[Ob>>2];g[ne>>2]=+g[Ce>>2]-+g[Fe>>2];g[w>>2]=+g[ze>>2]-+g[v>>2];g[Qb>>2]=+g[Ob>>2]-+g[Pb>>2];g[Gc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Eb>>2]=+g[ze>>2]+ +g[v>>2];g[We>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Xe>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ye>>2]=+g[We>>2]+ +g[Xe>>2];g[Oc>>2]=+g[We>>2]-+g[Xe>>2];g[ka>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[la>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[xc>>2]=+g[ka>>2]+ 
+g[la>>2];g[Ze>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[_e>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[$e>>2]=+g[Ze>>2]+ +g[_e>>2];g[wc>>2]=+g[Ze>>2]-+g[_e>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[Pc>>2]=+g[na>>2]+ +g[oa>>2];g[ce>>2]=+g[Ye>>2]+ +g[$e>>2];g[Ed>>2]=+g[Oc>>2]+ +g[Pc>>2];g[Pd>>2]=+g[wc>>2]-+g[xc>>2];g[re>>2]=+g[Ye>>2]-+g[$e>>2];g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[yc>>2]=+g[wc>>2]+ +g[xc>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Ib>>2]=+g[ma>>2]+ +g[pa>>2];g[He>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ie>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Je>>2]=+g[He>>2]+ +g[Ie>>2];g[Hc>>2]=+g[He>>2]-+g[Ie>>2];g[x>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[Sb>>2]=+g[x>>2]+ +g[y>>2];g[Ke>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Le>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Me>>2]=+g[Ke>>2]+ +g[Le>>2];g[Rb>>2]=+g[Ke>>2]-+g[Le>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[Ic>>2]=+g[A>>2]+ +g[B>>2];g[Ne>>2]=+g[Je>>2]+ +g[Me>>2];g[Bd>>2]=+g[Hc>>2]-+g[Ic>>2];g[Md>>2]=+g[Rb>>2]-+g[Sb>>2];g[oe>>2]=+g[Je>>2]-+g[Me>>2];g[D>>2]=+g[z>>2]-+g[C>>2];g[Tb>>2]=+g[Rb>>2]+ +g[Sb>>2];g[Jc>>2]=+g[Hc>>2]+ +g[Ic>>2];g[Fb>>2]=+g[z>>2]+ +g[C>>2];g[Pe>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Qe>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Re>>2]=+g[Pe>>2]+ +g[Qe>>2];g[Lc>>2]=+g[Pe>>2]-+g[Qe>>2];g[da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ea>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[Vb>>2]=+g[da>>2]+ +g[ea>>2];g[Se>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Te>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ue>>2]=+g[Se>>2]+ +g[Te>>2];g[Wb>>2]=+g[Se>>2]-+g[Te>>2];g[ga>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[Mc>>2]=+g[ga>>2]+ +g[ha>>2];g[Ve>>2]=+g[Re>>2]+ 
+g[Ue>>2];g[Dd>>2]=+g[Lc>>2]+ +g[Mc>>2];g[Od>>2]=+g[Wb>>2]+ +g[Vb>>2];g[qe>>2]=+g[Re>>2]-+g[Ue>>2];g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[vc>>2]=+g[Vb>>2]-+g[Wb>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[Hb>>2]=+g[fa>>2]+ +g[ia>>2];g[E>>2]=+g[w>>2]-+g[D>>2];g[gc>>2]=+g[Gc>>2]-+g[Jc>>2];g[hc>>2]=+g[Nc>>2]-+g[Qc>>2];g[ra>>2]=+g[ja>>2]-+g[qa>>2];g[Jb>>2]=+g[Hb>>2]-+g[Ib>>2];g[gd>>2]=+g[Dd>>2]-+g[Ed>>2];g[fd>>2]=+g[Ad>>2]-+g[Bd>>2];g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[ya>>2]=+g[qe>>2]-+g[re>>2];g[Qd>>2]=+g[Od>>2]-+g[Pd>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[xa>>2]=+g[ne>>2]-+g[oe>>2];g[Va>>2]=+g[Ge>>2]-+g[Ne>>2];g[Ub>>2]=+g[Qb>>2]+ +g[Tb>>2];g[Wa>>2]=+g[Ve>>2]-+g[ce>>2];g[zc>>2]=+g[vc>>2]+ +g[yc>>2];g[Kc>>2]=+g[Gc>>2]+ +g[Jc>>2];g[Rc>>2]=+g[Nc>>2]+ +g[Qc>>2];g[Sc>>2]=+g[Kc>>2]+ +g[Rc>>2];g[Oe>>2]=+g[Ge>>2]+ +g[Ne>>2];g[de>>2]=+g[Ve>>2]+ +g[ce>>2];g[ee>>2]=+g[Oe>>2]+ +g[de>>2];g[Td>>2]=+g[Ld>>2]+ +g[Md>>2];g[Ud>>2]=+g[Od>>2]+ +g[Pd>>2];g[Xd>>2]=+g[Td>>2]+ +g[Ud>>2];g[Oa>>2]=+g[Eb>>2]+ +g[Fb>>2];g[Pa>>2]=+g[Hb>>2]+ +g[Ib>>2];g[Sa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[J>>2]=+g[w>>2]+ +g[D>>2];g[K>>2]=+g[ja>>2]+ +g[qa>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[Cd>>2]=+g[Ad>>2]+ +g[Bd>>2];g[Fd>>2]=+g[Dd>>2]+ +g[Ed>>2];g[Id>>2]=+g[Cd>>2]+ +g[Fd>>2];g[ac>>2]=+g[Qb>>2]-+g[Tb>>2];g[bc>>2]=+g[vc>>2]-+g[yc>>2];g[cc>>2]=+g[ac>>2]+ +g[bc>>2];g[pe>>2]=+g[ne>>2]+ +g[oe>>2];g[se>>2]=+g[qe>>2]+ +g[re>>2];g[te>>2]=+g[pe>>2]+ +g[se>>2];g[c[m>>2]>>2]=+g[$d>>2]+ +g[ee>>2];g[c[o>>2]>>2]=+g[Ra>>2]+ +g[Sa>>2];g[wb>>2]=+g[me>>2]+ +g[te>>2];g[Ab>>2]=+g[I>>2]+ +g[L>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[vb>>2]*+g[wb>>2]-+g[zb>>2]*+g[Ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[zb>>2]*+g[wb>>2]+ +g[vb>>2]*+g[Ab>>2];g[xd>>2]=+g[Hd>>2]+ +g[Id>>2];g[yd>>2]=+g[Wd>>2]+ +g[Xd>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Na>>2]*+g[xd>>2]-+g[ob>>2]*+g[yd>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Na>>2]*+g[yd>>2]+ +g[ob>>2]*+g[xd>>2];g[cd>>2]=+g[Dc>>2]+ +g[Sc>>2];g[zd>>2]=+g[$b>>2]+ 
+g[cc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[bd>>2]*+g[cd>>2]-+g[dd>>2]*+g[zd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[bd>>2]*+g[zd>>2]+ +g[dd>>2]*+g[cd>>2];g[Kb>>2]=+g[Gb>>2]*.9510565400123596+ +g[Jb>>2]*.5877852439880371;g[Xa>>2]=+g[Va>>2]*.9510565400123596+ +g[Wa>>2]*.5877852439880371;g[hb>>2]=+g[Va>>2]*.5877852439880371-+g[Wa>>2]*.9510565400123596;g[db>>2]=+g[Gb>>2]*.5877852439880371-+g[Jb>>2]*.9510565400123596;g[Qa>>2]=(+g[Oa>>2]-+g[Pa>>2])*.55901700258255;g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2]*.25;g[Ua>>2]=+g[Qa>>2]+ +g[Ta>>2];g[gb>>2]=+g[Ta>>2]-+g[Qa>>2];g[Bb>>2]=(+g[Oe>>2]-+g[de>>2])*.55901700258255;g[Cb>>2]=+g[$d>>2]-+g[ee>>2]*.25;g[Db>>2]=+g[Bb>>2]+ +g[Cb>>2];g[cb>>2]=+g[Cb>>2]-+g[Bb>>2];g[Lb>>2]=+g[Db>>2]+ +g[Kb>>2];g[Ya>>2]=+g[Ua>>2]-+g[Xa>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ja>>2]*+g[Lb>>2]-+g[La>>2]*+g[Ya>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[La>>2]*+g[Lb>>2]+ +g[Ja>>2]*+g[Ya>>2];g[kb>>2]=+g[cb>>2]-+g[db>>2];g[Nb>>2]=+g[hb>>2]+ +g[gb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[jb>>2]*+g[kb>>2]-+g[lb>>2]*+g[Nb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[lb>>2]*+g[kb>>2]+ +g[jb>>2]*+g[Nb>>2];g[_a>>2]=+g[Db>>2]-+g[Kb>>2];g[ab>>2]=+g[Xa>>2]+ +g[Ua>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Za>>2]*+g[_a>>2]-+g[$a>>2]*+g[ab>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[$a>>2]*+g[_a>>2]+ +g[Za>>2]*+g[ab>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[bb>>2]*+g[eb>>2]-+g[fb>>2]*+g[ib>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[fb>>2]*+g[eb>>2]+ +g[bb>>2]*+g[ib>>2];g[Rd>>2]=+g[Nd>>2]*.9510565400123596+ +g[Qd>>2]*.5877852439880371;g[hd>>2]=+g[fd>>2]*.9510565400123596+ +g[gd>>2]*.5877852439880371;g[rd>>2]=+g[fd>>2]*.5877852439880371-+g[gd>>2]*.9510565400123596;g[md>>2]=+g[Nd>>2]*.5877852439880371-+g[Qd>>2]*.9510565400123596;g[Vd>>2]=(+g[Td>>2]-+g[Ud>>2])*.55901700258255;g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2]*.25;g[ed>>2]=+g[Vd>>2]+ 
+g[Yd>>2];g[qd>>2]=+g[Yd>>2]-+g[Vd>>2];g[Gd>>2]=(+g[Cd>>2]-+g[Fd>>2])*.55901700258255;g[Jd>>2]=+g[Hd>>2]-+g[Id>>2]*.25;g[Kd>>2]=+g[Gd>>2]+ +g[Jd>>2];g[nd>>2]=+g[Jd>>2]-+g[Gd>>2];g[Sd>>2]=+g[Kd>>2]-+g[Rd>>2];g[id>>2]=+g[ed>>2]+ +g[hd>>2];g[c[n>>2]>>2]=+g[fe>>2]*+g[Sd>>2]-+g[ie>>2]*+g[id>>2];g[c[p>>2]>>2]=+g[fe>>2]*+g[id>>2]+ +g[ie>>2]*+g[Sd>>2];g[ud>>2]=+g[nd>>2]-+g[md>>2];g[wd>>2]=+g[qd>>2]+ +g[rd>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[td>>2]*+g[ud>>2]-+g[vd>>2]*+g[wd>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[td>>2]*+g[wd>>2]+ +g[vd>>2]*+g[ud>>2];g[jd>>2]=+g[Rd>>2]+ +g[Kd>>2];g[kd>>2]=+g[ed>>2]-+g[hd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[W>>2]*+g[jd>>2]-+g[Y>>2]*+g[kd>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[W>>2]*+g[kd>>2]+ +g[Y>>2]*+g[jd>>2];g[od>>2]=+g[md>>2]+ +g[nd>>2];g[sd>>2]=+g[qd>>2]-+g[rd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ld>>2]*+g[od>>2]-+g[pd>>2]*+g[sd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ld>>2]*+g[sd>>2]+ +g[pd>>2]*+g[od>>2];g[sa>>2]=+g[E>>2]*.5877852439880371-+g[ra>>2]*.9510565400123596;g[za>>2]=+g[xa>>2]*.5877852439880371-+g[ya>>2]*.9510565400123596;g[Ga>>2]=+g[xa>>2]*.9510565400123596+ +g[ya>>2]*.5877852439880371;g[aa>>2]=+g[E>>2]*.9510565400123596+ +g[ra>>2]*.5877852439880371;g[M>>2]=+g[I>>2]-+g[L>>2]*.25;g[N>>2]=(+g[J>>2]-+g[K>>2])*.55901700258255;g[O>>2]=+g[M>>2]-+g[N>>2];g[Ha>>2]=+g[N>>2]+ +g[M>>2];g[ue>>2]=+g[me>>2]-+g[te>>2]*.25;g[ve>>2]=(+g[pe>>2]-+g[se>>2])*.55901700258255;g[we>>2]=+g[ue>>2]-+g[ve>>2];g[$>>2]=+g[ve>>2]+ +g[ue>>2];g[ta>>2]=+g[we>>2]-+g[sa>>2];g[P>>2]=+g[za>>2]+ +g[O>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[le>>2]*+g[ta>>2]-+g[wa>>2]*+g[P>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[wa>>2]*+g[ta>>2]+ +g[le>>2]*+g[P>>2];g[qb>>2]=+g[$>>2]+ +g[aa>>2];g[sb>>2]=+g[Ha>>2]-+g[Ga>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[pb>>2]*+g[qb>>2]-+g[rb>>2]*+g[sb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[rb>>2]*+g[qb>>2]+ +g[pb>>2]*+g[sb>>2];g[T>>2]=+g[we>>2]+ 
+g[sa>>2];g[V>>2]=+g[O>>2]-+g[za>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[S>>2]*+g[T>>2]-+g[U>>2]*+g[V>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[U>>2]*+g[T>>2]+ +g[S>>2]*+g[V>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[_>>2]*+g[ba>>2]-+g[Fa>>2]*+g[Ia>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Fa>>2]*+g[ba>>2]+ +g[_>>2]*+g[Ia>>2];g[Ac>>2]=+g[Ub>>2]*.5877852439880371-+g[zc>>2]*.9510565400123596;g[ic>>2]=+g[gc>>2]*.5877852439880371-+g[hc>>2]*.9510565400123596;g[Zc>>2]=+g[gc>>2]*.9510565400123596+ +g[hc>>2]*.5877852439880371;g[uc>>2]=+g[Ub>>2]*.9510565400123596+ +g[zc>>2]*.5877852439880371;g[dc>>2]=+g[$b>>2]-+g[cc>>2]*.25;g[ec>>2]=(+g[ac>>2]-+g[bc>>2])*.55901700258255;g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[Yc>>2]=+g[ec>>2]+ +g[dc>>2];g[Tc>>2]=+g[Dc>>2]-+g[Sc>>2]*.25;g[Uc>>2]=(+g[Kc>>2]-+g[Rc>>2])*.55901700258255;g[Xb>>2]=+g[Tc>>2]-+g[Uc>>2];g[tc>>2]=+g[Uc>>2]+ +g[Tc>>2];g[Yb>>2]=+g[Ac>>2]+ +g[Xb>>2];g[jc>>2]=+g[fc>>2]-+g[ic>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ge>>2]*+g[Yb>>2]-+g[je>>2]*+g[jc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[ge>>2]*+g[jc>>2]+ +g[je>>2]*+g[Yb>>2];g[$c>>2]=+g[uc>>2]+ +g[tc>>2];g[ad>>2]=+g[Yc>>2]-+g[Zc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Q>>2]*+g[$c>>2]-+g[R>>2]*+g[ad>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Q>>2]*+g[ad>>2]+ +g[R>>2]*+g[$c>>2];g[nc>>2]=+g[Xb>>2]-+g[Ac>>2];g[rc>>2]=+g[fc>>2]+ +g[ic>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[mc>>2]*+g[nc>>2]-+g[qc>>2]*+g[rc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[mc>>2]*+g[rc>>2]+ +g[qc>>2]*+g[nc>>2];g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[_c>>2]=+g[Yc>>2]+ +g[Zc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[sc>>2]*+g[Wc>>2]-+g[Xc>>2]*+g[_c>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[sc>>2]*+g[_c>>2]+ 
+g[Xc>>2]*+g[Wc>>2];/* next pass: bump the counter, step a/b forward and d/e backward by l elements, advance the table by 32 bytes, then XOR the stride slot with the word c[2998] (NOTE(review): presumably zero at run time so the stride is unchanged -- confirm) */c[af>>2]=(c[af>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=bf;return}/*
 * tu(a): thin wrapper around an().
 * Saves the stack pointer `i`, reserves 16 bytes, spills the 32-bit integer
 * argument `a` into a stack slot, calls an(a, 54, 8584, 0), then restores the
 * stack pointer. Returns no value.
 * NOTE(review): `an` is defined outside this chunk; 54 and 8584 look like a
 * function-table index and a static-data address (presumably a codelet
 * registration call) -- confirm against the original C source.
 */function tu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,54,8584,0);i=b;return}function uu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,l
f=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0;oj=i;i=i+2208|0;m=oj+2192|0;n=oj+2188|0;o=oj+2184|0;p=oj+2180|0;q=oj+2176|0;r=oj+2172|0;pj=oj+2168|0;s=oj+2164|0;t=oj+2160|0;nj=oj+2128|0;w=oj+2124|0;z=oj+2120|0;x=oj+2116|0;A=oj+2112|0;C=oj+2108|0;Lb=oj+2104|0;Pa=oj+2100|0;fa=oj+2096|0;D=oj+2092|0;ga=oj+2088|0;ja=oj+2084|0;na=oj+2080|0;Ta=oj+2076|0;Za=oj+2072|0;Oc=oj+2068|0;ld=oj+2064|0;Cc=oj+2060|0;qc=oj+2056|0;wc=oj+2052|0;Zd=oj+2048|0;xd=oj+2044|0;Bd=oj+2040|0;og=oj+2036|0;xg=oj+2032|0;ug=oj+2028|0;Bg=oj+2024|0;Gh=oj+2020|0;li=oj+2016|0;Vh=oj+2012|0;ni=oj+2008|0;db=oj+2004|0;hb=oj+2e3|0;di=oj+1996|0;hi=oj+1992|0;Vd=oj+1988|0;Xd=oj+1984|0;ge=oj+1980|0;wf=oj+1976|0;Ic=oj+1972|0;Mc=oj+1968|0;Vg=oj+1964|0;Zg=oj+1960|0;td=oj+1956|0;vd=oj+1952|0;Cf=oj+1948|0;Of=oj+1944|0;ia=oj+1940|0;jb=oj+1936|0;ma=oj+1932|0;kb=oj+1928|0;oa=oj+1924|0;Ob=oj+1920|0;pb=oj+1916|0;lb=oj+1912|0;dd=oj+1908|0;Ld=oj+1904|0;Gd=oj+1900|0;Md=oj+1896|0;Hd=oj+1892|
0;Rd=oj+1888|0;Jd=oj+1884|0;Nd=oj+1880|0;bb=oj+1876|0;Lc=oj+1872|0;gb=oj+1868|0;Gc=oj+1864|0;cb=oj+1860|0;Kc=oj+1856|0;fb=oj+1852|0;Hc=oj+1848|0;Ra=oj+1844|0;Bc=oj+1840|0;Ya=oj+1836|0;Wb=oj+1832|0;Sa=oj+1828|0;Ac=oj+1824|0;Xa=oj+1820|0;vc=oj+1816|0;y=oj+1812|0;ea=oj+1808|0;B=oj+1804|0;da=oj+1800|0;mg=oj+1796|0;ng=oj+1792|0;sg=oj+1788|0;tg=oj+1784|0;dh=oj+1780|0;Fh=oj+1776|0;Th=oj+1772|0;Uh=oj+1768|0;E=oj+1764|0;ha=oj+1760|0;ka=oj+1756|0;la=oj+1752|0;bd=oj+1748|0;cd=oj+1744|0;Ed=oj+1740|0;Fd=oj+1736|0;je=oj+1732|0;fh=oj+1728|0;Ig=oj+1724|0;Zf=oj+1720|0;Ui=oj+1716|0;pa=oj+1712|0;ag=oj+1708|0;gh=oj+1704|0;tc=oj+1700|0;md=oj+1696|0;Fb=oj+1692|0;Pb=oj+1688|0;Qe=oj+1684|0;Jg=oj+1680|0;Rc=oj+1676|0;_d=oj+1672|0;hj=oj+1668|0;qb=oj+1664|0;kh=oj+1660|0;Lg=oj+1656|0;nh=oj+1652|0;Mg=oj+1648|0;G=oj+1644|0;Qb=oj+1640|0;Uc=oj+1636|0;Wc=oj+1632|0;Ye=oj+1628|0;dg=oj+1624|0;df=oj+1620|0;cg=oj+1616|0;Zb=oj+1612|0;uc=oj+1608|0;zi=oj+1604|0;I=oj+1600|0;zh=oj+1596|0;Lh=oj+1592|0;Ch=oj+1588|0;Kh=oj+1584|0;X=oj+1580|0;Sb=oj+1576|0;jc=oj+1572|0;gd=oj+1568|0;se=oj+1564|0;Gf=oj+1560|0;ze=oj+1556|0;Hf=oj+1552|0;mc=oj+1548|0;hd=oj+1544|0;Oi=oj+1540|0;Z=oj+1536|0;sh=oj+1532|0;Oh=oj+1528|0;vh=oj+1524|0;Nh=oj+1520|0;Na=oj+1516|0;Tb=oj+1512|0;cc=oj+1508|0;be=oj+1504|0;Le=oj+1500|0;Jf=oj+1496|0;sf=oj+1492|0;Kf=oj+1488|0;fc=oj+1484|0;ed=oj+1480|0;Mb=oj+1476|0;he=oj+1472|0;tb=oj+1468|0;Yf=oj+1464|0;mf=oj+1460|0;Xf=oj+1456|0;wb=oj+1452|0;ie=oj+1448|0;Pi=oj+1444|0;ke=oj+1440|0;Ab=oj+1436|0;le=oj+1432|0;Si=oj+1428|0;Ne=oj+1424|0;Db=oj+1420|0;Oe=oj+1416|0;u=oj+1412|0;Da=oj+1408|0;rb=oj+1404|0;sb=oj+1400|0;Vc=oj+1396|0;ce=oj+1392|0;ub=oj+1388|0;vb=oj+1384|0;Eh=oj+1380|0;oi=oj+1376|0;yb=oj+1372|0;zb=oj+1368|0;Qi=oj+1364|0;Ri=oj+1360|0;Bb=oj+1356|0;Cb=oj+1352|0;vg=oj+1348|0;Ti=oj+1344|0;xb=oj+1340|0;Eb=oj+1336|0;_f=oj+1332|0;$f=oj+1328|0;rc=oj+1324|0;sc=oj+1320|0;me=oj+1316|0;Pe=oj+1312|0;Pc=oj+1308|0;Qc=oj+1304|0;Xi=oj+1300|0;af=oj+1296|0;za=oj+1292|0;_e=oj+1288|0;_i=oj+1284|0;Ze=oj+1280|0;Ca=oj+1276|0;bf=o
j+1272|0;cj=oj+1268|0;Ve=oj+1264|0;sa=oj+1260|0;Te=oj+1256|0;fj=oj+1252|0;Se=oj+1248|0;va=oj+1244|0;We=oj+1240|0;Vi=oj+1236|0;Wi=oj+1232|0;xa=oj+1228|0;ya=oj+1224|0;Yi=oj+1220|0;Zi=oj+1216|0;Aa=oj+1212|0;Ba=oj+1208|0;aj=oj+1204|0;bj=oj+1200|0;qa=oj+1196|0;ra=oj+1192|0;dj=oj+1188|0;ej=oj+1184|0;ta=oj+1180|0;ua=oj+1176|0;$i=oj+1172|0;gj=oj+1168|0;ih=oj+1164|0;jh=oj+1160|0;lh=oj+1156|0;mh=oj+1152|0;wa=oj+1148|0;F=oj+1144|0;Sc=oj+1140|0;Tc=oj+1136|0;Ue=oj+1132|0;Xe=oj+1128|0;$e=oj+1124|0;cf=oj+1120|0;Xb=oj+1116|0;Yb=oj+1112|0;lj=oj+1108|0;gf=oj+1104|0;L=oj+1100|0;ue=oj+1096|0;qi=oj+1092|0;te=oj+1088|0;O=oj+1084|0;hf=oj+1080|0;xi=oj+1076|0;xe=oj+1072|0;V=oj+1068|0;qe=oj+1064|0;ui=oj+1060|0;we=oj+1056|0;S=oj+1052|0;ne=oj+1048|0;jj=oj+1044|0;kj=oj+1040|0;M=oj+1036|0;N=oj+1032|0;J=oj+1028|0;K=oj+1024|0;mj=oj+1020|0;pi=oj+1016|0;vi=oj+1012|0;wi=oj+1008|0;oe=oj+1004|0;T=oj+1e3|0;U=oj+996|0;pe=oj+992|0;si=oj+988|0;ti=oj+984|0;kf=oj+980|0;Q=oj+976|0;R=oj+972|0;lf=oj+968|0;ri=oj+964|0;yi=oj+960|0;xh=oj+956|0;yh=oj+952|0;Ah=oj+948|0;Bh=oj+944|0;P=oj+940|0;W=oj+936|0;hc=oj+932|0;ic=oj+928|0;jf=oj+924|0;re=oj+920|0;ve=oj+916|0;ye=oj+912|0;kc=oj+908|0;lc=oj+904|0;Ci=oj+900|0;Be=oj+896|0;aa=oj+892|0;nf=oj+888|0;Fi=oj+884|0;Me=oj+880|0;Ea=oj+876|0;Ce=oj+872|0;Mi=oj+868|0;qf=oj+864|0;La=oj+860|0;Je=oj+856|0;Ji=oj+852|0;pf=oj+848|0;Ia=oj+844|0;Ge=oj+840|0;Ai=oj+836|0;Bi=oj+832|0;ba=oj+828|0;ca=oj+824|0;_=oj+820|0;$=oj+816|0;Di=oj+812|0;Ei=oj+808|0;Ki=oj+804|0;Li=oj+800|0;He=oj+796|0;Ja=oj+792|0;Ka=oj+788|0;Ie=oj+784|0;Hi=oj+780|0;Ii=oj+776|0;Ee=oj+772|0;Ga=oj+768|0;Ha=oj+764|0;Fe=oj+760|0;Gi=oj+756|0;Ni=oj+752|0;qh=oj+748|0;rh=oj+744|0;th=oj+740|0;uh=oj+736|0;Fa=oj+732|0;Ma=oj+728|0;ac=oj+724|0;bc=oj+720|0;De=oj+716|0;Ke=oj+712|0;of=oj+708|0;rf=oj+704|0;dc=oj+700|0;ec=oj+696|0;ij=oj+692|0;v=oj+688|0;Nb=oj+684|0;Rb=oj+680|0;Ub=oj+676|0;Vb=oj+672|0;zc=oj+668|0;Jc=oj+664|0;Fc=oj+660|0;Nc=oj+656|0;xc=oj+652|0;yc=oj+648|0;Dc=oj+644|0;Ec=oj+640|0;H=oj+636|0;Gb=oj+632|0;_a=oj+628|0;Ua=oj+624|0
;Jb=oj+620|0;Va=oj+616|0;nb=oj+612|0;$a=oj+608|0;Hb=oj+604|0;Ib=oj+600|0;Y=oj+596|0;mb=oj+592|0;ob=oj+588|0;Kb=oj+584|0;eb=oj+580|0;ib=oj+576|0;Oa=oj+572|0;Qa=oj+568|0;Wa=oj+564|0;ab=oj+560|0;ae=oj+556|0;yd=oj+552|0;od=oj+548|0;Cd=oj+544|0;jd=oj+540|0;Dd=oj+536|0;rd=oj+532|0;zd=oj+528|0;$d=oj+524|0;nd=oj+520|0;fd=oj+516|0;id=oj+512|0;pd=oj+508|0;qd=oj+504|0;kd=oj+500|0;sd=oj+496|0;ee=oj+492|0;fe=oj+488|0;ud=oj+484|0;wd=oj+480|0;Ad=oj+476|0;de=oj+472|0;$b=oj+468|0;Od=oj+464|0;Yc=oj+460|0;Sd=oj+456|0;oc=oj+452|0;Td=oj+448|0;$c=oj+444|0;Pd=oj+440|0;_b=oj+436|0;Xc=oj+432|0;gc=oj+428|0;nc=oj+424|0;Zc=oj+420|0;_c=oj+416|0;pc=oj+412|0;ad=oj+408|0;Wd=oj+404|0;Yd=oj+400|0;Id=oj+396|0;Kd=oj+392|0;Qd=oj+388|0;Ud=oj+384|0;Ff=oj+380|0;yg=oj+376|0;Uf=oj+372|0;zg=oj+368|0;Mf=oj+364|0;Dg=oj+360|0;Rf=oj+356|0;Cg=oj+352|0;Df=oj+348|0;Ef=oj+344|0;Sf=oj+340|0;Tf=oj+336|0;If=oj+332|0;Lf=oj+328|0;Pf=oj+324|0;Qf=oj+320|0;Nf=oj+316|0;Vf=oj+312|0;Fg=oj+308|0;eh=oj+304|0;Wf=oj+300|0;wg=oj+296|0;Ag=oj+292|0;Eg=oj+288|0;Jh=oj+284|0;ei=oj+280|0;$h=oj+276|0;fi=oj+272|0;Rh=oj+268|0;ji=oj+264|0;Yh=oj+260|0;ii=oj+256|0;Hh=oj+252|0;Ih=oj+248|0;Zh=oj+244|0;_h=oj+240|0;Mh=oj+236|0;Qh=oj+232|0;Wh=oj+228|0;Xh=oj+224|0;Sh=oj+220|0;ai=oj+216|0;mi=oj+212|0;Ph=oj+208|0;bi=oj+204|0;ci=oj+200|0;gi=oj+196|0;ki=oj+192|0;ff=oj+188|0;pg=oj+184|0;ig=oj+180|0;qg=oj+176|0;uf=oj+172|0;yf=oj+168|0;fg=oj+164|0;xf=oj+160|0;Re=oj+156|0;ef=oj+152|0;gg=oj+148|0;hg=oj+144|0;Ae=oj+140|0;tf=oj+136|0;bg=oj+132|0;eg=oj+128|0;vf=oj+124|0;jg=oj+120|0;Af=oj+116|0;Bf=oj+112|0;kg=oj+108|0;lg=oj+104|0;rg=oj+100|0;zf=oj+96|0;ph=oj+92|0;Wg=oj+88|0;Rg=oj+84|0;Xg=oj+80|0;Gg=oj+76|0;$g=oj+72|0;Og=oj+68|0;_g=oj+64|0;hh=oj+60|0;oh=oj+56|0;Pg=oj+52|0;Qg=oj+48|0;wh=oj+44|0;Dh=oj+40|0;Kg=oj+36|0;Ng=oj+32|0;Hg=oj+28|0;Sg=oj+24|0;bh=oj+20|0;ch=oj+16|0;Tg=oj+12|0;Ug=oj+8|0;Yg=oj+4|0;ah=oj;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[pj>>2]=j;c[s>>2]=k;c[t>>2]=l;g[oj+2156>>2]=.5555702447891235;g[oj+2152>>2]=.8314695954322815;g[oj+
2148>>2]=.9807852506637573;g[oj+2144>>2]=.19509032368659973;g[oj+2140>>2]=.9238795042037964;g[oj+2136>>2]=.3826834261417389;g[oj+2132>>2]=.7071067690849304;c[nj>>2]=c[pj>>2];c[q>>2]=(c[q>>2]|0)+((c[pj>>2]|0)-1<<3<<2);while(1){if((c[nj>>2]|0)>=(c[s>>2]|0))break;g[w>>2]=+g[c[q>>2]>>2];g[z>>2]=+g[(c[q>>2]|0)+4>>2];g[x>>2]=+g[(c[q>>2]|0)+8>>2];g[A>>2]=+g[(c[q>>2]|0)+12>>2];g[y>>2]=+g[w>>2]*+g[x>>2];g[ea>>2]=+g[z>>2]*+g[x>>2];g[B>>2]=+g[z>>2]*+g[A>>2];g[da>>2]=+g[w>>2]*+g[A>>2];g[C>>2]=+g[y>>2]+ +g[B>>2];g[Lb>>2]=+g[y>>2]-+g[B>>2];g[Pa>>2]=+g[da>>2]+ +g[ea>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[D>>2]=+g[(c[q>>2]|0)+16>>2];g[bb>>2]=+g[x>>2]*+g[D>>2];g[Lc>>2]=+g[z>>2]*+g[D>>2];g[gb>>2]=+g[A>>2]*+g[D>>2];g[Gc>>2]=+g[w>>2]*+g[D>>2];g[ga>>2]=+g[(c[q>>2]|0)+20>>2];g[cb>>2]=+g[A>>2]*+g[ga>>2];g[Kc>>2]=+g[w>>2]*+g[ga>>2];g[fb>>2]=+g[x>>2]*+g[ga>>2];g[Hc>>2]=+g[z>>2]*+g[ga>>2];g[ja>>2]=+g[(c[q>>2]|0)+24>>2];g[Ra>>2]=+g[w>>2]*+g[ja>>2];g[Bc>>2]=+g[A>>2]*+g[ja>>2];g[Ya>>2]=+g[z>>2]*+g[ja>>2];g[Wb>>2]=+g[x>>2]*+g[ja>>2];g[na>>2]=+g[(c[q>>2]|0)+28>>2];g[Sa>>2]=+g[z>>2]*+g[na>>2];g[Ac>>2]=+g[x>>2]*+g[na>>2];g[Xa>>2]=+g[w>>2]*+g[na>>2];g[vc>>2]=+g[A>>2]*+g[na>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Oc>>2]=+g[Wb>>2]-+g[vc>>2];g[ld>>2]=+g[Xa>>2]-+g[Ya>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[qc>>2]=+g[Ac>>2]+ +g[Bc>>2];g[wc>>2]=+g[Wb>>2]+ +g[vc>>2];g[Zd>>2]=+g[Ra>>2]+ +g[Sa>>2];g[xd>>2]=+g[D>>2]*+g[ja>>2]+ +g[ga>>2]*+g[na>>2];g[Bd>>2]=+g[D>>2]*+g[na>>2]-+g[ga>>2]*+g[ja>>2];g[mg>>2]=+g[C>>2]*+g[ja>>2];g[ng>>2]=+g[fa>>2]*+g[na>>2];g[og>>2]=+g[mg>>2]-+g[ng>>2];g[xg>>2]=+g[mg>>2]+ +g[ng>>2];g[sg>>2]=+g[C>>2]*+g[na>>2];g[tg>>2]=+g[fa>>2]*+g[ja>>2];g[ug>>2]=+g[sg>>2]+ +g[tg>>2];g[Bg>>2]=+g[sg>>2]-+g[tg>>2];g[dh>>2]=+g[Lb>>2]*+g[ja>>2];g[Fh>>2]=+g[Pa>>2]*+g[na>>2];g[Gh>>2]=+g[dh>>2]+ +g[Fh>>2];g[li>>2]=+g[dh>>2]-+g[Fh>>2];g[Th>>2]=+g[Lb>>2]*+g[na>>2];g[Uh>>2]=+g[Pa>>2]*+g[ja>>2];g[Vh>>2]=+g[Th>>2]-+g[Uh>>2];g[ni>>2]=+g[Th>>2]+ 
+g[Uh>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[hb>>2]=+g[fb>>2]+ +g[gb>>2];g[di>>2]=+g[db>>2]*+g[ja>>2]+ +g[hb>>2]*+g[na>>2];g[hi>>2]=+g[db>>2]*+g[na>>2]-+g[hb>>2]*+g[ja>>2];g[Vd>>2]=+g[bb>>2]+ +g[cb>>2];g[Xd>>2]=+g[fb>>2]-+g[gb>>2];g[ge>>2]=+g[Vd>>2]*+g[ja>>2]+ +g[Xd>>2]*+g[na>>2];g[wf>>2]=+g[Vd>>2]*+g[na>>2]-+g[Xd>>2]*+g[ja>>2];g[Ic>>2]=+g[Gc>>2]+ +g[Hc>>2];g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Vg>>2]=+g[Ic>>2]*+g[ja>>2]+ +g[Mc>>2]*+g[na>>2];g[Zg>>2]=+g[Ic>>2]*+g[na>>2]-+g[Mc>>2]*+g[ja>>2];g[td>>2]=+g[Gc>>2]-+g[Hc>>2];g[vd>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Cf>>2]=+g[td>>2]*+g[ja>>2]+ +g[vd>>2]*+g[na>>2];g[Of>>2]=+g[td>>2]*+g[na>>2]-+g[vd>>2]*+g[ja>>2];g[E>>2]=+g[C>>2]*+g[D>>2];g[ha>>2]=+g[fa>>2]*+g[ga>>2];g[ia>>2]=+g[E>>2]+ +g[ha>>2];g[jb>>2]=+g[E>>2]-+g[ha>>2];g[ka>>2]=+g[C>>2]*+g[ga>>2];g[la>>2]=+g[fa>>2]*+g[D>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[kb>>2]=+g[ka>>2]+ +g[la>>2];g[oa>>2]=+g[ia>>2]*+g[ja>>2]+ +g[ma>>2]*+g[na>>2];g[Ob>>2]=+g[jb>>2]*+g[na>>2]-+g[kb>>2]*+g[ja>>2];g[pb>>2]=+g[ia>>2]*+g[na>>2]-+g[ma>>2]*+g[ja>>2];g[lb>>2]=+g[jb>>2]*+g[ja>>2]+ +g[kb>>2]*+g[na>>2];g[bd>>2]=+g[Lb>>2]*+g[D>>2];g[cd>>2]=+g[Pa>>2]*+g[ga>>2];g[dd>>2]=+g[bd>>2]-+g[cd>>2];g[Ld>>2]=+g[bd>>2]+ +g[cd>>2];g[Ed>>2]=+g[Lb>>2]*+g[ga>>2];g[Fd>>2]=+g[Pa>>2]*+g[D>>2];g[Gd>>2]=+g[Ed>>2]+ +g[Fd>>2];g[Md>>2]=+g[Ed>>2]-+g[Fd>>2];g[Hd>>2]=+g[dd>>2]*+g[ja>>2]+ +g[Gd>>2]*+g[na>>2];g[Rd>>2]=+g[Ld>>2]*+g[na>>2]-+g[Md>>2]*+g[ja>>2];g[Jd>>2]=+g[dd>>2]*+g[na>>2]-+g[Gd>>2]*+g[ja>>2];g[Nd>>2]=+g[Ld>>2]*+g[ja>>2]+ +g[Md>>2]*+g[na>>2];g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[he>>2]=+g[u>>2]-+g[Da>>2];g[rb>>2]=+g[c[n>>2]>>2];g[sb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[Yf>>2]=+g[rb>>2]+ +g[sb>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[mf>>2]=+g[Vc>>2]+ 
+g[ce>>2];g[Xf>>2]=+g[Vc>>2]-+g[ce>>2];g[ub>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[vb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[ie>>2]=+g[ub>>2]+ +g[vb>>2];g[Eh>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[oi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Pi>>2]=+g[Eh>>2]+ +g[oi>>2];g[ke>>2]=+g[Eh>>2]-+g[oi>>2];g[yb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[zb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2];g[le>>2]=+g[yb>>2]+ +g[zb>>2];g[Qi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ri>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Si>>2]=+g[Qi>>2]+ +g[Ri>>2];g[Ne>>2]=+g[Qi>>2]-+g[Ri>>2];g[Bb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Cb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[Oe>>2]=+g[Bb>>2]+ +g[Cb>>2];g[je>>2]=+g[he>>2]-+g[ie>>2];g[fh>>2]=+g[he>>2]+ +g[ie>>2];g[Ig>>2]=+g[Yf>>2]-+g[Xf>>2];g[Zf>>2]=+g[Xf>>2]+ +g[Yf>>2];g[vg>>2]=+g[Mb>>2]+ +g[mf>>2];g[Ti>>2]=+g[Pi>>2]+ +g[Si>>2];g[Ui>>2]=+g[vg>>2]+ +g[Ti>>2];g[pa>>2]=+g[vg>>2]-+g[Ti>>2];g[_f>>2]=+g[ke>>2]+ +g[le>>2];g[$f>>2]=+g[Ne>>2]+ +g[Oe>>2];g[ag>>2]=(+g[_f>>2]-+g[$f>>2])*.7071067690849304;g[gh>>2]=(+g[_f>>2]+ +g[$f>>2])*.7071067690849304;g[rc>>2]=+g[tb>>2]-+g[wb>>2];g[sc>>2]=+g[Pi>>2]-+g[Si>>2];g[tc>>2]=+g[rc>>2]-+g[sc>>2];g[md>>2]=+g[sc>>2]+ +g[rc>>2];g[xb>>2]=+g[tb>>2]+ +g[wb>>2];g[Eb>>2]=+g[Ab>>2]+ +g[Db>>2];g[Fb>>2]=+g[xb>>2]-+g[Eb>>2];g[Pb>>2]=+g[xb>>2]+ +g[Eb>>2];g[me>>2]=+g[ke>>2]-+g[le>>2];g[Pe>>2]=+g[Ne>>2]-+g[Oe>>2];g[Qe>>2]=(+g[me>>2]+ +g[Pe>>2])*.7071067690849304;g[Jg>>2]=(+g[me>>2]-+g[Pe>>2])*.7071067690849304;g[Pc>>2]=+g[Mb>>2]-+g[mf>>2];g[Qc>>2]=+g[Db>>2]-+g[Ab>>2];g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[_d>>2]=+g[Pc>>2]+ +g[Qc>>2];g[Vi>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Wi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Xi>>2]=+g[Vi>>2]+ +g[Wi>>2];g[af>>2]=+g[Vi>>2]-+g[Wi>>2];g[xa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ya>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[_e>>2]=+g[xa>>2]+ 
+g[ya>>2];g[Yi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Zi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[_i>>2]=+g[Yi>>2]+ +g[Zi>>2];g[Ze>>2]=+g[Yi>>2]-+g[Zi>>2];g[Aa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Ba>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[bf>>2]=+g[Aa>>2]+ +g[Ba>>2];g[aj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[bj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[cj>>2]=+g[aj>>2]+ +g[bj>>2];g[Ve>>2]=+g[aj>>2]-+g[bj>>2];g[qa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[ra>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[Te>>2]=+g[qa>>2]+ +g[ra>>2];g[dj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ej>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[fj>>2]=+g[dj>>2]+ +g[ej>>2];g[Se>>2]=+g[dj>>2]-+g[ej>>2];g[ta>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ua>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[We>>2]=+g[ta>>2]+ +g[ua>>2];g[$i>>2]=+g[Xi>>2]+ +g[_i>>2];g[gj>>2]=+g[cj>>2]+ +g[fj>>2];g[hj>>2]=+g[$i>>2]+ +g[gj>>2];g[qb>>2]=+g[$i>>2]-+g[gj>>2];g[ih>>2]=+g[af>>2]+ +g[bf>>2];g[jh>>2]=+g[_e>>2]-+g[Ze>>2];g[kh>>2]=+g[ih>>2]*.3826834261417389-+g[jh>>2]*.9238795042037964;g[Lg>>2]=+g[jh>>2]*.3826834261417389+ +g[ih>>2]*.9238795042037964;g[lh>>2]=+g[Ve>>2]+ +g[We>>2];g[mh>>2]=+g[Se>>2]+ +g[Te>>2];g[nh>>2]=+g[lh>>2]*.3826834261417389-+g[mh>>2]*.9238795042037964;g[Mg>>2]=+g[mh>>2]*.3826834261417389+ +g[lh>>2]*.9238795042037964;g[wa>>2]=+g[sa>>2]+ +g[va>>2];g[F>>2]=+g[za>>2]+ +g[Ca>>2];g[G>>2]=+g[wa>>2]-+g[F>>2];g[Qb>>2]=+g[F>>2]+ +g[wa>>2];g[Sc>>2]=+g[sa>>2]-+g[va>>2];g[Tc>>2]=+g[cj>>2]-+g[fj>>2];g[Uc>>2]=+g[Sc>>2]-+g[Tc>>2];g[Wc>>2]=+g[Tc>>2]+ +g[Sc>>2];g[Ue>>2]=+g[Se>>2]-+g[Te>>2];g[Xe>>2]=+g[Ve>>2]-+g[We>>2];g[Ye>>2]=+g[Ue>>2]*.9238795042037964-+g[Xe>>2]*.3826834261417389;g[dg>>2]=+g[Ue>>2]*.3826834261417389+ +g[Xe>>2]*.9238795042037964;g[$e>>2]=+g[Ze>>2]+ +g[_e>>2];g[cf>>2]=+g[af>>2]-+g[bf>>2];g[df>>2]=+g[$e>>2]*.9238795042037964+ 
+g[cf>>2]*.3826834261417389;g[cg>>2]=+g[cf>>2]*.9238795042037964-+g[$e>>2]*.3826834261417389;g[Xb>>2]=+g[Xi>>2]-+g[_i>>2];g[Yb>>2]=+g[za>>2]-+g[Ca>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[uc>>2]=+g[Xb>>2]-+g[Yb>>2];g[jj>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[kj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[lj>>2]=+g[jj>>2]+ +g[kj>>2];g[gf>>2]=+g[jj>>2]-+g[kj>>2];g[J>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[K>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[ue>>2]=+g[J>>2]+ +g[K>>2];g[mj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[pi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[qi>>2]=+g[mj>>2]+ +g[pi>>2];g[te>>2]=+g[mj>>2]-+g[pi>>2];g[M>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[N>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[hf>>2]=+g[M>>2]+ +g[N>>2];g[vi>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[wi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[oe>>2]=+g[vi>>2]-+g[wi>>2];g[T>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[U>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pe>>2]=+g[T>>2]+ +g[U>>2];g[xi>>2]=+g[vi>>2]+ +g[wi>>2];g[xe>>2]=+g[oe>>2]+ +g[pe>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[qe>>2]=+g[oe>>2]-+g[pe>>2];g[si>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ti>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[kf>>2]=+g[si>>2]-+g[ti>>2];g[Q>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[R>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[lf>>2]=+g[Q>>2]+ +g[R>>2];g[ui>>2]=+g[si>>2]+ +g[ti>>2];g[we>>2]=+g[kf>>2]+ +g[lf>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[ne>>2]=+g[kf>>2]-+g[lf>>2];g[ri>>2]=+g[lj>>2]+ +g[qi>>2];g[yi>>2]=+g[ui>>2]+ +g[xi>>2];g[zi>>2]=+g[ri>>2]+ +g[yi>>2];g[I>>2]=+g[ri>>2]-+g[yi>>2];g[xh>>2]=+g[ue>>2]-+g[te>>2];g[yh>>2]=(+g[ne>>2]-+g[qe>>2])*.7071067690849304;g[zh>>2]=+g[xh>>2]+ +g[yh>>2];g[Lh>>2]=+g[xh>>2]-+g[yh>>2];g[Ah>>2]=+g[gf>>2]+ +g[hf>>2];g[Bh>>2]=(+g[we>>2]+ +g[xe>>2])*.7071067690849304;g[Ch>>2]=+g[Ah>>2]-+g[Bh>>2];g[Kh>>2]=+g[Ah>>2]+ +g[Bh>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[W>>2]=+g[S>>2]+ 
+g[V>>2];g[X>>2]=+g[P>>2]-+g[W>>2];g[Sb>>2]=+g[P>>2]+ +g[W>>2];g[hc>>2]=+g[L>>2]-+g[O>>2];g[ic>>2]=+g[ui>>2]-+g[xi>>2];g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[gd>>2]=+g[ic>>2]+ +g[hc>>2];g[jf>>2]=+g[gf>>2]-+g[hf>>2];g[re>>2]=(+g[ne>>2]+ +g[qe>>2])*.7071067690849304;g[se>>2]=+g[jf>>2]-+g[re>>2];g[Gf>>2]=+g[jf>>2]+ +g[re>>2];g[ve>>2]=+g[te>>2]+ +g[ue>>2];g[ye>>2]=(+g[we>>2]-+g[xe>>2])*.7071067690849304;g[ze>>2]=+g[ve>>2]-+g[ye>>2];g[Hf>>2]=+g[ve>>2]+ +g[ye>>2];g[kc>>2]=+g[lj>>2]-+g[qi>>2];g[lc>>2]=+g[V>>2]-+g[S>>2];g[mc>>2]=+g[kc>>2]-+g[lc>>2];g[hd>>2]=+g[kc>>2]+ +g[lc>>2];g[Ai>>2]=+g[c[o>>2]>>2];g[Bi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Ci>>2]=+g[Ai>>2]+ +g[Bi>>2];g[Be>>2]=+g[Ai>>2]-+g[Bi>>2];g[_>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[$>>2]=+g[c[p>>2]>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[nf>>2]=+g[_>>2]+ +g[$>>2];g[Di>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ei>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Fi>>2]=+g[Di>>2]+ +g[Ei>>2];g[Me>>2]=+g[Di>>2]-+g[Ei>>2];g[ba>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[Ce>>2]=+g[ba>>2]+ +g[ca>>2];g[Ki>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Li>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[He>>2]=+g[Ki>>2]-+g[Li>>2];g[Ja>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Ka>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ie>>2]=+g[Ja>>2]+ +g[Ka>>2];g[Mi>>2]=+g[Ki>>2]+ +g[Li>>2];g[qf>>2]=+g[He>>2]+ +g[Ie>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[Je>>2]=+g[He>>2]-+g[Ie>>2];g[Hi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ii>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Ee>>2]=+g[Hi>>2]-+g[Ii>>2];g[Ga>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Fe>>2]=+g[Ga>>2]+ +g[Ha>>2];g[Ji>>2]=+g[Hi>>2]+ +g[Ii>>2];g[pf>>2]=+g[Ee>>2]+ +g[Fe>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[Ge>>2]=+g[Ee>>2]-+g[Fe>>2];g[Gi>>2]=+g[Ci>>2]+ +g[Fi>>2];g[Ni>>2]=+g[Ji>>2]+ +g[Mi>>2];g[Oi>>2]=+g[Gi>>2]+ 
+g[Ni>>2];g[Z>>2]=+g[Gi>>2]-+g[Ni>>2];g[qh>>2]=(+g[Ge>>2]-+g[Je>>2])*.7071067690849304;g[rh>>2]=+g[Me>>2]+ +g[nf>>2];g[sh>>2]=+g[qh>>2]-+g[rh>>2];g[Oh>>2]=+g[rh>>2]+ +g[qh>>2];g[th>>2]=+g[Be>>2]+ +g[Ce>>2];g[uh>>2]=(+g[pf>>2]+ +g[qf>>2])*.7071067690849304;g[vh>>2]=+g[th>>2]-+g[uh>>2];g[Nh>>2]=+g[th>>2]+ +g[uh>>2];g[Fa>>2]=+g[aa>>2]+ +g[Ea>>2];g[Ma>>2]=+g[Ia>>2]+ +g[La>>2];g[Na>>2]=+g[Fa>>2]-+g[Ma>>2];g[Tb>>2]=+g[Fa>>2]+ +g[Ma>>2];g[ac>>2]=+g[aa>>2]-+g[Ea>>2];g[bc>>2]=+g[Ji>>2]-+g[Mi>>2];g[cc>>2]=+g[ac>>2]-+g[bc>>2];g[be>>2]=+g[bc>>2]+ +g[ac>>2];g[De>>2]=+g[Be>>2]-+g[Ce>>2];g[Ke>>2]=(+g[Ge>>2]+ +g[Je>>2])*.7071067690849304;g[Le>>2]=+g[De>>2]-+g[Ke>>2];g[Jf>>2]=+g[De>>2]+ +g[Ke>>2];g[of>>2]=+g[Me>>2]-+g[nf>>2];g[rf>>2]=(+g[pf>>2]-+g[qf>>2])*.7071067690849304;g[sf>>2]=+g[of>>2]-+g[rf>>2];g[Kf>>2]=+g[of>>2]+ +g[rf>>2];g[dc>>2]=+g[Ci>>2]-+g[Fi>>2];g[ec>>2]=+g[La>>2]-+g[Ia>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[ed>>2]=+g[dc>>2]+ +g[ec>>2];g[ij>>2]=+g[Ui>>2]+ +g[hj>>2];g[v>>2]=+g[zi>>2]+ +g[Oi>>2];g[Nb>>2]=+g[ij>>2]-+g[v>>2];g[Rb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[Vb>>2]=+g[Rb>>2]-+g[Ub>>2];g[c[m>>2]>>2]=+g[ij>>2]+ +g[v>>2];g[c[o>>2]>>2]=+g[Rb>>2]+ +g[Ub>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[lb>>2]*+g[Nb>>2]-+g[Ob>>2]*+g[Vb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ob>>2]*+g[Nb>>2]+ +g[lb>>2]*+g[Vb>>2];g[xc>>2]=+g[Ui>>2]-+g[hj>>2];g[yc>>2]=+g[Tb>>2]-+g[Sb>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Jc>>2]=+g[xc>>2]+ +g[yc>>2];g[Dc>>2]=+g[Pb>>2]-+g[Qb>>2];g[Ec>>2]=+g[zi>>2]-+g[Oi>>2];g[Fc>>2]=+g[Dc>>2]-+g[Ec>>2];g[Nc>>2]=+g[Ec>>2]+ +g[Dc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[wc>>2]*+g[zc>>2]-+g[Cc>>2]*+g[Fc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[wc>>2]*+g[Fc>>2]+ +g[Cc>>2]*+g[zc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ic>>2]*+g[Jc>>2]-+g[Mc>>2]*+g[Nc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ic>>2]*+g[Nc>>2]+ +g[Mc>>2]*+g[Jc>>2];g[H>>2]=+g[pa>>2]+ +g[G>>2];g[Gb>>2]=+g[qb>>2]+ 
+g[Fb>>2];g[_a>>2]=+g[Fb>>2]-+g[qb>>2];g[Ua>>2]=+g[pa>>2]-+g[G>>2];g[Hb>>2]=+g[I>>2]+ +g[X>>2];g[Ib>>2]=+g[Na>>2]-+g[Z>>2];g[Jb>>2]=(+g[Hb>>2]+ +g[Ib>>2])*.7071067690849304;g[Va>>2]=(+g[Ib>>2]-+g[Hb>>2])*.7071067690849304;g[Y>>2]=+g[I>>2]-+g[X>>2];g[mb>>2]=+g[Z>>2]+ +g[Na>>2];g[nb>>2]=(+g[Y>>2]+ +g[mb>>2])*.7071067690849304;g[$a>>2]=(+g[Y>>2]-+g[mb>>2])*.7071067690849304;g[ob>>2]=+g[H>>2]-+g[nb>>2];g[Kb>>2]=+g[Gb>>2]-+g[Jb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[oa>>2]*+g[ob>>2]-+g[pb>>2]*+g[Kb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[pb>>2]*+g[ob>>2]+ +g[oa>>2]*+g[Kb>>2];g[eb>>2]=+g[Ua>>2]+ +g[Va>>2];g[ib>>2]=+g[_a>>2]+ +g[$a>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[db>>2]*+g[eb>>2]-+g[hb>>2]*+g[ib>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[db>>2]*+g[ib>>2]+ +g[hb>>2]*+g[eb>>2];g[Oa>>2]=+g[H>>2]+ +g[nb>>2];g[Qa>>2]=+g[Gb>>2]+ +g[Jb>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Lb>>2]*+g[Oa>>2]-+g[Pa>>2]*+g[Qa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Pa>>2]*+g[Oa>>2]+ +g[Lb>>2]*+g[Qa>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Ta>>2]*+g[Wa>>2]-+g[Za>>2]*+g[ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Ta>>2]*+g[ab>>2]+ +g[Za>>2]*+g[Wa>>2];g[$d>>2]=(+g[uc>>2]+ +g[Wc>>2])*.7071067690849304;g[ae>>2]=+g[_d>>2]-+g[$d>>2];g[yd>>2]=+g[_d>>2]+ +g[$d>>2];g[nd>>2]=(+g[Zb>>2]+ +g[Uc>>2])*.7071067690849304;g[od>>2]=+g[md>>2]-+g[nd>>2];g[Cd>>2]=+g[md>>2]+ +g[nd>>2];g[fd>>2]=+g[be>>2]*.9238795042037964-+g[ed>>2]*.3826834261417389;g[id>>2]=+g[gd>>2]*.9238795042037964+ +g[hd>>2]*.3826834261417389;g[jd>>2]=+g[fd>>2]-+g[id>>2];g[Dd>>2]=+g[id>>2]+ +g[fd>>2];g[pd>>2]=+g[hd>>2]*.9238795042037964-+g[gd>>2]*.3826834261417389;g[qd>>2]=+g[be>>2]*.3826834261417389+ +g[ed>>2]*.9238795042037964;g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[zd>>2]=+g[pd>>2]+ 
+g[qd>>2];g[kd>>2]=+g[ae>>2]-+g[jd>>2];g[sd>>2]=+g[od>>2]-+g[rd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Zd>>2]*+g[kd>>2]-+g[ld>>2]*+g[sd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[ld>>2]*+g[kd>>2]+ +g[Zd>>2]*+g[sd>>2];g[ee>>2]=+g[yd>>2]+ +g[zd>>2];g[fe>>2]=+g[Cd>>2]+ +g[Dd>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]*+g[ee>>2]-+g[fa>>2]*+g[fe>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]*+g[fe>>2]+ +g[fa>>2]*+g[ee>>2];g[ud>>2]=+g[ae>>2]+ +g[jd>>2];g[wd>>2]=+g[od>>2]+ +g[rd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[td>>2]*+g[ud>>2]-+g[vd>>2]*+g[wd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[vd>>2]*+g[ud>>2]+ +g[td>>2]*+g[wd>>2];g[Ad>>2]=+g[yd>>2]-+g[zd>>2];g[de>>2]=+g[Cd>>2]-+g[Dd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[xd>>2]*+g[Ad>>2]-+g[Bd>>2]*+g[de>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[xd>>2]*+g[de>>2]+ +g[Bd>>2]*+g[Ad>>2];g[_b>>2]=(+g[Uc>>2]-+g[Zb>>2])*.7071067690849304;g[$b>>2]=+g[Rc>>2]-+g[_b>>2];g[Od>>2]=+g[Rc>>2]+ +g[_b>>2];g[Xc>>2]=(+g[uc>>2]-+g[Wc>>2])*.7071067690849304;g[Yc>>2]=+g[tc>>2]-+g[Xc>>2];g[Sd>>2]=+g[tc>>2]+ +g[Xc>>2];g[gc>>2]=+g[cc>>2]*.3826834261417389-+g[fc>>2]*.9238795042037964;g[nc>>2]=+g[jc>>2]*.3826834261417389+ +g[mc>>2]*.9238795042037964;g[oc>>2]=+g[gc>>2]-+g[nc>>2];g[Td>>2]=+g[nc>>2]+ +g[gc>>2];g[Zc>>2]=+g[mc>>2]*.3826834261417389-+g[jc>>2]*.9238795042037964;g[_c>>2]=+g[cc>>2]*.9238795042037964+ +g[fc>>2]*.3826834261417389;g[$c>>2]=+g[Zc>>2]-+g[_c>>2];g[Pd>>2]=+g[Zc>>2]+ +g[_c>>2];g[pc>>2]=+g[$b>>2]-+g[oc>>2];g[ad>>2]=+g[Yc>>2]-+g[$c>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Oc>>2]*+g[pc>>2]-+g[qc>>2]*+g[ad>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[qc>>2]*+g[pc>>2]+ +g[Oc>>2]*+g[ad>>2];g[Wd>>2]=+g[Od>>2]+ +g[Pd>>2];g[Yd>>2]=+g[Sd>>2]+ +g[Td>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Vd>>2]*+g[Wd>>2]-+g[Xd>>2]*+g[Yd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Vd>>2]*+g[Yd>>2]+ +g[Xd>>2]*+g[Wd>>2];g[Id>>2]=+g[$b>>2]+ +g[oc>>2];g[Kd>>2]=+g[Yc>>2]+ 
+g[$c>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Hd>>2]*+g[Id>>2]-+g[Jd>>2]*+g[Kd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Jd>>2]*+g[Id>>2]+ +g[Hd>>2]*+g[Kd>>2];g[Qd>>2]=+g[Od>>2]-+g[Pd>>2];g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Nd>>2]*+g[Qd>>2]-+g[Rd>>2]*+g[Ud>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Nd>>2]*+g[Ud>>2]+ +g[Rd>>2]*+g[Qd>>2];g[Df>>2]=+g[je>>2]+ +g[Qe>>2];g[Ef>>2]=+g[cg>>2]+ +g[dg>>2];g[Ff>>2]=+g[Df>>2]+ +g[Ef>>2];g[yg>>2]=+g[Df>>2]-+g[Ef>>2];g[Sf>>2]=+g[Gf>>2]*.19509032368659973+ +g[Hf>>2]*.9807852506637573;g[Tf>>2]=+g[Kf>>2]*.9807852506637573-+g[Jf>>2]*.19509032368659973;g[Uf>>2]=+g[Sf>>2]+ +g[Tf>>2];g[zg>>2]=+g[Tf>>2]-+g[Sf>>2];g[If>>2]=+g[Gf>>2]*.9807852506637573-+g[Hf>>2]*.19509032368659973;g[Lf>>2]=+g[Jf>>2]*.9807852506637573+ +g[Kf>>2]*.19509032368659973;g[Mf>>2]=+g[If>>2]+ +g[Lf>>2];g[Dg>>2]=+g[If>>2]-+g[Lf>>2];g[Pf>>2]=+g[Zf>>2]+ +g[ag>>2];g[Qf>>2]=+g[df>>2]+ +g[Ye>>2];g[Rf>>2]=+g[Pf>>2]+ +g[Qf>>2];g[Cg>>2]=+g[Pf>>2]-+g[Qf>>2];g[Nf>>2]=+g[Ff>>2]-+g[Mf>>2];g[Vf>>2]=+g[Rf>>2]-+g[Uf>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Cf>>2]*+g[Nf>>2]-+g[Of>>2]*+g[Vf>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Of>>2]*+g[Nf>>2]+ +g[Cf>>2]*+g[Vf>>2];g[Fg>>2]=+g[yg>>2]+ +g[zg>>2];g[eh>>2]=+g[Cg>>2]+ +g[Dg>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[D>>2]*+g[Fg>>2]-+g[ga>>2]*+g[eh>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[D>>2]*+g[eh>>2]+ +g[ga>>2]*+g[Fg>>2];g[Wf>>2]=+g[Ff>>2]+ +g[Mf>>2];g[wg>>2]=+g[Rf>>2]+ +g[Uf>>2];g[c[n>>2]>>2]=+g[w>>2]*+g[Wf>>2]-+g[z>>2]*+g[wg>>2];g[c[p>>2]>>2]=+g[z>>2]*+g[Wf>>2]+ +g[w>>2]*+g[wg>>2];g[Ag>>2]=+g[yg>>2]-+g[zg>>2];g[Eg>>2]=+g[Cg>>2]-+g[Dg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[xg>>2]*+g[Ag>>2]-+g[Bg>>2]*+g[Eg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[xg>>2]*+g[Eg>>2]+ +g[Bg>>2]*+g[Ag>>2];g[Hh>>2]=+g[fh>>2]+ +g[gh>>2];g[Ih>>2]=+g[Lg>>2]+ +g[Mg>>2];g[Jh>>2]=+g[Hh>>2]-+g[Ih>>2];g[ei>>2]=+g[Hh>>2]+ +g[Ih>>2];g[Zh>>2]=+g[Lh>>2]*.19509032368659973+ 
+g[Kh>>2]*.9807852506637573;g[_h>>2]=+g[Oh>>2]*.19509032368659973+ +g[Nh>>2]*.9807852506637573;g[$h>>2]=+g[Zh>>2]-+g[_h>>2];g[fi>>2]=+g[Zh>>2]+ +g[_h>>2];g[Mh>>2]=+g[Kh>>2]*.19509032368659973-+g[Lh>>2]*.9807852506637573;g[Qh>>2]=+g[Nh>>2]*.19509032368659973-+g[Oh>>2]*.9807852506637573;g[Rh>>2]=+g[Mh>>2]+ +g[Qh>>2];g[ji>>2]=+g[Mh>>2]-+g[Qh>>2];g[Wh>>2]=+g[Ig>>2]-+g[Jg>>2];g[Xh>>2]=+g[kh>>2]-+g[nh>>2];g[Yh>>2]=+g[Wh>>2]+ +g[Xh>>2];g[ii>>2]=+g[Wh>>2]-+g[Xh>>2];g[Sh>>2]=+g[Jh>>2]-+g[Rh>>2];g[ai>>2]=+g[Yh>>2]-+g[$h>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Gh>>2]*+g[Sh>>2]-+g[Vh>>2]*+g[ai>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Vh>>2]*+g[Sh>>2]+ +g[Gh>>2]*+g[ai>>2];g[mi>>2]=+g[ei>>2]+ +g[fi>>2];g[Ph>>2]=+g[ii>>2]-+g[ji>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[li>>2]*+g[mi>>2]-+g[ni>>2]*+g[Ph>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[li>>2]*+g[Ph>>2]+ +g[ni>>2]*+g[mi>>2];g[bi>>2]=+g[Jh>>2]+ +g[Rh>>2];g[ci>>2]=+g[Yh>>2]+ +g[$h>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ia>>2]*+g[bi>>2]-+g[ma>>2]*+g[ci>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ma>>2]*+g[bi>>2]+ +g[ia>>2]*+g[ci>>2];g[gi>>2]=+g[ei>>2]-+g[fi>>2];g[ki>>2]=+g[ii>>2]+ +g[ji>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[di>>2]*+g[gi>>2]-+g[hi>>2]*+g[ki>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[di>>2]*+g[ki>>2]+ +g[hi>>2]*+g[gi>>2];g[Re>>2]=+g[je>>2]-+g[Qe>>2];g[ef>>2]=+g[Ye>>2]-+g[df>>2];g[ff>>2]=+g[Re>>2]+ +g[ef>>2];g[pg>>2]=+g[Re>>2]-+g[ef>>2];g[gg>>2]=+g[se>>2]*.8314695954322815+ +g[ze>>2]*.5555702447891235;g[hg>>2]=+g[sf>>2]*.5555702447891235-+g[Le>>2]*.8314695954322815;g[ig>>2]=+g[gg>>2]+ +g[hg>>2];g[qg>>2]=+g[hg>>2]-+g[gg>>2];g[Ae>>2]=+g[se>>2]*.5555702447891235-+g[ze>>2]*.8314695954322815;g[tf>>2]=+g[Le>>2]*.5555702447891235+ +g[sf>>2]*.8314695954322815;g[uf>>2]=+g[Ae>>2]+ +g[tf>>2];g[yf>>2]=+g[Ae>>2]-+g[tf>>2];g[bg>>2]=+g[Zf>>2]-+g[ag>>2];g[eg>>2]=+g[cg>>2]-+g[dg>>2];g[fg>>2]=+g[bg>>2]+ 
+g[eg>>2];g[xf>>2]=+g[bg>>2]-+g[eg>>2];g[vf>>2]=+g[ff>>2]-+g[uf>>2];g[jg>>2]=+g[fg>>2]-+g[ig>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[ge>>2]*+g[vf>>2]-+g[wf>>2]*+g[jg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[wf>>2]*+g[vf>>2]+ +g[ge>>2]*+g[jg>>2];g[Af>>2]=+g[pg>>2]+ +g[qg>>2];g[Bf>>2]=+g[xf>>2]+ +g[yf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[dd>>2]*+g[Af>>2]-+g[Gd>>2]*+g[Bf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[dd>>2]*+g[Bf>>2]+ +g[Gd>>2]*+g[Af>>2];g[kg>>2]=+g[ff>>2]+ +g[uf>>2];g[lg>>2]=+g[fg>>2]+ +g[ig>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ld>>2]*+g[kg>>2]-+g[Md>>2]*+g[lg>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Md>>2]*+g[kg>>2]+ +g[Ld>>2]*+g[lg>>2];g[rg>>2]=+g[pg>>2]-+g[qg>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[og>>2]*+g[rg>>2]-+g[ug>>2]*+g[zf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[og>>2]*+g[zf>>2]+ +g[ug>>2]*+g[rg>>2];g[hh>>2]=+g[fh>>2]-+g[gh>>2];g[oh>>2]=+g[kh>>2]+ +g[nh>>2];g[ph>>2]=+g[hh>>2]-+g[oh>>2];g[Wg>>2]=+g[hh>>2]+ +g[oh>>2];g[Pg>>2]=+g[Ch>>2]*.8314695954322815-+g[zh>>2]*.5555702447891235;g[Qg>>2]=+g[sh>>2]*.5555702447891235+ +g[vh>>2]*.8314695954322815;g[Rg>>2]=+g[Pg>>2]-+g[Qg>>2];g[Xg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[wh>>2]=+g[sh>>2]*.8314695954322815-+g[vh>>2]*.5555702447891235;g[Dh>>2]=+g[zh>>2]*.8314695954322815+ +g[Ch>>2]*.5555702447891235;g[Gg>>2]=+g[wh>>2]-+g[Dh>>2];g[$g>>2]=+g[Dh>>2]+ +g[wh>>2];g[Kg>>2]=+g[Ig>>2]+ +g[Jg>>2];g[Ng>>2]=+g[Lg>>2]-+g[Mg>>2];g[Og>>2]=+g[Kg>>2]-+g[Ng>>2];g[_g>>2]=+g[Kg>>2]+ +g[Ng>>2];g[Hg>>2]=+g[ph>>2]-+g[Gg>>2];g[Sg>>2]=+g[Og>>2]-+g[Rg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[ja>>2]*+g[Hg>>2]-+g[na>>2]*+g[Sg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[na>>2]*+g[Hg>>2]+ +g[ja>>2]*+g[Sg>>2];g[bh>>2]=+g[Wg>>2]+ +g[Xg>>2];g[ch>>2]=+g[_g>>2]+ +g[$g>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[x>>2]*+g[bh>>2]-+g[A>>2]*+g[ch>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[x>>2]*+g[ch>>2]+ +g[A>>2]*+g[bh>>2];g[Tg>>2]=+g[ph>>2]+ +g[Gg>>2];g[Ug>>2]=+g[Og>>2]+ 
+g[Rg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jb>>2]*+g[Tg>>2]-+g[kb>>2]*+g[Ug>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[kb>>2]*+g[Tg>>2]+ +g[jb>>2]*+g[Ug>>2];g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[ah>>2]=+g[_g>>2]-+g[$g>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Vg>>2]*+g[Yg>>2]-+g[Zg>>2]*+g[ah>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Vg>>2]*+g[ah>>2]+ +g[Zg>>2]*+g[Yg>>2];c[nj>>2]=(c[nj>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=oj;return}function vu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,55,8632,0);i=b;return}function wu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0;X=i;i=i+160|0;m=X+148|0;n=X+144|0;o=X+140|0;p=X+136|0;q=X+132|0;r=X+128|0;Y=X+124|0;s=X+120|0;t=X+116|0;W=X+112|0;A=X+108|0;C=X+104|0;B=X+100|0;D=X+96|0;E=X+92|0;G=X+88|0;w=X+84|0;O=X+80|0;J=X+76|0;S=X+72|0;z=X+68|0;R=X+64|0;M=X+60|0;P=X+56|0;F=X+52|0;N=X+48|0;u=X+44|0;v=X+40|0;H=X+36|0;I=X+32|0;x=X+28|0;y=X+24|0;K=X+20|0;L=X+16|0;Q=X+12|0;T=X+8|0;U=X+4|0;V=X;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Y>>2]=j;c[s>>2]=k;c[t>>2]=l;c[W>>2]=c[Y>>2];c[q>>2]=(c[q>>2]|0)+((c[Y>>2]|0)-1<<2<<2);while(1){if((c[W>>2]|0)>=(c[s>>2]|0))break;g[A>>2]=+g[c[q>>2]>>2];g[C>>2]=+g[(c[q>>2]|0)+4>>2];g[B>>2]=+g[(c[q>>2]|0)+8>>2];g[D>>2]=+g[(c[q>>2]|0)+12>>2];g[E>>2]=+g[A>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[G>>2]=+g[A>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[O>>2]=+g[u>>2]-+g[v>>2];g[H>>2]=+g[c[n>>2]>>2];g[I>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[S>>2]=+g[H>>2]+ 
+g[I>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[c[o>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[R>>2]=+g[x>>2]-+g[y>>2];g[K>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[L>>2]=+g[c[p>>2]>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[P>>2]=+g[K>>2]+ +g[L>>2];g[c[m>>2]>>2]=+g[w>>2]+ +g[z>>2];g[c[o>>2]>>2]=+g[J>>2]+ +g[M>>2];g[F>>2]=+g[w>>2]-+g[z>>2];g[N>>2]=+g[J>>2]-+g[M>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]*+g[F>>2]-+g[G>>2]*+g[N>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]*+g[F>>2]+ +g[E>>2]*+g[N>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[c[n>>2]>>2]=+g[A>>2]*+g[Q>>2]-+g[C>>2]*+g[T>>2];g[c[p>>2]>>2]=+g[A>>2]*+g[T>>2]+ +g[C>>2]*+g[Q>>2];g[U>>2]=+g[O>>2]+ +g[P>>2];g[V>>2]=+g[S>>2]-+g[R>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[B>>2]*+g[U>>2]-+g[D>>2]*+g[V>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[B>>2]*+g[V>>2]+ +g[D>>2]*+g[U>>2];c[W>>2]=(c[W>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+16}i=X;return}function xu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,56,8680,0);i=b;return}function yu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0;Za=i;i=i+384|0;m=Za+376|0;n=Za+372|0;o=Za+368|0;p=Za+364|0;q=Za+360|0;r=Za+356|0;_a=Za+352|0;s=Za+348|0;t=Za+344|0;Ya=Za+336|0;Da=Za+332|0;Ga=Za+328|0;Ea=Za+324|0;Ha=Za+320|0;Ja=Za+316|0;Na=Za+312|0;na=Za+308|0;pa=Za+304|0;da=Za+300|0;ea=Za+296|0;fa=Za+292|0;A=Za+288|0;ja=Za+284|0;ra=Za+280|0;Fa=Za+276|0;Ma=Za+272|0;Ia=Za+268|0;La=Za+264|0;X=Za+260|0;K=Za+256|0;N=Za+252|0;Ua=Za+248|0;ga=Za+244|0;ua=Za+240|0;D=Za+236|0;ka=Za+232|0;Ca=Za+228|0;E=Za+224|0;F=Za+220|0;ba=Za+216|0;ha=Za+212|0;xa=Za+208|0;x=Za+204|0;la=Za+200|0;Ka=Za+196|0;ca=Za+192|0;T=Za+188|0;sa=Za+184|0;Qa=Za+180|0;C=Za+176|0;W=Za+172|0;B=Za+168|0;Ta=Za+164|0;ta=Za+160|0;u=Za+156|0;S=Za+152|0;Oa=Za+148|0;Pa=Za+144|0;U=Za+140|0;V=Za+136|0;Ra=Za+132|0;Sa=Za+128|0;ya=Za+124|0;va=Za+120|0;Xa=Za+116|0;wa=Za+112|0;Ba=Za+108|0;v=Za+104|0;aa=Za+100|0;w=Za+96|0;Y=Za+92|0;Z=Za+88|0;Va=Za+84|0;Wa=Za+80|0;za=Za+76|0;Aa=Za+72|0;_=Za+68|0;$=Za+64|0;oa=Za+60|0;qa=Za+56|0;ia=Za+52|0;ma=Za+48|0;M=Za+44|0;Q=Za+40|0;P=Za+36|0;R=Za+32|0;L=Za+28|0;O=Za+24|0;z=Za+20|0;I=Za+16|0;H=Za+12|0;J=Za+8|0;y=Za+4|0;G=Za;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[_a>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Za+340>>2]=.7071067690849304;c[Ya>>2]=c[_a>>2];c[q>>2]=(c[q>>2]|0)+(((c[_a>>2]|0)-1|0)*6<<2);while(1){if((c[Ya>>2]|0)>=(c[s>>2]|0))break;g[Da>>2]=+g[c[q>>2]>>2];g[Ga>>2]=+g[(c[q>>2]|0)+4>>2];g[Ea>>2]=+g[(c[q>>2]|0)+8>>2];g[Ha>>2]=+g[(c[q>>2]|0)+12>>2];g[Fa>>2]=+g[Da>>2]*+g[Ea>>2];g[Ma>>2]=+g[Ga>>2]*+g[Ea>>2];g[Ia>>2]=+g[Ga>>2]*+g[Ha>>2];g[La>>2]=+g[Da>>2]*+g[Ha>>2];g[Ja>>2]=+g[Fa>>2]-+g[Ia>>2];g[Na>>2]
=+g[La>>2]+ +g[Ma>>2];g[na>>2]=+g[Fa>>2]+ +g[Ia>>2];g[pa>>2]=+g[La>>2]-+g[Ma>>2];g[da>>2]=+g[(c[q>>2]|0)+16>>2];g[ea>>2]=+g[(c[q>>2]|0)+20>>2];g[fa>>2]=+g[Da>>2]*+g[da>>2]+ +g[Ga>>2]*+g[ea>>2];g[A>>2]=+g[na>>2]*+g[ea>>2]-+g[pa>>2]*+g[da>>2];g[ja>>2]=+g[Da>>2]*+g[ea>>2]-+g[Ga>>2]*+g[da>>2];g[ra>>2]=+g[na>>2]*+g[da>>2]+ +g[pa>>2]*+g[ea>>2];g[u>>2]=+g[c[m>>2]>>2];g[S>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[T>>2]=+g[u>>2]+ +g[S>>2];g[sa>>2]=+g[u>>2]-+g[S>>2];g[Oa>>2]=+g[c[n>>2]>>2];g[Pa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[C>>2]=+g[Oa>>2]+ +g[Pa>>2];g[U>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[V>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[B>>2]=+g[U>>2]-+g[V>>2];g[Ra>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Sa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[X>>2]=+g[T>>2]+ +g[W>>2];g[K>>2]=+g[sa>>2]+ +g[ta>>2];g[N>>2]=+g[C>>2]-+g[B>>2];g[Ua>>2]=+g[Qa>>2]+ +g[Ta>>2];g[ga>>2]=+g[T>>2]-+g[W>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[D>>2]=+g[B>>2]+ +g[C>>2];g[ka>>2]=+g[Qa>>2]-+g[Ta>>2];g[Y>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Z>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ya>>2]=+g[Y>>2]+ +g[Z>>2];g[va>>2]=+g[Y>>2]-+g[Z>>2];g[Va>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Wa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[wa>>2]=+g[Va>>2]+ +g[Wa>>2];g[za>>2]=+g[c[o>>2]>>2];g[Aa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[v>>2]=+g[za>>2]-+g[Aa>>2];g[_>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[$>>2]=+g[c[p>>2]>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[w>>2]=+g[_>>2]+ +g[$>>2];g[Ca>>2]=+g[ya>>2]+ +g[Ba>>2];g[E>>2]=+g[va>>2]+ +g[wa>>2];g[F>>2]=+g[v>>2]+ +g[w>>2];g[ba>>2]=+g[Xa>>2]+ +g[aa>>2];g[ha>>2]=+g[aa>>2]-+g[Xa>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[la>>2]=+g[ya>>2]-+g[Ba>>2];g[c[m>>2]>>2]=+g[X>>2]+ +g[Ca>>2];g[c[o>>2]>>2]=+g[Ua>>2]+ 
+g[ba>>2];g[Ka>>2]=+g[X>>2]-+g[Ca>>2];g[ca>>2]=+g[Ua>>2]-+g[ba>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ja>>2]*+g[Ka>>2]-+g[Na>>2]*+g[ca>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Na>>2]*+g[Ka>>2]+ +g[Ja>>2]*+g[ca>>2];g[oa>>2]=+g[ga>>2]+ +g[ha>>2];g[qa>>2]=+g[la>>2]+ +g[ka>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[na>>2]*+g[oa>>2]-+g[pa>>2]*+g[qa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[na>>2]*+g[qa>>2]+ +g[pa>>2]*+g[oa>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[fa>>2]*+g[ia>>2]-+g[ja>>2]*+g[ma>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[fa>>2]*+g[ma>>2]+ +g[ja>>2]*+g[ia>>2];g[L>>2]=(+g[E>>2]+ +g[F>>2])*.7071067690849304;g[M>>2]=+g[K>>2]-+g[L>>2];g[Q>>2]=+g[K>>2]+ +g[L>>2];g[O>>2]=(+g[xa>>2]-+g[x>>2])*.7071067690849304;g[P>>2]=+g[N>>2]+ +g[O>>2];g[R>>2]=+g[N>>2]-+g[O>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ea>>2]*+g[M>>2]-+g[Ha>>2]*+g[P>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ea>>2]*+g[P>>2]+ +g[Ha>>2]*+g[M>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[da>>2]*+g[Q>>2]-+g[ea>>2]*+g[R>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[da>>2]*+g[R>>2]+ +g[ea>>2]*+g[Q>>2];g[y>>2]=(+g[xa>>2]+ +g[x>>2])*.7071067690849304;g[z>>2]=+g[ua>>2]-+g[y>>2];g[I>>2]=+g[ua>>2]+ +g[y>>2];g[G>>2]=(+g[E>>2]-+g[F>>2])*.7071067690849304;g[H>>2]=+g[D>>2]-+g[G>>2];g[J>>2]=+g[D>>2]+ +g[G>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ra>>2]*+g[z>>2]-+g[A>>2]*+g[H>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]*+g[z>>2]+ +g[ra>>2]*+g[H>>2];g[c[n>>2]>>2]=+g[Da>>2]*+g[I>>2]-+g[Ga>>2]*+g[J>>2];g[c[p>>2]>>2]=+g[Ga>>2]*+g[I>>2]+ +g[Da>>2]*+g[J>>2];c[Ya>>2]=(c[Ya>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24;c[r>>2]=c[r>>2]^c[2998]}i=Za;return}function zu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,57,8728,0);i=b;return}function Au(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0;Hb=i;i=i+544|0;m=Hb+532|0;n=Hb+528|0;o=Hb+524|0;p=Hb+520|0;q=Hb+516|0;r=Hb+512|0;Ib=Hb+508|0;s=Hb+504|0;t=Hb+500|0;Gb=Hb+480|0;Ba=Hb+476|0;D=Hb+472|0;Ra=Hb+468|0;ia=Hb+464|0;Ma=Hb+460|0;Na=Hb+456|0;G=Hb+452|0;F=Hb+448|0;fa=Hb+444|0;ta=Hb+440|0;ob=Hb+436|0;rb=Hb+432|0;zb=Hb+428|0;Ia=Hb+424|0;wa=Hb+420|0;va=Hb+416|0;pa=Hb+412|0;J=Hb+408|0;Ua=Hb+404|0;Wa=Hb+400|0;u=Hb+396|0;Aa=Hb+392|0;Pa=Hb+388|0;Qa=Hb+384|0;Ea=Hb+380|0;E=Hb+376|0;mb=Hb+372|0;da=Hb+368|0;Ha=Hb+364|0;aa=Hb+360|0;jb=Hb+356|0;ca=Hb+352|0;Ca=Hb+348|0;Da=Hb+344|0;kb=Hb+340|0;lb=Hb+336|0;Fa=Hb+332|0;Ga=Hb+328|0;hb=Hb+324|0;ib=Hb+320|0;ba=Hb+316|0;ea=Hb+312|0;gb=Hb+308|0;nb=Hb+304|0;vb=Hb+300|0;ja=Hb+296|0;Fb=Hb+292|0;na=Hb+288|0;yb=Hb+284|0;ka=Hb+280|0;Cb=Hb+276|0;ma=Hb+272|0;tb=Hb+268|0;ub=Hb+264|0;Db=Hb+260|0;Eb=Hb+256|0;wb=Hb+252|0;xb=Hb+248|0;Ab=Hb+244|0;Bb=Hb+240|0;la=Hb+236|0;oa=Hb+232|0;Sa=Hb+228|0;Ta=Hb+224|0;ga=Hb+220|0;qa=Hb+216|0;C=Hb+212|0;ha=Hb+208|0;Ja=Hb+204|0;Oa=Hb+200|0;w=Hb+196|0;db=Hb+192|0;Xa=Hb+188|0;v=Hb+184|0;sb=Hb+180|0;cb=Hb+176|0;Va=Hb+172|0;qb=Hb+168|0;Ka=Hb+164|0;Ya=Hb+160|0;pb=Hb+156|0;La=Hb+152|0;z=Hb+148|0;B=Hb+144|0;y=Hb+140|0;A=Hb+136|0;_a=Hb+132|0;ab=Hb+128|0;Za=Hb+124|0;$a=Hb+120|0;eb=Hb+116|0;x=Hb+112|0;bb=Hb+108|0;fb=Hb+104|0;xa=Hb+100|0;H=Hb+96|0;V=Hb+92|0;S=Hb+88|0;K=Hb+84|0;W=Hb+80|0;ua=Hb+76|0;R=Hb+72|0;I=Hb+68|0;sa=Hb+64|0;ya=Hb+60|0;L=Hb+56|0;ra=Hb+52|0;za=Hb+48|0
;Z=Hb+44|0;$=Hb+40|0;Y=Hb+36|0;_=Hb+32|0;N=Hb+28|0;P=Hb+24|0;M=Hb+20|0;O=Hb+16|0;T=Hb+12|0;X=Hb+8|0;Q=Hb+4|0;U=Hb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ib>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Hb+496>>2]=.25;g[Hb+492>>2]=.9510565400123596;g[Hb+488>>2]=.5877852439880371;g[Hb+484>>2]=.55901700258255;c[Gb>>2]=c[Ib>>2];c[q>>2]=(c[q>>2]|0)+(((c[Ib>>2]|0)-1|0)*18<<2);while(1){if((c[Gb>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Aa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ba>>2]=+g[u>>2]+ +g[Aa>>2];g[D>>2]=+g[u>>2]-+g[Aa>>2];g[Pa>>2]=+g[c[n>>2]>>2];g[Qa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ra>>2]=+g[Pa>>2]-+g[Qa>>2];g[ia>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Ca>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Da>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ea>>2]=+g[Ca>>2]+ +g[Da>>2];g[E>>2]=+g[Ca>>2]-+g[Da>>2];g[kb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[lb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[mb>>2]=+g[kb>>2]+ +g[lb>>2];g[da>>2]=+g[kb>>2]-+g[lb>>2];g[Fa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ga>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ha>>2]=+g[Fa>>2]+ +g[Ga>>2];g[aa>>2]=+g[Fa>>2]-+g[Ga>>2];g[hb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ib>>2]=+g[c[o>>2]>>2];g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[ca>>2]=+g[hb>>2]-+g[ib>>2];g[Ma>>2]=+g[Ea>>2]-+g[Ha>>2];g[Na>>2]=+g[jb>>2]-+g[mb>>2];g[G>>2]=+g[ca>>2]-+g[da>>2];g[F>>2]=+g[E>>2]-+g[aa>>2];g[ba>>2]=+g[E>>2]+ +g[aa>>2];g[ea>>2]=+g[ca>>2]+ +g[da>>2];g[fa>>2]=+g[ba>>2]+ +g[ea>>2];g[ta>>2]=(+g[ba>>2]-+g[ea>>2])*.55901700258255;g[gb>>2]=+g[Ea>>2]+ +g[Ha>>2];g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[ob>>2]=+g[gb>>2]+ +g[nb>>2];g[rb>>2]=(+g[gb>>2]-+g[nb>>2])*.55901700258255;g[tb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ub>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[vb>>2]=+g[tb>>2]-+g[ub>>2];g[ja>>2]=+g[tb>>2]+ +g[ub>>2];g[Db>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Eb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[na>>2]=+g[Db>>2]+ 
+g[Eb>>2];g[wb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[xb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[ka>>2]=+g[wb>>2]+ +g[xb>>2];g[Ab>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Bb>>2]=+g[c[p>>2]>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[ma>>2]=+g[Ab>>2]+ +g[Bb>>2];g[zb>>2]=+g[vb>>2]-+g[yb>>2];g[Ia>>2]=+g[Cb>>2]-+g[Fb>>2];g[wa>>2]=+g[ma>>2]+ +g[na>>2];g[va>>2]=+g[ja>>2]+ +g[ka>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[pa>>2]=+g[la>>2]+ +g[oa>>2];g[J>>2]=(+g[la>>2]-+g[oa>>2])*.55901700258255;g[Sa>>2]=+g[vb>>2]+ +g[yb>>2];g[Ta>>2]=+g[Cb>>2]+ +g[Fb>>2];g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Wa>>2]=(+g[Sa>>2]-+g[Ta>>2])*.55901700258255;g[c[m>>2]>>2]=+g[Ba>>2]+ +g[ob>>2];g[c[o>>2]>>2]=+g[Ra>>2]+ +g[Ua>>2];g[ga>>2]=+g[D>>2]+ +g[fa>>2];g[qa>>2]=+g[ia>>2]+ +g[pa>>2];g[C>>2]=+g[(c[q>>2]|0)+32>>2];g[ha>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[C>>2]*+g[ga>>2]-+g[ha>>2]*+g[qa>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ha>>2]*+g[ga>>2]+ +g[C>>2]*+g[qa>>2];g[Ja>>2]=+g[zb>>2]*.5877852439880371-+g[Ia>>2]*.9510565400123596;g[Oa>>2]=+g[Ma>>2]*.5877852439880371-+g[Na>>2]*.9510565400123596;g[w>>2]=+g[Ma>>2]*.9510565400123596+ +g[Na>>2]*.5877852439880371;g[db>>2]=+g[zb>>2]*.9510565400123596+ +g[Ia>>2]*.5877852439880371;g[Va>>2]=+g[Ra>>2]-+g[Ua>>2]*.25;g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[v>>2]=+g[Wa>>2]+ +g[Va>>2];g[qb>>2]=+g[Ba>>2]-+g[ob>>2]*.25;g[sb>>2]=+g[qb>>2]-+g[rb>>2];g[cb>>2]=+g[rb>>2]+ +g[qb>>2];g[Ka>>2]=+g[sb>>2]-+g[Ja>>2];g[Ya>>2]=+g[Oa>>2]+ +g[Xa>>2];g[pb>>2]=+g[(c[q>>2]|0)+8>>2];g[La>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[pb>>2]*+g[Ka>>2]-+g[La>>2]*+g[Ya>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[La>>2]*+g[Ka>>2]+ +g[pb>>2]*+g[Ya>>2];g[z>>2]=+g[cb>>2]-+g[db>>2];g[B>>2]=+g[w>>2]+ +g[v>>2];g[y>>2]=+g[(c[q>>2]|0)+40>>2];g[A>>2]=+g[(c[q>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[y>>2]*+g[z>>2]-+g[A>>2]*+g[B>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[A>>2]*+g[z>>2]+ +g[y>>2]*+g[B>>2];g[_a>>2]=+g[sb>>2]+ 
+g[Ja>>2];g[ab>>2]=+g[Xa>>2]-+g[Oa>>2];g[Za>>2]=+g[(c[q>>2]|0)+56>>2];g[$a>>2]=+g[(c[q>>2]|0)+60>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Za>>2]*+g[_a>>2]-+g[$a>>2]*+g[ab>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[$a>>2]*+g[_a>>2]+ +g[Za>>2]*+g[ab>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[bb>>2]=+g[(c[q>>2]|0)+24>>2];g[fb>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[bb>>2]*+g[eb>>2]-+g[fb>>2]*+g[x>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[fb>>2]*+g[eb>>2]+ +g[bb>>2]*+g[x>>2];g[xa>>2]=+g[va>>2]*.5877852439880371-+g[wa>>2]*.9510565400123596;g[H>>2]=+g[F>>2]*.5877852439880371-+g[G>>2]*.9510565400123596;g[V>>2]=+g[F>>2]*.9510565400123596+ +g[G>>2]*.5877852439880371;g[S>>2]=+g[va>>2]*.9510565400123596+ +g[wa>>2]*.5877852439880371;g[I>>2]=+g[ia>>2]-+g[pa>>2]*.25;g[K>>2]=+g[I>>2]-+g[J>>2];g[W>>2]=+g[J>>2]+ +g[I>>2];g[sa>>2]=+g[D>>2]-+g[fa>>2]*.25;g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[R>>2]=+g[ta>>2]+ +g[sa>>2];g[ya>>2]=+g[ua>>2]-+g[xa>>2];g[L>>2]=+g[H>>2]+ +g[K>>2];g[ra>>2]=+g[(c[q>>2]|0)+48>>2];g[za>>2]=+g[(c[q>>2]|0)+52>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ra>>2]*+g[ya>>2]-+g[za>>2]*+g[L>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ra>>2]*+g[L>>2]+ +g[za>>2]*+g[ya>>2];g[Z>>2]=+g[R>>2]+ +g[S>>2];g[$>>2]=+g[W>>2]-+g[V>>2];g[Y>>2]=+g[(c[q>>2]|0)+64>>2];g[_>>2]=+g[(c[q>>2]|0)+68>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Y>>2]*+g[Z>>2]-+g[_>>2]*+g[$>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Y>>2]*+g[$>>2]+ +g[_>>2]*+g[Z>>2];g[N>>2]=+g[ua>>2]+ +g[xa>>2];g[P>>2]=+g[K>>2]-+g[H>>2];g[M>>2]=+g[(c[q>>2]|0)+16>>2];g[O>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]*+g[P>>2]+ +g[O>>2]*+g[N>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[Q>>2]=+g[c[q>>2]>>2];g[U>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[Q>>2]*+g[T>>2]-+g[U>>2]*+g[X>>2];g[c[p>>2]>>2]=+g[Q>>2]*+g[X>>2]+ 
+g[U>>2]*+g[T>>2];c[Gb>>2]=(c[Gb>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+72;c[r>>2]=c[r>>2]^c[2998]}i=Hb;return}function Bu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,58,8776,0);i=b;return}function Cu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0;$b=i;i=i+608|0;m=$b+604|0;n=$b+600|0;o=$b+596|0;p=$b+592|0;q=$b+588|0;r=$b+584|0;ac=$b+580|0;s=$b+576|0;t=$b+572|0;_b=$b+560|0;Xa=$b+556|0;hb=$b+552|0;x=$b+548|0;R=$b+544|0;la=$b+540|0;Z=$b+536|0;Lb=$b+532|0;Yb=$b+528|0;fa=$b+524|0;ba=$b+520|0;va=$b+516|0;V=$b+512|0;Ab=$b+508|0;mb=$b+504|0;A=$b+500|0;S=$b+496|0;oa=$b+492|0;_=$b+488|0;Gb=$b+484|0;Tb=$b+480|0;E=$b+476|0;aa=$b+472|0;sa=$b+468|0;U=$b+464|0;u=$b+460|0;db=$b+456|0;Wa=$b+452|0;ja=$b+448|0;gb=$b+444|0;w=$b+440|0;v=$b+436|0;ka=$b+432|0;Da=$b+428|0;Va=$b+424|0;eb=$b+420|0;fb=$b+416|0;Hb=$b+412|0;Xb=$b+408|0;Kb=$b+404|0;da=$b+400|0;Wb=$b+396|0;ua=$b+392|0;ea=$b+388|0;ta=$b+384|0;Ib=$b+380|0;Jb=$b+376|0;Ub=$b+372|0;Vb=$b+368|0;Ya=$b+364|0;lb=$b+360|0;$a=$b+356|0;ma=$b+352|0;kb=$b+348|0;z=$b+344|0;y=$b+340|0;na=$b+336|0;Za=$b+332|0;_a=$b+328|0;ib=$b+324|0;jb=$b+320|0;Cb=$b+316|0;Pb=$b+312|0;F
b=$b+308|0;C=$b+304|0;Sb=$b+300|0;ra=$b+296|0;D=$b+292|0;qa=$b+288|0;Db=$b+284|0;Eb=$b+280|0;Qb=$b+276|0;Rb=$b+272|0;Bb=$b+268|0;Mb=$b+264|0;ub=$b+260|0;wb=$b+256|0;xb=$b+252|0;yb=$b+248|0;tb=$b+244|0;vb=$b+240|0;ab=$b+236|0;qb=$b+232|0;ob=$b+228|0;sb=$b+224|0;Ob=$b+220|0;Zb=$b+216|0;cb=$b+212|0;nb=$b+208|0;Nb=$b+204|0;bb=$b+200|0;pb=$b+196|0;rb=$b+192|0;Ma=$b+188|0;Sa=$b+184|0;Qa=$b+180|0;Ua=$b+176|0;Ka=$b+172|0;La=$b+168|0;Oa=$b+164|0;Pa=$b+160|0;Ja=$b+156|0;Na=$b+152|0;Ra=$b+148|0;Ta=$b+144|0;X=$b+140|0;Ga=$b+136|0;Ea=$b+132|0;Ia=$b+128|0;T=$b+124|0;W=$b+120|0;$=$b+116|0;ca=$b+112|0;Q=$b+108|0;Y=$b+104|0;Fa=$b+100|0;Ha=$b+96|0;H=$b+92|0;N=$b+88|0;L=$b+84|0;P=$b+80|0;F=$b+76|0;G=$b+72|0;J=$b+68|0;K=$b+64|0;Ca=$b+60|0;I=$b+56|0;M=$b+52|0;O=$b+48|0;ha=$b+44|0;za=$b+40|0;xa=$b+36|0;Ba=$b+32|0;B=$b+28|0;ga=$b+24|0;pa=$b+20|0;wa=$b+16|0;zb=$b+12|0;ia=$b+8|0;ya=$b+4|0;Aa=$b;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ac>>2]=j;c[s>>2]=k;c[t>>2]=l;g[$b+568>>2]=.5;g[$b+564>>2]=.8660253882408142;c[_b>>2]=c[ac>>2];c[q>>2]=(c[q>>2]|0)+(((c[ac>>2]|0)-1|0)*22<<2);while(1){if((c[_b>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[db>>2]=+g[c[n>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Va>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Wa>>2]=+g[Da>>2]+ +g[Va>>2];g[ja>>2]=(+g[Da>>2]-+g[Va>>2])*.8660253882408142;g[eb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[fb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[w>>2]=(+g[eb>>2]+ +g[fb>>2])*.8660253882408142;g[Xa>>2]=+g[u>>2]+ +g[Wa>>2];g[hb>>2]=+g[db>>2]+ +g[gb>>2];g[v>>2]=+g[u>>2]-+g[Wa>>2]*.5;g[x>>2]=+g[v>>2]-+g[w>>2];g[R>>2]=+g[v>>2]+ +g[w>>2];g[ka>>2]=+g[db>>2]-+g[gb>>2]*.5;g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[Z>>2]=+g[ka>>2]-+g[ja>>2];g[Hb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ib>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Jb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Kb>>2]=+g[Ib>>2]+ 
+g[Jb>>2];g[da>>2]=(+g[Ib>>2]-+g[Jb>>2])*.8660253882408142;g[Ub>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Vb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Wb>>2]=+g[Ub>>2]+ +g[Vb>>2];g[ua>>2]=(+g[Vb>>2]-+g[Ub>>2])*.8660253882408142;g[Lb>>2]=+g[Hb>>2]+ +g[Kb>>2];g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2];g[ea>>2]=+g[Wb>>2]*.5+ +g[Xb>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[ba>>2]=+g[da>>2]+ +g[ea>>2];g[ta>>2]=+g[Hb>>2]-+g[Kb>>2]*.5;g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[V>>2]=+g[ta>>2]-+g[ua>>2];g[Ya>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[lb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Za>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[_a>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$a>>2]=+g[Za>>2]+ +g[_a>>2];g[ma>>2]=(+g[Za>>2]-+g[_a>>2])*.8660253882408142;g[ib>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[jb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[z>>2]=(+g[ib>>2]+ +g[jb>>2])*.8660253882408142;g[Ab>>2]=+g[Ya>>2]+ +g[$a>>2];g[mb>>2]=+g[kb>>2]-+g[lb>>2];g[y>>2]=+g[Ya>>2]-+g[$a>>2]*.5;g[A>>2]=+g[y>>2]+ +g[z>>2];g[S>>2]=+g[y>>2]-+g[z>>2];g[na>>2]=+g[kb>>2]*.5+ +g[lb>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[_>>2]=+g[ma>>2]+ +g[na>>2];g[Cb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Pb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Db>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Eb>>2]=+g[c[o>>2]>>2];g[Fb>>2]=+g[Db>>2]+ +g[Eb>>2];g[C>>2]=(+g[Db>>2]-+g[Eb>>2])*.8660253882408142;g[Qb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Rb>>2]=+g[c[p>>2]>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[ra>>2]=(+g[Qb>>2]-+g[Rb>>2])*.8660253882408142;g[Gb>>2]=+g[Cb>>2]+ +g[Fb>>2];g[Tb>>2]=+g[Pb>>2]-+g[Sb>>2];g[D>>2]=+g[Sb>>2]*.5+ +g[Pb>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[aa>>2]=+g[D>>2]-+g[C>>2];g[qa>>2]=+g[Cb>>2]-+g[Fb>>2]*.5;g[sa>>2]=+g[qa>>2]+ +g[ra>>2];g[U>>2]=+g[qa>>2]-+g[ra>>2];g[Bb>>2]=+g[Xa>>2]+ +g[Ab>>2];g[Mb>>2]=+g[Gb>>2]+ +g[Lb>>2];g[ub>>2]=+g[Bb>>2]-+g[Mb>>2];g[wb>>2]=+g[hb>>2]+ +g[mb>>2];g[xb>>2]=+g[Tb>>2]+ +g[Yb>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[c[m>>2]>>2]=+g[Bb>>2]+ +g[Mb>>2];g[c[o>>2]>>2]=+g[wb>>2]+ 
+g[xb>>2];g[tb>>2]=+g[(c[q>>2]|0)+40>>2];g[vb>>2]=+g[(c[q>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[tb>>2]*+g[ub>>2]-+g[vb>>2]*+g[yb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[vb>>2]*+g[ub>>2]+ +g[tb>>2]*+g[yb>>2];g[Ob>>2]=+g[Xa>>2]-+g[Ab>>2];g[Zb>>2]=+g[Tb>>2]-+g[Yb>>2];g[ab>>2]=+g[Ob>>2]-+g[Zb>>2];g[qb>>2]=+g[Ob>>2]+ +g[Zb>>2];g[cb>>2]=+g[Gb>>2]-+g[Lb>>2];g[nb>>2]=+g[hb>>2]-+g[mb>>2];g[ob>>2]=+g[cb>>2]+ +g[nb>>2];g[sb>>2]=+g[nb>>2]-+g[cb>>2];g[Nb>>2]=+g[(c[q>>2]|0)+64>>2];g[bb>>2]=+g[(c[q>>2]|0)+68>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Nb>>2]*+g[ab>>2]-+g[bb>>2]*+g[ob>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Nb>>2]*+g[ob>>2]+ +g[bb>>2]*+g[ab>>2];g[pb>>2]=+g[(c[q>>2]|0)+16>>2];g[rb>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[pb>>2]*+g[qb>>2]-+g[rb>>2]*+g[sb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[pb>>2]*+g[sb>>2]+ +g[rb>>2]*+g[qb>>2];g[Ka>>2]=+g[R>>2]-+g[S>>2];g[La>>2]=+g[aa>>2]+ +g[ba>>2];g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[Sa>>2]=+g[Ka>>2]+ +g[La>>2];g[Oa>>2]=+g[Z>>2]+ +g[_>>2];g[Pa>>2]=+g[U>>2]-+g[V>>2];g[Qa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Ua>>2]=+g[Oa>>2]-+g[Pa>>2];g[Ja>>2]=+g[(c[q>>2]|0)+32>>2];g[Na>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ja>>2]*+g[Ma>>2]-+g[Na>>2]*+g[Qa>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ja>>2]*+g[Qa>>2]+ +g[Na>>2]*+g[Ma>>2];g[Ra>>2]=+g[(c[q>>2]|0)+80>>2];g[Ta>>2]=+g[(c[q>>2]|0)+84>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ra>>2]*+g[Sa>>2]-+g[Ta>>2]*+g[Ua>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ra>>2]*+g[Ua>>2]+ +g[Ta>>2]*+g[Sa>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[X>>2]=+g[T>>2]-+g[W>>2];g[Ga>>2]=+g[T>>2]+ +g[W>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[Ea>>2]=+g[$>>2]-+g[ca>>2];g[Ia>>2]=+g[$>>2]+ +g[ca>>2];g[Q>>2]=+g[(c[q>>2]|0)+8>>2];g[Y>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]*+g[X>>2]-+g[Y>>2]*+g[Ea>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Y>>2]*+g[X>>2]+ 
+g[Q>>2]*+g[Ea>>2];g[Fa>>2]=+g[(c[q>>2]|0)+56>>2];g[Ha>>2]=+g[(c[q>>2]|0)+60>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Fa>>2]*+g[Ga>>2]-+g[Ha>>2]*+g[Ia>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ha>>2]*+g[Ga>>2]+ +g[Fa>>2]*+g[Ia>>2];g[F>>2]=+g[x>>2]+ +g[A>>2];g[G>>2]=+g[sa>>2]+ +g[va>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[N>>2]=+g[F>>2]+ +g[G>>2];g[J>>2]=+g[la>>2]+ +g[oa>>2];g[K>>2]=+g[E>>2]+ +g[fa>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[P>>2]=+g[J>>2]+ +g[K>>2];g[Ca>>2]=+g[(c[q>>2]|0)+72>>2];g[I>>2]=+g[(c[q>>2]|0)+76>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ca>>2]*+g[H>>2]-+g[I>>2]*+g[L>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[I>>2]*+g[H>>2]+ +g[Ca>>2]*+g[L>>2];g[M>>2]=+g[(c[q>>2]|0)+24>>2];g[O>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[O>>2]*+g[N>>2]+ +g[M>>2]*+g[P>>2];g[B>>2]=+g[x>>2]-+g[A>>2];g[ga>>2]=+g[E>>2]-+g[fa>>2];g[ha>>2]=+g[B>>2]-+g[ga>>2];g[za>>2]=+g[B>>2]+ +g[ga>>2];g[pa>>2]=+g[la>>2]-+g[oa>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[xa>>2]=+g[pa>>2]+ +g[wa>>2];g[Ba>>2]=+g[pa>>2]-+g[wa>>2];g[zb>>2]=+g[c[q>>2]>>2];g[ia>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[zb>>2]*+g[ha>>2]-+g[ia>>2]*+g[xa>>2];g[c[p>>2]>>2]=+g[zb>>2]*+g[xa>>2]+ +g[ia>>2]*+g[ha>>2];g[ya>>2]=+g[(c[q>>2]|0)+48>>2];g[Aa>>2]=+g[(c[q>>2]|0)+52>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ya>>2]*+g[za>>2]-+g[Aa>>2]*+g[Ba>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ya>>2]*+g[Ba>>2]+ +g[Aa>>2]*+g[za>>2];c[_b>>2]=(c[_b>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+88;c[r>>2]=c[r>>2]^c[2998]}i=$b;return}function Du(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,59,8824,0);i=b;return}function Eu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0;jd=i;i=i+880|0;m=jd+864|0;n=jd+860|0;o=jd+856|0;p=jd+852|0;q=jd+848|0;r=jd+844|0;kd=jd+840|0;s=jd+836|0;t=jd+832|0;id=jd+816|0;hc=jd+812|0;Ya=jd+808|0;ib=jd+804|0;ed=jd+800|0;C=jd+796|0;X=jd+792|0;wb=jd+788|0;za=jd+784|0;Oc=jd+780|0;nc=jd+776|0;Aa=jd+772|0;v=jd+768|0;zb=jd+764|0;Za=jd+760|0;ca=jd+756|0;jb=jd+752|0;Wc=jd+748|0;Ca=jd+744|0;xc=jd+740|0;E=jd+736|0;qb=jd+732|0;Bb=jd+728|0;bb=jd+724|0;lb=jd+720|0;bd=jd+716|0;F=jd+712|0;Gc=jd+708|0;da=jd+704|0;La=jd+700|0;Cb=jd+696|0;eb=jd+692|0;Nb=jd+688|0;Mb=jd+684|0;V=jd+680|0;y=jd+676|0;vb=jd+672|0;gc=jd+668|0;ub=jd+664|0;B=jd+660|0;W=jd+656|0;u=jd+652|0;Da=jd+648|0;w=jd+644|0;x=jd+640|0;ec=jd+636|0;fc=jd+632|0;z=jd+628|0;A=jd+624|0;Kc=jd+620|0;Y=jd+616|0;mc=jd+612|0;Z=jd+608|0;Nc=jd+604|0;$=jd+600|0;hd=jd+596|0;aa=jd+592|0;ic=jd+588|0;jc=jd+584|0;kc=jd+580|0;lc=jd+576|0;Lc=jd+572|0;Mc=jd+568|0;fd=jd+564|0;gd=jd+560|0;xb=jd+556|0;yb=jd+552|0;_=jd+548|0;ba=jd+544|0;Sc=jd+540|0;nb=jd+536|0;sc=jd+532|0;Na=jd+
528|0;Vc=jd+524|0;Ma=jd+520|0;vc=jd+516|0;ob=jd+512|0;pc=jd+508|0;wc=jd+504|0;Qc=jd+500|0;Rc=jd+496|0;qc=jd+492|0;rc=jd+488|0;Tc=jd+484|0;Uc=jd+480|0;tc=jd+476|0;uc=jd+472|0;mb=jd+468|0;pb=jd+464|0;$a=jd+460|0;ab=jd+456|0;Zc=jd+452|0;Ia=jd+448|0;Bc=jd+444|0;Ga=jd+440|0;ad=jd+436|0;Fa=jd+432|0;Ec=jd+428|0;Ja=jd+424|0;yc=jd+420|0;Fc=jd+416|0;Xc=jd+412|0;Yc=jd+408|0;zc=jd+404|0;Ac=jd+400|0;_c=jd+396|0;$c=jd+392|0;Cc=jd+388|0;Dc=jd+384|0;Ha=jd+380|0;Ka=jd+376|0;cb=jd+372|0;db=jd+368|0;Pc=jd+364|0;cd=jd+360|0;xa=jd+356|0;Ba=jd+352|0;G=jd+348|0;H=jd+344|0;wa=jd+340|0;ya=jd+336|0;gb=jd+332|0;Rb=jd+328|0;Pb=jd+324|0;Tb=jd+320|0;_a=jd+316|0;fb=jd+312|0;kb=jd+308|0;Ob=jd+304|0;Xa=jd+300|0;hb=jd+296|0;Qb=jd+292|0;Sb=jd+288|0;Xb=jd+284|0;bc=jd+280|0;$b=jd+276|0;dc=jd+272|0;Vb=jd+268|0;Wb=jd+264|0;Zb=jd+260|0;_b=jd+256|0;Ub=jd+252|0;Yb=jd+248|0;ac=jd+244|0;cc=jd+240|0;Ic=jd+236|0;ha=jd+232|0;fa=jd+228|0;ja=jd+224|0;oc=jd+220|0;Hc=jd+216|0;D=jd+212|0;ea=jd+208|0;dd=jd+204|0;Jc=jd+200|0;ga=jd+196|0;ia=jd+192|0;na=jd+188|0;ta=jd+184|0;ra=jd+180|0;va=jd+176|0;la=jd+172|0;ma=jd+168|0;pa=jd+164|0;qa=jd+160|0;ka=jd+156|0;oa=jd+152|0;sa=jd+148|0;ua=jd+144|0;sb=jd+140|0;Gb=jd+136|0;Eb=jd+132|0;Ib=jd+128|0;Ea=jd+124|0;rb=jd+120|0;Ab=jd+116|0;Db=jd+112|0;U=jd+108|0;tb=jd+104|0;Fb=jd+100|0;Hb=jd+96|0;Oa=jd+92|0;Ua=jd+88|0;Sa=jd+84|0;Wa=jd+80|0;Kb=jd+76|0;Lb=jd+72|0;Qa=jd+68|0;Ra=jd+64|0;Jb=jd+60|0;Pa=jd+56|0;Ta=jd+52|0;Va=jd+48|0;L=jd+44|0;R=jd+40|0;P=jd+36|0;T=jd+32|0;J=jd+28|0;K=jd+24|0;N=jd+20|0;O=jd+16|0;I=jd+12|0;M=jd+8|0;Q=jd+4|0;S=jd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[kd>>2]=j;c[s>>2]=k;c[t>>2]=l;g[jd+828>>2]=.3826834261417389;g[jd+824>>2]=.9238795042037964;g[jd+820>>2]=.7071067690849304;c[id>>2]=c[kd>>2];c[q>>2]=(c[q>>2]|0)+(((c[kd>>2]|0)-1|0)*30<<2);while(1){if((c[id>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Mb>>2]=+g[u>>2]+ 
+g[Da>>2];g[V>>2]=+g[u>>2]-+g[Da>>2];g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[vb>>2]=+g[w>>2]+ +g[x>>2];g[ec>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[fc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[ub>>2]=+g[ec>>2]-+g[fc>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[W>>2]=+g[z>>2]+ +g[A>>2];g[hc>>2]=+g[Mb>>2]+ +g[gc>>2];g[Ya>>2]=+g[V>>2]+ +g[W>>2];g[ib>>2]=+g[vb>>2]-+g[ub>>2];g[ed>>2]=+g[Mb>>2]-+g[gc>>2];g[C>>2]=+g[y>>2]-+g[B>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[wb>>2]=+g[ub>>2]+ +g[vb>>2];g[za>>2]=+g[y>>2]+ +g[B>>2];g[ic>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[jc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Kc>>2]=+g[ic>>2]+ +g[jc>>2];g[Y>>2]=+g[ic>>2]-+g[jc>>2];g[kc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[lc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[mc>>2]=+g[kc>>2]-+g[lc>>2];g[Z>>2]=+g[kc>>2]+ +g[lc>>2];g[Lc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Mc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Nc>>2]=+g[Lc>>2]+ +g[Mc>>2];g[$>>2]=+g[Lc>>2]-+g[Mc>>2];g[fd>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[gd>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[hd>>2]=+g[fd>>2]-+g[gd>>2];g[aa>>2]=+g[fd>>2]+ +g[gd>>2];g[Oc>>2]=+g[Kc>>2]+ +g[Nc>>2];g[nc>>2]=+g[hd>>2]-+g[mc>>2];g[Aa>>2]=+g[mc>>2]+ +g[hd>>2];g[v>>2]=+g[Kc>>2]-+g[Nc>>2];g[xb>>2]=+g[Y>>2]+ +g[Z>>2];g[yb>>2]=+g[$>>2]+ +g[aa>>2];g[zb>>2]=(+g[xb>>2]-+g[yb>>2])*.7071067690849304;g[Za>>2]=(+g[xb>>2]+ +g[yb>>2])*.7071067690849304;g[_>>2]=+g[Y>>2]-+g[Z>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[ca>>2]=(+g[_>>2]+ +g[ba>>2])*.7071067690849304;g[jb>>2]=(+g[_>>2]-+g[ba>>2])*.7071067690849304;g[Qc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Rc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Sc>>2]=+g[Qc>>2]+ +g[Rc>>2];g[nb>>2]=+g[Qc>>2]-+g[Rc>>2];g[qc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[rc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[sc>>2]=+g[qc>>2]-+g[rc>>2];g[Na>>2]=+g[qc>>2]+ 
+g[rc>>2];g[Tc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Uc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Vc>>2]=+g[Tc>>2]+ +g[Uc>>2];g[Ma>>2]=+g[Tc>>2]-+g[Uc>>2];g[tc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[uc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[vc>>2]=+g[tc>>2]-+g[uc>>2];g[ob>>2]=+g[tc>>2]+ +g[uc>>2];g[Wc>>2]=+g[Sc>>2]+ +g[Vc>>2];g[Ca>>2]=+g[sc>>2]+ +g[vc>>2];g[pc>>2]=+g[Sc>>2]-+g[Vc>>2];g[wc>>2]=+g[sc>>2]-+g[vc>>2];g[xc>>2]=+g[pc>>2]-+g[wc>>2];g[E>>2]=+g[pc>>2]+ +g[wc>>2];g[mb>>2]=+g[Ma>>2]+ +g[Na>>2];g[pb>>2]=+g[nb>>2]-+g[ob>>2];g[qb>>2]=+g[mb>>2]*.9238795042037964+ +g[pb>>2]*.3826834261417389;g[Bb>>2]=+g[pb>>2]*.9238795042037964-+g[mb>>2]*.3826834261417389;g[$a>>2]=+g[nb>>2]+ +g[ob>>2];g[ab>>2]=+g[Na>>2]-+g[Ma>>2];g[bb>>2]=+g[$a>>2]*.3826834261417389-+g[ab>>2]*.9238795042037964;g[lb>>2]=+g[ab>>2]*.3826834261417389+ +g[$a>>2]*.9238795042037964;g[Xc>>2]=+g[c[o>>2]>>2];g[Yc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Zc>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Ia>>2]=+g[Xc>>2]-+g[Yc>>2];g[zc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ac>>2]=+g[c[p>>2]>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[Ga>>2]=+g[zc>>2]+ +g[Ac>>2];g[_c>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[$c>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ad>>2]=+g[_c>>2]+ +g[$c>>2];g[Fa>>2]=+g[_c>>2]-+g[$c>>2];g[Cc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Dc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ec>>2]=+g[Cc>>2]-+g[Dc>>2];g[Ja>>2]=+g[Cc>>2]+ +g[Dc>>2];g[bd>>2]=+g[Zc>>2]+ +g[ad>>2];g[F>>2]=+g[Bc>>2]+ +g[Ec>>2];g[yc>>2]=+g[Zc>>2]-+g[ad>>2];g[Fc>>2]=+g[Bc>>2]-+g[Ec>>2];g[Gc>>2]=+g[yc>>2]+ +g[Fc>>2];g[da>>2]=+g[Fc>>2]-+g[yc>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[La>>2]=+g[Ha>>2]*.9238795042037964-+g[Ka>>2]*.3826834261417389;g[Cb>>2]=+g[Ha>>2]*.3826834261417389+ +g[Ka>>2]*.9238795042037964;g[cb>>2]=+g[Ia>>2]+ +g[Ja>>2];g[db>>2]=+g[Fa>>2]+ +g[Ga>>2];g[eb>>2]=+g[cb>>2]*.3826834261417389-+g[db>>2]*.9238795042037964;g[Nb>>2]=+g[db>>2]*.3826834261417389+ 
+g[cb>>2]*.9238795042037964;g[Pc>>2]=+g[hc>>2]+ +g[Oc>>2];g[cd>>2]=+g[Wc>>2]+ +g[bd>>2];g[xa>>2]=+g[Pc>>2]-+g[cd>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[G>>2]=+g[Ca>>2]+ +g[F>>2];g[H>>2]=+g[Ba>>2]-+g[G>>2];g[c[m>>2]>>2]=+g[Pc>>2]+ +g[cd>>2];g[c[o>>2]>>2]=+g[Ba>>2]+ +g[G>>2];g[wa>>2]=+g[(c[q>>2]|0)+56>>2];g[ya>>2]=+g[(c[q>>2]|0)+60>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[wa>>2]*+g[xa>>2]-+g[ya>>2]*+g[H>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ya>>2]*+g[xa>>2]+ +g[wa>>2]*+g[H>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[fb>>2]=+g[bb>>2]+ +g[eb>>2];g[gb>>2]=+g[_a>>2]-+g[fb>>2];g[Rb>>2]=+g[_a>>2]+ +g[fb>>2];g[kb>>2]=+g[ib>>2]+ +g[jb>>2];g[Ob>>2]=+g[lb>>2]-+g[Nb>>2];g[Pb>>2]=+g[kb>>2]-+g[Ob>>2];g[Tb>>2]=+g[kb>>2]+ +g[Ob>>2];g[Xa>>2]=+g[(c[q>>2]|0)+80>>2];g[hb>>2]=+g[(c[q>>2]|0)+84>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Xa>>2]*+g[gb>>2]-+g[hb>>2]*+g[Pb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[hb>>2]*+g[gb>>2]+ +g[Xa>>2]*+g[Pb>>2];g[Qb>>2]=+g[(c[q>>2]|0)+16>>2];g[Sb>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Qb>>2]*+g[Rb>>2]-+g[Sb>>2]*+g[Tb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sb>>2]*+g[Rb>>2]+ +g[Qb>>2]*+g[Tb>>2];g[Vb>>2]=+g[Ya>>2]+ +g[Za>>2];g[Wb>>2]=+g[lb>>2]+ +g[Nb>>2];g[Xb>>2]=+g[Vb>>2]-+g[Wb>>2];g[bc>>2]=+g[Vb>>2]+ +g[Wb>>2];g[Zb>>2]=+g[ib>>2]-+g[jb>>2];g[_b>>2]=+g[bb>>2]-+g[eb>>2];g[$b>>2]=+g[Zb>>2]+ +g[_b>>2];g[dc>>2]=+g[Zb>>2]-+g[_b>>2];g[Ub>>2]=+g[(c[q>>2]|0)+48>>2];g[Yb>>2]=+g[(c[q>>2]|0)+52>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ub>>2]*+g[Xb>>2]-+g[Yb>>2]*+g[$b>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ub>>2]*+g[$b>>2]+ +g[Yb>>2]*+g[Xb>>2];g[ac>>2]=+g[(c[q>>2]|0)+112>>2];g[cc>>2]=+g[(c[q>>2]|0)+116>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ac>>2]*+g[bc>>2]-+g[cc>>2]*+g[dc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ac>>2]*+g[dc>>2]+ +g[cc>>2]*+g[bc>>2];g[oc>>2]=+g[ed>>2]+ +g[nc>>2];g[Hc>>2]=(+g[xc>>2]+ +g[Gc>>2])*.7071067690849304;g[Ic>>2]=+g[oc>>2]-+g[Hc>>2];g[ha>>2]=+g[oc>>2]+ +g[Hc>>2];g[D>>2]=+g[v>>2]+ +g[C>>2];g[ea>>2]=(+g[E>>2]+ 
+g[da>>2])*.7071067690849304;g[fa>>2]=+g[D>>2]-+g[ea>>2];g[ja>>2]=+g[D>>2]+ +g[ea>>2];g[dd>>2]=+g[(c[q>>2]|0)+72>>2];g[Jc>>2]=+g[(c[q>>2]|0)+76>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[dd>>2]*+g[Ic>>2]-+g[Jc>>2]*+g[fa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Jc>>2]*+g[Ic>>2]+ +g[dd>>2]*+g[fa>>2];g[ga>>2]=+g[(c[q>>2]|0)+8>>2];g[ia>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[ga>>2]*+g[ha>>2]-+g[ia>>2]*+g[ja>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[ia>>2]*+g[ha>>2]+ +g[ga>>2]*+g[ja>>2];g[la>>2]=+g[ed>>2]-+g[nc>>2];g[ma>>2]=(+g[da>>2]-+g[E>>2])*.7071067690849304;g[na>>2]=+g[la>>2]-+g[ma>>2];g[ta>>2]=+g[la>>2]+ +g[ma>>2];g[pa>>2]=+g[C>>2]-+g[v>>2];g[qa>>2]=(+g[xc>>2]-+g[Gc>>2])*.7071067690849304;g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[va>>2]=+g[pa>>2]+ +g[qa>>2];g[ka>>2]=+g[(c[q>>2]|0)+104>>2];g[oa>>2]=+g[(c[q>>2]|0)+108>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ka>>2]*+g[na>>2]-+g[oa>>2]*+g[ra>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ka>>2]*+g[ra>>2]+ +g[oa>>2]*+g[na>>2];g[sa>>2]=+g[(c[q>>2]|0)+40>>2];g[ua>>2]=+g[(c[q>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[sa>>2]*+g[ta>>2]-+g[ua>>2]*+g[va>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[sa>>2]*+g[va>>2]+ +g[ua>>2]*+g[ta>>2];g[Ea>>2]=+g[X>>2]-+g[ca>>2];g[rb>>2]=+g[La>>2]-+g[qb>>2];g[sb>>2]=+g[Ea>>2]-+g[rb>>2];g[Gb>>2]=+g[Ea>>2]+ +g[rb>>2];g[Ab>>2]=+g[wb>>2]-+g[zb>>2];g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[Eb>>2]=+g[Ab>>2]-+g[Db>>2];g[Ib>>2]=+g[Ab>>2]+ +g[Db>>2];g[U>>2]=+g[(c[q>>2]|0)+96>>2];g[tb>>2]=+g[(c[q>>2]|0)+100>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[U>>2]*+g[sb>>2]-+g[tb>>2]*+g[Eb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[tb>>2]*+g[sb>>2]+ +g[U>>2]*+g[Eb>>2];g[Fb>>2]=+g[(c[q>>2]|0)+32>>2];g[Hb>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fb>>2]*+g[Gb>>2]-+g[Hb>>2]*+g[Ib>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Hb>>2]*+g[Gb>>2]+ +g[Fb>>2]*+g[Ib>>2];g[Kb>>2]=+g[X>>2]+ +g[ca>>2];g[Lb>>2]=+g[Bb>>2]+ +g[Cb>>2];g[Oa>>2]=+g[Kb>>2]-+g[Lb>>2];g[Ua>>2]=+g[Kb>>2]+ 
+g[Lb>>2];g[Qa>>2]=+g[wb>>2]+ +g[zb>>2];g[Ra>>2]=+g[qb>>2]+ +g[La>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[Jb>>2]=+g[(c[q>>2]|0)+64>>2];g[Pa>>2]=+g[(c[q>>2]|0)+68>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Jb>>2]*+g[Oa>>2]-+g[Pa>>2]*+g[Sa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Jb>>2]*+g[Sa>>2]+ +g[Pa>>2]*+g[Oa>>2];g[Ta>>2]=+g[c[q>>2]>>2];g[Va>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[Ta>>2]*+g[Ua>>2]-+g[Va>>2]*+g[Wa>>2];g[c[p>>2]>>2]=+g[Ta>>2]*+g[Wa>>2]+ +g[Va>>2]*+g[Ua>>2];g[J>>2]=+g[hc>>2]-+g[Oc>>2];g[K>>2]=+g[F>>2]-+g[Ca>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[R>>2]=+g[J>>2]+ +g[K>>2];g[N>>2]=+g[za>>2]-+g[Aa>>2];g[O>>2]=+g[Wc>>2]-+g[bd>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[T>>2]=+g[O>>2]+ +g[N>>2];g[I>>2]=+g[(c[q>>2]|0)+88>>2];g[M>>2]=+g[(c[q>>2]|0)+92>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[I>>2]*+g[L>>2]-+g[M>>2]*+g[P>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[I>>2]*+g[P>>2]+ +g[M>>2]*+g[L>>2];g[Q>>2]=+g[(c[q>>2]|0)+24>>2];g[S>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Q>>2]*+g[R>>2]-+g[S>>2]*+g[T>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Q>>2]*+g[T>>2]+ +g[S>>2]*+g[R>>2];c[id>>2]=(c[id>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+120;c[r>>2]=c[r>>2]^c[2998]}i=jd;return}function Fu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,60,8872,0);i=b;return}function Gu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0;Je=i;i=i+1200|0;m=Je+1188|0;n=Je+1184|0;o=Je+1180|0;p=Je+1176|0;q=Je+1172|0;r=Je+1168|0;Ke=Je+1164|0;s=Je+1160|0;t=Je+1156|0;Ie=Je+1136|0;Hd=Je+1132|0;oc=Je+1128|0;dd=Je+1124|0;Od=Je+1120|0;ya=Je+1116|0;fb=Je+1112|0;Bc=Je+1108|0;pb=Je+1104|0;y=Je+1100|0;Ic=Je+1096|0;Jc=Je+1092|0;la=Je+1088|0;Ka=Je+1084|0;jd=Je+1080|0;id=Je+1076|0;Ha=Je+1072|0;qa=Je+1068|0;Yc=Je+1064|0;uc=Je+1060|0;pa=Je+1056|0;tb=Je+1052|0;Wa=Je+1048|0;ub=Je+1044|0;bb=Je+1040|0;Nb=Je+1036|0;Ub=Je+1032|0;Vb=Je+1028|0;ue=Je+1024|0;Ld=Je+1020|0;Md=Je+1016|0;ad=Je+1012|0;bd=Je+1008|0;fd=Je+1004|0;m
b=Je+1e3|0;nb=Je+996|0;qb=Je+992|0;za=Je+988|0;Aa=Je+984|0;Ba=Je+980|0;jc=Je+976|0;mc=Je+972|0;pc=Je+968|0;Cc=Je+964|0;Dc=Je+960|0;Ec=Je+956|0;Rd=Je+952|0;Ud=Je+948|0;Vd=Je+944|0;Mb=Je+940|0;db=Je+936|0;ua=Je+932|0;zc=Je+928|0;Gd=Je+924|0;Ac=Je+920|0;xa=Je+916|0;eb=Je+912|0;u=Je+908|0;Da=Je+904|0;sa=Je+900|0;ta=Je+896|0;Vc=Je+892|0;Fd=Je+888|0;va=Je+884|0;wa=Je+880|0;me=Je+876|0;hc=Je+872|0;sc=Je+868|0;Pd=Je+864|0;de=Je+860|0;Sa=Je+856|0;ib=Je+852|0;Fa=Je+848|0;Kd=Je+844|0;lc=Je+840|0;Xc=Je+836|0;Td=Je+832|0;ka=Je+828|0;ab=Je+824|0;Tb=Je+820|0;Ja=Je+816|0;te=Je+812|0;ic=Je+808|0;tc=Je+804|0;Qd=Je+800|0;x=Je+796|0;Va=Je+792|0;lb=Je+788|0;Ga=Je+784|0;Be=Je+780|0;kc=Je+776|0;Wc=Je+772|0;Sd=Je+768|0;da=Je+764|0;Za=Je+760|0;Qb=Je+756|0;Ia=Je+752|0;ie=Je+748|0;gb=Je+744|0;$d=Je+740|0;Qa=Je+736|0;le=Je+732|0;Ra=Je+728|0;ce=Je+724|0;hb=Je+720|0;Id=Je+716|0;Jd=Je+712|0;Zd=Je+708|0;_d=Je+704|0;je=Je+700|0;ke=Je+696|0;ae=Je+692|0;be=Je+688|0;Ee=Je+684|0;Rb=Je+680|0;ga=Je+676|0;$a=Je+672|0;He=Je+668|0;_a=Je+664|0;ja=Je+660|0;Sb=Je+656|0;Ce=Je+652|0;De=Je+648|0;ea=Je+644|0;fa=Je+640|0;Fe=Je+636|0;Ge=Je+632|0;ha=Je+628|0;ia=Je+624|0;pe=Je+620|0;jb=Je+616|0;ge=Je+612|0;Ua=Je+608|0;se=Je+604|0;Ta=Je+600|0;w=Je+596|0;kb=Je+592|0;ne=Je+588|0;oe=Je+584|0;ee=Je+580|0;fe=Je+576|0;qe=Je+572|0;re=Je+568|0;he=Je+564|0;v=Je+560|0;xe=Je+556|0;Ob=Je+552|0;B=Je+548|0;Xa=Je+544|0;Ae=Je+540|0;Ya=Je+536|0;E=Je+532|0;Pb=Je+528|0;ve=Je+524|0;we=Je+520|0;z=Je+516|0;A=Je+512|0;ye=Je+508|0;ze=Je+504|0;C=Je+500|0;D=Je+496|0;Z=Je+492|0;$=Je+488|0;Y=Je+484|0;_=Je+480|0;Dd=Je+476|0;ed=Je+472|0;Cd=Je+468|0;Ed=Je+464|0;dc=Je+460|0;fc=Je+456|0;cc=Je+452|0;ec=Je+448|0;La=Je+444|0;vb=Je+440|0;Hb=Je+436|0;Db=Je+432|0;sb=Je+428|0;Gb=Je+424|0;Ea=Je+420|0;Cb=Je+416|0;ob=Je+412|0;rb=Je+408|0;ba=Je+404|0;ca=Je+400|0;Ma=Je+396|0;wb=Je+392|0;aa=Je+388|0;Na=Je+384|0;Kb=Je+380|0;Oa=Je+376|0;Jb=Je+372|0;Lb=Je+368|0;yb=Je+364|0;Ab=Je+360|0;xb=Je+356|0;zb=Je+352|0;Eb=Je+348|0;Ib=Je+344|0;Bb=Je+340|0;Fb=Je+336|0;Zc=Je+332|0
;kd=Je+328|0;wd=Je+324|0;rd=Je+320|0;hd=Je+316|0;vd=Je+312|0;rc=Je+308|0;sd=Je+304|0;cd=Je+300|0;gd=Je+296|0;nc=Je+292|0;qc=Je+288|0;_c=Je+284|0;ld=Je+280|0;gc=Je+276|0;$c=Je+272|0;zd=Je+268|0;Bd=Je+264|0;yd=Je+260|0;Ad=Je+256|0;nd=Je+252|0;pd=Je+248|0;md=Je+244|0;od=Je+240|0;td=Je+236|0;xd=Je+232|0;qd=Je+228|0;ud=Je+224|0;ma=Je+220|0;ra=Je+216|0;R=Je+212|0;O=Je+208|0;G=Je+204|0;S=Je+200|0;Yd=Je+196|0;N=Je+192|0;Ca=Je+188|0;F=Je+184|0;Wd=Je+180|0;Xd=Je+176|0;na=Je+172|0;H=Je+168|0;Nd=Je+164|0;oa=Je+160|0;V=Je+156|0;X=Je+152|0;U=Je+148|0;W=Je+144|0;J=Je+140|0;L=Je+136|0;I=Je+132|0;K=Je+128|0;P=Je+124|0;T=Je+120|0;M=Je+116|0;Q=Je+112|0;cb=Je+108|0;Kc=Je+104|0;Yb=Je+100|0;Sc=Je+96|0;Hc=Je+92|0;Xb=Je+88|0;wc=Je+84|0;Rc=Je+80|0;Fc=Je+76|0;Gc=Je+72|0;Wb=Je+68|0;vc=Je+64|0;xc=Je+60|0;Lc=Je+56|0;Pa=Je+52|0;yc=Je+48|0;$b=Je+44|0;bc=Je+40|0;_b=Je+36|0;ac=Je+32|0;Nc=Je+28|0;Pc=Je+24|0;Mc=Je+20|0;Oc=Je+16|0;Tc=Je+12|0;Zb=Je+8|0;Qc=Je+4|0;Uc=Je;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ke>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Je+1152>>2]=.25;g[Je+1148>>2]=.55901700258255;g[Je+1144>>2]=.5877852439880371;g[Je+1140>>2]=.9510565400123596;c[Ie>>2]=c[Ke>>2];c[q>>2]=(c[q>>2]|0)+(((c[Ke>>2]|0)-1|0)*38<<2);while(1){if((c[Ie>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[db>>2]=+g[u>>2]-+g[Da>>2];g[sa>>2]=+g[c[n>>2]>>2];g[ta>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ua>>2]=+g[sa>>2]-+g[ta>>2];g[zc>>2]=+g[sa>>2]+ +g[ta>>2];g[Vc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Fd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Gd>>2]=+g[Vc>>2]+ +g[Fd>>2];g[Ac>>2]=+g[Vc>>2]-+g[Fd>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[wa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[eb>>2]=+g[va>>2]+ +g[wa>>2];g[Hd>>2]=+g[Mb>>2]+ +g[Gd>>2];g[oc>>2]=+g[db>>2]-+g[eb>>2];g[dd>>2]=+g[Ac>>2]+ +g[zc>>2];g[Od>>2]=+g[Mb>>2]-+g[Gd>>2];g[ya>>2]=+g[ua>>2]-+g[xa>>2];g[fb>>2]=+g[db>>2]+ 
+g[eb>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[pb>>2]=+g[ua>>2]+ +g[xa>>2];g[Id>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Jd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ie>>2]=+g[Id>>2]+ +g[Jd>>2];g[gb>>2]=+g[Id>>2]-+g[Jd>>2];g[Zd>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[_d>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[$d>>2]=+g[Zd>>2]-+g[_d>>2];g[Qa>>2]=+g[Zd>>2]+ +g[_d>>2];g[je>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ke>>2]=+g[c[o>>2]>>2];g[le>>2]=+g[je>>2]+ +g[ke>>2];g[Ra>>2]=+g[je>>2]-+g[ke>>2];g[ae>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[be>>2]=+g[c[p>>2]>>2];g[ce>>2]=+g[ae>>2]-+g[be>>2];g[hb>>2]=+g[ae>>2]+ +g[be>>2];g[me>>2]=+g[ie>>2]+ +g[le>>2];g[hc>>2]=+g[gb>>2]-+g[hb>>2];g[sc>>2]=+g[Ra>>2]+ +g[Qa>>2];g[Pd>>2]=+g[ie>>2]-+g[le>>2];g[de>>2]=+g[$d>>2]-+g[ce>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[Fa>>2]=+g[$d>>2]+ +g[ce>>2];g[Ce>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[De>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[Rb>>2]=+g[Ce>>2]-+g[De>>2];g[ea>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[fa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[$a>>2]=+g[ea>>2]+ +g[fa>>2];g[Fe>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ge>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[He>>2]=+g[Fe>>2]+ +g[Ge>>2];g[_a>>2]=+g[Fe>>2]-+g[Ge>>2];g[ha>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ia>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[Sb>>2]=+g[ha>>2]+ +g[ia>>2];g[Kd>>2]=+g[Ee>>2]+ +g[He>>2];g[lc>>2]=+g[Rb>>2]+ +g[Sb>>2];g[Xc>>2]=+g[_a>>2]-+g[$a>>2];g[Td>>2]=+g[Ee>>2]-+g[He>>2];g[ka>>2]=+g[ga>>2]-+g[ja>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[Tb>>2]=+g[Rb>>2]-+g[Sb>>2];g[Ja>>2]=+g[ga>>2]+ +g[ja>>2];g[ne>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[oe>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[pe>>2]=+g[ne>>2]+ +g[oe>>2];g[jb>>2]=+g[ne>>2]-+g[oe>>2];g[ee>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[fe>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ge>>2]=+g[ee>>2]-+g[fe>>2];g[Ua>>2]=+g[ee>>2]+ 
+g[fe>>2];g[qe>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[re>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[se>>2]=+g[qe>>2]+ +g[re>>2];g[Ta>>2]=+g[qe>>2]-+g[re>>2];g[he>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[v>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[w>>2]=+g[he>>2]-+g[v>>2];g[kb>>2]=+g[he>>2]+ +g[v>>2];g[te>>2]=+g[pe>>2]+ +g[se>>2];g[ic>>2]=+g[jb>>2]-+g[kb>>2];g[tc>>2]=+g[Ta>>2]-+g[Ua>>2];g[Qd>>2]=+g[pe>>2]-+g[se>>2];g[x>>2]=+g[ge>>2]-+g[w>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[Ga>>2]=+g[ge>>2]+ +g[w>>2];g[ve>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[we>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[xe>>2]=+g[ve>>2]+ +g[we>>2];g[Ob>>2]=+g[ve>>2]-+g[we>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[Xa>>2]=+g[z>>2]+ +g[A>>2];g[ye>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ze>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[Ya>>2]=+g[ye>>2]-+g[ze>>2];g[C>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[Pb>>2]=+g[C>>2]+ +g[D>>2];g[Be>>2]=+g[xe>>2]+ +g[Ae>>2];g[kc>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Wc>>2]=+g[Ya>>2]+ +g[Xa>>2];g[Sd>>2]=+g[xe>>2]-+g[Ae>>2];g[da>>2]=+g[B>>2]-+g[E>>2];g[Za>>2]=+g[Xa>>2]-+g[Ya>>2];g[Qb>>2]=+g[Ob>>2]-+g[Pb>>2];g[Ia>>2]=+g[B>>2]+ +g[E>>2];g[y>>2]=+g[de>>2]-+g[x>>2];g[Ic>>2]=+g[ib>>2]-+g[lb>>2];g[Jc>>2]=+g[Qb>>2]-+g[Tb>>2];g[la>>2]=+g[da>>2]-+g[ka>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[jd>>2]=+g[kc>>2]-+g[lc>>2];g[id>>2]=+g[hc>>2]-+g[ic>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[qa>>2]=+g[Sd>>2]-+g[Td>>2];g[Yc>>2]=+g[Wc>>2]-+g[Xc>>2];g[uc>>2]=+g[sc>>2]-+g[tc>>2];g[pa>>2]=+g[Pd>>2]-+g[Qd>>2];g[tb>>2]=+g[me>>2]-+g[te>>2];g[Wa>>2]=+g[Sa>>2]+ +g[Va>>2];g[ub>>2]=+g[Be>>2]-+g[Kd>>2];g[bb>>2]=+g[Za>>2]+ +g[ab>>2];g[Nb>>2]=+g[ib>>2]+ +g[lb>>2];g[Ub>>2]=+g[Qb>>2]+ +g[Tb>>2];g[Vb>>2]=+g[Nb>>2]+ +g[Ub>>2];g[ue>>2]=+g[me>>2]+ +g[te>>2];g[Ld>>2]=+g[Be>>2]+ +g[Kd>>2];g[Md>>2]=+g[ue>>2]+ 
+g[Ld>>2];g[ad>>2]=+g[sc>>2]+ +g[tc>>2];g[bd>>2]=+g[Wc>>2]+ +g[Xc>>2];g[fd>>2]=+g[ad>>2]+ +g[bd>>2];g[mb>>2]=+g[Fa>>2]+ +g[Ga>>2];g[nb>>2]=+g[Ia>>2]+ +g[Ja>>2];g[qb>>2]=+g[mb>>2]+ +g[nb>>2];g[za>>2]=+g[de>>2]+ +g[x>>2];g[Aa>>2]=+g[da>>2]+ +g[ka>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[jc>>2]=+g[hc>>2]+ +g[ic>>2];g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[pc>>2]=+g[jc>>2]+ +g[mc>>2];g[Cc>>2]=+g[Sa>>2]-+g[Va>>2];g[Dc>>2]=+g[Za>>2]-+g[ab>>2];g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Rd>>2]=+g[Pd>>2]+ +g[Qd>>2];g[Ud>>2]=+g[Sd>>2]+ +g[Td>>2];g[Vd>>2]=+g[Rd>>2]+ +g[Ud>>2];g[c[m>>2]>>2]=+g[Hd>>2]+ +g[Md>>2];g[c[o>>2]>>2]=+g[pb>>2]+ +g[qb>>2];g[Z>>2]=+g[Od>>2]+ +g[Vd>>2];g[$>>2]=+g[ya>>2]+ +g[Ba>>2];g[Y>>2]=+g[(c[q>>2]|0)+72>>2];g[_>>2]=+g[(c[q>>2]|0)+76>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Y>>2]*+g[Z>>2]-+g[_>>2]*+g[$>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[_>>2]*+g[Z>>2]+ +g[Y>>2]*+g[$>>2];g[Dd>>2]=+g[oc>>2]+ +g[pc>>2];g[ed>>2]=+g[dd>>2]+ +g[fd>>2];g[Cd>>2]=+g[(c[q>>2]|0)+32>>2];g[Ed>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Cd>>2]*+g[Dd>>2]-+g[Ed>>2]*+g[ed>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Cd>>2]*+g[ed>>2]+ +g[Ed>>2]*+g[Dd>>2];g[dc>>2]=+g[fb>>2]+ +g[Vb>>2];g[fc>>2]=+g[Bc>>2]+ +g[Ec>>2];g[cc>>2]=+g[(c[q>>2]|0)+112>>2];g[ec>>2]=+g[(c[q>>2]|0)+116>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]*+g[dc>>2]-+g[ec>>2]*+g[fc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]*+g[fc>>2]+ +g[ec>>2]*+g[dc>>2];g[La>>2]=+g[Ha>>2]*.9510565400123596+ +g[Ka>>2]*.5877852439880371;g[vb>>2]=+g[tb>>2]*.9510565400123596+ +g[ub>>2]*.5877852439880371;g[Hb>>2]=+g[tb>>2]*.5877852439880371-+g[ub>>2]*.9510565400123596;g[Db>>2]=+g[Ha>>2]*.5877852439880371-+g[Ka>>2]*.9510565400123596;g[ob>>2]=(+g[mb>>2]-+g[nb>>2])*.55901700258255;g[rb>>2]=+g[pb>>2]-+g[qb>>2]*.25;g[sb>>2]=+g[ob>>2]+ +g[rb>>2];g[Gb>>2]=+g[rb>>2]-+g[ob>>2];g[ba>>2]=(+g[ue>>2]-+g[Ld>>2])*.55901700258255;g[ca>>2]=+g[Hd>>2]-+g[Md>>2]*.25;g[Ea>>2]=+g[ba>>2]+ +g[ca>>2];g[Cb>>2]=+g[ca>>2]-+g[ba>>2];g[Ma>>2]=+g[Ea>>2]+ 
+g[La>>2];g[wb>>2]=+g[sb>>2]-+g[vb>>2];g[aa>>2]=+g[(c[q>>2]|0)+24>>2];g[Na>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[aa>>2]*+g[Ma>>2]-+g[Na>>2]*+g[wb>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Na>>2]*+g[Ma>>2]+ +g[aa>>2]*+g[wb>>2];g[Kb>>2]=+g[Cb>>2]-+g[Db>>2];g[Oa>>2]=+g[Hb>>2]+ +g[Gb>>2];g[Jb>>2]=+g[(c[q>>2]|0)+88>>2];g[Lb>>2]=+g[(c[q>>2]|0)+92>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Jb>>2]*+g[Kb>>2]-+g[Lb>>2]*+g[Oa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Lb>>2]*+g[Kb>>2]+ +g[Jb>>2]*+g[Oa>>2];g[yb>>2]=+g[Ea>>2]-+g[La>>2];g[Ab>>2]=+g[vb>>2]+ +g[sb>>2];g[xb>>2]=+g[(c[q>>2]|0)+120>>2];g[zb>>2]=+g[(c[q>>2]|0)+124>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[xb>>2]*+g[yb>>2]-+g[zb>>2]*+g[Ab>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[zb>>2]*+g[yb>>2]+ +g[xb>>2]*+g[Ab>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[Ib>>2]=+g[Gb>>2]-+g[Hb>>2];g[Bb>>2]=+g[(c[q>>2]|0)+56>>2];g[Fb>>2]=+g[(c[q>>2]|0)+60>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Bb>>2]*+g[Eb>>2]-+g[Fb>>2]*+g[Ib>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Fb>>2]*+g[Eb>>2]+ +g[Bb>>2]*+g[Ib>>2];g[Zc>>2]=+g[uc>>2]*.9510565400123596+ +g[Yc>>2]*.5877852439880371;g[kd>>2]=+g[id>>2]*.9510565400123596+ +g[jd>>2]*.5877852439880371;g[wd>>2]=+g[id>>2]*.5877852439880371-+g[jd>>2]*.9510565400123596;g[rd>>2]=+g[uc>>2]*.5877852439880371-+g[Yc>>2]*.9510565400123596;g[cd>>2]=(+g[ad>>2]-+g[bd>>2])*.55901700258255;g[gd>>2]=+g[dd>>2]-+g[fd>>2]*.25;g[hd>>2]=+g[cd>>2]+ +g[gd>>2];g[vd>>2]=+g[gd>>2]-+g[cd>>2];g[nc>>2]=(+g[jc>>2]-+g[mc>>2])*.55901700258255;g[qc>>2]=+g[oc>>2]-+g[pc>>2]*.25;g[rc>>2]=+g[nc>>2]+ +g[qc>>2];g[sd>>2]=+g[qc>>2]-+g[nc>>2];g[_c>>2]=+g[rc>>2]-+g[Zc>>2];g[ld>>2]=+g[hd>>2]+ +g[kd>>2];g[gc>>2]=+g[c[q>>2]>>2];g[$c>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[gc>>2]*+g[_c>>2]-+g[$c>>2]*+g[ld>>2];g[c[p>>2]>>2]=+g[gc>>2]*+g[ld>>2]+ +g[$c>>2]*+g[_c>>2];g[zd>>2]=+g[sd>>2]-+g[rd>>2];g[Bd>>2]=+g[vd>>2]+ 
+g[wd>>2];g[yd>>2]=+g[(c[q>>2]|0)+128>>2];g[Ad>>2]=+g[(c[q>>2]|0)+132>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[yd>>2]*+g[zd>>2]-+g[Ad>>2]*+g[Bd>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[yd>>2]*+g[Bd>>2]+ +g[Ad>>2]*+g[zd>>2];g[nd>>2]=+g[Zc>>2]+ +g[rc>>2];g[pd>>2]=+g[hd>>2]-+g[kd>>2];g[md>>2]=+g[(c[q>>2]|0)+64>>2];g[od>>2]=+g[(c[q>>2]|0)+68>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[md>>2]*+g[nd>>2]-+g[od>>2]*+g[pd>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[md>>2]*+g[pd>>2]+ +g[od>>2]*+g[nd>>2];g[td>>2]=+g[rd>>2]+ +g[sd>>2];g[xd>>2]=+g[vd>>2]-+g[wd>>2];g[qd>>2]=+g[(c[q>>2]|0)+96>>2];g[ud>>2]=+g[(c[q>>2]|0)+100>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qd>>2]*+g[td>>2]-+g[ud>>2]*+g[xd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qd>>2]*+g[xd>>2]+ +g[ud>>2]*+g[td>>2];g[ma>>2]=+g[y>>2]*.5877852439880371-+g[la>>2]*.9510565400123596;g[ra>>2]=+g[pa>>2]*.5877852439880371-+g[qa>>2]*.9510565400123596;g[R>>2]=+g[pa>>2]*.9510565400123596+ +g[qa>>2]*.5877852439880371;g[O>>2]=+g[y>>2]*.9510565400123596+ +g[la>>2]*.5877852439880371;g[Ca>>2]=+g[ya>>2]-+g[Ba>>2]*.25;g[F>>2]=(+g[za>>2]-+g[Aa>>2])*.55901700258255;g[G>>2]=+g[Ca>>2]-+g[F>>2];g[S>>2]=+g[F>>2]+ +g[Ca>>2];g[Wd>>2]=+g[Od>>2]-+g[Vd>>2]*.25;g[Xd>>2]=(+g[Rd>>2]-+g[Ud>>2])*.55901700258255;g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2];g[N>>2]=+g[Xd>>2]+ +g[Wd>>2];g[na>>2]=+g[Yd>>2]-+g[ma>>2];g[H>>2]=+g[ra>>2]+ +g[G>>2];g[Nd>>2]=+g[(c[q>>2]|0)+8>>2];g[oa>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Nd>>2]*+g[na>>2]-+g[oa>>2]*+g[H>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[oa>>2]*+g[na>>2]+ +g[Nd>>2]*+g[H>>2];g[V>>2]=+g[N>>2]+ +g[O>>2];g[X>>2]=+g[S>>2]-+g[R>>2];g[U>>2]=+g[(c[q>>2]|0)+104>>2];g[W>>2]=+g[(c[q>>2]|0)+108>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[U>>2]*+g[V>>2]-+g[W>>2]*+g[X>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[W>>2]*+g[V>>2]+ +g[U>>2]*+g[X>>2];g[J>>2]=+g[Yd>>2]+ 
+g[ma>>2];g[L>>2]=+g[G>>2]-+g[ra>>2];g[I>>2]=+g[(c[q>>2]|0)+136>>2];g[K>>2]=+g[(c[q>>2]|0)+140>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[I>>2]*+g[J>>2]-+g[K>>2]*+g[L>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[K>>2]*+g[J>>2]+ +g[I>>2]*+g[L>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[M>>2]=+g[(c[q>>2]|0)+40>>2];g[Q>>2]=+g[(c[q>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[M>>2]*+g[P>>2]-+g[Q>>2]*+g[T>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Q>>2]*+g[P>>2]+ +g[M>>2]*+g[T>>2];g[cb>>2]=+g[Wa>>2]*.5877852439880371-+g[bb>>2]*.9510565400123596;g[Kc>>2]=+g[Ic>>2]*.5877852439880371-+g[Jc>>2]*.9510565400123596;g[Yb>>2]=+g[Ic>>2]*.9510565400123596+ +g[Jc>>2]*.5877852439880371;g[Sc>>2]=+g[Wa>>2]*.9510565400123596+ +g[bb>>2]*.5877852439880371;g[Fc>>2]=+g[Bc>>2]-+g[Ec>>2]*.25;g[Gc>>2]=(+g[Cc>>2]-+g[Dc>>2])*.55901700258255;g[Hc>>2]=+g[Fc>>2]-+g[Gc>>2];g[Xb>>2]=+g[Gc>>2]+ +g[Fc>>2];g[Wb>>2]=+g[fb>>2]-+g[Vb>>2]*.25;g[vc>>2]=(+g[Nb>>2]-+g[Ub>>2])*.55901700258255;g[wc>>2]=+g[Wb>>2]-+g[vc>>2];g[Rc>>2]=+g[vc>>2]+ +g[Wb>>2];g[xc>>2]=+g[cb>>2]+ +g[wc>>2];g[Lc>>2]=+g[Hc>>2]-+g[Kc>>2];g[Pa>>2]=+g[(c[q>>2]|0)+16>>2];g[yc>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Pa>>2]*+g[xc>>2]-+g[yc>>2]*+g[Lc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Pa>>2]*+g[Lc>>2]+ +g[yc>>2]*+g[xc>>2];g[$b>>2]=+g[Sc>>2]+ +g[Rc>>2];g[bc>>2]=+g[Xb>>2]-+g[Yb>>2];g[_b>>2]=+g[(c[q>>2]|0)+144>>2];g[ac>>2]=+g[(c[q>>2]|0)+148>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[_b>>2]*+g[$b>>2]-+g[ac>>2]*+g[bc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[_b>>2]*+g[bc>>2]+ +g[ac>>2]*+g[$b>>2];g[Nc>>2]=+g[wc>>2]-+g[cb>>2];g[Pc>>2]=+g[Hc>>2]+ +g[Kc>>2];g[Mc>>2]=+g[(c[q>>2]|0)+48>>2];g[Oc>>2]=+g[(c[q>>2]|0)+52>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Mc>>2]*+g[Nc>>2]-+g[Oc>>2]*+g[Pc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Mc>>2]*+g[Pc>>2]+ +g[Oc>>2]*+g[Nc>>2];g[Tc>>2]=+g[Rc>>2]-+g[Sc>>2];g[Zb>>2]=+g[Xb>>2]+ 
+g[Yb>>2];g[Qc>>2]=+g[(c[q>>2]|0)+80>>2];g[Uc>>2]=+g[(c[q>>2]|0)+84>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Qc>>2]*+g[Tc>>2]-+g[Uc>>2]*+g[Zb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Qc>>2]*+g[Zb>>2]+ +g[Uc>>2]*+g[Tc>>2];c[Ie>>2]=(c[Ie>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+152;c[r>>2]=c[r>>2]^c[2998]}i=Je;return}function Hu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,61,8920,0);i=b;return}function Iu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0;C=i;i=i+80|0;m=C+68|0;n=C+64|0;o=C+60|0;p=C+56|0;q=C+52|0;D=C+44|0;r=C+40|0;s=C+36|0;B=C+32|0;t=C+28|0;u=C+24|0;y=C+20|0;v=C+16|0;w=C+12|0;A=C+8|0;x=C+4|0;z=C;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[C+48>>2]=h;c[D>>2]=j;c[r>>2]=k;c[s>>2]=l;c[B>>2]=c[D>>2];c[q>>2]=(c[q>>2]|0)+((c[D>>2]|0)-1<<1<<2);while(1){if((c[B>>2]|0)>=(c[r>>2]|0))break;g[t>>2]=+g[c[m>>2]>>2];g[u>>2]=+g[c[o>>2]>>2];g[y>>2]=+g[t>>2]-+g[u>>2];g[v>>2]=+g[c[n>>2]>>2];g[w>>2]=+g[c[p>>2]>>2];g[A>>2]=+g[v>>2]+ +g[w>>2];g[c[m>>2]>>2]=+g[t>>2]+ +g[u>>2];g[c[o>>2]>>2]=+g[v>>2]-+g[w>>2];g[x>>2]=+g[c[q>>2]>>2];g[z>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[x>>2]*+g[y>>2]-+g[z>>2]*+g[A>>2];g[c[p>>2]>>2]=+g[z>>2]*+g[y>>2]+ +g[x>>2]*+g[A>>2];c[B>>2]=(c[B>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[s>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[s>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+8}i=C;return}function Ju(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,62,8968,0);i=b;return}function Ku(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg
=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0;Gi=i;i=i+2064|0;m=Gi+2048|0;n=Gi+2044|0;o=Gi+2040|0;p=Gi+2036|0;q=Gi+2032|0;r=Gi+2028|0;Hi=Gi+2024|0;s=Gi+2020|0;t=Gi+2016|0;Fi=Gi+1984|0;Sd=Gi+1980|0;tg=Gi+1976|0;wg=Gi+1972|0;ff=Gi+1968|0;ki=Gi+1964|0;x=Gi+1960|0;jf=Gi+1956|0;ug=Gi+1952|0;Hc=Gi+1948|0;tc=Gi+1944|0;Na=Gi+1940|0;Kb=Gi+1936|0;Zd=Gi+1932|0;xg=Gi+1928|0;gb=Gi+1924|0;hc=Gi+1920|0;zi=Gi+1916|0;Z=Gi+1912|0;Af=Gi+1908|0;zg=Gi+1904|0;Df=Gi+1900|0;Ag=Gi+1896|0;ka=Gi+1892|0;Lb=Gi+1888|0;jb=Gi+1884|0;Jc=Gi+1880|0;hd=Gi+1876|0;ne=Gi+1872|0;od=Gi+1868|0;lf=Gi+1864|0;Nb=Gi+1860|0;Ic=Gi+1856|0;Rh=Gi+1852|0;ma=Gi+1848|0;Pf=Gi+1844|0;Ah=Gi+1840|0;Sf=Gi+1836|0;zh=Gi+1832|0;Ba=Gi+1828|0;Pa=Gi+1824|0;xc=Gi+1820|0;nc=Gi+1816|0;Bd=Gi+1812|0;of=Gi+1808|0;he=Gi+1804|0;pf=Gi+1800|0;Ac=Gi+1796|0;oc=Gi+1792|0;ei=Gi+1788|0;F=Gi+1784|0;If=Gi+1780|0;Dh=Gi+1776|0;Lf=Gi+1772|0;Ch=Gi+1768|0;U=Gi+1764|0;Qa=Gi+1760|0;Sb=Gi+1756|0;kc=Gi+1752|0;Te=Gi+1748|0;rf=Gi+1744|0;_e=Gi+1740|0;sf=Gi+1736|0;Vb=Gi+1732|0;lc=Gi+1728|0;Mb=Gi+1724|0;Qd=Gi+1720|0;aa=Gi+1716|0;ef=Gi+1712|0;mf=Gi+1708|0;df=Gi+1704|0;Ea=Gi+1700|0;Rd=Gi+1696|0;fi=Gi+1692|0;Td=Gi+1688|0;Ia=Gi+1684|0;Ud=Gi+1680|0;ii=Gi+1676|0;Wd=Gi+1672|0;La=Gi+1668|0;Xd=Gi+1664|0;u=Gi+1660|0;Da=Gi+1656|0;_=Gi+1652|0;$=Gi+1648|0;Vc=Gi+1644|0;ce=Gi+1640|0;ba=Gi+1636|0;ca=Gi+1632|0;Eh=Gi+1628|0;Gh=Gi+1624|0;Ga=Gi+1620|0;Ha=Gi+1616|0;gi=Gi+1612|0;hi=Gi+1608|0;Ja=Gi+1604|0;Ka=Gi+1600|0;vg=Gi+1596|0;ji=Gi+1592|0;Fa=Gi+1588|0;Ma=Gi+1584|0;gf=Gi+1580|0;hf=Gi+1576|0;Fc=Gi+1572|0;Gc=Gi+1568|0;Vd=Gi+1564|0;Yd=Gi+1560|
0;eb=Gi+1556|0;fb=Gi+1552|0;ni=Gi+1548|0;ld=Gi+1544|0;fa=Gi+1540|0;jd=Gi+1536|0;qi=Gi+1532|0;id=Gi+1528|0;ia=Gi+1524|0;md=Gi+1520|0;ui=Gi+1516|0;ed=Gi+1512|0;A=Gi+1508|0;ae=Gi+1504|0;xi=Gi+1500|0;$d=Gi+1496|0;D=Gi+1492|0;fd=Gi+1488|0;li=Gi+1484|0;mi=Gi+1480|0;da=Gi+1476|0;ea=Gi+1472|0;oi=Gi+1468|0;pi=Gi+1464|0;ga=Gi+1460|0;ha=Gi+1456|0;si=Gi+1452|0;ti=Gi+1448|0;y=Gi+1444|0;z=Gi+1440|0;vi=Gi+1436|0;wi=Gi+1432|0;B=Gi+1428|0;C=Gi+1424|0;ri=Gi+1420|0;yi=Gi+1416|0;yf=Gi+1412|0;zf=Gi+1408|0;Bf=Gi+1404|0;Cf=Gi+1400|0;E=Gi+1396|0;ja=Gi+1392|0;hb=Gi+1388|0;ib=Gi+1384|0;be=Gi+1380|0;gd=Gi+1376|0;kd=Gi+1372|0;nd=Gi+1368|0;kb=Gi+1364|0;lb=Gi+1360|0;Di=Gi+1356|0;rd=Gi+1352|0;pa=Gi+1348|0;Dd=Gi+1344|0;Ih=Gi+1340|0;Cd=Gi+1336|0;sa=Gi+1332|0;sd=Gi+1328|0;Ph=Gi+1324|0;fe=Gi+1320|0;za=Gi+1316|0;zd=Gi+1312|0;Mh=Gi+1308|0;ee=Gi+1304|0;wa=Gi+1300|0;wd=Gi+1296|0;Bi=Gi+1292|0;Ci=Gi+1288|0;qa=Gi+1284|0;ra=Gi+1280|0;na=Gi+1276|0;oa=Gi+1272|0;Ei=Gi+1268|0;Hh=Gi+1264|0;Nh=Gi+1260|0;Oh=Gi+1256|0;xd=Gi+1252|0;xa=Gi+1248|0;ya=Gi+1244|0;yd=Gi+1240|0;Kh=Gi+1236|0;Lh=Gi+1232|0;ud=Gi+1228|0;ua=Gi+1224|0;va=Gi+1220|0;vd=Gi+1216|0;Jh=Gi+1212|0;Qh=Gi+1208|0;Nf=Gi+1204|0;Of=Gi+1200|0;Qf=Gi+1196|0;Rf=Gi+1192|0;ta=Gi+1188|0;Aa=Gi+1184|0;vc=Gi+1180|0;wc=Gi+1176|0;td=Gi+1172|0;Ad=Gi+1168|0;de=Gi+1164|0;ge=Gi+1160|0;yc=Gi+1156|0;zc=Gi+1152|0;Uh=Gi+1148|0;je=Gi+1144|0;I=Gi+1140|0;Ve=Gi+1136|0;Xh=Gi+1132|0;Ue=Gi+1128|0;L=Gi+1124|0;ke=Gi+1120|0;ci=Gi+1116|0;Ye=Gi+1112|0;S=Gi+1108|0;Re=Gi+1104|0;$h=Gi+1100|0;Xe=Gi+1096|0;P=Gi+1092|0;Oe=Gi+1088|0;Sh=Gi+1084|0;Th=Gi+1080|0;J=Gi+1076|0;K=Gi+1072|0;G=Gi+1068|0;H=Gi+1064|0;Vh=Gi+1060|0;Wh=Gi+1056|0;ai=Gi+1052|0;bi=Gi+1048|0;Pe=Gi+1044|0;Q=Gi+1040|0;R=Gi+1036|0;Qe=Gi+1032|0;Zh=Gi+1028|0;_h=Gi+1024|0;me=Gi+1020|0;N=Gi+1016|0;O=Gi+1012|0;Ne=Gi+1008|0;Yh=Gi+1004|0;di=Gi+1e3|0;Gf=Gi+996|0;Hf=Gi+992|0;Jf=Gi+988|0;Kf=Gi+984|0;M=Gi+980|0;T=Gi+976|0;Qb=Gi+972|0;Rb=Gi+968|0;le=Gi+964|0;Se=Gi+960|0;We=Gi+956|0;Ze=Gi+952|0;Tb=Gi+948|0;Ub=Gi+944|0;Ai=Gi+940|0;v=Gi+936|0;Ib=Gi+932
|0;Oa=Gi+928|0;Ra=Gi+924|0;Sa=Gi+920|0;Hb=Gi+916|0;Jb=Gi+912|0;Wa=Gi+908|0;ab=Gi+904|0;_a=Gi+900|0;cb=Gi+896|0;Ua=Gi+892|0;Va=Gi+888|0;Ya=Gi+884|0;Za=Gi+880|0;Ta=Gi+876|0;Xa=Gi+872|0;$a=Gi+868|0;bb=Gi+864|0;la=Gi+860|0;mb=Gi+856|0;Ab=Gi+852|0;wb=Gi+848|0;pb=Gi+844|0;xb=Gi+840|0;W=Gi+836|0;Bb=Gi+832|0;nb=Gi+828|0;ob=Gi+824|0;Ca=Gi+820|0;V=Gi+816|0;X=Gi+812|0;qb=Gi+808|0;w=Gi+804|0;Y=Gi+800|0;Eb=Gi+796|0;Gb=Gi+792|0;Db=Gi+788|0;Fb=Gi+784|0;sb=Gi+780|0;ub=Gi+776|0;rb=Gi+772|0;tb=Gi+768|0;yb=Gi+764|0;Cb=Gi+760|0;vb=Gi+756|0;zb=Gi+752|0;jc=Gi+748|0;Ed=Gi+744|0;Wc=Gi+740|0;Id=Gi+736|0;qc=Gi+732|0;Jd=Gi+728|0;Zc=Gi+724|0;Fd=Gi+720|0;ic=Gi+716|0;uc=Gi+712|0;mc=Gi+708|0;pc=Gi+704|0;Xc=Gi+700|0;Yc=Gi+696|0;rc=Gi+692|0;_c=Gi+688|0;gc=Gi+684|0;sc=Gi+680|0;Md=Gi+676|0;Od=Gi+672|0;Ld=Gi+668|0;Nd=Gi+664|0;ad=Gi+660|0;cd=Gi+656|0;$c=Gi+652|0;bd=Gi+648|0;Gd=Gi+644|0;Kd=Gi+640|0;dd=Gi+636|0;Hd=Gi+632|0;Pb=Gi+628|0;Xb=Gi+624|0;Lc=Gi+620|0;$b=Gi+616|0;Cc=Gi+612|0;ac=Gi+608|0;Oc=Gi+604|0;Yb=Gi+600|0;Ob=Gi+596|0;Kc=Gi+592|0;Wb=Gi+588|0;Bc=Gi+584|0;Mc=Gi+580|0;Nc=Gi+576|0;Dc=Gi+572|0;Pc=Gi+568|0;db=Gi+564|0;Ec=Gi+560|0;dc=Gi+556|0;fc=Gi+552|0;cc=Gi+548|0;ec=Gi+544|0;Rc=Gi+540|0;Tc=Gi+536|0;Qc=Gi+532|0;Sc=Gi+528|0;Zb=Gi+524|0;bc=Gi+520|0;Uc=Gi+516|0;_b=Gi+512|0;nf=Gi+508|0;hg=Gi+504|0;ag=Gi+500|0;ig=Gi+496|0;uf=Gi+492|0;mg=Gi+488|0;Zf=Gi+484|0;lg=Gi+480|0;Le=Gi+476|0;Me=Gi+472|0;_f=Gi+468|0;$f=Gi+464|0;qf=Gi+460|0;tf=Gi+456|0;Xf=Gi+452|0;Yf=Gi+448|0;vf=Gi+444|0;bg=Gi+440|0;Ke=Gi+436|0;wf=Gi+432|0;pg=Gi+428|0;rg=Gi+424|0;og=Gi+420|0;qg=Gi+416|0;dg=Gi+412|0;fg=Gi+408|0;cg=Gi+404|0;eg=Gi+400|0;jg=Gi+396|0;ng=Gi+392|0;gg=Gi+388|0;kg=Gi+384|0;yh=Gi+380|0;Wg=Gi+376|0;Pg=Gi+372|0;Xg=Gi+368|0;Hg=Gi+364|0;$g=Gi+360|0;Mg=Gi+356|0;_g=Gi+352|0;wh=Gi+348|0;xh=Gi+344|0;Ng=Gi+340|0;Og=Gi+336|0;Bh=Gi+332|0;Gg=Gi+328|0;Kg=Gi+324|0;Lg=Gi+320|0;Ig=Gi+316|0;Qg=Gi+312|0;vh=Gi+308|0;Jg=Gi+304|0;ch=Gi+300|0;Fh=Gi+296|0;bh=Gi+292|0;dh=Gi+288|0;Sg=Gi+284|0;Ug=Gi+280|0;Rg=Gi+276|0;Tg=Gi+272|0;Yg=Gi+268|0;ah=Gi+264|
0;Vg=Gi+260|0;Zg=Gi+256|0;qd=Gi+252|0;ze=Gi+248|0;se=Gi+244|0;Ae=Gi+240|0;af=Gi+236|0;Ee=Gi+232|0;pe=Gi+228|0;De=Gi+224|0;_d=Gi+220|0;pd=Gi+216|0;qe=Gi+212|0;re=Gi+208|0;ie=Gi+204|0;$e=Gi+200|0;kf=Gi+196|0;oe=Gi+192|0;bf=Gi+188|0;te=Gi+184|0;Pd=Gi+180|0;cf=Gi+176|0;He=Gi+172|0;Je=Gi+168|0;Ge=Gi+164|0;Ie=Gi+160|0;ve=Gi+156|0;xe=Gi+152|0;ue=Gi+148|0;we=Gi+144|0;Be=Gi+140|0;Fe=Gi+136|0;ye=Gi+132|0;Ce=Gi+128|0;Ff=Gi+124|0;kh=Gi+120|0;Fg=Gi+116|0;lh=Gi+112|0;Uf=Gi+108|0;ph=Gi+104|0;Cg=Gi+100|0;oh=Gi+96|0;xf=Gi+92|0;Ef=Gi+88|0;Dg=Gi+84|0;Eg=Gi+80|0;Mf=Gi+76|0;Tf=Gi+72|0;yg=Gi+68|0;Bg=Gi+64|0;Vf=Gi+60|0;eh=Gi+56|0;sg=Gi+52|0;Wf=Gi+48|0;sh=Gi+44|0;uh=Gi+40|0;rh=Gi+36|0;th=Gi+32|0;gh=Gi+28|0;ih=Gi+24|0;fh=Gi+20|0;hh=Gi+16|0;mh=Gi+12|0;qh=Gi+8|0;jh=Gi+4|0;nh=Gi;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Hi>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Gi+2012>>2]=.5555702447891235;g[Gi+2008>>2]=.8314695954322815;g[Gi+2004>>2]=.9807852506637573;g[Gi+2e3>>2]=.19509032368659973;g[Gi+1996>>2]=.9238795042037964;g[Gi+1992>>2]=.3826834261417389;g[Gi+1988>>2]=.7071067690849304;c[Fi>>2]=c[Hi>>2];c[q>>2]=(c[q>>2]|0)+(((c[Hi>>2]|0)-1|0)*62<<2);while(1){if((c[Fi>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[Qd>>2]=+g[u>>2]-+g[Da>>2];g[_>>2]=+g[c[n>>2]>>2];g[$>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[ef>>2]=+g[_>>2]+ +g[$>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[mf>>2]=+g[Vc>>2]+ +g[ce>>2];g[df>>2]=+g[Vc>>2]-+g[ce>>2];g[ba>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ca>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[Rd>>2]=+g[ba>>2]+ +g[ca>>2];g[Eh>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Gh>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[fi>>2]=+g[Eh>>2]+ 
+g[Gh>>2];g[Td>>2]=+g[Eh>>2]-+g[Gh>>2];g[Ga>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[Ud>>2]=+g[Ga>>2]+ +g[Ha>>2];g[gi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[hi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ii>>2]=+g[gi>>2]+ +g[hi>>2];g[Wd>>2]=+g[gi>>2]-+g[hi>>2];g[Ja>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Ka>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[Xd>>2]=+g[Ja>>2]+ +g[Ka>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[tg>>2]=+g[Qd>>2]+ +g[Rd>>2];g[wg>>2]=+g[ef>>2]-+g[df>>2];g[ff>>2]=+g[df>>2]+ +g[ef>>2];g[vg>>2]=+g[Mb>>2]+ +g[mf>>2];g[ji>>2]=+g[fi>>2]+ +g[ii>>2];g[ki>>2]=+g[vg>>2]+ +g[ji>>2];g[x>>2]=+g[vg>>2]-+g[ji>>2];g[gf>>2]=+g[Td>>2]+ +g[Ud>>2];g[hf>>2]=+g[Wd>>2]+ +g[Xd>>2];g[jf>>2]=(+g[gf>>2]-+g[hf>>2])*.7071067690849304;g[ug>>2]=(+g[gf>>2]+ +g[hf>>2])*.7071067690849304;g[Fc>>2]=+g[aa>>2]-+g[Ea>>2];g[Gc>>2]=+g[fi>>2]-+g[ii>>2];g[Hc>>2]=+g[Fc>>2]-+g[Gc>>2];g[tc>>2]=+g[Gc>>2]+ +g[Fc>>2];g[Fa>>2]=+g[aa>>2]+ +g[Ea>>2];g[Ma>>2]=+g[Ia>>2]+ +g[La>>2];g[Na>>2]=+g[Fa>>2]-+g[Ma>>2];g[Kb>>2]=+g[Fa>>2]+ +g[Ma>>2];g[Vd>>2]=+g[Td>>2]-+g[Ud>>2];g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2];g[Zd>>2]=(+g[Vd>>2]+ +g[Yd>>2])*.7071067690849304;g[xg>>2]=(+g[Vd>>2]-+g[Yd>>2])*.7071067690849304;g[eb>>2]=+g[Mb>>2]-+g[mf>>2];g[fb>>2]=+g[La>>2]-+g[Ia>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[hc>>2]=+g[eb>>2]+ +g[fb>>2];g[li>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[mi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[ni>>2]=+g[li>>2]+ +g[mi>>2];g[ld>>2]=+g[li>>2]-+g[mi>>2];g[da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ea>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[jd>>2]=+g[da>>2]+ +g[ea>>2];g[oi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[pi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[qi>>2]=+g[oi>>2]+ 
+g[pi>>2];g[id>>2]=+g[oi>>2]-+g[pi>>2];g[ga>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[md>>2]=+g[ga>>2]+ +g[ha>>2];g[si>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[ti>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[ui>>2]=+g[si>>2]+ +g[ti>>2];g[ed>>2]=+g[si>>2]-+g[ti>>2];g[y>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[z>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[ae>>2]=+g[y>>2]+ +g[z>>2];g[vi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[wi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[xi>>2]=+g[vi>>2]+ +g[wi>>2];g[$d>>2]=+g[vi>>2]-+g[wi>>2];g[B>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[C>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[fd>>2]=+g[B>>2]+ +g[C>>2];g[ri>>2]=+g[ni>>2]+ +g[qi>>2];g[yi>>2]=+g[ui>>2]+ +g[xi>>2];g[zi>>2]=+g[ri>>2]+ +g[yi>>2];g[Z>>2]=+g[ri>>2]-+g[yi>>2];g[yf>>2]=+g[ld>>2]+ +g[md>>2];g[zf>>2]=+g[jd>>2]-+g[id>>2];g[Af>>2]=+g[yf>>2]*.3826834261417389-+g[zf>>2]*.9238795042037964;g[zg>>2]=+g[zf>>2]*.3826834261417389+ +g[yf>>2]*.9238795042037964;g[Bf>>2]=+g[ed>>2]+ +g[fd>>2];g[Cf>>2]=+g[$d>>2]+ +g[ae>>2];g[Df>>2]=+g[Bf>>2]*.3826834261417389-+g[Cf>>2]*.9238795042037964;g[Ag>>2]=+g[Cf>>2]*.3826834261417389+ +g[Bf>>2]*.9238795042037964;g[E>>2]=+g[A>>2]+ +g[D>>2];g[ja>>2]=+g[fa>>2]+ +g[ia>>2];g[ka>>2]=+g[E>>2]-+g[ja>>2];g[Lb>>2]=+g[ja>>2]+ +g[E>>2];g[hb>>2]=+g[A>>2]-+g[D>>2];g[ib>>2]=+g[ui>>2]-+g[xi>>2];g[jb>>2]=+g[hb>>2]-+g[ib>>2];g[Jc>>2]=+g[ib>>2]+ +g[hb>>2];g[be>>2]=+g[$d>>2]-+g[ae>>2];g[gd>>2]=+g[ed>>2]-+g[fd>>2];g[hd>>2]=+g[be>>2]*.9238795042037964-+g[gd>>2]*.3826834261417389;g[ne>>2]=+g[be>>2]*.3826834261417389+ +g[gd>>2]*.9238795042037964;g[kd>>2]=+g[id>>2]+ +g[jd>>2];g[nd>>2]=+g[ld>>2]-+g[md>>2];g[od>>2]=+g[kd>>2]*.9238795042037964+ +g[nd>>2]*.3826834261417389;g[lf>>2]=+g[nd>>2]*.9238795042037964-+g[kd>>2]*.3826834261417389;g[kb>>2]=+g[ni>>2]-+g[qi>>2];g[lb>>2]=+g[fa>>2]-+g[ia>>2];g[Nb>>2]=+g[kb>>2]+ 
+g[lb>>2];g[Ic>>2]=+g[kb>>2]-+g[lb>>2];g[Bi>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Ci>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Di>>2]=+g[Bi>>2]+ +g[Ci>>2];g[rd>>2]=+g[Bi>>2]-+g[Ci>>2];g[na>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[Dd>>2]=+g[na>>2]+ +g[oa>>2];g[Ei>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Hh>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ih>>2]=+g[Ei>>2]+ +g[Hh>>2];g[Cd>>2]=+g[Ei>>2]-+g[Hh>>2];g[qa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ra>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[sd>>2]=+g[qa>>2]+ +g[ra>>2];g[Nh>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Oh>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[xd>>2]=+g[Nh>>2]-+g[Oh>>2];g[xa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[ya>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[yd>>2]=+g[xa>>2]+ +g[ya>>2];g[Ph>>2]=+g[Nh>>2]+ +g[Oh>>2];g[fe>>2]=+g[xd>>2]+ +g[yd>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[zd>>2]=+g[xd>>2]-+g[yd>>2];g[Kh>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Lh>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ud>>2]=+g[Kh>>2]-+g[Lh>>2];g[ua>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[va>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[vd>>2]=+g[ua>>2]+ +g[va>>2];g[Mh>>2]=+g[Kh>>2]+ +g[Lh>>2];g[ee>>2]=+g[ud>>2]+ +g[vd>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[Jh>>2]=+g[Di>>2]+ +g[Ih>>2];g[Qh>>2]=+g[Mh>>2]+ +g[Ph>>2];g[Rh>>2]=+g[Jh>>2]+ +g[Qh>>2];g[ma>>2]=+g[Jh>>2]-+g[Qh>>2];g[Nf>>2]=+g[Dd>>2]-+g[Cd>>2];g[Of>>2]=(+g[wd>>2]-+g[zd>>2])*.7071067690849304;g[Pf>>2]=+g[Nf>>2]+ +g[Of>>2];g[Ah>>2]=+g[Nf>>2]-+g[Of>>2];g[Qf>>2]=+g[rd>>2]+ +g[sd>>2];g[Rf>>2]=(+g[ee>>2]+ +g[fe>>2])*.7071067690849304;g[Sf>>2]=+g[Qf>>2]-+g[Rf>>2];g[zh>>2]=+g[Qf>>2]+ +g[Rf>>2];g[ta>>2]=+g[pa>>2]+ +g[sa>>2];g[Aa>>2]=+g[wa>>2]+ +g[za>>2];g[Ba>>2]=+g[ta>>2]-+g[Aa>>2];g[Pa>>2]=+g[ta>>2]+ +g[Aa>>2];g[vc>>2]=+g[pa>>2]-+g[sa>>2];g[wc>>2]=+g[Mh>>2]-+g[Ph>>2];g[xc>>2]=+g[vc>>2]-+g[wc>>2];g[nc>>2]=+g[wc>>2]+ 
+g[vc>>2];g[td>>2]=+g[rd>>2]-+g[sd>>2];g[Ad>>2]=(+g[wd>>2]+ +g[zd>>2])*.7071067690849304;g[Bd>>2]=+g[td>>2]-+g[Ad>>2];g[of>>2]=+g[td>>2]+ +g[Ad>>2];g[de>>2]=+g[Cd>>2]+ +g[Dd>>2];g[ge>>2]=(+g[ee>>2]-+g[fe>>2])*.7071067690849304;g[he>>2]=+g[de>>2]-+g[ge>>2];g[pf>>2]=+g[de>>2]+ +g[ge>>2];g[yc>>2]=+g[Di>>2]-+g[Ih>>2];g[zc>>2]=+g[za>>2]-+g[wa>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[oc>>2]=+g[yc>>2]+ +g[zc>>2];g[Sh>>2]=+g[c[o>>2]>>2];g[Th>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Uh>>2]=+g[Sh>>2]+ +g[Th>>2];g[je>>2]=+g[Sh>>2]-+g[Th>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[H>>2]=+g[c[p>>2]>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[Ve>>2]=+g[G>>2]+ +g[H>>2];g[Vh>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Wh>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Xh>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Ue>>2]=+g[Vh>>2]-+g[Wh>>2];g[J>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[K>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[ke>>2]=+g[J>>2]+ +g[K>>2];g[ai>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[bi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Pe>>2]=+g[ai>>2]-+g[bi>>2];g[Q>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[R>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Qe>>2]=+g[Q>>2]+ +g[R>>2];g[ci>>2]=+g[ai>>2]+ +g[bi>>2];g[Ye>>2]=+g[Pe>>2]+ +g[Qe>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[Re>>2]=+g[Pe>>2]-+g[Qe>>2];g[Zh>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[_h>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[me>>2]=+g[Zh>>2]-+g[_h>>2];g[N>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[O>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Ne>>2]=+g[N>>2]+ +g[O>>2];g[$h>>2]=+g[Zh>>2]+ +g[_h>>2];g[Xe>>2]=+g[me>>2]+ +g[Ne>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[Oe>>2]=+g[me>>2]-+g[Ne>>2];g[Yh>>2]=+g[Uh>>2]+ +g[Xh>>2];g[di>>2]=+g[$h>>2]+ +g[ci>>2];g[ei>>2]=+g[Yh>>2]+ +g[di>>2];g[F>>2]=+g[Yh>>2]-+g[di>>2];g[Gf>>2]=(+g[Oe>>2]-+g[Re>>2])*.7071067690849304;g[Hf>>2]=+g[Ue>>2]+ +g[Ve>>2];g[If>>2]=+g[Gf>>2]-+g[Hf>>2];g[Dh>>2]=+g[Hf>>2]+ +g[Gf>>2];g[Jf>>2]=+g[je>>2]+ +g[ke>>2];g[Kf>>2]=(+g[Xe>>2]+ 
+g[Ye>>2])*.7071067690849304;g[Lf>>2]=+g[Jf>>2]-+g[Kf>>2];g[Ch>>2]=+g[Jf>>2]+ +g[Kf>>2];g[M>>2]=+g[I>>2]+ +g[L>>2];g[T>>2]=+g[P>>2]+ +g[S>>2];g[U>>2]=+g[M>>2]-+g[T>>2];g[Qa>>2]=+g[M>>2]+ +g[T>>2];g[Qb>>2]=+g[I>>2]-+g[L>>2];g[Rb>>2]=+g[$h>>2]-+g[ci>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[kc>>2]=+g[Rb>>2]+ +g[Qb>>2];g[le>>2]=+g[je>>2]-+g[ke>>2];g[Se>>2]=(+g[Oe>>2]+ +g[Re>>2])*.7071067690849304;g[Te>>2]=+g[le>>2]-+g[Se>>2];g[rf>>2]=+g[le>>2]+ +g[Se>>2];g[We>>2]=+g[Ue>>2]-+g[Ve>>2];g[Ze>>2]=(+g[Xe>>2]-+g[Ye>>2])*.7071067690849304;g[_e>>2]=+g[We>>2]-+g[Ze>>2];g[sf>>2]=+g[We>>2]+ +g[Ze>>2];g[Tb>>2]=+g[Uh>>2]-+g[Xh>>2];g[Ub>>2]=+g[S>>2]-+g[P>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[lc>>2]=+g[Tb>>2]+ +g[Ub>>2];g[Ai>>2]=+g[ki>>2]+ +g[zi>>2];g[v>>2]=+g[Rh>>2]+ +g[ei>>2];g[Ib>>2]=+g[Ai>>2]-+g[v>>2];g[Oa>>2]=+g[Kb>>2]+ +g[Lb>>2];g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Sa>>2]=+g[Oa>>2]-+g[Ra>>2];g[c[m>>2]>>2]=+g[Ai>>2]+ +g[v>>2];g[c[o>>2]>>2]=+g[Oa>>2]+ +g[Ra>>2];g[Hb>>2]=+g[(c[q>>2]|0)+120>>2];g[Jb>>2]=+g[(c[q>>2]|0)+124>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Hb>>2]*+g[Ib>>2]-+g[Jb>>2]*+g[Sa>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Jb>>2]*+g[Ib>>2]+ +g[Hb>>2]*+g[Sa>>2];g[Ua>>2]=+g[ki>>2]-+g[zi>>2];g[Va>>2]=+g[Qa>>2]-+g[Pa>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[ab>>2]=+g[Ua>>2]+ +g[Va>>2];g[Ya>>2]=+g[Kb>>2]-+g[Lb>>2];g[Za>>2]=+g[Rh>>2]-+g[ei>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[cb>>2]=+g[Za>>2]+ +g[Ya>>2];g[Ta>>2]=+g[(c[q>>2]|0)+184>>2];g[Xa>>2]=+g[(c[q>>2]|0)+188>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ta>>2]*+g[Wa>>2]-+g[Xa>>2]*+g[_a>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ta>>2]*+g[_a>>2]+ +g[Xa>>2]*+g[Wa>>2];g[$a>>2]=+g[(c[q>>2]|0)+56>>2];g[bb>>2]=+g[(c[q>>2]|0)+60>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[$a>>2]*+g[ab>>2]-+g[bb>>2]*+g[cb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[$a>>2]*+g[cb>>2]+ +g[bb>>2]*+g[ab>>2];g[la>>2]=+g[x>>2]+ +g[ka>>2];g[mb>>2]=+g[Z>>2]+ +g[Na>>2];g[Ab>>2]=+g[Na>>2]-+g[Z>>2];g[wb>>2]=+g[x>>2]-+g[ka>>2];g[nb>>2]=+g[ma>>2]+ 
+g[Ba>>2];g[ob>>2]=+g[U>>2]-+g[F>>2];g[pb>>2]=(+g[nb>>2]+ +g[ob>>2])*.7071067690849304;g[xb>>2]=(+g[ob>>2]-+g[nb>>2])*.7071067690849304;g[Ca>>2]=+g[ma>>2]-+g[Ba>>2];g[V>>2]=+g[F>>2]+ +g[U>>2];g[W>>2]=(+g[Ca>>2]+ +g[V>>2])*.7071067690849304;g[Bb>>2]=(+g[Ca>>2]-+g[V>>2])*.7071067690849304;g[X>>2]=+g[la>>2]-+g[W>>2];g[qb>>2]=+g[mb>>2]-+g[pb>>2];g[w>>2]=+g[(c[q>>2]|0)+152>>2];g[Y>>2]=+g[(c[q>>2]|0)+156>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[w>>2]*+g[X>>2]-+g[Y>>2]*+g[qb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Y>>2]*+g[X>>2]+ +g[w>>2]*+g[qb>>2];g[Eb>>2]=+g[wb>>2]+ +g[xb>>2];g[Gb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[Db>>2]=+g[(c[q>>2]|0)+88>>2];g[Fb>>2]=+g[(c[q>>2]|0)+92>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Db>>2]*+g[Eb>>2]-+g[Fb>>2]*+g[Gb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Db>>2]*+g[Gb>>2]+ +g[Fb>>2]*+g[Eb>>2];g[sb>>2]=+g[la>>2]+ +g[W>>2];g[ub>>2]=+g[mb>>2]+ +g[pb>>2];g[rb>>2]=+g[(c[q>>2]|0)+24>>2];g[tb>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[rb>>2]*+g[sb>>2]-+g[tb>>2]*+g[ub>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[tb>>2]*+g[sb>>2]+ +g[rb>>2]*+g[ub>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[vb>>2]=+g[(c[q>>2]|0)+216>>2];g[zb>>2]=+g[(c[q>>2]|0)+220>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[vb>>2]*+g[yb>>2]-+g[zb>>2]*+g[Cb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[vb>>2]*+g[Cb>>2]+ +g[zb>>2]*+g[yb>>2];g[ic>>2]=(+g[Ic>>2]+ +g[Jc>>2])*.7071067690849304;g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[Ed>>2]=+g[hc>>2]+ +g[ic>>2];g[uc>>2]=(+g[Nb>>2]+ +g[jb>>2])*.7071067690849304;g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[Id>>2]=+g[tc>>2]+ +g[uc>>2];g[mc>>2]=+g[kc>>2]*.9238795042037964-+g[lc>>2]*.3826834261417389;g[pc>>2]=+g[nc>>2]*.9238795042037964+ +g[oc>>2]*.3826834261417389;g[qc>>2]=+g[mc>>2]-+g[pc>>2];g[Jd>>2]=+g[pc>>2]+ +g[mc>>2];g[Xc>>2]=+g[oc>>2]*.9238795042037964-+g[nc>>2]*.3826834261417389;g[Yc>>2]=+g[kc>>2]*.3826834261417389+ +g[lc>>2]*.9238795042037964;g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[Fd>>2]=+g[Xc>>2]+ 
+g[Yc>>2];g[rc>>2]=+g[jc>>2]-+g[qc>>2];g[_c>>2]=+g[Wc>>2]-+g[Zc>>2];g[gc>>2]=+g[(c[q>>2]|0)+200>>2];g[sc>>2]=+g[(c[q>>2]|0)+204>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[gc>>2]*+g[rc>>2]-+g[sc>>2]*+g[_c>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[sc>>2]*+g[rc>>2]+ +g[gc>>2]*+g[_c>>2];g[Md>>2]=+g[Ed>>2]+ +g[Fd>>2];g[Od>>2]=+g[Id>>2]+ +g[Jd>>2];g[Ld>>2]=+g[(c[q>>2]|0)+8>>2];g[Nd>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ld>>2]*+g[Md>>2]-+g[Nd>>2]*+g[Od>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ld>>2]*+g[Od>>2]+ +g[Nd>>2]*+g[Md>>2];g[ad>>2]=+g[jc>>2]+ +g[qc>>2];g[cd>>2]=+g[Wc>>2]+ +g[Zc>>2];g[$c>>2]=+g[(c[q>>2]|0)+72>>2];g[bd>>2]=+g[(c[q>>2]|0)+76>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[$c>>2]*+g[ad>>2]-+g[bd>>2]*+g[cd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[bd>>2]*+g[ad>>2]+ +g[$c>>2]*+g[cd>>2];g[Gd>>2]=+g[Ed>>2]-+g[Fd>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[dd>>2]=+g[(c[q>>2]|0)+136>>2];g[Hd>>2]=+g[(c[q>>2]|0)+140>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[dd>>2]*+g[Gd>>2]-+g[Hd>>2]*+g[Kd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[dd>>2]*+g[Kd>>2]+ +g[Hd>>2]*+g[Gd>>2];g[Ob>>2]=(+g[jb>>2]-+g[Nb>>2])*.7071067690849304;g[Pb>>2]=+g[gb>>2]-+g[Ob>>2];g[Xb>>2]=+g[gb>>2]+ +g[Ob>>2];g[Kc>>2]=(+g[Ic>>2]-+g[Jc>>2])*.7071067690849304;g[Lc>>2]=+g[Hc>>2]-+g[Kc>>2];g[$b>>2]=+g[Hc>>2]+ +g[Kc>>2];g[Wb>>2]=+g[Sb>>2]*.3826834261417389-+g[Vb>>2]*.9238795042037964;g[Bc>>2]=+g[xc>>2]*.3826834261417389+ +g[Ac>>2]*.9238795042037964;g[Cc>>2]=+g[Wb>>2]-+g[Bc>>2];g[ac>>2]=+g[Bc>>2]+ +g[Wb>>2];g[Mc>>2]=+g[Ac>>2]*.3826834261417389-+g[xc>>2]*.9238795042037964;g[Nc>>2]=+g[Sb>>2]*.9238795042037964+ +g[Vb>>2]*.3826834261417389;g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2];g[Yb>>2]=+g[Mc>>2]+ +g[Nc>>2];g[Dc>>2]=+g[Pb>>2]-+g[Cc>>2];g[Pc>>2]=+g[Lc>>2]-+g[Oc>>2];g[db>>2]=+g[(c[q>>2]|0)+232>>2];g[Ec>>2]=+g[(c[q>>2]|0)+236>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[db>>2]*+g[Dc>>2]-+g[Ec>>2]*+g[Pc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Ec>>2]*+g[Dc>>2]+ 
+g[db>>2]*+g[Pc>>2];g[dc>>2]=+g[Xb>>2]+ +g[Yb>>2];g[fc>>2]=+g[$b>>2]+ +g[ac>>2];g[cc>>2]=+g[(c[q>>2]|0)+40>>2];g[ec>>2]=+g[(c[q>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[cc>>2]*+g[dc>>2]-+g[ec>>2]*+g[fc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[cc>>2]*+g[fc>>2]+ +g[ec>>2]*+g[dc>>2];g[Rc>>2]=+g[Pb>>2]+ +g[Cc>>2];g[Tc>>2]=+g[Lc>>2]+ +g[Oc>>2];g[Qc>>2]=+g[(c[q>>2]|0)+104>>2];g[Sc>>2]=+g[(c[q>>2]|0)+108>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Qc>>2]*+g[Rc>>2]-+g[Sc>>2]*+g[Tc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Sc>>2]*+g[Rc>>2]+ +g[Qc>>2]*+g[Tc>>2];g[Zb>>2]=+g[Xb>>2]-+g[Yb>>2];g[bc>>2]=+g[$b>>2]-+g[ac>>2];g[Uc>>2]=+g[(c[q>>2]|0)+168>>2];g[_b>>2]=+g[(c[q>>2]|0)+172>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Uc>>2]*+g[Zb>>2]-+g[_b>>2]*+g[bc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Uc>>2]*+g[bc>>2]+ +g[_b>>2]*+g[Zb>>2];g[Le>>2]=+g[Sd>>2]+ +g[Zd>>2];g[Me>>2]=+g[lf>>2]+ +g[ne>>2];g[nf>>2]=+g[Le>>2]+ +g[Me>>2];g[hg>>2]=+g[Le>>2]-+g[Me>>2];g[_f>>2]=+g[of>>2]*.19509032368659973+ +g[pf>>2]*.9807852506637573;g[$f>>2]=+g[sf>>2]*.9807852506637573-+g[rf>>2]*.19509032368659973;g[ag>>2]=+g[_f>>2]+ +g[$f>>2];g[ig>>2]=+g[$f>>2]-+g[_f>>2];g[qf>>2]=+g[of>>2]*.9807852506637573-+g[pf>>2]*.19509032368659973;g[tf>>2]=+g[rf>>2]*.9807852506637573+ +g[sf>>2]*.19509032368659973;g[uf>>2]=+g[qf>>2]+ +g[tf>>2];g[mg>>2]=+g[qf>>2]-+g[tf>>2];g[Xf>>2]=+g[ff>>2]+ +g[jf>>2];g[Yf>>2]=+g[od>>2]+ +g[hd>>2];g[Zf>>2]=+g[Xf>>2]+ +g[Yf>>2];g[lg>>2]=+g[Xf>>2]-+g[Yf>>2];g[vf>>2]=+g[nf>>2]-+g[uf>>2];g[bg>>2]=+g[Zf>>2]-+g[ag>>2];g[Ke>>2]=+g[(c[q>>2]|0)+128>>2];g[wf>>2]=+g[(c[q>>2]|0)+132>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ke>>2]*+g[vf>>2]-+g[wf>>2]*+g[bg>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[wf>>2]*+g[vf>>2]+ +g[Ke>>2]*+g[bg>>2];g[pg>>2]=+g[hg>>2]+ +g[ig>>2];g[rg>>2]=+g[lg>>2]+ +g[mg>>2];g[og>>2]=+g[(c[q>>2]|0)+64>>2];g[qg>>2]=+g[(c[q>>2]|0)+68>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[og>>2]*+g[pg>>2]-+g[qg>>2]*+g[rg>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[og>>2]*+g[rg>>2]+ 
+g[qg>>2]*+g[pg>>2];g[dg>>2]=+g[nf>>2]+ +g[uf>>2];g[fg>>2]=+g[Zf>>2]+ +g[ag>>2];g[cg>>2]=+g[c[q>>2]>>2];g[eg>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[cg>>2]*+g[dg>>2]-+g[eg>>2]*+g[fg>>2];g[c[p>>2]>>2]=+g[eg>>2]*+g[dg>>2]+ +g[cg>>2]*+g[fg>>2];g[jg>>2]=+g[hg>>2]-+g[ig>>2];g[ng>>2]=+g[lg>>2]-+g[mg>>2];g[gg>>2]=+g[(c[q>>2]|0)+192>>2];g[kg>>2]=+g[(c[q>>2]|0)+196>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[gg>>2]*+g[jg>>2]-+g[kg>>2]*+g[ng>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[gg>>2]*+g[ng>>2]+ +g[kg>>2]*+g[jg>>2];g[wh>>2]=+g[tg>>2]+ +g[ug>>2];g[xh>>2]=+g[zg>>2]+ +g[Ag>>2];g[yh>>2]=+g[wh>>2]-+g[xh>>2];g[Wg>>2]=+g[wh>>2]+ +g[xh>>2];g[Ng>>2]=+g[Ah>>2]*.19509032368659973+ +g[zh>>2]*.9807852506637573;g[Og>>2]=+g[Dh>>2]*.19509032368659973+ +g[Ch>>2]*.9807852506637573;g[Pg>>2]=+g[Ng>>2]-+g[Og>>2];g[Xg>>2]=+g[Ng>>2]+ +g[Og>>2];g[Bh>>2]=+g[zh>>2]*.19509032368659973-+g[Ah>>2]*.9807852506637573;g[Gg>>2]=+g[Ch>>2]*.19509032368659973-+g[Dh>>2]*.9807852506637573;g[Hg>>2]=+g[Bh>>2]+ +g[Gg>>2];g[$g>>2]=+g[Bh>>2]-+g[Gg>>2];g[Kg>>2]=+g[wg>>2]-+g[xg>>2];g[Lg>>2]=+g[Af>>2]-+g[Df>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[_g>>2]=+g[Kg>>2]-+g[Lg>>2];g[Ig>>2]=+g[yh>>2]-+g[Hg>>2];g[Qg>>2]=+g[Mg>>2]-+g[Pg>>2];g[vh>>2]=+g[(c[q>>2]|0)+176>>2];g[Jg>>2]=+g[(c[q>>2]|0)+180>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[vh>>2]*+g[Ig>>2]-+g[Jg>>2]*+g[Qg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Jg>>2]*+g[Ig>>2]+ +g[vh>>2]*+g[Qg>>2];g[ch>>2]=+g[Wg>>2]+ +g[Xg>>2];g[Fh>>2]=+g[_g>>2]-+g[$g>>2];g[bh>>2]=+g[(c[q>>2]|0)+240>>2];g[dh>>2]=+g[(c[q>>2]|0)+244>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[bh>>2]*+g[ch>>2]-+g[dh>>2]*+g[Fh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[bh>>2]*+g[Fh>>2]+ +g[dh>>2]*+g[ch>>2];g[Sg>>2]=+g[yh>>2]+ +g[Hg>>2];g[Ug>>2]=+g[Mg>>2]+ +g[Pg>>2];g[Rg>>2]=+g[(c[q>>2]|0)+48>>2];g[Tg>>2]=+g[(c[q>>2]|0)+52>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Rg>>2]*+g[Sg>>2]-+g[Tg>>2]*+g[Ug>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Tg>>2]*+g[Sg>>2]+ 
+g[Rg>>2]*+g[Ug>>2];g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[ah>>2]=+g[_g>>2]+ +g[$g>>2];g[Vg>>2]=+g[(c[q>>2]|0)+112>>2];g[Zg>>2]=+g[(c[q>>2]|0)+116>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Vg>>2]*+g[Yg>>2]-+g[Zg>>2]*+g[ah>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Vg>>2]*+g[ah>>2]+ +g[Zg>>2]*+g[Yg>>2];g[_d>>2]=+g[Sd>>2]-+g[Zd>>2];g[pd>>2]=+g[hd>>2]-+g[od>>2];g[qd>>2]=+g[_d>>2]+ +g[pd>>2];g[ze>>2]=+g[_d>>2]-+g[pd>>2];g[qe>>2]=+g[Bd>>2]*.8314695954322815+ +g[he>>2]*.5555702447891235;g[re>>2]=+g[_e>>2]*.5555702447891235-+g[Te>>2]*.8314695954322815;g[se>>2]=+g[qe>>2]+ +g[re>>2];g[Ae>>2]=+g[re>>2]-+g[qe>>2];g[ie>>2]=+g[Bd>>2]*.5555702447891235-+g[he>>2]*.8314695954322815;g[$e>>2]=+g[Te>>2]*.5555702447891235+ +g[_e>>2]*.8314695954322815;g[af>>2]=+g[ie>>2]+ +g[$e>>2];g[Ee>>2]=+g[ie>>2]-+g[$e>>2];g[kf>>2]=+g[ff>>2]-+g[jf>>2];g[oe>>2]=+g[lf>>2]-+g[ne>>2];g[pe>>2]=+g[kf>>2]+ +g[oe>>2];g[De>>2]=+g[kf>>2]-+g[oe>>2];g[bf>>2]=+g[qd>>2]-+g[af>>2];g[te>>2]=+g[pe>>2]-+g[se>>2];g[Pd>>2]=+g[(c[q>>2]|0)+160>>2];g[cf>>2]=+g[(c[q>>2]|0)+164>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Pd>>2]*+g[bf>>2]-+g[cf>>2]*+g[te>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[cf>>2]*+g[bf>>2]+ +g[Pd>>2]*+g[te>>2];g[He>>2]=+g[ze>>2]+ +g[Ae>>2];g[Je>>2]=+g[De>>2]+ +g[Ee>>2];g[Ge>>2]=+g[(c[q>>2]|0)+96>>2];g[Ie>>2]=+g[(c[q>>2]|0)+100>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ge>>2]*+g[He>>2]-+g[Ie>>2]*+g[Je>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ge>>2]*+g[Je>>2]+ +g[Ie>>2]*+g[He>>2];g[ve>>2]=+g[qd>>2]+ +g[af>>2];g[xe>>2]=+g[pe>>2]+ +g[se>>2];g[ue>>2]=+g[(c[q>>2]|0)+32>>2];g[we>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ue>>2]*+g[ve>>2]-+g[we>>2]*+g[xe>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[we>>2]*+g[ve>>2]+ +g[ue>>2]*+g[xe>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[Fe>>2]=+g[De>>2]-+g[Ee>>2];g[ye>>2]=+g[(c[q>>2]|0)+224>>2];g[Ce>>2]=+g[(c[q>>2]|0)+228>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ye>>2]*+g[Be>>2]-+g[Ce>>2]*+g[Fe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ye>>2]*+g[Fe>>2]+ 
+g[Ce>>2]*+g[Be>>2];g[xf>>2]=+g[tg>>2]-+g[ug>>2];g[Ef>>2]=+g[Af>>2]+ +g[Df>>2];g[Ff>>2]=+g[xf>>2]-+g[Ef>>2];g[kh>>2]=+g[xf>>2]+ +g[Ef>>2];g[Dg>>2]=+g[Sf>>2]*.8314695954322815-+g[Pf>>2]*.5555702447891235;g[Eg>>2]=+g[If>>2]*.5555702447891235+ +g[Lf>>2]*.8314695954322815;g[Fg>>2]=+g[Dg>>2]-+g[Eg>>2];g[lh>>2]=+g[Dg>>2]+ +g[Eg>>2];g[Mf>>2]=+g[If>>2]*.8314695954322815-+g[Lf>>2]*.5555702447891235;g[Tf>>2]=+g[Pf>>2]*.8314695954322815+ +g[Sf>>2]*.5555702447891235;g[Uf>>2]=+g[Mf>>2]-+g[Tf>>2];g[ph>>2]=+g[Tf>>2]+ +g[Mf>>2];g[yg>>2]=+g[wg>>2]+ +g[xg>>2];g[Bg>>2]=+g[zg>>2]-+g[Ag>>2];g[Cg>>2]=+g[yg>>2]-+g[Bg>>2];g[oh>>2]=+g[yg>>2]+ +g[Bg>>2];g[Vf>>2]=+g[Ff>>2]-+g[Uf>>2];g[eh>>2]=+g[Cg>>2]-+g[Fg>>2];g[sg>>2]=+g[(c[q>>2]|0)+208>>2];g[Wf>>2]=+g[(c[q>>2]|0)+212>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[sg>>2]*+g[Vf>>2]-+g[Wf>>2]*+g[eh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Wf>>2]*+g[Vf>>2]+ +g[sg>>2]*+g[eh>>2];g[sh>>2]=+g[kh>>2]+ +g[lh>>2];g[uh>>2]=+g[oh>>2]+ +g[ph>>2];g[rh>>2]=+g[(c[q>>2]|0)+16>>2];g[th>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[rh>>2]*+g[sh>>2]-+g[th>>2]*+g[uh>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[rh>>2]*+g[uh>>2]+ +g[th>>2]*+g[sh>>2];g[gh>>2]=+g[Ff>>2]+ +g[Uf>>2];g[ih>>2]=+g[Cg>>2]+ +g[Fg>>2];g[fh>>2]=+g[(c[q>>2]|0)+80>>2];g[hh>>2]=+g[(c[q>>2]|0)+84>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[fh>>2]*+g[gh>>2]-+g[hh>>2]*+g[ih>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[hh>>2]*+g[gh>>2]+ +g[fh>>2]*+g[ih>>2];g[mh>>2]=+g[kh>>2]-+g[lh>>2];g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[jh>>2]=+g[(c[q>>2]|0)+144>>2];g[nh>>2]=+g[(c[q>>2]|0)+148>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[jh>>2]*+g[mh>>2]-+g[nh>>2]*+g[qh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[jh>>2]*+g[qh>>2]+ +g[nh>>2]*+g[mh>>2];c[Fi>>2]=(c[Fi>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+248;c[r>>2]=c[r>>2]^c[2998]}i=Gi;return}function Lu(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,63,9016,0);i=b;return}function Mu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0;X=i;i=i+160|0;m=X+148|0;n=X+144|0;o=X+140|0;p=X+136|0;q=X+132|0;r=X+128|0;Y=X+124|0;s=X+120|0;t=X+116|0;W=X+112|0;w=X+108|0;L=X+104|0;F=X+100|0;Q=X+96|0;z=X+92|0;P=X+88|0;I=X+84|0;M=X+80|0;u=X+76|0;v=X+72|0;D=X+68|0;E=X+64|0;x=X+60|0;y=X+56|0;G=X+52|0;H=X+48|0;B=X+44|0;J=X+40|0;A=X+36|0;C=X+32|0;N=X+28|0;R=X+24|0;K=X+20|0;O=X+16|0;T=X+12|0;V=X+8|0;S=X+4|0;U=X;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Y>>2]=j;c[s>>2]=k;c[t>>2]=l;c[W>>2]=c[Y>>2];c[q>>2]=(c[q>>2]|0)+(((c[Y>>2]|0)-1|0)*6<<2);while(1){if((c[W>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[L>>2]=+g[u>>2]-+g[v>>2];g[D>>2]=+g[c[n>>2]>>2];g[E>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[Q>>2]=+g[D>>2]+ +g[E>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[c[o>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[P>>2]=+g[x>>2]-+g[y>>2];g[G>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[H>>2]=+g[c[p>>2]>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[M>>2]=+g[G>>2]+ +g[H>>2];g[c[m>>2]>>2]=+g[w>>2]+ +g[z>>2];g[c[o>>2]>>2]=+g[F>>2]+ +g[I>>2];g[B>>2]=+g[w>>2]-+g[z>>2];g[J>>2]=+g[F>>2]-+g[I>>2];g[A>>2]=+g[(c[q>>2]|0)+8>>2];g[C>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[A>>2]*+g[B>>2]-+g[C>>2]*+g[J>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]*+g[B>>2]+ +g[A>>2]*+g[J>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[R>>2]=+g[P>>2]+ +g[Q>>2];g[K>>2]=+g[c[q>>2]>>2];g[O>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[K>>2]*+g[N>>2]-+g[O>>2]*+g[R>>2];g[c[p>>2]>>2]=+g[K>>2]*+g[R>>2]+ +g[O>>2]*+g[N>>2];g[T>>2]=+g[L>>2]+ 
+g[M>>2];g[V>>2]=+g[Q>>2]-+g[P>>2];g[S>>2]=+g[(c[q>>2]|0)+16>>2];g[U>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[S>>2]*+g[T>>2]-+g[U>>2]*+g[V>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[S>>2]*+g[V>>2]+ +g[U>>2]*+g[T>>2];c[W>>2]=(c[W>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24}i=X;return}function Nu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,64,9064,0);i=b;return} +function ui(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,B
e=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0;Yf=i;i=i+1536|0;m=Yf+1524|0;n=Yf+1520|0;o=Yf+1516|0;p=Yf+1512|0;q=Yf+1508|0;r=Yf+1504|0;Zf=Yf+1500|0;s=Yf+1496|0;t=Yf+1492|0;Xf=Yf+1408|0;Xe=Yf+1404|0;Yd=Yf+1400|0;fb=Yf+1396|0;mf=Yf+1392|0;cc=Yf+1388|0;tf=Yf+1384|0;Se=Yf+1380|0;dc=Yf+1376|0;cb=Yf+1372|0;Zd=Yf+1368|0;Ff=Yf+1364|0;Of=Yf+1360|0;Pf=Yf+1356|0;Ge=Yf+1352|0;He=Yf+1348|0;Pe=Yf+1344|0;D=Yf+1340|0;kd=Yf+1336|0;Rb=Yf+1332|0;hc=Yf+1328|0;L=Yf+1324|0;nd=Yf+1320|0;Vb=Yf+1316|0;oc=Yf+1312|0;ya=Yf+1308|0;od=Yf+1304|0;Ub=Yf+1300|0;rc=Yf+1296|0;ma=Yf+1292|0;ld=Yf+1288|0;Sb=Yf+1284|0;kc=Yf+1280|0;Ze=Yf+1276|0;gf=Yf+1272|0;hf=Yf+1268|0;Je=Yf+1264|0;Ke=Yf+1260|0;Qe=Yf+1256|0;Y=Yf+1252|0;rd=Yf+1248|0;xc=Yf+1244|0;Xc=Yf+1240|0;Db=Yf+1236|0;ud=Yf+1232|0;zc=Yf+1228|0;Fd=Yf+1224|0;sb=Yf+1220|0;vd=Yf+1216|0;Ac=Yf+1212|0;cd=Yf+1208|0;Ia=Yf+1204|0;sd=Yf+1200|0;wc=Yf+1196|0;_c=Yf+1192|0;u=Yf+1188|0;Vc=Yf+1184|0;Ve=Yf+1180|0;We=Yf+1176|0;eb=Yf+1172|0;db=Yf+1168|0;kf=Yf+1164|0;lf=Yf+1160|0;Da=Yf+1156|0;Mb=Yf+1152|0;ce=Yf+1148|0;Ue=Yf+1144|0;$a=Yf+1140|0;Ya=Yf+1136|0;Za=Yf+1132|0;pf=Yf+1128|0;ab=Yf+1124|0;sf=Yf+1120|0;_a=Yf+1116|0;bb=Yf+1112|0;nf=Yf+1108|0;of=Yf+1104|0;qf=Yf+1100|0;rf=Yf+1096|0;xf=Yf+1092|0;fa=Yf+1088|0;Gf=Yf+1084|0;Ca=Yf+1080|0;Ef=Yf+1076|0;ka=Yf+1072|0;vf=Yf+1068|0;ja=Yf+1064|0;y=Yf+1060|0;ga=Yf+1056|0;B=Yf+1052|0;ea=Yf+1048|0;Nf=Yf+1044|0;J=Yf+1040|0;oa=Yf+1036|0;I=Yf+1032|0;ta=Yf+1028|0;F=Yf+1024|0;wa=Yf+1020|0;Ba=Yf+1016|0;yf=Yf+1012|0;zf=Yf+1008|0;Af=Yf+1004|0;Bf=Yf+1e3|0;Cf=Yf+996|0;Df=Yf+992|0;w=Yf+988|0;x=Yf+984|0;E=Yf+980|0;z=Yf+976|0;A=Yf+972|0;da=Yf+968|0;Hf=Yf+964|0;If=Yf+960|0;Jf=Yf+956|0;Kf=Yf+952|0;Lf=Yf+948|0;Mf=Yf+
944|0;ra=Yf+940|0;sa=Yf+936|0;za=Yf+932|0;ua=Yf+928|0;va=Yf+924|0;Aa=Yf+920|0;C=Yf+916|0;gc=Yf+912|0;v=Yf+908|0;fc=Yf+904|0;wf=Yf+900|0;K=Yf+896|0;mc=Yf+892|0;H=Yf+888|0;nc=Yf+884|0;G=Yf+880|0;xa=Yf+876|0;qc=Yf+872|0;qa=Yf+868|0;pc=Yf+864|0;pa=Yf+860|0;la=Yf+856|0;ic=Yf+852|0;ia=Yf+848|0;jc=Yf+844|0;ha=Yf+840|0;Qf=Yf+836|0;aa=Yf+832|0;_e=Yf+828|0;wb=Yf+824|0;Ye=Yf+820|0;Ga=Yf+816|0;O=Yf+812|0;Fa=Yf+808|0;T=Yf+804|0;ba=Yf+800|0;W=Yf+796|0;$=Yf+792|0;ff=Yf+788|0;Bb=Yf+784|0;Ka=Yf+780|0;Ab=Yf+776|0;nb=Yf+772|0;xb=Yf+768|0;qb=Yf+764|0;vb=Yf+760|0;Rf=Yf+756|0;Sf=Yf+752|0;Tf=Yf+748|0;Uf=Yf+744|0;Vf=Yf+740|0;Wf=Yf+736|0;R=Yf+732|0;S=Yf+728|0;Z=Yf+724|0;U=Yf+720|0;V=Yf+716|0;_=Yf+712|0;$e=Yf+708|0;af=Yf+704|0;bf=Yf+700|0;cf=Yf+696|0;df=Yf+692|0;ef=Yf+688|0;Na=Yf+684|0;mb=Yf+680|0;tb=Yf+676|0;ob=Yf+672|0;pb=Yf+668|0;ub=Yf+664|0;X=Yf+660|0;Wc=Yf+656|0;Q=Yf+652|0;uc=Yf+648|0;P=Yf+644|0;Cb=Yf+640|0;dd=Yf+636|0;zb=Yf+632|0;Ed=Yf+628|0;yb=Yf+624|0;rb=Yf+620|0;bd=Yf+616|0;Ma=Yf+612|0;ad=Yf+608|0;La=Yf+604|0;Ha=Yf+600|0;Yc=Yf+596|0;Ea=Yf+592|0;Zc=Yf+588|0;ca=Yf+584|0;De=Yf+580|0;jf=Yf+576|0;Ee=Yf+572|0;Me=Yf+568|0;Oe=Yf+564|0;Ie=Yf+560|0;Le=Yf+556|0;Ne=Yf+552|0;Fe=Yf+548|0;Re=Yf+544|0;Te=Yf+540|0;ne=Yf+536|0;re=Yf+532|0;se=Yf+528|0;pe=Yf+524|0;qe=Yf+520|0;te=Yf+516|0;oe=Yf+512|0;uf=Yf+508|0;gb=Yf+504|0;Gb=Yf+500|0;lb=Yf+496|0;Hb=Yf+492|0;kb=Yf+488|0;Oa=Yf+484|0;hb=Yf+480|0;Ra=Yf+476|0;Xa=Yf+472|0;na=Yf+468|0;M=Yf+464|0;N=Yf+460|0;Ja=Yf+456|0;Eb=Yf+452|0;Fb=Yf+448|0;Kb=Yf+444|0;Lb=Yf+440|0;Va=Yf+436|0;Pa=Yf+432|0;Qa=Yf+428|0;Wa=Yf+424|0;Sa=Yf+420|0;Ua=Yf+416|0;Jb=Yf+412|0;Ta=Yf+408|0;Ib=Yf+404|0;Nb=Yf+400|0;Ob=Yf+396|0;jb=Yf+392|0;Pb=Yf+388|0;ib=Yf+384|0;jd=Yf+380|0;fe=Yf+376|0;yd=Yf+372|0;ve=Yf+368|0;je=Yf+364|0;ke=Yf+360|0;ee=Yf+356|0;ze=Yf+352|0;ge=Yf+348|0;ye=Yf+344|0;md=Yf+340|0;pd=Yf+336|0;qd=Yf+332|0;td=Yf+328|0;wd=Yf+324|0;xd=Yf+320|0;zd=Yf+316|0;Ad=Yf+312|0;Bd=Yf+308|0;Cd=Yf+304|0;Dd=Yf+300|0;de=Yf+296|0;le=Yf+292|0;me=Yf+288|0;ie=Yf+284|0;ue=Yf+280|0;he=Yf+276|0;Ae=Yf+272|
0;Ce=Yf+268|0;xe=Yf+264|0;Be=Yf+260|0;we=Yf+256|0;ec=Yf+252|0;_d=Yf+248|0;Id=Yf+244|0;fd=Yf+240|0;Jd=Yf+236|0;ed=Yf+232|0;Od=Yf+228|0;$d=Yf+224|0;Rd=Yf+220|0;Xd=Yf+216|0;lc=Yf+212|0;sc=Yf+208|0;tc=Yf+204|0;$c=Yf+200|0;Gd=Yf+196|0;Hd=Yf+192|0;Md=Yf+188|0;Nd=Yf+184|0;Vd=Yf+180|0;Pd=Yf+176|0;Qd=Yf+172|0;Wd=Yf+168|0;Sd=Yf+164|0;Ud=Yf+160|0;Ld=Yf+156|0;Td=Yf+152|0;Kd=Yf+148|0;gd=Yf+144|0;hd=Yf+140|0;be=Yf+136|0;id=Yf+132|0;ae=Yf+128|0;Qb=Yf+124|0;Ec=Yf+120|0;Dc=Yf+116|0;Uc=Yf+112|0;Mc=Yf+108|0;Nc=Yf+104|0;Lc=Yf+100|0;_b=Yf+96|0;Qc=Yf+92|0;Zb=Yf+88|0;Tb=Yf+84|0;Wb=Yf+80|0;vc=Yf+76|0;yc=Yf+72|0;Bc=Yf+68|0;Cc=Yf+64|0;Fc=Yf+60|0;Gc=Yf+56|0;Hc=Yf+52|0;Ic=Yf+48|0;Jc=Yf+44|0;Kc=Yf+40|0;Oc=Yf+36|0;Tc=Yf+32|0;Rc=Yf+28|0;Sc=Yf+24|0;Pc=Yf+20|0;$b=Yf+16|0;bc=Yf+12|0;Yb=Yf+8|0;ac=Yf+4|0;Xb=Yf;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Zf>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Yf+1488>>2]=.4257792830467224;g[Yf+1484>>2]=.9048270583152771;g[Yf+1480>>2]=.6374239921569824;g[Yf+1476>>2]=.7705132365226746;g[Yf+1472>>2]=.9980267286300659;g[Yf+1468>>2]=.06279052048921585;g[Yf+1464>>2]=.9921147227287292;g[Yf+1460>>2]=.12533323466777802;g[Yf+1456>>2]=.6845471262931824;g[Yf+1452>>2]=.728968620300293;g[Yf+1448>>2]=.4817536771297455;g[Yf+1444>>2]=.8763066530227661;g[Yf+1440>>2]=.8443279266357422;g[Yf+1436>>2]=.5358268022537231;g[Yf+1432>>2]=.24868988990783691;g[Yf+1428>>2]=.9685831665992737;g[Yf+1424>>2]=.25;g[Yf+1420>>2]=.55901700258255;g[Yf+1416>>2]=.5877852439880371;g[Yf+1412>>2]=.9510565400123596;c[Xf>>2]=c[Zf>>2];while(1){if((c[Xf>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[Mb>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*20<<2)>>2];g[Vc>>2]=+g[Da>>2]+ +g[Mb>>2];g[ce>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Ue>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[Ve>>2]=+g[ce>>2]+ +g[Ue>>2];g[We>>2]=+g[Vc>>2]+ +g[Ve>>2];g[eb>>2]=+g[ce>>2]-+g[Ue>>2];g[db>>2]=+g[Da>>2]-+g[Mb>>2];g[Xe>>2]=+g[u>>2]+ 
+g[We>>2];g[Yd>>2]=+g[eb>>2]*.9510565400123596-+g[db>>2]*.5877852439880371;g[fb>>2]=+g[db>>2]*.9510565400123596+ +g[eb>>2]*.5877852439880371;g[kf>>2]=(+g[Vc>>2]-+g[Ve>>2])*.55901700258255;g[lf>>2]=+g[u>>2]-+g[We>>2]*.25;g[mf>>2]=+g[kf>>2]+ +g[lf>>2];g[cc>>2]=+g[lf>>2]-+g[kf>>2];g[$a>>2]=+g[c[n>>2]>>2];g[nf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[of>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*20<<2)>>2];g[Ya>>2]=+g[nf>>2]+ +g[of>>2];g[qf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[rf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[Za>>2]=+g[qf>>2]+ +g[rf>>2];g[pf>>2]=+g[nf>>2]-+g[of>>2];g[ab>>2]=+g[Ya>>2]+ +g[Za>>2];g[sf>>2]=+g[qf>>2]-+g[rf>>2];g[tf>>2]=+g[pf>>2]*.9510565400123596+ +g[sf>>2]*.5877852439880371;g[Se>>2]=+g[$a>>2]+ +g[ab>>2];g[dc>>2]=+g[sf>>2]*.9510565400123596-+g[pf>>2]*.5877852439880371;g[_a>>2]=(+g[Ya>>2]-+g[Za>>2])*.55901700258255;g[bb>>2]=+g[$a>>2]-+g[ab>>2]*.25;g[cb>>2]=+g[_a>>2]+ +g[bb>>2];g[Zd>>2]=+g[bb>>2]-+g[_a>>2];g[xf>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[fa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[Gf>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[Ca>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[yf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[zf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*21<<2)>>2];g[Af>>2]=+g[yf>>2]+ +g[zf>>2];g[Bf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Cf>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<4<<2)>>2];g[Df>>2]=+g[Bf>>2]+ +g[Cf>>2];g[Ef>>2]=+g[Af>>2]+ +g[Df>>2];g[ka>>2]=+g[Bf>>2]-+g[Cf>>2];g[vf>>2]=(+g[Af>>2]-+g[Df>>2])*.55901700258255;g[ja>>2]=+g[yf>>2]-+g[zf>>2];g[w>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[x>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*21<<2)>>2];g[E>>2]=+g[w>>2]+ +g[x>>2];g[z>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<4<<2)>>2];g[da>>2]=+g[z>>2]+ +g[A>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[ga>>2]=+g[E>>2]+ +g[da>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[ea>>2]=(+g[E>>2]-+g[da>>2])*.55901700258255;g[Hf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[If>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*24<<2)>>2];g[Jf>>2]=+g[Hf>>2]+ 
+g[If>>2];g[Kf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[Lf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[Mf>>2]=+g[Kf>>2]+ +g[Lf>>2];g[Nf>>2]=+g[Jf>>2]+ +g[Mf>>2];g[J>>2]=+g[Kf>>2]-+g[Lf>>2];g[oa>>2]=(+g[Jf>>2]-+g[Mf>>2])*.55901700258255;g[I>>2]=+g[Hf>>2]-+g[If>>2];g[ra>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[sa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*24<<2)>>2];g[za>>2]=+g[ra>>2]+ +g[sa>>2];g[ua>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[Aa>>2]=+g[ua>>2]+ +g[va>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[F>>2]=+g[za>>2]+ +g[Aa>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[Ba>>2]=(+g[za>>2]-+g[Aa>>2])*.55901700258255;g[Ff>>2]=+g[xf>>2]+ +g[Ef>>2];g[Of>>2]=+g[Gf>>2]+ +g[Nf>>2];g[Pf>>2]=+g[Ff>>2]+ +g[Of>>2];g[Ge>>2]=+g[fa>>2]+ +g[ga>>2];g[He>>2]=+g[Ca>>2]+ +g[F>>2];g[Pe>>2]=+g[Ge>>2]+ +g[He>>2];g[C>>2]=+g[y>>2]*.9510565400123596+ +g[B>>2]*.5877852439880371;g[gc>>2]=+g[B>>2]*.9510565400123596-+g[y>>2]*.5877852439880371;g[wf>>2]=+g[xf>>2]-+g[Ef>>2]*.25;g[v>>2]=+g[vf>>2]+ +g[wf>>2];g[fc>>2]=+g[wf>>2]-+g[vf>>2];g[D>>2]=+g[v>>2]+ +g[C>>2];g[kd>>2]=+g[fc>>2]+ +g[gc>>2];g[Rb>>2]=+g[v>>2]-+g[C>>2];g[hc>>2]=+g[fc>>2]-+g[gc>>2];g[K>>2]=+g[I>>2]*.9510565400123596+ +g[J>>2]*.5877852439880371;g[mc>>2]=+g[J>>2]*.9510565400123596-+g[I>>2]*.5877852439880371;g[G>>2]=+g[Ca>>2]-+g[F>>2]*.25;g[H>>2]=+g[Ba>>2]+ +g[G>>2];g[nc>>2]=+g[G>>2]-+g[Ba>>2];g[L>>2]=+g[H>>2]-+g[K>>2];g[nd>>2]=+g[nc>>2]-+g[mc>>2];g[Vb>>2]=+g[K>>2]+ +g[H>>2];g[oc>>2]=+g[mc>>2]+ +g[nc>>2];g[xa>>2]=+g[ta>>2]*.9510565400123596+ +g[wa>>2]*.5877852439880371;g[qc>>2]=+g[wa>>2]*.9510565400123596-+g[ta>>2]*.5877852439880371;g[pa>>2]=+g[Gf>>2]-+g[Nf>>2]*.25;g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[pc>>2]=+g[pa>>2]-+g[oa>>2];g[ya>>2]=+g[qa>>2]+ +g[xa>>2];g[od>>2]=+g[pc>>2]+ +g[qc>>2];g[Ub>>2]=+g[qa>>2]-+g[xa>>2];g[rc>>2]=+g[pc>>2]-+g[qc>>2];g[la>>2]=+g[ja>>2]*.9510565400123596+ 
+g[ka>>2]*.5877852439880371;g[ic>>2]=+g[ka>>2]*.9510565400123596-+g[ja>>2]*.5877852439880371;g[ha>>2]=+g[fa>>2]-+g[ga>>2]*.25;g[ia>>2]=+g[ea>>2]+ +g[ha>>2];g[jc>>2]=+g[ha>>2]-+g[ea>>2];g[ma>>2]=+g[ia>>2]-+g[la>>2];g[ld>>2]=+g[jc>>2]-+g[ic>>2];g[Sb>>2]=+g[la>>2]+ +g[ia>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[Qf>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[aa>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[_e>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[wb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Rf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Sf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*22<<2)>>2];g[Tf>>2]=+g[Rf>>2]+ +g[Sf>>2];g[Uf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[Vf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[Wf>>2]=+g[Uf>>2]+ +g[Vf>>2];g[Ye>>2]=+g[Tf>>2]+ +g[Wf>>2];g[Ga>>2]=+g[Uf>>2]-+g[Vf>>2];g[O>>2]=(+g[Tf>>2]-+g[Wf>>2])*.55901700258255;g[Fa>>2]=+g[Rf>>2]-+g[Sf>>2];g[R>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[S>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*22<<2)>>2];g[Z>>2]=+g[R>>2]+ +g[S>>2];g[U>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[V>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[_>>2]=+g[U>>2]+ +g[V>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[ba>>2]=+g[Z>>2]+ +g[_>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[$>>2]=(+g[Z>>2]-+g[_>>2])*.55901700258255;g[$e>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[af>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*23<<2)>>2];g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[cf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[df>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[ef>>2]=+g[cf>>2]+ +g[df>>2];g[ff>>2]=+g[bf>>2]+ +g[ef>>2];g[Bb>>2]=+g[cf>>2]-+g[df>>2];g[Ka>>2]=(+g[bf>>2]-+g[ef>>2])*.55901700258255;g[Ab>>2]=+g[$e>>2]-+g[af>>2];g[Na>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[mb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*23<<2)>>2];g[tb>>2]=+g[Na>>2]+ +g[mb>>2];g[ob>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[pb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[ub>>2]=+g[ob>>2]+ +g[pb>>2];g[nb>>2]=+g[Na>>2]-+g[mb>>2];g[xb>>2]=+g[tb>>2]+ 
+g[ub>>2];g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[vb>>2]=(+g[tb>>2]-+g[ub>>2])*.55901700258255;g[Ze>>2]=+g[Qf>>2]+ +g[Ye>>2];g[gf>>2]=+g[_e>>2]+ +g[ff>>2];g[hf>>2]=+g[Ze>>2]+ +g[gf>>2];g[Je>>2]=+g[aa>>2]+ +g[ba>>2];g[Ke>>2]=+g[wb>>2]+ +g[xb>>2];g[Qe>>2]=+g[Je>>2]+ +g[Ke>>2];g[X>>2]=+g[T>>2]*.9510565400123596+ +g[W>>2]*.5877852439880371;g[Wc>>2]=+g[W>>2]*.9510565400123596-+g[T>>2]*.5877852439880371;g[P>>2]=+g[Qf>>2]-+g[Ye>>2]*.25;g[Q>>2]=+g[O>>2]+ +g[P>>2];g[uc>>2]=+g[P>>2]-+g[O>>2];g[Y>>2]=+g[Q>>2]+ +g[X>>2];g[rd>>2]=+g[uc>>2]+ +g[Wc>>2];g[xc>>2]=+g[Q>>2]-+g[X>>2];g[Xc>>2]=+g[uc>>2]-+g[Wc>>2];g[Cb>>2]=+g[Ab>>2]*.9510565400123596+ +g[Bb>>2]*.5877852439880371;g[dd>>2]=+g[Bb>>2]*.9510565400123596-+g[Ab>>2]*.5877852439880371;g[yb>>2]=+g[wb>>2]-+g[xb>>2]*.25;g[zb>>2]=+g[vb>>2]+ +g[yb>>2];g[Ed>>2]=+g[yb>>2]-+g[vb>>2];g[Db>>2]=+g[zb>>2]-+g[Cb>>2];g[ud>>2]=+g[Ed>>2]-+g[dd>>2];g[zc>>2]=+g[Cb>>2]+ +g[zb>>2];g[Fd>>2]=+g[dd>>2]+ +g[Ed>>2];g[rb>>2]=+g[nb>>2]*.9510565400123596+ +g[qb>>2]*.5877852439880371;g[bd>>2]=+g[qb>>2]*.9510565400123596-+g[nb>>2]*.5877852439880371;g[La>>2]=+g[_e>>2]-+g[ff>>2]*.25;g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[ad>>2]=+g[La>>2]-+g[Ka>>2];g[sb>>2]=+g[Ma>>2]+ +g[rb>>2];g[vd>>2]=+g[ad>>2]+ +g[bd>>2];g[Ac>>2]=+g[Ma>>2]-+g[rb>>2];g[cd>>2]=+g[ad>>2]-+g[bd>>2];g[Ha>>2]=+g[Fa>>2]*.9510565400123596+ +g[Ga>>2]*.5877852439880371;g[Yc>>2]=+g[Ga>>2]*.9510565400123596-+g[Fa>>2]*.5877852439880371;g[ca>>2]=+g[aa>>2]-+g[ba>>2]*.25;g[Ea>>2]=+g[$>>2]+ +g[ca>>2];g[Zc>>2]=+g[ca>>2]-+g[$>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2];g[sd>>2]=+g[Zc>>2]-+g[Yc>>2];g[wc>>2]=+g[Ha>>2]+ +g[Ea>>2];g[_c>>2]=+g[Yc>>2]+ +g[Zc>>2];g[De>>2]=(+g[Pf>>2]-+g[hf>>2])*.55901700258255;g[jf>>2]=+g[Pf>>2]+ +g[hf>>2];g[Ee>>2]=+g[Xe>>2]-+g[jf>>2]*.25;g[Ie>>2]=+g[Ge>>2]-+g[He>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[Me>>2]=+g[Ie>>2]*.9510565400123596+ +g[Le>>2]*.5877852439880371;g[Oe>>2]=+g[Le>>2]*.9510565400123596-+g[Ie>>2]*.5877852439880371;g[c[o>>2]>>2]=+g[Xe>>2]+ 
+g[jf>>2];g[Ne>>2]=+g[Ee>>2]-+g[De>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ne>>2]-+g[Oe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Ne>>2]+ +g[Oe>>2];g[Fe>>2]=+g[De>>2]+ +g[Ee>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[Fe>>2]-+g[Me>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Fe>>2]+ +g[Me>>2];g[Re>>2]=(+g[Pe>>2]-+g[Qe>>2])*.55901700258255;g[Te>>2]=+g[Pe>>2]+ +g[Qe>>2];g[ne>>2]=+g[Se>>2]-+g[Te>>2]*.25;g[pe>>2]=+g[Ff>>2]-+g[Of>>2];g[qe>>2]=+g[Ze>>2]-+g[gf>>2];g[re>>2]=+g[pe>>2]*.9510565400123596+ +g[qe>>2]*.5877852439880371;g[se>>2]=+g[qe>>2]*.9510565400123596-+g[pe>>2]*.5877852439880371;g[c[p>>2]>>2]=+g[Se>>2]+ +g[Te>>2];g[te>>2]=+g[ne>>2]-+g[Re>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[se>>2]+ +g[te>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[te>>2]-+g[se>>2];g[oe>>2]=+g[Re>>2]+ +g[ne>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[oe>>2]-+g[re>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[re>>2]+ +g[oe>>2];g[uf>>2]=+g[mf>>2]+ +g[tf>>2];g[gb>>2]=+g[cb>>2]-+g[fb>>2];g[na>>2]=+g[D>>2]*.9685831665992737+ +g[ma>>2]*.24868988990783691;g[M>>2]=+g[ya>>2]*.5358268022537231+ +g[L>>2]*.8443279266357422;g[N>>2]=+g[na>>2]+ +g[M>>2];g[Ja>>2]=+g[Y>>2]*.8763066530227661+ +g[Ia>>2]*.4817536771297455;g[Eb>>2]=+g[sb>>2]*.728968620300293+ +g[Db>>2]*.6845471262931824;g[Fb>>2]=+g[Ja>>2]+ +g[Eb>>2];g[Gb>>2]=+g[N>>2]+ +g[Fb>>2];g[lb>>2]=+g[Ja>>2]-+g[Eb>>2];g[Hb>>2]=(+g[N>>2]-+g[Fb>>2])*.55901700258255;g[kb>>2]=+g[na>>2]-+g[M>>2];g[Kb>>2]=+g[ma>>2]*.9685831665992737-+g[D>>2]*.24868988990783691;g[Lb>>2]=+g[L>>2]*.5358268022537231-+g[ya>>2]*.8443279266357422;g[Va>>2]=+g[Kb>>2]+ +g[Lb>>2];g[Pa>>2]=+g[Ia>>2]*.8763066530227661-+g[Y>>2]*.4817536771297455;g[Qa>>2]=+g[Db>>2]*.728968620300293-+g[sb>>2]*.6845471262931824;g[Wa>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Oa>>2]=+g[Kb>>2]-+g[Lb>>2];g[hb>>2]=+g[Va>>2]+ +g[Wa>>2];g[Ra>>2]=+g[Pa>>2]-+g[Qa>>2];g[Xa>>2]=(+g[Va>>2]-+g[Wa>>2])*.55901700258255;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[uf>>2]+ +g[Gb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[gb>>2]+ 
+g[hb>>2];g[Sa>>2]=+g[Oa>>2]*.9510565400123596+ +g[Ra>>2]*.5877852439880371;g[Ua>>2]=+g[Ra>>2]*.9510565400123596-+g[Oa>>2]*.5877852439880371;g[Ib>>2]=+g[uf>>2]-+g[Gb>>2]*.25;g[Jb>>2]=+g[Hb>>2]+ +g[Ib>>2];g[Ta>>2]=+g[Ib>>2]-+g[Hb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Jb>>2]-+g[Sa>>2];g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Ta>>2]+ +g[Ua>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Jb>>2]+ +g[Sa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ta>>2]-+g[Ua>>2];g[Nb>>2]=+g[kb>>2]*.9510565400123596+ +g[lb>>2]*.5877852439880371;g[Ob>>2]=+g[lb>>2]*.9510565400123596-+g[kb>>2]*.5877852439880371;g[ib>>2]=+g[gb>>2]-+g[hb>>2]*.25;g[jb>>2]=+g[Xa>>2]+ +g[ib>>2];g[Pb>>2]=+g[ib>>2]-+g[Xa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[jb>>2]-+g[Nb>>2];g[(c[p>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Pb>>2]-+g[Ob>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Nb>>2]+ +g[jb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ob>>2]+ +g[Pb>>2];g[jd>>2]=+g[cc>>2]+ +g[dc>>2];g[fe>>2]=+g[Zd>>2]-+g[Yd>>2];g[md>>2]=+g[kd>>2]*.728968620300293+ +g[ld>>2]*.6845471262931824;g[pd>>2]=+g[nd>>2]*.12533323466777802-+g[od>>2]*.9921147227287292;g[qd>>2]=+g[md>>2]+ +g[pd>>2];g[td>>2]=+g[rd>>2]*.06279052048921585+ +g[sd>>2]*.9980267286300659;g[wd>>2]=+g[ud>>2]*.7705132365226746-+g[vd>>2]*.6374239921569824;g[xd>>2]=+g[td>>2]+ +g[wd>>2];g[yd>>2]=+g[qd>>2]+ +g[xd>>2];g[ve>>2]=(+g[qd>>2]-+g[xd>>2])*.55901700258255;g[je>>2]=+g[md>>2]-+g[pd>>2];g[ke>>2]=+g[td>>2]-+g[wd>>2];g[zd>>2]=+g[ld>>2]*.728968620300293-+g[kd>>2]*.6845471262931824;g[Ad>>2]=+g[od>>2]*.12533323466777802+ +g[nd>>2]*.9921147227287292;g[Bd>>2]=+g[zd>>2]-+g[Ad>>2];g[Cd>>2]=+g[sd>>2]*.06279052048921585-+g[rd>>2]*.9980267286300659;g[Dd>>2]=+g[vd>>2]*.7705132365226746+ +g[ud>>2]*.6374239921569824;g[de>>2]=+g[Cd>>2]-+g[Dd>>2];g[ee>>2]=(+g[Bd>>2]-+g[de>>2])*.55901700258255;g[ze>>2]=+g[Cd>>2]+ +g[Dd>>2];g[ge>>2]=+g[Bd>>2]+ +g[de>>2];g[ye>>2]=+g[zd>>2]+ +g[Ad>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[jd>>2]+ +g[yd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[fe>>2]+ 
+g[ge>>2];g[le>>2]=+g[je>>2]*.9510565400123596+ +g[ke>>2]*.5877852439880371;g[me>>2]=+g[ke>>2]*.9510565400123596-+g[je>>2]*.5877852439880371;g[he>>2]=+g[fe>>2]-+g[ge>>2]*.25;g[ie>>2]=+g[ee>>2]+ +g[he>>2];g[ue>>2]=+g[he>>2]-+g[ee>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[ie>>2]-+g[le>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[ue>>2]-+g[me>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[le>>2]+ +g[ie>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[me>>2]+ +g[ue>>2];g[Ae>>2]=+g[ye>>2]*.9510565400123596+ +g[ze>>2]*.5877852439880371;g[Ce>>2]=+g[ze>>2]*.9510565400123596-+g[ye>>2]*.5877852439880371;g[we>>2]=+g[jd>>2]-+g[yd>>2]*.25;g[xe>>2]=+g[ve>>2]+ +g[we>>2];g[Be>>2]=+g[we>>2]-+g[ve>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[xe>>2]-+g[Ae>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Be>>2]+ +g[Ce>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[xe>>2]+ +g[Ae>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Be>>2]-+g[Ce>>2];g[ec>>2]=+g[cc>>2]-+g[dc>>2];g[_d>>2]=+g[Yd>>2]+ +g[Zd>>2];g[lc>>2]=+g[hc>>2]*.8763066530227661+ +g[kc>>2]*.4817536771297455;g[sc>>2]=+g[oc>>2]*.9048270583152771-+g[rc>>2]*.4257792830467224;g[tc>>2]=+g[lc>>2]+ +g[sc>>2];g[$c>>2]=+g[Xc>>2]*.5358268022537231+ +g[_c>>2]*.8443279266357422;g[Gd>>2]=+g[cd>>2]*.06279052048921585+ +g[Fd>>2]*.9980267286300659;g[Hd>>2]=+g[$c>>2]+ +g[Gd>>2];g[Id>>2]=+g[tc>>2]+ +g[Hd>>2];g[fd>>2]=+g[$c>>2]-+g[Gd>>2];g[Jd>>2]=(+g[tc>>2]-+g[Hd>>2])*.55901700258255;g[ed>>2]=+g[lc>>2]-+g[sc>>2];g[Md>>2]=+g[kc>>2]*.8763066530227661-+g[hc>>2]*.4817536771297455;g[Nd>>2]=+g[rc>>2]*.9048270583152771+ +g[oc>>2]*.4257792830467224;g[Vd>>2]=+g[Md>>2]-+g[Nd>>2];g[Pd>>2]=+g[_c>>2]*.5358268022537231-+g[Xc>>2]*.8443279266357422;g[Qd>>2]=+g[Fd>>2]*.06279052048921585-+g[cd>>2]*.9980267286300659;g[Wd>>2]=+g[Pd>>2]+ +g[Qd>>2];g[Od>>2]=+g[Md>>2]+ +g[Nd>>2];g[$d>>2]=+g[Vd>>2]+ +g[Wd>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Xd>>2]=(+g[Vd>>2]-+g[Wd>>2])*.55901700258255;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ec>>2]+ +g[Id>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_d>>2]+ 
+g[$d>>2];g[Sd>>2]=+g[Od>>2]*.9510565400123596+ +g[Rd>>2]*.5877852439880371;g[Ud>>2]=+g[Rd>>2]*.9510565400123596-+g[Od>>2]*.5877852439880371;g[Kd>>2]=+g[ec>>2]-+g[Id>>2]*.25;g[Ld>>2]=+g[Jd>>2]+ +g[Kd>>2];g[Td>>2]=+g[Kd>>2]-+g[Jd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Ld>>2]-+g[Sd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[Td>>2]+ +g[Ud>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ld>>2]+ +g[Sd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Td>>2]-+g[Ud>>2];g[gd>>2]=+g[ed>>2]*.9510565400123596+ +g[fd>>2]*.5877852439880371;g[hd>>2]=+g[fd>>2]*.9510565400123596-+g[ed>>2]*.5877852439880371;g[ae>>2]=+g[_d>>2]-+g[$d>>2]*.25;g[be>>2]=+g[Xd>>2]+ +g[ae>>2];g[id>>2]=+g[ae>>2]-+g[Xd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[be>>2]-+g[gd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[id>>2]-+g[hd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[gd>>2]+ +g[be>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[hd>>2]+ +g[id>>2];g[Qb>>2]=+g[mf>>2]-+g[tf>>2];g[Ec>>2]=+g[fb>>2]+ +g[cb>>2];g[Tb>>2]=+g[Rb>>2]*.5358268022537231+ +g[Sb>>2]*.8443279266357422;g[Wb>>2]=+g[Ub>>2]*.6374239921569824+ +g[Vb>>2]*.7705132365226746;g[vc>>2]=+g[Tb>>2]-+g[Wb>>2];g[yc>>2]=+g[wc>>2]*.9048270583152771-+g[xc>>2]*.4257792830467224;g[Bc>>2]=+g[zc>>2]*.12533323466777802-+g[Ac>>2]*.9921147227287292;g[Cc>>2]=+g[yc>>2]+ +g[Bc>>2];g[Dc>>2]=+g[vc>>2]+ +g[Cc>>2];g[Uc>>2]=(+g[vc>>2]-+g[Cc>>2])*.55901700258255;g[Mc>>2]=+g[yc>>2]-+g[Bc>>2];g[Nc>>2]=+g[Tb>>2]+ +g[Wb>>2];g[Fc>>2]=+g[Sb>>2]*.5358268022537231-+g[Rb>>2]*.8443279266357422;g[Gc>>2]=+g[Ub>>2]*.7705132365226746-+g[Vb>>2]*.6374239921569824;g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Ic>>2]=+g[xc>>2]*.9048270583152771+ +g[wc>>2]*.4257792830467224;g[Jc>>2]=+g[Ac>>2]*.12533323466777802+ +g[zc>>2]*.9921147227287292;g[Kc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Lc>>2]=+g[Hc>>2]-+g[Kc>>2];g[_b>>2]=+g[Jc>>2]-+g[Ic>>2];g[Qc>>2]=(+g[Hc>>2]+ +g[Kc>>2])*.55901700258255;g[Zb>>2]=+g[Fc>>2]-+g[Gc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Qb>>2]+ +g[Dc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ec>>2]+ 
+g[Lc>>2];g[Oc>>2]=+g[Mc>>2]*.9510565400123596-+g[Nc>>2]*.5877852439880371;g[Tc>>2]=+g[Nc>>2]*.9510565400123596+ +g[Mc>>2]*.5877852439880371;g[Pc>>2]=+g[Ec>>2]-+g[Lc>>2]*.25;g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[Sc>>2]=+g[Pc>>2]+ +g[Qc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Oc>>2]+ +g[Rc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Tc>>2]+ +g[Sc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[Rc>>2]-+g[Oc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Sc>>2]-+g[Tc>>2];g[$b>>2]=+g[Zb>>2]*.9510565400123596+ +g[_b>>2]*.5877852439880371;g[bc>>2]=+g[_b>>2]*.9510565400123596-+g[Zb>>2]*.5877852439880371;g[Xb>>2]=+g[Qb>>2]-+g[Dc>>2]*.25;g[Yb>>2]=+g[Uc>>2]+ +g[Xb>>2];g[ac>>2]=+g[Xb>>2]-+g[Uc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Yb>>2]-+g[$b>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[ac>>2]+ +g[bc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Yb>>2]+ +g[$b>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ac>>2]-+g[bc>>2];c[Xf>>2]=(c[Xf>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=Yf;return}function vi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,10,584);i=b;return}function wi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;z=i;i=i+64|0;m=z+52|0;n=z+48|0;o=z+44|0;p=z+40|0;q=z+36|0;r=z+32|0;A=z+28|0;s=z+24|0;t=z+20|0;y=z+16|0;u=z+12|0;v=z+8|0;w=z+4|0;x=z;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[A>>2]=j;c[s>>2]=k;c[t>>2]=l;c[y>>2]=c[A>>2];while(1){if((c[y>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[u>>2]-+g[v>>2];g[c[o>>2]>>2]=+g[u>>2]+ +g[v>>2];g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[w>>2]-+g[x>>2];g[c[p>>2]>>2]=+g[w>>2]+ 
+g[x>>2];c[y>>2]=(c[y>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2)}i=z;return}function xi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,11,648);i=b;return}function yi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af
=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0;qg=i;i=i+1568|0;m=qg+1552|0;n=qg+1548|0;o=qg+1544|0;p=qg+1540|0;q=qg+1536|0;r=qg+1532|0;rg=qg+1528|0;s=qg+1524|0;t=qg+1520|0;pg=qg+1488|0;of=qg+1484|0;Vd=qg+1480|0;Dd=qg+1476|0;D=qg+1472|0;Ca=qg+1468|0;Oc=qg+1464|0;oc=qg+1460|0;fb=qg+1456|0;Vf=qg+1452|0;ia=qg+1448|0;de=qg+1444|0;Wd=qg+1440|0;ib=qg+1436|0;Pc=qg+1432|0;L=qg+1428|0;pc=qg+1424|0;bg=qg+1420|0;qa=qg+1416|0;T=qg+1412|0;lb=qg+1408|0;Tc=qg+1404|0;sc=qg+1400|0;_d=qg+1396|0;ge=qg+1392|0;ig=qg+1388|0;xa=qg+1384|0;_=qg+1380|0;kb=qg+1376|0;Yb=qg+1372|0;rc=qg+1368|0;be=qg+1364|0;fe=qg+1360|0;Fb=qg+1356|0;gc=qg+1352|0;jc=qg+1348|0;Ya=qg+1344|0;If=qg+1340|0;Pf=qg+1336|0;se=qg+1332|0;te=qg+1328|0;ue=qg+1324|0;ve=qg+1320|0;vd=qg+1316|0;We=qg+1312|0;Sa=qg+1308|0;kc=qg+1304|0;Ad=qg+1300|0;Xe=qg+1296|0;$a=qg+1292|0;hc=qg+1288|0;Ga=qg+1284|0;$b=qg+1280|0;cc=qg+1276|0;vb=qg+1272|0;tf=qg+1268|0;Af=qg+1264|0;ne=qg+1260|0;oe=qg+1256|0;pe=qg+1252|0;qe=qg+1248|0;kd=qg+1244|0;Te=qg+1240|0;pb=qg+1236|0;ac=qg+1232|0;pd=qg+1228|0;Ue=qg+1224|0;yb=qg+1220|0;dc=qg+1216|0;Mb=qg+1212|0;Aa=qg+1208|0;z=qg+1204|0;eb=qg+1200|0;mf=qg+1196|0;db=qg+1192|0;C=qg+1188|0;Ba=qg+1184|0;u=qg+1180|0;Da=qg+1176|0;x=qg+1172|0;y=qg+1168|0;Vc=qg+1164|0;ce=qg+1160|0;A=qg+1156|0;B=qg+1152|0;Rf=qg+1148|0;G=qg+1144|0;ea=qg+1140|0;F=qg+1136|0;Uf=qg+1132|0;I=qg+1128|0;ha=qg+1124|0;J=qg+1120|0;pf=qg+1116|0;qf=qg+1112|0;E=qg+1108|0;da=qg+1104|0;Sf=qg+1100|0;Tf=qg+1096|0;fa=qg+1092|0;ga=qg+1088|0;gb=qg+1084|0;hb=qg+1080|0;H=qg+1076|0;K=qg+1072|0;Zf=qg+1068|0;Q=qg+1064|0;ma=qg+1060|0;O=qg+1056|0;ag=qg+1052|0;N=qg+1048|0;pa=qg+1044|0;R=qg+1040|0;P=qg+1036|0;S=qg+1032|0;Xf=qg+1028|0;Yf=qg+1024|0;ka=qg+1020|0;la=qg+1016|0;_f=qg+1012|0;$f=qg+1008|0;na=qg+1004|0;oa=qg+1e3|0;Rc=qg+996|0;Sc=qg+992|0;Yd=qg+988|0;Zd=qg+984|0;eg=qg+980|0;X=qg+976|0;ta
=qg+972|0;V=qg+968|0;hg=qg+964|0;U=qg+960|0;wa=qg+956|0;Y=qg+952|0;W=qg+948|0;Z=qg+944|0;cg=qg+940|0;dg=qg+936|0;ra=qg+932|0;sa=qg+928|0;fg=qg+924|0;gg=qg+920|0;ua=qg+916|0;va=qg+912|0;Uc=qg+908|0;Xb=qg+904|0;$d=qg+900|0;ae=qg+896|0;Ef=qg+892|0;Bb=qg+888|0;Xa=qg+884|0;wd=qg+880|0;Hf=qg+876|0;Ua=qg+872|0;Eb=qg+868|0;xd=qg+864|0;Lf=qg+860|0;Jb=qg+856|0;Ib=qg+852|0;sd=qg+848|0;Of=qg+844|0;Lb=qg+840|0;Qa=qg+836|0;td=qg+832|0;Cf=qg+828|0;Df=qg+824|0;Va=qg+820|0;Wa=qg+816|0;Ff=qg+812|0;Gf=qg+808|0;Cb=qg+804|0;Db=qg+800|0;Jf=qg+796|0;Kf=qg+792|0;Gb=qg+788|0;Hb=qg+784|0;Mf=qg+780|0;Nf=qg+776|0;Oa=qg+772|0;Pa=qg+768|0;rd=qg+764|0;ud=qg+760|0;Kb=qg+756|0;Ra=qg+752|0;yd=qg+748|0;zd=qg+744|0;Za=qg+740|0;_a=qg+736|0;ng=qg+732|0;rb=qg+728|0;Fa=qg+724|0;gd=qg+720|0;sf=qg+716|0;ba=qg+712|0;ub=qg+708|0;hd=qg+704|0;wf=qg+700|0;Ma=qg+696|0;nb=qg+692|0;md=qg+688|0;zf=qg+684|0;Ha=qg+680|0;Ka=qg+676|0;nd=qg+672|0;lg=qg+668|0;mg=qg+664|0;ca=qg+660|0;Ea=qg+656|0;og=qg+652|0;rf=qg+648|0;sb=qg+644|0;tb=qg+640|0;uf=qg+636|0;vf=qg+632|0;Na=qg+628|0;mb=qg+624|0;xf=qg+620|0;yf=qg+616|0;Ia=qg+612|0;Ja=qg+608|0;id=qg+604|0;jd=qg+600|0;La=qg+596|0;ob=qg+592|0;ld=qg+588|0;od=qg+584|0;wb=qg+580|0;xb=qg+576|0;fd=qg+572|0;ke=qg+568|0;Ne=qg+564|0;Pe=qg+560|0;Cd=qg+556|0;je=qg+552|0;ie=qg+548|0;Oe=qg+544|0;Xd=qg+540|0;ed=qg+536|0;le=qg+532|0;me=qg+528|0;qd=qg+524|0;Bd=qg+520|0;ee=qg+516|0;he=qg+512|0;Se=qg+508|0;cf=qg+504|0;ff=qg+500|0;hf=qg+496|0;Ze=qg+492|0;bf=qg+488|0;af=qg+484|0;gf=qg+480|0;Qe=qg+476|0;Re=qg+472|0;df=qg+468|0;ef=qg+464|0;Ve=qg+460|0;Ye=qg+456|0;_e=qg+452|0;$e=qg+448|0;lf=qg+444|0;Ce=qg+440|0;Fe=qg+436|0;He=qg+432|0;xe=qg+428|0;Be=qg+424|0;Ae=qg+420|0;Ge=qg+416|0;jf=qg+412|0;kf=qg+408|0;De=qg+404|0;Ee=qg+400|0;re=qg+396|0;we=qg+392|0;ye=qg+388|0;ze=qg+384|0;kg=qg+380|0;Ie=qg+376|0;Le=qg+372|0;nf=qg+368|0;v=qg+364|0;w=qg+360|0;za=qg+356|0;Me=qg+352|0;Wf=qg+348|0;jg=qg+344|0;Je=qg+340|0;Ke=qg+336|0;Bf=qg+332|0;Qf=qg+328|0;ja=qg+324|0;ya=qg+320|0;aa=qg+316|0;Qb=qg+312|0;Ob=qg+308|0;Ub=qg
+304|0;Ab=qg+300|0;Rb=qg+296|0;bb=qg+292|0;Sb=qg+288|0;M=qg+284|0;$=qg+280|0;jb=qg+276|0;Nb=qg+272|0;qb=qg+268|0;zb=qg+264|0;Ta=qg+260|0;ab=qg+256|0;cb=qg+252|0;Vb=qg+248|0;Pb=qg+244|0;Tb=qg+240|0;_b=qg+236|0;Xc=qg+232|0;uc=qg+228|0;$c=qg+224|0;fc=qg+220|0;Yc=qg+216|0;mc=qg+212|0;Zc=qg+208|0;Qc=qg+204|0;Zb=qg+200|0;qc=qg+196|0;tc=qg+192|0;bc=qg+188|0;ec=qg+184|0;ic=qg+180|0;lc=qg+176|0;nc=qg+172|0;ad=qg+168|0;Wc=qg+164|0;_c=qg+160|0;dd=qg+156|0;Pd=qg+152|0;Nd=qg+148|0;Td=qg+144|0;Gd=qg+140|0;Qd=qg+136|0;Jd=qg+132|0;Rd=qg+128|0;bd=qg+124|0;cd=qg+120|0;Ld=qg+116|0;Md=qg+112|0;Ed=qg+108|0;Fd=qg+104|0;Hd=qg+100|0;Id=qg+96|0;Kd=qg+92|0;Ud=qg+88|0;Od=qg+84|0;Sd=qg+80|0;wc=qg+76|0;Ic=qg+72|0;Gc=qg+68|0;Mc=qg+64|0;zc=qg+60|0;Jc=qg+56|0;Cc=qg+52|0;Kc=qg+48|0;Wb=qg+44|0;vc=qg+40|0;Ec=qg+36|0;Fc=qg+32|0;xc=qg+28|0;yc=qg+24|0;Ac=qg+20|0;Bc=qg+16|0;Dc=qg+12|0;Nc=qg+8|0;Hc=qg+4|0;Lc=qg;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[rg>>2]=j;c[s>>2]=k;c[t>>2]=l;g[qg+1516>>2]=.8314695954322815;g[qg+1512>>2]=.5555702447891235;g[qg+1508>>2]=.19509032368659973;g[qg+1504>>2]=.9807852506637573;g[qg+1500>>2]=.9238795042037964;g[qg+1496>>2]=.3826834261417389;g[qg+1492>>2]=.7071067690849304;c[pg>>2]=c[rg>>2];while(1){if((c[pg>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<4<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[Aa>>2]=+g[u>>2]-+g[Da>>2];g[x>>2]=+g[c[n>>2]>>2];g[y>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<4<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[eb>>2]=+g[x>>2]-+g[y>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ce>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*24<<2)>>2];g[mf>>2]=+g[Vc>>2]+ +g[ce>>2];g[db>>2]=+g[Vc>>2]-+g[ce>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[B>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*24<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[Ba>>2]=+g[A>>2]-+g[B>>2];g[of>>2]=+g[Mb>>2]+ +g[mf>>2];g[Vd>>2]=+g[Mb>>2]-+g[mf>>2];g[Dd>>2]=+g[z>>2]-+g[C>>2];g[D>>2]=+g[z>>2]+ +g[C>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[Oc>>2]=+g[Aa>>2]+ 
+g[Ba>>2];g[oc>>2]=+g[eb>>2]-+g[db>>2];g[fb>>2]=+g[db>>2]+ +g[eb>>2];g[pf>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[qf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*20<<2)>>2];g[Rf>>2]=+g[pf>>2]+ +g[qf>>2];g[G>>2]=+g[pf>>2]-+g[qf>>2];g[E>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[da>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*20<<2)>>2];g[ea>>2]=+g[E>>2]+ +g[da>>2];g[F>>2]=+g[E>>2]-+g[da>>2];g[Sf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*28<<2)>>2];g[Tf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[Uf>>2]=+g[Sf>>2]+ +g[Tf>>2];g[I>>2]=+g[Sf>>2]-+g[Tf>>2];g[fa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*28<<2)>>2];g[ga>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[J>>2]=+g[fa>>2]-+g[ga>>2];g[Vf>>2]=+g[Rf>>2]+ +g[Uf>>2];g[ia>>2]=+g[ea>>2]+ +g[ha>>2];g[de>>2]=+g[Uf>>2]-+g[Rf>>2];g[Wd>>2]=+g[ea>>2]-+g[ha>>2];g[gb>>2]=+g[I>>2]-+g[J>>2];g[hb>>2]=+g[G>>2]+ +g[F>>2];g[ib>>2]=(+g[gb>>2]-+g[hb>>2])*.7071067690849304;g[Pc>>2]=(+g[hb>>2]+ +g[gb>>2])*.7071067690849304;g[H>>2]=+g[F>>2]-+g[G>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[L>>2]=(+g[H>>2]-+g[K>>2])*.7071067690849304;g[pc>>2]=(+g[H>>2]+ +g[K>>2])*.7071067690849304;g[Xf>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[Yf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[Zf>>2]=+g[Xf>>2]+ +g[Yf>>2];g[Q>>2]=+g[Xf>>2]-+g[Yf>>2];g[ka>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[la>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[O>>2]=+g[ka>>2]-+g[la>>2];g[_f>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[$f>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*26<<2)>>2];g[ag>>2]=+g[_f>>2]+ +g[$f>>2];g[N>>2]=+g[_f>>2]-+g[$f>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[oa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*26<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[R>>2]=+g[na>>2]-+g[oa>>2];g[bg>>2]=+g[Zf>>2]+ +g[ag>>2];g[qa>>2]=+g[ma>>2]+ +g[pa>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[T>>2]=+g[P>>2]*.3826834261417389-+g[S>>2]*.9238795042037964;g[lb>>2]=+g[P>>2]*.9238795042037964+ +g[S>>2]*.3826834261417389;g[Rc>>2]=+g[O>>2]-+g[N>>2];g[Sc>>2]=+g[Q>>2]+ 
+g[R>>2];g[Tc>>2]=+g[Rc>>2]*.9238795042037964-+g[Sc>>2]*.3826834261417389;g[sc>>2]=+g[Rc>>2]*.3826834261417389+ +g[Sc>>2]*.9238795042037964;g[Yd>>2]=+g[ma>>2]-+g[pa>>2];g[Zd>>2]=+g[Zf>>2]-+g[ag>>2];g[_d>>2]=+g[Yd>>2]-+g[Zd>>2];g[ge>>2]=+g[Zd>>2]+ +g[Yd>>2];g[cg>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*30<<2)>>2];g[dg>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[eg>>2]=+g[cg>>2]+ +g[dg>>2];g[X>>2]=+g[cg>>2]-+g[dg>>2];g[ra>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*30<<2)>>2];g[sa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[V>>2]=+g[ra>>2]-+g[sa>>2];g[fg>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[gg>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*22<<2)>>2];g[hg>>2]=+g[fg>>2]+ +g[gg>>2];g[U>>2]=+g[fg>>2]-+g[gg>>2];g[ua>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*22<<2)>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[Y>>2]=+g[ua>>2]-+g[va>>2];g[ig>>2]=+g[eg>>2]+ +g[hg>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=+g[W>>2]*.3826834261417389+ +g[Z>>2]*.9238795042037964;g[kb>>2]=+g[Z>>2]*.3826834261417389-+g[W>>2]*.9238795042037964;g[Uc>>2]=+g[V>>2]-+g[U>>2];g[Xb>>2]=+g[X>>2]+ +g[Y>>2];g[Yb>>2]=+g[Uc>>2]*.9238795042037964+ +g[Xb>>2]*.3826834261417389;g[rc>>2]=+g[Xb>>2]*.9238795042037964-+g[Uc>>2]*.3826834261417389;g[$d>>2]=+g[eg>>2]-+g[hg>>2];g[ae>>2]=+g[ta>>2]-+g[wa>>2];g[be>>2]=+g[$d>>2]+ +g[ae>>2];g[fe>>2]=+g[$d>>2]-+g[ae>>2];g[Cf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*31<<2)>>2];g[Df>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[Ef>>2]=+g[Cf>>2]+ +g[Df>>2];g[Bb>>2]=+g[Cf>>2]-+g[Df>>2];g[Va>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*31<<2)>>2];g[Wa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[wd>>2]=+g[Va>>2]+ +g[Wa>>2];g[Ff>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Gf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*23<<2)>>2];g[Hf>>2]=+g[Ff>>2]+ 
+g[Gf>>2];g[Ua>>2]=+g[Ff>>2]-+g[Gf>>2];g[Cb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Db>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*23<<2)>>2];g[Eb>>2]=+g[Cb>>2]-+g[Db>>2];g[xd>>2]=+g[Cb>>2]+ +g[Db>>2];g[Jf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Kf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[Lf>>2]=+g[Jf>>2]+ +g[Kf>>2];g[Jb>>2]=+g[Jf>>2]-+g[Kf>>2];g[Gb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Hb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[Ib>>2]=+g[Gb>>2]-+g[Hb>>2];g[sd>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Mf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*27<<2)>>2];g[Nf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Of>>2]=+g[Mf>>2]+ +g[Nf>>2];g[Lb>>2]=+g[Mf>>2]-+g[Nf>>2];g[Oa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*27<<2)>>2];g[Pa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[td>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Fb>>2]=+g[Bb>>2]-+g[Eb>>2];g[gc>>2]=+g[Bb>>2]+ +g[Eb>>2];g[jc>>2]=+g[Xa>>2]-+g[Ua>>2];g[Ya>>2]=+g[Ua>>2]+ +g[Xa>>2];g[If>>2]=+g[Ef>>2]+ +g[Hf>>2];g[Pf>>2]=+g[Lf>>2]+ +g[Of>>2];g[se>>2]=+g[If>>2]-+g[Pf>>2];g[te>>2]=+g[wd>>2]+ +g[xd>>2];g[ue>>2]=+g[sd>>2]+ +g[td>>2];g[ve>>2]=+g[te>>2]-+g[ue>>2];g[rd>>2]=+g[Ef>>2]-+g[Hf>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[vd>>2]=+g[rd>>2]-+g[ud>>2];g[We>>2]=+g[rd>>2]+ +g[ud>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[Ra>>2]=+g[Lb>>2]+ +g[Qa>>2];g[Sa>>2]=(+g[Kb>>2]-+g[Ra>>2])*.7071067690849304;g[kc>>2]=(+g[Kb>>2]+ +g[Ra>>2])*.7071067690849304;g[yd>>2]=+g[wd>>2]-+g[xd>>2];g[zd>>2]=+g[Of>>2]-+g[Lf>>2];g[Ad>>2]=+g[yd>>2]-+g[zd>>2];g[Xe>>2]=+g[zd>>2]+ +g[yd>>2];g[Za>>2]=+g[Lb>>2]-+g[Qa>>2];g[_a>>2]=+g[Jb>>2]+ +g[Ib>>2];g[$a>>2]=(+g[Za>>2]-+g[_a>>2])*.7071067690849304;g[hc>>2]=(+g[_a>>2]+ +g[Za>>2])*.7071067690849304;g[lg>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[mg>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[ng>>2]=+g[lg>>2]+ +g[mg>>2];g[rb>>2]=+g[lg>>2]-+g[mg>>2];g[ca>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[Ea>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[Fa>>2]=+g[ca>>2]-+g[Ea>>2];g[gd>>2]=+g[ca>>2]+ 
+g[Ea>>2];g[og>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[rf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*25<<2)>>2];g[sf>>2]=+g[og>>2]+ +g[rf>>2];g[ba>>2]=+g[og>>2]-+g[rf>>2];g[sb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[tb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*25<<2)>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[hd>>2]=+g[sb>>2]+ +g[tb>>2];g[uf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[vf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*21<<2)>>2];g[wf>>2]=+g[uf>>2]+ +g[vf>>2];g[Ma>>2]=+g[uf>>2]-+g[vf>>2];g[Na>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[mb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*21<<2)>>2];g[nb>>2]=+g[Na>>2]-+g[mb>>2];g[md>>2]=+g[Na>>2]+ +g[mb>>2];g[xf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*29<<2)>>2];g[yf>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[zf>>2]=+g[xf>>2]+ +g[yf>>2];g[Ha>>2]=+g[xf>>2]-+g[yf>>2];g[Ia>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*29<<2)>>2];g[Ja>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[nd>>2]=+g[Ia>>2]+ +g[Ja>>2];g[Ga>>2]=+g[ba>>2]+ +g[Fa>>2];g[$b>>2]=+g[rb>>2]+ +g[ub>>2];g[cc>>2]=+g[Fa>>2]-+g[ba>>2];g[vb>>2]=+g[rb>>2]-+g[ub>>2];g[tf>>2]=+g[ng>>2]+ +g[sf>>2];g[Af>>2]=+g[wf>>2]+ +g[zf>>2];g[ne>>2]=+g[tf>>2]-+g[Af>>2];g[oe>>2]=+g[gd>>2]+ +g[hd>>2];g[pe>>2]=+g[md>>2]+ +g[nd>>2];g[qe>>2]=+g[oe>>2]-+g[pe>>2];g[id>>2]=+g[gd>>2]-+g[hd>>2];g[jd>>2]=+g[zf>>2]-+g[wf>>2];g[kd>>2]=+g[id>>2]-+g[jd>>2];g[Te>>2]=+g[jd>>2]+ +g[id>>2];g[La>>2]=+g[Ha>>2]-+g[Ka>>2];g[ob>>2]=+g[Ma>>2]+ +g[nb>>2];g[pb>>2]=(+g[La>>2]-+g[ob>>2])*.7071067690849304;g[ac>>2]=(+g[ob>>2]+ +g[La>>2])*.7071067690849304;g[ld>>2]=+g[ng>>2]-+g[sf>>2];g[od>>2]=+g[md>>2]-+g[nd>>2];g[pd>>2]=+g[ld>>2]-+g[od>>2];g[Ue>>2]=+g[ld>>2]+ +g[od>>2];g[wb>>2]=+g[nb>>2]-+g[Ma>>2];g[xb>>2]=+g[Ha>>2]+ +g[Ka>>2];g[yb>>2]=(+g[wb>>2]-+g[xb>>2])*.7071067690849304;g[dc>>2]=(+g[wb>>2]+ +g[xb>>2])*.7071067690849304;g[Xd>>2]=+g[Vd>>2]-+g[Wd>>2];g[ed>>2]=(+g[_d>>2]-+g[be>>2])*.7071067690849304;g[fd>>2]=+g[Xd>>2]+ 
+g[ed>>2];g[ke>>2]=+g[Xd>>2]-+g[ed>>2];g[le>>2]=+g[kd>>2]*.3826834261417389-+g[pd>>2]*.9238795042037964;g[me>>2]=+g[Ad>>2]*.3826834261417389+ +g[vd>>2]*.9238795042037964;g[Ne>>2]=+g[le>>2]-+g[me>>2];g[Pe>>2]=+g[le>>2]+ +g[me>>2];g[qd>>2]=+g[kd>>2]*.9238795042037964+ +g[pd>>2]*.3826834261417389;g[Bd>>2]=+g[vd>>2]*.3826834261417389-+g[Ad>>2]*.9238795042037964;g[Cd>>2]=+g[qd>>2]+ +g[Bd>>2];g[je>>2]=+g[Bd>>2]-+g[qd>>2];g[ee>>2]=+g[Dd>>2]-+g[de>>2];g[he>>2]=(+g[fe>>2]-+g[ge>>2])*.7071067690849304;g[ie>>2]=+g[ee>>2]-+g[he>>2];g[Oe>>2]=+g[ee>>2]+ +g[he>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[fd>>2]-+g[Cd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Oe>>2]-+g[Pe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[fd>>2]+ +g[Cd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Oe>>2]+ +g[Pe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[ie>>2]-+g[je>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[ke>>2]-+g[Ne>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ie>>2]+ +g[je>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[ke>>2]+ +g[Ne>>2];g[Qe>>2]=+g[Vd>>2]+ +g[Wd>>2];g[Re>>2]=(+g[ge>>2]+ +g[fe>>2])*.7071067690849304;g[Se>>2]=+g[Qe>>2]+ +g[Re>>2];g[cf>>2]=+g[Qe>>2]-+g[Re>>2];g[df>>2]=+g[Te>>2]*.9238795042037964-+g[Ue>>2]*.3826834261417389;g[ef>>2]=+g[Xe>>2]*.9238795042037964+ +g[We>>2]*.3826834261417389;g[ff>>2]=+g[df>>2]-+g[ef>>2];g[hf>>2]=+g[df>>2]+ +g[ef>>2];g[Ve>>2]=+g[Te>>2]*.3826834261417389+ +g[Ue>>2]*.9238795042037964;g[Ye>>2]=+g[We>>2]*.9238795042037964-+g[Xe>>2]*.3826834261417389;g[Ze>>2]=+g[Ve>>2]+ +g[Ye>>2];g[bf>>2]=+g[Ye>>2]-+g[Ve>>2];g[_e>>2]=+g[de>>2]+ +g[Dd>>2];g[$e>>2]=(+g[_d>>2]+ +g[be>>2])*.7071067690849304;g[af>>2]=+g[_e>>2]-+g[$e>>2];g[gf>>2]=+g[_e>>2]+ +g[$e>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Se>>2]-+g[Ze>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[gf>>2]-+g[hf>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Se>>2]+ +g[Ze>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[gf>>2]+ 
+g[hf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[af>>2]-+g[bf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[cf>>2]-+g[ff>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[af>>2]+ +g[bf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[cf>>2]+ +g[ff>>2];g[jf>>2]=+g[of>>2]-+g[Vf>>2];g[kf>>2]=+g[qa>>2]-+g[xa>>2];g[lf>>2]=+g[jf>>2]+ +g[kf>>2];g[Ce>>2]=+g[jf>>2]-+g[kf>>2];g[De>>2]=+g[qe>>2]-+g[ne>>2];g[Ee>>2]=+g[se>>2]+ +g[ve>>2];g[Fe>>2]=(+g[De>>2]-+g[Ee>>2])*.7071067690849304;g[He>>2]=(+g[De>>2]+ +g[Ee>>2])*.7071067690849304;g[re>>2]=+g[ne>>2]+ +g[qe>>2];g[we>>2]=+g[se>>2]-+g[ve>>2];g[xe>>2]=(+g[re>>2]+ +g[we>>2])*.7071067690849304;g[Be>>2]=(+g[we>>2]-+g[re>>2])*.7071067690849304;g[ye>>2]=+g[D>>2]-+g[ia>>2];g[ze>>2]=+g[ig>>2]-+g[bg>>2];g[Ae>>2]=+g[ye>>2]-+g[ze>>2];g[Ge>>2]=+g[ze>>2]+ +g[ye>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[lf>>2]-+g[xe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[Ge>>2]-+g[He>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[lf>>2]+ +g[xe>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ge>>2]+ +g[He>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[Ae>>2]-+g[Be>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[Ce>>2]-+g[Fe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ae>>2]+ +g[Be>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ce>>2]+ +g[Fe>>2];g[Wf>>2]=+g[of>>2]+ +g[Vf>>2];g[jg>>2]=+g[bg>>2]+ +g[ig>>2];g[kg>>2]=+g[Wf>>2]+ +g[jg>>2];g[Ie>>2]=+g[Wf>>2]-+g[jg>>2];g[Je>>2]=+g[oe>>2]+ +g[pe>>2];g[Ke>>2]=+g[te>>2]+ +g[ue>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[nf>>2]=+g[Je>>2]+ +g[Ke>>2];g[Bf>>2]=+g[tf>>2]+ +g[Af>>2];g[Qf>>2]=+g[If>>2]+ +g[Pf>>2];g[v>>2]=+g[Bf>>2]+ +g[Qf>>2];g[w>>2]=+g[Qf>>2]-+g[Bf>>2];g[ja>>2]=+g[D>>2]+ +g[ia>>2];g[ya>>2]=+g[qa>>2]+ +g[xa>>2];g[za>>2]=+g[ja>>2]-+g[ya>>2];g[Me>>2]=+g[ja>>2]+ +g[ya>>2];g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[kg>>2]-+g[v>>2];g[(c[p>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Me>>2]-+g[nf>>2];g[c[o>>2]>>2]=+g[kg>>2]+ +g[v>>2];g[c[p>>2]>>2]=+g[Me>>2]+ +g[nf>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[w>>2]+ 
+g[za>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ie>>2]+ +g[Le>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[za>>2]-+g[w>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Ie>>2]-+g[Le>>2];g[M>>2]=+g[Ca>>2]-+g[L>>2];g[$>>2]=+g[T>>2]-+g[_>>2];g[aa>>2]=+g[M>>2]+ +g[$>>2];g[Qb>>2]=+g[M>>2]-+g[$>>2];g[jb>>2]=+g[fb>>2]-+g[ib>>2];g[Nb>>2]=+g[kb>>2]-+g[lb>>2];g[Ob>>2]=+g[jb>>2]-+g[Nb>>2];g[Ub>>2]=+g[jb>>2]+ +g[Nb>>2];g[qb>>2]=+g[Ga>>2]-+g[pb>>2];g[zb>>2]=+g[vb>>2]-+g[yb>>2];g[Ab>>2]=+g[qb>>2]*.9807852506637573+ +g[zb>>2]*.19509032368659973;g[Rb>>2]=+g[qb>>2]*.19509032368659973-+g[zb>>2]*.9807852506637573;g[Ta>>2]=+g[Fb>>2]-+g[Sa>>2];g[ab>>2]=+g[Ya>>2]-+g[$a>>2];g[bb>>2]=+g[Ta>>2]*.19509032368659973-+g[ab>>2]*.9807852506637573;g[Sb>>2]=+g[ab>>2]*.19509032368659973+ +g[Ta>>2]*.9807852506637573;g[cb>>2]=+g[Ab>>2]+ +g[bb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[aa>>2]-+g[cb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[aa>>2]+ +g[cb>>2];g[Vb>>2]=+g[Rb>>2]+ +g[Sb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[Ub>>2]-+g[Vb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Pb>>2]=+g[bb>>2]-+g[Ab>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[Ob>>2]-+g[Pb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Tb>>2]=+g[Rb>>2]-+g[Sb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[Qb>>2]-+g[Tb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Qb>>2]+ +g[Tb>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Zb>>2]=+g[Tc>>2]-+g[Yb>>2];g[_b>>2]=+g[Qc>>2]+ +g[Zb>>2];g[Xc>>2]=+g[Qc>>2]-+g[Zb>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[tc>>2]=+g[rc>>2]-+g[sc>>2];g[uc>>2]=+g[qc>>2]-+g[tc>>2];g[$c>>2]=+g[qc>>2]+ +g[tc>>2];g[bc>>2]=+g[$b>>2]-+g[ac>>2];g[ec>>2]=+g[cc>>2]-+g[dc>>2];g[fc>>2]=+g[bc>>2]*.5555702447891235+ +g[ec>>2]*.8314695954322815;g[Yc>>2]=+g[ec>>2]*.5555702447891235-+g[bc>>2]*.8314695954322815;g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[mc>>2]=+g[ic>>2]*.5555702447891235-+g[lc>>2]*.8314695954322815;g[Zc>>2]=+g[ic>>2]*.8314695954322815+ 
+g[lc>>2]*.5555702447891235;g[nc>>2]=+g[fc>>2]+ +g[mc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[_b>>2]-+g[nc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[_b>>2]+ +g[nc>>2];g[ad>>2]=+g[Yc>>2]+ +g[Zc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[$c>>2]-+g[ad>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[$c>>2]+ +g[ad>>2];g[Wc>>2]=+g[mc>>2]-+g[fc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[uc>>2]-+g[Wc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[uc>>2]+ +g[Wc>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[Xc>>2]-+g[_c>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Xc>>2]+ +g[_c>>2];g[bd>>2]=+g[Oc>>2]+ +g[Pc>>2];g[cd>>2]=+g[sc>>2]+ +g[rc>>2];g[dd>>2]=+g[bd>>2]+ +g[cd>>2];g[Pd>>2]=+g[bd>>2]-+g[cd>>2];g[Ld>>2]=+g[oc>>2]+ +g[pc>>2];g[Md>>2]=+g[Tc>>2]+ +g[Yb>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[Td>>2]=+g[Ld>>2]+ +g[Md>>2];g[Ed>>2]=+g[$b>>2]+ +g[ac>>2];g[Fd>>2]=+g[cc>>2]+ +g[dc>>2];g[Gd>>2]=+g[Ed>>2]*.9807852506637573+ +g[Fd>>2]*.19509032368659973;g[Qd>>2]=+g[Fd>>2]*.9807852506637573-+g[Ed>>2]*.19509032368659973;g[Hd>>2]=+g[gc>>2]+ +g[hc>>2];g[Id>>2]=+g[jc>>2]+ +g[kc>>2];g[Jd>>2]=+g[Hd>>2]*.9807852506637573-+g[Id>>2]*.19509032368659973;g[Rd>>2]=+g[Hd>>2]*.19509032368659973+ +g[Id>>2]*.9807852506637573;g[Kd>>2]=+g[Gd>>2]+ +g[Jd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[dd>>2]-+g[Kd>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[dd>>2]+ +g[Kd>>2];g[Ud>>2]=+g[Qd>>2]+ +g[Rd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[Td>>2]-+g[Ud>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Td>>2]+ +g[Ud>>2];g[Od>>2]=+g[Jd>>2]-+g[Gd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[Nd>>2]-+g[Od>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Nd>>2]+ +g[Od>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[Pd>>2]-+g[Sd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Pd>>2]+ +g[Sd>>2];g[Wb>>2]=+g[Ca>>2]+ +g[L>>2];g[vc>>2]=+g[lb>>2]+ +g[kb>>2];g[wc>>2]=+g[Wb>>2]+ +g[vc>>2];g[Ic>>2]=+g[Wb>>2]-+g[vc>>2];g[Ec>>2]=+g[fb>>2]+ +g[ib>>2];g[Fc>>2]=+g[T>>2]+ 
+g[_>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Mc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[xc>>2]=+g[Ga>>2]+ +g[pb>>2];g[yc>>2]=+g[vb>>2]+ +g[yb>>2];g[zc>>2]=+g[xc>>2]*.5555702447891235+ +g[yc>>2]*.8314695954322815;g[Jc>>2]=+g[xc>>2]*.8314695954322815-+g[yc>>2]*.5555702447891235;g[Ac>>2]=+g[Fb>>2]+ +g[Sa>>2];g[Bc>>2]=+g[Ya>>2]+ +g[$a>>2];g[Cc>>2]=+g[Ac>>2]*.8314695954322815-+g[Bc>>2]*.5555702447891235;g[Kc>>2]=+g[Bc>>2]*.8314695954322815+ +g[Ac>>2]*.5555702447891235;g[Dc>>2]=+g[zc>>2]+ +g[Cc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[wc>>2]-+g[Dc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wc>>2]+ +g[Dc>>2];g[Nc>>2]=+g[Jc>>2]+ +g[Kc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[Mc>>2]-+g[Nc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Mc>>2]+ +g[Nc>>2];g[Hc>>2]=+g[Cc>>2]-+g[zc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[Gc>>2]-+g[Hc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Gc>>2]+ +g[Hc>>2];g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[Ic>>2]-+g[Lc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ic>>2]+ +g[Lc>>2];c[pg>>2]=(c[pg>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=qg;return}/* zi(a): coerces a to int, takes a 16-byte frame from the stack pointer i, stores a in it, calls hh(a, 12, 712) and then restores i. Returns nothing. NOTE(review): 12 is presumably a function-table index and 712 a static data address (likely registering the routine that ends just before this function) -- hh is defined elsewhere, confirm there. */function zi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,12,712);i=b;return}/* Ai(a,b,d,e,f,h,j,k,l): all nine arguments are coerced to int. Each pass reads 4-byte floats through g from addresses a and b at element offsets 0, f and 2*f, and writes results to addresses d and e at element offsets 0, h and 2*h. The arithmetic (weights .5 and .8660253882408142) has the form of a 3-point DFT with a/b as real/imaginary input and d/e as real/imaginary output. Runs j passes (none when j <= 0); after each pass a and b advance by k elements and d and e by l elements. Works in a 96-byte stack frame and returns nothing. */function Ai(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0;H=i;i=i+96|0;/* H is the base of a 96-byte frame; every local assigned below holds the address of a 4-byte slot in it. The arguments are spilled to slots m..t (j goes to I) and G is the pass counter. */m=H+92|0;n=H+88|0;o=H+84|0;p=H+80|0;q=H+76|0;r=H+72|0;I=H+68|0;s=H+64|0;t=H+60|0;G=H+48|0;u=H+44|0;D=H+40|0;x=H+36|0;C=H+32|0;B=H+28|0;E=H+24|0;y=H+20|0;F=H+16|0;v=H+12|0;w=H+8|0;z=H+4|0;A=H;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[I>>2]=j;c[s>>2]=k;c[t>>2]=l;g[H+56>>2]=.5;g[H+52>>2]=.8660253882408142;/* the two frame slots written just above are never read back in this function; the loop uses the literal values directly */c[G>>2]=c[I>>2];while(1){if((c[G>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[D>>2]=+g[c[n>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[C>>2]=(+g[w>>2]-+g[v>>2])*.8660253882408142;g[z>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[A>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[B>>2]=(+g[z>>2]-+g[A>>2])*.8660253882408142;g[E>>2]=+g[z>>2]+ +g[A>>2];g[c[o>>2]>>2]=+g[u>>2]+ +g[x>>2];g[c[p>>2]>>2]=+g[D>>2]+ +g[E>>2];g[y>>2]=+g[u>>2]-+g[x>>2]*.5;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[y>>2]-+g[B>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[y>>2]+ +g[B>>2];g[F>>2]=+g[D>>2]-+g[E>>2]*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]+ +g[F>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[F>>2]-+g[C>>2];c[G>>2]=(c[G>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2)}i=H;return}/* Bi(a): coerces a to int, takes a 16-byte frame from the stack pointer i, stores a in it, calls hh(a, 13, 776) and then restores i. Returns nothing. NOTE(review): 13 is presumably a function-table index and 776 a static data address (likely registering the routine defined just before this function) -- hh is defined elsewhere, confirm there. */function Bi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,13,776);i=b;return}/* Ci(a,b,d,e,f,h,j,k,l): all nine arguments are coerced to int. Each pass reads 4-byte floats through g from addresses a and b at element offsets 0, f, 2*f and 3*f, and writes results to addresses d and e at element offsets 0, h, 2*h and 3*h, using only additions and subtractions. The sums have the form of a 4-point DFT with a/b as real/imaginary input and d/e as real/imaginary output. Runs j passes (none when j <= 0); after each pass a and b advance by k elements and d and e by l elements. Works in a 112-byte stack frame and returns nothing. */function Ci(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0;L=i;i=i+112|0;/* L is the base of a 112-byte frame; every local assigned below holds the address of a 4-byte slot in it. The arguments are spilled to slots m..t (j goes to M) and K is the pass counter. */m=L+100|0;n=L+96|0;o=L+92|0;p=L+88|0;q=L+84|0;r=L+80|0;M=L+76|0;s=L+72|0;t=L+68|0;K=L+64|0;w=L+60|0;E=L+56|0;C=L+52|0;I=L+48|0;z=L+44|0;D=L+40|0;H=L+36|0;J=L+32|0;u=L+28|0;v=L+24|0;A=L+20|0;B=L+16|0;x=L+12|0;y=L+8|0;F=L+4|0;G=L;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[M>>2]=j;c[s>>2]=k;c[t>>2]=l;c[K>>2]=c[M>>2];while(1){if((c[K>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[E>>2]=+g[u>>2]-+g[v>>2];g[A>>2]=+g[c[n>>2]>>2];g[B>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[I>>2]=+g[A>>2]+ +g[B>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[y>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[D>>2]=+g[x>>2]-+g[y>>2];g[F>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[J>>2]=+g[F>>2]+ +g[G>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[w>>2]-+g[z>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[I>>2]-+g[J>>2];g[c[o>>2]>>2]=+g[w>>2]+ +g[z>>2];g[c[p>>2]>>2]=+g[I>>2]+ +g[J>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]-+g[D>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]+ +g[H>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[D>>2]+ +g[C>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[E>>2]-+g[H>>2];c[K>>2]=(c[K>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2)}i=L;return}/* Di(a): coerces a to int, takes a 16-byte frame from the stack pointer i, stores a in it, calls hh(a, 14, 840) and then restores i. Returns nothing. NOTE(review): 14 is presumably a function-table index and 840 a static data address (likely registering the routine defined just before this function) -- hh is defined elsewhere, confirm there. */function Di(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,14,840);i=b;return}/* Ei(a,b,d,e,f,h,j,k,l): all nine arguments are coerced to int. Each pass reads 4-byte floats through g from addresses a and b at element offsets 0, f, 2*f, 3*f and 4*f, and writes results to addresses d and e at element offsets 0, h, 2*h, 3*h and 4*h. The arithmetic (weights .25, .55901700258255, .5877852439880371 and .9510565400123596) has the form of a 5-point DFT with a/b as real/imaginary input and d/e as real/imaginary output (NOTE(review): the layout looks like FFTW's generated n1_5 codelet -- confirm against the C source). Runs j passes (none when j <= 0); after each pass a and b advance by k elements, d and e advance by l elements, and the stored copies of f and h are XORed with the heap word c[2998]. Works in a 192-byte stack frame and returns nothing. */function Ei(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0;$=i;i=i+192|0;/* $ is the base of a 192-byte frame; every local assigned below holds the address of a 4-byte slot in it. The arguments are spilled to slots m..t (j goes to aa) and _ is the pass counter. */m=$+180|0;n=$+176|0;o=$+172|0;p=$+168|0;q=$+164|0;r=$+160|0;aa=$+156|0;s=$+152|0;t=$+148|0;_=$+128|0;u=$+124|0;R=$+120|0;B=$+116|0;W=$+112|0;C=$+108|0;V=$+104|0;H=$+100|0;S=$+96|0;K=$+92|0;Q=$+88|0;v=$+84|0;w=$+80|0;x=$+76|0;y=$+72|0;z=$+68|0;A=$+64|0;F=$+60|0;G=$+56|0;O=$+52|0;I=$+48|0;J=$+44|0;P=$+40|0;L=$+36|0;N=$+32|0;E=$+28|0;M=$+24|0;D=$+20|0;X=$+16|0;Y=$+12|0;U=$+8|0;Z=$+4|0;T=$;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[aa>>2]=j;c[s>>2]=k;c[t>>2]=l;g[$+144>>2]=.25;g[$+140>>2]=.5877852439880371;g[$+136>>2]=.9510565400123596;g[$+132>>2]=.55901700258255;/* the four frame slots written just above are never read back in this function; the loop uses the literal values directly */c[_>>2]=c[aa>>2];while(1){if((c[_>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[R>>2]=+g[c[n>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[y>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[z>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[W>>2]=+g[y>>2]-+g[z>>2];g[C>>2]=(+g[x>>2]-+g[A>>2])*.55901700258255;g[V>>2]=+g[v>>2]-+g[w>>2];g[F>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[G>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[O>>2]=+g[F>>2]+ +g[G>>2];g[I>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[J>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[P>>2]=+g[I>>2]+ +g[J>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[S>>2]=+g[O>>2]+ +g[P>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[Q>>2]=(+g[O>>2]-+g[P>>2])*.55901700258255;g[c[o>>2]>>2]=+g[u>>2]+ +g[B>>2];g[c[p>>2]>>2]=+g[R>>2]+ +g[S>>2];g[L>>2]=+g[H>>2]*.9510565400123596+ +g[K>>2]*.5877852439880371;g[N>>2]=+g[K>>2]*.9510565400123596-+g[H>>2]*.5877852439880371;g[D>>2]=+g[u>>2]-+g[B>>2]*.25;g[E>>2]=+g[C>>2]+ +g[D>>2];g[M>>2]=+g[D>>2]-+g[C>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[E>>2]-+g[L>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[N>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[E>>2]+ 
+g[L>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[M>>2]-+g[N>>2];g[X>>2]=+g[V>>2]*.9510565400123596+ +g[W>>2]*.5877852439880371;g[Y>>2]=+g[W>>2]*.9510565400123596-+g[V>>2]*.5877852439880371;g[T>>2]=+g[R>>2]-+g[S>>2]*.25;g[U>>2]=+g[Q>>2]+ +g[T>>2];g[Z>>2]=+g[T>>2]-+g[Q>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[U>>2]-+g[X>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Z>>2]-+g[Y>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[X>>2]+ +g[U>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Y>>2]+ +g[Z>>2];c[_>>2]=(c[_>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);/* NOTE(review): the two statements below XOR the values stored at q and r (used above as the input and output element strides) with the heap word c[2998] after every pass; the strides stay unchanged only if that word is 0 -- confirm what is stored there. */c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=$;return}/* Fi(a): coerces a to int, takes a 16-byte frame from the stack pointer i, stores a in it, calls hh(a, 15, 904) and then restores i. Returns nothing. NOTE(review): 15 is presumably a function-table index and 904 a static data address (likely registering the routine defined just before this function) -- hh is defined elsewhere, confirm there. */function Fi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,15,904);i=b;return}function Gi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0
,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0
,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,Tm=0,Um=0,Vm=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,oo=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0;sq=i;i=i+3760|0;m=sq+3744|0;n=sq+3740|0;o=sq+3736|0;p=sq+3732|0;q=sq+3728|0;r=sq+3724|0;tq=sq+3720|0;s=sq+3716|0;t=sq+3712|0;rq=sq+3648|0;Ub=sq+3644|0;Hg=sq+3640|0;Uh=sq+3636|0;Me=sq+3632|0;Yp=sq+3628|0;fo=sq+3624|0;Pl=sq+3620|0;Ym=sq+3616|0;pf=sq+3612|0;Ig=sq+3608|0;ub=sq+3604|0;no=sq+3600|0;kl=sq+3596|0;xn=sq+3592|0;zc=sq+3588|0;Vh=sq+3584|0;lq=sq+3580|0;oo=sq+3576|0;nl=sq+3572|0;Rl=sq+3568|0;ql=sq+3564|0;Ql=sq+3560|0;Jb=sq+3556|0;Go=sq+3552|0;Hc=sq+3548|0;sf=sq+3544|0;Mg=sq+3540|0;Yh=sq+3536|0;Pg=sq+3532|0;Xh=sq+3528|0;Oc=sq+3524|0;rf=sq+3520|0;
ek=sq+3516|0;Jo=sq+3512|0;vl=sq+3508|0;An=sq+3504|0;yl=sq+3500|0;Bn=sq+3496|0;$a=sq+3492|0;Io=sq+3488|0;bc=sq+3484|0;Df=sq+3480|0;Ug=sq+3476|0;Gj=sq+3472|0;Xg=sq+3468|0;Hj=sq+3464|0;ic=sq+3460|0;Ef=sq+3456|0;Q=sq+3452|0;$o=sq+3448|0;Em=sq+3444|0;Sn=sq+3440|0;io=sq+3436|0;Mp=sq+3432|0;We=sq+3428|0;Sf=sq+3424|0;De=sq+3420|0;Vf=sq+3416|0;Li=sq+3412|0;_i=sq+3408|0;nm=sq+3404|0;Pn=sq+3400|0;Ai=sq+3396|0;Vj=sq+3392|0;tk=sq+3388|0;Lo=sq+3384|0;Cl=sq+3380|0;Dn=sq+3376|0;Hk=sq+3372|0;En=sq+3368|0;Pb=sq+3364|0;Mo=sq+3360|0;uc=sq+3356|0;Gf=sq+3352|0;$g=sq+3348|0;Jj=sq+3344|0;ch=sq+3340|0;Kj=sq+3336|0;ad=sq+3332|0;Hf=sq+3328|0;ja=sq+3324|0;Vo=sq+3320|0;Fl=sq+3316|0;In=sq+3312|0;So=sq+3308|0;Hp=sq+3304|0;Ud=sq+3300|0;Of=sq+3296|0;Bd=sq+3292|0;Lf=sq+3288|0;si=sq+3284|0;Oj=sq+3280|0;Pk=sq+3276|0;Ln=sq+3272|0;Jh=sq+3268|0;Rj=sq+3264|0;ya=sq+3260|0;To=sq+3256|0;Il=sq+3252|0;Mn=sq+3248|0;Yo=sq+3244|0;Ip=sq+3240|0;rd=sq+3236|0;Mf=sq+3232|0;de=sq+3228|0;Pf=sq+3224|0;vi=sq+3220|0;Sj=sq+3216|0;_k=sq+3212|0;Jn=sq+3208|0;oi=sq+3204|0;Pj=sq+3200|0;Ea=sq+3196|0;jo=sq+3192|0;Hm=sq+3188|0;Qn=sq+3184|0;cp=sq+3180|0;Np=sq+3176|0;te=sq+3172|0;Wf=sq+3168|0;Ge=sq+3164|0;Tf=sq+3160|0;Qh=sq+3156|0;Yi=sq+3152|0;ym=sq+3148|0;Tn=sq+3144|0;Hi=sq+3140|0;$i=sq+3136|0;Mb=sq+3132|0;Sb=sq+3128|0;Ka=sq+3124|0;Le=sq+3120|0;mf=sq+3116|0;Ke=sq+3112|0;Na=sq+3108|0;Tb=sq+3104|0;uk=sq+3100|0;Wb=sq+3096|0;pb=sq+3092|0;Vb=sq+3088|0;Wn=sq+3084|0;wc=sq+3080|0;sb=sq+3076|0;xc=sq+3072|0;u=sq+3068|0;Da=sq+3064|0;Ia=sq+3060|0;Ja=sq+3056|0;Vc=sq+3052|0;ce=sq+3048|0;La=sq+3044|0;Ma=sq+3040|0;Eh=sq+3036|0;Ni=sq+3032|0;nb=sq+3028|0;ob=sq+3024|0;Dl=sq+3020|0;Mm=sq+3016|0;qb=sq+3012|0;rb=sq+3008|0;vg=sq+3004|0;ep=sq+3e3|0;mb=sq+2996|0;tb=sq+2992|0;Lm=sq+2988|0;Ol=sq+2984|0;nf=sq+2980|0;of=sq+2976|0;il=sq+2972|0;jl=sq+2968|0;vc=sq+2964|0;yc=sq+2960|0;$p=sq+2956|0;Ec=sq+2952|0;xb=sq+2948|0;Cc=sq+2944|0;cq=sq+2940|0;Bc=sq+2936|0;Ab=sq+2932|0;Fc=sq+2928|0;gq=sq+2924|0;Lc=sq+2920|0;Eb=sq+2916|0;Jc=sq+2912|0;jq=sq+2908|0;Ic=sq+2904|0;H
b=sq+2900|0;Mc=sq+2896|0;Zp=sq+2892|0;_p=sq+2888|0;vb=sq+2884|0;wb=sq+2880|0;aq=sq+2876|0;bq=sq+2872|0;yb=sq+2868|0;zb=sq+2864|0;eq=sq+2860|0;fq=sq+2856|0;Cb=sq+2852|0;Db=sq+2848|0;hq=sq+2844|0;iq=sq+2840|0;Fb=sq+2836|0;Gb=sq+2832|0;dq=sq+2828|0;kq=sq+2824|0;ll=sq+2820|0;ml=sq+2816|0;ol=sq+2812|0;pl=sq+2808|0;Bb=sq+2804|0;Ib=sq+2800|0;Dc=sq+2796|0;Gc=sq+2792|0;Kg=sq+2788|0;Lg=sq+2784|0;Ng=sq+2780|0;Og=sq+2776|0;Kc=sq+2772|0;Nc=sq+2768|0;pq=sq+2764|0;cc=sq+2760|0;Pa=sq+2756|0;Sc=sq+2752|0;Xj=sq+2748|0;Rc=sq+2744|0;Sa=sq+2740|0;dc=sq+2736|0;ck=sq+2732|0;gc=sq+2728|0;Za=sq+2724|0;Yb=sq+2720|0;$j=sq+2716|0;fc=sq+2712|0;Wa=sq+2708|0;$b=sq+2704|0;nq=sq+2700|0;oq=sq+2696|0;Qa=sq+2692|0;Ra=sq+2688|0;Lb=sq+2684|0;Oa=sq+2680|0;qq=sq+2676|0;Wj=sq+2672|0;ak=sq+2668|0;bk=sq+2664|0;Uc=sq+2660|0;Xa=sq+2656|0;Ya=sq+2652|0;Xb=sq+2648|0;Zj=sq+2644|0;_j=sq+2640|0;Zb=sq+2636|0;Ua=sq+2632|0;Va=sq+2628|0;_b=sq+2624|0;Yj=sq+2620|0;dk=sq+2616|0;tl=sq+2612|0;ul=sq+2608|0;wl=sq+2604|0;xl=sq+2600|0;Ta=sq+2596|0;_a=sq+2592|0;Tc=sq+2588|0;ac=sq+2584|0;Sg=sq+2580|0;Tg=sq+2576|0;Vg=sq+2572|0;Wg=sq+2568|0;ec=sq+2564|0;hc=sq+2560|0;Ca=sq+2556|0;ge=sq+2552|0;ye=sq+2548|0;Am=sq+2544|0;H=sq+2540|0;ve=sq+2536|0;je=sq+2532|0;Bm=sq+2528|0;O=sq+2524|0;Nl=sq+2520|0;Ue=sq+2516|0;Ae=sq+2512|0;L=sq+2508|0;Ml=sq+2504|0;Pe=sq+2500|0;Be=sq+2496|0;Aa=sq+2492|0;Ba=sq+2488|0;he=sq+2484|0;ie=sq+2480|0;we=sq+2476|0;xe=sq+2472|0;F=sq+2468|0;G=sq+2464|0;M=sq+2460|0;N=sq+2456|0;Qe=sq+2452|0;Re=sq+2448|0;Se=sq+2444|0;Te=sq+2440|0;J=sq+2436|0;K=sq+2432|0;Oe=sq+2428|0;le=sq+2424|0;me=sq+2420|0;Ne=sq+2416|0;I=sq+2412|0;P=sq+2408|0;Cm=sq+2404|0;Dm=sq+2400|0;go=sq+2396|0;ho=sq+2392|0;ke=sq+2388|0;Ve=sq+2384|0;ze=sq+2380|0;Ce=sq+2376|0;Ji=sq+2372|0;Ki=sq+2368|0;Ll=sq+2364|0;mm=sq+2360|0;yi=sq+2356|0;zi=sq+2352|0;hk=sq+2348|0;Wc=sq+2344|0;cb=sq+2340|0;lc=sq+2336|0;kk=sq+2332|0;kc=sq+2328|0;fb=sq+2324|0;Xc=sq+2320|0;rk=sq+2316|0;_c=sq+2312|0;Nb=sq+2308|0;pc=sq+2304|0;ok=sq+2300|0;Zc=sq+2296|0;jb=sq+2292|0;sc=sq+2288|0;fk=sq+2284
|0;gk=sq+2280|0;db=sq+2276|0;eb=sq+2272|0;ab=sq+2268|0;bb=sq+2264|0;ik=sq+2260|0;jk=sq+2256|0;pk=sq+2252|0;qk=sq+2248|0;nc=sq+2244|0;kb=sq+2240|0;lb=sq+2236|0;oc=sq+2232|0;mk=sq+2228|0;nk=sq+2224|0;qc=sq+2220|0;hb=sq+2216|0;ib=sq+2212|0;rc=sq+2208|0;lk=sq+2204|0;sk=sq+2200|0;Al=sq+2196|0;Bl=sq+2192|0;Fk=sq+2188|0;Gk=sq+2184|0;gb=sq+2180|0;Ob=sq+2176|0;mc=sq+2172|0;tc=sq+2168|0;Zg=sq+2164|0;_g=sq+2160|0;ah=sq+2156|0;bh=sq+2152|0;Yc=sq+2148|0;$c=sq+2144|0;z=sq+2140|0;td=sq+2136|0;Hd=sq+2132|0;Lk=sq+2128|0;C=sq+2124|0;Ed=sq+2120|0;wd=sq+2116|0;Mk=sq+2112|0;ha=sq+2108|0;cl=sq+2104|0;Nd=sq+2100|0;zd=sq+2096|0;ea=sq+2092|0;bl=sq+2088|0;Sd=sq+2084|0;yd=sq+2080|0;x=sq+2076|0;y=sq+2072|0;ud=sq+2068|0;vd=sq+2064|0;Fd=sq+2060|0;Gd=sq+2056|0;A=sq+2052|0;B=sq+2048|0;fa=sq+2044|0;ga=sq+2040|0;Jd=sq+2036|0;Kd=sq+2032|0;Ld=sq+2028|0;Md=sq+2024|0;E=sq+2020|0;da=sq+2016|0;Od=sq+2012|0;Pd=sq+2008|0;Qd=sq+2004|0;Rd=sq+2e3|0;D=sq+1996|0;ia=sq+1992|0;al=sq+1988|0;El=sq+1984|0;Qo=sq+1980|0;Ro=sq+1976|0;Id=sq+1972|0;Td=sq+1968|0;xd=sq+1964|0;Ad=sq+1960|0;qi=sq+1956|0;ri=sq+1952|0;Nk=sq+1948|0;Ok=sq+1944|0;Hh=sq+1940|0;Ih=sq+1936|0;ma=sq+1932|0;Wk=sq+1928|0;pa=sq+1924|0;Xk=sq+1920|0;kd=sq+1916|0;pd=sq+1912|0;Yk=sq+1908|0;Vk=sq+1904|0;Oh=sq+1900|0;Nh=sq+1896|0;ta=sq+1892|0;Rk=sq+1888|0;wa=sq+1884|0;Sk=sq+1880|0;Zd=sq+1876|0;ed=sq+1872|0;Tk=sq+1868|0;Qk=sq+1864|0;Lh=sq+1860|0;Kh=sq+1856|0;ld=sq+1852|0;jd=sq+1848|0;gd=sq+1844|0;od=sq+1840|0;ka=sq+1836|0;la=sq+1832|0;hd=sq+1828|0;id=sq+1824|0;na=sq+1820|0;oa=sq+1816|0;md=sq+1812|0;nd=sq+1808|0;Vd=sq+1804|0;be=sq+1800|0;_d=sq+1796|0;Yd=sq+1792|0;ra=sq+1788|0;sa=sq+1784|0;$d=sq+1780|0;ae=sq+1776|0;ua=sq+1772|0;va=sq+1768|0;Wd=sq+1764|0;Xd=sq+1760|0;qa=sq+1756|0;xa=sq+1752|0;Gl=sq+1748|0;Hl=sq+1744|0;Wo=sq+1740|0;Xo=sq+1736|0;fd=sq+1732|0;qd=sq+1728|0;Cd=sq+1724|0;Dd=sq+1720|0;ti=sq+1716|0;ui=sq+1712|0;Uk=sq+1708|0;Zk=sq+1704|0;Mh=sq+1700|0;ni=sq+1696|0;T=sq+1692|0;om=sq+1688|0;W=sq+1684|0;pm=sq+1680|0;$e=sq+1676|0;ef=sq+1672|0;rm=sq+1668|0;qm=sq+
1664|0;Ci=sq+1660|0;Bi=sq+1656|0;_=sq+1652|0;um=sq+1648|0;ba=sq+1644|0;vm=sq+1640|0;lf=sq+1636|0;re=sq+1632|0;wm=sq+1628|0;tm=sq+1624|0;Fi=sq+1620|0;Ei=sq+1616|0;af=sq+1612|0;_e=sq+1608|0;Xe=sq+1604|0;df=sq+1600|0;R=sq+1596|0;S=sq+1592|0;Ye=sq+1588|0;Ze=sq+1584|0;U=sq+1580|0;V=sq+1576|0;bf=sq+1572|0;cf=sq+1568|0;ne=sq+1564|0;kf=sq+1560|0;gf=sq+1556|0;qe=sq+1552|0;Y=sq+1548|0;Z=sq+1544|0;hf=sq+1540|0;jf=sq+1536|0;$=sq+1532|0;aa=sq+1528|0;oe=sq+1524|0;pe=sq+1520|0;X=sq+1516|0;ca=sq+1512|0;Fm=sq+1508|0;Gm=sq+1504|0;ap=sq+1500|0;bp=sq+1496|0;ff=sq+1492|0;se=sq+1488|0;Ee=sq+1484|0;Fe=sq+1480|0;Mi=sq+1476|0;Ph=sq+1472|0;sm=sq+1468|0;xm=sq+1464|0;Di=sq+1460|0;Gi=sq+1456|0;w=sq+1452|0;sp=sq+1448|0;vp=sq+1444|0;xp=sq+1440|0;Ga=sq+1436|0;Ha=sq+1432|0;Rb=sq+1428|0;wp=sq+1424|0;mq=sq+1420|0;v=sq+1416|0;tp=sq+1412|0;up=sq+1408|0;za=sq+1404|0;Fa=sq+1400|0;Kb=sq+1396|0;Qb=sq+1392|0;Fp=sq+1388|0;Vp=sq+1384|0;Tp=sq+1380|0;qp=sq+1376|0;Kp=sq+1372|0;Wp=sq+1368|0;Pp=sq+1364|0;Xp=sq+1360|0;Dp=sq+1356|0;Ep=sq+1352|0;Rp=sq+1348|0;Sp=sq+1344|0;Gp=sq+1340|0;Jp=sq+1336|0;Lp=sq+1332|0;Op=sq+1328|0;Qp=sq+1324|0;rp=sq+1320|0;Up=sq+1316|0;pp=sq+1312|0;Ho=sq+1308|0;Bo=sq+1304|0;kp=sq+1300|0;po=sq+1296|0;Oo=sq+1292|0;lp=sq+1288|0;ip=sq+1284|0;zp=sq+1280|0;so=sq+1276|0;Co=sq+1272|0;_o=sq+1268|0;wo=sq+1264|0;fp=sq+1260|0;yp=sq+1256|0;lo=sq+1252|0;xo=sq+1248|0;Ko=sq+1244|0;No=sq+1240|0;Uo=sq+1236|0;Zo=sq+1232|0;gp=sq+1228|0;hp=sq+1224|0;qo=sq+1220|0;ro=sq+1216|0;Eo=sq+1212|0;Fo=sq+1208|0;dp=sq+1204|0;ko=sq+1200|0;Po=sq+1196|0;mo=sq+1192|0;zo=sq+1188|0;Ao=sq+1184|0;to=sq+1180|0;uo=sq+1176|0;vo=sq+1172|0;yo=sq+1168|0;Do=sq+1164|0;jp=sq+1160|0;Bp=sq+1156|0;Cp=sq+1152|0;mp=sq+1148|0;np=sq+1144|0;op=sq+1140|0;Ap=sq+1136|0;zn=sq+1132|0;ln=sq+1128|0;_m=sq+1124|0;vn=sq+1120|0;Gn=sq+1116|0;Xn=sq+1112|0;tn=sq+1108|0;ao=sq+1104|0;On=sq+1100|0;fn=sq+1096|0;bn=sq+1092|0;mn=sq+1088|0;qn=sq+1084|0;$n=sq+1080|0;Vn=sq+1076|0;gn=sq+1072|0;yn=sq+1068|0;Zm=sq+1064|0;Cn=sq+1060|0;Fn=sq+1056|0;rn=sq+1052|0;sn=sq+1048|0;Kn
=sq+1044|0;Nn=sq+1040|0;$m=sq+1036|0;an=sq+1032|0;on=sq+1028|0;pn=sq+1024|0;Rn=sq+1020|0;Un=sq+1016|0;Hn=sq+1012|0;Xm=sq+1008|0;jn=sq+1004|0;kn=sq+1e3|0;cn=sq+996|0;dn=sq+992|0;en=sq+988|0;hn=sq+984|0;nn=sq+980|0;un=sq+976|0;co=sq+972|0;eo=sq+968|0;Yn=sq+964|0;Zn=sq+960|0;_n=sq+956|0;bo=sq+952|0;sl=sq+948|0;dm=sq+944|0;Tl=sq+940|0;Om=sq+936|0;Jk=sq+932|0;Pm=sq+928|0;lm=sq+924|0;Um=sq+920|0;Kl=sq+916|0;_l=sq+912|0;Wl=sq+908|0;em=sq+904|0;im=sq+900|0;Tm=sq+896|0;Jm=sq+892|0;$l=sq+888|0;rl=sq+884|0;Sl=sq+880|0;zl=sq+876|0;Ik=sq+872|0;jm=sq+868|0;km=sq+864|0;$k=sq+860|0;Jl=sq+856|0;Ul=sq+852|0;Vl=sq+848|0;gm=sq+844|0;hm=sq+840|0;zm=sq+836|0;Im=sq+832|0;Kk=sq+828|0;Km=sq+824|0;bm=sq+820|0;cm=sq+816|0;Xl=sq+812|0;Yl=sq+808|0;Zl=sq+804|0;am=sq+800|0;fm=sq+796|0;Nm=sq+792|0;Wm=sq+788|0;wn=sq+784|0;Qm=sq+780|0;Rm=sq+776|0;Sm=sq+772|0;Vm=sq+768|0;Cf=sq+764|0;lh=sq+760|0;qh=sq+756|0;Ah=sq+752|0;th=sq+748|0;Bh=sq+744|0;Jf=sq+740|0;wh=sq+736|0;Rf=sq+732|0;gh=sq+728|0;Bg=sq+724|0;vh=sq+720|0;Eg=sq+716|0;mh=sq+712|0;xg=sq+708|0;hh=sq+704|0;Af=sq+700|0;Bf=sq+696|0;oh=sq+692|0;ph=sq+688|0;rh=sq+684|0;sh=sq+680|0;Ff=sq+676|0;If=sq+672|0;Nf=sq+668|0;Qf=sq+664|0;zg=sq+660|0;Ag=sq+656|0;Cg=sq+652|0;Dg=sq+648|0;Uf=sq+644|0;wg=sq+640|0;Kf=sq+636|0;yg=sq+632|0;jh=sq+628|0;kh=sq+624|0;Fg=sq+620|0;eh=sq+616|0;fh=sq+612|0;ih=sq+608|0;nh=sq+604|0;uh=sq+600|0;Dh=sq+596|0;Gg=sq+592|0;xh=sq+588|0;yh=sq+584|0;zh=sq+580|0;Ch=sq+576|0;Fj=sq+572|0;rj=sq+568|0;vk=sq+564|0;dl=sq+560|0;yk=sq+556|0;el=sq+552|0;Mj=sq+548|0;Bk=sq+544|0;Uj=sq+540|0;mj=sq+536|0;fj=sq+532|0;Ak=sq+528|0;ij=sq+524|0;sj=sq+520|0;bj=sq+516|0;nj=sq+512|0;Dj=sq+508|0;Ej=sq+504|0;uj=sq+500|0;vj=sq+496|0;wk=sq+492|0;xk=sq+488|0;Ij=sq+484|0;Lj=sq+480|0;Qj=sq+476|0;Tj=sq+472|0;dj=sq+468|0;ej=sq+464|0;gj=sq+460|0;hj=sq+456|0;Zi=sq+452|0;aj=sq+448|0;Nj=sq+444|0;cj=sq+440|0;pj=sq+436|0;qj=sq+432|0;jj=sq+428|0;kj=sq+424|0;lj=sq+420|0;oj=sq+416|0;tj=sq+412|0;zk=sq+408|0;gl=sq+404|0;hl=sq+400|0;Ck=sq+396|0;Dk=sq+392|0;Ek=sq+388|0;fl=sq+384|0
;Qc=sq+380|0;eg=sq+376|0;jg=sq+372|0;tg=sq+368|0;mg=sq+364|0;ug=sq+360|0;cd=sq+356|0;pg=sq+352|0;fe=sq+348|0;$f=sq+344|0;uf=sq+340|0;og=sq+336|0;Xf=sq+332|0;fg=sq+328|0;Ie=sq+324|0;ag=sq+320|0;Ac=sq+316|0;Pc=sq+312|0;hg=sq+308|0;ig=sq+304|0;kg=sq+300|0;lg=sq+296|0;jc=sq+292|0;bd=sq+288|0;sd=sq+284|0;ee=sq+280|0;qf=sq+276|0;tf=sq+272|0;vf=sq+268|0;wf=sq+264|0;ue=sq+260|0;He=sq+256|0;dd=sq+252|0;Je=sq+248|0;cg=sq+244|0;dg=sq+240|0;Yf=sq+236|0;Zf=sq+232|0;_f=sq+228|0;bg=sq+224|0;gg=sq+220|0;ng=sq+216|0;yf=sq+212|0;zf=sq+208|0;qg=sq+204|0;rg=sq+200|0;sg=sq+196|0;xf=sq+192|0;Rg=sq+188|0;ki=sq+184|0;Qi=sq+180|0;yj=sq+176|0;Ti=sq+172|0;zj=sq+168|0;Fh=sq+164|0;Wi=sq+160|0;xi=sq+156|0;fi=sq+152|0;_h=sq+148|0;Vi=sq+144|0;bi=sq+140|0;li=sq+136|0;Sh=sq+132|0;gi=sq+128|0;Jg=sq+124|0;Qg=sq+120|0;Oi=sq+116|0;Pi=sq+112|0;Ri=sq+108|0;Si=sq+104|0;Yg=sq+100|0;dh=sq+96|0;pi=sq+92|0;wi=sq+88|0;Wh=sq+84|0;Zh=sq+80|0;$h=sq+76|0;ai=sq+72|0;Ii=sq+68|0;Rh=sq+64|0;Gh=sq+60|0;Th=sq+56|0;ii=sq+52|0;ji=sq+48|0;ci=sq+44|0;di=sq+40|0;ei=sq+36|0;hi=sq+32|0;mi=sq+28|0;Ui=sq+24|0;Bj=sq+20|0;Cj=sq+16|0;Xi=sq+12|0;wj=sq+8|0;xj=sq+4|0;Aj=sq;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[tq>>2]=j;c[s>>2]=k;c[t>>2]=l;g[sq+3708>>2]=.7730104327201843;g[sq+3704>>2]=.6343932747840881;g[sq+3700>>2]=.0980171412229538;g[sq+3696>>2]=.9951847195625305;g[sq+3692>>2]=.8819212913513184;g[sq+3688>>2]=.4713967442512512;g[sq+3684>>2]=.290284663438797;g[sq+3680>>2]=.9569403529167175;g[sq+3676>>2]=.8314695954322815;g[sq+3672>>2]=.5555702447891235;g[sq+3668>>2]=.19509032368659973;g[sq+3664>>2]=.9807852506637573;g[sq+3660>>2]=.9238795042037964;g[sq+3656>>2]=.3826834261417389;g[sq+3652>>2]=.7071067690849304;c[rq>>2]=c[tq>>2];while(1){if((c[rq>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<5<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[Sb>>2]=+g[u>>2]-+g[Da>>2];g[Ia>>2]=+g[c[n>>2]>>2];g[Ja>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<5<<2)>>2];g[Ka>>2]=+g[Ia>>2]+ 
+g[Ja>>2];g[Le>>2]=+g[Ia>>2]-+g[Ja>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<4<<2)>>2];g[ce>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*48<<2)>>2];g[mf>>2]=+g[Vc>>2]+ +g[ce>>2];g[Ke>>2]=+g[Vc>>2]-+g[ce>>2];g[La>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<4<<2)>>2];g[Ma>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*48<<2)>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[Tb>>2]=+g[La>>2]-+g[Ma>>2];g[Eh>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[Ni>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*40<<2)>>2];g[uk>>2]=+g[Eh>>2]+ +g[Ni>>2];g[Wb>>2]=+g[Eh>>2]-+g[Ni>>2];g[nb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ob>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*40<<2)>>2];g[pb>>2]=+g[nb>>2]+ +g[ob>>2];g[Vb>>2]=+g[nb>>2]-+g[ob>>2];g[Dl>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*56<<2)>>2];g[Mm>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*24<<2)>>2];g[Wn>>2]=+g[Dl>>2]+ +g[Mm>>2];g[wc>>2]=+g[Dl>>2]-+g[Mm>>2];g[qb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*56<<2)>>2];g[rb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*24<<2)>>2];g[sb>>2]=+g[qb>>2]+ +g[rb>>2];g[xc>>2]=+g[qb>>2]-+g[rb>>2];g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2];g[Hg>>2]=+g[Sb>>2]+ +g[Tb>>2];g[Uh>>2]=+g[Le>>2]-+g[Ke>>2];g[Me>>2]=+g[Ke>>2]+ +g[Le>>2];g[vg>>2]=+g[Mb>>2]+ +g[mf>>2];g[ep>>2]=+g[uk>>2]+ +g[Wn>>2];g[Yp>>2]=+g[vg>>2]+ +g[ep>>2];g[fo>>2]=+g[vg>>2]-+g[ep>>2];g[Lm>>2]=+g[Ka>>2]-+g[Na>>2];g[Ol>>2]=+g[Wn>>2]-+g[uk>>2];g[Pl>>2]=+g[Lm>>2]-+g[Ol>>2];g[Ym>>2]=+g[Ol>>2]+ +g[Lm>>2];g[nf>>2]=+g[wc>>2]-+g[xc>>2];g[of>>2]=+g[Wb>>2]+ +g[Vb>>2];g[pf>>2]=(+g[nf>>2]-+g[of>>2])*.7071067690849304;g[Ig>>2]=(+g[of>>2]+ +g[nf>>2])*.7071067690849304;g[mb>>2]=+g[Ka>>2]+ +g[Na>>2];g[tb>>2]=+g[pb>>2]+ +g[sb>>2];g[ub>>2]=+g[mb>>2]+ +g[tb>>2];g[no>>2]=+g[mb>>2]-+g[tb>>2];g[il>>2]=+g[Mb>>2]-+g[mf>>2];g[jl>>2]=+g[pb>>2]-+g[sb>>2];g[kl>>2]=+g[il>>2]-+g[jl>>2];g[xn>>2]=+g[il>>2]+ +g[jl>>2];g[vc>>2]=+g[Vb>>2]-+g[Wb>>2];g[yc>>2]=+g[wc>>2]+ +g[xc>>2];g[zc>>2]=(+g[vc>>2]-+g[yc>>2])*.7071067690849304;g[Vh>>2]=(+g[vc>>2]+ +g[yc>>2])*.7071067690849304;g[Zp>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[_p>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*36<<2)>>2];g[$p>>2]=+g[Zp>>2]+ 
+g[_p>>2];g[Ec>>2]=+g[Zp>>2]-+g[_p>>2];g[vb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[wb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*36<<2)>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[Cc>>2]=+g[vb>>2]-+g[wb>>2];g[aq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*20<<2)>>2];g[bq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*52<<2)>>2];g[cq>>2]=+g[aq>>2]+ +g[bq>>2];g[Bc>>2]=+g[aq>>2]-+g[bq>>2];g[yb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*20<<2)>>2];g[zb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*52<<2)>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[Fc>>2]=+g[yb>>2]-+g[zb>>2];g[eq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*60<<2)>>2];g[fq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*28<<2)>>2];g[gq>>2]=+g[eq>>2]+ +g[fq>>2];g[Lc>>2]=+g[eq>>2]-+g[fq>>2];g[Cb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*60<<2)>>2];g[Db>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*28<<2)>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[Jc>>2]=+g[Cb>>2]-+g[Db>>2];g[hq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[iq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*44<<2)>>2];g[jq>>2]=+g[hq>>2]+ +g[iq>>2];g[Ic>>2]=+g[hq>>2]-+g[iq>>2];g[Fb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*12<<2)>>2];g[Gb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*44<<2)>>2];g[Hb>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Mc>>2]=+g[Fb>>2]-+g[Gb>>2];g[dq>>2]=+g[$p>>2]+ +g[cq>>2];g[kq>>2]=+g[gq>>2]+ +g[jq>>2];g[lq>>2]=+g[dq>>2]+ +g[kq>>2];g[oo>>2]=+g[kq>>2]-+g[dq>>2];g[ll>>2]=+g[xb>>2]-+g[Ab>>2];g[ml>>2]=+g[$p>>2]-+g[cq>>2];g[nl>>2]=+g[ll>>2]-+g[ml>>2];g[Rl>>2]=+g[ml>>2]+ +g[ll>>2];g[ol>>2]=+g[gq>>2]-+g[jq>>2];g[pl>>2]=+g[Eb>>2]-+g[Hb>>2];g[ql>>2]=+g[ol>>2]+ +g[pl>>2];g[Ql>>2]=+g[ol>>2]-+g[pl>>2];g[Bb>>2]=+g[xb>>2]+ +g[Ab>>2];g[Ib>>2]=+g[Eb>>2]+ +g[Hb>>2];g[Jb>>2]=+g[Bb>>2]+ +g[Ib>>2];g[Go>>2]=+g[Bb>>2]-+g[Ib>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Hc>>2]=+g[Dc>>2]*.3826834261417389-+g[Gc>>2]*.9238795042037964;g[sf>>2]=+g[Dc>>2]*.9238795042037964+ +g[Gc>>2]*.3826834261417389;g[Kg>>2]=+g[Cc>>2]-+g[Bc>>2];g[Lg>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Mg>>2]=+g[Kg>>2]*.9238795042037964-+g[Lg>>2]*.3826834261417389;g[Yh>>2]=+g[Kg>>2]*.3826834261417389+ 
+g[Lg>>2]*.9238795042037964;g[Ng>>2]=+g[Jc>>2]-+g[Ic>>2];g[Og>>2]=+g[Lc>>2]+ +g[Mc>>2];g[Pg>>2]=+g[Ng>>2]*.9238795042037964+ +g[Og>>2]*.3826834261417389;g[Xh>>2]=+g[Og>>2]*.9238795042037964-+g[Ng>>2]*.3826834261417389;g[Kc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[Oc>>2]=+g[Kc>>2]*.3826834261417389+ +g[Nc>>2]*.9238795042037964;g[rf>>2]=+g[Nc>>2]*.3826834261417389-+g[Kc>>2]*.9238795042037964;g[nq>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[oq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*34<<2)>>2];g[pq>>2]=+g[nq>>2]+ +g[oq>>2];g[cc>>2]=+g[nq>>2]-+g[oq>>2];g[Lb>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[Oa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*34<<2)>>2];g[Pa>>2]=+g[Lb>>2]+ +g[Oa>>2];g[Sc>>2]=+g[Lb>>2]-+g[Oa>>2];g[qq>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[Wj>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*50<<2)>>2];g[Xj>>2]=+g[qq>>2]+ +g[Wj>>2];g[Rc>>2]=+g[qq>>2]-+g[Wj>>2];g[Qa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*18<<2)>>2];g[Ra>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*50<<2)>>2];g[Sa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[dc>>2]=+g[Qa>>2]-+g[Ra>>2];g[ak>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*58<<2)>>2];g[bk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*26<<2)>>2];g[Uc>>2]=+g[ak>>2]-+g[bk>>2];g[Xa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*58<<2)>>2];g[Ya>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*26<<2)>>2];g[Xb>>2]=+g[Xa>>2]-+g[Ya>>2];g[ck>>2]=+g[ak>>2]+ +g[bk>>2];g[gc>>2]=+g[Uc>>2]+ +g[Xb>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Yb>>2]=+g[Uc>>2]-+g[Xb>>2];g[Zj>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[_j>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*42<<2)>>2];g[Zb>>2]=+g[Zj>>2]-+g[_j>>2];g[Ua>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*10<<2)>>2];g[Va>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*42<<2)>>2];g[_b>>2]=+g[Ua>>2]-+g[Va>>2];g[$j>>2]=+g[Zj>>2]+ +g[_j>>2];g[fc>>2]=+g[_b>>2]-+g[Zb>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[$b>>2]=+g[Zb>>2]+ +g[_b>>2];g[Yj>>2]=+g[pq>>2]+ +g[Xj>>2];g[dk>>2]=+g[$j>>2]+ +g[ck>>2];g[ek>>2]=+g[Yj>>2]+ +g[dk>>2];g[Jo>>2]=+g[Yj>>2]-+g[dk>>2];g[tl>>2]=+g[Pa>>2]-+g[Sa>>2];g[ul>>2]=+g[ck>>2]-+g[$j>>2];g[vl>>2]=+g[tl>>2]-+g[ul>>2];g[An>>2]=+g[ul>>2]+ 
+g[tl>>2];g[wl>>2]=+g[pq>>2]-+g[Xj>>2];g[xl>>2]=+g[Wa>>2]-+g[Za>>2];g[yl>>2]=+g[wl>>2]-+g[xl>>2];g[Bn>>2]=+g[wl>>2]+ +g[xl>>2];g[Ta>>2]=+g[Pa>>2]+ +g[Sa>>2];g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[$a>>2]=+g[Ta>>2]+ +g[_a>>2];g[Io>>2]=+g[Ta>>2]-+g[_a>>2];g[Tc>>2]=+g[Rc>>2]+ +g[Sc>>2];g[ac>>2]=(+g[Yb>>2]-+g[$b>>2])*.7071067690849304;g[bc>>2]=+g[Tc>>2]-+g[ac>>2];g[Df>>2]=+g[Tc>>2]+ +g[ac>>2];g[Sg>>2]=+g[Sc>>2]-+g[Rc>>2];g[Tg>>2]=(+g[fc>>2]+ +g[gc>>2])*.7071067690849304;g[Ug>>2]=+g[Sg>>2]-+g[Tg>>2];g[Gj>>2]=+g[Sg>>2]+ +g[Tg>>2];g[Vg>>2]=+g[cc>>2]+ +g[dc>>2];g[Wg>>2]=(+g[$b>>2]+ +g[Yb>>2])*.7071067690849304;g[Xg>>2]=+g[Vg>>2]-+g[Wg>>2];g[Hj>>2]=+g[Vg>>2]+ +g[Wg>>2];g[ec>>2]=+g[cc>>2]-+g[dc>>2];g[hc>>2]=(+g[fc>>2]-+g[gc>>2])*.7071067690849304;g[ic>>2]=+g[ec>>2]-+g[hc>>2];g[Ef>>2]=+g[ec>>2]+ +g[hc>>2];g[Aa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*63<<2)>>2];g[Ba>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*31<<2)>>2];g[Ca>>2]=+g[Aa>>2]+ +g[Ba>>2];g[ge>>2]=+g[Aa>>2]-+g[Ba>>2];g[we>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*63<<2)>>2];g[xe>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*31<<2)>>2];g[ye>>2]=+g[we>>2]-+g[xe>>2];g[Am>>2]=+g[we>>2]+ +g[xe>>2];g[F>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[G>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*47<<2)>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[ve>>2]=+g[F>>2]-+g[G>>2];g[he>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*15<<2)>>2];g[ie>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*47<<2)>>2];g[je>>2]=+g[he>>2]-+g[ie>>2];g[Bm>>2]=+g[he>>2]+ +g[ie>>2];g[M>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*55<<2)>>2];g[N>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*23<<2)>>2];g[Qe>>2]=+g[M>>2]-+g[N>>2];g[Re>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*55<<2)>>2];g[Se>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*23<<2)>>2];g[Te>>2]=+g[Re>>2]-+g[Se>>2];g[O>>2]=+g[M>>2]+ +g[N>>2];g[Nl>>2]=+g[Re>>2]+ +g[Se>>2];g[Ue>>2]=+g[Qe>>2]+ 
+g[Te>>2];g[Ae>>2]=+g[Qe>>2]-+g[Te>>2];g[J>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[K>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*39<<2)>>2];g[Oe>>2]=+g[J>>2]-+g[K>>2];g[le>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[me>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*39<<2)>>2];g[Ne>>2]=+g[le>>2]-+g[me>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[Ml>>2]=+g[le>>2]+ +g[me>>2];g[Pe>>2]=+g[Ne>>2]-+g[Oe>>2];g[Be>>2]=+g[Oe>>2]+ +g[Ne>>2];g[I>>2]=+g[Ca>>2]+ +g[H>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[Q>>2]=+g[I>>2]+ +g[P>>2];g[$o>>2]=+g[I>>2]-+g[P>>2];g[Cm>>2]=+g[Am>>2]-+g[Bm>>2];g[Dm>>2]=+g[O>>2]-+g[L>>2];g[Em>>2]=+g[Cm>>2]-+g[Dm>>2];g[Sn>>2]=+g[Dm>>2]+ +g[Cm>>2];g[go>>2]=+g[Am>>2]+ +g[Bm>>2];g[ho>>2]=+g[Ml>>2]+ +g[Nl>>2];g[io>>2]=+g[go>>2]-+g[ho>>2];g[Mp>>2]=+g[go>>2]+ +g[ho>>2];g[ke>>2]=+g[ge>>2]-+g[je>>2];g[Ve>>2]=(+g[Pe>>2]-+g[Ue>>2])*.7071067690849304;g[We>>2]=+g[ke>>2]-+g[Ve>>2];g[Sf>>2]=+g[ke>>2]+ +g[Ve>>2];g[ze>>2]=+g[ve>>2]+ +g[ye>>2];g[Ce>>2]=(+g[Ae>>2]-+g[Be>>2])*.7071067690849304;g[De>>2]=+g[ze>>2]-+g[Ce>>2];g[Vf>>2]=+g[ze>>2]+ +g[Ce>>2];g[Ji>>2]=+g[ye>>2]-+g[ve>>2];g[Ki>>2]=(+g[Pe>>2]+ +g[Ue>>2])*.7071067690849304;g[Li>>2]=+g[Ji>>2]-+g[Ki>>2];g[_i>>2]=+g[Ji>>2]+ +g[Ki>>2];g[Ll>>2]=+g[Ca>>2]-+g[H>>2];g[mm>>2]=+g[Ml>>2]-+g[Nl>>2];g[nm>>2]=+g[Ll>>2]-+g[mm>>2];g[Pn>>2]=+g[Ll>>2]+ +g[mm>>2];g[yi>>2]=+g[ge>>2]+ +g[je>>2];g[zi>>2]=(+g[Be>>2]+ +g[Ae>>2])*.7071067690849304;g[Ai>>2]=+g[yi>>2]-+g[zi>>2];g[Vj>>2]=+g[yi>>2]+ +g[zi>>2];g[fk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*62<<2)>>2];g[gk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*30<<2)>>2];g[hk>>2]=+g[fk>>2]+ +g[gk>>2];g[Wc>>2]=+g[fk>>2]-+g[gk>>2];g[ab>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*62<<2)>>2];g[bb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*30<<2)>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[lc>>2]=+g[ab>>2]-+g[bb>>2];g[ik>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[jk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*46<<2)>>2];g[kk>>2]=+g[ik>>2]+ +g[jk>>2];g[kc>>2]=+g[ik>>2]-+g[jk>>2];g[db>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*14<<2)>>2];g[eb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*46<<2)>>2];g[fb>>2]=+g[db>>2]+ 
+g[eb>>2];g[Xc>>2]=+g[db>>2]-+g[eb>>2];g[pk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*54<<2)>>2];g[qk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*22<<2)>>2];g[nc>>2]=+g[pk>>2]-+g[qk>>2];g[kb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*54<<2)>>2];g[lb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*22<<2)>>2];g[oc>>2]=+g[kb>>2]-+g[lb>>2];g[rk>>2]=+g[pk>>2]+ +g[qk>>2];g[_c>>2]=+g[nc>>2]+ +g[oc>>2];g[Nb>>2]=+g[kb>>2]+ +g[lb>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[mk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[nk>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*38<<2)>>2];g[qc>>2]=+g[mk>>2]-+g[nk>>2];g[hb>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[ib>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*38<<2)>>2];g[rc>>2]=+g[hb>>2]-+g[ib>>2];g[ok>>2]=+g[mk>>2]+ +g[nk>>2];g[Zc>>2]=+g[rc>>2]-+g[qc>>2];g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[sc>>2]=+g[qc>>2]+ +g[rc>>2];g[lk>>2]=+g[hk>>2]+ +g[kk>>2];g[sk>>2]=+g[ok>>2]+ +g[rk>>2];g[tk>>2]=+g[lk>>2]+ +g[sk>>2];g[Lo>>2]=+g[lk>>2]-+g[sk>>2];g[Al>>2]=+g[cb>>2]-+g[fb>>2];g[Bl>>2]=+g[rk>>2]-+g[ok>>2];g[Cl>>2]=+g[Al>>2]-+g[Bl>>2];g[Dn>>2]=+g[Bl>>2]+ +g[Al>>2];g[Fk>>2]=+g[hk>>2]-+g[kk>>2];g[Gk>>2]=+g[jb>>2]-+g[Nb>>2];g[Hk>>2]=+g[Fk>>2]-+g[Gk>>2];g[En>>2]=+g[Fk>>2]+ +g[Gk>>2];g[gb>>2]=+g[cb>>2]+ +g[fb>>2];g[Ob>>2]=+g[jb>>2]+ +g[Nb>>2];g[Pb>>2]=+g[gb>>2]+ +g[Ob>>2];g[Mo>>2]=+g[gb>>2]-+g[Ob>>2];g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[tc>>2]=(+g[pc>>2]-+g[sc>>2])*.7071067690849304;g[uc>>2]=+g[mc>>2]-+g[tc>>2];g[Gf>>2]=+g[mc>>2]+ +g[tc>>2];g[Zg>>2]=+g[Wc>>2]+ +g[Xc>>2];g[_g>>2]=(+g[sc>>2]+ +g[pc>>2])*.7071067690849304;g[$g>>2]=+g[Zg>>2]-+g[_g>>2];g[Jj>>2]=+g[Zg>>2]+ +g[_g>>2];g[ah>>2]=+g[lc>>2]-+g[kc>>2];g[bh>>2]=(+g[Zc>>2]+ +g[_c>>2])*.7071067690849304;g[ch>>2]=+g[ah>>2]-+g[bh>>2];g[Kj>>2]=+g[ah>>2]+ +g[bh>>2];g[Yc>>2]=+g[Wc>>2]-+g[Xc>>2];g[$c>>2]=(+g[Zc>>2]-+g[_c>>2])*.7071067690849304;g[ad>>2]=+g[Yc>>2]-+g[$c>>2];g[Hf>>2]=+g[Yc>>2]+ +g[$c>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[y>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*33<<2)>>2];g[z>>2]=+g[x>>2]+ 
+g[y>>2];g[td>>2]=+g[x>>2]-+g[y>>2];g[Fd>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[Gd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*33<<2)>>2];g[Hd>>2]=+g[Fd>>2]-+g[Gd>>2];g[Lk>>2]=+g[Fd>>2]+ +g[Gd>>2];g[A>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[B>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*49<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[Ed>>2]=+g[A>>2]-+g[B>>2];g[ud>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*17<<2)>>2];g[vd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*49<<2)>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[Mk>>2]=+g[ud>>2]+ +g[vd>>2];g[fa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*57<<2)>>2];g[ga>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*25<<2)>>2];g[Jd>>2]=+g[fa>>2]-+g[ga>>2];g[Kd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*57<<2)>>2];g[Ld>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*25<<2)>>2];g[Md>>2]=+g[Kd>>2]-+g[Ld>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[cl>>2]=+g[Kd>>2]+ +g[Ld>>2];g[Nd>>2]=+g[Jd>>2]-+g[Md>>2];g[zd>>2]=+g[Jd>>2]+ +g[Md>>2];g[E>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*41<<2)>>2];g[Od>>2]=+g[E>>2]-+g[da>>2];g[Pd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*9<<2)>>2];g[Qd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*41<<2)>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[ea>>2]=+g[E>>2]+ +g[da>>2];g[bl>>2]=+g[Pd>>2]+ +g[Qd>>2];g[Sd>>2]=+g[Od>>2]+ +g[Rd>>2];g[yd>>2]=+g[Rd>>2]-+g[Od>>2];g[D>>2]=+g[z>>2]+ +g[C>>2];g[ia>>2]=+g[ea>>2]+ +g[ha>>2];g[ja>>2]=+g[D>>2]+ +g[ia>>2];g[Vo>>2]=+g[D>>2]-+g[ia>>2];g[al>>2]=+g[z>>2]-+g[C>>2];g[El>>2]=+g[bl>>2]-+g[cl>>2];g[Fl>>2]=+g[al>>2]-+g[El>>2];g[In>>2]=+g[al>>2]+ +g[El>>2];g[Qo>>2]=+g[Lk>>2]+ +g[Mk>>2];g[Ro>>2]=+g[bl>>2]+ +g[cl>>2];g[So>>2]=+g[Qo>>2]-+g[Ro>>2];g[Hp>>2]=+g[Qo>>2]+ +g[Ro>>2];g[Id>>2]=+g[Ed>>2]+ +g[Hd>>2];g[Td>>2]=(+g[Nd>>2]-+g[Sd>>2])*.7071067690849304;g[Ud>>2]=+g[Id>>2]-+g[Td>>2];g[Of>>2]=+g[Id>>2]+ +g[Td>>2];g[xd>>2]=+g[td>>2]-+g[wd>>2];g[Ad>>2]=(+g[yd>>2]-+g[zd>>2])*.7071067690849304;g[Bd>>2]=+g[xd>>2]-+g[Ad>>2];g[Lf>>2]=+g[xd>>2]+ +g[Ad>>2];g[qi>>2]=+g[td>>2]+ +g[wd>>2];g[ri>>2]=(+g[Sd>>2]+ +g[Nd>>2])*.7071067690849304;g[si>>2]=+g[qi>>2]-+g[ri>>2];g[Oj>>2]=+g[qi>>2]+ 
+g[ri>>2];g[Nk>>2]=+g[Lk>>2]-+g[Mk>>2];g[Ok>>2]=+g[ha>>2]-+g[ea>>2];g[Pk>>2]=+g[Nk>>2]-+g[Ok>>2];g[Ln>>2]=+g[Ok>>2]+ +g[Nk>>2];g[Hh>>2]=+g[Hd>>2]-+g[Ed>>2];g[Ih>>2]=(+g[yd>>2]+ +g[zd>>2])*.7071067690849304;g[Jh>>2]=+g[Hh>>2]-+g[Ih>>2];g[Rj>>2]=+g[Hh>>2]+ +g[Ih>>2];g[ka>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[la>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*37<<2)>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[ld>>2]=+g[ka>>2]-+g[la>>2];g[hd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[id>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*37<<2)>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[Wk>>2]=+g[hd>>2]+ +g[id>>2];g[na>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*21<<2)>>2];g[oa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*53<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[gd>>2]=+g[na>>2]-+g[oa>>2];g[md>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*21<<2)>>2];g[nd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*53<<2)>>2];g[od>>2]=+g[md>>2]-+g[nd>>2];g[Xk>>2]=+g[md>>2]+ +g[nd>>2];g[kd>>2]=+g[gd>>2]+ +g[jd>>2];g[pd>>2]=+g[ld>>2]-+g[od>>2];g[Yk>>2]=+g[Wk>>2]-+g[Xk>>2];g[Vk>>2]=+g[ma>>2]-+g[pa>>2];g[Oh>>2]=+g[ld>>2]+ +g[od>>2];g[Nh>>2]=+g[jd>>2]-+g[gd>>2];g[ra>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*61<<2)>>2];g[sa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*29<<2)>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[Vd>>2]=+g[ra>>2]-+g[sa>>2];g[$d>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*61<<2)>>2];g[ae>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*29<<2)>>2];g[be>>2]=+g[$d>>2]-+g[ae>>2];g[Rk>>2]=+g[$d>>2]+ +g[ae>>2];g[ua>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[va>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*45<<2)>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[_d>>2]=+g[ua>>2]-+g[va>>2];g[Wd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*13<<2)>>2];g[Xd>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*45<<2)>>2];g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2];g[Sk>>2]=+g[Wd>>2]+ +g[Xd>>2];g[Zd>>2]=+g[Vd>>2]-+g[Yd>>2];g[ed>>2]=+g[_d>>2]+ +g[be>>2];g[Tk>>2]=+g[Rk>>2]-+g[Sk>>2];g[Qk>>2]=+g[ta>>2]-+g[wa>>2];g[Lh>>2]=+g[be>>2]-+g[_d>>2];g[Kh>>2]=+g[Vd>>2]+ +g[Yd>>2];g[qa>>2]=+g[ma>>2]+ +g[pa>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[ya>>2]=+g[qa>>2]+ 
+g[xa>>2];g[To>>2]=+g[xa>>2]-+g[qa>>2];g[Gl>>2]=+g[Yk>>2]-+g[Vk>>2];g[Hl>>2]=+g[Qk>>2]+ +g[Tk>>2];g[Il>>2]=(+g[Gl>>2]-+g[Hl>>2])*.7071067690849304;g[Mn>>2]=(+g[Gl>>2]+ +g[Hl>>2])*.7071067690849304;g[Wo>>2]=+g[Wk>>2]+ +g[Xk>>2];g[Xo>>2]=+g[Rk>>2]+ +g[Sk>>2];g[Yo>>2]=+g[Wo>>2]-+g[Xo>>2];g[Ip>>2]=+g[Wo>>2]+ +g[Xo>>2];g[fd>>2]=+g[Zd>>2]*.3826834261417389-+g[ed>>2]*.9238795042037964;g[qd>>2]=+g[kd>>2]*.9238795042037964+ +g[pd>>2]*.3826834261417389;g[rd>>2]=+g[fd>>2]-+g[qd>>2];g[Mf>>2]=+g[qd>>2]+ +g[fd>>2];g[Cd>>2]=+g[kd>>2]*.3826834261417389-+g[pd>>2]*.9238795042037964;g[Dd>>2]=+g[ed>>2]*.3826834261417389+ +g[Zd>>2]*.9238795042037964;g[de>>2]=+g[Cd>>2]-+g[Dd>>2];g[Pf>>2]=+g[Cd>>2]+ +g[Dd>>2];g[ti>>2]=+g[Nh>>2]*.9238795042037964-+g[Oh>>2]*.3826834261417389;g[ui>>2]=+g[Lh>>2]*.9238795042037964+ +g[Kh>>2]*.3826834261417389;g[vi>>2]=+g[ti>>2]-+g[ui>>2];g[Sj>>2]=+g[ti>>2]+ +g[ui>>2];g[Uk>>2]=+g[Qk>>2]-+g[Tk>>2];g[Zk>>2]=+g[Vk>>2]+ +g[Yk>>2];g[_k>>2]=(+g[Uk>>2]-+g[Zk>>2])*.7071067690849304;g[Jn>>2]=(+g[Zk>>2]+ +g[Uk>>2])*.7071067690849304;g[Mh>>2]=+g[Kh>>2]*.9238795042037964-+g[Lh>>2]*.3826834261417389;g[ni>>2]=+g[Nh>>2]*.3826834261417389+ +g[Oh>>2]*.9238795042037964;g[oi>>2]=+g[Mh>>2]-+g[ni>>2];g[Pj>>2]=+g[ni>>2]+ +g[Mh>>2];g[R>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*35<<2)>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[af>>2]=+g[R>>2]-+g[S>>2];g[Ye>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Ze>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*35<<2)>>2];g[_e>>2]=+g[Ye>>2]-+g[Ze>>2];g[om>>2]=+g[Ye>>2]+ +g[Ze>>2];g[U>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[V>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*51<<2)>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[Xe>>2]=+g[U>>2]-+g[V>>2];g[bf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*19<<2)>>2];g[cf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*51<<2)>>2];g[df>>2]=+g[bf>>2]-+g[cf>>2];g[pm>>2]=+g[bf>>2]+ +g[cf>>2];g[$e>>2]=+g[Xe>>2]+ +g[_e>>2];g[ef>>2]=+g[af>>2]-+g[df>>2];g[rm>>2]=+g[T>>2]-+g[W>>2];g[qm>>2]=+g[om>>2]-+g[pm>>2];g[Ci>>2]=+g[af>>2]+ 
+g[df>>2];g[Bi>>2]=+g[_e>>2]-+g[Xe>>2];g[Y>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*59<<2)>>2];g[Z>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*27<<2)>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[ne>>2]=+g[Y>>2]-+g[Z>>2];g[hf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*59<<2)>>2];g[jf>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*27<<2)>>2];g[kf>>2]=+g[hf>>2]-+g[jf>>2];g[um>>2]=+g[hf>>2]+ +g[jf>>2];g[$>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[aa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*43<<2)>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[gf>>2]=+g[$>>2]-+g[aa>>2];g[oe>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*11<<2)>>2];g[pe>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*43<<2)>>2];g[qe>>2]=+g[oe>>2]-+g[pe>>2];g[vm>>2]=+g[oe>>2]+ +g[pe>>2];g[lf>>2]=+g[gf>>2]+ +g[kf>>2];g[re>>2]=+g[ne>>2]-+g[qe>>2];g[wm>>2]=+g[um>>2]-+g[vm>>2];g[tm>>2]=+g[_>>2]-+g[ba>>2];g[Fi>>2]=+g[ne>>2]+ +g[qe>>2];g[Ei>>2]=+g[kf>>2]-+g[gf>>2];g[X>>2]=+g[T>>2]+ +g[W>>2];g[ca>>2]=+g[_>>2]+ +g[ba>>2];g[Ea>>2]=+g[X>>2]+ +g[ca>>2];g[jo>>2]=+g[ca>>2]-+g[X>>2];g[Fm>>2]=+g[tm>>2]-+g[wm>>2];g[Gm>>2]=+g[rm>>2]+ +g[qm>>2];g[Hm>>2]=(+g[Fm>>2]-+g[Gm>>2])*.7071067690849304;g[Qn>>2]=(+g[Gm>>2]+ +g[Fm>>2])*.7071067690849304;g[ap>>2]=+g[om>>2]+ +g[pm>>2];g[bp>>2]=+g[um>>2]+ +g[vm>>2];g[cp>>2]=+g[ap>>2]-+g[bp>>2];g[Np>>2]=+g[ap>>2]+ +g[bp>>2];g[ff>>2]=+g[$e>>2]*.3826834261417389-+g[ef>>2]*.9238795042037964;g[se>>2]=+g[lf>>2]*.3826834261417389+ +g[re>>2]*.9238795042037964;g[te>>2]=+g[ff>>2]-+g[se>>2];g[Wf>>2]=+g[ff>>2]+ +g[se>>2];g[Ee>>2]=+g[re>>2]*.3826834261417389-+g[lf>>2]*.9238795042037964;g[Fe>>2]=+g[$e>>2]*.9238795042037964+ +g[ef>>2]*.3826834261417389;g[Ge>>2]=+g[Ee>>2]-+g[Fe>>2];g[Tf>>2]=+g[Fe>>2]+ +g[Ee>>2];g[Mi>>2]=+g[Fi>>2]*.9238795042037964-+g[Ei>>2]*.3826834261417389;g[Ph>>2]=+g[Bi>>2]*.3826834261417389+ +g[Ci>>2]*.9238795042037964;g[Qh>>2]=+g[Mi>>2]-+g[Ph>>2];g[Yi>>2]=+g[Ph>>2]+ +g[Mi>>2];g[sm>>2]=+g[qm>>2]-+g[rm>>2];g[xm>>2]=+g[tm>>2]+ +g[wm>>2];g[ym>>2]=(+g[sm>>2]-+g[xm>>2])*.7071067690849304;g[Tn>>2]=(+g[sm>>2]+ 
+g[xm>>2])*.7071067690849304;g[Di>>2]=+g[Bi>>2]*.9238795042037964-+g[Ci>>2]*.3826834261417389;g[Gi>>2]=+g[Ei>>2]*.9238795042037964+ +g[Fi>>2]*.3826834261417389;g[Hi>>2]=+g[Di>>2]-+g[Gi>>2];g[$i>>2]=+g[Di>>2]+ +g[Gi>>2];g[mq>>2]=+g[Yp>>2]+ +g[lq>>2];g[v>>2]=+g[ek>>2]+ +g[tk>>2];g[w>>2]=+g[mq>>2]+ +g[v>>2];g[sp>>2]=+g[mq>>2]-+g[v>>2];g[tp>>2]=+g[Hp>>2]+ +g[Ip>>2];g[up>>2]=+g[Mp>>2]+ +g[Np>>2];g[vp>>2]=+g[tp>>2]-+g[up>>2];g[xp>>2]=+g[tp>>2]+ +g[up>>2];g[za>>2]=+g[ja>>2]+ +g[ya>>2];g[Fa>>2]=+g[Q>>2]+ +g[Ea>>2];g[Ga>>2]=+g[za>>2]+ +g[Fa>>2];g[Ha>>2]=+g[Fa>>2]-+g[za>>2];g[Kb>>2]=+g[ub>>2]+ +g[Jb>>2];g[Qb>>2]=+g[$a>>2]+ +g[Pb>>2];g[Rb>>2]=+g[Kb>>2]-+g[Qb>>2];g[wp>>2]=+g[Kb>>2]+ +g[Qb>>2];g[(c[o>>2]|0)+(c[r>>2]<<5<<2)>>2]=+g[w>>2]-+g[Ga>>2];g[(c[p>>2]|0)+(c[r>>2]<<5<<2)>>2]=+g[wp>>2]-+g[xp>>2];g[c[o>>2]>>2]=+g[w>>2]+ +g[Ga>>2];g[c[p>>2]>>2]=+g[wp>>2]+ +g[xp>>2];g[(c[p>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[Ha>>2]+ +g[Rb>>2];g[(c[o>>2]|0)+(c[r>>2]<<4<<2)>>2]=+g[sp>>2]+ +g[vp>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*48<<2)>>2]=+g[Rb>>2]-+g[Ha>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*48<<2)>>2]=+g[sp>>2]-+g[vp>>2];g[Dp>>2]=+g[Yp>>2]-+g[lq>>2];g[Ep>>2]=+g[$a>>2]-+g[Pb>>2];g[Fp>>2]=+g[Dp>>2]+ +g[Ep>>2];g[Vp>>2]=+g[Dp>>2]-+g[Ep>>2];g[Rp>>2]=+g[ub>>2]-+g[Jb>>2];g[Sp>>2]=+g[tk>>2]-+g[ek>>2];g[Tp>>2]=+g[Rp>>2]-+g[Sp>>2];g[qp>>2]=+g[Sp>>2]+ +g[Rp>>2];g[Gp>>2]=+g[ja>>2]-+g[ya>>2];g[Jp>>2]=+g[Hp>>2]-+g[Ip>>2];g[Kp>>2]=+g[Gp>>2]+ +g[Jp>>2];g[Wp>>2]=+g[Jp>>2]-+g[Gp>>2];g[Lp>>2]=+g[Q>>2]-+g[Ea>>2];g[Op>>2]=+g[Mp>>2]-+g[Np>>2];g[Pp>>2]=+g[Lp>>2]-+g[Op>>2];g[Xp>>2]=+g[Lp>>2]+ +g[Op>>2];g[Qp>>2]=(+g[Kp>>2]+ +g[Pp>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*40<<2)>>2]=+g[Fp>>2]-+g[Qp>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Fp>>2]+ +g[Qp>>2];g[rp>>2]=(+g[Wp>>2]+ +g[Xp>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*40<<2)>>2]=+g[qp>>2]-+g[rp>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[qp>>2]+ 
+g[rp>>2];g[Up>>2]=(+g[Pp>>2]-+g[Kp>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*56<<2)>>2]=+g[Tp>>2]-+g[Up>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Tp>>2]+ +g[Up>>2];g[pp>>2]=(+g[Wp>>2]-+g[Xp>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*56<<2)>>2]=+g[Vp>>2]-+g[pp>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*24<<2)>>2]=+g[Vp>>2]+ +g[pp>>2];g[Ho>>2]=+g[fo>>2]-+g[Go>>2];g[Bo>>2]=+g[fo>>2]+ +g[Go>>2];g[kp>>2]=+g[oo>>2]+ +g[no>>2];g[po>>2]=+g[no>>2]-+g[oo>>2];g[Ko>>2]=+g[Io>>2]-+g[Jo>>2];g[No>>2]=+g[Lo>>2]+ +g[Mo>>2];g[Oo>>2]=(+g[Ko>>2]-+g[No>>2])*.7071067690849304;g[lp>>2]=(+g[Ko>>2]+ +g[No>>2])*.7071067690849304;g[gp>>2]=+g[$o>>2]+ +g[cp>>2];g[hp>>2]=+g[jo>>2]+ +g[io>>2];g[ip>>2]=+g[gp>>2]*.9238795042037964-+g[hp>>2]*.3826834261417389;g[zp>>2]=+g[hp>>2]*.9238795042037964+ +g[gp>>2]*.3826834261417389;g[qo>>2]=+g[Lo>>2]-+g[Mo>>2];g[ro>>2]=+g[Jo>>2]+ +g[Io>>2];g[so>>2]=(+g[qo>>2]-+g[ro>>2])*.7071067690849304;g[Co>>2]=(+g[ro>>2]+ +g[qo>>2])*.7071067690849304;g[Uo>>2]=+g[So>>2]-+g[To>>2];g[Zo>>2]=+g[Vo>>2]-+g[Yo>>2];g[_o>>2]=+g[Uo>>2]*.9238795042037964+ +g[Zo>>2]*.3826834261417389;g[wo>>2]=+g[Uo>>2]*.3826834261417389-+g[Zo>>2]*.9238795042037964;g[Eo>>2]=+g[To>>2]+ +g[So>>2];g[Fo>>2]=+g[Vo>>2]+ +g[Yo>>2];g[fp>>2]=+g[Eo>>2]*.3826834261417389+ +g[Fo>>2]*.9238795042037964;g[yp>>2]=+g[Eo>>2]*.9238795042037964-+g[Fo>>2]*.3826834261417389;g[dp>>2]=+g[$o>>2]-+g[cp>>2];g[ko>>2]=+g[io>>2]-+g[jo>>2];g[lo>>2]=+g[dp>>2]*.3826834261417389-+g[ko>>2]*.9238795042037964;g[xo>>2]=+g[ko>>2]*.3826834261417389+ +g[dp>>2]*.9238795042037964;g[Po>>2]=+g[Ho>>2]+ +g[Oo>>2];g[mo>>2]=+g[_o>>2]+ +g[lo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*44<<2)>>2]=+g[Po>>2]-+g[mo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Po>>2]+ +g[mo>>2];g[zo>>2]=+g[po>>2]+ +g[so>>2];g[Ao>>2]=+g[wo>>2]+ +g[xo>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*44<<2)>>2]=+g[zo>>2]-+g[Ao>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[zo>>2]+ 
+g[Ao>>2];g[to>>2]=+g[po>>2]-+g[so>>2];g[uo>>2]=+g[lo>>2]-+g[_o>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*60<<2)>>2]=+g[to>>2]-+g[uo>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[to>>2]+ +g[uo>>2];g[vo>>2]=+g[Ho>>2]-+g[Oo>>2];g[yo>>2]=+g[wo>>2]-+g[xo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*60<<2)>>2]=+g[vo>>2]-+g[yo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*28<<2)>>2]=+g[vo>>2]+ +g[yo>>2];g[Do>>2]=+g[Bo>>2]+ +g[Co>>2];g[jp>>2]=+g[fp>>2]+ +g[ip>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*36<<2)>>2]=+g[Do>>2]-+g[jp>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Do>>2]+ +g[jp>>2];g[Bp>>2]=+g[kp>>2]+ +g[lp>>2];g[Cp>>2]=+g[yp>>2]+ +g[zp>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*36<<2)>>2]=+g[Bp>>2]-+g[Cp>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Bp>>2]+ +g[Cp>>2];g[mp>>2]=+g[kp>>2]-+g[lp>>2];g[np>>2]=+g[ip>>2]-+g[fp>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*52<<2)>>2]=+g[mp>>2]-+g[np>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[mp>>2]+ +g[np>>2];g[op>>2]=+g[Bo>>2]-+g[Co>>2];g[Ap>>2]=+g[yp>>2]-+g[zp>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*52<<2)>>2]=+g[op>>2]-+g[Ap>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*20<<2)>>2]=+g[op>>2]+ +g[Ap>>2];g[yn>>2]=(+g[Rl>>2]+ +g[Ql>>2])*.7071067690849304;g[zn>>2]=+g[xn>>2]-+g[yn>>2];g[ln>>2]=+g[xn>>2]+ +g[yn>>2];g[Zm>>2]=(+g[nl>>2]+ +g[ql>>2])*.7071067690849304;g[_m>>2]=+g[Ym>>2]-+g[Zm>>2];g[vn>>2]=+g[Ym>>2]+ +g[Zm>>2];g[Cn>>2]=+g[An>>2]*.9238795042037964-+g[Bn>>2]*.3826834261417389;g[Fn>>2]=+g[Dn>>2]*.9238795042037964+ +g[En>>2]*.3826834261417389;g[Gn>>2]=+g[Cn>>2]-+g[Fn>>2];g[Xn>>2]=+g[Cn>>2]+ +g[Fn>>2];g[rn>>2]=+g[Pn>>2]+ +g[Qn>>2];g[sn>>2]=+g[Sn>>2]+ +g[Tn>>2];g[tn>>2]=+g[rn>>2]*.9807852506637573-+g[sn>>2]*.19509032368659973;g[ao>>2]=+g[rn>>2]*.19509032368659973+ +g[sn>>2]*.9807852506637573;g[Kn>>2]=+g[In>>2]-+g[Jn>>2];g[Nn>>2]=+g[Ln>>2]-+g[Mn>>2];g[On>>2]=+g[Kn>>2]*.5555702447891235+ +g[Nn>>2]*.8314695954322815;g[fn>>2]=+g[Nn>>2]*.5555702447891235-+g[Kn>>2]*.8314695954322815;g[$m>>2]=+g[En>>2]*.9238795042037964-+g[Dn>>2]*.3826834261417389;g[an>>2]=+g[An>>2]*.3826834261417389+ 
+g[Bn>>2]*.9238795042037964;g[bn>>2]=+g[$m>>2]-+g[an>>2];g[mn>>2]=+g[an>>2]+ +g[$m>>2];g[on>>2]=+g[In>>2]+ +g[Jn>>2];g[pn>>2]=+g[Ln>>2]+ +g[Mn>>2];g[qn>>2]=+g[on>>2]*.9807852506637573+ +g[pn>>2]*.19509032368659973;g[$n>>2]=+g[pn>>2]*.9807852506637573-+g[on>>2]*.19509032368659973;g[Rn>>2]=+g[Pn>>2]-+g[Qn>>2];g[Un>>2]=+g[Sn>>2]-+g[Tn>>2];g[Vn>>2]=+g[Rn>>2]*.5555702447891235-+g[Un>>2]*.8314695954322815;g[gn>>2]=+g[Rn>>2]*.8314695954322815+ +g[Un>>2]*.5555702447891235;g[Hn>>2]=+g[zn>>2]+ +g[Gn>>2];g[Xm>>2]=+g[On>>2]+ +g[Vn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*42<<2)>>2]=+g[Hn>>2]-+g[Xm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Hn>>2]+ +g[Xm>>2];g[jn>>2]=+g[_m>>2]+ +g[bn>>2];g[kn>>2]=+g[fn>>2]+ +g[gn>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*42<<2)>>2]=+g[jn>>2]-+g[kn>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[jn>>2]+ +g[kn>>2];g[cn>>2]=+g[_m>>2]-+g[bn>>2];g[dn>>2]=+g[Vn>>2]-+g[On>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*58<<2)>>2]=+g[cn>>2]-+g[dn>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[cn>>2]+ +g[dn>>2];g[en>>2]=+g[zn>>2]-+g[Gn>>2];g[hn>>2]=+g[fn>>2]-+g[gn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*58<<2)>>2]=+g[en>>2]-+g[hn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*26<<2)>>2]=+g[en>>2]+ +g[hn>>2];g[nn>>2]=+g[ln>>2]+ +g[mn>>2];g[un>>2]=+g[qn>>2]+ +g[tn>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*34<<2)>>2]=+g[nn>>2]-+g[un>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[nn>>2]+ +g[un>>2];g[co>>2]=+g[vn>>2]+ +g[Xn>>2];g[eo>>2]=+g[$n>>2]+ +g[ao>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*34<<2)>>2]=+g[co>>2]-+g[eo>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[co>>2]+ +g[eo>>2];g[Yn>>2]=+g[vn>>2]-+g[Xn>>2];g[Zn>>2]=+g[tn>>2]-+g[qn>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*50<<2)>>2]=+g[Yn>>2]-+g[Zn>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[Yn>>2]+ +g[Zn>>2];g[_n>>2]=+g[ln>>2]-+g[mn>>2];g[bo>>2]=+g[$n>>2]-+g[ao>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*50<<2)>>2]=+g[_n>>2]-+g[bo>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*18<<2)>>2]=+g[_n>>2]+ +g[bo>>2];g[rl>>2]=(+g[nl>>2]-+g[ql>>2])*.7071067690849304;g[sl>>2]=+g[kl>>2]-+g[rl>>2];g[dm>>2]=+g[kl>>2]+ 
+g[rl>>2];g[Sl>>2]=(+g[Ql>>2]-+g[Rl>>2])*.7071067690849304;g[Tl>>2]=+g[Pl>>2]-+g[Sl>>2];g[Om>>2]=+g[Pl>>2]+ +g[Sl>>2];g[zl>>2]=+g[vl>>2]*.3826834261417389-+g[yl>>2]*.9238795042037964;g[Ik>>2]=+g[Cl>>2]*.3826834261417389+ +g[Hk>>2]*.9238795042037964;g[Jk>>2]=+g[zl>>2]-+g[Ik>>2];g[Pm>>2]=+g[zl>>2]+ +g[Ik>>2];g[jm>>2]=+g[nm>>2]+ +g[ym>>2];g[km>>2]=+g[Em>>2]+ +g[Hm>>2];g[lm>>2]=+g[jm>>2]*.8314695954322815-+g[km>>2]*.5555702447891235;g[Um>>2]=+g[km>>2]*.8314695954322815+ +g[jm>>2]*.5555702447891235;g[$k>>2]=+g[Pk>>2]-+g[_k>>2];g[Jl>>2]=+g[Fl>>2]-+g[Il>>2];g[Kl>>2]=+g[$k>>2]*.9807852506637573+ +g[Jl>>2]*.19509032368659973;g[_l>>2]=+g[$k>>2]*.19509032368659973-+g[Jl>>2]*.9807852506637573;g[Ul>>2]=+g[Hk>>2]*.3826834261417389-+g[Cl>>2]*.9238795042037964;g[Vl>>2]=+g[vl>>2]*.9238795042037964+ +g[yl>>2]*.3826834261417389;g[Wl>>2]=+g[Ul>>2]-+g[Vl>>2];g[em>>2]=+g[Vl>>2]+ +g[Ul>>2];g[gm>>2]=+g[Pk>>2]+ +g[_k>>2];g[hm>>2]=+g[Fl>>2]+ +g[Il>>2];g[im>>2]=+g[gm>>2]*.5555702447891235+ +g[hm>>2]*.8314695954322815;g[Tm>>2]=+g[gm>>2]*.8314695954322815-+g[hm>>2]*.5555702447891235;g[zm>>2]=+g[nm>>2]-+g[ym>>2];g[Im>>2]=+g[Em>>2]-+g[Hm>>2];g[Jm>>2]=+g[zm>>2]*.19509032368659973-+g[Im>>2]*.9807852506637573;g[$l>>2]=+g[Im>>2]*.19509032368659973+ +g[zm>>2]*.9807852506637573;g[Kk>>2]=+g[sl>>2]+ +g[Jk>>2];g[Km>>2]=+g[Kl>>2]+ +g[Jm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*46<<2)>>2]=+g[Kk>>2]-+g[Km>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Kk>>2]+ +g[Km>>2];g[bm>>2]=+g[Tl>>2]+ +g[Wl>>2];g[cm>>2]=+g[_l>>2]+ +g[$l>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*46<<2)>>2]=+g[bm>>2]-+g[cm>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[bm>>2]+ +g[cm>>2];g[Xl>>2]=+g[Tl>>2]-+g[Wl>>2];g[Yl>>2]=+g[Jm>>2]-+g[Kl>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*62<<2)>>2]=+g[Xl>>2]-+g[Yl>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[Xl>>2]+ +g[Yl>>2];g[Zl>>2]=+g[sl>>2]-+g[Jk>>2];g[am>>2]=+g[_l>>2]-+g[$l>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*62<<2)>>2]=+g[Zl>>2]-+g[am>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*30<<2)>>2]=+g[Zl>>2]+ +g[am>>2];g[fm>>2]=+g[dm>>2]+ 
+g[em>>2];g[Nm>>2]=+g[im>>2]+ +g[lm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*38<<2)>>2]=+g[fm>>2]-+g[Nm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[fm>>2]+ +g[Nm>>2];g[Wm>>2]=+g[Om>>2]+ +g[Pm>>2];g[wn>>2]=+g[Tm>>2]+ +g[Um>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*38<<2)>>2]=+g[Wm>>2]-+g[wn>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Wm>>2]+ +g[wn>>2];g[Qm>>2]=+g[Om>>2]-+g[Pm>>2];g[Rm>>2]=+g[lm>>2]-+g[im>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*54<<2)>>2]=+g[Qm>>2]-+g[Rm>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Qm>>2]+ +g[Rm>>2];g[Sm>>2]=+g[dm>>2]-+g[em>>2];g[Vm>>2]=+g[Tm>>2]-+g[Um>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*54<<2)>>2]=+g[Sm>>2]-+g[Vm>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*22<<2)>>2]=+g[Sm>>2]+ +g[Vm>>2];g[Af>>2]=+g[Ub>>2]+ +g[zc>>2];g[Bf>>2]=+g[sf>>2]+ +g[rf>>2];g[Cf>>2]=+g[Af>>2]-+g[Bf>>2];g[lh>>2]=+g[Af>>2]+ +g[Bf>>2];g[oh>>2]=+g[Lf>>2]+ +g[Mf>>2];g[ph>>2]=+g[Of>>2]+ +g[Pf>>2];g[qh>>2]=+g[oh>>2]*.9569403529167175+ +g[ph>>2]*.290284663438797;g[Ah>>2]=+g[ph>>2]*.9569403529167175-+g[oh>>2]*.290284663438797;g[rh>>2]=+g[Sf>>2]+ +g[Tf>>2];g[sh>>2]=+g[Vf>>2]+ +g[Wf>>2];g[th>>2]=+g[rh>>2]*.9569403529167175-+g[sh>>2]*.290284663438797;g[Bh>>2]=+g[rh>>2]*.290284663438797+ +g[sh>>2]*.9569403529167175;g[Ff>>2]=+g[Df>>2]*.8314695954322815-+g[Ef>>2]*.5555702447891235;g[If>>2]=+g[Gf>>2]*.8314695954322815+ +g[Hf>>2]*.5555702447891235;g[Jf>>2]=+g[Ff>>2]-+g[If>>2];g[wh>>2]=+g[Ff>>2]+ +g[If>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[Qf>>2]=+g[Of>>2]-+g[Pf>>2];g[Rf>>2]=+g[Nf>>2]*.4713967442512512+ +g[Qf>>2]*.8819212913513184;g[gh>>2]=+g[Qf>>2]*.4713967442512512-+g[Nf>>2]*.8819212913513184;g[zg>>2]=+g[Me>>2]+ +g[pf>>2];g[Ag>>2]=+g[Hc>>2]+ +g[Oc>>2];g[Bg>>2]=+g[zg>>2]-+g[Ag>>2];g[vh>>2]=+g[zg>>2]+ +g[Ag>>2];g[Cg>>2]=+g[Hf>>2]*.8314695954322815-+g[Gf>>2]*.5555702447891235;g[Dg>>2]=+g[Df>>2]*.5555702447891235+ +g[Ef>>2]*.8314695954322815;g[Eg>>2]=+g[Cg>>2]-+g[Dg>>2];g[mh>>2]=+g[Dg>>2]+ 
+g[Cg>>2];g[Uf>>2]=+g[Sf>>2]-+g[Tf>>2];g[wg>>2]=+g[Vf>>2]-+g[Wf>>2];g[xg>>2]=+g[Uf>>2]*.4713967442512512-+g[wg>>2]*.8819212913513184;g[hh>>2]=+g[Uf>>2]*.8819212913513184+ +g[wg>>2]*.4713967442512512;g[Kf>>2]=+g[Cf>>2]+ +g[Jf>>2];g[yg>>2]=+g[Rf>>2]+ +g[xg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*43<<2)>>2]=+g[Kf>>2]-+g[yg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Kf>>2]+ +g[yg>>2];g[jh>>2]=+g[Bg>>2]+ +g[Eg>>2];g[kh>>2]=+g[gh>>2]+ +g[hh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*43<<2)>>2]=+g[jh>>2]-+g[kh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[jh>>2]+ +g[kh>>2];g[Fg>>2]=+g[Bg>>2]-+g[Eg>>2];g[eh>>2]=+g[xg>>2]-+g[Rf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*59<<2)>>2]=+g[Fg>>2]-+g[eh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[Fg>>2]+ +g[eh>>2];g[fh>>2]=+g[Cf>>2]-+g[Jf>>2];g[ih>>2]=+g[gh>>2]-+g[hh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*59<<2)>>2]=+g[fh>>2]-+g[ih>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*27<<2)>>2]=+g[fh>>2]+ +g[ih>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[uh>>2]=+g[qh>>2]+ +g[th>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*35<<2)>>2]=+g[nh>>2]-+g[uh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[nh>>2]+ +g[uh>>2];g[Dh>>2]=+g[vh>>2]+ +g[wh>>2];g[Gg>>2]=+g[Ah>>2]+ +g[Bh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*35<<2)>>2]=+g[Dh>>2]-+g[Gg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Dh>>2]+ +g[Gg>>2];g[xh>>2]=+g[vh>>2]-+g[wh>>2];g[yh>>2]=+g[th>>2]-+g[qh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*51<<2)>>2]=+g[xh>>2]-+g[yh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[xh>>2]+ +g[yh>>2];g[zh>>2]=+g[lh>>2]-+g[mh>>2];g[Ch>>2]=+g[Ah>>2]-+g[Bh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*51<<2)>>2]=+g[zh>>2]-+g[Ch>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*19<<2)>>2]=+g[zh>>2]+ +g[Ch>>2];g[Dj>>2]=+g[Hg>>2]+ +g[Ig>>2];g[Ej>>2]=+g[Yh>>2]+ +g[Xh>>2];g[Fj>>2]=+g[Dj>>2]-+g[Ej>>2];g[rj>>2]=+g[Dj>>2]+ +g[Ej>>2];g[uj>>2]=+g[Oj>>2]+ +g[Pj>>2];g[vj>>2]=+g[Rj>>2]+ +g[Sj>>2];g[vk>>2]=+g[uj>>2]*.9951847195625305+ +g[vj>>2]*.0980171412229538;g[dl>>2]=+g[vj>>2]*.9951847195625305-+g[uj>>2]*.0980171412229538;g[wk>>2]=+g[Vj>>2]+ +g[Yi>>2];g[xk>>2]=+g[_i>>2]+ 
+g[$i>>2];g[yk>>2]=+g[wk>>2]*.9951847195625305-+g[xk>>2]*.0980171412229538;g[el>>2]=+g[wk>>2]*.0980171412229538+ +g[xk>>2]*.9951847195625305;g[Ij>>2]=+g[Gj>>2]*.9807852506637573-+g[Hj>>2]*.19509032368659973;g[Lj>>2]=+g[Jj>>2]*.19509032368659973+ +g[Kj>>2]*.9807852506637573;g[Mj>>2]=+g[Ij>>2]-+g[Lj>>2];g[Bk>>2]=+g[Ij>>2]+ +g[Lj>>2];g[Qj>>2]=+g[Oj>>2]-+g[Pj>>2];g[Tj>>2]=+g[Rj>>2]-+g[Sj>>2];g[Uj>>2]=+g[Qj>>2]*.6343932747840881+ +g[Tj>>2]*.7730104327201843;g[mj>>2]=+g[Tj>>2]*.6343932747840881-+g[Qj>>2]*.7730104327201843;g[dj>>2]=+g[Uh>>2]+ +g[Vh>>2];g[ej>>2]=+g[Mg>>2]+ +g[Pg>>2];g[fj>>2]=+g[dj>>2]-+g[ej>>2];g[Ak>>2]=+g[dj>>2]+ +g[ej>>2];g[gj>>2]=+g[Jj>>2]*.9807852506637573-+g[Kj>>2]*.19509032368659973;g[hj>>2]=+g[Hj>>2]*.9807852506637573+ +g[Gj>>2]*.19509032368659973;g[ij>>2]=+g[gj>>2]-+g[hj>>2];g[sj>>2]=+g[hj>>2]+ +g[gj>>2];g[Zi>>2]=+g[Vj>>2]-+g[Yi>>2];g[aj>>2]=+g[_i>>2]-+g[$i>>2];g[bj>>2]=+g[Zi>>2]*.6343932747840881-+g[aj>>2]*.7730104327201843;g[nj>>2]=+g[Zi>>2]*.7730104327201843+ +g[aj>>2]*.6343932747840881;g[Nj>>2]=+g[Fj>>2]+ +g[Mj>>2];g[cj>>2]=+g[Uj>>2]+ +g[bj>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*41<<2)>>2]=+g[Nj>>2]-+g[cj>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Nj>>2]+ +g[cj>>2];g[pj>>2]=+g[fj>>2]+ +g[ij>>2];g[qj>>2]=+g[mj>>2]+ +g[nj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*41<<2)>>2]=+g[pj>>2]-+g[qj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[pj>>2]+ +g[qj>>2];g[jj>>2]=+g[fj>>2]-+g[ij>>2];g[kj>>2]=+g[bj>>2]-+g[Uj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*57<<2)>>2]=+g[jj>>2]-+g[kj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[jj>>2]+ +g[kj>>2];g[lj>>2]=+g[Fj>>2]-+g[Mj>>2];g[oj>>2]=+g[mj>>2]-+g[nj>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*57<<2)>>2]=+g[lj>>2]-+g[oj>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*25<<2)>>2]=+g[lj>>2]+ +g[oj>>2];g[tj>>2]=+g[rj>>2]+ +g[sj>>2];g[zk>>2]=+g[vk>>2]+ +g[yk>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*33<<2)>>2]=+g[tj>>2]-+g[zk>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[tj>>2]+ +g[zk>>2];g[gl>>2]=+g[Ak>>2]+ +g[Bk>>2];g[hl>>2]=+g[dl>>2]+ 
+g[el>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*33<<2)>>2]=+g[gl>>2]-+g[hl>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[gl>>2]+ +g[hl>>2];g[Ck>>2]=+g[Ak>>2]-+g[Bk>>2];g[Dk>>2]=+g[yk>>2]-+g[vk>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*49<<2)>>2]=+g[Ck>>2]-+g[Dk>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[Ck>>2]+ +g[Dk>>2];g[Ek>>2]=+g[rj>>2]-+g[sj>>2];g[fl>>2]=+g[dl>>2]-+g[el>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*49<<2)>>2]=+g[Ek>>2]-+g[fl>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*17<<2)>>2]=+g[Ek>>2]+ +g[fl>>2];g[Ac>>2]=+g[Ub>>2]-+g[zc>>2];g[Pc>>2]=+g[Hc>>2]-+g[Oc>>2];g[Qc>>2]=+g[Ac>>2]-+g[Pc>>2];g[eg>>2]=+g[Ac>>2]+ +g[Pc>>2];g[hg>>2]=+g[Ud>>2]+ +g[rd>>2];g[ig>>2]=+g[Bd>>2]+ +g[de>>2];g[jg>>2]=+g[hg>>2]*.6343932747840881+ +g[ig>>2]*.7730104327201843;g[tg>>2]=+g[hg>>2]*.7730104327201843-+g[ig>>2]*.6343932747840881;g[kg>>2]=+g[We>>2]+ +g[te>>2];g[lg>>2]=+g[De>>2]+ +g[Ge>>2];g[mg>>2]=+g[kg>>2]*.7730104327201843-+g[lg>>2]*.6343932747840881;g[ug>>2]=+g[lg>>2]*.7730104327201843+ +g[kg>>2]*.6343932747840881;g[jc>>2]=+g[bc>>2]*.19509032368659973-+g[ic>>2]*.9807852506637573;g[bd>>2]=+g[uc>>2]*.19509032368659973+ +g[ad>>2]*.9807852506637573;g[cd>>2]=+g[jc>>2]-+g[bd>>2];g[pg>>2]=+g[jc>>2]+ +g[bd>>2];g[sd>>2]=+g[Ud>>2]-+g[rd>>2];g[ee>>2]=+g[Bd>>2]-+g[de>>2];g[fe>>2]=+g[sd>>2]*.9951847195625305+ +g[ee>>2]*.0980171412229538;g[$f>>2]=+g[sd>>2]*.0980171412229538-+g[ee>>2]*.9951847195625305;g[qf>>2]=+g[Me>>2]-+g[pf>>2];g[tf>>2]=+g[rf>>2]-+g[sf>>2];g[uf>>2]=+g[qf>>2]-+g[tf>>2];g[og>>2]=+g[qf>>2]+ +g[tf>>2];g[vf>>2]=+g[ad>>2]*.19509032368659973-+g[uc>>2]*.9807852506637573;g[wf>>2]=+g[bc>>2]*.9807852506637573+ +g[ic>>2]*.19509032368659973;g[Xf>>2]=+g[vf>>2]-+g[wf>>2];g[fg>>2]=+g[wf>>2]+ +g[vf>>2];g[ue>>2]=+g[We>>2]-+g[te>>2];g[He>>2]=+g[De>>2]-+g[Ge>>2];g[Ie>>2]=+g[ue>>2]*.0980171412229538-+g[He>>2]*.9951847195625305;g[ag>>2]=+g[He>>2]*.0980171412229538+ +g[ue>>2]*.9951847195625305;g[dd>>2]=+g[Qc>>2]+ +g[cd>>2];g[Je>>2]=+g[fe>>2]+ 
+g[Ie>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*47<<2)>>2]=+g[dd>>2]-+g[Je>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[dd>>2]+ +g[Je>>2];g[cg>>2]=+g[uf>>2]+ +g[Xf>>2];g[dg>>2]=+g[$f>>2]+ +g[ag>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*47<<2)>>2]=+g[cg>>2]-+g[dg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[cg>>2]+ +g[dg>>2];g[Yf>>2]=+g[uf>>2]-+g[Xf>>2];g[Zf>>2]=+g[Ie>>2]-+g[fe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*63<<2)>>2]=+g[Yf>>2]-+g[Zf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[Yf>>2]+ +g[Zf>>2];g[_f>>2]=+g[Qc>>2]-+g[cd>>2];g[bg>>2]=+g[$f>>2]-+g[ag>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*63<<2)>>2]=+g[_f>>2]-+g[bg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*31<<2)>>2]=+g[_f>>2]+ +g[bg>>2];g[gg>>2]=+g[eg>>2]+ +g[fg>>2];g[ng>>2]=+g[jg>>2]+ +g[mg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*39<<2)>>2]=+g[gg>>2]-+g[ng>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[gg>>2]+ +g[ng>>2];g[yf>>2]=+g[og>>2]+ +g[pg>>2];g[zf>>2]=+g[tg>>2]+ +g[ug>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*39<<2)>>2]=+g[yf>>2]-+g[zf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[yf>>2]+ +g[zf>>2];g[qg>>2]=+g[og>>2]-+g[pg>>2];g[rg>>2]=+g[mg>>2]-+g[jg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*55<<2)>>2]=+g[qg>>2]-+g[rg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[qg>>2]+ +g[rg>>2];g[sg>>2]=+g[eg>>2]-+g[fg>>2];g[xf>>2]=+g[tg>>2]-+g[ug>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*55<<2)>>2]=+g[sg>>2]-+g[xf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*23<<2)>>2]=+g[sg>>2]+ +g[xf>>2];g[Jg>>2]=+g[Hg>>2]-+g[Ig>>2];g[Qg>>2]=+g[Mg>>2]-+g[Pg>>2];g[Rg>>2]=+g[Jg>>2]-+g[Qg>>2];g[ki>>2]=+g[Jg>>2]+ +g[Qg>>2];g[Oi>>2]=+g[Jh>>2]+ +g[oi>>2];g[Pi>>2]=+g[si>>2]+ +g[vi>>2];g[Qi>>2]=+g[Oi>>2]*.4713967442512512+ +g[Pi>>2]*.8819212913513184;g[yj>>2]=+g[Oi>>2]*.8819212913513184-+g[Pi>>2]*.4713967442512512;g[Ri>>2]=+g[Ai>>2]+ +g[Hi>>2];g[Si>>2]=+g[Li>>2]+ +g[Qh>>2];g[Ti>>2]=+g[Ri>>2]*.8819212913513184-+g[Si>>2]*.4713967442512512;g[zj>>2]=+g[Si>>2]*.8819212913513184+ +g[Ri>>2]*.4713967442512512;g[Yg>>2]=+g[Ug>>2]*.5555702447891235-+g[Xg>>2]*.8314695954322815;g[dh>>2]=+g[$g>>2]*.8314695954322815+ 
+g[ch>>2]*.5555702447891235;g[Fh>>2]=+g[Yg>>2]-+g[dh>>2];g[Wi>>2]=+g[Yg>>2]+ +g[dh>>2];g[pi>>2]=+g[Jh>>2]-+g[oi>>2];g[wi>>2]=+g[si>>2]-+g[vi>>2];g[xi>>2]=+g[pi>>2]*.9569403529167175+ +g[wi>>2]*.290284663438797;g[fi>>2]=+g[pi>>2]*.290284663438797-+g[wi>>2]*.9569403529167175;g[Wh>>2]=+g[Uh>>2]-+g[Vh>>2];g[Zh>>2]=+g[Xh>>2]-+g[Yh>>2];g[_h>>2]=+g[Wh>>2]-+g[Zh>>2];g[Vi>>2]=+g[Wh>>2]+ +g[Zh>>2];g[$h>>2]=+g[$g>>2]*.5555702447891235-+g[ch>>2]*.8314695954322815;g[ai>>2]=+g[Xg>>2]*.5555702447891235+ +g[Ug>>2]*.8314695954322815;g[bi>>2]=+g[$h>>2]-+g[ai>>2];g[li>>2]=+g[ai>>2]+ +g[$h>>2];g[Ii>>2]=+g[Ai>>2]-+g[Hi>>2];g[Rh>>2]=+g[Li>>2]-+g[Qh>>2];g[Sh>>2]=+g[Ii>>2]*.290284663438797-+g[Rh>>2]*.9569403529167175;g[gi>>2]=+g[Rh>>2]*.290284663438797+ +g[Ii>>2]*.9569403529167175;g[Gh>>2]=+g[Rg>>2]+ +g[Fh>>2];g[Th>>2]=+g[xi>>2]+ +g[Sh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*45<<2)>>2]=+g[Gh>>2]-+g[Th>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Gh>>2]+ +g[Th>>2];g[ii>>2]=+g[_h>>2]+ +g[bi>>2];g[ji>>2]=+g[fi>>2]+ +g[gi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*45<<2)>>2]=+g[ii>>2]-+g[ji>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[ii>>2]+ +g[ji>>2];g[ci>>2]=+g[_h>>2]-+g[bi>>2];g[di>>2]=+g[Sh>>2]-+g[xi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*61<<2)>>2]=+g[ci>>2]-+g[di>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[ci>>2]+ +g[di>>2];g[ei>>2]=+g[Rg>>2]-+g[Fh>>2];g[hi>>2]=+g[fi>>2]-+g[gi>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*61<<2)>>2]=+g[ei>>2]-+g[hi>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*29<<2)>>2]=+g[ei>>2]+ +g[hi>>2];g[mi>>2]=+g[ki>>2]+ +g[li>>2];g[Ui>>2]=+g[Qi>>2]+ +g[Ti>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*37<<2)>>2]=+g[mi>>2]-+g[Ui>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[mi>>2]+ +g[Ui>>2];g[Bj>>2]=+g[Vi>>2]+ +g[Wi>>2];g[Cj>>2]=+g[yj>>2]+ +g[zj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*37<<2)>>2]=+g[Bj>>2]-+g[Cj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Bj>>2]+ +g[Cj>>2];g[Xi>>2]=+g[Vi>>2]-+g[Wi>>2];g[wj>>2]=+g[Ti>>2]-+g[Qi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*53<<2)>>2]=+g[Xi>>2]-+g[wj>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[Xi>>2]+ 
+g[wj>>2];g[xj>>2]=+g[ki>>2]-+g[li>>2];g[Aj>>2]=+g[yj>>2]-+g[zj>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*53<<2)>>2]=+g[xj>>2]-+g[Aj>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*21<<2)>>2]=+g[xj>>2]+ +g[Aj>>2];c[rq>>2]=(c[rq>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=sq;return}function Hi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,16,968);i=b;return}function Ii(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+192|0;m=da+188|0;n=da+184|0;o=da+180|0;p=da+176|0;q=da+172|0;r=da+168|0;ea=da+164|0;s=da+160|0;t=da+156|0;ca=da+144|0;w=da+140|0;F=da+136|0;U=da+132|0;$=da+128|0;z=da+124|0;G=da+120|0;C=da+116|0;H=da+112|0;E=da+108|0;I=da+104|0;M=da+100|0;Y=da+96|0;P=da+92|0;Z=da+88|0;V=da+84|0;aa=da+80|0;u=da+76|0;v=da+72|0;S=da+68|0;T=da+64|0;x=da+60|0;y=da+56|0;A=da+52|0;B=da+48|0;K=da+44|0;L=da+40|0;N=da+36|0;O=da+32|0;J=da+28|0;Q=da+24|0;R=da+20|0;W=da+16|0;X=da+12|0;_=da+8|0;ba=da+4|0;D=da;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ea>>2]=j;c[s>>2]=k;c[t>>2]=l;g[da+152>>2]=.8660253882408142;g[da+148>>2]=.5;c[ca>>2]=c[ea>>2];while(1){if((c[ca>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[w>>2]=+g[u>>2]-+g[v>>2];g[F>>2]=+g[u>>2]+ +g[v>>2];g[S>>2]=+g[c[n>>2]>>2];g[T>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[$>>2]=+g[S>>2]+ +g[T>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[y>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[G>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[B>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[H>>2]=+g[A>>2]+ +g[B>>2];g[E>>2]=+g[z>>2]+ 
+g[C>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[K>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[L>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[Y>>2]=+g[K>>2]+ +g[L>>2];g[N>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[Z>>2]=+g[N>>2]+ +g[O>>2];g[V>>2]=+g[M>>2]+ +g[P>>2];g[aa>>2]=+g[Y>>2]+ +g[Z>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[w>>2]+ +g[E>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[U>>2]+ +g[V>>2];g[c[o>>2]>>2]=+g[F>>2]+ +g[I>>2];g[c[p>>2]>>2]=+g[$>>2]+ +g[aa>>2];g[J>>2]=+g[w>>2]-+g[E>>2]*.5;g[Q>>2]=(+g[M>>2]-+g[P>>2])*.8660253882408142;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[J>>2]-+g[Q>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[J>>2]+ +g[Q>>2];g[R>>2]=(+g[C>>2]-+g[z>>2])*.8660253882408142;g[W>>2]=+g[U>>2]-+g[V>>2]*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[R>>2]+ +g[W>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[W>>2]-+g[R>>2];g[X>>2]=+g[F>>2]-+g[I>>2]*.5;g[_>>2]=(+g[Y>>2]-+g[Z>>2])*.8660253882408142;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[X>>2]-+g[_>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[X>>2]+ +g[_>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2]*.5;g[D>>2]=(+g[H>>2]-+g[G>>2])*.8660253882408142;g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ba>>2]-+g[D>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[D>>2]+ +g[ba>>2];c[ca>>2]=(c[ca>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=da;return}function Ji(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,17,1032);i=b;return}function Ki(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0;fa=i;i=i+224|0;m=fa+212|0;n=fa+208|0;o=fa+204|0;p=fa+200|0;q=fa+196|0;r=fa+192|0;ga=fa+188|0;s=fa+184|0;t=fa+180|0;ea=fa+152|0;u=fa+148|0;_=fa+144|0;x=fa+140|0;W=fa+136|0;K=fa+132|0;ba=fa+128|0;A=fa+124|0;Y=fa+120|0;Q=fa+116|0;$=fa+112|0;G=fa+108|0;X=fa+104|0;N=fa+100|0;aa=fa+96|0;v=fa+92|0;w=fa+88|0;I=fa+84|0;J=fa+80|0;y=fa+76|0;z=fa+72|0;O=fa+68|0;P=fa+64|0;B=fa+60|0;C=fa+56|0;L=fa+52|0;M=fa+48|0;R=fa+44|0;H=fa+40|0;E=fa+36|0;F=fa+32|0;T=fa+28|0;S=fa+24|0;da=fa+20|0;D=fa+16|0;V=fa+12|0;U=fa+8|0;Z=fa+4|0;ca=fa;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ga>>2]=j;c[s>>2]=k;c[t>>2]=l;g[fa+176>>2]=.22252093255519867;g[fa+172>>2]=.9009688496589661;g[fa+168>>2]=.6234897971153259;g[fa+164>>2]=.4338837265968323;g[fa+160>>2]=.7818315029144287;g[fa+156>>2]=.9749279022216797;c[ea>>2]=c[ga>>2];while(1){if((c[ea>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[_>>2]=+g[c[n>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[W>>2]=+g[w>>2]-+g[v>>2];g[I>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[J>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[ba>>2]=+g[I>>2]+ +g[J>>2];g[y>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[z>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[Y>>2]=+g[z>>2]-+g[y>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[P>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[$>>2]=+g[O>>2]+ +g[P>>2];g[B>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[C>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[G>>2]=+g[B>>2]+ +g[C>>2];g[X>>2]=+g[C>>2]-+g[B>>2];g[L>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[M>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[aa>>2]=+g[L>>2]+ +g[M>>2];g[c[o>>2]>>2]=+g[u>>2]+ +g[x>>2]+ +g[A>>2]+ 
+g[G>>2];g[c[p>>2]>>2]=+g[_>>2]+ +g[ba>>2]+ +g[$>>2]+ +g[aa>>2];g[R>>2]=+g[K>>2]*.9749279022216797-+g[N>>2]*.7818315029144287-+g[Q>>2]*.4338837265968323;g[H>>2]=+g[G>>2]*.6234897971153259+ +g[u>>2]+-(+g[A>>2]*.9009688496589661+ +g[x>>2]*.22252093255519867);g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[H>>2]-+g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[H>>2]+ +g[R>>2];g[E>>2]=+g[W>>2]*.9749279022216797-+g[X>>2]*.7818315029144287-+g[Y>>2]*.4338837265968323;g[F>>2]=+g[aa>>2]*.6234897971153259+ +g[_>>2]+-(+g[$>>2]*.9009688496589661+ +g[ba>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[E>>2]+ +g[F>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[F>>2]-+g[E>>2];g[T>>2]=+g[K>>2]*.7818315029144287+ +g[Q>>2]*.9749279022216797+ +g[N>>2]*.4338837265968323;g[S>>2]=+g[x>>2]*.6234897971153259+ +g[u>>2]+-(+g[G>>2]*.9009688496589661+ +g[A>>2]*.22252093255519867);g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[S>>2]-+g[T>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[S>>2]+ +g[T>>2];g[da>>2]=+g[W>>2]*.7818315029144287+ +g[Y>>2]*.9749279022216797+ +g[X>>2]*.4338837265968323;g[D>>2]=+g[ba>>2]*.6234897971153259+ +g[_>>2]+-(+g[aa>>2]*.9009688496589661+ +g[$>>2]*.22252093255519867);g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[da>>2]+ +g[D>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[D>>2]-+g[da>>2];g[V>>2]=+g[K>>2]*.4338837265968323+ +g[N>>2]*.9749279022216797-+g[Q>>2]*.7818315029144287;g[U>>2]=+g[A>>2]*.6234897971153259+ +g[u>>2]+-(+g[G>>2]*.22252093255519867+ +g[x>>2]*.9009688496589661);g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[U>>2]-+g[V>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[U>>2]+ +g[V>>2];g[Z>>2]=+g[W>>2]*.4338837265968323+ +g[X>>2]*.9749279022216797-+g[Y>>2]*.7818315029144287;g[ca>>2]=+g[$>>2]*.6234897971153259+ +g[_>>2]+-(+g[aa>>2]*.22252093255519867+ +g[ba>>2]*.9009688496589661);g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Z>>2]+ 
+g[ca>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ca>>2]-+g[Z>>2];c[ea>>2]=(c[ea>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=fa;return}function Li(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,18,1096);i=b;return}function Mi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0;ta=i;i=i+256|0;m=ta+248|0;n=ta+244|0;o=ta+240|0;p=ta+236|0;q=ta+232|0;r=ta+228|0;ua=ta+224|0;s=ta+220|0;t=ta+216|0;sa=ta+208|0;w=ta+204|0;fa=ta+200|0;aa=ta+196|0;F=ta+192|0;z=ta+188|0;E=ta+184|0;da=ta+180|0;ga=ta+176|0;X=ta+172|0;Q=ta+168|0;ra=ta+164|0;K=ta+160|0;U=ta+156|0;P=ta+152|0;ma=ta+148|0;J=ta+144|0;u=ta+140|0;v=ta+136|0;ba=ta+132|0;ca=ta+128|0;_=ta+124|0;$=ta+120|0;x=ta+116|0;y=ta+112|0;V=ta+108|0;W=ta+104|0;na=ta+100|0;oa=ta+96|0;pa=ta+92|0;qa=ta+88|0;B=ta+84|0;C=ta+80|0;ia=ta+76|0;ja=ta+72|0;ka=ta+68|0;la=ta+64|0;A=ta+60|0;Y=ta+56|0;S=ta+52|0;T=ta+48|0;Z=ta+44|0;ea=ta+40|0;O=ta+36|0;R=ta+32|0;ha=ta+28|0;D=ta+24|0;M=ta+20|0;N=ta+16|0;G=ta+12|0;H=ta+8|0;I=ta+4|0;L=ta;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ua>>2]=j;c[s>>2]=k;c[t>>2]=l;g[ta+212>>2]=.7071067690849304;c[sa>>2]=c[ua>>2];while(1){if((c[sa>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[fa>>2]=+g[u>>2]-+g[v>>2];g[_>>2]=+g[c[n>>2]>>2];g[$>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[F>>2]=+g[_>>2]-+g[$>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[y>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[z>>2]=+g[x>>2]+ 
+g[y>>2];g[E>>2]=+g[x>>2]-+g[y>>2];g[ba>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[ca>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[da>>2]=+g[ba>>2]+ +g[ca>>2];g[ga>>2]=+g[ba>>2]-+g[ca>>2];g[V>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[W>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[na>>2]=+g[V>>2]-+g[W>>2];g[oa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[pa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[Q>>2]=+g[oa>>2]+ +g[pa>>2];g[ra>>2]=+g[na>>2]-+g[qa>>2];g[K>>2]=+g[na>>2]+ +g[qa>>2];g[B>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[C>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[ia>>2]=+g[B>>2]-+g[C>>2];g[ja>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[ka>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2];g[U>>2]=+g[B>>2]+ +g[C>>2];g[P>>2]=+g[ja>>2]+ +g[ka>>2];g[ma>>2]=+g[ia>>2]+ +g[la>>2];g[J>>2]=+g[la>>2]-+g[ia>>2];g[A>>2]=+g[w>>2]+ +g[z>>2];g[Y>>2]=+g[U>>2]+ +g[X>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[A>>2]-+g[Y>>2];g[c[o>>2]>>2]=+g[A>>2]+ +g[Y>>2];g[S>>2]=+g[aa>>2]+ +g[da>>2];g[T>>2]=+g[P>>2]+ +g[Q>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[S>>2]-+g[T>>2];g[c[p>>2]>>2]=+g[S>>2]+ +g[T>>2];g[Z>>2]=+g[X>>2]-+g[U>>2];g[ea>>2]=+g[aa>>2]-+g[da>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Z>>2]+ +g[ea>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ea>>2]-+g[Z>>2];g[O>>2]=+g[w>>2]-+g[z>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[O>>2]-+g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[O>>2]+ +g[R>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[D>>2]=(+g[ma>>2]+ +g[ra>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ha>>2]-+g[D>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[ha>>2]+ +g[D>>2];g[M>>2]=+g[F>>2]-+g[E>>2];g[N>>2]=(+g[J>>2]+ +g[K>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[M>>2]-+g[N>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]+ +g[N>>2];g[G>>2]=+g[E>>2]+ 
+g[F>>2];g[H>>2]=(+g[ra>>2]-+g[ma>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[G>>2]-+g[H>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[G>>2]+ +g[H>>2];g[I>>2]=+g[fa>>2]-+g[ga>>2];g[L>>2]=(+g[J>>2]-+g[K>>2])*.7071067690849304;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[I>>2]-+g[L>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[I>>2]+ +g[L>>2];c[sa>>2]=(c[sa>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=ta;return}function Ni(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;hh(c[d>>2]|0,19,1160);i=b;return}function Oi(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0;Va=i;i=i+400|0;m=Va+388|0;n=Va+384|0;o=Va+380|0;p=Va+376|0;q=Va+372|0;r=Va+368|0;Wa=Va+364|0;s=Va+360|0;t=Va+356|0;Ua=Va+320|0;R=Va+316|0;ia=Va+312|0;Ba=Va+308|0;Ea=Va+304|0;L=Va+300|0;la=Va+296|0;ua=Va+292|0;H=Va+288|0;Ka=Va+284|0;qa=Va+280|0;Pa=Va+276|0;ra=Va+272|0;za=Va+268|0;I=Va+264|0;X=Va+260|0;v=Va+256|0;aa=Va+252|0;ta=Va+248|0;u=Va+244|0;O=Va+240|0;P=Va+236|0;Q=Va+232|0;ja=Va+228|0;Ca=Va+224|0;Da=Va+220|0;ka=Va+216|0;S=Va+212|0;Ma=Va+208|0;V=Va+204|0;La=Va+200|0;Ja=Va+196|0;Na=Va+192|0;Ga=Va+188|0;Oa=Va+184|0;T=Va+180|0;U=Va+176|0;Ha=Va+172|0;Ia=Va+168|0;va=Va+164|0;Z=Va+160|0;ya=Va+156|0;Y=Va+152|0;W=Va+148|0;_=Va+144|0;Ra=Va+140|0;$=Va+136|0;wa=Va+132|0;xa=Va+128|0;Sa=Va+124|0;Ta=Va+120|0;J=Va+116|0;Aa=Va+112|0;G=Va+108|0;K=Va+104|0;M=Va+100|0;N=Va+96|0;Fa=Va+92|0;ma=Va+88|0;ca=Va+84|0;ha=Va+80|0;ga=Va+76|0;na=Va+7
2|0;da=Va+68|0;oa=Va+64|0;Qa=Va+60|0;ba=Va+56|0;ea=Va+52|0;fa=Va+48|0;pa=Va+44|0;z=Va+40|0;x=Va+36|0;y=Va+32|0;C=Va+28|0;F=Va+24|0;D=Va+20|0;E=Va+16|0;sa=Va+12|0;w=Va+8|0;A=Va+4|0;B=Va;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Wa>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Va+352>>2]=.9396926164627075;g[Va+348>>2]=.3420201539993286;g[Va+344>>2]=.9848077297210693;g[Va+340>>2]=.1736481785774231;g[Va+336>>2]=.6427876353263855;g[Va+332>>2]=.7660444378852844;g[Va+328>>2]=.5;g[Va+324>>2]=.8660253882408142;c[Ua>>2]=c[Wa>>2];while(1){if((c[Ua>>2]|0)<=0)break;g[u>>2]=+g[c[m>>2]>>2];g[O>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[P>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[R>>2]=+g[u>>2]+ +g[Q>>2];g[ia>>2]=(+g[P>>2]-+g[O>>2])*.8660253882408142;g[Ba>>2]=+g[u>>2]-+g[Q>>2]*.5;g[ja>>2]=+g[c[n>>2]>>2];g[Ca>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*3<<2)>>2];g[Da>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*6<<2)>>2];g[ka>>2]=+g[Ca>>2]+ +g[Da>>2];g[Ea>>2]=(+g[Ca>>2]-+g[Da>>2])*.8660253882408142;g[L>>2]=+g[ja>>2]+ +g[ka>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2]*.5;g[S>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2)>>2];g[Ma>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2)>>2];g[T>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<2<<2)>>2];g[U>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[La>>2]=(+g[U>>2]-+g[T>>2])*.8660253882408142;g[Ha>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<2<<2)>>2];g[Ia>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*7<<2)>>2];g[Ja>>2]=(+g[Ha>>2]-+g[Ia>>2])*.8660253882408142;g[Na>>2]=+g[Ha>>2]+ +g[Ia>>2];g[ua>>2]=+g[S>>2]+ +g[V>>2];g[H>>2]=+g[Ma>>2]+ +g[Na>>2];g[Ga>>2]=+g[S>>2]-+g[V>>2]*.5;g[Ka>>2]=+g[Ga>>2]+ +g[Ja>>2];g[qa>>2]=+g[Ga>>2]-+g[Ja>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2]*.5;g[Pa>>2]=+g[La>>2]+ +g[Oa>>2];g[ra>>2]=+g[Oa>>2]-+g[La>>2];g[va>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<1<<2)>>2];g[Z>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<1<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[xa>>2]=+g[(c[m>>2]|0)+(c[q>>2]<<3<<2)>>2];g[ya>>2]=+g[wa>>2]+ 
+g[xa>>2];g[Y>>2]=(+g[xa>>2]-+g[wa>>2])*.8660253882408142;g[Sa>>2]=+g[(c[n>>2]|0)+((c[q>>2]|0)*5<<2)>>2];g[Ta>>2]=+g[(c[n>>2]|0)+(c[q>>2]<<3<<2)>>2];g[W>>2]=(+g[Sa>>2]-+g[Ta>>2])*.8660253882408142;g[_>>2]=+g[Sa>>2]+ +g[Ta>>2];g[za>>2]=+g[va>>2]+ +g[ya>>2];g[I>>2]=+g[Z>>2]+ +g[_>>2];g[Ra>>2]=+g[va>>2]-+g[ya>>2]*.5;g[X>>2]=+g[Ra>>2]+ +g[W>>2];g[v>>2]=+g[Ra>>2]-+g[W>>2];g[$>>2]=+g[Z>>2]-+g[_>>2]*.5;g[aa>>2]=+g[Y>>2]+ +g[$>>2];g[ta>>2]=+g[$>>2]-+g[Y>>2];g[J>>2]=(+g[H>>2]-+g[I>>2])*.8660253882408142;g[Aa>>2]=+g[ua>>2]+ +g[za>>2];g[G>>2]=+g[R>>2]-+g[Aa>>2]*.5;g[c[o>>2]>>2]=+g[R>>2]+ +g[Aa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[G>>2]+ +g[J>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[G>>2]-+g[J>>2];g[K>>2]=(+g[za>>2]-+g[ua>>2])*.8660253882408142;g[M>>2]=+g[H>>2]+ +g[I>>2];g[N>>2]=+g[L>>2]-+g[M>>2]*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[K>>2]+ +g[N>>2];g[c[p>>2]>>2]=+g[L>>2]+ +g[M>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[N>>2]-+g[K>>2];g[Fa>>2]=+g[Ba>>2]+ +g[Ea>>2];g[ma>>2]=+g[ia>>2]+ +g[la>>2];g[Qa>>2]=+g[Ka>>2]*.7660444378852844+ +g[Pa>>2]*.6427876353263855;g[ba>>2]=+g[X>>2]*.1736481785774231+ +g[aa>>2]*.9848077297210693;g[ca>>2]=+g[Qa>>2]+ +g[ba>>2];g[ha>>2]=(+g[ba>>2]-+g[Qa>>2])*.8660253882408142;g[ea>>2]=+g[Pa>>2]*.7660444378852844-+g[Ka>>2]*.6427876353263855;g[fa>>2]=+g[aa>>2]*.1736481785774231-+g[X>>2]*.9848077297210693;g[ga>>2]=(+g[ea>>2]-+g[fa>>2])*.8660253882408142;g[na>>2]=+g[ea>>2]+ +g[fa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Fa>>2]+ +g[ca>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[ma>>2]+ +g[na>>2];g[da>>2]=+g[Fa>>2]-+g[ca>>2]*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[da>>2]-+g[ga>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[da>>2]+ +g[ga>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2]*.5;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ha>>2]+ +g[oa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[oa>>2]-+g[ha>>2];g[pa>>2]=+g[Ba>>2]-+g[Ea>>2];g[z>>2]=+g[la>>2]-+g[ia>>2];g[sa>>2]=+g[qa>>2]*.1736481785774231+ 
+g[ra>>2]*.9848077297210693;g[w>>2]=+g[ta>>2]*.3420201539993286-+g[v>>2]*.9396926164627075;g[x>>2]=+g[sa>>2]+ +g[w>>2];g[y>>2]=(+g[w>>2]-+g[sa>>2])*.8660253882408142;g[A>>2]=+g[ra>>2]*.1736481785774231-+g[qa>>2]*.9848077297210693;g[B>>2]=+g[v>>2]*.3420201539993286+ +g[ta>>2]*.9396926164627075;g[C>>2]=+g[A>>2]-+g[B>>2];g[F>>2]=(+g[A>>2]+ +g[B>>2])*.8660253882408142;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[pa>>2]+ +g[x>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[z>>2]+ +g[C>>2];g[D>>2]=+g[z>>2]-+g[C>>2]*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[y>>2]+ +g[D>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[D>>2]-+g[y>>2];g[E>>2]=+g[pa>>2]-+g[x>>2]*.5;g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[E>>2]-+g[F>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[E>>2]+ +g[F>>2];c[Ua>>2]=(c[Ua>>2]|0)-1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[t>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=c[q>>2]^c[2998];c[r>>2]=c[r>>2]^c[2998]}i=Va;return}function Pi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;fh(c[d>>2]|0,1,1224);i=b;return}function Qi(a,b,d,e,f,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;l=J+96|0;m=J+92|0;n=J+88|0;o=J+84|0;p=J+80|0;K=J+76|0;q=J+72|0;r=J+68|0;I=J+64|0;s=J+60|0;t=J+56|0;v=J+52|0;x=J+48|0;y=J+44|0;z=J+40|0;A=J+36|0;B=J+32|0;D=J+28|0;F=J+24|0;G=J+20|0;H=J+16|0;C=J+12|0;E=J+8|0;u=J+4|0;w=J;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c[p>>2]=f;c[K>>2]=h;c[q>>2]=j;c[r>>2]=k;c[I>>2]=c[K>>2];c[n>>2]=(c[n>>2]|0)+(c[K>>2]<<1<<2);while(1){if((c[I>>2]|0)>=(c[q>>2]|0))break;g[s>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2];g[v>>2]=+g[s>>2]-+g[t>>2];g[x>>2]=+g[c[m>>2]>>2];g[y>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[A>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2)>>2];g[B>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[D>>2]=+g[A>>2]-+g[B>>2];g[F>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2)>>2];g[G>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[c[l>>2]>>2]=+g[s>>2]+ +g[t>>2];g[c[m>>2]>>2]=+g[x>>2]+ +g[y>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[A>>2]+ +g[B>>2];g[(c[m>>2]|0)+(c[o>>2]<<2)>>2]=+g[F>>2]+ +g[G>>2];g[C>>2]=+g[c[n>>2]>>2];g[E>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[C>>2]*+g[D>>2]+ +g[E>>2]*+g[H>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[C>>2]*+g[H>>2]-+g[E>>2]*+g[D>>2];g[u>>2]=+g[c[n>>2]>>2];g[w>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+(c[p>>2]<<2)>>2]=+g[u>>2]*+g[v>>2]+ +g[w>>2]*+g[z>>2];g[(c[m>>2]|0)+(c[p>>2]<<2)>>2]=+g[u>>2]*+g[z>>2]-+g[w>>2]*+g[v>>2];c[I>>2]=(c[I>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[r>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[r>>2]<<2);c[n>>2]=(c[n>>2]|0)+8}i=J;return}function Ri(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;fh(c[d>>2]|0,2,1288);i=b;return}function Si(a,b,d,e,f,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0;za=i;i=i+288|0;l=za+280|0;m=za+276|0;n=za+272|0;o=za+268|0;p=za+264|0;Aa=za+260|0;q=za+256|0;r=za+252|0;ya=za+240|0;s=za+236|0;v=za+232|0;x=za+228|0;aa=za+224|0;ba=za+220|0;ca=za+216|0;A=za+212|0;da=za+208|0;ja=za+204|0;ma=za+200|0;oa=za+196|0;ua=za+192|0;va=za+188|0;wa=za+184|0;ra=za+180|0;xa=za+176|0;S=za+172|0;T=za+168|0;O=za+164|0;U=za+160|0;G=za+156|0;J=za+152|0;L=za+148|0;R=za+144|0;t=za+140|0;u=za+136|0;pa=za+132|0;qa=za+128|0;y=za+124|0;z=za+120|0;ka=za+116|0;la=za+112|0;M=za+108|0;N=za+104|0;H=za+100|0;I=za+96|0;_=za+92|0;ea=za+88|0;w=za+84|0;$=za+80|0;X=za+76|0;Z=za+72|0;W=za+68|0;Y=za+64|0;D=za+60|0;F=za+56|0;C=za+52|0;E=za+48|0;sa=za+44|0;B=za+40|0;na=za+36|0;ta=za+32|0;P=za+28|0;V=za+24|0;K=za+20|0;Q=za+16|0;ga=za+12|0;ia=za+8|0;fa=za+4|0;ha=za;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c[p>>2]=f;c[Aa>>2]=h;c[q>>2]=j;c[r>>2]=k;g[za+248>>2]=.8660253882408142;g[za+244>>2]=.5;c[ya>>2]=c[Aa>>2];c[n>>2]=(c[n>>2]|0)+(c[Aa>>2]<<2<<2);while(1){if((c[ya>>2]|0)>=(c[q>>2]|0))break;g[s>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[x>>2]=+g[s>>2]-+g[v>>2]*.5;g[aa>>2]=(+g[u>>2]-+g[t>>2])*.8660253882408142;g[ba>>2]=+g[c[m>>2]>>2];g[y>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2)>>2];g[z>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2];g[ca>>2]=+g[y>>2]+ +g[z>>2];g[A>>2]=(+g[y>>2]-+g[z>>2])*.8660253882408142;g[da>>2]=+g[ba>>2]-+g[ca>>2]*.5;g[ja>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2)>>2];g[ka>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[la>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[ma>>2]=+g[ka>>2]+ 
+g[la>>2];g[oa>>2]=+g[ja>>2]-+g[ma>>2]*.5;g[ua>>2]=(+g[la>>2]-+g[ka>>2])*.8660253882408142;g[va>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2)>>2];g[pa>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[qa>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[wa>>2]=+g[pa>>2]+ +g[qa>>2];g[ra>>2]=(+g[pa>>2]-+g[qa>>2])*.8660253882408142;g[xa>>2]=+g[va>>2]-+g[wa>>2]*.5;g[S>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2];g[M>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[N>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[T>>2]=+g[M>>2]+ +g[N>>2];g[O>>2]=(+g[M>>2]-+g[N>>2])*.8660253882408142;g[U>>2]=+g[S>>2]-+g[T>>2]*.5;g[G>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[L>>2]=+g[G>>2]-+g[J>>2]*.5;g[R>>2]=(+g[I>>2]-+g[H>>2])*.8660253882408142;g[c[l>>2]>>2]=+g[s>>2]+ +g[v>>2];g[c[m>>2]>>2]=+g[ba>>2]+ +g[ca>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[ja>>2]+ +g[ma>>2];g[(c[m>>2]|0)+(c[o>>2]<<2)>>2]=+g[va>>2]+ +g[wa>>2];g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[S>>2]+ +g[T>>2];g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[G>>2]+ +g[J>>2];g[_>>2]=+g[x>>2]+ +g[A>>2];g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[w>>2]=+g[c[n>>2]>>2];g[$>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+(c[p>>2]<<2)>>2]=+g[w>>2]*+g[_>>2]+ +g[$>>2]*+g[ea>>2];g[(c[m>>2]|0)+(c[p>>2]<<2)>>2]=+g[w>>2]*+g[ea>>2]-+g[$>>2]*+g[_>>2];g[X>>2]=+g[L>>2]-+g[O>>2];g[Z>>2]=+g[U>>2]-+g[R>>2];g[W>>2]=+g[(c[n>>2]|0)+8>>2];g[Y>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[D>>2]=+g[oa>>2]-+g[ra>>2];g[F>>2]=+g[xa>>2]-+g[ua>>2];g[C>>2]=+g[(c[n>>2]|0)+8>>2];g[E>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[C>>2]*+g[D>>2]+ +g[E>>2]*+g[F>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[C>>2]*+g[F>>2]-+g[E>>2]*+g[D>>2];g[sa>>2]=+g[oa>>2]+ 
+g[ra>>2];g[B>>2]=+g[ua>>2]+ +g[xa>>2];g[na>>2]=+g[c[n>>2]>>2];g[ta>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[na>>2]*+g[sa>>2]+ +g[ta>>2]*+g[B>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[na>>2]*+g[B>>2]-+g[ta>>2]*+g[sa>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[V>>2]=+g[R>>2]+ +g[U>>2];g[K>>2]=+g[c[n>>2]>>2];g[Q>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[K>>2]*+g[P>>2]+ +g[Q>>2]*+g[V>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[K>>2]*+g[V>>2]-+g[Q>>2]*+g[P>>2];g[ga>>2]=+g[x>>2]-+g[A>>2];g[ia>>2]=+g[da>>2]-+g[aa>>2];g[fa>>2]=+g[(c[n>>2]|0)+8>>2];g[ha>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];c[ya>>2]=(c[ya>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[r>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[r>>2]<<2);c[n>>2]=(c[n>>2]|0)+16}i=za;return}function Ti(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;fh(c[d>>2]|0,3,1352);i=b;return}function Ui(a,b,d,e,f,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0;xb=i;i=i+496|0;l=xb+480|0;m=xb+476|0;n=xb+472|0;o=xb+468|0;p=xb+464|0;yb=xb+460|0;q=xb+456|0;r=xb+452|0;wb=xb+448|0;ra=xb+444|0;ab=xb+440|0;Za=xb+436|0;mb=xb+432|0;ua=xb+428|0;wa=xb+424|0;db=xb+420|0;nb=xb+416|0;rb=xb+412|0;Ea=xb+408|0;Ba=xb+404|0;Qa=xb+400|0;ub=xb+396|0;ya=xb+392|0;Ha=xb+388|0;Ra=xb+384|0;Va=xb+380|0;B=xb+376|0;y=xb+372|0;aa=xb+368|0;t=xb+364|0;v=xb+360|0;T=xb+356|0;ba=xb+352|0;fa=xb+348|0;D=xb+344|0;na=xb+340|0;P=xb+336|0;ia=xb+332|0;ka=xb+328|0;G=xb+324|0;Q=xb+320|0;s=xb+316|0;qa=xb+312|0;xa=xb+308|0;Ya=xb+304|0;sa=xb+300|0;ta=xb+296|0;bb=xb+292|0;cb=xb+288|0;pb=xb+284|0;qb=xb+280|0;za=xb+276|0;Aa=xb+272|0;sb=xb+268|0;tb=xb+264|0;Fa=xb+260|0;Ga=xb+256|0;Ta=xb+252|0;Ua=xb+248|0;w=xb+244|0;x=xb+240|0;Wa=xb+236|0;Xa=xb+232|0;C=xb+228|0;S=xb+224|0;da=xb+220|0;ea=xb+216|0;la=xb+212|0;ma=xb+208|0;ga=xb+204|0;ha=xb+200|0;E=xb+196|0;F=xb+192|0;_a=xb+188|0;eb=xb+184|0;va=xb+180|0;$a=xb+176|0;N=xb+172|0;R=xb+168|0;M=xb+164|0;O=xb+160|0;gb=xb+156|0;ib=xb+152|0;fb=xb+148|0;hb=xb+144|0;kb=xb+140|0;ob=xb+136|0;jb=xb+132|0;lb=xb+128|0;z=xb+124|0;U=xb+120|0;u=xb+116|0;A=xb+112|0;J=xb+108|0;L=xb+104|0;I=xb+100|0;K=xb+96|0;Oa=xb+92|0;Sa=xb+88|0;Na=xb+84|0;Pa=xb+80|0;W=xb+76|0;Y=xb+72|0;V=xb+68|0;X=xb+64|0;_=xb+60|0;ca=xb+56|0;Z=xb+52|0;$=xb+48|0;oa=xb+44|0;H=xb+40|0;ja=xb+36|0;pa=xb+32|0;Ka=xb+28|0;Ma=xb+24|0;Ja=xb+20|0;La=xb+16|0;Ca=xb+12|0;Ia=xb+8|0;vb=xb+4|0;Da=xb;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c
[p>>2]=f;c[yb>>2]=h;c[q>>2]=j;c[r>>2]=k;c[wb>>2]=c[yb>>2];c[n>>2]=(c[n>>2]|0)+((c[yb>>2]|0)*6<<2);while(1){if((c[wb>>2]|0)>=(c[q>>2]|0))break;g[s>>2]=+g[c[l>>2]>>2];g[qa>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2];g[ra>>2]=+g[s>>2]+ +g[qa>>2];g[ab>>2]=+g[s>>2]-+g[qa>>2];g[xa>>2]=+g[c[m>>2]>>2];g[Ya>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2];g[Za>>2]=+g[xa>>2]-+g[Ya>>2];g[mb>>2]=+g[xa>>2]+ +g[Ya>>2];g[sa>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2];g[ta>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[ua>>2]=+g[sa>>2]+ +g[ta>>2];g[wa>>2]=+g[sa>>2]-+g[ta>>2];g[bb>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2)>>2];g[cb>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[nb>>2]=+g[bb>>2]+ +g[cb>>2];g[pb>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[rb>>2]=+g[pb>>2]+ +g[qb>>2];g[Ea>>2]=+g[pb>>2]-+g[qb>>2];g[za>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2)>>2];g[Aa>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2];g[Qa>>2]=+g[za>>2]+ +g[Aa>>2];g[sb>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[tb>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[ub>>2]=+g[sb>>2]+ +g[tb>>2];g[ya>>2]=+g[sb>>2]-+g[tb>>2];g[Fa>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[Ga>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Ra>>2]=+g[Fa>>2]+ +g[Ga>>2];g[Ta>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[B>>2]=+g[Ta>>2]-+g[Ua>>2];g[w>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2];g[x>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[aa>>2]=+g[w>>2]+ +g[x>>2];g[Wa>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[Xa>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[t>>2]=+g[Wa>>2]+ 
+g[Xa>>2];g[v>>2]=+g[Wa>>2]-+g[Xa>>2];g[C>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[T>>2]=+g[C>>2]-+g[S>>2];g[ba>>2]=+g[C>>2]+ +g[S>>2];g[da>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[fa>>2]=+g[da>>2]+ +g[ea>>2];g[D>>2]=+g[da>>2]-+g[ea>>2];g[la>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[ma>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[P>>2]=+g[la>>2]+ +g[ma>>2];g[ga>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[ha>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[ka>>2]=+g[ga>>2]-+g[ha>>2];g[E>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[F>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[G>>2]=+g[E>>2]-+g[F>>2];g[Q>>2]=+g[E>>2]+ +g[F>>2];g[c[l>>2]>>2]=+g[ra>>2]+ +g[ua>>2];g[c[m>>2]>>2]=+g[mb>>2]+ +g[nb>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[rb>>2]+ +g[ub>>2];g[(c[m>>2]|0)+(c[o>>2]<<2)>>2]=+g[Qa>>2]+ +g[Ra>>2];g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[Va>>2]+ +g[t>>2];g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[P>>2]+ +g[Q>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[fa>>2]+ +g[ia>>2];g[_a>>2]=+g[wa>>2]+ +g[Za>>2];g[eb>>2]=+g[ab>>2]-+g[db>>2];g[va>>2]=+g[(c[n>>2]|0)+16>>2];g[$a>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[va>>2]*+g[_a>>2]-+g[$a>>2]*+g[eb>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[$a>>2]*+g[_a>>2]+ +g[va>>2]*+g[eb>>2];g[N>>2]=+g[fa>>2]-+g[ia>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[M>>2]=+g[(c[n>>2]|0)+8>>2];g[O>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[M>>2]*+g[N>>2]+ +g[O>>2]*+g[R>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[M>>2]*+g[R>>2]-+g[O>>2]*+g[N>>2];g[gb>>2]=+g[Za>>2]-+g[wa>>2];g[ib>>2]=+g[ab>>2]+ 
+g[db>>2];g[fb>>2]=+g[c[n>>2]>>2];g[hb>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[m>>2]|0)+(c[p>>2]<<2)>>2]=+g[fb>>2]*+g[gb>>2]-+g[hb>>2]*+g[ib>>2];g[(c[l>>2]|0)+(c[p>>2]<<2)>>2]=+g[hb>>2]*+g[gb>>2]+ +g[fb>>2]*+g[ib>>2];g[kb>>2]=+g[ra>>2]-+g[ua>>2];g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[jb>>2]=+g[(c[n>>2]|0)+8>>2];g[lb>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[jb>>2]*+g[kb>>2]+ +g[lb>>2]*+g[ob>>2];g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[jb>>2]*+g[ob>>2]-+g[lb>>2]*+g[kb>>2];g[z>>2]=+g[v>>2]+ +g[y>>2];g[U>>2]=+g[B>>2]-+g[T>>2];g[u>>2]=+g[(c[n>>2]|0)+16>>2];g[A>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[u>>2]*+g[z>>2]-+g[A>>2]*+g[U>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[A>>2]*+g[z>>2]+ +g[u>>2]*+g[U>>2];g[J>>2]=+g[na>>2]-+g[ka>>2];g[L>>2]=+g[D>>2]+ +g[G>>2];g[I>>2]=+g[c[n>>2]>>2];g[K>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[I>>2]*+g[J>>2]-+g[K>>2]*+g[L>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[K>>2]*+g[J>>2]+ +g[I>>2]*+g[L>>2];g[Oa>>2]=+g[rb>>2]-+g[ub>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Na>>2]=+g[(c[n>>2]|0)+8>>2];g[Pa>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[Na>>2]*+g[Oa>>2]+ +g[Pa>>2]*+g[Sa>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[Na>>2]*+g[Sa>>2]-+g[Pa>>2]*+g[Oa>>2];g[W>>2]=+g[y>>2]-+g[v>>2];g[Y>>2]=+g[B>>2]+ +g[T>>2];g[V>>2]=+g[c[n>>2]>>2];g[X>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[V>>2]*+g[W>>2]-+g[X>>2]*+g[Y>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[X>>2]*+g[W>>2]+ +g[V>>2]*+g[Y>>2];g[_>>2]=+g[Va>>2]-+g[t>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[Z>>2]=+g[(c[n>>2]|0)+8>>2];g[$>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[Z>>2]*+g[_>>2]+ +g[$>>2]*+g[ca>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[Z>>2]*+g[ca>>2]-+g[$>>2]*+g[_>>2];g[oa>>2]=+g[ka>>2]+ 
+g[na>>2];g[H>>2]=+g[D>>2]-+g[G>>2];g[ja>>2]=+g[(c[n>>2]|0)+16>>2];g[pa>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[ja>>2]*+g[oa>>2]-+g[pa>>2]*+g[H>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[pa>>2]*+g[oa>>2]+ +g[ja>>2]*+g[H>>2];g[Ka>>2]=+g[Ba>>2]-+g[ya>>2];g[Ma>>2]=+g[Ea>>2]+ +g[Ha>>2];g[Ja>>2]=+g[c[n>>2]>>2];g[La>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[Ja>>2]*+g[Ka>>2]-+g[La>>2]*+g[Ma>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[La>>2]*+g[Ka>>2]+ +g[Ja>>2]*+g[Ma>>2];g[Ca>>2]=+g[ya>>2]+ +g[Ba>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2];g[vb>>2]=+g[(c[n>>2]|0)+16>>2];g[Da>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[vb>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ia>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[vb>>2]*+g[Ia>>2];c[wb>>2]=(c[wb>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[r>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[r>>2]<<2);c[n>>2]=(c[n>>2]|0)+24}i=xb;return}function Vi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;fh(c[d>>2]|0,4,1416);i=b;return}function Wi(a,b,d,e,f,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0;Rd=i;i=i+1024|0;l=Rd+1008|0;m=Rd+1004|0;n=Rd+1e3|0;o=Rd+996|0;p=Rd+992|0;Sd=Rd+988|0;q=Rd+984|0;r=Rd+980|0;Qd=Rd+960|0;s=Rd+956|0;qd=Rd+952|0;Yc=Rd+948|0;Ld=Rd+944|0;Qc=Rd+940|0;rd=Rd+936|0;Fd=Rd+932|0;zd=Rd+928|0;Vc=Rd+924|0;Ed=Rd+920|0;Gd=Rd+916|0;Hd=Rd+912|0;dd=Rd+908|0;md=Rd+904|0;ta=Rd+900|0;ia=Rd+896|0;kd=Rd+892|0;nd=Rd+888|0;ca=Rd+884|0;y=Rd+880|0;qa=Rd+876|0;ba=Rd+872|0;da=Rd+868|0;ea=Rd+864|0;Aa=Rd+860|0;L=Rd+856|0;ob=Rd+852|0;Fa=Rd+848|0;J=Rd+844|0;M=Rd+840|0;_=Rd+836|0;U=Rd+832|0;lb=Rd+828|0;Z=Rd+824|0;$=Rd+820|0;aa=Rd+816|0;Hc=Rd+812|0;Bc=Rd+808|0;cc=Rd+804|0;Gc=Rd+800|0;Ic=Rd+796|0;Jc=Rd+792|0;Sb=Rd+788|0;sc=Rd+784|0;fc=Rd+780|0;Wb=Rd+776|0;qc=Rd+772|0;tc=Rd+768|0;vb=Rd+764|0;Eb=Rd+760|0;Lb=Rd+756|0;$a=Rd+752|0;Cb=Rd+748|0;Fb=Rd+744|0;Va=Rd+740|0;Pa=Rd+736|0;hb=Rd+732|0
;Ua=Rd+728|0;Wa=Rd+724|0;Xa=Rd+720|0;Pc=Rd+716|0;Kd=Rd+712|0;Mc=Rd+708|0;Jd=Rd+704|0;Nc=Rd+700|0;Oc=Rd+696|0;Ba=Rd+692|0;Kb=Rd+688|0;yd=Rd+684|0;Dd=Rd+680|0;vd=Rd+676|0;Cd=Rd+672|0;wd=Rd+668|0;xd=Rd+664|0;td=Rd+660|0;ud=Rd+656|0;jd=Rd+652|0;ha=Rd+648|0;gd=Rd+644|0;ga=Rd+640|0;hd=Rd+636|0;id=Rd+632|0;ed=Rd+628|0;fd=Rd+624|0;x=Rd+620|0;C=Rd+616|0;u=Rd+612|0;B=Rd+608|0;v=Rd+604|0;w=Rd+600|0;pd=Rd+596|0;t=Rd+592|0;I=Rd+588|0;Ea=Rd+584|0;F=Rd+580|0;Da=Rd+576|0;G=Rd+572|0;H=Rd+568|0;D=Rd+564|0;E=Rd+560|0;T=Rd+556|0;Y=Rd+552|0;Q=Rd+548|0;X=Rd+544|0;R=Rd+540|0;S=Rd+536|0;O=Rd+532|0;P=Rd+528|0;Ac=Rd+524|0;Fc=Rd+520|0;xc=Rd+516|0;Ec=Rd+512|0;yc=Rd+508|0;zc=Rd+504|0;vc=Rd+500|0;wc=Rd+496|0;pc=Rd+492|0;Vb=Rd+488|0;mc=Rd+484|0;Lc=Rd+480|0;nc=Rd+476|0;oc=Rd+472|0;Tb=Rd+468|0;Ub=Rd+464|0;Bb=Rd+460|0;_a=Rd+456|0;yb=Rd+452|0;Za=Rd+448|0;zb=Rd+444|0;Ab=Rd+440|0;wb=Rd+436|0;xb=Rd+432|0;Oa=Rd+428|0;Ta=Rd+424|0;Jb=Rd+420|0;Sa=Rd+416|0;Ma=Rd+412|0;Na=Rd+408|0;Hb=Rd+404|0;Ib=Rd+400|0;Ad=Rd+396|0;Od=Rd+392|0;Md=Rd+388|0;Sc=Rd+384|0;sd=Rd+380|0;Id=Rd+376|0;Rc=Rd+372|0;Bd=Rd+368|0;Nd=Rd+364|0;Pd=Rd+360|0;Wc=Rd+356|0;ad=Rd+352|0;_c=Rd+348|0;cd=Rd+344|0;Uc=Rd+340|0;Zc=Rd+336|0;Tc=Rd+332|0;Xc=Rd+328|0;$c=Rd+324|0;bd=Rd+320|0;mb=Rd+316|0;sb=Rd+312|0;qb=Rd+308|0;ub=Rd+304|0;kb=Rd+300|0;pb=Rd+296|0;La=Rd+292|0;nb=Rd+288|0;rb=Rd+284|0;tb=Rd+280|0;dc=Rd+276|0;jc=Rd+272|0;hc=Rd+268|0;lc=Rd+264|0;bc=Rd+260|0;gc=Rd+256|0;ac=Rd+252|0;ec=Rd+248|0;ic=Rd+244|0;kc=Rd+240|0;V=Rd+236|0;Ia=Rd+232|0;Ga=Rd+228|0;Ka=Rd+224|0;N=Rd+220|0;Ca=Rd+216|0;K=Rd+212|0;W=Rd+208|0;Ha=Rd+204|0;Ja=Rd+200|0;Qa=Rd+196|0;cb=Rd+192|0;ab=Rd+188|0;eb=Rd+184|0;Gb=Rd+180|0;Ya=Rd+176|0;Db=Rd+172|0;Ra=Rd+168|0;bb=Rd+164|0;db=Rd+160|0;ib=Rd+156|0;Pb=Rd+152|0;Nb=Rd+148|0;Rb=Rd+144|0;gb=Rd+140|0;Mb=Rd+136|0;fb=Rd+132|0;jb=Rd+128|0;Ob=Rd+124|0;Qb=Rd+120|0;Cc=Rd+116|0;Zb=Rd+112|0;Xb=Rd+108|0;$b=Rd+104|0;uc=Rd+100|0;Kc=Rd+96|0;rc=Rd+92|0;Dc=Rd+88|0;Yb=Rd+84|0;_b=Rd+80|0;ra=Rd+76|0;xa=Rd+72|0;va=Rd+68|0;za=Rd+64|0;pa=Rd+60|0;ua=Rd+56|0;oa=Rd+5
2|0;sa=Rd+48|0;wa=Rd+44|0;ya=Rd+40|0;z=Rd+36|0;la=Rd+32|0;ja=Rd+28|0;na=Rd+24|0;od=Rd+20|0;fa=Rd+16|0;ld=Rd+12|0;A=Rd+8|0;ka=Rd+4|0;ma=Rd;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c[p>>2]=f;c[Sd>>2]=h;c[q>>2]=j;c[r>>2]=k;g[Rd+976>>2]=.25;g[Rd+972>>2]=.5877852439880371;g[Rd+968>>2]=.9510565400123596;g[Rd+964>>2]=.55901700258255;c[Qd>>2]=c[Sd>>2];c[n>>2]=(c[n>>2]|0)+(c[Sd>>2]<<3<<2);while(1){if((c[Qd>>2]|0)>=(c[q>>2]|0))break;g[s>>2]=+g[c[l>>2]>>2];g[Nc>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2];g[Oc>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[Pc>>2]=+g[Nc>>2]+ +g[Oc>>2];g[Kd>>2]=+g[Nc>>2]-+g[Oc>>2];g[Ba>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2];g[Kb>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2<<2)>>2];g[Mc>>2]=+g[Ba>>2]+ +g[Kb>>2];g[Jd>>2]=+g[Ba>>2]-+g[Kb>>2];g[qd>>2]=(+g[Mc>>2]-+g[Pc>>2])*.55901700258255;g[Yc>>2]=+g[Kd>>2]*.9510565400123596-+g[Jd>>2]*.5877852439880371;g[Ld>>2]=+g[Jd>>2]*.9510565400123596+ +g[Kd>>2]*.5877852439880371;g[Qc>>2]=+g[Mc>>2]+ +g[Pc>>2];g[rd>>2]=+g[s>>2]-+g[Qc>>2]*.25;g[Fd>>2]=+g[c[m>>2]>>2];g[wd>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2];g[xd>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[yd>>2]=+g[wd>>2]-+g[xd>>2];g[Dd>>2]=+g[wd>>2]+ +g[xd>>2];g[td>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2)>>2];g[ud>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2<<2)>>2];g[vd>>2]=+g[td>>2]-+g[ud>>2];g[Cd>>2]=+g[td>>2]+ +g[ud>>2];g[zd>>2]=+g[vd>>2]*.9510565400123596+ +g[yd>>2]*.5877852439880371;g[Vc>>2]=+g[yd>>2]*.9510565400123596-+g[vd>>2]*.5877852439880371;g[Ed>>2]=(+g[Cd>>2]-+g[Dd>>2])*.55901700258255;g[Gd>>2]=+g[Cd>>2]+ +g[Dd>>2];g[Hd>>2]=+g[Fd>>2]-+g[Gd>>2]*.25;g[dd>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2)>>2];g[hd>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[id>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[jd>>2]=+g[hd>>2]+ +g[id>>2];g[ha>>2]=+g[hd>>2]-+g[id>>2];g[ed>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[fd>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2];g[gd>>2]=+g[ed>>2]+ 
+g[fd>>2];g[ga>>2]=+g[ed>>2]-+g[fd>>2];g[md>>2]=(+g[gd>>2]-+g[jd>>2])*.55901700258255;g[ta>>2]=+g[ha>>2]*.9510565400123596-+g[ga>>2]*.5877852439880371;g[ia>>2]=+g[ga>>2]*.9510565400123596+ +g[ha>>2]*.5877852439880371;g[kd>>2]=+g[gd>>2]+ +g[jd>>2];g[nd>>2]=+g[dd>>2]-+g[kd>>2]*.25;g[ca>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2)>>2];g[v>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[C>>2]=+g[v>>2]+ +g[w>>2];g[pd>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[t>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2];g[u>>2]=+g[pd>>2]-+g[t>>2];g[B>>2]=+g[pd>>2]+ +g[t>>2];g[y>>2]=+g[u>>2]*.9510565400123596+ +g[x>>2]*.5877852439880371;g[qa>>2]=+g[x>>2]*.9510565400123596-+g[u>>2]*.5877852439880371;g[ba>>2]=(+g[B>>2]-+g[C>>2])*.55901700258255;g[da>>2]=+g[B>>2]+ +g[C>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2]*.25;g[Aa>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2];g[G>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[Ea>>2]=+g[G>>2]-+g[H>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[E>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[Da>>2]=+g[D>>2]-+g[E>>2];g[L>>2]=(+g[F>>2]-+g[I>>2])*.55901700258255;g[ob>>2]=+g[Ea>>2]*.9510565400123596-+g[Da>>2]*.5877852439880371;g[Fa>>2]=+g[Da>>2]*.9510565400123596+ +g[Ea>>2]*.5877852439880371;g[J>>2]=+g[F>>2]+ +g[I>>2];g[M>>2]=+g[Aa>>2]-+g[J>>2]*.25;g[_>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2];g[R>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[Y>>2]=+g[R>>2]+ +g[S>>2];g[O>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[P>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[X>>2]=+g[O>>2]+ +g[P>>2];g[U>>2]=+g[Q>>2]*.9510565400123596+ 
+g[T>>2]*.5877852439880371;g[lb>>2]=+g[T>>2]*.9510565400123596-+g[Q>>2]*.5877852439880371;g[Z>>2]=(+g[X>>2]-+g[Y>>2])*.55901700258255;g[$>>2]=+g[X>>2]+ +g[Y>>2];g[aa>>2]=+g[_>>2]-+g[$>>2]*.25;g[Hc>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2<<2)>>2];g[yc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2];g[zc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[Fc>>2]=+g[yc>>2]+ +g[zc>>2];g[vc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2];g[wc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2];g[xc>>2]=+g[vc>>2]-+g[wc>>2];g[Ec>>2]=+g[vc>>2]+ +g[wc>>2];g[Bc>>2]=+g[xc>>2]*.9510565400123596+ +g[Ac>>2]*.5877852439880371;g[cc>>2]=+g[Ac>>2]*.9510565400123596-+g[xc>>2]*.5877852439880371;g[Gc>>2]=(+g[Ec>>2]-+g[Fc>>2])*.55901700258255;g[Ic>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2]*.25;g[Sb>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2<<2)>>2];g[nc>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2];g[oc>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2];g[pc>>2]=+g[nc>>2]+ +g[oc>>2];g[Vb>>2]=+g[nc>>2]-+g[oc>>2];g[Tb>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2];g[Ub>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2];g[mc>>2]=+g[Tb>>2]+ +g[Ub>>2];g[Lc>>2]=+g[Tb>>2]-+g[Ub>>2];g[sc>>2]=(+g[mc>>2]-+g[pc>>2])*.55901700258255;g[fc>>2]=+g[Vb>>2]*.9510565400123596-+g[Lc>>2]*.5877852439880371;g[Wb>>2]=+g[Lc>>2]*.9510565400123596+ +g[Vb>>2]*.5877852439880371;g[qc>>2]=+g[mc>>2]+ +g[pc>>2];g[tc>>2]=+g[Sb>>2]-+g[qc>>2]*.25;g[vb>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[zb>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[Ab>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Bb>>2]=+g[zb>>2]+ +g[Ab>>2];g[_a>>2]=+g[zb>>2]-+g[Ab>>2];g[wb>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[xb>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2];g[yb>>2]=+g[wb>>2]+ 
+g[xb>>2];g[Za>>2]=+g[wb>>2]-+g[xb>>2];g[Eb>>2]=(+g[yb>>2]-+g[Bb>>2])*.55901700258255;g[Lb>>2]=+g[_a>>2]*.9510565400123596-+g[Za>>2]*.5877852439880371;g[$a>>2]=+g[Za>>2]*.9510565400123596+ +g[_a>>2]*.5877852439880371;g[Cb>>2]=+g[yb>>2]+ +g[Bb>>2];g[Fb>>2]=+g[vb>>2]-+g[Cb>>2]*.25;g[Va>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[Ma>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[Na>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Ta>>2]=+g[Ma>>2]+ +g[Na>>2];g[Hb>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[Ib>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2];g[Jb>>2]=+g[Hb>>2]-+g[Ib>>2];g[Sa>>2]=+g[Hb>>2]+ +g[Ib>>2];g[Pa>>2]=+g[Jb>>2]*.9510565400123596+ +g[Oa>>2]*.5877852439880371;g[hb>>2]=+g[Oa>>2]*.9510565400123596-+g[Jb>>2]*.5877852439880371;g[Ua>>2]=(+g[Sa>>2]-+g[Ta>>2])*.55901700258255;g[Wa>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2]*.25;g[c[l>>2]>>2]=+g[s>>2]+ +g[Qc>>2];g[c[m>>2]>>2]=+g[Fd>>2]+ +g[Gd>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[dd>>2]+ +g[kd>>2];g[(c[m>>2]|0)+(c[o>>2]<<2)>>2]=+g[ca>>2]+ +g[da>>2];g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[Aa>>2]+ +g[J>>2];g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[_>>2]+ +g[$>>2];g[(c[m>>2]|0)+(c[o>>2]<<2<<2)>>2]=+g[Hc>>2]+ +g[Ic>>2];g[(c[l>>2]|0)+(c[o>>2]<<2<<2)>>2]=+g[Sb>>2]+ +g[qc>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[vb>>2]+ +g[Cb>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[Va>>2]+ +g[Wa>>2];g[sd>>2]=+g[qd>>2]+ +g[rd>>2];g[Ad>>2]=+g[sd>>2]+ +g[zd>>2];g[Od>>2]=+g[sd>>2]-+g[zd>>2];g[Id>>2]=+g[Ed>>2]+ +g[Hd>>2];g[Md>>2]=+g[Id>>2]-+g[Ld>>2];g[Sc>>2]=+g[Ld>>2]+ +g[Id>>2];g[Rc>>2]=+g[c[n>>2]>>2];g[Bd>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+(c[p>>2]<<2)>>2]=+g[Rc>>2]*+g[Ad>>2]+ +g[Bd>>2]*+g[Md>>2];g[(c[m>>2]|0)+(c[p>>2]<<2)>>2]=+g[Rc>>2]*+g[Md>>2]-+g[Bd>>2]*+g[Ad>>2];g[Nd>>2]=+g[(c[n>>2]|0)+24>>2];g[Pd>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+(c[p>>2]<<2<<2)>>2]=+g[Nd>>2]*+g[Od>>2]+ 
+g[Pd>>2]*+g[Sc>>2];g[(c[m>>2]|0)+(c[p>>2]<<2<<2)>>2]=+g[Nd>>2]*+g[Sc>>2]-+g[Pd>>2]*+g[Od>>2];g[Uc>>2]=+g[rd>>2]-+g[qd>>2];g[Wc>>2]=+g[Uc>>2]-+g[Vc>>2];g[ad>>2]=+g[Uc>>2]+ +g[Vc>>2];g[Zc>>2]=+g[Hd>>2]-+g[Ed>>2];g[_c>>2]=+g[Yc>>2]+ +g[Zc>>2];g[cd>>2]=+g[Zc>>2]-+g[Yc>>2];g[Tc>>2]=+g[(c[n>>2]|0)+8>>2];g[Xc>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[Tc>>2]*+g[Wc>>2]+ +g[Xc>>2]*+g[_c>>2];g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[Tc>>2]*+g[_c>>2]-+g[Xc>>2]*+g[Wc>>2];g[$c>>2]=+g[(c[n>>2]|0)+16>>2];g[bd>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[$c>>2]*+g[ad>>2]+ +g[bd>>2]*+g[cd>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[$c>>2]*+g[cd>>2]-+g[bd>>2]*+g[ad>>2];g[kb>>2]=+g[M>>2]-+g[L>>2];g[mb>>2]=+g[kb>>2]-+g[lb>>2];g[sb>>2]=+g[kb>>2]+ +g[lb>>2];g[pb>>2]=+g[aa>>2]-+g[Z>>2];g[qb>>2]=+g[ob>>2]+ +g[pb>>2];g[ub>>2]=+g[pb>>2]-+g[ob>>2];g[La>>2]=+g[(c[n>>2]|0)+8>>2];g[nb>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[La>>2]*+g[mb>>2]+ +g[nb>>2]*+g[qb>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[La>>2]*+g[qb>>2]-+g[nb>>2]*+g[mb>>2];g[rb>>2]=+g[(c[n>>2]|0)+16>>2];g[tb>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[rb>>2]*+g[sb>>2]+ +g[tb>>2]*+g[ub>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[rb>>2]*+g[ub>>2]-+g[tb>>2]*+g[sb>>2];g[bc>>2]=+g[tc>>2]-+g[sc>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[jc>>2]=+g[bc>>2]+ +g[cc>>2];g[gc>>2]=+g[Jc>>2]-+g[Gc>>2];g[hc>>2]=+g[fc>>2]+ +g[gc>>2];g[lc>>2]=+g[gc>>2]-+g[fc>>2];g[ac>>2]=+g[(c[n>>2]|0)+8>>2];g[ec>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2]=+g[ac>>2]*+g[dc>>2]+ +g[ec>>2]*+g[hc>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2]=+g[ac>>2]*+g[hc>>2]-+g[ec>>2]*+g[dc>>2];g[ic>>2]=+g[(c[n>>2]|0)+16>>2];g[kc>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2]=+g[ic>>2]*+g[jc>>2]+ 
+g[kc>>2]*+g[lc>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2]=+g[ic>>2]*+g[lc>>2]-+g[kc>>2]*+g[jc>>2];g[N>>2]=+g[L>>2]+ +g[M>>2];g[V>>2]=+g[N>>2]+ +g[U>>2];g[Ia>>2]=+g[N>>2]-+g[U>>2];g[Ca>>2]=+g[Z>>2]+ +g[aa>>2];g[Ga>>2]=+g[Ca>>2]-+g[Fa>>2];g[Ka>>2]=+g[Fa>>2]+ +g[Ca>>2];g[K>>2]=+g[c[n>>2]>>2];g[W>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[K>>2]*+g[V>>2]+ +g[W>>2]*+g[Ga>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[K>>2]*+g[Ga>>2]-+g[W>>2]*+g[V>>2];g[Ha>>2]=+g[(c[n>>2]|0)+24>>2];g[Ja>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[Ka>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2]=+g[Ha>>2]*+g[Ka>>2]-+g[Ja>>2]*+g[Ia>>2];g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[Qa>>2]=+g[Gb>>2]+ +g[Pa>>2];g[cb>>2]=+g[Gb>>2]-+g[Pa>>2];g[Ya>>2]=+g[Ua>>2]+ +g[Xa>>2];g[ab>>2]=+g[Ya>>2]-+g[$a>>2];g[eb>>2]=+g[$a>>2]+ +g[Ya>>2];g[Db>>2]=+g[c[n>>2]>>2];g[Ra>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Db>>2]*+g[Qa>>2]+ +g[Ra>>2]*+g[ab>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Db>>2]*+g[ab>>2]-+g[Ra>>2]*+g[Qa>>2];g[bb>>2]=+g[(c[n>>2]|0)+24>>2];g[db>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[bb>>2]*+g[cb>>2]+ +g[db>>2]*+g[eb>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[bb>>2]*+g[eb>>2]-+g[db>>2]*+g[cb>>2];g[gb>>2]=+g[Fb>>2]-+g[Eb>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[Pb>>2]=+g[gb>>2]+ +g[hb>>2];g[Mb>>2]=+g[Xa>>2]-+g[Ua>>2];g[Nb>>2]=+g[Lb>>2]+ +g[Mb>>2];g[Rb>>2]=+g[Mb>>2]-+g[Lb>>2];g[fb>>2]=+g[(c[n>>2]|0)+8>>2];g[jb>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[fb>>2]*+g[ib>>2]+ +g[jb>>2]*+g[Nb>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[fb>>2]*+g[Nb>>2]-+g[jb>>2]*+g[ib>>2];g[Ob>>2]=+g[(c[n>>2]|0)+16>>2];g[Qb>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Ob>>2]*+g[Pb>>2]+ 
+g[Qb>>2]*+g[Rb>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Ob>>2]*+g[Rb>>2]-+g[Qb>>2]*+g[Pb>>2];g[uc>>2]=+g[sc>>2]+ +g[tc>>2];g[Cc>>2]=+g[uc>>2]+ +g[Bc>>2];g[Zb>>2]=+g[uc>>2]-+g[Bc>>2];g[Kc>>2]=+g[Gc>>2]+ +g[Jc>>2];g[Xb>>2]=+g[Kc>>2]-+g[Wb>>2];g[$b>>2]=+g[Wb>>2]+ +g[Kc>>2];g[rc>>2]=+g[c[n>>2]>>2];g[Dc>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2]=+g[rc>>2]*+g[Cc>>2]+ +g[Dc>>2]*+g[Xb>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2]=+g[rc>>2]*+g[Xb>>2]-+g[Dc>>2]*+g[Cc>>2];g[Yb>>2]=+g[(c[n>>2]|0)+24>>2];g[_b>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2]=+g[Yb>>2]*+g[Zb>>2]+ +g[_b>>2]*+g[$b>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2]=+g[Yb>>2]*+g[$b>>2]-+g[_b>>2]*+g[Zb>>2];g[pa>>2]=+g[nd>>2]-+g[md>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[xa>>2]=+g[pa>>2]+ +g[qa>>2];g[ua>>2]=+g[ea>>2]-+g[ba>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[za>>2]=+g[ua>>2]-+g[ta>>2];g[oa>>2]=+g[(c[n>>2]|0)+8>>2];g[sa>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[oa>>2]*+g[ra>>2]+ +g[sa>>2]*+g[va>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[oa>>2]*+g[va>>2]-+g[sa>>2]*+g[ra>>2];g[wa>>2]=+g[(c[n>>2]|0)+16>>2];g[ya>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[od>>2]=+g[md>>2]+ +g[nd>>2];g[z>>2]=+g[od>>2]+ +g[y>>2];g[la>>2]=+g[od>>2]-+g[y>>2];g[fa>>2]=+g[ba>>2]+ +g[ea>>2];g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[na>>2]=+g[ia>>2]+ +g[fa>>2];g[ld>>2]=+g[c[n>>2]>>2];g[A>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[ld>>2]*+g[z>>2]+ +g[A>>2]*+g[ja>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[ld>>2]*+g[ja>>2]-+g[A>>2]*+g[z>>2];g[ka>>2]=+g[(c[n>>2]|0)+24>>2];g[ma>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2]=+g[ka>>2]*+g[la>>2]+ 
+g[ma>>2]*+g[na>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];c[Qd>>2]=(c[Qd>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[r>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[r>>2]<<2);c[n>>2]=(c[n>>2]|0)+32}i=Rd;return}function Xi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;fh(c[d>>2]|0,5,1480);i=b;return}function Yi(a,b,d,e,f,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf
=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0;Gf=i;i=i+1392|0;l=Gf+1384|0;m=Gf+1380|0;n=Gf+1376|0;o=Gf+1372|0;p=Gf+1368|0;Hf=Gf+1364|0;q=Gf+1360|0;r=Gf+1356|0;Ff=Gf+1344|0;Kb=Gf+1340|0;gf=Gf+1336|0;yf=Gf+1332|0;Se=Gf+1328|0;bf=Gf+1324|0;z=Gf+1320|0;oa=Gf+1316|0;J=Gf+1312|0;rb=Gf+1308|0;Ma=Gf+1304|0;U=Gf+1300|0;Ca=Gf+1296|0;Xa=Gf+1292|0;eb=Gf+1288|0;uc=Gf+1284|0;Nc=Gf+1280|0;_b=Gf+1276|0;hc=Gf+1272|0;Zc=Gf+1268|0;Qd=Gf+1264|0;Bd=Gf+1260|0;te=Gf+1256|0;$d=Gf+1252|0;kd=Gf+1248|0;ef=Gf+1244|0;Ve=Gf+1240|0;kf=Gf+1236|0;vf=Gf+1232|0;sf=Gf+1228|0;Te=Gf+1224|0;zf=Gf+1220|0;Pe=Gf+1216|0;x=Gf+1212|0;M=Gf+1208|0;C=Gf+1204|0;la=Gf+1200|0;ia=Gf+1196|0;K=Gf+1192|0;pa=Gf+1188|0;G=Gf+1184|0;lb=Gf+1180|0;Na=Gf+1176|0;sb=Gf+1172|0;Hb=Gf+1168|0;$=Gf+1164|0;Pa=Gf+1160|0;Fa=Gf+1156|0;ob=Gf+1152|0;cb=Gf+1148|0;Qc=Gf+1144|0;hb=Gf+1140|0;Tb=Gf+1136|0;Qb=Gf+1132|0;Oc=Gf+1128|0;vc=Gf+1124|0;Kc=Gf+1120|0;fc=Gf+1116|0;Td=Gf+1112|0;kc=Gf+1108|0;Wc=Gf+1104|0;sc=Gf+1100|0;Rd=Gf+1096|0;_c=Gf+1092|0;Nd=Gf+1088|0;vd=Gf+1084|0;ue=Gf+1080|0;be=Gf+1076|0;qe=Gf+1072|0;id=Gf+1068|0;we=Gf+1064|0;nd=Gf+1060|0;yd=Gf+1056|0;s=Gf+1052|0;Ba=Gf+1048|0;ma=Gf+1044|0;na=Gf+1040|0;wf=Gf+1036|0;xf=Gf+1032|0;$e=Gf+1028|0;af=Gf+1024|0;pb=Gf+1020|0;qb=Gf+1016|0;S=Gf+1012|0;T=Gf+1008|0;Va=Gf+1004|0;Wa=Gf+1e3|0;Xc=Gf+996|0;Yc=Gf+992|0;Ub=Gf+988|0;tc=Gf+984|0;Yb=Gf+980|0;Zb=Gf+976|0;zd=Gf+972|0;Ad=Gf+968|0;Zd=Gf+964|0;_d=Gf+960|0;Ce=Gf+956|0;hf=Gf+952|0;Fe=Gf+948|0;jf=Gf+944|0;Tc=Gf+940|0;ae=Gf+936|0;De=Gf+932|0;Ee=Gf+928|0;of=Gf+924|0;Ne=Gf+920|0;rf=Gf+916|0;Oe=Gf+912|0;mf=Gf+908|0;nf=Gf+904|0;pf=Gf+900|0;qf=Gf+896|0;t=Gf+892|0;A=Gf+888|0;w=Gf+884|0;B=Gf+880|0;cf=Gf+876|0;df=Gf+872|0;u=Gf+868|0;v=Gf+864|0;ea=Gf+860|0;E=Gf+856|0;ha=Gf+852|0;F=Gf+848|0;ca=Gf+844|0;da=Gf+840|0;fa=Gf+836|0;ga=Gf+832|0;Ja=Gf+828|0;Fb=Gf+824|0;kb=Gf+820|0;Gb=Gf+816|0;Ha=Gf+812|0;Ia=Gf+808|0;Ka=Gf+804|0;La=Gf+800|0;X=Gf+796|0;Da=Gf+792|0;_=Gf+788|0;Ea=Gf+784|0;V=Gf+780|0;W=Gf+776|0;Y=Gf+772|0;Z=Gf+
768|0;_a=Gf+764|0;fb=Gf+760|0;bb=Gf+756|0;gb=Gf+752|0;Ya=Gf+748|0;Za=Gf+744|0;$a=Gf+740|0;ab=Gf+736|0;Mb=Gf+732|0;Ic=Gf+728|0;Pb=Gf+724|0;Jc=Gf+720|0;jb=Gf+716|0;Lb=Gf+712|0;Nb=Gf+708|0;Ob=Gf+704|0;bc=Gf+700|0;ic=Gf+696|0;ec=Gf+692|0;jc=Gf+688|0;$b=Gf+684|0;ac=Gf+680|0;cc=Gf+676|0;dc=Gf+672|0;oc=Gf+668|0;Ld=Gf+664|0;rc=Gf+660|0;Md=Gf+656|0;mc=Gf+652|0;nc=Gf+648|0;pc=Gf+644|0;qc=Gf+640|0;rd=Gf+636|0;oe=Gf+632|0;ud=Gf+628|0;pe=Gf+624|0;pd=Gf+620|0;qd=Gf+616|0;sd=Gf+612|0;td=Gf+608|0;ed=Gf+604|0;ld=Gf+600|0;hd=Gf+596|0;md=Gf+592|0;cd=Gf+588|0;dd=Gf+584|0;fd=Gf+580|0;gd=Gf+576|0;xa=Gf+572|0;za=Gf+568|0;wa=Gf+564|0;ya=Gf+560|0;je=Gf+556|0;le=Gf+552|0;ie=Gf+548|0;ke=Gf+544|0;Ie=Gf+540|0;Ke=Gf+536|0;He=Gf+532|0;Je=Gf+528|0;Gd=Gf+524|0;Id=Gf+520|0;Fd=Gf+516|0;Hd=Gf+512|0;Dc=Gf+508|0;Fc=Gf+504|0;Cc=Gf+500|0;Ec=Gf+496|0;Ab=Gf+492|0;Cb=Gf+488|0;zb=Gf+484|0;Bb=Gf+480|0;re=Gf+476|0;ze=Gf+472|0;xe=Gf+468|0;Be=Gf+464|0;ne=Gf+460|0;ve=Gf+456|0;me=Gf+452|0;se=Gf+448|0;ye=Gf+444|0;Ae=Gf+440|0;tf=Gf+436|0;Df=Gf+432|0;Bf=Gf+428|0;Ge=Gf+424|0;lf=Gf+420|0;Af=Gf+416|0;ff=Gf+412|0;uf=Gf+408|0;Cf=Gf+404|0;Ef=Gf+400|0;Rb=Gf+396|0;zc=Gf+392|0;xc=Gf+388|0;Bc=Gf+384|0;ib=Gf+380|0;wc=Gf+376|0;db=Gf+372|0;Sb=Gf+368|0;yc=Gf+364|0;Ac=Gf+360|0;Ib=Gf+356|0;Sa=Gf+352|0;Qa=Gf+348|0;Ua=Gf+344|0;Eb=Gf+340|0;Oa=Gf+336|0;Db=Gf+332|0;Jb=Gf+328|0;Ra=Gf+324|0;Ta=Gf+320|0;Lc=Gf+316|0;Vb=Gf+312|0;Rc=Gf+308|0;Xb=Gf+304|0;Hc=Gf+300|0;Pc=Gf+296|0;Gc=Gf+292|0;Mc=Gf+288|0;Sc=Gf+284|0;Wb=Gf+280|0;H=Gf+276|0;P=Gf+272|0;N=Gf+268|0;R=Gf+264|0;D=Gf+260|0;L=Gf+256|0;Aa=Gf+252|0;I=Gf+248|0;O=Gf+244|0;Q=Gf+240|0;Od=Gf+236|0;Wd=Gf+232|0;Ud=Gf+228|0;Yd=Gf+224|0;Kd=Gf+220|0;Sd=Gf+216|0;Jd=Gf+212|0;Pd=Gf+208|0;Vd=Gf+204|0;Xd=Gf+200|0;Qe=Gf+196|0;Ye=Gf+192|0;We=Gf+188|0;_e=Gf+184|0;Me=Gf+180|0;Ue=Gf+176|0;Le=Gf+172|0;Re=Gf+168|0;Xe=Gf+164|0;Ze=Gf+160|0;mb=Gf+156|0;wb=Gf+152|0;ub=Gf+148|0;yb=Gf+144|0;Ga=Gf+140|0;tb=Gf+136|0;aa=Gf+132|0;nb=Gf+128|0;vb=Gf+124|0;xb=Gf+120|0;Uc=Gf+116|0;Cd=Gf+112|0;ad=Gf+108|0;Ed=Gf+104|0;lc=Gf+100|0
;$c=Gf+96|0;gc=Gf+92|0;Vc=Gf+88|0;bd=Gf+84|0;Dd=Gf+80|0;ja=Gf+76|0;ta=Gf+72|0;ra=Gf+68|0;va=Gf+64|0;ba=Gf+60|0;qa=Gf+56|0;y=Gf+52|0;ka=Gf+48|0;sa=Gf+44|0;ua=Gf+40|0;wd=Gf+36|0;fe=Gf+32|0;de=Gf+28|0;he=Gf+24|0;od=Gf+20|0;ce=Gf+16|0;jd=Gf+12|0;xd=Gf+8|0;ee=Gf+4|0;ge=Gf;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c[p>>2]=f;c[Hf>>2]=h;c[q>>2]=j;c[r>>2]=k;g[Gf+1352>>2]=.5;g[Gf+1348>>2]=.8660253882408142;c[Ff>>2]=c[Hf>>2];c[n>>2]=(c[n>>2]|0)+((c[Hf>>2]|0)*10<<2);while(1){if((c[Ff>>2]|0)>=(c[q>>2]|0))break;g[s>>2]=+g[c[l>>2]>>2];g[Ba>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[Kb>>2]=+g[s>>2]+ +g[Ba>>2];g[gf>>2]=+g[s>>2]-+g[Ba>>2];g[wf>>2]=+g[c[m>>2]>>2];g[xf>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[yf>>2]=+g[wf>>2]-+g[xf>>2];g[Se>>2]=+g[wf>>2]+ +g[xf>>2];g[$e>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2)>>2];g[af>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[z>>2]=+g[$e>>2]-+g[af>>2];g[ma>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2)>>2];g[na>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[J>>2]=+g[ma>>2]+ +g[na>>2];g[pb>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2];g[qb>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[rb>>2]=+g[pb>>2]-+g[qb>>2];g[Ma>>2]=+g[pb>>2]+ +g[qb>>2];g[S>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2];g[T>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[Ca>>2]=+g[S>>2]-+g[T>>2];g[Va>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[Wa>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Xa>>2]=+g[Va>>2]+ +g[Wa>>2];g[eb>>2]=+g[Va>>2]-+g[Wa>>2];g[Ub>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[tc>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[uc>>2]=+g[Ub>>2]-+g[tc>>2];g[Nc>>2]=+g[Ub>>2]+ +g[tc>>2];g[Yb>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2<<2)>>2];g[Zb>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2];g[_b>>2]=+g[Yb>>2]+ 
+g[Zb>>2];g[hc>>2]=+g[Yb>>2]-+g[Zb>>2];g[Xc>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2<<2)>>2];g[Yc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2];g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[Qd>>2]=+g[Xc>>2]+ +g[Yc>>2];g[zd>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*5<<2)>>2];g[Ad>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Bd>>2]=+g[zd>>2]-+g[Ad>>2];g[te>>2]=+g[zd>>2]+ +g[Ad>>2];g[Zd>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*5<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[$d>>2]=+g[Zd>>2]+ +g[_d>>2];g[kd>>2]=+g[Zd>>2]-+g[_d>>2];g[Tc>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2];g[ae>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*5<<2)>>2];g[Ce>>2]=+g[Tc>>2]+ +g[ae>>2];g[hf>>2]=+g[Tc>>2]-+g[ae>>2];g[De>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2<<2)>>2];g[Ee>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2];g[Fe>>2]=+g[De>>2]+ +g[Ee>>2];g[jf>>2]=+g[De>>2]-+g[Ee>>2];g[ef>>2]=+g[Ce>>2]+ +g[Fe>>2];g[Ve>>2]=(+g[Fe>>2]-+g[Ce>>2])*.8660253882408142;g[kf>>2]=+g[hf>>2]+ +g[jf>>2];g[vf>>2]=(+g[jf>>2]-+g[hf>>2])*.8660253882408142;g[mf>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2];g[nf>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*5<<2)>>2];g[of>>2]=+g[mf>>2]-+g[nf>>2];g[Ne>>2]=+g[mf>>2]+ +g[nf>>2];g[pf>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2<<2)>>2];g[qf>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2)>>2];g[rf>>2]=+g[pf>>2]-+g[qf>>2];g[Oe>>2]=+g[pf>>2]+ +g[qf>>2];g[sf>>2]=(+g[of>>2]-+g[rf>>2])*.8660253882408142;g[Te>>2]=+g[Ne>>2]+ +g[Oe>>2];g[zf>>2]=+g[of>>2]+ +g[rf>>2];g[Pe>>2]=(+g[Ne>>2]-+g[Oe>>2])*.8660253882408142;g[cf>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[df>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[t>>2]=+g[cf>>2]+ +g[df>>2];g[A>>2]=+g[cf>>2]-+g[df>>2];g[u>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[B>>2]=+g[u>>2]-+g[v>>2];g[x>>2]=+g[t>>2]+ +g[w>>2];g[M>>2]=(+g[w>>2]-+g[t>>2])*.8660253882408142;g[C>>2]=+g[A>>2]+ 
+g[B>>2];g[la>>2]=(+g[B>>2]-+g[A>>2])*.8660253882408142;g[ca>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[E>>2]=+g[ca>>2]+ +g[da>>2];g[fa>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2];g[ga>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[F>>2]=+g[fa>>2]+ +g[ga>>2];g[ia>>2]=(+g[ea>>2]-+g[ha>>2])*.8660253882408142;g[K>>2]=+g[E>>2]+ +g[F>>2];g[pa>>2]=+g[ea>>2]+ +g[ha>>2];g[G>>2]=(+g[E>>2]-+g[F>>2])*.8660253882408142;g[Ha>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[Ia>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2];g[Ja>>2]=+g[Ha>>2]-+g[Ia>>2];g[Fb>>2]=+g[Ha>>2]+ +g[Ia>>2];g[Ka>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2];g[La>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[kb>>2]=+g[Ka>>2]-+g[La>>2];g[Gb>>2]=+g[Ka>>2]+ +g[La>>2];g[lb>>2]=(+g[Ja>>2]-+g[kb>>2])*.8660253882408142;g[Na>>2]=+g[Fb>>2]+ +g[Gb>>2];g[sb>>2]=+g[Ja>>2]+ +g[kb>>2];g[Hb>>2]=(+g[Fb>>2]-+g[Gb>>2])*.8660253882408142;g[V>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[W>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[Da>>2]=+g[V>>2]-+g[W>>2];g[Y>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2];g[Z>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[Ea>>2]=+g[Y>>2]-+g[Z>>2];g[$>>2]=+g[X>>2]+ +g[_>>2];g[Pa>>2]=(+g[_>>2]-+g[X>>2])*.8660253882408142;g[Fa>>2]=+g[Da>>2]+ +g[Ea>>2];g[ob>>2]=(+g[Ea>>2]-+g[Da>>2])*.8660253882408142;g[Ya>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[Za>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[_a>>2]=+g[Ya>>2]+ +g[Za>>2];g[fb>>2]=+g[Ya>>2]-+g[Za>>2];g[$a>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2];g[ab>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[gb>>2]=+g[$a>>2]-+g[ab>>2];g[cb>>2]=+g[_a>>2]+ 
+g[bb>>2];g[Qc>>2]=(+g[bb>>2]-+g[_a>>2])*.8660253882408142;g[hb>>2]=+g[fb>>2]+ +g[gb>>2];g[Tb>>2]=(+g[gb>>2]-+g[fb>>2])*.8660253882408142;g[jb>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[Lb>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[Mb>>2]=+g[jb>>2]-+g[Lb>>2];g[Ic>>2]=+g[jb>>2]+ +g[Lb>>2];g[Nb>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2];g[Ob>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2];g[Jc>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Qb>>2]=(+g[Mb>>2]-+g[Pb>>2])*.8660253882408142;g[Oc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[vc>>2]=+g[Mb>>2]+ +g[Pb>>2];g[Kc>>2]=(+g[Ic>>2]-+g[Jc>>2])*.8660253882408142;g[$b>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2];g[ac>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[ic>>2]=+g[$b>>2]-+g[ac>>2];g[cc>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2];g[dc>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2];g[ec>>2]=+g[cc>>2]+ +g[dc>>2];g[jc>>2]=+g[cc>>2]-+g[dc>>2];g[fc>>2]=+g[bc>>2]+ +g[ec>>2];g[Td>>2]=(+g[ec>>2]-+g[bc>>2])*.8660253882408142;g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[Wc>>2]=(+g[jc>>2]-+g[ic>>2])*.8660253882408142;g[mc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2];g[nc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[Ld>>2]=+g[mc>>2]+ +g[nc>>2];g[pc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2];g[qc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2];g[rc>>2]=+g[pc>>2]-+g[qc>>2];g[Md>>2]=+g[pc>>2]+ +g[qc>>2];g[sc>>2]=(+g[oc>>2]-+g[rc>>2])*.8660253882408142;g[Rd>>2]=+g[Ld>>2]+ +g[Md>>2];g[_c>>2]=+g[oc>>2]+ +g[rc>>2];g[Nd>>2]=(+g[Ld>>2]-+g[Md>>2])*.8660253882408142;g[pd>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2];g[qd>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[oe>>2]=+g[pd>>2]+ 
+g[qd>>2];g[sd>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2];g[td>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[pe>>2]=+g[sd>>2]+ +g[td>>2];g[vd>>2]=(+g[rd>>2]-+g[ud>>2])*.8660253882408142;g[ue>>2]=+g[oe>>2]+ +g[pe>>2];g[be>>2]=+g[rd>>2]+ +g[ud>>2];g[qe>>2]=(+g[oe>>2]-+g[pe>>2])*.8660253882408142;g[cd>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2];g[dd>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[ed>>2]=+g[cd>>2]+ +g[dd>>2];g[ld>>2]=+g[cd>>2]-+g[dd>>2];g[fd>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2];g[gd>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2];g[hd>>2]=+g[fd>>2]+ +g[gd>>2];g[md>>2]=+g[fd>>2]-+g[gd>>2];g[id>>2]=+g[ed>>2]+ +g[hd>>2];g[we>>2]=(+g[hd>>2]-+g[ed>>2])*.8660253882408142;g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[yd>>2]=(+g[md>>2]-+g[ld>>2])*.8660253882408142;g[c[l>>2]>>2]=+g[Kb>>2]+ +g[ef>>2];g[c[m>>2]>>2]=+g[Se>>2]+ +g[Te>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[bf>>2]+ +g[x>>2];g[(c[m>>2]|0)+(c[o>>2]<<2)>>2]=+g[J>>2]+ +g[K>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[Xa>>2]+ +g[cb>>2];g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[U>>2]+ +g[$>>2];g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[Ma>>2]+ +g[Na>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[Nc>>2]+ +g[Oc>>2];g[(c[m>>2]|0)+(c[o>>2]<<2<<2)>>2]=+g[Qd>>2]+ +g[Rd>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*5<<2)>>2]=+g[te>>2]+ +g[ue>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*5<<2)>>2]=+g[$d>>2]+ +g[id>>2];g[(c[l>>2]|0)+(c[o>>2]<<2<<2)>>2]=+g[_b>>2]+ +g[fc>>2];g[xa>>2]=+g[z>>2]+ +g[C>>2];g[za>>2]=+g[oa>>2]+ +g[pa>>2];g[wa>>2]=+g[(c[n>>2]|0)+16>>2];g[ya>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[je>>2]=+g[kd>>2]+ +g[nd>>2];g[le>>2]=+g[Bd>>2]+ 
+g[be>>2];g[ie>>2]=+g[(c[n>>2]|0)+16>>2];g[ke>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[ie>>2]*+g[je>>2]+ +g[ke>>2]*+g[le>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[ie>>2]*+g[le>>2]-+g[ke>>2]*+g[je>>2];g[Ie>>2]=+g[gf>>2]+ +g[kf>>2];g[Ke>>2]=+g[yf>>2]+ +g[zf>>2];g[He>>2]=+g[(c[n>>2]|0)+16>>2];g[Je>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[He>>2]*+g[Ie>>2]+ +g[Je>>2]*+g[Ke>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[He>>2]*+g[Ke>>2]-+g[Je>>2]*+g[Ie>>2];g[Gd>>2]=+g[hc>>2]+ +g[kc>>2];g[Id>>2]=+g[Zc>>2]+ +g[_c>>2];g[Fd>>2]=+g[(c[n>>2]|0)+16>>2];g[Hd>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2]=+g[Fd>>2]*+g[Gd>>2]+ +g[Hd>>2]*+g[Id>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2]=+g[Fd>>2]*+g[Id>>2]-+g[Hd>>2]*+g[Gd>>2];g[Dc>>2]=+g[eb>>2]+ +g[hb>>2];g[Fc>>2]=+g[uc>>2]+ +g[vc>>2];g[Cc>>2]=+g[(c[n>>2]|0)+16>>2];g[Ec>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[Ab>>2]=+g[Ca>>2]+ +g[Fa>>2];g[Cb>>2]=+g[rb>>2]+ +g[sb>>2];g[zb>>2]=+g[(c[n>>2]|0)+16>>2];g[Bb>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[zb>>2]*+g[Ab>>2]+ +g[Bb>>2]*+g[Cb>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[zb>>2]*+g[Cb>>2]-+g[Bb>>2]*+g[Ab>>2];g[ne>>2]=+g[$d>>2]-+g[id>>2]*.5;g[re>>2]=+g[ne>>2]-+g[qe>>2];g[ze>>2]=+g[ne>>2]+ +g[qe>>2];g[ve>>2]=+g[te>>2]-+g[ue>>2]*.5;g[xe>>2]=+g[ve>>2]-+g[we>>2];g[Be>>2]=+g[we>>2]+ +g[ve>>2];g[me>>2]=+g[(c[n>>2]|0)+8>>2];g[se>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[me>>2]*+g[re>>2]+ 
+g[se>>2]*+g[xe>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[me>>2]*+g[xe>>2]-+g[se>>2]*+g[re>>2];g[ye>>2]=+g[(c[n>>2]|0)+24>>2];g[Ae>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[ye>>2]*+g[ze>>2]+ +g[Ae>>2]*+g[Be>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[ye>>2]*+g[Be>>2]-+g[Ae>>2]*+g[ze>>2];g[lf>>2]=+g[gf>>2]-+g[kf>>2]*.5;g[tf>>2]=+g[lf>>2]+ +g[sf>>2];g[Df>>2]=+g[lf>>2]-+g[sf>>2];g[Af>>2]=+g[yf>>2]-+g[zf>>2]*.5;g[Bf>>2]=+g[vf>>2]+ +g[Af>>2];g[Ge>>2]=+g[Af>>2]-+g[vf>>2];g[ff>>2]=+g[c[n>>2]>>2];g[uf>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+(c[p>>2]<<2)>>2]=+g[ff>>2]*+g[tf>>2]+ +g[uf>>2]*+g[Bf>>2];g[(c[m>>2]|0)+(c[p>>2]<<2)>>2]=+g[ff>>2]*+g[Bf>>2]-+g[uf>>2]*+g[tf>>2];g[Cf>>2]=+g[(c[n>>2]|0)+32>>2];g[Ef>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*5<<2)>>2]=+g[Cf>>2]*+g[Df>>2]+ +g[Ef>>2]*+g[Ge>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*5<<2)>>2]=+g[Cf>>2]*+g[Ge>>2]-+g[Ef>>2]*+g[Df>>2];g[ib>>2]=+g[eb>>2]-+g[hb>>2]*.5;g[Rb>>2]=+g[ib>>2]+ +g[Qb>>2];g[zc>>2]=+g[ib>>2]-+g[Qb>>2];g[wc>>2]=+g[uc>>2]-+g[vc>>2]*.5;g[xc>>2]=+g[Tb>>2]+ +g[wc>>2];g[Bc>>2]=+g[wc>>2]-+g[Tb>>2];g[db>>2]=+g[c[n>>2]>>2];g[Sb>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[db>>2]*+g[Rb>>2]+ +g[Sb>>2]*+g[xc>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[db>>2]*+g[xc>>2]-+g[Sb>>2]*+g[Rb>>2];g[yc>>2]=+g[(c[n>>2]|0)+32>>2];g[Ac>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[yc>>2]*+g[zc>>2]+ +g[Ac>>2]*+g[Bc>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[yc>>2]*+g[Bc>>2]-+g[Ac>>2]*+g[zc>>2];g[Eb>>2]=+g[U>>2]-+g[$>>2]*.5;g[Ib>>2]=+g[Eb>>2]-+g[Hb>>2];g[Sa>>2]=+g[Eb>>2]+ +g[Hb>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2]*.5;g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[Ua>>2]=+g[Pa>>2]+ +g[Oa>>2];g[Db>>2]=+g[(c[n>>2]|0)+8>>2];g[Jb>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[Db>>2]*+g[Ib>>2]+ 
+g[Jb>>2]*+g[Qa>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[Db>>2]*+g[Qa>>2]-+g[Jb>>2]*+g[Ib>>2];g[Ra>>2]=+g[(c[n>>2]|0)+24>>2];g[Ta>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2]=+g[Ra>>2]*+g[Sa>>2]+ +g[Ta>>2]*+g[Ua>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2]=+g[Ra>>2]*+g[Ua>>2]-+g[Ta>>2]*+g[Sa>>2];g[Hc>>2]=+g[Xa>>2]-+g[cb>>2]*.5;g[Lc>>2]=+g[Hc>>2]-+g[Kc>>2];g[Vb>>2]=+g[Hc>>2]+ +g[Kc>>2];g[Pc>>2]=+g[Nc>>2]-+g[Oc>>2]*.5;g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[Xb>>2]=+g[Qc>>2]+ +g[Pc>>2];g[Gc>>2]=+g[(c[n>>2]|0)+8>>2];g[Mc>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Gc>>2]*+g[Lc>>2]+ +g[Mc>>2]*+g[Rc>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Gc>>2]*+g[Rc>>2]-+g[Mc>>2]*+g[Lc>>2];g[Sc>>2]=+g[(c[n>>2]|0)+24>>2];g[Wb>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Sc>>2]*+g[Vb>>2]+ +g[Wb>>2]*+g[Xb>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Sc>>2]*+g[Xb>>2]-+g[Wb>>2]*+g[Vb>>2];g[D>>2]=+g[bf>>2]-+g[x>>2]*.5;g[H>>2]=+g[D>>2]-+g[G>>2];g[P>>2]=+g[D>>2]+ +g[G>>2];g[L>>2]=+g[J>>2]-+g[K>>2]*.5;g[N>>2]=+g[L>>2]-+g[M>>2];g[R>>2]=+g[M>>2]+ +g[L>>2];g[Aa>>2]=+g[(c[n>>2]|0)+8>>2];g[I>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[Aa>>2]*+g[H>>2]+ +g[I>>2]*+g[N>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[Aa>>2]*+g[N>>2]-+g[I>>2]*+g[H>>2];g[O>>2]=+g[(c[n>>2]|0)+24>>2];g[Q>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2]=+g[O>>2]*+g[P>>2]+ +g[Q>>2]*+g[R>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2]=+g[O>>2]*+g[R>>2]-+g[Q>>2]*+g[P>>2];g[Kd>>2]=+g[_b>>2]-+g[fc>>2]*.5;g[Od>>2]=+g[Kd>>2]-+g[Nd>>2];g[Wd>>2]=+g[Kd>>2]+ +g[Nd>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2]*.5;g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[Yd>>2]=+g[Td>>2]+ +g[Sd>>2];g[Jd>>2]=+g[(c[n>>2]|0)+8>>2];g[Pd>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2]=+g[Jd>>2]*+g[Od>>2]+ 
+g[Pd>>2]*+g[Ud>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2]=+g[Jd>>2]*+g[Ud>>2]-+g[Pd>>2]*+g[Od>>2];g[Vd>>2]=+g[(c[n>>2]|0)+24>>2];g[Xd>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2]=+g[Vd>>2]*+g[Wd>>2]+ +g[Xd>>2]*+g[Yd>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2]=+g[Vd>>2]*+g[Yd>>2]-+g[Xd>>2]*+g[Wd>>2];g[Me>>2]=+g[Kb>>2]-+g[ef>>2]*.5;g[Qe>>2]=+g[Me>>2]-+g[Pe>>2];g[Ye>>2]=+g[Me>>2]+ +g[Pe>>2];g[Ue>>2]=+g[Se>>2]-+g[Te>>2]*.5;g[We>>2]=+g[Ue>>2]-+g[Ve>>2];g[_e>>2]=+g[Ve>>2]+ +g[Ue>>2];g[Le>>2]=+g[(c[n>>2]|0)+8>>2];g[Re>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[Le>>2]*+g[Qe>>2]+ +g[Re>>2]*+g[We>>2];g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[Le>>2]*+g[We>>2]-+g[Re>>2]*+g[Qe>>2];g[Xe>>2]=+g[(c[n>>2]|0)+24>>2];g[Ze>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+(c[p>>2]<<2<<2)>>2]=+g[Xe>>2]*+g[Ye>>2]+ +g[Ze>>2]*+g[_e>>2];g[(c[m>>2]|0)+(c[p>>2]<<2<<2)>>2]=+g[Xe>>2]*+g[_e>>2]-+g[Ze>>2]*+g[Ye>>2];g[Ga>>2]=+g[Ca>>2]-+g[Fa>>2]*.5;g[mb>>2]=+g[Ga>>2]+ +g[lb>>2];g[wb>>2]=+g[Ga>>2]-+g[lb>>2];g[tb>>2]=+g[rb>>2]-+g[sb>>2]*.5;g[ub>>2]=+g[ob>>2]+ +g[tb>>2];g[yb>>2]=+g[tb>>2]-+g[ob>>2];g[aa>>2]=+g[c[n>>2]>>2];g[nb>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[aa>>2]*+g[mb>>2]+ +g[nb>>2]*+g[ub>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[aa>>2]*+g[ub>>2]-+g[nb>>2]*+g[mb>>2];g[vb>>2]=+g[(c[n>>2]|0)+32>>2];g[xb>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2]=+g[vb>>2]*+g[wb>>2]+ +g[xb>>2]*+g[yb>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2]=+g[vb>>2]*+g[yb>>2]-+g[xb>>2]*+g[wb>>2];g[lc>>2]=+g[hc>>2]-+g[kc>>2]*.5;g[Uc>>2]=+g[lc>>2]+ +g[sc>>2];g[Cd>>2]=+g[lc>>2]-+g[sc>>2];g[$c>>2]=+g[Zc>>2]-+g[_c>>2]*.5;g[ad>>2]=+g[Wc>>2]+ +g[$c>>2];g[Ed>>2]=+g[$c>>2]-+g[Wc>>2];g[gc>>2]=+g[c[n>>2]>>2];g[Vc>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2]=+g[gc>>2]*+g[Uc>>2]+ 
+g[Vc>>2]*+g[ad>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2]=+g[gc>>2]*+g[ad>>2]-+g[Vc>>2]*+g[Uc>>2];g[bd>>2]=+g[(c[n>>2]|0)+32>>2];g[Dd>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2]=+g[bd>>2]*+g[Cd>>2]+ +g[Dd>>2]*+g[Ed>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2]=+g[bd>>2]*+g[Ed>>2]-+g[Dd>>2]*+g[Cd>>2];g[ba>>2]=+g[z>>2]-+g[C>>2]*.5;g[ja>>2]=+g[ba>>2]+ +g[ia>>2];g[ta>>2]=+g[ba>>2]-+g[ia>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2]*.5;g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[va>>2]=+g[qa>>2]-+g[la>>2];g[y>>2]=+g[c[n>>2]>>2];g[ka>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[y>>2]*+g[ja>>2]+ +g[ka>>2]*+g[ra>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[y>>2]*+g[ra>>2]-+g[ka>>2]*+g[ja>>2];g[sa>>2]=+g[(c[n>>2]|0)+32>>2];g[ua>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2]=+g[sa>>2]*+g[ta>>2]+ +g[ua>>2]*+g[va>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2]=+g[sa>>2]*+g[va>>2]-+g[ua>>2]*+g[ta>>2];g[od>>2]=+g[kd>>2]-+g[nd>>2]*.5;g[wd>>2]=+g[od>>2]+ +g[vd>>2];g[fe>>2]=+g[od>>2]-+g[vd>>2];g[ce>>2]=+g[Bd>>2]-+g[be>>2]*.5;g[de>>2]=+g[yd>>2]+ +g[ce>>2];g[he>>2]=+g[ce>>2]-+g[yd>>2];g[jd>>2]=+g[c[n>>2]>>2];g[xd>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[jd>>2]*+g[wd>>2]+ +g[xd>>2]*+g[de>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[jd>>2]*+g[de>>2]-+g[xd>>2]*+g[wd>>2];g[ee>>2]=+g[(c[n>>2]|0)+32>>2];g[ge>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[ee>>2]*+g[fe>>2]+ +g[ge>>2]*+g[he>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[ee>>2]*+g[he>>2]-+g[ge>>2]*+g[fe>>2];c[Ff>>2]=(c[Ff>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[r>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[r>>2]<<2);c[n>>2]=(c[n>>2]|0)+40}i=Gf;return}/* Zi(a): compiler-generated thin wrapper. Coerces a to int32, saves the module-level variable i in b, advances i by 16 (i is used here like a stack pointer: saved, bumped, restored), stores a at c[d>>2] in that frame and reloads it, calls fh(a, 6, 1544), then restores i. Returns no value. NOTE(review): presumably an FFTW codelet-registration stub for the size-6 routine that ends just above, with 1544 being the address of its descriptor in the heap - confirm against the FFTW sources before relying on this. */function Zi(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;fh(c[d>>2]|0,6,1544);i=b;return}function _i(a,b,d,e,f,h,j,k){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;var 
l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=
0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0;ml=i;i=i+2608|0;l=ml+2596|0;m=ml+2592|0;n=ml+2588|0;o=ml+2584|0;p=ml+2580|0;nl=ml+2576|0;q=ml+2572|0;r=ml+2568|0;ll=ml+2560|0;tg=ml+2556|0;x=ml+2552|0;ha=ml+2548|0;Xk=ml+2544|0;Wj=ml+2540|0;ik=ml+2536|0;t=ml+2532|0;ek=ml+2528|0;xa=ml+2524|0;zb=ml+2520|0;Jb=ml+2516|0;K=ml+2512|0;P=ml+2508|0;Z=ml+2504|0;tb=ml+2500|0;Y=ml+2496|0;Vg=ml+2492|0;$h=ml+2488|0;Mi=ml+2484|0;Hh=ml+2480|0;xi=ml+2476|0;Nh=ml+2472|0;Xh=ml+2468|0;Hi=ml+2464|0;Aj=ml+2460|0;Ck=ml+2456|0;Mk=ml+2452|0;Lj=ml+2448|0;Qj=ml+2444|0;aj=ml+2440|0;wk=ml+2436|0;$i=ml+2432|0;Rk=ml+2428|0;A=ml+2424|0;ia=ml+2420|0;al=ml+2416|0;fl=ml+2412|0;Yj=ml+2408|0;u=ml+2404|0;Xj=ml+2400|0;qa=ml+2396|0;wb=ml+2392|0;Ib=ml+2388|0;F=ml+2384|0;X=ml+2380|0;Ka=ml+2376|0;sb=ml+2372|0;Ga=ml+2368|0;ah=ml+2364|0;ci=ml+2360|0;Ni=ml+2356|0;Mh=ml+2352|0;p
i=ml+2348|0;zi=ml+2344|0;Yh=ml+2340|0;yi=ml+2336|0;Vi=ml+2332|0;zk=ml+2328|0;Lk=ml+2324|0;Gj=ml+2320|0;_i=ml+2316|0;mj=ml+2312|0;vk=ml+2308|0;ij=ml+2304|0;Ta=ml+2300|0;Zb=ml+2296|0;jc=ml+2292|0;eb=ml+2288|0;vc=ml+2284|0;Jc=ml+2280|0;Vb=ml+2276|0;Fc=ml+2272|0;_c=ml+2268|0;Bd=ml+2264|0;ke=ml+2260|0;Jd=ml+2256|0;Od=ml+2252|0;Yd=ml+2248|0;vd=ml+2244|0;Xd=ml+2240|0;Se=ml+2236|0;Zf=ml+2232|0;jg=ml+2228|0;df=ml+2224|0;xe=ml+2220|0;lf=ml+2216|0;Vf=ml+2212|0;He=ml+2208|0;Bf=ml+2204|0;Bh=ml+2200|0;Ng=ml+2196|0;Mf=ml+2192|0;Rf=ml+2188|0;Ag=ml+2184|0;vh=ml+2180|0;zg=ml+2176|0;_a=ml+2172|0;ac=ml+2168|0;kc=ml+2164|0;jb=ml+2160|0;Pb=ml+2156|0;xc=ml+2152|0;Wb=ml+2148|0;wc=ml+2144|0;sc=ml+2140|0;yd=ml+2136|0;je=ml+2132|0;Ed=ml+2128|0;Wd=ml+2124|0;kd=ml+2120|0;ud=ml+2116|0;gd=ml+2112|0;Ze=ml+2108|0;ag=ml+2104|0;kg=ml+2100|0;jf=ml+2096|0;pe=ml+2092|0;ze=ml+2088|0;Wf=ml+2084|0;ye=ml+2080|0;sg=ml+2076|0;yh=ml+2072|0;Mg=ml+2068|0;Hf=ml+2064|0;yg=ml+2060|0;kh=ml+2056|0;uh=ml+2052|0;gh=ml+2048|0;Kb=ml+2044|0;jl=ml+2040|0;Wk=ml+2036|0;qk=ml+2032|0;kf=ml+2028|0;Tk=ml+2024|0;Vj=ml+2020|0;rk=ml+2016|0;s=ml+2012|0;Ba=ml+2008|0;Uk=ml+2004|0;Vk=ml+2e3|0;Tc=ml+1996|0;ae=ml+1992|0;kl=ml+1988|0;Uj=ml+1984|0;ta=ml+1980|0;L=ml+1976|0;O=ml+1972|0;xb=ml+1968|0;wa=ml+1964|0;G=ml+1960|0;J=ml+1956|0;yb=ml+1952|0;ra=ml+1948|0;sa=ml+1944|0;M=ml+1940|0;N=ml+1936|0;ua=ml+1932|0;va=ml+1928|0;H=ml+1924|0;I=ml+1920|0;Rg=ml+1916|0;ti=ml+1912|0;Gh=ml+1908|0;Vh=ml+1904|0;Ug=ml+1900|0;Dh=ml+1896|0;wi=ml+1892|0;Wh=ml+1888|0;Pg=ml+1884|0;Qg=ml+1880|0;Eh=ml+1876|0;Fh=ml+1872|0;Sg=ml+1868|0;Tg=ml+1864|0;ui=ml+1860|0;vi=ml+1856|0;wj=ml+1852|0;Mj=ml+1848|0;Pj=ml+1844|0;Ak=ml+1840|0;zj=ml+1836|0;Hj=ml+1832|0;Kj=ml+1828|0;Bk=ml+1824|0;uj=ml+1820|0;vj=ml+1816|0;Nj=ml+1812|0;Oj=ml+1808|0;xj=ml+1804|0;yj=ml+1800|0;Ij=ml+1796|0;Jj=ml+1792|0;sk=ml+1788|0;bl=ml+1784|0;el=ml+1780|0;y=ml+1776|0;Qk=ml+1772|0;Yk=ml+1768|0;$k=ml+1764|0;z=ml+1760|0;Ch=ml+1756|0;Li=ml+1752|0;cl=ml+1748|0;dl=ml+1744|0;Ok=ml+1740|0;Pk=ml+1736|0;Zk=ml+1732|
0;_k=ml+1728|0;ma=ml+1724|0;T=ml+1720|0;E=ml+1716|0;qb=ml+1712|0;pa=ml+1708|0;za=ml+1704|0;W=ml+1700|0;rb=ml+1696|0;ka=ml+1692|0;la=ml+1688|0;Aa=ml+1684|0;D=ml+1680|0;na=ml+1676|0;oa=ml+1672|0;U=ml+1668|0;V=ml+1664|0;Yg=ml+1660|0;li=ml+1656|0;oi=ml+1652|0;ai=ml+1648|0;$g=ml+1644|0;Ih=ml+1640|0;Lh=ml+1636|0;bi=ml+1632|0;Wg=ml+1628|0;Xg=ml+1624|0;mi=ml+1620|0;ni=ml+1616|0;Zg=ml+1612|0;_g=ml+1608|0;Jh=ml+1604|0;Kh=ml+1600|0;Ri=ml+1596|0;Wi=ml+1592|0;Fj=ml+1588|0;tk=ml+1584|0;Ui=ml+1580|0;Cj=ml+1576|0;Zi=ml+1572|0;uk=ml+1568|0;Pi=ml+1564|0;Qi=ml+1560|0;Dj=ml+1556|0;Ej=ml+1552|0;Si=ml+1548|0;Ti=ml+1544|0;Xi=ml+1540|0;Yi=ml+1536|0;Pa=ml+1532|0;Tb=ml+1528|0;db=ml+1524|0;Rc=ml+1520|0;Sa=ml+1516|0;ab=ml+1512|0;uc=ml+1508|0;Sc=ml+1504|0;Na=ml+1500|0;Oa=ml+1496|0;bb=ml+1492|0;cb=ml+1488|0;Qa=ml+1484|0;Ra=ml+1480|0;Ub=ml+1476|0;tc=ml+1472|0;Wc=ml+1468|0;Kd=ml+1464|0;Nd=ml+1460|0;zd=ml+1456|0;Zc=ml+1452|0;Fd=ml+1448|0;Id=ml+1444|0;Ad=ml+1440|0;Uc=ml+1436|0;Vc=ml+1432|0;Ld=ml+1428|0;Md=ml+1424|0;Xc=ml+1420|0;Yc=ml+1416|0;Gd=ml+1412|0;Hd=ml+1408|0;Oe=ml+1404|0;te=ml+1400|0;cf=ml+1396|0;tf=ml+1392|0;Re=ml+1388|0;$e=ml+1384|0;we=ml+1380|0;uf=ml+1376|0;Me=ml+1372|0;Ne=ml+1368|0;af=ml+1364|0;bf=ml+1360|0;Pe=ml+1356|0;Qe=ml+1352|0;ue=ml+1348|0;ve=ml+1344|0;xf=ml+1340|0;Nf=ml+1336|0;Qf=ml+1332|0;zh=ml+1328|0;Af=ml+1324|0;If=ml+1320|0;Lf=ml+1316|0;Ah=ml+1312|0;vf=ml+1308|0;wf=ml+1304|0;Of=ml+1300|0;Pf=ml+1296|0;yf=ml+1292|0;zf=ml+1288|0;Jf=ml+1284|0;Kf=ml+1280|0;Wa=ml+1276|0;Lb=ml+1272|0;Ob=ml+1268|0;_b=ml+1264|0;Za=ml+1260|0;fb=ml+1256|0;ib=ml+1252|0;$b=ml+1248|0;Ua=ml+1244|0;Va=ml+1240|0;Mb=ml+1236|0;Nb=ml+1232|0;Xa=ml+1228|0;Ya=ml+1224|0;gb=ml+1220|0;hb=ml+1216|0;oc=ml+1212|0;Sd=ml+1208|0;Dd=ml+1204|0;sd=ml+1200|0;rc=ml+1196|0;ad=ml+1192|0;Vd=ml+1188|0;td=ml+1184|0;mc=ml+1180|0;nc=ml+1176|0;bd=ml+1172|0;Cd=ml+1168|0;pc=ml+1164|0;qc=ml+1160|0;Td=ml+1156|0;Ud=ml+1152|0;Ve=ml+1148|0;le=ml+1144|0;oe=ml+1140|0;_f=ml+1136|0;Ye=ml+1132|0;ef=ml+1128|0;hf=ml+1124|0;$f=ml+1120|0;Te=ml+1116|0;Ue
=ml+1112|0;me=ml+1108|0;ne=ml+1104|0;We=ml+1100|0;Xe=ml+1096|0;ff=ml+1092|0;gf=ml+1088|0;og=ml+1084|0;ug=ml+1080|0;Gf=ml+1076|0;sh=ml+1072|0;rg=ml+1068|0;Df=ml+1064|0;xg=ml+1060|0;th=ml+1056|0;mg=ml+1052|0;ng=ml+1048|0;Ef=ml+1044|0;Ff=ml+1040|0;pg=ml+1036|0;qg=ml+1032|0;vg=ml+1028|0;wg=ml+1024|0;v=ml+1020|0;B=ml+1016|0;pk=ml+1012|0;w=ml+1008|0;Jk=ml+1004|0;Nk=ml+1e3|0;Ik=ml+996|0;Kk=ml+992|0;Zh=ml+988|0;di=ml+984|0;Uh=ml+980|0;_h=ml+976|0;ub=ml+972|0;Ab=ml+968|0;pb=ml+964|0;vb=ml+960|0;xk=ml+956|0;Dk=ml+952|0;tj=ml+948|0;yk=ml+944|0;ji=ml+940|0;Oi=ml+936|0;ii=ml+932|0;ki=ml+928|0;Gb=ml+924|0;Ma=ml+920|0;Fb=ml+916|0;Hb=ml+912|0;Fk=ml+908|0;Hk=ml+904|0;Ek=ml+900|0;Gk=ml+896|0;fi=ml+892|0;hi=ml+888|0;ei=ml+884|0;gi=ml+880|0;ba=ml+876|0;da=ml+872|0;C=ml+868|0;ca=ml+864|0;fa=ml+860|0;ja=ml+856|0;ea=ml+852|0;ga=ml+848|0;Cb=ml+844|0;Eb=ml+840|0;Bb=ml+836|0;Db=ml+832|0;Xb=ml+828|0;bc=ml+824|0;Qc=ml+820|0;Yb=ml+816|0;wd=ml+812|0;be=ml+808|0;rd=ml+804|0;xd=ml+800|0;he=ml+796|0;Le=ml+792|0;ge=ml+788|0;ie=ml+784|0;hg=ml+780|0;lg=ml+776|0;gg=ml+772|0;ig=ml+768|0;wh=ml+764|0;Eg=ml+760|0;rh=ml+756|0;xh=ml+752|0;Xf=ml+748|0;bg=ml+744|0;sf=ml+740|0;Yf=ml+736|0;Kg=ml+732|0;Og=ml+728|0;Jg=ml+724|0;Lg=ml+720|0;hc=ml+716|0;lc=ml+712|0;gc=ml+708|0;ic=ml+704|0;dc=ml+700|0;fc=ml+696|0;cc=ml+692|0;ec=ml+688|0;Gg=ml+684|0;Ig=ml+680|0;Fg=ml+676|0;Hg=ml+672|0;dg=ml+668|0;fg=ml+664|0;cg=ml+660|0;eg=ml+656|0;de=ml+652|0;fe=ml+648|0;ce=ml+644|0;ee=ml+640|0;re=ml+636|0;De=ml+632|0;Be=ml+628|0;Fe=ml+624|0;qe=ml+620|0;Ae=ml+616|0;_e=ml+612|0;se=ml+608|0;Ce=ml+604|0;Ee=ml+600|0;hl=ml+596|0;ak=ml+592|0;_j=ml+588|0;ck=ml+584|0;gl=ml+580|0;Zj=ml+576|0;Sk=ml+572|0;il=ml+568|0;$j=ml+564|0;bk=ml+560|0;kj=ml+556|0;qj=ml+552|0;oj=ml+548|0;sj=ml+544|0;jj=ml+540|0;nj=ml+536|0;hj=ml+532|0;lj=ml+528|0;pj=ml+524|0;rj=ml+520|0;Rb=ml+516|0;Bc=ml+512|0;zc=ml+508|0;Dc=ml+504|0;Qb=ml+500|0;yc=ml+496|0;$a=ml+492|0;Sb=ml+488|0;Ac=ml+484|0;Cc=ml+480|0;Je=ml+476|0;pf=ml+472|0;nf=ml+468|0;rf=ml+464|0;Ie=ml+460|0;mf=ml+456|
0;Ge=ml+452|0;Ke=ml+448|0;of=ml+444|0;qf=ml+440|0;ih=ml+436|0;oh=ml+432|0;mh=ml+428|0;qh=ml+424|0;hh=ml+420|0;lh=ml+416|0;fh=ml+412|0;jh=ml+408|0;nh=ml+404|0;ph=ml+400|0;ri=ml+396|0;Di=ml+392|0;Bi=ml+388|0;Fi=ml+384|0;qi=ml+380|0;Ai=ml+376|0;bh=ml+372|0;si=ml+368|0;Ci=ml+364|0;Ei=ml+360|0;id=ml+356|0;od=ml+352|0;md=ml+348|0;qd=ml+344|0;hd=ml+340|0;ld=ml+336|0;fd=ml+332|0;jd=ml+328|0;nd=ml+324|0;pd=ml+320|0;gk=ml+316|0;mk=ml+312|0;kk=ml+308|0;ok=ml+304|0;fk=ml+300|0;jk=ml+296|0;dk=ml+292|0;hk=ml+288|0;lk=ml+284|0;nk=ml+280|0;Ia=ml+276|0;mb=ml+272|0;kb=ml+268|0;ob=ml+264|0;Ha=ml+260|0;La=ml+256|0;Fa=ml+252|0;Ja=ml+248|0;lb=ml+244|0;nb=ml+240|0;Sj=ml+236|0;ej=ml+232|0;cj=ml+228|0;gj=ml+224|0;Rj=ml+220|0;bj=ml+216|0;Bj=ml+212|0;Tj=ml+208|0;dj=ml+204|0;fj=ml+200|0;Tf=ml+196|0;ch=ml+192|0;Cg=ml+188|0;eh=ml+184|0;Sf=ml+180|0;Bg=ml+176|0;Cf=ml+172|0;Uf=ml+168|0;Dg=ml+164|0;dh=ml+160|0;R=ml+156|0;Ca=ml+152|0;$=ml+148|0;Ea=ml+144|0;Q=ml+140|0;_=ml+136|0;ya=ml+132|0;S=ml+128|0;aa=ml+124|0;Da=ml+120|0;Qd=ml+116|0;cd=ml+112|0;_d=ml+108|0;ed=ml+104|0;Pd=ml+100|0;Zd=ml+96|0;$c=ml+92|0;Rd=ml+88|0;$d=ml+84|0;dd=ml+80|0;Hc=ml+76|0;Nc=ml+72|0;Lc=ml+68|0;Pc=ml+64|0;Gc=ml+60|0;Kc=ml+56|0;Ec=ml+52|0;Ic=ml+48|0;Mc=ml+44|0;Oc=ml+40|0;Ji=ml+36|0;Rh=ml+32|0;Ph=ml+28|0;Th=ml+24|0;Ii=ml+20|0;Oh=ml+16|0;Gi=ml+12|0;Ki=ml+8|0;Qh=ml+4|0;Sh=ml;c[l>>2]=a;c[m>>2]=b;c[n>>2]=d;c[o>>2]=e;c[p>>2]=f;c[nl>>2]=h;c[q>>2]=j;c[r>>2]=k;g[ml+2564>>2]=.7071067690849304;c[ll>>2]=c[nl>>2];c[n>>2]=(c[n>>2]|0)+((c[nl>>2]|0)*14<<2);while(1){if((c[ll>>2]|0)>=(c[q>>2]|0))break;g[s>>2]=+g[c[l>>2]>>2];g[Ba>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2<<2)>>2];g[Kb>>2]=+g[s>>2]+ +g[Ba>>2];g[jl>>2]=+g[s>>2]-+g[Ba>>2];g[Uk>>2]=+g[c[m>>2]>>2];g[Vk>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2<<2)>>2];g[Wk>>2]=+g[Uk>>2]-+g[Vk>>2];g[qk>>2]=+g[Uk>>2]+ +g[Vk>>2];g[Tc>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2];g[ae>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*6<<2)>>2];g[kf>>2]=+g[Tc>>2]+ 
+g[ae>>2];g[Tk>>2]=+g[Tc>>2]-+g[ae>>2];g[kl>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2];g[Uj>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*6<<2)>>2];g[Vj>>2]=+g[kl>>2]-+g[Uj>>2];g[rk>>2]=+g[kl>>2]+ +g[Uj>>2];g[tg>>2]=+g[Kb>>2]+ +g[kf>>2];g[x>>2]=+g[Kb>>2]-+g[kf>>2];g[ha>>2]=+g[qk>>2]+ +g[rk>>2];g[Xk>>2]=+g[Tk>>2]+ +g[Wk>>2];g[Wj>>2]=+g[jl>>2]-+g[Vj>>2];g[ik>>2]=+g[Wk>>2]-+g[Tk>>2];g[t>>2]=+g[qk>>2]-+g[rk>>2];g[ek>>2]=+g[jl>>2]+ +g[Vj>>2];g[ra>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[sa>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[L>>2]=+g[ra>>2]-+g[sa>>2];g[M>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2];g[N>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[xb>>2]=+g[M>>2]+ +g[N>>2];g[ua>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[G>>2]=+g[ua>>2]-+g[va>>2];g[H>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[I>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[yb>>2]=+g[H>>2]+ +g[I>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[zb>>2]=+g[xb>>2]-+g[yb>>2];g[Jb>>2]=+g[xb>>2]+ +g[yb>>2];g[K>>2]=+g[G>>2]-+g[J>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[Z>>2]=+g[G>>2]+ +g[J>>2];g[tb>>2]=+g[wa>>2]-+g[ta>>2];g[Y>>2]=+g[O>>2]-+g[L>>2];g[Pg>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*6<<2)>>2];g[Qg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<2)<<2)>>2];g[Rg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[ti>>2]=+g[Pg>>2]-+g[Qg>>2];g[Eh>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*6<<2)>>2];g[Fh>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<2)<<2)>>2];g[Gh>>2]=+g[Eh>>2]-+g[Fh>>2];g[Vh>>2]=+g[Eh>>2]+ +g[Fh>>2];g[Sg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<1)<<2)>>2];g[Tg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[Ug>>2]=+g[Sg>>2]+ 
+g[Tg>>2];g[Dh>>2]=+g[Sg>>2]-+g[Tg>>2];g[ui>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<1)<<2)>>2];g[vi>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[wi>>2]=+g[ui>>2]-+g[vi>>2];g[Wh>>2]=+g[ui>>2]+ +g[vi>>2];g[Vg>>2]=+g[Rg>>2]+ +g[Ug>>2];g[$h>>2]=+g[Rg>>2]-+g[Ug>>2];g[Mi>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Hh>>2]=+g[Dh>>2]+ +g[Gh>>2];g[xi>>2]=+g[ti>>2]-+g[wi>>2];g[Nh>>2]=+g[Gh>>2]-+g[Dh>>2];g[Xh>>2]=+g[Vh>>2]-+g[Wh>>2];g[Hi>>2]=+g[ti>>2]+ +g[wi>>2];g[uj>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]|0)<<2)>>2];g[vj>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[wj>>2]=+g[uj>>2]+ +g[vj>>2];g[Mj>>2]=+g[uj>>2]-+g[vj>>2];g[Nj>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]|0)<<2)>>2];g[Oj>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[Pj>>2]=+g[Nj>>2]-+g[Oj>>2];g[Ak>>2]=+g[Nj>>2]+ +g[Oj>>2];g[xj>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[yj>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[zj>>2]=+g[xj>>2]+ +g[yj>>2];g[Hj>>2]=+g[xj>>2]-+g[yj>>2];g[Ij>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[Jj>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Kj>>2]=+g[Ij>>2]-+g[Jj>>2];g[Bk>>2]=+g[Ij>>2]+ +g[Jj>>2];g[Aj>>2]=+g[wj>>2]+ +g[zj>>2];g[Ck>>2]=+g[Ak>>2]-+g[Bk>>2];g[Mk>>2]=+g[Ak>>2]+ +g[Bk>>2];g[Lj>>2]=+g[Hj>>2]-+g[Kj>>2];g[Qj>>2]=+g[Mj>>2]+ +g[Pj>>2];g[aj>>2]=+g[Hj>>2]+ +g[Kj>>2];g[wk>>2]=+g[zj>>2]-+g[wj>>2];g[$i>>2]=+g[Pj>>2]-+g[Mj>>2];g[Ch>>2]=+g[(c[l>>2]|0)+(c[o>>2]<<2)>>2];g[Li>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*5<<2)>>2];g[sk>>2]=+g[Ch>>2]+ +g[Li>>2];g[bl>>2]=+g[Ch>>2]-+g[Li>>2];g[cl>>2]=+g[(c[m>>2]|0)+(c[o>>2]<<2)>>2];g[dl>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*5<<2)>>2];g[el>>2]=+g[cl>>2]-+g[dl>>2];g[y>>2]=+g[cl>>2]+ +g[dl>>2];g[Ok>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*7<<2)>>2];g[Pk>>2]=+g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[Qk>>2]=+g[Ok>>2]+ 
+g[Pk>>2];g[Yk>>2]=+g[Ok>>2]-+g[Pk>>2];g[Zk>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*7<<2)>>2];g[_k>>2]=+g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2];g[$k>>2]=+g[Zk>>2]-+g[_k>>2];g[z>>2]=+g[Zk>>2]+ +g[_k>>2];g[Rk>>2]=+g[sk>>2]+ +g[Qk>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[ia>>2]=+g[y>>2]+ +g[z>>2];g[al>>2]=+g[Yk>>2]-+g[$k>>2];g[fl>>2]=+g[bl>>2]+ +g[el>>2];g[Yj>>2]=+g[Yk>>2]+ +g[$k>>2];g[u>>2]=+g[Qk>>2]-+g[sk>>2];g[Xj>>2]=+g[el>>2]-+g[bl>>2];g[ka>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2)>>2];g[la>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[T>>2]=+g[ka>>2]-+g[la>>2];g[Aa>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2)>>2];g[D>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2];g[E>>2]=+g[Aa>>2]-+g[D>>2];g[qb>>2]=+g[Aa>>2]+ +g[D>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[oa>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[za>>2]=+g[na>>2]-+g[oa>>2];g[U>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2];g[V>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[rb>>2]=+g[U>>2]+ +g[V>>2];g[qa>>2]=+g[ma>>2]+ +g[pa>>2];g[wb>>2]=+g[ma>>2]-+g[pa>>2];g[Ib>>2]=+g[qb>>2]+ +g[rb>>2];g[F>>2]=+g[za>>2]+ +g[E>>2];g[X>>2]=+g[T>>2]-+g[W>>2];g[Ka>>2]=+g[E>>2]-+g[za>>2];g[sb>>2]=+g[qb>>2]-+g[rb>>2];g[Ga>>2]=+g[T>>2]+ +g[W>>2];g[Wg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]|0)<<2)>>2];g[Xg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[Yg>>2]=+g[Wg>>2]+ +g[Xg>>2];g[li>>2]=+g[Wg>>2]-+g[Xg>>2];g[mi>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]|0)<<2)>>2];g[ni>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[oi>>2]=+g[mi>>2]-+g[ni>>2];g[ai>>2]=+g[mi>>2]+ +g[ni>>2];g[Zg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[_g>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[$g>>2]=+g[Zg>>2]+ 
+g[_g>>2];g[Ih>>2]=+g[Zg>>2]-+g[_g>>2];g[Jh>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[Kh>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Lh>>2]=+g[Jh>>2]-+g[Kh>>2];g[bi>>2]=+g[Jh>>2]+ +g[Kh>>2];g[ah>>2]=+g[Yg>>2]+ +g[$g>>2];g[ci>>2]=+g[ai>>2]-+g[bi>>2];g[Ni>>2]=+g[ai>>2]+ +g[bi>>2];g[Mh>>2]=+g[Ih>>2]-+g[Lh>>2];g[pi>>2]=+g[li>>2]+ +g[oi>>2];g[zi>>2]=+g[Ih>>2]+ +g[Lh>>2];g[Yh>>2]=+g[$g>>2]-+g[Yg>>2];g[yi>>2]=+g[oi>>2]-+g[li>>2];g[Pi>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*7<<2)>>2];g[Qi>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<2)<<2)>>2];g[Ri>>2]=+g[Pi>>2]+ +g[Qi>>2];g[Wi>>2]=+g[Pi>>2]-+g[Qi>>2];g[Dj>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*7<<2)>>2];g[Ej>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<2)<<2)>>2];g[Fj>>2]=+g[Dj>>2]-+g[Ej>>2];g[tk>>2]=+g[Dj>>2]+ +g[Ej>>2];g[Si>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<1)<<2)>>2];g[Ti>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[Ui>>2]=+g[Si>>2]+ +g[Ti>>2];g[Cj>>2]=+g[Si>>2]-+g[Ti>>2];g[Xi>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<1)<<2)>>2];g[Yi>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[Zi>>2]=+g[Xi>>2]-+g[Yi>>2];g[uk>>2]=+g[Xi>>2]+ +g[Yi>>2];g[Vi>>2]=+g[Ri>>2]+ +g[Ui>>2];g[zk>>2]=+g[Ri>>2]-+g[Ui>>2];g[Lk>>2]=+g[tk>>2]+ +g[uk>>2];g[Gj>>2]=+g[Cj>>2]+ +g[Fj>>2];g[_i>>2]=+g[Wi>>2]-+g[Zi>>2];g[mj>>2]=+g[Fj>>2]-+g[Cj>>2];g[vk>>2]=+g[tk>>2]-+g[uk>>2];g[ij>>2]=+g[Wi>>2]+ +g[Zi>>2];g[Na>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2];g[Oa>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2];g[Pa>>2]=+g[Na>>2]+ +g[Oa>>2];g[Tb>>2]=+g[Na>>2]-+g[Oa>>2];g[bb>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2];g[cb>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[Rc>>2]=+g[bb>>2]+ +g[cb>>2];g[Qa>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[Ra>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*6|0)<<2)>>2];g[Sa>>2]=+g[Qa>>2]+ 
+g[Ra>>2];g[ab>>2]=+g[Qa>>2]-+g[Ra>>2];g[Ub>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2];g[tc>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*6|0)<<2)>>2];g[uc>>2]=+g[Ub>>2]-+g[tc>>2];g[Sc>>2]=+g[Ub>>2]+ +g[tc>>2];g[Ta>>2]=+g[Pa>>2]+ +g[Sa>>2];g[Zb>>2]=+g[Pa>>2]-+g[Sa>>2];g[jc>>2]=+g[Rc>>2]+ +g[Sc>>2];g[eb>>2]=+g[ab>>2]+ +g[db>>2];g[vc>>2]=+g[Tb>>2]-+g[uc>>2];g[Jc>>2]=+g[db>>2]-+g[ab>>2];g[Vb>>2]=+g[Rc>>2]-+g[Sc>>2];g[Fc>>2]=+g[Tb>>2]+ +g[uc>>2];g[Uc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[Vc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[Wc>>2]=+g[Uc>>2]+ +g[Vc>>2];g[Kd>>2]=+g[Uc>>2]-+g[Vc>>2];g[Ld>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2];g[Md>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[zd>>2]=+g[Ld>>2]+ +g[Md>>2];g[Xc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[Yc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Zc>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Fd>>2]=+g[Xc>>2]-+g[Yc>>2];g[Gd>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[Hd>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[Ad>>2]=+g[Gd>>2]+ +g[Hd>>2];g[_c>>2]=+g[Wc>>2]+ +g[Zc>>2];g[Bd>>2]=+g[zd>>2]-+g[Ad>>2];g[ke>>2]=+g[zd>>2]+ +g[Ad>>2];g[Jd>>2]=+g[Fd>>2]-+g[Id>>2];g[Od>>2]=+g[Kd>>2]+ +g[Nd>>2];g[Yd>>2]=+g[Fd>>2]+ +g[Id>>2];g[vd>>2]=+g[Zc>>2]-+g[Wc>>2];g[Xd>>2]=+g[Nd>>2]-+g[Kd>>2];g[Me>>2]=+g[(c[l>>2]|0)+(c[p>>2]<<2<<2)>>2];g[Ne>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2];g[Oe>>2]=+g[Me>>2]+ +g[Ne>>2];g[te>>2]=+g[Me>>2]-+g[Ne>>2];g[af>>2]=+g[(c[m>>2]|0)+(c[p>>2]<<2<<2)>>2];g[bf>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2];g[cf>>2]=+g[af>>2]-+g[bf>>2];g[tf>>2]=+g[af>>2]+ +g[bf>>2];g[Pe>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2];g[Qe>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*6|0)<<2)>>2];g[Re>>2]=+g[Pe>>2]+ 
+g[Qe>>2];g[$e>>2]=+g[Pe>>2]-+g[Qe>>2];g[ue>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2];g[ve>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*6|0)<<2)>>2];g[we>>2]=+g[ue>>2]-+g[ve>>2];g[uf>>2]=+g[ue>>2]+ +g[ve>>2];g[Se>>2]=+g[Oe>>2]+ +g[Re>>2];g[Zf>>2]=+g[Oe>>2]-+g[Re>>2];g[jg>>2]=+g[tf>>2]+ +g[uf>>2];g[df>>2]=+g[$e>>2]+ +g[cf>>2];g[xe>>2]=+g[te>>2]-+g[we>>2];g[lf>>2]=+g[cf>>2]-+g[$e>>2];g[Vf>>2]=+g[tf>>2]-+g[uf>>2];g[He>>2]=+g[te>>2]+ +g[we>>2];g[vf>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2];g[wf>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[xf>>2]=+g[vf>>2]+ +g[wf>>2];g[Nf>>2]=+g[vf>>2]-+g[wf>>2];g[Of>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2];g[Pf>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2];g[Qf>>2]=+g[Of>>2]-+g[Pf>>2];g[zh>>2]=+g[Of>>2]+ +g[Pf>>2];g[yf>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[zf>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Af>>2]=+g[yf>>2]+ +g[zf>>2];g[If>>2]=+g[yf>>2]-+g[zf>>2];g[Jf>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*7|0)<<2)>>2];g[Kf>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2];g[Lf>>2]=+g[Jf>>2]-+g[Kf>>2];g[Ah>>2]=+g[Jf>>2]+ +g[Kf>>2];g[Bf>>2]=+g[xf>>2]+ +g[Af>>2];g[Bh>>2]=+g[zh>>2]-+g[Ah>>2];g[Ng>>2]=+g[zh>>2]+ +g[Ah>>2];g[Mf>>2]=+g[If>>2]-+g[Lf>>2];g[Rf>>2]=+g[Nf>>2]+ +g[Qf>>2];g[Ag>>2]=+g[If>>2]+ +g[Lf>>2];g[vh>>2]=+g[Af>>2]-+g[xf>>2];g[zg>>2]=+g[Qf>>2]-+g[Nf>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[Va>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[Lb>>2]=+g[Ua>>2]-+g[Va>>2];g[Mb>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2];g[Nb>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2];g[Ob>>2]=+g[Mb>>2]-+g[Nb>>2];g[_b>>2]=+g[Mb>>2]+ +g[Nb>>2];g[Xa>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*7|0)<<2)>>2];g[Ya>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[Za>>2]=+g[Xa>>2]+ 
+g[Ya>>2];g[fb>>2]=+g[Xa>>2]-+g[Ya>>2];g[gb>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*7|0)<<2)>>2];g[hb>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[$b>>2]=+g[gb>>2]+ +g[hb>>2];g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[kc>>2]=+g[_b>>2]+ +g[$b>>2];g[jb>>2]=+g[fb>>2]-+g[ib>>2];g[Pb>>2]=+g[Lb>>2]+ +g[Ob>>2];g[xc>>2]=+g[fb>>2]+ +g[ib>>2];g[Wb>>2]=+g[Za>>2]-+g[Wa>>2];g[wc>>2]=+g[Ob>>2]-+g[Lb>>2];g[mc>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[nc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2];g[oc>>2]=+g[mc>>2]+ +g[nc>>2];g[Sd>>2]=+g[mc>>2]-+g[nc>>2];g[bd>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2];g[Cd>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2];g[Dd>>2]=+g[bd>>2]-+g[Cd>>2];g[sd>>2]=+g[bd>>2]+ +g[Cd>>2];g[pc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[qc>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[rc>>2]=+g[pc>>2]+ +g[qc>>2];g[ad>>2]=+g[pc>>2]-+g[qc>>2];g[Td>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2];g[Ud>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[Vd>>2]=+g[Td>>2]-+g[Ud>>2];g[td>>2]=+g[Td>>2]+ +g[Ud>>2];g[sc>>2]=+g[oc>>2]+ +g[rc>>2];g[yd>>2]=+g[oc>>2]-+g[rc>>2];g[je>>2]=+g[sd>>2]+ +g[td>>2];g[Ed>>2]=+g[ad>>2]+ +g[Dd>>2];g[Wd>>2]=+g[Sd>>2]-+g[Vd>>2];g[kd>>2]=+g[Dd>>2]-+g[ad>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[gd>>2]=+g[Sd>>2]+ +g[Vd>>2];g[Te>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2];g[Ue>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2];g[Ve>>2]=+g[Te>>2]+ +g[Ue>>2];g[le>>2]=+g[Te>>2]-+g[Ue>>2];g[me>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2];g[ne>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2];g[oe>>2]=+g[me>>2]-+g[ne>>2];g[_f>>2]=+g[me>>2]+ +g[ne>>2];g[We>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*7|0)<<2)>>2];g[Xe>>2]=+g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2];g[Ye>>2]=+g[We>>2]+ 
+g[Xe>>2];g[ef>>2]=+g[We>>2]-+g[Xe>>2];g[ff>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*7|0)<<2)>>2];g[gf>>2]=+g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2];g[hf>>2]=+g[ff>>2]-+g[gf>>2];g[$f>>2]=+g[ff>>2]+ +g[gf>>2];g[Ze>>2]=+g[Ve>>2]+ +g[Ye>>2];g[ag>>2]=+g[_f>>2]-+g[$f>>2];g[kg>>2]=+g[_f>>2]+ +g[$f>>2];g[jf>>2]=+g[ef>>2]-+g[hf>>2];g[pe>>2]=+g[le>>2]+ +g[oe>>2];g[ze>>2]=+g[ef>>2]+ +g[hf>>2];g[Wf>>2]=+g[Ye>>2]-+g[Ve>>2];g[ye>>2]=+g[oe>>2]-+g[le>>2];g[mg>>2]=+g[(c[l>>2]|0)+((c[p>>2]|0)*5<<2)>>2];g[ng>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2];g[og>>2]=+g[mg>>2]+ +g[ng>>2];g[ug>>2]=+g[mg>>2]-+g[ng>>2];g[Ef>>2]=+g[(c[m>>2]|0)+((c[p>>2]|0)*5<<2)>>2];g[Ff>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2];g[Gf>>2]=+g[Ef>>2]-+g[Ff>>2];g[sh>>2]=+g[Ef>>2]+ +g[Ff>>2];g[pg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2];g[qg>>2]=+g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[rg>>2]=+g[pg>>2]+ +g[qg>>2];g[Df>>2]=+g[pg>>2]-+g[qg>>2];g[vg>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2];g[wg>>2]=+g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*6|0)<<2)>>2];g[xg>>2]=+g[vg>>2]-+g[wg>>2];g[th>>2]=+g[vg>>2]+ +g[wg>>2];g[sg>>2]=+g[og>>2]+ +g[rg>>2];g[yh>>2]=+g[og>>2]-+g[rg>>2];g[Mg>>2]=+g[sh>>2]+ +g[th>>2];g[Hf>>2]=+g[Df>>2]+ +g[Gf>>2];g[yg>>2]=+g[ug>>2]-+g[xg>>2];g[kh>>2]=+g[Gf>>2]-+g[Df>>2];g[uh>>2]=+g[sh>>2]-+g[th>>2];g[gh>>2]=+g[ug>>2]+ +g[xg>>2];g[c[l>>2]>>2]=+g[tg>>2]+ +g[Rk>>2];g[c[m>>2]>>2]=+g[ha>>2]+ +g[ia>>2];g[(c[l>>2]|0)+(c[o>>2]<<2)>>2]=+g[qa>>2]+ +g[xa>>2];g[(c[m>>2]|0)+(c[o>>2]<<2)>>2]=+g[Ib>>2]+ +g[Jb>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[sc>>2]+ +g[_c>>2];g[(c[l>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[Ta>>2]+ +g[_a>>2];g[(c[m>>2]|0)+(c[o>>2]<<1<<2)>>2]=+g[jc>>2]+ +g[kc>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*3<<2)>>2]=+g[je>>2]+ +g[ke>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*6<<2)>>2]=+g[Vg>>2]+ +g[ah>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*6<<2)>>2]=+g[Mi>>2]+ +g[Ni>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*5<<2)>>2]=+g[Mg>>2]+ 
+g[Ng>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*5<<2)>>2]=+g[sg>>2]+ +g[Bf>>2];g[(c[m>>2]|0)+(c[o>>2]<<2<<2)>>2]=+g[jg>>2]+ +g[kg>>2];g[(c[l>>2]|0)+(c[o>>2]<<2<<2)>>2]=+g[Se>>2]+ +g[Ze>>2];g[(c[l>>2]|0)+((c[o>>2]|0)*7<<2)>>2]=+g[Vi>>2]+ +g[Aj>>2];g[(c[m>>2]|0)+((c[o>>2]|0)*7<<2)>>2]=+g[Lk>>2]+ +g[Mk>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[B>>2]=+g[x>>2]-+g[A>>2];g[pk>>2]=+g[(c[n>>2]|0)+40>>2];g[w>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*6<<2)>>2]=+g[pk>>2]*+g[v>>2]-+g[w>>2]*+g[B>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*6<<2)>>2]=+g[w>>2]*+g[v>>2]+ +g[pk>>2]*+g[B>>2];g[Jk>>2]=+g[Vi>>2]-+g[Aj>>2];g[Nk>>2]=+g[Lk>>2]-+g[Mk>>2];g[Ik>>2]=+g[(c[n>>2]|0)+24>>2];g[Kk>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[Ik>>2]*+g[Jk>>2]+ +g[Kk>>2]*+g[Nk>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[Ik>>2]*+g[Nk>>2]-+g[Kk>>2]*+g[Jk>>2];g[Zh>>2]=+g[Xh>>2]-+g[Yh>>2];g[di>>2]=+g[$h>>2]-+g[ci>>2];g[Uh>>2]=+g[(c[n>>2]|0)+40>>2];g[_h>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Uh>>2]*+g[Zh>>2]-+g[_h>>2]*+g[di>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[_h>>2]*+g[Zh>>2]+ +g[Uh>>2]*+g[di>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[Ab>>2]=+g[wb>>2]-+g[zb>>2];g[pb>>2]=+g[(c[n>>2]|0)+40>>2];g[vb>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]|0)<<2)>>2]=+g[pb>>2]*+g[ub>>2]-+g[vb>>2]*+g[Ab>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]|0)<<2)>>2]=+g[vb>>2]*+g[ub>>2]+ +g[pb>>2]*+g[Ab>>2];g[xk>>2]=+g[vk>>2]-+g[wk>>2];g[Dk>>2]=+g[zk>>2]-+g[Ck>>2];g[tj>>2]=+g[(c[n>>2]|0)+40>>2];g[yk>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[tj>>2]*+g[xk>>2]-+g[yk>>2]*+g[Dk>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[yk>>2]*+g[xk>>2]+ 
+g[tj>>2]*+g[Dk>>2];g[ji>>2]=+g[Vg>>2]-+g[ah>>2];g[Oi>>2]=+g[Mi>>2]-+g[Ni>>2];g[ii>>2]=+g[(c[n>>2]|0)+24>>2];g[ki>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[ii>>2]*+g[ji>>2]+ +g[ki>>2]*+g[Oi>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[ii>>2]*+g[Oi>>2]-+g[ki>>2]*+g[ji>>2];g[Gb>>2]=+g[qa>>2]-+g[xa>>2];g[Ma>>2]=+g[Ib>>2]-+g[Jb>>2];g[Fb>>2]=+g[(c[n>>2]|0)+24>>2];g[Hb>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2]=+g[Fb>>2]*+g[Gb>>2]+ +g[Hb>>2]*+g[Ma>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]|0)<<2)>>2]=+g[Fb>>2]*+g[Ma>>2]-+g[Hb>>2]*+g[Gb>>2];g[Fk>>2]=+g[wk>>2]+ +g[vk>>2];g[Hk>>2]=+g[zk>>2]+ +g[Ck>>2];g[Ek>>2]=+g[(c[n>>2]|0)+8>>2];g[Gk>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[Ek>>2]*+g[Fk>>2]-+g[Gk>>2]*+g[Hk>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[Gk>>2]*+g[Fk>>2]+ +g[Ek>>2]*+g[Hk>>2];g[fi>>2]=+g[Yh>>2]+ +g[Xh>>2];g[hi>>2]=+g[$h>>2]+ +g[ci>>2];g[ei>>2]=+g[(c[n>>2]|0)+8>>2];g[gi>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[ei>>2]*+g[fi>>2]-+g[gi>>2]*+g[hi>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[gi>>2]*+g[fi>>2]+ +g[ei>>2]*+g[hi>>2];g[ba>>2]=+g[u>>2]+ +g[t>>2];g[da>>2]=+g[x>>2]+ +g[A>>2];g[C>>2]=+g[(c[n>>2]|0)+8>>2];g[ca>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[C>>2]*+g[ba>>2]-+g[ca>>2]*+g[da>>2];g[(c[l>>2]|0)+(c[p>>2]<<1<<2)>>2]=+g[ca>>2]*+g[ba>>2]+ +g[C>>2]*+g[da>>2];g[fa>>2]=+g[tg>>2]-+g[Rk>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[ea>>2]=+g[(c[n>>2]|0)+24>>2];g[ga>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+(c[p>>2]<<2<<2)>>2]=+g[ea>>2]*+g[fa>>2]+ +g[ga>>2]*+g[ja>>2];g[(c[m>>2]|0)+(c[p>>2]<<2<<2)>>2]=+g[ea>>2]*+g[ja>>2]-+g[ga>>2]*+g[fa>>2];g[Cb>>2]=+g[tb>>2]+ +g[sb>>2];g[Eb>>2]=+g[wb>>2]+ 
+g[zb>>2];g[Bb>>2]=+g[(c[n>>2]|0)+8>>2];g[Db>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[Bb>>2]*+g[Cb>>2]-+g[Db>>2]*+g[Eb>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]|0)<<2)>>2]=+g[Db>>2]*+g[Cb>>2]+ +g[Bb>>2]*+g[Eb>>2];g[Xb>>2]=+g[Vb>>2]-+g[Wb>>2];g[bc>>2]=+g[Zb>>2]-+g[ac>>2];g[Qc>>2]=+g[(c[n>>2]|0)+40>>2];g[Yb>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<1)<<2)>>2]=+g[Qc>>2]*+g[Xb>>2]-+g[Yb>>2]*+g[bc>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<1)<<2)>>2]=+g[Yb>>2]*+g[Xb>>2]+ +g[Qc>>2]*+g[bc>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[be>>2]=+g[yd>>2]-+g[Bd>>2];g[rd>>2]=+g[(c[n>>2]|0)+40>>2];g[xd>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[rd>>2]*+g[wd>>2]-+g[xd>>2]*+g[be>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[xd>>2]*+g[wd>>2]+ +g[rd>>2]*+g[be>>2];g[he>>2]=+g[sc>>2]-+g[_c>>2];g[Le>>2]=+g[je>>2]-+g[ke>>2];g[ge>>2]=+g[(c[n>>2]|0)+24>>2];g[ie>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[ge>>2]*+g[he>>2]+ +g[ie>>2]*+g[Le>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[ge>>2]*+g[Le>>2]-+g[ie>>2]*+g[he>>2];g[hg>>2]=+g[Se>>2]-+g[Ze>>2];g[lg>>2]=+g[jg>>2]-+g[kg>>2];g[gg>>2]=+g[(c[n>>2]|0)+24>>2];g[ig>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2]=+g[gg>>2]*+g[hg>>2]+ +g[ig>>2]*+g[lg>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<2)<<2)>>2]=+g[gg>>2]*+g[lg>>2]-+g[ig>>2]*+g[hg>>2];g[wh>>2]=+g[uh>>2]-+g[vh>>2];g[Eg>>2]=+g[yh>>2]-+g[Bh>>2];g[rh>>2]=+g[(c[n>>2]|0)+40>>2];g[xh>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[rh>>2]*+g[wh>>2]-+g[xh>>2]*+g[Eg>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[xh>>2]*+g[wh>>2]+ 
+g[rh>>2]*+g[Eg>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[bg>>2]=+g[Zf>>2]-+g[ag>>2];g[sf>>2]=+g[(c[n>>2]|0)+40>>2];g[Yf>>2]=+g[(c[n>>2]|0)+44>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<2)<<2)>>2]=+g[sf>>2]*+g[Xf>>2]-+g[Yf>>2]*+g[bg>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*6|0)+(c[o>>2]<<2)<<2)>>2]=+g[Yf>>2]*+g[Xf>>2]+ +g[sf>>2]*+g[bg>>2];g[Kg>>2]=+g[sg>>2]-+g[Bf>>2];g[Og>>2]=+g[Mg>>2]-+g[Ng>>2];g[Jg>>2]=+g[(c[n>>2]|0)+24>>2];g[Lg>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Jg>>2]*+g[Kg>>2]+ +g[Lg>>2]*+g[Og>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Jg>>2]*+g[Og>>2]-+g[Lg>>2]*+g[Kg>>2];g[hc>>2]=+g[Ta>>2]-+g[_a>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[gc>>2]=+g[(c[n>>2]|0)+24>>2];g[ic>>2]=+g[(c[n>>2]|0)+28>>2];g[(c[l>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2]=+g[gc>>2]*+g[hc>>2]+ +g[ic>>2]*+g[lc>>2];g[(c[m>>2]|0)+((c[p>>2]<<2)+(c[o>>2]<<1)<<2)>>2]=+g[gc>>2]*+g[lc>>2]-+g[ic>>2]*+g[hc>>2];g[dc>>2]=+g[Wb>>2]+ +g[Vb>>2];g[fc>>2]=+g[Zb>>2]+ +g[ac>>2];g[cc>>2]=+g[(c[n>>2]|0)+8>>2];g[ec>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[cc>>2]*+g[dc>>2]-+g[ec>>2]*+g[fc>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<1)<<2)>>2]=+g[ec>>2]*+g[dc>>2]+ +g[cc>>2]*+g[fc>>2];g[Gg>>2]=+g[vh>>2]+ +g[uh>>2];g[Ig>>2]=+g[yh>>2]+ +g[Bh>>2];g[Fg>>2]=+g[(c[n>>2]|0)+8>>2];g[Hg>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Fg>>2]*+g[Gg>>2]-+g[Hg>>2]*+g[Ig>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Hg>>2]*+g[Gg>>2]+ +g[Fg>>2]*+g[Ig>>2];g[dg>>2]=+g[Wf>>2]+ +g[Vf>>2];g[fg>>2]=+g[Zf>>2]+ +g[ag>>2];g[cg>>2]=+g[(c[n>>2]|0)+8>>2];g[eg>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2]=+g[cg>>2]*+g[dg>>2]-+g[eg>>2]*+g[fg>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+(c[o>>2]<<2)<<2)>>2]=+g[eg>>2]*+g[dg>>2]+ +g[cg>>2]*+g[fg>>2];g[de>>2]=+g[vd>>2]+ +g[ud>>2];g[fe>>2]=+g[yd>>2]+ 
+g[Bd>>2];g[ce>>2]=+g[(c[n>>2]|0)+8>>2];g[ee>>2]=+g[(c[n>>2]|0)+12>>2];g[(c[m>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[ce>>2]*+g[de>>2]-+g[ee>>2]*+g[fe>>2];g[(c[l>>2]|0)+((c[p>>2]<<1)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[ee>>2]*+g[de>>2]+ +g[ce>>2]*+g[fe>>2];g[qe>>2]=(+g[jf>>2]-+g[pe>>2])*.7071067690849304;g[re>>2]=+g[df>>2]-+g[qe>>2];g[De>>2]=+g[df>>2]+ +g[qe>>2];g[Ae>>2]=(+g[ye>>2]-+g[ze>>2])*.7071067690849304;g[Be>>2]=+g[xe>>2]-+g[Ae>>2];g[Fe>>2]=+g[xe>>2]+ +g[Ae>>2];g[_e>>2]=+g[(c[n>>2]|0)+48>>2];g[se>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<2)<<2)>>2]=+g[_e>>2]*+g[re>>2]-+g[se>>2]*+g[Be>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<2)<<2)>>2]=+g[se>>2]*+g[re>>2]+ +g[_e>>2]*+g[Be>>2];g[Ce>>2]=+g[(c[n>>2]|0)+16>>2];g[Ee>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2]=+g[Ce>>2]*+g[De>>2]-+g[Ee>>2]*+g[Fe>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<2)<<2)>>2]=+g[Ee>>2]*+g[De>>2]+ +g[Ce>>2]*+g[Fe>>2];g[gl>>2]=(+g[al>>2]-+g[fl>>2])*.7071067690849304;g[hl>>2]=+g[Xk>>2]-+g[gl>>2];g[ak>>2]=+g[Xk>>2]+ +g[gl>>2];g[Zj>>2]=(+g[Xj>>2]-+g[Yj>>2])*.7071067690849304;g[_j>>2]=+g[Wj>>2]-+g[Zj>>2];g[ck>>2]=+g[Wj>>2]+ +g[Zj>>2];g[Sk>>2]=+g[(c[n>>2]|0)+48>>2];g[il>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*7<<2)>>2]=+g[Sk>>2]*+g[hl>>2]-+g[il>>2]*+g[_j>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*7<<2)>>2]=+g[il>>2]*+g[hl>>2]+ +g[Sk>>2]*+g[_j>>2];g[$j>>2]=+g[(c[n>>2]|0)+16>>2];g[bk>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[$j>>2]*+g[ak>>2]-+g[bk>>2]*+g[ck>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*3<<2)>>2]=+g[bk>>2]*+g[ak>>2]+ +g[$j>>2]*+g[ck>>2];g[jj>>2]=(+g[Qj>>2]+ +g[Lj>>2])*.7071067690849304;g[kj>>2]=+g[ij>>2]-+g[jj>>2];g[qj>>2]=+g[ij>>2]+ +g[jj>>2];g[nj>>2]=(+g[$i>>2]+ +g[aj>>2])*.7071067690849304;g[oj>>2]=+g[mj>>2]-+g[nj>>2];g[sj>>2]=+g[mj>>2]+ +g[nj>>2];g[hj>>2]=+g[(c[n>>2]|0)+32>>2];g[lj>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[hj>>2]*+g[kj>>2]+ 
+g[lj>>2]*+g[oj>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[hj>>2]*+g[oj>>2]-+g[lj>>2]*+g[kj>>2];g[pj>>2]=+g[c[n>>2]>>2];g[rj>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[pj>>2]*+g[qj>>2]+ +g[rj>>2]*+g[sj>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[pj>>2]*+g[sj>>2]-+g[rj>>2]*+g[qj>>2];g[Qb>>2]=(+g[jb>>2]-+g[Pb>>2])*.7071067690849304;g[Rb>>2]=+g[eb>>2]-+g[Qb>>2];g[Bc>>2]=+g[eb>>2]+ +g[Qb>>2];g[yc>>2]=(+g[wc>>2]-+g[xc>>2])*.7071067690849304;g[zc>>2]=+g[vc>>2]-+g[yc>>2];g[Dc>>2]=+g[vc>>2]+ +g[yc>>2];g[$a>>2]=+g[(c[n>>2]|0)+48>>2];g[Sb>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<1)<<2)>>2]=+g[$a>>2]*+g[Rb>>2]-+g[Sb>>2]*+g[zc>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]<<1)<<2)>>2]=+g[Sb>>2]*+g[Rb>>2]+ +g[$a>>2]*+g[zc>>2];g[Ac>>2]=+g[(c[n>>2]|0)+16>>2];g[Cc>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[Ac>>2]*+g[Bc>>2]-+g[Cc>>2]*+g[Dc>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]<<1)<<2)>>2]=+g[Cc>>2]*+g[Bc>>2]+ +g[Ac>>2]*+g[Dc>>2];g[Ie>>2]=(+g[pe>>2]+ +g[jf>>2])*.7071067690849304;g[Je>>2]=+g[He>>2]-+g[Ie>>2];g[pf>>2]=+g[He>>2]+ +g[Ie>>2];g[mf>>2]=(+g[ye>>2]+ +g[ze>>2])*.7071067690849304;g[nf>>2]=+g[lf>>2]-+g[mf>>2];g[rf>>2]=+g[lf>>2]+ +g[mf>>2];g[Ge>>2]=+g[(c[n>>2]|0)+32>>2];g[Ke>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2]=+g[Ge>>2]*+g[Je>>2]+ +g[Ke>>2]*+g[nf>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<2)<<2)>>2]=+g[Ge>>2]*+g[nf>>2]-+g[Ke>>2]*+g[Je>>2];g[of>>2]=+g[c[n>>2]>>2];g[qf>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2]=+g[of>>2]*+g[pf>>2]+ +g[qf>>2]*+g[rf>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<2)<<2)>>2]=+g[of>>2]*+g[rf>>2]-+g[qf>>2]*+g[pf>>2];g[hh>>2]=(+g[Rf>>2]+ +g[Mf>>2])*.7071067690849304;g[ih>>2]=+g[gh>>2]-+g[hh>>2];g[oh>>2]=+g[gh>>2]+ +g[hh>>2];g[lh>>2]=(+g[zg>>2]+ +g[Ag>>2])*.7071067690849304;g[mh>>2]=+g[kh>>2]-+g[lh>>2];g[qh>>2]=+g[kh>>2]+ 
+g[lh>>2];g[fh>>2]=+g[(c[n>>2]|0)+32>>2];g[jh>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[fh>>2]*+g[ih>>2]+ +g[jh>>2]*+g[mh>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[fh>>2]*+g[mh>>2]-+g[jh>>2]*+g[ih>>2];g[nh>>2]=+g[c[n>>2]>>2];g[ph>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[nh>>2]*+g[oh>>2]+ +g[ph>>2]*+g[qh>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[nh>>2]*+g[qh>>2]-+g[ph>>2]*+g[oh>>2];g[qi>>2]=(+g[Mh>>2]-+g[pi>>2])*.7071067690849304;g[ri>>2]=+g[Hh>>2]-+g[qi>>2];g[Di>>2]=+g[Hh>>2]+ +g[qi>>2];g[Ai>>2]=(+g[yi>>2]-+g[zi>>2])*.7071067690849304;g[Bi>>2]=+g[xi>>2]-+g[Ai>>2];g[Fi>>2]=+g[xi>>2]+ +g[Ai>>2];g[bh>>2]=+g[(c[n>>2]|0)+48>>2];g[si>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[bh>>2]*+g[ri>>2]-+g[si>>2]*+g[Bi>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[si>>2]*+g[ri>>2]+ +g[bh>>2]*+g[Bi>>2];g[Ci>>2]=+g[(c[n>>2]|0)+16>>2];g[Ei>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Ci>>2]*+g[Di>>2]-+g[Ei>>2]*+g[Fi>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Ei>>2]*+g[Di>>2]+ +g[Ci>>2]*+g[Fi>>2];g[hd>>2]=(+g[Od>>2]+ +g[Jd>>2])*.7071067690849304;g[id>>2]=+g[gd>>2]-+g[hd>>2];g[od>>2]=+g[gd>>2]+ +g[hd>>2];g[ld>>2]=(+g[Xd>>2]+ +g[Yd>>2])*.7071067690849304;g[md>>2]=+g[kd>>2]-+g[ld>>2];g[qd>>2]=+g[kd>>2]+ +g[ld>>2];g[fd>>2]=+g[(c[n>>2]|0)+32>>2];g[jd>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[fd>>2]*+g[id>>2]+ +g[jd>>2]*+g[md>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[fd>>2]*+g[md>>2]-+g[jd>>2]*+g[id>>2];g[nd>>2]=+g[c[n>>2]>>2];g[pd>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[nd>>2]*+g[od>>2]+ +g[pd>>2]*+g[qd>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[nd>>2]*+g[qd>>2]-+g[pd>>2]*+g[od>>2];g[fk>>2]=(+g[fl>>2]+ 
+g[al>>2])*.7071067690849304;g[gk>>2]=+g[ek>>2]-+g[fk>>2];g[mk>>2]=+g[ek>>2]+ +g[fk>>2];g[jk>>2]=(+g[Xj>>2]+ +g[Yj>>2])*.7071067690849304;g[kk>>2]=+g[ik>>2]-+g[jk>>2];g[ok>>2]=+g[ik>>2]+ +g[jk>>2];g[dk>>2]=+g[(c[n>>2]|0)+32>>2];g[hk>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+((c[p>>2]|0)*5<<2)>>2]=+g[dk>>2]*+g[gk>>2]+ +g[hk>>2]*+g[kk>>2];g[(c[m>>2]|0)+((c[p>>2]|0)*5<<2)>>2]=+g[dk>>2]*+g[kk>>2]-+g[hk>>2]*+g[gk>>2];g[lk>>2]=+g[c[n>>2]>>2];g[nk>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+(c[p>>2]<<2)>>2]=+g[lk>>2]*+g[mk>>2]+ +g[nk>>2]*+g[ok>>2];g[(c[m>>2]|0)+(c[p>>2]<<2)>>2]=+g[lk>>2]*+g[ok>>2]-+g[nk>>2]*+g[mk>>2];g[Ha>>2]=(+g[P>>2]+ +g[K>>2])*.7071067690849304;g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[mb>>2]=+g[Ga>>2]+ +g[Ha>>2];g[La>>2]=(+g[Y>>2]+ +g[Z>>2])*.7071067690849304;g[kb>>2]=+g[Ka>>2]-+g[La>>2];g[ob>>2]=+g[Ka>>2]+ +g[La>>2];g[Fa>>2]=+g[(c[n>>2]|0)+32>>2];g[Ja>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2]=+g[Fa>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[kb>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]|0)<<2)>>2]=+g[Fa>>2]*+g[kb>>2]-+g[Ja>>2]*+g[Ia>>2];g[lb>>2]=+g[c[n>>2]>>2];g[nb>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[lb>>2]*+g[mb>>2]+ +g[nb>>2]*+g[ob>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]|0)<<2)>>2]=+g[lb>>2]*+g[ob>>2]-+g[nb>>2]*+g[mb>>2];g[Rj>>2]=(+g[Lj>>2]-+g[Qj>>2])*.7071067690849304;g[Sj>>2]=+g[Gj>>2]-+g[Rj>>2];g[ej>>2]=+g[Gj>>2]+ +g[Rj>>2];g[bj>>2]=(+g[$i>>2]-+g[aj>>2])*.7071067690849304;g[cj>>2]=+g[_i>>2]-+g[bj>>2];g[gj>>2]=+g[_i>>2]+ +g[bj>>2];g[Bj>>2]=+g[(c[n>>2]|0)+48>>2];g[Tj>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[Bj>>2]*+g[Sj>>2]-+g[Tj>>2]*+g[cj>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[Tj>>2]*+g[Sj>>2]+ 
+g[Bj>>2]*+g[cj>>2];g[dj>>2]=+g[(c[n>>2]|0)+16>>2];g[fj>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[dj>>2]*+g[ej>>2]-+g[fj>>2]*+g[gj>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*7|0)<<2)>>2]=+g[fj>>2]*+g[ej>>2]+ +g[dj>>2]*+g[gj>>2];g[Sf>>2]=(+g[Mf>>2]-+g[Rf>>2])*.7071067690849304;g[Tf>>2]=+g[Hf>>2]-+g[Sf>>2];g[ch>>2]=+g[Hf>>2]+ +g[Sf>>2];g[Bg>>2]=(+g[zg>>2]-+g[Ag>>2])*.7071067690849304;g[Cg>>2]=+g[yg>>2]-+g[Bg>>2];g[eh>>2]=+g[yg>>2]+ +g[Bg>>2];g[Cf>>2]=+g[(c[n>>2]|0)+48>>2];g[Uf>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Cf>>2]*+g[Tf>>2]-+g[Uf>>2]*+g[Cg>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Uf>>2]*+g[Tf>>2]+ +g[Cf>>2]*+g[Cg>>2];g[Dg>>2]=+g[(c[n>>2]|0)+16>>2];g[dh>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[Dg>>2]*+g[ch>>2]-+g[dh>>2]*+g[eh>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*5|0)<<2)>>2]=+g[dh>>2]*+g[ch>>2]+ +g[Dg>>2]*+g[eh>>2];g[Q>>2]=(+g[K>>2]-+g[P>>2])*.7071067690849304;g[R>>2]=+g[F>>2]-+g[Q>>2];g[Ca>>2]=+g[F>>2]+ +g[Q>>2];g[_>>2]=(+g[Y>>2]-+g[Z>>2])*.7071067690849304;g[$>>2]=+g[X>>2]-+g[_>>2];g[Ea>>2]=+g[X>>2]+ +g[_>>2];g[ya>>2]=+g[(c[n>>2]|0)+48>>2];g[S>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]|0)<<2)>>2]=+g[ya>>2]*+g[R>>2]-+g[S>>2]*+g[$>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+(c[o>>2]|0)<<2)>>2]=+g[S>>2]*+g[R>>2]+ +g[ya>>2]*+g[$>>2];g[aa>>2]=+g[(c[n>>2]|0)+16>>2];g[Da>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[aa>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+(c[o>>2]|0)<<2)>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[aa>>2]*+g[Ea>>2];g[Pd>>2]=(+g[Jd>>2]-+g[Od>>2])*.7071067690849304;g[Qd>>2]=+g[Ed>>2]-+g[Pd>>2];g[cd>>2]=+g[Ed>>2]+ +g[Pd>>2];g[Zd>>2]=(+g[Xd>>2]-+g[Yd>>2])*.7071067690849304;g[_d>>2]=+g[Wd>>2]-+g[Zd>>2];g[ed>>2]=+g[Wd>>2]+ 
+g[Zd>>2];g[$c>>2]=+g[(c[n>>2]|0)+48>>2];g[Rd>>2]=+g[(c[n>>2]|0)+52>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[$c>>2]*+g[Qd>>2]-+g[Rd>>2]*+g[_d>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*7|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[Rd>>2]*+g[Qd>>2]+ +g[$c>>2]*+g[_d>>2];g[$d>>2]=+g[(c[n>>2]|0)+16>>2];g[dd>>2]=+g[(c[n>>2]|0)+20>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[$d>>2]*+g[cd>>2]-+g[dd>>2]*+g[ed>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*3|0)+((c[o>>2]|0)*3|0)<<2)>>2]=+g[dd>>2]*+g[cd>>2]+ +g[$d>>2]*+g[ed>>2];g[Gc>>2]=(+g[Pb>>2]+ +g[jb>>2])*.7071067690849304;g[Hc>>2]=+g[Fc>>2]-+g[Gc>>2];g[Nc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Kc>>2]=(+g[wc>>2]+ +g[xc>>2])*.7071067690849304;g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[Pc>>2]=+g[Jc>>2]+ +g[Kc>>2];g[Ec>>2]=+g[(c[n>>2]|0)+32>>2];g[Ic>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2]=+g[Ec>>2]*+g[Hc>>2]+ +g[Ic>>2]*+g[Lc>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+(c[o>>2]<<1)<<2)>>2]=+g[Ec>>2]*+g[Lc>>2]-+g[Ic>>2]*+g[Hc>>2];g[Mc>>2]=+g[c[n>>2]>>2];g[Oc>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[Mc>>2]*+g[Nc>>2]+ +g[Oc>>2]*+g[Pc>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+(c[o>>2]<<1)<<2)>>2]=+g[Mc>>2]*+g[Pc>>2]-+g[Oc>>2]*+g[Nc>>2];g[Ii>>2]=(+g[pi>>2]+ +g[Mh>>2])*.7071067690849304;g[Ji>>2]=+g[Hi>>2]-+g[Ii>>2];g[Rh>>2]=+g[Hi>>2]+ +g[Ii>>2];g[Oh>>2]=(+g[yi>>2]+ +g[zi>>2])*.7071067690849304;g[Ph>>2]=+g[Nh>>2]-+g[Oh>>2];g[Th>>2]=+g[Nh>>2]+ +g[Oh>>2];g[Gi>>2]=+g[(c[n>>2]|0)+32>>2];g[Ki>>2]=+g[(c[n>>2]|0)+36>>2];g[(c[l>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Gi>>2]*+g[Ji>>2]+ +g[Ki>>2]*+g[Ph>>2];g[(c[m>>2]|0)+(((c[p>>2]|0)*5|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Gi>>2]*+g[Ph>>2]-+g[Ki>>2]*+g[Ji>>2];g[Qh>>2]=+g[c[n>>2]>>2];g[Sh>>2]=+g[(c[n>>2]|0)+4>>2];g[(c[l>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Qh>>2]*+g[Rh>>2]+ 
+g[Sh>>2]*+g[Th>>2];g[(c[m>>2]|0)+((c[p>>2]|0)+((c[o>>2]|0)*6|0)<<2)>>2]=+g[Qh>>2]*+g[Th>>2]-+g[Sh>>2]*+g[Rh>>2];c[ll>>2]=(c[ll>>2]|0)+1;c[l>>2]=(c[l>>2]|0)+(c[r>>2]<<2);c[m>>2]=(c[m>>2]|0)+(c[r>>2]<<2);c[n>>2]=(c[n>>2]|0)+56}i=ml;return}/* $i(a): saves the stack pointer i, reserves 16 bytes, spills a into that slot, calls gh(a, 1, 1608) and restores i. gh is defined outside this chunk. NOTE(review): 1 and 1608 are presumably a function-table index and a descriptor address - confirm against gh. */function $i(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,1,1608);i=b;return}/* aj(a,b,d,e,f,h,j): in-place 10-point butterfly loop over two arrays of 4-byte elements. a and b are the byte addresses of the two arrays, d the byte address of a coefficient table, e the element stride between the ten points, f the first loop index, h the loop bound (exclusive), j the element step applied to a and b after each iteration. Returns nothing; results overwrite the inputs. NOTE(review): g and c are heap views declared outside this chunk, presumably Float32 and Int32; the constants suggest a radix-10 twiddle FFT pass - confirm against the C codelet this was compiled from. */function aj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0;Db=i;i=i+528|0;/* 528-byte stack frame; every name assigned below holds the address of one slot in it */k=Db+524|0;l=Db+520|0;m=Db+516|0;n=Db+512|0;Eb=Db+508|0;o=Db+504|0;p=Db+500|0;Cb=Db+480|0;Ba=Db+476|0;P=Db+472|0;Xa=Db+468|0;D=Db+464|0;Ja=Db+460|0;Ua=Db+456|0;Va=Db+452|0;ka=Db+448|0;la=Db+444|0;ua=Db+440|0;$a=Db+436|0;ab=Db+432|0;bb=Db+428|0;$=Db+424|0;ca=Db+420|0;N=Db+416|0;kb=Db+412|0;vb=Db+408|0;wb=Db+404|0;na=Db+400|0;oa=Db+396|0;ta=Db+392|0;Ya=Db+388|0;Za=Db+384|0;_a=Db+380|0;x=Db+376|0;A=Db+372|0;M=Db+368|0;q=Db+364|0;C=Db+360|0;Aa=Db+356|0;B=Db+352|0;xa=Db+348|0;za=Db+344|0;wa=Db+340|0;ya=Db+336|0;Bb=Db+332|0;Z=Db+328|0;Ta=Db+324|0;ba=Db+320|0;Ia=Db+316|0;_=Db+312|0;Oa=Db+308|0;aa=Db+304|0;yb=Db+300|0;Ab=Db+296|0;xb=Db+292|0;zb=Db+288|0;Qa=Db+284|0;Sa=Db+280|0;Pa=Db+276|0;Ra=Db+272|0;Fa=Db+268|0;Ha=Db+264|0;Ea=Db+260|0;Ga=Db+256|0;La=Db+252|0;Na=Db+248|0;Ka=Db+244|0;Ma=Db+240|0;eb=Db+236|0;v=Db+232|0;ub=Db+228|0;z=Db+224|0;jb=Db+220|0;w=Db+216|0;pb=Db+212|0;y=Db+208|0;Da=Db+204|0;db=Db+200|0;Ca=Db+196|0;cb=Db+192|0;rb=Db+188|0;tb=Db+184|0;qb=Db+180|0;sb=Db+1
76|0;gb=Db+172|0;ib=Db+168|0;fb=Db+164|0;hb=Db+160|0;mb=Db+156|0;ob=Db+152|0;lb=Db+148|0;nb=Db+144|0;s=Db+140|0;Wa=Db+136|0;t=Db+132|0;ea=Db+128|0;ga=Db+124|0;Y=Db+120|0;da=Db+116|0;fa=Db+112|0;u=Db+108|0;O=Db+104|0;Q=Db+100|0;R=Db+96|0;V=Db+92|0;X=Db+88|0;T=Db+84|0;U=Db+80|0;W=Db+76|0;S=Db+72|0;ia=Db+68|0;r=Db+64|0;ha=Db+60|0;qa=Db+56|0;sa=Db+52|0;ma=Db+48|0;pa=Db+44|0;ra=Db+40|0;ja=Db+36|0;I=Db+32|0;va=Db+28|0;H=Db+24|0;G=Db+20|0;K=Db+16|0;E=Db+12|0;F=Db+8|0;L=Db+4|0;J=Db;/* spill the seven arguments into their frame slots */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Eb>>2]=f;c[o>>2]=h;c[p>>2]=j;/* four constants stored to unnamed frame slots; the arithmetic below uses the same values as inline literals */g[Db+496>>2]=.5877852439880371;g[Db+492>>2]=.9510565400123596;g[Db+488>>2]=.25;g[Db+484>>2]=.55901700258255;/* loop counter starts at f; the coefficient pointer is pre-advanced by f times 18 elements (18 are consumed per iteration) */c[Cb>>2]=c[Eb>>2];c[m>>2]=(c[m>>2]|0)+((c[Eb>>2]|0)*18<<2);while(1){if((c[Cb>>2]|0)>=(c[o>>2]|0))break;/* load point 0 from both arrays; then for each of points 1..9 load the pair (x, y) and combine it with a coefficient pair (wr, wi) from the table as (wr*x + wi*y, wr*y - wi*x) */g[q>>2]=+g[c[k>>2]>>2];g[C>>2]=+g[c[l>>2]>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+32>>2];g[ya>>2]=+g[(c[m>>2]|0)+36>>2];g[Aa>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[B>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[Ba>>2]=+g[q>>2]-+g[Aa>>2];g[P>>2]=+g[C>>2]-+g[B>>2];g[Xa>>2]=+g[q>>2]+ +g[Aa>>2];g[D>>2]=+g[B>>2]+ +g[C>>2];g[yb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ab>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[xb>>2]=+g[(c[m>>2]|0)+24>>2];g[zb>>2]=+g[(c[m>>2]|0)+28>>2];g[Bb>>2]=+g[xb>>2]*+g[yb>>2]+ +g[zb>>2]*+g[Ab>>2];g[Z>>2]=+g[xb>>2]*+g[Ab>>2]-+g[zb>>2]*+g[yb>>2];g[Qa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Sa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Pa>>2]=+g[c[m>>2]>>2];g[Ra>>2]=+g[(c[m>>2]|0)+4>>2];g[Ta>>2]=+g[Pa>>2]*+g[Qa>>2]+ +g[Ra>>2]*+g[Sa>>2];g[ba>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Ra>>2]*+g[Qa>>2];g[Fa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ha>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ea>>2]=+g[(c[m>>2]|0)+64>>2];g[Ga>>2]=+g[(c[m>>2]|0)+68>>2];g[Ia>>2]=+g[Ea>>2]*+g[Fa>>2]+ 
+g[Ga>>2]*+g[Ha>>2];g[_>>2]=+g[Ea>>2]*+g[Ha>>2]-+g[Ga>>2]*+g[Fa>>2];g[La>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ka>>2]=+g[(c[m>>2]|0)+40>>2];g[Ma>>2]=+g[(c[m>>2]|0)+44>>2];g[Oa>>2]=+g[Ka>>2]*+g[La>>2]+ +g[Ma>>2]*+g[Na>>2];g[aa>>2]=+g[Ka>>2]*+g[Na>>2]-+g[Ma>>2]*+g[La>>2];g[Ja>>2]=+g[Bb>>2]-+g[Ia>>2];g[Ua>>2]=+g[Oa>>2]-+g[Ta>>2];g[Va>>2]=+g[Ja>>2]+ +g[Ua>>2];g[ka>>2]=+g[Z>>2]+ +g[_>>2];g[la>>2]=+g[aa>>2]+ +g[ba>>2];g[ua>>2]=+g[ka>>2]+ +g[la>>2];g[$a>>2]=+g[Bb>>2]+ +g[Ia>>2];g[ab>>2]=+g[Oa>>2]+ +g[Ta>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[N>>2]=+g[$>>2]+ +g[ca>>2];g[Da>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[db>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ca>>2]=+g[(c[m>>2]|0)+8>>2];g[cb>>2]=+g[(c[m>>2]|0)+12>>2];g[eb>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[cb>>2]*+g[db>>2];g[v>>2]=+g[Ca>>2]*+g[db>>2]-+g[cb>>2]*+g[Da>>2];g[rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[tb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[qb>>2]=+g[(c[m>>2]|0)+16>>2];g[sb>>2]=+g[(c[m>>2]|0)+20>>2];g[ub>>2]=+g[qb>>2]*+g[rb>>2]+ +g[sb>>2]*+g[tb>>2];g[z>>2]=+g[qb>>2]*+g[tb>>2]-+g[sb>>2]*+g[rb>>2];g[gb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[fb>>2]=+g[(c[m>>2]|0)+48>>2];g[hb>>2]=+g[(c[m>>2]|0)+52>>2];g[jb>>2]=+g[fb>>2]*+g[gb>>2]+ +g[hb>>2]*+g[ib>>2];g[w>>2]=+g[fb>>2]*+g[ib>>2]-+g[hb>>2]*+g[gb>>2];g[mb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ob>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[lb>>2]=+g[(c[m>>2]|0)+56>>2];g[nb>>2]=+g[(c[m>>2]|0)+60>>2];g[pb>>2]=+g[lb>>2]*+g[mb>>2]+ +g[nb>>2]*+g[ob>>2];g[y>>2]=+g[lb>>2]*+g[ob>>2]-+g[nb>>2]*+g[mb>>2];g[kb>>2]=+g[eb>>2]-+g[jb>>2];g[vb>>2]=+g[pb>>2]-+g[ub>>2];g[wb>>2]=+g[kb>>2]+ +g[vb>>2];g[na>>2]=+g[v>>2]+ +g[w>>2];g[oa>>2]=+g[y>>2]+ +g[z>>2];g[ta>>2]=+g[na>>2]+ +g[oa>>2];g[Ya>>2]=+g[eb>>2]+ +g[jb>>2];g[Za>>2]=+g[pb>>2]+ +g[ub>>2];g[_a>>2]=+g[Ya>>2]+ 
+g[Za>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[M>>2]=+g[x>>2]+ +g[A>>2];/* from here on the sums and differences are scaled by the constants and stored back over the same ten positions of both arrays */g[s>>2]=(+g[wb>>2]-+g[Va>>2])*.55901700258255;g[Wa>>2]=+g[wb>>2]+ +g[Va>>2];g[t>>2]=+g[Ba>>2]-+g[Wa>>2]*.25;g[Y>>2]=+g[x>>2]-+g[A>>2];g[da>>2]=+g[$>>2]-+g[ca>>2];g[ea>>2]=+g[Y>>2]*.9510565400123596+ +g[da>>2]*.5877852439880371;g[ga>>2]=+g[da>>2]*.9510565400123596-+g[Y>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ba>>2]+ +g[Wa>>2];g[fa>>2]=+g[t>>2]-+g[s>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[fa>>2]-+g[ga>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[fa>>2]+ +g[ga>>2];g[u>>2]=+g[s>>2]+ +g[t>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[u>>2]-+g[ea>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[u>>2]+ +g[ea>>2];g[O>>2]=(+g[M>>2]-+g[N>>2])*.55901700258255;g[Q>>2]=+g[M>>2]+ +g[N>>2];g[R>>2]=+g[P>>2]-+g[Q>>2]*.25;g[T>>2]=+g[kb>>2]-+g[vb>>2];g[U>>2]=+g[Ja>>2]-+g[Ua>>2];g[V>>2]=+g[T>>2]*.9510565400123596+ +g[U>>2]*.5877852439880371;g[X>>2]=+g[U>>2]*.9510565400123596-+g[T>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Q>>2]+ +g[P>>2];g[W>>2]=+g[R>>2]-+g[O>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[W>>2]-+g[X>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[X>>2]+ +g[W>>2];g[S>>2]=+g[O>>2]+ +g[R>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[S>>2]-+g[V>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[V>>2]+ +g[S>>2];g[ia>>2]=(+g[_a>>2]-+g[bb>>2])*.55901700258255;g[r>>2]=+g[_a>>2]+ +g[bb>>2];g[ha>>2]=+g[Xa>>2]-+g[r>>2]*.25;g[ma>>2]=+g[ka>>2]-+g[la>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[qa>>2]=+g[ma>>2]*.9510565400123596-+g[pa>>2]*.5877852439880371;g[sa>>2]=+g[pa>>2]*.9510565400123596+ +g[ma>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[Xa>>2]+ +g[r>>2];g[ra>>2]=+g[ia>>2]+ +g[ha>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ra>>2]-+g[sa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ra>>2]+ +g[sa>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]-+g[qa>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ja>>2]+ +g[qa>>2];g[I>>2]=(+g[ta>>2]-+g[ua>>2])*.55901700258255;g[va>>2]=+g[ta>>2]+ 
+g[ua>>2];g[H>>2]=+g[D>>2]-+g[va>>2]*.25;g[E>>2]=+g[$a>>2]-+g[ab>>2];g[F>>2]=+g[Ya>>2]-+g[Za>>2];g[G>>2]=+g[E>>2]*.9510565400123596-+g[F>>2]*.5877852439880371;g[K>>2]=+g[F>>2]*.9510565400123596+ +g[E>>2]*.5877852439880371;g[c[l>>2]>>2]=+g[va>>2]+ +g[D>>2];g[L>>2]=+g[I>>2]+ +g[H>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[K>>2]+ +g[L>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[L>>2]-+g[K>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]+ +g[J>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[J>>2]-+g[G>>2];/* step: counter + 1, both array pointers advance by j elements, the coefficient pointer by 72 bytes; the stride slot is XORed with c[2998] - NOTE(review): presumably c[2998] is always zero so the stride is unchanged (an optimizer barrier), confirm */c[Cb>>2]=(c[Cb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+72;c[n>>2]=c[n>>2]^c[2998]}i=Db;return}/* bj(a): saves the stack pointer i, reserves 16 bytes, spills a into that slot, calls gh(a, 2, 1672) and restores i. gh is defined outside this chunk. NOTE(review): 2 and 1672 are presumably a function-table index and a descriptor address - confirm against gh. */function bj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,2,1672);i=b;return}function cj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0;Xb=i;i=i+608|0;k=Xb+596|0;l=Xb+592|0;m=Xb+588|0;n=Xb+584|0;Yb=Xb+580|0;o=Xb+576|0;p=Xb+572|0;Wb=Xb+560|0;q=Xb+556|0;X=Xb+552|0;z=Xb+548|0;Ba=Xb+544|0;yb=Xb+540|0;w=Xb+536|0;W=Xb+532|0;Ca=Xb+528|0;nb=Xb+524|0;F=Xb+520|0;na=Xb+516|0;E=Xb+512|0;t=Xb+508|0;ka=Xb+504|0;G=Xb+500|0;H=Xb+496|0;Eb=Xb+492|0;T=Xb+488|0;ca=Xb+484|0;Ea=Xb+480|0;Pb=Xb+476|0;$=Xb+472|0;U=Xb+468|0;Fa=Xb+464|0;Ya=Xb+460|0;ya=Xb+456|0;ia=Xb+452|0;xa=Xb+448|0;hb=Xb+444|0;fa=Xb+440|0;B=Xb+436|0;
C=Xb+432|0;Ua=Xb+428|0;x=Xb+424|0;xb=Xb+420|0;y=Xb+416|0;Ra=Xb+412|0;Ta=Xb+408|0;za=Xb+404|0;Sa=Xb+400|0;Wa=Xb+396|0;wb=Xb+392|0;Va=Xb+388|0;Xa=Xb+384|0;s=Xb+380|0;ma=Xb+376|0;sb=Xb+372|0;la=Xb+368|0;kb=Xb+364|0;mb=Xb+360|0;jb=Xb+356|0;lb=Xb+352|0;ub=Xb+348|0;r=Xb+344|0;tb=Xb+340|0;vb=Xb+336|0;pb=Xb+332|0;rb=Xb+328|0;ob=Xb+324|0;qb=Xb+320|0;Ob=Xb+316|0;ba=Xb+312|0;Jb=Xb+308|0;aa=Xb+304|0;Bb=Xb+300|0;Db=Xb+296|0;Ab=Xb+292|0;Cb=Xb+288|0;Lb=Xb+284|0;Nb=Xb+280|0;Kb=Xb+276|0;Mb=Xb+272|0;Gb=Xb+268|0;Ib=Xb+264|0;Fb=Xb+260|0;Hb=Xb+256|0;gb=Xb+252|0;ha=Xb+248|0;bb=Xb+244|0;ga=Xb+240|0;Tb=Xb+236|0;Vb=Xb+232|0;Sb=Xb+228|0;Ub=Xb+224|0;db=Xb+220|0;fb=Xb+216|0;cb=Xb+212|0;eb=Xb+208|0;_a=Xb+204|0;ab=Xb+200|0;Za=Xb+196|0;$a=Xb+192|0;Rb=Xb+188|0;O=Xb+184|0;Z=Xb+180|0;Aa=Xb+176|0;v=Xb+172|0;_=Xb+168|0;R=Xb+164|0;S=Xb+160|0;zb=Xb+156|0;Qb=Xb+152|0;V=Xb+148|0;Y=Xb+144|0;ib=Xb+140|0;u=Xb+136|0;P=Xb+132|0;Q=Xb+128|0;sa=Xb+124|0;wa=Xb+120|0;Ha=Xb+116|0;Ka=Xb+112|0;va=Xb+108|0;Ia=Xb+104|0;J=Xb+100|0;Ja=Xb+96|0;qa=Xb+92|0;ra=Xb+88|0;Da=Xb+84|0;Ga=Xb+80|0;ta=Xb+76|0;ua=Xb+72|0;D=Xb+68|0;I=Xb+64|0;ea=Xb+60|0;K=Xb+56|0;Na=Xb+52|0;Pa=Xb+48|0;pa=Xb+44|0;Qa=Xb+40|0;N=Xb+36|0;Oa=Xb+32|0;A=Xb+28|0;da=Xb+24|0;La=Xb+20|0;Ma=Xb+16|0;ja=Xb+12|0;oa=Xb+8|0;L=Xb+4|0;M=Xb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Yb>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Xb+568>>2]=.5;g[Xb+564>>2]=.8660253882408142;c[Wb>>2]=c[Yb>>2];c[m>>2]=(c[m>>2]|0)+((c[Yb>>2]|0)*22<<2);while(1){if((c[Wb>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[X>>2]=+g[c[l>>2]>>2];g[Ra>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ta>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+24>>2];g[Sa>>2]=+g[(c[m>>2]|0)+28>>2];g[Ua>>2]=+g[za>>2]*+g[Ra>>2]+ +g[Sa>>2]*+g[Ta>>2];g[x>>2]=+g[za>>2]*+g[Ta>>2]-+g[Sa>>2]*+g[Ra>>2];g[Wa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[wb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Va>>2]=+g[(c[m>>2]|0)+56>>2];g[Xa>>2]=+g[(c[m>>2]|0)+60>>2];g[xb>>2]=+g[Va>>2]*+g[Wa>>2]+ 
+g[Xa>>2]*+g[wb>>2];g[y>>2]=+g[Va>>2]*+g[wb>>2]-+g[Xa>>2]*+g[Wa>>2];g[z>>2]=(+g[x>>2]-+g[y>>2])*.8660253882408142;g[Ba>>2]=(+g[xb>>2]-+g[Ua>>2])*.8660253882408142;g[yb>>2]=+g[Ua>>2]+ +g[xb>>2];g[w>>2]=+g[q>>2]-+g[yb>>2]*.5;g[W>>2]=+g[x>>2]+ +g[y>>2];g[Ca>>2]=+g[X>>2]-+g[W>>2]*.5;g[kb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[jb>>2]=+g[(c[m>>2]|0)+64>>2];g[lb>>2]=+g[(c[m>>2]|0)+68>>2];g[nb>>2]=+g[jb>>2]*+g[kb>>2]+ +g[lb>>2]*+g[mb>>2];g[F>>2]=+g[jb>>2]*+g[mb>>2]-+g[lb>>2]*+g[kb>>2];g[ub>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[tb>>2]=+g[(c[m>>2]|0)+32>>2];g[vb>>2]=+g[(c[m>>2]|0)+36>>2];g[s>>2]=+g[tb>>2]*+g[ub>>2]+ +g[vb>>2]*+g[r>>2];g[ma>>2]=+g[tb>>2]*+g[r>>2]-+g[vb>>2]*+g[ub>>2];g[pb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[rb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ob>>2]=+g[c[m>>2]>>2];g[qb>>2]=+g[(c[m>>2]|0)+4>>2];g[sb>>2]=+g[ob>>2]*+g[pb>>2]+ +g[qb>>2]*+g[rb>>2];g[la>>2]=+g[ob>>2]*+g[rb>>2]-+g[qb>>2]*+g[pb>>2];g[na>>2]=(+g[la>>2]-+g[ma>>2])*.8660253882408142;g[E>>2]=(+g[s>>2]-+g[sb>>2])*.8660253882408142;g[t>>2]=+g[sb>>2]+ +g[s>>2];g[ka>>2]=+g[nb>>2]-+g[t>>2]*.5;g[G>>2]=+g[la>>2]+ +g[ma>>2];g[H>>2]=+g[F>>2]-+g[G>>2]*.5;g[Bb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ab>>2]=+g[(c[m>>2]|0)+40>>2];g[Cb>>2]=+g[(c[m>>2]|0)+44>>2];g[Eb>>2]=+g[Ab>>2]*+g[Bb>>2]+ +g[Cb>>2]*+g[Db>>2];g[T>>2]=+g[Ab>>2]*+g[Db>>2]-+g[Cb>>2]*+g[Bb>>2];g[Lb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Nb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Kb>>2]=+g[(c[m>>2]|0)+8>>2];g[Mb>>2]=+g[(c[m>>2]|0)+12>>2];g[Ob>>2]=+g[Kb>>2]*+g[Lb>>2]+ +g[Mb>>2]*+g[Nb>>2];g[ba>>2]=+g[Kb>>2]*+g[Nb>>2]-+g[Mb>>2]*+g[Lb>>2];g[Gb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Fb>>2]=+g[(c[m>>2]|0)+72>>2];g[Hb>>2]=+g[(c[m>>2]|0)+76>>2];g[Jb>>2]=+g[Fb>>2]*+g[Gb>>2]+ 
+g[Hb>>2]*+g[Ib>>2];g[aa>>2]=+g[Fb>>2]*+g[Ib>>2]-+g[Hb>>2]*+g[Gb>>2];g[ca>>2]=(+g[aa>>2]-+g[ba>>2])*.8660253882408142;g[Ea>>2]=(+g[Ob>>2]-+g[Jb>>2])*.8660253882408142;g[Pb>>2]=+g[Jb>>2]+ +g[Ob>>2];g[$>>2]=+g[Eb>>2]-+g[Pb>>2]*.5;g[U>>2]=+g[aa>>2]+ +g[ba>>2];g[Fa>>2]=+g[T>>2]-+g[U>>2]*.5;g[Tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Sb>>2]=+g[(c[m>>2]|0)+16>>2];g[Ub>>2]=+g[(c[m>>2]|0)+20>>2];g[Ya>>2]=+g[Sb>>2]*+g[Tb>>2]+ +g[Ub>>2]*+g[Vb>>2];g[ya>>2]=+g[Sb>>2]*+g[Vb>>2]-+g[Ub>>2]*+g[Tb>>2];g[db>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[fb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[cb>>2]=+g[(c[m>>2]|0)+80>>2];g[eb>>2]=+g[(c[m>>2]|0)+84>>2];g[gb>>2]=+g[cb>>2]*+g[db>>2]+ +g[eb>>2]*+g[fb>>2];g[ha>>2]=+g[cb>>2]*+g[fb>>2]-+g[eb>>2]*+g[db>>2];g[_a>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ab>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Za>>2]=+g[(c[m>>2]|0)+48>>2];g[$a>>2]=+g[(c[m>>2]|0)+52>>2];g[bb>>2]=+g[Za>>2]*+g[_a>>2]+ +g[$a>>2]*+g[ab>>2];g[ga>>2]=+g[Za>>2]*+g[ab>>2]-+g[$a>>2]*+g[_a>>2];g[ia>>2]=(+g[ga>>2]-+g[ha>>2])*.8660253882408142;g[xa>>2]=(+g[gb>>2]-+g[bb>>2])*.8660253882408142;g[hb>>2]=+g[bb>>2]+ +g[gb>>2];g[fa>>2]=+g[Ya>>2]-+g[hb>>2]*.5;g[B>>2]=+g[ga>>2]+ +g[ha>>2];g[C>>2]=+g[ya>>2]-+g[B>>2]*.5;g[zb>>2]=+g[q>>2]+ +g[yb>>2];g[Qb>>2]=+g[Eb>>2]+ +g[Pb>>2];g[Rb>>2]=+g[zb>>2]+ +g[Qb>>2];g[O>>2]=+g[zb>>2]-+g[Qb>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[Z>>2]=+g[V>>2]+ +g[Y>>2];g[Aa>>2]=+g[Y>>2]-+g[V>>2];g[ib>>2]=+g[Ya>>2]+ +g[hb>>2];g[u>>2]=+g[nb>>2]+ +g[t>>2];g[v>>2]=+g[ib>>2]+ +g[u>>2];g[_>>2]=+g[ib>>2]-+g[u>>2];g[P>>2]=+g[ya>>2]+ +g[B>>2];g[Q>>2]=+g[F>>2]+ +g[G>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[S>>2]=+g[P>>2]+ +g[Q>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Rb>>2]-+g[v>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Z>>2]-+g[S>>2];g[c[k>>2]>>2]=+g[Rb>>2]+ +g[v>>2];g[c[l>>2]>>2]=+g[S>>2]+ +g[Z>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[O>>2]-+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[_>>2]+ 
+g[Aa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[O>>2]+ +g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Aa>>2]-+g[_>>2];g[qa>>2]=+g[w>>2]+ +g[z>>2];g[ra>>2]=+g[$>>2]+ +g[ca>>2];g[sa>>2]=+g[qa>>2]+ +g[ra>>2];g[wa>>2]=+g[qa>>2]-+g[ra>>2];g[Da>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Ga>>2]=+g[Ea>>2]+ +g[Fa>>2];g[Ha>>2]=+g[Da>>2]-+g[Ga>>2];g[Ka>>2]=+g[Ga>>2]+ +g[Da>>2];g[ta>>2]=+g[fa>>2]+ +g[ia>>2];g[ua>>2]=+g[ka>>2]+ +g[na>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[Ia>>2]=+g[ta>>2]-+g[ua>>2];g[D>>2]=+g[xa>>2]+ +g[C>>2];g[I>>2]=+g[E>>2]+ +g[H>>2];g[J>>2]=+g[D>>2]-+g[I>>2];g[Ja>>2]=+g[D>>2]+ +g[I>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[sa>>2]-+g[va>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Ka>>2]-+g[Ja>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[sa>>2]+ +g[va>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ja>>2]+ +g[Ka>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[wa>>2]-+g[J>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ia>>2]+ +g[Ha>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[wa>>2]+ +g[J>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ha>>2]-+g[Ia>>2];g[A>>2]=+g[w>>2]-+g[z>>2];g[da>>2]=+g[$>>2]-+g[ca>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[K>>2]=+g[A>>2]-+g[da>>2];g[La>>2]=+g[Fa>>2]-+g[Ea>>2];g[Ma>>2]=+g[Ca>>2]-+g[Ba>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[Pa>>2]=+g[Ma>>2]-+g[La>>2];g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[oa>>2]=+g[ka>>2]-+g[na>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[Qa>>2]=+g[ja>>2]-+g[oa>>2];g[L>>2]=+g[C>>2]-+g[xa>>2];g[M>>2]=+g[H>>2]-+g[E>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[Oa>>2]=+g[L>>2]+ +g[M>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ea>>2]-+g[pa>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Na>>2]-+g[Oa>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ea>>2]+ +g[pa>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Oa>>2]+ +g[Na>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[K>>2]-+g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Qa>>2]+ +g[Pa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[K>>2]+ 
+g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Pa>>2]-+g[Qa>>2];c[Wb>>2]=(c[Wb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+88;c[n>>2]=c[n>>2]^c[2998]}i=Xb;return}function dj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,3,1736);i=b;return} +function ej(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0;nd=i;i=i+912|0;k=nd+900|0;l=nd+896|0;m=nd+892|0;n=nd+888|0;od=nd+884|0;o=nd+880|0;p=nd+876|0;md=nd+848|0;pa=nd+844|0;Nb=nd+840|0;Rc=nd+836|0;ma=nd+832|0;ab=nd+828|0;Ob=nd+824|0;u=nd+820|0;ja=nd+816|0;ka=nd+812|0;Oa=nd+808|0;Pa=nd+804|0;Ya=nd+800|0;I=nd+796|0;U=nd+792|0;sb=nd+788|0;Bb=nd+784|0;nb=nd+780|0;Ab=nd+776|0;N=nd+772|0;V=nd+768|0;gd=nd+764|0;zc=nd+760|0;Ac=nd+756|0;Ra=nd+752|0;Sa=nd+748|0;Xa=nd+744|0;va=nd+740|0;R=nd+736|0;Ja=nd+732|0;Eb=nd+728|0;Ea=nd+724|0;Db=nd+720|0;C=nd+716|0;S=nd+712|0;q=nd+708|0;$a=nd+704|0;kc=nd+700|0;na=nd+696|0;Pc=nd
+692|0;oa=nd+688|0;Qc=nd+684|0;_a=nd+680|0;Ib=nd+676|0;jc=nd+672|0;za=nd+668|0;ic=nd+664|0;mc=nd+660|0;Oc=nd+656|0;lc=nd+652|0;nc=nd+648|0;Fc=nd+644|0;kb=nd+640|0;z=nd+636|0;pb=nd+632|0;Kc=nd+628|0;F=nd+624|0;s=nd+620|0;G=nd+616|0;t=nd+612|0;lb=nd+608|0;ca=nd+604|0;K=nd+600|0;ha=nd+596|0;L=nd+592|0;ia=nd+588|0;qb=nd+584|0;Cc=nd+580|0;Ec=nd+576|0;Bc=nd+572|0;Dc=nd+568|0;w=nd+564|0;y=nd+560|0;v=nd+556|0;x=nd+552|0;Hc=nd+548|0;Jc=nd+544|0;Gc=nd+540|0;Ic=nd+536|0;Mc=nd+532|0;r=nd+528|0;Lc=nd+524|0;Nc=nd+520|0;$=nd+516|0;ba=nd+512|0;A=nd+508|0;aa=nd+504|0;ea=nd+500|0;ga=nd+496|0;da=nd+492|0;fa=nd+488|0;E=nd+484|0;H=nd+480|0;ob=nd+476|0;rb=nd+472|0;jb=nd+468|0;mb=nd+464|0;J=nd+460|0;M=nd+456|0;Wc=nd+452|0;Ba=nd+448|0;ld=nd+444|0;Ga=nd+440|0;$c=nd+436|0;sa=nd+432|0;ed=nd+428|0;ta=nd+424|0;fd=nd+420|0;Ca=nd+416|0;sc=nd+412|0;xa=nd+408|0;xc=nd+404|0;ya=nd+400|0;yc=nd+396|0;Ha=nd+392|0;Tc=nd+388|0;Vc=nd+384|0;Sc=nd+380|0;Uc=nd+376|0;id=nd+372|0;kd=nd+368|0;hd=nd+364|0;jd=nd+360|0;Yc=nd+356|0;_c=nd+352|0;Xc=nd+348|0;Zc=nd+344|0;bd=nd+340|0;dd=nd+336|0;ad=nd+332|0;cd=nd+328|0;pc=nd+324|0;rc=nd+320|0;oc=nd+316|0;qc=nd+312|0;uc=nd+308|0;wc=nd+304|0;tc=nd+300|0;vc=nd+296|0;ra=nd+292|0;ua=nd+288|0;Fa=nd+284|0;Ia=nd+280|0;Aa=nd+276|0;Da=nd+272|0;wa=nd+268|0;B=nd+264|0;Ma=nd+260|0;la=nd+256|0;La=nd+252|0;Ua=nd+248|0;Wa=nd+244|0;Qa=nd+240|0;Ta=nd+236|0;Va=nd+232|0;Na=nd+228|0;cb=nd+224|0;Za=nd+220|0;bb=nd+216|0;gb=nd+212|0;Jb=nd+208|0;eb=nd+204|0;fb=nd+200|0;hb=nd+196|0;db=nd+192|0;Gb=nd+188|0;Ka=nd+184|0;qa=nd+180|0;P=nd+176|0;xb=nd+172|0;yb=nd+168|0;Hb=nd+164|0;zb=nd+160|0;Cb=nd+156|0;Fb=nd+152|0;D=nd+148|0;O=nd+144|0;_b=nd+140|0;hc=nd+136|0;$b=nd+132|0;cc=nd+128|0;dc=nd+124|0;ec=nd+120|0;gc=nd+116|0;fc=nd+112|0;Yb=nd+108|0;Zb=nd+104|0;ac=nd+100|0;bc=nd+96|0;Vb=nd+92|0;Wb=nd+88|0;Pb=nd+84|0;Qb=nd+80|0;Mb=nd+76|0;Rb=nd+72|0;Xb=nd+68|0;Sb=nd+64|0;Tb=nd+60|0;Ub=nd+56|0;Kb=nd+52|0;Lb=nd+48|0;ub=nd+44|0;wb=nd+40|0;Q=nd+36|0;X=nd+32|0;Y=nd+28|0;Z=nd+24|0;vb=nd+20|0;_=nd+16|0;ib=nd+12|0;tb
=nd+8|0;T=nd+4|0;W=nd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[od>>2]=f;c[o>>2]=h;c[p>>2]=j;g[nd+872>>2]=.5877852439880371;g[nd+868>>2]=.9510565400123596;g[nd+864>>2]=.25;g[nd+860>>2]=.55901700258255;g[nd+856>>2]=.5;g[nd+852>>2]=.8660253882408142;c[md>>2]=c[od>>2];c[m>>2]=(c[m>>2]|0)+((c[od>>2]|0)*28<<2);while(1){if((c[md>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[$a>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+32>>2];g[ic>>2]=+g[(c[m>>2]|0)+36>>2];g[kc>>2]=+g[za>>2]*+g[Ib>>2]+ +g[ic>>2]*+g[jc>>2];g[na>>2]=+g[za>>2]*+g[jc>>2]-+g[ic>>2]*+g[Ib>>2];g[mc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Oc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[lc>>2]=+g[(c[m>>2]|0)+72>>2];g[nc>>2]=+g[(c[m>>2]|0)+76>>2];g[Pc>>2]=+g[lc>>2]*+g[mc>>2]+ +g[nc>>2]*+g[Oc>>2];g[oa>>2]=+g[lc>>2]*+g[Oc>>2]-+g[nc>>2]*+g[mc>>2];g[pa>>2]=(+g[na>>2]-+g[oa>>2])*.8660253882408142;g[Nb>>2]=(+g[Pc>>2]-+g[kc>>2])*.8660253882408142;g[Qc>>2]=+g[kc>>2]+ +g[Pc>>2];g[Rc>>2]=+g[q>>2]+ +g[Qc>>2];g[ma>>2]=+g[q>>2]-+g[Qc>>2]*.5;g[_a>>2]=+g[na>>2]+ +g[oa>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[Ob>>2]=+g[$a>>2]-+g[_a>>2]*.5;g[Cc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ec>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Bc>>2]=+g[(c[m>>2]|0)+40>>2];g[Dc>>2]=+g[(c[m>>2]|0)+44>>2];g[Fc>>2]=+g[Bc>>2]*+g[Cc>>2]+ +g[Dc>>2]*+g[Ec>>2];g[kb>>2]=+g[Bc>>2]*+g[Ec>>2]-+g[Dc>>2]*+g[Cc>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[y>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[v>>2]=+g[(c[m>>2]|0)+64>>2];g[x>>2]=+g[(c[m>>2]|0)+68>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[pb>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[Hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Gc>>2]=+g[(c[m>>2]|0)+80>>2];g[Ic>>2]=+g[(c[m>>2]|0)+84>>2];g[Kc>>2]=+g[Gc>>2]*+g[Hc>>2]+ 
+g[Ic>>2]*+g[Jc>>2];g[F>>2]=+g[Gc>>2]*+g[Jc>>2]-+g[Ic>>2]*+g[Hc>>2];g[Mc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Lc>>2]=+g[c[m>>2]>>2];g[Nc>>2]=+g[(c[m>>2]|0)+4>>2];g[s>>2]=+g[Lc>>2]*+g[Mc>>2]+ +g[Nc>>2]*+g[r>>2];g[G>>2]=+g[Lc>>2]*+g[r>>2]-+g[Nc>>2]*+g[Mc>>2];g[t>>2]=+g[Kc>>2]+ +g[s>>2];g[lb>>2]=+g[F>>2]+ +g[G>>2];g[$>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[A>>2]=+g[(c[m>>2]|0)+104>>2];g[aa>>2]=+g[(c[m>>2]|0)+108>>2];g[ca>>2]=+g[A>>2]*+g[$>>2]+ +g[aa>>2]*+g[ba>>2];g[K>>2]=+g[A>>2]*+g[ba>>2]-+g[aa>>2]*+g[$>>2];g[ea>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ga>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+24>>2];g[fa>>2]=+g[(c[m>>2]|0)+28>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[L>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[ia>>2]=+g[ca>>2]+ +g[ha>>2];g[qb>>2]=+g[K>>2]+ +g[L>>2];g[u>>2]=+g[Fc>>2]+ +g[t>>2];g[ja>>2]=+g[z>>2]+ +g[ia>>2];g[ka>>2]=+g[u>>2]+ +g[ja>>2];g[Oa>>2]=+g[kb>>2]+ +g[lb>>2];g[Pa>>2]=+g[pb>>2]+ +g[qb>>2];g[Ya>>2]=+g[Oa>>2]+ +g[Pa>>2];g[E>>2]=+g[Fc>>2]-+g[t>>2]*.5;g[H>>2]=(+g[F>>2]-+g[G>>2])*.8660253882408142;g[I>>2]=+g[E>>2]-+g[H>>2];g[U>>2]=+g[E>>2]+ +g[H>>2];g[ob>>2]=(+g[ha>>2]-+g[ca>>2])*.8660253882408142;g[rb>>2]=+g[pb>>2]-+g[qb>>2]*.5;g[sb>>2]=+g[ob>>2]+ +g[rb>>2];g[Bb>>2]=+g[rb>>2]-+g[ob>>2];g[jb>>2]=(+g[s>>2]-+g[Kc>>2])*.8660253882408142;g[mb>>2]=+g[kb>>2]-+g[lb>>2]*.5;g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[Ab>>2]=+g[mb>>2]-+g[jb>>2];g[J>>2]=+g[z>>2]-+g[ia>>2]*.5;g[M>>2]=(+g[K>>2]-+g[L>>2])*.8660253882408142;g[N>>2]=+g[J>>2]-+g[M>>2];g[V>>2]=+g[J>>2]+ +g[M>>2];g[Tc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Vc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Sc>>2]=+g[(c[m>>2]|0)+16>>2];g[Uc>>2]=+g[(c[m>>2]|0)+20>>2];g[Wc>>2]=+g[Sc>>2]*+g[Tc>>2]+ 
+g[Uc>>2]*+g[Vc>>2];g[Ba>>2]=+g[Sc>>2]*+g[Vc>>2]-+g[Uc>>2]*+g[Tc>>2];g[id>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[kd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[hd>>2]=+g[(c[m>>2]|0)+88>>2];g[jd>>2]=+g[(c[m>>2]|0)+92>>2];g[ld>>2]=+g[hd>>2]*+g[id>>2]+ +g[jd>>2]*+g[kd>>2];g[Ga>>2]=+g[hd>>2]*+g[kd>>2]-+g[jd>>2]*+g[id>>2];g[Yc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[_c>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Xc>>2]=+g[(c[m>>2]|0)+56>>2];g[Zc>>2]=+g[(c[m>>2]|0)+60>>2];g[$c>>2]=+g[Xc>>2]*+g[Yc>>2]+ +g[Zc>>2]*+g[_c>>2];g[sa>>2]=+g[Xc>>2]*+g[_c>>2]-+g[Zc>>2]*+g[Yc>>2];g[bd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[dd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ad>>2]=+g[(c[m>>2]|0)+96>>2];g[cd>>2]=+g[(c[m>>2]|0)+100>>2];g[ed>>2]=+g[ad>>2]*+g[bd>>2]+ +g[cd>>2]*+g[dd>>2];g[ta>>2]=+g[ad>>2]*+g[dd>>2]-+g[cd>>2]*+g[bd>>2];g[fd>>2]=+g[$c>>2]+ +g[ed>>2];g[Ca>>2]=+g[sa>>2]+ +g[ta>>2];g[pc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[oc>>2]=+g[(c[m>>2]|0)+8>>2];g[qc>>2]=+g[(c[m>>2]|0)+12>>2];g[sc>>2]=+g[oc>>2]*+g[pc>>2]+ +g[qc>>2]*+g[rc>>2];g[xa>>2]=+g[oc>>2]*+g[rc>>2]-+g[qc>>2]*+g[pc>>2];g[uc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[wc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[tc>>2]=+g[(c[m>>2]|0)+48>>2];g[vc>>2]=+g[(c[m>>2]|0)+52>>2];g[xc>>2]=+g[tc>>2]*+g[uc>>2]+ +g[vc>>2]*+g[wc>>2];g[ya>>2]=+g[tc>>2]*+g[wc>>2]-+g[vc>>2]*+g[uc>>2];g[yc>>2]=+g[sc>>2]+ +g[xc>>2];g[Ha>>2]=+g[xa>>2]+ +g[ya>>2];g[gd>>2]=+g[Wc>>2]+ +g[fd>>2];g[zc>>2]=+g[ld>>2]+ +g[yc>>2];g[Ac>>2]=+g[gd>>2]+ +g[zc>>2];g[Ra>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Sa>>2]=+g[Ga>>2]+ +g[Ha>>2];g[Xa>>2]=+g[Ra>>2]+ +g[Sa>>2];g[ra>>2]=+g[Wc>>2]-+g[fd>>2]*.5;g[ua>>2]=(+g[sa>>2]-+g[ta>>2])*.8660253882408142;g[va>>2]=+g[ra>>2]-+g[ua>>2];g[R>>2]=+g[ra>>2]+ +g[ua>>2];g[Fa>>2]=(+g[xc>>2]-+g[sc>>2])*.8660253882408142;g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2]*.5;g[Ja>>2]=+g[Fa>>2]+ 
+g[Ia>>2];g[Eb>>2]=+g[Ia>>2]-+g[Fa>>2];g[Aa>>2]=(+g[ed>>2]-+g[$c>>2])*.8660253882408142;g[Da>>2]=+g[Ba>>2]-+g[Ca>>2]*.5;g[Ea>>2]=+g[Aa>>2]+ +g[Da>>2];g[Db>>2]=+g[Da>>2]-+g[Aa>>2];g[wa>>2]=+g[ld>>2]-+g[yc>>2]*.5;g[B>>2]=(+g[xa>>2]-+g[ya>>2])*.8660253882408142;g[C>>2]=+g[wa>>2]-+g[B>>2];g[S>>2]=+g[wa>>2]+ +g[B>>2];g[Ma>>2]=(+g[Ac>>2]-+g[ka>>2])*.55901700258255;g[la>>2]=+g[Ac>>2]+ +g[ka>>2];g[La>>2]=+g[Rc>>2]-+g[la>>2]*.25;g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Ua>>2]=+g[Qa>>2]*.9510565400123596-+g[Ta>>2]*.5877852439880371;g[Wa>>2]=+g[Ta>>2]*.9510565400123596+ +g[Qa>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[Rc>>2]+ +g[la>>2];g[Va>>2]=+g[Ma>>2]+ +g[La>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Va>>2]-+g[Wa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Va>>2]+ +g[Wa>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Na>>2]-+g[Ua>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Na>>2]+ +g[Ua>>2];g[cb>>2]=(+g[Xa>>2]-+g[Ya>>2])*.55901700258255;g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[bb>>2]=+g[ab>>2]-+g[Za>>2]*.25;g[eb>>2]=+g[u>>2]-+g[ja>>2];g[fb>>2]=+g[gd>>2]-+g[zc>>2];g[gb>>2]=+g[eb>>2]*.9510565400123596-+g[fb>>2]*.5877852439880371;g[Jb>>2]=+g[fb>>2]*.9510565400123596+ +g[eb>>2]*.5877852439880371;g[c[l>>2]>>2]=+g[Za>>2]+ +g[ab>>2];g[hb>>2]=+g[cb>>2]+ +g[bb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[hb>>2]-+g[Jb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Jb>>2]+ +g[hb>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[db>>2]-+g[gb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[gb>>2]+ +g[db>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[Gb>>2]=+g[Cb>>2]*.9510565400123596-+g[Fb>>2]*.5877852439880371;g[Ka>>2]=+g[Fb>>2]*.9510565400123596+ +g[Cb>>2]*.5877852439880371;g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[D>>2]=+g[va>>2]+ +g[C>>2];g[O>>2]=+g[I>>2]+ +g[N>>2];g[P>>2]=+g[D>>2]+ +g[O>>2];g[xb>>2]=+g[qa>>2]-+g[P>>2]*.25;g[yb>>2]=(+g[D>>2]-+g[O>>2])*.55901700258255;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[qa>>2]+ 
+g[P>>2];g[Hb>>2]=+g[yb>>2]+ +g[xb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Hb>>2]-+g[Ka>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Hb>>2]+ +g[Ka>>2];g[zb>>2]=+g[xb>>2]-+g[yb>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[zb>>2]-+g[Gb>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[zb>>2]+ +g[Gb>>2];g[Yb>>2]=+g[I>>2]-+g[N>>2];g[Zb>>2]=+g[va>>2]-+g[C>>2];g[_b>>2]=+g[Yb>>2]*.9510565400123596-+g[Zb>>2]*.5877852439880371;g[hc>>2]=+g[Zb>>2]*.9510565400123596+ +g[Yb>>2]*.5877852439880371;g[$b>>2]=+g[Ob>>2]-+g[Nb>>2];g[ac>>2]=+g[Db>>2]+ +g[Eb>>2];g[bc>>2]=+g[Ab>>2]+ +g[Bb>>2];g[cc>>2]=+g[ac>>2]+ +g[bc>>2];g[dc>>2]=+g[$b>>2]-+g[cc>>2]*.25;g[ec>>2]=(+g[ac>>2]-+g[bc>>2])*.55901700258255;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[cc>>2]+ +g[$b>>2];g[gc>>2]=+g[ec>>2]+ +g[dc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[gc>>2]-+g[hc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[hc>>2]+ +g[gc>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[_b>>2]+ +g[fc>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[fc>>2]-+g[_b>>2];g[Tb>>2]=+g[R>>2]-+g[S>>2];g[Ub>>2]=+g[U>>2]-+g[V>>2];g[Vb>>2]=+g[Tb>>2]*.9510565400123596+ +g[Ub>>2]*.5877852439880371;g[Wb>>2]=+g[Ub>>2]*.9510565400123596-+g[Tb>>2]*.5877852439880371;g[Pb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Kb>>2]=+g[Ea>>2]+ +g[Ja>>2];g[Lb>>2]=+g[nb>>2]+ +g[sb>>2];g[Qb>>2]=+g[Kb>>2]+ +g[Lb>>2];g[Mb>>2]=(+g[Kb>>2]-+g[Lb>>2])*.55901700258255;g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2]*.25;g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Qb>>2]+ +g[Pb>>2];g[Xb>>2]=+g[Rb>>2]-+g[Mb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Wb>>2]+ +g[Xb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Xb>>2]-+g[Wb>>2];g[Sb>>2]=+g[Mb>>2]+ +g[Rb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Sb>>2]-+g[Vb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Vb>>2]+ +g[Sb>>2];g[ib>>2]=+g[Ea>>2]-+g[Ja>>2];g[tb>>2]=+g[nb>>2]-+g[sb>>2];g[ub>>2]=+g[ib>>2]*.9510565400123596+ +g[tb>>2]*.5877852439880371;g[wb>>2]=+g[tb>>2]*.9510565400123596-+g[ib>>2]*.5877852439880371;g[Q>>2]=+g[ma>>2]+ +g[pa>>2];g[T>>2]=+g[R>>2]+ 
+g[S>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[X>>2]=+g[T>>2]+ +g[W>>2];g[Y>>2]=(+g[T>>2]-+g[W>>2])*.55901700258255;g[Z>>2]=+g[Q>>2]-+g[X>>2]*.25;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Q>>2]+ +g[X>>2];g[vb>>2]=+g[Z>>2]-+g[Y>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[vb>>2]-+g[wb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[vb>>2]+ +g[wb>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[_>>2]-+g[ub>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[_>>2]+ +g[ub>>2];c[md>>2]=(c[md>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+112;c[n>>2]=c[n>>2]^c[2998]}i=nd;return}function fj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,4,1800);i=b;return}function gj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0;fd=i;i=i+864|0;k=fd+856|0;l=fd+852|0;m=fd+848|0;n=fd+844|0;gd=fd+840|0;o=fd+836|0;p=fd+832|0;ed=fd+816|0;dc=fd+812|0;Qb=fd+808|0;sa=fd+804|0;cb=fd+800|0;Oc=fd+796|0;Rb=fd+792|0;va=fd+788|0;$a=fd+7
84|0;_c=fd+780|0;Ab=fd+776|0;D=fd+772|0;kb=fd+768|0;lc=fd+764|0;Bb=fd+760|0;I=fd+756|0;lb=fd+752|0;ea=fd+748|0;pa=fd+744|0;La=fd+740|0;Ma=fd+736|0;Na=fd+732|0;Oa=fd+728|0;_=fd+724|0;rb=fd+720|0;Ea=fd+716|0;sb=fd+712|0;yc=fd+708|0;u=fd+704|0;Eb=fd+700|0;Fb=fd+696|0;Gb=fd+692|0;Hb=fd+688|0;P=fd+684|0;ob=fd+680|0;U=fd+676|0;pb=fd+672|0;q=fd+668|0;bb=fd+664|0;cc=fd+660|0;ab=fd+656|0;Ib=fd+652|0;bc=fd+648|0;za=fd+644|0;ac=fd+640|0;Ic=fd+636|0;ta=fd+632|0;Nc=fd+628|0;ua=fd+624|0;fc=fd+620|0;Hc=fd+616|0;ec=fd+612|0;Gc=fd+608|0;Kc=fd+604|0;Mc=fd+600|0;Jc=fd+596|0;Lc=fd+592|0;Uc=fd+588|0;xa=fd+584|0;Zc=fd+580|0;ya=fd+576|0;B=fd+572|0;C=fd+568|0;Rc=fd+564|0;Tc=fd+560|0;Qc=fd+556|0;Sc=fd+552|0;Wc=fd+548|0;Yc=fd+544|0;Vc=fd+540|0;Xc=fd+536|0;dd=fd+532|0;F=fd+528|0;kc=fd+524|0;G=fd+520|0;E=fd+516|0;H=fd+512|0;ad=fd+508|0;cd=fd+504|0;$c=fd+500|0;bd=fd+496|0;hc=fd+492|0;jc=fd+488|0;gc=fd+484|0;ic=fd+480|0;A=fd+476|0;Aa=fd+472|0;oa=fd+468|0;Y=fd+464|0;da=fd+460|0;Ba=fd+456|0;ja=fd+452|0;X=fd+448|0;x=fd+444|0;z=fd+440|0;w=fd+436|0;y=fd+432|0;la=fd+428|0;na=fd+424|0;ka=fd+420|0;ma=fd+416|0;aa=fd+412|0;ca=fd+408|0;$=fd+404|0;ba=fd+400|0;ga=fd+396|0;ia=fd+392|0;fa=fd+388|0;ha=fd+384|0;W=fd+380|0;Z=fd+376|0;Ca=fd+372|0;Da=fd+368|0;sc=fd+364|0;L=fd+360|0;t=fd+356|0;S=fd+352|0;xc=fd+348|0;M=fd+344|0;Dc=fd+340|0;R=fd+336|0;pc=fd+332|0;rc=fd+328|0;oc=fd+324|0;qc=fd+320|0;Fc=fd+316|0;s=fd+312|0;Ec=fd+308|0;r=fd+304|0;uc=fd+300|0;wc=fd+296|0;tc=fd+292|0;vc=fd+288|0;Ac=fd+284|0;Cc=fd+280|0;zc=fd+276|0;Bc=fd+272|0;N=fd+268|0;O=fd+264|0;Q=fd+260|0;T=fd+256|0;K=fd+252|0;Ha=fd+248|0;Zb=fd+244|0;$b=fd+240|0;Ga=fd+236|0;_b=fd+232|0;ib=fd+228|0;Wb=fd+224|0;wa=fd+220|0;J=fd+216|0;Xb=fd+212|0;Yb=fd+208|0;V=fd+204|0;Fa=fd+200|0;Ia=fd+196|0;Ja=fd+192|0;Db=fd+188|0;Ra=fd+184|0;Lb=fd+180|0;Nb=fd+176|0;Qa=fd+172|0;Mb=fd+168|0;Ua=fd+164|0;hb=fd+160|0;zb=fd+156|0;Cb=fd+152|0;Jb=fd+148|0;Kb=fd+144|0;Ka=fd+140|0;Pa=fd+136|0;Sa=fd+132|0;Ta=fd+128|0;nb=fd+124|0;vb=fd+120|0;Tb=fd+116|0;Vb=fd+112|0;ub=fd+108|0;Ub=f
d+104|0;yb=fd+100|0;Ob=fd+96|0;jb=fd+92|0;mb=fd+88|0;Pb=fd+84|0;Sb=fd+80|0;qb=fd+76|0;tb=fd+72|0;wb=fd+68|0;xb=fd+64|0;nc=fd+60|0;Va=fd+56|0;eb=fd+52|0;gb=fd+48|0;ra=fd+44|0;fb=fd+40|0;Ya=fd+36|0;Za=fd+32|0;Pc=fd+28|0;mc=fd+24|0;_a=fd+20|0;db=fd+16|0;v=fd+12|0;qa=fd+8|0;Wa=fd+4|0;Xa=fd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[gd>>2]=f;c[o>>2]=h;c[p>>2]=j;g[fd+828>>2]=.3826834261417389;g[fd+824>>2]=.9238795042037964;g[fd+820>>2]=.7071067690849304;c[ed>>2]=c[gd>>2];c[m>>2]=(c[m>>2]|0)+((c[gd>>2]|0)*30<<2);while(1){if((c[ed>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[bb>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[bc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+56>>2];g[ac>>2]=+g[(c[m>>2]|0)+60>>2];g[cc>>2]=+g[za>>2]*+g[Ib>>2]+ +g[ac>>2]*+g[bc>>2];g[ab>>2]=+g[za>>2]*+g[bc>>2]-+g[ac>>2]*+g[Ib>>2];g[dc>>2]=+g[q>>2]+ +g[cc>>2];g[Qb>>2]=+g[bb>>2]-+g[ab>>2];g[sa>>2]=+g[q>>2]-+g[cc>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[fc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Hc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ec>>2]=+g[(c[m>>2]|0)+24>>2];g[Gc>>2]=+g[(c[m>>2]|0)+28>>2];g[Ic>>2]=+g[ec>>2]*+g[fc>>2]+ +g[Gc>>2]*+g[Hc>>2];g[ta>>2]=+g[ec>>2]*+g[Hc>>2]-+g[Gc>>2]*+g[fc>>2];g[Kc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Jc>>2]=+g[(c[m>>2]|0)+88>>2];g[Lc>>2]=+g[(c[m>>2]|0)+92>>2];g[Nc>>2]=+g[Jc>>2]*+g[Kc>>2]+ +g[Lc>>2]*+g[Mc>>2];g[ua>>2]=+g[Jc>>2]*+g[Mc>>2]-+g[Lc>>2]*+g[Kc>>2];g[Oc>>2]=+g[Ic>>2]+ +g[Nc>>2];g[Rb>>2]=+g[Ic>>2]-+g[Nc>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[$a>>2]=+g[ta>>2]+ +g[ua>>2];g[Rc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Tc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Qc>>2]=+g[(c[m>>2]|0)+8>>2];g[Sc>>2]=+g[(c[m>>2]|0)+12>>2];g[Uc>>2]=+g[Qc>>2]*+g[Rc>>2]+ 
+g[Sc>>2]*+g[Tc>>2];g[xa>>2]=+g[Qc>>2]*+g[Tc>>2]-+g[Sc>>2]*+g[Rc>>2];g[Wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Yc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Vc>>2]=+g[(c[m>>2]|0)+72>>2];g[Xc>>2]=+g[(c[m>>2]|0)+76>>2];g[Zc>>2]=+g[Vc>>2]*+g[Wc>>2]+ +g[Xc>>2]*+g[Yc>>2];g[ya>>2]=+g[Vc>>2]*+g[Yc>>2]-+g[Xc>>2]*+g[Wc>>2];g[_c>>2]=+g[Uc>>2]+ +g[Zc>>2];g[Ab>>2]=+g[xa>>2]+ +g[ya>>2];g[B>>2]=+g[xa>>2]-+g[ya>>2];g[C>>2]=+g[Uc>>2]-+g[Zc>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[kb>>2]=+g[C>>2]+ +g[B>>2];g[ad>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[cd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[$c>>2]=+g[(c[m>>2]|0)+104>>2];g[bd>>2]=+g[(c[m>>2]|0)+108>>2];g[dd>>2]=+g[$c>>2]*+g[ad>>2]+ +g[bd>>2]*+g[cd>>2];g[F>>2]=+g[$c>>2]*+g[cd>>2]-+g[bd>>2]*+g[ad>>2];g[hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[gc>>2]=+g[(c[m>>2]|0)+40>>2];g[ic>>2]=+g[(c[m>>2]|0)+44>>2];g[kc>>2]=+g[gc>>2]*+g[hc>>2]+ +g[ic>>2]*+g[jc>>2];g[G>>2]=+g[gc>>2]*+g[jc>>2]-+g[ic>>2]*+g[hc>>2];g[lc>>2]=+g[dd>>2]+ +g[kc>>2];g[Bb>>2]=+g[F>>2]+ +g[G>>2];g[E>>2]=+g[dd>>2]-+g[kc>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[I>>2]=+g[E>>2]+ +g[H>>2];g[lb>>2]=+g[E>>2]-+g[H>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+112>>2];g[y>>2]=+g[(c[m>>2]|0)+116>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[Aa>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+80>>2];g[ma>>2]=+g[(c[m>>2]|0)+84>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[Y>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+48>>2];g[ba>>2]=+g[(c[m>>2]|0)+52>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ 
+g[ba>>2]*+g[ca>>2];g[Ba>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+16>>2];g[ha>>2]=+g[(c[m>>2]|0)+20>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[X>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[La>>2]=+g[ea>>2]-+g[pa>>2];g[Ma>>2]=+g[Aa>>2]+ +g[Ba>>2];g[Na>>2]=+g[X>>2]+ +g[Y>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[W>>2]=+g[A>>2]-+g[da>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=+g[W>>2]-+g[Z>>2];g[rb>>2]=+g[W>>2]+ +g[Z>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2];g[Da>>2]=+g[ja>>2]-+g[oa>>2];g[Ea>>2]=+g[Ca>>2]+ +g[Da>>2];g[sb>>2]=+g[Ca>>2]-+g[Da>>2];g[pc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[oc>>2]=+g[c[m>>2]>>2];g[qc>>2]=+g[(c[m>>2]|0)+4>>2];g[sc>>2]=+g[oc>>2]*+g[pc>>2]+ +g[qc>>2]*+g[rc>>2];g[L>>2]=+g[oc>>2]*+g[rc>>2]-+g[qc>>2]*+g[pc>>2];g[Fc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ec>>2]=+g[(c[m>>2]|0)+96>>2];g[r>>2]=+g[(c[m>>2]|0)+100>>2];g[t>>2]=+g[Ec>>2]*+g[Fc>>2]+ +g[r>>2]*+g[s>>2];g[S>>2]=+g[Ec>>2]*+g[s>>2]-+g[r>>2]*+g[Fc>>2];g[uc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[wc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[tc>>2]=+g[(c[m>>2]|0)+64>>2];g[vc>>2]=+g[(c[m>>2]|0)+68>>2];g[xc>>2]=+g[tc>>2]*+g[uc>>2]+ +g[vc>>2]*+g[wc>>2];g[M>>2]=+g[tc>>2]*+g[wc>>2]-+g[vc>>2]*+g[uc>>2];g[Ac>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Cc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[zc>>2]=+g[(c[m>>2]|0)+32>>2];g[Bc>>2]=+g[(c[m>>2]|0)+36>>2];g[Dc>>2]=+g[zc>>2]*+g[Ac>>2]+ +g[Bc>>2]*+g[Cc>>2];g[R>>2]=+g[zc>>2]*+g[Cc>>2]-+g[Bc>>2]*+g[Ac>>2];g[yc>>2]=+g[sc>>2]+ +g[xc>>2];g[u>>2]=+g[Dc>>2]+ +g[t>>2];g[Eb>>2]=+g[yc>>2]-+g[u>>2];g[Fb>>2]=+g[L>>2]+ +g[M>>2];g[Gb>>2]=+g[R>>2]+ +g[S>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[O>>2]=+g[Dc>>2]-+g[t>>2];g[P>>2]=+g[N>>2]+ 
+g[O>>2];g[ob>>2]=+g[N>>2]-+g[O>>2];g[Q>>2]=+g[sc>>2]-+g[xc>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[U>>2]=+g[Q>>2]-+g[T>>2];g[pb>>2]=+g[Q>>2]+ +g[T>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[J>>2]=(+g[D>>2]-+g[I>>2])*.7071067690849304;g[K>>2]=+g[wa>>2]+ +g[J>>2];g[Ha>>2]=+g[wa>>2]-+g[J>>2];g[Xb>>2]=(+g[lb>>2]-+g[kb>>2])*.7071067690849304;g[Yb>>2]=+g[Rb>>2]+ +g[Qb>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[$b>>2]=+g[Yb>>2]-+g[Xb>>2];g[V>>2]=+g[P>>2]*.9238795042037964+ +g[U>>2]*.3826834261417389;g[Fa>>2]=+g[_>>2]*.3826834261417389-+g[Ea>>2]*.9238795042037964;g[Ga>>2]=+g[V>>2]+ +g[Fa>>2];g[_b>>2]=+g[Fa>>2]-+g[V>>2];g[Ia>>2]=+g[P>>2]*.3826834261417389-+g[U>>2]*.9238795042037964;g[Ja>>2]=+g[Ea>>2]*.3826834261417389+ +g[_>>2]*.9238795042037964;g[ib>>2]=+g[Ia>>2]-+g[Ja>>2];g[Wb>>2]=+g[Ia>>2]+ +g[Ja>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[K>>2]-+g[Ga>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Zb>>2]-+g[Wb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[K>>2]+ +g[Ga>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Wb>>2]+ +g[Zb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Ha>>2]-+g[ib>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[$b>>2]-+g[_b>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ha>>2]+ +g[ib>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[_b>>2]+ +g[$b>>2];g[zb>>2]=+g[dc>>2]-+g[Oc>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Db>>2]=+g[zb>>2]+ +g[Cb>>2];g[Ra>>2]=+g[zb>>2]-+g[Cb>>2];g[Jb>>2]=+g[lc>>2]-+g[_c>>2];g[Kb>>2]=+g[cb>>2]-+g[$a>>2];g[Lb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[Nb>>2]=+g[Kb>>2]-+g[Jb>>2];g[Ka>>2]=+g[Eb>>2]+ +g[Hb>>2];g[Pa>>2]=+g[La>>2]-+g[Oa>>2];g[Qa>>2]=(+g[Ka>>2]+ +g[Pa>>2])*.7071067690849304;g[Mb>>2]=(+g[Pa>>2]-+g[Ka>>2])*.7071067690849304;g[Sa>>2]=+g[Hb>>2]-+g[Eb>>2];g[Ta>>2]=+g[La>>2]+ +g[Oa>>2];g[Ua>>2]=(+g[Sa>>2]-+g[Ta>>2])*.7071067690849304;g[hb>>2]=(+g[Sa>>2]+ +g[Ta>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Db>>2]-+g[Qa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Lb>>2]-+g[hb>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Db>>2]+ 
+g[Qa>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[hb>>2]+ +g[Lb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Ra>>2]-+g[Ua>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Nb>>2]-+g[Mb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ra>>2]+ +g[Ua>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Mb>>2]+ +g[Nb>>2];g[jb>>2]=+g[sa>>2]+ +g[va>>2];g[mb>>2]=(+g[kb>>2]+ +g[lb>>2])*.7071067690849304;g[nb>>2]=+g[jb>>2]+ +g[mb>>2];g[vb>>2]=+g[jb>>2]-+g[mb>>2];g[Pb>>2]=(+g[D>>2]+ +g[I>>2])*.7071067690849304;g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[Tb>>2]=+g[Pb>>2]+ +g[Sb>>2];g[Vb>>2]=+g[Sb>>2]-+g[Pb>>2];g[qb>>2]=+g[ob>>2]*.3826834261417389+ +g[pb>>2]*.9238795042037964;g[tb>>2]=+g[rb>>2]*.9238795042037964-+g[sb>>2]*.3826834261417389;g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[Ub>>2]=+g[tb>>2]-+g[qb>>2];g[wb>>2]=+g[ob>>2]*.9238795042037964-+g[pb>>2]*.3826834261417389;g[xb>>2]=+g[sb>>2]*.9238795042037964+ +g[rb>>2]*.3826834261417389;g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[Ob>>2]=+g[wb>>2]+ +g[xb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[nb>>2]-+g[ub>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Tb>>2]-+g[Ob>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[nb>>2]+ +g[ub>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ob>>2]+ +g[Tb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[vb>>2]-+g[yb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Vb>>2]-+g[Ub>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[vb>>2]+ +g[yb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Pc>>2]=+g[dc>>2]+ +g[Oc>>2];g[mc>>2]=+g[_c>>2]+ +g[lc>>2];g[nc>>2]=+g[Pc>>2]+ +g[mc>>2];g[Va>>2]=+g[Pc>>2]-+g[mc>>2];g[_a>>2]=+g[Ab>>2]+ +g[Bb>>2];g[db>>2]=+g[$a>>2]+ +g[cb>>2];g[eb>>2]=+g[_a>>2]+ +g[db>>2];g[gb>>2]=+g[db>>2]-+g[_a>>2];g[v>>2]=+g[yc>>2]+ +g[u>>2];g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[ra>>2]=+g[v>>2]+ +g[qa>>2];g[fb>>2]=+g[qa>>2]-+g[v>>2];g[Wa>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Xa>>2]=+g[Ma>>2]+ +g[Na>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[Za>>2]=+g[Wa>>2]+ 
+g[Xa>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[nc>>2]-+g[ra>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[eb>>2]-+g[Za>>2];g[c[k>>2]>>2]=+g[nc>>2]+ +g[ra>>2];g[c[l>>2]>>2]=+g[Za>>2]+ +g[eb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Va>>2]-+g[Ya>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[gb>>2]-+g[fb>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Va>>2]+ +g[Ya>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[fb>>2]+ +g[gb>>2];c[ed>>2]=(c[ed>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+120;c[n>>2]=c[n>>2]^c[2998]}i=fd;return}/* hj(a): saves the module-level variable i (used here as a stack pointer: saved, bumped by 16, restored on exit), spills a into the reserved 16-byte frame through c, and calls gh(a, 5, 1864). Returns nothing. NOTE(review): 5 and 1864 look like a function-table index and a static-data address handed to a registration helper -- confirm against gh. */function hj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,5,1864);i=b;return}/* ij(a,b,d,e,f,h,j): all seven arguments are coerced to int. Reserves a 1184-byte frame on i and keeps every temporary in that frame. Before the loop d is advanced by f*38 four-byte elements; a counter then runs from f while it is below h. Each pass reads values through g (indexed >>2; presumably the float32 heap view -- TODO confirm) from a and from b at element offsets 0 and e*1 .. e*19, combines them with 38 values read from d at byte offsets 0..148, and stores 20 results back to a and 20 back to b at those same offsets. At the end of a pass a and b advance by j four-byte elements, d advances by 152 bytes, and e is XORed with c[2998]. i is restored before returning; nothing is returned. NOTE(review): the access pattern and the constants .5877852439880371, .9510565400123596, .25 and .55901700258255 suggest a generated size-20 FFT twiddle codelet -- confirm against the C source this was compiled from. */function ij(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,
de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0;Fe=i;i=i+1184|0;k=Fe+1180|0;l=Fe+1176|0;m=Fe+1172|0;n=Fe+1168|0;Ge=Fe+1164|0;o=Fe+1160|0;p=Fe+1156|0;Ee=Fe+1136|0;ne=Fe+1132|0;S=Fe+1128|0;hd=Fe+1124|0;qd=Fe+1120|0;yb=Fe+1116|0;Qb=Fe+1112|0;hc=Fe+1108|0;Uc=Fe+1104|0;qa=Fe+1100|0;P=Fe+1096|0;Q=Fe+1092|0;zc=Fe+1088|0;Cc=Fe+1084|0;Wc=Fe+1080|0;Wb=Fe+1076|0;Xb=Fe+1072|0;bc=Fe+1068|0;W=Fe+1064|0;X=Fe+1060|0;Y=Fe+1056|0;Da=Fe+1052|0;Ia=Fe+1048|0;sd=Fe+1044|0;eb=Fe+1040|0;fb=Fe+1036|0;dd=Fe+1032|0;Mb=Fe+1028|0;Nb=Fe+1024|0;Ob=Fe+1020|0;Qa=Fe+1016|0;Va=Fe+1012|0;Wa=Fe+1008|0;Md=Fe+1004|0;u=Fe+1e3|0;v=Fe+996|0;Gc=Fe+992|0;Jc=Fe+988|0;Vc=Fe+984|0;Tb=Fe+980|0;Ub=Fe+976|0;ac=Fe+972|0;T=Fe+968|0;U=Fe+964|0;V=Fe+960|0;mb=Fe+956|0;rb=Fe+952|0;rd=Fe+948|0;bb=Fe+944|0;cb=Fe+940|0;cd=Fe+936|0;Jb=Fe+932|0;Kb=Fe+928|0;Lb=Fe+924|0;Db=Fe+920|0;Ka=Fe+916|0;La=Fe+912|0;q=Fe+908|0;fc=Fe+904|0;Cd=Fe+900|0;ec=Fe+896|0;ge=Fe+892|0;vb=Fe+888|0;le=Fe+884|0;wb=Fe+880|0;Ib=Fe+876|0;Bd=Fe+872|0;za=Fe+868|0;Rc=Fe+864|0;Fd=Fe+860|0;fe=Fe+856|0;Ed=Fe+852|0;ee=Fe+848|0;ie=Fe+844|0;ke=Fe+840|0;he=Fe+836|0;je=Fe+832|0;Dd=Fe+828|0;me=Fe+824|0;fd=Fe+820|0;gd=Fe+816|0;ub=Fe+812|0;xb=Fe+808|0;dc=Fe+804|0;gc=Fe+800|0;ea=Fe+796|0;xc=Fe+792|0;Ba=Fe+788|0;Ma=Fe+784|0;O=Fe+780|0;Bc=Fe+776|0;Ha=Fe+772|0;Ua=Fe+768|0;pa=Fe+764|0;yc=Fe+760|0;Ca=Fe+756|0;Pa=Fe+752|0;D=Fe+748|0;Ac=Fe+744|0;Ga=Fe+740|0;Ra=Fe+736|0;A=Fe+732|0;_=Fe+728|0;da=Fe+724|0;Aa=Fe+720|0;x=Fe+716|0;z=Fe+712|0;w=Fe+708|0;y=Fe+704|0;aa=Fe+700|0;ca=Fe+696|0;$=Fe+692|0;ba=Fe+688|0;I=Fe+684|0;Sa=Fe+680|0;N=Fe+676|0;Ta=Fe+672|0;F=Fe+668|0;H=Fe+664|0;E=Fe+660|0;G=Fe+656|0;K=Fe+652|0;M=Fe+648|0;J=Fe+644|0;L=Fe+640|0;ja=Fe+636|0;Na=Fe+632|0;oa=Fe+628|0;Oa=Fe+624|0;ga=Fe+620|0;ia=Fe+616|0;fa=Fe+612|0;ha=Fe+608|0;la=Fe+604|0;na=Fe+600|0;ka=Fe+596|0;ma=Fe+592|0;va=Fe+588|0;Ea=Fe+584|0;C=Fe+580|0;Fa=Fe+576|0;sa=Fe+572|0;ua=Fe+568|0;ra=Fe+564|0;ta=Fe+560|0;xa=Fe+5
56|0;B=Fe+552|0;wa=Fe+548|0;ya=Fe+544|0;ye=Fe+540|0;Ec=Fe+536|0;kb=Fe+532|0;zb=Fe+528|0;t=Fe+524|0;Ic=Fe+520|0;qb=Fe+516|0;Hb=Fe+512|0;Ld=Fe+508|0;Fc=Fe+504|0;lb=Fe+500|0;Cb=Fe+496|0;Xd=Fe+492|0;Hc=Fe+488|0;pb=Fe+484|0;Eb=Fe+480|0;se=Fe+476|0;ib=Fe+472|0;xe=Fe+468|0;jb=Fe+464|0;pe=Fe+460|0;re=Fe+456|0;oe=Fe+452|0;qe=Fe+448|0;ue=Fe+444|0;we=Fe+440|0;te=Fe+436|0;ve=Fe+432|0;ae=Fe+428|0;Fb=Fe+424|0;s=Fe+420|0;Gb=Fe+416|0;Zd=Fe+412|0;$d=Fe+408|0;Yd=Fe+404|0;_d=Fe+400|0;ce=Fe+396|0;r=Fe+392|0;be=Fe+388|0;de=Fe+384|0;De=Fe+380|0;Ab=Fe+376|0;Kd=Fe+372|0;Bb=Fe+368|0;Ae=Fe+364|0;Ce=Fe+360|0;ze=Fe+356|0;Be=Fe+352|0;Hd=Fe+348|0;Jd=Fe+344|0;Gd=Fe+340|0;Id=Fe+336|0;Rd=Fe+332|0;nb=Fe+328|0;Wd=Fe+324|0;ob=Fe+320|0;Od=Fe+316|0;Qd=Fe+312|0;Nd=Fe+308|0;Pd=Fe+304|0;Td=Fe+300|0;Vd=Fe+296|0;Sd=Fe+292|0;Ud=Fe+288|0;vc=Fe+284|0;R=Fe+280|0;uc=Fe+276|0;Lc=Fe+272|0;Nc=Fe+268|0;Dc=Fe+264|0;Kc=Fe+260|0;Mc=Fe+256|0;wc=Fe+252|0;Zc=Fe+248|0;Xc=Fe+244|0;Yc=Fe+240|0;Tc=Fe+236|0;bd=Fe+232|0;qc=Fe+228|0;Sc=Fe+224|0;$c=Fe+220|0;_c=Fe+216|0;Oc=Fe+212|0;Z=Fe+208|0;Pc=Fe+204|0;Zb=Fe+200|0;$b=Fe+196|0;Vb=Fe+192|0;Yb=Fe+188|0;_b=Fe+184|0;Qc=Fe+180|0;lc=Fe+176|0;cc=Fe+172|0;mc=Fe+168|0;kc=Fe+164|0;pc=Fe+160|0;ic=Fe+156|0;jc=Fe+152|0;oc=Fe+148|0;nc=Fe+144|0;Za=Fe+140|0;Xa=Fe+136|0;Ya=Fe+132|0;tb=Fe+128|0;$a=Fe+124|0;Ja=Fe+120|0;sb=Fe+116|0;ab=Fe+112|0;_a=Fe+108|0;vd=Fe+104|0;td=Fe+100|0;ud=Fe+96|0;zd=Fe+92|0;ad=Fe+88|0;xd=Fe+84|0;yd=Fe+80|0;Ad=Fe+76|0;wd=Fe+72|0;Pb=Fe+68|0;Rb=Fe+64|0;Sb=Fe+60|0;hb=Fe+56|0;sc=Fe+52|0;db=Fe+48|0;gb=Fe+44|0;tc=Fe+40|0;rc=Fe+36|0;ed=Fe+32|0;id=Fe+28|0;jd=Fe+24|0;nd=Fe+20|0;pd=Fe+16|0;ld=Fe+12|0;md=Fe+8|0;od=Fe+4|0;kd=Fe;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ge>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Fe+1152>>2]=.5877852439880371;g[Fe+1148>>2]=.9510565400123596;g[Fe+1144>>2]=.25;g[Fe+1140>>2]=.55901700258255;c[Ee>>2]=c[Ge>>2];c[m>>2]=(c[m>>2]|0)+((c[Ge>>2]|0)*38<<2);while(1){if((c[Ee>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[fc>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+((c[n>>2
]|0)*10<<2)>>2];g[Bd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+72>>2];g[Rc>>2]=+g[(c[m>>2]|0)+76>>2];g[Cd>>2]=+g[za>>2]*+g[Ib>>2]+ +g[Rc>>2]*+g[Bd>>2];g[ec>>2]=+g[za>>2]*+g[Bd>>2]-+g[Rc>>2]*+g[Ib>>2];g[Fd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[fe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ed>>2]=+g[(c[m>>2]|0)+32>>2];g[ee>>2]=+g[(c[m>>2]|0)+36>>2];g[ge>>2]=+g[Ed>>2]*+g[Fd>>2]+ +g[ee>>2]*+g[fe>>2];g[vb>>2]=+g[Ed>>2]*+g[fe>>2]-+g[ee>>2]*+g[Fd>>2];g[ie>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ke>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[he>>2]=+g[(c[m>>2]|0)+112>>2];g[je>>2]=+g[(c[m>>2]|0)+116>>2];g[le>>2]=+g[he>>2]*+g[ie>>2]+ +g[je>>2]*+g[ke>>2];g[wb>>2]=+g[he>>2]*+g[ke>>2]-+g[je>>2]*+g[ie>>2];g[Dd>>2]=+g[q>>2]+ +g[Cd>>2];g[me>>2]=+g[ge>>2]+ +g[le>>2];g[ne>>2]=+g[Dd>>2]-+g[me>>2];g[S>>2]=+g[Dd>>2]+ +g[me>>2];g[fd>>2]=+g[fc>>2]-+g[ec>>2];g[gd>>2]=+g[ge>>2]-+g[le>>2];g[hd>>2]=+g[fd>>2]-+g[gd>>2];g[qd>>2]=+g[gd>>2]+ +g[fd>>2];g[ub>>2]=+g[q>>2]-+g[Cd>>2];g[xb>>2]=+g[vb>>2]-+g[wb>>2];g[yb>>2]=+g[ub>>2]-+g[xb>>2];g[Qb>>2]=+g[ub>>2]+ +g[xb>>2];g[dc>>2]=+g[vb>>2]+ +g[wb>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[hc>>2]=+g[dc>>2]+ +g[gc>>2];g[Uc>>2]=+g[gc>>2]-+g[dc>>2];g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+56>>2];g[y>>2]=+g[(c[m>>2]|0)+60>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[_>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+136>>2];g[ba>>2]=+g[(c[m>>2]|0)+140>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[Aa>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[xc>>2]=+g[_>>2]+ +g[Aa>>2];g[Ba>>2]=+g[_>>2]-+g[Aa>>2];g[Ma>>2]=+g[A>>2]-+g[da>>2];g[F>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[E>>2]=+g[(c[m>>2]|0)+128>>2];g[G>>2]=+g[(c[m>>2]|0)+132>>2];g[I>>2]=+g[E>>2]*+g[F>>2]+ 
+g[G>>2]*+g[H>>2];g[Sa>>2]=+g[E>>2]*+g[H>>2]-+g[G>>2]*+g[F>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[J>>2]=+g[(c[m>>2]|0)+48>>2];g[L>>2]=+g[(c[m>>2]|0)+52>>2];g[N>>2]=+g[J>>2]*+g[K>>2]+ +g[L>>2]*+g[M>>2];g[Ta>>2]=+g[J>>2]*+g[M>>2]-+g[L>>2]*+g[K>>2];g[O>>2]=+g[I>>2]+ +g[N>>2];g[Bc>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Ha>>2]=+g[I>>2]-+g[N>>2];g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+96>>2];g[ha>>2]=+g[(c[m>>2]|0)+100>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[Na>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+16>>2];g[ma>>2]=+g[(c[m>>2]|0)+20>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[Oa>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[yc>>2]=+g[Na>>2]+ +g[Oa>>2];g[Ca>>2]=+g[ja>>2]-+g[oa>>2];g[Pa>>2]=+g[Na>>2]-+g[Oa>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ra>>2]=+g[(c[m>>2]|0)+88>>2];g[ta>>2]=+g[(c[m>>2]|0)+92>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[Ea>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[xa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[B>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+8>>2];g[ya>>2]=+g[(c[m>>2]|0)+12>>2];g[C>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[B>>2];g[Fa>>2]=+g[wa>>2]*+g[B>>2]-+g[ya>>2]*+g[xa>>2];g[D>>2]=+g[va>>2]+ +g[C>>2];g[Ac>>2]=+g[Ea>>2]+ +g[Fa>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Ra>>2]=+g[va>>2]-+g[C>>2];g[qa>>2]=+g[ea>>2]-+g[pa>>2];g[P>>2]=+g[D>>2]-+g[O>>2];g[Q>>2]=+g[qa>>2]+ +g[P>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[Wc>>2]=+g[zc>>2]+ +g[Cc>>2];g[Wb>>2]=+g[xc>>2]+ +g[yc>>2];g[Xb>>2]=+g[Ac>>2]+ +g[Bc>>2];g[bc>>2]=+g[Wb>>2]+ +g[Xb>>2];g[W>>2]=+g[ea>>2]+ +g[pa>>2];g[X>>2]=+g[D>>2]+ +g[O>>2];g[Y>>2]=+g[W>>2]+ 
+g[X>>2];g[Da>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[sd>>2]=+g[Da>>2]+ +g[Ia>>2];g[eb>>2]=+g[Ba>>2]-+g[Ca>>2];g[fb>>2]=+g[Ga>>2]-+g[Ha>>2];g[dd>>2]=+g[eb>>2]+ +g[fb>>2];g[Mb>>2]=+g[Ma>>2]+ +g[Pa>>2];g[Nb>>2]=+g[Ra>>2]+ +g[Ua>>2];g[Ob>>2]=+g[Mb>>2]+ +g[Nb>>2];g[Qa>>2]=+g[Ma>>2]-+g[Pa>>2];g[Va>>2]=+g[Ra>>2]-+g[Ua>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Va>>2];g[pe>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[re>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[oe>>2]=+g[(c[m>>2]|0)+24>>2];g[qe>>2]=+g[(c[m>>2]|0)+28>>2];g[se>>2]=+g[oe>>2]*+g[pe>>2]+ +g[qe>>2]*+g[re>>2];g[ib>>2]=+g[oe>>2]*+g[re>>2]-+g[qe>>2]*+g[pe>>2];g[ue>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[we>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[te>>2]=+g[(c[m>>2]|0)+104>>2];g[ve>>2]=+g[(c[m>>2]|0)+108>>2];g[xe>>2]=+g[te>>2]*+g[ue>>2]+ +g[ve>>2]*+g[we>>2];g[jb>>2]=+g[te>>2]*+g[we>>2]-+g[ve>>2]*+g[ue>>2];g[ye>>2]=+g[se>>2]+ +g[xe>>2];g[Ec>>2]=+g[ib>>2]+ +g[jb>>2];g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[zb>>2]=+g[se>>2]-+g[xe>>2];g[Zd>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[$d>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Yd>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[ae>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[_d>>2]*+g[$d>>2];g[Fb>>2]=+g[Yd>>2]*+g[$d>>2]-+g[_d>>2]*+g[Zd>>2];g[ce>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[be>>2]=+g[(c[m>>2]|0)+80>>2];g[de>>2]=+g[(c[m>>2]|0)+84>>2];g[s>>2]=+g[be>>2]*+g[ce>>2]+ +g[de>>2]*+g[r>>2];g[Gb>>2]=+g[be>>2]*+g[r>>2]-+g[de>>2]*+g[ce>>2];g[t>>2]=+g[ae>>2]+ +g[s>>2];g[Ic>>2]=+g[Fb>>2]+ +g[Gb>>2];g[qb>>2]=+g[ae>>2]-+g[s>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[Ae>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ce>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ze>>2]=+g[(c[m>>2]|0)+64>>2];g[Be>>2]=+g[(c[m>>2]|0)+68>>2];g[De>>2]=+g[ze>>2]*+g[Ae>>2]+ 
+g[Be>>2]*+g[Ce>>2];g[Ab>>2]=+g[ze>>2]*+g[Ce>>2]-+g[Be>>2]*+g[Ae>>2];g[Hd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Jd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Gd>>2]=+g[(c[m>>2]|0)+144>>2];g[Id>>2]=+g[(c[m>>2]|0)+148>>2];g[Kd>>2]=+g[Gd>>2]*+g[Hd>>2]+ +g[Id>>2]*+g[Jd>>2];g[Bb>>2]=+g[Gd>>2]*+g[Jd>>2]-+g[Id>>2]*+g[Hd>>2];g[Ld>>2]=+g[De>>2]+ +g[Kd>>2];g[Fc>>2]=+g[Ab>>2]+ +g[Bb>>2];g[lb>>2]=+g[De>>2]-+g[Kd>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Od>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Qd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Nd>>2]=+g[(c[m>>2]|0)+120>>2];g[Pd>>2]=+g[(c[m>>2]|0)+124>>2];g[Rd>>2]=+g[Nd>>2]*+g[Od>>2]+ +g[Pd>>2]*+g[Qd>>2];g[nb>>2]=+g[Nd>>2]*+g[Qd>>2]-+g[Pd>>2]*+g[Od>>2];g[Td>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Vd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Sd>>2]=+g[(c[m>>2]|0)+40>>2];g[Ud>>2]=+g[(c[m>>2]|0)+44>>2];g[Wd>>2]=+g[Sd>>2]*+g[Td>>2]+ +g[Ud>>2]*+g[Vd>>2];g[ob>>2]=+g[Sd>>2]*+g[Vd>>2]-+g[Ud>>2]*+g[Td>>2];g[Xd>>2]=+g[Rd>>2]+ +g[Wd>>2];g[Hc>>2]=+g[nb>>2]+ +g[ob>>2];g[pb>>2]=+g[nb>>2]-+g[ob>>2];g[Eb>>2]=+g[Rd>>2]-+g[Wd>>2];g[Md>>2]=+g[ye>>2]-+g[Ld>>2];g[u>>2]=+g[Xd>>2]-+g[t>>2];g[v>>2]=+g[Md>>2]+ +g[u>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[Vc>>2]=+g[Gc>>2]+ +g[Jc>>2];g[Tb>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Ub>>2]=+g[Hc>>2]+ +g[Ic>>2];g[ac>>2]=+g[Tb>>2]+ +g[Ub>>2];g[T>>2]=+g[ye>>2]+ +g[Ld>>2];g[U>>2]=+g[Xd>>2]+ +g[t>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[mb>>2]=+g[kb>>2]+ +g[lb>>2];g[rb>>2]=+g[pb>>2]+ +g[qb>>2];g[rd>>2]=+g[mb>>2]+ +g[rb>>2];g[bb>>2]=+g[kb>>2]-+g[lb>>2];g[cb>>2]=+g[pb>>2]-+g[qb>>2];g[cd>>2]=+g[bb>>2]+ +g[cb>>2];g[Jb>>2]=+g[zb>>2]+ +g[Cb>>2];g[Kb>>2]=+g[Eb>>2]+ +g[Hb>>2];g[Lb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[Db>>2]=+g[zb>>2]-+g[Cb>>2];g[Ka>>2]=+g[Eb>>2]-+g[Hb>>2];g[La>>2]=+g[Db>>2]+ +g[Ka>>2];g[vc>>2]=(+g[v>>2]-+g[Q>>2])*.55901700258255;g[R>>2]=+g[v>>2]+ 
+g[Q>>2];g[uc>>2]=+g[ne>>2]-+g[R>>2]*.25;g[Dc>>2]=+g[zc>>2]-+g[Cc>>2];g[Kc>>2]=+g[Gc>>2]-+g[Jc>>2];g[Lc>>2]=+g[Dc>>2]*.9510565400123596-+g[Kc>>2]*.5877852439880371;g[Nc>>2]=+g[Kc>>2]*.9510565400123596+ +g[Dc>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[ne>>2]+ +g[R>>2];g[Mc>>2]=+g[vc>>2]+ +g[uc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Mc>>2]-+g[Nc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Mc>>2]+ +g[Nc>>2];g[wc>>2]=+g[uc>>2]-+g[vc>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[wc>>2]-+g[Lc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[wc>>2]+ +g[Lc>>2];g[Zc>>2]=(+g[Vc>>2]-+g[Wc>>2])*.55901700258255;g[Xc>>2]=+g[Vc>>2]+ +g[Wc>>2];g[Yc>>2]=+g[Uc>>2]-+g[Xc>>2]*.25;g[qc>>2]=+g[qa>>2]-+g[P>>2];g[Sc>>2]=+g[Md>>2]-+g[u>>2];g[Tc>>2]=+g[qc>>2]*.9510565400123596-+g[Sc>>2]*.5877852439880371;g[bd>>2]=+g[Sc>>2]*.9510565400123596+ +g[qc>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Xc>>2]+ +g[Uc>>2];g[$c>>2]=+g[Zc>>2]+ +g[Yc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[$c>>2]-+g[bd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[bd>>2]+ +g[$c>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Tc>>2]+ +g[_c>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[_c>>2]-+g[Tc>>2];g[Oc>>2]=(+g[V>>2]-+g[Y>>2])*.55901700258255;g[Z>>2]=+g[V>>2]+ +g[Y>>2];g[Pc>>2]=+g[S>>2]-+g[Z>>2]*.25;g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2];g[Zb>>2]=+g[Vb>>2]*.9510565400123596+ +g[Yb>>2]*.5877852439880371;g[$b>>2]=+g[Yb>>2]*.9510565400123596-+g[Vb>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[S>>2]+ +g[Z>>2];g[_b>>2]=+g[Pc>>2]-+g[Oc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[_b>>2]-+g[$b>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[_b>>2]+ +g[$b>>2];g[Qc>>2]=+g[Oc>>2]+ +g[Pc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Qc>>2]-+g[Zb>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Qc>>2]+ +g[Zb>>2];g[lc>>2]=(+g[ac>>2]-+g[bc>>2])*.55901700258255;g[cc>>2]=+g[ac>>2]+ 
+g[bc>>2];g[mc>>2]=+g[hc>>2]-+g[cc>>2]*.25;g[ic>>2]=+g[T>>2]-+g[U>>2];g[jc>>2]=+g[W>>2]-+g[X>>2];g[kc>>2]=+g[ic>>2]*.9510565400123596+ +g[jc>>2]*.5877852439880371;g[pc>>2]=+g[jc>>2]*.9510565400123596-+g[ic>>2]*.5877852439880371;g[c[l>>2]>>2]=+g[cc>>2]+ +g[hc>>2];g[oc>>2]=+g[mc>>2]-+g[lc>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[oc>>2]-+g[pc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[pc>>2]+ +g[oc>>2];g[nc>>2]=+g[lc>>2]+ +g[mc>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[kc>>2]+ +g[nc>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[nc>>2]-+g[kc>>2];g[Za>>2]=(+g[La>>2]-+g[Wa>>2])*.55901700258255;g[Xa>>2]=+g[La>>2]+ +g[Wa>>2];g[Ya>>2]=+g[yb>>2]-+g[Xa>>2]*.25;g[Ja>>2]=+g[Da>>2]-+g[Ia>>2];g[sb>>2]=+g[mb>>2]-+g[rb>>2];g[tb>>2]=+g[Ja>>2]*.9510565400123596-+g[sb>>2]*.5877852439880371;g[$a>>2]=+g[sb>>2]*.9510565400123596+ +g[Ja>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[yb>>2]+ +g[Xa>>2];g[ab>>2]=+g[Za>>2]+ +g[Ya>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[$a>>2]+ +g[ab>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ab>>2]-+g[$a>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[tb>>2]+ +g[_a>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[_a>>2]-+g[tb>>2];g[vd>>2]=(+g[rd>>2]-+g[sd>>2])*.55901700258255;g[td>>2]=+g[rd>>2]+ +g[sd>>2];g[ud>>2]=+g[qd>>2]-+g[td>>2]*.25;g[xd>>2]=+g[Qa>>2]-+g[Va>>2];g[yd>>2]=+g[Db>>2]-+g[Ka>>2];g[zd>>2]=+g[xd>>2]*.9510565400123596-+g[yd>>2]*.5877852439880371;g[ad>>2]=+g[yd>>2]*.9510565400123596+ +g[xd>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[td>>2]+ +g[qd>>2];g[Ad>>2]=+g[vd>>2]+ +g[ud>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ad>>2]-+g[ad>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ad>>2]+ +g[Ad>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[wd>>2]-+g[zd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[zd>>2]+ +g[wd>>2];g[Pb>>2]=(+g[Lb>>2]-+g[Ob>>2])*.55901700258255;g[Rb>>2]=+g[Lb>>2]+ 
+g[Ob>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2]*.25;g[db>>2]=+g[bb>>2]-+g[cb>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[hb>>2]=+g[db>>2]*.9510565400123596+ +g[gb>>2]*.5877852439880371;g[sc>>2]=+g[gb>>2]*.9510565400123596-+g[db>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Qb>>2]+ +g[Rb>>2];g[tc>>2]=+g[Sb>>2]-+g[Pb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[sc>>2]+ +g[tc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[tc>>2]-+g[sc>>2];g[rc>>2]=+g[Pb>>2]+ +g[Sb>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[hb>>2]+ +g[rc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[rc>>2]-+g[hb>>2];g[ed>>2]=(+g[cd>>2]-+g[dd>>2])*.55901700258255;g[id>>2]=+g[cd>>2]+ +g[dd>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2]*.25;g[ld>>2]=+g[Jb>>2]-+g[Kb>>2];g[md>>2]=+g[Mb>>2]-+g[Nb>>2];g[nd>>2]=+g[ld>>2]*.9510565400123596+ +g[md>>2]*.5877852439880371;g[pd>>2]=+g[md>>2]*.9510565400123596-+g[ld>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[id>>2]+ +g[hd>>2];g[od>>2]=+g[jd>>2]-+g[ed>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[od>>2]-+g[pd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[pd>>2]+ +g[od>>2];g[kd>>2]=+g[ed>>2]+ +g[jd>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[kd>>2]-+g[nd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[nd>>2]+ +g[kd>>2];c[Ee>>2]=(c[Ee>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+152;c[n>>2]=c[n>>2]^c[2998]}i=Fe;return}/* jj(a): saves the module-level variable i (used here as a stack pointer: saved, bumped by 16, restored on exit), spills a into the reserved 16-byte frame through c, and calls gh(a, 6, 1928). Returns nothing. NOTE(review): 6 and 1928 look like a function-table index and a static-data address handed to a registration helper -- confirm against gh. */function jj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,6,1928);i=b;return}function kj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0;Ih=i;i=i+1904|0;k=Ih+1900|0;l=Ih+1896|0;m=Ih+1892|0;n=Ih+1888|0;Jh=Ih+1884|0;o=Ih+1880|0;p=Ih+1876|0;Hh=Ih+1792|0;q=Ih+1788|0;Uf=Ih+1784|0;tb=Ih+1780|0;fg=Ih+1776|0;vh=Ih+1772|0;ub=Ih+1768|0;Tf=Ih+1764|0;gg=Ih+1760|0;kg=Ih+1756|0;Lf=Ih+1752|0;Cb=Ih+1748|0;id=Ih+1744|0;qb=Ih+1740|0;jf=Ih+1736|0;fc=Ih+1732|0;me=Ih+1728|0;Rd=Ih+1724|0;he=Ih+1720|0;qc=Ih+1716|0;le=Ih+1712|0;Qd=Ih+1708|0;Ke=Ih+1704|0;$g=Ih+1700|0;Fe=Ih+1696|0;Qa=Ih+1692|0;$e=Ih+1688|0;Gd=Ih+1684|0;md=Ih+1680|0;$a=Ih+1676|0;af=Ih+1672|0;Hd=Ih+1668|0;pd=Ih+1664|0;ka=Ih+1660|0;Ge=Ih+1656|0;Mb=Ih+1652|0;df=Ih+1648|0;Jd=Ih+1644|0;wd=Ih+1640|0;vc=Ih+1636|0;cf=Ih+1632|0;Kd=Ih+1628|0;td=Ih+1624|0;Q=Ih+1620|0;Ie=Ih+1616|0;Ic=Ih+1612|0;gf=Ih+1608|0;Od=Ih+1604|0;ae=Ih+1600|0;Vb=Ih+1596|0;je=Ih+1592|0;Nd=Ih+1588|0;de=Ih+1584|0;hf=Ih+1580|0;wb=Ih+1576|0;ih=Ih+1572|0;xb=Ih+1568|0;jh=Ih+1564|0;rf=Ih+1560|0;oh=Ih+1556|0;zb=Ih+1552|0;th=Ih+1548|0;Ab=Ih+1544|0;uh=Ih+1540|0;sf=Ih+1536|0;Ib=Ih+1532|0;_d=Ih+1528|0;za=Ih+1524|0;Rc=Ih+1520|0;Hg=Ih+1516|0;hh=Ih+1512|0;rg=Ih+1508|0;Ig=Ih+1504|0;lh=Ih+1500|0;nh=Ih+1496|0;kh=Ih+1492|0;mh=Ih+1488|0;qh=Ih+1484|0;sh=Ih+1480|0;ph=Ih+1476|0;rh=Ih+1472|0;ig=Ih+1468|0;jg=Ih+1464|0;yb=Ih+1460|0;Bb=Ih+1456|0;V=Ih+1452|0;jc=Ih+1448|0;ac=Ih+1444|0;dc=Ih+1440|0;oc=Ih+1436|0;nc=Ih+1432|0;gc=Ih+1428|0;hc=Ih+1424|0;kc=Ih+1420|0;Fa=Ih+1416|0;ob=Ih+1412|0;pb=Ih+1408|0;S=Ih+1404|0;U=Ih+1400|0;R=Ih+1396|0;T=Ih+1392|0;_=Ih+1388|0;_b=Ih+1384|0;nb=Ih+1380|0;cc=Ih+1376|0;Ea=Ih+1372|0;$b=Ih+1368|0;ib=Ih+1364|0;bc=Ih+1360|0;X=Ih+1356|0;Z=Ih+1352|0;W=Ih+1348|0;Y=Ih+1344|0;kb=Ih+1340|0;mb=Ih+1336|0;jb=Ih+1332|0;lb=Ih+1328|0;Ba=Ih+1324|0;Da=Ih+1320|0;Aa=Ih+1316|0;Ca=Ih+1312|0;Ha=Ih+1308|0;Ja=Ih+1304|0;Ga=Ih+1300|0;Ia=Ih+1296|0;ec=Ih+1292|0;ge=Ih+1288|0;Zb=Ih+1284|0;fe=Ih+
1280|0;Xb=Ih+1276|0;Yb=Ih+1272|0;pc=Ih+1268|0;ie=Ih+1264|0;mc=Ih+1260|0;Je=Ih+1256|0;ic=Ih+1252|0;lc=Ih+1248|0;Bh=Ih+1244|0;Ua=Ih+1240|0;La=Ih+1236|0;Oa=Ih+1232|0;Za=Ih+1228|0;Ya=Ih+1224|0;Ra=Ih+1220|0;Sa=Ih+1216|0;Va=Ih+1212|0;Og=Ih+1208|0;Zg=Ih+1204|0;_g=Ih+1200|0;yh=Ih+1196|0;Ah=Ih+1192|0;xh=Ih+1188|0;zh=Ih+1184|0;Gh=Ih+1180|0;Hb=Ih+1176|0;Yg=Ih+1172|0;Na=Ih+1168|0;Ng=Ih+1164|0;Ka=Ih+1160|0;Tg=Ih+1156|0;Ma=Ih+1152|0;Dh=Ih+1148|0;Fh=Ih+1144|0;Ch=Ih+1140|0;Eh=Ih+1136|0;Vg=Ih+1132|0;Xg=Ih+1128|0;Ug=Ih+1124|0;Wg=Ih+1120|0;Kg=Ih+1116|0;Mg=Ih+1112|0;Jg=Ih+1108|0;Lg=Ih+1104|0;Qg=Ih+1100|0;Sg=Ih+1096|0;Pg=Ih+1092|0;Rg=Ih+1088|0;Pa=Ih+1084|0;ld=Ih+1080|0;Gb=Ih+1076|0;kd=Ih+1072|0;Eb=Ih+1068|0;Fb=Ih+1064|0;_a=Ih+1060|0;nd=Ih+1056|0;Xa=Ih+1052|0;od=Ih+1048|0;Ta=Ih+1044|0;Wa=Ih+1040|0;eh=Ih+1036|0;Qb=Ih+1032|0;gb=Ih+1028|0;Kb=Ih+1024|0;tc=Ih+1020|0;sc=Ih+1016|0;Nb=Ih+1012|0;Ob=Ih+1008|0;Rb=Ih+1004|0;z=Ih+1e3|0;ia=Ih+996|0;ja=Ih+992|0;bh=Ih+988|0;dh=Ih+984|0;ah=Ih+980|0;ch=Ih+976|0;t=Ih+972|0;eb=Ih+968|0;ha=Ih+964|0;Jb=Ih+960|0;y=Ih+956|0;fb=Ih+952|0;ca=Ih+948|0;hb=Ih+944|0;gh=Ih+940|0;s=Ih+936|0;fh=Ih+932|0;r=Ih+928|0;ea=Ih+924|0;ga=Ih+920|0;da=Ih+916|0;fa=Ih+912|0;v=Ih+908|0;x=Ih+904|0;u=Ih+900|0;w=Ih+896|0;$=Ih+892|0;ba=Ih+888|0;A=Ih+884|0;aa=Ih+880|0;Lb=Ih+876|0;vd=Ih+872|0;db=Ih+868|0;ud=Ih+864|0;bb=Ih+860|0;cb=Ih+856|0;uc=Ih+852|0;rd=Ih+848|0;rc=Ih+844|0;sd=Ih+840|0;Pb=Ih+836|0;Sb=Ih+832|0;qa=Ih+828|0;Mc=Ih+824|0;Dc=Ih+820|0;Gc=Ih+816|0;Tb=Ih+812|0;Qc=Ih+808|0;Jc=Ih+804|0;Kc=Ih+800|0;Nc=Ih+796|0;D=Ih+792|0;O=Ih+788|0;P=Ih+784|0;na=Ih+780|0;pa=Ih+776|0;ma=Ih+772|0;oa=Ih+768|0;va=Ih+764|0;Bc=Ih+760|0;N=Ih+756|0;Fc=Ih+752|0;C=Ih+748|0;Cc=Ih+744|0;I=Ih+740|0;Ec=Ih+736|0;sa=Ih+732|0;ua=Ih+728|0;ra=Ih+724|0;ta=Ih+720|0;K=Ih+716|0;M=Ih+712|0;J=Ih+708|0;L=Ih+704|0;xa=Ih+700|0;B=Ih+696|0;wa=Ih+692|0;ya=Ih+688|0;F=Ih+684|0;H=Ih+680|0;E=Ih+676|0;G=Ih+672|0;Hc=Ih+668|0;$d=Ih+664|0;Ac=Ih+660|0;zd=Ih+656|0;yc=Ih+652|0;zc=Ih+648|0;Ub=Ih+644|0;be=Ih+640|0;Pc=Ih+636|0;ce=Ih+632|0;Lc=Ih+
628|0;Oc=Ih+624|0;lf=Ih+620|0;nf=Ih+616|0;wh=Ih+612|0;sb=Ih+608|0;Ce=Ih+604|0;De=Ih+600|0;mf=Ih+596|0;Ee=Ih+592|0;He=Ih+588|0;kf=Ih+584|0;la=Ih+580|0;rb=Ih+576|0;$f=Ih+572|0;ag=Ih+568|0;Vf=Ih+564|0;qf=Ih+560|0;Wf=Ih+556|0;Xf=Ih+552|0;bg=Ih+548|0;Yf=Ih+544|0;Zf=Ih+540|0;_f=Ih+536|0;of=Ih+532|0;pf=Ih+528|0;Db=Ih+524|0;Fd=Ih+520|0;lg=Ih+516|0;zf=Ih+512|0;Uc=Ih+508|0;qg=Ih+504|0;Vc=Ih+500|0;pg=Ih+496|0;ad=Ih+492|0;Af=Ih+488|0;dd=Ih+484|0;yf=Ih+480|0;Ud=Ih+476|0;Ef=Ih+472|0;Vd=Ih+468|0;Df=Ih+464|0;_c=Ih+460|0;mg=Ih+456|0;Bd=Ih+452|0;eg=Ih+448|0;vb=Ih+444|0;hg=Ih+440|0;ab=Ih+436|0;wc=Ih+432|0;xc=Ih+428|0;Wb=Ih+424|0;Sc=Ih+420|0;Tc=Ih+416|0;Yd=Ih+412|0;Zd=Ih+408|0;wf=Ih+404|0;bd=Ih+400|0;cd=Ih+396|0;xf=Ih+392|0;Id=Ih+388|0;Ld=Ih+384|0;Md=Ih+380|0;Pd=Ih+376|0;Sd=Ih+372|0;Td=Ih+368|0;Yc=Ih+364|0;Zc=Ih+360|0;cg=Ih+356|0;$c=Ih+352|0;Ad=Ih+348|0;dg=Ih+344|0;Cd=Ih+340|0;Ed=Ih+336|0;Xc=Ih+332|0;Dd=Ih+328|0;Wc=Ih+324|0;tf=Ih+320|0;uf=Ih+316|0;og=Ih+312|0;vf=Ih+308|0;ng=Ih+304|0;ed=Ih+300|0;gd=Ih+296|0;Xd=Ih+292|0;fd=Ih+288|0;Wd=Ih+284|0;Ff=Ih+280|0;Gf=Ih+276|0;Cf=Ih+272|0;Hf=Ih+268|0;Bf=Ih+264|0;jd=Ih+260|0;_e=Ih+256|0;Nf=Ih+252|0;yg=Ih+248|0;Ne=Ih+244|0;Sf=Ih+240|0;Oe=Ih+236|0;Rf=Ih+232|0;ve=Ih+228|0;zg=Ih+224|0;ye=Ih+220|0;xg=Ih+216|0;pe=Ih+212|0;Dg=Ih+208|0;qe=Ih+204|0;Cg=Ih+200|0;Te=Ih+196|0;Of=Ih+192|0;We=Ih+188|0;Kf=Ih+184|0;hd=Ih+180|0;Mf=Ih+176|0;qd=Ih+172|0;xd=Ih+168|0;yd=Ih+164|0;ee=Ih+160|0;Le=Ih+156|0;Me=Ih+152|0;te=Ih+148|0;ue=Ih+144|0;vg=Ih+140|0;we=Ih+136|0;xe=Ih+132|0;wg=Ih+128|0;bf=Ih+124|0;ef=Ih+120|0;ff=Ih+116|0;ke=Ih+112|0;ne=Ih+108|0;oe=Ih+104|0;Re=Ih+100|0;Se=Ih+96|0;If=Ih+92|0;Ue=Ih+88|0;Ve=Ih+84|0;Jf=Ih+80|0;Xe=Ih+76|0;Ze=Ih+72|0;Qe=Ih+68|0;Ye=Ih+64|0;Pe=Ih+60|0;sg=Ih+56|0;tg=Ih+52|0;Qf=Ih+48|0;ug=Ih+44|0;Pf=Ih+40|0;ze=Ih+36|0;Be=Ih+32|0;se=Ih+28|0;Ae=Ih+24|0;re=Ih+20|0;Eg=Ih+16|0;Fg=Ih+12|0;Bg=Ih+8|0;Gg=Ih+4|0;Ag=Ih;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Jh>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ih+1872>>2]=.9980267286300659;g[Ih+1868>>2]=.06279052048921585;g[Ih+1
864>>2]=.4257792830467224;g[Ih+1860>>2]=.9048270583152771;g[Ih+1856>>2]=.9921147227287292;g[Ih+1852>>2]=.12533323466777802;g[Ih+1848>>2]=.6374239921569824;g[Ih+1844>>2]=.7705132365226746;g[Ih+1840>>2]=.6845471262931824;g[Ih+1836>>2]=.728968620300293;g[Ih+1832>>2]=.4817536771297455;g[Ih+1828>>2]=.8763066530227661;g[Ih+1824>>2]=.8443279266357422;g[Ih+1820>>2]=.5358268022537231;g[Ih+1816>>2]=.24868988990783691;g[Ih+1812>>2]=.9685831665992737;g[Ih+1808>>2]=.5877852439880371;g[Ih+1804>>2]=.9510565400123596;g[Ih+1800>>2]=.25;g[Ih+1796>>2]=.55901700258255;c[Hh>>2]=c[Jh>>2];c[m>>2]=(c[m>>2]|0)+((c[Jh>>2]|0)*48<<2);while(1){if((c[Hh>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[Uf>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+32>>2];g[Rc>>2]=+g[(c[m>>2]|0)+36>>2];g[hf>>2]=+g[za>>2]*+g[Ib>>2]+ +g[Rc>>2]*+g[_d>>2];g[wb>>2]=+g[za>>2]*+g[_d>>2]-+g[Rc>>2]*+g[Ib>>2];g[Hg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[hh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[rg>>2]=+g[(c[m>>2]|0)+152>>2];g[Ig>>2]=+g[(c[m>>2]|0)+156>>2];g[ih>>2]=+g[rg>>2]*+g[Hg>>2]+ +g[Ig>>2]*+g[hh>>2];g[xb>>2]=+g[rg>>2]*+g[hh>>2]-+g[Ig>>2]*+g[Hg>>2];g[jh>>2]=+g[hf>>2]+ +g[ih>>2];g[rf>>2]=+g[wb>>2]+ +g[xb>>2];g[lh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[nh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[kh>>2]=+g[(c[m>>2]|0)+72>>2];g[mh>>2]=+g[(c[m>>2]|0)+76>>2];g[oh>>2]=+g[kh>>2]*+g[lh>>2]+ +g[mh>>2]*+g[nh>>2];g[zb>>2]=+g[kh>>2]*+g[nh>>2]-+g[mh>>2]*+g[lh>>2];g[qh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[sh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ph>>2]=+g[(c[m>>2]|0)+112>>2];g[rh>>2]=+g[(c[m>>2]|0)+116>>2];g[th>>2]=+g[ph>>2]*+g[qh>>2]+ +g[rh>>2]*+g[sh>>2];g[Ab>>2]=+g[ph>>2]*+g[sh>>2]-+g[rh>>2]*+g[qh>>2];g[uh>>2]=+g[oh>>2]+ +g[th>>2];g[sf>>2]=+g[zb>>2]+ +g[Ab>>2];g[tb>>2]=(+g[jh>>2]-+g[uh>>2])*.55901700258255;g[fg>>2]=(+g[rf>>2]-+g[sf>>2])*.55901700258255;g[vh>>2]=+g[jh>>2]+ 
+g[uh>>2];g[ub>>2]=+g[q>>2]-+g[vh>>2]*.25;g[Tf>>2]=+g[rf>>2]+ +g[sf>>2];g[gg>>2]=+g[Uf>>2]-+g[Tf>>2]*.25;g[ig>>2]=+g[hf>>2]-+g[ih>>2];g[jg>>2]=+g[oh>>2]-+g[th>>2];g[kg>>2]=+g[ig>>2]*.9510565400123596+ +g[jg>>2]*.5877852439880371;g[Lf>>2]=+g[jg>>2]*.9510565400123596-+g[ig>>2]*.5877852439880371;g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Cb>>2]=+g[yb>>2]*.9510565400123596+ +g[Bb>>2]*.5877852439880371;g[id>>2]=+g[Bb>>2]*.9510565400123596-+g[yb>>2]*.5877852439880371;g[S>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[R>>2]=+g[(c[m>>2]|0)+16>>2];g[T>>2]=+g[(c[m>>2]|0)+20>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[jc>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[X>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[W>>2]=+g[(c[m>>2]|0)+56>>2];g[Y>>2]=+g[(c[m>>2]|0)+60>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[_b>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[kb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[mb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[jb>>2]=+g[(c[m>>2]|0)+136>>2];g[lb>>2]=+g[(c[m>>2]|0)+140>>2];g[nb>>2]=+g[jb>>2]*+g[kb>>2]+ +g[lb>>2]*+g[mb>>2];g[cc>>2]=+g[jb>>2]*+g[mb>>2]-+g[lb>>2]*+g[kb>>2];g[Ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Aa>>2]=+g[(c[m>>2]|0)+176>>2];g[Ca>>2]=+g[(c[m>>2]|0)+180>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[$b>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ga>>2]=+g[(c[m>>2]|0)+96>>2];g[Ia>>2]=+g[(c[m>>2]|0)+100>>2];g[ib>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[bc>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[oc>>2]=+g[ib>>2]-+g[nb>>2];g[nc>>2]=+g[_>>2]-+g[Ea>>2];g[gc>>2]=+g[_b>>2]+ +g[$b>>2];g[hc>>2]=+g[bc>>2]+ +g[cc>>2];g[kc>>2]=+g[gc>>2]+ +g[hc>>2];g[Fa>>2]=+g[_>>2]+ +g[Ea>>2];g[ob>>2]=+g[ib>>2]+ 
+g[nb>>2];g[pb>>2]=+g[Fa>>2]+ +g[ob>>2];g[qb>>2]=+g[V>>2]+ +g[pb>>2];g[jf>>2]=+g[jc>>2]+ +g[kc>>2];g[ec>>2]=+g[ac>>2]*.9510565400123596+ +g[dc>>2]*.5877852439880371;g[ge>>2]=+g[dc>>2]*.9510565400123596-+g[ac>>2]*.5877852439880371;g[Xb>>2]=(+g[Fa>>2]-+g[ob>>2])*.55901700258255;g[Yb>>2]=+g[V>>2]-+g[pb>>2]*.25;g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[fe>>2]=+g[Yb>>2]-+g[Xb>>2];g[fc>>2]=+g[Zb>>2]+ +g[ec>>2];g[me>>2]=+g[fe>>2]+ +g[ge>>2];g[Rd>>2]=+g[Zb>>2]-+g[ec>>2];g[he>>2]=+g[fe>>2]-+g[ge>>2];g[pc>>2]=+g[nc>>2]*.9510565400123596+ +g[oc>>2]*.5877852439880371;g[ie>>2]=+g[oc>>2]*.9510565400123596-+g[nc>>2]*.5877852439880371;g[ic>>2]=(+g[gc>>2]-+g[hc>>2])*.55901700258255;g[lc>>2]=+g[jc>>2]-+g[kc>>2]*.25;g[mc>>2]=+g[ic>>2]+ +g[lc>>2];g[Je>>2]=+g[lc>>2]-+g[ic>>2];g[qc>>2]=+g[mc>>2]-+g[pc>>2];g[le>>2]=+g[Je>>2]-+g[ie>>2];g[Qd>>2]=+g[pc>>2]+ +g[mc>>2];g[Ke>>2]=+g[ie>>2]+ +g[Je>>2];g[yh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Ah>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[xh>>2]=+g[c[m>>2]>>2];g[zh>>2]=+g[(c[m>>2]|0)+4>>2];g[Bh>>2]=+g[xh>>2]*+g[yh>>2]+ +g[zh>>2]*+g[Ah>>2];g[Ua>>2]=+g[xh>>2]*+g[Ah>>2]-+g[zh>>2]*+g[yh>>2];g[Dh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Fh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ch>>2]=+g[(c[m>>2]|0)+40>>2];g[Eh>>2]=+g[(c[m>>2]|0)+44>>2];g[Gh>>2]=+g[Ch>>2]*+g[Dh>>2]+ +g[Eh>>2]*+g[Fh>>2];g[Hb>>2]=+g[Ch>>2]*+g[Fh>>2]-+g[Eh>>2]*+g[Dh>>2];g[Vg>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Xg>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Ug>>2]=+g[(c[m>>2]|0)+120>>2];g[Wg>>2]=+g[(c[m>>2]|0)+124>>2];g[Yg>>2]=+g[Ug>>2]*+g[Vg>>2]+ +g[Wg>>2]*+g[Xg>>2];g[Na>>2]=+g[Ug>>2]*+g[Xg>>2]-+g[Wg>>2]*+g[Vg>>2];g[Kg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Mg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Jg>>2]=+g[(c[m>>2]|0)+160>>2];g[Lg>>2]=+g[(c[m>>2]|0)+164>>2];g[Ng>>2]=+g[Jg>>2]*+g[Kg>>2]+ 
+g[Lg>>2]*+g[Mg>>2];g[Ka>>2]=+g[Jg>>2]*+g[Mg>>2]-+g[Lg>>2]*+g[Kg>>2];g[Qg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Sg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Pg>>2]=+g[(c[m>>2]|0)+80>>2];g[Rg>>2]=+g[(c[m>>2]|0)+84>>2];g[Tg>>2]=+g[Pg>>2]*+g[Qg>>2]+ +g[Rg>>2]*+g[Sg>>2];g[Ma>>2]=+g[Pg>>2]*+g[Sg>>2]-+g[Rg>>2]*+g[Qg>>2];g[La>>2]=+g[Hb>>2]-+g[Ka>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Za>>2]=+g[Tg>>2]-+g[Yg>>2];g[Ya>>2]=+g[Gh>>2]-+g[Ng>>2];g[Ra>>2]=+g[Hb>>2]+ +g[Ka>>2];g[Sa>>2]=+g[Ma>>2]+ +g[Na>>2];g[Va>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Og>>2]=+g[Gh>>2]+ +g[Ng>>2];g[Zg>>2]=+g[Tg>>2]+ +g[Yg>>2];g[_g>>2]=+g[Og>>2]+ +g[Zg>>2];g[$g>>2]=+g[Bh>>2]+ +g[_g>>2];g[Fe>>2]=+g[Ua>>2]+ +g[Va>>2];g[Pa>>2]=+g[La>>2]*.9510565400123596+ +g[Oa>>2]*.5877852439880371;g[ld>>2]=+g[Oa>>2]*.9510565400123596-+g[La>>2]*.5877852439880371;g[Eb>>2]=(+g[Og>>2]-+g[Zg>>2])*.55901700258255;g[Fb>>2]=+g[Bh>>2]-+g[_g>>2]*.25;g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[kd>>2]=+g[Fb>>2]-+g[Eb>>2];g[Qa>>2]=+g[Gb>>2]+ +g[Pa>>2];g[$e>>2]=+g[kd>>2]+ +g[ld>>2];g[Gd>>2]=+g[Gb>>2]-+g[Pa>>2];g[md>>2]=+g[kd>>2]-+g[ld>>2];g[_a>>2]=+g[Ya>>2]*.9510565400123596+ +g[Za>>2]*.5877852439880371;g[nd>>2]=+g[Za>>2]*.9510565400123596-+g[Ya>>2]*.5877852439880371;g[Ta>>2]=(+g[Ra>>2]-+g[Sa>>2])*.55901700258255;g[Wa>>2]=+g[Ua>>2]-+g[Va>>2]*.25;g[Xa>>2]=+g[Ta>>2]+ +g[Wa>>2];g[od>>2]=+g[Wa>>2]-+g[Ta>>2];g[$a>>2]=+g[Xa>>2]-+g[_a>>2];g[af>>2]=+g[od>>2]-+g[nd>>2];g[Hd>>2]=+g[_a>>2]+ +g[Xa>>2];g[pd>>2]=+g[nd>>2]+ +g[od>>2];g[bh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[dh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ah>>2]=+g[(c[m>>2]|0)+24>>2];g[ch>>2]=+g[(c[m>>2]|0)+28>>2];g[eh>>2]=+g[ah>>2]*+g[bh>>2]+ +g[ch>>2]*+g[dh>>2];g[Qb>>2]=+g[ah>>2]*+g[dh>>2]-+g[ch>>2]*+g[bh>>2];g[gh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[fh>>2]=+g[(c[m>>2]|0)+64>>2];g[r>>2]=+g[(c[m>>2]|0)+68>>2];g[t>>2]=+g[fh>>2]*+g[gh>>2]+ 
+g[r>>2]*+g[s>>2];g[eb>>2]=+g[fh>>2]*+g[s>>2]-+g[r>>2]*+g[gh>>2];g[ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[da>>2]=+g[(c[m>>2]|0)+144>>2];g[fa>>2]=+g[(c[m>>2]|0)+148>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]+ +g[fa>>2]*+g[ga>>2];g[Jb>>2]=+g[da>>2]*+g[ga>>2]-+g[fa>>2]*+g[ea>>2];g[v>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[u>>2]=+g[(c[m>>2]|0)+184>>2];g[w>>2]=+g[(c[m>>2]|0)+188>>2];g[y>>2]=+g[u>>2]*+g[v>>2]+ +g[w>>2]*+g[x>>2];g[fb>>2]=+g[u>>2]*+g[x>>2]-+g[w>>2]*+g[v>>2];g[$>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[A>>2]=+g[(c[m>>2]|0)+104>>2];g[aa>>2]=+g[(c[m>>2]|0)+108>>2];g[ca>>2]=+g[A>>2]*+g[$>>2]+ +g[aa>>2]*+g[ba>>2];g[hb>>2]=+g[A>>2]*+g[ba>>2]-+g[aa>>2]*+g[$>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[Kb>>2]=+g[hb>>2]-+g[Jb>>2];g[tc>>2]=+g[ca>>2]-+g[ha>>2];g[sc>>2]=+g[t>>2]-+g[y>>2];g[Nb>>2]=+g[eb>>2]+ +g[fb>>2];g[Ob>>2]=+g[hb>>2]+ +g[Jb>>2];g[Rb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[z>>2]=+g[t>>2]+ +g[y>>2];g[ia>>2]=+g[ca>>2]+ +g[ha>>2];g[ja>>2]=+g[z>>2]+ +g[ia>>2];g[ka>>2]=+g[eh>>2]+ +g[ja>>2];g[Ge>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Lb>>2]=+g[gb>>2]*.9510565400123596+ +g[Kb>>2]*.5877852439880371;g[vd>>2]=+g[Kb>>2]*.9510565400123596-+g[gb>>2]*.5877852439880371;g[bb>>2]=(+g[z>>2]-+g[ia>>2])*.55901700258255;g[cb>>2]=+g[eh>>2]-+g[ja>>2]*.25;g[db>>2]=+g[bb>>2]+ +g[cb>>2];g[ud>>2]=+g[cb>>2]-+g[bb>>2];g[Mb>>2]=+g[db>>2]+ +g[Lb>>2];g[df>>2]=+g[ud>>2]+ +g[vd>>2];g[Jd>>2]=+g[db>>2]-+g[Lb>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[uc>>2]=+g[sc>>2]*.9510565400123596+ +g[tc>>2]*.5877852439880371;g[rd>>2]=+g[tc>>2]*.9510565400123596-+g[sc>>2]*.5877852439880371;g[Pb>>2]=(+g[Nb>>2]-+g[Ob>>2])*.55901700258255;g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2]*.25;g[rc>>2]=+g[Pb>>2]+ +g[Sb>>2];g[sd>>2]=+g[Sb>>2]-+g[Pb>>2];g[vc>>2]=+g[rc>>2]-+g[uc>>2];g[cf>>2]=+g[sd>>2]-+g[rd>>2];g[Kd>>2]=+g[uc>>2]+ +g[rc>>2];g[td>>2]=+g[rd>>2]+ 
+g[sd>>2];g[na>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ma>>2]=+g[(c[m>>2]|0)+8>>2];g[oa>>2]=+g[(c[m>>2]|0)+12>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[Mc>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ra>>2]=+g[(c[m>>2]|0)+48>>2];g[ta>>2]=+g[(c[m>>2]|0)+52>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[Bc>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[J>>2]=+g[(c[m>>2]|0)+128>>2];g[L>>2]=+g[(c[m>>2]|0)+132>>2];g[N>>2]=+g[J>>2]*+g[K>>2]+ +g[L>>2]*+g[M>>2];g[Fc>>2]=+g[J>>2]*+g[M>>2]-+g[L>>2]*+g[K>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[B>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+168>>2];g[ya>>2]=+g[(c[m>>2]|0)+172>>2];g[C>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[B>>2];g[Cc>>2]=+g[wa>>2]*+g[B>>2]-+g[ya>>2]*+g[xa>>2];g[F>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[E>>2]=+g[(c[m>>2]|0)+88>>2];g[G>>2]=+g[(c[m>>2]|0)+92>>2];g[I>>2]=+g[E>>2]*+g[F>>2]+ +g[G>>2]*+g[H>>2];g[Ec>>2]=+g[E>>2]*+g[H>>2]-+g[G>>2]*+g[F>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2];g[Tb>>2]=+g[I>>2]-+g[N>>2];g[Qc>>2]=+g[va>>2]-+g[C>>2];g[Jc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Kc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Nc>>2]=+g[Jc>>2]+ +g[Kc>>2];g[D>>2]=+g[va>>2]+ +g[C>>2];g[O>>2]=+g[I>>2]+ +g[N>>2];g[P>>2]=+g[D>>2]+ +g[O>>2];g[Q>>2]=+g[qa>>2]+ +g[P>>2];g[Ie>>2]=+g[Mc>>2]+ +g[Nc>>2];g[Hc>>2]=+g[Dc>>2]*.9510565400123596+ +g[Gc>>2]*.5877852439880371;g[$d>>2]=+g[Gc>>2]*.9510565400123596-+g[Dc>>2]*.5877852439880371;g[yc>>2]=(+g[D>>2]-+g[O>>2])*.55901700258255;g[zc>>2]=+g[qa>>2]-+g[P>>2]*.25;g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[zd>>2]=+g[zc>>2]-+g[yc>>2];g[Ic>>2]=+g[Ac>>2]+ +g[Hc>>2];g[gf>>2]=+g[zd>>2]+ 
+g[$d>>2];g[Od>>2]=+g[Ac>>2]-+g[Hc>>2];g[ae>>2]=+g[zd>>2]-+g[$d>>2];g[Ub>>2]=+g[Qc>>2]*.9510565400123596+ +g[Tb>>2]*.5877852439880371;g[be>>2]=+g[Tb>>2]*.9510565400123596-+g[Qc>>2]*.5877852439880371;g[Lc>>2]=(+g[Jc>>2]-+g[Kc>>2])*.55901700258255;g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2]*.25;g[Pc>>2]=+g[Lc>>2]+ +g[Oc>>2];g[ce>>2]=+g[Oc>>2]-+g[Lc>>2];g[Vb>>2]=+g[Pc>>2]-+g[Ub>>2];g[je>>2]=+g[ce>>2]-+g[be>>2];g[Nd>>2]=+g[Ub>>2]+ +g[Pc>>2];g[de>>2]=+g[be>>2]+ +g[ce>>2];g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[kf>>2]=+g[Ie>>2]-+g[jf>>2];g[lf>>2]=+g[He>>2]*.9510565400123596+ +g[kf>>2]*.5877852439880371;g[nf>>2]=+g[kf>>2]*.9510565400123596-+g[He>>2]*.5877852439880371;g[wh>>2]=+g[q>>2]+ +g[vh>>2];g[la>>2]=+g[$g>>2]+ +g[ka>>2];g[rb>>2]=+g[Q>>2]+ +g[qb>>2];g[sb>>2]=+g[la>>2]+ +g[rb>>2];g[Ce>>2]=(+g[la>>2]-+g[rb>>2])*.55901700258255;g[De>>2]=+g[wh>>2]-+g[sb>>2]*.25;g[c[k>>2]>>2]=+g[wh>>2]+ +g[sb>>2];g[mf>>2]=+g[De>>2]-+g[Ce>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[mf>>2]-+g[nf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[mf>>2]+ +g[nf>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Ee>>2]-+g[lf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ee>>2]+ +g[lf>>2];g[Zf>>2]=+g[$g>>2]-+g[ka>>2];g[_f>>2]=+g[Q>>2]-+g[qb>>2];g[$f>>2]=+g[Zf>>2]*.9510565400123596+ +g[_f>>2]*.5877852439880371;g[ag>>2]=+g[_f>>2]*.9510565400123596-+g[Zf>>2]*.5877852439880371;g[Vf>>2]=+g[Tf>>2]+ +g[Uf>>2];g[of>>2]=+g[Fe>>2]+ +g[Ge>>2];g[pf>>2]=+g[Ie>>2]+ +g[jf>>2];g[qf>>2]=+g[of>>2]+ +g[pf>>2];g[Wf>>2]=(+g[of>>2]-+g[pf>>2])*.55901700258255;g[Xf>>2]=+g[Vf>>2]-+g[qf>>2]*.25;g[c[l>>2]>>2]=+g[qf>>2]+ +g[Vf>>2];g[bg>>2]=+g[Xf>>2]-+g[Wf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[ag>>2]+ +g[bg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[bg>>2]-+g[ag>>2];g[Yf>>2]=+g[Wf>>2]+ +g[Xf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Yf>>2]-+g[$f>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[$f>>2]+ +g[Yf>>2];g[vb>>2]=+g[tb>>2]+ +g[ub>>2];g[Db>>2]=+g[vb>>2]+ +g[Cb>>2];g[Fd>>2]=+g[vb>>2]-+g[Cb>>2];g[hg>>2]=+g[fg>>2]+ 
+g[gg>>2];g[lg>>2]=+g[hg>>2]-+g[kg>>2];g[zf>>2]=+g[kg>>2]+ +g[hg>>2];g[ab>>2]=+g[Qa>>2]*.9685831665992737+ +g[$a>>2]*.24868988990783691;g[wc>>2]=+g[Mb>>2]*.5358268022537231+ +g[vc>>2]*.8443279266357422;g[xc>>2]=+g[ab>>2]+ +g[wc>>2];g[Wb>>2]=+g[Ic>>2]*.8763066530227661+ +g[Vb>>2]*.4817536771297455;g[Sc>>2]=+g[fc>>2]*.728968620300293+ +g[qc>>2]*.6845471262931824;g[Tc>>2]=+g[Wb>>2]+ +g[Sc>>2];g[Uc>>2]=+g[xc>>2]+ +g[Tc>>2];g[qg>>2]=+g[Wb>>2]-+g[Sc>>2];g[Vc>>2]=(+g[xc>>2]-+g[Tc>>2])*.55901700258255;g[pg>>2]=+g[ab>>2]-+g[wc>>2];g[Yd>>2]=+g[Hd>>2]*.5358268022537231-+g[Gd>>2]*.8443279266357422;g[Zd>>2]=+g[Jd>>2]*.7705132365226746-+g[Kd>>2]*.6374239921569824;g[wf>>2]=+g[Yd>>2]+ +g[Zd>>2];g[bd>>2]=+g[Rd>>2]*.12533323466777802+ +g[Qd>>2]*.9921147227287292;g[cd>>2]=+g[Od>>2]*.9048270583152771+ +g[Nd>>2]*.4257792830467224;g[xf>>2]=+g[cd>>2]+ +g[bd>>2];g[ad>>2]=+g[Yd>>2]-+g[Zd>>2];g[Af>>2]=(+g[wf>>2]+ +g[xf>>2])*.55901700258255;g[dd>>2]=+g[bd>>2]-+g[cd>>2];g[yf>>2]=+g[wf>>2]-+g[xf>>2];g[Id>>2]=+g[Gd>>2]*.5358268022537231+ +g[Hd>>2]*.8443279266357422;g[Ld>>2]=+g[Jd>>2]*.6374239921569824+ +g[Kd>>2]*.7705132365226746;g[Md>>2]=+g[Id>>2]-+g[Ld>>2];g[Pd>>2]=+g[Nd>>2]*.9048270583152771-+g[Od>>2]*.4257792830467224;g[Sd>>2]=+g[Qd>>2]*.12533323466777802-+g[Rd>>2]*.9921147227287292;g[Td>>2]=+g[Pd>>2]+ +g[Sd>>2];g[Ud>>2]=+g[Md>>2]+ +g[Td>>2];g[Ef>>2]=+g[Pd>>2]-+g[Sd>>2];g[Vd>>2]=(+g[Md>>2]-+g[Td>>2])*.55901700258255;g[Df>>2]=+g[Id>>2]+ +g[Ld>>2];g[Yc>>2]=+g[$a>>2]*.9685831665992737-+g[Qa>>2]*.24868988990783691;g[Zc>>2]=+g[vc>>2]*.5358268022537231-+g[Mb>>2]*.8443279266357422;g[cg>>2]=+g[Yc>>2]+ +g[Zc>>2];g[$c>>2]=+g[Vb>>2]*.8763066530227661-+g[Ic>>2]*.4817536771297455;g[Ad>>2]=+g[qc>>2]*.728968620300293-+g[fc>>2]*.6845471262931824;g[dg>>2]=+g[$c>>2]+ +g[Ad>>2];g[_c>>2]=+g[Yc>>2]-+g[Zc>>2];g[mg>>2]=(+g[cg>>2]-+g[dg>>2])*.55901700258255;g[Bd>>2]=+g[$c>>2]-+g[Ad>>2];g[eg>>2]=+g[cg>>2]+ +g[dg>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Db>>2]+ +g[Uc>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[eg>>2]+ 
+g[lg>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Fd>>2]+ +g[Ud>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[yf>>2]+ +g[zf>>2];g[Cd>>2]=+g[_c>>2]*.9510565400123596+ +g[Bd>>2]*.5877852439880371;g[Ed>>2]=+g[Bd>>2]*.9510565400123596-+g[_c>>2]*.5877852439880371;g[Wc>>2]=+g[Db>>2]-+g[Uc>>2]*.25;g[Xc>>2]=+g[Vc>>2]+ +g[Wc>>2];g[Dd>>2]=+g[Wc>>2]-+g[Vc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Xc>>2]-+g[Cd>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Dd>>2]+ +g[Ed>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Xc>>2]+ +g[Cd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Dd>>2]-+g[Ed>>2];g[tf>>2]=+g[pg>>2]*.9510565400123596+ +g[qg>>2]*.5877852439880371;g[uf>>2]=+g[qg>>2]*.9510565400123596-+g[pg>>2]*.5877852439880371;g[ng>>2]=+g[lg>>2]-+g[eg>>2]*.25;g[og>>2]=+g[mg>>2]+ +g[ng>>2];g[vf>>2]=+g[ng>>2]-+g[mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[og>>2]-+g[tf>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[vf>>2]-+g[uf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[tf>>2]+ +g[og>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[uf>>2]+ +g[vf>>2];g[ed>>2]=+g[ad>>2]*.9510565400123596+ +g[dd>>2]*.5877852439880371;g[gd>>2]=+g[dd>>2]*.9510565400123596-+g[ad>>2]*.5877852439880371;g[Wd>>2]=+g[Fd>>2]-+g[Ud>>2]*.25;g[Xd>>2]=+g[Vd>>2]+ +g[Wd>>2];g[fd>>2]=+g[Wd>>2]-+g[Vd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Xd>>2]-+g[ed>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[fd>>2]+ +g[gd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Xd>>2]+ +g[ed>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[fd>>2]-+g[gd>>2];g[Ff>>2]=+g[Df>>2]*.9510565400123596+ +g[Ef>>2]*.5877852439880371;g[Gf>>2]=+g[Ef>>2]*.9510565400123596-+g[Df>>2]*.5877852439880371;g[Bf>>2]=+g[zf>>2]-+g[yf>>2]*.25;g[Cf>>2]=+g[Af>>2]+ +g[Bf>>2];g[Hf>>2]=+g[Bf>>2]-+g[Af>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Cf>>2]-+g[Ff>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Hf>>2]-+g[Gf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Ff>>2]+ +g[Cf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Gf>>2]+ 
+g[Hf>>2];g[hd>>2]=+g[ub>>2]-+g[tb>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[_e>>2]=+g[hd>>2]+ +g[id>>2];g[Mf>>2]=+g[gg>>2]-+g[fg>>2];g[Nf>>2]=+g[Lf>>2]+ +g[Mf>>2];g[yg>>2]=+g[Mf>>2]-+g[Lf>>2];g[qd>>2]=+g[md>>2]*.8763066530227661+ +g[pd>>2]*.4817536771297455;g[xd>>2]=+g[td>>2]*.9048270583152771-+g[wd>>2]*.4257792830467224;g[yd>>2]=+g[qd>>2]+ +g[xd>>2];g[ee>>2]=+g[ae>>2]*.5358268022537231+ +g[de>>2]*.8443279266357422;g[Le>>2]=+g[he>>2]*.06279052048921585+ +g[Ke>>2]*.9980267286300659;g[Me>>2]=+g[ee>>2]+ +g[Le>>2];g[Ne>>2]=+g[yd>>2]+ +g[Me>>2];g[Sf>>2]=+g[ee>>2]-+g[Le>>2];g[Oe>>2]=(+g[yd>>2]-+g[Me>>2])*.55901700258255;g[Rf>>2]=+g[qd>>2]-+g[xd>>2];g[te>>2]=+g[af>>2]*.728968620300293-+g[$e>>2]*.6845471262931824;g[ue>>2]=+g[df>>2]*.12533323466777802+ +g[cf>>2]*.9921147227287292;g[vg>>2]=+g[te>>2]-+g[ue>>2];g[we>>2]=+g[je>>2]*.06279052048921585-+g[gf>>2]*.9980267286300659;g[xe>>2]=+g[me>>2]*.7705132365226746+ +g[le>>2]*.6374239921569824;g[wg>>2]=+g[we>>2]-+g[xe>>2];g[ve>>2]=+g[te>>2]+ +g[ue>>2];g[zg>>2]=(+g[vg>>2]-+g[wg>>2])*.55901700258255;g[ye>>2]=+g[we>>2]+ +g[xe>>2];g[xg>>2]=+g[vg>>2]+ +g[wg>>2];g[bf>>2]=+g[$e>>2]*.728968620300293+ +g[af>>2]*.6845471262931824;g[ef>>2]=+g[cf>>2]*.12533323466777802-+g[df>>2]*.9921147227287292;g[ff>>2]=+g[bf>>2]+ +g[ef>>2];g[ke>>2]=+g[gf>>2]*.06279052048921585+ +g[je>>2]*.9980267286300659;g[ne>>2]=+g[le>>2]*.7705132365226746-+g[me>>2]*.6374239921569824;g[oe>>2]=+g[ke>>2]+ +g[ne>>2];g[pe>>2]=+g[ff>>2]+ +g[oe>>2];g[Dg>>2]=+g[ke>>2]-+g[ne>>2];g[qe>>2]=(+g[ff>>2]-+g[oe>>2])*.55901700258255;g[Cg>>2]=+g[bf>>2]-+g[ef>>2];g[Re>>2]=+g[pd>>2]*.8763066530227661-+g[md>>2]*.4817536771297455;g[Se>>2]=+g[wd>>2]*.9048270583152771+ +g[td>>2]*.4257792830467224;g[If>>2]=+g[Re>>2]-+g[Se>>2];g[Ue>>2]=+g[de>>2]*.5358268022537231-+g[ae>>2]*.8443279266357422;g[Ve>>2]=+g[Ke>>2]*.06279052048921585-+g[he>>2]*.9980267286300659;g[Jf>>2]=+g[Ue>>2]+ +g[Ve>>2];g[Te>>2]=+g[Re>>2]+ 
+g[Se>>2];g[Of>>2]=(+g[If>>2]-+g[Jf>>2])*.55901700258255;g[We>>2]=+g[Ue>>2]-+g[Ve>>2];g[Kf>>2]=+g[If>>2]+ +g[Jf>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[jd>>2]+ +g[Ne>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Kf>>2]+ +g[Nf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[_e>>2]+ +g[pe>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[xg>>2]+ +g[yg>>2];g[Xe>>2]=+g[Te>>2]*.9510565400123596+ +g[We>>2]*.5877852439880371;g[Ze>>2]=+g[We>>2]*.9510565400123596-+g[Te>>2]*.5877852439880371;g[Pe>>2]=+g[jd>>2]-+g[Ne>>2]*.25;g[Qe>>2]=+g[Oe>>2]+ +g[Pe>>2];g[Ye>>2]=+g[Pe>>2]-+g[Oe>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Qe>>2]-+g[Xe>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Ye>>2]+ +g[Ze>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Qe>>2]+ +g[Xe>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ye>>2]-+g[Ze>>2];g[sg>>2]=+g[Rf>>2]*.9510565400123596+ +g[Sf>>2]*.5877852439880371;g[tg>>2]=+g[Sf>>2]*.9510565400123596-+g[Rf>>2]*.5877852439880371;g[Pf>>2]=+g[Nf>>2]-+g[Kf>>2]*.25;g[Qf>>2]=+g[Of>>2]+ +g[Pf>>2];g[ug>>2]=+g[Pf>>2]-+g[Of>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Qf>>2]-+g[sg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[ug>>2]-+g[tg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[sg>>2]+ +g[Qf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[tg>>2]+ +g[ug>>2];g[ze>>2]=+g[ve>>2]*.9510565400123596+ +g[ye>>2]*.5877852439880371;g[Be>>2]=+g[ye>>2]*.9510565400123596-+g[ve>>2]*.5877852439880371;g[re>>2]=+g[_e>>2]-+g[pe>>2]*.25;g[se>>2]=+g[qe>>2]+ +g[re>>2];g[Ae>>2]=+g[re>>2]-+g[qe>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[se>>2]-+g[ze>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Ae>>2]+ +g[Be>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[se>>2]+ +g[ze>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ae>>2]-+g[Be>>2];g[Eg>>2]=+g[Cg>>2]*.9510565400123596+ +g[Dg>>2]*.5877852439880371;g[Fg>>2]=+g[Dg>>2]*.9510565400123596-+g[Cg>>2]*.5877852439880371;g[Ag>>2]=+g[yg>>2]-+g[xg>>2]*.25;g[Bg>>2]=+g[zg>>2]+ 
+g[Ag>>2];g[Gg>>2]=+g[Ag>>2]-+g[zg>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Bg>>2]-+g[Eg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Gg>>2]-+g[Fg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Eg>>2]+ +g[Bg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Fg>>2]+ +g[Gg>>2];c[Hh>>2]=(c[Hh>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+192;c[n>>2]=c[n>>2]^c[2998]}i=Ih;return}/* lj(a): coerces a to int, saves the module-level variable i (used here as a bump-style stack pointer: saved, advanced by 16 bytes, restored on exit), spills a into the first 4-byte slot of that frame, and calls gh(a, 7, 1992). Returns nothing. gh and the meaning of the constants 7 and 1992 are defined outside this chunk. NOTE(review): looks like a thunk that registers something by passing a size/radix (7) and a static-data address (1992) - confirm against gh. c and g are presumably the 32-bit integer and 32-bit float heap views (indexed with byteAddress>>2) - TODO confirm at their definition. */function lj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,7,1992);i=b;return}/* mj(a,b,d,e,f,h,j): all seven parameters are coerced to int; returns nothing. a and b are byte addresses of two float sequences that are updated in place. d is the byte address of a table of float pairs. e is an element offset (scaled by 4 bytes) from the current a/b position to the second sample. The loop counter starts at f and the loop exits once it is >= h. j is the per-iteration element step (scaled by 4 bytes) added to both a and b. Before the loop the table address is advanced by f*2 floats (f*8 bytes); inside the loop it advances by 8 bytes per iteration. Each iteration reads x0=a[0], y0=b[0], x1=a[e], y1=b[e], w0=d[0], w1=d[1]; computes v=w0*x1+w1*y1 and w=w0*y1-w1*x1; then stores a[e]=x0-v, b[e]=y0-w, a[0]=x0+v, b[0]=w+y0. NOTE(review): this matches a radix-2 twiddle butterfly on split real/imaginary arrays, presumably a generated FFTW codelet - confirm against the C source. */function mj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;/* reserve a 64-byte frame from stack pointer i; k..t below hold the byte addresses of 4-byte slots inside it, so every local lives in heap memory */z=i;i=i+64|0;k=z+60|0;l=z+56|0;m=z+52|0;n=z+48|0;A=z+44|0;o=z+40|0;p=z+36|0;y=z+32|0;q=z+28|0;x=z+24|0;v=z+20|0;w=z+16|0;s=z+12|0;u=z+8|0;r=z+4|0;t=z;/* spill the parameters into their slots, initialise the counter slot y from f, and advance the table address by f*8 bytes */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[A>>2]=f;c[o>>2]=h;c[p>>2]=j;c[y>>2]=c[A>>2];c[m>>2]=(c[m>>2]|0)+(c[A>>2]<<1<<2);/* main loop: one butterfly per iteration, exit when counter >= h */while(1){if((c[y>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[x>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[w>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[q>>2]-+g[v>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[x>>2]-+g[w>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[v>>2];g[c[l>>2]>>2]=+g[w>>2]+ +g[x>>2];c[y>>2]=(c[y>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+8}/* release the frame */i=z;return}/* nj(a): same shape as lj - coerces a to int, spills it into a 16-byte frame taken from stack pointer i, calls gh(a, 8, 2056), restores i, and returns nothing. The meaning of 8 and 2056 is defined by gh, which is outside this chunk - NOTE(review): confirm. */function nj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,8,2056);i=b;return}function oj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0;Ci=i;i=i+2048|0;k=Ci+2040|0;l=Ci+2036|0;m=Ci+2032|0;n=Ci+2028|0;Di=Ci+2024|0;o=Ci+2020|0;p=Ci+2016|0;Bi=Ci+1984|0;ki=Ci+1980|0;oe=Ci+1976|0;Eg=Ci+1972|0;Sg=Ci+1968|0;Ob=Ci+1964|0;td=Ci+1960|0;Ag=Ci+1956|0;mh=Ci+1952|0;R=Ci+1948|0;kf=Ci+1944|0;He=Ci+1940|0;Cf=Ci+1936|0;bc=Ci+1932|0;ie=Ci+1928|0;Tc=Ci+1924|0;fe=Ci+1920|0;La=Ci+1916|0;qf=Ci+1912|0;Xf=Ci+1908|0;Hf=Ci+1904|0;Bd=Ci+1900|0;Me=Ci+1896|0;Sd=Ci+1892|0;Pe=Ci+1888|0;Jh=Ci+1884|0;lh=Ci+1880|0;re=Ci+1876|0;vg=Ci+1872|0;rc=Ci+1868|0;ud=Ci+1864|0;wc=Ci+1860|0;vd=Ci+1856|0;v=Ci+1852|0;we=Ci+1848|0;ve=Ci+1844|0;xf=Ci+1840|0;Dc=Ci+1836|0;yd=Ci+1832|0;Ic=Ci+1828|0;zd=Ci+1824|0;qa=Ci+1820|0;ye=Ci+1816|0;Be=Ci+1812|0;yf=Ci+1808|0;Oc=Ci+1804|0;ae=Ci+1800|0;Vb=Ci+1796|0;be=Ci+1792|0;lb=Ci+1788|0;Ie=Ci+1784|0;nf=Ci+1780|0;Df=Ci+1776|0;mc=Ci+1772|0;ge=Ci+1768|0;Wc=Ci+1764|0;Je=Ci+1760|0;gb=Ci+1756|0;Yf=Ci+1752|0;Tf=Ci+1748|0;If=Ci+1744|0;Md=Ci+1740|0;Qe=Ci+1736|0;Vd=Ci+1732|0;Ne=Ci+1728|0;q=Ci+1724|0;yg=Ci+1720|0;hf=Ci+1716|0;xg=Ci+1712|0;di=Ci+1708|0;Lb=Ci+1704|0;ii=Ci+1700|0;Mb=Ci+1696|0;Ib=Ci+1692|0;_d=Ci+1688|0;za=Ci+1684|0;Rc=Ci+1680|0;Ch=Ci+1676|0;ci=Ci+1672|0;Ah=Ci+1668|0;bi=Ci+1664|0;fi=Ci+1660|0;hi=Ci+1656|0;ei=Ci+1652|0;gi=Ci+1648|0;rg=Ci+1644|0;ji=Ci+1640|0;Cg=Ci+1636|0;Dg=Ci+1632|0;Kb=Ci+1628|0;Nb=Ci+1624|0;wg=Ci+1620|0;zg=Ci+1616|0;xa=Ci+1612|0;Zb=Ci+1608|0;P=Ci+1604|0;qc=Ci+1600|0;E=Ci+1596|0;_b=Ci+1592|0;K=Ci+1588|0;pc=Ci+1584|0;ua=Ci+1580|0;wa=Ci+1576|0;ta=Ci+1572|0;va=Ci+1568|0;M=Ci+1564|0;O=Ci+1560|0;L=Ci+1556|0;N=Ci+1552|0;B=Ci+1548|0
;D=Ci+1544|0;ya=Ci+1540|0;C=Ci+1536|0;H=Ci+1532|0;J=Ci+1528|0;G=Ci+1524|0;I=Ci+1520|0;F=Ci+1516|0;Q=Ci+1512|0;Fe=Ci+1508|0;Ge=Ci+1504|0;$b=Ci+1500|0;ac=Ci+1496|0;oc=Ci+1492|0;Sc=Ci+1488|0;rb=Ci+1484|0;Od=Ci+1480|0;Hb=Ci+1476|0;$c=Ci+1472|0;wb=Ci+1468|0;Pd=Ci+1464|0;Cb=Ci+1460|0;_c=Ci+1456|0;ob=Ci+1452|0;qb=Ci+1448|0;nb=Ci+1444|0;pb=Ci+1440|0;Eb=Ci+1436|0;Gb=Ci+1432|0;Db=Ci+1428|0;Fb=Ci+1424|0;tb=Ci+1420|0;vb=Ci+1416|0;sb=Ci+1412|0;ub=Ci+1408|0;zb=Ci+1404|0;Bb=Ci+1400|0;yb=Ci+1396|0;Ab=Ci+1392|0;xb=Ci+1388|0;Ka=Ci+1384|0;Vf=Ci+1380|0;Wf=Ci+1376|0;Zc=Ci+1372|0;Ad=Ci+1368|0;Qd=Ci+1364|0;Rd=Ci+1360|0;pi=Ci+1356|0;Pb=Ci+1352|0;Hh=Ci+1348|0;uc=Ci+1344|0;ui=Ci+1340|0;Qb=Ci+1336|0;Ai=Ci+1332|0;tc=Ci+1328|0;mi=Ci+1324|0;oi=Ci+1320|0;li=Ci+1316|0;ni=Ci+1312|0;Eh=Ci+1308|0;Gh=Ci+1304|0;Dh=Ci+1300|0;Fh=Ci+1296|0;ri=Ci+1292|0;ti=Ci+1288|0;qi=Ci+1284|0;si=Ci+1280|0;xi=Ci+1276|0;zi=Ci+1272|0;wi=Ci+1268|0;yi=Ci+1264|0;vi=Ci+1260|0;Ih=Ci+1256|0;pe=Ci+1252|0;qe=Ci+1248|0;Rb=Ci+1244|0;Sb=Ci+1240|0;sc=Ci+1236|0;vc=Ci+1232|0;Ph=Ci+1228|0;zc=Ci+1224|0;t=Ci+1220|0;Gc=Ci+1216|0;Uh=Ci+1212|0;Ac=Ci+1208|0;_h=Ci+1204|0;Fc=Ci+1200|0;Mh=Ci+1196|0;Oh=Ci+1192|0;Lh=Ci+1188|0;Nh=Ci+1184|0;ai=Ci+1180|0;s=Ci+1176|0;$h=Ci+1172|0;r=Ci+1168|0;Rh=Ci+1164|0;Th=Ci+1160|0;Qh=Ci+1156|0;Sh=Ci+1152|0;Xh=Ci+1148|0;Zh=Ci+1144|0;Wh=Ci+1140|0;Yh=Ci+1136|0;Vh=Ci+1132|0;u=Ci+1128|0;te=Ci+1124|0;ue=Ci+1120|0;Bc=Ci+1116|0;Cc=Ci+1112|0;Ec=Ci+1108|0;Hc=Ci+1104|0;A=Ci+1100|0;Kc=Ci+1096|0;oa=Ci+1092|0;Tb=Ci+1088|0;da=Ci+1084|0;Lc=Ci+1080|0;ja=Ci+1076|0;Qc=Ci+1072|0;x=Ci+1068|0;z=Ci+1064|0;w=Ci+1060|0;y=Ci+1056|0;la=Ci+1052|0;na=Ci+1048|0;ka=Ci+1044|0;ma=Ci+1040|0;aa=Ci+1036|0;ca=Ci+1032|0;$=Ci+1028|0;ba=Ci+1024|0;ga=Ci+1020|0;ia=Ci+1016|0;fa=Ci+1012|0;ha=Ci+1008|0;ea=Ci+1004|0;pa=Ci+1e3|0;ze=Ci+996|0;Ae=Ci+992|0;Mc=Ci+988|0;Nc=Ci+984|0;Pc=Ci+980|0;Ub=Ci+976|0;W=Ci+972|0;ic=Ci+968|0;Aa=Ci+964|0;jc=Ci+960|0;hc=Ci+956|0;kc=Ci+952|0;Ga=Ci+948|0;dc=Ci+944|0;jb=Ci+940|0;ec=Ci+936|0;cc=Ci+932|0;fc=Ci+928|0;T=Ci+924|0;V=Ci+920|0;
S=Ci+916|0;U=Ci+912|0;Y=Ci+908|0;_=Ci+904|0;X=Ci+900|0;Z=Ci+896|0;Da=Ci+892|0;Fa=Ci+888|0;Ca=Ci+884|0;Ea=Ci+880|0;Ia=Ci+876|0;ib=Ci+872|0;Ha=Ci+868|0;Ja=Ci+864|0;Ba=Ci+860|0;kb=Ci+856|0;lf=Ci+852|0;mf=Ci+848|0;gc=Ci+844|0;lc=Ci+840|0;Uc=Ci+836|0;Vc=Ci+832|0;Qa=Ci+828|0;Cd=Ci+824|0;Va=Ci+820|0;Dd=Ci+816|0;Ed=Ci+812|0;Fd=Ci+808|0;$a=Ci+804|0;Id=Ci+800|0;eb=Ci+796|0;Jd=Ci+792|0;Hd=Ci+788|0;Kd=Ci+784|0;Na=Ci+780|0;Pa=Ci+776|0;Ma=Ci+772|0;Oa=Ci+768|0;Sa=Ci+764|0;Ua=Ci+760|0;Ra=Ci+756|0;Ta=Ci+752|0;Ya=Ci+748|0;_a=Ci+744|0;Xa=Ci+740|0;Za=Ci+736|0;bb=Ci+732|0;db=Ci+728|0;ab=Ci+724|0;cb=Ci+720|0;Wa=Ci+716|0;fb=Ci+712|0;rf=Ci+708|0;sf=Ci+704|0;Gd=Ci+700|0;Ld=Ci+696|0;Td=Ci+692|0;Ud=Ci+688|0;sa=Ci+684|0;Qf=Ci+680|0;ah=Ci+676|0;ch=Ci+672|0;Jb=Ci+668|0;bh=Ci+664|0;sg=Ci+660|0;tg=Ci+656|0;Kh=Ci+652|0;ra=Ci+648|0;ug=Ci+644|0;Bg=Ci+640|0;mb=Ci+636|0;hb=Ci+632|0;Rf=Ci+628|0;Sf=Ci+624|0;Af=Ci+620|0;Mf=Ci+616|0;gh=Ci+612|0;ih=Ci+608|0;Ff=Ci+604|0;Nf=Ci+600|0;Kf=Ci+596|0;Of=Ci+592|0;wf=Ci+588|0;zf=Ci+584|0;eh=Ci+580|0;fh=Ci+576|0;Bf=Ci+572|0;Ef=Ci+568|0;Gf=Ci+564|0;Jf=Ci+560|0;Lf=Ci+556|0;dh=Ci+552|0;Pf=Ci+548|0;hh=Ci+544|0;se=Ci+540|0;nh=Ci+536|0;th=Ci+532|0;eg=Ci+528|0;De=Ci+524|0;kh=Ci+520|0;og=Ci+516|0;uf=Ci+512|0;hg=Ci+508|0;sh=Ci+504|0;pf=Ci+500|0;bg=Ci+496|0;lg=Ci+492|0;tf=Ci+488|0;_f=Ci+484|0;cg=Ci+480|0;xe=Ci+476|0;Ce=Ci+472|0;jf=Ci+468|0;of=Ci+464|0;mg=Ci+460|0;ng=Ci+456|0;fg=Ci+452|0;gg=Ci+448|0;jg=Ci+444|0;kg=Ci+440|0;Uf=Ci+436|0;Zf=Ci+432|0;Ee=Ci+428|0;$f=Ci+424|0;rh=Ci+420|0;uh=Ci+416|0;ag=Ci+412|0;dg=Ci+408|0;vh=Ci+404|0;wh=Ci+400|0;ig=Ci+396|0;pg=Ci+392|0;jh=Ci+388|0;oh=Ci+384|0;qg=Ci+380|0;vf=Ci+376|0;ph=Ci+372|0;qh=Ci+368|0;yc=Ci+364|0;dd=Ci+360|0;Tg=Ci+356|0;Zg=Ci+352|0;Xb=Ci+348|0;Qg=Ci+344|0;nd=Ci+340|0;rd=Ci+336|0;Yc=Ci+332|0;ad=Ci+328|0;gd=Ci+324|0;Yg=Ci+320|0;kd=Ci+316|0;qd=Ci+312|0;Xd=Ci+308|0;bd=Ci+304|0;xc=Ci+300|0;Rg=Ci+296|0;Jc=Ci+292|0;Wb=Ci+288|0;ld=Ci+284|0;md=Ci+280|0;nc=Ci+276|0;Xc=Ci+272|0;ed=Ci+268|0;fd=Ci+264|0;id=Ci+260|0;jd=Ci+256|0;Nd=Ci+252|0;Wd
=Ci+248|0;Yb=Ci+244|0;Yd=Ci+240|0;Xg=Ci+236|0;_g=Ci+232|0;Zd=Ci+228|0;cd=Ci+224|0;$g=Ci+220|0;Bh=Ci+216|0;hd=Ci+212|0;od=Ci+208|0;Pg=Ci+204|0;Ug=Ci+200|0;pd=Ci+196|0;sd=Ci+192|0;Vg=Ci+188|0;Wg=Ci+184|0;xd=Ci+180|0;Ye=Ci+176|0;Fg=Ci+172|0;Lg=Ci+168|0;de=Ci+164|0;yh=Ci+160|0;gf=Ci+156|0;me=Ci+152|0;Le=Ci+148|0;Ve=Ci+144|0;$e=Ci+140|0;Kg=Ci+136|0;df=Ci+132|0;le=Ci+128|0;Se=Ci+124|0;We=Ci+120|0;wd=Ci+116|0;zh=Ci+112|0;$d=Ci+108|0;ce=Ci+104|0;ef=Ci+100|0;ff=Ci+96|0;he=Ci+92|0;Ke=Ci+88|0;Ze=Ci+84|0;_e=Ci+80|0;bf=Ci+76|0;cf=Ci+72|0;Oe=Ci+68|0;Re=Ci+64|0;ee=Ci+60|0;Te=Ci+56|0;Jg=Ci+52|0;Mg=Ci+48|0;Ue=Ci+44|0;Xe=Ci+40|0;Ng=Ci+36|0;Og=Ci+32|0;af=Ci+28|0;je=Ci+24|0;xh=Ci+20|0;Gg=Ci+16|0;ke=Ci+12|0;ne=Ci+8|0;Hg=Ci+4|0;Ig=Ci;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Di>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ci+2012>>2]=.19509032368659973;g[Ci+2008>>2]=.9807852506637573;g[Ci+2004>>2]=.5555702447891235;g[Ci+2e3>>2]=.8314695954322815;g[Ci+1996>>2]=.3826834261417389;g[Ci+1992>>2]=.9238795042037964;g[Ci+1988>>2]=.7071067690849304;c[Bi>>2]=c[Di>>2];c[m>>2]=(c[m>>2]|0)+((c[Di>>2]|0)*62<<2);while(1){if((c[Bi>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[yg>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+120>>2];g[Rc>>2]=+g[(c[m>>2]|0)+124>>2];g[hf>>2]=+g[za>>2]*+g[Ib>>2]+ +g[Rc>>2]*+g[_d>>2];g[xg>>2]=+g[za>>2]*+g[_d>>2]-+g[Rc>>2]*+g[Ib>>2];g[Ch>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ci>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ah>>2]=+g[(c[m>>2]|0)+56>>2];g[bi>>2]=+g[(c[m>>2]|0)+60>>2];g[di>>2]=+g[Ah>>2]*+g[Ch>>2]+ +g[bi>>2]*+g[ci>>2];g[Lb>>2]=+g[Ah>>2]*+g[ci>>2]-+g[bi>>2]*+g[Ch>>2];g[fi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[hi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ei>>2]=+g[(c[m>>2]|0)+184>>2];g[gi>>2]=+g[(c[m>>2]|0)+188>>2];g[ii>>2]=+g[ei>>2]*+g[fi>>2]+ +g[gi>>2]*+g[hi>>2];g[Mb>>2]=+g[ei>>2]*+g[hi>>2]-+g[gi>>2]*+g[fi>>2];g[rg>>2]=+g[q>>2]+ +g[hf>>2];g[ji>>2]=+g[di>>2]+ 
+g[ii>>2];g[ki>>2]=+g[rg>>2]+ +g[ji>>2];g[oe>>2]=+g[rg>>2]-+g[ji>>2];g[Cg>>2]=+g[yg>>2]-+g[xg>>2];g[Dg>>2]=+g[di>>2]-+g[ii>>2];g[Eg>>2]=+g[Cg>>2]-+g[Dg>>2];g[Sg>>2]=+g[Dg>>2]+ +g[Cg>>2];g[Kb>>2]=+g[q>>2]-+g[hf>>2];g[Nb>>2]=+g[Lb>>2]-+g[Mb>>2];g[Ob>>2]=+g[Kb>>2]-+g[Nb>>2];g[td>>2]=+g[Kb>>2]+ +g[Nb>>2];g[wg>>2]=+g[Lb>>2]+ +g[Mb>>2];g[zg>>2]=+g[xg>>2]+ +g[yg>>2];g[Ag>>2]=+g[wg>>2]+ +g[zg>>2];g[mh>>2]=+g[zg>>2]-+g[wg>>2];g[ua>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[wa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ta>>2]=+g[c[m>>2]>>2];g[va>>2]=+g[(c[m>>2]|0)+4>>2];g[xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[wa>>2];g[Zb>>2]=+g[ta>>2]*+g[wa>>2]-+g[va>>2]*+g[ua>>2];g[M>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[O>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[L>>2]=+g[(c[m>>2]|0)+192>>2];g[N>>2]=+g[(c[m>>2]|0)+196>>2];g[P>>2]=+g[L>>2]*+g[M>>2]+ +g[N>>2]*+g[O>>2];g[qc>>2]=+g[L>>2]*+g[O>>2]-+g[N>>2]*+g[M>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ya>>2]=+g[(c[m>>2]|0)+128>>2];g[C>>2]=+g[(c[m>>2]|0)+132>>2];g[E>>2]=+g[ya>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[_b>>2]=+g[ya>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[G>>2]=+g[(c[m>>2]|0)+64>>2];g[I>>2]=+g[(c[m>>2]|0)+68>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[pc>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[F>>2]=+g[xa>>2]+ +g[E>>2];g[Q>>2]=+g[K>>2]+ +g[P>>2];g[R>>2]=+g[F>>2]+ +g[Q>>2];g[kf>>2]=+g[F>>2]-+g[Q>>2];g[Fe>>2]=+g[Zb>>2]+ +g[_b>>2];g[Ge>>2]=+g[pc>>2]+ +g[qc>>2];g[He>>2]=+g[Fe>>2]-+g[Ge>>2];g[Cf>>2]=+g[Fe>>2]+ +g[Ge>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[ac>>2]=+g[K>>2]-+g[P>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[ie>>2]=+g[$b>>2]-+g[ac>>2];g[oc>>2]=+g[xa>>2]-+g[E>>2];g[Sc>>2]=+g[pc>>2]-+g[qc>>2];g[Tc>>2]=+g[oc>>2]-+g[Sc>>2];g[fe>>2]=+g[oc>>2]+ 
+g[Sc>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[nb>>2]=+g[(c[m>>2]|0)+240>>2];g[pb>>2]=+g[(c[m>>2]|0)+244>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[Od>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[Eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Db>>2]=+g[(c[m>>2]|0)+176>>2];g[Fb>>2]=+g[(c[m>>2]|0)+180>>2];g[Hb>>2]=+g[Db>>2]*+g[Eb>>2]+ +g[Fb>>2]*+g[Gb>>2];g[$c>>2]=+g[Db>>2]*+g[Gb>>2]-+g[Fb>>2]*+g[Eb>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[sb>>2]=+g[(c[m>>2]|0)+112>>2];g[ub>>2]=+g[(c[m>>2]|0)+116>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[Pd>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Bb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[yb>>2]=+g[(c[m>>2]|0)+48>>2];g[Ab>>2]=+g[(c[m>>2]|0)+52>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Ab>>2]*+g[Bb>>2];g[_c>>2]=+g[yb>>2]*+g[Bb>>2]-+g[Ab>>2]*+g[zb>>2];g[xb>>2]=+g[rb>>2]+ +g[wb>>2];g[Ka>>2]=+g[Cb>>2]+ +g[Hb>>2];g[La>>2]=+g[xb>>2]+ +g[Ka>>2];g[qf>>2]=+g[xb>>2]-+g[Ka>>2];g[Vf>>2]=+g[Od>>2]+ +g[Pd>>2];g[Wf>>2]=+g[_c>>2]+ +g[$c>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[Hf>>2]=+g[Vf>>2]+ +g[Wf>>2];g[Zc>>2]=+g[rb>>2]-+g[wb>>2];g[Ad>>2]=+g[_c>>2]-+g[$c>>2];g[Bd>>2]=+g[Zc>>2]-+g[Ad>>2];g[Me>>2]=+g[Zc>>2]+ +g[Ad>>2];g[Qd>>2]=+g[Od>>2]-+g[Pd>>2];g[Rd>>2]=+g[Cb>>2]-+g[Hb>>2];g[Sd>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Pe>>2]=+g[Qd>>2]-+g[Rd>>2];g[mi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[oi>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[li>>2]=+g[(c[m>>2]|0)+24>>2];g[ni>>2]=+g[(c[m>>2]|0)+28>>2];g[pi>>2]=+g[li>>2]*+g[mi>>2]+ +g[ni>>2]*+g[oi>>2];g[Pb>>2]=+g[li>>2]*+g[oi>>2]-+g[ni>>2]*+g[mi>>2];g[Eh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Gh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Dh>>2]=+g[(c[m>>2]|0)+88>>2];g[Fh>>2]=+g[(c[m>>2]|0)+92>>2];g[Hh>>2]=+g[Dh>>2]*+g[Eh>>2]+ 
+g[Fh>>2]*+g[Gh>>2];g[uc>>2]=+g[Dh>>2]*+g[Gh>>2]-+g[Fh>>2]*+g[Eh>>2];g[ri>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[ti>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[qi>>2]=+g[(c[m>>2]|0)+152>>2];g[si>>2]=+g[(c[m>>2]|0)+156>>2];g[ui>>2]=+g[qi>>2]*+g[ri>>2]+ +g[si>>2]*+g[ti>>2];g[Qb>>2]=+g[qi>>2]*+g[ti>>2]-+g[si>>2]*+g[ri>>2];g[xi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[zi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[wi>>2]=+g[(c[m>>2]|0)+216>>2];g[yi>>2]=+g[(c[m>>2]|0)+220>>2];g[Ai>>2]=+g[wi>>2]*+g[xi>>2]+ +g[yi>>2]*+g[zi>>2];g[tc>>2]=+g[wi>>2]*+g[zi>>2]-+g[yi>>2]*+g[xi>>2];g[vi>>2]=+g[pi>>2]+ +g[ui>>2];g[Ih>>2]=+g[Ai>>2]+ +g[Hh>>2];g[Jh>>2]=+g[vi>>2]+ +g[Ih>>2];g[lh>>2]=+g[Ih>>2]-+g[vi>>2];g[pe>>2]=+g[Pb>>2]+ +g[Qb>>2];g[qe>>2]=+g[tc>>2]+ +g[uc>>2];g[re>>2]=+g[pe>>2]-+g[qe>>2];g[vg>>2]=+g[pe>>2]+ +g[qe>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[Sb>>2]=+g[pi>>2]-+g[ui>>2];g[rc>>2]=+g[Rb>>2]-+g[Sb>>2];g[ud>>2]=+g[Sb>>2]+ +g[Rb>>2];g[sc>>2]=+g[Ai>>2]-+g[Hh>>2];g[vc>>2]=+g[tc>>2]-+g[uc>>2];g[wc>>2]=+g[sc>>2]+ +g[vc>>2];g[vd>>2]=+g[sc>>2]-+g[vc>>2];g[Mh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Oh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Lh>>2]=+g[(c[m>>2]|0)+8>>2];g[Nh>>2]=+g[(c[m>>2]|0)+12>>2];g[Ph>>2]=+g[Lh>>2]*+g[Mh>>2]+ +g[Nh>>2]*+g[Oh>>2];g[zc>>2]=+g[Lh>>2]*+g[Oh>>2]-+g[Nh>>2]*+g[Mh>>2];g[ai>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[$h>>2]=+g[(c[m>>2]|0)+200>>2];g[r>>2]=+g[(c[m>>2]|0)+204>>2];g[t>>2]=+g[$h>>2]*+g[ai>>2]+ +g[r>>2]*+g[s>>2];g[Gc>>2]=+g[$h>>2]*+g[s>>2]-+g[r>>2]*+g[ai>>2];g[Rh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Th>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Qh>>2]=+g[(c[m>>2]|0)+136>>2];g[Sh>>2]=+g[(c[m>>2]|0)+140>>2];g[Uh>>2]=+g[Qh>>2]*+g[Rh>>2]+ 
+g[Sh>>2]*+g[Th>>2];g[Ac>>2]=+g[Qh>>2]*+g[Th>>2]-+g[Sh>>2]*+g[Rh>>2];g[Xh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Zh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Wh>>2]=+g[(c[m>>2]|0)+72>>2];g[Yh>>2]=+g[(c[m>>2]|0)+76>>2];g[_h>>2]=+g[Wh>>2]*+g[Xh>>2]+ +g[Yh>>2]*+g[Zh>>2];g[Fc>>2]=+g[Wh>>2]*+g[Zh>>2]-+g[Yh>>2]*+g[Xh>>2];g[Vh>>2]=+g[Ph>>2]+ +g[Uh>>2];g[u>>2]=+g[_h>>2]+ +g[t>>2];g[v>>2]=+g[Vh>>2]+ +g[u>>2];g[we>>2]=+g[Vh>>2]-+g[u>>2];g[te>>2]=+g[zc>>2]+ +g[Ac>>2];g[ue>>2]=+g[Fc>>2]+ +g[Gc>>2];g[ve>>2]=+g[te>>2]-+g[ue>>2];g[xf>>2]=+g[te>>2]+ +g[ue>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[Cc>>2]=+g[_h>>2]-+g[t>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[yd>>2]=+g[Bc>>2]-+g[Cc>>2];g[Ec>>2]=+g[Ph>>2]-+g[Uh>>2];g[Hc>>2]=+g[Fc>>2]-+g[Gc>>2];g[Ic>>2]=+g[Ec>>2]-+g[Hc>>2];g[zd>>2]=+g[Ec>>2]+ +g[Hc>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+232>>2];g[y>>2]=+g[(c[m>>2]|0)+236>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[Kc>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+168>>2];g[ma>>2]=+g[(c[m>>2]|0)+172>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[Tb>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+104>>2];g[ba>>2]=+g[(c[m>>2]|0)+108>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[Lc>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+40>>2];g[ha>>2]=+g[(c[m>>2]|0)+44>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ +g[ha>>2]*+g[ia>>2];g[Qc>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[ye>>2]=+g[ea>>2]-+g[pa>>2];g[ze>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Ae>>2]=+g[Qc>>2]+ 
+g[Tb>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[yf>>2]=+g[ze>>2]+ +g[Ae>>2];g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Nc>>2]=+g[ja>>2]-+g[oa>>2];g[Oc>>2]=+g[Mc>>2]+ +g[Nc>>2];g[ae>>2]=+g[Mc>>2]-+g[Nc>>2];g[Pc>>2]=+g[A>>2]-+g[da>>2];g[Ub>>2]=+g[Qc>>2]-+g[Tb>>2];g[Vb>>2]=+g[Pc>>2]-+g[Ub>>2];g[be>>2]=+g[Pc>>2]+ +g[Ub>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+32>>2];g[U>>2]=+g[(c[m>>2]|0)+36>>2];g[W>>2]=+g[S>>2]*+g[T>>2]+ +g[U>>2]*+g[V>>2];g[ic>>2]=+g[S>>2]*+g[V>>2]-+g[U>>2]*+g[T>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[X>>2]=+g[(c[m>>2]|0)+160>>2];g[Z>>2]=+g[(c[m>>2]|0)+164>>2];g[Aa>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[jc>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[hc>>2]=+g[W>>2]-+g[Aa>>2];g[kc>>2]=+g[ic>>2]-+g[jc>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Ca>>2]=+g[(c[m>>2]|0)+224>>2];g[Ea>>2]=+g[(c[m>>2]|0)+228>>2];g[Ga>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[Ea>>2]*+g[Fa>>2];g[dc>>2]=+g[Ca>>2]*+g[Fa>>2]-+g[Ea>>2]*+g[Da>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ha>>2]=+g[(c[m>>2]|0)+96>>2];g[Ja>>2]=+g[(c[m>>2]|0)+100>>2];g[jb>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[ib>>2];g[ec>>2]=+g[Ha>>2]*+g[ib>>2]-+g[Ja>>2]*+g[Ia>>2];g[cc>>2]=+g[Ga>>2]-+g[jb>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[Ba>>2]=+g[W>>2]+ +g[Aa>>2];g[kb>>2]=+g[Ga>>2]+ +g[jb>>2];g[lb>>2]=+g[Ba>>2]+ +g[kb>>2];g[Ie>>2]=+g[kb>>2]-+g[Ba>>2];g[lf>>2]=+g[ic>>2]+ +g[jc>>2];g[mf>>2]=+g[dc>>2]+ +g[ec>>2];g[nf>>2]=+g[lf>>2]-+g[mf>>2];g[Df>>2]=+g[lf>>2]+ +g[mf>>2];g[gc>>2]=+g[cc>>2]-+g[fc>>2];g[lc>>2]=+g[hc>>2]+ +g[kc>>2];g[mc>>2]=(+g[gc>>2]-+g[lc>>2])*.7071067690849304;g[ge>>2]=(+g[lc>>2]+ +g[gc>>2])*.7071067690849304;g[Uc>>2]=+g[kc>>2]-+g[hc>>2];g[Vc>>2]=+g[cc>>2]+ +g[fc>>2];g[Wc>>2]=(+g[Uc>>2]-+g[Vc>>2])*.7071067690849304;g[Je>>2]=(+g[Uc>>2]+ 
+g[Vc>>2])*.7071067690849304;g[Na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ma>>2]=+g[(c[m>>2]|0)+16>>2];g[Oa>>2]=+g[(c[m>>2]|0)+20>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[Cd>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Ra>>2]=+g[(c[m>>2]|0)+144>>2];g[Ta>>2]=+g[(c[m>>2]|0)+148>>2];g[Va>>2]=+g[Ra>>2]*+g[Sa>>2]+ +g[Ta>>2]*+g[Ua>>2];g[Dd>>2]=+g[Ra>>2]*+g[Ua>>2]-+g[Ta>>2]*+g[Sa>>2];g[Ed>>2]=+g[Cd>>2]-+g[Dd>>2];g[Fd>>2]=+g[Qa>>2]-+g[Va>>2];g[Ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[_a>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Xa>>2]=+g[(c[m>>2]|0)+208>>2];g[Za>>2]=+g[(c[m>>2]|0)+212>>2];g[$a>>2]=+g[Xa>>2]*+g[Ya>>2]+ +g[Za>>2]*+g[_a>>2];g[Id>>2]=+g[Xa>>2]*+g[_a>>2]-+g[Za>>2]*+g[Ya>>2];g[bb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ab>>2]=+g[(c[m>>2]|0)+80>>2];g[cb>>2]=+g[(c[m>>2]|0)+84>>2];g[eb>>2]=+g[ab>>2]*+g[bb>>2]+ +g[cb>>2]*+g[db>>2];g[Jd>>2]=+g[ab>>2]*+g[db>>2]-+g[cb>>2]*+g[bb>>2];g[Hd>>2]=+g[$a>>2]-+g[eb>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Va>>2];g[fb>>2]=+g[$a>>2]+ +g[eb>>2];g[gb>>2]=+g[Wa>>2]+ +g[fb>>2];g[Yf>>2]=+g[fb>>2]-+g[Wa>>2];g[rf>>2]=+g[Cd>>2]+ +g[Dd>>2];g[sf>>2]=+g[Id>>2]+ +g[Jd>>2];g[Tf>>2]=+g[rf>>2]-+g[sf>>2];g[If>>2]=+g[rf>>2]+ +g[sf>>2];g[Gd>>2]=+g[Ed>>2]-+g[Fd>>2];g[Ld>>2]=+g[Hd>>2]+ +g[Kd>>2];g[Md>>2]=(+g[Gd>>2]-+g[Ld>>2])*.7071067690849304;g[Qe>>2]=(+g[Gd>>2]+ +g[Ld>>2])*.7071067690849304;g[Td>>2]=+g[Hd>>2]-+g[Kd>>2];g[Ud>>2]=+g[Fd>>2]+ +g[Ed>>2];g[Vd>>2]=(+g[Td>>2]-+g[Ud>>2])*.7071067690849304;g[Ne>>2]=(+g[Ud>>2]+ +g[Td>>2])*.7071067690849304;g[Kh>>2]=+g[ki>>2]+ +g[Jh>>2];g[ra>>2]=+g[v>>2]+ +g[qa>>2];g[sa>>2]=+g[Kh>>2]+ +g[ra>>2];g[Qf>>2]=+g[Kh>>2]-+g[ra>>2];g[ug>>2]=+g[xf>>2]+ +g[yf>>2];g[Bg>>2]=+g[vg>>2]+ +g[Ag>>2];g[ah>>2]=+g[ug>>2]+ +g[Bg>>2];g[ch>>2]=+g[Bg>>2]-+g[ug>>2];g[mb>>2]=+g[R>>2]+ 
+g[lb>>2];g[hb>>2]=+g[La>>2]+ +g[gb>>2];g[Jb>>2]=+g[mb>>2]+ +g[hb>>2];g[bh>>2]=+g[hb>>2]-+g[mb>>2];g[Rf>>2]=+g[Cf>>2]+ +g[Df>>2];g[Sf>>2]=+g[Hf>>2]+ +g[If>>2];g[sg>>2]=+g[Rf>>2]-+g[Sf>>2];g[tg>>2]=+g[Rf>>2]+ +g[Sf>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[sa>>2]-+g[Jb>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[ah>>2]-+g[tg>>2];g[c[k>>2]>>2]=+g[sa>>2]+ +g[Jb>>2];g[c[l>>2]>>2]=+g[tg>>2]+ +g[ah>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Qf>>2]-+g[sg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[ch>>2]-+g[bh>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Qf>>2]+ +g[sg>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[bh>>2]+ +g[ch>>2];g[wf>>2]=+g[ki>>2]-+g[Jh>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[Af>>2]=+g[wf>>2]+ +g[zf>>2];g[Mf>>2]=+g[wf>>2]-+g[zf>>2];g[eh>>2]=+g[qa>>2]-+g[v>>2];g[fh>>2]=+g[Ag>>2]-+g[vg>>2];g[gh>>2]=+g[eh>>2]+ +g[fh>>2];g[ih>>2]=+g[fh>>2]-+g[eh>>2];g[Bf>>2]=+g[R>>2]-+g[lb>>2];g[Ef>>2]=+g[Cf>>2]-+g[Df>>2];g[Ff>>2]=+g[Bf>>2]+ +g[Ef>>2];g[Nf>>2]=+g[Ef>>2]-+g[Bf>>2];g[Gf>>2]=+g[La>>2]-+g[gb>>2];g[Jf>>2]=+g[Hf>>2]-+g[If>>2];g[Kf>>2]=+g[Gf>>2]-+g[Jf>>2];g[Of>>2]=+g[Gf>>2]+ +g[Jf>>2];g[Lf>>2]=(+g[Ff>>2]+ +g[Kf>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Af>>2]-+g[Lf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Af>>2]+ +g[Lf>>2];g[dh>>2]=(+g[Nf>>2]+ +g[Of>>2])*.7071067690849304;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[dh>>2]+ +g[gh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[gh>>2]-+g[dh>>2];g[Pf>>2]=(+g[Nf>>2]-+g[Of>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Mf>>2]-+g[Pf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Mf>>2]+ +g[Pf>>2];g[hh>>2]=(+g[Kf>>2]-+g[Ff>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[hh>>2]+ +g[ih>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[ih>>2]-+g[hh>>2];g[se>>2]=+g[oe>>2]-+g[re>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[th>>2]=+g[mh>>2]-+g[lh>>2];g[eg>>2]=+g[oe>>2]+ +g[re>>2];g[xe>>2]=+g[ve>>2]-+g[we>>2];g[Ce>>2]=+g[ye>>2]+ +g[Be>>2];g[De>>2]=(+g[xe>>2]-+g[Ce>>2])*.7071067690849304;g[kh>>2]=(+g[xe>>2]+ 
+g[Ce>>2])*.7071067690849304;g[mg>>2]=+g[qf>>2]+ +g[Tf>>2];g[ng>>2]=+g[Xf>>2]+ +g[Yf>>2];g[og>>2]=+g[mg>>2]*.9238795042037964-+g[ng>>2]*.3826834261417389;g[uf>>2]=+g[ng>>2]*.9238795042037964+ +g[mg>>2]*.3826834261417389;g[fg>>2]=+g[we>>2]+ +g[ve>>2];g[gg>>2]=+g[ye>>2]-+g[Be>>2];g[hg>>2]=(+g[fg>>2]+ +g[gg>>2])*.7071067690849304;g[sh>>2]=(+g[gg>>2]-+g[fg>>2])*.7071067690849304;g[jf>>2]=+g[He>>2]-+g[Ie>>2];g[of>>2]=+g[kf>>2]-+g[nf>>2];g[pf>>2]=+g[jf>>2]*.9238795042037964+ +g[of>>2]*.3826834261417389;g[bg>>2]=+g[jf>>2]*.3826834261417389-+g[of>>2]*.9238795042037964;g[jg>>2]=+g[He>>2]+ +g[Ie>>2];g[kg>>2]=+g[kf>>2]+ +g[nf>>2];g[lg>>2]=+g[jg>>2]*.3826834261417389+ +g[kg>>2]*.9238795042037964;g[tf>>2]=+g[jg>>2]*.9238795042037964-+g[kg>>2]*.3826834261417389;g[Uf>>2]=+g[qf>>2]-+g[Tf>>2];g[Zf>>2]=+g[Xf>>2]-+g[Yf>>2];g[_f>>2]=+g[Uf>>2]*.3826834261417389-+g[Zf>>2]*.9238795042037964;g[cg>>2]=+g[Zf>>2]*.3826834261417389+ +g[Uf>>2]*.9238795042037964;g[Ee>>2]=+g[se>>2]+ +g[De>>2];g[$f>>2]=+g[pf>>2]+ +g[_f>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Ee>>2]-+g[$f>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ee>>2]+ +g[$f>>2];g[rh>>2]=+g[bg>>2]+ +g[cg>>2];g[uh>>2]=+g[sh>>2]+ +g[th>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[rh>>2]+ +g[uh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[uh>>2]-+g[rh>>2];g[ag>>2]=+g[se>>2]-+g[De>>2];g[dg>>2]=+g[bg>>2]-+g[cg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[ag>>2]-+g[dg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ag>>2]+ +g[dg>>2];g[vh>>2]=+g[_f>>2]-+g[pf>>2];g[wh>>2]=+g[th>>2]-+g[sh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[vh>>2]+ +g[wh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[wh>>2]-+g[vh>>2];g[ig>>2]=+g[eg>>2]+ +g[hg>>2];g[pg>>2]=+g[lg>>2]+ +g[og>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[ig>>2]-+g[pg>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ig>>2]+ +g[pg>>2];g[jh>>2]=+g[tf>>2]+ +g[uf>>2];g[oh>>2]=+g[kh>>2]+ +g[nh>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[jh>>2]+ 
+g[oh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[oh>>2]-+g[jh>>2];g[qg>>2]=+g[eg>>2]-+g[hg>>2];g[vf>>2]=+g[tf>>2]-+g[uf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[qg>>2]-+g[vf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[qg>>2]+ +g[vf>>2];g[ph>>2]=+g[og>>2]-+g[lg>>2];g[qh>>2]=+g[nh>>2]-+g[kh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[ph>>2]+ +g[qh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[qh>>2]-+g[ph>>2];g[xc>>2]=(+g[rc>>2]-+g[wc>>2])*.7071067690849304;g[yc>>2]=+g[Ob>>2]-+g[xc>>2];g[dd>>2]=+g[Ob>>2]+ +g[xc>>2];g[Rg>>2]=(+g[vd>>2]-+g[ud>>2])*.7071067690849304;g[Tg>>2]=+g[Rg>>2]+ +g[Sg>>2];g[Zg>>2]=+g[Sg>>2]-+g[Rg>>2];g[Jc>>2]=+g[Dc>>2]*.3826834261417389-+g[Ic>>2]*.9238795042037964;g[Wb>>2]=+g[Oc>>2]*.3826834261417389+ +g[Vb>>2]*.9238795042037964;g[Xb>>2]=+g[Jc>>2]-+g[Wb>>2];g[Qg>>2]=+g[Jc>>2]+ +g[Wb>>2];g[ld>>2]=+g[Bd>>2]+ +g[Md>>2];g[md>>2]=+g[Sd>>2]+ +g[Vd>>2];g[nd>>2]=+g[ld>>2]*.8314695954322815-+g[md>>2]*.5555702447891235;g[rd>>2]=+g[md>>2]*.8314695954322815+ +g[ld>>2]*.5555702447891235;g[nc>>2]=+g[bc>>2]-+g[mc>>2];g[Xc>>2]=+g[Tc>>2]-+g[Wc>>2];g[Yc>>2]=+g[nc>>2]*.9807852506637573+ +g[Xc>>2]*.19509032368659973;g[ad>>2]=+g[nc>>2]*.19509032368659973-+g[Xc>>2]*.9807852506637573;g[ed>>2]=+g[Dc>>2]*.9238795042037964+ +g[Ic>>2]*.3826834261417389;g[fd>>2]=+g[Vb>>2]*.3826834261417389-+g[Oc>>2]*.9238795042037964;g[gd>>2]=+g[ed>>2]+ +g[fd>>2];g[Yg>>2]=+g[fd>>2]-+g[ed>>2];g[id>>2]=+g[bc>>2]+ +g[mc>>2];g[jd>>2]=+g[Tc>>2]+ +g[Wc>>2];g[kd>>2]=+g[id>>2]*.5555702447891235+ +g[jd>>2]*.8314695954322815;g[qd>>2]=+g[id>>2]*.8314695954322815-+g[jd>>2]*.5555702447891235;g[Nd>>2]=+g[Bd>>2]-+g[Md>>2];g[Wd>>2]=+g[Sd>>2]-+g[Vd>>2];g[Xd>>2]=+g[Nd>>2]*.19509032368659973-+g[Wd>>2]*.9807852506637573;g[bd>>2]=+g[Wd>>2]*.19509032368659973+ +g[Nd>>2]*.9807852506637573;g[Yb>>2]=+g[yc>>2]+ +g[Xb>>2];g[Yd>>2]=+g[Yc>>2]+ +g[Xd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Yb>>2]-+g[Yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Yb>>2]+ +g[Yd>>2];g[Xg>>2]=+g[ad>>2]+ 
+g[bd>>2];g[_g>>2]=+g[Yg>>2]+ +g[Zg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Xg>>2]+ +g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[_g>>2]-+g[Xg>>2];g[Zd>>2]=+g[yc>>2]-+g[Xb>>2];g[cd>>2]=+g[ad>>2]-+g[bd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Zd>>2]-+g[cd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Zd>>2]+ +g[cd>>2];g[$g>>2]=+g[Xd>>2]-+g[Yc>>2];g[Bh>>2]=+g[Zg>>2]-+g[Yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[$g>>2]+ +g[Bh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Bh>>2]-+g[$g>>2];g[hd>>2]=+g[dd>>2]+ +g[gd>>2];g[od>>2]=+g[kd>>2]+ +g[nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[hd>>2]-+g[od>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[hd>>2]+ +g[od>>2];g[Pg>>2]=+g[qd>>2]+ +g[rd>>2];g[Ug>>2]=+g[Qg>>2]+ +g[Tg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Pg>>2]+ +g[Ug>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Ug>>2]-+g[Pg>>2];g[pd>>2]=+g[dd>>2]-+g[gd>>2];g[sd>>2]=+g[qd>>2]-+g[rd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[pd>>2]-+g[sd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[pd>>2]+ +g[sd>>2];g[Vg>>2]=+g[nd>>2]-+g[kd>>2];g[Wg>>2]=+g[Tg>>2]-+g[Qg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Vg>>2]+ +g[Wg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Wg>>2]-+g[Vg>>2];g[wd>>2]=(+g[ud>>2]+ +g[vd>>2])*.7071067690849304;g[xd>>2]=+g[td>>2]-+g[wd>>2];g[Ye>>2]=+g[td>>2]+ +g[wd>>2];g[zh>>2]=(+g[rc>>2]+ +g[wc>>2])*.7071067690849304;g[Fg>>2]=+g[zh>>2]+ +g[Eg>>2];g[Lg>>2]=+g[Eg>>2]-+g[zh>>2];g[$d>>2]=+g[yd>>2]*.9238795042037964-+g[zd>>2]*.3826834261417389;g[ce>>2]=+g[ae>>2]*.9238795042037964+ +g[be>>2]*.3826834261417389;g[de>>2]=+g[$d>>2]-+g[ce>>2];g[yh>>2]=+g[$d>>2]+ +g[ce>>2];g[ef>>2]=+g[Me>>2]+ +g[Ne>>2];g[ff>>2]=+g[Pe>>2]+ +g[Qe>>2];g[gf>>2]=+g[ef>>2]*.9807852506637573-+g[ff>>2]*.19509032368659973;g[me>>2]=+g[ef>>2]*.19509032368659973+ +g[ff>>2]*.9807852506637573;g[he>>2]=+g[fe>>2]-+g[ge>>2];g[Ke>>2]=+g[ie>>2]-+g[Je>>2];g[Le>>2]=+g[he>>2]*.5555702447891235+ 
+g[Ke>>2]*.8314695954322815;g[Ve>>2]=+g[Ke>>2]*.5555702447891235-+g[he>>2]*.8314695954322815;g[Ze>>2]=+g[yd>>2]*.3826834261417389+ +g[zd>>2]*.9238795042037964;g[_e>>2]=+g[be>>2]*.9238795042037964-+g[ae>>2]*.3826834261417389;g[$e>>2]=+g[Ze>>2]+ +g[_e>>2];g[Kg>>2]=+g[_e>>2]-+g[Ze>>2];g[bf>>2]=+g[fe>>2]+ +g[ge>>2];g[cf>>2]=+g[ie>>2]+ +g[Je>>2];g[df>>2]=+g[bf>>2]*.9807852506637573+ +g[cf>>2]*.19509032368659973;g[le>>2]=+g[cf>>2]*.9807852506637573-+g[bf>>2]*.19509032368659973;g[Oe>>2]=+g[Me>>2]-+g[Ne>>2];g[Re>>2]=+g[Pe>>2]-+g[Qe>>2];g[Se>>2]=+g[Oe>>2]*.5555702447891235-+g[Re>>2]*.8314695954322815;g[We>>2]=+g[Oe>>2]*.8314695954322815+ +g[Re>>2]*.5555702447891235;g[ee>>2]=+g[xd>>2]+ +g[de>>2];g[Te>>2]=+g[Le>>2]+ +g[Se>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[ee>>2]-+g[Te>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ee>>2]+ +g[Te>>2];g[Jg>>2]=+g[Ve>>2]+ +g[We>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Jg>>2]+ +g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Mg>>2]-+g[Jg>>2];g[Ue>>2]=+g[xd>>2]-+g[de>>2];g[Xe>>2]=+g[Ve>>2]-+g[We>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Ue>>2]-+g[Xe>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ue>>2]+ +g[Xe>>2];g[Ng>>2]=+g[Se>>2]-+g[Le>>2];g[Og>>2]=+g[Lg>>2]-+g[Kg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ng>>2]+ +g[Og>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Og>>2]-+g[Ng>>2];g[af>>2]=+g[Ye>>2]+ +g[$e>>2];g[je>>2]=+g[df>>2]+ +g[gf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[af>>2]-+g[je>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[af>>2]+ +g[je>>2];g[xh>>2]=+g[le>>2]+ +g[me>>2];g[Gg>>2]=+g[yh>>2]+ +g[Fg>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[xh>>2]+ +g[Gg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Gg>>2]-+g[xh>>2];g[ke>>2]=+g[Ye>>2]-+g[$e>>2];g[ne>>2]=+g[le>>2]-+g[me>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[ke>>2]-+g[ne>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[ke>>2]+ +g[ne>>2];g[Hg>>2]=+g[gf>>2]-+g[df>>2];g[Ig>>2]=+g[Fg>>2]-+g[yh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Hg>>2]+ 
+g[Ig>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Ig>>2]-+g[Hg>>2];c[Bi>>2]=(c[Bi>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+248;c[n>>2]=c[n>>2]^c[2998]}i=Ci;return}/* pj(a): saves the stack pointer i, reserves a 16-byte frame, spills a into it, calls the non-local function gh(a, 9, 2120) and then restores i. NOTE(review): looks like Emscripten output for an FFTW codelet-registration stub (9 presumably a function-table index, 2120 presumably the address of a static descriptor) - confirm against the FFTW build. */function pj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,9,2120);i=b;return}/* qj(a, b, d, e, f, h, j): in-place 3-point butterfly pass. a and b are byte addresses of two arrays of 4-byte floats (presumably real and imaginary parts - confirm with caller); d is the byte address of a coefficient table; e is the element stride between the three inputs (elements 0, e and 2*e); the loop counter starts at f and stops once it reaches h; j is the element stride added to a and b after every iteration. d is advanced by 16 bytes per unit of f before the loop and by 16 bytes per iteration. Results are written back over the inputs. g and c are heap views declared outside this function (presumably the float32 and int32 views of the Emscripten heap). NOTE(review): has the shape of FFTW's radix-3 twiddle codelet - confirm. */function qj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0;L=i;i=i+128|0;/* 128-byte stack frame based at L; each local assigned below holds the byte address of a 4-byte slot in that frame, not a value */k=L+116|0;l=L+112|0;m=L+108|0;n=L+104|0;M=L+100|0;o=L+96|0;p=L+92|0;K=L+80|0;q=L+76|0;H=L+72|0;v=L+68|0;D=L+64|0;A=L+60|0;E=L+56|0;B=L+52|0;G=L+48|0;s=L+44|0;u=L+40|0;r=L+36|0;t=L+32|0;x=L+28|0;z=L+24|0;w=L+20|0;y=L+16|0;C=L+12|0;F=L+8|0;I=L+4|0;J=L;/* spill the seven arguments into their frame slots */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[M>>2]=f;c[o>>2]=h;c[p>>2]=j;/* .8660253882408142 is sqrt(3)/2 at float32 precision; the arithmetic below uses the literals directly */g[L+88>>2]=.8660253882408142;g[L+84>>2]=.5;c[K>>2]=c[M>>2];c[m>>2]=(c[m>>2]|0)+(c[M>>2]<<2<<2);while(1){if((c[K>>2]|0)>=(c[o>>2]|0))break;/* element 0 of both arrays */g[q>>2]=+g[c[k>>2]>>2];g[H>>2]=+g[c[l>>2]>>2];/* element e, multiplied by the coefficient pair at table offsets 0 and 4: re = wr*xr + wi*xi, im = wr*xi - wi*xr */g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[D>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];/* element 2*e, multiplied the same way by the pair at table offsets 8 and 12 */g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+8>>2];g[y>>2]=+g[(c[m>>2]|0)+12>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[E>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];/* element 0 output = element 0 input + sum of the two products */g[B>>2]=+g[v>>2]+ +g[A>>2];g[G>>2]=+g[D>>2]+ +g[E>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[B>>2];g[c[l>>2]>>2]=+g[G>>2]+ +g[H>>2];/* outputs for elements e and 2*e: (input 0 - sum/2) plus or minus sqrt(3)/2 times the difference of the products */g[C>>2]=+g[q>>2]-+g[B>>2]*.5;g[F>>2]=(+g[D>>2]-+g[E>>2])*.8660253882408142;g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[C>>2]-+g[F>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[C>>2]+ +g[F>>2];g[I>>2]=(+g[A>>2]-+g[v>>2])*.8660253882408142;g[J>>2]=+g[H>>2]-+g[G>>2]*.5;g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]+ 
+g[J>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[J>>2]-+g[I>>2];/* end of one loop iteration: increment the counter in slot K, advance the two array pointers in slots k and l by the stride in slot p (scaled to 4-byte elements) and the table pointer in slot m by 16 bytes */c[K>>2]=(c[K>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+16}i=L;return}/* rj(a): saves the stack pointer i, reserves a 16-byte frame, spills a into it, calls the non-local function gh(a, 10, 2184) and then restores i. NOTE(review): looks like Emscripten output for an FFTW codelet-registration stub (10 presumably a function-table index, 2184 presumably the address of a static descriptor) - confirm against the FFTW build. */function rj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,10,2184);i=b;return}/* sj(a, b, d, e, f, h, j): in-place 4-point butterfly pass. a and b are byte addresses of two arrays of 4-byte floats (presumably real and imaginary parts - confirm with caller); d is the byte address of a coefficient table; e is the element stride between the four inputs (elements 0, e, 2*e and 3*e); the loop counter starts at f and stops once it reaches h; j is the element stride added to a and b after every iteration. d is advanced by 24 bytes (six floats) per unit of f before the loop and by 24 bytes per iteration. Only additions and subtractions follow the three coefficient multiplications; results are written back over the inputs. g and c are heap views declared outside this function (presumably the float32 and int32 views of the Emscripten heap). NOTE(review): has the shape of FFTW's radix-4 twiddle codelet - confirm. */function sj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;/* 144-byte stack frame based at T; each local assigned below holds the byte address of a 4-byte slot in that frame, not a value */k=T+140|0;l=T+136|0;m=T+132|0;n=T+128|0;U=T+124|0;o=T+120|0;p=T+116|0;S=T+112|0;q=T+108|0;O=T+104|0;v=T+100|0;N=T+96|0;B=T+92|0;J=T+88|0;G=T+84|0;K=T+80|0;s=T+76|0;u=T+72|0;r=T+68|0;t=T+64|0;y=T+60|0;A=T+56|0;x=T+52|0;z=T+48|0;D=T+44|0;F=T+40|0;C=T+36|0;E=T+32|0;w=T+28|0;H=T+24|0;M=T+20|0;P=T+16|0;I=T+12|0;L=T+8|0;Q=T+4|0;R=T;/* spill the seven arguments into their frame slots */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[U>>2]=f;c[o>>2]=h;c[p>>2]=j;c[S>>2]=c[U>>2];c[m>>2]=(c[m>>2]|0)+((c[U>>2]|0)*6<<2);while(1){if((c[S>>2]|0)>=(c[o>>2]|0))break;/* element 0 of both arrays */g[q>>2]=+g[c[k>>2]>>2];g[O>>2]=+g[c[l>>2]>>2];/* element 2*e, multiplied by the coefficient pair at table offsets 8 and 12: re = wr*xr + wi*xi, im = wr*xi - wi*xr */g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[r>>2]=+g[(c[m>>2]|0)+8>>2];g[t>>2]=+g[(c[m>>2]|0)+12>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[N>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];/* element e, multiplied the same way by the pair at table offsets 0 and 4 */g[y>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[A>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[x>>2]=+g[c[m>>2]>>2];g[z>>2]=+g[(c[m>>2]|0)+4>>2];g[B>>2]=+g[x>>2]*+g[y>>2]+ +g[z>>2]*+g[A>>2];g[J>>2]=+g[x>>2]*+g[A>>2]-+g[z>>2]*+g[y>>2];/* element 3*e, multiplied the same way by the pair at table offsets 16 and 20 */g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[F>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[C>>2]=+g[(c[m>>2]|0)+16>>2];g[E>>2]=+g[(c[m>>2]|0)+20>>2];g[G>>2]=+g[C>>2]*+g[D>>2]+ +g[E>>2]*+g[F>>2];g[K>>2]=+g[C>>2]*+g[F>>2]-+g[E>>2]*+g[D>>2];/* sums and differences written back to elements 0 and 2*e */g[w>>2]=+g[q>>2]+ +g[v>>2];g[H>>2]=+g[B>>2]+ +g[G>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[w>>2]-+g[H>>2];g[c[k>>2]>>2]=+g[w>>2]+ +g[H>>2];g[M>>2]=+g[J>>2]+ +g[K>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[c[l>>2]>>2]=+g[M>>2]+ 
+g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[P>>2]-+g[M>>2];/* remaining differences feed the outputs stored at elements e and 3*e of both arrays */g[I>>2]=+g[q>>2]-+g[v>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[I>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[I>>2]+ +g[L>>2];g[Q>>2]=+g[O>>2]-+g[N>>2];g[R>>2]=+g[B>>2]-+g[G>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Q>>2]-+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[R>>2]+ +g[Q>>2];/* end of one loop iteration: increment the counter in slot S, advance the two array pointers in slots k and l by the stride in slot p (scaled to 4-byte elements) and the table pointer in slot m by 24 bytes */c[S>>2]=(c[S>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+24}i=T;return}/* tj(a): saves the stack pointer i, reserves a 16-byte frame, spills a into it, calls the non-local function gh(a, 11, 2248) and then restores i. NOTE(review): looks like Emscripten output for an FFTW codelet-registration stub (11 presumably a function-table index, 2248 presumably the address of a static descriptor) - confirm against the FFTW build. */function tj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,11,2248);i=b;return}/* uj(a, b, d, e, f, h, j): in-place 5-point butterfly pass. a and b are byte addresses of two arrays of 4-byte floats (presumably real and imaginary parts - confirm with caller); d is the byte address of a coefficient table; e is the element stride between the five inputs (elements 0, e, 2*e, 3*e and 4*e); the loop counter starts at f and stops once it reaches h; j is the element stride added to a and b after every iteration. d is advanced by 32 bytes (eight floats) per unit of f before the loop and by 32 bytes per iteration. Results are written back over the inputs. g and c are heap views declared outside this function (presumably the float32 and int32 views of the Emscripten heap). NOTE(review): has the shape of FFTW's radix-5 twiddle codelet - confirm. */function uj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0;la=i;i=i+240|0;/* 240-byte stack frame based at la; each local assigned below holds the byte address of a 4-byte slot in that frame, not a value */k=la+236|0;l=la+232|0;m=la+228|0;n=la+224|0;ma=la+220|0;o=la+216|0;p=la+212|0;ka=la+192|0;q=la+188|0;D=la+184|0;ea=la+180|0;ha=la+176|0;I=la+172|0;H=la+168|0;A=la+164|0;B=la+160|0;C=la+156|0;O=la+152|0;Z=la+148|0;_=la+144|0;v=la+140|0;ca=la+136|0;Y=la+132|0;ga=la+128|0;N=la+124|0;da=la+120|0;T=la+116|0;fa=la+112|0;s=la+108|0;u=la+104|0;r=la+100|0;t=la+96|0;V=la+92|0;X=la+88|0;U=la+84|0;W=la+80|0;x=la+76|0;M=la+72|0;w=la+68|0;y=la+64|0;Q=la+60|0;S=la+56|0;P=la+52|0;R=la+48|0;ia=la+44|0;z=la+40|0;ba=la+36|0;ja=la+32|0;$=la+28|0;aa=la+24|0;J=la+20|0;K=la+16|0;G=la+12|0;L=la+8|0;E=la+4|0;F=la;/* spill the seven arguments into their frame slots */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[ma>>2]=f;c[o>>2]=h;c[p>>2]=j;/* four constants are copied into the frame here: .25, .559017 (sqrt(5)/4), .5877852 (sin 36 deg) and .9510565 (sin 72 deg), all at float32 precision */g[la+208>>2]=.25;g[la+204>>2]=.55901700258255;g[la+200>>2]=.5877852439880371;g[la+196>>2]=.9510565400123596;c[ka>>2]=c[ma>>2];c[m>>2]=(c[m>>2]|0)+(c[ma>>2]<<3<<2);while(1){if((c[ka>>2]|0)>=(c[o>>2]|0))break;/* element 0 of both arrays */g[q>>2]=+g[c[k>>2]>>2];g[D>>2]=+g[c[l>>2]>>2];/* element e and the coefficient pair at table offsets 0 and 4 */g[s>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[r>>2]=+g[c[m>>2]>>2];g[t>>2]=+g[(c[m>>2]|0)+4>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ 
+g[t>>2]*+g[u>>2];g[ca>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];/* load element 3*e from both arrays and multiply it by the coefficient pair at table offsets 16 and 20: re = wr*xr + wi*xi, im = wr*xi - wi*xr */g[V>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[X>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[U>>2]=+g[(c[m>>2]|0)+16>>2];g[W>>2]=+g[(c[m>>2]|0)+20>>2];g[Y>>2]=+g[U>>2]*+g[V>>2]+ +g[W>>2]*+g[X>>2];g[ga>>2]=+g[U>>2]*+g[X>>2]-+g[W>>2]*+g[V>>2];/* same for element 4*e with the pair at table offsets 24 and 28 */g[x>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+24>>2];g[y>>2]=+g[(c[m>>2]|0)+28>>2];g[N>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[M>>2];g[da>>2]=+g[w>>2]*+g[M>>2]-+g[y>>2]*+g[x>>2];/* same for element 2*e with the pair at table offsets 8 and 12 */g[Q>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[S>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[P>>2]=+g[(c[m>>2]|0)+8>>2];g[R>>2]=+g[(c[m>>2]|0)+12>>2];g[T>>2]=+g[P>>2]*+g[Q>>2]+ +g[R>>2]*+g[S>>2];g[fa>>2]=+g[P>>2]*+g[S>>2]-+g[R>>2]*+g[Q>>2];/* pairwise differences, then pairwise sums, of the four products */g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[I>>2]=+g[T>>2]-+g[Y>>2];g[H>>2]=+g[v>>2]-+g[N>>2];g[A>>2]=+g[ca>>2]+ +g[da>>2];g[B>>2]=+g[fa>>2]+ +g[ga>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[O>>2]=+g[v>>2]+ +g[N>>2];g[Z>>2]=+g[T>>2]+ +g[Y>>2];g[_>>2]=+g[O>>2]+ +g[Z>>2];/* element 0 output = element 0 input + sum of the four products */g[c[k>>2]>>2]=+g[q>>2]+ +g[_>>2];g[c[l>>2]>>2]=+g[C>>2]+ +g[D>>2];/* the remaining outputs combine the sums and differences with the constants .9510565, .5877852, .559017 and .25 and are stored back to elements e, 2*e, 3*e and 4*e; first the array addressed through slot k */g[ia>>2]=+g[ea>>2]*.9510565400123596+ +g[ha>>2]*.5877852439880371;g[z>>2]=+g[ha>>2]*.9510565400123596-+g[ea>>2]*.5877852439880371;g[$>>2]=(+g[O>>2]-+g[Z>>2])*.55901700258255;g[aa>>2]=+g[q>>2]-+g[_>>2]*.25;g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[ja>>2]=+g[aa>>2]-+g[$>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ba>>2]-+g[ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ja>>2]+ +g[z>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ba>>2]+ +g[ia>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]-+g[z>>2];/* then the array addressed through slot l */g[J>>2]=+g[H>>2]*.9510565400123596+ +g[I>>2]*.5877852439880371;g[K>>2]=+g[I>>2]*.9510565400123596-+g[H>>2]*.5877852439880371;g[E>>2]=(+g[A>>2]-+g[B>>2])*.55901700258255;g[F>>2]=+g[D>>2]-+g[C>>2]*.25;g[G>>2]=+g[E>>2]+ +g[F>>2];g[L>>2]=+g[F>>2]-+g[E>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]-+g[J>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[L>>2]-+g[K>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[J>>2]+ 
+g[G>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[K>>2]+ +g[L>>2];/* end of one loop iteration: increment the counter in slot ka, advance the two array pointers in slots k and l by the stride in slot p (scaled to 4-byte elements) and the table pointer in slot m by 32 bytes */c[ka>>2]=(c[ka>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+32}i=la;return}/* vj(a): saves the stack pointer i, reserves a 16-byte frame, spills a into it, calls the non-local function gh(a, 12, 2312) and then restores i. NOTE(review): looks like Emscripten output for an FFTW codelet-registration stub (12 presumably a function-table index, 2312 presumably the address of a static descriptor) - confirm against the FFTW build. */function vj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,12,2312);i=b;return}function wj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,z
f=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0,Ak=0,Bk=0,Ck=0,Dk=0,Ek=0,Fk=0,Gk=0,Hk=0,Ik=0,Jk=0,Kk=0,Lk=0,Mk=0,Nk=0,Ok=0,Pk=0,Qk=0,Rk=0,Sk=0,Tk=0,Uk=0,Vk=0,Wk=0,Xk=0,Yk=0,Zk=0,_k=0,$k=0,al=0,bl=0,cl=0,dl=0,el=0,fl=0,gl=0,hl=0,il=0,jl=0,kl=0,ll=0,ml=0,nl=0,ol=0,pl=0,ql=0,rl=0,sl=0,tl=0,ul=0,vl=0,wl=0,xl=0,yl=0,zl=0,Al=0,Bl=0,Cl=0,Dl=0,El=0,Fl=0,Gl=0,Hl=0,Il=0,Jl=0,Kl=0,Ll=0,Ml=0,Nl=0,Ol=0,Pl=0,Ql=0,Rl=0,Sl=0,Tl=0,Ul=0,Vl=0,Wl=0,Xl=0,Yl=0,Zl=0,_l=0,$l=0,am=0,bm=0,cm=0,dm=0,em=0,fm=0,gm=0,hm=0,im=0,jm=0,km=0,lm=0,mm=0,nm=0,om=0,pm=0,qm=0,rm=0,sm=0,tm=0,um=0,vm=0,wm=0,xm=0,ym=0,zm=0,Am=0,Bm=0,Cm=0,Dm=0,Em=0,Fm=0,Gm=0,Hm=0,Im=0,Jm=0,Km=0,Lm=0,Mm=0,Nm=0,Om=0,Pm=0,Qm=0,Rm=0,Sm=0,Tm=0,Um=0,V
m=0,Wm=0,Xm=0,Ym=0,Zm=0,_m=0,$m=0,an=0,bn=0,cn=0,dn=0,en=0,fn=0,gn=0,hn=0,jn=0,kn=0,ln=0,mn=0,nn=0,on=0,pn=0,qn=0,rn=0,sn=0,tn=0,un=0,vn=0,wn=0,xn=0,yn=0,zn=0,An=0,Bn=0,Cn=0,Dn=0,En=0,Fn=0,Gn=0,Hn=0,In=0,Jn=0,Kn=0,Ln=0,Mn=0,Nn=0,On=0,Pn=0,Qn=0,Rn=0,Sn=0,Tn=0,Un=0,Vn=0,Wn=0,Xn=0,Yn=0,Zn=0,_n=0,$n=0,ao=0,bo=0,co=0,eo=0,fo=0,go=0,ho=0,io=0,jo=0,ko=0,lo=0,mo=0,no=0,oo=0,po=0,qo=0,ro=0,so=0,to=0,uo=0,vo=0,wo=0,xo=0,yo=0,zo=0,Ao=0,Bo=0,Co=0,Do=0,Eo=0,Fo=0,Go=0,Ho=0,Io=0,Jo=0,Ko=0,Lo=0,Mo=0,No=0,Oo=0,Po=0,Qo=0,Ro=0,So=0,To=0,Uo=0,Vo=0,Wo=0,Xo=0,Yo=0,Zo=0,_o=0,$o=0,ap=0,bp=0,cp=0,dp=0,ep=0,fp=0,gp=0,hp=0,ip=0,jp=0,kp=0,lp=0,mp=0,np=0,op=0,pp=0,qp=0,rp=0,sp=0,tp=0,up=0,vp=0,wp=0,xp=0,yp=0,zp=0,Ap=0,Bp=0,Cp=0,Dp=0,Ep=0,Fp=0,Gp=0,Hp=0,Ip=0,Jp=0,Kp=0,Lp=0,Mp=0,Np=0,Op=0,Pp=0,Qp=0,Rp=0,Sp=0,Tp=0,Up=0,Vp=0,Wp=0,Xp=0,Yp=0,Zp=0,_p=0,$p=0,aq=0,bq=0,cq=0,dq=0,eq=0,fq=0,gq=0,hq=0,iq=0,jq=0,kq=0,lq=0,mq=0,nq=0,oq=0,pq=0,qq=0,rq=0,sq=0,tq=0,uq=0,vq=0,wq=0,xq=0,yq=0,zq=0,Aq=0,Bq=0,Cq=0,Dq=0,Eq=0,Fq=0,Gq=0,Hq=0,Iq=0,Jq=0,Kq=0,Lq=0,Mq=0,Nq=0,Oq=0,Pq=0,Qq=0,Rq=0,Sq=0,Tq=0,Uq=0,Vq=0,Wq=0,Xq=0,Yq=0,Zq=0,_q=0,$q=0,ar=0,br=0,cr=0,dr=0,er=0,fr=0,gr=0,hr=0,ir=0,jr=0,kr=0,lr=0,mr=0,nr=0,or=0,pr=0,qr=0,rr=0,sr=0,tr=0,ur=0,vr=0,wr=0,xr=0,yr=0,zr=0,Ar=0,Br=0,Cr=0,Dr=0,Er=0,Fr=0,Gr=0,Hr=0,Ir=0,Jr=0,Kr=0,Lr=0,Mr=0,Nr=0,Or=0,Pr=0,Qr=0,Rr=0,Sr=0,Tr=0,Ur=0,Vr=0,Wr=0,Xr=0,Yr=0,Zr=0,_r=0,$r=0,as=0,bs=0,cs=0,ds=0,es=0,fs=0,gs=0,hs=0,is=0,js=0,ks=0,ls=0,ms=0,ns=0,os=0,ps=0,qs=0,rs=0,ss=0,ts=0,us=0,vs=0,ws=0,xs=0,ys=0,zs=0,As=0,Bs=0,Cs=0,Ds=0,Es=0,Fs=0,Gs=0,Hs=0,Is=0,Js=0,Ks=0,Ls=0,Ms=0,Ns=0,Os=0,Ps=0,Qs=0,Rs=0,Ss=0,Ts=0,Us=0,Vs=0,Ws=0,Xs=0,Ys=0,Zs=0,_s=0,$s=0,at=0,bt=0,ct=0,dt=0,et=0,ft=0,gt=0,ht=0,it=0,jt=0,kt=0,lt=0,mt=0,nt=0,ot=0,pt=0,qt=0,rt=0,st=0,tt=0,ut=0,vt=0,wt=0,xt=0,yt=0,zt=0,At=0,Bt=0,Ct=0,Dt=0,Et=0,Ft=0,Gt=0,Ht=0,It=0,Jt=0,Kt=0,Lt=0,Mt=0,Nt=0,Ot=0,Pt=0,Qt=0,Rt=0,St=0,Tt=0,Ut=0,Vt=0,Wt=0,Xt=0,Yt=0,Zt=0,_t=0,$t=0,au=0,bu=0,cu=0,du=0,eu=0,fu=0,gu=0,hu=0,iu=0,ju=0,ku=0,lu=0,mu=0,nu=0,ou=0,pu=0,qu=0,r
u=0,su=0,tu=0,uu=0,vu=0,wu=0,xu=0,yu=0,zu=0,Au=0,Bu=0,Cu=0,Du=0,Eu=0,Fu=0,Gu=0,Hu=0,Iu=0,Ju=0,Ku=0,Lu=0,Mu=0,Nu=0,Ou=0,Pu=0,Qu=0,Ru=0,Su=0,Tu=0,Uu=0,Vu=0,Wu=0,Xu=0,Yu=0,Zu=0;Yu=i;i=i+4752|0;k=Yu+4744|0;l=Yu+4740|0;m=Yu+4736|0;n=Yu+4732|0;Zu=Yu+4728|0;o=Yu+4724|0;p=Yu+4720|0;Xu=Yu+4656|0;Gu=Yu+4652|0;cn=Yu+4648|0;dt=Yu+4644|0;tu=Yu+4640|0;Uf=Yu+4636|0;yl=Yu+4632|0;Wr=Yu+4628|0;xt=Yu+4624|0;Yj=Yu+4620|0;wt=Yu+4616|0;fn=Yu+4612|0;Rr=Yu+4608|0;dg=Yu+4604|0;at=Yu+4600|0;Dk=Yu+4596|0;su=Yu+4592|0;v=Yu+4588|0;ir=Yu+4584|0;pg=Yu+4580|0;gj=Yu+4576|0;Hk=Yu+4572|0;Jm=Yu+4568|0;mn=Yu+4564|0;pp=Yu+4560|0;qa=Yu+4556|0;jr=Yu+4552|0;Cf=Yu+4548|0;hj=Yu+4544|0;Kk=Yu+4540|0;Km=Yu+4536|0;rn=Yu+4532|0;qp=Yu+4528|0;R=Yu+4524|0;lb=Yu+4520|0;pr=Yu+4516|0;mr=Yu+4512|0;nr=Yu+4508|0;or=Yu+4504|0;Jf=Yu+4500|0;Nk=Yu+4496|0;Do=Yu+4492|0;up=Yu+4488|0;tg=Yu+4484|0;Rk=Yu+4480|0;ah=Yu+4476|0;Ok=Yu+4472|0;Zn=Yu+4468|0;tp=Yu+4464|0;zg=Yu+4460|0;Qk=Yu+4456|0;La=Yu+4452|0;gb=Yu+4448|0;rr=Yu+4444|0;uq=Yu+4440|0;vq=Yu+4436|0;wq=Yu+4432|0;hh=Yu+4428|0;Xk=Yu+4424|0;Oo=Yu+4420|0;xp=Yu+4416|0;sh=Yu+4412|0;Vk=Yu+4408|0;Dg=Yu+4404|0;Yk=Yu+4400|0;Jo=Yu+4396|0;wp=Yu+4392|0;yh=Yu+4388|0;Uk=Yu+4384|0;Se=Yu+4380|0;Lq=Yu+4376|0;Np=Yu+4372|0;kq=Yu+4368|0;Sq=Yu+4364|0;zs=Yu+4360|0;Oh=Yu+4356|0;tm=Yu+4352|0;Zh=Yu+4348|0;Fm=Yu+4344|0;Dj=Yu+4340|0;um=Yu+4336|0;xo=Yu+4332|0;Ip=Yu+4328|0;Aj=Yu+4324|0;Em=Yu+4320|0;dc=Yu+4316|0;Fq=Yu+4312|0;no=Yu+4308|0;Bp=Yu+4304|0;Cq=Yu+4300|0;us=Yu+4296|0;Mg=Yu+4292|0;Cl=Yu+4288|0;Xg=Yu+4284|0;mm=Yu+4280|0;Bi=Yu+4276|0;Dl=Yu+4272|0;Wo=Yu+4268|0;Ep=Yu+4264|0;yi=Yu+4260|0;lm=Yu+4256|0;Xd=Yu+4252|0;Dq=Yu+4248|0;go=Yu+4244|0;oo=Yu+4240|0;Iq=Yu+4236|0;vs=Yu+4232|0;Ih=Yu+4228|0;Ei=Yu+4224|0;ri=Yu+4220|0;Di=Yu+4216|0;im=Yu+4212|0;om=Yu+4208|0;$o=Yu+4204|0;po=Yu+4200|0;Hl=Yu+4196|0;pm=Yu+4192|0;nf=Yu+4188|0;Tq=Yu+4184|0;gp=Yu+4180|0;Op=Yu+4176|0;Oq=Yu+4172|0;As=Yu+4168|0;Ki=Yu+4164|0;Gj=Yu+4160|0;tj=Yu+4156|0;Fj=Yu+4152|0;Bm=Yu+4148|0;Hm=Yu+4144|0;bp=Yu+4140|0;Pp=Yu+4136|0;ym=Yu+4132|0;Kl=Yu+4128|
0;q=Yu+4124|0;Ur=Yu+4120|0;hf=Yu+4116|0;Tr=Yu+4112|0;Im=Yu+4108|0;rf=Yu+4104|0;Bs=Yu+4100|0;sf=Yu+4096|0;Ib=Yu+4092|0;_d=Yu+4088|0;za=Yu+4084|0;Rc=Yu+4080|0;Ji=Yu+4076|0;zl=Yu+4072|0;Ah=Yu+4068|0;qk=Yu+4064|0;ap=Yu+4060|0;sr=Yu+4056|0;Sn=Yu+4052|0;jq=Yu+4048|0;rg=Yu+4044|0;Kt=Yu+4040|0;bt=Yu+4036|0;ct=Yu+4032|0;qf=Yu+4028|0;Tf=Yu+4024|0;Sr=Yu+4020|0;Vr=Yu+4016|0;Lu=Yu+4012|0;Vf=Yu+4008|0;Qu=Yu+4004|0;Wf=Yu+4e3|0;Xf=Yu+3996|0;Yf=Yu+3992|0;Wu=Yu+3988|0;$f=Yu+3984|0;Wj=Yu+3980|0;ag=Yu+3976|0;_f=Yu+3972|0;bg=Yu+3968|0;Iu=Yu+3964|0;Ku=Yu+3960|0;Hu=Yu+3956|0;Ju=Yu+3952|0;Nu=Yu+3948|0;Pu=Yu+3944|0;Mu=Yu+3940|0;Ou=Yu+3936|0;Tu=Yu+3932|0;Vu=Yu+3928|0;Su=Yu+3924|0;Uu=Yu+3920|0;Tj=Yu+3916|0;Vj=Yu+3912|0;Sj=Yu+3908|0;Uj=Yu+3904|0;Ru=Yu+3900|0;Xj=Yu+3896|0;dn=Yu+3892|0;en=Yu+3888|0;Zf=Yu+3884|0;cg=Yu+3880|0;Bk=Yu+3876|0;Ck=Yu+3872|0;ik=Yu+3868|0;hn=Yu+3864|0;hg=Yu+3860|0;kg=Yu+3856|0;u=Yu+3852|0;jn=Yu+3848|0;ig=Yu+3844|0;ng=Yu+3840|0;jg=Yu+3836|0;og=Yu+3832|0;ck=Yu+3828|0;fg=Yu+3824|0;hk=Yu+3820|0;gg=Yu+3816|0;$j=Yu+3812|0;bk=Yu+3808|0;_j=Yu+3804|0;ak=Yu+3800|0;ek=Yu+3796|0;gk=Yu+3792|0;dk=Yu+3788|0;fk=Yu+3784|0;nk=Yu+3780|0;lg=Yu+3776|0;t=Yu+3772|0;mg=Yu+3768|0;kk=Yu+3764|0;mk=Yu+3760|0;jk=Yu+3756|0;lk=Yu+3752|0;pk=Yu+3748|0;s=Yu+3744|0;ok=Yu+3740|0;r=Yu+3736|0;Fk=Yu+3732|0;Gk=Yu+3728|0;kn=Yu+3724|0;ln=Yu+3720|0;ea=Yu+3716|0;on=Yu+3712|0;uf=Yu+3708|0;xf=Yu+3704|0;pa=Yu+3700|0;pn=Yu+3696|0;vf=Yu+3692|0;Af=Yu+3688|0;wf=Yu+3684|0;Bf=Yu+3680|0;A=Yu+3676|0;qg=Yu+3672|0;da=Yu+3668|0;tf=Yu+3664|0;x=Yu+3660|0;z=Yu+3656|0;w=Yu+3652|0;y=Yu+3648|0;aa=Yu+3644|0;ca=Yu+3640|0;$=Yu+3636|0;ba=Yu+3632|0;ja=Yu+3628|0;yf=Yu+3624|0;oa=Yu+3620|0;zf=Yu+3616|0;ga=Yu+3612|0;ia=Yu+3608|0;fa=Yu+3604|0;ha=Yu+3600|0;la=Yu+3596|0;na=Yu+3592|0;ka=Yu+3588|0;ma=Yu+3584|0;Ik=Yu+3580|0;Jk=Yu+3576|0;nn=Yu+3572|0;qn=Yu+3568|0;xa=Yu+3564|0;Ff=Yu+3560|0;E=Yu+3556|0;Gf=Yu+3552|0;F=Yu+3548|0;Vn=Yu+3544|0;K=Yu+3540|0;wg=Yu+3536|0;P=Yu+3532|0;xg=Yu+3528|0;Q=Yu+3524|0;Wn=Yu+3520|0;Ba=Yu+3516|0;$n=Yu+3512|0;Pf=Yu+3508|0;
Sf=Yu+3504|0;kb=Yu+3500|0;ao=Yu+3496|0;Kf=Yu+3492|0;Nf=Yu+3488|0;ua=Yu+3484|0;wa=Yu+3480|0;ta=Yu+3476|0;va=Yu+3472|0;B=Yu+3468|0;D=Yu+3464|0;ya=Yu+3460|0;C=Yu+3456|0;H=Yu+3452|0;J=Yu+3448|0;G=Yu+3444|0;I=Yu+3440|0;M=Yu+3436|0;O=Yu+3432|0;L=Yu+3428|0;N=Yu+3424|0;W=Yu+3420|0;Qf=Yu+3416|0;Aa=Yu+3412|0;Rf=Yu+3408|0;T=Yu+3404|0;V=Yu+3400|0;S=Yu+3396|0;U=Yu+3392|0;Y=Yu+3388|0;_=Yu+3384|0;X=Yu+3380|0;Z=Yu+3376|0;Ga=Yu+3372|0;Lf=Yu+3368|0;jb=Yu+3364|0;Mf=Yu+3360|0;Da=Yu+3356|0;Fa=Yu+3352|0;Ca=Yu+3348|0;Ea=Yu+3344|0;Ia=Yu+3340|0;ib=Yu+3336|0;Ha=Yu+3332|0;Ja=Yu+3328|0;Hf=Yu+3324|0;If=Yu+3320|0;_n=Yu+3316|0;Co=Yu+3312|0;Of=Yu+3308|0;sg=Yu+3304|0;Ag=Yu+3300|0;Bg=Yu+3296|0;Xn=Yu+3292|0;Yn=Yu+3288|0;vg=Yu+3284|0;yg=Yu+3280|0;rb=Yu+3276|0;dh=Yu+3272|0;wb=Yu+3268|0;eh=Yu+3264|0;xb=Yu+3260|0;Fo=Yu+3256|0;Cb=Yu+3252|0;vh=Yu+3248|0;Hb=Yu+3244|0;wh=Yu+3240|0;Ka=Yu+3236|0;Go=Yu+3232|0;Wa=Yu+3228|0;Lo=Yu+3224|0;nh=Yu+3220|0;qh=Yu+3216|0;fb=Yu+3212|0;Mo=Yu+3208|0;ih=Yu+3204|0;lh=Yu+3200|0;ob=Yu+3196|0;qb=Yu+3192|0;nb=Yu+3188|0;pb=Yu+3184|0;tb=Yu+3180|0;vb=Yu+3176|0;sb=Yu+3172|0;ub=Yu+3168|0;zb=Yu+3164|0;Bb=Yu+3160|0;yb=Yu+3156|0;Ab=Yu+3152|0;Eb=Yu+3148|0;Gb=Yu+3144|0;Db=Yu+3140|0;Fb=Yu+3136|0;Qa=Yu+3132|0;oh=Yu+3128|0;Va=Yu+3124|0;ph=Yu+3120|0;Na=Yu+3116|0;Pa=Yu+3112|0;Ma=Yu+3108|0;Oa=Yu+3104|0;Sa=Yu+3100|0;Ua=Yu+3096|0;Ra=Yu+3092|0;Ta=Yu+3088|0;$a=Yu+3084|0;jh=Yu+3080|0;eb=Yu+3076|0;kh=Yu+3072|0;Ya=Yu+3068|0;_a=Yu+3064|0;Xa=Yu+3060|0;Za=Yu+3056|0;bb=Yu+3052|0;db=Yu+3048|0;ab=Yu+3044|0;cb=Yu+3040|0;fh=Yu+3036|0;gh=Yu+3032|0;Ko=Yu+3028|0;No=Yu+3024|0;mh=Yu+3020|0;rh=Yu+3016|0;zh=Yu+3012|0;Cg=Yu+3008|0;Ho=Yu+3004|0;Io=Yu+3e3|0;uh=Yu+2996|0;xh=Yu+2992|0;dd=Yu+2988|0;wj=Yu+2984|0;id=Yu+2980|0;xj=Yu+2976|0;jd=Yu+2972|0;jp=Yu+2968|0;od=Yu+2964|0;Lh=Yu+2960|0;td=Yu+2956|0;Mh=Yu+2952|0;ud=Yu+2948|0;kp=Yu+2944|0;fe=Yu+2940|0;uo=Yu+2936|0;Rh=Yu+2932|0;Sh=Yu+2928|0;Qe=Yu+2924|0;vo=Yu+2920|0;Uh=Yu+2916|0;Xh=Yu+2912|0;ad=Yu+2908|0;cd=Yu+2904|0;Zd=Yu+2900|0;bd=Yu+2896|0;fd=Yu+2892|0;hd=Yu+2888|0;ed=Yu+
2884|0;gd=Yu+2880|0;ld=Yu+2876|0;nd=Yu+2872|0;kd=Yu+2868|0;md=Yu+2864|0;qd=Yu+2860|0;sd=Yu+2856|0;pd=Yu+2852|0;rd=Yu+2848|0;$d=Yu+2844|0;Ph=Yu+2840|0;ee=Yu+2836|0;Qh=Yu+2832|0;xd=Yu+2828|0;zd=Yu+2824|0;wd=Yu+2820|0;yd=Yu+2816|0;be=Yu+2812|0;de=Yu+2808|0;ae=Yu+2804|0;ce=Yu+2800|0;Ke=Yu+2796|0;Vh=Yu+2792|0;Pe=Yu+2788|0;Wh=Yu+2784|0;he=Yu+2780|0;Je=Yu+2776|0;ge=Yu+2772|0;ie=Yu+2768|0;Me=Yu+2764|0;Oe=Yu+2760|0;Le=Yu+2756|0;Ne=Yu+2752|0;vd=Yu+2748|0;Re=Yu+2744|0;Lp=Yu+2740|0;Mp=Yu+2736|0;Qq=Yu+2732|0;Rq=Yu+2728|0;Ii=Yu+2724|0;Nh=Yu+2720|0;Th=Yu+2716|0;Yh=Yu+2712|0;Bj=Yu+2708|0;Cj=Yu+2704|0;to=Yu+2700|0;wo=Yu+2696|0;yj=Yu+2692|0;zj=Yu+2688|0;Pb=Yu+2684|0;Ig=Yu+2680|0;sc=Yu+2676|0;Jg=Yu+2672|0;tc=Yu+2668|0;So=Yu+2664|0;yc=Yu+2660|0;vi=Yu+2656|0;Dc=Yu+2652|0;wi=Yu+2648|0;Ec=Yu+2644|0;To=Yu+2640|0;Qc=Yu+2636|0;ko=Yu+2632|0;Sg=Yu+2628|0;Vg=Yu+2624|0;bc=Yu+2620|0;lo=Yu+2616|0;Ng=Yu+2612|0;Qg=Yu+2608|0;Mb=Yu+2604|0;Ob=Yu+2600|0;Lb=Yu+2596|0;Nb=Yu+2592|0;Rb=Yu+2588|0;rc=Yu+2584|0;Qb=Yu+2580|0;Sb=Yu+2576|0;vc=Yu+2572|0;xc=Yu+2568|0;uc=Yu+2564|0;wc=Yu+2560|0;Ac=Yu+2556|0;Cc=Yu+2552|0;zc=Yu+2548|0;Bc=Yu+2544|0;Kc=Yu+2540|0;Tg=Yu+2536|0;Pc=Yu+2532|0;Ug=Yu+2528|0;Hc=Yu+2524|0;Jc=Yu+2520|0;Gc=Yu+2516|0;Ic=Yu+2512|0;Mc=Yu+2508|0;Oc=Yu+2504|0;Lc=Yu+2500|0;Nc=Yu+2496|0;Xb=Yu+2492|0;Og=Yu+2488|0;ac=Yu+2484|0;Pg=Yu+2480|0;Ub=Yu+2476|0;Wb=Yu+2472|0;Tb=Yu+2468|0;Vb=Yu+2464|0;Zb=Yu+2460|0;$b=Yu+2456|0;Yb=Yu+2452|0;_b=Yu+2448|0;Fc=Yu+2444|0;cc=Yu+2440|0;jo=Yu+2436|0;mo=Yu+2432|0;Aq=Yu+2428|0;Bq=Yu+2424|0;Kg=Yu+2420|0;Lg=Yu+2416|0;Rg=Yu+2412|0;Wg=Yu+2408|0;zi=Yu+2404|0;Ai=Yu+2400|0;Uo=Yu+2396|0;Vo=Yu+2392|0;ui=Yu+2388|0;xi=Yu+2384|0;oc=Yu+2380|0;co=Yu+2376|0;ji=Yu+2372|0;mi=Yu+2368|0;Vd=Yu+2364|0;Zo=Yu+2360|0;Bh=Yu+2356|0;Gh=Yu+2352|0;_c=Yu+2348|0;eo=Yu+2344|0;ki=Yu+2340|0;pi=Yu+2336|0;Kd=Yu+2332|0;Yo=Yu+2328|0;Zg=Yu+2324|0;Fh=Yu+2320|0;ic=Yu+2316|0;Jh=Yu+2312|0;nc=Yu+2308|0;Kh=Yu+2304|0;fc=Yu+2300|0;hc=Yu+2296|0;ec=Yu+2292|0;gc=Yu+2288|0;kc=Yu+2284|0;mc=Yu+2280|0;jc=Yu+2276|0;lc=Yu+2272|0;Pd=Y
u+2268|0;_g=Yu+2264|0;Ud=Yu+2260|0;$g=Yu+2256|0;Md=Yu+2252|0;Od=Yu+2248|0;Ld=Yu+2244|0;Nd=Yu+2240|0;Rd=Yu+2236|0;Td=Yu+2232|0;Qd=Yu+2228|0;Sd=Yu+2224|0;Uc=Yu+2220|0;ni=Yu+2216|0;Zc=Yu+2212|0;oi=Yu+2208|0;qc=Yu+2204|0;Tc=Yu+2200|0;pc=Yu+2196|0;Sc=Yu+2192|0;Wc=Yu+2188|0;Yc=Yu+2184|0;Vc=Yu+2180|0;Xc=Yu+2176|0;Ed=Yu+2172|0;Dh=Yu+2168|0;Jd=Yu+2164|0;Eh=Yu+2160|0;Bd=Yu+2156|0;Dd=Yu+2152|0;Ad=Yu+2148|0;Cd=Yu+2144|0;Gd=Yu+2140|0;Id=Yu+2136|0;Fd=Yu+2132|0;Hd=Yu+2128|0;$c=Yu+2124|0;Wd=Yu+2120|0;bo=Yu+2116|0;fo=Yu+2112|0;Gq=Yu+2108|0;Hq=Yu+2104|0;Ch=Yu+2100|0;Hh=Yu+2096|0;li=Yu+2092|0;qi=Yu+2088|0;Il=Yu+2084|0;Jl=Yu+2080|0;Xo=Yu+2076|0;_o=Yu+2072|0;Fl=Yu+2068|0;Gl=Yu+2064|0;bf=Yu+2060|0;yo=Yu+2056|0;bi=Yu+2052|0;ei=Yu+2048|0;lf=Yu+2044|0;ep=Yu+2040|0;Oi=Yu+2036|0;Ti=Yu+2032|0;oe=Yu+2028|0;zo=Yu+2024|0;ci=Yu+2020|0;hi=Yu+2016|0;Ae=Yu+2012|0;dp=Yu+2008|0;Ni=Yu+2004|0;Qi=Yu+2e3|0;Xe=Yu+1996|0;$h=Yu+1992|0;af=Yu+1988|0;ai=Yu+1984|0;Ue=Yu+1980|0;We=Yu+1976|0;Te=Yu+1972|0;Ve=Yu+1968|0;Ze=Yu+1964|0;$e=Yu+1960|0;Ye=Yu+1956|0;_e=Yu+1952|0;Fe=Yu+1948|0;Ri=Yu+1944|0;kf=Yu+1940|0;Si=Yu+1936|0;Ce=Yu+1932|0;Ee=Yu+1928|0;Be=Yu+1924|0;De=Yu+1920|0;He=Yu+1916|0;jf=Yu+1912|0;Ge=Yu+1908|0;Ie=Yu+1904|0;gf=Yu+1900|0;fi=Yu+1896|0;ne=Yu+1892|0;gi=Yu+1888|0;df=Yu+1884|0;ff=Yu+1880|0;cf=Yu+1876|0;ef=Yu+1872|0;ke=Yu+1868|0;me=Yu+1864|0;je=Yu+1860|0;le=Yu+1856|0;ue=Yu+1852|0;Li=Yu+1848|0;ze=Yu+1844|0;Mi=Yu+1840|0;re=Yu+1836|0;te=Yu+1832|0;qe=Yu+1828|0;se=Yu+1824|0;we=Yu+1820|0;ye=Yu+1816|0;ve=Yu+1812|0;xe=Yu+1808|0;pe=Yu+1804|0;mf=Yu+1800|0;cp=Yu+1796|0;fp=Yu+1792|0;Mq=Yu+1788|0;Nq=Yu+1784|0;di=Yu+1780|0;ii=Yu+1776|0;Pi=Yu+1772|0;sj=Yu+1768|0;zm=Yu+1764|0;Am=Yu+1760|0;Ao=Yu+1756|0;Bo=Yu+1752|0;wm=Yu+1748|0;xm=Yu+1744|0;sa=Yu+1740|0;os=Yu+1736|0;Nr=Yu+1732|0;Or=Yu+1728|0;Yr=Yu+1724|0;Ds=Yu+1720|0;Jb=Yu+1716|0;Cs=Yu+1712|0;pf=Yu+1708|0;_r=Yu+1704|0;xs=Yu+1700|0;Hr=Yu+1696|0;Er=Yu+1692|0;Ir=Yu+1688|0;rs=Yu+1684|0;Pr=Yu+1680|0;Zj=Yu+1676|0;ra=Yu+1672|0;Lr=Yu+1668|0;Mr=Yu+1664|0;Qr=Yu+1660|0;Xr=Yu+1656|0;mb=
Yu+1652|0;hb=Yu+1648|0;Yd=Yu+1644|0;of=Yu+1640|0;ts=Yu+1636|0;ws=Yu+1632|0;ys=Yu+1628|0;Dr=Yu+1624|0;ps=Yu+1620|0;qs=Yu+1616|0;Kb=Yu+1612|0;Zr=Yu+1608|0;Kr=Yu+1604|0;$r=Yu+1600|0;ss=Yu+1596|0;Fr=Yu+1592|0;as=Yu+1588|0;Es=Yu+1584|0;Gr=Yu+1580|0;Jr=Yu+1576|0;Fs=Yu+1572|0;Gs=Yu+1568|0;lr=Yu+1564|0;Ar=Yu+1560|0;is=Yu+1556|0;ms=Yu+1552|0;Ls=Yu+1548|0;pt=Yu+1544|0;yq=Yu+1540|0;Is=Yu+1536|0;Kq=Yu+1532|0;xr=Yu+1528|0;bs=Yu+1524|0;ot=Yu+1520|0;fs=Yu+1516|0;ls=Yu+1512|0;ur=Yu+1508|0;yr=Yu+1504|0;hr=Yu+1500|0;kr=Yu+1496|0;gs=Yu+1492|0;hs=Yu+1488|0;Js=Yu+1484|0;Ks=Yu+1480|0;qr=Yu+1476|0;xq=Yu+1472|0;Eq=Yu+1468|0;Jq=Yu+1464|0;Br=Yu+1460|0;Cr=Yu+1456|0;ds=Yu+1452|0;es=Yu+1448|0;Pq=Yu+1444|0;tr=Yu+1440|0;zq=Yu+1436|0;vr=Yu+1432|0;nt=Yu+1428|0;qt=Yu+1424|0;wr=Yu+1420|0;zr=Yu+1416|0;rt=Yu+1412|0;st=Yu+1408|0;cs=Yu+1404|0;js=Yu+1400|0;Hs=Yu+1396|0;kt=Yu+1392|0;ks=Yu+1388|0;ns=Yu+1384|0;lt=Yu+1380|0;mt=Yu+1376|0;Un=Yu+1372|0;Yp=Yu+1368|0;Qo=Yu+1364|0;Ms=Yu+1360|0;Ps=Yu+1356|0;Vs=Yu+1352|0;$p=Yu+1348|0;Us=Yu+1344|0;Sp=Yu+1340|0;mp=Yu+1336|0;Wp=Yu+1332|0;gq=Yu+1328|0;so=Yu+1324|0;lp=Yu+1320|0;Vp=Yu+1316|0;dq=Yu+1312|0;gn=Yu+1308|0;Tn=Yu+1304|0;Zp=Yu+1300|0;_p=Yu+1296|0;Eo=Yu+1292|0;Po=Yu+1288|0;Ns=Yu+1284|0;Os=Yu+1280|0;ip=Yu+1276|0;eq=Yu+1272|0;Rp=Yu+1268|0;fq=Yu+1264|0;hp=Yu+1260|0;Qp=Yu+1256|0;io=Yu+1252|0;bq=Yu+1248|0;ro=Yu+1244|0;cq=Yu+1240|0;ho=Yu+1236|0;qo=Yu+1232|0;Ro=Yu+1228|0;Tp=Yu+1224|0;Ts=Yu+1220|0;Ws=Yu+1216|0;Up=Yu+1212|0;Xp=Yu+1208|0;Xs=Yu+1204|0;Ys=Yu+1200|0;aq=Yu+1196|0;hq=Yu+1192|0;Jt=Yu+1188|0;Qs=Yu+1184|0;iq=Yu+1180|0;np=Yu+1176|0;Rs=Yu+1172|0;Ss=Yu+1168|0;sp=Yu+1164|0;tq=Yu+1160|0;zp=Yu+1156|0;ut=Yu+1152|0;zt=Yu+1148|0;Ft=Yu+1144|0;Wq=Yu+1140|0;Et=Yu+1136|0;nq=Yu+1132|0;fr=Yu+1128|0;rq=Yu+1124|0;br=Yu+1120|0;Hp=Yu+1116|0;er=Yu+1112|0;qq=Yu+1108|0;_q=Yu+1104|0;op=Yu+1100|0;rp=Yu+1096|0;Uq=Yu+1092|0;Vq=Yu+1088|0;vp=Yu+1084|0;yp=Yu+1080|0;vt=Yu+1076|0;yt=Yu+1072|0;Kp=Yu+1068|0;$q=Yu+1064|0;mq=Yu+1060|0;ar=Yu+1056|0;Jp=Yu+1052|0;lq=Yu+1048|0;Dp=Yu+1044|0;Yq=Yu+1040|0;G
p=Yu+1036|0;Zq=Yu+1032|0;Cp=Yu+1028|0;Fp=Yu+1024|0;Ap=Yu+1020|0;oq=Yu+1016|0;Dt=Yu+1012|0;Gt=Yu+1008|0;pq=Yu+1004|0;sq=Yu+1e3|0;Ht=Yu+996|0;It=Yu+992|0;Xq=Yu+988|0;cr=Yu+984|0;tt=Yu+980|0;At=Yu+976|0;dr=Yu+972|0;gr=Yu+968|0;Bt=Yu+964|0;Ct=Yu+960|0;Ef=Yu+956|0;Pj=Yu+952|0;Yt=Yu+948|0;cu=Yu+944|0;Gg=Yu+940|0;bu=Yu+936|0;Ui=Yu+932|0;Vt=Yu+928|0;Hi=Yu+924|0;cj=Yu+920|0;Mj=Yu+916|0;Yi=Yu+912|0;Jj=Yu+908|0;dj=Yu+904|0;Nj=Yu+900|0;$i=Yu+896|0;eg=Yu+892|0;Df=Yu+888|0;Wt=Yu+884|0;Xt=Yu+880|0;ch=Yu+876|0;Qj=Yu+872|0;Fg=Yu+868|0;Rj=Yu+864|0;ug=Yu+860|0;bh=Yu+856|0;th=Yu+852|0;Eg=Yu+848|0;ti=Yu+844|0;Wi=Yu+840|0;Gi=Yu+836|0;Xi=Yu+832|0;Yg=Yu+828|0;si=Yu+824|0;Ci=Yu+820|0;Fi=Yu+816|0;vj=Yu+812|0;Zi=Yu+808|0;Ij=Yu+804|0;_i=Yu+800|0;_h=Yu+796|0;uj=Yu+792|0;Ej=Yu+788|0;Hj=Yu+784|0;Hg=Yu+780|0;Kj=Yu+776|0;au=Yu+772|0;du=Yu+768|0;Lj=Yu+764|0;Oj=Yu+760|0;eu=Yu+756|0;fu=Yu+752|0;Vi=Yu+748|0;aj=Yu+744|0;Fu=Yu+740|0;Zt=Yu+736|0;bj=Yu+732|0;ej=Yu+728|0;_t=Yu+724|0;$t=Yu+720|0;Mk=Yu+716|0;Tl=Yu+712|0;Ut=Yu+708|0;lu=Yu+704|0;Al=Yu+700|0;ku=Yu+696|0;Wl=Yu+692|0;Rt=Yu+688|0;sm=Yu+684|0;em=Yu+680|0;Ql=Yu+676|0;_l=Yu+672|0;Nl=Yu+668|0;fm=Yu+664|0;Rl=Yu+660|0;bm=Yu+656|0;Ek=Yu+652|0;Lk=Yu+648|0;St=Yu+644|0;Tt=Yu+640|0;Tk=Yu+636|0;Ul=Yu+632|0;_k=Yu+628|0;Vl=Yu+624|0;Pk=Yu+620|0;Sk=Yu+616|0;Wk=Yu+612|0;Zk=Yu+608|0;km=Yu+604|0;Yl=Yu+600|0;rm=Yu+596|0;Zl=Yu+592|0;El=Yu+588|0;jm=Yu+584|0;nm=Yu+580|0;qm=Yu+576|0;Dm=Yu+572|0;$l=Yu+568|0;Ml=Yu+564|0;am=Yu+560|0;vm=Yu+556|0;Cm=Yu+552|0;Gm=Yu+548|0;Ll=Yu+544|0;Bl=Yu+540|0;Ol=Yu+536|0;ju=Yu+532|0;mu=Yu+528|0;Pl=Yu+524|0;Sl=Yu+520|0;nu=Yu+516|0;ou=Yu+512|0;Xl=Yu+508|0;cm=Yu+504|0;Qt=Yu+500|0;gu=Yu+496|0;dm=Yu+492|0;gm=Yu+488|0;hu=Yu+484|0;iu=Yu+480|0;Mm=Yu+476|0;Nn=Yu+472|0;ft=Yu+468|0;Mt=Yu+464|0;sn=Yu+460|0;Lt=Yu+456|0;Qn=Yu+452|0;_s=Yu+448|0;An=Yu+444|0;$m=Yu+440|0;Kn=Yu+436|0;Vm=Yu+432|0;Hn=Yu+428|0;an=Yu+424|0;Ln=Yu+420|0;Ym=Yu+416|0;hm=Yu+412|0;Lm=Yu+408|0;$s=Yu+404|0;et=Yu+400|0;Pm=Yu+396|0;On=Yu+392|0;Sm=Yu+388|0;Pn=Yu+384|0;Nm=Yu+380|0;Om=Yu+376|0;
Qm=Yu+372|0;Rm=Yu+368|0;wn=Yu+364|0;Tm=Yu+360|0;zn=Yu+356|0;Um=Yu+352|0;un=Yu+348|0;vn=Yu+344|0;xn=Yu+340|0;yn=Yu+336|0;Dn=Yu+332|0;Wm=Yu+328|0;Gn=Yu+324|0;Xm=Yu+320|0;Bn=Yu+316|0;Cn=Yu+312|0;En=Yu+308|0;Fn=Yu+304|0;tn=Yu+300|0;In=Yu+296|0;jt=Yu+292|0;Nt=Yu+288|0;Jn=Yu+284|0;Mn=Yu+280|0;Ot=Yu+276|0;Pt=Yu+272|0;Rn=Yu+268|0;Zm=Yu+264|0;Zs=Yu+260|0;gt=Yu+256|0;_m=Yu+252|0;bn=Yu+248|0;ht=Yu+244|0;it=Yu+240|0;jj=Yu+236|0;il=Yu+232|0;vu=Yu+228|0;Bu=Yu+224|0;qj=Yu+220|0;Au=Yu+216|0;ll=Yu+212|0;qu=Yu+208|0;xk=Yu+204|0;vl=Yu+200|0;fl=Yu+196|0;pl=Yu+192|0;cl=Yu+188|0;wl=Yu+184|0;gl=Yu+180|0;sl=Yu+176|0;fj=Yu+172|0;ij=Yu+168|0;ru=Yu+164|0;uu=Yu+160|0;mj=Yu+156|0;jl=Yu+152|0;pj=Yu+148|0;kl=Yu+144|0;kj=Yu+140|0;lj=Yu+136|0;nj=Yu+132|0;oj=Yu+128|0;tk=Yu+124|0;nl=Yu+120|0;wk=Yu+116|0;ol=Yu+112|0;rk=Yu+108|0;sk=Yu+104|0;uk=Yu+100|0;vk=Yu+96|0;Ak=Yu+92|0;ql=Yu+88|0;bl=Yu+84|0;rl=Yu+80|0;yk=Yu+76|0;zk=Yu+72|0;$k=Yu+68|0;al=Yu+64|0;rj=Yu+60|0;dl=Yu+56|0;zu=Yu+52|0;Cu=Yu+48|0;el=Yu+44|0;hl=Yu+40|0;Du=Yu+36|0;Eu=Yu+32|0;ml=Yu+28|0;tl=Yu+24|0;pu=Yu+20|0;wu=Yu+16|0;ul=Yu+12|0;xl=Yu+8|0;xu=Yu+4|0;yu=Yu;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Zu>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Yu+4716>>2]=.4713967442512512;g[Yu+4712>>2]=.8819212913513184;g[Yu+4708>>2]=.290284663438797;g[Yu+4704>>2]=.9569403529167175;g[Yu+4700>>2]=.6343932747840881;g[Yu+4696>>2]=.7730104327201843;g[Yu+4692>>2]=.0980171412229538;g[Yu+4688>>2]=.9951847195625305;g[Yu+4684>>2]=.5555702447891235;g[Yu+4680>>2]=.8314695954322815;g[Yu+4676>>2]=.9807852506637573;g[Yu+4672>>2]=.19509032368659973;g[Yu+4668>>2]=.9238795042037964;g[Yu+4664>>2]=.3826834261417389;g[Yu+4660>>2]=.7071067690849304;c[Xu>>2]=c[Zu>>2];c[m>>2]=(c[m>>2]|0)+((c[Zu>>2]|0)*126<<2);while(1){if((c[Xu>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[Ur>>2]=+g[c[l>>2]>>2];g[Ib>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2];g[za>>2]=+g[(c[m>>2]|0)+248>>2];g[Rc>>2]=+g[(c[m>>2]|0)+252>>2];g[hf>>2]=+g[za>>2]*+g[Ib>>2]+ 
+g[Rc>>2]*+g[_d>>2];g[Tr>>2]=+g[za>>2]*+g[_d>>2]-+g[Rc>>2]*+g[Ib>>2];g[Ji>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[zl>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Ah>>2]=+g[(c[m>>2]|0)+120>>2];g[qk>>2]=+g[(c[m>>2]|0)+124>>2];g[Im>>2]=+g[Ah>>2]*+g[Ji>>2]+ +g[qk>>2]*+g[zl>>2];g[rf>>2]=+g[Ah>>2]*+g[zl>>2]-+g[qk>>2]*+g[Ji>>2];g[ap>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[sr>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2];g[Sn>>2]=+g[(c[m>>2]|0)+376>>2];g[jq>>2]=+g[(c[m>>2]|0)+380>>2];g[Bs>>2]=+g[Sn>>2]*+g[ap>>2]+ +g[jq>>2]*+g[sr>>2];g[sf>>2]=+g[Sn>>2]*+g[sr>>2]-+g[jq>>2]*+g[ap>>2];g[rg>>2]=+g[q>>2]+ +g[hf>>2];g[Kt>>2]=+g[Im>>2]+ +g[Bs>>2];g[Gu>>2]=+g[rg>>2]+ +g[Kt>>2];g[cn>>2]=+g[rg>>2]-+g[Kt>>2];g[bt>>2]=+g[Ur>>2]-+g[Tr>>2];g[ct>>2]=+g[Im>>2]-+g[Bs>>2];g[dt>>2]=+g[bt>>2]-+g[ct>>2];g[tu>>2]=+g[ct>>2]+ +g[bt>>2];g[qf>>2]=+g[q>>2]-+g[hf>>2];g[Tf>>2]=+g[rf>>2]-+g[sf>>2];g[Uf>>2]=+g[qf>>2]-+g[Tf>>2];g[yl>>2]=+g[qf>>2]+ +g[Tf>>2];g[Sr>>2]=+g[rf>>2]+ +g[sf>>2];g[Vr>>2]=+g[Tr>>2]+ +g[Ur>>2];g[Wr>>2]=+g[Sr>>2]+ +g[Vr>>2];g[xt>>2]=+g[Vr>>2]-+g[Sr>>2];g[Iu>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ku>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Hu>>2]=+g[(c[m>>2]|0)+56>>2];g[Ju>>2]=+g[(c[m>>2]|0)+60>>2];g[Lu>>2]=+g[Hu>>2]*+g[Iu>>2]+ +g[Ju>>2]*+g[Ku>>2];g[Vf>>2]=+g[Hu>>2]*+g[Ku>>2]-+g[Ju>>2]*+g[Iu>>2];g[Nu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[Pu>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2];g[Mu>>2]=+g[(c[m>>2]|0)+312>>2];g[Ou>>2]=+g[(c[m>>2]|0)+316>>2];g[Qu>>2]=+g[Mu>>2]*+g[Nu>>2]+ +g[Ou>>2]*+g[Pu>>2];g[Wf>>2]=+g[Mu>>2]*+g[Pu>>2]-+g[Ou>>2]*+g[Nu>>2];g[Xf>>2]=+g[Vf>>2]-+g[Wf>>2];g[Yf>>2]=+g[Lu>>2]-+g[Qu>>2];g[Tu>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[Vu>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2];g[Su>>2]=+g[(c[m>>2]|0)+440>>2];g[Uu>>2]=+g[(c[m>>2]|0)+444>>2];g[Wu>>2]=+g[Su>>2]*+g[Tu>>2]+ 
+g[Uu>>2]*+g[Vu>>2];g[$f>>2]=+g[Su>>2]*+g[Vu>>2]-+g[Uu>>2]*+g[Tu>>2];g[Tj>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[Vj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[Sj>>2]=+g[(c[m>>2]|0)+184>>2];g[Uj>>2]=+g[(c[m>>2]|0)+188>>2];g[Wj>>2]=+g[Sj>>2]*+g[Tj>>2]+ +g[Uj>>2]*+g[Vj>>2];g[ag>>2]=+g[Sj>>2]*+g[Vj>>2]-+g[Uj>>2]*+g[Tj>>2];g[_f>>2]=+g[Wu>>2]-+g[Wj>>2];g[bg>>2]=+g[$f>>2]-+g[ag>>2];g[Ru>>2]=+g[Lu>>2]+ +g[Qu>>2];g[Xj>>2]=+g[Wu>>2]+ +g[Wj>>2];g[Yj>>2]=+g[Ru>>2]+ +g[Xj>>2];g[wt>>2]=+g[Xj>>2]-+g[Ru>>2];g[dn>>2]=+g[Vf>>2]+ +g[Wf>>2];g[en>>2]=+g[$f>>2]+ +g[ag>>2];g[fn>>2]=+g[dn>>2]-+g[en>>2];g[Rr>>2]=+g[dn>>2]+ +g[en>>2];g[Zf>>2]=+g[Xf>>2]-+g[Yf>>2];g[cg>>2]=+g[_f>>2]+ +g[bg>>2];g[dg>>2]=(+g[Zf>>2]-+g[cg>>2])*.7071067690849304;g[at>>2]=(+g[Zf>>2]+ +g[cg>>2])*.7071067690849304;g[Bk>>2]=+g[Yf>>2]+ +g[Xf>>2];g[Ck>>2]=+g[_f>>2]-+g[bg>>2];g[Dk>>2]=(+g[Bk>>2]+ +g[Ck>>2])*.7071067690849304;g[su>>2]=(+g[Ck>>2]-+g[Bk>>2])*.7071067690849304;g[$j>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[bk>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[_j>>2]=+g[(c[m>>2]|0)+24>>2];g[ak>>2]=+g[(c[m>>2]|0)+28>>2];g[ck>>2]=+g[_j>>2]*+g[$j>>2]+ +g[ak>>2]*+g[bk>>2];g[fg>>2]=+g[_j>>2]*+g[bk>>2]-+g[ak>>2]*+g[$j>>2];g[ek>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[gk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2];g[dk>>2]=+g[(c[m>>2]|0)+280>>2];g[fk>>2]=+g[(c[m>>2]|0)+284>>2];g[hk>>2]=+g[dk>>2]*+g[ek>>2]+ +g[fk>>2]*+g[gk>>2];g[gg>>2]=+g[dk>>2]*+g[gk>>2]-+g[fk>>2]*+g[ek>>2];g[ik>>2]=+g[ck>>2]+ +g[hk>>2];g[hn>>2]=+g[fg>>2]+ +g[gg>>2];g[hg>>2]=+g[fg>>2]-+g[gg>>2];g[kg>>2]=+g[ck>>2]-+g[hk>>2];g[kk>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[mk>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[jk>>2]=+g[(c[m>>2]|0)+152>>2];g[lk>>2]=+g[(c[m>>2]|0)+156>>2];g[nk>>2]=+g[jk>>2]*+g[kk>>2]+ 
+g[lk>>2]*+g[mk>>2];g[lg>>2]=+g[jk>>2]*+g[mk>>2]-+g[lk>>2]*+g[kk>>2];g[pk>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2];g[ok>>2]=+g[(c[m>>2]|0)+408>>2];g[r>>2]=+g[(c[m>>2]|0)+412>>2];g[t>>2]=+g[ok>>2]*+g[pk>>2]+ +g[r>>2]*+g[s>>2];g[mg>>2]=+g[ok>>2]*+g[s>>2]-+g[r>>2]*+g[pk>>2];g[u>>2]=+g[nk>>2]+ +g[t>>2];g[jn>>2]=+g[lg>>2]+ +g[mg>>2];g[ig>>2]=+g[nk>>2]-+g[t>>2];g[ng>>2]=+g[lg>>2]-+g[mg>>2];g[v>>2]=+g[ik>>2]+ +g[u>>2];g[ir>>2]=+g[hn>>2]+ +g[jn>>2];g[jg>>2]=+g[hg>>2]+ +g[ig>>2];g[og>>2]=+g[kg>>2]-+g[ng>>2];g[pg>>2]=+g[jg>>2]*.3826834261417389-+g[og>>2]*.9238795042037964;g[gj>>2]=+g[jg>>2]*.9238795042037964+ +g[og>>2]*.3826834261417389;g[Fk>>2]=+g[hg>>2]-+g[ig>>2];g[Gk>>2]=+g[kg>>2]+ +g[ng>>2];g[Hk>>2]=+g[Fk>>2]*.9238795042037964-+g[Gk>>2]*.3826834261417389;g[Jm>>2]=+g[Fk>>2]*.3826834261417389+ +g[Gk>>2]*.9238795042037964;g[kn>>2]=+g[hn>>2]-+g[jn>>2];g[ln>>2]=+g[ik>>2]-+g[u>>2];g[mn>>2]=+g[kn>>2]-+g[ln>>2];g[pp>>2]=+g[ln>>2]+ +g[kn>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2];g[w>>2]=+g[(c[m>>2]|0)+472>>2];g[y>>2]=+g[(c[m>>2]|0)+476>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[qg>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[$>>2]=+g[(c[m>>2]|0)+216>>2];g[ba>>2]=+g[(c[m>>2]|0)+220>>2];g[da>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[tf>>2]=+g[$>>2]*+g[ca>>2]-+g[ba>>2]*+g[aa>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[on>>2]=+g[qg>>2]+ +g[tf>>2];g[uf>>2]=+g[qg>>2]-+g[tf>>2];g[xf>>2]=+g[A>>2]-+g[da>>2];g[ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[fa>>2]=+g[(c[m>>2]|0)+88>>2];g[ha>>2]=+g[(c[m>>2]|0)+92>>2];g[ja>>2]=+g[fa>>2]*+g[ga>>2]+ 
+g[ha>>2]*+g[ia>>2];g[yf>>2]=+g[fa>>2]*+g[ia>>2]-+g[ha>>2]*+g[ga>>2];g[la>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2];g[ka>>2]=+g[(c[m>>2]|0)+344>>2];g[ma>>2]=+g[(c[m>>2]|0)+348>>2];g[oa>>2]=+g[ka>>2]*+g[la>>2]+ +g[ma>>2]*+g[na>>2];g[zf>>2]=+g[ka>>2]*+g[na>>2]-+g[ma>>2]*+g[la>>2];g[pa>>2]=+g[ja>>2]+ +g[oa>>2];g[pn>>2]=+g[yf>>2]+ +g[zf>>2];g[vf>>2]=+g[ja>>2]-+g[oa>>2];g[Af>>2]=+g[yf>>2]-+g[zf>>2];g[qa>>2]=+g[ea>>2]+ +g[pa>>2];g[jr>>2]=+g[on>>2]+ +g[pn>>2];g[wf>>2]=+g[uf>>2]+ +g[vf>>2];g[Bf>>2]=+g[xf>>2]-+g[Af>>2];g[Cf>>2]=+g[wf>>2]*.3826834261417389+ +g[Bf>>2]*.9238795042037964;g[hj>>2]=+g[Bf>>2]*.3826834261417389-+g[wf>>2]*.9238795042037964;g[Ik>>2]=+g[uf>>2]-+g[vf>>2];g[Jk>>2]=+g[xf>>2]+ +g[Af>>2];g[Kk>>2]=+g[Ik>>2]*.9238795042037964+ +g[Jk>>2]*.3826834261417389;g[Km>>2]=+g[Jk>>2]*.9238795042037964-+g[Ik>>2]*.3826834261417389;g[nn>>2]=+g[ea>>2]-+g[pa>>2];g[qn>>2]=+g[on>>2]-+g[pn>>2];g[rn>>2]=+g[nn>>2]+ +g[qn>>2];g[qp>>2]=+g[nn>>2]-+g[qn>>2];g[ua>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[wa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ta>>2]=+g[(c[m>>2]|0)+8>>2];g[va>>2]=+g[(c[m>>2]|0)+12>>2];g[xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[wa>>2];g[Ff>>2]=+g[ta>>2]*+g[wa>>2]-+g[va>>2]*+g[ua>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2];g[ya>>2]=+g[(c[m>>2]|0)+264>>2];g[C>>2]=+g[(c[m>>2]|0)+268>>2];g[E>>2]=+g[ya>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[Gf>>2]=+g[ya>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[F>>2]=+g[xa>>2]+ +g[E>>2];g[Vn>>2]=+g[Ff>>2]+ +g[Gf>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[G>>2]=+g[(c[m>>2]|0)+136>>2];g[I>>2]=+g[(c[m>>2]|0)+140>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[wg>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[M>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[O>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2];g[L>>2]=+g[(c[m>>2]|0)+392>>2];g[N>>2]=+g[(c[m>>2]|0)+396>>2];g[P>>2]=+g[L>>2]*+g[M>>2]+ 
+g[N>>2]*+g[O>>2];g[xg>>2]=+g[L>>2]*+g[O>>2]-+g[N>>2]*+g[M>>2];g[Q>>2]=+g[K>>2]+ +g[P>>2];g[Wn>>2]=+g[wg>>2]+ +g[xg>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+72>>2];g[U>>2]=+g[(c[m>>2]|0)+76>>2];g[W>>2]=+g[S>>2]*+g[T>>2]+ +g[U>>2]*+g[V>>2];g[Qf>>2]=+g[S>>2]*+g[V>>2]-+g[U>>2]*+g[T>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2];g[X>>2]=+g[(c[m>>2]|0)+328>>2];g[Z>>2]=+g[(c[m>>2]|0)+332>>2];g[Aa>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[Rf>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[Ba>>2]=+g[W>>2]+ +g[Aa>>2];g[$n>>2]=+g[Qf>>2]+ +g[Rf>>2];g[Pf>>2]=+g[W>>2]-+g[Aa>>2];g[Sf>>2]=+g[Qf>>2]-+g[Rf>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2];g[Ca>>2]=+g[(c[m>>2]|0)+456>>2];g[Ea>>2]=+g[(c[m>>2]|0)+460>>2];g[Ga>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[Ea>>2]*+g[Fa>>2];g[Lf>>2]=+g[Ca>>2]*+g[Fa>>2]-+g[Ea>>2]*+g[Da>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[ib>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[Ha>>2]=+g[(c[m>>2]|0)+200>>2];g[Ja>>2]=+g[(c[m>>2]|0)+204>>2];g[jb>>2]=+g[Ha>>2]*+g[Ia>>2]+ +g[Ja>>2]*+g[ib>>2];g[Mf>>2]=+g[Ha>>2]*+g[ib>>2]-+g[Ja>>2]*+g[Ia>>2];g[kb>>2]=+g[Ga>>2]+ +g[jb>>2];g[ao>>2]=+g[Lf>>2]+ +g[Mf>>2];g[Kf>>2]=+g[Ga>>2]-+g[jb>>2];g[Nf>>2]=+g[Lf>>2]-+g[Mf>>2];g[R>>2]=+g[F>>2]+ +g[Q>>2];g[lb>>2]=+g[Ba>>2]+ +g[kb>>2];g[pr>>2]=+g[R>>2]-+g[lb>>2];g[mr>>2]=+g[Vn>>2]+ +g[Wn>>2];g[nr>>2]=+g[$n>>2]+ +g[ao>>2];g[or>>2]=+g[mr>>2]-+g[nr>>2];g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[If>>2]=+g[K>>2]-+g[P>>2];g[Jf>>2]=+g[Hf>>2]+ +g[If>>2];g[Nk>>2]=+g[Hf>>2]-+g[If>>2];g[_n>>2]=+g[F>>2]-+g[Q>>2];g[Co>>2]=+g[$n>>2]-+g[ao>>2];g[Do>>2]=+g[_n>>2]-+g[Co>>2];g[up>>2]=+g[_n>>2]+ +g[Co>>2];g[Of>>2]=+g[Kf>>2]-+g[Nf>>2];g[sg>>2]=+g[Pf>>2]+ +g[Sf>>2];g[tg>>2]=(+g[Of>>2]-+g[sg>>2])*.7071067690849304;g[Rk>>2]=(+g[sg>>2]+ +g[Of>>2])*.7071067690849304;g[Ag>>2]=+g[Sf>>2]-+g[Pf>>2];g[Bg>>2]=+g[Kf>>2]+ 
+g[Nf>>2];g[ah>>2]=(+g[Ag>>2]-+g[Bg>>2])*.7071067690849304;g[Ok>>2]=(+g[Ag>>2]+ +g[Bg>>2])*.7071067690849304;g[Xn>>2]=+g[Vn>>2]-+g[Wn>>2];g[Yn>>2]=+g[kb>>2]-+g[Ba>>2];g[Zn>>2]=+g[Xn>>2]-+g[Yn>>2];g[tp>>2]=+g[Xn>>2]+ +g[Yn>>2];g[vg>>2]=+g[xa>>2]-+g[E>>2];g[yg>>2]=+g[wg>>2]-+g[xg>>2];g[zg>>2]=+g[vg>>2]-+g[yg>>2];g[Qk>>2]=+g[vg>>2]+ +g[yg>>2];g[ob>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2];g[nb>>2]=+g[(c[m>>2]|0)+488>>2];g[pb>>2]=+g[(c[m>>2]|0)+492>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[dh>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[sb>>2]=+g[(c[m>>2]|0)+232>>2];g[ub>>2]=+g[(c[m>>2]|0)+236>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[eh>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[xb>>2]=+g[rb>>2]+ +g[wb>>2];g[Fo>>2]=+g[dh>>2]+ +g[eh>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Bb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[yb>>2]=+g[(c[m>>2]|0)+104>>2];g[Ab>>2]=+g[(c[m>>2]|0)+108>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]+ +g[Ab>>2]*+g[Bb>>2];g[vh>>2]=+g[yb>>2]*+g[Bb>>2]-+g[Ab>>2]*+g[zb>>2];g[Eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2];g[Db>>2]=+g[(c[m>>2]|0)+360>>2];g[Fb>>2]=+g[(c[m>>2]|0)+364>>2];g[Hb>>2]=+g[Db>>2]*+g[Eb>>2]+ +g[Fb>>2]*+g[Gb>>2];g[wh>>2]=+g[Db>>2]*+g[Gb>>2]-+g[Fb>>2]*+g[Eb>>2];g[Ka>>2]=+g[Cb>>2]+ +g[Hb>>2];g[Go>>2]=+g[vh>>2]+ +g[wh>>2];g[Na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ma>>2]=+g[(c[m>>2]|0)+40>>2];g[Oa>>2]=+g[(c[m>>2]|0)+44>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[oh>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2];g[Ra>>2]=+g[(c[m>>2]|0)+296>>2];g[Ta>>2]=+g[(c[m>>2]|0)+300>>2];g[Va>>2]=+g[Ra>>2]*+g[Sa>>2]+ 
+g[Ta>>2]*+g[Ua>>2];g[ph>>2]=+g[Ra>>2]*+g[Ua>>2]-+g[Ta>>2]*+g[Sa>>2];g[Wa>>2]=+g[Qa>>2]+ +g[Va>>2];g[Lo>>2]=+g[oh>>2]+ +g[ph>>2];g[nh>>2]=+g[Qa>>2]-+g[Va>>2];g[qh>>2]=+g[oh>>2]-+g[ph>>2];g[Ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[_a>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2];g[Xa>>2]=+g[(c[m>>2]|0)+424>>2];g[Za>>2]=+g[(c[m>>2]|0)+428>>2];g[$a>>2]=+g[Xa>>2]*+g[Ya>>2]+ +g[Za>>2]*+g[_a>>2];g[jh>>2]=+g[Xa>>2]*+g[_a>>2]-+g[Za>>2]*+g[Ya>>2];g[bb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[db>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[ab>>2]=+g[(c[m>>2]|0)+168>>2];g[cb>>2]=+g[(c[m>>2]|0)+172>>2];g[eb>>2]=+g[ab>>2]*+g[bb>>2]+ +g[cb>>2]*+g[db>>2];g[kh>>2]=+g[ab>>2]*+g[db>>2]-+g[cb>>2]*+g[bb>>2];g[fb>>2]=+g[$a>>2]+ +g[eb>>2];g[Mo>>2]=+g[jh>>2]+ +g[kh>>2];g[ih>>2]=+g[$a>>2]-+g[eb>>2];g[lh>>2]=+g[jh>>2]-+g[kh>>2];g[La>>2]=+g[xb>>2]+ +g[Ka>>2];g[gb>>2]=+g[Wa>>2]+ +g[fb>>2];g[rr>>2]=+g[La>>2]-+g[gb>>2];g[uq>>2]=+g[Fo>>2]+ +g[Go>>2];g[vq>>2]=+g[Lo>>2]+ +g[Mo>>2];g[wq>>2]=+g[uq>>2]-+g[vq>>2];g[fh>>2]=+g[dh>>2]-+g[eh>>2];g[gh>>2]=+g[Cb>>2]-+g[Hb>>2];g[hh>>2]=+g[fh>>2]+ +g[gh>>2];g[Xk>>2]=+g[fh>>2]-+g[gh>>2];g[Ko>>2]=+g[xb>>2]-+g[Ka>>2];g[No>>2]=+g[Lo>>2]-+g[Mo>>2];g[Oo>>2]=+g[Ko>>2]-+g[No>>2];g[xp>>2]=+g[Ko>>2]+ +g[No>>2];g[mh>>2]=+g[ih>>2]-+g[lh>>2];g[rh>>2]=+g[nh>>2]+ +g[qh>>2];g[sh>>2]=(+g[mh>>2]-+g[rh>>2])*.7071067690849304;g[Vk>>2]=(+g[rh>>2]+ +g[mh>>2])*.7071067690849304;g[zh>>2]=+g[qh>>2]-+g[nh>>2];g[Cg>>2]=+g[ih>>2]+ +g[lh>>2];g[Dg>>2]=(+g[zh>>2]-+g[Cg>>2])*.7071067690849304;g[Yk>>2]=(+g[zh>>2]+ +g[Cg>>2])*.7071067690849304;g[Ho>>2]=+g[Fo>>2]-+g[Go>>2];g[Io>>2]=+g[fb>>2]-+g[Wa>>2];g[Jo>>2]=+g[Ho>>2]-+g[Io>>2];g[wp>>2]=+g[Ho>>2]+ +g[Io>>2];g[uh>>2]=+g[rb>>2]-+g[wb>>2];g[xh>>2]=+g[vh>>2]-+g[wh>>2];g[yh>>2]=+g[uh>>2]-+g[xh>>2];g[Uk>>2]=+g[uh>>2]+ +g[xh>>2];g[ad>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[cd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2];g[Zd>>2]=+g[(c[m>>2]|0)+496>>2];g[bd>>2]=+g[(c[m>>2]|0)+500>>2];g[dd>>2]=+g[Zd>>2]*+g[ad>>2]+ 
+g[bd>>2]*+g[cd>>2];g[wj>>2]=+g[Zd>>2]*+g[cd>>2]-+g[bd>>2]*+g[ad>>2];g[fd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[hd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[ed>>2]=+g[(c[m>>2]|0)+240>>2];g[gd>>2]=+g[(c[m>>2]|0)+244>>2];g[id>>2]=+g[ed>>2]*+g[fd>>2]+ +g[gd>>2]*+g[hd>>2];g[xj>>2]=+g[ed>>2]*+g[hd>>2]-+g[gd>>2]*+g[fd>>2];g[jd>>2]=+g[dd>>2]+ +g[id>>2];g[jp>>2]=+g[wj>>2]+ +g[xj>>2];g[ld>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[nd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[kd>>2]=+g[(c[m>>2]|0)+112>>2];g[md>>2]=+g[(c[m>>2]|0)+116>>2];g[od>>2]=+g[kd>>2]*+g[ld>>2]+ +g[md>>2]*+g[nd>>2];g[Lh>>2]=+g[kd>>2]*+g[nd>>2]-+g[md>>2]*+g[ld>>2];g[qd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[sd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2];g[pd>>2]=+g[(c[m>>2]|0)+368>>2];g[rd>>2]=+g[(c[m>>2]|0)+372>>2];g[td>>2]=+g[pd>>2]*+g[qd>>2]+ +g[rd>>2]*+g[sd>>2];g[Mh>>2]=+g[pd>>2]*+g[sd>>2]-+g[rd>>2]*+g[qd>>2];g[ud>>2]=+g[od>>2]+ +g[td>>2];g[kp>>2]=+g[Lh>>2]+ +g[Mh>>2];g[xd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[zd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[wd>>2]=+g[(c[m>>2]|0)+48>>2];g[yd>>2]=+g[(c[m>>2]|0)+52>>2];g[$d>>2]=+g[wd>>2]*+g[xd>>2]+ +g[yd>>2]*+g[zd>>2];g[Ph>>2]=+g[wd>>2]*+g[zd>>2]-+g[yd>>2]*+g[xd>>2];g[be>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[de>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2];g[ae>>2]=+g[(c[m>>2]|0)+304>>2];g[ce>>2]=+g[(c[m>>2]|0)+308>>2];g[ee>>2]=+g[ae>>2]*+g[be>>2]+ +g[ce>>2]*+g[de>>2];g[Qh>>2]=+g[ae>>2]*+g[de>>2]-+g[ce>>2]*+g[be>>2];g[fe>>2]=+g[$d>>2]+ +g[ee>>2];g[uo>>2]=+g[Ph>>2]+ +g[Qh>>2];g[Rh>>2]=+g[Ph>>2]-+g[Qh>>2];g[Sh>>2]=+g[$d>>2]-+g[ee>>2];g[he>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[Je>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2];g[ge>>2]=+g[(c[m>>2]|0)+432>>2];g[ie>>2]=+g[(c[m>>2]|0)+436>>2];g[Ke>>2]=+g[ge>>2]*+g[he>>2]+ 
+g[ie>>2]*+g[Je>>2];g[Vh>>2]=+g[ge>>2]*+g[Je>>2]-+g[ie>>2]*+g[he>>2];g[Me>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Oe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Le>>2]=+g[(c[m>>2]|0)+176>>2];g[Ne>>2]=+g[(c[m>>2]|0)+180>>2];g[Pe>>2]=+g[Le>>2]*+g[Me>>2]+ +g[Ne>>2]*+g[Oe>>2];g[Wh>>2]=+g[Le>>2]*+g[Oe>>2]-+g[Ne>>2]*+g[Me>>2];g[Qe>>2]=+g[Ke>>2]+ +g[Pe>>2];g[vo>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Uh>>2]=+g[Ke>>2]-+g[Pe>>2];g[Xh>>2]=+g[Vh>>2]-+g[Wh>>2];g[vd>>2]=+g[jd>>2]+ +g[ud>>2];g[Re>>2]=+g[fe>>2]+ +g[Qe>>2];g[Se>>2]=+g[vd>>2]+ +g[Re>>2];g[Lq>>2]=+g[vd>>2]-+g[Re>>2];g[Lp>>2]=+g[jp>>2]-+g[kp>>2];g[Mp>>2]=+g[Qe>>2]-+g[fe>>2];g[Np>>2]=+g[Lp>>2]-+g[Mp>>2];g[kq>>2]=+g[Lp>>2]+ +g[Mp>>2];g[Qq>>2]=+g[jp>>2]+ +g[kp>>2];g[Rq>>2]=+g[uo>>2]+ +g[vo>>2];g[Sq>>2]=+g[Qq>>2]-+g[Rq>>2];g[zs>>2]=+g[Qq>>2]+ +g[Rq>>2];g[Ii>>2]=+g[dd>>2]-+g[id>>2];g[Nh>>2]=+g[Lh>>2]-+g[Mh>>2];g[Oh>>2]=+g[Ii>>2]-+g[Nh>>2];g[tm>>2]=+g[Ii>>2]+ +g[Nh>>2];g[Th>>2]=+g[Rh>>2]-+g[Sh>>2];g[Yh>>2]=+g[Uh>>2]+ +g[Xh>>2];g[Zh>>2]=(+g[Th>>2]-+g[Yh>>2])*.7071067690849304;g[Fm>>2]=(+g[Th>>2]+ +g[Yh>>2])*.7071067690849304;g[Bj>>2]=+g[Uh>>2]-+g[Xh>>2];g[Cj>>2]=+g[Sh>>2]+ +g[Rh>>2];g[Dj>>2]=(+g[Bj>>2]-+g[Cj>>2])*.7071067690849304;g[um>>2]=(+g[Cj>>2]+ +g[Bj>>2])*.7071067690849304;g[to>>2]=+g[jd>>2]-+g[ud>>2];g[wo>>2]=+g[uo>>2]-+g[vo>>2];g[xo>>2]=+g[to>>2]-+g[wo>>2];g[Ip>>2]=+g[to>>2]+ +g[wo>>2];g[yj>>2]=+g[wj>>2]-+g[xj>>2];g[zj>>2]=+g[od>>2]-+g[td>>2];g[Aj>>2]=+g[yj>>2]+ +g[zj>>2];g[Em>>2]=+g[yj>>2]-+g[zj>>2];g[Mb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Ob>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Lb>>2]=+g[c[m>>2]>>2];g[Nb>>2]=+g[(c[m>>2]|0)+4>>2];g[Pb>>2]=+g[Lb>>2]*+g[Mb>>2]+ +g[Nb>>2]*+g[Ob>>2];g[Ig>>2]=+g[Lb>>2]*+g[Ob>>2]-+g[Nb>>2]*+g[Mb>>2];g[Rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2];g[Qb>>2]=+g[(c[m>>2]|0)+256>>2];g[Sb>>2]=+g[(c[m>>2]|0)+260>>2];g[sc>>2]=+g[Qb>>2]*+g[Rb>>2]+ +g[Sb>>2]*+g[rc>>2];g[Jg>>2]=+g[Qb>>2]*+g[rc>>2]-+g[Sb>>2]*+g[Rb>>2];g[tc>>2]=+g[Pb>>2]+ 
+g[sc>>2];g[So>>2]=+g[Ig>>2]+ +g[Jg>>2];g[vc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[xc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[uc>>2]=+g[(c[m>>2]|0)+128>>2];g[wc>>2]=+g[(c[m>>2]|0)+132>>2];g[yc>>2]=+g[uc>>2]*+g[vc>>2]+ +g[wc>>2]*+g[xc>>2];g[vi>>2]=+g[uc>>2]*+g[xc>>2]-+g[wc>>2]*+g[vc>>2];g[Ac>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[Cc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2];g[zc>>2]=+g[(c[m>>2]|0)+384>>2];g[Bc>>2]=+g[(c[m>>2]|0)+388>>2];g[Dc>>2]=+g[zc>>2]*+g[Ac>>2]+ +g[Bc>>2]*+g[Cc>>2];g[wi>>2]=+g[zc>>2]*+g[Cc>>2]-+g[Bc>>2]*+g[Ac>>2];g[Ec>>2]=+g[yc>>2]+ +g[Dc>>2];g[To>>2]=+g[vi>>2]+ +g[wi>>2];g[Hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Jc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Gc>>2]=+g[(c[m>>2]|0)+64>>2];g[Ic>>2]=+g[(c[m>>2]|0)+68>>2];g[Kc>>2]=+g[Gc>>2]*+g[Hc>>2]+ +g[Ic>>2]*+g[Jc>>2];g[Tg>>2]=+g[Gc>>2]*+g[Jc>>2]-+g[Ic>>2]*+g[Hc>>2];g[Mc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[Oc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2];g[Lc>>2]=+g[(c[m>>2]|0)+320>>2];g[Nc>>2]=+g[(c[m>>2]|0)+324>>2];g[Pc>>2]=+g[Lc>>2]*+g[Mc>>2]+ +g[Nc>>2]*+g[Oc>>2];g[Ug>>2]=+g[Lc>>2]*+g[Oc>>2]-+g[Nc>>2]*+g[Mc>>2];g[Qc>>2]=+g[Kc>>2]+ +g[Pc>>2];g[ko>>2]=+g[Tg>>2]+ +g[Ug>>2];g[Sg>>2]=+g[Kc>>2]-+g[Pc>>2];g[Vg>>2]=+g[Tg>>2]-+g[Ug>>2];g[Ub>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[Wb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2];g[Tb>>2]=+g[(c[m>>2]|0)+448>>2];g[Vb>>2]=+g[(c[m>>2]|0)+452>>2];g[Xb>>2]=+g[Tb>>2]*+g[Ub>>2]+ +g[Vb>>2]*+g[Wb>>2];g[Og>>2]=+g[Tb>>2]*+g[Wb>>2]-+g[Vb>>2]*+g[Ub>>2];g[Zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[$b>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[Yb>>2]=+g[(c[m>>2]|0)+192>>2];g[_b>>2]=+g[(c[m>>2]|0)+196>>2];g[ac>>2]=+g[Yb>>2]*+g[Zb>>2]+ +g[_b>>2]*+g[$b>>2];g[Pg>>2]=+g[Yb>>2]*+g[$b>>2]-+g[_b>>2]*+g[Zb>>2];g[bc>>2]=+g[Xb>>2]+ +g[ac>>2];g[lo>>2]=+g[Og>>2]+ +g[Pg>>2];g[Ng>>2]=+g[Xb>>2]-+g[ac>>2];g[Qg>>2]=+g[Og>>2]-+g[Pg>>2];g[Fc>>2]=+g[tc>>2]+ +g[Ec>>2];g[cc>>2]=+g[Qc>>2]+ +g[bc>>2];g[dc>>2]=+g[Fc>>2]+ 
+g[cc>>2];g[Fq>>2]=+g[Fc>>2]-+g[cc>>2];g[jo>>2]=+g[tc>>2]-+g[Ec>>2];g[mo>>2]=+g[ko>>2]-+g[lo>>2];g[no>>2]=+g[jo>>2]-+g[mo>>2];g[Bp>>2]=+g[jo>>2]+ +g[mo>>2];g[Aq>>2]=+g[So>>2]+ +g[To>>2];g[Bq>>2]=+g[ko>>2]+ +g[lo>>2];g[Cq>>2]=+g[Aq>>2]-+g[Bq>>2];g[us>>2]=+g[Aq>>2]+ +g[Bq>>2];g[Kg>>2]=+g[Ig>>2]-+g[Jg>>2];g[Lg>>2]=+g[yc>>2]-+g[Dc>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[Cl>>2]=+g[Kg>>2]-+g[Lg>>2];g[Rg>>2]=+g[Ng>>2]-+g[Qg>>2];g[Wg>>2]=+g[Sg>>2]+ +g[Vg>>2];g[Xg>>2]=(+g[Rg>>2]-+g[Wg>>2])*.7071067690849304;g[mm>>2]=(+g[Wg>>2]+ +g[Rg>>2])*.7071067690849304;g[zi>>2]=+g[Vg>>2]-+g[Sg>>2];g[Ai>>2]=+g[Ng>>2]+ +g[Qg>>2];g[Bi>>2]=(+g[zi>>2]-+g[Ai>>2])*.7071067690849304;g[Dl>>2]=(+g[zi>>2]+ +g[Ai>>2])*.7071067690849304;g[Uo>>2]=+g[So>>2]-+g[To>>2];g[Vo>>2]=+g[bc>>2]-+g[Qc>>2];g[Wo>>2]=+g[Uo>>2]-+g[Vo>>2];g[Ep>>2]=+g[Uo>>2]+ +g[Vo>>2];g[ui>>2]=+g[Pb>>2]-+g[sc>>2];g[xi>>2]=+g[vi>>2]-+g[wi>>2];g[yi>>2]=+g[ui>>2]-+g[xi>>2];g[lm>>2]=+g[ui>>2]+ +g[xi>>2];g[fc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[hc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ec>>2]=+g[(c[m>>2]|0)+32>>2];g[gc>>2]=+g[(c[m>>2]|0)+36>>2];g[ic>>2]=+g[ec>>2]*+g[fc>>2]+ +g[gc>>2]*+g[hc>>2];g[Jh>>2]=+g[ec>>2]*+g[hc>>2]-+g[gc>>2]*+g[fc>>2];g[kc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2];g[jc>>2]=+g[(c[m>>2]|0)+288>>2];g[lc>>2]=+g[(c[m>>2]|0)+292>>2];g[nc>>2]=+g[jc>>2]*+g[kc>>2]+ +g[lc>>2]*+g[mc>>2];g[Kh>>2]=+g[jc>>2]*+g[mc>>2]-+g[lc>>2]*+g[kc>>2];g[oc>>2]=+g[ic>>2]+ +g[nc>>2];g[co>>2]=+g[Jh>>2]+ +g[Kh>>2];g[ji>>2]=+g[Jh>>2]-+g[Kh>>2];g[mi>>2]=+g[ic>>2]-+g[nc>>2];g[Md>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Od>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ld>>2]=+g[(c[m>>2]|0)+96>>2];g[Nd>>2]=+g[(c[m>>2]|0)+100>>2];g[Pd>>2]=+g[Ld>>2]*+g[Md>>2]+ 
+g[Nd>>2]*+g[Od>>2];g[_g>>2]=+g[Ld>>2]*+g[Od>>2]-+g[Nd>>2]*+g[Md>>2];g[Rd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[Td>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2];g[Qd>>2]=+g[(c[m>>2]|0)+352>>2];g[Sd>>2]=+g[(c[m>>2]|0)+356>>2];g[Ud>>2]=+g[Qd>>2]*+g[Rd>>2]+ +g[Sd>>2]*+g[Td>>2];g[$g>>2]=+g[Qd>>2]*+g[Td>>2]-+g[Sd>>2]*+g[Rd>>2];g[Vd>>2]=+g[Pd>>2]+ +g[Ud>>2];g[Zo>>2]=+g[_g>>2]+ +g[$g>>2];g[Bh>>2]=+g[_g>>2]-+g[$g>>2];g[Gh>>2]=+g[Pd>>2]-+g[Ud>>2];g[qc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[Tc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[pc>>2]=+g[(c[m>>2]|0)+160>>2];g[Sc>>2]=+g[(c[m>>2]|0)+164>>2];g[Uc>>2]=+g[pc>>2]*+g[qc>>2]+ +g[Sc>>2]*+g[Tc>>2];g[ni>>2]=+g[pc>>2]*+g[Tc>>2]-+g[Sc>>2]*+g[qc>>2];g[Wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[Yc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2];g[Vc>>2]=+g[(c[m>>2]|0)+416>>2];g[Xc>>2]=+g[(c[m>>2]|0)+420>>2];g[Zc>>2]=+g[Vc>>2]*+g[Wc>>2]+ +g[Xc>>2]*+g[Yc>>2];g[oi>>2]=+g[Vc>>2]*+g[Yc>>2]-+g[Xc>>2]*+g[Wc>>2];g[_c>>2]=+g[Uc>>2]+ +g[Zc>>2];g[eo>>2]=+g[ni>>2]+ +g[oi>>2];g[ki>>2]=+g[Uc>>2]-+g[Zc>>2];g[pi>>2]=+g[ni>>2]-+g[oi>>2];g[Bd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[Dd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2];g[Ad>>2]=+g[(c[m>>2]|0)+480>>2];g[Cd>>2]=+g[(c[m>>2]|0)+484>>2];g[Ed>>2]=+g[Ad>>2]*+g[Bd>>2]+ +g[Cd>>2]*+g[Dd>>2];g[Dh>>2]=+g[Ad>>2]*+g[Dd>>2]-+g[Cd>>2]*+g[Bd>>2];g[Gd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Id>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[Fd>>2]=+g[(c[m>>2]|0)+224>>2];g[Hd>>2]=+g[(c[m>>2]|0)+228>>2];g[Jd>>2]=+g[Fd>>2]*+g[Gd>>2]+ +g[Hd>>2]*+g[Id>>2];g[Eh>>2]=+g[Fd>>2]*+g[Id>>2]-+g[Hd>>2]*+g[Gd>>2];g[Kd>>2]=+g[Ed>>2]+ +g[Jd>>2];g[Yo>>2]=+g[Dh>>2]+ +g[Eh>>2];g[Zg>>2]=+g[Ed>>2]-+g[Jd>>2];g[Fh>>2]=+g[Dh>>2]-+g[Eh>>2];g[$c>>2]=+g[oc>>2]+ +g[_c>>2];g[Wd>>2]=+g[Kd>>2]+ +g[Vd>>2];g[Xd>>2]=+g[$c>>2]+ +g[Wd>>2];g[Dq>>2]=+g[Wd>>2]-+g[$c>>2];g[bo>>2]=+g[oc>>2]-+g[_c>>2];g[fo>>2]=+g[co>>2]-+g[eo>>2];g[go>>2]=+g[bo>>2]+ +g[fo>>2];g[oo>>2]=+g[fo>>2]-+g[bo>>2];g[Gq>>2]=+g[co>>2]+ 
+g[eo>>2];g[Hq>>2]=+g[Yo>>2]+ +g[Zo>>2];g[Iq>>2]=+g[Gq>>2]-+g[Hq>>2];g[vs>>2]=+g[Gq>>2]+ +g[Hq>>2];g[Ch>>2]=+g[Zg>>2]-+g[Bh>>2];g[Hh>>2]=+g[Fh>>2]+ +g[Gh>>2];g[Ih>>2]=+g[Ch>>2]*.3826834261417389-+g[Hh>>2]*.9238795042037964;g[Ei>>2]=+g[Hh>>2]*.3826834261417389+ +g[Ch>>2]*.9238795042037964;g[li>>2]=+g[ji>>2]+ +g[ki>>2];g[qi>>2]=+g[mi>>2]-+g[pi>>2];g[ri>>2]=+g[li>>2]*.9238795042037964+ +g[qi>>2]*.3826834261417389;g[Di>>2]=+g[li>>2]*.3826834261417389-+g[qi>>2]*.9238795042037964;g[Il>>2]=+g[ji>>2]-+g[ki>>2];g[Jl>>2]=+g[mi>>2]+ +g[pi>>2];g[im>>2]=+g[Il>>2]*.3826834261417389+ +g[Jl>>2]*.9238795042037964;g[om>>2]=+g[Il>>2]*.9238795042037964-+g[Jl>>2]*.3826834261417389;g[Xo>>2]=+g[Kd>>2]-+g[Vd>>2];g[_o>>2]=+g[Yo>>2]-+g[Zo>>2];g[$o>>2]=+g[Xo>>2]-+g[_o>>2];g[po>>2]=+g[Xo>>2]+ +g[_o>>2];g[Fl>>2]=+g[Zg>>2]+ +g[Bh>>2];g[Gl>>2]=+g[Fh>>2]-+g[Gh>>2];g[Hl>>2]=+g[Fl>>2]*.9238795042037964-+g[Gl>>2]*.3826834261417389;g[pm>>2]=+g[Gl>>2]*.9238795042037964+ +g[Fl>>2]*.3826834261417389;g[Ue>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[We>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Te>>2]=+g[(c[m>>2]|0)+16>>2];g[Ve>>2]=+g[(c[m>>2]|0)+20>>2];g[Xe>>2]=+g[Te>>2]*+g[Ue>>2]+ +g[Ve>>2]*+g[We>>2];g[$h>>2]=+g[Te>>2]*+g[We>>2]-+g[Ve>>2]*+g[Ue>>2];g[Ze>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[$e>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2];g[Ye>>2]=+g[(c[m>>2]|0)+272>>2];g[_e>>2]=+g[(c[m>>2]|0)+276>>2];g[af>>2]=+g[Ye>>2]*+g[Ze>>2]+ +g[_e>>2]*+g[$e>>2];g[ai>>2]=+g[Ye>>2]*+g[$e>>2]-+g[_e>>2]*+g[Ze>>2];g[bf>>2]=+g[Xe>>2]+ +g[af>>2];g[yo>>2]=+g[$h>>2]+ +g[ai>>2];g[bi>>2]=+g[$h>>2]-+g[ai>>2];g[ei>>2]=+g[Xe>>2]-+g[af>>2];g[Ce>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Ee>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Be>>2]=+g[(c[m>>2]|0)+80>>2];g[De>>2]=+g[(c[m>>2]|0)+84>>2];g[Fe>>2]=+g[Be>>2]*+g[Ce>>2]+ 
+g[De>>2]*+g[Ee>>2];g[Ri>>2]=+g[Be>>2]*+g[Ee>>2]-+g[De>>2]*+g[Ce>>2];g[He>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[jf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2];g[Ge>>2]=+g[(c[m>>2]|0)+336>>2];g[Ie>>2]=+g[(c[m>>2]|0)+340>>2];g[kf>>2]=+g[Ge>>2]*+g[He>>2]+ +g[Ie>>2]*+g[jf>>2];g[Si>>2]=+g[Ge>>2]*+g[jf>>2]-+g[Ie>>2]*+g[He>>2];g[lf>>2]=+g[Fe>>2]+ +g[kf>>2];g[ep>>2]=+g[Ri>>2]+ +g[Si>>2];g[Oi>>2]=+g[Fe>>2]-+g[kf>>2];g[Ti>>2]=+g[Ri>>2]-+g[Si>>2];g[df>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[ff>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[cf>>2]=+g[(c[m>>2]|0)+144>>2];g[ef>>2]=+g[(c[m>>2]|0)+148>>2];g[gf>>2]=+g[cf>>2]*+g[df>>2]+ +g[ef>>2]*+g[ff>>2];g[fi>>2]=+g[cf>>2]*+g[ff>>2]-+g[ef>>2]*+g[df>>2];g[ke>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[me>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2];g[je>>2]=+g[(c[m>>2]|0)+400>>2];g[le>>2]=+g[(c[m>>2]|0)+404>>2];g[ne>>2]=+g[je>>2]*+g[ke>>2]+ +g[le>>2]*+g[me>>2];g[gi>>2]=+g[je>>2]*+g[me>>2]-+g[le>>2]*+g[ke>>2];g[oe>>2]=+g[gf>>2]+ +g[ne>>2];g[zo>>2]=+g[fi>>2]+ +g[gi>>2];g[ci>>2]=+g[gf>>2]-+g[ne>>2];g[hi>>2]=+g[fi>>2]-+g[gi>>2];g[re>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[te>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2];g[qe>>2]=+g[(c[m>>2]|0)+464>>2];g[se>>2]=+g[(c[m>>2]|0)+468>>2];g[ue>>2]=+g[qe>>2]*+g[re>>2]+ +g[se>>2]*+g[te>>2];g[Li>>2]=+g[qe>>2]*+g[te>>2]-+g[se>>2]*+g[re>>2];g[we>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[ye>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[ve>>2]=+g[(c[m>>2]|0)+208>>2];g[xe>>2]=+g[(c[m>>2]|0)+212>>2];g[ze>>2]=+g[ve>>2]*+g[we>>2]+ +g[xe>>2]*+g[ye>>2];g[Mi>>2]=+g[ve>>2]*+g[ye>>2]-+g[xe>>2]*+g[we>>2];g[Ae>>2]=+g[ue>>2]+ +g[ze>>2];g[dp>>2]=+g[Li>>2]+ +g[Mi>>2];g[Ni>>2]=+g[Li>>2]-+g[Mi>>2];g[Qi>>2]=+g[ue>>2]-+g[ze>>2];g[pe>>2]=+g[bf>>2]+ +g[oe>>2];g[mf>>2]=+g[Ae>>2]+ +g[lf>>2];g[nf>>2]=+g[pe>>2]+ +g[mf>>2];g[Tq>>2]=+g[mf>>2]-+g[pe>>2];g[cp>>2]=+g[Ae>>2]-+g[lf>>2];g[fp>>2]=+g[dp>>2]-+g[ep>>2];g[gp>>2]=+g[cp>>2]+ +g[fp>>2];g[Op>>2]=+g[cp>>2]-+g[fp>>2];g[Mq>>2]=+g[yo>>2]+ 
+g[zo>>2];g[Nq>>2]=+g[dp>>2]+ +g[ep>>2];g[Oq>>2]=+g[Mq>>2]-+g[Nq>>2];g[As>>2]=+g[Mq>>2]+ +g[Nq>>2];g[di>>2]=+g[bi>>2]+ +g[ci>>2];g[ii>>2]=+g[ei>>2]-+g[hi>>2];g[Ki>>2]=+g[di>>2]*.3826834261417389-+g[ii>>2]*.9238795042037964;g[Gj>>2]=+g[di>>2]*.9238795042037964+ +g[ii>>2]*.3826834261417389;g[Pi>>2]=+g[Ni>>2]+ +g[Oi>>2];g[sj>>2]=+g[Qi>>2]-+g[Ti>>2];g[tj>>2]=+g[Pi>>2]*.3826834261417389+ +g[sj>>2]*.9238795042037964;g[Fj>>2]=+g[sj>>2]*.3826834261417389-+g[Pi>>2]*.9238795042037964;g[zm>>2]=+g[Ni>>2]-+g[Oi>>2];g[Am>>2]=+g[Qi>>2]+ +g[Ti>>2];g[Bm>>2]=+g[zm>>2]*.9238795042037964+ +g[Am>>2]*.3826834261417389;g[Hm>>2]=+g[Am>>2]*.9238795042037964-+g[zm>>2]*.3826834261417389;g[Ao>>2]=+g[yo>>2]-+g[zo>>2];g[Bo>>2]=+g[bf>>2]-+g[oe>>2];g[bp>>2]=+g[Ao>>2]-+g[Bo>>2];g[Pp>>2]=+g[Bo>>2]+ +g[Ao>>2];g[wm>>2]=+g[bi>>2]-+g[ci>>2];g[xm>>2]=+g[ei>>2]+ +g[hi>>2];g[ym>>2]=+g[wm>>2]*.9238795042037964-+g[xm>>2]*.3826834261417389;g[Kl>>2]=+g[wm>>2]*.3826834261417389+ +g[xm>>2]*.9238795042037964;g[Zj>>2]=+g[Gu>>2]+ +g[Yj>>2];g[ra>>2]=+g[v>>2]+ +g[qa>>2];g[sa>>2]=+g[Zj>>2]+ +g[ra>>2];g[os>>2]=+g[Zj>>2]-+g[ra>>2];g[Lr>>2]=+g[us>>2]+ +g[vs>>2];g[Mr>>2]=+g[zs>>2]+ +g[As>>2];g[Nr>>2]=+g[Lr>>2]-+g[Mr>>2];g[Or>>2]=+g[Lr>>2]+ +g[Mr>>2];g[Qr>>2]=+g[ir>>2]+ +g[jr>>2];g[Xr>>2]=+g[Rr>>2]+ +g[Wr>>2];g[Yr>>2]=+g[Qr>>2]+ +g[Xr>>2];g[Ds>>2]=+g[Xr>>2]-+g[Qr>>2];g[mb>>2]=+g[R>>2]+ +g[lb>>2];g[hb>>2]=+g[La>>2]+ +g[gb>>2];g[Jb>>2]=+g[mb>>2]+ +g[hb>>2];g[Cs>>2]=+g[hb>>2]-+g[mb>>2];g[Yd>>2]=+g[dc>>2]+ +g[Xd>>2];g[of>>2]=+g[Se>>2]+ +g[nf>>2];g[pf>>2]=+g[Yd>>2]+ +g[of>>2];g[_r>>2]=+g[of>>2]-+g[Yd>>2];g[ts>>2]=+g[dc>>2]-+g[Xd>>2];g[ws>>2]=+g[us>>2]-+g[vs>>2];g[xs>>2]=+g[ts>>2]+ +g[ws>>2];g[Hr>>2]=+g[ws>>2]-+g[ts>>2];g[ys>>2]=+g[Se>>2]-+g[nf>>2];g[Dr>>2]=+g[zs>>2]-+g[As>>2];g[Er>>2]=+g[ys>>2]-+g[Dr>>2];g[Ir>>2]=+g[ys>>2]+ +g[Dr>>2];g[ps>>2]=+g[mr>>2]+ +g[nr>>2];g[qs>>2]=+g[uq>>2]+ +g[vq>>2];g[rs>>2]=+g[ps>>2]-+g[qs>>2];g[Pr>>2]=+g[ps>>2]+ +g[qs>>2];g[Kb>>2]=+g[sa>>2]+ 
+g[Jb>>2];g[(c[k>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[Kb>>2]-+g[pf>>2];g[c[k>>2]>>2]=+g[Kb>>2]+ +g[pf>>2];g[Zr>>2]=+g[Pr>>2]+ +g[Yr>>2];g[c[l>>2]>>2]=+g[Or>>2]+ +g[Zr>>2];g[(c[l>>2]|0)+(c[n>>2]<<5<<2)>>2]=+g[Zr>>2]-+g[Or>>2];g[Kr>>2]=+g[sa>>2]-+g[Jb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[Kr>>2]-+g[Nr>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Kr>>2]+ +g[Nr>>2];g[$r>>2]=+g[Yr>>2]-+g[Pr>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[_r>>2]+ +g[$r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*48<<2)>>2]=+g[$r>>2]-+g[_r>>2];g[ss>>2]=+g[os>>2]+ +g[rs>>2];g[Fr>>2]=(+g[xs>>2]+ +g[Er>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[ss>>2]-+g[Fr>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ss>>2]+ +g[Fr>>2];g[as>>2]=(+g[Hr>>2]+ +g[Ir>>2])*.7071067690849304;g[Es>>2]=+g[Cs>>2]+ +g[Ds>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[as>>2]+ +g[Es>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*40<<2)>>2]=+g[Es>>2]-+g[as>>2];g[Gr>>2]=+g[os>>2]-+g[rs>>2];g[Jr>>2]=(+g[Hr>>2]-+g[Ir>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[Gr>>2]-+g[Jr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Gr>>2]+ +g[Jr>>2];g[Fs>>2]=(+g[Er>>2]-+g[xs>>2])*.7071067690849304;g[Gs>>2]=+g[Ds>>2]-+g[Cs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Fs>>2]+ +g[Gs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*56<<2)>>2]=+g[Gs>>2]-+g[Fs>>2];g[hr>>2]=+g[Gu>>2]-+g[Yj>>2];g[kr>>2]=+g[ir>>2]-+g[jr>>2];g[lr>>2]=+g[hr>>2]-+g[kr>>2];g[Ar>>2]=+g[hr>>2]+ +g[kr>>2];g[gs>>2]=+g[Lq>>2]+ +g[Oq>>2];g[hs>>2]=+g[Sq>>2]+ +g[Tq>>2];g[is>>2]=+g[gs>>2]*.9238795042037964-+g[hs>>2]*.3826834261417389;g[ms>>2]=+g[hs>>2]*.9238795042037964+ +g[gs>>2]*.3826834261417389;g[Js>>2]=+g[qa>>2]-+g[v>>2];g[Ks>>2]=+g[Wr>>2]-+g[Rr>>2];g[Ls>>2]=+g[Js>>2]+ +g[Ks>>2];g[pt>>2]=+g[Ks>>2]-+g[Js>>2];g[qr>>2]=+g[or>>2]-+g[pr>>2];g[xq>>2]=+g[rr>>2]+ +g[wq>>2];g[yq>>2]=(+g[qr>>2]-+g[xq>>2])*.7071067690849304;g[Is>>2]=(+g[qr>>2]+ +g[xq>>2])*.7071067690849304;g[Eq>>2]=+g[Cq>>2]-+g[Dq>>2];g[Jq>>2]=+g[Fq>>2]-+g[Iq>>2];g[Kq>>2]=+g[Eq>>2]*.9238795042037964+ 
+g[Jq>>2]*.3826834261417389;g[xr>>2]=+g[Eq>>2]*.3826834261417389-+g[Jq>>2]*.9238795042037964;g[Br>>2]=+g[pr>>2]+ +g[or>>2];g[Cr>>2]=+g[rr>>2]-+g[wq>>2];g[bs>>2]=(+g[Br>>2]+ +g[Cr>>2])*.7071067690849304;g[ot>>2]=(+g[Cr>>2]-+g[Br>>2])*.7071067690849304;g[ds>>2]=+g[Cq>>2]+ +g[Dq>>2];g[es>>2]=+g[Fq>>2]+ +g[Iq>>2];g[fs>>2]=+g[ds>>2]*.3826834261417389+ +g[es>>2]*.9238795042037964;g[ls>>2]=+g[ds>>2]*.9238795042037964-+g[es>>2]*.3826834261417389;g[Pq>>2]=+g[Lq>>2]-+g[Oq>>2];g[tr>>2]=+g[Sq>>2]-+g[Tq>>2];g[ur>>2]=+g[Pq>>2]*.3826834261417389-+g[tr>>2]*.9238795042037964;g[yr>>2]=+g[tr>>2]*.3826834261417389+ +g[Pq>>2]*.9238795042037964;g[zq>>2]=+g[lr>>2]+ +g[yq>>2];g[vr>>2]=+g[Kq>>2]+ +g[ur>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[zq>>2]-+g[vr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[zq>>2]+ +g[vr>>2];g[nt>>2]=+g[xr>>2]+ +g[yr>>2];g[qt>>2]=+g[ot>>2]+ +g[pt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[nt>>2]+ +g[qt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*44<<2)>>2]=+g[qt>>2]-+g[nt>>2];g[wr>>2]=+g[lr>>2]-+g[yq>>2];g[zr>>2]=+g[xr>>2]-+g[yr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[wr>>2]-+g[zr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[wr>>2]+ +g[zr>>2];g[rt>>2]=+g[ur>>2]-+g[Kq>>2];g[st>>2]=+g[pt>>2]-+g[ot>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[rt>>2]+ +g[st>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*60<<2)>>2]=+g[st>>2]-+g[rt>>2];g[cs>>2]=+g[Ar>>2]+ +g[bs>>2];g[js>>2]=+g[fs>>2]+ +g[is>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[cs>>2]-+g[js>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[cs>>2]+ +g[js>>2];g[Hs>>2]=+g[ls>>2]+ +g[ms>>2];g[kt>>2]=+g[Is>>2]+ +g[Ls>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Hs>>2]+ +g[kt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*36<<2)>>2]=+g[kt>>2]-+g[Hs>>2];g[ks>>2]=+g[Ar>>2]-+g[bs>>2];g[ns>>2]=+g[ls>>2]-+g[ms>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[ks>>2]-+g[ns>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[ks>>2]+ +g[ns>>2];g[lt>>2]=+g[is>>2]-+g[fs>>2];g[mt>>2]=+g[Ls>>2]-+g[Is>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[lt>>2]+ 
+g[mt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*52<<2)>>2]=+g[mt>>2]-+g[lt>>2];g[gn>>2]=+g[cn>>2]-+g[fn>>2];g[Tn>>2]=(+g[mn>>2]-+g[rn>>2])*.7071067690849304;g[Un>>2]=+g[gn>>2]-+g[Tn>>2];g[Yp>>2]=+g[gn>>2]+ +g[Tn>>2];g[Eo>>2]=+g[Zn>>2]*.3826834261417389-+g[Do>>2]*.9238795042037964;g[Po>>2]=+g[Jo>>2]*.3826834261417389+ +g[Oo>>2]*.9238795042037964;g[Qo>>2]=+g[Eo>>2]-+g[Po>>2];g[Ms>>2]=+g[Eo>>2]+ +g[Po>>2];g[Ns>>2]=(+g[qp>>2]-+g[pp>>2])*.7071067690849304;g[Os>>2]=+g[xt>>2]-+g[wt>>2];g[Ps>>2]=+g[Ns>>2]+ +g[Os>>2];g[Vs>>2]=+g[Os>>2]-+g[Ns>>2];g[Zp>>2]=+g[Zn>>2]*.9238795042037964+ +g[Do>>2]*.3826834261417389;g[_p>>2]=+g[Oo>>2]*.3826834261417389-+g[Jo>>2]*.9238795042037964;g[$p>>2]=+g[Zp>>2]+ +g[_p>>2];g[Us>>2]=+g[_p>>2]-+g[Zp>>2];g[hp>>2]=(+g[bp>>2]-+g[gp>>2])*.7071067690849304;g[ip>>2]=+g[xo>>2]-+g[hp>>2];g[eq>>2]=+g[xo>>2]+ +g[hp>>2];g[Qp>>2]=(+g[Op>>2]-+g[Pp>>2])*.7071067690849304;g[Rp>>2]=+g[Np>>2]-+g[Qp>>2];g[fq>>2]=+g[Np>>2]+ +g[Qp>>2];g[Sp>>2]=+g[ip>>2]*.19509032368659973-+g[Rp>>2]*.9807852506637573;g[mp>>2]=+g[fq>>2]*.8314695954322815+ +g[eq>>2]*.5555702447891235;g[Wp>>2]=+g[Rp>>2]*.19509032368659973+ +g[ip>>2]*.9807852506637573;g[gq>>2]=+g[eq>>2]*.8314695954322815-+g[fq>>2]*.5555702447891235;g[ho>>2]=(+g[$o>>2]-+g[go>>2])*.7071067690849304;g[io>>2]=+g[Wo>>2]-+g[ho>>2];g[bq>>2]=+g[Wo>>2]+ +g[ho>>2];g[qo>>2]=(+g[oo>>2]-+g[po>>2])*.7071067690849304;g[ro>>2]=+g[no>>2]-+g[qo>>2];g[cq>>2]=+g[no>>2]+ +g[qo>>2];g[so>>2]=+g[io>>2]*.9807852506637573+ +g[ro>>2]*.19509032368659973;g[lp>>2]=+g[bq>>2]*.8314695954322815-+g[cq>>2]*.5555702447891235;g[Vp>>2]=+g[io>>2]*.19509032368659973-+g[ro>>2]*.9807852506637573;g[dq>>2]=+g[bq>>2]*.5555702447891235+ +g[cq>>2]*.8314695954322815;g[Ro>>2]=+g[Un>>2]+ +g[Qo>>2];g[Tp>>2]=+g[so>>2]+ +g[Sp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[Ro>>2]-+g[Tp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Ro>>2]+ +g[Tp>>2];g[Ts>>2]=+g[Vp>>2]+ +g[Wp>>2];g[Ws>>2]=+g[Us>>2]+ +g[Vs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Ts>>2]+ 
+g[Ws>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*46<<2)>>2]=+g[Ws>>2]-+g[Ts>>2];g[Up>>2]=+g[Un>>2]-+g[Qo>>2];g[Xp>>2]=+g[Vp>>2]-+g[Wp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[Up>>2]-+g[Xp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Up>>2]+ +g[Xp>>2];g[Xs>>2]=+g[Sp>>2]-+g[so>>2];g[Ys>>2]=+g[Vs>>2]-+g[Us>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Xs>>2]+ +g[Ys>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*62<<2)>>2]=+g[Ys>>2]-+g[Xs>>2];g[aq>>2]=+g[Yp>>2]+ +g[$p>>2];g[hq>>2]=+g[dq>>2]+ +g[gq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[aq>>2]-+g[hq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[aq>>2]+ +g[hq>>2];g[Jt>>2]=+g[lp>>2]+ +g[mp>>2];g[Qs>>2]=+g[Ms>>2]+ +g[Ps>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Jt>>2]+ +g[Qs>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*38<<2)>>2]=+g[Qs>>2]-+g[Jt>>2];g[iq>>2]=+g[Yp>>2]-+g[$p>>2];g[np>>2]=+g[lp>>2]-+g[mp>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[iq>>2]-+g[np>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[iq>>2]+ +g[np>>2];g[Rs>>2]=+g[gq>>2]-+g[dq>>2];g[Ss>>2]=+g[Ps>>2]-+g[Ms>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Rs>>2]+ +g[Ss>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*54<<2)>>2]=+g[Ss>>2]-+g[Rs>>2];g[op>>2]=+g[cn>>2]+ +g[fn>>2];g[rp>>2]=(+g[pp>>2]+ +g[qp>>2])*.7071067690849304;g[sp>>2]=+g[op>>2]-+g[rp>>2];g[tq>>2]=+g[op>>2]+ +g[rp>>2];g[vp>>2]=+g[tp>>2]*.9238795042037964-+g[up>>2]*.3826834261417389;g[yp>>2]=+g[wp>>2]*.9238795042037964+ +g[xp>>2]*.3826834261417389;g[zp>>2]=+g[vp>>2]-+g[yp>>2];g[ut>>2]=+g[vp>>2]+ +g[yp>>2];g[vt>>2]=(+g[mn>>2]+ +g[rn>>2])*.7071067690849304;g[yt>>2]=+g[wt>>2]+ +g[xt>>2];g[zt>>2]=+g[vt>>2]+ +g[yt>>2];g[Ft>>2]=+g[yt>>2]-+g[vt>>2];g[Uq>>2]=+g[tp>>2]*.3826834261417389+ +g[up>>2]*.9238795042037964;g[Vq>>2]=+g[xp>>2]*.9238795042037964-+g[wp>>2]*.3826834261417389;g[Wq>>2]=+g[Uq>>2]+ +g[Vq>>2];g[Et>>2]=+g[Vq>>2]-+g[Uq>>2];g[Jp>>2]=(+g[Pp>>2]+ +g[Op>>2])*.7071067690849304;g[Kp>>2]=+g[Ip>>2]-+g[Jp>>2];g[$q>>2]=+g[Ip>>2]+ +g[Jp>>2];g[lq>>2]=(+g[bp>>2]+ +g[gp>>2])*.7071067690849304;g[mq>>2]=+g[kq>>2]-+g[lq>>2];g[ar>>2]=+g[kq>>2]+ 
+g[lq>>2];g[nq>>2]=+g[Kp>>2]*.5555702447891235-+g[mq>>2]*.8314695954322815;g[fr>>2]=+g[$q>>2]*.19509032368659973+ +g[ar>>2]*.9807852506637573;g[rq>>2]=+g[Kp>>2]*.8314695954322815+ +g[mq>>2]*.5555702447891235;g[br>>2]=+g[$q>>2]*.9807852506637573-+g[ar>>2]*.19509032368659973;g[Cp>>2]=(+g[go>>2]+ +g[$o>>2])*.7071067690849304;g[Dp>>2]=+g[Bp>>2]-+g[Cp>>2];g[Yq>>2]=+g[Bp>>2]+ +g[Cp>>2];g[Fp>>2]=(+g[oo>>2]+ +g[po>>2])*.7071067690849304;g[Gp>>2]=+g[Ep>>2]-+g[Fp>>2];g[Zq>>2]=+g[Ep>>2]+ +g[Fp>>2];g[Hp>>2]=+g[Dp>>2]*.5555702447891235+ +g[Gp>>2]*.8314695954322815;g[er>>2]=+g[Zq>>2]*.9807852506637573-+g[Yq>>2]*.19509032368659973;g[qq>>2]=+g[Gp>>2]*.5555702447891235-+g[Dp>>2]*.8314695954322815;g[_q>>2]=+g[Yq>>2]*.9807852506637573+ +g[Zq>>2]*.19509032368659973;g[Ap>>2]=+g[sp>>2]+ +g[zp>>2];g[oq>>2]=+g[Hp>>2]+ +g[nq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[Ap>>2]-+g[oq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Ap>>2]+ +g[oq>>2];g[Dt>>2]=+g[qq>>2]+ +g[rq>>2];g[Gt>>2]=+g[Et>>2]+ +g[Ft>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Dt>>2]+ +g[Gt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*42<<2)>>2]=+g[Gt>>2]-+g[Dt>>2];g[pq>>2]=+g[sp>>2]-+g[zp>>2];g[sq>>2]=+g[qq>>2]-+g[rq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[pq>>2]-+g[sq>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[pq>>2]+ +g[sq>>2];g[Ht>>2]=+g[nq>>2]-+g[Hp>>2];g[It>>2]=+g[Ft>>2]-+g[Et>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[Ht>>2]+ +g[It>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*58<<2)>>2]=+g[It>>2]-+g[Ht>>2];g[Xq>>2]=+g[tq>>2]+ +g[Wq>>2];g[cr>>2]=+g[_q>>2]+ +g[br>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[Xq>>2]-+g[cr>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Xq>>2]+ +g[cr>>2];g[tt>>2]=+g[er>>2]+ +g[fr>>2];g[At>>2]=+g[ut>>2]+ +g[zt>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[tt>>2]+ +g[At>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*34<<2)>>2]=+g[At>>2]-+g[tt>>2];g[dr>>2]=+g[tq>>2]-+g[Wq>>2];g[gr>>2]=+g[er>>2]-+g[fr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[dr>>2]-+g[gr>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[dr>>2]+ 
+g[gr>>2];g[Bt>>2]=+g[br>>2]-+g[_q>>2];g[Ct>>2]=+g[zt>>2]-+g[ut>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Bt>>2]+ +g[Ct>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*50<<2)>>2]=+g[Ct>>2]-+g[Bt>>2];g[eg>>2]=+g[Uf>>2]-+g[dg>>2];g[Df>>2]=+g[pg>>2]-+g[Cf>>2];g[Ef>>2]=+g[eg>>2]-+g[Df>>2];g[Pj>>2]=+g[eg>>2]+ +g[Df>>2];g[Wt>>2]=+g[hj>>2]-+g[gj>>2];g[Xt>>2]=+g[tu>>2]-+g[su>>2];g[Yt>>2]=+g[Wt>>2]+ +g[Xt>>2];g[cu>>2]=+g[Xt>>2]-+g[Wt>>2];g[ug>>2]=+g[Jf>>2]-+g[tg>>2];g[bh>>2]=+g[zg>>2]-+g[ah>>2];g[ch>>2]=+g[ug>>2]*.19509032368659973-+g[bh>>2]*.9807852506637573;g[Qj>>2]=+g[ug>>2]*.9807852506637573+ +g[bh>>2]*.19509032368659973;g[th>>2]=+g[hh>>2]-+g[sh>>2];g[Eg>>2]=+g[yh>>2]-+g[Dg>>2];g[Fg>>2]=+g[th>>2]*.19509032368659973+ +g[Eg>>2]*.9807852506637573;g[Rj>>2]=+g[Eg>>2]*.19509032368659973-+g[th>>2]*.9807852506637573;g[Gg>>2]=+g[ch>>2]-+g[Fg>>2];g[bu>>2]=+g[Rj>>2]-+g[Qj>>2];g[Ui>>2]=+g[Qj>>2]+ +g[Rj>>2];g[Vt>>2]=+g[ch>>2]+ +g[Fg>>2];g[Yg>>2]=+g[Mg>>2]-+g[Xg>>2];g[si>>2]=+g[Ih>>2]-+g[ri>>2];g[ti>>2]=+g[Yg>>2]-+g[si>>2];g[Wi>>2]=+g[Yg>>2]+ +g[si>>2];g[Ci>>2]=+g[yi>>2]-+g[Bi>>2];g[Fi>>2]=+g[Di>>2]-+g[Ei>>2];g[Gi>>2]=+g[Ci>>2]-+g[Fi>>2];g[Xi>>2]=+g[Ci>>2]+ +g[Fi>>2];g[Hi>>2]=+g[ti>>2]*.9951847195625305+ +g[Gi>>2]*.0980171412229538;g[cj>>2]=+g[Wi>>2]*.7730104327201843-+g[Xi>>2]*.6343932747840881;g[Mj>>2]=+g[ti>>2]*.0980171412229538-+g[Gi>>2]*.9951847195625305;g[Yi>>2]=+g[Wi>>2]*.6343932747840881+ +g[Xi>>2]*.7730104327201843;g[_h>>2]=+g[Oh>>2]-+g[Zh>>2];g[uj>>2]=+g[Ki>>2]-+g[tj>>2];g[vj>>2]=+g[_h>>2]-+g[uj>>2];g[Zi>>2]=+g[_h>>2]+ +g[uj>>2];g[Ej>>2]=+g[Aj>>2]-+g[Dj>>2];g[Hj>>2]=+g[Fj>>2]-+g[Gj>>2];g[Ij>>2]=+g[Ej>>2]-+g[Hj>>2];g[_i>>2]=+g[Ej>>2]+ +g[Hj>>2];g[Jj>>2]=+g[vj>>2]*.0980171412229538-+g[Ij>>2]*.9951847195625305;g[dj>>2]=+g[_i>>2]*.7730104327201843+ +g[Zi>>2]*.6343932747840881;g[Nj>>2]=+g[Ij>>2]*.0980171412229538+ +g[vj>>2]*.9951847195625305;g[$i>>2]=+g[Zi>>2]*.7730104327201843-+g[_i>>2]*.6343932747840881;g[Hg>>2]=+g[Ef>>2]+ +g[Gg>>2];g[Kj>>2]=+g[Hi>>2]+ 
+g[Jj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[Hg>>2]-+g[Kj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Hg>>2]+ +g[Kj>>2];g[au>>2]=+g[Mj>>2]+ +g[Nj>>2];g[du>>2]=+g[bu>>2]+ +g[cu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[au>>2]+ +g[du>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*47<<2)>>2]=+g[du>>2]-+g[au>>2];g[Lj>>2]=+g[Ef>>2]-+g[Gg>>2];g[Oj>>2]=+g[Mj>>2]-+g[Nj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[Lj>>2]-+g[Oj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Lj>>2]+ +g[Oj>>2];g[eu>>2]=+g[Jj>>2]-+g[Hi>>2];g[fu>>2]=+g[cu>>2]-+g[bu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[eu>>2]+ +g[fu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*63<<2)>>2]=+g[fu>>2]-+g[eu>>2];g[Vi>>2]=+g[Pj>>2]+ +g[Ui>>2];g[aj>>2]=+g[Yi>>2]+ +g[$i>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[Vi>>2]-+g[aj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Vi>>2]+ +g[aj>>2];g[Fu>>2]=+g[cj>>2]+ +g[dj>>2];g[Zt>>2]=+g[Vt>>2]+ +g[Yt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Fu>>2]+ +g[Zt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*39<<2)>>2]=+g[Zt>>2]-+g[Fu>>2];g[bj>>2]=+g[Pj>>2]-+g[Ui>>2];g[ej>>2]=+g[cj>>2]-+g[dj>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[bj>>2]-+g[ej>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[bj>>2]+ +g[ej>>2];g[_t>>2]=+g[$i>>2]-+g[Yi>>2];g[$t>>2]=+g[Yt>>2]-+g[Vt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[_t>>2]+ +g[$t>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*55<<2)>>2]=+g[$t>>2]-+g[_t>>2];g[Ek>>2]=+g[yl>>2]-+g[Dk>>2];g[Lk>>2]=+g[Hk>>2]-+g[Kk>>2];g[Mk>>2]=+g[Ek>>2]-+g[Lk>>2];g[Tl>>2]=+g[Ek>>2]+ +g[Lk>>2];g[St>>2]=+g[Km>>2]-+g[Jm>>2];g[Tt>>2]=+g[dt>>2]-+g[at>>2];g[Ut>>2]=+g[St>>2]+ +g[Tt>>2];g[lu>>2]=+g[Tt>>2]-+g[St>>2];g[Pk>>2]=+g[Nk>>2]-+g[Ok>>2];g[Sk>>2]=+g[Qk>>2]-+g[Rk>>2];g[Tk>>2]=+g[Pk>>2]*.5555702447891235-+g[Sk>>2]*.8314695954322815;g[Ul>>2]=+g[Sk>>2]*.5555702447891235+ +g[Pk>>2]*.8314695954322815;g[Wk>>2]=+g[Uk>>2]-+g[Vk>>2];g[Zk>>2]=+g[Xk>>2]-+g[Yk>>2];g[_k>>2]=+g[Wk>>2]*.8314695954322815+ 
+g[Zk>>2]*.5555702447891235;g[Vl>>2]=+g[Wk>>2]*.5555702447891235-+g[Zk>>2]*.8314695954322815;g[Al>>2]=+g[Tk>>2]-+g[_k>>2];g[ku>>2]=+g[Vl>>2]-+g[Ul>>2];g[Wl>>2]=+g[Ul>>2]+ +g[Vl>>2];g[Rt>>2]=+g[Tk>>2]+ +g[_k>>2];g[El>>2]=+g[Cl>>2]-+g[Dl>>2];g[jm>>2]=+g[Hl>>2]-+g[im>>2];g[km>>2]=+g[El>>2]-+g[jm>>2];g[Yl>>2]=+g[El>>2]+ +g[jm>>2];g[nm>>2]=+g[lm>>2]-+g[mm>>2];g[qm>>2]=+g[om>>2]-+g[pm>>2];g[rm>>2]=+g[nm>>2]-+g[qm>>2];g[Zl>>2]=+g[nm>>2]+ +g[qm>>2];g[sm>>2]=+g[km>>2]*.9569403529167175+ +g[rm>>2]*.290284663438797;g[em>>2]=+g[Yl>>2]*.8819212913513184-+g[Zl>>2]*.4713967442512512;g[Ql>>2]=+g[km>>2]*.290284663438797-+g[rm>>2]*.9569403529167175;g[_l>>2]=+g[Yl>>2]*.4713967442512512+ +g[Zl>>2]*.8819212913513184;g[vm>>2]=+g[tm>>2]-+g[um>>2];g[Cm>>2]=+g[ym>>2]-+g[Bm>>2];g[Dm>>2]=+g[vm>>2]-+g[Cm>>2];g[$l>>2]=+g[vm>>2]+ +g[Cm>>2];g[Gm>>2]=+g[Em>>2]-+g[Fm>>2];g[Ll>>2]=+g[Hm>>2]-+g[Kl>>2];g[Ml>>2]=+g[Gm>>2]-+g[Ll>>2];g[am>>2]=+g[Gm>>2]+ +g[Ll>>2];g[Nl>>2]=+g[Dm>>2]*.290284663438797-+g[Ml>>2]*.9569403529167175;g[fm>>2]=+g[am>>2]*.8819212913513184+ +g[$l>>2]*.4713967442512512;g[Rl>>2]=+g[Ml>>2]*.290284663438797+ +g[Dm>>2]*.9569403529167175;g[bm>>2]=+g[$l>>2]*.8819212913513184-+g[am>>2]*.4713967442512512;g[Bl>>2]=+g[Mk>>2]+ +g[Al>>2];g[Ol>>2]=+g[sm>>2]+ +g[Nl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[Bl>>2]-+g[Ol>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Bl>>2]+ +g[Ol>>2];g[ju>>2]=+g[Ql>>2]+ +g[Rl>>2];g[mu>>2]=+g[ku>>2]+ +g[lu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[ju>>2]+ +g[mu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*45<<2)>>2]=+g[mu>>2]-+g[ju>>2];g[Pl>>2]=+g[Mk>>2]-+g[Al>>2];g[Sl>>2]=+g[Ql>>2]-+g[Rl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[Pl>>2]-+g[Sl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[Pl>>2]+ +g[Sl>>2];g[nu>>2]=+g[Nl>>2]-+g[sm>>2];g[ou>>2]=+g[lu>>2]-+g[ku>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[nu>>2]+ +g[ou>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*61<<2)>>2]=+g[ou>>2]-+g[nu>>2];g[Xl>>2]=+g[Tl>>2]+ +g[Wl>>2];g[cm>>2]=+g[_l>>2]+ 
+g[bm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[Xl>>2]-+g[cm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Xl>>2]+ +g[cm>>2];g[Qt>>2]=+g[em>>2]+ +g[fm>>2];g[gu>>2]=+g[Rt>>2]+ +g[Ut>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Qt>>2]+ +g[gu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*37<<2)>>2]=+g[gu>>2]-+g[Qt>>2];g[dm>>2]=+g[Tl>>2]-+g[Wl>>2];g[gm>>2]=+g[em>>2]-+g[fm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[dm>>2]-+g[gm>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[dm>>2]+ +g[gm>>2];g[hu>>2]=+g[bm>>2]-+g[_l>>2];g[iu>>2]=+g[Ut>>2]-+g[Rt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[hu>>2]+ +g[iu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*53<<2)>>2]=+g[iu>>2]-+g[hu>>2];g[hm>>2]=+g[yl>>2]+ +g[Dk>>2];g[Lm>>2]=+g[Jm>>2]+ +g[Km>>2];g[Mm>>2]=+g[hm>>2]-+g[Lm>>2];g[Nn>>2]=+g[hm>>2]+ +g[Lm>>2];g[$s>>2]=+g[Hk>>2]+ +g[Kk>>2];g[et>>2]=+g[at>>2]+ +g[dt>>2];g[ft>>2]=+g[$s>>2]+ +g[et>>2];g[Mt>>2]=+g[et>>2]-+g[$s>>2];g[Nm>>2]=+g[Nk>>2]+ +g[Ok>>2];g[Om>>2]=+g[Qk>>2]+ +g[Rk>>2];g[Pm>>2]=+g[Nm>>2]*.9807852506637573-+g[Om>>2]*.19509032368659973;g[On>>2]=+g[Om>>2]*.9807852506637573+ +g[Nm>>2]*.19509032368659973;g[Qm>>2]=+g[Uk>>2]+ +g[Vk>>2];g[Rm>>2]=+g[Xk>>2]+ +g[Yk>>2];g[Sm>>2]=+g[Qm>>2]*.19509032368659973+ +g[Rm>>2]*.9807852506637573;g[Pn>>2]=+g[Qm>>2]*.9807852506637573-+g[Rm>>2]*.19509032368659973;g[sn>>2]=+g[Pm>>2]-+g[Sm>>2];g[Lt>>2]=+g[Pn>>2]-+g[On>>2];g[Qn>>2]=+g[On>>2]+ +g[Pn>>2];g[_s>>2]=+g[Pm>>2]+ +g[Sm>>2];g[un>>2]=+g[lm>>2]+ +g[mm>>2];g[vn>>2]=+g[im>>2]+ +g[Hl>>2];g[wn>>2]=+g[un>>2]-+g[vn>>2];g[Tm>>2]=+g[un>>2]+ +g[vn>>2];g[xn>>2]=+g[Cl>>2]+ +g[Dl>>2];g[yn>>2]=+g[om>>2]+ +g[pm>>2];g[zn>>2]=+g[xn>>2]-+g[yn>>2];g[Um>>2]=+g[xn>>2]+ +g[yn>>2];g[An>>2]=+g[wn>>2]*.6343932747840881+ +g[zn>>2]*.7730104327201843;g[$m>>2]=+g[Um>>2]*.9951847195625305-+g[Tm>>2]*.0980171412229538;g[Kn>>2]=+g[zn>>2]*.6343932747840881-+g[wn>>2]*.7730104327201843;g[Vm>>2]=+g[Tm>>2]*.9951847195625305+ +g[Um>>2]*.0980171412229538;g[Bn>>2]=+g[tm>>2]+ +g[um>>2];g[Cn>>2]=+g[Kl>>2]+ 
+g[Hm>>2];g[Dn>>2]=+g[Bn>>2]-+g[Cn>>2];g[Wm>>2]=+g[Bn>>2]+ +g[Cn>>2];g[En>>2]=+g[Em>>2]+ +g[Fm>>2];g[Fn>>2]=+g[ym>>2]+ +g[Bm>>2];g[Gn>>2]=+g[En>>2]-+g[Fn>>2];g[Xm>>2]=+g[En>>2]+ +g[Fn>>2];g[Hn>>2]=+g[Dn>>2]*.6343932747840881-+g[Gn>>2]*.7730104327201843;g[an>>2]=+g[Wm>>2]*.0980171412229538+ +g[Xm>>2]*.9951847195625305;g[Ln>>2]=+g[Dn>>2]*.7730104327201843+ +g[Gn>>2]*.6343932747840881;g[Ym>>2]=+g[Wm>>2]*.9951847195625305-+g[Xm>>2]*.0980171412229538;g[tn>>2]=+g[Mm>>2]+ +g[sn>>2];g[In>>2]=+g[An>>2]+ +g[Hn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[tn>>2]-+g[In>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[tn>>2]+ +g[In>>2];g[jt>>2]=+g[Kn>>2]+ +g[Ln>>2];g[Nt>>2]=+g[Lt>>2]+ +g[Mt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[jt>>2]+ +g[Nt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*41<<2)>>2]=+g[Nt>>2]-+g[jt>>2];g[Jn>>2]=+g[Mm>>2]-+g[sn>>2];g[Mn>>2]=+g[Kn>>2]-+g[Ln>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[Jn>>2]-+g[Mn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Jn>>2]+ +g[Mn>>2];g[Ot>>2]=+g[Hn>>2]-+g[An>>2];g[Pt>>2]=+g[Mt>>2]-+g[Lt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[Ot>>2]+ +g[Pt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*57<<2)>>2]=+g[Pt>>2]-+g[Ot>>2];g[Rn>>2]=+g[Nn>>2]+ +g[Qn>>2];g[Zm>>2]=+g[Vm>>2]+ +g[Ym>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[Rn>>2]-+g[Zm>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Rn>>2]+ +g[Zm>>2];g[Zs>>2]=+g[$m>>2]+ +g[an>>2];g[gt>>2]=+g[_s>>2]+ +g[ft>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Zs>>2]+ +g[gt>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*33<<2)>>2]=+g[gt>>2]-+g[Zs>>2];g[_m>>2]=+g[Nn>>2]-+g[Qn>>2];g[bn>>2]=+g[$m>>2]-+g[an>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[_m>>2]-+g[bn>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[_m>>2]+ +g[bn>>2];g[ht>>2]=+g[Ym>>2]-+g[Vm>>2];g[it>>2]=+g[ft>>2]-+g[_s>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[ht>>2]+ +g[it>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*49<<2)>>2]=+g[it>>2]-+g[ht>>2];g[fj>>2]=+g[Uf>>2]+ +g[dg>>2];g[ij>>2]=+g[gj>>2]+ +g[hj>>2];g[jj>>2]=+g[fj>>2]-+g[ij>>2];g[il>>2]=+g[fj>>2]+ +g[ij>>2];g[ru>>2]=+g[pg>>2]+ 
+g[Cf>>2];g[uu>>2]=+g[su>>2]+ +g[tu>>2];g[vu>>2]=+g[ru>>2]+ +g[uu>>2];g[Bu>>2]=+g[uu>>2]-+g[ru>>2];g[kj>>2]=+g[Jf>>2]+ +g[tg>>2];g[lj>>2]=+g[zg>>2]+ +g[ah>>2];g[mj>>2]=+g[kj>>2]*.8314695954322815-+g[lj>>2]*.5555702447891235;g[jl>>2]=+g[kj>>2]*.5555702447891235+ +g[lj>>2]*.8314695954322815;g[nj>>2]=+g[hh>>2]+ +g[sh>>2];g[oj>>2]=+g[yh>>2]+ +g[Dg>>2];g[pj>>2]=+g[nj>>2]*.8314695954322815+ +g[oj>>2]*.5555702447891235;g[kl>>2]=+g[oj>>2]*.8314695954322815-+g[nj>>2]*.5555702447891235;g[qj>>2]=+g[mj>>2]-+g[pj>>2];g[Au>>2]=+g[kl>>2]-+g[jl>>2];g[ll>>2]=+g[jl>>2]+ +g[kl>>2];g[qu>>2]=+g[mj>>2]+ +g[pj>>2];g[rk>>2]=+g[yi>>2]+ +g[Bi>>2];g[sk>>2]=+g[ri>>2]+ +g[Ih>>2];g[tk>>2]=+g[rk>>2]-+g[sk>>2];g[nl>>2]=+g[rk>>2]+ +g[sk>>2];g[uk>>2]=+g[Mg>>2]+ +g[Xg>>2];g[vk>>2]=+g[Di>>2]+ +g[Ei>>2];g[wk>>2]=+g[uk>>2]-+g[vk>>2];g[ol>>2]=+g[uk>>2]+ +g[vk>>2];g[xk>>2]=+g[tk>>2]*.4713967442512512+ +g[wk>>2]*.8819212913513184;g[vl>>2]=+g[ol>>2]*.9569403529167175-+g[nl>>2]*.290284663438797;g[fl>>2]=+g[wk>>2]*.4713967442512512-+g[tk>>2]*.8819212913513184;g[pl>>2]=+g[nl>>2]*.9569403529167175+ +g[ol>>2]*.290284663438797;g[yk>>2]=+g[Oh>>2]+ +g[Zh>>2];g[zk>>2]=+g[Gj>>2]+ +g[Fj>>2];g[Ak>>2]=+g[yk>>2]-+g[zk>>2];g[ql>>2]=+g[yk>>2]+ +g[zk>>2];g[$k>>2]=+g[Aj>>2]+ +g[Dj>>2];g[al>>2]=+g[Ki>>2]+ +g[tj>>2];g[bl>>2]=+g[$k>>2]-+g[al>>2];g[rl>>2]=+g[$k>>2]+ +g[al>>2];g[cl>>2]=+g[Ak>>2]*.4713967442512512-+g[bl>>2]*.8819212913513184;g[wl>>2]=+g[ql>>2]*.290284663438797+ +g[rl>>2]*.9569403529167175;g[gl>>2]=+g[Ak>>2]*.8819212913513184+ +g[bl>>2]*.4713967442512512;g[sl>>2]=+g[ql>>2]*.9569403529167175-+g[rl>>2]*.290284663438797;g[rj>>2]=+g[jj>>2]+ +g[qj>>2];g[dl>>2]=+g[xk>>2]+ +g[cl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[rj>>2]-+g[dl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[rj>>2]+ +g[dl>>2];g[zu>>2]=+g[fl>>2]+ +g[gl>>2];g[Cu>>2]=+g[Au>>2]+ +g[Bu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[zu>>2]+ 
+g[Cu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*43<<2)>>2]=+g[Cu>>2]-+g[zu>>2];g[el>>2]=+g[jj>>2]-+g[qj>>2];g[hl>>2]=+g[fl>>2]-+g[gl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[el>>2]-+g[hl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[el>>2]+ +g[hl>>2];g[Du>>2]=+g[cl>>2]-+g[xk>>2];g[Eu>>2]=+g[Bu>>2]-+g[Au>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Du>>2]+ +g[Eu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*59<<2)>>2]=+g[Eu>>2]-+g[Du>>2];g[ml>>2]=+g[il>>2]+ +g[ll>>2];g[tl>>2]=+g[pl>>2]+ +g[sl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[ml>>2]-+g[tl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ml>>2]+ +g[tl>>2];g[pu>>2]=+g[vl>>2]+ +g[wl>>2];g[wu>>2]=+g[qu>>2]+ +g[vu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[pu>>2]+ +g[wu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*35<<2)>>2]=+g[wu>>2]-+g[pu>>2];g[ul>>2]=+g[il>>2]-+g[ll>>2];g[xl>>2]=+g[vl>>2]-+g[wl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[ul>>2]-+g[xl>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ul>>2]+ +g[xl>>2];g[xu>>2]=+g[sl>>2]-+g[pl>>2];g[yu>>2]=+g[vu>>2]-+g[qu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[xu>>2]+ +g[yu>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*51<<2)>>2]=+g[yu>>2]-+g[xu>>2];c[Xu>>2]=(c[Xu>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+504;c[n>>2]=c[n>>2]^c[2998]}i=Yu;return}function xj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,13,2376);i=b;return}function yj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0;ta=i;i=i+272|0;k=ta+260|0;l=ta+256|0;m=ta+252|0;n=ta+248|0;ua=ta+244|0;o=ta+240|0;p=ta+236|0;sa=ta+224|0;w=ta+220|0;R=ta+216|0;na=ta+212|0;N=ta+208|0;la=ta+204|0;I=ta+200|0;pa=ta+196|0;E=ta+192|0;aa=ta+188|0;H=ta+184|0;oa=ta+180|0;B=ta+176|0;q=ta+172|0;M=ta+168|0;v=ta+164|0;L=ta+160|0;s=ta+156|0;u=ta+152|0;r=ta+148|0;t=ta+144|0;fa=ta+140|0;C=ta+136|0;ka=ta+132|0;D=ta+128|0;ca=ta+124|0;ea=ta+120|0;ba=ta+116|0;da=ta+112|0;ha=ta+108|0;ja=ta+104|0;ga=ta+100|0;ia=ta+96|0;W=ta+92|0;z=ta+88|0;$=ta+84|0;A=ta+80|0;y=ta+76|0;V=ta+72|0;x=ta+68|0;U=ta+64|0;Y=ta+60|0;_=ta+56|0;X=ta+52|0;Z=ta+48|0;F=ta+44|0;ma=ta+40|0;ra=ta+36|0;Q=ta+32|0;S=ta+28|0;T=ta+24|0;J=ta+20|0;qa=ta+16|0;G=ta+12|0;P=ta+8|0;K=ta+4|0;O=ta;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[ua>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ta+232>>2]=.5;g[ta+228>>2]=.8660253882408142;c[sa>>2]=c[ua>>2];c[m>>2]=(c[m>>2]|0)+((c[ua>>2]|0)*10<<2);while(1){if((c[sa>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[M>>2]=+g[c[l>>2]>>2];g[s>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[r>>2]=+g[(c[m>>2]|0)+16>>2];g[t>>2]=+g[(c[m>>2]|0)+20>>2];g[v>>2]=+g[r>>2]*+g[s>>2]+ +g[t>>2]*+g[u>>2];g[L>>2]=+g[r>>2]*+g[u>>2]-+g[t>>2]*+g[s>>2];g[w>>2]=+g[q>>2]-+g[v>>2];g[R>>2]=+g[M>>2]-+g[L>>2];g[na>>2]=+g[q>>2]+ +g[v>>2];g[N>>2]=+g[L>>2]+ +g[M>>2];g[ca>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ba>>2]=+g[(c[m>>2]|0)+24>>2];g[da>>2]=+g[(c[m>>2]|0)+28>>2];g[fa>>2]=+g[ba>>2]*+g[ca>>2]+ 
+g[da>>2]*+g[ea>>2];g[C>>2]=+g[ba>>2]*+g[ea>>2]-+g[da>>2]*+g[ca>>2];g[ha>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ja>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ga>>2]=+g[c[m>>2]>>2];g[ia>>2]=+g[(c[m>>2]|0)+4>>2];g[ka>>2]=+g[ga>>2]*+g[ha>>2]+ +g[ia>>2]*+g[ja>>2];g[D>>2]=+g[ga>>2]*+g[ja>>2]-+g[ia>>2]*+g[ha>>2];g[la>>2]=+g[fa>>2]-+g[ka>>2];g[I>>2]=+g[C>>2]+ +g[D>>2];g[pa>>2]=+g[fa>>2]+ +g[ka>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[y>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[x>>2]=+g[(c[m>>2]|0)+8>>2];g[U>>2]=+g[(c[m>>2]|0)+12>>2];g[W>>2]=+g[x>>2]*+g[y>>2]+ +g[U>>2]*+g[V>>2];g[z>>2]=+g[x>>2]*+g[V>>2]-+g[U>>2]*+g[y>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[X>>2]=+g[(c[m>>2]|0)+32>>2];g[Z>>2]=+g[(c[m>>2]|0)+36>>2];g[$>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[A>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[aa>>2]=+g[W>>2]-+g[$>>2];g[H>>2]=+g[z>>2]+ +g[A>>2];g[oa>>2]=+g[W>>2]+ +g[$>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[F>>2]=(+g[B>>2]-+g[E>>2])*.8660253882408142;g[ma>>2]=+g[aa>>2]+ +g[la>>2];g[ra>>2]=+g[w>>2]-+g[ma>>2]*.5;g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[w>>2]+ +g[ma>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ra>>2]+ +g[F>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ra>>2]-+g[F>>2];g[Q>>2]=(+g[la>>2]-+g[aa>>2])*.8660253882408142;g[S>>2]=+g[B>>2]+ +g[E>>2];g[T>>2]=+g[R>>2]-+g[S>>2]*.5;g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Q>>2]+ +g[T>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[S>>2]+ +g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[T>>2]-+g[Q>>2];g[J>>2]=(+g[H>>2]-+g[I>>2])*.8660253882408142;g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[G>>2]=+g[na>>2]-+g[qa>>2]*.5;g[c[k>>2]>>2]=+g[na>>2]+ +g[qa>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[G>>2]+ +g[J>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]-+g[J>>2];g[P>>2]=(+g[pa>>2]-+g[oa>>2])*.8660253882408142;g[K>>2]=+g[H>>2]+ +g[I>>2];g[O>>2]=+g[N>>2]-+g[K>>2]*.5;g[c[l>>2]>>2]=+g[K>>2]+ +g[N>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[P>>2]+ 
+g[O>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[O>>2]-+g[P>>2];c[sa>>2]=(c[sa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+40}i=ta;return}function zj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,14,2440);i=b;return}function Aj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0;za=i;i=i+304|0;k=za+300|0;l=za+296|0;m=za+292|0;n=za+288|0;Aa=za+284|0;o=za+280|0;p=za+276|0;ya=za+248|0;q=za+244|0;R=za+240|0;aa=za+236|0;S=za+232|0;C=za+228|0;O=za+224|0;la=za+220|0;T=za+216|0;I=za+212|0;P=za+208|0;wa=za+204|0;U=za+200|0;F=za+196|0;Q=za+192|0;w=za+188|0;A=za+184|0;$=za+180|0;B=za+176|0;t=za+172|0;v=za+168|0;s=za+164|0;u=za+160|0;y=za+156|0;_=za+152|0;x=za+148|0;z=za+144|0;fa=za+140|0;G=za+136|0;ka=za+132|0;H=za+128|0;ca=za+124|0;ea=za+120|0;ba=za+116|0;da=za+112|0;ha=za+108|0;ja=za+104|0;ga=za+100|0;ia=za+96|0;qa=za+92|0;D=za+88|0;va=za+84|0;E=za+80|0;na=za+76|0;pa=za+72|0;ma=za+68|0;oa=za+64|0;sa=za+60|0;ua=za+56|0;ra=za+52|0;ta=za+48|0;J=za+44|0;xa=za+40|0;X=za+36|0;Y=za+32|0;L=za+28|0;K=za+24|0;V=za+20|0;W=za+16|0;N=za+12|0;M=za+8|0;Z=za+4|0;r=za;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Aa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[za+272>>2]=.22252093255519867;g[za+268>>2]=.9009688496589661;g[za+264>>2]=.6234897971153259;g[za+260>>2]=.4338837265968323;g[za+256>>2]=.7818315029144287;g[za+252>>2]=.9749279022216797;c[ya>>2]=c[Aa>>2];c[m>>2]=(c[m>>2]|0)+((c[Aa>>2]|0)*12<<2);while(1){if((c[ya>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[R>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[c[m>>2]>>2];g[u>>2]=+g[(c[m>>2]|0)+4>>2];g[w>>2]=+g[s>>2]*+g
[t>>2]+ +g[u>>2]*+g[v>>2];g[A>>2]=+g[s>>2]*+g[v>>2]-+g[u>>2]*+g[t>>2];g[y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[_>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[x>>2]=+g[(c[m>>2]|0)+40>>2];g[z>>2]=+g[(c[m>>2]|0)+44>>2];g[$>>2]=+g[x>>2]*+g[y>>2]+ +g[z>>2]*+g[_>>2];g[B>>2]=+g[x>>2]*+g[_>>2]-+g[z>>2]*+g[y>>2];g[aa>>2]=+g[w>>2]+ +g[$>>2];g[S>>2]=+g[$>>2]-+g[w>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[O>>2]=+g[A>>2]+ +g[B>>2];g[ca>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ba>>2]=+g[(c[m>>2]|0)+8>>2];g[da>>2]=+g[(c[m>>2]|0)+12>>2];g[fa>>2]=+g[ba>>2]*+g[ca>>2]+ +g[da>>2]*+g[ea>>2];g[G>>2]=+g[ba>>2]*+g[ea>>2]-+g[da>>2]*+g[ca>>2];g[ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ga>>2]=+g[(c[m>>2]|0)+32>>2];g[ia>>2]=+g[(c[m>>2]|0)+36>>2];g[ka>>2]=+g[ga>>2]*+g[ha>>2]+ +g[ia>>2]*+g[ja>>2];g[H>>2]=+g[ga>>2]*+g[ja>>2]-+g[ia>>2]*+g[ha>>2];g[la>>2]=+g[fa>>2]+ +g[ka>>2];g[T>>2]=+g[ka>>2]-+g[fa>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[P>>2]=+g[G>>2]+ +g[H>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ma>>2]=+g[(c[m>>2]|0)+16>>2];g[oa>>2]=+g[(c[m>>2]|0)+20>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[D>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[sa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ua>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ra>>2]=+g[(c[m>>2]|0)+24>>2];g[ta>>2]=+g[(c[m>>2]|0)+28>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[E>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[wa>>2]=+g[qa>>2]+ +g[va>>2];g[U>>2]=+g[va>>2]-+g[qa>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[Q>>2]=+g[D>>2]+ +g[E>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[aa>>2]+ +g[la>>2]+ +g[wa>>2];g[c[l>>2]>>2]=+g[O>>2]+ +g[P>>2]+ +g[Q>>2]+ +g[R>>2];g[J>>2]=+g[C>>2]*.9749279022216797-+g[F>>2]*.7818315029144287-+g[I>>2]*.4338837265968323;g[xa>>2]=+g[wa>>2]*.6234897971153259+ +g[q>>2]+-(+g[la>>2]*.9009688496589661+ 
+g[aa>>2]*.22252093255519867);g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[xa>>2]-+g[J>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[xa>>2]+ +g[J>>2];g[X>>2]=+g[S>>2]*.9749279022216797-+g[U>>2]*.7818315029144287-+g[T>>2]*.4338837265968323;g[Y>>2]=+g[Q>>2]*.6234897971153259+ +g[R>>2]+-(+g[P>>2]*.9009688496589661+ +g[O>>2]*.22252093255519867);g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[X>>2]+ +g[Y>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Y>>2]-+g[X>>2];g[L>>2]=+g[C>>2]*.7818315029144287+ +g[I>>2]*.9749279022216797+ +g[F>>2]*.4338837265968323;g[K>>2]=+g[aa>>2]*.6234897971153259+ +g[q>>2]+-(+g[wa>>2]*.9009688496589661+ +g[la>>2]*.22252093255519867);g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[K>>2]+ +g[L>>2];g[V>>2]=+g[S>>2]*.7818315029144287+ +g[T>>2]*.9749279022216797+ +g[U>>2]*.4338837265968323;g[W>>2]=+g[O>>2]*.6234897971153259+ +g[R>>2]+-(+g[Q>>2]*.9009688496589661+ +g[P>>2]*.22252093255519867);g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[V>>2]+ +g[W>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[W>>2]-+g[V>>2];g[N>>2]=+g[C>>2]*.4338837265968323+ +g[F>>2]*.9749279022216797-+g[I>>2]*.7818315029144287;g[M>>2]=+g[la>>2]*.6234897971153259+ +g[q>>2]+-(+g[wa>>2]*.22252093255519867+ +g[aa>>2]*.9009688496589661);g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[M>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[M>>2]+ +g[N>>2];g[Z>>2]=+g[S>>2]*.4338837265968323+ +g[U>>2]*.9749279022216797-+g[T>>2]*.7818315029144287;g[r>>2]=+g[P>>2]*.6234897971153259+ +g[R>>2]+-(+g[Q>>2]*.22252093255519867+ +g[O>>2]*.9009688496589661);g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Z>>2]+ +g[r>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[r>>2]-+g[Z>>2];c[ya>>2]=(c[ya>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+48}i=za;return}function Bj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,15,2504);i=b;return}function Cj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0;Ra=i;i=i+368|0;k=Ra+352|0;l=Ra+348|0;m=Ra+344|0;n=Ra+340|0;Sa=Ra+336|0;o=Ra+332|0;p=Ra+328|0;Qa=Ra+320|0;P=Ra+316|0;F=Ra+312|0;Z=Ra+308|0;A=Ra+304|0;X=Ra+300|0;u=Ra+296|0;ha=Ra+292|0;ka=Ra+288|0;ya=Ra+284|0;G=Ra+280|0;aa=Ra+276|0;x=Ra+272|0;Ka=Ra+268|0;t=Ra+264|0;ca=Ra+260|0;fa=Ra+256|0;q=Ra+252|0;z=Ra+248|0;O=Ra+244|0;y=Ra+240|0;L=Ra+236|0;N=Ra+232|0;K=Ra+228|0;M=Ra+224|0;Pa=Ra+220|0;ia=Ra+216|0;W=Ra+212|0;ja=Ra+208|0;Ma=Ra+204|0;Oa=Ra+200|0;La=Ra+196|0;Na=Ra+192|0;T=Ra+188|0;V=Ra+184|0;S=Ra+180|0;U=Ra+176|0;sa=Ra+172|0;_=Ra+168|0;xa=Ra+164|0;$=Ra+160|0;R=Ra+156|0;ra=Ra+152|0;Q=Ra+148|0;qa=Ra+144|0;ua=Ra+140|0;wa=Ra+136|0;ta=Ra+132|0;va=Ra+128|0;Ea=Ra+124|0;da=Ra+120|0;Ja=Ra+116|0;ea=Ra+112|0;Ba=Ra+108|0;Da=Ra+104|0;Aa=Ra+100|0;Ca=Ra+96|0;Ga=Ra+92|0;Ia=Ra+88|0;Fa=Ra+84|0;Ha=Ra+80|0;za=Ra+76|0;Y=Ra+72|0;C=Ra+68|0;D=Ra+64|0;w=Ra+60|0;B=Ra+56|0;s=Ra+52|0;v=Ra+48|0;na=Ra+44|0;H=Ra+40|0;r=Ra+36|0;E=Ra+32|0;oa=Ra+28|0;pa=Ra+24|0;ba=Ra+20|0;J=Ra+16|0;ma=Ra+12|0;I=Ra+8|0;ga=Ra+4|0;la=Ra;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Sa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ra+324>>2]=.7071067690849304;c[Qa>>2]=c[Sa>>2];c[m>>2]=(c[m>>2]|0)+((c[Sa>>2]|0)*14<<2);while(1){if((c[Qa>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[z>>2]=+g[c[l>>2]>>2];g[L>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[N>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[K>>2]=+g[(c[m>>2]|0)+24>>2];g[M>>2]=+g[(c[m>>2]|0)+28>>2];g[O>>2]=+g[K>>2]*+g[L>>2]+ +g[M>>2]*+g[N>>2];g[y>>2]=+g[K>>2]*+g[N>>2]-+g[M>>2]*+g[L>>2];g[P>>2]=+g[q>>2]+ +g[O>>2];g[F>>2]=+g[z>>2]-+g[y>>2];g[Z>>2]=+g[q>>2]-+g[O>>2];g[A>>2]=+g[y>>2]+ 
+g[z>>2];g[Ma>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Oa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[La>>2]=+g[(c[m>>2]|0)+48>>2];g[Na>>2]=+g[(c[m>>2]|0)+52>>2];g[Pa>>2]=+g[La>>2]*+g[Ma>>2]+ +g[Na>>2]*+g[Oa>>2];g[ia>>2]=+g[La>>2]*+g[Oa>>2]-+g[Na>>2]*+g[Ma>>2];g[T>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[S>>2]=+g[(c[m>>2]|0)+16>>2];g[U>>2]=+g[(c[m>>2]|0)+20>>2];g[W>>2]=+g[S>>2]*+g[T>>2]+ +g[U>>2]*+g[V>>2];g[ja>>2]=+g[S>>2]*+g[V>>2]-+g[U>>2]*+g[T>>2];g[X>>2]=+g[Pa>>2]+ +g[W>>2];g[u>>2]=+g[ia>>2]+ +g[ja>>2];g[ha>>2]=+g[Pa>>2]-+g[W>>2];g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[R>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ra>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Q>>2]=+g[(c[m>>2]|0)+8>>2];g[qa>>2]=+g[(c[m>>2]|0)+12>>2];g[sa>>2]=+g[Q>>2]*+g[R>>2]+ +g[qa>>2]*+g[ra>>2];g[_>>2]=+g[Q>>2]*+g[ra>>2]-+g[qa>>2]*+g[R>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ta>>2]=+g[(c[m>>2]|0)+40>>2];g[va>>2]=+g[(c[m>>2]|0)+44>>2];g[xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[wa>>2];g[$>>2]=+g[ta>>2]*+g[wa>>2]-+g[va>>2]*+g[ua>>2];g[ya>>2]=+g[sa>>2]+ +g[xa>>2];g[G>>2]=+g[sa>>2]-+g[xa>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[x>>2]=+g[_>>2]+ +g[$>>2];g[Ba>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Da>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Aa>>2]=+g[c[m>>2]>>2];g[Ca>>2]=+g[(c[m>>2]|0)+4>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[da>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[Ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Fa>>2]=+g[(c[m>>2]|0)+32>>2];g[Ha>>2]=+g[(c[m>>2]|0)+36>>2];g[Ja>>2]=+g[Fa>>2]*+g[Ga>>2]+ +g[Ha>>2]*+g[Ia>>2];g[ea>>2]=+g[Fa>>2]*+g[Ia>>2]-+g[Ha>>2]*+g[Ga>>2];g[Ka>>2]=+g[Ea>>2]+ +g[Ja>>2];g[t>>2]=+g[da>>2]+ +g[ea>>2];g[ca>>2]=+g[Ea>>2]-+g[Ja>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[za>>2]=+g[P>>2]+ +g[ya>>2];g[Y>>2]=+g[Ka>>2]+ +g[X>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[za>>2]-+g[Y>>2];g[c[k>>2]>>2]=+g[za>>2]+ +g[Y>>2];g[w>>2]=+g[t>>2]+ 
+g[u>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[c[l>>2]>>2]=+g[w>>2]+ +g[B>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[B>>2]-+g[w>>2];g[s>>2]=+g[P>>2]-+g[ya>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[s>>2]-+g[v>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[s>>2]+ +g[v>>2];g[C>>2]=+g[X>>2]-+g[Ka>>2];g[D>>2]=+g[A>>2]-+g[x>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[C>>2]+ +g[D>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[D>>2]-+g[C>>2];g[na>>2]=+g[Z>>2]-+g[aa>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[oa>>2]=+g[fa>>2]-+g[ca>>2];g[pa>>2]=+g[ha>>2]+ +g[ka>>2];g[r>>2]=(+g[oa>>2]-+g[pa>>2])*.7071067690849304;g[E>>2]=(+g[oa>>2]+ +g[pa>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[na>>2]-+g[r>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[H>>2]-+g[E>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[na>>2]+ +g[r>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[E>>2]+ +g[H>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[J>>2]=+g[G>>2]+ +g[F>>2];g[ga>>2]=+g[ca>>2]+ +g[fa>>2];g[la>>2]=+g[ha>>2]-+g[ka>>2];g[ma>>2]=(+g[ga>>2]+ +g[la>>2])*.7071067690849304;g[I>>2]=(+g[la>>2]-+g[ga>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ba>>2]-+g[ma>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[J>>2]-+g[I>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[ba>>2]+ +g[ma>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[I>>2]+ +g[J>>2];c[Qa>>2]=(c[Qa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+56}i=Ra;return}function Dj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,16,2568);i=b;return}function Ej(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0;vb=i;i=i+512|0;k=vb+508|0;l=vb+504|0;m=vb+500|0;n=vb+496|0;wb=vb+492|0;o=vb+488|0;p=vb+484|0;ub=vb+448|0;q=vb+444|0;C=vb+440|0;Ma=vb+436|0;H=vb+432|0;Ya=vb+428|0;Ja=vb+424|0;B=vb+420|0;I=vb+416|0;Ha=vb+412|0;la=vb+408|0;y=vb+404|0;ca=vb+400|0;S=vb+396|0;ba=vb+392|0;ob=vb+388|0;ka=vb+384|0;Sa=vb+380|0;_=vb+376|0;s=vb+372|0;$=vb+368|0;sa=vb+364|0;Ka=vb+360|0;Xa=vb+356|0;La=vb+352|0;pa=vb+348|0;ra=vb+344|0;oa=vb+340|0;qa=vb+336|0;ua=vb+332|0;Wa=vb+328|0;ta=vb+324|0;va=vb+320|0;tb=vb+316|0;A=vb+312|0;Aa=vb+308|0;v=vb+304|0;Fa=vb+300|0;w=vb+296|0;Ga=vb+292|0;Q=vb+288|0;qb=vb+284|0;sb=vb+280|0;pb=vb+276|0;rb=vb+272|0;xa=vb+268|0;za=vb+264|0;wa=vb+260|0;ya=vb+256|0;Ca=vb+252|0;Ea=vb+248|0;Ba=vb+244|0;Da=vb+240|0;u=vb+236|0;x=vb+232|0;z=vb+228|0;R=vb+224|0;cb=vb+220|0;Ua=vb+216|0;hb=vb+212|0;Pa=vb+208|0;mb=vb+204|0;Qa=vb+200|0;nb=vb+196|0;Va=vb+192|0;$a=vb+188|0;bb=vb+184|0;_a=vb+180|0;ab=vb+176|0;eb=vb+172|0;gb=vb+168|0;db=vb+164|0;fb=vb+160|0;jb=vb+156|0;lb=vb+152|0;ib=vb+148|0;kb=vb+144|0;Oa=vb+140|0;Ra=vb+136|0;Ta=vb+132|0;r=vb+128|0;ma=vb+124|0;Za=vb+120|0;Ia=vb+116|0;ja=vb+112|0;E=vb+108|0;na=vb+104|0;D=vb+100|0;F=vb+96|0;Na=vb+92|0;J=vb+88|0;U=vb+84|0;K=vb+80|0;Y=vb+76|0;G=vb+72|0;V=vb+68|0;L=vb+64|0;t=vb+60|0;T=vb+56|0;W=vb+52|0;X=vb+48|0;Z=vb+44|0;N=vb+40|0;ea=vb+36|0;O=vb+32|0;ia=vb+28|0;M=vb+24|0;fa=vb+20|0;P=vb+16|0;aa=vb+12|0;da=vb+8|0;ga=vb+4|0;ha=vb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[wb>>2]=f;c[o>>2]=h
;c[p>>2]=j;g[vb+480>>2]=.9396926164627075;g[vb+476>>2]=.3420201539993286;g[vb+472>>2]=.9848077297210693;g[vb+468>>2]=.1736481785774231;g[vb+464>>2]=.6427876353263855;g[vb+460>>2]=.7660444378852844;g[vb+456>>2]=.5;g[vb+452>>2]=.8660253882408142;c[ub>>2]=c[wb>>2];c[m>>2]=(c[m>>2]|0)+(c[wb>>2]<<4<<2);while(1){if((c[ub>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[C>>2]=+g[c[l>>2]>>2];g[pa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[oa>>2]=+g[(c[m>>2]|0)+16>>2];g[qa>>2]=+g[(c[m>>2]|0)+20>>2];g[sa>>2]=+g[oa>>2]*+g[pa>>2]+ +g[qa>>2]*+g[ra>>2];g[Ka>>2]=+g[oa>>2]*+g[ra>>2]-+g[qa>>2]*+g[pa>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ta>>2]=+g[(c[m>>2]|0)+40>>2];g[va>>2]=+g[(c[m>>2]|0)+44>>2];g[Xa>>2]=+g[ta>>2]*+g[ua>>2]+ +g[va>>2]*+g[Wa>>2];g[La>>2]=+g[ta>>2]*+g[Wa>>2]-+g[va>>2]*+g[ua>>2];g[Ma>>2]=(+g[Ka>>2]-+g[La>>2])*.8660253882408142;g[H>>2]=(+g[Xa>>2]-+g[sa>>2])*.8660253882408142;g[Ya>>2]=+g[sa>>2]+ +g[Xa>>2];g[Ja>>2]=+g[q>>2]-+g[Ya>>2]*.5;g[B>>2]=+g[Ka>>2]+ +g[La>>2];g[I>>2]=+g[C>>2]-+g[B>>2]*.5;g[qb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[sb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[pb>>2]=+g[(c[m>>2]|0)+8>>2];g[rb>>2]=+g[(c[m>>2]|0)+12>>2];g[tb>>2]=+g[pb>>2]*+g[qb>>2]+ +g[rb>>2]*+g[sb>>2];g[A>>2]=+g[pb>>2]*+g[sb>>2]-+g[rb>>2]*+g[qb>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[wa>>2]=+g[(c[m>>2]|0)+32>>2];g[ya>>2]=+g[(c[m>>2]|0)+36>>2];g[Aa>>2]=+g[wa>>2]*+g[xa>>2]+ +g[ya>>2]*+g[za>>2];g[v>>2]=+g[wa>>2]*+g[za>>2]-+g[ya>>2]*+g[xa>>2];g[Ca>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ea>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ba>>2]=+g[(c[m>>2]|0)+56>>2];g[Da>>2]=+g[(c[m>>2]|0)+60>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[Da>>2]*+g[Ea>>2];g[w>>2]=+g[Ba>>2]*+g[Ea>>2]-+g[Da>>2]*+g[Ca>>2];g[Ga>>2]=+g[Aa>>2]+ +g[Fa>>2];g[Q>>2]=+g[v>>2]+ +g[w>>2];g[Ha>>2]=+g[tb>>2]+ +g[Ga>>2];g[la>>2]=+g[A>>2]+ 
+g[Q>>2];g[u>>2]=+g[tb>>2]-+g[Ga>>2]*.5;g[x>>2]=(+g[v>>2]-+g[w>>2])*.8660253882408142;g[y>>2]=+g[u>>2]+ +g[x>>2];g[ca>>2]=+g[u>>2]-+g[x>>2];g[z>>2]=(+g[Fa>>2]-+g[Aa>>2])*.8660253882408142;g[R>>2]=+g[A>>2]-+g[Q>>2]*.5;g[S>>2]=+g[z>>2]+ +g[R>>2];g[ba>>2]=+g[R>>2]-+g[z>>2];g[$a>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[bb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[_a>>2]=+g[c[m>>2]>>2];g[ab>>2]=+g[(c[m>>2]|0)+4>>2];g[cb>>2]=+g[_a>>2]*+g[$a>>2]+ +g[ab>>2]*+g[bb>>2];g[Ua>>2]=+g[_a>>2]*+g[bb>>2]-+g[ab>>2]*+g[$a>>2];g[eb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[gb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[db>>2]=+g[(c[m>>2]|0)+24>>2];g[fb>>2]=+g[(c[m>>2]|0)+28>>2];g[hb>>2]=+g[db>>2]*+g[eb>>2]+ +g[fb>>2]*+g[gb>>2];g[Pa>>2]=+g[db>>2]*+g[gb>>2]-+g[fb>>2]*+g[eb>>2];g[jb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[lb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ib>>2]=+g[(c[m>>2]|0)+48>>2];g[kb>>2]=+g[(c[m>>2]|0)+52>>2];g[mb>>2]=+g[ib>>2]*+g[jb>>2]+ +g[kb>>2]*+g[lb>>2];g[Qa>>2]=+g[ib>>2]*+g[lb>>2]-+g[kb>>2]*+g[jb>>2];g[nb>>2]=+g[hb>>2]+ +g[mb>>2];g[Va>>2]=+g[Pa>>2]+ +g[Qa>>2];g[ob>>2]=+g[cb>>2]+ +g[nb>>2];g[ka>>2]=+g[Ua>>2]+ +g[Va>>2];g[Oa>>2]=+g[cb>>2]-+g[nb>>2]*.5;g[Ra>>2]=(+g[Pa>>2]-+g[Qa>>2])*.8660253882408142;g[Sa>>2]=+g[Oa>>2]+ +g[Ra>>2];g[_>>2]=+g[Oa>>2]-+g[Ra>>2];g[Ta>>2]=(+g[mb>>2]-+g[hb>>2])*.8660253882408142;g[r>>2]=+g[Ua>>2]-+g[Va>>2]*.5;g[s>>2]=+g[Ta>>2]+ +g[r>>2];g[$>>2]=+g[r>>2]-+g[Ta>>2];g[ma>>2]=(+g[ka>>2]-+g[la>>2])*.8660253882408142;g[Za>>2]=+g[q>>2]+ +g[Ya>>2];g[Ia>>2]=+g[ob>>2]+ +g[Ha>>2];g[ja>>2]=+g[Za>>2]-+g[Ia>>2]*.5;g[c[k>>2]>>2]=+g[Za>>2]+ +g[Ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ja>>2]+ +g[ma>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ja>>2]-+g[ma>>2];g[E>>2]=(+g[Ha>>2]-+g[ob>>2])*.8660253882408142;g[na>>2]=+g[ka>>2]+ +g[la>>2];g[D>>2]=+g[B>>2]+ +g[C>>2];g[F>>2]=+g[D>>2]-+g[na>>2]*.5;g[c[l>>2]>>2]=+g[na>>2]+ +g[D>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[F>>2]-+g[E>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[E>>2]+ +g[F>>2];g[Na>>2]=+g[Ja>>2]+ 
+g[Ma>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[t>>2]=+g[Sa>>2]*.7660444378852844+ +g[s>>2]*.6427876353263855;g[T>>2]=+g[y>>2]*.1736481785774231+ +g[S>>2]*.9848077297210693;g[U>>2]=+g[t>>2]+ +g[T>>2];g[K>>2]=(+g[T>>2]-+g[t>>2])*.8660253882408142;g[W>>2]=+g[s>>2]*.7660444378852844-+g[Sa>>2]*.6427876353263855;g[X>>2]=+g[S>>2]*.1736481785774231-+g[y>>2]*.9848077297210693;g[Y>>2]=(+g[W>>2]-+g[X>>2])*.8660253882408142;g[G>>2]=+g[W>>2]+ +g[X>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Na>>2]+ +g[U>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]+ +g[J>>2];g[V>>2]=+g[Na>>2]-+g[U>>2]*.5;g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[V>>2]-+g[Y>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[V>>2]+ +g[Y>>2];g[L>>2]=+g[J>>2]-+g[G>>2]*.5;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[K>>2]+ +g[L>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[L>>2]-+g[K>>2];g[Z>>2]=+g[Ja>>2]-+g[Ma>>2];g[N>>2]=+g[I>>2]-+g[H>>2];g[aa>>2]=+g[_>>2]*.1736481785774231+ +g[$>>2]*.9848077297210693;g[da>>2]=+g[ba>>2]*.3420201539993286-+g[ca>>2]*.9396926164627075;g[ea>>2]=+g[aa>>2]+ +g[da>>2];g[O>>2]=(+g[da>>2]-+g[aa>>2])*.8660253882408142;g[ga>>2]=+g[$>>2]*.1736481785774231-+g[_>>2]*.9848077297210693;g[ha>>2]=+g[ca>>2]*.3420201539993286+ +g[ba>>2]*.9396926164627075;g[ia>>2]=(+g[ga>>2]+ +g[ha>>2])*.8660253882408142;g[M>>2]=+g[ga>>2]-+g[ha>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Z>>2]+ +g[ea>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[M>>2]+ +g[N>>2];g[fa>>2]=+g[Z>>2]-+g[ea>>2]*.5;g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[fa>>2]-+g[ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[fa>>2]+ +g[ia>>2];g[P>>2]=+g[N>>2]-+g[M>>2]*.5;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[P>>2]-+g[O>>2];c[ub>>2]=(c[ub>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+64;c[n>>2]=c[n>>2]^c[2998]}i=vb;return}function Fj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,17,2632);i=b;return}function Gj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0;Hb=i;i=i+544|0;k=Hb+540|0;l=Hb+536|0;m=Hb+532|0;n=Hb+528|0;Ib=Hb+524|0;o=Hb+520|0;p=Hb+516|0;Gb=Hb+496|0;za=Hb+492|0;Da=Hb+488|0;Ba=Hb+484|0;Ea=Hb+480|0;Ga=Hb+476|0;sb=Hb+472|0;ib=Hb+468|0;qb=Hb+464|0;Ha=Hb+460|0;jb=Hb+456|0;kb=Hb+452|0;Ua=Hb+448|0;Wa=Hb+444|0;mb=Hb+440|0;vb=Hb+436|0;Bb=Hb+432|0;Db=Hb+428|0;xb=Hb+424|0;Ca=Hb+420|0;hb=Hb+416|0;Fa=Hb+412|0;gb=Hb+408|0;pb=Hb+404|0;T=Hb+400|0;db=Hb+396|0;H=Hb+392|0;Ta=Hb+388|0;ab=Hb+384|0;bb=Hb+380|0;ra=Hb+376|0;sa=Hb+372|0;D=Hb+368|0;s=Hb+364|0;t=Hb+360|0;u=Hb+356|0;ga=Hb+352|0;ja=Hb+348|0;R=Hb+344|0;Ab=Hb+340|0;La=Hb+336|0;Ma=Hb+332|0;ua=Hb+328|0;va=Hb+324|0;C=Hb+320|0;eb=Hb+316|0;fb=Hb+312|0;r=Hb+308|0;$=Hb+304|0;ca=Hb+300|0;Q=Hb+296|0;q=Hb+292|0;G=Hb+288|0;ob=Hb+284|0;F=Hb+280|0;lb=Hb+276|0;nb=Hb+272|0;Pa=Hb+268|0;ea=Hb+264|0;$a=Hb+260|0;ia=Hb+256|0;Sa=Hb+252|0;fa=Hb+248|0;Ya=Hb+244|0;ha=Hb+240|0;Na=Hb+236|0;Oa=Hb+232|0;Za=Hb+228|0;_a=Hb+224|0;Qa=Hb+220|0;Ra=Hb+216|0;Va=Hb+212|0;Xa=Hb+208|0;ub=Hb+204|0;z=Hb+200|0;Ka=Hb+196|0;ba=Hb+192|0;zb=Hb+188|0;A=Hb+184|0;Fb=Hb+180|0;aa=Hb+176|0;rb=Hb+172|0;tb=Hb+168|0;Ia=Hb+164|0;Ja=Hb+160|0;wb=Hb+156|0;yb=Hb+152|0;Cb=Hb+148|0;Eb=Hb+144|0;w=Hb+140|0;cb=Hb+136|0;x=Hb+132|0;la=Hb+128|0;na=Hb+124|0;da=Hb+120|0;ka=Hb+116|0;ma=Hb+112|0;y=Hb+108|0;S=Hb+104|0;U=Hb+100|0;V=Hb+96|0;Z=Hb+92|0;Aa=Hb+88|0;X=Hb+84|0;Y=Hb+80|0;_=Hb+76|0;W=Hb+72|0;pa=Hb+68|0;v=Hb+64|0;oa=Hb+60|0;xa=H
b+56|0;B=Hb+52|0;ta=Hb+48|0;wa=Hb+44|0;ya=Hb+40|0;qa=Hb+36|0;M=Hb+32|0;E=Hb+28|0;L=Hb+24|0;K=Hb+20|0;O=Hb+16|0;I=Hb+12|0;J=Hb+8|0;P=Hb+4|0;N=Hb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ib>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Hb+512>>2]=.5877852439880371;g[Hb+508>>2]=.9510565400123596;g[Hb+504>>2]=.25;g[Hb+500>>2]=.55901700258255;c[Gb>>2]=c[Ib>>2];c[m>>2]=(c[m>>2]|0)+((c[Ib>>2]|0)*6<<2);while(1){if((c[Gb>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+4>>2];g[Ba>>2]=+g[(c[m>>2]|0)+8>>2];g[Ea>>2]=+g[(c[m>>2]|0)+12>>2];g[Ca>>2]=+g[za>>2]*+g[Ba>>2];g[hb>>2]=+g[Da>>2]*+g[Ba>>2];g[Fa>>2]=+g[Da>>2]*+g[Ea>>2];g[gb>>2]=+g[za>>2]*+g[Ea>>2];g[Ga>>2]=+g[Ca>>2]-+g[Fa>>2];g[sb>>2]=+g[gb>>2]-+g[hb>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[qb>>2]=+g[Ca>>2]+ +g[Fa>>2];g[Ha>>2]=+g[(c[m>>2]|0)+16>>2];g[jb>>2]=+g[(c[m>>2]|0)+20>>2];g[kb>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[ib>>2]*+g[jb>>2];g[Ua>>2]=+g[Ba>>2]*+g[Ha>>2]+ +g[Ea>>2]*+g[jb>>2];g[Wa>>2]=+g[Ba>>2]*+g[jb>>2]-+g[Ea>>2]*+g[Ha>>2];g[mb>>2]=+g[Ga>>2]*+g[jb>>2]-+g[ib>>2]*+g[Ha>>2];g[vb>>2]=+g[qb>>2]*+g[Ha>>2]+ +g[sb>>2]*+g[jb>>2];g[Bb>>2]=+g[za>>2]*+g[Ha>>2]+ +g[Da>>2]*+g[jb>>2];g[Db>>2]=+g[za>>2]*+g[jb>>2]-+g[Da>>2]*+g[Ha>>2];g[xb>>2]=+g[qb>>2]*+g[jb>>2]-+g[sb>>2]*+g[Ha>>2];g[q>>2]=+g[c[k>>2]>>2];g[G>>2]=+g[c[l>>2]>>2];g[lb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[nb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ob>>2]=+g[kb>>2]*+g[lb>>2]+ +g[mb>>2]*+g[nb>>2];g[F>>2]=+g[kb>>2]*+g[nb>>2]-+g[mb>>2]*+g[lb>>2];g[pb>>2]=+g[q>>2]-+g[ob>>2];g[T>>2]=+g[G>>2]-+g[F>>2];g[db>>2]=+g[q>>2]+ +g[ob>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[Na>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Oa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Pa>>2]=+g[Ga>>2]*+g[Na>>2]+ +g[ib>>2]*+g[Oa>>2];g[ea>>2]=+g[Ga>>2]*+g[Oa>>2]-+g[ib>>2]*+g[Na>>2];g[Za>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[_a>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[$a>>2]=+g[za>>2]*+g[Za>>2]+ 
+g[Da>>2]*+g[_a>>2];g[ia>>2]=+g[za>>2]*+g[_a>>2]-+g[Da>>2]*+g[Za>>2];g[Qa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Sa>>2]=+g[Ha>>2]*+g[Qa>>2]+ +g[jb>>2]*+g[Ra>>2];g[fa>>2]=+g[Ha>>2]*+g[Ra>>2]-+g[jb>>2]*+g[Qa>>2];g[Va>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Xa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Ya>>2]=+g[Ua>>2]*+g[Va>>2]+ +g[Wa>>2]*+g[Xa>>2];g[ha>>2]=+g[Ua>>2]*+g[Xa>>2]-+g[Wa>>2]*+g[Va>>2];g[Ta>>2]=+g[Pa>>2]-+g[Sa>>2];g[ab>>2]=+g[Ya>>2]-+g[$a>>2];g[bb>>2]=+g[Ta>>2]+ +g[ab>>2];g[ra>>2]=+g[ea>>2]+ +g[fa>>2];g[sa>>2]=+g[ha>>2]+ +g[ia>>2];g[D>>2]=+g[ra>>2]+ +g[sa>>2];g[s>>2]=+g[Pa>>2]+ +g[Sa>>2];g[t>>2]=+g[Ya>>2]+ +g[$a>>2];g[u>>2]=+g[s>>2]+ +g[t>>2];g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[R>>2]=+g[ga>>2]+ +g[ja>>2];g[rb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[tb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ub>>2]=+g[qb>>2]*+g[rb>>2]+ +g[sb>>2]*+g[tb>>2];g[z>>2]=+g[qb>>2]*+g[tb>>2]-+g[sb>>2]*+g[rb>>2];g[Ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ka>>2]=+g[Ba>>2]*+g[Ia>>2]+ +g[Ea>>2]*+g[Ja>>2];g[ba>>2]=+g[Ba>>2]*+g[Ja>>2]-+g[Ea>>2]*+g[Ia>>2];g[wb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[yb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[zb>>2]=+g[vb>>2]*+g[wb>>2]+ +g[xb>>2]*+g[yb>>2];g[A>>2]=+g[vb>>2]*+g[yb>>2]-+g[xb>>2]*+g[wb>>2];g[Cb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Eb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Fb>>2]=+g[Bb>>2]*+g[Cb>>2]+ +g[Db>>2]*+g[Eb>>2];g[aa>>2]=+g[Bb>>2]*+g[Eb>>2]-+g[Db>>2]*+g[Cb>>2];g[Ab>>2]=+g[ub>>2]-+g[zb>>2];g[La>>2]=+g[Fb>>2]-+g[Ka>>2];g[Ma>>2]=+g[Ab>>2]+ +g[La>>2];g[ua>>2]=+g[z>>2]+ +g[A>>2];g[va>>2]=+g[aa>>2]+ +g[ba>>2];g[C>>2]=+g[ua>>2]+ +g[va>>2];g[eb>>2]=+g[ub>>2]+ +g[zb>>2];g[fb>>2]=+g[Fb>>2]+ +g[Ka>>2];g[r>>2]=+g[eb>>2]+ +g[fb>>2];g[$>>2]=+g[z>>2]-+g[A>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[Q>>2]=+g[$>>2]+ +g[ca>>2];g[w>>2]=(+g[Ma>>2]-+g[bb>>2])*.55901700258255;g[cb>>2]=+g[Ma>>2]+ 
+g[bb>>2];g[x>>2]=+g[pb>>2]-+g[cb>>2]*.25;g[da>>2]=+g[$>>2]-+g[ca>>2];g[ka>>2]=+g[ga>>2]-+g[ja>>2];g[la>>2]=+g[da>>2]*.9510565400123596+ +g[ka>>2]*.5877852439880371;g[na>>2]=+g[ka>>2]*.9510565400123596-+g[da>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[pb>>2]+ +g[cb>>2];g[ma>>2]=+g[x>>2]-+g[w>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ma>>2]-+g[na>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ma>>2]+ +g[na>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[y>>2]-+g[la>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[y>>2]+ +g[la>>2];g[S>>2]=(+g[Q>>2]-+g[R>>2])*.55901700258255;g[U>>2]=+g[Q>>2]+ +g[R>>2];g[V>>2]=+g[T>>2]-+g[U>>2]*.25;g[X>>2]=+g[Ab>>2]-+g[La>>2];g[Y>>2]=+g[Ta>>2]-+g[ab>>2];g[Z>>2]=+g[X>>2]*.9510565400123596+ +g[Y>>2]*.5877852439880371;g[Aa>>2]=+g[Y>>2]*.9510565400123596-+g[X>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[U>>2]+ +g[T>>2];g[_>>2]=+g[V>>2]-+g[S>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[_>>2]-+g[Aa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Aa>>2]+ +g[_>>2];g[W>>2]=+g[S>>2]+ +g[V>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[W>>2]-+g[Z>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Z>>2]+ +g[W>>2];g[pa>>2]=(+g[r>>2]-+g[u>>2])*.55901700258255;g[v>>2]=+g[r>>2]+ +g[u>>2];g[oa>>2]=+g[db>>2]-+g[v>>2]*.25;g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[xa>>2]=+g[ta>>2]*.9510565400123596-+g[wa>>2]*.5877852439880371;g[B>>2]=+g[wa>>2]*.9510565400123596+ +g[ta>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[db>>2]+ +g[v>>2];g[ya>>2]=+g[pa>>2]+ +g[oa>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[ya>>2]-+g[B>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ya>>2]+ +g[B>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[qa>>2]-+g[xa>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[qa>>2]+ +g[xa>>2];g[M>>2]=(+g[C>>2]-+g[D>>2])*.55901700258255;g[E>>2]=+g[C>>2]+ 
+g[D>>2];g[L>>2]=+g[H>>2]-+g[E>>2]*.25;g[I>>2]=+g[s>>2]-+g[t>>2];g[J>>2]=+g[eb>>2]-+g[fb>>2];g[K>>2]=+g[I>>2]*.9510565400123596-+g[J>>2]*.5877852439880371;g[O>>2]=+g[J>>2]*.9510565400123596+ +g[I>>2]*.5877852439880371;g[c[l>>2]>>2]=+g[E>>2]+ +g[H>>2];g[P>>2]=+g[M>>2]+ +g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[P>>2]-+g[O>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[K>>2]+ +g[N>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[N>>2]-+g[K>>2];c[Gb>>2]=(c[Gb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+24;c[n>>2]=c[n>>2]^c[2998]}i=Hb;return}function Hj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,18,2696);i=b;return}function Ij(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0;zd=i;i=i+944|0;k=zd+936|0;l=zd+932|0;m=zd+928|0
;n=zd+924|0;Ad=zd+920|0;o=zd+916|0;p=zd+912|0;yd=zd+896|0;za=zd+892|0;vc=zd+888|0;ed=zd+884|0;gd=zd+880|0;id=zd+876|0;md=zd+872|0;Ec=zd+868|0;Cc=zd+864|0;wc=zd+860|0;Ib=zd+856|0;yc=zd+852|0;Wc=zd+848|0;Jc=zd+844|0;rd=zd+840|0;Uc=zd+836|0;ad=zd+832|0;vd=zd+828|0;Hc=zd+824|0;Nc=zd+820|0;Oc=zd+816|0;Pc=zd+812|0;Rc=zd+808|0;ea=zd+804|0;ja=zd+800|0;aa=zd+796|0;ha=zd+792|0;xa=zd+788|0;I=zd+784|0;ta=zd+780|0;G=zd+776|0;xc=zd+772|0;td=zd+768|0;_c=zd+764|0;qd=zd+760|0;uc=zd+756|0;ud=zd+752|0;$c=zd+748|0;pd=zd+744|0;fd=zd+740|0;ld=zd+736|0;hd=zd+732|0;kd=zd+728|0;ca=zd+724|0;da=zd+720|0;A=zd+716|0;$=zd+712|0;va=zd+708|0;wa=zd+704|0;ra=zd+700|0;sa=zd+696|0;dd=zd+692|0;lc=zd+688|0;O=zd+684|0;_b=zd+680|0;Ac=zd+676|0;mc=zd+672|0;R=zd+668|0;Xb=zd+664|0;Mc=zd+660|0;Wa=zd+656|0;X=zd+652|0;Eb=zd+648|0;Zc=zd+644|0;Xa=zd+640|0;Ba=zd+636|0;Fb=zd+632|0;C=zd+628|0;L=zd+624|0;db=zd+620|0;eb=zd+616|0;fb=zd+612|0;gb=zd+608|0;rb=zd+604|0;Na=zd+600|0;wb=zd+596|0;Oa=zd+592|0;z=zd+588|0;ma=zd+584|0;_a=zd+580|0;$a=zd+576|0;ab=zd+572|0;bb=zd+568|0;Ia=zd+564|0;Ka=zd+560|0;lb=zd+556|0;La=zd+552|0;q=zd+548|0;Zb=zd+544|0;cd=zd+540|0;Yb=zd+536|0;zc=zd+532|0;bd=zd+528|0;od=zd+524|0;P=zd+520|0;xd=zd+516|0;Q=zd+512|0;jd=zd+508|0;nd=zd+504|0;sd=zd+500|0;wd=zd+496|0;Gc=zd+492|0;T=zd+488|0;Lc=zd+484|0;U=zd+480|0;V=zd+476|0;W=zd+472|0;Dc=zd+468|0;Fc=zd+464|0;Ic=zd+460|0;Kc=zd+456|0;Tc=zd+452|0;Z=zd+448|0;Yc=zd+444|0;_=zd+440|0;Y=zd+436|0;Aa=zd+432|0;Qc=zd+428|0;Sc=zd+424|0;Vc=zd+420|0;Xc=zd+416|0;qa=zd+412|0;sb=zd+408|0;K=zd+404|0;pb=zd+400|0;B=zd+396|0;tb=zd+392|0;F=zd+388|0;ob=zd+384|0;oa=zd+380|0;pa=zd+376|0;H=zd+372|0;J=zd+368|0;ua=zd+364|0;ya=zd+360|0;D=zd+356|0;E=zd+352|0;nb=zd+348|0;qb=zd+344|0;ub=zd+340|0;vb=zd+336|0;v=zd+332|0;Ea=zd+328|0;la=zd+324|0;jb=zd+320|0;y=zd+316|0;Fa=zd+312|0;ga=zd+308|0;ib=zd+304|0;t=zd+300|0;u=zd+296|0;ia=zd+292|0;ka=zd+288|0;w=zd+284|0;x=zd+280|0;ba=zd+276|0;fa=zd+272|0;Ga=zd+268|0;Ha=zd+264|0;Ja=zd+260|0;kb=zd+256|0;Da=zd+252|0;zb=zd+248|0;Tb=zd+244|0;Vb=zd+240|0;yb=zd+2
36|0;Ub=zd+232|0;Cb=zd+228|0;rc=zd+224|0;S=zd+220|0;Ca=zd+216|0;sc=zd+212|0;tc=zd+208|0;mb=zd+204|0;xb=zd+200|0;Ab=zd+196|0;Bb=zd+192|0;Za=zd+188|0;Kb=zd+184|0;gc=zd+180|0;ic=zd+176|0;Jb=zd+172|0;hc=zd+168|0;Nb=zd+164|0;dc=zd+160|0;Va=zd+156|0;Ya=zd+152|0;ec=zd+148|0;fc=zd+144|0;cb=zd+140|0;hb=zd+136|0;Lb=zd+132|0;Mb=zd+128|0;Hb=zd+124|0;Ra=zd+120|0;oc=zd+116|0;qc=zd+112|0;Qa=zd+108|0;pc=zd+104|0;Ua=zd+100|0;jc=zd+96|0;Db=zd+92|0;Gb=zd+88|0;kc=zd+84|0;nc=zd+80|0;Ma=zd+76|0;Pa=zd+72|0;Sa=zd+68|0;Ta=zd+64|0;s=zd+60|0;Ob=zd+56|0;ac=zd+52|0;cc=zd+48|0;N=zd+44|0;bc=zd+40|0;Rb=zd+36|0;Sb=zd+32|0;Bc=zd+28|0;r=zd+24|0;Wb=zd+20|0;$b=zd+16|0;na=zd+12|0;M=zd+8|0;Pb=zd+4|0;Qb=zd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ad>>2]=f;c[o>>2]=h;c[p>>2]=j;g[zd+908>>2]=.3826834261417389;g[zd+904>>2]=.9238795042037964;g[zd+900>>2]=.7071067690849304;c[yd>>2]=c[Ad>>2];c[m>>2]=(c[m>>2]|0)+(c[Ad>>2]<<3<<2);while(1){if((c[yd>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[vc>>2]=+g[(c[m>>2]|0)+4>>2];g[ed>>2]=+g[(c[m>>2]|0)+8>>2];g[gd>>2]=+g[(c[m>>2]|0)+12>>2];g[fd>>2]=+g[za>>2]*+g[ed>>2];g[ld>>2]=+g[vc>>2]*+g[ed>>2];g[hd>>2]=+g[vc>>2]*+g[gd>>2];g[kd>>2]=+g[za>>2]*+g[gd>>2];g[id>>2]=+g[fd>>2]-+g[hd>>2];g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[Ec>>2]=+g[kd>>2]-+g[ld>>2];g[Cc>>2]=+g[fd>>2]+ +g[hd>>2];g[wc>>2]=+g[(c[m>>2]|0)+20>>2];g[xc>>2]=+g[vc>>2]*+g[wc>>2];g[td>>2]=+g[ed>>2]*+g[wc>>2];g[_c>>2]=+g[za>>2]*+g[wc>>2];g[qd>>2]=+g[gd>>2]*+g[wc>>2];g[Ib>>2]=+g[(c[m>>2]|0)+16>>2];g[uc>>2]=+g[za>>2]*+g[Ib>>2];g[ud>>2]=+g[gd>>2]*+g[Ib>>2];g[$c>>2]=+g[vc>>2]*+g[Ib>>2];g[pd>>2]=+g[ed>>2]*+g[Ib>>2];g[yc>>2]=+g[uc>>2]+ +g[xc>>2];g[Wc>>2]=+g[td>>2]-+g[ud>>2];g[Jc>>2]=+g[_c>>2]+ +g[$c>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[Uc>>2]=+g[pd>>2]+ +g[qd>>2];g[ad>>2]=+g[_c>>2]-+g[$c>>2];g[vd>>2]=+g[td>>2]+ +g[ud>>2];g[Hc>>2]=+g[uc>>2]-+g[xc>>2];g[Nc>>2]=+g[(c[m>>2]|0)+24>>2];g[Oc>>2]=+g[(c[m>>2]|0)+28>>2];g[Pc>>2]=+g[za>>2]*+g[Nc>>2]+ 
+g[vc>>2]*+g[Oc>>2];g[Rc>>2]=+g[za>>2]*+g[Oc>>2]-+g[vc>>2]*+g[Nc>>2];g[ca>>2]=+g[id>>2]*+g[wc>>2];g[da>>2]=+g[md>>2]*+g[Ib>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ja>>2]=+g[ca>>2]+ +g[da>>2];g[A>>2]=+g[id>>2]*+g[Ib>>2];g[$>>2]=+g[md>>2]*+g[wc>>2];g[aa>>2]=+g[A>>2]+ +g[$>>2];g[ha>>2]=+g[A>>2]-+g[$>>2];g[va>>2]=+g[Cc>>2]*+g[wc>>2];g[wa>>2]=+g[Ec>>2]*+g[Ib>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[I>>2]=+g[va>>2]+ +g[wa>>2];g[ra>>2]=+g[Cc>>2]*+g[Ib>>2];g[sa>>2]=+g[Ec>>2]*+g[wc>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[G>>2]=+g[ra>>2]-+g[sa>>2];g[q>>2]=+g[c[k>>2]>>2];g[Zb>>2]=+g[c[l>>2]>>2];g[zc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[bd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[cd>>2]=+g[yc>>2]*+g[zc>>2]+ +g[ad>>2]*+g[bd>>2];g[Yb>>2]=+g[yc>>2]*+g[bd>>2]-+g[ad>>2]*+g[zc>>2];g[dd>>2]=+g[q>>2]+ +g[cd>>2];g[lc>>2]=+g[Zb>>2]-+g[Yb>>2];g[O>>2]=+g[q>>2]-+g[cd>>2];g[_b>>2]=+g[Yb>>2]+ +g[Zb>>2];g[jd>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[nd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[od>>2]=+g[id>>2]*+g[jd>>2]+ +g[md>>2]*+g[nd>>2];g[P>>2]=+g[id>>2]*+g[nd>>2]-+g[md>>2]*+g[jd>>2];g[sd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[wd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[xd>>2]=+g[rd>>2]*+g[sd>>2]+ +g[vd>>2]*+g[wd>>2];g[Q>>2]=+g[rd>>2]*+g[wd>>2]-+g[vd>>2]*+g[sd>>2];g[Ac>>2]=+g[od>>2]+ +g[xd>>2];g[mc>>2]=+g[od>>2]-+g[xd>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[Xb>>2]=+g[P>>2]+ +g[Q>>2];g[Dc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Fc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Gc>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[T>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[Ic>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Kc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Lc>>2]=+g[Hc>>2]*+g[Ic>>2]+ +g[Jc>>2]*+g[Kc>>2];g[U>>2]=+g[Hc>>2]*+g[Kc>>2]-+g[Jc>>2]*+g[Ic>>2];g[Mc>>2]=+g[Gc>>2]+ +g[Lc>>2];g[Wa>>2]=+g[T>>2]+ +g[U>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[W>>2]=+g[Gc>>2]-+g[Lc>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[Eb>>2]=+g[W>>2]+ 
+g[V>>2];g[Qc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Sc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Tc>>2]=+g[Pc>>2]*+g[Qc>>2]+ +g[Rc>>2]*+g[Sc>>2];g[Z>>2]=+g[Pc>>2]*+g[Sc>>2]-+g[Rc>>2]*+g[Qc>>2];g[Vc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Xc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Yc>>2]=+g[Uc>>2]*+g[Vc>>2]+ +g[Wc>>2]*+g[Xc>>2];g[_>>2]=+g[Uc>>2]*+g[Xc>>2]-+g[Wc>>2]*+g[Vc>>2];g[Zc>>2]=+g[Tc>>2]+ +g[Yc>>2];g[Xa>>2]=+g[Z>>2]+ +g[_>>2];g[Y>>2]=+g[Tc>>2]-+g[Yc>>2];g[Aa>>2]=+g[Z>>2]-+g[_>>2];g[Ba>>2]=+g[Y>>2]+ +g[Aa>>2];g[Fb>>2]=+g[Y>>2]-+g[Aa>>2];g[oa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[qa>>2]=+g[Nc>>2]*+g[oa>>2]+ +g[Oc>>2]*+g[pa>>2];g[sb>>2]=+g[Nc>>2]*+g[pa>>2]-+g[Oc>>2]*+g[oa>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[pb>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ya>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[B>>2]=+g[ta>>2]*+g[ua>>2]+ +g[xa>>2]*+g[ya>>2];g[tb>>2]=+g[ta>>2]*+g[ya>>2]-+g[xa>>2]*+g[ua>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[E>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[F>>2]=+g[ed>>2]*+g[D>>2]+ +g[gd>>2]*+g[E>>2];g[ob>>2]=+g[ed>>2]*+g[E>>2]-+g[gd>>2]*+g[D>>2];g[C>>2]=+g[qa>>2]+ +g[B>>2];g[L>>2]=+g[F>>2]+ +g[K>>2];g[db>>2]=+g[C>>2]-+g[L>>2];g[eb>>2]=+g[sb>>2]+ +g[tb>>2];g[fb>>2]=+g[ob>>2]+ +g[pb>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[nb>>2]=+g[qa>>2]-+g[B>>2];g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[rb>>2]=+g[nb>>2]-+g[qb>>2];g[Na>>2]=+g[nb>>2]+ +g[qb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[vb>>2]=+g[F>>2]-+g[K>>2];g[wb>>2]=+g[ub>>2]+ +g[vb>>2];g[Oa>>2]=+g[ub>>2]-+g[vb>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[v>>2]=+g[za>>2]*+g[t>>2]+ 
+g[vc>>2]*+g[u>>2];g[Ea>>2]=+g[za>>2]*+g[u>>2]-+g[vc>>2]*+g[t>>2];g[ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[la>>2]=+g[ha>>2]*+g[ia>>2]+ +g[ja>>2]*+g[ka>>2];g[jb>>2]=+g[ha>>2]*+g[ka>>2]-+g[ja>>2]*+g[ia>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[y>>2]=+g[Ib>>2]*+g[w>>2]+ +g[wc>>2]*+g[x>>2];g[Fa>>2]=+g[Ib>>2]*+g[x>>2]-+g[wc>>2]*+g[w>>2];g[ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ga>>2]=+g[aa>>2]*+g[ba>>2]+ +g[ea>>2]*+g[fa>>2];g[ib>>2]=+g[aa>>2]*+g[fa>>2]-+g[ea>>2]*+g[ba>>2];g[z>>2]=+g[v>>2]+ +g[y>>2];g[ma>>2]=+g[ga>>2]+ +g[la>>2];g[_a>>2]=+g[z>>2]-+g[ma>>2];g[$a>>2]=+g[Ea>>2]+ +g[Fa>>2];g[ab>>2]=+g[ib>>2]+ +g[jb>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Ha>>2]=+g[ga>>2]-+g[la>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[Ka>>2]=+g[Ga>>2]-+g[Ha>>2];g[Ja>>2]=+g[v>>2]-+g[y>>2];g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[lb>>2]=+g[Ja>>2]-+g[kb>>2];g[La>>2]=+g[Ja>>2]+ +g[kb>>2];g[S>>2]=+g[O>>2]-+g[R>>2];g[Ca>>2]=(+g[X>>2]-+g[Ba>>2])*.7071067690849304;g[Da>>2]=+g[S>>2]+ +g[Ca>>2];g[zb>>2]=+g[S>>2]-+g[Ca>>2];g[sc>>2]=(+g[Fb>>2]-+g[Eb>>2])*.7071067690849304;g[tc>>2]=+g[mc>>2]+ +g[lc>>2];g[Tb>>2]=+g[sc>>2]+ +g[tc>>2];g[Vb>>2]=+g[tc>>2]-+g[sc>>2];g[mb>>2]=+g[Ia>>2]*.9238795042037964+ +g[lb>>2]*.3826834261417389;g[xb>>2]=+g[rb>>2]*.3826834261417389-+g[wb>>2]*.9238795042037964;g[yb>>2]=+g[mb>>2]+ +g[xb>>2];g[Ub>>2]=+g[xb>>2]-+g[mb>>2];g[Ab>>2]=+g[Ia>>2]*.3826834261417389-+g[lb>>2]*.9238795042037964;g[Bb>>2]=+g[wb>>2]*.3826834261417389+ +g[rb>>2]*.9238795042037964;g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[rc>>2]=+g[Ab>>2]+ +g[Bb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Da>>2]-+g[yb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Tb>>2]-+g[rc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Da>>2]+ +g[yb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[rc>>2]+ 
+g[Tb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[zb>>2]-+g[Cb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Vb>>2]-+g[Ub>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[zb>>2]+ +g[Cb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Va>>2]=+g[dd>>2]-+g[Ac>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[Za>>2]=+g[Va>>2]+ +g[Ya>>2];g[Kb>>2]=+g[Va>>2]-+g[Ya>>2];g[ec>>2]=+g[Zc>>2]-+g[Mc>>2];g[fc>>2]=+g[_b>>2]-+g[Xb>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[ic>>2]=+g[fc>>2]-+g[ec>>2];g[cb>>2]=+g[_a>>2]+ +g[bb>>2];g[hb>>2]=+g[db>>2]-+g[gb>>2];g[Jb>>2]=(+g[cb>>2]+ +g[hb>>2])*.7071067690849304;g[hc>>2]=(+g[hb>>2]-+g[cb>>2])*.7071067690849304;g[Lb>>2]=+g[bb>>2]-+g[_a>>2];g[Mb>>2]=+g[db>>2]+ +g[gb>>2];g[Nb>>2]=(+g[Lb>>2]-+g[Mb>>2])*.7071067690849304;g[dc>>2]=(+g[Lb>>2]+ +g[Mb>>2])*.7071067690849304;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Za>>2]-+g[Jb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[gc>>2]-+g[dc>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Za>>2]+ +g[Jb>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[dc>>2]+ +g[gc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Kb>>2]-+g[Nb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ic>>2]-+g[hc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Kb>>2]+ +g[Nb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[hc>>2]+ +g[ic>>2];g[Db>>2]=+g[O>>2]+ +g[R>>2];g[Gb>>2]=(+g[Eb>>2]+ +g[Fb>>2])*.7071067690849304;g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[Ra>>2]=+g[Db>>2]-+g[Gb>>2];g[kc>>2]=(+g[X>>2]+ +g[Ba>>2])*.7071067690849304;g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[oc>>2]=+g[kc>>2]+ +g[nc>>2];g[qc>>2]=+g[nc>>2]-+g[kc>>2];g[Ma>>2]=+g[Ka>>2]*.3826834261417389+ +g[La>>2]*.9238795042037964;g[Pa>>2]=+g[Na>>2]*.9238795042037964-+g[Oa>>2]*.3826834261417389;g[Qa>>2]=+g[Ma>>2]+ +g[Pa>>2];g[pc>>2]=+g[Pa>>2]-+g[Ma>>2];g[Sa>>2]=+g[Ka>>2]*.9238795042037964-+g[La>>2]*.3826834261417389;g[Ta>>2]=+g[Oa>>2]*.9238795042037964+ +g[Na>>2]*.3826834261417389;g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[jc>>2]=+g[Sa>>2]+ 
+g[Ta>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Hb>>2]-+g[Qa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[oc>>2]-+g[jc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Hb>>2]+ +g[Qa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[jc>>2]+ +g[oc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ra>>2]-+g[Ua>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[qc>>2]-+g[pc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ra>>2]+ +g[Ua>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[pc>>2]+ +g[qc>>2];g[Bc>>2]=+g[dd>>2]+ +g[Ac>>2];g[r>>2]=+g[Mc>>2]+ +g[Zc>>2];g[s>>2]=+g[Bc>>2]+ +g[r>>2];g[Ob>>2]=+g[Bc>>2]-+g[r>>2];g[Wb>>2]=+g[Wa>>2]+ +g[Xa>>2];g[$b>>2]=+g[Xb>>2]+ +g[_b>>2];g[ac>>2]=+g[Wb>>2]+ +g[$b>>2];g[cc>>2]=+g[$b>>2]-+g[Wb>>2];g[na>>2]=+g[z>>2]+ +g[ma>>2];g[M>>2]=+g[C>>2]+ +g[L>>2];g[N>>2]=+g[na>>2]+ +g[M>>2];g[bc>>2]=+g[M>>2]-+g[na>>2];g[Pb>>2]=+g[$a>>2]+ +g[ab>>2];g[Qb>>2]=+g[eb>>2]+ +g[fb>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[Sb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[s>>2]-+g[N>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ac>>2]-+g[Sb>>2];g[c[k>>2]>>2]=+g[s>>2]+ +g[N>>2];g[c[l>>2]>>2]=+g[Sb>>2]+ +g[ac>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ob>>2]-+g[Rb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[cc>>2]-+g[bc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ob>>2]+ +g[Rb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[bc>>2]+ +g[cc>>2];c[yd>>2]=(c[yd>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=zd;return}function Jj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,19,2760);i=b;return}function Kj(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0;Ze=i;i=i+1264|0;k=Ze+1260|0;l=Ze+1256|0;m=Ze+1252|0;n=Ze+1248|0;_e=Ze+1244|0;o=Ze+1240|0;p=Ze+1236|0;Ye=Ze+1216|0;za=Ze+1212|0;Vd=Ze+1208|0;Ee=Ze+1204|0;Ge=Ze+1200|0;Ie=Ze+1196|0;Me=Ze+1192|0;ga=Ze+1188|0;ea=Ze+1184|0;Wd=Ze+1180|0;Ib=Ze+1176|0;Yd=Ze+1172|0;v=Ze+1168|0;R=Ze+1164|0;Ae=Ze+1160|0;P=Ze+1156|0;ua=Ze+1152|0;z=Ze+1148|0;sa=Ze+1144|0;ma=Ze+1140|0;Ea=Ze+1136|0;ia=Ze+1132|0;Ca=Ze+1128|0;Oe=Ze+1124|0;Se=Ze+1120|0;F=Ze+1116|0;H=Ze+1112|0;Ve=Ze+1108|0;We=Ze+1104|0;Xe=Ze+1
100|0;he=Ze+1096|0;_=Ze+1092|0;$d=Ze+1088|0;Y=Ze+1084|0;B=Ze+1080|0;xe=Ze+1076|0;je=Ze+1072|0;xa=Ze+1068|0;ve=Ze+1064|0;Xd=Ze+1060|0;x=Ze+1056|0;ye=Ze+1052|0;u=Ze+1048|0;Rc=Ze+1044|0;y=Ze+1040|0;ze=Ze+1036|0;t=Ze+1032|0;Fe=Ze+1028|0;Le=Ze+1024|0;He=Ze+1020|0;Ke=Ze+1016|0;ka=Ze+1012|0;la=Ze+1008|0;fa=Ze+1004|0;ha=Ze+1e3|0;Je=Ze+996|0;Ne=Ze+992|0;Qe=Ze+988|0;Re=Ze+984|0;de=Ze+980|0;jb=Ze+976|0;ad=Ze+972|0;jd=Ze+968|0;Ua=Ze+964|0;Ic=Ze+960|0;vd=Ze+956|0;Hd=Ze+952|0;O=Ze+948|0;Ia=Ze+944|0;Ja=Ze+940|0;Vb=Ze+936|0;Yb=Ze+932|0;Jd=Ze+928|0;oc=Ze+924|0;pc=Ze+920|0;Wc=Ze+916|0;nb=Ze+912|0;ob=Ze+908|0;pb=Ze+904|0;vb=Ze+900|0;Ab=Ze+896|0;ld=Ze+892|0;xc=Ze+888|0;yc=Ze+884|0;Rd=Ze+880|0;Ec=Ze+876|0;Fc=Ze+872|0;Gc=Ze+868|0;Jb=Ze+864|0;Ob=Ze+860|0;Pb=Ze+856|0;ue=Ze+852|0;qa=Ze+848|0;ra=Ze+844|0;ac=Ze+840|0;dc=Ze+836|0;Id=Ze+832|0;lc=Ze+828|0;mc=Ze+824|0;Vc=Ze+820|0;kb=Ze+816|0;lb=Ze+812|0;mb=Ze+808|0;Gb=Ze+804|0;Na=Ze+800|0;kd=Ze+796|0;uc=Ze+792|0;vc=Ze+788|0;Qd=Ze+784|0;Bc=Ze+780|0;Cc=Ze+776|0;Dc=Ze+772|0;Za=Ze+768|0;cb=Ze+764|0;db=Ze+760|0;q=Ze+756|0;_c=Ze+752|0;Ce=Ze+748|0;Zc=Ze+744|0;Ue=Ze+740|0;Ra=Ze+736|0;be=Ze+732|0;Sa=Ze+728|0;Zd=Ze+724|0;Be=Ze+720|0;Pe=Ze+716|0;Te=Ze+712|0;_d=Ze+708|0;ae=Ze+704|0;De=Ze+700|0;ce=Ze+696|0;Td=Ze+692|0;Ud=Ze+688|0;Qa=Ze+684|0;Ta=Ze+680|0;Yc=Ze+676|0;$c=Ze+672|0;E=Ze+668|0;Tb=Ze+664|0;tb=Ze+660|0;eb=Ze+656|0;Ha=Ze+652|0;Xb=Ze+648|0;zb=Ze+644|0;Nb=Ze+640|0;N=Ze+636|0;Ub=Ze+632|0;ub=Ze+628|0;hb=Ze+624|0;X=Ze+620|0;Wb=Ze+616|0;yb=Ze+612|0;Kb=Ze+608|0;wa=Ze+604|0;rb=Ze+600|0;D=Ze+596|0;sb=Ze+592|0;ta=Ze+588|0;va=Ze+584|0;ya=Ze+580|0;C=Ze+576|0;Ba=Ze+572|0;Lb=Ze+568|0;Ga=Ze+564|0;Mb=Ze+560|0;Z=Ze+556|0;Aa=Ze+552|0;Da=Ze+548|0;Fa=Ze+544|0;J=Ze+540|0;fb=Ze+536|0;M=Ze+532|0;gb=Ze+528|0;G=Ze+524|0;I=Ze+520|0;K=Ze+516|0;L=Ze+512|0;T=Ze+508|0;wb=Ze+504|0;W=Ze+500|0;xb=Ze+496|0;Q=Ze+492|0;S=Ze+488|0;U=Ze+484|0;V=Ze+480|0;me=Ze+476|0;_b=Ze+472|0;Eb=Ze+468|0;Va=Ze+464|0;pa=Ze+460|0;cc=Ze+456|0;Ma=Ze+452|0;bb=Ze+448|0;te=Ze+444|0;$b=Ze+440|0;Fb=Ze+436|0;Ya=Ze
+432|0;aa=Ze+428|0;bc=Ze+424|0;La=Ze+420|0;_a=Ze+416|0;ge=Ze+412|0;Cb=Ze+408|0;le=Ze+404|0;Db=Ze+400|0;ee=Ze+396|0;fe=Ze+392|0;ie=Ze+388|0;ke=Ze+384|0;da=Ze+380|0;$a=Ze+376|0;oa=Ze+372|0;ab=Ze+368|0;ba=Ze+364|0;ca=Ze+360|0;ja=Ze+356|0;na=Ze+352|0;pe=Ze+348|0;Wa=Ze+344|0;se=Ze+340|0;Xa=Ze+336|0;ne=Ze+332|0;oe=Ze+328|0;qe=Ze+324|0;re=Ze+320|0;s=Ze+316|0;Hb=Ze+312|0;$=Ze+308|0;Ka=Ze+304|0;we=Ze+300|0;r=Ze+296|0;w=Ze+292|0;A=Ze+288|0;Pc=Ze+284|0;ib=Ze+280|0;Oc=Ze+276|0;fc=Ze+272|0;hc=Ze+268|0;Zb=Ze+264|0;ec=Ze+260|0;gc=Ze+256|0;Qc=Ze+252|0;Md=Ze+248|0;Kd=Ze+244|0;Ld=Ze+240|0;Gd=Ze+236|0;Pd=Ze+232|0;Ed=Ze+228|0;Fd=Ze+224|0;Od=Ze+220|0;Nd=Ze+216|0;ic=Ze+212|0;qb=Ze+208|0;jc=Ze+204|0;Sc=Ze+200|0;Uc=Ze+196|0;nc=Ze+192|0;qc=Ze+188|0;Tc=Ze+184|0;kc=Ze+180|0;zd=Ze+176|0;Xc=Ze+172|0;Ad=Ze+168|0;yd=Ze+164|0;Dd=Ze+160|0;wd=Ze+156|0;xd=Ze+152|0;Cd=Ze+148|0;Bd=Ze+144|0;Sb=Ze+140|0;Qb=Ze+136|0;Rb=Ze+132|0;Pa=Ze+128|0;sc=Ze+124|0;Bb=Ze+120|0;Oa=Ze+116|0;tc=Ze+112|0;rc=Ze+108|0;od=Ze+104|0;md=Ze+100|0;nd=Ze+96|0;sd=Ze+92|0;ud=Ze+88|0;qd=Ze+84|0;rd=Ze+80|0;td=Ze+76|0;pd=Ze+72|0;Hc=Ze+68|0;Jc=Ze+64|0;Kc=Ze+60|0;Ac=Ze+56|0;Mc=Ze+52|0;wc=Ze+48|0;zc=Ze+44|0;Nc=Ze+40|0;Lc=Ze+36|0;Sd=Ze+32|0;bd=Ze+28|0;cd=Ze+24|0;gd=Ze+20|0;id=Ze+16|0;ed=Ze+12|0;fd=Ze+8|0;hd=Ze+4|0;dd=Ze;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[_e>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ze+1232>>2]=.5877852439880371;g[Ze+1228>>2]=.9510565400123596;g[Ze+1224>>2]=.25;g[Ze+1220>>2]=.55901700258255;c[Ye>>2]=c[_e>>2];c[m>>2]=(c[m>>2]|0)+(c[_e>>2]<<3<<2);while(1){if((c[Ye>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[Vd>>2]=+g[(c[m>>2]|0)+4>>2];g[Ee>>2]=+g[(c[m>>2]|0)+8>>2];g[Ge>>2]=+g[(c[m>>2]|0)+12>>2];g[Fe>>2]=+g[za>>2]*+g[Ee>>2];g[Le>>2]=+g[Vd>>2]*+g[Ee>>2];g[He>>2]=+g[Vd>>2]*+g[Ge>>2];g[Ke>>2]=+g[za>>2]*+g[Ge>>2];g[Ie>>2]=+g[Fe>>2]-+g[He>>2];g[Me>>2]=+g[Ke>>2]+ +g[Le>>2];g[ga>>2]=+g[Ke>>2]-+g[Le>>2];g[ea>>2]=+g[Fe>>2]+ 
+g[He>>2];g[Wd>>2]=+g[(c[m>>2]|0)+20>>2];g[Xd>>2]=+g[Vd>>2]*+g[Wd>>2];g[x>>2]=+g[Ee>>2]*+g[Wd>>2];g[ye>>2]=+g[za>>2]*+g[Wd>>2];g[u>>2]=+g[Ge>>2]*+g[Wd>>2];g[Ib>>2]=+g[(c[m>>2]|0)+16>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[y>>2]=+g[Ge>>2]*+g[Ib>>2];g[ze>>2]=+g[Vd>>2]*+g[Ib>>2];g[t>>2]=+g[Ee>>2]*+g[Ib>>2];g[Yd>>2]=+g[Rc>>2]-+g[Xd>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[R>>2]=+g[x>>2]+ +g[y>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[P>>2]=+g[t>>2]-+g[u>>2];g[ua>>2]=+g[ye>>2]-+g[ze>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[sa>>2]=+g[Rc>>2]+ +g[Xd>>2];g[ka>>2]=+g[ea>>2]*+g[Wd>>2];g[la>>2]=+g[ga>>2]*+g[Ib>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[Ea>>2]=+g[ka>>2]-+g[la>>2];g[fa>>2]=+g[ea>>2]*+g[Ib>>2];g[ha>>2]=+g[ga>>2]*+g[Wd>>2];g[ia>>2]=+g[fa>>2]-+g[ha>>2];g[Ca>>2]=+g[fa>>2]+ +g[ha>>2];g[Je>>2]=+g[Ie>>2]*+g[Ib>>2];g[Ne>>2]=+g[Me>>2]*+g[Wd>>2];g[Oe>>2]=+g[Je>>2]+ +g[Ne>>2];g[Qe>>2]=+g[Ie>>2]*+g[Wd>>2];g[Re>>2]=+g[Me>>2]*+g[Ib>>2];g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[F>>2]=+g[Je>>2]-+g[Ne>>2];g[H>>2]=+g[Qe>>2]+ +g[Re>>2];g[Ve>>2]=+g[(c[m>>2]|0)+24>>2];g[We>>2]=+g[(c[m>>2]|0)+28>>2];g[Xe>>2]=+g[Ie>>2]*+g[Ve>>2]+ +g[Me>>2]*+g[We>>2];g[he>>2]=+g[Oe>>2]*+g[Ve>>2]+ +g[Se>>2]*+g[We>>2];g[_>>2]=+g[ea>>2]*+g[We>>2]-+g[ga>>2]*+g[Ve>>2];g[$d>>2]=+g[Ie>>2]*+g[We>>2]-+g[Me>>2]*+g[Ve>>2];g[Y>>2]=+g[ea>>2]*+g[Ve>>2]+ +g[ga>>2]*+g[We>>2];g[B>>2]=+g[za>>2]*+g[We>>2]-+g[Vd>>2]*+g[Ve>>2];g[xe>>2]=+g[Ee>>2]*+g[We>>2]-+g[Ge>>2]*+g[Ve>>2];g[je>>2]=+g[Oe>>2]*+g[We>>2]-+g[Se>>2]*+g[Ve>>2];g[xa>>2]=+g[za>>2]*+g[Ve>>2]+ +g[Vd>>2]*+g[We>>2];g[ve>>2]=+g[Ee>>2]*+g[Ve>>2]+ +g[Ge>>2]*+g[We>>2];g[q>>2]=+g[c[k>>2]>>2];g[_c>>2]=+g[c[l>>2]>>2];g[Zd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Be>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Ce>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[Ae>>2]*+g[Be>>2];g[Zc>>2]=+g[Yd>>2]*+g[Be>>2]-+g[Ae>>2]*+g[Zd>>2];g[Pe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Te>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ue>>2]=+g[Oe>>2]*+g[Pe>>2]+ 
+g[Se>>2]*+g[Te>>2];g[Ra>>2]=+g[Oe>>2]*+g[Te>>2]-+g[Se>>2]*+g[Pe>>2];g[_d>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ae>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[be>>2]=+g[Xe>>2]*+g[_d>>2]+ +g[$d>>2]*+g[ae>>2];g[Sa>>2]=+g[Xe>>2]*+g[ae>>2]-+g[$d>>2]*+g[_d>>2];g[De>>2]=+g[q>>2]+ +g[Ce>>2];g[ce>>2]=+g[Ue>>2]+ +g[be>>2];g[de>>2]=+g[De>>2]-+g[ce>>2];g[jb>>2]=+g[De>>2]+ +g[ce>>2];g[Td>>2]=+g[_c>>2]-+g[Zc>>2];g[Ud>>2]=+g[Ue>>2]-+g[be>>2];g[ad>>2]=+g[Td>>2]-+g[Ud>>2];g[jd>>2]=+g[Ud>>2]+ +g[Td>>2];g[Qa>>2]=+g[q>>2]-+g[Ce>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Ua>>2]=+g[Qa>>2]-+g[Ta>>2];g[Ic>>2]=+g[Qa>>2]+ +g[Ta>>2];g[Yc>>2]=+g[Ra>>2]+ +g[Sa>>2];g[$c>>2]=+g[Zc>>2]+ +g[_c>>2];g[vd>>2]=+g[Yc>>2]+ +g[$c>>2];g[Hd>>2]=+g[$c>>2]-+g[Yc>>2];g[ta>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[wa>>2]=+g[sa>>2]*+g[ta>>2]+ +g[ua>>2]*+g[va>>2];g[rb>>2]=+g[sa>>2]*+g[va>>2]-+g[ua>>2]*+g[ta>>2];g[ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[D>>2]=+g[xa>>2]*+g[ya>>2]+ +g[B>>2]*+g[C>>2];g[sb>>2]=+g[xa>>2]*+g[C>>2]-+g[B>>2]*+g[ya>>2];g[E>>2]=+g[wa>>2]+ +g[D>>2];g[Tb>>2]=+g[rb>>2]+ +g[sb>>2];g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[eb>>2]=+g[wa>>2]-+g[D>>2];g[Z>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[Aa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[Ba>>2]=+g[Y>>2]*+g[Z>>2]+ +g[_>>2]*+g[Aa>>2];g[Lb>>2]=+g[Y>>2]*+g[Aa>>2]-+g[_>>2]*+g[Z>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ga>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[Ea>>2]*+g[Fa>>2];g[Mb>>2]=+g[Ca>>2]*+g[Fa>>2]-+g[Ea>>2]*+g[Da>>2];g[Ha>>2]=+g[Ba>>2]+ +g[Ga>>2];g[Xb>>2]=+g[Lb>>2]+ +g[Mb>>2];g[zb>>2]=+g[Ba>>2]-+g[Ga>>2];g[Nb>>2]=+g[Lb>>2]-+g[Mb>>2];g[G>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[J>>2]=+g[F>>2]*+g[G>>2]+ 
+g[H>>2]*+g[I>>2];g[fb>>2]=+g[F>>2]*+g[I>>2]-+g[H>>2]*+g[G>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[L>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[M>>2]=+g[Ee>>2]*+g[K>>2]+ +g[Ge>>2]*+g[L>>2];g[gb>>2]=+g[Ee>>2]*+g[L>>2]-+g[Ge>>2]*+g[K>>2];g[N>>2]=+g[J>>2]+ +g[M>>2];g[Ub>>2]=+g[fb>>2]+ +g[gb>>2];g[ub>>2]=+g[J>>2]-+g[M>>2];g[hb>>2]=+g[fb>>2]-+g[gb>>2];g[Q>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[S>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[T>>2]=+g[P>>2]*+g[Q>>2]+ +g[R>>2]*+g[S>>2];g[wb>>2]=+g[P>>2]*+g[S>>2]-+g[R>>2]*+g[Q>>2];g[U>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[W>>2]=+g[ea>>2]*+g[U>>2]+ +g[ga>>2]*+g[V>>2];g[xb>>2]=+g[ea>>2]*+g[V>>2]-+g[ga>>2]*+g[U>>2];g[X>>2]=+g[T>>2]+ +g[W>>2];g[Wb>>2]=+g[wb>>2]+ +g[xb>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[Kb>>2]=+g[T>>2]-+g[W>>2];g[O>>2]=+g[E>>2]-+g[N>>2];g[Ia>>2]=+g[X>>2]-+g[Ha>>2];g[Ja>>2]=+g[O>>2]+ +g[Ia>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2];g[Jd>>2]=+g[Vb>>2]+ +g[Yb>>2];g[oc>>2]=+g[Tb>>2]+ +g[Ub>>2];g[pc>>2]=+g[Wb>>2]+ +g[Xb>>2];g[Wc>>2]=+g[oc>>2]+ +g[pc>>2];g[nb>>2]=+g[E>>2]+ +g[N>>2];g[ob>>2]=+g[X>>2]+ +g[Ha>>2];g[pb>>2]=+g[nb>>2]+ +g[ob>>2];g[vb>>2]=+g[tb>>2]+ +g[ub>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[ld>>2]=+g[vb>>2]+ +g[Ab>>2];g[xc>>2]=+g[tb>>2]-+g[ub>>2];g[yc>>2]=+g[yb>>2]-+g[zb>>2];g[Rd>>2]=+g[xc>>2]+ +g[yc>>2];g[Ec>>2]=+g[eb>>2]+ +g[hb>>2];g[Fc>>2]=+g[Kb>>2]+ +g[Nb>>2];g[Gc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Jb>>2]=+g[eb>>2]-+g[hb>>2];g[Ob>>2]=+g[Kb>>2]-+g[Nb>>2];g[Pb>>2]=+g[Jb>>2]+ +g[Ob>>2];g[ee>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[fe>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ge>>2]=+g[Ie>>2]*+g[ee>>2]+ +g[Me>>2]*+g[fe>>2];g[Cb>>2]=+g[Ie>>2]*+g[fe>>2]-+g[Me>>2]*+g[ee>>2];g[ie>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ke>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[le>>2]=+g[he>>2]*+g[ie>>2]+ +g[je>>2]*+g[ke>>2];g[Db>>2]=+g[he>>2]*+g[ke>>2]-+g[je>>2]*+g[ie>>2];g[me>>2]=+g[ge>>2]+ +g[le>>2];g[_b>>2]=+g[Cb>>2]+ 
+g[Db>>2];g[Eb>>2]=+g[Cb>>2]-+g[Db>>2];g[Va>>2]=+g[ge>>2]-+g[le>>2];g[ba>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[da>>2]=+g[za>>2]*+g[ba>>2]+ +g[Vd>>2]*+g[ca>>2];g[$a>>2]=+g[za>>2]*+g[ca>>2]-+g[Vd>>2]*+g[ba>>2];g[ja>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[oa>>2]=+g[ia>>2]*+g[ja>>2]+ +g[ma>>2]*+g[na>>2];g[ab>>2]=+g[ia>>2]*+g[na>>2]-+g[ma>>2]*+g[ja>>2];g[pa>>2]=+g[da>>2]+ +g[oa>>2];g[cc>>2]=+g[$a>>2]+ +g[ab>>2];g[Ma>>2]=+g[da>>2]-+g[oa>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[ne>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[oe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[pe>>2]=+g[Ib>>2]*+g[ne>>2]+ +g[Wd>>2]*+g[oe>>2];g[Wa>>2]=+g[Ib>>2]*+g[oe>>2]-+g[Wd>>2]*+g[ne>>2];g[qe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[re>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[se>>2]=+g[Ve>>2]*+g[qe>>2]+ +g[We>>2]*+g[re>>2];g[Xa>>2]=+g[Ve>>2]*+g[re>>2]-+g[We>>2]*+g[qe>>2];g[te>>2]=+g[pe>>2]+ +g[se>>2];g[$b>>2]=+g[Wa>>2]+ +g[Xa>>2];g[Fb>>2]=+g[pe>>2]-+g[se>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[we>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[s>>2]=+g[ve>>2]*+g[we>>2]+ +g[xe>>2]*+g[r>>2];g[Hb>>2]=+g[ve>>2]*+g[r>>2]-+g[xe>>2]*+g[we>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[A>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[$>>2]=+g[v>>2]*+g[w>>2]+ +g[z>>2]*+g[A>>2];g[Ka>>2]=+g[v>>2]*+g[A>>2]-+g[z>>2]*+g[w>>2];g[aa>>2]=+g[s>>2]+ +g[$>>2];g[bc>>2]=+g[Hb>>2]+ +g[Ka>>2];g[La>>2]=+g[Hb>>2]-+g[Ka>>2];g[_a>>2]=+g[s>>2]-+g[$>>2];g[ue>>2]=+g[me>>2]-+g[te>>2];g[qa>>2]=+g[aa>>2]-+g[pa>>2];g[ra>>2]=+g[ue>>2]+ +g[qa>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[Id>>2]=+g[ac>>2]+ +g[dc>>2];g[lc>>2]=+g[_b>>2]+ +g[$b>>2];g[mc>>2]=+g[bc>>2]+ +g[cc>>2];g[Vc>>2]=+g[lc>>2]+ +g[mc>>2];g[kb>>2]=+g[me>>2]+ +g[te>>2];g[lb>>2]=+g[aa>>2]+ +g[pa>>2];g[mb>>2]=+g[kb>>2]+ +g[lb>>2];g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[kd>>2]=+g[Gb>>2]+ 
+g[Na>>2];g[uc>>2]=+g[Eb>>2]-+g[Fb>>2];g[vc>>2]=+g[La>>2]-+g[Ma>>2];g[Qd>>2]=+g[uc>>2]+ +g[vc>>2];g[Bc>>2]=+g[Va>>2]+ +g[Ya>>2];g[Cc>>2]=+g[_a>>2]+ +g[bb>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Za>>2]=+g[Va>>2]-+g[Ya>>2];g[cb>>2]=+g[_a>>2]-+g[bb>>2];g[db>>2]=+g[Za>>2]+ +g[cb>>2];g[Pc>>2]=(+g[ra>>2]-+g[Ja>>2])*.55901700258255;g[ib>>2]=+g[ra>>2]+ +g[Ja>>2];g[Oc>>2]=+g[de>>2]-+g[ib>>2]*.25;g[Zb>>2]=+g[Vb>>2]-+g[Yb>>2];g[ec>>2]=+g[ac>>2]-+g[dc>>2];g[fc>>2]=+g[Zb>>2]*.9510565400123596-+g[ec>>2]*.5877852439880371;g[hc>>2]=+g[ec>>2]*.9510565400123596+ +g[Zb>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[de>>2]+ +g[ib>>2];g[gc>>2]=+g[Pc>>2]+ +g[Oc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[gc>>2]-+g[hc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[gc>>2]+ +g[hc>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Qc>>2]-+g[fc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Qc>>2]+ +g[fc>>2];g[Md>>2]=(+g[Id>>2]-+g[Jd>>2])*.55901700258255;g[Kd>>2]=+g[Id>>2]+ +g[Jd>>2];g[Ld>>2]=+g[Hd>>2]-+g[Kd>>2]*.25;g[Ed>>2]=+g[O>>2]-+g[Ia>>2];g[Fd>>2]=+g[ue>>2]-+g[qa>>2];g[Gd>>2]=+g[Ed>>2]*.9510565400123596-+g[Fd>>2]*.5877852439880371;g[Pd>>2]=+g[Fd>>2]*.9510565400123596+ +g[Ed>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Kd>>2]+ +g[Hd>>2];g[Od>>2]=+g[Md>>2]+ +g[Ld>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Od>>2]-+g[Pd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Pd>>2]+ +g[Od>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Gd>>2]+ +g[Nd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Nd>>2]-+g[Gd>>2];g[ic>>2]=(+g[mb>>2]-+g[pb>>2])*.55901700258255;g[qb>>2]=+g[mb>>2]+ +g[pb>>2];g[jc>>2]=+g[jb>>2]-+g[qb>>2]*.25;g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[Sc>>2]=+g[nc>>2]*.9510565400123596+ +g[qc>>2]*.5877852439880371;g[Uc>>2]=+g[qc>>2]*.9510565400123596-+g[nc>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[jb>>2]+ 
+g[qb>>2];g[Tc>>2]=+g[jc>>2]-+g[ic>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Tc>>2]-+g[Uc>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Tc>>2]+ +g[Uc>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[kc>>2]-+g[Sc>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[kc>>2]+ +g[Sc>>2];g[zd>>2]=(+g[Vc>>2]-+g[Wc>>2])*.55901700258255;g[Xc>>2]=+g[Vc>>2]+ +g[Wc>>2];g[Ad>>2]=+g[vd>>2]-+g[Xc>>2]*.25;g[wd>>2]=+g[kb>>2]-+g[lb>>2];g[xd>>2]=+g[nb>>2]-+g[ob>>2];g[yd>>2]=+g[wd>>2]*.9510565400123596+ +g[xd>>2]*.5877852439880371;g[Dd>>2]=+g[xd>>2]*.9510565400123596-+g[wd>>2]*.5877852439880371;g[c[l>>2]>>2]=+g[Xc>>2]+ +g[vd>>2];g[Cd>>2]=+g[Ad>>2]-+g[zd>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Cd>>2]-+g[Dd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Dd>>2]+ +g[Cd>>2];g[Bd>>2]=+g[zd>>2]+ +g[Ad>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[yd>>2]+ +g[Bd>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Bd>>2]-+g[yd>>2];g[Sb>>2]=(+g[db>>2]-+g[Pb>>2])*.55901700258255;g[Qb>>2]=+g[db>>2]+ +g[Pb>>2];g[Rb>>2]=+g[Ua>>2]-+g[Qb>>2]*.25;g[Bb>>2]=+g[vb>>2]-+g[Ab>>2];g[Oa>>2]=+g[Gb>>2]-+g[Na>>2];g[Pa>>2]=+g[Bb>>2]*.9510565400123596-+g[Oa>>2]*.5877852439880371;g[sc>>2]=+g[Oa>>2]*.9510565400123596+ +g[Bb>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Ua>>2]+ +g[Qb>>2];g[tc>>2]=+g[Sb>>2]+ +g[Rb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[sc>>2]+ +g[tc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[tc>>2]-+g[sc>>2];g[rc>>2]=+g[Rb>>2]-+g[Sb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Pa>>2]+ +g[rc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[rc>>2]-+g[Pa>>2];g[od>>2]=(+g[kd>>2]-+g[ld>>2])*.55901700258255;g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[nd>>2]=+g[jd>>2]-+g[md>>2]*.25;g[qd>>2]=+g[Jb>>2]-+g[Ob>>2];g[rd>>2]=+g[Za>>2]-+g[cb>>2];g[sd>>2]=+g[qd>>2]*.9510565400123596-+g[rd>>2]*.5877852439880371;g[ud>>2]=+g[rd>>2]*.9510565400123596+ +g[qd>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[md>>2]+ +g[jd>>2];g[td>>2]=+g[od>>2]+ 
+g[nd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[td>>2]-+g[ud>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[ud>>2]+ +g[td>>2];g[pd>>2]=+g[nd>>2]-+g[od>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[pd>>2]-+g[sd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[sd>>2]+ +g[pd>>2];g[Hc>>2]=(+g[Dc>>2]-+g[Gc>>2])*.55901700258255;g[Jc>>2]=+g[Dc>>2]+ +g[Gc>>2];g[Kc>>2]=+g[Ic>>2]-+g[Jc>>2]*.25;g[wc>>2]=+g[uc>>2]-+g[vc>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Ac>>2]=+g[wc>>2]*.9510565400123596+ +g[zc>>2]*.5877852439880371;g[Mc>>2]=+g[zc>>2]*.9510565400123596-+g[wc>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Nc>>2]=+g[Kc>>2]-+g[Hc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Mc>>2]+ +g[Nc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Nc>>2]-+g[Mc>>2];g[Lc>>2]=+g[Hc>>2]+ +g[Kc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ac>>2]+ +g[Lc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Lc>>2]-+g[Ac>>2];g[Sd>>2]=(+g[Qd>>2]-+g[Rd>>2])*.55901700258255;g[bd>>2]=+g[Qd>>2]+ +g[Rd>>2];g[cd>>2]=+g[ad>>2]-+g[bd>>2]*.25;g[ed>>2]=+g[Bc>>2]-+g[Cc>>2];g[fd>>2]=+g[Ec>>2]-+g[Fc>>2];g[gd>>2]=+g[ed>>2]*.9510565400123596+ +g[fd>>2]*.5877852439880371;g[id>>2]=+g[fd>>2]*.9510565400123596-+g[ed>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[bd>>2]+ +g[ad>>2];g[hd>>2]=+g[cd>>2]-+g[Sd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[hd>>2]-+g[id>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[id>>2]+ +g[hd>>2];g[dd>>2]=+g[Sd>>2]+ +g[cd>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[dd>>2]-+g[gd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[gd>>2]+ +g[dd>>2];c[Ye>>2]=(c[Ye>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(c[p>>2]<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=Ze;return}function Lj(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;gh(c[d>>2]|0,20,2824);i=b;return} +function Oq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0;he=i;i=i+1088|0;m=he+1076|0;n=he+1072|0;o=he+1068|0;p=he+1064|0;q=he+1060|0;r=he+1056|0;ie=he+1052|0;s=he+1048|0;t=he+1044|0;ge=he+1024|0;u=he+1020|0;Vc=he+1016|0;Da=he+1012|0;dd=he+1008|0;fd=he+1004|0;Ld=he+1e3|0;x=he+996|0;Gd=he+992|0;Sd=he+988|0;Qd=he+984|0;Ud=he+980|0;oa=he+976|0;ea=he+972|0;td=he+968|0;ka=he+964|0;_d=he+960|0;zd=he+956|0;C=he+952|0;ee=he+948|0;fe=he+944|0;id=he+940|0;md=he+936|0;Q=he+932|0;V=he+928|0;M=he+924|0;T=he+920|0;Ia=he+916|0;pb=he+912|0;Ea=he+908|0;nb=he+904|0;Td=he+900|0;xd=he+896|0;Yd=he+892|0;sd=he+888|0;Rd=he+884|0;yd=he+880|0;Zd=he+876|0;rd=he+872|0;Mb=he+868|0;Kd=he+864|0;ed=he+860|0;Jd=he+856|0;O=he+852|0;P=he+848|0;K=he+844|0;L=he+840|0;Ga=he+836|0;Ha=he+832|0
;ba=he+828|0;ca=he+824|0;wa=he+820|0;Dc=he+816|0;xb=he+812|0;Yb=he+808|0;J=he+804|0;Oc=he+800|0;_a=he+796|0;Sc=he+792|0;ja=he+788|0;Zb=he+784|0;Xa=he+780|0;Pc=he+776|0;Y=he+772|0;Rc=he+768|0;Ab=he+764|0;Ec=he+760|0;de=he+756|0;wc=he+752|0;Qa=he+748|0;Lc=he+744|0;La=he+740|0;Hc=he+736|0;Eb=he+732|0;xc=he+728|0;Ed=he+724|0;Ac=he+720|0;Ta=he+716|0;Ic=he+712|0;sb=he+708|0;Kc=he+704|0;Hb=he+700|0;zc=he+696|0;na=he+692|0;H=he+688|0;ra=he+684|0;G=he+680|0;va=he+676|0;Ca=he+672|0;Ba=he+668|0;vb=he+664|0;sa=he+660|0;wb=he+656|0;la=he+652|0;ma=he+648|0;pa=he+644|0;qa=he+640|0;ta=he+636|0;ua=he+632|0;za=he+628|0;Aa=he+624|0;F=he+620|0;I=he+616|0;Ya=he+612|0;Za=he+608|0;w=he+604|0;R=he+600|0;A=he+596|0;N=he+592|0;da=he+588|0;W=he+584|0;ha=he+580|0;U=he+576|0;Hd=he+572|0;v=he+568|0;y=he+564|0;z=he+560|0;D=he+556|0;E=he+552|0;fa=he+548|0;ga=he+544|0;B=he+540|0;ia=he+536|0;Va=he+532|0;Wa=he+528|0;S=he+524|0;X=he+520|0;yb=he+516|0;zb=he+512|0;Id=he+508|0;$=he+504|0;Od=he+500|0;_=he+496|0;Xd=he+492|0;Ja=he+488|0;be=he+484|0;Fa=he+480|0;gd=he+476|0;hd=he+472|0;Md=he+468|0;Nd=he+464|0;Vd=he+460|0;Wd=he+456|0;$d=he+452|0;ae=he+448|0;Pd=he+444|0;ce=he+440|0;Oa=he+436|0;Pa=he+432|0;aa=he+428|0;Ka=he+424|0;Cb=he+420|0;Db=he+416|0;ld=he+412|0;Na=he+408|0;pd=he+404|0;Ma=he+400|0;wd=he+396|0;qb=he+392|0;Cd=he+388|0;ob=he+384|0;jd=he+380|0;kd=he+376|0;nd=he+372|0;od=he+368|0;ud=he+364|0;vd=he+360|0;Ad=he+356|0;Bd=he+352|0;qd=he+348|0;Dd=he+344|0;Ra=he+340|0;Sa=he+336|0;mb=he+332|0;rb=he+328|0;Fb=he+324|0;Gb=he+320|0;ya=he+316|0;Lb=he+312|0;ab=he+308|0;cb=he+304|0;ub=he+300|0;Kb=he+296|0;Jb=he+292|0;bb=he+288|0;Fd=he+284|0;xa=he+280|0;Ua=he+276|0;$a=he+272|0;Z=he+268|0;tb=he+264|0;Bb=he+260|0;Ib=he+256|0;fb=he+252|0;Sb=he+248|0;Qb=he+244|0;Wb=he+240|0;ib=he+236|0;Tb=he+232|0;lb=he+228|0;Ub=he+224|0;db=he+220|0;eb=he+216|0;Ob=he+212|0;Pb=he+208|0;gb=he+204|0;hb=he+200|0;jb=he+196|0;kb=he+192|0;Nb=he+188|0;vc=he+184|0;Rb=he+180|0;Vb=he+176|0;Fc=he+172|0;lc=he+168|0;uc=he+164|0;_b=he+160|0;Cc=he+
156|0;Wc=he+152|0;sc=he+148|0;$c=he+144|0;bc=he+140|0;kc=he+136|0;Nc=he+132|0;fc=he+128|0;pc=he+124|0;_c=he+120|0;Uc=he+116|0;gc=he+112|0;yc=he+108|0;Bc=he+104|0;Jc=he+100|0;Mc=he+96|0;qc=he+92|0;rc=he+88|0;$b=he+84|0;ac=he+80|0;nc=he+76|0;oc=he+72|0;Qc=he+68|0;Tc=he+64|0;Gc=he+60|0;Xb=he+56|0;ic=he+52|0;jc=he+48|0;cc=he+44|0;dc=he+40|0;ec=he+36|0;hc=he+32|0;mc=he+28|0;tc=he+24|0;bd=he+20|0;cd=he+16|0;Xc=he+12|0;Yc=he+8|0;Zc=he+4|0;ad=he;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ie>>2]=j;c[s>>2]=k;c[t>>2]=l;g[he+1040>>2]=.4619397521018982;g[he+1036>>2]=.19134171307086945;g[he+1032>>2]=.3535533845424652;g[he+1028>>2]=.5;c[ge>>2]=c[ie>>2];c[q>>2]=(c[q>>2]|0)+((c[ie>>2]|0)-1<<3<<2);while(1){if((c[ge>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[q>>2]>>2];g[Vc>>2]=+g[(c[q>>2]|0)+4>>2];g[Da>>2]=+g[(c[q>>2]|0)+8>>2];g[dd>>2]=+g[(c[q>>2]|0)+12>>2];g[Mb>>2]=+g[u>>2]*+g[Da>>2];g[Kd>>2]=+g[Vc>>2]*+g[Da>>2];g[ed>>2]=+g[Vc>>2]*+g[dd>>2];g[Jd>>2]=+g[u>>2]*+g[dd>>2];g[fd>>2]=+g[Mb>>2]+ +g[ed>>2];g[Ld>>2]=+g[Jd>>2]-+g[Kd>>2];g[x>>2]=+g[Jd>>2]+ +g[Kd>>2];g[Gd>>2]=+g[Mb>>2]-+g[ed>>2];g[Sd>>2]=+g[(c[q>>2]|0)+20>>2];g[Td>>2]=+g[Vc>>2]*+g[Sd>>2];g[xd>>2]=+g[Da>>2]*+g[Sd>>2];g[Yd>>2]=+g[u>>2]*+g[Sd>>2];g[sd>>2]=+g[dd>>2]*+g[Sd>>2];g[Qd>>2]=+g[(c[q>>2]|0)+16>>2];g[Rd>>2]=+g[u>>2]*+g[Qd>>2];g[yd>>2]=+g[dd>>2]*+g[Qd>>2];g[Zd>>2]=+g[Vc>>2]*+g[Qd>>2];g[rd>>2]=+g[Da>>2]*+g[Qd>>2];g[Ud>>2]=+g[Rd>>2]-+g[Td>>2];g[oa>>2]=+g[Yd>>2]-+g[Zd>>2];g[ea>>2]=+g[xd>>2]+ +g[yd>>2];g[td>>2]=+g[rd>>2]+ +g[sd>>2];g[ka>>2]=+g[Rd>>2]+ +g[Td>>2];g[_d>>2]=+g[Yd>>2]+ +g[Zd>>2];g[zd>>2]=+g[xd>>2]-+g[yd>>2];g[C>>2]=+g[rd>>2]-+g[sd>>2];g[ee>>2]=+g[(c[q>>2]|0)+24>>2];g[fe>>2]=+g[(c[q>>2]|0)+28>>2];g[id>>2]=+g[u>>2]*+g[ee>>2]+ +g[Vc>>2]*+g[fe>>2];g[md>>2]=+g[u>>2]*+g[fe>>2]-+g[Vc>>2]*+g[ee>>2];g[O>>2]=+g[Gd>>2]*+g[Sd>>2];g[P>>2]=+g[x>>2]*+g[Qd>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[V>>2]=+g[O>>2]+ +g[P>>2];g[K>>2]=+g[Gd>>2]*+g[Qd>>2];g[L>>2]=+g[x>>2]*+g[Sd>>2];g[M>>2]=+g[K>>2]+ 
+g[L>>2];g[T>>2]=+g[K>>2]-+g[L>>2];g[Ga>>2]=+g[fd>>2]*+g[Sd>>2];g[Ha>>2]=+g[Ld>>2]*+g[Qd>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[pb>>2]=+g[Ga>>2]-+g[Ha>>2];g[ba>>2]=+g[fd>>2]*+g[Qd>>2];g[ca>>2]=+g[Ld>>2]*+g[Sd>>2];g[Ea>>2]=+g[ba>>2]-+g[ca>>2];g[nb>>2]=+g[ba>>2]+ +g[ca>>2];g[la>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ma>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[H>>2]=+g[la>>2]+ +g[ma>>2];g[pa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[qa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ra>>2]=+g[pa>>2]+ +g[qa>>2];g[G>>2]=+g[pa>>2]-+g[qa>>2];g[ta>>2]=+g[c[n>>2]>>2];g[ua>>2]=+g[c[p>>2]>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[Ca>>2]=+g[ta>>2]+ +g[ua>>2];g[za>>2]=+g[c[o>>2]>>2];g[Aa>>2]=+g[c[m>>2]>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2];g[vb>>2]=+g[Aa>>2]+ +g[za>>2];g[sa>>2]=+g[ka>>2]*+g[na>>2]-+g[oa>>2]*+g[ra>>2];g[wa>>2]=+g[sa>>2]+ +g[va>>2];g[Dc>>2]=+g[va>>2]-+g[sa>>2];g[wb>>2]=+g[ka>>2]*+g[ra>>2]+ +g[oa>>2]*+g[na>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[Yb>>2]=+g[vb>>2]-+g[wb>>2];g[F>>2]=+g[u>>2]*+g[Ba>>2]-+g[Vc>>2]*+g[Ca>>2];g[I>>2]=+g[Qd>>2]*+g[G>>2]+ +g[Sd>>2]*+g[H>>2];g[J>>2]=+g[F>>2]-+g[I>>2];g[Oc>>2]=+g[I>>2]+ +g[F>>2];g[Ya>>2]=+g[Qd>>2]*+g[H>>2]-+g[Sd>>2]*+g[G>>2];g[Za>>2]=+g[Vc>>2]*+g[Ba>>2]+ +g[u>>2]*+g[Ca>>2];g[_a>>2]=+g[Ya>>2]+ +g[Za>>2];g[Sc>>2]=+g[Za>>2]-+g[Ya>>2];g[Hd>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[v>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[w>>2]=+g[Hd>>2]-+g[v>>2];g[R>>2]=+g[Hd>>2]+ +g[v>>2];g[y>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[z>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[N>>2]=+g[y>>2]-+g[z>>2];g[D>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[E>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[da>>2]=+g[D>>2]-+g[E>>2];g[W>>2]=+g[D>>2]+ +g[E>>2];g[fa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ga>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[U>>2]=+g[fa>>2]-+g[ga>>2];g[B>>2]=+g[Gd>>2]*+g[w>>2]-+g[x>>2]*+g[A>>2];g[ia>>2]=+g[C>>2]*+g[da>>2]-+g[ea>>2]*+g[ha>>2];g[ja>>2]=+g[B>>2]+ 
+g[ia>>2];g[Zb>>2]=+g[B>>2]-+g[ia>>2];g[Va>>2]=+g[M>>2]*+g[R>>2]-+g[Q>>2]*+g[N>>2];g[Wa>>2]=+g[T>>2]*+g[W>>2]-+g[V>>2]*+g[U>>2];g[Xa>>2]=+g[Va>>2]+ +g[Wa>>2];g[Pc>>2]=+g[Va>>2]-+g[Wa>>2];g[S>>2]=+g[M>>2]*+g[N>>2]+ +g[Q>>2]*+g[R>>2];g[X>>2]=+g[T>>2]*+g[U>>2]+ +g[V>>2]*+g[W>>2];g[Y>>2]=+g[S>>2]+ +g[X>>2];g[Rc>>2]=+g[X>>2]-+g[S>>2];g[yb>>2]=+g[Gd>>2]*+g[A>>2]+ +g[x>>2]*+g[w>>2];g[zb>>2]=+g[C>>2]*+g[ha>>2]+ +g[ea>>2]*+g[da>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[Ec>>2]=+g[yb>>2]-+g[zb>>2];g[gd>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[hd>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Id>>2]=+g[gd>>2]-+g[hd>>2];g[$>>2]=+g[gd>>2]+ +g[hd>>2];g[Md>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Nd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Od>>2]=+g[Md>>2]+ +g[Nd>>2];g[_>>2]=+g[Md>>2]-+g[Nd>>2];g[Vd>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Wd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Xd>>2]=+g[Vd>>2]-+g[Wd>>2];g[Ja>>2]=+g[Vd>>2]+ +g[Wd>>2];g[$d>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ae>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[be>>2]=+g[$d>>2]+ +g[ae>>2];g[Fa>>2]=+g[$d>>2]-+g[ae>>2];g[Pd>>2]=+g[fd>>2]*+g[Id>>2]-+g[Ld>>2]*+g[Od>>2];g[ce>>2]=+g[Ud>>2]*+g[Xd>>2]-+g[_d>>2]*+g[be>>2];g[de>>2]=+g[Pd>>2]+ +g[ce>>2];g[wc>>2]=+g[Pd>>2]-+g[ce>>2];g[Oa>>2]=+g[Da>>2]*+g[$>>2]-+g[dd>>2]*+g[_>>2];g[Pa>>2]=+g[Ea>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Fa>>2];g[Qa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Lc>>2]=+g[Oa>>2]-+g[Pa>>2];g[aa>>2]=+g[Da>>2]*+g[_>>2]+ +g[dd>>2]*+g[$>>2];g[Ka>>2]=+g[Ea>>2]*+g[Fa>>2]+ +g[Ia>>2]*+g[Ja>>2];g[La>>2]=+g[aa>>2]+ +g[Ka>>2];g[Hc>>2]=+g[Ka>>2]-+g[aa>>2];g[Cb>>2]=+g[fd>>2]*+g[Od>>2]+ +g[Ld>>2]*+g[Id>>2];g[Db>>2]=+g[Ud>>2]*+g[be>>2]+ +g[_d>>2]*+g[Xd>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[xc>>2]=+g[Cb>>2]-+g[Db>>2];g[jd>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[kd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ld>>2]=+g[jd>>2]-+g[kd>>2];g[Na>>2]=+g[jd>>2]+ +g[kd>>2];g[nd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[od>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[pd>>2]=+g[nd>>2]+ 
+g[od>>2];g[Ma>>2]=+g[nd>>2]-+g[od>>2];g[ud>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[vd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[qb>>2]=+g[ud>>2]+ +g[vd>>2];g[Ad>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Bd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Cd>>2]=+g[Ad>>2]+ +g[Bd>>2];g[ob>>2]=+g[Ad>>2]-+g[Bd>>2];g[qd>>2]=+g[id>>2]*+g[ld>>2]-+g[md>>2]*+g[pd>>2];g[Dd>>2]=+g[td>>2]*+g[wd>>2]-+g[zd>>2]*+g[Cd>>2];g[Ed>>2]=+g[qd>>2]+ +g[Dd>>2];g[Ac>>2]=+g[qd>>2]-+g[Dd>>2];g[Ra>>2]=+g[ee>>2]*+g[Na>>2]-+g[fe>>2]*+g[Ma>>2];g[Sa>>2]=+g[nb>>2]*+g[qb>>2]-+g[pb>>2]*+g[ob>>2];g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Ic>>2]=+g[Ra>>2]-+g[Sa>>2];g[mb>>2]=+g[ee>>2]*+g[Ma>>2]+ +g[fe>>2]*+g[Na>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]+ +g[pb>>2]*+g[qb>>2];g[sb>>2]=+g[mb>>2]+ +g[rb>>2];g[Kc>>2]=+g[rb>>2]-+g[mb>>2];g[Fb>>2]=+g[id>>2]*+g[pd>>2]+ +g[md>>2]*+g[ld>>2];g[Gb>>2]=+g[td>>2]*+g[Cd>>2]+ +g[zd>>2]*+g[wd>>2];g[Hb>>2]=+g[Fb>>2]+ +g[Gb>>2];g[zc>>2]=+g[Fb>>2]-+g[Gb>>2];g[Fd>>2]=+g[de>>2]+ +g[Ed>>2];g[xa>>2]=+g[ja>>2]+ +g[wa>>2];g[ya>>2]=+g[Fd>>2]+ +g[xa>>2];g[Lb>>2]=+g[xa>>2]-+g[Fd>>2];g[Ua>>2]=+g[Qa>>2]+ +g[Ta>>2];g[$a>>2]=+g[Xa>>2]+ +g[_a>>2];g[ab>>2]=+g[Ua>>2]-+g[$a>>2];g[cb>>2]=+g[Ua>>2]+ +g[$a>>2];g[Z>>2]=+g[J>>2]-+g[Y>>2];g[tb>>2]=+g[La>>2]+ +g[sb>>2];g[ub>>2]=+g[Z>>2]-+g[tb>>2];g[Kb>>2]=+g[tb>>2]+ +g[Z>>2];g[Bb>>2]=+g[xb>>2]+ +g[Ab>>2];g[Ib>>2]=+g[Eb>>2]+ +g[Hb>>2];g[Jb>>2]=+g[Bb>>2]-+g[Ib>>2];g[bb>>2]=+g[Bb>>2]+ +g[Ib>>2];g[c[n>>2]>>2]=(+g[ya>>2]+ +g[ub>>2])*.5;g[c[m>>2]>>2]=(+g[bb>>2]+ +g[cb>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[ub>>2]-+g[ya>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[bb>>2]-+g[cb>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[Jb>>2]-+g[Kb>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[ab>>2]-+g[Lb>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[Jb>>2]+ +g[Kb>>2])*.5;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[Lb>>2]+ +g[ab>>2])*.5;g[db>>2]=+g[Hb>>2]-+g[Eb>>2];g[eb>>2]=+g[wa>>2]-+g[ja>>2];g[fb>>2]=(+g[db>>2]+ 
+g[eb>>2])*.5;g[Sb>>2]=(+g[eb>>2]-+g[db>>2])*.5;g[Ob>>2]=+g[xb>>2]-+g[Ab>>2];g[Pb>>2]=+g[de>>2]-+g[Ed>>2];g[Qb>>2]=(+g[Ob>>2]-+g[Pb>>2])*.5;g[Wb>>2]=(+g[Ob>>2]+ +g[Pb>>2])*.5;g[gb>>2]=+g[Ta>>2]-+g[Qa>>2];g[hb>>2]=+g[La>>2]-+g[sb>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[Tb>>2]=+g[gb>>2]-+g[hb>>2];g[jb>>2]=+g[Y>>2]+ +g[J>>2];g[kb>>2]=+g[_a>>2]-+g[Xa>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[Ub>>2]=+g[jb>>2]+ +g[kb>>2];g[Nb>>2]=(+g[ib>>2]+ +g[lb>>2])*.3535533845424652;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[fb>>2]+ +g[Nb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Nb>>2]-+g[fb>>2];g[vc>>2]=(+g[Tb>>2]+ +g[Ub>>2])*.3535533845424652;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Wb>>2]-+g[vc>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Wb>>2]+ +g[vc>>2];g[Rb>>2]=(+g[lb>>2]-+g[ib>>2])*.3535533845424652;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Qb>>2]-+g[Rb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Vb>>2]=(+g[Tb>>2]-+g[Ub>>2])*.3535533845424652;g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Sb>>2]+ +g[Vb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Vb>>2]-+g[Sb>>2];g[Fc>>2]=(+g[Dc>>2]-+g[Ec>>2])*.5;g[lc>>2]=(+g[Ec>>2]+ +g[Dc>>2])*.5;g[uc>>2]=(+g[Yb>>2]-+g[Zb>>2])*.5;g[_b>>2]=(+g[Yb>>2]+ +g[Zb>>2])*.5;g[yc>>2]=+g[wc>>2]-+g[xc>>2];g[Bc>>2]=+g[zc>>2]+ +g[Ac>>2];g[Cc>>2]=(+g[yc>>2]+ +g[Bc>>2])*.3535533845424652;g[Wc>>2]=(+g[yc>>2]-+g[Bc>>2])*.3535533845424652;g[qc>>2]=+g[Pc>>2]+ +g[Oc>>2];g[rc>>2]=+g[Sc>>2]-+g[Rc>>2];g[sc>>2]=+g[qc>>2]*.19134171307086945-+g[rc>>2]*.4619397521018982;g[$c>>2]=+g[qc>>2]*.4619397521018982+ +g[rc>>2]*.19134171307086945;g[$b>>2]=+g[xc>>2]+ +g[wc>>2];g[ac>>2]=+g[zc>>2]-+g[Ac>>2];g[bc>>2]=(+g[$b>>2]+ +g[ac>>2])*.3535533845424652;g[kc>>2]=(+g[ac>>2]-+g[$b>>2])*.3535533845424652;g[Jc>>2]=+g[Hc>>2]+ +g[Ic>>2];g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Nc>>2]=+g[Jc>>2]*.19134171307086945+ +g[Mc>>2]*.4619397521018982;g[fc>>2]=+g[Jc>>2]*.4619397521018982-+g[Mc>>2]*.19134171307086945;g[nc>>2]=+g[Ic>>2]-+g[Hc>>2];g[oc>>2]=+g[Lc>>2]+ +g[Kc>>2];g[pc>>2]=+g[nc>>2]*.4619397521018982+ 
+g[oc>>2]*.19134171307086945;g[_c>>2]=+g[nc>>2]*.19134171307086945-+g[oc>>2]*.4619397521018982;g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Tc>>2]=+g[Rc>>2]+ +g[Sc>>2];g[Uc>>2]=+g[Qc>>2]*.4619397521018982-+g[Tc>>2]*.19134171307086945;g[gc>>2]=+g[Qc>>2]*.19134171307086945+ +g[Tc>>2]*.4619397521018982;g[Gc>>2]=+g[Cc>>2]+ +g[Fc>>2];g[Xb>>2]=+g[Nc>>2]+ +g[Uc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Gc>>2]+ +g[Xb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Xb>>2]-+g[Gc>>2];g[ic>>2]=+g[_b>>2]+ +g[bc>>2];g[jc>>2]=+g[fc>>2]+ +g[gc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ic>>2]-+g[jc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[ic>>2]+ +g[jc>>2];g[cc>>2]=+g[_b>>2]-+g[bc>>2];g[dc>>2]=+g[Uc>>2]-+g[Nc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[cc>>2]-+g[dc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[cc>>2]+ +g[dc>>2];g[ec>>2]=+g[Fc>>2]-+g[Cc>>2];g[hc>>2]=+g[fc>>2]-+g[gc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ec>>2]+ +g[hc>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[hc>>2]-+g[ec>>2];g[mc>>2]=+g[kc>>2]+ +g[lc>>2];g[tc>>2]=+g[pc>>2]+ +g[sc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[mc>>2]+ +g[tc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[tc>>2]-+g[mc>>2];g[bd>>2]=+g[uc>>2]+ +g[Wc>>2];g[cd>>2]=+g[_c>>2]+ +g[$c>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[bd>>2]-+g[cd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[bd>>2]+ +g[cd>>2];g[Xc>>2]=+g[uc>>2]-+g[Wc>>2];g[Yc>>2]=+g[sc>>2]-+g[pc>>2];g[c[o>>2]>>2]=+g[Xc>>2]-+g[Yc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Zc>>2]=+g[lc>>2]-+g[kc>>2];g[ad>>2]=+g[_c>>2]-+g[$c>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Zc>>2]+ +g[ad>>2];g[c[p>>2]>>2]=+g[ad>>2]-+g[Zc>>2];c[ge>>2]=(c[ge>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=he;return}function Pq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,39,3928,1);i=b;return}function 
Qq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0;Qf=i;i=i+1440|0;m=Qf+1432|0;n=Qf+1428|0;o=Qf+1424|0;p=Qf+1420|0;q=Qf+1416|0;r=Qf+1412|0;Rf=Qf+1408|0;s=Qf+1404|0;t=Qf+1400|0;Pf=Qf+1376|0;Vc=Qf+1372|0;Ne=Qf+1368|0;Bf=Qf+1364|0;Df=Qf+1360
|0;Ff=Qf+1356|0;Jf=Qf+1352|0;N=Qf+1348|0;L=Qf+1344|0;Oe=Qf+1340|0;ce=Qf+1336|0;pf=Qf+1332|0;xa=Qf+1328|0;Gb=Qf+1324|0;vf=Qf+1320|0;Eb=Qf+1316|0;Fa=Qf+1312|0;F=Qf+1308|0;ca=Qf+1304|0;T=Qf+1300|0;Wa=Qf+1296|0;P=Qf+1292|0;Ua=Qf+1288|0;Lf=Qf+1284|0;Se=Qf+1280|0;mb=Qf+1276|0;qb=Qf+1272|0;Xe=Qf+1268|0;Ye=Qf+1264|0;Ze=Qf+1260|0;mf=Qf+1256|0;Ra=Qf+1252|0;bf=Qf+1248|0;Pa=Qf+1244|0;Ka=Qf+1240|0;qa=Qf+1236|0;of=Qf+1232|0;Ia=Qf+1228|0;ma=Qf+1224|0;Pe=Qf+1220|0;Ba=Qf+1216|0;tf=Qf+1212|0;wa=Qf+1208|0;Me=Qf+1204|0;Ca=Qf+1200|0;uf=Qf+1196|0;va=Qf+1192|0;Cf=Qf+1188|0;If=Qf+1184|0;Ef=Qf+1180|0;Hf=Qf+1176|0;R=Qf+1172|0;S=Qf+1168|0;M=Qf+1164|0;O=Qf+1160|0;Gf=Qf+1156|0;Kf=Qf+1152|0;Qe=Qf+1148|0;Re=Qf+1144|0;Af=Qf+1140|0;Ic=Qf+1136|0;la=Qf+1132|0;cb=Qf+1128|0;bd=Qf+1124|0;de=Qf+1120|0;wd=Qf+1116|0;ve=Qf+1112|0;_a=Qf+1108|0;fb=Qf+1104|0;md=Qf+1100|0;se=Qf+1096|0;pd=Qf+1092|0;re=Qf+1088|0;td=Qf+1084|0;ue=Qf+1080|0;Db=Qf+1076|0;eb=Qf+1072|0;aa=Qf+1068|0;bb=Qf+1064|0;Ob=Qf+1060|0;Pc=Qf+1056|0;Tb=Qf+1052|0;Oc=Qf+1048|0;$b=Qf+1044|0;Pd=Qf+1040|0;pc=Qf+1036|0;Nd=Qf+1032|0;Cc=Qf+1028|0;Mc=Qf+1024|0;ec=Qf+1020|0;Qd=Qf+1016|0;xc=Qf+1012|0;Lc=Qf+1008|0;kc=Qf+1004|0;Md=Qf+1e3|0;gf=Qf+996|0;ee=Qf+992|0;Hc=Qf+988|0;cd=Qf+984|0;Mb=Qf+980|0;$c=Qf+976|0;_=Qf+972|0;Ac=Qf+968|0;Of=Qf+964|0;Ve=Qf+960|0;lf=Qf+956|0;Xb=Qf+952|0;af=Qf+948|0;ef=Qf+944|0;w=Qf+940|0;Yb=Qf+936|0;sf=Qf+932|0;yf=Qf+928|0;V=Qf+924|0;zc=Qf+920|0;E=Qf+916|0;gc=Qf+912|0;Ha=Qf+908|0;Wb=Qf+904|0;Bb=Qf+900|0;nc=Qf+896|0;Lb=Qf+892|0;Rb=Qf+888|0;J=Qf+884|0;bc=Qf+880|0;Ya=Qf+876|0;kb=Qf+872|0;ja=Qf+868|0;hc=Qf+864|0;Ma=Qf+860|0;vc=Qf+856|0;ub=Qf+852|0;mc=Qf+848|0;Ib=Qf+844|0;Qb=Qf+840|0;ua=Qf+836|0;ac=Qf+832|0;Ta=Qf+828|0;jb=Qf+824|0;rd=Qf+820|0;sd=Qf+816|0;u=Qf+812|0;Da=Qf+808|0;W=Qf+804|0;X=Qf+800|0;Y=Qf+796|0;Z=Qf+792|0;Mf=Qf+788|0;Nf=Qf+784|0;jf=Qf+780|0;Te=Qf+776|0;Ue=Qf+772|0;kf=Qf+768|0;_e=Qf+764|0;$e=Qf+760|0;nf=Qf+756|0;cf=Qf+752|0;df=Qf+748|0;v=Qf+744|0;qf=Qf+740|0;rf=Qf+736|0;Q=Qf+732|0;wf=Qf+728|0;xf=Qf+724|0;U=Qf+720|0;A=Qf+716|0
;Ea=Qf+712|0;D=Qf+708|0;Ga=Qf+704|0;y=Qf+700|0;z=Qf+696|0;B=Qf+692|0;C=Qf+688|0;xb=Qf+684|0;Jb=Qf+680|0;Ab=Qf+676|0;Kb=Qf+672|0;vb=Qf+668|0;wb=Qf+664|0;yb=Qf+660|0;zb=Qf+656|0;Aa=Qf+652|0;Va=Qf+648|0;I=Qf+644|0;Xa=Qf+640|0;ya=Qf+636|0;za=Qf+632|0;G=Qf+628|0;H=Qf+624|0;fa=Qf+620|0;Ja=Qf+616|0;ia=Qf+612|0;La=Qf+608|0;da=Qf+604|0;ea=Qf+600|0;ga=Qf+596|0;ha=Qf+592|0;pb=Qf+588|0;Fb=Qf+584|0;tb=Qf+580|0;Hb=Qf+576|0;nb=Qf+572|0;ob=Qf+568|0;rb=Qf+564|0;sb=Qf+560|0;pa=Qf+556|0;Sa=Qf+552|0;ta=Qf+548|0;Qa=Qf+544|0;na=Qf+540|0;oa=Qf+536|0;ra=Qf+532|0;sa=Qf+528|0;zf=Qf+524|0;x=Qf+520|0;ka=Qf+516|0;ad=Qf+512|0;ud=Qf+508|0;vd=Qf+504|0;Oa=Qf+500|0;Za=Qf+496|0;kd=Qf+492|0;ld=Qf+488|0;nd=Qf+484|0;od=Qf+480|0;Na=Qf+476|0;Cb=Qf+472|0;K=Qf+468|0;$=Qf+464|0;Zb=Qf+460|0;_b=Qf+456|0;ic=Qf+452|0;jc=Qf+448|0;lb=Qf+444|0;Nb=Qf+440|0;Pb=Qf+436|0;Sb=Qf+432|0;lc=Qf+428|0;oc=Qf+424|0;yc=Qf+420|0;Bc=Qf+416|0;cc=Qf+412|0;dc=Qf+408|0;Vb=Qf+404|0;wc=Qf+400|0;We=Qf+396|0;ff=Qf+392|0;Fc=Qf+388|0;Gc=Qf+384|0;rc=Qf+380|0;tc=Qf+376|0;hf=Qf+372|0;ab=Qf+368|0;Sc=Qf+364|0;Tc=Qf+360|0;sc=Qf+356|0;Uc=Qf+352|0;fc=Qf+348|0;qc=Qf+344|0;ba=Qf+340|0;$a=Qf+336|0;Xc=Qf+332|0;Hd=Qf+328|0;dd=Qf+324|0;Ed=Qf+320|0;_c=Qf+316|0;Fd=Qf+312|0;Id=Qf+308|0;Gd=Qf+304|0;uc=Qf+300|0;Wc=Qf+296|0;Yc=Qf+292|0;Zc=Qf+288|0;Sd=Qf+284|0;Ud=Qf+280|0;ib=Qf+276|0;hb=Qf+272|0;Jd=Qf+268|0;Kd=Qf+264|0;Td=Qf+260|0;Ld=Qf+256|0;Od=Qf+252|0;Rd=Qf+248|0;db=Qf+244|0;gb=Qf+240|0;Xd=Qf+236|0;fd=Qf+232|0;Yd=Qf+228|0;$d=Qf+224|0;ae=Qf+220|0;be=Qf+216|0;gd=Qf+212|0;ed=Qf+208|0;Vd=Qf+204|0;Wd=Qf+200|0;Zd=Qf+196|0;_d=Qf+192|0;xe=Qf+188|0;ze=Qf+184|0;Jc=Qf+180|0;Ec=Qf+176|0;oe=Qf+172|0;pe=Qf+168|0;ye=Qf+164|0;qe=Qf+160|0;te=Qf+156|0;we=Qf+152|0;Ub=Qf+148|0;Dc=Qf+144|0;Je=Qf+140|0;Ke=Qf+136|0;Ae=Qf+132|0;De=Qf+128|0;Ee=Qf+124|0;Fe=Qf+120|0;Le=Qf+116|0;Ge=Qf+112|0;He=Qf+108|0;Ie=Qf+104|0;Be=Qf+100|0;Ce=Qf+96|0;yd=Qf+92|0;Ad=Qf+88|0;Kc=Qf+84|0;Rc=Qf+80|0;hd=Qf+76|0;id=Qf+72|0;zd=Qf+68|0;jd=Qf+64|0;qd=Qf+60|0;xd=Qf+56|0;Nc=Qf+52|0;Qc=Qf+48|0;le=Qf+44|0;me=Qf+40
|0;fe=Qf+36|0;ge=Qf+32|0;Dd=Qf+28|0;he=Qf+24|0;ne=Qf+20|0;ie=Qf+16|0;je=Qf+12|0;ke=Qf+8|0;Bd=Qf+4|0;Cd=Qf;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Rf>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Qf+1396>>2]=.125;g[Qf+1392>>2]=.5;g[Qf+1388>>2]=.279508501291275;g[Qf+1384>>2]=.29389262199401855;g[Qf+1380>>2]=.4755282700061798;c[Pf>>2]=c[Rf>>2];c[q>>2]=(c[q>>2]|0)+((c[Rf>>2]|0)-1<<3<<2);while(1){if((c[Pf>>2]|0)>=(c[s>>2]|0))break;g[Vc>>2]=+g[c[q>>2]>>2];g[Ne>>2]=+g[(c[q>>2]|0)+4>>2];g[Bf>>2]=+g[(c[q>>2]|0)+8>>2];g[Df>>2]=+g[(c[q>>2]|0)+12>>2];g[Cf>>2]=+g[Vc>>2]*+g[Bf>>2];g[If>>2]=+g[Ne>>2]*+g[Bf>>2];g[Ef>>2]=+g[Ne>>2]*+g[Df>>2];g[Hf>>2]=+g[Vc>>2]*+g[Df>>2];g[Ff>>2]=+g[Cf>>2]-+g[Ef>>2];g[Jf>>2]=+g[Hf>>2]+ +g[If>>2];g[N>>2]=+g[Hf>>2]-+g[If>>2];g[L>>2]=+g[Cf>>2]+ +g[Ef>>2];g[Oe>>2]=+g[(c[q>>2]|0)+20>>2];g[Pe>>2]=+g[Ne>>2]*+g[Oe>>2];g[Ba>>2]=+g[Bf>>2]*+g[Oe>>2];g[tf>>2]=+g[Vc>>2]*+g[Oe>>2];g[wa>>2]=+g[Df>>2]*+g[Oe>>2];g[ce>>2]=+g[(c[q>>2]|0)+16>>2];g[Me>>2]=+g[Vc>>2]*+g[ce>>2];g[Ca>>2]=+g[Df>>2]*+g[ce>>2];g[uf>>2]=+g[Ne>>2]*+g[ce>>2];g[va>>2]=+g[Bf>>2]*+g[ce>>2];g[pf>>2]=+g[Me>>2]-+g[Pe>>2];g[xa>>2]=+g[va>>2]+ +g[wa>>2];g[Gb>>2]=+g[Ba>>2]+ +g[Ca>>2];g[vf>>2]=+g[tf>>2]+ +g[uf>>2];g[Eb>>2]=+g[va>>2]-+g[wa>>2];g[Fa>>2]=+g[tf>>2]-+g[uf>>2];g[F>>2]=+g[Ba>>2]-+g[Ca>>2];g[ca>>2]=+g[Me>>2]+ +g[Pe>>2];g[R>>2]=+g[L>>2]*+g[Oe>>2];g[S>>2]=+g[N>>2]*+g[ce>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[Wa>>2]=+g[R>>2]-+g[S>>2];g[M>>2]=+g[L>>2]*+g[ce>>2];g[O>>2]=+g[N>>2]*+g[Oe>>2];g[P>>2]=+g[M>>2]-+g[O>>2];g[Ua>>2]=+g[M>>2]+ +g[O>>2];g[Gf>>2]=+g[Ff>>2]*+g[ce>>2];g[Kf>>2]=+g[Jf>>2]*+g[Oe>>2];g[Lf>>2]=+g[Gf>>2]+ +g[Kf>>2];g[Qe>>2]=+g[Ff>>2]*+g[Oe>>2];g[Re>>2]=+g[Jf>>2]*+g[ce>>2];g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[mb>>2]=+g[Gf>>2]-+g[Kf>>2];g[qb>>2]=+g[Qe>>2]+ +g[Re>>2];g[Xe>>2]=+g[(c[q>>2]|0)+24>>2];g[Ye>>2]=+g[(c[q>>2]|0)+28>>2];g[Ze>>2]=+g[Ff>>2]*+g[Xe>>2]+ +g[Jf>>2]*+g[Ye>>2];g[mf>>2]=+g[Lf>>2]*+g[Xe>>2]+ +g[Se>>2]*+g[Ye>>2];g[Ra>>2]=+g[L>>2]*+g[Xe>>2]+ 
+g[N>>2]*+g[Ye>>2];g[bf>>2]=+g[Ff>>2]*+g[Ye>>2]-+g[Jf>>2]*+g[Xe>>2];g[Pa>>2]=+g[L>>2]*+g[Ye>>2]-+g[N>>2]*+g[Xe>>2];g[Ka>>2]=+g[Vc>>2]*+g[Ye>>2]-+g[Ne>>2]*+g[Xe>>2];g[qa>>2]=+g[Bf>>2]*+g[Ye>>2]-+g[Df>>2]*+g[Xe>>2];g[of>>2]=+g[Lf>>2]*+g[Ye>>2]-+g[Se>>2]*+g[Xe>>2];g[Ia>>2]=+g[Vc>>2]*+g[Xe>>2]+ +g[Ne>>2]*+g[Ye>>2];g[ma>>2]=+g[Bf>>2]*+g[Xe>>2]+ +g[Df>>2]*+g[Ye>>2];g[u>>2]=+g[c[n>>2]>>2];g[Da>>2]=+g[c[p>>2]>>2];g[W>>2]=+g[u>>2]+ +g[Da>>2];g[X>>2]=+g[c[m>>2]>>2];g[Y>>2]=+g[c[o>>2]>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[Mb>>2]=+g[u>>2]-+g[Da>>2];g[$c>>2]=+g[X>>2]+ +g[Y>>2];g[_>>2]=+g[Vc>>2]*+g[W>>2]-+g[Ne>>2]*+g[Z>>2];g[Ac>>2]=+g[Vc>>2]*+g[Z>>2]+ +g[Ne>>2]*+g[W>>2];g[Mf>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Nf>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[jf>>2]=+g[Mf>>2]-+g[Nf>>2];g[Te>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ue>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[kf>>2]=+g[Te>>2]+ +g[Ue>>2];g[Of>>2]=+g[Mf>>2]+ +g[Nf>>2];g[Ve>>2]=+g[Te>>2]-+g[Ue>>2];g[lf>>2]=+g[Ff>>2]*+g[jf>>2]-+g[Jf>>2]*+g[kf>>2];g[Xb>>2]=+g[Jf>>2]*+g[jf>>2]+ +g[Ff>>2]*+g[kf>>2];g[_e>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[$e>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[nf>>2]=+g[_e>>2]-+g[$e>>2];g[cf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[df>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[v>>2]=+g[cf>>2]+ +g[df>>2];g[af>>2]=+g[_e>>2]+ +g[$e>>2];g[ef>>2]=+g[cf>>2]-+g[df>>2];g[w>>2]=+g[mf>>2]*+g[nf>>2]-+g[of>>2]*+g[v>>2];g[Yb>>2]=+g[of>>2]*+g[nf>>2]+ +g[mf>>2]*+g[v>>2];g[qf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[rf>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Q>>2]=+g[qf>>2]+ +g[rf>>2];g[wf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[xf>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[U>>2]=+g[wf>>2]-+g[xf>>2];g[sf>>2]=+g[qf>>2]-+g[rf>>2];g[yf>>2]=+g[wf>>2]+ +g[xf>>2];g[V>>2]=+g[P>>2]*+g[Q>>2]-+g[T>>2]*+g[U>>2];g[zc>>2]=+g[P>>2]*+g[U>>2]+ +g[T>>2]*+g[Q>>2];g[y>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[z>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[A>>2]=+g[y>>2]+ 
+g[z>>2];g[Ea>>2]=+g[y>>2]-+g[z>>2];g[B>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[C>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[Ga>>2]=+g[B>>2]+ +g[C>>2];g[E>>2]=+g[ce>>2]*+g[A>>2]-+g[Oe>>2]*+g[D>>2];g[gc>>2]=+g[Fa>>2]*+g[Ea>>2]+ +g[ca>>2]*+g[Ga>>2];g[Ha>>2]=+g[ca>>2]*+g[Ea>>2]-+g[Fa>>2]*+g[Ga>>2];g[Wb>>2]=+g[Oe>>2]*+g[A>>2]+ +g[ce>>2]*+g[D>>2];g[vb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[wb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[Jb>>2]=+g[vb>>2]-+g[wb>>2];g[yb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[zb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2];g[Kb>>2]=+g[yb>>2]+ +g[zb>>2];g[Bb>>2]=+g[Bf>>2]*+g[xb>>2]-+g[Df>>2]*+g[Ab>>2];g[nc>>2]=+g[N>>2]*+g[Jb>>2]+ +g[L>>2]*+g[Kb>>2];g[Lb>>2]=+g[L>>2]*+g[Jb>>2]-+g[N>>2]*+g[Kb>>2];g[Rb>>2]=+g[Df>>2]*+g[xb>>2]+ +g[Bf>>2]*+g[Ab>>2];g[ya>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[za>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[Va>>2]=+g[ya>>2]+ +g[za>>2];g[G>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[H>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[Xa>>2]=+g[G>>2]-+g[H>>2];g[J>>2]=+g[xa>>2]*+g[Aa>>2]-+g[F>>2]*+g[I>>2];g[bc>>2]=+g[xa>>2]*+g[I>>2]+ +g[F>>2]*+g[Aa>>2];g[Ya>>2]=+g[Ua>>2]*+g[Va>>2]-+g[Wa>>2]*+g[Xa>>2];g[kb>>2]=+g[Ua>>2]*+g[Xa>>2]+ +g[Wa>>2]*+g[Va>>2];g[da>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ea>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[fa>>2]=+g[da>>2]+ +g[ea>>2];g[Ja>>2]=+g[da>>2]-+g[ea>>2];g[ga>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ha>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[La>>2]=+g[ga>>2]+ +g[ha>>2];g[ja>>2]=+g[Xe>>2]*+g[fa>>2]-+g[Ye>>2]*+g[ia>>2];g[hc>>2]=+g[Ka>>2]*+g[Ja>>2]+ +g[Ia>>2]*+g[La>>2];g[Ma>>2]=+g[Ia>>2]*+g[Ja>>2]-+g[Ka>>2]*+g[La>>2];g[vc>>2]=+g[Ye>>2]*+g[fa>>2]+ +g[Xe>>2]*+g[ia>>2];g[nb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ob>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[pb>>2]=+g[nb>>2]+ 
+g[ob>>2];g[Fb>>2]=+g[nb>>2]-+g[ob>>2];g[rb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[sb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[Hb>>2]=+g[rb>>2]+ +g[sb>>2];g[ub>>2]=+g[mb>>2]*+g[pb>>2]-+g[qb>>2]*+g[tb>>2];g[mc>>2]=+g[Gb>>2]*+g[Fb>>2]+ +g[Eb>>2]*+g[Hb>>2];g[Ib>>2]=+g[Eb>>2]*+g[Fb>>2]-+g[Gb>>2]*+g[Hb>>2];g[Qb>>2]=+g[qb>>2]*+g[pb>>2]+ +g[mb>>2]*+g[tb>>2];g[na>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[Sa>>2]=+g[na>>2]+ +g[oa>>2];g[ra>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[sa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[Qa>>2]=+g[sa>>2]-+g[ra>>2];g[ua>>2]=+g[ma>>2]*+g[pa>>2]-+g[qa>>2]*+g[ta>>2];g[ac>>2]=+g[ma>>2]*+g[ta>>2]+ +g[qa>>2]*+g[pa>>2];g[Ta>>2]=+g[Pa>>2]*+g[Qa>>2]+ +g[Ra>>2]*+g[Sa>>2];g[jb>>2]=+g[Ra>>2]*+g[Qa>>2]-+g[Pa>>2]*+g[Sa>>2];g[zf>>2]=+g[pf>>2]*+g[sf>>2]-+g[vf>>2]*+g[yf>>2];g[Af>>2]=+g[Mb>>2]-+g[zf>>2];g[Ic>>2]=+g[zf>>2]+ +g[Mb>>2];g[x>>2]=+g[lf>>2]-+g[w>>2];g[ka>>2]=+g[E>>2]-+g[ja>>2];g[la>>2]=+g[x>>2]-+g[ka>>2];g[cb>>2]=+g[x>>2]+ +g[ka>>2];g[ad>>2]=+g[pf>>2]*+g[yf>>2]+ +g[vf>>2]*+g[sf>>2];g[bd>>2]=+g[$c>>2]-+g[ad>>2];g[de>>2]=+g[$c>>2]+ +g[ad>>2];g[ud>>2]=+g[Ta>>2]+ +g[Ya>>2];g[vd>>2]=+g[mc>>2]+ +g[nc>>2];g[wd>>2]=+g[ud>>2]+ +g[vd>>2];g[ve>>2]=+g[vd>>2]-+g[ud>>2];g[Oa>>2]=+g[Ib>>2]-+g[Lb>>2];g[Za>>2]=+g[Ta>>2]-+g[Ya>>2];g[_a>>2]=+g[Oa>>2]-+g[Za>>2];g[fb>>2]=+g[Za>>2]+ +g[Oa>>2];g[kd>>2]=+g[Xb>>2]+ +g[Yb>>2];g[ld>>2]=+g[E>>2]+ +g[ja>>2];g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[se>>2]=+g[kd>>2]-+g[ld>>2];g[nd>>2]=+g[ac>>2]+ +g[bc>>2];g[od>>2]=+g[V>>2]+ +g[_>>2];g[pd>>2]=+g[nd>>2]+ +g[od>>2];g[re>>2]=+g[nd>>2]-+g[od>>2];g[rd>>2]=+g[gc>>2]+ +g[hc>>2];g[sd>>2]=+g[ub>>2]+ +g[Bb>>2];g[td>>2]=+g[rd>>2]+ +g[sd>>2];g[ue>>2]=+g[rd>>2]-+g[sd>>2];g[Na>>2]=+g[Ha>>2]-+g[Ma>>2];g[Cb>>2]=+g[ub>>2]-+g[Bb>>2];g[Db>>2]=+g[Na>>2]-+g[Cb>>2];g[eb>>2]=+g[Na>>2]+ +g[Cb>>2];g[K>>2]=+g[ua>>2]-+g[J>>2];g[$>>2]=+g[V>>2]-+g[_>>2];g[aa>>2]=+g[K>>2]+ 
+g[$>>2];g[bb>>2]=+g[$>>2]-+g[K>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[Nb>>2]=+g[Ib>>2]+ +g[Lb>>2];g[Ob>>2]=+g[lb>>2]-+g[Nb>>2];g[Pc>>2]=+g[lb>>2]+ +g[Nb>>2];g[Pb>>2]=+g[Ha>>2]+ +g[Ma>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Tb>>2]=+g[Pb>>2]+ +g[Sb>>2];g[Oc>>2]=+g[Pb>>2]-+g[Sb>>2];g[Zb>>2]=+g[Xb>>2]-+g[Yb>>2];g[_b>>2]=+g[vc>>2]-+g[Wb>>2];g[$b>>2]=+g[Zb>>2]+ +g[_b>>2];g[Pd>>2]=+g[Zb>>2]-+g[_b>>2];g[lc>>2]=+g[jb>>2]+ +g[kb>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[pc>>2]=+g[lc>>2]+ +g[oc>>2];g[Nd>>2]=+g[oc>>2]-+g[lc>>2];g[yc>>2]=+g[ua>>2]+ +g[J>>2];g[Bc>>2]=+g[zc>>2]+ +g[Ac>>2];g[Cc>>2]=+g[yc>>2]+ +g[Bc>>2];g[Mc>>2]=+g[yc>>2]-+g[Bc>>2];g[cc>>2]=+g[ac>>2]-+g[bc>>2];g[dc>>2]=+g[zc>>2]-+g[Ac>>2];g[ec>>2]=+g[cc>>2]+ +g[dc>>2];g[Qd>>2]=+g[cc>>2]-+g[dc>>2];g[Vb>>2]=+g[lf>>2]+ +g[w>>2];g[wc>>2]=+g[Wb>>2]+ +g[vc>>2];g[xc>>2]=+g[Vb>>2]+ +g[wc>>2];g[Lc>>2]=+g[Vb>>2]-+g[wc>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[jc>>2]=+g[Rb>>2]-+g[Qb>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[Md>>2]=+g[ic>>2]-+g[jc>>2];g[We>>2]=+g[Lf>>2]*+g[Of>>2]-+g[Se>>2]*+g[Ve>>2];g[ff>>2]=+g[Ze>>2]*+g[af>>2]-+g[bf>>2]*+g[ef>>2];g[gf>>2]=+g[We>>2]-+g[ff>>2];g[ee>>2]=+g[We>>2]+ +g[ff>>2];g[Fc>>2]=+g[Se>>2]*+g[Of>>2]+ +g[Lf>>2]*+g[Ve>>2];g[Gc>>2]=+g[bf>>2]*+g[af>>2]+ +g[Ze>>2]*+g[ef>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[cd>>2]=+g[Gc>>2]-+g[Fc>>2];g[fc>>2]=+g[$b>>2]-+g[ec>>2];g[qc>>2]=+g[kc>>2]-+g[pc>>2];g[rc>>2]=+g[fc>>2]*.4755282700061798+ +g[qc>>2]*.29389262199401855;g[tc>>2]=+g[qc>>2]*.4755282700061798-+g[fc>>2]*.29389262199401855;g[hf>>2]=+g[Af>>2]-+g[gf>>2];g[ba>>2]=+g[la>>2]+ +g[aa>>2];g[$a>>2]=+g[Db>>2]+ +g[_a>>2];g[ab>>2]=+g[ba>>2]+ +g[$a>>2];g[Sc>>2]=(+g[ba>>2]-+g[$a>>2])*.279508501291275;g[Tc>>2]=+g[hf>>2]*.5-+g[ab>>2]*.125;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[hf>>2]+ +g[ab>>2])*.5;g[sc>>2]=+g[Sc>>2]-+g[Tc>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[sc>>2]-+g[tc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[sc>>2]+ +g[tc>>2];g[Uc>>2]=+g[Sc>>2]+ 
+g[Tc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Uc>>2]-+g[rc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Uc>>2]+ +g[rc>>2];g[uc>>2]=+g[la>>2]-+g[aa>>2];g[Wc>>2]=+g[Db>>2]-+g[_a>>2];g[Xc>>2]=+g[uc>>2]*.4755282700061798+ +g[Wc>>2]*.29389262199401855;g[Hd>>2]=+g[Wc>>2]*.4755282700061798-+g[uc>>2]*.29389262199401855;g[dd>>2]=+g[bd>>2]+ +g[cd>>2];g[Yc>>2]=+g[$b>>2]+ +g[ec>>2];g[Zc>>2]=+g[kc>>2]+ +g[pc>>2];g[Ed>>2]=+g[Yc>>2]+ +g[Zc>>2];g[_c>>2]=(+g[Yc>>2]-+g[Zc>>2])*.279508501291275;g[Fd>>2]=+g[dd>>2]*.5-+g[Ed>>2]*.125;g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[dd>>2]+ +g[Ed>>2])*.5;g[Id>>2]=+g[Fd>>2]-+g[_c>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Hd>>2]+ +g[Id>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Id>>2]-+g[Hd>>2];g[Gd>>2]=+g[_c>>2]+ +g[Fd>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xc>>2]+ +g[Gd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Gd>>2]-+g[Xc>>2];g[Od>>2]=+g[Md>>2]-+g[Nd>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Sd>>2]=+g[Od>>2]*.4755282700061798-+g[Rd>>2]*.29389262199401855;g[Ud>>2]=+g[Rd>>2]*.4755282700061798+ +g[Od>>2]*.29389262199401855;g[ib>>2]=+g[gf>>2]+ +g[Af>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[hb>>2]=+g[db>>2]-+g[gb>>2];g[Jd>>2]=+g[ib>>2]*.5+ +g[hb>>2]*.125;g[Kd>>2]=(+g[db>>2]+ +g[gb>>2])*.279508501291275;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[hb>>2]-+g[ib>>2])*.5;g[Td>>2]=+g[Kd>>2]-+g[Jd>>2];g[c[p>>2]>>2]=+g[Td>>2]-+g[Ud>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Td>>2]+ +g[Ud>>2];g[Ld>>2]=+g[Jd>>2]+ +g[Kd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ld>>2]-+g[Sd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ld>>2]+ +g[Sd>>2];g[Vd>>2]=+g[eb>>2]-+g[fb>>2];g[Wd>>2]=+g[cb>>2]+ +g[bb>>2];g[Xd>>2]=+g[Vd>>2]*.4755282700061798-+g[Wd>>2]*.29389262199401855;g[fd>>2]=+g[Wd>>2]*.4755282700061798+ +g[Vd>>2]*.29389262199401855;g[Yd>>2]=+g[bd>>2]-+g[cd>>2];g[Zd>>2]=+g[Pd>>2]+ +g[Qd>>2];g[_d>>2]=+g[Md>>2]+ +g[Nd>>2];g[$d>>2]=+g[Zd>>2]+ 
+g[_d>>2];g[ae>>2]=+g[Yd>>2]*.5-+g[$d>>2]*.125;g[be>>2]=(+g[Zd>>2]-+g[_d>>2])*.279508501291275;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[Yd>>2]+ +g[$d>>2])*.5;g[gd>>2]=+g[be>>2]+ +g[ae>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[fd>>2]+ +g[gd>>2];g[c[o>>2]>>2]=+g[gd>>2]-+g[fd>>2];g[ed>>2]=+g[ae>>2]-+g[be>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xd>>2]+ +g[ed>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ed>>2]-+g[Xd>>2];g[te>>2]=+g[re>>2]-+g[se>>2];g[we>>2]=+g[ue>>2]-+g[ve>>2];g[xe>>2]=+g[te>>2]*.4755282700061798-+g[we>>2]*.29389262199401855;g[ze>>2]=+g[te>>2]*.29389262199401855+ +g[we>>2]*.4755282700061798;g[Jc>>2]=+g[Hc>>2]+ +g[Ic>>2];g[Ub>>2]=+g[Ob>>2]-+g[Tb>>2];g[Dc>>2]=+g[xc>>2]+ +g[Cc>>2];g[Ec>>2]=+g[Ub>>2]-+g[Dc>>2];g[oe>>2]=+g[Jc>>2]*.5+ +g[Ec>>2]*.125;g[pe>>2]=(+g[Dc>>2]+ +g[Ub>>2])*.279508501291275;g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=(+g[Ec>>2]-+g[Jc>>2])*.5;g[ye>>2]=+g[oe>>2]-+g[pe>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ye>>2]+ +g[ze>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[ze>>2]-+g[ye>>2];g[qe>>2]=+g[oe>>2]+ +g[pe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qe>>2]+ +g[xe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[xe>>2]-+g[qe>>2];g[He>>2]=+g[Tb>>2]+ +g[Ob>>2];g[Ie>>2]=+g[xc>>2]-+g[Cc>>2];g[Je>>2]=+g[He>>2]*.4755282700061798-+g[Ie>>2]*.29389262199401855;g[Ke>>2]=+g[Ie>>2]*.4755282700061798+ +g[He>>2]*.29389262199401855;g[Ae>>2]=+g[de>>2]-+g[ee>>2];g[Be>>2]=+g[se>>2]+ +g[re>>2];g[Ce>>2]=+g[ue>>2]+ +g[ve>>2];g[De>>2]=+g[Be>>2]+ +g[Ce>>2];g[Ee>>2]=+g[Ae>>2]*.5-+g[De>>2]*.125;g[Fe>>2]=(+g[Be>>2]-+g[Ce>>2])*.279508501291275;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=(+g[Ae>>2]+ +g[De>>2])*.5;g[Le>>2]=+g[Fe>>2]+ +g[Ee>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ke>>2]+ +g[Le>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Le>>2]-+g[Ke>>2];g[Ge>>2]=+g[Ee>>2]-+g[Fe>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ge>>2]-+g[Je>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Je>>2]+ 
+g[Ge>>2];g[qd>>2]=+g[md>>2]-+g[pd>>2];g[xd>>2]=+g[td>>2]-+g[wd>>2];g[yd>>2]=+g[qd>>2]*.29389262199401855-+g[xd>>2]*.4755282700061798;g[Ad>>2]=+g[qd>>2]*.4755282700061798+ +g[xd>>2]*.29389262199401855;g[Kc>>2]=+g[Ic>>2]-+g[Hc>>2];g[Nc>>2]=+g[Lc>>2]+ +g[Mc>>2];g[Qc>>2]=+g[Oc>>2]+ +g[Pc>>2];g[Rc>>2]=+g[Nc>>2]+ +g[Qc>>2];g[hd>>2]=+g[Kc>>2]*.5-+g[Rc>>2]*.125;g[id>>2]=(+g[Nc>>2]-+g[Qc>>2])*.279508501291275;g[c[n>>2]>>2]=(+g[Kc>>2]+ +g[Rc>>2])*.5;g[zd>>2]=+g[id>>2]+ +g[hd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[zd>>2]+ +g[Ad>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ad>>2]-+g[zd>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[jd>>2]+ +g[yd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[yd>>2]-+g[jd>>2];g[je>>2]=+g[Lc>>2]-+g[Mc>>2];g[ke>>2]=+g[Oc>>2]-+g[Pc>>2];g[le>>2]=+g[je>>2]*.4755282700061798+ +g[ke>>2]*.29389262199401855;g[me>>2]=+g[ke>>2]*.4755282700061798-+g[je>>2]*.29389262199401855;g[fe>>2]=+g[de>>2]+ +g[ee>>2];g[Bd>>2]=+g[md>>2]+ +g[pd>>2];g[Cd>>2]=+g[td>>2]+ +g[wd>>2];g[ge>>2]=+g[Bd>>2]+ +g[Cd>>2];g[Dd>>2]=(+g[Bd>>2]-+g[Cd>>2])*.279508501291275;g[he>>2]=+g[fe>>2]*.5-+g[ge>>2]*.125;g[c[m>>2]>>2]=(+g[fe>>2]+ +g[ge>>2])*.5;g[ne>>2]=+g[he>>2]-+g[Dd>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[me>>2]+ +g[ne>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ne>>2]-+g[me>>2];g[ie>>2]=+g[Dd>>2]+ +g[he>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ie>>2]-+g[le>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[le>>2]+ +g[ie>>2];c[Pf>>2]=(c[Pf>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=Qf;return}/* Rq(a): coerces `a` to a 32-bit int, saves `i` and advances it by 16, stores `a` into the saved slot through `c`, calls an(a, 40, 3976, 1), then restores `i` and returns nothing. NOTE(review): `i` looks like the Emscripten stack pointer and `c` an Int32 heap view; `an` and the constants 40 / 3976 / 1 are defined outside this view, so this is presumably a compiler-generated stub forwarding `a` to `an` - confirm against the original C source. Generated code: do not hand-edit. */function Rq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,40,3976,1);i=b;return}function Sq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg
=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0,Sj=0,Tj=0,Uj=0,Vj=0,Wj=0,Xj=0,Yj=0,Zj=0,_j=0,$j=0,ak=0,bk=0,ck=0,dk=0,ek=0,fk=0,gk=0,hk=0,ik=0,jk=0,kk=0,lk=0,mk=0,nk=0,ok=0,pk=0,qk=0,rk=0,sk=0,tk=0,uk=0,vk=0,wk=0,xk=0,yk=0,zk=0;yk=i;i=i+2464|0;m=yk+2456|0;n=yk+2452|0;o=yk+2448|0;p=yk+2444|0;q=yk+2440|0;r=yk+2436|0;zk=yk+2432|0;s=yk+2428|0;t=yk+2424|0;xk=yk+2384|0;u=yk+2380|0;Vc=yk+2376|0;Da=yk+2372|0;ce=yk+2368|0;vg=yk+2364|0;ea=yk+2360|0;ga=yk+2356|0;ak=yk+2352|0;fk=yk+2348|0;hk=yk+2344|0;gk=yk+2340|0;ik=yk+2336|0;Kj=yk+2332|0;Qj=yk+2328|0;tb=yk+2324|0;ab=yk+2320|0;B=yk+2316|0;xb=yk+2312|0;v=yk+2308|0;Ya=yk+2304|0;jk=yk+2300|0;nk=yk+2296|0;Nc=yk+2292|0;fc=yk+2288|0;Rc=yk+2284|0;hc=yk+2280|0;Id=yk+2276|0;Yd=yk+2272|0;Md=yk+2268|0;_d=yk+2264|0;vk=yk+2260|0;Dj=yk+2256|0;Cc=yk+2252|0;Ec=yk+2248|0;Cb=yk+2244|0;Gb=yk+2240|0;Pd=yk+2236|0;Rd=yk+2232|0;ya=yk+2228|0;G=yk+2224|0;$b=yk+2220|0;bc=yk+2216|0;Pa=yk+2212|0;Ta=yk+2208|0;tc=yk+2204|0;Wc=yk+2200|0;ia=yk+2196|0;L=yk+2192|0;la=yk+2188|0;M=yk+2184|0;ma=yk+2180|0;R=yk+2176|0;qa=yk+2172|0;N=yk+2168|0;Ha=yk+2164|0;gb=yk+2160|0;Ka=yk+2156|0;hb=yk+2152|0;La=yk+2148|0;Nb=yk+2144|0;nb=yk+2140|0;ib=yk+2136|0;tk=yk+2132|0;F=yk+2128|0;Cj=yk+2124|0;wa=yk+2120|0;uk=yk+2116|0;Ca=yk+2
112|0;Bj=yk+2108|0;xa=yk+2104|0;Ij=yk+2100|0;A=yk+2096|0;Pj=yk+2092|0;Xj=yk+2088|0;Jj=yk+2084|0;z=yk+2080|0;Oj=yk+2076|0;Yj=yk+2072|0;Mb=yk+2068|0;$j=yk+2064|0;mf=yk+2060|0;_j=yk+2056|0;Lc=yk+2052|0;Mc=yk+2048|0;Pc=yk+2044|0;Qc=yk+2040|0;Gd=yk+2036|0;Hd=yk+2032|0;Kd=yk+2028|0;Ld=yk+2024|0;fa=yk+2020|0;ha=yk+2016|0;ja=yk+2012|0;ka=yk+2008|0;Fa=yk+2004|0;Ga=yk+2e3|0;Ia=yk+1996|0;Ja=yk+1992|0;Vb=yk+1988|0;ph=yk+1984|0;Yg=yk+1980|0;li=yk+1976|0;dh=yk+1972|0;Pi=yk+1968|0;Xb=yk+1964|0;fg=yk+1960|0;jd=yk+1956|0;Ih=yk+1952|0;pe=yk+1948|0;ig=yk+1944|0;fb=yk+1940|0;Jh=yk+1936|0;md=yk+1932|0;qh=yk+1928|0;sb=yk+1924|0;qd=yk+1920|0;td=yk+1916|0;Lb=yk+1912|0;oc=yk+1908|0;hg=yk+1904|0;Ug=yk+1900|0;$g=yk+1896|0;nh=yk+1892|0;Mh=yk+1888|0;gf=yk+1884|0;eg=yk+1880|0;Rg=yk+1876|0;_g=yk+1872|0;kh=yk+1868|0;Lh=yk+1864|0;sk=yk+1860|0;Vj=yk+1856|0;of=yk+1852|0;yd=yk+1848|0;Bd=yk+1844|0;pf=yk+1840|0;Fd=yk+1836|0;Zf=yk+1832|0;zh=yk+1828|0;Ig=yk+1824|0;Te=yk+1820|0;ag=yk+1816|0;Ag=yk+1812|0;Vh=yk+1808|0;wh=yk+1804|0;Hg=yk+1800|0;xg=yk+1796|0;Uh=yk+1792|0;va=yk+1788|0;W=yk+1784|0;sf=yk+1780|0;ee=yk+1776|0;he=yk+1772|0;rf=yk+1768|0;ed=yk+1764|0;bg=yk+1760|0;Mg=yk+1756|0;hi=yk+1752|0;_e=yk+1748|0;_f=yk+1744|0;fh=yk+1740|0;Yh=yk+1736|0;Dh=yk+1732|0;ei=yk+1728|0;Eg=yk+1724|0;Xh=yk+1720|0;Sa=yk+1716|0;Wa=yk+1712|0;Kc=yk+1708|0;hf=yk+1704|0;Ub=yk+1700|0;hd=yk+1696|0;Bc=yk+1692|0;ne=yk+1688|0;$a=yk+1684|0;db=yk+1680|0;Tc=yk+1676|0;jf=yk+1672|0;lb=yk+1668|0;Qb=yk+1664|0;Gc=yk+1660|0;lf=yk+1656|0;Qa=yk+1652|0;Ra=yk+1648|0;Jc=yk+1644|0;Ua=yk+1640|0;Va=yk+1636|0;Ic=yk+1632|0;Sb=yk+1628|0;Tb=yk+1624|0;Ac=yk+1620|0;xc=yk+1616|0;yc=yk+1612|0;zc=yk+1608|0;Za=yk+1604|0;_a=yk+1600|0;Sc=yk+1596|0;bb=yk+1592|0;cb=yk+1588|0;Oc=yk+1584|0;jb=yk+1580|0;kb=yk+1576|0;Fc=yk+1572|0;Ob=yk+1568|0;Pb=yk+1564|0;Dc=yk+1560|0;Rb=yk+1556|0;Wg=yk+1552|0;Xg=yk+1548|0;id=yk+1544|0;kf=yk+1540|0;oe=yk+1536|0;bh=yk+1532|0;ch=yk+1528|0;Hc=yk+1524|0;Uc=yk+1520|0;Xa=yk+1516|0;eb=yk+1512|0;kd=yk+1508|0;ld=yk+1504|0;Ea=yk+1500|0;af=yk+149
6|0;_b=yk+1492|0;od=yk+1488|0;Kb=yk+1484|0;ef=yk+1480|0;mc=yk+1476|0;sd=yk+1472|0;rb=yk+1468|0;bf=yk+1464|0;dc=yk+1460|0;pd=yk+1456|0;Bb=yk+1452|0;df=yk+1448|0;jc=yk+1444|0;rd=yk+1440|0;$=yk+1436|0;Zb=yk+1432|0;ca=yk+1428|0;Yb=yk+1424|0;Z=yk+1420|0;_=yk+1416|0;aa=yk+1412|0;ba=yk+1408|0;Fb=yk+1404|0;lc=yk+1400|0;Jb=yk+1396|0;kc=yk+1392|0;Db=yk+1388|0;Eb=yk+1384|0;Hb=yk+1380|0;Ib=yk+1376|0;mb=yk+1372|0;cc=yk+1368|0;qb=yk+1364|0;ac=yk+1360|0;Ma=yk+1356|0;Na=yk+1352|0;ob=yk+1348|0;pb=yk+1344|0;wb=yk+1340|0;ic=yk+1336|0;Ab=yk+1332|0;gc=yk+1328|0;ub=yk+1324|0;vb=yk+1320|0;yb=yk+1316|0;zb=yk+1312|0;ec=yk+1308|0;nc=yk+1304|0;cf=yk+1300|0;ff=yk+1296|0;Sg=yk+1292|0;Tg=yk+1288|0;lh=yk+1284|0;mh=yk+1280|0;Pg=yk+1276|0;Qg=yk+1272|0;ih=yk+1268|0;jh=yk+1264|0;ek=yk+1260|0;Ne=yk+1256|0;sc=yk+1252|0;wd=yk+1248|0;Uj=yk+1244|0;Re=yk+1240|0;dd=yk+1236|0;Ad=yk+1232|0;rk=yk+1228|0;Oe=yk+1224|0;Yc=yk+1220|0;xd=yk+1216|0;Hj=yk+1212|0;Qe=yk+1208|0;ad=yk+1204|0;zd=yk+1200|0;Zj=yk+1196|0;rc=yk+1192|0;dk=yk+1188|0;qc=yk+1184|0;Eh=yk+1180|0;Ni=yk+1176|0;bk=yk+1172|0;ck=yk+1168|0;Nj=yk+1164|0;cd=yk+1160|0;Tj=yk+1156|0;bd=yk+1152|0;Lj=yk+1148|0;Mj=yk+1144|0;Rj=yk+1140|0;Sj=yk+1136|0;mk=yk+1132|0;Xc=yk+1128|0;qk=yk+1124|0;uc=yk+1120|0;kk=yk+1116|0;lk=yk+1112|0;ok=yk+1108|0;pk=yk+1104|0;Aj=yk+1100|0;$c=yk+1096|0;Gj=yk+1092|0;_c=yk+1088|0;wk=yk+1084|0;zj=yk+1080|0;Ej=yk+1076|0;Fj=yk+1072|0;Zc=yk+1068|0;Ed=yk+1064|0;xh=yk+1060|0;yh=yk+1056|0;Pe=yk+1052|0;Se=yk+1048|0;yg=yk+1044|0;zg=yk+1040|0;uh=yk+1036|0;vh=yk+1032|0;Wf=yk+1028|0;wg=yk+1024|0;da=yk+1020|0;Ue=yk+1016|0;Od=yk+1012|0;Dd=yk+1008|0;V=yk+1004|0;Ye=yk+1e3|0;ae=yk+996|0;ge=yk+992|0;ua=yk+988|0;Ve=yk+984|0;Td=yk+980|0;de=yk+976|0;K=yk+972|0;Xe=yk+968|0;Xd=yk+964|0;fe=yk+960|0;y=yk+956|0;Nd=yk+952|0;E=yk+948|0;Jd=yk+944|0;w=yk+940|0;x=yk+936|0;C=yk+932|0;D=yk+928|0;Q=yk+924|0;$d=yk+920|0;U=yk+916|0;Zd=yk+912|0;O=yk+908|0;P=yk+904|0;S=yk+900|0;T=yk+896|0;pa=yk+892|0;Sd=yk+888|0;ta=yk+884|0;Qd=yk+880|0;na=yk+876|0;oa=yk+872|0;ra=yk+868|0;sa=yk+
864|0;Ba=yk+860|0;Wd=yk+856|0;J=yk+852|0;Vd=yk+848|0;za=yk+844|0;Aa=yk+840|0;H=yk+836|0;I=yk+832|0;Ud=yk+828|0;be=yk+824|0;Kg=yk+820|0;Lg=yk+816|0;We=yk+812|0;Ze=yk+808|0;Fg=yk+804|0;eh=yk+800|0;Bh=yk+796|0;Ch=yk+792|0;Cg=yk+788|0;Dg=yk+784|0;Y=yk+780|0;Fe=yk+776|0;ze=yk+772|0;Je=yk+768|0;Ce=yk+764|0;Ke=yk+760|0;vc=yk+756|0;ve=yk+752|0;gd=yk+748|0;le=yk+744|0;je=yk+740|0;ue=yk+736|0;re=yk+732|0;te=yk+728|0;vd=yk+724|0;Ee=yk+720|0;Wj=yk+716|0;X=yk+712|0;xe=yk+708|0;ye=yk+704|0;Ae=yk+700|0;Be=yk+696|0;Oa=yk+692|0;Wb=yk+688|0;pc=yk+684|0;fd=yk+680|0;Cd=yk+676|0;ie=yk+672|0;$e=yk+668|0;qe=yk+664|0;nd=yk+660|0;ud=yk+656|0;wc=yk+652|0;se=yk+648|0;ke=yk+644|0;me=yk+640|0;we=yk+636|0;De=yk+632|0;Me=yk+628|0;nf=yk+624|0;Ge=yk+620|0;He=yk+616|0;Ie=yk+612|0;Le=yk+608|0;uf=yk+604|0;Nf=yk+600|0;Hf=yk+596|0;Rf=yk+592|0;Kf=yk+588|0;Sf=yk+584|0;Xf=yk+580|0;Df=yk+576|0;dg=yk+572|0;xf=yk+568|0;og=yk+564|0;Mf=yk+560|0;rg=yk+556|0;Cf=yk+552|0;kg=yk+548|0;yf=yk+544|0;qf=yk+540|0;tf=yk+536|0;Ff=yk+532|0;Gf=yk+528|0;If=yk+524|0;Jf=yk+520|0;vf=yk+516|0;wf=yk+512|0;$f=yk+508|0;cg=yk+504|0;mg=yk+500|0;ng=yk+496|0;pg=yk+492|0;qg=yk+488|0;gg=yk+484|0;jg=yk+480|0;Yf=yk+476|0;lg=yk+472|0;Af=yk+468|0;Bf=yk+464|0;sg=yk+460|0;tg=yk+456|0;ug=yk+452|0;zf=yk+448|0;Ef=yk+444|0;Lf=yk+440|0;Uf=yk+436|0;Vf=yk+432|0;Of=yk+428|0;Pf=yk+424|0;Qf=yk+420|0;Tf=yk+416|0;hh=yk+412|0;Ji=yk+408|0;sh=yk+404|0;zi=yk+400|0;pi=yk+396|0;yi=yk+392|0;Oh=yk+388|0;Ii=yk+384|0;Gh=yk+380|0;Qh=yk+376|0;ui=yk+372|0;Gi=yk+368|0;Og=yk+364|0;Ph=yk+360|0;ti=yk+356|0;Di=yk+352|0;Bg=yk+348|0;gh=yk+344|0;Kh=yk+340|0;Nh=yk+336|0;oh=yk+332|0;rh=yk+328|0;ni=yk+324|0;oi=yk+320|0;Zg=yk+316|0;Ei=yk+312|0;Fh=yk+308|0;Fi=yk+304|0;Vg=yk+300|0;ah=yk+296|0;Gg=yk+292|0;Bi=yk+288|0;Ng=yk+284|0;Ci=yk+280|0;Ah=yk+276|0;Jg=yk+272|0;th=yk+268|0;Hh=yk+264|0;wi=yk+260|0;xi=yk+256|0;qi=yk+252|0;ri=yk+248|0;si=yk+244|0;vi=yk+240|0;Ai=yk+236|0;Hi=yk+232|0;Sh=yk+228|0;Th=yk+224|0;Ki=yk+220|0;Li=yk+216|0;Mi=yk+212|0;Rh=yk+208|0;_h=yk+204|0;tj=yk+200|0;bi=yk+1
96|0;jj=yk+192|0;$i=yk+188|0;ij=yk+184|0;Vi=yk+180|0;sj=yk+176|0;Ri=yk+172|0;yj=yk+168|0;ej=yk+164|0;qj=yk+160|0;ji=yk+156|0;xj=yk+152|0;dj=yk+148|0;nj=yk+144|0;Wh=yk+140|0;Zh=yk+136|0;Ti=yk+132|0;Ui=yk+128|0;$h=yk+124|0;ai=yk+120|0;Wi=yk+116|0;Xi=yk+112|0;mi=yk+108|0;oj=yk+104|0;Qi=yk+100|0;pj=yk+96|0;ki=yk+92|0;Oi=yk+88|0;fi=yk+84|0;lj=yk+80|0;ii=yk+76|0;mj=yk+72|0;di=yk+68|0;gi=yk+64|0;ci=yk+60|0;Si=yk+56|0;gj=yk+52|0;hj=yk+48|0;aj=yk+44|0;bj=yk+40|0;cj=yk+36|0;fj=yk+32|0;kj=yk+28|0;rj=yk+24|0;Zi=yk+20|0;_i=yk+16|0;uj=yk+12|0;vj=yk+8|0;wj=yk+4|0;Yi=yk;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[zk>>2]=j;c[s>>2]=k;c[t>>2]=l;g[yk+2420>>2]=.27778512239456177;g[yk+2416>>2]=.41573479771614075;g[yk+2412>>2]=.09754516184329987;g[yk+2408>>2]=.49039262533187866;g[yk+2404>>2]=.7071067690849304;g[yk+2400>>2]=.19134171307086945;g[yk+2396>>2]=.4619397521018982;g[yk+2392>>2]=.3535533845424652;g[yk+2388>>2]=.5;c[xk>>2]=c[zk>>2];c[q>>2]=(c[q>>2]|0)+((c[zk>>2]|0)-1<<3<<2);while(1){if((c[xk>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[q>>2]>>2];g[Vc>>2]=+g[(c[q>>2]|0)+4>>2];g[Da>>2]=+g[(c[q>>2]|0)+8>>2];g[ce>>2]=+g[(c[q>>2]|0)+12>>2];g[Mb>>2]=+g[u>>2]*+g[Da>>2];g[$j>>2]=+g[Vc>>2]*+g[Da>>2];g[mf>>2]=+g[Vc>>2]*+g[ce>>2];g[_j>>2]=+g[u>>2]*+g[ce>>2];g[vg>>2]=+g[Mb>>2]+ +g[mf>>2];g[ea>>2]=+g[Mb>>2]-+g[mf>>2];g[ga>>2]=+g[_j>>2]+ +g[$j>>2];g[ak>>2]=+g[_j>>2]-+g[$j>>2];g[fk>>2]=+g[(c[q>>2]|0)+16>>2];g[tk>>2]=+g[u>>2]*+g[fk>>2];g[F>>2]=+g[ce>>2]*+g[fk>>2];g[Cj>>2]=+g[Vc>>2]*+g[fk>>2];g[wa>>2]=+g[Da>>2]*+g[fk>>2];g[hk>>2]=+g[(c[q>>2]|0)+20>>2];g[uk>>2]=+g[Vc>>2]*+g[hk>>2];g[Ca>>2]=+g[Da>>2]*+g[hk>>2];g[Bj>>2]=+g[u>>2]*+g[hk>>2];g[xa>>2]=+g[ce>>2]*+g[hk>>2];g[gk>>2]=+g[(c[q>>2]|0)+24>>2];g[Ij>>2]=+g[u>>2]*+g[gk>>2];g[A>>2]=+g[ce>>2]*+g[gk>>2];g[Pj>>2]=+g[Vc>>2]*+g[gk>>2];g[Xj>>2]=+g[Da>>2]*+g[gk>>2];g[ik>>2]=+g[(c[q>>2]|0)+28>>2];g[Jj>>2]=+g[Vc>>2]*+g[ik>>2];g[z>>2]=+g[Da>>2]*+g[ik>>2];g[Oj>>2]=+g[u>>2]*+g[ik>>2];g[Yj>>2]=+g[ce>>2]*+g[ik>>2];g[Kj>>2]=+g[Ij>>2]+ 
+g[Jj>>2];g[Qj>>2]=+g[Oj>>2]-+g[Pj>>2];g[tb>>2]=+g[Ij>>2]-+g[Jj>>2];g[ab>>2]=+g[z>>2]-+g[A>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[xb>>2]=+g[Oj>>2]+ +g[Pj>>2];g[v>>2]=+g[Xj>>2]-+g[Yj>>2];g[Ya>>2]=+g[Xj>>2]+ +g[Yj>>2];g[jk>>2]=+g[fk>>2]*+g[gk>>2]+ +g[hk>>2]*+g[ik>>2];g[nk>>2]=+g[fk>>2]*+g[ik>>2]-+g[hk>>2]*+g[gk>>2];g[Lc>>2]=+g[vg>>2]*+g[gk>>2];g[Mc>>2]=+g[ak>>2]*+g[ik>>2];g[Nc>>2]=+g[Lc>>2]+ +g[Mc>>2];g[fc>>2]=+g[Lc>>2]-+g[Mc>>2];g[Pc>>2]=+g[vg>>2]*+g[ik>>2];g[Qc>>2]=+g[ak>>2]*+g[gk>>2];g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[hc>>2]=+g[Pc>>2]+ +g[Qc>>2];g[Gd>>2]=+g[ea>>2]*+g[gk>>2];g[Hd>>2]=+g[ga>>2]*+g[ik>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[Yd>>2]=+g[Gd>>2]+ +g[Hd>>2];g[Kd>>2]=+g[ea>>2]*+g[ik>>2];g[Ld>>2]=+g[ga>>2]*+g[gk>>2];g[Md>>2]=+g[Kd>>2]+ +g[Ld>>2];g[_d>>2]=+g[Kd>>2]-+g[Ld>>2];g[vk>>2]=+g[tk>>2]-+g[uk>>2];g[Dj>>2]=+g[Bj>>2]+ +g[Cj>>2];g[Cc>>2]=+g[vk>>2]*+g[gk>>2]+ +g[Dj>>2]*+g[ik>>2];g[Ec>>2]=+g[vk>>2]*+g[ik>>2]-+g[Dj>>2]*+g[gk>>2];g[Cb>>2]=+g[wa>>2]-+g[xa>>2];g[Gb>>2]=+g[Ca>>2]+ +g[F>>2];g[Pd>>2]=+g[Cb>>2]*+g[gk>>2]+ +g[Gb>>2]*+g[ik>>2];g[Rd>>2]=+g[Cb>>2]*+g[ik>>2]-+g[Gb>>2]*+g[gk>>2];g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[G>>2]=+g[Ca>>2]-+g[F>>2];g[$b>>2]=+g[ya>>2]*+g[gk>>2]+ +g[G>>2]*+g[ik>>2];g[bc>>2]=+g[ya>>2]*+g[ik>>2]-+g[G>>2]*+g[gk>>2];g[Pa>>2]=+g[tk>>2]+ +g[uk>>2];g[Ta>>2]=+g[Bj>>2]-+g[Cj>>2];g[tc>>2]=+g[Pa>>2]*+g[gk>>2]+ +g[Ta>>2]*+g[ik>>2];g[Wc>>2]=+g[Pa>>2]*+g[ik>>2]-+g[Ta>>2]*+g[gk>>2];g[fa>>2]=+g[ea>>2]*+g[fk>>2];g[ha>>2]=+g[ga>>2]*+g[hk>>2];g[ia>>2]=+g[fa>>2]-+g[ha>>2];g[L>>2]=+g[fa>>2]+ +g[ha>>2];g[ja>>2]=+g[ea>>2]*+g[hk>>2];g[ka>>2]=+g[ga>>2]*+g[fk>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[M>>2]=+g[ja>>2]-+g[ka>>2];g[ma>>2]=+g[ia>>2]*+g[gk>>2]+ +g[la>>2]*+g[ik>>2];g[R>>2]=+g[L>>2]*+g[ik>>2]-+g[M>>2]*+g[gk>>2];g[qa>>2]=+g[ia>>2]*+g[ik>>2]-+g[la>>2]*+g[gk>>2];g[N>>2]=+g[L>>2]*+g[gk>>2]+ +g[M>>2]*+g[ik>>2];g[Fa>>2]=+g[vg>>2]*+g[fk>>2];g[Ga>>2]=+g[ak>>2]*+g[hk>>2];g[Ha>>2]=+g[Fa>>2]+ 
+g[Ga>>2];g[gb>>2]=+g[Fa>>2]-+g[Ga>>2];g[Ia>>2]=+g[vg>>2]*+g[hk>>2];g[Ja>>2]=+g[ak>>2]*+g[fk>>2];g[Ka>>2]=+g[Ia>>2]-+g[Ja>>2];g[hb>>2]=+g[Ia>>2]+ +g[Ja>>2];g[La>>2]=+g[Ha>>2]*+g[gk>>2]+ +g[Ka>>2]*+g[ik>>2];g[Nb>>2]=+g[gb>>2]*+g[ik>>2]-+g[hb>>2]*+g[gk>>2];g[nb>>2]=+g[Ha>>2]*+g[ik>>2]-+g[Ka>>2]*+g[gk>>2];g[ib>>2]=+g[gb>>2]*+g[gk>>2]+ +g[hb>>2]*+g[ik>>2];g[Qa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ra>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Jc>>2]=+g[Qa>>2]+ +g[Ra>>2];g[Ua>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Va>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ic>>2]=+g[Ua>>2]-+g[Va>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[Kc>>2]=+g[fk>>2]*+g[Ic>>2]+ +g[hk>>2]*+g[Jc>>2];g[hf>>2]=+g[fk>>2]*+g[Jc>>2]-+g[hk>>2]*+g[Ic>>2];g[Sb>>2]=+g[c[n>>2]>>2];g[Tb>>2]=+g[c[p>>2]>>2];g[Ac>>2]=+g[Sb>>2]+ +g[Tb>>2];g[xc>>2]=+g[c[o>>2]>>2];g[yc>>2]=+g[c[m>>2]>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2];g[hd>>2]=+g[yc>>2]+ +g[xc>>2];g[Bc>>2]=+g[u>>2]*+g[zc>>2]-+g[Vc>>2]*+g[Ac>>2];g[ne>>2]=+g[Vc>>2]*+g[zc>>2]+ +g[u>>2]*+g[Ac>>2];g[Za>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[_a>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Sc>>2]=+g[Za>>2]+ +g[_a>>2];g[bb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[cb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Oc>>2]=+g[bb>>2]-+g[cb>>2];g[$a>>2]=+g[Za>>2]-+g[_a>>2];g[db>>2]=+g[bb>>2]+ +g[cb>>2];g[Tc>>2]=+g[Nc>>2]*+g[Oc>>2]+ +g[Rc>>2]*+g[Sc>>2];g[jf>>2]=+g[Nc>>2]*+g[Sc>>2]-+g[Rc>>2]*+g[Oc>>2];g[jb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[kb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Fc>>2]=+g[jb>>2]+ +g[kb>>2];g[Ob>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Pb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Dc>>2]=+g[Ob>>2]-+g[Pb>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[Qb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Gc>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[lf>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[Rb>>2]=+g[ib>>2]*+g[lb>>2]-+g[Nb>>2]*+g[Qb>>2];g[Vb>>2]=+g[Rb>>2]+ +g[Ub>>2];g[ph>>2]=+g[Ub>>2]-+g[Rb>>2];g[Wg>>2]=+g[Gc>>2]+ 
+g[Bc>>2];g[Xg>>2]=+g[hf>>2]-+g[jf>>2];g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[li>>2]=+g[Xg>>2]+ +g[Wg>>2];g[bh>>2]=+g[Tc>>2]-+g[Kc>>2];g[ch>>2]=+g[ne>>2]-+g[lf>>2];g[dh>>2]=+g[bh>>2]+ +g[ch>>2];g[Pi>>2]=+g[ch>>2]-+g[bh>>2];g[Hc>>2]=+g[Bc>>2]-+g[Gc>>2];g[Uc>>2]=+g[Kc>>2]+ +g[Tc>>2];g[Xb>>2]=+g[Hc>>2]-+g[Uc>>2];g[fg>>2]=+g[Uc>>2]+ +g[Hc>>2];g[id>>2]=+g[ib>>2]*+g[Qb>>2]+ +g[Nb>>2]*+g[lb>>2];g[jd>>2]=+g[hd>>2]+ +g[id>>2];g[Ih>>2]=+g[hd>>2]-+g[id>>2];g[kf>>2]=+g[hf>>2]+ +g[jf>>2];g[oe>>2]=+g[lf>>2]+ +g[ne>>2];g[pe>>2]=+g[kf>>2]+ +g[oe>>2];g[ig>>2]=+g[oe>>2]-+g[kf>>2];g[Xa>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Ta>>2]*+g[Wa>>2];g[eb>>2]=+g[Ya>>2]*+g[$a>>2]-+g[ab>>2]*+g[db>>2];g[fb>>2]=+g[Xa>>2]+ +g[eb>>2];g[Jh>>2]=+g[Xa>>2]-+g[eb>>2];g[kd>>2]=+g[Pa>>2]*+g[Wa>>2]+ +g[Ta>>2]*+g[Sa>>2];g[ld>>2]=+g[Ya>>2]*+g[db>>2]+ +g[ab>>2]*+g[$a>>2];g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[qh>>2]=+g[kd>>2]-+g[ld>>2];g[Z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[_>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[Zb>>2]=+g[Z>>2]+ +g[_>>2];g[aa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ba>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ca>>2]=+g[aa>>2]+ +g[ba>>2];g[Yb>>2]=+g[aa>>2]-+g[ba>>2];g[Ea>>2]=+g[ea>>2]*+g[$>>2]-+g[ga>>2]*+g[ca>>2];g[af>>2]=+g[L>>2]*+g[Zb>>2]-+g[M>>2]*+g[Yb>>2];g[_b>>2]=+g[L>>2]*+g[Yb>>2]+ +g[M>>2]*+g[Zb>>2];g[od>>2]=+g[ea>>2]*+g[ca>>2]+ +g[ga>>2]*+g[$>>2];g[Db>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Eb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[lc>>2]=+g[Db>>2]+ +g[Eb>>2];g[Hb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Ib>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Jb>>2]=+g[Hb>>2]+ +g[Ib>>2];g[kc>>2]=+g[Hb>>2]-+g[Ib>>2];g[Kb>>2]=+g[Cb>>2]*+g[Fb>>2]-+g[Gb>>2]*+g[Jb>>2];g[ef>>2]=+g[ia>>2]*+g[lc>>2]-+g[la>>2]*+g[kc>>2];g[mc>>2]=+g[ia>>2]*+g[kc>>2]+ +g[la>>2]*+g[lc>>2];g[sd>>2]=+g[Cb>>2]*+g[Jb>>2]+ 
+g[Gb>>2]*+g[Fb>>2];g[Ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Na>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[mb>>2]=+g[Ma>>2]-+g[Na>>2];g[cc>>2]=+g[Ma>>2]+ +g[Na>>2];g[ob>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[pb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[qb>>2]=+g[ob>>2]+ +g[pb>>2];g[ac>>2]=+g[ob>>2]-+g[pb>>2];g[rb>>2]=+g[La>>2]*+g[mb>>2]-+g[nb>>2]*+g[qb>>2];g[bf>>2]=+g[$b>>2]*+g[cc>>2]-+g[bc>>2]*+g[ac>>2];g[dc>>2]=+g[$b>>2]*+g[ac>>2]+ +g[bc>>2]*+g[cc>>2];g[pd>>2]=+g[La>>2]*+g[qb>>2]+ +g[nb>>2]*+g[mb>>2];g[ub>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[vb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[ic>>2]=+g[ub>>2]+ +g[vb>>2];g[yb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[zb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[gc>>2]=+g[yb>>2]-+g[zb>>2];g[Bb>>2]=+g[tb>>2]*+g[wb>>2]-+g[xb>>2]*+g[Ab>>2];g[df>>2]=+g[fc>>2]*+g[ic>>2]-+g[hc>>2]*+g[gc>>2];g[jc>>2]=+g[fc>>2]*+g[gc>>2]+ +g[hc>>2]*+g[ic>>2];g[rd>>2]=+g[tb>>2]*+g[Ab>>2]+ +g[xb>>2]*+g[wb>>2];g[sb>>2]=+g[Ea>>2]+ +g[rb>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[td>>2]=+g[rd>>2]+ +g[sd>>2];g[Lb>>2]=+g[Bb>>2]+ +g[Kb>>2];g[ec>>2]=+g[_b>>2]+ +g[dc>>2];g[nc>>2]=+g[jc>>2]+ +g[mc>>2];g[oc>>2]=+g[ec>>2]+ +g[nc>>2];g[hg>>2]=+g[nc>>2]-+g[ec>>2];g[Sg>>2]=+g[df>>2]-+g[ef>>2];g[Tg>>2]=+g[mc>>2]-+g[jc>>2];g[Ug>>2]=+g[Sg>>2]+ +g[Tg>>2];g[$g>>2]=+g[Sg>>2]-+g[Tg>>2];g[lh>>2]=+g[rd>>2]-+g[sd>>2];g[mh>>2]=+g[Bb>>2]-+g[Kb>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[Mh>>2]=+g[lh>>2]-+g[mh>>2];g[cf>>2]=+g[af>>2]+ +g[bf>>2];g[ff>>2]=+g[df>>2]+ +g[ef>>2];g[gf>>2]=+g[cf>>2]+ +g[ff>>2];g[eg>>2]=+g[ff>>2]-+g[cf>>2];g[Pg>>2]=+g[dc>>2]-+g[_b>>2];g[Qg>>2]=+g[af>>2]-+g[bf>>2];g[Rg>>2]=+g[Pg>>2]-+g[Qg>>2];g[_g>>2]=+g[Qg>>2]+ +g[Pg>>2];g[ih>>2]=+g[Ea>>2]-+g[rb>>2];g[jh>>2]=+g[od>>2]-+g[pd>>2];g[kh>>2]=+g[ih>>2]-+g[jh>>2];g[Lh>>2]=+g[jh>>2]+ +g[ih>>2];g[Eh>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ni>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Zj>>2]=+g[Eh>>2]-+g[Ni>>2];g[rc>>2]=+g[Eh>>2]+ 
+g[Ni>>2];g[bk>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[ck>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[dk>>2]=+g[bk>>2]+ +g[ck>>2];g[qc>>2]=+g[bk>>2]-+g[ck>>2];g[ek>>2]=+g[vg>>2]*+g[Zj>>2]-+g[ak>>2]*+g[dk>>2];g[Ne>>2]=+g[Da>>2]*+g[rc>>2]-+g[ce>>2]*+g[qc>>2];g[sc>>2]=+g[Da>>2]*+g[qc>>2]+ +g[ce>>2]*+g[rc>>2];g[wd>>2]=+g[vg>>2]*+g[dk>>2]+ +g[ak>>2]*+g[Zj>>2];g[Lj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Mj>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Nj>>2]=+g[Lj>>2]-+g[Mj>>2];g[cd>>2]=+g[Lj>>2]+ +g[Mj>>2];g[Rj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Sj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Tj>>2]=+g[Rj>>2]+ +g[Sj>>2];g[bd>>2]=+g[Rj>>2]-+g[Sj>>2];g[Uj>>2]=+g[Kj>>2]*+g[Nj>>2]-+g[Qj>>2]*+g[Tj>>2];g[Re>>2]=+g[gk>>2]*+g[cd>>2]-+g[ik>>2]*+g[bd>>2];g[dd>>2]=+g[gk>>2]*+g[bd>>2]+ +g[ik>>2]*+g[cd>>2];g[Ad>>2]=+g[Kj>>2]*+g[Tj>>2]+ +g[Qj>>2]*+g[Nj>>2];g[kk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[lk>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[mk>>2]=+g[kk>>2]-+g[lk>>2];g[Xc>>2]=+g[kk>>2]+ +g[lk>>2];g[ok>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[pk>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[qk>>2]=+g[ok>>2]+ +g[pk>>2];g[uc>>2]=+g[ok>>2]-+g[pk>>2];g[rk>>2]=+g[jk>>2]*+g[mk>>2]-+g[nk>>2]*+g[qk>>2];g[Oe>>2]=+g[tc>>2]*+g[Xc>>2]-+g[Wc>>2]*+g[uc>>2];g[Yc>>2]=+g[tc>>2]*+g[uc>>2]+ +g[Wc>>2]*+g[Xc>>2];g[xd>>2]=+g[jk>>2]*+g[qk>>2]+ +g[nk>>2]*+g[mk>>2];g[wk>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[zj>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Aj>>2]=+g[wk>>2]-+g[zj>>2];g[$c>>2]=+g[wk>>2]+ +g[zj>>2];g[Ej>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Fj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Gj>>2]=+g[Ej>>2]+ +g[Fj>>2];g[_c>>2]=+g[Ej>>2]-+g[Fj>>2];g[Hj>>2]=+g[vk>>2]*+g[Aj>>2]-+g[Dj>>2]*+g[Gj>>2];g[Qe>>2]=+g[gb>>2]*+g[$c>>2]-+g[hb>>2]*+g[_c>>2];g[ad>>2]=+g[gb>>2]*+g[_c>>2]+ +g[hb>>2]*+g[$c>>2];g[zd>>2]=+g[vk>>2]*+g[Gj>>2]+ +g[Dj>>2]*+g[Aj>>2];g[sk>>2]=+g[ek>>2]+ +g[rk>>2];g[Vj>>2]=+g[Hj>>2]+ +g[Uj>>2];g[of>>2]=+g[sk>>2]-+g[Vj>>2];g[yd>>2]=+g[wd>>2]+ 
+g[xd>>2];g[Bd>>2]=+g[zd>>2]+ +g[Ad>>2];g[pf>>2]=+g[yd>>2]-+g[Bd>>2];g[Zc>>2]=+g[sc>>2]+ +g[Yc>>2];g[Ed>>2]=+g[ad>>2]+ +g[dd>>2];g[Fd>>2]=+g[Zc>>2]+ +g[Ed>>2];g[Zf>>2]=+g[Ed>>2]-+g[Zc>>2];g[xh>>2]=+g[Re>>2]-+g[Qe>>2];g[yh>>2]=+g[ad>>2]-+g[dd>>2];g[zh>>2]=+g[xh>>2]+ +g[yh>>2];g[Ig>>2]=+g[xh>>2]-+g[yh>>2];g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[Se>>2]=+g[Qe>>2]+ +g[Re>>2];g[Te>>2]=+g[Pe>>2]+ +g[Se>>2];g[ag>>2]=+g[Se>>2]-+g[Pe>>2];g[yg>>2]=+g[wd>>2]-+g[xd>>2];g[zg>>2]=+g[Hj>>2]-+g[Uj>>2];g[Ag>>2]=+g[yg>>2]+ +g[zg>>2];g[Vh>>2]=+g[yg>>2]-+g[zg>>2];g[uh>>2]=+g[Yc>>2]-+g[sc>>2];g[vh>>2]=+g[Ne>>2]-+g[Oe>>2];g[wh>>2]=+g[uh>>2]-+g[vh>>2];g[Hg>>2]=+g[vh>>2]+ +g[uh>>2];g[Wf>>2]=+g[ek>>2]-+g[rk>>2];g[wg>>2]=+g[zd>>2]-+g[Ad>>2];g[xg>>2]=+g[Wf>>2]-+g[wg>>2];g[Uh>>2]=+g[Wf>>2]+ +g[wg>>2];g[w>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[Nd>>2]=+g[w>>2]+ +g[x>>2];g[C>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[D>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Jd>>2]=+g[C>>2]-+g[D>>2];g[da>>2]=+g[v>>2]*+g[y>>2]-+g[B>>2]*+g[E>>2];g[Ue>>2]=+g[Id>>2]*+g[Nd>>2]-+g[Md>>2]*+g[Jd>>2];g[Od>>2]=+g[Id>>2]*+g[Jd>>2]+ +g[Md>>2]*+g[Nd>>2];g[Dd>>2]=+g[v>>2]*+g[E>>2]+ +g[B>>2]*+g[y>>2];g[O>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[P>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[$d>>2]=+g[O>>2]+ +g[P>>2];g[S>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[T>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[Zd>>2]=+g[S>>2]-+g[T>>2];g[V>>2]=+g[N>>2]*+g[Q>>2]-+g[R>>2]*+g[U>>2];g[Ye>>2]=+g[Yd>>2]*+g[$d>>2]-+g[_d>>2]*+g[Zd>>2];g[ae>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[_d>>2]*+g[$d>>2];g[ge>>2]=+g[N>>2]*+g[U>>2]+ +g[R>>2]*+g[Q>>2];g[na>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[pa>>2]=+g[na>>2]-+g[oa>>2];g[Sd>>2]=+g[na>>2]+ 
+g[oa>>2];g[ra>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[sa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[Qd>>2]=+g[ra>>2]-+g[sa>>2];g[ua>>2]=+g[ma>>2]*+g[pa>>2]-+g[qa>>2]*+g[ta>>2];g[Ve>>2]=+g[Pd>>2]*+g[Sd>>2]-+g[Rd>>2]*+g[Qd>>2];g[Td>>2]=+g[Pd>>2]*+g[Qd>>2]+ +g[Rd>>2]*+g[Sd>>2];g[de>>2]=+g[ma>>2]*+g[ta>>2]+ +g[qa>>2]*+g[pa>>2];g[za>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Aa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ba>>2]=+g[za>>2]-+g[Aa>>2];g[Wd>>2]=+g[za>>2]+ +g[Aa>>2];g[H>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[I>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[Vd>>2]=+g[H>>2]-+g[I>>2];g[K>>2]=+g[ya>>2]*+g[Ba>>2]-+g[G>>2]*+g[J>>2];g[Xe>>2]=+g[Ha>>2]*+g[Wd>>2]-+g[Ka>>2]*+g[Vd>>2];g[Xd>>2]=+g[Ha>>2]*+g[Vd>>2]+ +g[Ka>>2]*+g[Wd>>2];g[fe>>2]=+g[ya>>2]*+g[J>>2]+ +g[G>>2]*+g[Ba>>2];g[va>>2]=+g[da>>2]+ +g[ua>>2];g[W>>2]=+g[K>>2]+ +g[V>>2];g[sf>>2]=+g[va>>2]-+g[W>>2];g[ee>>2]=+g[Dd>>2]+ +g[de>>2];g[he>>2]=+g[fe>>2]+ +g[ge>>2];g[rf>>2]=+g[ee>>2]-+g[he>>2];g[Ud>>2]=+g[Od>>2]+ +g[Td>>2];g[be>>2]=+g[Xd>>2]+ +g[ae>>2];g[ed>>2]=+g[Ud>>2]+ +g[be>>2];g[bg>>2]=+g[be>>2]-+g[Ud>>2];g[Kg>>2]=+g[Ue>>2]-+g[Ve>>2];g[Lg>>2]=+g[ae>>2]-+g[Xd>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[hi>>2]=+g[Kg>>2]-+g[Lg>>2];g[We>>2]=+g[Ue>>2]+ +g[Ve>>2];g[Ze>>2]=+g[Xe>>2]+ +g[Ye>>2];g[_e>>2]=+g[We>>2]+ +g[Ze>>2];g[_f>>2]=+g[We>>2]-+g[Ze>>2];g[Fg>>2]=+g[Dd>>2]-+g[de>>2];g[eh>>2]=+g[K>>2]-+g[V>>2];g[fh>>2]=+g[Fg>>2]+ +g[eh>>2];g[Yh>>2]=+g[Fg>>2]-+g[eh>>2];g[Bh>>2]=+g[Td>>2]-+g[Od>>2];g[Ch>>2]=+g[Xe>>2]-+g[Ye>>2];g[Dh>>2]=+g[Bh>>2]-+g[Ch>>2];g[ei>>2]=+g[Bh>>2]+ +g[Ch>>2];g[Cg>>2]=+g[da>>2]-+g[ua>>2];g[Dg>>2]=+g[fe>>2]-+g[ge>>2];g[Eg>>2]=+g[Cg>>2]-+g[Dg>>2];g[Xh>>2]=+g[Cg>>2]+ +g[Dg>>2];g[Wj>>2]=+g[sk>>2]+ +g[Vj>>2];g[X>>2]=+g[va>>2]+ +g[W>>2];g[Y>>2]=+g[Wj>>2]+ +g[X>>2];g[Fe>>2]=+g[Wj>>2]-+g[X>>2];g[xe>>2]=+g[_e>>2]-+g[Te>>2];g[ye>>2]=+g[Fd>>2]-+g[ed>>2];g[ze>>2]=+g[xe>>2]+ +g[ye>>2];g[Je>>2]=+g[xe>>2]-+g[ye>>2];g[Ae>>2]=+g[oc>>2]+ 
+g[Xb>>2];g[Be>>2]=+g[pe>>2]-+g[gf>>2];g[Ce>>2]=+g[Ae>>2]-+g[Be>>2];g[Ke>>2]=+g[Ae>>2]+ +g[Be>>2];g[Oa>>2]=+g[sb>>2]+ +g[Lb>>2];g[Wb>>2]=+g[fb>>2]+ +g[Vb>>2];g[vc>>2]=+g[Oa>>2]+ +g[Wb>>2];g[ve>>2]=+g[Wb>>2]-+g[Oa>>2];g[pc>>2]=+g[Xb>>2]-+g[oc>>2];g[fd>>2]=+g[Fd>>2]+ +g[ed>>2];g[gd>>2]=+g[pc>>2]-+g[fd>>2];g[le>>2]=+g[fd>>2]+ +g[pc>>2];g[Cd>>2]=+g[yd>>2]+ +g[Bd>>2];g[ie>>2]=+g[ee>>2]+ +g[he>>2];g[je>>2]=+g[Cd>>2]+ +g[ie>>2];g[ue>>2]=+g[ie>>2]-+g[Cd>>2];g[$e>>2]=+g[Te>>2]+ +g[_e>>2];g[qe>>2]=+g[gf>>2]+ +g[pe>>2];g[re>>2]=+g[$e>>2]-+g[qe>>2];g[te>>2]=+g[$e>>2]+ +g[qe>>2];g[nd>>2]=+g[jd>>2]+ +g[md>>2];g[ud>>2]=+g[qd>>2]+ +g[td>>2];g[vd>>2]=+g[nd>>2]+ +g[ud>>2];g[Ee>>2]=+g[nd>>2]-+g[ud>>2];g[wc>>2]=+g[Y>>2]+ +g[vc>>2];g[c[n>>2]>>2]=(+g[wc>>2]+ +g[gd>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=(+g[gd>>2]-+g[wc>>2])*.5;g[se>>2]=+g[vd>>2]+ +g[je>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=(+g[se>>2]-+g[te>>2])*.5;g[c[m>>2]>>2]=(+g[se>>2]+ +g[te>>2])*.5;g[ke>>2]=+g[vd>>2]-+g[je>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[ke>>2]-+g[le>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[ke>>2]+ +g[le>>2])*.5;g[me>>2]=+g[vc>>2]-+g[Y>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[me>>2]+ +g[re>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[re>>2]-+g[me>>2])*.5;g[we>>2]=(+g[ue>>2]+ +g[ve>>2])*.5;g[De>>2]=(+g[ze>>2]+ +g[Ce>>2])*.3535533845424652;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[we>>2]+ +g[De>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[De>>2]-+g[we>>2];g[Me>>2]=(+g[Ee>>2]+ +g[Fe>>2])*.5;g[nf>>2]=(+g[Je>>2]+ +g[Ke>>2])*.3535533845424652;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Me>>2]-+g[nf>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Me>>2]+ +g[nf>>2];g[Ge>>2]=(+g[Ee>>2]-+g[Fe>>2])*.5;g[He>>2]=(+g[Ce>>2]-+g[ze>>2])*.3535533845424652;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ge>>2]-+g[He>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ge>>2]+ +g[He>>2];g[Ie>>2]=(+g[ve>>2]-+g[ue>>2])*.5;g[Le>>2]=(+g[Je>>2]-+g[Ke>>2])*.3535533845424652;g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ie>>2]+ 
+g[Le>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Le>>2]-+g[Ie>>2];g[qf>>2]=+g[of>>2]-+g[pf>>2];g[tf>>2]=+g[rf>>2]+ +g[sf>>2];g[uf>>2]=(+g[qf>>2]+ +g[tf>>2])*.3535533845424652;g[Nf>>2]=(+g[qf>>2]-+g[tf>>2])*.3535533845424652;g[Ff>>2]=+g[_f>>2]-+g[Zf>>2];g[Gf>>2]=+g[bg>>2]-+g[ag>>2];g[Hf>>2]=+g[Ff>>2]*.4619397521018982+ +g[Gf>>2]*.19134171307086945;g[Rf>>2]=+g[Ff>>2]*.19134171307086945-+g[Gf>>2]*.4619397521018982;g[If>>2]=+g[fg>>2]-+g[eg>>2];g[Jf>>2]=+g[ig>>2]-+g[hg>>2];g[Kf>>2]=+g[If>>2]*.19134171307086945-+g[Jf>>2]*.4619397521018982;g[Sf>>2]=+g[If>>2]*.4619397521018982+ +g[Jf>>2]*.19134171307086945;g[vf>>2]=+g[td>>2]-+g[qd>>2];g[wf>>2]=+g[Vb>>2]-+g[fb>>2];g[Xf>>2]=(+g[vf>>2]+ +g[wf>>2])*.5;g[Df>>2]=(+g[wf>>2]-+g[vf>>2])*.5;g[$f>>2]=+g[Zf>>2]+ +g[_f>>2];g[cg>>2]=+g[ag>>2]+ +g[bg>>2];g[dg>>2]=+g[$f>>2]*.19134171307086945+ +g[cg>>2]*.4619397521018982;g[xf>>2]=+g[$f>>2]*.4619397521018982-+g[cg>>2]*.19134171307086945;g[mg>>2]=+g[jd>>2]-+g[md>>2];g[ng>>2]=+g[sb>>2]-+g[Lb>>2];g[og>>2]=(+g[mg>>2]+ +g[ng>>2])*.5;g[Mf>>2]=(+g[mg>>2]-+g[ng>>2])*.5;g[pg>>2]=+g[pf>>2]+ +g[of>>2];g[qg>>2]=+g[rf>>2]-+g[sf>>2];g[rg>>2]=(+g[pg>>2]+ +g[qg>>2])*.3535533845424652;g[Cf>>2]=(+g[qg>>2]-+g[pg>>2])*.3535533845424652;g[gg>>2]=+g[eg>>2]+ +g[fg>>2];g[jg>>2]=+g[hg>>2]+ +g[ig>>2];g[kg>>2]=+g[gg>>2]*.4619397521018982-+g[jg>>2]*.19134171307086945;g[yf>>2]=+g[gg>>2]*.19134171307086945+ +g[jg>>2]*.4619397521018982;g[Yf>>2]=+g[uf>>2]+ +g[Xf>>2];g[lg>>2]=+g[dg>>2]+ +g[kg>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Yf>>2]+ +g[lg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[lg>>2]-+g[Yf>>2];g[Af>>2]=+g[og>>2]+ +g[rg>>2];g[Bf>>2]=+g[xf>>2]+ +g[yf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Af>>2]-+g[Bf>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Af>>2]+ +g[Bf>>2];g[sg>>2]=+g[og>>2]-+g[rg>>2];g[tg>>2]=+g[kg>>2]-+g[dg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[sg>>2]-+g[tg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[sg>>2]+ 
+g[tg>>2];g[ug>>2]=+g[Xf>>2]-+g[uf>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[ug>>2]+ +g[zf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[zf>>2]-+g[ug>>2];g[Ef>>2]=+g[Cf>>2]+ +g[Df>>2];g[Lf>>2]=+g[Hf>>2]+ +g[Kf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ef>>2]+ +g[Lf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Lf>>2]-+g[Ef>>2];g[Uf>>2]=+g[Mf>>2]+ +g[Nf>>2];g[Vf>>2]=+g[Rf>>2]+ +g[Sf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Uf>>2]-+g[Vf>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Uf>>2]+ +g[Vf>>2];g[Of>>2]=+g[Mf>>2]-+g[Nf>>2];g[Pf>>2]=+g[Kf>>2]-+g[Hf>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Of>>2]-+g[Pf>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Of>>2]+ +g[Pf>>2];g[Qf>>2]=+g[Df>>2]-+g[Cf>>2];g[Tf>>2]=+g[Rf>>2]-+g[Sf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Qf>>2]+ +g[Tf>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Tf>>2]-+g[Qf>>2];g[Bg>>2]=+g[xg>>2]*.4619397521018982-+g[Ag>>2]*.19134171307086945;g[gh>>2]=+g[Eg>>2]*.4619397521018982+ +g[fh>>2]*.19134171307086945;g[hh>>2]=+g[Bg>>2]+ +g[gh>>2];g[Ji>>2]=+g[Bg>>2]-+g[gh>>2];g[oh>>2]=(+g[kh>>2]+ +g[nh>>2])*.3535533845424652;g[rh>>2]=(+g[ph>>2]-+g[qh>>2])*.5;g[sh>>2]=+g[oh>>2]+ +g[rh>>2];g[zi>>2]=+g[rh>>2]-+g[oh>>2];g[ni>>2]=+g[xg>>2]*.19134171307086945+ +g[Ag>>2]*.4619397521018982;g[oi>>2]=+g[fh>>2]*.4619397521018982-+g[Eg>>2]*.19134171307086945;g[pi>>2]=+g[ni>>2]+ +g[oi>>2];g[yi>>2]=+g[oi>>2]-+g[ni>>2];g[Kh>>2]=(+g[Ih>>2]+ +g[Jh>>2])*.5;g[Nh>>2]=(+g[Lh>>2]+ +g[Mh>>2])*.3535533845424652;g[Oh>>2]=+g[Kh>>2]+ +g[Nh>>2];g[Ii>>2]=+g[Kh>>2]-+g[Nh>>2];g[Vg>>2]=(+g[Rg>>2]+ +g[Ug>>2])*.7071067690849304;g[Zg>>2]=+g[Vg>>2]+ +g[Yg>>2];g[Ei>>2]=+g[Yg>>2]-+g[Vg>>2];g[ah>>2]=(+g[_g>>2]+ +g[$g>>2])*.7071067690849304;g[Fh>>2]=+g[ah>>2]+ +g[dh>>2];g[Fi>>2]=+g[dh>>2]-+g[ah>>2];g[Gh>>2]=+g[Zg>>2]*.49039262533187866-+g[Fh>>2]*.09754516184329987;g[Qh>>2]=+g[Ei>>2]*.41573479771614075+ +g[Fi>>2]*.27778512239456177;g[ui>>2]=+g[Zg>>2]*.09754516184329987+ 
+g[Fh>>2]*.49039262533187866;g[Gi>>2]=+g[Ei>>2]*.27778512239456177-+g[Fi>>2]*.41573479771614075;g[Ah>>2]=(+g[wh>>2]+ +g[zh>>2])*.7071067690849304;g[Gg>>2]=+g[Ah>>2]+ +g[Dh>>2];g[Bi>>2]=+g[Dh>>2]-+g[Ah>>2];g[Jg>>2]=(+g[Hg>>2]+ +g[Ig>>2])*.7071067690849304;g[Ng>>2]=+g[Jg>>2]+ +g[Mg>>2];g[Ci>>2]=+g[Mg>>2]-+g[Jg>>2];g[Og>>2]=+g[Gg>>2]*.49039262533187866+ +g[Ng>>2]*.09754516184329987;g[Ph>>2]=+g[Ci>>2]*.27778512239456177-+g[Bi>>2]*.41573479771614075;g[ti>>2]=+g[Ng>>2]*.49039262533187866-+g[Gg>>2]*.09754516184329987;g[Di>>2]=+g[Bi>>2]*.27778512239456177+ +g[Ci>>2]*.41573479771614075;g[th>>2]=+g[hh>>2]+ +g[sh>>2];g[Hh>>2]=+g[Og>>2]+ +g[Gh>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[th>>2]+ +g[Hh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Hh>>2]-+g[th>>2];g[wi>>2]=+g[Oh>>2]+ +g[pi>>2];g[xi>>2]=+g[ti>>2]+ +g[ui>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[wi>>2]-+g[xi>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[wi>>2]+ +g[xi>>2];g[qi>>2]=+g[Oh>>2]-+g[pi>>2];g[ri>>2]=+g[Gh>>2]-+g[Og>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qi>>2]-+g[ri>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[qi>>2]+ +g[ri>>2];g[si>>2]=+g[sh>>2]-+g[hh>>2];g[vi>>2]=+g[ti>>2]-+g[ui>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[si>>2]+ +g[vi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[vi>>2]-+g[si>>2];g[Ai>>2]=+g[yi>>2]+ +g[zi>>2];g[Hi>>2]=+g[Di>>2]+ +g[Gi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ai>>2]+ +g[Hi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Hi>>2]-+g[Ai>>2];g[Sh>>2]=+g[Ii>>2]+ +g[Ji>>2];g[Th>>2]=+g[Ph>>2]+ +g[Qh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Sh>>2]-+g[Th>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Sh>>2]+ +g[Th>>2];g[Ki>>2]=+g[Ii>>2]-+g[Ji>>2];g[Li>>2]=+g[Gi>>2]-+g[Di>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ki>>2]-+g[Li>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Ki>>2]+ +g[Li>>2];g[Mi>>2]=+g[zi>>2]-+g[yi>>2];g[Rh>>2]=+g[Ph>>2]-+g[Qh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Mi>>2]+ 
+g[Rh>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Rh>>2]-+g[Mi>>2];g[Wh>>2]=+g[Uh>>2]*.19134171307086945-+g[Vh>>2]*.4619397521018982;g[Zh>>2]=+g[Xh>>2]*.19134171307086945+ +g[Yh>>2]*.4619397521018982;g[_h>>2]=+g[Wh>>2]+ +g[Zh>>2];g[tj>>2]=+g[Wh>>2]-+g[Zh>>2];g[$h>>2]=(+g[Mh>>2]-+g[Lh>>2])*.3535533845424652;g[ai>>2]=(+g[qh>>2]+ +g[ph>>2])*.5;g[bi>>2]=+g[$h>>2]+ +g[ai>>2];g[jj>>2]=+g[ai>>2]-+g[$h>>2];g[Wi>>2]=+g[Uh>>2]*.4619397521018982+ +g[Vh>>2]*.19134171307086945;g[Xi>>2]=+g[Yh>>2]*.19134171307086945-+g[Xh>>2]*.4619397521018982;g[$i>>2]=+g[Wi>>2]+ +g[Xi>>2];g[ij>>2]=+g[Xi>>2]-+g[Wi>>2];g[Ti>>2]=(+g[Ih>>2]-+g[Jh>>2])*.5;g[Ui>>2]=(+g[kh>>2]-+g[nh>>2])*.3535533845424652;g[Vi>>2]=+g[Ti>>2]+ +g[Ui>>2];g[sj>>2]=+g[Ti>>2]-+g[Ui>>2];g[ki>>2]=(+g[$g>>2]-+g[_g>>2])*.7071067690849304;g[mi>>2]=+g[ki>>2]+ +g[li>>2];g[oj>>2]=+g[li>>2]-+g[ki>>2];g[Oi>>2]=(+g[Rg>>2]-+g[Ug>>2])*.7071067690849304;g[Qi>>2]=+g[Oi>>2]+ +g[Pi>>2];g[pj>>2]=+g[Pi>>2]-+g[Oi>>2];g[Ri>>2]=+g[mi>>2]*.41573479771614075-+g[Qi>>2]*.27778512239456177;g[yj>>2]=+g[oj>>2]*.49039262533187866+ +g[pj>>2]*.09754516184329987;g[ej>>2]=+g[mi>>2]*.27778512239456177+ +g[Qi>>2]*.41573479771614075;g[qj>>2]=+g[oj>>2]*.09754516184329987-+g[pj>>2]*.49039262533187866;g[di>>2]=(+g[Ig>>2]-+g[Hg>>2])*.7071067690849304;g[fi>>2]=+g[di>>2]+ +g[ei>>2];g[lj>>2]=+g[ei>>2]-+g[di>>2];g[gi>>2]=(+g[wh>>2]-+g[zh>>2])*.7071067690849304;g[ii>>2]=+g[gi>>2]+ +g[hi>>2];g[mj>>2]=+g[hi>>2]-+g[gi>>2];g[ji>>2]=+g[fi>>2]*.41573479771614075+ +g[ii>>2]*.27778512239456177;g[xj>>2]=+g[mj>>2]*.09754516184329987-+g[lj>>2]*.49039262533187866;g[dj>>2]=+g[ii>>2]*.41573479771614075-+g[fi>>2]*.27778512239456177;g[nj>>2]=+g[lj>>2]*.09754516184329987+ +g[mj>>2]*.49039262533187866;g[ci>>2]=+g[_h>>2]+ +g[bi>>2];g[Si>>2]=+g[ji>>2]+ +g[Ri>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ci>>2]+ +g[Si>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Si>>2]-+g[ci>>2];g[gj>>2]=+g[Vi>>2]+ +g[$i>>2];g[hj>>2]=+g[dj>>2]+ 
+g[ej>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[gj>>2]-+g[hj>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[gj>>2]+ +g[hj>>2];g[aj>>2]=+g[Vi>>2]-+g[$i>>2];g[bj>>2]=+g[Ri>>2]-+g[ji>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[aj>>2]-+g[bj>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[aj>>2]+ +g[bj>>2];g[cj>>2]=+g[bi>>2]-+g[_h>>2];g[fj>>2]=+g[dj>>2]-+g[ej>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[cj>>2]+ +g[fj>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[fj>>2]-+g[cj>>2];g[kj>>2]=+g[ij>>2]+ +g[jj>>2];g[rj>>2]=+g[nj>>2]+ +g[qj>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[kj>>2]+ +g[rj>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[rj>>2]-+g[kj>>2];g[Zi>>2]=+g[sj>>2]+ +g[tj>>2];g[_i>>2]=+g[xj>>2]+ +g[yj>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Zi>>2]-+g[_i>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Zi>>2]+ +g[_i>>2];g[uj>>2]=+g[sj>>2]-+g[tj>>2];g[vj>>2]=+g[qj>>2]-+g[nj>>2];g[c[o>>2]>>2]=+g[uj>>2]-+g[vj>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[uj>>2]+ +g[vj>>2];g[wj>>2]=+g[jj>>2]-+g[ij>>2];g[Yi>>2]=+g[xj>>2]-+g[yj>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[wj>>2]+ +g[Yi>>2];g[c[p>>2]>>2]=+g[Yi>>2]-+g[wj>>2];c[xk>>2]=(c[xk>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+32;c[r>>2]=c[r>>2]^c[2998]}i=yk;return}/* Tq(a): coerces a to int, reserves a 16-byte scratch slot by bumping the global i, stores a in it, calls an(a, 41, 4024, 1) and restores i before returning. NOTE(review): presumably a generated codelet-registration stub, with 41 a function-table index and 4024 a descriptor address - confirm against an(). */function Tq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,41,4024,1);i=b;return}/* Uq(a,b,d,e,f,h,j,k,l): every argument is coerced to int and spilled into a 192-byte frame taken from i. The spilled f is first advanced by (j-1)*16 bytes; a counter then runs from j while it is below k. Each pass loads four floats from f, loads the floats at a, b, d, e and at the same four pointers offset by h*4 bytes, and writes eight results, each scaled by .5, back to those same eight slots. Afterwards a and b move forward by l*4 bytes, d and e move back by l*4 bytes and f moves on by 16 bytes. NOTE(review): looks like a single-precision FFTW half-complex radix-4 codelet - confirm. */function Uq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+192|0;m=da+184|0;n=da+180|0;o=da+176|0;p=da+172|0;q=da+168|0;r=da+164|0;ea=da+160|0;s=da+156|0;t=da+152|0;ca=da+144|0;u=da+140|0;w=da+136|0;v=da+132|0;x=da+128|0;y=da+124|0;C=da+120|0;K=da+116|0;V=da+112|0;Q=da+108|0;$=da+104|0;H=da+100|0;_=da+96|0;T=da+92|0;W=da+88|0;I=da+84|0;J=da+80|0;P=da+76|0;M=da+72|0;N=da+68|0;O=da+64|0;B=da+60|0;S=da+56|0;G=da+52|0;R=da+48|0;z=da+44|0;A=da+40|0;E=da+36|0;F=da+32|0;L=da+28|0;U=da+24|0;ba=da+20|0;D=da+16|0;X=da+12|0;Y=da+8|0;Z=da+4|0;aa=da;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ea>>2]=j;c[s>>2]=k;c[t>>2]=l;g[da+148>>2]=.5;c[ca>>2]=c[ea>>2];c[q>>2]=(c[q>>2]|0)+((c[ea>>2]|0)-1<<2<<2);while(1){if((c[ca>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[q>>2]>>2];g[w>>2]=+g[(c[q>>2]|0)+4>>2];g[v>>2]=+g[(c[q>>2]|0)+8>>2];g[x>>2]=+g[(c[q>>2]|0)+12>>2];g[y>>2]=+g[u>>2]*+g[v>>2]+ +g[w>>2]*+g[x>>2];g[C>>2]=+g[u>>2]*+g[x>>2]-+g[w>>2]*+g[v>>2];g[I>>2]=+g[c[n>>2]>>2];g[J>>2]=+g[c[p>>2]>>2];g[P>>2]=+g[I>>2]+ +g[J>>2];g[M>>2]=+g[c[o>>2]>>2];g[N>>2]=+g[c[m>>2]>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[V>>2]=+g[N>>2]+ +g[M>>2];g[Q>>2]=+g[u>>2]*+g[O>>2]-+g[w>>2]*+g[P>>2];g[$>>2]=+g[w>>2]*+g[O>>2]+ +g[u>>2]*+g[P>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[S>>2]=+g[z>>2]+ +g[A>>2];g[E>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[F>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[R>>2]=+g[E>>2]-+g[F>>2];g[H>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[G>>2];g[_>>2]=+g[v>>2]*+g[S>>2]-+g[x>>2]*+g[R>>2];g[T>>2]=+g[v>>2]*+g[R>>2]+ +g[x>>2]*+g[S>>2];g[W>>2]=+g[y>>2]*+g[G>>2]+ +g[C>>2]*+g[B>>2];g[L>>2]=+g[H>>2]+ +g[K>>2];g[U>>2]=+g[Q>>2]-+g[T>>2];g[c[n>>2]>>2]=(+g[L>>2]+ +g[U>>2])*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=(+g[U>>2]-+g[L>>2])*.5;g[ba>>2]=+g[V>>2]+ 
+g[W>>2];g[D>>2]=+g[_>>2]+ +g[$>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[ba>>2]-+g[D>>2])*.5;g[c[m>>2]>>2]=(+g[ba>>2]+ +g[D>>2])*.5;g[X>>2]=+g[V>>2]-+g[W>>2];g[Y>>2]=+g[T>>2]+ +g[Q>>2];g[c[o>>2]>>2]=(+g[X>>2]-+g[Y>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=(+g[X>>2]+ +g[Y>>2])*.5;g[Z>>2]=+g[K>>2]-+g[H>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=(+g[Z>>2]+ +g[aa>>2])*.5;g[c[p>>2]>>2]=(+g[aa>>2]-+g[Z>>2])*.5;c[ca>>2]=(c[ca>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+16}i=da;return}/* Vq(a): coerces a to int, reserves a 16-byte scratch slot by bumping the global i, stores a in it, calls an(a, 42, 4072, 1) and restores i before returning. NOTE(review): presumably a generated codelet-registration stub, with 42 a function-table index and 4072 a descriptor address - confirm against an(). */function Vq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,42,4072,1);i=b;return}/* Wq(a,b,d,e,f,h,j,k,l): every argument is coerced to int and spilled into a 448-byte frame taken from i. The spilled f is first advanced by (j-1)*24 bytes; a counter then runs from j while it is below k. Each pass loads six floats from f, loads the floats at a, b, d, e offset by 0, h, 2*h and 3*h elements, and writes sixteen results back to those same slots. Afterwards a and b move forward by l*4 bytes, d and e move back by l*4 bytes, f moves on by 24 bytes and the spilled copy of h is XORed with the word c[2998]. NOTE(review): looks like a single-precision FFTW half-complex radix-8 codelet - confirm. */function Wq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0;nb=i;i=i+448|0;m=nb+444|0;n=nb+440|0;o=nb+436|0;p=nb+432|0;q=nb+428|0;r=nb+424|0;ob=nb+420|0;s=nb+416|0;t=nb+412|0;mb=nb+400|0;u=nb+396|0;ia=nb+392|0;ga=nb+388|0;ja=nb+384|0;gb=nb+380|0;kb=nb+376|0;la=nb+372|0;Ra=nb+368|0;Wa=nb+364|0;Xa=nb+360|0;Ya=nb+356|0;Da=nb+352|0;ab=nb+348|0;Ba=nb+344|0;ha=nb+340|0;Qa=nb+336|0;ka=nb+332|0;Pa=nb+328|0;ta=nb+324|0;w=nb+320|0;qa=nb+316|0;x=nb+312|0;L=nb+308|0;M=nb+304|0;Y=nb+300|0;Aa=nb+296|0;Fa=nb+292|0;X=nb+288|0;Va=nb+284|0;eb=nb+280|0;A=nb+276|0;z=nb+272|0;I=nb+268|0;J=nb+264|0;U=nb+260|0;Ja=nb+256|0;Ma=nb+252|0;V=nb+248|0;ra=nb+244|0;sa=nb+240|0;za=nb+236|0;wa=nb+232|0;xa=nb+228|0;ya=nb+224|0;jb=nb+220|0;Ea=nb+216|0;pa=nb+212|0;
Ca=nb+208|0;hb=nb+204|0;ib=nb+200|0;lb=nb+196|0;oa=nb+192|0;Oa=nb+188|0;Ia=nb+184|0;Ua=nb+180|0;Ha=nb+176|0;$a=nb+172|0;La=nb+168|0;db=nb+164|0;Ka=nb+160|0;ma=nb+156|0;na=nb+152|0;Sa=nb+148|0;Ta=nb+144|0;Za=nb+140|0;_a=nb+136|0;bb=nb+132|0;cb=nb+128|0;T=nb+124|0;da=nb+120|0;F=nb+116|0;H=nb+112|0;_=nb+108|0;ca=nb+104|0;ba=nb+100|0;G=nb+96|0;R=nb+92|0;S=nb+88|0;ea=nb+84|0;fa=nb+80|0;W=nb+76|0;Z=nb+72|0;$=nb+68|0;aa=nb+64|0;va=nb+60|0;E=nb+56|0;O=nb+52|0;Q=nb+48|0;v=nb+44|0;D=nb+40|0;C=nb+36|0;P=nb+32|0;fb=nb+28|0;ua=nb+24|0;K=nb+20|0;N=nb+16|0;Ga=nb+12|0;Na=nb+8|0;y=nb+4|0;B=nb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ob>>2]=j;c[s>>2]=k;c[t>>2]=l;g[nb+408>>2]=.3535533845424652;g[nb+404>>2]=.5;c[mb>>2]=c[ob>>2];c[q>>2]=(c[q>>2]|0)+(((c[ob>>2]|0)-1|0)*6<<2);while(1){if((c[mb>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[q>>2]>>2];g[ia>>2]=+g[(c[q>>2]|0)+4>>2];g[ga>>2]=+g[(c[q>>2]|0)+8>>2];g[ja>>2]=+g[(c[q>>2]|0)+12>>2];g[ha>>2]=+g[u>>2]*+g[ga>>2];g[Qa>>2]=+g[ia>>2]*+g[ga>>2];g[ka>>2]=+g[ia>>2]*+g[ja>>2];g[Pa>>2]=+g[u>>2]*+g[ja>>2];g[gb>>2]=+g[ha>>2]-+g[ka>>2];g[kb>>2]=+g[Pa>>2]+ +g[Qa>>2];g[la>>2]=+g[ha>>2]+ +g[ka>>2];g[Ra>>2]=+g[Pa>>2]-+g[Qa>>2];g[Wa>>2]=+g[(c[q>>2]|0)+16>>2];g[Xa>>2]=+g[(c[q>>2]|0)+20>>2];g[Ya>>2]=+g[u>>2]*+g[Wa>>2]+ +g[ia>>2]*+g[Xa>>2];g[Da>>2]=+g[la>>2]*+g[Xa>>2]-+g[Ra>>2]*+g[Wa>>2];g[ab>>2]=+g[u>>2]*+g[Xa>>2]-+g[ia>>2]*+g[Wa>>2];g[Ba>>2]=+g[la>>2]*+g[Wa>>2]+ +g[Ra>>2]*+g[Xa>>2];g[ra>>2]=+g[c[n>>2]>>2];g[sa>>2]=+g[c[p>>2]>>2];g[za>>2]=+g[ra>>2]+ +g[sa>>2];g[wa>>2]=+g[c[o>>2]>>2];g[xa>>2]=+g[c[m>>2]>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2];g[hb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ib>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[jb>>2]=+g[hb>>2]-+g[ib>>2];g[Ea>>2]=+g[hb>>2]+ +g[ib>>2];g[lb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[oa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pa>>2]=+g[lb>>2]+ +g[oa>>2];g[Ca>>2]=+g[lb>>2]-+g[oa>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[w>>2]=+g[xa>>2]+ 
+g[wa>>2];g[qa>>2]=+g[gb>>2]*+g[jb>>2]-+g[kb>>2]*+g[pa>>2];g[x>>2]=+g[gb>>2]*+g[pa>>2]+ +g[kb>>2]*+g[jb>>2];g[L>>2]=+g[Ba>>2]*+g[Ea>>2]-+g[Da>>2]*+g[Ca>>2];g[M>>2]=+g[ia>>2]*+g[ya>>2]+ +g[u>>2]*+g[za>>2];g[Y>>2]=+g[M>>2]-+g[L>>2];g[Aa>>2]=+g[u>>2]*+g[ya>>2]-+g[ia>>2]*+g[za>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[Da>>2]*+g[Ea>>2];g[X>>2]=+g[Fa>>2]+ +g[Aa>>2];g[ma>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[na>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Oa>>2]=+g[ma>>2]-+g[na>>2];g[Ia>>2]=+g[ma>>2]+ +g[na>>2];g[Sa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Ta>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[Ha>>2]=+g[Sa>>2]-+g[Ta>>2];g[Za>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[_a>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[$a>>2]=+g[Za>>2]-+g[_a>>2];g[La>>2]=+g[Za>>2]+ +g[_a>>2];g[bb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[cb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[db>>2]=+g[bb>>2]+ +g[cb>>2];g[Ka>>2]=+g[bb>>2]-+g[cb>>2];g[Va>>2]=+g[la>>2]*+g[Oa>>2]-+g[Ra>>2]*+g[Ua>>2];g[eb>>2]=+g[Ya>>2]*+g[$a>>2]-+g[ab>>2]*+g[db>>2];g[A>>2]=+g[Ya>>2]*+g[db>>2]+ +g[ab>>2]*+g[$a>>2];g[z>>2]=+g[la>>2]*+g[Ua>>2]+ +g[Ra>>2]*+g[Oa>>2];g[I>>2]=+g[ga>>2]*+g[Ia>>2]-+g[ja>>2]*+g[Ha>>2];g[J>>2]=+g[Wa>>2]*+g[La>>2]-+g[Xa>>2]*+g[Ka>>2];g[U>>2]=+g[J>>2]-+g[I>>2];g[Ja>>2]=+g[ga>>2]*+g[Ha>>2]+ +g[ja>>2]*+g[Ia>>2];g[Ma>>2]=+g[Wa>>2]*+g[Ka>>2]+ +g[Xa>>2]*+g[La>>2];g[V>>2]=+g[Ja>>2]-+g[Ma>>2];g[R>>2]=+g[ta>>2]-+g[qa>>2];g[S>>2]=+g[z>>2]-+g[A>>2];g[T>>2]=(+g[R>>2]-+g[S>>2])*.5;g[da>>2]=(+g[S>>2]+ +g[R>>2])*.5;g[ea>>2]=+g[U>>2]-+g[V>>2];g[fa>>2]=+g[X>>2]+ +g[Y>>2];g[F>>2]=(+g[ea>>2]-+g[fa>>2])*.3535533845424652;g[H>>2]=(+g[ea>>2]+ +g[fa>>2])*.3535533845424652;g[W>>2]=+g[U>>2]+ +g[V>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=(+g[W>>2]+ +g[Z>>2])*.3535533845424652;g[ca>>2]=(+g[Z>>2]-+g[W>>2])*.3535533845424652;g[$>>2]=+g[w>>2]-+g[x>>2];g[aa>>2]=+g[Va>>2]-+g[eb>>2];g[ba>>2]=(+g[$>>2]-+g[aa>>2])*.5;g[G>>2]=(+g[$>>2]+ +g[aa>>2])*.5;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[T>>2]+ 
+g[_>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]+ +g[H>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_>>2]-+g[T>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[G>>2]-+g[H>>2];g[c[o>>2]>>2]=+g[ba>>2]-+g[ca>>2];g[c[p>>2]>>2]=+g[F>>2]-+g[da>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ba>>2]+ +g[ca>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[da>>2]+ +g[F>>2];g[fb>>2]=+g[Va>>2]+ +g[eb>>2];g[ua>>2]=+g[qa>>2]+ +g[ta>>2];g[va>>2]=+g[fb>>2]+ +g[ua>>2];g[E>>2]=+g[ua>>2]-+g[fb>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[N>>2]=+g[L>>2]+ +g[M>>2];g[O>>2]=+g[K>>2]-+g[N>>2];g[Q>>2]=+g[K>>2]+ +g[N>>2];g[Ga>>2]=+g[Aa>>2]-+g[Fa>>2];g[Na>>2]=+g[Ja>>2]+ +g[Ma>>2];g[v>>2]=+g[Ga>>2]-+g[Na>>2];g[D>>2]=+g[Na>>2]+ +g[Ga>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[C>>2]=+g[y>>2]-+g[B>>2];g[P>>2]=+g[y>>2]+ +g[B>>2];g[c[n>>2]>>2]=(+g[va>>2]+ +g[v>>2])*.5;g[c[m>>2]>>2]=(+g[P>>2]+ +g[Q>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[v>>2]-+g[va>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[P>>2]-+g[Q>>2])*.5;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[C>>2]-+g[D>>2])*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=(+g[O>>2]-+g[E>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[C>>2]+ +g[D>>2])*.5;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[E>>2]+ +g[O>>2])*.5;c[mb>>2]=(c[mb>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24;c[r>>2]=c[r>>2]^c[2998]}i=nb;return}/* Xq(a): coerces a to int, reserves a 16-byte scratch slot by bumping the global i, stores a in it, calls an(a, 43, 4120, 1) and restores i before returning. NOTE(review): presumably a generated codelet-registration stub, with 43 a function-table index and 4120 a descriptor address - confirm against an(). */function Xq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,43,4120,1);i=b;return}/* Yq(a,b,d,e,f,h,j,k,l): every argument is coerced to int and spilled into a 624-byte frame taken from i. The spilled f is first advanced by (j-1)*72 bytes; a counter then runs from j while it is below k. Each pass loads eighteen floats from f, loads the floats at a, b, d, e offset by 0, h, 2*h, 3*h and 4*h elements, and writes twenty results back to those same slots. Afterwards a and b move forward by l*4 bytes, d and e move back by l*4 bytes, f moves on by 72 bytes and the spilled copy of h is XORed with the word c[2998]. NOTE(review): looks like a single-precision FFTW half-complex radix-10 codelet - confirm. */function Yq(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0;$b=i;i=i+624|0;m=$b+616|0;n=$b+612|0;o=$b+608|0;p=$b+604|0;q=$b+600|0;r=$b+596|0;ac=$b+592|0;s=$b+588|0;t=$b+584|0;_b=$b+560|0;Wb=$b+556|0;lb=$b+552|0;mb=$b+548|0;$=$b+544|0;aa=$b+540|0;La=$b+536|0;ca=$b+532|0;Ea=$b+528|0;Ka=$b+524|0;xb=$b+520|0;da=$b+516|0;ea=$b+512|0;ga=$b+508|0;ha=$b+504|0;ia=$b+500|0;ta=$b+496|0;wa=$b+492|0;xa=$b+488|0;Aa=$b+484|0;F=$b+480|0;G=$b+476|0;ja=$b+472|0;ka=$b+468|0;la=$b+464|0;Db=$b+460|0;Ja=$b+456|0;na=$b+452|0;K=$b+448|0;Va=$b+444|0;I=$b+440|0;E=$b+436|0;Ca=$b+432|0;Za=$b+428|0;Bb=$b+424|0;rb=$b+420|0;ya=$b+416|0;Mb=$b+412|0;ra=$b+408|0;kb=$b+404|0;va=$b+400|0;Vb=$b+396|0;sa=$b+392|0;x=$b+388|0;Ba=$b+384|0;fb=$b+380|0;ua=$b+376|0;wb=$b+372|0;za=$b+368|0;u=$b+364|0;Da=$b+360|0;D=$b+356|0;z=$b+352|0;A=$b+348|0;B=$b+344|0;y=$b+340|0;C=$b+336|0;Xa=$b+332|0;Ya=$b+328|0;ob=$b+324|0;$a=$b+320|0;Ab=$b+316|0;qb=$b+312|0;nb=$b+308|0;pb=$b+304|0;Hb=$b+300|0;jb=$b+296|0;Lb=$b+292|0;hb=$b+288|0;Fb=$b+284|0;Gb=$b+280|0;Jb=$b+276|0;Kb=$b+272|0;Eb=$b+268|0;Ib=$b+264|0;gb=$b+260|0;ib=$b+256|0;Qb=$b+252|0;zb=$b+248|0;Ub=$b+244|0;w=$b+240|0;Ob=$b+236|0;Pb=$b+232|0;Sb=$b+228|0;Tb=$b+224|0;Nb=$b+220|0;Rb=$b+216|0;yb=$b+212|0;v=$b+208|0;ab=$b+204|0;vb=$b+200|0;eb=$b+196|0;tb=$b+192|0;Yb=$b+188|0;Zb=$b+184|0;cb=$b+180|0;db=$b+176|0;Xb=$b+172|0;bb=$b+168|0;
sb=$b+164|0;ub=$b+160|0;Cb=$b+156|0;J=$b+152|0;Wa=$b+148|0;_a=$b+144|0;Z=$b+140|0;fa=$b+136|0;Y=$b+132|0;Ga=$b+128|0;Ia=$b+124|0;ba=$b+120|0;Fa=$b+116|0;Ha=$b+112|0;_=$b+108|0;Oa=$b+104|0;Ma=$b+100|0;Na=$b+96|0;Sa=$b+92|0;Ua=$b+88|0;Qa=$b+84|0;Ra=$b+80|0;Ta=$b+76|0;Pa=$b+72|0;R=$b+68|0;ma=$b+64|0;Q=$b+60|0;V=$b+56|0;X=$b+52|0;T=$b+48|0;U=$b+44|0;W=$b+40|0;S=$b+36|0;H=$b+32|0;L=$b+28|0;M=$b+24|0;qa=$b+20|0;O=$b+16|0;oa=$b+12|0;pa=$b+8|0;P=$b+4|0;N=$b;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ac>>2]=j;c[s>>2]=k;c[t>>2]=l;g[$b+580>>2]=.29389262199401855;g[$b+576>>2]=.4755282700061798;g[$b+572>>2]=.125;g[$b+568>>2]=.5;g[$b+564>>2]=.279508501291275;c[_b>>2]=c[ac>>2];c[q>>2]=(c[q>>2]|0)+(((c[ac>>2]|0)-1|0)*18<<2);while(1){if((c[_b>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[n>>2]>>2];g[Da>>2]=+g[c[p>>2]>>2];g[D>>2]=+g[u>>2]+ +g[Da>>2];g[z>>2]=+g[c[o>>2]>>2];g[A>>2]=+g[c[m>>2]>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[Va>>2]=+g[u>>2]-+g[Da>>2];g[I>>2]=+g[A>>2]+ +g[z>>2];g[y>>2]=+g[c[q>>2]>>2];g[C>>2]=+g[(c[q>>2]|0)+4>>2];g[E>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[D>>2];g[Ca>>2]=+g[C>>2]*+g[B>>2]+ +g[y>>2]*+g[D>>2];g[Xa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ya>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ob>>2]=+g[Xa>>2]-+g[Ya>>2];g[$a>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ab>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[qb>>2]=+g[$a>>2]+ +g[Ab>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Bb>>2]=+g[$a>>2]-+g[Ab>>2];g[nb>>2]=+g[(c[q>>2]|0)+24>>2];g[pb>>2]=+g[(c[q>>2]|0)+28>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]-+g[pb>>2]*+g[qb>>2];g[ya>>2]=+g[pb>>2]*+g[ob>>2]+ +g[nb>>2]*+g[qb>>2];g[Fb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Gb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[jb>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Jb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Kb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Lb>>2]=+g[Jb>>2]+ 
+g[Kb>>2];g[hb>>2]=+g[Jb>>2]-+g[Kb>>2];g[Eb>>2]=+g[(c[q>>2]|0)+8>>2];g[Ib>>2]=+g[(c[q>>2]|0)+12>>2];g[Mb>>2]=+g[Eb>>2]*+g[Hb>>2]-+g[Ib>>2]*+g[Lb>>2];g[ra>>2]=+g[Eb>>2]*+g[Lb>>2]+ +g[Ib>>2]*+g[Hb>>2];g[gb>>2]=+g[(c[q>>2]|0)+16>>2];g[ib>>2]=+g[(c[q>>2]|0)+20>>2];g[kb>>2]=+g[gb>>2]*+g[hb>>2]+ +g[ib>>2]*+g[jb>>2];g[va>>2]=+g[gb>>2]*+g[jb>>2]-+g[ib>>2]*+g[hb>>2];g[Ob>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Pb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Qb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[zb>>2]=+g[Ob>>2]-+g[Pb>>2];g[Sb>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Tb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2];g[w>>2]=+g[Sb>>2]+ +g[Tb>>2];g[Nb>>2]=+g[(c[q>>2]|0)+52>>2];g[Rb>>2]=+g[(c[q>>2]|0)+48>>2];g[Vb>>2]=+g[Nb>>2]*+g[Qb>>2]+ +g[Rb>>2]*+g[Ub>>2];g[sa>>2]=+g[Rb>>2]*+g[Qb>>2]-+g[Nb>>2]*+g[Ub>>2];g[yb>>2]=+g[(c[q>>2]|0)+40>>2];g[v>>2]=+g[(c[q>>2]|0)+44>>2];g[x>>2]=+g[yb>>2]*+g[zb>>2]-+g[v>>2]*+g[w>>2];g[Ba>>2]=+g[v>>2]*+g[zb>>2]+ +g[yb>>2]*+g[w>>2];g[Yb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Zb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ab>>2]=+g[Yb>>2]-+g[Zb>>2];g[vb>>2]=+g[Yb>>2]+ +g[Zb>>2];g[cb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[db>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[tb>>2]=+g[cb>>2]-+g[db>>2];g[Xb>>2]=+g[(c[q>>2]|0)+56>>2];g[bb>>2]=+g[(c[q>>2]|0)+60>>2];g[fb>>2]=+g[Xb>>2]*+g[ab>>2]-+g[bb>>2]*+g[eb>>2];g[ua>>2]=+g[Xb>>2]*+g[eb>>2]+ +g[bb>>2]*+g[ab>>2];g[sb>>2]=+g[(c[q>>2]|0)+64>>2];g[ub>>2]=+g[(c[q>>2]|0)+68>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[za>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[Wb>>2]=+g[Mb>>2]-+g[Vb>>2];g[lb>>2]=+g[fb>>2]-+g[kb>>2];g[mb>>2]=+g[Wb>>2]+ +g[lb>>2];g[$>>2]=+g[ya>>2]+ +g[za>>2];g[aa>>2]=+g[Ba>>2]+ +g[Ca>>2];g[La>>2]=+g[$>>2]+ +g[aa>>2];g[ca>>2]=+g[ra>>2]+ +g[sa>>2];g[Ea>>2]=+g[va>>2]+ +g[ua>>2];g[Ka>>2]=+g[ca>>2]+ +g[Ea>>2];g[xb>>2]=+g[rb>>2]-+g[wb>>2];g[da>>2]=+g[x>>2]+ +g[E>>2];g[ea>>2]=+g[xb>>2]+ +g[da>>2];g[ga>>2]=+g[E>>2]-+g[x>>2];g[ha>>2]=+g[rb>>2]+ 
+g[wb>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[F>>2]=+g[Ba>>2]-+g[Ca>>2];g[G>>2]=+g[Aa>>2]+ +g[F>>2];g[ja>>2]=+g[Mb>>2]+ +g[Vb>>2];g[ka>>2]=+g[kb>>2]+ +g[fb>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[Wa>>2]=+g[(c[q>>2]|0)+36>>2];g[_a>>2]=+g[(c[q>>2]|0)+32>>2];g[Cb>>2]=+g[Wa>>2]*+g[Za>>2]+ +g[_a>>2]*+g[Bb>>2];g[J>>2]=+g[_a>>2]*+g[Za>>2]-+g[Wa>>2]*+g[Bb>>2];g[Db>>2]=+g[Va>>2]-+g[Cb>>2];g[Ja>>2]=+g[I>>2]+ +g[J>>2];g[na>>2]=+g[Cb>>2]+ +g[Va>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[Z>>2]=(+g[mb>>2]-+g[ea>>2])*.279508501291275;g[fa>>2]=+g[mb>>2]+ +g[ea>>2];g[Y>>2]=+g[Db>>2]*.5-+g[fa>>2]*.125;g[ba>>2]=+g[$>>2]-+g[aa>>2];g[Fa>>2]=+g[ca>>2]-+g[Ea>>2];g[Ga>>2]=+g[ba>>2]*.4755282700061798-+g[Fa>>2]*.29389262199401855;g[Ia>>2]=+g[Fa>>2]*.4755282700061798+ +g[ba>>2]*.29389262199401855;g[c[n>>2]>>2]=(+g[Db>>2]+ +g[fa>>2])*.5;g[Ha>>2]=+g[Z>>2]+ +g[Y>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ha>>2]+ +g[Ia>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ia>>2]-+g[Ha>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_>>2]+ +g[Ga>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ga>>2]-+g[_>>2];g[Oa>>2]=(+g[Ka>>2]-+g[La>>2])*.279508501291275;g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[Na>>2]=+g[Ja>>2]*.5-+g[Ma>>2]*.125;g[Qa>>2]=+g[xb>>2]-+g[da>>2];g[Ra>>2]=+g[Wb>>2]-+g[lb>>2];g[Sa>>2]=+g[Qa>>2]*.4755282700061798-+g[Ra>>2]*.29389262199401855;g[Ua>>2]=+g[Ra>>2]*.4755282700061798+ +g[Qa>>2]*.29389262199401855;g[c[m>>2]>>2]=(+g[Ja>>2]+ +g[Ma>>2])*.5;g[Ta>>2]=+g[Oa>>2]+ +g[Na>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ta>>2]-+g[Ua>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ua>>2]+ +g[Ta>>2];g[Pa>>2]=+g[Na>>2]-+g[Oa>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Pa>>2]-+g[Sa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sa>>2]+ +g[Pa>>2];g[R>>2]=(+g[la>>2]+ +g[ia>>2])*.279508501291275;g[ma>>2]=+g[ia>>2]-+g[la>>2];g[Q>>2]=+g[na>>2]*.5+ 
+g[ma>>2]*.125;g[T>>2]=+g[F>>2]-+g[Aa>>2];g[U>>2]=+g[ta>>2]-+g[wa>>2];g[V>>2]=+g[T>>2]*.29389262199401855-+g[U>>2]*.4755282700061798;g[X>>2]=+g[U>>2]*.29389262199401855+ +g[T>>2]*.4755282700061798;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[ma>>2]-+g[na>>2])*.5;g[W>>2]=+g[Q>>2]-+g[R>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[W>>2]+ +g[X>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[X>>2]-+g[W>>2];g[S>>2]=+g[Q>>2]+ +g[R>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[S>>2]+ +g[V>>2];g[c[p>>2]>>2]=+g[V>>2]-+g[S>>2];g[H>>2]=(+g[xa>>2]-+g[G>>2])*.279508501291275;g[L>>2]=+g[xa>>2]+ +g[G>>2];g[M>>2]=+g[K>>2]*.5-+g[L>>2]*.125;g[oa>>2]=+g[ja>>2]-+g[ka>>2];g[pa>>2]=+g[ha>>2]+ +g[ga>>2];g[qa>>2]=+g[oa>>2]*.4755282700061798+ +g[pa>>2]*.29389262199401855;g[O>>2]=+g[pa>>2]*.4755282700061798-+g[oa>>2]*.29389262199401855;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[K>>2]+ +g[L>>2])*.5;g[P>>2]=+g[M>>2]-+g[H>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[O>>2]+ +g[P>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[P>>2]-+g[O>>2];g[N>>2]=+g[H>>2]+ +g[M>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[qa>>2]+ +g[N>>2];g[c[o>>2]>>2]=+g[N>>2]-+g[qa>>2];c[_b>>2]=(c[_b>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+72;c[r>>2]=c[r>>2]^c[2998]}i=$b;return}/* Zq(a): coerces a to int, reserves a 16-byte scratch slot by bumping the global i, stores a in it, calls an(a, 44, 4168, 1) and restores i before returning. NOTE(review): presumably a generated codelet-registration stub, with 44 a function-table index and 4168 a descriptor address - confirm against an(). */function Zq(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,44,4168,1);i=b;return}function _q(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0;xc=i;i=i+720|0;m=xc+704|0;n=xc+700|0;o=xc+696|0;p=xc+692|0;q=xc+688|0;r=xc+684|0;yc=xc+680|0;s=xc+676|0;t=xc+672|0;wc=xc+656|0;ic=xc+652|0;wa=xc+648|0;ga=xc+644|0;ab=xc+640|0;fc=xc+636|0;ba=xc+632|0;za=xc+628|0;L=xc+624|0;ta=xc+620|0;hb=xc+616|0;Z=xc+612|0;bb=xc+608|0;sc=xc+604|0;Ba=xc+600|0;C=xc+596|0;Za=xc+592|0;Nb=xc+588|0;Fa=xc+584|0;G=xc+580|0;O=xc+576|0;x=xc+572|0;lb=xc+568|0;U=xc+564|0;_a=xc+560|0;gc=xc+556|0;hc=xc+552|0;na=xc+548|0;pa=xc+544|0;qa=xc+540|0;ra=xc+536|0;sb=xc+532|0;ia=xc+528|0;wb=xc+524|0;ka=xc+520|0;dc=xc+516|0;fa=xc+512|0;$b=xc+508|0;da=xc+504|0;E=xc+500|0;ea=xc+496|0;Da=xc+492|0;rb=xc+488|0;ub=xc+484|0;vb=xc+480|0;bc=xc+476|0;cc=xc+472|0;Zb=xc+468|0;_b=xc+464|0;xb=xc+460|0;xa=xc+456|0;ec=xc+452|0;ya=xc+448|0;u=xc+444|0;tb=xc+440|0;Yb=xc+436|0;ac=xc+432|0;la=xc+428|0;X=xc+424|0;sa=xc+420|0;Y=xc+416|0;ha=xc+412|0;ja=xc+408|0;ma=xc+404|0;oa=xc+400|0;nc=xc+396|0;Rb=xc+392|0;rc=xc+388|0;Tb=xc+384|0;yb=xc+380|0;Wb=xc+376|0;Cb=xc+372|0;v=xc+368|0;Lb=xc+364|0;z=xc+360|0;Hb=xc+356|0;B=xc+352|0;lc=xc+348|0;mc=xc+344|0;Ab=xc+340|0;Bb=xc+336|0;pc=xc+332|0;qc=xc+328|0;uc=xc+324|0;vc=xc+320|0;Jb=xc+316|0;Kb=xc+312|0;Fb=xc+308|0;Gb=xc+304|
0;kc=xc+300|0;oc=xc+296|0;y=xc+292|0;A=xc+288|0;Db=xc+284|0;Ca=xc+280|0;Mb=xc+276|0;F=xc+272|0;tc=xc+268|0;zb=xc+264|0;Eb=xc+260|0;Ib=xc+256|0;Ub=xc+252|0;T=xc+248|0;w=xc+244|0;S=xc+240|0;Qb=xc+236|0;Sb=xc+232|0;Vb=xc+228|0;Xb=xc+224|0;Pb=xc+220|0;Ya=xc+216|0;db=xc+212|0;fb=xc+208|0;va=xc+204|0;J=xc+200|0;I=xc+196|0;eb=xc+192|0;jc=xc+188|0;Ob=xc+184|0;$a=xc+180|0;cb=xc+176|0;D=xc+172|0;ua=xc+168|0;Aa=xc+164|0;H=xc+160|0;M=xc+156|0;Ua=xc+152|0;jb=xc+148|0;Pa=xc+144|0;mb=xc+140|0;Oa=xc+136|0;P=xc+132|0;Va=xc+128|0;V=xc+124|0;Na=xc+120|0;ca=xc+116|0;Ja=xc+112|0;Ga=xc+108|0;Ka=xc+104|0;_=xc+100|0;Ma=xc+96|0;K=xc+92|0;ib=xc+88|0;kb=xc+84|0;N=xc+80|0;R=xc+76|0;aa=xc+72|0;Ea=xc+68|0;W=xc+64|0;Q=xc+60|0;$=xc+56|0;gb=xc+52|0;nb=xc+48|0;Ha=xc+44|0;Ia=xc+40|0;ob=xc+36|0;pb=xc+32|0;La=xc+28|0;Ta=xc+24|0;qb=xc+20|0;Qa=xc+16|0;Wa=xc+12|0;Xa=xc+8|0;Ra=xc+4|0;Sa=xc;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[yc>>2]=j;c[s>>2]=k;c[t>>2]=l;g[xc+668>>2]=.25;g[xc+664>>2]=.5;g[xc+660>>2]=.4330126941204071;c[wc>>2]=c[yc>>2];c[q>>2]=(c[q>>2]|0)+(((c[yc>>2]|0)-1|0)*22<<2);while(1){if((c[wc>>2]|0)>=(c[s>>2]|0))break;g[gc>>2]=+g[c[n>>2]>>2];g[hc>>2]=+g[c[p>>2]>>2];g[na>>2]=+g[gc>>2]+ +g[hc>>2];g[pa>>2]=+g[c[m>>2]>>2];g[qa>>2]=+g[c[o>>2]>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[Da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[rb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[sb>>2]=+g[Da>>2]-+g[rb>>2];g[ia>>2]=+g[Da>>2]+ +g[rb>>2];g[ub>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[vb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[wb>>2]=+g[ub>>2]+ +g[vb>>2];g[ka>>2]=+g[ub>>2]-+g[vb>>2];g[bc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[cc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[fa>>2]=+g[bc>>2]-+g[cc>>2];g[Zb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[_b>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[da>>2]=+g[Zb>>2]+ +g[_b>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[wa>>2]=+g[pa>>2]+ 
+g[qa>>2];g[E>>2]=+g[(c[q>>2]|0)+64>>2];g[ea>>2]=+g[(c[q>>2]|0)+68>>2];g[ga>>2]=+g[E>>2]*+g[da>>2]-+g[ea>>2]*+g[fa>>2];g[ab>>2]=+g[E>>2]*+g[fa>>2]+ +g[ea>>2]*+g[da>>2];g[u>>2]=+g[(c[q>>2]|0)+24>>2];g[tb>>2]=+g[(c[q>>2]|0)+28>>2];g[xb>>2]=+g[u>>2]*+g[sb>>2]-+g[tb>>2]*+g[wb>>2];g[xa>>2]=+g[u>>2]*+g[wb>>2]+ +g[tb>>2]*+g[sb>>2];g[Yb>>2]=+g[(c[q>>2]|0)+56>>2];g[ac>>2]=+g[(c[q>>2]|0)+60>>2];g[ec>>2]=+g[Yb>>2]*+g[$b>>2]-+g[ac>>2]*+g[dc>>2];g[ya>>2]=+g[Yb>>2]*+g[dc>>2]+ +g[ac>>2]*+g[$b>>2];g[fc>>2]=+g[xb>>2]+ +g[ec>>2];g[ba>>2]=(+g[ya>>2]-+g[xa>>2])*.4330126941204071;g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[L>>2]=(+g[xb>>2]-+g[ec>>2])*.4330126941204071;g[ha>>2]=+g[(c[q>>2]|0)+32>>2];g[ja>>2]=+g[(c[q>>2]|0)+36>>2];g[la>>2]=+g[ha>>2]*+g[ia>>2]-+g[ja>>2]*+g[ka>>2];g[X>>2]=+g[ha>>2]*+g[ka>>2]+ +g[ja>>2]*+g[ia>>2];g[ma>>2]=+g[c[q>>2]>>2];g[oa>>2]=+g[(c[q>>2]|0)+4>>2];g[sa>>2]=+g[ma>>2]*+g[na>>2]-+g[oa>>2]*+g[ra>>2];g[Y>>2]=+g[ma>>2]*+g[ra>>2]+ +g[oa>>2]*+g[na>>2];g[ta>>2]=+g[la>>2]+ +g[sa>>2];g[hb>>2]=(+g[la>>2]-+g[sa>>2])*.4330126941204071;g[Z>>2]=(+g[X>>2]-+g[Y>>2])*.4330126941204071;g[bb>>2]=+g[X>>2]+ +g[Y>>2];g[lc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[mc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[Rb>>2]=+g[lc>>2]+ +g[mc>>2];g[pc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[qc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[rc>>2]=+g[pc>>2]+ +g[qc>>2];g[Tb>>2]=+g[pc>>2]-+g[qc>>2];g[uc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[vc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[yb>>2]=+g[uc>>2]-+g[vc>>2];g[Wb>>2]=+g[uc>>2]+ +g[vc>>2];g[Ab>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Bb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Cb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[v>>2]=+g[Ab>>2]-+g[Bb>>2];g[Jb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Kb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Lb>>2]=+g[Jb>>2]+ +g[Kb>>2];g[z>>2]=+g[Kb>>2]-+g[Jb>>2];g[Fb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Gb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[B>>2]=+g[Fb>>2]+ 
+g[Gb>>2];g[kc>>2]=+g[(c[q>>2]|0)+40>>2];g[oc>>2]=+g[(c[q>>2]|0)+44>>2];g[sc>>2]=+g[kc>>2]*+g[nc>>2]-+g[oc>>2]*+g[rc>>2];g[Ba>>2]=+g[kc>>2]*+g[rc>>2]+ +g[oc>>2]*+g[nc>>2];g[y>>2]=+g[(c[q>>2]|0)+20>>2];g[A>>2]=+g[(c[q>>2]|0)+16>>2];g[C>>2]=+g[y>>2]*+g[z>>2]+ +g[A>>2]*+g[B>>2];g[Za>>2]=+g[A>>2]*+g[z>>2]-+g[y>>2]*+g[B>>2];g[tc>>2]=+g[(c[q>>2]|0)+72>>2];g[zb>>2]=+g[(c[q>>2]|0)+76>>2];g[Db>>2]=+g[tc>>2]*+g[yb>>2]-+g[zb>>2]*+g[Cb>>2];g[Ca>>2]=+g[tc>>2]*+g[Cb>>2]+ +g[zb>>2]*+g[yb>>2];g[Eb>>2]=+g[(c[q>>2]|0)+8>>2];g[Ib>>2]=+g[(c[q>>2]|0)+12>>2];g[Mb>>2]=+g[Eb>>2]*+g[Hb>>2]-+g[Ib>>2]*+g[Lb>>2];g[F>>2]=+g[Eb>>2]*+g[Lb>>2]+ +g[Ib>>2]*+g[Hb>>2];g[Nb>>2]=+g[Db>>2]+ +g[Mb>>2];g[Fa>>2]=(+g[F>>2]-+g[Ca>>2])*.4330126941204071;g[G>>2]=+g[Ca>>2]+ +g[F>>2];g[O>>2]=(+g[Db>>2]-+g[Mb>>2])*.4330126941204071;g[Qb>>2]=+g[(c[q>>2]|0)+48>>2];g[Sb>>2]=+g[(c[q>>2]|0)+52>>2];g[Ub>>2]=+g[Qb>>2]*+g[Rb>>2]-+g[Sb>>2]*+g[Tb>>2];g[T>>2]=+g[Qb>>2]*+g[Tb>>2]+ +g[Sb>>2]*+g[Rb>>2];g[Vb>>2]=+g[(c[q>>2]|0)+80>>2];g[Xb>>2]=+g[(c[q>>2]|0)+84>>2];g[w>>2]=+g[Vb>>2]*+g[Wb>>2]-+g[Xb>>2]*+g[v>>2];g[S>>2]=+g[Vb>>2]*+g[v>>2]+ +g[Xb>>2]*+g[Wb>>2];g[x>>2]=+g[Ub>>2]+ +g[w>>2];g[lb>>2]=(+g[w>>2]-+g[Ub>>2])*.4330126941204071;g[U>>2]=(+g[S>>2]-+g[T>>2])*.4330126941204071;g[_a>>2]=+g[T>>2]+ +g[S>>2];g[jc>>2]=+g[fc>>2]+ +g[ic>>2];g[Ob>>2]=+g[sc>>2]+ +g[Nb>>2];g[Pb>>2]=+g[jc>>2]-+g[Ob>>2];g[Ya>>2]=+g[Ob>>2]+ +g[jc>>2];g[$a>>2]=+g[Za>>2]-+g[_a>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[db>>2]=+g[$a>>2]-+g[cb>>2];g[fb>>2]=+g[$a>>2]+ +g[cb>>2];g[D>>2]=+g[x>>2]+ +g[C>>2];g[ua>>2]=+g[ga>>2]+ +g[ta>>2];g[va>>2]=+g[D>>2]-+g[ua>>2];g[J>>2]=+g[D>>2]+ +g[ua>>2];g[Aa>>2]=+g[wa>>2]+ +g[za>>2];g[H>>2]=+g[Ba>>2]+ +g[G>>2];g[I>>2]=+g[Aa>>2]+ +g[H>>2];g[eb>>2]=+g[Aa>>2]-+g[H>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[Pb>>2]+ +g[va>>2])*.5;g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[eb>>2]-+g[fb>>2])*.5;g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[va>>2]-+g[Pb>>2])*.5;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[eb>>2]+ 
+g[fb>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[I>>2]-+g[J>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[db>>2]-+g[Ya>>2])*.5;g[c[m>>2]>>2]=(+g[I>>2]+ +g[J>>2])*.5;g[c[n>>2]>>2]=(+g[Ya>>2]+ +g[db>>2])*.5;g[K>>2]=+g[wa>>2]*.5-+g[za>>2]*.25;g[M>>2]=+g[K>>2]-+g[L>>2];g[Ua>>2]=+g[K>>2]+ +g[L>>2];g[ib>>2]=+g[bb>>2]*.25-+g[ab>>2]*.5;g[jb>>2]=+g[hb>>2]-+g[ib>>2];g[Pa>>2]=+g[hb>>2]+ +g[ib>>2];g[kb>>2]=+g[_a>>2]*.25+ +g[Za>>2]*.5;g[mb>>2]=+g[kb>>2]-+g[lb>>2];g[Oa>>2]=+g[lb>>2]+ +g[kb>>2];g[N>>2]=+g[Ba>>2]*.5-+g[G>>2]*.25;g[P>>2]=+g[N>>2]-+g[O>>2];g[Va>>2]=+g[N>>2]+ +g[O>>2];g[R>>2]=+g[C>>2]*.5-+g[x>>2]*.25;g[V>>2]=+g[R>>2]-+g[U>>2];g[Na>>2]=+g[U>>2]+ +g[R>>2];g[aa>>2]=+g[ic>>2]*.5-+g[fc>>2]*.25;g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[Ja>>2]=+g[ba>>2]+ +g[aa>>2];g[Ea>>2]=+g[sc>>2]*.5-+g[Nb>>2]*.25;g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Ka>>2]=+g[Fa>>2]+ +g[Ea>>2];g[W>>2]=+g[ga>>2]*.5-+g[ta>>2]*.25;g[_>>2]=+g[W>>2]-+g[Z>>2];g[Ma>>2]=+g[W>>2]+ +g[Z>>2];g[Q>>2]=+g[M>>2]+ +g[P>>2];g[$>>2]=+g[V>>2]+ +g[_>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Q>>2]-+g[$>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Q>>2]+ +g[$>>2];g[gb>>2]=+g[Ga>>2]+ +g[ca>>2];g[nb>>2]=+g[jb>>2]-+g[mb>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[gb>>2]+ +g[nb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[nb>>2]-+g[gb>>2];g[Ha>>2]=+g[ca>>2]-+g[Ga>>2];g[Ia>>2]=+g[_>>2]-+g[V>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ha>>2]+ +g[Ia>>2];g[c[p>>2]>>2]=+g[Ia>>2]-+g[Ha>>2];g[ob>>2]=+g[M>>2]-+g[P>>2];g[pb>>2]=+g[mb>>2]+ +g[jb>>2];g[c[o>>2]>>2]=+g[ob>>2]-+g[pb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ob>>2]+ +g[pb>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[Ta>>2]=+g[Ma>>2]-+g[Na>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[La>>2]+ +g[Ta>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ta>>2]-+g[La>>2];g[qb>>2]=+g[Ua>>2]-+g[Va>>2];g[Qa>>2]=+g[Oa>>2]-+g[Pa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[qb>>2]-+g[Qa>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[qb>>2]+ +g[Qa>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[Xa>>2]=+g[Na>>2]+ 
+g[Ma>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Wa>>2]-+g[Xa>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Wa>>2]+ +g[Xa>>2];g[Ra>>2]=+g[Ka>>2]+ +g[Ja>>2];g[Sa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ra>>2]+ +g[Sa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sa>>2]-+g[Ra>>2];c[wc>>2]=(c[wc>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+88;c[r>>2]=c[r>>2]^c[2998]}i=xc;return}function $q(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,45,4216,1);i=b;return}function ar(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0;Pd=i;i=i+1008|0;m=Pd+996|0;n=Pd+992|0;o=Pd+988|0;p=Pd+984|0;q=Pd+980|0;r=Pd+976|0;Qd=Pd+972|0;s=Pd+968|0;t=Pd+964|
0;Od=Pd+944|0;E=Pd+940|0;rc=Pd+936|0;Fa=Pd+932|0;kb=Pd+928|0;ra=Pd+924|0;nc=Pd+920|0;Eb=Pd+916|0;Wb=Pd+912|0;kd=Pd+908|0;sc=Pd+904|0;Ia=Pd+900|0;lb=Pd+896|0;Ca=Pd+892|0;mc=Pd+888|0;Bb=Pd+884|0;kc=Pd+880|0;xd=Pd+876|0;eb=Pd+872|0;Ma=Pd+868|0;db=Pd+864|0;Q=Pd+860|0;Tb=Pd+856|0;ub=Pd+852|0;Pb=Pd+848|0;Sc=Pd+844|0;hb=Pd+840|0;nb=Pd+836|0;gb=Pd+832|0;$=Pd+828|0;Sb=Pd+824|0;xb=Pd+820|0;Qb=Pd+816|0;v=Pd+812|0;pa=Pd+808|0;z=Pd+804|0;na=Pd+800|0;D=Pd+796|0;ka=Pd+792|0;ia=Pd+788|0;ca=Pd+784|0;md=Pd+780|0;nd=Pd+776|0;x=Pd+772|0;y=Pd+768|0;B=Pd+764|0;C=Pd+760|0;ga=Pd+756|0;ha=Pd+752|0;A=Pd+748|0;Ea=Pd+744|0;ld=Pd+740|0;w=Pd+736|0;la=Pd+732|0;Db=Pd+728|0;qa=Pd+724|0;Cb=Pd+720|0;fa=Pd+716|0;ja=Pd+712|0;ma=Pd+708|0;oa=Pd+704|0;Xc=Pd+700|0;va=Pd+696|0;$c=Pd+692|0;ta=Pd+688|0;ed=Pd+684|0;Aa=Pd+680|0;id=Pd+676|0;ya=Pd+672|0;Vc=Pd+668|0;Wc=Pd+664|0;Zc=Pd+660|0;_c=Pd+656|0;cd=Pd+652|0;dd=Pd+648|0;gd=Pd+644|0;hd=Pd+640|0;ad=Pd+636|0;Ga=Pd+632|0;jd=Pd+628|0;Ha=Pd+624|0;Uc=Pd+620|0;Yc=Pd+616|0;bd=Pd+612|0;fd=Pd+608|0;wa=Pd+604|0;zb=Pd+600|0;Ba=Pd+596|0;Ab=Pd+592|0;sa=Pd+588|0;ua=Pd+584|0;xa=Pd+580|0;za=Pd+576|0;Kc=Pd+572|0;J=Pd+568|0;Oc=Pd+564|0;H=Pd+560|0;rd=Pd+556|0;O=Pd+552|0;vd=Pd+548|0;M=Pd+544|0;Da=Pd+540|0;Mb=Pd+536|0;Mc=Pd+532|0;Nc=Pd+528|0;pd=Pd+524|0;qd=Pd+520|0;td=Pd+516|0;ud=Pd+512|0;Pc=Pd+508|0;Ka=Pd+504|0;wd=Pd+500|0;La=Pd+496|0;u=Pd+492|0;Lc=Pd+488|0;od=Pd+484|0;sd=Pd+480|0;K=Pd+476|0;sb=Pd+472|0;P=Pd+468|0;tb=Pd+464|0;G=Pd+460|0;I=Pd+456|0;L=Pd+452|0;N=Pd+448|0;Bd=Pd+444|0;U=Pd+440|0;Fd=Pd+436|0;S=Pd+432|0;Kd=Pd+428|0;Z=Pd+424|0;Qc=Pd+420|0;X=Pd+416|0;zd=Pd+412|0;Ad=Pd+408|0;Dd=Pd+404|0;Ed=Pd+400|0;Id=Pd+396|0;Jd=Pd+392|0;Md=Pd+388|0;Nd=Pd+384|0;Gd=Pd+380|0;Na=Pd+376|0;Rc=Pd+372|0;mb=Pd+368|0;yd=Pd+364|0;Cd=Pd+360|0;Hd=Pd+356|0;Ld=Pd+352|0;V=Pd+348|0;vb=Pd+344|0;_=Pd+340|0;wb=Pd+336|0;R=Pd+332|0;T=Pd+328|0;W=Pd+324|0;Y=Pd+320|0;ea=Pd+316|0;rb=Pd+312|0;Gb=Pd+308|0;Ib=Pd+304|0;ba=Pd+300|0;qb=Pd+296|0;pb=Pd+292|0;Hb=Pd+288|0;Tc=Pd+284|0;da=Pd+280|0;yb=Pd+276|0;Fb=Pd+272|0;F=P
d+268|0;aa=Pd+264|0;Ja=Pd+260|0;ob=Pd+256|0;Lb=Pd+252|0;Za=Pd+248|0;Xa=Pd+244|0;bb=Pd+240|0;Qa=Pd+236|0;_a=Pd+232|0;Ta=Pd+228|0;$a=Pd+224|0;Jb=Pd+220|0;Kb=Pd+216|0;Va=Pd+212|0;Wa=Pd+208|0;Oa=Pd+204|0;Pa=Pd+200|0;Ra=Pd+196|0;Sa=Pd+192|0;Ua=Pd+188|0;cb=Pd+184|0;Ya=Pd+180|0;ab=Pd+176|0;Nb=Pd+172|0;Gc=Pd+168|0;ac=Pd+164|0;tc=Pd+160|0;jb=Pd+156|0;bc=Pd+152|0;_b=Pd+148|0;gc=Pd+144|0;wc=Pd+140|0;Fc=Pd+136|0;Vb=Pd+132|0;Ac=Pd+128|0;Xb=Pd+124|0;fc=Pd+120|0;pc=Pd+116|0;Bc=Pd+112|0;fb=Pd+108|0;ib=Pd+104|0;Rb=Pd+100|0;Ub=Pd+96|0;Yb=Pd+92|0;Zb=Pd+88|0;uc=Pd+84|0;vc=Pd+80|0;Ic=Pd+76|0;Jc=Pd+72|0;lc=Pd+68|0;oc=Pd+64|0;Ob=Pd+60|0;qc=Pd+56|0;Dc=Pd+52|0;Ec=Pd+48|0;xc=Pd+44|0;yc=Pd+40|0;zc=Pd+36|0;Cc=Pd+32|0;Hc=Pd+28|0;$b=Pd+24|0;ic=Pd+20|0;jc=Pd+16|0;cc=Pd+12|0;dc=Pd+8|0;ec=Pd+4|0;hc=Pd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Qd>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Pd+960>>2]=.4619397521018982;g[Pd+956>>2]=.19134171307086945;g[Pd+952>>2]=.3535533845424652;g[Pd+948>>2]=.5;c[Od>>2]=c[Qd>>2];c[q>>2]=(c[q>>2]|0)+(((c[Qd>>2]|0)-1|0)*30<<2);while(1){if((c[Od>>2]|0)>=(c[s>>2]|0))break;g[md>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[nd>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[v>>2]=+g[md>>2]-+g[nd>>2];g[pa>>2]=+g[md>>2]+ +g[nd>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[y>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[na>>2]=+g[x>>2]-+g[y>>2];g[B>>2]=+g[c[n>>2]>>2];g[C>>2]=+g[c[p>>2]>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[ka>>2]=+g[B>>2]+ +g[C>>2];g[ga>>2]=+g[c[o>>2]>>2];g[ha>>2]=+g[c[m>>2]>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[ca>>2]=+g[ha>>2]+ +g[ga>>2];g[ld>>2]=+g[(c[q>>2]|0)+56>>2];g[w>>2]=+g[(c[q>>2]|0)+60>>2];g[A>>2]=+g[ld>>2]*+g[v>>2]-+g[w>>2]*+g[z>>2];g[Ea>>2]=+g[ld>>2]*+g[z>>2]+ +g[w>>2]*+g[v>>2];g[E>>2]=+g[A>>2]+ +g[D>>2];g[rc>>2]=+g[ca>>2]-+g[Ea>>2];g[Fa>>2]=+g[ca>>2]+ +g[Ea>>2];g[kb>>2]=+g[D>>2]-+g[A>>2];g[fa>>2]=+g[c[q>>2]>>2];g[ja>>2]=+g[(c[q>>2]|0)+4>>2];g[la>>2]=+g[fa>>2]*+g[ia>>2]-+g[ja>>2]*+g[ka>>2];g[Db>>2]=+g[ja>>2]*+g[ia>>2]+ 
+g[fa>>2]*+g[ka>>2];g[ma>>2]=+g[(c[q>>2]|0)+64>>2];g[oa>>2]=+g[(c[q>>2]|0)+68>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]+ +g[oa>>2]*+g[pa>>2];g[Cb>>2]=+g[ma>>2]*+g[pa>>2]-+g[oa>>2]*+g[na>>2];g[ra>>2]=+g[la>>2]-+g[qa>>2];g[nc>>2]=+g[Db>>2]-+g[Cb>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[Wb>>2]=+g[qa>>2]+ +g[la>>2];g[Vc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Wc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xc>>2]=+g[Vc>>2]-+g[Wc>>2];g[va>>2]=+g[Vc>>2]+ +g[Wc>>2];g[Zc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[_c>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$c>>2]=+g[Zc>>2]+ +g[_c>>2];g[ta>>2]=+g[Zc>>2]-+g[_c>>2];g[cd>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[dd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[ed>>2]=+g[cd>>2]-+g[dd>>2];g[Aa>>2]=+g[cd>>2]+ +g[dd>>2];g[gd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[hd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[id>>2]=+g[gd>>2]+ +g[hd>>2];g[ya>>2]=+g[gd>>2]-+g[hd>>2];g[Uc>>2]=+g[(c[q>>2]|0)+24>>2];g[Yc>>2]=+g[(c[q>>2]|0)+28>>2];g[ad>>2]=+g[Uc>>2]*+g[Xc>>2]-+g[Yc>>2]*+g[$c>>2];g[Ga>>2]=+g[Uc>>2]*+g[$c>>2]+ +g[Yc>>2]*+g[Xc>>2];g[bd>>2]=+g[(c[q>>2]|0)+88>>2];g[fd>>2]=+g[(c[q>>2]|0)+92>>2];g[jd>>2]=+g[bd>>2]*+g[ed>>2]-+g[fd>>2]*+g[id>>2];g[Ha>>2]=+g[bd>>2]*+g[id>>2]+ +g[fd>>2]*+g[ed>>2];g[kd>>2]=+g[ad>>2]+ +g[jd>>2];g[sc>>2]=+g[ad>>2]-+g[jd>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[lb>>2]=+g[Ga>>2]-+g[Ha>>2];g[sa>>2]=+g[(c[q>>2]|0)+32>>2];g[ua>>2]=+g[(c[q>>2]|0)+36>>2];g[wa>>2]=+g[sa>>2]*+g[ta>>2]+ +g[ua>>2]*+g[va>>2];g[zb>>2]=+g[sa>>2]*+g[va>>2]-+g[ua>>2]*+g[ta>>2];g[xa>>2]=+g[(c[q>>2]|0)+96>>2];g[za>>2]=+g[(c[q>>2]|0)+100>>2];g[Ba>>2]=+g[xa>>2]*+g[ya>>2]+ +g[za>>2]*+g[Aa>>2];g[Ab>>2]=+g[xa>>2]*+g[Aa>>2]-+g[za>>2]*+g[ya>>2];g[Ca>>2]=+g[wa>>2]+ +g[Ba>>2];g[mc>>2]=+g[Ba>>2]-+g[wa>>2];g[Bb>>2]=+g[zb>>2]+ +g[Ab>>2];g[kc>>2]=+g[zb>>2]-+g[Ab>>2];g[Da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Mb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Kc>>2]=+g[Da>>2]-+g[Mb>>2];g[J>>2]=+g[Da>>2]+ 
+g[Mb>>2];g[Mc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Nc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Oc>>2]=+g[Mc>>2]+ +g[Nc>>2];g[H>>2]=+g[Mc>>2]-+g[Nc>>2];g[pd>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[qd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[O>>2]=+g[pd>>2]+ +g[qd>>2];g[td>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ud>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[vd>>2]=+g[td>>2]+ +g[ud>>2];g[M>>2]=+g[td>>2]-+g[ud>>2];g[u>>2]=+g[(c[q>>2]|0)+8>>2];g[Lc>>2]=+g[(c[q>>2]|0)+12>>2];g[Pc>>2]=+g[u>>2]*+g[Kc>>2]-+g[Lc>>2]*+g[Oc>>2];g[Ka>>2]=+g[u>>2]*+g[Oc>>2]+ +g[Lc>>2]*+g[Kc>>2];g[od>>2]=+g[(c[q>>2]|0)+72>>2];g[sd>>2]=+g[(c[q>>2]|0)+76>>2];g[wd>>2]=+g[od>>2]*+g[rd>>2]-+g[sd>>2]*+g[vd>>2];g[La>>2]=+g[od>>2]*+g[vd>>2]+ +g[sd>>2]*+g[rd>>2];g[xd>>2]=+g[Pc>>2]+ +g[wd>>2];g[eb>>2]=+g[Ka>>2]-+g[La>>2];g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[db>>2]=+g[Pc>>2]-+g[wd>>2];g[G>>2]=+g[(c[q>>2]|0)+16>>2];g[I>>2]=+g[(c[q>>2]|0)+20>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[sb>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[L>>2]=+g[(c[q>>2]|0)+80>>2];g[N>>2]=+g[(c[q>>2]|0)+84>>2];g[P>>2]=+g[L>>2]*+g[M>>2]+ +g[N>>2]*+g[O>>2];g[tb>>2]=+g[L>>2]*+g[O>>2]-+g[N>>2]*+g[M>>2];g[Q>>2]=+g[K>>2]+ +g[P>>2];g[Tb>>2]=+g[sb>>2]-+g[tb>>2];g[ub>>2]=+g[sb>>2]+ +g[tb>>2];g[Pb>>2]=+g[P>>2]-+g[K>>2];g[zd>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ad>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Bd>>2]=+g[zd>>2]-+g[Ad>>2];g[U>>2]=+g[zd>>2]+ +g[Ad>>2];g[Dd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ed>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Fd>>2]=+g[Dd>>2]+ +g[Ed>>2];g[S>>2]=+g[Dd>>2]-+g[Ed>>2];g[Id>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Jd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[Z>>2]=+g[Id>>2]+ +g[Jd>>2];g[Md>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Nd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Qc>>2]=+g[Md>>2]+ 
+g[Nd>>2];g[X>>2]=+g[Md>>2]-+g[Nd>>2];g[yd>>2]=+g[(c[q>>2]|0)+104>>2];g[Cd>>2]=+g[(c[q>>2]|0)+108>>2];g[Gd>>2]=+g[yd>>2]*+g[Bd>>2]-+g[Cd>>2]*+g[Fd>>2];g[Na>>2]=+g[yd>>2]*+g[Fd>>2]+ +g[Cd>>2]*+g[Bd>>2];g[Hd>>2]=+g[(c[q>>2]|0)+40>>2];g[Ld>>2]=+g[(c[q>>2]|0)+44>>2];g[Rc>>2]=+g[Hd>>2]*+g[Kd>>2]-+g[Ld>>2]*+g[Qc>>2];g[mb>>2]=+g[Hd>>2]*+g[Qc>>2]+ +g[Ld>>2]*+g[Kd>>2];g[Sc>>2]=+g[Gd>>2]+ +g[Rc>>2];g[hb>>2]=+g[Gd>>2]-+g[Rc>>2];g[nb>>2]=+g[Na>>2]+ +g[mb>>2];g[gb>>2]=+g[Na>>2]-+g[mb>>2];g[R>>2]=+g[(c[q>>2]|0)+112>>2];g[T>>2]=+g[(c[q>>2]|0)+116>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[vb>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[W>>2]=+g[(c[q>>2]|0)+48>>2];g[Y>>2]=+g[(c[q>>2]|0)+52>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[wb>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[$>>2]=+g[V>>2]+ +g[_>>2];g[Sb>>2]=+g[_>>2]-+g[V>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[Qb>>2]=+g[vb>>2]-+g[wb>>2];g[Tc>>2]=+g[xd>>2]+ +g[Sc>>2];g[da>>2]=+g[kd>>2]+ +g[E>>2];g[ea>>2]=+g[Tc>>2]+ +g[da>>2];g[rb>>2]=+g[da>>2]-+g[Tc>>2];g[yb>>2]=+g[ub>>2]+ +g[xb>>2];g[Fb>>2]=+g[Bb>>2]+ +g[Eb>>2];g[Gb>>2]=+g[yb>>2]-+g[Fb>>2];g[Ib>>2]=+g[yb>>2]+ +g[Fb>>2];g[F>>2]=+g[ra>>2]-+g[Ca>>2];g[aa>>2]=+g[Q>>2]+ +g[$>>2];g[ba>>2]=+g[F>>2]-+g[aa>>2];g[qb>>2]=+g[aa>>2]+ +g[F>>2];g[Ja>>2]=+g[Fa>>2]+ +g[Ia>>2];g[ob>>2]=+g[Ma>>2]+ +g[nb>>2];g[pb>>2]=+g[Ja>>2]-+g[ob>>2];g[Hb>>2]=+g[Ja>>2]+ +g[ob>>2];g[c[n>>2]>>2]=(+g[ea>>2]+ +g[ba>>2])*.5;g[c[m>>2]>>2]=(+g[Hb>>2]+ +g[Ib>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[ba>>2]-+g[ea>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[Hb>>2]-+g[Ib>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[pb>>2]-+g[qb>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[Gb>>2]-+g[rb>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[pb>>2]+ +g[qb>>2])*.5;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[rb>>2]+ +g[Gb>>2])*.5;g[Jb>>2]=+g[nb>>2]-+g[Ma>>2];g[Kb>>2]=+g[E>>2]-+g[kd>>2];g[Lb>>2]=(+g[Jb>>2]+ 
+g[Kb>>2])*.5;g[Za>>2]=(+g[Kb>>2]-+g[Jb>>2])*.5;g[Va>>2]=+g[Fa>>2]-+g[Ia>>2];g[Wa>>2]=+g[xd>>2]-+g[Sc>>2];g[Xa>>2]=(+g[Va>>2]-+g[Wa>>2])*.5;g[bb>>2]=(+g[Va>>2]+ +g[Wa>>2])*.5;g[Oa>>2]=+g[xb>>2]-+g[ub>>2];g[Pa>>2]=+g[Q>>2]-+g[$>>2];g[Qa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[_a>>2]=+g[Oa>>2]-+g[Pa>>2];g[Ra>>2]=+g[Ca>>2]+ +g[ra>>2];g[Sa>>2]=+g[Eb>>2]-+g[Bb>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[$a>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Ua>>2]=(+g[Qa>>2]+ +g[Ta>>2])*.3535533845424652;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Lb>>2]+ +g[Ua>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ua>>2]-+g[Lb>>2];g[cb>>2]=(+g[_a>>2]+ +g[$a>>2])*.3535533845424652;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[bb>>2]-+g[cb>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[bb>>2]+ +g[cb>>2];g[Ya>>2]=(+g[Ta>>2]-+g[Qa>>2])*.3535533845424652;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xa>>2]-+g[Ya>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Xa>>2]+ +g[Ya>>2];g[ab>>2]=(+g[_a>>2]-+g[$a>>2])*.3535533845424652;g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Za>>2]+ +g[ab>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[ab>>2]-+g[Za>>2];g[Nb>>2]=(+g[kb>>2]-+g[lb>>2])*.5;g[Gc>>2]=(+g[lb>>2]+ +g[kb>>2])*.5;g[ac>>2]=(+g[rc>>2]-+g[sc>>2])*.5;g[tc>>2]=(+g[rc>>2]+ +g[sc>>2])*.5;g[fb>>2]=+g[db>>2]-+g[eb>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[jb>>2]=(+g[fb>>2]+ +g[ib>>2])*.3535533845424652;g[bc>>2]=(+g[fb>>2]-+g[ib>>2])*.3535533845424652;g[Yb>>2]=+g[kc>>2]+ +g[Wb>>2];g[Zb>>2]=+g[nc>>2]-+g[mc>>2];g[_b>>2]=+g[Yb>>2]*.19134171307086945-+g[Zb>>2]*.4619397521018982;g[gc>>2]=+g[Yb>>2]*.4619397521018982+ +g[Zb>>2]*.19134171307086945;g[uc>>2]=+g[eb>>2]+ +g[db>>2];g[vc>>2]=+g[gb>>2]-+g[hb>>2];g[wc>>2]=(+g[uc>>2]+ +g[vc>>2])*.3535533845424652;g[Fc>>2]=(+g[vc>>2]-+g[uc>>2])*.3535533845424652;g[Rb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2];g[Vb>>2]=+g[Rb>>2]*.19134171307086945+ +g[Ub>>2]*.4619397521018982;g[Ac>>2]=+g[Rb>>2]*.4619397521018982-+g[Ub>>2]*.19134171307086945;g[Ic>>2]=+g[Qb>>2]-+g[Pb>>2];g[Jc>>2]=+g[Tb>>2]+ +g[Sb>>2];g[Xb>>2]=+g[Ic>>2]*.4619397521018982+ 
+g[Jc>>2]*.19134171307086945;g[fc>>2]=+g[Ic>>2]*.19134171307086945-+g[Jc>>2]*.4619397521018982;g[lc>>2]=+g[Wb>>2]-+g[kc>>2];g[oc>>2]=+g[mc>>2]+ +g[nc>>2];g[pc>>2]=+g[lc>>2]*.4619397521018982-+g[oc>>2]*.19134171307086945;g[Bc>>2]=+g[lc>>2]*.19134171307086945+ +g[oc>>2]*.4619397521018982;g[Ob>>2]=+g[jb>>2]+ +g[Nb>>2];g[qc>>2]=+g[Vb>>2]+ +g[pc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ob>>2]+ +g[qc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qc>>2]-+g[Ob>>2];g[Dc>>2]=+g[tc>>2]+ +g[wc>>2];g[Ec>>2]=+g[Ac>>2]+ +g[Bc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Dc>>2]-+g[Ec>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Dc>>2]+ +g[Ec>>2];g[xc>>2]=+g[tc>>2]-+g[wc>>2];g[yc>>2]=+g[pc>>2]-+g[Vb>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[xc>>2]-+g[yc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[xc>>2]+ +g[yc>>2];g[zc>>2]=+g[Nb>>2]-+g[jb>>2];g[Cc>>2]=+g[Ac>>2]-+g[Bc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[zc>>2]+ +g[Cc>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Cc>>2]-+g[zc>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[$b>>2]=+g[Xb>>2]+ +g[_b>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Hc>>2]+ +g[$b>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[$b>>2]-+g[Hc>>2];g[ic>>2]=+g[ac>>2]+ +g[bc>>2];g[jc>>2]=+g[fc>>2]+ +g[gc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[ic>>2]-+g[jc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ic>>2]+ +g[jc>>2];g[cc>>2]=+g[ac>>2]-+g[bc>>2];g[dc>>2]=+g[_b>>2]-+g[Xb>>2];g[c[o>>2]>>2]=+g[cc>>2]-+g[dc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]+ +g[dc>>2];g[ec>>2]=+g[Gc>>2]-+g[Fc>>2];g[hc>>2]=+g[fc>>2]-+g[gc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ec>>2]+ +g[hc>>2];g[c[p>>2]>>2]=+g[hc>>2]-+g[ec>>2];c[Od>>2]=(c[Od>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+120;c[r>>2]=c[r>>2]^c[2998]}i=Pd;return}function br(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,46,4264,1);i=b;return}function 
cr(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0;wf=i;i=i+1360|0;m=wf+1352|0;n=wf+1348|0;o=wf+1344|0;p=wf+1340|0;q=wf+1336|0;r=wf+1332|0;xf=wf+1328|0;s=wf+1324|0;t=wf+1320|0;vf=wf+1296|0;x=wf+1292|0;Ib=wf+1288|0;Sd=wf+1284|0;zd=wf+1280|0;Va=wf+1276|0;vc=wf+1272|0;ae=wf+1268|0;Cd=wf+1264|0;Ca=wf+1260|0;Hb=wf+1256|0;Ja=wf+1252|0;Kb=wf+1
248|0;Eb=wf+1244|0;Lb=wf+1240|0;Zd=wf+1236|0;Bd=wf+1232|0;Vd=wf+1228|0;yd=wf+1224|0;Xb=wf+1220|0;uc=wf+1216|0;jb=wf+1212|0;Ub=wf+1208|0;Fc=wf+1204|0;Xc=wf+1200|0;_a=wf+1196|0;Wb=wf+1192|0;Qc=wf+1188|0;tc=wf+1184|0;eb=wf+1180|0;Tb=wf+1176|0;Kc=wf+1172|0;Yc=wf+1168|0;Ze=wf+1164|0;kd=wf+1160|0;Qb=wf+1156|0;ic=wf+1152|0;rf=wf+1148|0;ld=wf+1144|0;Pb=wf+1140|0;jc=wf+1136|0;Mb=wf+1132|0;gc=wf+1128|0;Aa=wf+1124|0;hb=wf+1120|0;bf=wf+1116|0;ff=wf+1112|0;ye=wf+1108|0;Bc=wf+1104|0;lf=wf+1100|0;pf=wf+1096|0;De=wf+1092|0;Cc=wf+1088|0;te=wf+1084|0;Xe=wf+1080|0;ta=wf+1076|0;gb=wf+1072|0;Ne=wf+1068|0;bb=wf+1064|0;K=wf+1060|0;Mc=wf+1056|0;Ha=wf+1052|0;Ya=wf+1048|0;rb=wf+1044|0;Tc=wf+1040|0;na=wf+1036|0;Hc=wf+1032|0;Cb=wf+1028|0;Sa=wf+1024|0;v=wf+1020|0;cb=wf+1016|0;P=wf+1012|0;Nc=wf+1008|0;Z=wf+1004|0;Xa=wf+1e3|0;mb=wf+996|0;Sc=wf+992|0;ea=wf+988|0;Gc=wf+984|0;xb=wf+980|0;Ra=wf+976|0;u=wf+972|0;Da=wf+968|0;va=wf+964|0;xa=wf+960|0;ya=wf+956|0;za=wf+952|0;ua=wf+948|0;wa=wf+944|0;$e=wf+940|0;af=wf+936|0;uf=wf+932|0;df=wf+928|0;ef=wf+924|0;xe=wf+920|0;tf=wf+916|0;we=wf+912|0;jf=wf+908|0;kf=wf+904|0;Ae=wf+900|0;nf=wf+896|0;of=wf+892|0;Ce=wf+888|0;ze=wf+884|0;Be=wf+880|0;ce=wf+876|0;se=wf+872|0;qa=wf+868|0;ve=wf+864|0;We=wf+860|0;sa=wf+856|0;pa=wf+852|0;ra=wf+848|0;Ie=wf+844|0;H=wf+840|0;Me=wf+836|0;J=wf+832|0;Ge=wf+828|0;He=wf+824|0;Ke=wf+820|0;Le=wf+816|0;Fe=wf+812|0;Je=wf+808|0;G=wf+804|0;I=wf+800|0;ba=wf+796|0;ob=wf+792|0;Ga=wf+788|0;qb=wf+784|0;$=wf+780|0;aa=wf+776|0;Ea=wf+772|0;Fa=wf+768|0;_=wf+764|0;ca=wf+760|0;nb=wf+756|0;pb=wf+752|0;ia=wf+748|0;zb=wf+744|0;ma=wf+740|0;Bb=wf+736|0;ga=wf+732|0;ha=wf+728|0;ka=wf+724|0;la=wf+720|0;fa=wf+716|0;ja=wf+712|0;yb=wf+708|0;Ab=wf+704|0;Re=wf+700|0;M=wf+696|0;Ve=wf+692|0;O=wf+688|0;Pe=wf+684|0;Qe=wf+680|0;Te=wf+676|0;Ue=wf+672|0;Oe=wf+668|0;Se=wf+664|0;L=wf+660|0;N=wf+656|0;U=wf+652|0;La=wf+648|0;Y=wf+644|0;Na=wf+640|0;S=wf+636|0;T=wf+632|0;W=wf+628|0;X=wf+624|0;R=wf+620|0;V=wf+616|0;Ka=wf+612|0;Ma=wf+608|0;B=wf+604|0;wb=wf+600|0;da=wf+596|0;u
b=wf+592|0;z=wf+588|0;A=wf+584|0;D=wf+580|0;E=wf+576|0;y=wf+572|0;C=wf+568|0;tb=wf+564|0;vb=wf+560|0;sb=wf+556|0;Db=wf+552|0;Ic=wf+548|0;Jc=wf+544|0;Ee=wf+540|0;w=wf+536|0;Qd=wf+532|0;Rd=wf+528|0;Ta=wf+524|0;Ua=wf+520|0;_d=wf+516|0;$d=wf+512|0;oa=wf+508|0;Ba=wf+504|0;Q=wf+500|0;Ia=wf+496|0;Xd=wf+492|0;Yd=wf+488|0;Td=wf+484|0;Ud=wf+480|0;Rc=wf+476|0;Uc=wf+472|0;fb=wf+468|0;ib=wf+464|0;Dc=wf+460|0;Ec=wf+456|0;Wa=wf+452|0;Za=wf+448|0;Oc=wf+444|0;Pc=wf+440|0;ab=wf+436|0;db=wf+432|0;Ye=wf+428|0;hc=wf+424|0;Vc=wf+420|0;ue=wf+416|0;gf=wf+412|0;Nb=wf+408|0;qf=wf+404|0;Ob=wf+400|0;_e=wf+396|0;cf=wf+392|0;hf=wf+388|0;mf=wf+384|0;Zb=wf+380|0;$b=wf+376|0;sf=wf+372|0;Gb=wf+368|0;yc=wf+364|0;zc=wf+360|0;_b=wf+356|0;Ac=wf+352|0;Lc=wf+348|0;Yb=wf+344|0;F=wf+340|0;Fb=wf+336|0;cc=wf+332|0;oc=wf+328|0;kc=wf+324|0;lc=wf+320|0;fc=wf+316|0;mc=wf+312|0;pc=wf+308|0;nc=wf+304|0;ac=wf+300|0;bc=wf+296|0;dc=wf+292|0;ec=wf+288|0;_c=wf+284|0;ad=wf+280|0;Qa=wf+276|0;Pa=wf+272|0;qc=wf+268|0;rc=wf+264|0;$c=wf+260|0;sc=wf+256|0;Wc=wf+252|0;Zc=wf+248|0;Jb=wf+244|0;Oa=wf+240|0;dd=wf+236|0;Ld=wf+232|0;Ed=wf+228|0;Hd=wf+224|0;Id=wf+220|0;Jd=wf+216|0;Md=wf+212|0;Kd=wf+208|0;bd=wf+204|0;cd=wf+200|0;Fd=wf+196|0;Gd=wf+192|0;de=wf+188|0;fe=wf+184|0;Rb=wf+180|0;lb=wf+176|0;vd=wf+172|0;wd=wf+168|0;ee=wf+164|0;xd=wf+160|0;Ad=wf+156|0;Dd=wf+152|0;$a=wf+148|0;kb=wf+144|0;pe=wf+140|0;qe=wf+136|0;ge=wf+132|0;je=wf+128|0;ke=wf+124|0;le=wf+120|0;re=wf+116|0;me=wf+112|0;ne=wf+108|0;oe=wf+104|0;he=wf+100|0;ie=wf+96|0;ed=wf+92|0;gd=wf+88|0;Sb=wf+84|0;xc=wf+80|0;Nd=wf+76|0;Od=wf+72|0;fd=wf+68|0;Pd=wf+64|0;Wd=wf+60|0;be=wf+56|0;Vb=wf+52|0;wc=wf+48|0;sd=wf+44|0;td=wf+40|0;md=wf+36|0;nd=wf+32|0;jd=wf+28|0;od=wf+24|0;ud=wf+20|0;pd=wf+16|0;qd=wf+12|0;rd=wf+8|0;hd=wf+4|0;id=wf;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[xf>>2]=j;c[s>>2]=k;c[t>>2]=l;g[wf+1316>>2]=.125;g[wf+1312>>2]=.5;g[wf+1308>>2]=.279508501291275;g[wf+1304>>2]=.29389262199401855;g[wf+1300>>2]=.4755282700061798;c[vf>>2]=c[xf>>2];c[q>>2]=(c[q>
>2]|0)+(((c[xf>>2]|0)-1|0)*38<<2);while(1){if((c[vf>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[n>>2]>>2];g[Da>>2]=+g[c[p>>2]>>2];g[va>>2]=+g[u>>2]+ +g[Da>>2];g[xa>>2]=+g[c[m>>2]>>2];g[ya>>2]=+g[c[o>>2]>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[Mb>>2]=+g[u>>2]-+g[Da>>2];g[gc>>2]=+g[xa>>2]+ +g[ya>>2];g[ua>>2]=+g[c[q>>2]>>2];g[wa>>2]=+g[(c[q>>2]|0)+4>>2];g[Aa>>2]=+g[ua>>2]*+g[va>>2]-+g[wa>>2]*+g[za>>2];g[hb>>2]=+g[ua>>2]*+g[za>>2]+ +g[wa>>2]*+g[va>>2];g[$e>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[af>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[uf>>2]=+g[$e>>2]-+g[af>>2];g[df>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ef>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[xe>>2]=+g[df>>2]+ +g[ef>>2];g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[ff>>2]=+g[df>>2]-+g[ef>>2];g[tf>>2]=+g[(c[q>>2]|0)+24>>2];g[we>>2]=+g[(c[q>>2]|0)+28>>2];g[ye>>2]=+g[tf>>2]*+g[uf>>2]-+g[we>>2]*+g[xe>>2];g[Bc>>2]=+g[we>>2]*+g[uf>>2]+ +g[tf>>2]*+g[xe>>2];g[jf>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[kf>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ae>>2]=+g[jf>>2]-+g[kf>>2];g[nf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[of>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Ce>>2]=+g[nf>>2]+ +g[of>>2];g[lf>>2]=+g[jf>>2]+ +g[kf>>2];g[pf>>2]=+g[nf>>2]-+g[of>>2];g[ze>>2]=+g[(c[q>>2]|0)+104>>2];g[Be>>2]=+g[(c[q>>2]|0)+108>>2];g[De>>2]=+g[ze>>2]*+g[Ae>>2]-+g[Be>>2]*+g[Ce>>2];g[Cc>>2]=+g[Be>>2]*+g[Ae>>2]+ +g[ze>>2]*+g[Ce>>2];g[ce>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[se>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[qa>>2]=+g[ce>>2]+ +g[se>>2];g[ve>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[We>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[sa>>2]=+g[ve>>2]-+g[We>>2];g[te>>2]=+g[ce>>2]-+g[se>>2];g[Xe>>2]=+g[ve>>2]+ +g[We>>2];g[pa>>2]=+g[(c[q>>2]|0)+80>>2];g[ra>>2]=+g[(c[q>>2]|0)+84>>2];g[ta>>2]=+g[pa>>2]*+g[qa>>2]-+g[ra>>2]*+g[sa>>2];g[gb>>2]=+g[pa>>2]*+g[sa>>2]+ +g[ra>>2]*+g[qa>>2];g[Ge>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[He>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ie>>2]=+g[Ge>>2]+ 
+g[He>>2];g[H>>2]=+g[Ge>>2]-+g[He>>2];g[Ke>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Le>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Me>>2]=+g[Ke>>2]-+g[Le>>2];g[J>>2]=+g[Ke>>2]+ +g[Le>>2];g[Fe>>2]=+g[(c[q>>2]|0)+64>>2];g[Je>>2]=+g[(c[q>>2]|0)+68>>2];g[Ne>>2]=+g[Fe>>2]*+g[Ie>>2]-+g[Je>>2]*+g[Me>>2];g[bb>>2]=+g[Je>>2]*+g[Ie>>2]+ +g[Fe>>2]*+g[Me>>2];g[G>>2]=+g[(c[q>>2]|0)+56>>2];g[I>>2]=+g[(c[q>>2]|0)+60>>2];g[K>>2]=+g[G>>2]*+g[H>>2]-+g[I>>2]*+g[J>>2];g[Mc>>2]=+g[I>>2]*+g[H>>2]+ +g[G>>2]*+g[J>>2];g[$>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[aa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[ob>>2]=+g[$>>2]-+g[aa>>2];g[Ea>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Fa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[qb>>2]=+g[Ea>>2]+ +g[Fa>>2];g[_>>2]=+g[(c[q>>2]|0)+16>>2];g[ca>>2]=+g[(c[q>>2]|0)+20>>2];g[Ha>>2]=+g[_>>2]*+g[ba>>2]-+g[ca>>2]*+g[Ga>>2];g[Ya>>2]=+g[ca>>2]*+g[ba>>2]+ +g[_>>2]*+g[Ga>>2];g[nb>>2]=+g[(c[q>>2]|0)+8>>2];g[pb>>2]=+g[(c[q>>2]|0)+12>>2];g[rb>>2]=+g[nb>>2]*+g[ob>>2]-+g[pb>>2]*+g[qb>>2];g[Tc>>2]=+g[pb>>2]*+g[ob>>2]+ +g[nb>>2]*+g[qb>>2];g[ga>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[zb>>2]=+g[ga>>2]+ +g[ha>>2];g[ka>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[la>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[Bb>>2]=+g[ka>>2]-+g[la>>2];g[fa>>2]=+g[(c[q>>2]|0)+40>>2];g[ja>>2]=+g[(c[q>>2]|0)+44>>2];g[na>>2]=+g[fa>>2]*+g[ia>>2]-+g[ja>>2]*+g[ma>>2];g[Hc>>2]=+g[fa>>2]*+g[ma>>2]+ +g[ja>>2]*+g[ia>>2];g[yb>>2]=+g[(c[q>>2]|0)+48>>2];g[Ab>>2]=+g[(c[q>>2]|0)+52>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]-+g[Ab>>2]*+g[Bb>>2];g[Sa>>2]=+g[yb>>2]*+g[Bb>>2]+ +g[Ab>>2]*+g[zb>>2];g[Pe>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Qe>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Re>>2]=+g[Pe>>2]+ 
+g[Qe>>2];g[M>>2]=+g[Pe>>2]-+g[Qe>>2];g[Te>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ue>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ve>>2]=+g[Te>>2]-+g[Ue>>2];g[O>>2]=+g[Te>>2]+ +g[Ue>>2];g[Oe>>2]=+g[(c[q>>2]|0)+144>>2];g[Se>>2]=+g[(c[q>>2]|0)+148>>2];g[v>>2]=+g[Oe>>2]*+g[Re>>2]-+g[Se>>2]*+g[Ve>>2];g[cb>>2]=+g[Se>>2]*+g[Re>>2]+ +g[Oe>>2]*+g[Ve>>2];g[L>>2]=+g[(c[q>>2]|0)+136>>2];g[N>>2]=+g[(c[q>>2]|0)+140>>2];g[P>>2]=+g[L>>2]*+g[M>>2]-+g[N>>2]*+g[O>>2];g[Nc>>2]=+g[N>>2]*+g[M>>2]+ +g[L>>2]*+g[O>>2];g[S>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[T>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[U>>2]=+g[S>>2]+ +g[T>>2];g[La>>2]=+g[S>>2]-+g[T>>2];g[W>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[X>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[Na>>2]=+g[W>>2]+ +g[X>>2];g[R>>2]=+g[(c[q>>2]|0)+96>>2];g[V>>2]=+g[(c[q>>2]|0)+100>>2];g[Z>>2]=+g[R>>2]*+g[U>>2]-+g[V>>2]*+g[Y>>2];g[Xa>>2]=+g[V>>2]*+g[U>>2]+ +g[R>>2]*+g[Y>>2];g[Ka>>2]=+g[(c[q>>2]|0)+88>>2];g[Ma>>2]=+g[(c[q>>2]|0)+92>>2];g[mb>>2]=+g[Ka>>2]*+g[La>>2]-+g[Ma>>2]*+g[Na>>2];g[Sc>>2]=+g[Ma>>2]*+g[La>>2]+ +g[Ka>>2]*+g[Na>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[wb>>2]=+g[z>>2]+ +g[A>>2];g[D>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[E>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[da>>2]=+g[D>>2]+ +g[E>>2];g[ub>>2]=+g[E>>2]-+g[D>>2];g[y>>2]=+g[(c[q>>2]|0)+120>>2];g[C>>2]=+g[(c[q>>2]|0)+124>>2];g[ea>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[da>>2];g[Gc>>2]=+g[y>>2]*+g[da>>2]+ +g[C>>2]*+g[B>>2];g[tb>>2]=+g[(c[q>>2]|0)+132>>2];g[vb>>2]=+g[(c[q>>2]|0)+128>>2];g[xb>>2]=+g[tb>>2]*+g[ub>>2]+ +g[vb>>2]*+g[wb>>2];g[Ra>>2]=+g[vb>>2]*+g[ub>>2]-+g[tb>>2]*+g[wb>>2];g[Ee>>2]=+g[ye>>2]-+g[De>>2];g[w>>2]=+g[Ne>>2]-+g[v>>2];g[x>>2]=+g[Ee>>2]-+g[w>>2];g[Ib>>2]=+g[Ee>>2]+ +g[w>>2];g[Qd>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Rd>>2]=+g[Ne>>2]+ +g[v>>2];g[Sd>>2]=+g[Qd>>2]+ +g[Rd>>2];g[zd>>2]=+g[Qd>>2]-+g[Rd>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Ua>>2]=+g[mb>>2]+ 
+g[rb>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2];g[vc>>2]=+g[Ta>>2]+ +g[Ua>>2];g[_d>>2]=+g[xb>>2]+ +g[Cb>>2];g[$d>>2]=+g[Sc>>2]+ +g[Tc>>2];g[ae>>2]=+g[_d>>2]+ +g[$d>>2];g[Cd>>2]=+g[$d>>2]-+g[_d>>2];g[oa>>2]=+g[ea>>2]-+g[na>>2];g[Ba>>2]=+g[ta>>2]-+g[Aa>>2];g[Ca>>2]=+g[oa>>2]+ +g[Ba>>2];g[Hb>>2]=+g[Ba>>2]-+g[oa>>2];g[Q>>2]=+g[K>>2]-+g[P>>2];g[Ia>>2]=+g[Z>>2]-+g[Ha>>2];g[Ja>>2]=+g[Q>>2]-+g[Ia>>2];g[Kb>>2]=+g[Q>>2]+ +g[Ia>>2];g[sb>>2]=+g[mb>>2]-+g[rb>>2];g[Db>>2]=+g[xb>>2]-+g[Cb>>2];g[Eb>>2]=+g[sb>>2]-+g[Db>>2];g[Lb>>2]=+g[Db>>2]+ +g[sb>>2];g[Xd>>2]=+g[Mc>>2]+ +g[Nc>>2];g[Yd>>2]=+g[Z>>2]+ +g[Ha>>2];g[Zd>>2]=+g[Xd>>2]+ +g[Yd>>2];g[Bd>>2]=+g[Xd>>2]-+g[Yd>>2];g[Td>>2]=+g[Gc>>2]+ +g[Hc>>2];g[Ud>>2]=+g[ta>>2]+ +g[Aa>>2];g[Vd>>2]=+g[Td>>2]+ +g[Ud>>2];g[yd>>2]=+g[Td>>2]-+g[Ud>>2];g[Rc>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Uc>>2]=+g[Sc>>2]-+g[Tc>>2];g[Xb>>2]=+g[Rc>>2]+ +g[Uc>>2];g[uc>>2]=+g[Uc>>2]-+g[Rc>>2];g[fb>>2]=+g[ea>>2]+ +g[na>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[jb>>2]=+g[fb>>2]+ +g[ib>>2];g[Ub>>2]=+g[fb>>2]-+g[ib>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Ec>>2]=+g[cb>>2]-+g[bb>>2];g[Fc>>2]=+g[Dc>>2]+ +g[Ec>>2];g[Xc>>2]=+g[Dc>>2]-+g[Ec>>2];g[Wa>>2]=+g[K>>2]+ +g[P>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[Wb>>2]=+g[Wa>>2]-+g[Za>>2];g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2];g[Pc>>2]=+g[Ya>>2]-+g[Xa>>2];g[Qc>>2]=+g[Oc>>2]+ +g[Pc>>2];g[tc>>2]=+g[Oc>>2]-+g[Pc>>2];g[ab>>2]=+g[ye>>2]+ +g[De>>2];g[db>>2]=+g[bb>>2]+ +g[cb>>2];g[eb>>2]=+g[ab>>2]+ +g[db>>2];g[Tb>>2]=+g[ab>>2]-+g[db>>2];g[Ic>>2]=+g[Gc>>2]-+g[Hc>>2];g[Jc>>2]=+g[gb>>2]-+g[hb>>2];g[Kc>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Yc>>2]=+g[Ic>>2]-+g[Jc>>2];g[Vc>>2]=+g[(c[q>>2]|0)+72>>2];g[ue>>2]=+g[(c[q>>2]|0)+76>>2];g[Ye>>2]=+g[Vc>>2]*+g[te>>2]-+g[ue>>2]*+g[Xe>>2];g[hc>>2]=+g[Vc>>2]*+g[Xe>>2]+ +g[ue>>2]*+g[te>>2];g[Ze>>2]=+g[Mb>>2]-+g[Ye>>2];g[kd>>2]=+g[gc>>2]+ +g[hc>>2];g[Qb>>2]=+g[Ye>>2]+ 
+g[Mb>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[_e>>2]=+g[(c[q>>2]|0)+32>>2];g[cf>>2]=+g[(c[q>>2]|0)+36>>2];g[gf>>2]=+g[_e>>2]*+g[bf>>2]-+g[cf>>2]*+g[ff>>2];g[Nb>>2]=+g[cf>>2]*+g[bf>>2]+ +g[_e>>2]*+g[ff>>2];g[hf>>2]=+g[(c[q>>2]|0)+112>>2];g[mf>>2]=+g[(c[q>>2]|0)+116>>2];g[qf>>2]=+g[hf>>2]*+g[lf>>2]-+g[mf>>2]*+g[pf>>2];g[Ob>>2]=+g[mf>>2]*+g[lf>>2]+ +g[hf>>2]*+g[pf>>2];g[rf>>2]=+g[gf>>2]-+g[qf>>2];g[ld>>2]=+g[gf>>2]+ +g[qf>>2];g[Pb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[jc>>2]=+g[Ob>>2]-+g[Nb>>2];g[Lc>>2]=+g[Fc>>2]-+g[Kc>>2];g[Yb>>2]=+g[Qc>>2]-+g[Xb>>2];g[Zb>>2]=+g[Lc>>2]*.4755282700061798+ +g[Yb>>2]*.29389262199401855;g[$b>>2]=+g[Yb>>2]*.4755282700061798-+g[Lc>>2]*.29389262199401855;g[sf>>2]=+g[Ze>>2]-+g[rf>>2];g[F>>2]=+g[x>>2]+ +g[Ca>>2];g[Fb>>2]=+g[Ja>>2]+ +g[Eb>>2];g[Gb>>2]=+g[F>>2]+ +g[Fb>>2];g[yc>>2]=(+g[F>>2]-+g[Fb>>2])*.279508501291275;g[zc>>2]=+g[sf>>2]*.5-+g[Gb>>2]*.125;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[sf>>2]+ +g[Gb>>2])*.5;g[_b>>2]=+g[yc>>2]-+g[zc>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_b>>2]-+g[$b>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[_b>>2]+ +g[$b>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ac>>2]-+g[Zb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Ac>>2]+ +g[Zb>>2];g[ac>>2]=+g[x>>2]-+g[Ca>>2];g[bc>>2]=+g[Ja>>2]-+g[Eb>>2];g[cc>>2]=+g[ac>>2]*.4755282700061798+ +g[bc>>2]*.29389262199401855;g[oc>>2]=+g[bc>>2]*.4755282700061798-+g[ac>>2]*.29389262199401855;g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[dc>>2]=+g[Fc>>2]+ +g[Kc>>2];g[ec>>2]=+g[Qc>>2]+ +g[Xb>>2];g[lc>>2]=+g[dc>>2]+ +g[ec>>2];g[fc>>2]=(+g[dc>>2]-+g[ec>>2])*.279508501291275;g[mc>>2]=+g[kc>>2]*.5-+g[lc>>2]*.125;g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[kc>>2]+ +g[lc>>2])*.5;g[pc>>2]=+g[mc>>2]-+g[fc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[oc>>2]+ +g[pc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[pc>>2]-+g[oc>>2];g[nc>>2]=+g[fc>>2]+ +g[mc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[cc>>2]+ 
+g[nc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[nc>>2]-+g[cc>>2];g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[_c>>2]=+g[Wc>>2]*.4755282700061798-+g[Zc>>2]*.29389262199401855;g[ad>>2]=+g[Zc>>2]*.4755282700061798+ +g[Wc>>2]*.29389262199401855;g[Qa>>2]=+g[rf>>2]+ +g[Ze>>2];g[Jb>>2]=+g[Hb>>2]-+g[Ib>>2];g[Oa>>2]=+g[Kb>>2]+ +g[Lb>>2];g[Pa>>2]=+g[Jb>>2]-+g[Oa>>2];g[qc>>2]=+g[Qa>>2]*.5+ +g[Pa>>2]*.125;g[rc>>2]=(+g[Jb>>2]+ +g[Oa>>2])*.279508501291275;g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[Pa>>2]-+g[Qa>>2])*.5;g[$c>>2]=+g[rc>>2]-+g[qc>>2];g[c[p>>2]>>2]=+g[$c>>2]-+g[ad>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[$c>>2]+ +g[ad>>2];g[sc>>2]=+g[qc>>2]+ +g[rc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[sc>>2]-+g[_c>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[sc>>2]+ +g[_c>>2];g[bd>>2]=+g[Kb>>2]-+g[Lb>>2];g[cd>>2]=+g[Ib>>2]+ +g[Hb>>2];g[dd>>2]=+g[bd>>2]*.4755282700061798-+g[cd>>2]*.29389262199401855;g[Ld>>2]=+g[cd>>2]*.4755282700061798+ +g[bd>>2]*.29389262199401855;g[Ed>>2]=+g[ic>>2]-+g[jc>>2];g[Fd>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Gd>>2]=+g[tc>>2]+ +g[uc>>2];g[Hd>>2]=+g[Fd>>2]+ +g[Gd>>2];g[Id>>2]=+g[Ed>>2]*.5-+g[Hd>>2]*.125;g[Jd>>2]=(+g[Fd>>2]-+g[Gd>>2])*.279508501291275;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[Ed>>2]+ +g[Hd>>2])*.5;g[Md>>2]=+g[Jd>>2]+ +g[Id>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ld>>2]+ +g[Md>>2];g[c[o>>2]>>2]=+g[Md>>2]-+g[Ld>>2];g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[dd>>2]+ +g[Kd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Kd>>2]-+g[dd>>2];g[Ad>>2]=+g[yd>>2]-+g[zd>>2];g[Dd>>2]=+g[Bd>>2]-+g[Cd>>2];g[de>>2]=+g[Ad>>2]*.4755282700061798-+g[Dd>>2]*.29389262199401855;g[fe>>2]=+g[Ad>>2]*.29389262199401855+ +g[Dd>>2]*.4755282700061798;g[Rb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[$a>>2]=+g[Va>>2]-+g[_a>>2];g[kb>>2]=+g[eb>>2]+ +g[jb>>2];g[lb>>2]=+g[$a>>2]-+g[kb>>2];g[vd>>2]=+g[Rb>>2]*.5+ +g[lb>>2]*.125;g[wd>>2]=(+g[kb>>2]+ 
+g[$a>>2])*.279508501291275;g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=(+g[lb>>2]-+g[Rb>>2])*.5;g[ee>>2]=+g[vd>>2]-+g[wd>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ee>>2]+ +g[fe>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[fe>>2]-+g[ee>>2];g[xd>>2]=+g[vd>>2]+ +g[wd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[xd>>2]+ +g[de>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[de>>2]-+g[xd>>2];g[ne>>2]=+g[_a>>2]+ +g[Va>>2];g[oe>>2]=+g[eb>>2]-+g[jb>>2];g[pe>>2]=+g[ne>>2]*.4755282700061798-+g[oe>>2]*.29389262199401855;g[qe>>2]=+g[oe>>2]*.4755282700061798+ +g[ne>>2]*.29389262199401855;g[ge>>2]=+g[kd>>2]-+g[ld>>2];g[he>>2]=+g[zd>>2]+ +g[yd>>2];g[ie>>2]=+g[Bd>>2]+ +g[Cd>>2];g[je>>2]=+g[he>>2]+ +g[ie>>2];g[ke>>2]=+g[ge>>2]*.5-+g[je>>2]*.125;g[le>>2]=(+g[he>>2]-+g[ie>>2])*.279508501291275;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=(+g[ge>>2]+ +g[je>>2])*.5;g[re>>2]=+g[le>>2]+ +g[ke>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[qe>>2]+ +g[re>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[re>>2]-+g[qe>>2];g[me>>2]=+g[ke>>2]-+g[le>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[me>>2]-+g[pe>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[pe>>2]+ +g[me>>2];g[Wd>>2]=+g[Sd>>2]-+g[Vd>>2];g[be>>2]=+g[Zd>>2]-+g[ae>>2];g[ed>>2]=+g[Wd>>2]*.29389262199401855-+g[be>>2]*.4755282700061798;g[gd>>2]=+g[Wd>>2]*.4755282700061798+ +g[be>>2]*.29389262199401855;g[Sb>>2]=+g[Qb>>2]-+g[Pb>>2];g[Vb>>2]=+g[Tb>>2]+ +g[Ub>>2];g[wc>>2]=+g[Wb>>2]+ +g[vc>>2];g[xc>>2]=+g[Vb>>2]+ +g[wc>>2];g[Nd>>2]=+g[Sb>>2]*.5-+g[xc>>2]*.125;g[Od>>2]=(+g[Vb>>2]-+g[wc>>2])*.279508501291275;g[c[n>>2]>>2]=(+g[Sb>>2]+ +g[xc>>2])*.5;g[fd>>2]=+g[Od>>2]+ +g[Nd>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[fd>>2]+ +g[gd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[gd>>2]-+g[fd>>2];g[Pd>>2]=+g[Nd>>2]-+g[Od>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Pd>>2]+ +g[ed>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ed>>2]-+g[Pd>>2];g[qd>>2]=+g[Tb>>2]-+g[Ub>>2];g[rd>>2]=+g[Wb>>2]-+g[vc>>2];g[sd>>2]=+g[qd>>2]*.4755282700061798+ 
+g[rd>>2]*.29389262199401855;g[td>>2]=+g[rd>>2]*.4755282700061798-+g[qd>>2]*.29389262199401855;g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[hd>>2]=+g[Sd>>2]+ +g[Vd>>2];g[id>>2]=+g[Zd>>2]+ +g[ae>>2];g[nd>>2]=+g[hd>>2]+ +g[id>>2];g[jd>>2]=(+g[hd>>2]-+g[id>>2])*.279508501291275;g[od>>2]=+g[md>>2]*.5-+g[nd>>2]*.125;g[c[m>>2]>>2]=(+g[md>>2]+ +g[nd>>2])*.5;g[ud>>2]=+g[od>>2]-+g[jd>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[td>>2]+ +g[ud>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ud>>2]-+g[td>>2];g[pd>>2]=+g[jd>>2]+ +g[od>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[pd>>2]-+g[sd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[sd>>2]+ +g[pd>>2];c[vf>>2]=(c[vf>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+152;c[r>>2]=c[r>>2]^c[2998]}i=wf;return}/* dr(a): saves i, reserves 16 bytes by bumping i, stores a in that frame, calls an(a, 47, 4312, 1) and restores i before returning. NOTE(review): 'an' is defined outside this chunk; this looks like an Emscripten-compiled FFTW codelet registration stub, with 47 presumably a function-table index and 4312 a descriptor address - confirm. */function dr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,47,4312,1);i=b;return}/* er(a,b,d,e,f,h,j,k,l): loop kernel run for counter values j up to but excluding k. a, b, d, e and f are used as byte addresses into g (presumably the float32 heap view; c is presumably the int32 heap view, i the stack pointer - all declared outside this chunk). h is stored in the frame but never read here. Before the loop f is advanced by (j-1)*8 bytes. Each pass reads one float at each of a, b, d, e and two floats at f, forms sums and differences, multiplies one sum/difference pair by the two f values, writes four halved results back to the same four addresses, then moves a and b forward by l*4 bytes, d and e backward by l*4 bytes, and f forward by 8 bytes. NOTE(review): this has the shape of an FFTW radix-2 hc2c DFT codelet - confirm against the FFTW sources. */function er(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;G=i;i=i+96|0;/* every local below is an offset into this 96-byte frame */m=G+88|0;n=G+84|0;o=G+80|0;p=G+76|0;q=G+72|0;H=G+64|0;r=G+60|0;s=G+56|0;F=G+48|0;v=G+44|0;B=G+40|0;z=G+36|0;D=G+32|0;t=G+28|0;u=G+24|0;x=G+20|0;y=G+16|0;C=G+12|0;E=G+8|0;w=G+4|0;A=G;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[G+68>>2]=h;c[H>>2]=j;c[r>>2]=k;c[s>>2]=l;/* .5 is written to the frame here but the slot is not read again in this function */g[G+52>>2]=.5;c[F>>2]=c[H>>2];c[q>>2]=(c[q>>2]|0)+((c[H>>2]|0)-1<<1<<2);while(1){if((c[F>>2]|0)>=(c[r>>2]|0))break;/* load the four data values and form their sums and differences */g[t>>2]=+g[c[n>>2]>>2];g[u>>2]=+g[c[p>>2]>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[B>>2]=+g[t>>2]+ +g[u>>2];g[x>>2]=+g[c[o>>2]>>2];g[y>>2]=+g[c[m>>2]>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[D>>2]=+g[y>>2]+ +g[x>>2];/* the two values at f scale the (z, B) pair as w*z - A*B and A*z + w*B */g[w>>2]=+g[c[q>>2]>>2];g[A>>2]=+g[(c[q>>2]|0)+4>>2];g[C>>2]=+g[w>>2]*+g[z>>2]-+g[A>>2]*+g[B>>2];g[E>>2]=+g[A>>2]*+g[z>>2]+ +g[w>>2]*+g[B>>2];/* write the halved results back in place */g[c[n>>2]>>2]=(+g[v>>2]+ +g[C>>2])*.5;g[c[m>>2]>>2]=(+g[D>>2]+ 
+g[E>>2])*.5;g[c[p>>2]>>2]=(+g[C>>2]-+g[v>>2])*.5;g[c[o>>2]>>2]=(+g[D>>2]-+g[E>>2])*.5;/* bump the counter and step the five pointers */c[F>>2]=(c[F>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[s>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[s>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+8}i=G;return}/* fr(a): same pattern as dr - stores a in a 16-byte frame, calls an(a, 48, 4360, 1) and restores i. NOTE(review): presumably the registration stub for the function that follows; 'an' and the meaning of 48 and 4360 are not visible in this chunk - confirm. */function fr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,48,4360,1);i=b;return}function gr(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf
=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0;Qj=i;i=i+2320|0;m=Qj+2312|0;n=Qj+2308|0;o=Qj+2304|0;p=Qj+2300|0;q=Qj+2296|0;r=Qj+2292|0;Rj=Qj+2288|0;s=Qj+2284|0;t=Qj+2280|0;Pj=Qj+2240|0;eb=Qj+2236|0;xe=Qj+2232|0;fe=Qj+2228|0;Ae=Qj+2224|0;th=Qj+2220|0;Ei=Qj+2216|0;mh=Qj+2212|0;Bi=Qj+2208|0;Cb=Qj+2204|0;xh=Qj+2200|0;$c=Qj+2196|0;If=Qj+2192|0;pb=Qj+2188|0;yh=Qj+2184|0;cd=Qj+2180|0;Jf=Qj+2176|0;F=Qj+2172|0;Gd=Qj+2168|0;Jd=Qj+2164|0;Y=Qj+2160|0;Ac=Qj+2156|0;ze=Qj+2152|0;ih=Qj+2148|0;ph=Qj+2144|0;Gf=Qj+2140|0;Bh=Qj+2136|0;zd=Qj+2132|0;we=Qj+2128|0;fh=Qj+2124|0;oh=Qj+2120|0;Df=Qj+2116|0;Ah=Qj+2112|0;yj=Qj+2108|0;Ti=Qj+2104|0;cf=Qj+2100|0;Od=Qj+2096|0;Rd=Qj+2092|0;df=Qj+2088|0;_b=Qj+2084|0;pe=Qj+2080|0;Sf=Qj+2076|0;yg=Qj+2072|0;kd=Qj+2068|0;se=Qj+2064|0;pg=Qj+206
0|0;Lh=Qj+2056|0;Pf=Qj+2052|0;xg=Qj+2048|0;mg=Qj+2044|0;Kh=Qj+2040|0;lj=Qj+2036|0;ia=Qj+2032|0;gf=Qj+2028|0;Vd=Qj+2024|0;Yd=Qj+2020|0;ff=Qj+2016|0;Wc=Qj+2012|0;te=Qj+2008|0;Cg=Qj+2004|0;xi=Qj+2e3|0;rd=Qj+1996|0;qe=Qj+1992|0;yf=Qj+1988|0;Oh=Qj+1984|0;Wf=Qj+1980|0;ui=Qj+1976|0;tg=Qj+1972|0;Nh=Qj+1968|0;ba=Qj+1964|0;Ga=Qj+1960|0;Za=Qj+1956|0;Ad=Qj+1952|0;Bb=Qj+1948|0;Zc=Qj+1944|0;Oa=Qj+1940|0;de=Qj+1936|0;La=Qj+1932|0;nb=Qj+1928|0;cb=Qj+1924|0;Bd=Qj+1920|0;tb=Qj+1916|0;xb=Qj+1912|0;Ta=Qj+1908|0;Dd=Qj+1904|0;$=Qj+1900|0;aa=Qj+1896|0;Ya=Qj+1892|0;Ea=Qj+1888|0;Fa=Qj+1884|0;Wa=Qj+1880|0;Va=Qj+1876|0;Xa=Qj+1872|0;zb=Qj+1868|0;Ab=Qj+1864|0;Lb=Qj+1860|0;Hb=Qj+1856|0;Ib=Qj+1852|0;Jb=Qj+1848|0;Gb=Qj+1844|0;Kb=Qj+1840|0;Ja=Qj+1836|0;Ka=Qj+1832|0;bb=Qj+1828|0;Na=Qj+1824|0;mb=Qj+1820|0;$a=Qj+1816|0;_a=Qj+1812|0;ab=Qj+1808|0;rb=Qj+1804|0;sb=Qj+1800|0;Sa=Qj+1796|0;vb=Qj+1792|0;wb=Qj+1788|0;Qa=Qj+1784|0;Pa=Qj+1780|0;Ra=Qj+1776|0;Ua=Qj+1772|0;db=Qj+1768|0;kh=Qj+1764|0;lh=Qj+1760|0;Cd=Qj+1756|0;ee=Qj+1752|0;rh=Qj+1748|0;sh=Qj+1744|0;yb=Qj+1740|0;_c=Qj+1736|0;qb=Qj+1732|0;ub=Qj+1728|0;Ha=Qj+1724|0;ad=Qj+1720|0;ob=Qj+1716|0;bd=Qj+1712|0;_=Qj+1708|0;ca=Qj+1704|0;Ia=Qj+1700|0;Ma=Qj+1696|0;ta=Qj+1692|0;Ed=Qj+1688|0;jb=Qj+1684|0;td=Qj+1680|0;X=Qj+1676|0;Id=Qj+1672|0;yc=Qj+1668|0;xd=Qj+1664|0;Ca=Qj+1660|0;Fd=Qj+1656|0;Pb=Qj+1652|0;ud=Qj+1648|0;O=Qj+1644|0;Hd=Qj+1640|0;Vb=Qj+1636|0;wd=Qj+1632|0;oa=Qj+1628|0;ib=Qj+1624|0;sa=Qj+1620|0;gb=Qj+1616|0;ma=Qj+1612|0;na=Qj+1608|0;qa=Qj+1604|0;ra=Qj+1600|0;la=Qj+1596|0;pa=Qj+1592|0;fb=Qj+1588|0;hb=Qj+1584|0;S=Qj+1580|0;xc=Qj+1576|0;W=Qj+1572|0;vc=Qj+1568|0;Q=Qj+1564|0;R=Qj+1560|0;U=Qj+1556|0;V=Qj+1552|0;P=Qj+1548|0;T=Qj+1544|0;Wb=Qj+1540|0;wc=Qj+1536|0;xa=Qj+1532|0;Ob=Qj+1528|0;Ba=Qj+1524|0;lb=Qj+1520|0;va=Qj+1516|0;wa=Qj+1512|0;za=Qj+1508|0;Aa=Qj+1504|0;ua=Qj+1500|0;ya=Qj+1496|0;kb=Qj+1492|0;Nb=Qj+1488|0;J=Qj+1484|0;Ub=Qj+1480|0;N=Qj+1476|0;Sb=Qj+1472|0;H=Qj+1468|0;I=Qj+1464|0;L=Qj+1460|0;M=Qj+1456|0;G=Qj+1452|0;K=Qj+1448|0;Rb=Qj+1444|0;Tb=Qj+1440|0;Qb
=Qj+1436|0;zc=Qj+1432|0;vd=Qj+1428|0;yd=Qj+1424|0;gh=Qj+1420|0;hh=Qj+1416|0;Ef=Qj+1412|0;Ff=Qj+1408|0;Fg=Qj+1404|0;eh=Qj+1400|0;Bf=Qj+1396|0;Cf=Qj+1392|0;Ni=Qj+1388|0;Md=Qj+1384|0;Gc=Qj+1380|0;ed=Qj+1376|0;Si=Qj+1372|0;Qd=Qj+1368|0;Yb=Qj+1364|0;id=Qj+1360|0;xj=Qj+1356|0;Nd=Qj+1352|0;Lc=Qj+1348|0;fd=Qj+1344|0;Hj=Qj+1340|0;Pd=Qj+1336|0;Rc=Qj+1332|0;hd=Qj+1328|0;Vc=Qj+1324|0;Fc=Qj+1320|0;Eh=Qj+1316|0;Dc=Qj+1312|0;Da=Qj+1308|0;Mb=Qj+1304|0;mf=Qj+1300|0;vg=Qj+1296|0;u=Qj+1292|0;ce=Qj+1288|0;Cc=Qj+1284|0;Ec=Qj+1280|0;Lj=Qj+1276|0;Xb=Qj+1272|0;Ri=Qj+1268|0;Tc=Qj+1264|0;Jj=Qj+1260|0;Kj=Qj+1256|0;Nj=Qj+1252|0;Oj=Qj+1248|0;Ij=Qj+1244|0;Mj=Qj+1240|0;Sc=Qj+1236|0;Uc=Qj+1232|0;sj=Qj+1228|0;Kc=Qj+1224|0;wj=Qj+1220|0;Ic=Qj+1216|0;qj=Qj+1212|0;rj=Qj+1208|0;uj=Qj+1204|0;vj=Qj+1200|0;pj=Qj+1196|0;tj=Qj+1192|0;Hc=Qj+1188|0;Jc=Qj+1184|0;Cj=Qj+1180|0;Qc=Qj+1176|0;Gj=Qj+1172|0;Oc=Qj+1168|0;Aj=Qj+1164|0;Bj=Qj+1160|0;Ej=Qj+1156|0;Fj=Qj+1152|0;zj=Qj+1148|0;Dj=Qj+1144|0;Nc=Qj+1140|0;Pc=Qj+1136|0;Mc=Qj+1132|0;Zb=Qj+1128|0;Qf=Qj+1124|0;Rf=Qj+1120|0;gd=Qj+1116|0;jd=Qj+1112|0;ng=Qj+1108|0;og=Qj+1104|0;Nf=Qj+1100|0;Of=Qj+1096|0;kg=Qj+1092|0;lg=Qj+1088|0;bj=Qj+1084|0;Td=Qj+1080|0;dc=Qj+1076|0;ld=Qj+1072|0;ha=Qj+1068|0;Xd=Qj+1064|0;tc=Qj+1060|0;pd=Qj+1056|0;kj=Qj+1052|0;Ud=Qj+1048|0;ic=Qj+1044|0;md=Qj+1040|0;A=Qj+1036|0;Wd=Qj+1032|0;oc=Qj+1028|0;od=Qj+1024|0;Yi=Qj+1020|0;cc=Qj+1016|0;aj=Qj+1012|0;ac=Qj+1008|0;Wi=Qj+1004|0;Xi=Qj+1e3|0;_i=Qj+996|0;$i=Qj+992|0;Vi=Qj+988|0;Zi=Qj+984|0;$b=Qj+980|0;bc=Qj+976|0;E=Qj+972|0;sc=Qj+968|0;ga=Qj+964|0;qc=Qj+960|0;C=Qj+956|0;D=Qj+952|0;ea=Qj+948|0;fa=Qj+944|0;B=Qj+940|0;da=Qj+936|0;pc=Qj+932|0;rc=Qj+928|0;fj=Qj+924|0;hc=Qj+920|0;jj=Qj+916|0;fc=Qj+912|0;dj=Qj+908|0;ej=Qj+904|0;hj=Qj+900|0;ij=Qj+896|0;cj=Qj+892|0;gj=Qj+888|0;ec=Qj+884|0;gc=Qj+880|0;v=Qj+876|0;nc=Qj+872|0;z=Qj+868|0;lc=Qj+864|0;nj=Qj+860|0;oj=Qj+856|0;x=Qj+852|0;y=Qj+848|0;mj=Qj+844|0;w=Qj+840|0;kc=Qj+836|0;mc=Qj+832|0;jc=Qj+828|0;uc=Qj+824|0;Ag=Qj+820|0;Bg=Qj+816|0;nd=Qj+812|0;qd=Qj+808|0;ug=Qj+80
4|0;xf=Qj+800|0;Uf=Qj+796|0;Vf=Qj+792|0;rg=Qj+788|0;sg=Qj+784|0;ka=Qj+780|0;Ve=Qj+776|0;Pe=Qj+772|0;Ze=Qj+768|0;Se=Qj+764|0;_e=Qj+760|0;Eb=Qj+756|0;le=Qj+752|0;Yc=Qj+748|0;ae=Qj+744|0;_d=Qj+740|0;ke=Qj+736|0;he=Qj+732|0;je=Qj+728|0;Ld=Qj+724|0;Ue=Qj+720|0;Ui=Qj+716|0;ja=Qj+712|0;Ne=Qj+708|0;Oe=Qj+704|0;Qe=Qj+700|0;Re=Qj+696|0;Z=Qj+692|0;Db=Qj+688|0;Bc=Qj+684|0;Xc=Qj+680|0;Sd=Qj+676|0;Zd=Qj+672|0;sd=Qj+668|0;ge=Qj+664|0;dd=Qj+660|0;Kd=Qj+656|0;Fb=Qj+652|0;ie=Qj+648|0;$d=Qj+644|0;be=Qj+640|0;me=Qj+636|0;Te=Qj+632|0;af=Qj+628|0;bf=Qj+624|0;We=Qj+620|0;Xe=Qj+616|0;Ye=Qj+612|0;$e=Qj+608|0;jf=Qj+604|0;bg=Qj+600|0;Xf=Qj+596|0;fg=Qj+592|0;_f=Qj+588|0;gg=Qj+584|0;ne=Qj+580|0;tf=Qj+576|0;ve=Qj+572|0;nf=Qj+568|0;Ge=Qj+564|0;ag=Qj+560|0;Je=Qj+556|0;sf=Qj+552|0;Ce=Qj+548|0;of=Qj+544|0;ef=Qj+540|0;hf=Qj+536|0;vf=Qj+532|0;wf=Qj+528|0;Yf=Qj+524|0;Zf=Qj+520|0;kf=Qj+516|0;lf=Qj+512|0;re=Qj+508|0;ue=Qj+504|0;Ee=Qj+500|0;Fe=Qj+496|0;He=Qj+492|0;Ie=Qj+488|0;ye=Qj+484|0;Be=Qj+480|0;oe=Qj+476|0;De=Qj+472|0;qf=Qj+468|0;rf=Qj+464|0;Ke=Qj+460|0;Le=Qj+456|0;Me=Qj+452|0;pf=Qj+448|0;uf=Qj+444|0;$f=Qj+440|0;ig=Qj+436|0;jg=Qj+432|0;cg=Qj+428|0;dg=Qj+424|0;eg=Qj+420|0;hg=Qj+416|0;Af=Qj+412|0;ah=Qj+408|0;Lf=Qj+404|0;Sg=Qj+400|0;Ig=Qj+396|0;Rg=Qj+392|0;Dh=Qj+388|0;$g=Qj+384|0;vh=Qj+380|0;Gh=Qj+376|0;Ng=Qj+372|0;Zg=Qj+368|0;Eg=Qj+364|0;Fh=Qj+360|0;Mg=Qj+356|0;Wg=Qj+352|0;qg=Qj+348|0;zf=Qj+344|0;zh=Qj+340|0;Ch=Qj+336|0;Hf=Qj+332|0;Kf=Qj+328|0;Gg=Qj+324|0;Hg=Qj+320|0;nh=Qj+316|0;Xg=Qj+312|0;uh=Qj+308|0;Yg=Qj+304|0;jh=Qj+300|0;qh=Qj+296|0;wg=Qj+292|0;Ug=Qj+288|0;Dg=Qj+284|0;Vg=Qj+280|0;Tf=Qj+276|0;zg=Qj+272|0;Mf=Qj+268|0;wh=Qj+264|0;Pg=Qj+260|0;Qg=Qj+256|0;Jg=Qj+252|0;Kg=Qj+248|0;Lg=Qj+244|0;Og=Qj+240|0;Tg=Qj+236|0;_g=Qj+232|0;Ih=Qj+228|0;Jh=Qj+224|0;bh=Qj+220|0;ch=Qj+216|0;dh=Qj+212|0;Hh=Qj+208|0;oi=Qj+204|0;hi=Qj+200|0;ri=Qj+196|0;Zh=Qj+192|0;Ph=Qj+188|0;Yh=Qj+184|0;Ki=Qj+180|0;gi=Qj+176|0;Gi=Qj+172|0;mi=Qj+168|0;Uh=Qj+164|0;ei=Qj+160|0;zi=Qj+156|0;li=Qj+152|0;Th=Qj+148|0;bi=Qj+144|0;Mh=Qj+140|0;ni=Qj
+136|0;Ii=Qj+132|0;Ji=Qj+128|0;pi=Qj+124|0;qi=Qj+120|0;Li=Qj+116|0;Mi=Qj+112|0;Ci=Qj+108|0;ci=Qj+104|0;Fi=Qj+100|0;di=Qj+96|0;Ai=Qj+92|0;Di=Qj+88|0;vi=Qj+84|0;$h=Qj+80|0;yi=Qj+76|0;ai=Qj+72|0;ti=Qj+68|0;wi=Qj+64|0;si=Qj+60|0;Hi=Qj+56|0;Wh=Qj+52|0;Xh=Qj+48|0;Qh=Qj+44|0;Rh=Qj+40|0;Sh=Qj+36|0;Vh=Qj+32|0;_h=Qj+28|0;fi=Qj+24|0;Pi=Qj+20|0;Qi=Qj+16|0;ii=Qj+12|0;ji=Qj+8|0;ki=Qj+4|0;Oi=Qj;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Rj>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Qj+2276>>2]=.27778512239456177;g[Qj+2272>>2]=.41573479771614075;g[Qj+2268>>2]=.09754516184329987;g[Qj+2264>>2]=.49039262533187866;g[Qj+2260>>2]=.7071067690849304;g[Qj+2256>>2]=.19134171307086945;g[Qj+2252>>2]=.4619397521018982;g[Qj+2248>>2]=.3535533845424652;g[Qj+2244>>2]=.5;c[Pj>>2]=c[Rj>>2];c[q>>2]=(c[q>>2]|0)+(((c[Rj>>2]|0)-1|0)*62<<2);while(1){if((c[Pj>>2]|0)>=(c[s>>2]|0))break;g[$>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[aa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ya>>2]=+g[$>>2]+ +g[aa>>2];g[Ea>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Fa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Wa>>2]=+g[Ea>>2]-+g[Fa>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[Ga>>2]=+g[Ea>>2]+ +g[Fa>>2];g[Va>>2]=+g[(c[q>>2]|0)+64>>2];g[Xa>>2]=+g[(c[q>>2]|0)+68>>2];g[Za>>2]=+g[Va>>2]*+g[Wa>>2]+ +g[Xa>>2]*+g[Ya>>2];g[Ad>>2]=+g[Va>>2]*+g[Ya>>2]-+g[Xa>>2]*+g[Wa>>2];g[zb>>2]=+g[c[n>>2]>>2];g[Ab>>2]=+g[c[p>>2]>>2];g[Lb>>2]=+g[zb>>2]+ +g[Ab>>2];g[Hb>>2]=+g[c[o>>2]>>2];g[Ib>>2]=+g[c[m>>2]>>2];g[Jb>>2]=+g[Hb>>2]-+g[Ib>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[Zc>>2]=+g[Ib>>2]+ +g[Hb>>2];g[Gb>>2]=+g[c[q>>2]>>2];g[Kb>>2]=+g[(c[q>>2]|0)+4>>2];g[Oa>>2]=+g[Gb>>2]*+g[Jb>>2]-+g[Kb>>2]*+g[Lb>>2];g[de>>2]=+g[Kb>>2]*+g[Jb>>2]+ +g[Gb>>2]*+g[Lb>>2];g[Ja>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[Ka>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[bb>>2]=+g[Ja>>2]+ +g[Ka>>2];g[Na>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[mb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[$a>>2]=+g[Na>>2]-+g[mb>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[nb>>2]=+g[Na>>2]+ 
+g[mb>>2];g[_a>>2]=+g[(c[q>>2]|0)+192>>2];g[ab>>2]=+g[(c[q>>2]|0)+196>>2];g[cb>>2]=+g[_a>>2]*+g[$a>>2]+ +g[ab>>2]*+g[bb>>2];g[Bd>>2]=+g[_a>>2]*+g[bb>>2]-+g[ab>>2]*+g[$a>>2];g[rb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[sb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Sa>>2]=+g[rb>>2]+ +g[sb>>2];g[vb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[wb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Qa>>2]=+g[vb>>2]-+g[wb>>2];g[tb>>2]=+g[rb>>2]-+g[sb>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[Pa>>2]=+g[(c[q>>2]|0)+128>>2];g[Ra>>2]=+g[(c[q>>2]|0)+132>>2];g[Ta>>2]=+g[Pa>>2]*+g[Qa>>2]+ +g[Ra>>2]*+g[Sa>>2];g[Dd>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Ra>>2]*+g[Qa>>2];g[Ua>>2]=+g[Oa>>2]-+g[Ta>>2];g[db>>2]=+g[Za>>2]+ +g[cb>>2];g[eb>>2]=+g[Ua>>2]-+g[db>>2];g[xe>>2]=+g[db>>2]+ +g[Ua>>2];g[Cd>>2]=+g[Ad>>2]+ +g[Bd>>2];g[ee>>2]=+g[Dd>>2]+ +g[de>>2];g[fe>>2]=+g[Cd>>2]+ +g[ee>>2];g[Ae>>2]=+g[ee>>2]-+g[Cd>>2];g[rh>>2]=+g[cb>>2]-+g[Za>>2];g[sh>>2]=+g[de>>2]-+g[Dd>>2];g[th>>2]=+g[rh>>2]+ +g[sh>>2];g[Ei>>2]=+g[sh>>2]-+g[rh>>2];g[kh>>2]=+g[Ta>>2]+ +g[Oa>>2];g[lh>>2]=+g[Ad>>2]-+g[Bd>>2];g[mh>>2]=+g[kh>>2]-+g[lh>>2];g[Bi>>2]=+g[lh>>2]+ +g[kh>>2];g[qb>>2]=+g[(c[q>>2]|0)+120>>2];g[ub>>2]=+g[(c[q>>2]|0)+124>>2];g[yb>>2]=+g[qb>>2]*+g[tb>>2]-+g[ub>>2]*+g[xb>>2];g[_c>>2]=+g[qb>>2]*+g[xb>>2]+ +g[ub>>2]*+g[tb>>2];g[Cb>>2]=+g[yb>>2]+ +g[Bb>>2];g[xh>>2]=+g[Zc>>2]-+g[_c>>2];g[$c>>2]=+g[Zc>>2]+ +g[_c>>2];g[If>>2]=+g[Bb>>2]-+g[yb>>2];g[_>>2]=+g[(c[q>>2]|0)+56>>2];g[ca>>2]=+g[(c[q>>2]|0)+60>>2];g[Ha>>2]=+g[_>>2]*+g[ba>>2]-+g[ca>>2]*+g[Ga>>2];g[ad>>2]=+g[_>>2]*+g[Ga>>2]+ +g[ca>>2]*+g[ba>>2];g[Ia>>2]=+g[(c[q>>2]|0)+184>>2];g[Ma>>2]=+g[(c[q>>2]|0)+188>>2];g[ob>>2]=+g[Ia>>2]*+g[La>>2]-+g[Ma>>2]*+g[nb>>2];g[bd>>2]=+g[Ia>>2]*+g[nb>>2]+ +g[Ma>>2]*+g[La>>2];g[pb>>2]=+g[Ha>>2]+ +g[ob>>2];g[yh>>2]=+g[Ha>>2]-+g[ob>>2];g[cd>>2]=+g[ad>>2]+ +g[bd>>2];g[Jf>>2]=+g[ad>>2]-+g[bd>>2];g[ma>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[na>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[ib>>2]=+g[ma>>2]+ 
+g[na>>2];g[qa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ra>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[sa>>2]=+g[qa>>2]+ +g[ra>>2];g[gb>>2]=+g[qa>>2]-+g[ra>>2];g[la>>2]=+g[(c[q>>2]|0)+24>>2];g[pa>>2]=+g[(c[q>>2]|0)+28>>2];g[ta>>2]=+g[la>>2]*+g[oa>>2]-+g[pa>>2]*+g[sa>>2];g[Ed>>2]=+g[la>>2]*+g[sa>>2]+ +g[pa>>2]*+g[oa>>2];g[fb>>2]=+g[(c[q>>2]|0)+32>>2];g[hb>>2]=+g[(c[q>>2]|0)+36>>2];g[jb>>2]=+g[fb>>2]*+g[gb>>2]+ +g[hb>>2]*+g[ib>>2];g[td>>2]=+g[fb>>2]*+g[ib>>2]-+g[hb>>2]*+g[gb>>2];g[Q>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[R>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[xc>>2]=+g[Q>>2]+ +g[R>>2];g[U>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[V>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[vc>>2]=+g[U>>2]-+g[V>>2];g[P>>2]=+g[(c[q>>2]|0)+88>>2];g[T>>2]=+g[(c[q>>2]|0)+92>>2];g[X>>2]=+g[P>>2]*+g[S>>2]-+g[T>>2]*+g[W>>2];g[Id>>2]=+g[P>>2]*+g[W>>2]+ +g[T>>2]*+g[S>>2];g[Wb>>2]=+g[(c[q>>2]|0)+96>>2];g[wc>>2]=+g[(c[q>>2]|0)+100>>2];g[yc>>2]=+g[Wb>>2]*+g[vc>>2]+ +g[wc>>2]*+g[xc>>2];g[xd>>2]=+g[Wb>>2]*+g[xc>>2]-+g[wc>>2]*+g[vc>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[wa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[Ob>>2]=+g[va>>2]+ +g[wa>>2];g[za>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Aa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[lb>>2]=+g[za>>2]-+g[Aa>>2];g[ua>>2]=+g[(c[q>>2]|0)+152>>2];g[ya>>2]=+g[(c[q>>2]|0)+156>>2];g[Ca>>2]=+g[ua>>2]*+g[xa>>2]-+g[ya>>2]*+g[Ba>>2];g[Fd>>2]=+g[ua>>2]*+g[Ba>>2]+ +g[ya>>2]*+g[xa>>2];g[kb>>2]=+g[(c[q>>2]|0)+160>>2];g[Nb>>2]=+g[(c[q>>2]|0)+164>>2];g[Pb>>2]=+g[kb>>2]*+g[lb>>2]+ +g[Nb>>2]*+g[Ob>>2];g[ud>>2]=+g[kb>>2]*+g[Ob>>2]-+g[Nb>>2]*+g[lb>>2];g[H>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[I>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[Ub>>2]=+g[H>>2]+ +g[I>>2];g[L>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[M>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[N>>2]=+g[L>>2]+ 
+g[M>>2];g[Sb>>2]=+g[L>>2]-+g[M>>2];g[G>>2]=+g[(c[q>>2]|0)+216>>2];g[K>>2]=+g[(c[q>>2]|0)+220>>2];g[O>>2]=+g[G>>2]*+g[J>>2]-+g[K>>2]*+g[N>>2];g[Hd>>2]=+g[G>>2]*+g[N>>2]+ +g[K>>2]*+g[J>>2];g[Rb>>2]=+g[(c[q>>2]|0)+224>>2];g[Tb>>2]=+g[(c[q>>2]|0)+228>>2];g[Vb>>2]=+g[Rb>>2]*+g[Sb>>2]+ +g[Tb>>2]*+g[Ub>>2];g[wd>>2]=+g[Rb>>2]*+g[Ub>>2]-+g[Tb>>2]*+g[Sb>>2];g[F>>2]=+g[ta>>2]+ +g[Ca>>2];g[Gd>>2]=+g[Ed>>2]+ +g[Fd>>2];g[Jd>>2]=+g[Hd>>2]+ +g[Id>>2];g[Y>>2]=+g[O>>2]+ +g[X>>2];g[Qb>>2]=+g[jb>>2]+ +g[Pb>>2];g[zc>>2]=+g[Vb>>2]+ +g[yc>>2];g[Ac>>2]=+g[Qb>>2]+ +g[zc>>2];g[ze>>2]=+g[zc>>2]-+g[Qb>>2];g[gh>>2]=+g[wd>>2]-+g[xd>>2];g[hh>>2]=+g[yc>>2]-+g[Vb>>2];g[ih>>2]=+g[gh>>2]+ +g[hh>>2];g[ph>>2]=+g[gh>>2]-+g[hh>>2];g[Ef>>2]=+g[Hd>>2]-+g[Id>>2];g[Ff>>2]=+g[O>>2]-+g[X>>2];g[Gf>>2]=+g[Ef>>2]+ +g[Ff>>2];g[Bh>>2]=+g[Ef>>2]-+g[Ff>>2];g[vd>>2]=+g[td>>2]+ +g[ud>>2];g[yd>>2]=+g[wd>>2]+ +g[xd>>2];g[zd>>2]=+g[vd>>2]+ +g[yd>>2];g[we>>2]=+g[yd>>2]-+g[vd>>2];g[Fg>>2]=+g[Pb>>2]-+g[jb>>2];g[eh>>2]=+g[td>>2]-+g[ud>>2];g[fh>>2]=+g[Fg>>2]-+g[eh>>2];g[oh>>2]=+g[eh>>2]+ +g[Fg>>2];g[Bf>>2]=+g[ta>>2]-+g[Ca>>2];g[Cf>>2]=+g[Ed>>2]-+g[Fd>>2];g[Df>>2]=+g[Bf>>2]-+g[Cf>>2];g[Ah>>2]=+g[Cf>>2]+ +g[Bf>>2];g[Da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Mb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Vc>>2]=+g[Da>>2]-+g[Mb>>2];g[Fc>>2]=+g[Da>>2]+ +g[Mb>>2];g[mf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[vg>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Eh>>2]=+g[mf>>2]+ +g[vg>>2];g[Dc>>2]=+g[mf>>2]-+g[vg>>2];g[u>>2]=+g[(c[q>>2]|0)+8>>2];g[ce>>2]=+g[(c[q>>2]|0)+12>>2];g[Ni>>2]=+g[u>>2]*+g[Vc>>2]-+g[ce>>2]*+g[Eh>>2];g[Md>>2]=+g[u>>2]*+g[Eh>>2]+ +g[ce>>2]*+g[Vc>>2];g[Cc>>2]=+g[(c[q>>2]|0)+16>>2];g[Ec>>2]=+g[(c[q>>2]|0)+20>>2];g[Gc>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[ed>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[Jj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Kj>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Lj>>2]=+g[Jj>>2]-+g[Kj>>2];g[Xb>>2]=+g[Jj>>2]+ 
+g[Kj>>2];g[Nj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Oj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Ri>>2]=+g[Nj>>2]+ +g[Oj>>2];g[Tc>>2]=+g[Nj>>2]-+g[Oj>>2];g[Ij>>2]=+g[(c[q>>2]|0)+200>>2];g[Mj>>2]=+g[(c[q>>2]|0)+204>>2];g[Si>>2]=+g[Ij>>2]*+g[Lj>>2]-+g[Mj>>2]*+g[Ri>>2];g[Qd>>2]=+g[Ij>>2]*+g[Ri>>2]+ +g[Mj>>2]*+g[Lj>>2];g[Sc>>2]=+g[(c[q>>2]|0)+208>>2];g[Uc>>2]=+g[(c[q>>2]|0)+212>>2];g[Yb>>2]=+g[Sc>>2]*+g[Tc>>2]+ +g[Uc>>2]*+g[Xb>>2];g[id>>2]=+g[Sc>>2]*+g[Xb>>2]-+g[Uc>>2]*+g[Tc>>2];g[qj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[rj>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[sj>>2]=+g[qj>>2]-+g[rj>>2];g[Kc>>2]=+g[qj>>2]+ +g[rj>>2];g[uj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[vj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[wj>>2]=+g[uj>>2]+ +g[vj>>2];g[Ic>>2]=+g[uj>>2]-+g[vj>>2];g[pj>>2]=+g[(c[q>>2]|0)+136>>2];g[tj>>2]=+g[(c[q>>2]|0)+140>>2];g[xj>>2]=+g[pj>>2]*+g[sj>>2]-+g[tj>>2]*+g[wj>>2];g[Nd>>2]=+g[pj>>2]*+g[wj>>2]+ +g[tj>>2]*+g[sj>>2];g[Hc>>2]=+g[(c[q>>2]|0)+144>>2];g[Jc>>2]=+g[(c[q>>2]|0)+148>>2];g[Lc>>2]=+g[Hc>>2]*+g[Ic>>2]+ +g[Jc>>2]*+g[Kc>>2];g[fd>>2]=+g[Hc>>2]*+g[Kc>>2]-+g[Jc>>2]*+g[Ic>>2];g[Aj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Bj>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Cj>>2]=+g[Aj>>2]-+g[Bj>>2];g[Qc>>2]=+g[Aj>>2]+ +g[Bj>>2];g[Ej>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Fj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Gj>>2]=+g[Ej>>2]+ +g[Fj>>2];g[Oc>>2]=+g[Ej>>2]-+g[Fj>>2];g[zj>>2]=+g[(c[q>>2]|0)+72>>2];g[Dj>>2]=+g[(c[q>>2]|0)+76>>2];g[Hj>>2]=+g[zj>>2]*+g[Cj>>2]-+g[Dj>>2]*+g[Gj>>2];g[Pd>>2]=+g[zj>>2]*+g[Gj>>2]+ +g[Dj>>2]*+g[Cj>>2];g[Nc>>2]=+g[(c[q>>2]|0)+80>>2];g[Pc>>2]=+g[(c[q>>2]|0)+84>>2];g[Rc>>2]=+g[Nc>>2]*+g[Oc>>2]+ +g[Pc>>2]*+g[Qc>>2];g[hd>>2]=+g[Nc>>2]*+g[Qc>>2]-+g[Pc>>2]*+g[Oc>>2];g[yj>>2]=+g[Ni>>2]+ +g[xj>>2];g[Ti>>2]=+g[Hj>>2]+ +g[Si>>2];g[cf>>2]=+g[yj>>2]-+g[Ti>>2];g[Od>>2]=+g[Md>>2]+ +g[Nd>>2];g[Rd>>2]=+g[Pd>>2]+ +g[Qd>>2];g[df>>2]=+g[Od>>2]-+g[Rd>>2];g[Mc>>2]=+g[Gc>>2]+ +g[Lc>>2];g[Zb>>2]=+g[Rc>>2]+ 
+g[Yb>>2];g[_b>>2]=+g[Mc>>2]+ +g[Zb>>2];g[pe>>2]=+g[Zb>>2]-+g[Mc>>2];g[Qf>>2]=+g[id>>2]-+g[hd>>2];g[Rf>>2]=+g[Rc>>2]-+g[Yb>>2];g[Sf>>2]=+g[Qf>>2]+ +g[Rf>>2];g[yg>>2]=+g[Qf>>2]-+g[Rf>>2];g[gd>>2]=+g[ed>>2]+ +g[fd>>2];g[jd>>2]=+g[hd>>2]+ +g[id>>2];g[kd>>2]=+g[gd>>2]+ +g[jd>>2];g[se>>2]=+g[jd>>2]-+g[gd>>2];g[ng>>2]=+g[Md>>2]-+g[Nd>>2];g[og>>2]=+g[Hj>>2]-+g[Si>>2];g[pg>>2]=+g[ng>>2]+ +g[og>>2];g[Lh>>2]=+g[ng>>2]-+g[og>>2];g[Nf>>2]=+g[Lc>>2]-+g[Gc>>2];g[Of>>2]=+g[ed>>2]-+g[fd>>2];g[Pf>>2]=+g[Nf>>2]-+g[Of>>2];g[xg>>2]=+g[Of>>2]+ +g[Nf>>2];g[kg>>2]=+g[Ni>>2]-+g[xj>>2];g[lg>>2]=+g[Pd>>2]-+g[Qd>>2];g[mg>>2]=+g[kg>>2]-+g[lg>>2];g[Kh>>2]=+g[kg>>2]+ +g[lg>>2];g[Wi>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Xi>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Yi>>2]=+g[Wi>>2]-+g[Xi>>2];g[cc>>2]=+g[Wi>>2]+ +g[Xi>>2];g[_i>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[$i>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[aj>>2]=+g[_i>>2]+ +g[$i>>2];g[ac>>2]=+g[_i>>2]-+g[$i>>2];g[Vi>>2]=+g[(c[q>>2]|0)+232>>2];g[Zi>>2]=+g[(c[q>>2]|0)+236>>2];g[bj>>2]=+g[Vi>>2]*+g[Yi>>2]-+g[Zi>>2]*+g[aj>>2];g[Td>>2]=+g[Vi>>2]*+g[aj>>2]+ +g[Zi>>2]*+g[Yi>>2];g[$b>>2]=+g[(c[q>>2]|0)+240>>2];g[bc>>2]=+g[(c[q>>2]|0)+244>>2];g[dc>>2]=+g[$b>>2]*+g[ac>>2]+ +g[bc>>2]*+g[cc>>2];g[ld>>2]=+g[$b>>2]*+g[cc>>2]-+g[bc>>2]*+g[ac>>2];g[C>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[sc>>2]=+g[C>>2]+ +g[D>>2];g[ea>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[fa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[qc>>2]=+g[ea>>2]-+g[fa>>2];g[B>>2]=+g[(c[q>>2]|0)+168>>2];g[da>>2]=+g[(c[q>>2]|0)+172>>2];g[ha>>2]=+g[B>>2]*+g[E>>2]-+g[da>>2]*+g[ga>>2];g[Xd>>2]=+g[B>>2]*+g[ga>>2]+ +g[da>>2]*+g[E>>2];g[pc>>2]=+g[(c[q>>2]|0)+176>>2];g[rc>>2]=+g[(c[q>>2]|0)+180>>2];g[tc>>2]=+g[pc>>2]*+g[qc>>2]+ 
+g[rc>>2]*+g[sc>>2];g[pd>>2]=+g[pc>>2]*+g[sc>>2]-+g[rc>>2]*+g[qc>>2];g[dj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ej>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[fj>>2]=+g[dj>>2]-+g[ej>>2];g[hc>>2]=+g[dj>>2]+ +g[ej>>2];g[hj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ij>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[jj>>2]=+g[hj>>2]+ +g[ij>>2];g[fc>>2]=+g[hj>>2]-+g[ij>>2];g[cj>>2]=+g[(c[q>>2]|0)+104>>2];g[gj>>2]=+g[(c[q>>2]|0)+108>>2];g[kj>>2]=+g[cj>>2]*+g[fj>>2]-+g[gj>>2]*+g[jj>>2];g[Ud>>2]=+g[cj>>2]*+g[jj>>2]+ +g[gj>>2]*+g[fj>>2];g[ec>>2]=+g[(c[q>>2]|0)+112>>2];g[gc>>2]=+g[(c[q>>2]|0)+116>>2];g[ic>>2]=+g[ec>>2]*+g[fc>>2]+ +g[gc>>2]*+g[hc>>2];g[md>>2]=+g[ec>>2]*+g[hc>>2]-+g[gc>>2]*+g[fc>>2];g[nj>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[oj>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[v>>2]=+g[nj>>2]-+g[oj>>2];g[nc>>2]=+g[nj>>2]+ +g[oj>>2];g[x>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[y>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[lc>>2]=+g[x>>2]-+g[y>>2];g[mj>>2]=+g[(c[q>>2]|0)+40>>2];g[w>>2]=+g[(c[q>>2]|0)+44>>2];g[A>>2]=+g[mj>>2]*+g[v>>2]-+g[w>>2]*+g[z>>2];g[Wd>>2]=+g[mj>>2]*+g[z>>2]+ +g[w>>2]*+g[v>>2];g[kc>>2]=+g[(c[q>>2]|0)+48>>2];g[mc>>2]=+g[(c[q>>2]|0)+52>>2];g[oc>>2]=+g[kc>>2]*+g[lc>>2]+ +g[mc>>2]*+g[nc>>2];g[od>>2]=+g[kc>>2]*+g[nc>>2]-+g[mc>>2]*+g[lc>>2];g[lj>>2]=+g[bj>>2]+ +g[kj>>2];g[ia>>2]=+g[A>>2]+ +g[ha>>2];g[gf>>2]=+g[lj>>2]-+g[ia>>2];g[Vd>>2]=+g[Td>>2]+ +g[Ud>>2];g[Yd>>2]=+g[Wd>>2]+ +g[Xd>>2];g[ff>>2]=+g[Vd>>2]-+g[Yd>>2];g[jc>>2]=+g[dc>>2]+ +g[ic>>2];g[uc>>2]=+g[oc>>2]+ +g[tc>>2];g[Wc>>2]=+g[jc>>2]+ +g[uc>>2];g[te>>2]=+g[uc>>2]-+g[jc>>2];g[Ag>>2]=+g[ld>>2]-+g[md>>2];g[Bg>>2]=+g[tc>>2]-+g[oc>>2];g[Cg>>2]=+g[Ag>>2]+ +g[Bg>>2];g[xi>>2]=+g[Ag>>2]-+g[Bg>>2];g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[rd>>2]=+g[nd>>2]+ +g[qd>>2];g[qe>>2]=+g[nd>>2]-+g[qd>>2];g[ug>>2]=+g[Td>>2]-+g[Ud>>2];g[xf>>2]=+g[A>>2]-+g[ha>>2];g[yf>>2]=+g[ug>>2]+ 
+g[xf>>2];g[Oh>>2]=+g[ug>>2]-+g[xf>>2];g[Uf>>2]=+g[ic>>2]-+g[dc>>2];g[Vf>>2]=+g[od>>2]-+g[pd>>2];g[Wf>>2]=+g[Uf>>2]-+g[Vf>>2];g[ui>>2]=+g[Uf>>2]+ +g[Vf>>2];g[rg>>2]=+g[bj>>2]-+g[kj>>2];g[sg>>2]=+g[Wd>>2]-+g[Xd>>2];g[tg>>2]=+g[rg>>2]-+g[sg>>2];g[Nh>>2]=+g[rg>>2]+ +g[sg>>2];g[Ui>>2]=+g[yj>>2]+ +g[Ti>>2];g[ja>>2]=+g[lj>>2]+ +g[ia>>2];g[ka>>2]=+g[Ui>>2]+ +g[ja>>2];g[Ve>>2]=+g[Ui>>2]-+g[ja>>2];g[Ne>>2]=+g[rd>>2]-+g[kd>>2];g[Oe>>2]=+g[_b>>2]-+g[Wc>>2];g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[Ze>>2]=+g[Ne>>2]-+g[Oe>>2];g[Qe>>2]=+g[Ac>>2]+ +g[eb>>2];g[Re>>2]=+g[fe>>2]-+g[zd>>2];g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[_e>>2]=+g[Qe>>2]+ +g[Re>>2];g[Z>>2]=+g[F>>2]+ +g[Y>>2];g[Db>>2]=+g[pb>>2]+ +g[Cb>>2];g[Eb>>2]=+g[Z>>2]+ +g[Db>>2];g[le>>2]=+g[Db>>2]-+g[Z>>2];g[Bc>>2]=+g[eb>>2]-+g[Ac>>2];g[Xc>>2]=+g[_b>>2]+ +g[Wc>>2];g[Yc>>2]=+g[Bc>>2]-+g[Xc>>2];g[ae>>2]=+g[Xc>>2]+ +g[Bc>>2];g[Sd>>2]=+g[Od>>2]+ +g[Rd>>2];g[Zd>>2]=+g[Vd>>2]+ +g[Yd>>2];g[_d>>2]=+g[Sd>>2]+ +g[Zd>>2];g[ke>>2]=+g[Zd>>2]-+g[Sd>>2];g[sd>>2]=+g[kd>>2]+ +g[rd>>2];g[ge>>2]=+g[zd>>2]+ +g[fe>>2];g[he>>2]=+g[sd>>2]-+g[ge>>2];g[je>>2]=+g[sd>>2]+ +g[ge>>2];g[dd>>2]=+g[$c>>2]+ +g[cd>>2];g[Kd>>2]=+g[Gd>>2]+ +g[Jd>>2];g[Ld>>2]=+g[dd>>2]+ +g[Kd>>2];g[Ue>>2]=+g[dd>>2]-+g[Kd>>2];g[Fb>>2]=+g[ka>>2]+ +g[Eb>>2];g[c[n>>2]>>2]=(+g[Fb>>2]+ +g[Yc>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=(+g[Yc>>2]-+g[Fb>>2])*.5;g[ie>>2]=+g[Ld>>2]+ +g[_d>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=(+g[ie>>2]-+g[je>>2])*.5;g[c[m>>2]>>2]=(+g[ie>>2]+ +g[je>>2])*.5;g[$d>>2]=+g[Ld>>2]-+g[_d>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[$d>>2]-+g[ae>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[$d>>2]+ +g[ae>>2])*.5;g[be>>2]=+g[Eb>>2]-+g[ka>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[be>>2]+ +g[he>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[he>>2]-+g[be>>2])*.5;g[me>>2]=(+g[ke>>2]+ +g[le>>2])*.5;g[Te>>2]=(+g[Pe>>2]+ +g[Se>>2])*.3535533845424652;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[me>>2]+ 
+g[Te>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Te>>2]-+g[me>>2];g[af>>2]=(+g[Ue>>2]+ +g[Ve>>2])*.5;g[bf>>2]=(+g[Ze>>2]+ +g[_e>>2])*.3535533845424652;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[af>>2]-+g[bf>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[af>>2]+ +g[bf>>2];g[We>>2]=(+g[Ue>>2]-+g[Ve>>2])*.5;g[Xe>>2]=(+g[Se>>2]-+g[Pe>>2])*.3535533845424652;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[We>>2]-+g[Xe>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[We>>2]+ +g[Xe>>2];g[Ye>>2]=(+g[le>>2]-+g[ke>>2])*.5;g[$e>>2]=(+g[Ze>>2]-+g[_e>>2])*.3535533845424652;g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Ye>>2]+ +g[$e>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[$e>>2]-+g[Ye>>2];g[ef>>2]=+g[cf>>2]-+g[df>>2];g[hf>>2]=+g[ff>>2]+ +g[gf>>2];g[jf>>2]=(+g[ef>>2]+ +g[hf>>2])*.3535533845424652;g[bg>>2]=(+g[ef>>2]-+g[hf>>2])*.3535533845424652;g[vf>>2]=+g[qe>>2]-+g[pe>>2];g[wf>>2]=+g[te>>2]-+g[se>>2];g[Xf>>2]=+g[vf>>2]*.4619397521018982+ +g[wf>>2]*.19134171307086945;g[fg>>2]=+g[vf>>2]*.19134171307086945-+g[wf>>2]*.4619397521018982;g[Yf>>2]=+g[xe>>2]-+g[we>>2];g[Zf>>2]=+g[Ae>>2]-+g[ze>>2];g[_f>>2]=+g[Yf>>2]*.19134171307086945-+g[Zf>>2]*.4619397521018982;g[gg>>2]=+g[Yf>>2]*.4619397521018982+ +g[Zf>>2]*.19134171307086945;g[kf>>2]=+g[Jd>>2]-+g[Gd>>2];g[lf>>2]=+g[Cb>>2]-+g[pb>>2];g[ne>>2]=(+g[kf>>2]+ +g[lf>>2])*.5;g[tf>>2]=(+g[lf>>2]-+g[kf>>2])*.5;g[re>>2]=+g[pe>>2]+ +g[qe>>2];g[ue>>2]=+g[se>>2]+ +g[te>>2];g[ve>>2]=+g[re>>2]*.19134171307086945+ +g[ue>>2]*.4619397521018982;g[nf>>2]=+g[re>>2]*.4619397521018982-+g[ue>>2]*.19134171307086945;g[Ee>>2]=+g[$c>>2]-+g[cd>>2];g[Fe>>2]=+g[F>>2]-+g[Y>>2];g[Ge>>2]=(+g[Ee>>2]+ +g[Fe>>2])*.5;g[ag>>2]=(+g[Ee>>2]-+g[Fe>>2])*.5;g[He>>2]=+g[df>>2]+ +g[cf>>2];g[Ie>>2]=+g[ff>>2]-+g[gf>>2];g[Je>>2]=(+g[He>>2]+ +g[Ie>>2])*.3535533845424652;g[sf>>2]=(+g[Ie>>2]-+g[He>>2])*.3535533845424652;g[ye>>2]=+g[we>>2]+ +g[xe>>2];g[Be>>2]=+g[ze>>2]+ +g[Ae>>2];g[Ce>>2]=+g[ye>>2]*.4619397521018982-+g[Be>>2]*.19134171307086945;g[of>>2]=+g[ye>>2]*.19134171307086945+ 
+g[Be>>2]*.4619397521018982;g[oe>>2]=+g[jf>>2]+ +g[ne>>2];g[De>>2]=+g[ve>>2]+ +g[Ce>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[oe>>2]+ +g[De>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[De>>2]-+g[oe>>2];g[qf>>2]=+g[Ge>>2]+ +g[Je>>2];g[rf>>2]=+g[nf>>2]+ +g[of>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[qf>>2]-+g[rf>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[qf>>2]+ +g[rf>>2];g[Ke>>2]=+g[Ge>>2]-+g[Je>>2];g[Le>>2]=+g[Ce>>2]-+g[ve>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ke>>2]-+g[Le>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ke>>2]+ +g[Le>>2];g[Me>>2]=+g[ne>>2]-+g[jf>>2];g[pf>>2]=+g[nf>>2]-+g[of>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Me>>2]+ +g[pf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[pf>>2]-+g[Me>>2];g[uf>>2]=+g[sf>>2]+ +g[tf>>2];g[$f>>2]=+g[Xf>>2]+ +g[_f>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[uf>>2]+ +g[$f>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[$f>>2]-+g[uf>>2];g[ig>>2]=+g[ag>>2]+ +g[bg>>2];g[jg>>2]=+g[fg>>2]+ +g[gg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[ig>>2]-+g[jg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[ig>>2]+ +g[jg>>2];g[cg>>2]=+g[ag>>2]-+g[bg>>2];g[dg>>2]=+g[_f>>2]-+g[Xf>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[cg>>2]-+g[dg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[cg>>2]+ +g[dg>>2];g[eg>>2]=+g[tf>>2]-+g[sf>>2];g[hg>>2]=+g[fg>>2]-+g[gg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[eg>>2]+ +g[hg>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[hg>>2]-+g[eg>>2];g[qg>>2]=+g[mg>>2]*.4619397521018982-+g[pg>>2]*.19134171307086945;g[zf>>2]=+g[tg>>2]*.4619397521018982+ +g[yf>>2]*.19134171307086945;g[Af>>2]=+g[qg>>2]+ +g[zf>>2];g[ah>>2]=+g[qg>>2]-+g[zf>>2];g[Hf>>2]=(+g[Df>>2]+ +g[Gf>>2])*.3535533845424652;g[Kf>>2]=(+g[If>>2]-+g[Jf>>2])*.5;g[Lf>>2]=+g[Hf>>2]+ +g[Kf>>2];g[Sg>>2]=+g[Kf>>2]-+g[Hf>>2];g[Gg>>2]=+g[mg>>2]*.19134171307086945+ +g[pg>>2]*.4619397521018982;g[Hg>>2]=+g[yf>>2]*.4619397521018982-+g[tg>>2]*.19134171307086945;g[Ig>>2]=+g[Gg>>2]+ +g[Hg>>2];g[Rg>>2]=+g[Hg>>2]-+g[Gg>>2];g[zh>>2]=(+g[xh>>2]+ +g[yh>>2])*.5;g[Ch>>2]=(+g[Ah>>2]+ 
+g[Bh>>2])*.3535533845424652;g[Dh>>2]=+g[zh>>2]+ +g[Ch>>2];g[$g>>2]=+g[zh>>2]-+g[Ch>>2];g[jh>>2]=(+g[fh>>2]+ +g[ih>>2])*.7071067690849304;g[nh>>2]=+g[jh>>2]+ +g[mh>>2];g[Xg>>2]=+g[mh>>2]-+g[jh>>2];g[qh>>2]=(+g[oh>>2]+ +g[ph>>2])*.7071067690849304;g[uh>>2]=+g[qh>>2]+ +g[th>>2];g[Yg>>2]=+g[th>>2]-+g[qh>>2];g[vh>>2]=+g[nh>>2]*.49039262533187866-+g[uh>>2]*.09754516184329987;g[Gh>>2]=+g[Xg>>2]*.41573479771614075+ +g[Yg>>2]*.27778512239456177;g[Ng>>2]=+g[nh>>2]*.09754516184329987+ +g[uh>>2]*.49039262533187866;g[Zg>>2]=+g[Xg>>2]*.27778512239456177-+g[Yg>>2]*.41573479771614075;g[Tf>>2]=(+g[Pf>>2]+ +g[Sf>>2])*.7071067690849304;g[wg>>2]=+g[Tf>>2]+ +g[Wf>>2];g[Ug>>2]=+g[Wf>>2]-+g[Tf>>2];g[zg>>2]=(+g[xg>>2]+ +g[yg>>2])*.7071067690849304;g[Dg>>2]=+g[zg>>2]+ +g[Cg>>2];g[Vg>>2]=+g[Cg>>2]-+g[zg>>2];g[Eg>>2]=+g[wg>>2]*.49039262533187866+ +g[Dg>>2]*.09754516184329987;g[Fh>>2]=+g[Vg>>2]*.27778512239456177-+g[Ug>>2]*.41573479771614075;g[Mg>>2]=+g[Dg>>2]*.49039262533187866-+g[wg>>2]*.09754516184329987;g[Wg>>2]=+g[Ug>>2]*.27778512239456177+ +g[Vg>>2]*.41573479771614075;g[Mf>>2]=+g[Af>>2]+ +g[Lf>>2];g[wh>>2]=+g[Eg>>2]+ +g[vh>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Mf>>2]+ +g[wh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[wh>>2]-+g[Mf>>2];g[Pg>>2]=+g[Dh>>2]+ +g[Ig>>2];g[Qg>>2]=+g[Mg>>2]+ +g[Ng>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Pg>>2]-+g[Qg>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Pg>>2]+ +g[Qg>>2];g[Jg>>2]=+g[Dh>>2]-+g[Ig>>2];g[Kg>>2]=+g[vh>>2]-+g[Eg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Jg>>2]-+g[Kg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Jg>>2]+ +g[Kg>>2];g[Lg>>2]=+g[Lf>>2]-+g[Af>>2];g[Og>>2]=+g[Mg>>2]-+g[Ng>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Lg>>2]+ +g[Og>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Og>>2]-+g[Lg>>2];g[Tg>>2]=+g[Rg>>2]+ +g[Sg>>2];g[_g>>2]=+g[Wg>>2]+ +g[Zg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Tg>>2]+ +g[_g>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[_g>>2]-+g[Tg>>2];g[Ih>>2]=+g[$g>>2]+ +g[ah>>2];g[Jh>>2]=+g[Fh>>2]+ 
+g[Gh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ih>>2]-+g[Jh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ih>>2]+ +g[Jh>>2];g[bh>>2]=+g[$g>>2]-+g[ah>>2];g[ch>>2]=+g[Zg>>2]-+g[Wg>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[bh>>2]-+g[ch>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[bh>>2]+ +g[ch>>2];g[dh>>2]=+g[Sg>>2]-+g[Rg>>2];g[Hh>>2]=+g[Fh>>2]-+g[Gh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[dh>>2]+ +g[Hh>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Hh>>2]-+g[dh>>2];g[Mh>>2]=+g[Kh>>2]*.19134171307086945-+g[Lh>>2]*.4619397521018982;g[ni>>2]=+g[Nh>>2]*.19134171307086945+ +g[Oh>>2]*.4619397521018982;g[oi>>2]=+g[Mh>>2]+ +g[ni>>2];g[hi>>2]=+g[Mh>>2]-+g[ni>>2];g[pi>>2]=(+g[Bh>>2]-+g[Ah>>2])*.3535533845424652;g[qi>>2]=(+g[Jf>>2]+ +g[If>>2])*.5;g[ri>>2]=+g[pi>>2]+ +g[qi>>2];g[Zh>>2]=+g[qi>>2]-+g[pi>>2];g[Li>>2]=+g[Kh>>2]*.4619397521018982+ +g[Lh>>2]*.19134171307086945;g[Mi>>2]=+g[Oh>>2]*.19134171307086945-+g[Nh>>2]*.4619397521018982;g[Ph>>2]=+g[Li>>2]+ +g[Mi>>2];g[Yh>>2]=+g[Mi>>2]-+g[Li>>2];g[Ii>>2]=(+g[xh>>2]-+g[yh>>2])*.5;g[Ji>>2]=(+g[Df>>2]-+g[Gf>>2])*.3535533845424652;g[Ki>>2]=+g[Ii>>2]+ +g[Ji>>2];g[gi>>2]=+g[Ii>>2]-+g[Ji>>2];g[Ai>>2]=(+g[ph>>2]-+g[oh>>2])*.7071067690849304;g[Ci>>2]=+g[Ai>>2]+ +g[Bi>>2];g[ci>>2]=+g[Bi>>2]-+g[Ai>>2];g[Di>>2]=(+g[fh>>2]-+g[ih>>2])*.7071067690849304;g[Fi>>2]=+g[Di>>2]+ +g[Ei>>2];g[di>>2]=+g[Ei>>2]-+g[Di>>2];g[Gi>>2]=+g[Ci>>2]*.41573479771614075-+g[Fi>>2]*.27778512239456177;g[mi>>2]=+g[ci>>2]*.49039262533187866+ +g[di>>2]*.09754516184329987;g[Uh>>2]=+g[Ci>>2]*.27778512239456177+ +g[Fi>>2]*.41573479771614075;g[ei>>2]=+g[ci>>2]*.09754516184329987-+g[di>>2]*.49039262533187866;g[ti>>2]=(+g[yg>>2]-+g[xg>>2])*.7071067690849304;g[vi>>2]=+g[ti>>2]+ +g[ui>>2];g[$h>>2]=+g[ui>>2]-+g[ti>>2];g[wi>>2]=(+g[Pf>>2]-+g[Sf>>2])*.7071067690849304;g[yi>>2]=+g[wi>>2]+ +g[xi>>2];g[ai>>2]=+g[xi>>2]-+g[wi>>2];g[zi>>2]=+g[vi>>2]*.41573479771614075+ 
+g[yi>>2]*.27778512239456177;g[li>>2]=+g[ai>>2]*.09754516184329987-+g[$h>>2]*.49039262533187866;g[Th>>2]=+g[yi>>2]*.41573479771614075-+g[vi>>2]*.27778512239456177;g[bi>>2]=+g[$h>>2]*.09754516184329987+ +g[ai>>2]*.49039262533187866;g[si>>2]=+g[oi>>2]+ +g[ri>>2];g[Hi>>2]=+g[zi>>2]+ +g[Gi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[si>>2]+ +g[Hi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Hi>>2]-+g[si>>2];g[Wh>>2]=+g[Ki>>2]+ +g[Ph>>2];g[Xh>>2]=+g[Th>>2]+ +g[Uh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Wh>>2]-+g[Xh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Wh>>2]+ +g[Xh>>2];g[Qh>>2]=+g[Ki>>2]-+g[Ph>>2];g[Rh>>2]=+g[Gi>>2]-+g[zi>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Qh>>2]-+g[Rh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Qh>>2]+ +g[Rh>>2];g[Sh>>2]=+g[ri>>2]-+g[oi>>2];g[Vh>>2]=+g[Th>>2]-+g[Uh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Sh>>2]+ +g[Vh>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Vh>>2]-+g[Sh>>2];g[_h>>2]=+g[Yh>>2]+ +g[Zh>>2];g[fi>>2]=+g[bi>>2]+ +g[ei>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[_h>>2]+ +g[fi>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[fi>>2]-+g[_h>>2];g[Pi>>2]=+g[gi>>2]+ +g[hi>>2];g[Qi>>2]=+g[li>>2]+ +g[mi>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Pi>>2]-+g[Qi>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Pi>>2]+ +g[Qi>>2];g[ii>>2]=+g[gi>>2]-+g[hi>>2];g[ji>>2]=+g[ei>>2]-+g[bi>>2];g[c[o>>2]>>2]=+g[ii>>2]-+g[ji>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ii>>2]+ +g[ji>>2];g[ki>>2]=+g[Zh>>2]-+g[Yh>>2];g[Oi>>2]=+g[li>>2]-+g[mi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ki>>2]+ +g[Oi>>2];g[c[p>>2]>>2]=+g[Oi>>2]-+g[ki>>2];c[Pj>>2]=(c[Pj>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+248;c[r>>2]=c[r>>2]^c[2998]}i=Qj;return}function hr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,49,4408,1);i=b;return}function ir(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+192|0;m=da+184|0;n=da+180|0;o=da+176|0;p=da+172|0;q=da+168|0;r=da+164|0;ea=da+160|0;s=da+156|0;t=da+152|0;ca=da+144|0;G=da+140|0;V=da+136|0;O=da+132|0;$=da+128|0;C=da+124|0;W=da+120|0;T=da+116|0;_=da+112|0;E=da+108|0;F=da+104|0;N=da+100|0;J=da+96|0;K=da+92|0;L=da+88|0;I=da+84|0;M=da+80|0;x=da+76|0;S=da+72|0;B=da+68|0;Q=da+64|0;v=da+60|0;w=da+56|0;z=da+52|0;A=da+48|0;u=da+44|0;y=da+40|0;P=da+36|0;R=da+32|0;H=da+28|0;U=da+24|0;ba=da+20|0;D=da+16|0;X=da+12|0;Y=da+8|0;Z=da+4|0;aa=da;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ea>>2]=j;c[s>>2]=k;c[t>>2]=l;g[da+148>>2]=.5;c[ca>>2]=c[ea>>2];c[q>>2]=(c[q>>2]|0)+(((c[ea>>2]|0)-1|0)*6<<2);while(1){if((c[ca>>2]|0)>=(c[s>>2]|0))break;g[E>>2]=+g[c[n>>2]>>2];g[F>>2]=+g[c[p>>2]>>2];g[N>>2]=+g[E>>2]+ +g[F>>2];g[J>>2]=+g[c[o>>2]>>2];g[K>>2]=+g[c[m>>2]>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[G>>2]=+g[E>>2]-+g[F>>2];g[V>>2]=+g[K>>2]+ +g[J>>2];g[I>>2]=+g[c[q>>2]>>2];g[M>>2]=+g[(c[q>>2]|0)+4>>2];g[O>>2]=+g[I>>2]*+g[L>>2]-+g[M>>2]*+g[N>>2];g[$>>2]=+g[M>>2]*+g[L>>2]+ +g[I>>2]*+g[N>>2];g[v>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[w>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[S>>2]=+g[v>>2]+ +g[w>>2];g[z>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[Q>>2]=+g[z>>2]-+g[A>>2];g[u>>2]=+g[(c[q>>2]|0)+8>>2];g[y>>2]=+g[(c[q>>2]|0)+12>>2];g[C>>2]=+g[u>>2]*+g[x>>2]-+g[y>>2]*+g[B>>2];g[W>>2]=+g[u>>2]*+g[B>>2]+ +g[y>>2]*+g[x>>2];g[P>>2]=+g[(c[q>>2]|0)+16>>2];g[R>>2]=+g[(c[q>>2]|0)+20>>2];g[T>>2]=+g[P>>2]*+g[Q>>2]+ +g[R>>2]*+g[S>>2];g[_>>2]=+g[P>>2]*+g[S>>2]-+g[R>>2]*+g[Q>>2];g[H>>2]=+g[C>>2]+ +g[G>>2];g[U>>2]=+g[O>>2]-+g[T>>2];g[c[n>>2]>>2]=(+g[H>>2]+ +g[U>>2])*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=(+g[U>>2]-+g[H>>2])*.5;g[ba>>2]=+g[V>>2]+ +g[W>>2];g[D>>2]=+g[_>>2]+ 
+g[$>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[ba>>2]-+g[D>>2])*.5;g[c[m>>2]>>2]=(+g[ba>>2]+ +g[D>>2])*.5;g[X>>2]=+g[V>>2]-+g[W>>2];g[Y>>2]=+g[T>>2]+ +g[O>>2];g[c[o>>2]>>2]=(+g[X>>2]-+g[Y>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=(+g[X>>2]+ +g[Y>>2])*.5;g[Z>>2]=+g[G>>2]-+g[C>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=(+g[Z>>2]+ +g[aa>>2])*.5;g[c[p>>2]>>2]=(+g[aa>>2]-+g[Z>>2])*.5;c[ca>>2]=(c[ca>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24}i=da;return}function jr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,50,4456,1);i=b;return}function kr(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0;Ja=i;i=i+336|0;m=Ja+320|0;n=Ja+316|0;o=Ja+312|0;p=Ja+308|0;q=Ja+304|0;r=Ja+300|0;Ka=Ja+296|0;s=Ja+292|0;t=Ja+288|0;Ia=Ja+272|0;D=Ja+268|0;W=Ja+264|0;ka=Ja+260|0;X=Ja+256|0;Aa=Ja+252|0;v=Ja+248|0;S=Ja+244|0;$=Ja+240|0;P=Ja+236|0;w=Ja+232|0;R=Ja+228|0;ca=Ja+224|0;u=Ja+220|0;C=Ja+216|0;N=Ja+212|0;Ha=Ja+208|0;K=Ja+204|0;L=Ja+200|0;H=Ja+196|0;na=Ja+192|0;ja=Ja+188|0;pa=Ja+184|0;ya=Ja+180|0;Ea=Ja+176|0;ua=Ja+172|0;Ca=Ja+168|0;E=Ja+164|0;I=Ja+160|0;F=Ja+156|0;G=Ja+152|0;J=Ja+148|0;ia=Ja+144|0;wa=Ja+140|0;xa=Ja+136|0;sa=Ja+132|0;ta=Ja+128|0;qa=Ja+124|0;Z=Ja+120|0;za=Ja+116|0;_=Ja+112|0;ma=Ja+108|0;oa=Ja+104|0;ra=Ja+100|0;va=Ja+96|0;Fa=Ja+92|0;aa=Ja+88|0;O=Ja+84|0;ba=Ja+80|0;Ba=Ja+76|0;Da=Ja+72|0;Ga=Ja+68|0;M=Ja+64|0;x=Ja+60|0;la=Ja+56|0;Q=Ja+52|0;ha=Ja+48|0;B=Ja+44|0;y=Ja+40|0;z=Ja+36|0;A=Ja+32|0;ga=Ja+28|0;T=Ja+24|0;U=Ja+20|0;fa=Ja+16|0;V=Ja+12|0;Y=Ja+8|0;da=J
a+4|0;ea=Ja;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ka>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Ja+284>>2]=.25;g[Ja+280>>2]=.5;g[Ja+276>>2]=.4330126941204071;c[Ia>>2]=c[Ka>>2];c[q>>2]=(c[q>>2]|0)+(((c[Ka>>2]|0)-1|0)*10<<2);while(1){if((c[Ia>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[n>>2]>>2];g[C>>2]=+g[c[p>>2]>>2];g[N>>2]=+g[u>>2]+ +g[C>>2];g[Ha>>2]=+g[c[o>>2]>>2];g[K>>2]=+g[c[m>>2]>>2];g[L>>2]=+g[Ha>>2]-+g[K>>2];g[F>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[G>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[na>>2]=+g[F>>2]-+g[G>>2];g[J>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[ia>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[ja>>2]=+g[J>>2]-+g[ia>>2];g[pa>>2]=+g[J>>2]+ +g[ia>>2];g[wa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[xa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2];g[Ea>>2]=+g[wa>>2]+ +g[xa>>2];g[sa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ta>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ua>>2]=+g[sa>>2]+ +g[ta>>2];g[Ca>>2]=+g[sa>>2]-+g[ta>>2];g[D>>2]=+g[u>>2]-+g[C>>2];g[W>>2]=+g[K>>2]+ +g[Ha>>2];g[E>>2]=+g[(c[q>>2]|0)+20>>2];g[I>>2]=+g[(c[q>>2]|0)+16>>2];g[ka>>2]=+g[E>>2]*+g[H>>2]+ +g[I>>2]*+g[ja>>2];g[X>>2]=+g[I>>2]*+g[H>>2]-+g[E>>2]*+g[ja>>2];g[ma>>2]=+g[(c[q>>2]|0)+8>>2];g[oa>>2]=+g[(c[q>>2]|0)+12>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]-+g[oa>>2]*+g[pa>>2];g[Z>>2]=+g[oa>>2]*+g[na>>2]+ +g[ma>>2]*+g[pa>>2];g[ra>>2]=+g[(c[q>>2]|0)+36>>2];g[va>>2]=+g[(c[q>>2]|0)+32>>2];g[za>>2]=+g[ra>>2]*+g[ua>>2]+ +g[va>>2]*+g[ya>>2];g[_>>2]=+g[va>>2]*+g[ua>>2]-+g[ra>>2]*+g[ya>>2];g[Aa>>2]=+g[qa>>2]-+g[za>>2];g[v>>2]=+g[Z>>2]+ +g[_>>2];g[S>>2]=+g[qa>>2]+ +g[za>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[Ba>>2]=+g[(c[q>>2]|0)+24>>2];g[Da>>2]=+g[(c[q>>2]|0)+28>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[aa>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[Ba>>2]*+g[Ea>>2];g[Ga>>2]=+g[c[q>>2]>>2];g[M>>2]=+g[(c[q>>2]|0)+4>>2];g[O>>2]=+g[Ga>>2]*+g[L>>2]-+g[M>>2]*+g[N>>2];g[ba>>2]=+g[M>>2]*+g[L>>2]+ +g[Ga>>2]*+g[N>>2];g[P>>2]=+g[Fa>>2]+ +g[O>>2];g[w>>2]=+g[aa>>2]+ 
+g[ba>>2];g[R>>2]=+g[O>>2]-+g[Fa>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[x>>2]=(+g[v>>2]-+g[w>>2])*.4330126941204071;g[la>>2]=+g[D>>2]-+g[ka>>2];g[Q>>2]=+g[Aa>>2]+ +g[P>>2];g[ha>>2]=+g[la>>2]*.5-+g[Q>>2]*.25;g[c[n>>2]>>2]=(+g[la>>2]+ +g[Q>>2])*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[x>>2]-+g[ha>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ha>>2]+ +g[x>>2];g[B>>2]=(+g[Aa>>2]-+g[P>>2])*.4330126941204071;g[y>>2]=+g[W>>2]+ +g[X>>2];g[z>>2]=+g[v>>2]+ +g[w>>2];g[A>>2]=+g[y>>2]*.5-+g[z>>2]*.25;g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]-+g[B>>2];g[c[m>>2]>>2]=(+g[y>>2]+ +g[z>>2])*.5;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[B>>2]+ +g[A>>2];g[ga>>2]=(+g[ca>>2]-+g[$>>2])*.4330126941204071;g[T>>2]=+g[R>>2]-+g[S>>2];g[U>>2]=+g[ka>>2]+ +g[D>>2];g[fa>>2]=+g[U>>2]*.5+ +g[T>>2]*.25;g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[T>>2]-+g[U>>2])*.5;g[c[p>>2]>>2]=+g[ga>>2]-+g[fa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[fa>>2]+ +g[ga>>2];g[V>>2]=(+g[S>>2]+ +g[R>>2])*.4330126941204071;g[Y>>2]=+g[W>>2]-+g[X>>2];g[da>>2]=+g[$>>2]+ +g[ca>>2];g[ea>>2]=+g[Y>>2]*.5-+g[da>>2]*.25;g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[V>>2]+ +g[ea>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[Y>>2]+ +g[da>>2])*.5;g[c[o>>2]>>2]=+g[ea>>2]-+g[V>>2];c[Ia>>2]=(c[Ia>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+40;c[r>>2]=c[r>>2]^c[2998]}i=Ja;return}function lr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,51,4504,1);i=b;return}function mr(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0;jb=i;i=i+432|0;m=jb+428|0;n=jb+424|0;o=jb+420|0;p=jb+416|0;q=jb+412|0;r=jb+408|0;kb=jb+404|0;s=jb+400|0;t=jb+396|0;ib=jb+384|0;db=jb+380|0;Ha=jb+376|0;ab=jb+372|0;Ia=jb+368|0;oa=jb+364|0;F=jb+360|0;ta=jb+356|0;E=jb+352|0;Q=jb+348|0;R=jb+344|0;ja=jb+340|0;v=jb+336|0;Sa=jb+332|0;w=jb+328|0;za=jb+324|0;B=jb+320|0;Ea=jb+316|0;C=jb+312|0;N=jb+308|0;O=jb+304|0;bb=jb+300|0;cb=jb+296|0;na=jb+292|0;hb=jb+288|0;ka=jb+284|0;la=jb+280|0;Xa=jb+276|0;sa=jb+272|0;$a=jb+268|0;qa=jb+264|0;Ua=jb+260|0;Ya=jb+256|0;Va=jb+252|0;Wa=jb+248|0;Za=jb+244|0;_a=jb+240|0;gb=jb+236|0;ma=jb+232|0;pa=jb+228|0;ra=jb+224|0;ea=jb+220|0;ya=jb+216|0;ia=jb+212|0;wa=jb+208|0;Na=jb+204|0;Da=jb+200|0;Ra=jb+196|0;Ba=jb+192|0;ca=jb+188|0;da=jb+184|0;ga=jb+180|0;ha=jb+176|0;La=jb+172|0;Ma=jb+168|0;Pa=jb+164|0;Qa=jb+160|0;u=jb+156|0;fa=jb+152|0;Ka=jb+148|0;Oa=jb+144|0;va=jb+140|0;xa=jb+136|0;Aa=jb+132|0;Ca=jb+128|0;M=jb+124|0;Y=jb+120|0;$=jb+116|0;ba=jb+112|0;T=jb+108|0;X=jb+104|0;W=jb+100|0;aa=jb+96|0;K=jb+92|0;L=jb+88|0;Z=jb+84|0;_=jb+80|0;P=jb+76|0;S=jb+72|0;U=jb+68|0;V=jb+64|0;fb=jb+60|0;A=jb+56|0;H=jb+52|0;J=jb+48|0;Ga=jb+44|0;z=jb+40|0;y=jb+36|0;I=jb+32|0;Ta=jb+28|0;eb=jb+24|0;D=jb+20|0;G=jb+16|0;ua=jb+12|0;Fa=jb+8|0;Ja=jb+4|0;x=jb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[kb>>2]=j;c[s>>2]=k;c[t>>2]=l;g[jb+392>>2]=.3535533845424652;g[jb+388>>2]=.5;c[ib>>2]=c[kb>>2];c[q>>2]=(c[q>>2]|0)+(((c[kb>>2]|0)-1|0)*14<<2);while(1){if((c[ib>>2]|0)>=(c[s>>2]|0))break;g[bb>>2]=+g[c[n>>2]>>2];g[cb>>2]=+g[c[p>>2]>>2]
;g[na>>2]=+g[bb>>2]+ +g[cb>>2];g[hb>>2]=+g[c[o>>2]>>2];g[ka>>2]=+g[c[m>>2]>>2];g[la>>2]=+g[hb>>2]-+g[ka>>2];g[Va>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Wa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[sa>>2]=+g[Va>>2]+ +g[Wa>>2];g[Za>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[_a>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$a>>2]=+g[Za>>2]+ +g[_a>>2];g[qa>>2]=+g[Za>>2]-+g[_a>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[Ha>>2]=+g[ka>>2]+ +g[hb>>2];g[Ua>>2]=+g[(c[q>>2]|0)+24>>2];g[Ya>>2]=+g[(c[q>>2]|0)+28>>2];g[ab>>2]=+g[Ua>>2]*+g[Xa>>2]-+g[Ya>>2]*+g[$a>>2];g[Ia>>2]=+g[Ua>>2]*+g[$a>>2]+ +g[Ya>>2]*+g[Xa>>2];g[gb>>2]=+g[c[q>>2]>>2];g[ma>>2]=+g[(c[q>>2]|0)+4>>2];g[oa>>2]=+g[gb>>2]*+g[la>>2]-+g[ma>>2]*+g[na>>2];g[F>>2]=+g[ma>>2]*+g[la>>2]+ +g[gb>>2]*+g[na>>2];g[pa>>2]=+g[(c[q>>2]|0)+32>>2];g[ra>>2]=+g[(c[q>>2]|0)+36>>2];g[ta>>2]=+g[pa>>2]*+g[qa>>2]+ +g[ra>>2]*+g[sa>>2];g[E>>2]=+g[pa>>2]*+g[sa>>2]-+g[ra>>2]*+g[qa>>2];g[Q>>2]=+g[ta>>2]+ +g[oa>>2];g[R>>2]=+g[F>>2]-+g[E>>2];g[ca>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[da>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ya>>2]=+g[ca>>2]+ +g[da>>2];g[ga>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[ha>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[wa>>2]=+g[ga>>2]-+g[ha>>2];g[La>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ma>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[Da>>2]=+g[La>>2]+ +g[Ma>>2];g[Pa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Qa>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Ba>>2]=+g[Pa>>2]-+g[Qa>>2];g[u>>2]=+g[(c[q>>2]|0)+8>>2];g[fa>>2]=+g[(c[q>>2]|0)+12>>2];g[ja>>2]=+g[u>>2]*+g[ea>>2]-+g[fa>>2]*+g[ia>>2];g[v>>2]=+g[u>>2]*+g[ia>>2]+ +g[fa>>2]*+g[ea>>2];g[Ka>>2]=+g[(c[q>>2]|0)+40>>2];g[Oa>>2]=+g[(c[q>>2]|0)+44>>2];g[Sa>>2]=+g[Ka>>2]*+g[Na>>2]-+g[Oa>>2]*+g[Ra>>2];g[w>>2]=+g[Ka>>2]*+g[Ra>>2]+ +g[Oa>>2]*+g[Na>>2];g[va>>2]=+g[(c[q>>2]|0)+16>>2];g[xa>>2]=+g[(c[q>>2]|0)+20>>2];g[za>>2]=+g[va>>2]*+g[wa>>2]+ 
+g[xa>>2]*+g[ya>>2];g[B>>2]=+g[va>>2]*+g[ya>>2]-+g[xa>>2]*+g[wa>>2];g[Aa>>2]=+g[(c[q>>2]|0)+48>>2];g[Ca>>2]=+g[(c[q>>2]|0)+52>>2];g[Ea>>2]=+g[Aa>>2]*+g[Ba>>2]+ +g[Ca>>2]*+g[Da>>2];g[C>>2]=+g[Aa>>2]*+g[Da>>2]-+g[Ca>>2]*+g[Ba>>2];g[N>>2]=+g[C>>2]-+g[B>>2];g[O>>2]=+g[za>>2]-+g[Ea>>2];g[K>>2]=+g[db>>2]-+g[ab>>2];g[L>>2]=+g[v>>2]-+g[w>>2];g[M>>2]=(+g[K>>2]-+g[L>>2])*.5;g[Y>>2]=(+g[L>>2]+ +g[K>>2])*.5;g[Z>>2]=+g[N>>2]-+g[O>>2];g[_>>2]=+g[Q>>2]+ +g[R>>2];g[$>>2]=(+g[Z>>2]-+g[_>>2])*.3535533845424652;g[ba>>2]=(+g[Z>>2]+ +g[_>>2])*.3535533845424652;g[P>>2]=+g[N>>2]+ +g[O>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[T>>2]=(+g[P>>2]+ +g[S>>2])*.3535533845424652;g[X>>2]=(+g[S>>2]-+g[P>>2])*.3535533845424652;g[U>>2]=+g[Ha>>2]-+g[Ia>>2];g[V>>2]=+g[ja>>2]-+g[Sa>>2];g[W>>2]=(+g[U>>2]-+g[V>>2])*.5;g[aa>>2]=(+g[U>>2]+ +g[V>>2])*.5;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[M>>2]+ +g[T>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[T>>2]-+g[M>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[aa>>2]-+g[ba>>2];g[c[o>>2]>>2]=+g[W>>2]-+g[X>>2];g[c[p>>2]>>2]=+g[$>>2]-+g[Y>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[W>>2]+ +g[X>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Y>>2]+ +g[$>>2];g[Ta>>2]=+g[ja>>2]+ +g[Sa>>2];g[eb>>2]=+g[ab>>2]+ +g[db>>2];g[fb>>2]=+g[Ta>>2]+ +g[eb>>2];g[A>>2]=+g[eb>>2]-+g[Ta>>2];g[D>>2]=+g[B>>2]+ +g[C>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[H>>2]=+g[D>>2]-+g[G>>2];g[J>>2]=+g[D>>2]+ +g[G>>2];g[ua>>2]=+g[oa>>2]-+g[ta>>2];g[Fa>>2]=+g[za>>2]+ +g[Ea>>2];g[Ga>>2]=+g[ua>>2]-+g[Fa>>2];g[z>>2]=+g[Fa>>2]+ +g[ua>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[y>>2]=+g[Ja>>2]-+g[x>>2];g[I>>2]=+g[Ja>>2]+ +g[x>>2];g[c[n>>2]>>2]=(+g[fb>>2]+ +g[Ga>>2])*.5;g[c[m>>2]>>2]=(+g[I>>2]+ 
+g[J>>2])*.5;g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[Ga>>2]-+g[fb>>2])*.5;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[I>>2]-+g[J>>2])*.5;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[y>>2]-+g[z>>2])*.5;g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=(+g[H>>2]-+g[A>>2])*.5;g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[y>>2]+ +g[z>>2])*.5;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[A>>2]+ +g[H>>2])*.5;c[ib>>2]=(c[ib>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+56;c[r>>2]=c[r>>2]^c[2998]}i=jb;return}function nr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,26,4552);i=b;return}function or(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0;zd=i;i=i+944|0;k=zd+936|0;l=zd+932|0;m=zd+928|0;n=zd+924|0;Ad=zd+920|0;o=zd+916|0;p=zd+912|0;yd=zd+896|0;za=zd+892|0;vc=zd+888|
0;ed=zd+884|0;gd=zd+880|0;id=zd+876|0;md=zd+872|0;Ec=zd+868|0;Cc=zd+864|0;wc=zd+860|0;Ib=zd+856|0;yc=zd+852|0;Wc=zd+848|0;Jc=zd+844|0;rd=zd+840|0;Uc=zd+836|0;ad=zd+832|0;vd=zd+828|0;Hc=zd+824|0;Nc=zd+820|0;Oc=zd+816|0;Pc=zd+812|0;Rc=zd+808|0;ea=zd+804|0;ja=zd+800|0;aa=zd+796|0;ha=zd+792|0;xa=zd+788|0;I=zd+784|0;ta=zd+780|0;G=zd+776|0;xc=zd+772|0;td=zd+768|0;_c=zd+764|0;qd=zd+760|0;uc=zd+756|0;ud=zd+752|0;$c=zd+748|0;pd=zd+744|0;fd=zd+740|0;ld=zd+736|0;hd=zd+732|0;kd=zd+728|0;ca=zd+724|0;da=zd+720|0;A=zd+716|0;$=zd+712|0;va=zd+708|0;wa=zd+704|0;ra=zd+700|0;sa=zd+696|0;dd=zd+692|0;mc=zd+688|0;O=zd+684|0;_b=zd+680|0;Ac=zd+676|0;lc=zd+672|0;R=zd+668|0;Xb=zd+664|0;Mc=zd+660|0;Xa=zd+656|0;X=zd+652|0;Eb=zd+648|0;Zc=zd+644|0;Wa=zd+640|0;Ba=zd+636|0;Fb=zd+632|0;C=zd+628|0;L=zd+624|0;db=zd+620|0;eb=zd+616|0;fb=zd+612|0;gb=zd+608|0;rb=zd+604|0;Oa=zd+600|0;wb=zd+596|0;Na=zd+592|0;z=zd+588|0;ma=zd+584|0;_a=zd+580|0;$a=zd+576|0;ab=zd+572|0;bb=zd+568|0;Ia=zd+564|0;La=zd+560|0;lb=zd+556|0;Ka=zd+552|0;q=zd+548|0;Zb=zd+544|0;cd=zd+540|0;Yb=zd+536|0;zc=zd+532|0;bd=zd+528|0;od=zd+524|0;P=zd+520|0;xd=zd+516|0;Q=zd+512|0;jd=zd+508|0;nd=zd+504|0;sd=zd+500|0;wd=zd+496|0;Gc=zd+492|0;U=zd+488|0;Lc=zd+484|0;V=zd+480|0;T=zd+476|0;W=zd+472|0;Dc=zd+468|0;Fc=zd+464|0;Ic=zd+460|0;Kc=zd+456|0;Tc=zd+452|0;Z=zd+448|0;Yc=zd+444|0;_=zd+440|0;Y=zd+436|0;Aa=zd+432|0;Qc=zd+428|0;Sc=zd+424|0;Vc=zd+420|0;Xc=zd+416|0;qa=zd+412|0;nb=zd+408|0;K=zd+404|0;ub=zd+400|0;B=zd+396|0;ob=zd+392|0;F=zd+388|0;tb=zd+384|0;oa=zd+380|0;pa=zd+376|0;H=zd+372|0;J=zd+368|0;ua=zd+364|0;ya=zd+360|0;D=zd+356|0;E=zd+352|0;pb=zd+348|0;qb=zd+344|0;sb=zd+340|0;vb=zd+336|0;v=zd+332|0;Ja=zd+328|0;la=zd+324|0;Ga=zd+320|0;y=zd+316|0;ib=zd+312|0;ga=zd+308|0;Fa=zd+304|0;t=zd+300|0;u=zd+296|0;ia=zd+292|0;ka=zd+288|0;w=zd+284|0;x=zd+280|0;ba=zd+276|0;fa=zd+272|0;Ea=zd+268|0;Ha=zd+264|0;jb=zd+260|0;kb=zd+256|0;Da=zd+252|0;zb=zd+248|0;oc=zd+244|0;qc=zd+240|0;yb=zd+236|0;pc=zd+232|0;Cb=zd+228|0;jc=zd+224|0;S=zd+220|0;Ca=zd+216|0;kc=zd+212|0;nc=z
d+208|0;mb=zd+204|0;xb=zd+200|0;Ab=zd+196|0;Bb=zd+192|0;s=zd+188|0;Ob=zd+184|0;ac=zd+180|0;cc=zd+176|0;N=zd+172|0;bc=zd+168|0;Rb=zd+164|0;Sb=zd+160|0;Bc=zd+156|0;r=zd+152|0;Wb=zd+148|0;$b=zd+144|0;na=zd+140|0;M=zd+136|0;Pb=zd+132|0;Qb=zd+128|0;Hb=zd+124|0;Ra=zd+120|0;Tb=zd+116|0;Vb=zd+112|0;Qa=zd+108|0;Ub=zd+104|0;Ua=zd+100|0;rc=zd+96|0;Db=zd+92|0;Gb=zd+88|0;sc=zd+84|0;tc=zd+80|0;Ma=zd+76|0;Pa=zd+72|0;Sa=zd+68|0;Ta=zd+64|0;Za=zd+60|0;Kb=zd+56|0;gc=zd+52|0;ic=zd+48|0;Jb=zd+44|0;dc=zd+40|0;Nb=zd+36|0;hc=zd+32|0;Va=zd+28|0;Ya=zd+24|0;ec=zd+20|0;fc=zd+16|0;cb=zd+12|0;hb=zd+8|0;Lb=zd+4|0;Mb=zd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ad>>2]=f;c[o>>2]=h;c[p>>2]=j;g[zd+908>>2]=.3826834261417389;g[zd+904>>2]=.9238795042037964;g[zd+900>>2]=.7071067690849304;c[yd>>2]=c[Ad>>2];c[m>>2]=(c[m>>2]|0)+((c[Ad>>2]|0)-1<<3<<2);while(1){if((c[yd>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[vc>>2]=+g[(c[m>>2]|0)+4>>2];g[ed>>2]=+g[(c[m>>2]|0)+8>>2];g[gd>>2]=+g[(c[m>>2]|0)+12>>2];g[fd>>2]=+g[za>>2]*+g[ed>>2];g[ld>>2]=+g[vc>>2]*+g[ed>>2];g[hd>>2]=+g[vc>>2]*+g[gd>>2];g[kd>>2]=+g[za>>2]*+g[gd>>2];g[id>>2]=+g[fd>>2]-+g[hd>>2];g[md>>2]=+g[kd>>2]+ +g[ld>>2];g[Ec>>2]=+g[kd>>2]-+g[ld>>2];g[Cc>>2]=+g[fd>>2]+ +g[hd>>2];g[wc>>2]=+g[(c[m>>2]|0)+20>>2];g[xc>>2]=+g[vc>>2]*+g[wc>>2];g[td>>2]=+g[ed>>2]*+g[wc>>2];g[_c>>2]=+g[za>>2]*+g[wc>>2];g[qd>>2]=+g[gd>>2]*+g[wc>>2];g[Ib>>2]=+g[(c[m>>2]|0)+16>>2];g[uc>>2]=+g[za>>2]*+g[Ib>>2];g[ud>>2]=+g[gd>>2]*+g[Ib>>2];g[$c>>2]=+g[vc>>2]*+g[Ib>>2];g[pd>>2]=+g[ed>>2]*+g[Ib>>2];g[yc>>2]=+g[uc>>2]+ +g[xc>>2];g[Wc>>2]=+g[td>>2]-+g[ud>>2];g[Jc>>2]=+g[_c>>2]+ +g[$c>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[Uc>>2]=+g[pd>>2]+ +g[qd>>2];g[ad>>2]=+g[_c>>2]-+g[$c>>2];g[vd>>2]=+g[td>>2]+ +g[ud>>2];g[Hc>>2]=+g[uc>>2]-+g[xc>>2];g[Nc>>2]=+g[(c[m>>2]|0)+24>>2];g[Oc>>2]=+g[(c[m>>2]|0)+28>>2];g[Pc>>2]=+g[za>>2]*+g[Nc>>2]+ 
+g[vc>>2]*+g[Oc>>2];g[Rc>>2]=+g[za>>2]*+g[Oc>>2]-+g[vc>>2]*+g[Nc>>2];g[ca>>2]=+g[id>>2]*+g[wc>>2];g[da>>2]=+g[md>>2]*+g[Ib>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ja>>2]=+g[ca>>2]+ +g[da>>2];g[A>>2]=+g[id>>2]*+g[Ib>>2];g[$>>2]=+g[md>>2]*+g[wc>>2];g[aa>>2]=+g[A>>2]+ +g[$>>2];g[ha>>2]=+g[A>>2]-+g[$>>2];g[va>>2]=+g[Cc>>2]*+g[wc>>2];g[wa>>2]=+g[Ec>>2]*+g[Ib>>2];g[xa>>2]=+g[va>>2]-+g[wa>>2];g[I>>2]=+g[va>>2]+ +g[wa>>2];g[ra>>2]=+g[Cc>>2]*+g[Ib>>2];g[sa>>2]=+g[Ec>>2]*+g[wc>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[G>>2]=+g[ra>>2]-+g[sa>>2];g[q>>2]=+g[c[k>>2]>>2];g[Zb>>2]=+g[c[l>>2]>>2];g[zc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[bd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[cd>>2]=+g[yc>>2]*+g[zc>>2]+ +g[ad>>2]*+g[bd>>2];g[Yb>>2]=+g[yc>>2]*+g[bd>>2]-+g[ad>>2]*+g[zc>>2];g[dd>>2]=+g[q>>2]+ +g[cd>>2];g[mc>>2]=+g[Zb>>2]-+g[Yb>>2];g[O>>2]=+g[q>>2]-+g[cd>>2];g[_b>>2]=+g[Yb>>2]+ +g[Zb>>2];g[jd>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[nd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[od>>2]=+g[id>>2]*+g[jd>>2]+ +g[md>>2]*+g[nd>>2];g[P>>2]=+g[id>>2]*+g[nd>>2]-+g[md>>2]*+g[jd>>2];g[sd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[wd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[xd>>2]=+g[rd>>2]*+g[sd>>2]+ +g[vd>>2]*+g[wd>>2];g[Q>>2]=+g[rd>>2]*+g[wd>>2]-+g[vd>>2]*+g[sd>>2];g[Ac>>2]=+g[od>>2]+ +g[xd>>2];g[lc>>2]=+g[od>>2]-+g[xd>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[Xb>>2]=+g[P>>2]+ +g[Q>>2];g[Dc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Fc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Gc>>2]=+g[Cc>>2]*+g[Dc>>2]+ +g[Ec>>2]*+g[Fc>>2];g[U>>2]=+g[Cc>>2]*+g[Fc>>2]-+g[Ec>>2]*+g[Dc>>2];g[Ic>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Kc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Lc>>2]=+g[Hc>>2]*+g[Ic>>2]+ +g[Jc>>2]*+g[Kc>>2];g[V>>2]=+g[Hc>>2]*+g[Kc>>2]-+g[Jc>>2]*+g[Ic>>2];g[Mc>>2]=+g[Gc>>2]+ +g[Lc>>2];g[Xa>>2]=+g[U>>2]+ +g[V>>2];g[T>>2]=+g[Gc>>2]-+g[Lc>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[X>>2]=+g[T>>2]-+g[W>>2];g[Eb>>2]=+g[T>>2]+ 
+g[W>>2];g[Qc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Sc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Tc>>2]=+g[Pc>>2]*+g[Qc>>2]+ +g[Rc>>2]*+g[Sc>>2];g[Z>>2]=+g[Pc>>2]*+g[Sc>>2]-+g[Rc>>2]*+g[Qc>>2];g[Vc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Xc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Yc>>2]=+g[Uc>>2]*+g[Vc>>2]+ +g[Wc>>2]*+g[Xc>>2];g[_>>2]=+g[Uc>>2]*+g[Xc>>2]-+g[Wc>>2]*+g[Vc>>2];g[Zc>>2]=+g[Tc>>2]+ +g[Yc>>2];g[Wa>>2]=+g[Z>>2]+ +g[_>>2];g[Y>>2]=+g[Tc>>2]-+g[Yc>>2];g[Aa>>2]=+g[Z>>2]-+g[_>>2];g[Ba>>2]=+g[Y>>2]+ +g[Aa>>2];g[Fb>>2]=+g[Y>>2]-+g[Aa>>2];g[oa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[qa>>2]=+g[Nc>>2]*+g[oa>>2]+ +g[Oc>>2]*+g[pa>>2];g[nb>>2]=+g[Nc>>2]*+g[pa>>2]-+g[Oc>>2]*+g[oa>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[K>>2]=+g[G>>2]*+g[H>>2]+ +g[I>>2]*+g[J>>2];g[ub>>2]=+g[G>>2]*+g[J>>2]-+g[I>>2]*+g[H>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ya>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[B>>2]=+g[ta>>2]*+g[ua>>2]+ +g[xa>>2]*+g[ya>>2];g[ob>>2]=+g[ta>>2]*+g[ya>>2]-+g[xa>>2]*+g[ua>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[E>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[F>>2]=+g[ed>>2]*+g[D>>2]+ +g[gd>>2]*+g[E>>2];g[tb>>2]=+g[ed>>2]*+g[E>>2]-+g[gd>>2]*+g[D>>2];g[C>>2]=+g[qa>>2]+ +g[B>>2];g[L>>2]=+g[F>>2]+ +g[K>>2];g[db>>2]=+g[C>>2]-+g[L>>2];g[eb>>2]=+g[nb>>2]+ +g[ob>>2];g[fb>>2]=+g[tb>>2]+ +g[ub>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[pb>>2]=+g[nb>>2]-+g[ob>>2];g[qb>>2]=+g[F>>2]-+g[K>>2];g[rb>>2]=+g[pb>>2]+ +g[qb>>2];g[Oa>>2]=+g[pb>>2]-+g[qb>>2];g[sb>>2]=+g[qa>>2]-+g[B>>2];g[vb>>2]=+g[tb>>2]-+g[ub>>2];g[wb>>2]=+g[sb>>2]-+g[vb>>2];g[Na>>2]=+g[sb>>2]+ +g[vb>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[v>>2]=+g[za>>2]*+g[t>>2]+ 
+g[vc>>2]*+g[u>>2];g[Ja>>2]=+g[za>>2]*+g[u>>2]-+g[vc>>2]*+g[t>>2];g[ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[la>>2]=+g[ha>>2]*+g[ia>>2]+ +g[ja>>2]*+g[ka>>2];g[Ga>>2]=+g[ha>>2]*+g[ka>>2]-+g[ja>>2]*+g[ia>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[y>>2]=+g[Ib>>2]*+g[w>>2]+ +g[wc>>2]*+g[x>>2];g[ib>>2]=+g[Ib>>2]*+g[x>>2]-+g[wc>>2]*+g[w>>2];g[ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ga>>2]=+g[aa>>2]*+g[ba>>2]+ +g[ea>>2]*+g[fa>>2];g[Fa>>2]=+g[aa>>2]*+g[fa>>2]-+g[ea>>2]*+g[ba>>2];g[z>>2]=+g[v>>2]+ +g[y>>2];g[ma>>2]=+g[ga>>2]+ +g[la>>2];g[_a>>2]=+g[z>>2]-+g[ma>>2];g[$a>>2]=+g[Ja>>2]+ +g[ib>>2];g[ab>>2]=+g[Fa>>2]+ +g[Ga>>2];g[bb>>2]=+g[$a>>2]-+g[ab>>2];g[Ea>>2]=+g[v>>2]-+g[y>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Ia>>2]=+g[Ea>>2]-+g[Ha>>2];g[La>>2]=+g[Ea>>2]+ +g[Ha>>2];g[jb>>2]=+g[Ja>>2]-+g[ib>>2];g[kb>>2]=+g[ga>>2]-+g[la>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[Ka>>2]=+g[jb>>2]-+g[kb>>2];g[S>>2]=+g[O>>2]-+g[R>>2];g[Ca>>2]=(+g[X>>2]+ +g[Ba>>2])*.7071067690849304;g[Da>>2]=+g[S>>2]+ +g[Ca>>2];g[zb>>2]=+g[S>>2]-+g[Ca>>2];g[kc>>2]=(+g[Eb>>2]-+g[Fb>>2])*.7071067690849304;g[nc>>2]=+g[lc>>2]+ +g[mc>>2];g[oc>>2]=+g[kc>>2]+ +g[nc>>2];g[qc>>2]=+g[nc>>2]-+g[kc>>2];g[mb>>2]=+g[Ia>>2]*.9238795042037964-+g[lb>>2]*.3826834261417389;g[xb>>2]=+g[rb>>2]*.3826834261417389+ +g[wb>>2]*.9238795042037964;g[yb>>2]=+g[mb>>2]+ +g[xb>>2];g[pc>>2]=+g[xb>>2]-+g[mb>>2];g[Ab>>2]=+g[lb>>2]*.9238795042037964+ +g[Ia>>2]*.3826834261417389;g[Bb>>2]=+g[wb>>2]*.3826834261417389-+g[rb>>2]*.9238795042037964;g[Cb>>2]=+g[Ab>>2]+ +g[Bb>>2];g[jc>>2]=+g[Bb>>2]-+g[Ab>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Da>>2]-+g[yb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[pc>>2]-+g[qc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[pc>>2]+ +g[qc>>2];g[c[l>>2]>>2]=+g[Da>>2]+ 
+g[yb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[zb>>2]-+g[Cb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[jc>>2]-+g[oc>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[jc>>2]+ +g[oc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[zb>>2]+ +g[Cb>>2];g[Bc>>2]=+g[dd>>2]+ +g[Ac>>2];g[r>>2]=+g[Mc>>2]+ +g[Zc>>2];g[s>>2]=+g[Bc>>2]+ +g[r>>2];g[Ob>>2]=+g[Bc>>2]-+g[r>>2];g[Wb>>2]=+g[Xa>>2]+ +g[Wa>>2];g[$b>>2]=+g[Xb>>2]+ +g[_b>>2];g[ac>>2]=+g[Wb>>2]+ +g[$b>>2];g[cc>>2]=+g[$b>>2]-+g[Wb>>2];g[na>>2]=+g[z>>2]+ +g[ma>>2];g[M>>2]=+g[C>>2]+ +g[L>>2];g[N>>2]=+g[na>>2]+ +g[M>>2];g[bc>>2]=+g[M>>2]-+g[na>>2];g[Pb>>2]=+g[eb>>2]+ +g[fb>>2];g[Qb>>2]=+g[$a>>2]+ +g[ab>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Pb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[s>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[bc>>2]-+g[cc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[bc>>2]+ +g[cc>>2];g[c[k>>2]>>2]=+g[s>>2]+ +g[N>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ob>>2]-+g[Rb>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Sb>>2]-+g[ac>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Sb>>2]+ +g[ac>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Ob>>2]+ +g[Rb>>2];g[Db>>2]=+g[O>>2]+ +g[R>>2];g[Gb>>2]=(+g[Eb>>2]+ +g[Fb>>2])*.7071067690849304;g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[Ra>>2]=+g[Db>>2]-+g[Gb>>2];g[sc>>2]=(+g[Ba>>2]-+g[X>>2])*.7071067690849304;g[tc>>2]=+g[mc>>2]-+g[lc>>2];g[Tb>>2]=+g[sc>>2]+ +g[tc>>2];g[Vb>>2]=+g[tc>>2]-+g[sc>>2];g[Ma>>2]=+g[Ka>>2]*.3826834261417389+ +g[La>>2]*.9238795042037964;g[Pa>>2]=+g[Na>>2]*.9238795042037964-+g[Oa>>2]*.3826834261417389;g[Qa>>2]=+g[Ma>>2]+ +g[Pa>>2];g[Ub>>2]=+g[Pa>>2]-+g[Ma>>2];g[Sa>>2]=+g[La>>2]*.3826834261417389-+g[Ka>>2]*.9238795042037964;g[Ta>>2]=+g[Oa>>2]*.9238795042037964+ +g[Na>>2]*.3826834261417389;g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[rc>>2]=+g[Ta>>2]-+g[Sa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Hb>>2]-+g[Qa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ub>>2]-+g[Vb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Ub>>2]+ +g[Vb>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Hb>>2]+ 
+g[Qa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ra>>2]-+g[Ua>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[rc>>2]-+g[Tb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[rc>>2]+ +g[Tb>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Ra>>2]+ +g[Ua>>2];g[Va>>2]=+g[dd>>2]-+g[Ac>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[Za>>2]=+g[Va>>2]-+g[Ya>>2];g[Kb>>2]=+g[Va>>2]+ +g[Ya>>2];g[ec>>2]=+g[Mc>>2]-+g[Zc>>2];g[fc>>2]=+g[_b>>2]-+g[Xb>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[ic>>2]=+g[fc>>2]-+g[ec>>2];g[cb>>2]=+g[_a>>2]+ +g[bb>>2];g[hb>>2]=+g[db>>2]-+g[gb>>2];g[Jb>>2]=(+g[cb>>2]+ +g[hb>>2])*.7071067690849304;g[dc>>2]=(+g[hb>>2]-+g[cb>>2])*.7071067690849304;g[Lb>>2]=+g[_a>>2]-+g[bb>>2];g[Mb>>2]=+g[db>>2]+ +g[gb>>2];g[Nb>>2]=(+g[Lb>>2]+ +g[Mb>>2])*.7071067690849304;g[hc>>2]=(+g[Mb>>2]-+g[Lb>>2])*.7071067690849304;g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Za>>2]-+g[Jb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[hc>>2]-+g[ic>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[hc>>2]+ +g[ic>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Za>>2]+ +g[Jb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Kb>>2]-+g[Nb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[dc>>2]-+g[gc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[dc>>2]+ +g[gc>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Kb>>2]+ +g[Nb>>2];c[yd>>2]=(c[yd>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=zd;return}/* pr(a): coerces a to int32, saves the stack pointer i, reserves a 16-byte frame, spills a into that frame, calls bn(a, 27, 4600), then restores i and returns nothing. NOTE(review): 27 and 4600 look like a function-table index and a static data address used to register a routine with bn - confirm against the Emscripten function tables and data segment. */function pr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,27,4600);i=b;return}/* qr(a,b,d,e,f,h,j): all seven arguments are coerced to int32 and spilled into a 1264-byte stack frame (i is advanced by 1264 on entry and restored on exit) that also holds every temporary. d is first advanced by (f-1) times 32 bytes; a counter then starts at f and the loop stops once it is >= h. Each pass loads eight floats from d (byte offsets 0 to 28), loads values through a and through b at every multiple of e from 0 to 19 (in 4-byte units), combines them using the literals .25, .55901700258255, .5877852439880371 and .9510565400123596, and stores the results back through a and b at those same multiples. After each pass: a advances by j times 4 bytes, b retreats by j times 4 bytes, d advances by 32 bytes, and e is XORed with c[2998]. NOTE(review): presumably the compiled form of a 20-point FFTW twiddle codelet, with g the float heap view, c the int32 heap view and c[2998] a value that is always zero - confirm against the FFTW sources this module was built from. */function qr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0;Ze=i;i=i+1264|0;k=Ze+1260|0;l=Ze+1256|0;m=Ze+1252|0;n=Ze+1248|0;_e=Ze+1244|0;o=Ze+1240|0;p=Ze+1236|0;Ye=Ze+1216|0;za=Ze+1212|0;Vd=Ze+1208|0;Ee=Ze+1204|0;Ge=Ze+1200|0;Ie=Ze+1196|0;Me=Ze+1192|0;ga=Ze+1188|0;ea=Ze+1184|0;Wd=Ze+1180|0;Ib=Ze+1176|0;Yd=Ze+1172|0;v=Ze+1168|0;R=Ze+1164|0;Ae=Ze+1160|0;P=Ze+1156|0;ua=Ze+1152|0;z=Ze+1148|0;sa=Ze+1144|0;ma=Ze+1140|0;Ea=Ze+1136|0;ia=Ze+1132|0;Ca=Ze+1128|0;Oe=Ze+1124|0;Se=Ze+1120|0;F=Ze+1116|0;H=Ze+1112|0;Ve=Ze+1108|0;We=Ze+1104|0;Xe=Ze+1
100|0;he=Ze+1096|0;_=Ze+1092|0;$d=Ze+1088|0;Y=Ze+1084|0;B=Ze+1080|0;xe=Ze+1076|0;je=Ze+1072|0;xa=Ze+1068|0;ve=Ze+1064|0;Xd=Ze+1060|0;x=Ze+1056|0;ye=Ze+1052|0;u=Ze+1048|0;Rc=Ze+1044|0;y=Ze+1040|0;ze=Ze+1036|0;t=Ze+1032|0;Fe=Ze+1028|0;Le=Ze+1024|0;He=Ze+1020|0;Ke=Ze+1016|0;ka=Ze+1012|0;la=Ze+1008|0;fa=Ze+1004|0;ha=Ze+1e3|0;Je=Ze+996|0;Ne=Ze+992|0;Qe=Ze+988|0;Re=Ze+984|0;de=Ze+980|0;jb=Ze+976|0;dd=Ze+972|0;md=Ze+968|0;Ua=Ze+964|0;Ic=Ze+960|0;vd=Ze+956|0;Hd=Ze+952|0;O=Ze+948|0;Ia=Ze+944|0;Ja=Ze+940|0;Vb=Ze+936|0;Yb=Ze+932|0;Vc=Ze+928|0;oc=Ze+924|0;pc=Ze+920|0;Jd=Ze+916|0;nb=Ze+912|0;ob=Ze+908|0;pb=Ze+904|0;Gb=Ze+900|0;Na=Ze+896|0;Oa=Ze+892|0;xc=Ze+888|0;yc=Ze+884|0;Ud=Ze+880|0;Ec=Ze+876|0;Fc=Ze+872|0;Gc=Ze+868|0;Mb=Ze+864|0;Rb=Ze+860|0;kd=Ze+856|0;ue=Ze+852|0;qa=Ze+848|0;ra=Ze+844|0;ac=Ze+840|0;dc=Ze+836|0;Wc=Ze+832|0;lc=Ze+828|0;mc=Ze+824|0;Id=Ze+820|0;kb=Ze+816|0;lb=Ze+812|0;mb=Ze+808|0;vb=Ze+804|0;Ab=Ze+800|0;Bb=Ze+796|0;uc=Ze+792|0;vc=Ze+788|0;Td=Ze+784|0;Bc=Ze+780|0;Cc=Ze+776|0;Dc=Ze+772|0;ab=Ze+768|0;fb=Ze+764|0;jd=Ze+760|0;q=Ze+756|0;Zc=Ze+752|0;Ce=Ze+748|0;Yc=Ze+744|0;Ue=Ze+740|0;Ra=Ze+736|0;be=Ze+732|0;Sa=Ze+728|0;Zd=Ze+724|0;Be=Ze+720|0;Pe=Ze+716|0;Te=Ze+712|0;_d=Ze+708|0;ae=Ze+704|0;De=Ze+700|0;ce=Ze+696|0;bd=Ze+692|0;cd=Ze+688|0;Qa=Ze+684|0;Ta=Ze+680|0;_c=Ze+676|0;$c=Ze+672|0;E=Ze+668|0;Tb=Ze+664|0;Cb=Ze+660|0;Kb=Ze+656|0;Ha=Ze+652|0;Wb=Ze+648|0;Ma=Ze+644|0;Qb=Ze+640|0;N=Ze+636|0;Ub=Ze+632|0;Fb=Ze+628|0;Lb=Ze+624|0;X=Ze+620|0;Xb=Ze+616|0;Hb=Ze+612|0;Pb=Ze+608|0;wa=Ze+604|0;hb=Ze+600|0;D=Ze+596|0;Jb=Ze+592|0;ta=Ze+588|0;va=Ze+584|0;ya=Ze+580|0;C=Ze+576|0;Ba=Ze+572|0;Ka=Ze+568|0;Ga=Ze+564|0;La=Ze+560|0;Z=Ze+556|0;Aa=Ze+552|0;Da=Ze+548|0;Fa=Ze+544|0;J=Ze+540|0;Db=Ze+536|0;M=Ze+532|0;Eb=Ze+528|0;G=Ze+524|0;I=Ze+520|0;K=Ze+516|0;L=Ze+512|0;T=Ze+508|0;Nb=Ze+504|0;W=Ze+500|0;Ob=Ze+496|0;Q=Ze+492|0;S=Ze+488|0;U=Ze+484|0;V=Ze+480|0;me=Ze+476|0;_b=Ze+472|0;rb=Ze+468|0;_a=Ze+464|0;pa=Ze+460|0;cc=Ze+456|0;zb=Ze+452|0;bb=Ze+448|0;te=Ze+444|0;$b=Ze+440|0;ub=Ze+436|0;$a=Ze
+432|0;aa=Ze+428|0;bc=Ze+424|0;wb=Ze+420|0;eb=Ze+416|0;ge=Ze+412|0;Ya=Ze+408|0;le=Ze+404|0;Za=Ze+400|0;ee=Ze+396|0;fe=Ze+392|0;ie=Ze+388|0;ke=Ze+384|0;da=Ze+380|0;xb=Ze+376|0;oa=Ze+372|0;yb=Ze+368|0;ba=Ze+364|0;ca=Ze+360|0;ja=Ze+356|0;na=Ze+352|0;pe=Ze+348|0;sb=Ze+344|0;se=Ze+340|0;tb=Ze+336|0;ne=Ze+332|0;oe=Ze+328|0;qe=Ze+324|0;re=Ze+320|0;s=Ze+316|0;cb=Ze+312|0;$=Ze+308|0;db=Ze+304|0;we=Ze+300|0;r=Ze+296|0;w=Ze+292|0;A=Ze+288|0;Pc=Ze+284|0;ib=Ze+280|0;Oc=Ze+276|0;fc=Ze+272|0;hc=Ze+268|0;Zb=Ze+264|0;ec=Ze+260|0;gc=Ze+256|0;Qc=Ze+252|0;Hc=Ze+248|0;Jc=Ze+244|0;Kc=Ze+240|0;Ac=Ze+236|0;Nc=Ze+232|0;wc=Ze+228|0;zc=Ze+224|0;Mc=Ze+220|0;Lc=Ze+216|0;ic=Ze+212|0;qb=Ze+208|0;jc=Ze+204|0;Sc=Ze+200|0;Uc=Ze+196|0;nc=Ze+192|0;qc=Ze+188|0;Tc=Ze+184|0;kc=Ze+180|0;Pa=Ze+176|0;Va=Ze+172|0;Wa=Ze+168|0;rc=Ze+164|0;sc=Ze+160|0;gb=Ze+156|0;Sb=Ze+152|0;tc=Ze+148|0;Xa=Ze+144|0;zd=Ze+140|0;Xc=Ze+136|0;Ad=Ze+132|0;yd=Ze+128|0;Cd=Ze+124|0;wd=Ze+120|0;xd=Ze+116|0;Dd=Ze+112|0;Bd=Ze+108|0;ad=Ze+104|0;ed=Ze+100|0;fd=Ze+96|0;Sd=Ze+92|0;hd=Ze+88|0;Qd=Ze+84|0;Rd=Ze+80|0;id=Ze+76|0;gd=Ze+72|0;Md=Ze+68|0;Kd=Ze+64|0;Ld=Ze+60|0;Gd=Ze+56|0;Od=Ze+52|0;Ed=Ze+48|0;Fd=Ze+44|0;Pd=Ze+40|0;Nd=Ze+36|0;qd=Ze+32|0;ld=Ze+28|0;rd=Ze+24|0;pd=Ze+20|0;td=Ze+16|0;nd=Ze+12|0;od=Ze+8|0;ud=Ze+4|0;sd=Ze;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[_e>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ze+1232>>2]=.5877852439880371;g[Ze+1228>>2]=.9510565400123596;g[Ze+1224>>2]=.25;g[Ze+1220>>2]=.55901700258255;c[Ye>>2]=c[_e>>2];c[m>>2]=(c[m>>2]|0)+((c[_e>>2]|0)-1<<3<<2);while(1){if((c[Ye>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[Vd>>2]=+g[(c[m>>2]|0)+4>>2];g[Ee>>2]=+g[(c[m>>2]|0)+8>>2];g[Ge>>2]=+g[(c[m>>2]|0)+12>>2];g[Fe>>2]=+g[za>>2]*+g[Ee>>2];g[Le>>2]=+g[Vd>>2]*+g[Ee>>2];g[He>>2]=+g[Vd>>2]*+g[Ge>>2];g[Ke>>2]=+g[za>>2]*+g[Ge>>2];g[Ie>>2]=+g[Fe>>2]-+g[He>>2];g[Me>>2]=+g[Ke>>2]+ +g[Le>>2];g[ga>>2]=+g[Ke>>2]-+g[Le>>2];g[ea>>2]=+g[Fe>>2]+ 
+g[He>>2];g[Wd>>2]=+g[(c[m>>2]|0)+20>>2];g[Xd>>2]=+g[Vd>>2]*+g[Wd>>2];g[x>>2]=+g[Ee>>2]*+g[Wd>>2];g[ye>>2]=+g[za>>2]*+g[Wd>>2];g[u>>2]=+g[Ge>>2]*+g[Wd>>2];g[Ib>>2]=+g[(c[m>>2]|0)+16>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[y>>2]=+g[Ge>>2]*+g[Ib>>2];g[ze>>2]=+g[Vd>>2]*+g[Ib>>2];g[t>>2]=+g[Ee>>2]*+g[Ib>>2];g[Yd>>2]=+g[Rc>>2]-+g[Xd>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[R>>2]=+g[x>>2]+ +g[y>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[P>>2]=+g[t>>2]-+g[u>>2];g[ua>>2]=+g[ye>>2]-+g[ze>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[sa>>2]=+g[Rc>>2]+ +g[Xd>>2];g[ka>>2]=+g[ea>>2]*+g[Wd>>2];g[la>>2]=+g[ga>>2]*+g[Ib>>2];g[ma>>2]=+g[ka>>2]+ +g[la>>2];g[Ea>>2]=+g[ka>>2]-+g[la>>2];g[fa>>2]=+g[ea>>2]*+g[Ib>>2];g[ha>>2]=+g[ga>>2]*+g[Wd>>2];g[ia>>2]=+g[fa>>2]-+g[ha>>2];g[Ca>>2]=+g[fa>>2]+ +g[ha>>2];g[Je>>2]=+g[Ie>>2]*+g[Ib>>2];g[Ne>>2]=+g[Me>>2]*+g[Wd>>2];g[Oe>>2]=+g[Je>>2]+ +g[Ne>>2];g[Qe>>2]=+g[Ie>>2]*+g[Wd>>2];g[Re>>2]=+g[Me>>2]*+g[Ib>>2];g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[F>>2]=+g[Je>>2]-+g[Ne>>2];g[H>>2]=+g[Qe>>2]+ +g[Re>>2];g[Ve>>2]=+g[(c[m>>2]|0)+24>>2];g[We>>2]=+g[(c[m>>2]|0)+28>>2];g[Xe>>2]=+g[Ie>>2]*+g[Ve>>2]+ +g[Me>>2]*+g[We>>2];g[he>>2]=+g[Oe>>2]*+g[Ve>>2]+ +g[Se>>2]*+g[We>>2];g[_>>2]=+g[ea>>2]*+g[We>>2]-+g[ga>>2]*+g[Ve>>2];g[$d>>2]=+g[Ie>>2]*+g[We>>2]-+g[Me>>2]*+g[Ve>>2];g[Y>>2]=+g[ea>>2]*+g[Ve>>2]+ +g[ga>>2]*+g[We>>2];g[B>>2]=+g[za>>2]*+g[We>>2]-+g[Vd>>2]*+g[Ve>>2];g[xe>>2]=+g[Ee>>2]*+g[We>>2]-+g[Ge>>2]*+g[Ve>>2];g[je>>2]=+g[Oe>>2]*+g[We>>2]-+g[Se>>2]*+g[Ve>>2];g[xa>>2]=+g[za>>2]*+g[Ve>>2]+ +g[Vd>>2]*+g[We>>2];g[ve>>2]=+g[Ee>>2]*+g[Ve>>2]+ +g[Ge>>2]*+g[We>>2];g[q>>2]=+g[c[k>>2]>>2];g[Zc>>2]=+g[c[l>>2]>>2];g[Zd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Be>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Ce>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[Ae>>2]*+g[Be>>2];g[Yc>>2]=+g[Yd>>2]*+g[Be>>2]-+g[Ae>>2]*+g[Zd>>2];g[Pe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Te>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ue>>2]=+g[Oe>>2]*+g[Pe>>2]+ 
+g[Se>>2]*+g[Te>>2];g[Ra>>2]=+g[Oe>>2]*+g[Te>>2]-+g[Se>>2]*+g[Pe>>2];g[_d>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ae>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[be>>2]=+g[Xe>>2]*+g[_d>>2]+ +g[$d>>2]*+g[ae>>2];g[Sa>>2]=+g[Xe>>2]*+g[ae>>2]-+g[$d>>2]*+g[_d>>2];g[De>>2]=+g[q>>2]+ +g[Ce>>2];g[ce>>2]=+g[Ue>>2]+ +g[be>>2];g[de>>2]=+g[De>>2]-+g[ce>>2];g[jb>>2]=+g[De>>2]+ +g[ce>>2];g[bd>>2]=+g[Zc>>2]-+g[Yc>>2];g[cd>>2]=+g[Ue>>2]-+g[be>>2];g[dd>>2]=+g[bd>>2]-+g[cd>>2];g[md>>2]=+g[cd>>2]+ +g[bd>>2];g[Qa>>2]=+g[q>>2]-+g[Ce>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[Ua>>2]=+g[Qa>>2]-+g[Ta>>2];g[Ic>>2]=+g[Qa>>2]+ +g[Ta>>2];g[_c>>2]=+g[Yc>>2]+ +g[Zc>>2];g[$c>>2]=+g[Ra>>2]+ +g[Sa>>2];g[vd>>2]=+g[_c>>2]-+g[$c>>2];g[Hd>>2]=+g[$c>>2]+ +g[_c>>2];g[ta>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[wa>>2]=+g[sa>>2]*+g[ta>>2]+ +g[ua>>2]*+g[va>>2];g[hb>>2]=+g[sa>>2]*+g[va>>2]-+g[ua>>2]*+g[ta>>2];g[ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[C>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[D>>2]=+g[xa>>2]*+g[ya>>2]+ +g[B>>2]*+g[C>>2];g[Jb>>2]=+g[xa>>2]*+g[C>>2]-+g[B>>2]*+g[ya>>2];g[E>>2]=+g[wa>>2]+ +g[D>>2];g[Tb>>2]=+g[hb>>2]+ +g[Jb>>2];g[Cb>>2]=+g[wa>>2]-+g[D>>2];g[Kb>>2]=+g[hb>>2]-+g[Jb>>2];g[Z>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[Aa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[Ba>>2]=+g[Y>>2]*+g[Z>>2]+ +g[_>>2]*+g[Aa>>2];g[Ka>>2]=+g[Y>>2]*+g[Aa>>2]-+g[_>>2]*+g[Z>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ga>>2]=+g[Ca>>2]*+g[Da>>2]+ +g[Ea>>2]*+g[Fa>>2];g[La>>2]=+g[Ca>>2]*+g[Fa>>2]-+g[Ea>>2]*+g[Da>>2];g[Ha>>2]=+g[Ba>>2]+ +g[Ga>>2];g[Wb>>2]=+g[Ka>>2]+ +g[La>>2];g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[Qb>>2]=+g[Ba>>2]-+g[Ga>>2];g[G>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[J>>2]=+g[F>>2]*+g[G>>2]+ 
+g[H>>2]*+g[I>>2];g[Db>>2]=+g[F>>2]*+g[I>>2]-+g[H>>2]*+g[G>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[L>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[M>>2]=+g[Ee>>2]*+g[K>>2]+ +g[Ge>>2]*+g[L>>2];g[Eb>>2]=+g[Ee>>2]*+g[L>>2]-+g[Ge>>2]*+g[K>>2];g[N>>2]=+g[J>>2]+ +g[M>>2];g[Ub>>2]=+g[Db>>2]+ +g[Eb>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[Lb>>2]=+g[J>>2]-+g[M>>2];g[Q>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[S>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[T>>2]=+g[P>>2]*+g[Q>>2]+ +g[R>>2]*+g[S>>2];g[Nb>>2]=+g[P>>2]*+g[S>>2]-+g[R>>2]*+g[Q>>2];g[U>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[W>>2]=+g[ea>>2]*+g[U>>2]+ +g[ga>>2]*+g[V>>2];g[Ob>>2]=+g[ea>>2]*+g[V>>2]-+g[ga>>2]*+g[U>>2];g[X>>2]=+g[T>>2]+ +g[W>>2];g[Xb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Hb>>2]=+g[T>>2]-+g[W>>2];g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2];g[O>>2]=+g[E>>2]-+g[N>>2];g[Ia>>2]=+g[X>>2]-+g[Ha>>2];g[Ja>>2]=+g[O>>2]+ +g[Ia>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2];g[Vc>>2]=+g[Yb>>2]-+g[Vb>>2];g[oc>>2]=+g[Tb>>2]+ +g[Ub>>2];g[pc>>2]=+g[Xb>>2]+ +g[Wb>>2];g[Jd>>2]=+g[oc>>2]+ +g[pc>>2];g[nb>>2]=+g[E>>2]+ +g[N>>2];g[ob>>2]=+g[X>>2]+ +g[Ha>>2];g[pb>>2]=+g[nb>>2]+ +g[ob>>2];g[Gb>>2]=+g[Cb>>2]-+g[Fb>>2];g[Na>>2]=+g[Hb>>2]-+g[Ma>>2];g[Oa>>2]=+g[Gb>>2]+ +g[Na>>2];g[xc>>2]=+g[Kb>>2]-+g[Lb>>2];g[yc>>2]=+g[Pb>>2]-+g[Qb>>2];g[Ud>>2]=+g[xc>>2]+ +g[yc>>2];g[Ec>>2]=+g[Cb>>2]+ +g[Fb>>2];g[Fc>>2]=+g[Hb>>2]+ +g[Ma>>2];g[Gc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[Mb>>2]=+g[Kb>>2]+ +g[Lb>>2];g[Rb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[kd>>2]=+g[Mb>>2]+ +g[Rb>>2];g[ee>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[fe>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ge>>2]=+g[Ie>>2]*+g[ee>>2]+ +g[Me>>2]*+g[fe>>2];g[Ya>>2]=+g[Ie>>2]*+g[fe>>2]-+g[Me>>2]*+g[ee>>2];g[ie>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ke>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[le>>2]=+g[he>>2]*+g[ie>>2]+ +g[je>>2]*+g[ke>>2];g[Za>>2]=+g[he>>2]*+g[ke>>2]-+g[je>>2]*+g[ie>>2];g[me>>2]=+g[ge>>2]+ +g[le>>2];g[_b>>2]=+g[Ya>>2]+ 
+g[Za>>2];g[rb>>2]=+g[ge>>2]-+g[le>>2];g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[ba>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[da>>2]=+g[za>>2]*+g[ba>>2]+ +g[Vd>>2]*+g[ca>>2];g[xb>>2]=+g[za>>2]*+g[ca>>2]-+g[Vd>>2]*+g[ba>>2];g[ja>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[oa>>2]=+g[ia>>2]*+g[ja>>2]+ +g[ma>>2]*+g[na>>2];g[yb>>2]=+g[ia>>2]*+g[na>>2]-+g[ma>>2]*+g[ja>>2];g[pa>>2]=+g[da>>2]+ +g[oa>>2];g[cc>>2]=+g[xb>>2]+ +g[yb>>2];g[zb>>2]=+g[xb>>2]-+g[yb>>2];g[bb>>2]=+g[oa>>2]-+g[da>>2];g[ne>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[oe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[pe>>2]=+g[Ib>>2]*+g[ne>>2]+ +g[Wd>>2]*+g[oe>>2];g[sb>>2]=+g[Ib>>2]*+g[oe>>2]-+g[Wd>>2]*+g[ne>>2];g[qe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[re>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[se>>2]=+g[Ve>>2]*+g[qe>>2]+ +g[We>>2]*+g[re>>2];g[tb>>2]=+g[Ve>>2]*+g[re>>2]-+g[We>>2]*+g[qe>>2];g[te>>2]=+g[pe>>2]+ +g[se>>2];g[$b>>2]=+g[sb>>2]+ +g[tb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[$a>>2]=+g[pe>>2]-+g[se>>2];g[we>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[s>>2]=+g[ve>>2]*+g[we>>2]+ +g[xe>>2]*+g[r>>2];g[cb>>2]=+g[ve>>2]*+g[r>>2]-+g[xe>>2]*+g[we>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[A>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[$>>2]=+g[v>>2]*+g[w>>2]+ +g[z>>2]*+g[A>>2];g[db>>2]=+g[v>>2]*+g[A>>2]-+g[z>>2]*+g[w>>2];g[aa>>2]=+g[s>>2]+ +g[$>>2];g[bc>>2]=+g[cb>>2]+ +g[db>>2];g[wb>>2]=+g[s>>2]-+g[$>>2];g[eb>>2]=+g[cb>>2]-+g[db>>2];g[ue>>2]=+g[me>>2]-+g[te>>2];g[qa>>2]=+g[aa>>2]-+g[pa>>2];g[ra>>2]=+g[ue>>2]+ +g[qa>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[Wc>>2]=+g[ac>>2]+ +g[dc>>2];g[lc>>2]=+g[_b>>2]+ +g[$b>>2];g[mc>>2]=+g[bc>>2]+ +g[cc>>2];g[Id>>2]=+g[lc>>2]+ +g[mc>>2];g[kb>>2]=+g[me>>2]+ +g[te>>2];g[lb>>2]=+g[aa>>2]+ +g[pa>>2];g[mb>>2]=+g[kb>>2]+ +g[lb>>2];g[vb>>2]=+g[rb>>2]-+g[ub>>2];g[Ab>>2]=+g[wb>>2]-+g[zb>>2];g[Bb>>2]=+g[vb>>2]+ 
+g[Ab>>2];g[uc>>2]=+g[_a>>2]-+g[$a>>2];g[vc>>2]=+g[eb>>2]+ +g[bb>>2];g[Td>>2]=+g[uc>>2]+ +g[vc>>2];g[Bc>>2]=+g[rb>>2]+ +g[ub>>2];g[Cc>>2]=+g[wb>>2]+ +g[zb>>2];g[Dc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[fb>>2]=+g[bb>>2]-+g[eb>>2];g[jd>>2]=+g[fb>>2]-+g[ab>>2];g[Pc>>2]=(+g[ra>>2]-+g[Ja>>2])*.55901700258255;g[ib>>2]=+g[ra>>2]+ +g[Ja>>2];g[Oc>>2]=+g[de>>2]-+g[ib>>2]*.25;g[Zb>>2]=+g[Vb>>2]+ +g[Yb>>2];g[ec>>2]=+g[ac>>2]-+g[dc>>2];g[fc>>2]=+g[Zb>>2]*.9510565400123596-+g[ec>>2]*.5877852439880371;g[hc>>2]=+g[ec>>2]*.9510565400123596+ +g[Zb>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[de>>2]+ +g[ib>>2];g[gc>>2]=+g[Pc>>2]+ +g[Oc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[gc>>2]-+g[hc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[gc>>2]+ +g[hc>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Qc>>2]-+g[fc>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Qc>>2]+ +g[fc>>2];g[Hc>>2]=(+g[Dc>>2]-+g[Gc>>2])*.55901700258255;g[Jc>>2]=+g[Dc>>2]+ +g[Gc>>2];g[Kc>>2]=+g[Ic>>2]-+g[Jc>>2]*.25;g[wc>>2]=+g[uc>>2]-+g[vc>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Ac>>2]=+g[wc>>2]*.9510565400123596+ +g[zc>>2]*.5877852439880371;g[Nc>>2]=+g[zc>>2]*.9510565400123596-+g[wc>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ic>>2]+ +g[Jc>>2];g[Mc>>2]=+g[Kc>>2]-+g[Hc>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Mc>>2]-+g[Nc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Nc>>2]+ +g[Mc>>2];g[Lc>>2]=+g[Hc>>2]+ +g[Kc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ac>>2]+ +g[Lc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Lc>>2]-+g[Ac>>2];g[ic>>2]=(+g[mb>>2]-+g[pb>>2])*.55901700258255;g[qb>>2]=+g[mb>>2]+ +g[pb>>2];g[jc>>2]=+g[jb>>2]-+g[qb>>2]*.25;g[nc>>2]=+g[lc>>2]-+g[mc>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[Sc>>2]=+g[nc>>2]*.9510565400123596+ +g[qc>>2]*.5877852439880371;g[Uc>>2]=+g[qc>>2]*.9510565400123596-+g[nc>>2]*.5877852439880371;g[c[k>>2]>>2]=+g[jb>>2]+ 
+g[qb>>2];g[Tc>>2]=+g[jc>>2]-+g[ic>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Tc>>2]-+g[Uc>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Tc>>2]+ +g[Uc>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[kc>>2]-+g[Sc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[kc>>2]+ +g[Sc>>2];g[Pa>>2]=(+g[Bb>>2]-+g[Oa>>2])*.55901700258255;g[Va>>2]=+g[Bb>>2]+ +g[Oa>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2]*.25;g[gb>>2]=+g[ab>>2]+ +g[fb>>2];g[Sb>>2]=+g[Mb>>2]-+g[Rb>>2];g[rc>>2]=+g[gb>>2]*.9510565400123596+ +g[Sb>>2]*.5877852439880371;g[sc>>2]=+g[Sb>>2]*.9510565400123596-+g[gb>>2]*.5877852439880371;g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ua>>2]+ +g[Va>>2];g[tc>>2]=+g[Wa>>2]-+g[Pa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[sc>>2]+ +g[tc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[tc>>2]-+g[sc>>2];g[Xa>>2]=+g[Pa>>2]+ +g[Wa>>2];g[c[l>>2]>>2]=+g[Xa>>2]-+g[rc>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[rc>>2]+ +g[Xa>>2];g[zd>>2]=(+g[Wc>>2]+ +g[Vc>>2])*.55901700258255;g[Xc>>2]=+g[Vc>>2]-+g[Wc>>2];g[Ad>>2]=+g[Xc>>2]*.25+ +g[vd>>2];g[wd>>2]=+g[qa>>2]-+g[ue>>2];g[xd>>2]=+g[O>>2]-+g[Ia>>2];g[yd>>2]=+g[wd>>2]*.9510565400123596-+g[xd>>2]*.5877852439880371;g[Cd>>2]=+g[wd>>2]*.5877852439880371+ +g[xd>>2]*.9510565400123596;g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Xc>>2]-+g[vd>>2];g[Dd>>2]=+g[Ad>>2]-+g[zd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Cd>>2]-+g[Dd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Cd>>2]+ +g[Dd>>2];g[Bd>>2]=+g[zd>>2]+ +g[Ad>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[yd>>2]-+g[Bd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[yd>>2]+ +g[Bd>>2];g[ad>>2]=(+g[Td>>2]-+g[Ud>>2])*.55901700258255;g[ed>>2]=+g[Td>>2]+ +g[Ud>>2];g[fd>>2]=+g[dd>>2]-+g[ed>>2]*.25;g[Qd>>2]=+g[Ec>>2]-+g[Fc>>2];g[Rd>>2]=+g[Bc>>2]-+g[Cc>>2];g[Sd>>2]=+g[Qd>>2]*.9510565400123596-+g[Rd>>2]*.5877852439880371;g[hd>>2]=+g[Rd>>2]*.9510565400123596+ +g[Qd>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ed>>2]+ +g[dd>>2];g[id>>2]=+g[ad>>2]+ +g[fd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[hd>>2]+ 
+g[id>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[id>>2]-+g[hd>>2];g[gd>>2]=+g[ad>>2]-+g[fd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Sd>>2]+ +g[gd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[gd>>2]-+g[Sd>>2];g[Md>>2]=(+g[Id>>2]-+g[Jd>>2])*.55901700258255;g[Kd>>2]=+g[Id>>2]+ +g[Jd>>2];g[Ld>>2]=+g[Hd>>2]-+g[Kd>>2]*.25;g[Ed>>2]=+g[kb>>2]-+g[lb>>2];g[Fd>>2]=+g[nb>>2]-+g[ob>>2];g[Gd>>2]=+g[Ed>>2]*.5877852439880371-+g[Fd>>2]*.9510565400123596;g[Od>>2]=+g[Ed>>2]*.9510565400123596+ +g[Fd>>2]*.5877852439880371;g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Kd>>2]+ +g[Hd>>2];g[Pd>>2]=+g[Md>>2]+ +g[Ld>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Od>>2]-+g[Pd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Od>>2]+ +g[Pd>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Gd>>2]-+g[Nd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Gd>>2]+ +g[Nd>>2];g[qd>>2]=(+g[jd>>2]+ +g[kd>>2])*.55901700258255;g[ld>>2]=+g[jd>>2]-+g[kd>>2];g[rd>>2]=+g[ld>>2]*.25+ +g[md>>2];g[nd>>2]=+g[vb>>2]-+g[Ab>>2];g[od>>2]=+g[Gb>>2]-+g[Na>>2];g[pd>>2]=+g[nd>>2]*.9510565400123596+ +g[od>>2]*.5877852439880371;g[td>>2]=+g[od>>2]*.9510565400123596-+g[nd>>2]*.5877852439880371;g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[ld>>2]-+g[md>>2];g[ud>>2]=+g[qd>>2]+ +g[rd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[td>>2]+ +g[ud>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[ud>>2]-+g[td>>2];g[sd>>2]=+g[qd>>2]-+g[rd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[pd>>2]+ +g[sd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[sd>>2]-+g[pd>>2];c[Ye>>2]=(c[Ye>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=Ze;return}/* rr(a): coerces a to int32, saves the stack pointer i, reserves a 16-byte frame, spills a into that frame, calls bn(a, 28, 4648), then restores i and returns nothing. NOTE(review): 28 and 4648 look like a function-table index and a static data address used to register a routine with bn - confirm against the Emscripten function tables and data segment. */function rr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,28,4648);i=b;return}function sr(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0;ai=i;i=i+1984|0;k=ai+1980|0;l=ai+1976|0;m=ai+1972|0;n=ai+1968|0;bi=ai+1964|0;o=ai+1960|0;p=ai+1956|0;$h=ai+1872|0;za=ai+1868|0;_d=ai+1864|0;Ib=ai+1860|0;hf=ai+1856|0;$g=ai+1852|0;Eh=ai+1848|0;x=ai+1844|0;v=ai+1840|0;Fh=ai+1836|0;ah=ai+1832|0;Ba=ai+1828|0;Da=ai+1824|0;Yh=ai+1820|0;sh=ai+1816|0;fa=ai+1812|0;ch=ai+1808|0;ea=ai+1804|0;wh=ai+1800|0;R=ai+1796|0;Hh=ai+1792|0;T=ai+1788|0;Lh=ai+1784|0;z=ai+1780|0;Ab=ai+1776|0;ba=ai+1772|0;yb=ai+1768|0;Oh=ai+1764|0;Ph=ai+1760|0;Qh=ai+1756|0;Sh=ai+1752|0;Ia=ai+1748|0;Fb=ai+1744|0;zh=ai+1740|0;sb=ai+1736|0;ub=ai+1732|0;Db=ai+1728|0;hh=ai+1724|0;fh=ai+1720|0;r=ai+1716|0;ga=ai+1712|0;F=ai+1708|0;Ga=ai+1704|0;C=ai+1700|0;H=ai+1696|0;W=ai+1692|0;Y=ai+1688|0;ya=ai+1684|0;ia=ai+1680|0;Xh=ai+1676|0;uh=ai+1672|0;_h=ai+1668|0;rh=ai+1664|0;Wh=ai+1660|0;vh=ai+1656|0;bh=ai+1652|0;qh=ai+1648|0;Rc=ai+1644|0;Dh=ai+1640|0;rg=ai+1636|0;Ch=ai+1632|0;w=ai+1628|0;y=ai+1624|0;Bh=ai+1620|0;Gh=ai+1616|0;$=ai+1612|0;aa=ai+1608|0;Jh=ai+1604|0;Kh=ai+1600|0;q=ai+1596|0;mg=ai+1592|0;Pa=ai+1588|0;tf=ai+1584|0;lh=ai+1580|0;Qa=ai+1576|0;pg=ai+1572|0;qg=ai+1568|0;lg=ai+1564|0;tg=ai+1560|0;Ya=ai+1556|0;be=ai+1552|0;Ma=ai+1548|0;bg=ai+1544|0;Cd=ai+1540|0;Ge=ai+1536|0;ld=ai+1532|0;$e=ai+1528|0;Jd=ai+1524|0;Fe=ai+1520|0;kd=ai+1516|0;cf=ai+1512|0;na=ai+1508|0;Zf=ai+1504|0;Jb=ai+1500|0;ve=ai+1496|0;ad=ai+1492|0;fe=ai+1488|0;sc=ai+1484|0;we=ai+1480|0;bd=ai+1476|0;ie=ai+1472|0;M=ai+1468|0;_f=ai+1464|0;Ic=ai+1460|0;ye=ai+1456|0;ed=ai+1452|0;Me=ai+1448|0;Pc=ai+1444|0;ze=ai+1440|0;dd=ai+1436|0;Pe=ai+1432|0;lb=ai+1428|0;ag=ai+1424|0;gc=ai+1420|0;De=ai+1416|0;id=ai+1412|0;Xe=ai+1408|0;nc=ai+1404|0;Ce=ai+1400|0;hd=ai+1396|0;Ue=ai+1392|0;N
h=ai+1388|0;Sa=ai+1384|0;Uh=ai+1380|0;Ta=ai+1376|0;Vh=ai+1372|0;ng=ai+1368|0;eh=ai+1364|0;Va=ai+1360|0;jh=ai+1356|0;Wa=ai+1352|0;kh=ai+1348|0;og=ai+1344|0;Ih=ai+1340|0;Mh=ai+1336|0;Rh=ai+1332|0;Th=ai+1328|0;Zh=ai+1324|0;dh=ai+1320|0;gh=ai+1316|0;ih=ai+1312|0;jg=ai+1308|0;kg=ai+1304|0;Ua=ai+1300|0;Xa=ai+1296|0;ob=ai+1292|0;_c=ai+1288|0;pc=ai+1284|0;qc=ai+1280|0;Hd=ai+1276|0;Gd=ai+1272|0;Vc=ai+1268|0;Yc=ai+1264|0;$c=ai+1260|0;xb=ai+1256|0;Ka=ai+1252|0;La=ai+1248|0;mb=ai+1244|0;nb=ai+1240|0;rb=ai+1236|0;Tc=ai+1232|0;Hb=ai+1228|0;Xc=ai+1224|0;wb=ai+1220|0;Uc=ai+1216|0;Cb=ai+1212|0;Wc=ai+1208|0;pb=ai+1204|0;qb=ai+1200|0;Eb=ai+1196|0;Gb=ai+1192|0;tb=ai+1188|0;vb=ai+1184|0;zb=ai+1180|0;Bb=ai+1176|0;Sc=ai+1172|0;_e=ai+1168|0;Bd=ai+1164|0;Ze=ai+1160|0;Zc=ai+1156|0;Ad=ai+1152|0;Id=ai+1148|0;bf=ai+1144|0;Fd=ai+1140|0;af=ai+1136|0;Dd=ai+1132|0;Ed=ai+1128|0;ph=ai+1124|0;Qb=ai+1120|0;db=ai+1116|0;gb=ai+1112|0;Lb=ai+1108|0;Kb=ai+1104|0;Nb=ai+1100|0;Ob=ai+1096|0;Rb=ai+1092|0;u=ai+1088|0;la=ai+1084|0;ma=ai+1080|0;nh=ai+1076|0;oh=ai+1072|0;yh=ai+1068|0;bb=ai+1064|0;ka=ai+1060|0;fb=ai+1056|0;t=ai+1052|0;cb=ai+1048|0;da=ai+1044|0;eb=ai+1040|0;th=ai+1036|0;xh=ai+1032|0;ha=ai+1028|0;ja=ai+1024|0;Ah=ai+1020|0;s=ai+1016|0;A=ai+1012|0;ca=ai+1008|0;hb=ai+1004|0;ee=ai+1e3|0;ab=ai+996|0;de=ai+992|0;_a=ai+988|0;$a=ai+984|0;Mb=ai+980|0;he=ai+976|0;rc=ai+972|0;ge=ai+968|0;Pb=ai+964|0;Sb=ai+960|0;qa=ai+956|0;Ec=ai+952|0;uc=ai+948|0;vc=ai+944|0;Nc=ai+940|0;Mc=ai+936|0;zc=ai+932|0;Cc=ai+928|0;Fc=ai+924|0;xa=ai+920|0;K=ai+916|0;L=ai+912|0;oa=ai+908|0;pa=ai+904|0;ta=ai+900|0;xc=ai+896|0;J=ai+892|0;Bc=ai+888|0;wa=ai+884|0;yc=ai+880|0;E=ai+876|0;Ac=ai+872|0;ra=ai+868|0;sa=ai+864|0;G=ai+860|0;I=ai+856|0;ua=ai+852|0;va=ai+848|0;B=ai+844|0;D=ai+840|0;wc=ai+836|0;Le=ai+832|0;Hc=ai+828|0;Ke=ai+824|0;Dc=ai+820|0;Gc=ai+816|0;Oc=ai+812|0;Oe=ai+808|0;Lc=ai+804|0;Ne=ai+800|0;Jc=ai+796|0;Kc=ai+792|0;Q=ai+788|0;cc=ai+784|0;Ub=ai+780|0;Vb=ai+776|0;lc=ai+772|0;kc=ai+768|0;Zb=ai+764|0;ac=ai+760|0;dc=ai+756|0;Aa=ai+752
|0;jb=ai+748|0;kb=ai+744|0;O=ai+740|0;P=ai+736|0;V=ai+732|0;Xb=ai+728|0;ib=ai+724|0;$b=ai+720|0;_=ai+716|0;Yb=ai+712|0;Fa=ai+708|0;_b=ai+704|0;S=ai+700|0;U=ai+696|0;Ha=ai+692|0;Ja=ai+688|0;X=ai+684|0;Z=ai+680|0;Ca=ai+676|0;Ea=ai+672|0;Wb=ai+668|0;We=ai+664|0;fc=ai+660|0;Ve=ai+656|0;bc=ai+652|0;ec=ai+648|0;mc=ai+644|0;Te=ai+640|0;jc=ai+636|0;Se=ai+632|0;hc=ai+628|0;ic=ai+624|0;dg=ai+620|0;fg=ai+616|0;mh=ai+612|0;Oa=ai+608|0;Wf=ai+604|0;Xf=ai+600|0;eg=ai+596|0;Yf=ai+592|0;$f=ai+588|0;cg=ai+584|0;N=ai+580|0;Na=ai+576|0;Za=ai+572|0;Zd=ai+568|0;ug=ai+564|0;Gg=ai+560|0;Md=ai+556|0;Lg=ai+552|0;Nd=ai+548|0;Kg=ai+544|0;ud=ai+540|0;zg=ai+536|0;xd=ai+532|0;xg=ai+528|0;od=ai+524|0;Rf=ai+520|0;pd=ai+516|0;Qf=ai+512|0;Sd=ai+508|0;Hg=ai+504|0;Vd=ai+500|0;Fg=ai+496|0;Ra=ai+492|0;sg=ai+488|0;tc=ai+484|0;Qc=ai+480|0;Tb=ai+476|0;oc=ai+472|0;Kd=ai+468|0;Ld=ai+464|0;sd=ai+460|0;td=ai+456|0;vg=ai+452|0;vd=ai+448|0;wd=ai+444|0;wg=ai+440|0;cd=ai+436|0;fd=ai+432|0;gd=ai+428|0;jd=ai+424|0;md=ai+420|0;nd=ai+416|0;Qd=ai+412|0;Rd=ai+408|0;Dg=ai+404|0;Td=ai+400|0;Ud=ai+396|0;Eg=ai+392|0;Wd=ai+388|0;Yd=ai+384|0;Pd=ai+380|0;Xd=ai+376|0;Od=ai+372|0;Sf=ai+368|0;Bg=ai+364|0;Ag=ai+360|0;Cg=ai+356|0;yg=ai+352|0;Mg=ai+348|0;Ng=ai+344|0;Jg=ai+340|0;Og=ai+336|0;Ig=ai+332|0;yd=ai+328|0;$d=ai+324|0;rd=ai+320|0;zd=ai+316|0;qd=ai+312|0;Rg=ai+308|0;Zg=ai+304|0;Sg=ai+300|0;Vg=ai+296|0;Wg=ai+292|0;Xg=ai+288|0;_g=ai+284|0;Yg=ai+280|0;Pg=ai+276|0;Qg=ai+272|0;Tg=ai+268|0;Ug=ai+264|0;ce=ai+260|0;ue=ai+256|0;vf=ai+252|0;Hf=ai+248|0;ff=ai+244|0;Mf=ai+240|0;gf=ai+236|0;Lf=ai+232|0;pf=ai+228|0;Af=ai+224|0;sf=ai+220|0;yf=ai+216|0;jf=ai+212|0;hg=ai+208|0;kf=ai+204|0;gg=ai+200|0;ne=ai+196|0;If=ai+192|0;qe=ai+188|0;Gf=ai+184|0;ae=ai+180|0;uf=ai+176|0;Je=ai+172|0;Qe=ai+168|0;Re=ai+164|0;Ye=ai+160|0;df=ai+156|0;ef=ai+152|0;nf=ai+148|0;of=ai+144|0;wf=ai+140|0;qf=ai+136|0;rf=ai+132|0;xf=ai+128|0;xe=ai+124|0;Ae=ai+120|0;Be=ai+116|0;Ee=ai+112|0;He=ai+108|0;Ie=ai+104|0;le=ai+100|0;me=ai+96|0;Ef=ai+92|0;oe=ai+88|0;pe=ai+84|0;Ff=ai+
80|0;ig=ai+76|0;Cf=ai+72|0;Bf=ai+68|0;Df=ai+64|0;zf=ai+60|0;Tf=ai+56|0;Vf=ai+52|0;mf=ai+48|0;Uf=ai+44|0;lf=ai+40|0;re=ai+36|0;te=ai+32|0;ke=ai+28|0;se=ai+24|0;je=ai+20|0;Nf=ai+16|0;Of=ai+12|0;Kf=ai+8|0;Pf=ai+4|0;Jf=ai;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[bi>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ai+1952>>2]=.9980267286300659;g[ai+1948>>2]=.06279052048921585;g[ai+1944>>2]=.6845471262931824;g[ai+1940>>2]=.728968620300293;g[ai+1936>>2]=.4817536771297455;g[ai+1932>>2]=.8763066530227661;g[ai+1928>>2]=.24868988990783691;g[ai+1924>>2]=.9685831665992737;g[ai+1920>>2]=.9921147227287292;g[ai+1916>>2]=.12533323466777802;g[ai+1912>>2]=.4257792830467224;g[ai+1908>>2]=.9048270583152771;g[ai+1904>>2]=.6374239921569824;g[ai+1900>>2]=.7705132365226746;g[ai+1896>>2]=.8443279266357422;g[ai+1892>>2]=.5358268022537231;g[ai+1888>>2]=.5877852439880371;g[ai+1884>>2]=.9510565400123596;g[ai+1880>>2]=.25;g[ai+1876>>2]=.55901700258255;c[$h>>2]=c[bi>>2];c[m>>2]=(c[m>>2]|0)+((c[bi>>2]|0)-1<<3<<2);while(1){if((c[$h>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[_d>>2]=+g[(c[m>>2]|0)+4>>2];g[Ib>>2]=+g[(c[m>>2]|0)+8>>2];g[hf>>2]=+g[(c[m>>2]|0)+12>>2];g[Rc>>2]=+g[za>>2]*+g[Ib>>2];g[Dh>>2]=+g[_d>>2]*+g[Ib>>2];g[rg>>2]=+g[_d>>2]*+g[hf>>2];g[Ch>>2]=+g[za>>2]*+g[hf>>2];g[$g>>2]=+g[Rc>>2]-+g[rg>>2];g[Eh>>2]=+g[Ch>>2]+ +g[Dh>>2];g[x>>2]=+g[Ch>>2]-+g[Dh>>2];g[v>>2]=+g[Rc>>2]+ +g[rg>>2];g[Fh>>2]=+g[(c[m>>2]|0)+20>>2];g[Xh>>2]=+g[_d>>2]*+g[Fh>>2];g[uh>>2]=+g[Ib>>2]*+g[Fh>>2];g[_h>>2]=+g[za>>2]*+g[Fh>>2];g[rh>>2]=+g[hf>>2]*+g[Fh>>2];g[ah>>2]=+g[(c[m>>2]|0)+16>>2];g[Wh>>2]=+g[za>>2]*+g[ah>>2];g[vh>>2]=+g[hf>>2]*+g[ah>>2];g[bh>>2]=+g[_d>>2]*+g[ah>>2];g[qh>>2]=+g[Ib>>2]*+g[ah>>2];g[Ba>>2]=+g[qh>>2]-+g[rh>>2];g[Da>>2]=+g[uh>>2]+ +g[vh>>2];g[Yh>>2]=+g[Wh>>2]-+g[Xh>>2];g[sh>>2]=+g[qh>>2]+ +g[rh>>2];g[fa>>2]=+g[_h>>2]-+g[bh>>2];g[ch>>2]=+g[_h>>2]+ +g[bh>>2];g[ea>>2]=+g[Wh>>2]+ +g[Xh>>2];g[wh>>2]=+g[uh>>2]-+g[vh>>2];g[w>>2]=+g[v>>2]*+g[ah>>2];g[y>>2]=+g[x>>2]*+g[Fh>>2];g[R>>2]=+g[w>>2]+ 
+g[y>>2];g[Bh>>2]=+g[$g>>2]*+g[ah>>2];g[Gh>>2]=+g[Eh>>2]*+g[Fh>>2];g[Hh>>2]=+g[Bh>>2]+ +g[Gh>>2];g[$>>2]=+g[v>>2]*+g[Fh>>2];g[aa>>2]=+g[x>>2]*+g[ah>>2];g[T>>2]=+g[$>>2]-+g[aa>>2];g[Jh>>2]=+g[$g>>2]*+g[Fh>>2];g[Kh>>2]=+g[Eh>>2]*+g[ah>>2];g[Lh>>2]=+g[Jh>>2]-+g[Kh>>2];g[z>>2]=+g[w>>2]-+g[y>>2];g[Ab>>2]=+g[Jh>>2]+ +g[Kh>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[yb>>2]=+g[Bh>>2]-+g[Gh>>2];g[Oh>>2]=+g[(c[m>>2]|0)+24>>2];g[Ph>>2]=+g[(c[m>>2]|0)+28>>2];g[Qh>>2]=+g[$g>>2]*+g[Oh>>2]+ +g[Eh>>2]*+g[Ph>>2];g[Sh>>2]=+g[$g>>2]*+g[Ph>>2]-+g[Eh>>2]*+g[Oh>>2];g[Ia>>2]=+g[R>>2]*+g[Ph>>2]-+g[T>>2]*+g[Oh>>2];g[Fb>>2]=+g[sh>>2]*+g[Ph>>2]-+g[wh>>2]*+g[Oh>>2];g[zh>>2]=+g[Ib>>2]*+g[Oh>>2]+ +g[hf>>2]*+g[Ph>>2];g[sb>>2]=+g[za>>2]*+g[Oh>>2]+ +g[_d>>2]*+g[Ph>>2];g[ub>>2]=+g[za>>2]*+g[Ph>>2]-+g[_d>>2]*+g[Oh>>2];g[Db>>2]=+g[sh>>2]*+g[Oh>>2]+ +g[wh>>2]*+g[Ph>>2];g[hh>>2]=+g[ah>>2]*+g[Ph>>2]-+g[Fh>>2]*+g[Oh>>2];g[fh>>2]=+g[ah>>2]*+g[Oh>>2]+ +g[Fh>>2]*+g[Ph>>2];g[r>>2]=+g[Ib>>2]*+g[Ph>>2]-+g[hf>>2]*+g[Oh>>2];g[ga>>2]=+g[ea>>2]*+g[Oh>>2]+ +g[fa>>2]*+g[Ph>>2];g[F>>2]=+g[Hh>>2]*+g[Oh>>2]+ +g[Lh>>2]*+g[Ph>>2];g[Ga>>2]=+g[R>>2]*+g[Oh>>2]+ +g[T>>2]*+g[Ph>>2];g[C>>2]=+g[Yh>>2]*+g[Ph>>2]-+g[ch>>2]*+g[Oh>>2];g[H>>2]=+g[Hh>>2]*+g[Ph>>2]-+g[Lh>>2]*+g[Oh>>2];g[W>>2]=+g[v>>2]*+g[Oh>>2]+ +g[x>>2]*+g[Ph>>2];g[Y>>2]=+g[v>>2]*+g[Ph>>2]-+g[x>>2]*+g[Oh>>2];g[ya>>2]=+g[Yh>>2]*+g[Oh>>2]+ +g[ch>>2]*+g[Ph>>2];g[ia>>2]=+g[ea>>2]*+g[Ph>>2]-+g[fa>>2]*+g[Oh>>2];g[q>>2]=+g[c[k>>2]>>2];g[mg>>2]=+g[c[l>>2]>>2];g[Ih>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Mh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Nh>>2]=+g[Hh>>2]*+g[Ih>>2]+ +g[Lh>>2]*+g[Mh>>2];g[Sa>>2]=+g[Hh>>2]*+g[Mh>>2]-+g[Lh>>2]*+g[Ih>>2];g[Rh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Th>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Uh>>2]=+g[Qh>>2]*+g[Rh>>2]+ +g[Sh>>2]*+g[Th>>2];g[Ta>>2]=+g[Qh>>2]*+g[Th>>2]-+g[Sh>>2]*+g[Rh>>2];g[Vh>>2]=+g[Nh>>2]+ +g[Uh>>2];g[ng>>2]=+g[Sa>>2]+ 
+g[Ta>>2];g[Zh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[dh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[eh>>2]=+g[Yh>>2]*+g[Zh>>2]+ +g[ch>>2]*+g[dh>>2];g[Va>>2]=+g[Yh>>2]*+g[dh>>2]-+g[ch>>2]*+g[Zh>>2];g[gh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ih>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[jh>>2]=+g[fh>>2]*+g[gh>>2]+ +g[hh>>2]*+g[ih>>2];g[Wa>>2]=+g[fh>>2]*+g[ih>>2]-+g[hh>>2]*+g[gh>>2];g[kh>>2]=+g[eh>>2]+ +g[jh>>2];g[og>>2]=+g[Va>>2]+ +g[Wa>>2];g[Pa>>2]=(+g[Vh>>2]-+g[kh>>2])*.55901700258255;g[tf>>2]=(+g[ng>>2]-+g[og>>2])*.55901700258255;g[lh>>2]=+g[Vh>>2]+ +g[kh>>2];g[Qa>>2]=+g[q>>2]-+g[lh>>2]*.25;g[pg>>2]=+g[ng>>2]+ +g[og>>2];g[qg>>2]=+g[mg>>2]-+g[pg>>2]*.25;g[jg>>2]=+g[eh>>2]-+g[jh>>2];g[kg>>2]=+g[Nh>>2]-+g[Uh>>2];g[lg>>2]=+g[jg>>2]*.9510565400123596-+g[kg>>2]*.5877852439880371;g[tg>>2]=+g[kg>>2]*.9510565400123596+ +g[jg>>2]*.5877852439880371;g[Ua>>2]=+g[Sa>>2]-+g[Ta>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[Ya>>2]=+g[Ua>>2]*.9510565400123596+ +g[Xa>>2]*.5877852439880371;g[be>>2]=+g[Xa>>2]*.9510565400123596-+g[Ua>>2]*.5877852439880371;g[mb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[nb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ob>>2]=+g[Ib>>2]*+g[mb>>2]+ +g[hf>>2]*+g[nb>>2];g[_c>>2]=+g[Ib>>2]*+g[nb>>2]-+g[hf>>2]*+g[mb>>2];g[pb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[qb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[rb>>2]=+g[ea>>2]*+g[pb>>2]+ +g[fa>>2]*+g[qb>>2];g[Tc>>2]=+g[ea>>2]*+g[qb>>2]-+g[fa>>2]*+g[pb>>2];g[Eb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Hb>>2]=+g[Db>>2]*+g[Eb>>2]+ +g[Fb>>2]*+g[Gb>>2];g[Xc>>2]=+g[Db>>2]*+g[Gb>>2]-+g[Fb>>2]*+g[Eb>>2];g[tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[vb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[wb>>2]=+g[sb>>2]*+g[tb>>2]+ +g[ub>>2]*+g[vb>>2];g[Uc>>2]=+g[sb>>2]*+g[vb>>2]-+g[ub>>2]*+g[tb>>2];g[zb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Bb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Cb>>2]=+g[yb>>2]*+g[zb>>2]+ 
+g[Ab>>2]*+g[Bb>>2];g[Wc>>2]=+g[yb>>2]*+g[Bb>>2]-+g[Ab>>2]*+g[zb>>2];g[pc>>2]=+g[rb>>2]-+g[wb>>2];g[qc>>2]=+g[Cb>>2]-+g[Hb>>2];g[Hd>>2]=+g[Wc>>2]-+g[Xc>>2];g[Gd>>2]=+g[Tc>>2]-+g[Uc>>2];g[Vc>>2]=+g[Tc>>2]+ +g[Uc>>2];g[Yc>>2]=+g[Wc>>2]+ +g[Xc>>2];g[$c>>2]=+g[Vc>>2]+ +g[Yc>>2];g[xb>>2]=+g[rb>>2]+ +g[wb>>2];g[Ka>>2]=+g[Cb>>2]+ +g[Hb>>2];g[La>>2]=+g[xb>>2]+ +g[Ka>>2];g[Ma>>2]=+g[ob>>2]+ +g[La>>2];g[bg>>2]=+g[_c>>2]+ +g[$c>>2];g[Sc>>2]=+g[pc>>2]*.9510565400123596+ +g[qc>>2]*.5877852439880371;g[_e>>2]=+g[qc>>2]*.9510565400123596-+g[pc>>2]*.5877852439880371;g[Zc>>2]=(+g[Vc>>2]-+g[Yc>>2])*.55901700258255;g[Ad>>2]=+g[_c>>2]-+g[$c>>2]*.25;g[Bd>>2]=+g[Zc>>2]+ +g[Ad>>2];g[Ze>>2]=+g[Ad>>2]-+g[Zc>>2];g[Cd>>2]=+g[Sc>>2]+ +g[Bd>>2];g[Ge>>2]=+g[_e>>2]+ +g[Ze>>2];g[ld>>2]=+g[Bd>>2]-+g[Sc>>2];g[$e>>2]=+g[Ze>>2]-+g[_e>>2];g[Id>>2]=+g[Gd>>2]*.9510565400123596+ +g[Hd>>2]*.5877852439880371;g[bf>>2]=+g[Hd>>2]*.9510565400123596-+g[Gd>>2]*.5877852439880371;g[Dd>>2]=(+g[xb>>2]-+g[Ka>>2])*.55901700258255;g[Ed>>2]=+g[ob>>2]-+g[La>>2]*.25;g[Fd>>2]=+g[Dd>>2]+ +g[Ed>>2];g[af>>2]=+g[Ed>>2]-+g[Dd>>2];g[Jd>>2]=+g[Fd>>2]-+g[Id>>2];g[Fe>>2]=+g[af>>2]-+g[bf>>2];g[kd>>2]=+g[Fd>>2]+ +g[Id>>2];g[cf>>2]=+g[af>>2]+ +g[bf>>2];g[nh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[oh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ph>>2]=+g[za>>2]*+g[nh>>2]+ +g[_d>>2]*+g[oh>>2];g[Qb>>2]=+g[za>>2]*+g[oh>>2]-+g[_d>>2]*+g[nh>>2];g[th>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[xh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[yh>>2]=+g[sh>>2]*+g[th>>2]+ +g[wh>>2]*+g[xh>>2];g[bb>>2]=+g[sh>>2]*+g[xh>>2]-+g[wh>>2]*+g[th>>2];g[ha>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ja>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ka>>2]=+g[ga>>2]*+g[ha>>2]+ +g[ia>>2]*+g[ja>>2];g[fb>>2]=+g[ga>>2]*+g[ja>>2]-+g[ia>>2]*+g[ha>>2];g[Ah>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[t>>2]=+g[zh>>2]*+g[Ah>>2]+ 
+g[r>>2]*+g[s>>2];g[cb>>2]=+g[zh>>2]*+g[s>>2]-+g[r>>2]*+g[Ah>>2];g[A>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[da>>2]=+g[z>>2]*+g[A>>2]+ +g[ba>>2]*+g[ca>>2];g[eb>>2]=+g[z>>2]*+g[ca>>2]-+g[ba>>2]*+g[A>>2];g[db>>2]=+g[bb>>2]-+g[cb>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[Lb>>2]=+g[da>>2]-+g[ka>>2];g[Kb>>2]=+g[yh>>2]-+g[t>>2];g[Nb>>2]=+g[bb>>2]+ +g[cb>>2];g[Ob>>2]=+g[eb>>2]+ +g[fb>>2];g[Rb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[u>>2]=+g[yh>>2]+ +g[t>>2];g[la>>2]=+g[da>>2]+ +g[ka>>2];g[ma>>2]=+g[u>>2]+ +g[la>>2];g[na>>2]=+g[ph>>2]+ +g[ma>>2];g[Zf>>2]=+g[Qb>>2]+ +g[Rb>>2];g[hb>>2]=+g[db>>2]*.9510565400123596+ +g[gb>>2]*.5877852439880371;g[ee>>2]=+g[gb>>2]*.9510565400123596-+g[db>>2]*.5877852439880371;g[_a>>2]=(+g[u>>2]-+g[la>>2])*.55901700258255;g[$a>>2]=+g[ph>>2]-+g[ma>>2]*.25;g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[de>>2]=+g[$a>>2]-+g[_a>>2];g[Jb>>2]=+g[ab>>2]-+g[hb>>2];g[ve>>2]=+g[de>>2]-+g[ee>>2];g[ad>>2]=+g[ab>>2]+ +g[hb>>2];g[fe>>2]=+g[de>>2]+ +g[ee>>2];g[Mb>>2]=+g[Kb>>2]*.9510565400123596+ +g[Lb>>2]*.5877852439880371;g[he>>2]=+g[Lb>>2]*.9510565400123596-+g[Kb>>2]*.5877852439880371;g[Pb>>2]=(+g[Nb>>2]-+g[Ob>>2])*.55901700258255;g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2]*.25;g[rc>>2]=+g[Pb>>2]+ +g[Sb>>2];g[ge>>2]=+g[Sb>>2]-+g[Pb>>2];g[sc>>2]=+g[Mb>>2]+ +g[rc>>2];g[we>>2]=+g[he>>2]+ +g[ge>>2];g[bd>>2]=+g[rc>>2]-+g[Mb>>2];g[ie>>2]=+g[ge>>2]-+g[he>>2];g[oa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[pa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[qa>>2]=+g[$g>>2]*+g[oa>>2]+ +g[Eh>>2]*+g[pa>>2];g[Ec>>2]=+g[$g>>2]*+g[pa>>2]-+g[Eh>>2]*+g[oa>>2];g[ra>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[sa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ta>>2]=+g[ah>>2]*+g[ra>>2]+ +g[Fh>>2]*+g[sa>>2];g[xc>>2]=+g[ah>>2]*+g[sa>>2]-+g[Fh>>2]*+g[ra>>2];g[G>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[J>>2]=+g[F>>2]*+g[G>>2]+ 
+g[H>>2]*+g[I>>2];g[Bc>>2]=+g[F>>2]*+g[I>>2]-+g[H>>2]*+g[G>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[va>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[wa>>2]=+g[Oh>>2]*+g[ua>>2]+ +g[Ph>>2]*+g[va>>2];g[yc>>2]=+g[Oh>>2]*+g[va>>2]-+g[Ph>>2]*+g[ua>>2];g[B>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[E>>2]=+g[ya>>2]*+g[B>>2]+ +g[C>>2]*+g[D>>2];g[Ac>>2]=+g[ya>>2]*+g[D>>2]-+g[C>>2]*+g[B>>2];g[uc>>2]=+g[wa>>2]-+g[ta>>2];g[vc>>2]=+g[E>>2]-+g[J>>2];g[Nc>>2]=+g[Ac>>2]-+g[Bc>>2];g[Mc>>2]=+g[xc>>2]-+g[yc>>2];g[zc>>2]=+g[xc>>2]+ +g[yc>>2];g[Cc>>2]=+g[Ac>>2]+ +g[Bc>>2];g[Fc>>2]=+g[zc>>2]+ +g[Cc>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[K>>2]=+g[E>>2]+ +g[J>>2];g[L>>2]=+g[xa>>2]+ +g[K>>2];g[M>>2]=+g[qa>>2]+ +g[L>>2];g[_f>>2]=+g[Ec>>2]+ +g[Fc>>2];g[wc>>2]=+g[uc>>2]*.9510565400123596-+g[vc>>2]*.5877852439880371;g[Le>>2]=+g[uc>>2]*.5877852439880371+ +g[vc>>2]*.9510565400123596;g[Dc>>2]=(+g[zc>>2]-+g[Cc>>2])*.55901700258255;g[Gc>>2]=+g[Ec>>2]-+g[Fc>>2]*.25;g[Hc>>2]=+g[Dc>>2]+ +g[Gc>>2];g[Ke>>2]=+g[Gc>>2]-+g[Dc>>2];g[Ic>>2]=+g[wc>>2]-+g[Hc>>2];g[ye>>2]=+g[Le>>2]+ +g[Ke>>2];g[ed>>2]=+g[wc>>2]+ +g[Hc>>2];g[Me>>2]=+g[Ke>>2]-+g[Le>>2];g[Oc>>2]=+g[Mc>>2]*.9510565400123596+ +g[Nc>>2]*.5877852439880371;g[Oe>>2]=+g[Nc>>2]*.9510565400123596-+g[Mc>>2]*.5877852439880371;g[Jc>>2]=(+g[xa>>2]-+g[K>>2])*.55901700258255;g[Kc>>2]=+g[qa>>2]-+g[L>>2]*.25;g[Lc>>2]=+g[Jc>>2]+ +g[Kc>>2];g[Ne>>2]=+g[Kc>>2]-+g[Jc>>2];g[Pc>>2]=+g[Lc>>2]-+g[Oc>>2];g[ze>>2]=+g[Ne>>2]-+g[Oe>>2];g[dd>>2]=+g[Lc>>2]+ +g[Oc>>2];g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[O>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[P>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Q>>2]=+g[v>>2]*+g[O>>2]+ +g[x>>2]*+g[P>>2];g[cc>>2]=+g[v>>2]*+g[P>>2]-+g[x>>2]*+g[O>>2];g[S>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ 
+g[T>>2]*+g[U>>2];g[Xb>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[Ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[Ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ib>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[$b>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[X>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[Z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[_>>2]=+g[W>>2]*+g[X>>2]+ +g[Y>>2]*+g[Z>>2];g[Yb>>2]=+g[W>>2]*+g[Z>>2]-+g[Y>>2]*+g[X>>2];g[Ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Ea>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]+ +g[Da>>2]*+g[Ea>>2];g[_b>>2]=+g[Ba>>2]*+g[Ea>>2]-+g[Da>>2]*+g[Ca>>2];g[Ub>>2]=+g[V>>2]-+g[_>>2];g[Vb>>2]=+g[Fa>>2]-+g[ib>>2];g[lc>>2]=+g[_b>>2]-+g[$b>>2];g[kc>>2]=+g[Xb>>2]-+g[Yb>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[ac>>2]=+g[_b>>2]+ +g[$b>>2];g[dc>>2]=+g[Zb>>2]+ +g[ac>>2];g[Aa>>2]=+g[V>>2]+ +g[_>>2];g[jb>>2]=+g[Fa>>2]+ +g[ib>>2];g[kb>>2]=+g[Aa>>2]+ +g[jb>>2];g[lb>>2]=+g[Q>>2]+ +g[kb>>2];g[ag>>2]=+g[cc>>2]+ +g[dc>>2];g[Wb>>2]=+g[Ub>>2]*.9510565400123596+ +g[Vb>>2]*.5877852439880371;g[We>>2]=+g[Vb>>2]*.9510565400123596-+g[Ub>>2]*.5877852439880371;g[bc>>2]=(+g[Zb>>2]-+g[ac>>2])*.55901700258255;g[ec>>2]=+g[cc>>2]-+g[dc>>2]*.25;g[fc>>2]=+g[bc>>2]+ +g[ec>>2];g[Ve>>2]=+g[ec>>2]-+g[bc>>2];g[gc>>2]=+g[Wb>>2]+ +g[fc>>2];g[De>>2]=+g[We>>2]+ +g[Ve>>2];g[id>>2]=+g[fc>>2]-+g[Wb>>2];g[Xe>>2]=+g[Ve>>2]-+g[We>>2];g[mc>>2]=+g[kc>>2]*.9510565400123596+ +g[lc>>2]*.5877852439880371;g[Te>>2]=+g[lc>>2]*.9510565400123596-+g[kc>>2]*.5877852439880371;g[hc>>2]=(+g[Aa>>2]-+g[jb>>2])*.55901700258255;g[ic>>2]=+g[Q>>2]-+g[kb>>2]*.25;g[jc>>2]=+g[hc>>2]+ +g[ic>>2];g[Se>>2]=+g[ic>>2]-+g[hc>>2];g[nc>>2]=+g[jc>>2]-+g[mc>>2];g[Ce>>2]=+g[Se>>2]-+g[Te>>2];g[hd>>2]=+g[jc>>2]+ +g[mc>>2];g[Ue>>2]=+g[Se>>2]+ +g[Te>>2];g[$f>>2]=+g[Zf>>2]-+g[_f>>2];g[cg>>2]=+g[ag>>2]-+g[bg>>2];g[dg>>2]=+g[$f>>2]*.9510565400123596+ +g[cg>>2]*.5877852439880371;g[fg>>2]=+g[cg>>2]*.9510565400123596-+g[$f>>2]*.5877852439880371;g[mh>>2]=+g[q>>2]+ +g[lh>>2];g[N>>2]=+g[na>>2]+ 
+g[M>>2];g[Na>>2]=+g[lb>>2]+ +g[Ma>>2];g[Oa>>2]=+g[N>>2]+ +g[Na>>2];g[Wf>>2]=(+g[N>>2]-+g[Na>>2])*.55901700258255;g[Xf>>2]=+g[mh>>2]-+g[Oa>>2]*.25;g[c[k>>2]>>2]=+g[mh>>2]+ +g[Oa>>2];g[eg>>2]=+g[Xf>>2]-+g[Wf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[eg>>2]-+g[fg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[eg>>2]+ +g[fg>>2];g[Yf>>2]=+g[Wf>>2]+ +g[Xf>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Yf>>2]-+g[dg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Yf>>2]+ +g[dg>>2];g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[Za>>2]=+g[Ra>>2]-+g[Ya>>2];g[Zd>>2]=+g[Ra>>2]+ +g[Ya>>2];g[sg>>2]=+g[tf>>2]+ +g[qg>>2];g[ug>>2]=+g[sg>>2]-+g[tg>>2];g[Gg>>2]=+g[tg>>2]+ +g[sg>>2];g[tc>>2]=+g[Jb>>2]*.5358268022537231+ +g[sc>>2]*.8443279266357422;g[Qc>>2]=+g[Ic>>2]*.7705132365226746-+g[Pc>>2]*.6374239921569824;g[Tb>>2]=+g[tc>>2]+ +g[Qc>>2];g[oc>>2]=+g[gc>>2]*.9048270583152771-+g[nc>>2]*.4257792830467224;g[Kd>>2]=+g[Cd>>2]*.12533323466777802-+g[Jd>>2]*.9921147227287292;g[Ld>>2]=+g[oc>>2]+ +g[Kd>>2];g[Md>>2]=+g[Tb>>2]+ +g[Ld>>2];g[Lg>>2]=+g[oc>>2]-+g[Kd>>2];g[Nd>>2]=(+g[Tb>>2]-+g[Ld>>2])*.55901700258255;g[Kg>>2]=+g[Qc>>2]-+g[tc>>2];g[sd>>2]=+g[bd>>2]*.9685831665992737-+g[ad>>2]*.24868988990783691;g[td>>2]=+g[ed>>2]*.5358268022537231-+g[dd>>2]*.8443279266357422;g[vg>>2]=+g[sd>>2]+ +g[td>>2];g[vd>>2]=+g[id>>2]*.8763066530227661-+g[hd>>2]*.4817536771297455;g[wd>>2]=+g[ld>>2]*.728968620300293-+g[kd>>2]*.6845471262931824;g[wg>>2]=+g[vd>>2]+ +g[wd>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[zg>>2]=(+g[vg>>2]-+g[wg>>2])*.55901700258255;g[xd>>2]=+g[vd>>2]-+g[wd>>2];g[xg>>2]=+g[vg>>2]+ +g[wg>>2];g[cd>>2]=+g[ad>>2]*.9685831665992737+ +g[bd>>2]*.24868988990783691;g[fd>>2]=+g[dd>>2]*.5358268022537231+ +g[ed>>2]*.8443279266357422;g[gd>>2]=+g[cd>>2]+ +g[fd>>2];g[jd>>2]=+g[hd>>2]*.8763066530227661+ +g[id>>2]*.4817536771297455;g[md>>2]=+g[kd>>2]*.728968620300293+ +g[ld>>2]*.6845471262931824;g[nd>>2]=+g[jd>>2]+ +g[md>>2];g[od>>2]=+g[gd>>2]+ 
+g[nd>>2];g[Rf>>2]=+g[jd>>2]-+g[md>>2];g[pd>>2]=(+g[gd>>2]-+g[nd>>2])*.55901700258255;g[Qf>>2]=+g[fd>>2]-+g[cd>>2];g[Qd>>2]=+g[sc>>2]*.5358268022537231-+g[Jb>>2]*.8443279266357422;g[Rd>>2]=+g[Pc>>2]*.7705132365226746+ +g[Ic>>2]*.6374239921569824;g[Dg>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Td>>2]=+g[Jd>>2]*.12533323466777802+ +g[Cd>>2]*.9921147227287292;g[Ud>>2]=+g[nc>>2]*.9048270583152771+ +g[gc>>2]*.4257792830467224;g[Eg>>2]=+g[Ud>>2]+ +g[Td>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[Hg>>2]=+g[Dg>>2]-+g[Eg>>2];g[Vd>>2]=+g[Td>>2]-+g[Ud>>2];g[Fg>>2]=(+g[Dg>>2]+ +g[Eg>>2])*.55901700258255;g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Za>>2]+ +g[Md>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[xg>>2]+ +g[ug>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Hg>>2]+ +g[Gg>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Zd>>2]+ +g[od>>2];g[Wd>>2]=+g[Sd>>2]*.9510565400123596+ +g[Vd>>2]*.5877852439880371;g[Yd>>2]=+g[Vd>>2]*.9510565400123596-+g[Sd>>2]*.5877852439880371;g[Od>>2]=+g[Za>>2]-+g[Md>>2]*.25;g[Pd>>2]=+g[Nd>>2]+ +g[Od>>2];g[Xd>>2]=+g[Od>>2]-+g[Nd>>2];g[c[l>>2]>>2]=+g[Pd>>2]-+g[Wd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Xd>>2]+ +g[Yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Pd>>2]+ +g[Wd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Xd>>2]-+g[Yd>>2];g[Sf>>2]=+g[Qf>>2]*.5877852439880371+ +g[Rf>>2]*.9510565400123596;g[Bg>>2]=+g[Qf>>2]*.9510565400123596-+g[Rf>>2]*.5877852439880371;g[yg>>2]=+g[ug>>2]-+g[xg>>2]*.25;g[Ag>>2]=+g[yg>>2]-+g[zg>>2];g[Cg>>2]=+g[zg>>2]+ +g[yg>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Sf>>2]-+g[Ag>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Bg>>2]+ +g[Cg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Sf>>2]+ +g[Ag>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Bg>>2]-+g[Cg>>2];g[Mg>>2]=+g[Kg>>2]*.5877852439880371+ +g[Lg>>2]*.9510565400123596;g[Ng>>2]=+g[Kg>>2]*.9510565400123596-+g[Lg>>2]*.5877852439880371;g[Ig>>2]=+g[Gg>>2]-+g[Hg>>2]*.25;g[Jg>>2]=+g[Fg>>2]-+g[Ig>>2];g[Og>>2]=+g[Fg>>2]+ 
+g[Ig>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Jg>>2]-+g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Ng>>2]+ +g[Og>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Mg>>2]+ +g[Jg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Ng>>2]-+g[Og>>2];g[yd>>2]=+g[ud>>2]*.9510565400123596+ +g[xd>>2]*.5877852439880371;g[$d>>2]=+g[xd>>2]*.9510565400123596-+g[ud>>2]*.5877852439880371;g[qd>>2]=+g[Zd>>2]-+g[od>>2]*.25;g[rd>>2]=+g[pd>>2]+ +g[qd>>2];g[zd>>2]=+g[qd>>2]-+g[pd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[rd>>2]-+g[yd>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[zd>>2]+ +g[$d>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[rd>>2]+ +g[yd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[zd>>2]-+g[$d>>2];g[Pg>>2]=+g[M>>2]-+g[na>>2];g[Qg>>2]=+g[lb>>2]-+g[Ma>>2];g[Rg>>2]=+g[Pg>>2]*.5877852439880371+ +g[Qg>>2]*.9510565400123596;g[Zg>>2]=+g[Pg>>2]*.9510565400123596-+g[Qg>>2]*.5877852439880371;g[Sg>>2]=+g[pg>>2]+ +g[mg>>2];g[Tg>>2]=+g[Zf>>2]+ +g[_f>>2];g[Ug>>2]=+g[ag>>2]+ +g[bg>>2];g[Vg>>2]=+g[Tg>>2]+ +g[Ug>>2];g[Wg>>2]=+g[Sg>>2]-+g[Vg>>2]*.25;g[Xg>>2]=(+g[Tg>>2]-+g[Ug>>2])*.55901700258255;g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Vg>>2]+ +g[Sg>>2];g[_g>>2]=+g[Xg>>2]+ +g[Wg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Zg>>2]-+g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Zg>>2]+ +g[_g>>2];g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Rg>>2]-+g[Yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Rg>>2]+ +g[Yg>>2];g[ae>>2]=+g[Qa>>2]-+g[Pa>>2];g[ce>>2]=+g[ae>>2]+ +g[be>>2];g[ue>>2]=+g[ae>>2]-+g[be>>2];g[uf>>2]=+g[qg>>2]-+g[tf>>2];g[vf>>2]=+g[lg>>2]+ +g[uf>>2];g[Hf>>2]=+g[uf>>2]-+g[lg>>2];g[Je>>2]=+g[fe>>2]*.728968620300293+ +g[ie>>2]*.6845471262931824;g[Qe>>2]=+g[Me>>2]*.12533323466777802-+g[Pe>>2]*.9921147227287292;g[Re>>2]=+g[Je>>2]+ +g[Qe>>2];g[Ye>>2]=+g[Ue>>2]*.06279052048921585+ +g[Xe>>2]*.9980267286300659;g[df>>2]=+g[$e>>2]*.7705132365226746-+g[cf>>2]*.6374239921569824;g[ef>>2]=+g[Ye>>2]+ +g[df>>2];g[ff>>2]=+g[Re>>2]+ 
+g[ef>>2];g[Mf>>2]=+g[Ye>>2]-+g[df>>2];g[gf>>2]=(+g[Re>>2]-+g[ef>>2])*.55901700258255;g[Lf>>2]=+g[Qe>>2]-+g[Je>>2];g[nf>>2]=+g[we>>2]*.8763066530227661-+g[ve>>2]*.4817536771297455;g[of>>2]=+g[ze>>2]*.9048270583152771+ +g[ye>>2]*.4257792830467224;g[wf>>2]=+g[nf>>2]-+g[of>>2];g[qf>>2]=+g[De>>2]*.5358268022537231-+g[Ce>>2]*.8443279266357422;g[rf>>2]=+g[Ge>>2]*.06279052048921585-+g[Fe>>2]*.9980267286300659;g[xf>>2]=+g[qf>>2]+ +g[rf>>2];g[pf>>2]=+g[nf>>2]+ +g[of>>2];g[Af>>2]=(+g[wf>>2]-+g[xf>>2])*.55901700258255;g[sf>>2]=+g[qf>>2]-+g[rf>>2];g[yf>>2]=+g[wf>>2]+ +g[xf>>2];g[xe>>2]=+g[ve>>2]*.8763066530227661+ +g[we>>2]*.4817536771297455;g[Ae>>2]=+g[ye>>2]*.9048270583152771-+g[ze>>2]*.4257792830467224;g[Be>>2]=+g[xe>>2]+ +g[Ae>>2];g[Ee>>2]=+g[Ce>>2]*.5358268022537231+ +g[De>>2]*.8443279266357422;g[He>>2]=+g[Fe>>2]*.06279052048921585+ +g[Ge>>2]*.9980267286300659;g[Ie>>2]=+g[Ee>>2]+ +g[He>>2];g[jf>>2]=+g[Be>>2]+ +g[Ie>>2];g[hg>>2]=+g[Ee>>2]-+g[He>>2];g[kf>>2]=(+g[Be>>2]-+g[Ie>>2])*.55901700258255;g[gg>>2]=+g[Ae>>2]-+g[xe>>2];g[le>>2]=+g[ie>>2]*.728968620300293-+g[fe>>2]*.6845471262931824;g[me>>2]=+g[Pe>>2]*.12533323466777802+ +g[Me>>2]*.9921147227287292;g[Ef>>2]=+g[le>>2]-+g[me>>2];g[oe>>2]=+g[Xe>>2]*.06279052048921585-+g[Ue>>2]*.9980267286300659;g[pe>>2]=+g[cf>>2]*.7705132365226746+ +g[$e>>2]*.6374239921569824;g[Ff>>2]=+g[oe>>2]-+g[pe>>2];g[ne>>2]=+g[le>>2]+ +g[me>>2];g[If>>2]=+g[Ef>>2]+ +g[Ff>>2];g[qe>>2]=+g[oe>>2]+ +g[pe>>2];g[Gf>>2]=(+g[Ef>>2]-+g[Ff>>2])*.55901700258255;g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ce>>2]+ +g[ff>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[yf>>2]+ +g[vf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[If>>2]+ +g[Hf>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ue>>2]+ +g[jf>>2];g[ig>>2]=+g[gg>>2]*.5877852439880371+ +g[hg>>2]*.9510565400123596;g[Cf>>2]=+g[gg>>2]*.9510565400123596-+g[hg>>2]*.5877852439880371;g[zf>>2]=+g[vf>>2]-+g[yf>>2]*.25;g[Bf>>2]=+g[zf>>2]-+g[Af>>2];g[Df>>2]=+g[Af>>2]+ 
+g[zf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[ig>>2]-+g[Bf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Cf>>2]+ +g[Df>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[ig>>2]+ +g[Bf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Cf>>2]-+g[Df>>2];g[Tf>>2]=+g[pf>>2]*.9510565400123596+ +g[sf>>2]*.5877852439880371;g[Vf>>2]=+g[sf>>2]*.9510565400123596-+g[pf>>2]*.5877852439880371;g[lf>>2]=+g[ue>>2]-+g[jf>>2]*.25;g[mf>>2]=+g[kf>>2]+ +g[lf>>2];g[Uf>>2]=+g[lf>>2]-+g[kf>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[mf>>2]-+g[Tf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Uf>>2]+ +g[Vf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[mf>>2]+ +g[Tf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Uf>>2]-+g[Vf>>2];g[re>>2]=+g[ne>>2]*.9510565400123596+ +g[qe>>2]*.5877852439880371;g[te>>2]=+g[qe>>2]*.9510565400123596-+g[ne>>2]*.5877852439880371;g[je>>2]=+g[ce>>2]-+g[ff>>2]*.25;g[ke>>2]=+g[gf>>2]+ +g[je>>2];g[se>>2]=+g[je>>2]-+g[gf>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[ke>>2]-+g[re>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[se>>2]+ +g[te>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ke>>2]+ +g[re>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[se>>2]-+g[te>>2];g[Nf>>2]=+g[Lf>>2]*.5877852439880371+ +g[Mf>>2]*.9510565400123596;g[Of>>2]=+g[Lf>>2]*.9510565400123596-+g[Mf>>2]*.5877852439880371;g[Jf>>2]=+g[Hf>>2]-+g[If>>2]*.25;g[Kf>>2]=+g[Gf>>2]-+g[Jf>>2];g[Pf>>2]=+g[Gf>>2]+ +g[Jf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Kf>>2]-+g[Nf>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Of>>2]+ +g[Pf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Nf>>2]+ +g[Kf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Of>>2]-+g[Pf>>2];c[$h>>2]=(c[$h>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=ai;return}function tr(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,29,4696);i=b;return} +function st(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0;K=i;i=i+128|0;n=K+116|0;o=K+112|0;p=K+108|0;q=K+104|0;r=K+100|0;s=K+96|0;t=K+92|0;L=K+88|0;u=K+84|0;v=K+80|0;J=K+52|0;w=K+48|0;F=K+44|0;G=K+40|0;z=K+36|0;I=K+32|0;C=K+28|0;H=K+24|0;D=K+20|0;E=K+16|0;x=K+12|0;y=K+8|0;A=K+4|0;B=K;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[L>>2]=k;c[u>>2]=l;c[v>>2]=m;g[K+76>>2]=.22252093255519867;g[K+72>>2]=.9009688496589661;g[K+68>>2]=.6234897971153259;g[K+64>>2]=.4338837265968323;g[K+60>>2]=.7818315029144287;g[K+56>>2]=.9749279022216797;c[J>>2]=c[L>>2];while(1){if((c[J>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[D>>2]=+g[c[o>>2]>>2];g[E>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[G>>2]=+g[E>>2]-+g[D>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[I>>2]=+g[y>>2]-+g[x>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[H>>2]=+g[B>>2]-+g[A>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[G>>2]*.9749279022216797-+g[H>>2]*.7818315029144287-+g[I>>2]*.4338837265968323;g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[G>>2]*.7818315029144287+ +g[I>>2]*.9749279022216797+ +g[H>>2]*.4338837265968323;g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[C>>2]*.6234897971153259+ +g[w>>2]+-(+g[z>>2]*.9009688496589661+ +g[F>>2]*.22252093255519867);g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[G>>2]*.4338837265968323+ +g[H>>2]*.9749279022216797-+g[I>>2]*.7818315029144287;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[z>>2]*.6234897971153259+ +g[w>>2]+-(+g[C>>2]*.22252093255519867+ +g[F>>2]*.9009688496589661);g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[F>>2]*.6234897971153259+ +g[w>>2]+-(+g[C>>2]*.9009688496589661+ +g[z>>2]*.22252093255519867);g[c[p>>2]>>2]=+g[w>>2]+ +g[F>>2]+ +g[z>>2]+ 
+g[C>>2];c[J>>2]=(c[J>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=K;return}function tt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,35,7288);i=b;return}function ut(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0;R=i;i=i+128|0;n=R+124|0;o=R+120|0;p=R+116|0;q=R+112|0;r=R+108|0;s=R+104|0;t=R+100|0;S=R+96|0;u=R+92|0;v=R+88|0;Q=R+80|0;y=R+76|0;C=R+72|0;I=R+68|0;O=R+64|0;B=R+60|0;L=R+56|0;F=R+52|0;N=R+48|0;w=R+44|0;x=R+40|0;G=R+36|0;H=R+32|0;z=R+28|0;A=R+24|0;D=R+20|0;E=R+16|0;J=R+12|0;K=R+8|0;M=R+4|0;P=R;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[S>>2]=k;c[u>>2]=l;c[v>>2]=m;g[R+84>>2]=.7071067690849304;c[Q>>2]=c[S>>2];while(1){if((c[Q>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[C>>2]=+g[w>>2]-+g[x>>2];g[G>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[H>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[O>>2]=+g[G>>2]+ +g[H>>2];g[z>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[A>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[L>>2]=+g[z>>2]-+g[A>>2];g[D>>2]=+g[c[o>>2]>>2];g[E>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[N>>2]=+g[D>>2]+ +g[E>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[y>>2]-+g[B>>2];g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[O>>2]-+g[N>>2];g[J>>2]=(+g[F>>2]+ +g[I>>2])*.7071067690849304;g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[C>>2]-+g[J>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[C>>2]+ +g[J>>2];g[K>>2]=(+g[I>>2]-+g[F>>2])*.7071067690849304;g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[K>>2]-+g[L>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=+g[L>>2]+ +g[K>>2];g[M>>2]=+g[y>>2]+ 
+g[B>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=+g[M>>2]-+g[P>>2];g[c[p>>2]>>2]=+g[M>>2]+ +g[P>>2];c[Q>>2]=(c[Q>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=R;return}function vt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,36,7336);i=b;return}function wt(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0;_=i;i=i+224|0;n=_+212|0;o=_+208|0;p=_+204|0;q=_+200|0;r=_+196|0;s=_+192|0;t=_+188|0;$=_+184|0;u=_+180|0;v=_+176|0;Z=_+116|0;w=_+112|0;z=_+108|0;W=_+104|0;F=_+100|0;Q=_+96|0;N=_+92|0;K=_+88|0;P=_+84|0;O=_+80|0;x=_+76|0;y=_+72|0;A=_+68|0;L=_+64|0;B=_+60|0;C=_+56|0;D=_+52|0;E=_+48|0;G=_+44|0;H=_+40|0;I=_+36|0;J=_+32|0;Y=_+28|0;M=_+24|0;R=_+20|0;S=_+16|0;T=_+12|0;U=_+8|0;V=_+4|0;X=_;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[$>>2]=k;c[u>>2]=l;c[v>>2]=m;g[_+172>>2]=.9396926164627075;g[_+168>>2]=.29619812965393066;g[_+164>>2]=.3420201539993286;g[_+160>>2]=.813797652721405;g[_+156>>2]=.9848077297210693;g[_+152>>2]=.15038372576236725;g[_+148>>2]=.6427876353263855;g[_+144>>2]=.663413941860199;g[_+140>>2]=.8528685569763184;g[_+136>>2]=.1736481785774231;g[_+132>>2]=.5566704273223877;g[_+128>>2]=.7660444378852844;g[_+124>>2]=.8660253882408142;g[_+120>>2]=.5;c[Z>>2]=c[$>>2];while(1){if((c[Z>>2]|0)<=0)break;g[w>>2]=+g[c[n>>2]>>2];g[x>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[W>>2]=+g[y>>2]-+g[x>>2];g[B>>2]=+g[c[o>>2]>>2];g[C>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[D>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[F>>2]=+g[B>>2]+ 
+g[E>>2];g[Q>>2]=+g[D>>2]-+g[C>>2];g[N>>2]=+g[B>>2]-+g[E>>2]*.5;g[G>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[H>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[I>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[J>>2]=+g[H>>2]+ +g[I>>2];g[K>>2]=+g[G>>2]+ +g[J>>2];g[P>>2]=+g[G>>2]-+g[J>>2]*.5;g[O>>2]=+g[I>>2]-+g[H>>2];g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2]=(+g[K>>2]-+g[F>>2])*.8660253882408142;g[A>>2]=+g[w>>2]+ +g[z>>2];g[L>>2]=+g[F>>2]+ +g[K>>2];g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2]=+g[A>>2]-+g[L>>2]*.5;g[c[p>>2]>>2]=+g[A>>2]+ +g[L>>2];g[Y>>2]=+g[W>>2]*.8660253882408142;g[M>>2]=+g[w>>2]-+g[z>>2]*.5;g[R>>2]=+g[N>>2]*.7660444378852844+ +g[Q>>2]*.5566704273223877;g[S>>2]=+g[P>>2]*.1736481785774231+ +g[O>>2]*.8528685569763184;g[T>>2]=+g[R>>2]+ +g[S>>2];g[U>>2]=+g[Q>>2]*.663413941860199-+g[N>>2]*.6427876353263855;g[V>>2]=+g[O>>2]*.15038372576236725-+g[P>>2]*.9848077297210693;g[X>>2]=+g[U>>2]+ +g[V>>2];g[(c[p>>2]|0)+(c[s>>2]<<2)>>2]=+g[M>>2]+ +g[T>>2];g[(c[q>>2]|0)+(c[t>>2]<<2)>>2]=+g[Y>>2]+ +g[X>>2];g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2]=(+g[U>>2]-+g[V>>2])*.8660253882408142+ +g[M>>2]-+g[T>>2]*.5;g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2]=(+g[W>>2]+(+g[S>>2]-+g[R>>2]))*.8660253882408142-+g[X>>2]*.5;g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2]=+g[O>>2]*.813797652721405-+g[P>>2]*.3420201539993286+-(+g[Q>>2]*.15038372576236725+ +g[N>>2]*.9848077297210693)-+g[Y>>2];g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2]=+g[N>>2]*.1736481785774231+ +g[M>>2]+-(+g[O>>2]*.29619812965393066+ +g[P>>2]*.9396926164627075)-+g[Q>>2]*.8528685569763184;c[Z>>2]=(c[Z>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[v>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[v>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=_;return}function xt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,49,7384);i=b;return}function yt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0;zd=i;i=i+944|0;k=zd+936|0;l=zd+932|0;m=zd+928|0;n=zd+924|0;Ad=zd+920|0;o=zd+916|0;p=zd+912|0;yd=zd+896|0;td=zd+892|0;wd=zd+888|0;ka=zd+884|0;ma=zd+880|0;oa=zd+876|0;sa=zd+872|0;Ha=zd+868|0;Fa=zd+864|0;xd=zd+860|0;ud=zd+856|0;Bc=zd+852|0;Ba=zd+848|0;Q=zd+844|0;I=zd+840|0;Y=zd+836|0;y=zd+832|0;M=zd+828|0;O=zd+824|0;ua=zd+820|0;va=zd+816|0;wa=zd+812|0;C=zd+808|0;Pa=zd+804|0;bb=zd+800|0;jb=zd+796|0;$a=zd+792|0;dc=zd+788|0;rc=zd+784|0;Ob=zd+780|0;nc=zd+776|0;Ac=zd+772|0;K=zd+768|0;w=zd+764|0;H=zd+760|0;vd=zd+756|0;L=zd+752|0;x=zd+748|0;G=zd+744|0;la=zd+740|0;ra=zd+736|0;na=zd+732|0;qa=zd+728|0;Na=zd+724|0;Oa=zd+720|0;Ja=zd+716|0;ib=zd+712|0;bc=zd+708|0;cc=zd+704|0;Mb=zd+700|0;Nb=zd+696|0;xc=zd+692|0;Pb=zd+688|0;ec=zd+684|0;Cc=zd+680|0;ea=zd+676|0;mb=zd+672|0;Sa=zd+668|0;R=zd+664|0;cd=zd+660|0;Jc=zd+656|0;S=zd+652|0;z=zd+648|0;Va=zd+644|0;Qb=zd+640|0;tb=zd+636|0;fc=zd+632|
0;kd=zd+628|0;U=zd+624|0;Tc=zd+620|0;ga=zd+616|0;Ka=zd+612|0;Xa=zd+608|0;Xb=zd+604|0;hc=zd+600|0;rd=zd+596|0;V=zd+592|0;t=zd+588|0;ha=zd+584|0;Bb=zd+580|0;Ya=zd+576|0;_b=zd+572|0;ic=zd+568|0;Ib=zd+564|0;kb=zd+560|0;da=zd+556|0;lb=zd+552|0;wc=zd+548|0;Qa=zd+544|0;aa=zd+540|0;Ra=zd+536|0;q=zd+532|0;za=zd+528|0;ba=zd+524|0;ca=zd+520|0;uc=zd+516|0;vc=zd+512|0;A=zd+508|0;$=zd+504|0;_c=zd+500|0;nb=zd+496|0;Ic=zd+492|0;ob=zd+488|0;bd=zd+484|0;qb=zd+480|0;Fc=zd+476|0;rb=zd+472|0;yc=zd+468|0;zc=zd+464|0;Gc=zd+460|0;Hc=zd+456|0;$c=zd+452|0;ad=zd+448|0;Dc=zd+444|0;Ec=zd+440|0;Ta=zd+436|0;Ua=zd+432|0;pb=zd+428|0;sb=zd+424|0;gd=zd+420|0;Fb=zd+416|0;Rc=zd+412|0;Gb=zd+408|0;jd=zd+404|0;Cb=zd+400|0;Oc=zd+396|0;Db=zd+392|0;Lc=zd+388|0;Sc=zd+384|0;ed=zd+380|0;fd=zd+376|0;Pc=zd+372|0;Qc=zd+368|0;hd=zd+364|0;id=zd+360|0;Mc=zd+356|0;Nc=zd+352|0;Eb=zd+348|0;Hb=zd+344|0;Sb=zd+340|0;Wb=zd+336|0;nd=zd+332|0;yb=zd+328|0;r=zd+324|0;zb=zd+320|0;qd=zd+316|0;vb=zd+312|0;Xc=zd+308|0;wb=zd+304|0;Uc=zd+300|0;s=zd+296|0;ld=zd+292|0;md=zd+288|0;Yc=zd+284|0;Zc=zd+280|0;od=zd+276|0;pd=zd+272|0;Vc=zd+268|0;Wc=zd+264|0;xb=zd+260|0;Ab=zd+256|0;Yb=zd+252|0;Zb=zd+248|0;dd=zd+244|0;sd=zd+240|0;P=zd+236|0;T=zd+232|0;W=zd+228|0;X=zd+224|0;ac=zd+220|0;lc=zd+216|0;kc=zd+212|0;mc=zd+208|0;Rb=zd+204|0;$b=zd+200|0;gc=zd+196|0;jc=zd+192|0;qc=zd+188|0;Ub=zd+184|0;Tb=zd+180|0;Vb=zd+176|0;oc=zd+172|0;pc=zd+168|0;sc=zd+164|0;tc=zd+160|0;v=zd+156|0;pa=zd+152|0;ja=zd+148|0;ta=zd+144|0;Kc=zd+140|0;u=zd+136|0;fa=zd+132|0;ia=zd+128|0;B=zd+124|0;J=zd+120|0;F=zd+116|0;N=zd+112|0;xa=zd+108|0;ya=zd+104|0;D=zd+100|0;E=zd+96|0;Ma=zd+92|0;ab=zd+88|0;_a=zd+84|0;cb=zd+80|0;ub=zd+76|0;La=zd+72|0;Wa=zd+68|0;Za=zd+64|0;fb=zd+60|0;Kb=zd+56|0;Jb=zd+52|0;Lb=zd+48|0;db=zd+44|0;eb=zd+40|0;gb=zd+36|0;hb=zd+32|0;Aa=zd+28|0;Ga=zd+24|0;Ea=zd+20|0;Ia=zd+16|0;Z=zd+12|0;_=zd+8|0;Ca=zd+4|0;Da=zd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ad>>2]=f;c[o>>2]=h;c[p>>2]=j;g[zd+908>>2]=.3826834261417389;g[zd+904>>2]=.9238795042037964;g[zd+900>>2]=.707106769
0849304;c[yd>>2]=c[Ad>>2];c[m>>2]=(c[m>>2]|0)+((c[Ad>>2]|0)-1<<3<<2);while(1){if((c[yd>>2]|0)>=(c[o>>2]|0))break;g[td>>2]=+g[c[m>>2]>>2];g[wd>>2]=+g[(c[m>>2]|0)+4>>2];g[ka>>2]=+g[(c[m>>2]|0)+8>>2];g[ma>>2]=+g[(c[m>>2]|0)+12>>2];g[la>>2]=+g[td>>2]*+g[ka>>2];g[ra>>2]=+g[wd>>2]*+g[ka>>2];g[na>>2]=+g[wd>>2]*+g[ma>>2];g[qa>>2]=+g[td>>2]*+g[ma>>2];g[oa>>2]=+g[la>>2]+ +g[na>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[Ha>>2]=+g[qa>>2]+ +g[ra>>2];g[Fa>>2]=+g[la>>2]-+g[na>>2];g[xd>>2]=+g[(c[m>>2]|0)+20>>2];g[Ac>>2]=+g[wd>>2]*+g[xd>>2];g[K>>2]=+g[ka>>2]*+g[xd>>2];g[w>>2]=+g[td>>2]*+g[xd>>2];g[H>>2]=+g[ma>>2]*+g[xd>>2];g[ud>>2]=+g[(c[m>>2]|0)+16>>2];g[vd>>2]=+g[td>>2]*+g[ud>>2];g[L>>2]=+g[ma>>2]*+g[ud>>2];g[x>>2]=+g[wd>>2]*+g[ud>>2];g[G>>2]=+g[ka>>2]*+g[ud>>2];g[Bc>>2]=+g[vd>>2]-+g[Ac>>2];g[Ba>>2]=+g[K>>2]+ +g[L>>2];g[Q>>2]=+g[w>>2]-+g[x>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[Y>>2]=+g[G>>2]-+g[H>>2];g[y>>2]=+g[w>>2]+ +g[x>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[O>>2]=+g[vd>>2]+ +g[Ac>>2];g[ua>>2]=+g[(c[m>>2]|0)+24>>2];g[va>>2]=+g[(c[m>>2]|0)+28>>2];g[wa>>2]=+g[td>>2]*+g[ua>>2]+ +g[wd>>2]*+g[va>>2];g[C>>2]=+g[td>>2]*+g[va>>2]-+g[wd>>2]*+g[ua>>2];g[Na>>2]=+g[Fa>>2]*+g[xd>>2];g[Oa>>2]=+g[Ha>>2]*+g[ud>>2];g[Pa>>2]=+g[Na>>2]+ +g[Oa>>2];g[bb>>2]=+g[Na>>2]-+g[Oa>>2];g[Ja>>2]=+g[Fa>>2]*+g[ud>>2];g[ib>>2]=+g[Ha>>2]*+g[xd>>2];g[jb>>2]=+g[Ja>>2]-+g[ib>>2];g[$a>>2]=+g[Ja>>2]+ +g[ib>>2];g[bc>>2]=+g[oa>>2]*+g[xd>>2];g[cc>>2]=+g[sa>>2]*+g[ud>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[rc>>2]=+g[bc>>2]-+g[cc>>2];g[Mb>>2]=+g[oa>>2]*+g[ud>>2];g[Nb>>2]=+g[sa>>2]*+g[xd>>2];g[Ob>>2]=+g[Mb>>2]-+g[Nb>>2];g[nc>>2]=+g[Mb>>2]+ +g[Nb>>2];g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ib>>2]=+g[q>>2]+ +g[za>>2];g[kb>>2]=+g[q>>2]-+g[za>>2];g[ba>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[da>>2]=+g[ba>>2]-+g[ca>>2];g[lb>>2]=+g[ba>>2]+ +g[ca>>2];g[uc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[vc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[wc>>2]=+g[uc>>2]+ 
+g[vc>>2];g[Qa>>2]=+g[uc>>2]-+g[vc>>2];g[A>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[$>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[aa>>2]=+g[A>>2]-+g[$>>2];g[Ra>>2]=+g[A>>2]+ +g[$>>2];g[xc>>2]=+g[Ib>>2]+ +g[wc>>2];g[Pb>>2]=+g[kb>>2]+ +g[lb>>2];g[ec>>2]=+g[Ra>>2]-+g[Qa>>2];g[Cc>>2]=+g[Ib>>2]-+g[wc>>2];g[ea>>2]=+g[aa>>2]-+g[da>>2];g[mb>>2]=+g[kb>>2]-+g[lb>>2];g[Sa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[R>>2]=+g[aa>>2]+ +g[da>>2];g[yc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[zc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[_c>>2]=+g[yc>>2]+ +g[zc>>2];g[nb>>2]=+g[yc>>2]-+g[zc>>2];g[Gc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Ic>>2]=+g[Gc>>2]-+g[Hc>>2];g[ob>>2]=+g[Gc>>2]+ +g[Hc>>2];g[$c>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[ad>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[bd>>2]=+g[$c>>2]+ +g[ad>>2];g[qb>>2]=+g[$c>>2]-+g[ad>>2];g[Dc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ec>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Fc>>2]=+g[Dc>>2]-+g[Ec>>2];g[rb>>2]=+g[Dc>>2]+ +g[Ec>>2];g[cd>>2]=+g[_c>>2]+ +g[bd>>2];g[Jc>>2]=+g[Fc>>2]-+g[Ic>>2];g[S>>2]=+g[Ic>>2]+ +g[Fc>>2];g[z>>2]=+g[_c>>2]-+g[bd>>2];g[Ta>>2]=+g[nb>>2]+ +g[ob>>2];g[Ua>>2]=+g[qb>>2]+ +g[rb>>2];g[Va>>2]=(+g[Ta>>2]-+g[Ua>>2])*.7071067690849304;g[Qb>>2]=(+g[Ta>>2]+ +g[Ua>>2])*.7071067690849304;g[pb>>2]=+g[nb>>2]-+g[ob>>2];g[sb>>2]=+g[qb>>2]-+g[rb>>2];g[tb>>2]=(+g[pb>>2]+ +g[sb>>2])*.7071067690849304;g[fc>>2]=(+g[pb>>2]-+g[sb>>2])*.7071067690849304;g[ed>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[fd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[gd>>2]=+g[ed>>2]+ +g[fd>>2];g[Fb>>2]=+g[ed>>2]-+g[fd>>2];g[Pc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Qc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[Gb>>2]=+g[Pc>>2]+ +g[Qc>>2];g[hd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[id>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[jd>>2]=+g[hd>>2]+ 
+g[id>>2];g[Cb>>2]=+g[hd>>2]-+g[id>>2];g[Mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Nc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2];g[Db>>2]=+g[Mc>>2]+ +g[Nc>>2];g[kd>>2]=+g[gd>>2]+ +g[jd>>2];g[U>>2]=+g[Oc>>2]+ +g[Rc>>2];g[Lc>>2]=+g[gd>>2]-+g[jd>>2];g[Sc>>2]=+g[Oc>>2]-+g[Rc>>2];g[Tc>>2]=+g[Lc>>2]-+g[Sc>>2];g[ga>>2]=+g[Lc>>2]+ +g[Sc>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[Ka>>2]=+g[Eb>>2]*.9238795042037964+ +g[Hb>>2]*.3826834261417389;g[Xa>>2]=+g[Hb>>2]*.9238795042037964-+g[Eb>>2]*.3826834261417389;g[Sb>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Wb>>2]=+g[Db>>2]-+g[Cb>>2];g[Xb>>2]=+g[Sb>>2]*.3826834261417389-+g[Wb>>2]*.9238795042037964;g[hc>>2]=+g[Wb>>2]*.3826834261417389+ +g[Sb>>2]*.9238795042037964;g[ld>>2]=+g[c[l>>2]>>2];g[md>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[yb>>2]=+g[ld>>2]-+g[md>>2];g[Yc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Zc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[r>>2]=+g[Yc>>2]-+g[Zc>>2];g[zb>>2]=+g[Yc>>2]+ +g[Zc>>2];g[od>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[pd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[vb>>2]=+g[od>>2]-+g[pd>>2];g[Vc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Xc>>2]=+g[Vc>>2]-+g[Wc>>2];g[wb>>2]=+g[Vc>>2]+ +g[Wc>>2];g[rd>>2]=+g[nd>>2]+ +g[qd>>2];g[V>>2]=+g[Xc>>2]+ +g[r>>2];g[Uc>>2]=+g[nd>>2]-+g[qd>>2];g[s>>2]=+g[Xc>>2]-+g[r>>2];g[t>>2]=+g[Uc>>2]+ +g[s>>2];g[ha>>2]=+g[s>>2]-+g[Uc>>2];g[xb>>2]=+g[vb>>2]-+g[wb>>2];g[Ab>>2]=+g[yb>>2]-+g[zb>>2];g[Bb>>2]=+g[xb>>2]*.9238795042037964-+g[Ab>>2]*.3826834261417389;g[Ya>>2]=+g[xb>>2]*.3826834261417389+ +g[Ab>>2]*.9238795042037964;g[Yb>>2]=+g[yb>>2]+ +g[zb>>2];g[Zb>>2]=+g[vb>>2]+ +g[wb>>2];g[_b>>2]=+g[Yb>>2]*.3826834261417389-+g[Zb>>2]*.9238795042037964;g[ic>>2]=+g[Zb>>2]*.3826834261417389+ +g[Yb>>2]*.9238795042037964;g[dd>>2]=+g[xc>>2]+ +g[cd>>2];g[sd>>2]=+g[kd>>2]+ +g[rd>>2];g[P>>2]=+g[dd>>2]-+g[sd>>2];g[T>>2]=+g[R>>2]+ 
+g[S>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[X>>2]=+g[T>>2]-+g[W>>2];g[c[k>>2]>>2]=+g[dd>>2]+ +g[sd>>2];g[c[l>>2]>>2]=+g[T>>2]+ +g[W>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[O>>2]*+g[P>>2]-+g[Q>>2]*+g[X>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Q>>2]*+g[P>>2]+ +g[O>>2]*+g[X>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[$b>>2]=+g[Xb>>2]+ +g[_b>>2];g[ac>>2]=+g[Rb>>2]-+g[$b>>2];g[lc>>2]=+g[Rb>>2]+ +g[$b>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[kc>>2]=+g[gc>>2]-+g[jc>>2];g[mc>>2]=+g[gc>>2]+ +g[jc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ob>>2]*+g[ac>>2]-+g[dc>>2]*+g[kc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[dc>>2]*+g[ac>>2]+ +g[Ob>>2]*+g[kc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ka>>2]*+g[lc>>2]-+g[ma>>2]*+g[mc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ma>>2]*+g[lc>>2]+ +g[ka>>2]*+g[mc>>2];g[oc>>2]=+g[Pb>>2]+ +g[Qb>>2];g[pc>>2]=+g[hc>>2]+ +g[ic>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[Ub>>2]=+g[oc>>2]+ +g[pc>>2];g[sc>>2]=+g[ec>>2]-+g[fc>>2];g[tc>>2]=+g[Xb>>2]-+g[_b>>2];g[Tb>>2]=+g[sc>>2]+ +g[tc>>2];g[Vb>>2]=+g[sc>>2]-+g[tc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[nc>>2]*+g[qc>>2]-+g[rc>>2]*+g[Tb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[nc>>2]*+g[Tb>>2]+ +g[rc>>2]*+g[qc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[ua>>2]*+g[Ub>>2]-+g[va>>2]*+g[Vb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[ua>>2]*+g[Vb>>2]+ +g[va>>2]*+g[Ub>>2];g[Kc>>2]=+g[Cc>>2]+ +g[Jc>>2];g[u>>2]=(+g[Tc>>2]+ +g[t>>2])*.7071067690849304;g[v>>2]=+g[Kc>>2]-+g[u>>2];g[pa>>2]=+g[Kc>>2]+ +g[u>>2];g[fa>>2]=+g[z>>2]+ +g[ea>>2];g[ia>>2]=(+g[ga>>2]+ +g[ha>>2])*.7071067690849304;g[ja>>2]=+g[fa>>2]-+g[ia>>2];g[ta>>2]=+g[fa>>2]+ +g[ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Bc>>2]*+g[v>>2]-+g[y>>2]*+g[ja>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[y>>2]*+g[v>>2]+ +g[Bc>>2]*+g[ja>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[oa>>2]*+g[pa>>2]-+g[sa>>2]*+g[ta>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[sa>>2]*+g[pa>>2]+ 
+g[oa>>2]*+g[ta>>2];g[xa>>2]=+g[Cc>>2]-+g[Jc>>2];g[ya>>2]=(+g[ha>>2]-+g[ga>>2])*.7071067690849304;g[B>>2]=+g[xa>>2]-+g[ya>>2];g[J>>2]=+g[xa>>2]+ +g[ya>>2];g[D>>2]=+g[ea>>2]-+g[z>>2];g[E>>2]=(+g[Tc>>2]-+g[t>>2])*.7071067690849304;g[F>>2]=+g[D>>2]-+g[E>>2];g[N>>2]=+g[D>>2]+ +g[E>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[wa>>2]*+g[B>>2]-+g[C>>2]*+g[F>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[wa>>2]*+g[F>>2]+ +g[C>>2]*+g[B>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[I>>2]*+g[J>>2]-+g[M>>2]*+g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[I>>2]*+g[N>>2]+ +g[M>>2]*+g[J>>2];g[ub>>2]=+g[mb>>2]-+g[tb>>2];g[La>>2]=+g[Bb>>2]-+g[Ka>>2];g[Ma>>2]=+g[ub>>2]-+g[La>>2];g[ab>>2]=+g[ub>>2]+ +g[La>>2];g[Wa>>2]=+g[Sa>>2]-+g[Va>>2];g[Za>>2]=+g[Xa>>2]-+g[Ya>>2];g[_a>>2]=+g[Wa>>2]-+g[Za>>2];g[cb>>2]=+g[Wa>>2]+ +g[Za>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[jb>>2]*+g[Ma>>2]-+g[Pa>>2]*+g[_a>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Pa>>2]*+g[Ma>>2]+ +g[jb>>2]*+g[_a>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[$a>>2]*+g[ab>>2]-+g[bb>>2]*+g[cb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[bb>>2]*+g[ab>>2]+ +g[$a>>2]*+g[cb>>2];g[db>>2]=+g[mb>>2]+ +g[tb>>2];g[eb>>2]=+g[Xa>>2]+ +g[Ya>>2];g[fb>>2]=+g[db>>2]-+g[eb>>2];g[Kb>>2]=+g[db>>2]+ +g[eb>>2];g[gb>>2]=+g[Sa>>2]+ +g[Va>>2];g[hb>>2]=+g[Ka>>2]+ +g[Bb>>2];g[Jb>>2]=+g[gb>>2]-+g[hb>>2];g[Lb>>2]=+g[gb>>2]+ +g[hb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[ud>>2]*+g[fb>>2]-+g[xd>>2]*+g[Jb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[ud>>2]*+g[Jb>>2]+ +g[xd>>2]*+g[fb>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[td>>2]*+g[Kb>>2]-+g[wd>>2]*+g[Lb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[td>>2]*+g[Lb>>2]+ +g[wd>>2]*+g[Kb>>2];g[Z>>2]=+g[xc>>2]-+g[cd>>2];g[_>>2]=+g[V>>2]-+g[U>>2];g[Aa>>2]=+g[Z>>2]-+g[_>>2];g[Ga>>2]=+g[Z>>2]+ +g[_>>2];g[Ca>>2]=+g[R>>2]-+g[S>>2];g[Da>>2]=+g[kd>>2]-+g[rd>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[Ia>>2]=+g[Da>>2]+ 
+g[Ca>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Y>>2]*+g[Aa>>2]-+g[Ba>>2]*+g[Ea>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Y>>2]*+g[Ea>>2]+ +g[Ba>>2]*+g[Aa>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Fa>>2]*+g[Ga>>2]-+g[Ha>>2]*+g[Ia>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Fa>>2]*+g[Ia>>2]+ +g[Ha>>2]*+g[Ga>>2];c[yd>>2]=(c[yd>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=zd;return}function zt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,50,7432);i=b;return}function At(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0
,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0;Ze=i;i=i+1264|0;k=Ze+1260|0;l=Ze+1256|0;m=Ze+1252|0;n=Ze+1248|0;_e=Ze+1244|0;o=Ze+1240|0;p=Ze+1236|0;Ye=Ze+1216|0;be=Ze+1212|0;ee=Ze+1208|0;ce=Ze+1204|0;fe=Ze+1200|0;he=Ze+1196|0;sa=Ze+1192|0;Ha=Ze+1188|0;Fa=Ze+1184|0;U=Ze+1180|0;S=Ze+1176|0;W=Ze+1172|0;rb=Ze+1168|0;hb=Ze+1164|0;Ba=Ze+1160|0;fb=Ze+1156|0;bb=Ze+1152|0;vb=Ze+1148|0;Za=Ze+1144|0;mc=Ze+1140|0;Tc=Ze+1136|0;ic=Ze+1132|0;oc=Ze+1128|0;Ja=Ze+1124|0;kb=Ze+1120|0;hd=Ze+1116|0;ld=Ze+1112|0;M=Ze+1108|0;N=Ze+1104|0;O=Ze+1100|0;lb=Ze+1096|0;rd=Ze+1092|0;Q=Ze+1088|0;pd=Ze+1084|0;$c=Ze+1080|0;Xa=Ze+1076|0;nb=Ze+1072|0;Zc=Ze+1068|0;Va=Ze+1064|0;V=Ze+1060|0;tb=Ze+1056|0;_=Ze+1052|0;qb=Ze+1048|0;T=Ze+1044|0;ub=Ze+1040|0;Aa=Ze+1036|0;pb=Ze+1032|0;de=Ze+1028|0;ra=Ze+1024|0;ge=Ze+1020|0;qa=Ze+1016|0;kc=Ze+1012|0;lc=Ze+1008|0;gc=Ze+1004|0;hc=Ze+1e3|0;Ga=Ze+996|0;Ia=Ze+992|0;ib=Ze+988|0;jb=Ze+984|0;Xd=Ze+980|0;Dd=Ze+976|0;Sd=Ze+972|0;ie=Ze+968|0;E=Ze+964|0;zc=Ze+960|0;Xb=Ze+956|0;Na=Ze+952|0;A=Ze+948|0;cc=Ze+944|0;dc=Ze+940|0;na=Ze+936|0;Fb=Ze+932|0;cd=Ze+928|0;bd=Ze+924|0;Cb=Ze+920|0;ua=Ze+916|0;Md=Ze+912|0;Jd=Ze+908|0;ta=Ze+904|0;Ra=Ze+900|0;Qb=Ze+896|0;Sa=Ze+892|0;vc=Ze+888|0;Gc=Ze+884|0;Nc=Ze+880|0;Oc=Ze+876|0;Ke=Ze+872|0;$d=Ze+868|0;ae=Ze+864|0;Pd=Ze+860|0;Qd=Ze+856|0;Td=Ze+852|0;Ka=Ze+848|0;La=Ze+844|0;Oa=Ze+840|0;F=Ze+836|0;G=Ze+832|0;H=Ze+828|0;yd=Ze+824|0;Bd=Ze+820|0;Ed=Ze+816|0;Yb=Ze+812|0;Zb=Ze+808|0;_b=Ze+804|0;le=Ze+800|0;oe=Ze+796|0;pe=Ze+792|0;_c=Ze+788|0;vd=Ze+784|0;Ib=Ze+780|0;xc=Ze+776|0;D=Ze+772|0;yc=Ze+768|0;Wd=Ze+764|0;Wb=Ze+760|0;ya=Ze+756|0;Vb=Ze+752|0;q=Ze+748|0;za=Ze+744|0;B=Ze+740|0;C=Ze+736|0;Rc=Ze+732|0;Vd=Ze+728|0;wa=Ze+724|0;xa=Ze+720|0;Ce=Ze+716|0;wd=Ze+712|0;Hd=Ze+708|0;je=Ze+704|0;s=Ze+700|0;Mb=Ze+696|0;Cc=Ze+692|0;Ab=Ze+688|0;_d=Ze+684|0;Ad=Ze+680|0;Ld=Ze+676|0;ne=Ze+672|0;ma=Ze+668|0;uc=Ze+664|0;Mc=Ze+660|0;Eb=Ze+656|0;Je=Ze+652|0;xd=Ze
+648|0;Id=Ze+644|0;ke=Ze+640|0;z=Ze+636|0;Pb=Ze+632|0;Fc=Ze+628|0;Bb=Ze+624|0;Re=Ze+620|0;zd=Ze+616|0;Kd=Ze+612|0;me=Ze+608|0;fa=Ze+604|0;rc=Ze+600|0;Jc=Ze+596|0;Db=Ze+592|0;ye=Ze+588|0;Ac=Ze+584|0;r=Ze+580|0;Bc=Ze+576|0;Be=Ze+572|0;Lb=Ze+568|0;ve=Ze+564|0;Kb=Ze+560|0;Yd=Ze+556|0;Zd=Ze+552|0;we=Ze+548|0;xe=Ze+544|0;ze=Ze+540|0;Ae=Ze+536|0;te=Ze+532|0;ue=Ze+528|0;Ue=Ze+524|0;Kc=Ze+520|0;Xe=Ze+516|0;sc=Ze+512|0;ia=Ze+508|0;tc=Ze+504|0;la=Ze+500|0;Lc=Ze+496|0;Se=Ze+492|0;Te=Ze+488|0;Ve=Ze+484|0;We=Ze+480|0;ga=Ze+476|0;ha=Ze+472|0;ja=Ze+468|0;ka=Ze+464|0;Fe=Ze+460|0;Dc=Ze+456|0;y=Ze+452|0;Ec=Ze+448|0;Ie=Ze+444|0;Nb=Ze+440|0;v=Ze+436|0;Ob=Ze+432|0;De=Ze+428|0;Ee=Ze+424|0;w=Ze+420|0;x=Ze+416|0;Ge=Ze+412|0;He=Ze+408|0;t=Ze+404|0;u=Ze+400|0;Ne=Ze+396|0;Hc=Ze+392|0;ea=Ze+388|0;Ic=Ze+384|0;Qe=Ze+380|0;Sb=Ze+376|0;ba=Ze+372|0;Rb=Ze+368|0;Le=Ze+364|0;Me=Ze+360|0;ca=Ze+356|0;da=Ze+352|0;Oe=Ze+348|0;Pe=Ze+344|0;$=Ze+340|0;aa=Ze+336|0;sb=Ze+332|0;wb=Ze+328|0;td=Ze+324|0;ud=Ze+320|0;Gb=Ze+316|0;Ta=Ze+312|0;db=Ze+308|0;$a=Ze+304|0;Qa=Ze+300|0;cb=Ze+296|0;zb=Ze+292|0;_a=Ze+288|0;Ma=Ze+284|0;Pa=Ze+280|0;xb=Ze+276|0;yb=Ze+272|0;Hb=Ze+268|0;Ua=Ze+264|0;gb=Ze+260|0;Jb=Ze+256|0;Wa=Ze+252|0;Ya=Ze+248|0;ab=Ze+244|0;eb=Ze+240|0;Nd=Ze+236|0;dd=Ze+232|0;nd=Ze+228|0;id=Ze+224|0;ad=Ze+220|0;md=Ze+216|0;Gd=Ze+212|0;jd=Ze+208|0;Rd=Ze+204|0;Ud=Ze+200|0;Cd=Ze+196|0;Fd=Ze+192|0;Od=Ze+188|0;ed=Ze+184|0;qd=Ze+180|0;sd=Ze+176|0;fd=Ze+172|0;gd=Ze+168|0;kd=Ze+164|0;od=Ze+160|0;oa=Ze+156|0;va=Ze+152|0;Ca=Ze+148|0;Y=Ze+144|0;K=Ze+140|0;Da=Ze+136|0;se=Ze+132|0;X=Ze+128|0;I=Ze+124|0;J=Ze+120|0;qe=Ze+116|0;re=Ze+112|0;pa=Ze+108|0;L=Ze+104|0;mb=Ze+100|0;ob=Ze+96|0;P=Ze+92|0;R=Ze+88|0;Z=Ze+84|0;Ea=Ze+80|0;wc=Ze+76|0;ec=Ze+72|0;Vc=Ze+68|0;qc=Ze+64|0;bc=Ze+60|0;Uc=Ze+56|0;Tb=Ze+52|0;pc=Ze+48|0;$b=Ze+44|0;ac=Ze+40|0;Pc=Ze+36|0;Qc=Ze+32|0;Ub=Ze+28|0;fc=Ze+24|0;Xc=Ze+20|0;Yc=Ze+16|0;jc=Ze+12|0;nc=Ze+8|0;Sc=Ze+4|0;Wc=Ze;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[_e>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ze+1232>>2]=.25;g[Ze+1228>
>2]=.55901700258255;g[Ze+1224>>2]=.5877852439880371;g[Ze+1220>>2]=.9510565400123596;c[Ye>>2]=c[_e>>2];c[m>>2]=(c[m>>2]|0)+((c[_e>>2]|0)-1<<3<<2);while(1){if((c[Ye>>2]|0)>=(c[o>>2]|0))break;g[be>>2]=+g[c[m>>2]>>2];g[ee>>2]=+g[(c[m>>2]|0)+4>>2];g[ce>>2]=+g[(c[m>>2]|0)+8>>2];g[fe>>2]=+g[(c[m>>2]|0)+12>>2];g[de>>2]=+g[be>>2]*+g[ce>>2];g[ra>>2]=+g[ee>>2]*+g[ce>>2];g[ge>>2]=+g[ee>>2]*+g[fe>>2];g[qa>>2]=+g[be>>2]*+g[fe>>2];g[he>>2]=+g[de>>2]+ +g[ge>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[Ha>>2]=+g[qa>>2]+ +g[ra>>2];g[Fa>>2]=+g[de>>2]-+g[ge>>2];g[U>>2]=+g[(c[m>>2]|0)+20>>2];g[V>>2]=+g[fe>>2]*+g[U>>2];g[tb>>2]=+g[be>>2]*+g[U>>2];g[_>>2]=+g[ce>>2]*+g[U>>2];g[qb>>2]=+g[ee>>2]*+g[U>>2];g[S>>2]=+g[(c[m>>2]|0)+16>>2];g[T>>2]=+g[ce>>2]*+g[S>>2];g[ub>>2]=+g[ee>>2]*+g[S>>2];g[Aa>>2]=+g[fe>>2]*+g[S>>2];g[pb>>2]=+g[be>>2]*+g[S>>2];g[W>>2]=+g[T>>2]+ +g[V>>2];g[rb>>2]=+g[pb>>2]-+g[qb>>2];g[hb>>2]=+g[_>>2]+ +g[Aa>>2];g[Ba>>2]=+g[_>>2]-+g[Aa>>2];g[fb>>2]=+g[T>>2]-+g[V>>2];g[bb>>2]=+g[tb>>2]-+g[ub>>2];g[vb>>2]=+g[tb>>2]+ +g[ub>>2];g[Za>>2]=+g[pb>>2]+ +g[qb>>2];g[kc>>2]=+g[he>>2]*+g[U>>2];g[lc>>2]=+g[sa>>2]*+g[S>>2];g[mc>>2]=+g[kc>>2]-+g[lc>>2];g[Tc>>2]=+g[kc>>2]+ +g[lc>>2];g[gc>>2]=+g[he>>2]*+g[S>>2];g[hc>>2]=+g[sa>>2]*+g[U>>2];g[ic>>2]=+g[gc>>2]+ +g[hc>>2];g[oc>>2]=+g[gc>>2]-+g[hc>>2];g[Ga>>2]=+g[Fa>>2]*+g[S>>2];g[Ia>>2]=+g[Ha>>2]*+g[U>>2];g[Ja>>2]=+g[Ga>>2]+ +g[Ia>>2];g[ib>>2]=+g[Fa>>2]*+g[U>>2];g[jb>>2]=+g[Ha>>2]*+g[S>>2];g[kb>>2]=+g[ib>>2]-+g[jb>>2];g[hd>>2]=+g[Ga>>2]-+g[Ia>>2];g[ld>>2]=+g[ib>>2]+ +g[jb>>2];g[M>>2]=+g[(c[m>>2]|0)+24>>2];g[N>>2]=+g[(c[m>>2]|0)+28>>2];g[O>>2]=+g[be>>2]*+g[M>>2]+ +g[ee>>2]*+g[N>>2];g[lb>>2]=+g[Ja>>2]*+g[M>>2]+ +g[kb>>2]*+g[N>>2];g[rd>>2]=+g[he>>2]*+g[N>>2]-+g[sa>>2]*+g[M>>2];g[Q>>2]=+g[be>>2]*+g[N>>2]-+g[ee>>2]*+g[M>>2];g[pd>>2]=+g[he>>2]*+g[M>>2]+ +g[sa>>2]*+g[N>>2];g[$c>>2]=+g[Fa>>2]*+g[N>>2]-+g[Ha>>2]*+g[M>>2];g[Xa>>2]=+g[ce>>2]*+g[N>>2]-+g[fe>>2]*+g[M>>2];g[nb>>2]=+g[Ja>>2]*+g[N>>2]-+g[kb>>2]*+g[M>>2];g[Zc>>2]=+g[Fa>>2]*+g[M>>2]+ 
+g[Ha>>2]*+g[N>>2];g[Va>>2]=+g[ce>>2]*+g[M>>2]+ +g[fe>>2]*+g[N>>2];g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ib>>2]=+g[q>>2]+ +g[za>>2];g[xc>>2]=+g[q>>2]-+g[za>>2];g[B>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[C>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[yc>>2]=+g[B>>2]+ +g[C>>2];g[Rc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Vd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Wd>>2]=+g[Rc>>2]+ +g[Vd>>2];g[Wb>>2]=+g[Rc>>2]-+g[Vd>>2];g[wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2];g[Vb>>2]=+g[wa>>2]+ +g[xa>>2];g[Xd>>2]=+g[Ib>>2]+ +g[Wd>>2];g[Dd>>2]=+g[xc>>2]-+g[yc>>2];g[Sd>>2]=+g[Wb>>2]+ +g[Vb>>2];g[ie>>2]=+g[Ib>>2]-+g[Wd>>2];g[E>>2]=+g[ya>>2]-+g[D>>2];g[zc>>2]=+g[xc>>2]+ +g[yc>>2];g[Xb>>2]=+g[Vb>>2]-+g[Wb>>2];g[Na>>2]=+g[ya>>2]+ +g[D>>2];g[Yd>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Zd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ye>>2]=+g[Yd>>2]+ +g[Zd>>2];g[Ac>>2]=+g[Yd>>2]-+g[Zd>>2];g[we>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[xe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[r>>2]=+g[we>>2]-+g[xe>>2];g[Bc>>2]=+g[we>>2]+ +g[xe>>2];g[ze>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ae>>2]=+g[c[l>>2]>>2];g[Be>>2]=+g[ze>>2]+ +g[Ae>>2];g[Lb>>2]=+g[ze>>2]-+g[Ae>>2];g[te>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ue>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ve>>2]=+g[te>>2]-+g[ue>>2];g[Kb>>2]=+g[te>>2]+ +g[ue>>2];g[Ce>>2]=+g[ye>>2]+ +g[Be>>2];g[wd>>2]=+g[Ac>>2]-+g[Bc>>2];g[Hd>>2]=+g[Lb>>2]+ +g[Kb>>2];g[je>>2]=+g[ye>>2]-+g[Be>>2];g[s>>2]=+g[ve>>2]-+g[r>>2];g[Mb>>2]=+g[Kb>>2]-+g[Lb>>2];g[Cc>>2]=+g[Ac>>2]+ +g[Bc>>2];g[Ab>>2]=+g[ve>>2]+ +g[r>>2];g[Se>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Te>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ue>>2]=+g[Se>>2]+ +g[Te>>2];g[Kc>>2]=+g[Se>>2]-+g[Te>>2];g[Ve>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[We>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Xe>>2]=+g[Ve>>2]+ 
+g[We>>2];g[sc>>2]=+g[Ve>>2]-+g[We>>2];g[ga>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[tc>>2]=+g[ga>>2]+ +g[ha>>2];g[ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ka>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2];g[Lc>>2]=+g[ja>>2]+ +g[ka>>2];g[_d>>2]=+g[Ue>>2]+ +g[Xe>>2];g[Ad>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Ld>>2]=+g[sc>>2]-+g[tc>>2];g[ne>>2]=+g[Ue>>2]-+g[Xe>>2];g[ma>>2]=+g[ia>>2]-+g[la>>2];g[uc>>2]=+g[sc>>2]+ +g[tc>>2];g[Mc>>2]=+g[Kc>>2]-+g[Lc>>2];g[Eb>>2]=+g[ia>>2]+ +g[la>>2];g[De>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ee>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Fe>>2]=+g[De>>2]+ +g[Ee>>2];g[Dc>>2]=+g[De>>2]-+g[Ee>>2];g[w>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[x>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[Ec>>2]=+g[w>>2]+ +g[x>>2];g[Ge>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[He>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ie>>2]=+g[Ge>>2]+ +g[He>>2];g[Nb>>2]=+g[Ge>>2]-+g[He>>2];g[t>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[u>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[Ob>>2]=+g[t>>2]+ +g[u>>2];g[Je>>2]=+g[Fe>>2]+ +g[Ie>>2];g[xd>>2]=+g[Dc>>2]-+g[Ec>>2];g[Id>>2]=+g[Nb>>2]-+g[Ob>>2];g[ke>>2]=+g[Fe>>2]-+g[Ie>>2];g[z>>2]=+g[v>>2]-+g[y>>2];g[Pb>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Fc>>2]=+g[Dc>>2]+ +g[Ec>>2];g[Bb>>2]=+g[v>>2]+ +g[y>>2];g[Le>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Me>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Ne>>2]=+g[Le>>2]+ +g[Me>>2];g[Hc>>2]=+g[Le>>2]-+g[Me>>2];g[ca>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[Ic>>2]=+g[ca>>2]+ +g[da>>2];g[Oe>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Pe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Qe>>2]=+g[Oe>>2]+ +g[Pe>>2];g[Sb>>2]=+g[Oe>>2]-+g[Pe>>2];g[$>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[Rb>>2]=+g[$>>2]+ 
+g[aa>>2];g[Re>>2]=+g[Ne>>2]+ +g[Qe>>2];g[zd>>2]=+g[Hc>>2]+ +g[Ic>>2];g[Kd>>2]=+g[Sb>>2]+ +g[Rb>>2];g[me>>2]=+g[Ne>>2]-+g[Qe>>2];g[fa>>2]=+g[ba>>2]-+g[ea>>2];g[rc>>2]=+g[Rb>>2]-+g[Sb>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[Db>>2]=+g[ba>>2]+ +g[ea>>2];g[A>>2]=+g[s>>2]-+g[z>>2];g[cc>>2]=+g[Cc>>2]-+g[Fc>>2];g[dc>>2]=+g[Jc>>2]-+g[Mc>>2];g[na>>2]=+g[fa>>2]-+g[ma>>2];g[Fb>>2]=+g[Db>>2]-+g[Eb>>2];g[cd>>2]=+g[zd>>2]-+g[Ad>>2];g[bd>>2]=+g[wd>>2]-+g[xd>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[ua>>2]=+g[me>>2]-+g[ne>>2];g[Md>>2]=+g[Kd>>2]-+g[Ld>>2];g[Jd>>2]=+g[Hd>>2]-+g[Id>>2];g[ta>>2]=+g[je>>2]-+g[ke>>2];g[Ra>>2]=+g[Ce>>2]-+g[Je>>2];g[Qb>>2]=+g[Mb>>2]+ +g[Pb>>2];g[Sa>>2]=+g[Re>>2]-+g[_d>>2];g[vc>>2]=+g[rc>>2]+ +g[uc>>2];g[Gc>>2]=+g[Cc>>2]+ +g[Fc>>2];g[Nc>>2]=+g[Jc>>2]+ +g[Mc>>2];g[Oc>>2]=+g[Gc>>2]+ +g[Nc>>2];g[Ke>>2]=+g[Ce>>2]+ +g[Je>>2];g[$d>>2]=+g[Re>>2]+ +g[_d>>2];g[ae>>2]=+g[Ke>>2]+ +g[$d>>2];g[Pd>>2]=+g[Hd>>2]+ +g[Id>>2];g[Qd>>2]=+g[Kd>>2]+ +g[Ld>>2];g[Td>>2]=+g[Pd>>2]+ +g[Qd>>2];g[Ka>>2]=+g[Ab>>2]+ +g[Bb>>2];g[La>>2]=+g[Db>>2]+ +g[Eb>>2];g[Oa>>2]=+g[Ka>>2]+ +g[La>>2];g[F>>2]=+g[s>>2]+ +g[z>>2];g[G>>2]=+g[fa>>2]+ +g[ma>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[yd>>2]=+g[wd>>2]+ +g[xd>>2];g[Bd>>2]=+g[zd>>2]+ +g[Ad>>2];g[Ed>>2]=+g[yd>>2]+ +g[Bd>>2];g[Yb>>2]=+g[Mb>>2]-+g[Pb>>2];g[Zb>>2]=+g[rc>>2]-+g[uc>>2];g[_b>>2]=+g[Yb>>2]+ +g[Zb>>2];g[le>>2]=+g[je>>2]+ +g[ke>>2];g[oe>>2]=+g[me>>2]+ +g[ne>>2];g[pe>>2]=+g[le>>2]+ +g[oe>>2];g[c[k>>2]>>2]=+g[Xd>>2]+ +g[ae>>2];g[c[l>>2]>>2]=+g[Na>>2]+ +g[Oa>>2];g[sb>>2]=+g[ie>>2]+ +g[pe>>2];g[wb>>2]=+g[E>>2]+ +g[H>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[rb>>2]*+g[sb>>2]-+g[vb>>2]*+g[wb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[vb>>2]*+g[sb>>2]+ +g[rb>>2]*+g[wb>>2];g[td>>2]=+g[Dd>>2]+ +g[Ed>>2];g[ud>>2]=+g[Sd>>2]+ +g[Td>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ja>>2]*+g[td>>2]-+g[kb>>2]*+g[ud>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Ja>>2]*+g[ud>>2]+ +g[kb>>2]*+g[td>>2];g[_c>>2]=+g[zc>>2]+ +g[Oc>>2];g[vd>>2]=+g[Xb>>2]+ 
+g[_b>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Zc>>2]*+g[_c>>2]-+g[$c>>2]*+g[vd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Zc>>2]*+g[vd>>2]+ +g[$c>>2]*+g[_c>>2];g[Gb>>2]=+g[Cb>>2]*.9510565400123596+ +g[Fb>>2]*.5877852439880371;g[Ta>>2]=+g[Ra>>2]*.9510565400123596+ +g[Sa>>2]*.5877852439880371;g[db>>2]=+g[Ra>>2]*.5877852439880371-+g[Sa>>2]*.9510565400123596;g[$a>>2]=+g[Cb>>2]*.5877852439880371-+g[Fb>>2]*.9510565400123596;g[Ma>>2]=(+g[Ka>>2]-+g[La>>2])*.55901700258255;g[Pa>>2]=+g[Na>>2]-+g[Oa>>2]*.25;g[Qa>>2]=+g[Ma>>2]+ +g[Pa>>2];g[cb>>2]=+g[Pa>>2]-+g[Ma>>2];g[xb>>2]=(+g[Ke>>2]-+g[$d>>2])*.55901700258255;g[yb>>2]=+g[Xd>>2]-+g[ae>>2]*.25;g[zb>>2]=+g[xb>>2]+ +g[yb>>2];g[_a>>2]=+g[yb>>2]-+g[xb>>2];g[Hb>>2]=+g[zb>>2]+ +g[Gb>>2];g[Ua>>2]=+g[Qa>>2]-+g[Ta>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Fa>>2]*+g[Hb>>2]-+g[Ha>>2]*+g[Ua>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ha>>2]*+g[Hb>>2]+ +g[Fa>>2]*+g[Ua>>2];g[gb>>2]=+g[_a>>2]-+g[$a>>2];g[Jb>>2]=+g[db>>2]+ +g[cb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[fb>>2]*+g[gb>>2]-+g[hb>>2]*+g[Jb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[hb>>2]*+g[gb>>2]+ +g[fb>>2]*+g[Jb>>2];g[Wa>>2]=+g[zb>>2]-+g[Gb>>2];g[Ya>>2]=+g[Ta>>2]+ +g[Qa>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Va>>2]*+g[Wa>>2]-+g[Xa>>2]*+g[Ya>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Xa>>2]*+g[Wa>>2]+ +g[Va>>2]*+g[Ya>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[eb>>2]=+g[cb>>2]-+g[db>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Za>>2]*+g[ab>>2]-+g[bb>>2]*+g[eb>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[bb>>2]*+g[ab>>2]+ +g[Za>>2]*+g[eb>>2];g[Nd>>2]=+g[Jd>>2]*.9510565400123596+ +g[Md>>2]*.5877852439880371;g[dd>>2]=+g[bd>>2]*.9510565400123596+ +g[cd>>2]*.5877852439880371;g[nd>>2]=+g[bd>>2]*.5877852439880371-+g[cd>>2]*.9510565400123596;g[id>>2]=+g[Jd>>2]*.5877852439880371-+g[Md>>2]*.9510565400123596;g[Rd>>2]=(+g[Pd>>2]-+g[Qd>>2])*.55901700258255;g[Ud>>2]=+g[Sd>>2]-+g[Td>>2]*.25;g[ad>>2]=+g[Rd>>2]+ 
+g[Ud>>2];g[md>>2]=+g[Ud>>2]-+g[Rd>>2];g[Cd>>2]=(+g[yd>>2]-+g[Bd>>2])*.55901700258255;g[Fd>>2]=+g[Dd>>2]-+g[Ed>>2]*.25;g[Gd>>2]=+g[Cd>>2]+ +g[Fd>>2];g[jd>>2]=+g[Fd>>2]-+g[Cd>>2];g[Od>>2]=+g[Gd>>2]-+g[Nd>>2];g[ed>>2]=+g[ad>>2]+ +g[dd>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[be>>2]*+g[Od>>2]-+g[ee>>2]*+g[ed>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[be>>2]*+g[ed>>2]+ +g[ee>>2]*+g[Od>>2];g[qd>>2]=+g[jd>>2]-+g[id>>2];g[sd>>2]=+g[md>>2]+ +g[nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[pd>>2]*+g[qd>>2]-+g[rd>>2]*+g[sd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[pd>>2]*+g[sd>>2]+ +g[rd>>2]*+g[qd>>2];g[fd>>2]=+g[Nd>>2]+ +g[Gd>>2];g[gd>>2]=+g[ad>>2]-+g[dd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[S>>2]*+g[fd>>2]-+g[U>>2]*+g[gd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[S>>2]*+g[gd>>2]+ +g[U>>2]*+g[fd>>2];g[kd>>2]=+g[id>>2]+ +g[jd>>2];g[od>>2]=+g[md>>2]-+g[nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[hd>>2]*+g[kd>>2]-+g[ld>>2]*+g[od>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[hd>>2]*+g[od>>2]+ +g[ld>>2]*+g[kd>>2];g[oa>>2]=+g[A>>2]*.5877852439880371-+g[na>>2]*.9510565400123596;g[va>>2]=+g[ta>>2]*.5877852439880371-+g[ua>>2]*.9510565400123596;g[Ca>>2]=+g[ta>>2]*.9510565400123596+ +g[ua>>2]*.5877852439880371;g[Y>>2]=+g[A>>2]*.9510565400123596+ +g[na>>2]*.5877852439880371;g[I>>2]=+g[E>>2]-+g[H>>2]*.25;g[J>>2]=(+g[F>>2]-+g[G>>2])*.55901700258255;g[K>>2]=+g[I>>2]-+g[J>>2];g[Da>>2]=+g[J>>2]+ +g[I>>2];g[qe>>2]=+g[ie>>2]-+g[pe>>2]*.25;g[re>>2]=(+g[le>>2]-+g[oe>>2])*.55901700258255;g[se>>2]=+g[qe>>2]-+g[re>>2];g[X>>2]=+g[re>>2]+ +g[qe>>2];g[pa>>2]=+g[se>>2]-+g[oa>>2];g[L>>2]=+g[va>>2]+ +g[K>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[he>>2]*+g[pa>>2]-+g[sa>>2]*+g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[sa>>2]*+g[pa>>2]+ +g[he>>2]*+g[L>>2];g[mb>>2]=+g[X>>2]+ +g[Y>>2];g[ob>>2]=+g[Da>>2]-+g[Ca>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[lb>>2]*+g[mb>>2]-+g[nb>>2]*+g[ob>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[nb>>2]*+g[mb>>2]+ +g[lb>>2]*+g[ob>>2];g[P>>2]=+g[se>>2]+ 
+g[oa>>2];g[R>>2]=+g[K>>2]-+g[va>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[O>>2]*+g[P>>2]-+g[Q>>2]*+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Q>>2]*+g[P>>2]+ +g[O>>2]*+g[R>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[Ea>>2]=+g[Ca>>2]+ +g[Da>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[W>>2]*+g[Z>>2]-+g[Ba>>2]*+g[Ea>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ba>>2]*+g[Z>>2]+ +g[W>>2]*+g[Ea>>2];g[wc>>2]=+g[Qb>>2]*.5877852439880371-+g[vc>>2]*.9510565400123596;g[ec>>2]=+g[cc>>2]*.5877852439880371-+g[dc>>2]*.9510565400123596;g[Vc>>2]=+g[cc>>2]*.9510565400123596+ +g[dc>>2]*.5877852439880371;g[qc>>2]=+g[Qb>>2]*.9510565400123596+ +g[vc>>2]*.5877852439880371;g[$b>>2]=+g[Xb>>2]-+g[_b>>2]*.25;g[ac>>2]=(+g[Yb>>2]-+g[Zb>>2])*.55901700258255;g[bc>>2]=+g[$b>>2]-+g[ac>>2];g[Uc>>2]=+g[ac>>2]+ +g[$b>>2];g[Pc>>2]=+g[zc>>2]-+g[Oc>>2]*.25;g[Qc>>2]=(+g[Gc>>2]-+g[Nc>>2])*.55901700258255;g[Tb>>2]=+g[Pc>>2]-+g[Qc>>2];g[pc>>2]=+g[Qc>>2]+ +g[Pc>>2];g[Ub>>2]=+g[wc>>2]+ +g[Tb>>2];g[fc>>2]=+g[bc>>2]-+g[ec>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ce>>2]*+g[Ub>>2]-+g[fe>>2]*+g[fc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ce>>2]*+g[fc>>2]+ +g[fe>>2]*+g[Ub>>2];g[Xc>>2]=+g[qc>>2]+ +g[pc>>2];g[Yc>>2]=+g[Uc>>2]-+g[Vc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[M>>2]*+g[Xc>>2]-+g[N>>2]*+g[Yc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[M>>2]*+g[Yc>>2]+ +g[N>>2]*+g[Xc>>2];g[jc>>2]=+g[Tb>>2]-+g[wc>>2];g[nc>>2]=+g[bc>>2]+ +g[ec>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ic>>2]*+g[jc>>2]-+g[mc>>2]*+g[nc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ic>>2]*+g[nc>>2]+ +g[mc>>2]*+g[jc>>2];g[Sc>>2]=+g[pc>>2]-+g[qc>>2];g[Wc>>2]=+g[Uc>>2]+ +g[Vc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[oc>>2]*+g[Sc>>2]-+g[Tc>>2]*+g[Wc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[oc>>2]*+g[Wc>>2]+ +g[Tc>>2]*+g[Sc>>2];c[Ye>>2]=(c[Ye>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=Ze;return}function Bt(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,51,7480);i=b;return}function Ct(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=
0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0;ai=i;i=i+1984|0;k=ai+1980|0;l=ai+1976|0;m=ai+1972|0;n=ai+1968|0;bi=ai+1964|0;o=ai+1960|0;p=ai+1956|0;$h=ai+1872|0;oh=ai+1868|0;rh=ai+1864|0;ph=ai+1860|0;sh=ai+1856|0;uh=ai+1852|0;zh=ai+1848|0;Bb=ai+1844|0;zb=ai+1840|0;Ah=ai+1836|0;vh=ai+1832|0;Fd=ai+1828|0;Ld=ai+1824|0;lb=ai+1820|0;Vd=ai+1816|0;Ve=ai+1812|0;rb=ai+1808|0;Ue=ai+1804|0;xd=ai+1800|0;Db=ai+1796|0;s=ai+1792|0;ic=ai+1788|0;M=ai+1784|0;Me=ai+1780|0;og=ai+1776|0;Qe=ai+1772|0;kg=ai+1768|0;Fa=ai+1764|0;Ga=ai+1760|0;Ha=ai+1756|0;Ja=ai+1752|0;wf=ai+1748|0;Xg=ai+1744|0;$c=ai+1740|0;Ng=ai+1736|0;Rg=ai+1732|0;Vg=ai+1728|0;xb=ai+1724|0;vb=ai+1720|0;Bd=ai+1716|0;Pd=ai+1712|0;We=ai+1708|0;uf=ai+1704|0;Ke=ai+1700|0;Ye=ai+1696|0;gg=ai+1692|0;ig=ai+1688|0;ie=ai+1684|0;Rd=ai+1680|0;kb=ai+1676|0;Jd=ai+1672|0;pb=ai+1668|0;Ed=ai+1664|0;jb=ai+1660|0;Kd=ai+1656|0;qb=ai+1652|0;Dd=ai+1648|0;qh=ai+1644|0;yh=ai+1640|0;th=ai+1636|0;xh=ai+1632|0;Ab=ai+1628|0;Cb=ai+1624|0;wh=ai+1620|0;r=ai+1616|0;gc=ai+1612|0;hc=ai+1608|0;K=ai+1604|0;L=ai+1600|0;ah=ai+1596|0;$f=ai+1592|0;Sc=ai+1588|0;Qc=ai+1584|0;He=ai+1580|0;Jh=ai+1576|0;Sh=ai+1572|0;Th=ai+1568|0;ca=ai+1564|0;la=ai+1560|0;Q=ai+1556|0;Ua=ai+1552|0;pd=ai+1548|0;_b=ai+1544|0;Yd=ai+1540|0;pe=ai+1536|0;tg=ai+1532|0;of=ai+1528|0;Gf=ai+1524|0;gf=ai+1520|0;sg=ai+1516|0;nf=ai+1512|0;Df=ai+1508|0;hb=ai+1504|0;qd=ai+1500|0;$b=ai+1496|0;bd=ai+1492|0;Aa=ai+1488|0;Ie=ai+1484|0;Vb=ai+1480|0;Vc=ai+1476|0;ag=ai+1472|0;ch=ai+1468|0;lh=ai+1464|0;mh=ai+1460|0;va=ai+1456|0;G=ai+1452|0;R=ai+1448|0;wc=ai+1444|0;sd=ai+1440|0;bc=ai+14
36|0;fd=ai+1432|0;Ee=ai+1428|0;wg=ai+1424|0;rf=ai+1420|0;Nf=ai+1416|0;xe=ai+1412|0;vg=ai+1408|0;qf=ai+1404|0;Kf=ai+1400|0;Lc=ai+1396|0;td=ai+1392|0;cc=ai+1388|0;id=ai+1384|0;q=ai+1380|0;Rc=ai+1376|0;rg=ai+1372|0;$g=ai+1368|0;qc=ai+1364|0;pc=ai+1360|0;Oc=ai+1356|0;Pc=ai+1352|0;za=ai+1348|0;Ib=ai+1344|0;_d=ai+1340|0;hf=ai+1336|0;Bh=ai+1332|0;Fb=ai+1328|0;df=ai+1324|0;Pa=ai+1320|0;Ih=ai+1316|0;Eb=ai+1312|0;da=ai+1308|0;Za=ai+1304|0;ke=ai+1300|0;fb=ai+1296|0;ka=ai+1292|0;Ya=ai+1288|0;Kh=ai+1284|0;Xa=ai+1280|0;me=ai+1276|0;bb=ai+1272|0;Rh=ai+1268|0;ab=ai+1264|0;w=ai+1260|0;La=ai+1256|0;bf=ai+1252|0;Ra=ai+1248|0;ba=ai+1244|0;Qa=ai+1240|0;Hh=ai+1236|0;Oa=ai+1232|0;Eh=ai+1228|0;Na=ai+1224|0;Fh=ai+1220|0;Gh=ai+1216|0;Ch=ai+1212|0;Dh=ai+1208|0;ja=ai+1204|0;eb=ai+1200|0;ga=ai+1196|0;db=ai+1192|0;ha=ai+1188|0;ia=ai+1184|0;ea=ai+1180|0;fa=ai+1176|0;Qh=ai+1172|0;Wa=ai+1168|0;Nh=ai+1164|0;Va=ai+1160|0;Oh=ai+1156|0;Ph=ai+1152|0;Lh=ai+1148|0;Mh=ai+1144|0;aa=ai+1140|0;Ka=ai+1136|0;z=ai+1132|0;Hb=ai+1128|0;A=ai+1124|0;$=ai+1120|0;x=ai+1116|0;y=ai+1112|0;Ma=ai+1108|0;Wd=ai+1104|0;Ta=ai+1100|0;Xd=ai+1096|0;Gb=ai+1092|0;Sa=ai+1088|0;le=ai+1084|0;Ff=ai+1080|0;oe=ai+1076|0;Ef=ai+1072|0;je=ai+1068|0;ne=ai+1064|0;cf=ai+1060|0;Bf=ai+1056|0;ff=ai+1052|0;Cf=ai+1048|0;af=ai+1044|0;ef=ai+1040|0;$a=ai+1036|0;Zd=ai+1032|0;gb=ai+1028|0;ad=ai+1024|0;_a=ai+1020|0;cb=ai+1016|0;T=ai+1012|0;W=ai+1008|0;Z=ai+1004|0;_=ai+1e3|0;Ub=ai+996|0;Tb=ai+992|0;Tc=ai+988|0;Uc=ai+984|0;U=ai+980|0;V=ai+976|0;X=ai+972|0;Y=ai+968|0;Uh=ai+964|0;Lb=ai+960|0;ue=ai+956|0;rc=ai+952|0;bh=ai+948|0;Kb=ai+944|0;na=ai+940|0;Pb=ai+936|0;se=ai+932|0;tc=ai+928|0;ua=ai+924|0;sc=ai+920|0;dh=ai+916|0;yc=ai+912|0;Be=ai+908|0;Gc=ai+904|0;kh=ai+900|0;xc=ai+896|0;wa=ai+892|0;Cc=ai+888|0;ze=ai+884|0;Ic=ai+880|0;F=ai+876|0;Hc=ai+872|0;Xh=ai+868|0;Rb=ai+864|0;_h=ai+860|0;Sb=ai+856|0;Vh=ai+852|0;Wh=ai+848|0;Yh=ai+844|0;Zh=ai+840|0;qa=ai+836|0;Nb=ai+832|0;ta=ai+828|0;Ob=ai+824|0;oa=ai+820|0;pa=ai+816|0;ra=ai+812|0;sa=ai+808|0;jh=ai+804|0;Fc=ai+8
00|0;gh=ai+796|0;Ec=ai+792|0;hh=ai+788|0;ih=ai+784|0;eh=ai+780|0;fh=ai+776|0;E=ai+772|0;Bc=ai+768|0;B=ai+764|0;Ac=ai+760|0;C=ai+756|0;D=ai+752|0;xa=ai+748|0;ya=ai+744|0;Qb=ai+740|0;dd=ai+736|0;vc=ai+732|0;ed=ai+728|0;Mb=ai+724|0;uc=ai+720|0;Ae=ai+716|0;Mf=ai+712|0;De=ai+708|0;Lf=ai+704|0;ye=ai+700|0;Ce=ai+696|0;te=ai+692|0;Jf=ai+688|0;we=ai+684|0;If=ai+680|0;re=ai+676|0;ve=ai+672|0;Dc=ai+668|0;hd=ai+664|0;Kc=ai+660|0;gd=ai+656|0;zc=ai+652|0;Jc=ai+648|0;nh=ai+644|0;v=ai+640|0;mb=ai+636|0;Ba=ai+632|0;Da=ai+628|0;tb=ai+624|0;I=ai+620|0;nb=ai+616|0;P=ai+612|0;sb=ai+608|0;t=ai+604|0;u=ai+600|0;S=ai+596|0;Ca=ai+592|0;ma=ai+588|0;H=ai+584|0;N=ai+580|0;O=ai+576|0;J=ai+572|0;Ea=ai+568|0;wb=ai+564|0;yb=ai+560|0;ob=ai+556|0;ub=ai+552|0;Ia=ai+548|0;ib=ai+544|0;yg=ai+540|0;Pg=ai+536|0;Cg=ai+532|0;Sg=ai+528|0;Af=ai+524|0;Pf=ai+520|0;Qf=ai+516|0;Rf=ai+512|0;Gg=ai+508|0;Hg=ai+504|0;Fg=ai+500|0;Ig=ai+496|0;Zg=ai+492|0;_g=ai+488|0;ug=ai+484|0;xg=ai+480|0;Ag=ai+476|0;Bg=ai+472|0;Hf=ai+468|0;Of=ai+464|0;Dg=ai+460|0;Eg=ai+456|0;Qg=ai+452|0;Wg=ai+448|0;Ug=ai+444|0;Yg=ai+440|0;Og=ai+436|0;Tg=ai+432|0;zg=ai+428|0;Lg=ai+424|0;Kg=ai+420|0;Mg=ai+416|0;Sf=ai+412|0;Jg=ai+408|0;ec=ai+404|0;Hd=ai+400|0;lc=ai+396|0;Md=ai+392|0;Wb=ai+388|0;Xb=ai+384|0;Nc=ai+380|0;Yb=ai+376|0;Wc=ai+372|0;Xc=ai+368|0;oc=ai+364|0;Yc=ai+360|0;Td=ai+356|0;Ud=ai+352|0;ac=ai+348|0;dc=ai+344|0;jc=ai+340|0;kc=ai+336|0;Jb=ai+332|0;Mc=ai+328|0;mc=ai+324|0;nc=ai+320|0;Id=ai+316|0;Qd=ai+312|0;Od=ai+308|0;Sd=ai+304|0;Gd=ai+300|0;Nd=ai+296|0;fc=ai+292|0;Ad=ai+288|0;_c=ai+284|0;Cd=ai+280|0;Zb=ai+276|0;Zc=ai+272|0;vd=ai+268|0;Oe=ai+264|0;$d=ai+260|0;Re=ai+256|0;ld=ai+252|0;md=ai+248|0;kd=ai+244|0;nd=ai+240|0;de=ai+236|0;ee=ai+232|0;ce=ai+228|0;fe=ai+224|0;_e=ai+220|0;$e=ai+216|0;rd=ai+212|0;ud=ai+208|0;yd=ai+204|0;zd=ai+200|0;cd=ai+196|0;jd=ai+192|0;ae=ai+188|0;be=ai+184|0;Pe=ai+180|0;Xe=ai+176|0;Te=ai+172|0;Ze=ai+168|0;Ne=ai+164|0;Se=ai+160|0;wd=ai+156|0;Je=ai+152|0;he=ai+148|0;Le=ai+144|0;od=ai+140|0;ge=ai+136|0;Tf=ai+132|0;mg=ai
+128|0;Xf=ai+124|0;pg=ai+120|0;jf=ai+116|0;kf=ai+112|0;Ge=ai+108|0;lf=ai+104|0;bg=ai+100|0;cg=ai+96|0;_f=ai+92|0;dg=ai+88|0;yf=ai+84|0;zf=ai+80|0;pf=ai+76|0;sf=ai+72|0;Vf=ai+68|0;Wf=ai+64|0;qe=ai+60|0;Fe=ai+56|0;Yf=ai+52|0;Zf=ai+48|0;ng=ai+44|0;vf=ai+40|0;tf=ai+36|0;xf=ai+32|0;lg=ai+28|0;qg=ai+24|0;Uf=ai+20|0;hg=ai+16|0;fg=ai+12|0;jg=ai+8|0;mf=ai+4|0;eg=ai;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[bi>>2]=f;c[o>>2]=h;c[p>>2]=j;g[ai+1952>>2]=.9980267286300659;g[ai+1948>>2]=.06279052048921585;g[ai+1944>>2]=.9921147227287292;g[ai+1940>>2]=.12533323466777802;g[ai+1936>>2]=.4257792830467224;g[ai+1932>>2]=.9048270583152771;g[ai+1928>>2]=.24868988990783691;g[ai+1924>>2]=.9685831665992737;g[ai+1920>>2]=.7705132365226746;g[ai+1916>>2]=.6374239921569824;g[ai+1912>>2]=.8443279266357422;g[ai+1908>>2]=.5358268022537231;g[ai+1904>>2]=.6845471262931824;g[ai+1900>>2]=.728968620300293;g[ai+1896>>2]=.4817536771297455;g[ai+1892>>2]=.8763066530227661;g[ai+1888>>2]=.55901700258255;g[ai+1884>>2]=.25;g[ai+1880>>2]=.5877852439880371;g[ai+1876>>2]=.9510565400123596;c[$h>>2]=c[bi>>2];c[m>>2]=(c[m>>2]|0)+((c[bi>>2]|0)-1<<3<<2);while(1){if((c[$h>>2]|0)>=(c[o>>2]|0))break;g[oh>>2]=+g[c[m>>2]>>2];g[rh>>2]=+g[(c[m>>2]|0)+4>>2];g[ph>>2]=+g[(c[m>>2]|0)+8>>2];g[sh>>2]=+g[(c[m>>2]|0)+12>>2];g[qh>>2]=+g[oh>>2]*+g[ph>>2];g[yh>>2]=+g[rh>>2]*+g[ph>>2];g[th>>2]=+g[rh>>2]*+g[sh>>2];g[xh>>2]=+g[oh>>2]*+g[sh>>2];g[uh>>2]=+g[qh>>2]-+g[th>>2];g[zh>>2]=+g[xh>>2]+ +g[yh>>2];g[Bb>>2]=+g[xh>>2]-+g[yh>>2];g[zb>>2]=+g[qh>>2]+ +g[th>>2];g[Ah>>2]=+g[(c[m>>2]|0)+20>>2];g[kb>>2]=+g[rh>>2]*+g[Ah>>2];g[Jd>>2]=+g[ph>>2]*+g[Ah>>2];g[pb>>2]=+g[oh>>2]*+g[Ah>>2];g[Ed>>2]=+g[sh>>2]*+g[Ah>>2];g[vh>>2]=+g[(c[m>>2]|0)+16>>2];g[jb>>2]=+g[oh>>2]*+g[vh>>2];g[Kd>>2]=+g[sh>>2]*+g[vh>>2];g[qb>>2]=+g[rh>>2]*+g[vh>>2];g[Dd>>2]=+g[ph>>2]*+g[vh>>2];g[Fd>>2]=+g[Dd>>2]-+g[Ed>>2];g[Ld>>2]=+g[Jd>>2]+ +g[Kd>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[Vd>>2]=+g[jb>>2]+ +g[kb>>2];g[Ve>>2]=+g[Jd>>2]-+g[Kd>>2];g[rb>>2]=+g[pb>>2]+ 
+g[qb>>2];g[Ue>>2]=+g[Dd>>2]+ +g[Ed>>2];g[xd>>2]=+g[pb>>2]-+g[qb>>2];g[Ab>>2]=+g[zb>>2]*+g[vh>>2];g[Cb>>2]=+g[Bb>>2]*+g[Ah>>2];g[Db>>2]=+g[Ab>>2]+ +g[Cb>>2];g[wh>>2]=+g[uh>>2]*+g[vh>>2];g[r>>2]=+g[zh>>2]*+g[Ah>>2];g[s>>2]=+g[wh>>2]+ +g[r>>2];g[gc>>2]=+g[zb>>2]*+g[Ah>>2];g[hc>>2]=+g[Bb>>2]*+g[vh>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[K>>2]=+g[uh>>2]*+g[Ah>>2];g[L>>2]=+g[zh>>2]*+g[vh>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[Me>>2]=+g[wh>>2]-+g[r>>2];g[og>>2]=+g[gc>>2]+ +g[hc>>2];g[Qe>>2]=+g[K>>2]+ +g[L>>2];g[kg>>2]=+g[Ab>>2]-+g[Cb>>2];g[Fa>>2]=+g[(c[m>>2]|0)+24>>2];g[Ga>>2]=+g[(c[m>>2]|0)+28>>2];g[Ha>>2]=+g[uh>>2]*+g[Fa>>2]+ +g[zh>>2]*+g[Ga>>2];g[Ja>>2]=+g[uh>>2]*+g[Ga>>2]-+g[zh>>2]*+g[Fa>>2];g[wf>>2]=+g[Vd>>2]*+g[Ga>>2]-+g[xd>>2]*+g[Fa>>2];g[Xg>>2]=+g[s>>2]*+g[Ga>>2]-+g[M>>2]*+g[Fa>>2];g[$c>>2]=+g[zb>>2]*+g[Fa>>2]+ +g[Bb>>2]*+g[Ga>>2];g[Ng>>2]=+g[lb>>2]*+g[Fa>>2]+ +g[rb>>2]*+g[Ga>>2];g[Rg>>2]=+g[lb>>2]*+g[Ga>>2]-+g[rb>>2]*+g[Fa>>2];g[Vg>>2]=+g[s>>2]*+g[Fa>>2]+ +g[M>>2]*+g[Ga>>2];g[xb>>2]=+g[vh>>2]*+g[Ga>>2]-+g[Ah>>2]*+g[Fa>>2];g[vb>>2]=+g[vh>>2]*+g[Fa>>2]+ +g[Ah>>2]*+g[Ga>>2];g[Bd>>2]=+g[zb>>2]*+g[Ga>>2]-+g[Bb>>2]*+g[Fa>>2];g[Pd>>2]=+g[Db>>2]*+g[Fa>>2]+ +g[ic>>2]*+g[Ga>>2];g[We>>2]=+g[Ue>>2]*+g[Fa>>2]+ +g[Ve>>2]*+g[Ga>>2];g[uf>>2]=+g[Vd>>2]*+g[Fa>>2]+ +g[xd>>2]*+g[Ga>>2];g[Ke>>2]=+g[oh>>2]*+g[Ga>>2]-+g[rh>>2]*+g[Fa>>2];g[Ye>>2]=+g[Ue>>2]*+g[Ga>>2]-+g[Ve>>2]*+g[Fa>>2];g[gg>>2]=+g[ph>>2]*+g[Fa>>2]+ +g[sh>>2]*+g[Ga>>2];g[ig>>2]=+g[ph>>2]*+g[Ga>>2]-+g[sh>>2]*+g[Fa>>2];g[ie>>2]=+g[oh>>2]*+g[Fa>>2]+ +g[rh>>2]*+g[Ga>>2];g[Rd>>2]=+g[Db>>2]*+g[Ga>>2]-+g[ic>>2]*+g[Fa>>2];g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ib>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Rc>>2]=+g[za>>2]+ +g[Ib>>2];g[_d>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[hf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[rg>>2]=+g[_d>>2]+ +g[hf>>2];g[$g>>2]=+g[Rc>>2]+ +g[rg>>2];g[qc>>2]=+g[_d>>2]-+g[hf>>2];g[pc>>2]=+g[za>>2]-+g[Ib>>2];g[ah>>2]=+g[q>>2]+ +g[$g>>2];g[$f>>2]=+g[pc>>2]*.9510565400123596+ 
+g[qc>>2]*.5877852439880371;g[Sc>>2]=+g[pc>>2]*.5877852439880371-+g[qc>>2]*.9510565400123596;g[Oc>>2]=+g[q>>2]-+g[$g>>2]*.25;g[Pc>>2]=(+g[Rc>>2]-+g[rg>>2])*.55901700258255;g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[He>>2]=+g[Pc>>2]+ +g[Oc>>2];g[Bh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Fh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Gh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Hh>>2]=+g[Fh>>2]+ +g[Gh>>2];g[Oa>>2]=+g[Fh>>2]-+g[Gh>>2];g[Ch>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Dh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Eh>>2]=+g[Ch>>2]+ +g[Dh>>2];g[Na>>2]=+g[Ch>>2]-+g[Dh>>2];g[Fb>>2]=(+g[Eh>>2]-+g[Hh>>2])*.55901700258255;g[df>>2]=+g[Na>>2]*.9510565400123596+ +g[Oa>>2]*.5877852439880371;g[Pa>>2]=+g[Na>>2]*.5877852439880371-+g[Oa>>2]*.9510565400123596;g[Ih>>2]=+g[Eh>>2]+ +g[Hh>>2];g[Eb>>2]=+g[Bh>>2]-+g[Ih>>2]*.25;g[da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[ha>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[ia>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[eb>>2]=+g[ia>>2]-+g[ha>>2];g[ea>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[fa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[db>>2]=+g[ea>>2]+ +g[fa>>2];g[Za>>2]=(+g[ga>>2]+ +g[ja>>2])*.55901700258255;g[ke>>2]=+g[db>>2]*.9510565400123596+ +g[eb>>2]*.5877852439880371;g[fb>>2]=+g[db>>2]*.5877852439880371-+g[eb>>2]*.9510565400123596;g[ka>>2]=+g[ga>>2]-+g[ja>>2];g[Ya>>2]=+g[da>>2]-+g[ka>>2]*.25;g[Kh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Oh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Ph>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Qh>>2]=+g[Oh>>2]+ +g[Ph>>2];g[Wa>>2]=+g[Oh>>2]-+g[Ph>>2];g[Lh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Mh>>2]=+g[c[l>>2]>>2];g[Nh>>2]=+g[Lh>>2]+ +g[Mh>>2];g[Va>>2]=+g[Lh>>2]-+g[Mh>>2];g[Xa>>2]=+g[Va>>2]*.5877852439880371-+g[Wa>>2]*.9510565400123596;g[me>>2]=+g[Va>>2]*.9510565400123596+ +g[Wa>>2]*.5877852439880371;g[bb>>2]=(+g[Nh>>2]-+g[Qh>>2])*.55901700258255;g[Rh>>2]=+g[Nh>>2]+ 
+g[Qh>>2];g[ab>>2]=+g[Kh>>2]-+g[Rh>>2]*.25;g[w>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[A>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[$>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[aa>>2]=+g[A>>2]-+g[$>>2];g[Ka>>2]=+g[A>>2]+ +g[$>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[Hb>>2]=+g[x>>2]+ +g[y>>2];g[La>>2]=+g[Hb>>2]*.5877852439880371-+g[Ka>>2]*.9510565400123596;g[bf>>2]=+g[Hb>>2]*.9510565400123596+ +g[Ka>>2]*.5877852439880371;g[Ra>>2]=(+g[z>>2]-+g[aa>>2])*.55901700258255;g[ba>>2]=+g[z>>2]+ +g[aa>>2];g[Qa>>2]=+g[w>>2]-+g[ba>>2]*.25;g[Jh>>2]=+g[Bh>>2]+ +g[Ih>>2];g[Sh>>2]=+g[Kh>>2]+ +g[Rh>>2];g[Th>>2]=+g[Jh>>2]+ +g[Sh>>2];g[ca>>2]=+g[w>>2]+ +g[ba>>2];g[la>>2]=+g[da>>2]+ +g[ka>>2];g[Q>>2]=+g[ca>>2]+ +g[la>>2];g[Gb>>2]=+g[Eb>>2]-+g[Fb>>2];g[Ma>>2]=+g[Gb>>2]-+g[La>>2];g[Wd>>2]=+g[Gb>>2]+ +g[La>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Ta>>2]=+g[Pa>>2]+ +g[Sa>>2];g[Xd>>2]=+g[Sa>>2]-+g[Pa>>2];g[Ua>>2]=+g[Ma>>2]*.8763066530227661-+g[Ta>>2]*.4817536771297455;g[pd>>2]=+g[Xd>>2]*.728968620300293+ +g[Wd>>2]*.6845471262931824;g[_b>>2]=+g[Ta>>2]*.8763066530227661+ +g[Ma>>2]*.4817536771297455;g[Yd>>2]=+g[Wd>>2]*.728968620300293-+g[Xd>>2]*.6845471262931824;g[je>>2]=+g[bb>>2]+ +g[ab>>2];g[le>>2]=+g[je>>2]-+g[ke>>2];g[Ff>>2]=+g[je>>2]+ +g[ke>>2];g[ne>>2]=+g[Ya>>2]+ +g[Za>>2];g[oe>>2]=+g[me>>2]+ +g[ne>>2];g[Ef>>2]=+g[ne>>2]-+g[me>>2];g[pe>>2]=+g[le>>2]*.5358268022537231-+g[oe>>2]*.8443279266357422;g[tg>>2]=+g[Ef>>2]*.6374239921569824+ +g[Ff>>2]*.7705132365226746;g[of>>2]=+g[oe>>2]*.5358268022537231+ +g[le>>2]*.8443279266357422;g[Gf>>2]=+g[Ef>>2]*.7705132365226746-+g[Ff>>2]*.6374239921569824;g[af>>2]=+g[Fb>>2]+ +g[Eb>>2];g[cf>>2]=+g[af>>2]-+g[bf>>2];g[Bf>>2]=+g[af>>2]+ +g[bf>>2];g[ef>>2]=+g[Ra>>2]+ +g[Qa>>2];g[ff>>2]=+g[df>>2]+ +g[ef>>2];g[Cf>>2]=+g[ef>>2]-+g[df>>2];g[gf>>2]=+g[cf>>2]*.9685831665992737-+g[ff>>2]*.24868988990783691;g[sg>>2]=+g[Cf>>2]*.5358268022537231+ 
+g[Bf>>2]*.8443279266357422;g[nf>>2]=+g[ff>>2]*.9685831665992737+ +g[cf>>2]*.24868988990783691;g[Df>>2]=+g[Bf>>2]*.5358268022537231-+g[Cf>>2]*.8443279266357422;g[_a>>2]=+g[Ya>>2]-+g[Za>>2];g[$a>>2]=+g[Xa>>2]+ +g[_a>>2];g[Zd>>2]=+g[_a>>2]-+g[Xa>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[gb>>2]=+g[cb>>2]-+g[fb>>2];g[ad>>2]=+g[cb>>2]+ +g[fb>>2];g[hb>>2]=+g[$a>>2]*.9048270583152771+ +g[gb>>2]*.4257792830467224;g[qd>>2]=+g[ad>>2]*.12533323466777802-+g[Zd>>2]*.9921147227287292;g[$b>>2]=+g[gb>>2]*.9048270583152771-+g[$a>>2]*.4257792830467224;g[bd>>2]=+g[Zd>>2]*.12533323466777802+ +g[ad>>2]*.9921147227287292;g[T>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[U>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[V>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[X>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=+g[W>>2]+ +g[Z>>2];g[Ub>>2]=+g[X>>2]+ +g[Y>>2];g[Tb>>2]=+g[U>>2]+ +g[V>>2];g[Aa>>2]=+g[T>>2]+ +g[_>>2];g[Ie>>2]=+g[Tb>>2]*.9510565400123596+ +g[Ub>>2]*.5877852439880371;g[Vb>>2]=+g[Tb>>2]*.5877852439880371-+g[Ub>>2]*.9510565400123596;g[Tc>>2]=+g[T>>2]-+g[_>>2]*.25;g[Uc>>2]=(+g[W>>2]-+g[Z>>2])*.55901700258255;g[Vc>>2]=+g[Tc>>2]-+g[Uc>>2];g[ag>>2]=+g[Uc>>2]+ +g[Tc>>2];g[Uh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Vh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Wh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Xh>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Rb>>2]=+g[Vh>>2]-+g[Wh>>2];g[Yh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Zh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[_h>>2]=+g[Yh>>2]+ +g[Zh>>2];g[Sb>>2]=+g[Yh>>2]-+g[Zh>>2];g[Lb>>2]=(+g[Xh>>2]-+g[_h>>2])*.55901700258255;g[ue>>2]=+g[Rb>>2]*.9510565400123596+ +g[Sb>>2]*.5877852439880371;g[rc>>2]=+g[Rb>>2]*.5877852439880371-+g[Sb>>2]*.9510565400123596;g[bh>>2]=+g[Xh>>2]+ 
+g[_h>>2];g[Kb>>2]=+g[Uh>>2]-+g[bh>>2]*.25;g[na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[oa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[pa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[Nb>>2]=+g[oa>>2]+ +g[pa>>2];g[ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[Ob>>2]=+g[ra>>2]+ +g[sa>>2];g[Pb>>2]=+g[Nb>>2]*.5877852439880371-+g[Ob>>2]*.9510565400123596;g[se>>2]=+g[Nb>>2]*.9510565400123596+ +g[Ob>>2]*.5877852439880371;g[tc>>2]=(+g[qa>>2]-+g[ta>>2])*.55901700258255;g[ua>>2]=+g[qa>>2]+ +g[ta>>2];g[sc>>2]=+g[na>>2]-+g[ua>>2]*.25;g[dh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[hh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[ih>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[jh>>2]=+g[hh>>2]+ +g[ih>>2];g[Fc>>2]=+g[hh>>2]-+g[ih>>2];g[eh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[fh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[gh>>2]=+g[eh>>2]+ +g[fh>>2];g[Ec>>2]=+g[eh>>2]-+g[fh>>2];g[yc>>2]=(+g[gh>>2]-+g[jh>>2])*.55901700258255;g[Be>>2]=+g[Ec>>2]*.9510565400123596+ +g[Fc>>2]*.5877852439880371;g[Gc>>2]=+g[Ec>>2]*.5877852439880371-+g[Fc>>2]*.9510565400123596;g[kh>>2]=+g[gh>>2]+ +g[jh>>2];g[xc>>2]=+g[dh>>2]-+g[kh>>2]*.25;g[wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[C>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Bc>>2]=+g[D>>2]-+g[C>>2];g[xa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ya>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[B>>2]=+g[xa>>2]-+g[ya>>2];g[Ac>>2]=+g[xa>>2]+ +g[ya>>2];g[Cc>>2]=+g[Ac>>2]*.5877852439880371-+g[Bc>>2]*.9510565400123596;g[ze>>2]=+g[Ac>>2]*.9510565400123596+ +g[Bc>>2]*.5877852439880371;g[Ic>>2]=(+g[B>>2]+ +g[E>>2])*.55901700258255;g[F>>2]=+g[B>>2]-+g[E>>2];g[Hc>>2]=+g[wa>>2]-+g[F>>2]*.25;g[ch>>2]=+g[Uh>>2]+ +g[bh>>2];g[lh>>2]=+g[dh>>2]+ +g[kh>>2];g[mh>>2]=+g[ch>>2]+ +g[lh>>2];g[va>>2]=+g[na>>2]+ +g[ua>>2];g[G>>2]=+g[wa>>2]+ +g[F>>2];g[R>>2]=+g[va>>2]+ 
+g[G>>2];g[Mb>>2]=+g[Kb>>2]-+g[Lb>>2];g[Qb>>2]=+g[Mb>>2]-+g[Pb>>2];g[dd>>2]=+g[Mb>>2]+ +g[Pb>>2];g[uc>>2]=+g[sc>>2]-+g[tc>>2];g[vc>>2]=+g[rc>>2]+ +g[uc>>2];g[ed>>2]=+g[uc>>2]-+g[rc>>2];g[wc>>2]=+g[Qb>>2]*.5358268022537231-+g[vc>>2]*.8443279266357422;g[sd>>2]=+g[ed>>2]*.06279052048921585+ +g[dd>>2]*.9980267286300659;g[bc>>2]=+g[vc>>2]*.5358268022537231+ +g[Qb>>2]*.8443279266357422;g[fd>>2]=+g[dd>>2]*.06279052048921585-+g[ed>>2]*.9980267286300659;g[ye>>2]=+g[yc>>2]+ +g[xc>>2];g[Ae>>2]=+g[ye>>2]-+g[ze>>2];g[Mf>>2]=+g[ye>>2]+ +g[ze>>2];g[Ce>>2]=+g[Hc>>2]+ +g[Ic>>2];g[De>>2]=+g[Be>>2]+ +g[Ce>>2];g[Lf>>2]=+g[Ce>>2]-+g[Be>>2];g[Ee>>2]=+g[Ae>>2]*.728968620300293-+g[De>>2]*.6845471262931824;g[wg>>2]=+g[Mf>>2]*.12533323466777802-+g[Lf>>2]*.9921147227287292;g[rf>>2]=+g[De>>2]*.728968620300293+ +g[Ae>>2]*.6845471262931824;g[Nf>>2]=+g[Lf>>2]*.12533323466777802+ +g[Mf>>2]*.9921147227287292;g[re>>2]=+g[Lb>>2]+ +g[Kb>>2];g[te>>2]=+g[re>>2]-+g[se>>2];g[Jf>>2]=+g[re>>2]+ +g[se>>2];g[ve>>2]=+g[tc>>2]+ +g[sc>>2];g[we>>2]=+g[ue>>2]+ +g[ve>>2];g[If>>2]=+g[ve>>2]-+g[ue>>2];g[xe>>2]=+g[te>>2]*.8763066530227661-+g[we>>2]*.4817536771297455;g[vg>>2]=+g[Jf>>2]*.9048270583152771-+g[If>>2]*.4257792830467224;g[qf>>2]=+g[we>>2]*.8763066530227661+ +g[te>>2]*.4817536771297455;g[Kf>>2]=+g[If>>2]*.9048270583152771+ +g[Jf>>2]*.4257792830467224;g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Dc>>2]=+g[zc>>2]-+g[Cc>>2];g[hd>>2]=+g[zc>>2]+ +g[Cc>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[Kc>>2]=+g[Gc>>2]+ +g[Jc>>2];g[gd>>2]=+g[Jc>>2]-+g[Gc>>2];g[Lc>>2]=+g[Dc>>2]*.06279052048921585-+g[Kc>>2]*.9980267286300659;g[td>>2]=+g[hd>>2]*.7705132365226746-+g[gd>>2]*.6374239921569824;g[cc>>2]=+g[Kc>>2]*.06279052048921585+ +g[Dc>>2]*.9980267286300659;g[id>>2]=+g[gd>>2]*.7705132365226746+ +g[hd>>2]*.6374239921569824;g[t>>2]=(+g[Th>>2]-+g[mh>>2])*.55901700258255;g[nh>>2]=+g[Th>>2]+ +g[mh>>2];g[u>>2]=+g[ah>>2]-+g[nh>>2]*.25;g[v>>2]=+g[t>>2]+ +g[u>>2];g[mb>>2]=+g[u>>2]-+g[t>>2];g[S>>2]=(+g[Q>>2]-+g[R>>2])*.55901700258255;g[Ba>>2]=+g[Q>>2]+ 
+g[R>>2];g[Ca>>2]=+g[Aa>>2]-+g[Ba>>2]*.25;g[Da>>2]=+g[S>>2]+ +g[Ca>>2];g[tb>>2]=+g[Ca>>2]-+g[S>>2];g[ma>>2]=+g[ca>>2]-+g[la>>2];g[H>>2]=+g[va>>2]-+g[G>>2];g[I>>2]=+g[ma>>2]*.9510565400123596+ +g[H>>2]*.5877852439880371;g[nb>>2]=+g[ma>>2]*.5877852439880371-+g[H>>2]*.9510565400123596;g[N>>2]=+g[Jh>>2]-+g[Sh>>2];g[O>>2]=+g[ch>>2]-+g[lh>>2];g[P>>2]=+g[N>>2]*.9510565400123596+ +g[O>>2]*.5877852439880371;g[sb>>2]=+g[N>>2]*.5877852439880371-+g[O>>2]*.9510565400123596;g[c[k>>2]>>2]=+g[ah>>2]+ +g[nh>>2];g[c[l>>2]>>2]=+g[Aa>>2]+ +g[Ba>>2];g[J>>2]=+g[v>>2]-+g[I>>2];g[Ea>>2]=+g[P>>2]+ +g[Da>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[s>>2]*+g[J>>2]-+g[M>>2]*+g[Ea>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[M>>2]*+g[J>>2]+ +g[s>>2]*+g[Ea>>2];g[wb>>2]=+g[mb>>2]+ +g[nb>>2];g[yb>>2]=+g[tb>>2]-+g[sb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[vb>>2]*+g[wb>>2]-+g[xb>>2]*+g[yb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[xb>>2]*+g[wb>>2]+ +g[vb>>2]*+g[yb>>2];g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[ub>>2]=+g[sb>>2]+ +g[tb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[lb>>2]*+g[ob>>2]-+g[rb>>2]*+g[ub>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[rb>>2]*+g[ob>>2]+ +g[lb>>2]*+g[ub>>2];g[Ia>>2]=+g[v>>2]+ +g[I>>2];g[ib>>2]=+g[Da>>2]-+g[P>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Ha>>2]*+g[Ia>>2]-+g[Ja>>2]*+g[ib>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Ja>>2]*+g[Ia>>2]+ +g[Ha>>2]*+g[ib>>2];g[ug>>2]=+g[sg>>2]+ +g[tg>>2];g[xg>>2]=+g[vg>>2]-+g[wg>>2];g[yg>>2]=+g[ug>>2]*.9510565400123596+ +g[xg>>2]*.5877852439880371;g[Pg>>2]=+g[ug>>2]*.5877852439880371-+g[xg>>2]*.9510565400123596;g[Ag>>2]=+g[Df>>2]-+g[Gf>>2];g[Bg>>2]=+g[Nf>>2]-+g[Kf>>2];g[Cg>>2]=+g[Ag>>2]*.9510565400123596+ +g[Bg>>2]*.5877852439880371;g[Sg>>2]=+g[Ag>>2]*.5877852439880371-+g[Bg>>2]*.9510565400123596;g[Af>>2]=+g[He>>2]+ +g[Ie>>2];g[Hf>>2]=+g[Df>>2]+ +g[Gf>>2];g[Of>>2]=+g[Kf>>2]+ +g[Nf>>2];g[Pf>>2]=+g[Hf>>2]-+g[Of>>2];g[Qf>>2]=+g[Af>>2]-+g[Pf>>2]*.25;g[Rf>>2]=(+g[Hf>>2]+ 
+g[Of>>2])*.55901700258255;g[Gg>>2]=+g[ag>>2]-+g[$f>>2];g[Dg>>2]=+g[sg>>2]-+g[tg>>2];g[Eg>>2]=+g[vg>>2]+ +g[wg>>2];g[Hg>>2]=+g[Dg>>2]+ +g[Eg>>2];g[Fg>>2]=(+g[Dg>>2]-+g[Eg>>2])*.55901700258255;g[Ig>>2]=+g[Gg>>2]-+g[Hg>>2]*.25;g[Zg>>2]=+g[Af>>2]+ +g[Pf>>2];g[_g>>2]=+g[Gg>>2]+ +g[Hg>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[uh>>2]*+g[Zg>>2]-+g[zh>>2]*+g[_g>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[zh>>2]*+g[Zg>>2]+ +g[uh>>2]*+g[_g>>2];g[Og>>2]=+g[Qf>>2]-+g[Rf>>2];g[Qg>>2]=+g[Og>>2]-+g[Pg>>2];g[Wg>>2]=+g[Og>>2]+ +g[Pg>>2];g[Tg>>2]=+g[Ig>>2]-+g[Fg>>2];g[Ug>>2]=+g[Sg>>2]+ +g[Tg>>2];g[Yg>>2]=+g[Tg>>2]-+g[Sg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Ng>>2]*+g[Qg>>2]-+g[Rg>>2]*+g[Ug>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Rg>>2]*+g[Qg>>2]+ +g[Ng>>2]*+g[Ug>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Vg>>2]*+g[Wg>>2]-+g[Xg>>2]*+g[Yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Xg>>2]*+g[Wg>>2]+ +g[Vg>>2]*+g[Yg>>2];g[Sf>>2]=+g[Qf>>2]+ +g[Rf>>2];g[zg>>2]=+g[Sf>>2]-+g[yg>>2];g[Lg>>2]=+g[Sf>>2]+ +g[yg>>2];g[Jg>>2]=+g[Fg>>2]+ +g[Ig>>2];g[Kg>>2]=+g[Cg>>2]+ +g[Jg>>2];g[Mg>>2]=+g[Jg>>2]-+g[Cg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[vh>>2]*+g[zg>>2]-+g[Ah>>2]*+g[Kg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Ah>>2]*+g[zg>>2]+ +g[vh>>2]*+g[Kg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Fa>>2]*+g[Lg>>2]-+g[Ga>>2]*+g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Ga>>2]*+g[Lg>>2]+ +g[Fa>>2]*+g[Mg>>2];g[ac>>2]=+g[_b>>2]-+g[$b>>2];g[dc>>2]=+g[bc>>2]-+g[cc>>2];g[ec>>2]=+g[ac>>2]*.9510565400123596+ +g[dc>>2]*.5877852439880371;g[Hd>>2]=+g[ac>>2]*.5877852439880371-+g[dc>>2]*.9510565400123596;g[jc>>2]=+g[Ua>>2]+ +g[hb>>2];g[kc>>2]=+g[wc>>2]-+g[Lc>>2];g[lc>>2]=+g[jc>>2]*.9510565400123596+ +g[kc>>2]*.5877852439880371;g[Md>>2]=+g[jc>>2]*.5877852439880371-+g[kc>>2]*.9510565400123596;g[Wb>>2]=+g[Qc>>2]-+g[Vb>>2];g[Jb>>2]=+g[Ua>>2]-+g[hb>>2];g[Mc>>2]=+g[wc>>2]+ +g[Lc>>2];g[Xb>>2]=+g[Jb>>2]+ 
+g[Mc>>2];g[Nc>>2]=(+g[Jb>>2]-+g[Mc>>2])*.55901700258255;g[Yb>>2]=+g[Wb>>2]-+g[Xb>>2]*.25;g[Wc>>2]=+g[Sc>>2]+ +g[Vc>>2];g[mc>>2]=+g[_b>>2]+ +g[$b>>2];g[nc>>2]=+g[bc>>2]+ +g[cc>>2];g[Xc>>2]=+g[mc>>2]+ +g[nc>>2];g[oc>>2]=(+g[mc>>2]-+g[nc>>2])*.55901700258255;g[Yc>>2]=+g[Wc>>2]-+g[Xc>>2]*.25;g[Td>>2]=+g[Wb>>2]+ +g[Xb>>2];g[Ud>>2]=+g[Wc>>2]+ +g[Xc>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[zb>>2]*+g[Td>>2]-+g[Bb>>2]*+g[Ud>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Bb>>2]*+g[Td>>2]+ +g[zb>>2]*+g[Ud>>2];g[Gd>>2]=+g[Yb>>2]-+g[Nc>>2];g[Id>>2]=+g[Gd>>2]-+g[Hd>>2];g[Qd>>2]=+g[Gd>>2]+ +g[Hd>>2];g[Nd>>2]=+g[Yc>>2]-+g[oc>>2];g[Od>>2]=+g[Md>>2]+ +g[Nd>>2];g[Sd>>2]=+g[Nd>>2]-+g[Md>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Fd>>2]*+g[Id>>2]-+g[Ld>>2]*+g[Od>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Ld>>2]*+g[Id>>2]+ +g[Fd>>2]*+g[Od>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Pd>>2]*+g[Qd>>2]-+g[Rd>>2]*+g[Sd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Rd>>2]*+g[Qd>>2]+ +g[Pd>>2]*+g[Sd>>2];g[Zb>>2]=+g[Nc>>2]+ +g[Yb>>2];g[fc>>2]=+g[Zb>>2]-+g[ec>>2];g[Ad>>2]=+g[Zb>>2]+ +g[ec>>2];g[Zc>>2]=+g[oc>>2]+ +g[Yc>>2];g[_c>>2]=+g[lc>>2]+ +g[Zc>>2];g[Cd>>2]=+g[Zc>>2]-+g[lc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Db>>2]*+g[fc>>2]-+g[ic>>2]*+g[_c>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ic>>2]*+g[fc>>2]+ +g[Db>>2]*+g[_c>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[$c>>2]*+g[Ad>>2]-+g[Bd>>2]*+g[Cd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Bd>>2]*+g[Ad>>2]+ +g[$c>>2]*+g[Cd>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[ud>>2]=+g[sd>>2]-+g[td>>2];g[vd>>2]=+g[rd>>2]*.9510565400123596+ +g[ud>>2]*.5877852439880371;g[Oe>>2]=+g[rd>>2]*.5877852439880371-+g[ud>>2]*.9510565400123596;g[yd>>2]=+g[Yd>>2]+ +g[bd>>2];g[zd>>2]=+g[fd>>2]+ +g[id>>2];g[$d>>2]=+g[yd>>2]*.9510565400123596+ +g[zd>>2]*.5877852439880371;g[Re>>2]=+g[yd>>2]*.5877852439880371-+g[zd>>2]*.9510565400123596;g[ld>>2]=+g[Qc>>2]+ +g[Vb>>2];g[cd>>2]=+g[Yd>>2]-+g[bd>>2];g[jd>>2]=+g[fd>>2]-+g[id>>2];g[md>>2]=+g[cd>>2]+ 
+g[jd>>2];g[kd>>2]=(+g[cd>>2]-+g[jd>>2])*.55901700258255;g[nd>>2]=+g[ld>>2]-+g[md>>2]*.25;g[de>>2]=+g[Vc>>2]-+g[Sc>>2];g[ae>>2]=+g[pd>>2]+ +g[qd>>2];g[be>>2]=+g[sd>>2]+ +g[td>>2];g[ee>>2]=+g[ae>>2]+ +g[be>>2];g[ce>>2]=(+g[ae>>2]-+g[be>>2])*.55901700258255;g[fe>>2]=+g[de>>2]-+g[ee>>2]*.25;g[_e>>2]=+g[ld>>2]+ +g[md>>2];g[$e>>2]=+g[de>>2]+ +g[ee>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ph>>2]*+g[_e>>2]-+g[sh>>2]*+g[$e>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[sh>>2]*+g[_e>>2]+ +g[ph>>2]*+g[$e>>2];g[Ne>>2]=+g[nd>>2]-+g[kd>>2];g[Pe>>2]=+g[Ne>>2]-+g[Oe>>2];g[Xe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[Se>>2]=+g[fe>>2]-+g[ce>>2];g[Te>>2]=+g[Re>>2]+ +g[Se>>2];g[Ze>>2]=+g[Se>>2]-+g[Re>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Me>>2]*+g[Pe>>2]-+g[Qe>>2]*+g[Te>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Qe>>2]*+g[Pe>>2]+ +g[Me>>2]*+g[Te>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[We>>2]*+g[Xe>>2]-+g[Ye>>2]*+g[Ze>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[Ye>>2]*+g[Xe>>2]+ +g[We>>2]*+g[Ze>>2];g[od>>2]=+g[kd>>2]+ +g[nd>>2];g[wd>>2]=+g[od>>2]-+g[vd>>2];g[Je>>2]=+g[od>>2]+ +g[vd>>2];g[ge>>2]=+g[ce>>2]+ +g[fe>>2];g[he>>2]=+g[$d>>2]+ +g[ge>>2];g[Le>>2]=+g[ge>>2]-+g[$d>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Vd>>2]*+g[wd>>2]-+g[xd>>2]*+g[he>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[xd>>2]*+g[wd>>2]+ +g[Vd>>2]*+g[he>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[ie>>2]*+g[Je>>2]-+g[Ke>>2]*+g[Le>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Ke>>2]*+g[Je>>2]+ +g[ie>>2]*+g[Le>>2];g[pf>>2]=+g[nf>>2]-+g[of>>2];g[sf>>2]=+g[qf>>2]-+g[rf>>2];g[Tf>>2]=+g[pf>>2]*.9510565400123596+ +g[sf>>2]*.5877852439880371;g[mg>>2]=+g[pf>>2]*.5877852439880371-+g[sf>>2]*.9510565400123596;g[Vf>>2]=+g[gf>>2]-+g[pe>>2];g[Wf>>2]=+g[xe>>2]-+g[Ee>>2];g[Xf>>2]=+g[Vf>>2]*.9510565400123596+ +g[Wf>>2]*.5877852439880371;g[pg>>2]=+g[Vf>>2]*.5877852439880371-+g[Wf>>2]*.9510565400123596;g[jf>>2]=+g[He>>2]-+g[Ie>>2];g[qe>>2]=+g[gf>>2]+ +g[pe>>2];g[Fe>>2]=+g[xe>>2]+ +g[Ee>>2];g[kf>>2]=+g[qe>>2]+ 
+g[Fe>>2];g[Ge>>2]=(+g[qe>>2]-+g[Fe>>2])*.55901700258255;g[lf>>2]=+g[jf>>2]-+g[kf>>2]*.25;g[bg>>2]=+g[$f>>2]+ +g[ag>>2];g[Yf>>2]=+g[nf>>2]+ +g[of>>2];g[Zf>>2]=+g[qf>>2]+ +g[rf>>2];g[cg>>2]=+g[Yf>>2]+ +g[Zf>>2];g[_f>>2]=(+g[Yf>>2]-+g[Zf>>2])*.55901700258255;g[dg>>2]=+g[bg>>2]-+g[cg>>2]*.25;g[yf>>2]=+g[jf>>2]+ +g[kf>>2];g[zf>>2]=+g[bg>>2]+ +g[cg>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[oh>>2]*+g[yf>>2]-+g[rh>>2]*+g[zf>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[rh>>2]*+g[yf>>2]+ +g[oh>>2]*+g[zf>>2];g[lg>>2]=+g[lf>>2]-+g[Ge>>2];g[ng>>2]=+g[lg>>2]-+g[mg>>2];g[vf>>2]=+g[lg>>2]+ +g[mg>>2];g[qg>>2]=+g[dg>>2]-+g[_f>>2];g[tf>>2]=+g[pg>>2]+ +g[qg>>2];g[xf>>2]=+g[qg>>2]-+g[pg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[kg>>2]*+g[ng>>2]-+g[og>>2]*+g[tf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[og>>2]*+g[ng>>2]+ +g[kg>>2]*+g[tf>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[uf>>2]*+g[vf>>2]-+g[wf>>2]*+g[xf>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[wf>>2]*+g[vf>>2]+ +g[uf>>2]*+g[xf>>2];g[mf>>2]=+g[Ge>>2]+ +g[lf>>2];g[Uf>>2]=+g[mf>>2]-+g[Tf>>2];g[hg>>2]=+g[mf>>2]+ +g[Tf>>2];g[eg>>2]=+g[_f>>2]+ +g[dg>>2];g[fg>>2]=+g[Xf>>2]+ +g[eg>>2];g[jg>>2]=+g[eg>>2]-+g[Xf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ue>>2]*+g[Uf>>2]-+g[Ve>>2]*+g[fg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Ve>>2]*+g[Uf>>2]+ +g[Ue>>2]*+g[fg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[gg>>2]*+g[hg>>2]-+g[ig>>2]*+g[jg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[ig>>2]*+g[hg>>2]+ +g[gg>>2]*+g[jg>>2];c[$h>>2]=(c[$h>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=ai;return}function Dt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,52,7528);i=b;return}function Et(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0;kj=i;i=i+2192|0;k=kj+2184|0;l=kj+2180|0;m=kj+2176|0;n=kj+2172|0;lj=kj+2168|0;o=kj+2164|0;p=kj+2160|0;jj=kj+2128|0;s=kj+2124|0;v=kj+2120|0;t=kj+2116|0;w=kj+2112|0;y=kj+2108|0;Hb=kj+2104|0;La=kj+2100|0;ba=kj+2096|0;z=kj+2092|0;ca=kj+2088|0;fa=kj+2084|0;ja=kj+2080|0;Pa=kj+2076|0;Va=kj+2072|0;Kc=kj+2068|0;hd=kj+2064|0;yc=kj+2060|0;mc=kj+2056|0;sc=kj+2052|0;Vd=kj+2048|0;td=kj+2044|0;xd=kj+2040|0;kg=kj+2036|0;tg=kj+2032|0;qg=kj+2028|0;xg=kj+2024|0;Ch=kj+2020|0;hi=kj+2016|0;Rh=kj+2012|0;ji=kj+2008|0;$a=kj+2004|0;db=kj+2e3|0;$h=kj+1996|0;di=kj+1992|0;Rd=kj+1988|0;Td=kj+1984|0;ce=kj+1980|0;sf=kj+1976|0;Ec=kj+1972|0;Ic=kj+1968|0;Rg=kj+1964|0;Vg=kj+1960|0;pd=kj+1956|0;rd=kj+1952|0;yf=kj+1948|0;Kf=kj+1944|0;ea=kj+1940|0;fb=kj+1936|0;ia=kj+1932|0;gb=kj+1928|0;ka=kj+1924|0;Kb=kj+1920|0;lb=kj+1916|0;hb=kj+1912|0;$c=kj+1908|0;Hd=kj+1904|0;Cd=kj+1900|0;Id=kj+1896|0;Dd=kj+1892|0;Nd=kj+1888|0;Fd=kj+1884|0;Jd=kj+1880|0;Za=kj+1876|0;Hc=kj+1872|0;cb=kj+1868|0;Cc=kj+1864|0;_a=kj+1860|0;Gc=kj+1856|0;bb=kj+1852|0;Dc=kj+1848|0;Na=kj+1844|0;xc=kj+1840|0;Ua=kj+1836|0;Sb=kj+1832|0;Oa=kj+1828|0;wc=kj+1824|0;Ta=kj+1820|0;rc=kj+1816|0;u=kj+1812|0;aa=kj+1808|0;x=kj+1804|0;$=kj+1800|0;ig=kj+1796|0;jg=kj+1792|0;og=kj+1788|0;pg=kj+1784|0;$g=kj+1780|0;Bh=kj+1776|0;Ph=kj+1772|0;Qh=kj+1768|0;A=kj+1764|0;da=kj+1760|0;ga=kj+1756|0;ha=kj+1752|0;Zc=kj+1748|0;_
c=kj+1744|0;Ad=kj+1740|0;Bd=kj+1736|0;fe=kj+1732|0;bh=kj+1728|0;Eg=kj+1724|0;Vf=kj+1720|0;Qi=kj+1716|0;la=kj+1712|0;Yf=kj+1708|0;ch=kj+1704|0;pc=kj+1700|0;id=kj+1696|0;Bb=kj+1692|0;Lb=kj+1688|0;Me=kj+1684|0;Fg=kj+1680|0;Nc=kj+1676|0;Wd=kj+1672|0;dj=kj+1668|0;mb=kj+1664|0;gh=kj+1660|0;Hg=kj+1656|0;jh=kj+1652|0;Ig=kj+1648|0;C=kj+1644|0;Mb=kj+1640|0;Qc=kj+1636|0;Sc=kj+1632|0;Ue=kj+1628|0;$f=kj+1624|0;$e=kj+1620|0;_f=kj+1616|0;Vb=kj+1612|0;qc=kj+1608|0;vi=kj+1604|0;E=kj+1600|0;vh=kj+1596|0;Hh=kj+1592|0;yh=kj+1588|0;Gh=kj+1584|0;T=kj+1580|0;Ob=kj+1576|0;fc=kj+1572|0;cd=kj+1568|0;oe=kj+1564|0;Cf=kj+1560|0;ve=kj+1556|0;Df=kj+1552|0;ic=kj+1548|0;dd=kj+1544|0;Ki=kj+1540|0;V=kj+1536|0;oh=kj+1532|0;Kh=kj+1528|0;rh=kj+1524|0;Jh=kj+1520|0;Ja=kj+1516|0;Pb=kj+1512|0;_b=kj+1508|0;Zd=kj+1504|0;He=kj+1500|0;Ff=kj+1496|0;of=kj+1492|0;Gf=kj+1488|0;bc=kj+1484|0;ad=kj+1480|0;Ib=kj+1476|0;de=kj+1472|0;wb=kj+1468|0;he=kj+1464|0;zb=kj+1460|0;Ke=kj+1456|0;hf=kj+1452|0;Tf=kj+1448|0;Li=kj+1444|0;ge=kj+1440|0;pb=kj+1436|0;Uf=kj+1432|0;sb=kj+1428|0;ee=kj+1424|0;Oi=kj+1420|0;Je=kj+1416|0;q=kj+1412|0;za=kj+1408|0;ub=kj+1404|0;vb=kj+1400|0;xb=kj+1396|0;yb=kj+1392|0;Rc=kj+1388|0;_d=kj+1384|0;Ah=kj+1380|0;ki=kj+1376|0;nb=kj+1372|0;ob=kj+1368|0;qb=kj+1364|0;rb=kj+1360|0;Mi=kj+1356|0;Ni=kj+1352|0;rg=kj+1348|0;Pi=kj+1344|0;tb=kj+1340|0;Ab=kj+1336|0;Wf=kj+1332|0;Xf=kj+1328|0;nc=kj+1324|0;oc=kj+1320|0;ie=kj+1316|0;Le=kj+1312|0;Lc=kj+1308|0;Mc=kj+1304|0;Ti=kj+1300|0;Ye=kj+1296|0;va=kj+1292|0;We=kj+1288|0;ya=kj+1284|0;Ze=kj+1280|0;Wi=kj+1276|0;Ve=kj+1272|0;_i=kj+1268|0;Re=kj+1264|0;oa=kj+1260|0;Pe=kj+1256|0;ra=kj+1252|0;Se=kj+1248|0;bj=kj+1244|0;Oe=kj+1240|0;Ri=kj+1236|0;Si=kj+1232|0;ta=kj+1228|0;ua=kj+1224|0;wa=kj+1220|0;xa=kj+1216|0;Ui=kj+1212|0;Vi=kj+1208|0;Yi=kj+1204|0;Zi=kj+1200|0;ma=kj+1196|0;na=kj+1192|0;pa=kj+1188|0;qa=kj+1184|0;$i=kj+1180|0;aj=kj+1176|0;Xi=kj+1172|0;cj=kj+1168|0;eh=kj+1164|0;fh=kj+1160|0;hh=kj+1156|0;ih=kj+1152|0;sa=kj+1148|0;B=kj+1144|0;Oc=kj+1140|0;Pc=kj+1136|0;Qe=kj+1132|0;Te=kj
+1128|0;Xe=kj+1124|0;_e=kj+1120|0;Tb=kj+1116|0;Ub=kj+1112|0;hj=kj+1108|0;cf=kj+1104|0;mi=kj+1100|0;pe=kj+1096|0;K=kj+1092|0;df=kj+1088|0;H=kj+1084|0;qe=kj+1080|0;ti=kj+1076|0;te=kj+1072|0;R=kj+1068|0;me=kj+1064|0;qi=kj+1060|0;se=kj+1056|0;O=kj+1052|0;je=kj+1048|0;fj=kj+1044|0;gj=kj+1040|0;F=kj+1036|0;G=kj+1032|0;ij=kj+1028|0;li=kj+1024|0;I=kj+1020|0;J=kj+1016|0;ri=kj+1012|0;si=kj+1008|0;ke=kj+1004|0;P=kj+1e3|0;Q=kj+996|0;le=kj+992|0;oi=kj+988|0;pi=kj+984|0;ff=kj+980|0;M=kj+976|0;N=kj+972|0;gf=kj+968|0;ni=kj+964|0;ui=kj+960|0;th=kj+956|0;uh=kj+952|0;wh=kj+948|0;xh=kj+944|0;L=kj+940|0;S=kj+936|0;dc=kj+932|0;ec=kj+928|0;ef=kj+924|0;ne=kj+920|0;re=kj+916|0;ue=kj+912|0;gc=kj+908|0;hc=kj+904|0;yi=kj+900|0;xe=kj+896|0;Bi=kj+892|0;Ie=kj+888|0;Aa=kj+884|0;ye=kj+880|0;Y=kj+876|0;jf=kj+872|0;Ii=kj+868|0;mf=kj+864|0;Ha=kj+860|0;Fe=kj+856|0;Fi=kj+852|0;lf=kj+848|0;Ea=kj+844|0;Ce=kj+840|0;wi=kj+836|0;xi=kj+832|0;W=kj+828|0;X=kj+824|0;zi=kj+820|0;Ai=kj+816|0;Z=kj+812|0;_=kj+808|0;Gi=kj+804|0;Hi=kj+800|0;De=kj+796|0;Fa=kj+792|0;Ga=kj+788|0;Ee=kj+784|0;Di=kj+780|0;Ei=kj+776|0;Ae=kj+772|0;Ca=kj+768|0;Da=kj+764|0;Be=kj+760|0;Ci=kj+756|0;Ji=kj+752|0;mh=kj+748|0;nh=kj+744|0;ph=kj+740|0;qh=kj+736|0;Ba=kj+732|0;Ia=kj+728|0;Yb=kj+724|0;Zb=kj+720|0;ze=kj+716|0;Ge=kj+712|0;kf=kj+708|0;nf=kj+704|0;$b=kj+700|0;ac=kj+696|0;ej=kj+692|0;r=kj+688|0;Jb=kj+684|0;Nb=kj+680|0;Qb=kj+676|0;Rb=kj+672|0;vc=kj+668|0;Fc=kj+664|0;Bc=kj+660|0;Jc=kj+656|0;tc=kj+652|0;uc=kj+648|0;zc=kj+644|0;Ac=kj+640|0;D=kj+636|0;Cb=kj+632|0;Wa=kj+628|0;Qa=kj+624|0;Fb=kj+620|0;Ra=kj+616|0;jb=kj+612|0;Xa=kj+608|0;Db=kj+604|0;Eb=kj+600|0;U=kj+596|0;ib=kj+592|0;kb=kj+588|0;Gb=kj+584|0;ab=kj+580|0;eb=kj+576|0;Ka=kj+572|0;Ma=kj+568|0;Sa=kj+564|0;Ya=kj+560|0;Yd=kj+556|0;ud=kj+552|0;kd=kj+548|0;yd=kj+544|0;fd=kj+540|0;zd=kj+536|0;nd=kj+532|0;vd=kj+528|0;Xd=kj+524|0;jd=kj+520|0;bd=kj+516|0;ed=kj+512|0;ld=kj+508|0;md=kj+504|0;gd=kj+500|0;od=kj+496|0;ae=kj+492|0;be=kj+488|0;qd=kj+484|0;sd=kj+480|0;wd=kj+476|0;$d=kj+472|0;Xb=kj+468|0;Kd=
kj+464|0;Uc=kj+460|0;Od=kj+456|0;kc=kj+452|0;Pd=kj+448|0;Xc=kj+444|0;Ld=kj+440|0;Wb=kj+436|0;Tc=kj+432|0;cc=kj+428|0;jc=kj+424|0;Vc=kj+420|0;Wc=kj+416|0;lc=kj+412|0;Yc=kj+408|0;Sd=kj+404|0;Ud=kj+400|0;Ed=kj+396|0;Gd=kj+392|0;Md=kj+388|0;Qd=kj+384|0;Bf=kj+380|0;ug=kj+376|0;Qf=kj+372|0;vg=kj+368|0;If=kj+364|0;zg=kj+360|0;Nf=kj+356|0;yg=kj+352|0;zf=kj+348|0;Af=kj+344|0;Of=kj+340|0;Pf=kj+336|0;Ef=kj+332|0;Hf=kj+328|0;Lf=kj+324|0;Mf=kj+320|0;Jf=kj+316|0;Rf=kj+312|0;Bg=kj+308|0;ah=kj+304|0;Sf=kj+300|0;sg=kj+296|0;wg=kj+292|0;Ag=kj+288|0;Fh=kj+284|0;ai=kj+280|0;Xh=kj+276|0;bi=kj+272|0;Nh=kj+268|0;fi=kj+264|0;Uh=kj+260|0;ei=kj+256|0;Dh=kj+252|0;Eh=kj+248|0;Vh=kj+244|0;Wh=kj+240|0;Ih=kj+236|0;Mh=kj+232|0;Sh=kj+228|0;Th=kj+224|0;Oh=kj+220|0;Yh=kj+216|0;ii=kj+212|0;Lh=kj+208|0;Zh=kj+204|0;_h=kj+200|0;ci=kj+196|0;gi=kj+192|0;bf=kj+188|0;lg=kj+184|0;eg=kj+180|0;mg=kj+176|0;qf=kj+172|0;uf=kj+168|0;bg=kj+164|0;tf=kj+160|0;Ne=kj+156|0;af=kj+152|0;cg=kj+148|0;dg=kj+144|0;we=kj+140|0;pf=kj+136|0;Zf=kj+132|0;ag=kj+128|0;rf=kj+124|0;fg=kj+120|0;wf=kj+116|0;xf=kj+112|0;gg=kj+108|0;hg=kj+104|0;ng=kj+100|0;vf=kj+96|0;lh=kj+92|0;Sg=kj+88|0;Ng=kj+84|0;Tg=kj+80|0;Cg=kj+76|0;Xg=kj+72|0;Kg=kj+68|0;Wg=kj+64|0;dh=kj+60|0;kh=kj+56|0;Lg=kj+52|0;Mg=kj+48|0;sh=kj+44|0;zh=kj+40|0;Gg=kj+36|0;Jg=kj+32|0;Dg=kj+28|0;Og=kj+24|0;Zg=kj+20|0;_g=kj+16|0;Pg=kj+12|0;Qg=kj+8|0;Ug=kj+4|0;Yg=kj;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[lj>>2]=f;c[o>>2]=h;c[p>>2]=j;g[kj+2156>>2]=.5555702447891235;g[kj+2152>>2]=.8314695954322815;g[kj+2148>>2]=.9807852506637573;g[kj+2144>>2]=.19509032368659973;g[kj+2140>>2]=.9238795042037964;g[kj+2136>>2]=.3826834261417389;g[kj+2132>>2]=.7071067690849304;c[jj>>2]=c[lj>>2];c[m>>2]=(c[m>>2]|0)+((c[lj>>2]|0)-1<<3<<2);while(1){if((c[jj>>2]|0)>=(c[o>>2]|0))break;g[s>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[m>>2]|0)+4>>2];g[t>>2]=+g[(c[m>>2]|0)+8>>2];g[w>>2]=+g[(c[m>>2]|0)+12>>2];g[u>>2]=+g[s>>2]*+g[t>>2];g[aa>>2]=+g[v>>2]*+g[t>>2];g[x>>2]=+g[v>>2]*+g[w>>2];g[$>>2]=+g[s>>2]*+g[w>>2];g[y>>2]=+g[u>>
2]+ +g[x>>2];g[Hb>>2]=+g[u>>2]-+g[x>>2];g[La>>2]=+g[$>>2]+ +g[aa>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[z>>2]=+g[(c[m>>2]|0)+16>>2];g[Za>>2]=+g[t>>2]*+g[z>>2];g[Hc>>2]=+g[v>>2]*+g[z>>2];g[cb>>2]=+g[w>>2]*+g[z>>2];g[Cc>>2]=+g[s>>2]*+g[z>>2];g[ca>>2]=+g[(c[m>>2]|0)+20>>2];g[_a>>2]=+g[w>>2]*+g[ca>>2];g[Gc>>2]=+g[s>>2]*+g[ca>>2];g[bb>>2]=+g[t>>2]*+g[ca>>2];g[Dc>>2]=+g[v>>2]*+g[ca>>2];g[fa>>2]=+g[(c[m>>2]|0)+24>>2];g[Na>>2]=+g[s>>2]*+g[fa>>2];g[xc>>2]=+g[w>>2]*+g[fa>>2];g[Ua>>2]=+g[v>>2]*+g[fa>>2];g[Sb>>2]=+g[t>>2]*+g[fa>>2];g[ja>>2]=+g[(c[m>>2]|0)+28>>2];g[Oa>>2]=+g[v>>2]*+g[ja>>2];g[wc>>2]=+g[t>>2]*+g[ja>>2];g[Ta>>2]=+g[s>>2]*+g[ja>>2];g[rc>>2]=+g[w>>2]*+g[ja>>2];g[Pa>>2]=+g[Na>>2]-+g[Oa>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[Kc>>2]=+g[Sb>>2]-+g[rc>>2];g[hd>>2]=+g[Ta>>2]-+g[Ua>>2];g[yc>>2]=+g[wc>>2]-+g[xc>>2];g[mc>>2]=+g[wc>>2]+ +g[xc>>2];g[sc>>2]=+g[Sb>>2]+ +g[rc>>2];g[Vd>>2]=+g[Na>>2]+ +g[Oa>>2];g[td>>2]=+g[z>>2]*+g[fa>>2]+ +g[ca>>2]*+g[ja>>2];g[xd>>2]=+g[z>>2]*+g[ja>>2]-+g[ca>>2]*+g[fa>>2];g[ig>>2]=+g[y>>2]*+g[fa>>2];g[jg>>2]=+g[ba>>2]*+g[ja>>2];g[kg>>2]=+g[ig>>2]-+g[jg>>2];g[tg>>2]=+g[ig>>2]+ +g[jg>>2];g[og>>2]=+g[y>>2]*+g[ja>>2];g[pg>>2]=+g[ba>>2]*+g[fa>>2];g[qg>>2]=+g[og>>2]+ +g[pg>>2];g[xg>>2]=+g[og>>2]-+g[pg>>2];g[$g>>2]=+g[Hb>>2]*+g[fa>>2];g[Bh>>2]=+g[La>>2]*+g[ja>>2];g[Ch>>2]=+g[$g>>2]+ +g[Bh>>2];g[hi>>2]=+g[$g>>2]-+g[Bh>>2];g[Ph>>2]=+g[Hb>>2]*+g[ja>>2];g[Qh>>2]=+g[La>>2]*+g[fa>>2];g[Rh>>2]=+g[Ph>>2]-+g[Qh>>2];g[ji>>2]=+g[Ph>>2]+ +g[Qh>>2];g[$a>>2]=+g[Za>>2]-+g[_a>>2];g[db>>2]=+g[bb>>2]+ +g[cb>>2];g[$h>>2]=+g[$a>>2]*+g[fa>>2]+ +g[db>>2]*+g[ja>>2];g[di>>2]=+g[$a>>2]*+g[ja>>2]-+g[db>>2]*+g[fa>>2];g[Rd>>2]=+g[Za>>2]+ +g[_a>>2];g[Td>>2]=+g[bb>>2]-+g[cb>>2];g[ce>>2]=+g[Rd>>2]*+g[fa>>2]+ +g[Td>>2]*+g[ja>>2];g[sf>>2]=+g[Rd>>2]*+g[ja>>2]-+g[Td>>2]*+g[fa>>2];g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Ic>>2]=+g[Gc>>2]-+g[Hc>>2];g[Rg>>2]=+g[Ec>>2]*+g[fa>>2]+ +g[Ic>>2]*+g[ja>>2];g[Vg>>2]=+g[Ec>>2]*+g[ja>>2]-+g[Ic>>2]*+g[fa>>2];g[pd>>2]=+g[Cc>>2]-+g[Dc>>2];g[rd>>2]=+g[Gc>>2]+ 
+g[Hc>>2];g[yf>>2]=+g[pd>>2]*+g[fa>>2]+ +g[rd>>2]*+g[ja>>2];g[Kf>>2]=+g[pd>>2]*+g[ja>>2]-+g[rd>>2]*+g[fa>>2];g[A>>2]=+g[y>>2]*+g[z>>2];g[da>>2]=+g[ba>>2]*+g[ca>>2];g[ea>>2]=+g[A>>2]+ +g[da>>2];g[fb>>2]=+g[A>>2]-+g[da>>2];g[ga>>2]=+g[y>>2]*+g[ca>>2];g[ha>>2]=+g[ba>>2]*+g[z>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[gb>>2]=+g[ga>>2]+ +g[ha>>2];g[ka>>2]=+g[ea>>2]*+g[fa>>2]+ +g[ia>>2]*+g[ja>>2];g[Kb>>2]=+g[fb>>2]*+g[ja>>2]-+g[gb>>2]*+g[fa>>2];g[lb>>2]=+g[ea>>2]*+g[ja>>2]-+g[ia>>2]*+g[fa>>2];g[hb>>2]=+g[fb>>2]*+g[fa>>2]+ +g[gb>>2]*+g[ja>>2];g[Zc>>2]=+g[Hb>>2]*+g[z>>2];g[_c>>2]=+g[La>>2]*+g[ca>>2];g[$c>>2]=+g[Zc>>2]-+g[_c>>2];g[Hd>>2]=+g[Zc>>2]+ +g[_c>>2];g[Ad>>2]=+g[Hb>>2]*+g[ca>>2];g[Bd>>2]=+g[La>>2]*+g[z>>2];g[Cd>>2]=+g[Ad>>2]+ +g[Bd>>2];g[Id>>2]=+g[Ad>>2]-+g[Bd>>2];g[Dd>>2]=+g[$c>>2]*+g[fa>>2]+ +g[Cd>>2]*+g[ja>>2];g[Nd>>2]=+g[Hd>>2]*+g[ja>>2]-+g[Id>>2]*+g[fa>>2];g[Fd>>2]=+g[$c>>2]*+g[ja>>2]-+g[Cd>>2]*+g[fa>>2];g[Jd>>2]=+g[Hd>>2]*+g[fa>>2]+ +g[Id>>2]*+g[ja>>2];g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Ib>>2]=+g[q>>2]+ +g[za>>2];g[de>>2]=+g[q>>2]-+g[za>>2];g[ub>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[vb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[he>>2]=+g[ub>>2]+ +g[vb>>2];g[xb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[yb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[zb>>2]=+g[xb>>2]-+g[yb>>2];g[Ke>>2]=+g[xb>>2]+ +g[yb>>2];g[Rc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[hf>>2]=+g[Rc>>2]+ +g[_d>>2];g[Tf>>2]=+g[Rc>>2]-+g[_d>>2];g[Ah>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ki>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Li>>2]=+g[Ah>>2]+ +g[ki>>2];g[ge>>2]=+g[Ah>>2]-+g[ki>>2];g[nb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[ob>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[pb>>2]=+g[nb>>2]-+g[ob>>2];g[Uf>>2]=+g[nb>>2]+ +g[ob>>2];g[qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[rb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[sb>>2]=+g[qb>>2]-+g[rb>>2];g[ee>>2]=+g[qb>>2]+ 
+g[rb>>2];g[Mi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ni>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Oi>>2]=+g[Mi>>2]+ +g[Ni>>2];g[Je>>2]=+g[Mi>>2]-+g[Ni>>2];g[fe>>2]=+g[de>>2]-+g[ee>>2];g[bh>>2]=+g[de>>2]+ +g[ee>>2];g[Eg>>2]=+g[Uf>>2]-+g[Tf>>2];g[Vf>>2]=+g[Tf>>2]+ +g[Uf>>2];g[rg>>2]=+g[Ib>>2]+ +g[hf>>2];g[Pi>>2]=+g[Li>>2]+ +g[Oi>>2];g[Qi>>2]=+g[rg>>2]+ +g[Pi>>2];g[la>>2]=+g[rg>>2]-+g[Pi>>2];g[Wf>>2]=+g[ge>>2]+ +g[he>>2];g[Xf>>2]=+g[Je>>2]+ +g[Ke>>2];g[Yf>>2]=(+g[Wf>>2]-+g[Xf>>2])*.7071067690849304;g[ch>>2]=(+g[Wf>>2]+ +g[Xf>>2])*.7071067690849304;g[nc>>2]=+g[pb>>2]-+g[sb>>2];g[oc>>2]=+g[Li>>2]-+g[Oi>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[id>>2]=+g[oc>>2]+ +g[nc>>2];g[tb>>2]=+g[pb>>2]+ +g[sb>>2];g[Ab>>2]=+g[wb>>2]+ +g[zb>>2];g[Bb>>2]=+g[tb>>2]-+g[Ab>>2];g[Lb>>2]=+g[tb>>2]+ +g[Ab>>2];g[ie>>2]=+g[ge>>2]-+g[he>>2];g[Le>>2]=+g[Je>>2]-+g[Ke>>2];g[Me>>2]=(+g[ie>>2]+ +g[Le>>2])*.7071067690849304;g[Fg>>2]=(+g[ie>>2]-+g[Le>>2])*.7071067690849304;g[Lc>>2]=+g[Ib>>2]-+g[hf>>2];g[Mc>>2]=+g[zb>>2]-+g[wb>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[Wd>>2]=+g[Lc>>2]+ +g[Mc>>2];g[Ri>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Si>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[Ti>>2]=+g[Ri>>2]+ +g[Si>>2];g[Ye>>2]=+g[Ri>>2]-+g[Si>>2];g[ta>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[We>>2]=+g[ta>>2]+ +g[ua>>2];g[wa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[xa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2];g[Ze>>2]=+g[wa>>2]+ +g[xa>>2];g[Ui>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Vi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Wi>>2]=+g[Ui>>2]+ +g[Vi>>2];g[Ve>>2]=+g[Ui>>2]-+g[Vi>>2];g[Yi>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Zi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[_i>>2]=+g[Yi>>2]+ +g[Zi>>2];g[Re>>2]=+g[Yi>>2]-+g[Zi>>2];g[ma>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[Pe>>2]=+g[ma>>2]+ 
+g[na>>2];g[pa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[qa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[ra>>2]=+g[pa>>2]-+g[qa>>2];g[Se>>2]=+g[pa>>2]+ +g[qa>>2];g[$i>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[aj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[bj>>2]=+g[$i>>2]+ +g[aj>>2];g[Oe>>2]=+g[$i>>2]-+g[aj>>2];g[Xi>>2]=+g[Ti>>2]+ +g[Wi>>2];g[cj>>2]=+g[_i>>2]+ +g[bj>>2];g[dj>>2]=+g[Xi>>2]+ +g[cj>>2];g[mb>>2]=+g[Xi>>2]-+g[cj>>2];g[eh>>2]=+g[Ye>>2]+ +g[Ze>>2];g[fh>>2]=+g[We>>2]-+g[Ve>>2];g[gh>>2]=+g[eh>>2]*.3826834261417389-+g[fh>>2]*.9238795042037964;g[Hg>>2]=+g[fh>>2]*.3826834261417389+ +g[eh>>2]*.9238795042037964;g[hh>>2]=+g[Re>>2]+ +g[Se>>2];g[ih>>2]=+g[Oe>>2]+ +g[Pe>>2];g[jh>>2]=+g[hh>>2]*.3826834261417389-+g[ih>>2]*.9238795042037964;g[Ig>>2]=+g[ih>>2]*.3826834261417389+ +g[hh>>2]*.9238795042037964;g[sa>>2]=+g[oa>>2]+ +g[ra>>2];g[B>>2]=+g[va>>2]+ +g[ya>>2];g[C>>2]=+g[sa>>2]-+g[B>>2];g[Mb>>2]=+g[B>>2]+ +g[sa>>2];g[Oc>>2]=+g[oa>>2]-+g[ra>>2];g[Pc>>2]=+g[_i>>2]-+g[bj>>2];g[Qc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Sc>>2]=+g[Pc>>2]+ +g[Oc>>2];g[Qe>>2]=+g[Oe>>2]-+g[Pe>>2];g[Te>>2]=+g[Re>>2]-+g[Se>>2];g[Ue>>2]=+g[Qe>>2]*.9238795042037964-+g[Te>>2]*.3826834261417389;g[$f>>2]=+g[Qe>>2]*.3826834261417389+ +g[Te>>2]*.9238795042037964;g[Xe>>2]=+g[Ve>>2]+ +g[We>>2];g[_e>>2]=+g[Ye>>2]-+g[Ze>>2];g[$e>>2]=+g[Xe>>2]*.9238795042037964+ +g[_e>>2]*.3826834261417389;g[_f>>2]=+g[_e>>2]*.9238795042037964-+g[Xe>>2]*.3826834261417389;g[Tb>>2]=+g[Ti>>2]-+g[Wi>>2];g[Ub>>2]=+g[va>>2]-+g[ya>>2];g[Vb>>2]=+g[Tb>>2]+ +g[Ub>>2];g[qc>>2]=+g[Tb>>2]-+g[Ub>>2];g[fj>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[gj>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[hj>>2]=+g[fj>>2]+ +g[gj>>2];g[cf>>2]=+g[fj>>2]-+g[gj>>2];g[ij>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[li>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[mi>>2]=+g[ij>>2]+ +g[li>>2];g[pe>>2]=+g[ij>>2]-+g[li>>2];g[I>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[J>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[K>>2]=+g[I>>2]-+g[J>>2];g[df>>2]=+g[I>>2]+ 
+g[J>>2];g[F>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[G>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[qe>>2]=+g[F>>2]+ +g[G>>2];g[ri>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[si>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ke>>2]=+g[ri>>2]-+g[si>>2];g[P>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[Q>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[le>>2]=+g[P>>2]+ +g[Q>>2];g[ti>>2]=+g[ri>>2]+ +g[si>>2];g[te>>2]=+g[ke>>2]+ +g[le>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[me>>2]=+g[ke>>2]-+g[le>>2];g[oi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[pi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[ff>>2]=+g[oi>>2]-+g[pi>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[N>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[gf>>2]=+g[M>>2]+ +g[N>>2];g[qi>>2]=+g[oi>>2]+ +g[pi>>2];g[se>>2]=+g[ff>>2]+ +g[gf>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[je>>2]=+g[ff>>2]-+g[gf>>2];g[ni>>2]=+g[hj>>2]+ +g[mi>>2];g[ui>>2]=+g[qi>>2]+ +g[ti>>2];g[vi>>2]=+g[ni>>2]+ +g[ui>>2];g[E>>2]=+g[ni>>2]-+g[ui>>2];g[th>>2]=+g[qe>>2]-+g[pe>>2];g[uh>>2]=(+g[je>>2]-+g[me>>2])*.7071067690849304;g[vh>>2]=+g[th>>2]+ +g[uh>>2];g[Hh>>2]=+g[th>>2]-+g[uh>>2];g[wh>>2]=+g[cf>>2]+ +g[df>>2];g[xh>>2]=(+g[se>>2]+ +g[te>>2])*.7071067690849304;g[yh>>2]=+g[wh>>2]-+g[xh>>2];g[Gh>>2]=+g[wh>>2]+ +g[xh>>2];g[L>>2]=+g[H>>2]+ +g[K>>2];g[S>>2]=+g[O>>2]+ +g[R>>2];g[T>>2]=+g[L>>2]-+g[S>>2];g[Ob>>2]=+g[L>>2]+ +g[S>>2];g[dc>>2]=+g[H>>2]-+g[K>>2];g[ec>>2]=+g[qi>>2]-+g[ti>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[cd>>2]=+g[ec>>2]+ +g[dc>>2];g[ef>>2]=+g[cf>>2]-+g[df>>2];g[ne>>2]=(+g[je>>2]+ +g[me>>2])*.7071067690849304;g[oe>>2]=+g[ef>>2]-+g[ne>>2];g[Cf>>2]=+g[ef>>2]+ +g[ne>>2];g[re>>2]=+g[pe>>2]+ +g[qe>>2];g[ue>>2]=(+g[se>>2]-+g[te>>2])*.7071067690849304;g[ve>>2]=+g[re>>2]-+g[ue>>2];g[Df>>2]=+g[re>>2]+ +g[ue>>2];g[gc>>2]=+g[hj>>2]-+g[mi>>2];g[hc>>2]=+g[R>>2]-+g[O>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[dd>>2]=+g[gc>>2]+ +g[hc>>2];g[wi>>2]=+g[c[l>>2]>>2];g[xi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[yi>>2]=+g[wi>>2]+ 
+g[xi>>2];g[xe>>2]=+g[wi>>2]-+g[xi>>2];g[zi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ai>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Bi>>2]=+g[zi>>2]+ +g[Ai>>2];g[Ie>>2]=+g[zi>>2]-+g[Ai>>2];g[Z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[_>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[Aa>>2]=+g[Z>>2]-+g[_>>2];g[ye>>2]=+g[Z>>2]+ +g[_>>2];g[W>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[X>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[jf>>2]=+g[W>>2]+ +g[X>>2];g[Gi>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Hi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[De>>2]=+g[Gi>>2]-+g[Hi>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Ee>>2]=+g[Fa>>2]+ +g[Ga>>2];g[Ii>>2]=+g[Gi>>2]+ +g[Hi>>2];g[mf>>2]=+g[De>>2]+ +g[Ee>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Fe>>2]=+g[De>>2]-+g[Ee>>2];g[Di>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Ei>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Ae>>2]=+g[Di>>2]-+g[Ei>>2];g[Ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Be>>2]=+g[Ca>>2]+ +g[Da>>2];g[Fi>>2]=+g[Di>>2]+ +g[Ei>>2];g[lf>>2]=+g[Ae>>2]+ +g[Be>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[Ce>>2]=+g[Ae>>2]-+g[Be>>2];g[Ci>>2]=+g[yi>>2]+ +g[Bi>>2];g[Ji>>2]=+g[Fi>>2]+ +g[Ii>>2];g[Ki>>2]=+g[Ci>>2]+ +g[Ji>>2];g[V>>2]=+g[Ci>>2]-+g[Ji>>2];g[mh>>2]=(+g[Ce>>2]-+g[Fe>>2])*.7071067690849304;g[nh>>2]=+g[Ie>>2]+ +g[jf>>2];g[oh>>2]=+g[mh>>2]-+g[nh>>2];g[Kh>>2]=+g[nh>>2]+ +g[mh>>2];g[ph>>2]=+g[xe>>2]+ +g[ye>>2];g[qh>>2]=(+g[lf>>2]+ +g[mf>>2])*.7071067690849304;g[rh>>2]=+g[ph>>2]-+g[qh>>2];g[Jh>>2]=+g[ph>>2]+ +g[qh>>2];g[Ba>>2]=+g[Y>>2]+ +g[Aa>>2];g[Ia>>2]=+g[Ea>>2]+ +g[Ha>>2];g[Ja>>2]=+g[Ba>>2]-+g[Ia>>2];g[Pb>>2]=+g[Ba>>2]+ +g[Ia>>2];g[Yb>>2]=+g[Y>>2]-+g[Aa>>2];g[Zb>>2]=+g[Fi>>2]-+g[Ii>>2];g[_b>>2]=+g[Yb>>2]-+g[Zb>>2];g[Zd>>2]=+g[Zb>>2]+ +g[Yb>>2];g[ze>>2]=+g[xe>>2]-+g[ye>>2];g[Ge>>2]=(+g[Ce>>2]+ +g[Fe>>2])*.7071067690849304;g[He>>2]=+g[ze>>2]-+g[Ge>>2];g[Ff>>2]=+g[ze>>2]+ 
+g[Ge>>2];g[kf>>2]=+g[Ie>>2]-+g[jf>>2];g[nf>>2]=(+g[lf>>2]-+g[mf>>2])*.7071067690849304;g[of>>2]=+g[kf>>2]-+g[nf>>2];g[Gf>>2]=+g[kf>>2]+ +g[nf>>2];g[$b>>2]=+g[yi>>2]-+g[Bi>>2];g[ac>>2]=+g[Ha>>2]-+g[Ea>>2];g[bc>>2]=+g[$b>>2]-+g[ac>>2];g[ad>>2]=+g[$b>>2]+ +g[ac>>2];g[ej>>2]=+g[Qi>>2]+ +g[dj>>2];g[r>>2]=+g[vi>>2]+ +g[Ki>>2];g[Jb>>2]=+g[ej>>2]-+g[r>>2];g[Nb>>2]=+g[Lb>>2]+ +g[Mb>>2];g[Qb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Rb>>2]=+g[Nb>>2]-+g[Qb>>2];g[c[k>>2]>>2]=+g[ej>>2]+ +g[r>>2];g[c[l>>2]>>2]=+g[Nb>>2]+ +g[Qb>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[hb>>2]*+g[Jb>>2]-+g[Kb>>2]*+g[Rb>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Kb>>2]*+g[Jb>>2]+ +g[hb>>2]*+g[Rb>>2];g[tc>>2]=+g[Qi>>2]-+g[dj>>2];g[uc>>2]=+g[Pb>>2]-+g[Ob>>2];g[vc>>2]=+g[tc>>2]-+g[uc>>2];g[Fc>>2]=+g[tc>>2]+ +g[uc>>2];g[zc>>2]=+g[Lb>>2]-+g[Mb>>2];g[Ac>>2]=+g[vi>>2]-+g[Ki>>2];g[Bc>>2]=+g[zc>>2]-+g[Ac>>2];g[Jc>>2]=+g[Ac>>2]+ +g[zc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[sc>>2]*+g[vc>>2]-+g[yc>>2]*+g[Bc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[sc>>2]*+g[Bc>>2]+ +g[yc>>2]*+g[vc>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ec>>2]*+g[Fc>>2]-+g[Ic>>2]*+g[Jc>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ec>>2]*+g[Jc>>2]+ +g[Ic>>2]*+g[Fc>>2];g[D>>2]=+g[la>>2]+ +g[C>>2];g[Cb>>2]=+g[mb>>2]+ +g[Bb>>2];g[Wa>>2]=+g[Bb>>2]-+g[mb>>2];g[Qa>>2]=+g[la>>2]-+g[C>>2];g[Db>>2]=+g[E>>2]+ +g[T>>2];g[Eb>>2]=+g[Ja>>2]-+g[V>>2];g[Fb>>2]=(+g[Db>>2]+ +g[Eb>>2])*.7071067690849304;g[Ra>>2]=(+g[Eb>>2]-+g[Db>>2])*.7071067690849304;g[U>>2]=+g[E>>2]-+g[T>>2];g[ib>>2]=+g[V>>2]+ +g[Ja>>2];g[jb>>2]=(+g[U>>2]+ +g[ib>>2])*.7071067690849304;g[Xa>>2]=(+g[U>>2]-+g[ib>>2])*.7071067690849304;g[kb>>2]=+g[D>>2]-+g[jb>>2];g[Gb>>2]=+g[Cb>>2]-+g[Fb>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[ka>>2]*+g[kb>>2]-+g[lb>>2]*+g[Gb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[lb>>2]*+g[kb>>2]+ +g[ka>>2]*+g[Gb>>2];g[ab>>2]=+g[Qa>>2]+ +g[Ra>>2];g[eb>>2]=+g[Wa>>2]+ 
+g[Xa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[$a>>2]*+g[ab>>2]-+g[db>>2]*+g[eb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[$a>>2]*+g[eb>>2]+ +g[db>>2]*+g[ab>>2];g[Ka>>2]=+g[D>>2]+ +g[jb>>2];g[Ma>>2]=+g[Cb>>2]+ +g[Fb>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Hb>>2]*+g[Ka>>2]-+g[La>>2]*+g[Ma>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[La>>2]*+g[Ka>>2]+ +g[Hb>>2]*+g[Ma>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Va>>2]*+g[Ya>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[Pa>>2]*+g[Ya>>2]+ +g[Va>>2]*+g[Sa>>2];g[Xd>>2]=(+g[qc>>2]+ +g[Sc>>2])*.7071067690849304;g[Yd>>2]=+g[Wd>>2]-+g[Xd>>2];g[ud>>2]=+g[Wd>>2]+ +g[Xd>>2];g[jd>>2]=(+g[Vb>>2]+ +g[Qc>>2])*.7071067690849304;g[kd>>2]=+g[id>>2]-+g[jd>>2];g[yd>>2]=+g[id>>2]+ +g[jd>>2];g[bd>>2]=+g[Zd>>2]*.9238795042037964-+g[ad>>2]*.3826834261417389;g[ed>>2]=+g[cd>>2]*.9238795042037964+ +g[dd>>2]*.3826834261417389;g[fd>>2]=+g[bd>>2]-+g[ed>>2];g[zd>>2]=+g[ed>>2]+ +g[bd>>2];g[ld>>2]=+g[dd>>2]*.9238795042037964-+g[cd>>2]*.3826834261417389;g[md>>2]=+g[Zd>>2]*.3826834261417389+ +g[ad>>2]*.9238795042037964;g[nd>>2]=+g[ld>>2]-+g[md>>2];g[vd>>2]=+g[ld>>2]+ +g[md>>2];g[gd>>2]=+g[Yd>>2]-+g[fd>>2];g[od>>2]=+g[kd>>2]-+g[nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[Vd>>2]*+g[gd>>2]-+g[hd>>2]*+g[od>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[hd>>2]*+g[gd>>2]+ +g[Vd>>2]*+g[od>>2];g[ae>>2]=+g[ud>>2]+ +g[vd>>2];g[be>>2]=+g[yd>>2]+ +g[zd>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[y>>2]*+g[ae>>2]-+g[ba>>2]*+g[be>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[y>>2]*+g[be>>2]+ +g[ba>>2]*+g[ae>>2];g[qd>>2]=+g[Yd>>2]+ +g[fd>>2];g[sd>>2]=+g[kd>>2]+ +g[nd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[pd>>2]*+g[qd>>2]-+g[rd>>2]*+g[sd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[rd>>2]*+g[qd>>2]+ 
+g[pd>>2]*+g[sd>>2];g[wd>>2]=+g[ud>>2]-+g[vd>>2];g[$d>>2]=+g[yd>>2]-+g[zd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[td>>2]*+g[wd>>2]-+g[xd>>2]*+g[$d>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[td>>2]*+g[$d>>2]+ +g[xd>>2]*+g[wd>>2];g[Wb>>2]=(+g[Qc>>2]-+g[Vb>>2])*.7071067690849304;g[Xb>>2]=+g[Nc>>2]-+g[Wb>>2];g[Kd>>2]=+g[Nc>>2]+ +g[Wb>>2];g[Tc>>2]=(+g[qc>>2]-+g[Sc>>2])*.7071067690849304;g[Uc>>2]=+g[pc>>2]-+g[Tc>>2];g[Od>>2]=+g[pc>>2]+ +g[Tc>>2];g[cc>>2]=+g[_b>>2]*.3826834261417389-+g[bc>>2]*.9238795042037964;g[jc>>2]=+g[fc>>2]*.3826834261417389+ +g[ic>>2]*.9238795042037964;g[kc>>2]=+g[cc>>2]-+g[jc>>2];g[Pd>>2]=+g[jc>>2]+ +g[cc>>2];g[Vc>>2]=+g[ic>>2]*.3826834261417389-+g[fc>>2]*.9238795042037964;g[Wc>>2]=+g[_b>>2]*.9238795042037964+ +g[bc>>2]*.3826834261417389;g[Xc>>2]=+g[Vc>>2]-+g[Wc>>2];g[Ld>>2]=+g[Vc>>2]+ +g[Wc>>2];g[lc>>2]=+g[Xb>>2]-+g[kc>>2];g[Yc>>2]=+g[Uc>>2]-+g[Xc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Kc>>2]*+g[lc>>2]-+g[mc>>2]*+g[Yc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[mc>>2]*+g[lc>>2]+ +g[Kc>>2]*+g[Yc>>2];g[Sd>>2]=+g[Kd>>2]+ +g[Ld>>2];g[Ud>>2]=+g[Od>>2]+ +g[Pd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Rd>>2]*+g[Sd>>2]-+g[Td>>2]*+g[Ud>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[Rd>>2]*+g[Ud>>2]+ +g[Td>>2]*+g[Sd>>2];g[Ed>>2]=+g[Xb>>2]+ +g[kc>>2];g[Gd>>2]=+g[Uc>>2]+ +g[Xc>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Dd>>2]*+g[Ed>>2]-+g[Fd>>2]*+g[Gd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Fd>>2]*+g[Ed>>2]+ +g[Dd>>2]*+g[Gd>>2];g[Md>>2]=+g[Kd>>2]-+g[Ld>>2];g[Qd>>2]=+g[Od>>2]-+g[Pd>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Jd>>2]*+g[Md>>2]-+g[Nd>>2]*+g[Qd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Jd>>2]*+g[Qd>>2]+ +g[Nd>>2]*+g[Md>>2];g[zf>>2]=+g[fe>>2]+ +g[Me>>2];g[Af>>2]=+g[_f>>2]+ +g[$f>>2];g[Bf>>2]=+g[zf>>2]+ +g[Af>>2];g[ug>>2]=+g[zf>>2]-+g[Af>>2];g[Of>>2]=+g[Cf>>2]*.19509032368659973+ +g[Df>>2]*.9807852506637573;g[Pf>>2]=+g[Gf>>2]*.9807852506637573-+g[Ff>>2]*.19509032368659973;g[Qf>>2]=+g[Of>>2]+ 
+g[Pf>>2];g[vg>>2]=+g[Pf>>2]-+g[Of>>2];g[Ef>>2]=+g[Cf>>2]*.9807852506637573-+g[Df>>2]*.19509032368659973;g[Hf>>2]=+g[Ff>>2]*.9807852506637573+ +g[Gf>>2]*.19509032368659973;g[If>>2]=+g[Ef>>2]+ +g[Hf>>2];g[zg>>2]=+g[Ef>>2]-+g[Hf>>2];g[Lf>>2]=+g[Vf>>2]+ +g[Yf>>2];g[Mf>>2]=+g[$e>>2]+ +g[Ue>>2];g[Nf>>2]=+g[Lf>>2]+ +g[Mf>>2];g[yg>>2]=+g[Lf>>2]-+g[Mf>>2];g[Jf>>2]=+g[Bf>>2]-+g[If>>2];g[Rf>>2]=+g[Nf>>2]-+g[Qf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[yf>>2]*+g[Jf>>2]-+g[Kf>>2]*+g[Rf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Kf>>2]*+g[Jf>>2]+ +g[yf>>2]*+g[Rf>>2];g[Bg>>2]=+g[ug>>2]+ +g[vg>>2];g[ah>>2]=+g[yg>>2]+ +g[zg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[z>>2]*+g[Bg>>2]-+g[ca>>2]*+g[ah>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[z>>2]*+g[ah>>2]+ +g[ca>>2]*+g[Bg>>2];g[Sf>>2]=+g[Bf>>2]+ +g[If>>2];g[sg>>2]=+g[Nf>>2]+ +g[Qf>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[s>>2]*+g[Sf>>2]-+g[v>>2]*+g[sg>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[v>>2]*+g[Sf>>2]+ +g[s>>2]*+g[sg>>2];g[wg>>2]=+g[ug>>2]-+g[vg>>2];g[Ag>>2]=+g[yg>>2]-+g[zg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[tg>>2]*+g[wg>>2]-+g[xg>>2]*+g[Ag>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[tg>>2]*+g[Ag>>2]+ +g[xg>>2]*+g[wg>>2];g[Dh>>2]=+g[bh>>2]+ +g[ch>>2];g[Eh>>2]=+g[Hg>>2]+ +g[Ig>>2];g[Fh>>2]=+g[Dh>>2]-+g[Eh>>2];g[ai>>2]=+g[Dh>>2]+ +g[Eh>>2];g[Vh>>2]=+g[Hh>>2]*.19509032368659973+ +g[Gh>>2]*.9807852506637573;g[Wh>>2]=+g[Kh>>2]*.19509032368659973+ +g[Jh>>2]*.9807852506637573;g[Xh>>2]=+g[Vh>>2]-+g[Wh>>2];g[bi>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Ih>>2]=+g[Gh>>2]*.19509032368659973-+g[Hh>>2]*.9807852506637573;g[Mh>>2]=+g[Jh>>2]*.19509032368659973-+g[Kh>>2]*.9807852506637573;g[Nh>>2]=+g[Ih>>2]+ +g[Mh>>2];g[fi>>2]=+g[Ih>>2]-+g[Mh>>2];g[Sh>>2]=+g[Eg>>2]-+g[Fg>>2];g[Th>>2]=+g[gh>>2]-+g[jh>>2];g[Uh>>2]=+g[Sh>>2]+ 
+g[Th>>2];g[ei>>2]=+g[Sh>>2]-+g[Th>>2];g[Oh>>2]=+g[Fh>>2]-+g[Nh>>2];g[Yh>>2]=+g[Uh>>2]-+g[Xh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Ch>>2]*+g[Oh>>2]-+g[Rh>>2]*+g[Yh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Rh>>2]*+g[Oh>>2]+ +g[Ch>>2]*+g[Yh>>2];g[ii>>2]=+g[ai>>2]+ +g[bi>>2];g[Lh>>2]=+g[ei>>2]-+g[fi>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[hi>>2]*+g[ii>>2]-+g[ji>>2]*+g[Lh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[hi>>2]*+g[Lh>>2]+ +g[ji>>2]*+g[ii>>2];g[Zh>>2]=+g[Fh>>2]+ +g[Nh>>2];g[_h>>2]=+g[Uh>>2]+ +g[Xh>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ea>>2]*+g[Zh>>2]-+g[ia>>2]*+g[_h>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ia>>2]*+g[Zh>>2]+ +g[ea>>2]*+g[_h>>2];g[ci>>2]=+g[ai>>2]-+g[bi>>2];g[gi>>2]=+g[ei>>2]+ +g[fi>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[$h>>2]*+g[ci>>2]-+g[di>>2]*+g[gi>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[$h>>2]*+g[gi>>2]+ +g[di>>2]*+g[ci>>2];g[Ne>>2]=+g[fe>>2]-+g[Me>>2];g[af>>2]=+g[Ue>>2]-+g[$e>>2];g[bf>>2]=+g[Ne>>2]+ +g[af>>2];g[lg>>2]=+g[Ne>>2]-+g[af>>2];g[cg>>2]=+g[oe>>2]*.8314695954322815+ +g[ve>>2]*.5555702447891235;g[dg>>2]=+g[of>>2]*.5555702447891235-+g[He>>2]*.8314695954322815;g[eg>>2]=+g[cg>>2]+ +g[dg>>2];g[mg>>2]=+g[dg>>2]-+g[cg>>2];g[we>>2]=+g[oe>>2]*.5555702447891235-+g[ve>>2]*.8314695954322815;g[pf>>2]=+g[He>>2]*.5555702447891235+ +g[of>>2]*.8314695954322815;g[qf>>2]=+g[we>>2]+ +g[pf>>2];g[uf>>2]=+g[we>>2]-+g[pf>>2];g[Zf>>2]=+g[Vf>>2]-+g[Yf>>2];g[ag>>2]=+g[_f>>2]-+g[$f>>2];g[bg>>2]=+g[Zf>>2]+ +g[ag>>2];g[tf>>2]=+g[Zf>>2]-+g[ag>>2];g[rf>>2]=+g[bf>>2]-+g[qf>>2];g[fg>>2]=+g[bg>>2]-+g[eg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[ce>>2]*+g[rf>>2]-+g[sf>>2]*+g[fg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[sf>>2]*+g[rf>>2]+ +g[ce>>2]*+g[fg>>2];g[wf>>2]=+g[lg>>2]+ +g[mg>>2];g[xf>>2]=+g[tf>>2]+ +g[uf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[$c>>2]*+g[wf>>2]-+g[Cd>>2]*+g[xf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[$c>>2]*+g[xf>>2]+ +g[Cd>>2]*+g[wf>>2];g[gg>>2]=+g[bf>>2]+ +g[qf>>2];g[hg>>2]=+g[bg>>2]+ 
+g[eg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Hd>>2]*+g[gg>>2]-+g[Id>>2]*+g[hg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Id>>2]*+g[gg>>2]+ +g[Hd>>2]*+g[hg>>2];g[ng>>2]=+g[lg>>2]-+g[mg>>2];g[vf>>2]=+g[tf>>2]-+g[uf>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[kg>>2]*+g[ng>>2]-+g[qg>>2]*+g[vf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[kg>>2]*+g[vf>>2]+ +g[qg>>2]*+g[ng>>2];g[dh>>2]=+g[bh>>2]-+g[ch>>2];g[kh>>2]=+g[gh>>2]+ +g[jh>>2];g[lh>>2]=+g[dh>>2]-+g[kh>>2];g[Sg>>2]=+g[dh>>2]+ +g[kh>>2];g[Lg>>2]=+g[yh>>2]*.8314695954322815-+g[vh>>2]*.5555702447891235;g[Mg>>2]=+g[oh>>2]*.5555702447891235+ +g[rh>>2]*.8314695954322815;g[Ng>>2]=+g[Lg>>2]-+g[Mg>>2];g[Tg>>2]=+g[Lg>>2]+ +g[Mg>>2];g[sh>>2]=+g[oh>>2]*.8314695954322815-+g[rh>>2]*.5555702447891235;g[zh>>2]=+g[vh>>2]*.8314695954322815+ +g[yh>>2]*.5555702447891235;g[Cg>>2]=+g[sh>>2]-+g[zh>>2];g[Xg>>2]=+g[zh>>2]+ +g[sh>>2];g[Gg>>2]=+g[Eg>>2]+ +g[Fg>>2];g[Jg>>2]=+g[Hg>>2]-+g[Ig>>2];g[Kg>>2]=+g[Gg>>2]-+g[Jg>>2];g[Wg>>2]=+g[Gg>>2]+ +g[Jg>>2];g[Dg>>2]=+g[lh>>2]-+g[Cg>>2];g[Og>>2]=+g[Kg>>2]-+g[Ng>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[fa>>2]*+g[Dg>>2]-+g[ja>>2]*+g[Og>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[ja>>2]*+g[Dg>>2]+ +g[fa>>2]*+g[Og>>2];g[Zg>>2]=+g[Sg>>2]+ +g[Tg>>2];g[_g>>2]=+g[Wg>>2]+ +g[Xg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[t>>2]*+g[Zg>>2]-+g[w>>2]*+g[_g>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[t>>2]*+g[_g>>2]+ +g[w>>2]*+g[Zg>>2];g[Pg>>2]=+g[lh>>2]+ +g[Cg>>2];g[Qg>>2]=+g[Kg>>2]+ +g[Ng>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[fb>>2]*+g[Pg>>2]-+g[gb>>2]*+g[Qg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[gb>>2]*+g[Pg>>2]+ +g[fb>>2]*+g[Qg>>2];g[Ug>>2]=+g[Sg>>2]-+g[Tg>>2];g[Yg>>2]=+g[Wg>>2]-+g[Xg>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Rg>>2]*+g[Ug>>2]-+g[Vg>>2]*+g[Yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Rg>>2]*+g[Yg>>2]+ 
+g[Vg>>2]*+g[Ug>>2];c[jj>>2]=(c[jj>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32;c[n>>2]=c[n>>2]^c[2998]}i=kj;return}/* Ft(a): coerces a to int32, saves the module-level offset i, reserves a 16-byte scratch frame, stores a in that frame, calls bn(a, 53, 7576), then restores i. NOTE(review): 53 and 7576 look like a function-table index and a static data address - confirm against the unminified build. */function Ft(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,53,7576);i=b;return}/* Gt(a,b,d,e,f,h,j): every argument is coerced to int32. a and b are byte offsets into the heap view g (indexed with >>2, i.e. 4-byte elements); d is the byte offset of a coefficient table read four values per pass; e is the element stride used inside a and b; the pass counter starts at f and the loop stops once it reaches h; j is the per-pass step (a moves by +j elements, b by -j elements). Each pass reads and rewrites in place the elements at 0, e, 2e and 3e of both a and b. Returns nothing. NOTE(review): the arithmetic resembles a generated radix-4 twiddle butterfly from FFTW - confirm against the FFTW sources. */function Gt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;/* 144-byte scratch frame starting at the old i; each local below holds the byte offset of one 4-byte slot in it */k=T+140|0;l=T+136|0;m=T+132|0;n=T+128|0;U=T+124|0;o=T+120|0;p=T+116|0;S=T+112|0;w=T+108|0;y=T+104|0;x=T+100|0;z=T+96|0;A=T+92|0;C=T+88|0;s=T+84|0;K=T+80|0;v=T+76|0;N=T+72|0;F=T+68|0;O=T+64|0;I=T+60|0;L=T+56|0;B=T+52|0;J=T+48|0;q=T+44|0;r=T+40|0;t=T+36|0;u=T+32|0;D=T+28|0;E=T+24|0;G=T+20|0;H=T+16|0;M=T+12|0;P=T+8|0;Q=T+4|0;R=T;/* spill the seven arguments into their frame slots */c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[U>>2]=f;c[o>>2]=h;c[p>>2]=j;/* counter starts at f; the table offset is advanced by (f-1)*16 bytes */c[S>>2]=c[U>>2];c[m>>2]=(c[m>>2]|0)+((c[U>>2]|0)-1<<2<<2);while(1){if((c[S>>2]|0)>=(c[o>>2]|0))break;/* load four table values (w,y,x,z) and derive a third pair: A = w*x + y*z, C = w*z - y*x */g[w>>2]=+g[c[m>>2]>>2];g[y>>2]=+g[(c[m>>2]|0)+4>>2];g[x>>2]=+g[(c[m>>2]|0)+8>>2];g[z>>2]=+g[(c[m>>2]|0)+12>>2];g[A>>2]=+g[w>>2]*+g[x>>2]+ +g[y>>2]*+g[z>>2];g[C>>2]=+g[w>>2]*+g[z>>2]-+g[y>>2]*+g[x>>2];/* load the eight inputs from a and b and form pairwise sums and differences */g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[q>>2]+ +g[r>>2];g[K>>2]=+g[q>>2]-+g[r>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[c[l>>2]>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[N>>2]=+g[t>>2]-+g[u>>2];g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[E>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[F>>2]=+g[D>>2]-+g[E>>2];g[O>>2]=+g[D>>2]+ +g[E>>2];g[G>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[H>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[L>>2]=+g[G>>2]+ +g[H>>2];/* element 0 is written without a table multiply; elements 2e, e and 3e are multiplied by (A,C), (w,y) and (x,z) respectively */g[c[k>>2]>>2]=+g[s>>2]+ +g[v>>2];g[c[l>>2]>>2]=+g[F>>2]+ +g[I>>2];g[B>>2]=+g[s>>2]-+g[v>>2];g[J>>2]=+g[F>>2]-+g[I>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[A>>2]*+g[B>>2]-+g[C>>2]*+g[J>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[C>>2]*+g[B>>2]+ 
+g[A>>2]*+g[J>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]*+g[M>>2]-+g[y>>2]*+g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]*+g[P>>2]+ +g[y>>2]*+g[M>>2];g[Q>>2]=+g[K>>2]+ +g[L>>2];g[R>>2]=+g[O>>2]-+g[N>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[x>>2]*+g[Q>>2]-+g[z>>2]*+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[x>>2]*+g[R>>2]+ +g[z>>2]*+g[Q>>2];/* next pass: counter + 1, a offset + j*4 bytes, b offset - j*4 bytes, table offset + 16 bytes */c[S>>2]=(c[S>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+16}/* release the scratch frame */i=T;return}/* Ht(a): same shape as Ft - coerces a to int32, reserves a 16-byte scratch frame at i, stores a in it, calls bn(a, 54, 7624), then restores i. NOTE(review): 54 and 7624 look like a function-table index and a static data address - confirm against the unminified build. */function Ht(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,54,7624);i=b;return}function It(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0;pa=i;i=i+256|0;k=pa+252|0;l=pa+248|0;m=pa+244|0;n=pa+240|0;qa=pa+236|0;o=pa+232|0;p=pa+228|0;oa=pa+208|0;X=pa+204|0;_=pa+200|0;Y=pa+196|0;$=pa+192|0;ba=pa+188|0;O=pa+184|0;la=pa+180|0;M=pa+176|0;Z=pa+172|0;ka=pa+168|0;aa=pa+164|0;ja=pa+160|0;q=pa+156|0;da=pa+152|0;J=pa+148|0;z=pa+144|0;x=pa+140|0;ca=pa+136|0;y=pa+132|0;ha=pa+128|0;H=pa+124|0;B=pa+120|0;W=pa+116|0;A=pa+112|0;t=pa+108|0;ma=pa+104|0;w=pa+100|0;na=pa+96|0;r=pa+92|0;s=pa+88|0;u=pa+84|0;v=pa+80|0;S=pa+76|0;fa=pa+72|0;V=pa+68|0;ga=pa+64|0;Q=pa+60|0;R=pa+56|0;T=pa+52|0;U=pa+48|0;ia=pa+44|0;E=pa+40|0;D=pa+36|0;F=pa+32|0;ea=pa+28|0;C=pa+24|0;I=pa+20|0;N=pa+16|0;L=pa+12|0;P=pa+8|0;G=pa+4|0;K=pa;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[qa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[pa+224>>2]=.25;g[pa+220>>2]=.5877852439880371;g[pa+216>>2]=.9510565400123596;g[pa+212>>2]=.55901700258255;c[oa>>2]=c[qa>>2];c[m>>2]=(c[m>>2]|0)+((c[qa>>2]|0)-1<<2<<2);while(1){if((c[oa>>2]|0)>=(c[o>>2]|0))break;g[X>>2]=+g[c[m>>2]>>2];g[_>>2]=+g[(c[m>>2]|0)+4>>2];g[Y>>2]=+g[(c[m>>2]|0)+8>>2];g[$>>2]=+g[(c[m
>>2]|0)+12>>2];g[Z>>2]=+g[X>>2]*+g[Y>>2];g[ka>>2]=+g[_>>2]*+g[Y>>2];g[aa>>2]=+g[_>>2]*+g[$>>2];g[ja>>2]=+g[X>>2]*+g[$>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[O>>2]=+g[ja>>2]+ +g[ka>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2];g[M>>2]=+g[Z>>2]-+g[aa>>2];g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[r>>2]+ +g[s>>2];g[ma>>2]=+g[r>>2]-+g[s>>2];g[u>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[na>>2]=+g[u>>2]-+g[v>>2];g[da>>2]=(+g[t>>2]-+g[w>>2])*.55901700258255;g[J>>2]=+g[ma>>2]*.9510565400123596+ +g[na>>2]*.5877852439880371;g[z>>2]=+g[ma>>2]*.5877852439880371-+g[na>>2]*.9510565400123596;g[x>>2]=+g[t>>2]+ +g[w>>2];g[ca>>2]=+g[q>>2]-+g[x>>2]*.25;g[y>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Q>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[R>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[fa>>2]=+g[Q>>2]+ +g[R>>2];g[T>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[U>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[V>>2]=+g[T>>2]-+g[U>>2];g[ga>>2]=+g[T>>2]+ +g[U>>2];g[ha>>2]=+g[fa>>2]*.5877852439880371-+g[ga>>2]*.9510565400123596;g[H>>2]=+g[fa>>2]*.9510565400123596+ +g[ga>>2]*.5877852439880371;g[B>>2]=(+g[S>>2]-+g[V>>2])*.55901700258255;g[W>>2]=+g[S>>2]+ +g[V>>2];g[A>>2]=+g[y>>2]-+g[W>>2]*.25;g[c[k>>2]>>2]=+g[q>>2]+ +g[x>>2];g[c[l>>2]>>2]=+g[y>>2]+ +g[W>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ia>>2]=+g[ea>>2]-+g[ha>>2];g[E>>2]=+g[ea>>2]+ +g[ha>>2];g[C>>2]=+g[A>>2]-+g[B>>2];g[D>>2]=+g[z>>2]+ +g[C>>2];g[F>>2]=+g[C>>2]-+g[z>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ba>>2]*+g[ia>>2]-+g[la>>2]*+g[D>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ba>>2]*+g[D>>2]+ +g[la>>2]*+g[ia>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Y>>2]*+g[E>>2]-+g[$>>2]*+g[F>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Y>>2]*+g[F>>2]+ +g[$>>2]*+g[E>>2];g[G>>2]=+g[da>>2]+ +g[ca>>2];g[I>>2]=+g[G>>2]-+g[H>>2];g[N>>2]=+g[G>>2]+ +g[H>>2];g[K>>2]=+g[B>>2]+ +g[A>>2];g[L>>2]=+g[J>>2]+ 
+g[K>>2];g[P>>2]=+g[K>>2]-+g[J>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[X>>2]*+g[I>>2]-+g[_>>2]*+g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[X>>2]*+g[L>>2]+ +g[_>>2]*+g[I>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[M>>2]*+g[P>>2]+ +g[O>>2]*+g[N>>2];c[oa>>2]=(c[oa>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+16}i=pa;return}function Jt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,55,7672);i=b;return}function Kt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0;Va=i;i=i+384|0;k=Va+368|0;l=Va+364|0;m=Va+360|0;n=Va+356|0;Wa=Va+352|0;o=Va+348|0;p=Va+344|0;Ua=Va+336|0;za=Va+332|0;Ca=Va+328|0;Aa=Va+324|0;Da=Va+320|0;Fa=Va+316|0;Ja=Va+312|0;ja=Va+308|0;la=Va+304|0;$=Va+300|0;aa=Va+296|0;ba=Va+292|0;w=Va+288|0;fa=Va+284|0;na=Va+280|0;Ba=Va+276|0;Ia=Va+272|0;Ea=Va+268|0;Ha=Va+264|0;T=Va+260|0;G=Va+256|0;J=Va+252|0;Qa=Va+248|0;ca=Va+244|0;qa=Va+240|0;z=Va+236|0;ga=Va+232|0;ya=Va+228|0;A=Va+224|0;B=Va+220|0;Z=Va+216|0;da=Va+212|0;ta=Va+208|0;t=Va+204|0;ha=Va+200|0;Ga=Va+196|0;_=Va+192|0;P=Va+188|0;oa=Va+184|0;Pa=Va+180|0;pa=Va+176|0;S=Va+172|0;x=Va+168|0;Ma=Va+164|0;y=Va+160|0;q=Va+156|0;O=Va+152|0;Na=Va+148|0;Oa=Va+144|0;Q=Va+140|0;R=Va+136|0;Ka=Va+132|0;La=Va+128|0;ua=Va+124|0;ra=Va+120|0;Y=Va+116|0;s=Va+112|0;xa=Va+108|0;r=Va+104|0;Ta=Va+100|0;sa=Va+96|0;U=Va+92|0;V=Va+88|0;W=Va+84|0;X=Va+80|0;va=Va+76|0;wa=Va+72|0;Ra=Va+68|0;Sa=Va+64|0;ka=Va+60|0;ma=Va+56|0;ea=Va+52|0;ia=Va+48|0;I=Va+44|0;M=Va+40|0;L=Va+36|0;N=Va+32|0;H
=Va+28|0;K=Va+24|0;v=Va+20|0;E=Va+16|0;D=Va+12|0;F=Va+8|0;u=Va+4|0;C=Va;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Wa>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Va+340>>2]=.7071067690849304;c[Ua>>2]=c[Wa>>2];c[m>>2]=(c[m>>2]|0)+(((c[Wa>>2]|0)-1|0)*6<<2);while(1){if((c[Ua>>2]|0)>=(c[o>>2]|0))break;g[za>>2]=+g[c[m>>2]>>2];g[Ca>>2]=+g[(c[m>>2]|0)+4>>2];g[Aa>>2]=+g[(c[m>>2]|0)+8>>2];g[Da>>2]=+g[(c[m>>2]|0)+12>>2];g[Ba>>2]=+g[za>>2]*+g[Aa>>2];g[Ia>>2]=+g[Ca>>2]*+g[Aa>>2];g[Ea>>2]=+g[Ca>>2]*+g[Da>>2];g[Ha>>2]=+g[za>>2]*+g[Da>>2];g[Fa>>2]=+g[Ba>>2]-+g[Ea>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[ja>>2]=+g[Ba>>2]+ +g[Ea>>2];g[la>>2]=+g[Ha>>2]-+g[Ia>>2];g[$>>2]=+g[(c[m>>2]|0)+16>>2];g[aa>>2]=+g[(c[m>>2]|0)+20>>2];g[ba>>2]=+g[za>>2]*+g[$>>2]+ +g[Ca>>2]*+g[aa>>2];g[w>>2]=+g[ja>>2]*+g[aa>>2]-+g[la>>2]*+g[$>>2];g[fa>>2]=+g[za>>2]*+g[aa>>2]-+g[Ca>>2]*+g[$>>2];g[na>>2]=+g[ja>>2]*+g[$>>2]+ +g[la>>2]*+g[aa>>2];g[q>>2]=+g[c[k>>2]>>2];g[O>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[P>>2]=+g[q>>2]+ +g[O>>2];g[oa>>2]=+g[q>>2]-+g[O>>2];g[Na>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Oa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Pa>>2]=+g[Na>>2]-+g[Oa>>2];g[pa>>2]=+g[Na>>2]+ +g[Oa>>2];g[Q>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[R>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[S>>2]=+g[Q>>2]+ +g[R>>2];g[x>>2]=+g[Q>>2]-+g[R>>2];g[Ka>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[La>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ma>>2]=+g[Ka>>2]-+g[La>>2];g[y>>2]=+g[Ka>>2]+ +g[La>>2];g[T>>2]=+g[P>>2]+ +g[S>>2];g[G>>2]=+g[oa>>2]+ +g[pa>>2];g[J>>2]=+g[y>>2]-+g[x>>2];g[Qa>>2]=+g[Ma>>2]+ +g[Pa>>2];g[ca>>2]=+g[P>>2]-+g[S>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[ga>>2]=+g[Ma>>2]-+g[Pa>>2];g[U>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[V>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ua>>2]=+g[U>>2]+ +g[V>>2];g[ra>>2]=+g[U>>2]-+g[V>>2];g[W>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[X>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[s>>2]=+g[W>>2]+ 
+g[X>>2];g[va>>2]=+g[c[l>>2]>>2];g[wa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[xa>>2]=+g[va>>2]+ +g[wa>>2];g[r>>2]=+g[va>>2]-+g[wa>>2];g[Ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[sa>>2]=+g[Ra>>2]+ +g[Sa>>2];g[ya>>2]=+g[ua>>2]+ +g[xa>>2];g[A>>2]=+g[ra>>2]+ +g[sa>>2];g[B>>2]=+g[r>>2]+ +g[s>>2];g[Z>>2]=+g[Ta>>2]+ +g[Y>>2];g[da>>2]=+g[Y>>2]-+g[Ta>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[t>>2]=+g[r>>2]-+g[s>>2];g[ha>>2]=+g[ua>>2]-+g[xa>>2];g[c[k>>2]>>2]=+g[T>>2]+ +g[ya>>2];g[c[l>>2]>>2]=+g[Qa>>2]+ +g[Z>>2];g[Ga>>2]=+g[T>>2]-+g[ya>>2];g[_>>2]=+g[Qa>>2]-+g[Z>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Fa>>2]*+g[Ga>>2]-+g[Ja>>2]*+g[_>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ja>>2]*+g[Ga>>2]+ +g[Fa>>2]*+g[_>>2];g[ka>>2]=+g[ca>>2]+ +g[da>>2];g[ma>>2]=+g[ha>>2]+ +g[ga>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]*+g[ka>>2]-+g[la>>2]*+g[ma>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ja>>2]*+g[ma>>2]+ +g[la>>2]*+g[ka>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[ia>>2]=+g[ga>>2]-+g[ha>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ba>>2]*+g[ea>>2]-+g[fa>>2]*+g[ia>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ba>>2]*+g[ia>>2]+ +g[fa>>2]*+g[ea>>2];g[H>>2]=(+g[A>>2]+ +g[B>>2])*.7071067690849304;g[I>>2]=+g[G>>2]-+g[H>>2];g[M>>2]=+g[G>>2]+ +g[H>>2];g[K>>2]=(+g[ta>>2]-+g[t>>2])*.7071067690849304;g[L>>2]=+g[J>>2]+ +g[K>>2];g[N>>2]=+g[J>>2]-+g[K>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Aa>>2]*+g[I>>2]-+g[Da>>2]*+g[L>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Aa>>2]*+g[L>>2]+ +g[Da>>2]*+g[I>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[$>>2]*+g[M>>2]-+g[aa>>2]*+g[N>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[$>>2]*+g[N>>2]+ +g[aa>>2]*+g[M>>2];g[u>>2]=(+g[ta>>2]+ +g[t>>2])*.7071067690849304;g[v>>2]=+g[qa>>2]-+g[u>>2];g[E>>2]=+g[qa>>2]+ +g[u>>2];g[C>>2]=(+g[A>>2]-+g[B>>2])*.7071067690849304;g[D>>2]=+g[z>>2]-+g[C>>2];g[F>>2]=+g[z>>2]+ 
+g[C>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[na>>2]*+g[v>>2]-+g[w>>2]*+g[D>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[w>>2]*+g[v>>2]+ +g[na>>2]*+g[D>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[za>>2]*+g[E>>2]-+g[Ca>>2]*+g[F>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Ca>>2]*+g[E>>2]+ +g[za>>2]*+g[F>>2];c[Ua>>2]=(c[Ua>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+24}i=Va;return}function Lt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,56,7720);i=b;return}function Mt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0;Db=i;i=i+528|0;k=Db+524|0;l=Db+520|0;m=Db+516|0;n=Db+512|0;Eb=Db+508|0;o=Db+504|0;p=Db+500|0;Cb=Db+480|0;xa=Db+476|0;z=Db+472|0;Ia=Db+468|0;Ja=Db+464|0;C=Db+460|0;B=Db+456|0;ba=Db+452|0;pa=Db+448|0;kb=Db+444|0;nb=Db+440|0;Na=Db+436|0;ea=Db+432|0;vb=Db+428|0;Ea=Db+424|0;sa=Db+420|0;ra=Db+416|0;la=Db+412|0;F=Db+408|0;Qa=Db+404|0;Sa=Db+400|0;q=Db+396|0;wa=Db+392|0;La=Db+388|0;Ma=Db+384|0;Aa=Db+380|0;A=Db+376|0;ib=Db+372|0;$=Db+368|0;Da=Db+364|0;Y=Db+360|0;fb=Db+356|0;_=Db+352|0;ya=Db+348|0;za=Db+344|0;gb=Db+340|0;hb=Db+336|0;Ba=Db+332|0;Ca=Db+328|0;db=Db+324|0;eb=Db+320|0;Z=Db+316|0;aa=Db+312|0;cb=Db+308|0;jb=Db+304|0;rb=Db+300|0;fa=Db+296|0;Bb=Db+292|0;ja=Db+288|0;ub=Db+284|0;ga=Db+280|0;yb=Db+276|0;ia=Db+272|0;pb=Db+268|0;qb=Db+264|0;zb=Db+260|0;Ab=Db+256|0;sb=Db+252|0;tb=Db+248|0;wb=Db+244|0;xb
=Db+240|0;ha=Db+236|0;ka=Db+232|0;Oa=Db+228|0;Pa=Db+224|0;ca=Db+220|0;ma=Db+216|0;y=Db+212|0;da=Db+208|0;Fa=Db+204|0;Ka=Db+200|0;s=Db+196|0;$a=Db+192|0;Ta=Db+188|0;r=Db+184|0;ob=Db+180|0;_a=Db+176|0;Ra=Db+172|0;mb=Db+168|0;Ga=Db+164|0;Ua=Db+160|0;lb=Db+156|0;Ha=Db+152|0;v=Db+148|0;x=Db+144|0;u=Db+140|0;w=Db+136|0;Wa=Db+132|0;Ya=Db+128|0;Va=Db+124|0;Xa=Db+120|0;ab=Db+116|0;t=Db+112|0;Za=Db+108|0;bb=Db+104|0;ta=Db+100|0;D=Db+96|0;R=Db+92|0;O=Db+88|0;G=Db+84|0;S=Db+80|0;qa=Db+76|0;N=Db+72|0;E=Db+68|0;oa=Db+64|0;ua=Db+60|0;H=Db+56|0;na=Db+52|0;va=Db+48|0;V=Db+44|0;X=Db+40|0;U=Db+36|0;W=Db+32|0;J=Db+28|0;L=Db+24|0;I=Db+20|0;K=Db+16|0;P=Db+12|0;T=Db+8|0;M=Db+4|0;Q=Db;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Eb>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Db+496>>2]=.25;g[Db+492>>2]=.9510565400123596;g[Db+488>>2]=.5877852439880371;g[Db+484>>2]=.55901700258255;c[Cb>>2]=c[Eb>>2];c[m>>2]=(c[m>>2]|0)+(((c[Eb>>2]|0)-1|0)*18<<2);while(1){if((c[Cb>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[wa>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[xa>>2]=+g[q>>2]+ +g[wa>>2];g[z>>2]=+g[q>>2]-+g[wa>>2];g[ya>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[za>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Aa>>2]=+g[ya>>2]+ +g[za>>2];g[A>>2]=+g[ya>>2]-+g[za>>2];g[gb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[hb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ib>>2]=+g[gb>>2]+ +g[hb>>2];g[$>>2]=+g[gb>>2]-+g[hb>>2];g[Ba>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Ca>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Da>>2]=+g[Ba>>2]+ +g[Ca>>2];g[Y>>2]=+g[Ba>>2]-+g[Ca>>2];g[db>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[eb>>2]=+g[c[l>>2]>>2];g[fb>>2]=+g[db>>2]+ +g[eb>>2];g[_>>2]=+g[db>>2]-+g[eb>>2];g[Ia>>2]=+g[Aa>>2]-+g[Da>>2];g[Ja>>2]=+g[fb>>2]-+g[ib>>2];g[C>>2]=+g[_>>2]-+g[$>>2];g[B>>2]=+g[A>>2]-+g[Y>>2];g[Z>>2]=+g[A>>2]+ +g[Y>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[pa>>2]=(+g[Z>>2]-+g[aa>>2])*.55901700258255;g[cb>>2]=+g[Aa>>2]+ +g[Da>>2];g[jb>>2]=+g[fb>>2]+ +g[ib>>2];g[kb>>2]=+g[cb>>2]+ 
+g[jb>>2];g[nb>>2]=(+g[cb>>2]-+g[jb>>2])*.55901700258255;g[La>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ma>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[ea>>2]=+g[La>>2]+ +g[Ma>>2];g[pb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[qb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[rb>>2]=+g[pb>>2]-+g[qb>>2];g[fa>>2]=+g[pb>>2]+ +g[qb>>2];g[zb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ab>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Bb>>2]=+g[zb>>2]-+g[Ab>>2];g[ja>>2]=+g[zb>>2]+ +g[Ab>>2];g[sb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[tb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[ga>>2]=+g[sb>>2]+ +g[tb>>2];g[wb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[xb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[ia>>2]=+g[wb>>2]+ +g[xb>>2];g[vb>>2]=+g[rb>>2]-+g[ub>>2];g[Ea>>2]=+g[yb>>2]-+g[Bb>>2];g[sa>>2]=+g[ia>>2]+ +g[ja>>2];g[ra>>2]=+g[fa>>2]+ +g[ga>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[la>>2]=+g[ha>>2]+ +g[ka>>2];g[F>>2]=(+g[ha>>2]-+g[ka>>2])*.55901700258255;g[Oa>>2]=+g[rb>>2]+ +g[ub>>2];g[Pa>>2]=+g[yb>>2]+ +g[Bb>>2];g[Qa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Sa>>2]=(+g[Oa>>2]-+g[Pa>>2])*.55901700258255;g[c[k>>2]>>2]=+g[xa>>2]+ +g[kb>>2];g[c[l>>2]>>2]=+g[Na>>2]+ +g[Qa>>2];g[ca>>2]=+g[z>>2]+ +g[ba>>2];g[ma>>2]=+g[ea>>2]+ +g[la>>2];g[y>>2]=+g[(c[m>>2]|0)+32>>2];g[da>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[y>>2]*+g[ca>>2]-+g[da>>2]*+g[ma>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[da>>2]*+g[ca>>2]+ +g[y>>2]*+g[ma>>2];g[Fa>>2]=+g[vb>>2]*.5877852439880371-+g[Ea>>2]*.9510565400123596;g[Ka>>2]=+g[Ia>>2]*.5877852439880371-+g[Ja>>2]*.9510565400123596;g[s>>2]=+g[Ia>>2]*.9510565400123596+ +g[Ja>>2]*.5877852439880371;g[$a>>2]=+g[vb>>2]*.9510565400123596+ +g[Ea>>2]*.5877852439880371;g[Ra>>2]=+g[Na>>2]-+g[Qa>>2]*.25;g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[r>>2]=+g[Sa>>2]+ +g[Ra>>2];g[mb>>2]=+g[xa>>2]-+g[kb>>2]*.25;g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[_a>>2]=+g[nb>>2]+ 
+g[mb>>2];g[Ga>>2]=+g[ob>>2]-+g[Fa>>2];g[Ua>>2]=+g[Ka>>2]+ +g[Ta>>2];g[lb>>2]=+g[(c[m>>2]|0)+8>>2];g[Ha>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[lb>>2]*+g[Ga>>2]-+g[Ha>>2]*+g[Ua>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Ha>>2]*+g[Ga>>2]+ +g[lb>>2]*+g[Ua>>2];g[v>>2]=+g[_a>>2]-+g[$a>>2];g[x>>2]=+g[s>>2]+ +g[r>>2];g[u>>2]=+g[(c[m>>2]|0)+40>>2];g[w>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[u>>2]*+g[v>>2]-+g[w>>2]*+g[x>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[w>>2]*+g[v>>2]+ +g[u>>2]*+g[x>>2];g[Wa>>2]=+g[ob>>2]+ +g[Fa>>2];g[Ya>>2]=+g[Ta>>2]-+g[Ka>>2];g[Va>>2]=+g[(c[m>>2]|0)+56>>2];g[Xa>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Va>>2]*+g[Wa>>2]-+g[Xa>>2]*+g[Ya>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Xa>>2]*+g[Wa>>2]+ +g[Va>>2]*+g[Ya>>2];g[ab>>2]=+g[_a>>2]+ +g[$a>>2];g[t>>2]=+g[r>>2]-+g[s>>2];g[Za>>2]=+g[(c[m>>2]|0)+24>>2];g[bb>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Za>>2]*+g[ab>>2]-+g[bb>>2]*+g[t>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[bb>>2]*+g[ab>>2]+ +g[Za>>2]*+g[t>>2];g[ta>>2]=+g[ra>>2]*.5877852439880371-+g[sa>>2]*.9510565400123596;g[D>>2]=+g[B>>2]*.5877852439880371-+g[C>>2]*.9510565400123596;g[R>>2]=+g[B>>2]*.9510565400123596+ +g[C>>2]*.5877852439880371;g[O>>2]=+g[ra>>2]*.9510565400123596+ +g[sa>>2]*.5877852439880371;g[E>>2]=+g[ea>>2]-+g[la>>2]*.25;g[G>>2]=+g[E>>2]-+g[F>>2];g[S>>2]=+g[F>>2]+ +g[E>>2];g[oa>>2]=+g[z>>2]-+g[ba>>2]*.25;g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[N>>2]=+g[pa>>2]+ +g[oa>>2];g[ua>>2]=+g[qa>>2]-+g[ta>>2];g[H>>2]=+g[D>>2]+ +g[G>>2];g[na>>2]=+g[(c[m>>2]|0)+48>>2];g[va>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[na>>2]*+g[ua>>2]-+g[va>>2]*+g[H>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[na>>2]*+g[H>>2]+ +g[va>>2]*+g[ua>>2];g[V>>2]=+g[N>>2]+ 
+g[O>>2];g[X>>2]=+g[S>>2]-+g[R>>2];g[U>>2]=+g[(c[m>>2]|0)+64>>2];g[W>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[U>>2]*+g[V>>2]-+g[W>>2]*+g[X>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[U>>2]*+g[X>>2]+ +g[W>>2]*+g[V>>2];g[J>>2]=+g[qa>>2]+ +g[ta>>2];g[L>>2]=+g[G>>2]-+g[D>>2];g[I>>2]=+g[(c[m>>2]|0)+16>>2];g[K>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[I>>2]*+g[J>>2]-+g[K>>2]*+g[L>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[I>>2]*+g[L>>2]+ +g[K>>2]*+g[J>>2];g[P>>2]=+g[N>>2]-+g[O>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[M>>2]=+g[c[m>>2]>>2];g[Q>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[M>>2]*+g[P>>2]-+g[Q>>2]*+g[T>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[M>>2]*+g[T>>2]+ +g[Q>>2]*+g[P>>2];c[Cb>>2]=(c[Cb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+72;c[n>>2]=c[n>>2]^c[2998]}i=Db;return}function Nt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,57,7768);i=b;return}function Ot(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0;Xb=i;i=i+608|0;k=Xb+596|0;l=Xb+592|0;m=Xb+588|0;n=Xb+584|0;Yb=Xb+580|0;o=Xb+576|0;p=Xb+572|0;Wb=Xb+560|0;Ta=Xb+556|0;db=Xb+552|0;t=Xb+548|0;N=Xb+544|0;ha=Xb+540|0;V=Xb+536|0;Cb=Xb+532|0;Pb=Xb+528|0;A=Xb+524|0;Y=Xb+520|0;oa=Xb+516|0;Q=Xb+512|0;wb=Xb+508|0;ib=Xb+504|0;w=Xb+500|0;O=Xb+496|0;ka=Xb+492|0;W=Xb+488|0;Hb=Xb+484|0;Ub=Xb+480|0;ba=Xb+476|0;Z=Xb+472|0;ra=Xb+468|0;R=Xb+464|0;q=Xb+460|0;$a=Xb+456|0;Sa=Xb+452|0;fa=Xb+448|0;cb=Xb+444|0;s=Xb+440|0;r=Xb+436|0;ga=Xb+432|0;za=Xb+428|0;Ra=Xb+424|0;ab=Xb+420|0;bb=Xb+416|0;yb=Xb+412|0;Lb=Xb+408|0;Bb=Xb+404|0;y=Xb+400|0;Ob=Xb+396|0;na=Xb+392|0;z=Xb+388|0;ma=Xb+384|0;zb=Xb+380|0;Ab=Xb+376|0;Mb=Xb+372|0;Nb=Xb+368|0;Ua=Xb+364|0;hb=Xb+360|0;Xa=Xb+356|0;ia=Xb+352|0;gb=Xb+348|0;v=Xb+344|0;u=Xb+340|0;ja=Xb+336|0;Va=Xb+332|0;Wa=Xb+328|0;eb=Xb+324|0;fb=Xb+320|0;Db=Xb+316|0;Tb=Xb+312|0;Gb=Xb+308|0;$=Xb+304|0;Sb=Xb+300|0;qa=Xb+296|0;aa=Xb+292|0;pa=Xb+288|0;Eb=Xb+284|0;Fb=Xb+280|0;Qb=Xb+276|0;Rb=Xb+272|0;xb=Xb+268|0;Ib=Xb+264|0;qb=Xb+260|0;sb=Xb+256|0;tb=Xb+252|0;ub=Xb+248|0;pb=Xb+244|0;rb=Xb+240|0;Ya=Xb+236|0;mb=Xb+232|0;kb=Xb+228|0;ob=Xb+224|0;Kb=Xb+220|0;Vb=Xb+216|0;_a=Xb+212|0;jb=Xb+208|0;Jb=Xb+204|0;Za=Xb+200|0;lb=Xb+196|0;nb=Xb+192|0;Ia=Xb+188|0;Oa=Xb+184|0;Ma=Xb+180|0;Qa=Xb+176|0;Ga=Xb+172|0;Ha=Xb+168|0;Ka=Xb+164|0;La=Xb+160|0;Fa=Xb+156|0;J
a=Xb+152|0;Na=Xb+148|0;Pa=Xb+144|0;T=Xb+140|0;Ca=Xb+136|0;Aa=Xb+132|0;Ea=Xb+128|0;P=Xb+124|0;S=Xb+120|0;X=Xb+116|0;_=Xb+112|0;M=Xb+108|0;U=Xb+104|0;Ba=Xb+100|0;Da=Xb+96|0;D=Xb+92|0;J=Xb+88|0;H=Xb+84|0;L=Xb+80|0;B=Xb+76|0;C=Xb+72|0;F=Xb+68|0;G=Xb+64|0;ya=Xb+60|0;E=Xb+56|0;I=Xb+52|0;K=Xb+48|0;da=Xb+44|0;va=Xb+40|0;ta=Xb+36|0;xa=Xb+32|0;x=Xb+28|0;ca=Xb+24|0;la=Xb+20|0;sa=Xb+16|0;vb=Xb+12|0;ea=Xb+8|0;ua=Xb+4|0;wa=Xb;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Yb>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Xb+568>>2]=.5;g[Xb+564>>2]=.8660253882408142;c[Wb>>2]=c[Yb>>2];c[m>>2]=(c[m>>2]|0)+(((c[Yb>>2]|0)-1|0)*22<<2);while(1){if((c[Wb>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[$a>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[za>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Sa>>2]=+g[za>>2]+ +g[Ra>>2];g[fa>>2]=(+g[za>>2]-+g[Ra>>2])*.8660253882408142;g[ab>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[bb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[s>>2]=(+g[ab>>2]+ +g[bb>>2])*.8660253882408142;g[Ta>>2]=+g[q>>2]+ +g[Sa>>2];g[db>>2]=+g[$a>>2]+ +g[cb>>2];g[r>>2]=+g[q>>2]-+g[Sa>>2]*.5;g[t>>2]=+g[r>>2]-+g[s>>2];g[N>>2]=+g[r>>2]+ +g[s>>2];g[ga>>2]=+g[$a>>2]-+g[cb>>2]*.5;g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[V>>2]=+g[ga>>2]-+g[fa>>2];g[yb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Lb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[zb>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ab>>2]=+g[c[l>>2]>>2];g[Bb>>2]=+g[zb>>2]+ +g[Ab>>2];g[y>>2]=(+g[zb>>2]-+g[Ab>>2])*.8660253882408142;g[Mb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Nb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Ob>>2]=+g[Mb>>2]+ +g[Nb>>2];g[na>>2]=(+g[Mb>>2]-+g[Nb>>2])*.8660253882408142;g[Cb>>2]=+g[yb>>2]+ +g[Bb>>2];g[Pb>>2]=+g[Lb>>2]-+g[Ob>>2];g[z>>2]=+g[Ob>>2]*.5+ +g[Lb>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[Y>>2]=+g[z>>2]-+g[y>>2];g[ma>>2]=+g[yb>>2]-+g[Bb>>2]*.5;g[oa>>2]=+g[ma>>2]+ 
+g[na>>2];g[Q>>2]=+g[ma>>2]-+g[na>>2];g[Ua>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[hb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Va>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Wa>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Xa>>2]=+g[Va>>2]+ +g[Wa>>2];g[ia>>2]=(+g[Va>>2]-+g[Wa>>2])*.8660253882408142;g[eb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[fb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[v>>2]=(+g[eb>>2]+ +g[fb>>2])*.8660253882408142;g[wb>>2]=+g[Ua>>2]+ +g[Xa>>2];g[ib>>2]=+g[gb>>2]-+g[hb>>2];g[u>>2]=+g[Ua>>2]-+g[Xa>>2]*.5;g[w>>2]=+g[u>>2]+ +g[v>>2];g[O>>2]=+g[u>>2]-+g[v>>2];g[ja>>2]=+g[gb>>2]*.5+ +g[hb>>2];g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[W>>2]=+g[ia>>2]+ +g[ja>>2];g[Db>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Tb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Eb>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Fb>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[$>>2]=(+g[Eb>>2]-+g[Fb>>2])*.8660253882408142;g[Qb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Rb>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[qa>>2]=(+g[Rb>>2]-+g[Qb>>2])*.8660253882408142;g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[Ub>>2]=+g[Sb>>2]-+g[Tb>>2];g[aa>>2]=+g[Sb>>2]*.5+ +g[Tb>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[Z>>2]=+g[$>>2]+ +g[aa>>2];g[pa>>2]=+g[Db>>2]-+g[Gb>>2]*.5;g[ra>>2]=+g[pa>>2]+ +g[qa>>2];g[R>>2]=+g[pa>>2]-+g[qa>>2];g[xb>>2]=+g[Ta>>2]+ +g[wb>>2];g[Ib>>2]=+g[Cb>>2]+ +g[Hb>>2];g[qb>>2]=+g[xb>>2]-+g[Ib>>2];g[sb>>2]=+g[db>>2]+ +g[ib>>2];g[tb>>2]=+g[Pb>>2]+ +g[Ub>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[c[k>>2]>>2]=+g[xb>>2]+ +g[Ib>>2];g[c[l>>2]>>2]=+g[sb>>2]+ +g[tb>>2];g[pb>>2]=+g[(c[m>>2]|0)+40>>2];g[rb>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[pb>>2]*+g[qb>>2]-+g[rb>>2]*+g[ub>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[rb>>2]*+g[qb>>2]+ +g[pb>>2]*+g[ub>>2];g[Kb>>2]=+g[Ta>>2]-+g[wb>>2];g[Vb>>2]=+g[Pb>>2]-+g[Ub>>2];g[Ya>>2]=+g[Kb>>2]-+g[Vb>>2];g[mb>>2]=+g[Kb>>2]+ 
+g[Vb>>2];g[_a>>2]=+g[Cb>>2]-+g[Hb>>2];g[jb>>2]=+g[db>>2]-+g[ib>>2];g[kb>>2]=+g[_a>>2]+ +g[jb>>2];g[ob>>2]=+g[jb>>2]-+g[_a>>2];g[Jb>>2]=+g[(c[m>>2]|0)+64>>2];g[Za>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Jb>>2]*+g[Ya>>2]-+g[Za>>2]*+g[kb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Jb>>2]*+g[kb>>2]+ +g[Za>>2]*+g[Ya>>2];g[lb>>2]=+g[(c[m>>2]|0)+16>>2];g[nb>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[lb>>2]*+g[mb>>2]-+g[nb>>2]*+g[ob>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[lb>>2]*+g[ob>>2]+ +g[nb>>2]*+g[mb>>2];g[Ga>>2]=+g[N>>2]-+g[O>>2];g[Ha>>2]=+g[Y>>2]+ +g[Z>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[Oa>>2]=+g[Ga>>2]+ +g[Ha>>2];g[Ka>>2]=+g[V>>2]+ +g[W>>2];g[La>>2]=+g[Q>>2]-+g[R>>2];g[Ma>>2]=+g[Ka>>2]+ +g[La>>2];g[Qa>>2]=+g[Ka>>2]-+g[La>>2];g[Fa>>2]=+g[(c[m>>2]|0)+32>>2];g[Ja>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Fa>>2]*+g[Ia>>2]-+g[Ja>>2]*+g[Ma>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Fa>>2]*+g[Ma>>2]+ +g[Ja>>2]*+g[Ia>>2];g[Na>>2]=+g[(c[m>>2]|0)+80>>2];g[Pa>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Na>>2]*+g[Oa>>2]-+g[Pa>>2]*+g[Qa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Na>>2]*+g[Qa>>2]+ +g[Pa>>2]*+g[Oa>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[S>>2]=+g[Q>>2]+ +g[R>>2];g[T>>2]=+g[P>>2]-+g[S>>2];g[Ca>>2]=+g[P>>2]+ +g[S>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[Aa>>2]=+g[X>>2]-+g[_>>2];g[Ea>>2]=+g[X>>2]+ +g[_>>2];g[M>>2]=+g[(c[m>>2]|0)+8>>2];g[U>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[M>>2]*+g[T>>2]-+g[U>>2]*+g[Aa>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[U>>2]*+g[T>>2]+ +g[M>>2]*+g[Aa>>2];g[Ba>>2]=+g[(c[m>>2]|0)+56>>2];g[Da>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ba>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[Ba>>2]*+g[Ea>>2];g[B>>2]=+g[t>>2]+ +g[w>>2];g[C>>2]=+g[oa>>2]+ +g[ra>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[J>>2]=+g[B>>2]+ +g[C>>2];g[F>>2]=+g[ha>>2]+ 
+g[ka>>2];g[G>>2]=+g[A>>2]+ +g[ba>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[L>>2]=+g[F>>2]+ +g[G>>2];g[ya>>2]=+g[(c[m>>2]|0)+72>>2];g[E>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[ya>>2]*+g[D>>2]-+g[E>>2]*+g[H>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[E>>2]*+g[D>>2]+ +g[ya>>2]*+g[H>>2];g[I>>2]=+g[(c[m>>2]|0)+24>>2];g[K>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[I>>2]*+g[J>>2]-+g[K>>2]*+g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[K>>2]*+g[J>>2]+ +g[I>>2]*+g[L>>2];g[x>>2]=+g[t>>2]-+g[w>>2];g[ca>>2]=+g[A>>2]-+g[ba>>2];g[da>>2]=+g[x>>2]-+g[ca>>2];g[va>>2]=+g[x>>2]+ +g[ca>>2];g[la>>2]=+g[ha>>2]-+g[ka>>2];g[sa>>2]=+g[oa>>2]-+g[ra>>2];g[ta>>2]=+g[la>>2]+ +g[sa>>2];g[xa>>2]=+g[la>>2]-+g[sa>>2];g[vb>>2]=+g[c[m>>2]>>2];g[ea>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[vb>>2]*+g[da>>2]-+g[ea>>2]*+g[ta>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[vb>>2]*+g[ta>>2]+ +g[ea>>2]*+g[da>>2];g[ua>>2]=+g[(c[m>>2]|0)+48>>2];g[wa>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ua>>2]*+g[va>>2]-+g[wa>>2]*+g[xa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[ua>>2]*+g[xa>>2]+ +g[wa>>2]*+g[va>>2];c[Wb>>2]=(c[Wb>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+88;c[n>>2]=c[n>>2]^c[2998]}i=Xb;return}function Pt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,58,7816);i=b;return}function Qt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0;nd=i;i=i+912|0;k=nd+900|0;l=nd+896|0;m=nd+892|0;n=nd+888|0;od=nd+884|0;o=nd+880|0;p=nd+876|0;md=nd+848|0;jc=nd+844|0;r=nd+840|0;K=nd+836|0;Ma=nd+832|0;kb=nd+828|0;Wa=nd+824|0;vc=nd+820|0;z=nd+816|0;y=nd+812|0;Gc=nd+808|0;_a=nd+804|0;$a=nd+800|0;ab=nd+796|0;Uc=nd+792|0;dd=nd+788|0;ed=nd+784|0;s=nd+780|0;t=nd+776|0;u=nd+772|0;Xa=nd+768|0;Ya=nd+764|0;Za=nd+760|0;ta=nd+756|0;wa=nd+752|0;xa=nd+748|0;X=nd+744|0;_=nd+740|0;Ia=nd+736|0;Q=nd+732|0;T=nd+728|0;Ha=nd+724|0;C=nd+720|0;F=nd+716|0;G=nd+712|0;Qa=nd+708|0;Ra=nd+704|0;Sa=nd+700|0;Na=nd+696|0;Oa=nd+692|0;Pa=nd+688|0;q=nd+684|0;Kc=nd+680|0;ic=nd+676|0;ib=nd+672|0;Nc=nd+668|0;J=nd+664|0;I=nd+660|0;jb=nd+656|0;za=nd+652|0;Ib=nd+648|0;Lc=nd+644|0;Mc=nd+640|0;Oc=nd+636|0;O=nd+632|0;ra=nd+628|0;Zc=nd+624|0;V=nd+620|0;ya=nd+616|0;Tc=nd+612|0;R=nd+608|0;ua=nd+604|0;uc=nd+600|0;S=nd+596|0;va=nd+592|0;cd=nd+588|0;Y=nd+584|0;D=nd+580|0;Ac=nd+576|0;W=nd+572|0;B=nd+568|0;pc=nd+564|0;P=nd+56
0|0;sa=nd+556|0;Fc=nd+552|0;Z=nd+548|0;E=nd+544|0;kc=nd+540|0;lc=nd+536|0;mc=nd+532|0;nc=nd+528|0;Vc=nd+524|0;Wc=nd+520|0;Xc=nd+516|0;Yc=nd+512|0;Pc=nd+508|0;Qc=nd+504|0;Rc=nd+500|0;Sc=nd+496|0;tc=nd+492|0;qc=nd+488|0;rc=nd+484|0;sc=nd+480|0;_c=nd+476|0;$c=nd+472|0;ad=nd+468|0;bd=nd+464|0;wc=nd+460|0;xc=nd+456|0;yc=nd+452|0;zc=nd+448|0;jd=nd+444|0;kd=nd+440|0;ld=nd+436|0;oc=nd+432|0;Ec=nd+428|0;Bc=nd+424|0;Cc=nd+420|0;Dc=nd+416|0;Hc=nd+412|0;A=nd+408|0;ja=nd+404|0;ga=nd+400|0;x=nd+396|0;ka=nd+392|0;id=nd+388|0;fa=nd+384|0;v=nd+380|0;w=nd+376|0;gd=nd+372|0;hd=nd+368|0;Ic=nd+364|0;$=nd+360|0;fd=nd+356|0;Jc=nd+352|0;na=nd+348|0;pa=nd+344|0;ma=nd+340|0;oa=nd+336|0;ba=nd+332|0;da=nd+328|0;aa=nd+324|0;ca=nd+320|0;ha=nd+316|0;la=nd+312|0;ea=nd+308|0;ia=nd+304|0;Ba=nd+300|0;vb=nd+296|0;Ga=nd+292|0;yb=nd+288|0;N=nd+284|0;Gb=nd+280|0;ub=nd+276|0;nb=nd+272|0;Ka=nd+268|0;zb=nd+264|0;Fb=nd+260|0;Hb=nd+256|0;U=nd+252|0;Aa=nd+248|0;Ea=nd+244|0;Fa=nd+240|0;H=nd+236|0;L=nd+232|0;M=nd+228|0;Ja=nd+224|0;lb=nd+220|0;mb=nd+216|0;Cb=nd+212|0;Eb=nd+208|0;Bb=nd+204|0;Db=nd+200|0;Ca=nd+196|0;ob=nd+192|0;qa=nd+188|0;Da=nd+184|0;qb=nd+180|0;sb=nd+176|0;pb=nd+172|0;rb=nd+168|0;wb=nd+164|0;Ab=nd+160|0;tb=nd+156|0;xb=nd+152|0;Kb=nd+148|0;_b=nd+144|0;Pb=nd+140|0;bc=nd+136|0;Ua=nd+132|0;Zb=nd+128|0;gb=nd+124|0;cb=nd+120|0;cc=nd+116|0;Sb=nd+112|0;La=nd+108|0;Va=nd+104|0;hb=nd+100|0;Jb=nd+96|0;Nb=nd+92|0;Ob=nd+88|0;fb=nd+84|0;Ta=nd+80|0;eb=nd+76|0;Rb=nd+72|0;bb=nd+68|0;Qb=nd+64|0;fc=nd+60|0;hc=nd+56|0;ec=nd+52|0;gc=nd+48|0;Lb=nd+44|0;Tb=nd+40|0;db=nd+36|0;Mb=nd+32|0;Vb=nd+28|0;Xb=nd+24|0;Ub=nd+20|0;Wb=nd+16|0;$b=nd+12|0;dc=nd+8|0;Yb=nd+4|0;ac=nd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[od>>2]=f;c[o>>2]=h;c[p>>2]=j;g[nd+872>>2]=.55901700258255;g[nd+868>>2]=.25;g[nd+864>>2]=.9510565400123596;g[nd+860>>2]=.5877852439880371;g[nd+856>>2]=.5;g[nd+852>>2]=.8660253882408142;c[md>>2]=c[od>>2];c[m>>2]=(c[m>>2]|0)+(((c[od>>2]|0)-1|0)*28<<2);while(1){if((c[md>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[
Kc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[za>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ib>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[ic>>2]=+g[za>>2]+ +g[Ib>>2];g[ib>>2]=(+g[za>>2]-+g[Ib>>2])*.8660253882408142;g[Lc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Mc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Nc>>2]=+g[Lc>>2]-+g[Mc>>2];g[J>>2]=(+g[Lc>>2]+ +g[Mc>>2])*.8660253882408142;g[jc>>2]=+g[q>>2]+ +g[ic>>2];g[r>>2]=+g[Kc>>2]+ +g[Nc>>2];g[I>>2]=+g[q>>2]-+g[ic>>2]*.5;g[K>>2]=+g[I>>2]-+g[J>>2];g[Ma>>2]=+g[I>>2]+ +g[J>>2];g[jb>>2]=+g[Kc>>2]-+g[Nc>>2]*.5;g[kb>>2]=+g[ib>>2]+ +g[jb>>2];g[Wa>>2]=+g[jb>>2]-+g[ib>>2];g[kc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[lc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[mc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[nc>>2]=+g[lc>>2]+ +g[mc>>2];g[Oc>>2]=+g[kc>>2]+ +g[nc>>2];g[O>>2]=(+g[lc>>2]-+g[mc>>2])*.8660253882408142;g[ra>>2]=+g[kc>>2]-+g[nc>>2]*.5;g[Vc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Wc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Xc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Yc>>2]=+g[Wc>>2]+ +g[Xc>>2];g[Zc>>2]=+g[Vc>>2]+ +g[Yc>>2];g[V>>2]=(+g[Wc>>2]-+g[Xc>>2])*.8660253882408142;g[ya>>2]=+g[Vc>>2]-+g[Yc>>2]*.5;g[Pc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Qc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Rc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Sc>>2]=+g[Qc>>2]+ +g[Rc>>2];g[Tc>>2]=+g[Pc>>2]+ +g[Sc>>2];g[R>>2]=(+g[Qc>>2]-+g[Rc>>2])*.8660253882408142;g[ua>>2]=+g[Pc>>2]-+g[Sc>>2]*.5;g[tc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[qc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[rc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[sc>>2]=+g[qc>>2]+ +g[rc>>2];g[uc>>2]=+g[sc>>2]-+g[tc>>2];g[S>>2]=+g[sc>>2]*.5+ +g[tc>>2];g[va>>2]=(+g[rc>>2]-+g[qc>>2])*.8660253882408142;g[_c>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[$c>>2]=+g[c[l>>2]>>2];g[ad>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[bd>>2]=+g[$c>>2]+ +g[ad>>2];g[cd>>2]=+g[_c>>2]+ 
+g[bd>>2];g[Y>>2]=(+g[$c>>2]-+g[ad>>2])*.8660253882408142;g[D>>2]=+g[_c>>2]-+g[bd>>2]*.5;g[wc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[xc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[yc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[zc>>2]=+g[xc>>2]-+g[yc>>2];g[Ac>>2]=+g[wc>>2]+ +g[zc>>2];g[W>>2]=+g[wc>>2]-+g[zc>>2]*.5;g[B>>2]=(+g[xc>>2]+ +g[yc>>2])*.8660253882408142;g[jd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[kd>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[ld>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[oc>>2]=+g[kd>>2]+ +g[ld>>2];g[pc>>2]=+g[jd>>2]-+g[oc>>2];g[P>>2]=+g[oc>>2]*.5+ +g[jd>>2];g[sa>>2]=(+g[kd>>2]-+g[ld>>2])*.8660253882408142;g[Ec>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Bc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Cc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Fc>>2]=+g[Dc>>2]-+g[Ec>>2];g[Z>>2]=+g[Dc>>2]*.5+ +g[Ec>>2];g[E>>2]=(+g[Bc>>2]+ +g[Cc>>2])*.8660253882408142;g[vc>>2]=+g[pc>>2]-+g[uc>>2];g[z>>2]=+g[Zc>>2]-+g[cd>>2];g[y>>2]=+g[Oc>>2]-+g[Tc>>2];g[Gc>>2]=+g[Ac>>2]-+g[Fc>>2];g[_a>>2]=+g[W>>2]-+g[V>>2];g[$a>>2]=+g[Y>>2]+ +g[Z>>2];g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[Uc>>2]=+g[Oc>>2]+ +g[Tc>>2];g[dd>>2]=+g[Zc>>2]+ +g[cd>>2];g[ed>>2]=+g[Uc>>2]+ +g[dd>>2];g[s>>2]=+g[pc>>2]+ +g[uc>>2];g[t>>2]=+g[Ac>>2]+ +g[Fc>>2];g[u>>2]=+g[s>>2]+ +g[t>>2];g[Xa>>2]=+g[P>>2]-+g[O>>2];g[Ya>>2]=+g[R>>2]+ +g[S>>2];g[Za>>2]=+g[Xa>>2]-+g[Ya>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[Ia>>2]=+g[X>>2]+ +g[_>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[Ha>>2]=+g[Q>>2]+ +g[T>>2];g[C>>2]=+g[ya>>2]+ +g[B>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[G>>2]=+g[C>>2]+ +g[F>>2];g[Qa>>2]=+g[ya>>2]-+g[B>>2];g[Ra>>2]=+g[D>>2]-+g[E>>2];g[Sa>>2]=+g[Qa>>2]+ +g[Ra>>2];g[Na>>2]=+g[ra>>2]-+g[sa>>2];g[Oa>>2]=+g[ua>>2]-+g[va>>2];g[Pa>>2]=+g[Na>>2]+ +g[Oa>>2];g[c[k>>2]>>2]=+g[jc>>2]+ +g[ed>>2];g[c[l>>2]>>2]=+g[r>>2]+ 
+g[u>>2];g[Hc>>2]=+g[vc>>2]*.5877852439880371-+g[Gc>>2]*.9510565400123596;g[A>>2]=+g[y>>2]*.5877852439880371-+g[z>>2]*.9510565400123596;g[ja>>2]=+g[y>>2]*.9510565400123596+ +g[z>>2]*.5877852439880371;g[ga>>2]=+g[vc>>2]*.9510565400123596+ +g[Gc>>2]*.5877852439880371;g[v>>2]=+g[r>>2]-+g[u>>2]*.25;g[w>>2]=(+g[s>>2]-+g[t>>2])*.55901700258255;g[x>>2]=+g[v>>2]-+g[w>>2];g[ka>>2]=+g[w>>2]+ +g[v>>2];g[gd>>2]=+g[jc>>2]-+g[ed>>2]*.25;g[hd>>2]=(+g[Uc>>2]-+g[dd>>2])*.55901700258255;g[id>>2]=+g[gd>>2]-+g[hd>>2];g[fa>>2]=+g[hd>>2]+ +g[gd>>2];g[Ic>>2]=+g[id>>2]+ +g[Hc>>2];g[$>>2]=+g[x>>2]-+g[A>>2];g[fd>>2]=+g[(c[m>>2]|0)+16>>2];g[Jc>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[fd>>2]*+g[Ic>>2]-+g[Jc>>2]*+g[$>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Jc>>2]*+g[Ic>>2]+ +g[fd>>2]*+g[$>>2];g[na>>2]=+g[fa>>2]+ +g[ga>>2];g[pa>>2]=+g[ka>>2]-+g[ja>>2];g[ma>>2]=+g[(c[m>>2]|0)+64>>2];g[oa>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[ma>>2]*+g[na>>2]-+g[oa>>2]*+g[pa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[oa>>2]*+g[na>>2]+ +g[ma>>2]*+g[pa>>2];g[ba>>2]=+g[id>>2]-+g[Hc>>2];g[da>>2]=+g[A>>2]+ +g[x>>2];g[aa>>2]=+g[(c[m>>2]|0)+88>>2];g[ca>>2]=+g[(c[m>>2]|0)+92>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[aa>>2]*+g[ba>>2]-+g[ca>>2]*+g[da>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[ca>>2]*+g[ba>>2]+ +g[aa>>2]*+g[da>>2];g[ha>>2]=+g[fa>>2]-+g[ga>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[ea>>2]=+g[(c[m>>2]|0)+40>>2];g[ia>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ea>>2]*+g[ha>>2]-+g[ia>>2]*+g[la>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[ia>>2]*+g[ha>>2]+ +g[ea>>2]*+g[la>>2];g[U>>2]=+g[Q>>2]-+g[T>>2];g[Aa>>2]=+g[X>>2]-+g[_>>2];g[Ba>>2]=+g[U>>2]*.9510565400123596+ +g[Aa>>2]*.5877852439880371;g[vb>>2]=+g[U>>2]*.5877852439880371-+g[Aa>>2]*.9510565400123596;g[Ea>>2]=+g[ta>>2]-+g[wa>>2];g[Fa>>2]=+g[C>>2]-+g[F>>2];g[Ga>>2]=+g[Ea>>2]*.9510565400123596+ 
+g[Fa>>2]*.5877852439880371;g[yb>>2]=+g[Ea>>2]*.5877852439880371-+g[Fa>>2]*.9510565400123596;g[H>>2]=(+g[xa>>2]-+g[G>>2])*.55901700258255;g[L>>2]=+g[xa>>2]+ +g[G>>2];g[M>>2]=+g[K>>2]-+g[L>>2]*.25;g[N>>2]=+g[H>>2]+ +g[M>>2];g[Gb>>2]=+g[K>>2]+ +g[L>>2];g[ub>>2]=+g[M>>2]-+g[H>>2];g[Ja>>2]=(+g[Ha>>2]-+g[Ia>>2])*.55901700258255;g[lb>>2]=+g[Ha>>2]+ +g[Ia>>2];g[mb>>2]=+g[kb>>2]-+g[lb>>2]*.25;g[nb>>2]=+g[Ja>>2]+ +g[mb>>2];g[Ka>>2]=+g[kb>>2]+ +g[lb>>2];g[zb>>2]=+g[mb>>2]-+g[Ja>>2];g[Fb>>2]=+g[(c[m>>2]|0)+72>>2];g[Hb>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Fb>>2]*+g[Gb>>2]-+g[Hb>>2]*+g[Ka>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Hb>>2]*+g[Gb>>2]+ +g[Fb>>2]*+g[Ka>>2];g[Cb>>2]=+g[ub>>2]+ +g[vb>>2];g[Eb>>2]=+g[zb>>2]-+g[yb>>2];g[Bb>>2]=+g[(c[m>>2]|0)+96>>2];g[Db>>2]=+g[(c[m>>2]|0)+100>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Bb>>2]*+g[Cb>>2]-+g[Db>>2]*+g[Eb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Db>>2]*+g[Cb>>2]+ +g[Bb>>2]*+g[Eb>>2];g[Ca>>2]=+g[N>>2]-+g[Ba>>2];g[ob>>2]=+g[Ga>>2]+ +g[nb>>2];g[qa>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[qa>>2]*+g[Ca>>2]-+g[Da>>2]*+g[ob>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[qa>>2]*+g[ob>>2];g[qb>>2]=+g[N>>2]+ +g[Ba>>2];g[sb>>2]=+g[nb>>2]-+g[Ga>>2];g[pb>>2]=+g[(c[m>>2]|0)+24>>2];g[rb>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[pb>>2]*+g[qb>>2]-+g[rb>>2]*+g[sb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[rb>>2]*+g[qb>>2]+ +g[pb>>2]*+g[sb>>2];g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[tb>>2]=+g[(c[m>>2]|0)+48>>2];g[xb>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[tb>>2]*+g[wb>>2]-+g[xb>>2]*+g[Ab>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[xb>>2]*+g[wb>>2]+ +g[tb>>2]*+g[Ab>>2];g[hb>>2]=+g[Xa>>2]+ +g[Ya>>2];g[Jb>>2]=+g[_a>>2]+ +g[$a>>2];g[Kb>>2]=+g[hb>>2]*.5877852439880371-+g[Jb>>2]*.9510565400123596;g[_b>>2]=+g[hb>>2]*.9510565400123596+ 
+g[Jb>>2]*.5877852439880371;g[Nb>>2]=+g[Na>>2]-+g[Oa>>2];g[Ob>>2]=+g[Qa>>2]-+g[Ra>>2];g[Pb>>2]=+g[Nb>>2]*.5877852439880371-+g[Ob>>2]*.9510565400123596;g[bc>>2]=+g[Nb>>2]*.9510565400123596+ +g[Ob>>2]*.5877852439880371;g[fb>>2]=(+g[Pa>>2]-+g[Sa>>2])*.55901700258255;g[Ta>>2]=+g[Pa>>2]+ +g[Sa>>2];g[eb>>2]=+g[Ma>>2]-+g[Ta>>2]*.25;g[Ua>>2]=+g[Ma>>2]+ +g[Ta>>2];g[Zb>>2]=+g[fb>>2]+ +g[eb>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[Rb>>2]=(+g[Za>>2]-+g[ab>>2])*.55901700258255;g[bb>>2]=+g[Za>>2]+ +g[ab>>2];g[Qb>>2]=+g[Wa>>2]-+g[bb>>2]*.25;g[cb>>2]=+g[Wa>>2]+ +g[bb>>2];g[cc>>2]=+g[Rb>>2]+ +g[Qb>>2];g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[La>>2]=+g[(c[m>>2]|0)+32>>2];g[Va>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[La>>2]*+g[Ua>>2]-+g[Va>>2]*+g[cb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Va>>2]*+g[Ua>>2]+ +g[La>>2]*+g[cb>>2];g[fc>>2]=+g[Zb>>2]+ +g[_b>>2];g[hc>>2]=+g[cc>>2]-+g[bc>>2];g[ec>>2]=+g[(c[m>>2]|0)+104>>2];g[gc>>2]=+g[(c[m>>2]|0)+108>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ec>>2]*+g[fc>>2]-+g[gc>>2]*+g[hc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ec>>2]*+g[hc>>2]+ +g[gc>>2]*+g[fc>>2];g[Lb>>2]=+g[gb>>2]-+g[Kb>>2];g[Tb>>2]=+g[Pb>>2]+ +g[Sb>>2];g[db>>2]=+g[(c[m>>2]|0)+8>>2];g[Mb>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[db>>2]*+g[Lb>>2]-+g[Mb>>2]*+g[Tb>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[db>>2]*+g[Tb>>2]+ +g[Mb>>2]*+g[Lb>>2];g[Vb>>2]=+g[gb>>2]+ +g[Kb>>2];g[Xb>>2]=+g[Sb>>2]-+g[Pb>>2];g[Ub>>2]=+g[(c[m>>2]|0)+56>>2];g[Wb>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ub>>2]*+g[Vb>>2]-+g[Wb>>2]*+g[Xb>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Ub>>2]*+g[Xb>>2]+ +g[Wb>>2]*+g[Vb>>2];g[$b>>2]=+g[Zb>>2]-+g[_b>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[Yb>>2]=+g[(c[m>>2]|0)+80>>2];g[ac>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Yb>>2]*+g[$b>>2]-+g[ac>>2]*+g[dc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Yb>>2]*+g[dc>>2]+ 
+g[ac>>2]*+g[$b>>2];c[md>>2]=(c[md>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+112;c[n>>2]=c[n>>2]^c[2998]}i=nd;return}function Rt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,59,7864);i=b;return}function St(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0;fd=i;i=i+864|0;k=fd+856|0;l=fd+852|0;m=fd+848|0;n=fd+844|0;gd=fd+840|0;o=fd+836|0;p=fd+832|0;ed=fd+816|0;dc=fd+812|0;Ua=fd+808|0;eb=fd+804|0;ad=fd+800|0;y=fd+796|0;T=fd+792|0;sb=fd+788|0;va=fd+784|0;Kc=fd+780|0;jc=fd+776|0;wa=fd+772|0;r=fd+768|0;vb=fd+764|0;Va=fd+760|0;_=fd+756|0;fb=fd+752|0;Sc=fd+748|0;ya=fd+744|0;tc=fd+740|0;A=fd+736|0;mb=fd+732|0;xb=fd+728|0;Za=fd+724|0;hb=fd+720|0;Zc=fd+716|0;B=fd+712|0;Cc=fd+708|0;$=fd+704|0;Ha=fd+700|0;yb=fd+696|0;ab=fd+692|0;Jb=fd+688|0;Ib=fd+684|0;R=fd+680|0;x=fd+676|0;S=fd+672|0;cc=fd+668|0;qb=fd+664|0;u=fd+660|0;rb=fd+656|0;q=fd+652|0;za=fd+648|0;v=fd+644|0;w=fd+640|0;ac=fd+636|0;bc=fd+
632|0;s=fd+628|0;t=fd+624|0;Gc=fd+620|0;U=fd+616|0;ic=fd+612|0;V=fd+608|0;Jc=fd+604|0;X=fd+600|0;dd=fd+596|0;Y=fd+592|0;ec=fd+588|0;fc=fd+584|0;gc=fd+580|0;hc=fd+576|0;Hc=fd+572|0;Ic=fd+568|0;bd=fd+564|0;cd=fd+560|0;tb=fd+556|0;ub=fd+552|0;W=fd+548|0;Z=fd+544|0;Oc=fd+540|0;jb=fd+536|0;rc=fd+532|0;kb=fd+528|0;Rc=fd+524|0;Ia=fd+520|0;oc=fd+516|0;Ja=fd+512|0;lc=fd+508|0;sc=fd+504|0;Mc=fd+500|0;Nc=fd+496|0;pc=fd+492|0;qc=fd+488|0;Pc=fd+484|0;Qc=fd+480|0;mc=fd+476|0;nc=fd+472|0;ib=fd+468|0;lb=fd+464|0;Xa=fd+460|0;Ya=fd+456|0;Vc=fd+452|0;Ea=fd+448|0;Ac=fd+444|0;Fa=fd+440|0;Yc=fd+436|0;Ba=fd+432|0;xc=fd+428|0;Ca=fd+424|0;uc=fd+420|0;Bc=fd+416|0;Tc=fd+412|0;Uc=fd+408|0;yc=fd+404|0;zc=fd+400|0;Wc=fd+396|0;Xc=fd+392|0;vc=fd+388|0;wc=fd+384|0;Da=fd+380|0;Ga=fd+376|0;_a=fd+372|0;$a=fd+368|0;Lc=fd+364|0;_c=fd+360|0;ta=fd+356|0;xa=fd+352|0;C=fd+348|0;D=fd+344|0;sa=fd+340|0;ua=fd+336|0;cb=fd+332|0;Nb=fd+328|0;Lb=fd+324|0;Pb=fd+320|0;Wa=fd+316|0;bb=fd+312|0;gb=fd+308|0;Kb=fd+304|0;Ta=fd+300|0;db=fd+296|0;Mb=fd+292|0;Ob=fd+288|0;Tb=fd+284|0;Zb=fd+280|0;Xb=fd+276|0;$b=fd+272|0;Rb=fd+268|0;Sb=fd+264|0;Vb=fd+260|0;Wb=fd+256|0;Qb=fd+252|0;Ub=fd+248|0;Yb=fd+244|0;_b=fd+240|0;Ec=fd+236|0;da=fd+232|0;ba=fd+228|0;fa=fd+224|0;kc=fd+220|0;Dc=fd+216|0;z=fd+212|0;aa=fd+208|0;$c=fd+204|0;Fc=fd+200|0;ca=fd+196|0;ea=fd+192|0;ja=fd+188|0;pa=fd+184|0;na=fd+180|0;ra=fd+176|0;ha=fd+172|0;ia=fd+168|0;la=fd+164|0;ma=fd+160|0;ga=fd+156|0;ka=fd+152|0;oa=fd+148|0;qa=fd+144|0;ob=fd+140|0;Cb=fd+136|0;Ab=fd+132|0;Eb=fd+128|0;Aa=fd+124|0;nb=fd+120|0;wb=fd+116|0;zb=fd+112|0;Q=fd+108|0;pb=fd+104|0;Bb=fd+100|0;Db=fd+96|0;Ka=fd+92|0;Qa=fd+88|0;Oa=fd+84|0;Sa=fd+80|0;Gb=fd+76|0;Hb=fd+72|0;Ma=fd+68|0;Na=fd+64|0;Fb=fd+60|0;La=fd+56|0;Pa=fd+52|0;Ra=fd+48|0;H=fd+44|0;N=fd+40|0;L=fd+36|0;P=fd+32|0;F=fd+28|0;G=fd+24|0;J=fd+20|0;K=fd+16|0;E=fd+12|0;I=fd+8|0;M=fd+4|0;O=fd;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[gd>>2]=f;c[o>>2]=h;c[p>>2]=j;g[fd+828>>2]=.3826834261417389;g[fd+824>>2]=.9238795042037964;g[fd+820>>2]=.7071067
690849304;c[ed>>2]=c[gd>>2];c[m>>2]=(c[m>>2]|0)+(((c[gd>>2]|0)-1|0)*30<<2);while(1){if((c[ed>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ib>>2]=+g[q>>2]+ +g[za>>2];g[R>>2]=+g[q>>2]-+g[za>>2];g[v>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[S>>2]=+g[v>>2]+ +g[w>>2];g[ac>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[bc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[cc>>2]=+g[ac>>2]+ +g[bc>>2];g[qb>>2]=+g[ac>>2]-+g[bc>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[u>>2]=+g[s>>2]-+g[t>>2];g[rb>>2]=+g[s>>2]+ +g[t>>2];g[dc>>2]=+g[Ib>>2]+ +g[cc>>2];g[Ua>>2]=+g[R>>2]+ +g[S>>2];g[eb>>2]=+g[rb>>2]-+g[qb>>2];g[ad>>2]=+g[Ib>>2]-+g[cc>>2];g[y>>2]=+g[u>>2]-+g[x>>2];g[T>>2]=+g[R>>2]-+g[S>>2];g[sb>>2]=+g[qb>>2]+ +g[rb>>2];g[va>>2]=+g[u>>2]+ +g[x>>2];g[ec>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[fc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Gc>>2]=+g[ec>>2]+ +g[fc>>2];g[U>>2]=+g[ec>>2]-+g[fc>>2];g[gc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[hc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[ic>>2]=+g[gc>>2]-+g[hc>>2];g[V>>2]=+g[gc>>2]+ +g[hc>>2];g[Hc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Ic>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Jc>>2]=+g[Hc>>2]+ +g[Ic>>2];g[X>>2]=+g[Hc>>2]-+g[Ic>>2];g[bd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[cd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[dd>>2]=+g[bd>>2]-+g[cd>>2];g[Y>>2]=+g[bd>>2]+ +g[cd>>2];g[Kc>>2]=+g[Gc>>2]+ +g[Jc>>2];g[jc>>2]=+g[dd>>2]-+g[ic>>2];g[wa>>2]=+g[ic>>2]+ +g[dd>>2];g[r>>2]=+g[Gc>>2]-+g[Jc>>2];g[tb>>2]=+g[U>>2]+ +g[V>>2];g[ub>>2]=+g[X>>2]+ +g[Y>>2];g[vb>>2]=(+g[tb>>2]-+g[ub>>2])*.7071067690849304;g[Va>>2]=(+g[tb>>2]+ +g[ub>>2])*.7071067690849304;g[W>>2]=+g[U>>2]-+g[V>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[_>>2]=(+g[W>>2]+ 
+g[Z>>2])*.7071067690849304;g[fb>>2]=(+g[W>>2]-+g[Z>>2])*.7071067690849304;g[Mc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[Nc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Oc>>2]=+g[Mc>>2]+ +g[Nc>>2];g[jb>>2]=+g[Mc>>2]-+g[Nc>>2];g[pc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[qc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[rc>>2]=+g[pc>>2]-+g[qc>>2];g[kb>>2]=+g[pc>>2]+ +g[qc>>2];g[Pc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Qc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Rc>>2]=+g[Pc>>2]+ +g[Qc>>2];g[Ia>>2]=+g[Pc>>2]-+g[Qc>>2];g[mc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[nc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[Ja>>2]=+g[mc>>2]+ +g[nc>>2];g[Sc>>2]=+g[Oc>>2]+ +g[Rc>>2];g[ya>>2]=+g[oc>>2]+ +g[rc>>2];g[lc>>2]=+g[Oc>>2]-+g[Rc>>2];g[sc>>2]=+g[oc>>2]-+g[rc>>2];g[tc>>2]=+g[lc>>2]-+g[sc>>2];g[A>>2]=+g[lc>>2]+ +g[sc>>2];g[ib>>2]=+g[Ia>>2]+ +g[Ja>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2];g[mb>>2]=+g[ib>>2]*.9238795042037964+ +g[lb>>2]*.3826834261417389;g[xb>>2]=+g[lb>>2]*.9238795042037964-+g[ib>>2]*.3826834261417389;g[Xa>>2]=+g[jb>>2]+ +g[kb>>2];g[Ya>>2]=+g[Ja>>2]-+g[Ia>>2];g[Za>>2]=+g[Xa>>2]*.3826834261417389-+g[Ya>>2]*.9238795042037964;g[hb>>2]=+g[Ya>>2]*.3826834261417389+ +g[Xa>>2]*.9238795042037964;g[Tc>>2]=+g[c[l>>2]>>2];g[Uc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Vc>>2]=+g[Tc>>2]+ +g[Uc>>2];g[Ea>>2]=+g[Tc>>2]-+g[Uc>>2];g[yc>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[zc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[Fa>>2]=+g[yc>>2]+ +g[zc>>2];g[Wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Xc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Yc>>2]=+g[Wc>>2]+ +g[Xc>>2];g[Ba>>2]=+g[Wc>>2]-+g[Xc>>2];g[vc>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[wc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[xc>>2]=+g[vc>>2]-+g[wc>>2];g[Ca>>2]=+g[vc>>2]+ +g[wc>>2];g[Zc>>2]=+g[Vc>>2]+ +g[Yc>>2];g[B>>2]=+g[xc>>2]+ +g[Ac>>2];g[uc>>2]=+g[Vc>>2]-+g[Yc>>2];g[Bc>>2]=+g[xc>>2]-+g[Ac>>2];g[Cc>>2]=+g[uc>>2]+ 
+g[Bc>>2];g[$>>2]=+g[Bc>>2]-+g[uc>>2];g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[Ha>>2]=+g[Da>>2]*.9238795042037964-+g[Ga>>2]*.3826834261417389;g[yb>>2]=+g[Da>>2]*.3826834261417389+ +g[Ga>>2]*.9238795042037964;g[_a>>2]=+g[Ea>>2]+ +g[Fa>>2];g[$a>>2]=+g[Ba>>2]+ +g[Ca>>2];g[ab>>2]=+g[_a>>2]*.3826834261417389-+g[$a>>2]*.9238795042037964;g[Jb>>2]=+g[$a>>2]*.3826834261417389+ +g[_a>>2]*.9238795042037964;g[Lc>>2]=+g[dc>>2]+ +g[Kc>>2];g[_c>>2]=+g[Sc>>2]+ +g[Zc>>2];g[ta>>2]=+g[Lc>>2]-+g[_c>>2];g[xa>>2]=+g[va>>2]+ +g[wa>>2];g[C>>2]=+g[ya>>2]+ +g[B>>2];g[D>>2]=+g[xa>>2]-+g[C>>2];g[c[k>>2]>>2]=+g[Lc>>2]+ +g[_c>>2];g[c[l>>2]>>2]=+g[xa>>2]+ +g[C>>2];g[sa>>2]=+g[(c[m>>2]|0)+56>>2];g[ua>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[sa>>2]*+g[ta>>2]-+g[ua>>2]*+g[D>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[ua>>2]*+g[ta>>2]+ +g[sa>>2]*+g[D>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[bb>>2]=+g[Za>>2]+ +g[ab>>2];g[cb>>2]=+g[Wa>>2]-+g[bb>>2];g[Nb>>2]=+g[Wa>>2]+ +g[bb>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[Kb>>2]=+g[hb>>2]-+g[Jb>>2];g[Lb>>2]=+g[gb>>2]-+g[Kb>>2];g[Pb>>2]=+g[gb>>2]+ +g[Kb>>2];g[Ta>>2]=+g[(c[m>>2]|0)+80>>2];g[db>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Ta>>2]*+g[cb>>2]-+g[db>>2]*+g[Lb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[db>>2]*+g[cb>>2]+ +g[Ta>>2]*+g[Lb>>2];g[Mb>>2]=+g[(c[m>>2]|0)+16>>2];g[Ob>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Mb>>2]*+g[Nb>>2]-+g[Ob>>2]*+g[Pb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[Ob>>2]*+g[Nb>>2]+ +g[Mb>>2]*+g[Pb>>2];g[Rb>>2]=+g[Ua>>2]+ +g[Va>>2];g[Sb>>2]=+g[hb>>2]+ +g[Jb>>2];g[Tb>>2]=+g[Rb>>2]-+g[Sb>>2];g[Zb>>2]=+g[Rb>>2]+ +g[Sb>>2];g[Vb>>2]=+g[eb>>2]-+g[fb>>2];g[Wb>>2]=+g[Za>>2]-+g[ab>>2];g[Xb>>2]=+g[Vb>>2]+ +g[Wb>>2];g[$b>>2]=+g[Vb>>2]-+g[Wb>>2];g[Qb>>2]=+g[(c[m>>2]|0)+48>>2];g[Ub>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Qb>>2]*+g[Tb>>2]-+g[Ub>>2]*+g[Xb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Qb>>2]*+g[Xb>>2]+ 
+g[Ub>>2]*+g[Tb>>2];g[Yb>>2]=+g[(c[m>>2]|0)+112>>2];g[_b>>2]=+g[(c[m>>2]|0)+116>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Yb>>2]*+g[Zb>>2]-+g[_b>>2]*+g[$b>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Yb>>2]*+g[$b>>2]+ +g[_b>>2]*+g[Zb>>2];g[kc>>2]=+g[ad>>2]+ +g[jc>>2];g[Dc>>2]=(+g[tc>>2]+ +g[Cc>>2])*.7071067690849304;g[Ec>>2]=+g[kc>>2]-+g[Dc>>2];g[da>>2]=+g[kc>>2]+ +g[Dc>>2];g[z>>2]=+g[r>>2]+ +g[y>>2];g[aa>>2]=(+g[A>>2]+ +g[$>>2])*.7071067690849304;g[ba>>2]=+g[z>>2]-+g[aa>>2];g[fa>>2]=+g[z>>2]+ +g[aa>>2];g[$c>>2]=+g[(c[m>>2]|0)+72>>2];g[Fc>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[$c>>2]*+g[Ec>>2]-+g[Fc>>2]*+g[ba>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Fc>>2]*+g[Ec>>2]+ +g[$c>>2]*+g[ba>>2];g[ca>>2]=+g[(c[m>>2]|0)+8>>2];g[ea>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ca>>2]*+g[da>>2]-+g[ea>>2]*+g[fa>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ea>>2]*+g[da>>2]+ +g[ca>>2]*+g[fa>>2];g[ha>>2]=+g[ad>>2]-+g[jc>>2];g[ia>>2]=(+g[$>>2]-+g[A>>2])*.7071067690849304;g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[pa>>2]=+g[ha>>2]+ +g[ia>>2];g[la>>2]=+g[y>>2]-+g[r>>2];g[ma>>2]=(+g[tc>>2]-+g[Cc>>2])*.7071067690849304;g[na>>2]=+g[la>>2]-+g[ma>>2];g[ra>>2]=+g[la>>2]+ +g[ma>>2];g[ga>>2]=+g[(c[m>>2]|0)+104>>2];g[ka>>2]=+g[(c[m>>2]|0)+108>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ga>>2]*+g[ja>>2]-+g[ka>>2]*+g[na>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[ga>>2]*+g[na>>2]+ +g[ka>>2]*+g[ja>>2];g[oa>>2]=+g[(c[m>>2]|0)+40>>2];g[qa>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[oa>>2]*+g[pa>>2]-+g[qa>>2]*+g[ra>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[oa>>2]*+g[ra>>2]+ +g[qa>>2]*+g[pa>>2];g[Aa>>2]=+g[T>>2]-+g[_>>2];g[nb>>2]=+g[Ha>>2]-+g[mb>>2];g[ob>>2]=+g[Aa>>2]-+g[nb>>2];g[Cb>>2]=+g[Aa>>2]+ +g[nb>>2];g[wb>>2]=+g[sb>>2]-+g[vb>>2];g[zb>>2]=+g[xb>>2]-+g[yb>>2];g[Ab>>2]=+g[wb>>2]-+g[zb>>2];g[Eb>>2]=+g[wb>>2]+ 
+g[zb>>2];g[Q>>2]=+g[(c[m>>2]|0)+96>>2];g[pb>>2]=+g[(c[m>>2]|0)+100>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Q>>2]*+g[ob>>2]-+g[pb>>2]*+g[Ab>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[pb>>2]*+g[ob>>2]+ +g[Q>>2]*+g[Ab>>2];g[Bb>>2]=+g[(c[m>>2]|0)+32>>2];g[Db>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Bb>>2]*+g[Cb>>2]-+g[Db>>2]*+g[Eb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Db>>2]*+g[Cb>>2]+ +g[Bb>>2]*+g[Eb>>2];g[Gb>>2]=+g[T>>2]+ +g[_>>2];g[Hb>>2]=+g[xb>>2]+ +g[yb>>2];g[Ka>>2]=+g[Gb>>2]-+g[Hb>>2];g[Qa>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Ma>>2]=+g[sb>>2]+ +g[vb>>2];g[Na>>2]=+g[mb>>2]+ +g[Ha>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[Sa>>2]=+g[Ma>>2]+ +g[Na>>2];g[Fb>>2]=+g[(c[m>>2]|0)+64>>2];g[La>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Fb>>2]*+g[Ka>>2]-+g[La>>2]*+g[Oa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Fb>>2]*+g[Oa>>2]+ +g[La>>2]*+g[Ka>>2];g[Pa>>2]=+g[c[m>>2]>>2];g[Ra>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Pa>>2]*+g[Qa>>2]-+g[Ra>>2]*+g[Sa>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Pa>>2]*+g[Sa>>2]+ +g[Ra>>2]*+g[Qa>>2];g[F>>2]=+g[dc>>2]-+g[Kc>>2];g[G>>2]=+g[B>>2]-+g[ya>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[N>>2]=+g[F>>2]+ +g[G>>2];g[J>>2]=+g[va>>2]-+g[wa>>2];g[K>>2]=+g[Sc>>2]-+g[Zc>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[P>>2]=+g[K>>2]+ +g[J>>2];g[E>>2]=+g[(c[m>>2]|0)+88>>2];g[I>>2]=+g[(c[m>>2]|0)+92>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[E>>2]*+g[H>>2]-+g[I>>2]*+g[L>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[E>>2]*+g[L>>2]+ +g[I>>2]*+g[H>>2];g[M>>2]=+g[(c[m>>2]|0)+24>>2];g[O>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[M>>2]*+g[P>>2]+ +g[O>>2]*+g[N>>2];c[ed>>2]=(c[ed>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+120;c[n>>2]=c[n>>2]^c[2998]}i=fd;return}function Tt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,60,7912);i=b;return}function 
Ut(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0;Fe=i;i=i+1184|0;k=Fe+1180|0;l=Fe+1176|0;m=Fe+1172|0;n=Fe+1168|0;Ge=Fe+1164|0;o=Fe+1160|0;p=Fe+1156|0;Ee=Fe+1136|0;Dd=Fe+1132|0;kc=Fe+1128|0;$c=Fe+1124|0;Kd=Fe+1120|0;ua=Fe+1116|0;bb=Fe+1112|0;xc=Fe+1108|0;lb=Fe+1104|0;u=Fe+1100|0;Ec=Fe+1096|0;Fc=Fe+1092|0;ha=Fe+1088|0;Ga=Fe+1084|0;fd=Fe+1080|0;ed=Fe+1076|0;Da=Fe+1072|0;ma=Fe+1068|0;Uc=Fe+1064|0;qc=Fe+1060|0;la=Fe+1056|0;pb=Fe+1052|0;Sa=Fe+1048|0;qb=Fe+1044|0;Za=Fe+1040|0;Jb=Fe+1036|0;Qb=Fe+1032|0;Rb=Fe+1028|0;qe=Fe+1024|0;Hd=Fe+1020|0;Id=Fe+1016|0;Yc=Fe+1012|0
;Zc=Fe+1008|0;bd=Fe+1004|0;ib=Fe+1e3|0;jb=Fe+996|0;mb=Fe+992|0;va=Fe+988|0;wa=Fe+984|0;xa=Fe+980|0;fc=Fe+976|0;ic=Fe+972|0;lc=Fe+968|0;yc=Fe+964|0;zc=Fe+960|0;Ac=Fe+956|0;Nd=Fe+952|0;Qd=Fe+948|0;Rd=Fe+944|0;Ib=Fe+940|0;$a=Fe+936|0;ta=Fe+932|0;ab=Fe+928|0;Cd=Fe+924|0;wc=Fe+920|0;qa=Fe+916|0;vc=Fe+912|0;q=Fe+908|0;za=Fe+904|0;ra=Fe+900|0;sa=Fe+896|0;Rc=Fe+892|0;Bd=Fe+888|0;oa=Fe+884|0;pa=Fe+880|0;ie=Fe+876|0;dc=Fe+872|0;oc=Fe+868|0;Ld=Fe+864|0;$d=Fe+860|0;Oa=Fe+856|0;eb=Fe+852|0;Ba=Fe+848|0;Gd=Fe+844|0;hc=Fe+840|0;Tc=Fe+836|0;Pd=Fe+832|0;ga=Fe+828|0;Ya=Fe+824|0;Pb=Fe+820|0;Fa=Fe+816|0;pe=Fe+812|0;ec=Fe+808|0;pc=Fe+804|0;Md=Fe+800|0;t=Fe+796|0;Ra=Fe+792|0;hb=Fe+788|0;Ca=Fe+784|0;xe=Fe+780|0;gc=Fe+776|0;Sc=Fe+772|0;Od=Fe+768|0;$=Fe+764|0;Va=Fe+760|0;Mb=Fe+756|0;Ea=Fe+752|0;ee=Fe+748|0;cb=Fe+744|0;_d=Fe+740|0;db=Fe+736|0;he=Fe+732|0;Na=Fe+728|0;Xd=Fe+724|0;Ma=Fe+720|0;Ed=Fe+716|0;Fd=Fe+712|0;Yd=Fe+708|0;Zd=Fe+704|0;fe=Fe+700|0;ge=Fe+696|0;Vd=Fe+692|0;Wd=Fe+688|0;Ae=Fe+684|0;Nb=Fe+680|0;De=Fe+676|0;Wa=Fe+672|0;ca=Fe+668|0;Xa=Fe+664|0;fa=Fe+660|0;Ob=Fe+656|0;ye=Fe+652|0;ze=Fe+648|0;Be=Fe+644|0;Ce=Fe+640|0;aa=Fe+636|0;ba=Fe+632|0;da=Fe+628|0;ea=Fe+624|0;le=Fe+620|0;fb=Fe+616|0;s=Fe+612|0;gb=Fe+608|0;oe=Fe+604|0;Pa=Fe+600|0;ce=Fe+596|0;Qa=Fe+592|0;je=Fe+588|0;ke=Fe+584|0;de=Fe+580|0;r=Fe+576|0;me=Fe+572|0;ne=Fe+568|0;ae=Fe+564|0;be=Fe+560|0;te=Fe+556|0;Kb=Fe+552|0;A=Fe+548|0;Lb=Fe+544|0;we=Fe+540|0;Ua=Fe+536|0;x=Fe+532|0;Ta=Fe+528|0;re=Fe+524|0;se=Fe+520|0;y=Fe+516|0;z=Fe+512|0;ue=Fe+508|0;ve=Fe+504|0;v=Fe+500|0;w=Fe+496|0;V=Fe+492|0;X=Fe+488|0;U=Fe+484|0;W=Fe+480|0;zd=Fe+476|0;ad=Fe+472|0;yd=Fe+468|0;Ad=Fe+464|0;$b=Fe+460|0;bc=Fe+456|0;_b=Fe+452|0;ac=Fe+448|0;Ha=Fe+444|0;rb=Fe+440|0;Db=Fe+436|0;zb=Fe+432|0;ob=Fe+428|0;Cb=Fe+424|0;Aa=Fe+420|0;yb=Fe+416|0;kb=Fe+412|0;nb=Fe+408|0;Z=Fe+404|0;_=Fe+400|0;Ia=Fe+396|0;sb=Fe+392|0;Y=Fe+388|0;Ja=Fe+384|0;Gb=Fe+380|0;Ka=Fe+376|0;Fb=Fe+372|0;Hb=Fe+368|0;ub=Fe+364|0;wb=Fe+360|0;tb=Fe+356|0;vb=Fe+352|0;Ab=Fe+348|0;Eb=Fe+344|0;xb=Fe+340|0
;Bb=Fe+336|0;Vc=Fe+332|0;gd=Fe+328|0;sd=Fe+324|0;nd=Fe+320|0;dd=Fe+316|0;rd=Fe+312|0;nc=Fe+308|0;od=Fe+304|0;_c=Fe+300|0;cd=Fe+296|0;jc=Fe+292|0;mc=Fe+288|0;Wc=Fe+284|0;hd=Fe+280|0;cc=Fe+276|0;Xc=Fe+272|0;vd=Fe+268|0;xd=Fe+264|0;ud=Fe+260|0;wd=Fe+256|0;jd=Fe+252|0;ld=Fe+248|0;id=Fe+244|0;kd=Fe+240|0;pd=Fe+236|0;td=Fe+232|0;md=Fe+228|0;qd=Fe+224|0;ia=Fe+220|0;na=Fe+216|0;N=Fe+212|0;K=Fe+208|0;C=Fe+204|0;O=Fe+200|0;Ud=Fe+196|0;J=Fe+192|0;ya=Fe+188|0;B=Fe+184|0;Sd=Fe+180|0;Td=Fe+176|0;ja=Fe+172|0;D=Fe+168|0;Jd=Fe+164|0;ka=Fe+160|0;R=Fe+156|0;T=Fe+152|0;Q=Fe+148|0;S=Fe+144|0;F=Fe+140|0;H=Fe+136|0;E=Fe+132|0;G=Fe+128|0;L=Fe+124|0;P=Fe+120|0;I=Fe+116|0;M=Fe+112|0;_a=Fe+108|0;Gc=Fe+104|0;Ub=Fe+100|0;Oc=Fe+96|0;Dc=Fe+92|0;Tb=Fe+88|0;sc=Fe+84|0;Nc=Fe+80|0;Bc=Fe+76|0;Cc=Fe+72|0;Sb=Fe+68|0;rc=Fe+64|0;tc=Fe+60|0;Hc=Fe+56|0;La=Fe+52|0;uc=Fe+48|0;Xb=Fe+44|0;Zb=Fe+40|0;Wb=Fe+36|0;Yb=Fe+32|0;Jc=Fe+28|0;Lc=Fe+24|0;Ic=Fe+20|0;Kc=Fe+16|0;Pc=Fe+12|0;Vb=Fe+8|0;Mc=Fe+4|0;Qc=Fe;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Ge>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Fe+1152>>2]=.25;g[Fe+1148>>2]=.55901700258255;g[Fe+1144>>2]=.5877852439880371;g[Fe+1140>>2]=.9510565400123596;c[Ee>>2]=c[Ge>>2];c[m>>2]=(c[m>>2]|0)+(((c[Ge>>2]|0)-1|0)*38<<2);while(1){if((c[Ee>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Ib>>2]=+g[q>>2]+ +g[za>>2];g[$a>>2]=+g[q>>2]-+g[za>>2];g[ra>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[sa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[ab>>2]=+g[ra>>2]+ +g[sa>>2];g[Rc>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Bd>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Cd>>2]=+g[Rc>>2]+ +g[Bd>>2];g[wc>>2]=+g[Rc>>2]-+g[Bd>>2];g[oa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[pa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[vc>>2]=+g[oa>>2]+ +g[pa>>2];g[Dd>>2]=+g[Ib>>2]+ +g[Cd>>2];g[kc>>2]=+g[$a>>2]-+g[ab>>2];g[$c>>2]=+g[wc>>2]+ 
+g[vc>>2];g[Kd>>2]=+g[Ib>>2]-+g[Cd>>2];g[ua>>2]=+g[qa>>2]-+g[ta>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[xc>>2]=+g[vc>>2]-+g[wc>>2];g[lb>>2]=+g[qa>>2]+ +g[ta>>2];g[Ed>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Fd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[ee>>2]=+g[Ed>>2]+ +g[Fd>>2];g[cb>>2]=+g[Ed>>2]-+g[Fd>>2];g[Yd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[Zd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[_d>>2]=+g[Yd>>2]-+g[Zd>>2];g[db>>2]=+g[Yd>>2]+ +g[Zd>>2];g[fe>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ge>>2]=+g[c[l>>2]>>2];g[he>>2]=+g[fe>>2]+ +g[ge>>2];g[Na>>2]=+g[fe>>2]-+g[ge>>2];g[Vd>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Wd>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[Xd>>2]=+g[Vd>>2]-+g[Wd>>2];g[Ma>>2]=+g[Vd>>2]+ +g[Wd>>2];g[ie>>2]=+g[ee>>2]+ +g[he>>2];g[dc>>2]=+g[cb>>2]-+g[db>>2];g[oc>>2]=+g[Na>>2]+ +g[Ma>>2];g[Ld>>2]=+g[ee>>2]-+g[he>>2];g[$d>>2]=+g[Xd>>2]-+g[_d>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[Ba>>2]=+g[Xd>>2]+ +g[_d>>2];g[ye>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[ze>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[Nb>>2]=+g[ye>>2]-+g[ze>>2];g[Be>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Ce>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[De>>2]=+g[Be>>2]+ +g[Ce>>2];g[Wa>>2]=+g[Be>>2]-+g[Ce>>2];g[aa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[Xa>>2]=+g[aa>>2]+ +g[ba>>2];g[da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[Ob>>2]=+g[da>>2]+ +g[ea>>2];g[Gd>>2]=+g[Ae>>2]+ +g[De>>2];g[hc>>2]=+g[Nb>>2]+ +g[Ob>>2];g[Tc>>2]=+g[Wa>>2]-+g[Xa>>2];g[Pd>>2]=+g[Ae>>2]-+g[De>>2];g[ga>>2]=+g[ca>>2]-+g[fa>>2];g[Ya>>2]=+g[Wa>>2]+ +g[Xa>>2];g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2];g[Fa>>2]=+g[ca>>2]+ +g[fa>>2];g[je>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[ke>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[le>>2]=+g[je>>2]+ 
+g[ke>>2];g[fb>>2]=+g[je>>2]-+g[ke>>2];g[de>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[r>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[s>>2]=+g[de>>2]-+g[r>>2];g[gb>>2]=+g[de>>2]+ +g[r>>2];g[me>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[ne>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[oe>>2]=+g[me>>2]+ +g[ne>>2];g[Pa>>2]=+g[me>>2]-+g[ne>>2];g[ae>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[be>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ce>>2]=+g[ae>>2]-+g[be>>2];g[Qa>>2]=+g[ae>>2]+ +g[be>>2];g[pe>>2]=+g[le>>2]+ +g[oe>>2];g[ec>>2]=+g[fb>>2]-+g[gb>>2];g[pc>>2]=+g[Pa>>2]-+g[Qa>>2];g[Md>>2]=+g[le>>2]-+g[oe>>2];g[t>>2]=+g[ce>>2]-+g[s>>2];g[Ra>>2]=+g[Pa>>2]+ +g[Qa>>2];g[hb>>2]=+g[fb>>2]+ +g[gb>>2];g[Ca>>2]=+g[ce>>2]+ +g[s>>2];g[re>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[se>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[te>>2]=+g[re>>2]+ +g[se>>2];g[Kb>>2]=+g[re>>2]-+g[se>>2];g[y>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[z>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[A>>2]=+g[y>>2]-+g[z>>2];g[Lb>>2]=+g[y>>2]+ +g[z>>2];g[ue>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[ve>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[we>>2]=+g[ue>>2]+ +g[ve>>2];g[Ua>>2]=+g[ue>>2]-+g[ve>>2];g[v>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[Ta>>2]=+g[v>>2]+ +g[w>>2];g[xe>>2]=+g[te>>2]+ +g[we>>2];g[gc>>2]=+g[Kb>>2]+ +g[Lb>>2];g[Sc>>2]=+g[Ua>>2]+ +g[Ta>>2];g[Od>>2]=+g[te>>2]-+g[we>>2];g[$>>2]=+g[x>>2]-+g[A>>2];g[Va>>2]=+g[Ta>>2]-+g[Ua>>2];g[Mb>>2]=+g[Kb>>2]-+g[Lb>>2];g[Ea>>2]=+g[x>>2]+ +g[A>>2];g[u>>2]=+g[$d>>2]-+g[t>>2];g[Ec>>2]=+g[eb>>2]-+g[hb>>2];g[Fc>>2]=+g[Mb>>2]-+g[Pb>>2];g[ha>>2]=+g[$>>2]-+g[ga>>2];g[Ga>>2]=+g[Ea>>2]-+g[Fa>>2];g[fd>>2]=+g[gc>>2]-+g[hc>>2];g[ed>>2]=+g[dc>>2]-+g[ec>>2];g[Da>>2]=+g[Ba>>2]-+g[Ca>>2];g[ma>>2]=+g[Od>>2]-+g[Pd>>2];g[Uc>>2]=+g[Sc>>2]-+g[Tc>>2];g[qc>>2]=+g[oc>>2]-+g[pc>>2];g[la>>2]=+g[Ld>>2]-+g[Md>>2];g[pb>>2]=+g[ie>>2]-+g[pe>>2];g[Sa>>2]=+g[Oa>>2]+ +g[Ra>>2];g[qb>>2]=+g[xe>>2]-+g[Gd>>2];g[Za>>2]=+g[Va>>2]+ 
+g[Ya>>2];g[Jb>>2]=+g[eb>>2]+ +g[hb>>2];g[Qb>>2]=+g[Mb>>2]+ +g[Pb>>2];g[Rb>>2]=+g[Jb>>2]+ +g[Qb>>2];g[qe>>2]=+g[ie>>2]+ +g[pe>>2];g[Hd>>2]=+g[xe>>2]+ +g[Gd>>2];g[Id>>2]=+g[qe>>2]+ +g[Hd>>2];g[Yc>>2]=+g[oc>>2]+ +g[pc>>2];g[Zc>>2]=+g[Sc>>2]+ +g[Tc>>2];g[bd>>2]=+g[Yc>>2]+ +g[Zc>>2];g[ib>>2]=+g[Ba>>2]+ +g[Ca>>2];g[jb>>2]=+g[Ea>>2]+ +g[Fa>>2];g[mb>>2]=+g[ib>>2]+ +g[jb>>2];g[va>>2]=+g[$d>>2]+ +g[t>>2];g[wa>>2]=+g[$>>2]+ +g[ga>>2];g[xa>>2]=+g[va>>2]+ +g[wa>>2];g[fc>>2]=+g[dc>>2]+ +g[ec>>2];g[ic>>2]=+g[gc>>2]+ +g[hc>>2];g[lc>>2]=+g[fc>>2]+ +g[ic>>2];g[yc>>2]=+g[Oa>>2]-+g[Ra>>2];g[zc>>2]=+g[Va>>2]-+g[Ya>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[Nd>>2]=+g[Ld>>2]+ +g[Md>>2];g[Qd>>2]=+g[Od>>2]+ +g[Pd>>2];g[Rd>>2]=+g[Nd>>2]+ +g[Qd>>2];g[c[k>>2]>>2]=+g[Dd>>2]+ +g[Id>>2];g[c[l>>2]>>2]=+g[lb>>2]+ +g[mb>>2];g[V>>2]=+g[Kd>>2]+ +g[Rd>>2];g[X>>2]=+g[ua>>2]+ +g[xa>>2];g[U>>2]=+g[(c[m>>2]|0)+72>>2];g[W>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[U>>2]*+g[V>>2]-+g[W>>2]*+g[X>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[W>>2]*+g[V>>2]+ +g[U>>2]*+g[X>>2];g[zd>>2]=+g[kc>>2]+ +g[lc>>2];g[ad>>2]=+g[$c>>2]+ +g[bd>>2];g[yd>>2]=+g[(c[m>>2]|0)+32>>2];g[Ad>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[yd>>2]*+g[zd>>2]-+g[Ad>>2]*+g[ad>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[yd>>2]*+g[ad>>2]+ +g[Ad>>2]*+g[zd>>2];g[$b>>2]=+g[bb>>2]+ +g[Rb>>2];g[bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[_b>>2]=+g[(c[m>>2]|0)+112>>2];g[ac>>2]=+g[(c[m>>2]|0)+116>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[_b>>2]*+g[$b>>2]-+g[ac>>2]*+g[bc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[_b>>2]*+g[bc>>2]+ +g[ac>>2]*+g[$b>>2];g[Ha>>2]=+g[Da>>2]*.9510565400123596+ +g[Ga>>2]*.5877852439880371;g[rb>>2]=+g[pb>>2]*.9510565400123596+ +g[qb>>2]*.5877852439880371;g[Db>>2]=+g[pb>>2]*.5877852439880371-+g[qb>>2]*.9510565400123596;g[zb>>2]=+g[Da>>2]*.5877852439880371-+g[Ga>>2]*.9510565400123596;g[kb>>2]=(+g[ib>>2]-+g[jb>>2])*.55901700258255;g[nb>>2]=+g[lb>>2]-+g[mb>>2]*.25;g[ob>>2]=+g[kb>>2]+ 
+g[nb>>2];g[Cb>>2]=+g[nb>>2]-+g[kb>>2];g[Z>>2]=(+g[qe>>2]-+g[Hd>>2])*.55901700258255;g[_>>2]=+g[Dd>>2]-+g[Id>>2]*.25;g[Aa>>2]=+g[Z>>2]+ +g[_>>2];g[yb>>2]=+g[_>>2]-+g[Z>>2];g[Ia>>2]=+g[Aa>>2]+ +g[Ha>>2];g[sb>>2]=+g[ob>>2]-+g[rb>>2];g[Y>>2]=+g[(c[m>>2]|0)+24>>2];g[Ja>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Y>>2]*+g[Ia>>2]-+g[Ja>>2]*+g[sb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Ja>>2]*+g[Ia>>2]+ +g[Y>>2]*+g[sb>>2];g[Gb>>2]=+g[yb>>2]-+g[zb>>2];g[Ka>>2]=+g[Db>>2]+ +g[Cb>>2];g[Fb>>2]=+g[(c[m>>2]|0)+88>>2];g[Hb>>2]=+g[(c[m>>2]|0)+92>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Fb>>2]*+g[Gb>>2]-+g[Hb>>2]*+g[Ka>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[Hb>>2]*+g[Gb>>2]+ +g[Fb>>2]*+g[Ka>>2];g[ub>>2]=+g[Aa>>2]-+g[Ha>>2];g[wb>>2]=+g[rb>>2]+ +g[ob>>2];g[tb>>2]=+g[(c[m>>2]|0)+120>>2];g[vb>>2]=+g[(c[m>>2]|0)+124>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[tb>>2]*+g[ub>>2]-+g[vb>>2]*+g[wb>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[vb>>2]*+g[ub>>2]+ +g[tb>>2]*+g[wb>>2];g[Ab>>2]=+g[yb>>2]+ +g[zb>>2];g[Eb>>2]=+g[Cb>>2]-+g[Db>>2];g[xb>>2]=+g[(c[m>>2]|0)+56>>2];g[Bb>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[xb>>2]*+g[Ab>>2]-+g[Bb>>2]*+g[Eb>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Bb>>2]*+g[Ab>>2]+ +g[xb>>2]*+g[Eb>>2];g[Vc>>2]=+g[qc>>2]*.9510565400123596+ +g[Uc>>2]*.5877852439880371;g[gd>>2]=+g[ed>>2]*.9510565400123596+ +g[fd>>2]*.5877852439880371;g[sd>>2]=+g[ed>>2]*.5877852439880371-+g[fd>>2]*.9510565400123596;g[nd>>2]=+g[qc>>2]*.5877852439880371-+g[Uc>>2]*.9510565400123596;g[_c>>2]=(+g[Yc>>2]-+g[Zc>>2])*.55901700258255;g[cd>>2]=+g[$c>>2]-+g[bd>>2]*.25;g[dd>>2]=+g[_c>>2]+ +g[cd>>2];g[rd>>2]=+g[cd>>2]-+g[_c>>2];g[jc>>2]=(+g[fc>>2]-+g[ic>>2])*.55901700258255;g[mc>>2]=+g[kc>>2]-+g[lc>>2]*.25;g[nc>>2]=+g[jc>>2]+ +g[mc>>2];g[od>>2]=+g[mc>>2]-+g[jc>>2];g[Wc>>2]=+g[nc>>2]-+g[Vc>>2];g[hd>>2]=+g[dd>>2]+ 
+g[gd>>2];g[cc>>2]=+g[c[m>>2]>>2];g[Xc>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[cc>>2]*+g[Wc>>2]-+g[Xc>>2]*+g[hd>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[cc>>2]*+g[hd>>2]+ +g[Xc>>2]*+g[Wc>>2];g[vd>>2]=+g[od>>2]-+g[nd>>2];g[xd>>2]=+g[rd>>2]+ +g[sd>>2];g[ud>>2]=+g[(c[m>>2]|0)+128>>2];g[wd>>2]=+g[(c[m>>2]|0)+132>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[ud>>2]*+g[vd>>2]-+g[wd>>2]*+g[xd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[ud>>2]*+g[xd>>2]+ +g[wd>>2]*+g[vd>>2];g[jd>>2]=+g[Vc>>2]+ +g[nc>>2];g[ld>>2]=+g[dd>>2]-+g[gd>>2];g[id>>2]=+g[(c[m>>2]|0)+64>>2];g[kd>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[id>>2]*+g[jd>>2]-+g[kd>>2]*+g[ld>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[id>>2]*+g[ld>>2]+ +g[kd>>2]*+g[jd>>2];g[pd>>2]=+g[nd>>2]+ +g[od>>2];g[td>>2]=+g[rd>>2]-+g[sd>>2];g[md>>2]=+g[(c[m>>2]|0)+96>>2];g[qd>>2]=+g[(c[m>>2]|0)+100>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[md>>2]*+g[pd>>2]-+g[qd>>2]*+g[td>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[md>>2]*+g[td>>2]+ +g[qd>>2]*+g[pd>>2];g[ia>>2]=+g[u>>2]*.5877852439880371-+g[ha>>2]*.9510565400123596;g[na>>2]=+g[la>>2]*.5877852439880371-+g[ma>>2]*.9510565400123596;g[N>>2]=+g[la>>2]*.9510565400123596+ +g[ma>>2]*.5877852439880371;g[K>>2]=+g[u>>2]*.9510565400123596+ +g[ha>>2]*.5877852439880371;g[ya>>2]=+g[ua>>2]-+g[xa>>2]*.25;g[B>>2]=(+g[va>>2]-+g[wa>>2])*.55901700258255;g[C>>2]=+g[ya>>2]-+g[B>>2];g[O>>2]=+g[B>>2]+ +g[ya>>2];g[Sd>>2]=+g[Kd>>2]-+g[Rd>>2]*.25;g[Td>>2]=(+g[Nd>>2]-+g[Qd>>2])*.55901700258255;g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[J>>2]=+g[Td>>2]+ +g[Sd>>2];g[ja>>2]=+g[Ud>>2]-+g[ia>>2];g[D>>2]=+g[na>>2]+ +g[C>>2];g[Jd>>2]=+g[(c[m>>2]|0)+8>>2];g[ka>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Jd>>2]*+g[ja>>2]-+g[ka>>2]*+g[D>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[ka>>2]*+g[ja>>2]+ +g[Jd>>2]*+g[D>>2];g[R>>2]=+g[J>>2]+ 
+g[K>>2];g[T>>2]=+g[O>>2]-+g[N>>2];g[Q>>2]=+g[(c[m>>2]|0)+104>>2];g[S>>2]=+g[(c[m>>2]|0)+108>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Q>>2]*+g[R>>2]-+g[S>>2]*+g[T>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[S>>2]*+g[R>>2]+ +g[Q>>2]*+g[T>>2];g[F>>2]=+g[Ud>>2]+ +g[ia>>2];g[H>>2]=+g[C>>2]-+g[na>>2];g[E>>2]=+g[(c[m>>2]|0)+136>>2];g[G>>2]=+g[(c[m>>2]|0)+140>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[E>>2]*+g[F>>2]-+g[G>>2]*+g[H>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[G>>2]*+g[F>>2]+ +g[E>>2]*+g[H>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[P>>2]=+g[N>>2]+ +g[O>>2];g[I>>2]=+g[(c[m>>2]|0)+40>>2];g[M>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[I>>2]*+g[L>>2]-+g[M>>2]*+g[P>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[M>>2]*+g[L>>2]+ +g[I>>2]*+g[P>>2];g[_a>>2]=+g[Sa>>2]*.5877852439880371-+g[Za>>2]*.9510565400123596;g[Gc>>2]=+g[Ec>>2]*.5877852439880371-+g[Fc>>2]*.9510565400123596;g[Ub>>2]=+g[Ec>>2]*.9510565400123596+ +g[Fc>>2]*.5877852439880371;g[Oc>>2]=+g[Sa>>2]*.9510565400123596+ +g[Za>>2]*.5877852439880371;g[Bc>>2]=+g[xc>>2]-+g[Ac>>2]*.25;g[Cc>>2]=(+g[yc>>2]-+g[zc>>2])*.55901700258255;g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Tb>>2]=+g[Cc>>2]+ +g[Bc>>2];g[Sb>>2]=+g[bb>>2]-+g[Rb>>2]*.25;g[rc>>2]=(+g[Jb>>2]-+g[Qb>>2])*.55901700258255;g[sc>>2]=+g[Sb>>2]-+g[rc>>2];g[Nc>>2]=+g[rc>>2]+ +g[Sb>>2];g[tc>>2]=+g[_a>>2]+ +g[sc>>2];g[Hc>>2]=+g[Dc>>2]-+g[Gc>>2];g[La>>2]=+g[(c[m>>2]|0)+16>>2];g[uc>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[La>>2]*+g[tc>>2]-+g[uc>>2]*+g[Hc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[La>>2]*+g[Hc>>2]+ +g[uc>>2]*+g[tc>>2];g[Xb>>2]=+g[Oc>>2]+ +g[Nc>>2];g[Zb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Wb>>2]=+g[(c[m>>2]|0)+144>>2];g[Yb>>2]=+g[(c[m>>2]|0)+148>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Wb>>2]*+g[Xb>>2]-+g[Yb>>2]*+g[Zb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Wb>>2]*+g[Zb>>2]+ +g[Yb>>2]*+g[Xb>>2];g[Jc>>2]=+g[sc>>2]-+g[_a>>2];g[Lc>>2]=+g[Dc>>2]+ 
+g[Gc>>2];g[Ic>>2]=+g[(c[m>>2]|0)+48>>2];g[Kc>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ic>>2]*+g[Jc>>2]-+g[Kc>>2]*+g[Lc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ic>>2]*+g[Lc>>2]+ +g[Kc>>2]*+g[Jc>>2];g[Pc>>2]=+g[Nc>>2]-+g[Oc>>2];g[Vb>>2]=+g[Tb>>2]+ +g[Ub>>2];g[Mc>>2]=+g[(c[m>>2]|0)+80>>2];g[Qc>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Mc>>2]*+g[Pc>>2]-+g[Qc>>2]*+g[Vb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[Mc>>2]*+g[Vb>>2]+ +g[Qc>>2]*+g[Pc>>2];c[Ee>>2]=(c[Ee>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+152;c[n>>2]=c[n>>2]^c[2998]}i=Fe;return}/* Vt(a): thin forwarding stub. Coerces a to int, saves i (used here as a stack pointer: saved, advanced by 16, restored on exit), spills a into the reserved slot through the heap view c, then calls bn(a, 61, 7960). Returns nothing. bn, c and i are defined outside this chunk. NOTE(review): presumably a compiler-generated registration wrapper for the routine that precedes it; confirm what the constants 61 and 7960 refer to before changing them. */function Vt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,61,7960);i=b;return}/* Wt(a,b,d,e,f,h,j): all seven arguments are coerced to int; returns nothing. The body advances i by 1904 bytes and sets every local to a byte offset inside that frame, so each intermediate value lives in a 4-byte frame slot accessed through the heap views c (read with |0 coercions) and g (read with unary-plus coercions). a and b are base addresses whose elements at indices 0..24, scaled by the element stride e, are read and then overwritten in place. d is first advanced by (f-1)*48 four-byte elements and then by 192 bytes per iteration; it supplies the coefficient pairs that multiply every output except index 0. The loop counter starts at f and the loop exits once it is >= h. Each iteration adds j*4 bytes to a, subtracts j*4 bytes from b, and xors the stride with c[2998]. i is restored before returning. NOTE(review): this looks like an Emscripten-compiled FFTW radix-25 twiddle codelet (24 coefficient pairs, 25 elements per array); confirm against the FFTW sources. Generated code: regenerate from source rather than hand-editing the arithmetic. */function Wt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld
=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0;Ih=i;i=i+1904|0;k=Ih+1900|0;l=Ih+1896|0;m=Ih+1892|0;n=Ih+1888|0;Jh=Ih+1884|0;o=Ih+1880|0;p=Ih+1876|0;Hh=Ih+1792|0;Ig=Ih+1788|0;ze=Ih+1784|0;Pc=Ih+1780|0;Sb=Ih+1776|0;cf=Ih+1772|0;ph=Ih+1768|0;yh=Ih+1764|0;zh=Ih+1760|0;gh=Ih+1756|0;z=Ih+1752|0;ya=Ih+1748|0;sb=Ih+1744|0;Ld=Ih+1740|0;yc=Ih+1736|0;Wc=Ih+1732|0;Me=Ih+1728|0;vf=Ih+1724|0;le=Ih+1720|0;fg=Ih+1716|0;fe=Ih+1712|0;uf=Ih+1708|0;ke=Ih+1704|0;cg=Ih+1700|0;Hb=Ih+1696|0;Md=Ih+1692|0;zc=Ih+1688|0;Zc=Ih+1684|0;L=Ih+1680|0;df=Ih+1676|0;tc=Ih+1672|0;Ub=Ih+1668|0;Ae=Ih+1664|0;Kg=Ih+1660|0;Tg=Ih+1656|0;Ug=Ih+1652|0;ha=Ih+1648|0;qa=Ih+1644|0;B=Ih+1640|0;Za=Ih+1636|0;Od=Ih+1632|0;Bc=Ih+1628|0;Bd=Ih+1624|0;$e=Ih+1620|0;yf=Ih+1616|0;oe=Ih+1612|0;mg=Ih+1608|0;Ue=Ih+1604|0;xf=Ih+1600|0;ne=Ih+1596|0;jg=Ih+1592|0;Nb=Ih+1588|0;Pd=Ih+1584|0;Cc=Ih+1580|0;Ed=Ih+1576|0;q=Ih+1572|0;Rc=Ih+1568|0;rg=Ih+1564|0;Hg=Ih+1560|0;Oc=Ih+1556|0;Nc=Ih+1552|0;Qb=Ih+1548|0;Rb=Ih+1544|0;za=Ih+1540|0;Ib=
Ih+1536|0;_d=Ih+1532|0;hf=Ih+1528|0;hh=Ih+1524|0;Ha=Ih+1520|0;ce=Ih+1516|0;nb=Ih+1512|0;oh=Ih+1508|0;Ga=Ih+1504|0;r=Ih+1500|0;xb=Ih+1496|0;he=Ih+1492|0;Fb=Ih+1488|0;y=Ih+1484|0;wb=Ih+1480|0;qh=Ih+1476|0;vb=Ih+1472|0;Je=Ih+1468|0;Bb=Ih+1464|0;xh=Ih+1460|0;Ab=Ih+1456|0;_g=Ih+1452|0;jb=Ih+1448|0;ae=Ih+1444|0;pb=Ih+1440|0;fh=Ih+1436|0;ob=Ih+1432|0;nh=Ih+1428|0;mb=Ih+1424|0;kh=Ih+1420|0;lb=Ih+1416|0;lh=Ih+1412|0;mh=Ih+1408|0;ih=Ih+1404|0;jh=Ih+1400|0;x=Ih+1396|0;Eb=Ih+1392|0;u=Ih+1388|0;Db=Ih+1384|0;v=Ih+1380|0;w=Ih+1376|0;s=Ih+1372|0;t=Ih+1368|0;wh=Ih+1364|0;ub=Ih+1360|0;th=Ih+1356|0;tb=Ih+1352|0;uh=Ih+1348|0;vh=Ih+1344|0;rh=Ih+1340|0;sh=Ih+1336|0;eh=Ih+1332|0;ib=Ih+1328|0;bh=Ih+1324|0;Ja=Ih+1320|0;ch=Ih+1316|0;dh=Ih+1312|0;$g=Ih+1308|0;ah=Ih+1304|0;kb=Ih+1300|0;Uc=Ih+1296|0;rb=Ih+1292|0;Vc=Ih+1288|0;Ia=Ih+1284|0;qb=Ih+1280|0;ie=Ih+1276|0;eg=Ih+1272|0;Le=Ih+1268|0;dg=Ih+1264|0;ge=Ih+1260|0;Ke=Ih+1256|0;be=Ih+1252|0;ag=Ih+1248|0;ee=Ih+1244|0;bg=Ih+1240|0;$d=Ih+1236|0;de=Ih+1232|0;zb=Ih+1228|0;Xc=Ih+1224|0;Gb=Ih+1220|0;Yc=Ih+1216|0;yb=Ih+1212|0;Cb=Ih+1208|0;D=Ih+1204|0;G=Ih+1200|0;J=Ih+1196|0;K=Ih+1192|0;sc=Ih+1188|0;rc=Ih+1184|0;Qc=Ih+1180|0;Tb=Ih+1176|0;E=Ih+1172|0;F=Ih+1168|0;H=Ih+1164|0;I=Ih+1160|0;Ah=Ih+1156|0;Ma=Ih+1152|0;Re=Ih+1148|0;Ua=Ih+1144|0;Jg=Ih+1140|0;La=Ih+1136|0;$=Ih+1132|0;Qa=Ih+1128|0;Pe=Ih+1124|0;Wa=Ih+1120|0;ga=Ih+1116|0;Va=Ih+1112|0;Lg=Ih+1108|0;$a=Ih+1104|0;Ye=Ih+1100|0;hb=Ih+1096|0;Sg=Ih+1092|0;_a=Ih+1088|0;ia=Ih+1084|0;db=Ih+1080|0;We=Ih+1076|0;Kb=Ih+1072|0;pa=Ih+1068|0;Jb=Ih+1064|0;Dh=Ih+1060|0;Sa=Ih+1056|0;Gh=Ih+1052|0;Ta=Ih+1048|0;Bh=Ih+1044|0;Ch=Ih+1040|0;Eh=Ih+1036|0;Fh=Ih+1032|0;ca=Ih+1028|0;Oa=Ih+1024|0;fa=Ih+1020|0;Pa=Ih+1016|0;aa=Ih+1012|0;ba=Ih+1008|0;da=Ih+1004|0;ea=Ih+1e3|0;Rg=Ih+996|0;gb=Ih+992|0;Og=Ih+988|0;fb=Ih+984|0;Pg=Ih+980|0;Qg=Ih+976|0;Mg=Ih+972|0;Ng=Ih+968|0;oa=Ih+964|0;cb=Ih+960|0;la=Ih+956|0;bb=Ih+952|0;ma=Ih+948|0;na=Ih+944|0;ja=Ih+940|0;ka=Ih+936|0;Ra=Ih+932|0;$c=Ih+928|0;Ya=Ih+924|0;Ad=Ih+920|0;Na=Ih+916|0;Xa=Ih+912|0;Xe
=Ih+908|0;lg=Ih+904|0;_e=Ih+900|0;kg=Ih+896|0;Ve=Ih+892|0;Ze=Ih+888|0;Qe=Ih+884|0;ig=Ih+880|0;Te=Ih+876|0;hg=Ih+872|0;Oe=Ih+868|0;Se=Ih+864|0;eb=Ih+860|0;Dd=Ih+856|0;Mb=Ih+852|0;Cd=Ih+848|0;ab=Ih+844|0;Lb=Ih+840|0;Vg=Ih+836|0;Zg=Ih+832|0;V=Ih+828|0;M=Ih+824|0;O=Ih+820|0;_=Ih+816|0;sa=Ih+812|0;W=Ih+808|0;xa=Ih+804|0;Z=Ih+800|0;Xg=Ih+796|0;Yg=Ih+792|0;C=Ih+788|0;N=Ih+784|0;A=Ih+780|0;ra=Ih+776|0;va=Ih+772|0;wa=Ih+768|0;ta=Ih+764|0;P=Ih+760|0;Wg=Ih+756|0;ua=Ih+752|0;Ca=Ih+748|0;Ea=Ih+744|0;Ba=Ih+740|0;Da=Ih+736|0;X=Ih+732|0;Aa=Ih+728|0;U=Ih+724|0;Y=Ih+720|0;R=Ih+716|0;T=Ih+712|0;Q=Ih+708|0;S=Ih+704|0;Af=Ih+700|0;tg=Ih+696|0;Ff=Ih+692|0;wg=Ih+688|0;$f=Ih+684|0;og=Ih+680|0;pg=Ih+676|0;qg=Ih+672|0;Jf=Ih+668|0;Kf=Ih+664|0;If=Ih+660|0;Lf=Ih+656|0;wf=Ih+652|0;zf=Ih+648|0;Df=Ih+644|0;Ef=Ih+640|0;gg=Ih+636|0;ng=Ih+632|0;Gf=Ih+628|0;Hf=Ih+624|0;Eg=Ih+620|0;Gg=Ih+616|0;Dg=Ih+612|0;Fg=Ih+608|0;ug=Ih+604|0;Ag=Ih+600|0;yg=Ih+596|0;Cg=Ih+592|0;sg=Ih+588|0;xg=Ih+584|0;Sf=Ih+580|0;vg=Ih+576|0;zg=Ih+572|0;Bg=Ih+568|0;Bf=Ih+564|0;Pf=Ih+560|0;Nf=Ih+556|0;Rf=Ih+552|0;tf=Ih+548|0;Mf=Ih+544|0;_f=Ih+540|0;Cf=Ih+536|0;Of=Ih+532|0;Qf=Ih+528|0;Ec=Ih+524|0;ec=Ih+520|0;Jc=Ih+516|0;hc=Ih+512|0;uc=Ih+508|0;vc=Ih+504|0;Pb=Ih+500|0;wc=Ih+496|0;Vb=Ih+492|0;Wb=Ih+488|0;Mc=Ih+484|0;Xb=Ih+480|0;Ac=Ih+476|0;Dc=Ih+472|0;Hc=Ih+468|0;Ic=Ih+464|0;Ka=Ih+460|0;Ob=Ih+456|0;Kc=Ih+452|0;Lc=Ih+448|0;pc=Ih+444|0;Sc=Ih+440|0;oc=Ih+436|0;qc=Ih+432|0;fc=Ih+428|0;lc=Ih+424|0;jc=Ih+420|0;nc=Ih+416|0;dc=Ih+412|0;ic=Ih+408|0;cc=Ih+404|0;gc=Ih+400|0;kc=Ih+396|0;mc=Ih+392|0;Fc=Ih+388|0;$b=Ih+384|0;Zb=Ih+380|0;bc=Ih+376|0;xc=Ih+372|0;Yb=Ih+368|0;Fa=Ih+364|0;Gc=Ih+360|0;_b=Ih+356|0;ac=Ih+352|0;Rd=Ih+348|0;ld=Ih+344|0;Wd=Ih+340|0;od=Ih+336|0;Hd=Ih+332|0;Id=Ih+328|0;Gd=Ih+324|0;Jd=Ih+320|0;ad=Ih+316|0;bd=Ih+312|0;Zd=Ih+308|0;cd=Ih+304|0;Nd=Ih+300|0;Qd=Ih+296|0;Ud=Ih+292|0;Vd=Ih+288|0;_c=Ih+284|0;Fd=Ih+280|0;Xd=Ih+276|0;Yd=Ih+272|0;wd=Ih+268|0;yd=Ih+264|0;vd=Ih+260|0;xd=Ih+256|0;md=Ih+252|0;sd=Ih+248|0;qd=Ih+244|0;ud=Ih+240|0;kd=
Ih+236|0;pd=Ih+232|0;jd=Ih+228|0;nd=Ih+224|0;rd=Ih+220|0;td=Ih+216|0;Sd=Ih+212|0;gd=Ih+208|0;ed=Ih+204|0;id=Ih+200|0;Kd=Ih+196|0;dd=Ih+192|0;Tc=Ih+188|0;Td=Ih+184|0;fd=Ih+180|0;hd=Ih+176|0;qe=Ih+172|0;mf=Ih+168|0;ve=Ih+164|0;pf=Ih+160|0;ef=Ih+156|0;ff=Ih+152|0;bf=Ih+148|0;gf=Ih+144|0;Be=Ih+140|0;Ce=Ih+136|0;ye=Ih+132|0;De=Ih+128|0;me=Ih+124|0;pe=Ih+120|0;te=Ih+116|0;ue=Ih+112|0;Ne=Ih+108|0;af=Ih+104|0;we=Ih+100|0;xe=Ih+96|0;Xf=Ih+92|0;Zf=Ih+88|0;Wf=Ih+84|0;Yf=Ih+80|0;nf=Ih+76|0;Tf=Ih+72|0;rf=Ih+68|0;Vf=Ih+64|0;lf=Ih+60|0;qf=Ih+56|0;kf=Ih+52|0;of=Ih+48|0;sf=Ih+44|0;Uf=Ih+40|0;re=Ih+36|0;He=Ih+32|0;Fe=Ih+28|0;jf=Ih+24|0;je=Ih+20|0;Ee=Ih+16|0;zd=Ih+12|0;se=Ih+8|0;Ge=Ih+4|0;Ie=Ih;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Jh>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ih+1872>>2]=.9980267286300659;g[Ih+1868>>2]=.06279052048921585;g[Ih+1864>>2]=.9921147227287292;g[Ih+1860>>2]=.12533323466777802;g[Ih+1856>>2]=.4257792830467224;g[Ih+1852>>2]=.9048270583152771;g[Ih+1848>>2]=.24868988990783691;g[Ih+1844>>2]=.9685831665992737;g[Ih+1840>>2]=.7705132365226746;g[Ih+1836>>2]=.6374239921569824;g[Ih+1832>>2]=.8443279266357422;g[Ih+1828>>2]=.5358268022537231;g[Ih+1824>>2]=.6845471262931824;g[Ih+1820>>2]=.728968620300293;g[Ih+1816>>2]=.4817536771297455;g[Ih+1812>>2]=.8763066530227661;g[Ih+1808>>2]=.55901700258255;g[Ih+1804>>2]=.25;g[Ih+1800>>2]=.5877852439880371;g[Ih+1796>>2]=.9510565400123596;c[Hh>>2]=c[Jh>>2];c[m>>2]=(c[m>>2]|0)+(((c[Jh>>2]|0)-1|0)*48<<2);while(1){if((c[Hh>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Ib>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Rc>>2]=+g[za>>2]+ +g[Ib>>2];g[_d>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[hf>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[rg>>2]=+g[_d>>2]+ +g[hf>>2];g[Hg>>2]=+g[Rc>>2]+ +g[rg>>2];g[Oc>>2]=+g[_d>>2]-+g[hf>>2];g[Nc>>2]=+g[za>>2]-+g[Ib>>2];g[Ig>>2]=+g[q>>2]+ +g[Hg>>2];g[ze>>2]=+g[Nc>>2]*.9510565400123596+ 
+g[Oc>>2]*.5877852439880371;g[Pc>>2]=+g[Nc>>2]*.5877852439880371-+g[Oc>>2]*.9510565400123596;g[Qb>>2]=+g[q>>2]-+g[Hg>>2]*.25;g[Rb>>2]=(+g[Rc>>2]-+g[rg>>2])*.55901700258255;g[Sb>>2]=+g[Qb>>2]-+g[Rb>>2];g[cf>>2]=+g[Rb>>2]+ +g[Qb>>2];g[hh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[lh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[mh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[mb>>2]=+g[lh>>2]-+g[mh>>2];g[ih>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[jh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[kh>>2]=+g[ih>>2]+ +g[jh>>2];g[lb>>2]=+g[ih>>2]-+g[jh>>2];g[Ha>>2]=(+g[kh>>2]-+g[nh>>2])*.55901700258255;g[ce>>2]=+g[lb>>2]*.9510565400123596+ +g[mb>>2]*.5877852439880371;g[nb>>2]=+g[lb>>2]*.5877852439880371-+g[mb>>2]*.9510565400123596;g[oh>>2]=+g[kh>>2]+ +g[nh>>2];g[Ga>>2]=+g[hh>>2]-+g[oh>>2]*.25;g[r>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[v>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[w>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[Eb>>2]=+g[w>>2]-+g[v>>2];g[s>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[t>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[u>>2]=+g[s>>2]-+g[t>>2];g[Db>>2]=+g[s>>2]+ +g[t>>2];g[xb>>2]=(+g[u>>2]+ +g[x>>2])*.55901700258255;g[he>>2]=+g[Db>>2]*.9510565400123596+ +g[Eb>>2]*.5877852439880371;g[Fb>>2]=+g[Db>>2]*.5877852439880371-+g[Eb>>2]*.9510565400123596;g[y>>2]=+g[u>>2]-+g[x>>2];g[wb>>2]=+g[r>>2]-+g[y>>2]*.25;g[qh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[uh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[vh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[wh>>2]=+g[uh>>2]+ +g[vh>>2];g[ub>>2]=+g[uh>>2]-+g[vh>>2];g[rh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[sh>>2]=+g[c[l>>2]>>2];g[th>>2]=+g[rh>>2]+ +g[sh>>2];g[tb>>2]=+g[rh>>2]-+g[sh>>2];g[vb>>2]=+g[tb>>2]*.5877852439880371-+g[ub>>2]*.9510565400123596;g[Je>>2]=+g[tb>>2]*.9510565400123596+ +g[ub>>2]*.5877852439880371;g[Bb>>2]=(+g[th>>2]-+g[wh>>2])*.55901700258255;g[xh>>2]=+g[th>>2]+ 
+g[wh>>2];g[Ab>>2]=+g[qh>>2]-+g[xh>>2]*.25;g[_g>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[ch>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[dh>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[eh>>2]=+g[ch>>2]-+g[dh>>2];g[ib>>2]=+g[ch>>2]+ +g[dh>>2];g[$g>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[ah>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[bh>>2]=+g[$g>>2]-+g[ah>>2];g[Ja>>2]=+g[$g>>2]+ +g[ah>>2];g[jb>>2]=+g[Ja>>2]*.5877852439880371-+g[ib>>2]*.9510565400123596;g[ae>>2]=+g[Ja>>2]*.9510565400123596+ +g[ib>>2]*.5877852439880371;g[pb>>2]=(+g[bh>>2]-+g[eh>>2])*.55901700258255;g[fh>>2]=+g[bh>>2]+ +g[eh>>2];g[ob>>2]=+g[_g>>2]-+g[fh>>2]*.25;g[ph>>2]=+g[hh>>2]+ +g[oh>>2];g[yh>>2]=+g[qh>>2]+ +g[xh>>2];g[zh>>2]=+g[ph>>2]+ +g[yh>>2];g[gh>>2]=+g[_g>>2]+ +g[fh>>2];g[z>>2]=+g[r>>2]+ +g[y>>2];g[ya>>2]=+g[gh>>2]+ +g[z>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[kb>>2]=+g[Ia>>2]-+g[jb>>2];g[Uc>>2]=+g[Ia>>2]+ +g[jb>>2];g[qb>>2]=+g[ob>>2]-+g[pb>>2];g[rb>>2]=+g[nb>>2]+ +g[qb>>2];g[Vc>>2]=+g[qb>>2]-+g[nb>>2];g[sb>>2]=+g[kb>>2]*.8763066530227661-+g[rb>>2]*.4817536771297455;g[Ld>>2]=+g[Vc>>2]*.728968620300293+ +g[Uc>>2]*.6845471262931824;g[yc>>2]=+g[rb>>2]*.8763066530227661+ +g[kb>>2]*.4817536771297455;g[Wc>>2]=+g[Uc>>2]*.728968620300293-+g[Vc>>2]*.6845471262931824;g[ge>>2]=+g[Bb>>2]+ +g[Ab>>2];g[ie>>2]=+g[ge>>2]-+g[he>>2];g[eg>>2]=+g[ge>>2]+ +g[he>>2];g[Ke>>2]=+g[wb>>2]+ +g[xb>>2];g[Le>>2]=+g[Je>>2]+ +g[Ke>>2];g[dg>>2]=+g[Ke>>2]-+g[Je>>2];g[Me>>2]=+g[ie>>2]*.5358268022537231-+g[Le>>2]*.8443279266357422;g[vf>>2]=+g[dg>>2]*.6374239921569824+ +g[eg>>2]*.7705132365226746;g[le>>2]=+g[Le>>2]*.5358268022537231+ +g[ie>>2]*.8443279266357422;g[fg>>2]=+g[dg>>2]*.7705132365226746-+g[eg>>2]*.6374239921569824;g[$d>>2]=+g[Ha>>2]+ +g[Ga>>2];g[be>>2]=+g[$d>>2]-+g[ae>>2];g[ag>>2]=+g[$d>>2]+ +g[ae>>2];g[de>>2]=+g[pb>>2]+ +g[ob>>2];g[ee>>2]=+g[ce>>2]+ +g[de>>2];g[bg>>2]=+g[de>>2]-+g[ce>>2];g[fe>>2]=+g[be>>2]*.9685831665992737-+g[ee>>2]*.24868988990783691;g[uf>>2]=+g[bg>>2]*.5358268022537231+ 
+g[ag>>2]*.8443279266357422;g[ke>>2]=+g[ee>>2]*.9685831665992737+ +g[be>>2]*.24868988990783691;g[cg>>2]=+g[ag>>2]*.5358268022537231-+g[bg>>2]*.8443279266357422;g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[zb>>2]=+g[vb>>2]+ +g[yb>>2];g[Xc>>2]=+g[yb>>2]-+g[vb>>2];g[Cb>>2]=+g[Ab>>2]-+g[Bb>>2];g[Gb>>2]=+g[Cb>>2]-+g[Fb>>2];g[Yc>>2]=+g[Cb>>2]+ +g[Fb>>2];g[Hb>>2]=+g[zb>>2]*.9048270583152771+ +g[Gb>>2]*.4257792830467224;g[Md>>2]=+g[Yc>>2]*.12533323466777802-+g[Xc>>2]*.9921147227287292;g[zc>>2]=+g[Gb>>2]*.9048270583152771-+g[zb>>2]*.4257792830467224;g[Zc>>2]=+g[Xc>>2]*.12533323466777802+ +g[Yc>>2]*.9921147227287292;g[D>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[E>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[F>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[G>>2]=+g[E>>2]-+g[F>>2];g[H>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[I>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[K>>2]=+g[G>>2]+ +g[J>>2];g[sc>>2]=+g[H>>2]+ +g[I>>2];g[rc>>2]=+g[E>>2]+ +g[F>>2];g[L>>2]=+g[D>>2]+ +g[K>>2];g[df>>2]=+g[rc>>2]*.9510565400123596+ +g[sc>>2]*.5877852439880371;g[tc>>2]=+g[rc>>2]*.5877852439880371-+g[sc>>2]*.9510565400123596;g[Qc>>2]=+g[D>>2]-+g[K>>2]*.25;g[Tb>>2]=(+g[G>>2]-+g[J>>2])*.55901700258255;g[Ub>>2]=+g[Qc>>2]-+g[Tb>>2];g[Ae>>2]=+g[Tb>>2]+ +g[Qc>>2];g[Ah>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Bh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Ch>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Dh>>2]=+g[Bh>>2]+ +g[Ch>>2];g[Sa>>2]=+g[Bh>>2]-+g[Ch>>2];g[Eh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[Fh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Gh>>2]=+g[Eh>>2]+ +g[Fh>>2];g[Ta>>2]=+g[Eh>>2]-+g[Fh>>2];g[Ma>>2]=(+g[Dh>>2]-+g[Gh>>2])*.55901700258255;g[Re>>2]=+g[Sa>>2]*.9510565400123596+ +g[Ta>>2]*.5877852439880371;g[Ua>>2]=+g[Sa>>2]*.5877852439880371-+g[Ta>>2]*.9510565400123596;g[Jg>>2]=+g[Dh>>2]+ 
+g[Gh>>2];g[La>>2]=+g[Ah>>2]-+g[Jg>>2]*.25;g[$>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[aa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[ba>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[Oa>>2]=+g[aa>>2]+ +g[ba>>2];g[da>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ea>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[Pa>>2]=+g[da>>2]+ +g[ea>>2];g[Qa>>2]=+g[Oa>>2]*.5877852439880371-+g[Pa>>2]*.9510565400123596;g[Pe>>2]=+g[Oa>>2]*.9510565400123596+ +g[Pa>>2]*.5877852439880371;g[Wa>>2]=(+g[ca>>2]-+g[fa>>2])*.55901700258255;g[ga>>2]=+g[ca>>2]+ +g[fa>>2];g[Va>>2]=+g[$>>2]-+g[ga>>2]*.25;g[Lg>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Pg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Qg>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Rg>>2]=+g[Pg>>2]+ +g[Qg>>2];g[gb>>2]=+g[Pg>>2]-+g[Qg>>2];g[Mg>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Ng>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[Og>>2]=+g[Mg>>2]+ +g[Ng>>2];g[fb>>2]=+g[Mg>>2]-+g[Ng>>2];g[$a>>2]=(+g[Og>>2]-+g[Rg>>2])*.55901700258255;g[Ye>>2]=+g[fb>>2]*.9510565400123596+ +g[gb>>2]*.5877852439880371;g[hb>>2]=+g[fb>>2]*.5877852439880371-+g[gb>>2]*.9510565400123596;g[Sg>>2]=+g[Og>>2]+ +g[Rg>>2];g[_a>>2]=+g[Lg>>2]-+g[Sg>>2]*.25;g[ia>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[ma>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[oa>>2]=+g[ma>>2]+ +g[na>>2];g[cb>>2]=+g[na>>2]-+g[ma>>2];g[ja>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[ka>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2];g[bb>>2]=+g[ja>>2]+ +g[ka>>2];g[db>>2]=+g[bb>>2]*.5877852439880371-+g[cb>>2]*.9510565400123596;g[We>>2]=+g[bb>>2]*.9510565400123596+ +g[cb>>2]*.5877852439880371;g[Kb>>2]=(+g[la>>2]+ +g[oa>>2])*.55901700258255;g[pa>>2]=+g[la>>2]-+g[oa>>2];g[Jb>>2]=+g[ia>>2]-+g[pa>>2]*.25;g[Kg>>2]=+g[Ah>>2]+ +g[Jg>>2];g[Tg>>2]=+g[Lg>>2]+ +g[Sg>>2];g[Ug>>2]=+g[Kg>>2]+ +g[Tg>>2];g[ha>>2]=+g[$>>2]+ +g[ga>>2];g[qa>>2]=+g[ia>>2]+ +g[pa>>2];g[B>>2]=+g[ha>>2]+ 
+g[qa>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[Ra>>2]=+g[Na>>2]-+g[Qa>>2];g[$c>>2]=+g[Na>>2]+ +g[Qa>>2];g[Xa>>2]=+g[Va>>2]-+g[Wa>>2];g[Ya>>2]=+g[Ua>>2]+ +g[Xa>>2];g[Ad>>2]=+g[Xa>>2]-+g[Ua>>2];g[Za>>2]=+g[Ra>>2]*.5358268022537231-+g[Ya>>2]*.8443279266357422;g[Od>>2]=+g[Ad>>2]*.06279052048921585+ +g[$c>>2]*.9980267286300659;g[Bc>>2]=+g[Ya>>2]*.5358268022537231+ +g[Ra>>2]*.8443279266357422;g[Bd>>2]=+g[$c>>2]*.06279052048921585-+g[Ad>>2]*.9980267286300659;g[Ve>>2]=+g[$a>>2]+ +g[_a>>2];g[Xe>>2]=+g[Ve>>2]-+g[We>>2];g[lg>>2]=+g[Ve>>2]+ +g[We>>2];g[Ze>>2]=+g[Jb>>2]+ +g[Kb>>2];g[_e>>2]=+g[Ye>>2]+ +g[Ze>>2];g[kg>>2]=+g[Ze>>2]-+g[Ye>>2];g[$e>>2]=+g[Xe>>2]*.728968620300293-+g[_e>>2]*.6845471262931824;g[yf>>2]=+g[lg>>2]*.12533323466777802-+g[kg>>2]*.9921147227287292;g[oe>>2]=+g[_e>>2]*.728968620300293+ +g[Xe>>2]*.6845471262931824;g[mg>>2]=+g[kg>>2]*.12533323466777802+ +g[lg>>2]*.9921147227287292;g[Oe>>2]=+g[Ma>>2]+ +g[La>>2];g[Qe>>2]=+g[Oe>>2]-+g[Pe>>2];g[ig>>2]=+g[Oe>>2]+ +g[Pe>>2];g[Se>>2]=+g[Wa>>2]+ +g[Va>>2];g[Te>>2]=+g[Re>>2]+ +g[Se>>2];g[hg>>2]=+g[Se>>2]-+g[Re>>2];g[Ue>>2]=+g[Qe>>2]*.8763066530227661-+g[Te>>2]*.4817536771297455;g[xf>>2]=+g[ig>>2]*.9048270583152771-+g[hg>>2]*.4257792830467224;g[ne>>2]=+g[Te>>2]*.8763066530227661+ +g[Qe>>2]*.4817536771297455;g[jg>>2]=+g[hg>>2]*.9048270583152771+ +g[ig>>2]*.4257792830467224;g[ab>>2]=+g[_a>>2]-+g[$a>>2];g[eb>>2]=+g[ab>>2]-+g[db>>2];g[Dd>>2]=+g[ab>>2]+ +g[db>>2];g[Lb>>2]=+g[Jb>>2]-+g[Kb>>2];g[Mb>>2]=+g[hb>>2]+ +g[Lb>>2];g[Cd>>2]=+g[Lb>>2]-+g[hb>>2];g[Nb>>2]=+g[eb>>2]*.06279052048921585-+g[Mb>>2]*.9980267286300659;g[Pd>>2]=+g[Dd>>2]*.7705132365226746-+g[Cd>>2]*.6374239921569824;g[Cc>>2]=+g[Mb>>2]*.06279052048921585+ +g[eb>>2]*.9980267286300659;g[Ed>>2]=+g[Cd>>2]*.7705132365226746+ +g[Dd>>2]*.6374239921569824;g[Xg>>2]=(+g[zh>>2]-+g[Ug>>2])*.55901700258255;g[Vg>>2]=+g[zh>>2]+ +g[Ug>>2];g[Yg>>2]=+g[Ig>>2]-+g[Vg>>2]*.25;g[Zg>>2]=+g[Xg>>2]+ 
+g[Yg>>2];g[V>>2]=+g[Yg>>2]-+g[Xg>>2];g[C>>2]=(+g[ya>>2]-+g[B>>2])*.55901700258255;g[M>>2]=+g[ya>>2]+ +g[B>>2];g[N>>2]=+g[L>>2]-+g[M>>2]*.25;g[O>>2]=+g[C>>2]+ +g[N>>2];g[_>>2]=+g[N>>2]-+g[C>>2];g[A>>2]=+g[gh>>2]-+g[z>>2];g[ra>>2]=+g[ha>>2]-+g[qa>>2];g[sa>>2]=+g[A>>2]*.9510565400123596+ +g[ra>>2]*.5877852439880371;g[W>>2]=+g[A>>2]*.5877852439880371-+g[ra>>2]*.9510565400123596;g[va>>2]=+g[ph>>2]-+g[yh>>2];g[wa>>2]=+g[Kg>>2]-+g[Tg>>2];g[xa>>2]=+g[va>>2]*.9510565400123596+ +g[wa>>2]*.5877852439880371;g[Z>>2]=+g[va>>2]*.5877852439880371-+g[wa>>2]*.9510565400123596;g[c[k>>2]>>2]=+g[Ig>>2]+ +g[Vg>>2];g[c[l>>2]>>2]=+g[L>>2]+ +g[M>>2];g[ta>>2]=+g[Zg>>2]-+g[sa>>2];g[P>>2]=+g[xa>>2]+ +g[O>>2];g[Wg>>2]=+g[(c[m>>2]|0)+32>>2];g[ua>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[Wg>>2]*+g[ta>>2]-+g[ua>>2]*+g[P>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[ua>>2]*+g[ta>>2]+ +g[Wg>>2]*+g[P>>2];g[Ca>>2]=+g[V>>2]+ +g[W>>2];g[Ea>>2]=+g[_>>2]-+g[Z>>2];g[Ba>>2]=+g[(c[m>>2]|0)+112>>2];g[Da>>2]=+g[(c[m>>2]|0)+116>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Ba>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[Ba>>2]*+g[Ea>>2];g[X>>2]=+g[V>>2]-+g[W>>2];g[Aa>>2]=+g[Z>>2]+ +g[_>>2];g[U>>2]=+g[(c[m>>2]|0)+72>>2];g[Y>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[U>>2]*+g[X>>2]-+g[Y>>2]*+g[Aa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Y>>2]*+g[X>>2]+ +g[U>>2]*+g[Aa>>2];g[R>>2]=+g[Zg>>2]+ +g[sa>>2];g[T>>2]=+g[O>>2]-+g[xa>>2];g[Q>>2]=+g[(c[m>>2]|0)+152>>2];g[S>>2]=+g[(c[m>>2]|0)+156>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[Q>>2]*+g[R>>2]-+g[S>>2]*+g[T>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[S>>2]*+g[R>>2]+ +g[Q>>2]*+g[T>>2];g[wf>>2]=+g[uf>>2]+ +g[vf>>2];g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[Af>>2]=+g[wf>>2]*.9510565400123596+ +g[zf>>2]*.5877852439880371;g[tg>>2]=+g[wf>>2]*.5877852439880371-+g[zf>>2]*.9510565400123596;g[Df>>2]=+g[cg>>2]-+g[fg>>2];g[Ef>>2]=+g[mg>>2]-+g[jg>>2];g[Ff>>2]=+g[Df>>2]*.9510565400123596+ 
+g[Ef>>2]*.5877852439880371;g[wg>>2]=+g[Df>>2]*.5877852439880371-+g[Ef>>2]*.9510565400123596;g[$f>>2]=+g[cf>>2]+ +g[df>>2];g[gg>>2]=+g[cg>>2]+ +g[fg>>2];g[ng>>2]=+g[jg>>2]+ +g[mg>>2];g[og>>2]=+g[gg>>2]-+g[ng>>2];g[pg>>2]=+g[$f>>2]-+g[og>>2]*.25;g[qg>>2]=(+g[gg>>2]+ +g[ng>>2])*.55901700258255;g[Jf>>2]=+g[Ae>>2]-+g[ze>>2];g[Gf>>2]=+g[uf>>2]-+g[vf>>2];g[Hf>>2]=+g[xf>>2]+ +g[yf>>2];g[Kf>>2]=+g[Gf>>2]+ +g[Hf>>2];g[If>>2]=(+g[Gf>>2]-+g[Hf>>2])*.55901700258255;g[Lf>>2]=+g[Jf>>2]-+g[Kf>>2]*.25;g[Eg>>2]=+g[$f>>2]+ +g[og>>2];g[Gg>>2]=+g[Jf>>2]+ +g[Kf>>2];g[Dg>>2]=+g[(c[m>>2]|0)+24>>2];g[Fg>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Dg>>2]*+g[Eg>>2]-+g[Fg>>2]*+g[Gg>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[Fg>>2]*+g[Eg>>2]+ +g[Dg>>2]*+g[Gg>>2];g[sg>>2]=+g[pg>>2]-+g[qg>>2];g[ug>>2]=+g[sg>>2]-+g[tg>>2];g[Ag>>2]=+g[sg>>2]+ +g[tg>>2];g[xg>>2]=+g[Lf>>2]-+g[If>>2];g[yg>>2]=+g[wg>>2]+ +g[xg>>2];g[Cg>>2]=+g[xg>>2]-+g[wg>>2];g[Sf>>2]=+g[(c[m>>2]|0)+104>>2];g[vg>>2]=+g[(c[m>>2]|0)+108>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Sf>>2]*+g[ug>>2]-+g[vg>>2]*+g[yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[vg>>2]*+g[ug>>2]+ +g[Sf>>2]*+g[yg>>2];g[zg>>2]=+g[(c[m>>2]|0)+144>>2];g[Bg>>2]=+g[(c[m>>2]|0)+148>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[zg>>2]*+g[Ag>>2]-+g[Bg>>2]*+g[Cg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[Bg>>2]*+g[Ag>>2]+ +g[zg>>2]*+g[Cg>>2];g[tf>>2]=+g[pg>>2]+ +g[qg>>2];g[Bf>>2]=+g[tf>>2]-+g[Af>>2];g[Pf>>2]=+g[tf>>2]+ +g[Af>>2];g[Mf>>2]=+g[If>>2]+ +g[Lf>>2];g[Nf>>2]=+g[Ff>>2]+ +g[Mf>>2];g[Rf>>2]=+g[Mf>>2]-+g[Ff>>2];g[_f>>2]=+g[(c[m>>2]|0)+64>>2];g[Cf>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[_f>>2]*+g[Bf>>2]-+g[Cf>>2]*+g[Nf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[Cf>>2]*+g[Bf>>2]+ +g[_f>>2]*+g[Nf>>2];g[Of>>2]=+g[(c[m>>2]|0)+184>>2];g[Qf>>2]=+g[(c[m>>2]|0)+188>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Of>>2]*+g[Pf>>2]-+g[Qf>>2]*+g[Rf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Qf>>2]*+g[Pf>>2]+ 
+g[Of>>2]*+g[Rf>>2];g[Ac>>2]=+g[yc>>2]-+g[zc>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[Ec>>2]=+g[Ac>>2]*.9510565400123596+ +g[Dc>>2]*.5877852439880371;g[ec>>2]=+g[Ac>>2]*.5877852439880371-+g[Dc>>2]*.9510565400123596;g[Hc>>2]=+g[sb>>2]+ +g[Hb>>2];g[Ic>>2]=+g[Za>>2]-+g[Nb>>2];g[Jc>>2]=+g[Hc>>2]*.9510565400123596+ +g[Ic>>2]*.5877852439880371;g[hc>>2]=+g[Hc>>2]*.5877852439880371-+g[Ic>>2]*.9510565400123596;g[uc>>2]=+g[Sb>>2]-+g[tc>>2];g[Ka>>2]=+g[sb>>2]-+g[Hb>>2];g[Ob>>2]=+g[Za>>2]+ +g[Nb>>2];g[vc>>2]=+g[Ka>>2]+ +g[Ob>>2];g[Pb>>2]=(+g[Ka>>2]-+g[Ob>>2])*.55901700258255;g[wc>>2]=+g[uc>>2]-+g[vc>>2]*.25;g[Vb>>2]=+g[Pc>>2]+ +g[Ub>>2];g[Kc>>2]=+g[yc>>2]+ +g[zc>>2];g[Lc>>2]=+g[Bc>>2]+ +g[Cc>>2];g[Wb>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Mc>>2]=(+g[Kc>>2]-+g[Lc>>2])*.55901700258255;g[Xb>>2]=+g[Vb>>2]-+g[Wb>>2]*.25;g[pc>>2]=+g[uc>>2]+ +g[vc>>2];g[Sc>>2]=+g[Vb>>2]+ +g[Wb>>2];g[oc>>2]=+g[(c[m>>2]|0)+8>>2];g[qc>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[oc>>2]*+g[pc>>2]-+g[qc>>2]*+g[Sc>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[qc>>2]*+g[pc>>2]+ +g[oc>>2]*+g[Sc>>2];g[dc>>2]=+g[wc>>2]-+g[Pb>>2];g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[lc>>2]=+g[dc>>2]+ +g[ec>>2];g[ic>>2]=+g[Xb>>2]-+g[Mc>>2];g[jc>>2]=+g[hc>>2]+ +g[ic>>2];g[nc>>2]=+g[ic>>2]-+g[hc>>2];g[cc>>2]=+g[(c[m>>2]|0)+88>>2];g[gc>>2]=+g[(c[m>>2]|0)+92>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[cc>>2]*+g[fc>>2]-+g[gc>>2]*+g[jc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[gc>>2]*+g[fc>>2]+ +g[cc>>2]*+g[jc>>2];g[kc>>2]=+g[(c[m>>2]|0)+128>>2];g[mc>>2]=+g[(c[m>>2]|0)+132>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[kc>>2]*+g[lc>>2]-+g[mc>>2]*+g[nc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[mc>>2]*+g[lc>>2]+ +g[kc>>2]*+g[nc>>2];g[xc>>2]=+g[Pb>>2]+ +g[wc>>2];g[Fc>>2]=+g[xc>>2]-+g[Ec>>2];g[$b>>2]=+g[xc>>2]+ +g[Ec>>2];g[Yb>>2]=+g[Mc>>2]+ +g[Xb>>2];g[Zb>>2]=+g[Jc>>2]+ 
+g[Yb>>2];g[bc>>2]=+g[Yb>>2]-+g[Jc>>2];g[Fa>>2]=+g[(c[m>>2]|0)+48>>2];g[Gc>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Fa>>2]*+g[Fc>>2]-+g[Gc>>2]*+g[Zb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Gc>>2]*+g[Fc>>2]+ +g[Fa>>2]*+g[Zb>>2];g[_b>>2]=+g[(c[m>>2]|0)+168>>2];g[ac>>2]=+g[(c[m>>2]|0)+172>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[_b>>2]*+g[$b>>2]-+g[ac>>2]*+g[bc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[ac>>2]*+g[$b>>2]+ +g[_b>>2]*+g[bc>>2];g[Nd>>2]=+g[Ld>>2]-+g[Md>>2];g[Qd>>2]=+g[Od>>2]-+g[Pd>>2];g[Rd>>2]=+g[Nd>>2]*.9510565400123596+ +g[Qd>>2]*.5877852439880371;g[ld>>2]=+g[Nd>>2]*.5877852439880371-+g[Qd>>2]*.9510565400123596;g[Ud>>2]=+g[Wc>>2]+ +g[Zc>>2];g[Vd>>2]=+g[Bd>>2]+ +g[Ed>>2];g[Wd>>2]=+g[Ud>>2]*.9510565400123596+ +g[Vd>>2]*.5877852439880371;g[od>>2]=+g[Ud>>2]*.5877852439880371-+g[Vd>>2]*.9510565400123596;g[Hd>>2]=+g[Sb>>2]+ +g[tc>>2];g[_c>>2]=+g[Wc>>2]-+g[Zc>>2];g[Fd>>2]=+g[Bd>>2]-+g[Ed>>2];g[Id>>2]=+g[_c>>2]+ +g[Fd>>2];g[Gd>>2]=(+g[_c>>2]-+g[Fd>>2])*.55901700258255;g[Jd>>2]=+g[Hd>>2]-+g[Id>>2]*.25;g[ad>>2]=+g[Ub>>2]-+g[Pc>>2];g[Xd>>2]=+g[Ld>>2]+ +g[Md>>2];g[Yd>>2]=+g[Od>>2]+ +g[Pd>>2];g[bd>>2]=+g[Xd>>2]+ +g[Yd>>2];g[Zd>>2]=(+g[Xd>>2]-+g[Yd>>2])*.55901700258255;g[cd>>2]=+g[ad>>2]-+g[bd>>2]*.25;g[wd>>2]=+g[Hd>>2]+ +g[Id>>2];g[yd>>2]=+g[ad>>2]+ +g[bd>>2];g[vd>>2]=+g[(c[m>>2]|0)+16>>2];g[xd>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[vd>>2]*+g[wd>>2]-+g[xd>>2]*+g[yd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[xd>>2]*+g[wd>>2]+ +g[vd>>2]*+g[yd>>2];g[kd>>2]=+g[Jd>>2]-+g[Gd>>2];g[md>>2]=+g[kd>>2]-+g[ld>>2];g[sd>>2]=+g[kd>>2]+ +g[ld>>2];g[pd>>2]=+g[cd>>2]-+g[Zd>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[ud>>2]=+g[pd>>2]-+g[od>>2];g[jd>>2]=+g[(c[m>>2]|0)+96>>2];g[nd>>2]=+g[(c[m>>2]|0)+100>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[jd>>2]*+g[md>>2]-+g[nd>>2]*+g[qd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[nd>>2]*+g[md>>2]+ 
+g[jd>>2]*+g[qd>>2];g[rd>>2]=+g[(c[m>>2]|0)+136>>2];g[td>>2]=+g[(c[m>>2]|0)+140>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[rd>>2]*+g[sd>>2]-+g[td>>2]*+g[ud>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[td>>2]*+g[sd>>2]+ +g[rd>>2]*+g[ud>>2];g[Kd>>2]=+g[Gd>>2]+ +g[Jd>>2];g[Sd>>2]=+g[Kd>>2]-+g[Rd>>2];g[gd>>2]=+g[Kd>>2]+ +g[Rd>>2];g[dd>>2]=+g[Zd>>2]+ +g[cd>>2];g[ed>>2]=+g[Wd>>2]+ +g[dd>>2];g[id>>2]=+g[dd>>2]-+g[Wd>>2];g[Tc>>2]=+g[(c[m>>2]|0)+56>>2];g[Td>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Tc>>2]*+g[Sd>>2]-+g[Td>>2]*+g[ed>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Td>>2]*+g[Sd>>2]+ +g[Tc>>2]*+g[ed>>2];g[fd>>2]=+g[(c[m>>2]|0)+176>>2];g[hd>>2]=+g[(c[m>>2]|0)+180>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[fd>>2]*+g[gd>>2]-+g[hd>>2]*+g[id>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[hd>>2]*+g[gd>>2]+ +g[fd>>2]*+g[id>>2];g[me>>2]=+g[ke>>2]-+g[le>>2];g[pe>>2]=+g[ne>>2]-+g[oe>>2];g[qe>>2]=+g[me>>2]*.9510565400123596+ +g[pe>>2]*.5877852439880371;g[mf>>2]=+g[me>>2]*.5877852439880371-+g[pe>>2]*.9510565400123596;g[te>>2]=+g[fe>>2]-+g[Me>>2];g[ue>>2]=+g[Ue>>2]-+g[$e>>2];g[ve>>2]=+g[te>>2]*.9510565400123596+ +g[ue>>2]*.5877852439880371;g[pf>>2]=+g[te>>2]*.5877852439880371-+g[ue>>2]*.9510565400123596;g[ef>>2]=+g[cf>>2]-+g[df>>2];g[Ne>>2]=+g[fe>>2]+ +g[Me>>2];g[af>>2]=+g[Ue>>2]+ +g[$e>>2];g[ff>>2]=+g[Ne>>2]+ +g[af>>2];g[bf>>2]=(+g[Ne>>2]-+g[af>>2])*.55901700258255;g[gf>>2]=+g[ef>>2]-+g[ff>>2]*.25;g[Be>>2]=+g[ze>>2]+ +g[Ae>>2];g[we>>2]=+g[ke>>2]+ +g[le>>2];g[xe>>2]=+g[ne>>2]+ +g[oe>>2];g[Ce>>2]=+g[we>>2]+ +g[xe>>2];g[ye>>2]=(+g[we>>2]-+g[xe>>2])*.55901700258255;g[De>>2]=+g[Be>>2]-+g[Ce>>2]*.25;g[Xf>>2]=+g[ef>>2]+ +g[ff>>2];g[Zf>>2]=+g[Be>>2]+ +g[Ce>>2];g[Wf>>2]=+g[c[m>>2]>>2];g[Yf>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[Wf>>2]*+g[Xf>>2]-+g[Yf>>2]*+g[Zf>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[Yf>>2]*+g[Xf>>2]+ +g[Wf>>2]*+g[Zf>>2];g[lf>>2]=+g[gf>>2]-+g[bf>>2];g[nf>>2]=+g[lf>>2]-+g[mf>>2];g[Tf>>2]=+g[lf>>2]+ 
+g[mf>>2];g[qf>>2]=+g[De>>2]-+g[ye>>2];g[rf>>2]=+g[pf>>2]+ +g[qf>>2];g[Vf>>2]=+g[qf>>2]-+g[pf>>2];g[kf>>2]=+g[(c[m>>2]|0)+80>>2];g[of>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[kf>>2]*+g[nf>>2]-+g[of>>2]*+g[rf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[of>>2]*+g[nf>>2]+ +g[kf>>2]*+g[rf>>2];g[sf>>2]=+g[(c[m>>2]|0)+120>>2];g[Uf>>2]=+g[(c[m>>2]|0)+124>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[sf>>2]*+g[Tf>>2]-+g[Uf>>2]*+g[Vf>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Uf>>2]*+g[Tf>>2]+ +g[sf>>2]*+g[Vf>>2];g[je>>2]=+g[bf>>2]+ +g[gf>>2];g[re>>2]=+g[je>>2]-+g[qe>>2];g[He>>2]=+g[je>>2]+ +g[qe>>2];g[Ee>>2]=+g[ye>>2]+ +g[De>>2];g[Fe>>2]=+g[ve>>2]+ +g[Ee>>2];g[jf>>2]=+g[Ee>>2]-+g[ve>>2];g[zd>>2]=+g[(c[m>>2]|0)+40>>2];g[se>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[zd>>2]*+g[re>>2]-+g[se>>2]*+g[Fe>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[se>>2]*+g[re>>2]+ +g[zd>>2]*+g[Fe>>2];g[Ge>>2]=+g[(c[m>>2]|0)+160>>2];g[Ie>>2]=+g[(c[m>>2]|0)+164>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Ge>>2]*+g[He>>2]-+g[Ie>>2]*+g[jf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Ie>>2]*+g[He>>2]+ +g[Ge>>2]*+g[jf>>2];c[Hh>>2]=(c[Hh>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+192;c[n>>2]=c[n>>2]^c[2998]}i=Ih;return}function Xt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,62,8008);i=b;return}function Yt(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0;z=i;i=i+64|0;k=z+60|0;l=z+56|0;m=z+52|0;n=z+48|0;A=z+44|0;o=z+40|0;p=z+36|0;y=z+32|0;q=z+28|0;r=z+24|0;v=z+20|0;s=z+16|0;t=z+12|0;x=z+8|0;u=z+4|0;w=z;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[A>>2]=f;c[o>>2]=h;c[p>>2]=j;c[y>>2]=c[A>>2];c[m>>2]=(c[m>>2]|0)+((c[A>>2]|0)-1<<1<<2);while(1){if((c[y>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[c[l>>2]>>2];g[v>>2]=+g[q>>2]-+g[r>>2];g[s>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[x>>2]=+g[s>>2]+ +g[t>>2];g[c[k>>2]>>2]=+g[q>>2]+ +g[r>>2];g[c[l>>2]>>2]=+g[s>>2]-+g[t>>2];g[u>>2]=+g[c[m>>2]>>2];g[w>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[u>>2]*+g[v>>2]-+g[w>>2]*+g[x>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[w>>2]*+g[v>>2]+ +g[u>>2]*+g[x>>2];c[y>>2]=(c[y>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+8}i=z;return}function Zt(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,63,8056);i=b;return}function _t(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0
,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0;Ci=i;i=i+2048|0;k=Ci+2040|0;l=Ci+2036|0;m=Ci+2032|0;n=Ci+2028|0;Di=Ci+2024|0;o=Ci+2020|0;p=Ci+2016|0;Bi=Ci+1984|0;Od=Ci+1980|0;pg=Ci+1976|0;sg=Ci+1972|0;bf=Ci+1968|0;gi=Ci+1964|0;t=Ci+1960|0;ef=Ci+1956|0;qg=Ci+1952|0;Dc=Ci+1948|0;pc=Ci+1944|0;Ja=Ci+1940|0;Gb=Ci+1936|0;Vd=Ci+1932|0;tg=Ci+1928|0;cb=Ci+1924|0;dc=Ci+1920|0;vi=Ci+1916|0;V=Ci+1912|0;wf=Ci+1908|0;vg=Ci+1904|0;zf=Ci+1900|0;wg=Ci+1896|0;ga=Ci+1892|0;Hb=Ci+1888|0;fb=Ci+1884|0;Fc=Ci+1880|0;dd=Ci+1876|0;je=Ci+1872|0;kd=Ci+1868|0;gf=Ci+1864|0;Jb=Ci+1860|0;Ec=Ci+1856|0;Nh=Ci+1852|0;ia=Ci+1848|0;Lf=Ci+1844|0;wh=Ci+1840|0;Of=Ci+1836|0;vh=Ci+1832|0;xa=Ci+1828|0;La=Ci+1824|0;tc=Ci+1820|0;jc=Ci+1816|0;xd=Ci+1812|0;kf=Ci+1808|0;de=Ci+1804|0;lf=Ci+1800|0;wc=Ci+1796|0;kc=Ci+1792|0;ai=Ci+1788|0;B=Ci+1784|0;Ef=Ci+1780|0;zh=Ci+1776|0;Hf=Ci+1772|0;yh=Ci+1768|0;Q=Ci+1764|0;Ma=Ci+1760|0;Ob=Ci+1756|0;gc=Ci+1752|0;Pe=Ci+1748|0;nf=Ci+1744|0;We=Ci+1740|0;of=Ci+1736|0;Rb=Ci+1732|0;hc=Ci+1728|0;Ib=Ci+1724|0;Md=Ci+1720|0;Ea=Ci+1716|0;Qd=Ci+1712|0;Ha=Ci+1708|0;Td=Ci+1704|0;hf=Ci+1700|0;$e=Ci+1696|0;bi=Ci+1692|0;Pd=Ci+1688|0;Y=Ci+1684|0;af=Ci+1680|0;Aa=Ci+1676|0;Nd=Ci+1672|0;ei=Ci+1668|0;Sd=Ci+1664|0;q=Ci+1660|0;za=Ci+1656|0;Ca=Ci+1652|0;Da=Ci+1648|0;Fa=Ci+1644|0;Ga=Ci+1640|0;Rc=Ci+1636|0;_d=Ci+1632|0;Ah=Ci+1628|0;Ch=Ci+1624|0;W=Ci+1620|0;X=Ci+1616|0;Z=Ci+1612|0;_=Ci+1608|0;ci=Ci+1604|0;di=Ci+1600|0;rg=Ci+1596|0;fi=Ci+1592|0;Ba=Ci+1588|0;Ia=Ci+1584|0;cf=Ci+1580|0;df=Ci+1576|0;Bc=Ci+1572|0;Cc=Ci+1568|0;Rd=Ci+1564|0;Ud=Ci+1560|0;ab=Ci+1556|0;bb=Ci+1552|0;ji=Ci+1548|
0;hd=Ci+1544|0;ba=Ci+1540|0;fd=Ci+1536|0;ea=Ci+1532|0;id=Ci+1528|0;mi=Ci+1524|0;ed=Ci+1520|0;qi=Ci+1516|0;ad=Ci+1512|0;w=Ci+1508|0;Yd=Ci+1504|0;z=Ci+1500|0;bd=Ci+1496|0;ti=Ci+1492|0;Xd=Ci+1488|0;hi=Ci+1484|0;ii=Ci+1480|0;$=Ci+1476|0;aa=Ci+1472|0;ca=Ci+1468|0;da=Ci+1464|0;ki=Ci+1460|0;li=Ci+1456|0;oi=Ci+1452|0;pi=Ci+1448|0;u=Ci+1444|0;v=Ci+1440|0;x=Ci+1436|0;y=Ci+1432|0;ri=Ci+1428|0;si=Ci+1424|0;ni=Ci+1420|0;ui=Ci+1416|0;uf=Ci+1412|0;vf=Ci+1408|0;xf=Ci+1404|0;yf=Ci+1400|0;A=Ci+1396|0;fa=Ci+1392|0;db=Ci+1388|0;eb=Ci+1384|0;Zd=Ci+1380|0;cd=Ci+1376|0;gd=Ci+1372|0;jd=Ci+1368|0;gb=Ci+1364|0;hb=Ci+1360|0;zi=Ci+1356|0;nd=Ci+1352|0;Eh=Ci+1348|0;yd=Ci+1344|0;oa=Ci+1340|0;od=Ci+1336|0;la=Ci+1332|0;zd=Ci+1328|0;Lh=Ci+1324|0;be=Ci+1320|0;va=Ci+1316|0;vd=Ci+1312|0;Ih=Ci+1308|0;ae=Ci+1304|0;sa=Ci+1300|0;sd=Ci+1296|0;xi=Ci+1292|0;yi=Ci+1288|0;ja=Ci+1284|0;ka=Ci+1280|0;Ai=Ci+1276|0;Dh=Ci+1272|0;ma=Ci+1268|0;na=Ci+1264|0;Jh=Ci+1260|0;Kh=Ci+1256|0;td=Ci+1252|0;ta=Ci+1248|0;ua=Ci+1244|0;ud=Ci+1240|0;Gh=Ci+1236|0;Hh=Ci+1232|0;qd=Ci+1228|0;qa=Ci+1224|0;ra=Ci+1220|0;rd=Ci+1216|0;Fh=Ci+1212|0;Mh=Ci+1208|0;Jf=Ci+1204|0;Kf=Ci+1200|0;Mf=Ci+1196|0;Nf=Ci+1192|0;pa=Ci+1188|0;wa=Ci+1184|0;rc=Ci+1180|0;sc=Ci+1176|0;pd=Ci+1172|0;wd=Ci+1168|0;$d=Ci+1164|0;ce=Ci+1160|0;uc=Ci+1156|0;vc=Ci+1152|0;Qh=Ci+1148|0;fe=Ci+1144|0;Th=Ci+1140|0;Qe=Ci+1136|0;H=Ci+1132|0;ge=Ci+1128|0;E=Ci+1124|0;Re=Ci+1120|0;_h=Ci+1116|0;Ue=Ci+1112|0;O=Ci+1108|0;Ne=Ci+1104|0;Xh=Ci+1100|0;Te=Ci+1096|0;L=Ci+1092|0;Ke=Ci+1088|0;Oh=Ci+1084|0;Ph=Ci+1080|0;C=Ci+1076|0;D=Ci+1072|0;Rh=Ci+1068|0;Sh=Ci+1064|0;F=Ci+1060|0;G=Ci+1056|0;Yh=Ci+1052|0;Zh=Ci+1048|0;Le=Ci+1044|0;M=Ci+1040|0;N=Ci+1036|0;Me=Ci+1032|0;Vh=Ci+1028|0;Wh=Ci+1024|0;ie=Ci+1020|0;J=Ci+1016|0;K=Ci+1012|0;Je=Ci+1008|0;Uh=Ci+1004|0;$h=Ci+1e3|0;Cf=Ci+996|0;Df=Ci+992|0;Ff=Ci+988|0;Gf=Ci+984|0;I=Ci+980|0;P=Ci+976|0;Mb=Ci+972|0;Nb=Ci+968|0;he=Ci+964|0;Oe=Ci+960|0;Se=Ci+956|0;Ve=Ci+952|0;Pb=Ci+948|0;Qb=Ci+944|0;wi=Ci+940|0;r=Ci+936|0;Eb=Ci+932|0;Ka=Ci+928|0;Na=Ci+924|0;Oa=Ci+920|0;D
b=Ci+916|0;Fb=Ci+912|0;Sa=Ci+908|0;Ya=Ci+904|0;Wa=Ci+900|0;_a=Ci+896|0;Qa=Ci+892|0;Ra=Ci+888|0;Ua=Ci+884|0;Va=Ci+880|0;Pa=Ci+876|0;Ta=Ci+872|0;Xa=Ci+868|0;Za=Ci+864|0;ha=Ci+860|0;ib=Ci+856|0;wb=Ci+852|0;sb=Ci+848|0;lb=Ci+844|0;tb=Ci+840|0;S=Ci+836|0;xb=Ci+832|0;jb=Ci+828|0;kb=Ci+824|0;ya=Ci+820|0;R=Ci+816|0;T=Ci+812|0;mb=Ci+808|0;s=Ci+804|0;U=Ci+800|0;Ab=Ci+796|0;Cb=Ci+792|0;zb=Ci+788|0;Bb=Ci+784|0;ob=Ci+780|0;qb=Ci+776|0;nb=Ci+772|0;pb=Ci+768|0;ub=Ci+764|0;yb=Ci+760|0;rb=Ci+756|0;vb=Ci+752|0;fc=Ci+748|0;Ad=Ci+744|0;Sc=Ci+740|0;Ed=Ci+736|0;mc=Ci+732|0;Fd=Ci+728|0;Vc=Ci+724|0;Bd=Ci+720|0;ec=Ci+716|0;qc=Ci+712|0;ic=Ci+708|0;lc=Ci+704|0;Tc=Ci+700|0;Uc=Ci+696|0;nc=Ci+692|0;Wc=Ci+688|0;cc=Ci+684|0;oc=Ci+680|0;Id=Ci+676|0;Kd=Ci+672|0;Hd=Ci+668|0;Jd=Ci+664|0;Yc=Ci+660|0;_c=Ci+656|0;Xc=Ci+652|0;Zc=Ci+648|0;Cd=Ci+644|0;Gd=Ci+640|0;$c=Ci+636|0;Dd=Ci+632|0;Lb=Ci+628|0;Tb=Ci+624|0;Hc=Ci+620|0;Xb=Ci+616|0;yc=Ci+612|0;Yb=Ci+608|0;Kc=Ci+604|0;Ub=Ci+600|0;Kb=Ci+596|0;Gc=Ci+592|0;Sb=Ci+588|0;xc=Ci+584|0;Ic=Ci+580|0;Jc=Ci+576|0;zc=Ci+572|0;Lc=Ci+568|0;$a=Ci+564|0;Ac=Ci+560|0;$b=Ci+556|0;bc=Ci+552|0;_b=Ci+548|0;ac=Ci+544|0;Nc=Ci+540|0;Pc=Ci+536|0;Mc=Ci+532|0;Oc=Ci+528|0;Vb=Ci+524|0;Zb=Ci+520|0;Qc=Ci+516|0;Wb=Ci+512|0;jf=Ci+508|0;dg=Ci+504|0;Yf=Ci+500|0;eg=Ci+496|0;qf=Ci+492|0;ig=Ci+488|0;Vf=Ci+484|0;hg=Ci+480|0;He=Ci+476|0;Ie=Ci+472|0;Wf=Ci+468|0;Xf=Ci+464|0;mf=Ci+460|0;pf=Ci+456|0;Tf=Ci+452|0;Uf=Ci+448|0;rf=Ci+444|0;Zf=Ci+440|0;Ge=Ci+436|0;sf=Ci+432|0;lg=Ci+428|0;ng=Ci+424|0;kg=Ci+420|0;mg=Ci+416|0;$f=Ci+412|0;bg=Ci+408|0;_f=Ci+404|0;ag=Ci+400|0;fg=Ci+396|0;jg=Ci+392|0;cg=Ci+388|0;gg=Ci+384|0;uh=Ci+380|0;Sg=Ci+376|0;Lg=Ci+372|0;Tg=Ci+368|0;Dg=Ci+364|0;Xg=Ci+360|0;Ig=Ci+356|0;Wg=Ci+352|0;sh=Ci+348|0;th=Ci+344|0;Jg=Ci+340|0;Kg=Ci+336|0;xh=Ci+332|0;Cg=Ci+328|0;Gg=Ci+324|0;Hg=Ci+320|0;Eg=Ci+316|0;Mg=Ci+312|0;rh=Ci+308|0;Fg=Ci+304|0;_g=Ci+300|0;Bh=Ci+296|0;Zg=Ci+292|0;$g=Ci+288|0;Og=Ci+284|0;Qg=Ci+280|0;Ng=Ci+276|0;Pg=Ci+272|0;Ug=Ci+268|0;Yg=Ci+264|0;Rg=Ci+260|0;Vg=Ci+256|0;md=Ci+252|0;ve
=Ci+248|0;oe=Ci+244|0;we=Ci+240|0;Ye=Ci+236|0;Ae=Ci+232|0;le=Ci+228|0;ze=Ci+224|0;Wd=Ci+220|0;ld=Ci+216|0;me=Ci+212|0;ne=Ci+208|0;ee=Ci+204|0;Xe=Ci+200|0;ff=Ci+196|0;ke=Ci+192|0;Ze=Ci+188|0;pe=Ci+184|0;Ld=Ci+180|0;_e=Ci+176|0;De=Ci+172|0;Fe=Ci+168|0;Ce=Ci+164|0;Ee=Ci+160|0;re=Ci+156|0;te=Ci+152|0;qe=Ci+148|0;se=Ci+144|0;xe=Ci+140|0;Be=Ci+136|0;ue=Ci+132|0;ye=Ci+128|0;Bf=Ci+124|0;gh=Ci+120|0;Bg=Ci+116|0;hh=Ci+112|0;Qf=Ci+108|0;lh=Ci+104|0;yg=Ci+100|0;kh=Ci+96|0;tf=Ci+92|0;Af=Ci+88|0;zg=Ci+84|0;Ag=Ci+80|0;If=Ci+76|0;Pf=Ci+72|0;ug=Ci+68|0;xg=Ci+64|0;Rf=Ci+60|0;ah=Ci+56|0;og=Ci+52|0;Sf=Ci+48|0;oh=Ci+44|0;qh=Ci+40|0;nh=Ci+36|0;ph=Ci+32|0;ch=Ci+28|0;eh=Ci+24|0;bh=Ci+20|0;dh=Ci+16|0;ih=Ci+12|0;mh=Ci+8|0;fh=Ci+4|0;jh=Ci;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[Di>>2]=f;c[o>>2]=h;c[p>>2]=j;g[Ci+2012>>2]=.5555702447891235;g[Ci+2008>>2]=.8314695954322815;g[Ci+2004>>2]=.9807852506637573;g[Ci+2e3>>2]=.19509032368659973;g[Ci+1996>>2]=.9238795042037964;g[Ci+1992>>2]=.3826834261417389;g[Ci+1988>>2]=.7071067690849304;c[Bi>>2]=c[Di>>2];c[m>>2]=(c[m>>2]|0)+(((c[Di>>2]|0)-1|0)*62<<2);while(1){if((c[Bi>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[za>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Ib>>2]=+g[q>>2]+ +g[za>>2];g[Md>>2]=+g[q>>2]-+g[za>>2];g[Ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[Qd>>2]=+g[Ca>>2]+ +g[Da>>2];g[Fa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Ga>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[Td>>2]=+g[Fa>>2]+ +g[Ga>>2];g[Rc>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2];g[_d>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[hf>>2]=+g[Rc>>2]+ +g[_d>>2];g[$e>>2]=+g[Rc>>2]-+g[_d>>2];g[Ah>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Ch>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[bi>>2]=+g[Ah>>2]+ +g[Ch>>2];g[Pd>>2]=+g[Ah>>2]-+g[Ch>>2];g[W>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[X>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2];g[Y>>2]=+g[W>>2]-+g[X>>2];g[af>>2]=+g[W>>2]+ 
+g[X>>2];g[Z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[_>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[Aa>>2]=+g[Z>>2]-+g[_>>2];g[Nd>>2]=+g[Z>>2]+ +g[_>>2];g[ci>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[di>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ei>>2]=+g[ci>>2]+ +g[di>>2];g[Sd>>2]=+g[ci>>2]-+g[di>>2];g[Od>>2]=+g[Md>>2]-+g[Nd>>2];g[pg>>2]=+g[Md>>2]+ +g[Nd>>2];g[sg>>2]=+g[af>>2]-+g[$e>>2];g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[rg>>2]=+g[Ib>>2]+ +g[hf>>2];g[fi>>2]=+g[bi>>2]+ +g[ei>>2];g[gi>>2]=+g[rg>>2]+ +g[fi>>2];g[t>>2]=+g[rg>>2]-+g[fi>>2];g[cf>>2]=+g[Pd>>2]+ +g[Qd>>2];g[df>>2]=+g[Sd>>2]+ +g[Td>>2];g[ef>>2]=(+g[cf>>2]-+g[df>>2])*.7071067690849304;g[qg>>2]=(+g[cf>>2]+ +g[df>>2])*.7071067690849304;g[Bc>>2]=+g[Y>>2]-+g[Aa>>2];g[Cc>>2]=+g[bi>>2]-+g[ei>>2];g[Dc>>2]=+g[Bc>>2]-+g[Cc>>2];g[pc>>2]=+g[Cc>>2]+ +g[Bc>>2];g[Ba>>2]=+g[Y>>2]+ +g[Aa>>2];g[Ia>>2]=+g[Ea>>2]+ +g[Ha>>2];g[Ja>>2]=+g[Ba>>2]-+g[Ia>>2];g[Gb>>2]=+g[Ba>>2]+ +g[Ia>>2];g[Rd>>2]=+g[Pd>>2]-+g[Qd>>2];g[Ud>>2]=+g[Sd>>2]-+g[Td>>2];g[Vd>>2]=(+g[Rd>>2]+ +g[Ud>>2])*.7071067690849304;g[tg>>2]=(+g[Rd>>2]-+g[Ud>>2])*.7071067690849304;g[ab>>2]=+g[Ib>>2]-+g[hf>>2];g[bb>>2]=+g[Ha>>2]-+g[Ea>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[dc>>2]=+g[ab>>2]+ +g[bb>>2];g[hi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[ii>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[ji>>2]=+g[hi>>2]+ +g[ii>>2];g[hd>>2]=+g[hi>>2]-+g[ii>>2];g[$>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[aa>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[fd>>2]=+g[$>>2]+ +g[aa>>2];g[ca>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[da>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[id>>2]=+g[ca>>2]+ +g[da>>2];g[ki>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[li>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[mi>>2]=+g[ki>>2]+ +g[li>>2];g[ed>>2]=+g[ki>>2]-+g[li>>2];g[oi>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[pi>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[qi>>2]=+g[oi>>2]+ 
+g[pi>>2];g[ad>>2]=+g[oi>>2]-+g[pi>>2];g[u>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[v>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[w>>2]=+g[u>>2]-+g[v>>2];g[Yd>>2]=+g[u>>2]+ +g[v>>2];g[x>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[y>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[bd>>2]=+g[x>>2]+ +g[y>>2];g[ri>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[si>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[ti>>2]=+g[ri>>2]+ +g[si>>2];g[Xd>>2]=+g[ri>>2]-+g[si>>2];g[ni>>2]=+g[ji>>2]+ +g[mi>>2];g[ui>>2]=+g[qi>>2]+ +g[ti>>2];g[vi>>2]=+g[ni>>2]+ +g[ui>>2];g[V>>2]=+g[ni>>2]-+g[ui>>2];g[uf>>2]=+g[hd>>2]+ +g[id>>2];g[vf>>2]=+g[fd>>2]-+g[ed>>2];g[wf>>2]=+g[uf>>2]*.3826834261417389-+g[vf>>2]*.9238795042037964;g[vg>>2]=+g[vf>>2]*.3826834261417389+ +g[uf>>2]*.9238795042037964;g[xf>>2]=+g[ad>>2]+ +g[bd>>2];g[yf>>2]=+g[Xd>>2]+ +g[Yd>>2];g[zf>>2]=+g[xf>>2]*.3826834261417389-+g[yf>>2]*.9238795042037964;g[wg>>2]=+g[yf>>2]*.3826834261417389+ +g[xf>>2]*.9238795042037964;g[A>>2]=+g[w>>2]+ +g[z>>2];g[fa>>2]=+g[ba>>2]+ +g[ea>>2];g[ga>>2]=+g[A>>2]-+g[fa>>2];g[Hb>>2]=+g[fa>>2]+ +g[A>>2];g[db>>2]=+g[w>>2]-+g[z>>2];g[eb>>2]=+g[qi>>2]-+g[ti>>2];g[fb>>2]=+g[db>>2]-+g[eb>>2];g[Fc>>2]=+g[eb>>2]+ +g[db>>2];g[Zd>>2]=+g[Xd>>2]-+g[Yd>>2];g[cd>>2]=+g[ad>>2]-+g[bd>>2];g[dd>>2]=+g[Zd>>2]*.9238795042037964-+g[cd>>2]*.3826834261417389;g[je>>2]=+g[Zd>>2]*.3826834261417389+ +g[cd>>2]*.9238795042037964;g[gd>>2]=+g[ed>>2]+ +g[fd>>2];g[jd>>2]=+g[hd>>2]-+g[id>>2];g[kd>>2]=+g[gd>>2]*.9238795042037964+ +g[jd>>2]*.3826834261417389;g[gf>>2]=+g[jd>>2]*.9238795042037964-+g[gd>>2]*.3826834261417389;g[gb>>2]=+g[ji>>2]-+g[mi>>2];g[hb>>2]=+g[ba>>2]-+g[ea>>2];g[Jb>>2]=+g[gb>>2]+ +g[hb>>2];g[Ec>>2]=+g[gb>>2]-+g[hb>>2];g[xi>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[yi>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2];g[zi>>2]=+g[xi>>2]+ +g[yi>>2];g[nd>>2]=+g[xi>>2]-+g[yi>>2];g[Ai>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2];g[Dh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2];g[Eh>>2]=+g[Ai>>2]+ 
+g[Dh>>2];g[yd>>2]=+g[Ai>>2]-+g[Dh>>2];g[ma>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2];g[na>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2];g[oa>>2]=+g[ma>>2]-+g[na>>2];g[od>>2]=+g[ma>>2]+ +g[na>>2];g[ja>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2];g[ka>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2];g[la>>2]=+g[ja>>2]-+g[ka>>2];g[zd>>2]=+g[ja>>2]+ +g[ka>>2];g[Jh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Kh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2];g[td>>2]=+g[Jh>>2]-+g[Kh>>2];g[ta>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2];g[ua>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2];g[ud>>2]=+g[ta>>2]+ +g[ua>>2];g[Lh>>2]=+g[Jh>>2]+ +g[Kh>>2];g[be>>2]=+g[td>>2]+ +g[ud>>2];g[va>>2]=+g[ta>>2]-+g[ua>>2];g[vd>>2]=+g[td>>2]-+g[ud>>2];g[Gh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2];g[Hh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2];g[qd>>2]=+g[Gh>>2]-+g[Hh>>2];g[qa>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2];g[ra>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2];g[rd>>2]=+g[qa>>2]+ +g[ra>>2];g[Ih>>2]=+g[Gh>>2]+ +g[Hh>>2];g[ae>>2]=+g[qd>>2]+ +g[rd>>2];g[sa>>2]=+g[qa>>2]-+g[ra>>2];g[sd>>2]=+g[qd>>2]-+g[rd>>2];g[Fh>>2]=+g[zi>>2]+ +g[Eh>>2];g[Mh>>2]=+g[Ih>>2]+ +g[Lh>>2];g[Nh>>2]=+g[Fh>>2]+ +g[Mh>>2];g[ia>>2]=+g[Fh>>2]-+g[Mh>>2];g[Jf>>2]=+g[zd>>2]-+g[yd>>2];g[Kf>>2]=(+g[sd>>2]-+g[vd>>2])*.7071067690849304;g[Lf>>2]=+g[Jf>>2]+ +g[Kf>>2];g[wh>>2]=+g[Jf>>2]-+g[Kf>>2];g[Mf>>2]=+g[nd>>2]+ +g[od>>2];g[Nf>>2]=(+g[ae>>2]+ +g[be>>2])*.7071067690849304;g[Of>>2]=+g[Mf>>2]-+g[Nf>>2];g[vh>>2]=+g[Mf>>2]+ +g[Nf>>2];g[pa>>2]=+g[la>>2]+ +g[oa>>2];g[wa>>2]=+g[sa>>2]+ +g[va>>2];g[xa>>2]=+g[pa>>2]-+g[wa>>2];g[La>>2]=+g[pa>>2]+ +g[wa>>2];g[rc>>2]=+g[la>>2]-+g[oa>>2];g[sc>>2]=+g[Ih>>2]-+g[Lh>>2];g[tc>>2]=+g[rc>>2]-+g[sc>>2];g[jc>>2]=+g[sc>>2]+ +g[rc>>2];g[pd>>2]=+g[nd>>2]-+g[od>>2];g[wd>>2]=(+g[sd>>2]+ +g[vd>>2])*.7071067690849304;g[xd>>2]=+g[pd>>2]-+g[wd>>2];g[kf>>2]=+g[pd>>2]+ +g[wd>>2];g[$d>>2]=+g[yd>>2]+ +g[zd>>2];g[ce>>2]=(+g[ae>>2]-+g[be>>2])*.7071067690849304;g[de>>2]=+g[$d>>2]-+g[ce>>2];g[lf>>2]=+g[$d>>2]+ 
+g[ce>>2];g[uc>>2]=+g[zi>>2]-+g[Eh>>2];g[vc>>2]=+g[va>>2]-+g[sa>>2];g[wc>>2]=+g[uc>>2]-+g[vc>>2];g[kc>>2]=+g[uc>>2]+ +g[vc>>2];g[Oh>>2]=+g[c[l>>2]>>2];g[Ph>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2];g[Qh>>2]=+g[Oh>>2]+ +g[Ph>>2];g[fe>>2]=+g[Oh>>2]-+g[Ph>>2];g[Rh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2];g[Sh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2];g[Th>>2]=+g[Rh>>2]+ +g[Sh>>2];g[Qe>>2]=+g[Rh>>2]-+g[Sh>>2];g[F>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2];g[G>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2];g[H>>2]=+g[F>>2]-+g[G>>2];g[ge>>2]=+g[F>>2]+ +g[G>>2];g[C>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[Re>>2]=+g[C>>2]+ +g[D>>2];g[Yh>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[Zh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2];g[Le>>2]=+g[Yh>>2]-+g[Zh>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2];g[N>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2];g[Me>>2]=+g[M>>2]+ +g[N>>2];g[_h>>2]=+g[Yh>>2]+ +g[Zh>>2];g[Ue>>2]=+g[Le>>2]+ +g[Me>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[Ne>>2]=+g[Le>>2]-+g[Me>>2];g[Vh>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[Wh>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2];g[ie>>2]=+g[Vh>>2]-+g[Wh>>2];g[J>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2];g[K>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2];g[Je>>2]=+g[J>>2]+ +g[K>>2];g[Xh>>2]=+g[Vh>>2]+ +g[Wh>>2];g[Te>>2]=+g[ie>>2]+ +g[Je>>2];g[L>>2]=+g[J>>2]-+g[K>>2];g[Ke>>2]=+g[ie>>2]-+g[Je>>2];g[Uh>>2]=+g[Qh>>2]+ +g[Th>>2];g[$h>>2]=+g[Xh>>2]+ +g[_h>>2];g[ai>>2]=+g[Uh>>2]+ +g[$h>>2];g[B>>2]=+g[Uh>>2]-+g[$h>>2];g[Cf>>2]=(+g[Ke>>2]-+g[Ne>>2])*.7071067690849304;g[Df>>2]=+g[Qe>>2]+ +g[Re>>2];g[Ef>>2]=+g[Cf>>2]-+g[Df>>2];g[zh>>2]=+g[Df>>2]+ +g[Cf>>2];g[Ff>>2]=+g[fe>>2]+ +g[ge>>2];g[Gf>>2]=(+g[Te>>2]+ +g[Ue>>2])*.7071067690849304;g[Hf>>2]=+g[Ff>>2]-+g[Gf>>2];g[yh>>2]=+g[Ff>>2]+ +g[Gf>>2];g[I>>2]=+g[E>>2]+ +g[H>>2];g[P>>2]=+g[L>>2]+ +g[O>>2];g[Q>>2]=+g[I>>2]-+g[P>>2];g[Ma>>2]=+g[I>>2]+ 
+g[P>>2];g[Mb>>2]=+g[E>>2]-+g[H>>2];g[Nb>>2]=+g[Xh>>2]-+g[_h>>2];g[Ob>>2]=+g[Mb>>2]-+g[Nb>>2];g[gc>>2]=+g[Nb>>2]+ +g[Mb>>2];g[he>>2]=+g[fe>>2]-+g[ge>>2];g[Oe>>2]=(+g[Ke>>2]+ +g[Ne>>2])*.7071067690849304;g[Pe>>2]=+g[he>>2]-+g[Oe>>2];g[nf>>2]=+g[he>>2]+ +g[Oe>>2];g[Se>>2]=+g[Qe>>2]-+g[Re>>2];g[Ve>>2]=(+g[Te>>2]-+g[Ue>>2])*.7071067690849304;g[We>>2]=+g[Se>>2]-+g[Ve>>2];g[of>>2]=+g[Se>>2]+ +g[Ve>>2];g[Pb>>2]=+g[Qh>>2]-+g[Th>>2];g[Qb>>2]=+g[O>>2]-+g[L>>2];g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[hc>>2]=+g[Pb>>2]+ +g[Qb>>2];g[wi>>2]=+g[gi>>2]+ +g[vi>>2];g[r>>2]=+g[Nh>>2]+ +g[ai>>2];g[Eb>>2]=+g[wi>>2]-+g[r>>2];g[Ka>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[Oa>>2]=+g[Ka>>2]-+g[Na>>2];g[c[k>>2]>>2]=+g[wi>>2]+ +g[r>>2];g[c[l>>2]>>2]=+g[Ka>>2]+ +g[Na>>2];g[Db>>2]=+g[(c[m>>2]|0)+120>>2];g[Fb>>2]=+g[(c[m>>2]|0)+124>>2];g[(c[k>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Db>>2]*+g[Eb>>2]-+g[Fb>>2]*+g[Oa>>2];g[(c[l>>2]|0)+(c[n>>2]<<4<<2)>>2]=+g[Fb>>2]*+g[Eb>>2]+ +g[Db>>2]*+g[Oa>>2];g[Qa>>2]=+g[gi>>2]-+g[vi>>2];g[Ra>>2]=+g[Ma>>2]-+g[La>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Ya>>2]=+g[Qa>>2]+ +g[Ra>>2];g[Ua>>2]=+g[Gb>>2]-+g[Hb>>2];g[Va>>2]=+g[Nh>>2]-+g[ai>>2];g[Wa>>2]=+g[Ua>>2]-+g[Va>>2];g[_a>>2]=+g[Va>>2]+ +g[Ua>>2];g[Pa>>2]=+g[(c[m>>2]|0)+184>>2];g[Ta>>2]=+g[(c[m>>2]|0)+188>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Pa>>2]*+g[Sa>>2]-+g[Ta>>2]*+g[Wa>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*24<<2)>>2]=+g[Pa>>2]*+g[Wa>>2]+ +g[Ta>>2]*+g[Sa>>2];g[Xa>>2]=+g[(c[m>>2]|0)+56>>2];g[Za>>2]=+g[(c[m>>2]|0)+60>>2];g[(c[k>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Xa>>2]*+g[Ya>>2]-+g[Za>>2]*+g[_a>>2];g[(c[l>>2]|0)+(c[n>>2]<<3<<2)>>2]=+g[Xa>>2]*+g[_a>>2]+ +g[Za>>2]*+g[Ya>>2];g[ha>>2]=+g[t>>2]+ +g[ga>>2];g[ib>>2]=+g[V>>2]+ +g[Ja>>2];g[wb>>2]=+g[Ja>>2]-+g[V>>2];g[sb>>2]=+g[t>>2]-+g[ga>>2];g[jb>>2]=+g[ia>>2]+ +g[xa>>2];g[kb>>2]=+g[Q>>2]-+g[B>>2];g[lb>>2]=(+g[jb>>2]+ +g[kb>>2])*.7071067690849304;g[tb>>2]=(+g[kb>>2]-+g[jb>>2])*.7071067690849304;g[ya>>2]=+g[ia>>2]-+g[xa>>2];g[R>>2]=+g[B>>2]+ +g[Q>>2];g[S>>2]=(+g[ya>>2]+ 
+g[R>>2])*.7071067690849304;g[xb>>2]=(+g[ya>>2]-+g[R>>2])*.7071067690849304;g[T>>2]=+g[ha>>2]-+g[S>>2];g[mb>>2]=+g[ib>>2]-+g[lb>>2];g[s>>2]=+g[(c[m>>2]|0)+152>>2];g[U>>2]=+g[(c[m>>2]|0)+156>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[s>>2]*+g[T>>2]-+g[U>>2]*+g[mb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*20<<2)>>2]=+g[U>>2]*+g[T>>2]+ +g[s>>2]*+g[mb>>2];g[Ab>>2]=+g[sb>>2]+ +g[tb>>2];g[Cb>>2]=+g[wb>>2]+ +g[xb>>2];g[zb>>2]=+g[(c[m>>2]|0)+88>>2];g[Bb>>2]=+g[(c[m>>2]|0)+92>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[zb>>2]*+g[Ab>>2]-+g[Bb>>2]*+g[Cb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*12<<2)>>2]=+g[zb>>2]*+g[Cb>>2]+ +g[Bb>>2]*+g[Ab>>2];g[ob>>2]=+g[ha>>2]+ +g[S>>2];g[qb>>2]=+g[ib>>2]+ +g[lb>>2];g[nb>>2]=+g[(c[m>>2]|0)+24>>2];g[pb>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[nb>>2]*+g[ob>>2]-+g[pb>>2]*+g[qb>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[pb>>2]*+g[ob>>2]+ +g[nb>>2]*+g[qb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[yb>>2]=+g[wb>>2]-+g[xb>>2];g[rb>>2]=+g[(c[m>>2]|0)+216>>2];g[vb>>2]=+g[(c[m>>2]|0)+220>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[rb>>2]*+g[ub>>2]-+g[vb>>2]*+g[yb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*28<<2)>>2]=+g[rb>>2]*+g[yb>>2]+ +g[vb>>2]*+g[ub>>2];g[ec>>2]=(+g[Ec>>2]+ +g[Fc>>2])*.7071067690849304;g[fc>>2]=+g[dc>>2]-+g[ec>>2];g[Ad>>2]=+g[dc>>2]+ +g[ec>>2];g[qc>>2]=(+g[Jb>>2]+ +g[fb>>2])*.7071067690849304;g[Sc>>2]=+g[pc>>2]-+g[qc>>2];g[Ed>>2]=+g[pc>>2]+ +g[qc>>2];g[ic>>2]=+g[gc>>2]*.9238795042037964-+g[hc>>2]*.3826834261417389;g[lc>>2]=+g[jc>>2]*.9238795042037964+ +g[kc>>2]*.3826834261417389;g[mc>>2]=+g[ic>>2]-+g[lc>>2];g[Fd>>2]=+g[lc>>2]+ +g[ic>>2];g[Tc>>2]=+g[kc>>2]*.9238795042037964-+g[jc>>2]*.3826834261417389;g[Uc>>2]=+g[gc>>2]*.3826834261417389+ +g[hc>>2]*.9238795042037964;g[Vc>>2]=+g[Tc>>2]-+g[Uc>>2];g[Bd>>2]=+g[Tc>>2]+ 
+g[Uc>>2];g[nc>>2]=+g[fc>>2]-+g[mc>>2];g[Wc>>2]=+g[Sc>>2]-+g[Vc>>2];g[cc>>2]=+g[(c[m>>2]|0)+200>>2];g[oc>>2]=+g[(c[m>>2]|0)+204>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[cc>>2]*+g[nc>>2]-+g[oc>>2]*+g[Wc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*26<<2)>>2]=+g[oc>>2]*+g[nc>>2]+ +g[cc>>2]*+g[Wc>>2];g[Id>>2]=+g[Ad>>2]+ +g[Bd>>2];g[Kd>>2]=+g[Ed>>2]+ +g[Fd>>2];g[Hd>>2]=+g[(c[m>>2]|0)+8>>2];g[Jd>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Hd>>2]*+g[Id>>2]-+g[Jd>>2]*+g[Kd>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[Hd>>2]*+g[Kd>>2]+ +g[Jd>>2]*+g[Id>>2];g[Yc>>2]=+g[fc>>2]+ +g[mc>>2];g[_c>>2]=+g[Sc>>2]+ +g[Vc>>2];g[Xc>>2]=+g[(c[m>>2]|0)+72>>2];g[Zc>>2]=+g[(c[m>>2]|0)+76>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Xc>>2]*+g[Yc>>2]-+g[Zc>>2]*+g[_c>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*10<<2)>>2]=+g[Zc>>2]*+g[Yc>>2]+ +g[Xc>>2]*+g[_c>>2];g[Cd>>2]=+g[Ad>>2]-+g[Bd>>2];g[Gd>>2]=+g[Ed>>2]-+g[Fd>>2];g[$c>>2]=+g[(c[m>>2]|0)+136>>2];g[Dd>>2]=+g[(c[m>>2]|0)+140>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[$c>>2]*+g[Cd>>2]-+g[Dd>>2]*+g[Gd>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*18<<2)>>2]=+g[$c>>2]*+g[Gd>>2]+ +g[Dd>>2]*+g[Cd>>2];g[Kb>>2]=(+g[fb>>2]-+g[Jb>>2])*.7071067690849304;g[Lb>>2]=+g[cb>>2]-+g[Kb>>2];g[Tb>>2]=+g[cb>>2]+ +g[Kb>>2];g[Gc>>2]=(+g[Ec>>2]-+g[Fc>>2])*.7071067690849304;g[Hc>>2]=+g[Dc>>2]-+g[Gc>>2];g[Xb>>2]=+g[Dc>>2]+ +g[Gc>>2];g[Sb>>2]=+g[Ob>>2]*.3826834261417389-+g[Rb>>2]*.9238795042037964;g[xc>>2]=+g[tc>>2]*.3826834261417389+ +g[wc>>2]*.9238795042037964;g[yc>>2]=+g[Sb>>2]-+g[xc>>2];g[Yb>>2]=+g[xc>>2]+ +g[Sb>>2];g[Ic>>2]=+g[wc>>2]*.3826834261417389-+g[tc>>2]*.9238795042037964;g[Jc>>2]=+g[Ob>>2]*.9238795042037964+ +g[Rb>>2]*.3826834261417389;g[Kc>>2]=+g[Ic>>2]-+g[Jc>>2];g[Ub>>2]=+g[Ic>>2]+ +g[Jc>>2];g[zc>>2]=+g[Lb>>2]-+g[yc>>2];g[Lc>>2]=+g[Hc>>2]-+g[Kc>>2];g[$a>>2]=+g[(c[m>>2]|0)+232>>2];g[Ac>>2]=+g[(c[m>>2]|0)+236>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[$a>>2]*+g[zc>>2]-+g[Ac>>2]*+g[Lc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*30<<2)>>2]=+g[Ac>>2]*+g[zc>>2]+ 
+g[$a>>2]*+g[Lc>>2];g[$b>>2]=+g[Tb>>2]+ +g[Ub>>2];g[bc>>2]=+g[Xb>>2]+ +g[Yb>>2];g[_b>>2]=+g[(c[m>>2]|0)+40>>2];g[ac>>2]=+g[(c[m>>2]|0)+44>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[_b>>2]*+g[$b>>2]-+g[ac>>2]*+g[bc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*6<<2)>>2]=+g[_b>>2]*+g[bc>>2]+ +g[ac>>2]*+g[$b>>2];g[Nc>>2]=+g[Lb>>2]+ +g[yc>>2];g[Pc>>2]=+g[Hc>>2]+ +g[Kc>>2];g[Mc>>2]=+g[(c[m>>2]|0)+104>>2];g[Oc>>2]=+g[(c[m>>2]|0)+108>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Mc>>2]*+g[Nc>>2]-+g[Oc>>2]*+g[Pc>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*14<<2)>>2]=+g[Oc>>2]*+g[Nc>>2]+ +g[Mc>>2]*+g[Pc>>2];g[Vb>>2]=+g[Tb>>2]-+g[Ub>>2];g[Zb>>2]=+g[Xb>>2]-+g[Yb>>2];g[Qc>>2]=+g[(c[m>>2]|0)+168>>2];g[Wb>>2]=+g[(c[m>>2]|0)+172>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Qc>>2]*+g[Vb>>2]-+g[Wb>>2]*+g[Zb>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*22<<2)>>2]=+g[Qc>>2]*+g[Zb>>2]+ +g[Wb>>2]*+g[Vb>>2];g[He>>2]=+g[Od>>2]+ +g[Vd>>2];g[Ie>>2]=+g[gf>>2]+ +g[je>>2];g[jf>>2]=+g[He>>2]+ +g[Ie>>2];g[dg>>2]=+g[He>>2]-+g[Ie>>2];g[Wf>>2]=+g[kf>>2]*.19509032368659973+ +g[lf>>2]*.9807852506637573;g[Xf>>2]=+g[of>>2]*.9807852506637573-+g[nf>>2]*.19509032368659973;g[Yf>>2]=+g[Wf>>2]+ +g[Xf>>2];g[eg>>2]=+g[Xf>>2]-+g[Wf>>2];g[mf>>2]=+g[kf>>2]*.9807852506637573-+g[lf>>2]*.19509032368659973;g[pf>>2]=+g[nf>>2]*.9807852506637573+ +g[of>>2]*.19509032368659973;g[qf>>2]=+g[mf>>2]+ +g[pf>>2];g[ig>>2]=+g[mf>>2]-+g[pf>>2];g[Tf>>2]=+g[bf>>2]+ +g[ef>>2];g[Uf>>2]=+g[kd>>2]+ +g[dd>>2];g[Vf>>2]=+g[Tf>>2]+ +g[Uf>>2];g[hg>>2]=+g[Tf>>2]-+g[Uf>>2];g[rf>>2]=+g[jf>>2]-+g[qf>>2];g[Zf>>2]=+g[Vf>>2]-+g[Yf>>2];g[Ge>>2]=+g[(c[m>>2]|0)+128>>2];g[sf>>2]=+g[(c[m>>2]|0)+132>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[Ge>>2]*+g[rf>>2]-+g[sf>>2]*+g[Zf>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*17<<2)>>2]=+g[sf>>2]*+g[rf>>2]+ +g[Ge>>2]*+g[Zf>>2];g[lg>>2]=+g[dg>>2]+ +g[eg>>2];g[ng>>2]=+g[hg>>2]+ 
+g[ig>>2];g[kg>>2]=+g[(c[m>>2]|0)+64>>2];g[mg>>2]=+g[(c[m>>2]|0)+68>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[kg>>2]*+g[lg>>2]-+g[mg>>2]*+g[ng>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*9<<2)>>2]=+g[kg>>2]*+g[ng>>2]+ +g[mg>>2]*+g[lg>>2];g[$f>>2]=+g[jf>>2]+ +g[qf>>2];g[bg>>2]=+g[Vf>>2]+ +g[Yf>>2];g[_f>>2]=+g[c[m>>2]>>2];g[ag>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[_f>>2]*+g[$f>>2]-+g[ag>>2]*+g[bg>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[ag>>2]*+g[$f>>2]+ +g[_f>>2]*+g[bg>>2];g[fg>>2]=+g[dg>>2]-+g[eg>>2];g[jg>>2]=+g[hg>>2]-+g[ig>>2];g[cg>>2]=+g[(c[m>>2]|0)+192>>2];g[gg>>2]=+g[(c[m>>2]|0)+196>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[cg>>2]*+g[fg>>2]-+g[gg>>2]*+g[jg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*25<<2)>>2]=+g[cg>>2]*+g[jg>>2]+ +g[gg>>2]*+g[fg>>2];g[sh>>2]=+g[pg>>2]+ +g[qg>>2];g[th>>2]=+g[vg>>2]+ +g[wg>>2];g[uh>>2]=+g[sh>>2]-+g[th>>2];g[Sg>>2]=+g[sh>>2]+ +g[th>>2];g[Jg>>2]=+g[wh>>2]*.19509032368659973+ +g[vh>>2]*.9807852506637573;g[Kg>>2]=+g[zh>>2]*.19509032368659973+ +g[yh>>2]*.9807852506637573;g[Lg>>2]=+g[Jg>>2]-+g[Kg>>2];g[Tg>>2]=+g[Jg>>2]+ +g[Kg>>2];g[xh>>2]=+g[vh>>2]*.19509032368659973-+g[wh>>2]*.9807852506637573;g[Cg>>2]=+g[yh>>2]*.19509032368659973-+g[zh>>2]*.9807852506637573;g[Dg>>2]=+g[xh>>2]+ +g[Cg>>2];g[Xg>>2]=+g[xh>>2]-+g[Cg>>2];g[Gg>>2]=+g[sg>>2]-+g[tg>>2];g[Hg>>2]=+g[wf>>2]-+g[zf>>2];g[Ig>>2]=+g[Gg>>2]+ +g[Hg>>2];g[Wg>>2]=+g[Gg>>2]-+g[Hg>>2];g[Eg>>2]=+g[uh>>2]-+g[Dg>>2];g[Mg>>2]=+g[Ig>>2]-+g[Lg>>2];g[rh>>2]=+g[(c[m>>2]|0)+176>>2];g[Fg>>2]=+g[(c[m>>2]|0)+180>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[rh>>2]*+g[Eg>>2]-+g[Fg>>2]*+g[Mg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*23<<2)>>2]=+g[Fg>>2]*+g[Eg>>2]+ +g[rh>>2]*+g[Mg>>2];g[_g>>2]=+g[Sg>>2]+ +g[Tg>>2];g[Bh>>2]=+g[Wg>>2]-+g[Xg>>2];g[Zg>>2]=+g[(c[m>>2]|0)+240>>2];g[$g>>2]=+g[(c[m>>2]|0)+244>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Zg>>2]*+g[_g>>2]-+g[$g>>2]*+g[Bh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*31<<2)>>2]=+g[Zg>>2]*+g[Bh>>2]+ +g[$g>>2]*+g[_g>>2];g[Og>>2]=+g[uh>>2]+ +g[Dg>>2];g[Qg>>2]=+g[Ig>>2]+ 
+g[Lg>>2];g[Ng>>2]=+g[(c[m>>2]|0)+48>>2];g[Pg>>2]=+g[(c[m>>2]|0)+52>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Ng>>2]*+g[Og>>2]-+g[Pg>>2]*+g[Qg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*7<<2)>>2]=+g[Pg>>2]*+g[Og>>2]+ +g[Ng>>2]*+g[Qg>>2];g[Ug>>2]=+g[Sg>>2]-+g[Tg>>2];g[Yg>>2]=+g[Wg>>2]+ +g[Xg>>2];g[Rg>>2]=+g[(c[m>>2]|0)+112>>2];g[Vg>>2]=+g[(c[m>>2]|0)+116>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Rg>>2]*+g[Ug>>2]-+g[Vg>>2]*+g[Yg>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*15<<2)>>2]=+g[Rg>>2]*+g[Yg>>2]+ +g[Vg>>2]*+g[Ug>>2];g[Wd>>2]=+g[Od>>2]-+g[Vd>>2];g[ld>>2]=+g[dd>>2]-+g[kd>>2];g[md>>2]=+g[Wd>>2]+ +g[ld>>2];g[ve>>2]=+g[Wd>>2]-+g[ld>>2];g[me>>2]=+g[xd>>2]*.8314695954322815+ +g[de>>2]*.5555702447891235;g[ne>>2]=+g[We>>2]*.5555702447891235-+g[Pe>>2]*.8314695954322815;g[oe>>2]=+g[me>>2]+ +g[ne>>2];g[we>>2]=+g[ne>>2]-+g[me>>2];g[ee>>2]=+g[xd>>2]*.5555702447891235-+g[de>>2]*.8314695954322815;g[Xe>>2]=+g[Pe>>2]*.5555702447891235+ +g[We>>2]*.8314695954322815;g[Ye>>2]=+g[ee>>2]+ +g[Xe>>2];g[Ae>>2]=+g[ee>>2]-+g[Xe>>2];g[ff>>2]=+g[bf>>2]-+g[ef>>2];g[ke>>2]=+g[gf>>2]-+g[je>>2];g[le>>2]=+g[ff>>2]+ +g[ke>>2];g[ze>>2]=+g[ff>>2]-+g[ke>>2];g[Ze>>2]=+g[md>>2]-+g[Ye>>2];g[pe>>2]=+g[le>>2]-+g[oe>>2];g[Ld>>2]=+g[(c[m>>2]|0)+160>>2];g[_e>>2]=+g[(c[m>>2]|0)+164>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[Ld>>2]*+g[Ze>>2]-+g[_e>>2]*+g[pe>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*21<<2)>>2]=+g[_e>>2]*+g[Ze>>2]+ +g[Ld>>2]*+g[pe>>2];g[De>>2]=+g[ve>>2]+ +g[we>>2];g[Fe>>2]=+g[ze>>2]+ +g[Ae>>2];g[Ce>>2]=+g[(c[m>>2]|0)+96>>2];g[Ee>>2]=+g[(c[m>>2]|0)+100>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ce>>2]*+g[De>>2]-+g[Ee>>2]*+g[Fe>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*13<<2)>>2]=+g[Ce>>2]*+g[Fe>>2]+ +g[Ee>>2]*+g[De>>2];g[re>>2]=+g[md>>2]+ +g[Ye>>2];g[te>>2]=+g[le>>2]+ +g[oe>>2];g[qe>>2]=+g[(c[m>>2]|0)+32>>2];g[se>>2]=+g[(c[m>>2]|0)+36>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[qe>>2]*+g[re>>2]-+g[se>>2]*+g[te>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*5<<2)>>2]=+g[se>>2]*+g[re>>2]+ 
+g[qe>>2]*+g[te>>2];g[xe>>2]=+g[ve>>2]-+g[we>>2];g[Be>>2]=+g[ze>>2]-+g[Ae>>2];g[ue>>2]=+g[(c[m>>2]|0)+224>>2];g[ye>>2]=+g[(c[m>>2]|0)+228>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[ue>>2]*+g[xe>>2]-+g[ye>>2]*+g[Be>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*29<<2)>>2]=+g[ue>>2]*+g[Be>>2]+ +g[ye>>2]*+g[xe>>2];g[tf>>2]=+g[pg>>2]-+g[qg>>2];g[Af>>2]=+g[wf>>2]+ +g[zf>>2];g[Bf>>2]=+g[tf>>2]-+g[Af>>2];g[gh>>2]=+g[tf>>2]+ +g[Af>>2];g[zg>>2]=+g[Of>>2]*.8314695954322815-+g[Lf>>2]*.5555702447891235;g[Ag>>2]=+g[Ef>>2]*.5555702447891235+ +g[Hf>>2]*.8314695954322815;g[Bg>>2]=+g[zg>>2]-+g[Ag>>2];g[hh>>2]=+g[zg>>2]+ +g[Ag>>2];g[If>>2]=+g[Ef>>2]*.8314695954322815-+g[Hf>>2]*.5555702447891235;g[Pf>>2]=+g[Lf>>2]*.8314695954322815+ +g[Of>>2]*.5555702447891235;g[Qf>>2]=+g[If>>2]-+g[Pf>>2];g[lh>>2]=+g[Pf>>2]+ +g[If>>2];g[ug>>2]=+g[sg>>2]+ +g[tg>>2];g[xg>>2]=+g[vg>>2]-+g[wg>>2];g[yg>>2]=+g[ug>>2]-+g[xg>>2];g[kh>>2]=+g[ug>>2]+ +g[xg>>2];g[Rf>>2]=+g[Bf>>2]-+g[Qf>>2];g[ah>>2]=+g[yg>>2]-+g[Bg>>2];g[og>>2]=+g[(c[m>>2]|0)+208>>2];g[Sf>>2]=+g[(c[m>>2]|0)+212>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[og>>2]*+g[Rf>>2]-+g[Sf>>2]*+g[ah>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*27<<2)>>2]=+g[Sf>>2]*+g[Rf>>2]+ +g[og>>2]*+g[ah>>2];g[oh>>2]=+g[gh>>2]+ +g[hh>>2];g[qh>>2]=+g[kh>>2]+ +g[lh>>2];g[nh>>2]=+g[(c[m>>2]|0)+16>>2];g[ph>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[nh>>2]*+g[oh>>2]-+g[ph>>2]*+g[qh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[nh>>2]*+g[qh>>2]+ +g[ph>>2]*+g[oh>>2];g[ch>>2]=+g[Bf>>2]+ +g[Qf>>2];g[eh>>2]=+g[yg>>2]+ +g[Bg>>2];g[bh>>2]=+g[(c[m>>2]|0)+80>>2];g[dh>>2]=+g[(c[m>>2]|0)+84>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[bh>>2]*+g[ch>>2]-+g[dh>>2]*+g[eh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*11<<2)>>2]=+g[dh>>2]*+g[ch>>2]+ 
+g[bh>>2]*+g[eh>>2];g[ih>>2]=+g[gh>>2]-+g[hh>>2];g[mh>>2]=+g[kh>>2]-+g[lh>>2];g[fh>>2]=+g[(c[m>>2]|0)+144>>2];g[jh>>2]=+g[(c[m>>2]|0)+148>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[fh>>2]*+g[ih>>2]-+g[jh>>2]*+g[mh>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*19<<2)>>2]=+g[fh>>2]*+g[mh>>2]+ +g[jh>>2]*+g[ih>>2];c[Bi>>2]=(c[Bi>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+248;c[n>>2]=c[n>>2]^c[2998]}i=Ci;return}function $t(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,64,8104);i=b;return}function au(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0;L=i;i=i+128|0;k=L+116|0;l=L+112|0;m=L+108|0;n=L+104|0;M=L+100|0;o=L+96|0;p=L+92|0;K=L+80|0;q=L+76|0;t=L+72|0;z=L+68|0;D=L+64|0;u=L+60|0;x=L+56|0;A=L+52|0;E=L+48|0;r=L+44|0;s=L+40|0;v=L+36|0;w=L+32|0;B=L+28|0;F=L+24|0;y=L+20|0;C=L+16|0;H=L+12|0;J=L+8|0;G=L+4|0;I=L;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[M>>2]=f;c[o>>2]=h;c[p>>2]=j;g[L+88>>2]=.8660253882408142;g[L+84>>2]=.5;c[K>>2]=c[M>>2];c[m>>2]=(c[m>>2]|0)+((c[M>>2]|0)-1<<2<<2);while(1){if((c[K>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[r>>2]+ +g[s>>2];g[z>>2]=+g[q>>2]-+g[t>>2]*.5;g[D>>2]=(+g[r>>2]-+g[s>>2])*.8660253882408142;g[u>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[w>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[x>>2]=+g[v>>2]-+g[w>>2];g[A>>2]=(+g[v>>2]+ +g[w>>2])*.8660253882408142;g[E>>2]=+g[u>>2]-+g[x>>2]*.5;g[c[k>>2]>>2]=+g[q>>2]+ +g[t>>2];g[c[l>>2]>>2]=+g[u>>2]+ +g[x>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[y>>2]=+g[c[m>>2]>>2];g[C>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[F>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[y>>2]*+g[F>>2]+ +g[C>>2]*+g[B>>2];g[H>>2]=+g[z>>2]+ 
+g[A>>2];g[J>>2]=+g[E>>2]-+g[D>>2];g[G>>2]=+g[(c[m>>2]|0)+8>>2];g[I>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]*+g[H>>2]-+g[I>>2]*+g[J>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[G>>2]*+g[J>>2]+ +g[I>>2]*+g[H>>2];c[K>>2]=(c[K>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+16}i=L;return}function bu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,65,8152);i=b;return}function cu(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0;T=i;i=i+144|0;k=T+140|0;l=T+136|0;m=T+132|0;n=T+128|0;U=T+124|0;o=T+120|0;p=T+116|0;S=T+112|0;s=T+108|0;H=T+104|0;v=T+100|0;L=T+96|0;B=T+92|0;M=T+88|0;E=T+84|0;I=T+80|0;q=T+76|0;r=T+72|0;t=T+68|0;u=T+64|0;z=T+60|0;A=T+56|0;C=T+52|0;D=T+48|0;x=T+44|0;F=T+40|0;w=T+36|0;y=T+32|0;J=T+28|0;N=T+24|0;G=T+20|0;K=T+16|0;P=T+12|0;R=T+8|0;O=T+4|0;Q=T;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[U>>2]=f;c[o>>2]=h;c[p>>2]=j;c[S>>2]=c[U>>2];c[m>>2]=(c[m>>2]|0)+(((c[U>>2]|0)-1|0)*6<<2);while(1){if((c[S>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[q>>2]+ +g[r>>2];g[H>>2]=+g[q>>2]-+g[r>>2];g[t>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[u>>2]=+g[c[l>>2]>>2];g[v>>2]=+g[t>>2]+ +g[u>>2];g[L>>2]=+g[t>>2]-+g[u>>2];g[z>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[A>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[B>>2]=+g[z>>2]-+g[A>>2];g[M>>2]=+g[z>>2]+ +g[A>>2];g[C>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[D>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[E>>2]=+g[C>>2]-+g[D>>2];g[I>>2]=+g[C>>2]+ +g[D>>2];g[c[k>>2]>>2]=+g[s>>2]+ +g[v>>2];g[c[l>>2]>>2]=+g[B>>2]+ 
+g[E>>2];g[x>>2]=+g[s>>2]-+g[v>>2];g[F>>2]=+g[B>>2]-+g[E>>2];g[w>>2]=+g[(c[m>>2]|0)+8>>2];g[y>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[w>>2]*+g[x>>2]-+g[y>>2]*+g[F>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[y>>2]*+g[x>>2]+ +g[w>>2]*+g[F>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[N>>2]=+g[L>>2]+ +g[M>>2];g[G>>2]=+g[c[m>>2]>>2];g[K>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]*+g[J>>2]-+g[K>>2]*+g[N>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[G>>2]*+g[N>>2]+ +g[K>>2]*+g[J>>2];g[P>>2]=+g[H>>2]+ +g[I>>2];g[R>>2]=+g[M>>2]-+g[L>>2];g[O>>2]=+g[(c[m>>2]|0)+16>>2];g[Q>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[O>>2]*+g[P>>2]-+g[Q>>2]*+g[R>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[O>>2]*+g[R>>2]+ +g[Q>>2]*+g[P>>2];c[S>>2]=(c[S>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+24}i=T;return}function du(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,66,8200);i=b;return}function eu(a,b,d,e,f,h,j){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;var 
k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0;la=i;i=i+240|0;k=la+236|0;l=la+232|0;m=la+228|0;n=la+224|0;ma=la+220|0;o=la+216|0;p=la+212|0;ka=la+192|0;q=la+188|0;V=la+184|0;F=la+180|0;ca=la+176|0;x=la+172|0;U=la+168|0;y=la+164|0;Z=la+160|0;C=la+156|0;ea=la+152|0;S=la+148|0;da=la+144|0;t=la+140|0;aa=la+136|0;w=la+132|0;ba=la+128|0;r=la+124|0;s=la+120|0;u=la+116|0;v=la+112|0;O=la+108|0;X=la+104|0;R=la+100|0;Y=la+96|0;M=la+92|0;N=la+88|0;P=la+84|0;Q=la+80|0;_=la+76|0;ia=la+72|0;ga=la+68|0;z=la+64|0;W=la+60|0;fa=la+56|0;T=la+52|0;$=la+48|0;ha=la+44|0;ja=la+40|0;D=la+36|0;J=la+32|0;H=la+28|0;L=la+24|0;B=la+20|0;G=la+16|0;A=la+12|0;E=la+8|0;I=la+4|0;K=la;c[k>>2]=a;c[l>>2]=b;c[m>>2]=d;c[n>>2]=e;c[ma>>2]=f;c[o>>2]=h;c[p>>2]=j;g[la+208>>2]=.25;g[la+204>>2]=.5877852439880371;g[la+200>>2]=.9510565400123596;g[la+196>>2]=.55901700258255;c[ka>>2]=c[ma>>2];c[m>>2]=(c[m>>2]|0)+((c[ma>>2]|0)-1<<3<<2);while(1){if((c[ka>>2]|0)>=(c[o>>2]|0))break;g[q>>2]=+g[c[k>>2]>>2];g[r>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2)>>2];g[s>>2]=+g[c[l>>2]>>2];g[t>>2]=+g[r>>2]+ +g[s>>2];g[aa>>2]=+g[r>>2]-+g[s>>2];g[u>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2];g[v>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[ba>>2]=+g[u>>2]-+g[v>>2];g[V>>2]=(+g[t>>2]-+g[w>>2])*.55901700258255;g[F>>2]=+g[aa>>2]*.9510565400123596+ +g[ba>>2]*.5877852439880371;g[ca>>2]=+g[aa>>2]*.5877852439880371-+g[ba>>2]*.9510565400123596;g[x>>2]=+g[t>>2]+ +g[w>>2];g[U>>2]=+g[q>>2]-+g[x>>2]*.25;g[y>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2];g[M>>2]=+g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[N>>2]=+g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[X>>2]=+g[M>>2]+ +g[N>>2];g[P>>2]=+g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2];g[Q>>2]=+g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[Y>>2]=+g[P>>2]+ 
+g[Q>>2];g[Z>>2]=+g[X>>2]*.5877852439880371-+g[Y>>2]*.9510565400123596;g[C>>2]=+g[X>>2]*.9510565400123596+ +g[Y>>2]*.5877852439880371;g[ea>>2]=(+g[O>>2]-+g[R>>2])*.55901700258255;g[S>>2]=+g[O>>2]+ +g[R>>2];g[da>>2]=+g[y>>2]-+g[S>>2]*.25;g[c[k>>2]>>2]=+g[q>>2]+ +g[x>>2];g[c[l>>2]>>2]=+g[y>>2]+ +g[S>>2];g[W>>2]=+g[U>>2]-+g[V>>2];g[_>>2]=+g[W>>2]-+g[Z>>2];g[ia>>2]=+g[W>>2]+ +g[Z>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[ga>>2]=+g[ca>>2]+ +g[fa>>2];g[z>>2]=+g[fa>>2]-+g[ca>>2];g[T>>2]=+g[(c[m>>2]|0)+8>>2];g[$>>2]=+g[(c[m>>2]|0)+12>>2];g[(c[k>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[T>>2]*+g[_>>2]-+g[$>>2]*+g[ga>>2];g[(c[l>>2]|0)+(c[n>>2]<<1<<2)>>2]=+g[T>>2]*+g[ga>>2]+ +g[$>>2]*+g[_>>2];g[ha>>2]=+g[(c[m>>2]|0)+16>>2];g[ja>>2]=+g[(c[m>>2]|0)+20>>2];g[(c[k>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ha>>2]*+g[ia>>2]-+g[ja>>2]*+g[z>>2];g[(c[l>>2]|0)+((c[n>>2]|0)*3<<2)>>2]=+g[ha>>2]*+g[z>>2]+ +g[ja>>2]*+g[ia>>2];g[B>>2]=+g[V>>2]+ +g[U>>2];g[D>>2]=+g[B>>2]-+g[C>>2];g[J>>2]=+g[B>>2]+ +g[C>>2];g[G>>2]=+g[ea>>2]+ +g[da>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[L>>2]=+g[G>>2]-+g[F>>2];g[A>>2]=+g[c[m>>2]>>2];g[E>>2]=+g[(c[m>>2]|0)+4>>2];g[(c[k>>2]|0)+(c[n>>2]<<2)>>2]=+g[A>>2]*+g[D>>2]-+g[E>>2]*+g[H>>2];g[(c[l>>2]|0)+(c[n>>2]<<2)>>2]=+g[A>>2]*+g[H>>2]+ +g[E>>2]*+g[D>>2];g[I>>2]=+g[(c[m>>2]|0)+24>>2];g[K>>2]=+g[(c[m>>2]|0)+28>>2];g[(c[k>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[I>>2]*+g[J>>2]-+g[K>>2]*+g[L>>2];g[(c[l>>2]|0)+(c[n>>2]<<2<<2)>>2]=+g[I>>2]*+g[L>>2]+ +g[K>>2]*+g[J>>2];c[ka>>2]=(c[ka>>2]|0)+1;c[k>>2]=(c[k>>2]|0)+(c[p>>2]<<2);c[l>>2]=(c[l>>2]|0)+(0-(c[p>>2]|0)<<2);c[m>>2]=(c[m>>2]|0)+32}i=la;return}function fu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;bn(c[d>>2]|0,67,8248);i=b;return} +function Ou(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0;xa=i;i=i+272|0;m=xa+268|0;n=xa+264|0;o=xa+260|0;p=xa+256|0;q=xa+252|0;r=xa+248|0;ya=xa+244|0;s=xa+240|0;t=xa+236|0;wa=xa+224|0;w=xa+220|0;ua=xa+216|0;$=xa+212|0;H=xa+208|0;Y=xa+204|0;R=xa+200|0;na=xa+196|0;E=xa+192|0;ga=xa+188|0;O=xa+184|0;ja=xa+180|0;K=xa+176|0;u=xa+172|0;v=xa+168|0;Z=xa+164|0;_=xa+160|0;z=xa+156|0;va=xa+152|0;C=xa+148|0;D=xa+144|0;x=xa+140|0;y=xa+136|0;A=xa+132|0;B=xa+128|0;ca=xa+124|0;J=xa+120|0;fa=xa+116|0;I=xa+112|0;aa=xa+108|0;ba=xa+104|0;da=xa+100|0;ea=xa+96|0;F=xa+92|0;L=xa+88|0;ta=xa+84|0;G=xa+80|0;ka=xa+76|0;qa=xa+72|0;oa=xa+68|0;sa=xa+64|0;ia=xa+60|0;ma=xa+56|0;ha=xa+52|0;la=xa+48|0;pa=xa+44|0;ra=xa+40|0;P=xa+36|0;V=xa+32|0;T=xa+28|0;X=xa+24|0;N=xa+20|0;S=xa+16|0;M=xa+12|0;Q=xa+8|0;U=xa+4|0;W=xa;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ya>>2]=j;c[s>>2]=k;c[t>>2]=l;g[xa+232>>2]=.5;g[xa+228>>2]=.8660253882408142;c[wa>>2]=c[ya>>2];c[q>>2]=(c[q>>2]|0)+(((c[ya>>2]|0)-1|0)*10<<2);while(1){if((c[wa>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[ua>>2]=+g[u>>2]-+g[v>>2];g[Z>>2]=+g[c[n>>2]>>2];g[_>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[H>>2]=+g[Z>>2]+ +g[_>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[y>>2]=+g[c[o>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[va>>2]=+g[x>>2]-+g[y>>2];g[A>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[B>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[D>>2]=+g[A>>2]-+g[B>>2];g[Y>>2]=+g[z>>2]+ +g[C>>2];g[R>>2]=(+g[va>>2]-+g[D>>2])*.8660253882408142;g[na>>2]=(+g[z>>2]-+g[C>>2])*.8660253882408142;g[E>>2]=+g[va>>2]+ 
+g[D>>2];g[aa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ba>>2]=+g[c[p>>2]>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[J>>2]=+g[aa>>2]+ +g[ba>>2];g[da>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ea>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[fa>>2]=+g[da>>2]-+g[ea>>2];g[I>>2]=+g[da>>2]+ +g[ea>>2];g[ga>>2]=+g[ca>>2]+ +g[fa>>2];g[O>>2]=(+g[J>>2]+ +g[I>>2])*.8660253882408142;g[ja>>2]=(+g[fa>>2]-+g[ca>>2])*.8660253882408142;g[K>>2]=+g[I>>2]-+g[J>>2];g[c[m>>2]>>2]=+g[w>>2]+ +g[Y>>2];g[c[o>>2]>>2]=+g[$>>2]+ +g[ga>>2];g[F>>2]=+g[ua>>2]+ +g[E>>2];g[L>>2]=+g[H>>2]-+g[K>>2];g[ta>>2]=+g[(c[q>>2]|0)+16>>2];g[G>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ta>>2]*+g[F>>2]-+g[G>>2]*+g[L>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]*+g[F>>2]+ +g[ta>>2]*+g[L>>2];g[ia>>2]=+g[w>>2]-+g[Y>>2]*.5;g[ka>>2]=+g[ia>>2]-+g[ja>>2];g[qa>>2]=+g[ia>>2]+ +g[ja>>2];g[ma>>2]=+g[$>>2]-+g[ga>>2]*.5;g[oa>>2]=+g[ma>>2]-+g[na>>2];g[sa>>2]=+g[na>>2]+ +g[ma>>2];g[ha>>2]=+g[(c[q>>2]|0)+8>>2];g[la>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[ha>>2]*+g[ka>>2]-+g[la>>2]*+g[oa>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[ha>>2]*+g[oa>>2]+ +g[la>>2]*+g[ka>>2];g[pa>>2]=+g[(c[q>>2]|0)+24>>2];g[ra>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[pa>>2]*+g[qa>>2]-+g[ra>>2]*+g[sa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[pa>>2]*+g[sa>>2]+ +g[ra>>2]*+g[qa>>2];g[N>>2]=+g[ua>>2]-+g[E>>2]*.5;g[P>>2]=+g[N>>2]-+g[O>>2];g[V>>2]=+g[N>>2]+ +g[O>>2];g[S>>2]=+g[K>>2]*.5+ +g[H>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[X>>2]=+g[S>>2]-+g[R>>2];g[M>>2]=+g[c[q>>2]>>2];g[Q>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[M>>2]*+g[P>>2]-+g[Q>>2]*+g[T>>2];g[c[p>>2]>>2]=+g[Q>>2]*+g[P>>2]+ +g[M>>2]*+g[T>>2];g[U>>2]=+g[(c[q>>2]|0)+32>>2];g[W>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[U>>2]*+g[V>>2]-+g[W>>2]*+g[X>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[W>>2]*+g[V>>2]+ 
+g[U>>2]*+g[X>>2];c[wa>>2]=(c[wa>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+40;c[r>>2]=c[r>>2]^c[2998]}i=xa;return}function Pu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,65,9112,0);i=b;return}function Qu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0;Va=i;i=i+368|0;m=Va+360|0;n=Va+356|0;o=Va+352|0;p=Va+348|0;q=Va+344|0;r=Va+340|0;Wa=Va+336|0;s=Va+332|0;t=Va+328|0;Ua=Va+320|0;T=Va+316|0;D=Va+312|0;H=Va+308|0;Ia=Va+304|0;Sa=Va+300|0;ga=Va+296|0;sa=Va+292|0;Y=Va+288|0;ya=Va+284|0;ta=Va+280|0;v=Va+276|0;Pa=Va+272|0;Ta=Va+268|0;ja=Va+264|0;ma=Va+260|0;Z=Va+256|0;P=Va+252|0;ea=Va+248|0;Ea=Va+244|0;ra=Va+240|0;S=Va+236|0;qa=Va+232|0;Ha=Va+228|0;fa=Va+224|0;u=Va+220|0;O=Va+216|0;Ca=Va+212|0;Da=Va+208|0;Q=Va+204|0;R=Va+200|0;Fa=Va+196|0;Ga=Va+192|0;ua=Va+188|0;ha=Va+184|0;La=Va+180|0;ia=Va+176|0;xa=Va+172|0;ka=Va+168|0;Oa=Va+164|0;la=Va+160|0;U=Va+156|0;V=Va+152|0;Ja=Va+148|0;Ka=Va+144|0;va=Va+140|0;wa=Va+136|0;Ma=Va+132|0;Na=Va+128|0;Aa=Va+124|0;Qa=Va+120|0;za=Va+116|0;Ba=Va+112|0;aa=Va+108|0;ca=Va+104|0;$=Va+100|0;ba=Va+96|0;W=Va+92|0;_=Va+88|0;Ra=Va+84|0;X=Va+80|0;F=Va+76|0;L=Va+72|0;J=Va+68|0;N=Va+64|0;E=Va+60|0;I=Va+56|0;C=Va+52|0;G=Va+48|0;K=Va+44|0;M=Va+40|0;oa=Va+36|0;z=Va+32|0;x=Va+28|0;B=Va+24|0;na=Va+20|0;w=Va+16|0;da=Va+12|0;pa=Va+8|0;y=Va+4|0;A=Va;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Wa>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Va+324>>2]=.7071067690849304;c[Ua>>2]=c[Wa>>
2];c[q>>2]=(c[q>>2]|0)+(((c[Wa>>2]|0)-1|0)*14<<2);while(1){if((c[Ua>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[O>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[P>>2]=+g[u>>2]+ +g[O>>2];g[ea>>2]=+g[u>>2]-+g[O>>2];g[Ca>>2]=+g[c[n>>2]>>2];g[Da>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[ra>>2]=+g[Ca>>2]+ +g[Da>>2];g[Q>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[R>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[S>>2]=+g[Q>>2]+ +g[R>>2];g[qa>>2]=+g[Q>>2]-+g[R>>2];g[Fa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ga>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Ha>>2]=+g[Fa>>2]-+g[Ga>>2];g[fa>>2]=+g[Fa>>2]+ +g[Ga>>2];g[T>>2]=+g[P>>2]+ +g[S>>2];g[D>>2]=+g[ea>>2]+ +g[fa>>2];g[H>>2]=+g[ra>>2]-+g[qa>>2];g[Ia>>2]=+g[Ea>>2]+ +g[Ha>>2];g[Sa>>2]=+g[P>>2]-+g[S>>2];g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[sa>>2]=+g[qa>>2]+ +g[ra>>2];g[Y>>2]=+g[Ea>>2]-+g[Ha>>2];g[U>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[V>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[ua>>2]=+g[U>>2]+ +g[V>>2];g[ha>>2]=+g[U>>2]-+g[V>>2];g[Ja>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ka>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[La>>2]=+g[Ja>>2]-+g[Ka>>2];g[ia>>2]=+g[Ja>>2]+ +g[Ka>>2];g[va>>2]=+g[c[o>>2]>>2];g[wa>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[xa>>2]=+g[va>>2]+ +g[wa>>2];g[ka>>2]=+g[va>>2]-+g[wa>>2];g[Ma>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Na>>2]=+g[c[p>>2]>>2];g[Oa>>2]=+g[Ma>>2]-+g[Na>>2];g[la>>2]=+g[Ma>>2]+ +g[Na>>2];g[ya>>2]=+g[ua>>2]+ +g[xa>>2];g[ta>>2]=+g[ha>>2]+ +g[ia>>2];g[v>>2]=+g[ka>>2]+ +g[la>>2];g[Pa>>2]=+g[La>>2]+ +g[Oa>>2];g[Ta>>2]=+g[Oa>>2]-+g[La>>2];g[ja>>2]=+g[ha>>2]-+g[ia>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[Z>>2]=+g[ua>>2]-+g[xa>>2];g[c[m>>2]>>2]=+g[T>>2]+ +g[ya>>2];g[c[o>>2]>>2]=+g[Ia>>2]+ +g[Pa>>2];g[Aa>>2]=+g[T>>2]-+g[ya>>2];g[Qa>>2]=+g[Ia>>2]-+g[Pa>>2];g[za>>2]=+g[(c[q>>2]|0)+24>>2];g[Ba>>2]=+g[(c[q>>2]|0)+28>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[za>>2]*+g[Aa>>2]-+g[Ba>>2]*+g[Qa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ba>>2]*+g[Aa>>2]+ +g[za>>2]*+g[Qa>>2];g[aa>>2]=+g[Sa>>2]+ 
+g[Ta>>2];g[ca>>2]=+g[Z>>2]+ +g[Y>>2];g[$>>2]=+g[(c[q>>2]|0)+8>>2];g[ba>>2]=+g[(c[q>>2]|0)+12>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[$>>2]*+g[aa>>2]-+g[ba>>2]*+g[ca>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[$>>2]*+g[ca>>2]+ +g[ba>>2]*+g[aa>>2];g[W>>2]=+g[Sa>>2]-+g[Ta>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[Ra>>2]=+g[(c[q>>2]|0)+40>>2];g[X>>2]=+g[(c[q>>2]|0)+44>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ra>>2]*+g[W>>2]-+g[X>>2]*+g[_>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ra>>2]*+g[_>>2]+ +g[X>>2]*+g[W>>2];g[E>>2]=(+g[ta>>2]+ +g[v>>2])*.7071067690849304;g[F>>2]=+g[D>>2]-+g[E>>2];g[L>>2]=+g[D>>2]+ +g[E>>2];g[I>>2]=(+g[ja>>2]-+g[ma>>2])*.7071067690849304;g[J>>2]=+g[H>>2]+ +g[I>>2];g[N>>2]=+g[H>>2]-+g[I>>2];g[C>>2]=+g[(c[q>>2]|0)+16>>2];g[G>>2]=+g[(c[q>>2]|0)+20>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]*+g[F>>2]-+g[G>>2]*+g[J>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[C>>2]*+g[J>>2]+ +g[G>>2]*+g[F>>2];g[K>>2]=+g[(c[q>>2]|0)+48>>2];g[M>>2]=+g[(c[q>>2]|0)+52>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[K>>2]*+g[L>>2]-+g[M>>2]*+g[N>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[K>>2]*+g[N>>2]+ +g[M>>2]*+g[L>>2];g[na>>2]=(+g[ja>>2]+ +g[ma>>2])*.7071067690849304;g[oa>>2]=+g[ga>>2]-+g[na>>2];g[z>>2]=+g[ga>>2]+ +g[na>>2];g[w>>2]=(+g[ta>>2]-+g[v>>2])*.7071067690849304;g[x>>2]=+g[sa>>2]-+g[w>>2];g[B>>2]=+g[sa>>2]+ +g[w>>2];g[da>>2]=+g[(c[q>>2]|0)+32>>2];g[pa>>2]=+g[(c[q>>2]|0)+36>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[da>>2]*+g[oa>>2]-+g[pa>>2]*+g[x>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[pa>>2]*+g[oa>>2]+ +g[da>>2]*+g[x>>2];g[y>>2]=+g[c[q>>2]>>2];g[A>>2]=+g[(c[q>>2]|0)+4>>2];g[c[n>>2]>>2]=+g[y>>2]*+g[z>>2]-+g[A>>2]*+g[B>>2];g[c[p>>2]>>2]=+g[A>>2]*+g[z>>2]+ +g[y>>2]*+g[B>>2];c[Ua>>2]=(c[Ua>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+56;c[r>>2]=c[r>>2]^c[2998]}i=Va;return}function Ru(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,66,9160,1);i=b;return}function Su(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0;Pd=i;i=i+1008|0;m=Pd+992|0;n=Pd+988|0;o=Pd+984|0;p=Pd+980|0;q=Pd+976|0;r=Pd+972|0;Qd=Pd+968|0;s=Pd+964|0;t=Pd+960|0;Od=Pd+944|0;Rc=Pd+940|0;Za=Pd+936|0;Nb=Pd+932|0;qa=Pd+928|0;td=Pd+924|0;Z=Pd+920|0;Va=Pd+916|0;zc=Pd+912|0;J=Pd+908|0;ca=Pd+904|0;ad=Pd+900|0;Ob=Pd+896|0;Eb=Pd+892|0;uc=Pd+888|0;la=Pd+884|0;_a=Pd+880|0;Id=Pd+876|0;ba=Pd+872|0;cb=Pd+868|0;jb=Pd+864|0;fb=Pd+860|0;kb=Pd+856|0;md=Pd+852|0;ga=Pd+848|0;E=Pd+844|0;ha=Pd+840|0;Hb=Pd+836|0;Qa=Pd+832|0;Kb=Pd+828|0;Ra=Pd+824|0;Aa=Pd+820|0;_=Pd+816|0;Mb=Pd+812|0;ma=Pd+808|0;Qc=Pd+804|0;G=Pd+800|0;Mc=Pd+796|0;Ld=Pd+792|0;pa=Pd+788|0;H=Pd+784|0;od=Pd+780|0;Sc=Pd+776|0;Vc=Pd+772|0;Ba=Pd+768|
0;rd=Pd+764|0;Xc=Pd+760|0;_c=Pd+756|0;Ca=Pd+752|0;u=Pd+748|0;Da=Pd+744|0;Md=Pd+740|0;Nd=Pd+736|0;Kc=Pd+732|0;Lc=Pd+728|0;na=Pd+724|0;oa=Pd+720|0;Oc=Pd+716|0;Pc=Pd+712|0;Tc=Pd+708|0;Uc=Pd+704|0;pd=Pd+700|0;qd=Pd+696|0;Yc=Pd+692|0;Zc=Pd+688|0;Nc=Pd+684|0;sd=Pd+680|0;Wc=Pd+676|0;$c=Pd+672|0;Ta=Pd+668|0;Ua=Pd+664|0;F=Pd+660|0;I=Pd+656|0;Cb=Pd+652|0;Db=Pd+648|0;ja=Pd+644|0;ka=Pd+640|0;wd=Pd+636|0;hd=Pd+632|0;fd=Pd+628|0;ua=Pd+624|0;zd=Pd+620|0;cd=Pd+616|0;kd=Pd+612|0;va=Pd+608|0;Dd=Pd+604|0;z=Pd+600|0;x=Pd+596|0;xa=Pd+592|0;Gd=Pd+588|0;nd=Pd+584|0;C=Pd+580|0;ya=Pd+576|0;ud=Pd+572|0;vd=Pd+568|0;dd=Pd+564|0;ed=Pd+560|0;xd=Pd+556|0;yd=Pd+552|0;id=Pd+548|0;jd=Pd+544|0;Bd=Pd+540|0;Cd=Pd+536|0;v=Pd+532|0;w=Pd+528|0;Ed=Pd+524|0;Fd=Pd+520|0;A=Pd+516|0;B=Pd+512|0;Ad=Pd+508|0;Hd=Pd+504|0;ab=Pd+500|0;bb=Pd+496|0;db=Pd+492|0;eb=Pd+488|0;gd=Pd+484|0;ld=Pd+480|0;y=Pd+476|0;D=Pd+472|0;Fb=Pd+468|0;Gb=Pd+464|0;Ib=Pd+460|0;Jb=Pd+456|0;wa=Pd+452|0;za=Pd+448|0;Jd=Pd+444|0;K=Pd+440|0;ea=Pd+436|0;S=Pd+432|0;sa=Pd+428|0;U=Pd+424|0;Q=Pd+420|0;W=Pd+416|0;bd=Pd+412|0;da=Pd+408|0;ia=Pd+404|0;ra=Pd+400|0;N=Pd+396|0;P=Pd+392|0;M=Pd+388|0;O=Pd+384|0;ta=Pd+380|0;L=Pd+376|0;Kd=Pd+372|0;fa=Pd+368|0;V=Pd+364|0;X=Pd+360|0;R=Pd+356|0;T=Pd+352|0;Ja=Pd+348|0;vb=Pd+344|0;Na=Pd+340|0;xb=Pd+336|0;Fa=Pd+332|0;nb=Pd+328|0;tb=Pd+324|0;zb=Pd+320|0;Ha=Pd+316|0;Ia=Pd+312|0;La=Pd+308|0;Ma=Pd+304|0;$=Pd+300|0;Ea=Pd+296|0;Y=Pd+292|0;aa=Pd+288|0;qb=Pd+284|0;sb=Pd+280|0;pb=Pd+276|0;rb=Pd+272|0;mb=Pd+268|0;ob=Pd+264|0;Ga=Pd+260|0;Ka=Pd+256|0;yb=Pd+252|0;Ab=Pd+248|0;ub=Pd+244|0;wb=Pd+240|0;Oa=Pd+236|0;Vb=Pd+232|0;Wa=Pd+228|0;kc=Pd+224|0;hb=Pd+220|0;nc=Pd+216|0;Qb=Pd+212|0;pc=Pd+208|0;Lb=Pd+204|0;Sa=Pd+200|0;$a=Pd+196|0;gb=Pd+192|0;lb=Pd+188|0;Pb=Pd+184|0;Xa=Pd+180|0;Sb=Pd+176|0;Rb=Pd+172|0;Tb=Pd+168|0;Bb=Pd+164|0;Pa=Pd+160|0;Ya=Pd+156|0;ib=Pd+152|0;lc=Pd+148|0;rc=Pd+144|0;qc=Pd+140|0;sc=Pd+136|0;Ub=Pd+132|0;Wb=Pd+128|0;mc=Pd+124|0;oc=Pd+120|0;wc=Pd+116|0;$b=Pd+112|0;Ac=Pd+108|0;bc=Pd+104|0;Fc=Pd+100|0;ec=Pd+96|0;Jc=Pd+92|0;
gc=Pd+88|0;vc=Pd+84|0;yc=Pd+80|0;Dc=Pd+76|0;Ec=Pd+72|0;Hc=Pd+68|0;Ic=Pd+64|0;Bc=Pd+60|0;Yb=Pd+56|0;Xb=Pd+52|0;Zb=Pd+48|0;tc=Pd+44|0;xc=Pd+40|0;Cc=Pd+36|0;Gc=Pd+32|0;cc=Pd+28|0;ic=Pd+24|0;hc=Pd+20|0;jc=Pd+16|0;_b=Pd+12|0;ac=Pd+8|0;dc=Pd+4|0;fc=Pd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Qd>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Pd+956>>2]=.9238795042037964;g[Pd+952>>2]=.3826834261417389;g[Pd+948>>2]=.7071067690849304;c[Od>>2]=c[Qd>>2];c[q>>2]=(c[q>>2]|0)+(((c[Qd>>2]|0)-1|0)*30<<2);while(1){if((c[Od>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[ma>>2]=+g[u>>2]-+g[Da>>2];g[Md>>2]=+g[c[n>>2]>>2];g[Nd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Qc>>2]=+g[Md>>2]+ +g[Nd>>2];g[G>>2]=+g[Md>>2]-+g[Nd>>2];g[Kc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Lc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Mc>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Ld>>2]=+g[Kc>>2]-+g[Lc>>2];g[na>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[H>>2]=+g[na>>2]-+g[oa>>2];g[Oc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Pc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[od>>2]=+g[Oc>>2]+ +g[Pc>>2];g[Sc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Tc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Uc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Vc>>2]=+g[Tc>>2]+ +g[Uc>>2];g[Ba>>2]=+g[Tc>>2]-+g[Uc>>2];g[pd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[qd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[rd>>2]=+g[pd>>2]+ +g[qd>>2];g[Xc>>2]=+g[pd>>2]-+g[qd>>2];g[Yc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Zc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[_c>>2]=+g[Yc>>2]+ +g[Zc>>2];g[Ca>>2]=+g[Zc>>2]-+g[Yc>>2];g[Rc>>2]=+g[Ld>>2]+ +g[Qc>>2];g[Za>>2]=+g[Qc>>2]-+g[Ld>>2];g[Nb>>2]=+g[ma>>2]+ +g[pa>>2];g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[Nc>>2]=+g[Mb>>2]+ +g[Mc>>2];g[sd>>2]=+g[od>>2]+ +g[rd>>2];g[td>>2]=+g[Nc>>2]+ +g[sd>>2];g[Z>>2]=+g[Nc>>2]-+g[sd>>2];g[Ta>>2]=+g[od>>2]-+g[rd>>2];g[Ua>>2]=+g[G>>2]-+g[H>>2];g[Va>>2]=+g[Ta>>2]+ 
+g[Ua>>2];g[zc>>2]=+g[Ua>>2]-+g[Ta>>2];g[F>>2]=+g[Ba>>2]+ +g[Ca>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[J>>2]=+g[F>>2]+ +g[I>>2];g[ca>>2]=+g[I>>2]-+g[F>>2];g[Wc>>2]=+g[Sc>>2]+ +g[Vc>>2];g[$c>>2]=+g[Xc>>2]+ +g[_c>>2];g[ad>>2]=(+g[Wc>>2]-+g[$c>>2])*.7071067690849304;g[Ob>>2]=(+g[Wc>>2]+ +g[$c>>2])*.7071067690849304;g[Cb>>2]=+g[Mb>>2]-+g[Mc>>2];g[Db>>2]=+g[Ca>>2]-+g[Ba>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[uc>>2]=+g[Cb>>2]-+g[Db>>2];g[ja>>2]=+g[Sc>>2]-+g[Vc>>2];g[ka>>2]=+g[Xc>>2]-+g[_c>>2];g[la>>2]=(+g[ja>>2]+ +g[ka>>2])*.7071067690849304;g[_a>>2]=(+g[ja>>2]-+g[ka>>2])*.7071067690849304;g[ud>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[vd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[wd>>2]=+g[ud>>2]+ +g[vd>>2];g[hd>>2]=+g[ud>>2]-+g[vd>>2];g[dd>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ed>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[fd>>2]=+g[dd>>2]+ +g[ed>>2];g[ua>>2]=+g[dd>>2]-+g[ed>>2];g[xd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[yd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[zd>>2]=+g[xd>>2]+ +g[yd>>2];g[cd>>2]=+g[xd>>2]-+g[yd>>2];g[id>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[jd>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[kd>>2]=+g[id>>2]+ +g[jd>>2];g[va>>2]=+g[id>>2]-+g[jd>>2];g[Bd>>2]=+g[c[o>>2]>>2];g[Cd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Dd>>2]=+g[Bd>>2]+ +g[Cd>>2];g[z>>2]=+g[Bd>>2]-+g[Cd>>2];g[v>>2]=+g[c[p>>2]>>2];g[w>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[xa>>2]=+g[w>>2]-+g[v>>2];g[Ed>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Fd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Gd>>2]=+g[Ed>>2]+ +g[Fd>>2];g[nd>>2]=+g[Ed>>2]-+g[Fd>>2];g[A>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[ya>>2]=+g[A>>2]-+g[B>>2];g[Ad>>2]=+g[wd>>2]+ +g[zd>>2];g[Hd>>2]=+g[Dd>>2]+ +g[Gd>>2];g[Id>>2]=+g[Ad>>2]+ +g[Hd>>2];g[ba>>2]=+g[Ad>>2]-+g[Hd>>2];g[ab>>2]=+g[fd>>2]-+g[cd>>2];g[bb>>2]=+g[hd>>2]+ +g[kd>>2];g[cb>>2]=+g[ab>>2]*.3826834261417389+ 
+g[bb>>2]*.9238795042037964;g[jb>>2]=+g[bb>>2]*.3826834261417389-+g[ab>>2]*.9238795042037964;g[db>>2]=+g[nd>>2]+ +g[x>>2];g[eb>>2]=+g[z>>2]+ +g[C>>2];g[fb>>2]=+g[db>>2]*.3826834261417389+ +g[eb>>2]*.9238795042037964;g[kb>>2]=+g[eb>>2]*.3826834261417389-+g[db>>2]*.9238795042037964;g[gd>>2]=+g[cd>>2]+ +g[fd>>2];g[ld>>2]=+g[hd>>2]-+g[kd>>2];g[md>>2]=+g[gd>>2]*.9238795042037964+ +g[ld>>2]*.3826834261417389;g[ga>>2]=+g[ld>>2]*.9238795042037964-+g[gd>>2]*.3826834261417389;g[y>>2]=+g[nd>>2]-+g[x>>2];g[D>>2]=+g[z>>2]-+g[C>>2];g[E>>2]=+g[y>>2]*.9238795042037964-+g[D>>2]*.3826834261417389;g[ha>>2]=+g[y>>2]*.3826834261417389+ +g[D>>2]*.9238795042037964;g[Fb>>2]=+g[wd>>2]-+g[zd>>2];g[Gb>>2]=+g[ua>>2]-+g[va>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[Qa>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Ib>>2]=+g[Dd>>2]-+g[Gd>>2];g[Jb>>2]=+g[xa>>2]-+g[ya>>2];g[Kb>>2]=+g[Ib>>2]+ +g[Jb>>2];g[Ra>>2]=+g[Jb>>2]-+g[Ib>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[Aa>>2]=+g[wa>>2]+ +g[za>>2];g[_>>2]=+g[za>>2]-+g[wa>>2];g[Jd>>2]=+g[td>>2]+ +g[Id>>2];g[K>>2]=+g[Aa>>2]+ +g[J>>2];g[bd>>2]=+g[Rc>>2]+ +g[ad>>2];g[da>>2]=+g[md>>2]+ +g[E>>2];g[ea>>2]=+g[bd>>2]+ +g[da>>2];g[S>>2]=+g[bd>>2]-+g[da>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[sa>>2]=+g[ia>>2]+ +g[ra>>2];g[U>>2]=+g[ra>>2]-+g[ia>>2];g[N>>2]=+g[td>>2]-+g[Id>>2];g[P>>2]=+g[J>>2]-+g[Aa>>2];g[M>>2]=+g[(c[q>>2]|0)+56>>2];g[O>>2]=+g[(c[q>>2]|0)+60>>2];g[Q>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[W>>2]=+g[O>>2]*+g[N>>2]+ +g[M>>2]*+g[P>>2];g[Kd>>2]=+g[c[q>>2]>>2];g[fa>>2]=+g[(c[q>>2]|0)+4>>2];g[ta>>2]=+g[Kd>>2]*+g[ea>>2]+ +g[fa>>2]*+g[sa>>2];g[L>>2]=+g[Kd>>2]*+g[sa>>2]-+g[fa>>2]*+g[ea>>2];g[c[m>>2]>>2]=+g[Jd>>2]-+g[ta>>2];g[c[n>>2]>>2]=+g[K>>2]+ +g[L>>2];g[c[o>>2]>>2]=+g[Jd>>2]+ +g[ta>>2];g[c[p>>2]>>2]=+g[L>>2]-+g[K>>2];g[R>>2]=+g[(c[q>>2]|0)+64>>2];g[T>>2]=+g[(c[q>>2]|0)+68>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ 
+g[T>>2]*+g[U>>2];g[X>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]-+g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[W>>2]+ +g[X>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]+ +g[V>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[X>>2]-+g[W>>2];g[Ha>>2]=+g[Rc>>2]-+g[ad>>2];g[Ia>>2]=+g[ga>>2]-+g[ha>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[vb>>2]=+g[Ha>>2]-+g[Ia>>2];g[La>>2]=+g[E>>2]-+g[md>>2];g[Ma>>2]=+g[qa>>2]-+g[la>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[xb>>2]=+g[Ma>>2]-+g[La>>2];g[$>>2]=+g[Z>>2]+ +g[_>>2];g[Ea>>2]=+g[ba>>2]+ +g[ca>>2];g[Y>>2]=+g[(c[q>>2]|0)+24>>2];g[aa>>2]=+g[(c[q>>2]|0)+28>>2];g[Fa>>2]=+g[Y>>2]*+g[$>>2]-+g[aa>>2]*+g[Ea>>2];g[nb>>2]=+g[aa>>2]*+g[$>>2]+ +g[Y>>2]*+g[Ea>>2];g[qb>>2]=+g[Z>>2]-+g[_>>2];g[sb>>2]=+g[ca>>2]-+g[ba>>2];g[pb>>2]=+g[(c[q>>2]|0)+88>>2];g[rb>>2]=+g[(c[q>>2]|0)+92>>2];g[tb>>2]=+g[pb>>2]*+g[qb>>2]-+g[rb>>2]*+g[sb>>2];g[zb>>2]=+g[rb>>2]*+g[qb>>2]+ +g[pb>>2]*+g[sb>>2];g[Ga>>2]=+g[(c[q>>2]|0)+32>>2];g[Ka>>2]=+g[(c[q>>2]|0)+36>>2];g[mb>>2]=+g[Ga>>2]*+g[Ja>>2]+ +g[Ka>>2]*+g[Na>>2];g[ob>>2]=+g[Ga>>2]*+g[Na>>2]-+g[Ka>>2]*+g[Ja>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]-+g[mb>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[nb>>2]+ +g[ob>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]+ +g[mb>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ob>>2]-+g[nb>>2];g[ub>>2]=+g[(c[q>>2]|0)+96>>2];g[wb>>2]=+g[(c[q>>2]|0)+100>>2];g[yb>>2]=+g[ub>>2]*+g[vb>>2]+ +g[wb>>2]*+g[xb>>2];g[Ab>>2]=+g[ub>>2]*+g[xb>>2]-+g[wb>>2]*+g[vb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[tb>>2]-+g[yb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[zb>>2]+ +g[Ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[tb>>2]+ +g[yb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ab>>2]-+g[zb>>2];g[Lb>>2]=(+g[Hb>>2]+ +g[Kb>>2])*.7071067690849304;g[Oa>>2]=+g[Eb>>2]+ +g[Lb>>2];g[Vb>>2]=+g[Eb>>2]-+g[Lb>>2];g[Sa>>2]=(+g[Qa>>2]+ +g[Ra>>2])*.7071067690849304;g[Wa>>2]=+g[Sa>>2]+ +g[Va>>2];g[kc>>2]=+g[Va>>2]-+g[Sa>>2];g[$a>>2]=+g[Za>>2]+ 
+g[_a>>2];g[gb>>2]=+g[cb>>2]-+g[fb>>2];g[hb>>2]=+g[$a>>2]+ +g[gb>>2];g[nc>>2]=+g[$a>>2]-+g[gb>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2];g[Qb>>2]=+g[lb>>2]+ +g[Pb>>2];g[pc>>2]=+g[Pb>>2]-+g[lb>>2];g[Bb>>2]=+g[(c[q>>2]|0)+8>>2];g[Pa>>2]=+g[(c[q>>2]|0)+12>>2];g[Xa>>2]=+g[Bb>>2]*+g[Oa>>2]-+g[Pa>>2]*+g[Wa>>2];g[Sb>>2]=+g[Pa>>2]*+g[Oa>>2]+ +g[Bb>>2]*+g[Wa>>2];g[Ya>>2]=+g[(c[q>>2]|0)+16>>2];g[ib>>2]=+g[(c[q>>2]|0)+20>>2];g[Rb>>2]=+g[Ya>>2]*+g[hb>>2]+ +g[ib>>2]*+g[Qb>>2];g[Tb>>2]=+g[Ya>>2]*+g[Qb>>2]-+g[ib>>2]*+g[hb>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xa>>2]-+g[Rb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sb>>2]+ +g[Tb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xa>>2]+ +g[Rb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Tb>>2]-+g[Sb>>2];g[Ub>>2]=+g[(c[q>>2]|0)+72>>2];g[Wb>>2]=+g[(c[q>>2]|0)+76>>2];g[lc>>2]=+g[Ub>>2]*+g[Vb>>2]-+g[Wb>>2]*+g[kc>>2];g[rc>>2]=+g[Wb>>2]*+g[Vb>>2]+ +g[Ub>>2]*+g[kc>>2];g[mc>>2]=+g[(c[q>>2]|0)+80>>2];g[oc>>2]=+g[(c[q>>2]|0)+84>>2];g[qc>>2]=+g[mc>>2]*+g[nc>>2]+ +g[oc>>2]*+g[pc>>2];g[sc>>2]=+g[mc>>2]*+g[pc>>2]-+g[oc>>2]*+g[nc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[lc>>2]-+g[qc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[rc>>2]+ +g[sc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[lc>>2]+ +g[qc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[sc>>2]-+g[rc>>2];g[vc>>2]=(+g[Ra>>2]-+g[Qa>>2])*.7071067690849304;g[wc>>2]=+g[uc>>2]+ +g[vc>>2];g[$b>>2]=+g[uc>>2]-+g[vc>>2];g[yc>>2]=(+g[Hb>>2]-+g[Kb>>2])*.7071067690849304;g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[bc>>2]=+g[zc>>2]-+g[yc>>2];g[Dc>>2]=+g[Za>>2]-+g[_a>>2];g[Ec>>2]=+g[jb>>2]-+g[kb>>2];g[Fc>>2]=+g[Dc>>2]+ +g[Ec>>2];g[ec>>2]=+g[Dc>>2]-+g[Ec>>2];g[Hc>>2]=+g[Ob>>2]+ +g[Nb>>2];g[Ic>>2]=+g[cb>>2]+ +g[fb>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[gc>>2]=+g[Ic>>2]+ +g[Hc>>2];g[tc>>2]=+g[(c[q>>2]|0)+40>>2];g[xc>>2]=+g[(c[q>>2]|0)+44>>2];g[Bc>>2]=+g[tc>>2]*+g[wc>>2]-+g[xc>>2]*+g[Ac>>2];g[Yb>>2]=+g[xc>>2]*+g[wc>>2]+ +g[tc>>2]*+g[Ac>>2];g[Cc>>2]=+g[(c[q>>2]|0)+48>>2];g[Gc>>2]=+g[(c[q>>2]|0)+52>>2];g[Xb>>2]=+g[Cc>>2]*+g[Fc>>2]+ 
+g[Gc>>2]*+g[Jc>>2];g[Zb>>2]=+g[Cc>>2]*+g[Jc>>2]-+g[Gc>>2]*+g[Fc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Bc>>2]-+g[Xb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Bc>>2]+ +g[Xb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Zb>>2]-+g[Yb>>2];g[_b>>2]=+g[(c[q>>2]|0)+104>>2];g[ac>>2]=+g[(c[q>>2]|0)+108>>2];g[cc>>2]=+g[_b>>2]*+g[$b>>2]-+g[ac>>2]*+g[bc>>2];g[ic>>2]=+g[ac>>2]*+g[$b>>2]+ +g[_b>>2]*+g[bc>>2];g[dc>>2]=+g[(c[q>>2]|0)+112>>2];g[fc>>2]=+g[(c[q>>2]|0)+116>>2];g[hc>>2]=+g[dc>>2]*+g[ec>>2]+ +g[fc>>2]*+g[gc>>2];g[jc>>2]=+g[dc>>2]*+g[gc>>2]-+g[fc>>2]*+g[ec>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]-+g[hc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ic>>2]+ +g[jc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]+ +g[hc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[jc>>2]-+g[ic>>2];c[Od>>2]=(c[Od>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+120;c[r>>2]=c[r>>2]^c[2998]}i=Pd;return}/* Tu(a): coerces a to int, saves the global i, bumps i by 16 bytes, stores a into that 16-byte frame through c[], calls an(a, 67, 9208, 1), then restores i and returns nothing. NOTE(review): i behaves like a stack pointer here, 67 looks like a function-table index and 9208 like a static data address (probably a codelet registration call) - confirm against the Emscripten build. */function Tu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,67,9208,1);i=b;return}/* Uu(a,b,d,e,f,h,j,k,l): all nine arguments are coerced to int; returns nothing. Reserves a 1360-byte frame on i, restores i on exit, and keeps its arguments and temporaries in that frame. a, b, d and e are byte addresses used as bases for float loads and stores through g[]. f is the byte address of a table of coefficient floats: it is first advanced by (j-1)*38 floats, then by 152 bytes (38 floats) per iteration. h is an element stride that is multiplied by 1..9 and scaled to bytes when indexing the four bases. The loop counter starts at j and the loop exits once it is >= k. After each iteration a and b move forward by l floats, d and e move backward by l floats, and the stride is XORed with c[2998]. Arithmetic literals used: .25, .9510565400123596, .5877852439880371, .55901700258255. NOTE(review): this has the shape of an FFTW single-precision hc2c twiddle codelet of size 20 (38 = 2*(20-1)) - confirm against the FFTW sources before relying on that. */function Uu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0;wf=i;i=i+1360|0;m=wf+1348|0;n=wf+1344|0;o=wf+1340|0;p=wf+1336|0;q=wf+1332|0;r=wf+1328|0;xf=wf+1324|0;s=wf+1320|0;t=wf+1316|0;vf=wf+1296|0;te=wf+1292|0;ic=wf+1288|0;Ed=wf+1284|0;B=wf+1280|0;L=wf+1276|0;Bc=wf+1272|0;_b=wf+1268|0;Ka=wf+1264|0;na=wf+1260|0;Xb=wf+1256|0;Yb=wf+1252|0;ya=wf+1248|0;qb=wf+1244|0;cd=wf+1240|0;bd=wf+1236|0;pb=wf+1232|0;Q=wf+1228|0;Zc=wf+1224|
0;Wc=wf+1220|0;P=wf+1216|0;xb=wf+1212|0;Oc=wf+1208|0;yb=wf+1204|0;Rc=wf+1200|0;Ic=wf+1196|0;Kc=wf+1192|0;ye=wf+1188|0;nb=wf+1184|0;Hd=wf+1180|0;Jd=wf+1176|0;Ha=wf+1172|0;vb=wf+1168|0;G=wf+1164|0;M=wf+1160|0;pc=wf+1156|0;rc=wf+1152|0;bc=wf+1148|0;dc=wf+1144|0;w=wf+1140|0;C=wf+1136|0;Mb=wf+1132|0;H=wf+1128|0;A=wf+1124|0;Ia=wf+1120|0;se=wf+1116|0;x=wf+1112|0;K=wf+1108|0;Ja=wf+1104|0;u=wf+1100|0;Da=wf+1096|0;y=wf+1092|0;z=wf+1088|0;Vc=wf+1084|0;ce=wf+1080|0;I=wf+1076|0;J=wf+1072|0;_e=wf+1068|0;jc=wf+1064|0;tc=wf+1060|0;Fe=wf+1056|0;ha=wf+1052|0;Cc=wf+1048|0;Mc=wf+1044|0;W=wf+1040|0;we=wf+1036|0;nc=wf+1032|0;Yc=wf+1028|0;Ve=wf+1024|0;xa=wf+1020|0;Gc=wf+1016|0;Qc=wf+1012|0;Fa=wf+1008|0;ff=wf+1004|0;kc=wf+1e3|0;uc=wf+996|0;Ke=wf+992|0;ma=wf+988|0;Dc=wf+984|0;Nc=wf+980|0;Z=wf+976|0;of=wf+972|0;mc=wf+968|0;Xc=wf+964|0;Qe=wf+960|0;sa=wf+956|0;Fc=wf+952|0;Pc=wf+948|0;ba=wf+944|0;We=wf+940|0;da=wf+936|0;Ee=wf+932|0;U=wf+928|0;Ze=wf+924|0;Be=wf+920|0;ga=wf+916|0;V=wf+912|0;ue=wf+908|0;ve=wf+904|0;Ce=wf+900|0;De=wf+896|0;Xe=wf+892|0;Ye=wf+888|0;ea=wf+884|0;fa=wf+880|0;rf=wf+876|0;ta=wf+872|0;Ue=wf+868|0;ca=wf+864|0;uf=wf+860|0;Re=wf+856|0;wa=wf+852|0;Ea=wf+848|0;pf=wf+844|0;qf=wf+840|0;Se=wf+836|0;Te=wf+832|0;sf=wf+828|0;tf=wf+824|0;ua=wf+820|0;va=wf+816|0;bf=wf+812|0;ia=wf+808|0;Je=wf+804|0;X=wf+800|0;ef=wf+796|0;Ge=wf+792|0;la=wf+788|0;Y=wf+784|0;$e=wf+780|0;af=wf+776|0;He=wf+772|0;Ie=wf+768|0;cf=wf+764|0;df=wf+760|0;ja=wf+756|0;ka=wf+752|0;kf=wf+748|0;oa=wf+744|0;Pe=wf+740|0;$=wf+736|0;nf=wf+732|0;Me=wf+728|0;ra=wf+724|0;aa=wf+720|0;hf=wf+716|0;jf=wf+712|0;Ne=wf+708|0;Oe=wf+704|0;lf=wf+700|0;mf=wf+696|0;pa=wf+692|0;qa=wf+688|0;Ec=wf+684|0;Hc=wf+680|0;gf=wf+676|0;xe=wf+672|0;Fd=wf+668|0;Gd=wf+664|0;lc=wf+660|0;oc=wf+656|0;_=wf+652|0;Ga=wf+648|0;Ca=wf+644|0;F=wf+640|0;$b=wf+636|0;ac=wf+632|0;Le=wf+628|0;v=wf+624|0;ze=wf+620|0;La=wf+616|0;xc=wf+612|0;zc=wf+608|0;Ab=wf+604|0;Tb=wf+600|0;Oa=wf+596|0;gb=wf+592|0;S=wf+588|0;lb=wf+584|0;Fb=wf+580|0;Xa=wf+576|0;Aa=wf+572|0;jb=wf+568|0;
Db=wf+564|0;Ta=wf+560|0;sb=wf+556|0;Rb=wf+552|0;Kb=wf+548|0;cb=wf+544|0;Wb=wf+540|0;wc=wf+536|0;Vb=wf+532|0;vc=wf+528|0;zb=wf+524|0;eb=wf+520|0;wb=wf+516|0;fb=wf+512|0;ub=wf+508|0;R=wf+504|0;Va=wf+500|0;O=wf+496|0;Wa=wf+492|0;N=wf+488|0;za=wf+484|0;Sa=wf+480|0;E=wf+476|0;Ra=wf+472|0;D=wf+468|0;rb=wf+464|0;bb=wf+460|0;ob=wf+456|0;ab=wf+452|0;mb=wf+448|0;T=wf+444|0;Ma=wf+440|0;Ae=wf+436|0;Ba=wf+432|0;Ub=wf+428|0;yc=wf+424|0;Qb=wf+420|0;Sb=wf+416|0;Bb=wf+412|0;Hb=wf+408|0;Gb=wf+404|0;Ib=wf+400|0;Na=wf+396|0;tb=wf+392|0;Cb=wf+388|0;Eb=wf+384|0;Pa=wf+380|0;Za=wf+376|0;Ya=wf+372|0;_a=wf+368|0;Jb=wf+364|0;Lb=wf+360|0;Qa=wf+356|0;Ua=wf+352|0;hb=wf+348|0;Ob=wf+344|0;Nb=wf+340|0;Pb=wf+336|0;$a=wf+332|0;db=wf+328|0;ib=wf+324|0;kb=wf+320|0;jd=wf+316|0;td=wf+312|0;pe=wf+308|0;re=wf+304|0;Tc=wf+300|0;he=wf+296|0;Sd=wf+292|0;wd=wf+288|0;Ld=wf+284|0;Dd=wf+280|0;$d=wf+276|0;rd=wf+272|0;$c=wf+268|0;Bd=wf+264|0;Zd=wf+260|0;nd=wf+256|0;fc=wf+252|0;je=wf+248|0;Wd=wf+244|0;yd=wf+240|0;gd=wf+236|0;id=wf+232|0;fd=wf+228|0;hd=wf+224|0;me=wf+220|0;oe=wf+216|0;le=wf+212|0;ne=wf+208|0;Sc=wf+204|0;Rd=wf+200|0;Lc=wf+196|0;Qd=wf+192|0;Jc=wf+188|0;dd=wf+184|0;qd=wf+180|0;Kd=wf+176|0;pd=wf+172|0;Id=wf+168|0;_c=wf+164|0;md=wf+160|0;sc=wf+156|0;ld=wf+152|0;qc=wf+148|0;Zb=wf+144|0;Ud=wf+140|0;ec=wf+136|0;Vd=wf+132|0;cc=wf+128|0;sd=wf+124|0;ud=wf+120|0;kd=wf+116|0;od=wf+112|0;ke=wf+108|0;qe=wf+104|0;ge=wf+100|0;ie=wf+96|0;gc=wf+92|0;Nd=wf+88|0;Md=wf+84|0;Od=wf+80|0;Ac=wf+76|0;Uc=wf+72|0;hc=wf+68|0;ad=wf+64|0;Xd=wf+60|0;be=wf+56|0;ae=wf+52|0;ed=wf+48|0;Pd=wf+44|0;Td=wf+40|0;Yd=wf+36|0;_d=wf+32|0;zd=wf+28|0;ee=wf+24|0;de=wf+20|0;fe=wf+16|0;vd=wf+12|0;xd=wf+8|0;Ad=wf+4|0;Cd=wf;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[xf>>2]=j;c[s>>2]=k;c[t>>2]=l;g[wf+1312>>2]=.25;g[wf+1308>>2]=.9510565400123596;g[wf+1304>>2]=.5877852439880371;g[wf+1300>>2]=.55901700258255;c[vf>>2]=c[xf>>2];c[q>>2]=(c[q>>2]|0)+(((c[xf>>2]|0)-1|0)*38<<2);while(1){if((c[vf>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2]
;g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[H>>2]=+g[u>>2]-+g[Da>>2];g[y>>2]=+g[c[n>>2]>>2];g[z>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[Ia>>2]=+g[y>>2]-+g[z>>2];g[Vc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[se>>2]=+g[Vc>>2]+ +g[ce>>2];g[x>>2]=+g[Vc>>2]-+g[ce>>2];g[I>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[J>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[Ja>>2]=+g[I>>2]-+g[J>>2];g[te>>2]=+g[Mb>>2]+ +g[se>>2];g[ic>>2]=+g[A>>2]-+g[x>>2];g[Ed>>2]=+g[H>>2]+ +g[K>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[L>>2]=+g[H>>2]-+g[K>>2];g[Bc>>2]=+g[Mb>>2]-+g[se>>2];g[_b>>2]=+g[Ia>>2]-+g[Ja>>2];g[Ka>>2]=+g[Ia>>2]+ +g[Ja>>2];g[ue>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ve>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[We>>2]=+g[ue>>2]+ +g[ve>>2];g[da>>2]=+g[ue>>2]-+g[ve>>2];g[Ce>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[De>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[U>>2]=+g[Ce>>2]-+g[De>>2];g[Xe>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ye>>2]=+g[c[o>>2]>>2];g[Ze>>2]=+g[Xe>>2]+ +g[Ye>>2];g[Be>>2]=+g[Xe>>2]-+g[Ye>>2];g[ea>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[fa>>2]=+g[c[p>>2]>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[V>>2]=+g[ea>>2]-+g[fa>>2];g[_e>>2]=+g[We>>2]+ +g[Ze>>2];g[jc>>2]=+g[Ee>>2]-+g[Be>>2];g[tc>>2]=+g[da>>2]+ +g[ga>>2];g[Fe>>2]=+g[Be>>2]+ +g[Ee>>2];g[ha>>2]=+g[da>>2]-+g[ga>>2];g[Cc>>2]=+g[We>>2]-+g[Ze>>2];g[Mc>>2]=+g[U>>2]-+g[V>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[pf>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[qf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[rf>>2]=+g[pf>>2]+ +g[qf>>2];g[ta>>2]=+g[pf>>2]-+g[qf>>2];g[Se>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Te>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ue>>2]=+g[Se>>2]+ +g[Te>>2];g[ca>>2]=+g[Te>>2]-+g[Se>>2];g[sf>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[tf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[uf>>2]=+g[sf>>2]+ 
+g[tf>>2];g[Re>>2]=+g[sf>>2]-+g[tf>>2];g[ua>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[Ea>>2]=+g[va>>2]-+g[ua>>2];g[we>>2]=+g[rf>>2]+ +g[uf>>2];g[nc>>2]=+g[Re>>2]+ +g[Ue>>2];g[Yc>>2]=+g[ta>>2]-+g[wa>>2];g[Ve>>2]=+g[Re>>2]-+g[Ue>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[Gc>>2]=+g[rf>>2]-+g[uf>>2];g[Qc>>2]=+g[ca>>2]-+g[Ea>>2];g[Fa>>2]=+g[ca>>2]+ +g[Ea>>2];g[$e>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[af>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[ia>>2]=+g[$e>>2]-+g[af>>2];g[He>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ie>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Je>>2]=+g[He>>2]+ +g[Ie>>2];g[X>>2]=+g[Ie>>2]-+g[He>>2];g[cf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[df>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ef>>2]=+g[cf>>2]+ +g[df>>2];g[Ge>>2]=+g[cf>>2]-+g[df>>2];g[ja>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ka>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[Y>>2]=+g[ja>>2]-+g[ka>>2];g[ff>>2]=+g[bf>>2]+ +g[ef>>2];g[kc>>2]=+g[Ge>>2]+ +g[Je>>2];g[uc>>2]=+g[ia>>2]+ +g[la>>2];g[Ke>>2]=+g[Ge>>2]-+g[Je>>2];g[ma>>2]=+g[ia>>2]-+g[la>>2];g[Dc>>2]=+g[bf>>2]-+g[ef>>2];g[Nc>>2]=+g[X>>2]-+g[Y>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[hf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[jf>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[kf>>2]=+g[hf>>2]+ +g[jf>>2];g[oa>>2]=+g[hf>>2]-+g[jf>>2];g[Ne>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Oe>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[$>>2]=+g[Ne>>2]-+g[Oe>>2];g[lf>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[mf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[nf>>2]=+g[lf>>2]+ +g[mf>>2];g[Me>>2]=+g[lf>>2]-+g[mf>>2];g[pa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[qa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ra>>2]=+g[pa>>2]+ +g[qa>>2];g[aa>>2]=+g[qa>>2]-+g[pa>>2];g[of>>2]=+g[kf>>2]+ +g[nf>>2];g[mc>>2]=+g[Pe>>2]-+g[Me>>2];g[Xc>>2]=+g[oa>>2]-+g[ra>>2];g[Qe>>2]=+g[Me>>2]+ +g[Pe>>2];g[sa>>2]=+g[oa>>2]+ 
+g[ra>>2];g[Fc>>2]=+g[kf>>2]-+g[nf>>2];g[Pc>>2]=+g[$>>2]-+g[aa>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[na>>2]=+g[ha>>2]-+g[ma>>2];g[Xb>>2]=+g[Cc>>2]-+g[Dc>>2];g[Yb>>2]=+g[Fc>>2]-+g[Gc>>2];g[ya>>2]=+g[sa>>2]-+g[xa>>2];g[qb>>2]=+g[ba>>2]-+g[Fa>>2];g[cd>>2]=+g[mc>>2]+ +g[nc>>2];g[bd>>2]=+g[jc>>2]+ +g[kc>>2];g[pb>>2]=+g[W>>2]-+g[Z>>2];g[Q>>2]=+g[Qe>>2]-+g[Ve>>2];g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[P>>2]=+g[Fe>>2]-+g[Ke>>2];g[xb>>2]=+g[_e>>2]-+g[ff>>2];g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2];g[yb>>2]=+g[of>>2]-+g[we>>2];g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Ic>>2]=+g[Ec>>2]+ +g[Hc>>2];g[Kc>>2]=(+g[Ec>>2]-+g[Hc>>2])*.55901700258255;g[gf>>2]=+g[_e>>2]+ +g[ff>>2];g[xe>>2]=+g[of>>2]+ +g[we>>2];g[ye>>2]=+g[gf>>2]+ +g[xe>>2];g[nb>>2]=(+g[gf>>2]-+g[xe>>2])*.55901700258255;g[Fd>>2]=+g[tc>>2]+ +g[uc>>2];g[Gd>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Hd>>2]=+g[Fd>>2]+ +g[Gd>>2];g[Jd>>2]=(+g[Fd>>2]-+g[Gd>>2])*.55901700258255;g[_>>2]=+g[W>>2]+ +g[Z>>2];g[Ga>>2]=+g[ba>>2]+ +g[Fa>>2];g[Ha>>2]=+g[_>>2]+ +g[Ga>>2];g[vb>>2]=(+g[_>>2]-+g[Ga>>2])*.55901700258255;g[Ca>>2]=+g[ha>>2]+ +g[ma>>2];g[F>>2]=+g[sa>>2]+ +g[xa>>2];g[G>>2]=(+g[Ca>>2]-+g[F>>2])*.55901700258255;g[M>>2]=+g[Ca>>2]+ +g[F>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[pc>>2]=+g[lc>>2]+ +g[oc>>2];g[rc>>2]=(+g[lc>>2]-+g[oc>>2])*.55901700258255;g[$b>>2]=+g[Mc>>2]+ +g[Nc>>2];g[ac>>2]=+g[Pc>>2]+ +g[Qc>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[dc>>2]=(+g[$b>>2]-+g[ac>>2])*.55901700258255;g[Le>>2]=+g[Fe>>2]+ +g[Ke>>2];g[v>>2]=+g[Qe>>2]+ +g[Ve>>2];g[w>>2]=(+g[Le>>2]-+g[v>>2])*.55901700258255;g[C>>2]=+g[Le>>2]+ +g[v>>2];g[ze>>2]=+g[te>>2]+ +g[ye>>2];g[La>>2]=+g[Ha>>2]+ +g[Ka>>2];g[Wb>>2]=+g[B>>2]+ +g[C>>2];g[wc>>2]=+g[M>>2]+ +g[L>>2];g[Vb>>2]=+g[(c[q>>2]|0)+32>>2];g[vc>>2]=+g[(c[q>>2]|0)+36>>2];g[xc>>2]=+g[Vb>>2]*+g[Wb>>2]+ 
+g[vc>>2]*+g[wc>>2];g[zc>>2]=+g[Vb>>2]*+g[wc>>2]-+g[vc>>2]*+g[Wb>>2];g[zb>>2]=+g[xb>>2]*.5877852439880371-+g[yb>>2]*.9510565400123596;g[eb>>2]=+g[xb>>2]*.9510565400123596+ +g[yb>>2]*.5877852439880371;g[ub>>2]=+g[Ka>>2]-+g[Ha>>2]*.25;g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[fb>>2]=+g[vb>>2]+ +g[ub>>2];g[Ab>>2]=+g[wb>>2]-+g[zb>>2];g[Tb>>2]=+g[fb>>2]-+g[eb>>2];g[Oa>>2]=+g[zb>>2]+ +g[wb>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[R>>2]=+g[P>>2]*.9510565400123596+ +g[Q>>2]*.5877852439880371;g[Va>>2]=+g[P>>2]*.5877852439880371-+g[Q>>2]*.9510565400123596;g[N>>2]=+g[L>>2]-+g[M>>2]*.25;g[O>>2]=+g[G>>2]+ +g[N>>2];g[Wa>>2]=+g[N>>2]-+g[G>>2];g[S>>2]=+g[O>>2]-+g[R>>2];g[lb>>2]=+g[Wa>>2]-+g[Va>>2];g[Fb>>2]=+g[R>>2]+ +g[O>>2];g[Xa>>2]=+g[Va>>2]+ +g[Wa>>2];g[za>>2]=+g[na>>2]*.9510565400123596+ +g[ya>>2]*.5877852439880371;g[Sa>>2]=+g[na>>2]*.5877852439880371-+g[ya>>2]*.9510565400123596;g[D>>2]=+g[B>>2]-+g[C>>2]*.25;g[E>>2]=+g[w>>2]+ +g[D>>2];g[Ra>>2]=+g[D>>2]-+g[w>>2];g[Aa>>2]=+g[E>>2]+ +g[za>>2];g[jb>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Db>>2]=+g[E>>2]-+g[za>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[rb>>2]=+g[pb>>2]*.5877852439880371-+g[qb>>2]*.9510565400123596;g[bb>>2]=+g[pb>>2]*.9510565400123596+ +g[qb>>2]*.5877852439880371;g[mb>>2]=+g[te>>2]-+g[ye>>2]*.25;g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[ab>>2]=+g[nb>>2]+ +g[mb>>2];g[sb>>2]=+g[ob>>2]+ +g[rb>>2];g[Rb>>2]=+g[ab>>2]+ +g[bb>>2];g[Kb>>2]=+g[ob>>2]-+g[rb>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[Ae>>2]=+g[c[q>>2]>>2];g[Ba>>2]=+g[(c[q>>2]|0)+4>>2];g[T>>2]=+g[Ae>>2]*+g[Aa>>2]+ +g[Ba>>2]*+g[S>>2];g[Ma>>2]=+g[Ae>>2]*+g[S>>2]-+g[Ba>>2]*+g[Aa>>2];g[c[m>>2]>>2]=+g[ze>>2]-+g[T>>2];g[c[n>>2]>>2]=+g[La>>2]+ +g[Ma>>2];g[c[o>>2]>>2]=+g[ze>>2]+ +g[T>>2];g[c[p>>2]>>2]=+g[Ma>>2]-+g[La>>2];g[Qb>>2]=+g[(c[q>>2]|0)+24>>2];g[Sb>>2]=+g[(c[q>>2]|0)+28>>2];g[Ub>>2]=+g[Qb>>2]*+g[Rb>>2]-+g[Sb>>2]*+g[Tb>>2];g[yc>>2]=+g[Sb>>2]*+g[Rb>>2]+ +g[Qb>>2]*+g[Tb>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ub>>2]-+g[xc>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[yc>>2]+ 
+g[zc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ub>>2]+ +g[xc>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[zc>>2]-+g[yc>>2];g[Na>>2]=+g[(c[q>>2]|0)+56>>2];g[tb>>2]=+g[(c[q>>2]|0)+60>>2];g[Bb>>2]=+g[Na>>2]*+g[sb>>2]-+g[tb>>2]*+g[Ab>>2];g[Hb>>2]=+g[tb>>2]*+g[sb>>2]+ +g[Na>>2]*+g[Ab>>2];g[Cb>>2]=+g[(c[q>>2]|0)+64>>2];g[Eb>>2]=+g[(c[q>>2]|0)+68>>2];g[Gb>>2]=+g[Cb>>2]*+g[Db>>2]+ +g[Eb>>2]*+g[Fb>>2];g[Ib>>2]=+g[Cb>>2]*+g[Fb>>2]-+g[Eb>>2]*+g[Db>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Bb>>2]-+g[Gb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Hb>>2]+ +g[Ib>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Bb>>2]+ +g[Gb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ib>>2]-+g[Hb>>2];g[Jb>>2]=+g[(c[q>>2]|0)+88>>2];g[Lb>>2]=+g[(c[q>>2]|0)+92>>2];g[Pa>>2]=+g[Jb>>2]*+g[Kb>>2]-+g[Lb>>2]*+g[Oa>>2];g[Za>>2]=+g[Lb>>2]*+g[Kb>>2]+ +g[Jb>>2]*+g[Oa>>2];g[Qa>>2]=+g[(c[q>>2]|0)+96>>2];g[Ua>>2]=+g[(c[q>>2]|0)+100>>2];g[Ya>>2]=+g[Qa>>2]*+g[Ta>>2]+ +g[Ua>>2]*+g[Xa>>2];g[_a>>2]=+g[Qa>>2]*+g[Xa>>2]-+g[Ua>>2]*+g[Ta>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Pa>>2]-+g[Ya>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Za>>2]+ +g[_a>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Pa>>2]+ +g[Ya>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[_a>>2]-+g[Za>>2];g[$a>>2]=+g[(c[q>>2]|0)+120>>2];g[db>>2]=+g[(c[q>>2]|0)+124>>2];g[hb>>2]=+g[$a>>2]*+g[cb>>2]-+g[db>>2]*+g[gb>>2];g[Ob>>2]=+g[db>>2]*+g[cb>>2]+ +g[$a>>2]*+g[gb>>2];g[ib>>2]=+g[(c[q>>2]|0)+128>>2];g[kb>>2]=+g[(c[q>>2]|0)+132>>2];g[Nb>>2]=+g[ib>>2]*+g[jb>>2]+ +g[kb>>2]*+g[lb>>2];g[Pb>>2]=+g[ib>>2]*+g[lb>>2]-+g[kb>>2]*+g[jb>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[hb>>2]-+g[Nb>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ob>>2]+ +g[Pb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[hb>>2]+ +g[Nb>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Pb>>2]-+g[Ob>>2];g[gd>>2]=+g[Bc>>2]+ +g[Ic>>2];g[id>>2]=+g[bc>>2]+ +g[_b>>2];g[fd>>2]=+g[(c[q>>2]|0)+72>>2];g[hd>>2]=+g[(c[q>>2]|0)+76>>2];g[jd>>2]=+g[fd>>2]*+g[gd>>2]-+g[hd>>2]*+g[id>>2];g[td>>2]=+g[hd>>2]*+g[gd>>2]+ +g[fd>>2]*+g[id>>2];g[me>>2]=+g[ic>>2]+ 
+g[pc>>2];g[oe>>2]=+g[Hd>>2]+ +g[Ed>>2];g[le>>2]=+g[(c[q>>2]|0)+112>>2];g[ne>>2]=+g[(c[q>>2]|0)+116>>2];g[pe>>2]=+g[le>>2]*+g[me>>2]+ +g[ne>>2]*+g[oe>>2];g[re>>2]=+g[le>>2]*+g[oe>>2]-+g[ne>>2]*+g[me>>2];g[Sc>>2]=+g[Oc>>2]*.5877852439880371-+g[Rc>>2]*.9510565400123596;g[Rd>>2]=+g[Oc>>2]*.9510565400123596+ +g[Rc>>2]*.5877852439880371;g[Jc>>2]=+g[Bc>>2]-+g[Ic>>2]*.25;g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[Qd>>2]=+g[Kc>>2]+ +g[Jc>>2];g[Tc>>2]=+g[Lc>>2]-+g[Sc>>2];g[he>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[wd>>2]=+g[Lc>>2]+ +g[Sc>>2];g[dd>>2]=+g[bd>>2]*.5877852439880371-+g[cd>>2]*.9510565400123596;g[qd>>2]=+g[bd>>2]*.9510565400123596+ +g[cd>>2]*.5877852439880371;g[Id>>2]=+g[Ed>>2]-+g[Hd>>2]*.25;g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[pd>>2]=+g[Jd>>2]+ +g[Id>>2];g[Ld>>2]=+g[dd>>2]+ +g[Kd>>2];g[Dd>>2]=+g[qd>>2]+ +g[pd>>2];g[$d>>2]=+g[Kd>>2]-+g[dd>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[_c>>2]=+g[Wc>>2]*.5877852439880371-+g[Zc>>2]*.9510565400123596;g[md>>2]=+g[Wc>>2]*.9510565400123596+ +g[Zc>>2]*.5877852439880371;g[qc>>2]=+g[ic>>2]-+g[pc>>2]*.25;g[sc>>2]=+g[qc>>2]-+g[rc>>2];g[ld>>2]=+g[rc>>2]+ +g[qc>>2];g[$c>>2]=+g[sc>>2]-+g[_c>>2];g[Bd>>2]=+g[ld>>2]-+g[md>>2];g[Zd>>2]=+g[sc>>2]+ +g[_c>>2];g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[Zb>>2]=+g[Xb>>2]*.5877852439880371-+g[Yb>>2]*.9510565400123596;g[Ud>>2]=+g[Xb>>2]*.9510565400123596+ +g[Yb>>2]*.5877852439880371;g[cc>>2]=+g[_b>>2]-+g[bc>>2]*.25;g[ec>>2]=+g[cc>>2]-+g[dc>>2];g[Vd>>2]=+g[dc>>2]+ +g[cc>>2];g[fc>>2]=+g[Zb>>2]+ +g[ec>>2];g[je>>2]=+g[Vd>>2]-+g[Ud>>2];g[Wd>>2]=+g[Ud>>2]+ +g[Vd>>2];g[yd>>2]=+g[ec>>2]-+g[Zb>>2];g[kd>>2]=+g[(c[q>>2]|0)+80>>2];g[od>>2]=+g[(c[q>>2]|0)+84>>2];g[sd>>2]=+g[kd>>2]*+g[nd>>2]+ +g[od>>2]*+g[rd>>2];g[ud>>2]=+g[kd>>2]*+g[rd>>2]-+g[od>>2]*+g[nd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jd>>2]-+g[sd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[td>>2]+ +g[ud>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jd>>2]+ 
+g[sd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ud>>2]-+g[td>>2];g[ge>>2]=+g[(c[q>>2]|0)+104>>2];g[ie>>2]=+g[(c[q>>2]|0)+108>>2];g[ke>>2]=+g[ge>>2]*+g[he>>2]-+g[ie>>2]*+g[je>>2];g[qe>>2]=+g[ie>>2]*+g[he>>2]+ +g[ge>>2]*+g[je>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ke>>2]-+g[pe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[qe>>2]+ +g[re>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ke>>2]+ +g[pe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[re>>2]-+g[qe>>2];g[Ac>>2]=+g[(c[q>>2]|0)+8>>2];g[Uc>>2]=+g[(c[q>>2]|0)+12>>2];g[gc>>2]=+g[Ac>>2]*+g[Tc>>2]-+g[Uc>>2]*+g[fc>>2];g[Nd>>2]=+g[Uc>>2]*+g[Tc>>2]+ +g[Ac>>2]*+g[fc>>2];g[hc>>2]=+g[(c[q>>2]|0)+16>>2];g[ad>>2]=+g[(c[q>>2]|0)+20>>2];g[Md>>2]=+g[hc>>2]*+g[$c>>2]+ +g[ad>>2]*+g[Ld>>2];g[Od>>2]=+g[hc>>2]*+g[Ld>>2]-+g[ad>>2]*+g[$c>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[gc>>2]-+g[Md>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Nd>>2]+ +g[Od>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[gc>>2]+ +g[Md>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Od>>2]-+g[Nd>>2];g[Pd>>2]=+g[(c[q>>2]|0)+40>>2];g[Td>>2]=+g[(c[q>>2]|0)+44>>2];g[Xd>>2]=+g[Pd>>2]*+g[Sd>>2]-+g[Td>>2]*+g[Wd>>2];g[be>>2]=+g[Td>>2]*+g[Sd>>2]+ +g[Pd>>2]*+g[Wd>>2];g[Yd>>2]=+g[(c[q>>2]|0)+48>>2];g[_d>>2]=+g[(c[q>>2]|0)+52>>2];g[ae>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[_d>>2]*+g[$d>>2];g[ed>>2]=+g[Yd>>2]*+g[$d>>2]-+g[_d>>2]*+g[Zd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xd>>2]-+g[ae>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[be>>2]+ +g[ed>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xd>>2]+ +g[ae>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ed>>2]-+g[be>>2];g[vd>>2]=+g[(c[q>>2]|0)+136>>2];g[xd>>2]=+g[(c[q>>2]|0)+140>>2];g[zd>>2]=+g[vd>>2]*+g[wd>>2]-+g[xd>>2]*+g[yd>>2];g[ee>>2]=+g[xd>>2]*+g[wd>>2]+ +g[vd>>2]*+g[yd>>2];g[Ad>>2]=+g[(c[q>>2]|0)+144>>2];g[Cd>>2]=+g[(c[q>>2]|0)+148>>2];g[de>>2]=+g[Ad>>2]*+g[Bd>>2]+ +g[Cd>>2]*+g[Dd>>2];g[fe>>2]=+g[Ad>>2]*+g[Dd>>2]-+g[Cd>>2]*+g[Bd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[zd>>2]-+g[de>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[ee>>2]+ 
+g[fe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[zd>>2]+ +g[de>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[fe>>2]-+g[ee>>2];c[vf>>2]=(c[vf>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+152;c[r>>2]=c[r>>2]^c[2998]}i=wf;return}/* Vu(a): coerces a to int, saves the global i, bumps i by 16 bytes, stores a into that 16-byte frame through c[], calls an(a, 68, 9256, 1), then restores i and returns nothing. NOTE(review): i behaves like a stack pointer here, 68 looks like a function-table index and 9256 like a static data address (probably a codelet registration call) - confirm against the Emscripten build. */function Vu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,68,9256,1);i=b;return}function Wu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,
Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0;Qj=i;i=i+2320|0;m=Qj+2304|0;n=Qj+2300|0;o=Qj+2296|0;p=Qj+2292|0;q=Qj+2288|0;r=Qj+2284|0;Rj=Qj+2280|0;s=Qj+2276|0;t=Qj+2272|0;Pj=Qj+2240|0;uj=Qj+2236|0;Ed=Qj+2232|0;cg=Qj+2228|0;dh=Qj+2224|0;Mf=Qj+2220|0;ri=Qj+2216|0;ma=Qj+2212|0;Zd=Qj+2208|0;db=Qj+2204|0;pd=Qj+2200|0;pe=Qj+2196|0;Kg=Qj+2192|0;Xf=Qj+2188|0;_g=Qj+2184|0;Ic=Qj+2180|0;Ud=Qj+2176|0;oj=Qj+2172|0;Vb=Qj+2168|0;Bb=Qj+2164|0;fd=Qj+2160|0;Sg=Qj+2156|0;Xg=Qj+2152|0;Kb=Qj+2148|0;gd=Qj+2144|0;Le=Qj+2140|0;qf=Qj+2136|0;zf=Qj+2132|0;Lh=Qj+2128|0;Md=Qj+2124|0;Rd=Qj+2120|0;ug=Qj+2116|0;Kh=Qj+2112|0;$i=Qj+2108|0;Ob=Qj+2104|0;ba=Qj+2100|0;ae=Qj+2096|0;Pg=Qj+2092|0;Wg=Qj+2088|0;La=Qj+2084|0;be=Qj
+2080|0;Ee=Qj+2076|0;pf=Qj+2072|0;qg=Qj+2068|0;Ih=Qj+2064|0;Jd=Qj+2060|0;Qd=Qj+2056|0;ng=Qj+2052|0;Hh=Qj+2048|0;Jj=Qj+2044|0;Td=Qj+2040|0;jg=Qj+2036|0;si=Qj+2032|0;Jf=Qj+2028|0;Fh=Qj+2024|0;L=Qj+2020|0;od=Qj+2016|0;Wa=Qj+2012|0;_d=Qj+2008|0;we=Qj+2004|0;Zg=Qj+2e3|0;uf=Qj+1996|0;Lg=Qj+1992|0;Bc=Qj+1988|0;Fd=Qj+1984|0;Mb=Qj+1980|0;_a=Qj+1976|0;B=Qj+1972|0;Fc=Qj+1968|0;mf=Qj+1964|0;y=Qj+1960|0;bb=Qj+1956|0;Gc=Qj+1952|0;sj=Qj+1948|0;Dc=Qj+1944|0;ka=Qj+1940|0;Ya=Qj+1936|0;pj=Qj+1932|0;Cc=Qj+1928|0;fa=Qj+1924|0;Xa=Qj+1920|0;u=Qj+1916|0;Da=Qj+1912|0;$a=Qj+1908|0;ab=Qj+1904|0;z=Qj+1900|0;A=Qj+1896|0;Vc=Qj+1892|0;ce=Qj+1888|0;qj=Qj+1884|0;rj=Qj+1880|0;ga=Qj+1876|0;ha=Qj+1872|0;ia=Qj+1868|0;ja=Qj+1864|0;Eh=Qj+1860|0;Ni=Qj+1856|0;D=Qj+1852|0;E=Qj+1848|0;da=Qj+1844|0;ea=Qj+1840|0;vg=Qj+1836|0;tj=Qj+1832|0;ag=Qj+1828|0;bg=Qj+1824|0;Kf=Qj+1820|0;Lf=Qj+1816|0;C=Qj+1812|0;la=Qj+1808|0;Za=Qj+1804|0;cb=Qj+1800|0;ne=Qj+1796|0;oe=Qj+1792|0;vf=Qj+1788|0;wf=Qj+1784|0;Ec=Qj+1780|0;Hc=Qj+1776|0;gj=Qj+1772|0;Fe=Qj+1768|0;Na=Qj+1764|0;Fb=Qj+1760|0;ob=Qj+1756|0;Je=Qj+1752|0;Ib=Qj+1748|0;Ub=Qj+1744|0;nj=Qj+1740|0;Ge=Qj+1736|0;Ie=Qj+1732|0;ub=Qj+1728|0;zb=Qj+1724|0;Db=Qj+1720|0;Rb=Qj+1716|0;Cb=Qj+1712|0;xf=Qj+1708|0;yf=Qj+1704|0;aj=Qj+1700|0;bj=Qj+1696|0;cj=Qj+1692|0;dj=Qj+1688|0;ej=Qj+1684|0;fj=Qj+1680|0;mb=Qj+1676|0;nb=Qj+1672|0;Sb=Qj+1668|0;Gb=Qj+1664|0;Hb=Qj+1660|0;Tb=Qj+1656|0;jj=Qj+1652|0;qb=Qj+1648|0;tb=Qj+1644|0;Pb=Qj+1640|0;mj=Qj+1636|0;vb=Qj+1632|0;yb=Qj+1628|0;Qb=Qj+1624|0;hj=Qj+1620|0;ij=Qj+1616|0;rb=Qj+1612|0;sb=Qj+1608|0;kj=Qj+1604|0;lj=Qj+1600|0;wb=Qj+1596|0;xb=Qj+1592|0;pb=Qj+1588|0;Ab=Qj+1584|0;Qg=Qj+1580|0;Rg=Qj+1576|0;Eb=Qj+1572|0;Jb=Qj+1568|0;He=Qj+1564|0;Ke=Qj+1560|0;Kd=Qj+1556|0;Ld=Qj+1552|0;sg=Qj+1548|0;tg=Qj+1544|0;Ti=Qj+1540|0;ye=Qj+1536|0;N=Qj+1532|0;Ga=Qj+1528|0;Q=Qj+1524|0;Ce=Qj+1520|0;Ja=Qj+1516|0;Nb=Qj+1512|0;_i=Qj+1508|0;ze=Qj+1504|0;Be=Qj+1500|0;W=Qj+1496|0;$=Qj+1492|0;Ea=Qj+1488|0;jb=Qj+1484|0;ca=Qj+1480|0;og=Qj+1476|0;pg=Qj+1472|0;Lj=Qj+1468|0;Mj=Qj+1464|0;Nj=Q
j+1460|0;Oj=Qj+1456|0;Ri=Qj+1452|0;Si=Qj+1448|0;O=Qj+1444|0;P=Qj+1440|0;kb=Qj+1436|0;Ha=Qj+1432|0;Ia=Qj+1428|0;lb=Qj+1424|0;Wi=Qj+1420|0;S=Qj+1416|0;V=Qj+1412|0;hb=Qj+1408|0;Zi=Qj+1404|0;X=Qj+1400|0;_=Qj+1396|0;ib=Qj+1392|0;Ui=Qj+1388|0;Vi=Qj+1384|0;T=Qj+1380|0;U=Qj+1376|0;Xi=Qj+1372|0;Yi=Qj+1368|0;Y=Qj+1364|0;Z=Qj+1360|0;R=Qj+1356|0;aa=Qj+1352|0;Ng=Qj+1348|0;Og=Qj+1344|0;Fa=Qj+1340|0;Ka=Qj+1336|0;Ae=Qj+1332|0;De=Qj+1328|0;Hd=Qj+1324|0;Id=Qj+1320|0;lg=Qj+1316|0;mg=Qj+1312|0;xj=Qj+1308|0;vc=Qj+1304|0;Aj=Qj+1300|0;wc=Qj+1296|0;ra=Qj+1292|0;wa=Qj+1288|0;eg=Qj+1284|0;dg=Qj+1280|0;re=Qj+1276|0;qe=Qj+1272|0;Ej=Qj+1268|0;yc=Qj+1264|0;Hj=Qj+1260|0;zc=Qj+1256|0;Ca=Qj+1252|0;J=Qj+1248|0;hg=Qj+1244|0;gg=Qj+1240|0;ue=Qj+1236|0;te=Qj+1232|0;sa=Qj+1228|0;qa=Qj+1224|0;na=Qj+1220|0;va=Qj+1216|0;vj=Qj+1212|0;wj=Qj+1208|0;oa=Qj+1204|0;pa=Qj+1200|0;yj=Qj+1196|0;zj=Qj+1192|0;ta=Qj+1188|0;ua=Qj+1184|0;F=Qj+1180|0;Ba=Qj+1176|0;ya=Qj+1172|0;I=Qj+1168|0;Cj=Qj+1164|0;Dj=Qj+1160|0;za=Qj+1156|0;Aa=Qj+1152|0;Fj=Qj+1148|0;Gj=Qj+1144|0;G=Qj+1140|0;H=Qj+1136|0;Bj=Qj+1132|0;Ij=Qj+1128|0;fg=Qj+1124|0;ig=Qj+1120|0;Hf=Qj+1116|0;If=Qj+1112|0;xa=Qj+1108|0;K=Qj+1104|0;Ua=Qj+1100|0;Va=Qj+1096|0;se=Qj+1092|0;ve=Qj+1088|0;sf=Qj+1084|0;tf=Qj+1080|0;xc=Qj+1076|0;Ac=Qj+1072|0;w=Qj+1068|0;Nc=Qj+1064|0;Kc=Qj+1060|0;Pc=Qj+1056|0;fc=Qj+1052|0;Wc=Qj+1048|0;bc=Qj+1044|0;tc=Qj+1040|0;fb=Qj+1036|0;Zc=Qj+1032|0;Uc=Qj+1028|0;kc=Qj+1024|0;Pa=Qj+1020|0;$c=Qj+1016|0;Sc=Qj+1012|0;oc=Qj+1008|0;Kj=Qj+1004|0;v=Qj+1e3|0;$b=Qj+996|0;ac=Qj+992|0;Wb=Qj+988|0;Jc=Qj+984|0;dc=Qj+980|0;ec=Qj+976|0;eb=Qj+972|0;ic=Qj+968|0;Ta=Qj+964|0;jc=Qj+960|0;Ra=Qj+956|0;Sa=Qj+952|0;M=Qj+948|0;nc=Qj+944|0;Oa=Qj+940|0;mc=Qj+936|0;Ma=Qj+932|0;Lb=Qj+928|0;gb=Qj+924|0;Lc=Qj+920|0;x=Qj+916|0;Qa=Qj+912|0;Xc=Qj+908|0;bd=Qj+904|0;ad=Qj+900|0;cd=Qj+896|0;sc=Qj+892|0;uc=Qj+888|0;Yc=Qj+884|0;_c=Qj+880|0;Qc=Qj+876|0;Yb=Qj+872|0;Xb=Qj+868|0;Zb=Qj+864|0;Mc=Qj+860|0;Oc=Qj+856|0;Rc=Qj+852|0;Tc=Qj+848|0;gc=Qj+844|0;qc=Qj+840|0;pc=Qj+836|0;rc=Qj+832|0;_b=Qj+828|0;cc
=Qj+824|0;hc=Qj+820|0;lc=Qj+816|0;Ug=Qj+812|0;zi=Qj+808|0;ah=Qj+804|0;Bi=Qj+800|0;Th=Qj+796|0;hi=Qj+792|0;Ph=Qj+788|0;fi=Qj+784|0;ui=Qj+780|0;ki=Qj+776|0;Gi=Qj+772|0;Yh=Qj+768|0;Oh=Qj+764|0;mi=Qj+760|0;Ei=Qj+756|0;ai=Qj+752|0;Mg=Qj+748|0;Tg=Qj+744|0;Li=Qj+740|0;Mi=Qj+736|0;Yg=Qj+732|0;$g=Qj+728|0;Rh=Qj+724|0;Sh=Qj+720|0;ti=Qj+716|0;Wh=Qj+712|0;qi=Qj+708|0;Xh=Qj+704|0;oi=Qj+700|0;pi=Qj+696|0;Gh=Qj+692|0;_h=Qj+688|0;Nh=Qj+684|0;$h=Qj+680|0;Jh=Qj+676|0;Mh=Qj+672|0;bh=Qj+668|0;wi=Qj+664|0;vi=Qj+660|0;xi=Qj+656|0;Jg=Qj+652|0;Vg=Qj+648|0;ch=Qj+644|0;ni=Qj+640|0;ii=Qj+636|0;Pi=Qj+632|0;Oi=Qj+628|0;Qi=Qj+624|0;ei=Qj+620|0;gi=Qj+616|0;ji=Qj+612|0;li=Qj+608|0;Ci=Qj+604|0;Ii=Qj+600|0;Hi=Qj+596|0;Ji=Qj+592|0;yi=Qj+588|0;Ai=Qj+584|0;Di=Qj+580|0;Fi=Qj+576|0;Uh=Qj+572|0;ci=Qj+568|0;bi=Qj+564|0;di=Qj+560|0;Ki=Qj+556|0;Qh=Qj+552|0;Vh=Qj+548|0;Zh=Qj+544|0;Od=Qj+540|0;wd=Qj+536|0;Wd=Qj+532|0;yd=Qj+528|0;Ne=Qj+524|0;bf=Qj+520|0;je=Qj+516|0;$e=Qj+512|0;rd=Qj+508|0;ef=Qj+504|0;Dd=Qj+500|0;Se=Qj+496|0;jd=Qj+492|0;gf=Qj+488|0;Bd=Qj+484|0;We=Qj+480|0;Gd=Qj+476|0;Nd=Qj+472|0;he=Qj+468|0;ie=Qj+464|0;Sd=Qj+460|0;Vd=Qj+456|0;le=Qj+452|0;me=Qj+448|0;qd=Qj+444|0;Qe=Qj+440|0;nd=Qj+436|0;Re=Qj+432|0;ld=Qj+428|0;md=Qj+424|0;$d=Qj+420|0;Ve=Qj+416|0;id=Qj+412|0;Ue=Qj+408|0;ed=Qj+404|0;hd=Qj+400|0;Xd=Qj+396|0;td=Qj+392|0;sd=Qj+388|0;ud=Qj+384|0;dd=Qj+380|0;Pd=Qj+376|0;Yd=Qj+372|0;kd=Qj+368|0;cf=Qj+364|0;jf=Qj+360|0;hf=Qj+356|0;kf=Qj+352|0;_e=Qj+348|0;af=Qj+344|0;df=Qj+340|0;ff=Qj+336|0;zd=Qj+332|0;ee=Qj+328|0;de=Qj+324|0;fe=Qj+320|0;vd=Qj+316|0;xd=Qj+312|0;Ad=Qj+308|0;Cd=Qj+304|0;Oe=Qj+300|0;Ye=Qj+296|0;Xe=Qj+292|0;Ze=Qj+288|0;ge=Qj+284|0;ke=Qj+280|0;Pe=Qj+276|0;Te=Qj+272|0;nf=Qj+268|0;Tf=Qj+264|0;Zf=Qj+260|0;Vf=Qj+256|0;ih=Qj+252|0;yh=Qj+248|0;eh=Qj+244|0;wh=Qj+240|0;Of=Qj+236|0;Bh=Qj+232|0;zg=Qj+228|0;nh=Qj+224|0;Cf=Qj+220|0;Dh=Qj+216|0;xg=Qj+212|0;rh=Qj+208|0;xe=Qj+204|0;Me=Qj+200|0;Eg=Qj+196|0;Fg=Qj+192|0;rf=Qj+188|0;Yf=Qj+184|0;gh=Qj+180|0;hh=Qj+176|0;Nf=Qj+172|0;lh=Qj+168|0;Gf=Qj+164|0;mh=Qj+160|
0;Ef=Qj+156|0;Ff=Qj+152|0;kg=Qj+148|0;qh=Qj+144|0;Bf=Qj+140|0;ph=Qj+136|0;rg=Qj+132|0;Af=Qj+128|0;_f=Qj+124|0;Qf=Qj+120|0;Pf=Qj+116|0;Rf=Qj+112|0;lf=Qj+108|0;of=Qj+104|0;$f=Qj+100|0;Df=Qj+96|0;zh=Qj+92|0;Hg=Qj+88|0;Gg=Qj+84|0;Ig=Qj+80|0;vh=Qj+76|0;xh=Qj+72|0;Ah=Qj+68|0;Ch=Qj+64|0;Wf=Qj+60|0;Bg=Qj+56|0;Ag=Qj+52|0;Cg=Qj+48|0;Sf=Qj+44|0;Uf=Qj+40|0;wg=Qj+36|0;yg=Qj+32|0;jh=Qj+28|0;th=Qj+24|0;sh=Qj+20|0;uh=Qj+16|0;Dg=Qj+12|0;fh=Qj+8|0;kh=Qj+4|0;oh=Qj;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Rj>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Qj+2268>>2]=.8314695954322815;g[Qj+2264>>2]=.5555702447891235;g[Qj+2260>>2]=.19509032368659973;g[Qj+2256>>2]=.9807852506637573;g[Qj+2252>>2]=.9238795042037964;g[Qj+2248>>2]=.3826834261417389;g[Qj+2244>>2]=.7071067690849304;c[Pj>>2]=c[Rj>>2];c[q>>2]=(c[q>>2]|0)+(((c[Rj>>2]|0)-1|0)*62<<2);while(1){if((c[Pj>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[_a>>2]=+g[u>>2]-+g[Da>>2];g[z>>2]=+g[c[n>>2]>>2];g[A>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[Fc>>2]=+g[z>>2]-+g[A>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[mf>>2]=+g[Vc>>2]+ +g[ce>>2];g[y>>2]=+g[Vc>>2]-+g[ce>>2];g[$a>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ab>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[Gc>>2]=+g[$a>>2]-+g[ab>>2];g[qj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[rj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ga>>2]=+g[qj>>2]-+g[rj>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ia>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[sj>>2]=+g[qj>>2]+ +g[rj>>2];g[Dc>>2]=+g[ia>>2]-+g[ha>>2];g[ka>>2]=+g[ga>>2]+ 
+g[ja>>2];g[Ya>>2]=+g[ga>>2]-+g[ja>>2];g[Eh>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ni>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[D>>2]=+g[Eh>>2]-+g[Ni>>2];g[E>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[da>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ea>>2]=+g[E>>2]+ +g[da>>2];g[pj>>2]=+g[Eh>>2]+ +g[Ni>>2];g[Cc>>2]=+g[E>>2]-+g[da>>2];g[fa>>2]=+g[D>>2]+ +g[ea>>2];g[Xa>>2]=+g[D>>2]-+g[ea>>2];g[vg>>2]=+g[Mb>>2]+ +g[mf>>2];g[tj>>2]=+g[pj>>2]+ +g[sj>>2];g[uj>>2]=+g[vg>>2]+ +g[tj>>2];g[Ed>>2]=+g[vg>>2]-+g[tj>>2];g[ag>>2]=+g[B>>2]-+g[y>>2];g[bg>>2]=(+g[Xa>>2]-+g[Ya>>2])*.7071067690849304;g[cg>>2]=+g[ag>>2]+ +g[bg>>2];g[dh>>2]=+g[ag>>2]-+g[bg>>2];g[Kf>>2]=+g[_a>>2]+ +g[bb>>2];g[Lf>>2]=(+g[fa>>2]+ +g[ka>>2])*.7071067690849304;g[Mf>>2]=+g[Kf>>2]-+g[Lf>>2];g[ri>>2]=+g[Lf>>2]+ +g[Kf>>2];g[C>>2]=+g[y>>2]+ +g[B>>2];g[la>>2]=(+g[fa>>2]-+g[ka>>2])*.7071067690849304;g[ma>>2]=+g[C>>2]+ +g[la>>2];g[Zd>>2]=+g[C>>2]-+g[la>>2];g[Za>>2]=(+g[Xa>>2]+ +g[Ya>>2])*.7071067690849304;g[cb>>2]=+g[_a>>2]-+g[bb>>2];g[db>>2]=+g[Za>>2]+ +g[cb>>2];g[pd>>2]=+g[cb>>2]-+g[Za>>2];g[ne>>2]=+g[Mb>>2]-+g[mf>>2];g[oe>>2]=+g[Dc>>2]-+g[Cc>>2];g[pe>>2]=+g[ne>>2]+ +g[oe>>2];g[Kg>>2]=+g[ne>>2]-+g[oe>>2];g[vf>>2]=+g[pj>>2]-+g[sj>>2];g[wf>>2]=+g[Fc>>2]-+g[Gc>>2];g[Xf>>2]=+g[vf>>2]+ +g[wf>>2];g[_g>>2]=+g[wf>>2]-+g[vf>>2];g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Ic>>2]=+g[Ec>>2]+ +g[Hc>>2];g[Ud>>2]=+g[Hc>>2]-+g[Ec>>2];g[aj>>2]=+g[c[o>>2]>>2];g[bj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[cj>>2]=+g[aj>>2]+ +g[bj>>2];g[dj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ej>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[fj>>2]=+g[dj>>2]+ +g[ej>>2];g[gj>>2]=+g[cj>>2]+ +g[fj>>2];g[Fe>>2]=+g[cj>>2]-+g[fj>>2];g[Na>>2]=+g[dj>>2]-+g[ej>>2];g[Fb>>2]=+g[aj>>2]-+g[bj>>2];g[mb>>2]=+g[c[p>>2]>>2];g[nb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Sb>>2]=+g[nb>>2]-+g[mb>>2];g[Gb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Hb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Tb>>2]=+g[Gb>>2]-+g[Hb>>2];g[ob>>2]=+g[mb>>2]+ 
+g[nb>>2];g[Je>>2]=+g[Sb>>2]-+g[Tb>>2];g[Ib>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[hj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ij>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[jj>>2]=+g[hj>>2]+ +g[ij>>2];g[qb>>2]=+g[hj>>2]-+g[ij>>2];g[rb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[sb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[tb>>2]=+g[rb>>2]+ +g[sb>>2];g[Pb>>2]=+g[rb>>2]-+g[sb>>2];g[kj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[lj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[mj>>2]=+g[kj>>2]+ +g[lj>>2];g[vb>>2]=+g[kj>>2]-+g[lj>>2];g[wb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[xb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Qb>>2]=+g[xb>>2]-+g[wb>>2];g[nj>>2]=+g[jj>>2]+ +g[mj>>2];g[Ge>>2]=+g[Qb>>2]-+g[Pb>>2];g[Ie>>2]=+g[jj>>2]-+g[mj>>2];g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[zb>>2]=+g[vb>>2]+ +g[yb>>2];g[Db>>2]=+g[vb>>2]-+g[yb>>2];g[Rb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[Cb>>2]=+g[qb>>2]-+g[tb>>2];g[oj>>2]=+g[gj>>2]+ +g[nj>>2];g[Vb>>2]=+g[Rb>>2]+ +g[Ub>>2];g[pb>>2]=+g[Na>>2]-+g[ob>>2];g[Ab>>2]=(+g[ub>>2]-+g[zb>>2])*.7071067690849304;g[Bb>>2]=+g[pb>>2]+ +g[Ab>>2];g[fd>>2]=+g[pb>>2]-+g[Ab>>2];g[Qg>>2]=+g[Fe>>2]-+g[Ge>>2];g[Rg>>2]=+g[Je>>2]-+g[Ie>>2];g[Sg>>2]=+g[Qg>>2]*.3826834261417389+ +g[Rg>>2]*.9238795042037964;g[Xg>>2]=+g[Rg>>2]*.3826834261417389-+g[Qg>>2]*.9238795042037964;g[Eb>>2]=(+g[Cb>>2]+ +g[Db>>2])*.7071067690849304;g[Jb>>2]=+g[Fb>>2]-+g[Ib>>2];g[Kb>>2]=+g[Eb>>2]+ +g[Jb>>2];g[gd>>2]=+g[Jb>>2]-+g[Eb>>2];g[He>>2]=+g[Fe>>2]+ +g[Ge>>2];g[Ke>>2]=+g[Ie>>2]+ +g[Je>>2];g[Le>>2]=+g[He>>2]*.9238795042037964+ +g[Ke>>2]*.3826834261417389;g[qf>>2]=+g[Ke>>2]*.9238795042037964-+g[He>>2]*.3826834261417389;g[xf>>2]=+g[Fb>>2]+ +g[Ib>>2];g[yf>>2]=(+g[ub>>2]+ +g[zb>>2])*.7071067690849304;g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[Lh>>2]=+g[yf>>2]+ +g[xf>>2];g[Kd>>2]=+g[gj>>2]-+g[nj>>2];g[Ld>>2]=+g[Ub>>2]-+g[Rb>>2];g[Md>>2]=+g[Kd>>2]+ +g[Ld>>2];g[Rd>>2]=+g[Ld>>2]-+g[Kd>>2];g[sg>>2]=(+g[Cb>>2]-+g[Db>>2])*.7071067690849304;g[tg>>2]=+g[Na>>2]+ 
+g[ob>>2];g[ug>>2]=+g[sg>>2]-+g[tg>>2];g[Kh>>2]=+g[tg>>2]+ +g[sg>>2];g[Lj>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Mj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Nj>>2]=+g[Lj>>2]+ +g[Mj>>2];g[Oj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ri>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Si>>2]=+g[Oj>>2]+ +g[Ri>>2];g[Ti>>2]=+g[Nj>>2]+ +g[Si>>2];g[ye>>2]=+g[Nj>>2]-+g[Si>>2];g[N>>2]=+g[Oj>>2]-+g[Ri>>2];g[Ga>>2]=+g[Lj>>2]-+g[Mj>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[P>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[kb>>2]=+g[O>>2]-+g[P>>2];g[Ha>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ia>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[lb>>2]=+g[Ha>>2]-+g[Ia>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Ce>>2]=+g[kb>>2]-+g[lb>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[Nb>>2]=+g[kb>>2]+ +g[lb>>2];g[Ui>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Vi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Wi>>2]=+g[Ui>>2]+ +g[Vi>>2];g[S>>2]=+g[Ui>>2]-+g[Vi>>2];g[T>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[U>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[hb>>2]=+g[T>>2]-+g[U>>2];g[Xi>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Yi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Zi>>2]=+g[Xi>>2]+ +g[Yi>>2];g[X>>2]=+g[Xi>>2]-+g[Yi>>2];g[Y>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Z>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[ib>>2]=+g[Z>>2]-+g[Y>>2];g[_i>>2]=+g[Wi>>2]+ +g[Zi>>2];g[ze>>2]=+g[ib>>2]-+g[hb>>2];g[Be>>2]=+g[Wi>>2]-+g[Zi>>2];g[W>>2]=+g[S>>2]+ +g[V>>2];g[$>>2]=+g[X>>2]+ +g[_>>2];g[Ea>>2]=+g[X>>2]-+g[_>>2];g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[ca>>2]=+g[S>>2]-+g[V>>2];g[$i>>2]=+g[Ti>>2]+ +g[_i>>2];g[Ob>>2]=+g[jb>>2]+ +g[Nb>>2];g[R>>2]=+g[N>>2]+ +g[Q>>2];g[aa>>2]=(+g[W>>2]-+g[$>>2])*.7071067690849304;g[ba>>2]=+g[R>>2]+ +g[aa>>2];g[ae>>2]=+g[R>>2]-+g[aa>>2];g[Ng>>2]=+g[ye>>2]-+g[ze>>2];g[Og>>2]=+g[Ce>>2]-+g[Be>>2];g[Pg>>2]=+g[Ng>>2]*.3826834261417389-+g[Og>>2]*.9238795042037964;g[Wg>>2]=+g[Ng>>2]*.9238795042037964+ 
+g[Og>>2]*.3826834261417389;g[Fa>>2]=(+g[ca>>2]+ +g[Ea>>2])*.7071067690849304;g[Ka>>2]=+g[Ga>>2]-+g[Ja>>2];g[La>>2]=+g[Fa>>2]+ +g[Ka>>2];g[be>>2]=+g[Ka>>2]-+g[Fa>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[De>>2]=+g[Be>>2]+ +g[Ce>>2];g[Ee>>2]=+g[Ae>>2]*.9238795042037964-+g[De>>2]*.3826834261417389;g[pf>>2]=+g[Ae>>2]*.3826834261417389+ +g[De>>2]*.9238795042037964;g[og>>2]=+g[Ga>>2]+ +g[Ja>>2];g[pg>>2]=(+g[W>>2]+ +g[$>>2])*.7071067690849304;g[qg>>2]=+g[og>>2]-+g[pg>>2];g[Ih>>2]=+g[pg>>2]+ +g[og>>2];g[Hd>>2]=+g[Ti>>2]-+g[_i>>2];g[Id>>2]=+g[Nb>>2]-+g[jb>>2];g[Jd>>2]=+g[Hd>>2]-+g[Id>>2];g[Qd>>2]=+g[Hd>>2]+ +g[Id>>2];g[lg>>2]=+g[Q>>2]-+g[N>>2];g[mg>>2]=(+g[ca>>2]-+g[Ea>>2])*.7071067690849304;g[ng>>2]=+g[lg>>2]+ +g[mg>>2];g[Hh>>2]=+g[lg>>2]-+g[mg>>2];g[vj>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[wj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[xj>>2]=+g[vj>>2]+ +g[wj>>2];g[sa>>2]=+g[vj>>2]-+g[wj>>2];g[oa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[vc>>2]=+g[oa>>2]-+g[pa>>2];g[yj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[zj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Aj>>2]=+g[yj>>2]+ +g[zj>>2];g[na>>2]=+g[yj>>2]-+g[zj>>2];g[ta>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ua>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[wc>>2]=+g[ta>>2]-+g[ua>>2];g[ra>>2]=+g[na>>2]+ +g[qa>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[eg>>2]=+g[sa>>2]+ +g[va>>2];g[dg>>2]=+g[qa>>2]-+g[na>>2];g[re>>2]=+g[vc>>2]-+g[wc>>2];g[qe>>2]=+g[xj>>2]-+g[Aj>>2];g[Cj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Dj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ej>>2]=+g[Cj>>2]+ +g[Dj>>2];g[F>>2]=+g[Cj>>2]-+g[Dj>>2];g[za>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Aa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[yc>>2]=+g[Aa>>2]-+g[za>>2];g[Fj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Gj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Hj>>2]=+g[Fj>>2]+ 
+g[Gj>>2];g[ya>>2]=+g[Fj>>2]-+g[Gj>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[H>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[zc>>2]=+g[G>>2]-+g[H>>2];g[Ca>>2]=+g[ya>>2]-+g[Ba>>2];g[J>>2]=+g[F>>2]-+g[I>>2];g[hg>>2]=+g[F>>2]+ +g[I>>2];g[gg>>2]=+g[ya>>2]+ +g[Ba>>2];g[ue>>2]=+g[yc>>2]-+g[zc>>2];g[te>>2]=+g[Ej>>2]-+g[Hj>>2];g[Bj>>2]=+g[xj>>2]+ +g[Aj>>2];g[Ij>>2]=+g[Ej>>2]+ +g[Hj>>2];g[Jj>>2]=+g[Bj>>2]+ +g[Ij>>2];g[Td>>2]=+g[Bj>>2]-+g[Ij>>2];g[fg>>2]=+g[dg>>2]*.3826834261417389+ +g[eg>>2]*.9238795042037964;g[ig>>2]=+g[gg>>2]*.3826834261417389+ +g[hg>>2]*.9238795042037964;g[jg>>2]=+g[fg>>2]-+g[ig>>2];g[si>>2]=+g[fg>>2]+ +g[ig>>2];g[Hf>>2]=+g[eg>>2]*.3826834261417389-+g[dg>>2]*.9238795042037964;g[If>>2]=+g[hg>>2]*.3826834261417389-+g[gg>>2]*.9238795042037964;g[Jf>>2]=+g[Hf>>2]+ +g[If>>2];g[Fh>>2]=+g[Hf>>2]-+g[If>>2];g[xa>>2]=+g[ra>>2]*.9238795042037964+ +g[wa>>2]*.3826834261417389;g[K>>2]=+g[Ca>>2]*.9238795042037964-+g[J>>2]*.3826834261417389;g[L>>2]=+g[xa>>2]+ +g[K>>2];g[od>>2]=+g[K>>2]-+g[xa>>2];g[Ua>>2]=+g[wa>>2]*.9238795042037964-+g[ra>>2]*.3826834261417389;g[Va>>2]=+g[Ca>>2]*.3826834261417389+ +g[J>>2]*.9238795042037964;g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[_d>>2]=+g[Ua>>2]-+g[Va>>2];g[se>>2]=+g[qe>>2]-+g[re>>2];g[ve>>2]=+g[te>>2]+ +g[ue>>2];g[we>>2]=(+g[se>>2]+ +g[ve>>2])*.7071067690849304;g[Zg>>2]=(+g[se>>2]-+g[ve>>2])*.7071067690849304;g[sf>>2]=+g[qe>>2]+ +g[re>>2];g[tf>>2]=+g[ue>>2]-+g[te>>2];g[uf>>2]=(+g[sf>>2]+ +g[tf>>2])*.7071067690849304;g[Lg>>2]=(+g[tf>>2]-+g[sf>>2])*.7071067690849304;g[xc>>2]=+g[vc>>2]+ +g[wc>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[Bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[Fd>>2]=+g[Ac>>2]-+g[xc>>2];g[Kj>>2]=+g[uj>>2]+ +g[Jj>>2];g[v>>2]=+g[$i>>2]+ +g[oj>>2];g[w>>2]=+g[Kj>>2]+ +g[v>>2];g[Nc>>2]=+g[Kj>>2]-+g[v>>2];g[Wb>>2]=+g[Ob>>2]+ +g[Vb>>2];g[Jc>>2]=+g[Bc>>2]+ +g[Ic>>2];g[Kc>>2]=+g[Wb>>2]+ +g[Jc>>2];g[Pc>>2]=+g[Jc>>2]-+g[Wb>>2];g[dc>>2]=+g[$i>>2]-+g[oj>>2];g[ec>>2]=+g[Ic>>2]-+g[Bc>>2];g[fc>>2]=+g[dc>>2]+ 
+g[ec>>2];g[Wc>>2]=+g[ec>>2]-+g[dc>>2];g[$b>>2]=+g[uj>>2]-+g[Jj>>2];g[ac>>2]=+g[Vb>>2]-+g[Ob>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[tc>>2]=+g[$b>>2]-+g[ac>>2];g[eb>>2]=+g[Wa>>2]+ +g[db>>2];g[ic>>2]=+g[ma>>2]-+g[L>>2];g[Ra>>2]=+g[La>>2]*.9807852506637573-+g[ba>>2]*.19509032368659973;g[Sa>>2]=+g[Bb>>2]*.19509032368659973+ +g[Kb>>2]*.9807852506637573;g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[jc>>2]=+g[Ra>>2]-+g[Sa>>2];g[fb>>2]=+g[Ta>>2]+ +g[eb>>2];g[Zc>>2]=+g[ic>>2]-+g[jc>>2];g[Uc>>2]=+g[eb>>2]-+g[Ta>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[M>>2]=+g[ma>>2]+ +g[L>>2];g[nc>>2]=+g[db>>2]-+g[Wa>>2];g[Ma>>2]=+g[ba>>2]*.9807852506637573+ +g[La>>2]*.19509032368659973;g[Lb>>2]=+g[Bb>>2]*.9807852506637573-+g[Kb>>2]*.19509032368659973;g[Oa>>2]=+g[Ma>>2]+ +g[Lb>>2];g[mc>>2]=+g[Lb>>2]-+g[Ma>>2];g[Pa>>2]=+g[M>>2]+ +g[Oa>>2];g[$c>>2]=+g[nc>>2]-+g[mc>>2];g[Sc>>2]=+g[M>>2]-+g[Oa>>2];g[oc>>2]=+g[mc>>2]+ +g[nc>>2];g[x>>2]=+g[c[q>>2]>>2];g[Qa>>2]=+g[(c[q>>2]|0)+4>>2];g[gb>>2]=+g[x>>2]*+g[Pa>>2]+ +g[Qa>>2]*+g[fb>>2];g[Lc>>2]=+g[x>>2]*+g[fb>>2]-+g[Qa>>2]*+g[Pa>>2];g[c[m>>2]>>2]=+g[w>>2]-+g[gb>>2];g[c[n>>2]>>2]=+g[Kc>>2]+ +g[Lc>>2];g[c[o>>2]>>2]=+g[w>>2]+ +g[gb>>2];g[c[p>>2]>>2]=+g[Lc>>2]-+g[Kc>>2];g[sc>>2]=+g[(c[q>>2]|0)+184>>2];g[uc>>2]=+g[(c[q>>2]|0)+188>>2];g[Xc>>2]=+g[sc>>2]*+g[tc>>2]-+g[uc>>2]*+g[Wc>>2];g[bd>>2]=+g[uc>>2]*+g[tc>>2]+ +g[sc>>2]*+g[Wc>>2];g[Yc>>2]=+g[(c[q>>2]|0)+192>>2];g[_c>>2]=+g[(c[q>>2]|0)+196>>2];g[ad>>2]=+g[Yc>>2]*+g[Zc>>2]+ +g[_c>>2]*+g[$c>>2];g[cd>>2]=+g[Yc>>2]*+g[$c>>2]-+g[_c>>2]*+g[Zc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Xc>>2]-+g[ad>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[bd>>2]+ +g[cd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Xc>>2]+ +g[ad>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[cd>>2]-+g[bd>>2];g[Mc>>2]=+g[(c[q>>2]|0)+120>>2];g[Oc>>2]=+g[(c[q>>2]|0)+124>>2];g[Qc>>2]=+g[Mc>>2]*+g[Nc>>2]-+g[Oc>>2]*+g[Pc>>2];g[Yb>>2]=+g[Oc>>2]*+g[Nc>>2]+ +g[Mc>>2]*+g[Pc>>2];g[Rc>>2]=+g[(c[q>>2]|0)+128>>2];g[Tc>>2]=+g[(c[q>>2]|0)+132>>2];g[Xb>>2]=+g[Rc>>2]*+g[Sc>>2]+ 
+g[Tc>>2]*+g[Uc>>2];g[Zb>>2]=+g[Rc>>2]*+g[Uc>>2]-+g[Tc>>2]*+g[Sc>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Qc>>2]-+g[Xb>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Qc>>2]+ +g[Xb>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Zb>>2]-+g[Yb>>2];g[_b>>2]=+g[(c[q>>2]|0)+56>>2];g[cc>>2]=+g[(c[q>>2]|0)+60>>2];g[gc>>2]=+g[_b>>2]*+g[bc>>2]-+g[cc>>2]*+g[fc>>2];g[qc>>2]=+g[cc>>2]*+g[bc>>2]+ +g[_b>>2]*+g[fc>>2];g[hc>>2]=+g[(c[q>>2]|0)+64>>2];g[lc>>2]=+g[(c[q>>2]|0)+68>>2];g[pc>>2]=+g[hc>>2]*+g[kc>>2]+ +g[lc>>2]*+g[oc>>2];g[rc>>2]=+g[hc>>2]*+g[oc>>2]-+g[lc>>2]*+g[kc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[gc>>2]-+g[pc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[qc>>2]+ +g[rc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[gc>>2]+ +g[pc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[rc>>2]-+g[qc>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[Tg>>2]=+g[Pg>>2]+ +g[Sg>>2];g[Ug>>2]=+g[Mg>>2]+ +g[Tg>>2];g[zi>>2]=+g[Mg>>2]-+g[Tg>>2];g[Yg>>2]=+g[Wg>>2]+ +g[Xg>>2];g[$g>>2]=+g[Zg>>2]+ +g[_g>>2];g[ah>>2]=+g[Yg>>2]+ +g[$g>>2];g[Bi>>2]=+g[$g>>2]-+g[Yg>>2];g[Rh>>2]=+g[Pg>>2]-+g[Sg>>2];g[Sh>>2]=+g[_g>>2]-+g[Zg>>2];g[Th>>2]=+g[Rh>>2]+ +g[Sh>>2];g[hi>>2]=+g[Sh>>2]-+g[Rh>>2];g[Li>>2]=+g[Kg>>2]-+g[Lg>>2];g[Mi>>2]=+g[Xg>>2]-+g[Wg>>2];g[Ph>>2]=+g[Li>>2]+ +g[Mi>>2];g[fi>>2]=+g[Li>>2]-+g[Mi>>2];g[ti>>2]=+g[ri>>2]-+g[si>>2];g[Wh>>2]=+g[dh>>2]-+g[Fh>>2];g[oi>>2]=+g[Ih>>2]*.19509032368659973-+g[Hh>>2]*.9807852506637573;g[pi>>2]=+g[Lh>>2]*.19509032368659973-+g[Kh>>2]*.9807852506637573;g[qi>>2]=+g[oi>>2]+ +g[pi>>2];g[Xh>>2]=+g[oi>>2]-+g[pi>>2];g[ui>>2]=+g[qi>>2]+ +g[ti>>2];g[ki>>2]=+g[Wh>>2]-+g[Xh>>2];g[Gi>>2]=+g[ti>>2]-+g[qi>>2];g[Yh>>2]=+g[Wh>>2]+ +g[Xh>>2];g[Gh>>2]=+g[dh>>2]+ +g[Fh>>2];g[_h>>2]=+g[si>>2]+ +g[ri>>2];g[Jh>>2]=+g[Hh>>2]*.19509032368659973+ +g[Ih>>2]*.9807852506637573;g[Mh>>2]=+g[Kh>>2]*.19509032368659973+ +g[Lh>>2]*.9807852506637573;g[Nh>>2]=+g[Jh>>2]-+g[Mh>>2];g[$h>>2]=+g[Jh>>2]+ +g[Mh>>2];g[Oh>>2]=+g[Gh>>2]+ +g[Nh>>2];g[mi>>2]=+g[$h>>2]+ 
+g[_h>>2];g[Ei>>2]=+g[Gh>>2]-+g[Nh>>2];g[ai>>2]=+g[_h>>2]-+g[$h>>2];g[Jg>>2]=+g[(c[q>>2]|0)+40>>2];g[Vg>>2]=+g[(c[q>>2]|0)+44>>2];g[bh>>2]=+g[Jg>>2]*+g[Ug>>2]-+g[Vg>>2]*+g[ah>>2];g[wi>>2]=+g[Vg>>2]*+g[Ug>>2]+ +g[Jg>>2]*+g[ah>>2];g[ch>>2]=+g[(c[q>>2]|0)+48>>2];g[ni>>2]=+g[(c[q>>2]|0)+52>>2];g[vi>>2]=+g[ch>>2]*+g[Oh>>2]+ +g[ni>>2]*+g[ui>>2];g[xi>>2]=+g[ch>>2]*+g[ui>>2]-+g[ni>>2]*+g[Oh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[bh>>2]-+g[vi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wi>>2]+ +g[xi>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[bh>>2]+ +g[vi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[xi>>2]-+g[wi>>2];g[ei>>2]=+g[(c[q>>2]|0)+232>>2];g[gi>>2]=+g[(c[q>>2]|0)+236>>2];g[ii>>2]=+g[ei>>2]*+g[fi>>2]-+g[gi>>2]*+g[hi>>2];g[Pi>>2]=+g[gi>>2]*+g[fi>>2]+ +g[ei>>2]*+g[hi>>2];g[ji>>2]=+g[(c[q>>2]|0)+240>>2];g[li>>2]=+g[(c[q>>2]|0)+244>>2];g[Oi>>2]=+g[ji>>2]*+g[ki>>2]+ +g[li>>2]*+g[mi>>2];g[Qi>>2]=+g[ji>>2]*+g[mi>>2]-+g[li>>2]*+g[ki>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ii>>2]-+g[Oi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Pi>>2]+ +g[Qi>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ii>>2]+ +g[Oi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Qi>>2]-+g[Pi>>2];g[yi>>2]=+g[(c[q>>2]|0)+168>>2];g[Ai>>2]=+g[(c[q>>2]|0)+172>>2];g[Ci>>2]=+g[yi>>2]*+g[zi>>2]-+g[Ai>>2]*+g[Bi>>2];g[Ii>>2]=+g[Ai>>2]*+g[zi>>2]+ +g[yi>>2]*+g[Bi>>2];g[Di>>2]=+g[(c[q>>2]|0)+176>>2];g[Fi>>2]=+g[(c[q>>2]|0)+180>>2];g[Hi>>2]=+g[Di>>2]*+g[Ei>>2]+ +g[Fi>>2]*+g[Gi>>2];g[Ji>>2]=+g[Di>>2]*+g[Gi>>2]-+g[Fi>>2]*+g[Ei>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ci>>2]-+g[Hi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ii>>2]+ +g[Ji>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ci>>2]+ +g[Hi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ji>>2]-+g[Ii>>2];g[Ki>>2]=+g[(c[q>>2]|0)+104>>2];g[Qh>>2]=+g[(c[q>>2]|0)+108>>2];g[Uh>>2]=+g[Ki>>2]*+g[Ph>>2]-+g[Qh>>2]*+g[Th>>2];g[ci>>2]=+g[Qh>>2]*+g[Ph>>2]+ +g[Ki>>2]*+g[Th>>2];g[Vh>>2]=+g[(c[q>>2]|0)+112>>2];g[Zh>>2]=+g[(c[q>>2]|0)+116>>2];g[bi>>2]=+g[Vh>>2]*+g[Yh>>2]+ 
+g[Zh>>2]*+g[ai>>2];g[di>>2]=+g[Vh>>2]*+g[ai>>2]-+g[Zh>>2]*+g[Yh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Uh>>2]-+g[bi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ci>>2]+ +g[di>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Uh>>2]+ +g[bi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[di>>2]-+g[ci>>2];g[Gd>>2]=+g[Ed>>2]+ +g[Fd>>2];g[Nd>>2]=(+g[Jd>>2]+ +g[Md>>2])*.7071067690849304;g[Od>>2]=+g[Gd>>2]+ +g[Nd>>2];g[wd>>2]=+g[Gd>>2]-+g[Nd>>2];g[Sd>>2]=(+g[Qd>>2]+ +g[Rd>>2])*.7071067690849304;g[Vd>>2]=+g[Td>>2]+ +g[Ud>>2];g[Wd>>2]=+g[Sd>>2]+ +g[Vd>>2];g[yd>>2]=+g[Vd>>2]-+g[Sd>>2];g[le>>2]=(+g[Jd>>2]-+g[Md>>2])*.7071067690849304;g[me>>2]=+g[Ud>>2]-+g[Td>>2];g[Ne>>2]=+g[le>>2]+ +g[me>>2];g[bf>>2]=+g[me>>2]-+g[le>>2];g[he>>2]=+g[Ed>>2]-+g[Fd>>2];g[ie>>2]=(+g[Rd>>2]-+g[Qd>>2])*.7071067690849304;g[je>>2]=+g[he>>2]+ +g[ie>>2];g[$e>>2]=+g[he>>2]-+g[ie>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[Qe>>2]=+g[Zd>>2]-+g[_d>>2];g[ld>>2]=+g[be>>2]*.5555702447891235-+g[ae>>2]*.8314695954322815;g[md>>2]=+g[fd>>2]*.8314695954322815+ +g[gd>>2]*.5555702447891235;g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[Re>>2]=+g[ld>>2]-+g[md>>2];g[rd>>2]=+g[nd>>2]+ +g[qd>>2];g[ef>>2]=+g[Qe>>2]-+g[Re>>2];g[Dd>>2]=+g[qd>>2]-+g[nd>>2];g[Se>>2]=+g[Qe>>2]+ +g[Re>>2];g[$d>>2]=+g[Zd>>2]+ +g[_d>>2];g[Ve>>2]=+g[pd>>2]-+g[od>>2];g[ed>>2]=+g[ae>>2]*.5555702447891235+ +g[be>>2]*.8314695954322815;g[hd>>2]=+g[fd>>2]*.5555702447891235-+g[gd>>2]*.8314695954322815;g[id>>2]=+g[ed>>2]+ +g[hd>>2];g[Ue>>2]=+g[hd>>2]-+g[ed>>2];g[jd>>2]=+g[$d>>2]+ +g[id>>2];g[gf>>2]=+g[Ve>>2]-+g[Ue>>2];g[Bd>>2]=+g[$d>>2]-+g[id>>2];g[We>>2]=+g[Ue>>2]+ +g[Ve>>2];g[dd>>2]=+g[(c[q>>2]|0)+24>>2];g[Pd>>2]=+g[(c[q>>2]|0)+28>>2];g[Xd>>2]=+g[dd>>2]*+g[Od>>2]-+g[Pd>>2]*+g[Wd>>2];g[td>>2]=+g[Pd>>2]*+g[Od>>2]+ +g[dd>>2]*+g[Wd>>2];g[Yd>>2]=+g[(c[q>>2]|0)+32>>2];g[kd>>2]=+g[(c[q>>2]|0)+36>>2];g[sd>>2]=+g[Yd>>2]*+g[jd>>2]+ 
+g[kd>>2]*+g[rd>>2];g[ud>>2]=+g[Yd>>2]*+g[rd>>2]-+g[kd>>2]*+g[jd>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Xd>>2]-+g[sd>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[td>>2]+ +g[ud>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Xd>>2]+ +g[sd>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ud>>2]-+g[td>>2];g[_e>>2]=+g[(c[q>>2]|0)+216>>2];g[af>>2]=+g[(c[q>>2]|0)+220>>2];g[cf>>2]=+g[_e>>2]*+g[$e>>2]-+g[af>>2]*+g[bf>>2];g[jf>>2]=+g[af>>2]*+g[$e>>2]+ +g[_e>>2]*+g[bf>>2];g[df>>2]=+g[(c[q>>2]|0)+224>>2];g[ff>>2]=+g[(c[q>>2]|0)+228>>2];g[hf>>2]=+g[df>>2]*+g[ef>>2]+ +g[ff>>2]*+g[gf>>2];g[kf>>2]=+g[df>>2]*+g[gf>>2]-+g[ff>>2]*+g[ef>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[cf>>2]-+g[hf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[jf>>2]+ +g[kf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[cf>>2]+ +g[hf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[kf>>2]-+g[jf>>2];g[vd>>2]=+g[(c[q>>2]|0)+152>>2];g[xd>>2]=+g[(c[q>>2]|0)+156>>2];g[zd>>2]=+g[vd>>2]*+g[wd>>2]-+g[xd>>2]*+g[yd>>2];g[ee>>2]=+g[xd>>2]*+g[wd>>2]+ +g[vd>>2]*+g[yd>>2];g[Ad>>2]=+g[(c[q>>2]|0)+160>>2];g[Cd>>2]=+g[(c[q>>2]|0)+164>>2];g[de>>2]=+g[Ad>>2]*+g[Bd>>2]+ +g[Cd>>2]*+g[Dd>>2];g[fe>>2]=+g[Ad>>2]*+g[Dd>>2]-+g[Cd>>2]*+g[Bd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[zd>>2]-+g[de>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[ee>>2]+ +g[fe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[zd>>2]+ +g[de>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[fe>>2]-+g[ee>>2];g[ge>>2]=+g[(c[q>>2]|0)+88>>2];g[ke>>2]=+g[(c[q>>2]|0)+92>>2];g[Oe>>2]=+g[ge>>2]*+g[je>>2]-+g[ke>>2]*+g[Ne>>2];g[Ye>>2]=+g[ke>>2]*+g[je>>2]+ +g[ge>>2]*+g[Ne>>2];g[Pe>>2]=+g[(c[q>>2]|0)+96>>2];g[Te>>2]=+g[(c[q>>2]|0)+100>>2];g[Xe>>2]=+g[Pe>>2]*+g[Se>>2]+ +g[Te>>2]*+g[We>>2];g[Ze>>2]=+g[Pe>>2]*+g[We>>2]-+g[Te>>2]*+g[Se>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Oe>>2]-+g[Xe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ye>>2]+ +g[Ze>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Oe>>2]+ +g[Xe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ze>>2]-+g[Ye>>2];g[xe>>2]=+g[pe>>2]+ 
+g[we>>2];g[Me>>2]=+g[Ee>>2]+ +g[Le>>2];g[nf>>2]=+g[xe>>2]+ +g[Me>>2];g[Tf>>2]=+g[xe>>2]-+g[Me>>2];g[rf>>2]=+g[pf>>2]+ +g[qf>>2];g[Yf>>2]=+g[uf>>2]+ +g[Xf>>2];g[Zf>>2]=+g[rf>>2]+ +g[Yf>>2];g[Vf>>2]=+g[Yf>>2]-+g[rf>>2];g[gh>>2]=+g[Ee>>2]-+g[Le>>2];g[hh>>2]=+g[Xf>>2]-+g[uf>>2];g[ih>>2]=+g[gh>>2]+ +g[hh>>2];g[yh>>2]=+g[hh>>2]-+g[gh>>2];g[Eg>>2]=+g[pe>>2]-+g[we>>2];g[Fg>>2]=+g[qf>>2]-+g[pf>>2];g[eh>>2]=+g[Eg>>2]+ +g[Fg>>2];g[wh>>2]=+g[Eg>>2]-+g[Fg>>2];g[Nf>>2]=+g[Jf>>2]+ +g[Mf>>2];g[lh>>2]=+g[cg>>2]-+g[jg>>2];g[Ef>>2]=+g[qg>>2]*.8314695954322815-+g[ng>>2]*.5555702447891235;g[Ff>>2]=+g[ug>>2]*.5555702447891235+ +g[zf>>2]*.8314695954322815;g[Gf>>2]=+g[Ef>>2]+ +g[Ff>>2];g[mh>>2]=+g[Ef>>2]-+g[Ff>>2];g[Of>>2]=+g[Gf>>2]+ +g[Nf>>2];g[Bh>>2]=+g[lh>>2]-+g[mh>>2];g[zg>>2]=+g[Nf>>2]-+g[Gf>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[kg>>2]=+g[cg>>2]+ +g[jg>>2];g[qh>>2]=+g[Mf>>2]-+g[Jf>>2];g[rg>>2]=+g[ng>>2]*.8314695954322815+ +g[qg>>2]*.5555702447891235;g[Af>>2]=+g[ug>>2]*.8314695954322815-+g[zf>>2]*.5555702447891235;g[Bf>>2]=+g[rg>>2]+ +g[Af>>2];g[ph>>2]=+g[Af>>2]-+g[rg>>2];g[Cf>>2]=+g[kg>>2]+ +g[Bf>>2];g[Dh>>2]=+g[qh>>2]-+g[ph>>2];g[xg>>2]=+g[kg>>2]-+g[Bf>>2];g[rh>>2]=+g[ph>>2]+ +g[qh>>2];g[lf>>2]=+g[(c[q>>2]|0)+8>>2];g[of>>2]=+g[(c[q>>2]|0)+12>>2];g[_f>>2]=+g[lf>>2]*+g[nf>>2]-+g[of>>2]*+g[Zf>>2];g[Qf>>2]=+g[of>>2]*+g[nf>>2]+ +g[lf>>2]*+g[Zf>>2];g[$f>>2]=+g[(c[q>>2]|0)+16>>2];g[Df>>2]=+g[(c[q>>2]|0)+20>>2];g[Pf>>2]=+g[$f>>2]*+g[Cf>>2]+ +g[Df>>2]*+g[Of>>2];g[Rf>>2]=+g[$f>>2]*+g[Of>>2]-+g[Df>>2]*+g[Cf>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[_f>>2]-+g[Pf>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Qf>>2]+ +g[Rf>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[_f>>2]+ +g[Pf>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Rf>>2]-+g[Qf>>2];g[vh>>2]=+g[(c[q>>2]|0)+200>>2];g[xh>>2]=+g[(c[q>>2]|0)+204>>2];g[zh>>2]=+g[vh>>2]*+g[wh>>2]-+g[xh>>2]*+g[yh>>2];g[Hg>>2]=+g[xh>>2]*+g[wh>>2]+ +g[vh>>2]*+g[yh>>2];g[Ah>>2]=+g[(c[q>>2]|0)+208>>2];g[Ch>>2]=+g[(c[q>>2]|0)+212>>2];g[Gg>>2]=+g[Ah>>2]*+g[Bh>>2]+ 
+g[Ch>>2]*+g[Dh>>2];g[Ig>>2]=+g[Ah>>2]*+g[Dh>>2]-+g[Ch>>2]*+g[Bh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[zh>>2]-+g[Gg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Hg>>2]+ +g[Ig>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[zh>>2]+ +g[Gg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Ig>>2]-+g[Hg>>2];g[Sf>>2]=+g[(c[q>>2]|0)+136>>2];g[Uf>>2]=+g[(c[q>>2]|0)+140>>2];g[Wf>>2]=+g[Sf>>2]*+g[Tf>>2]-+g[Uf>>2]*+g[Vf>>2];g[Bg>>2]=+g[Uf>>2]*+g[Tf>>2]+ +g[Sf>>2]*+g[Vf>>2];g[wg>>2]=+g[(c[q>>2]|0)+144>>2];g[yg>>2]=+g[(c[q>>2]|0)+148>>2];g[Ag>>2]=+g[wg>>2]*+g[xg>>2]+ +g[yg>>2]*+g[zg>>2];g[Cg>>2]=+g[wg>>2]*+g[zg>>2]-+g[yg>>2]*+g[xg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Wf>>2]-+g[Ag>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Bg>>2]+ +g[Cg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Wf>>2]+ +g[Ag>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Cg>>2]-+g[Bg>>2];g[Dg>>2]=+g[(c[q>>2]|0)+72>>2];g[fh>>2]=+g[(c[q>>2]|0)+76>>2];g[jh>>2]=+g[Dg>>2]*+g[eh>>2]-+g[fh>>2]*+g[ih>>2];g[th>>2]=+g[fh>>2]*+g[eh>>2]+ +g[Dg>>2]*+g[ih>>2];g[kh>>2]=+g[(c[q>>2]|0)+80>>2];g[oh>>2]=+g[(c[q>>2]|0)+84>>2];g[sh>>2]=+g[kh>>2]*+g[nh>>2]+ +g[oh>>2]*+g[rh>>2];g[uh>>2]=+g[kh>>2]*+g[rh>>2]-+g[oh>>2]*+g[nh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jh>>2]-+g[sh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[th>>2]+ +g[uh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jh>>2]+ +g[sh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[uh>>2]-+g[th>>2];c[Pj>>2]=(c[Pj>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+248;c[r>>2]=c[r>>2]^c[2998]}i=Qj;return}function Xu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,69,9304,1);i=b;return}function Yu(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+192|0;m=da+180|0;n=da+176|0;o=da+172|0;p=da+168|0;q=da+164|0;r=da+160|0;ea=da+156|0;s=da+152|0;t=da+148|0;ca=da+144|0;w=da+140|0;P=da+136|0;z=da+132|0;Q=da+128|0;H=da+124|0;N=da+120|0;$=da+116|0;Z=da+112|0;W=da+108|0;U=da+104|0;J=da+100|0;G=da+96|0;C=da+92|0;M=da+88|0;u=da+84|0;v=da+80|0;E=da+76|0;F=da+72|0;x=da+68|0;y=da+64|0;K=da+60|0;L=da+56|0;A=da+52|0;R=da+48|0;O=da+44|0;S=da+40|0;B=da+36|0;I=da+32|0;X=da+28|0;ba=da+24|0;aa=da+20|0;D=da+16|0;T=da+12|0;V=da+8|0;Y=da+4|0;_=da;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ea>>2]=j;c[s>>2]=k;c[t>>2]=l;c[ca>>2]=c[ea>>2];c[q>>2]=(c[q>>2]|0)+(((c[ea>>2]|0)-1|0)*6<<2);while(1){if((c[ca>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[J>>2]=+g[u>>2]-+g[v>>2];g[E>>2]=+g[c[n>>2]>>2];g[F>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[P>>2]=+g[E>>2]-+g[F>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[c[o>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[C>>2]=+g[x>>2]-+g[y>>2];g[K>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[L>>2]=+g[c[p>>2]>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[Q>>2]=+g[K>>2]-+g[L>>2];g[H>>2]=+g[C>>2]+ +g[G>>2];g[N>>2]=+g[J>>2]-+g[M>>2];g[$>>2]=+g[J>>2]+ +g[M>>2];g[Z>>2]=+g[G>>2]-+g[C>>2];g[W>>2]=+g[P>>2]-+g[Q>>2];g[U>>2]=+g[w>>2]-+g[z>>2];g[A>>2]=+g[w>>2]+ +g[z>>2];g[R>>2]=+g[P>>2]+ +g[Q>>2];g[B>>2]=+g[c[q>>2]>>2];g[I>>2]=+g[(c[q>>2]|0)+4>>2];g[O>>2]=+g[B>>2]*+g[H>>2]+ +g[I>>2]*+g[N>>2];g[S>>2]=+g[B>>2]*+g[N>>2]-+g[I>>2]*+g[H>>2];g[c[m>>2]>>2]=+g[A>>2]-+g[O>>2];g[c[n>>2]>>2]=+g[R>>2]+ +g[S>>2];g[c[o>>2]>>2]=+g[A>>2]+ +g[O>>2];g[c[p>>2]>>2]=+g[S>>2]-+g[R>>2];g[T>>2]=+g[(c[q>>2]|0)+8>>2];g[V>>2]=+g[(c[q>>2]|0)+12>>2];g[X>>2]=+g[T>>2]*+g[U>>2]-+g[V>>2]*+g[W>>2];g[ba>>2]=+g[V>>2]*+g[U>>2]+ 
+g[T>>2]*+g[W>>2];g[Y>>2]=+g[(c[q>>2]|0)+16>>2];g[_>>2]=+g[(c[q>>2]|0)+20>>2];g[aa>>2]=+g[Y>>2]*+g[Z>>2]+ +g[_>>2]*+g[$>>2];g[D>>2]=+g[Y>>2]*+g[$>>2]-+g[_>>2]*+g[Z>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[X>>2]-+g[aa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ba>>2]+ +g[D>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[X>>2]+ +g[aa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[D>>2]-+g[ba>>2];c[ca>>2]=(c[ca>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24}i=da;return}function Zu(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,70,9352,1);i=b;return}function _u(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0;jb=i;i=i+432|0;m=jb+424|0;n=jb+420|0;o=jb+416|0;p=jb+412|0;q=jb+408|0;r=jb+404|0;kb=jb+400|0;s=jb+396|0;t=jb+392|0;ib=jb+384|0;ha=jb+380|0;I=jb+376|0;M=jb+372|0;Va=jb+368|0;qa=jb+364|0;z=jb+360|0;E=jb+356|0;ya=jb+352|0;Oa=jb+348|0;va=jb+344|0;D=jb+340|0;A=jb+336|0;la=jb+332|0;J=jb+328|0;eb=jb+324|0;N=jb+320|0;da=jb+316|0;ma=jb+312|0;Ua=jb+308|0;wa=jb+304|0;ga=jb+300|0;Ra=jb+296|0;pa=jb+292|0;xa=jb+288|0;u=jb+284|0;ca=jb+280|0;Sa=jb+276|0;Ta=jb+272|0;ea=jb+268|0;fa=jb+264|0;na=jb+260|0;oa=jb+256|0;Ka=jb+252|0;Wa=jb+248|0;Za=jb+244|0;ta=jb+240|0;Na=jb+236|0;$a=jb+232|0;cb=jb+228|0;ua=jb+224|0;ia=jb+220|0;ja=jb+216|0;Xa=jb+212|0;Ya=jb+208|0;La=jb+204|0;Ma=jb+200|0;ab=jb+196|0;bb=jb+192|0;hb=jb+188|0;ka=jb+184|0;_a=jb+180|0;db=jb+176|0;Pa=jb+17
2|0;za=jb+168|0;sa=jb+164|0;Aa=jb+160|0;fb=jb+156|0;ra=jb+152|0;Qa=jb+148|0;gb=jb+144|0;W=jb+140|0;aa=jb+136|0;$=jb+132|0;ba=jb+128|0;T=jb+124|0;V=jb+120|0;S=jb+116|0;U=jb+112|0;Y=jb+108|0;_=jb+104|0;X=jb+100|0;Z=jb+96|0;Fa=jb+92|0;w=jb+88|0;v=jb+84|0;x=jb+80|0;Ca=jb+76|0;Ea=jb+72|0;Ba=jb+68|0;Da=jb+64|0;Ha=jb+60|0;Ja=jb+56|0;Ga=jb+52|0;Ia=jb+48|0;G=jb+44|0;Q=jb+40|0;P=jb+36|0;R=jb+32|0;B=jb+28|0;F=jb+24|0;y=jb+20|0;C=jb+16|0;K=jb+12|0;O=jb+8|0;H=jb+4|0;L=jb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[kb>>2]=j;c[s>>2]=k;c[t>>2]=l;g[jb+388>>2]=.7071067690849304;c[ib>>2]=c[kb>>2];c[q>>2]=(c[q>>2]|0)+(((c[kb>>2]|0)-1|0)*14<<2);while(1){if((c[ib>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[ca>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[da>>2]=+g[u>>2]+ +g[ca>>2];g[ma>>2]=+g[u>>2]-+g[ca>>2];g[Sa>>2]=+g[c[n>>2]>>2];g[Ta>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[wa>>2]=+g[Sa>>2]-+g[Ta>>2];g[ea>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[fa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[Ra>>2]=+g[ea>>2]-+g[fa>>2];g[na>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[xa>>2]=+g[na>>2]-+g[oa>>2];g[ha>>2]=+g[da>>2]+ +g[ga>>2];g[I>>2]=+g[Ua>>2]-+g[Ra>>2];g[M>>2]=+g[ma>>2]+ +g[pa>>2];g[Va>>2]=+g[Ra>>2]+ +g[Ua>>2];g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[z>>2]=+g[da>>2]-+g[ga>>2];g[E>>2]=+g[wa>>2]-+g[xa>>2];g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[ia>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[ja>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ka>>2]=+g[ia>>2]+ +g[ja>>2];g[Wa>>2]=+g[ia>>2]-+g[ja>>2];g[Xa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ya>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[ta>>2]=+g[Xa>>2]-+g[Ya>>2];g[La>>2]=+g[c[o>>2]>>2];g[Ma>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[$a>>2]=+g[La>>2]-+g[Ma>>2];g[ab>>2]=+g[c[p>>2]>>2];g[bb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[cb>>2]=+g[ab>>2]+ 
+g[bb>>2];g[ua>>2]=+g[bb>>2]-+g[ab>>2];g[Oa>>2]=+g[Ka>>2]+ +g[Na>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[D>>2]=+g[Ka>>2]-+g[Na>>2];g[A>>2]=+g[ua>>2]-+g[ta>>2];g[hb>>2]=+g[Wa>>2]-+g[Za>>2];g[ka>>2]=+g[$a>>2]-+g[cb>>2];g[la>>2]=(+g[hb>>2]+ +g[ka>>2])*.7071067690849304;g[J>>2]=(+g[hb>>2]-+g[ka>>2])*.7071067690849304;g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[db>>2]=+g[$a>>2]+ +g[cb>>2];g[eb>>2]=(+g[_a>>2]-+g[db>>2])*.7071067690849304;g[N>>2]=(+g[_a>>2]+ +g[db>>2])*.7071067690849304;g[Pa>>2]=+g[ha>>2]+ +g[Oa>>2];g[za>>2]=+g[va>>2]+ +g[ya>>2];g[fb>>2]=+g[Va>>2]+ +g[eb>>2];g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[Qa>>2]=+g[c[q>>2]>>2];g[gb>>2]=+g[(c[q>>2]|0)+4>>2];g[sa>>2]=+g[Qa>>2]*+g[fb>>2]+ +g[gb>>2]*+g[ra>>2];g[Aa>>2]=+g[Qa>>2]*+g[ra>>2]-+g[gb>>2]*+g[fb>>2];g[c[m>>2]>>2]=+g[Pa>>2]-+g[sa>>2];g[c[n>>2]>>2]=+g[za>>2]+ +g[Aa>>2];g[c[o>>2]>>2]=+g[Pa>>2]+ +g[sa>>2];g[c[p>>2]>>2]=+g[Aa>>2]-+g[za>>2];g[T>>2]=+g[z>>2]-+g[A>>2];g[V>>2]=+g[E>>2]-+g[D>>2];g[S>>2]=+g[(c[q>>2]|0)+40>>2];g[U>>2]=+g[(c[q>>2]|0)+44>>2];g[W>>2]=+g[S>>2]*+g[T>>2]-+g[U>>2]*+g[V>>2];g[aa>>2]=+g[U>>2]*+g[T>>2]+ +g[S>>2]*+g[V>>2];g[Y>>2]=+g[I>>2]-+g[J>>2];g[_>>2]=+g[N>>2]+ +g[M>>2];g[X>>2]=+g[(c[q>>2]|0)+48>>2];g[Z>>2]=+g[(c[q>>2]|0)+52>>2];g[$>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[ba>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[W>>2]-+g[$>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[W>>2]+ +g[$>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ba>>2]-+g[aa>>2];g[Ca>>2]=+g[ha>>2]-+g[Oa>>2];g[Ea>>2]=+g[ya>>2]-+g[va>>2];g[Ba>>2]=+g[(c[q>>2]|0)+24>>2];g[Da>>2]=+g[(c[q>>2]|0)+28>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[w>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[Ba>>2]*+g[Ea>>2];g[Ha>>2]=+g[Va>>2]-+g[eb>>2];g[Ja>>2]=+g[qa>>2]-+g[la>>2];g[Ga>>2]=+g[(c[q>>2]|0)+32>>2];g[Ia>>2]=+g[(c[q>>2]|0)+36>>2];g[v>>2]=+g[Ga>>2]*+g[Ha>>2]+ 
+g[Ia>>2]*+g[Ja>>2];g[x>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]-+g[v>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[w>>2]+ +g[x>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]+ +g[v>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[x>>2]-+g[w>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[y>>2]=+g[(c[q>>2]|0)+8>>2];g[C>>2]=+g[(c[q>>2]|0)+12>>2];g[G>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[F>>2];g[Q>>2]=+g[C>>2]*+g[B>>2]+ +g[y>>2]*+g[F>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[H>>2]=+g[(c[q>>2]|0)+16>>2];g[L>>2]=+g[(c[q>>2]|0)+20>>2];g[P>>2]=+g[H>>2]*+g[K>>2]+ +g[L>>2]*+g[O>>2];g[R>>2]=+g[H>>2]*+g[O>>2]-+g[L>>2]*+g[K>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]-+g[P>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]+ +g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]+ +g[P>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[R>>2]-+g[Q>>2];c[ib>>2]=(c[ib>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+56;c[r>>2]=c[r>>2]^c[2998]}i=jb;return}function $u(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,71,9400,1);i=b;return}function av(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0;$b=i;i=i+624|0;m=$b+612|0;n=$b+608|0;o=$b+604|0;p=$b+600|0;q=$b+596|0;r=$b+592|0;ac=$b+588|0;s=$b+584|0;t=$b+580|0;_b=$b+560|0;Va=$b+556|0;sb=$b+552|0;rb=$b+548|0;y=$b+544|0;Ib=$b+540|0;x=$b+536|0;tb=$b+532|0;ub=$b+528|0;ja=$b+524|0;Y=$b+520|0;Rb=$b+516|0;va=$b+512|0;jb=$b+508|0;ka=$b+504|0;gb=$b+500|0;pa=$b+496|0;kb=$b+492|0;lb=$b+488|0;na=$b+484|0;oa=$b+480|0;ea=$b+476|0;U=$b+472|0;yb=$b+468|0;za=$b+464|0;Db=$b+460|0;Ob=$b+456|0;Gb=$b+452|0;Pb=$b+448|0;Hb=$b+444|0;qb=$b+440|0;Ya=$b+436|0;Lb=$b+432|0;$a=$b+428|0;Mb=$b+424|0;Ab=$b+420|0;pb=$b+416|0;u=$b+412|0;Da=$b+408|0;Bb=$b+404|0;Cb=$b+400|0;Eb=$b+396|0;Fb=$b+392|0;Wa=$b+388|0;Xa=$b+384|0;Za=$b+380|0;_a=$b+376|0;ha=$b+372|0;ia=$b+368|0;Nb=$b+364|0;Qb=$b+360|0;bb=$b+356|0;D=$b+352|0;eb=$b+348|0;E=$b+344|0;fb=$b+340|0;ma=$b+336|0;Ub=$b+332|0;A=$b+328|0;Xb=$b+324|0;B=$b+320|0;Yb=$b+316|0;la=$b+312|0;hb=$b+308|0;ib=$b+304|0;Zb=$b+300|0;ab=$b+296|0;cb=$b+292|0;db=$b+288|0;Sb=$b+284|0;Tb=$b+280|0;Vb=$b+276|0;Wb=$b+272|0;C=$b+268|0;da=$b+264|0;wb=$b+260|0;xb=$b+256|0;Jb=$b+252|0;Ua=$b+248|0;nb=$b+244|0;M=$b+240|0;Z=$b+236|0;Ka=$b+232|0;zb=$b+228|0;O=$b+224|0;V=$b+220|0;Ia=$b+216|0;fa=$b+212|0;H=$b+208|0;wa=$b+204|0;Na=$b+200|0;ra=$b+196|0;J=$b+192|0;Aa=$b+188|0;Pa=$b+184|0;Ea=$b+180|0;Fa=$b+176|0;mb=$b+172|0;X=$b+168|0;v
b=$b+164|0;T=$b+160|0;z=$b+156|0;ua=$b+152|0;qa=$b+148|0;ya=$b+144|0;aa=$b+140|0;ca=$b+136|0;$=$b+132|0;ba=$b+128|0;v=$b+124|0;Ta=$b+120|0;Kb=$b+116|0;ob=$b+112|0;_=$b+108|0;Ga=$b+104|0;S=$b+100|0;W=$b+96|0;sa=$b+92|0;F=$b+88|0;Ba=$b+84|0;Ca=$b+80|0;w=$b+76|0;ga=$b+72|0;ta=$b+68|0;xa=$b+64|0;K=$b+60|0;R=$b+56|0;P=$b+52|0;Q=$b+48|0;G=$b+44|0;I=$b+40|0;L=$b+36|0;N=$b+32|0;La=$b+28|0;Sa=$b+24|0;Qa=$b+20|0;Ra=$b+16|0;Ha=$b+12|0;Ja=$b+8|0;Ma=$b+4|0;Oa=$b;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ac>>2]=j;c[s>>2]=k;c[t>>2]=l;g[$b+576>>2]=.9510565400123596;g[$b+572>>2]=.5877852439880371;g[$b+568>>2]=.25;g[$b+564>>2]=.55901700258255;c[_b>>2]=c[ac>>2];c[q>>2]=(c[q>>2]|0)+(((c[ac>>2]|0)-1|0)*18<<2);while(1){if((c[_b>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Va>>2]=+g[u>>2]+ +g[Da>>2];g[sb>>2]=+g[u>>2]-+g[Da>>2];g[Bb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Cb>>2]=+g[c[o>>2]>>2];g[Db>>2]=+g[Bb>>2]+ +g[Cb>>2];g[Ob>>2]=+g[Bb>>2]-+g[Cb>>2];g[Eb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Fb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[Pb>>2]=+g[Eb>>2]-+g[Fb>>2];g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[qb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Wa>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Xa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ya>>2]=+g[Wa>>2]+ +g[Xa>>2];g[Lb>>2]=+g[Wa>>2]-+g[Xa>>2];g[Za>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[_a>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[$a>>2]=+g[Za>>2]+ +g[_a>>2];g[Mb>>2]=+g[Za>>2]-+g[_a>>2];g[Ab>>2]=+g[Ya>>2]+ +g[$a>>2];g[pb>>2]=+g[Lb>>2]+ +g[Mb>>2];g[rb>>2]=(+g[pb>>2]-+g[qb>>2])*.55901700258255;g[y>>2]=(+g[Ab>>2]-+g[Hb>>2])*.55901700258255;g[Ib>>2]=+g[Ab>>2]+ +g[Hb>>2];g[x>>2]=+g[Va>>2]-+g[Ib>>2]*.25;g[tb>>2]=+g[pb>>2]+ +g[qb>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2]*.25;g[ha>>2]=+g[Ya>>2]-+g[$a>>2];g[ia>>2]=+g[Db>>2]-+g[Gb>>2];g[ja>>2]=+g[ha>>2]*.5877852439880371-+g[ia>>2]*.9510565400123596;g[Y>>2]=+g[ha>>2]*.9510565400123596+ 
+g[ia>>2]*.5877852439880371;g[Nb>>2]=+g[Lb>>2]-+g[Mb>>2];g[Qb>>2]=+g[Ob>>2]-+g[Pb>>2];g[Rb>>2]=+g[Nb>>2]*.9510565400123596+ +g[Qb>>2]*.5877852439880371;g[va>>2]=+g[Nb>>2]*.5877852439880371-+g[Qb>>2]*.9510565400123596;g[hb>>2]=+g[c[n>>2]>>2];g[ib>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[ka>>2]=+g[hb>>2]-+g[ib>>2];g[Zb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ab>>2]=+g[c[p>>2]>>2];g[bb>>2]=+g[Zb>>2]+ +g[ab>>2];g[D>>2]=+g[Zb>>2]-+g[ab>>2];g[cb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[db>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[E>>2]=+g[db>>2]-+g[cb>>2];g[fb>>2]=+g[bb>>2]-+g[eb>>2];g[ma>>2]=+g[D>>2]+ +g[E>>2];g[Sb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Tb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[A>>2]=+g[Sb>>2]-+g[Tb>>2];g[Vb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Wb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Xb>>2]=+g[Vb>>2]+ +g[Wb>>2];g[B>>2]=+g[Wb>>2]-+g[Vb>>2];g[Yb>>2]=+g[Ub>>2]-+g[Xb>>2];g[la>>2]=+g[A>>2]+ +g[B>>2];g[gb>>2]=(+g[Yb>>2]-+g[fb>>2])*.55901700258255;g[pa>>2]=(+g[la>>2]-+g[ma>>2])*.55901700258255;g[kb>>2]=+g[Yb>>2]+ +g[fb>>2];g[lb>>2]=+g[jb>>2]-+g[kb>>2]*.25;g[na>>2]=+g[la>>2]+ +g[ma>>2];g[oa>>2]=+g[ka>>2]-+g[na>>2]*.25;g[C>>2]=+g[A>>2]-+g[B>>2];g[da>>2]=+g[D>>2]-+g[E>>2];g[ea>>2]=+g[C>>2]*.5877852439880371-+g[da>>2]*.9510565400123596;g[U>>2]=+g[C>>2]*.9510565400123596+ +g[da>>2]*.5877852439880371;g[wb>>2]=+g[Ub>>2]+ +g[Xb>>2];g[xb>>2]=+g[bb>>2]+ +g[eb>>2];g[yb>>2]=+g[wb>>2]*.9510565400123596+ +g[xb>>2]*.5877852439880371;g[za>>2]=+g[wb>>2]*.5877852439880371-+g[xb>>2]*.9510565400123596;g[Jb>>2]=+g[Va>>2]+ +g[Ib>>2];g[Ua>>2]=+g[ka>>2]+ +g[na>>2];g[mb>>2]=+g[gb>>2]+ +g[lb>>2];g[nb>>2]=+g[Rb>>2]+ +g[mb>>2];g[M>>2]=+g[mb>>2]-+g[Rb>>2];g[X>>2]=+g[pa>>2]+ +g[oa>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[Ka>>2]=+g[Y>>2]+ +g[X>>2];g[vb>>2]=+g[rb>>2]+ +g[ub>>2];g[zb>>2]=+g[vb>>2]-+g[yb>>2];g[O>>2]=+g[vb>>2]+ +g[yb>>2];g[T>>2]=+g[y>>2]+ +g[x>>2];g[V>>2]=+g[T>>2]+ 
+g[U>>2];g[Ia>>2]=+g[T>>2]-+g[U>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[fa>>2]=+g[z>>2]-+g[ea>>2];g[H>>2]=+g[z>>2]+ +g[ea>>2];g[ua>>2]=+g[lb>>2]-+g[gb>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[Na>>2]=+g[va>>2]+ +g[ua>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[ra>>2]=+g[ja>>2]+ +g[qa>>2];g[J>>2]=+g[qa>>2]-+g[ja>>2];g[ya>>2]=+g[ub>>2]-+g[rb>>2];g[Aa>>2]=+g[ya>>2]+ +g[za>>2];g[Pa>>2]=+g[ya>>2]-+g[za>>2];g[aa>>2]=+g[sb>>2]+ +g[tb>>2];g[ca>>2]=+g[jb>>2]+ +g[kb>>2];g[$>>2]=+g[(c[q>>2]|0)+36>>2];g[ba>>2]=+g[(c[q>>2]|0)+32>>2];g[Ea>>2]=+g[$>>2]*+g[aa>>2]+ +g[ba>>2]*+g[ca>>2];g[Fa>>2]=+g[ba>>2]*+g[aa>>2]-+g[$>>2]*+g[ca>>2];g[Kb>>2]=+g[c[q>>2]>>2];g[ob>>2]=+g[(c[q>>2]|0)+4>>2];g[v>>2]=+g[Kb>>2]*+g[nb>>2]+ +g[ob>>2]*+g[zb>>2];g[Ta>>2]=+g[Kb>>2]*+g[zb>>2]-+g[ob>>2]*+g[nb>>2];g[c[m>>2]>>2]=+g[Jb>>2]-+g[v>>2];g[c[n>>2]>>2]=+g[Ta>>2]+ +g[Ua>>2];g[c[o>>2]>>2]=+g[Jb>>2]+ +g[v>>2];g[c[p>>2]>>2]=+g[Ta>>2]-+g[Ua>>2];g[S>>2]=+g[(c[q>>2]|0)+24>>2];g[W>>2]=+g[(c[q>>2]|0)+28>>2];g[_>>2]=+g[S>>2]*+g[V>>2]-+g[W>>2]*+g[Z>>2];g[Ga>>2]=+g[W>>2]*+g[V>>2]+ +g[S>>2]*+g[Z>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_>>2]-+g[Ea>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]+ +g[Ga>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ea>>2]+ +g[_>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]-+g[Ga>>2];g[w>>2]=+g[(c[q>>2]|0)+8>>2];g[ga>>2]=+g[(c[q>>2]|0)+12>>2];g[sa>>2]=+g[w>>2]*+g[fa>>2]-+g[ga>>2]*+g[ra>>2];g[F>>2]=+g[ga>>2]*+g[fa>>2]+ +g[w>>2]*+g[ra>>2];g[ta>>2]=+g[(c[q>>2]|0)+16>>2];g[xa>>2]=+g[(c[q>>2]|0)+20>>2];g[Ba>>2]=+g[ta>>2]*+g[wa>>2]+ +g[xa>>2]*+g[Aa>>2];g[Ca>>2]=+g[ta>>2]*+g[Aa>>2]-+g[xa>>2]*+g[wa>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[sa>>2]-+g[Ba>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ca>>2]+ +g[F>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ba>>2]+ +g[sa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ca>>2]-+g[F>>2];g[G>>2]=+g[(c[q>>2]|0)+56>>2];g[I>>2]=+g[(c[q>>2]|0)+60>>2];g[K>>2]=+g[G>>2]*+g[H>>2]-+g[I>>2]*+g[J>>2];g[R>>2]=+g[I>>2]*+g[H>>2]+ 
+g[G>>2]*+g[J>>2];g[L>>2]=+g[(c[q>>2]|0)+64>>2];g[N>>2]=+g[(c[q>>2]|0)+68>>2];g[P>>2]=+g[L>>2]*+g[M>>2]+ +g[N>>2]*+g[O>>2];g[Q>>2]=+g[L>>2]*+g[O>>2]-+g[N>>2]*+g[M>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[K>>2]-+g[P>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]+ +g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[P>>2]+ +g[K>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]-+g[R>>2];g[Ha>>2]=+g[(c[q>>2]|0)+40>>2];g[Ja>>2]=+g[(c[q>>2]|0)+44>>2];g[La>>2]=+g[Ha>>2]*+g[Ia>>2]-+g[Ja>>2]*+g[Ka>>2];g[Sa>>2]=+g[Ja>>2]*+g[Ia>>2]+ +g[Ha>>2]*+g[Ka>>2];g[Ma>>2]=+g[(c[q>>2]|0)+48>>2];g[Oa>>2]=+g[(c[q>>2]|0)+52>>2];g[Qa>>2]=+g[Ma>>2]*+g[Na>>2]+ +g[Oa>>2]*+g[Pa>>2];g[Ra>>2]=+g[Ma>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[Na>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[La>>2]-+g[Qa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ra>>2]+ +g[Sa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Qa>>2]+ +g[La>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Ra>>2]-+g[Sa>>2];c[_b>>2]=(c[_b>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+72;c[r>>2]=c[r>>2]^c[2998]}i=$b;return}function bv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,72,9448,1);i=b;return}function cv(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0;xc=i;i=i+704|0;m=xc+700|0;n=xc+696|0;o=xc+692|0;p=xc+688|0;q=xc+684|0;r=xc+680|0;yc=xc+676|0;s=xc+672|0;t=xc+668|0;wc=xc+656|0;rc=xc+652|0;J=xc+648|0;Ab=xc+644|0;K=xc+640|0;Ub=xc+636|0;Aa=xc+632|0;Rb=xc+628|0;za=xc+624|0;ga=xc+620|0;S=xc+616|0;Zb=xc+612|0;W=xc+608|0;Gb=xc+604|0;Ca=xc+600|0;Lb=xc+596|0;F=xc+592|0;C=xc+588|0;N=xc+584|0;x=xc+580|0;M=xc+576|0;ja=xc+572|0;X=xc+568|0;ic=xc+564|0;T=xc+560|0;u=xc+556|0;mc=xc+552|0;ub=xc+548|0;yb=xc+544|0;sb=xc+540|0;lc=xc+536|0;pc=xc+532|0;Qb=xc+528|0;xb=xc+524|0;sc=xc+520|0;vc=xc+516|0;Tb=xc+512|0;Da=xc+508|0;rb=xc+504|0;nc=xc+500|0;oc=xc+496|0;vb=xc+492|0;wb=xc+488|0;tc=xc+484|0;uc=xc+480|0;qc=xc+476|0;zb=xc+472|0;Sb=xc+468|0;Pb=xc+464|0;ea=xc+460|0;fa=xc+456|0;tb=xc+452|0;Yb=xc+448|0;_b=xc+444|0;v=xc+440|0;dc=xc+436|0;A=xc+432|0;bc=xc+428|0;Wb=xc+424|0;Fb=xc+420|0;Xb=xc+416|0;gc=xc+412|0;y=xc+408|0;Kb=xc+404|0;z=xc+400|0;$b=xc+396|0;ac=xc+392|0;Db=xc+388|0;Eb=xc+384|0;ec=xc+380|0;fc=xc+376|0;Ib=xc+372|0;Jb=xc+368|0;Cb=xc+364|0;Hb=xc+360|0;B=xc+356|0;w=xc+352|0;ha=xc+348|0;ia=xc+344|0;cc=xc+340|0;hc=xc+336|0;jc=xc+332|0;ka=xc+328|0;Nb=xc+324|0;sa=xc+320|0;E=xc+316|0;ua=xc+312|0;qa=xc+308|0;wa=xc+304|0;Bb=
xc+300|0;Mb=xc+296|0;Vb=xc+292|0;D=xc+288|0;na=xc+284|0;pa=xc+280|0;ma=xc+276|0;oa=xc+272|0;da=xc+268|0;la=xc+264|0;kc=xc+260|0;Ob=xc+256|0;va=xc+252|0;xa=xc+248|0;ra=xc+244|0;ta=xc+240|0;H=xc+236|0;ba=xc+232|0;P=xc+228|0;Ea=xc+224|0;Z=xc+220|0;_=xc+216|0;Ka=xc+212|0;La=xc+208|0;Ba=xc+204|0;G=xc+200|0;L=xc+196|0;O=xc+192|0;U=xc+188|0;Y=xc+184|0;R=xc+180|0;V=xc+176|0;Ha=xc+172|0;Ja=xc+168|0;Ga=xc+164|0;Ia=xc+160|0;Q=xc+156|0;$=xc+152|0;ya=xc+148|0;I=xc+144|0;Fa=xc+140|0;Ma=xc+136|0;aa=xc+132|0;ca=xc+128|0;Va=xc+124|0;lb=xc+120|0;gb=xc+116|0;Pa=xc+112|0;Za=xc+108|0;nb=xc+104|0;cb=xc+100|0;qb=xc+96|0;Ta=xc+92|0;Ua=xc+88|0;eb=xc+84|0;fb=xc+80|0;Xa=xc+76|0;Ya=xc+72|0;ab=xc+68|0;bb=xc+64|0;_a=xc+60|0;ib=xc+56|0;hb=xc+52|0;jb=xc+48|0;Na=xc+44|0;Wa=xc+40|0;$a=xc+36|0;db=xc+32|0;ob=xc+28|0;Ra=xc+24|0;Qa=xc+20|0;Sa=xc+16|0;kb=xc+12|0;mb=xc+8|0;pb=xc+4|0;Oa=xc;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[yc>>2]=j;c[s>>2]=k;c[t>>2]=l;g[xc+664>>2]=.5;g[xc+660>>2]=.8660253882408142;c[wc>>2]=c[yc>>2];c[q>>2]=(c[q>>2]|0)+(((c[yc>>2]|0)-1|0)*22<<2);while(1){if((c[wc>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[mc>>2]=+g[c[n>>2]>>2];g[ub>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[yb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Da>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[rb>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[sb>>2]=+g[Da>>2]+ +g[rb>>2];g[lc>>2]=(+g[Da>>2]-+g[rb>>2])*.8660253882408142;g[nc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[oc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[pc>>2]=+g[nc>>2]-+g[oc>>2];g[Qb>>2]=(+g[nc>>2]+ +g[oc>>2])*.8660253882408142;g[vb>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[wb>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[sc>>2]=(+g[vb>>2]-+g[wb>>2])*.8660253882408142;g[tc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[uc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[vc>>2]=+g[tc>>2]-+g[uc>>2];g[Tb>>2]=(+g[tc>>2]+ +g[uc>>2])*.8660253882408142;g[qc>>2]=+g[pc>>2]*.5+ +g[mc>>2];g[rc>>2]=+g[lc>>2]+ 
+g[qc>>2];g[J>>2]=+g[qc>>2]-+g[lc>>2];g[zb>>2]=+g[vc>>2]*.5-+g[yb>>2];g[Ab>>2]=+g[sc>>2]+ +g[zb>>2];g[K>>2]=+g[zb>>2]-+g[sc>>2];g[Sb>>2]=+g[ub>>2]-+g[xb>>2]*.5;g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[Aa>>2]=+g[Sb>>2]-+g[Tb>>2];g[Pb>>2]=+g[u>>2]-+g[sb>>2]*.5;g[Rb>>2]=+g[Pb>>2]-+g[Qb>>2];g[za>>2]=+g[Pb>>2]+ +g[Qb>>2];g[ea>>2]=+g[mc>>2]-+g[pc>>2];g[fa>>2]=+g[vc>>2]+ +g[yb>>2];g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[S>>2]=+g[ea>>2]+ +g[fa>>2];g[tb>>2]=+g[u>>2]+ +g[sb>>2];g[Yb>>2]=+g[ub>>2]+ +g[xb>>2];g[Zb>>2]=+g[tb>>2]+ +g[Yb>>2];g[W>>2]=+g[tb>>2]-+g[Yb>>2];g[_b>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[v>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[dc>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[$b>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ac>>2]=+g[c[o>>2]>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[Wb>>2]=(+g[$b>>2]-+g[ac>>2])*.8660253882408142;g[Db>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Eb>>2]=+g[c[p>>2]>>2];g[Fb>>2]=(+g[Db>>2]-+g[Eb>>2])*.8660253882408142;g[Xb>>2]=+g[Db>>2]+ +g[Eb>>2];g[ec>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[fc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[gc>>2]=+g[ec>>2]+ +g[fc>>2];g[y>>2]=(+g[ec>>2]-+g[fc>>2])*.8660253882408142;g[Ib>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Jb>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Kb>>2]=(+g[Ib>>2]-+g[Jb>>2])*.8660253882408142;g[z>>2]=+g[Ib>>2]+ +g[Jb>>2];g[Cb>>2]=+g[_b>>2]-+g[bc>>2]*.5;g[Gb>>2]=+g[Cb>>2]+ +g[Fb>>2];g[Ca>>2]=+g[Cb>>2]-+g[Fb>>2];g[Hb>>2]=+g[dc>>2]-+g[gc>>2]*.5;g[Lb>>2]=+g[Hb>>2]+ +g[Kb>>2];g[F>>2]=+g[Hb>>2]-+g[Kb>>2];g[B>>2]=+g[z>>2]*.5+ +g[A>>2];g[C>>2]=+g[y>>2]-+g[B>>2];g[N>>2]=+g[y>>2]+ +g[B>>2];g[w>>2]=+g[Xb>>2]*.5+ +g[v>>2];g[x>>2]=+g[Wb>>2]+ +g[w>>2];g[M>>2]=+g[w>>2]-+g[Wb>>2];g[ha>>2]=+g[v>>2]-+g[Xb>>2];g[ia>>2]=+g[z>>2]-+g[A>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[X>>2]=+g[ha>>2]-+g[ia>>2];g[cc>>2]=+g[_b>>2]+ +g[bc>>2];g[hc>>2]=+g[dc>>2]+ +g[gc>>2];g[ic>>2]=+g[cc>>2]+ +g[hc>>2];g[T>>2]=+g[cc>>2]-+g[hc>>2];g[jc>>2]=+g[Zb>>2]+ +g[ic>>2];g[ka>>2]=+g[ga>>2]+ 
+g[ja>>2];g[Bb>>2]=+g[rc>>2]-+g[Ab>>2];g[Mb>>2]=+g[Gb>>2]-+g[Lb>>2];g[Nb>>2]=+g[Bb>>2]+ +g[Mb>>2];g[sa>>2]=+g[Bb>>2]-+g[Mb>>2];g[Vb>>2]=+g[Rb>>2]-+g[Ub>>2];g[D>>2]=+g[x>>2]-+g[C>>2];g[E>>2]=+g[Vb>>2]-+g[D>>2];g[ua>>2]=+g[Vb>>2]+ +g[D>>2];g[na>>2]=+g[Zb>>2]-+g[ic>>2];g[pa>>2]=+g[ga>>2]-+g[ja>>2];g[ma>>2]=+g[(c[q>>2]|0)+40>>2];g[oa>>2]=+g[(c[q>>2]|0)+44>>2];g[qa>>2]=+g[ma>>2]*+g[na>>2]-+g[oa>>2]*+g[pa>>2];g[wa>>2]=+g[oa>>2]*+g[na>>2]+ +g[ma>>2]*+g[pa>>2];g[kc>>2]=+g[c[q>>2]>>2];g[Ob>>2]=+g[(c[q>>2]|0)+4>>2];g[da>>2]=+g[kc>>2]*+g[Nb>>2]+ +g[Ob>>2]*+g[E>>2];g[la>>2]=+g[kc>>2]*+g[E>>2]-+g[Ob>>2]*+g[Nb>>2];g[c[m>>2]>>2]=+g[jc>>2]-+g[da>>2];g[c[n>>2]>>2]=+g[ka>>2]+ +g[la>>2];g[c[o>>2]>>2]=+g[jc>>2]+ +g[da>>2];g[c[p>>2]>>2]=+g[la>>2]-+g[ka>>2];g[ra>>2]=+g[(c[q>>2]|0)+48>>2];g[ta>>2]=+g[(c[q>>2]|0)+52>>2];g[va>>2]=+g[ra>>2]*+g[sa>>2]+ +g[ta>>2]*+g[ua>>2];g[xa>>2]=+g[ra>>2]*+g[ua>>2]-+g[ta>>2]*+g[sa>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[qa>>2]-+g[va>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wa>>2]+ +g[xa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[qa>>2]+ +g[va>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[xa>>2]-+g[wa>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[G>>2]=+g[Ca>>2]+ +g[F>>2];g[H>>2]=+g[Ba>>2]-+g[G>>2];g[ba>>2]=+g[Ba>>2]+ +g[G>>2];g[L>>2]=+g[J>>2]+ +g[K>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[P>>2]=+g[L>>2]-+g[O>>2];g[Ea>>2]=+g[L>>2]+ +g[O>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[Y>>2]=+g[W>>2]+ +g[X>>2];g[R>>2]=+g[(c[q>>2]|0)+16>>2];g[V>>2]=+g[(c[q>>2]|0)+20>>2];g[Z>>2]=+g[R>>2]*+g[U>>2]+ +g[V>>2]*+g[Y>>2];g[_>>2]=+g[R>>2]*+g[Y>>2]-+g[V>>2]*+g[U>>2];g[Ha>>2]=+g[T>>2]+ +g[S>>2];g[Ja>>2]=+g[W>>2]-+g[X>>2];g[Ga>>2]=+g[(c[q>>2]|0)+64>>2];g[Ia>>2]=+g[(c[q>>2]|0)+68>>2];g[Ka>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[La>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[ya>>2]=+g[(c[q>>2]|0)+8>>2];g[I>>2]=+g[(c[q>>2]|0)+12>>2];g[Q>>2]=+g[ya>>2]*+g[H>>2]-+g[I>>2]*+g[P>>2];g[$>>2]=+g[I>>2]*+g[H>>2]+ 
+g[ya>>2]*+g[P>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]-+g[Z>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[_>>2]+ +g[$>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Z>>2]+ +g[Q>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[_>>2]-+g[$>>2];g[aa>>2]=+g[(c[q>>2]|0)+56>>2];g[ca>>2]=+g[(c[q>>2]|0)+60>>2];g[Fa>>2]=+g[aa>>2]*+g[ba>>2]-+g[ca>>2]*+g[Ea>>2];g[Ma>>2]=+g[ca>>2]*+g[ba>>2]+ +g[aa>>2]*+g[Ea>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Fa>>2]-+g[Ka>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[La>>2]+ +g[Ma>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ka>>2]+ +g[Fa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[La>>2]-+g[Ma>>2];g[Ta>>2]=+g[Rb>>2]+ +g[Ub>>2];g[Ua>>2]=+g[Gb>>2]+ +g[Lb>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[lb>>2]=+g[Ta>>2]-+g[Ua>>2];g[eb>>2]=+g[za>>2]-+g[Aa>>2];g[fb>>2]=+g[M>>2]+ +g[N>>2];g[gb>>2]=+g[eb>>2]-+g[fb>>2];g[Pa>>2]=+g[eb>>2]+ +g[fb>>2];g[Xa>>2]=+g[rc>>2]+ +g[Ab>>2];g[Ya>>2]=+g[x>>2]+ +g[C>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[nb>>2]=+g[Xa>>2]-+g[Ya>>2];g[ab>>2]=+g[J>>2]-+g[K>>2];g[bb>>2]=+g[Ca>>2]-+g[F>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[qb>>2]=+g[ab>>2]-+g[bb>>2];g[Na>>2]=+g[(c[q>>2]|0)+24>>2];g[Wa>>2]=+g[(c[q>>2]|0)+28>>2];g[_a>>2]=+g[Na>>2]*+g[Va>>2]-+g[Wa>>2]*+g[Za>>2];g[ib>>2]=+g[Wa>>2]*+g[Va>>2]+ +g[Na>>2]*+g[Za>>2];g[$a>>2]=+g[(c[q>>2]|0)+32>>2];g[db>>2]=+g[(c[q>>2]|0)+36>>2];g[hb>>2]=+g[$a>>2]*+g[cb>>2]+ +g[db>>2]*+g[gb>>2];g[jb>>2]=+g[$a>>2]*+g[gb>>2]-+g[db>>2]*+g[cb>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_a>>2]-+g[hb>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ib>>2]+ +g[jb>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[_a>>2]+ +g[hb>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[jb>>2]-+g[ib>>2];g[kb>>2]=+g[(c[q>>2]|0)+72>>2];g[mb>>2]=+g[(c[q>>2]|0)+76>>2];g[ob>>2]=+g[kb>>2]*+g[lb>>2]-+g[mb>>2]*+g[nb>>2];g[Ra>>2]=+g[mb>>2]*+g[lb>>2]+ +g[kb>>2]*+g[nb>>2];g[pb>>2]=+g[(c[q>>2]|0)+80>>2];g[Oa>>2]=+g[(c[q>>2]|0)+84>>2];g[Qa>>2]=+g[pb>>2]*+g[qb>>2]+ 
+g[Oa>>2]*+g[Pa>>2];g[Sa>>2]=+g[pb>>2]*+g[Pa>>2]-+g[Oa>>2]*+g[qb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ob>>2]-+g[Qa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ra>>2]+ +g[Sa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ob>>2]+ +g[Qa>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Sa>>2]-+g[Ra>>2];c[wc>>2]=(c[wc>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+88;c[r>>2]=c[r>>2]^c[2998]}i=xc;return}function dv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,73,9496,1);i=b;return}function ev(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0;Pd=i;i=i+1008|0;m=Pd+992|0;n=Pd+988|0;o=Pd+984|0;p=Pd+980|0;q=Pd+976|0;r=Pd+972|0;Qd=Pd+968|0;s=P
d+964|0;t=Pd+960|0;Od=Pd+944|0;Rc=Pd+940|0;Za=Pd+936|0;Nb=Pd+932|0;qa=Pd+928|0;td=Pd+924|0;Z=Pd+920|0;Va=Pd+916|0;zc=Pd+912|0;J=Pd+908|0;ca=Pd+904|0;ad=Pd+900|0;Ob=Pd+896|0;Eb=Pd+892|0;uc=Pd+888|0;la=Pd+884|0;_a=Pd+880|0;Id=Pd+876|0;ba=Pd+872|0;cb=Pd+868|0;jb=Pd+864|0;fb=Pd+860|0;kb=Pd+856|0;md=Pd+852|0;ga=Pd+848|0;E=Pd+844|0;ha=Pd+840|0;Hb=Pd+836|0;Qa=Pd+832|0;Kb=Pd+828|0;Ra=Pd+824|0;Aa=Pd+820|0;_=Pd+816|0;Mb=Pd+812|0;ma=Pd+808|0;Qc=Pd+804|0;G=Pd+800|0;Mc=Pd+796|0;Ld=Pd+792|0;pa=Pd+788|0;H=Pd+784|0;od=Pd+780|0;Sc=Pd+776|0;Vc=Pd+772|0;Ba=Pd+768|0;rd=Pd+764|0;Xc=Pd+760|0;_c=Pd+756|0;Ca=Pd+752|0;u=Pd+748|0;Da=Pd+744|0;Md=Pd+740|0;Nd=Pd+736|0;Kc=Pd+732|0;Lc=Pd+728|0;na=Pd+724|0;oa=Pd+720|0;Oc=Pd+716|0;Pc=Pd+712|0;Tc=Pd+708|0;Uc=Pd+704|0;pd=Pd+700|0;qd=Pd+696|0;Yc=Pd+692|0;Zc=Pd+688|0;Nc=Pd+684|0;sd=Pd+680|0;Wc=Pd+676|0;$c=Pd+672|0;Ta=Pd+668|0;Ua=Pd+664|0;F=Pd+660|0;I=Pd+656|0;Cb=Pd+652|0;Db=Pd+648|0;ja=Pd+644|0;ka=Pd+640|0;wd=Pd+636|0;hd=Pd+632|0;fd=Pd+628|0;ua=Pd+624|0;zd=Pd+620|0;cd=Pd+616|0;kd=Pd+612|0;va=Pd+608|0;Dd=Pd+604|0;z=Pd+600|0;x=Pd+596|0;xa=Pd+592|0;Gd=Pd+588|0;nd=Pd+584|0;C=Pd+580|0;ya=Pd+576|0;ud=Pd+572|0;vd=Pd+568|0;dd=Pd+564|0;ed=Pd+560|0;xd=Pd+556|0;yd=Pd+552|0;id=Pd+548|0;jd=Pd+544|0;Bd=Pd+540|0;Cd=Pd+536|0;v=Pd+532|0;w=Pd+528|0;Ed=Pd+524|0;Fd=Pd+520|0;A=Pd+516|0;B=Pd+512|0;Ad=Pd+508|0;Hd=Pd+504|0;ab=Pd+500|0;bb=Pd+496|0;db=Pd+492|0;eb=Pd+488|0;gd=Pd+484|0;ld=Pd+480|0;y=Pd+476|0;D=Pd+472|0;Fb=Pd+468|0;Gb=Pd+464|0;Ib=Pd+460|0;Jb=Pd+456|0;wa=Pd+452|0;za=Pd+448|0;Jd=Pd+444|0;K=Pd+440|0;ea=Pd+436|0;S=Pd+432|0;sa=Pd+428|0;U=Pd+424|0;Q=Pd+420|0;W=Pd+416|0;bd=Pd+412|0;da=Pd+408|0;ia=Pd+404|0;ra=Pd+400|0;N=Pd+396|0;P=Pd+392|0;M=Pd+388|0;O=Pd+384|0;ta=Pd+380|0;L=Pd+376|0;Kd=Pd+372|0;fa=Pd+368|0;V=Pd+364|0;X=Pd+360|0;R=Pd+356|0;T=Pd+352|0;Ja=Pd+348|0;vb=Pd+344|0;Na=Pd+340|0;xb=Pd+336|0;Fa=Pd+332|0;nb=Pd+328|0;tb=Pd+324|0;zb=Pd+320|0;Ha=Pd+316|0;Ia=Pd+312|0;La=Pd+308|0;Ma=Pd+304|0;$=Pd+300|0;Ea=Pd+296|0;Y=Pd+292|0;aa=Pd+288|0;qb=Pd+284|0;sb=Pd+280|0;pb=Pd+276|0
;rb=Pd+272|0;mb=Pd+268|0;ob=Pd+264|0;Ga=Pd+260|0;Ka=Pd+256|0;yb=Pd+252|0;Ab=Pd+248|0;ub=Pd+244|0;wb=Pd+240|0;Oa=Pd+236|0;Vb=Pd+232|0;Wa=Pd+228|0;kc=Pd+224|0;hb=Pd+220|0;nc=Pd+216|0;Qb=Pd+212|0;pc=Pd+208|0;Lb=Pd+204|0;Sa=Pd+200|0;$a=Pd+196|0;gb=Pd+192|0;lb=Pd+188|0;Pb=Pd+184|0;Xa=Pd+180|0;Sb=Pd+176|0;Rb=Pd+172|0;Tb=Pd+168|0;Bb=Pd+164|0;Pa=Pd+160|0;Ya=Pd+156|0;ib=Pd+152|0;lc=Pd+148|0;rc=Pd+144|0;qc=Pd+140|0;sc=Pd+136|0;Ub=Pd+132|0;Wb=Pd+128|0;mc=Pd+124|0;oc=Pd+120|0;wc=Pd+116|0;$b=Pd+112|0;Ac=Pd+108|0;bc=Pd+104|0;Fc=Pd+100|0;ec=Pd+96|0;Jc=Pd+92|0;gc=Pd+88|0;vc=Pd+84|0;yc=Pd+80|0;Dc=Pd+76|0;Ec=Pd+72|0;Hc=Pd+68|0;Ic=Pd+64|0;Bc=Pd+60|0;Yb=Pd+56|0;Xb=Pd+52|0;Zb=Pd+48|0;tc=Pd+44|0;xc=Pd+40|0;Cc=Pd+36|0;Gc=Pd+32|0;cc=Pd+28|0;ic=Pd+24|0;hc=Pd+20|0;jc=Pd+16|0;_b=Pd+12|0;ac=Pd+8|0;dc=Pd+4|0;fc=Pd;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Qd>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Pd+956>>2]=.9238795042037964;g[Pd+952>>2]=.3826834261417389;g[Pd+948>>2]=.7071067690849304;c[Od>>2]=c[Qd>>2];c[q>>2]=(c[q>>2]|0)+(((c[Qd>>2]|0)-1|0)*30<<2);while(1){if((c[Od>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[ma>>2]=+g[u>>2]-+g[Da>>2];g[Md>>2]=+g[c[n>>2]>>2];g[Nd>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Qc>>2]=+g[Md>>2]+ +g[Nd>>2];g[G>>2]=+g[Md>>2]-+g[Nd>>2];g[Kc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Lc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Mc>>2]=+g[Kc>>2]+ +g[Lc>>2];g[Ld>>2]=+g[Kc>>2]-+g[Lc>>2];g[na>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[H>>2]=+g[na>>2]-+g[oa>>2];g[Oc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Pc>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[od>>2]=+g[Oc>>2]+ +g[Pc>>2];g[Sc>>2]=+g[Oc>>2]-+g[Pc>>2];g[Tc>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Uc>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Vc>>2]=+g[Tc>>2]+ 
+g[Uc>>2];g[Ba>>2]=+g[Tc>>2]-+g[Uc>>2];g[pd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[qd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[rd>>2]=+g[pd>>2]+ +g[qd>>2];g[Xc>>2]=+g[pd>>2]-+g[qd>>2];g[Yc>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Zc>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[_c>>2]=+g[Yc>>2]+ +g[Zc>>2];g[Ca>>2]=+g[Zc>>2]-+g[Yc>>2];g[Rc>>2]=+g[Ld>>2]+ +g[Qc>>2];g[Za>>2]=+g[Qc>>2]-+g[Ld>>2];g[Nb>>2]=+g[ma>>2]+ +g[pa>>2];g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[Nc>>2]=+g[Mb>>2]+ +g[Mc>>2];g[sd>>2]=+g[od>>2]+ +g[rd>>2];g[td>>2]=+g[Nc>>2]+ +g[sd>>2];g[Z>>2]=+g[Nc>>2]-+g[sd>>2];g[Ta>>2]=+g[od>>2]-+g[rd>>2];g[Ua>>2]=+g[G>>2]-+g[H>>2];g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[zc>>2]=+g[Ua>>2]-+g[Ta>>2];g[F>>2]=+g[Ba>>2]+ +g[Ca>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[J>>2]=+g[F>>2]+ +g[I>>2];g[ca>>2]=+g[I>>2]-+g[F>>2];g[Wc>>2]=+g[Sc>>2]+ +g[Vc>>2];g[$c>>2]=+g[Xc>>2]+ +g[_c>>2];g[ad>>2]=(+g[Wc>>2]-+g[$c>>2])*.7071067690849304;g[Ob>>2]=(+g[Wc>>2]+ +g[$c>>2])*.7071067690849304;g[Cb>>2]=+g[Mb>>2]-+g[Mc>>2];g[Db>>2]=+g[Ca>>2]-+g[Ba>>2];g[Eb>>2]=+g[Cb>>2]+ +g[Db>>2];g[uc>>2]=+g[Cb>>2]-+g[Db>>2];g[ja>>2]=+g[Sc>>2]-+g[Vc>>2];g[ka>>2]=+g[Xc>>2]-+g[_c>>2];g[la>>2]=(+g[ja>>2]+ +g[ka>>2])*.7071067690849304;g[_a>>2]=(+g[ja>>2]-+g[ka>>2])*.7071067690849304;g[ud>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[vd>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[wd>>2]=+g[ud>>2]+ +g[vd>>2];g[hd>>2]=+g[ud>>2]-+g[vd>>2];g[dd>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ed>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[fd>>2]=+g[dd>>2]+ +g[ed>>2];g[ua>>2]=+g[dd>>2]-+g[ed>>2];g[xd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[yd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[zd>>2]=+g[xd>>2]+ +g[yd>>2];g[cd>>2]=+g[xd>>2]-+g[yd>>2];g[id>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[jd>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[kd>>2]=+g[id>>2]+ +g[jd>>2];g[va>>2]=+g[id>>2]-+g[jd>>2];g[Bd>>2]=+g[c[o>>2]>>2];g[Cd>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Dd>>2]=+g[Bd>>2]+ 
+g[Cd>>2];g[z>>2]=+g[Bd>>2]-+g[Cd>>2];g[v>>2]=+g[c[p>>2]>>2];g[w>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[x>>2]=+g[v>>2]+ +g[w>>2];g[xa>>2]=+g[w>>2]-+g[v>>2];g[Ed>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Fd>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Gd>>2]=+g[Ed>>2]+ +g[Fd>>2];g[nd>>2]=+g[Ed>>2]-+g[Fd>>2];g[A>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[ya>>2]=+g[A>>2]-+g[B>>2];g[Ad>>2]=+g[wd>>2]+ +g[zd>>2];g[Hd>>2]=+g[Dd>>2]+ +g[Gd>>2];g[Id>>2]=+g[Ad>>2]+ +g[Hd>>2];g[ba>>2]=+g[Ad>>2]-+g[Hd>>2];g[ab>>2]=+g[fd>>2]-+g[cd>>2];g[bb>>2]=+g[hd>>2]+ +g[kd>>2];g[cb>>2]=+g[ab>>2]*.3826834261417389+ +g[bb>>2]*.9238795042037964;g[jb>>2]=+g[bb>>2]*.3826834261417389-+g[ab>>2]*.9238795042037964;g[db>>2]=+g[nd>>2]+ +g[x>>2];g[eb>>2]=+g[z>>2]+ +g[C>>2];g[fb>>2]=+g[db>>2]*.3826834261417389+ +g[eb>>2]*.9238795042037964;g[kb>>2]=+g[eb>>2]*.3826834261417389-+g[db>>2]*.9238795042037964;g[gd>>2]=+g[cd>>2]+ +g[fd>>2];g[ld>>2]=+g[hd>>2]-+g[kd>>2];g[md>>2]=+g[gd>>2]*.9238795042037964+ +g[ld>>2]*.3826834261417389;g[ga>>2]=+g[ld>>2]*.9238795042037964-+g[gd>>2]*.3826834261417389;g[y>>2]=+g[nd>>2]-+g[x>>2];g[D>>2]=+g[z>>2]-+g[C>>2];g[E>>2]=+g[y>>2]*.9238795042037964-+g[D>>2]*.3826834261417389;g[ha>>2]=+g[y>>2]*.3826834261417389+ +g[D>>2]*.9238795042037964;g[Fb>>2]=+g[wd>>2]-+g[zd>>2];g[Gb>>2]=+g[ua>>2]-+g[va>>2];g[Hb>>2]=+g[Fb>>2]-+g[Gb>>2];g[Qa>>2]=+g[Fb>>2]+ +g[Gb>>2];g[Ib>>2]=+g[Dd>>2]-+g[Gd>>2];g[Jb>>2]=+g[xa>>2]-+g[ya>>2];g[Kb>>2]=+g[Ib>>2]+ +g[Jb>>2];g[Ra>>2]=+g[Jb>>2]-+g[Ib>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[za>>2]=+g[xa>>2]+ +g[ya>>2];g[Aa>>2]=+g[wa>>2]+ +g[za>>2];g[_>>2]=+g[za>>2]-+g[wa>>2];g[Jd>>2]=+g[td>>2]+ +g[Id>>2];g[K>>2]=+g[Aa>>2]+ +g[J>>2];g[bd>>2]=+g[Rc>>2]+ +g[ad>>2];g[da>>2]=+g[md>>2]+ +g[E>>2];g[ea>>2]=+g[bd>>2]+ +g[da>>2];g[S>>2]=+g[bd>>2]-+g[da>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[sa>>2]=+g[ia>>2]+ 
+g[ra>>2];g[U>>2]=+g[ra>>2]-+g[ia>>2];g[N>>2]=+g[td>>2]-+g[Id>>2];g[P>>2]=+g[J>>2]-+g[Aa>>2];g[M>>2]=+g[(c[q>>2]|0)+56>>2];g[O>>2]=+g[(c[q>>2]|0)+60>>2];g[Q>>2]=+g[M>>2]*+g[N>>2]-+g[O>>2]*+g[P>>2];g[W>>2]=+g[O>>2]*+g[N>>2]+ +g[M>>2]*+g[P>>2];g[Kd>>2]=+g[c[q>>2]>>2];g[fa>>2]=+g[(c[q>>2]|0)+4>>2];g[ta>>2]=+g[Kd>>2]*+g[ea>>2]+ +g[fa>>2]*+g[sa>>2];g[L>>2]=+g[Kd>>2]*+g[sa>>2]-+g[fa>>2]*+g[ea>>2];g[c[m>>2]>>2]=+g[Jd>>2]-+g[ta>>2];g[c[n>>2]>>2]=+g[K>>2]+ +g[L>>2];g[c[o>>2]>>2]=+g[Jd>>2]+ +g[ta>>2];g[c[p>>2]>>2]=+g[L>>2]-+g[K>>2];g[R>>2]=+g[(c[q>>2]|0)+64>>2];g[T>>2]=+g[(c[q>>2]|0)+68>>2];g[V>>2]=+g[R>>2]*+g[S>>2]+ +g[T>>2]*+g[U>>2];g[X>>2]=+g[R>>2]*+g[U>>2]-+g[T>>2]*+g[S>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]-+g[V>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[W>>2]+ +g[X>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Q>>2]+ +g[V>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[X>>2]-+g[W>>2];g[Ha>>2]=+g[Rc>>2]-+g[ad>>2];g[Ia>>2]=+g[ga>>2]-+g[ha>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[vb>>2]=+g[Ha>>2]-+g[Ia>>2];g[La>>2]=+g[E>>2]-+g[md>>2];g[Ma>>2]=+g[qa>>2]-+g[la>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[xb>>2]=+g[Ma>>2]-+g[La>>2];g[$>>2]=+g[Z>>2]+ +g[_>>2];g[Ea>>2]=+g[ba>>2]+ +g[ca>>2];g[Y>>2]=+g[(c[q>>2]|0)+24>>2];g[aa>>2]=+g[(c[q>>2]|0)+28>>2];g[Fa>>2]=+g[Y>>2]*+g[$>>2]-+g[aa>>2]*+g[Ea>>2];g[nb>>2]=+g[aa>>2]*+g[$>>2]+ +g[Y>>2]*+g[Ea>>2];g[qb>>2]=+g[Z>>2]-+g[_>>2];g[sb>>2]=+g[ca>>2]-+g[ba>>2];g[pb>>2]=+g[(c[q>>2]|0)+88>>2];g[rb>>2]=+g[(c[q>>2]|0)+92>>2];g[tb>>2]=+g[pb>>2]*+g[qb>>2]-+g[rb>>2]*+g[sb>>2];g[zb>>2]=+g[rb>>2]*+g[qb>>2]+ +g[pb>>2]*+g[sb>>2];g[Ga>>2]=+g[(c[q>>2]|0)+32>>2];g[Ka>>2]=+g[(c[q>>2]|0)+36>>2];g[mb>>2]=+g[Ga>>2]*+g[Ja>>2]+ +g[Ka>>2]*+g[Na>>2];g[ob>>2]=+g[Ga>>2]*+g[Na>>2]-+g[Ka>>2]*+g[Ja>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]-+g[mb>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[nb>>2]+ +g[ob>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]+ 
+g[mb>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ob>>2]-+g[nb>>2];g[ub>>2]=+g[(c[q>>2]|0)+96>>2];g[wb>>2]=+g[(c[q>>2]|0)+100>>2];g[yb>>2]=+g[ub>>2]*+g[vb>>2]+ +g[wb>>2]*+g[xb>>2];g[Ab>>2]=+g[ub>>2]*+g[xb>>2]-+g[wb>>2]*+g[vb>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[tb>>2]-+g[yb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[zb>>2]+ +g[Ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[tb>>2]+ +g[yb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ab>>2]-+g[zb>>2];g[Lb>>2]=(+g[Hb>>2]+ +g[Kb>>2])*.7071067690849304;g[Oa>>2]=+g[Eb>>2]+ +g[Lb>>2];g[Vb>>2]=+g[Eb>>2]-+g[Lb>>2];g[Sa>>2]=(+g[Qa>>2]+ +g[Ra>>2])*.7071067690849304;g[Wa>>2]=+g[Sa>>2]+ +g[Va>>2];g[kc>>2]=+g[Va>>2]-+g[Sa>>2];g[$a>>2]=+g[Za>>2]+ +g[_a>>2];g[gb>>2]=+g[cb>>2]-+g[fb>>2];g[hb>>2]=+g[$a>>2]+ +g[gb>>2];g[nc>>2]=+g[$a>>2]-+g[gb>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[Pb>>2]=+g[Nb>>2]-+g[Ob>>2];g[Qb>>2]=+g[lb>>2]+ +g[Pb>>2];g[pc>>2]=+g[Pb>>2]-+g[lb>>2];g[Bb>>2]=+g[(c[q>>2]|0)+8>>2];g[Pa>>2]=+g[(c[q>>2]|0)+12>>2];g[Xa>>2]=+g[Bb>>2]*+g[Oa>>2]-+g[Pa>>2]*+g[Wa>>2];g[Sb>>2]=+g[Pa>>2]*+g[Oa>>2]+ +g[Bb>>2]*+g[Wa>>2];g[Ya>>2]=+g[(c[q>>2]|0)+16>>2];g[ib>>2]=+g[(c[q>>2]|0)+20>>2];g[Rb>>2]=+g[Ya>>2]*+g[hb>>2]+ +g[ib>>2]*+g[Qb>>2];g[Tb>>2]=+g[Ya>>2]*+g[Qb>>2]-+g[ib>>2]*+g[hb>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xa>>2]-+g[Rb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Sb>>2]+ +g[Tb>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[Xa>>2]+ +g[Rb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Tb>>2]-+g[Sb>>2];g[Ub>>2]=+g[(c[q>>2]|0)+72>>2];g[Wb>>2]=+g[(c[q>>2]|0)+76>>2];g[lc>>2]=+g[Ub>>2]*+g[Vb>>2]-+g[Wb>>2]*+g[kc>>2];g[rc>>2]=+g[Wb>>2]*+g[Vb>>2]+ +g[Ub>>2]*+g[kc>>2];g[mc>>2]=+g[(c[q>>2]|0)+80>>2];g[oc>>2]=+g[(c[q>>2]|0)+84>>2];g[qc>>2]=+g[mc>>2]*+g[nc>>2]+ +g[oc>>2]*+g[pc>>2];g[sc>>2]=+g[mc>>2]*+g[pc>>2]-+g[oc>>2]*+g[nc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[lc>>2]-+g[qc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[rc>>2]+ +g[sc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[lc>>2]+ 
+g[qc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[sc>>2]-+g[rc>>2];g[vc>>2]=(+g[Ra>>2]-+g[Qa>>2])*.7071067690849304;g[wc>>2]=+g[uc>>2]+ +g[vc>>2];g[$b>>2]=+g[uc>>2]-+g[vc>>2];g[yc>>2]=(+g[Hb>>2]-+g[Kb>>2])*.7071067690849304;g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[bc>>2]=+g[zc>>2]-+g[yc>>2];g[Dc>>2]=+g[Za>>2]-+g[_a>>2];g[Ec>>2]=+g[jb>>2]-+g[kb>>2];g[Fc>>2]=+g[Dc>>2]+ +g[Ec>>2];g[ec>>2]=+g[Dc>>2]-+g[Ec>>2];g[Hc>>2]=+g[Ob>>2]+ +g[Nb>>2];g[Ic>>2]=+g[cb>>2]+ +g[fb>>2];g[Jc>>2]=+g[Hc>>2]-+g[Ic>>2];g[gc>>2]=+g[Ic>>2]+ +g[Hc>>2];g[tc>>2]=+g[(c[q>>2]|0)+40>>2];g[xc>>2]=+g[(c[q>>2]|0)+44>>2];g[Bc>>2]=+g[tc>>2]*+g[wc>>2]-+g[xc>>2]*+g[Ac>>2];g[Yb>>2]=+g[xc>>2]*+g[wc>>2]+ +g[tc>>2]*+g[Ac>>2];g[Cc>>2]=+g[(c[q>>2]|0)+48>>2];g[Gc>>2]=+g[(c[q>>2]|0)+52>>2];g[Xb>>2]=+g[Cc>>2]*+g[Fc>>2]+ +g[Gc>>2]*+g[Jc>>2];g[Zb>>2]=+g[Cc>>2]*+g[Jc>>2]-+g[Gc>>2]*+g[Fc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Bc>>2]-+g[Xb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Bc>>2]+ +g[Xb>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Zb>>2]-+g[Yb>>2];g[_b>>2]=+g[(c[q>>2]|0)+104>>2];g[ac>>2]=+g[(c[q>>2]|0)+108>>2];g[cc>>2]=+g[_b>>2]*+g[$b>>2]-+g[ac>>2]*+g[bc>>2];g[ic>>2]=+g[ac>>2]*+g[$b>>2]+ +g[_b>>2]*+g[bc>>2];g[dc>>2]=+g[(c[q>>2]|0)+112>>2];g[fc>>2]=+g[(c[q>>2]|0)+116>>2];g[hc>>2]=+g[dc>>2]*+g[ec>>2]+ +g[fc>>2]*+g[gc>>2];g[jc>>2]=+g[dc>>2]*+g[gc>>2]-+g[fc>>2]*+g[ec>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]-+g[hc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ic>>2]+ +g[jc>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[cc>>2]+ +g[hc>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[jc>>2]-+g[ic>>2];c[Od>>2]=(c[Od>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+120;c[r>>2]=c[r>>2]^c[2998]}i=Pd;return}function fv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,74,9544,1);i=b;return}function 
gv(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0;wf=i;i=i+1360|0;m=wf+1348|0;n=wf+1344|0;o=wf+1340|0;p=wf+1336|0;q=wf+1332|0;r=wf+1328|0;xf=wf+1324|0;s=wf+1320|0;t=wf+1316|0;vf=wf+1296|0;te=wf+1292|0;ic=wf+1288|0;Ed=wf+1284|0;B=wf+1280|0;L=wf+1276|0;Bc=wf+1272|0;_b=wf+1268|0;Ka=wf+1264|0;na=wf+1260|0;Xb=wf+1256|0;Yb=wf+1252|0;ya=wf+12
48|0;qb=wf+1244|0;cd=wf+1240|0;bd=wf+1236|0;pb=wf+1232|0;Q=wf+1228|0;Zc=wf+1224|0;Wc=wf+1220|0;P=wf+1216|0;xb=wf+1212|0;Oc=wf+1208|0;yb=wf+1204|0;Rc=wf+1200|0;Ic=wf+1196|0;Kc=wf+1192|0;ye=wf+1188|0;nb=wf+1184|0;Hd=wf+1180|0;Jd=wf+1176|0;Ha=wf+1172|0;vb=wf+1168|0;G=wf+1164|0;M=wf+1160|0;pc=wf+1156|0;rc=wf+1152|0;bc=wf+1148|0;dc=wf+1144|0;w=wf+1140|0;C=wf+1136|0;Mb=wf+1132|0;H=wf+1128|0;A=wf+1124|0;Ia=wf+1120|0;se=wf+1116|0;x=wf+1112|0;K=wf+1108|0;Ja=wf+1104|0;u=wf+1100|0;Da=wf+1096|0;y=wf+1092|0;z=wf+1088|0;Vc=wf+1084|0;ce=wf+1080|0;I=wf+1076|0;J=wf+1072|0;_e=wf+1068|0;jc=wf+1064|0;tc=wf+1060|0;Fe=wf+1056|0;ha=wf+1052|0;Cc=wf+1048|0;Mc=wf+1044|0;W=wf+1040|0;we=wf+1036|0;nc=wf+1032|0;Yc=wf+1028|0;Ve=wf+1024|0;xa=wf+1020|0;Gc=wf+1016|0;Qc=wf+1012|0;Fa=wf+1008|0;ff=wf+1004|0;kc=wf+1e3|0;uc=wf+996|0;Ke=wf+992|0;ma=wf+988|0;Dc=wf+984|0;Nc=wf+980|0;Z=wf+976|0;of=wf+972|0;mc=wf+968|0;Xc=wf+964|0;Qe=wf+960|0;sa=wf+956|0;Fc=wf+952|0;Pc=wf+948|0;ba=wf+944|0;We=wf+940|0;da=wf+936|0;Ee=wf+932|0;U=wf+928|0;Ze=wf+924|0;Be=wf+920|0;ga=wf+916|0;V=wf+912|0;ue=wf+908|0;ve=wf+904|0;Ce=wf+900|0;De=wf+896|0;Xe=wf+892|0;Ye=wf+888|0;ea=wf+884|0;fa=wf+880|0;rf=wf+876|0;ta=wf+872|0;Ue=wf+868|0;ca=wf+864|0;uf=wf+860|0;Re=wf+856|0;wa=wf+852|0;Ea=wf+848|0;pf=wf+844|0;qf=wf+840|0;Se=wf+836|0;Te=wf+832|0;sf=wf+828|0;tf=wf+824|0;ua=wf+820|0;va=wf+816|0;bf=wf+812|0;ia=wf+808|0;Je=wf+804|0;X=wf+800|0;ef=wf+796|0;Ge=wf+792|0;la=wf+788|0;Y=wf+784|0;$e=wf+780|0;af=wf+776|0;He=wf+772|0;Ie=wf+768|0;cf=wf+764|0;df=wf+760|0;ja=wf+756|0;ka=wf+752|0;kf=wf+748|0;oa=wf+744|0;Pe=wf+740|0;$=wf+736|0;nf=wf+732|0;Me=wf+728|0;ra=wf+724|0;aa=wf+720|0;hf=wf+716|0;jf=wf+712|0;Ne=wf+708|0;Oe=wf+704|0;lf=wf+700|0;mf=wf+696|0;pa=wf+692|0;qa=wf+688|0;Ec=wf+684|0;Hc=wf+680|0;gf=wf+676|0;xe=wf+672|0;Fd=wf+668|0;Gd=wf+664|0;lc=wf+660|0;oc=wf+656|0;_=wf+652|0;Ga=wf+648|0;Ca=wf+644|0;F=wf+640|0;$b=wf+636|0;ac=wf+632|0;Le=wf+628|0;v=wf+624|0;ze=wf+620|0;La=wf+616|0;xc=wf+612|0;zc=wf+608|0;Ab=wf+604|0;Tb=wf+600|0;Oa=wf+596|0;gb=
wf+592|0;S=wf+588|0;lb=wf+584|0;Fb=wf+580|0;Xa=wf+576|0;Aa=wf+572|0;jb=wf+568|0;Db=wf+564|0;Ta=wf+560|0;sb=wf+556|0;Rb=wf+552|0;Kb=wf+548|0;cb=wf+544|0;Wb=wf+540|0;wc=wf+536|0;Vb=wf+532|0;vc=wf+528|0;zb=wf+524|0;eb=wf+520|0;wb=wf+516|0;fb=wf+512|0;ub=wf+508|0;R=wf+504|0;Va=wf+500|0;O=wf+496|0;Wa=wf+492|0;N=wf+488|0;za=wf+484|0;Sa=wf+480|0;E=wf+476|0;Ra=wf+472|0;D=wf+468|0;rb=wf+464|0;bb=wf+460|0;ob=wf+456|0;ab=wf+452|0;mb=wf+448|0;T=wf+444|0;Ma=wf+440|0;Ae=wf+436|0;Ba=wf+432|0;Ub=wf+428|0;yc=wf+424|0;Qb=wf+420|0;Sb=wf+416|0;Bb=wf+412|0;Hb=wf+408|0;Gb=wf+404|0;Ib=wf+400|0;Na=wf+396|0;tb=wf+392|0;Cb=wf+388|0;Eb=wf+384|0;Pa=wf+380|0;Za=wf+376|0;Ya=wf+372|0;_a=wf+368|0;Jb=wf+364|0;Lb=wf+360|0;Qa=wf+356|0;Ua=wf+352|0;hb=wf+348|0;Ob=wf+344|0;Nb=wf+340|0;Pb=wf+336|0;$a=wf+332|0;db=wf+328|0;ib=wf+324|0;kb=wf+320|0;jd=wf+316|0;td=wf+312|0;pe=wf+308|0;re=wf+304|0;Tc=wf+300|0;he=wf+296|0;Sd=wf+292|0;wd=wf+288|0;Ld=wf+284|0;Dd=wf+280|0;$d=wf+276|0;rd=wf+272|0;$c=wf+268|0;Bd=wf+264|0;Zd=wf+260|0;nd=wf+256|0;fc=wf+252|0;je=wf+248|0;Wd=wf+244|0;yd=wf+240|0;gd=wf+236|0;id=wf+232|0;fd=wf+228|0;hd=wf+224|0;me=wf+220|0;oe=wf+216|0;le=wf+212|0;ne=wf+208|0;Sc=wf+204|0;Rd=wf+200|0;Lc=wf+196|0;Qd=wf+192|0;Jc=wf+188|0;dd=wf+184|0;qd=wf+180|0;Kd=wf+176|0;pd=wf+172|0;Id=wf+168|0;_c=wf+164|0;md=wf+160|0;sc=wf+156|0;ld=wf+152|0;qc=wf+148|0;Zb=wf+144|0;Ud=wf+140|0;ec=wf+136|0;Vd=wf+132|0;cc=wf+128|0;sd=wf+124|0;ud=wf+120|0;kd=wf+116|0;od=wf+112|0;ke=wf+108|0;qe=wf+104|0;ge=wf+100|0;ie=wf+96|0;gc=wf+92|0;Nd=wf+88|0;Md=wf+84|0;Od=wf+80|0;Ac=wf+76|0;Uc=wf+72|0;hc=wf+68|0;ad=wf+64|0;Xd=wf+60|0;be=wf+56|0;ae=wf+52|0;ed=wf+48|0;Pd=wf+44|0;Td=wf+40|0;Yd=wf+36|0;_d=wf+32|0;zd=wf+28|0;ee=wf+24|0;de=wf+20|0;fe=wf+16|0;vd=wf+12|0;xd=wf+8|0;Ad=wf+4|0;Cd=wf;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[xf>>2]=j;c[s>>2]=k;c[t>>2]=l;g[wf+1312>>2]=.25;g[wf+1308>>2]=.9510565400123596;g[wf+1304>>2]=.5877852439880371;g[wf+1300>>2]=.55901700258255;c[vf>>2]=c[xf>>2];c[q>>2]=(c[q>>2]|0)+(((c[xf>>2]|0
)-1|0)*38<<2);while(1){if((c[vf>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[H>>2]=+g[u>>2]-+g[Da>>2];g[y>>2]=+g[c[n>>2]>>2];g[z>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[A>>2]=+g[y>>2]+ +g[z>>2];g[Ia>>2]=+g[y>>2]-+g[z>>2];g[Vc>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[se>>2]=+g[Vc>>2]+ +g[ce>>2];g[x>>2]=+g[Vc>>2]-+g[ce>>2];g[I>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[J>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[Ja>>2]=+g[I>>2]-+g[J>>2];g[te>>2]=+g[Mb>>2]+ +g[se>>2];g[ic>>2]=+g[A>>2]-+g[x>>2];g[Ed>>2]=+g[H>>2]+ +g[K>>2];g[B>>2]=+g[x>>2]+ +g[A>>2];g[L>>2]=+g[H>>2]-+g[K>>2];g[Bc>>2]=+g[Mb>>2]-+g[se>>2];g[_b>>2]=+g[Ia>>2]-+g[Ja>>2];g[Ka>>2]=+g[Ia>>2]+ +g[Ja>>2];g[ue>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[ve>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[We>>2]=+g[ue>>2]+ +g[ve>>2];g[da>>2]=+g[ue>>2]-+g[ve>>2];g[Ce>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[De>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Ee>>2]=+g[Ce>>2]+ +g[De>>2];g[U>>2]=+g[Ce>>2]-+g[De>>2];g[Xe>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ye>>2]=+g[c[o>>2]>>2];g[Ze>>2]=+g[Xe>>2]+ +g[Ye>>2];g[Be>>2]=+g[Xe>>2]-+g[Ye>>2];g[ea>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[fa>>2]=+g[c[p>>2]>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[V>>2]=+g[ea>>2]-+g[fa>>2];g[_e>>2]=+g[We>>2]+ +g[Ze>>2];g[jc>>2]=+g[Ee>>2]-+g[Be>>2];g[tc>>2]=+g[da>>2]+ +g[ga>>2];g[Fe>>2]=+g[Be>>2]+ +g[Ee>>2];g[ha>>2]=+g[da>>2]-+g[ga>>2];g[Cc>>2]=+g[We>>2]-+g[Ze>>2];g[Mc>>2]=+g[U>>2]-+g[V>>2];g[W>>2]=+g[U>>2]+ +g[V>>2];g[pf>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[qf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[rf>>2]=+g[pf>>2]+ +g[qf>>2];g[ta>>2]=+g[pf>>2]-+g[qf>>2];g[Se>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Te>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ue>>2]=+g[Se>>2]+ 
+g[Te>>2];g[ca>>2]=+g[Te>>2]-+g[Se>>2];g[sf>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[tf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[uf>>2]=+g[sf>>2]+ +g[tf>>2];g[Re>>2]=+g[sf>>2]-+g[tf>>2];g[ua>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[va>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[Ea>>2]=+g[va>>2]-+g[ua>>2];g[we>>2]=+g[rf>>2]+ +g[uf>>2];g[nc>>2]=+g[Re>>2]+ +g[Ue>>2];g[Yc>>2]=+g[ta>>2]-+g[wa>>2];g[Ve>>2]=+g[Re>>2]-+g[Ue>>2];g[xa>>2]=+g[ta>>2]+ +g[wa>>2];g[Gc>>2]=+g[rf>>2]-+g[uf>>2];g[Qc>>2]=+g[ca>>2]-+g[Ea>>2];g[Fa>>2]=+g[ca>>2]+ +g[Ea>>2];g[$e>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[af>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[bf>>2]=+g[$e>>2]+ +g[af>>2];g[ia>>2]=+g[$e>>2]-+g[af>>2];g[He>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ie>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Je>>2]=+g[He>>2]+ +g[Ie>>2];g[X>>2]=+g[Ie>>2]-+g[He>>2];g[cf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[df>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ef>>2]=+g[cf>>2]+ +g[df>>2];g[Ge>>2]=+g[cf>>2]-+g[df>>2];g[ja>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ka>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[Y>>2]=+g[ja>>2]-+g[ka>>2];g[ff>>2]=+g[bf>>2]+ +g[ef>>2];g[kc>>2]=+g[Ge>>2]+ +g[Je>>2];g[uc>>2]=+g[ia>>2]+ +g[la>>2];g[Ke>>2]=+g[Ge>>2]-+g[Je>>2];g[ma>>2]=+g[ia>>2]-+g[la>>2];g[Dc>>2]=+g[bf>>2]-+g[ef>>2];g[Nc>>2]=+g[X>>2]-+g[Y>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[hf>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[jf>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[kf>>2]=+g[hf>>2]+ +g[jf>>2];g[oa>>2]=+g[hf>>2]-+g[jf>>2];g[Ne>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Oe>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Pe>>2]=+g[Ne>>2]+ +g[Oe>>2];g[$>>2]=+g[Ne>>2]-+g[Oe>>2];g[lf>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[mf>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[nf>>2]=+g[lf>>2]+ +g[mf>>2];g[Me>>2]=+g[lf>>2]-+g[mf>>2];g[pa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[qa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ra>>2]=+g[pa>>2]+ 
+g[qa>>2];g[aa>>2]=+g[qa>>2]-+g[pa>>2];g[of>>2]=+g[kf>>2]+ +g[nf>>2];g[mc>>2]=+g[Pe>>2]-+g[Me>>2];g[Xc>>2]=+g[oa>>2]-+g[ra>>2];g[Qe>>2]=+g[Me>>2]+ +g[Pe>>2];g[sa>>2]=+g[oa>>2]+ +g[ra>>2];g[Fc>>2]=+g[kf>>2]-+g[nf>>2];g[Pc>>2]=+g[$>>2]-+g[aa>>2];g[ba>>2]=+g[$>>2]+ +g[aa>>2];g[na>>2]=+g[ha>>2]-+g[ma>>2];g[Xb>>2]=+g[Cc>>2]-+g[Dc>>2];g[Yb>>2]=+g[Fc>>2]-+g[Gc>>2];g[ya>>2]=+g[sa>>2]-+g[xa>>2];g[qb>>2]=+g[ba>>2]-+g[Fa>>2];g[cd>>2]=+g[mc>>2]+ +g[nc>>2];g[bd>>2]=+g[jc>>2]+ +g[kc>>2];g[pb>>2]=+g[W>>2]-+g[Z>>2];g[Q>>2]=+g[Qe>>2]-+g[Ve>>2];g[Zc>>2]=+g[Xc>>2]-+g[Yc>>2];g[Wc>>2]=+g[tc>>2]-+g[uc>>2];g[P>>2]=+g[Fe>>2]-+g[Ke>>2];g[xb>>2]=+g[_e>>2]-+g[ff>>2];g[Oc>>2]=+g[Mc>>2]-+g[Nc>>2];g[yb>>2]=+g[of>>2]-+g[we>>2];g[Rc>>2]=+g[Pc>>2]-+g[Qc>>2];g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Ic>>2]=+g[Ec>>2]+ +g[Hc>>2];g[Kc>>2]=(+g[Ec>>2]-+g[Hc>>2])*.55901700258255;g[gf>>2]=+g[_e>>2]+ +g[ff>>2];g[xe>>2]=+g[of>>2]+ +g[we>>2];g[ye>>2]=+g[gf>>2]+ +g[xe>>2];g[nb>>2]=(+g[gf>>2]-+g[xe>>2])*.55901700258255;g[Fd>>2]=+g[tc>>2]+ +g[uc>>2];g[Gd>>2]=+g[Xc>>2]+ +g[Yc>>2];g[Hd>>2]=+g[Fd>>2]+ +g[Gd>>2];g[Jd>>2]=(+g[Fd>>2]-+g[Gd>>2])*.55901700258255;g[_>>2]=+g[W>>2]+ +g[Z>>2];g[Ga>>2]=+g[ba>>2]+ +g[Fa>>2];g[Ha>>2]=+g[_>>2]+ +g[Ga>>2];g[vb>>2]=(+g[_>>2]-+g[Ga>>2])*.55901700258255;g[Ca>>2]=+g[ha>>2]+ +g[ma>>2];g[F>>2]=+g[sa>>2]+ +g[xa>>2];g[G>>2]=(+g[Ca>>2]-+g[F>>2])*.55901700258255;g[M>>2]=+g[Ca>>2]+ +g[F>>2];g[lc>>2]=+g[jc>>2]-+g[kc>>2];g[oc>>2]=+g[mc>>2]-+g[nc>>2];g[pc>>2]=+g[lc>>2]+ +g[oc>>2];g[rc>>2]=(+g[lc>>2]-+g[oc>>2])*.55901700258255;g[$b>>2]=+g[Mc>>2]+ +g[Nc>>2];g[ac>>2]=+g[Pc>>2]+ +g[Qc>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[dc>>2]=(+g[$b>>2]-+g[ac>>2])*.55901700258255;g[Le>>2]=+g[Fe>>2]+ +g[Ke>>2];g[v>>2]=+g[Qe>>2]+ +g[Ve>>2];g[w>>2]=(+g[Le>>2]-+g[v>>2])*.55901700258255;g[C>>2]=+g[Le>>2]+ +g[v>>2];g[ze>>2]=+g[te>>2]+ +g[ye>>2];g[La>>2]=+g[Ha>>2]+ +g[Ka>>2];g[Wb>>2]=+g[B>>2]+ +g[C>>2];g[wc>>2]=+g[M>>2]+ 
+g[L>>2];g[Vb>>2]=+g[(c[q>>2]|0)+32>>2];g[vc>>2]=+g[(c[q>>2]|0)+36>>2];g[xc>>2]=+g[Vb>>2]*+g[Wb>>2]+ +g[vc>>2]*+g[wc>>2];g[zc>>2]=+g[Vb>>2]*+g[wc>>2]-+g[vc>>2]*+g[Wb>>2];g[zb>>2]=+g[xb>>2]*.5877852439880371-+g[yb>>2]*.9510565400123596;g[eb>>2]=+g[xb>>2]*.9510565400123596+ +g[yb>>2]*.5877852439880371;g[ub>>2]=+g[Ka>>2]-+g[Ha>>2]*.25;g[wb>>2]=+g[ub>>2]-+g[vb>>2];g[fb>>2]=+g[vb>>2]+ +g[ub>>2];g[Ab>>2]=+g[wb>>2]-+g[zb>>2];g[Tb>>2]=+g[fb>>2]-+g[eb>>2];g[Oa>>2]=+g[zb>>2]+ +g[wb>>2];g[gb>>2]=+g[eb>>2]+ +g[fb>>2];g[R>>2]=+g[P>>2]*.9510565400123596+ +g[Q>>2]*.5877852439880371;g[Va>>2]=+g[P>>2]*.5877852439880371-+g[Q>>2]*.9510565400123596;g[N>>2]=+g[L>>2]-+g[M>>2]*.25;g[O>>2]=+g[G>>2]+ +g[N>>2];g[Wa>>2]=+g[N>>2]-+g[G>>2];g[S>>2]=+g[O>>2]-+g[R>>2];g[lb>>2]=+g[Wa>>2]-+g[Va>>2];g[Fb>>2]=+g[R>>2]+ +g[O>>2];g[Xa>>2]=+g[Va>>2]+ +g[Wa>>2];g[za>>2]=+g[na>>2]*.9510565400123596+ +g[ya>>2]*.5877852439880371;g[Sa>>2]=+g[na>>2]*.5877852439880371-+g[ya>>2]*.9510565400123596;g[D>>2]=+g[B>>2]-+g[C>>2]*.25;g[E>>2]=+g[w>>2]+ +g[D>>2];g[Ra>>2]=+g[D>>2]-+g[w>>2];g[Aa>>2]=+g[E>>2]+ +g[za>>2];g[jb>>2]=+g[Ra>>2]+ +g[Sa>>2];g[Db>>2]=+g[E>>2]-+g[za>>2];g[Ta>>2]=+g[Ra>>2]-+g[Sa>>2];g[rb>>2]=+g[pb>>2]*.5877852439880371-+g[qb>>2]*.9510565400123596;g[bb>>2]=+g[pb>>2]*.9510565400123596+ +g[qb>>2]*.5877852439880371;g[mb>>2]=+g[te>>2]-+g[ye>>2]*.25;g[ob>>2]=+g[mb>>2]-+g[nb>>2];g[ab>>2]=+g[nb>>2]+ +g[mb>>2];g[sb>>2]=+g[ob>>2]+ +g[rb>>2];g[Rb>>2]=+g[ab>>2]+ +g[bb>>2];g[Kb>>2]=+g[ob>>2]-+g[rb>>2];g[cb>>2]=+g[ab>>2]-+g[bb>>2];g[Ae>>2]=+g[c[q>>2]>>2];g[Ba>>2]=+g[(c[q>>2]|0)+4>>2];g[T>>2]=+g[Ae>>2]*+g[Aa>>2]+ +g[Ba>>2]*+g[S>>2];g[Ma>>2]=+g[Ae>>2]*+g[S>>2]-+g[Ba>>2]*+g[Aa>>2];g[c[m>>2]>>2]=+g[ze>>2]-+g[T>>2];g[c[n>>2]>>2]=+g[La>>2]+ +g[Ma>>2];g[c[o>>2]>>2]=+g[ze>>2]+ +g[T>>2];g[c[p>>2]>>2]=+g[Ma>>2]-+g[La>>2];g[Qb>>2]=+g[(c[q>>2]|0)+24>>2];g[Sb>>2]=+g[(c[q>>2]|0)+28>>2];g[Ub>>2]=+g[Qb>>2]*+g[Rb>>2]-+g[Sb>>2]*+g[Tb>>2];g[yc>>2]=+g[Sb>>2]*+g[Rb>>2]+ 
+g[Qb>>2]*+g[Tb>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ub>>2]-+g[xc>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[yc>>2]+ +g[zc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ub>>2]+ +g[xc>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[zc>>2]-+g[yc>>2];g[Na>>2]=+g[(c[q>>2]|0)+56>>2];g[tb>>2]=+g[(c[q>>2]|0)+60>>2];g[Bb>>2]=+g[Na>>2]*+g[sb>>2]-+g[tb>>2]*+g[Ab>>2];g[Hb>>2]=+g[tb>>2]*+g[sb>>2]+ +g[Na>>2]*+g[Ab>>2];g[Cb>>2]=+g[(c[q>>2]|0)+64>>2];g[Eb>>2]=+g[(c[q>>2]|0)+68>>2];g[Gb>>2]=+g[Cb>>2]*+g[Db>>2]+ +g[Eb>>2]*+g[Fb>>2];g[Ib>>2]=+g[Cb>>2]*+g[Fb>>2]-+g[Eb>>2]*+g[Db>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Bb>>2]-+g[Gb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Hb>>2]+ +g[Ib>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Bb>>2]+ +g[Gb>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[Ib>>2]-+g[Hb>>2];g[Jb>>2]=+g[(c[q>>2]|0)+88>>2];g[Lb>>2]=+g[(c[q>>2]|0)+92>>2];g[Pa>>2]=+g[Jb>>2]*+g[Kb>>2]-+g[Lb>>2]*+g[Oa>>2];g[Za>>2]=+g[Lb>>2]*+g[Kb>>2]+ +g[Jb>>2]*+g[Oa>>2];g[Qa>>2]=+g[(c[q>>2]|0)+96>>2];g[Ua>>2]=+g[(c[q>>2]|0)+100>>2];g[Ya>>2]=+g[Qa>>2]*+g[Ta>>2]+ +g[Ua>>2]*+g[Xa>>2];g[_a>>2]=+g[Qa>>2]*+g[Xa>>2]-+g[Ua>>2]*+g[Ta>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Pa>>2]-+g[Ya>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Za>>2]+ +g[_a>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Pa>>2]+ +g[Ya>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[_a>>2]-+g[Za>>2];g[$a>>2]=+g[(c[q>>2]|0)+120>>2];g[db>>2]=+g[(c[q>>2]|0)+124>>2];g[hb>>2]=+g[$a>>2]*+g[cb>>2]-+g[db>>2]*+g[gb>>2];g[Ob>>2]=+g[db>>2]*+g[cb>>2]+ +g[$a>>2]*+g[gb>>2];g[ib>>2]=+g[(c[q>>2]|0)+128>>2];g[kb>>2]=+g[(c[q>>2]|0)+132>>2];g[Nb>>2]=+g[ib>>2]*+g[jb>>2]+ +g[kb>>2]*+g[lb>>2];g[Pb>>2]=+g[ib>>2]*+g[lb>>2]-+g[kb>>2]*+g[jb>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[hb>>2]-+g[Nb>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ob>>2]+ +g[Pb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[hb>>2]+ +g[Nb>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Pb>>2]-+g[Ob>>2];g[gd>>2]=+g[Bc>>2]+ +g[Ic>>2];g[id>>2]=+g[bc>>2]+ 
+g[_b>>2];g[fd>>2]=+g[(c[q>>2]|0)+72>>2];g[hd>>2]=+g[(c[q>>2]|0)+76>>2];g[jd>>2]=+g[fd>>2]*+g[gd>>2]-+g[hd>>2]*+g[id>>2];g[td>>2]=+g[hd>>2]*+g[gd>>2]+ +g[fd>>2]*+g[id>>2];g[me>>2]=+g[ic>>2]+ +g[pc>>2];g[oe>>2]=+g[Hd>>2]+ +g[Ed>>2];g[le>>2]=+g[(c[q>>2]|0)+112>>2];g[ne>>2]=+g[(c[q>>2]|0)+116>>2];g[pe>>2]=+g[le>>2]*+g[me>>2]+ +g[ne>>2]*+g[oe>>2];g[re>>2]=+g[le>>2]*+g[oe>>2]-+g[ne>>2]*+g[me>>2];g[Sc>>2]=+g[Oc>>2]*.5877852439880371-+g[Rc>>2]*.9510565400123596;g[Rd>>2]=+g[Oc>>2]*.9510565400123596+ +g[Rc>>2]*.5877852439880371;g[Jc>>2]=+g[Bc>>2]-+g[Ic>>2]*.25;g[Lc>>2]=+g[Jc>>2]-+g[Kc>>2];g[Qd>>2]=+g[Kc>>2]+ +g[Jc>>2];g[Tc>>2]=+g[Lc>>2]-+g[Sc>>2];g[he>>2]=+g[Qd>>2]+ +g[Rd>>2];g[Sd>>2]=+g[Qd>>2]-+g[Rd>>2];g[wd>>2]=+g[Lc>>2]+ +g[Sc>>2];g[dd>>2]=+g[bd>>2]*.5877852439880371-+g[cd>>2]*.9510565400123596;g[qd>>2]=+g[bd>>2]*.9510565400123596+ +g[cd>>2]*.5877852439880371;g[Id>>2]=+g[Ed>>2]-+g[Hd>>2]*.25;g[Kd>>2]=+g[Id>>2]-+g[Jd>>2];g[pd>>2]=+g[Jd>>2]+ +g[Id>>2];g[Ld>>2]=+g[dd>>2]+ +g[Kd>>2];g[Dd>>2]=+g[qd>>2]+ +g[pd>>2];g[$d>>2]=+g[Kd>>2]-+g[dd>>2];g[rd>>2]=+g[pd>>2]-+g[qd>>2];g[_c>>2]=+g[Wc>>2]*.5877852439880371-+g[Zc>>2]*.9510565400123596;g[md>>2]=+g[Wc>>2]*.9510565400123596+ +g[Zc>>2]*.5877852439880371;g[qc>>2]=+g[ic>>2]-+g[pc>>2]*.25;g[sc>>2]=+g[qc>>2]-+g[rc>>2];g[ld>>2]=+g[rc>>2]+ +g[qc>>2];g[$c>>2]=+g[sc>>2]-+g[_c>>2];g[Bd>>2]=+g[ld>>2]-+g[md>>2];g[Zd>>2]=+g[sc>>2]+ +g[_c>>2];g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[Zb>>2]=+g[Xb>>2]*.5877852439880371-+g[Yb>>2]*.9510565400123596;g[Ud>>2]=+g[Xb>>2]*.9510565400123596+ +g[Yb>>2]*.5877852439880371;g[cc>>2]=+g[_b>>2]-+g[bc>>2]*.25;g[ec>>2]=+g[cc>>2]-+g[dc>>2];g[Vd>>2]=+g[dc>>2]+ +g[cc>>2];g[fc>>2]=+g[Zb>>2]+ +g[ec>>2];g[je>>2]=+g[Vd>>2]-+g[Ud>>2];g[Wd>>2]=+g[Ud>>2]+ +g[Vd>>2];g[yd>>2]=+g[ec>>2]-+g[Zb>>2];g[kd>>2]=+g[(c[q>>2]|0)+80>>2];g[od>>2]=+g[(c[q>>2]|0)+84>>2];g[sd>>2]=+g[kd>>2]*+g[nd>>2]+ 
+g[od>>2]*+g[rd>>2];g[ud>>2]=+g[kd>>2]*+g[rd>>2]-+g[od>>2]*+g[nd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jd>>2]-+g[sd>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[td>>2]+ +g[ud>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jd>>2]+ +g[sd>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ud>>2]-+g[td>>2];g[ge>>2]=+g[(c[q>>2]|0)+104>>2];g[ie>>2]=+g[(c[q>>2]|0)+108>>2];g[ke>>2]=+g[ge>>2]*+g[he>>2]-+g[ie>>2]*+g[je>>2];g[qe>>2]=+g[ie>>2]*+g[he>>2]+ +g[ge>>2]*+g[je>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ke>>2]-+g[pe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[qe>>2]+ +g[re>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ke>>2]+ +g[pe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[re>>2]-+g[qe>>2];g[Ac>>2]=+g[(c[q>>2]|0)+8>>2];g[Uc>>2]=+g[(c[q>>2]|0)+12>>2];g[gc>>2]=+g[Ac>>2]*+g[Tc>>2]-+g[Uc>>2]*+g[fc>>2];g[Nd>>2]=+g[Uc>>2]*+g[Tc>>2]+ +g[Ac>>2]*+g[fc>>2];g[hc>>2]=+g[(c[q>>2]|0)+16>>2];g[ad>>2]=+g[(c[q>>2]|0)+20>>2];g[Md>>2]=+g[hc>>2]*+g[$c>>2]+ +g[ad>>2]*+g[Ld>>2];g[Od>>2]=+g[hc>>2]*+g[Ld>>2]-+g[ad>>2]*+g[$c>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[gc>>2]-+g[Md>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Nd>>2]+ +g[Od>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[gc>>2]+ +g[Md>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Od>>2]-+g[Nd>>2];g[Pd>>2]=+g[(c[q>>2]|0)+40>>2];g[Td>>2]=+g[(c[q>>2]|0)+44>>2];g[Xd>>2]=+g[Pd>>2]*+g[Sd>>2]-+g[Td>>2]*+g[Wd>>2];g[be>>2]=+g[Td>>2]*+g[Sd>>2]+ +g[Pd>>2]*+g[Wd>>2];g[Yd>>2]=+g[(c[q>>2]|0)+48>>2];g[_d>>2]=+g[(c[q>>2]|0)+52>>2];g[ae>>2]=+g[Yd>>2]*+g[Zd>>2]+ +g[_d>>2]*+g[$d>>2];g[ed>>2]=+g[Yd>>2]*+g[$d>>2]-+g[_d>>2]*+g[Zd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xd>>2]-+g[ae>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[be>>2]+ +g[ed>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Xd>>2]+ +g[ae>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ed>>2]-+g[be>>2];g[vd>>2]=+g[(c[q>>2]|0)+136>>2];g[xd>>2]=+g[(c[q>>2]|0)+140>>2];g[zd>>2]=+g[vd>>2]*+g[wd>>2]-+g[xd>>2]*+g[yd>>2];g[ee>>2]=+g[xd>>2]*+g[wd>>2]+ 
+g[vd>>2]*+g[yd>>2];g[Ad>>2]=+g[(c[q>>2]|0)+144>>2];g[Cd>>2]=+g[(c[q>>2]|0)+148>>2];g[de>>2]=+g[Ad>>2]*+g[Bd>>2]+ +g[Cd>>2]*+g[Dd>>2];g[fe>>2]=+g[Ad>>2]*+g[Dd>>2]-+g[Cd>>2]*+g[Bd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[zd>>2]-+g[de>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[ee>>2]+ +g[fe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[zd>>2]+ +g[de>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[fe>>2]-+g[ee>>2];c[vf>>2]=(c[vf>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+152;c[r>>2]=c[r>>2]^c[2998]}i=wf;return}function hv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,75,9592,1);i=b;return}function iv(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;G=i;i=i+96|0;m=G+84|0;n=G+80|0;o=G+76|0;p=G+72|0;q=G+68|0;H=G+60|0;r=G+56|0;s=G+52|0;F=G+48|0;v=G+44|0;B=G+40|0;z=G+36|0;D=G+32|0;t=G+28|0;u=G+24|0;x=G+20|0;y=G+16|0;C=G+12|0;E=G+8|0;w=G+4|0;A=G;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[G+64>>2]=h;c[H>>2]=j;c[r>>2]=k;c[s>>2]=l;c[F>>2]=c[H>>2];c[q>>2]=(c[q>>2]|0)+((c[H>>2]|0)-1<<1<<2);while(1){if((c[F>>2]|0)>=(c[r>>2]|0))break;g[t>>2]=+g[c[n>>2]>>2];g[u>>2]=+g[c[p>>2]>>2];g[v>>2]=+g[t>>2]-+g[u>>2];g[B>>2]=+g[t>>2]+ +g[u>>2];g[x>>2]=+g[c[m>>2]>>2];g[y>>2]=+g[c[o>>2]>>2];g[z>>2]=+g[x>>2]-+g[y>>2];g[D>>2]=+g[x>>2]+ +g[y>>2];g[w>>2]=+g[c[q>>2]>>2];g[A>>2]=+g[(c[q>>2]|0)+4>>2];g[C>>2]=+g[w>>2]*+g[z>>2]-+g[A>>2]*+g[B>>2];g[E>>2]=+g[A>>2]*+g[z>>2]+ +g[w>>2]*+g[B>>2];g[c[n>>2]>>2]=+g[v>>2]+ +g[C>>2];g[c[m>>2]>>2]=+g[D>>2]-+g[E>>2];g[c[p>>2]>>2]=+g[C>>2]-+g[v>>2];g[c[o>>2]>>2]=+g[D>>2]+ +g[E>>2];c[F>>2]=(c[F>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[s>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[s>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[s>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+8}i=G;return}function jv(a){a=a|0;var 
b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,76,9640,1);i=b;return}function kv(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0,Lc=0,Mc=0,Nc=0,Oc=0,Pc=0,Qc=0,Rc=0,Sc=0,Tc=0,Uc=0,Vc=0,Wc=0,Xc=0,Yc=0,Zc=0,_c=0,$c=0,ad=0,bd=0,cd=0,dd=0,ed=0,fd=0,gd=0,hd=0,id=0,jd=0,kd=0,ld=0,md=0,nd=0,od=0,pd=0,qd=0,rd=0,sd=0,td=0,ud=0,vd=0,wd=0,xd=0,yd=0,zd=0,Ad=0,Bd=0,Cd=0,Dd=0,Ed=0,Fd=0,Gd=0,Hd=0,Id=0,Jd=0,Kd=0,Ld=0,Md=0,Nd=0,Od=0,Pd=0,Qd=0,Rd=0,Sd=0,Td=0,Ud=0,Vd=0,Wd=0,Xd=0,Yd=0,Zd=0,_d=0,$d=0,ae=0,be=0,ce=0,de=0,ee=0,fe=0,ge=0,he=0,ie=0,je=0,ke=0,le=0,me=0,ne=0,oe=0,pe=0,qe=0,re=0,se=0,te=0,ue=0,ve=0,we=0,xe=0,ye=0,ze=0,Ae=0,Be=0,Ce=0,De=0,Ee=0,Fe=0,Ge=0,He=0,Ie=0,Je=0,Ke=0,Le=0,Me=0,Ne=0,Oe=0,Pe=0,Qe=0,Re=0,Se=0,Te=0,Ue=0,Ve=0,We=0,Xe=0,Ye=0,Ze=0,_e=0,$e=0,af=0,bf=0,cf=0,df=0,ef=0,ff=0,gf=0,hf=0,jf=0,kf=0,lf=0,mf=0,nf=0,of=0,pf=0,qf=0,rf=0,sf=0,tf=0,uf=0,vf=0,wf=0,xf=0,yf=0,zf=0,Af=0,Bf=0,Cf=0,Df=0,Ef=0,Ff=0,Gf=0,Hf=0,If=0,Jf=0,Kf=0,Lf=0,Mf=0,Nf=0,Of=0,Pf=0,Qf=0,Rf=0,Sf=0,Tf=0,Uf=0,Vf=0,Wf=0,Xf=0,Yf=0,Zf=0,_f=0,$f=0,ag=0,bg=0,cg=0,dg=0,eg=0,fg=0,gg=0,hg=0,ig=0,jg=0,kg=0,lg=
0,mg=0,ng=0,og=0,pg=0,qg=0,rg=0,sg=0,tg=0,ug=0,vg=0,wg=0,xg=0,yg=0,zg=0,Ag=0,Bg=0,Cg=0,Dg=0,Eg=0,Fg=0,Gg=0,Hg=0,Ig=0,Jg=0,Kg=0,Lg=0,Mg=0,Ng=0,Og=0,Pg=0,Qg=0,Rg=0,Sg=0,Tg=0,Ug=0,Vg=0,Wg=0,Xg=0,Yg=0,Zg=0,_g=0,$g=0,ah=0,bh=0,ch=0,dh=0,eh=0,fh=0,gh=0,hh=0,ih=0,jh=0,kh=0,lh=0,mh=0,nh=0,oh=0,ph=0,qh=0,rh=0,sh=0,th=0,uh=0,vh=0,wh=0,xh=0,yh=0,zh=0,Ah=0,Bh=0,Ch=0,Dh=0,Eh=0,Fh=0,Gh=0,Hh=0,Ih=0,Jh=0,Kh=0,Lh=0,Mh=0,Nh=0,Oh=0,Ph=0,Qh=0,Rh=0,Sh=0,Th=0,Uh=0,Vh=0,Wh=0,Xh=0,Yh=0,Zh=0,_h=0,$h=0,ai=0,bi=0,ci=0,di=0,ei=0,fi=0,gi=0,hi=0,ii=0,ji=0,ki=0,li=0,mi=0,ni=0,oi=0,pi=0,qi=0,ri=0,si=0,ti=0,ui=0,vi=0,wi=0,xi=0,yi=0,zi=0,Ai=0,Bi=0,Ci=0,Di=0,Ei=0,Fi=0,Gi=0,Hi=0,Ii=0,Ji=0,Ki=0,Li=0,Mi=0,Ni=0,Oi=0,Pi=0,Qi=0,Ri=0,Si=0,Ti=0,Ui=0,Vi=0,Wi=0,Xi=0,Yi=0,Zi=0,_i=0,$i=0,aj=0,bj=0,cj=0,dj=0,ej=0,fj=0,gj=0,hj=0,ij=0,jj=0,kj=0,lj=0,mj=0,nj=0,oj=0,pj=0,qj=0,rj=0,sj=0,tj=0,uj=0,vj=0,wj=0,xj=0,yj=0,zj=0,Aj=0,Bj=0,Cj=0,Dj=0,Ej=0,Fj=0,Gj=0,Hj=0,Ij=0,Jj=0,Kj=0,Lj=0,Mj=0,Nj=0,Oj=0,Pj=0,Qj=0,Rj=0;Qj=i;i=i+2320|0;m=Qj+2304|0;n=Qj+2300|0;o=Qj+2296|0;p=Qj+2292|0;q=Qj+2288|0;r=Qj+2284|0;Rj=Qj+2280|0;s=Qj+2276|0;t=Qj+2272|0;Pj=Qj+2240|0;uj=Qj+2236|0;Ed=Qj+2232|0;cg=Qj+2228|0;dh=Qj+2224|0;Mf=Qj+2220|0;ri=Qj+2216|0;ma=Qj+2212|0;Zd=Qj+2208|0;db=Qj+2204|0;pd=Qj+2200|0;pe=Qj+2196|0;Kg=Qj+2192|0;Xf=Qj+2188|0;_g=Qj+2184|0;Ic=Qj+2180|0;Ud=Qj+2176|0;oj=Qj+2172|0;Vb=Qj+2168|0;Bb=Qj+2164|0;fd=Qj+2160|0;Sg=Qj+2156|0;Xg=Qj+2152|0;Kb=Qj+2148|0;gd=Qj+2144|0;Le=Qj+2140|0;qf=Qj+2136|0;zf=Qj+2132|0;Lh=Qj+2128|0;Md=Qj+2124|0;Rd=Qj+2120|0;ug=Qj+2116|0;Kh=Qj+2112|0;$i=Qj+2108|0;Ob=Qj+2104|0;ba=Qj+2100|0;ae=Qj+2096|0;Pg=Qj+2092|0;Wg=Qj+2088|0;La=Qj+2084|0;be=Qj+2080|0;Ee=Qj+2076|0;pf=Qj+2072|0;qg=Qj+2068|0;Ih=Qj+2064|0;Jd=Qj+2060|0;Qd=Qj+2056|0;ng=Qj+2052|0;Hh=Qj+2048|0;Jj=Qj+2044|0;Td=Qj+2040|0;jg=Qj+2036|0;si=Qj+2032|0;Jf=Qj+2028|0;Fh=Qj+2024|0;L=Qj+2020|0;od=Qj+2016|0;Wa=Qj+2012|0;_d=Qj+2008|0;we=Qj+2004|0;Zg=Qj+2e3|0;uf=Qj+1996|0;Lg=Qj+1992|0;Bc=Qj+1988|0;Fd=Qj+1984|0;Mb=Qj+1980|0;_a=Qj+1976|0;B=Qj+1972|0;Fc=Qj+1968|0;mf=Qj+196
4|0;y=Qj+1960|0;bb=Qj+1956|0;Gc=Qj+1952|0;sj=Qj+1948|0;Dc=Qj+1944|0;ka=Qj+1940|0;Ya=Qj+1936|0;pj=Qj+1932|0;Cc=Qj+1928|0;fa=Qj+1924|0;Xa=Qj+1920|0;u=Qj+1916|0;Da=Qj+1912|0;$a=Qj+1908|0;ab=Qj+1904|0;z=Qj+1900|0;A=Qj+1896|0;Vc=Qj+1892|0;ce=Qj+1888|0;qj=Qj+1884|0;rj=Qj+1880|0;ga=Qj+1876|0;ha=Qj+1872|0;ia=Qj+1868|0;ja=Qj+1864|0;Eh=Qj+1860|0;Ni=Qj+1856|0;D=Qj+1852|0;E=Qj+1848|0;da=Qj+1844|0;ea=Qj+1840|0;vg=Qj+1836|0;tj=Qj+1832|0;ag=Qj+1828|0;bg=Qj+1824|0;Kf=Qj+1820|0;Lf=Qj+1816|0;C=Qj+1812|0;la=Qj+1808|0;Za=Qj+1804|0;cb=Qj+1800|0;ne=Qj+1796|0;oe=Qj+1792|0;vf=Qj+1788|0;wf=Qj+1784|0;Ec=Qj+1780|0;Hc=Qj+1776|0;gj=Qj+1772|0;Fe=Qj+1768|0;Na=Qj+1764|0;Fb=Qj+1760|0;ob=Qj+1756|0;Je=Qj+1752|0;Ib=Qj+1748|0;Ub=Qj+1744|0;nj=Qj+1740|0;Ge=Qj+1736|0;Ie=Qj+1732|0;ub=Qj+1728|0;zb=Qj+1724|0;Db=Qj+1720|0;Rb=Qj+1716|0;Cb=Qj+1712|0;xf=Qj+1708|0;yf=Qj+1704|0;aj=Qj+1700|0;bj=Qj+1696|0;cj=Qj+1692|0;dj=Qj+1688|0;ej=Qj+1684|0;fj=Qj+1680|0;mb=Qj+1676|0;nb=Qj+1672|0;Sb=Qj+1668|0;Gb=Qj+1664|0;Hb=Qj+1660|0;Tb=Qj+1656|0;jj=Qj+1652|0;qb=Qj+1648|0;tb=Qj+1644|0;Pb=Qj+1640|0;mj=Qj+1636|0;vb=Qj+1632|0;yb=Qj+1628|0;Qb=Qj+1624|0;hj=Qj+1620|0;ij=Qj+1616|0;rb=Qj+1612|0;sb=Qj+1608|0;kj=Qj+1604|0;lj=Qj+1600|0;wb=Qj+1596|0;xb=Qj+1592|0;pb=Qj+1588|0;Ab=Qj+1584|0;Qg=Qj+1580|0;Rg=Qj+1576|0;Eb=Qj+1572|0;Jb=Qj+1568|0;He=Qj+1564|0;Ke=Qj+1560|0;Kd=Qj+1556|0;Ld=Qj+1552|0;sg=Qj+1548|0;tg=Qj+1544|0;Ti=Qj+1540|0;ye=Qj+1536|0;N=Qj+1532|0;Ga=Qj+1528|0;Q=Qj+1524|0;Ce=Qj+1520|0;Ja=Qj+1516|0;Nb=Qj+1512|0;_i=Qj+1508|0;ze=Qj+1504|0;Be=Qj+1500|0;W=Qj+1496|0;$=Qj+1492|0;Ea=Qj+1488|0;jb=Qj+1484|0;ca=Qj+1480|0;og=Qj+1476|0;pg=Qj+1472|0;Lj=Qj+1468|0;Mj=Qj+1464|0;Nj=Qj+1460|0;Oj=Qj+1456|0;Ri=Qj+1452|0;Si=Qj+1448|0;O=Qj+1444|0;P=Qj+1440|0;kb=Qj+1436|0;Ha=Qj+1432|0;Ia=Qj+1428|0;lb=Qj+1424|0;Wi=Qj+1420|0;S=Qj+1416|0;V=Qj+1412|0;hb=Qj+1408|0;Zi=Qj+1404|0;X=Qj+1400|0;_=Qj+1396|0;ib=Qj+1392|0;Ui=Qj+1388|0;Vi=Qj+1384|0;T=Qj+1380|0;U=Qj+1376|0;Xi=Qj+1372|0;Yi=Qj+1368|0;Y=Qj+1364|0;Z=Qj+1360|0;R=Qj+1356|0;aa=Qj+1352|0;Ng=Qj+1348|0;Og=Qj+1344|0;Fa=
Qj+1340|0;Ka=Qj+1336|0;Ae=Qj+1332|0;De=Qj+1328|0;Hd=Qj+1324|0;Id=Qj+1320|0;lg=Qj+1316|0;mg=Qj+1312|0;xj=Qj+1308|0;vc=Qj+1304|0;Aj=Qj+1300|0;wc=Qj+1296|0;ra=Qj+1292|0;wa=Qj+1288|0;eg=Qj+1284|0;dg=Qj+1280|0;re=Qj+1276|0;qe=Qj+1272|0;Ej=Qj+1268|0;yc=Qj+1264|0;Hj=Qj+1260|0;zc=Qj+1256|0;Ca=Qj+1252|0;J=Qj+1248|0;hg=Qj+1244|0;gg=Qj+1240|0;ue=Qj+1236|0;te=Qj+1232|0;sa=Qj+1228|0;qa=Qj+1224|0;na=Qj+1220|0;va=Qj+1216|0;vj=Qj+1212|0;wj=Qj+1208|0;oa=Qj+1204|0;pa=Qj+1200|0;yj=Qj+1196|0;zj=Qj+1192|0;ta=Qj+1188|0;ua=Qj+1184|0;F=Qj+1180|0;Ba=Qj+1176|0;ya=Qj+1172|0;I=Qj+1168|0;Cj=Qj+1164|0;Dj=Qj+1160|0;za=Qj+1156|0;Aa=Qj+1152|0;Fj=Qj+1148|0;Gj=Qj+1144|0;G=Qj+1140|0;H=Qj+1136|0;Bj=Qj+1132|0;Ij=Qj+1128|0;fg=Qj+1124|0;ig=Qj+1120|0;Hf=Qj+1116|0;If=Qj+1112|0;xa=Qj+1108|0;K=Qj+1104|0;Ua=Qj+1100|0;Va=Qj+1096|0;se=Qj+1092|0;ve=Qj+1088|0;sf=Qj+1084|0;tf=Qj+1080|0;xc=Qj+1076|0;Ac=Qj+1072|0;w=Qj+1068|0;Nc=Qj+1064|0;Kc=Qj+1060|0;Pc=Qj+1056|0;fc=Qj+1052|0;Wc=Qj+1048|0;bc=Qj+1044|0;tc=Qj+1040|0;fb=Qj+1036|0;Zc=Qj+1032|0;Uc=Qj+1028|0;kc=Qj+1024|0;Pa=Qj+1020|0;$c=Qj+1016|0;Sc=Qj+1012|0;oc=Qj+1008|0;Kj=Qj+1004|0;v=Qj+1e3|0;$b=Qj+996|0;ac=Qj+992|0;Wb=Qj+988|0;Jc=Qj+984|0;dc=Qj+980|0;ec=Qj+976|0;eb=Qj+972|0;ic=Qj+968|0;Ta=Qj+964|0;jc=Qj+960|0;Ra=Qj+956|0;Sa=Qj+952|0;M=Qj+948|0;nc=Qj+944|0;Oa=Qj+940|0;mc=Qj+936|0;Ma=Qj+932|0;Lb=Qj+928|0;gb=Qj+924|0;Lc=Qj+920|0;x=Qj+916|0;Qa=Qj+912|0;Xc=Qj+908|0;bd=Qj+904|0;ad=Qj+900|0;cd=Qj+896|0;sc=Qj+892|0;uc=Qj+888|0;Yc=Qj+884|0;_c=Qj+880|0;Qc=Qj+876|0;Yb=Qj+872|0;Xb=Qj+868|0;Zb=Qj+864|0;Mc=Qj+860|0;Oc=Qj+856|0;Rc=Qj+852|0;Tc=Qj+848|0;gc=Qj+844|0;qc=Qj+840|0;pc=Qj+836|0;rc=Qj+832|0;_b=Qj+828|0;cc=Qj+824|0;hc=Qj+820|0;lc=Qj+816|0;Ug=Qj+812|0;zi=Qj+808|0;ah=Qj+804|0;Bi=Qj+800|0;Th=Qj+796|0;hi=Qj+792|0;Ph=Qj+788|0;fi=Qj+784|0;ui=Qj+780|0;ki=Qj+776|0;Gi=Qj+772|0;Yh=Qj+768|0;Oh=Qj+764|0;mi=Qj+760|0;Ei=Qj+756|0;ai=Qj+752|0;Mg=Qj+748|0;Tg=Qj+744|0;Li=Qj+740|0;Mi=Qj+736|0;Yg=Qj+732|0;$g=Qj+728|0;Rh=Qj+724|0;Sh=Qj+720|0;ti=Qj+716|0;Wh=Qj+712|0;qi=Qj+708|0;Xh=Qj+704|0;oi=Qj+70
0|0;pi=Qj+696|0;Gh=Qj+692|0;_h=Qj+688|0;Nh=Qj+684|0;$h=Qj+680|0;Jh=Qj+676|0;Mh=Qj+672|0;bh=Qj+668|0;wi=Qj+664|0;vi=Qj+660|0;xi=Qj+656|0;Jg=Qj+652|0;Vg=Qj+648|0;ch=Qj+644|0;ni=Qj+640|0;ii=Qj+636|0;Pi=Qj+632|0;Oi=Qj+628|0;Qi=Qj+624|0;ei=Qj+620|0;gi=Qj+616|0;ji=Qj+612|0;li=Qj+608|0;Ci=Qj+604|0;Ii=Qj+600|0;Hi=Qj+596|0;Ji=Qj+592|0;yi=Qj+588|0;Ai=Qj+584|0;Di=Qj+580|0;Fi=Qj+576|0;Uh=Qj+572|0;ci=Qj+568|0;bi=Qj+564|0;di=Qj+560|0;Ki=Qj+556|0;Qh=Qj+552|0;Vh=Qj+548|0;Zh=Qj+544|0;Od=Qj+540|0;wd=Qj+536|0;Wd=Qj+532|0;yd=Qj+528|0;Ne=Qj+524|0;bf=Qj+520|0;je=Qj+516|0;$e=Qj+512|0;rd=Qj+508|0;ef=Qj+504|0;Dd=Qj+500|0;Se=Qj+496|0;jd=Qj+492|0;gf=Qj+488|0;Bd=Qj+484|0;We=Qj+480|0;Gd=Qj+476|0;Nd=Qj+472|0;he=Qj+468|0;ie=Qj+464|0;Sd=Qj+460|0;Vd=Qj+456|0;le=Qj+452|0;me=Qj+448|0;qd=Qj+444|0;Qe=Qj+440|0;nd=Qj+436|0;Re=Qj+432|0;ld=Qj+428|0;md=Qj+424|0;$d=Qj+420|0;Ve=Qj+416|0;id=Qj+412|0;Ue=Qj+408|0;ed=Qj+404|0;hd=Qj+400|0;Xd=Qj+396|0;td=Qj+392|0;sd=Qj+388|0;ud=Qj+384|0;dd=Qj+380|0;Pd=Qj+376|0;Yd=Qj+372|0;kd=Qj+368|0;cf=Qj+364|0;jf=Qj+360|0;hf=Qj+356|0;kf=Qj+352|0;_e=Qj+348|0;af=Qj+344|0;df=Qj+340|0;ff=Qj+336|0;zd=Qj+332|0;ee=Qj+328|0;de=Qj+324|0;fe=Qj+320|0;vd=Qj+316|0;xd=Qj+312|0;Ad=Qj+308|0;Cd=Qj+304|0;Oe=Qj+300|0;Ye=Qj+296|0;Xe=Qj+292|0;Ze=Qj+288|0;ge=Qj+284|0;ke=Qj+280|0;Pe=Qj+276|0;Te=Qj+272|0;nf=Qj+268|0;Tf=Qj+264|0;Zf=Qj+260|0;Vf=Qj+256|0;ih=Qj+252|0;yh=Qj+248|0;eh=Qj+244|0;wh=Qj+240|0;Of=Qj+236|0;Bh=Qj+232|0;zg=Qj+228|0;nh=Qj+224|0;Cf=Qj+220|0;Dh=Qj+216|0;xg=Qj+212|0;rh=Qj+208|0;xe=Qj+204|0;Me=Qj+200|0;Eg=Qj+196|0;Fg=Qj+192|0;rf=Qj+188|0;Yf=Qj+184|0;gh=Qj+180|0;hh=Qj+176|0;Nf=Qj+172|0;lh=Qj+168|0;Gf=Qj+164|0;mh=Qj+160|0;Ef=Qj+156|0;Ff=Qj+152|0;kg=Qj+148|0;qh=Qj+144|0;Bf=Qj+140|0;ph=Qj+136|0;rg=Qj+132|0;Af=Qj+128|0;_f=Qj+124|0;Qf=Qj+120|0;Pf=Qj+116|0;Rf=Qj+112|0;lf=Qj+108|0;of=Qj+104|0;$f=Qj+100|0;Df=Qj+96|0;zh=Qj+92|0;Hg=Qj+88|0;Gg=Qj+84|0;Ig=Qj+80|0;vh=Qj+76|0;xh=Qj+72|0;Ah=Qj+68|0;Ch=Qj+64|0;Wf=Qj+60|0;Bg=Qj+56|0;Ag=Qj+52|0;Cg=Qj+48|0;Sf=Qj+44|0;Uf=Qj+40|0;wg=Qj+36|0;yg=Qj+32|0;jh=Qj+28|
0;th=Qj+24|0;sh=Qj+20|0;uh=Qj+16|0;Dg=Qj+12|0;fh=Qj+8|0;kh=Qj+4|0;oh=Qj;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Rj>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Qj+2268>>2]=.8314695954322815;g[Qj+2264>>2]=.5555702447891235;g[Qj+2260>>2]=.19509032368659973;g[Qj+2256>>2]=.9807852506637573;g[Qj+2252>>2]=.9238795042037964;g[Qj+2248>>2]=.3826834261417389;g[Qj+2244>>2]=.7071067690849304;c[Pj>>2]=c[Rj>>2];c[q>>2]=(c[q>>2]|0)+(((c[Rj>>2]|0)-1|0)*62<<2);while(1){if((c[Pj>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[Da>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Mb>>2]=+g[u>>2]+ +g[Da>>2];g[_a>>2]=+g[u>>2]-+g[Da>>2];g[z>>2]=+g[c[n>>2]>>2];g[A>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[Fc>>2]=+g[z>>2]-+g[A>>2];g[Vc>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ce>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[mf>>2]=+g[Vc>>2]+ +g[ce>>2];g[y>>2]=+g[Vc>>2]-+g[ce>>2];g[$a>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2];g[ab>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[Gc>>2]=+g[$a>>2]-+g[ab>>2];g[qj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[rj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ga>>2]=+g[qj>>2]-+g[rj>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ia>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[ja>>2]=+g[ha>>2]+ +g[ia>>2];g[sj>>2]=+g[qj>>2]+ +g[rj>>2];g[Dc>>2]=+g[ia>>2]-+g[ha>>2];g[ka>>2]=+g[ga>>2]+ +g[ja>>2];g[Ya>>2]=+g[ga>>2]-+g[ja>>2];g[Eh>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2];g[Ni>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[D>>2]=+g[Eh>>2]-+g[Ni>>2];g[E>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2];g[da>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[ea>>2]=+g[E>>2]+ +g[da>>2];g[pj>>2]=+g[Eh>>2]+ +g[Ni>>2];g[Cc>>2]=+g[E>>2]-+g[da>>2];g[fa>>2]=+g[D>>2]+ +g[ea>>2];g[Xa>>2]=+g[D>>2]-+g[ea>>2];g[vg>>2]=+g[Mb>>2]+ +g[mf>>2];g[tj>>2]=+g[pj>>2]+ +g[sj>>2];g[uj>>2]=+g[vg>>2]+ +g[tj>>2];g[Ed>>2]=+g[vg>>2]-+g[tj>>2];g[ag>>2]=+g[B>>2]-+g[y>>2];g[bg>>2]=(+g[Xa>>2]-+g[Ya>>2])*.7071067690849304;g[cg>>2]=+g[ag>>2]+ 
+g[bg>>2];g[dh>>2]=+g[ag>>2]-+g[bg>>2];g[Kf>>2]=+g[_a>>2]+ +g[bb>>2];g[Lf>>2]=(+g[fa>>2]+ +g[ka>>2])*.7071067690849304;g[Mf>>2]=+g[Kf>>2]-+g[Lf>>2];g[ri>>2]=+g[Lf>>2]+ +g[Kf>>2];g[C>>2]=+g[y>>2]+ +g[B>>2];g[la>>2]=(+g[fa>>2]-+g[ka>>2])*.7071067690849304;g[ma>>2]=+g[C>>2]+ +g[la>>2];g[Zd>>2]=+g[C>>2]-+g[la>>2];g[Za>>2]=(+g[Xa>>2]+ +g[Ya>>2])*.7071067690849304;g[cb>>2]=+g[_a>>2]-+g[bb>>2];g[db>>2]=+g[Za>>2]+ +g[cb>>2];g[pd>>2]=+g[cb>>2]-+g[Za>>2];g[ne>>2]=+g[Mb>>2]-+g[mf>>2];g[oe>>2]=+g[Dc>>2]-+g[Cc>>2];g[pe>>2]=+g[ne>>2]+ +g[oe>>2];g[Kg>>2]=+g[ne>>2]-+g[oe>>2];g[vf>>2]=+g[pj>>2]-+g[sj>>2];g[wf>>2]=+g[Fc>>2]-+g[Gc>>2];g[Xf>>2]=+g[vf>>2]+ +g[wf>>2];g[_g>>2]=+g[wf>>2]-+g[vf>>2];g[Ec>>2]=+g[Cc>>2]+ +g[Dc>>2];g[Hc>>2]=+g[Fc>>2]+ +g[Gc>>2];g[Ic>>2]=+g[Ec>>2]+ +g[Hc>>2];g[Ud>>2]=+g[Hc>>2]-+g[Ec>>2];g[aj>>2]=+g[c[o>>2]>>2];g[bj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[cj>>2]=+g[aj>>2]+ +g[bj>>2];g[dj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[ej>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2];g[fj>>2]=+g[dj>>2]+ +g[ej>>2];g[gj>>2]=+g[cj>>2]+ +g[fj>>2];g[Fe>>2]=+g[cj>>2]-+g[fj>>2];g[Na>>2]=+g[dj>>2]-+g[ej>>2];g[Fb>>2]=+g[aj>>2]-+g[bj>>2];g[mb>>2]=+g[c[p>>2]>>2];g[nb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2];g[Sb>>2]=+g[nb>>2]-+g[mb>>2];g[Gb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2];g[Hb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2];g[Tb>>2]=+g[Gb>>2]-+g[Hb>>2];g[ob>>2]=+g[mb>>2]+ +g[nb>>2];g[Je>>2]=+g[Sb>>2]-+g[Tb>>2];g[Ib>>2]=+g[Gb>>2]+ +g[Hb>>2];g[Ub>>2]=+g[Sb>>2]+ +g[Tb>>2];g[hj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[ij>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[jj>>2]=+g[hj>>2]+ +g[ij>>2];g[qb>>2]=+g[hj>>2]-+g[ij>>2];g[rb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[sb>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2];g[tb>>2]=+g[rb>>2]+ +g[sb>>2];g[Pb>>2]=+g[rb>>2]-+g[sb>>2];g[kj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2];g[lj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[mj>>2]=+g[kj>>2]+ 
+g[lj>>2];g[vb>>2]=+g[kj>>2]-+g[lj>>2];g[wb>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2];g[xb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2];g[yb>>2]=+g[wb>>2]+ +g[xb>>2];g[Qb>>2]=+g[xb>>2]-+g[wb>>2];g[nj>>2]=+g[jj>>2]+ +g[mj>>2];g[Ge>>2]=+g[Qb>>2]-+g[Pb>>2];g[Ie>>2]=+g[jj>>2]-+g[mj>>2];g[ub>>2]=+g[qb>>2]+ +g[tb>>2];g[zb>>2]=+g[vb>>2]+ +g[yb>>2];g[Db>>2]=+g[vb>>2]-+g[yb>>2];g[Rb>>2]=+g[Pb>>2]+ +g[Qb>>2];g[Cb>>2]=+g[qb>>2]-+g[tb>>2];g[oj>>2]=+g[gj>>2]+ +g[nj>>2];g[Vb>>2]=+g[Rb>>2]+ +g[Ub>>2];g[pb>>2]=+g[Na>>2]-+g[ob>>2];g[Ab>>2]=(+g[ub>>2]-+g[zb>>2])*.7071067690849304;g[Bb>>2]=+g[pb>>2]+ +g[Ab>>2];g[fd>>2]=+g[pb>>2]-+g[Ab>>2];g[Qg>>2]=+g[Fe>>2]-+g[Ge>>2];g[Rg>>2]=+g[Je>>2]-+g[Ie>>2];g[Sg>>2]=+g[Qg>>2]*.3826834261417389+ +g[Rg>>2]*.9238795042037964;g[Xg>>2]=+g[Rg>>2]*.3826834261417389-+g[Qg>>2]*.9238795042037964;g[Eb>>2]=(+g[Cb>>2]+ +g[Db>>2])*.7071067690849304;g[Jb>>2]=+g[Fb>>2]-+g[Ib>>2];g[Kb>>2]=+g[Eb>>2]+ +g[Jb>>2];g[gd>>2]=+g[Jb>>2]-+g[Eb>>2];g[He>>2]=+g[Fe>>2]+ +g[Ge>>2];g[Ke>>2]=+g[Ie>>2]+ +g[Je>>2];g[Le>>2]=+g[He>>2]*.9238795042037964+ +g[Ke>>2]*.3826834261417389;g[qf>>2]=+g[Ke>>2]*.9238795042037964-+g[He>>2]*.3826834261417389;g[xf>>2]=+g[Fb>>2]+ +g[Ib>>2];g[yf>>2]=(+g[ub>>2]+ +g[zb>>2])*.7071067690849304;g[zf>>2]=+g[xf>>2]-+g[yf>>2];g[Lh>>2]=+g[yf>>2]+ +g[xf>>2];g[Kd>>2]=+g[gj>>2]-+g[nj>>2];g[Ld>>2]=+g[Ub>>2]-+g[Rb>>2];g[Md>>2]=+g[Kd>>2]+ +g[Ld>>2];g[Rd>>2]=+g[Ld>>2]-+g[Kd>>2];g[sg>>2]=(+g[Cb>>2]-+g[Db>>2])*.7071067690849304;g[tg>>2]=+g[Na>>2]+ +g[ob>>2];g[ug>>2]=+g[sg>>2]-+g[tg>>2];g[Kh>>2]=+g[tg>>2]+ +g[sg>>2];g[Lj>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[Mj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Nj>>2]=+g[Lj>>2]+ +g[Mj>>2];g[Oj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ri>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Si>>2]=+g[Oj>>2]+ +g[Ri>>2];g[Ti>>2]=+g[Nj>>2]+ 
+g[Si>>2];g[ye>>2]=+g[Nj>>2]-+g[Si>>2];g[N>>2]=+g[Oj>>2]-+g[Ri>>2];g[Ga>>2]=+g[Lj>>2]-+g[Mj>>2];g[O>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[P>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[kb>>2]=+g[O>>2]-+g[P>>2];g[Ha>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Ia>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[lb>>2]=+g[Ha>>2]-+g[Ia>>2];g[Q>>2]=+g[O>>2]+ +g[P>>2];g[Ce>>2]=+g[kb>>2]-+g[lb>>2];g[Ja>>2]=+g[Ha>>2]+ +g[Ia>>2];g[Nb>>2]=+g[kb>>2]+ +g[lb>>2];g[Ui>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Vi>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[Wi>>2]=+g[Ui>>2]+ +g[Vi>>2];g[S>>2]=+g[Ui>>2]-+g[Vi>>2];g[T>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[U>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[V>>2]=+g[T>>2]+ +g[U>>2];g[hb>>2]=+g[T>>2]-+g[U>>2];g[Xi>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Yi>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[Zi>>2]=+g[Xi>>2]+ +g[Yi>>2];g[X>>2]=+g[Xi>>2]-+g[Yi>>2];g[Y>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Z>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[_>>2]=+g[Y>>2]+ +g[Z>>2];g[ib>>2]=+g[Z>>2]-+g[Y>>2];g[_i>>2]=+g[Wi>>2]+ +g[Zi>>2];g[ze>>2]=+g[ib>>2]-+g[hb>>2];g[Be>>2]=+g[Wi>>2]-+g[Zi>>2];g[W>>2]=+g[S>>2]+ +g[V>>2];g[$>>2]=+g[X>>2]+ +g[_>>2];g[Ea>>2]=+g[X>>2]-+g[_>>2];g[jb>>2]=+g[hb>>2]+ +g[ib>>2];g[ca>>2]=+g[S>>2]-+g[V>>2];g[$i>>2]=+g[Ti>>2]+ +g[_i>>2];g[Ob>>2]=+g[jb>>2]+ +g[Nb>>2];g[R>>2]=+g[N>>2]+ +g[Q>>2];g[aa>>2]=(+g[W>>2]-+g[$>>2])*.7071067690849304;g[ba>>2]=+g[R>>2]+ +g[aa>>2];g[ae>>2]=+g[R>>2]-+g[aa>>2];g[Ng>>2]=+g[ye>>2]-+g[ze>>2];g[Og>>2]=+g[Ce>>2]-+g[Be>>2];g[Pg>>2]=+g[Ng>>2]*.3826834261417389-+g[Og>>2]*.9238795042037964;g[Wg>>2]=+g[Ng>>2]*.9238795042037964+ +g[Og>>2]*.3826834261417389;g[Fa>>2]=(+g[ca>>2]+ +g[Ea>>2])*.7071067690849304;g[Ka>>2]=+g[Ga>>2]-+g[Ja>>2];g[La>>2]=+g[Fa>>2]+ +g[Ka>>2];g[be>>2]=+g[Ka>>2]-+g[Fa>>2];g[Ae>>2]=+g[ye>>2]+ +g[ze>>2];g[De>>2]=+g[Be>>2]+ +g[Ce>>2];g[Ee>>2]=+g[Ae>>2]*.9238795042037964-+g[De>>2]*.3826834261417389;g[pf>>2]=+g[Ae>>2]*.3826834261417389+ +g[De>>2]*.9238795042037964;g[og>>2]=+g[Ga>>2]+ 
+g[Ja>>2];g[pg>>2]=(+g[W>>2]+ +g[$>>2])*.7071067690849304;g[qg>>2]=+g[og>>2]-+g[pg>>2];g[Ih>>2]=+g[pg>>2]+ +g[og>>2];g[Hd>>2]=+g[Ti>>2]-+g[_i>>2];g[Id>>2]=+g[Nb>>2]-+g[jb>>2];g[Jd>>2]=+g[Hd>>2]-+g[Id>>2];g[Qd>>2]=+g[Hd>>2]+ +g[Id>>2];g[lg>>2]=+g[Q>>2]-+g[N>>2];g[mg>>2]=(+g[ca>>2]-+g[Ea>>2])*.7071067690849304;g[ng>>2]=+g[lg>>2]+ +g[mg>>2];g[Hh>>2]=+g[lg>>2]-+g[mg>>2];g[vj>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[wj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[xj>>2]=+g[vj>>2]+ +g[wj>>2];g[sa>>2]=+g[vj>>2]-+g[wj>>2];g[oa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pa>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[vc>>2]=+g[oa>>2]-+g[pa>>2];g[yj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[zj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[Aj>>2]=+g[yj>>2]+ +g[zj>>2];g[na>>2]=+g[yj>>2]-+g[zj>>2];g[ta>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2];g[ua>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[wc>>2]=+g[ta>>2]-+g[ua>>2];g[ra>>2]=+g[na>>2]+ +g[qa>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[eg>>2]=+g[sa>>2]+ +g[va>>2];g[dg>>2]=+g[qa>>2]-+g[na>>2];g[re>>2]=+g[vc>>2]-+g[wc>>2];g[qe>>2]=+g[xj>>2]-+g[Aj>>2];g[Cj>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[Dj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ej>>2]=+g[Cj>>2]+ +g[Dj>>2];g[F>>2]=+g[Cj>>2]-+g[Dj>>2];g[za>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[Aa>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2];g[Ba>>2]=+g[za>>2]+ +g[Aa>>2];g[yc>>2]=+g[Aa>>2]-+g[za>>2];g[Fj>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[Gj>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[Hj>>2]=+g[Fj>>2]+ +g[Gj>>2];g[ya>>2]=+g[Fj>>2]-+g[Gj>>2];g[G>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2];g[H>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2];g[I>>2]=+g[G>>2]+ +g[H>>2];g[zc>>2]=+g[G>>2]-+g[H>>2];g[Ca>>2]=+g[ya>>2]-+g[Ba>>2];g[J>>2]=+g[F>>2]-+g[I>>2];g[hg>>2]=+g[F>>2]+ +g[I>>2];g[gg>>2]=+g[ya>>2]+ +g[Ba>>2];g[ue>>2]=+g[yc>>2]-+g[zc>>2];g[te>>2]=+g[Ej>>2]-+g[Hj>>2];g[Bj>>2]=+g[xj>>2]+ +g[Aj>>2];g[Ij>>2]=+g[Ej>>2]+ +g[Hj>>2];g[Jj>>2]=+g[Bj>>2]+ 
+g[Ij>>2];g[Td>>2]=+g[Bj>>2]-+g[Ij>>2];g[fg>>2]=+g[dg>>2]*.3826834261417389+ +g[eg>>2]*.9238795042037964;g[ig>>2]=+g[gg>>2]*.3826834261417389+ +g[hg>>2]*.9238795042037964;g[jg>>2]=+g[fg>>2]-+g[ig>>2];g[si>>2]=+g[fg>>2]+ +g[ig>>2];g[Hf>>2]=+g[eg>>2]*.3826834261417389-+g[dg>>2]*.9238795042037964;g[If>>2]=+g[hg>>2]*.3826834261417389-+g[gg>>2]*.9238795042037964;g[Jf>>2]=+g[Hf>>2]+ +g[If>>2];g[Fh>>2]=+g[Hf>>2]-+g[If>>2];g[xa>>2]=+g[ra>>2]*.9238795042037964+ +g[wa>>2]*.3826834261417389;g[K>>2]=+g[Ca>>2]*.9238795042037964-+g[J>>2]*.3826834261417389;g[L>>2]=+g[xa>>2]+ +g[K>>2];g[od>>2]=+g[K>>2]-+g[xa>>2];g[Ua>>2]=+g[wa>>2]*.9238795042037964-+g[ra>>2]*.3826834261417389;g[Va>>2]=+g[Ca>>2]*.3826834261417389+ +g[J>>2]*.9238795042037964;g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[_d>>2]=+g[Ua>>2]-+g[Va>>2];g[se>>2]=+g[qe>>2]-+g[re>>2];g[ve>>2]=+g[te>>2]+ +g[ue>>2];g[we>>2]=(+g[se>>2]+ +g[ve>>2])*.7071067690849304;g[Zg>>2]=(+g[se>>2]-+g[ve>>2])*.7071067690849304;g[sf>>2]=+g[qe>>2]+ +g[re>>2];g[tf>>2]=+g[ue>>2]-+g[te>>2];g[uf>>2]=(+g[sf>>2]+ +g[tf>>2])*.7071067690849304;g[Lg>>2]=(+g[tf>>2]-+g[sf>>2])*.7071067690849304;g[xc>>2]=+g[vc>>2]+ +g[wc>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[Bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[Fd>>2]=+g[Ac>>2]-+g[xc>>2];g[Kj>>2]=+g[uj>>2]+ +g[Jj>>2];g[v>>2]=+g[$i>>2]+ +g[oj>>2];g[w>>2]=+g[Kj>>2]+ +g[v>>2];g[Nc>>2]=+g[Kj>>2]-+g[v>>2];g[Wb>>2]=+g[Ob>>2]+ +g[Vb>>2];g[Jc>>2]=+g[Bc>>2]+ +g[Ic>>2];g[Kc>>2]=+g[Wb>>2]+ +g[Jc>>2];g[Pc>>2]=+g[Jc>>2]-+g[Wb>>2];g[dc>>2]=+g[$i>>2]-+g[oj>>2];g[ec>>2]=+g[Ic>>2]-+g[Bc>>2];g[fc>>2]=+g[dc>>2]+ +g[ec>>2];g[Wc>>2]=+g[ec>>2]-+g[dc>>2];g[$b>>2]=+g[uj>>2]-+g[Jj>>2];g[ac>>2]=+g[Vb>>2]-+g[Ob>>2];g[bc>>2]=+g[$b>>2]+ +g[ac>>2];g[tc>>2]=+g[$b>>2]-+g[ac>>2];g[eb>>2]=+g[Wa>>2]+ +g[db>>2];g[ic>>2]=+g[ma>>2]-+g[L>>2];g[Ra>>2]=+g[La>>2]*.9807852506637573-+g[ba>>2]*.19509032368659973;g[Sa>>2]=+g[Bb>>2]*.19509032368659973+ +g[Kb>>2]*.9807852506637573;g[Ta>>2]=+g[Ra>>2]+ +g[Sa>>2];g[jc>>2]=+g[Ra>>2]-+g[Sa>>2];g[fb>>2]=+g[Ta>>2]+ 
+g[eb>>2];g[Zc>>2]=+g[ic>>2]-+g[jc>>2];g[Uc>>2]=+g[eb>>2]-+g[Ta>>2];g[kc>>2]=+g[ic>>2]+ +g[jc>>2];g[M>>2]=+g[ma>>2]+ +g[L>>2];g[nc>>2]=+g[db>>2]-+g[Wa>>2];g[Ma>>2]=+g[ba>>2]*.9807852506637573+ +g[La>>2]*.19509032368659973;g[Lb>>2]=+g[Bb>>2]*.9807852506637573-+g[Kb>>2]*.19509032368659973;g[Oa>>2]=+g[Ma>>2]+ +g[Lb>>2];g[mc>>2]=+g[Lb>>2]-+g[Ma>>2];g[Pa>>2]=+g[M>>2]+ +g[Oa>>2];g[$c>>2]=+g[nc>>2]-+g[mc>>2];g[Sc>>2]=+g[M>>2]-+g[Oa>>2];g[oc>>2]=+g[mc>>2]+ +g[nc>>2];g[x>>2]=+g[c[q>>2]>>2];g[Qa>>2]=+g[(c[q>>2]|0)+4>>2];g[gb>>2]=+g[x>>2]*+g[Pa>>2]+ +g[Qa>>2]*+g[fb>>2];g[Lc>>2]=+g[x>>2]*+g[fb>>2]-+g[Qa>>2]*+g[Pa>>2];g[c[m>>2]>>2]=+g[w>>2]-+g[gb>>2];g[c[n>>2]>>2]=+g[Kc>>2]+ +g[Lc>>2];g[c[o>>2]>>2]=+g[w>>2]+ +g[gb>>2];g[c[p>>2]>>2]=+g[Lc>>2]-+g[Kc>>2];g[sc>>2]=+g[(c[q>>2]|0)+184>>2];g[uc>>2]=+g[(c[q>>2]|0)+188>>2];g[Xc>>2]=+g[sc>>2]*+g[tc>>2]-+g[uc>>2]*+g[Wc>>2];g[bd>>2]=+g[uc>>2]*+g[tc>>2]+ +g[sc>>2]*+g[Wc>>2];g[Yc>>2]=+g[(c[q>>2]|0)+192>>2];g[_c>>2]=+g[(c[q>>2]|0)+196>>2];g[ad>>2]=+g[Yc>>2]*+g[Zc>>2]+ +g[_c>>2]*+g[$c>>2];g[cd>>2]=+g[Yc>>2]*+g[$c>>2]-+g[_c>>2]*+g[Zc>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Xc>>2]-+g[ad>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[bd>>2]+ +g[cd>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[Xc>>2]+ +g[ad>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[cd>>2]-+g[bd>>2];g[Mc>>2]=+g[(c[q>>2]|0)+120>>2];g[Oc>>2]=+g[(c[q>>2]|0)+124>>2];g[Qc>>2]=+g[Mc>>2]*+g[Nc>>2]-+g[Oc>>2]*+g[Pc>>2];g[Yb>>2]=+g[Oc>>2]*+g[Nc>>2]+ +g[Mc>>2]*+g[Pc>>2];g[Rc>>2]=+g[(c[q>>2]|0)+128>>2];g[Tc>>2]=+g[(c[q>>2]|0)+132>>2];g[Xb>>2]=+g[Rc>>2]*+g[Sc>>2]+ +g[Tc>>2]*+g[Uc>>2];g[Zb>>2]=+g[Rc>>2]*+g[Uc>>2]-+g[Tc>>2]*+g[Sc>>2];g[(c[m>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Qc>>2]-+g[Xb>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Yb>>2]+ +g[Zb>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Qc>>2]+ +g[Xb>>2];g[(c[p>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Zb>>2]-+g[Yb>>2];g[_b>>2]=+g[(c[q>>2]|0)+56>>2];g[cc>>2]=+g[(c[q>>2]|0)+60>>2];g[gc>>2]=+g[_b>>2]*+g[bc>>2]-+g[cc>>2]*+g[fc>>2];g[qc>>2]=+g[cc>>2]*+g[bc>>2]+ 
+g[_b>>2]*+g[fc>>2];g[hc>>2]=+g[(c[q>>2]|0)+64>>2];g[lc>>2]=+g[(c[q>>2]|0)+68>>2];g[pc>>2]=+g[hc>>2]*+g[kc>>2]+ +g[lc>>2]*+g[oc>>2];g[rc>>2]=+g[hc>>2]*+g[oc>>2]-+g[lc>>2]*+g[kc>>2];g[(c[m>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[gc>>2]-+g[pc>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[qc>>2]+ +g[rc>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[gc>>2]+ +g[pc>>2];g[(c[p>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[rc>>2]-+g[qc>>2];g[Mg>>2]=+g[Kg>>2]+ +g[Lg>>2];g[Tg>>2]=+g[Pg>>2]+ +g[Sg>>2];g[Ug>>2]=+g[Mg>>2]+ +g[Tg>>2];g[zi>>2]=+g[Mg>>2]-+g[Tg>>2];g[Yg>>2]=+g[Wg>>2]+ +g[Xg>>2];g[$g>>2]=+g[Zg>>2]+ +g[_g>>2];g[ah>>2]=+g[Yg>>2]+ +g[$g>>2];g[Bi>>2]=+g[$g>>2]-+g[Yg>>2];g[Rh>>2]=+g[Pg>>2]-+g[Sg>>2];g[Sh>>2]=+g[_g>>2]-+g[Zg>>2];g[Th>>2]=+g[Rh>>2]+ +g[Sh>>2];g[hi>>2]=+g[Sh>>2]-+g[Rh>>2];g[Li>>2]=+g[Kg>>2]-+g[Lg>>2];g[Mi>>2]=+g[Xg>>2]-+g[Wg>>2];g[Ph>>2]=+g[Li>>2]+ +g[Mi>>2];g[fi>>2]=+g[Li>>2]-+g[Mi>>2];g[ti>>2]=+g[ri>>2]-+g[si>>2];g[Wh>>2]=+g[dh>>2]-+g[Fh>>2];g[oi>>2]=+g[Ih>>2]*.19509032368659973-+g[Hh>>2]*.9807852506637573;g[pi>>2]=+g[Lh>>2]*.19509032368659973-+g[Kh>>2]*.9807852506637573;g[qi>>2]=+g[oi>>2]+ +g[pi>>2];g[Xh>>2]=+g[oi>>2]-+g[pi>>2];g[ui>>2]=+g[qi>>2]+ +g[ti>>2];g[ki>>2]=+g[Wh>>2]-+g[Xh>>2];g[Gi>>2]=+g[ti>>2]-+g[qi>>2];g[Yh>>2]=+g[Wh>>2]+ +g[Xh>>2];g[Gh>>2]=+g[dh>>2]+ +g[Fh>>2];g[_h>>2]=+g[si>>2]+ +g[ri>>2];g[Jh>>2]=+g[Hh>>2]*.19509032368659973+ +g[Ih>>2]*.9807852506637573;g[Mh>>2]=+g[Kh>>2]*.19509032368659973+ +g[Lh>>2]*.9807852506637573;g[Nh>>2]=+g[Jh>>2]-+g[Mh>>2];g[$h>>2]=+g[Jh>>2]+ +g[Mh>>2];g[Oh>>2]=+g[Gh>>2]+ +g[Nh>>2];g[mi>>2]=+g[$h>>2]+ +g[_h>>2];g[Ei>>2]=+g[Gh>>2]-+g[Nh>>2];g[ai>>2]=+g[_h>>2]-+g[$h>>2];g[Jg>>2]=+g[(c[q>>2]|0)+40>>2];g[Vg>>2]=+g[(c[q>>2]|0)+44>>2];g[bh>>2]=+g[Jg>>2]*+g[Ug>>2]-+g[Vg>>2]*+g[ah>>2];g[wi>>2]=+g[Vg>>2]*+g[Ug>>2]+ +g[Jg>>2]*+g[ah>>2];g[ch>>2]=+g[(c[q>>2]|0)+48>>2];g[ni>>2]=+g[(c[q>>2]|0)+52>>2];g[vi>>2]=+g[ch>>2]*+g[Oh>>2]+ 
+g[ni>>2]*+g[ui>>2];g[xi>>2]=+g[ch>>2]*+g[ui>>2]-+g[ni>>2]*+g[Oh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[bh>>2]-+g[vi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wi>>2]+ +g[xi>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[bh>>2]+ +g[vi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[xi>>2]-+g[wi>>2];g[ei>>2]=+g[(c[q>>2]|0)+232>>2];g[gi>>2]=+g[(c[q>>2]|0)+236>>2];g[ii>>2]=+g[ei>>2]*+g[fi>>2]-+g[gi>>2]*+g[hi>>2];g[Pi>>2]=+g[gi>>2]*+g[fi>>2]+ +g[ei>>2]*+g[hi>>2];g[ji>>2]=+g[(c[q>>2]|0)+240>>2];g[li>>2]=+g[(c[q>>2]|0)+244>>2];g[Oi>>2]=+g[ji>>2]*+g[ki>>2]+ +g[li>>2]*+g[mi>>2];g[Qi>>2]=+g[ji>>2]*+g[mi>>2]-+g[li>>2]*+g[ki>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ii>>2]-+g[Oi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Pi>>2]+ +g[Qi>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[ii>>2]+ +g[Oi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Qi>>2]-+g[Pi>>2];g[yi>>2]=+g[(c[q>>2]|0)+168>>2];g[Ai>>2]=+g[(c[q>>2]|0)+172>>2];g[Ci>>2]=+g[yi>>2]*+g[zi>>2]-+g[Ai>>2]*+g[Bi>>2];g[Ii>>2]=+g[Ai>>2]*+g[zi>>2]+ +g[yi>>2]*+g[Bi>>2];g[Di>>2]=+g[(c[q>>2]|0)+176>>2];g[Fi>>2]=+g[(c[q>>2]|0)+180>>2];g[Hi>>2]=+g[Di>>2]*+g[Ei>>2]+ +g[Fi>>2]*+g[Gi>>2];g[Ji>>2]=+g[Di>>2]*+g[Gi>>2]-+g[Fi>>2]*+g[Ei>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ci>>2]-+g[Hi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ii>>2]+ +g[Ji>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ci>>2]+ +g[Hi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[Ji>>2]-+g[Ii>>2];g[Ki>>2]=+g[(c[q>>2]|0)+104>>2];g[Qh>>2]=+g[(c[q>>2]|0)+108>>2];g[Uh>>2]=+g[Ki>>2]*+g[Ph>>2]-+g[Qh>>2]*+g[Th>>2];g[ci>>2]=+g[Qh>>2]*+g[Ph>>2]+ +g[Ki>>2]*+g[Th>>2];g[Vh>>2]=+g[(c[q>>2]|0)+112>>2];g[Zh>>2]=+g[(c[q>>2]|0)+116>>2];g[bi>>2]=+g[Vh>>2]*+g[Yh>>2]+ +g[Zh>>2]*+g[ai>>2];g[di>>2]=+g[Vh>>2]*+g[ai>>2]-+g[Zh>>2]*+g[Yh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Uh>>2]-+g[bi>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ci>>2]+ +g[di>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Uh>>2]+ +g[bi>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[di>>2]-+g[ci>>2];g[Gd>>2]=+g[Ed>>2]+ 
+g[Fd>>2];g[Nd>>2]=(+g[Jd>>2]+ +g[Md>>2])*.7071067690849304;g[Od>>2]=+g[Gd>>2]+ +g[Nd>>2];g[wd>>2]=+g[Gd>>2]-+g[Nd>>2];g[Sd>>2]=(+g[Qd>>2]+ +g[Rd>>2])*.7071067690849304;g[Vd>>2]=+g[Td>>2]+ +g[Ud>>2];g[Wd>>2]=+g[Sd>>2]+ +g[Vd>>2];g[yd>>2]=+g[Vd>>2]-+g[Sd>>2];g[le>>2]=(+g[Jd>>2]-+g[Md>>2])*.7071067690849304;g[me>>2]=+g[Ud>>2]-+g[Td>>2];g[Ne>>2]=+g[le>>2]+ +g[me>>2];g[bf>>2]=+g[me>>2]-+g[le>>2];g[he>>2]=+g[Ed>>2]-+g[Fd>>2];g[ie>>2]=(+g[Rd>>2]-+g[Qd>>2])*.7071067690849304;g[je>>2]=+g[he>>2]+ +g[ie>>2];g[$e>>2]=+g[he>>2]-+g[ie>>2];g[qd>>2]=+g[od>>2]+ +g[pd>>2];g[Qe>>2]=+g[Zd>>2]-+g[_d>>2];g[ld>>2]=+g[be>>2]*.5555702447891235-+g[ae>>2]*.8314695954322815;g[md>>2]=+g[fd>>2]*.8314695954322815+ +g[gd>>2]*.5555702447891235;g[nd>>2]=+g[ld>>2]+ +g[md>>2];g[Re>>2]=+g[ld>>2]-+g[md>>2];g[rd>>2]=+g[nd>>2]+ +g[qd>>2];g[ef>>2]=+g[Qe>>2]-+g[Re>>2];g[Dd>>2]=+g[qd>>2]-+g[nd>>2];g[Se>>2]=+g[Qe>>2]+ +g[Re>>2];g[$d>>2]=+g[Zd>>2]+ +g[_d>>2];g[Ve>>2]=+g[pd>>2]-+g[od>>2];g[ed>>2]=+g[ae>>2]*.5555702447891235+ +g[be>>2]*.8314695954322815;g[hd>>2]=+g[fd>>2]*.5555702447891235-+g[gd>>2]*.8314695954322815;g[id>>2]=+g[ed>>2]+ +g[hd>>2];g[Ue>>2]=+g[hd>>2]-+g[ed>>2];g[jd>>2]=+g[$d>>2]+ +g[id>>2];g[gf>>2]=+g[Ve>>2]-+g[Ue>>2];g[Bd>>2]=+g[$d>>2]-+g[id>>2];g[We>>2]=+g[Ue>>2]+ +g[Ve>>2];g[dd>>2]=+g[(c[q>>2]|0)+24>>2];g[Pd>>2]=+g[(c[q>>2]|0)+28>>2];g[Xd>>2]=+g[dd>>2]*+g[Od>>2]-+g[Pd>>2]*+g[Wd>>2];g[td>>2]=+g[Pd>>2]*+g[Od>>2]+ +g[dd>>2]*+g[Wd>>2];g[Yd>>2]=+g[(c[q>>2]|0)+32>>2];g[kd>>2]=+g[(c[q>>2]|0)+36>>2];g[sd>>2]=+g[Yd>>2]*+g[jd>>2]+ +g[kd>>2]*+g[rd>>2];g[ud>>2]=+g[Yd>>2]*+g[rd>>2]-+g[kd>>2]*+g[jd>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Xd>>2]-+g[sd>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[td>>2]+ +g[ud>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Xd>>2]+ +g[sd>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ud>>2]-+g[td>>2];g[_e>>2]=+g[(c[q>>2]|0)+216>>2];g[af>>2]=+g[(c[q>>2]|0)+220>>2];g[cf>>2]=+g[_e>>2]*+g[$e>>2]-+g[af>>2]*+g[bf>>2];g[jf>>2]=+g[af>>2]*+g[$e>>2]+ 
+g[_e>>2]*+g[bf>>2];g[df>>2]=+g[(c[q>>2]|0)+224>>2];g[ff>>2]=+g[(c[q>>2]|0)+228>>2];g[hf>>2]=+g[df>>2]*+g[ef>>2]+ +g[ff>>2]*+g[gf>>2];g[kf>>2]=+g[df>>2]*+g[gf>>2]-+g[ff>>2]*+g[ef>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[cf>>2]-+g[hf>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[jf>>2]+ +g[kf>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[cf>>2]+ +g[hf>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[kf>>2]-+g[jf>>2];g[vd>>2]=+g[(c[q>>2]|0)+152>>2];g[xd>>2]=+g[(c[q>>2]|0)+156>>2];g[zd>>2]=+g[vd>>2]*+g[wd>>2]-+g[xd>>2]*+g[yd>>2];g[ee>>2]=+g[xd>>2]*+g[wd>>2]+ +g[vd>>2]*+g[yd>>2];g[Ad>>2]=+g[(c[q>>2]|0)+160>>2];g[Cd>>2]=+g[(c[q>>2]|0)+164>>2];g[de>>2]=+g[Ad>>2]*+g[Bd>>2]+ +g[Cd>>2]*+g[Dd>>2];g[fe>>2]=+g[Ad>>2]*+g[Dd>>2]-+g[Cd>>2]*+g[Bd>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[zd>>2]-+g[de>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[ee>>2]+ +g[fe>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[zd>>2]+ +g[de>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[fe>>2]-+g[ee>>2];g[ge>>2]=+g[(c[q>>2]|0)+88>>2];g[ke>>2]=+g[(c[q>>2]|0)+92>>2];g[Oe>>2]=+g[ge>>2]*+g[je>>2]-+g[ke>>2]*+g[Ne>>2];g[Ye>>2]=+g[ke>>2]*+g[je>>2]+ +g[ge>>2]*+g[Ne>>2];g[Pe>>2]=+g[(c[q>>2]|0)+96>>2];g[Te>>2]=+g[(c[q>>2]|0)+100>>2];g[Xe>>2]=+g[Pe>>2]*+g[Se>>2]+ +g[Te>>2]*+g[We>>2];g[Ze>>2]=+g[Pe>>2]*+g[We>>2]-+g[Te>>2]*+g[Se>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Oe>>2]-+g[Xe>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ye>>2]+ +g[Ze>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Oe>>2]+ +g[Xe>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Ze>>2]-+g[Ye>>2];g[xe>>2]=+g[pe>>2]+ +g[we>>2];g[Me>>2]=+g[Ee>>2]+ +g[Le>>2];g[nf>>2]=+g[xe>>2]+ +g[Me>>2];g[Tf>>2]=+g[xe>>2]-+g[Me>>2];g[rf>>2]=+g[pf>>2]+ +g[qf>>2];g[Yf>>2]=+g[uf>>2]+ +g[Xf>>2];g[Zf>>2]=+g[rf>>2]+ +g[Yf>>2];g[Vf>>2]=+g[Yf>>2]-+g[rf>>2];g[gh>>2]=+g[Ee>>2]-+g[Le>>2];g[hh>>2]=+g[Xf>>2]-+g[uf>>2];g[ih>>2]=+g[gh>>2]+ +g[hh>>2];g[yh>>2]=+g[hh>>2]-+g[gh>>2];g[Eg>>2]=+g[pe>>2]-+g[we>>2];g[Fg>>2]=+g[qf>>2]-+g[pf>>2];g[eh>>2]=+g[Eg>>2]+ 
+g[Fg>>2];g[wh>>2]=+g[Eg>>2]-+g[Fg>>2];g[Nf>>2]=+g[Jf>>2]+ +g[Mf>>2];g[lh>>2]=+g[cg>>2]-+g[jg>>2];g[Ef>>2]=+g[qg>>2]*.8314695954322815-+g[ng>>2]*.5555702447891235;g[Ff>>2]=+g[ug>>2]*.5555702447891235+ +g[zf>>2]*.8314695954322815;g[Gf>>2]=+g[Ef>>2]+ +g[Ff>>2];g[mh>>2]=+g[Ef>>2]-+g[Ff>>2];g[Of>>2]=+g[Gf>>2]+ +g[Nf>>2];g[Bh>>2]=+g[lh>>2]-+g[mh>>2];g[zg>>2]=+g[Nf>>2]-+g[Gf>>2];g[nh>>2]=+g[lh>>2]+ +g[mh>>2];g[kg>>2]=+g[cg>>2]+ +g[jg>>2];g[qh>>2]=+g[Mf>>2]-+g[Jf>>2];g[rg>>2]=+g[ng>>2]*.8314695954322815+ +g[qg>>2]*.5555702447891235;g[Af>>2]=+g[ug>>2]*.8314695954322815-+g[zf>>2]*.5555702447891235;g[Bf>>2]=+g[rg>>2]+ +g[Af>>2];g[ph>>2]=+g[Af>>2]-+g[rg>>2];g[Cf>>2]=+g[kg>>2]+ +g[Bf>>2];g[Dh>>2]=+g[qh>>2]-+g[ph>>2];g[xg>>2]=+g[kg>>2]-+g[Bf>>2];g[rh>>2]=+g[ph>>2]+ +g[qh>>2];g[lf>>2]=+g[(c[q>>2]|0)+8>>2];g[of>>2]=+g[(c[q>>2]|0)+12>>2];g[_f>>2]=+g[lf>>2]*+g[nf>>2]-+g[of>>2]*+g[Zf>>2];g[Qf>>2]=+g[of>>2]*+g[nf>>2]+ +g[lf>>2]*+g[Zf>>2];g[$f>>2]=+g[(c[q>>2]|0)+16>>2];g[Df>>2]=+g[(c[q>>2]|0)+20>>2];g[Pf>>2]=+g[$f>>2]*+g[Cf>>2]+ +g[Df>>2]*+g[Of>>2];g[Rf>>2]=+g[$f>>2]*+g[Of>>2]-+g[Df>>2]*+g[Cf>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[_f>>2]-+g[Pf>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Qf>>2]+ +g[Rf>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[_f>>2]+ +g[Pf>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Rf>>2]-+g[Qf>>2];g[vh>>2]=+g[(c[q>>2]|0)+200>>2];g[xh>>2]=+g[(c[q>>2]|0)+204>>2];g[zh>>2]=+g[vh>>2]*+g[wh>>2]-+g[xh>>2]*+g[yh>>2];g[Hg>>2]=+g[xh>>2]*+g[wh>>2]+ +g[vh>>2]*+g[yh>>2];g[Ah>>2]=+g[(c[q>>2]|0)+208>>2];g[Ch>>2]=+g[(c[q>>2]|0)+212>>2];g[Gg>>2]=+g[Ah>>2]*+g[Bh>>2]+ +g[Ch>>2]*+g[Dh>>2];g[Ig>>2]=+g[Ah>>2]*+g[Dh>>2]-+g[Ch>>2]*+g[Bh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[zh>>2]-+g[Gg>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Hg>>2]+ +g[Ig>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[zh>>2]+ 
+g[Gg>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[Ig>>2]-+g[Hg>>2];g[Sf>>2]=+g[(c[q>>2]|0)+136>>2];g[Uf>>2]=+g[(c[q>>2]|0)+140>>2];g[Wf>>2]=+g[Sf>>2]*+g[Tf>>2]-+g[Uf>>2]*+g[Vf>>2];g[Bg>>2]=+g[Uf>>2]*+g[Tf>>2]+ +g[Sf>>2]*+g[Vf>>2];g[wg>>2]=+g[(c[q>>2]|0)+144>>2];g[yg>>2]=+g[(c[q>>2]|0)+148>>2];g[Ag>>2]=+g[wg>>2]*+g[xg>>2]+ +g[yg>>2]*+g[zg>>2];g[Cg>>2]=+g[wg>>2]*+g[zg>>2]-+g[yg>>2]*+g[xg>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Wf>>2]-+g[Ag>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Bg>>2]+ +g[Cg>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Wf>>2]+ +g[Ag>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[Cg>>2]-+g[Bg>>2];g[Dg>>2]=+g[(c[q>>2]|0)+72>>2];g[fh>>2]=+g[(c[q>>2]|0)+76>>2];g[jh>>2]=+g[Dg>>2]*+g[eh>>2]-+g[fh>>2]*+g[ih>>2];g[th>>2]=+g[fh>>2]*+g[eh>>2]+ +g[Dg>>2]*+g[ih>>2];g[kh>>2]=+g[(c[q>>2]|0)+80>>2];g[oh>>2]=+g[(c[q>>2]|0)+84>>2];g[sh>>2]=+g[kh>>2]*+g[nh>>2]+ +g[oh>>2]*+g[rh>>2];g[uh>>2]=+g[kh>>2]*+g[rh>>2]-+g[oh>>2]*+g[nh>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jh>>2]-+g[sh>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[th>>2]+ +g[uh>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[jh>>2]+ +g[sh>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[uh>>2]-+g[th>>2];c[Pj>>2]=(c[Pj>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+248;c[r>>2]=c[r>>2]^c[2998]}i=Qj;return}function lv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,77,9688,1);i=b;return}function mv(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0;da=i;i=i+192|0;m=da+180|0;n=da+176|0;o=da+172|0;p=da+168|0;q=da+164|0;r=da+160|0;ea=da+156|0;s=da+152|0;t=da+148|0;ca=da+144|0;w=da+140|0;P=da+136|0;z=da+132|0;Q=da+128|0;H=da+124|0;N=da+120|0;$=da+116|0;Z=da+112|0;W=da+108|0;U=da+104|0;J=da+100|0;G=da+96|0;C=da+92|0;M=da+88|0;u=da+84|0;v=da+80|0;E=da+76|0;F=da+72|0;x=da+68|0;y=da+64|0;K=da+60|0;L=da+56|0;A=da+52|0;R=da+48|0;O=da+44|0;S=da+40|0;B=da+36|0;I=da+32|0;X=da+28|0;ba=da+24|0;aa=da+20|0;D=da+16|0;T=da+12|0;V=da+8|0;Y=da+4|0;_=da;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[ea>>2]=j;c[s>>2]=k;c[t>>2]=l;c[ca>>2]=c[ea>>2];c[q>>2]=(c[q>>2]|0)+(((c[ea>>2]|0)-1|0)*6<<2);while(1){if((c[ca>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[v>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[w>>2]=+g[u>>2]+ +g[v>>2];g[J>>2]=+g[u>>2]-+g[v>>2];g[E>>2]=+g[c[n>>2]>>2];g[F>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[G>>2]=+g[E>>2]+ +g[F>>2];g[P>>2]=+g[E>>2]-+g[F>>2];g[x>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[y>>2]=+g[c[o>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[C>>2]=+g[x>>2]-+g[y>>2];g[K>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[L>>2]=+g[c[p>>2]>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[Q>>2]=+g[K>>2]-+g[L>>2];g[H>>2]=+g[C>>2]+ +g[G>>2];g[N>>2]=+g[J>>2]-+g[M>>2];g[$>>2]=+g[J>>2]+ +g[M>>2];g[Z>>2]=+g[G>>2]-+g[C>>2];g[W>>2]=+g[P>>2]-+g[Q>>2];g[U>>2]=+g[w>>2]-+g[z>>2];g[A>>2]=+g[w>>2]+ +g[z>>2];g[R>>2]=+g[P>>2]+ +g[Q>>2];g[B>>2]=+g[c[q>>2]>>2];g[I>>2]=+g[(c[q>>2]|0)+4>>2];g[O>>2]=+g[B>>2]*+g[H>>2]+ +g[I>>2]*+g[N>>2];g[S>>2]=+g[B>>2]*+g[N>>2]-+g[I>>2]*+g[H>>2];g[c[m>>2]>>2]=+g[A>>2]-+g[O>>2];g[c[n>>2]>>2]=+g[R>>2]+ +g[S>>2];g[c[o>>2]>>2]=+g[A>>2]+ +g[O>>2];g[c[p>>2]>>2]=+g[S>>2]-+g[R>>2];g[T>>2]=+g[(c[q>>2]|0)+8>>2];g[V>>2]=+g[(c[q>>2]|0)+12>>2];g[X>>2]=+g[T>>2]*+g[U>>2]-+g[V>>2]*+g[W>>2];g[ba>>2]=+g[V>>2]*+g[U>>2]+ 
+g[T>>2]*+g[W>>2];g[Y>>2]=+g[(c[q>>2]|0)+16>>2];g[_>>2]=+g[(c[q>>2]|0)+20>>2];g[aa>>2]=+g[Y>>2]*+g[Z>>2]+ +g[_>>2]*+g[$>>2];g[D>>2]=+g[Y>>2]*+g[$>>2]-+g[_>>2]*+g[Z>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[X>>2]-+g[aa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ba>>2]+ +g[D>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[X>>2]+ +g[aa>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[D>>2]-+g[ba>>2];c[ca>>2]=(c[ca>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+24}i=da;return}function nv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,78,9736,1);i=b;return}function ov(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0;Ja=i;i=i+320|0;m=Ja+316|0;n=Ja+312|0;o=Ja+308|0;p=Ja+304|0;q=Ja+300|0;r=Ja+296|0;Ka=Ja+292|0;s=Ja+288|0;t=Ja+284|0;Ia=Ja+272|0;E=Ja+268|0;Da=Ja+264|0;za=Ja+260|0;V=Ja+256|0;ja=Ja+252|0;ka=Ja+248|0;Ga=Ja+244|0;Z=Ja+240|0;wa=Ja+236|0;L=Ja+232|0;ra=Ja+228|0;_=Ja+224|0;xa=Ja+220|0;ya=Ja+216|0;O=Ja+212|0;W=Ja+208|0;ia=Ja+204|0;Fa=Ja+200|0;H=Ja+196|0;Ea=Ja+192|0;C=Ja+188|0;D=Ja+184|0;I=Ja+180|0;J=Ja+176|0;F=Ja+172|0;G=Ja+168|0;na=Ja+164|0;M=Ja+160|0;qa=Ja+156|0;N=Ja+152|0;la=Ja+148|0;ma=Ja+144|0;ua=Ja+140|0;va=Ja+136|0;oa=Ja+132|0;pa=Ja+128|0;T=Ja+124|0;ba=Ja+120|0;aa=Ja+116|0;ca=Ja+112|0;X=Ja+108|0;$=Ja+104|0;U=Ja+100|0;Y=Ja+96|0;ha=Ja+92|0;A=Ja+88|0;z=Ja+84|0;B=Ja+80|0;ea=Ja+76|0;ga=Ja+72|0;da=Ja+68|0;fa=Ja+64|0;w=Ja+60|0;y=Ja+56|0;v=Ja+52|0;x=Ja+48|0;Ba=Ja+44|0;R=Ja+40|0;Q=Ja+36|0;S=Ja+32|0;sa=Ja+28|0;Aa=Ja+24|0;u=Ja+20|0;ta=Ja+16|0;Ha=Ja+12|0;P=Ja+8|0;Ca=Ja+4|0;K=Ja;c[m>>2]=a;c[n
>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[Ka>>2]=j;c[s>>2]=k;c[t>>2]=l;g[Ja+280>>2]=.5;g[Ja+276>>2]=.8660253882408142;c[Ia>>2]=c[Ka>>2];c[q>>2]=(c[q>>2]|0)+(((c[Ka>>2]|0)-1|0)*10<<2);while(1){if((c[Ia>>2]|0)>=(c[s>>2]|0))break;g[C>>2]=+g[c[m>>2]>>2];g[D>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[Da>>2]=+g[C>>2]-+g[D>>2];g[I>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[J>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[ia>>2]=+g[I>>2]+ +g[J>>2];g[Fa>>2]=+g[I>>2]-+g[J>>2];g[F>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[G>>2]=+g[c[o>>2]>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[Ea>>2]=+g[F>>2]-+g[G>>2];g[za>>2]=(+g[H>>2]-+g[ia>>2])*.8660253882408142;g[V>>2]=(+g[Ea>>2]-+g[Fa>>2])*.8660253882408142;g[ja>>2]=+g[H>>2]+ +g[ia>>2];g[ka>>2]=+g[E>>2]-+g[ja>>2]*.5;g[Ga>>2]=+g[Ea>>2]+ +g[Fa>>2];g[Z>>2]=+g[Da>>2]-+g[Ga>>2]*.5;g[la>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[ma>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[M>>2]=+g[ma>>2]+ +g[la>>2];g[ua>>2]=+g[c[n>>2]>>2];g[va>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[wa>>2]=+g[ua>>2]-+g[va>>2];g[L>>2]=+g[ua>>2]+ +g[va>>2];g[oa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[pa>>2]=+g[c[p>>2]>>2];g[qa>>2]=+g[oa>>2]-+g[pa>>2];g[N>>2]=+g[oa>>2]+ +g[pa>>2];g[ra>>2]=(+g[na>>2]-+g[qa>>2])*.8660253882408142;g[_>>2]=(+g[M>>2]+ +g[N>>2])*.8660253882408142;g[xa>>2]=+g[na>>2]+ +g[qa>>2];g[ya>>2]=+g[wa>>2]-+g[xa>>2]*.5;g[O>>2]=+g[M>>2]-+g[N>>2];g[W>>2]=+g[O>>2]*.5+ +g[L>>2];g[T>>2]=+g[E>>2]+ +g[ja>>2];g[ba>>2]=+g[wa>>2]+ +g[xa>>2];g[X>>2]=+g[V>>2]+ +g[W>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[U>>2]=+g[c[q>>2]>>2];g[Y>>2]=+g[(c[q>>2]|0)+4>>2];g[aa>>2]=+g[U>>2]*+g[X>>2]+ +g[Y>>2]*+g[$>>2];g[ca>>2]=+g[U>>2]*+g[$>>2]-+g[Y>>2]*+g[X>>2];g[c[m>>2]>>2]=+g[T>>2]-+g[aa>>2];g[c[n>>2]>>2]=+g[ba>>2]+ +g[ca>>2];g[c[o>>2]>>2]=+g[T>>2]+ +g[aa>>2];g[c[p>>2]>>2]=+g[ca>>2]-+g[ba>>2];g[ea>>2]=+g[ka>>2]+ +g[ra>>2];g[ga>>2]=+g[za>>2]+ 
+g[ya>>2];g[da>>2]=+g[(c[q>>2]|0)+24>>2];g[fa>>2]=+g[(c[q>>2]|0)+28>>2];g[ha>>2]=+g[da>>2]*+g[ea>>2]-+g[fa>>2]*+g[ga>>2];g[A>>2]=+g[fa>>2]*+g[ea>>2]+ +g[da>>2]*+g[ga>>2];g[w>>2]=+g[W>>2]-+g[V>>2];g[y>>2]=+g[Z>>2]+ +g[_>>2];g[v>>2]=+g[(c[q>>2]|0)+32>>2];g[x>>2]=+g[(c[q>>2]|0)+36>>2];g[z>>2]=+g[v>>2]*+g[w>>2]+ +g[x>>2]*+g[y>>2];g[B>>2]=+g[v>>2]*+g[y>>2]-+g[x>>2]*+g[w>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ha>>2]-+g[z>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[A>>2]+ +g[B>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[ha>>2]+ +g[z>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[B>>2]-+g[A>>2];g[sa>>2]=+g[ka>>2]-+g[ra>>2];g[Aa>>2]=+g[ya>>2]-+g[za>>2];g[u>>2]=+g[(c[q>>2]|0)+12>>2];g[ta>>2]=+g[(c[q>>2]|0)+8>>2];g[Ba>>2]=+g[u>>2]*+g[sa>>2]+ +g[ta>>2]*+g[Aa>>2];g[R>>2]=+g[ta>>2]*+g[sa>>2]-+g[u>>2]*+g[Aa>>2];g[Ha>>2]=+g[Da>>2]+ +g[Ga>>2];g[P>>2]=+g[L>>2]-+g[O>>2];g[Ca>>2]=+g[(c[q>>2]|0)+16>>2];g[K>>2]=+g[(c[q>>2]|0)+20>>2];g[Q>>2]=+g[Ca>>2]*+g[Ha>>2]-+g[K>>2]*+g[P>>2];g[S>>2]=+g[K>>2]*+g[Ha>>2]+ +g[Ca>>2]*+g[P>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Ba>>2]+ +g[Q>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[R>>2]-+g[S>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]-+g[Ba>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[R>>2]+ +g[S>>2];c[Ia>>2]=(c[Ia>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+40;c[r>>2]=c[r>>2]^c[2998]}i=Ja;return}function pv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;an(c[d>>2]|0,79,9784,1);i=b;return}function qv(a,b,d,e,f,h,j,k,l){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0;jb=i;i=i+432|0;m=jb+424|0;n=jb+420|0;o=jb+416|0;p=jb+412|0;q=jb+408|0;r=jb+404|0;kb=jb+400|0;s=jb+396|0;t=jb+392|0;ib=jb+384|0;ha=jb+380|0;I=jb+376|0;M=jb+372|0;Va=jb+368|0;qa=jb+364|0;z=jb+360|0;E=jb+356|0;ya=jb+352|0;Oa=jb+348|0;va=jb+344|0;D=jb+340|0;A=jb+336|0;la=jb+332|0;J=jb+328|0;eb=jb+324|0;N=jb+320|0;da=jb+316|0;ma=jb+312|0;Ua=jb+308|0;wa=jb+304|0;ga=jb+300|0;Ra=jb+296|0;pa=jb+292|0;xa=jb+288|0;u=jb+284|0;ca=jb+280|0;Sa=jb+276|0;Ta=jb+272|0;ea=jb+268|0;fa=jb+264|0;na=jb+260|0;oa=jb+256|0;Ka=jb+252|0;Wa=jb+248|0;Za=jb+244|0;ta=jb+240|0;Na=jb+236|0;$a=jb+232|0;cb=jb+228|0;ua=jb+224|0;ia=jb+220|0;ja=jb+216|0;Xa=jb+212|0;Ya=jb+208|0;La=jb+204|0;Ma=jb+200|0;ab=jb+196|0;bb=jb+192|0;hb=jb+188|0;ka=jb+184|0;_a=jb+180|0;db=jb+176|0;Pa=jb+172|0;za=jb+168|0;sa=jb+164|0;Aa=jb+160|0;fb=jb+156|0;ra=jb+152|0;Qa=jb+148|0;gb=jb+144|0;W=jb+140|0;aa=jb+136|0;$=jb+132|0;ba=jb+128|0;T=jb+124|0;V=jb+120|0;S=jb+116|0;U=jb+112|0;Y=jb+108|0;_=jb+104|0;X=jb+100|0;Z=jb+96|0;Fa=jb+92|0;w=jb+88|0;v=jb+84|0;x=jb+80|0;Ca=jb+76|0;Ea=jb+72|0;Ba=jb+68|0;Da=jb+64|0;Ha=jb+60|0;Ja=jb+56|0;Ga=jb+52|0;Ia=jb+48|0;G=jb+44|0;Q=jb+40|0;P=jb+36|0;R=jb+32|0;B=jb+28|0;F=jb+24|0;y=jb+20|0;C=jb+16|0;K=jb+12|0;O=jb+8|0;H=jb+4|0;L=jb;c[m>>2]=a;c[n>>2]=b;c[o>>2]=d;c[p>>2]=e;c[q>>2]=f;c[r>>2]=h;c[kb>>2]=j;c[s>>2]=k;c[t>>2]=l;g[jb+388>>2]=.7071067690849304;c[ib>>2]=c[kb>>2];c[q>>2]=(c[q>>2]|0)+(((c[kb>>2]|0)-1|0)*14<<2);while(1){if((c[ib>>2]|0)>=(c[s>>2]|0))break;g[u>>2]=+g[c[m>>2]>>2];g[ca>>2]=+g[(c[o>>2]|0)+((c[r>>2]|0)*3<<
2)>>2];g[da>>2]=+g[u>>2]+ +g[ca>>2];g[ma>>2]=+g[u>>2]-+g[ca>>2];g[Sa>>2]=+g[c[n>>2]>>2];g[Ta>>2]=+g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Ua>>2]=+g[Sa>>2]+ +g[Ta>>2];g[wa>>2]=+g[Sa>>2]-+g[Ta>>2];g[ea>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2];g[fa>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<2)>>2];g[ga>>2]=+g[ea>>2]+ +g[fa>>2];g[Ra>>2]=+g[ea>>2]-+g[fa>>2];g[na>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2];g[oa>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<2)>>2];g[pa>>2]=+g[na>>2]+ +g[oa>>2];g[xa>>2]=+g[na>>2]-+g[oa>>2];g[ha>>2]=+g[da>>2]+ +g[ga>>2];g[I>>2]=+g[Ua>>2]-+g[Ra>>2];g[M>>2]=+g[ma>>2]+ +g[pa>>2];g[Va>>2]=+g[Ra>>2]+ +g[Ua>>2];g[qa>>2]=+g[ma>>2]-+g[pa>>2];g[z>>2]=+g[da>>2]-+g[ga>>2];g[E>>2]=+g[wa>>2]-+g[xa>>2];g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[ia>>2]=+g[(c[m>>2]|0)+(c[r>>2]<<2)>>2];g[ja>>2]=+g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Ka>>2]=+g[ia>>2]+ +g[ja>>2];g[Wa>>2]=+g[ia>>2]-+g[ja>>2];g[Xa>>2]=+g[(c[n>>2]|0)+(c[r>>2]<<2)>>2];g[Ya>>2]=+g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2];g[Za>>2]=+g[Xa>>2]+ +g[Ya>>2];g[ta>>2]=+g[Xa>>2]-+g[Ya>>2];g[La>>2]=+g[c[o>>2]>>2];g[Ma>>2]=+g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[$a>>2]=+g[La>>2]-+g[Ma>>2];g[ab>>2]=+g[c[p>>2]>>2];g[bb>>2]=+g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[ua>>2]=+g[bb>>2]-+g[ab>>2];g[Oa>>2]=+g[Ka>>2]+ +g[Na>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[D>>2]=+g[Ka>>2]-+g[Na>>2];g[A>>2]=+g[ua>>2]-+g[ta>>2];g[hb>>2]=+g[Wa>>2]-+g[Za>>2];g[ka>>2]=+g[$a>>2]-+g[cb>>2];g[la>>2]=(+g[hb>>2]+ +g[ka>>2])*.7071067690849304;g[J>>2]=(+g[hb>>2]-+g[ka>>2])*.7071067690849304;g[_a>>2]=+g[Wa>>2]+ +g[Za>>2];g[db>>2]=+g[$a>>2]+ +g[cb>>2];g[eb>>2]=(+g[_a>>2]-+g[db>>2])*.7071067690849304;g[N>>2]=(+g[_a>>2]+ +g[db>>2])*.7071067690849304;g[Pa>>2]=+g[ha>>2]+ +g[Oa>>2];g[za>>2]=+g[va>>2]+ +g[ya>>2];g[fb>>2]=+g[Va>>2]+ +g[eb>>2];g[ra>>2]=+g[la>>2]+ +g[qa>>2];g[Qa>>2]=+g[c[q>>2]>>2];g[gb>>2]=+g[(c[q>>2]|0)+4>>2];g[sa>>2]=+g[Qa>>2]*+g[fb>>2]+ 
+g[gb>>2]*+g[ra>>2];g[Aa>>2]=+g[Qa>>2]*+g[ra>>2]-+g[gb>>2]*+g[fb>>2];g[c[m>>2]>>2]=+g[Pa>>2]-+g[sa>>2];g[c[n>>2]>>2]=+g[za>>2]+ +g[Aa>>2];g[c[o>>2]>>2]=+g[Pa>>2]+ +g[sa>>2];g[c[p>>2]>>2]=+g[Aa>>2]-+g[za>>2];g[T>>2]=+g[z>>2]-+g[A>>2];g[V>>2]=+g[E>>2]-+g[D>>2];g[S>>2]=+g[(c[q>>2]|0)+40>>2];g[U>>2]=+g[(c[q>>2]|0)+44>>2];g[W>>2]=+g[S>>2]*+g[T>>2]-+g[U>>2]*+g[V>>2];g[aa>>2]=+g[U>>2]*+g[T>>2]+ +g[S>>2]*+g[V>>2];g[Y>>2]=+g[I>>2]-+g[J>>2];g[_>>2]=+g[N>>2]+ +g[M>>2];g[X>>2]=+g[(c[q>>2]|0)+48>>2];g[Z>>2]=+g[(c[q>>2]|0)+52>>2];g[$>>2]=+g[X>>2]*+g[Y>>2]+ +g[Z>>2]*+g[_>>2];g[ba>>2]=+g[X>>2]*+g[_>>2]-+g[Z>>2]*+g[Y>>2];g[(c[m>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[W>>2]-+g[$>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[aa>>2]+ +g[ba>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[W>>2]+ +g[$>>2];g[(c[p>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ba>>2]-+g[aa>>2];g[Ca>>2]=+g[ha>>2]-+g[Oa>>2];g[Ea>>2]=+g[ya>>2]-+g[va>>2];g[Ba>>2]=+g[(c[q>>2]|0)+24>>2];g[Da>>2]=+g[(c[q>>2]|0)+28>>2];g[Fa>>2]=+g[Ba>>2]*+g[Ca>>2]-+g[Da>>2]*+g[Ea>>2];g[w>>2]=+g[Da>>2]*+g[Ca>>2]+ +g[Ba>>2]*+g[Ea>>2];g[Ha>>2]=+g[Va>>2]-+g[eb>>2];g[Ja>>2]=+g[qa>>2]-+g[la>>2];g[Ga>>2]=+g[(c[q>>2]|0)+32>>2];g[Ia>>2]=+g[(c[q>>2]|0)+36>>2];g[v>>2]=+g[Ga>>2]*+g[Ha>>2]+ +g[Ia>>2]*+g[Ja>>2];g[x>>2]=+g[Ga>>2]*+g[Ja>>2]-+g[Ia>>2]*+g[Ha>>2];g[(c[m>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]-+g[v>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[w>>2]+ +g[x>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Fa>>2]+ +g[v>>2];g[(c[p>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[x>>2]-+g[w>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[F>>2]=+g[D>>2]+ +g[E>>2];g[y>>2]=+g[(c[q>>2]|0)+8>>2];g[C>>2]=+g[(c[q>>2]|0)+12>>2];g[G>>2]=+g[y>>2]*+g[B>>2]-+g[C>>2]*+g[F>>2];g[Q>>2]=+g[C>>2]*+g[B>>2]+ +g[y>>2]*+g[F>>2];g[K>>2]=+g[I>>2]+ +g[J>>2];g[O>>2]=+g[M>>2]-+g[N>>2];g[H>>2]=+g[(c[q>>2]|0)+16>>2];g[L>>2]=+g[(c[q>>2]|0)+20>>2];g[P>>2]=+g[H>>2]*+g[K>>2]+ +g[L>>2]*+g[O>>2];g[R>>2]=+g[H>>2]*+g[O>>2]-+g[L>>2]*+g[K>>2];g[(c[m>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]-+g[P>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[Q>>2]+ 
+g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]+ +g[P>>2];g[(c[p>>2]|0)+(c[r>>2]<<2)>>2]=+g[R>>2]-+g[Q>>2];c[ib>>2]=(c[ib>>2]|0)+1;c[m>>2]=(c[m>>2]|0)+(c[t>>2]<<2);c[n>>2]=(c[n>>2]|0)+(c[t>>2]<<2);c[o>>2]=(c[o>>2]|0)+(0-(c[t>>2]|0)<<2);c[p>>2]=(c[p>>2]|0)+(0-(c[t>>2]|0)<<2);c[q>>2]=(c[q>>2]|0)+56;c[r>>2]=c[r>>2]^c[2998]}i=jb;return}function rv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,37,9832);i=b;return}function sv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0;ba=i;i=i+192|0;n=ba+188|0;o=ba+184|0;p=ba+180|0;q=ba+176|0;r=ba+172|0;s=ba+168|0;t=ba+164|0;ca=ba+160|0;u=ba+156|0;v=ba+152|0;aa=ba+128|0;w=ba+124|0;T=ba+120|0;D=ba+116|0;V=ba+112|0;F=ba+108|0;U=ba+104|0;J=ba+100|0;X=ba+96|0;M=ba+92|0;S=ba+88|0;x=ba+84|0;y=ba+80|0;z=ba+76|0;A=ba+72|0;B=ba+68|0;C=ba+64|0;H=ba+60|0;I=ba+56|0;R=ba+52|0;K=ba+48|0;L=ba+44|0;Q=ba+40|0;N=ba+36|0;O=ba+32|0;G=ba+28|0;P=ba+24|0;E=ba+20|0;W=ba+16|0;_=ba+12|0;Z=ba+8|0;$=ba+4|0;Y=ba;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ca>>2]=k;c[u>>2]=l;c[v>>2]=m;g[ba+148>>2]=.5;g[ba+144>>2]=1.9021130800247192;g[ba+140>>2]=1.1755704879760742;g[ba+136>>2]=2.0;g[ba+132>>2]=1.1180340051651;c[aa>>2]=c[ca>>2];while(1){if((c[aa>>2]|0)<=0)break;g[w>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[T>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[y>>2]=+g[c[p>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[D>>2]=+g[z>>2]+ +g[C>>2];g[V>>2]=+g[A>>2]-+g[B>>2];g[F>>2]=(+g[C>>2]-+g[z>>2])*1.1180340051651;g[U>>2]=+g[x>>2]-+g[y>>2];g[H>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[I>>2]=+g[c[q>>2]>>2];g[R>>2]=+g[H>>2]+ 
+g[I>>2];g[K>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[L>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[Q>>2]=+g[L>>2]+ +g[K>>2];g[J>>2]=+g[H>>2]-+g[I>>2];g[X>>2]=(+g[Q>>2]+ +g[R>>2])*1.1180340051651;g[M>>2]=+g[K>>2]-+g[L>>2];g[S>>2]=+g[Q>>2]-+g[R>>2];g[c[n>>2]>>2]=(+g[w>>2]+ +g[D>>2])*2.0;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[S>>2]-+g[T>>2])*2.0;g[N>>2]=+g[J>>2]*1.1755704879760742-+g[M>>2]*1.9021130800247192;g[O>>2]=+g[M>>2]*1.1755704879760742+ +g[J>>2]*1.9021130800247192;g[E>>2]=+g[D>>2]*.5-+g[w>>2]*2.0;g[G>>2]=+g[E>>2]-+g[F>>2];g[P>>2]=+g[E>>2]+ +g[F>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]+ +g[N>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[P>>2]+ +g[O>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[N>>2]-+g[G>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[O>>2]-+g[P>>2];g[W>>2]=+g[U>>2]*1.9021130800247192+ +g[V>>2]*1.1755704879760742;g[_>>2]=+g[V>>2]*1.9021130800247192-+g[U>>2]*1.1755704879760742;g[Y>>2]=+g[S>>2]*.5+ +g[T>>2]*2.0;g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[$>>2]=+g[Y>>2]-+g[X>>2];g[c[o>>2]>>2]=-(+g[W>>2]+ +g[Z>>2]);g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[$>>2]-+g[_>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[W>>2]-+g[Z>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[_>>2]+ +g[$>>2];c[aa>>2]=(c[aa>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=ba;return}function tv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,38,9880);i=b;return}function uv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0;la=i;i=i+240|0;n=la+224|0;o=la+220|0;p=la+216|0;q=la+212|0;r=la+208|0;s=la+204|0;t=la+200|0;ma=la+196|0;u=la+192|0;v=la+188|0;ka=la+168|0;A=la+164|0;ga=la+160|0;N=la+156|0;Q=la+152|0;ha=la+148|0;ca=la+144|0;M=la+140|0;F=la+136|0;S=la+132|0;V=la+128|0;ja=la+124|0;$=la+120|0;da=la+116|0;ea=la+112|0;w=la+108|0;x=la+104|0;y=la+100|0;z=la+96|0;aa=la+92|0;O=la+88|0;P=la+84|0;ba=la+80|0;B=la+76|0;C=la+72|0;D=la+68|0;E=la+64|0;_=la+60|0;T=la+56|0;U=la+52|0;Z=la+48|0;R=la+44|0;W=la+40|0;fa=la+36|0;ia=la+32|0;G=la+28|0;H=la+24|0;X=la+20|0;Y=la+16|0;K=la+12|0;I=la+8|0;J=la+4|0;L=la;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[ma>>2]=k;c[u>>2]=l;c[v>>2]=m;g[la+184>>2]=1.4142135381698608;g[la+180>>2]=2.0;g[la+176>>2]=.5;g[la+172>>2]=.8660253882408142;c[ka>>2]=c[ma>>2];while(1){if((c[ka>>2]|0)<=0)break;g[w>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[y>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[w>>2]+ +g[z>>2];g[ga>>2]=(+g[x>>2]-+g[y>>2])*.8660253882408142;g[N>>2]=+g[w>>2]-+g[z>>2]*.5;g[aa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[O>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[P>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[ba>>2]=+g[P>>2]-+g[O>>2];g[Q>>2]=(+g[O>>2]+ +g[P>>2])*.8660253882408142;g[ha>>2]=+g[ba>>2]*.5+ +g[aa>>2];g[ca>>2]=+g[aa>>2]-+g[ba>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[C>>2]=+g[c[p>>2]>>2];g[D>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[E>>2]=+g[C>>2]+ +g[D>>2];g[M>>2]=+g[B>>2]+ +g[E>>2];g[F>>2]=(+g[C>>2]-+g[D>>2])*.8660253882408142;g[S>>2]=+g[B>>2]-+g[E>>2]*.5;g[_>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[T>>2]=+g[c[q>>2]>>2];g[U>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[Z>>2]=+g[U>>2]-+g[T>>2];g[V>>2]=(+g[T>>2]+ 
+g[U>>2])*.8660253882408142;g[ja>>2]=+g[Z>>2]*.5+ +g[_>>2];g[$>>2]=+g[Z>>2]-+g[_>>2];g[c[n>>2]>>2]=(+g[A>>2]+ +g[M>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[ca>>2]+ +g[$>>2])*2.0;g[da>>2]=+g[$>>2]-+g[ca>>2];g[ea>>2]=+g[A>>2]-+g[M>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[da>>2]-+g[ea>>2])*1.4142135381698608;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[ea>>2]+ +g[da>>2])*1.4142135381698608;g[R>>2]=+g[N>>2]-+g[Q>>2];g[W>>2]=+g[S>>2]+ +g[V>>2];g[fa>>2]=+g[R>>2]-+g[W>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[G>>2]=+g[ja>>2]-+g[F>>2];g[H>>2]=+g[ia>>2]+ +g[G>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=-((+g[R>>2]+ +g[W>>2])*2.0);g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[G>>2]-+g[ia>>2])*2.0;g[c[o>>2]>>2]=(+g[fa>>2]-+g[H>>2])*1.4142135381698608;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[fa>>2]+ +g[H>>2])*1.4142135381698608;g[X>>2]=+g[N>>2]+ +g[Q>>2];g[Y>>2]=+g[S>>2]-+g[V>>2];g[K>>2]=+g[Y>>2]-+g[X>>2];g[I>>2]=+g[F>>2]+ +g[ja>>2];g[J>>2]=+g[ha>>2]-+g[ga>>2];g[L>>2]=+g[J>>2]+ +g[I>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[X>>2]+ +g[Y>>2])*2.0;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[K>>2]+ +g[L>>2])*1.4142135381698608;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=(+g[I>>2]-+g[J>>2])*2.0;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[K>>2]-+g[L>>2])*1.4142135381698608;c[ka>>2]=(c[ka>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=la;return}function vv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,39,9928);i=b;return}function wv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0;Ha=i;i=i+368|0;n=Ha+352|0;o=Ha+348|0;p=Ha+344|0;q=Ha+340|0;r=Ha+336|0;s=Ha+332|0;t=Ha+328|0;Ia=Ha+324|0;u=Ha+320|0;v=Ha+316|0;Ga=Ha+256|0;Ba=Ha+252|0;L=Ha+248|0;D=Ha+244|0;ya=Ha+240|0;K=Ha+236|0;E=Ha+232|0;la=Ha+228|0;ca=Ha+224|0;S=Ha+220|0;ja=Ha+216|0;ma=Ha+212|0;X=Ha+208|0;ua=Ha+204|0;V=Ha+200|0;I=Ha+196|0;W=Ha+192|0;Y=Ha+188|0;za=Ha+184|0;Aa=Ha+180|0;z=Ha+176|0;ka=Ha+172|0;y=Ha+168|0;w=Ha+164|0;C=Ha+160|0;wa=Ha+156|0;A=Ha+152|0;B=Ha+148|0;xa=Ha+144|0;ia=Ha+140|0;R=Ha+136|0;H=Ha+132|0;Q=Ha+128|0;ga=Ha+124|0;ha=Ha+120|0;F=Ha+116|0;G=Ha+112|0;ta=Ha+108|0;U=Ha+104|0;qa=Ha+100|0;T=Ha+96|0;ra=Ha+92|0;sa=Ha+88|0;oa=Ha+84|0;pa=Ha+80|0;M=Ha+76|0;O=Ha+72|0;J=Ha+68|0;N=Ha+64|0;ea=Ha+60|0;x=Ha+56|0;Fa=Ha+52|0;da=Ha+48|0;ba=Ha+44|0;fa=Ha+40|0;Ca=Ha+36|0;Ea=Ha+32|0;va=Ha+28|0;Da=Ha+24|0;_=Ha+20|0;aa=Ha+16|0;na=Ha+12|0;Z=Ha+8|0;P=Ha+4|0;$=Ha;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ia>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ha+312>>2]=1.7320507764816284;g[Ha+308>>2]=.4330126941204071;g[Ha+304>>2]=.9682458639144897;g[Ha+300>>2]=.5877852439880371;g[Ha+296>>2]=.9510565400123596;g[Ha+292>>2]=.25;g[Ha+288>>2]=1.6472781896591187;g[Ha+284>>2]=1.0180739164352417;g[Ha+280>>2]=.55901700258255;g[Ha+276>>2]=.5;g[Ha+272>>2]=2.0;g[Ha+268>>2]=1.1180340051651;g[Ha+264>>2]=1.1755704879760742;g[Ha+260>>2]=1.9021130800247192;c[Ga>>2]=c[Ia>>2];while(1){if((c[Ga>>2]|0)<=0)break;g[za>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[Aa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[Ba>>2]=+g[za>>2]*1.9021130800247192+ 
+g[Aa>>2]*1.1755704879760742;g[L>>2]=+g[Aa>>2]*1.9021130800247192-+g[za>>2]*1.1755704879760742;g[w>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[A>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[B>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[wa>>2]=(+g[A>>2]-+g[B>>2])*1.1180340051651;g[D>>2]=+g[C>>2]*2.0+ +g[w>>2];g[xa>>2]=+g[w>>2]-+g[C>>2]*.5;g[ya>>2]=+g[wa>>2]+ +g[xa>>2];g[K>>2]=+g[xa>>2]-+g[wa>>2];g[E>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[ga>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[ha>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[ia>>2]=+g[ga>>2]+ +g[ha>>2];g[R>>2]=+g[ga>>2]-+g[ha>>2];g[F>>2]=+g[c[p>>2]>>2];g[G>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[H>>2]=+g[F>>2]+ +g[G>>2];g[Q>>2]=+g[F>>2]-+g[G>>2];g[la>>2]=(+g[H>>2]-+g[ia>>2])*.55901700258255;g[ca>>2]=+g[Q>>2]*1.0180739164352417-+g[R>>2]*1.6472781896591187;g[S>>2]=+g[Q>>2]*1.6472781896591187+ +g[R>>2]*1.0180739164352417;g[ja>>2]=+g[H>>2]+ +g[ia>>2];g[ma>>2]=+g[E>>2]-+g[ja>>2]*.25;g[X>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[ra>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[sa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[ta>>2]=+g[ra>>2]-+g[sa>>2];g[U>>2]=+g[ra>>2]+ +g[sa>>2];g[oa>>2]=+g[c[q>>2]>>2];g[pa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[T>>2]=+g[oa>>2]-+g[pa>>2];g[ua>>2]=+g[qa>>2]*.9510565400123596+ +g[ta>>2]*.5877852439880371;g[V>>2]=(+g[T>>2]-+g[U>>2])*.9682458639144897;g[I>>2]=+g[ta>>2]*.9510565400123596-+g[qa>>2]*.5877852439880371;g[W>>2]=+g[T>>2]+ +g[U>>2];g[Y>>2]=+g[W>>2]*.4330126941204071+ +g[X>>2]*1.7320507764816284;g[z>>2]=(+g[X>>2]-+g[W>>2])*1.7320507764816284;g[ka>>2]=+g[E>>2]+ +g[ja>>2];g[y>>2]=+g[ka>>2]-+g[D>>2];g[c[n>>2]>>2]=+g[ka>>2]*2.0+ +g[D>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[z>>2]-+g[y>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[y>>2]+ +g[z>>2];g[M>>2]=+g[K>>2]-+g[L>>2];g[O>>2]=+g[K>>2]+ +g[L>>2];g[Fa>>2]=+g[ma>>2]-+g[la>>2];g[J>>2]=+g[Fa>>2]+ +g[I>>2];g[N>>2]=+g[I>>2]-+g[Fa>>2];g[da>>2]=+g[V>>2]+ 
+g[Y>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[x>>2]=+g[ca>>2]+ +g[da>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[J>>2]*2.0+ +g[M>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[N>>2]*2.0-+g[O>>2];g[ba>>2]=+g[M>>2]-+g[J>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ba>>2]+ +g[ea>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ea>>2]-+g[ba>>2];g[fa>>2]=+g[N>>2]+ +g[O>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[fa>>2]-+g[x>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=-(+g[fa>>2]+ +g[x>>2]);g[Ca>>2]=+g[ya>>2]-+g[Ba>>2];g[Ea>>2]=+g[ya>>2]+ +g[Ba>>2];g[na>>2]=+g[la>>2]+ +g[ma>>2];g[va>>2]=+g[na>>2]+ +g[ua>>2];g[Da>>2]=+g[na>>2]-+g[ua>>2];g[Z>>2]=+g[V>>2]-+g[Y>>2];g[_>>2]=+g[S>>2]+ +g[Z>>2];g[aa>>2]=+g[Z>>2]-+g[S>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=-(+g[va>>2]*2.0+ +g[Ca>>2]);g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Da>>2]*2.0+ +g[Ea>>2];g[P>>2]=+g[Da>>2]-+g[Ea>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[P>>2]-+g[_>>2];g[c[o>>2]>>2]=+g[P>>2]+ +g[_>>2];g[$>>2]=+g[Ca>>2]-+g[va>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[$>>2]-+g[aa>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[$>>2]+ +g[aa>>2];c[Ga>>2]=(c[Ga>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ha;return}function xv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,40,9976);i=b;return}function yv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0;Ja=i;i=i+352|0;n=Ja+340|0;o=Ja+336|0;p=Ja+332|0;q=Ja+328|0;r=Ja+324|0;s=Ja+320|0;t=Ja+316|0;Ka=Ja+312|0;u=Ja+308|0;v=Ja+304|0;Ia=Ja+264|0;H=Ja+260|0;ea=Ja+256|0;A=Ja+252|0;ra=Ja+248|0;N=Ja+244|0;U=Ja+240|0;Z=Ja+236|0;R=Ja+232|0;ma=Ja+228|0;fa=Ja+224|0;z=Ja+220|0;wa=Ja+216|0;Ba=Ja+212|0;Fa=Ja+208|0;aa=Ja+204|0;Ea=Ja+200|0;ba=Ja+196|0;ga=Ja+192|0;D=Ja+188|0;na=Ja+184|0;M=Ja+180|0;da=Ja+176|0;G=Ja+172|0;Ha=Ja+168|0;qa=Ja+164|0;ca=Ja+160|0;w=Ja+156|0;C=Ja+152|0;K=Ja+148|0;L=Ja+144|0;E=Ja+140|0;F=Ja+136|0;oa=Ja+132|0;pa=Ja+128|0;ia=Ja+124|0;sa=Ja+120|0;va=Ja+116|0;$=Ja+112|0;la=Ja+108|0;xa=Ja+104|0;Aa=Ja+100|0;_=Ja+96|0;I=Ja+92|0;J=Ja+88|0;ta=Ja+84|0;ua=Ja+80|0;ja=Ja+76|0;ka=Ja+72|0;ya=Ja+68|0;za=Ja+64|0;y=Ja+60|0;B=Ja+56|0;ha=Ja+52|0;x=Ja+48|0;T=Ja+44|0;X=Ja+40|0;W=Ja+36|0;Y=Ja+32|0;S=Ja+28|0;V=Ja+24|0;Da=Ja+20|0;P=Ja+16|0;O=Ja+12|0;Q=Ja+8|0;Ca=Ja+4|0;Ga=Ja;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Ka>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Ja+300>>2]=1.9615705013275146;g[Ja+296>>2]=.39018064737319946;g[Ja+292>>2]=1.111140489578247;g[Ja+288>>2]=1.662939190864563;g[Ja+284>>2]=.7071067690849304;g[Ja+280>>2]=1.4142135381698608;g[Ja+276>>2]=.7653668522834778;g[Ja+272>>2]=1.8477590084075928;g[Ja+268>>2]=2.0;c[Ia>>2]=c[Ka>>2];while(1){if((c[Ia>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[C>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[D>>2]=+g[w>>2]+ +g[C>>2];g[na>>2]=+g[w>>2]-+g[C>>2];g[K>>2]=+g[c[q>>2]>>2];g[L>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[M>>2]=+g[K>>2]+ +g[L>>2];g[da>>2]=+g[L>>2]-+g[K>>2];g[E>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[F>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[G>>2]=+g[E>>2]+ 
+g[F>>2];g[Ha>>2]=+g[E>>2]-+g[F>>2];g[oa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[pa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[qa>>2]=+g[oa>>2]+ +g[pa>>2];g[ca>>2]=+g[oa>>2]-+g[pa>>2];g[H>>2]=+g[D>>2]+ +g[G>>2];g[ea>>2]=+g[ca>>2]+ +g[da>>2];g[A>>2]=+g[da>>2]-+g[ca>>2];g[ra>>2]=+g[na>>2]-+g[qa>>2];g[N>>2]=+g[Ha>>2]+ +g[M>>2];g[U>>2]=+g[Ha>>2]-+g[M>>2];g[Z>>2]=+g[D>>2]-+g[G>>2];g[R>>2]=+g[na>>2]+ +g[qa>>2];g[I>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[J>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[ia>>2]=+g[I>>2]+ +g[J>>2];g[sa>>2]=+g[I>>2]-+g[J>>2];g[ta>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[ua>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[va>>2]=+g[ta>>2]+ +g[ua>>2];g[$>>2]=+g[ta>>2]-+g[ua>>2];g[ja>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[ka>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[la>>2]=+g[ja>>2]+ +g[ka>>2];g[xa>>2]=+g[ja>>2]-+g[ka>>2];g[ya>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[za>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[Aa>>2]=+g[ya>>2]+ +g[za>>2];g[_>>2]=+g[za>>2]-+g[ya>>2];g[ma>>2]=+g[ia>>2]+ +g[la>>2];g[fa>>2]=+g[ia>>2]-+g[la>>2];g[z>>2]=+g[$>>2]+ +g[_>>2];g[wa>>2]=+g[sa>>2]-+g[va>>2];g[Ba>>2]=+g[xa>>2]-+g[Aa>>2];g[Fa>>2]=+g[xa>>2]+ +g[Aa>>2];g[aa>>2]=+g[_>>2]-+g[$>>2];g[Ea>>2]=+g[sa>>2]+ +g[va>>2];g[c[n>>2]>>2]=(+g[H>>2]+ +g[ma>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[A>>2]-+g[z>>2])*2.0;g[ba>>2]=+g[Z>>2]+ +g[aa>>2];g[ga>>2]=+g[ea>>2]-+g[fa>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ba>>2]*1.8477590084075928+ +g[ga>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[ga>>2]*1.8477590084075928-+g[ba>>2]*.7653668522834778;g[y>>2]=+g[H>>2]-+g[ma>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[y>>2]+ +g[B>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=(+g[B>>2]-+g[y>>2])*1.4142135381698608;g[ha>>2]=+g[Z>>2]-+g[aa>>2];g[x>>2]=+g[fa>>2]+ +g[ea>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[ha>>2]*.7653668522834778+ 
+g[x>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[x>>2]*.7653668522834778-+g[ha>>2]*1.8477590084075928;g[S>>2]=(+g[Ea>>2]+ +g[Fa>>2])*.7071067690849304;g[T>>2]=+g[R>>2]-+g[S>>2];g[X>>2]=+g[R>>2]+ +g[S>>2];g[V>>2]=(+g[wa>>2]-+g[Ba>>2])*.7071067690849304;g[W>>2]=+g[U>>2]-+g[V>>2];g[Y>>2]=+g[V>>2]+ +g[U>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[T>>2]*1.662939190864563+ +g[W>>2]*1.111140489578247;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Y>>2]*.39018064737319946-+g[X>>2]*1.9615705013275146;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[W>>2]*1.662939190864563-+g[T>>2]*1.111140489578247;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[X>>2]*.39018064737319946+ +g[Y>>2]*1.9615705013275146;g[Ca>>2]=(+g[wa>>2]+ +g[Ba>>2])*.7071067690849304;g[Da>>2]=+g[ra>>2]+ +g[Ca>>2];g[P>>2]=+g[ra>>2]-+g[Ca>>2];g[Ga>>2]=(+g[Ea>>2]-+g[Fa>>2])*.7071067690849304;g[O>>2]=+g[Ga>>2]+ +g[N>>2];g[Q>>2]=+g[Ga>>2]-+g[N>>2];g[c[o>>2]>>2]=+g[Da>>2]*1.9615705013275146-+g[O>>2]*.39018064737319946;g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Q>>2]*1.111140489578247-+g[P>>2]*1.662939190864563;g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=-(+g[Da>>2]*.39018064737319946+ +g[O>>2]*1.9615705013275146);g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[P>>2]*1.111140489578247+ +g[Q>>2]*1.662939190864563;c[Ia>>2]=(c[Ia>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Ja;return}function zv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,41,10024);i=b;return}function Av(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0;jb=i;i=i+448|0;n=jb+440|0;o=jb+436|0;p=jb+432|0;q=jb+428|0;r=jb+424|0;s=jb+420|0;t=jb+416|0;kb=jb+412|0;u=jb+408|0;v=jb+404|0;ib=jb+376|0;w=jb+372|0;Ta=jb+368|0;R=jb+364|0;A=jb+360|0;ia=jb+356|0;Ua=jb+352|0;E=jb+348|0;ab=jb+344|0;D=jb+340|0;sa=jb+336|0;F=jb+332|0;G=jb+328|0;Ka=jb+324|0;cb=jb+320|0;P=jb+316|0;Ca=jb+312|0;Ra=jb+308|0;db=jb+304|0;Ha=jb+300|0;na=jb+296|0;Fa=jb+292|0;va=jb+288|0;Ga=jb+284|0;Ia=jb+280|0;ha=jb+276|0;z=jb+272|0;ea=jb+268|0;y=jb+264|0;fa=jb+260|0;ga=jb+256|0;ca=jb+252|0;da=jb+248|0;$a=jb+244|0;C=jb+240|0;Ya=jb+236|0;B=jb+232|0;Za=jb+228|0;_a=jb+224|0;Wa=jb+220|0;Xa=jb+216|0;Qa=jb+212|0;Ba=jb+208|0;Na=jb+204|0;Aa=jb+200|0;Oa=jb+196|0;Pa=jb+192|0;La=jb+188|0;Ma=jb+184|0;ma=jb+180|0;Ea=jb+176|0;hb=jb+172|0;Da=jb+168|0;ka=jb+164|0;la=jb+160|0;fb=jb+156|0;gb=jb+152|0;ja=jb+148|0;Sa=jb+144|0;ba=jb+140|0;_=jb+136|0;$=jb+132|0;aa=jb+128|0;ta=jb+124|0;ya=jb+120|0;T=jb+116|0;X=jb+112|0;wa=jb+108|0;xa=jb+104|0;Q=jb+100|0;Y=jb+96|0;ra=jb+92|0;S=jb+88|0;ua=jb+84|0;O=jb+80|0;W=jb+76|0;Z=jb+72|0;U=jb+68|0;V=jb+64|0;bb=jb+60|0;qa=jb+56|0;I=jb+52|0;M=jb+48|0;oa=jb+44|0;pa=jb+40|0;x=jb+36|0;L=jb+32|0;Va=jb+28|0;H=jb+24|0;eb=jb+20|0;Ja=jb+16|0;K=jb+12|0;N=jb+8|0;za=jb+4|0;J=jb;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[kb>>2]=k;c[u>>2]=l;c[v>>2]=m;g[jb+400>>2]=1.4142135381698608;g[jb+396>>2]=2.0;g[jb+392>>2]=.25;g[jb+388>>2]=.9510565400123596;g[jb+384>>2]=.5877852439880371;g[jb+380>>2]=.55901700258255;c[ib>>2]=c[kb>>2];while(1){if((c[ib>>2]|0)<=0)break
;g[w>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[fa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[ga>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[z>>2]=+g[fa>>2]-+g[ga>>2];g[ca>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[da>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[ea>>2]=+g[ca>>2]+ +g[da>>2];g[y>>2]=+g[ca>>2]-+g[da>>2];g[Ta>>2]=(+g[ea>>2]-+g[ha>>2])*.55901700258255;g[R>>2]=+g[y>>2]*.5877852439880371-+g[z>>2]*.9510565400123596;g[A>>2]=+g[y>>2]*.9510565400123596+ +g[z>>2]*.5877852439880371;g[ia>>2]=+g[ea>>2]+ +g[ha>>2];g[Ua>>2]=+g[w>>2]-+g[ia>>2]*.25;g[E>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[Za>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[_a>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[$a>>2]=+g[Za>>2]-+g[_a>>2];g[C>>2]=+g[Za>>2]+ +g[_a>>2];g[Wa>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[Xa>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[Ya>>2]=+g[Wa>>2]+ +g[Xa>>2];g[B>>2]=+g[Wa>>2]-+g[Xa>>2];g[ab>>2]=+g[Ya>>2]*.9510565400123596+ +g[$a>>2]*.5877852439880371;g[D>>2]=(+g[B>>2]+ +g[C>>2])*.55901700258255;g[sa>>2]=+g[Ya>>2]*.5877852439880371-+g[$a>>2]*.9510565400123596;g[F>>2]=+g[B>>2]-+g[C>>2];g[G>>2]=+g[E>>2]-+g[F>>2]*.25;g[Ka>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[Oa>>2]=+g[c[p>>2]>>2];g[Pa>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[Qa>>2]=+g[Oa>>2]+ +g[Pa>>2];g[Ba>>2]=+g[Oa>>2]-+g[Pa>>2];g[La>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[Ma>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[Aa>>2]=+g[La>>2]-+g[Ma>>2];g[cb>>2]=(+g[Na>>2]-+g[Qa>>2])*.55901700258255;g[P>>2]=+g[Aa>>2]*.5877852439880371-+g[Ba>>2]*.9510565400123596;g[Ca>>2]=+g[Aa>>2]*.9510565400123596+ +g[Ba>>2]*.5877852439880371;g[Ra>>2]=+g[Na>>2]+ +g[Qa>>2];g[db>>2]=+g[Ka>>2]-+g[Ra>>2]*.25;g[Ha>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[ka>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[la>>2]=+g[c[q>>2]>>2];g[ma>>2]=+g[ka>>2]-+g[la>>2];g[Ea>>2]=+g[la>>2]+ +g[ka>>2];g[fb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[gb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[hb>>2]=+g[fb>>2]+ 
+g[gb>>2];g[Da>>2]=+g[gb>>2]-+g[fb>>2];g[na>>2]=+g[hb>>2]*.9510565400123596+ +g[ma>>2]*.5877852439880371;g[Fa>>2]=(+g[Da>>2]-+g[Ea>>2])*.55901700258255;g[va>>2]=+g[ma>>2]*.9510565400123596-+g[hb>>2]*.5877852439880371;g[Ga>>2]=+g[Da>>2]+ +g[Ea>>2];g[Ia>>2]=+g[Ga>>2]*.25+ +g[Ha>>2];g[ja>>2]=+g[w>>2]+ +g[ia>>2];g[Sa>>2]=+g[Ka>>2]+ +g[Ra>>2];g[ba>>2]=+g[ja>>2]-+g[Sa>>2];g[_>>2]=+g[F>>2]+ +g[E>>2];g[$>>2]=+g[Ha>>2]-+g[Ga>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[c[n>>2]>>2]=(+g[ja>>2]+ +g[Sa>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[$>>2]-+g[_>>2])*2.0;g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[aa>>2]-+g[ba>>2])*1.4142135381698608;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[ba>>2]+ +g[aa>>2])*1.4142135381698608;g[ra>>2]=+g[Ua>>2]-+g[Ta>>2];g[ta>>2]=+g[ra>>2]+ +g[sa>>2];g[ya>>2]=+g[ra>>2]-+g[sa>>2];g[S>>2]=+g[G>>2]-+g[D>>2];g[T>>2]=+g[R>>2]+ +g[S>>2];g[X>>2]=+g[S>>2]-+g[R>>2];g[ua>>2]=+g[db>>2]-+g[cb>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[xa>>2]=+g[va>>2]-+g[ua>>2];g[O>>2]=+g[Fa>>2]+ +g[Ia>>2];g[Q>>2]=+g[O>>2]-+g[P>>2];g[Y>>2]=+g[P>>2]+ +g[O>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[ta>>2]+ +g[wa>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=(+g[xa>>2]-+g[ya>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=(+g[Y>>2]-+g[X>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=(+g[Q>>2]-+g[T>>2])*2.0;g[W>>2]=+g[wa>>2]-+g[ta>>2];g[Z>>2]=+g[X>>2]+ +g[Y>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=(+g[W>>2]-+g[Z>>2])*1.4142135381698608;g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=(+g[W>>2]+ +g[Z>>2])*1.4142135381698608;g[U>>2]=+g[ya>>2]+ +g[xa>>2];g[V>>2]=+g[T>>2]+ +g[Q>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[U>>2]-+g[V>>2])*1.4142135381698608;g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[U>>2]+ +g[V>>2])*1.4142135381698608;g[Va>>2]=+g[Ta>>2]+ +g[Ua>>2];g[bb>>2]=+g[Va>>2]-+g[ab>>2];g[qa>>2]=+g[Va>>2]+ +g[ab>>2];g[H>>2]=+g[D>>2]+ +g[G>>2];g[I>>2]=+g[A>>2]+ +g[H>>2];g[M>>2]=+g[H>>2]-+g[A>>2];g[eb>>2]=+g[cb>>2]+ +g[db>>2];g[oa>>2]=+g[eb>>2]+ +g[na>>2];g[pa>>2]=+g[na>>2]-+g[eb>>2];g[Ja>>2]=+g[Fa>>2]-+g[Ia>>2];g[x>>2]=+g[Ca>>2]+ 
+g[Ja>>2];g[L>>2]=+g[Ja>>2]-+g[Ca>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[bb>>2]+ +g[oa>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=(+g[pa>>2]-+g[qa>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=(+g[M>>2]+ +g[L>>2])*2.0;g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=(+g[I>>2]+ +g[x>>2])*2.0;g[K>>2]=+g[qa>>2]+ +g[pa>>2];g[N>>2]=+g[L>>2]-+g[M>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[K>>2]+ +g[N>>2])*1.4142135381698608;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=(+g[N>>2]-+g[K>>2])*1.4142135381698608;g[za>>2]=+g[bb>>2]-+g[oa>>2];g[J>>2]=+g[x>>2]-+g[I>>2];g[c[o>>2]>>2]=(+g[za>>2]+ +g[J>>2])*1.4142135381698608;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=(+g[J>>2]-+g[za>>2])*1.4142135381698608;c[ib>>2]=(c[ib>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=jb;return}function Bv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,42,10072);i=b;return}function Cv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0;nc=i;i=i+736|0;n=nc+732|0;o=nc+728|0;p=nc+724|0;q=nc+720|0;r=nc+716|0;s=nc+712|0;t=nc+708|0;oc=nc+704|0;u=nc+700|0;v=nc+696|0;mc=nc+608|0;Gb=nc+604|0;V=nc+600|0;jb=nc+596|0;Db=nc+592|0;U=nc+588|0;wb=nc+584|0;vb=nc+580|0;Sb=nc+576|0;E=nc+572|0;Xa=nc+568|0;Da=nc+564|0;aa=nc+560|0;x=nc+556|0;Wa=nc+552|0;Ca=nc+548|0;Z=nc+544|0;lc=nc+540|0;kc=nc+536|0;$b=nc+532|0;ra=nc+528|0;Ua=nc+524|0;I=nc+520|0;Ia=nc+516|0;ka=nc+512|0;Ta=nc+508|0;H=nc+504|0;ea=nc+500|0;Eb=nc+496|0;Fb=nc+492|0;w=nc+488|0;ib=nc+484|0;Bb=nc+480|0;Fa=nc+476|0;hb=nc+472|0;Cb=nc+468|0;kb=nc+464|0;Rb=nc+460|0;C=nc+456|0;Ib=nc+452|0;B=nc+448|0;y=nc+444|0;Lb=nc+440|0;Mb=nc+436|0;lb=nc+432|0;mb=nc+428|0;nb=nc+424|0;Ob=nc+420|0;Pb=nc+416|0;Qb=nc+412|0;pb=nc+408|0;qb=nc+404|0;rb=nc+400|0;sb=nc+396|0;tb=nc+392|0;ub=nc+388|0;D=nc+384|0;_=nc+380|0;A=nc+376|0;$=nc+372|0;z=nc+368|0;Nb=nc+364|0;Y=nc+360|0;Kb=nc+356|0;X=nc+352|0;Jb=nc+348|0;Tb=nc+344|0;_b=nc+340|0;pa=nc+336|0;G=nc+332|0;oa=nc+328|0;la=nc+324|0;ha=nc+320|0;ia=nc+316|0;Ub=nc+312|0;Vb=nc+308|0;Wb=nc+304|0;Xb=nc+300|0;Yb=nc+296|0;Zb=nc+292|0;ec=nc+288|0;fc=nc+284|0;gc=nc+280|0;hc=nc+276|0;ic=nc+272|0;jc=nc+268|0;qa=nc+264|0;Ga=nc+260|0;na=nc+256|0;Ha=nc+252|0;ma=nc+248|0;ja=nc+244|0;da=nc+240|
0;ga=nc+236|0;ca=nc+232|0;fa=nc+228|0;cc=nc+224|0;ac=nc+220|0;bc=nc+216|0;yb=nc+212|0;Ab=nc+208|0;ob=nc+204|0;xb=nc+200|0;zb=nc+196|0;dc=nc+192|0;eb=nc+188|0;gb=nc+184|0;_a=nc+180|0;Za=nc+176|0;$a=nc+172|0;ab=nc+168|0;fb=nc+164|0;bb=nc+160|0;cb=nc+156|0;db=nc+152|0;Va=nc+148|0;Ya=nc+144|0;za=nc+140|0;Ba=nc+136|0;Hb=nc+132|0;ta=nc+128|0;ua=nc+124|0;va=nc+120|0;Aa=nc+116|0;wa=nc+112|0;xa=nc+108|0;ya=nc+104|0;F=nc+100|0;sa=nc+96|0;O=nc+92|0;S=nc+88|0;L=nc+84|0;K=nc+80|0;P=nc+76|0;Q=nc+72|0;T=nc+68|0;R=nc+64|0;M=nc+60|0;N=nc+56|0;Ea=nc+52|0;J=nc+48|0;Qa=nc+44|0;Sa=nc+40|0;W=nc+36|0;Ka=nc+32|0;La=nc+28|0;Ma=nc+24|0;Ra=nc+20|0;Na=nc+16|0;Oa=nc+12|0;Pa=nc+8|0;ba=nc+4|0;Ja=nc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[oc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[nc+692>>2]=.9685831665992737;g[nc+688>>2]=.24868988990783691;g[nc+684>>2]=.6845471262931824;g[nc+680>>2]=.728968620300293;g[nc+676>>2]=.06279052048921585;g[nc+672>>2]=.9980267286300659;g[nc+668>>2]=.8763066530227661;g[nc+664>>2]=.4817536771297455;g[nc+660>>2]=.5358268022537231;g[nc+656>>2]=.8443279266357422;g[nc+652>>2]=.9048270583152771;g[nc+648>>2]=.4257792830467224;g[nc+644>>2]=.25;g[nc+640>>2]=.9510565400123596;g[nc+636>>2]=.5877852439880371;g[nc+632>>2]=.55901700258255;g[nc+628>>2]=.5;g[nc+624>>2]=2.0;g[nc+620>>2]=1.1180340051651;g[nc+616>>2]=1.1755704879760742;g[nc+612>>2]=1.9021130800247192;c[mc>>2]=c[oc>>2];while(1){if((c[mc>>2]|0)<=0)break;g[Eb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[Fb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[Gb>>2]=+g[Eb>>2]*1.9021130800247192-+g[Fb>>2]*1.1755704879760742;g[V>>2]=+g[Fb>>2]*1.9021130800247192+ +g[Eb>>2]*1.1755704879760742;g[w>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[Fa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[hb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[ib>>2]=+g[Fa>>2]+ +g[hb>>2];g[Bb>>2]=(+g[hb>>2]-+g[Fa>>2])*1.1180340051651;g[jb>>2]=+g[ib>>2]*2.0+ +g[w>>2];g[Cb>>2]=+g[ib>>2]*.5-+g[w>>2];g[Db>>2]=+g[Bb>>2]-+g[Cb>>2];g[U>>2]=+g[Cb>>2]+ 
+g[Bb>>2];g[kb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[wb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[lb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[mb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[nb>>2]=+g[lb>>2]+ +g[mb>>2];g[Ob>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[Pb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[Qb>>2]=+g[Ob>>2]+ +g[Pb>>2];g[Rb>>2]=+g[nb>>2]+ +g[Qb>>2];g[C>>2]=+g[Ob>>2]-+g[Pb>>2];g[Ib>>2]=(+g[Qb>>2]-+g[nb>>2])*.55901700258255;g[B>>2]=+g[mb>>2]-+g[lb>>2];g[pb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[qb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[rb>>2]=+g[pb>>2]-+g[qb>>2];g[sb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[tb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[ub>>2]=+g[sb>>2]-+g[tb>>2];g[vb>>2]=+g[rb>>2]+ +g[ub>>2];g[y>>2]=(+g[rb>>2]-+g[ub>>2])*.55901700258255;g[Lb>>2]=+g[pb>>2]+ +g[qb>>2];g[Mb>>2]=+g[sb>>2]+ +g[tb>>2];g[Sb>>2]=+g[kb>>2]+ +g[Rb>>2];g[D>>2]=+g[B>>2]*.5877852439880371+ +g[C>>2]*.9510565400123596;g[_>>2]=+g[B>>2]*.9510565400123596-+g[C>>2]*.5877852439880371;g[z>>2]=+g[wb>>2]-+g[vb>>2]*.25;g[A>>2]=+g[y>>2]-+g[z>>2];g[$>>2]=+g[y>>2]+ +g[z>>2];g[E>>2]=+g[A>>2]-+g[D>>2];g[Xa>>2]=+g[_>>2]-+g[$>>2];g[Da>>2]=+g[D>>2]+ +g[A>>2];g[aa>>2]=+g[_>>2]+ +g[$>>2];g[Nb>>2]=+g[Lb>>2]*.5877852439880371-+g[Mb>>2]*.9510565400123596;g[Y>>2]=+g[Lb>>2]*.9510565400123596+ +g[Mb>>2]*.5877852439880371;g[Jb>>2]=+g[Rb>>2]*.25-+g[kb>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[X>>2]=+g[Jb>>2]+ +g[Ib>>2];g[x>>2]=+g[Kb>>2]+ +g[Nb>>2];g[Wa>>2]=+g[X>>2]+ +g[Y>>2];g[Ca>>2]=+g[Nb>>2]-+g[Kb>>2];g[Z>>2]=+g[X>>2]-+g[Y>>2];g[Tb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[lc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[Ub>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[Vb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[Wb>>2]=+g[Ub>>2]+ +g[Vb>>2];g[Xb>>2]=+g[c[p>>2]>>2];g[Yb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[_b>>2]=+g[Wb>>2]+ 
+g[Zb>>2];g[pa>>2]=+g[Xb>>2]-+g[Yb>>2];g[G>>2]=(+g[Zb>>2]-+g[Wb>>2])*.55901700258255;g[oa>>2]=+g[Vb>>2]-+g[Ub>>2];g[ec>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[fc>>2]=+g[c[q>>2]>>2];g[gc>>2]=+g[ec>>2]-+g[fc>>2];g[hc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[ic>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[jc>>2]=+g[hc>>2]-+g[ic>>2];g[kc>>2]=+g[gc>>2]-+g[jc>>2];g[la>>2]=(+g[jc>>2]+ +g[gc>>2])*.55901700258255;g[ha>>2]=+g[hc>>2]+ +g[ic>>2];g[ia>>2]=+g[fc>>2]+ +g[ec>>2];g[$b>>2]=+g[Tb>>2]+ +g[_b>>2];g[qa>>2]=+g[oa>>2]*.5877852439880371+ +g[pa>>2]*.9510565400123596;g[Ga>>2]=+g[oa>>2]*.9510565400123596-+g[pa>>2]*.5877852439880371;g[ma>>2]=+g[kc>>2]*.25+ +g[lc>>2];g[na>>2]=+g[la>>2]-+g[ma>>2];g[Ha>>2]=+g[la>>2]+ +g[ma>>2];g[ra>>2]=+g[na>>2]-+g[qa>>2];g[Ua>>2]=+g[Ha>>2]-+g[Ga>>2];g[I>>2]=+g[qa>>2]+ +g[na>>2];g[Ia>>2]=+g[Ga>>2]+ +g[Ha>>2];g[ja>>2]=+g[ha>>2]*.5877852439880371-+g[ia>>2]*.9510565400123596;g[da>>2]=+g[ha>>2]*.9510565400123596+ +g[ia>>2]*.5877852439880371;g[fa>>2]=+g[_b>>2]*.25-+g[Tb>>2];g[ga>>2]=+g[G>>2]-+g[fa>>2];g[ca>>2]=+g[fa>>2]+ +g[G>>2];g[ka>>2]=+g[ga>>2]+ +g[ja>>2];g[Ta>>2]=+g[ca>>2]+ +g[da>>2];g[H>>2]=+g[ja>>2]-+g[ga>>2];g[ea>>2]=+g[ca>>2]-+g[da>>2];g[cc>>2]=(+g[$b>>2]-+g[Sb>>2])*1.1180340051651;g[ac>>2]=+g[Sb>>2]+ +g[$b>>2];g[bc>>2]=+g[ac>>2]*.5-+g[jb>>2];g[ob>>2]=+g[kc>>2]-+g[lc>>2];g[xb>>2]=+g[vb>>2]+ +g[wb>>2];g[yb>>2]=+g[ob>>2]*1.1755704879760742-+g[xb>>2]*1.9021130800247192;g[Ab>>2]=+g[xb>>2]*1.1755704879760742+ +g[ob>>2]*1.9021130800247192;g[c[n>>2]>>2]=+g[ac>>2]*2.0+ +g[jb>>2];g[zb>>2]=+g[cc>>2]-+g[bc>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[zb>>2]+ +g[Ab>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ab>>2]-+g[zb>>2];g[dc>>2]=+g[bc>>2]+ +g[cc>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[dc>>2]+ 
+g[yb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[yb>>2]-+g[dc>>2];g[cb>>2]=+g[Ua>>2]*.4257792830467224-+g[Ta>>2]*.9048270583152771;g[db>>2]=+g[Wa>>2]*.8443279266357422-+g[Xa>>2]*.5358268022537231;g[eb>>2]=+g[cb>>2]*1.1755704879760742-+g[db>>2]*1.9021130800247192;g[gb>>2]=+g[db>>2]*1.1755704879760742+ +g[cb>>2]*1.9021130800247192;g[_a>>2]=+g[U>>2]+ +g[V>>2];g[Va>>2]=+g[Ta>>2]*.4257792830467224+ +g[Ua>>2]*.9048270583152771;g[Ya>>2]=+g[Wa>>2]*.5358268022537231+ +g[Xa>>2]*.8443279266357422;g[Za>>2]=+g[Va>>2]-+g[Ya>>2];g[$a>>2]=+g[Za>>2]*.5+ +g[_a>>2];g[ab>>2]=(+g[Ya>>2]+ +g[Va>>2])*1.1180340051651;g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Za>>2]*2.0-+g[_a>>2];g[fb>>2]=+g[ab>>2]-+g[$a>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[fb>>2]+ +g[gb>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[gb>>2]-+g[fb>>2];g[bb>>2]=+g[$a>>2]+ +g[ab>>2];g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[bb>>2]+ +g[eb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[eb>>2]-+g[bb>>2];g[xa>>2]=+g[x>>2]*.4817536771297455+ +g[E>>2]*.8763066530227661;g[ya>>2]=+g[ka>>2]*.8443279266357422+ +g[ra>>2]*.5358268022537231;g[za>>2]=+g[xa>>2]*1.9021130800247192+ +g[ya>>2]*1.1755704879760742;g[Ba>>2]=+g[ya>>2]*1.9021130800247192-+g[xa>>2]*1.1755704879760742;g[Hb>>2]=+g[Db>>2]-+g[Gb>>2];g[F>>2]=+g[x>>2]*.8763066530227661-+g[E>>2]*.4817536771297455;g[sa>>2]=+g[ka>>2]*.5358268022537231-+g[ra>>2]*.8443279266357422;g[ta>>2]=+g[F>>2]+ +g[sa>>2];g[ua>>2]=+g[ta>>2]*.5-+g[Hb>>2];g[va>>2]=(+g[sa>>2]-+g[F>>2])*1.1180340051651;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ta>>2]*2.0+ +g[Hb>>2];g[Aa>>2]=+g[va>>2]-+g[ua>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Aa>>2]+ +g[Ba>>2];g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Ba>>2]-+g[Aa>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wa>>2]+ 
+g[za>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[za>>2]-+g[wa>>2];g[M>>2]=+g[H>>2]*.9980267286300659-+g[I>>2]*.06279052048921585;g[N>>2]=+g[Da>>2]*.728968620300293-+g[Ca>>2]*.6845471262931824;g[O>>2]=+g[M>>2]*1.1755704879760742-+g[N>>2]*1.9021130800247192;g[S>>2]=+g[N>>2]*1.1755704879760742+ +g[M>>2]*1.9021130800247192;g[L>>2]=+g[Db>>2]+ +g[Gb>>2];g[Ea>>2]=+g[Ca>>2]*.728968620300293+ +g[Da>>2]*.6845471262931824;g[J>>2]=+g[H>>2]*.06279052048921585+ +g[I>>2]*.9980267286300659;g[K>>2]=+g[Ea>>2]+ +g[J>>2];g[P>>2]=+g[K>>2]*.5+ +g[L>>2];g[Q>>2]=(+g[J>>2]-+g[Ea>>2])*1.1180340051651;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[K>>2]*2.0-+g[L>>2];g[T>>2]=+g[Q>>2]-+g[P>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[S>>2]-+g[T>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[S>>2]+ +g[T>>2];g[R>>2]=+g[P>>2]+ +g[Q>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[O>>2]-+g[R>>2];g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[O>>2]+ +g[R>>2];g[Oa>>2]=+g[Z>>2]*.24868988990783691+ +g[aa>>2]*.9685831665992737;g[Pa>>2]=+g[ea>>2]*.4817536771297455+ +g[Ia>>2]*.8763066530227661;g[Qa>>2]=+g[Oa>>2]*1.9021130800247192+ +g[Pa>>2]*1.1755704879760742;g[Sa>>2]=+g[Pa>>2]*1.9021130800247192-+g[Oa>>2]*1.1755704879760742;g[W>>2]=+g[U>>2]-+g[V>>2];g[ba>>2]=+g[Z>>2]*.9685831665992737-+g[aa>>2]*.24868988990783691;g[Ja>>2]=+g[ea>>2]*.8763066530227661-+g[Ia>>2]*.4817536771297455;g[Ka>>2]=+g[ba>>2]+ +g[Ja>>2];g[La>>2]=+g[Ka>>2]*.5-+g[W>>2];g[Ma>>2]=(+g[Ja>>2]-+g[ba>>2])*1.1180340051651;g[c[o>>2]>>2]=+g[Ka>>2]*2.0+ +g[W>>2];g[Ra>>2]=+g[Ma>>2]-+g[La>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[Ra>>2]+ +g[Sa>>2];g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=+g[Sa>>2]-+g[Ra>>2];g[Na>>2]=+g[La>>2]+ +g[Ma>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[Na>>2]+ 
+g[Qa>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Qa>>2]-+g[Na>>2];c[mc>>2]=(c[mc>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=nc;return}function Dv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,43,10120);i=b;return}function Ev(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0;w=i;i=i+64|0;n=w+52|0;o=w+48|0;p=w+44|0;q=w+40|0;x=w+24|0;r=w+20|0;s=w+16|0;v=w+8|0;t=w+4|0;u=w;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[w+36>>2]=f;c[w+32>>2]=h;c[w+28>>2]=j;c[x>>2]=k;c[r>>2]=l;c[s>>2]=m;g[w+12>>2]=2.0;c[v>>2]=c[x>>2];while(1){if((c[v>>2]|0)<=0)break;g[t>>2]=+g[c[p>>2]>>2];g[u>>2]=+g[c[q>>2]>>2];g[c[n>>2]>>2]=+g[t>>2]*2.0;g[c[o>>2]>>2]=-(+g[u>>2]*2.0);c[v>>2]=(c[v>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[s>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[s>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[r>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[r>>2]<<2)}i=w;return}function Fv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,44,10168);i=b;return}function Gv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,_=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0,ia=0,ja=0,ka=0,la=0,ma=0,na=0,oa=0,pa=0,qa=0,ra=0,sa=0,ta=0,ua=0,va=0,wa=0,xa=0,ya=0,za=0,Aa=0,Ba=0,Ca=0,Da=0,Ea=0,Fa=0,Ga=0,Ha=0,Ia=0,Ja=0,Ka=0,La=0,Ma=0,Na=0,Oa=0,Pa=0,Qa=0,Ra=0,Sa=0,Ta=0,Ua=0,Va=0,Wa=0,Xa=0,Ya=0,Za=0,_a=0,$a=0,ab=0,bb=0,cb=0,db=0,eb=0,fb=0,gb=0,hb=0,ib=0,jb=0,kb=0,lb=0,mb=0,nb=0,ob=0,pb=0,qb=0,rb=0,sb=0,tb=0,ub=0,vb=0,wb=0,xb=0,yb=0,zb=0,Ab=0,Bb=0,Cb=0,Db=0,Eb=0,Fb=0,Gb=0,Hb=0,Ib=0,Jb=0,Kb=0,Lb=0,Mb=0,Nb=0,Ob=0,Pb=0,Qb=0,Rb=0,Sb=0,Tb=0,Ub=0,Vb=0,Wb=0,Xb=0,Yb=0,Zb=0,_b=0,$b=0,ac=0,bc=0,cc=0,dc=0,ec=0,fc=0,gc=0,hc=0,ic=0,jc=0,kc=0,lc=0,mc=0,nc=0,oc=0,pc=0,qc=0,rc=0,sc=0,tc=0,uc=0,vc=0,wc=0,xc=0,yc=0,zc=0,Ac=0,Bc=0,Cc=0,Dc=0,Ec=0,Fc=0,Gc=0,Hc=0,Ic=0,Jc=0,Kc=0;Jc=i;i=i+816|0;n=Jc+812|0;o=Jc+808|0;p=Jc+804|0;q=Jc+800|0;r=Jc+796|0;s=Jc+792|0;t=Jc+788|0;Kc=Jc+784|0;u=Jc+780|0;v=Jc+776|0;Ic=Jc+696|0;Hb=Jc+692|0;lb=Jc+688|0;Va=Jc+684|0;Hc=Jc+680|0;pa=Jc+676|0;P=Jc+672|0;ea=Jc+668|0;Ca=Jc+664|0;mc=Jc+660|0;Ia=Jc+656|0;Ua=Jc+652|0;mb=Jc+648|0;ka=Jc+644|0;Da=Jc+640|0;Ub=Jc+636|0;Q=Jc+632|0;uc=Jc+628|0;Ra=Jc+624|0;ec=Jc+620|0;fa=Jc+616|0;J=Jc+612|0;S=Jc+608|0;Oa=Jc+604|0;ob=Jc+600|0;Bc=Jc+596|0;Qa=Jc+592|0;E=Jc+588|0;ga=Jc+584|0;M=Jc+580|0;T=Jc+576|0;gb=Jc+572|0;pb=Jc+568|0;Db=Jc+564|0;Dc=Jc+560|0;oa=Jc+556|0;kb=Jc+552|0;Gb=Jc+548|0;la=Jc+544|0;Gc=Jc+540|0;jb=Jc+536|0;w=Jc+532|0;Fa=Jc+528|0;ma=Jc+524|0;na=Jc+520|0;Eb=Jc+516|0;Fb=Jc+512|0;Ec=Jc+508|0;Fc=Jc+504|0;ic=Jc+500|0;Kb=Jc+496|0;Nb=Jc+492|0;Ha=Jc+488|0;lc=Jc+484|0;Pb=Jc+480|0;Sb=Jc+476|0;Ga=Jc+472|0;Ib=Jc+468|0;Jb=Jc+464|0;Lb=Jc+460|0;Mb=Jc+456|0;jc=Jc+452|0;kc=Jc+448|0;Qb=Jc+444|0;Rb=Jc+440|0;ia=Jc+436|0;ja=Jc+432|0;Ob=Jc+428|0;Tb=Jc+424|0;qc=Jc+420|0;Wb=Jc+416|0;cc=Jc+412|0;La=Jc+408|0;tc=Jc+404|0;$b=Jc+400|0;Zb=Jc+396|0;Ma=Jc+392|0;_b=Jc+388|0;dc=Jc+384|0;oc=Jc+380|0;pc=Jc+376|0;ac=Jc+372|0;bc=Jc+368
|0;rc=Jc+364|0;sc=Jc+360|0;Xb=Jc+356|0;Yb=Jc+352|0;H=Jc+348|0;I=Jc+344|0;Ka=Jc+340|0;Na=Jc+336|0;xc=Jc+332|0;fc=Jc+328|0;C=Jc+324|0;db=Jc+320|0;Ac=Jc+316|0;z=Jc+312|0;x=Jc+308|0;eb=Jc+304|0;y=Jc+300|0;D=Jc+296|0;vc=Jc+292|0;wc=Jc+288|0;A=Jc+284|0;B=Jc+280|0;yc=Jc+276|0;zc=Jc+272|0;gc=Jc+268|0;hc=Jc+264|0;K=Jc+260|0;L=Jc+256|0;Pa=Jc+252|0;fb=Jc+248|0;nc=Jc+244|0;Cc=Jc+240|0;$a=Jc+236|0;ab=Jc+232|0;bb=Jc+228|0;cb=Jc+224|0;wb=Jc+220|0;Ab=Jc+216|0;zb=Jc+212|0;Bb=Jc+208|0;ub=Jc+204|0;vb=Jc+200|0;xb=Jc+196|0;yb=Jc+192|0;Ta=Jc+188|0;Za=Jc+184|0;Ya=Jc+180|0;_a=Jc+176|0;Cb=Jc+172|0;Sa=Jc+168|0;Wa=Jc+164|0;Xa=Jc+160|0;G=Jc+156|0;sa=Jc+152|0;ra=Jc+148|0;ta=Jc+144|0;Vb=Jc+140|0;F=Jc+136|0;ha=Jc+132|0;qa=Jc+128|0;wa=Jc+124|0;Aa=Jc+120|0;za=Jc+116|0;Ba=Jc+112|0;ua=Jc+108|0;va=Jc+104|0;xa=Jc+100|0;ya=Jc+96|0;_=Jc+92|0;ca=Jc+88|0;ba=Jc+84|0;da=Jc+80|0;Y=Jc+76|0;Z=Jc+72|0;$=Jc+68|0;aa=Jc+64|0;ib=Jc+60|0;sb=Jc+56|0;rb=Jc+52|0;tb=Jc+48|0;Ja=Jc+44|0;hb=Jc+40|0;nb=Jc+36|0;qb=Jc+32|0;O=Jc+28|0;W=Jc+24|0;V=Jc+20|0;X=Jc+16|0;Ea=Jc+12|0;N=Jc+8|0;R=Jc+4|0;U=Jc;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[Kc>>2]=k;c[u>>2]=l;c[v>>2]=m;g[Jc+772>>2]=1.913880705833435;g[Jc+768>>2]=.580569326877594;g[Jc+764>>2]=.9427934885025024;g[Jc+760>>2]=1.7638425827026367;g[Jc+756>>2]=1.5460208654403687;g[Jc+752>>2]=1.2687865495681763;g[Jc+748>>2]=.1960342824459076;g[Jc+744>>2]=1.990369439125061;g[Jc+740>>2]=.7653668522834778;g[Jc+736>>2]=1.8477590084075928;g[Jc+732>>2]=1.9615705013275146;g[Jc+728>>2]=.39018064737319946;g[Jc+724>>2]=1.111140489578247;g[Jc+720>>2]=1.662939190864563;g[Jc+716>>2]=1.4142135381698608;g[Jc+712>>2]=2.0;g[Jc+708>>2]=.3826834261417389;g[Jc+704>>2]=.9238795042037964;g[Jc+700>>2]=.7071067690849304;c[Ic>>2]=c[Kc>>2];while(1){if((c[Ic>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[Fa>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*15<<2)>>2];g[Db>>2]=+g[w>>2]+ 
+g[Fa>>2];g[Dc>>2]=+g[w>>2]-+g[Fa>>2];g[ma>>2]=+g[c[q>>2]>>2];g[na>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*15<<2)>>2];g[oa>>2]=+g[ma>>2]+ +g[na>>2];g[kb>>2]=+g[na>>2]-+g[ma>>2];g[Eb>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<3<<2)>>2];g[Fb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*7<<2)>>2];g[Gb>>2]=+g[Eb>>2]+ +g[Fb>>2];g[la>>2]=+g[Eb>>2]-+g[Fb>>2];g[Ec>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<3<<2)>>2];g[Fc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*7<<2)>>2];g[Gc>>2]=+g[Ec>>2]+ +g[Fc>>2];g[jb>>2]=+g[Ec>>2]-+g[Fc>>2];g[Hb>>2]=+g[Db>>2]+ +g[Gb>>2];g[lb>>2]=+g[jb>>2]+ +g[kb>>2];g[Va>>2]=+g[kb>>2]-+g[jb>>2];g[Hc>>2]=+g[Dc>>2]-+g[Gc>>2];g[pa>>2]=+g[la>>2]+ +g[oa>>2];g[P>>2]=+g[la>>2]-+g[oa>>2];g[ea>>2]=+g[Db>>2]-+g[Gb>>2];g[Ca>>2]=+g[Dc>>2]+ +g[Gc>>2];g[Ib>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2<<2)>>2];g[Jb>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*11<<2)>>2];g[ic>>2]=+g[Ib>>2]+ +g[Jb>>2];g[Kb>>2]=+g[Ib>>2]-+g[Jb>>2];g[Lb>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2<<2)>>2];g[Mb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*11<<2)>>2];g[Nb>>2]=+g[Lb>>2]+ +g[Mb>>2];g[Ha>>2]=+g[Lb>>2]-+g[Mb>>2];g[jc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*3<<2)>>2];g[kc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*12<<2)>>2];g[lc>>2]=+g[jc>>2]+ +g[kc>>2];g[Pb>>2]=+g[jc>>2]-+g[kc>>2];g[Qb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*3<<2)>>2];g[Rb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*12<<2)>>2];g[Sb>>2]=+g[Qb>>2]+ +g[Rb>>2];g[Ga>>2]=+g[Rb>>2]-+g[Qb>>2];g[mc>>2]=+g[ic>>2]+ +g[lc>>2];g[Ia>>2]=+g[Ga>>2]-+g[Ha>>2];g[Ua>>2]=+g[Ha>>2]+ +g[Ga>>2];g[mb>>2]=+g[ic>>2]-+g[lc>>2];g[ia>>2]=+g[Kb>>2]+ +g[Nb>>2];g[ja>>2]=+g[Pb>>2]+ +g[Sb>>2];g[ka>>2]=(+g[ia>>2]-+g[ja>>2])*.7071067690849304;g[Da>>2]=(+g[ia>>2]+ +g[ja>>2])*.7071067690849304;g[Ob>>2]=+g[Kb>>2]-+g[Nb>>2];g[Tb>>2]=+g[Pb>>2]-+g[Sb>>2];g[Ub>>2]=(+g[Ob>>2]+ +g[Tb>>2])*.7071067690849304;g[Q>>2]=(+g[Ob>>2]-+g[Tb>>2])*.7071067690849304;g[oc>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[pc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*13<<2)>>2];g[qc>>2]=+g[oc>>2]+ 
+g[pc>>2];g[Wb>>2]=+g[oc>>2]-+g[pc>>2];g[ac>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<1<<2)>>2];g[bc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*13<<2)>>2];g[cc>>2]=+g[ac>>2]+ +g[bc>>2];g[La>>2]=+g[ac>>2]-+g[bc>>2];g[rc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*10<<2)>>2];g[sc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*5<<2)>>2];g[tc>>2]=+g[rc>>2]+ +g[sc>>2];g[$b>>2]=+g[rc>>2]-+g[sc>>2];g[Xb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*10<<2)>>2];g[Yb>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*5<<2)>>2];g[Zb>>2]=+g[Xb>>2]+ +g[Yb>>2];g[Ma>>2]=+g[Xb>>2]-+g[Yb>>2];g[uc>>2]=+g[qc>>2]+ +g[tc>>2];g[Ra>>2]=+g[Ma>>2]+ +g[La>>2];g[_b>>2]=+g[Wb>>2]-+g[Zb>>2];g[dc>>2]=+g[$b>>2]+ +g[cc>>2];g[ec>>2]=+g[_b>>2]*.9238795042037964-+g[dc>>2]*.3826834261417389;g[fa>>2]=+g[_b>>2]*.3826834261417389+ +g[dc>>2]*.9238795042037964;g[H>>2]=+g[Wb>>2]+ +g[Zb>>2];g[I>>2]=+g[cc>>2]-+g[$b>>2];g[J>>2]=+g[H>>2]*.3826834261417389-+g[I>>2]*.9238795042037964;g[S>>2]=+g[H>>2]*.9238795042037964+ +g[I>>2]*.3826834261417389;g[Ka>>2]=+g[qc>>2]-+g[tc>>2];g[Na>>2]=+g[La>>2]-+g[Ma>>2];g[Oa>>2]=+g[Ka>>2]-+g[Na>>2];g[ob>>2]=+g[Ka>>2]+ +g[Na>>2];g[vc>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[wc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*14<<2)>>2];g[xc>>2]=+g[vc>>2]+ +g[wc>>2];g[fc>>2]=+g[vc>>2]-+g[wc>>2];g[A>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[B>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*14<<2)>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[db>>2]=+g[B>>2]-+g[A>>2];g[yc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*6<<2)>>2];g[zc>>2]=+g[(c[p>>2]|0)+((c[s>>2]|0)*9<<2)>>2];g[Ac>>2]=+g[yc>>2]+ +g[zc>>2];g[z>>2]=+g[yc>>2]-+g[zc>>2];g[gc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*6<<2)>>2];g[hc>>2]=+g[(c[q>>2]|0)+((c[t>>2]|0)*9<<2)>>2];g[x>>2]=+g[gc>>2]+ +g[hc>>2];g[eb>>2]=+g[gc>>2]-+g[hc>>2];g[Bc>>2]=+g[xc>>2]+ +g[Ac>>2];g[Qa>>2]=+g[eb>>2]+ +g[db>>2];g[y>>2]=+g[fc>>2]-+g[x>>2];g[D>>2]=+g[z>>2]-+g[C>>2];g[E>>2]=+g[y>>2]*.9238795042037964+ +g[D>>2]*.3826834261417389;g[ga>>2]=+g[D>>2]*.9238795042037964-+g[y>>2]*.3826834261417389;g[K>>2]=+g[fc>>2]+ +g[x>>2];g[L>>2]=+g[z>>2]+ 
+g[C>>2];g[M>>2]=+g[K>>2]*.3826834261417389-+g[L>>2]*.9238795042037964;g[T>>2]=+g[K>>2]*.9238795042037964+ +g[L>>2]*.3826834261417389;g[Pa>>2]=+g[xc>>2]-+g[Ac>>2];g[fb>>2]=+g[db>>2]-+g[eb>>2];g[gb>>2]=+g[Pa>>2]+ +g[fb>>2];g[pb>>2]=+g[fb>>2]-+g[Pa>>2];g[nc>>2]=+g[Hb>>2]+ +g[mc>>2];g[Cc>>2]=+g[uc>>2]+ +g[Bc>>2];g[$a>>2]=+g[nc>>2]-+g[Cc>>2];g[ab>>2]=+g[Ra>>2]+ +g[Qa>>2];g[bb>>2]=+g[Va>>2]-+g[Ua>>2];g[cb>>2]=+g[ab>>2]+ +g[bb>>2];g[c[n>>2]>>2]=(+g[nc>>2]+ +g[Cc>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<3<<2)>>2]=(+g[bb>>2]-+g[ab>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<2<<2)>>2]=(+g[$a>>2]+ +g[cb>>2])*1.4142135381698608;g[(c[n>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=(+g[cb>>2]-+g[$a>>2])*1.4142135381698608;g[ub>>2]=+g[ea>>2]-+g[Ia>>2];g[vb>>2]=(+g[pb>>2]-+g[ob>>2])*.7071067690849304;g[wb>>2]=+g[ub>>2]+ +g[vb>>2];g[Ab>>2]=+g[ub>>2]-+g[vb>>2];g[xb>>2]=+g[mb>>2]+ +g[lb>>2];g[yb>>2]=(+g[Oa>>2]-+g[gb>>2])*.7071067690849304;g[zb>>2]=+g[xb>>2]-+g[yb>>2];g[Bb>>2]=+g[yb>>2]+ +g[xb>>2];g[(c[n>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[wb>>2]*1.662939190864563+ +g[zb>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[Bb>>2]*.39018064737319946-+g[Ab>>2]*1.9615705013275146;g[(c[n>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[zb>>2]*1.662939190864563-+g[wb>>2]*1.111140489578247;g[(c[n>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[Ab>>2]*.39018064737319946+ +g[Bb>>2]*1.9615705013275146;g[Cb>>2]=+g[Hb>>2]-+g[mc>>2];g[Sa>>2]=+g[Qa>>2]-+g[Ra>>2];g[Ta>>2]=+g[Cb>>2]+ +g[Sa>>2];g[Za>>2]=+g[Cb>>2]-+g[Sa>>2];g[Wa>>2]=+g[Ua>>2]+ +g[Va>>2];g[Xa>>2]=+g[uc>>2]-+g[Bc>>2];g[Ya>>2]=+g[Wa>>2]-+g[Xa>>2];g[_a>>2]=+g[Xa>>2]+ +g[Wa>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[Ta>>2]*1.8477590084075928+ +g[Ya>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[_a>>2]*.7653668522834778-+g[Za>>2]*1.8477590084075928;g[(c[n>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[Ya>>2]*1.8477590084075928-+g[Ta>>2]*.7653668522834778;g[(c[n>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Za>>2]*.7653668522834778+ +g[_a>>2]*1.8477590084075928;g[Vb>>2]=+g[Hc>>2]+ 
+g[Ub>>2];g[F>>2]=+g[ec>>2]+ +g[E>>2];g[G>>2]=+g[Vb>>2]+ +g[F>>2];g[sa>>2]=+g[Vb>>2]-+g[F>>2];g[ha>>2]=+g[fa>>2]+ +g[ga>>2];g[qa>>2]=+g[ka>>2]+ +g[pa>>2];g[ra>>2]=+g[ha>>2]+ +g[qa>>2];g[ta>>2]=+g[ha>>2]-+g[qa>>2];g[c[o>>2]>>2]=+g[G>>2]*1.990369439125061-+g[ra>>2]*.1960342824459076;g[(c[o>>2]|0)+((c[r>>2]|0)*12<<2)>>2]=+g[ta>>2]*1.2687865495681763-+g[sa>>2]*1.5460208654403687;g[(c[o>>2]|0)+(c[r>>2]<<3<<2)>>2]=-(+g[G>>2]*.1960342824459076+ +g[ra>>2]*1.990369439125061);g[(c[o>>2]|0)+(c[r>>2]<<2<<2)>>2]=+g[sa>>2]*1.2687865495681763+ +g[ta>>2]*1.5460208654403687;g[ua>>2]=+g[Hc>>2]-+g[Ub>>2];g[va>>2]=+g[ga>>2]-+g[fa>>2];g[wa>>2]=+g[ua>>2]+ +g[va>>2];g[Aa>>2]=+g[ua>>2]-+g[va>>2];g[xa>>2]=+g[ka>>2]-+g[pa>>2];g[ya>>2]=+g[ec>>2]-+g[E>>2];g[za>>2]=+g[xa>>2]-+g[ya>>2];g[Ba>>2]=+g[ya>>2]+ +g[xa>>2];g[(c[o>>2]|0)+(c[r>>2]<<1<<2)>>2]=+g[wa>>2]*1.7638425827026367+ +g[za>>2]*.9427934885025024;g[(c[o>>2]|0)+((c[r>>2]|0)*14<<2)>>2]=+g[Ba>>2]*.580569326877594-+g[Aa>>2]*1.913880705833435;g[(c[o>>2]|0)+((c[r>>2]|0)*10<<2)>>2]=+g[za>>2]*1.7638425827026367-+g[wa>>2]*.9427934885025024;g[(c[o>>2]|0)+((c[r>>2]|0)*6<<2)>>2]=+g[Aa>>2]*.580569326877594+ +g[Ba>>2]*1.913880705833435;g[Y>>2]=+g[Ca>>2]+ +g[Da>>2];g[Z>>2]=+g[S>>2]+ +g[T>>2];g[_>>2]=+g[Y>>2]-+g[Z>>2];g[ca>>2]=+g[Y>>2]+ +g[Z>>2];g[$>>2]=+g[Q>>2]+ +g[P>>2];g[aa>>2]=+g[J>>2]-+g[M>>2];g[ba>>2]=+g[$>>2]-+g[aa>>2];g[da>>2]=+g[aa>>2]+ +g[$>>2];g[(c[o>>2]|0)+((c[r>>2]|0)*3<<2)>>2]=+g[_>>2]*1.5460208654403687+ +g[ba>>2]*1.2687865495681763;g[(c[o>>2]|0)+((c[r>>2]|0)*15<<2)>>2]=+g[da>>2]*.1960342824459076-+g[ca>>2]*1.990369439125061;g[(c[o>>2]|0)+((c[r>>2]|0)*11<<2)>>2]=+g[ba>>2]*1.5460208654403687-+g[_>>2]*1.2687865495681763;g[(c[o>>2]|0)+((c[r>>2]|0)*7<<2)>>2]=+g[ca>>2]*.1960342824459076+ +g[da>>2]*1.990369439125061;g[Ja>>2]=+g[ea>>2]+ +g[Ia>>2];g[hb>>2]=(+g[Oa>>2]+ +g[gb>>2])*.7071067690849304;g[ib>>2]=+g[Ja>>2]+ +g[hb>>2];g[sb>>2]=+g[Ja>>2]-+g[hb>>2];g[nb>>2]=+g[lb>>2]-+g[mb>>2];g[qb>>2]=(+g[ob>>2]+ 
+g[pb>>2])*.7071067690849304;g[rb>>2]=+g[nb>>2]-+g[qb>>2];g[tb>>2]=+g[qb>>2]+ +g[nb>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[ib>>2]*1.9615705013275146+ +g[rb>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[tb>>2]*1.111140489578247-+g[sb>>2]*1.662939190864563;g[(c[n>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[rb>>2]*1.9615705013275146-+g[ib>>2]*.39018064737319946;g[(c[n>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[sb>>2]*1.111140489578247+ +g[tb>>2]*1.662939190864563;g[Ea>>2]=+g[Ca>>2]-+g[Da>>2];g[N>>2]=+g[J>>2]+ +g[M>>2];g[O>>2]=+g[Ea>>2]+ +g[N>>2];g[W>>2]=+g[Ea>>2]-+g[N>>2];g[R>>2]=+g[P>>2]-+g[Q>>2];g[U>>2]=+g[S>>2]-+g[T>>2];g[V>>2]=+g[R>>2]-+g[U>>2];g[X>>2]=+g[U>>2]+ +g[R>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[O>>2]*1.913880705833435+ +g[V>>2]*.580569326877594;g[(c[o>>2]|0)+((c[r>>2]|0)*13<<2)>>2]=+g[X>>2]*.9427934885025024-+g[W>>2]*1.7638425827026367;g[(c[o>>2]|0)+((c[r>>2]|0)*9<<2)>>2]=+g[V>>2]*1.913880705833435-+g[O>>2]*.580569326877594;g[(c[o>>2]|0)+((c[r>>2]|0)*5<<2)>>2]=+g[W>>2]*.9427934885025024+ +g[X>>2]*1.7638425827026367;c[Ic>>2]=(c[Ic>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=Jc;return}function Hv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,45,10216);i=b;return}function Iv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var 
n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0;B=i;i=i+80|0;n=B+68|0;o=B+64|0;p=B+60|0;q=B+56|0;r=B+52|0;s=B+48|0;C=B+40|0;t=B+36|0;u=B+32|0;A=B+20|0;z=B+16|0;v=B+12|0;w=B+8|0;x=B+4|0;y=B;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[B+44>>2]=j;c[C>>2]=k;c[t>>2]=l;c[u>>2]=m;g[B+28>>2]=2.0;g[B+24>>2]=1.7320507764816284;c[A>>2]=c[C>>2];while(1){if((c[A>>2]|0)<=0)break;g[y>>2]=+g[c[q>>2]>>2];g[z>>2]=+g[y>>2]*1.7320507764816284;g[v>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[w>>2]-+g[v>>2];g[c[n>>2]>>2]=+g[w>>2]*2.0+ +g[v>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=-(+g[x>>2]+ +g[z>>2]);g[c[o>>2]>>2]=+g[x>>2]-+g[z>>2];c[A>>2]=(c[A>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[u>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[u>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[t>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[t>>2]<<2)}i=B;return}function Jv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,46,10264);i=b;return}function Kv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0;D=i;i=i+80|0;n=D+72|0;o=D+68|0;p=D+64|0;q=D+60|0;r=D+56|0;s=D+52|0;t=D+48|0;E=D+44|0;u=D+40|0;v=D+36|0;C=D+24|0;w=D+20|0;x=D+16|0;y=D+12|0;z=D+8|0;A=D+4|0;B=D;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[E>>2]=k;c[u>>2]=l;c[v>>2]=m;g[D+32>>2]=1.4142135381698608;g[D+28>>2]=2.0;c[C>>2]=c[E>>2];while(1){if((c[C>>2]|0)<=0)break;g[w>>2]=+g[c[p>>2]>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[y>>2]=+g[w>>2]-+g[x>>2];g[z>>2]=+g[c[q>>2]>>2];g[A>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[B>>2]=+g[z>>2]+ +g[A>>2];g[c[n>>2]>>2]=(+g[w>>2]+ +g[x>>2])*2.0;g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=(+g[A>>2]-+g[z>>2])*2.0;g[c[o>>2]>>2]=(+g[y>>2]-+g[B>>2])*1.4142135381698608;g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=-((+g[y>>2]+ 
+g[B>>2])*1.4142135381698608);c[C>>2]=(c[C>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2)}i=D;return}function Lv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,47,10312);i=b;return}function Mv(a,b,d,e,f,h,j,k,l,m){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;var n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0;J=i;i=i+112|0;n=J+108|0;o=J+104|0;p=J+100|0;q=J+96|0;r=J+92|0;s=J+88|0;t=J+84|0;K=J+80|0;u=J+76|0;v=J+72|0;I=J+48|0;F=J+44|0;H=J+40|0;w=J+36|0;z=J+32|0;A=J+28|0;B=J+24|0;G=J+20|0;C=J+16|0;D=J+12|0;E=J+8|0;x=J+4|0;y=J;c[n>>2]=a;c[o>>2]=b;c[p>>2]=d;c[q>>2]=e;c[r>>2]=f;c[s>>2]=h;c[t>>2]=j;c[K>>2]=k;c[u>>2]=l;c[v>>2]=m;g[J+68>>2]=2.0;g[J+64>>2]=1.1180340051651;g[J+60>>2]=.5;g[J+56>>2]=1.1755704879760742;g[J+52>>2]=1.9021130800247192;c[I>>2]=c[K>>2];while(1){if((c[I>>2]|0)<=0)break;g[D>>2]=+g[(c[q>>2]|0)+(c[t>>2]<<2)>>2];g[E>>2]=+g[c[q>>2]>>2];g[F>>2]=+g[D>>2]*1.9021130800247192+ +g[E>>2]*1.1755704879760742;g[H>>2]=+g[D>>2]*1.1755704879760742-+g[E>>2]*1.9021130800247192;g[w>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<1<<2)>>2];g[x>>2]=+g[(c[p>>2]|0)+(c[s>>2]<<2)>>2];g[y>>2]=+g[c[p>>2]>>2];g[z>>2]=+g[x>>2]+ +g[y>>2];g[A>>2]=+g[z>>2]*.5-+g[w>>2];g[B>>2]=(+g[y>>2]-+g[x>>2])*1.1180340051651;g[c[n>>2]>>2]=+g[z>>2]*2.0+ +g[w>>2];g[G>>2]=+g[B>>2]-+g[A>>2];g[(c[n>>2]|0)+(c[r>>2]<<2)>>2]=+g[G>>2]+ +g[H>>2];g[(c[o>>2]|0)+(c[r>>2]<<2)>>2]=+g[H>>2]-+g[G>>2];g[C>>2]=+g[A>>2]+ +g[B>>2];g[c[o>>2]>>2]=+g[C>>2]-+g[F>>2];g[(c[n>>2]|0)+(c[r>>2]<<1<<2)>>2]=-(+g[C>>2]+ +g[F>>2]);c[I>>2]=(c[I>>2]|0)-1;c[n>>2]=(c[n>>2]|0)+(c[v>>2]<<2);c[o>>2]=(c[o>>2]|0)+(c[v>>2]<<2);c[p>>2]=(c[p>>2]|0)+(c[u>>2]<<2);c[q>>2]=(c[q>>2]|0)+(c[u>>2]<<2);c[r>>2]=c[r>>2]^c[2998];c[s>>2]=c[s>>2]^c[2998];c[t>>2]=c[t>>2]^c[2998]}i=J;return}function Nv(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=a;cn(c[d>>2]|0,48,10360);i=b;return} +function 
iy(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0;g=i;i=i+32|0;m=g+16|0;l=g+12|0;k=g+8|0;j=g+4|0;h=g;c[m>>2]=a;c[l>>2]=b;c[k>>2]=d;c[j>>2]=e;c[h>>2]=f;e=ky(c[m>>2]|0,c[l>>2]|0,1,c[k>>2]|0,0,1,1,c[j>>2]|0,0,1,1,c[h>>2]|0)|0;i=g;return e|0}function jy(a,b,d,e,f,g,h,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;H=i;i=i+80|0;o=H+72|0;p=H+68|0;t=H+64|0;u=H+60|0;v=H+56|0;w=H+52|0;x=H+48|0;y=H+44|0;z=H+40|0;A=H+36|0;q=H+32|0;r=H+28|0;s=H+24|0;G=H+20|0;B=H+16|0;D=H+12|0;E=H+8|0;C=H+4|0;F=H;c[p>>2]=a;c[t>>2]=b;c[u>>2]=d;c[v>>2]=e;c[w>>2]=f;c[x>>2]=g;c[y>>2]=h;c[z>>2]=j;c[A>>2]=k;c[q>>2]=l;c[r>>2]=m;c[s>>2]=n;if(!(dy(c[p>>2]|0,c[t>>2]|0,c[u>>2]|0)|0)){c[o>>2]=0;b=c[o>>2]|0;i=H;return b|0}Rb(-1,c[v>>2]|0,G,B);c[C>>2]=(c[z>>2]|0)==(c[G>>2]|0)&1;if(!(c[C>>2]|0))c[s>>2]=c[s>>2]|1;f=c[s>>2]|0;h=c[p>>2]|0;e=c[t>>2]|0;b=ly(c[p>>2]|0,c[t>>2]|0,c[w>>2]|0,c[C>>2]|0,1,D)|0;g=ly(c[p>>2]|0,c[t>>2]|0,c[A>>2]|0,c[C>>2]|0,0,E)|0;g=cy(h,e,b,g,c[x>>2]<<1,c[q>>2]|0)|0;b=Ed(c[u>>2]|0,c[y>>2]<<1,c[r>>2]|0)|0;c[F>>2]=Ux(0,f,wn(g,b,c[z>>2]|0,c[G>>2]|0,c[B>>2]|0,4)|0)|0;yb(c[D>>2]|0);yb(c[E>>2]|0);c[o>>2]=c[F>>2];b=c[o>>2]|0;i=H;return b|0}function ky(a,b,d,e,f,g,h,j,k,l,m,n){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;var 
o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0;H=i;i=i+80|0;o=H+72|0;p=H+68|0;t=H+64|0;u=H+60|0;v=H+56|0;w=H+52|0;x=H+48|0;y=H+44|0;z=H+40|0;A=H+36|0;q=H+32|0;r=H+28|0;s=H+24|0;G=H+20|0;C=H+16|0;D=H+12|0;E=H+8|0;B=H+4|0;F=H;c[p>>2]=a;c[t>>2]=b;c[u>>2]=d;c[v>>2]=e;c[w>>2]=f;c[x>>2]=g;c[y>>2]=h;c[z>>2]=j;c[A>>2]=k;c[q>>2]=l;c[r>>2]=m;c[s>>2]=n;if(dy(c[p>>2]|0,c[t>>2]|0,c[u>>2]|0)|0){Rb(-1,c[z>>2]|0,G,C);c[B>>2]=(c[v>>2]|0)==(c[G>>2]|0)&1;f=c[s>>2]|0;h=c[p>>2]|0;e=c[t>>2]|0;b=ly(c[p>>2]|0,c[t>>2]|0,c[w>>2]|0,c[B>>2]|0,0,D)|0;g=ly(c[p>>2]|0,c[t>>2]|0,c[A>>2]|0,c[B>>2]|0,1,E)|0;g=cy(h,e,b,g,c[x>>2]|0,c[q>>2]<<1)|0;b=Ed(c[u>>2]|0,c[y>>2]|0,c[r>>2]<<1)|0;c[F>>2]=Ux(0,f,wn(g,b,c[v>>2]|0,c[G>>2]|0,c[C>>2]|0,0)|0)|0;yb(c[D>>2]|0);yb(c[E>>2]|0);c[o>>2]=c[F>>2];b=c[o>>2]|0;i=H;return b|0}else{c[o>>2]=0;b=c[o>>2]|0;i=H;return b|0}return 0}function ly(a,b,d,e,f,g){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;var h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;p=i;i=i+32|0;h=p+24|0;j=p+20|0;k=p+16|0;l=p+12|0;m=p+8|0;n=p+4|0;o=p;c[h>>2]=a;c[j>>2]=b;c[k>>2]=d;c[l>>2]=e;c[m>>2]=f;c[n>>2]=g;c[c[n>>2]>>2]=0;if(!((c[k>>2]|0)==0&(c[h>>2]|0)>0)){d=c[k>>2]|0;i=p;return d|0}if((c[l>>2]|0)!=0|(c[m>>2]|0)!=0){c[o>>2]=wb(c[h>>2]<<2)|0;Zy(c[o>>2]|0,c[j>>2]|0,c[h>>2]<<2|0)|0;d=_(((c[(c[j>>2]|0)+((c[h>>2]|0)-1<<2)>>2]|0)/2|0)+1|0,1+(((c[m>>2]|0)!=0^1)&1)|0)|0;c[(c[o>>2]|0)+((c[h>>2]|0)-1<<2)>>2]=d;d=c[o>>2]|0;c[c[n>>2]>>2]=d;c[k>>2]=d;d=c[k>>2]|0;i=p;return d|0}else{c[k>>2]=c[j>>2];d=c[k>>2]|0;i=p;return d|0}return 0}function my(){if(!(c[4259]|0)){c[4259]=tc()|0;Zx(c[4259]|0)}return c[4259]|0}function ny(a,b){a=a|0;b=b|0;if(!a)a=0;else a=oy(a,b,0)|0;return a|0}function oy(b,d,e){b=b|0;d=d|0;e=e|0;do 
if(b){if(d>>>0<128){a[b>>0]=d;b=1;break}if(d>>>0<2048){a[b>>0]=d>>>6|192;a[b+1>>0]=d&63|128;b=2;break}if(d>>>0<55296|(d&-8192|0)==57344){a[b>>0]=d>>>12|224;a[b+1>>0]=d>>>6&63|128;a[b+2>>0]=d&63|128;b=3;break}if((d+-65536|0)>>>0<1048576){a[b>>0]=d>>>18|240;a[b+1>>0]=d>>>12&63|128;a[b+2>>0]=d>>>6&63|128;a[b+3>>0]=d&63|128;b=4;break}else{c[(py()|0)>>2]=84;b=-1;break}}else b=1;while(0);return b|0}function py(){var a=0;if(!(c[4260]|0))a=17096;else a=c[(Ia()|0)+60>>2]|0;return a|0}function qy(b){b=b|0;var c=0,e=0;c=0;while(1){if((d[29647+c>>0]|0)==(b|0)){e=2;break}c=c+1|0;if((c|0)==87){c=87;b=29735;e=5;break}}if((e|0)==2)if(!c)b=29735;else{b=29735;e=5}if((e|0)==5)while(1){while(1){e=b+1|0;if(!(a[b>>0]|0)){b=e;break}else b=e}c=c+-1|0;if(!c)break;else e=5}return b|0}function ry(a,b){a=+a;b=b|0;var d=0,e=0,f=0;h[k>>3]=a;d=c[k>>2]|0;e=c[k+4>>2]|0;f=Wy(d|0,e|0,52)|0;f=f&2047;switch(f|0){case 0:{if(a!=0.0){a=+ry(a*18446744073709551616.0,b);d=(c[b>>2]|0)+-64|0}else d=0;c[b>>2]=d;break}case 2047:break;default:{c[b>>2]=f+-1022;c[k>>2]=d;c[k+4>>2]=e&-2146435073|1071644672;a=+h[k>>3]}}return +a}function sy(a,b){a=+a;b=b|0;return +(+ry(a,b))}function ty(a){a=a|0;if(a>>>0>4294963200){c[(py()|0)>>2]=0-a;a=-1}return a|0}function uy(b,d,e){b=b|0;d=d|0;e=e|0;var f=0,g=0;g=i;i=i+80|0;f=g;c[b+36>>2]=44;if((c[b>>2]&64|0)==0?(c[f>>2]=c[b+60>>2],c[f+4>>2]=21505,c[f+8>>2]=g+12,(Aa(54,f|0)|0)!=0):0)a[b+75>>0]=-1;f=vy(b,d,e)|0;i=g;return f|0}function vy(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0;q=i;i=i+48|0;n=q+16|0;m=q;e=q+32|0;o=a+28|0;f=c[o>>2]|0;c[e>>2]=f;p=a+20|0;f=(c[p>>2]|0)-f|0;c[e+4>>2]=f;c[e+8>>2]=b;c[e+12>>2]=d;k=a+60|0;l=a+44|0;b=2;f=f+d|0;while(1){if(!(c[4260]|0)){c[n>>2]=c[k>>2];c[n+4>>2]=e;c[n+8>>2]=b;h=ty(Sa(146,n|0)|0)|0}else{Oa(313,a|0);c[m>>2]=c[k>>2];c[m+4>>2]=e;c[m+8>>2]=b;h=ty(Sa(146,m|0)|0)|0;xa(0)}if((f|0)==(h|0)){f=6;break}if((h|0)<0){f=8;break}f=f-h|0;g=c[e+4>>2]|0;if(h>>>0<=g>>>0)if((b|0)==2){c[o>>2]=(c[o>>2]|0)+h;j=g;b=2}else 
j=g;else{j=c[l>>2]|0;c[o>>2]=j;c[p>>2]=j;j=c[e+12>>2]|0;h=h-g|0;e=e+8|0;b=b+-1|0}c[e>>2]=(c[e>>2]|0)+h;c[e+4>>2]=j-h}if((f|0)==6){m=c[l>>2]|0;c[a+16>>2]=m+(c[a+48>>2]|0);c[o>>2]=m;c[p>>2]=m}else if((f|0)==8){c[a+16>>2]=0;c[o>>2]=0;c[p>>2]=0;c[a>>2]=c[a>>2]|32;if((b|0)==2)d=0;else d=d-(c[e+4>>2]|0)|0}i=q;return d|0}function wy(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0;e=i;i=i+16|0;f=e;c[f>>2]=d;a=Ey(a,b,f)|0;i=e;return a|0}function xy(a){a=a|0;var b=0,d=0;b=i;i=i+16|0;d=b;c[d>>2]=c[a+60>>2];a=ty(Na(6,d|0)|0)|0;i=b;return a|0}function yy(a){a=a|0;var b=0,d=0;do if(a){if((c[a+76>>2]|0)<=-1){b=Jy(a)|0;break}d=(Cy(a)|0)==0;b=Jy(a)|0;if(!d)Dy(a)}else{if(!(c[4273]|0))b=0;else b=yy(c[4273]|0)|0;Ma(17068);a=c[4266]|0;if(a)do{if((c[a+76>>2]|0)>-1)d=Cy(a)|0;else d=0;if((c[a+20>>2]|0)>>>0>(c[a+28>>2]|0)>>>0)b=Jy(a)|0|b;if(d)Dy(a);a=c[a+56>>2]|0}while((a|0)!=0);Ka(17068)}while(0);return b|0}function zy(b){b=b|0;var d=0,e=0;d=b+74|0;e=a[d>>0]|0;a[d>>0]=e+255|e;d=c[b>>2]|0;if(!(d&8)){c[b+8>>2]=0;c[b+4>>2]=0;d=c[b+44>>2]|0;c[b+28>>2]=d;c[b+20>>2]=d;c[b+16>>2]=d+(c[b+48>>2]|0);d=0}else{c[b>>2]=d|32;d=-1}return d|0}function Ay(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0;f=i;i=i+32|0;g=f;e=f+20|0;c[g>>2]=c[a+60>>2];c[g+4>>2]=0;c[g+8>>2]=b;c[g+12>>2]=e;c[g+16>>2]=d;if((ty(Ra(140,g|0)|0)|0)<0){c[e>>2]=-1;a=-1}else a=c[e>>2]|0;i=f;return a|0}function By(b,d,e){b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,i=0;f=e+16|0;g=c[f>>2]|0;if(!g)if(!(zy(e)|0)){g=c[f>>2]|0;h=4}else f=0;else h=4;a:do if((h|0)==4){i=e+20|0;h=c[i>>2]|0;if((g-h|0)>>>0>>0){f=Va[c[e+36>>2]&63](e,b,d)|0;break}b:do if((a[e+75>>0]|0)>-1){f=d;while(1){if(!f){g=h;f=0;break b}g=f+-1|0;if((a[b+g>>0]|0)==10)break;else f=g}if((Va[c[e+36>>2]&63](e,b,f)|0)>>>0>>0)break a;d=d-f|0;b=b+f|0;g=c[i>>2]|0}else{g=h;f=0}while(0);Zy(g|0,b|0,d|0)|0;c[i>>2]=(c[i>>2]|0)+d;f=f+d|0}while(0);return f|0}function Cy(a){a=a|0;return 0}function Dy(a){a=a|0;return}function Ey(b,d,e){b=b|0;d=d|0;e=e|0;var 
f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0;s=i;i=i+224|0;o=s+80|0;r=s+96|0;q=s;p=s+136|0;f=r;g=f+40|0;do{c[f>>2]=0;f=f+4|0}while((f|0)<(g|0));c[o>>2]=c[e>>2];if((Ky(0,d,o,q,r)|0)<0)f=-1;else{if((c[b+76>>2]|0)>-1)m=Cy(b)|0;else m=0;e=c[b>>2]|0;n=e&32;if((a[b+74>>0]|0)<1)c[b>>2]=e&-33;g=b+48|0;if(!(c[g>>2]|0)){f=b+44|0;h=c[f>>2]|0;c[f>>2]=p;j=b+28|0;c[j>>2]=p;k=b+20|0;c[k>>2]=p;c[g>>2]=80;l=b+16|0;c[l>>2]=p+80;e=Ky(b,d,o,q,r)|0;if(h){Va[c[b+36>>2]&63](b,0,0)|0;e=(c[k>>2]|0)==0?-1:e;c[f>>2]=h;c[g>>2]=0;c[l>>2]=0;c[j>>2]=0;c[k>>2]=0}}else e=Ky(b,d,o,q,r)|0;f=c[b>>2]|0;c[b>>2]=f|n;if(m)Dy(b);f=(f&32|0)==0?e:-1}i=s;return f|0}function Fy(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+208|0;o=r+8|0;p=r;f=_(d,b)|0;n=p;c[n>>2]=1;c[n+4>>2]=0;if(f){j=f-d|0;c[o+4>>2]=d;c[o>>2]=d;g=d;b=d;h=2;while(1){b=b+d+g|0;c[o+(h<<2)>>2]=b;if(b>>>0>>0){n=g;g=b;h=h+1|0;b=n}else break}n=0-d|0;b=a+j|0;m=p+4|0;if((j|0)>0){k=b;j=1;h=1;while(1){do if((j&3|0)==3){Ly(a,d,e,h,o);l=c[m>>2]|0;g=l<<30|(c[p>>2]|0)>>>2;c[p>>2]=g;c[m>>2]=l>>>2;h=h+2|0}else{g=h+-1|0;if((c[o+(g<<2)>>2]|0)>>>0<(k-a|0)>>>0)Ly(a,d,e,h,o);else My(a,d,e,p,h,0,o);if((h|0)==1){g=c[p>>2]|0;c[m>>2]=g>>>31|c[m>>2]<<1;g=g<<1;c[p>>2]=g;h=0;break}if(g>>>0>31){f=c[p>>2]|0;c[m>>2]=f;c[p>>2]=0;g=h+-33|0;h=f;f=0}else{h=c[m>>2]|0;f=c[p>>2]|0}c[m>>2]=f>>>(32-g|0)|h<>2]=g;h=1}while(0);j=g|1;c[p>>2]=j;g=a+d|0;if(g>>>0>=b>>>0)break;else a=g}}else{g=a;h=1}My(g,d,e,p,h,0,o);l=p+4|0;b=c[p>>2]|0;if(!((h|0)==1&(b|0)==1&(c[l>>2]|0)==0))do{if((h|0)<2){f=b+-1|0;do if(f){if(!(f&1)){j=f;f=0;do{f=f+1|0;j=j>>>1}while((j&1|0)==0);if(!f)q=24}else q=24;if((q|0)==24){q=0;k=c[m>>2]|0;if(!k){f=64;q=30;break}if(!(k&1)){f=k;j=0}else{j=0;a=k;f=0;break}while(1){a=j+1|0;f=f>>>1;if(f&1){f=a;break}else j=a}if(!f){j=0;a=k;f=0;break}else 
f=j+33|0}if(f>>>0>31)q=30;else{j=f;a=c[m>>2]|0}}else{f=32;q=30}while(0);if((q|0)==30){q=0;b=c[m>>2]|0;c[p>>2]=b;c[m>>2]=0;j=f+-32|0;a=0}c[p>>2]=a<<32-j|b>>>j;c[m>>2]=a>>>j;g=g+n|0;h=f+h|0}else{j=b>>>30;a=j|c[m>>2]<<2;k=h+-2|0;c[p>>2]=(b<<1&2147483646|j<<31)^3;c[m>>2]=a>>>1;My(g+(0-((c[o+(k<<2)>>2]|0)+d))|0,d,e,p,h+-1|0,1,o);h=c[p>>2]|0;c[m>>2]=h>>>31|c[m>>2]<<1;c[p>>2]=h<<1|1;g=g+n|0;My(g,d,e,p,k,1,o);h=k}b=c[p>>2]|0}while(!((h|0)==1&(b|0)==1&(c[l>>2]|0)==0))}i=r;return}function Gy(b,c){b=b|0;c=c|0;var d=0,e=0;e=a[b>>0]|0;d=a[c>>0]|0;if(e<<24>>24==0?1:e<<24>>24!=d<<24>>24)c=e;else{do{b=b+1|0;c=c+1|0;e=a[b>>0]|0;d=a[c>>0]|0}while(!(e<<24>>24==0?1:e<<24>>24!=d<<24>>24));c=e}return (c&255)-(d&255)|0}function Hy(b,d,e){b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,i=0;h=d&255;f=(e|0)!=0;a:do if(f&(b&3|0)!=0){g=d&255;while(1){if((a[b>>0]|0)==g<<24>>24){i=6;break a}b=b+1|0;e=e+-1|0;f=(e|0)!=0;if(!(f&(b&3|0)!=0)){i=5;break}}}else i=5;while(0);if((i|0)==5)if(f)i=6;else e=0;b:do if((i|0)==6){g=d&255;if((a[b>>0]|0)!=g<<24>>24){f=_(h,16843009)|0;c:do if(e>>>0>3)while(1){d=c[b>>2]^f;if((d&-2139062144^-2139062144)&d+-16843009)break;b=b+4|0;e=e+-4|0;if(e>>>0<=3){i=11;break c}}else i=11;while(0);if((i|0)==11)if(!e){e=0;break}while(1){if((a[b>>0]|0)==g<<24>>24)break b;b=b+1|0;e=e+-1|0;if(!e){e=0;break}}}}while(0);return ((e|0)!=0?b:0)|0}function Iy(a){a=a|0;if(!(c[a+68>>2]|0))Dy(a);return}function Jy(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0;b=a+20|0;g=a+28|0;if((c[b>>2]|0)>>>0>(c[g>>2]|0)>>>0?(Va[c[a+36>>2]&63](a,0,0)|0,(c[b>>2]|0)==0):0)b=-1;else{h=a+4|0;d=c[h>>2]|0;e=a+8|0;f=c[e>>2]|0;if(d>>>0>>0)Va[c[a+40>>2]&63](a,d-f|0,1)|0;c[a+16>>2]=0;c[g>>2]=0;c[b>>2]=0;c[e>>2]=0;c[h>>2]=0;b=0}return b|0}function Ky(e,f,g,j,l){e=e|0;f=f|0;g=g|0;j=j|0;l=l|0;var 
m=0,n=0,o=0,p=0,q=0.0,r=0,s=0,t=0,u=0,v=0,w=0.0,x=0,y=0,z=0,A=0,B=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0,O=0,P=0,Q=0,R=0,S=0,T=0,U=0,V=0,W=0,X=0,Y=0,Z=0,$=0,aa=0,ba=0,ca=0,da=0,ea=0,fa=0,ga=0,ha=0;ha=i;i=i+624|0;ca=ha+24|0;ea=ha+16|0;da=ha+588|0;Y=ha+576|0;ba=ha;V=ha+536|0;ga=ha+8|0;fa=ha+528|0;M=(e|0)!=0;N=V+40|0;U=N;V=V+39|0;W=ga+4|0;X=Y+12|0;Y=Y+11|0;Z=da;$=X;aa=$-Z|0;O=-2-Z|0;P=$+2|0;Q=ca+288|0;R=da+9|0;S=R;T=da+8|0;x=f;m=0;n=0;f=0;a:while(1){do if((m|0)>-1)if((n|0)>(2147483647-m|0)){c[(py()|0)>>2]=75;m=-1;break}else{m=n+m|0;break}while(0);n=a[x>>0]|0;if(!(n<<24>>24)){L=245;break}else o=x;b:while(1){switch(n<<24>>24){case 37:{n=o;L=9;break b}case 0:{n=o;break b}default:{}}K=o+1|0;n=a[K>>0]|0;o=K}c:do if((L|0)==9)while(1){L=0;if((a[n+1>>0]|0)!=37)break c;o=o+1|0;n=n+2|0;if((a[n>>0]|0)==37)L=9;else break}while(0);A=o-x|0;if(M?(c[e>>2]&32|0)==0:0)By(x,A,e)|0;if((o|0)!=(x|0)){x=n;n=A;continue}r=n+1|0;o=a[r>>0]|0;p=(o<<24>>24)+-48|0;if(p>>>0<10){K=(a[n+2>>0]|0)==36;r=K?n+3|0:r;o=a[r>>0]|0;B=K?p:-1;f=K?1:f}else B=-1;n=o<<24>>24;d:do if((n&-32|0)==32){p=0;while(1){if(!(1<>24)+-32|p;r=r+1|0;o=a[r>>0]|0;n=o<<24>>24;if((n&-32|0)!=32){n=p;break}}}else n=0;while(0);do if(o<<24>>24==42){o=r+1|0;p=(a[o>>0]|0)+-48|0;if(p>>>0<10?(a[r+2>>0]|0)==36:0){c[l+(p<<2)>>2]=10;f=1;p=r+3|0;r=c[j+((a[o>>0]|0)+-48<<3)>>2]|0}else{if(f){m=-1;break a}if(!M){z=n;f=0;K=0;break}f=(c[g>>2]|0)+(4-1)&~(4-1);r=c[f>>2]|0;c[g>>2]=f+4;f=0;p=o}if((r|0)<0){o=p;z=n|8192;K=0-r|0}else{o=p;z=n;K=r}}else{p=(o<<24>>24)+-48|0;if(p>>>0<10){o=r;r=0;do{r=(r*10|0)+p|0;o=o+1|0;p=(a[o>>0]|0)+-48|0}while(p>>>0<10);if((r|0)<0){m=-1;break a}else{z=n;K=r}}else{o=r;z=n;K=0}}while(0);e:do if((a[o>>0]|0)==46){p=o+1|0;r=a[p>>0]|0;if(r<<24>>24!=42){n=(r<<24>>24)+-48|0;if(n>>>0<10){o=p;r=0}else{o=p;t=0;break}while(1){r=(r*10|0)+n|0;o=o+1|0;n=(a[o>>0]|0)+-48|0;if(n>>>0>=10){t=r;break 
e}}}p=o+2|0;r=(a[p>>0]|0)+-48|0;if(r>>>0<10?(a[o+3>>0]|0)==36:0){c[l+(r<<2)>>2]=10;o=o+4|0;t=c[j+((a[p>>0]|0)+-48<<3)>>2]|0;break}if(f){m=-1;break a}if(M){o=(c[g>>2]|0)+(4-1)&~(4-1);t=c[o>>2]|0;c[g>>2]=o+4;o=p}else{o=p;t=0}}else t=-1;while(0);v=0;while(1){r=(a[o>>0]|0)+-65|0;if(r>>>0>57){m=-1;break a}s=o+1|0;p=a[32579+(v*58|0)+r>>0]|0;r=p&255;if((r+-1|0)>>>0<8){o=s;v=r}else{J=s;break}}if(!(p<<24>>24)){m=-1;break}s=(B|0)>-1;do if(p<<24>>24==19)if(s){m=-1;break a}else L=52;else{if(s){c[l+(B<<2)>>2]=r;s=j+(B<<3)|0;u=c[s+4>>2]|0;L=ba;c[L>>2]=c[s>>2];c[L+4>>2]=u;L=52;break}if(!M){m=0;break a}Oy(ba,r,g)}while(0);if((L|0)==52?(L=0,!M):0){x=J;n=A;continue}E=a[o>>0]|0;E=(v|0)!=0&(E&15|0)==3?E&-33:E;p=z&-65537;I=(z&8192|0)==0?z:p;f:do switch(E|0){case 110:switch(v|0){case 0:{c[c[ba>>2]>>2]=m;x=J;n=A;continue a}case 1:{c[c[ba>>2]>>2]=m;x=J;n=A;continue a}case 2:{x=c[ba>>2]|0;c[x>>2]=m;c[x+4>>2]=((m|0)<0)<<31>>31;x=J;n=A;continue a}case 3:{b[c[ba>>2]>>1]=m;x=J;n=A;continue a}case 4:{a[c[ba>>2]>>0]=m;x=J;n=A;continue a}case 6:{c[c[ba>>2]>>2]=m;x=J;n=A;continue a}case 7:{x=c[ba>>2]|0;c[x>>2]=m;c[x+4>>2]=((m|0)<0)<<31>>31;x=J;n=A;continue a}default:{x=J;n=A;continue a}}case 112:{v=I|8;t=t>>>0>8?t:8;u=120;L=64;break}case 88:case 120:{v=I;u=E;L=64;break}case 111:{p=ba;o=c[p>>2]|0;p=c[p+4>>2]|0;if((o|0)==0&(p|0)==0)n=N;else{n=N;do{n=n+-1|0;a[n>>0]=o&7|48;o=Wy(o|0,p|0,3)|0;p=C}while(!((o|0)==0&(p|0)==0))}if(!(I&8)){r=I;s=0;o=33059;L=77}else{s=U-n+1|0;r=I;t=(t|0)<(s|0)?s:t;s=0;o=33059;L=77}break}case 105:case 100:{n=ba;o=c[n>>2]|0;n=c[n+4>>2]|0;if((n|0)<0){o=Uy(0,0,o|0,n|0)|0;n=C;s=ba;c[s>>2]=o;c[s+4>>2]=n;s=1;p=33059;L=76;break f}if(!(I&2048)){p=I&1;s=p;p=(p|0)==0?33059:33061;L=76}else{s=1;p=33060;L=76}break}case 117:{n=ba;o=c[n>>2]|0;n=c[n+4>>2]|0;s=0;p=33059;L=76;break}case 99:{a[V>>0]=c[ba>>2];x=V;n=1;v=0;u=33059;r=N;break}case 109:{r=qy(c[(py()|0)>>2]|0)|0;L=82;break}case 115:{r=c[ba>>2]|0;r=(r|0)!=0?r:33069;L=82;break}case 
67:{c[ga>>2]=c[ba>>2];c[W>>2]=0;c[ba>>2]=ga;n=-1;L=86;break}case 83:{if(!t){Qy(e,32,K,0,I);o=0;L=98}else{n=t;L=86}break}case 65:case 71:case 70:case 69:case 97:case 103:case 102:case 101:{q=+h[ba>>3];c[ea>>2]=0;h[k>>3]=q;if((c[k+4>>2]|0)>=0)if(!(I&2048)){H=I&1;G=H;H=(H|0)==0?33077:33082}else{G=1;H=33079}else{q=-q;G=1;H=33076}h[k>>3]=q;v=c[k+4>>2]&2146435072;do if(v>>>0<2146435072|(v|0)==2146435072&0<0){w=+sy(q,ea)*2.0;s=w!=0.0;if(s)c[ea>>2]=(c[ea>>2]|0)+-1;A=E|32;if((A|0)==97){y=E&32;v=(y|0)==0?H:H+9|0;o=G|2;r=12-t|0;do if(!(t>>>0>11|(r|0)==0)){q=8.0;do{r=r+-1|0;q=q*16.0}while((r|0)!=0);if((a[v>>0]|0)==45){q=-(q+(-w-q));break}else{q=w+q-q;break}}else q=w;while(0);r=c[ea>>2]|0;s=(r|0)<0?0-r|0:r;s=Py(s,((s|0)<0)<<31>>31,X)|0;if((s|0)==(X|0)){a[Y>>0]=48;s=Y}a[s+-1>>0]=(r>>31&2)+43;x=s+-2|0;a[x>>0]=E+15;p=(t|0)<1;u=(I&8|0)==0;r=da;do{n=~~q;s=r+1|0;a[r>>0]=d[33043+n>>0]|y;q=(q-+(n|0))*16.0;do if((s-Z|0)==1){if(u&(p&q==0.0)){r=s;break}a[s>>0]=46;r=r+2|0}else r=s;while(0)}while(q!=0.0);n=(t|0)!=0&(O+r|0)<(t|0)?P+t-x|0:aa-x+r|0;s=n+o|0;Qy(e,32,K,s,I);if(!(c[e>>2]&32))By(v,o,e)|0;Qy(e,48,K,s,I^65536);r=r-Z|0;if(!(c[e>>2]&32))By(da,r,e)|0;p=$-x|0;Qy(e,48,n-(r+p)|0,0,0);if(!(c[e>>2]&32))By(x,p,e)|0;Qy(e,32,K,s,I^8192);n=(s|0)<(K|0)?K:s;break}o=(t|0)<0?6:t;if(s){r=(c[ea>>2]|0)+-28|0;c[ea>>2]=r;q=w*268435456.0}else{q=w;r=c[ea>>2]|0}F=(r|0)<0?ca:Q;D=F;s=F;do{v=~~q>>>0;c[s>>2]=v;s=s+4|0;q=(q-+(v>>>0))*1.0e9}while(q!=0.0);u=s;s=c[ea>>2]|0;if((s|0)>0){v=F;do{x=(s|0)>29?29:s;t=u+-4|0;do if(t>>>0>=v>>>0){s=0;do{r=Xy(c[t>>2]|0,0,x|0)|0;r=Yy(r|0,C|0,s|0,0)|0;s=C;p=gz(r|0,s|0,1e9,0)|0;c[t>>2]=p;s=fz(r|0,s|0,1e9,0)|0;t=t+-4|0}while(t>>>0>=v>>>0);if(!s)break;v=v+-4|0;c[v>>2]=s}while(0);while(1){if(u>>>0<=v>>>0)break;s=u+-4|0;if(!(c[s>>2]|0))u=s;else break}s=(c[ea>>2]|0)-x|0;c[ea>>2]=s}while((s|0)>0)}else v=F;if((s|0)<0){y=((o+25|0)/9|0)+1|0;p=(A|0)==102;do{n=0-s|0;n=(n|0)>9?9:n;do 
if(v>>>0>>0){s=(1<>>n;t=0;x=v;do{z=c[x>>2]|0;c[x>>2]=(z>>>n)+t;t=_(z&s,r)|0;x=x+4|0}while(x>>>0>>0);v=(c[v>>2]|0)==0?v+4|0:v;if(!t)break;c[u>>2]=t;u=u+4|0}else v=(c[v>>2]|0)==0?v+4|0:v;while(0);s=p?F:v;u=(u-s>>2|0)>(y|0)?s+(y<<2)|0:u;s=(c[ea>>2]|0)+n|0;c[ea>>2]=s}while((s|0)<0)}do if(v>>>0>>0){s=(D-v>>2)*9|0;r=c[v>>2]|0;if(r>>>0<10)break;else t=10;do{t=t*10|0;s=s+1|0}while(r>>>0>=t>>>0)}else s=0;while(0);z=(A|0)==103;B=(o|0)!=0;t=o-((A|0)!=102?s:0)+((B&z)<<31>>31)|0;if((t|0)<(((u-D>>2)*9|0)+-9|0)){p=t+9216|0;y=(p|0)/9|0;t=F+(y+-1023<<2)|0;p=((p|0)%9|0)+1|0;if((p|0)<9){r=10;do{r=r*10|0;p=p+1|0}while((p|0)!=9)}else r=10;n=c[t>>2]|0;p=(n>>>0)%(r>>>0)|0;if(!((p|0)==0?(F+(y+-1022<<2)|0)==(u|0):0))L=163;do if((L|0)==163){L=0;q=(((n>>>0)/(r>>>0)|0)&1|0)==0?9007199254740992.0:9007199254740994.0;x=(r|0)/2|0;do if(p>>>0>>0)w=.5;else{if((p|0)==(x|0)?(F+(y+-1022<<2)|0)==(u|0):0){w=1.0;break}w=1.5}while(0);do if(G){if((a[H>>0]|0)!=45)break;q=-q;w=-w}while(0);p=n-p|0;c[t>>2]=p;if(!(q+w!=q))break;s=p+r|0;c[t>>2]=s;if(s>>>0>999999999){s=v;while(1){r=t+-4|0;c[t>>2]=0;if(r>>>0>>0){s=s+-4|0;c[s>>2]=0}v=(c[r>>2]|0)+1|0;c[r>>2]=v;if(v>>>0>999999999)t=r;else{v=s;t=r;break}}}s=(D-v>>2)*9|0;p=c[v>>2]|0;if(p>>>0<10)break;else r=10;do{r=r*10|0;s=s+1|0}while(p>>>0>=r>>>0)}while(0);t=t+4|0;u=u>>>0>t>>>0?t:u}y=0-s|0;while(1){if(u>>>0<=v>>>0){A=0;break}t=u+-4|0;if(!(c[t>>2]|0))u=t;else{A=1;break}}do if(z){t=(B&1^1)+o|0;if((t|0)>(s|0)&(s|0)>-5){x=E+-1|0;o=t+-1-s|0}else{x=E+-2|0;o=t+-1|0}t=I&8;if(t){r=t;break}do if(A){t=c[u+-4>>2]|0;if(!t){r=9;break}if(!((t>>>0)%10|0)){p=10;r=0}else{r=0;break}do{p=p*10|0;r=r+1|0}while(((t>>>0)%(p>>>0)|0|0)==0)}else 
r=9;while(0);t=((u-D>>2)*9|0)+-9|0;if((x|32|0)==102){r=t-r|0;r=(r|0)<0?0:r;o=(o|0)<(r|0)?o:r;r=0;break}else{r=t+s-r|0;r=(r|0)<0?0:r;o=(o|0)<(r|0)?o:r;r=0;break}}else{x=E;r=I&8}while(0);z=o|r;n=(z|0)!=0&1;p=(x|32|0)==102;if(p){t=(s|0)>0?s:0;y=0}else{t=(s|0)<0?y:s;t=Py(t,((t|0)<0)<<31>>31,X)|0;if(($-t|0)<2)do{t=t+-1|0;a[t>>0]=48}while(($-t|0)<2);a[t+-1>>0]=(s>>31&2)+43;y=t+-2|0;a[y>>0]=x;t=$-y|0}n=G+1+o+n+t|0;Qy(e,32,K,n,I);if(!(c[e>>2]&32))By(H,G,e)|0;Qy(e,48,K,n,I^65536);do if(p){v=v>>>0>F>>>0?F:v;r=v;do{s=Py(c[r>>2]|0,0,R)|0;do if((r|0)==(v|0)){if((s|0)!=(R|0))break;a[T>>0]=48;s=T}else{if(s>>>0<=da>>>0)break;do{s=s+-1|0;a[s>>0]=48}while(s>>>0>da>>>0)}while(0);if(!(c[e>>2]&32))By(s,S-s|0,e)|0;r=r+4|0}while(r>>>0<=F>>>0);do if(z){if(c[e>>2]&32)break;By(33111,1,e)|0}while(0);if((o|0)>0&r>>>0>>0){p=o;s=r;while(1){r=Py(c[s>>2]|0,0,R)|0;if(r>>>0>da>>>0)do{r=r+-1|0;a[r>>0]=48}while(r>>>0>da>>>0);if(!(c[e>>2]&32))By(r,(p|0)>9?9:p,e)|0;s=s+4|0;o=p+-9|0;if(!((p|0)>9&s>>>0>>0))break;else p=o}}Qy(e,48,o+9|0,9,0)}else{x=A?u:v+4|0;if((o|0)>-1){t=(r|0)==0;u=v;do{s=Py(c[u>>2]|0,0,R)|0;if((s|0)==(R|0)){a[T>>0]=48;s=T}do if((u|0)==(v|0)){r=s+1|0;if(!(c[e>>2]&32))By(s,1,e)|0;if(t&(o|0)<1){s=r;break}if(c[e>>2]&32){s=r;break}By(33111,1,e)|0;s=r}else{if(s>>>0<=da>>>0)break;do{s=s+-1|0;a[s>>0]=48}while(s>>>0>da>>>0)}while(0);r=S-s|0;if(!(c[e>>2]&32))By(s,(o|0)>(r|0)?r:o,e)|0;o=o-r|0;u=u+4|0}while(u>>>0>>0&(o|0)>-1)}Qy(e,48,o+18|0,18,0);if(c[e>>2]&32)break;By(y,$-y|0,e)|0}while(0);Qy(e,32,K,n,I^8192);n=(n|0)<(K|0)?K:n}else{o=(E&32|0)!=0;n=q!=q|0.0!=0.0;r=n?0:G;s=r+3|0;Qy(e,32,K,s,p);p=c[e>>2]|0;if(!(p&32)){By(H,r,e)|0;p=c[e>>2]|0}if(!(p&32))By(n?(o?33103:33107):o?33095:33099,3,e)|0;Qy(e,32,K,s,I^8192);n=(s|0)<(K|0)?K:s}while(0);x=J;continue a}default:{p=I;n=t;v=0;u=33059;r=N}}while(0);g:do 
if((L|0)==64){p=ba;r=c[p>>2]|0;p=c[p+4>>2]|0;s=u&32;if(!((r|0)==0&(p|0)==0)){n=N;do{n=n+-1|0;a[n>>0]=d[33043+(r&15)>>0]|s;r=Wy(r|0,p|0,4)|0;p=C}while(!((r|0)==0&(p|0)==0));L=ba;if((v&8|0)==0|(c[L>>2]|0)==0&(c[L+4>>2]|0)==0){r=v;s=0;o=33059;L=77}else{r=v;s=2;o=33059+(u>>4)|0;L=77}}else{n=N;r=v;s=0;o=33059;L=77}}else if((L|0)==76){n=Py(o,n,N)|0;r=I;o=p;L=77}else if((L|0)==82){L=0;s=Hy(r,0,t)|0;o=(s|0)==0;x=r;n=o?t:s-r|0;v=0;u=33059;r=o?r+t|0:s}else if((L|0)==86){L=0;p=0;o=0;s=c[ba>>2]|0;while(1){r=c[s>>2]|0;if(!r)break;o=ny(fa,r)|0;if((o|0)<0|o>>>0>(n-p|0)>>>0)break;p=o+p|0;if(n>>>0>p>>>0)s=s+4|0;else break}if((o|0)<0){m=-1;break a}Qy(e,32,K,p,I);if(!p){o=0;L=98}else{n=0;r=c[ba>>2]|0;while(1){o=c[r>>2]|0;if(!o){o=p;L=98;break g}o=ny(fa,o)|0;n=o+n|0;if((n|0)>(p|0)){o=p;L=98;break g}if(!(c[e>>2]&32))By(fa,o,e)|0;if(n>>>0>=p>>>0){o=p;L=98;break}else r=r+4|0}}}while(0);if((L|0)==98){L=0;Qy(e,32,K,o,I^8192);x=J;n=(K|0)>(o|0)?K:o;continue}if((L|0)==77){L=0;p=(t|0)>-1?r&-65537:r;r=ba;r=(c[r>>2]|0)!=0|(c[r+4>>2]|0)!=0;if((t|0)!=0|r){v=(r&1^1)+(U-n)|0;x=n;n=(t|0)>(v|0)?t:v;v=s;u=o;r=N}else{x=N;n=0;v=s;u=o;r=N}}s=r-x|0;r=(n|0)<(s|0)?s:n;o=v+r|0;n=(K|0)<(o|0)?o:K;Qy(e,32,n,o,p);if(!(c[e>>2]&32))By(u,v,e)|0;Qy(e,48,n,o,p^65536);Qy(e,48,r,s,0);if(!(c[e>>2]&32))By(x,s,e)|0;Qy(e,32,n,o,p^8192);x=J}h:do if((L|0)==245)if(!e)if(f){m=1;while(1){f=c[l+(m<<2)>>2]|0;if(!f)break;Oy(j+(m<<3)|0,f,g);m=m+1|0;if((m|0)>=10){m=1;break h}}if((m|0)<10)while(1){if(c[l+(m<<2)>>2]|0){m=-1;break h}m=m+1|0;if((m|0)>=10){m=1;break}}else m=1}else m=0;while(0);i=ha;return m|0}function Ly(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;p=i;i=i+240|0;o=p;c[o>>2]=a;a:do if((e|0)>1){n=0-b|0;g=e;h=a;k=a;l=1;while(1){a=h+n|0;m=g+-2|0;j=h+(0-((c[f+(m<<2)>>2]|0)+b))|0;if((jb[d&15](k,j)|0)>-1?(jb[d&15](k,a)|0)>-1:0){e=l;break a}e=l+1|0;h=o+(l<<2)|0;if((jb[d&15](j,a)|0)>-1){c[h>>2]=j;a=j;g=g+-1|0}else{c[h>>2]=a;g=m}if((g|0)<=1)break a;h=a;k=c[o>>2]|0;l=e}}else 
e=1;while(0);Ny(b,o,e);i=p;return}function My(a,b,d,e,f,g,h){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;var j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0;r=i;i=i+240|0;p=r;l=c[e>>2]|0;k=c[e+4>>2]|0;c[p>>2]=a;o=0-b|0;a:do if((k|0)!=0|(l|0)!=1?(j=a+(0-(c[h+(f<<2)>>2]|0))|0,(jb[d&15](j,a)|0)>=1):0){g=(g|0)==0;m=l;n=k;l=1;while(1){if(g&(f|0)>1){g=c[h+(f+-2<<2)>>2]|0;if((jb[d&15](a+o|0,j)|0)>-1){j=a;g=f;e=l;q=20;break a}if((jb[d&15](a+(0-(g+b))|0,j)|0)>-1){j=a;g=f;e=l;q=20;break a}}e=l+1|0;c[p+(l<<2)>>2]=j;g=m+-1|0;do if(g){if(!(g&1)){a=g;g=0;do{g=g+1|0;a=a>>>1}while((a&1|0)==0);if(!g)q=11}else q=11;if((q|0)==11){q=0;if(!n){g=64;q=16;break}if(!(n&1)){g=n;a=0}else{l=0;k=m;a=n;g=0;break}while(1){k=a+1|0;g=g>>>1;if(g&1){g=k;break}else a=k}if(!g){l=0;k=m;a=n;g=0;break}else g=a+33|0}if(g>>>0>31)q=16;else{l=g;k=m;a=n}}else{g=32;q=16}while(0);if((q|0)==16){q=0;l=g+-32|0;k=n;a=0}m=a<<32-l|k>>>l;n=a>>>l;g=g+f|0;if(!((n|0)!=0|(m|0)!=1)){q=20;break a}k=j+(0-(c[h+(g<<2)>>2]|0))|0;if((jb[d&15](k,c[p>>2]|0)|0)<1){f=g;g=0;q=19;break}else{a=j;f=g;g=1;j=k;l=e}}}else{j=a;e=1;q=19}while(0);if((q|0)==19?(g|0)==0:0){g=f;q=20}if((q|0)==20){Ny(b,p,e);Ly(j,b,d,g,h)}i=r;return}function Ny(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0,h=0,j=0;h=i;i=i+256|0;e=h;a:do if((d|0)>=2?(g=b+(d<<2)|0,c[g>>2]=e,(a|0)!=0):0)while(1){f=a>>>0>256?256:a;Zy(e|0,c[b>>2]|0,f|0)|0;e=0;do{j=b+(e<<2)|0;e=e+1|0;Zy(c[j>>2]|0,c[b+(e<<2)>>2]|0,f|0)|0;c[j>>2]=(c[j>>2]|0)+f}while((e|0)!=(d|0));if((a|0)==(f|0))break a;a=a-f|0;e=c[g>>2]|0}while(0);i=h;return}function Oy(a,b,d){a=a|0;b=b|0;d=d|0;var e=0,f=0,g=0.0;a:do if(b>>>0<=20)do switch(b|0){case 9:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;c[a>>2]=b;break a}case 10:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;d=a;c[d>>2]=b;c[d+4>>2]=((b|0)<0)<<31>>31;break a}case 11:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;d=a;c[d>>2]=b;c[d+4>>2]=0;break a}case 12:{f=(c[d>>2]|0)+(8-1)&~(8-1);b=f;e=c[b>>2]|0;b=c[b+4>>2]|0;c[d>>2]=f+8;d=a;c[d>>2]=e;c[d+4>>2]=b;break a}case 
13:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;b=(b&65535)<<16>>16;e=a;c[e>>2]=b;c[e+4>>2]=((b|0)<0)<<31>>31;break a}case 14:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;e=a;c[e>>2]=b&65535;c[e+4>>2]=0;break a}case 15:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;b=(b&255)<<24>>24;e=a;c[e>>2]=b;c[e+4>>2]=((b|0)<0)<<31>>31;break a}case 16:{e=(c[d>>2]|0)+(4-1)&~(4-1);b=c[e>>2]|0;c[d>>2]=e+4;e=a;c[e>>2]=b&255;c[e+4>>2]=0;break a}case 17:{e=(c[d>>2]|0)+(8-1)&~(8-1);g=+h[e>>3];c[d>>2]=e+8;h[a>>3]=g;break a}case 18:{e=(c[d>>2]|0)+(8-1)&~(8-1);g=+h[e>>3];c[d>>2]=e+8;h[a>>3]=g;break a}default:break a}while(0);while(0);return}function Py(b,c,d){b=b|0;c=c|0;d=d|0;var e=0;if(c>>>0>0|(c|0)==0&b>>>0>4294967295){e=b;while(1){b=gz(e|0,c|0,10,0)|0;d=d+-1|0;a[d>>0]=b|48;b=fz(e|0,c|0,10,0)|0;if(c>>>0>9|(c|0)==9&e>>>0>4294967295){e=b;c=C}else break}}if(b)while(1){d=d+-1|0;a[d>>0]=(b>>>0)%10|0|48;if(b>>>0<10)break;else b=(b>>>0)/10|0}return d|0}function Qy(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,j=0;j=i;i=i+256|0;h=j;do if((d|0)>(e|0)&(f&73728|0)==0){g=d-e|0;Vy(h|0,b|0,(g>>>0>256?256:g)|0)|0;f=c[a>>2]|0;b=(f&32|0)==0;if(g>>>0>255){d=d-e|0;do{if(b){By(h,256,a)|0;f=c[a>>2]|0}g=g+-256|0;b=(f&32|0)==0}while(g>>>0>255);if(b)g=d&255;else break}else if(!b)break;By(h,g,a)|0}while(0);i=j;return}function Ry(a){a=a|0;var b=0,d=0,e=0,f=0,g=0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,A=0,B=0,C=0,D=0,E=0,F=0,G=0,H=0,I=0,J=0,K=0,L=0,M=0,N=0;do if(a>>>0<245){q=a>>>0<11?16:a+11&-8;a=q>>>3;l=c[4331]|0;j=l>>>a;if(j&3){e=(j&1^1)+a|0;f=e<<1;b=17364+(f<<2)|0;f=17364+(f+2<<2)|0;g=c[f>>2]|0;h=g+8|0;i=c[h>>2]|0;do if((b|0)!=(i|0)){if(i>>>0<(c[4335]|0)>>>0)Ba();d=i+12|0;if((c[d>>2]|0)==(g|0)){c[d>>2]=b;c[f>>2]=i;break}else Ba()}else c[4331]=l&~(1<>2]=w|3;w=g+(w|4)|0;c[w>>2]=c[w>>2]|1;w=h;return 
w|0}f=c[4333]|0;if(q>>>0>f>>>0){if(j){b=2<>>12&16;b=b>>>a;d=b>>>5&8;b=b>>>d;e=b>>>2&4;b=b>>>e;g=b>>>1&2;b=b>>>g;h=b>>>1&1;h=(d|a|e|g|h)+(b>>>h)|0;b=h<<1;g=17364+(b<<2)|0;b=17364+(b+2<<2)|0;e=c[b>>2]|0;a=e+8|0;d=c[a>>2]|0;do if((g|0)!=(d|0)){if(d>>>0<(c[4335]|0)>>>0)Ba();i=d+12|0;if((c[i>>2]|0)==(e|0)){c[i>>2]=g;c[b>>2]=d;k=c[4333]|0;break}else Ba()}else{c[4331]=l&~(1<>2]=q|3;j=e+q|0;c[e+(q|4)>>2]=f|1;c[e+w>>2]=f;if(k){d=c[4336]|0;g=k>>>3;i=g<<1;b=17364+(i<<2)|0;h=c[4331]|0;g=1<>2]|0;if(i>>>0<(c[4335]|0)>>>0)Ba();else{m=h;n=i}}else{c[4331]=h|g;m=17364+(i+2<<2)|0;n=b}c[m>>2]=d;c[n+12>>2]=d;c[d+8>>2]=n;c[d+12>>2]=b}c[4333]=f;c[4336]=j;w=a;return w|0}a=c[4332]|0;if(a){h=(a&0-a)+-1|0;v=h>>>12&16;h=h>>>v;u=h>>>5&8;h=h>>>u;w=h>>>2&4;h=h>>>w;i=h>>>1&2;h=h>>>i;g=h>>>1&1;g=c[17628+((u|v|w|i|g)+(h>>>g)<<2)>>2]|0;h=(c[g+4>>2]&-8)-q|0;i=g;while(1){d=c[i+16>>2]|0;if(!d){d=c[i+20>>2]|0;if(!d){l=h;k=g;break}}i=(c[d+4>>2]&-8)-q|0;w=i>>>0>>0;h=w?i:h;i=d;g=w?d:g}a=c[4335]|0;if(k>>>0>>0)Ba();f=k+q|0;if(k>>>0>=f>>>0)Ba();j=c[k+24>>2]|0;g=c[k+12>>2]|0;do if((g|0)==(k|0)){h=k+20|0;i=c[h>>2]|0;if(!i){h=k+16|0;i=c[h>>2]|0;if(!i){e=0;break}}while(1){g=i+20|0;b=c[g>>2]|0;if(b){i=b;h=g;continue}g=i+16|0;b=c[g>>2]|0;if(!b)break;else{i=b;h=g}}if(h>>>0>>0)Ba();else{c[h>>2]=0;e=i;break}}else{b=c[k+8>>2]|0;if(b>>>0>>0)Ba();i=b+12|0;if((c[i>>2]|0)!=(k|0))Ba();h=g+8|0;if((c[h>>2]|0)==(k|0)){c[i>>2]=g;c[h>>2]=b;e=g;break}else Ba()}while(0);do if(j){i=c[k+28>>2]|0;h=17628+(i<<2)|0;if((k|0)==(c[h>>2]|0)){c[h>>2]=e;if(!e){c[4332]=c[4332]&~(1<>>0<(c[4335]|0)>>>0)Ba();i=j+16|0;if((c[i>>2]|0)==(k|0))c[i>>2]=e;else c[j+20>>2]=e;if(!e)break}h=c[4335]|0;if(e>>>0>>0)Ba();c[e+24>>2]=j;i=c[k+16>>2]|0;do 
if(i)if(i>>>0>>0)Ba();else{c[e+16>>2]=i;c[i+24>>2]=e;break}while(0);i=c[k+20>>2]|0;if(i)if(i>>>0<(c[4335]|0)>>>0)Ba();else{c[e+20>>2]=i;c[i+24>>2]=e;break}}while(0);if(l>>>0<16){w=l+q|0;c[k+4>>2]=w|3;w=k+(w+4)|0;c[w>>2]=c[w>>2]|1}else{c[k+4>>2]=q|3;c[k+(q|4)>>2]=l|1;c[k+(l+q)>>2]=l;d=c[4333]|0;if(d){e=c[4336]|0;g=d>>>3;i=g<<1;b=17364+(i<<2)|0;h=c[4331]|0;g=1<>2]|0;if(h>>>0<(c[4335]|0)>>>0)Ba();else{p=i;o=h}}else{c[4331]=h|g;p=17364+(i+2<<2)|0;o=b}c[p>>2]=e;c[o+12>>2]=e;c[e+8>>2]=o;c[e+12>>2]=b}c[4333]=l;c[4336]=f}w=k+8|0;return w|0}else z=q}else z=q}else if(a>>>0<=4294967231){a=a+11|0;p=a&-8;k=c[4332]|0;if(k){j=0-p|0;a=a>>>8;if(a)if(p>>>0>16777215)l=31;else{q=(a+1048320|0)>>>16&8;w=a<>>16&4;w=w<>>16&2;l=14-(o|q|l)+(w<>>15)|0;l=p>>>(l+7|0)&1|l<<1}else l=0;a=c[17628+(l<<2)>>2]|0;a:do if(!a){h=0;a=0;w=86}else{d=j;h=0;e=p<<((l|0)==31?0:25-(l>>>1)|0);f=a;a=0;while(1){g=c[f+4>>2]&-8;j=g-p|0;if(j>>>0>>0)if((g|0)==(p|0)){g=f;a=f;w=90;break a}else a=f;else j=d;w=c[f+20>>2]|0;f=c[f+16+(e>>>31<<2)>>2]|0;h=(w|0)==0|(w|0)==(f|0)?h:w;if(!f){w=86;break}else{d=j;e=e<<1}}}while(0);if((w|0)==86){if((h|0)==0&(a|0)==0){a=2<>>12&16;a=a>>>n;m=a>>>5&8;a=a>>>m;o=a>>>2&4;a=a>>>o;q=a>>>1&2;a=a>>>q;h=a>>>1&1;h=c[17628+((m|n|o|q|h)+(a>>>h)<<2)>>2]|0;a=0}if(!h){n=j;q=a}else{g=h;w=90}}if((w|0)==90)while(1){w=0;q=(c[g+4>>2]&-8)-p|0;h=q>>>0>>0;j=h?q:j;a=h?g:a;h=c[g+16>>2]|0;if(h){g=h;w=90;continue}g=c[g+20>>2]|0;if(!g){n=j;q=a;break}else w=90}if((q|0)!=0?n>>>0<((c[4333]|0)-p|0)>>>0:0){a=c[4335]|0;if(q>>>0>>0)Ba();m=q+p|0;if(q>>>0>=m>>>0)Ba();j=c[q+24>>2]|0;g=c[q+12>>2]|0;do if((g|0)==(q|0)){h=q+20|0;i=c[h>>2]|0;if(!i){h=q+16|0;i=c[h>>2]|0;if(!i){s=0;break}}while(1){g=i+20|0;b=c[g>>2]|0;if(b){i=b;h=g;continue}g=i+16|0;b=c[g>>2]|0;if(!b)break;else{i=b;h=g}}if(h>>>0>>0)Ba();else{c[h>>2]=0;s=i;break}}else{b=c[q+8>>2]|0;if(b>>>0>>0)Ba();i=b+12|0;if((c[i>>2]|0)!=(q|0))Ba();h=g+8|0;if((c[h>>2]|0)==(q|0)){c[i>>2]=g;c[h>>2]=b;s=g;break}else Ba()}while(0);do 
if(j){i=c[q+28>>2]|0;h=17628+(i<<2)|0;if((q|0)==(c[h>>2]|0)){c[h>>2]=s;if(!s){c[4332]=c[4332]&~(1<>>0<(c[4335]|0)>>>0)Ba();i=j+16|0;if((c[i>>2]|0)==(q|0))c[i>>2]=s;else c[j+20>>2]=s;if(!s)break}h=c[4335]|0;if(s>>>0>>0)Ba();c[s+24>>2]=j;i=c[q+16>>2]|0;do if(i)if(i>>>0>>0)Ba();else{c[s+16>>2]=i;c[i+24>>2]=s;break}while(0);i=c[q+20>>2]|0;if(i)if(i>>>0<(c[4335]|0)>>>0)Ba();else{c[s+20>>2]=i;c[i+24>>2]=s;break}}while(0);b:do if(n>>>0>=16){c[q+4>>2]=p|3;c[q+(p|4)>>2]=n|1;c[q+(n+p)>>2]=n;i=n>>>3;if(n>>>0<256){h=i<<1;b=17364+(h<<2)|0;g=c[4331]|0;i=1<>2]|0;if(h>>>0<(c[4335]|0)>>>0)Ba();else{t=i;u=h}}else{c[4331]=g|i;t=17364+(h+2<<2)|0;u=b}c[t>>2]=m;c[u+12>>2]=m;c[q+(p+8)>>2]=u;c[q+(p+12)>>2]=b;break}d=n>>>8;if(d)if(n>>>0>16777215)b=31;else{v=(d+1048320|0)>>>16&8;w=d<>>16&4;w=w<>>16&2;b=14-(u|v|b)+(w<>>15)|0;b=n>>>(b+7|0)&1|b<<1}else b=0;i=17628+(b<<2)|0;c[q+(p+28)>>2]=b;c[q+(p+20)>>2]=0;c[q+(p+16)>>2]=0;h=c[4332]|0;g=1<>2]=m;c[q+(p+24)>>2]=i;c[q+(p+12)>>2]=m;c[q+(p+8)>>2]=m;break}d=c[i>>2]|0;c:do if((c[d+4>>2]&-8|0)!=(n|0)){h=n<<((b|0)==31?0:25-(b>>>1)|0);while(1){b=d+16+(h>>>31<<2)|0;i=c[b>>2]|0;if(!i)break;if((c[i+4>>2]&-8|0)==(n|0)){z=i;break c}else{h=h<<1;d=i}}if(b>>>0<(c[4335]|0)>>>0)Ba();else{c[b>>2]=m;c[q+(p+24)>>2]=d;c[q+(p+12)>>2]=m;c[q+(p+8)>>2]=m;break b}}else z=d;while(0);d=z+8|0;b=c[d>>2]|0;w=c[4335]|0;if(b>>>0>=w>>>0&z>>>0>=w>>>0){c[b+12>>2]=m;c[d>>2]=m;c[q+(p+8)>>2]=b;c[q+(p+12)>>2]=z;c[q+(p+24)>>2]=0;break}else Ba()}else{w=n+p|0;c[q+4>>2]=w|3;w=q+(w+4)|0;c[w>>2]=c[w>>2]|1}while(0);w=q+8|0;return w|0}else z=p}else z=p}else z=-1;while(0);a=c[4333]|0;if(a>>>0>=z>>>0){b=a-z|0;d=c[4336]|0;if(b>>>0>15){c[4336]=d+z;c[4333]=b;c[d+(z+4)>>2]=b|1;c[d+a>>2]=b;c[d+4>>2]=z|3}else{c[4333]=0;c[4336]=0;c[d+4>>2]=a|3;w=d+(a+4)|0;c[w>>2]=c[w>>2]|1}w=d+8|0;return w|0}a=c[4334]|0;if(a>>>0>z>>>0){v=a-z|0;c[4334]=v;w=c[4337]|0;c[4337]=w+z;c[w+(z+4)>>2]=v|1;c[w+4>>2]=z|3;w=w+8|0;return w|0}do 
if(!(c[4449]|0)){a=Ga(30)|0;if(!(a+-1&a)){c[4451]=a;c[4450]=a;c[4452]=-1;c[4453]=-1;c[4454]=0;c[4442]=0;c[4449]=(Pa(0)|0)&-16^1431655768;break}else Ba()}while(0);l=z+48|0;e=c[4451]|0;f=z+47|0;d=e+f|0;e=0-e|0;m=d&e;if(m>>>0<=z>>>0){w=0;return w|0}a=c[4441]|0;if((a|0)!=0?(t=c[4439]|0,u=t+m|0,u>>>0<=t>>>0|u>>>0>a>>>0):0){w=0;return w|0}d:do if(!(c[4442]&4)){a=c[4337]|0;e:do if(a){h=17772;while(1){j=c[h>>2]|0;if(j>>>0<=a>>>0?(r=h+4|0,(j+(c[r>>2]|0)|0)>>>0>a>>>0):0){g=h;a=r;break}h=c[h+8>>2]|0;if(!h){w=174;break e}}j=d-(c[4334]|0)&e;if(j>>>0<2147483647){h=Ea(j|0)|0;u=(h|0)==((c[g>>2]|0)+(c[a>>2]|0)|0);a=u?j:0;if(u){if((h|0)!=(-1|0)){x=h;w=194;break d}}else w=184}else a=0}else w=174;while(0);do if((w|0)==174){g=Ea(0)|0;if((g|0)!=(-1|0)){a=g;j=c[4450]|0;h=j+-1|0;if(!(h&a))j=m;else j=m-a+(h+a&0-j)|0;a=c[4439]|0;h=a+j|0;if(j>>>0>z>>>0&j>>>0<2147483647){u=c[4441]|0;if((u|0)!=0?h>>>0<=a>>>0|h>>>0>u>>>0:0){a=0;break}h=Ea(j|0)|0;w=(h|0)==(g|0);a=w?j:0;if(w){x=g;w=194;break d}else w=184}else a=0}else a=0}while(0);f:do if((w|0)==184){g=0-j|0;do if(l>>>0>j>>>0&(j>>>0<2147483647&(h|0)!=(-1|0))?(v=c[4451]|0,v=f-j+v&0-v,v>>>0<2147483647):0)if((Ea(v|0)|0)==(-1|0)){Ea(g|0)|0;break f}else{j=v+j|0;break}while(0);if((h|0)!=(-1|0)){x=h;a=j;w=194;break d}}while(0);c[4442]=c[4442]|4;w=191}else{a=0;w=191}while(0);if((((w|0)==191?m>>>0<2147483647:0)?(x=Ea(m|0)|0,y=Ea(0)|0,x>>>0>>0&((x|0)!=(-1|0)&(y|0)!=(-1|0))):0)?(A=y-x|0,B=A>>>0>(z+40|0)>>>0,B):0){a=B?A:a;w=194}if((w|0)==194){j=(c[4439]|0)+a|0;c[4439]=j;if(j>>>0>(c[4440]|0)>>>0)c[4440]=j;n=c[4337]|0;g:do 
if(n){d=17772;do{j=c[d>>2]|0;h=d+4|0;g=c[h>>2]|0;if((x|0)==(j+g|0)){C=j;D=h;E=g;F=d;w=204;break}d=c[d+8>>2]|0}while((d|0)!=0);if(((w|0)==204?(c[F+12>>2]&8|0)==0:0)?n>>>0>>0&n>>>0>=C>>>0:0){c[D>>2]=E+a;w=(c[4334]|0)+a|0;v=n+8|0;v=(v&7|0)==0?0:0-v&7;u=w-v|0;c[4337]=n+v;c[4334]=u;c[n+(v+4)>>2]=u|1;c[n+(w+4)>>2]=40;c[4338]=c[4453];break}j=c[4335]|0;if(x>>>0>>0){c[4335]=x;j=x}h=x+a|0;d=17772;while(1){if((c[d>>2]|0)==(h|0)){g=d;h=d;w=212;break}d=c[d+8>>2]|0;if(!d){g=17772;break}}if((w|0)==212)if(!(c[h+12>>2]&8)){c[g>>2]=x;p=h+4|0;c[p>>2]=(c[p>>2]|0)+a;p=x+8|0;p=(p&7|0)==0?0:0-p&7;k=x+(a+8)|0;k=(k&7|0)==0?0:0-k&7;i=x+(k+a)|0;o=p+z|0;q=x+o|0;m=i-(x+p)-z|0;c[x+(p+4)>>2]=z|3;h:do if((i|0)!=(n|0)){if((i|0)==(c[4336]|0)){w=(c[4333]|0)+m|0;c[4333]=w;c[4336]=q;c[x+(o+4)>>2]=w|1;c[x+(w+o)>>2]=w;break}l=a+4|0;h=c[x+(l+k)>>2]|0;if((h&3|0)==1){f=h&-8;d=h>>>3;i:do if(h>>>0>=256){e=c[x+((k|24)+a)>>2]|0;g=c[x+(a+12+k)>>2]|0;do if((g|0)==(i|0)){b=k|16;g=x+(l+b)|0;h=c[g>>2]|0;if(!h){g=x+(b+a)|0;h=c[g>>2]|0;if(!h){K=0;break}}while(1){b=h+20|0;d=c[b>>2]|0;if(d){h=d;g=b;continue}b=h+16|0;d=c[b>>2]|0;if(!d)break;else{h=d;g=b}}if(g>>>0>>0)Ba();else{c[g>>2]=0;K=h;break}}else{b=c[x+((k|8)+a)>>2]|0;if(b>>>0>>0)Ba();j=b+12|0;if((c[j>>2]|0)!=(i|0))Ba();h=g+8|0;if((c[h>>2]|0)==(i|0)){c[j>>2]=g;c[h>>2]=b;K=g;break}else Ba()}while(0);if(!e)break;j=c[x+(a+28+k)>>2]|0;h=17628+(j<<2)|0;do if((i|0)!=(c[h>>2]|0)){if(e>>>0<(c[4335]|0)>>>0)Ba();j=e+16|0;if((c[j>>2]|0)==(i|0))c[j>>2]=K;else c[e+20>>2]=K;if(!K)break i}else{c[h>>2]=K;if(K)break;c[4332]=c[4332]&~(1<>>0>>0)Ba();c[K+24>>2]=e;j=k|16;i=c[x+(j+a)>>2]|0;do if(i)if(i>>>0>>0)Ba();else{c[K+16>>2]=i;c[i+24>>2]=K;break}while(0);i=c[x+(l+j)>>2]|0;if(!i)break;if(i>>>0<(c[4335]|0)>>>0)Ba();else{c[K+20>>2]=i;c[i+24>>2]=K;break}}else{g=c[x+((k|8)+a)>>2]|0;b=c[x+(a+12+k)>>2]|0;h=17364+(d<<1<<2)|0;do 
if((g|0)!=(h|0)){if(g>>>0>>0)Ba();if((c[g+12>>2]|0)==(i|0))break;Ba()}while(0);if((b|0)==(g|0)){c[4331]=c[4331]&~(1<>>0>>0)Ba();j=b+8|0;if((c[j>>2]|0)==(i|0)){G=j;break}Ba()}while(0);c[g+12>>2]=b;c[G>>2]=g}while(0);i=x+((f|k)+a)|0;j=f+m|0}else j=m;i=i+4|0;c[i>>2]=c[i>>2]&-2;c[x+(o+4)>>2]=j|1;c[x+(j+o)>>2]=j;i=j>>>3;if(j>>>0<256){h=i<<1;b=17364+(h<<2)|0;g=c[4331]|0;i=1<>2]|0;if(h>>>0>=(c[4335]|0)>>>0){L=i;M=h;break}Ba()}while(0);c[L>>2]=q;c[M+12>>2]=q;c[x+(o+8)>>2]=M;c[x+(o+12)>>2]=b;break}d=j>>>8;do if(!d)b=0;else{if(j>>>0>16777215){b=31;break}v=(d+1048320|0)>>>16&8;w=d<>>16&4;w=w<>>16&2;b=14-(u|v|b)+(w<>>15)|0;b=j>>>(b+7|0)&1|b<<1}while(0);i=17628+(b<<2)|0;c[x+(o+28)>>2]=b;c[x+(o+20)>>2]=0;c[x+(o+16)>>2]=0;h=c[4332]|0;g=1<>2]=q;c[x+(o+24)>>2]=i;c[x+(o+12)>>2]=q;c[x+(o+8)>>2]=q;break}d=c[i>>2]|0;j:do if((c[d+4>>2]&-8|0)!=(j|0)){h=j<<((b|0)==31?0:25-(b>>>1)|0);while(1){b=d+16+(h>>>31<<2)|0;i=c[b>>2]|0;if(!i)break;if((c[i+4>>2]&-8|0)==(j|0)){N=i;break j}else{h=h<<1;d=i}}if(b>>>0<(c[4335]|0)>>>0)Ba();else{c[b>>2]=q;c[x+(o+24)>>2]=d;c[x+(o+12)>>2]=q;c[x+(o+8)>>2]=q;break h}}else N=d;while(0);d=N+8|0;b=c[d>>2]|0;w=c[4335]|0;if(b>>>0>=w>>>0&N>>>0>=w>>>0){c[b+12>>2]=q;c[d>>2]=q;c[x+(o+8)>>2]=b;c[x+(o+12)>>2]=N;c[x+(o+24)>>2]=0;break}else Ba()}else{w=(c[4334]|0)+m|0;c[4334]=w;c[4337]=q;c[x+(o+4)>>2]=w|1}while(0);w=x+(p|8)|0;return w|0}else 
g=17772;while(1){h=c[g>>2]|0;if(h>>>0<=n>>>0?(i=c[g+4>>2]|0,b=h+i|0,b>>>0>n>>>0):0)break;g=c[g+8>>2]|0}j=h+(i+-39)|0;h=h+(i+-47+((j&7|0)==0?0:0-j&7))|0;j=n+16|0;h=h>>>0>>0?n:h;i=h+8|0;g=x+8|0;g=(g&7|0)==0?0:0-g&7;w=a+-40-g|0;c[4337]=x+g;c[4334]=w;c[x+(g+4)>>2]=w|1;c[x+(a+-36)>>2]=40;c[4338]=c[4453];g=h+4|0;c[g>>2]=27;c[i>>2]=c[4443];c[i+4>>2]=c[4444];c[i+8>>2]=c[4445];c[i+12>>2]=c[4446];c[4443]=x;c[4444]=a;c[4446]=0;c[4445]=i;i=h+28|0;c[i>>2]=7;if((h+32|0)>>>0>>0)do{w=i;i=i+4|0;c[i>>2]=7}while((w+8|0)>>>0>>0);if((h|0)!=(n|0)){f=h-n|0;c[g>>2]=c[g>>2]&-2;c[n+4>>2]=f|1;c[h>>2]=f;i=f>>>3;if(f>>>0<256){h=i<<1;e=17364+(h<<2)|0;g=c[4331]|0;i=1<>2]|0;if(b>>>0<(c[4335]|0)>>>0)Ba();else{H=d;I=b}}else{c[4331]=g|i;H=17364+(h+2<<2)|0;I=e}c[H>>2]=n;c[I+12>>2]=n;c[n+8>>2]=I;c[n+12>>2]=e;break}d=f>>>8;if(d)if(f>>>0>16777215)h=31;else{v=(d+1048320|0)>>>16&8;w=d<>>16&4;w=w<>>16&2;h=14-(u|v|h)+(w<>>15)|0;h=f>>>(h+7|0)&1|h<<1}else h=0;i=17628+(h<<2)|0;c[n+28>>2]=h;c[n+20>>2]=0;c[j>>2]=0;d=c[4332]|0;b=1<>2]=n;c[n+24>>2]=i;c[n+12>>2]=n;c[n+8>>2]=n;break}d=c[i>>2]|0;k:do if((c[d+4>>2]&-8|0)!=(f|0)){i=f<<((h|0)==31?0:25-(h>>>1)|0);while(1){b=d+16+(i>>>31<<2)|0;e=c[b>>2]|0;if(!e)break;if((c[e+4>>2]&-8|0)==(f|0)){J=e;break k}else{i=i<<1;d=e}}if(b>>>0<(c[4335]|0)>>>0)Ba();else{c[b>>2]=n;c[n+24>>2]=d;c[n+12>>2]=n;c[n+8>>2]=n;break g}}else J=d;while(0);d=J+8|0;b=c[d>>2]|0;w=c[4335]|0;if(b>>>0>=w>>>0&J>>>0>=w>>>0){c[b+12>>2]=n;c[d>>2]=n;c[n+8>>2]=b;c[n+12>>2]=J;c[n+24>>2]=0;break}else Ba()}}else{w=c[4335]|0;if((w|0)==0|x>>>0>>0)c[4335]=x;c[4443]=x;c[4444]=a;c[4446]=0;c[4340]=c[4449];c[4339]=-1;d=0;do{w=d<<1;v=17364+(w<<2)|0;c[17364+(w+3<<2)>>2]=v;c[17364+(w+2<<2)>>2]=v;d=d+1|0}while((d|0)!=32);w=x+8|0;w=(w&7|0)==0?0:0-w&7;v=a+-40-w|0;c[4337]=x+w;c[4334]=v;c[x+(w+4)>>2]=v|1;c[x+(a+-36)>>2]=40;c[4338]=c[4453]}while(0);b=c[4334]|0;if(b>>>0>z>>>0){v=b-z|0;c[4334]=v;w=c[4337]|0;c[4337]=w+z;c[w+(z+4)>>2]=v|1;c[w+4>>2]=z|3;w=w+8|0;return w|0}}c[(py()|0)>>2]=12;w=0;return w|0}function Sy(a){a=a|0;var 
b=0,d=0,e=0,f=0,g=0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0;if(!a)return;g=a+-8|0;h=c[4335]|0;if(g>>>0>>0)Ba();b=c[a+-4>>2]|0;d=b&3;if((d|0)==1)Ba();o=b&-8;q=a+(o+-8)|0;do if(!(b&1)){g=c[g>>2]|0;if(!d)return;i=-8-g|0;l=a+i|0;m=g+o|0;if(l>>>0>>0)Ba();if((l|0)==(c[4336]|0)){g=a+(o+-4)|0;b=c[g>>2]|0;if((b&3|0)!=3){u=l;k=m;break}c[4333]=m;c[g>>2]=b&-2;c[a+(i+4)>>2]=m|1;c[q>>2]=m;return}e=g>>>3;if(g>>>0<256){d=c[a+(i+8)>>2]|0;b=c[a+(i+12)>>2]|0;g=17364+(e<<1<<2)|0;if((d|0)!=(g|0)){if(d>>>0>>0)Ba();if((c[d+12>>2]|0)!=(l|0))Ba()}if((b|0)==(d|0)){c[4331]=c[4331]&~(1<>>0>>0)Ba();g=b+8|0;if((c[g>>2]|0)==(l|0))f=g;else Ba()}else f=b+8|0;c[d+12>>2]=b;c[f>>2]=d;u=l;k=m;break}f=c[a+(i+24)>>2]|0;d=c[a+(i+12)>>2]|0;do if((d|0)==(l|0)){b=a+(i+20)|0;g=c[b>>2]|0;if(!g){b=a+(i+16)|0;g=c[b>>2]|0;if(!g){j=0;break}}while(1){d=g+20|0;e=c[d>>2]|0;if(e){g=e;b=d;continue}d=g+16|0;e=c[d>>2]|0;if(!e)break;else{g=e;b=d}}if(b>>>0>>0)Ba();else{c[b>>2]=0;j=g;break}}else{e=c[a+(i+8)>>2]|0;if(e>>>0>>0)Ba();g=e+12|0;if((c[g>>2]|0)!=(l|0))Ba();b=d+8|0;if((c[b>>2]|0)==(l|0)){c[g>>2]=d;c[b>>2]=e;j=d;break}else Ba()}while(0);if(f){g=c[a+(i+28)>>2]|0;b=17628+(g<<2)|0;if((l|0)==(c[b>>2]|0)){c[b>>2]=j;if(!j){c[4332]=c[4332]&~(1<>>0<(c[4335]|0)>>>0)Ba();g=f+16|0;if((c[g>>2]|0)==(l|0))c[g>>2]=j;else c[f+20>>2]=j;if(!j){u=l;k=m;break}}b=c[4335]|0;if(j>>>0>>0)Ba();c[j+24>>2]=f;g=c[a+(i+16)>>2]|0;do if(g)if(g>>>0>>0)Ba();else{c[j+16>>2]=g;c[g+24>>2]=j;break}while(0);g=c[a+(i+20)>>2]|0;if(g)if(g>>>0<(c[4335]|0)>>>0)Ba();else{c[j+20>>2]=g;c[g+24>>2]=j;u=l;k=m;break}else{u=l;k=m}}else{u=l;k=m}}else{u=g;k=o}while(0);if(u>>>0>=q>>>0)Ba();g=a+(o+-4)|0;b=c[g>>2]|0;if(!(b&1))Ba();if(!(b&2)){if((q|0)==(c[4337]|0)){l=(c[4334]|0)+k|0;c[4334]=l;c[4337]=u;c[u+4>>2]=l|1;if((u|0)!=(c[4336]|0))return;c[4336]=0;c[4333]=0;return}if((q|0)==(c[4336]|0)){l=(c[4333]|0)+k|0;c[4333]=l;c[4336]=u;c[u+4>>2]=l|1;c[u+l>>2]=l;return}h=(b&-8)+k|0;e=b>>>3;do if(b>>>0>=256){f=c[a+(o+16)>>2]|0;g=c[a+(o|4)>>2]|0;do 
if((g|0)==(q|0)){b=a+(o+12)|0;g=c[b>>2]|0;if(!g){b=a+(o+8)|0;g=c[b>>2]|0;if(!g){p=0;break}}while(1){d=g+20|0;e=c[d>>2]|0;if(e){g=e;b=d;continue}d=g+16|0;e=c[d>>2]|0;if(!e)break;else{g=e;b=d}}if(b>>>0<(c[4335]|0)>>>0)Ba();else{c[b>>2]=0;p=g;break}}else{b=c[a+o>>2]|0;if(b>>>0<(c[4335]|0)>>>0)Ba();d=b+12|0;if((c[d>>2]|0)!=(q|0))Ba();e=g+8|0;if((c[e>>2]|0)==(q|0)){c[d>>2]=g;c[e>>2]=b;p=g;break}else Ba()}while(0);if(f){g=c[a+(o+20)>>2]|0;b=17628+(g<<2)|0;if((q|0)==(c[b>>2]|0)){c[b>>2]=p;if(!p){c[4332]=c[4332]&~(1<>>0<(c[4335]|0)>>>0)Ba();g=f+16|0;if((c[g>>2]|0)==(q|0))c[g>>2]=p;else c[f+20>>2]=p;if(!p)break}g=c[4335]|0;if(p>>>0>>0)Ba();c[p+24>>2]=f;f=c[a+(o+8)>>2]|0;do if(f)if(f>>>0>>0)Ba();else{c[p+16>>2]=f;c[f+24>>2]=p;break}while(0);d=c[a+(o+12)>>2]|0;if(d)if(d>>>0<(c[4335]|0)>>>0)Ba();else{c[p+20>>2]=d;c[d+24>>2]=p;break}}}else{d=c[a+o>>2]|0;b=c[a+(o|4)>>2]|0;g=17364+(e<<1<<2)|0;if((d|0)!=(g|0)){if(d>>>0<(c[4335]|0)>>>0)Ba();if((c[d+12>>2]|0)!=(q|0))Ba()}if((b|0)==(d|0)){c[4331]=c[4331]&~(1<>>0<(c[4335]|0)>>>0)Ba();f=b+8|0;if((c[f>>2]|0)==(q|0))n=f;else Ba()}else n=b+8|0;c[d+12>>2]=b;c[n>>2]=d}while(0);c[u+4>>2]=h|1;c[u+h>>2]=h;if((u|0)==(c[4336]|0)){c[4333]=h;return}else g=h}else{c[g>>2]=b&-2;c[u+4>>2]=k|1;c[u+k>>2]=k;g=k}f=g>>>3;if(g>>>0<256){e=f<<1;g=17364+(e<<2)|0;b=c[4331]|0;d=1<>2]|0;if(b>>>0<(c[4335]|0)>>>0)Ba();else{r=d;s=b}}else{c[4331]=b|d;r=17364+(e+2<<2)|0;s=g}c[r>>2]=u;c[s+12>>2]=u;c[u+8>>2]=s;c[u+12>>2]=g;return}b=g>>>8;if(b)if(g>>>0>16777215)f=31;else{k=(b+1048320|0)>>>16&8;l=b<>>16&4;l=l<>>16&2;f=14-(j|k|f)+(l<>>15)|0;f=g>>>(f+7|0)&1|f<<1}else f=0;d=17628+(f<<2)|0;c[u+28>>2]=f;c[u+20>>2]=0;c[u+16>>2]=0;b=c[4332]|0;e=1<>2]|0;b:do if((c[d+4>>2]&-8|0)!=(g|0)){f=g<<((f|0)==31?0:25-(f>>>1)|0);while(1){b=d+16+(f>>>31<<2)|0;e=c[b>>2]|0;if(!e)break;if((c[e+4>>2]&-8|0)==(g|0)){t=e;break b}else{f=f<<1;d=e}}if(b>>>0<(c[4335]|0)>>>0)Ba();else{c[b>>2]=u;c[u+24>>2]=d;c[u+12>>2]=u;c[u+8>>2]=u;break a}}else 
t=d;while(0);b=t+8|0;d=c[b>>2]|0;l=c[4335]|0;if(d>>>0>=l>>>0&t>>>0>=l>>>0){c[d+12>>2]=u;c[b>>2]=u;c[u+8>>2]=d;c[u+12>>2]=t;c[u+24>>2]=0;break}else Ba()}else{c[4332]=b|e;c[d>>2]=u;c[u+24>>2]=d;c[u+12>>2]=u;c[u+8>>2]=u}while(0);l=(c[4339]|0)+-1|0;c[4339]=l;if(!l)b=17780;else return;while(1){b=c[b>>2]|0;if(!b)break;else b=b+8|0}c[4339]=-1;return}function Ty(){}function Uy(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;b=b-d-(c>>>0>a>>>0|0)>>>0;return (C=b,a-c>>>0|0)|0}function Vy(b,d,e){b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,i=0;f=b+e|0;if((e|0)>=20){d=d&255;h=b&3;i=d|d<<8|d<<16|d<<24;g=f&~3;if(h){h=b+4-h|0;while((b|0)<(h|0)){a[b>>0]=d;b=b+1|0}}while((b|0)<(g|0)){c[b>>2]=i;b=b+4|0}}while((b|0)<(f|0)){a[b>>0]=d;b=b+1|0}return b-e|0}function Wy(a,b,c){a=a|0;b=b|0;c=c|0;if((c|0)<32){C=b>>>c;return a>>>c|(b&(1<>>c-32|0}function Xy(a,b,c){a=a|0;b=b|0;c=c|0;if((c|0)<32){C=b<>>32-c;return a<>>0;return (C=b+d+(c>>>0>>0|0)>>>0,c|0)|0}function Zy(b,d,e){b=b|0;d=d|0;e=e|0;var f=0;if((e|0)>=4096)return Fa(b|0,d|0,e|0)|0;f=b|0;if((b&3)==(d&3)){while(b&3){if(!e)return f|0;a[b>>0]=a[d>>0]|0;b=b+1|0;d=d+1|0;e=e-1|0}while((e|0)>=4){c[b>>2]=c[d>>2];b=b+4|0;d=d+4|0;e=e-4|0}}while((e|0)>0){a[b>>0]=a[d>>0]|0;b=b+1|0;d=d+1|0;e=e-1|0}return f|0}function _y(b,c,d){b=b|0;c=c|0;d=d|0;var e=0;if((c|0)<(b|0)&(b|0)<(c+d|0)){e=b;c=c+d|0;b=b+d|0;while((d|0)>0){b=b-1|0;c=c-1|0;d=d-1|0;a[b>>0]=a[c>>0]|0}b=e}else Zy(b,c,d)|0;return b|0}function $y(a,b,c){a=a|0;b=b|0;c=c|0;if((c|0)<32){C=b>>c;return a>>>c|(b&(1<>c-32|0}function az(b){b=b|0;var c=0;c=a[m+(b&255)>>0]|0;if((c|0)<8)return c|0;c=a[m+(b>>8&255)>>0]|0;if((c|0)<8)return c+8|0;c=a[m+(b>>16&255)>>0]|0;if((c|0)<8)return c+16|0;return (a[m+(b>>>24)>>0]|0)+24|0}function bz(a,b){a=a|0;b=b|0;var c=0,d=0,e=0,f=0;f=a&65535;d=b&65535;c=_(d,f)|0;e=a>>>16;d=(c>>>16)+(_(d,e)|0)|0;b=b>>>16;a=_(b,f)|0;return (C=(d>>>16)+(_(b,e)|0)+(((d&65535)+a|0)>>>16)|0,d+a<<16|c&65535|0)|0}function cz(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;var 
e=0,f=0,g=0,h=0,i=0,j=0;j=b>>31|((b|0)<0?-1:0)<<1;i=((b|0)<0?-1:0)>>31|((b|0)<0?-1:0)<<1;f=d>>31|((d|0)<0?-1:0)<<1;e=((d|0)<0?-1:0)>>31|((d|0)<0?-1:0)<<1;h=Uy(j^a,i^b,j,i)|0;g=C;b=f^j;a=e^i;return Uy((hz(h,g,Uy(f^c,e^d,f,e)|0,C,0)|0)^b,C^a,b,a)|0}function dz(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0,h=0,j=0,k=0,l=0;f=i;i=i+16|0;j=f|0;h=b>>31|((b|0)<0?-1:0)<<1;g=((b|0)<0?-1:0)>>31|((b|0)<0?-1:0)<<1;l=e>>31|((e|0)<0?-1:0)<<1;k=((e|0)<0?-1:0)>>31|((e|0)<0?-1:0)<<1;b=Uy(h^a,g^b,h,g)|0;a=C;hz(b,a,Uy(l^d,k^e,l,k)|0,C,j)|0;a=Uy(c[j>>2]^h,c[j+4>>2]^g,h,g)|0;b=C;i=f;return (C=b,a)|0}function ez(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;var e=0,f=0;e=a;f=c;a=bz(e,f)|0;c=C;return (C=(_(b,f)|0)+(_(d,e)|0)+c|c&0,a|0|0)|0}function fz(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;return hz(a,b,c,d,0)|0}function gz(a,b,d,e){a=a|0;b=b|0;d=d|0;e=e|0;var f=0,g=0;g=i;i=i+16|0;f=g|0;hz(a,b,d,e,f)|0;i=g;return (C=c[f+4>>2]|0,c[f>>2]|0)|0}function hz(a,b,d,e,f){a=a|0;b=b|0;d=d|0;e=e|0;f=f|0;var g=0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0;n=a;l=b;m=l;k=d;o=e;h=o;if(!m){g=(f|0)!=0;if(!h){if(g){c[f>>2]=(n>>>0)%(k>>>0);c[f+4>>2]=0}l=0;m=(n>>>0)/(k>>>0)>>>0;return (C=l,m)|0}else{if(!g){l=0;m=0;return (C=l,m)|0}c[f>>2]=a|0;c[f+4>>2]=b&0;l=0;m=0;return (C=l,m)|0}}j=(h|0)==0;do if(k){if(!j){i=(aa(h|0)|0)-(aa(m|0)|0)|0;if(i>>>0<=31){g=i+1|0;l=31-i|0;k=i-31>>31;h=g;j=n>>>(g>>>0)&k|m<>>(g>>>0)&k;g=0;i=n<>2]=a|0;c[f+4>>2]=l|b&0;l=0;m=0;return (C=l,m)|0}j=k-1|0;if(j&k){i=(aa(k|0)|0)+33-(aa(m|0)|0)|0;p=64-i|0;l=32-i|0;a=l>>31;b=i-32|0;k=b>>31;h=i;j=l-1>>31&m>>>(b>>>0)|(m<>>(i>>>0))&k;k=k&m>>>(i>>>0);g=n<>>(b>>>0))&a|n<>31;break}if(f){c[f>>2]=j&n;c[f+4>>2]=0}if((k|0)==1){l=l|b&0;m=a|0|0;return (C=l,m)|0}else{a=az(k|0)|0;l=m>>>(a>>>0)|0;m=m<<32-a|n>>>(a>>>0)|0;return (C=l,m)|0}}else{if(j){if(f){c[f>>2]=(m>>>0)%(k>>>0);c[f+4>>2]=0}l=0;m=(m>>>0)/(k>>>0)>>>0;return (C=l,m)|0}if(!n){if(f){c[f>>2]=0;c[f+4>>2]=(m>>>0)%(h>>>0)}l=0;m=(m>>>0)/(h>>>0)>>>0;return 
(C=l,m)|0}j=h-1|0;if(!(j&h)){if(f){c[f>>2]=a|0;c[f+4>>2]=j&m|b&0}l=0;m=m>>>((az(h|0)|0)>>>0);return (C=l,m)|0}i=(aa(h|0)|0)-(aa(m|0)|0)|0;if(i>>>0<=30){k=i+1|0;i=31-i|0;h=k;j=m<>>(k>>>0);k=m>>>(k>>>0);g=0;i=n<>2]=a|0;c[f+4>>2]=l|b&0;l=0;m=0;return (C=l,m)|0}while(0);if(!h){l=i;d=0;i=0}else{m=d|0|0;l=o|e&0;b=Yy(m|0,l|0,-1,-1)|0;a=C;d=i;i=0;do{p=d;d=g>>>31|d<<1;g=i|g<<1;p=j<<1|p>>>31|0;o=j>>>31|k<<1|0;Uy(b,a,p,o)|0;n=C;e=n>>31|((n|0)<0?-1:0)<<1;i=e&1;j=Uy(p,o,e&m,(((n|0)<0?-1:0)>>31|((n|0)<0?-1:0)<<1)&l)|0;k=C;h=h-1|0}while((h|0)!=0);l=d;d=0}h=0;if(f){c[f>>2]=j;c[f+4>>2]=k}l=(g|0)>>>31|(l|h)<<1|(h<<1|g>>>31)&0|d;m=(g<<1|0>>>31)&-2|i;return (C=l,m)|0}function iz(a,b){a=a|0;b=b|0;Ua[a&511](b|0)}function jz(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;return Va[a&63](b|0,c|0,d|0)|0}function kz(a,b,c,d,e,f,g,h,i,j,k){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;k=k|0;Wa[a&127](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0,j|0,k|0)}function lz(a,b,c,d,e,f,g,h){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;Xa[a&127](b|0,c|0,d|0,e|0,f|0,g|0,h|0)}function mz(a,b,c,d,e,f){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;Ya[a&63](b|0,c|0,d|0,e|0,f|0)}function nz(a,b,c,d,e,f,g){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;return Za[a&3](b|0,c|0,d|0,e|0,f|0,g|0)|0}function oz(a,b,c,d,e,f,g,h,i,j,k){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;k=k|0;return _a[a&1](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0,j|0,k|0)|0}function pz(a,b,c){a=a|0;b=b|0;c=c|0;$a[a&127](b|0,c|0)}function qz(a,b,c,d){a=a|0;b=b|0;c=+c;d=d|0;return +ab[a&0](b|0,+c,d|0)}function rz(a,b){a=a|0;b=b|0;return bb[a&7](b|0)|0}function sz(a,b,c,d,e,f,g,h,i,j){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;return cb[a&1](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0,j|0)|0}function tz(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;o=o|0;return db[a&7](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0,j|0,k|0,l|0,m|0,n|0,o|0)|0}function 
uz(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;eb[a&63](b|0,c|0,d|0)}function vz(a,b,c,d,e,f,g,h,i){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;fb[a&7](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0)}function wz(a,b,c,d,e,f,g,h,i,j,k,l){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;k=k|0;l=l|0;return gb[a&7](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0,j|0,k|0,l|0)|0}function xz(a,b,c,d,e,f,g,h,i,j){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;hb[a&127](b|0,c|0,d|0,e|0,f|0,g|0,h|0,i|0,j|0)}function yz(a,b,c,d,e){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;ib[a&0](b|0,c|0,d|0,e|0)}function zz(a,b,c){a=a|0;b=b|0;c=c|0;return jb[a&15](b|0,c|0)|0}function Az(a,b,c,d,e,f){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;return kb[a&0](b|0,c|0,d|0,e|0,f|0)|0}function Bz(a,b,c,d,e,f){a=a|0;b=b|0;c=c|0;d=+d;e=+e;f=f|0;lb[a&3](b|0,c|0,+d,+e,f|0)}function Cz(a){a=a|0;ba(0)}function Dz(a,b,c){a=a|0;b=b|0;c=c|0;ba(1);return 0}function Ez(a,b,c,d,e,f,g,h,i,j){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;ba(2)}function Fz(a,b,c,d,e,f,g){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;ba(3)}function Gz(a,b,c,d,e){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;ba(4)}function Hz(a,b,c,d,e,f){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;ba(5);return 0}function Iz(a,b,c,d,e,f,g,h,i,j){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;ba(6);return 0}function Jz(a,b){a=a|0;b=b|0;ba(7)}function Kz(a,b,c){a=a|0;b=+b;c=c|0;ba(8);return 0.0}function Lz(a){a=a|0;ba(9);return 0}function Mz(a,b,c,d,e,f,g,h,i){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;ba(10);return 0}function Nz(a,b,c,d,e,f,g,h,i,j,k,l,m,n){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;k=k|0;l=l|0;m=m|0;n=n|0;ba(11);return 0}function Oz(a,b,c){a=a|0;b=b|0;c=c|0;ba(12)}function Pz(a,b,c,d,e,f,g,h){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;ba(13)}function Qz(a,b,c,d,e,f,g,h,i,j,k){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;j=j|0;k=k|0;ba(14);return 0}function 
Rz(a,b,c,d,e,f,g,h,i){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;f=f|0;g=g|0;h=h|0;i=i|0;ba(15)}function Sz(a,b,c,d){a=a|0;b=b|0;c=c|0;d=d|0;ba(16)}function Tz(a,b){a=a|0;b=b|0;ba(17);return 0}function Uz(a,b,c,d,e){a=a|0;b=b|0;c=c|0;d=d|0;e=e|0;ba(18);return 0}function Vz(a,b,c,d,e){a=a|0;b=b|0;c=+c;d=+d;e=e|0;ba(19)} + +// EMSCRIPTEN_END_FUNCS +var Ua=[Cz,td,vd,bf,mf,Vg,Lg,Gh,Rh,ef,Cg,vh,Ve,ih,Vf,eg,yf,Kf,Uf,bg,ng,Bg,qc,Sg,$g,sh,uh,Dh,Nh,Yh,vi,zi,Bi,Di,Hi,Ji,Li,Ni,di,fi,hi,ji,li,ni,pi,xi,Fi,ri,ti,lj,pj,rj,tj,xj,zj,Bj,Dj,$i,bj,dj,fj,nj,vj,hj,jj,Pj,Vj,Hj,Nj,Tj,Rj,Fj,Jj,Lj,Pi,Ti,Zi,Ri,Vi,Xi,dk,nk,Qm,vo,Pp,Fp,ln,fk,Wl,ao,Qk,Uo,_k,hl,vp,en,lo,Xj,Rn,ep,sm,Bk,Pk,Xk,fl,pl,Hl,Ol,Vl,pm,Am,Pm,Wm,yn,An,Nn,Pn,Zn,io,uo,bp,pp,Cp,Mp,Xp,dt,ht,jt,lt,pt,rt,tt,vt,Ls,Ns,Rs,Ts,Vs,Xs,Zs,ft,nt,Ps,$s,bt,Nr,Rr,Tr,Vr,Zr,$r,bs,ds,Br,Dr,Fr,Hr,Pr,Xr,Jr,Lr,vr,zr,nr,tr,xr,pr,rr,rs,vs,xs,zs,Ds,Fs,Hs,Js,fs,hs,js,ls,ts,Bs,ns,ps,Dq,Hq,Jq,Lq,vq,xq,zq,Fq,Bq,rq,tq,lq,pq,nq,dr,hr,jr,lr,Xq,Zq,$q,fr,br,Tq,Vq,Nq,Rq,Pq,pw,tw,vw,xw,Bw,Dw,Fw,Hw,Xv,Zv,bw,dw,fw,hw,jw,rw,zw,$v,lw,nw,Xt,$t,bu,du,hu,ju,lu,nu,Lt,Nt,Pt,Rt,Zt,fu,Tt,Vt,Ft,Jt,xt,Dt,Ht,zt,Bt,Dv,Hv,Jv,Lv,Pv,Rv,Tv,Vv,rv,tv,vv,xv,Fv,Nv,zv,Bv,Hu,Lu,Nu,Pu,zu,Bu,Du,Ju,Fu,vu,xu,pu,tu,ru,hv,lv,nv,pv,$u,bv,dv,jv,fv,Xu,Zu,Ru,Vu,Tu,Jw,Lw,Ow,Lx,Xw,fx,Bx,rx,Vw,dx,px,zx,Jx,Sx,Iy,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz,Cz];var 
Va=[Dz,Xe,gf,tf,sg,Eg,Ng,Xg,kh,xh,Ih,Th,Zj,hk,Ik,Sk,al,jl,vl,Kl,Ql,Yl,Km,Sm,gn,nn,Tn,co,no,Qo,Wo,gp,xp,Hp,Rp,jq,fq,bq,Qw,Zw,hx,tx,Dx,Nx,vy,Ay,uy,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz,Dz];var Wa=[Ez,gs,is,ks,ms,os,qs,ss,us,ws,ys,As,Cs,Es,Gs,Is,Ks,Ms,Os,Qs,Ss,Us,Ws,Ys,_s,at,ct,et,gt,it,kt,mt,ot,qt,st,ut,wt,sv,uv,wv,yv,Av,Cv,Ev,Gv,Iv,Kv,Mv,Ov,Qv,Sv,Uv,Wv,Yv,_v,aw,cw,ew,gw,iw,kw,mw,ow,qw,sw,uw,ww,yw,Aw,Cw,Ew,Gw,Iw,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez,Ez];var Xa=[Fz,aj,cj,ej,gj,ij,kj,mj,oj,qj,sj,uj,wj,yj,Aj,Cj,Ej,Gj,Ij,Kj,Mj,Oj,Qj,Sj,Uj,Wj,or,qr,sr,ur,wr,yr,Ar,Cr,Er,Gr,Ir,Kr,Mr,Or,Qr,Sr,Ur,Wr,Yr,_r,as,cs,es,yt,At,Ct,Et,Gt,It,Kt,Mt,Ot,Qt,St,Ut,Wt,Yt,_t,au,cu,eu,gu,iu,ku,mu,ou,Kw,Mw,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz,Fz];var Ya=[Gz,ch,bh,Ob,Pb,xe,ze,_e,jf,uf,vf,ug,xg,wg,Gg,Pg,mh,zh,Kh,Vh,$j,ak,Jk,Kk,Lk,Mk,Uk,Il,El,Tl,Sl,jn,Wn,Vn,qo,po,ro,ue,ve,we,jp,kp,zp,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz,Gz];var Za=[Hz,iq,eq,aq];var _a=[Iz,ci];var $a=[Jz,Ic,Kc,Lc,sd,ud,Qh,$e,af,kf,lf,wf,xf,If,Jf,Sf,Tf,$f,ag,lg,mg,Ab,Ag,Jg,Kg,Qg,Rg,Zg,_g,nh,rh,th,Bh,Ch,Lh,Mh,Wh,Xh,tp,bk,ck,up,lk,mk,zk,Ak,Nk,Ok,Vk,Wk,dl,el,nl,ol,Gl,Nl,Ul,am,bm,nm,om,ym,zm,Nm,Om,Um,Vm,kn,qn,xn,zn,Mn,On,Xn,Yn,go,ho,so,to,To,$o,ap,np,op,Ap,Bp,Kp,Lp,Vp,Wp,Tw,Uw,bx,cx,nx,ox,xx,yx,Hx,Ix,Qx,Rx,lm,km,xm,wm,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz,Jz];var ab=[Kz];var bb=[Lz,eh,dh,$m,_m,xy,Lz,Lz];var cb=[Mz,kq];var db=[Nz,Bf,Pf,Xf,gg,Nz,Nz,Nz];var eb=[Oz,Zm,Ym,wo,yo,Ao,Co,Eo,Go,Ho,Jo,Lo,hq,dq,$p,Ee,Ge,He,Ie,Je,Df,Ff,Ef,Rf,_f,Zf,ig,kk,jk,uk,wk,vk,cl,ml,zl,yl,Bl,Al,Ml,$l,_l,Lm,Mm,pn,fo,Zo,Yo,_o,Jp,Sw,ax,$w,jx,kx,lx,mx,wx,vx,Gx,Fx,Px,Oz,Oz,Oz];var fb=[Pz,Qi,Si,Ui,Wi,Yi,_i,Pz];var gb=[Qz,bi,sk,im,um,Qz,Qz,Qz];var 
hb=[Rz,ei,gi,ii,ki,mi,oi,qi,si,ui,wi,yi,Ai,Ci,Ei,Gi,Ii,Ki,Mi,Oi,Kb,Lb,Mb,Nb,mq,oq,qq,sq,uq,wq,yq,Aq,Cq,Eq,Gq,Iq,Kq,Mq,Oq,Qq,Sq,Uq,Wq,Yq,_q,ar,cr,er,gr,ir,kr,mr,qu,su,uu,wu,yu,Au,Cu,Eu,Gu,Iu,Ku,Mu,Ou,Qu,Su,Uu,Wu,Yu,_u,av,cv,ev,gv,iv,kv,mv,ov,qv,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz,Rz];var ib=[Sz];var jb=[Tz,Jc,Mc,xo,zo,Bo,Do,Fo,Io,Ko,ce,Vd,Tz,Tz,Tz,Tz];var kb=[Uz];var lb=[Vz,Fe,Ke,Vz];return{_i64Subtract:Uy,_free:Sy,_i64Add:Yy,_memmove:_y,_fftwf_plan_dft_r2c_1d:hy,_memset:Vy,_malloc:Ry,_memcpy:Zy,_fftwf_destroy_plan:Vx,_bitshift64Lshr:Wy,_fftwf_plan_dft_c2r_1d:fy,_fftwf_execute:_x,_bitshift64Shl:Xy,runPostSets:Ty,stackAlloc:mb,stackSave:nb,stackRestore:ob,establishStackSpace:pb,setThrew:qb,setTempRet0:tb,getTempRet0:ub,dynCall_vi:iz,dynCall_iiii:jz,dynCall_viiiiiiiiii:kz,dynCall_viiiiiii:lz,dynCall_viiiii:mz,dynCall_iiiiiii:nz,dynCall_iiiiiiiiiii:oz,dynCall_vii:pz,dynCall_didi:qz,dynCall_ii:rz,dynCall_iiiiiiiiii:sz,dynCall_iiiiiiiiiiiiiii:tz,dynCall_viii:uz,dynCall_viiiiiiii:vz,dynCall_iiiiiiiiiiii:wz,dynCall_viiiiiiiii:xz,dynCall_viiii:yz,dynCall_iii:zz,dynCall_iiiiii:Az,dynCall_viiddi:Bz}}) + + +// EMSCRIPTEN_END_ASM +(Module.asmGlobalArg,Module.asmLibraryArg,buffer);var _i64Subtract=Module["_i64Subtract"]=asm["_i64Subtract"];var _free=Module["_free"]=asm["_free"];var runPostSets=Module["runPostSets"]=asm["runPostSets"];var _i64Add=Module["_i64Add"]=asm["_i64Add"];var _memmove=Module["_memmove"]=asm["_memmove"];var _fftwf_plan_dft_r2c_1d=Module["_fftwf_plan_dft_r2c_1d"]=asm["_fftwf_plan_dft_r2c_1d"];var _memset=Module["_memset"]=asm["_memset"];var _malloc=Module["_malloc"]=asm["_malloc"];var _memcpy=Module["_memcpy"]=asm["_memcpy"];var _fftwf_destroy_plan=Module["_fftwf_destroy_plan"]=asm["_fftwf_destroy_plan"];var _bitshift64Lshr=Module["_bitshift64Lshr"]=asm["_bitshift64Lshr"];var 
_fftwf_plan_dft_c2r_1d=Module["_fftwf_plan_dft_c2r_1d"]=asm["_fftwf_plan_dft_c2r_1d"];var _fftwf_execute=Module["_fftwf_execute"]=asm["_fftwf_execute"];var _bitshift64Shl=Module["_bitshift64Shl"]=asm["_bitshift64Shl"];var dynCall_vi=Module["dynCall_vi"]=asm["dynCall_vi"];var dynCall_iiii=Module["dynCall_iiii"]=asm["dynCall_iiii"];var dynCall_viiiiiiiiii=Module["dynCall_viiiiiiiiii"]=asm["dynCall_viiiiiiiiii"];var dynCall_viiiiiii=Module["dynCall_viiiiiii"]=asm["dynCall_viiiiiii"];var dynCall_viiiii=Module["dynCall_viiiii"]=asm["dynCall_viiiii"];var dynCall_iiiiiii=Module["dynCall_iiiiiii"]=asm["dynCall_iiiiiii"];var dynCall_iiiiiiiiiii=Module["dynCall_iiiiiiiiiii"]=asm["dynCall_iiiiiiiiiii"];var dynCall_vii=Module["dynCall_vii"]=asm["dynCall_vii"];var dynCall_didi=Module["dynCall_didi"]=asm["dynCall_didi"];var dynCall_ii=Module["dynCall_ii"]=asm["dynCall_ii"];var dynCall_iiiiiiiiii=Module["dynCall_iiiiiiiiii"]=asm["dynCall_iiiiiiiiii"];var dynCall_iiiiiiiiiiiiiii=Module["dynCall_iiiiiiiiiiiiiii"]=asm["dynCall_iiiiiiiiiiiiiii"];var dynCall_viii=Module["dynCall_viii"]=asm["dynCall_viii"];var dynCall_viiiiiiii=Module["dynCall_viiiiiiii"]=asm["dynCall_viiiiiiii"];var dynCall_iiiiiiiiiiii=Module["dynCall_iiiiiiiiiiii"]=asm["dynCall_iiiiiiiiiiii"];var dynCall_viiiiiiiii=Module["dynCall_viiiiiiiii"]=asm["dynCall_viiiiiiiii"];var dynCall_viiii=Module["dynCall_viiii"]=asm["dynCall_viiii"];var dynCall_iii=Module["dynCall_iii"]=asm["dynCall_iii"];var dynCall_iiiiii=Module["dynCall_iiiiii"]=asm["dynCall_iiiiii"];var dynCall_viiddi=Module["dynCall_viiddi"]=asm["dynCall_viiddi"];Runtime.stackAlloc=asm["stackAlloc"];Runtime.stackSave=asm["stackSave"];Runtime.stackRestore=asm["stackRestore"];Runtime.establishStackSpace=asm["establishStackSpace"];Runtime.setTempRet0=asm["setTempRet0"];Runtime.getTempRet0=asm["getTempRet0"];function ExitStatus(status){this.name="ExitStatus";this.message="Program terminated with exit("+status+")";this.status=status}ExitStatus.prototype=new 
Error;ExitStatus.prototype.constructor=ExitStatus;var initialStackTop;var preloadStartTime=null;var calledMain=false;dependenciesFulfilled=function runCaller(){if(!Module["calledRun"])run();if(!Module["calledRun"])dependenciesFulfilled=runCaller};Module["callMain"]=Module.callMain=function callMain(args){assert(runDependencies==0,"cannot call main when async dependencies remain! (listen on __ATMAIN__)");assert(__ATPRERUN__.length==0,"cannot call main when preRun functions remain to be called");args=args||[];ensureInitRuntime();var argc=args.length+1;function pad(){for(var i=0;i<4-1;i++){argv.push(0)}}var argv=[allocate(intArrayFromString(Module["thisProgram"]),"i8",ALLOC_NORMAL)];pad();for(var i=0;i0){return}preRun();if(runDependencies>0)return;if(Module["calledRun"])return;function doRun(){if(Module["calledRun"])return;Module["calledRun"]=true;if(ABORT)return;ensureInitRuntime();preMain();if(Module["onRuntimeInitialized"])Module["onRuntimeInitialized"]();if(Module["_main"]&&shouldRunNow)Module["callMain"](args);postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout((function(){setTimeout((function(){Module["setStatus"]("")}),1);doRun()}),1)}else{doRun()}}Module["run"]=Module.run=run;function exit(status,implicit){if(implicit&&Module["noExitRuntime"]){return}if(Module["noExitRuntime"]){}else{ABORT=true;EXITSTATUS=status;STACKTOP=initialStackTop;exitRuntime();if(Module["onExit"])Module["onExit"](status)}if(ENVIRONMENT_IS_NODE){process["stdout"]["once"]("drain",(function(){process["exit"](status)}));console.log(" ");setTimeout((function(){process["exit"](status)}),500)}else if(ENVIRONMENT_IS_SHELL&&typeof quit==="function"){quit(status)}throw new ExitStatus(status)}Module["exit"]=Module.exit=exit;var abortDecorators=[];function abort(what){if(what!==undefined){Module.print(what);Module.printErr(what);what=JSON.stringify(what)}else{what=""}ABORT=true;EXITSTATUS=1;var extra="\nIf this abort() is unexpected, build with -s ASSERTIONS=1 which can 
give more information.";var output="abort("+what+") at "+stackTrace()+extra;if(abortDecorators){abortDecorators.forEach((function(decorator){output=decorator(output,what)}))}throw output}Module["abort"]=Module.abort=abort;if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}var shouldRunNow=true;if(Module["noInitialRun"]){shouldRunNow=false}run() + + + + + + return Module; +}; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/Makefile.emscripten --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/Makefile.emscripten Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,15 @@ + +FFTW.js: + emcc -O3 \ + --memory-init-file 0 \ + -s NO_FILESYSTEM=1 \ + -s NO_BROWSER=1 \ + -s MODULARIZE=1 \ + -s EXPORT_NAME="'FFTWModule'" \ + -s EXPORTED_FUNCTIONS="['_fftwf_plan_dft_r2c_1d','_fftwf_plan_dft_c2r_1d','_fftwf_destroy_plan','_fftwf_execute']" \ + -o FFTW.js \ + fftw-3.3.4/.libs/libfftw3f.a + +clean: + rm -f FFTW.js + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/build.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/build.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,11 @@ +#!/bin/bash + +set -e + +( + cd fftw-3.3.4 + emconfigure ./configure --disable-fortran --enable-single && emmake make +) + +make -f Makefile.emscripten + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/AUTHORS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/AUTHORS Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,14 @@ +Authors of FFTW (reachable at fftw@fftw.org): + +Matteo Frigo +Steven G. Johnson + +Stefan Kral wrote genfft-k7/*.ml*, which was +added in fftw-3.0 and removed in fftw-3.2. + +Support for the Cell Broadband Engine was graciously donated by the +IBM Austin Research Lab, which was added in fftw-3.2 and removed in +fftw-3.3. + +Support for MIPS64 paired-single SIMD instructions was graciously +donated by CodeSourcery, Inc. 
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/CONVENTIONS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/CONVENTIONS Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,65 @@ +Code conventions used internally by fftw3 (not in API): + +LEARN FROM THE MASTERS: read Ken Thompson's C compiler in Plan 9. + Avoid learning from C++/Java programs. + +INDENTATION: K&R, 5 spaces/tab. In case of doubt, indent -kr -i5. + +NAMES: keep them short. Shorter than you think. The Bible was written + without vowels. Don't outsmart the Bible. + + Common names: + + R : real type, aka fftw_real + E : real type for local variables (possibly extra precision) + C : complex type + sz : size + vecsz : vector size + is, os : input/output stride + ri, ii : real/imag input (complex data) + ro, io : real/imag output (complex data) + I, O : real input/output (real data) + A : assert + CK : check + S : solver, defined internally to each solver file + P : plan, defined internally to each solver file + k : codelet + X(...) : used for mangling of external names (see below) + K(...) : floating-point constant, in E precision + + If a name is used often and must have the form fftw_foo to avoid + namespace pollution, #define FOO fftw_foo and use the short name. + + Leave that hungarian crap to MS. foo_t counts as hungarian: use + foo instead. foo is lowercase so that it does not look like a DOS + program. Exception: typedef struct foo_s {...} foo; instead of + typedef struct foo {...} foo; for C++ compatibility. + +NAME MANGLING: use X(foo) for external names instead of fftw_foo. + X(foo) expands to fftwf_foo or fftw_foo, depending on the + precision. (Unfortunately, this is an ugly form of hungarian + notation. Grrr...) Names that are not exported do not need to be + mangled. + +REPEATED CODE: favor a table.
E.g., do not write + + foo("xxx", 1); + foo("yyy", 2); + foo("zzz", -1); + + Instead write + + struct { const char *nam; int arg; } footab[] = { + { "xxx", 1 }, + { "yyy", 2 }, + { "zzz", -1 } + }; + + and loop over footab. Rationale: it saves code space. + Similarly, replace a switch statement with a table whenever + possible. + +C++: The code should compile as a C++ program. Run the code through + gcc -xc++ . The extra C++ restrictions are unnecessary, of + course, but this will save us from a flood of complaints when + we release the code. diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/COPYING Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price.
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/COPYRIGHT --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/COPYRIGHT Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/INSTALL --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/INSTALL Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,370 @@ +Installation Instructions +************************* + +Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation, +Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell commands `./configure; make; make install' should +configure, build, and install this package. The following +more-detailed instructions are generic; see the `README' file for +instructions specific to this package. 
Some packages provide this +`INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. Caching is +disabled by default to prevent problems with accidental use of stale +cache files. + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You need `configure.ac' if +you want to change it or regenerate `configure' using a newer version +of `autoconf'. + + The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. + + Running `configure' might take a while. 
While running, it prints + some messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type `make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the `make install' phase executed with root + privileges. + + 5. Optionally, type `make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior `make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type `make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide `make + distcheck', which can be used by developers to test that all other + targets like `make install' and `make uninstall' work correctly. + This target is generally not run by end users.
+ +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. Run `./configure --help' +for details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. This +is known as a "VPATH" build. + + With a non-GNU `make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use `make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple `-arch' options to the +compiler but only a single `-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the `lipo' tool if you have problems. 
+ +Installation Names +================== + + By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the +default for these options is expressed in terms of `${prefix}', so that +specifying just `--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to `configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +`make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, `make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +`${prefix}'. Any directories that were specified during `configure', +but not in terms of `${prefix}', must each be overridden at install +time for the entire installation to be relocated. 
The approach of +makefile variable overrides for each directory variable is required by +the GNU Coding Standards, and ideally causes no recompilation. +However, some platforms have known limitations with the semantics of +shared libraries that end up requiring recompilation when using this +method, particularly noticeable in packages that use GNU Libtool. + + The second method involves providing the `DESTDIR' variable. For +example, `make install DESTDIR=/alternate/directory' will prepend +`/alternate/directory' before all installation names. The approach of +`DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of `${prefix}' +at `configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of `make' will be. 
For these packages, running `./configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with `make V=1'; while running `./configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with `make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU +CC is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + HP-UX `make' updates targets which have the same time stamps as +their prerequisites, which makes it generally unusable when shipped +generated files such as `configure' are involved. Use GNU `make' +instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its `<wchar.h>' header file. The option `-nodtk' can be used as +a workaround. If GNU CC is not installed, it is therefore recommended +to try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put `/usr/ucb' early in your `PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in `/usr/bin'. So, if you need `/usr/ucb' +in your `PATH', put it _after_ `/usr/bin'. + + On Haiku, software installed for all users goes in `/boot/common', +not `/usr/local'. It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features `configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, `configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +`--build=TYPE' option.
TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for `CONFIG_SHELL' due to +an Autoconf limitation. 
Until the limitation is lifted, you can use +this workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +`configure' Invocation +====================== + + `configure' recognizes the following options to control how it +operates. + +`--help' +`-h' + Print a summary of all of the options to `configure', and exit. + +`--help=short' +`--help=recursive' + Print a summary of the options unique to this package's + `configure', and exit. The `short' variant lists options used + only in the top level, while the `recursive' variant lists options + also present in any nested packages. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: + for more details, including other options available for fine-tuning + the installation locations. + +`--no-create' +`-n' + Run the configure checks, but stop before creating any output + files. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. 
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,108 @@ +OPTIONS_AUTOMAKE=gnu +lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# nodist_pkginclude_HEADERS = config.h + +# recompile genfft if maintainer mode is true +if MAINTAINER_MODE +GENFFT = genfft +else +GENFFT = +endif + +ACLOCAL_AMFLAGS=-I m4 + +# when using combined thread libraries (necessary on Windows), we want +# to build threads/ first, because libfftw3_threads is added to +# libfftw3. +# +# Otherwise, we want to build libfftw3_threads after libfftw3 +# so that we can track the fact that libfftw3_threads depends upon +# libfftw3. +# +# This is the inescapable result of combining three bad ideas +# (threads, Windows, and shared libraries). +# +if COMBINED_THREADS +CHICKEN_EGG=threads . +else +CHICKEN_EGG=. threads +endif + +SUBDIRS=support $(GENFFT) kernel simd-support dft rdft reodft api \ +libbench2 $(CHICKEN_EGG) tests mpi doc tools m4 +EXTRA_DIST=COPYRIGHT bootstrap.sh CONVENTIONS fftw.pc.in + +SIMD_LIBS = \ + simd-support/libsimd_support.la \ + simd-support/libsimd_sse2_nonportable.la + +if HAVE_SSE2 +SSE2_LIBS = dft/simd/sse2/libdft_sse2_codelets.la \ +rdft/simd/sse2/librdft_sse2_codelets.la +endif + +if HAVE_AVX +AVX_LIBS = dft/simd/avx/libdft_avx_codelets.la \ +rdft/simd/avx/librdft_avx_codelets.la +endif + +if HAVE_ALTIVEC +ALTIVEC_LIBS = dft/simd/altivec/libdft_altivec_codelets.la \ +rdft/simd/altivec/librdft_altivec_codelets.la +endif + +if HAVE_NEON +NEON_LIBS = dft/simd/neon/libdft_neon_codelets.la \ +rdft/simd/neon/librdft_neon_codelets.la +endif + +if THREADS +if COMBINED_THREADS +COMBINED_THREADLIBS=threads/libfftw3@PREC_SUFFIX@_threads.la +endif +endif + +libfftw3@PREC_SUFFIX@_la_SOURCES = + +libfftw3@PREC_SUFFIX@_la_LIBADD = \ + kernel/libkernel.la \ + dft/libdft.la \ + 
dft/scalar/libdft_scalar.la \ + dft/scalar/codelets/libdft_scalar_codelets.la \ + rdft/librdft.la \ + rdft/scalar/librdft_scalar.la \ + rdft/scalar/r2cf/librdft_scalar_r2cf.la \ + rdft/scalar/r2cb/librdft_scalar_r2cb.la \ + rdft/scalar/r2r/librdft_scalar_r2r.la \ + reodft/libreodft.la \ + api/libapi.la \ + $(SIMD_LIBS) $(SSE2_LIBS) $(AVX_LIBS) $(ALTIVEC_LIBS) $(NEON_LIBS) \ + $(COMBINED_THREADLIBS) + +if QUAD +# cannot use -no-undefined since dependent on libquadmath +libfftw3@PREC_SUFFIX@_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +else +libfftw3@PREC_SUFFIX@_la_LDFLAGS = -no-undefined -version-info \ +@SHARED_VERSION_INFO@ +endif + +fftw3@PREC_SUFFIX@.pc: fftw.pc + cp -f fftw.pc fftw3@PREC_SUFFIX@.pc +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = fftw3@PREC_SUFFIX@.pc + +WISDOM_DIR = /etc/fftw +WISDOM = wisdom@PREC_SUFFIX@ + +WISDOM_TIME=12 # default to 12-hour limit, i.e. overnight +WISDOM_FLAGS=--verbose --canonical --time-limit=$(WISDOM_TIME) + +wisdom: + tools/fftw@PREC_SUFFIX@-wisdom -o $@ $(WISDOM_FLAGS) + +install-wisdom: wisdom + $(mkinstalldirs) $(WISDOM_DIR) + $(INSTALL_DATA) wisdom $(WISDOM_DIR)/$(WISDOM) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1062 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = . 
+DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) \ + $(srcdir)/config.h.in $(srcdir)/fftw.pc.in COPYING TODO \ + compile config.guess config.sub depcomp install-sh missing \ + ltmain.sh +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = fftw.pc +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, 
files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libfftw3@PREC_SUFFIX@_la_DEPENDENCIES = kernel/libkernel.la \ + dft/libdft.la dft/scalar/libdft_scalar.la \ + dft/scalar/codelets/libdft_scalar_codelets.la rdft/librdft.la \ + rdft/scalar/librdft_scalar.la \ + rdft/scalar/r2cf/librdft_scalar_r2cf.la \ + rdft/scalar/r2cb/librdft_scalar_r2cb.la \ + rdft/scalar/r2r/librdft_scalar_r2r.la reodft/libreodft.la \ + api/libapi.la $(SIMD_LIBS) $(SSE2_LIBS) $(AVX_LIBS) \ + $(ALTIVEC_LIBS) $(NEON_LIBS) $(COMBINED_THREADLIBS) +am_libfftw3@PREC_SUFFIX@_la_OBJECTS = +libfftw3@PREC_SUFFIX@_la_OBJECTS = \ + $(am_libfftw3@PREC_SUFFIX@_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libfftw3@PREC_SUFFIX@_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libfftw3@PREC_SUFFIX@_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libfftw3@PREC_SUFFIX@_la_SOURCES) +DIST_SOURCES = $(libfftw3@PREC_SUFFIX@_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(pkgconfig_DATA) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \ + $(LISP)config.h.in +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +DIST_SUBDIRS = support genfft kernel simd-support dft rdft reodft api \ + libbench2 . threads tests mpi doc tools m4 +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = 
@SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +OPTIONS_AUTOMAKE = gnu +lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@.la +@MAINTAINER_MODE_FALSE@GENFFT = + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# nodist_pkginclude_HEADERS = config.h + +# recompile genfft if maintainer mode is true +@MAINTAINER_MODE_TRUE@GENFFT = genfft +ACLOCAL_AMFLAGS = -I m4 +@COMBINED_THREADS_FALSE@CHICKEN_EGG = . 
threads + +# when using combined thread libraries (necessary on Windows), we want +# to build threads/ first, because libfftw3_threads is added to +# libfftw3. +# +# Otherwise, we want to build libfftw3_threads after libfftw3 +# so that we can track the fact that libfftw3_threads depends upon +# libfftw3. +# +# This is the inescapable result of combining three bad ideas +# (threads, Windows, and shared libraries). +# +@COMBINED_THREADS_TRUE@CHICKEN_EGG = threads . +SUBDIRS = support $(GENFFT) kernel simd-support dft rdft reodft api \ +libbench2 $(CHICKEN_EGG) tests mpi doc tools m4 + +EXTRA_DIST = COPYRIGHT bootstrap.sh CONVENTIONS fftw.pc.in +SIMD_LIBS = \ + simd-support/libsimd_support.la \ + simd-support/libsimd_sse2_nonportable.la + +@HAVE_SSE2_TRUE@SSE2_LIBS = dft/simd/sse2/libdft_sse2_codelets.la \ +@HAVE_SSE2_TRUE@rdft/simd/sse2/librdft_sse2_codelets.la + +@HAVE_AVX_TRUE@AVX_LIBS = dft/simd/avx/libdft_avx_codelets.la \ +@HAVE_AVX_TRUE@rdft/simd/avx/librdft_avx_codelets.la + +@HAVE_ALTIVEC_TRUE@ALTIVEC_LIBS = dft/simd/altivec/libdft_altivec_codelets.la \ +@HAVE_ALTIVEC_TRUE@rdft/simd/altivec/librdft_altivec_codelets.la + +@HAVE_NEON_TRUE@NEON_LIBS = dft/simd/neon/libdft_neon_codelets.la \ +@HAVE_NEON_TRUE@rdft/simd/neon/librdft_neon_codelets.la + +@COMBINED_THREADS_TRUE@@THREADS_TRUE@COMBINED_THREADLIBS = threads/libfftw3@PREC_SUFFIX@_threads.la +libfftw3@PREC_SUFFIX@_la_SOURCES = +libfftw3@PREC_SUFFIX@_la_LIBADD = \ + kernel/libkernel.la \ + dft/libdft.la \ + dft/scalar/libdft_scalar.la \ + dft/scalar/codelets/libdft_scalar_codelets.la \ + rdft/librdft.la \ + rdft/scalar/librdft_scalar.la \ + rdft/scalar/r2cf/librdft_scalar_r2cf.la \ + rdft/scalar/r2cb/librdft_scalar_r2cb.la \ + rdft/scalar/r2r/librdft_scalar_r2r.la \ + reodft/libreodft.la \ + api/libapi.la \ + $(SIMD_LIBS) $(SSE2_LIBS) $(AVX_LIBS) $(ALTIVEC_LIBS) $(NEON_LIBS) \ + $(COMBINED_THREADLIBS) + +@QUAD_FALSE@libfftw3@PREC_SUFFIX@_la_LDFLAGS = -no-undefined -version-info \ 
+@QUAD_FALSE@@SHARED_VERSION_INFO@ + + +# cannot use -no-undefined since dependent on libquadmath +@QUAD_TRUE@libfftw3@PREC_SUFFIX@_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = fftw3@PREC_SUFFIX@.pc +WISDOM_DIR = /etc/fftw +WISDOM = wisdom@PREC_SUFFIX@ +WISDOM_TIME = 12 # default to 12-hour limit, i.e. overnight +WISDOM_FLAGS = --verbose --canonical --time-limit=$(WISDOM_TIME) +all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +config.h: stamp-h1 + @test -f $@ || rm -f stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 + +stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status config.h +$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f config.h stamp-h1 +fftw.pc: $(top_builddir)/config.status $(srcdir)/fftw.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " 
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libfftw3@PREC_SUFFIX@.la: $(libfftw3@PREC_SUFFIX@_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_la_DEPENDENCIES) $(EXTRA_libfftw3@PREC_SUFFIX@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libfftw3@PREC_SUFFIX@_la_LINK) -rpath $(libdir) $(libfftw3@PREC_SUFFIX@_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool config.lt +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed 
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile.
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build \ + && ../configure --srcdir=.. --prefix="$$dc_install_base" \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(DATA) config.h +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-pkgconfigDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES uninstall-pkgconfigDATA + +.MAKE: $(am__recursive_targets) all install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--refresh check check-am clean clean-cscope clean-generic \ + clean-libLTLIBRARIES clean-libtool cscope cscopelist-am ctags \ + ctags-am dist dist-all dist-bzip2 dist-gzip dist-lzip \ + dist-shar dist-tarZ dist-xz dist-zip distcheck distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-libtool distclean-tags distcleancheck distdir \ + distuninstallcheck dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am 
\ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-pkgconfigDATA install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES uninstall-pkgconfigDATA + + +fftw3@PREC_SUFFIX@.pc: fftw.pc + cp -f fftw.pc fftw3@PREC_SUFFIX@.pc + +wisdom: + tools/fftw@PREC_SUFFIX@-wisdom -o $@ $(WISDOM_FLAGS) + +install-wisdom: wisdom + $(mkinstalldirs) $(WISDOM_DIR) + $(INSTALL_DATA) wisdom $(WISDOM_DIR)/$(WISDOM) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/NEWS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/NEWS Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,564 @@ +FFTW 3.3.4 + +* New functions fftw_alignment_of (to check whether two arrays are + equally aligned for the purposes of applying a plan) and fftw_sprint_plan + (to output a description of plan to a string). + +* Bugfix in fftw-wisdom-to-conf; thanks to Florian Oppermann for the + bug report. + +* Fixed manual to work with texinfo-5. + +* Increased timing interval on x86_64 to reduce timing errors. + +* Default to Win32 threads, not pthreads, if both are present. + +* Various build-script fixes. + +FFTW 3.3.3 + +* Fix deadlock bug in MPI transforms (thanks to Michael Pippig for the + bug report and patch, and to Graham Dennis for the bug report). + +* Use 128-bit ARM NEON instructions instead of 64-bits. This change + appears to speed up even ARM processors with a 64-bit NEON pipe. + +* Speed improvements for single-precision AVX. + +* Speed up planner on machines without "official" cycle counters, such as ARM. 
+ +FFTW 3.3.2 + +* Removed an archaic stack-alignment hack that was failing with + gcc-4.7/i386. + +* Added stack-alignment hack necessary for gcc on Windows/i386. We + will regret this in ten years (see previous change). + +* Fix incompatibility with Intel icc which pretends to be gcc + but does not support quad precision. + +* make libfftw{threads,mpi} depend upon libfftw when using libtool; + this is consistent with most other libraries and simplifies the life + of various distributors of GNU/Linux. + +FFTW 3.3.1 + +* Changes since 3.3.1-beta1: + + - Reduced planning time in estimate mode for sizes with large + prime factors. + + - Added AVX autodetection under Visual Studio. Thanks Carsten + Steger for submitting the necessary code. + + - Modern Fortran interface now uses a separate fftw3l.f03 interface + file for the long double interface, which is not supported by + some Fortran compilers. Provided new fftw3q.f03 interface file + to access the quadruple-precision FFTW routines with recent + versions of gcc/gfortran. + +* Added support for the NEON extensions to the ARM ISA. (Note to beta + users: an ARM cycle counter is not yet implemented; please contact + fftw@fftw.org if you know how to do it right.) + +* MPI code now compiles even if mpicc is a C++ compiler; thanks to + Kyle Spyksma for the bug report. + +FFTW 3.3 + +* Changes since 3.3-beta1: + + - Compiling OpenMP support (--enable-openmp) now installs a + fftw3_omp library, instead of fftw3_threads, so that OpenMP + and POSIX threads (--enable-threads) libraries can be built + and installed at the same time. + + - Various minor compilation fixes, corrections of manual typos, and + improvements to the benchmark test program. + +* Add support for the AVX extensions to x86 and x86-64. The AVX code + works with 16-byte alignment (as opposed to 32-byte alignment), + so there is no ABI change compared to FFTW 3.2.2. 
+ +* Added Fortran 2003 interface, which should be usable on most modern + Fortran compilers (e.g. gfortran) and provides type-checked access + to the C FFTW interface. (The legacy Fortran-77 interface is + still included also.) + +* Added MPI distributed-memory transforms. Compared to 3.3alpha, + the major changes in the MPI transforms are: + - Fixed some deadlock and crashing bugs. + - Added Fortran 2003 interface. + - Added new-array execute functions for MPI plans. + - Eliminated use of large MPI tags, since Cray MPI requires tags < 2^24; + thanks to Jonathan Bentz for the bug report. + - Expanded documentation. + - 'make check' now runs MPI tests + - Some ABI changes - not binary-compatible with 3.3alpha MPI. + +* Add support for quad-precision __float128 in gcc 4.6 or later (on x86, + x86-64, and Itanium). The new routines use the fftwq_ prefix. + +* Removed support for MIPS paired-single instructions due to lack of + available hardware for testing. Users who want this functionality + should continue using FFTW 3.2.x. (Note that FFTW 3.3 still works + on MIPS; this only concerns special instructions available on some + MIPS chips.) + +* Removed support for the Cell Broadband Engine. Cell users should + use FFTW 3.2.x. + +* New convenience functions fftw_alloc_real and fftw_alloc_complex + to use fftw_malloc for real and complex arrays without typecasts + or sizeof. + +* New convenience functions fftw_export_wisdom_to_filename and + fftw_import_wisdom_from_filename that export/import wisdom + to a file, which don't require you to open/close the file yourself. + +* New function fftw_cost to return FFTW's internal cost metric for + a given plan; thanks to Rhys Ulerich and Nathanael Schaeffer for the + suggestion. + +* The --enable-sse2 configure flag now works in both double and single + precision (and is equivalent to --enable-sse in the latter case). + +* Remove --enable-portable-binary flag: we now produce portable binaries + by default.
+ +* Remove the automatic detection of native architecture flag for gcc + which was introduced in fftw-3.1, since new gcc supports -mtune=native. + Remove the --with-gcc-arch flag; if you want to specify a particular + arch to configure, use ./configure CC="gcc -mtune=...". + +* --with-our-malloc16 configure flag is now renamed --with-our-malloc. + +* Fixed build failure when srand48 declaration is missing; + thanks to Ralf Wildenhues for the bug report. + +* Fixed bug in fftw_set_timelimit: ensure that a negative timelimit + is equivalent to no timelimit in all cases. Thanks to William Andrew + Burnson for the bug report. + +* Fixed stack-overflow problem on OpenBSD caused by using alloca with + too large a buffer. + +FFTW 3.2.2 + +* Improve performance of some copy operations of complex arrays on + x86 machines. + +* Add configure flag to disable alloca(), which is broken in mingw64. + +* Planning in FFTW_ESTIMATE mode for r2r transforms became slower + between fftw-3.1.3 and 3.2. This regression has now been fixed. + +FFTW 3.2.1 + +* Performance improvements for some multidimensional r2c/c2r transforms; + thanks to Eugene Miloslavsky for his benchmark reports. + +* Compile with icc on MacOS X, use better icc compiler flags. + +* Compilation fixes for systems where snprintf is defined as a macro; + thanks to Marcus Mae for the bug report. + +* Fortran documentation now recommends not using dfftw_execute, + because of reports of problems with various Fortran compilers; + it is better to use dfftw_execute_dft etcetera. + +* Some documentation clarifications, e.g. of the fact that --enable-openmp + and --enable-threads are mutually exclusive (thanks to Long To), + and document slightly odd behavior of plan_guru_r2r in Fortran + (thanks to Alexander Pozdneev). + +* FAQ was accidentally omitted from 3.2 tarball. + +* Remove some extraneous (harmless) files accidentally included in + a subdirectory of the 3.2 tarball.
+ +FFTW 3.2 + +* Worked around apparent glibc bug that leads to rare hangs when freeing + semaphores. + +* Fixed segfault due to unaligned access in certain obscure problems + that use SSE and multiple threads. + +* MPI transforms not included, as they are still in alpha; the alpha + versions of the MPI transforms have been moved to FFTW 3.3alpha1. + +FFTW 3.2alpha3 + +* Performance improvements for sizes with factors of 5 and 10. + +* Documented FFTW_WISDOM_ONLY flag, at the suggestion of Mario + Emmenlauer and Phil Dumont. + +* Port Cell code to SDK2.1 (libspe2), as opposed to the old libspe1 code. + +* Performance improvements in Cell code for N < 32k, thanks to Jan Wagner + for the suggestions. + +* Cycle counter for Sun x86_64 compiler, and compilation fix in cycle + counter for AIX/xlc (thanks to Jeff Haferman for the bug report). + +* Fixed incorrect type prefix in MPI code that prevented wisdom routines + from working in single precision (thanks to Eric A. Borisch for the report). + +* Added 'make check' for MPI code (which still fails in a couple corner + cases, but should be much better than in alpha2). + +* Many other small fixes. + +FFTW 3.2alpha2 + +* Support for the Cell processor, donated by IBM Research; see README.Cell + and the Cell section of the manual. + +* New 64-bit API: for every "plan_guru" function there is a new "plan_guru64" + function with the same semantics, but which takes fftw_iodim64 instead of + fftw_iodim. fftw_iodim64 is the same as fftw_iodim, except that it takes + ptrdiff_t integer types as parameters, which is a 64-bit type on + 64-bit machines. This is only useful for specifying very large transforms + on 64-bit machines. (Internally, FFTW uses ptrdiff_t everywhere + regardless of what API you choose.) + +* Experimental MPI support. Complex one- and multi-dimensional FFTs, + multi-dimensional r2r, multi-dimensional r2c/c2r transforms, and + distributed transpose operations, with 1d block distributions. 
+ (This is an alpha preview: routines have not been exhaustively + tested, documentation is incomplete, and some functionality is + missing, e.g. Fortran support.) See mpi/README and also the MPI + section of the manual. + +* Significantly faster r2c/c2r transforms, especially on machines with SIMD. + +* Rewritten multi-threaded support for better performance by + re-using a fixed pool of threads rather than continually + respawning and joining (which nowadays is much slower). + +* Support for MIPS paired-single SIMD instructions, donated by + Codesourcery. + +* FFTW_WISDOM_ONLY planner flag, to create plan only if wisdom is + available and return NULL otherwise. + +* Removed k7 support, which only worked in 32-bit mode and is + becoming obsolete. Use --enable-sse instead. + +* Added --with-g77-wrappers configure option to force inclusion + of g77 wrappers, in addition to whatever is needed for the + detected Fortran compilers. This is mainly intended for GNU/Linux + distros switching to gfortran that wish to include both + gfortran and g77 support in FFTW. + +* In manual, renamed "guru execute" functions to "new-array execute" + functions, to reduce confusion with the guru planner interface. + (The programming interface is unchanged.) + +* Add missing __declspec attribute to threads API functions when compiling + for Windows; thanks to Robert O. Morris for the bug report. + +* Fixed missing return value from dfftw_init_threads in Fortran; + thanks to Markus Wetzstein for the bug report. + +FFTW 3.1.3 + +* Bug fix: FFTW computes incorrect results when the user plans both + REDFT11 and RODFT11 transforms of certain sizes. The bug is caused + by incorrect sharing of twiddle-factor tables between the two + transforms, and only occurs when both are used. Thanks to Paul + A. Valiant for the bug report. + +FFTW 3.1.2 + +* Correct bug in configure script: --enable-portable-binary option was ignored! + Thanks to Andrew Salamon for the bug report. 
+ +* Threads compilation fix on AIX: prefer xlc_r to cc_r, and don't use + either if we are using gcc. Thanks to Guy Moebs for the bug report. + +* Updated FAQ to note that Apple gcc 4.0.1 on MacOS/Intel is broken, + and suggest a workaround. configure script now detects Core/Duo arch. + +* Use -maltivec when checking for altivec.h. Fixes Gentoo bug #129304, + thanks to Markus Dittrich. + +FFTW 3.1.1 + +* Performance improvements for Intel EMT64. + +* Performance improvements for large-size transforms with SIMD. + +* Cycle counter support for Intel icc and Visual C++ on x86-64. + +* In fftw-wisdom tool, replaced obsolete --impatient with --measure. + +* Fixed compilation failure with AIX/xlc; thanks to Joseph Thomas. + +* Windows DLL support for Fortran API (added missing __declspec(dllexport)). + +* SSE/SSE2 code works properly (i.e. disables itself) on older 386 and 486 + CPUs lacking a CPUID instruction; thanks to Eric Korpela. + +FFTW 3.1 + +* Faster FFTW_ESTIMATE planner. + +* New (faster) algorithm for REDFT00/RODFT00 (type-I DCT/DST) of odd size. + +* "4-step" algorithm for faster FFTs of very large sizes (> 2^18). + +* Faster in-place real-data DFTs (for R2HC and HC2R r2r formats). + +* Faster in-place non-square transpositions (FFTW uses these internally + for in-place FFTs, and you can also perform them explicitly using + the guru interface). + +* Faster prime-size DFTs: implemented Bluestein's algorithm, as well + as a zero-padded Rader variant to limit recursive use of Rader's algorithm. + +* SIMD support for split complex arrays. + +* Much faster Altivec/VMX performance. + +* New fftw_set_timelimit function to specify a (rough) upper bound to the + planning time (does not affect ESTIMATE mode). + +* Removed --enable-3dnow support; use --enable-k7 instead. + +* FMA (fused multiply-add) version is now included in "standard" FFTW, + and is enabled with --enable-fma (the default on PowerPC and Itanium). 
+ +* Automatic detection of native architecture flag for gcc. New + configure options: --enable-portable-binary and --with-gcc-arch=, + for people distributing compiled binaries of FFTW (see manual). + +* Automatic detection of Altivec under Linux with gcc 3.4 (so that + same binary should work on both Altivec and non-Altivec PowerPCs). + +* Compiler-specific tweaks/flags/workarounds for gcc 3.4, xlc, HP/UX, + Solaris/Intel. + +* Various documentation clarifications. + +* 64-bit clean. (Fixes a bug affecting the split guru planner on + 64-bit machines, reported by David Necas.) + +* Fixed Debian bug #259612: inadvertent use of SSE instructions on + non-SSE machines (causing a crash) for --enable-sse binaries. + +* Fixed bug that caused HC2R transforms to destroy the input in + certain cases, even if the user specified FFTW_PRESERVE_INPUT. + +* Fixed bug where wisdom would be lost under rare circumstances, + causing excessive planning time. + +* FAQ notes bug in gcc-3.4.[1-3] that causes FFTW to crash with SSE/SSE2. + +* Fixed accidentally exported symbol that prohibited simultaneous + linking to double/single multithreaded FFTW (thanks to Alessio Massaro). + +* Support Win32 threads under MinGW (thanks to Alessio Massaro). + +* Fixed problem with building DLL under Cygwin; thanks to Stephane Fillod. + +* Fix build failure if no Fortran compiler is found (thanks to Charles + Radley for the bug report). + +* Fixed compilation failure with icc 8.0 and SSE/SSE2. Automatic + detection of icc architecture flag (e.g. -xW). + +* Fixed compilation with OpenMP on AIX (thanks to Greg Bauer). + +* Fixed compilation failure on x86-64 with gcc (thanks to Orion Poplawski). + +* Incorporated patch from FreeBSD ports (FreeBSD does not have memalign, + but its malloc is 16-byte aligned). + +* Cycle-counter compilation fixes for Itanium, Alpha, x86-64, Sparc, + MacOS (thanks to Matt Boman, John Bowman, and James A. Treacy for + reports/fixes). 
Added x86-64 cycle counter for PGI compilers, + courtesy Cristiano Calonaci. + +* Fix compilation problem in test program due to C99 conflict. + +* Portability fix for import_system_wisdom with djgpp (thanks to Juan + Manuel Guerrero). + +* Fixed compilation failure on MacOS 10.3 due to getopt conflict. + +* Work around Visual C++ (version 6/7) bug in SSE compilation; + thanks to Eddie Yee for his detailed report. + +Changes from FFTW 3.1 beta 2: + +* Several minor compilation fixes. + +* Eliminate FFTW_TIMELIMIT flag and replace fftw_timelimit global with + fftw_set_timelimit function. Make wisdom work with time-limited plans. + +Changes from FFTW 3.1 beta 1: + +* Fixes for creating DLLs under Windows; thanks to John Pavel for his feedback. + +* Fixed more 64-bit problems, thanks to John Pavel for the bug report. + +* Further speed improvements for Altivec/VMX. + +* Further speed improvements for non-square transpositions. + +* Many minor tweaks. + +FFTW 3.0.1 + +* Some speed improvements in SIMD code. + +* --without-cycle-counter option is removed. If no cycle counter is found, + then the estimator is always used. A --with-slow-timer option is provided + to force the use of lower-resolution timers. + +* Several fixes for compilation under Visual C++, with help from Stefane Ruel. + +* Added x86 cycle counter for Visual C++, with help from Morten Nissov. + +* Added S390 cycle counter, courtesy of James Treacy. + +* Added missing static keyword that prevented simultaneous linkage + of different-precision versions; thanks to Rasmus Larsen for the bug report. + +* Corrected accidental omission of f77_wisdom.f file; thanks to Alan Watson. + +* Support -xopenmp flag for SunOS; thanks to John Lou for the bug report. + +* Compilation with HP/UX cc requires -Wp,-H128000 flag to increase + preprocessor limits; thanks to Peter Vouras for the bug report. + +* Removed non-portable use of 'tempfile' in fftw-wisdom-to-conf script; + thanks to Nicolas Decoster for the patch. 
+ +* Added 'make smallcheck' target in tests/ directory, at the request of + James Treacy. + +FFTW 3.0 + +Major goals of this release: + +* Speed: often 20% or more faster than FFTW 2.x, even without SIMD (see below). + +* Complete rewrite, to make it easier to add new algorithms and transforms. + +* New API, to support more general semantics. + +Other enhancements: + +* SIMD acceleration on supporting CPUs (SSE, SSE2, 3DNow!, and AltiVec). + (With special thanks to Franz Franchetti for many experimental prototypes + and to Stefan Kral for the vectorizing generator from fftwgel.) + +* True in-place 1d transforms of large sizes (as well as compressed + twiddle tables for additional memory/cache savings). + +* More arbitrary placement of real & imaginary data, e.g. including + interleaved (as in FFTW 2.x) as well as separate real/imag arrays. + +* Efficient prime-size transforms of real data. + +* Multidimensional transforms can operate on a subset of a larger matrix, + and/or transform selected dimensions of a multidimensional array. + +* By popular demand, simultaneous linking to double precision (fftw), + single precision (fftwf), and long-double precision (fftwl) versions + of FFTW is now supported. + +* Cycle counters (on all modern CPUs) are exploited to speed planning. + +* Efficient transforms of real even/odd arrays, a.k.a. discrete + cosine/sine transforms (types I-IV). (Currently work via pre/post + processing of real transforms, ala FFTPACK, so are not optimal.) + +* DHTs (Discrete Hartley Transforms), again via post-processing + of real transforms (and thus suboptimal, for now). + +* Support for linking to just those parts of FFTW that you need, + greatly reducing the size of statically linked programs when + only a limited set of transform sizes/types are required. + +* Canonical global wisdom file (/etc/fftw/wisdom) on Unix, along + with a command-line tool (fftw-wisdom) to generate/update it. 
+ +* Fortran API can be used with both g77 and non-g77 compilers + simultaneously. + +* Multi-threaded version has optional OpenMP support. + +* Authors' good looks have greatly improved with age. + +Changes from 3.0beta3: + +* Separate FMA distribution to better exploit fused multiply-add instructions + on PowerPC (and possibly other) architectures. + +* Performance improvements via some inlining tweaks. + +* fftw_flops now returns double arguments, not int, to avoid overflows + for large sizes. + +* Workarounds for automake bugs. + +Changes from 3.0beta2: + +* The standard REDFT00/RODFT00 (DCT-I/DST-I) algorithm (used in + FFTPACK, NR, etcetera) turns out to have poor numerical accuracy, so + we replaced it with a slower routine that is more accurate. + +* The guru planner and execute functions now have two variants, one that + takes complex arguments and one that takes separate real/imag pointers. + +* Execute and planner routines now automatically align the stack on x86, + in case the calling program is misaligned. + +* README file for test program. + +* Fixed bugs in the combination of SIMD with multi-threaded transforms. + +* Eliminated internal fftw_threads_init function, which some people were + calling accidentally instead of the fftw_init_threads API function. + +* Check for -openmp flag (Intel C compiler) when --enable-openmp is used. + +* Support AMD x86-64 SIMD and cycle counter. + +* Support SSE2 intrinsics in forthcoming gcc 3.3. + +Changes from 3.0beta1: + +* Faster in-place 1d transforms of non-power-of-two sizes. + +* SIMD improvements for in-place, multi-dimensional, and/or non-FFTW_PATIENT + transforms. + +* Added support for hard-coded DCT/DST/DHT codelets of small sizes; the + default distribution only includes hard-coded size-8 DCT-II/III, however. + +* Many minor improvements to the manual. Added section on using the + codelet generator to customize and enhance FFTW. 
+ +* The default 'make check' should now only take a few minutes; for more + strenuous tests (which may take a day or so), do 'cd tests; make bigcheck'. + +* fftw_print_plan is split into fftw_fprint_plan and fftw_print_plan, where + the latter uses stdout. + +* Fixed ability to compile with a C++ compiler. + +* Fixed support for C99 complex type under glibc. + +* Fixed problems with alloca under MinGW, AIX. + +* Workaround for gcc/SPARC bug. + +* Fixed multi-threaded initialization failure on IRIX due to lack of + user-accessible PTHREAD_SCOPE_SYSTEM there. diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/README Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,42 @@ +FFTW is a free collection of fast C routines for computing the +Discrete Fourier Transform in one or more dimensions. It includes +complex, real, symmetric, and parallel transforms, and can handle +arbitrary array sizes efficiently. FFTW is typically faster than +other publicly available FFT implementations, and is even +competitive with vendor-tuned libraries. (See our web page +http://fftw.org/ for extensive benchmarks.) To achieve this +performance, FFTW uses novel code-generation and runtime +self-optimization techniques (along with many other tricks). + +The doc/ directory contains the manual in texinfo, PDF, info, and HTML +formats. Frequently asked questions and answers can be found in the +doc/FAQ/ directory in ASCII and HTML. + +For a quick introduction to calling FFTW, see the "Tutorial" section +of the manual. + +INSTALLATION +------------ + +If you have downloaded an official release, please read chapter +10 "Installation and Customization" of the manual. In short: + + ./configure + make + make install + +If you are using the git repository, install ocaml, autoconf, +automake, and libtool, and execute the bootstrap.sh script.
Most of +the source code of fftw is generated automatically, and this script +generates all the required source files. + + +CONTACTS +-------- + +FFTW was written by Matteo Frigo and Steven G. Johnson. You can +contact them at fftw@fftw.org. The latest version of FFTW, +benchmarks, links, and other information can be found at the FFTW home +page (http://www.fftw.org). You can also sign up to the fftw-announce +Google group to receive (infrequent) updates and information about new +releases. diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/TODO --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/TODO Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +TODO before FFTW-$2\pi$: + +* Wisdom: make it clear that it is specific to the exact fftw version + and configuration. Report error codes when reading wisdom. Maybe + have multiple system wisdom files, one per version? + +* DCT/DST codelets? which kinds? + +* investigate the addition-chain trig computation + +* I can't believe that there isn't a closed form for the omega + array in Rader. + +* convolution problem type(s) + +* Explore the idea of having n < 0 in tensors, possibly to mean + inverse DFT. + +* better estimator: possibly, let "other" cost be coef * n, where + coef is a per-solver constant determined via some big numerical + optimization/fit. + +* vector radix, multidimensional codelets + +* it may be a good idea to unify all those little loops that do + copying, (X[i], X[n-i]) <- (X[i] + X[n-i], X[i] - X[n-i]), + and multiplication of vectors by twiddle factors. + +* Pruned FFTs (basically, a vecloop that skips zeros). + +* Try FFTPACK-style back-and-forth (Stockham) FFT. (We tried this a + few years ago and it was slower, but perhaps matters have changed.) + +* Generate assembly directly for more processors, or maybe fork gcc. =) + +* ensure that threaded solvers generate (block_size % 4 == 0) + to allow SIMD to be used. + +* memoize triggen. 
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/aclocal.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/aclocal.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1227 @@ +# generated automatically by aclocal 1.14 -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# Copyright (C) 2002-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.14' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version.
Point them to the right macro. +m4_if([$1], [1.14], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. +m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.14])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first.
+# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason for the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will break when you +# start a VPATH build or use an absolute $srcdir. +# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevents a +# configured tree from being moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[dnl Rely on autoconf to set up CDPATH properly. +AC_PREREQ([2.50])dnl +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional.
+AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. +Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all its +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC.
+AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Set up a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. 
These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. 
+AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. 
+ # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know understand at least 4000. + if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`AS_DIRNAME("$mf")` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`AS_DIRNAME(["$file"])` + AS_MKDIR_P([$dirpart/$fdir]) + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done + done +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking +# is enabled. FIXME. This creates each '.P' file that we will +# need in order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) +]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. 
+dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. 
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <http://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. 
+_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh, unless it is already set in the environment. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- +# From Jim Meyering + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAINTAINER_MODE([DEFAULT-MODE]) +# ---------------------------------- +# Control maintainer-specific portions of Makefiles. 
+# Default is to disable them, unless 'enable' is passed literally. +# For symmetry, 'disable' may be passed as well. Anyway, the user +# can override the default with the --enable/--disable switch. +AC_DEFUN([AM_MAINTAINER_MODE], +[m4_case(m4_default([$1], [disable]), + [enable], [m4_define([am_maintainer_other], [disable])], + [disable], [m4_define([am_maintainer_other], [enable])], + [m4_define([am_maintainer_other], [enable]) + m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) +AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) + dnl maintainer-mode's default is 'disable' unless 'enable' is passed + AC_ARG_ENABLE([maintainer-mode], + [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], + am_maintainer_other[ make rules and dependencies not useful + (and sometimes confusing) to the casual installer])], + [USE_MAINTAINER_MODE=$enableval], + [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) + AC_MSG_RESULT([$USE_MAINTAINER_MODE]) + AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) + MAINT=$MAINTAINER_MODE_TRUE + AC_SUBST([MAINT])dnl +] +) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check to see how make treats includes. +AC_DEFUN([AM_MAKE_INCLUDE], +[am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +AC_MSG_CHECKING([for style of include used by $am_make]) +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. 
+echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi +AC_SUBST([am__include]) +AC_SUBST([am__quote]) +AC_MSG_RESULT([$_am_result]) +rm -f confinc confmf +]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +# Unless the shell variable NAME is already set, set it to PROGRAM +# prefixed with $am_missing_run; then substitute NAME. +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. +AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# -*- Autoconf -*- +# Obsolete and "removed" macros, that must however still report explicit +# error messages when used, to smooth transition. +# +# Copyright (C) 1996-2013 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +AC_DEFUN([AM_CONFIG_HEADER], +[AC_DIAGNOSE([obsolete], +['$0': this macro is obsolete. +You should use the 'AC][_CONFIG_HEADERS' macro instead.])dnl +AC_CONFIG_HEADERS($@)]) + +AC_DEFUN([AM_PROG_CC_STDC], +[AC_PROG_CC +am_cv_prog_cc_stdc=$ac_cv_prog_cc_stdc +AC_DIAGNOSE([obsolete], +['$0': this macro is obsolete. +You should simply use the 'AC][_PROG_CC' macro instead. +Also, your code should no longer depend upon 'am_cv_prog_cc_stdc', +but upon 'ac_cv_prog_cc_stdc'.])]) + +AC_DEFUN([AM_C_PROTOTYPES], + [AC_FATAL([automatic de-ANSI-fication support has been removed])]) +AU_DEFUN([fp_C_PROTOTYPES], [AM_C_PROTOTYPES]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +# Expand to the flag name for option NAME: the prefix _AM_OPTION_ followed +# by NAME with every character other than a letter, digit or underscore +# replaced by an underscore. +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. 
+AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. +AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works with $CC (only $CC is exercised below). + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) 
+AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. 
Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2013 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. 
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar <conftest.tar]) + AM_RUN_LOG([cat conftest.dir/file]) + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([m4/acx_mpi.m4]) +m4_include([m4/acx_pthread.m4]) +m4_include([m4/ax_cc_maxopt.m4]) +m4_include([m4/ax_check_compiler_flags.m4]) +m4_include([m4/ax_compiler_vendor.m4]) +m4_include([m4/ax_gcc_aligns_stack.m4]) +m4_include([m4/ax_gcc_version.m4]) +m4_include([m4/ax_openmp.m4]) +m4_include([m4/libtool.m4]) +m4_include([m4/ltoptions.m4]) +m4_include([m4/ltsugar.m4]) +m4_include([m4/ltversion.m4]) +m4_include([m4/lt~obsolete.m4]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,63 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/rdft -I$(top_srcdir)/reodft +AM_CFLAGS = $(STACK_ALIGN_CFLAGS) + +EXTRA_DIST = f03api.sh genf03.pl fftw3.f03.in + +include_HEADERS = 
fftw3.h fftw3.f fftw3l.f03 fftw3q.f03 +nodist_include_HEADERS = fftw3.f03 +noinst_LTLIBRARIES = libapi.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = api.h x77.h guru.h guru64.h + +libapi_la_SOURCES = apiplan.c configure.c execute-dft-c2r.c \ +execute-dft-r2c.c execute-dft.c execute-r2r.c execute-split-dft-c2r.c \ +execute-split-dft-r2c.c execute-split-dft.c execute.c \ +export-wisdom-to-file.c export-wisdom-to-string.c export-wisdom.c \ +f77api.c flops.c forget-wisdom.c import-system-wisdom.c \ +import-wisdom-from-file.c import-wisdom-from-string.c import-wisdom.c \ +malloc.c map-r2r-kind.c mapflags.c mkprinter-file.c mkprinter-str.c \ +mktensor-iodims.c mktensor-rowmajor.c plan-dft-1d.c plan-dft-2d.c \ +plan-dft-3d.c plan-dft-c2r-1d.c plan-dft-c2r-2d.c plan-dft-c2r-3d.c \ +plan-dft-c2r.c plan-dft-r2c-1d.c plan-dft-r2c-2d.c plan-dft-r2c-3d.c \ +plan-dft-r2c.c plan-dft.c plan-guru-dft-c2r.c plan-guru-dft-r2c.c \ +plan-guru-dft.c plan-guru-r2r.c plan-guru-split-dft-c2r.c \ +plan-guru-split-dft-r2c.c plan-guru-split-dft.c plan-many-dft-c2r.c \ +plan-many-dft-r2c.c plan-many-dft.c plan-many-r2r.c plan-r2r-1d.c \ +plan-r2r-2d.c plan-r2r-3d.c plan-r2r.c print-plan.c rdft2-pad.c \ +the-planner.c version.c api.h f77funcs.h fftw3.h x77.h guru.h \ +guru64.h mktensor-iodims.h plan-guru-dft-c2r.h plan-guru-dft-r2c.h \ +plan-guru-dft.h plan-guru-r2r.h plan-guru-split-dft-c2r.h \ +plan-guru-split-dft-r2c.h plan-guru-split-dft.h plan-guru64-dft-c2r.c \ +plan-guru64-dft-r2c.c plan-guru64-dft.c plan-guru64-r2r.c \ +plan-guru64-split-dft-c2r.c plan-guru64-split-dft-r2c.c \ +plan-guru64-split-dft.c mktensor-iodims64.c + +BUILT_SOURCES = fftw3.f fftw3.f03.in fftw3.f03 fftw3l.f03 fftw3q.f03 +CLEANFILES = fftw3.f03 + +fftw3.f03: fftw3.f03.in + (echo "! Generated automatically. 
DO NOT EDIT!"; echo; \ + echo " integer, parameter :: C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@"; \ + grep -v "Generated automatically" $(srcdir)/fftw3.f03.in) > $@ + +if MAINTAINER_MODE + +# convert constants to F77 PARAMETER statements +fftw3.f: fftw3.h + rm -f $@ + perl -pe 's/([A-Z0-9_]+)=([+-]?[0-9]+)/\n INTEGER \1\n PARAMETER (\1=\2)\n/g' $< |egrep 'PARAMETER|INTEGER' > $@ + perl -pe 's/#define +([A-Z0-9_]+) +\(([+-]?[0-9]+)U?\)/\n INTEGER \1\n PARAMETER (\1=\2)\n/g' $< |egrep 'PARAMETER|INTEGER' >> $@ + perl -pe 'if (/#define +([A-Z0-9_]+) +\(([0-9]+)U? *<< *([0-9]+)\)/) { print "\n INTEGER $$1\n PARAMETER ($$1=",$$2 << $$3,")\n"; }' $< |egrep 'PARAMETER|INTEGER' >> $@ + +fftw3.f03.in: fftw3.h f03api.sh genf03.pl + sh $(srcdir)/f03api.sh d f > $@ + +fftw3l.f03: fftw3.h f03api.sh genf03.pl + sh $(srcdir)/f03api.sh l | grep -v parameter > $@ + +fftw3q.f03: fftw3.h f03api.sh genf03.pl + sh $(srcdir)/f03api.sh q | grep -v parameter > $@ + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,831 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = api +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp $(include_HEADERS) +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + 
$(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libapi_la_LIBADD = +am_libapi_la_OBJECTS = apiplan.lo configure.lo execute-dft-c2r.lo \ + execute-dft-r2c.lo execute-dft.lo execute-r2r.lo \ + execute-split-dft-c2r.lo execute-split-dft-r2c.lo \ + execute-split-dft.lo execute.lo export-wisdom-to-file.lo \ + export-wisdom-to-string.lo export-wisdom.lo f77api.lo flops.lo \ + forget-wisdom.lo import-system-wisdom.lo \ + import-wisdom-from-file.lo import-wisdom-from-string.lo \ + import-wisdom.lo malloc.lo map-r2r-kind.lo mapflags.lo \ + mkprinter-file.lo mkprinter-str.lo mktensor-iodims.lo \ + mktensor-rowmajor.lo plan-dft-1d.lo plan-dft-2d.lo \ + plan-dft-3d.lo plan-dft-c2r-1d.lo plan-dft-c2r-2d.lo \ + plan-dft-c2r-3d.lo plan-dft-c2r.lo plan-dft-r2c-1d.lo \ + plan-dft-r2c-2d.lo plan-dft-r2c-3d.lo plan-dft-r2c.lo \ + plan-dft.lo plan-guru-dft-c2r.lo plan-guru-dft-r2c.lo \ + plan-guru-dft.lo plan-guru-r2r.lo plan-guru-split-dft-c2r.lo \ + plan-guru-split-dft-r2c.lo plan-guru-split-dft.lo \ + plan-many-dft-c2r.lo plan-many-dft-r2c.lo plan-many-dft.lo \ + plan-many-r2r.lo plan-r2r-1d.lo plan-r2r-2d.lo plan-r2r-3d.lo \ + plan-r2r.lo print-plan.lo rdft2-pad.lo the-planner.lo \ + version.lo plan-guru64-dft-c2r.lo plan-guru64-dft-r2c.lo \ + plan-guru64-dft.lo plan-guru64-r2r.lo \ + plan-guru64-split-dft-c2r.lo 
plan-guru64-split-dft-r2c.lo \ + plan-guru64-split-dft.lo mktensor-iodims64.lo +libapi_la_OBJECTS = $(am_libapi_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libapi_la_SOURCES) +DIST_SOURCES = $(libapi_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip 
= \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" +HEADERS = $(include_HEADERS) $(nodist_include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/rdft -I$(top_srcdir)/reodft + +AM_CFLAGS = 
$(STACK_ALIGN_CFLAGS) +EXTRA_DIST = f03api.sh genf03.pl fftw3.f03.in +include_HEADERS = fftw3.h fftw3.f fftw3l.f03 fftw3q.f03 +nodist_include_HEADERS = fftw3.f03 +noinst_LTLIBRARIES = libapi.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = api.h x77.h guru.h guru64.h +libapi_la_SOURCES = apiplan.c configure.c execute-dft-c2r.c \ +execute-dft-r2c.c execute-dft.c execute-r2r.c execute-split-dft-c2r.c \ +execute-split-dft-r2c.c execute-split-dft.c execute.c \ +export-wisdom-to-file.c export-wisdom-to-string.c export-wisdom.c \ +f77api.c flops.c forget-wisdom.c import-system-wisdom.c \ +import-wisdom-from-file.c import-wisdom-from-string.c import-wisdom.c \ +malloc.c map-r2r-kind.c mapflags.c mkprinter-file.c mkprinter-str.c \ +mktensor-iodims.c mktensor-rowmajor.c plan-dft-1d.c plan-dft-2d.c \ +plan-dft-3d.c plan-dft-c2r-1d.c plan-dft-c2r-2d.c plan-dft-c2r-3d.c \ +plan-dft-c2r.c plan-dft-r2c-1d.c plan-dft-r2c-2d.c plan-dft-r2c-3d.c \ +plan-dft-r2c.c plan-dft.c plan-guru-dft-c2r.c plan-guru-dft-r2c.c \ +plan-guru-dft.c plan-guru-r2r.c plan-guru-split-dft-c2r.c \ +plan-guru-split-dft-r2c.c plan-guru-split-dft.c plan-many-dft-c2r.c \ +plan-many-dft-r2c.c plan-many-dft.c plan-many-r2r.c plan-r2r-1d.c \ +plan-r2r-2d.c plan-r2r-3d.c plan-r2r.c print-plan.c rdft2-pad.c \ +the-planner.c version.c api.h f77funcs.h fftw3.h x77.h guru.h \ +guru64.h mktensor-iodims.h plan-guru-dft-c2r.h plan-guru-dft-r2c.h \ +plan-guru-dft.h plan-guru-r2r.h plan-guru-split-dft-c2r.h \ +plan-guru-split-dft-r2c.h plan-guru-split-dft.h plan-guru64-dft-c2r.c \ +plan-guru64-dft-r2c.c plan-guru64-dft.c plan-guru64-r2r.c \ +plan-guru64-split-dft-c2r.c plan-guru64-split-dft-r2c.c \ +plan-guru64-split-dft.c mktensor-iodims64.c + +BUILT_SOURCES = fftw3.f fftw3.f03.in fftw3.f03 fftw3l.f03 fftw3q.f03 +CLEANFILES = fftw3.f03 +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ 
$(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu api/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu api/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libapi.la: $(libapi_la_OBJECTS) $(libapi_la_DEPENDENCIES) $(EXTRA_libapi_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libapi_la_OBJECTS) $(libapi_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apiplan.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/configure.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/execute-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute-r2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute-split-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute-split-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute-split-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/execute.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/export-wisdom-to-file.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/export-wisdom-to-string.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/export-wisdom.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/f77api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flops.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/forget-wisdom.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/import-system-wisdom.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/import-wisdom-from-file.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/import-wisdom-from-string.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/import-wisdom.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/malloc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/map-r2r-kind.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mapflags.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkprinter-file.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkprinter-str.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mktensor-iodims.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mktensor-iodims64.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mktensor-rowmajor.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-1d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-2d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-3d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-c2r-1d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-c2r-2d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-c2r-3d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-r2c-1d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-r2c-2d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-r2c-3d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-r2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-split-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-split-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru-split-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru64-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru64-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru64-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru64-r2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/plan-guru64-split-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru64-split-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-guru64-split-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-many-dft-c2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-many-dft-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-many-dft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-many-r2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-r2r-1d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-r2r-2d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-r2r-3d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan-r2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print-plan.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-pad.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/the-planner.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/version.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) +install-nodist_includeHEADERS: $(nodist_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + 
done + +uninstall-nodist_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + 
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS install-nodist_includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-nodist_includeHEADERS + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-man install-nodist_includeHEADERS install-pdf \ + 
install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-includeHEADERS \ + uninstall-nodist_includeHEADERS + + +fftw3.f03: fftw3.f03.in + (echo "! Generated automatically. DO NOT EDIT!"; echo; \ + echo " integer, parameter :: C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@"; \ + grep -v "Generated automatically" $(srcdir)/fftw3.f03.in) > $@ + +# convert constants to F77 PARAMETER statements +@MAINTAINER_MODE_TRUE@fftw3.f: fftw3.h +@MAINTAINER_MODE_TRUE@ rm -f $@ +@MAINTAINER_MODE_TRUE@ perl -pe 's/([A-Z0-9_]+)=([+-]?[0-9]+)/\n INTEGER \1\n PARAMETER (\1=\2)\n/g' $< |egrep 'PARAMETER|INTEGER' > $@ +@MAINTAINER_MODE_TRUE@ perl -pe 's/#define +([A-Z0-9_]+) +\(([+-]?[0-9]+)U?\)/\n INTEGER \1\n PARAMETER (\1=\2)\n/g' $< |egrep 'PARAMETER|INTEGER' >> $@ +@MAINTAINER_MODE_TRUE@ perl -pe 'if (/#define +([A-Z0-9_]+) +\(([0-9]+)U? *<< *([0-9]+)\)/) { print "\n INTEGER $$1\n PARAMETER ($$1=",$$2 << $$3,")\n"; }' $< |egrep 'PARAMETER|INTEGER' >> $@ + +@MAINTAINER_MODE_TRUE@fftw3.f03.in: fftw3.h f03api.sh genf03.pl +@MAINTAINER_MODE_TRUE@ sh $(srcdir)/f03api.sh d f > $@ + +@MAINTAINER_MODE_TRUE@fftw3l.f03: fftw3.h f03api.sh genf03.pl +@MAINTAINER_MODE_TRUE@ sh $(srcdir)/f03api.sh l | grep -v parameter > $@ + +@MAINTAINER_MODE_TRUE@fftw3q.f03: fftw3.h f03api.sh genf03.pl +@MAINTAINER_MODE_TRUE@ sh $(srcdir)/f03api.sh q | grep -v parameter > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/api.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/api.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* internal API definitions */ +#ifndef __API_H__ +#define __API_H__ + +#ifndef CALLING_FFTW /* defined in hook.c, when calling internal functions */ +# define COMPILING_FFTW /* used for DLL symbol exporting in fftw3.h */ +#endif + +/* When compiling with GNU libtool on Windows, DLL_EXPORT is #defined + for compiling the shared-library code. In this case, we'll #define + FFTW_DLL to add dllexport attributes to the specified functions in + fftw3.h. + + If we don't specify dllexport explicitly, then libtool + automatically exports all symbols. However, if we specify + dllexport explicitly for any functions, then libtool apparently + doesn't do any automatic exporting. (Not documented, grrr, but + this is the observed behavior with libtool 1.5.8.) Thus, using + this forces us to correctly dllexport every exported symbol, or + linking bench.exe will fail. 
This has the advantage of forcing + us to mark things correctly, which is necessary for other compilers + (such as MS VC++). */ +#ifdef DLL_EXPORT +# define FFTW_DLL +#endif + +/* just in case: force not to use C99 complex numbers + (we need this for IBM xlc because _Complex_I is treated specially + and is defined even if is not included) */ +#define FFTW_NO_Complex + +#include "fftw3.h" +#include "ifftw.h" +#include "rdft.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/* the API ``plan'' contains both the kernel plan and problem */ +struct X(plan_s) { + plan *pln; + problem *prb; + int sign; +}; + +/* shorthand */ +typedef struct X(plan_s) apiplan; + +/* complex type for internal use */ +typedef R C[2]; + +#define EXTRACT_REIM(sign, c, r, i) X(extract_reim)(sign, (c)[0], r, i) + +#define TAINT_UNALIGNED(p, flg) TAINT(p, ((flg) & FFTW_UNALIGNED) != 0) + +tensor *X(mktensor_rowmajor)(int rnk, const int *n, + const int *niphys, const int *nophys, + int is, int os); + +tensor *X(mktensor_iodims)(int rank, const X(iodim) *dims, int is, int os); +tensor *X(mktensor_iodims64)(int rank, const X(iodim64) *dims, int is, int os); +const int *X(rdft2_pad)(int rnk, const int *n, const int *nembed, + int inplace, int cmplx, int **nfree); + +int X(many_kosherp)(int rnk, const int *n, int howmany); +int X(guru_kosherp)(int rank, const X(iodim) *dims, + int howmany_rank, const X(iodim) *howmany_dims); +int X(guru64_kosherp)(int rank, const X(iodim64) *dims, + int howmany_rank, const X(iodim64) *howmany_dims); + +/* Note: FFTW_EXTERN is used for "internal" functions used in tests/hook.c */ + +FFTW_EXTERN printer *X(mkprinter_file)(FILE *f); + +printer *X(mkprinter_cnt)(int *cnt); +printer *X(mkprinter_str)(char *s); + +FFTW_EXTERN planner *X(the_planner)(void); +void X(configure_planner)(planner *plnr); + +void X(mapflags)(planner *, unsigned); + +apiplan *X(mkapiplan)(int sign, unsigned flags, problem *prb); + +rdft_kind *X(map_r2r_kind)(int rank, const 
X(r2r_kind) * kind); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __API_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/apiplan.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/apiplan.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +static plan *mkplan0(planner *plnr, unsigned flags, + const problem *prb, int hash_info, + wisdom_state_t wisdom_state) +{ + /* map API flags into FFTW flags */ + X(mapflags)(plnr, flags); + + plnr->flags.hash_info = hash_info; + plnr->wisdom_state = wisdom_state; + + /* create plan */ + return plnr->adt->mkplan(plnr, prb); +} + +static unsigned force_estimator(unsigned flags) +{ + flags &= ~(FFTW_MEASURE | FFTW_PATIENT | FFTW_EXHAUSTIVE); + return (flags | FFTW_ESTIMATE); +} + +static plan *mkplan(planner *plnr, unsigned flags, + const problem *prb, int hash_info) +{ + plan *pln; + + pln = mkplan0(plnr, flags, prb, hash_info, WISDOM_NORMAL); + + if (plnr->wisdom_state == WISDOM_NORMAL && !pln) { + /* maybe the planner failed because of inconsistent wisdom; + plan again ignoring 
infeasible wisdom */ + pln = mkplan0(plnr, force_estimator(flags), prb, + hash_info, WISDOM_IGNORE_INFEASIBLE); + } + + if (plnr->wisdom_state == WISDOM_IS_BOGUS) { + /* if the planner detected a wisdom inconsistency, + forget all wisdom and plan again */ + plnr->adt->forget(plnr, FORGET_EVERYTHING); + + A(!pln); + pln = mkplan0(plnr, flags, prb, hash_info, WISDOM_NORMAL); + + if (plnr->wisdom_state == WISDOM_IS_BOGUS) { + /* if it still fails, plan without wisdom */ + plnr->adt->forget(plnr, FORGET_EVERYTHING); + + A(!pln); + pln = mkplan0(plnr, force_estimator(flags), + prb, hash_info, WISDOM_IGNORE_ALL); + } + } + + return pln; +} + +apiplan *X(mkapiplan)(int sign, unsigned flags, problem *prb) +{ + apiplan *p = 0; + plan *pln; + unsigned flags_used_for_planning; + planner *plnr = X(the_planner)(); + unsigned int pats[] = {FFTW_ESTIMATE, FFTW_MEASURE, + FFTW_PATIENT, FFTW_EXHAUSTIVE}; + int pat, pat_max; + double pcost = 0; + + if (flags & FFTW_WISDOM_ONLY) { + /* Special mode that returns a plan only if wisdom is present, + and returns 0 otherwise. This is now documented in the manual, + as a way to detect whether wisdom is available for a problem. */ + flags_used_for_planning = flags; + pln = mkplan0(plnr, flags, prb, 0, WISDOM_ONLY); + } else { + pat_max = flags & FFTW_ESTIMATE ? 0 : + (flags & FFTW_EXHAUSTIVE ? 3 : + (flags & FFTW_PATIENT ? 2 : 1)); + pat = plnr->timelimit >= 0 ? 
0 : pat_max; + + flags &= ~(FFTW_ESTIMATE | FFTW_MEASURE | + FFTW_PATIENT | FFTW_EXHAUSTIVE); + + plnr->start_time = X(get_crude_time)(); + + /* plan at incrementally increasing patience until we run + out of time */ + for (pln = 0, flags_used_for_planning = 0; pat <= pat_max; ++pat) { + plan *pln1; + unsigned tmpflags = flags | pats[pat]; + pln1 = mkplan(plnr, tmpflags, prb, 0); + + if (!pln1) { + /* don't bother continuing if planner failed or timed out */ + A(!pln || plnr->timed_out); + break; + } + + X(plan_destroy_internal)(pln); + pln = pln1; + flags_used_for_planning = tmpflags; + pcost = pln->pcost; + } + } + + if (pln) { + /* build apiplan */ + p = (apiplan *) MALLOC(sizeof(apiplan), PLANS); + p->prb = prb; + p->sign = sign; /* cache for execute_dft */ + + /* re-create plan from wisdom, adding blessing */ + p->pln = mkplan(plnr, flags_used_for_planning, prb, BLESSING); + + /* record pcost from most recent measurement for use in X(cost) */ + p->pln->pcost = pcost; + + if (sizeof(trigreal) > sizeof(R)) { + /* this is probably faster, and we have enough trigreal + bits to maintain accuracy */ + X(plan_awake)(p->pln, AWAKE_SQRTN_TABLE); + } else { + /* more accurate */ + X(plan_awake)(p->pln, AWAKE_SINCOS); + } + + /* we don't use pln for p->pln, above, since by re-creating the + plan we might use more patient wisdom from a timed-out mkplan */ + X(plan_destroy_internal)(pln); + } else + X(problem_destroy)(prb); + + /* discard all information not necessary to reconstruct the plan */ + plnr->adt->forget(plnr, FORGET_ACCURSED); + +#ifdef FFTW_RANDOM_ESTIMATOR + X(random_estimate_seed)++; /* subsequent "random" plans are distinct */ +#endif + + return p; +} + +void X(destroy_plan)(X(plan) p) +{ + if (p) { + X(plan_awake)(p->pln, SLEEPY); + X(plan_destroy_internal)(p->pln); + X(problem_destroy)(p->prb); + X(ifree)(p); + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/configure.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/api/configure.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" +#include "rdft.h" +#include "reodft.h" + +void X(configure_planner)(planner *plnr) +{ + X(dft_conf_standard)(plnr); + X(rdft_conf_standard)(plnr); + X(reodft_conf_standard)(plnr); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +/* guru interface: requires care in alignment, r - i, etcetera. */ +void X(execute_dft_c2r)(const X(plan) p, C *in, R *out) +{ + plan_rdft2 *pln = (plan_rdft2 *) p->pln; + problem_rdft2 *prb = (problem_rdft2 *) p->prb; + pln->apply((plan *) pln, out, out + (prb->r1 - prb->r0), in[0], in[0]+1); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +/* guru interface: requires care in alignment, r - i, etcetera. 
*/ +void X(execute_dft_r2c)(const X(plan) p, R *in, C *out) +{ + plan_rdft2 *pln = (plan_rdft2 *) p->pln; + problem_rdft2 *prb = (problem_rdft2 *) p->prb; + pln->apply((plan *) pln, in, in + (prb->r1 - prb->r0), out[0], out[0]+1); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" + +/* guru interface: requires care in alignment etcetera. 
*/ +void X(execute_dft)(const X(plan) p, C *in, C *out) +{ + plan_dft *pln = (plan_dft *) p->pln; + if (p->sign == FFT_SIGN) + pln->apply((plan *) pln, in[0], in[0]+1, out[0], out[0]+1); + else + pln->apply((plan *) pln, in[0]+1, in[0], out[0]+1, out[0]); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +/* guru interface: requires care in alignment, etcetera. 
*/ +void X(execute_r2r)(const X(plan) p, R *in, R *out) +{ + plan_rdft *pln = (plan_rdft *) p->pln; + pln->apply((plan *) pln, in, out); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-split-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-split-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +/* guru interface: requires care in alignment, r - i, etcetera. 
*/ +void X(execute_split_dft_c2r)(const X(plan) p, R *ri, R *ii, R *out) +{ + plan_rdft2 *pln = (plan_rdft2 *) p->pln; + problem_rdft2 *prb = (problem_rdft2 *) p->prb; + pln->apply((plan *) pln, out, out + (prb->r1 - prb->r0), ri, ii); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-split-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-split-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +/* guru interface: requires care in alignment, r - i, etcetera. 
*/ +void X(execute_split_dft_r2c)(const X(plan) p, R *in, R *ro, R *io) +{ + plan_rdft2 *pln = (plan_rdft2 *) p->pln; + problem_rdft2 *prb = (problem_rdft2 *) p->prb; + pln->apply((plan *) pln, in, in + (prb->r1 - prb->r0), ro, io); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute-split-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute-split-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" + +/* guru interface: requires care in alignment, r - i, etcetera. 
*/ +void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, R *ro, R *io) +{ + plan_dft *pln = (plan_dft *) p->pln; + pln->apply((plan *) pln, ri, ii, ro, io); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/execute.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/execute.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +void X(execute)(const X(plan) p) +{ + plan *pln = p->pln; + pln->adt->solve(pln, p->prb); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/export-wisdom-to-file.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/export-wisdom-to-file.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +void X(export_wisdom_to_file)(FILE *output_file) +{ + printer *p = X(mkprinter_file)(output_file); + planner *plnr = X(the_planner)(); + plnr->adt->exprt(plnr, p); + X(printer_destroy)(p); +} + +int X(export_wisdom_to_filename)(const char *filename) +{ + FILE *f = fopen(filename, "w"); + int ret; + if (!f) return 0; /* error opening file */ + X(export_wisdom_to_file)(f); + ret = !ferror(f); + if (fclose(f)) ret = 0; /* error closing file */ + return ret; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/export-wisdom-to-string.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/export-wisdom-to-string.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +char *X(export_wisdom_to_string)(void) +{ + printer *p; + planner *plnr = X(the_planner)(); + int cnt; + char *s; + + p = X(mkprinter_cnt)(&cnt); + plnr->adt->exprt(plnr, p); + X(printer_destroy)(p); + + s = (char *) malloc(sizeof(char) * (cnt + 1)); + if (s) { + p = X(mkprinter_str)(s); + plnr->adt->exprt(plnr, p); + X(printer_destroy)(p); + } + + return s; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/export-wisdom.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/export-wisdom.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +typedef struct { + printer super; + void (*write_char)(char c, void *); + void *data; +} P; + +static void putchr_generic(printer * p_, char c) +{ + P *p = (P *) p_; + (p->write_char)(c, p->data); +} + +void X(export_wisdom)(void (*write_char)(char c, void *), void *data) +{ + P *p = (P *) X(mkprinter)(sizeof(P), putchr_generic, 0); + planner *plnr = X(the_planner)(); + + p->write_char = write_char; + p->data = data; + plnr->adt->exprt(plnr, (printer *) p); + X(printer_destroy)((printer *) p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/f03api.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/f03api.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,42 @@ +#! /bin/sh + +# Script to generate Fortran 2003 interface declarations for FFTW from +# the fftw3.h header file. + +# This is designed so that the Fortran caller can do: +# use, intrinsic :: iso_c_binding +# implicit none +# include 'fftw3.f03' +# and then call the C FFTW functions directly, with type checking. + +echo "! Generated automatically. DO NOT EDIT!" +echo + +# C_FFTW_R2R_KIND is determined by configure and inserted by the Makefile +# echo " integer, parameter :: C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@" + +# Extract constants +perl -pe 's/([A-Z0-9_]+)=([+-]?[0-9]+)/\n integer\(C_INT\), parameter :: \1 = \2\n/g' < fftw3.h | grep 'integer(C_INT)' +perl -pe 's/#define +([A-Z0-9_]+) +\(([+-]?[0-9]+)U?\)/\n integer\(C_INT\), parameter :: \1 = \2\n/g' < fftw3.h | grep 'integer(C_INT)' +perl -pe 'if (/#define +([A-Z0-9_]+) +\(([0-9]+)U? 
*<< *([0-9]+)\)/) { print "\n integer\(C_INT\), parameter :: $1 = ",$2 << $3,"\n"; }' < fftw3.h | grep 'integer(C_INT)' + +# Extract function declarations +for p in $*; do + if test "$p" = "d"; then p=""; fi + + echo + cat <f77_write_char(&c, ad->data); +} + +typedef struct { + void (*f77_read_char)(int *, void *); + void *data; +} read_char_data; + +static int read_char(void *d) +{ + read_char_data *ed = (read_char_data *) d; + int c; + ed->f77_read_char(&c, ed->data); + return (c < 0 ? EOF : c); +} + +static X(r2r_kind) *ints2kinds(int rnk, const int *ik) +{ + if (!FINITE_RNK(rnk) || rnk == 0) + return 0; + else { + int i; + X(r2r_kind) *k; + + k = (X(r2r_kind) *) MALLOC(sizeof(X(r2r_kind)) * rnk, PROBLEMS); + /* reverse order for Fortran -> C */ + for (i = 0; i < rnk; ++i) + k[i] = (X(r2r_kind)) ik[rnk - 1 - i]; + return k; + } +} + +/*-----------------------------------------------------------------------*/ + +#define F77(a, A) F77x(x77(a), X77(A)) + +#ifndef WINDOWS_F77_MANGLING + +#if defined(F77_FUNC) +# define F77x(a, A) F77_FUNC(a, A) +# include "f77funcs.h" +#endif + +/* If identifiers with underscores are mangled differently than those + without underscores, then we include *both* mangling versions. The + reason is that the only Fortran compiler that does such differing + mangling is currently g77 (which adds an extra underscore to names + with underscores), whereas other compilers running on the same + machine are likely to use non-underscored mangling. (I'm sick + of users complaining that FFTW works with g77 but not with e.g. + pgf77 or ifc on the same machine.) Note that all FFTW identifiers + contain underscores, and configure picks g77 by default. */ +#if defined(F77_FUNC_) && !defined(F77_FUNC_EQUIV) +# undef F77x +# define F77x(a, A) F77_FUNC_(a, A) +# include "f77funcs.h" +#endif + +#else /* WINDOWS_F77_MANGLING */ + +/* Various mangling conventions common (?) under Windows. 
*/ + +/* g77 */ +# define WINDOWS_F77_FUNC(a, A) a ## __ +# define F77x(a, A) WINDOWS_F77_FUNC(a, A) +# include "f77funcs.h" + +/* Intel, etc. */ +# undef WINDOWS_F77_FUNC +# define WINDOWS_F77_FUNC(a, A) a ## _ +# include "f77funcs.h" + +/* Digital/Compaq/HP Visual Fortran, Intel Fortran. stdcall attribute + is apparently required to adjust for calling conventions (callee + pops stack in stdcall). See also: + http://msdn.microsoft.com/library/en-us/vccore98/html/_core_mixed.2d.language_programming.3a_.overview.asp +*/ +# undef WINDOWS_F77_FUNC +# if defined(__GNUC__) +# define WINDOWS_F77_FUNC(a, A) __attribute__((stdcall)) A +# elif defined(_MSC_VER) || defined(_ICC) || defined(_STDCALL_SUPPORTED) +# define WINDOWS_F77_FUNC(a, A) __stdcall A +# else +# define WINDOWS_F77_FUNC(a, A) A /* oh well */ +# endif +# include "f77funcs.h" + +#endif /* WINDOWS_F77_MANGLING */ + +#endif /* F77_FUNC */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/f77funcs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/f77funcs.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Functions in the FFTW Fortran API, mangled according to the + F77(...) macro. This file is designed to be #included by + f77api.c, possibly multiple times in order to support multiple + compiler manglings (via redefinition of F77). */ + +FFTW_VOIDFUNC F77(execute, EXECUTE)(X(plan) * const p) +{ + plan *pln = (*p)->pln; + pln->adt->solve(pln, (*p)->prb); +} + +FFTW_VOIDFUNC F77(destroy_plan, DESTROY_PLAN)(X(plan) *p) +{ + X(destroy_plan)(*p); +} + +FFTW_VOIDFUNC F77(cleanup, CLEANUP)(void) +{ + X(cleanup)(); +} + +FFTW_VOIDFUNC F77(forget_wisdom, FORGET_WISDOM)(void) +{ + X(forget_wisdom)(); +} + +FFTW_VOIDFUNC F77(export_wisdom, EXPORT_WISDOM)(void (*f77_write_char)(char *, void *), + void *data) +{ + write_char_data ad; + ad.f77_write_char = f77_write_char; + ad.data = data; + X(export_wisdom)(write_char, (void *) &ad); +} + +FFTW_VOIDFUNC F77(import_wisdom, IMPORT_WISDOM)(int *isuccess, + void (*f77_read_char)(int *, void *), + void *data) +{ + read_char_data ed; + ed.f77_read_char = f77_read_char; + ed.data = data; + *isuccess = X(import_wisdom)(read_char, (void *) &ed); +} + +FFTW_VOIDFUNC F77(import_system_wisdom, IMPORT_SYSTEM_WISDOM)(int *isuccess) +{ + *isuccess = X(import_system_wisdom)(); +} + +FFTW_VOIDFUNC F77(print_plan, PRINT_PLAN)(X(plan) * const p) +{ + X(print_plan)(*p); + fflush(stdout); +} + +FFTW_VOIDFUNC F77(flops,FLOPS)(X(plan) *p, double *add, double *mul, double *fma) +{ + X(flops)(*p, add, mul, fma); +} + +FFTW_VOIDFUNC F77(estimate_cost,ESTIMATE_COST)(double *cost, X(plan) * const p) +{ + *cost = X(estimate_cost)(*p); +} + +FFTW_VOIDFUNC F77(cost,COST)(double *cost, X(plan) * const p) +{ + *cost = X(cost)(*p); +} + +FFTW_VOIDFUNC F77(set_timelimit,SET_TIMELIMIT)(double *t) +{ + X(set_timelimit)(*t); +} + 
+/******************************** DFT ***********************************/ + +FFTW_VOIDFUNC F77(plan_dft, PLAN_DFT)(X(plan) *p, int *rank, const int *n, + C *in, C *out, int *sign, int *flags) +{ + int *nrev = reverse_n(*rank, n); + *p = X(plan_dft)(*rank, nrev, in, out, *sign, *flags); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_dft_1d, PLAN_DFT_1D)(X(plan) *p, int *n, C *in, C *out, + int *sign, int *flags) +{ + *p = X(plan_dft_1d)(*n, in, out, *sign, *flags); +} + +FFTW_VOIDFUNC F77(plan_dft_2d, PLAN_DFT_2D)(X(plan) *p, int *nx, int *ny, + C *in, C *out, int *sign, int *flags) +{ + *p = X(plan_dft_2d)(*ny, *nx, in, out, *sign, *flags); +} + +FFTW_VOIDFUNC F77(plan_dft_3d, PLAN_DFT_3D)(X(plan) *p, int *nx, int *ny, int *nz, + C *in, C *out, + int *sign, int *flags) +{ + *p = X(plan_dft_3d)(*nz, *ny, *nx, in, out, *sign, *flags); +} + +FFTW_VOIDFUNC F77(plan_many_dft, PLAN_MANY_DFT)(X(plan) *p, int *rank, const int *n, + int *howmany, + C *in, const int *inembed, + int *istride, int *idist, + C *out, const int *onembed, + int *ostride, int *odist, + int *sign, int *flags) +{ + int *nrev = reverse_n(*rank, n); + int *inembedrev = reverse_n(*rank, inembed); + int *onembedrev = reverse_n(*rank, onembed); + *p = X(plan_many_dft)(*rank, nrev, *howmany, + in, inembedrev, *istride, *idist, + out, onembedrev, *ostride, *odist, + *sign, *flags); + X(ifree0)(onembedrev); + X(ifree0)(inembedrev); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_guru_dft, PLAN_GURU_DFT)(X(plan) *p, int *rank, const int *n, + const int *is, const int *os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + C *in, C *out, int *sign, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + *p = X(plan_guru_dft)(*rank, dims, *howmany_rank, howmany_dims, + in, out, *sign, *flags); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(plan_guru_split_dft, 
PLAN_GURU_SPLIT_DFT)(X(plan) *p, int *rank, const int *n, + const int *is, const int *os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + R *ri, R *ii, R *ro, R *io, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + *p = X(plan_guru_split_dft)(*rank, dims, *howmany_rank, howmany_dims, + ri, ii, ro, io, *flags); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(execute_dft, EXECUTE_DFT)(X(plan) * const p, C *in, C *out) +{ + plan_dft *pln = (plan_dft *) (*p)->pln; + if ((*p)->sign == FFT_SIGN) + pln->apply((plan *) pln, in[0], in[0]+1, out[0], out[0]+1); + else + pln->apply((plan *) pln, in[0]+1, in[0], out[0]+1, out[0]); +} + +FFTW_VOIDFUNC F77(execute_split_dft, EXECUTE_SPLIT_DFT)(X(plan) * const p, + R *ri, R *ii, R *ro, R *io) +{ + plan_dft *pln = (plan_dft *) (*p)->pln; + pln->apply((plan *) pln, ri, ii, ro, io); +} + +/****************************** DFT r2c *********************************/ + +FFTW_VOIDFUNC F77(plan_dft_r2c, PLAN_DFT_R2C)(X(plan) *p, int *rank, const int *n, + R *in, C *out, int *flags) +{ + int *nrev = reverse_n(*rank, n); + *p = X(plan_dft_r2c)(*rank, nrev, in, out, *flags); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_dft_r2c_1d, PLAN_DFT_R2C_1D)(X(plan) *p, int *n, R *in, C *out, + int *flags) +{ + *p = X(plan_dft_r2c_1d)(*n, in, out, *flags); +} + +FFTW_VOIDFUNC F77(plan_dft_r2c_2d, PLAN_DFT_R2C_2D)(X(plan) *p, int *nx, int *ny, + R *in, C *out, int *flags) +{ + *p = X(plan_dft_r2c_2d)(*ny, *nx, in, out, *flags); +} + +FFTW_VOIDFUNC F77(plan_dft_r2c_3d, PLAN_DFT_R2C_3D)(X(plan) *p, + int *nx, int *ny, int *nz, + R *in, C *out, + int *flags) +{ + *p = X(plan_dft_r2c_3d)(*nz, *ny, *nx, in, out, *flags); +} + +FFTW_VOIDFUNC F77(plan_many_dft_r2c, PLAN_MANY_DFT_R2C)( + X(plan) *p, int *rank, const int *n, + int *howmany, + R *in, const int *inembed, int *istride, int *idist, + C *out, const int *onembed, int 
*ostride, int *odist, + int *flags) +{ + int *nrev = reverse_n(*rank, n); + int *inembedrev = reverse_n(*rank, inembed); + int *onembedrev = reverse_n(*rank, onembed); + *p = X(plan_many_dft_r2c)(*rank, nrev, *howmany, + in, inembedrev, *istride, *idist, + out, onembedrev, *ostride, *odist, + *flags); + X(ifree0)(onembedrev); + X(ifree0)(inembedrev); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_guru_dft_r2c, PLAN_GURU_DFT_R2C)( + X(plan) *p, int *rank, const int *n, + const int *is, const int *os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + R *in, C *out, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + *p = X(plan_guru_dft_r2c)(*rank, dims, *howmany_rank, howmany_dims, + in, out, *flags); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(plan_guru_split_dft_r2c, PLAN_GURU_SPLIT_DFT_R2C)( + X(plan) *p, int *rank, const int *n, + const int *is, const int *os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + R *in, R *ro, R *io, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + *p = X(plan_guru_split_dft_r2c)(*rank, dims, *howmany_rank, howmany_dims, + in, ro, io, *flags); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(execute_dft_r2c, EXECUTE_DFT_R2C)(X(plan) * const p, R *in, C *out) +{ + plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; + problem_rdft2 *prb = (problem_rdft2 *) (*p)->prb; + pln->apply((plan *) pln, in, in + (prb->r1 - prb->r0), out[0], out[0]+1); +} + +FFTW_VOIDFUNC F77(execute_split_dft_r2c, EXECUTE_SPLIT_DFT_R2C)(X(plan) * const p, + R *in, R *ro, R *io) +{ + plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; + problem_rdft2 *prb = (problem_rdft2 *) (*p)->prb; + pln->apply((plan *) pln, in, in + (prb->r1 - prb->r0), ro, io); +} + +/****************************** DFT c2r 
*********************************/ + +FFTW_VOIDFUNC F77(plan_dft_c2r, PLAN_DFT_C2R)(X(plan) *p, int *rank, const int *n, + C *in, R *out, int *flags) +{ + int *nrev = reverse_n(*rank, n); + *p = X(plan_dft_c2r)(*rank, nrev, in, out, *flags); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_dft_c2r_1d, PLAN_DFT_C2R_1D)(X(plan) *p, int *n, C *in, R *out, + int *flags) +{ + *p = X(plan_dft_c2r_1d)(*n, in, out, *flags); +} + +FFTW_VOIDFUNC F77(plan_dft_c2r_2d, PLAN_DFT_C2R_2D)(X(plan) *p, int *nx, int *ny, + C *in, R *out, int *flags) +{ + *p = X(plan_dft_c2r_2d)(*ny, *nx, in, out, *flags); +} + +FFTW_VOIDFUNC F77(plan_dft_c2r_3d, PLAN_DFT_C2R_3D)(X(plan) *p, + int *nx, int *ny, int *nz, + C *in, R *out, + int *flags) +{ + *p = X(plan_dft_c2r_3d)(*nz, *ny, *nx, in, out, *flags); +} + +FFTW_VOIDFUNC F77(plan_many_dft_c2r, PLAN_MANY_DFT_C2R)( + X(plan) *p, int *rank, const int *n, + int *howmany, + C *in, const int *inembed, int *istride, int *idist, + R *out, const int *onembed, int *ostride, int *odist, + int *flags) +{ + int *nrev = reverse_n(*rank, n); + int *inembedrev = reverse_n(*rank, inembed); + int *onembedrev = reverse_n(*rank, onembed); + *p = X(plan_many_dft_c2r)(*rank, nrev, *howmany, + in, inembedrev, *istride, *idist, + out, onembedrev, *ostride, *odist, + *flags); + X(ifree0)(onembedrev); + X(ifree0)(inembedrev); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_guru_dft_c2r, PLAN_GURU_DFT_C2R)( + X(plan) *p, int *rank, const int *n, + const int *is, const int *os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + C *in, R *out, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + *p = X(plan_guru_dft_c2r)(*rank, dims, *howmany_rank, howmany_dims, + in, out, *flags); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(plan_guru_split_dft_c2r, PLAN_GURU_SPLIT_DFT_C2R)( + X(plan) *p, int *rank, const int *n, + const int *is, const int 
*os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + R *ri, R *ii, R *out, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + *p = X(plan_guru_split_dft_c2r)(*rank, dims, *howmany_rank, howmany_dims, + ri, ii, out, *flags); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(execute_dft_c2r, EXECUTE_DFT_C2R)(X(plan) * const p, C *in, R *out) +{ + plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; + problem_rdft2 *prb = (problem_rdft2 *) (*p)->prb; + pln->apply((plan *) pln, out, out + (prb->r1 - prb->r0), in[0], in[0]+1); +} + +FFTW_VOIDFUNC F77(execute_split_dft_c2r, EXECUTE_SPLIT_DFT_C2R)(X(plan) * const p, + R *ri, R *ii, R *out) +{ + plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; + problem_rdft2 *prb = (problem_rdft2 *) (*p)->prb; + pln->apply((plan *) pln, out, out + (prb->r1 - prb->r0), ri, ii); +} + +/****************************** r2r *********************************/ + +FFTW_VOIDFUNC F77(plan_r2r, PLAN_R2R)(X(plan) *p, int *rank, const int *n, + R *in, R *out, + int *kind, int *flags) +{ + int *nrev = reverse_n(*rank, n); + X(r2r_kind) *k = ints2kinds(*rank, kind); + *p = X(plan_r2r)(*rank, nrev, in, out, k, *flags); + X(ifree0)(k); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_r2r_1d, PLAN_R2R_1D)(X(plan) *p, int *n, R *in, R *out, + int *kind, int *flags) +{ + *p = X(plan_r2r_1d)(*n, in, out, (X(r2r_kind)) *kind, *flags); +} + +FFTW_VOIDFUNC F77(plan_r2r_2d, PLAN_R2R_2D)(X(plan) *p, int *nx, int *ny, + R *in, R *out, + int *kindx, int *kindy, int *flags) +{ + *p = X(plan_r2r_2d)(*ny, *nx, in, out, + (X(r2r_kind)) *kindy, (X(r2r_kind)) *kindx, *flags); +} + +FFTW_VOIDFUNC F77(plan_r2r_3d, PLAN_R2R_3D)(X(plan) *p, + int *nx, int *ny, int *nz, + R *in, R *out, + int *kindx, int *kindy, int *kindz, + int *flags) +{ + *p = X(plan_r2r_3d)(*nz, *ny, *nx, in, out, + (X(r2r_kind)) *kindz, (X(r2r_kind)) *kindy, + (X(r2r_kind)) *kindx, *flags); 
+} + +FFTW_VOIDFUNC F77(plan_many_r2r, PLAN_MANY_R2R)( + X(plan) *p, int *rank, const int *n, + int *howmany, + R *in, const int *inembed, int *istride, int *idist, + R *out, const int *onembed, int *ostride, int *odist, + int *kind, int *flags) +{ + int *nrev = reverse_n(*rank, n); + int *inembedrev = reverse_n(*rank, inembed); + int *onembedrev = reverse_n(*rank, onembed); + X(r2r_kind) *k = ints2kinds(*rank, kind); + *p = X(plan_many_r2r)(*rank, nrev, *howmany, + in, inembedrev, *istride, *idist, + out, onembedrev, *ostride, *odist, + k, *flags); + X(ifree0)(k); + X(ifree0)(onembedrev); + X(ifree0)(inembedrev); + X(ifree0)(nrev); +} + +FFTW_VOIDFUNC F77(plan_guru_r2r, PLAN_GURU_R2R)( + X(plan) *p, int *rank, const int *n, + const int *is, const int *os, + int *howmany_rank, const int *h_n, + const int *h_is, const int *h_os, + R *in, R *out, int *kind, int *flags) +{ + X(iodim) *dims = make_dims(*rank, n, is, os); + X(iodim) *howmany_dims = make_dims(*howmany_rank, h_n, h_is, h_os); + X(r2r_kind) *k = ints2kinds(*rank, kind); + *p = X(plan_guru_r2r)(*rank, dims, *howmany_rank, howmany_dims, + in, out, k, *flags); + X(ifree0)(k); + X(ifree0)(howmany_dims); + X(ifree0)(dims); +} + +FFTW_VOIDFUNC F77(execute_r2r, EXECUTE_R2R)(X(plan) * const p, R *in, R *out) +{ + plan_rdft *pln = (plan_rdft *) (*p)->pln; + pln->apply((plan *) pln, in, out); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/fftw3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/fftw3.f Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,72 @@ + INTEGER FFTW_R2HC + PARAMETER (FFTW_R2HC=0) + INTEGER FFTW_HC2R + PARAMETER (FFTW_HC2R=1) + INTEGER FFTW_DHT + PARAMETER (FFTW_DHT=2) + INTEGER FFTW_REDFT00 + PARAMETER (FFTW_REDFT00=3) + INTEGER FFTW_REDFT01 + PARAMETER (FFTW_REDFT01=4) + INTEGER FFTW_REDFT10 + PARAMETER (FFTW_REDFT10=5) + INTEGER FFTW_REDFT11 + PARAMETER (FFTW_REDFT11=6) + INTEGER FFTW_RODFT00 + PARAMETER (FFTW_RODFT00=7) + INTEGER FFTW_RODFT01 + PARAMETER 
(FFTW_RODFT01=8) + INTEGER FFTW_RODFT10 + PARAMETER (FFTW_RODFT10=9) + INTEGER FFTW_RODFT11 + PARAMETER (FFTW_RODFT11=10) + INTEGER FFTW_FORWARD + PARAMETER (FFTW_FORWARD=-1) + INTEGER FFTW_BACKWARD + PARAMETER (FFTW_BACKWARD=+1) + INTEGER FFTW_MEASURE + PARAMETER (FFTW_MEASURE=0) + INTEGER FFTW_DESTROY_INPUT + PARAMETER (FFTW_DESTROY_INPUT=1) + INTEGER FFTW_UNALIGNED + PARAMETER (FFTW_UNALIGNED=2) + INTEGER FFTW_CONSERVE_MEMORY + PARAMETER (FFTW_CONSERVE_MEMORY=4) + INTEGER FFTW_EXHAUSTIVE + PARAMETER (FFTW_EXHAUSTIVE=8) + INTEGER FFTW_PRESERVE_INPUT + PARAMETER (FFTW_PRESERVE_INPUT=16) + INTEGER FFTW_PATIENT + PARAMETER (FFTW_PATIENT=32) + INTEGER FFTW_ESTIMATE + PARAMETER (FFTW_ESTIMATE=64) + INTEGER FFTW_WISDOM_ONLY + PARAMETER (FFTW_WISDOM_ONLY=2097152) + INTEGER FFTW_ESTIMATE_PATIENT + PARAMETER (FFTW_ESTIMATE_PATIENT=128) + INTEGER FFTW_BELIEVE_PCOST + PARAMETER (FFTW_BELIEVE_PCOST=256) + INTEGER FFTW_NO_DFT_R2HC + PARAMETER (FFTW_NO_DFT_R2HC=512) + INTEGER FFTW_NO_NONTHREADED + PARAMETER (FFTW_NO_NONTHREADED=1024) + INTEGER FFTW_NO_BUFFERING + PARAMETER (FFTW_NO_BUFFERING=2048) + INTEGER FFTW_NO_INDIRECT_OP + PARAMETER (FFTW_NO_INDIRECT_OP=4096) + INTEGER FFTW_ALLOW_LARGE_GENERIC + PARAMETER (FFTW_ALLOW_LARGE_GENERIC=8192) + INTEGER FFTW_NO_RANK_SPLITS + PARAMETER (FFTW_NO_RANK_SPLITS=16384) + INTEGER FFTW_NO_VRANK_SPLITS + PARAMETER (FFTW_NO_VRANK_SPLITS=32768) + INTEGER FFTW_NO_VRECURSE + PARAMETER (FFTW_NO_VRECURSE=65536) + INTEGER FFTW_NO_SIMD + PARAMETER (FFTW_NO_SIMD=131072) + INTEGER FFTW_NO_SLOW + PARAMETER (FFTW_NO_SLOW=262144) + INTEGER FFTW_NO_FIXED_RADIX_LARGE_N + PARAMETER (FFTW_NO_FIXED_RADIX_LARGE_N=524288) + INTEGER FFTW_ALLOW_PRUNING + PARAMETER (FFTW_ALLOW_PRUNING=1048576) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/fftw3.f03.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/fftw3.f03.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1244 @@ +! Generated automatically. DO NOT EDIT! 
+ + integer(C_INT), parameter :: FFTW_R2HC = 0 + integer(C_INT), parameter :: FFTW_HC2R = 1 + integer(C_INT), parameter :: FFTW_DHT = 2 + integer(C_INT), parameter :: FFTW_REDFT00 = 3 + integer(C_INT), parameter :: FFTW_REDFT01 = 4 + integer(C_INT), parameter :: FFTW_REDFT10 = 5 + integer(C_INT), parameter :: FFTW_REDFT11 = 6 + integer(C_INT), parameter :: FFTW_RODFT00 = 7 + integer(C_INT), parameter :: FFTW_RODFT01 = 8 + integer(C_INT), parameter :: FFTW_RODFT10 = 9 + integer(C_INT), parameter :: FFTW_RODFT11 = 10 + integer(C_INT), parameter :: FFTW_FORWARD = -1 + integer(C_INT), parameter :: FFTW_BACKWARD = +1 + integer(C_INT), parameter :: FFTW_MEASURE = 0 + integer(C_INT), parameter :: FFTW_DESTROY_INPUT = 1 + integer(C_INT), parameter :: FFTW_UNALIGNED = 2 + integer(C_INT), parameter :: FFTW_CONSERVE_MEMORY = 4 + integer(C_INT), parameter :: FFTW_EXHAUSTIVE = 8 + integer(C_INT), parameter :: FFTW_PRESERVE_INPUT = 16 + integer(C_INT), parameter :: FFTW_PATIENT = 32 + integer(C_INT), parameter :: FFTW_ESTIMATE = 64 + integer(C_INT), parameter :: FFTW_WISDOM_ONLY = 2097152 + integer(C_INT), parameter :: FFTW_ESTIMATE_PATIENT = 128 + integer(C_INT), parameter :: FFTW_BELIEVE_PCOST = 256 + integer(C_INT), parameter :: FFTW_NO_DFT_R2HC = 512 + integer(C_INT), parameter :: FFTW_NO_NONTHREADED = 1024 + integer(C_INT), parameter :: FFTW_NO_BUFFERING = 2048 + integer(C_INT), parameter :: FFTW_NO_INDIRECT_OP = 4096 + integer(C_INT), parameter :: FFTW_ALLOW_LARGE_GENERIC = 8192 + integer(C_INT), parameter :: FFTW_NO_RANK_SPLITS = 16384 + integer(C_INT), parameter :: FFTW_NO_VRANK_SPLITS = 32768 + integer(C_INT), parameter :: FFTW_NO_VRECURSE = 65536 + integer(C_INT), parameter :: FFTW_NO_SIMD = 131072 + integer(C_INT), parameter :: FFTW_NO_SLOW = 262144 + integer(C_INT), parameter :: FFTW_NO_FIXED_RADIX_LARGE_N = 524288 + integer(C_INT), parameter :: FFTW_ALLOW_PRUNING = 1048576 + + type, bind(C) :: fftw_iodim + integer(C_INT) n, is, os + end type fftw_iodim + type, 
bind(C) :: fftw_iodim64 + integer(C_INTPTR_T) n, is, os + end type fftw_iodim64 + + interface + type(C_PTR) function fftw_plan_dft(rank,n,in,out,sign,flags) bind(C, name='fftw_plan_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_dft + + type(C_PTR) function fftw_plan_dft_1d(n,in,out,sign,flags) bind(C, name='fftw_plan_dft_1d') + import + integer(C_INT), value :: n + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_dft_1d + + type(C_PTR) function fftw_plan_dft_2d(n0,n1,in,out,sign,flags) bind(C, name='fftw_plan_dft_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_dft_2d + + type(C_PTR) function fftw_plan_dft_3d(n0,n1,n2,in,out,sign,flags) bind(C, name='fftw_plan_dft_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_dft_3d + + type(C_PTR) function fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags) & + bind(C, name='fftw_plan_many_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) 
:: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_many_dft + + type(C_PTR) function fftw_plan_guru_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftw_plan_guru_dft') + import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_guru_dft + + type(C_PTR) function fftw_plan_guru_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftw_plan_guru_split_dft') + import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: ri + real(C_DOUBLE), dimension(*), intent(out) :: ii + real(C_DOUBLE), dimension(*), intent(out) :: ro + real(C_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftw_plan_guru_split_dft + + type(C_PTR) function fftw_plan_guru64_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftw_plan_guru64_dft') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + 
complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftw_plan_guru64_dft + + type(C_PTR) function fftw_plan_guru64_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftw_plan_guru64_split_dft') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: ri + real(C_DOUBLE), dimension(*), intent(out) :: ii + real(C_DOUBLE), dimension(*), intent(out) :: ro + real(C_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftw_plan_guru64_split_dft + + subroutine fftw_execute_dft(p,in,out) bind(C, name='fftw_execute_dft') + import + type(C_PTR), value :: p + complex(C_DOUBLE_COMPLEX), dimension(*), intent(inout) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + end subroutine fftw_execute_dft + + subroutine fftw_execute_split_dft(p,ri,ii,ro,io) bind(C, name='fftw_execute_split_dft') + import + type(C_PTR), value :: p + real(C_DOUBLE), dimension(*), intent(inout) :: ri + real(C_DOUBLE), dimension(*), intent(inout) :: ii + real(C_DOUBLE), dimension(*), intent(out) :: ro + real(C_DOUBLE), dimension(*), intent(out) :: io + end subroutine fftw_execute_split_dft + + type(C_PTR) function fftw_plan_many_dft_r2c(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) & + bind(C, name='fftw_plan_many_dft_r2c') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + real(C_DOUBLE), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: 
onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: flags + end function fftw_plan_many_dft_r2c + + type(C_PTR) function fftw_plan_dft_r2c(rank,n,in,out,flags) bind(C, name='fftw_plan_dft_r2c') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(C_DOUBLE), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_r2c + + type(C_PTR) function fftw_plan_dft_r2c_1d(n,in,out,flags) bind(C, name='fftw_plan_dft_r2c_1d') + import + integer(C_INT), value :: n + real(C_DOUBLE), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_r2c_1d + + type(C_PTR) function fftw_plan_dft_r2c_2d(n0,n1,in,out,flags) bind(C, name='fftw_plan_dft_r2c_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(C_DOUBLE), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_r2c_2d + + type(C_PTR) function fftw_plan_dft_r2c_3d(n0,n1,n2,in,out,flags) bind(C, name='fftw_plan_dft_r2c_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + real(C_DOUBLE), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_r2c_3d + + type(C_PTR) function fftw_plan_many_dft_c2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) & + bind(C, name='fftw_plan_many_dft_c2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: 
istride + integer(C_INT), value :: idist + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: flags + end function fftw_plan_many_dft_c2r + + type(C_PTR) function fftw_plan_dft_c2r(rank,n,in,out,flags) bind(C, name='fftw_plan_dft_c2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_c2r + + type(C_PTR) function fftw_plan_dft_c2r_1d(n,in,out,flags) bind(C, name='fftw_plan_dft_c2r_1d') + import + integer(C_INT), value :: n + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_c2r_1d + + type(C_PTR) function fftw_plan_dft_c2r_2d(n0,n1,in,out,flags) bind(C, name='fftw_plan_dft_c2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_c2r_2d + + type(C_PTR) function fftw_plan_dft_c2r_3d(n0,n1,n2,in,out,flags) bind(C, name='fftw_plan_dft_c2r_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_dft_c2r_3d + + type(C_PTR) function fftw_plan_guru_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftw_plan_guru_dft_r2c') + import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + 
type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_guru_dft_r2c + + type(C_PTR) function fftw_plan_guru_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftw_plan_guru_dft_c2r') + import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_guru_dft_c2r + + type(C_PTR) function fftw_plan_guru_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) & + bind(C, name='fftw_plan_guru_split_dft_r2c') + import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: ro + real(C_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftw_plan_guru_split_dft_r2c + + type(C_PTR) function fftw_plan_guru_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) & + bind(C, name='fftw_plan_guru_split_dft_c2r') + import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: ri + real(C_DOUBLE), dimension(*), intent(out) :: ii + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_guru_split_dft_c2r + + type(C_PTR) function 
fftw_plan_guru64_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftw_plan_guru64_dft_r2c') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_guru64_dft_r2c + + type(C_PTR) function fftw_plan_guru64_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftw_plan_guru64_dft_c2r') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_guru64_dft_c2r + + type(C_PTR) function fftw_plan_guru64_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) & + bind(C, name='fftw_plan_guru64_split_dft_r2c') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: ro + real(C_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftw_plan_guru64_split_dft_r2c + + type(C_PTR) function fftw_plan_guru64_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) & + bind(C, name='fftw_plan_guru64_split_dft_c2r') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), 
dimension(*), intent(out) :: ri + real(C_DOUBLE), dimension(*), intent(out) :: ii + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftw_plan_guru64_split_dft_c2r + + subroutine fftw_execute_dft_r2c(p,in,out) bind(C, name='fftw_execute_dft_r2c') + import + type(C_PTR), value :: p + real(C_DOUBLE), dimension(*), intent(inout) :: in + complex(C_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + end subroutine fftw_execute_dft_r2c + + subroutine fftw_execute_dft_c2r(p,in,out) bind(C, name='fftw_execute_dft_c2r') + import + type(C_PTR), value :: p + complex(C_DOUBLE_COMPLEX), dimension(*), intent(inout) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + end subroutine fftw_execute_dft_c2r + + subroutine fftw_execute_split_dft_r2c(p,in,ro,io) bind(C, name='fftw_execute_split_dft_r2c') + import + type(C_PTR), value :: p + real(C_DOUBLE), dimension(*), intent(inout) :: in + real(C_DOUBLE), dimension(*), intent(out) :: ro + real(C_DOUBLE), dimension(*), intent(out) :: io + end subroutine fftw_execute_split_dft_r2c + + subroutine fftw_execute_split_dft_c2r(p,ri,ii,out) bind(C, name='fftw_execute_split_dft_c2r') + import + type(C_PTR), value :: p + real(C_DOUBLE), dimension(*), intent(inout) :: ri + real(C_DOUBLE), dimension(*), intent(inout) :: ii + real(C_DOUBLE), dimension(*), intent(out) :: out + end subroutine fftw_execute_split_dft_c2r + + type(C_PTR) function fftw_plan_many_r2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,kind,flags) & + bind(C, name='fftw_plan_many_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + real(C_DOUBLE), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value 
:: ostride + integer(C_INT), value :: odist + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftw_plan_many_r2r + + type(C_PTR) function fftw_plan_r2r(rank,n,in,out,kind,flags) bind(C, name='fftw_plan_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftw_plan_r2r + + type(C_PTR) function fftw_plan_r2r_1d(n,in,out,kind,flags) bind(C, name='fftw_plan_r2r_1d') + import + integer(C_INT), value :: n + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind + integer(C_INT), value :: flags + end function fftw_plan_r2r_1d + + type(C_PTR) function fftw_plan_r2r_2d(n0,n1,in,out,kind0,kind1,flags) bind(C, name='fftw_plan_r2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_INT), value :: flags + end function fftw_plan_r2r_2d + + type(C_PTR) function fftw_plan_r2r_3d(n0,n1,n2,in,out,kind0,kind1,kind2,flags) bind(C, name='fftw_plan_r2r_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_FFTW_R2R_KIND), value :: kind2 + integer(C_INT), value :: flags + end function fftw_plan_r2r_3d + + type(C_PTR) function fftw_plan_guru_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftw_plan_guru_r2r') + 
import + integer(C_INT), value :: rank + type(fftw_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftw_plan_guru_r2r + + type(C_PTR) function fftw_plan_guru64_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftw_plan_guru64_r2r') + import + integer(C_INT), value :: rank + type(fftw_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftw_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_DOUBLE), dimension(*), intent(out) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftw_plan_guru64_r2r + + subroutine fftw_execute_r2r(p,in,out) bind(C, name='fftw_execute_r2r') + import + type(C_PTR), value :: p + real(C_DOUBLE), dimension(*), intent(inout) :: in + real(C_DOUBLE), dimension(*), intent(out) :: out + end subroutine fftw_execute_r2r + + subroutine fftw_destroy_plan(p) bind(C, name='fftw_destroy_plan') + import + type(C_PTR), value :: p + end subroutine fftw_destroy_plan + + subroutine fftw_forget_wisdom() bind(C, name='fftw_forget_wisdom') + import + end subroutine fftw_forget_wisdom + + subroutine fftw_cleanup() bind(C, name='fftw_cleanup') + import + end subroutine fftw_cleanup + + subroutine fftw_set_timelimit(t) bind(C, name='fftw_set_timelimit') + import + real(C_DOUBLE), value :: t + end subroutine fftw_set_timelimit + + subroutine fftw_plan_with_nthreads(nthreads) bind(C, name='fftw_plan_with_nthreads') + import + integer(C_INT), value :: nthreads + end subroutine fftw_plan_with_nthreads + + integer(C_INT) function fftw_init_threads() bind(C, name='fftw_init_threads') + 
import + end function fftw_init_threads + + subroutine fftw_cleanup_threads() bind(C, name='fftw_cleanup_threads') + import + end subroutine fftw_cleanup_threads + + integer(C_INT) function fftw_export_wisdom_to_filename(filename) bind(C, name='fftw_export_wisdom_to_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftw_export_wisdom_to_filename + + subroutine fftw_export_wisdom_to_file(output_file) bind(C, name='fftw_export_wisdom_to_file') + import + type(C_PTR), value :: output_file + end subroutine fftw_export_wisdom_to_file + + type(C_PTR) function fftw_export_wisdom_to_string() bind(C, name='fftw_export_wisdom_to_string') + import + end function fftw_export_wisdom_to_string + + subroutine fftw_export_wisdom(write_char,data) bind(C, name='fftw_export_wisdom') + import + type(C_FUNPTR), value :: write_char + type(C_PTR), value :: data + end subroutine fftw_export_wisdom + + integer(C_INT) function fftw_import_system_wisdom() bind(C, name='fftw_import_system_wisdom') + import + end function fftw_import_system_wisdom + + integer(C_INT) function fftw_import_wisdom_from_filename(filename) bind(C, name='fftw_import_wisdom_from_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftw_import_wisdom_from_filename + + integer(C_INT) function fftw_import_wisdom_from_file(input_file) bind(C, name='fftw_import_wisdom_from_file') + import + type(C_PTR), value :: input_file + end function fftw_import_wisdom_from_file + + integer(C_INT) function fftw_import_wisdom_from_string(input_string) bind(C, name='fftw_import_wisdom_from_string') + import + character(C_CHAR), dimension(*), intent(in) :: input_string + end function fftw_import_wisdom_from_string + + integer(C_INT) function fftw_import_wisdom(read_char,data) bind(C, name='fftw_import_wisdom') + import + type(C_FUNPTR), value :: read_char + type(C_PTR), value :: data + end function fftw_import_wisdom + + subroutine 
fftw_fprint_plan(p,output_file) bind(C, name='fftw_fprint_plan') + import + type(C_PTR), value :: p + type(C_PTR), value :: output_file + end subroutine fftw_fprint_plan + + subroutine fftw_print_plan(p) bind(C, name='fftw_print_plan') + import + type(C_PTR), value :: p + end subroutine fftw_print_plan + + type(C_PTR) function fftw_sprint_plan(p) bind(C, name='fftw_sprint_plan') + import + type(C_PTR), value :: p + end function fftw_sprint_plan + + type(C_PTR) function fftw_malloc(n) bind(C, name='fftw_malloc') + import + integer(C_SIZE_T), value :: n + end function fftw_malloc + + type(C_PTR) function fftw_alloc_real(n) bind(C, name='fftw_alloc_real') + import + integer(C_SIZE_T), value :: n + end function fftw_alloc_real + + type(C_PTR) function fftw_alloc_complex(n) bind(C, name='fftw_alloc_complex') + import + integer(C_SIZE_T), value :: n + end function fftw_alloc_complex + + subroutine fftw_free(p) bind(C, name='fftw_free') + import + type(C_PTR), value :: p + end subroutine fftw_free + + subroutine fftw_flops(p,add,mul,fmas) bind(C, name='fftw_flops') + import + type(C_PTR), value :: p + real(C_DOUBLE), intent(out) :: add + real(C_DOUBLE), intent(out) :: mul + real(C_DOUBLE), intent(out) :: fmas + end subroutine fftw_flops + + real(C_DOUBLE) function fftw_estimate_cost(p) bind(C, name='fftw_estimate_cost') + import + type(C_PTR), value :: p + end function fftw_estimate_cost + + real(C_DOUBLE) function fftw_cost(p) bind(C, name='fftw_cost') + import + type(C_PTR), value :: p + end function fftw_cost + + integer(C_INT) function fftw_alignment_of(p) bind(C, name='fftw_alignment_of') + import + real(C_DOUBLE), dimension(*), intent(out) :: p + end function fftw_alignment_of + + end interface + + type, bind(C) :: fftwf_iodim + integer(C_INT) n, is, os + end type fftwf_iodim + type, bind(C) :: fftwf_iodim64 + integer(C_INTPTR_T) n, is, os + end type fftwf_iodim64 + + interface + type(C_PTR) function fftwf_plan_dft(rank,n,in,out,sign,flags) bind(C, 
name='fftwf_plan_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_dft + + type(C_PTR) function fftwf_plan_dft_1d(n,in,out,sign,flags) bind(C, name='fftwf_plan_dft_1d') + import + integer(C_INT), value :: n + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_dft_1d + + type(C_PTR) function fftwf_plan_dft_2d(n0,n1,in,out,sign,flags) bind(C, name='fftwf_plan_dft_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_dft_2d + + type(C_PTR) function fftwf_plan_dft_3d(n0,n1,n2,in,out,sign,flags) bind(C, name='fftwf_plan_dft_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_dft_3d + + type(C_PTR) function fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags) & + bind(C, name='fftwf_plan_many_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(C_FLOAT_COMPLEX), 
dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_many_dft + + type(C_PTR) function fftwf_plan_guru_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftwf_plan_guru_dft') + import + integer(C_INT), value :: rank + type(fftwf_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwf_iodim), dimension(*), intent(in) :: howmany_dims + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_guru_dft + + type(C_PTR) function fftwf_plan_guru_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftwf_plan_guru_split_dft') + import + integer(C_INT), value :: rank + type(fftwf_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwf_iodim), dimension(*), intent(in) :: howmany_dims + real(C_FLOAT), dimension(*), intent(out) :: ri + real(C_FLOAT), dimension(*), intent(out) :: ii + real(C_FLOAT), dimension(*), intent(out) :: ro + real(C_FLOAT), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwf_plan_guru_split_dft + + type(C_PTR) function fftwf_plan_guru64_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftwf_plan_guru64_dft') + import + integer(C_INT), value :: rank + type(fftwf_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in + complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwf_plan_guru64_dft 
+    ! C binding for fftwf_plan_guru64_split_dft; result is an opaque type(C_PTR) (presumably the plan handle - confirm).
+    type(C_PTR) function fftwf_plan_guru64_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) &
+         bind(C, name='fftwf_plan_guru64_split_dft')
+      import  ! host-associate the C kinds and derived types used below
+      integer(C_INT), value :: rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: ri  ! NOTE(review): named like an input but intent(out); presumably planning may overwrite it
+      real(C_FLOAT), dimension(*), intent(out) :: ii  ! same remark as ri
+      real(C_FLOAT), dimension(*), intent(out) :: ro
+      real(C_FLOAT), dimension(*), intent(out) :: io
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru64_split_dft
+    ! C binding for fftwf_execute_dft: takes a C pointer p by value plus single-precision complex arrays in/out.
+    subroutine fftwf_execute_dft(p,in,out) bind(C, name='fftwf_execute_dft')
+      import
+      type(C_PTR), value :: p
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(inout) :: in  ! inout: the caller must pass a definable array
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+    end subroutine fftwf_execute_dft
+    ! C binding for fftwf_execute_split_dft: same shape as above, but with four separate real arrays.
+    subroutine fftwf_execute_split_dft(p,ri,ii,ro,io) bind(C, name='fftwf_execute_split_dft')
+      import
+      type(C_PTR), value :: p
+      real(C_FLOAT), dimension(*), intent(inout) :: ri  ! presumably real part of the input - confirm
+      real(C_FLOAT), dimension(*), intent(inout) :: ii  ! presumably imaginary part of the input - confirm
+      real(C_FLOAT), dimension(*), intent(out) :: ro
+      real(C_FLOAT), dimension(*), intent(out) :: io
+    end subroutine fftwf_execute_split_dft
+    ! C binding for fftwf_plan_many_dft_r2c: real array in, complex array out; result is a type(C_PTR).
+    type(C_PTR) function fftwf_plan_many_dft_r2c(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) &
+         bind(C, name='fftwf_plan_many_dft_r2c')
+      import
+      integer(C_INT), value :: rank
+      integer(C_INT), dimension(*), intent(in) :: n
+      integer(C_INT), value :: howmany
+      real(C_FLOAT), dimension(*), intent(out) :: in  ! NOTE(review): intent(out) on the input array looks deliberate; confirm with FFTW docs
+      integer(C_INT), dimension(*), intent(in) :: inembed
+      integer(C_INT), value :: istride
+      integer(C_INT), value :: idist
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), dimension(*), intent(in) :: onembed
+      integer(C_INT), value :: ostride
+      integer(C_INT), value :: odist
+      integer(C_INT), value :: flags
+    end function fftwf_plan_many_dft_r2c
+
+
    type(C_PTR) function fftwf_plan_dft_r2c(rank,n,in,out,flags) bind(C, name='fftwf_plan_dft_r2c')
+      import  ! C binding for fftwf_plan_dft_r2c (real in, complex out); result is an opaque type(C_PTR)
+      integer(C_INT), value :: rank
+      integer(C_INT), dimension(*), intent(in) :: n
+      real(C_FLOAT), dimension(*), intent(out) :: in  ! NOTE(review): input array is intent(out); presumably planning may overwrite it
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_r2c
+    ! C binding for fftwf_plan_dft_r2c_1d: one by-value extent n, real array in, complex array out.
+    type(C_PTR) function fftwf_plan_dft_r2c_1d(n,in,out,flags) bind(C, name='fftwf_plan_dft_r2c_1d')
+      import
+      integer(C_INT), value :: n
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_r2c_1d
+    ! C binding for fftwf_plan_dft_r2c_2d: two by-value extents n0, n1.
+    type(C_PTR) function fftwf_plan_dft_r2c_2d(n0,n1,in,out,flags) bind(C, name='fftwf_plan_dft_r2c_2d')
+      import
+      integer(C_INT), value :: n0
+      integer(C_INT), value :: n1
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_r2c_2d
+    ! C binding for fftwf_plan_dft_r2c_3d: three by-value extents n0, n1, n2.
+    type(C_PTR) function fftwf_plan_dft_r2c_3d(n0,n1,n2,in,out,flags) bind(C, name='fftwf_plan_dft_r2c_3d')
+      import
+      integer(C_INT), value :: n0
+      integer(C_INT), value :: n1
+      integer(C_INT), value :: n2
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_r2c_3d
+    ! C binding for fftwf_plan_many_dft_c2r: complex array in, real array out; result is a type(C_PTR).
+    type(C_PTR) function fftwf_plan_many_dft_c2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) &
+         bind(C, name='fftwf_plan_many_dft_c2r')
+      import
+      integer(C_INT), value :: rank
+      integer(C_INT), dimension(*), intent(in) :: n
+      integer(C_INT), value :: howmany
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      integer(C_INT), dimension(*), intent(in) :: inembed
+      integer(C_INT), value :: istride
+      integer(C_INT), value :: idist
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), dimension(*), intent(in) :: onembed
+      integer(C_INT), value :: ostride
+      integer(C_INT), value :: odist
+      integer(C_INT), value :: flags
+    end function fftwf_plan_many_dft_c2r
+    ! C binding for fftwf_plan_dft_c2r: mirror of fftwf_plan_dft_r2c with complex in and real out.
+    type(C_PTR) function fftwf_plan_dft_c2r(rank,n,in,out,flags) bind(C, name='fftwf_plan_dft_c2r')
+      import
+      integer(C_INT), value :: rank
+      integer(C_INT), dimension(*), intent(in) :: n
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_c2r
+    ! C binding for fftwf_plan_dft_c2r_1d: one by-value extent n.
+    type(C_PTR) function fftwf_plan_dft_c2r_1d(n,in,out,flags) bind(C, name='fftwf_plan_dft_c2r_1d')
+      import
+      integer(C_INT), value :: n
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_c2r_1d
+    ! C binding for fftwf_plan_dft_c2r_2d: two by-value extents n0, n1.
+    type(C_PTR) function fftwf_plan_dft_c2r_2d(n0,n1,in,out,flags) bind(C, name='fftwf_plan_dft_c2r_2d')
+      import
+      integer(C_INT), value :: n0
+      integer(C_INT), value :: n1
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_c2r_2d
+    ! C binding for fftwf_plan_dft_c2r_3d: three by-value extents n0, n1, n2.
+    type(C_PTR) function fftwf_plan_dft_c2r_3d(n0,n1,n2,in,out,flags) bind(C, name='fftwf_plan_dft_c2r_3d')
+      import
+      integer(C_INT), value :: n0
+      integer(C_INT), value :: n1
+      integer(C_INT), value :: n2
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_dft_c2r_3d
+    ! C binding for fftwf_plan_guru_dft_r2c: dimensions are passed as arrays of fftwf_iodim (C_INT members).
+    type(C_PTR) function fftwf_plan_guru_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) &
+         bind(C, name='fftwf_plan_guru_dft_r2c')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru_dft_r2c
+    ! C binding for fftwf_plan_guru_dft_c2r: fftwf_iodim dimension arrays, complex in, real out.
+    type(C_PTR) function fftwf_plan_guru_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) &
+         bind(C, name='fftwf_plan_guru_dft_c2r')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim), dimension(*), intent(in) :: howmany_dims
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru_dft_c2r
+    ! C binding for fftwf_plan_guru_split_dft_r2c: real in, with two separate real output arrays ro/io.
+    type(C_PTR) function fftwf_plan_guru_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) &
+         bind(C, name='fftwf_plan_guru_split_dft_r2c')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: ro
+      real(C_FLOAT), dimension(*), intent(out) :: io
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru_split_dft_r2c
+    ! C binding for fftwf_plan_guru_split_dft_c2r: two separate real input arrays ri/ii, real out.
+    type(C_PTR) function fftwf_plan_guru_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) &
+         bind(C, name='fftwf_plan_guru_split_dft_c2r')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: ri
+      real(C_FLOAT), dimension(*), intent(out) :: ii
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru_split_dft_c2r
+    ! C binding for fftwf_plan_guru64_dft_r2c: as the guru r2c binding, but with fftwf_iodim64 (C_INTPTR_T members).
+    type(C_PTR) function fftwf_plan_guru64_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) &
+         bind(C, name='fftwf_plan_guru64_dft_r2c')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru64_dft_r2c
+    ! C binding for fftwf_plan_guru64_dft_c2r: fftwf_iodim64 dimension arrays, complex in, real out.
+    type(C_PTR) function fftwf_plan_guru64_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) &
+         bind(C, name='fftwf_plan_guru64_dft_c2r')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru64_dft_c2r
+    ! C binding for fftwf_plan_guru64_split_dft_r2c: fftwf_iodim64 dimension arrays, real in, real ro/io out.
+    type(C_PTR) function fftwf_plan_guru64_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) &
+         bind(C, name='fftwf_plan_guru64_split_dft_r2c')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: ro
+      real(C_FLOAT), dimension(*), intent(out) :: io
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru64_split_dft_r2c
+    ! C binding for fftwf_plan_guru64_split_dft_c2r: fftwf_iodim64 dimension arrays, real ri/ii in, real out.
+    type(C_PTR) function fftwf_plan_guru64_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) &
+         bind(C, name='fftwf_plan_guru64_split_dft_c2r')
+      import
+      integer(C_INT), value :: rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: dims
+      integer(C_INT), value :: howmany_rank
+      type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims
+      real(C_FLOAT), dimension(*), intent(out) :: ri
+      real(C_FLOAT), dimension(*), intent(out) :: ii
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), value :: flags
+    end function fftwf_plan_guru64_split_dft_c2r
+    ! C binding for fftwf_execute_dft_r2c: C pointer p by value, real array in, complex array out.
+    subroutine fftwf_execute_dft_r2c(p,in,out) bind(C, name='fftwf_execute_dft_r2c')
+      import
+      type(C_PTR), value :: p
+      real(C_FLOAT), dimension(*), intent(inout) :: in  ! inout: the caller must pass a definable array
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(out) :: out
+    end subroutine fftwf_execute_dft_r2c
+    ! C binding for fftwf_execute_dft_c2r: C pointer p by value, complex array in, real array out.
+    subroutine fftwf_execute_dft_c2r(p,in,out) bind(C, name='fftwf_execute_dft_c2r')
+      import
+      type(C_PTR), value :: p
+      complex(C_FLOAT_COMPLEX), dimension(*), intent(inout) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: out
+    end subroutine fftwf_execute_dft_c2r
+    ! C binding for fftwf_execute_split_dft_r2c: real array in, two separate real output arrays ro/io.
+    subroutine fftwf_execute_split_dft_r2c(p,in,ro,io) bind(C, name='fftwf_execute_split_dft_r2c')
+      import
+      type(C_PTR), value :: p
+      real(C_FLOAT), dimension(*), intent(inout) :: in
+      real(C_FLOAT), dimension(*), intent(out) :: ro
+      real(C_FLOAT), dimension(*), intent(out) :: io
+    end subroutine fftwf_execute_split_dft_r2c
+    ! C binding for fftwf_execute_split_dft_c2r: two separate real input arrays ri/ii, real array out.
+    subroutine fftwf_execute_split_dft_c2r(p,ri,ii,out) bind(C, name='fftwf_execute_split_dft_c2r')
+      import
+      type(C_PTR), value :: p
+      real(C_FLOAT), dimension(*), intent(inout) :: ri
+      real(C_FLOAT), dimension(*), intent(inout) :: ii
+      real(C_FLOAT), dimension(*), intent(out) :: out
+    end subroutine fftwf_execute_split_dft_c2r
+    ! C binding for fftwf_plan_many_r2r: real in and out, plus an array of C_FFTW_R2R_KIND selectors.
+    type(C_PTR) function fftwf_plan_many_r2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,kind,flags) &
+         bind(C, name='fftwf_plan_many_r2r')
+      import
+      integer(C_INT), value :: rank
+      integer(C_INT), dimension(*), intent(in) :: n
+      integer(C_INT), value :: howmany
+      real(C_FLOAT), dimension(*), intent(out) :: in
+      integer(C_INT), dimension(*), intent(in) :: inembed
+      integer(C_INT), value :: istride
+      integer(C_INT), value :: idist
+      real(C_FLOAT), dimension(*), intent(out) :: out
+      integer(C_INT), dimension(*), intent(in) :: onembed
+      integer(C_INT), value :: ostride
+      integer(C_INT), value :: odist
+      integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind  ! assumed-size array here, unlike the by-value kind of fftwf_plan_r2r_1d
+      integer(C_INT), value :: flags
+    end function fftwf_plan_many_r2r
+
+
type(C_PTR) function fftwf_plan_r2r(rank,n,in,out,kind,flags) bind(C, name='fftwf_plan_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(C_FLOAT), dimension(*), intent(out) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwf_plan_r2r + + type(C_PTR) function fftwf_plan_r2r_1d(n,in,out,kind,flags) bind(C, name='fftwf_plan_r2r_1d') + import + integer(C_INT), value :: n + real(C_FLOAT), dimension(*), intent(out) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind + integer(C_INT), value :: flags + end function fftwf_plan_r2r_1d + + type(C_PTR) function fftwf_plan_r2r_2d(n0,n1,in,out,kind0,kind1,flags) bind(C, name='fftwf_plan_r2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(C_FLOAT), dimension(*), intent(out) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_INT), value :: flags + end function fftwf_plan_r2r_2d + + type(C_PTR) function fftwf_plan_r2r_3d(n0,n1,n2,in,out,kind0,kind1,kind2,flags) bind(C, name='fftwf_plan_r2r_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + real(C_FLOAT), dimension(*), intent(out) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_FFTW_R2R_KIND), value :: kind2 + integer(C_INT), value :: flags + end function fftwf_plan_r2r_3d + + type(C_PTR) function fftwf_plan_guru_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftwf_plan_guru_r2r') + import + integer(C_INT), value :: rank + type(fftwf_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwf_iodim), dimension(*), 
intent(in) :: howmany_dims + real(C_FLOAT), dimension(*), intent(out) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwf_plan_guru_r2r + + type(C_PTR) function fftwf_plan_guru64_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftwf_plan_guru64_r2r') + import + integer(C_INT), value :: rank + type(fftwf_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwf_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_FLOAT), dimension(*), intent(out) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwf_plan_guru64_r2r + + subroutine fftwf_execute_r2r(p,in,out) bind(C, name='fftwf_execute_r2r') + import + type(C_PTR), value :: p + real(C_FLOAT), dimension(*), intent(inout) :: in + real(C_FLOAT), dimension(*), intent(out) :: out + end subroutine fftwf_execute_r2r + + subroutine fftwf_destroy_plan(p) bind(C, name='fftwf_destroy_plan') + import + type(C_PTR), value :: p + end subroutine fftwf_destroy_plan + + subroutine fftwf_forget_wisdom() bind(C, name='fftwf_forget_wisdom') + import + end subroutine fftwf_forget_wisdom + + subroutine fftwf_cleanup() bind(C, name='fftwf_cleanup') + import + end subroutine fftwf_cleanup + + subroutine fftwf_set_timelimit(t) bind(C, name='fftwf_set_timelimit') + import + real(C_DOUBLE), value :: t + end subroutine fftwf_set_timelimit + + subroutine fftwf_plan_with_nthreads(nthreads) bind(C, name='fftwf_plan_with_nthreads') + import + integer(C_INT), value :: nthreads + end subroutine fftwf_plan_with_nthreads + + integer(C_INT) function fftwf_init_threads() bind(C, name='fftwf_init_threads') + import + end function fftwf_init_threads + + subroutine fftwf_cleanup_threads() bind(C, name='fftwf_cleanup_threads') + import + end subroutine 
fftwf_cleanup_threads + + integer(C_INT) function fftwf_export_wisdom_to_filename(filename) bind(C, name='fftwf_export_wisdom_to_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftwf_export_wisdom_to_filename + + subroutine fftwf_export_wisdom_to_file(output_file) bind(C, name='fftwf_export_wisdom_to_file') + import + type(C_PTR), value :: output_file + end subroutine fftwf_export_wisdom_to_file + + type(C_PTR) function fftwf_export_wisdom_to_string() bind(C, name='fftwf_export_wisdom_to_string') + import + end function fftwf_export_wisdom_to_string + + subroutine fftwf_export_wisdom(write_char,data) bind(C, name='fftwf_export_wisdom') + import + type(C_FUNPTR), value :: write_char + type(C_PTR), value :: data + end subroutine fftwf_export_wisdom + + integer(C_INT) function fftwf_import_system_wisdom() bind(C, name='fftwf_import_system_wisdom') + import + end function fftwf_import_system_wisdom + + integer(C_INT) function fftwf_import_wisdom_from_filename(filename) bind(C, name='fftwf_import_wisdom_from_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftwf_import_wisdom_from_filename + + integer(C_INT) function fftwf_import_wisdom_from_file(input_file) bind(C, name='fftwf_import_wisdom_from_file') + import + type(C_PTR), value :: input_file + end function fftwf_import_wisdom_from_file + + integer(C_INT) function fftwf_import_wisdom_from_string(input_string) bind(C, name='fftwf_import_wisdom_from_string') + import + character(C_CHAR), dimension(*), intent(in) :: input_string + end function fftwf_import_wisdom_from_string + + integer(C_INT) function fftwf_import_wisdom(read_char,data) bind(C, name='fftwf_import_wisdom') + import + type(C_FUNPTR), value :: read_char + type(C_PTR), value :: data + end function fftwf_import_wisdom + + subroutine fftwf_fprint_plan(p,output_file) bind(C, name='fftwf_fprint_plan') + import + type(C_PTR), value :: p + type(C_PTR), value :: output_file 
+ end subroutine fftwf_fprint_plan + + subroutine fftwf_print_plan(p) bind(C, name='fftwf_print_plan') + import + type(C_PTR), value :: p + end subroutine fftwf_print_plan + + type(C_PTR) function fftwf_sprint_plan(p) bind(C, name='fftwf_sprint_plan') + import + type(C_PTR), value :: p + end function fftwf_sprint_plan + + type(C_PTR) function fftwf_malloc(n) bind(C, name='fftwf_malloc') + import + integer(C_SIZE_T), value :: n + end function fftwf_malloc + + type(C_PTR) function fftwf_alloc_real(n) bind(C, name='fftwf_alloc_real') + import + integer(C_SIZE_T), value :: n + end function fftwf_alloc_real + + type(C_PTR) function fftwf_alloc_complex(n) bind(C, name='fftwf_alloc_complex') + import + integer(C_SIZE_T), value :: n + end function fftwf_alloc_complex + + subroutine fftwf_free(p) bind(C, name='fftwf_free') + import + type(C_PTR), value :: p + end subroutine fftwf_free + + subroutine fftwf_flops(p,add,mul,fmas) bind(C, name='fftwf_flops') + import + type(C_PTR), value :: p + real(C_DOUBLE), intent(out) :: add + real(C_DOUBLE), intent(out) :: mul + real(C_DOUBLE), intent(out) :: fmas + end subroutine fftwf_flops + + real(C_DOUBLE) function fftwf_estimate_cost(p) bind(C, name='fftwf_estimate_cost') + import + type(C_PTR), value :: p + end function fftwf_estimate_cost + + real(C_DOUBLE) function fftwf_cost(p) bind(C, name='fftwf_cost') + import + type(C_PTR), value :: p + end function fftwf_cost + + integer(C_INT) function fftwf_alignment_of(p) bind(C, name='fftwf_alignment_of') + import + real(C_FLOAT), dimension(*), intent(out) :: p + end function fftwf_alignment_of + + end interface diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/fftw3.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/fftw3.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * The following statement of license applies *only* to 
this header file, + * and *not* to the other files distributed with FFTW or derived therefrom: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/***************************** NOTE TO USERS ********************************* + * + * THIS IS A HEADER FILE, NOT A MANUAL + * + * If you want to know how to use FFTW, please read the manual, + * online at http://www.fftw.org/doc/ and also included with FFTW. + * For a quick start, see the manual's tutorial section. + * + * (Reading header files to learn how to use a library is a habit + * stemming from code lacking a proper manual. Arguably, it's a + * *bad* habit in most cases, because header files can contain + * interfaces that are not part of the public, stable API.) 
+ * + ****************************************************************************/ + +#ifndef FFTW3_H +#define FFTW3_H + +#include <stdio.h> + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/* If <complex.h> is included, use the C99 complex type. Otherwise + define a type bit-compatible with C99 complex */ +#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) +# define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C +#else +# define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2] +#endif + +#define FFTW_CONCAT(prefix, name) prefix ## name +#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name) +#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name) +#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name) +#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name) + +/* IMPORTANT: for Windows compilers, you should add a line + #define FFTW_DLL + here and in kernel/ifftw.h if you are compiling/using FFTW as a + DLL, in order to do the proper importing/exporting, or + alternatively compile with -DFFTW_DLL or the equivalent + command-line flag. This is not necessary under MinGW/Cygwin, where + libtool does the imports/exports automatically. 
*/ +#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) + /* annoying Windows syntax for shared-library declarations */ +# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */ +# define FFTW_EXTERN extern __declspec(dllexport) +# else /* user is calling FFTW; import symbol */ +# define FFTW_EXTERN extern __declspec(dllimport) +# endif +#else +# define FFTW_EXTERN extern +#endif + +enum fftw_r2r_kind_do_not_use_me { + FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2, + FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6, + FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10 +}; + +struct fftw_iodim_do_not_use_me { + int n; /* dimension size */ + int is; /* input stride */ + int os; /* output stride */ +}; + +#include <stddef.h> /* for ptrdiff_t */ +struct fftw_iodim64_do_not_use_me { + ptrdiff_t n; /* dimension size */ + ptrdiff_t is; /* input stride */ + ptrdiff_t os; /* output stride */ +}; + +typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *); +typedef int (*fftw_read_char_func_do_not_use_me)(void *); + +/* + huge second-order macro that defines prototypes for all API + functions. 
We expand this macro for each supported precision + + X: name-mangling macro + R: real data type + C: complex data type +*/ + +#define FFTW_DEFINE_API(X, R, C) \ + \ +FFTW_DEFINE_COMPLEX(R, C); \ + \ +typedef struct X(plan_s) *X(plan); \ + \ +typedef struct fftw_iodim_do_not_use_me X(iodim); \ +typedef struct fftw_iodim64_do_not_use_me X(iodim64); \ + \ +typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \ + \ +typedef fftw_write_char_func_do_not_use_me X(write_char_func); \ +typedef fftw_read_char_func_do_not_use_me X(read_char_func); \ + \ +FFTW_EXTERN void X(execute)(const X(plan) p); \ + \ +FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n, \ + C *in, C *out, int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1, \ + C *in, C *out, int sign, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2, \ + C *in, C *out, int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n, \ + int howmany, \ + C *in, const int *inembed, \ + int istride, int idist, \ + C *out, const int *onembed, \ + int ostride, int odist, \ + int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + C *in, C *out, \ + int sign, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *ri, R *ii, R *ro, R *io, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + C *in, C *out, \ + int sign, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *ri, R *ii, R *ro, R *io, \ + unsigned 
flags); \ + \ +FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out); \ +FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \ + R *ro, R *io); \ + \ +FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n, \ + int howmany, \ + R *in, const int *inembed, \ + int istride, int idist, \ + C *out, const int *onembed, \ + int ostride, int odist, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n, \ + R *in, C *out, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1, \ + R *in, C *out, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1, \ + int n2, \ + R *in, C *out, unsigned flags); \ + \ + \ +FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n, \ + int howmany, \ + C *in, const int *inembed, \ + int istride, int idist, \ + R *out, const int *onembed, \ + int ostride, int odist, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n, \ + C *in, R *out, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1, \ + C *in, R *out, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1, \ + int n2, \ + C *in, R *out, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *in, C *out, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + C *in, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)( \ + int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *in, R *ro, R *io, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)( \ + 
int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *ri, R *ii, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *in, C *out, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + C *in, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)( \ + int rank, const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *in, R *ro, R *io, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)( \ + int rank, const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *ri, R *ii, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \ +FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \ + \ +FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p, \ + R *in, R *ro, R *io); \ +FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p, \ + R *ri, R *ii, R *out); \ + \ +FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n, \ + int howmany, \ + R *in, const int *inembed, \ + int istride, int idist, \ + R *out, const int *onembed, \ + int ostride, int odist, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out, \ + X(r2r_kind) kind, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \ + X(r2r_kind) kind0, X(r2r_kind) kind1, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2, \ + R *in, R *out, X(r2r_kind) kind0, \ + X(r2r_kind) kind1, X(r2r_kind) kind2, \ + unsigned flags); 
\ + \ +FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *in, R *out, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *in, R *out, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out); \ + \ +FFTW_EXTERN void X(destroy_plan)(X(plan) p); \ +FFTW_EXTERN void X(forget_wisdom)(void); \ +FFTW_EXTERN void X(cleanup)(void); \ + \ +FFTW_EXTERN void X(set_timelimit)(double t); \ + \ +FFTW_EXTERN void X(plan_with_nthreads)(int nthreads); \ +FFTW_EXTERN int X(init_threads)(void); \ +FFTW_EXTERN void X(cleanup_threads)(void); \ + \ +FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename); \ +FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file); \ +FFTW_EXTERN char *X(export_wisdom_to_string)(void); \ +FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char, \ + void *data); \ +FFTW_EXTERN int X(import_system_wisdom)(void); \ +FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename); \ +FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file); \ +FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string); \ +FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \ + \ +FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file); \ +FFTW_EXTERN void X(print_plan)(const X(plan) p); \ +FFTW_EXTERN char *X(sprint_plan)(const X(plan) p); \ + \ +FFTW_EXTERN void *X(malloc)(size_t n); \ +FFTW_EXTERN R *X(alloc_real)(size_t n); \ +FFTW_EXTERN C *X(alloc_complex)(size_t n); \ +FFTW_EXTERN void X(free)(void *p); \ + \ +FFTW_EXTERN void X(flops)(const X(plan) p, \ + double *add, double *mul, double *fmas); \ +FFTW_EXTERN double X(estimate_cost)(const X(plan) p); \ +FFTW_EXTERN double X(cost)(const X(plan) p); \ + \ 
+FFTW_EXTERN int X(alignment_of)(R *p); \ +FFTW_EXTERN const char X(version)[]; \ +FFTW_EXTERN const char X(cc)[]; \ +FFTW_EXTERN const char X(codelet_optim)[]; + + +/* end of FFTW_DEFINE_API macro */ + +FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) +FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) +FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) + +/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64 + for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \ + && !(defined(__ICC) || defined(__INTEL_COMPILER)) \ + && (defined(__i386__) || defined(__x86_64__) || defined(__ia64__)) +# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) +/* note: __float128 is a typedef, which is not supported with the _Complex + keyword in gcc, so instead we use this ugly __attribute__ version. + However, we can't simply pass the __attribute__ version to + FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer + types. Hence redefining FFTW_DEFINE_COMPLEX. Ugh. 
*/ +# undef FFTW_DEFINE_COMPLEX +# define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C +# endif +FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex) +#endif + +#define FFTW_FORWARD (-1) +#define FFTW_BACKWARD (+1) + +#define FFTW_NO_TIMELIMIT (-1.0) + +/* documented flags */ +#define FFTW_MEASURE (0U) +#define FFTW_DESTROY_INPUT (1U << 0) +#define FFTW_UNALIGNED (1U << 1) +#define FFTW_CONSERVE_MEMORY (1U << 2) +#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */ +#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */ +#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */ +#define FFTW_ESTIMATE (1U << 6) +#define FFTW_WISDOM_ONLY (1U << 21) + +/* undocumented beyond-guru flags */ +#define FFTW_ESTIMATE_PATIENT (1U << 7) +#define FFTW_BELIEVE_PCOST (1U << 8) +#define FFTW_NO_DFT_R2HC (1U << 9) +#define FFTW_NO_NONTHREADED (1U << 10) +#define FFTW_NO_BUFFERING (1U << 11) +#define FFTW_NO_INDIRECT_OP (1U << 12) +#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */ +#define FFTW_NO_RANK_SPLITS (1U << 14) +#define FFTW_NO_VRANK_SPLITS (1U << 15) +#define FFTW_NO_VRECURSE (1U << 16) +#define FFTW_NO_SIMD (1U << 17) +#define FFTW_NO_SLOW (1U << 18) +#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19) +#define FFTW_ALLOW_PRUNING (1U << 20) + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* FFTW3_H */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/fftw3l.f03 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/fftw3l.f03 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,605 @@ +! Generated automatically. DO NOT EDIT! 
+ + + type, bind(C) :: fftwl_iodim + integer(C_INT) n, is, os + end type fftwl_iodim + type, bind(C) :: fftwl_iodim64 + integer(C_INTPTR_T) n, is, os + end type fftwl_iodim64 + + interface + type(C_PTR) function fftwl_plan_dft(rank,n,in,out,sign,flags) bind(C, name='fftwl_plan_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_dft + + type(C_PTR) function fftwl_plan_dft_1d(n,in,out,sign,flags) bind(C, name='fftwl_plan_dft_1d') + import + integer(C_INT), value :: n + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_dft_1d + + type(C_PTR) function fftwl_plan_dft_2d(n0,n1,in,out,sign,flags) bind(C, name='fftwl_plan_dft_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_dft_2d + + type(C_PTR) function fftwl_plan_dft_3d(n0,n1,n2,in,out,sign,flags) bind(C, name='fftwl_plan_dft_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_dft_3d + + type(C_PTR) function fftwl_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags) & + bind(C, name='fftwl_plan_many_dft') + import + integer(C_INT), 
value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_many_dft + + type(C_PTR) function fftwl_plan_guru_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftwl_plan_guru_dft') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_guru_dft + + type(C_PTR) function fftwl_plan_guru_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftwl_plan_guru_split_dft') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ri + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ii + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ro + real(C_LONG_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwl_plan_guru_split_dft + + type(C_PTR) function fftwl_plan_guru64_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftwl_plan_guru64_dft') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), 
dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_plan_guru64_dft + + type(C_PTR) function fftwl_plan_guru64_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftwl_plan_guru64_split_dft') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ri + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ii + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ro + real(C_LONG_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwl_plan_guru64_split_dft + + subroutine fftwl_execute_dft(p,in,out) bind(C, name='fftwl_execute_dft') + import + type(C_PTR), value :: p + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(inout) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + end subroutine fftwl_execute_dft + + subroutine fftwl_execute_split_dft(p,ri,ii,ro,io) bind(C, name='fftwl_execute_split_dft') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: ri + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: ii + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ro + real(C_LONG_DOUBLE), dimension(*), intent(out) :: io + end subroutine fftwl_execute_split_dft + + type(C_PTR) function fftwl_plan_many_dft_r2c(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) & + bind(C, name='fftwl_plan_many_dft_r2c') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + 
real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: flags + end function fftwl_plan_many_dft_r2c + + type(C_PTR) function fftwl_plan_dft_r2c(rank,n,in,out,flags) bind(C, name='fftwl_plan_dft_r2c') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_r2c + + type(C_PTR) function fftwl_plan_dft_r2c_1d(n,in,out,flags) bind(C, name='fftwl_plan_dft_r2c_1d') + import + integer(C_INT), value :: n + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_r2c_1d + + type(C_PTR) function fftwl_plan_dft_r2c_2d(n0,n1,in,out,flags) bind(C, name='fftwl_plan_dft_r2c_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_r2c_2d + + type(C_PTR) function fftwl_plan_dft_r2c_3d(n0,n1,n2,in,out,flags) bind(C, name='fftwl_plan_dft_r2c_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_r2c_3d + + type(C_PTR) function 
fftwl_plan_many_dft_c2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) & + bind(C, name='fftwl_plan_many_dft_c2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: flags + end function fftwl_plan_many_dft_c2r + + type(C_PTR) function fftwl_plan_dft_c2r(rank,n,in,out,flags) bind(C, name='fftwl_plan_dft_c2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_c2r + + type(C_PTR) function fftwl_plan_dft_c2r_1d(n,in,out,flags) bind(C, name='fftwl_plan_dft_c2r_1d') + import + integer(C_INT), value :: n + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_c2r_1d + + type(C_PTR) function fftwl_plan_dft_c2r_2d(n0,n1,in,out,flags) bind(C, name='fftwl_plan_dft_c2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_c2r_2d + + type(C_PTR) function fftwl_plan_dft_c2r_3d(n0,n1,n2,in,out,flags) bind(C, name='fftwl_plan_dft_c2r_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + 
complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_dft_c2r_3d + + type(C_PTR) function fftwl_plan_guru_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwl_plan_guru_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_guru_dft_r2c + + type(C_PTR) function fftwl_plan_guru_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwl_plan_guru_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_guru_dft_c2r + + type(C_PTR) function fftwl_plan_guru_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) & + bind(C, name='fftwl_plan_guru_split_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ro + real(C_LONG_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwl_plan_guru_split_dft_r2c + + type(C_PTR) function fftwl_plan_guru_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) & + bind(C, 
name='fftwl_plan_guru_split_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ri + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ii + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_guru_split_dft_c2r + + type(C_PTR) function fftwl_plan_guru64_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwl_plan_guru64_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_guru64_dft_r2c + + type(C_PTR) function fftwl_plan_guru64_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwl_plan_guru64_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_guru64_dft_c2r + + type(C_PTR) function fftwl_plan_guru64_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) & + bind(C, name='fftwl_plan_guru64_split_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + 
real(C_LONG_DOUBLE), dimension(*), intent(out) :: ro + real(C_LONG_DOUBLE), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwl_plan_guru64_split_dft_r2c + + type(C_PTR) function fftwl_plan_guru64_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) & + bind(C, name='fftwl_plan_guru64_split_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ri + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ii + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwl_plan_guru64_split_dft_c2r + + subroutine fftwl_execute_dft_r2c(p,in,out) bind(C, name='fftwl_execute_dft_r2c') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + end subroutine fftwl_execute_dft_r2c + + subroutine fftwl_execute_dft_c2r(p,in,out) bind(C, name='fftwl_execute_dft_c2r') + import + type(C_PTR), value :: p + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(inout) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + end subroutine fftwl_execute_dft_c2r + + subroutine fftwl_execute_split_dft_r2c(p,in,ro,io) bind(C, name='fftwl_execute_split_dft_r2c') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: ro + real(C_LONG_DOUBLE), dimension(*), intent(out) :: io + end subroutine fftwl_execute_split_dft_r2c + + subroutine fftwl_execute_split_dft_c2r(p,ri,ii,out) bind(C, name='fftwl_execute_split_dft_c2r') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: ri + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: ii + real(C_LONG_DOUBLE), dimension(*), 
intent(out) :: out + end subroutine fftwl_execute_split_dft_c2r + + type(C_PTR) function fftwl_plan_many_r2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,kind,flags) & + bind(C, name='fftwl_plan_many_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwl_plan_many_r2r + + type(C_PTR) function fftwl_plan_r2r(rank,n,in,out,kind,flags) bind(C, name='fftwl_plan_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwl_plan_r2r + + type(C_PTR) function fftwl_plan_r2r_1d(n,in,out,kind,flags) bind(C, name='fftwl_plan_r2r_1d') + import + integer(C_INT), value :: n + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind + integer(C_INT), value :: flags + end function fftwl_plan_r2r_1d + + type(C_PTR) function fftwl_plan_r2r_2d(n0,n1,in,out,kind0,kind1,flags) bind(C, name='fftwl_plan_r2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + 
integer(C_INT), value :: flags + end function fftwl_plan_r2r_2d + + type(C_PTR) function fftwl_plan_r2r_3d(n0,n1,n2,in,out,kind0,kind1,kind2,flags) bind(C, name='fftwl_plan_r2r_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_FFTW_R2R_KIND), value :: kind2 + integer(C_INT), value :: flags + end function fftwl_plan_r2r_3d + + type(C_PTR) function fftwl_plan_guru_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftwl_plan_guru_r2r') + import + integer(C_INT), value :: rank + type(fftwl_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwl_plan_guru_r2r + + type(C_PTR) function fftwl_plan_guru64_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftwl_plan_guru64_r2r') + import + integer(C_INT), value :: rank + type(fftwl_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwl_iodim64), dimension(*), intent(in) :: howmany_dims + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwl_plan_guru64_r2r + + subroutine fftwl_execute_r2r(p,in,out) bind(C, name='fftwl_execute_r2r') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + end subroutine 
fftwl_execute_r2r + + subroutine fftwl_destroy_plan(p) bind(C, name='fftwl_destroy_plan') + import + type(C_PTR), value :: p + end subroutine fftwl_destroy_plan + + subroutine fftwl_forget_wisdom() bind(C, name='fftwl_forget_wisdom') + import + end subroutine fftwl_forget_wisdom + + subroutine fftwl_cleanup() bind(C, name='fftwl_cleanup') + import + end subroutine fftwl_cleanup + + subroutine fftwl_set_timelimit(t) bind(C, name='fftwl_set_timelimit') + import + real(C_DOUBLE), value :: t + end subroutine fftwl_set_timelimit + + subroutine fftwl_plan_with_nthreads(nthreads) bind(C, name='fftwl_plan_with_nthreads') + import + integer(C_INT), value :: nthreads + end subroutine fftwl_plan_with_nthreads + + integer(C_INT) function fftwl_init_threads() bind(C, name='fftwl_init_threads') + import + end function fftwl_init_threads + + subroutine fftwl_cleanup_threads() bind(C, name='fftwl_cleanup_threads') + import + end subroutine fftwl_cleanup_threads + + integer(C_INT) function fftwl_export_wisdom_to_filename(filename) bind(C, name='fftwl_export_wisdom_to_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftwl_export_wisdom_to_filename + + subroutine fftwl_export_wisdom_to_file(output_file) bind(C, name='fftwl_export_wisdom_to_file') + import + type(C_PTR), value :: output_file + end subroutine fftwl_export_wisdom_to_file + + type(C_PTR) function fftwl_export_wisdom_to_string() bind(C, name='fftwl_export_wisdom_to_string') + import + end function fftwl_export_wisdom_to_string + + subroutine fftwl_export_wisdom(write_char,data) bind(C, name='fftwl_export_wisdom') + import + type(C_FUNPTR), value :: write_char + type(C_PTR), value :: data + end subroutine fftwl_export_wisdom + + integer(C_INT) function fftwl_import_system_wisdom() bind(C, name='fftwl_import_system_wisdom') + import + end function fftwl_import_system_wisdom + + integer(C_INT) function fftwl_import_wisdom_from_filename(filename) bind(C, 
name='fftwl_import_wisdom_from_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftwl_import_wisdom_from_filename + + integer(C_INT) function fftwl_import_wisdom_from_file(input_file) bind(C, name='fftwl_import_wisdom_from_file') + import + type(C_PTR), value :: input_file + end function fftwl_import_wisdom_from_file + + integer(C_INT) function fftwl_import_wisdom_from_string(input_string) bind(C, name='fftwl_import_wisdom_from_string') + import + character(C_CHAR), dimension(*), intent(in) :: input_string + end function fftwl_import_wisdom_from_string + + integer(C_INT) function fftwl_import_wisdom(read_char,data) bind(C, name='fftwl_import_wisdom') + import + type(C_FUNPTR), value :: read_char + type(C_PTR), value :: data + end function fftwl_import_wisdom + + subroutine fftwl_fprint_plan(p,output_file) bind(C, name='fftwl_fprint_plan') + import + type(C_PTR), value :: p + type(C_PTR), value :: output_file + end subroutine fftwl_fprint_plan + + subroutine fftwl_print_plan(p) bind(C, name='fftwl_print_plan') + import + type(C_PTR), value :: p + end subroutine fftwl_print_plan + + type(C_PTR) function fftwl_sprint_plan(p) bind(C, name='fftwl_sprint_plan') + import + type(C_PTR), value :: p + end function fftwl_sprint_plan + + type(C_PTR) function fftwl_malloc(n) bind(C, name='fftwl_malloc') + import + integer(C_SIZE_T), value :: n + end function fftwl_malloc + + type(C_PTR) function fftwl_alloc_real(n) bind(C, name='fftwl_alloc_real') + import + integer(C_SIZE_T), value :: n + end function fftwl_alloc_real + + type(C_PTR) function fftwl_alloc_complex(n) bind(C, name='fftwl_alloc_complex') + import + integer(C_SIZE_T), value :: n + end function fftwl_alloc_complex + + subroutine fftwl_free(p) bind(C, name='fftwl_free') + import + type(C_PTR), value :: p + end subroutine fftwl_free + + subroutine fftwl_flops(p,add,mul,fmas) bind(C, name='fftwl_flops') + import + type(C_PTR), value :: p + real(C_DOUBLE), intent(out) :: add + 
real(C_DOUBLE), intent(out) :: mul + real(C_DOUBLE), intent(out) :: fmas + end subroutine fftwl_flops + + real(C_DOUBLE) function fftwl_estimate_cost(p) bind(C, name='fftwl_estimate_cost') + import + type(C_PTR), value :: p + end function fftwl_estimate_cost + + real(C_DOUBLE) function fftwl_cost(p) bind(C, name='fftwl_cost') + import + type(C_PTR), value :: p + end function fftwl_cost + + integer(C_INT) function fftwl_alignment_of(p) bind(C, name='fftwl_alignment_of') + import + real(C_LONG_DOUBLE), dimension(*), intent(out) :: p + end function fftwl_alignment_of + + end interface diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/fftw3q.f03 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/fftw3q.f03 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,601 @@ +! Generated automatically. DO NOT EDIT! + + + type, bind(C) :: fftwq_iodim + integer(C_INT) n, is, os + end type fftwq_iodim + type, bind(C) :: fftwq_iodim64 + integer(C_INTPTR_T) n, is, os + end type fftwq_iodim64 + + interface + type(C_PTR) function fftwq_plan_dft(rank,n,in,out,sign,flags) bind(C, name='fftwq_plan_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_dft + + type(C_PTR) function fftwq_plan_dft_1d(n,in,out,sign,flags) bind(C, name='fftwq_plan_dft_1d') + import + integer(C_INT), value :: n + complex(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_dft_1d + + type(C_PTR) function fftwq_plan_dft_2d(n0,n1,in,out,sign,flags) bind(C, name='fftwq_plan_dft_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + 
integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_dft_2d + + type(C_PTR) function fftwq_plan_dft_3d(n0,n1,n2,in,out,sign,flags) bind(C, name='fftwq_plan_dft_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + complex(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_dft_3d + + type(C_PTR) function fftwq_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags) & + bind(C, name='fftwq_plan_many_dft') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(16), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(16), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_many_dft + + type(C_PTR) function fftwq_plan_guru_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftwq_plan_guru_dft') + import + integer(C_INT), value :: rank + type(fftwq_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + complex(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_guru_dft + + type(C_PTR) function fftwq_plan_guru_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftwq_plan_guru_split_dft') + import + integer(C_INT), value :: rank + type(fftwq_iodim), dimension(*), intent(in) :: dims + 
integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: ri + real(16), dimension(*), intent(out) :: ii + real(16), dimension(*), intent(out) :: ro + real(16), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwq_plan_guru_split_dft + + type(C_PTR) function fftwq_plan_guru64_dft(rank,dims,howmany_rank,howmany_dims,in,out,sign,flags) & + bind(C, name='fftwq_plan_guru64_dft') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + complex(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwq_plan_guru64_dft + + type(C_PTR) function fftwq_plan_guru64_split_dft(rank,dims,howmany_rank,howmany_dims,ri,ii,ro,io,flags) & + bind(C, name='fftwq_plan_guru64_split_dft') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: ri + real(16), dimension(*), intent(out) :: ii + real(16), dimension(*), intent(out) :: ro + real(16), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwq_plan_guru64_split_dft + + subroutine fftwq_execute_dft(p,in,out) bind(C, name='fftwq_execute_dft') + import + type(C_PTR), value :: p + complex(16), dimension(*), intent(inout) :: in + complex(16), dimension(*), intent(out) :: out + end subroutine fftwq_execute_dft + + subroutine fftwq_execute_split_dft(p,ri,ii,ro,io) bind(C, name='fftwq_execute_split_dft') + import + type(C_PTR), value :: p + real(16), dimension(*), intent(inout) :: ri + real(16), dimension(*), intent(inout) :: ii + real(16), dimension(*), 
intent(out) :: ro + real(16), dimension(*), intent(out) :: io + end subroutine fftwq_execute_split_dft + + type(C_PTR) function fftwq_plan_many_dft_r2c(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) & + bind(C, name='fftwq_plan_many_dft_r2c') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + real(16), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + complex(16), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: flags + end function fftwq_plan_many_dft_r2c + + type(C_PTR) function fftwq_plan_dft_r2c(rank,n,in,out,flags) bind(C, name='fftwq_plan_dft_r2c') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_r2c + + type(C_PTR) function fftwq_plan_dft_r2c_1d(n,in,out,flags) bind(C, name='fftwq_plan_dft_r2c_1d') + import + integer(C_INT), value :: n + real(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_r2c_1d + + type(C_PTR) function fftwq_plan_dft_r2c_2d(n0,n1,in,out,flags) bind(C, name='fftwq_plan_dft_r2c_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_r2c_2d + + type(C_PTR) function fftwq_plan_dft_r2c_3d(n0,n1,n2,in,out,flags) bind(C, name='fftwq_plan_dft_r2c_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + 
real(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_r2c_3d + + type(C_PTR) function fftwq_plan_many_dft_c2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,flags) & + bind(C, name='fftwq_plan_many_dft_c2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + complex(16), dimension(*), intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + real(16), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_INT), value :: flags + end function fftwq_plan_many_dft_c2r + + type(C_PTR) function fftwq_plan_dft_c2r(rank,n,in,out,flags) bind(C, name='fftwq_plan_dft_c2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + complex(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_c2r + + type(C_PTR) function fftwq_plan_dft_c2r_1d(n,in,out,flags) bind(C, name='fftwq_plan_dft_c2r_1d') + import + integer(C_INT), value :: n + complex(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_c2r_1d + + type(C_PTR) function fftwq_plan_dft_c2r_2d(n0,n1,in,out,flags) bind(C, name='fftwq_plan_dft_c2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + complex(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_c2r_2d + + type(C_PTR) function fftwq_plan_dft_c2r_3d(n0,n1,n2,in,out,flags) bind(C, name='fftwq_plan_dft_c2r_3d') + import + integer(C_INT), value :: n0 + 
integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + complex(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_dft_c2r_3d + + type(C_PTR) function fftwq_plan_guru_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwq_plan_guru_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwq_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_guru_dft_r2c + + type(C_PTR) function fftwq_plan_guru_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwq_plan_guru_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwq_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + complex(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_guru_dft_c2r + + type(C_PTR) function fftwq_plan_guru_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) & + bind(C, name='fftwq_plan_guru_split_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwq_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: ro + real(16), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwq_plan_guru_split_dft_r2c + + type(C_PTR) function fftwq_plan_guru_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) & + bind(C, name='fftwq_plan_guru_split_dft_c2r') + import + integer(C_INT), value :: rank + 
type(fftwq_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: ri + real(16), dimension(*), intent(out) :: ii + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_guru_split_dft_c2r + + type(C_PTR) function fftwq_plan_guru64_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwq_plan_guru64_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: in + complex(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_guru64_dft_r2c + + type(C_PTR) function fftwq_plan_guru64_dft_c2r(rank,dims,howmany_rank,howmany_dims,in,out,flags) & + bind(C, name='fftwq_plan_guru64_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + complex(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_guru64_dft_c2r + + type(C_PTR) function fftwq_plan_guru64_split_dft_r2c(rank,dims,howmany_rank,howmany_dims,in,ro,io,flags) & + bind(C, name='fftwq_plan_guru64_split_dft_r2c') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: ro + real(16), dimension(*), intent(out) :: io + integer(C_INT), value :: flags + end function fftwq_plan_guru64_split_dft_r2c + + type(C_PTR) function 
fftwq_plan_guru64_split_dft_c2r(rank,dims,howmany_rank,howmany_dims,ri,ii,out,flags) & + bind(C, name='fftwq_plan_guru64_split_dft_c2r') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: ri + real(16), dimension(*), intent(out) :: ii + real(16), dimension(*), intent(out) :: out + integer(C_INT), value :: flags + end function fftwq_plan_guru64_split_dft_c2r + + subroutine fftwq_execute_dft_r2c(p,in,out) bind(C, name='fftwq_execute_dft_r2c') + import + type(C_PTR), value :: p + real(16), dimension(*), intent(inout) :: in + complex(16), dimension(*), intent(out) :: out + end subroutine fftwq_execute_dft_r2c + + subroutine fftwq_execute_dft_c2r(p,in,out) bind(C, name='fftwq_execute_dft_c2r') + import + type(C_PTR), value :: p + complex(16), dimension(*), intent(inout) :: in + real(16), dimension(*), intent(out) :: out + end subroutine fftwq_execute_dft_c2r + + subroutine fftwq_execute_split_dft_r2c(p,in,ro,io) bind(C, name='fftwq_execute_split_dft_r2c') + import + type(C_PTR), value :: p + real(16), dimension(*), intent(inout) :: in + real(16), dimension(*), intent(out) :: ro + real(16), dimension(*), intent(out) :: io + end subroutine fftwq_execute_split_dft_r2c + + subroutine fftwq_execute_split_dft_c2r(p,ri,ii,out) bind(C, name='fftwq_execute_split_dft_c2r') + import + type(C_PTR), value :: p + real(16), dimension(*), intent(inout) :: ri + real(16), dimension(*), intent(inout) :: ii + real(16), dimension(*), intent(out) :: out + end subroutine fftwq_execute_split_dft_c2r + + type(C_PTR) function fftwq_plan_many_r2r(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,kind,flags) & + bind(C, name='fftwq_plan_many_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + integer(C_INT), value :: howmany + real(16), dimension(*), 
intent(out) :: in + integer(C_INT), dimension(*), intent(in) :: inembed + integer(C_INT), value :: istride + integer(C_INT), value :: idist + real(16), dimension(*), intent(out) :: out + integer(C_INT), dimension(*), intent(in) :: onembed + integer(C_INT), value :: ostride + integer(C_INT), value :: odist + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwq_plan_many_r2r + + type(C_PTR) function fftwq_plan_r2r(rank,n,in,out,kind,flags) bind(C, name='fftwq_plan_r2r') + import + integer(C_INT), value :: rank + integer(C_INT), dimension(*), intent(in) :: n + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwq_plan_r2r + + type(C_PTR) function fftwq_plan_r2r_1d(n,in,out,kind,flags) bind(C, name='fftwq_plan_r2r_1d') + import + integer(C_INT), value :: n + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind + integer(C_INT), value :: flags + end function fftwq_plan_r2r_1d + + type(C_PTR) function fftwq_plan_r2r_2d(n0,n1,in,out,kind0,kind1,flags) bind(C, name='fftwq_plan_r2r_2d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_INT), value :: flags + end function fftwq_plan_r2r_2d + + type(C_PTR) function fftwq_plan_r2r_3d(n0,n1,n2,in,out,kind0,kind1,kind2,flags) bind(C, name='fftwq_plan_r2r_3d') + import + integer(C_INT), value :: n0 + integer(C_INT), value :: n1 + integer(C_INT), value :: n2 + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + 
integer(C_FFTW_R2R_KIND), value :: kind2 + integer(C_INT), value :: flags + end function fftwq_plan_r2r_3d + + type(C_PTR) function fftwq_plan_guru_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftwq_plan_guru_r2r') + import + integer(C_INT), value :: rank + type(fftwq_iodim), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwq_plan_guru_r2r + + type(C_PTR) function fftwq_plan_guru64_r2r(rank,dims,howmany_rank,howmany_dims,in,out,kind,flags) & + bind(C, name='fftwq_plan_guru64_r2r') + import + integer(C_INT), value :: rank + type(fftwq_iodim64), dimension(*), intent(in) :: dims + integer(C_INT), value :: howmany_rank + type(fftwq_iodim64), dimension(*), intent(in) :: howmany_dims + real(16), dimension(*), intent(out) :: in + real(16), dimension(*), intent(out) :: out + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwq_plan_guru64_r2r + + subroutine fftwq_execute_r2r(p,in,out) bind(C, name='fftwq_execute_r2r') + import + type(C_PTR), value :: p + real(16), dimension(*), intent(inout) :: in + real(16), dimension(*), intent(out) :: out + end subroutine fftwq_execute_r2r + + subroutine fftwq_destroy_plan(p) bind(C, name='fftwq_destroy_plan') + import + type(C_PTR), value :: p + end subroutine fftwq_destroy_plan + + subroutine fftwq_forget_wisdom() bind(C, name='fftwq_forget_wisdom') + import + end subroutine fftwq_forget_wisdom + + subroutine fftwq_cleanup() bind(C, name='fftwq_cleanup') + import + end subroutine fftwq_cleanup + + subroutine fftwq_set_timelimit(t) bind(C, name='fftwq_set_timelimit') + import + real(C_DOUBLE), value :: t + end subroutine fftwq_set_timelimit + + subroutine 
fftwq_plan_with_nthreads(nthreads) bind(C, name='fftwq_plan_with_nthreads') + import + integer(C_INT), value :: nthreads + end subroutine fftwq_plan_with_nthreads + + integer(C_INT) function fftwq_init_threads() bind(C, name='fftwq_init_threads') + import + end function fftwq_init_threads + + subroutine fftwq_cleanup_threads() bind(C, name='fftwq_cleanup_threads') + import + end subroutine fftwq_cleanup_threads + + integer(C_INT) function fftwq_export_wisdom_to_filename(filename) bind(C, name='fftwq_export_wisdom_to_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftwq_export_wisdom_to_filename + + subroutine fftwq_export_wisdom_to_file(output_file) bind(C, name='fftwq_export_wisdom_to_file') + import + type(C_PTR), value :: output_file + end subroutine fftwq_export_wisdom_to_file + + type(C_PTR) function fftwq_export_wisdom_to_string() bind(C, name='fftwq_export_wisdom_to_string') + import + end function fftwq_export_wisdom_to_string + + subroutine fftwq_export_wisdom(write_char,data) bind(C, name='fftwq_export_wisdom') + import + type(C_FUNPTR), value :: write_char + type(C_PTR), value :: data + end subroutine fftwq_export_wisdom + + integer(C_INT) function fftwq_import_system_wisdom() bind(C, name='fftwq_import_system_wisdom') + import + end function fftwq_import_system_wisdom + + integer(C_INT) function fftwq_import_wisdom_from_filename(filename) bind(C, name='fftwq_import_wisdom_from_filename') + import + character(C_CHAR), dimension(*), intent(in) :: filename + end function fftwq_import_wisdom_from_filename + + integer(C_INT) function fftwq_import_wisdom_from_file(input_file) bind(C, name='fftwq_import_wisdom_from_file') + import + type(C_PTR), value :: input_file + end function fftwq_import_wisdom_from_file + + integer(C_INT) function fftwq_import_wisdom_from_string(input_string) bind(C, name='fftwq_import_wisdom_from_string') + import + character(C_CHAR), dimension(*), intent(in) :: input_string + end function 
fftwq_import_wisdom_from_string + + integer(C_INT) function fftwq_import_wisdom(read_char,data) bind(C, name='fftwq_import_wisdom') + import + type(C_FUNPTR), value :: read_char + type(C_PTR), value :: data + end function fftwq_import_wisdom + + subroutine fftwq_fprint_plan(p,output_file) bind(C, name='fftwq_fprint_plan') + import + type(C_PTR), value :: p + type(C_PTR), value :: output_file + end subroutine fftwq_fprint_plan + + subroutine fftwq_print_plan(p) bind(C, name='fftwq_print_plan') + import + type(C_PTR), value :: p + end subroutine fftwq_print_plan + + type(C_PTR) function fftwq_sprint_plan(p) bind(C, name='fftwq_sprint_plan') + import + type(C_PTR), value :: p + end function fftwq_sprint_plan + + type(C_PTR) function fftwq_malloc(n) bind(C, name='fftwq_malloc') + import + integer(C_SIZE_T), value :: n + end function fftwq_malloc + +! Unable to generate Fortran interface for fftwq_alloc_real + type(C_PTR) function fftwq_alloc_complex(n) bind(C, name='fftwq_alloc_complex') + import + integer(C_SIZE_T), value :: n + end function fftwq_alloc_complex + + subroutine fftwq_free(p) bind(C, name='fftwq_free') + import + type(C_PTR), value :: p + end subroutine fftwq_free + + subroutine fftwq_flops(p,add,mul,fmas) bind(C, name='fftwq_flops') + import + type(C_PTR), value :: p + real(C_DOUBLE), intent(out) :: add + real(C_DOUBLE), intent(out) :: mul + real(C_DOUBLE), intent(out) :: fmas + end subroutine fftwq_flops + + real(C_DOUBLE) function fftwq_estimate_cost(p) bind(C, name='fftwq_estimate_cost') + import + type(C_PTR), value :: p + end function fftwq_estimate_cost + + real(C_DOUBLE) function fftwq_cost(p) bind(C, name='fftwq_cost') + import + type(C_PTR), value :: p + end function fftwq_cost + + integer(C_INT) function fftwq_alignment_of(p) bind(C, name='fftwq_alignment_of') + import + real(16), dimension(*), intent(out) :: p + end function fftwq_alignment_of + + end interface diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/flops.c --- /dev/null 
Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/flops.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "api.h"
+/* Store the add/mul/fma operation counts recorded for plan p in *add, *mul and *fma. */
+void X(flops)(const X(plan) p, double *add, double *mul, double *fma)
+{
+     planner *plnr = X(the_planner)();
+     opcnt *o = &p->pln->ops;
+     *add = o->add; *mul = o->mul; *fma = o->fma; /* raw counts taken from the plan */
+     if (plnr->cost_hook) { /* when a hook is installed, each count is passed through it with COST_SUM */
+          *add = plnr->cost_hook(p->prb, *add, COST_SUM);
+          *mul = plnr->cost_hook(p->prb, *mul, COST_SUM);
+          *fma = plnr->cost_hook(p->prb, *fma, COST_SUM);
+     }
+}
+/* Return X(iestimate_cost) evaluated by the global planner for p's plan and problem. */
+double X(estimate_cost)(const X(plan) p)
+{
+     return X(iestimate_cost)(X(the_planner)(), p->pln, p->prb);
+}
+/* Return the pcost value stored in p's underlying plan. */
+double X(cost)(const X(plan) p)
+{
+     return p->pln->pcost;
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/forget-wisdom.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/forget-wisdom.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General
Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "api.h"
+/* Ask the global planner to forget everything (FORGET_EVERYTHING) through its adt->forget hook. */
+void X(forget_wisdom)(void)
+{
+     planner *plnr = X(the_planner)();
+     plnr->adt->forget(plnr, FORGET_EVERYTHING);
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/genf03.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/genf03.pl Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,213 @@
+#!/usr/bin/perl -w
+# Generate Fortran 2003 interfaces from a sequence of C function declarations
+# of the form (one per line):
+#     extern <type> <name>(...args...)
+#     extern <type> <name>(...args...)
+#     ...
+# with no line breaks within a given function. (It's too much work to
+# write a general parser, since we just have to handle FFTW's header files.)
+# canonicalize_type(type): squeeze runs of spaces, trim both ends, and put a space before a '*' glued to a word.
+sub canonicalize_type {
+    my($type);
+    ($type) = @_;
+    $type =~ s/ +/ /g;
+    $type =~ s/^ //;
+    $type =~ s/ $//;
+    $type =~ s/([^\* ])\*/$1 \*/g;
+    return $type;
+}
+
+# C->Fortran map of supported return types (keys are canonicalize_type output)
+%return_types = (
+    "int" => "integer(C_INT)",
+    "ptrdiff_t" => "integer(C_INTPTR_T)",
+    "size_t" => "integer(C_SIZE_T)",
+    "double" => "real(C_DOUBLE)",
+    "float" => "real(C_FLOAT)",
+    "long double" => "real(C_LONG_DOUBLE)",
+    "__float128" => "real(16)", # key must be the C spelling __float128, as in %arg_types
+    "fftw_plan" => "type(C_PTR)",
+    "fftwf_plan" => "type(C_PTR)",
+    "fftwl_plan" => "type(C_PTR)",
+    "fftwq_plan" => "type(C_PTR)",
+    "void *" => "type(C_PTR)",
+    "char *" => "type(C_PTR)",
+    "double *" => "type(C_PTR)",
+    "float *" => "type(C_PTR)",
+    "long double *" => "type(C_PTR)",
+    "__float128 *" => "type(C_PTR)", # lets functions returning __float128 * (e.g. fftwq_alloc_real) be generated
+    "fftw_complex *" => "type(C_PTR)",
+    "fftwf_complex *" => "type(C_PTR)",
+    "fftwl_complex *" => "type(C_PTR)",
+    "fftwq_complex *" => "type(C_PTR)",
+    );
+
+# C->Fortran map of supported argument types (keys are canonicalize_type output)
+%arg_types = (
+    "int" => "integer(C_INT), value",
+    "unsigned" => "integer(C_INT), value",
+    "size_t" => "integer(C_SIZE_T), value",
+    "ptrdiff_t" => "integer(C_INTPTR_T), value",
+
+    "fftw_r2r_kind" => "integer(C_FFTW_R2R_KIND), value",
+    "fftwf_r2r_kind" => "integer(C_FFTW_R2R_KIND), value",
+    "fftwl_r2r_kind" => "integer(C_FFTW_R2R_KIND), value",
+    "fftwq_r2r_kind" => "integer(C_FFTW_R2R_KIND), value",
+
+    "double" => "real(C_DOUBLE), value",
+    "float" => "real(C_FLOAT), value",
+    "long double" => "real(C_LONG_DOUBLE), value",
+    "__float128" => "real(16), value",
+
+    "fftw_complex" => "complex(C_DOUBLE_COMPLEX), value",
+    "fftwf_complex" => "complex(C_FLOAT_COMPLEX), value", # same kind as the "fftwf_complex *" entry below
+    "fftwl_complex" => "complex(C_LONG_DOUBLE_COMPLEX), value", # same kind as the "fftwl_complex *" entry below
+    "fftwq_complex" => "complex(16), value",
+
+    "fftw_plan" => "type(C_PTR), value",
+    "fftwf_plan" => "type(C_PTR), value",
+    "fftwl_plan" => "type(C_PTR), value",
+    "fftwq_plan" => "type(C_PTR), value",
+    "const fftw_plan" => "type(C_PTR), value",
+    "const fftwf_plan" => "type(C_PTR), value",
+    "const fftwl_plan" => "type(C_PTR), value",
+    "const fftwq_plan" => "type(C_PTR), value",
+
+    "const int *" => "integer(C_INT), dimension(*), intent(in)",
+    "ptrdiff_t *" => "integer(C_INTPTR_T), intent(out)",
+    "const ptrdiff_t *" => "integer(C_INTPTR_T), dimension(*), intent(in)",
+
+    "const fftw_r2r_kind *" => "integer(C_FFTW_R2R_KIND), dimension(*), intent(in)",
+    "const fftwf_r2r_kind *" => "integer(C_FFTW_R2R_KIND), dimension(*), intent(in)",
+    "const fftwl_r2r_kind *" => "integer(C_FFTW_R2R_KIND), dimension(*), intent(in)",
+    "const fftwq_r2r_kind *" => "integer(C_FFTW_R2R_KIND), dimension(*), intent(in)",
+
+    "double *" => "real(C_DOUBLE), dimension(*), intent(out)",
+    "float *" => "real(C_FLOAT), dimension(*), intent(out)",
+    "long double *" => "real(C_LONG_DOUBLE), dimension(*), intent(out)",
+    "__float128 *" => "real(16), dimension(*), intent(out)",
+
+    "fftw_complex *" => "complex(C_DOUBLE_COMPLEX), dimension(*), intent(out)",
+    "fftwf_complex *" => "complex(C_FLOAT_COMPLEX), dimension(*), intent(out)",
+    "fftwl_complex *" => "complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out)",
+    "fftwq_complex *" => "complex(16), dimension(*), intent(out)",
+
+    "const fftw_iodim *" => "type(fftw_iodim), dimension(*), intent(in)",
+    "const fftwf_iodim *" => "type(fftwf_iodim), dimension(*), intent(in)",
+    "const fftwl_iodim *" => "type(fftwl_iodim), dimension(*), intent(in)",
+    "const fftwq_iodim *" => "type(fftwq_iodim), dimension(*), intent(in)",
+
+    "const fftw_iodim64 *" => "type(fftw_iodim64), dimension(*), intent(in)",
+    "const fftwf_iodim64 *" => "type(fftwf_iodim64), dimension(*), intent(in)",
+    "const fftwl_iodim64 *" => "type(fftwl_iodim64), dimension(*), intent(in)",
+    "const fftwq_iodim64 *" => "type(fftwq_iodim64), dimension(*), intent(in)",
+
+    "void *" => "type(C_PTR), value",
+    "FILE *" => "type(C_PTR), value",
+
+    "const char *" => "character(C_CHAR), dimension(*), intent(in)",
+
+    "fftw_write_char_func" => "type(C_FUNPTR), value",
+    "fftwf_write_char_func" => "type(C_FUNPTR), value",
+    "fftwl_write_char_func" => "type(C_FUNPTR), value",
+    "fftwq_write_char_func" => "type(C_FUNPTR), value",
+    "fftw_read_char_func" => "type(C_FUNPTR), value",
+    "fftwf_read_char_func" => "type(C_FUNPTR), value",
+    "fftwl_read_char_func" => "type(C_FUNPTR), value",
+    "fftwq_read_char_func" => "type(C_FUNPTR), value",
+
+    # Although the MPI standard defines this type as simply "integer",
+    # if we use integer without a 'C_' kind in a bind(C) interface then
+    # gfortran complains. Instead, since MPI also requires the C type
+    # MPI_Fint to match Fortran integers, we use the size of this type
+    # (extracted by configure and substituted by the Makefile).
+    "MPI_Comm" => "integer(C_MPI_FINT), value"
+    );
+# Main loop: each non-blank input line matching "extern <type> <name>(<args>)" yields one interface body, or a "! Unable ..." line.
+while (<>) {
+    next if /^ *$/;
+    if (/^ *extern +([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) *\((.*)\) *$/) {
+        $ret = &canonicalize_type($1);
+        $name = $2;
+
+        $args = $3;
+        $args =~ s/^ *void *$//; # "(void)" means no arguments
+
+        $bad = ($ret ne "void") && !exists($return_types{$ret}); # reject unknown return types ...
+        foreach $arg (split(/ *, */, $args)) {
+            $arg =~ /^([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) *$/;
+            $argtype = &canonicalize_type($1);
+            $bad = 1 if !exists($arg_types{$argtype}); # ... and unknown argument types
+        }
+        if ($bad) {
+            print "! Unable to generate Fortran interface for $name\n";
+            next;
+        }
+
+        # any function taking an MPI_Comm arg needs a C wrapper (grr).
+        if ($args =~ /MPI_Comm/) {
+            $cname = $name . "_f03";
+        }
+        else {
+            $cname = $name;
+        }
+
+        # Fortran has a 132-character line-length limit by default (grr)
+        $len = 0; # running column count of the header line being printed
+
+        print " "; $len = $len + length(" "); # NOTE(review): leading-space literals here and below look whitespace-collapsed in this copy; confirm against upstream
+        if ($ret eq "void") {
+            $kind = "subroutine"
+        }
+        else {
+            print "$return_types{$ret} ";
+            $len = $len + length("$return_types{$ret} ");
+            $kind = "function"
+        }
+        print "$kind $name("; $len = $len + length("$kind $name(");
+        $len0 = $len; # column just after "(", used to indent continuation lines
+
+        $argnames = $args;
+        $argnames =~ s/([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) */$2/g; # keep only the argument names
+        $comma = "";
+        foreach $argname (split(/ *, */, $argnames)) {
+            if ($len + length("$comma$argname") + 3 > 132) { # would overflow: emit ", &" and continue on the next line
+                printf ", &\n%*s", $len0, "";
+                $len = $len0;
+                $comma = "";
+            }
+            print "$comma$argname";
+            $len = $len + length("$comma$argname");
+            $comma = ",";
+        }
+        print ") "; $len = $len + 2;
+
+        if ($len + length("bind(C, name='$cname')") > 132) { # put the bind clause on a continuation line if it does not fit
+            printf "&\n%*s", $len0 - length("$name("), "";
+        }
+        print "bind(C, name='$cname')\n";
+
+        print " import\n";
+        foreach $arg (split(/ *, */, $args)) {
+            $arg =~ /^([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) *$/;
+            $argtype = &canonicalize_type($1);
+            $argname = $2;
+            $ftype = $arg_types{$argtype};
+
+            # Various special cases for argument types:
+            if ($name =~ /_flops$/ && $argtype eq "double *") { # *_flops takes scalar outputs, not arrays
+                $ftype = "real(C_DOUBLE), intent(out)"
+            }
+            if ($name =~ /_execute/ && ($argname eq "ri" ||
+                                        $argname eq "ii" ||
+                                        $argname eq "in")) { # input arrays of *_execute* are declared intent(inout)
+                $ftype =~ s/intent\(out\)/intent(inout)/;
+            }
+
+            print " $ftype :: $argname\n"
+        }
+
+        print " end $kind $name\n";
+        print " \n";
+    }
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/guru.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/guru.h Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,4 @@
+#define XGURU(name) X(plan_guru_ ## name) /* these four macros select the plain "guru" symbol names */
+#define IODIM X(iodim)
+#define MKTENSOR_IODIMS X(mktensor_iodims)
+#define GURU_KOSHERP X(guru_kosherp)
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/guru64.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++
b/fft/fftw/fftw-3.3.4/api/guru64.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,4 @@ +#define XGURU(name) X(plan_guru64_ ## name) +#define IODIM X(iodim64) +#define MKTENSOR_IODIMS X(mktensor_iodims64) +#define GURU_KOSHERP X(guru64_kosherp) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/import-system-wisdom.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/import-system-wisdom.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +#if defined(FFTW_SINGLE) +# define WISDOM_NAME "wisdomf" +#elif defined(FFTW_LDOUBLE) +# define WISDOM_NAME "wisdoml" +#else +# define WISDOM_NAME "wisdom" +#endif + +/* OS-specific configuration-file directory */ +#if defined(__DJGPP__) +# define WISDOM_DIR "/dev/env/DJDIR/etc/fftw/" +#else +# define WISDOM_DIR "/etc/fftw/" +#endif + +int X(import_system_wisdom)(void) +{ +#if defined(__WIN32__) || defined(WIN32) || defined(_WINDOWS) + return 0; /* TODO? 
*/ +#else + + FILE *f; + f = fopen(WISDOM_DIR WISDOM_NAME, "r"); + if (f) { + int ret = X(import_wisdom_from_file)(f); + fclose(f); + return ret; + } else + return 0; +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/import-wisdom-from-file.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/import-wisdom-from-file.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include + +/* getc()/putc() are *unbelievably* slow on linux. Looks like glibc + is grabbing a lock for each call to getc()/putc(), or something + like that. You pay the price for these idiotic posix threads + whether you use them or not. + + So, we do our own buffering. This completely defeats the purpose + of having stdio in the first place, of course. 
+*/ + +#define BUFSZ 256 + +typedef struct { + scanner super; + FILE *f; + char buf[BUFSZ]; + char *bufr, *bufw; +} S; + +static int getchr_file(scanner * sc_) +{ + S *sc = (S *) sc_; + + if (sc->bufr >= sc->bufw) { + sc->bufr = sc->buf; + sc->bufw = sc->buf + fread(sc->buf, 1, BUFSZ, sc->f); + if (sc->bufr >= sc->bufw) + return EOF; + } + + return *(sc->bufr++); +} + +static scanner *mkscanner_file(FILE *f) +{ + S *sc = (S *) X(mkscanner)(sizeof(S), getchr_file); + sc->f = f; + sc->bufr = sc->bufw = sc->buf; + return &sc->super; +} + +int X(import_wisdom_from_file)(FILE *input_file) +{ + scanner *s = mkscanner_file(input_file); + planner *plnr = X(the_planner)(); + int ret = plnr->adt->imprt(plnr, s); + X(scanner_destroy)(s); + return ret; +} + +int X(import_wisdom_from_filename)(const char *filename) +{ + FILE *f = fopen(filename, "r"); + int ret; + if (!f) return 0; /* error opening file */ + ret = X(import_wisdom_from_file)(f); + if (fclose(f)) ret = 0; /* error closing file */ + return ret; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/import-wisdom-from-string.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/import-wisdom-from-string.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +typedef struct { + scanner super; + const char *s; +} S_str; + +static int getchr_str(scanner * sc_) +{ + S_str *sc = (S_str *) sc_; + if (!*sc->s) + return EOF; + return *sc->s++; +} + +static scanner *mkscanner_str(const char *s) +{ + S_str *sc = (S_str *) X(mkscanner)(sizeof(S_str), getchr_str); + sc->s = s; + return &sc->super; +} + +int X(import_wisdom_from_string)(const char *input_string) +{ + scanner *s = mkscanner_str(input_string); + planner *plnr = X(the_planner)(); + int ret = plnr->adt->imprt(plnr, s); + X(scanner_destroy)(s); + return ret; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/import-wisdom.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/import-wisdom.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "api.h"
+/* Scanner subclass that pulls characters from a caller-supplied callback. */
+typedef struct {
+     scanner super;
+     int (*read_char)(void *); /* caller's character source */
+     void *data; /* opaque argument handed back to read_char */
+} S;
+/* getchr callback: forward to the caller's read_char with its data pointer. */
+static int getchr_generic(scanner * s_)
+{
+     S *s = (S *) s_;
+     return (s->read_char)(s->data);
+}
+/* Import wisdom by reading characters through read_char(data); returns the result of the global planner's imprt hook. */
+int X(import_wisdom)(int (*read_char)(void *), void *data)
+{
+     S *s = (S *) X(mkscanner)(sizeof(S), getchr_generic);
+     planner *plnr = X(the_planner)();
+     int ret;
+
+     s->read_char = read_char;
+     s->data = data;
+     ret = plnr->adt->imprt(plnr, (scanner *) s);
+     X(scanner_destroy)((scanner *) s);
+     return ret;
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/malloc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/malloc.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "api.h"
+
+/* Public allocator: forwards the n-byte request to X(kernel_malloc). */
+void *X(malloc)(size_t n)
+{
+     return X(kernel_malloc)(n);
+}
+/* Public deallocator: forwards p to X(kernel_free). */
+void X(free)(void *p)
+{
+     X(kernel_free)(p);
+}
+
+/* The following two routines are mainly for the convenience of
+   the Fortran 2003 API, although C users may find them convenient
+   as well. The problem is that, although Fortran 2003 has a
+   c_sizeof intrinsic that is equivalent to sizeof, it is broken
+   in some gfortran versions, and in any case is a bit unnatural
+   in a Fortran context. So we provide routines to allocate real
+   and complex arrays, which are all that are really needed by FFTW. */
+/* Allocate room for n values of type R through X(malloc); the product sizeof(R) * n is not checked for overflow. */
+R *X(alloc_real)(size_t n)
+{
+     return (R *) X(malloc)(sizeof(R) * n);
+}
+/* Allocate room for n values of type C through X(malloc); the product sizeof(C) * n is not checked for overflow. */
+C *X(alloc_complex)(size_t n)
+{
+     return (C *) X(malloc)(sizeof(C) * n);
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/map-r2r-kind.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/map-r2r-kind.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+/* Translate `rank` public FFTW_* r2r kinds into a newly MALLOC'ed array of internal rdft_kind values (presumably released by the caller; confirm). */
+rdft_kind *X(map_r2r_kind)(int rank, const X(r2r_kind) * kind)
+{
+     int i;
+     rdft_kind *k;
+
+     A(FINITE_RNK(rank));
+     k = (rdft_kind *) MALLOC(rank * sizeof(rdft_kind), PROBLEMS);
+     for (i = 0; i < rank; ++i) {
+          rdft_kind m;
+          switch (kind[i]) {
+              case FFTW_R2HC: m = R2HC; break;
+              case FFTW_HC2R: m = HC2R; break;
+              case FFTW_DHT: m = DHT; break;
+              case FFTW_REDFT00: m = REDFT00; break;
+              case FFTW_REDFT01: m = REDFT01; break;
+              case FFTW_REDFT10: m = REDFT10; break;
+              case FFTW_REDFT11: m = REDFT11; break;
+              case FFTW_RODFT00: m = RODFT00; break;
+              case FFTW_RODFT01: m = RODFT01; break;
+              case FFTW_RODFT10: m = RODFT10; break;
+              case FFTW_RODFT11: m = RODFT11; break;
+              default: m = R2HC; A(0); /* unknown kind: assertion A(0), with R2HC assigned as the fallback value */
+          }
+          k[i] = m;
+     }
+     return k;
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mapflags.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/api/mapflags.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include + +/* a flag operation: x is either a flag, in which case xm == 0, or + a mask, in which case xm == x; using this we can compactly code + the various bit operations via (flags & x) ^ xm or (flags | x) ^ xm. */ +typedef struct { + unsigned x, xm; +} flagmask; + +typedef struct { + flagmask flag; + flagmask op; +} flagop; + +#define FLAGP(f, msk)(((f) & (msk).x) ^ (msk).xm) +#define OP(f, msk)(((f) | (msk).x) ^ (msk).xm) + +#define YES(x) {x, 0} +#define NO(x) {x, x} +#define IMPLIES(predicate, consequence) { predicate, consequence } +#define EQV(a, b) IMPLIES(YES(a), YES(b)), IMPLIES(NO(a), NO(b)) +#define NEQV(a, b) IMPLIES(YES(a), NO(b)), IMPLIES(NO(a), YES(b)) + +static void map_flags(unsigned *iflags, unsigned *oflags, + const flagop flagmap[], int nmap) +{ + int i; + for (i = 0; i < nmap; ++i) + if (FLAGP(*iflags, flagmap[i].flag)) + *oflags = OP(*oflags, flagmap[i].op); +} + +/* encoding of the planner timelimit into a BITS_FOR_TIMELIMIT-bits + nonnegative integer, such that we can still view the integer as + ``impatience'': higher means *lower* time limit, and 0 is the + highest possible value (about 1 year of calendar time) */ +static unsigned timelimit_to_flags(double timelimit) +{ + const double tmax = 365 * 24 * 3600; + const double tstep = 1.05; + const int nsteps = (1 << BITS_FOR_TIMELIMIT); + int x; + + if (timelimit < 0 || timelimit >= tmax) + return 0; + if (timelimit <= 1.0e-10) + return nsteps - 1; + + x = (int) (0.5 + (log(tmax / timelimit) / log(tstep))); + + if (x < 0) x = 0; + if (x >= nsteps) x = nsteps - 1; + return x; +} + +void X(mapflags)(planner *plnr, unsigned flags) +{ + unsigned l, u, t; + + /* map of api flags -> api flags, to implement consistency rules + and combination flags */ 
+ const flagop self_flagmap[] = { + /* in some cases (notably for halfcomplex->real transforms), + DESTROY_INPUT is the default, so we need to support + an inverse flag to disable it. + + (PRESERVE, DESTROY) -> (PRESERVE, DESTROY) + (0, 0) (1, 0) + (0, 1) (0, 1) + (1, 0) (1, 0) + (1, 1) (1, 0) + */ + IMPLIES(YES(FFTW_PRESERVE_INPUT), NO(FFTW_DESTROY_INPUT)), + IMPLIES(NO(FFTW_DESTROY_INPUT), YES(FFTW_PRESERVE_INPUT)), + + IMPLIES(YES(FFTW_EXHAUSTIVE), YES(FFTW_PATIENT)), + + IMPLIES(YES(FFTW_ESTIMATE), NO(FFTW_PATIENT)), + IMPLIES(YES(FFTW_ESTIMATE), + YES(FFTW_ESTIMATE_PATIENT + | FFTW_NO_INDIRECT_OP + | FFTW_ALLOW_PRUNING)), + + IMPLIES(NO(FFTW_EXHAUSTIVE), + YES(FFTW_NO_SLOW)), + + /* a canonical set of fftw2-like impatience flags */ + IMPLIES(NO(FFTW_PATIENT), + YES(FFTW_NO_VRECURSE + | FFTW_NO_RANK_SPLITS + | FFTW_NO_VRANK_SPLITS + | FFTW_NO_NONTHREADED + | FFTW_NO_DFT_R2HC + | FFTW_NO_FIXED_RADIX_LARGE_N + | FFTW_BELIEVE_PCOST)) + }; + + /* map of (processed) api flags to internal problem/planner flags */ + const flagop l_flagmap[] = { + EQV(FFTW_PRESERVE_INPUT, NO_DESTROY_INPUT), + EQV(FFTW_NO_SIMD, NO_SIMD), + EQV(FFTW_CONSERVE_MEMORY, CONSERVE_MEMORY), + EQV(FFTW_NO_BUFFERING, NO_BUFFERING), + NEQV(FFTW_ALLOW_LARGE_GENERIC, NO_LARGE_GENERIC) + }; + + const flagop u_flagmap[] = { + IMPLIES(YES(FFTW_EXHAUSTIVE), NO(0xFFFFFFFF)), + IMPLIES(NO(FFTW_EXHAUSTIVE), YES(NO_UGLY)), + + /* the following are undocumented, "beyond-guru" flags that + require some understanding of FFTW internals */ + EQV(FFTW_ESTIMATE_PATIENT, ESTIMATE), + EQV(FFTW_ALLOW_PRUNING, ALLOW_PRUNING), + EQV(FFTW_BELIEVE_PCOST, BELIEVE_PCOST), + EQV(FFTW_NO_DFT_R2HC, NO_DFT_R2HC), + EQV(FFTW_NO_NONTHREADED, NO_NONTHREADED), + EQV(FFTW_NO_INDIRECT_OP, NO_INDIRECT_OP), + EQV(FFTW_NO_RANK_SPLITS, NO_RANK_SPLITS), + EQV(FFTW_NO_VRANK_SPLITS, NO_VRANK_SPLITS), + EQV(FFTW_NO_VRECURSE, NO_VRECURSE), + EQV(FFTW_NO_SLOW, NO_SLOW), + EQV(FFTW_NO_FIXED_RADIX_LARGE_N, NO_FIXED_RADIX_LARGE_N) + }; + + 
map_flags(&flags, &flags, self_flagmap, NELEM(self_flagmap)); + + l = u = 0; + map_flags(&flags, &l, l_flagmap, NELEM(l_flagmap)); + map_flags(&flags, &u, u_flagmap, NELEM(u_flagmap)); + + /* enforce l <= u */ + PLNR_L(plnr) = l; + PLNR_U(plnr) = u | l; + + /* assert that the conversion didn't lose bits */ + A(PLNR_L(plnr) == l); + A(PLNR_U(plnr) == (u | l)); + + /* compute flags representation of the timelimit */ + t = timelimit_to_flags(plnr->timelimit); + + PLNR_TIMELIMIT_IMPATIENCE(plnr) = t; + A(PLNR_TIMELIMIT_IMPATIENCE(plnr) == t); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mkprinter-file.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/mkprinter-file.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include <stdio.h> + +#define BUFSZ 256 /* capacity in bytes of the per-printer staging buffer */ +/* printer that stages characters in buf and writes them to f in batches */ +typedef struct { + printer super; /* base printer; this file casts between printer* and P*, so it has to stay the first member */ + FILE *f; /* destination stream supplied by the caller */ + char buf[BUFSZ]; /* staging buffer */ + char *bufw; /* next free position in buf */ +} P; +/* write the (bufw - buf) staged bytes to p->f and rewind bufw to the start of buf; the fwrite result is not checked */ +static void myflush(P *p) +{ + fwrite(p->buf, 1, p->bufw - p->buf, p->f); + p->bufw = p->buf; +} +/* putchr callback: flush first when the buffer is full, then stage c */ +static void myputchr(printer *p_, char c) +{ + P *p = (P *) p_; + if (p->bufw >= p->buf + BUFSZ) + myflush(p); + *p->bufw++ = c; +} +/* cleanup callback: push out whatever is still staged; f itself is not closed here */ +static void mycleanup(printer *p_) +{ + P *p = (P *) p_; + myflush(p); +} +/* create a printer that writes to f: obtains a P from X(mkprinter) with the two callbacks above, records f, and starts with an empty buffer */ +printer *X(mkprinter_file)(FILE *f) +{ + P *p = (P *) X(mkprinter)(sizeof(P), myputchr, mycleanup); + p->f = f; + p->bufw = p->buf; + return &p->super; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mkprinter-str.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/mkprinter-str.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* printer that only counts the characters sent to it */ +typedef struct { + printer super; + int *cnt; /* caller-owned counter, incremented once per character */ +} P_cnt; +/* putchr callback: discard c and increment the counter */ +static void putchr_cnt(printer * p_, char c) +{ + P_cnt *p = (P_cnt *) p_; + UNUSED(c); + ++*p->cnt; +} +/* create a counting printer; *cnt is reset to 0 here and is incremented for every character printed afterwards */ +printer *X(mkprinter_cnt)(int *cnt) +{ + P_cnt *p = (P_cnt *) X(mkprinter)(sizeof(P_cnt), putchr_cnt, 0); + p->cnt = cnt; + *cnt = 0; + return &p->super; +} +/* printer that appends characters to a caller-supplied string */ +typedef struct { + printer super; + char *s; /* position where the next character will be stored */ +} P_str; +/* putchr callback: store c, advance, and write a terminating 0 after it; no bounds check is made, so the caller's buffer must be large enough */ +static void putchr_str(printer * p_, char c) +{ + P_str *p = (P_str *) p_; + *p->s++ = c; + *p->s = 0; +} +/* create a printer that writes into s; s is set to the empty string first */ +printer *X(mkprinter_str)(char *s) +{ + P_str *p = (P_str *) X(mkprinter)(sizeof(P_str), putchr_str, 0); + p->s = s; + *s = 0; + return &p->super; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mktensor-iodims.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/mktensor-iodims.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "mktensor-iodims.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mktensor-iodims.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/mktensor-iodims.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* build a tensor of the given rank from an IODIM array: each n is copied and each input/output stride is multiplied by is/os; when FINITE_RNK(rank) is false the tensor is returned exactly as X(mktensor) produced it */ +tensor *MKTENSOR_IODIMS(int rank, const IODIM *dims, int is, int os) +{ + int i; + tensor *x = X(mktensor)(rank); + + if (FINITE_RNK(rank)) { + for (i = 0; i < rank; ++i) { + x->dims[i].n = dims[i].n; + x->dims[i].is = dims[i].is * is; + x->dims[i].os = dims[i].os * os; + } + } + return x; +} +/* return 1 if (rank, dims) is acceptable, 0 otherwise: a negative rank is always rejected; with allow_minfty set, a non-finite rank is accepted and sizes need only be >= 0; without it the rank must be finite and every size > 0 (FINITE_RNK is defined elsewhere) */ +static int iodims_kosherp(int rank, const IODIM *dims, int allow_minfty) +{ + int i; + + if (rank < 0) return 0; + + if (allow_minfty) { + if (!FINITE_RNK(rank)) return 1; + for (i = 0; i < rank; ++i) + if (dims[i].n < 0) return 0; + } else { + if (!FINITE_RNK(rank)) return 0; + for (i = 0; i < rank; ++i) + if (dims[i].n <= 0) return 0; + } + + return 1; +} +/* validate guru-interface arguments: the transform dims get the strict check (allow_minfty = 0), the howmany dims the relaxed one (allow_minfty = 1); nonzero means both passed */ +int GURU_KOSHERP(int rank, const IODIM *dims, + int howmany_rank, const IODIM *howmany_dims) +{ + return (iodims_kosherp(rank, dims, 0) && + iodims_kosherp(howmany_rank, howmany_dims, 1)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mktensor-iodims64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/mktensor-iodims64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "mktensor-iodims.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/mktensor-rowmajor.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/mktensor-rowmajor.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or +
 * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* build a rank-rnk tensor with row-major strides: the last dimension gets strides is/os, and each earlier dimension's stride is the following dimension's stride times that dimension's physical size (niphys[i] for input, nophys[i] for output); n holds the sizes stored in the tensor. Nothing is filled in when rnk is non-finite or 0 */ +tensor *X(mktensor_rowmajor)(int rnk, const int *n, + const int *niphys, const int *nophys, + int is, int os) +{ + tensor *x = X(mktensor)(rnk); + + if (FINITE_RNK(rnk) && rnk > 0) { + int i; + + A(n && niphys && nophys); + x->dims[rnk - 1].is = is; + x->dims[rnk - 1].os = os; + x->dims[rnk - 1].n = n[rnk - 1]; + for (i = rnk - 1; i > 0; --i) { + x->dims[i - 1].is = x->dims[i].is * niphys[i]; + x->dims[i - 1].os = x->dims[i].os * nophys[i]; + x->dims[i - 1].n = n[i - 1]; + } + } + return x; +} +/* return 1 if rnk is finite and non-negative and every n[i] is > 0, else 0 */ +static int rowmajor_kosherp(int rnk, const int *n) +{ + int i; + + if (!FINITE_RNK(rnk)) return 0; + if (rnk < 0) return 0; + + for (i = 0; i < rnk; ++i) + if (n[i] <= 0) return 0; + + return 1; +} +/* argument check used with a howmany count: howmany must be >= 0 and (rnk, n) must pass rowmajor_kosherp */ +int X(many_kosherp)(int rnk, const int *n, int howmany) +{ + return (howmany >= 0) && rowmajor_kosherp(rnk, n); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-1d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-1d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" +/* 1-D complex DFT planner: forwards to X(plan_dft) with rank 1 and the single size n; returns whatever X(plan_dft) returns */ +X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, unsigned flags) +{ + return X(plan_dft)(1, &n, in, out, sign, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-2d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-2d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" +/* 2-D complex DFT planner: packs the sizes as {nx, ny} and forwards to X(plan_dft) with rank 2 */ +X(plan) X(plan_dft_2d)(int nx, int ny, C *in, C *out, int sign, unsigned flags) +{ + int n[2]; + n[0] = nx; + n[1] = ny; + return X(plan_dft)(2, n, in, out, sign, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-3d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-3d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" +/* 3-D complex DFT planner: packs the sizes as {nx, ny, nz} and forwards to X(plan_dft) with rank 3 */ +X(plan) X(plan_dft_3d)(int nx, int ny, int nz, + C *in, C *out, int sign, unsigned flags) +{ + int n[3]; + n[0] = nx; + n[1] = ny; + n[2] = nz; + return X(plan_dft)(3, n, in, out, sign, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-c2r-1d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-c2r-1d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* 1-D complex-to-real DFT planner: forwards to X(plan_dft_c2r) with rank 1 and the single size n */ +X(plan) X(plan_dft_c2r_1d)(int n, C *in, R *out, unsigned flags) +{ + return X(plan_dft_c2r)(1, &n, in, out, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-c2r-2d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-c2r-2d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* 2-D complex-to-real DFT planner: packs the sizes as {nx, ny} and forwards to X(plan_dft_c2r) with rank 2 */ +X(plan) X(plan_dft_c2r_2d)(int nx, int ny, C *in, R *out, unsigned flags) +{ + int n[2]; + n[0] = nx; + n[1] = ny; + return X(plan_dft_c2r)(2, n, in, out, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-c2r-3d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-c2r-3d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* 3-D complex-to-real DFT planner: packs the sizes as {nx, ny, nz} and forwards to X(plan_dft_c2r) with rank 3 */ +X(plan) X(plan_dft_c2r_3d)(int nx, int ny, int nz, + C *in, R *out, unsigned flags) +{ + int n[3]; + n[0] = nx; + n[1] = ny; + n[2] = nz; + return X(plan_dft_c2r)(3, n, in, out, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* rank-dimensional complex-to-real DFT planner: forwards to X(plan_many_dft_c2r) with the constants 1 and (0, 1, 1) for each of in and out -- presumably howmany = 1 with null embed arrays and unit stride/dist; confirm against the X(plan_many_dft_c2r) prototype */ +X(plan) X(plan_dft_c2r)(int rank, const int *n, C *in, R *out, unsigned flags) +{ + return X(plan_many_dft_c2r)(rank, n, 1, + in, 0, 1, 1, out, 0, 1, 1, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-r2c-1d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-r2c-1d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* 1-D real-to-complex DFT planner: forwards to X(plan_dft_r2c) with rank 1 and the single size n */ +X(plan) X(plan_dft_r2c_1d)(int n, R *in, C *out, unsigned flags) +{ + return X(plan_dft_r2c)(1, &n, in, out, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-r2c-2d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-r2c-2d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* 2-D real-to-complex DFT planner: packs the sizes as {nx, ny} and forwards to X(plan_dft_r2c) with rank 2 */ +X(plan) X(plan_dft_r2c_2d)(int nx, int ny, R *in, C *out, unsigned flags) +{ + int n[2]; + n[0] = nx; + n[1] = ny; + return X(plan_dft_r2c)(2, n, in, out, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-r2c-3d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-r2c-3d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* 3-D real-to-complex DFT planner: packs the sizes as {nx, ny, nz} and forwards to X(plan_dft_r2c) with rank 3 */ +X(plan) X(plan_dft_r2c_3d)(int nx, int ny, int nz, + R *in, C *out, unsigned flags) +{ + int n[3]; + n[0] = nx; + n[1] = ny; + n[2] = nz; + return X(plan_dft_r2c)(3, n, in, out, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* rank-dimensional real-to-complex DFT planner: forwards to X(plan_many_dft_r2c) with the constants 1 and (0, 1, 1) for each of in and out -- presumably howmany = 1 with null embed arrays and unit stride/dist; confirm against the X(plan_many_dft_r2c) prototype */ +X(plan) X(plan_dft_r2c)(int rank, const int *n, R *in, C *out, unsigned flags) +{ + return X(plan_many_dft_r2c)(rank, n, 1, + in, 0, 1, 1, + out, 0, 1, 1, + flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +/* rank-dimensional complex DFT planner: forwards to X(plan_many_dft) with the constants 1 and (0, 1, 1) for each of in and out -- presumably howmany = 1 with null embed arrays and unit stride/dist; confirm against the X(plan_many_dft) prototype */ +X(plan) X(plan_dft)(int rank, const int *n, + C *in, C *out, int sign, unsigned flags) +{ + return X(plan_many_dft)(rank, n, 1, + in, 0, 1, 1, + out, 0, 1, 1, + sign, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-dft-c2r.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-dft-c2r.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-dft-c2r.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" +/* guru complex-to-real planner. Returns 0 when GURU_KOSHERP rejects the dimensions. The complex input is split into real/imaginary pointers with EXTRACT_REIM; FFTW_DESTROY_INPUT is forced on unless out equals ri (presumably the in-place case). Tensors are built with input-stride multiplier 2 and output-stride multiplier 1, and the rdft2 problem kind is HC2R */ +X(plan) XGURU(dft_c2r)(int rank, const IODIM *dims, + int howmany_rank, const IODIM *howmany_dims, + C *in, R *out, unsigned flags) +{ + R *ri, *ii; + + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + EXTRACT_REIM(FFT_SIGN, in, &ri, &ii); + + if (out != ri) + flags |= FFTW_DESTROY_INPUT; + return X(mkapiplan)( + 0, flags, + X(mkproblem_rdft2_d_3pointers)( + MKTENSOR_IODIMS(rank, dims, 2, 1), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, 2, 1), + TAINT_UNALIGNED(out, flags), + TAINT_UNALIGNED(ri, flags), + TAINT_UNALIGNED(ii, flags), HC2R)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-dft-r2c.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-dft-r2c.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-dft-r2c.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" +/* guru real-to-complex planner. Returns 0 when GURU_KOSHERP rejects the dimensions. The complex output is split into real/imaginary pointers with EXTRACT_REIM; tensors are built with input-stride multiplier 1 and output-stride multiplier 2, and the rdft2 problem kind is R2HC */ +X(plan) XGURU(dft_r2c)(int rank, const IODIM *dims, + int howmany_rank, + const IODIM *howmany_dims, + R *in, C *out, unsigned flags) +{ + R *ro, *io; + + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + EXTRACT_REIM(FFT_SIGN, out, &ro, &io); + + return X(mkapiplan)( + 0, flags, + X(mkproblem_rdft2_d_3pointers)( + MKTENSOR_IODIMS(rank, dims, 1, 2), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, 1, 2), + TAINT_UNALIGNED(in, flags), + TAINT_UNALIGNED(ro, flags), + TAINT_UNALIGNED(io, flags), R2HC)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-dft.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-dft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-dft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" +/* guru complex DFT planner. Returns 0 when GURU_KOSHERP rejects the dimensions. Both in and out are split into real/imaginary pointers with EXTRACT_REIM using the requested sign; tensors are built with stride multiplier 2 on both sides, and sign is also passed on to X(mkapiplan) */ +X(plan) XGURU(dft)(int rank, const IODIM *dims, + int howmany_rank, const IODIM *howmany_dims, + C *in, C *out, int sign, unsigned flags) +{ + R *ri, *ii, *ro, *io; + + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + EXTRACT_REIM(sign, in, &ri, &ii); + EXTRACT_REIM(sign, out, &ro, &io); + + return X(mkapiplan)( + sign, flags, + X(mkproblem_dft_d)(MKTENSOR_IODIMS(rank, dims, 2, 2), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, + 2, 2), + TAINT_UNALIGNED(ri, flags), + TAINT_UNALIGNED(ii, flags), + TAINT_UNALIGNED(ro, flags), + TAINT_UNALIGNED(io, flags))); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-r2r.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-r2r.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-r2r.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" +/* guru real-to-real planner. Returns 0 when GURU_KOSHERP rejects the dimensions. The kind array k is obtained from X(map_r2r_kind)(rank, kind), handed to X(mkproblem_rdft_d), and released with X(ifree0) once X(mkapiplan) has returned; tensors use stride multiplier 1 on both sides */ +X(plan) XGURU(r2r)(int rank, const IODIM *dims, + int howmany_rank, + const IODIM *howmany_dims, + R *in, R *out, + const X(r2r_kind) * kind, unsigned flags) +{ + X(plan) p; + rdft_kind *k; + + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + k = X(map_r2r_kind)(rank, kind); + p = X(mkapiplan)( + 0, flags, + X(mkproblem_rdft_d)(MKTENSOR_IODIMS(rank, dims, 1, 1), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, + 1, 1), + TAINT_UNALIGNED(in, flags), + TAINT_UNALIGNED(out, flags), k)); + X(ifree0)(k); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-split-dft-c2r.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-c2r.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-c2r.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" +/* guru complex-to-real planner taking separate real (ri) and imaginary (ii) input arrays. Returns 0 when GURU_KOSHERP rejects the dimensions; FFTW_DESTROY_INPUT is forced on unless out equals ri; tensors use stride multiplier 1 on both sides and the rdft2 problem kind is HC2R */ +X(plan) XGURU(split_dft_c2r)(int rank, const IODIM *dims, + int howmany_rank, const IODIM *howmany_dims, + R *ri, R *ii, R *out, unsigned flags) +{ + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + if (out != ri) + flags |= FFTW_DESTROY_INPUT; + return X(mkapiplan)( + 0, flags, + X(mkproblem_rdft2_d_3pointers)( + MKTENSOR_IODIMS(rank, dims, 1, 1), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, 1, 1), + TAINT_UNALIGNED(out, flags), + TAINT_UNALIGNED(ri, flags), + TAINT_UNALIGNED(ii, flags), HC2R)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-split-dft-r2c.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-r2c.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-split-dft-r2c.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +X(plan) XGURU(split_dft_r2c)(int rank, const IODIM *dims, + int howmany_rank, + const IODIM *howmany_dims, + R *in, R *ro, R *io, unsigned flags) +{ + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + return X(mkapiplan)( + 0, flags, + X(mkproblem_rdft2_d_3pointers)( + MKTENSOR_IODIMS(rank, dims, 1, 1), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, 1, 1), + TAINT_UNALIGNED(in, flags), + TAINT_UNALIGNED(ro, flags), + TAINT_UNALIGNED(io, flags), R2HC)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-split-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-split-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru.h" +#include "plan-guru-split-dft.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru-split-dft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru-split-dft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" + +X(plan) XGURU(split_dft)(int rank, const IODIM *dims, + int howmany_rank, const IODIM *howmany_dims, + R *ri, R *ii, R *ro, R *io, unsigned flags) +{ + if (!GURU_KOSHERP(rank, dims, howmany_rank, howmany_dims)) return 0; + + return X(mkapiplan)( + ii - ri == 1 && io - ro == 1 ? FFT_SIGN : -FFT_SIGN, flags, + X(mkproblem_dft_d)(MKTENSOR_IODIMS(rank, dims, 1, 1), + MKTENSOR_IODIMS(howmany_rank, howmany_dims, + 1, 1), + TAINT_UNALIGNED(ri, flags), + TAINT_UNALIGNED(ii, flags), + TAINT_UNALIGNED(ro, flags), + TAINT_UNALIGNED(io, flags))); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-dft-c2r.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-dft-r2c.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-dft.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-r2r.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-split-dft-c2r.c 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-split-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-split-dft-c2r.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-split-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-split-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-split-dft-r2c.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-guru64-split-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-guru64-split-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +#include "guru64.h" +#include "plan-guru-split-dft.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-many-dft-c2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-many-dft-c2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +X(plan) X(plan_many_dft_c2r)(int rank, const int *n, + int howmany, + C *in, const int *inembed, + int istride, int idist, + R *out, const int *onembed, + int ostride, int odist, unsigned flags) +{ + R *ri, *ii; + int *nfi, *nfo; + int inplace; + X(plan) p; + + if (!X(many_kosherp)(rank, n, howmany)) return 0; + + EXTRACT_REIM(FFT_SIGN, in, &ri, &ii); + inplace = out == ri; + + if (!inplace) + flags |= FFTW_DESTROY_INPUT; + p = X(mkapiplan)( + 0, flags, + X(mkproblem_rdft2_d_3pointers)( + X(mktensor_rowmajor)( + rank, n, + X(rdft2_pad)(rank, n, inembed, inplace, 1, &nfi), + X(rdft2_pad)(rank, n, onembed, inplace, 0, &nfo), + 2 * istride, ostride), + X(mktensor_1d)(howmany, 2 * idist, odist), + TAINT_UNALIGNED(out, flags), + TAINT_UNALIGNED(ri, flags), TAINT_UNALIGNED(ii, flags), + HC2R)); + + X(ifree0)(nfi); + X(ifree0)(nfo); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-many-dft-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-many-dft-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +X(plan) X(plan_many_dft_r2c)(int rank, const int *n, + int howmany, + R *in, const int *inembed, + int istride, int idist, + C *out, const int *onembed, + int ostride, int odist, unsigned flags) +{ + R *ro, *io; + int *nfi, *nfo; + int inplace; + X(plan) p; + + if (!X(many_kosherp)(rank, n, howmany)) return 0; + + EXTRACT_REIM(FFT_SIGN, out, &ro, &io); + inplace = in == ro; + + p = X(mkapiplan)( + 0, flags, + X(mkproblem_rdft2_d_3pointers)( + X(mktensor_rowmajor)( + rank, n, + X(rdft2_pad)(rank, n, inembed, inplace, 0, &nfi), + X(rdft2_pad)(rank, n, onembed, inplace, 1, &nfo), + istride, 2 * ostride), + X(mktensor_1d)(howmany, idist, 2 * odist), + TAINT_UNALIGNED(in, flags), + TAINT_UNALIGNED(ro, flags), TAINT_UNALIGNED(io, flags), + R2HC)); + + X(ifree0)(nfi); + X(ifree0)(nfo); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-many-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-many-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "dft.h" + +#define N0(nembed)((nembed) ? (nembed) : n) + +X(plan) X(plan_many_dft)(int rank, const int *n, + int howmany, + C *in, const int *inembed, + int istride, int idist, + C *out, const int *onembed, + int ostride, int odist, int sign, unsigned flags) +{ + R *ri, *ii, *ro, *io; + + if (!X(many_kosherp)(rank, n, howmany)) return 0; + + EXTRACT_REIM(sign, in, &ri, &ii); + EXTRACT_REIM(sign, out, &ro, &io); + + return + X(mkapiplan)(sign, flags, + X(mkproblem_dft_d)( + X(mktensor_rowmajor)(rank, n, + N0(inembed), N0(onembed), + 2 * istride, 2 * ostride), + X(mktensor_1d)(howmany, 2 * idist, 2 * odist), + TAINT_UNALIGNED(ri, flags), + TAINT_UNALIGNED(ii, flags), + TAINT_UNALIGNED(ro, flags), + TAINT_UNALIGNED(io, flags))); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-many-r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-many-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "rdft.h" + +#define N0(nembed)((nembed) ? (nembed) : n) + +X(plan) X(plan_many_r2r)(int rank, const int *n, + int howmany, + R *in, const int *inembed, + int istride, int idist, + R *out, const int *onembed, + int ostride, int odist, + const X(r2r_kind) * kind, unsigned flags) +{ + X(plan) p; + rdft_kind *k; + + if (!X(many_kosherp)(rank, n, howmany)) return 0; + + k = X(map_r2r_kind)(rank, kind); + p = X(mkapiplan)( + 0, flags, + X(mkproblem_rdft_d)(X(mktensor_rowmajor)(rank, n, + N0(inembed), N0(onembed), + istride, ostride), + X(mktensor_1d)(howmany, idist, odist), + TAINT_UNALIGNED(in, flags), + TAINT_UNALIGNED(out, flags), k)); + X(ifree0)(k); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-r2r-1d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-r2r-1d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +X(plan) X(plan_r2r_1d)(int n, R *in, R *out, X(r2r_kind) kind, unsigned flags) +{ + return X(plan_r2r)(1, &n, in, out, &kind, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-r2r-2d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-r2r-2d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +X(plan) X(plan_r2r_2d)(int nx, int ny, R *in, R *out, + X(r2r_kind) kindx, X(r2r_kind) kindy, unsigned flags) +{ + int n[2]; + X(r2r_kind) kind[2]; + n[0] = nx; + n[1] = ny; + kind[0] = kindx; + kind[1] = kindy; + return X(plan_r2r)(2, n, in, out, kind, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-r2r-3d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-r2r-3d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +X(plan) X(plan_r2r_3d)(int nx, int ny, int nz, + R *in, R *out, X(r2r_kind) kindx, + X(r2r_kind) kindy, X(r2r_kind) kindz, unsigned flags) +{ + int n[3]; + X(r2r_kind) kind[3]; + n[0] = nx; + n[1] = ny; + n[2] = nz; + kind[0] = kindx; + kind[1] = kindy; + kind[2] = kindz; + return X(plan_r2r)(3, n, in, out, kind, flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/plan-r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/plan-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, + const X(r2r_kind) * kind, unsigned flags) +{ + return X(plan_many_r2r)(rank, n, 1, in, 0, 1, 1, out, 0, 1, 1, kind, + flags); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/print-plan.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/print-plan.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +char *X(sprint_plan)(const X(plan) p) +{ + int cnt; + char *s; + plan *pln = p->pln; + + printer *pr = X(mkprinter_cnt)(&cnt); + pln->adt->print(pln, pr); + X(printer_destroy)(pr); + + s = (char *) malloc(sizeof(char) * (cnt + 1)); + if (s) { + pr = X(mkprinter_str)(s); + pln->adt->print(pln, pr); + X(printer_destroy)(pr); + } + return s; +} + +void X(fprint_plan)(const X(plan) p, FILE *output_file) +{ + printer *pr = X(mkprinter_file)(output_file); + plan *pln = p->pln; + pln->adt->print(pln, pr); + X(printer_destroy)(pr); +} + +void X(print_plan)(const X(plan) p) +{ + X(fprint_plan)(p, stdout); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/rdft2-pad.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/rdft2-pad.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <string.h> +#include "api.h" + +const int *X(rdft2_pad)(int rnk, const int *n, const int *nembed, + int inplace, int cmplx, int **nfree) +{ + A(FINITE_RNK(rnk)); + *nfree = 0; + if (!nembed && rnk > 0) { + if (inplace || cmplx) { + int *np = (int *) MALLOC(sizeof(int) * rnk, PROBLEMS); + memcpy(np, n, sizeof(int) * rnk); + np[rnk - 1] = (n[rnk - 1] / 2 + 1) * (1 + !cmplx); + nembed = *nfree = np; + } else + nembed = n; + } + return nembed; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/the-planner.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/the-planner.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +static planner *plnr = 0; + +/* create the planner for the rest of the API */ +planner *X(the_planner)(void) +{ + if (!plnr) { + plnr = X(mkplanner)(); + X(configure_planner)(plnr); + } + + return plnr; +} + +void X(cleanup)(void) +{ + if (plnr) { + X(planner_destroy)(plnr); + plnr = 0; + } +} + +void X(set_timelimit)(double tlim) +{ + /* PLNR is not necessarily initialized when this function is + called, so use X(the_planner)() */ + X(the_planner)()->timelimit = tlim; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/version.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/version.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "api.h" + +const char X(cc)[] = FFTW_CC; + +/* fftw <= 3.2.2 had special compiler flags for codelets, which are + not used anymore. 
We keep this variable around because it is part + of the ABI */ +const char X(codelet_optim)[] = ""; + +const char X(version)[] = PACKAGE "-" PACKAGE_VERSION + +#if HAVE_FMA + "-fma" +#endif + +#if HAVE_SSE2 + "-sse2" +#endif + +#if HAVE_AVX + "-avx" +#endif + +#if HAVE_ALTIVEC + "-altivec" +#endif + +#if HAVE_NEON + "-neon" +#endif + +; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/api/x77.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/api/x77.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Fortran-like (e.g. as in BLAS) type prefixes for F77 interface */ +#if defined(FFTW_SINGLE) +# define x77(name) CONCAT(sfftw_, name) +# define X77(NAME) CONCAT(SFFTW_, NAME) +#elif defined(FFTW_LDOUBLE) +/* FIXME: what is best? BLAS uses D..._X, apparently. Ugh. 
*/ +# define x77(name) CONCAT(lfftw_, name) +# define X77(NAME) CONCAT(LFFTW_, NAME) +#elif defined(FFTW_QUAD) +# define x77(name) CONCAT(qfftw_, name) +# define X77(NAME) CONCAT(QFFTW_, NAME) +#else +# define x77(name) CONCAT(dfftw_, name) +# define X77(NAME) CONCAT(DFFTW_, NAME) +#endif + +/* If F77_FUNC is not defined and the user didn't explicitly specify + --disable-fortran, then make our best guess at default wrappers + (since F77_FUNC_EQUIV should not be defined in this case, we + will use both double-underscored g77 wrappers and single- or + non-underscored wrappers). This saves us from dealing with + complaints in the cases where the user failed to specify + an F77 compiler or wrapper detection failed for some reason. */ +#if !defined(F77_FUNC) && !defined(DISABLE_FORTRAN) +# if (defined(_WIN32) || defined(__WIN32__)) && !defined(WINDOWS_F77_MANGLING) +# define WINDOWS_F77_MANGLING 1 +# endif +# if defined(_AIX) || defined(__hpux) || defined(hpux) +# define F77_FUNC(a, A) a +# elif defined(CRAY) || defined(_CRAY) || defined(_UNICOS) +# define F77_FUNC(a, A) A +# else +# define F77_FUNC(a, A) a ## _ +# endif +# define F77_FUNC_(a, A) a ## __ +#endif + +#if defined(WITH_G77_WRAPPERS) && !defined(DISABLE_FORTRAN) +# undef F77_FUNC_ +# define F77_FUNC_(a, A) a ## __ +# undef F77_FUNC_EQUIV +#endif + +/* annoying Windows syntax for shared-library declarations */ +#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) +# define FFTW_VOIDFUNC __declspec(dllexport) void +#else +# define FFTW_VOIDFUNC void +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/bootstrap.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/bootstrap.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +#! /bin/sh +############################################################################ +# +# NOTE: If you just want to build FFTW, do not use this file. 
Just use +# the ordinary ./configure && make commands as described in the installation +# section of the manual. +# +# This file is only for users that want to generate their own codelets, +# as described in the "generating your own code" section of the manual. +# +############################################################################ + +touch ChangeLog + +echo "PLEASE IGNORE WARNINGS AND ERRORS" + +# paranoia: sometimes autoconf doesn't get things right the first time +rm -rf autom4te.cache +autoreconf --verbose --install --symlink --force +autoreconf --verbose --install --symlink --force +autoreconf --verbose --install --symlink --force + +rm -f config.cache + +# --enable-maintainer-mode enables build of genfft and automatic +# rebuild of codelets whenever genfft changes +( + ./configure --disable-shared --enable-maintainer-mode --enable-threads $* +) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/compile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/compile Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,347 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2012-10-14.11; # UTC + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# Written by Tom Tromey <tromey@cygnus.com>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f 
"$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. 
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) + func_cl_wrapper "$@" # Doesn't return... + ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/config.guess --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/config.guess Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1558 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2013 Free Software Foundation, Inc. + +timestamp='2013-06-10' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner. 
+# +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# +# Please send patches with a ChangeLog entry to config-patches@gnu.org. + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2013 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "${UNAME_SYSTEM}" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ LIBC=gnu + + eval $set_cc_for_build + cat <<-EOF > $dummy.c + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. 
However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? 
+ echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo 
powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} 
+ exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW64*:*) + echo ${UNAME_MACHINE}-pc-mingw64 + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + aarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="gnulibc1" ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + else + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + cris:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + crisv32:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + frv:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:Linux:*:*) + echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + ;; + or1k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + padre:Linux:*:*) + echo 
sparc-unknown-linux-${LIBC} + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; + PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; + *) echo hppa-unknown-linux-${LIBC} ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-${LIBC} + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-${LIBC} + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-${LIBC} + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-${LIBC} + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-${LIBC} + exit ;; + x86_64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configury will decide that + # this is a cross-build. 
+ echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + 
TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + eval $set_cc_for_build + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + fi + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not 
consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. + if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; +esac + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/config.h.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/config.h.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,395 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to compile in long-double precision. */ +#undef BENCHFFT_LDOUBLE + +/* Define to compile in quad precision. */ +#undef BENCHFFT_QUAD + +/* Define to compile in single precision. */ +#undef BENCHFFT_SINGLE + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to disable Fortran wrappers. */ +#undef DISABLE_FORTRAN + +/* Define to dummy `main' function (if any) required to link to the Fortran + libraries. 
*/ +#undef F77_DUMMY_MAIN + +/* Define to a macro mangling the given C identifier (in lower and upper + case), which must not contain underscores, for linking with Fortran. */ +#undef F77_FUNC + +/* As F77_FUNC, but for C identifiers containing underscores. */ +#undef F77_FUNC_ + +/* Define if F77_FUNC and F77_FUNC_ are equivalent. */ +#undef F77_FUNC_EQUIV + +/* Define if F77 and FC dummy `main' functions are identical. */ +#undef FC_DUMMY_MAIN_EQ_F77 + +/* C compiler name and flags */ +#undef FFTW_CC + +/* Define to enable extra FFTW debugging code. */ +#undef FFTW_DEBUG + +/* Define to enable alignment debugging hacks. */ +#undef FFTW_DEBUG_ALIGNMENT + +/* Define to enable debugging malloc. */ +#undef FFTW_DEBUG_MALLOC + +/* Define to enable the use of alloca(). */ +#undef FFTW_ENABLE_ALLOCA + +/* Define to compile in long-double precision. */ +#undef FFTW_LDOUBLE + +/* Define to compile in quad precision. */ +#undef FFTW_QUAD + +/* Define to enable pseudorandom estimate planning for debugging. */ +#undef FFTW_RANDOM_ESTIMATOR + +/* Define to compile in single precision. */ +#undef FFTW_SINGLE + +/* Define to 1 if you have the `abort' function. */ +#undef HAVE_ABORT + +/* Define to 1 if you have `alloca', as a function or macro. */ +#undef HAVE_ALLOCA + +/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Define to enable Altivec optimizations. */ +#undef HAVE_ALTIVEC + +/* Define to 1 if you have the <altivec.h> header file. */ +#undef HAVE_ALTIVEC_H + +/* Define to enable AVX optimizations. */ +#undef HAVE_AVX + +/* Define to 1 if you have the `BSDgettimeofday' function. */ +#undef HAVE_BSDGETTIMEOFDAY + +/* Define to 1 if you have the `clock_gettime' function. */ +#undef HAVE_CLOCK_GETTIME + +/* Define to 1 if you have the `cosl' function. */ +#undef HAVE_COSL + +/* Define to 1 if you have the <c_asm.h> header file. */ +#undef HAVE_C_ASM_H + +/* Define to 1 if you have the declaration of `cosl', and to 0 if you don't. 
+ */ +#undef HAVE_DECL_COSL + +/* Define to 1 if you have the declaration of `cosq', and to 0 if you don't. + */ +#undef HAVE_DECL_COSQ + +/* Define to 1 if you have the declaration of `drand48', and to 0 if you + don't. */ +#undef HAVE_DECL_DRAND48 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#undef HAVE_DECL_MEMALIGN + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#undef HAVE_DECL_POSIX_MEMALIGN + +/* Define to 1 if you have the declaration of `sinl', and to 0 if you don't. + */ +#undef HAVE_DECL_SINL + +/* Define to 1 if you have the declaration of `sinq', and to 0 if you don't. + */ +#undef HAVE_DECL_SINQ + +/* Define to 1 if you have the declaration of `srand48', and to 0 if you + don't. */ +#undef HAVE_DECL_SRAND48 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ +#undef HAVE_DOPRNT + +/* Define to 1 if you have the `drand48' function. */ +#undef HAVE_DRAND48 + +/* Define if you have a machine with fused multiply-add */ +#undef HAVE_FMA + +/* Define to 1 if you have the `gethrtime' function. */ +#undef HAVE_GETHRTIME + +/* Define to 1 if you have the `gettimeofday' function. */ +#undef HAVE_GETTIMEOFDAY + +/* Define to 1 if hrtime_t is defined in <sys/time.h> */ +#undef HAVE_HRTIME_T + +/* Define to 1 if you have the <intrinsics.h> header file. */ +#undef HAVE_INTRINSICS_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define if the isnan() function/macro is available. */ +#undef HAVE_ISNAN + +/* Define to 1 if you have the <libintl.h> header file. */ +#undef HAVE_LIBINTL_H + +/* Define to 1 if you have the `m' library (-lm). */ +#undef HAVE_LIBM + +/* Define to 1 if you have the `quadmath' library (-lquadmath). */ +#undef HAVE_LIBQUADMATH + +/* Define to 1 if you have the <limits.h> header file. 
*/ +#undef HAVE_LIMITS_H + +/* Define to 1 if the compiler supports `long double' */ +#undef HAVE_LONG_DOUBLE + +/* Define to 1 if you have the `mach_absolute_time' function. */ +#undef HAVE_MACH_ABSOLUTE_TIME + +/* Define to 1 if you have the <mach/mach_time.h> header file. */ +#undef HAVE_MACH_MACH_TIME_H + +/* Define to 1 if you have the <malloc.h> header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the `memalign' function. */ +#undef HAVE_MEMALIGN + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `memset' function. */ +#undef HAVE_MEMSET + +/* Define to enable use of MIPS ZBus cycle-counter. */ +#undef HAVE_MIPS_ZBUS_TIMER + +/* Define if you have the MPI library. */ +#undef HAVE_MPI + +/* Define to enable ARM NEON optimizations. */ +#undef HAVE_NEON + +/* Define if OpenMP is enabled */ +#undef HAVE_OPENMP + +/* Define to 1 if you have the `posix_memalign' function. */ +#undef HAVE_POSIX_MEMALIGN + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the `read_real_time' function. */ +#undef HAVE_READ_REAL_TIME + +/* Define to 1 if you have the `sinl' function. */ +#undef HAVE_SINL + +/* Define to 1 if you have the `snprintf' function. */ +#undef HAVE_SNPRINTF + +/* Define to 1 if you have the `sqrt' function. */ +#undef HAVE_SQRT + +/* Define to enable SSE/SSE2 optimizations. */ +#undef HAVE_SSE2 + +/* Define to 1 if you have the <stddef.h> header file. */ +#undef HAVE_STDDEF_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `sysctl' function. */ +#undef HAVE_SYSCTL + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/sysctl.h> header file. 
*/ +#undef HAVE_SYS_SYSCTL_H + +/* Define to 1 if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the `tanl' function. */ +#undef HAVE_TANL + +/* Define if we have a threads library. */ +#undef HAVE_THREADS + +/* Define to 1 if you have the `time_base_to_time' function. */ +#undef HAVE_TIME_BASE_TO_TIME + +/* Define to 1 if the system has the type `uintptr_t'. */ +#undef HAVE_UINTPTR_T + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `vprintf' function. */ +#undef HAVE_VPRINTF + +/* Define to 1 if you have the `_mm_free' function. */ +#undef HAVE__MM_FREE + +/* Define to 1 if you have the `_mm_malloc' function. */ +#undef HAVE__MM_MALLOC + +/* Define if you have the UNICOS _rtc() intrinsic. */ +#undef HAVE__RTC + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* The size of `double', as computed by sizeof. */ +#undef SIZEOF_DOUBLE + +/* The size of `fftw_r2r_kind', as computed by sizeof. */ +#undef SIZEOF_FFTW_R2R_KIND + +/* The size of `float', as computed by sizeof. */ +#undef SIZEOF_FLOAT + +/* The size of `int', as computed by sizeof. 
*/ +#undef SIZEOF_INT + +/* The size of `long', as computed by sizeof. */ +#undef SIZEOF_LONG + +/* The size of `long long', as computed by sizeof. */ +#undef SIZEOF_LONG_LONG + +/* The size of `MPI_Fint', as computed by sizeof. */ +#undef SIZEOF_MPI_FINT + +/* The size of `ptrdiff_t', as computed by sizeof. */ +#undef SIZEOF_PTRDIFF_T + +/* The size of `size_t', as computed by sizeof. */ +#undef SIZEOF_SIZE_T + +/* The size of `unsigned int', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_INT + +/* The size of `unsigned long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG + +/* The size of `unsigned long long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG_LONG + +/* The size of `void *', as computed by sizeof. */ +#undef SIZEOF_VOID_P + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ +#undef TIME_WITH_SYS_TIME + +/* Define if we have and are using POSIX threads. */ +#undef USING_POSIX_THREADS + +/* Version number of package */ +#undef VERSION + +/* Use common Windows Fortran mangling styles for the Fortran interfaces. */ +#undef WINDOWS_F77_MANGLING + +/* Include g77-compatible wrappers in addition to any other Fortran wrappers. + */ +#undef WITH_G77_WRAPPERS + +/* Use our own aligned malloc routine; mainly helpful for Windows systems + lacking aligned allocation system-library routines. */ +#undef WITH_OUR_MALLOC + +/* Use low-precision timers, making planner very slow */ +#undef WITH_SLOW_TIMER + +/* Define to empty if `const' does not conform to ANSI C. 
*/ +#undef const + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/config.sub --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/config.sub Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1788 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2013 Free Software Foundation, Inc. + +timestamp='2013-04-24' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. 
+ +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2013 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. 
+ echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + android-linux) + os=-linux-android + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. 
+ ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | be32 | be64 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | epiphany \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | 
mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 \ + | or1k | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pyramid \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | we32k \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. 
+ *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | orion-* \ + | 
pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pyramid-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + 
os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + 
;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze*) + basic_machine=microblaze-xilinx + ;; + mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + 
basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i386-pc + os=-msys + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + 
basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + 
ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + 
basic_machine=t90-cray + os=-unicos + ;; + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. 
+ # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -nacl*) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. 
+# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + hexagon-*) + os=-elf + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or1k-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/configure --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/configure Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,22710 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for fftw 3.3.4. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. 
Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and fftw@fftw.org +$0: about your system, including any error possibly output +$0: before this message. Then install a modern shell, or +$0: manually run the script under such a shell if you do +$0: have one." 
+ fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. 
+as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + +SHELL=${CONFIG_SHELL-/bin/sh} + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='fftw' +PACKAGE_TARNAME='fftw' +PACKAGE_VERSION='3.3.4' +PACKAGE_STRING='fftw 3.3.4' +PACKAGE_BUGREPORT='fftw@fftw.org' +PACKAGE_URL='' + +ac_unique_file="kernel/ifftw.h" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +COMBINED_THREADS_FALSE +COMBINED_THREADS_TRUE +SMP_FALSE +SMP_TRUE +OPENMP_FALSE +OPENMP_TRUE +THREADS_FALSE +THREADS_TRUE +THREADLIBS +PTHREAD_CFLAGS +PTHREAD_LIBS +PTHREAD_CC +acx_pthread_config +OPENMP_CFLAGS +FLIBS +ac_ct_F77 +FFLAGS +F77 +LIBQUADMATH +LIBOBJS +POW_LIB +ALLOCA +C_FFTW_R2R_KIND +STACK_ALIGN_CFLAGS +NEON_CFLAGS +ALTIVEC_CFLAGS +AVX_CFLAGS +SSE2_CFLAGS +MPI_FALSE +MPI_TRUE +C_MPI_FINT +MPIRUN +MPILIBS +MPICC +OCAMLBUILD +CPP +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +RANLIB +ac_ct_AR +AR +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +EGREP +GREP +SED +LIBTOOL +OBJDUMP +DLLTOOL +AS +LN_S +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__quote +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +PREC_SUFFIX +HAVE_NEON_FALSE +HAVE_NEON_TRUE +HAVE_ALTIVEC_FALSE +HAVE_ALTIVEC_TRUE +HAVE_AVX_FALSE +HAVE_AVX_TRUE +HAVE_SSE2_FALSE +HAVE_SSE2_TRUE +CHECK_PL_OPTS +PRECISION +QUAD_FALSE +QUAD_TRUE +LDOUBLE_FALSE +LDOUBLE_TRUE +SINGLE_FALSE +SINGLE_TRUE +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +SHARED_VERSION_INFO +MAINT +MAINTAINER_MODE_FALSE +MAINTAINER_MODE_TRUE +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE 
+AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_silent_rules +enable_maintainer_mode +enable_shared +enable_fma +enable_debug +enable_debug_malloc +enable_debug_alignment +enable_random_estimator +enable_alloca +enable_single +enable_float +enable_long_double +enable_quad_precision +enable_sse +enable_sse2 +enable_avx +enable_altivec +enable_neon +with_slow_timer +enable_mips_zbus_timer +with_our_malloc +with_our_malloc16 +with_windows_f77_mangling +with_incoming_stack_boundary +enable_dependency_tracking +enable_static +with_pic +enable_fast_install +with_gnu_ld +with_sysroot +enable_libtool_lock +enable_mpi +enable_fortran +with_g77_wrappers +enable_openmp +enable_threads +with_combined_threads +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP +MPICC +F77 +FFLAGS' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. 
+# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures fftw 3.3.4 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/fftw] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of fftw 3.3.4:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build 
output (undo: "make V=0") + --enable-maintainer-mode + enable make rules and dependencies not useful (and + sometimes confusing) to the casual installer + --enable-shared[=PKGS] build shared libraries [default=no] + --enable-fma enable optimizations for machines with fused + multiply-add + --enable-debug compile fftw with extra runtime checks for debugging + --enable-debug-malloc enable malloc debugging version + --enable-debug-alignment + enable alignment debugging hacks + --enable-random-estimator + enable pseudorandom estimator (debugging hack) + --disable-alloca disable use of the alloca() function (may be broken + on mingw64) + --enable-single compile fftw in single precision + --enable-float synonym for --enable-single + --enable-long-double compile fftw in long-double precision + --enable-quad-precision compile fftw in quadruple precision if available + --enable-sse enable SSE optimizations + --enable-sse2 enable SSE/SSE2 optimizations + --enable-avx enable AVX optimizations + --enable-altivec enable Altivec optimizations + --enable-neon enable ARM NEON optimizations + --enable-mips-zbus-timer + use MIPS ZBus cycle-counter + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + --enable-mpi compile FFTW MPI library + --disable-fortran don't include Fortran-callable wrappers + --enable-openmp use OpenMP directives for parallelism + --enable-threads compile FFTW SMP threads library + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-slow-timer use low-precision timers (SLOW) + --with-our-malloc use our aligned malloc (helpful for Win32) + --with-our-malloc16 Obsolete alias for 
--with-our-malloc16 + --with-windows-f77-mangling + use common Win32 Fortran interface styles + --with-incoming-stack-boundary=X + Assume that stack is aligned to (1< if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + MPICC MPI C compiler command + F77 Fortran 77 compiler command + FFLAGS Fortran 77 compiler flags + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +fftw configure 3.3.4 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES +# -------------------------------------------- +# Tries to find the compile-time value of EXPR in a program that includes +# INCLUDES, setting VAR accordingly. Returns whether the value could be +# computed +ac_fn_c_compute_int () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) >= 0)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_lo=0 ac_mid=0 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=$ac_mid; break +else + as_fn_arith $ac_mid + 1 && ac_lo=$as_val + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) < 0)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=-1 ac_mid=-1 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) >= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_lo=$ac_mid; break +else + as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + ac_lo= ac_hi= +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=$ac_mid +else + as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in #(( +?*) eval "$3=\$ac_lo"; ac_retval=0 ;; +'') ac_retval=1 ;; +esac + else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +static long int longval () { return $2; } +static unsigned long int ulongval () { return $2; } +#include +#include +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! 
f) + return 1; + if (($2) < 0) + { + long int i = longval (); + if (i != ($2)) + return 1; + fprintf (f, "%ld", i); + } + else + { + unsigned long int i = ulongval (); + if (i != ($2)) + return 1; + fprintf (f, "%lu", i); + } + /* Do not output a trailing newline, as this causes \r\n confusion + on some platforms. */ + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + echo >>conftest.val; read $3 &5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" 
>&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## ---------------------------- ## +## Report this to fftw@fftw.org ## +## ---------------------------- ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. 
+ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES +# --------------------------------------------- +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. +ac_fn_c_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +$as_echo_n "checking whether $as_decl_name is declared... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_decl + +# ac_fn_f77_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_f77_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_f77_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_f77_try_compile + +# ac_fn_f77_try_link LINENO +# ------------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_f77_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_f77_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_f77_try_link +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by fftw $as_me 3.3.4, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + +SHARED_VERSION_INFO="7:4:4" # CURRENT:REVISION:AGE + +am__api_version='1.14' + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. 
+ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. 
+ : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? 
"unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. 
+# By default was `s,x,x', remove it if useless. +ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... " >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='fftw' + VERSION='3.3.4' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi +ac_config_headers="$ac_config_headers config.h" + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } + # Check whether --enable-maintainer-mode was given. 
+if test "${enable_maintainer_mode+set}" = set; then : + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else + USE_MAINTAINER_MODE=no +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +$as_echo "$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + + +# Check whether --enable-shared was given. +if test "${enable_shared+set}" = set; then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_shared=no +fi + + + + + + + + + # Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... " >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +$as_echo_n "checking host system type... " >&6; } +if ${ac_cv_host+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +$as_echo "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + + +case "${host_cpu}" in + powerpc*) have_fma=yes;; + ia64*) have_fma=yes;; + hppa*) have_fma=yes;; + mips64*) have_fma=yes;; + *) have_fma=no;; +esac + +# Check whether --enable-fma was given. +if test "${enable_fma+set}" = set; then : + enableval=$enable_fma; have_fma=$enableval +fi + +if test "$have_fma"x = "yes"x; then + +$as_echo "#define HAVE_FMA 1" >>confdefs.h + +fi + + +# Check whether --enable-debug was given. 
+if test "${enable_debug+set}" = set; then : + enableval=$enable_debug; ok=$enableval +else + ok=no +fi + +if test "$ok" = "yes"; then + +$as_echo "#define FFTW_DEBUG 1" >>confdefs.h + + debug_malloc=yes +else + debug_malloc=no +fi + +# Check whether --enable-debug-malloc was given. +if test "${enable_debug_malloc+set}" = set; then : + enableval=$enable_debug_malloc; ok=$enableval +else + ok=$debug_malloc +fi + +if test "$ok" = "yes"; then + +$as_echo "#define FFTW_DEBUG_MALLOC 1" >>confdefs.h + +fi + +# Check whether --enable-debug-alignment was given. +if test "${enable_debug_alignment+set}" = set; then : + enableval=$enable_debug_alignment; ok=$enableval +else + ok=no +fi + +if test "$ok" = "yes"; then + +$as_echo "#define FFTW_DEBUG_ALIGNMENT 1" >>confdefs.h + +fi + +# Check whether --enable-random-estimator was given. +if test "${enable_random_estimator+set}" = set; then : + enableval=$enable_random_estimator; ok=$enableval +else + ok=no +fi + +if test "$ok" = "yes"; then + +$as_echo "#define FFTW_RANDOM_ESTIMATOR 1" >>confdefs.h + + CHECK_PL_OPTS="--estimate" +fi + +# Check whether --enable-alloca was given. +if test "${enable_alloca+set}" = set; then : + enableval=$enable_alloca; ok=$enableval +else + ok=yes +fi + +if test "$ok" = "yes"; then + +$as_echo "#define FFTW_ENABLE_ALLOCA 1" >>confdefs.h + +fi + +# Check whether --enable-single was given. +if test "${enable_single+set}" = set; then : + enableval=$enable_single; ok=$enableval +else + ok=no +fi + +# Check whether --enable-float was given. +if test "${enable_float+set}" = set; then : + enableval=$enable_float; ok=$enableval +fi + +if test "$ok" = "yes"; then + +$as_echo "#define FFTW_SINGLE 1" >>confdefs.h + + +$as_echo "#define BENCHFFT_SINGLE 1" >>confdefs.h + + PRECISION=s +else + PRECISION=d +fi + if test "$ok" = "yes"; then + SINGLE_TRUE= + SINGLE_FALSE='#' +else + SINGLE_TRUE='#' + SINGLE_FALSE= +fi + + +# Check whether --enable-long-double was given. 
+if test "${enable_long_double+set}" = set; then : + enableval=$enable_long_double; ok=$enableval +else + ok=no +fi + +if test "$ok" = "yes"; then + if test "$PRECISION" = "s"; then + as_fn_error $? "--enable-single/--enable-long-double conflict" "$LINENO" 5 + fi + +$as_echo "#define FFTW_LDOUBLE 1" >>confdefs.h + + +$as_echo "#define BENCHFFT_LDOUBLE 1" >>confdefs.h + + PRECISION=l +fi + if test "$ok" = "yes"; then + LDOUBLE_TRUE= + LDOUBLE_FALSE='#' +else + LDOUBLE_TRUE='#' + LDOUBLE_FALSE= +fi + + +# Check whether --enable-quad-precision was given. +if test "${enable_quad_precision+set}" = set; then : + enableval=$enable_quad_precision; ok=$enableval +else + ok=no +fi + +if test "$ok" = "yes"; then + if test "$PRECISION" != "d"; then + as_fn_error $? "conflicting precisions specified" "$LINENO" 5 + fi + +$as_echo "#define FFTW_QUAD 1" >>confdefs.h + + +$as_echo "#define BENCHFFT_QUAD 1" >>confdefs.h + + PRECISION=q +fi + if test "$ok" = "yes"; then + QUAD_TRUE= + QUAD_FALSE='#' +else + QUAD_TRUE='#' + QUAD_FALSE= +fi + + + + + +# Check whether --enable-sse was given. +if test "${enable_sse+set}" = set; then : + enableval=$enable_sse; have_sse=$enableval +else + have_sse=no +fi + +if test "$have_sse" = "yes"; then + if test "$PRECISION" != "s"; then + as_fn_error $? "SSE requires single precision" "$LINENO" 5 + fi +fi + +# Check whether --enable-sse2 was given. +if test "${enable_sse2+set}" = set; then : + enableval=$enable_sse2; have_sse2=$enableval +else + have_sse2=no +fi + +if test "$have_sse" = "yes"; then have_sse2=yes; fi +if test "$have_sse2" = "yes"; then + +$as_echo "#define HAVE_SSE2 1" >>confdefs.h + + if test "$PRECISION" != "d" -a "$PRECISION" != "s"; then + as_fn_error $? "SSE2 requires single or double precision" "$LINENO" 5 + fi +fi + if test "$have_sse2" = "yes"; then + HAVE_SSE2_TRUE= + HAVE_SSE2_FALSE='#' +else + HAVE_SSE2_TRUE='#' + HAVE_SSE2_FALSE= +fi + + +# Check whether --enable-avx was given. 
+if test "${enable_avx+set}" = set; then : + enableval=$enable_avx; have_avx=$enableval +else + have_avx=no +fi + +if test "$have_avx" = "yes"; then + +$as_echo "#define HAVE_AVX 1" >>confdefs.h + + if test "$PRECISION" != "d" -a "$PRECISION" != "s"; then + as_fn_error $? "AVX requires single or double precision" "$LINENO" 5 + fi +fi + if test "$have_avx" = "yes"; then + HAVE_AVX_TRUE= + HAVE_AVX_FALSE='#' +else + HAVE_AVX_TRUE='#' + HAVE_AVX_FALSE= +fi + + +# Check whether --enable-altivec was given. +if test "${enable_altivec+set}" = set; then : + enableval=$enable_altivec; have_altivec=$enableval +else + have_altivec=no +fi + +if test "$have_altivec" = "yes"; then + +$as_echo "#define HAVE_ALTIVEC 1" >>confdefs.h + + if test "$PRECISION" != "s"; then + as_fn_error $? "Altivec requires single precision" "$LINENO" 5 + fi +fi + if test "$have_altivec" = "yes"; then + HAVE_ALTIVEC_TRUE= + HAVE_ALTIVEC_FALSE='#' +else + HAVE_ALTIVEC_TRUE='#' + HAVE_ALTIVEC_FALSE= +fi + + +# Check whether --enable-neon was given. +if test "${enable_neon+set}" = set; then : + enableval=$enable_neon; have_neon=$enableval +else + have_neon=no +fi + +if test "$have_neon" = "yes"; then + +$as_echo "#define HAVE_NEON 1" >>confdefs.h + + if test "$PRECISION" != "s"; then + as_fn_error $? "NEON requires single precision" "$LINENO" 5 + fi +fi + if test "$have_neon" = "yes"; then + HAVE_NEON_TRUE= + HAVE_NEON_FALSE='#' +else + HAVE_NEON_TRUE='#' + HAVE_NEON_FALSE= +fi + + + + +# Check whether --with-slow-timer was given. +if test "${with_slow_timer+set}" = set; then : + withval=$with_slow_timer; with_slow_timer=$withval +else + with_slow_timer=no +fi + +if test "$with_slow_timer" = "yes"; then + +$as_echo "#define WITH_SLOW_TIMER 1" >>confdefs.h + +fi + +# Check whether --enable-mips_zbus_timer was given. 
+if test "${enable_mips_zbus_timer+set}" = set; then : + enableval=$enable_mips_zbus_timer; have_mips_zbus_timer=$enableval +else + have_mips_zbus_timer=no +fi + +if test "$have_mips_zbus_timer" = "yes"; then + +$as_echo "#define HAVE_MIPS_ZBUS_TIMER 1" >>confdefs.h + +fi + + +# Check whether --with-our-malloc was given. +if test "${with_our_malloc+set}" = set; then : + withval=$with_our_malloc; with_our_malloc=$withval +else + with_our_malloc=no +fi + + +# Check whether --with-our-malloc16 was given. +if test "${with_our_malloc16+set}" = set; then : + withval=$with_our_malloc16; with_our_malloc=$withval +fi + +if test "$with_our_malloc" = "yes"; then + +$as_echo "#define WITH_OUR_MALLOC 1" >>confdefs.h + +fi + + +# Check whether --with-windows-f77-mangling was given. +if test "${with_windows_f77_mangling+set}" = set; then : + withval=$with_windows_f77_mangling; with_windows_f77_mangling=$withval +else + with_windows_f77_mangling=no +fi + +if test "$with_windows_f77_mangling" = "yes"; then + +$as_echo "#define WINDOWS_F77_MANGLING 1" >>confdefs.h + +fi + + +# Check whether --with-incoming-stack-boundary was given. +if test "${with_incoming_stack_boundary+set}" = set; then : + withval=$with_incoming_stack_boundary; with_incoming_stack_boundary=$withval +else + with_incoming_stack_boundary=no +fi + + +case "$PRECISION" in + s) PREC_SUFFIX=f;; + d) PREC_SUFFIX=;; + l) PREC_SUFFIX=l;; + q) PREC_SUFFIX=q;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + + +am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 +$as_echo_n "checking for style of include used by $am_make... " >&6; } +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. 
+echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 +$as_echo "$_am_result" >&6; } +rm -f confinc confmf + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler vendor" >&5 +$as_echo_n "checking for C compiler vendor... " >&6; } +if ${ax_cv_c_compiler_vendor+:} false; then : + $as_echo_n "(cached) " >&6 +else + ax_cv_c_compiler_vendor=unknown + # note: don't check for gcc first since some other compilers define __GNUC__ + for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ pathscale:__PATHCC__,__PATHSCALE__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do + vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + +#if !($vencpp) + thisisanerror; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_compiler_vendor=`echo $ventest | cut -d: -f1`; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_c_compiler_vendor" >&5 +$as_echo "$ax_cv_c_compiler_vendor" >&6; } + + case $ac_cv_prog_cc_stdc in #( + no) : + ac_cv_prog_cc_c99=no; ac_cv_prog_cc_c89=no ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5 +$as_echo_n "checking for $CC option to accept ISO C99... " >&6; } +if ${ac_cv_prog_cc_c99+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include +#include + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +#define debug(...) fprintf (stderr, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + your preprocessor is broken; +#endif +#if BIG_OK +#else + your preprocessor is broken; +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\0'; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static void +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str; + int number; + float fnumber; + + while (*format) + { + switch (*format++) + { + case 's': // string + str = va_arg (args_copy, const char *); + break; + case 'd': // int + number = va_arg (args_copy, int); + break; + case 'f': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); +} + +int +main () +{ + + // Check bool. + _Bool success = false; + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + test_varargs ("s, d' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. + struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. 
+ struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x' + || dynamic_array[ni.number - 1] != 543); + + ; + return 0; +} +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c99" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c99" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +$as_echo "$ac_cv_prog_cc_c99" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c99" != xno; then : + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) 
+{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 +else + 
ac_cv_prog_cc_stdc=no +fi + +fi + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO Standard C" >&5 +$as_echo_n "checking for $CC option to accept ISO Standard C... " >&6; } + if ${ac_cv_prog_cc_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +fi + + case $ac_cv_prog_cc_stdc in #( + no) : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; #( + '') : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_stdc" >&5 +$as_echo "$ac_cv_prog_cc_stdc" >&6; } ;; +esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +$as_echo_n "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +$as_echo "no, using $LN_S" >&6; } +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. +set dummy ${ac_tool_prefix}as; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AS+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AS"; then + ac_cv_prog_AS="$AS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AS="${ac_tool_prefix}as" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AS=$ac_cv_prog_AS +if test -n "$AS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AS" >&5 +$as_echo "$AS" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_AS"; then + ac_ct_AS=$AS + # Extract the first word of "as", so it can be a program name with args. +set dummy as; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_AS+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_AS"; then + ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AS="as" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AS=$ac_cv_prog_ac_ct_AS +if test -n "$ac_ct_AS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5 +$as_echo "$ac_ct_AS" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_AS" = x; then + AS="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AS=$ac_ct_AS + fi +else + AS="$ac_cv_prog_AS" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +$as_echo "$DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +$as_echo "$ac_ct_DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +$as_echo "$OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +$as_echo "$ac_ct_OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + + ;; +esac + +test -z "$AS" && AS=as + + + + + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + +case `pwd` in + *\ * | *\ *) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.4.2' +macro_revision='1.3337' + + + + + + + + + + + + + +ltmain="$ac_aux_dir/ltmain.sh" + +# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +$as_echo_n "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case "$ECHO" in + printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +$as_echo "printf" >&6; } ;; + print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +$as_echo "print -r" >&6; } ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +$as_echo "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +$as_echo_n "checking for a sed that does not truncate output... 
" >&6; } +if ${ac_cv_path_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. + # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? 
"no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +$as_echo "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +$as_echo_n "checking for fgrep... 
" >&6; } +if ${ac_cv_path_FGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in fgrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. + # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? 
"no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +$as_echo "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then : + withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +$as_echo_n "checking for ld used by $CC... " >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +$as_echo_n "checking for GNU ld... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +$as_echo_n "checking for non-GNU ld... " >&6; } +fi +if ${lt_cv_path_LD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +$as_echo "$LD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; } +if ${lt_cv_prog_gnu_ld+:} false; then : + $as_echo_n "(cached) " >&6 +else + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +$as_echo "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if ${lt_cv_path_NM+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + : ${lt_cv_path_NM=no} +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +$as_echo "$lt_cv_path_NM" >&6; } +if test "$lt_cv_path_NM" != "no"; then + NM="$lt_cv_path_NM" +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +$as_echo "$DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +$as_echo "$ac_ct_DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test "$DUMPBIN" != ":"; then + NM="$DUMPBIN" + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +$as_echo_n "checking the name lister ($NM) interface... 
" >&6; } +if ${lt_cv_nm_interface+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +$as_echo "$lt_cv_nm_interface" >&6; } + +# find the maximum length of command line arguments +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +$as_echo_n "checking the maximum length of command line arguments... " >&6; } +if ${lt_cv_sys_max_cmd_len+:} false; then : + $as_echo_n "(cached) " >&6 +else + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. 
+ # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8 ; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + +fi + +if test -n $lt_cv_sys_max_cmd_len ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +$as_echo "$lt_cv_sys_max_cmd_len" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 +$as_echo "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5 +$as_echo_n "checking whether the shell understands some XSI constructs... " >&6; } +# Try some XSI features +xsi_shell=no +( _lt_dummy="a/b/c" + test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ + = c,a/b,b/c, \ + && eval 'test $(( 1 + 1 )) -eq 2 \ + && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ + && xsi_shell=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5 +$as_echo "$xsi_shell" >&6; } + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5 +$as_echo_n "checking whether the shell understands \"+=\"... 
" >&6; } +lt_shell_append=no +( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \ + >/dev/null 2>&1 \ + && lt_shell_append=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5 +$as_echo "$lt_shell_append" >&6; } + + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +$as_echo_n "checking how to convert $build file names to $host format... " >&6; } +if ${lt_cv_to_host_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +$as_echo "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +$as_echo_n "checking 
how to convert $build file names to toolchain format... " >&6; } +if ${lt_cv_to_tool_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + #assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +$as_echo "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +$as_echo_n "checking for $LD option to reload object files... " >&6; } +if ${lt_cv_ld_reload_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_reload_flag='-r' +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +$as_echo "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test "$GCC" != yes; then + reload_cmds=false + fi + ;; + darwin*) + if test "$GCC" = yes; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +$as_echo "$OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +$as_echo "$ac_ct_OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +$as_echo_n "checking how to recognize dependent libraries... " >&6; } +if ${lt_cv_deplibs_check_method+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [[regex]]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given extended regex. +# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. 
+ +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin. + if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +esac + +fi +{ 
$as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +$as_echo "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +$as_echo "$DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. 
+set dummy dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +$as_echo "$ac_ct_DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +$as_echo_n "checking how to associate runtime and link libraries... 
" >&6; } +if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh + # decide which to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd="$ECHO" + ;; +esac + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +$as_echo "$AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +$as_echo "$ac_ct_AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} +: ${AR_FLAGS=cru} + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +$as_echo_n "checking for archiver @FILE support... " >&6; } +if ${lt_cv_ar_at_file+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -ne 0; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +$as_echo "$lt_cv_ar_at_file" >&6; } + +if test "x$lt_cv_ar_at_file" = xno; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; } +if ${lt_cv_sys_global_symbol_pipe+:} false; then : + $as_echo_n "(cached) " >&6 +else + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test "$host_cpu" = ia64; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. 
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (void *) \&\2},/p'" +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"lib\2\", (void *) \&\2},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function + # and D for any global variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? 
\"T \" : \"D \"};"\ +" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ +" s[1]~/^[@?]/{print s[1], s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 + (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. 
*/ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +$as_echo "failed" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +$as_echo "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +$as_echo_n "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. +if test "${with_sysroot+set}" = set; then : + withval=$with_sysroot; +else + with_sysroot=no +fi + + +lt_sysroot= +case ${with_sysroot} in #( + yes) + if test "$GCC" = yes; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5 +$as_echo "${with_sysroot}" >&6; } + as_fn_error $? "The sysroot must be an absolute path." 
"$LINENO" 5 + ;; +esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +$as_echo "${lt_sysroot:-no}" >&6; } + + + + + +# Check whether --enable-libtool-lock was given. +if test "${enable_libtool_lock+set}" = set; then : + enableval=$enable_libtool_lock; +fi + +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; + ppc64-*linux*|powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + ppc*-*linux*|powerpc*-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +$as_echo_n "checking whether the C compiler needs -belf... " >&6; } +if ${lt_cv_cc_needs_belf+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_cc_needs_belf=yes +else + lt_cv_cc_needs_belf=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +$as_echo "$lt_cv_cc_needs_belf" >&6; } + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD="${LD-ld}_sol2" + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks="$enable_libtool_lock" + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. +set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +$as_echo "$MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +$as_echo "$ac_ct_MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if ${lt_cv_path_mainfest_tool+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +$as_echo "$lt_cv_path_mainfest_tool" >&6; } +if test "x$lt_cv_path_mainfest_tool" != xyes; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +$as_echo "$DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +$as_echo "$ac_ct_DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +$as_echo "$NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +$as_echo "$ac_ct_NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +$as_echo "$LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_LIPO="lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +$as_echo "$ac_ct_LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL="otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +$as_echo "$ac_ct_OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +$as_echo "$OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +$as_echo "$ac_ct_OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +$as_echo_n "checking for -single_module linker flag... " >&6; } +if ${lt_cv_apple_cc_single_mod+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. + rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? 
+ # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&5 + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test $_lt_result -eq 0; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +$as_echo "$lt_cv_apple_cc_single_mod" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; } +if ${lt_cv_ld_exported_symbols_list+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_ld_exported_symbols_list=yes +else + lt_cv_ld_exported_symbols_list=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +$as_echo "$lt_cv_ld_exported_symbols_list" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +$as_echo_n "checking for -force_load linker flag... 
" >&6; } +if ${lt_cv_ld_force_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR cru libconftest.a conftest.o" >&5 + $AR cru libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&5 + elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +$as_echo "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? 
+ case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[91]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[012]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. 
+ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. 
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in dlfcn.h +do : + ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DLFCN_H 1 +_ACEOF + +fi + +done + + + + + +# Set options + + + + enable_dlopen=no + + + + + # Check whether --enable-static was given. +if test "${enable_static+set}" = set; then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_static=yes +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test "${with_pic+set}" = set; then : + withval=$with_pic; lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for lt_pkg in $withval; do + IFS="$lt_save_ifs" + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + pic_mode=default +fi + + +test -z "$pic_mode" && pic_mode=default + + + + + + + + # Check whether --enable-fast-install was given. 
+if test "${enable_fast_install+set}" = set; then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac +else + enable_fast_install=yes +fi + + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ltmain" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +$as_echo_n "checking for objdir... " >&6; } +if ${lt_cv_objdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +$as_echo "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +cat >>confdefs.h <<_ACEOF +#define LT_OBJDIR "$lt_cv_objdir/" +_ACEOF + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). 
+libext=a + +with_gnu_ld="$lt_cv_prog_gnu_ld" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +for cc_temp in $compiler""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` + + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/${ac_tool_prefix}file; then + lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. 
This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +$as_echo_n "checking for file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/file; then + lt_cv_path_MAGIC_CMD="$ac_dir/file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. 
This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC="$CC" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. 
+compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test "$GCC" = yes; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; } +if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. 
+ lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test "$GCC" = yes; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. 
+ ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + if test -n "$lt_prog_compiler_pic"; then + lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. 
+ case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. 
+ lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. 
+ lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... " >&6; } +if ${lt_cv_prog_compiler_pic+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +$as_echo "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. 
+# +if test -n "$lt_prog_compiler_pic"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } +if ${lt_cv_prog_compiler_pic_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works" >&6; } + +if test x"$lt_cv_prog_compiler_pic_works" = xyes; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +$as_echo "$lt_cv_prog_compiler_static_works" >&6; } + +if test x"$lt_cv_prog_compiler_static_works" = xyes; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links="nottested" +if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... " >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test "$hard_links" = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... 
" >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + export_dynamic_flag_spec='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... 
+ *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='${wl}--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. 
Moving up from 0x10000000 also allows more sbrk(2) space. + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + 
;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test "$ld_shlibs" = no; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + export_dynamic_flag_spec='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' ${wl}-bernotok' + allow_undefined_flag=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. + old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs 
$linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test "$GCC" = yes; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + export_dynamic_flag_spec='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +$as_echo_n "checking if $CC understands -b... 
" >&6; } +if ${lt_cv_prog_compiler__b+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler__b=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +$as_echo "$lt_cv_prog_compiler__b" >&6; } + +if test x"$lt_cv_prog_compiler__b" = xyes; then + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if ${lt_cv_irix_exported_symbol+:} false; then : + $as_echo_n "(cached) " >&6 +else + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_irix_exported_symbol=yes +else + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +$as_echo "$lt_cv_irix_exported_symbol" >&6; } + if test "$lt_cv_irix_exported_symbol" = yes; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + 
hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + else + case $host_os in + openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + ;; + *) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + ;; + esac + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags 
-soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M 
${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. 
+ archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='${wl}-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | 
sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag='${wl}-z,text' + allow_undefined_flag='${wl}-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='${wl}-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='${wl}-Blargedynsym' + ;; + esac + fi + fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +$as_echo "$ld_shlibs" >&6; } +test "$ld_shlibs" = no && can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. 
+ ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... " >&6; } + +if test "$GCC" = yes; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;; + *) lt_sed_strip_eq="s,=/,/,g" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. 
+ lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. + case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's,/\([A-Za-z]:\),\1,g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. 
+ soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. 
+ # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) 
so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... + sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | 
hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. 
+ hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major 
$libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. + case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + 
shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + 
sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... 
" >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test "X$hardcode_automatic" = "Xyes" ; then + + # We can hardcode non-existent directories. + if test "$hardcode_direct" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no && + test "$hardcode_minus_L" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +$as_echo "$hardcode_action" >&6; } + +if test "$hardcode_action" = relink || + test "$inherit_rpath" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... 
" >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + +fi + + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes; then : + lt_cv_dlopen="shl_load" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +$as_echo_n "checking for shl_load in -ldld... " >&6; } +if ${ac_cv_lib_dld_shl_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (); +int +main () +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_shl_load=yes +else + ac_cv_lib_dld_shl_load=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +$as_echo "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes; then : + lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" +else + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +$as_echo_n "checking for dlopen in -lsvld... 
" >&6; } +if ${ac_cv_lib_svld_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_svld_dlopen=yes +else + ac_cv_lib_svld_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +$as_echo "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes; then : + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +$as_echo_n "checking for dld_link in -ldld... " >&6; } +if ${ac_cv_lib_dld_dld_link+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dld_link (); +int +main () +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_dld_link=yes +else + ac_cv_lib_dld_dld_link=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +$as_echo "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes; then : + lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +$as_echo_n "checking whether a program can dlopen itself... " >&6; } +if ${lt_cv_dlopen_self+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +$as_echo "$lt_cv_dlopen_self" >&6; } + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +$as_echo_n "checking whether a statically linked program can dlopen itself... 
" >&6; } +if ${lt_cv_dlopen_self_static+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisbility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +$as_echo "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +$as_echo_n "checking whether stripping libraries is possible... " >&6; } +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + ;; + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + ;; + esac +fi + + + + + + + + + + + + + # Report which library types will actually be built + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +$as_echo_n "checking if libtool supports shared libraries... 
" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +$as_echo "$can_build_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +$as_echo_n "checking whether to build shared libraries... " >&6; } + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +$as_echo "$enable_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +$as_echo_n "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +$as_echo "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC="$lt_save_CC" + + + + + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only expand once: + + + +# Extract the first word of "ocamlbuild", so it can be a program name with args. +set dummy ocamlbuild; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_OCAMLBUILD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OCAMLBUILD"; then + ac_cv_prog_OCAMLBUILD="$OCAMLBUILD" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OCAMLBUILD="ocamlbuild" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OCAMLBUILD=$ac_cv_prog_OCAMLBUILD +if test -n "$OCAMLBUILD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OCAMLBUILD" >&5 +$as_echo "$OCAMLBUILD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + + +# Check whether --enable-mpi was given. +if test "${enable_mpi+set}" = set; then : + enableval=$enable_mpi; enable_mpi=$enableval +else + enable_mpi=no +fi + + +if test "$enable_mpi" = "yes"; then + if test $PRECISION = q; then + as_fn_error $? "quad precision is not supported in MPI" "$LINENO" 5 + fi + + + + + + for ac_prog in mpicc hcc mpcc mpcc_r mpxlc cmpicc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MPICC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MPICC"; then + ac_cv_prog_MPICC="$MPICC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MPICC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MPICC=$ac_cv_prog_MPICC +if test -n "$MPICC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPICC" >&5 +$as_echo "$MPICC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$MPICC" && break +done +test -n "$MPICC" || MPICC="$CC" + + acx_mpi_save_CC="$CC" + CC="$MPICC" + + + +if test x = x"$MPILIBS"; then + ac_fn_c_check_func "$LINENO" "MPI_Init" "ac_cv_func_MPI_Init" +if test "x$ac_cv_func_MPI_Init" = xyes; then : + MPILIBS=" " +fi + +fi +if test x = x"$MPILIBS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for MPI_Init in -lmpi" >&5 +$as_echo_n "checking for MPI_Init in -lmpi... " >&6; } +if ${ac_cv_lib_mpi_MPI_Init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmpi $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (); +int +main () +{ +return MPI_Init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mpi_MPI_Init=yes +else + ac_cv_lib_mpi_MPI_Init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpi_MPI_Init" >&5 +$as_echo "$ac_cv_lib_mpi_MPI_Init" >&6; } +if test "x$ac_cv_lib_mpi_MPI_Init" = xyes; then : + MPILIBS="-lmpi" +fi + +fi +if test x = x"$MPILIBS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for MPI_Init in -lmpich" >&5 +$as_echo_n "checking for MPI_Init in -lmpich... " >&6; } +if ${ac_cv_lib_mpich_MPI_Init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmpich $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (); +int +main () +{ +return MPI_Init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mpich_MPI_Init=yes +else + ac_cv_lib_mpich_MPI_Init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpich_MPI_Init" >&5 +$as_echo "$ac_cv_lib_mpich_MPI_Init" >&6; } +if test "x$ac_cv_lib_mpich_MPI_Init" = xyes; then : + MPILIBS="-lmpich" +fi + +fi + +if test x != x"$MPILIBS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5 +$as_echo_n "checking for mpi.h... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + MPILIBS="" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +CC="$acx_mpi_save_CC" + + + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x = x"$MPILIBS"; then + as_fn_error $? "could not find mpi library for --enable-mpi" "$LINENO" 5 + : +else + +$as_echo "#define HAVE_MPI 1" >>confdefs.h + + : +fi + + # Extract the first word of "mpirun", so it can be a program name with args. +set dummy mpirun; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MPIRUN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MPIRUN"; then + ac_cv_prog_MPIRUN="$MPIRUN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MPIRUN="mpirun" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MPIRUN=$ac_cv_prog_MPIRUN +if test -n "$MPIRUN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPIRUN" >&5 +$as_echo "$MPIRUN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + + + save_CC=$CC + CC=$MPICC + # The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of MPI_Fint" >&5 +$as_echo_n "checking size of MPI_Fint... " >&6; } +if ${ac_cv_sizeof_MPI_Fint+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (MPI_Fint))" "ac_cv_sizeof_MPI_Fint" "#include +"; then : + +else + if test "$ac_cv_type_MPI_Fint" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (MPI_Fint) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_MPI_Fint=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_MPI_Fint" >&5 +$as_echo "$ac_cv_sizeof_MPI_Fint" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_MPI_FINT $ac_cv_sizeof_MPI_Fint +_ACEOF + + + CC=$save_CC + if test 0 = $ac_cv_sizeof_MPI_Fint; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: sizeof(MPI_Fint) test failed" >&5 +$as_echo "$as_me: WARNING: sizeof(MPI_Fint) test failed" >&2;}; + # The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5 +$as_echo_n "checking size of int... 
" >&6; } +if ${ac_cv_sizeof_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int" >&5 +$as_echo "$ac_cv_sizeof_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_INT $ac_cv_sizeof_int +_ACEOF + + + if test 0 = $ac_cv_sizeof_int; then as_fn_error $? "sizeof(int) test failed" "$LINENO" 5; fi + ac_cv_sizeof_MPI_Fint=$ac_cv_sizeof_int + fi + C_MPI_FINT=C_INT`expr $ac_cv_sizeof_MPI_Fint \* 8`_T + +fi + if test "$enable_mpi" = "yes"; then + MPI_TRUE= + MPI_FALSE='#' +else + MPI_TRUE='#' + MPI_FALSE= +fi + + + + + + + + +# Try to determine "good" native compiler flags if none specified via CFLAGS +if test "$ac_test_CFLAGS" != "set"; then + CFLAGS="" + case $ax_cv_c_compiler_vendor in + dec) CFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" + ;; + + sun) CFLAGS="-native -fast -xO5 -dalign" + ;; + + hp) CFLAGS="+Oall +Optrs_ansi +DSnative" + ;; + + ibm) xlc_opt="-qtune=auto" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $xlc_opt" >&5 +$as_echo_n "checking whether C compiler accepts $xlc_opt... " >&6; } +ax_save_FLAGS=$CFLAGS + CFLAGS="$xlc_opt" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval `$as_echo "ax_cv_c_flags_$xlc_opt" | $as_tr_sh`=yes +else + eval `$as_echo "ax_cv_c_flags_$xlc_opt" | $as_tr_sh`=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +eval ax_check_compiler_flags=$`$as_echo "ax_cv_c_flags_$xlc_opt" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="-O3 -qansialias -w $xlc_opt" +else + CFLAGS="-O3 -qansialias -w" + echo "******************************************************" + echo "* You seem to have the IBM C compiler. It is *" + echo "* recommended for best performance that you use: *" + echo "* *" + echo "* CFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" + echo "* ^^^ ^^^ *" + echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" + echo "* CPU you have. (Set the CFLAGS environment var. *" + echo "* and re-run configure.) For more info, man cc. *" + echo "******************************************************" +fi + + ;; + + intel) CFLAGS="-O3" + # Intel seems to have changed the spelling of this flag recently + icc_ansi_alias="unknown" + for flag in -ansi-alias -ansi_alias; do + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5 +$as_echo_n "checking whether C compiler accepts $flag... " >&6; } +ax_save_FLAGS=$CFLAGS + CFLAGS="$flag" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval `$as_echo "ax_cv_c_flags_$flag" | $as_tr_sh`=yes +else + eval `$as_echo "ax_cv_c_flags_$flag" | $as_tr_sh`=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +eval ax_check_compiler_flags=$`$as_echo "ax_cv_c_flags_$flag" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + icc_ansi_alias=$flag; break +else + : +fi + + done + if test "x$icc_ansi_alias" != xunknown; then + CFLAGS="$CFLAGS $icc_ansi_alias" + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -malign-double" >&5 +$as_echo_n "checking whether C compiler accepts -malign-double... " >&6; } +if ${ax_cv_c_flags__malign_double+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-malign-double" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__malign_double=yes +else + ax_cv_c_flags__malign_double=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__malign_double +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -malign-double" +else + : +fi + + # We used to check for architecture flags here, e.g. -xHost etc., + # but these flags are problematic. On icc-12.0.0, "-mavx -xHost" + # overrides -mavx with -xHost, generating SSE2 code instead of AVX + # code. ICC does not seem to support -mtune=host or equivalent + # non-ABI changing flag. + ;; + + gnu) + # Default optimization flags for gcc on all systems. 
+ # Somehow -O3 does not imply -fomit-frame-pointer on ia32 + CFLAGS="-O3 -fomit-frame-pointer" + + # tune for the host by default + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mtune=native" >&5 +$as_echo_n "checking whether C compiler accepts -mtune=native... " >&6; } +if ${ax_cv_c_flags__mtune_native+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-mtune=native" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__mtune_native=yes +else + ax_cv_c_flags__mtune_native=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__mtune_native +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -mtune=native" +else + : +fi + + + # -malign-double for x86 systems + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -malign-double" >&5 +$as_echo_n "checking whether C compiler accepts -malign-double... " >&6; } +if ${ax_cv_c_flags__malign_double+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-malign-double" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__malign_double=yes +else + ax_cv_c_flags__malign_double=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__malign_double +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -malign-double" +else + : +fi + + + # -fstrict-aliasing for gcc-2.95+ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fstrict-aliasing" >&5 +$as_echo_n "checking whether C compiler accepts -fstrict-aliasing... " >&6; } +if ${ax_cv_c_flags__fstrict_aliasing+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-fstrict-aliasing" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__fstrict_aliasing=yes +else + ax_cv_c_flags__fstrict_aliasing=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__fstrict_aliasing +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -fstrict-aliasing" +else + : +fi + + + # -fno-schedule-insns is pretty much required on all risc + # processors. + # + # gcc performs one pass of instruction scheduling, then a pass of + # register allocation, then another pass of instruction + # scheduling. The first pass reorders instructions in a way that + # is pretty much the worst possible for the purposes of register + # allocation. We disable the first pass. 
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-schedule-insns" >&5 +$as_echo_n "checking whether C compiler accepts -fno-schedule-insns... " >&6; } +if ${ax_cv_c_flags__fno_schedule_insns+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-fno-schedule-insns" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__fno_schedule_insns=yes +else + ax_cv_c_flags__fno_schedule_insns=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__fno_schedule_insns +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -fno-schedule-insns" +else + : +fi + + + # note that we enable "unsafe" fp optimization with other compilers, too + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -ffast-math" >&5 +$as_echo_n "checking whether C compiler accepts -ffast-math... " >&6; } +if ${ax_cv_c_flags__ffast_math+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-ffast-math" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__ffast_math=yes +else + ax_cv_c_flags__ffast_math=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__ffast_math +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -ffast-math" +else + : +fi + + + ;; + esac + + if test -z "$CFLAGS"; then + echo "" + echo "********************************************************" + echo "* WARNING: Don't know the best CFLAGS for this system *" + echo "* Use ./configure CFLAGS=... to specify your own flags *" + echo "* (otherwise, a default of CFLAGS=-O3 will be used) *" + echo "********************************************************" + echo "" + CFLAGS="-O3" + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $CFLAGS" >&5 +$as_echo_n "checking whether C compiler accepts $CFLAGS... " >&6; } +ax_save_FLAGS=$CFLAGS + CFLAGS="$CFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval `$as_echo "ax_cv_c_flags_$CFLAGS" | $as_tr_sh`=yes +else + eval `$as_echo "ax_cv_c_flags_$CFLAGS" | $as_tr_sh`=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +eval ax_check_compiler_flags=$`$as_echo "ax_cv_c_flags_$CFLAGS" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + : +else + + echo "" + echo "********************************************************" + echo "* WARNING: The guessed CFLAGS don't seem to work with *" + echo "* your compiler. *" + echo "* Use ./configure CFLAGS=... 
to specify your own flags *" + echo "********************************************************" + echo "" + CFLAGS="" + +fi + + +fi + + +case "${ax_cv_c_compiler_vendor}" in + intel) # Stop icc from defining __GNUC__, except on MacOS where this fails + case "${host_os}" in + *darwin*) ;; # icc -no-gcc fails to compile some system headers + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -no-gcc" >&5 +$as_echo_n "checking whether C compiler accepts -no-gcc... " >&6; } +if ${ax_cv_c_flags__no_gcc+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-no-gcc" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__no_gcc=yes +else + ax_cv_c_flags__no_gcc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__no_gcc +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CC="$CC -no-gcc" +else + : +fi + + ;; + esac + ;; + + hp) # must (sometimes) manually increase cpp limits to handle fftw3.h + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Wp,-H128000" >&5 +$as_echo_n "checking whether C compiler accepts -Wp,-H128000... " >&6; } +if ${ax_cv_c_flags__Wp+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-Wp,-H128000" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__Wp=yes +else + ax_cv_c_flags__Wp=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__Wp +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CC="$CC -Wp,-H128000" +else + : +fi + + ;; + + portland) # -Masmkeyword required for asm("") cycle counters + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Masmkeyword" >&5 +$as_echo_n "checking whether C compiler accepts -Masmkeyword... " >&6; } +if ${ax_cv_c_flags__Masmkeyword+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-Masmkeyword" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__Masmkeyword=yes +else + ax_cv_c_flags__Masmkeyword=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__Masmkeyword +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CC="$CC -Masmkeyword" +else + : +fi + + ;; +esac + +case "${ax_cv_c_compiler_vendor}" in + gnu|intel) + # SSE/SSE2 + if test "$have_sse2" = "yes" -a "x$SSE2_CFLAGS" = x; then + if test "$PRECISION" = d; then flag=msse2; else flag=msse; fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -$flag" >&5 +$as_echo_n "checking whether C compiler accepts -$flag... " >&6; } +ax_save_FLAGS=$CFLAGS + CFLAGS="-$flag" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval `$as_echo "ax_cv_c_flags_-$flag" | $as_tr_sh`=yes +else + eval `$as_echo "ax_cv_c_flags_-$flag" | $as_tr_sh`=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +eval ax_check_compiler_flags=$`$as_echo "ax_cv_c_flags_-$flag" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + SSE2_CFLAGS="-$flag" +else + as_fn_error $? "Need a version of gcc with -$flag" "$LINENO" 5 +fi + + fi + + # AVX + if test "$have_avx" = "yes" -a "x$AVX_CFLAGS" = x; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 +$as_echo_n "checking whether C compiler accepts -mavx... " >&6; } +if ${ax_cv_c_flags__mavx+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__mavx=yes +else + ax_cv_c_flags__mavx=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__mavx +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + AVX_CFLAGS="-mavx" +else + as_fn_error $? "Need a version of gcc with -mavx" "$LINENO" 5 +fi + + fi + + if test "$have_altivec" = "yes" -a "x$ALTIVEC_CFLAGS" = x; then + # -DFAKE__VEC__ is a workaround because gcc-3.3 does not + # #define __VEC__ with -maltivec. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -faltivec" >&5 +$as_echo_n "checking whether C compiler accepts -faltivec... 
" >&6; } +if ${ax_cv_c_flags__faltivec+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-faltivec" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__faltivec=yes +else + ax_cv_c_flags__faltivec=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__faltivec +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + ALTIVEC_CFLAGS="-faltivec" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -maltivec -mabi=altivec" >&5 +$as_echo_n "checking whether C compiler accepts -maltivec -mabi=altivec... " >&6; } +if ${ax_cv_c_flags__maltivec__mabi_altivec+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-maltivec -mabi=altivec" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__maltivec__mabi_altivec=yes +else + ax_cv_c_flags__maltivec__mabi_altivec=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__maltivec__mabi_altivec +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + ALTIVEC_CFLAGS="-maltivec -mabi=altivec -DFAKE__VEC__" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fvec" >&5 +$as_echo_n "checking whether C compiler accepts -fvec... 
" >&6; } +if ${ax_cv_c_flags__fvec+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-fvec" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__fvec=yes +else + ax_cv_c_flags__fvec=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__fvec +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + ALTIVEC_CFLAGS="-fvec" +else + as_fn_error $? "Need a version of gcc with -maltivec" "$LINENO" 5 +fi + +fi + +fi + + fi + + if test "$have_neon" = "yes" -a "x$NEON_CFLAGS" = x; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mfpu=neon" >&5 +$as_echo_n "checking whether C compiler accepts -mfpu=neon... " >&6; } +if ${ax_cv_c_flags__mfpu_neon+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-mfpu=neon" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__mfpu_neon=yes +else + ax_cv_c_flags__mfpu_neon=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__mfpu_neon +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + NEON_CFLAGS="-mfpu=neon" +else + as_fn_error $? 
"Need a version of gcc with -mfpu=neon" "$LINENO" 5 +fi + + fi + + ;; +esac + + + + + + +if test "$with_incoming_stack_boundary"x != "no"x; then + case "${ax_cv_c_compiler_vendor}" in + gnu) + tentative_flags="-mincoming-stack-boundary=$with_incoming_stack_boundary"; + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $tentative_flags" >&5 +$as_echo_n "checking whether C compiler accepts $tentative_flags... " >&6; } +ax_save_FLAGS=$CFLAGS + CFLAGS="$tentative_flags" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval `$as_echo "ax_cv_c_flags_$tentative_flags" | $as_tr_sh`=yes +else + eval `$as_echo "ax_cv_c_flags_$tentative_flags" | $as_tr_sh`=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +eval ax_check_compiler_flags=$`$as_echo "ax_cv_c_flags_$tentative_flags" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + STACK_ALIGN_CFLAGS=$tentative_flags +else + : +fi + + ;; + esac +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +for ac_header in libintl.h malloc.h stddef.h stdlib.h string.h strings.h sys/time.h unistd.h limits.h c_asm.h intrinsics.h stdint.h mach/mach_time.h sys/sysctl.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +save_CFLAGS="$CFLAGS" +save_CPPFLAGS="$CPPFLAGS" +CFLAGS="$CFLAGS $ALTIVEC_CFLAGS" +CPPFLAGS="$CPPFLAGS $ALTIVEC_CFLAGS" +for ac_header in altivec.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "altivec.h" "ac_cv_header_altivec_h" "$ac_includes_default" +if test "x$ac_cv_header_altivec_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_ALTIVEC_H 1 +_ACEOF + +fi + +done + +CFLAGS="$save_CFLAGS" +CPPFLAGS="$save_CPPFLAGS" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 +$as_echo_n "checking for an ANSI C-conforming const... " >&6; } +if ${ac_cv_c_const+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#ifndef __cplusplus + /* Ultrix mips cc rejects this sort of thing. 
*/ + typedef int charset[2]; + const charset cs = { 0, 0 }; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *pcpcc; + char **ppc; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* AIX XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + pcpcc = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++pcpcc; + ppc = (char**) pcpcc; + pcpcc = (char const *const *) ppc; + { /* SCO 3.2v4 cc rejects this sort of thing. */ + char tx; + char *t = &tx; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + if (s) return 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ + struct s { int j; const int *ap[3]; } bx; + struct s *b = &bx; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + if (!foo) return 0; + } + return !cs[0] && !zero.x; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_const=yes +else + ac_cv_c_const=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 +$as_echo "$ac_cv_c_const" >&6; } +if test $ac_cv_c_const = no; then + +$as_echo "#define const /**/" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5 +$as_echo_n "checking for inline... 
" >&6; } +if ${ac_cv_c_inline+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_c_inline=no +for ac_kw in inline __inline__ __inline; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifndef __cplusplus +typedef int foo_t; +static $ac_kw foo_t static_foo () {return 0; } +$ac_kw foo_t foo () {return 0; } +#endif + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_inline=$ac_kw +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_inline" != no && break +done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5 +$as_echo "$ac_cv_c_inline" >&6; } + +case $ac_cv_c_inline in + inline | yes) ;; + *) + case $ac_cv_c_inline in + no) ac_val=;; + *) ac_val=$ac_cv_c_inline;; + esac + cat >>confdefs.h <<_ACEOF +#ifndef __cplusplus +#define inline $ac_val +#endif +_ACEOF + ;; +esac + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5 +$as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; } +if ${ac_cv_header_time+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include + +int +main () +{ +if ((struct tm *) 0) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_time=yes +else + ac_cv_header_time=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_time" >&5 +$as_echo "$ac_cv_header_time" >&6; } +if test $ac_cv_header_time = yes; then + +$as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h + +fi + +ac_fn_c_check_type "$LINENO" "long double" "ac_cv_type_long_double" "$ac_includes_default" +if test "x$ac_cv_type_long_double" = xyes; then : + +$as_echo "#define HAVE_LONG_DOUBLE 1" >>confdefs.h + +else + +if test $PRECISION = l; then + as_fn_error $? "long double is not a supported type with your compiler." "$LINENO" 5 +fi + +fi + +ac_fn_c_check_type "$LINENO" "hrtime_t" "ac_cv_type_hrtime_t" " +#if HAVE_SYS_TIME_H +#include +#endif + +" +if test "x$ac_cv_type_hrtime_t" = xyes; then : + +$as_echo "#define HAVE_HRTIME_T 1" >>confdefs.h + +fi + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5 +$as_echo_n "checking size of int... 
" >&6; } +if ${ac_cv_sizeof_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int" >&5 +$as_echo "$ac_cv_sizeof_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_INT $ac_cv_sizeof_int +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned int" >&5 +$as_echo_n "checking size of unsigned int... 
" >&6; } +if ${ac_cv_sizeof_unsigned_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned int))" "ac_cv_sizeof_unsigned_int" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_unsigned_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_int" >&5 +$as_echo "$ac_cv_sizeof_unsigned_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_INT $ac_cv_sizeof_unsigned_int +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long" >&5 +$as_echo_n "checking size of long... 
" >&6; } +if ${ac_cv_sizeof_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long))" "ac_cv_sizeof_long" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long" >&5 +$as_echo "$ac_cv_sizeof_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG $ac_cv_sizeof_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" >&5 +$as_echo_n "checking size of unsigned long... 
" >&6; } +if ${ac_cv_sizeof_unsigned_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" "ac_cv_sizeof_unsigned_long" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_unsigned_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_long" >&5 +$as_echo "$ac_cv_sizeof_unsigned_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_LONG $ac_cv_sizeof_unsigned_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5 +$as_echo_n "checking size of long long... 
" >&6; } +if ${ac_cv_sizeof_long_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_long_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (long long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_long_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5 +$as_echo "$ac_cv_sizeof_long_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long long" >&5 +$as_echo_n "checking size of unsigned long long... 
" >&6; } +if ${ac_cv_sizeof_unsigned_long_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long long))" "ac_cv_sizeof_unsigned_long_long" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_unsigned_long_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned long long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_long_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_long_long" >&5 +$as_echo "$ac_cv_sizeof_unsigned_long_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_LONG_LONG $ac_cv_sizeof_unsigned_long_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of size_t" >&5 +$as_echo_n "checking size of size_t... 
" >&6; } +if ${ac_cv_sizeof_size_t+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (size_t))" "ac_cv_sizeof_size_t" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_size_t" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (size_t) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_size_t=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_size_t" >&5 +$as_echo "$ac_cv_sizeof_size_t" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_SIZE_T $ac_cv_sizeof_size_t +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of ptrdiff_t" >&5 +$as_echo_n "checking size of ptrdiff_t... 
" >&6; } +if ${ac_cv_sizeof_ptrdiff_t+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (ptrdiff_t))" "ac_cv_sizeof_ptrdiff_t" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_ptrdiff_t" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (ptrdiff_t) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_ptrdiff_t=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_ptrdiff_t" >&5 +$as_echo "$ac_cv_sizeof_ptrdiff_t" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_PTRDIFF_T $ac_cv_sizeof_ptrdiff_t +_ACEOF + + + +ac_fn_c_check_type "$LINENO" "uintptr_t" "ac_cv_type_uintptr_t" "$ac_includes_default +#ifdef HAVE_STDINT_H +# include +#endif +" +if test "x$ac_cv_type_uintptr_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_UINTPTR_T 1 +_ACEOF + + +else + # The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5 +$as_echo_n "checking size of void *... 
" >&6; } +if ${ac_cv_sizeof_void_p+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_void_p" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (void *) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_void_p=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5 +$as_echo "$ac_cv_sizeof_void_p" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_VOID_P $ac_cv_sizeof_void_p +_ACEOF + + +fi + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of float" >&5 +$as_echo_n "checking size of float... " >&6; } +if ${ac_cv_sizeof_float+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (float))" "ac_cv_sizeof_float" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_float" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (float) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_float=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_float" >&5 +$as_echo "$ac_cv_sizeof_float" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_FLOAT $ac_cv_sizeof_float +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. 
+# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of double" >&5 +$as_echo_n "checking size of double... " >&6; } +if ${ac_cv_sizeof_double+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (double))" "ac_cv_sizeof_double" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_double" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (double) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_double=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_double" >&5 +$as_echo "$ac_cv_sizeof_double" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_DOUBLE $ac_cv_sizeof_double +_ACEOF + + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of fftw_r2r_kind" >&5 +$as_echo_n "checking size of fftw_r2r_kind... 
" >&6; } +if ${ac_cv_sizeof_fftw_r2r_kind+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (fftw_r2r_kind))" "ac_cv_sizeof_fftw_r2r_kind" "typedef enum { + FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2, + FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6, + FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10 +} fftw_r2r_kind; +"; then : + +else + if test "$ac_cv_type_fftw_r2r_kind" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (fftw_r2r_kind) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_fftw_r2r_kind=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_fftw_r2r_kind" >&5 +$as_echo "$ac_cv_sizeof_fftw_r2r_kind" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_FFTW_R2R_KIND $ac_cv_sizeof_fftw_r2r_kind +_ACEOF + + +if test 0 = $ac_cv_sizeof_fftw_r2r_kind; then as_fn_error $? "sizeof(fftw_r2r_kind) test failed" "$LINENO" 5; fi +C_FFTW_R2R_KIND=C_INT`expr $ac_cv_sizeof_fftw_r2r_kind \* 8`_T + + +# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works +# for constant arguments. Useless! +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 +$as_echo_n "checking for working alloca.h... " >&6; } +if ${ac_cv_working_alloca_h+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +char *p = (char *) alloca (2 * sizeof (int)); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_working_alloca_h=yes +else + ac_cv_working_alloca_h=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 +$as_echo "$ac_cv_working_alloca_h" >&6; } +if test $ac_cv_working_alloca_h = yes; then + +$as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 +$as_echo_n "checking for alloca... " >&6; } +if ${ac_cv_func_alloca_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca (size_t); +# endif +# endif +# endif +# endif +#endif + +int +main () +{ +char *p = (char *) alloca (1); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_func_alloca_works=yes +else + ac_cv_func_alloca_works=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 +$as_echo "$ac_cv_func_alloca_works" >&6; } + +if test $ac_cv_func_alloca_works = yes; then + +$as_echo "#define HAVE_ALLOCA 1" >>confdefs.h + +else + # The SVR3 libPW and SVR4 libucb both contain incompatible functions +# that cause trouble. Some versions do not even contain alloca or +# contain a buggy version. If you still want to use their alloca, +# use ar to extract alloca.o from them instead of compiling alloca.c. 
+ +ALLOCA=\${LIBOBJDIR}alloca.$ac_objext + +$as_echo "#define C_ALLOCA 1" >>confdefs.h + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5 +$as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; } +if ${ac_cv_os_cray+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if defined CRAY && ! defined CRAY2 +webecray +#else +wenotbecray +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "webecray" >/dev/null 2>&1; then : + ac_cv_os_cray=yes +else + ac_cv_os_cray=no +fi +rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5 +$as_echo "$ac_cv_os_cray" >&6; } +if test $ac_cv_os_cray = yes; then + for ac_func in _getb67 GETB67 getb67; do + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + +cat >>confdefs.h <<_ACEOF +#define CRAY_STACKSEG_END $ac_func +_ACEOF + + break +fi + + done +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 +$as_echo_n "checking stack direction for C alloca... " >&6; } +if ${ac_cv_c_stack_direction+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_c_stack_direction=0 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +int +find_stack_direction (int *addr, int depth) +{ + int dir, dummy = 0; + if (! addr) + addr = &dummy; + *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; + dir = depth ? 
find_stack_direction (addr, depth - 1) : 0; + return dir + dummy; +} + +int +main (int argc, char **argv) +{ + return find_stack_direction (0, argc + !argv + 20) < 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_c_stack_direction=1 +else + ac_cv_c_stack_direction=-1 +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 +$as_echo "$ac_cv_c_stack_direction" >&6; } +cat >>confdefs.h <<_ACEOF +#define STACK_DIRECTION $ac_cv_c_stack_direction +_ACEOF + + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working strtod" >&5 +$as_echo_n "checking for working strtod... " >&6; } +if ${ac_cv_func_strtod+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_func_strtod=no +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +$ac_includes_default +#ifndef strtod +double strtod (); +#endif +int +main() +{ + { + /* Some versions of Linux strtod mis-parse strings with leading '+'. */ + char *string = " +69"; + char *term; + double value; + value = strtod (string, &term); + if (value != 69 || term != (string + 4)) + return 1; + } + + { + /* Under Solaris 2.4, strtod returns the wrong value for the + terminating character under some conditions. 
*/ + char *string = "NaN"; + char *term; + strtod (string, &term); + if (term != string && *(term - 1) == 0) + return 1; + } + return 0; +} + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_func_strtod=yes +else + ac_cv_func_strtod=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_strtod" >&5 +$as_echo "$ac_cv_func_strtod" >&6; } +if test $ac_cv_func_strtod = no; then + case " $LIBOBJS " in + *" strtod.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strtod.$ac_objext" + ;; +esac + +ac_fn_c_check_func "$LINENO" "pow" "ac_cv_func_pow" +if test "x$ac_cv_func_pow" = xyes; then : + +fi + +if test $ac_cv_func_pow = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pow in -lm" >&5 +$as_echo_n "checking for pow in -lm... " >&6; } +if ${ac_cv_lib_m_pow+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pow (); +int +main () +{ +return pow (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_pow=yes +else + ac_cv_lib_m_pow=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_pow" >&5 +$as_echo "$ac_cv_lib_m_pow" >&6; } +if test "x$ac_cv_lib_m_pow" = xyes; then : + POW_LIB=-lm +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot find library containing definition of pow" >&5 +$as_echo "$as_me: WARNING: cannot find library containing definition of pow" >&2;} +fi + +fi + +fi + +for ac_func in vprintf +do : + ac_fn_c_check_func "$LINENO" "vprintf" "ac_cv_func_vprintf" +if test "x$ac_cv_func_vprintf" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_VPRINTF 1 +_ACEOF + +ac_fn_c_check_func "$LINENO" "_doprnt" "ac_cv_func__doprnt" +if test "x$ac_cv_func__doprnt" = xyes; then : + +$as_echo "#define HAVE_DOPRNT 1" >>confdefs.h + +fi + +fi +done + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sin in -lm" >&5 +$as_echo_n "checking for sin in -lm... " >&6; } +if ${ac_cv_lib_m_sin+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char sin (); +int +main () +{ +return sin (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_sin=yes +else + ac_cv_lib_m_sin=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_sin" >&5 +$as_echo "$ac_cv_lib_m_sin" >&6; } +if test "x$ac_cv_lib_m_sin" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBM 1 +_ACEOF + + LIBS="-lm $LIBS" + +fi + + +if test $PRECISION = q; then + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using gcc 4.6.0 or later" >&5 +$as_echo_n "checking whether we are using gcc 4.6.0 or later... " >&6; } +if ${ax_cv_gcc_4_6_0+:} false; then : + $as_echo_n "(cached) " >&6 +else + +ax_cv_gcc_4_6_0=no +if test "$GCC" = "yes"; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifdef __GNUC__ +# if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ > 6) \ + || (__GNUC__ == 4 && __GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ >= 0) + yes; +# endif +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then : + ax_cv_gcc_4_6_0=yes +fi +rm -f conftest* + +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_4_6_0" >&5 +$as_echo "$ax_cv_gcc_4_6_0" >&6; } +if test "$ax_cv_gcc_4_6_0" = yes; then + : +else + as_fn_error $? "gcc 4.6 or later required for quad precision support" "$LINENO" 5 +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sinq in -lquadmath" >&5 +$as_echo_n "checking for sinq in -lquadmath... " >&6; } +if ${ac_cv_lib_quadmath_sinq+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lquadmath $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char sinq (); +int +main () +{ +return sinq (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_quadmath_sinq=yes +else + ac_cv_lib_quadmath_sinq=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_quadmath_sinq" >&5 +$as_echo "$ac_cv_lib_quadmath_sinq" >&6; } +if test "x$ac_cv_lib_quadmath_sinq" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBQUADMATH 1 +_ACEOF + + LIBS="-lquadmath $LIBS" + +else + as_fn_error $? "quad precision requires libquadmath for quad-precision trigonometric routines" "$LINENO" 5 +fi + + LIBQUADMATH=-lquadmath +fi + + +for ac_func in BSDgettimeofday gettimeofday gethrtime read_real_time time_base_to_time drand48 sqrt memset posix_memalign memalign _mm_malloc _mm_free clock_gettime mach_absolute_time sysctl abort sinl cosl snprintf +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + +ac_fn_c_check_decl "$LINENO" "sinl" "ac_cv_have_decl_sinl" "#include +" +if test "x$ac_cv_have_decl_sinl" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_SINL $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "cosl" "ac_cv_have_decl_cosl" "#include +" +if test "x$ac_cv_have_decl_cosl" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_COSL $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "sinq" "ac_cv_have_decl_sinq" "#include +" +if test "x$ac_cv_have_decl_sinq" = xyes; then : + ac_have_decl=1 
+else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_SINQ $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "cosq" "ac_cv_have_decl_cosq" "#include +" +if test "x$ac_cv_have_decl_cosq" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_COSQ $ac_have_decl +_ACEOF + +ac_fn_c_check_decl "$LINENO" "memalign" "ac_cv_have_decl_memalign" " +#ifdef HAVE_MALLOC_H +#include +#endif +" +if test "x$ac_cv_have_decl_memalign" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MEMALIGN $ac_have_decl +_ACEOF + +ac_fn_c_check_decl "$LINENO" "drand48" "ac_cv_have_decl_drand48" "$ac_includes_default" +if test "x$ac_cv_have_decl_drand48" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_DRAND48 $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "srand48" "ac_cv_have_decl_srand48" "$ac_includes_default" +if test "x$ac_cv_have_decl_srand48" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_SRAND48 $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "posix_memalign" "ac_cv_have_decl_posix_memalign" "$ac_includes_default" +if test "x$ac_cv_have_decl_posix_memalign" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_POSIX_MEMALIGN $ac_have_decl +_ACEOF + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _rtc intrinsic" >&5 +$as_echo_n "checking for _rtc intrinsic... " >&6; } +rtc_ok=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#ifdef HAVE_INTRINSICS_H +#include +#endif +int +main () +{ +_rtc() + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + +$as_echo "#define HAVE__RTC 1" >>confdefs.h + +else + rtc_ok=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $rtc_ok" >&5 +$as_echo "$rtc_ok" >&6; } + +if test "$PRECISION" = "l"; then + for ac_func in cosl sinl tanl +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +else + as_fn_error $? "long-double precision requires long-double trigonometric routines" "$LINENO" 5 +fi +done + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for isnan" >&5 +$as_echo_n "checking for isnan... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main () +{ +if (!isnan(3.14159)) isnan(2.7183); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ok=yes +else + ok=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +if test "$ok" = "yes"; then + +$as_echo "#define HAVE_ISNAN 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${ok}" >&5 +$as_echo "${ok}" >&6; } + + + +ax_gcc_aligns_stack=no +if test "$GCC" = "yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mpreferred-stack-boundary=4" >&5 +$as_echo_n "checking whether C compiler accepts -mpreferred-stack-boundary=4... " >&6; } +if ${ax_cv_c_flags__mpreferred_stack_boundary_4+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-mpreferred-stack-boundary=4" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__mpreferred_stack_boundary_4=yes +else + ax_cv_c_flags__mpreferred_stack_boundary_4=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__mpreferred_stack_boundary_4 +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the stack is at least 8-byte aligned by gcc" >&5 +$as_echo_n "checking whether the stack is at least 8-byte aligned by gcc... " >&6; } + save_CFLAGS="$CFLAGS" + CFLAGS="-O" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -malign-double" >&5 +$as_echo_n "checking whether C compiler accepts -malign-double... " >&6; } +if ${ax_cv_c_flags__malign_double+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_save_FLAGS=$CFLAGS + CFLAGS="-malign-double" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ax_cv_c_flags__malign_double=yes +else + ax_cv_c_flags__malign_double=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_save_FLAGS +fi + +eval ax_check_compiler_flags=$ax_cv_c_flags__malign_double +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_check_compiler_flags" >&5 +$as_echo "$ax_check_compiler_flags" >&6; } +if test "x$ax_check_compiler_flags" = xyes; then + CFLAGS="$CFLAGS -malign-double" +else + : +fi + + if test "$cross_compiling" = yes; then : + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using gcc 3.0.0 or later" >&5 +$as_echo_n "checking whether we are using gcc 3.0.0 or later... 
" >&6; } +if ${ax_cv_gcc_3_0_0+:} false; then : + $as_echo_n "(cached) " >&6 +else + +ax_cv_gcc_3_0_0=no +if test "$GCC" = "yes"; then +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifdef __GNUC__ +# if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0) \ + || (__GNUC__ == 3 && __GNUC_MINOR__ == 0 && __GNUC_PATCHLEVEL__ >= 0) + yes; +# endif +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then : + ax_cv_gcc_3_0_0=yes +fi +rm -f conftest* + +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_3_0_0" >&5 +$as_echo "$ax_cv_gcc_3_0_0" >&6; } +if test "$ax_cv_gcc_3_0_0" = yes; then + ax_gcc_stack_align_bug=no +else + ax_gcc_stack_align_bug=yes +fi + +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +# include + struct yuck { int blechh; }; + int one(void) { return 1; } + struct yuck ick(void) { struct yuck y; y.blechh = 3; return y; } +# define CHK_ALIGN(x) if ((((long) &(x)) & 0x7)) { fprintf(stderr, "bad alignment of " #x "\n"); exit(1); } + void blah(int foo) { double foobar; CHK_ALIGN(foobar); } + int main2(void) {double ok1; struct yuck y; double ok2; CHK_ALIGN(ok1); + CHK_ALIGN(ok2); y = ick(); blah(one()); return 0;} + int main(void) { if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); return main2(); } + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_gcc_aligns_stack=yes; ax_gcc_stack_align_bug=no +else + ax_gcc_stack_align_bug=yes +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + CFLAGS="$save_CFLAGS" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_gcc_aligns_stack" >&5 +$as_echo "$ax_gcc_aligns_stack" >&6; } + +else + : +fi + +fi +if test "$ax_gcc_aligns_stack" = yes; then + : +else + : +fi + + +if test "${enable_debug}" = "yes"; then + CFLAGS="-g" +fi + +if test "$enable_debug" = yes || test "$USE_MAINTAINER_MODE" = yes; 
then +if test "$ac_test_CFLAGS" != "set"; then + if test $ac_cv_c_compiler_gnu = yes; then + CFLAGS="$CFLAGS -Wall -W -Wcast-qual -Wpointer-arith -Wcast-align -pedantic -Wno-long-long -Wshadow -Wbad-function-cast -Wwrite-strings -Wstrict-prototypes -Wredundant-decls -Wnested-externs" # -Wundef -Wconversion -Wmissing-prototypes -Wmissing-declarations + fi +fi +fi + + +# Check whether --enable-fortran was given. +if test "${enable_fortran+set}" = set; then : + enableval=$enable_fortran; enable_fortran=$enableval +else + enable_fortran=yes +fi + + +if test "$enable_fortran" = "yes"; then + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$F77"; then + ac_cv_prog_F77="$F77" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_F77="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +F77=$ac_cv_prog_F77 +if test -n "$F77"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $F77" >&5 +$as_echo "$F77" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$F77" && break + done +fi +if test -z "$F77"; then + ac_ct_F77=$F77 + for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_F77"; then + ac_cv_prog_ac_ct_F77="$ac_ct_F77" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_F77="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_F77=$ac_cv_prog_ac_ct_F77 +if test -n "$ac_ct_F77"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_F77" >&5 +$as_echo "$ac_ct_F77" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_F77" && break +done + + if test "x$ac_ct_F77" = x; then + F77="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + F77=$ac_ct_F77 + fi +fi + + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran 77 compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done +rm -f a.out + +# If we don't use `.F' as extension, the preprocessor is not run on the +# input file. (Note that this only needs to work for GNU compilers.) 
+ac_save_ext=$ac_ext +ac_ext=F +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU Fortran 77 compiler" >&5 +$as_echo_n "checking whether we are using the GNU Fortran 77 compiler... " >&6; } +if ${ac_cv_f77_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + program main +#ifndef __GNUC__ + choke me +#endif + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_f77_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_compiler_gnu" >&5 +$as_echo "$ac_cv_f77_compiler_gnu" >&6; } +ac_ext=$ac_save_ext +ac_test_FFLAGS=${FFLAGS+set} +ac_save_FFLAGS=$FFLAGS +FFLAGS= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $F77 accepts -g" >&5 +$as_echo_n "checking whether $F77 accepts -g... " >&6; } +if ${ac_cv_prog_f77_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + FFLAGS=-g +cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO"; then : + ac_cv_prog_f77_g=yes +else + ac_cv_prog_f77_g=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_f77_g" >&5 +$as_echo "$ac_cv_prog_f77_g" >&6; } +if test "$ac_test_FFLAGS" = set; then + FFLAGS=$ac_save_FFLAGS +elif test $ac_cv_prog_f77_g = yes; then + if test "x$ac_cv_f77_compiler_gnu" = xyes; then + FFLAGS="-g -O2" + else + FFLAGS="-g" + fi +else + if test "x$ac_cv_f77_compiler_gnu" = xyes; then + FFLAGS="-O2" + else + FFLAGS= + fi +fi + +if test $ac_compiler_gnu = yes; then + G77=yes +else + G77= +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + ac_ext=f +ac_compile='$F77 
-c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + +if test -z "$F77" || test "X$F77" = "Xno"; then + _lt_disable_F77=yes +fi + +archive_cmds_need_lc_F77=no +allow_undefined_flag_F77= +always_export_symbols_F77=no +archive_expsym_cmds_F77= +export_dynamic_flag_spec_F77= +hardcode_direct_F77=no +hardcode_direct_absolute_F77=no +hardcode_libdir_flag_spec_F77= +hardcode_libdir_separator_F77= +hardcode_minus_L_F77=no +hardcode_automatic_F77=no +inherit_rpath_F77=no +module_cmds_F77= +module_expsym_cmds_F77= +link_all_deplibs_F77=unknown +old_archive_cmds_F77=$old_archive_cmds +reload_flag_F77=$reload_flag +reload_cmds_F77=$reload_cmds +no_undefined_flag_F77= +whole_archive_flag_spec_F77= +enable_shared_with_static_runtimes_F77=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. +objext=o +objext_F77=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_F77" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. 
+compiler=$CC + + + # save warnings/boilerplate of simple test code + ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + + ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + + # Allow CC to be a program name with arguments. + lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + compiler_F77=$CC + for cc_temp in $compiler""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` + + GCC=$G77 + if test -n "$compiler"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +$as_echo_n "checking if libtool supports shared libraries... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +$as_echo "$can_build_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +$as_echo_n "checking whether to build shared libraries... " >&6; } + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[4-9]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +$as_echo "$enable_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +$as_echo_n "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +$as_echo "$enable_static" >&6; } + + GCC_F77="$G77" + LD_F77="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + lt_prog_compiler_wl_F77= +lt_prog_compiler_pic_F77= +lt_prog_compiler_static_F77= + + + if test "$GCC" = yes; then + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_static_F77='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_F77='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic_F77='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + lt_prog_compiler_pic_F77='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. 
+ ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic_F77='-DDLL_EXPORT' + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_F77='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static_F77= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_F77='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared_F77=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_F77='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_F77=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic_F77='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl_F77='-Xlinker ' + if test -n "$lt_prog_compiler_pic_F77"; then + lt_prog_compiler_pic_F77="-Xcompiler $lt_prog_compiler_pic_F77" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. 
+ case $host_os in + aix*) + lt_prog_compiler_wl_F77='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_F77='-Bstatic' + else + lt_prog_compiler_static_F77='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic_F77='-DDLL_EXPORT' + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl_F77='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_F77='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static_F77='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl_F77='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static_F77='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. + ecc*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fPIC' + lt_prog_compiler_static_F77='-static' + ;; + # Lahey Fortran 8.1. 
+ lf95*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='--shared' + lt_prog_compiler_static_F77='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl_F77='-Wl,-Wl,,' + lt_prog_compiler_pic_F77='-PIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fpic' + lt_prog_compiler_static_F77='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl_F77='-Wl,' + # All Alpha code is PIC. + lt_prog_compiler_static_F77='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-qpic' + lt_prog_compiler_static_F77='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + lt_prog_compiler_wl_F77='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + lt_prog_compiler_wl_F77='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + lt_prog_compiler_wl_F77='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fPIC' + lt_prog_compiler_static_F77='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-fpic' + lt_prog_compiler_static_F77='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ lt_prog_compiler_pic_F77='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl_F77='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static_F77='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static_F77='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl_F77='-Qoption ld ';; + *) + lt_prog_compiler_wl_F77='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl_F77='-Qoption ld ' + lt_prog_compiler_pic_F77='-PIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + lt_prog_compiler_pic_F77='-Kconform_pic' + lt_prog_compiler_static_F77='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_pic_F77='-KPIC' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl_F77='-Wl,' + lt_prog_compiler_can_build_shared_F77=no + ;; + + uts4*) + lt_prog_compiler_pic_F77='-pic' + lt_prog_compiler_static_F77='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared_F77=no + ;; + esac + fi + +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic_F77= + ;; + *) + lt_prog_compiler_pic_F77="$lt_prog_compiler_pic_F77" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... 
" >&6; } +if ${lt_cv_prog_compiler_pic_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_F77=$lt_prog_compiler_pic_F77 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_F77" >&5 +$as_echo "$lt_cv_prog_compiler_pic_F77" >&6; } +lt_prog_compiler_pic_F77=$lt_cv_prog_compiler_pic_F77 + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic_F77"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works... " >&6; } +if ${lt_cv_prog_compiler_pic_works_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works_F77=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic_F77" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works_F77=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_F77" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works_F77" >&6; } + +if test x"$lt_cv_prog_compiler_pic_works_F77" = xyes; then + case $lt_prog_compiler_pic_F77 in + "" | " "*) ;; + *) lt_prog_compiler_pic_F77=" $lt_prog_compiler_pic_F77" ;; + esac +else + lt_prog_compiler_pic_F77= + lt_prog_compiler_can_build_shared_F77=no +fi + +fi + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl_F77 eval lt_tmp_static_flag=\"$lt_prog_compiler_static_F77\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works_F77=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works_F77=yes + fi + else + lt_cv_prog_compiler_static_works_F77=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_F77" >&5 +$as_echo "$lt_cv_prog_compiler_static_works_F77" >&6; } + +if test x"$lt_cv_prog_compiler_static_works_F77" = xyes; then + : +else + lt_prog_compiler_static_F77= +fi + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o_F77=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_F77=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5 +$as_echo "$lt_cv_prog_compiler_c_o_F77" >&6; } + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o_F77=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_F77=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5 +$as_echo "$lt_cv_prog_compiler_c_o_F77" >&6; } + + + + +hard_links="nottested" +if test "$lt_cv_prog_compiler_c_o_F77" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test "$hard_links" = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag_F77= + always_export_symbols_F77=no + archive_cmds_F77= + archive_expsym_cmds_F77= + compiler_needs_object_F77=no + enable_shared_with_static_runtimes_F77=no + export_dynamic_flag_spec_F77= + export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic_F77=no + hardcode_direct_F77=no + hardcode_direct_absolute_F77=no + hardcode_libdir_flag_spec_F77= + hardcode_libdir_separator_F77= + hardcode_minus_L_F77=no + hardcode_shlibpath_var_F77=unsupported + inherit_rpath_F77=no + link_all_deplibs_F77=unknown + module_cmds_F77= + module_expsym_cmds_F77= + old_archive_from_new_cmds_F77= + old_archive_from_expsyms_cmds_F77= + thread_safe_flag_spec_F77= + whole_archive_flag_spec_F77= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms_F77= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. 
Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + exclude_expsyms_F77='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + ld_shlibs_F77=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. 
Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + export_dynamic_flag_spec_F77='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec_F77="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec_F77= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + ld_shlibs_F77=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='' + ;; + m68k) + archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_minus_L_F77=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag_F77=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds_F77='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs_F77=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, F77) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec_F77='-L$libdir' + export_dynamic_flag_spec_F77='${wl}--export-all-symbols' + allow_undefined_flag_F77=unsupported + always_export_symbols_F77=no + enable_shared_with_static_runtimes_F77=yes + export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_F77='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + archive_expsym_cmds_F77='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs_F77=no + fi + ;; + + haiku*) + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + link_all_deplibs_F77=yes + ;; + + interix[3-9]*) + hardcode_direct_F77=no + hardcode_shlibpath_var_F77=no + hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir' + export_dynamic_flag_spec_F77='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. 
To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds_F77='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec_F77='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec_F77='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + 
whole_archive_flag_spec_F77= + tmp_sharedflag='--shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec_F77='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object_F77=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec_F77='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + compiler_needs_object_F77=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds_F77='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec_F77='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + archive_cmds_F77='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> 
$output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs_F77=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_F77='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs_F77=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_F77=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs_F77=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. 
We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_F77=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds_F77='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs_F77=no + fi + ;; + esac + + if test "$ld_shlibs_F77" = no; then + runpath_var= + hardcode_libdir_flag_spec_F77= + export_dynamic_flag_spec_F77= + whole_archive_flag_spec_F77= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag_F77=unsupported + always_export_symbols_F77=yes + archive_expsym_cmds_F77='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' 
+ # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + hardcode_minus_L_F77=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct_F77=unsupported + fi + ;; + + aix[4-9]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds_F77='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds_F77='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. 
If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds_F77='' + hardcode_direct_F77=yes + hardcode_direct_absolute_F77=yes + hardcode_libdir_separator_F77=':' + link_all_deplibs_F77=yes + file_list_spec_F77='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct_F77=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L_F77=yes + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_libdir_separator_F77= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + export_dynamic_flag_spec_F77='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols_F77=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag_F77='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath__F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__F77 +fi + + hardcode_libdir_flag_spec_F77='${wl}-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds_F77='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec_F77='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag_F77="-z nodefs" + archive_expsym_cmds_F77="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath__F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__F77"; then + lt_cv_aix_libpath__F77="/usr/lib:/lib" + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__F77 +fi + + hardcode_libdir_flag_spec_F77='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag_F77=' ${wl}-bernotok' + allow_undefined_flag_F77=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec_F77='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec_F77='$convenience' + fi + archive_cmds_need_lc_F77=yes + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds_F77="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds_F77='' + ;; + m68k) + archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_minus_L_F77=yes + ;; + esac + ;; + + bsdi[45]*) + export_dynamic_flag_spec_F77=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + hardcode_libdir_flag_spec_F77=' ' + allow_undefined_flag_F77=unsupported + always_export_symbols_F77=yes + file_list_spec_F77='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ archive_cmds_F77='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + archive_expsym_cmds_F77='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, F77)='true' + enable_shared_with_static_runtimes_F77=yes + exclude_expsyms_F77='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds_F77='chmod 644 $oldlib' + postlink_cmds_F77='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + hardcode_libdir_flag_spec_F77=' ' + allow_undefined_flag_F77=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ archive_cmds_F77='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds_F77='true' + # FIXME: Should let the user specify the lib program. + old_archive_cmds_F77='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes_F77=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc_F77=no + hardcode_direct_F77=no + hardcode_automatic_F77=yes + hardcode_shlibpath_var_F77=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + whole_archive_flag_spec_F77='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + compiler_needs_object_F77=yes + else + whole_archive_flag_spec_F77='' + fi + link_all_deplibs_F77=yes + allow_undefined_flag_F77="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + archive_cmds_F77="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + module_cmds_F77="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + archive_expsym_cmds_F77="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + module_expsym_cmds_F77="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + + else + 
ld_shlibs_F77=no + fi + + ;; + + dgux*) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_shlibpath_var_F77=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=yes + hardcode_minus_L_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly*) + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + hpux9*) + if test "$GCC" = yes; then + archive_cmds_F77='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + archive_cmds_F77='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec_F77='${wl}+b ${wl}$libdir' + hardcode_libdir_separator_F77=: + hardcode_direct_F77=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L_F77=yes + export_dynamic_flag_spec_F77='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + archive_cmds_F77='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec_F77='${wl}+b ${wl}$libdir' + hardcode_libdir_separator_F77=: + hardcode_direct_F77=yes + hardcode_direct_absolute_F77=yes + export_dynamic_flag_spec_F77='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L_F77=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + archive_cmds_F77='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds_F77='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_F77='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds_F77='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds_F77='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_F77='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + fi + if test "$with_gnu_ld" = no; then + hardcode_libdir_flag_spec_F77='${wl}+b ${wl}$libdir' + hardcode_libdir_separator_F77=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct_F77=no + hardcode_shlibpath_var_F77=no + ;; + *) + hardcode_direct_F77=yes + hardcode_direct_absolute_F77=yes + export_dynamic_flag_spec_F77='${wl}-E' + + # 
hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L_F77=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if ${lt_cv_irix_exported_symbol+:} false; then : + $as_echo_n "(cached) " >&6 +else + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + cat > conftest.$ac_ext <<_ACEOF + + subroutine foo + end +_ACEOF +if ac_fn_f77_try_link "$LINENO"; then : + lt_cv_irix_exported_symbol=yes +else + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$save_LDFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +$as_echo "$lt_cv_irix_exported_symbol" >&6; } + if test "$lt_cv_irix_exported_symbol" = yes; then + archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all 
"-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc_F77='no' + hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator_F77=: + inherit_rpath_F77=yes + link_all_deplibs_F77=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds_F77='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + newsos6) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=yes + hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator_F77=: + hardcode_shlibpath_var_F77=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + hardcode_direct_F77=yes + hardcode_shlibpath_var_F77=no + hardcode_direct_absolute_F77=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir' + export_dynamic_flag_spec_F77='${wl}-E' + else + case $host_os in + openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) + archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_F77='-R$libdir' + ;; + *) + archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags' + hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir' + ;; + esac + fi + else + ld_shlibs_F77=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_minus_L_F77=yes + allow_undefined_flag_F77=unsupported + archive_cmds_F77='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds_F77='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + allow_undefined_flag_F77=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds_F77='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag_F77=' -expect_unresolved \*' + archive_cmds_F77='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + archive_cmds_need_lc_F77='no' + hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator_F77=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + allow_undefined_flag_F77=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds_F77='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + 
hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir' + else + allow_undefined_flag_F77=' -expect_unresolved \*' + archive_cmds_F77='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds_F77='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec_F77='-rpath $libdir' + fi + archive_cmds_need_lc_F77='no' + hardcode_libdir_separator_F77=: + ;; + + solaris*) + no_undefined_flag_F77=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + archive_cmds_F77='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds_F77='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + archive_cmds_F77='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + 
archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec_F77='-R$libdir' + hardcode_shlibpath_var_F77=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + whole_archive_flag_spec_F77='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + whole_archive_flag_spec_F77='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs_F77=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds_F77='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_direct_F77=yes + hardcode_minus_L_F77=yes + hardcode_shlibpath_var_F77=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ archive_cmds_F77='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds_F77='$CC -r -o $output$reload_objs' + hardcode_direct_F77=no + ;; + motorola) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct_F77=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var_F77=no + ;; + + sysv4.3*) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var_F77=no + export_dynamic_flag_spec_F77='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var_F77=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs_F77=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag_F77='${wl}-z,text' + archive_cmds_need_lc_F77=no + hardcode_shlibpath_var_F77=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds_F77='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ no_undefined_flag_F77='${wl}-z,text' + allow_undefined_flag_F77='${wl}-z,nodefs' + archive_cmds_need_lc_F77=no + hardcode_shlibpath_var_F77=no + hardcode_libdir_flag_spec_F77='${wl}-R,$libdir' + hardcode_libdir_separator_F77=':' + link_all_deplibs_F77=yes + export_dynamic_flag_spec_F77='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + archive_cmds_F77='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds_F77='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_F77='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec_F77='-L$libdir' + hardcode_shlibpath_var_F77=no + ;; + + *) + ld_shlibs_F77=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec_F77='${wl}-Blargedynsym' + ;; + esac + fi + fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_F77" >&5 +$as_echo "$ld_shlibs_F77" >&6; } +test "$ld_shlibs_F77" = no && can_build_shared=no + +with_gnu_ld_F77=$with_gnu_ld + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc_F77" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc_F77=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds_F77 in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. 
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl_F77 + pic_flag=$lt_prog_compiler_pic_F77 + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag_F77 + allow_undefined_flag_F77= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc_F77=no + else + lt_cv_archive_cmds_need_lc_F77=yes + fi + allow_undefined_flag_F77=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_F77" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc_F77" >&6; } + archive_cmds_need_lc_F77=$lt_cv_archive_cmds_need_lc_F77 + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... 
" >&6; } + +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + 
hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_F77\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_F77\"" + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. 
+ hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major 
$libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. + case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + 
shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + 
sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... 
" >&6; } +hardcode_action_F77= +if test -n "$hardcode_libdir_flag_spec_F77" || + test -n "$runpath_var_F77" || + test "X$hardcode_automatic_F77" = "Xyes" ; then + + # We can hardcode non-existent directories. + if test "$hardcode_direct_F77" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, F77)" != no && + test "$hardcode_minus_L_F77" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action_F77=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action_F77=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action_F77=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_F77" >&5 +$as_echo "$hardcode_action_F77" >&6; } + +if test "$hardcode_action_F77" = relink || + test "$inherit_rpath_F77" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC="$lt_save_CC" + CFLAGS="$lt_save_CFLAGS" +fi # test "$_lt_disable_F77" != yes + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + if test -z "$F77"; then + enable_fortran=no + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: *** Couldn't find f77 compiler; using default Fortran wrappers." >&5 +$as_echo "$as_me: WARNING: *** Couldn't find f77 compiler; using default Fortran wrappers." 
>&2;} + else + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to get verbose linking output from $F77" >&5 +$as_echo_n "checking how to get verbose linking output from $F77... " >&6; } +if ${ac_cv_prog_f77_v+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO"; then : + ac_cv_prog_f77_v= +# Try some options frequently used verbose output +for ac_verb in -v -verbose --verbose -V -\#\#\#; do + cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF + +# Compile and link our simple test program by passing a flag (argument +# 1 to this macro) to the Fortran compiler in order to get +# "verbose" output that we can then parse for the Fortran linker +# flags. +ac_save_FFLAGS=$FFLAGS +FFLAGS="$FFLAGS $ac_verb" +eval "set x $ac_link" +shift +$as_echo "$as_me:${as_lineno-$LINENO}: $*" >&5 +# gfortran 4.3 outputs lines setting COLLECT_GCC_OPTIONS, COMPILER_PATH, +# LIBRARY_PATH; skip all such settings. +ac_f77_v_output=`eval $ac_link 5>&1 2>&1 | + sed '/^Driving:/d; /^Configured with:/d; + '"/^[_$as_cr_Letters][_$as_cr_alnum]*=/d"` +$as_echo "$ac_f77_v_output" >&5 +FFLAGS=$ac_save_FFLAGS + +rm -rf conftest* + +# On HP/UX there is a line like: "LPATH is: /foo:/bar:/baz" where +# /foo, /bar, and /baz are search directories for the Fortran linker. +# Here, we change these into -L/foo -L/bar -L/baz (and put it first): +ac_f77_v_output="`echo $ac_f77_v_output | + grep 'LPATH is:' | + sed 's|.*LPATH is\(: *[^ ]*\).*|\1|;s|: */| -L/|g'` $ac_f77_v_output" + +# FIXME: we keep getting bitten by quoted arguments; a more general fix +# that detects unbalanced quotes in FLIBS should be implemented +# and (ugh) tested at some point. 
+case $ac_f77_v_output in + # With xlf replace commas with spaces, + # and remove "-link" and closing parenthesis. + *xlfentry*) + ac_f77_v_output=`echo $ac_f77_v_output | + sed ' + s/,/ /g + s/ -link / /g + s/) *$// + ' + ` ;; + + # With Intel ifc, ignore the quoted -mGLOB_options_string stuff (quoted + # $LIBS confuse us, and the libraries appear later in the output anyway). + *mGLOB_options_string*) + ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"-mGLOB[^"]*"/ /g'` ;; + + # Portland Group compiler has singly- or doubly-quoted -cmdline argument + # Singly-quoted arguments were reported for versions 5.2-4 and 6.0-4. + # Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2". + *-cmdline\ * | *-ignore\ * | *-def\ *) + ac_f77_v_output=`echo $ac_f77_v_output | sed "\ + s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g + s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g + s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;; + + # If we are using fort77 (the f2c wrapper) then filter output and delete quotes. + *fort77*f2c*gcc*) + ac_f77_v_output=`echo "$ac_f77_v_output" | sed -n ' + /:[ ]\+Running[ ]\{1,\}"gcc"/{ + /"-c"/d + /[.]c"*/d + s/^.*"gcc"/"gcc"/ + s/"//gp + }'` ;; + + # If we are using Cray Fortran then delete quotes. 
+ *cft90*) + ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"//g'` ;; +esac + + + # look for -l* and *.a constructs in the output + for ac_arg in $ac_f77_v_output; do + case $ac_arg in + [\\/]*.a | ?:[\\/]*.a | -[lLRu]*) + ac_cv_prog_f77_v=$ac_verb + break 2 ;; + esac + done +done +if test -z "$ac_cv_prog_f77_v"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot determine how to obtain linking information from $F77" >&5 +$as_echo "$as_me: WARNING: cannot determine how to obtain linking information from $F77" >&2;} +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: compilation failed" >&5 +$as_echo "$as_me: WARNING: compilation failed" >&2;} +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_f77_v" >&5 +$as_echo "$ac_cv_prog_f77_v" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran 77 libraries of $F77" >&5 +$as_echo_n "checking for Fortran 77 libraries of $F77... " >&6; } +if ${ac_cv_f77_libs+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$FLIBS" != "x"; then + ac_cv_f77_libs="$FLIBS" # Let the user override the test. +else + +cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF + +# Compile and link our simple test program by passing a flag (argument +# 1 to this macro) to the Fortran compiler in order to get +# "verbose" output that we can then parse for the Fortran linker +# flags. +ac_save_FFLAGS=$FFLAGS +FFLAGS="$FFLAGS $ac_cv_prog_f77_v" +eval "set x $ac_link" +shift +$as_echo "$as_me:${as_lineno-$LINENO}: $*" >&5 +# gfortran 4.3 outputs lines setting COLLECT_GCC_OPTIONS, COMPILER_PATH, +# LIBRARY_PATH; skip all such settings. 
+ac_f77_v_output=`eval $ac_link 5>&1 2>&1 | + sed '/^Driving:/d; /^Configured with:/d; + '"/^[_$as_cr_Letters][_$as_cr_alnum]*=/d"` +$as_echo "$ac_f77_v_output" >&5 +FFLAGS=$ac_save_FFLAGS + +rm -rf conftest* + +# On HP/UX there is a line like: "LPATH is: /foo:/bar:/baz" where +# /foo, /bar, and /baz are search directories for the Fortran linker. +# Here, we change these into -L/foo -L/bar -L/baz (and put it first): +ac_f77_v_output="`echo $ac_f77_v_output | + grep 'LPATH is:' | + sed 's|.*LPATH is\(: *[^ ]*\).*|\1|;s|: */| -L/|g'` $ac_f77_v_output" + +# FIXME: we keep getting bitten by quoted arguments; a more general fix +# that detects unbalanced quotes in FLIBS should be implemented +# and (ugh) tested at some point. +case $ac_f77_v_output in + # With xlf replace commas with spaces, + # and remove "-link" and closing parenthesis. + *xlfentry*) + ac_f77_v_output=`echo $ac_f77_v_output | + sed ' + s/,/ /g + s/ -link / /g + s/) *$// + ' + ` ;; + + # With Intel ifc, ignore the quoted -mGLOB_options_string stuff (quoted + # $LIBS confuse us, and the libraries appear later in the output anyway). + *mGLOB_options_string*) + ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"-mGLOB[^"]*"/ /g'` ;; + + # Portland Group compiler has singly- or doubly-quoted -cmdline argument + # Singly-quoted arguments were reported for versions 5.2-4 and 6.0-4. + # Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2". + *-cmdline\ * | *-ignore\ * | *-def\ *) + ac_f77_v_output=`echo $ac_f77_v_output | sed "\ + s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g + s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g + s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;; + + # If we are using fort77 (the f2c wrapper) then filter output and delete quotes. + *fort77*f2c*gcc*) + ac_f77_v_output=`echo "$ac_f77_v_output" | sed -n ' + /:[ ]\+Running[ ]\{1,\}"gcc"/{ + /"-c"/d + /[.]c"*/d + s/^.*"gcc"/"gcc"/ + s/"//gp + }'` ;; + + # If we are using Cray Fortran then delete quotes. 
+ *cft90*) + ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"//g'` ;; +esac + + + +ac_cv_f77_libs= + +# Save positional arguments (if any) +ac_save_positional="$@" + +set X $ac_f77_v_output +while test $# != 1; do + shift + ac_arg=$1 + case $ac_arg in + [\\/]*.a | ?:[\\/]*.a) + ac_exists=false + for ac_i in $ac_cv_f77_libs; do + if test x"$ac_arg" = x"$ac_i"; then + ac_exists=true + break + fi + done + + if test x"$ac_exists" = xtrue; then : + +else + ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" +fi + ;; + -bI:*) + ac_exists=false + for ac_i in $ac_cv_f77_libs; do + if test x"$ac_arg" = x"$ac_i"; then + ac_exists=true + break + fi + done + + if test x"$ac_exists" = xtrue; then : + +else + if test "$ac_compiler_gnu" = yes; then + for ac_link_opt in $ac_arg; do + ac_cv_f77_libs="$ac_cv_f77_libs -Xlinker $ac_link_opt" + done +else + ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" +fi +fi + ;; + # Ignore these flags. + -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -little \ + |-LANG:=* | -LIST:* | -LNO:* | -link) + ;; + -lkernel32) + case $host_os in + *cygwin*) ;; + *) ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" + ;; + esac + ;; + -[LRuYz]) + # These flags, when seen by themselves, take an argument. 
+ # We remove the space between option and argument and re-iterate + # unless we find an empty arg or a new option (starting with -) + case $2 in + "" | -*);; + *) + ac_arg="$ac_arg$2" + shift; shift + set X $ac_arg "$@" + ;; + esac + ;; + -YP,*) + for ac_j in `$as_echo "$ac_arg" | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do + ac_exists=false + for ac_i in $ac_cv_f77_libs; do + if test x"$ac_j" = x"$ac_i"; then + ac_exists=true + break + fi + done + + if test x"$ac_exists" = xtrue; then : + +else + ac_arg="$ac_arg $ac_j" + ac_cv_f77_libs="$ac_cv_f77_libs $ac_j" +fi + done + ;; + -[lLR]*) + ac_exists=false + for ac_i in $ac_cv_f77_libs; do + if test x"$ac_arg" = x"$ac_i"; then + ac_exists=true + break + fi + done + + if test x"$ac_exists" = xtrue; then : + +else + ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" +fi + ;; + -zallextract*| -zdefaultextract) + ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" + ;; + # Ignore everything else. + esac +done +# restore positional arguments +set X $ac_save_positional; shift + +# We only consider "LD_RUN_PATH" on Solaris systems. If this is seen, +# then we insist that the "run path" must be an absolute path (i.e. it +# must begin with a "/"). 
+case `(uname -sr) 2>/dev/null` in + "SunOS 5"*) + ac_ld_run_path=`$as_echo "$ac_f77_v_output" | + sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'` + test "x$ac_ld_run_path" != x && + if test "$ac_compiler_gnu" = yes; then + for ac_link_opt in $ac_ld_run_path; do + ac_cv_f77_libs="$ac_cv_f77_libs -Xlinker $ac_link_opt" + done +else + ac_cv_f77_libs="$ac_cv_f77_libs $ac_ld_run_path" +fi + ;; +esac +fi # test "x$[]_AC_LANG_PREFIX[]LIBS" = "x" + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_libs" >&5 +$as_echo "$ac_cv_f77_libs" >&6; } +FLIBS="$ac_cv_f77_libs" + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dummy main to link with Fortran 77 libraries" >&5 +$as_echo_n "checking for dummy main to link with Fortran 77 libraries... " >&6; } +if ${ac_cv_f77_dummy_main+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_f77_dm_save_LIBS=$LIBS + LIBS="$LIBS $FLIBS" + ac_fortran_dm_var=F77_DUMMY_MAIN + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # First, try linking without a dummy main: + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_fortran_dummy_main=none +else + ac_cv_fortran_dummy_main=unknown +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + if test $ac_cv_fortran_dummy_main = unknown; then + for ac_func in MAIN__ MAIN_ __main MAIN _MAIN __MAIN main_ main__ _main; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define $ac_fortran_dm_var $ac_func +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_fortran_dummy_main=$ac_func; break +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + done + fi + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + ac_cv_f77_dummy_main=$ac_cv_fortran_dummy_main + rm -rf conftest* + LIBS=$ac_f77_dm_save_LIBS + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_dummy_main" >&5 +$as_echo "$ac_cv_f77_dummy_main" >&6; } +F77_DUMMY_MAIN=$ac_cv_f77_dummy_main +if test "$F77_DUMMY_MAIN" != unknown; then : + if test $F77_DUMMY_MAIN != none; then + +cat >>confdefs.h <<_ACEOF +#define F77_DUMMY_MAIN $F77_DUMMY_MAIN +_ACEOF + + if test "x$ac_cv_fc_dummy_main" = "x$ac_cv_f77_dummy_main"; then + +$as_echo "#define FC_DUMMY_MAIN_EQ_F77 1" >>confdefs.h + + fi +fi +else + enable_fortran=no + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: *** Couldn't figure out how to link C and Fortran; using default Fortran wrappers." >&5 +$as_echo "$as_me: WARNING: *** Couldn't figure out how to link C and Fortran; using default Fortran wrappers." 
>&2;} +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi +else + +$as_echo "#define DISABLE_FORTRAN 1" >>confdefs.h + +fi + +if test "x$enable_fortran" = xyes; then + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran 77 name-mangling scheme" >&5 +$as_echo_n "checking for Fortran 77 name-mangling scheme... " >&6; } +if ${ac_cv_f77_mangling+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + subroutine foobar() + return + end + subroutine foo_bar() + return + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO"; then : + mv conftest.$ac_objext cfortran_test.$ac_objext + + ac_save_LIBS=$LIBS + LIBS="cfortran_test.$ac_objext $LIBS $FLIBS" + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + ac_success=no + for ac_foobar in foobar FOOBAR; do + for ac_underscore in "" "_"; do + ac_func="$ac_foobar$ac_underscore" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_success=yes; break 2 +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + done + done + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + if test "$ac_success" = "yes"; then + case $ac_foobar in + foobar) + ac_case=lower + ac_foo_bar=foo_bar + ;; + FOOBAR) + ac_case=upper + ac_foo_bar=FOO_BAR + ;; + esac + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + ac_success_extra=no + for ac_extra in "" "_"; do + ac_func="$ac_foo_bar$ac_underscore$ac_extra" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_success_extra=yes; break +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + done + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu + + if test "$ac_success_extra" = "yes"; then + ac_cv_f77_mangling="$ac_case case" + if test -z "$ac_underscore"; then + ac_cv_f77_mangling="$ac_cv_f77_mangling, no underscore" + else + ac_cv_f77_mangling="$ac_cv_f77_mangling, underscore" + fi + if test -z "$ac_extra"; then + ac_cv_f77_mangling="$ac_cv_f77_mangling, no extra underscore" + else + ac_cv_f77_mangling="$ac_cv_f77_mangling, extra underscore" + fi + else + ac_cv_f77_mangling="unknown" + fi + else + ac_cv_f77_mangling="unknown" + fi + + LIBS=$ac_save_LIBS + rm -rf conftest* + rm -f cfortran_test* +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? 
"cannot compile a simple Fortran program +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_mangling" >&5 +$as_echo "$ac_cv_f77_mangling" >&6; } + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +case $ac_cv_f77_mangling in + "lower case, no underscore, no extra underscore") + $as_echo "#define F77_FUNC(name,NAME) name" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) name" >>confdefs.h + ;; + "lower case, no underscore, extra underscore") + $as_echo "#define F77_FUNC(name,NAME) name" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) name ## _" >>confdefs.h + ;; + "lower case, underscore, no extra underscore") + $as_echo "#define F77_FUNC(name,NAME) name ## _" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) name ## _" >>confdefs.h + ;; + "lower case, underscore, extra underscore") + $as_echo "#define F77_FUNC(name,NAME) name ## _" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) name ## __" >>confdefs.h + ;; + "upper case, no underscore, no extra underscore") + $as_echo "#define F77_FUNC(name,NAME) NAME" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) NAME" >>confdefs.h + ;; + "upper case, no underscore, extra underscore") + $as_echo "#define F77_FUNC(name,NAME) NAME" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) NAME ## _" >>confdefs.h + ;; + "upper case, underscore, no extra underscore") + $as_echo "#define F77_FUNC(name,NAME) NAME ## _" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) NAME ## _" >>confdefs.h + ;; + "upper 
case, underscore, extra underscore") + $as_echo "#define F77_FUNC(name,NAME) NAME ## _" >>confdefs.h + + $as_echo "#define F77_FUNC_(name,NAME) NAME ## __" >>confdefs.h + ;; + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unknown Fortran name-mangling scheme" >&5 +$as_echo "$as_me: WARNING: unknown Fortran name-mangling scheme" >&2;} + ;; +esac + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +case $ac_cv_f77_mangling in + upper*) ac_val="F77FOO" ;; + lower*) ac_val="f77foo" ;; + *) ac_val="unknown" ;; +esac +case $ac_cv_f77_mangling in *," underscore"*) ac_val="$ac_val"_ ;; esac + +f77foo="$ac_val" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +case $ac_cv_f77_mangling in + upper*) ac_val="F77_FOO" ;; + lower*) ac_val="f77_foo" ;; + *) ac_val="unknown" ;; +esac +case $ac_cv_f77_mangling in *," underscore"*) ac_val="$ac_val"_ ;; esac +case $ac_cv_f77_mangling in *," extra underscore"*) ac_val="$ac_val"_ ;; esac + +f77_foo="$ac_val" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + f77_foo2=`echo $f77foo | sed 's/77/77_/'` + if test "$f77_foo" = "$f77_foo2"; 
then + +$as_echo "#define F77_FUNC_EQUIV 1" >>confdefs.h + + + # Include g77 wrappers by default for GNU systems or gfortran + with_g77_wrappers=$ac_cv_f77_compiler_gnu + case $host_os in *gnu*) with_g77_wrappers=yes ;; esac + fi +else + with_g77_wrappers=no +fi + + +# Check whether --with-g77-wrappers was given. +if test "${with_g77_wrappers+set}" = set; then : + withval=$with_g77_wrappers; with_g77_wrappers=$withval +fi + +if test "x$with_g77_wrappers" = "xyes"; then + +$as_echo "#define WITH_G77_WRAPPERS 1" >>confdefs.h + +fi + +have_smp="no" +# Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then : + enableval=$enable_openmp; enable_openmp=$enableval +else + enable_openmp=no +fi + + +if test "$enable_openmp" = "yes"; then + +$as_echo "#define HAVE_OPENMP 1" >>confdefs.h + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for OpenMP flag of C compiler" >&5 +$as_echo_n "checking for OpenMP flag of C compiler... " >&6; } +if ${ax_cv_c_openmp+:} false; then : + $as_echo_n "(cached) " >&6 +else + saveCFLAGS=$CFLAGS +ax_cv_c_openmp=unknown +# Flags to try: -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI), +# -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none +ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none" +if test "x$OPENMP_CFLAGS" != x; then + ax_openmp_flags="$OPENMP_CFLAGS $ax_openmp_flags" +fi +for ax_openmp_flag in $ax_openmp_flags; do + case $ax_openmp_flag in + none) CFLAGS=$saveC ;; + *) CFLAGS="$saveCFLAGS $ax_openmp_flag" ;; + esac + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char omp_set_num_threads (); +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +return omp_set_num_threads (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ax_cv_c_openmp=$ax_openmp_flag; break +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +done +CFLAGS=$saveCFLAGS + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_c_openmp" >&5 +$as_echo "$ax_cv_c_openmp" >&6; } +if test "x$ax_cv_c_openmp" = "xunknown"; then + as_fn_error $? "don't know how to enable OpenMP" "$LINENO" 5 +else + if test "x$ax_cv_c_openmp" != "xnone"; then + OPENMP_CFLAGS=$ax_cv_c_openmp + fi + +$as_echo "#define HAVE_OPENMP 1" >>confdefs.h + +fi + + +fi + +# Check whether --enable-threads was given. +if test "${enable_threads+set}" = set; then : + enableval=$enable_threads; enable_threads=$enableval +else + enable_threads=no +fi + + +if test "$enable_threads" = "yes"; then + +$as_echo "#define HAVE_THREADS 1" >>confdefs.h + +fi + + +# Check whether --with-combined-threads was given. +if test "${with_combined_threads+set}" = set; then : + withval=$with_combined_threads; with_combined_threads=$withval +else + with_combined_threads=no +fi + + +if test "$with_combined_threads" = yes; then + if test "$enable_openmp" = "yes"; then + as_fn_error $? "--with-combined-threads incompatible with --enable-openmp" "$LINENO" 5 + fi + if test "$enable_threads" != "yes"; then + as_fn_error $? "--with-combined-threads requires --enable-threads" "$LINENO" 5 + fi +fi + +THREADLIBS="" +if test "$enable_threads" = "yes"; then + # Win32 threads are the default on Windows: + if test -z "$THREADLIBS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Win32 threads" >&5 +$as_echo_n "checking for Win32 threads... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +_beginthreadex(0,0,0,0,0,0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + THREADLIBS=" "; { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + fi + + # POSIX threads, the default choice everywhere else: + if test -z "$THREADLIBS"; then + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +acx_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on True64 or Sequent). +# It gets checked for in the link test anyway. + +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 +$as_echo_n "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_join (); +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +return pthread_join (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + acx_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_ok" >&5 +$as_echo "$acx_pthread_ok" >&6; } + if test x"$acx_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mt -mthreads pthread --thread-safe pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# (where it should come before -mthreads to avoid spurious warnings) +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case "${host_cpu}-${host_os}" in + *solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) So, + # we'll just look for -pthreads and -lpthread first: + + acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags" + ;; +esac + +if test x"$acx_pthread_ok" = xno; then +for flag in $acx_pthread_flags; do + + case $flag in + none) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5 +$as_echo_n "checking whether pthreads work without any flags... 
" >&6; } + ;; + + -*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $flag" >&5 +$as_echo_n "checking whether pthreads work with $flag... " >&6; } + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + # Extract the first word of "pthread-config", so it can be a program name with args. +set dummy pthread-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_acx_pthread_config+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$acx_pthread_config"; then + ac_cv_prog_acx_pthread_config="$acx_pthread_config" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_acx_pthread_config="yes" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_acx_pthread_config" && ac_cv_prog_acx_pthread_config="no" +fi +fi +acx_pthread_config=$ac_cv_prog_acx_pthread_config +if test -n "$acx_pthread_config"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_config" >&5 +$as_echo "$acx_pthread_config" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + if test x"$acx_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$flag" >&5 +$as_echo_n "checking for the pthreads library -l$flag... 
" >&6; } + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <pthread.h> +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +pthread_t th; pthread_join(th, (void**) 0); + pthread_attr_init((pthread_attr_t*) 0); + pthread_cleanup_push((void(*)(void *)) 0, (void*) 0); + pthread_create((pthread_t*) 0, (pthread_attr_t*) 0, + (void*(*)(void *)) 0, (void*) 0); + pthread_cleanup_pop(0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + acx_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_ok" >&5 +$as_echo "$acx_pthread_ok" >&6; } + if test "x$acx_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$acx_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5 +$as_echo_n "checking for joinable pthread attribute... " >&6; } + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <pthread.h> +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +int attr=$attr; return attr; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + attr_name=$attr; break +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + done + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $attr_name" >&5 +$as_echo "$attr_name" >&6; } + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + +cat >>confdefs.h <<_ACEOF +#define PTHREAD_CREATE_JOINABLE $attr_name +_ACEOF + + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if more special flags are required for pthreads" >&5 +$as_echo_n "checking if more special flags are required for pthreads... " >&6; } + flag=no + case "${host_cpu}-${host_os}" in + *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";; + *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${flag}" >&5 +$as_echo "${flag}" >&6; } + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + for ac_prog in xlc_r cc_r +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_PTHREAD_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$PTHREAD_CC"; then + ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_PTHREAD_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PTHREAD_CC=$ac_cv_prog_PTHREAD_CC +if test -n "$PTHREAD_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5 +$as_echo "$PTHREAD_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$PTHREAD_CC" && break +done +test -n "$PTHREAD_CC" || PTHREAD_CC="${CC}" + + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + + + + + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$acx_pthread_ok" = xyes; then + THREADLIBS="$PTHREAD_LIBS " + CC="$PTHREAD_CC" + +$as_echo "#define USING_POSIX_THREADS 1" >>confdefs.h + + : +else + acx_pthread_ok=no + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + fi + + if test -z "$THREADLIBS"; then + as_fn_error $? 
"couldn't find threads library for --enable-threads" "$LINENO" 5 + fi + +$as_echo "#define HAVE_THREADS 1" >>confdefs.h + +fi + + if test "$enable_threads" = "yes"; then + THREADS_TRUE= + THREADS_FALSE='#' +else + THREADS_TRUE='#' + THREADS_FALSE= +fi + + if test "$enable_openmp" = "yes"; then + OPENMP_TRUE= + OPENMP_FALSE='#' +else + OPENMP_TRUE='#' + OPENMP_FALSE= +fi + + if test "$enable_threads" = "yes" -o "$enable_openmp" = "yes"; then + SMP_TRUE= + SMP_FALSE='#' +else + SMP_TRUE='#' + SMP_FALSE= +fi + + if test x"$with_combined_threads" = xyes; then + COMBINED_THREADS_TRUE= + COMBINED_THREADS_FALSE='#' +else + COMBINED_THREADS_TRUE='#' + COMBINED_THREADS_FALSE= +fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a cycle counter is available" >&5 +$as_echo_n "checking whether a cycle counter is available... " >&6; } +save_CPPFLAGS=$CPPFLAGS +CPPFLAGS="$CPPFLAGS -I$srcdir/kernel" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include "cycle.h" +#ifndef HAVE_TICK_COUNTER +# error No cycle counter +#endif +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ok=yes +else + ok=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +CPPFLAGS=$save_CPPFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ok" >&5 +$as_echo "$ok" >&6; } +if test $ok = no && test "x$with_slow_timer" = xno; then + echo "***************************************************************" + echo "WARNING: No cycle counter found. FFTW will use ESTIMATE mode " + echo " for all plans. See the manual for more information." 
+ echo "***************************************************************" +fi + + + +cat >>confdefs.h <<_ACEOF +#define FFTW_CC "$CC $CFLAGS" +_ACEOF + + +ac_config_files="$ac_config_files Makefile support/Makefile genfft/Makefile kernel/Makefile simd-support/Makefile dft/Makefile dft/scalar/Makefile dft/scalar/codelets/Makefile dft/simd/Makefile dft/simd/common/Makefile dft/simd/sse2/Makefile dft/simd/avx/Makefile dft/simd/altivec/Makefile dft/simd/neon/Makefile rdft/Makefile rdft/scalar/Makefile rdft/scalar/r2cf/Makefile rdft/scalar/r2cb/Makefile rdft/scalar/r2r/Makefile rdft/simd/Makefile rdft/simd/common/Makefile rdft/simd/sse2/Makefile rdft/simd/avx/Makefile rdft/simd/altivec/Makefile rdft/simd/neon/Makefile reodft/Makefile threads/Makefile api/Makefile mpi/Makefile libbench2/Makefile tests/Makefile doc/Makefile doc/FAQ/Makefile tools/Makefile tools/fftw_wisdom.1 tools/fftw-wisdom-to-conf m4/Makefile fftw.pc" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. 
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${SINGLE_TRUE}" && test -z "${SINGLE_FALSE}"; then + as_fn_error $? "conditional \"SINGLE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${LDOUBLE_TRUE}" && test -z "${LDOUBLE_FALSE}"; then + as_fn_error $? "conditional \"LDOUBLE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${QUAD_TRUE}" && test -z "${QUAD_FALSE}"; then + as_fn_error $? "conditional \"QUAD\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_SSE2_TRUE}" && test -z "${HAVE_SSE2_FALSE}"; then + as_fn_error $? "conditional \"HAVE_SSE2\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_AVX_TRUE}" && test -z "${HAVE_AVX_FALSE}"; then + as_fn_error $? "conditional \"HAVE_AVX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_ALTIVEC_TRUE}" && test -z "${HAVE_ALTIVEC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_ALTIVEC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_NEON_TRUE}" && test -z "${HAVE_NEON_FALSE}"; then + as_fn_error $? "conditional \"HAVE_NEON\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${MPI_TRUE}" && test -z "${MPI_FALSE}"; then + as_fn_error $? "conditional \"MPI\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${THREADS_TRUE}" && test -z "${THREADS_FALSE}"; then + as_fn_error $? "conditional \"THREADS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${OPENMP_TRUE}" && test -z "${OPENMP_FALSE}"; then + as_fn_error $? "conditional \"OPENMP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${SMP_TRUE}" && test -z "${SMP_FALSE}"; then + as_fn_error $? 
"conditional \"SMP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${COMBINED_THREADS_TRUE}" && test -z "${COMBINED_THREADS_FALSE}"; then + as_fn_error $? "conditional \"COMBINED_THREADS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. 
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. 
+as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by fftw $as_me 3.3.4, which was +generated by GNU Autoconf 2.69. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +fftw config.status 3.3.4 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. 
+This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? 
"unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' +PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' +max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED 
"$delay_single_quote_subst"`' +exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED 
"$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' +DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' +extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED 
"$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" 
| $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED 
"$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' +LD_F77='`$ECHO "$LD_F77" | $SED "$delay_single_quote_subst"`' +reload_flag_F77='`$ECHO "$reload_flag_F77" | $SED "$delay_single_quote_subst"`' +reload_cmds_F77='`$ECHO "$reload_cmds_F77" | $SED "$delay_single_quote_subst"`' +old_archive_cmds_F77='`$ECHO "$old_archive_cmds_F77" | $SED "$delay_single_quote_subst"`' +compiler_F77='`$ECHO "$compiler_F77" | $SED "$delay_single_quote_subst"`' +GCC_F77='`$ECHO "$GCC_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag_F77='`$ECHO "$lt_prog_compiler_no_builtin_flag_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic_F77='`$ECHO "$lt_prog_compiler_pic_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl_F77='`$ECHO "$lt_prog_compiler_wl_F77" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static_F77='`$ECHO "$lt_prog_compiler_static_F77" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o_F77='`$ECHO "$lt_cv_prog_compiler_c_o_F77" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc_F77='`$ECHO "$archive_cmds_need_lc_F77" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes_F77='`$ECHO "$enable_shared_with_static_runtimes_F77" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec_F77='`$ECHO 
"$export_dynamic_flag_spec_F77" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec_F77='`$ECHO "$whole_archive_flag_spec_F77" | $SED "$delay_single_quote_subst"`' +compiler_needs_object_F77='`$ECHO "$compiler_needs_object_F77" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds_F77='`$ECHO "$old_archive_from_new_cmds_F77" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds_F77='`$ECHO "$old_archive_from_expsyms_cmds_F77" | $SED "$delay_single_quote_subst"`' +archive_cmds_F77='`$ECHO "$archive_cmds_F77" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds_F77='`$ECHO "$archive_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`' +module_cmds_F77='`$ECHO "$module_cmds_F77" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds_F77='`$ECHO "$module_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`' +with_gnu_ld_F77='`$ECHO "$with_gnu_ld_F77" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag_F77='`$ECHO "$allow_undefined_flag_F77" | $SED "$delay_single_quote_subst"`' +no_undefined_flag_F77='`$ECHO "$no_undefined_flag_F77" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_F77='`$ECHO "$hardcode_libdir_flag_spec_F77" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator_F77='`$ECHO "$hardcode_libdir_separator_F77" | $SED "$delay_single_quote_subst"`' +hardcode_direct_F77='`$ECHO "$hardcode_direct_F77" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute_F77='`$ECHO "$hardcode_direct_absolute_F77" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L_F77='`$ECHO "$hardcode_minus_L_F77" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var_F77='`$ECHO "$hardcode_shlibpath_var_F77" | $SED "$delay_single_quote_subst"`' +hardcode_automatic_F77='`$ECHO "$hardcode_automatic_F77" | $SED "$delay_single_quote_subst"`' +inherit_rpath_F77='`$ECHO "$inherit_rpath_F77" | $SED "$delay_single_quote_subst"`' +link_all_deplibs_F77='`$ECHO "$link_all_deplibs_F77" | $SED 
"$delay_single_quote_subst"`' +always_export_symbols_F77='`$ECHO "$always_export_symbols_F77" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds_F77='`$ECHO "$export_symbols_cmds_F77" | $SED "$delay_single_quote_subst"`' +exclude_expsyms_F77='`$ECHO "$exclude_expsyms_F77" | $SED "$delay_single_quote_subst"`' +include_expsyms_F77='`$ECHO "$include_expsyms_F77" | $SED "$delay_single_quote_subst"`' +prelink_cmds_F77='`$ECHO "$prelink_cmds_F77" | $SED "$delay_single_quote_subst"`' +postlink_cmds_F77='`$ECHO "$postlink_cmds_F77" | $SED "$delay_single_quote_subst"`' +file_list_spec_F77='`$ECHO "$file_list_spec_F77" | $SED "$delay_single_quote_subst"`' +hardcode_action_F77='`$ECHO "$hardcode_action_F77" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in AS \ +DLLTOOL \ +OBJDUMP \ +SHELL \ +ECHO \ +PATH_SEPARATOR \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +sharedlib_from_linklib_cmd \ +AR \ +AR_FLAGS \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +nm_file_list_spec \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_separator \ +exclude_expsyms \ 
+include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib \ +LD_F77 \ +reload_flag_F77 \ +compiler_F77 \ +lt_prog_compiler_no_builtin_flag_F77 \ +lt_prog_compiler_pic_F77 \ +lt_prog_compiler_wl_F77 \ +lt_prog_compiler_static_F77 \ +lt_cv_prog_compiler_c_o_F77 \ +export_dynamic_flag_spec_F77 \ +whole_archive_flag_spec_F77 \ +compiler_needs_object_F77 \ +with_gnu_ld_F77 \ +allow_undefined_flag_F77 \ +no_undefined_flag_F77 \ +hardcode_libdir_flag_spec_F77 \ +hardcode_libdir_separator_F77 \ +exclude_expsyms_F77 \ +include_expsyms_F77 \ +file_list_spec_F77; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +sys_lib_dlsearch_path_spec \ +reload_cmds_F77 \ +old_archive_cmds_F77 \ +old_archive_from_new_cmds_F77 \ +old_archive_from_expsyms_cmds_F77 \ +archive_cmds_F77 \ +archive_expsym_cmds_F77 \ +module_cmds_F77 \ +module_expsym_cmds_F77 \ +export_symbols_cmds_F77 \ +prelink_cmds_F77 \ +postlink_cmds_F77; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done 
+ +ac_aux_dir='$ac_aux_dir' +xsi_shell='$xsi_shell' +lt_shell_append='$lt_shell_append' + +# See if we are running on zsh, and set the options which allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + TIMESTAMP='$TIMESTAMP' + RM='$RM' + ofile='$ofile' + + + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "support/Makefile") CONFIG_FILES="$CONFIG_FILES support/Makefile" ;; + "genfft/Makefile") CONFIG_FILES="$CONFIG_FILES genfft/Makefile" ;; + "kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;; + "simd-support/Makefile") CONFIG_FILES="$CONFIG_FILES simd-support/Makefile" ;; + "dft/Makefile") CONFIG_FILES="$CONFIG_FILES dft/Makefile" ;; + "dft/scalar/Makefile") CONFIG_FILES="$CONFIG_FILES dft/scalar/Makefile" ;; + "dft/scalar/codelets/Makefile") CONFIG_FILES="$CONFIG_FILES dft/scalar/codelets/Makefile" ;; + "dft/simd/Makefile") CONFIG_FILES="$CONFIG_FILES dft/simd/Makefile" ;; + "dft/simd/common/Makefile") CONFIG_FILES="$CONFIG_FILES dft/simd/common/Makefile" ;; + "dft/simd/sse2/Makefile") CONFIG_FILES="$CONFIG_FILES dft/simd/sse2/Makefile" ;; + "dft/simd/avx/Makefile") CONFIG_FILES="$CONFIG_FILES dft/simd/avx/Makefile" ;; + "dft/simd/altivec/Makefile") CONFIG_FILES="$CONFIG_FILES dft/simd/altivec/Makefile" ;; + "dft/simd/neon/Makefile") CONFIG_FILES="$CONFIG_FILES dft/simd/neon/Makefile" ;; + "rdft/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/Makefile" ;; + "rdft/scalar/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/scalar/Makefile" ;; + "rdft/scalar/r2cf/Makefile") 
CONFIG_FILES="$CONFIG_FILES rdft/scalar/r2cf/Makefile" ;; + "rdft/scalar/r2cb/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/scalar/r2cb/Makefile" ;; + "rdft/scalar/r2r/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/scalar/r2r/Makefile" ;; + "rdft/simd/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/simd/Makefile" ;; + "rdft/simd/common/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/simd/common/Makefile" ;; + "rdft/simd/sse2/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/simd/sse2/Makefile" ;; + "rdft/simd/avx/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/simd/avx/Makefile" ;; + "rdft/simd/altivec/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/simd/altivec/Makefile" ;; + "rdft/simd/neon/Makefile") CONFIG_FILES="$CONFIG_FILES rdft/simd/neon/Makefile" ;; + "reodft/Makefile") CONFIG_FILES="$CONFIG_FILES reodft/Makefile" ;; + "threads/Makefile") CONFIG_FILES="$CONFIG_FILES threads/Makefile" ;; + "api/Makefile") CONFIG_FILES="$CONFIG_FILES api/Makefile" ;; + "mpi/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/Makefile" ;; + "libbench2/Makefile") CONFIG_FILES="$CONFIG_FILES libbench2/Makefile" ;; + "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;; + "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; + "doc/FAQ/Makefile") CONFIG_FILES="$CONFIG_FILES doc/FAQ/Makefile" ;; + "tools/Makefile") CONFIG_FILES="$CONFIG_FILES tools/Makefile" ;; + "tools/fftw_wisdom.1") CONFIG_FILES="$CONFIG_FILES tools/fftw_wisdom.1" ;; + "tools/fftw-wisdom-to-conf") CONFIG_FILES="$CONFIG_FILES tools/fftw-wisdom-to-conf" ;; + "m4/Makefile") CONFIG_FILES="$CONFIG_FILES m4/Makefile" ;; + "fftw.pc") CONFIG_FILES="$CONFIG_FILES fftw.pc" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. 
+if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? 
"could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named 'Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`$as_dirname -- "$mf" || +$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$mf" : 'X\(//\)[^/]' \| \ + X"$mf" : 'X\(//\)$' \| \ + X"$mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running 'make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "$am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`$as_dirname -- "$file" || +$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$file" : 'X\(//\)[^/]' \| \ + X"$file" : 'X\(//\)$' \| \ + X"$file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir=$dirpart/$fdir; as_fn_mkdir_p + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done + done +} + ;; + "libtool":C) + + # See if we are running on zsh, and set the options which allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST + fi + + cfgfile="${ofile}T" + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL + +# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is part of GNU Libtool. +# +# GNU Libtool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, or +# obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# The names of the tagged configurations supported by this script. 
+available_tags="F77 " + +# ### BEGIN LIBTOOL CONFIG + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Assembler program. +AS=$lt_AS + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Object dumper program. +OBJDUMP=$lt_OBJDUMP + +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The PATH separator for the build system. +PATH_SEPARATOR=$lt_PATH_SEPARATOR + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. +NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# Method to check whether dependent libraries are shared objects. 
+deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". +file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive. +AR_FLAGS=$lt_AR_FLAGS + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. +lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm in a C name address pair. +global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and in which our libraries should be installed. +lt_sysroot=$lt_sysroot + +# The name of the directory that contains temporary libtool files. 
+objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. +MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). +libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. +version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. 
+install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Run-time system search path for libraries. +sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? +with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? 
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary. 
+hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \${shlibpath_var} if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# ### END LIBTOOL CONFIG + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. 
For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + +ltmain="$ac_aux_dir/ltmain.sh" + + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + if test x"$xsi_shell" = xyes; then + sed -e '/^func_dirname ()$/,/^} # func_dirname /c\ +func_dirname ()\ +{\ +\ case ${1} in\ +\ */*) func_dirname_result="${1%/*}${2}" ;;\ +\ * ) func_dirname_result="${3}" ;;\ +\ esac\ +} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_basename ()$/,/^} # func_basename /c\ +func_basename ()\ +{\ +\ func_basename_result="${1##*/}"\ +} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\ +func_dirname_and_basename ()\ +{\ +\ case ${1} in\ +\ */*) func_dirname_result="${1%/*}${2}" ;;\ +\ * ) func_dirname_result="${3}" ;;\ +\ esac\ +\ func_basename_result="${1##*/}"\ +} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + + + sed -e '/^func_stripname ()$/,/^} # func_stripname /c\ +func_stripname ()\ +{\ +\ # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\ +\ # positional parameters, so assign one to ordinary parameter first.\ +\ func_stripname_result=${3}\ +\ func_stripname_result=${func_stripname_result#"${1}"}\ +\ func_stripname_result=${func_stripname_result%"${2}"}\ +} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\ +func_split_long_opt ()\ +{\ +\ func_split_long_opt_name=${1%%=*}\ +\ func_split_long_opt_arg=${1#*=}\ +} # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\ +func_split_short_opt ()\ +{\ +\ func_split_short_opt_arg=${1#??}\ +\ func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\ +} # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\ +func_lo2o ()\ +{\ +\ case ${1} in\ +\ *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\ +\ *) func_lo2o_result=${1} ;;\ +\ esac\ +} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + + + sed -e '/^func_xform ()$/,/^} # func_xform /c\ +func_xform ()\ +{\ + func_xform_result=${1%.*}.lo\ +} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_arith ()$/,/^} # func_arith /c\ +func_arith ()\ +{\ + func_arith_result=$(( $* ))\ +} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_len ()$/,/^} # func_len /c\ +func_len ()\ +{\ + func_len_result=${#1}\ +} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + +fi + +if test x"$lt_shell_append" = xyes; then + sed -e '/^func_append ()$/,/^} # func_append /c\ +func_append ()\ +{\ + eval "${1}+=\\${2}"\ +} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: + + + sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\ +func_append_quoted ()\ +{\ +\ func_quote_for_eval "${2}"\ +\ eval "${1}+=\\\\ \\$func_quote_for_eval_result"\ +} # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? 
|| _lt_function_replace_fail=: + + + # Save a `func_append' function call where possible by direct use of '+=' + sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +else + # Save a `func_append' function call even when '+=' is not available + sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +fi + +if test x"$_lt_function_replace_fail" = x":"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5 +$as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;} +fi + + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + + cat <<_LT_EOF >> "$ofile" + +# ### BEGIN LIBTOOL TAG CONFIG: F77 + +# The linker used to build libraries. +LD=$lt_LD_F77 + +# How to create reloadable object files. +reload_flag=$lt_reload_flag_F77 +reload_cmds=$lt_reload_cmds_F77 + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds_F77 + +# A language specific compiler. +CC=$lt_compiler_F77 + +# Is the compiler the GNU compiler? +with_gcc=$GCC_F77 + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_F77 + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic_F77 + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl_F77 + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static_F77 + +# Does compiler simultaneously support -c and -o options? 
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o_F77 + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc_F77 + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_F77 + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_F77 + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec_F77 + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object_F77 + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_F77 + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_F77 + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds_F77 +archive_expsym_cmds=$lt_archive_expsym_cmds_F77 + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds_F77 +module_expsym_cmds=$lt_module_expsym_cmds_F77 + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld_F77 + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag_F77 + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag_F77 + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_F77 + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator_F77 + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary. 
+hardcode_direct=$hardcode_direct_F77 + +# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \${shlibpath_var} if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute_F77 + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L_F77 + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var_F77 + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic_F77 + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath_F77 + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs_F77 + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols_F77 + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds_F77 + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms_F77 + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms_F77 + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds_F77 + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds_F77 + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec_F77 + +# How to hardcode a shared library path into an executable. 
+hardcode_action=$hardcode_action_F77 + +# ### END LIBTOOL TAG CONFIG: F77 +_LT_EOF + + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/configure.ac --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/configure.ac Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,616 @@ +dnl Process this file with autoconf to produce a configure script. + +dnl Define the fftw version number as M4 macros, so that we can enforce +dnl the invariant that the minor version number in FFTW-X.Y.MINOR is the same +dnl as the revision number in SHARED_VERSION_INFO. 
+define(FFTW_MAJOR_VERSION, 3.3)dnl +define(FFTW_MINOR_VERSION, 4)dnl + +dnl Version number of the FFTW source package. +AC_INIT(fftw, FFTW_MAJOR_VERSION.FFTW_MINOR_VERSION, fftw@fftw.org) +AC_CONFIG_SRCDIR(kernel/ifftw.h) + +dnl Version number for libtool shared libraries. Libtool wants a string +dnl of the form CURRENT:REVISION:AGE. We adopt the convention that +dnl REVISION is the same as the FFTW minor version number. +dnl fftw-3.1.x was 4:x:1 +dnl fftw-3.2.x was 5:x:2 +dnl fftw-3.3.x was 6:x:3 for x < 4 and 7:x:4 for x >= 4 +SHARED_VERSION_INFO="7:FFTW_MINOR_VERSION:4" # CURRENT:REVISION:AGE + +AM_INIT_AUTOMAKE(1.7) +AM_CONFIG_HEADER(config.h) +AC_CONFIG_MACRO_DIR([m4]) +AM_MAINTAINER_MODE +AC_SUBST(SHARED_VERSION_INFO) +AC_DISABLE_SHARED dnl to hell with shared libraries +AC_CANONICAL_HOST + +dnl configure options +case "${host_cpu}" in + powerpc*) have_fma=yes;; + ia64*) have_fma=yes;; + hppa*) have_fma=yes;; + mips64*) have_fma=yes;; + *) have_fma=no;; +esac + +AC_ARG_ENABLE(fma, [AC_HELP_STRING([--enable-fma],[enable optimizations for machines with fused multiply-add])], have_fma=$enableval) +if test "$have_fma"x = "yes"x; then + AC_DEFINE(HAVE_FMA,1,[Define if you have a machine with fused multiply-add]) +fi + + +AC_ARG_ENABLE(debug, [AC_HELP_STRING([--enable-debug],[compile fftw with extra runtime checks for debugging])], ok=$enableval, ok=no) +if test "$ok" = "yes"; then + AC_DEFINE(FFTW_DEBUG,1,[Define to enable extra FFTW debugging code.]) + debug_malloc=yes +else + debug_malloc=no +fi + +AC_ARG_ENABLE(debug-malloc, [AC_HELP_STRING([--enable-debug-malloc],[enable malloc debugging version])], ok=$enableval, ok=$debug_malloc) +if test "$ok" = "yes"; then + AC_DEFINE(FFTW_DEBUG_MALLOC,1,[Define to enable debugging malloc.]) +fi + +AC_ARG_ENABLE(debug-alignment, [AC_HELP_STRING([--enable-debug-alignment],[enable alignment debugging hacks])], ok=$enableval, ok=no) +if test "$ok" = "yes"; then + AC_DEFINE(FFTW_DEBUG_ALIGNMENT,1,[Define to enable alignment 
debugging hacks.]) +fi + +AC_ARG_ENABLE(random-estimator, [AC_HELP_STRING([--enable-random-estimator],[enable pseudorandom estimator (debugging hack)])], ok=$enableval, ok=no) +if test "$ok" = "yes"; then + AC_DEFINE(FFTW_RANDOM_ESTIMATOR,1,[Define to enable pseudorandom estimate planning for debugging.]) + CHECK_PL_OPTS="--estimate" +fi + +AC_ARG_ENABLE(alloca, [AC_HELP_STRING([--disable-alloca],[disable use of the alloca() function (may be broken on mingw64)])], ok=$enableval, ok=yes) +if test "$ok" = "yes"; then + AC_DEFINE(FFTW_ENABLE_ALLOCA,1,[Define to enable the use of alloca().]) +fi + +AC_ARG_ENABLE(single, [AC_HELP_STRING([--enable-single],[compile fftw in single precision])], ok=$enableval, ok=no) +AC_ARG_ENABLE(float, [AC_HELP_STRING([--enable-float],[synonym for --enable-single])], ok=$enableval) +if test "$ok" = "yes"; then + AC_DEFINE(FFTW_SINGLE,1,[Define to compile in single precision.]) + AC_DEFINE(BENCHFFT_SINGLE,1,[Define to compile in single precision.]) + PRECISION=s +else + PRECISION=d +fi +AM_CONDITIONAL(SINGLE, test "$ok" = "yes") + +AC_ARG_ENABLE(long-double, [AC_HELP_STRING([--enable-long-double],[compile fftw in long-double precision])], ok=$enableval, ok=no) +if test "$ok" = "yes"; then + if test "$PRECISION" = "s"; then + AC_MSG_ERROR([--enable-single/--enable-long-double conflict]) + fi + AC_DEFINE(FFTW_LDOUBLE,1,[Define to compile in long-double precision.]) + AC_DEFINE(BENCHFFT_LDOUBLE,1,[Define to compile in long-double precision.]) + PRECISION=l +fi +AM_CONDITIONAL(LDOUBLE, test "$ok" = "yes") + +AC_ARG_ENABLE(quad-precision, [AC_HELP_STRING([--enable-quad-precision],[compile fftw in quadruple precision if available])], ok=$enableval, ok=no) +if test "$ok" = "yes"; then + if test "$PRECISION" != "d"; then + AC_MSG_ERROR([conflicting precisions specified]) + fi + AC_DEFINE(FFTW_QUAD,1,[Define to compile in quad precision.]) + AC_DEFINE(BENCHFFT_QUAD,1,[Define to compile in quad precision.]) + PRECISION=q +fi +AM_CONDITIONAL(QUAD, 
test "$ok" = "yes") + +AC_SUBST(PRECISION) +AC_SUBST(CHECK_PL_OPTS) + +AC_ARG_ENABLE(sse, [AC_HELP_STRING([--enable-sse],[enable SSE optimizations])], have_sse=$enableval, have_sse=no) +if test "$have_sse" = "yes"; then + if test "$PRECISION" != "s"; then + AC_MSG_ERROR([SSE requires single precision]) + fi +fi + +AC_ARG_ENABLE(sse2, [AC_HELP_STRING([--enable-sse2],[enable SSE/SSE2 optimizations])], have_sse2=$enableval, have_sse2=no) +if test "$have_sse" = "yes"; then have_sse2=yes; fi +if test "$have_sse2" = "yes"; then + AC_DEFINE(HAVE_SSE2,1,[Define to enable SSE/SSE2 optimizations.]) + if test "$PRECISION" != "d" -a "$PRECISION" != "s"; then + AC_MSG_ERROR([SSE2 requires single or double precision]) + fi +fi +AM_CONDITIONAL(HAVE_SSE2, test "$have_sse2" = "yes") + +AC_ARG_ENABLE(avx, [AC_HELP_STRING([--enable-avx],[enable AVX optimizations])], have_avx=$enableval, have_avx=no) +if test "$have_avx" = "yes"; then + AC_DEFINE(HAVE_AVX,1,[Define to enable AVX optimizations.]) + if test "$PRECISION" != "d" -a "$PRECISION" != "s"; then + AC_MSG_ERROR([AVX requires single or double precision]) + fi +fi +AM_CONDITIONAL(HAVE_AVX, test "$have_avx" = "yes") + +AC_ARG_ENABLE(altivec, [AC_HELP_STRING([--enable-altivec],[enable Altivec optimizations])], have_altivec=$enableval, have_altivec=no) +if test "$have_altivec" = "yes"; then + AC_DEFINE(HAVE_ALTIVEC,1,[Define to enable Altivec optimizations.]) + if test "$PRECISION" != "s"; then + AC_MSG_ERROR([Altivec requires single precision]) + fi +fi +AM_CONDITIONAL(HAVE_ALTIVEC, test "$have_altivec" = "yes") + +AC_ARG_ENABLE(neon, [AC_HELP_STRING([--enable-neon],[enable ARM NEON optimizations])], have_neon=$enableval, have_neon=no) +if test "$have_neon" = "yes"; then + AC_DEFINE(HAVE_NEON,1,[Define to enable ARM NEON optimizations.]) + if test "$PRECISION" != "s"; then + AC_MSG_ERROR([NEON requires single precision]) + fi +fi +AM_CONDITIONAL(HAVE_NEON, test "$have_neon" = "yes") + +dnl FIXME: +dnl AC_ARG_ENABLE(mips-ps, 
[AC_HELP_STRING([--enable-mips-ps],[enable MIPS pair-single optimizations])], have_mips_ps=$enableval, have_mips_ps=no) +dnl if test "$have_mips_ps" = "yes"; then +dnl AC_DEFINE(HAVE_MIPS_PS,1,[Define to enable MIPS paired-single optimizations.]) +dnl if test "$PRECISION" != "s"; then +dnl AC_MSG_ERROR([MIPS paired-single requires single precision]) +dnl fi +dnl fi +dnl AM_CONDITIONAL(HAVE_MIPS_PS, test "$have_mips_ps" = "yes") + +AC_ARG_WITH(slow-timer, [AC_HELP_STRING([--with-slow-timer],[use low-precision timers (SLOW)])], with_slow_timer=$withval, with_slow_timer=no) +if test "$with_slow_timer" = "yes"; then + AC_DEFINE(WITH_SLOW_TIMER,1,[Use low-precision timers, making planner very slow]) +fi + +AC_ARG_ENABLE(mips_zbus_timer, [AC_HELP_STRING([--enable-mips-zbus-timer],[use MIPS ZBus cycle-counter])], have_mips_zbus_timer=$enableval, have_mips_zbus_timer=no) +if test "$have_mips_zbus_timer" = "yes"; then + AC_DEFINE(HAVE_MIPS_ZBUS_TIMER,1,[Define to enable use of MIPS ZBus cycle-counter.]) +fi + +AC_ARG_WITH(our-malloc, [AC_HELP_STRING([--with-our-malloc],[use our aligned malloc (helpful for Win32)])], with_our_malloc=$withval, with_our_malloc=no) +AC_ARG_WITH(our-malloc16, [AC_HELP_STRING([--with-our-malloc16],[Obsolete alias for --with-our-malloc16])], with_our_malloc=$withval) +if test "$with_our_malloc" = "yes"; then + AC_DEFINE(WITH_OUR_MALLOC,1,[Use our own aligned malloc routine; mainly helpful for Windows systems lacking aligned allocation system-library routines.]) +fi + +AC_ARG_WITH(windows-f77-mangling, [AC_HELP_STRING([--with-windows-f77-mangling],[use common Win32 Fortran interface styles])], with_windows_f77_mangling=$withval, with_windows_f77_mangling=no) +if test "$with_windows_f77_mangling" = "yes"; then + AC_DEFINE(WINDOWS_F77_MANGLING,1,[Use common Windows Fortran mangling styles for the Fortran interfaces.]) +fi + +AC_ARG_WITH(incoming-stack-boundary, [AC_HELP_STRING([--with-incoming-stack-boundary=X],[Assume that stack is aligned to (1<]) 
+ CC=$save_CC + if test 0 = $ac_cv_sizeof_MPI_Fint; then + AC_MSG_WARN([sizeof(MPI_Fint) test failed]); + dnl As a backup, assume Fortran integer == C int + AC_CHECK_SIZEOF(int) + if test 0 = $ac_cv_sizeof_int; then AC_MSG_ERROR([sizeof(int) test failed]); fi + ac_cv_sizeof_MPI_Fint=$ac_cv_sizeof_int + fi + C_MPI_FINT=C_INT`expr $ac_cv_sizeof_MPI_Fint \* 8`_T + AC_SUBST(C_MPI_FINT) +fi +AM_CONDITIONAL(MPI, test "$enable_mpi" = "yes") + +dnl ----------------------------------------------------------------------- + +dnl determine CFLAGS first +AX_CC_MAXOPT + +case "${ax_cv_c_compiler_vendor}" in + intel) # Stop icc from defining __GNUC__, except on MacOS where this fails + case "${host_os}" in + *darwin*) ;; # icc -no-gcc fails to compile some system headers + *) + AX_CHECK_COMPILER_FLAGS([-no-gcc], [CC="$CC -no-gcc"]) + ;; + esac + ;; + + hp) # must (sometimes) manually increase cpp limits to handle fftw3.h + AX_CHECK_COMPILER_FLAGS([-Wp,-H128000], + [CC="$CC -Wp,-H128000"]) + ;; + + portland) # -Masmkeyword required for asm("") cycle counters + AX_CHECK_COMPILER_FLAGS([-Masmkeyword], + [CC="$CC -Masmkeyword"]) + ;; +esac + +dnl Determine SIMD CFLAGS at least for gcc and icc +case "${ax_cv_c_compiler_vendor}" in + gnu|intel) + # SSE/SSE2 + if test "$have_sse2" = "yes" -a "x$SSE2_CFLAGS" = x; then + if test "$PRECISION" = d; then flag=msse2; else flag=msse; fi + AX_CHECK_COMPILER_FLAGS(-$flag, [SSE2_CFLAGS="-$flag"], + [AC_MSG_ERROR([Need a version of gcc with -$flag])]) + fi + + # AVX + if test "$have_avx" = "yes" -a "x$AVX_CFLAGS" = x; then + AX_CHECK_COMPILER_FLAGS(-mavx, [AVX_CFLAGS="-mavx"], + [AC_MSG_ERROR([Need a version of gcc with -mavx])]) + fi + + if test "$have_altivec" = "yes" -a "x$ALTIVEC_CFLAGS" = x; then + # -DFAKE__VEC__ is a workaround because gcc-3.3 does not + # #define __VEC__ with -maltivec. 
+ AX_CHECK_COMPILER_FLAGS(-faltivec, [ALTIVEC_CFLAGS="-faltivec"], + [AX_CHECK_COMPILER_FLAGS(-maltivec -mabi=altivec, + [ALTIVEC_CFLAGS="-maltivec -mabi=altivec -DFAKE__VEC__"], + [AX_CHECK_COMPILER_FLAGS(-fvec, [ALTIVEC_CFLAGS="-fvec"], + [AC_MSG_ERROR([Need a version of gcc with -maltivec])])])]) + fi + + if test "$have_neon" = "yes" -a "x$NEON_CFLAGS" = x; then + AX_CHECK_COMPILER_FLAGS(-mfpu=neon, [NEON_CFLAGS="-mfpu=neon"], + [AC_MSG_ERROR([Need a version of gcc with -mfpu=neon])]) + fi + + dnl FIXME: + dnl elif test "$have_mips_ps" = "yes"; then + dnl # Just punt here and use only new 4.2 compiler :( + dnl # Should add section for older compilers... + dnl AX_CHECK_COMPILER_FLAGS(-mpaired-single, + dnl [SIMD_CFLAGS="-mpaired-single"], + dnl #[AC_MSG_ERROR([Need a version of gcc with -mpaired-single])]) + dnl [AX_CHECK_COMPILER_FLAGS(-march=mips64, + dnl [SIMD_CFLAGS="-march=mips64"], + dnl [AC_MSG_ERROR( + dnl [Need a version of gcc with -mpaired-single or -march=mips64]) + dnl ])]) + dnl fi + ;; +esac + +AC_SUBST(SSE2_CFLAGS) +AC_SUBST(AVX_CFLAGS) +AC_SUBST(ALTIVEC_CFLAGS) +AC_SUBST(NEON_CFLAGS) + +dnl add stack alignment CFLAGS if so requested +if test "$with_incoming_stack_boundary"x != "no"x; then + case "${ax_cv_c_compiler_vendor}" in + gnu) + tentative_flags="-mincoming-stack-boundary=$with_incoming_stack_boundary"; + AX_CHECK_COMPILER_FLAGS($tentative_flags, + [STACK_ALIGN_CFLAGS=$tentative_flags]) + ;; + esac +fi +AC_SUBST(STACK_ALIGN_CFLAGS) + +dnl Checks for header files. 
+AC_HEADER_STDC +AC_CHECK_HEADERS([libintl.h malloc.h stddef.h stdlib.h string.h strings.h sys/time.h unistd.h limits.h c_asm.h intrinsics.h stdint.h mach/mach_time.h sys/sysctl.h]) +dnl c_asm.h: Header file for enabling asm() on Digital Unix +dnl intrinsics.h: cray unicos +dnl sys/sysctl.h: MacOS X altivec detection + +dnl altivec.h requires $ALTIVEC_CFLAGS +save_CFLAGS="$CFLAGS" +save_CPPFLAGS="$CPPFLAGS" +CFLAGS="$CFLAGS $ALTIVEC_CFLAGS" +CPPFLAGS="$CPPFLAGS $ALTIVEC_CFLAGS" +AC_CHECK_HEADERS([altivec.h]) +CFLAGS="$save_CFLAGS" +CPPFLAGS="$save_CPPFLAGS" + + +dnl Checks for typedefs, structures, and compiler characteristics. +AC_C_CONST +AC_C_INLINE +AC_TYPE_SIZE_T +AC_HEADER_TIME +AC_CHECK_TYPE([long double], + [AC_DEFINE(HAVE_LONG_DOUBLE, 1, [Define to 1 if the compiler supports `long double'])], +[ +if test $PRECISION = l; then + AC_MSG_ERROR([long double is not a supported type with your compiler.]) +fi +]) +AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in ])],, +[ +#if HAVE_SYS_TIME_H +#include +#endif +]) + +AC_CHECK_SIZEOF(int) +AC_CHECK_SIZEOF(unsigned int) +AC_CHECK_SIZEOF(long) +AC_CHECK_SIZEOF(unsigned long) +AC_CHECK_SIZEOF(long long) +AC_CHECK_SIZEOF(unsigned long long) +AC_CHECK_SIZEOF(size_t) +AC_CHECK_SIZEOF(ptrdiff_t) + +AC_CHECK_TYPES(uintptr_t, [], [AC_CHECK_SIZEOF(void *)], [$ac_includes_default +#ifdef HAVE_STDINT_H +# include +#endif]) + +AC_CHECK_SIZEOF(float) +AC_CHECK_SIZEOF(double) + +dnl Check sizeof fftw_r2r_kind for Fortran interface [it has == sizeof(int) +dnl for years, but being paranoid]. Note: the definition here must match +dnl the one in api/fftw3.h! 
+AC_CHECK_SIZEOF(fftw_r2r_kind, [], [typedef enum { + FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2, + FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6, + FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10 +} fftw_r2r_kind;]) +if test 0 = $ac_cv_sizeof_fftw_r2r_kind; then AC_MSG_ERROR([sizeof(fftw_r2r_kind) test failed]); fi +C_FFTW_R2R_KIND=C_INT`expr $ac_cv_sizeof_fftw_r2r_kind \* 8`_T +AC_SUBST(C_FFTW_R2R_KIND) + +dnl Checks for library functions. +AC_FUNC_ALLOCA +AC_FUNC_STRTOD +AC_FUNC_VPRINTF +AC_CHECK_LIB(m, sin) + +if test $PRECISION = q; then + AX_GCC_VERSION(4,6,0,[],[AC_MSG_ERROR([gcc 4.6 or later required for quad precision support])]) + AC_CHECK_LIB(quadmath, sinq, [], [AC_MSG_ERROR([quad precision requires libquadmath for quad-precision trigonometric routines])]) + LIBQUADMATH=-lquadmath +fi +AC_SUBST(LIBQUADMATH) + +AC_CHECK_FUNCS([BSDgettimeofday gettimeofday gethrtime read_real_time time_base_to_time drand48 sqrt memset posix_memalign memalign _mm_malloc _mm_free clock_gettime mach_absolute_time sysctl abort sinl cosl snprintf]) +AC_CHECK_DECLS([sinl, cosl, sinq, cosq],,,[#include ]) +AC_CHECK_DECLS([memalign],,,[ +#ifdef HAVE_MALLOC_H +#include +#endif]) +AC_CHECK_DECLS([drand48, srand48, posix_memalign]) dnl in stdlib.h + +dnl Cray UNICOS _rtc() (real-time clock) intrinsic +AC_MSG_CHECKING([for _rtc intrinsic]) +rtc_ok=yes +AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H +#include +#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) +AC_MSG_RESULT($rtc_ok) + +if test "$PRECISION" = "l"; then + AC_CHECK_FUNCS([cosl sinl tanl], [], [AC_MSG_ERROR([long-double precision requires long-double trigonometric routines])]) +fi + +AC_MSG_CHECKING([for isnan]) +AC_TRY_LINK([#include +], if (!isnan(3.14159)) isnan(2.7183);, ok=yes, ok=no) +if test "$ok" = "yes"; then + AC_DEFINE(HAVE_ISNAN,1,[Define if the isnan() function/macro is available.]) +fi +AC_MSG_RESULT(${ok}) + +dnl TODO 
+AX_GCC_ALIGNS_STACK() + +dnl override CFLAGS selection when debugging +if test "${enable_debug}" = "yes"; then + CFLAGS="-g" +fi + +dnl add gcc warnings, in debug/maintainer mode only +if test "$enable_debug" = yes || test "$USE_MAINTAINER_MODE" = yes; then +if test "$ac_test_CFLAGS" != "set"; then + if test $ac_cv_prog_gcc = yes; then + CFLAGS="$CFLAGS -Wall -W -Wcast-qual -Wpointer-arith -Wcast-align -pedantic -Wno-long-long -Wshadow -Wbad-function-cast -Wwrite-strings -Wstrict-prototypes -Wredundant-decls -Wnested-externs" # -Wundef -Wconversion -Wmissing-prototypes -Wmissing-declarations + fi +fi +fi + +dnl ----------------------------------------------------------------------- + +AC_ARG_ENABLE(fortran, [AC_HELP_STRING([--disable-fortran],[don't include Fortran-callable wrappers])], enable_fortran=$enableval, enable_fortran=yes) + +if test "$enable_fortran" = "yes"; then + AC_PROG_F77 + if test -z "$F77"; then + enable_fortran=no + AC_MSG_WARN([*** Couldn't find f77 compiler; using default Fortran wrappers.]) + else + AC_F77_DUMMY_MAIN([], [enable_fortran=no + AC_MSG_WARN([*** Couldn't figure out how to link C and Fortran; using default Fortran wrappers.])]) + fi +else + AC_DEFINE([DISABLE_FORTRAN], 1, [Define to disable Fortran wrappers.]) +fi + +if test "x$enable_fortran" = xyes; then + AC_F77_WRAPPERS + AC_F77_FUNC(f77foo) + AC_F77_FUNC(f77_foo) + f77_foo2=`echo $f77foo | sed 's/77/77_/'` + if test "$f77_foo" = "$f77_foo2"; then + AC_DEFINE(F77_FUNC_EQUIV, 1, [Define if F77_FUNC and F77_FUNC_ are equivalent.]) + + # Include g77 wrappers by default for GNU systems or gfortran + with_g77_wrappers=$ac_cv_f77_compiler_gnu + case $host_os in *gnu*) with_g77_wrappers=yes ;; esac + fi +else + with_g77_wrappers=no +fi + +AC_ARG_WITH(g77-wrappers, [AC_HELP_STRING([--with-g77-wrappers],[force inclusion of g77-compatible wrappers in addition to any other Fortran compiler that is detected])], with_g77_wrappers=$withval) +if test "x$with_g77_wrappers" = "xyes"; then + 
AC_DEFINE(WITH_G77_WRAPPERS,1,[Include g77-compatible wrappers in addition to any other Fortran wrappers.]) +fi + +dnl ----------------------------------------------------------------------- +have_smp="no" +AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, enable_openmp=no) + +if test "$enable_openmp" = "yes"; then + AC_DEFINE(HAVE_OPENMP,1,[Define to enable OpenMP]) + AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])]) +fi + +AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no) + +if test "$enable_threads" = "yes"; then + AC_DEFINE(HAVE_THREADS,1,[Define to enable SMP threads]) +fi + +AC_ARG_WITH(combined-threads, [AC_HELP_STRING([--with-combined-threads],[combine threads into main libfftw3])], with_combined_threads=$withval, with_combined_threads=no) + +if test "$with_combined_threads" = yes; then + if test "$enable_openmp" = "yes"; then + AC_MSG_ERROR([--with-combined-threads incompatible with --enable-openmp]) + fi + if test "$enable_threads" != "yes"; then + AC_MSG_ERROR([--with-combined-threads requires --enable-threads]) + fi +fi + +dnl Check for threads library... 
+THREADLIBS="" +if test "$enable_threads" = "yes"; then + # Win32 threads are the default on Windows: + if test -z "$THREADLIBS"; then + AC_MSG_CHECKING([for Win32 threads]) + AC_TRY_LINK([#include ], + [_beginthreadex(0,0,0,0,0,0);], + [THREADLIBS=" "; AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(no)]) + fi + + # POSIX threads, the default choice everywhere else: + if test -z "$THREADLIBS"; then + ACX_PTHREAD([THREADLIBS="$PTHREAD_LIBS " + CC="$PTHREAD_CC" + AC_DEFINE(USING_POSIX_THREADS, 1, [Define if we have and are using POSIX threads.])]) + fi + + if test -z "$THREADLIBS"; then + AC_MSG_ERROR([couldn't find threads library for --enable-threads]) + fi + AC_DEFINE(HAVE_THREADS, 1, [Define if we have a threads library.]) +fi +AC_SUBST(THREADLIBS) +AM_CONDITIONAL(THREADS, test "$enable_threads" = "yes") +AM_CONDITIONAL(OPENMP, test "$enable_openmp" = "yes") +AM_CONDITIONAL(SMP, test "$enable_threads" = "yes" -o "$enable_openmp" = "yes") +AM_CONDITIONAL(COMBINED_THREADS, test x"$with_combined_threads" = xyes) + +dnl ----------------------------------------------------------------------- + +AC_MSG_CHECKING([whether a cycle counter is available]) +save_CPPFLAGS=$CPPFLAGS +CPPFLAGS="$CPPFLAGS -I$srcdir/kernel" +AC_TRY_CPP([#include "cycle.h" +#ifndef HAVE_TICK_COUNTER +# error No cycle counter +#endif], [ok=yes], [ok=no]) +CPPFLAGS=$save_CPPFLAGS +AC_MSG_RESULT($ok) +if test $ok = no && test "x$with_slow_timer" = xno; then + echo "***************************************************************" + echo "WARNING: No cycle counter found. FFTW will use ESTIMATE mode " + echo " for all plans. See the manual for more information." 
+ echo "***************************************************************" +fi + +dnl ----------------------------------------------------------------------- + +AC_DEFINE_UNQUOTED(FFTW_CC, "$CC $CFLAGS", [C compiler name and flags]) + +AC_CONFIG_FILES([ + Makefile + support/Makefile + genfft/Makefile + kernel/Makefile + simd-support/Makefile + + dft/Makefile + dft/scalar/Makefile + dft/scalar/codelets/Makefile + dft/simd/Makefile + dft/simd/common/Makefile + dft/simd/sse2/Makefile + dft/simd/avx/Makefile + dft/simd/altivec/Makefile + dft/simd/neon/Makefile + + rdft/Makefile + rdft/scalar/Makefile + rdft/scalar/r2cf/Makefile + rdft/scalar/r2cb/Makefile + rdft/scalar/r2r/Makefile + rdft/simd/Makefile + rdft/simd/common/Makefile + rdft/simd/sse2/Makefile + rdft/simd/avx/Makefile + rdft/simd/altivec/Makefile + rdft/simd/neon/Makefile + + reodft/Makefile + + threads/Makefile + + api/Makefile + + mpi/Makefile + + libbench2/Makefile + tests/Makefile + doc/Makefile + doc/FAQ/Makefile + + tools/Makefile + tools/fftw_wisdom.1 + tools/fftw-wisdom-to-conf + + m4/Makefile + + fftw.pc +]) + +AC_OUTPUT diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/depcomp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/depcomp Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2013-05-30.07; # UTC + +# Copyright (C) 1999-2013 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. 
+# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. 
+ dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. + gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. +## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. 
+## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. 
+ exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? 
+ if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. 
+ # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. + while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. 
+ sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. 
We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. 
+ exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. + "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. 
Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel +SUBDIRS = scalar simd + +noinst_LTLIBRARIES = libdft.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = codelet-dft.h dft.h + +libdft_la_SOURCES = bluestein.c buffered.c conf.c ct.c dftw-direct.c \ +dftw-directsq.c dftw-generic.c dftw-genericbuf.c direct.c generic.c \ +indirect.c indirect-transpose.c kdft-dif.c kdft-difsq.c kdft-dit.c \ +kdft.c nop.c plan.c problem.c rader.c rank-geq2.c solve.c vrank-geq1.c \ +zero.c 
codelet-dft.h ct.h dft.h diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,761 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; 
break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = dft +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_la_LIBADD = +am_libdft_la_OBJECTS = bluestein.lo buffered.lo conf.lo ct.lo \ + dftw-direct.lo dftw-directsq.lo dftw-generic.lo \ + dftw-genericbuf.lo direct.lo generic.lo indirect.lo \ + indirect-transpose.lo kdft-dif.lo kdft-difsq.lo kdft-dit.lo \ + kdft.lo nop.lo plan.lo problem.lo rader.lo rank-geq2.lo \ + solve.lo 
vrank-geq1.lo zero.lo +libdft_la_OBJECTS = $(am_libdft_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_la_SOURCES) +DIST_SOURCES = $(libdft_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 
2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ 
+ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ 
+STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel +SUBDIRS = scalar simd +noinst_LTLIBRARIES = libdft.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = codelet-dft.h dft.h +libdft_la_SOURCES = bluestein.c buffered.c conf.c ct.c dftw-direct.c \ +dftw-directsq.c dftw-generic.c dftw-genericbuf.c direct.c generic.c \ +indirect.c indirect-transpose.c kdft-dif.c kdft-difsq.c kdft-dit.c \ +kdft.c nop.c plan.c problem.c rader.c rank-geq2.c solve.c 
vrank-geq1.c \ +zero.c codelet-dft.h ct.h dft.h + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft.la: $(libdft_la_OBJECTS) $(libdft_la_DEPENDENCIES) $(EXTRA_libdft_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libdft_la_OBJECTS) $(libdft_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/bluestein.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffered.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dftw-direct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dftw-directsq.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dftw-generic.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dftw-genericbuf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/direct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/generic.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/indirect-transpose.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/indirect.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdft-dif.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdft-difsq.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdft-dit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nop.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rader.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rank-geq2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vrank-geq1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zero.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec 
install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/bluestein.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/bluestein.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "dft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_dft super; + INT n; /* problem size */ + INT nb; /* size of convolution */ + R *w; /* lambda k . 
exp(2*pi*i*k^2/(2*n)) */ + R *W; /* DFT(w) */ + plan *cldf; + INT is, os; +} P; + +static void bluestein_sequence(enum wakefulness wakefulness, INT n, R *w) +{ + INT k, ksq, n2 = 2 * n; + triggen *t = X(mktriggen)(wakefulness, n2); + + ksq = 0; + for (k = 0; k < n; ++k) { + t->cexp(t, ksq, w+2*k); + /* careful with overflow */ + ksq += 2*k + 1; while (ksq > n2) ksq -= n2; + } + + X(triggen_destroy)(t); +} + +static void mktwiddle(enum wakefulness wakefulness, P *p) +{ + INT i; + INT n = p->n, nb = p->nb; + R *w, *W; + E nbf = (E)nb; + + p->w = w = (R *) MALLOC(2 * n * sizeof(R), TWIDDLES); + p->W = W = (R *) MALLOC(2 * nb * sizeof(R), TWIDDLES); + + bluestein_sequence(wakefulness, n, w); + + for (i = 0; i < nb; ++i) + W[2*i] = W[2*i+1] = K(0.0); + + W[0] = w[0] / nbf; + W[1] = w[1] / nbf; + + for (i = 1; i < n; ++i) { + W[2*i] = W[2*(nb-i)] = w[2*i] / nbf; + W[2*i+1] = W[2*(nb-i)+1] = w[2*i+1] / nbf; + } + + { + plan_dft *cldf = (plan_dft *)p->cldf; + /* cldf must be awake */ + cldf->apply(p->cldf, W, W+1, W, W+1); + } +} + +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT i, n = ego->n, nb = ego->nb, is = ego->is, os = ego->os; + R *w = ego->w, *W = ego->W; + R *b = (R *) MALLOC(2 * nb * sizeof(R), BUFFERS); + + /* multiply input by conjugate bluestein sequence */ + for (i = 0; i < n; ++i) { + E xr = ri[i*is], xi = ii[i*is]; + E wr = w[2*i], wi = w[2*i+1]; + b[2*i] = xr * wr + xi * wi; + b[2*i+1] = xi * wr - xr * wi; + } + + for (; i < nb; ++i) b[2*i] = b[2*i+1] = K(0.0); + + /* convolution: FFT */ + { + plan_dft *cldf = (plan_dft *)ego->cldf; + cldf->apply(ego->cldf, b, b+1, b, b+1); + } + + /* convolution: pointwise multiplication */ + for (i = 0; i < nb; ++i) { + E xr = b[2*i], xi = b[2*i+1]; + E wr = W[2*i], wi = W[2*i+1]; + b[2*i] = xi * wr + xr * wi; + b[2*i+1] = xr * wr - xi * wi; + } + + /* convolution: IFFT by FFT with real/imag input/output swapped */ + { + plan_dft *cldf = (plan_dft *)ego->cldf; 
+ cldf->apply(ego->cldf, b, b+1, b, b+1); + } + + /* multiply output by conjugate bluestein sequence */ + for (i = 0; i < n; ++i) { + E xi = b[2*i], xr = b[2*i+1]; + E wr = w[2*i], wi = w[2*i+1]; + ro[i*os] = xr * wr + xi * wi; + io[i*os] = xi * wr - xr * wi; + } + + X(ifree)(b); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cldf, wakefulness); + + switch (wakefulness) { + case SLEEPY: + X(ifree0)(ego->w); ego->w = 0; + X(ifree0)(ego->W); ego->W = 0; + break; + default: + A(!ego->w); + mktwiddle(wakefulness, ego); + break; + } +} + +static int applicable(const solver *ego, const problem *p_, + const planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + UNUSED(ego); + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk == 0 + /* FIXME: allow other sizes */ + && X(is_prime)(p->sz->dims[0].n) + + /* FIXME: avoid infinite recursion of bluestein with itself. + This works because all factors in child problems are 2, 3, 5 */ + && p->sz->dims[0].n > 16 + + && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > BLUESTEIN_MAX_SLOW) + ); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldf); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *)ego_; + p->print(p, "(dft-bluestein-%D/%D%(%p%))", + ego->n, ego->nb, ego->cldf); +} + +static INT choose_transform_size(INT minsz) +{ + while (!X(factors_into_small_primes)(minsz)) + ++minsz; + return minsz; +} + +static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + P *pln; + INT n, nb; + plan *cldf = 0; + R *buf = (R *) 0; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + n = p->sz->dims[0].n; + nb = choose_transform_size(2 * n - 1); + buf = (R *) MALLOC(2 * nb * sizeof(R), BUFFERS); + + cldf = X(mkplan_f_d)(plnr, + 
X(mkproblem_dft_d)(X(mktensor_1d)(nb, 2, 2), + X(mktensor_1d)(1, 0, 0), + buf, buf+1, + buf, buf+1), + NO_SLOW, 0, 0); + if (!cldf) goto nada; + + X(ifree)(buf); + + pln = MKPLAN_DFT(P, &padt, apply); + + pln->n = n; + pln->nb = nb; + pln->w = 0; + pln->W = 0; + pln->cldf = cldf; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + + X(ops_add)(&cldf->ops, &cldf->ops, &pln->super.super.ops); + pln->super.super.ops.add += 4 * n + 2 * nb; + pln->super.super.ops.mul += 8 * n + 4 * nb; + pln->super.super.ops.other += 6 * (n + nb); + + return &(pln->super.super); + + nada: + X(ifree0)(buf); + X(plan_destroy_internal)(cldf); + return (plan *)0; +} + + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(dft_bluestein_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/buffered.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/buffered.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" + +typedef struct { + solver super; + int maxnbuf_ndx; +} S; + +static const INT maxnbufs[] = { 8, 256 }; + +typedef struct { + plan_dft super; + + plan *cld, *cldcpy, *cldrest; + INT n, vl, nbuf, bufdist; + INT ivs_by_nbuf, ovs_by_nbuf; + INT roffset, ioffset; +} P; + +/* transform a vector input with the help of bufs */ +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT nbuf = ego->nbuf; + R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS); + + plan_dft *cld = (plan_dft *) ego->cld; + plan_dft *cldcpy = (plan_dft *) ego->cldcpy; + plan_dft *cldrest; + INT i, vl = ego->vl; + INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; + INT roffset = ego->roffset, ioffset = ego->ioffset; + + for (i = nbuf; i <= vl; i += nbuf) { + /* transform to bufs: */ + cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset); + ri += ivs_by_nbuf; ii += ivs_by_nbuf; + + /* copy back */ + cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io); + ro += ovs_by_nbuf; io += ovs_by_nbuf; + } + + X(ifree)(bufs); + + /* Do the remaining transforms, if any: */ + cldrest = (plan_dft *) ego->cldrest; + cldrest->apply((plan *) cldrest, ri, ii, ro, io); +} + + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldcpy, wakefulness); + X(plan_awake)(ego->cldrest, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldrest); + X(plan_destroy_internal)(ego->cldcpy); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + 
p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))", + ego->n, ego->nbuf, + ego->vl, ego->bufdist % ego->n, + ego->cld, ego->cldcpy, ego->cldrest); +} + +static int applicable0(const S *ego, const problem *p_, const planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + const iodim *d = p->sz->dims; + + if (1 + && p->vecsz->rnk <= 1 + && p->sz->rnk == 1 + ) { + INT vl, ivs, ovs; + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + + if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr)) + return 0; + + /* if this solver is redundant, in the sense that a solver + of lower index generates the same plan, then prune this + solver */ + if (X(nbuf_redundant)(d[0].n, vl, + ego->maxnbuf_ndx, + maxnbufs, NELEM(maxnbufs))) + return 0; + + /* + In principle, the buffered transforms might be useful + when working out of place. However, in order to + prevent infinite loops in the planner, we require + that the output stride of the buffered transforms be + greater than 2. + */ + if (p->ri != p->ro) + return (d[0].os > 2); + + /* + * If the problem is in place, the input/output strides must + * be the same or the whole thing must fit in the buffer. 
+ */ + if (X(tensor_inplace_strides2)(p->sz, p->vecsz)) + return 1; + + if (/* fits into buffer: */ + ((p->vecsz->rnk == 0) + || + (X(nbuf)(d[0].n, p->vecsz->dims[0].n, + maxnbufs[ego->maxnbuf_ndx]) + == p->vecsz->dims[0].n))) + return 1; + } + + return 0; +} + +static int applicable(const S *ego, const problem *p_, const planner *plnr) +{ + if (NO_BUFFERINGP(plnr)) return 0; + if (!applicable0(ego, p_, plnr)) return 0; + + if (NO_UGLYP(plnr)) { + const problem_dft *p = (const problem_dft *) p_; + if (p->ri != p->ro) return 0; + if (X(toobig)(p->sz->dims[0].n)) return 0; + } + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const S *ego = (const S *)ego_; + plan *cld = (plan *) 0; + plan *cldcpy = (plan *) 0; + plan *cldrest = (plan *) 0; + const problem_dft *p = (const problem_dft *) p_; + R *bufs = (R *) 0; + INT nbuf = 0, bufdist, n, vl; + INT ivs, ovs, roffset, ioffset; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego, p_, plnr)) + goto nada; + + n = X(tensor_sz)(p->sz); + + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + + nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]); + bufdist = X(bufdist)(n, vl); + A(nbuf > 0); + + /* attempt to keep real and imaginary part in the same order, + so as to allow optimizations in the the copy plan */ + roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0; + ioffset = 1 - roffset; + + /* initial allocation for the purpose of planning */ + bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS); + + /* allow destruction of input if problem is in place */ + cld = X(mkplan_f_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(n, p->sz->dims[0].is, 2), + X(mktensor_1d)(nbuf, ivs, bufdist * 2), + TAINT(p->ri, ivs * nbuf), + TAINT(p->ii, ivs * nbuf), + bufs + roffset, + bufs + ioffset), + 0, 0, (p->ri == p->ro) ? 
NO_DESTROY_INPUT : 0); + if (!cld) + goto nada; + + /* copying back from the buffer is a rank-0 transform: */ + cldcpy = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_0d)(), + X(mktensor_2d)(nbuf, bufdist * 2, ovs, + n, 2, p->sz->dims[0].os), + bufs + roffset, + bufs + ioffset, + TAINT(p->ro, ovs * nbuf), + TAINT(p->io, ovs * nbuf))); + if (!cldcpy) + goto nada; + + /* deallocate buffers, let apply() allocate them for real */ + X(ifree)(bufs); + bufs = 0; + + /* plan the leftover transforms (cldrest): */ + { + INT id = ivs * (nbuf * (vl / nbuf)); + INT od = ovs * (nbuf * (vl / nbuf)); + cldrest = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(tensor_copy)(p->sz), + X(mktensor_1d)(vl % nbuf, ivs, ovs), + p->ri+id, p->ii+id, p->ro+od, p->io+od)); + } + if (!cldrest) + goto nada; + + pln = MKPLAN_DFT(P, &padt, apply); + pln->cld = cld; + pln->cldcpy = cldcpy; + pln->cldrest = cldrest; + pln->n = n; + pln->vl = vl; + pln->ivs_by_nbuf = ivs * nbuf; + pln->ovs_by_nbuf = ovs * nbuf; + pln->roffset = roffset; + pln->ioffset = ioffset; + + pln->nbuf = nbuf; + pln->bufdist = bufdist; + + { + opcnt t; + X(ops_add)(&cld->ops, &cldcpy->ops, &t); + X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops); + } + + return &(pln->super.super); + + nada: + X(ifree0)(bufs); + X(plan_destroy_internal)(cldrest); + X(plan_destroy_internal)(cldcpy); + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +static solver *mksolver(int maxnbuf_ndx) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->maxnbuf_ndx = maxnbuf_ndx; + return &(slv->super); +} + +void X(dft_buffered_register)(planner *p) +{ + size_t i; + for (i = 0; i < NELEM(maxnbufs); ++i) + REGISTER_SOLVER(p, mksolver(i)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/codelet-dft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/codelet-dft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 
2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* + * This header file must include every file or define every + * type or macro which is required to compile a codelet. + */ + +#ifndef __DFT_CODELET_H__ +#define __DFT_CODELET_H__ + +#include "ifftw.h" + +/************************************************************** + * types of codelets + **************************************************************/ + +/* DFT codelets */ +typedef struct kdft_desc_s kdft_desc; + +typedef struct { + int (*okp)( + const kdft_desc *desc, + const R *ri, const R *ii, const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr); + INT vl; +} kdft_genus; + +struct kdft_desc_s { + INT sz; /* size of transform computed */ + const char *nam; + opcnt ops; + const kdft_genus *genus; + INT is; + INT os; + INT ivs; + INT ovs; +}; + +typedef void (*kdft) (const R *ri, const R *ii, R *ro, R *io, + stride is, stride os, INT vl, INT ivs, INT ovs); +void X(kdft_register)(planner *p, kdft codelet, const kdft_desc *desc); + + +typedef struct ct_desc_s ct_desc; + +typedef struct { + int (*okp)( + const struct ct_desc_s *desc, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT 
me, INT ms, + const planner *plnr); + INT vl; +} ct_genus; + +struct ct_desc_s { + INT radix; + const char *nam; + const tw_instr *tw; + const ct_genus *genus; + opcnt ops; + INT rs; + INT vs; + INT ms; +}; + +typedef void (*kdftw) (R *rioarray, R *iioarray, const R *W, + stride ios, INT mb, INT me, INT ms); +void X(kdft_dit_register)(planner *p, kdftw codelet, const ct_desc *desc); +void X(kdft_dif_register)(planner *p, kdftw codelet, const ct_desc *desc); + + +typedef void (*kdftwsq) (R *rioarray, R *iioarray, + const R *W, stride is, stride vs, + INT mb, INT me, INT ms); +void X(kdft_difsq_register)(planner *p, kdftwsq codelet, const ct_desc *desc); + + +extern const solvtab X(solvtab_dft_standard); +extern const solvtab X(solvtab_dft_sse2); +extern const solvtab X(solvtab_dft_avx); +extern const solvtab X(solvtab_dft_altivec); +extern const solvtab X(solvtab_dft_neon); + +#endif /* __DFT_CODELET_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/conf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/conf.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" + +static const solvtab s = +{ + SOLVTAB(X(dft_indirect_register)), + SOLVTAB(X(dft_indirect_transpose_register)), + SOLVTAB(X(dft_rank_geq2_register)), + SOLVTAB(X(dft_vrank_geq1_register)), + SOLVTAB(X(dft_buffered_register)), + SOLVTAB(X(dft_generic_register)), + SOLVTAB(X(dft_rader_register)), + SOLVTAB(X(dft_bluestein_register)), + SOLVTAB(X(dft_nop_register)), + SOLVTAB(X(ct_generic_register)), + SOLVTAB(X(ct_genericbuf_register)), + SOLVTAB_END +}; + +void X(dft_conf_standard)(planner *p) +{ + X(solvtab_exec)(s, p); + X(solvtab_exec)(X(solvtab_dft_standard), p); +#if HAVE_SSE2 + if (X(have_simd_sse2)()) + X(solvtab_exec)(X(solvtab_dft_sse2), p); +#endif +#if HAVE_AVX + if (X(have_simd_avx)()) + X(solvtab_exec)(X(solvtab_dft_avx), p); +#endif +#if HAVE_ALTIVEC + if (X(have_simd_altivec)()) + X(solvtab_exec)(X(solvtab_dft_altivec), p); +#endif +#if HAVE_NEON + if (X(have_simd_neon)()) + X(solvtab_exec)(X(solvtab_dft_neon), p); +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/ct.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/ct.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct.h" + +ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, + ct_mkinferior, ct_force_vrecursion) = 0; + +typedef struct { + plan_dft super; + plan *cld; + plan *cldw; + INT r; +} P; + +static void apply_dit(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + plan_dftw *cldw; + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ri, ii, ro, io); + + cldw = (plan_dftw *) ego->cldw; + cldw->apply(ego->cldw, ro, io); +} + +static void apply_dif(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + plan_dftw *cldw; + + cldw = (plan_dftw *) ego->cldw; + cldw->apply(ego->cldw, ri, ii); + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ri, ii, ro, io); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldw, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldw); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(dft-ct-%s/%D%(%p%)%(%p%))", + ego->super.apply == apply_dit ? 
"dit" : "dif", + ego->r, ego->cldw, ego->cld); +} + +static int applicable0(const ct_solver *ego, const problem *p_, planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + INT r; + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + + /* DIF destroys the input and we don't like it */ + && (ego->dec == DECDIT || + p->ri == p->ro || + !NO_DESTROY_INPUTP(plnr)) + + && ((r = X(choose_radix)(ego->r, p->sz->dims[0].n)) > 1) + && p->sz->dims[0].n > r); +} + + +int X(ct_applicable)(const ct_solver *ego, const problem *p_, planner *plnr) +{ + const problem_dft *p; + + if (!applicable0(ego, p_, plnr)) + return 0; + + p = (const problem_dft *) p_; + + return (0 + || ego->dec == DECDIF+TRANSPOSE + || p->vecsz->rnk == 0 + || !NO_VRECURSEP(plnr) + || (ego->force_vrecursionp && ego->force_vrecursionp(ego, p)) + ); +} + + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const ct_solver *ego = (const ct_solver *) ego_; + const problem_dft *p; + P *pln = 0; + plan *cld = 0, *cldw = 0; + INT n, r, m, v, ivs, ovs; + iodim *d; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if ((NO_NONTHREADEDP(plnr)) || !X(ct_applicable)(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_dft *) p_; + d = p->sz->dims; + n = d[0].n; + r = X(choose_radix)(ego->r, n); + m = n / r; + + X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs); + + switch (ego->dec) { + case DECDIT: + { + cldw = ego->mkcldw(ego, + r, m * d[0].os, m * d[0].os, + m, d[0].os, + v, ovs, ovs, + 0, m, + p->ro, p->io, plnr); + if (!cldw) goto nada; + + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(m, r * d[0].is, d[0].os), + X(mktensor_2d)(r, d[0].is, m * d[0].os, + v, ivs, ovs), + p->ri, p->ii, p->ro, p->io) + ); + if (!cld) goto nada; + + pln = MKPLAN_DFT(P, &padt, apply_dit); + break; + } + case DECDIF: + case DECDIF+TRANSPOSE: + { + INT cors, covs; /* cldw ors, ovs */ + if (ego->dec == DECDIF+TRANSPOSE) { + cors = ivs; + covs = m 
* d[0].is; + /* ensure that we generate well-formed dftw subproblems */ + /* FIXME: too conservative */ + if (!(1 + && r == v + && d[0].is == r * cors)) + goto nada; + + /* FIXME: allow in-place only for now, like in + fftw-3.[01] */ + if (!(1 + && p->ri == p->ro + && d[0].is == r * d[0].os + && cors == d[0].os + && covs == ovs + )) + goto nada; + } else { + cors = m * d[0].is; + covs = ivs; + } + + cldw = ego->mkcldw(ego, + r, m * d[0].is, cors, + m, d[0].is, + v, ivs, covs, + 0, m, + p->ri, p->ii, plnr); + if (!cldw) goto nada; + + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(m, d[0].is, r * d[0].os), + X(mktensor_2d)(r, cors, d[0].os, + v, covs, ovs), + p->ri, p->ii, p->ro, p->io) + ); + if (!cld) goto nada; + + pln = MKPLAN_DFT(P, &padt, apply_dif); + break; + } + + default: A(0); + + } + + pln->cld = cld; + pln->cldw = cldw; + pln->r = r; + X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops); + + /* inherit could_prune_now_p attribute from cldw */ + pln->super.super.could_prune_now_p = cldw->could_prune_now_p; + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldw); + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +ct_solver *X(mksolver_ct)(size_t size, INT r, int dec, + ct_mkinferior mkcldw, + ct_force_vrecursion force_vrecursionp) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + ct_solver *slv = (ct_solver *)X(mksolver)(size, &sadt); + slv->r = r; + slv->dec = dec; + slv->mkcldw = mkcldw; + slv->force_vrecursionp = force_vrecursionp; + return slv; +} + +plan *X(mkplan_dftw)(size_t size, const plan_adt *adt, dftwapply apply) +{ + plan_dftw *ego; + + ego = (plan_dftw *) X(mkplan)(size, adt); + ego->apply = apply; + + return &(ego->super); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/ct.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/ct.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright 
(c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "dft.h" + +typedef void (*dftwapply)(const plan *ego, R *rio, R *iio); +typedef struct ct_solver_s ct_solver; +typedef plan *(*ct_mkinferior)(const ct_solver *ego, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mstart, INT mcount, + R *rio, R *iio, planner *plnr); +typedef int (*ct_force_vrecursion)(const ct_solver *ego, + const problem_dft *p); + +typedef struct { + plan super; + dftwapply apply; +} plan_dftw; + +extern plan *X(mkplan_dftw)(size_t size, const plan_adt *adt, dftwapply apply); + +#define MKPLAN_DFTW(type, adt, apply) \ + (type *)X(mkplan_dftw)(sizeof(type), adt, apply) + +struct ct_solver_s { + solver super; + INT r; + int dec; +# define DECDIF 0 +# define DECDIT 1 +# define TRANSPOSE 2 + ct_mkinferior mkcldw; + ct_force_vrecursion force_vrecursionp; +}; + +int X(ct_applicable)(const ct_solver *, const problem *, planner *); +ct_solver *X(mksolver_ct)(size_t size, INT r, int dec, + ct_mkinferior mkcldw, + ct_force_vrecursion force_vrecursionp); +extern ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, + ct_mkinferior, ct_force_vrecursion); + +void X(regsolver_ct_directw)(planner *plnr, + kdftw codelet, const ct_desc *desc, 
int dec); +void X(regsolver_ct_directwbuf)(planner *plnr, + kdftw codelet, const ct_desc *desc, int dec); +solver *X(mksolver_ctsq)(kdftwsq codelet, const ct_desc *desc, int dec); +void X(regsolver_ct_directwsq)(planner *plnr, kdftwsq codelet, + const ct_desc *desc, int dec); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/dft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/dft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#ifndef __DFT_H__ +#define __DFT_H__ + +#include "ifftw.h" +#include "codelet-dft.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/* problem.c: */ +typedef struct { + problem super; + tensor *sz, *vecsz; + R *ri, *ii, *ro, *io; +} problem_dft; + +void X(dft_zerotens)(tensor *sz, R *ri, R *ii); +problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz, + R *ri, R *ii, R *ro, R *io); +problem *X(mkproblem_dft_d)(tensor *sz, tensor *vecsz, + R *ri, R *ii, R *ro, R *io); + +/* solve.c: */ +void X(dft_solve)(const plan *ego_, const problem *p_); + +/* plan.c: */ +typedef void (*dftapply) (const plan *ego, R *ri, R *ii, R *ro, R *io); + +typedef struct { + plan super; + dftapply apply; +} plan_dft; + +plan *X(mkplan_dft)(size_t size, const plan_adt *adt, dftapply apply); + +#define MKPLAN_DFT(type, adt, apply) \ + (type *)X(mkplan_dft)(sizeof(type), adt, apply) + +/* various solvers */ +solver *X(mksolver_dft_direct)(kdft k, const kdft_desc *desc); +solver *X(mksolver_dft_directbuf)(kdft k, const kdft_desc *desc); + +void X(dft_rank0_register)(planner *p); +void X(dft_rank_geq2_register)(planner *p); +void X(dft_indirect_register)(planner *p); +void X(dft_indirect_transpose_register)(planner *p); +void X(dft_vrank_geq1_register)(planner *p); +void X(dft_vrank2_transpose_register)(planner *p); +void X(dft_vrank3_transpose_register)(planner *p); +void X(dft_buffered_register)(planner *p); +void X(dft_generic_register)(planner *p); +void X(dft_rader_register)(planner *p); +void X(dft_bluestein_register)(planner *p); +void X(dft_nop_register)(planner *p); +void X(ct_generic_register)(planner *p); +void X(ct_genericbuf_register)(planner *p); + +/* configurations */ +void X(dft_conf_standard)(planner *p); + +#ifdef 
__cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __DFT_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/dftw-direct.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/dftw-direct.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct.h" + +typedef struct { + ct_solver super; + const ct_desc *desc; + int bufferedp; + kdftw k; +} S; + +typedef struct { + plan_dftw super; + kdftw k; + INT r; + stride rs; + INT m, ms, v, vs, mb, me, extra_iter; + stride brs; + twid *td; + const S *slv; +} P; + + +/************************************************************* + Nonbuffered code + *************************************************************/ +static void apply(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + INT i; + ASSERT_ALIGNED_DOUBLE; + for (i = 0; i < ego->v; ++i, rio += ego->vs, iio += ego->vs) { + INT mb = ego->mb, ms = ego->ms; + ego->k(rio + mb*ms, iio + mb*ms, ego->td->W, + ego->rs, mb, ego->me, ms); + } +} + +static void apply_extra_iter(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + 
INT i, v = ego->v, vs = ego->vs; + INT mb = ego->mb, me = ego->me, mm = me - 1, ms = ego->ms; + ASSERT_ALIGNED_DOUBLE; + for (i = 0; i < v; ++i, rio += vs, iio += vs) { + ego->k(rio + mb*ms, iio + mb*ms, ego->td->W, + ego->rs, mb, mm, ms); + ego->k(rio + mm*ms, iio + mm*ms, ego->td->W, + ego->rs, mm, mm+2, 0); + } +} + +/************************************************************* + Buffered code + *************************************************************/ +static void dobatch(const P *ego, R *rA, R *iA, INT mb, INT me, R *buf) +{ + INT brs = WS(ego->brs, 1); + INT rs = WS(ego->rs, 1); + INT ms = ego->ms; + + X(cpy2d_pair_ci)(rA + mb*ms, iA + mb*ms, buf, buf + 1, + ego->r, rs, brs, + me - mb, ms, 2); + ego->k(buf, buf + 1, ego->td->W, ego->brs, mb, me, 2); + X(cpy2d_pair_co)(buf, buf + 1, rA + mb*ms, iA + mb*ms, + ego->r, brs, rs, + me - mb, 2, ms); +} + +/* must be even for SIMD alignment; should not be 2^k to avoid + associativity conflicts */ +static INT compute_batchsize(INT radix) +{ + /* round up to multiple of 4 */ + radix += 3; + radix &= -4; + + return (radix + 2); +} + +static void apply_buf(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + INT i, j, v = ego->v, r = ego->r; + INT batchsz = compute_batchsize(r); + R *buf; + INT mb = ego->mb, me = ego->me; + size_t bufsz = r * batchsz * 2 * sizeof(R); + + BUF_ALLOC(R *, buf, bufsz); + + for (i = 0; i < v; ++i, rio += ego->vs, iio += ego->vs) { + for (j = mb; j + batchsz < me; j += batchsz) + dobatch(ego, rio, iio, j, j + batchsz, buf); + + dobatch(ego, rio, iio, j, me, buf); + } + + BUF_FREE(buf, bufsz); +} + +/************************************************************* + common code + *************************************************************/ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(twiddle_awake)(wakefulness, &ego->td, ego->slv->desc->tw, + ego->r * ego->m, ego->r, ego->m + ego->extra_iter); +} + +static void 
destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(stride_destroy)(ego->brs); + X(stride_destroy)(ego->rs); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *slv = ego->slv; + const ct_desc *e = slv->desc; + + if (slv->bufferedp) + p->print(p, "(dftw-directbuf/%D-%D/%D%v \"%s\")", + compute_batchsize(ego->r), ego->r, + X(twiddle_length)(ego->r, e->tw), ego->v, e->nam); + else + p->print(p, "(dftw-direct-%D/%D%v \"%s\")", + ego->r, X(twiddle_length)(ego->r, e->tw), ego->v, e->nam); +} + +static int applicable0(const S *ego, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mb, INT me, + R *rio, R *iio, + const planner *plnr, INT *extra_iter) +{ + const ct_desc *e = ego->desc; + UNUSED(v); + + return ( + 1 + && r == e->radix + && irs == ors /* in-place along R */ + && ivs == ovs /* in-place along V */ + + /* check for alignment/vector length restrictions */ + && ((*extra_iter = 0, + e->genus->okp(e, rio, iio, irs, ivs, m, mb, me, ms, plnr)) + || + (*extra_iter = 1, + (1 + /* FIXME: require full array, otherwise some threads + may be extra_iter and other threads won't be. 
+ Generating the proper twiddle factors is a pain in + this case */ + && mb == 0 && me == m + && e->genus->okp(e, rio, iio, irs, ivs, + m, mb, me - 1, ms, plnr) + && e->genus->okp(e, rio, iio, irs, ivs, + m, me - 1, me + 1, ms, plnr)))) + + && (e->genus->okp(e, rio + ivs, iio + ivs, irs, ivs, + m, mb, me - *extra_iter, ms, plnr)) + + ); +} + +static int applicable0_buf(const S *ego, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mb, INT me, + R *rio, R *iio, + const planner *plnr) +{ + const ct_desc *e = ego->desc; + INT batchsz; + UNUSED(v); UNUSED(ms); UNUSED(rio); UNUSED(iio); + + return ( + 1 + && r == e->radix + && irs == ors /* in-place along R */ + && ivs == ovs /* in-place along V */ + + /* check for alignment/vector length restrictions, both for + batchsize and for the remainder */ + && (batchsz = compute_batchsize(r), 1) + && (e->genus->okp(e, 0, ((const R *)0) + 1, 2 * batchsz, 0, + m, mb, mb + batchsz, 2, plnr)) + && (e->genus->okp(e, 0, ((const R *)0) + 1, 2 * batchsz, 0, + m, mb, me, 2, plnr)) + ); +} + +static int applicable(const S *ego, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mb, INT me, + R *rio, R *iio, + const planner *plnr, INT *extra_iter) +{ + if (ego->bufferedp) { + *extra_iter = 0; + if (!applicable0_buf(ego, + r, irs, ors, m, ms, v, ivs, ovs, mb, me, + rio, iio, plnr)) + return 0; + } else { + if (!applicable0(ego, + r, irs, ors, m, ms, v, ivs, ovs, mb, me, + rio, iio, plnr, extra_iter)) + return 0; + } + + if (NO_UGLYP(plnr) && X(ct_uglyp)((ego->bufferedp? 
(INT)512 : (INT)16), + v, m * r, r)) + return 0; + + if (m * r > 262144 && NO_FIXED_RADIX_LARGE_NP(plnr)) + return 0; + + return 1; +} + +static plan *mkcldw(const ct_solver *ego_, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mstart, INT mcount, + R *rio, R *iio, + planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + const ct_desc *e = ego->desc; + INT extra_iter; + + static const plan_adt padt = { + 0, awake, print, destroy + }; + + A(mstart >= 0 && mstart + mcount <= m); + if (!applicable(ego, + r, irs, ors, m, ms, v, ivs, ovs, mstart, mstart + mcount, + rio, iio, plnr, &extra_iter)) + return (plan *)0; + + if (ego->bufferedp) { + pln = MKPLAN_DFTW(P, &padt, apply_buf); + } else { + pln = MKPLAN_DFTW(P, &padt, extra_iter ? apply_extra_iter : apply); + } + + pln->k = ego->k; + pln->rs = X(mkstride)(r, irs); + pln->td = 0; + pln->r = r; + pln->m = m; + pln->ms = ms; + pln->v = v; + pln->vs = ivs; + pln->mb = mstart; + pln->me = mstart + mcount; + pln->slv = ego; + pln->brs = X(mkstride)(r, 2 * compute_batchsize(r)); + pln->extra_iter = extra_iter; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(v * (mcount/e->genus->vl), &e->ops, &pln->super.super.ops); + + if (ego->bufferedp) { + /* 8 load/stores * N * V */ + pln->super.super.ops.other += 8 * r * mcount * v; + } + + pln->super.super.could_prune_now_p = + (!ego->bufferedp && r >= 5 && r < 64 && m >= r); + return &(pln->super.super); +} + +static void regone(planner *plnr, kdftw codelet, + const ct_desc *desc, int dec, int bufferedp) +{ + S *slv = (S *)X(mksolver_ct)(sizeof(S), desc->radix, dec, mkcldw, 0); + slv->k = codelet; + slv->desc = desc; + slv->bufferedp = bufferedp; + REGISTER_SOLVER(plnr, &(slv->super.super)); + if (X(mksolver_ct_hook)) { + slv = (S *)X(mksolver_ct_hook)(sizeof(S), desc->radix, + dec, mkcldw, 0); + slv->k = codelet; + slv->desc = desc; + slv->bufferedp = bufferedp; + REGISTER_SOLVER(plnr, &(slv->super.super)); + } +} + +void 
X(regsolver_ct_directw)(planner *plnr, kdftw codelet, + const ct_desc *desc, int dec) +{ + regone(plnr, codelet, desc, dec, /* bufferedp */ 0); + regone(plnr, codelet, desc, dec, /* bufferedp */ 1); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/dftw-directsq.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/dftw-directsq.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct.h" + +typedef struct { + ct_solver super; + const ct_desc *desc; + kdftwsq k; +} S; + +typedef struct { + plan_dftw super; + kdftwsq k; + INT r; + stride rs, vs; + INT m, ms, v, mb, me; + twid *td; + const S *slv; +} P; + + +static void apply(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + INT mb = ego->mb, ms = ego->ms; + ego->k(rio + mb*ms, iio + mb*ms, ego->td->W, ego->rs, ego->vs, + mb, ego->me, ms); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(twiddle_awake)(wakefulness, &ego->td, ego->slv->desc->tw, + ego->r * ego->m, ego->r, ego->m); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(stride_destroy)(ego->rs); + X(stride_destroy)(ego->vs); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *slv = ego->slv; + const ct_desc *e = slv->desc; + + p->print(p, "(dftw-directsq-%D/%D%v \"%s\")", + ego->r, X(twiddle_length)(ego->r, e->tw), ego->v, e->nam); +} + +static int applicable(const S *ego, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mb, INT me, + R *rio, R *iio, + const planner *plnr) +{ + const ct_desc *e = ego->desc; + UNUSED(v); + + return ( + 1 + && r == e->radix + + /* transpose r, v */ + && r == v + && irs == ovs + && ivs == ors + + /* check for alignment/vector length restrictions */ + && e->genus->okp(e, rio, iio, irs, ivs, m, mb, me, ms, plnr) + + ); +} + +static plan *mkcldw(const ct_solver *ego_, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mstart, INT mcount, + R *rio, R *iio, + planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + const ct_desc *e = ego->desc; + + static const plan_adt 
padt = { + 0, awake, print, destroy + }; + + A(mstart >= 0 && mstart + mcount <= m); + if (!applicable(ego, + r, irs, ors, m, ms, v, ivs, ovs, mstart, mstart + mcount, + rio, iio, plnr)) + return (plan *)0; + + pln = MKPLAN_DFTW(P, &padt, apply); + + pln->k = ego->k; + pln->rs = X(mkstride)(r, irs); + pln->vs = X(mkstride)(v, ivs); + pln->td = 0; + pln->r = r; + pln->m = m; + pln->ms = ms; + pln->v = v; + pln->mb = mstart; + pln->me = mstart + mcount; + pln->slv = ego; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(mcount/e->genus->vl, &e->ops, &pln->super.super.ops); + + return &(pln->super.super); +} + +static void regone(planner *plnr, kdftwsq codelet, + const ct_desc *desc, int dec) +{ + S *slv = (S *)X(mksolver_ct)(sizeof(S), desc->radix, dec, mkcldw, 0); + slv->k = codelet; + slv->desc = desc; + REGISTER_SOLVER(plnr, &(slv->super.super)); + if (X(mksolver_ct_hook)) { + slv = (S *)X(mksolver_ct_hook)(sizeof(S), desc->radix, dec, + mkcldw, 0); + slv->k = codelet; + slv->desc = desc; + REGISTER_SOLVER(plnr, &(slv->super.super)); + } +} + +void X(regsolver_ct_directwsq)(planner *plnr, kdftwsq codelet, + const ct_desc *desc, int dec) +{ + regone(plnr, codelet, desc, dec+TRANSPOSE); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/dftw-generic.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/dftw-generic.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* express a twiddle problem in terms of dft + multiplication by + twiddle factors */ + +#include "ct.h" + +typedef ct_solver S; + +typedef struct { + plan_dftw super; + + INT r, rs, m, mb, me, ms, v, vs; + + plan *cld; + + twid *td; + + const S *slv; + int dec; +} P; + +static void mktwiddle(P *ego, enum wakefulness wakefulness) +{ + static const tw_instr tw[] = { { TW_FULL, 0, 0 }, { TW_NEXT, 1, 0 } }; + + /* note that R and M are swapped, to allow for sequential + access both to data and twiddles */ + X(twiddle_awake)(wakefulness, &ego->td, tw, + ego->r * ego->m, ego->m, ego->r); +} + +static void bytwiddle(const P *ego, R *rio, R *iio) +{ + INT iv, ir, im; + INT r = ego->r, rs = ego->rs; + INT m = ego->m, mb = ego->mb, me = ego->me, ms = ego->ms; + INT v = ego->v, vs = ego->vs; + const R *W = ego->td->W; + + mb += (mb == 0); /* skip m=0 iteration */ + for (iv = 0; iv < v; ++iv) { + for (ir = 1; ir < r; ++ir) { + for (im = mb; im < me; ++im) { + R *pr = rio + ms * im + rs * ir; + R *pi = iio + ms * im + rs * ir; + E xr = *pr; + E xi = *pi; + E wr = W[2 * im + (2 * (m-1)) * ir - 2]; + E wi = W[2 * im + (2 * (m-1)) * ir - 1]; + *pr = xr * wr + xi * wi; + *pi = xi * wr - xr * wi; + } + } + rio += vs; + iio += vs; + } +} + +static int applicable(INT irs, INT ors, INT ivs, INT ovs, + const planner *plnr) +{ + return (1 + && irs == ors + && ivs == ovs + && !NO_SLOWP(plnr) + ); +} + +static void apply_dit(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + 
INT dm = ego->ms * ego->mb; + + bytwiddle(ego, rio, iio); + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, rio + dm, iio + dm, rio + dm, iio + dm); +} + +static void apply_dif(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + INT dm = ego->ms * ego->mb; + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, rio + dm, iio + dm, rio + dm, iio + dm); + + bytwiddle(ego, rio, iio); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); + mktwiddle(ego, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(dftw-generic-%s-%D-%D%v%(%p%))", + ego->dec == DECDIT ? "dit" : "dif", + ego->r, ego->m, ego->v, ego->cld); +} + +static plan *mkcldw(const ct_solver *ego_, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mstart, INT mcount, + R *rio, R *iio, + planner *plnr) +{ + const S *ego = (const S *)ego_; + P *pln; + plan *cld = 0; + INT dm = ms * mstart; + + static const plan_adt padt = { + 0, awake, print, destroy + }; + + A(mstart >= 0 && mstart + mcount <= m); + if (!applicable(irs, ors, ivs, ovs, plnr)) + return (plan *)0; + + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(r, irs, irs), + X(mktensor_2d)(mcount, ms, ms, v, ivs, ivs), + rio + dm, iio + dm, rio + dm, iio + dm) + ); + if (!cld) goto nada; + + pln = MKPLAN_DFTW(P, &padt, ego->dec == DECDIT ? 
apply_dit : apply_dif); + pln->slv = ego; + pln->cld = cld; + pln->r = r; + pln->rs = irs; + pln->m = m; + pln->ms = ms; + pln->v = v; + pln->vs = ivs; + pln->mb = mstart; + pln->me = mstart + mcount; + pln->dec = ego->dec; + pln->td = 0; + + { + double n0 = (r - 1) * (mcount - 1) * v; + pln->super.super.ops = cld->ops; + pln->super.super.ops.mul += 8 * n0; + pln->super.super.ops.add += 4 * n0; + pln->super.super.ops.other += 8 * n0; + } + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +static void regsolver(planner *plnr, INT r, int dec) +{ + S *slv = (S *)X(mksolver_ct)(sizeof(S), r, dec, mkcldw, 0); + REGISTER_SOLVER(plnr, &(slv->super)); + if (X(mksolver_ct_hook)) { + slv = (S *)X(mksolver_ct_hook)(sizeof(S), r, dec, mkcldw, 0); + REGISTER_SOLVER(plnr, &(slv->super)); + } +} + +void X(ct_generic_register)(planner *p) +{ + regsolver(p, 0, DECDIT); + regsolver(p, 0, DECDIF); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/dftw-genericbuf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/dftw-genericbuf.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* express a twiddle problem in terms of dft + multiplication by + twiddle factors */ + +#include "ct.h" + +typedef struct { + ct_solver super; + INT batchsz; +} S; + +typedef struct { + plan_dftw super; + + INT r, rs, m, ms, v, vs, mb, me; + INT batchsz; + plan *cld; + + triggen *t; + const S *slv; +} P; + + /* BATCHDIST(r): spacing, counted in (re, im) pairs, between successive batch entries in the work buffer; it is r plus 16. */ +#define BATCHDIST(r) ((r) + 16) + +/**************************************************************/ /* bytwiddle: for every j in [0, r) and k in [mb, me), pass element (rio, iio)[j * rs + k * ms] with index j * k to t->rotate, which writes its result pair at buf[2 * j + 2 * BATCHDIST(r) * (k - mb)]. */ +static void bytwiddle(const P *ego, INT mb, INT me, R *buf, R *rio, R *iio) +{ + INT j, k; + INT r = ego->r, rs = ego->rs, ms = ego->ms; + triggen *t = ego->t; + for (j = 0; j < r; ++j) { + for (k = mb; k < me; ++k) + t->rotate(t, j * k, + rio[j * rs + k * ms], + iio[j * rs + k * ms], + &buf[j * 2 + 2 * BATCHDIST(r) * (k - mb) + 0]); + } +} + /* applicable0: structural test. Requires v == 1, irs == ors, mcount at least batchsz and a multiple of it, r >= 64 and m >= r. */ +static int applicable0(const S *ego, + INT r, INT irs, INT ors, + INT m, INT v, + INT mcount) +{ + return (1 + && v == 1 + && irs == ors + && mcount >= ego->batchsz + && mcount % ego->batchsz == 0 + && r >= 64 + && m >= r + ); +} + /* applicable: applicable0 plus a planner policy check; rejected when NO_UGLYP(plnr) holds and m * r < 65536. */ +static int applicable(const S *ego, + INT r, INT irs, INT ors, + INT m, INT v, + INT mcount, + const planner *plnr) +{ + if (!applicable0(ego, r, irs, ors, m, v, mcount)) + return 0; + if (NO_UGLYP(plnr) && m * r < 65536) + return 0; + + return 1; +} + /* dobatch: handle indices [mb, me). Twiddles the data into buf, applies the child plan in place on buf, then copies buf back to rio/iio (offset by ms * mb) with cpy2d_pair_co. */ +static void dobatch(const P *ego, INT mb, INT me, R *buf, R *rio, R *iio) +{ + plan_dft *cld; + INT ms = ego->ms; + + bytwiddle(ego, mb, me, buf, rio, iio); + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, buf, buf + 1, buf, buf + 1); + X(cpy2d_pair_co)(buf, buf + 1, + rio + ms * mb, iio + ms * mb, + me-mb, 2 * BATCHDIST(ego->r), ms, + ego->r, 2, ego->rs); +} + /* apply: allocate a buffer of 2 * BATCHDIST(r) * batchsz reals, call dobatch over [mb, me) in steps of batchsz, assert the loop ended exactly at me, and free the buffer. */ +static void apply(const plan *ego_, R *rio, R *iio) +{ + const P *ego = (const P *) ego_; + R *buf = (R *) MALLOC(sizeof(R) * 2 * BATCHDIST(ego->r) * 
ego->batchsz, + BUFFERS); + INT m; + + for (m = ego->mb; m < ego->me; m += ego->batchsz) + dobatch(ego, m, m + ego->batchsz, buf, rio, iio); + + A(m == ego->me); + + X(ifree)(buf); +} + /* awake: forward the wakefulness to the child plan; on SLEEPY destroy the triggen and clear the pointer, otherwise create a triggen for r * m. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); + + switch (wakefulness) { + case SLEEPY: + X(triggen_destroy)(ego->t); ego->t = 0; + break; + default: + ego->t = X(mktriggen)(AWAKE_SQRTN_TABLE, ego->r * ego->m); + break; + } +} + /* destroy: releases the child plan only. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(dftw-genericbuf/%D-%D-%D%(%p%))", + ego->batchsz, ego->r, ego->m, ego->cld); +} + /* mkcldw: plan constructor handed to mksolver_ct. Returns 0 when not applicable or when no child plan is found. The temporary buf exists only to describe the child problem (r-point DFT repeated batchsz times, in place) and is freed right after planning. The op counts are the child's plus terms proportional to (r - 1) * (mcount - 1). */ +static plan *mkcldw(const ct_solver *ego_, + INT r, INT irs, INT ors, + INT m, INT ms, + INT v, INT ivs, INT ovs, + INT mstart, INT mcount, + R *rio, R *iio, + planner *plnr) +{ + const S *ego = (const S *)ego_; + P *pln; + plan *cld = 0; + R *buf; + + static const plan_adt padt = { + 0, awake, print, destroy + }; + + UNUSED(ivs); UNUSED(ovs); UNUSED(rio); UNUSED(iio); + + A(mstart >= 0 && mstart + mcount <= m); + if (!applicable(ego, r, irs, ors, m, v, mcount, plnr)) + return (plan *)0; + + buf = (R *) MALLOC(sizeof(R) * 2 * BATCHDIST(r) * ego->batchsz, BUFFERS); + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(r, 2, 2), + X(mktensor_1d)(ego->batchsz, + 2 * BATCHDIST(r), + 2 * BATCHDIST(r)), + buf, buf + 1, buf, buf + 1 + ) + ); + X(ifree)(buf); + if (!cld) goto nada; + + pln = MKPLAN_DFTW(P, &padt, apply); + pln->slv = ego; + pln->cld = cld; + pln->r = r; + pln->m = m; + pln->ms = ms; + pln->rs = irs; + pln->batchsz = ego->batchsz; + pln->mb = mstart; + pln->me = mstart + mcount; + + { + double n0 = (r - 1) * (mcount - 1); + pln->super.super.ops = cld->ops; + pln->super.super.ops.mul += 8 * n0; + pln->super.super.ops.add += 4 * n0; + pln->super.super.ops.other += 8 * n0; + } + 
return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + /* regsolver: create and register a DECDIT solver for radix r with the given batch size; when mksolver_ct_hook is set, a second solver is created through the hook and registered as well. */ +static void regsolver(planner *plnr, INT r, INT batchsz) +{ + S *slv = (S *)X(mksolver_ct)(sizeof(S), r, DECDIT, mkcldw, 0); + slv->batchsz = batchsz; + REGISTER_SOLVER(plnr, &(slv->super.super)); + + if (X(mksolver_ct_hook)) { + slv = (S *)X(mksolver_ct_hook)(sizeof(S), r, DECDIT, mkcldw, 0); + slv->batchsz = batchsz; + REGISTER_SOLVER(plnr, &(slv->super.super)); + } + +} + /* Registers one solver for every (radix, batch size) pair taken from the two tables below. */ +void X(ct_genericbuf_register)(planner *p) +{ + static const INT radices[] = { -1, -2, -4, -8, -16, -32, -64 }; + static const INT batchsizes[] = { 4, 8, 16, 32, 64 }; + unsigned i, j; + + for (i = 0; i < sizeof(radices) / sizeof(radices[0]); ++i) + for (j = 0; j < sizeof(batchsizes) / sizeof(batchsizes[0]); ++j) + regsolver(p, radices[i], batchsizes[j]); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/direct.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/direct.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* direct DFT solver, if we have a codelet */ + +#include "dft.h" + +typedef struct { + solver super; + const kdft_desc *desc; + kdft k; + int bufferedp; +} S; + +typedef struct { + plan_dft super; + + stride is, os, bufstride; + INT n, vl, ivs, ovs; + kdft k; + const S *slv; +} P; + /* dobatch: copy batchsz input vectors into buf, then run codelet k either straight from buf to ro/io, or in place inside buf followed by a copy out to ro/io. */ +static void dobatch(const P *ego, R *ri, R *ii, R *ro, R *io, + R *buf, INT batchsz) +{ + X(cpy2d_pair_ci)(ri, ii, buf, buf+1, + ego->n, WS(ego->is, 1), WS(ego->bufstride, 1), + batchsz, ego->ivs, 2); + + if (IABS(WS(ego->os, 1)) < IABS(ego->ovs)) { + /* transform directly to output */ + ego->k(buf, buf+1, ro, io, + ego->bufstride, ego->os, batchsz, 2, ego->ovs); + } else { + /* transform to buffer and copy back */ + ego->k(buf, buf+1, buf, buf+1, + ego->bufstride, ego->bufstride, batchsz, 2, 2); + X(cpy2d_pair_co)(buf, buf+1, ro, io, + ego->n, WS(ego->bufstride, 1), WS(ego->os, 1), + batchsz, 2, ego->ovs); + } +} + /* compute_batchsize: returns n rounded up to a multiple of 4, plus 2. */ +static INT compute_batchsize(INT n) +{ + /* round up to multiple of 4 */ + n += 3; + n &= -4; + + return (n + 2); +} + /* apply_buf: buffered apply. Full chunks of batchsz vectors go through dobatch while more than batchsz vectors remain; the final dobatch call handles the remaining vl - i vectors. */ +static void apply_buf(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + R *buf; + INT vl = ego->vl, n = ego->n, batchsz = compute_batchsize(n); + INT i; + size_t bufsz = n * batchsz * 2 * sizeof(R); + + BUF_ALLOC(R *, buf, bufsz); + + for (i = 0; i < vl - batchsz; i += batchsz) { + dobatch(ego, ri, ii, ro, io, buf, batchsz); + ri += batchsz * ego->ivs; ii += batchsz * ego->ivs; + ro += batchsz * ego->ovs; io += batchsz * ego->ovs; + } + dobatch(ego, ri, ii, ro, io, buf, vl - i); + + BUF_FREE(buf, bufsz); +} + /* apply: unbuffered apply, a single codelet call covering all vl vectors. */ +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + ASSERT_ALIGNED_DOUBLE; + ego->k(ri, ii, ro, io, ego->is, ego->os, ego->vl, 
ego->ivs, ego->ovs); +} + /* apply_extra_iter: one codelet call over the first vl - 1 vectors, then a second call for the last vector that passes vector length 1 and zero vector strides. */ +static void apply_extra_iter(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT vl = ego->vl; + + ASSERT_ALIGNED_DOUBLE; + + /* for 4-way SIMD when VL is odd: iterate over an + even vector length VL, and then execute the last + iteration as a 2-vector with vector stride 0. */ + ego->k(ri, ii, ro, io, ego->is, ego->os, vl - 1, ego->ivs, ego->ovs); + + ego->k(ri + (vl - 1) * ego->ivs, ii + (vl - 1) * ego->ivs, + ro + (vl - 1) * ego->ovs, io + (vl - 1) * ego->ovs, + ego->is, ego->os, 1, 0, 0); +} + /* destroy: releases the three stride objects created in mkplan. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(stride_destroy)(ego->is); + X(stride_destroy)(ego->os); + X(stride_destroy)(ego->bufstride); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->slv; + const kdft_desc *d = s->desc; + + if (ego->slv->bufferedp) + p->print(p, "(dft-directbuf/%D-%D%v \"%s\")", + compute_batchsize(d->sz), d->sz, ego->vl, d->nam); + else + p->print(p, "(dft-direct-%D%v \"%s\")", d->sz, ego->vl, d->nam); +} + /* applicable_buf: test for the buffered solver. Needs a rank-1 transform of the codelet's size with a rank-1 vector, and the codelet's okp must accept both a full batch and the vl % batchsz remainder when reading from the buffer layout. */ +static int applicable_buf(const solver *ego_, const problem *p_, + const planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_dft *p = (const problem_dft *) p_; + const kdft_desc *d = ego->desc; + INT vl; + INT ivs, ovs; + INT batchsz; + + return ( + 1 + && p->sz->rnk == 1 + && p->vecsz->rnk == 1 + && p->sz->dims[0].n == d->sz + + /* check strides etc */ + && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) + + /* UGLY if IS <= IVS */ + && !(NO_UGLYP(plnr) && + X(iabs)(p->sz->dims[0].is) <= X(iabs)(ivs)) + + && (batchsz = compute_batchsize(d->sz), 1) + && (d->genus->okp(d, 0, ((const R *)0) + 1, p->ro, p->io, + 2 * batchsz, p->sz->dims[0].os, + batchsz, 2, ovs, plnr)) + && (d->genus->okp(d, 0, ((const R *)0) + 1, p->ro, p->io, + 2 * batchsz, p->sz->dims[0].os, + vl % batchsz, 2, ovs, plnr)) + + + && (0 + /* can operate out-of-place */ + || p->ri != p->ro + + /* can operate in-place as long as 
strides are the same */ + || X(tensor_inplace_strides2)(p->sz, p->vecsz) + + /* can do it if the problem fits in the buffer, no matter + what the strides are */ + || vl <= batchsz + ) + ); +} + /* applicable: test for the unbuffered solver. *extra_iterp is set to 0 when okp accepts the full vector length vl, and to 1 when the fallback pair of okp checks (vl - 1 vectors, then a 2-vector with zero strides) is tried instead. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *extra_iterp) +{ + const S *ego = (const S *) ego_; + const problem_dft *p = (const problem_dft *) p_; + const kdft_desc *d = ego->desc; + INT vl; + INT ivs, ovs; + + return ( + 1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->sz->dims[0].n == d->sz + + /* check strides etc */ + && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) + + && ((*extra_iterp = 0, + (d->genus->okp(d, p->ri, p->ii, p->ro, p->io, + p->sz->dims[0].is, p->sz->dims[0].os, + vl, ivs, ovs, plnr))) + || + (*extra_iterp = 1, + ((d->genus->okp(d, p->ri, p->ii, p->ro, p->io, + p->sz->dims[0].is, p->sz->dims[0].os, + vl - 1, ivs, ovs, plnr)) + && + (d->genus->okp(d, p->ri, p->ii, p->ro, p->io, + p->sz->dims[0].is, p->sz->dims[0].os, + 2, 0, 0, plnr))))) + + && (0 + /* can operate out-of-place */ + || p->ri != p->ro + + /* can always compute one transform */ + || vl == 1 + + /* can operate in-place as long as strides are the same */ + || X(tensor_inplace_strides2)(p->sz, p->vecsz) + ) + ); +} + + /* mkplan: build a buffered plan (apply_buf) or an unbuffered one (apply or apply_extra_iter) depending on ego->bufferedp; returns 0 when the matching applicability test fails. Op counts are the codelet's ops scaled by vl / genus->vl, plus 4 * n * vl "other" ops for the buffered variant. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + const problem_dft *p; + iodim *d; + const kdft_desc *e = ego->desc; + + static const plan_adt padt = { + X(dft_solve), X(null_awake), print, destroy + }; + + UNUSED(plnr); + + if (ego->bufferedp) { + if (!applicable_buf(ego_, p_, plnr)) + return (plan *)0; + pln = MKPLAN_DFT(P, &padt, apply_buf); + } else { + int extra_iterp = 0; + if (!applicable(ego_, p_, plnr, &extra_iterp)) + return (plan *)0; + pln = MKPLAN_DFT(P, &padt, extra_iterp ? 
apply_extra_iter : apply); + } + + p = (const problem_dft *) p_; + d = p->sz->dims; + pln->k = ego->k; + pln->n = d[0].n; + pln->is = X(mkstride)(pln->n, d[0].is); + pln->os = X(mkstride)(pln->n, d[0].os); + pln->bufstride = X(mkstride)(pln->n, 2 * compute_batchsize(pln->n)); + + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + pln->slv = ego; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl / e->genus->vl, &e->ops, &pln->super.super.ops); + + if (ego->bufferedp) + pln->super.super.ops.other += 4 * pln->n * pln->vl; + + pln->super.super.could_prune_now_p = !ego->bufferedp; + return &(pln->super.super); +} + /* mksolver: allocate a solver that remembers the codelet, its descriptor and whether the buffered variant is wanted. */ +static solver *mksolver(kdft k, const kdft_desc *desc, int bufferedp) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->k = k; + slv->desc = desc; + slv->bufferedp = bufferedp; + return &(slv->super); +} + /* Unbuffered direct solver (bufferedp = 0). */ +solver *X(mksolver_dft_direct)(kdft k, const kdft_desc *desc) +{ + return mksolver(k, desc, 0); +} + /* Buffered direct solver (bufferedp = 1). */ +solver *X(mksolver_dft_directbuf)(kdft k, const kdft_desc *desc) +{ + return mksolver(k, desc, 1); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/generic.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/generic.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "dft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_dft super; + twid *td; + INT n, is, os; +} P; + + /* cdot: starting from x[0] and x[1], accumulate each following group of four x values against one (w[0], w[1]) pair, with w[0] scaling the first two values and w[1] the last two, then emit the two outputs (rr + ii, ir - ri) and (rr - ii, ir + ri). */ +static void cdot(INT n, const E *x, const R *w, + R *or0, R *oi0, R *or1, R *oi1) +{ + INT i; + + E rr = x[0], ri = 0, ir = x[1], ii = 0; + x += 2; + for (i = 1; i + i < n; ++i) { + rr += x[0] * w[0]; + ir += x[1] * w[0]; + ri += x[2] * w[1]; + ii += x[3] * w[1]; + x += 4; w += 2; + } + *or0 = rr + ii; + *oi0 = ir - ri; + *or1 = rr - ii; + *oi1 = ir + ri; +} + /* hartley: store element 0 in o[0], o[1], then for each i with 2 * i < n store the sums and differences of elements i and n - i (real and imaginary parts) as four values. *pr and *pi receive element 0 plus all of those sums. */ +static void hartley(INT n, const R *xr, const R *xi, INT xs, E *o, + R *pr, R *pi) +{ + INT i; + E sr, si; + o[0] = sr = xr[0]; o[1] = si = xi[0]; o += 2; + for (i = 1; i + i < n; ++i) { + sr += (o[0] = xr[i * xs] + xr[(n - i) * xs]); + si += (o[1] = xi[i * xs] + xi[(n - i) * xs]); + o[2] = xr[i * xs] - xr[(n - i) * xs]; + o[3] = xi[i * xs] - xi[(n - i) * xs]; + o += 4; + } + *pr = sr; + *pi = si; +} + /* apply: hartley fills buf and writes output 0 directly into ro/io; each loop step then produces outputs i and n - i with a single cdot call, advancing the twiddle pointer W by n - 1 per step. */ +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT i; + INT n = ego->n, is = ego->is, os = ego->os; + const R *W = ego->td->W; + E *buf; + size_t bufsz = n * 2 * sizeof(E); + + BUF_ALLOC(E *, buf, bufsz); + hartley(n, ri, ii, is, buf, ro, io); + + for (i = 1; i + i < n; ++i) { + cdot(n, buf, W, + ro + i * os, io + i * os, + ro + (n - i) * os, io + (n - i) * os); + W += n - 1; + } + + BUF_FREE(buf, bufsz); +} + /* awake: hand the TW_HALF twiddle program to twiddle_awake, which manages ego->td for the requested wakefulness. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + static const tw_instr half_tw[] = { + { TW_HALF, 1, 0 }, + { TW_NEXT, 1, 0 } + }; + + X(twiddle_awake)(wakefulness, &ego->td, half_tw, ego->n, ego->n, + (ego->n - 1) / 2); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + + p->print(p, "(dft-generic-%D)", 
ego->n); +} + /* applicable: accepts rank-1 problems with vector rank 0 whose size is odd and prime, subject to the size bounds implied by NO_LARGE_GENERICP (n < GENERIC_MIN_BAD) and NO_SLOWP (n > GENERIC_MAX_SLOW). */ +static int applicable(const solver *ego, const problem *p_, + const planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + UNUSED(ego); + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk == 0 + && (p->sz->dims[0].n % 2) == 1 + && CIMPLIES(NO_LARGE_GENERICP(plnr), p->sz->dims[0].n < GENERIC_MIN_BAD) + && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > GENERIC_MAX_SLOW) + && X(is_prime)(p->sz->dims[0].n) + ); +} + /* mkplan: returns 0 when not applicable; otherwise records n and the strides, leaves td unset until awake, and fills in op counts of (n-1) * 5 adds and (n-1) * (n-1) fmas. */ +static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) +{ + const problem_dft *p; + P *pln; + INT n; + + static const plan_adt padt = { + X(dft_solve), awake, print, X(plan_null_destroy) + }; + + if (!applicable(ego, p_, plnr)) + return (plan *)0; + + pln = MKPLAN_DFT(P, &padt, apply); + + p = (const problem_dft *) p_; + pln->n = n = p->sz->dims[0].n; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + pln->td = 0; + + pln->super.super.ops.add = (n-1) * 5; + pln->super.super.ops.mul = 0; + pln->super.super.ops.fma = (n-1) * (n-1) ; +#if 0 /* these are nice pipelined sequential loads and should cost nothing */ + pln->super.super.ops.other = (n-1)*(4 + 1 + 2 * (n-1)); /* approximate */ +#endif + + return &(pln->super.super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + /* Registers the generic solver with the planner. */ +void X(dft_generic_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/indirect-transpose.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/indirect-transpose.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 
of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* solvers/plans for vectors of DFTs corresponding to the columns + of a matrix: first transpose the matrix so that the DFTs are + contiguous, then do DFTs with transposed output. In particular, + we restrict ourselves to the case of a square transpose (or a + sequence thereof). */ + +#include "dft.h" + +typedef solver S; + +typedef struct { + plan_dft super; + INT vl, ivs, ovs; + plan *cldtrans, *cld, *cldrest; +} P; + +/* initial transpose is out-of-place from input to output */ /* apply_op: vl times, run cldtrans from input to output and then cld in place on the output, stepping the pointers by ivs/ovs; cldrest finally runs once on the advanced pointers. */ +static void apply_op(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT vl = ego->vl, ivs = ego->ivs, ovs = ego->ovs, i; + + for (i = 0; i < vl; ++i) { + { + plan_dft *cldtrans = (plan_dft *) ego->cldtrans; + cldtrans->apply(ego->cldtrans, ri, ii, ro, io); + } + { + plan_dft *cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ro, io, ro, io); + } + ri += ivs; ii += ivs; + ro += ovs; io += ovs; + } + { + plan_dft *cldrest = (plan_dft *) ego->cldrest; + cldrest->apply(ego->cldrest, ri, ii, ro, io); + } +} + /* destroy: releases the three child plans. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldrest); + X(plan_destroy_internal)(ego->cld); + X(plan_destroy_internal)(ego->cldtrans); +} + /* awake: forwards the wakefulness to all three child plans. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldtrans, wakefulness); + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldrest, wakefulness); +} + +static 
void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(indirect-transpose%v%(%p%)%(%p%)%(%p%))", + ego->vl, ego->cldtrans, ego->cld, ego->cldrest); +} + /* pickdim: look for a vector dimension dim0 and a transform dimension dim1 such that n * |is| of the vector dimension does not exceed |is| of the transform dimension and the vector length is at least the transform length. A later candidate replaces the current pick only if its vector stride is no larger and its transform stride no smaller. Returns nonzero when a pair was found; *pdim0 and *pdim1 stay at -1 otherwise. */ +static int pickdim(const tensor *vs, const tensor *s, int *pdim0, int *pdim1) +{ + int dim0, dim1; + *pdim0 = *pdim1 = -1; + for (dim0 = 0; dim0 < vs->rnk; ++dim0) + for (dim1 = 0; dim1 < s->rnk; ++dim1) + if (vs->dims[dim0].n * X(iabs)(vs->dims[dim0].is) <= X(iabs)(s->dims[dim1].is) + && vs->dims[dim0].n >= s->dims[dim1].n + && (*pdim0 == -1 + || (X(iabs)(vs->dims[dim0].is) <= X(iabs)(vs->dims[*pdim0].is) + && X(iabs)(s->dims[dim1].is) >= X(iabs)(s->dims[*pdim1].is)))) { + *pdim0 = dim0; + *pdim1 = dim1; + } + return (*pdim0 != -1 && *pdim1 != -1); +} + /* applicable0: structural test. Needs finite ranks, in-place-compatible strides, a dimension pair from pickdim, and an output stride that does not already equal the transposed layout. */ +static int applicable0(const solver *ego_, const problem *p_, + const planner *plnr, + int *pdim0, int *pdim1) +{ + const problem_dft *p = (const problem_dft *) p_; + UNUSED(ego_); UNUSED(plnr); + + return (1 + && FINITE_RNK(p->vecsz->rnk) && FINITE_RNK(p->sz->rnk) + + /* FIXME: can/should we relax this constraint? */ + && X(tensor_inplace_strides2)(p->vecsz, p->sz) + + && pickdim(p->vecsz, p->sz, pdim0, pdim1) + + /* output should not *already* include the transpose + (in which case we duplicate the regular indirect.c) */ + && (p->sz->dims[*pdim1].os != p->vecsz->dims[*pdim0].is) + ); +} + /* applicable: applicable0 plus planner policy. u is 2 when the real and imaginary pointers are adjacent and 1 otherwise; it is the stride that counts as contiguous in the NO_UGLYP test below. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, + int *pdim0, int *pdim1) +{ + if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0; + { + const problem_dft *p = (const problem_dft *) p_; + INT u = p->ri == p->ii + 1 || p->ii == p->ri + 1 ? 
(INT)2 : (INT)1; + + /* UGLY if does not result in contiguous transforms or + transforms of contiguous vectors (since the latter at + least have efficient transpositions) */ + if (NO_UGLYP(plnr) + && p->vecsz->dims[*pdim0].is != u + && !(p->vecsz->rnk == 2 + && p->vecsz->dims[1-*pdim0].is == u + && p->vecsz->dims[*pdim0].is + == u * p->vecsz->dims[1-*pdim0].n)) + return 0; + + if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; + } + return 1; +} + /* mkplan: builds three child plans. cldtrans is a rank-0 problem over the appended (tv, ts) tensors going from input to output, cld is the transform done in place on the output, and cldrest covers the vector entries left over after vl blocks of the transform length. Returns 0 when not applicable or when any child plan cannot be created. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + P *pln; + plan *cld = 0, *cldtrans = 0, *cldrest = 0; + int pdim0, pdim1; + tensor *ts, *tv; + INT vl, ivs, ovs; + R *rit, *iit, *rot, *iot; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &pdim0, &pdim1)) + return (plan *) 0; + + vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n; + A(vl >= 1); + ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is; + ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os; + rit = TAINT(p->ri, vl == 1 ? 0 : ivs); + iit = TAINT(p->ii, vl == 1 ? 0 : ivs); + rot = TAINT(p->ro, vl == 1 ? 0 : ovs); + iot = TAINT(p->io, vl == 1 ? 
0 : ovs); + + ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS); + ts->dims[pdim1].os = p->vecsz->dims[pdim0].is; + tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS); + tv->dims[pdim0].os = p->sz->dims[pdim1].is; + tv->dims[pdim0].n = p->sz->dims[pdim1].n; + cldtrans = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_0d)(), + X(tensor_append)(tv, ts), + rit, iit, + rot, iot)); + X(tensor_destroy2)(ts, tv); + if (!cldtrans) goto nada; + + ts = X(tensor_copy)(p->sz); + ts->dims[pdim1].is = p->vecsz->dims[pdim0].is; + tv = X(tensor_copy)(p->vecsz); + tv->dims[pdim0].is = p->sz->dims[pdim1].is; + tv->dims[pdim0].n = p->sz->dims[pdim1].n; + cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv, + rot, iot, + rot, iot)); + if (!cld) goto nada; + + tv = X(tensor_copy)(p->vecsz); + tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n; + cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv, + p->ri + ivs * vl, + p->ii + ivs * vl, + p->ro + ovs * vl, + p->io + ovs * vl)); + if (!cldrest) goto nada; + + pln = MKPLAN_DFT(P, &padt, apply_op); + pln->cldtrans = cldtrans; + pln->cld = cld; + pln->cldrest = cldrest; + pln->vl = vl; + pln->ivs = ivs; + pln->ovs = ovs; + X(ops_cpy)(&cldrest->ops, &pln->super.super.ops); + X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops); + X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops); + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldrest); + X(plan_destroy_internal)(cld); + X(plan_destroy_internal)(cldtrans); + return (plan *)0; +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return slv; +} + /* Registers the indirect-transpose solver with the planner. */ +void X(dft_indirect_transpose_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/indirect.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/indirect.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2003, 2007-14 
Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + + +/* solvers/plans for vectors of small DFT's that cannot be done + in-place directly. Use a rank-0 plan to rearrange the data + before or after the transform. Can also change an out-of-place + plan into a copy + in-place (where the in-place transform + is e.g. unit stride). */ + +/* FIXME: merge with rank-geq2.c(?), since this is just a special case + of a rank split where the first/second transform has rank 0. 
*/ + +#include "dft.h" + +typedef problem *(*mkcld_t) (const problem_dft *p); + +typedef struct { + dftapply apply; + problem *(*mkcld)(const problem_dft *p); + const char *nam; +} ndrct_adt; + +typedef struct { + solver super; + const ndrct_adt *adt; +} S; + +typedef struct { + plan_dft super; + plan *cldcpy, *cld; + const S *slv; +} P; + +/*-----------------------------------------------------------------------*/ +/* first rearrange, then transform */ /* apply_before: run the copy plan from input to output, then the transform plan in place on the output. */ +static void apply_before(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + + { + plan_dft *cldcpy = (plan_dft *) ego->cldcpy; + cldcpy->apply(ego->cldcpy, ri, ii, ro, io); + } + { + plan_dft *cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ro, io, ro, io); + } +} + /* mkcld_before: child transform problem that works in place on the output arrays, using the INPLACE_OS copies of both tensors. */ +static problem *mkcld_before(const problem_dft *p) +{ + return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_OS), + X(tensor_copy_inplace)(p->vecsz, INPLACE_OS), + p->ro, p->io, p->ro, p->io); +} + +static const ndrct_adt adt_before = +{ + apply_before, mkcld_before, "dft-indirect-before" +}; + +/*-----------------------------------------------------------------------*/ +/* first transform, then rearrange */ + /* apply_after: run the transform plan in place on the input, then the copy plan from input to output. */ +static void apply_after(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + + { + plan_dft *cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ri, ii, ri, ii); + } + { + plan_dft *cldcpy = (plan_dft *) ego->cldcpy; + cldcpy->apply(ego->cldcpy, ri, ii, ro, io); + } +} + /* mkcld_after: child transform problem that works in place on the input arrays, using the INPLACE_IS copies of both tensors. */ +static problem *mkcld_after(const problem_dft *p) +{ + return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_IS), + X(tensor_copy_inplace)(p->vecsz, INPLACE_IS), + p->ri, p->ii, p->ri, p->ii); +} + +static const ndrct_adt adt_after = +{ + apply_after, mkcld_after, "dft-indirect-after" +}; + +/*-----------------------------------------------------------------------*/ /* destroy: releases both child plans. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); + 
X(plan_destroy_internal)(ego->cldcpy); +} + /* awake: forwards the wakefulness to both child plans. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldcpy, wakefulness); + X(plan_awake)(ego->cld, wakefulness); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->slv; + p->print(p, "(%s%(%p%)%(%p%))", s->adt->nam, ego->cld, ego->cldcpy); +} + /* applicable0: structural test. The problem must have a finite vector rank, a transform rank above 0, and match one of the three in-place / out-of-place cases spelled out below; which cases apply depends on whether this solver is the before or the after variant. */ +static int applicable0(const solver *ego_, const problem *p_, + const planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_dft *p = (const problem_dft *) p_; + return (1 + && FINITE_RNK(p->vecsz->rnk) + + /* problem must be a nontrivial transform, not just a copy */ + && p->sz->rnk > 0 + + && (0 + + /* problem must be in-place & require some + rearrangement of the data; to prevent + infinite loops with indirect-transpose, we + further require that at least some transform + strides must decrease */ + || (p->ri == p->ro + && !X(tensor_inplace_strides2)(p->sz, p->vecsz) + && X(tensor_strides_decrease)( + p->sz, p->vecsz, + ego->adt->apply == apply_after ? 
+ INPLACE_IS : INPLACE_OS)) + + /* or problem must be out of place, transforming + from stride 1/2 to bigger stride, for apply_after */ + || (p->ri != p->ro && ego->adt->apply == apply_after + && !NO_DESTROY_INPUTP(plnr) + && X(tensor_min_istride)(p->sz) <= 2 + && X(tensor_min_ostride)(p->sz) > 2) + + /* or problem must be out of place, transforming + to stride 1/2 from bigger stride, for apply_before */ + || (p->ri != p->ro && ego->adt->apply == apply_before + && X(tensor_min_ostride)(p->sz) <= 2 + && X(tensor_min_istride)(p->sz) > 2) + ) + ); +} + /* applicable: applicable0 plus planner policy; out-of-place problems are rejected when NO_INDIRECT_OP_P(plnr) holds. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr) +{ + if (!applicable0(ego_, p_, plnr)) return 0; + { + const problem_dft *p = (const problem_dft *) p_; + if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; + } + return 1; +} + /* mkplan: plans the rank-0 copy (cldcpy) over the appended vecsz and sz tensors, then the child transform supplied by the variant's mkcld with NO_BUFFERING passed to mkplan_f_d. Returns 0 when not applicable or when either child plan cannot be created; op counts are the sum of both children. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + const S *ego = (const S *) ego_; + P *pln; + plan *cld = 0, *cldcpy = 0; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *) 0; + + cldcpy = + X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_0d)(), + X(tensor_append)(p->vecsz, p->sz), + p->ri, p->ii, p->ro, p->io)); + + if (!cldcpy) goto nada; + + cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0); + if (!cld) goto nada; + + pln = MKPLAN_DFT(P, &padt, ego->adt->apply); + pln->cld = cld; + pln->cldcpy = cldcpy; + pln->slv = ego; + X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld); + X(plan_destroy_internal)(cldcpy); + return (plan *)0; +} + /* mksolver: allocate a solver bound to one variant table (before or after). */ +static solver *mksolver(const ndrct_adt *adt) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->adt = adt; + return &(slv->super); +} + /* Registers one solver for each variant listed in adts (before and after). */ +void X(dft_indirect_register)(planner *p) +{ + unsigned i; + static const 
ndrct_adt *const adts[] = { + &adt_before, &adt_after + }; + + for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i) + REGISTER_SOLVER(p, mksolver(adts[i])); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/kdft-dif.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/kdft-dif.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct.h" + /* Registers a twiddle codelet by forwarding it to regsolver_ct_directw with the DECDIF flag. */ +void X(kdft_dif_register)(planner *p, kdftw codelet, const ct_desc *desc) +{ + X(regsolver_ct_directw)(p, codelet, desc, DECDIF); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/kdft-difsq.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/kdft-difsq.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct.h" + /* Registers a kdftwsq codelet by forwarding it to regsolver_ct_directwsq with the DECDIF flag. */ +void X(kdft_difsq_register)(planner *p, kdftwsq k, const ct_desc *desc) +{ + X(regsolver_ct_directwsq)(p, k, desc, DECDIF); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/kdft-dit.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/kdft-dit.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct.h" + /* Registers a twiddle codelet by forwarding it to regsolver_ct_directw with the DECDIT flag. */ +void X(kdft_dit_register)(planner *p, kdftw codelet, const ct_desc *desc) +{ + X(regsolver_ct_directw)(p, codelet, desc, DECDIT); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/kdft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/kdft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" + /* Registers two solvers for the same codelet: the direct one and the buffered (directbuf) one. */ +void X(kdft_register)(planner *p, kdft codelet, const kdft_desc *desc) +{ + REGISTER_SOLVER(p, X(mksolver_dft_direct)(codelet, desc)); + REGISTER_SOLVER(p, X(mksolver_dft_directbuf)(codelet, desc)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/nop.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/nop.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for vrank -infty DFTs (nothing to do) */ + +#include "dft.h" + +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + UNUSED(ego_); + UNUSED(ri); + UNUSED(ii); + UNUSED(ro); + UNUSED(io); +} + +static int applicable(const solver *ego_, const problem *p_) +{ + const problem_dft *p = (const problem_dft *) p_; + + UNUSED(ego_); + + return 0 + /* case 1 : -infty vector rank */ + || (!FINITE_RNK(p->vecsz->rnk)) + + /* case 2 : rank-0 in-place dft */ + || (1 + && p->sz->rnk == 0 + && FINITE_RNK(p->vecsz->rnk) + && p->ro == p->ri + && X(tensor_inplace_strides)(p->vecsz) + ); +} + +static void print(const plan *ego, printer *p) +{ + UNUSED(ego); + p->print(p, "(dft-nop)"); +} + +static plan *mkplan(const solver *ego, const problem *p, planner *plnr) +{ + static const plan_adt padt = { + X(dft_solve), X(null_awake), print, X(plan_null_destroy) + }; + plan_dft *pln; + + UNUSED(plnr); + + if (!applicable(ego, p)) + return (plan *) 0; + pln = MKPLAN_DFT(plan_dft, &padt, apply); + X(ops_zero)(&pln->super.ops); + + return &(pln->super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + return MKSOLVER(solver, &sadt); +} + +void X(dft_nop_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/plan.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/plan.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free 
Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" + +plan *X(mkplan_dft)(size_t size, const plan_adt *adt, dftapply apply) +{ + plan_dft *ego; + + ego = (plan_dft *) X(mkplan)(size, adt); + ego->apply = apply; + + return &(ego->super); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" +#include + +static void destroy(problem *ego_) +{ + problem_dft *ego = (problem_dft *) ego_; + X(tensor_destroy2)(ego->vecsz, ego->sz); + X(ifree)(ego_); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_dft *p = (const problem_dft *) p_; + X(md5puts)(m, "dft"); + X(md5int)(m, p->ri == p->ro); + X(md5INT)(m, p->ii - p->ri); + X(md5INT)(m, p->io - p->ro); + X(md5int)(m, X(alignment_of)(p->ri)); + X(md5int)(m, X(alignment_of)(p->ii)); + X(md5int)(m, X(alignment_of)(p->ro)); + X(md5int)(m, X(alignment_of)(p->io)); + X(tensor_md5)(m, p->sz); + X(tensor_md5)(m, p->vecsz); +} + +static void print(const problem *ego_, printer *p) +{ + const problem_dft *ego = (const problem_dft *) ego_; + p->print(p, "(dft %d %d %d %D %D %T %T)", + ego->ri == ego->ro, + X(alignment_of)(ego->ri), + X(alignment_of)(ego->ro), + (INT)(ego->ii - ego->ri), + (INT)(ego->io - ego->ro), + ego->sz, + ego->vecsz); +} + +static void zero(const problem *ego_) +{ + const problem_dft *ego = (const problem_dft *) ego_; + tensor *sz = X(tensor_append)(ego->vecsz, ego->sz); + X(dft_zerotens)(sz, UNTAINT(ego->ri), UNTAINT(ego->ii)); + X(tensor_destroy)(sz); +} + +static const problem_adt padt = +{ + PROBLEM_DFT, + hash, + zero, + print, + destroy +}; + +problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz, + R *ri, R *ii, R *ro, R *io) +{ + problem_dft *ego; + + /* enforce pointer equality if untainted pointers are equal */ + if (UNTAINT(ri) == UNTAINT(ro)) + ri = ro = JOIN_TAINT(ri, ro); + if (UNTAINT(ii) == UNTAINT(io)) + ii = io = JOIN_TAINT(ii, io); + + /* more correctness conditions: */ + A(TAINTOF(ri) == TAINTOF(ii)); + A(TAINTOF(ro) == TAINTOF(io)); + + A(X(tensor_kosherp)(sz)); + A(X(tensor_kosherp)(vecsz)); + + if (ri == 
ro || ii == io) { + /* If either real or imag pointers are in place, both must be. */ + if (ri != ro || ii != io || !X(tensor_inplace_locations)(sz, vecsz)) + return X(mkproblem_unsolvable)(); + } + + ego = (problem_dft *)X(mkproblem)(sizeof(problem_dft), &padt); + + ego->sz = X(tensor_compress)(sz); + ego->vecsz = X(tensor_compress_contiguous)(vecsz); + ego->ri = ri; + ego->ii = ii; + ego->ro = ro; + ego->io = io; + + A(FINITE_RNK(ego->sz->rnk)); + return &(ego->super); +} + +/* Same as X(mkproblem_dft), but also destroy input tensors. */ +problem *X(mkproblem_dft_d)(tensor *sz, tensor *vecsz, + R *ri, R *ii, R *ro, R *io) +{ + problem *p = X(mkproblem_dft)(sz, vecsz, ri, ii, ro, io); + X(tensor_destroy2)(vecsz, sz); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/rader.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/rader.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "dft.h" + +/* + * Compute transforms of prime sizes using Rader's trick: turn them + * into convolutions of size n - 1, which you then perform via a pair + * of FFTs. 
+ */ + +typedef struct { + solver super; +} S; + +typedef struct { + plan_dft super; + + plan *cld1, *cld2; + R *omega; + INT n, g, ginv; + INT is, os; + plan *cld_omega; +} P; + +static rader_tl *omegas = 0; + +static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv) +{ + plan_dft *p = (plan_dft *) p_; + R *omega; + INT i, gpower; + trigreal scale; + triggen *t; + + if ((omega = X(rader_tl_find)(n, n, ginv, omegas))) + return omega; + + omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES); + + scale = n - 1.0; /* normalization for convolution */ + + t = X(mktriggen)(wakefulness, n); + for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) { + trigreal w[2]; + t->cexpl(t, gpower, w); + omega[2*i] = w[0] / scale; + omega[2*i+1] = FFT_SIGN * w[1] / scale; + } + X(triggen_destroy)(t); + A(gpower == 1); + + p->apply(p_, omega, omega + 1, omega, omega + 1); + + X(rader_tl_insert)(n, n, ginv, omega, &omegas); + return omega; +} + +static void free_omega(R *omega) +{ + X(rader_tl_delete)(omega, &omegas); +} + + +/***************************************************************************/ + +/* Below, we extensively use the identity that fft(x*)* = ifft(x) in + order to share data between forward and backward transforms and to + obviate the necessity of having separate forward and backward + plans. (Although we often compute separate plans these days anyway + due to the differing strides, etcetera.) + + Of course, since the new FFTW gives us separate pointers to + the real and imaginary parts, we could have instead used the + fft(r,i) = ifft(i,r) form of this identity, but it was easier to + reuse the code from our old version. 
*/ + +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT is, os; + INT k, gpower, g, r; + R *buf; + R r0 = ri[0], i0 = ii[0]; + + r = ego->n; is = ego->is; os = ego->os; g = ego->g; + buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS); + + /* First, permute the input, storing in buf: */ + for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) { + R rA, iA; + rA = ri[gpower * is]; + iA = ii[gpower * is]; + buf[2*k] = rA; buf[2*k + 1] = iA; + } + /* gpower == g^(r-1) mod r == 1 */; + + + /* compute DFT of buf, storing in output (except DC): */ + { + plan_dft *cld = (plan_dft *) ego->cld1; + cld->apply(ego->cld1, buf, buf+1, ro+os, io+os); + } + + /* set output DC component: */ + { + ro[0] = r0 + ro[os]; + io[0] = i0 + io[os]; + } + + /* now, multiply by omega: */ + { + const R *omega = ego->omega; + for (k = 0; k < r - 1; ++k) { + E rB, iB, rW, iW; + rW = omega[2*k]; + iW = omega[2*k+1]; + rB = ro[(k+1)*os]; + iB = io[(k+1)*os]; + ro[(k+1)*os] = rW * rB - iW * iB; + io[(k+1)*os] = -(rW * iB + iW * rB); + } + } + + /* this will add input[0] to all of the outputs after the ifft */ + ro[os] += r0; + io[os] -= i0; + + /* inverse FFT: */ + { + plan_dft *cld = (plan_dft *) ego->cld2; + cld->apply(ego->cld2, ro+os, io+os, buf, buf+1); + } + + /* finally, do inverse permutation to unshuffle the output: */ + { + INT ginv = ego->ginv; + gpower = 1; + for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) { + ro[gpower * os] = buf[2*k]; + io[gpower * os] = -buf[2*k+1]; + } + A(gpower == 1); + } + + + X(ifree)(buf); +} + +/***************************************************************************/ + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); + X(plan_awake)(ego->cld_omega, wakefulness); + + switch (wakefulness) { + case SLEEPY: + free_omega(ego->omega); + ego->omega = 0; 
+ break; + default: + ego->g = X(find_generator)(ego->n); + ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n); + A(MULMOD(ego->g, ego->ginv, ego->n) == 1); + + ego->omega = mkomega(wakefulness, + ego->cld_omega, ego->n, ego->ginv); + break; + } +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld_omega); + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *)ego_; + p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)", + ego->n, ego->is, ego->os, ego->cld1); + if (ego->cld2 != ego->cld1) + p->print(p, "%(%p%)", ego->cld2); + if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2) + p->print(p, "%(%p%)", ego->cld_omega); + p->putchr(p, ')'); +} + +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + UNUSED(ego_); + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk == 0 + && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW) + && X(is_prime)(p->sz->dims[0].n) + + /* proclaim the solver SLOW if p-1 is not easily factorizable. + Bluestein should take care of this case. 
*/ + && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1)) + ); +} + +static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io, + planner *plnr) +{ + plan *cld1 = (plan *) 0; + plan *cld2 = (plan *) 0; + plan *cld_omega = (plan *) 0; + R *buf = (R *) 0; + + /* initial allocation for the purpose of planning */ + buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS); + + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os), + X(mktensor_1d)(1, 0, 0), + buf, buf + 1, ro + os, io + os), + NO_SLOW, 0, 0); + if (!cld1) goto nada; + + cld2 = X(mkplan_f_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2), + X(mktensor_1d)(1, 0, 0), + ro + os, io + os, buf, buf + 1), + NO_SLOW, 0, 0); + + if (!cld2) goto nada; + + /* plan for omega array */ + cld_omega = X(mkplan_f_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2), + X(mktensor_1d)(1, 0, 0), + buf, buf + 1, buf, buf + 1), + NO_SLOW, ESTIMATE, 0); + if (!cld_omega) goto nada; + + /* deallocate buffers; let awake() or apply() allocate them for real */ + X(ifree)(buf); + buf = 0; + + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->cld_omega = cld_omega; + pln->omega = 0; + pln->n = n; + pln->is = is; + pln->os = os; + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6; + pln->super.super.ops.add += (n - 1) * 2 + 4; + pln->super.super.ops.mul += (n - 1) * 4; + + return 1; + + nada: + X(ifree0)(buf); + X(plan_destroy_internal)(cld_omega); + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return 0; +} + +static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) +{ + const problem_dft *p = (const problem_dft *) p_; + P *pln; + INT n; + INT is, os; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + n = p->sz->dims[0].n; + is = p->sz->dims[0].is; + os = p->sz->dims[0].os; + + pln = 
MKPLAN_DFT(P, &padt, apply); + if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) { + X(ifree)(pln); + return (plan *) 0; + } + return &(pln->super.super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(dft_rader_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/rank-geq2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/rank-geq2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for DFT of rank >= 2 (multidimensional) */ + +#include "dft.h" + +typedef struct { + solver super; + int spltrnk; + const int *buddies; + int nbuddies; +} S; + +typedef struct { + plan_dft super; + + plan *cld1, *cld2; + const S *solver; +} P; + +/* Compute multi-dimensional DFT by applying the two cld plans + (lower-rnk DFTs). 
*/ +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + plan_dft *cld1, *cld2; + + cld1 = (plan_dft *) ego->cld1; + cld1->apply(ego->cld1, ri, ii, ro, io); + + cld2 = (plan_dft *) ego->cld2; + cld2->apply(ego->cld2, ro, io, ro, io); +} + + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + p->print(p, "(dft-rank>=2/%d%(%p%)%(%p%))", + s->spltrnk, ego->cld1, ego->cld2); +} + +static int picksplit(const S *ego, const tensor *sz, int *rp) +{ + A(sz->rnk > 1); /* cannot split rnk <= 1 */ + if (!X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp)) + return 0; + *rp += 1; /* convert from dim. index to rank */ + if (*rp >= sz->rnk) /* split must reduce rank */ + return 0; + return 1; +} + +static int applicable0(const solver *ego_, const problem *p_, int *rp) +{ + const problem_dft *p = (const problem_dft *) p_; + const S *ego = (const S *)ego_; + return (1 + && FINITE_RNK(p->sz->rnk) && FINITE_RNK(p->vecsz->rnk) + && p->sz->rnk >= 2 + && picksplit(ego, p->sz, rp) + ); +} + +/* TODO: revise this. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *rp) +{ + const S *ego = (const S *)ego_; + const problem_dft *p = (const problem_dft *) p_; + + if (!applicable0(ego_, p_, rp)) return 0; + + if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0])) return 0; + + /* Heuristic: if the vector stride is greater than the transform + sz, don't use (prefer to do the vector loop first with a + vrank-geq1 plan). 
*/ + if (NO_UGLYP(plnr)) + if (p->vecsz->rnk > 0 && + X(tensor_min_stride)(p->vecsz) > X(tensor_max_index)(p->sz)) + return 0; + + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_dft *p; + P *pln; + plan *cld1 = 0, *cld2 = 0; + tensor *sz1, *sz2, *vecszi, *sz2i; + int spltrnk; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &spltrnk)) + return (plan *) 0; + + p = (const problem_dft *) p_; + X(tensor_split)(p->sz, &sz1, spltrnk, &sz2); + vecszi = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS); + sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS); + + cld1 = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(tensor_copy)(sz2), + X(tensor_append)(p->vecsz, sz1), + p->ri, p->ii, p->ro, p->io)); + if (!cld1) goto nada; + + cld2 = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(tensor_copy_inplace)(sz1, INPLACE_OS), + X(tensor_append)(vecszi, sz2i), + p->ro, p->io, p->ro, p->io)); + if (!cld2) goto nada; + + pln = MKPLAN_DFT(P, &padt, apply); + + pln->cld1 = cld1; + pln->cld2 = cld2; + + pln->solver = ego; + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + + X(tensor_destroy4)(sz1, sz2, vecszi, sz2i); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + X(tensor_destroy4)(sz1, sz2, vecszi, sz2i); + return (plan *) 0; +} + +static solver *mksolver(int spltrnk, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->spltrnk = spltrnk; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} + +void X(dft_rank_geq2_register)(planner *p) +{ + int i; + static const int buddies[] = { 1, 0, -2 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); + + 
/* FIXME: + + Should we try more buddies? + + Another possible variant is to swap cld1 and cld2 (or rather, + to swap their problems; they are not interchangeable because + cld2 must be in-place). In past versions of FFTW, however, I + seem to recall that such rearrangements have made little or no + difference. + */ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,6 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft +SUBDIRS=codelets +noinst_LTLIBRARIES = libdft_scalar.la + +libdft_scalar_la_SOURCES = n.c t.c f.h n.h q.h t.h + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,726 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = dft/scalar +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + 
$(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_scalar_la_LIBADD = +am_libdft_scalar_la_OBJECTS = n.lo t.lo +libdft_scalar_la_OBJECTS = $(am_libdft_scalar_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) 
$(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_scalar_la_SOURCES) +DIST_SOURCES = $(libdft_scalar_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ 
+host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft +SUBDIRS = codelets +noinst_LTLIBRARIES = libdft_scalar.la +libdft_scalar_la_SOURCES = n.c t.c f.h n.h q.h t.h +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/scalar/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/scalar/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft_scalar.la: $(libdft_scalar_la_OBJECTS) $(libdft_scalar_la_DEPENDENCIES) $(EXTRA_libdft_scalar_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libdft_scalar_la_OBJECTS) $(libdft_scalar_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ 
`$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec 
install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,97 @@ +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). 
+ +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/dft/scalar +noinst_LTLIBRARIES = libdft_scalar_codelets.la + +########################################################################### +# n1_<n> is a hard-coded FFT of size <n> (base cases of FFT recursion) +N1 = n1_2.c n1_3.c n1_4.c n1_5.c n1_6.c n1_7.c n1_8.c n1_9.c n1_10.c \ +n1_11.c n1_12.c n1_13.c n1_14.c n1_15.c n1_16.c n1_32.c n1_64.c \ +n1_20.c n1_25.c # n1_30.c n1_40.c n1_50.c + +########################################################################### +# t1_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step +T1 = t1_2.c t1_3.c t1_4.c t1_5.c t1_6.c t1_7.c t1_8.c t1_9.c \ +t1_10.c t1_12.c t1_15.c t1_16.c t1_32.c t1_64.c \ +t1_20.c t1_25.c # t1_30.c t1_40.c t1_50.c + +# t2_<r> is also a twiddle FFT, but instead of using a complete lookup table +# of trig. functions, it partially generates the trig. values on the fly +# (this is faster for large sizes). +T2 = t2_4.c t2_8.c t2_16.c t2_32.c t2_64.c \ + t2_5.c t2_10.c t2_20.c t2_25.c + +########################################################################### +# The F (DIF) codelets are used for a kind of in-place transform algorithm, +# but the planner seems to never (or hardly ever) use them on the machines +# we have access to, preferring the Q codelets and the use of buffers +# for sub-transforms. So, we comment them out, at least for now. + +# f1_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF step +F1 = # f1_2.c f1_3.c f1_4.c f1_5.c f1_6.c f1_7.c f1_8.c f1_9.c f1_10.c f1_12.c f1_15.c f1_16.c f1_32.c f1_64.c + +# like f1, but partially generates its trig. table on the fly +F2 = # f2_4.c f2_8.c f2_16.c f2_32.c f2_64.c + +########################################################################### +# q1_<r> is <r> twiddle FFTs of size <r> (DIF step), where the output is +# transposed.
This is used for in-place transposes in sizes that are +# divisible by <r>^2. These codelets have size ~ <r>^2, so you should +# probably not use <r> bigger than 8 or so. +Q1 = q1_2.c q1_4.c q1_8.c q1_3.c q1_5.c q1_6.c + +########################################################################### +ALL_CODELETS = $(N1) $(T1) $(T2) $(F1) $(F2) $(Q1) +BUILT_SOURCES= $(ALL_CODELETS) $(CODLIST) + +libdft_scalar_codelets_la_SOURCES = $(BUILT_SOURCES) + +SOLVTAB_NAME = X(solvtab_dft_standard) +XRENAME=X + +# special rules for regenerating codelets. +include $(top_srcdir)/support/Makefile.codelets + +if MAINTAINER_MODE +FLAGS_N1=$(DFT_FLAGS_COMMON) +FLAGS_T1=$(DFT_FLAGS_COMMON) +FLAGS_T2=$(DFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +FLAGS_F1=$(DFT_FLAGS_COMMON) +FLAGS_F2=$(DFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +FLAGS_Q1=$(DFT_FLAGS_COMMON) -reload-twiddle +FLAGS_Q2=$(DFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles + +n1_%.c: $(CODELET_DEPS) $(GEN_NOTW) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW) $(FLAGS_N1) -n $* -name n1_$* -include "n.h") | $(ADD_DATE) | $(INDENT) >$@ + +t1_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_T1) -n $* -name t1_$* -include "t.h") | $(ADD_DATE) | $(INDENT) >$@ + +t2_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_T2) -n $* -name t2_$* -include "t.h") | $(ADD_DATE) | $(INDENT) >$@ + +f1_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_F1) -dif -n $* -name f1_$* -include "f.h") | $(ADD_DATE) | $(INDENT) >$@ + +f2_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_F2) -dif -n $* -name f2_$* -include "f.h") | $(ADD_DATE) | $(INDENT) >$@ + +q1_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ) $(FLAGS_Q1) -dif -n $* -name q1_$* -include "q.h") | $(ADD_DATE) | $(INDENT) >$@ + +q2_%.c:
$(CODELET_DEPS) $(GEN_TWIDSQ) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ) $(FLAGS_Q2) -dif -n $* -name q2_$* -include "q.h") | $(ADD_DATE) | $(INDENT) >$@ + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,841 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/support/Makefile.codelets \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +subdir = dft/scalar/codelets +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_scalar_codelets_la_LIBADD = +am__objects_1 = n1_2.lo n1_3.lo n1_4.lo n1_5.lo n1_6.lo n1_7.lo \ + n1_8.lo n1_9.lo n1_10.lo n1_11.lo n1_12.lo n1_13.lo n1_14.lo \ + n1_15.lo n1_16.lo n1_32.lo n1_64.lo n1_20.lo n1_25.lo +am__objects_2 = t1_2.lo t1_3.lo t1_4.lo t1_5.lo t1_6.lo t1_7.lo \ + t1_8.lo t1_9.lo t1_10.lo t1_12.lo t1_15.lo t1_16.lo t1_32.lo \ + t1_64.lo t1_20.lo t1_25.lo +am__objects_3 = t2_4.lo t2_8.lo t2_16.lo t2_32.lo t2_64.lo t2_5.lo \ + t2_10.lo t2_20.lo t2_25.lo +am__objects_4 = +am__objects_5 = q1_2.lo q1_4.lo q1_8.lo q1_3.lo q1_5.lo q1_6.lo +am__objects_6 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_4) $(am__objects_5) +am__objects_7 = codlist.lo +am__objects_8 = $(am__objects_6) $(am__objects_7) +am_libdft_scalar_codelets_la_OBJECTS = $(am__objects_8) +libdft_scalar_codelets_la_OBJECTS = \ + $(am_libdft_scalar_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; 
+am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_scalar_codelets_la_SOURCES) +DIST_SOURCES = $(libdft_scalar_codelets_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft 
\ +-I$(top_srcdir)/dft/scalar + +noinst_LTLIBRARIES = libdft_scalar_codelets.la + +########################################################################### +# n1_<n> is a hard-coded FFT of size <n> (base cases of FFT recursion) +N1 = n1_2.c n1_3.c n1_4.c n1_5.c n1_6.c n1_7.c n1_8.c n1_9.c n1_10.c \ +n1_11.c n1_12.c n1_13.c n1_14.c n1_15.c n1_16.c n1_32.c n1_64.c \ +n1_20.c n1_25.c # n1_30.c n1_40.c n1_50.c + + +########################################################################### +# t1_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step +T1 = t1_2.c t1_3.c t1_4.c t1_5.c t1_6.c t1_7.c t1_8.c t1_9.c \ +t1_10.c t1_12.c t1_15.c t1_16.c t1_32.c t1_64.c \ +t1_20.c t1_25.c # t1_30.c t1_40.c t1_50.c + + +# t2_<r> is also a twiddle FFT, but instead of using a complete lookup table +# of trig. functions, it partially generates the trig. values on the fly +# (this is faster for large sizes). +T2 = t2_4.c t2_8.c t2_16.c t2_32.c t2_64.c \ + t2_5.c t2_10.c t2_20.c t2_25.c + + +########################################################################### +# The F (DIF) codelets are used for a kind of in-place transform algorithm, +# but the planner seems to never (or hardly ever) use them on the machines +# we have access to, preferring the Q codelets and the use of buffers +# for sub-transforms. So, we comment them out, at least for now. + +# f1_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF step +F1 = # f1_2.c f1_3.c f1_4.c f1_5.c f1_6.c f1_7.c f1_8.c f1_9.c f1_10.c f1_12.c f1_15.c f1_16.c f1_32.c f1_64.c + +# like f1, but partially generates its trig. table on the fly +F2 = # f2_4.c f2_8.c f2_16.c f2_32.c f2_64.c + +########################################################################### +# q1_<r> is <r> twiddle FFTs of size <r> (DIF step), where the output is +# transposed. This is used for in-place transposes in sizes that are +# divisible by <r>^2. These codelets have size ~ <r>^2, so you should +# probably not use <r> bigger than 8 or so.
+Q1 = q1_2.c q1_4.c q1_8.c q1_3.c q1_5.c q1_6.c + +########################################################################### +ALL_CODELETS = $(N1) $(T1) $(T2) $(F1) $(F2) $(Q1) +BUILT_SOURCES = $(ALL_CODELETS) $(CODLIST) +libdft_scalar_codelets_la_SOURCES = $(BUILT_SOURCES) +SOLVTAB_NAME = X(solvtab_dft_standard) +XRENAME = X +CODLIST = codlist.c +CODELET_NAME = codelet_ +@MAINTAINER_MODE_TRUE@INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +@MAINTAINER_MODE_TRUE@TWOVERS = sh ${top_srcdir}/support/twovers.sh +@MAINTAINER_MODE_TRUE@GENFFTDIR = ${top_builddir}/genfft +@MAINTAINER_MODE_TRUE@GEN_NOTW = ${GENFFTDIR}/gen_notw.native +@MAINTAINER_MODE_TRUE@GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE_C = ${GENFFTDIR}/gen_twiddle_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +@MAINTAINER_MODE_TRUE@GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +@MAINTAINER_MODE_TRUE@GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +@MAINTAINER_MODE_TRUE@GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +@MAINTAINER_MODE_TRUE@GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +@MAINTAINER_MODE_TRUE@GEN_R2R = ${GENFFTDIR}/gen_r2r.native +@MAINTAINER_MODE_TRUE@PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +@MAINTAINER_MODE_TRUE@PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +@MAINTAINER_MODE_TRUE@ADD_DATE = sed -e s/@DATE@/"`date`"/ +@MAINTAINER_MODE_TRUE@COPYRIGHT = ${top_srcdir}/COPYRIGHT +@MAINTAINER_MODE_TRUE@CODELET_DEPS = $(COPYRIGHT) $(PRELUDE) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_DFT = cat $(COPYRIGHT) $(PRELUDE_DFT) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_RDFT = cat $(COPYRIGHT) $(PRELUDE_RDFT) 
+@MAINTAINER_MODE_TRUE@FLAGS_COMMON = -compact -variables 4 +@MAINTAINER_MODE_TRUE@DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +@MAINTAINER_MODE_TRUE@RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# special rules for regenerating codelets. +@MAINTAINER_MODE_TRUE@FLAGS_N1 = $(DFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_T1 = $(DFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_T2 = $(DFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_F1 = $(DFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_F2 = $(DFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_Q1 = $(DFT_FLAGS_COMMON) -reload-twiddle +@MAINTAINER_MODE_TRUE@FLAGS_Q2 = $(DFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/support/Makefile.codelets $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/scalar/codelets/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/scalar/codelets/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/support/Makefile.codelets: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft_scalar_codelets.la: $(libdft_scalar_codelets_la_OBJECTS) $(libdft_scalar_codelets_la_DEPENDENCIES) $(EXTRA_libdft_scalar_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libdft_scalar_codelets_la_OBJECTS) $(libdft_scalar_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_16.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP 
-MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo 
$$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic 
maintainer-clean-local mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am + + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; \ + echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? 
+maintainer-clean-local: + rm -f $(CODLIST) + +# cancel the hideous builtin rules that cause an infinite loop +@MAINTAINER_MODE_TRUE@%: %.o +@MAINTAINER_MODE_TRUE@%: %.s +@MAINTAINER_MODE_TRUE@%: %.c +@MAINTAINER_MODE_TRUE@%: %.S + +@MAINTAINER_MODE_TRUE@n1_%.c: $(CODELET_DEPS) $(GEN_NOTW) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW) $(FLAGS_N1) -n $* -name n1_$* -include "n.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t1_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_T1) -n $* -name t1_$* -include "t.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t2_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_T2) -n $* -name t2_$* -include "t.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@f1_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_F1) -dif -n $* -name f1_$* -include "f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@f2_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(FLAGS_F2) -dif -n $* -name f2_$* -include "f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@q1_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ) $(FLAGS_Q1) -dif -n $* -name q1_$* -include "q.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@q2_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ) $(FLAGS_Q2) -dif -n $* -name q2_$* -include "q.h") | $(ADD_DATE) | $(INDENT) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,109 @@ +#include "ifftw.h" + + +extern void X(codelet_n1_2)(planner *); +extern void X(codelet_n1_3)(planner *); +extern void X(codelet_n1_4)(planner *); +extern void X(codelet_n1_5)(planner *); +extern void X(codelet_n1_6)(planner *); +extern void X(codelet_n1_7)(planner *); +extern void X(codelet_n1_8)(planner *); +extern void X(codelet_n1_9)(planner *); +extern void X(codelet_n1_10)(planner *); +extern void X(codelet_n1_11)(planner *); +extern void X(codelet_n1_12)(planner *); +extern void X(codelet_n1_13)(planner *); +extern void X(codelet_n1_14)(planner *); +extern void X(codelet_n1_15)(planner *); +extern void X(codelet_n1_16)(planner *); +extern void X(codelet_n1_32)(planner *); +extern void X(codelet_n1_64)(planner *); +extern void X(codelet_n1_20)(planner *); +extern void X(codelet_n1_25)(planner *); +extern void X(codelet_t1_2)(planner *); +extern void X(codelet_t1_3)(planner *); +extern void X(codelet_t1_4)(planner *); +extern void X(codelet_t1_5)(planner *); +extern void X(codelet_t1_6)(planner *); +extern void X(codelet_t1_7)(planner *); +extern void X(codelet_t1_8)(planner *); +extern void X(codelet_t1_9)(planner *); +extern void X(codelet_t1_10)(planner *); +extern void X(codelet_t1_12)(planner *); +extern void X(codelet_t1_15)(planner *); +extern void X(codelet_t1_16)(planner *); +extern void X(codelet_t1_32)(planner *); +extern void X(codelet_t1_64)(planner *); +extern void X(codelet_t1_20)(planner *); +extern void X(codelet_t1_25)(planner *); +extern void X(codelet_t2_4)(planner *); +extern void X(codelet_t2_8)(planner *); +extern void X(codelet_t2_16)(planner *); +extern void X(codelet_t2_32)(planner *); +extern void X(codelet_t2_64)(planner *); +extern void X(codelet_t2_5)(planner *); +extern void 
X(codelet_t2_10)(planner *); +extern void X(codelet_t2_20)(planner *); +extern void X(codelet_t2_25)(planner *); +extern void X(codelet_q1_2)(planner *); +extern void X(codelet_q1_4)(planner *); +extern void X(codelet_q1_8)(planner *); +extern void X(codelet_q1_3)(planner *); +extern void X(codelet_q1_5)(planner *); +extern void X(codelet_q1_6)(planner *); + + +extern const solvtab X(solvtab_dft_standard); +const solvtab X(solvtab_dft_standard) = { + SOLVTAB(X(codelet_n1_2)), + SOLVTAB(X(codelet_n1_3)), + SOLVTAB(X(codelet_n1_4)), + SOLVTAB(X(codelet_n1_5)), + SOLVTAB(X(codelet_n1_6)), + SOLVTAB(X(codelet_n1_7)), + SOLVTAB(X(codelet_n1_8)), + SOLVTAB(X(codelet_n1_9)), + SOLVTAB(X(codelet_n1_10)), + SOLVTAB(X(codelet_n1_11)), + SOLVTAB(X(codelet_n1_12)), + SOLVTAB(X(codelet_n1_13)), + SOLVTAB(X(codelet_n1_14)), + SOLVTAB(X(codelet_n1_15)), + SOLVTAB(X(codelet_n1_16)), + SOLVTAB(X(codelet_n1_32)), + SOLVTAB(X(codelet_n1_64)), + SOLVTAB(X(codelet_n1_20)), + SOLVTAB(X(codelet_n1_25)), + SOLVTAB(X(codelet_t1_2)), + SOLVTAB(X(codelet_t1_3)), + SOLVTAB(X(codelet_t1_4)), + SOLVTAB(X(codelet_t1_5)), + SOLVTAB(X(codelet_t1_6)), + SOLVTAB(X(codelet_t1_7)), + SOLVTAB(X(codelet_t1_8)), + SOLVTAB(X(codelet_t1_9)), + SOLVTAB(X(codelet_t1_10)), + SOLVTAB(X(codelet_t1_12)), + SOLVTAB(X(codelet_t1_15)), + SOLVTAB(X(codelet_t1_16)), + SOLVTAB(X(codelet_t1_32)), + SOLVTAB(X(codelet_t1_64)), + SOLVTAB(X(codelet_t1_20)), + SOLVTAB(X(codelet_t1_25)), + SOLVTAB(X(codelet_t2_4)), + SOLVTAB(X(codelet_t2_8)), + SOLVTAB(X(codelet_t2_16)), + SOLVTAB(X(codelet_t2_32)), + SOLVTAB(X(codelet_t2_64)), + SOLVTAB(X(codelet_t2_5)), + SOLVTAB(X(codelet_t2_10)), + SOLVTAB(X(codelet_t2_20)), + SOLVTAB(X(codelet_t2_25)), + SOLVTAB(X(codelet_q1_2)), + SOLVTAB(X(codelet_q1_4)), + SOLVTAB(X(codelet_q1_8)), + SOLVTAB(X(codelet_q1_3)), + SOLVTAB(X(codelet_q1_5)), + SOLVTAB(X(codelet_q1_6)), + SOLVTAB_END +}; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_10.c --- /dev/null Thu 
Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -name n1_10 -include n.h */ + +/* + * This function contains 84 FP additions, 36 FP multiplications, + * (or, 48 additions, 0 multiplications, 36 fused multiply/add), + * 59 stack variables, 4 constants, and 40 memory accesses + */ +#include "n.h" + +static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + 
ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + E T1g, T1a, T18, T1m, T1k, T1f, T19, T11, T1h, T1l; + { + E Tj, T3, T1b, TN, T1j, TU, T1i, TV, Tq, T10, Ti, Ts, Tw, T15, Tx; + E T13, TG, Ty, TB, TC; + { + E T1, T2, TL, TM; + T1 = ri[0]; + T2 = ri[WS(is, 5)]; + TL = ii[0]; + TM = ii[WS(is, 5)]; + { + E T7, Tk, T6, To, Tg, T8, Tb, Tc; + { + E T4, T5, Te, Tf; + T4 = ri[WS(is, 2)]; + Tj = T1 + T2; + T3 = T1 - T2; + T1b = TL + TM; + TN = TL - TM; + T5 = ri[WS(is, 7)]; + Te = ri[WS(is, 6)]; + Tf = ri[WS(is, 1)]; + T7 = ri[WS(is, 8)]; + Tk = T4 + T5; + T6 = T4 - T5; + To = Te + Tf; + Tg = Te - Tf; + T8 = ri[WS(is, 3)]; + Tb = ri[WS(is, 4)]; + Tc = ri[WS(is, 9)]; + } + { + E TE, TF, Tu, Tv; + { + E Ta, Th, Tl, T9; + Tu = ii[WS(is, 2)]; + Tl = T7 + T8; + T9 = T7 - T8; + { + E Tn, Td, Tm, Tp; + Tn = Tb + Tc; + Td = Tb - Tc; + Tm = Tk + Tl; + T1j = Tk - Tl; + Ta = T6 + T9; + TU = T6 - T9; + Tp = Tn + To; + T1i = Tn - To; + Th = Td + Tg; + TV = Td - Tg; + Tq = Tm + Tp; + T10 = Tm - Tp; + Tv = ii[WS(is, 7)]; + } + Ti = Ta + Th; + Ts = Ta - Th; + } + TE = ii[WS(is, 6)]; + TF = ii[WS(is, 1)]; + Tw = Tu - Tv; + T15 = Tu + Tv; + Tx = ii[WS(is, 8)]; + T13 = TE + TF; + TG = TE - TF; + Ty = ii[WS(is, 3)]; + TB = ii[WS(is, 4)]; + TC = ii[WS(is, 9)]; + } + } + } + { + E T17, TA, T14, TH, T1e, TQ, TS; + { + E TO, TP, T16, Tz; + ro[WS(os, 5)] = T3 + Ti; + T16 = Tx + Ty; + Tz = Tx - Ty; + { + E T12, TD, T1c, T1d; + T12 = TB + TC; + TD = TB - TC; + T1c = T15 + T16; + T17 = T15 - T16; + TO = Tw + Tz; + TA = Tw - Tz; + T1d = T12 + T13; + T14 = T12 - T13; + TP = TD + TG; + TH = TD - TG; + T1e = T1c + T1d; + T1g = T1c - T1d; + } + ro[0] = Tj + Tq; + TQ = TO + TP; + TS = TO - TP; + } + { + E TK, TI, TY, TW, TR, TJ, Tt, Tr, TZ, TX, TT; + TK = FNMS(KP618033988, TA, TH); + TI = FMA(KP618033988, TH, TA); + io[0] = T1b + T1e; + io[WS(os, 5)] = TN + TQ; + Tr = FNMS(KP250000000, Ti, T3); + TY = FNMS(KP618033988, TU, TV); + TW = FMA(KP618033988, TV, TU); 
+ TR = FNMS(KP250000000, TQ, TN); + TJ = FNMS(KP559016994, Ts, Tr); + Tt = FMA(KP559016994, Ts, Tr); + T1a = FMA(KP618033988, T14, T17); + T18 = FNMS(KP618033988, T17, T14); + ro[WS(os, 7)] = FNMS(KP951056516, TK, TJ); + ro[WS(os, 3)] = FMA(KP951056516, TK, TJ); + ro[WS(os, 1)] = FMA(KP951056516, TI, Tt); + ro[WS(os, 9)] = FNMS(KP951056516, TI, Tt); + TX = FNMS(KP559016994, TS, TR); + TT = FMA(KP559016994, TS, TR); + TZ = FNMS(KP250000000, Tq, Tj); + io[WS(os, 3)] = FNMS(KP951056516, TY, TX); + io[WS(os, 7)] = FMA(KP951056516, TY, TX); + io[WS(os, 9)] = FMA(KP951056516, TW, TT); + io[WS(os, 1)] = FNMS(KP951056516, TW, TT); + T1m = FMA(KP618033988, T1i, T1j); + T1k = FNMS(KP618033988, T1j, T1i); + T1f = FNMS(KP250000000, T1e, T1b); + T19 = FMA(KP559016994, T10, TZ); + T11 = FNMS(KP559016994, T10, TZ); + } + } + } + ro[WS(os, 4)] = FNMS(KP951056516, T1a, T19); + ro[WS(os, 6)] = FMA(KP951056516, T1a, T19); + ro[WS(os, 8)] = FMA(KP951056516, T18, T11); + ro[WS(os, 2)] = FNMS(KP951056516, T18, T11); + T1h = FNMS(KP559016994, T1g, T1f); + T1l = FMA(KP559016994, T1g, T1f); + io[WS(os, 4)] = FMA(KP951056516, T1m, T1l); + io[WS(os, 6)] = FNMS(KP951056516, T1m, T1l); + io[WS(os, 8)] = FNMS(KP951056516, T1k, T1h); + io[WS(os, 2)] = FMA(KP951056516, T1k, T1h); + } + } +} + +static const kdft_desc desc = { 10, "n1_10", {48, 0, 36, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_10) (planner *p) { + X(kdft_register) (p, n1_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 10 -name n1_10 -include n.h */ + +/* + * This function contains 84 FP additions, 24 FP multiplications, + * (or, 72 additions, 12 multiplications, 12 fused multiply/add), + * 41 stack variables, 4 constants, and 40 memory accesses + */ +#include "n.h" + +static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); 
+ DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + E T3, Tj, TQ, T1e, TU, TV, T1c, T1b, Tm, Tp, Tq, Ta, Th, Ti, TA; + E TH, T17, T14, T1f, T1g, T1h, TL, TM, TR; + { + E T1, T2, TO, TP; + T1 = ri[0]; + T2 = ri[WS(is, 5)]; + T3 = T1 - T2; + Tj = T1 + T2; + TO = ii[0]; + TP = ii[WS(is, 5)]; + TQ = TO - TP; + T1e = TO + TP; + } + { + E T6, Tk, Tg, To, T9, Tl, Td, Tn; + { + E T4, T5, Te, Tf; + T4 = ri[WS(is, 2)]; + T5 = ri[WS(is, 7)]; + T6 = T4 - T5; + Tk = T4 + T5; + Te = ri[WS(is, 6)]; + Tf = ri[WS(is, 1)]; + Tg = Te - Tf; + To = Te + Tf; + } + { + E T7, T8, Tb, Tc; + T7 = ri[WS(is, 8)]; + T8 = ri[WS(is, 3)]; + T9 = T7 - T8; + Tl = T7 + T8; + Tb = ri[WS(is, 4)]; + Tc = ri[WS(is, 9)]; + Td = Tb - Tc; + Tn = Tb + Tc; + } + TU = T6 - T9; + TV = Td - Tg; + T1c = Tk - Tl; + T1b = Tn - To; + Tm = Tk + Tl; + Tp = Tn + To; + Tq = Tm + Tp; + Ta = T6 + T9; + Th = Td + Tg; + Ti = Ta + Th; + } + { + E Tw, T15, TG, T13, Tz, T16, TD, T12; + { + E Tu, Tv, TE, TF; + Tu = ii[WS(is, 2)]; + Tv = ii[WS(is, 7)]; + Tw = Tu - Tv; + T15 = Tu + Tv; + TE = ii[WS(is, 6)]; + TF = ii[WS(is, 1)]; + TG = TE - TF; + T13 = TE + TF; + } + { + E Tx, Ty, TB, TC; + Tx = ii[WS(is, 8)]; + Ty = ii[WS(is, 3)]; + Tz = Tx - Ty; + T16 = Tx + Ty; + TB = ii[WS(is, 4)]; + TC = ii[WS(is, 9)]; + TD = TB - TC; + T12 = TB + TC; + } + TA = Tw - Tz; + TH = TD - TG; + T17 = T15 - T16; + T14 = T12 - T13; + T1f = T15 + T16; + T1g = T12 + T13; + T1h = T1f + T1g; + TL = Tw + Tz; + TM = TD + TG; + TR = TL + TM; + } + ro[WS(os, 5)] = T3 + Ti; + io[WS(os, 5)] = TQ + TR; + ro[0] = Tj + Tq; + io[0] = T1e + T1h; + { + E TI, TK, Tt, TJ, Tr, Ts; + TI = FMA(KP951056516, TA, KP587785252 * TH); + TK = 
FNMS(KP587785252, TA, KP951056516 * TH); + Tr = KP559016994 * (Ta - Th); + Ts = FNMS(KP250000000, Ti, T3); + Tt = Tr + Ts; + TJ = Ts - Tr; + ro[WS(os, 9)] = Tt - TI; + ro[WS(os, 3)] = TJ + TK; + ro[WS(os, 1)] = Tt + TI; + ro[WS(os, 7)] = TJ - TK; + } + { + E TW, TY, TT, TX, TN, TS; + TW = FMA(KP951056516, TU, KP587785252 * TV); + TY = FNMS(KP587785252, TU, KP951056516 * TV); + TN = KP559016994 * (TL - TM); + TS = FNMS(KP250000000, TR, TQ); + TT = TN + TS; + TX = TS - TN; + io[WS(os, 1)] = TT - TW; + io[WS(os, 7)] = TY + TX; + io[WS(os, 9)] = TW + TT; + io[WS(os, 3)] = TX - TY; + } + { + E T18, T1a, T11, T19, TZ, T10; + T18 = FNMS(KP587785252, T17, KP951056516 * T14); + T1a = FMA(KP951056516, T17, KP587785252 * T14); + TZ = FNMS(KP250000000, Tq, Tj); + T10 = KP559016994 * (Tm - Tp); + T11 = TZ - T10; + T19 = T10 + TZ; + ro[WS(os, 2)] = T11 - T18; + ro[WS(os, 6)] = T19 + T1a; + ro[WS(os, 8)] = T11 + T18; + ro[WS(os, 4)] = T19 - T1a; + } + { + E T1d, T1l, T1k, T1m, T1i, T1j; + T1d = FNMS(KP587785252, T1c, KP951056516 * T1b); + T1l = FMA(KP951056516, T1c, KP587785252 * T1b); + T1i = FNMS(KP250000000, T1h, T1e); + T1j = KP559016994 * (T1f - T1g); + T1k = T1i - T1j; + T1m = T1j + T1i; + io[WS(os, 2)] = T1d + T1k; + io[WS(os, 6)] = T1m - T1l; + io[WS(os, 8)] = T1k - T1d; + io[WS(os, 4)] = T1l + T1m; + } + } + } +} + +static const kdft_desc desc = { 10, "n1_10", {72, 12, 12, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_10) (planner *p) { + X(kdft_register) (p, n1_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 11 -name n1_11 -include n.h */ + +/* + * This function contains 140 FP additions, 110 FP multiplications, + * (or, 30 additions, 0 multiplications, 110 fused multiply/add), + * 84 stack variables, 10 constants, and 44 memory accesses + */ +#include "n.h" + +static void n1_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP989821441, +0.989821441880932732376092037776718787376519372); + DK(KP959492973, +0.959492973614497389890368057066327699062454848); + DK(KP918985947, +0.918985947228994779780736114132655398124909697); + DK(KP876768831, +0.876768831002589333891339807079336796764054852); + DK(KP830830026, +0.830830026003772851058548298459246407048009821); + DK(KP778434453, +0.778434453334651800608337670740821884709317477); + DK(KP715370323, +0.715370323453429719112414662767260662417897278); + DK(KP634356270, +0.634356270682424498893150776899916060542806975); + DK(KP342584725, +0.342584725681637509502641509861112333758894680); + DK(KP521108558, +0.521108558113202722944698153526659300680427422); + { + INT i; + for (i = v; i > 
0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(44, is), MAKE_VOLATILE_STRIDE(44, os)) { + E T1, TA, T1p, T1y, T19, T1d, T1a, T1e; + { + E T1f, T1u, T4, T1q, Tg, T1t, T7, T1s, Ta, Td, T1r, TP, T1X, T26, Ti; + E TG, T1O, T1w, TY, T1F, T17, To, T1i, T1k, T1h, Tr, T1j, Tu, T1g, Tx; + E T21, TU, TL, TC, T1S, T1J, T1m, T12, T1z, T1b; + T1 = ri[0]; + T1f = ii[0]; + { + E T1E, T16, Tb, Tc, Tv, Tw; + { + E T2, T3, Te, Tf; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 10)]; + Te = ri[WS(is, 5)]; + Tf = ri[WS(is, 6)]; + { + E T5, T6, T8, T9; + T5 = ri[WS(is, 2)]; + T1u = T3 - T2; + T4 = T2 + T3; + T1q = Tf - Te; + Tg = Te + Tf; + T6 = ri[WS(is, 9)]; + T8 = ri[WS(is, 3)]; + T9 = ri[WS(is, 8)]; + Tb = ri[WS(is, 4)]; + T1t = T6 - T5; + T7 = T5 + T6; + T1s = T9 - T8; + Ta = T8 + T9; + Tc = ri[WS(is, 7)]; + } + } + { + E T25, Th, T1W, TO; + T25 = FMA(KP521108558, T1q, T1u); + T1W = FMA(KP521108558, T1s, T1q); + TO = FNMS(KP342584725, T4, Ta); + Th = FNMS(KP342584725, Ta, T7); + Td = Tb + Tc; + T1r = Tc - Tb; + TP = FNMS(KP634356270, TO, Tg); + T1X = FNMS(KP715370323, T1W, T1t); + T26 = FMA(KP715370323, T25, T1r); + { + E TF, T1N, T1v, TX; + TF = FNMS(KP342584725, Td, T4); + Ti = FNMS(KP634356270, Th, Td); + T1N = FNMS(KP521108558, T1t, T1r); + T1v = FNMS(KP521108558, T1u, T1t); + TG = FNMS(KP634356270, TF, T7); + TX = FNMS(KP342584725, T7, Tg); + T1O = FMA(KP715370323, T1N, T1q); + T1w = FNMS(KP715370323, T1v, T1s); + T1E = FMA(KP521108558, T1r, T1s); + TY = FNMS(KP634356270, TX, T4); + T16 = FNMS(KP342584725, Tg, Td); + } + } + { + E Ty, Tz, Tm, Tn; + Tm = ii[WS(is, 3)]; + T1F = FMA(KP715370323, T1E, T1u); + Tn = ii[WS(is, 8)]; + T17 = FNMS(KP634356270, T16, Ta); + Ty = ii[WS(is, 5)]; + Tz = ii[WS(is, 6)]; + To = Tm - Tn; + T1i = Tm + Tn; + { + E Tp, Tq, Ts, Tt; + Tp = ii[WS(is, 2)]; + T1k = Ty + Tz; + TA = Ty - Tz; + Tq = ii[WS(is, 9)]; + Ts = ii[WS(is, 4)]; + Tt = ii[WS(is, 7)]; + Tv = ii[WS(is, 1)]; + T1h = Tp + Tq; + Tr = Tp - Tq; + 
T1j = Ts + Tt; + Tu = Ts - Tt; + Tw = ii[WS(is, 10)]; + } + } + { + E TB, T1R, T20, TK, TT, T1I, T1l; + T20 = FNMS(KP342584725, T1i, T1h); + TK = FMA(KP521108558, To, TA); + TT = FNMS(KP521108558, Tr, Tu); + T1g = Tv + Tw; + Tx = Tv - Tw; + T21 = FNMS(KP634356270, T20, T1j); + TU = FMA(KP715370323, TT, TA); + TL = FNMS(KP715370323, TK, Tr); + TB = FMA(KP521108558, TA, Tx); + T1R = FNMS(KP342584725, T1j, T1g); + T1I = FNMS(KP342584725, T1g, T1i); + T1l = FNMS(KP342584725, T1k, T1j); + TC = FMA(KP715370323, TB, Tu); + T1S = FNMS(KP634356270, T1R, T1h); + T1J = FNMS(KP634356270, T1I, T1k); + T1m = FNMS(KP634356270, T1l, T1i); + T12 = FMA(KP521108558, Tu, To); + T1z = FNMS(KP342584725, T1h, T1k); + T1b = FNMS(KP521108558, Tx, Tr); + } + } + { + E T13, T1A, T1c, T1Z, T1V, TH, TM, Tj, TD; + ro[0] = T1 + T4 + T7 + Ta + Td + Tg; + T13 = FMA(KP715370323, T12, Tx); + T1A = FNMS(KP634356270, T1z, T1g); + T1c = FNMS(KP715370323, T1b, To); + io[0] = T1f + T1g + T1h + T1i + T1j + T1k; + Tj = FNMS(KP778434453, Ti, T4); + TD = FMA(KP830830026, TC, Tr); + { + E TE, T23, T28, Tl, Tk, T22, T27; + T22 = FNMS(KP778434453, T21, T1g); + T27 = FMA(KP830830026, T26, T1t); + Tk = FNMS(KP876768831, Tj, Tg); + TE = FMA(KP918985947, TD, To); + T23 = FNMS(KP876768831, T22, T1k); + T28 = FMA(KP918985947, T27, T1s); + Tl = FNMS(KP959492973, Tk, T1); + { + E T1U, T1T, T24, T1Y; + T1T = FNMS(KP778434453, T1S, T1k); + T24 = FNMS(KP959492973, T23, T1f); + T1Y = FMA(KP830830026, T1X, T1u); + ro[WS(os, 1)] = FMA(KP989821441, TE, Tl); + ro[WS(os, 10)] = FNMS(KP989821441, TE, Tl); + T1U = FNMS(KP876768831, T1T, T1i); + io[WS(os, 10)] = FNMS(KP989821441, T28, T24); + io[WS(os, 1)] = FMA(KP989821441, T28, T24); + T1Z = FNMS(KP918985947, T1Y, T1r); + T1V = FNMS(KP959492973, T1U, T1f); + } + TH = FNMS(KP778434453, TG, Tg); + TM = FMA(KP830830026, TL, Tx); + } + { + E T1M, TZ, T14, T1Q; + { + E TN, TR, TV, TJ, TI, TQ, T1P; + TQ = FNMS(KP778434453, TP, Td); + io[WS(os, 9)] = FMA(KP989821441, T1Z, T1V); + 
io[WS(os, 2)] = FNMS(KP989821441, T1Z, T1V); + TI = FNMS(KP876768831, TH, Ta); + TN = FNMS(KP918985947, TM, Tu); + TR = FNMS(KP876768831, TQ, T7); + TV = FNMS(KP830830026, TU, To); + TJ = FNMS(KP959492973, TI, T1); + { + E T1L, TS, TW, T1K; + T1K = FNMS(KP778434453, T1J, T1j); + TS = FNMS(KP959492973, TR, T1); + TW = FNMS(KP918985947, TV, Tx); + ro[WS(os, 9)] = FMA(KP989821441, TN, TJ); + ro[WS(os, 2)] = FNMS(KP989821441, TN, TJ); + T1L = FNMS(KP876768831, T1K, T1h); + ro[WS(os, 3)] = FMA(KP989821441, TW, TS); + ro[WS(os, 8)] = FNMS(KP989821441, TW, TS); + T1P = FNMS(KP830830026, T1O, T1s); + T1M = FNMS(KP959492973, T1L, T1f); + } + TZ = FNMS(KP778434453, TY, Ta); + T14 = FNMS(KP830830026, T13, TA); + T1Q = FNMS(KP918985947, T1P, T1u); + } + { + E T15, T11, T1C, T1G, T1B, T10; + T1B = FNMS(KP778434453, T1A, T1i); + T10 = FNMS(KP876768831, TZ, Td); + T15 = FMA(KP918985947, T14, Tr); + io[WS(os, 8)] = FNMS(KP989821441, T1Q, T1M); + io[WS(os, 3)] = FMA(KP989821441, T1Q, T1M); + T11 = FNMS(KP959492973, T10, T1); + T1C = FNMS(KP876768831, T1B, T1j); + T1G = FNMS(KP830830026, T1F, T1q); + { + E T1D, T1H, T1o, T1x, T1n, T18; + T1n = FNMS(KP778434453, T1m, T1h); + ro[WS(os, 7)] = FMA(KP989821441, T15, T11); + ro[WS(os, 4)] = FNMS(KP989821441, T15, T11); + T1D = FNMS(KP959492973, T1C, T1f); + T1H = FMA(KP918985947, T1G, T1t); + T1o = FNMS(KP876768831, T1n, T1g); + T1x = FNMS(KP830830026, T1w, T1r); + T18 = FNMS(KP778434453, T17, T7); + io[WS(os, 7)] = FMA(KP989821441, T1H, T1D); + io[WS(os, 4)] = FNMS(KP989821441, T1H, T1D); + T1p = FNMS(KP959492973, T1o, T1f); + T1y = FNMS(KP918985947, T1x, T1q); + T19 = FNMS(KP876768831, T18, T4); + T1d = FNMS(KP830830026, T1c, Tu); + } + } + } + } + } + io[WS(os, 6)] = FNMS(KP989821441, T1y, T1p); + io[WS(os, 5)] = FMA(KP989821441, T1y, T1p); + T1a = FNMS(KP959492973, T19, T1); + T1e = FNMS(KP918985947, T1d, TA); + ro[WS(os, 5)] = FMA(KP989821441, T1e, T1a); + ro[WS(os, 6)] = FNMS(KP989821441, T1e, T1a); + } + } +} + +static const 
kdft_desc desc = { 11, "n1_11", {30, 0, 110, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_11) (planner *p) { + X(kdft_register) (p, n1_11, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 11 -name n1_11 -include n.h */ + +/* + * This function contains 140 FP additions, 100 FP multiplications, + * (or, 60 additions, 20 multiplications, 80 fused multiply/add), + * 41 stack variables, 10 constants, and 44 memory accesses + */ +#include "n.h" + +static void n1_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP654860733, +0.654860733945285064056925072466293553183791199); + DK(KP142314838, +0.142314838273285140443792668616369668791051361); + DK(KP959492973, +0.959492973614497389890368057066327699062454848); + DK(KP415415013, +0.415415013001886425529274149229623203524004910); + DK(KP841253532, +0.841253532831181168861811648919367717513292498); + DK(KP989821441, +0.989821441880932732376092037776718787376519372); + DK(KP909631995, +0.909631995354518371411715383079028460060241051); + DK(KP281732556, +0.281732556841429697711417915346616899035777899); + DK(KP540640817, +0.540640817455597582107635954318691695431770608); + DK(KP755749574, +0.755749574354258283774035843972344420179717445); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(44, is), MAKE_VOLATILE_STRIDE(44, os)) { + E T1, TM, T4, TG, Tk, TR, Tw, TN, T7, TK, Ta, TH, Tn, TQ, Td; + E TJ, Tq, TO, Tt, TP, Tg, TI; + { + E T2, T3, Ti, Tj; + T1 = ri[0]; + TM = ii[0]; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 10)]; + T4 = T2 + T3; + TG = T3 - T2; + Ti = ii[WS(is, 1)]; + Tj = ii[WS(is, 10)]; + Tk = Ti - Tj; + TR = Ti + Tj; + { + E Tu, Tv, T5, T6; + Tu = ii[WS(is, 2)]; + Tv = ii[WS(is, 9)]; + Tw = Tu - Tv; + TN = Tu + Tv; + T5 = ri[WS(is, 2)]; + T6 = ri[WS(is, 9)]; + T7 = T5 + T6; + TK = T6 - T5; + } + } + { + E T8, T9, 
To, Tp; + T8 = ri[WS(is, 3)]; + T9 = ri[WS(is, 8)]; + Ta = T8 + T9; + TH = T9 - T8; + { + E Tl, Tm, Tb, Tc; + Tl = ii[WS(is, 3)]; + Tm = ii[WS(is, 8)]; + Tn = Tl - Tm; + TQ = Tl + Tm; + Tb = ri[WS(is, 4)]; + Tc = ri[WS(is, 7)]; + Td = Tb + Tc; + TJ = Tc - Tb; + } + To = ii[WS(is, 4)]; + Tp = ii[WS(is, 7)]; + Tq = To - Tp; + TO = To + Tp; + { + E Tr, Ts, Te, Tf; + Tr = ii[WS(is, 5)]; + Ts = ii[WS(is, 6)]; + Tt = Tr - Ts; + TP = Tr + Ts; + Te = ri[WS(is, 5)]; + Tf = ri[WS(is, 6)]; + Tg = Te + Tf; + TI = Tf - Te; + } + } + { + E Tx, Th, TZ, T10; + ro[0] = T1 + T4 + T7 + Ta + Td + Tg; + io[0] = TM + TR + TN + TQ + TO + TP; + Tx = FMA(KP755749574, Tk, KP540640817 * Tn) + FNMS(KP909631995, Tt, KP281732556 * Tq) - (KP989821441 * Tw); + Th = FMA(KP841253532, Ta, T1) + FNMS(KP959492973, Td, KP415415013 * Tg) + FNMA(KP142314838, T7, KP654860733 * T4); + ro[WS(os, 7)] = Th - Tx; + ro[WS(os, 4)] = Th + Tx; + TZ = FMA(KP755749574, TG, KP540640817 * TH) + FNMS(KP909631995, TI, KP281732556 * TJ) - (KP989821441 * TK); + T10 = FMA(KP841253532, TQ, TM) + FNMS(KP959492973, TO, KP415415013 * TP) + FNMA(KP142314838, TN, KP654860733 * TR); + io[WS(os, 4)] = TZ + T10; + io[WS(os, 7)] = T10 - TZ; + { + E TX, TY, Tz, Ty; + TX = FMA(KP909631995, TG, KP755749574 * TK) + FNMA(KP540640817, TI, KP989821441 * TJ) - (KP281732556 * TH); + TY = FMA(KP415415013, TR, TM) + FNMS(KP142314838, TO, KP841253532 * TP) + FNMA(KP959492973, TQ, KP654860733 * TN); + io[WS(os, 2)] = TX + TY; + io[WS(os, 9)] = TY - TX; + Tz = FMA(KP909631995, Tk, KP755749574 * Tw) + FNMA(KP540640817, Tt, KP989821441 * Tq) - (KP281732556 * Tn); + Ty = FMA(KP415415013, T4, T1) + FNMS(KP142314838, Td, KP841253532 * Tg) + FNMA(KP959492973, Ta, KP654860733 * T7); + ro[WS(os, 9)] = Ty - Tz; + ro[WS(os, 2)] = Ty + Tz; + } + } + { + E TB, TA, TT, TU; + TB = FMA(KP540640817, Tk, KP909631995 * Tw) + FMA(KP989821441, Tn, KP755749574 * Tq) + (KP281732556 * Tt); + TA = FMA(KP841253532, T4, T1) + FNMS(KP959492973, Tg, KP415415013 * T7) + 
FNMA(KP654860733, Td, KP142314838 * Ta); + ro[WS(os, 10)] = TA - TB; + ro[WS(os, 1)] = TA + TB; + { + E TV, TW, TD, TC; + TV = FMA(KP540640817, TG, KP909631995 * TK) + FMA(KP989821441, TH, KP755749574 * TJ) + (KP281732556 * TI); + TW = FMA(KP841253532, TR, TM) + FNMS(KP959492973, TP, KP415415013 * TN) + FNMA(KP654860733, TO, KP142314838 * TQ); + io[WS(os, 1)] = TV + TW; + io[WS(os, 10)] = TW - TV; + TD = FMA(KP989821441, Tk, KP540640817 * Tq) + FNMS(KP909631995, Tn, KP755749574 * Tt) - (KP281732556 * Tw); + TC = FMA(KP415415013, Ta, T1) + FNMS(KP654860733, Tg, KP841253532 * Td) + FNMA(KP959492973, T7, KP142314838 * T4); + ro[WS(os, 8)] = TC - TD; + ro[WS(os, 3)] = TC + TD; + } + TT = FMA(KP989821441, TG, KP540640817 * TJ) + FNMS(KP909631995, TH, KP755749574 * TI) - (KP281732556 * TK); + TU = FMA(KP415415013, TQ, TM) + FNMS(KP654860733, TP, KP841253532 * TO) + FNMA(KP959492973, TN, KP142314838 * TR); + io[WS(os, 3)] = TT + TU; + io[WS(os, 8)] = TU - TT; + { + E TL, TS, TF, TE; + TL = FMA(KP281732556, TG, KP755749574 * TH) + FNMS(KP909631995, TJ, KP989821441 * TI) - (KP540640817 * TK); + TS = FMA(KP841253532, TN, TM) + FNMS(KP142314838, TP, KP415415013 * TO) + FNMA(KP654860733, TQ, KP959492973 * TR); + io[WS(os, 5)] = TL + TS; + io[WS(os, 6)] = TS - TL; + TF = FMA(KP281732556, Tk, KP755749574 * Tn) + FNMS(KP909631995, Tq, KP989821441 * Tt) - (KP540640817 * Tw); + TE = FMA(KP841253532, T7, T1) + FNMS(KP142314838, Tg, KP415415013 * Td) + FNMA(KP654860733, Ta, KP959492973 * T4); + ro[WS(os, 6)] = TE - TF; + ro[WS(os, 5)] = TE + TF; + } + } + } + } +} + +static const kdft_desc desc = { 11, "n1_11", {60, 20, 80, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_11) (planner *p) { + X(kdft_register) (p, n1_11, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,401 @@ 
+/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name n1_12 -include n.h */ + +/* + * This function contains 96 FP additions, 24 FP multiplications, + * (or, 72 additions, 0 multiplications, 24 fused multiply/add), + * 63 stack variables, 2 constants, and 48 memory accesses + */ +#include "n.h" + +static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(48, is), MAKE_VOLATILE_STRIDE(48, os)) { + E TT, TW, TF, T1q, TY, TQ, TX, T1n; + { + E TA, TS, TR, T5, Ts, Tz, TD, TV, TU, Ta, Tx, TC, T1d, Th, TJ; + E TG, Tg, T1u, T1c, T1f, TM, TN, Tk, T1i; + { + E T6, Tt, Tu, Tv, 
T9; + { + E T1, To, Tp, Tq, T4, T2, T3, T7, T8, Tr; + T1 = ri[0]; + T2 = ri[WS(is, 4)]; + T3 = ri[WS(is, 8)]; + To = ii[0]; + Tp = ii[WS(is, 4)]; + Tq = ii[WS(is, 8)]; + T4 = T2 + T3; + TA = T3 - T2; + T6 = ri[WS(is, 6)]; + TS = Tp - Tq; + Tr = Tp + Tq; + TR = FNMS(KP500000000, T4, T1); + T5 = T1 + T4; + T7 = ri[WS(is, 10)]; + Ts = To + Tr; + Tz = FNMS(KP500000000, Tr, To); + T8 = ri[WS(is, 2)]; + Tt = ii[WS(is, 6)]; + Tu = ii[WS(is, 10)]; + Tv = ii[WS(is, 2)]; + T9 = T7 + T8; + TD = T8 - T7; + } + { + E Tc, T1a, TH, TI, Tf, Td, Te, Tw, Ti, Tj, T1b; + Tc = ri[WS(is, 3)]; + TV = Tu - Tv; + Tw = Tu + Tv; + TU = FNMS(KP500000000, T9, T6); + Ta = T6 + T9; + Td = ri[WS(is, 7)]; + Tx = Tt + Tw; + TC = FNMS(KP500000000, Tw, Tt); + Te = ri[WS(is, 11)]; + T1a = ii[WS(is, 3)]; + TH = ii[WS(is, 7)]; + TI = ii[WS(is, 11)]; + Tf = Td + Te; + T1d = Te - Td; + Th = ri[WS(is, 9)]; + TJ = TH - TI; + T1b = TH + TI; + TG = FNMS(KP500000000, Tf, Tc); + Tg = Tc + Tf; + Ti = ri[WS(is, 1)]; + T1u = T1a + T1b; + T1c = FNMS(KP500000000, T1b, T1a); + Tj = ri[WS(is, 5)]; + T1f = ii[WS(is, 9)]; + TM = ii[WS(is, 1)]; + TN = ii[WS(is, 5)]; + Tk = Ti + Tj; + T1i = Tj - Ti; + } + } + { + E T1t, TO, TL, T1h, T1w, Tb, T1g, Tl; + T1t = T5 - Ta; + Tb = T5 + Ta; + TO = TM - TN; + T1g = TM + TN; + TL = FNMS(KP500000000, Tk, Th); + Tl = Th + Tk; + { + E T1x, Ty, T1v, Tn, Tm, T1y; + T1x = Ts + Tx; + Ty = Ts - Tx; + T1v = T1f + T1g; + T1h = FNMS(KP500000000, T1g, T1f); + Tn = Tg - Tl; + Tm = Tg + Tl; + T1y = T1u + T1v; + T1w = T1u - T1v; + ro[0] = Tb + Tm; + ro[WS(os, 6)] = Tb - Tm; + io[WS(os, 3)] = Tn + Ty; + io[0] = T1x + T1y; + io[WS(os, 6)] = T1x - T1y; + io[WS(os, 9)] = Ty - Tn; + } + { + E TB, TE, T1o, T11, T1p, TK, TP, T15, T1k, T18, T14, T16, T1l, T1m; + { + E T1e, T1j, TZ, T10, T12, T13; + TB = FNMS(KP866025403, TA, Tz); + TZ = FMA(KP866025403, TA, Tz); + T10 = FMA(KP866025403, TD, TC); + TE = FNMS(KP866025403, TD, TC); + T1o = FNMS(KP866025403, T1d, T1c); + T1e = FMA(KP866025403, T1d, T1c); + 
ro[WS(os, 9)] = T1t + T1w; + ro[WS(os, 3)] = T1t - T1w; + T1l = TZ + T10; + T11 = TZ - T10; + T1j = FMA(KP866025403, T1i, T1h); + T1p = FNMS(KP866025403, T1i, T1h); + TK = FNMS(KP866025403, TJ, TG); + T12 = FMA(KP866025403, TJ, TG); + T13 = FMA(KP866025403, TO, TL); + TP = FNMS(KP866025403, TO, TL); + TT = FNMS(KP866025403, TS, TR); + T15 = FMA(KP866025403, TS, TR); + T1m = T1e + T1j; + T1k = T1e - T1j; + T18 = T12 + T13; + T14 = T12 - T13; + T16 = FMA(KP866025403, TV, TU); + TW = FNMS(KP866025403, TV, TU); + } + io[WS(os, 10)] = T1l - T1m; + io[WS(os, 4)] = T1l + T1m; + io[WS(os, 7)] = T11 + T14; + io[WS(os, 1)] = T11 - T14; + { + E T17, T19, T1r, T1s; + T17 = T15 + T16; + T19 = T15 - T16; + ro[WS(os, 7)] = T19 - T1k; + ro[WS(os, 1)] = T19 + T1k; + ro[WS(os, 4)] = T17 + T18; + ro[WS(os, 10)] = T17 - T18; + T1r = TB + TE; + TF = TB - TE; + T1s = T1o + T1p; + T1q = T1o - T1p; + TY = TK + TP; + TQ = TK - TP; + io[WS(os, 2)] = T1r - T1s; + io[WS(os, 8)] = T1r + T1s; + } + } + } + } + io[WS(os, 11)] = TF + TQ; + io[WS(os, 5)] = TF - TQ; + TX = TT + TW; + T1n = TT - TW; + ro[WS(os, 11)] = T1n - T1q; + ro[WS(os, 5)] = T1n + T1q; + ro[WS(os, 8)] = TX + TY; + ro[WS(os, 2)] = TX - TY; + } + } +} + +static const kdft_desc desc = { 12, "n1_12", {72, 0, 24, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_12) (planner *p) { + X(kdft_register) (p, n1_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 12 -name n1_12 -include n.h */ + +/* + * This function contains 96 FP additions, 16 FP multiplications, + * (or, 88 additions, 8 multiplications, 8 fused multiply/add), + * 43 stack variables, 2 constants, and 48 memory accesses + */ +#include "n.h" + +static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + 
INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(48, is), MAKE_VOLATILE_STRIDE(48, os)) { + E T5, TR, TA, Ts, TS, Tz, Ta, TU, TD, Tx, TV, TC, Tg, T1a, TG; + E TJ, T1u, T1d, Tl, T1f, TL, TO, T1v, T1i; + { + E T1, T2, T3, T4; + T1 = ri[0]; + T2 = ri[WS(is, 4)]; + T3 = ri[WS(is, 8)]; + T4 = T2 + T3; + T5 = T1 + T4; + TR = FNMS(KP500000000, T4, T1); + TA = KP866025403 * (T3 - T2); + } + { + E To, Tp, Tq, Tr; + To = ii[0]; + Tp = ii[WS(is, 4)]; + Tq = ii[WS(is, 8)]; + Tr = Tp + Tq; + Ts = To + Tr; + TS = KP866025403 * (Tp - Tq); + Tz = FNMS(KP500000000, Tr, To); + } + { + E T6, T7, T8, T9; + T6 = ri[WS(is, 6)]; + T7 = ri[WS(is, 10)]; + T8 = ri[WS(is, 2)]; + T9 = T7 + T8; + Ta = T6 + T9; + TU = FNMS(KP500000000, T9, T6); + TD = KP866025403 * (T8 - T7); + } + { + E Tt, Tu, Tv, Tw; + Tt = ii[WS(is, 6)]; + Tu = ii[WS(is, 10)]; + Tv = ii[WS(is, 2)]; + Tw = Tu + Tv; + Tx = Tt + Tw; + TV = KP866025403 * (Tu - Tv); + TC = FNMS(KP500000000, Tw, Tt); + } + { + E Tc, Td, Te, Tf; + Tc = ri[WS(is, 3)]; + Td = ri[WS(is, 7)]; + Te = ri[WS(is, 11)]; + Tf = Td + Te; + Tg = Tc + Tf; + T1a = KP866025403 * (Te - Td); + TG = FNMS(KP500000000, Tf, Tc); + } + { + E T1b, TH, TI, T1c; + T1b = ii[WS(is, 3)]; + TH = ii[WS(is, 7)]; + TI = ii[WS(is, 11)]; + T1c = TH + TI; + TJ = KP866025403 * (TH - TI); + T1u = T1b + T1c; + T1d = FNMS(KP500000000, T1c, T1b); + } + { + E Th, Ti, Tj, Tk; + Th = ri[WS(is, 9)]; + Ti = ri[WS(is, 1)]; + Tj = ri[WS(is, 5)]; + Tk = Ti + Tj; + Tl = Th + Tk; + T1f = KP866025403 * (Tj - Ti); + TL = FNMS(KP500000000, Tk, Th); + } + { + E T1g, TM, TN, T1h; + T1g = ii[WS(is, 9)]; + TM = ii[WS(is, 1)]; + TN = ii[WS(is, 5)]; + T1h = TM + TN; + TO = KP866025403 * (TM - TN); + T1v = T1g + T1h; + T1i = FNMS(KP500000000, T1h, T1g); + } + { + E Tb, Tm, T1t, T1w; + Tb = T5 + Ta; + Tm = Tg + Tl; + ro[WS(os, 6)] = Tb - Tm; + ro[0] = Tb + Tm; + { + E T1x, T1y, Tn, Ty; + T1x = Ts + Tx; + T1y = T1u + T1v; + io[WS(os, 6)] 
= T1x - T1y; + io[0] = T1x + T1y; + Tn = Tg - Tl; + Ty = Ts - Tx; + io[WS(os, 3)] = Tn + Ty; + io[WS(os, 9)] = Ty - Tn; + } + T1t = T5 - Ta; + T1w = T1u - T1v; + ro[WS(os, 3)] = T1t - T1w; + ro[WS(os, 9)] = T1t + T1w; + { + E T11, T1l, T1k, T1m, T14, T18, T17, T19; + { + E TZ, T10, T1e, T1j; + TZ = TA + Tz; + T10 = TD + TC; + T11 = TZ - T10; + T1l = TZ + T10; + T1e = T1a + T1d; + T1j = T1f + T1i; + T1k = T1e - T1j; + T1m = T1e + T1j; + } + { + E T12, T13, T15, T16; + T12 = TG + TJ; + T13 = TL + TO; + T14 = T12 - T13; + T18 = T12 + T13; + T15 = TR + TS; + T16 = TU + TV; + T17 = T15 + T16; + T19 = T15 - T16; + } + io[WS(os, 1)] = T11 - T14; + ro[WS(os, 1)] = T19 + T1k; + io[WS(os, 7)] = T11 + T14; + ro[WS(os, 7)] = T19 - T1k; + ro[WS(os, 10)] = T17 - T18; + io[WS(os, 10)] = T1l - T1m; + ro[WS(os, 4)] = T17 + T18; + io[WS(os, 4)] = T1l + T1m; + } + { + E TF, T1r, T1q, T1s, TQ, TY, TX, T1n; + { + E TB, TE, T1o, T1p; + TB = Tz - TA; + TE = TC - TD; + TF = TB - TE; + T1r = TB + TE; + T1o = T1d - T1a; + T1p = T1i - T1f; + T1q = T1o - T1p; + T1s = T1o + T1p; + } + { + E TK, TP, TT, TW; + TK = TG - TJ; + TP = TL - TO; + TQ = TK - TP; + TY = TK + TP; + TT = TR - TS; + TW = TU - TV; + TX = TT + TW; + T1n = TT - TW; + } + io[WS(os, 5)] = TF - TQ; + ro[WS(os, 5)] = T1n + T1q; + io[WS(os, 11)] = TF + TQ; + ro[WS(os, 11)] = T1n - T1q; + ro[WS(os, 2)] = TX - TY; + io[WS(os, 2)] = T1r - T1s; + ro[WS(os, 8)] = TX + TY; + io[WS(os, 8)] = T1r + T1s; + } + } + } + } +} + +static const kdft_desc desc = { 12, "n1_12", {88, 8, 8, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_12) (planner *p) { + X(kdft_register) (p, n1_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts 
Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 13 -name n1_13 -include n.h */ + +/* + * This function contains 176 FP additions, 114 FP multiplications, + * (or, 62 additions, 0 multiplications, 114 fused multiply/add), + * 87 stack variables, 25 constants, and 52 memory accesses + */ +#include "n.h" + +static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP875502302, +0.875502302409147941146295545768755143177842006); + DK(KP520028571, +0.520028571888864619117130500499232802493238139); + DK(KP575140729, +0.575140729474003121368385547455453388461001608); + DK(KP600477271, +0.600477271932665282925769253334763009352012849); + DK(KP300462606, +0.300462606288665774426601772289207995520941381); + DK(KP516520780, +0.516520780623489722840901288569017135705033622); + DK(KP968287244, +0.968287244361984016049539446938120421179794516); + DK(KP503537032, +0.503537032863766627246873853868466977093348562); + DK(KP251768516, 
+0.251768516431883313623436926934233488546674281); + DK(KP581704778, +0.581704778510515730456870384989698884939833902); + DK(KP859542535, +0.859542535098774820163672132761689612766401925); + DK(KP083333333, +0.083333333333333333333333333333333333333333333); + DK(KP957805992, +0.957805992594665126462521754605754580515587217); + DK(KP522026385, +0.522026385161275033714027226654165028300441940); + DK(KP853480001, +0.853480001859823990758994934970528322872359049); + DK(KP769338817, +0.769338817572980603471413688209101117038278899); + DK(KP612264650, +0.612264650376756543746494474777125408779395514); + DK(KP038632954, +0.038632954644348171955506895830342264440241080); + DK(KP302775637, +0.302775637731994646559610633735247973125648287); + DK(KP514918778, +0.514918778086315755491789696138117261566051239); + DK(KP686558370, +0.686558370781754340655719594850823015421401653); + DK(KP226109445, +0.226109445035782405468510155372505010481906348); + DK(KP301479260, +0.301479260047709873958013540496673347309208464); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(52, is), MAKE_VOLATILE_STRIDE(52, os)) { + E T2B, T2H, T2I, T2G; + { + E T1, T1P, T2n, T2o, To, TH, T2h, T2k, TE, TB, TF, Tw, T2j, T2c, T1m; + E T1W, T1X, T1c, T19, T1j, T12, T1f, T21, T24, T27, T1U; + T1 = ri[0]; + T1P = ii[0]; + { + E T2b, Tv, Ts, T2a; + { + E T2d, Tf, Tq, Ty, Tb, Tr, T6, Tx, Ti, Tt, Tu, Tl; + { + E T7, T8, T9, Td, Te; + Td = ri[WS(is, 8)]; + Te = ri[WS(is, 5)]; + T7 = ri[WS(is, 12)]; + T8 = ri[WS(is, 10)]; + T9 = ri[WS(is, 4)]; + T2d = Td - Te; + Tf = Td + Te; + { + E T2, Ta, T3, T4; + T2 = ri[WS(is, 1)]; + Ta = T8 + T9; + Tq = T8 - T9; + T3 = ri[WS(is, 3)]; + T4 = ri[WS(is, 9)]; + { + E Tg, T5, Th, Tj, Tk; + Tg = ri[WS(is, 11)]; + Ty = FMS(KP500000000, Ta, T7); + Tb = T7 + Ta; + Tr = T4 - 
T3; + T5 = T3 + T4; + Th = ri[WS(is, 6)]; + Tj = ri[WS(is, 7)]; + Tk = ri[WS(is, 2)]; + T6 = T2 + T5; + Tx = FNMS(KP500000000, T5, T2); + Ti = Tg + Th; + Tt = Tg - Th; + Tu = Tj - Tk; + Tl = Tj + Tk; + } + } + } + { + E Tc, Tm, T2e, T2g; + Tc = T6 + Tb; + T2n = T6 - Tb; + T2b = Ti - Tl; + Tm = Ti + Tl; + T2e = Tt + Tu; + Tv = Tt - Tu; + Ts = Tq - Tr; + T2g = Tr + Tq; + { + E Tz, TA, Tn, T2f; + Tz = Tx - Ty; + T2a = Tx + Ty; + TA = FNMS(KP500000000, Tm, Tf); + Tn = Tf + Tm; + T2f = FNMS(KP500000000, T2e, T2d); + T2o = T2d + T2e; + To = Tc + Tn; + TH = Tc - Tn; + T2h = FMA(KP866025403, T2g, T2f); + T2k = FNMS(KP866025403, T2g, T2f); + TE = Tz - TA; + TB = Tz + TA; + } + } + } + { + E T1R, TM, T10, T18, T1l, TX, T1k, T15, TP, T1a, T1b, TS; + { + E T16, TY, TZ, TK, TL; + TK = ii[WS(is, 8)]; + TF = Ts - Tv; + Tw = Ts + Tv; + T2j = FNMS(KP866025403, T2b, T2a); + T2c = FMA(KP866025403, T2b, T2a); + TL = ii[WS(is, 5)]; + T16 = ii[WS(is, 12)]; + TY = ii[WS(is, 10)]; + TZ = ii[WS(is, 4)]; + T1R = TK + TL; + TM = TK - TL; + { + E T13, T17, TV, TW; + T13 = ii[WS(is, 1)]; + T17 = TY + TZ; + T10 = TY - TZ; + TV = ii[WS(is, 9)]; + TW = ii[WS(is, 3)]; + { + E TN, T14, TO, TQ, TR; + TN = ii[WS(is, 11)]; + T18 = FMS(KP500000000, T17, T16); + T1l = T16 + T17; + TX = TV - TW; + T14 = TW + TV; + TO = ii[WS(is, 6)]; + TQ = ii[WS(is, 7)]; + TR = ii[WS(is, 2)]; + T1k = T13 + T14; + T15 = FNMS(KP500000000, T14, T13); + TP = TN - TO; + T1a = TN + TO; + T1b = TQ + TR; + TS = TQ - TR; + } + } + } + { + E T1Q, T11, TT, T1S; + T1Q = T1k + T1l; + T1m = T1k - T1l; + T11 = TX + T10; + T1W = T10 - TX; + T1X = TP - TS; + TT = TP + TS; + T1S = T1a + T1b; + T1c = T1a - T1b; + { + E T1Z, TU, T1T, T20; + T19 = T15 + T18; + T1Z = T15 - T18; + T1j = TM + TT; + TU = FNMS(KP500000000, TT, TM); + T1T = T1R + T1S; + T20 = FNMS(KP500000000, T1S, T1R); + T12 = FMA(KP866025403, T11, TU); + T1f = FNMS(KP866025403, T11, TU); + T21 = T1Z + T20; + T24 = T1Z - T20; + T27 = T1Q - T1T; + T1U = T1Q + T1T; + } + } + } + 
} + { + E T1g, T1d, T25, T1Y; + ro[0] = T1 + To; + T1g = FNMS(KP866025403, T1c, T19); + T1d = FMA(KP866025403, T1c, T19); + T25 = T1W - T1X; + T1Y = T1W + T1X; + io[0] = T1P + T1U; + { + E T1C, T1B, T1F, T1K; + { + E TC, T1J, T1z, T1w, T1I, T1O, Tp, T1E, T1q, TI, T1o, T1s; + { + E TG, T1n, T1G, T1u, T1e, T1h, T1v, T1x, T1y, T1H, T1i; + TC = FMA(KP301479260, TB, Tw); + T1x = FNMS(KP226109445, Tw, TB); + T1y = FMA(KP686558370, TE, TF); + TG = FNMS(KP514918778, TF, TE); + T1n = FNMS(KP302775637, T1m, T1j); + T1G = FMA(KP302775637, T1j, T1m); + T1u = FNMS(KP038632954, T12, T1d); + T1e = FMA(KP038632954, T1d, T12); + T1h = FMA(KP612264650, T1g, T1f); + T1v = FNMS(KP612264650, T1f, T1g); + T1J = FMA(KP769338817, T1y, T1x); + T1z = FNMS(KP769338817, T1y, T1x); + T1H = FNMS(KP853480001, T1v, T1u); + T1w = FMA(KP853480001, T1v, T1u); + T1I = FNMS(KP522026385, T1H, T1G); + T1O = FMA(KP957805992, T1G, T1H); + Tp = FNMS(KP083333333, To, T1); + T1E = FMA(KP853480001, T1h, T1e); + T1i = FNMS(KP853480001, T1h, T1e); + T1q = FNMS(KP859542535, TG, TH); + TI = FMA(KP581704778, TH, TG); + T1o = FMA(KP957805992, T1n, T1i); + T1s = FNMS(KP522026385, T1i, T1n); + } + { + E T1A, T1D, T1t, T1L, T1M; + { + E T1p, TD, TJ, T1N, T1r; + T1p = FNMS(KP251768516, TC, Tp); + TD = FMA(KP503537032, TC, Tp); + T1C = FNMS(KP968287244, T1z, T1w); + T1A = FMA(KP968287244, T1z, T1w); + TJ = FMA(KP516520780, TI, TD); + T1N = FNMS(KP516520780, TI, TD); + T1D = FNMS(KP300462606, T1q, T1p); + T1r = FMA(KP300462606, T1q, T1p); + ro[WS(os, 8)] = FNMS(KP600477271, T1O, T1N); + ro[WS(os, 12)] = FMA(KP600477271, T1o, TJ); + ro[WS(os, 1)] = FNMS(KP600477271, T1o, TJ); + T1t = FNMS(KP575140729, T1s, T1r); + T1B = FMA(KP575140729, T1s, T1r); + ro[WS(os, 5)] = FMA(KP600477271, T1O, T1N); + } + T1L = FNMS(KP520028571, T1E, T1D); + T1F = FMA(KP520028571, T1E, T1D); + T1K = FMA(KP875502302, T1J, T1I); + T1M = FNMS(KP875502302, T1J, T1I); + ro[WS(os, 3)] = FMA(KP520028571, T1A, T1t); + ro[WS(os, 9)] = FNMS(KP520028571, 
T1A, T1t); + ro[WS(os, 6)] = FMA(KP575140729, T1M, T1L); + ro[WS(os, 11)] = FNMS(KP575140729, T1M, T1L); + } + } + { + E T22, T2F, T2N, T2K, T2w, T2A, T1V, T2C, T28, T2y, T2M, T2q; + { + E T26, T2v, T2p, T2i, T2s, T2t, T2l, T2D, T2E, T2u, T2m; + T2D = FNMS(KP226109445, T1Y, T21); + T22 = FMA(KP301479260, T21, T1Y); + ro[WS(os, 2)] = FMA(KP575140729, T1K, T1F); + ro[WS(os, 7)] = FNMS(KP575140729, T1K, T1F); + ro[WS(os, 4)] = FMA(KP520028571, T1C, T1B); + ro[WS(os, 10)] = FNMS(KP520028571, T1C, T1B); + T26 = FNMS(KP514918778, T25, T24); + T2E = FMA(KP686558370, T24, T25); + T2v = FNMS(KP302775637, T2n, T2o); + T2p = FMA(KP302775637, T2o, T2n); + T2i = FNMS(KP038632954, T2h, T2c); + T2s = FMA(KP038632954, T2c, T2h); + T2t = FMA(KP612264650, T2j, T2k); + T2l = FNMS(KP612264650, T2k, T2j); + T2F = FNMS(KP769338817, T2E, T2D); + T2N = FMA(KP769338817, T2E, T2D); + T2K = FMA(KP853480001, T2t, T2s); + T2u = FNMS(KP853480001, T2t, T2s); + T2w = FMA(KP957805992, T2v, T2u); + T2A = FNMS(KP522026385, T2u, T2v); + T1V = FNMS(KP083333333, T1U, T1P); + T2m = FNMS(KP853480001, T2l, T2i); + T2C = FMA(KP853480001, T2l, T2i); + T28 = FMA(KP581704778, T27, T26); + T2y = FNMS(KP859542535, T26, T27); + T2M = FNMS(KP522026385, T2m, T2p); + T2q = FMA(KP957805992, T2p, T2m); + } + { + E T2O, T2Q, T2z, T2P, T2L; + { + E T23, T2x, T2r, T29, T2J; + T23 = FMA(KP503537032, T22, T1V); + T2x = FNMS(KP251768516, T22, T1V); + T2O = FNMS(KP875502302, T2N, T2M); + T2Q = FMA(KP875502302, T2N, T2M); + T2r = FMA(KP516520780, T28, T23); + T29 = FNMS(KP516520780, T28, T23); + T2z = FMA(KP300462606, T2y, T2x); + T2J = FNMS(KP300462606, T2y, T2x); + io[WS(os, 12)] = FNMS(KP600477271, T2w, T2r); + io[WS(os, 1)] = FMA(KP600477271, T2w, T2r); + io[WS(os, 8)] = FMA(KP600477271, T2q, T29); + io[WS(os, 5)] = FNMS(KP600477271, T2q, T29); + T2P = FMA(KP520028571, T2K, T2J); + T2L = FNMS(KP520028571, T2K, T2J); + } + T2B = FMA(KP575140729, T2A, T2z); + T2H = FNMS(KP575140729, T2A, T2z); + io[WS(os, 11)] = 
FMA(KP575140729, T2Q, T2P); + io[WS(os, 6)] = FNMS(KP575140729, T2Q, T2P); + io[WS(os, 7)] = FMA(KP575140729, T2O, T2L); + io[WS(os, 2)] = FNMS(KP575140729, T2O, T2L); + T2I = FMA(KP968287244, T2F, T2C); + T2G = FNMS(KP968287244, T2F, T2C); + } + } + } + } + } + io[WS(os, 10)] = FMA(KP520028571, T2I, T2H); + io[WS(os, 4)] = FNMS(KP520028571, T2I, T2H); + io[WS(os, 9)] = FMA(KP520028571, T2G, T2B); + io[WS(os, 3)] = FNMS(KP520028571, T2G, T2B); + } + } +} + +static const kdft_desc desc = { 13, "n1_13", {62, 0, 114, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_13) (planner *p) { + X(kdft_register) (p, n1_13, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 13 -name n1_13 -include n.h */ + +/* + * This function contains 176 FP additions, 68 FP multiplications, + * (or, 138 additions, 30 multiplications, 38 fused multiply/add), + * 71 stack variables, 20 constants, and 52 memory accesses + */ +#include "n.h" + +static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP083333333, +0.083333333333333333333333333333333333333333333); + DK(KP251768516, +0.251768516431883313623436926934233488546674281); + DK(KP075902986, +0.075902986037193865983102897245103540356428373); + DK(KP132983124, +0.132983124607418643793760531921092974399165133); + DK(KP258260390, +0.258260390311744861420450644284508567852516811); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP300238635, +0.300238635966332641462884626667381504676006424); + DK(KP011599105, +0.011599105605768290721655456654083252189827041); + DK(KP156891391, +0.156891391051584611046832726756003269660212636); + DK(KP256247671, +0.256247671582936600958684654061725059144125175); + DK(KP174138601, +0.174138601152135905005660794929264742616964676); + DK(KP575140729, 
+0.575140729474003121368385547455453388461001608); + DK(KP503537032, +0.503537032863766627246873853868466977093348562); + DK(KP113854479, +0.113854479055790798974654345867655310534642560); + DK(KP265966249, +0.265966249214837287587521063842185948798330267); + DK(KP387390585, +0.387390585467617292130675966426762851778775217); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP300462606, +0.300462606288665774426601772289207995520941381); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(52, is), MAKE_VOLATILE_STRIDE(52, os)) { + E T1, T1q, Tt, Tu, To, T22, T20, T24, TF, TH, TA, TI, T1X, T25, T2a; + E T2d, T18, T1n, T2k, T2n, T1l, T1r, T1f, T1o, T2h, T2m; + T1 = ri[0]; + T1q = ii[0]; + { + E Tf, Tp, Tb, TC, Tx, T6, TB, Tw, Ti, Tq, Tl, Tr, Tm, Ts, Td; + E Te, Tc, Tn; + Td = ri[WS(is, 8)]; + Te = ri[WS(is, 5)]; + Tf = Td + Te; + Tp = Td - Te; + { + E T7, T8, T9, Ta; + T7 = ri[WS(is, 12)]; + T8 = ri[WS(is, 10)]; + T9 = ri[WS(is, 4)]; + Ta = T8 + T9; + Tb = T7 + Ta; + TC = T8 - T9; + Tx = FNMS(KP500000000, Ta, T7); + } + { + E T2, T3, T4, T5; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 3)]; + T4 = ri[WS(is, 9)]; + T5 = T3 + T4; + T6 = T2 + T5; + TB = T3 - T4; + Tw = FNMS(KP500000000, T5, T2); + } + { + E Tg, Th, Tj, Tk; + Tg = ri[WS(is, 11)]; + Th = ri[WS(is, 6)]; + Ti = Tg + Th; + Tq = Tg - Th; + Tj = ri[WS(is, 7)]; + Tk = ri[WS(is, 2)]; + Tl = Tj + Tk; + Tr = Tj - Tk; + } + Tm = Ti + Tl; + Ts = Tq + Tr; + Tt = Tp + Ts; + Tu = T6 - Tb; + Tc = T6 + Tb; + Tn = Tf + Tm; + To = Tc + Tn; + T22 = KP300462606 * (Tc - Tn); + { + E T1Y, T1Z, TD, TE; + T1Y = TB + TC; + T1Z = Tq - Tr; + T20 = T1Y - T1Z; + T24 = T1Y + T1Z; + TD = KP866025403 * (TB - TC); + TE = FNMS(KP500000000, Ts, Tp); + TF = TD - TE; + TH = TD + TE; + } + { + E Ty, Tz, T1V, T1W; + Ty = Tw - Tx; + Tz = KP866025403 * (Ti - Tl); + TA = Ty + Tz; + TI = Ty 
- Tz; + T1V = Tw + Tx; + T1W = FNMS(KP500000000, Tm, Tf); + T1X = T1V - T1W; + T25 = T1V + T1W; + } + } + { + E TZ, T2b, TV, T1i, T1a, TQ, T1h, T19, T12, T1d, T15, T1c, T16, T2c, TX; + E TY, TW, T17; + TX = ii[WS(is, 8)]; + TY = ii[WS(is, 5)]; + TZ = TX + TY; + T2b = TX - TY; + { + E TR, TS, TT, TU; + TR = ii[WS(is, 12)]; + TS = ii[WS(is, 10)]; + TT = ii[WS(is, 4)]; + TU = TS + TT; + TV = FNMS(KP500000000, TU, TR); + T1i = TR + TU; + T1a = TS - TT; + } + { + E TM, TN, TO, TP; + TM = ii[WS(is, 1)]; + TN = ii[WS(is, 3)]; + TO = ii[WS(is, 9)]; + TP = TN + TO; + TQ = FNMS(KP500000000, TP, TM); + T1h = TM + TP; + T19 = TN - TO; + } + { + E T10, T11, T13, T14; + T10 = ii[WS(is, 11)]; + T11 = ii[WS(is, 6)]; + T12 = T10 + T11; + T1d = T10 - T11; + T13 = ii[WS(is, 7)]; + T14 = ii[WS(is, 2)]; + T15 = T13 + T14; + T1c = T13 - T14; + } + T16 = T12 + T15; + T2c = T1d + T1c; + T2a = T1h - T1i; + T2d = T2b + T2c; + TW = TQ + TV; + T17 = FNMS(KP500000000, T16, TZ); + T18 = TW - T17; + T1n = TW + T17; + { + E T2i, T2j, T1j, T1k; + T2i = TQ - TV; + T2j = KP866025403 * (T15 - T12); + T2k = T2i + T2j; + T2n = T2i - T2j; + T1j = T1h + T1i; + T1k = TZ + T16; + T1l = KP300462606 * (T1j - T1k); + T1r = T1j + T1k; + } + { + E T1b, T1e, T2f, T2g; + T1b = T19 + T1a; + T1e = T1c - T1d; + T1f = T1b + T1e; + T1o = T1e - T1b; + T2f = FNMS(KP500000000, T2c, T2b); + T2g = KP866025403 * (T1a - T19); + T2h = T2f - T2g; + T2m = T2g + T2f; + } + } + ro[0] = T1 + To; + io[0] = T1q + T1r; + { + E T1D, T1N, T1y, T1x, T1E, T1O, Tv, TK, T1J, T1Q, T1m, T1R, T1t, T1I, TG; + E TJ; + { + E T1B, T1C, T1v, T1w; + T1B = FMA(KP387390585, T1f, KP265966249 * T18); + T1C = FMA(KP113854479, T1o, KP503537032 * T1n); + T1D = T1B + T1C; + T1N = T1C - T1B; + T1y = FMA(KP575140729, Tu, KP174138601 * Tt); + T1v = FNMS(KP156891391, TH, KP256247671 * TI); + T1w = FMA(KP011599105, TF, KP300238635 * TA); + T1x = T1v - T1w; + T1E = T1y + T1x; + T1O = KP1_732050807 * (T1v + T1w); + } + Tv = FNMS(KP174138601, Tu, KP575140729 * 
Tt); + TG = FNMS(KP300238635, TF, KP011599105 * TA); + TJ = FMA(KP256247671, TH, KP156891391 * TI); + TK = TG - TJ; + T1J = KP1_732050807 * (TJ + TG); + T1Q = Tv - TK; + { + E T1g, T1H, T1p, T1s, T1G; + T1g = FNMS(KP132983124, T1f, KP258260390 * T18); + T1H = T1l - T1g; + T1p = FNMS(KP251768516, T1o, KP075902986 * T1n); + T1s = FNMS(KP083333333, T1r, T1q); + T1G = T1s - T1p; + T1m = FMA(KP2_000000000, T1g, T1l); + T1R = T1H + T1G; + T1t = FMA(KP2_000000000, T1p, T1s); + T1I = T1G - T1H; + } + { + E TL, T1u, T1P, T1S; + TL = FMA(KP2_000000000, TK, Tv); + T1u = T1m + T1t; + io[WS(os, 1)] = TL + T1u; + io[WS(os, 12)] = T1u - TL; + { + E T1z, T1A, T1T, T1U; + T1z = FMS(KP2_000000000, T1x, T1y); + T1A = T1t - T1m; + io[WS(os, 5)] = T1z + T1A; + io[WS(os, 8)] = T1A - T1z; + T1T = T1R - T1Q; + T1U = T1O + T1N; + io[WS(os, 4)] = T1T - T1U; + io[WS(os, 10)] = T1U + T1T; + } + T1P = T1N - T1O; + T1S = T1Q + T1R; + io[WS(os, 3)] = T1P + T1S; + io[WS(os, 9)] = T1S - T1P; + { + E T1L, T1M, T1F, T1K; + T1L = T1J + T1I; + T1M = T1E + T1D; + io[WS(os, 6)] = T1L - T1M; + io[WS(os, 11)] = T1M + T1L; + T1F = T1D - T1E; + T1K = T1I - T1J; + io[WS(os, 2)] = T1F + T1K; + io[WS(os, 7)] = T1K - T1F; + } + } + } + { + E T2y, T2I, T2J, T2K, T2B, T2L, T2e, T2p, T2u, T2G, T23, T2F, T28, T2t, T2l; + E T2o; + { + E T2w, T2x, T2z, T2A; + T2w = FMA(KP387390585, T20, KP265966249 * T1X); + T2x = FNMS(KP503537032, T25, KP113854479 * T24); + T2y = T2w + T2x; + T2I = T2w - T2x; + T2J = FMA(KP575140729, T2a, KP174138601 * T2d); + T2z = FNMS(KP300238635, T2n, KP011599105 * T2m); + T2A = FNMS(KP156891391, T2h, KP256247671 * T2k); + T2K = T2z + T2A; + T2B = KP1_732050807 * (T2z - T2A); + T2L = T2J + T2K; + } + T2e = FNMS(KP575140729, T2d, KP174138601 * T2a); + T2l = FMA(KP256247671, T2h, KP156891391 * T2k); + T2o = FMA(KP300238635, T2m, KP011599105 * T2n); + T2p = T2l - T2o; + T2u = T2e - T2p; + T2G = KP1_732050807 * (T2o + T2l); + { + E T21, T2r, T26, T27, T2s; + T21 = FNMS(KP132983124, T20, KP258260390 
* T1X); + T2r = T22 - T21; + T26 = FMA(KP251768516, T24, KP075902986 * T25); + T27 = FNMS(KP083333333, To, T1); + T2s = T27 - T26; + T23 = FMA(KP2_000000000, T21, T22); + T2F = T2s - T2r; + T28 = FMA(KP2_000000000, T26, T27); + T2t = T2r + T2s; + } + { + E T29, T2q, T2N, T2O; + T29 = T23 + T28; + T2q = FMA(KP2_000000000, T2p, T2e); + ro[WS(os, 12)] = T29 - T2q; + ro[WS(os, 1)] = T29 + T2q; + { + E T2v, T2C, T2P, T2Q; + T2v = T2t - T2u; + T2C = T2y - T2B; + ro[WS(os, 10)] = T2v - T2C; + ro[WS(os, 4)] = T2v + T2C; + T2P = T28 - T23; + T2Q = FMS(KP2_000000000, T2K, T2J); + ro[WS(os, 5)] = T2P - T2Q; + ro[WS(os, 8)] = T2P + T2Q; + } + T2N = T2F - T2G; + T2O = T2L - T2I; + ro[WS(os, 11)] = T2N - T2O; + ro[WS(os, 6)] = T2N + T2O; + { + E T2H, T2M, T2D, T2E; + T2H = T2F + T2G; + T2M = T2I + T2L; + ro[WS(os, 7)] = T2H - T2M; + ro[WS(os, 2)] = T2H + T2M; + T2D = T2t + T2u; + T2E = T2y + T2B; + ro[WS(os, 3)] = T2D - T2E; + ro[WS(os, 9)] = T2D + T2E; + } + } + } + } + } +} + +static const kdft_desc desc = { 13, "n1_13", {138, 30, 38, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_13) (planner *p) { + X(kdft_register) (p, n1_13, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,509 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include n.h */ + +/* + * This function contains 148 FP additions, 84 FP multiplications, + * (or, 64 additions, 0 multiplications, 84 fused multiply/add), + * 80 stack variables, 6 constants, and 56 memory accesses + */ +#include "n.h" + +static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) { + E Tp, T1L, T24, T1W, T1X, T28, T2a, T1Y, T29, T2b; + { + E T3, T1x, T1b, To, T1i, T1M, Ts, Ta, T1k, Tv, Th, T1j, T1K, Ty, TZ; + E T14, Tz, T1Z, T27, T2c, T1d, TI, T23, T1G, T1D, TW, T1e, T22, T1A, TP; + E T1c, T1n, T1s, T1f, T1P; + { + E T1, T2, T19, T1a; + T1 = ri[0]; + T2 = ri[WS(is, 7)]; + T19 = ii[0]; + T1a = ii[WS(is, 7)]; + { + E Tq, T6, Tr, T9, Te, Tx, Tn, Tw, Tk, Tf, Tb, Tc; + { + E Tl, Tm, Ti, Tj; + { + E T4, T5, T7, T8; + T4 = ri[WS(is, 2)]; + Tp = T1 + 
T2; + T3 = T1 - T2; + T1x = T19 + T1a; + T1b = T19 - T1a; + T5 = ri[WS(is, 9)]; + T7 = ri[WS(is, 12)]; + T8 = ri[WS(is, 5)]; + Tl = ri[WS(is, 8)]; + Tq = T4 + T5; + T6 = T4 - T5; + Tr = T7 + T8; + T9 = T7 - T8; + Tm = ri[WS(is, 1)]; + } + Ti = ri[WS(is, 6)]; + Tj = ri[WS(is, 13)]; + Te = ri[WS(is, 10)]; + Tx = Tl + Tm; + Tn = Tl - Tm; + Tw = Ti + Tj; + Tk = Ti - Tj; + Tf = ri[WS(is, 3)]; + Tb = ri[WS(is, 4)]; + Tc = ri[WS(is, 11)]; + } + { + E Tu, Tg, Tt, Td; + To = Tk + Tn; + T1i = Tn - Tk; + Tu = Te + Tf; + Tg = Te - Tf; + Tt = Tb + Tc; + Td = Tb - Tc; + T1M = Tr - Tq; + Ts = Tq + Tr; + Ta = T6 + T9; + T1k = T9 - T6; + T1L = Tt - Tu; + Tv = Tt + Tu; + Th = Td + Tg; + T1j = Tg - Td; + T1K = Tw - Tx; + Ty = Tw + Tx; + TZ = FNMS(KP356895867, Ta, To); + T14 = FNMS(KP356895867, To, Th); + Tz = FNMS(KP356895867, Th, Ta); + T1Z = FNMS(KP356895867, Ts, Ty); + } + } + { + E T1B, TE, T1C, TH, T1F, TV, TJ, T1E, TS, T1z, TO, TK, T1y, TL; + { + E TF, TG, TT, TU, TC, TD; + TC = ii[WS(is, 4)]; + TD = ii[WS(is, 11)]; + T27 = FNMS(KP356895867, Tv, Ts); + T2c = FNMS(KP356895867, Ty, Tv); + TF = ii[WS(is, 10)]; + T1B = TC + TD; + TE = TC - TD; + TG = ii[WS(is, 3)]; + TT = ii[WS(is, 8)]; + TU = ii[WS(is, 1)]; + { + E TQ, TR, TM, TN; + TQ = ii[WS(is, 6)]; + T1C = TF + TG; + TH = TF - TG; + T1F = TT + TU; + TV = TT - TU; + TR = ii[WS(is, 13)]; + TM = ii[WS(is, 12)]; + TN = ii[WS(is, 5)]; + TJ = ii[WS(is, 2)]; + T1E = TQ + TR; + TS = TQ - TR; + T1z = TM + TN; + TO = TM - TN; + TK = ii[WS(is, 9)]; + } + } + T1d = TE + TH; + TI = TE - TH; + T23 = T1F - T1E; + T1G = T1E + T1F; + T1D = T1B + T1C; + T24 = T1C - T1B; + T1y = TJ + TK; + TL = TJ - TK; + TW = TS - TV; + T1e = TS + TV; + T22 = T1y - T1z; + T1A = T1y + T1z; + TP = TL - TO; + T1c = TL + TO; + T1n = FNMS(KP356895867, T1c, T1e); + T1s = FNMS(KP356895867, T1d, T1c); + T1f = FNMS(KP356895867, T1e, T1d); + T1P = FNMS(KP356895867, T1A, T1G); + } + } + { + E T1U, T1H, T11, T12, T1o, T1q; + ro[WS(os, 7)] = T3 + Ta + Th + To; + io[WS(os, 
7)] = T1b + T1c + T1d + T1e; + T1U = FNMS(KP356895867, T1D, T1A); + T1H = FNMS(KP356895867, T1G, T1D); + ro[0] = Tp + Ts + Tv + Ty; + io[0] = T1x + T1A + T1D + T1G; + { + E TB, TY, T1u, T1w, T10; + { + E TA, TX, T1t, T1v; + TA = FNMS(KP692021471, Tz, To); + TX = FMA(KP554958132, TW, TP); + T1t = FNMS(KP692021471, T1s, T1e); + T1v = FMA(KP554958132, T1i, T1k); + TB = FNMS(KP900968867, TA, T3); + TY = FMA(KP801937735, TX, TI); + T1u = FNMS(KP900968867, T1t, T1b); + T1w = FMA(KP801937735, T1v, T1j); + } + T10 = FNMS(KP692021471, TZ, Th); + ro[WS(os, 1)] = FMA(KP974927912, TY, TB); + ro[WS(os, 13)] = FNMS(KP974927912, TY, TB); + io[WS(os, 13)] = FNMS(KP974927912, T1w, T1u); + io[WS(os, 1)] = FMA(KP974927912, T1w, T1u); + T11 = FNMS(KP900968867, T10, T3); + T12 = FMA(KP554958132, TI, TW); + T1o = FNMS(KP692021471, T1n, T1d); + T1q = FMA(KP554958132, T1j, T1i); + } + { + E T1J, T1N, T2d, T2f; + { + E T16, T17, T1g, T1l; + { + E T13, T1p, T1r, T15; + T15 = FNMS(KP692021471, T14, Ta); + T13 = FNMS(KP801937735, T12, TP); + T1p = FNMS(KP900968867, T1o, T1b); + T1r = FNMS(KP801937735, T1q, T1k); + T16 = FNMS(KP900968867, T15, T3); + ro[WS(os, 9)] = FMA(KP974927912, T13, T11); + ro[WS(os, 5)] = FNMS(KP974927912, T13, T11); + io[WS(os, 9)] = FMA(KP974927912, T1r, T1p); + io[WS(os, 5)] = FNMS(KP974927912, T1r, T1p); + T17 = FNMS(KP554958132, TP, TI); + } + T1g = FNMS(KP692021471, T1f, T1c); + T1l = FNMS(KP554958132, T1k, T1j); + { + E T18, T1h, T1m, T1I; + T1I = FNMS(KP692021471, T1H, T1A); + T18 = FNMS(KP801937735, T17, TW); + T1h = FNMS(KP900968867, T1g, T1b); + T1m = FNMS(KP801937735, T1l, T1i); + T1J = FNMS(KP900968867, T1I, T1x); + ro[WS(os, 3)] = FMA(KP974927912, T18, T16); + ro[WS(os, 11)] = FNMS(KP974927912, T18, T16); + io[WS(os, 11)] = FNMS(KP974927912, T1m, T1h); + io[WS(os, 3)] = FMA(KP974927912, T1m, T1h); + T1N = FMA(KP554958132, T1M, T1L); + } + T2d = FNMS(KP692021471, T2c, Ts); + T2f = FMA(KP554958132, T22, T24); + } + { + E T1R, T1S, T20, T25; + { + E T1O, T2e, 
T2g, T1Q; + T1Q = FNMS(KP692021471, T1P, T1D); + T1O = FNMS(KP801937735, T1N, T1K); + T2e = FNMS(KP900968867, T2d, Tp); + T2g = FNMS(KP801937735, T2f, T23); + T1R = FNMS(KP900968867, T1Q, T1x); + io[WS(os, 10)] = FNMS(KP974927912, T1O, T1J); + io[WS(os, 4)] = FMA(KP974927912, T1O, T1J); + ro[WS(os, 4)] = FMA(KP974927912, T2g, T2e); + ro[WS(os, 10)] = FNMS(KP974927912, T2g, T2e); + T1S = FMA(KP554958132, T1L, T1K); + } + T20 = FNMS(KP692021471, T1Z, Tv); + T25 = FMA(KP554958132, T24, T23); + { + E T1T, T21, T26, T1V; + T1V = FNMS(KP692021471, T1U, T1G); + T1T = FMA(KP801937735, T1S, T1M); + T21 = FNMS(KP900968867, T20, Tp); + T26 = FMA(KP801937735, T25, T22); + T1W = FNMS(KP900968867, T1V, T1x); + io[WS(os, 12)] = FNMS(KP974927912, T1T, T1R); + io[WS(os, 2)] = FMA(KP974927912, T1T, T1R); + ro[WS(os, 2)] = FMA(KP974927912, T26, T21); + ro[WS(os, 12)] = FNMS(KP974927912, T26, T21); + T1X = FNMS(KP554958132, T1K, T1M); + } + T28 = FNMS(KP692021471, T27, Ty); + T2a = FNMS(KP554958132, T23, T22); + } + } + } + } + T1Y = FNMS(KP801937735, T1X, T1L); + T29 = FNMS(KP900968867, T28, Tp); + T2b = FNMS(KP801937735, T2a, T24); + io[WS(os, 8)] = FNMS(KP974927912, T1Y, T1W); + io[WS(os, 6)] = FMA(KP974927912, T1Y, T1W); + ro[WS(os, 6)] = FMA(KP974927912, T2b, T29); + ro[WS(os, 8)] = FNMS(KP974927912, T2b, T29); + } + } +} + +static const kdft_desc desc = { 14, "n1_14", {64, 0, 84, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_14) (planner *p) { + X(kdft_register) (p, n1_14, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include n.h */ + +/* + * This function contains 148 FP additions, 72 FP multiplications, + * (or, 100 additions, 24 multiplications, 48 fused multiply/add), + * 43 stack variables, 6 constants, and 56 memory accesses + */ +#include "n.h" + +static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP222520933, 
+0.222520933956314404288902564496794759466355569); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) { + E T3, Tp, T16, T1f, Ta, T1q, Ts, T10, TG, T1z, T19, T1i, Th, T1s, Tv; + E T12, TU, T1B, T17, T1o, To, T1r, Ty, T11, TN, T1A, T18, T1l; + { + E T1, T2, T14, T15; + T1 = ri[0]; + T2 = ri[WS(is, 7)]; + T3 = T1 - T2; + Tp = T1 + T2; + T14 = ii[0]; + T15 = ii[WS(is, 7)]; + T16 = T14 - T15; + T1f = T14 + T15; + } + { + E T6, Tq, T9, Tr; + { + E T4, T5, T7, T8; + T4 = ri[WS(is, 2)]; + T5 = ri[WS(is, 9)]; + T6 = T4 - T5; + Tq = T4 + T5; + T7 = ri[WS(is, 12)]; + T8 = ri[WS(is, 5)]; + T9 = T7 - T8; + Tr = T7 + T8; + } + Ta = T6 + T9; + T1q = Tr - Tq; + Ts = Tq + Tr; + T10 = T9 - T6; + } + { + E TC, T1g, TF, T1h; + { + E TA, TB, TD, TE; + TA = ii[WS(is, 2)]; + TB = ii[WS(is, 9)]; + TC = TA - TB; + T1g = TA + TB; + TD = ii[WS(is, 12)]; + TE = ii[WS(is, 5)]; + TF = TD - TE; + T1h = TD + TE; + } + TG = TC - TF; + T1z = T1g - T1h; + T19 = TC + TF; + T1i = T1g + T1h; + } + { + E Td, Tt, Tg, Tu; + { + E Tb, Tc, Te, Tf; + Tb = ri[WS(is, 4)]; + Tc = ri[WS(is, 11)]; + Td = Tb - Tc; + Tt = Tb + Tc; + Te = ri[WS(is, 10)]; + Tf = ri[WS(is, 3)]; + Tg = Te - Tf; + Tu = Te + Tf; + } + Th = Td + Tg; + T1s = Tt - Tu; + Tv = Tt + Tu; + T12 = Tg - Td; + } + { + E TQ, T1m, TT, T1n; + { + E TO, TP, TR, TS; + TO = ii[WS(is, 4)]; + TP = ii[WS(is, 11)]; + TQ = TO - TP; + T1m = TO + TP; + TR = ii[WS(is, 10)]; + TS = ii[WS(is, 3)]; + TT = TR - TS; + T1n = TR + TS; + } + TU = TQ - TT; + T1B = T1n - T1m; + T17 = TQ + TT; + T1o = T1m + T1n; + } + { + 
E Tk, Tw, Tn, Tx; + { + E Ti, Tj, Tl, Tm; + Ti = ri[WS(is, 6)]; + Tj = ri[WS(is, 13)]; + Tk = Ti - Tj; + Tw = Ti + Tj; + Tl = ri[WS(is, 8)]; + Tm = ri[WS(is, 1)]; + Tn = Tl - Tm; + Tx = Tl + Tm; + } + To = Tk + Tn; + T1r = Tw - Tx; + Ty = Tw + Tx; + T11 = Tn - Tk; + } + { + E TJ, T1j, TM, T1k; + { + E TH, TI, TK, TL; + TH = ii[WS(is, 6)]; + TI = ii[WS(is, 13)]; + TJ = TH - TI; + T1j = TH + TI; + TK = ii[WS(is, 8)]; + TL = ii[WS(is, 1)]; + TM = TK - TL; + T1k = TK + TL; + } + TN = TJ - TM; + T1A = T1k - T1j; + T18 = TJ + TM; + T1l = T1j + T1k; + } + ro[WS(os, 7)] = T3 + Ta + Th + To; + io[WS(os, 7)] = T16 + T19 + T17 + T18; + ro[0] = Tp + Ts + Tv + Ty; + io[0] = T1f + T1i + T1o + T1l; + { + E TV, Tz, T1e, T1d; + TV = FNMS(KP781831482, TN, KP974927912 * TG) - (KP433883739 * TU); + Tz = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta); + ro[WS(os, 5)] = Tz - TV; + ro[WS(os, 9)] = Tz + TV; + T1e = FNMS(KP781831482, T11, KP974927912 * T10) - (KP433883739 * T12); + T1d = FMA(KP623489801, T18, T16) + FNMA(KP900968867, T17, KP222520933 * T19); + io[WS(os, 5)] = T1d - T1e; + io[WS(os, 9)] = T1e + T1d; + } + { + E TX, TW, T1b, T1c; + TX = FMA(KP781831482, TG, KP974927912 * TU) + (KP433883739 * TN); + TW = FMA(KP623489801, Ta, T3) + FNMA(KP900968867, To, KP222520933 * Th); + ro[WS(os, 13)] = TW - TX; + ro[WS(os, 1)] = TW + TX; + T1b = FMA(KP781831482, T10, KP974927912 * T12) + (KP433883739 * T11); + T1c = FMA(KP623489801, T19, T16) + FNMA(KP900968867, T18, KP222520933 * T17); + io[WS(os, 1)] = T1b + T1c; + io[WS(os, 13)] = T1c - T1b; + } + { + E TZ, TY, T13, T1a; + TZ = FMA(KP433883739, TG, KP974927912 * TN) - (KP781831482 * TU); + TY = FMA(KP623489801, Th, T3) + FNMA(KP222520933, To, KP900968867 * Ta); + ro[WS(os, 11)] = TY - TZ; + ro[WS(os, 3)] = TY + TZ; + T13 = FMA(KP433883739, T10, KP974927912 * T11) - (KP781831482 * T12); + T1a = FMA(KP623489801, T17, T16) + FNMA(KP222520933, T18, KP900968867 * T19); + io[WS(os, 3)] = T13 + T1a; + io[WS(os, 11)] = T1a 
- T13; + } + { + E T1t, T1p, T1C, T1y; + T1t = FNMS(KP433883739, T1r, KP781831482 * T1q) - (KP974927912 * T1s); + T1p = FMA(KP623489801, T1i, T1f) + FNMA(KP900968867, T1l, KP222520933 * T1o); + io[WS(os, 6)] = T1p - T1t; + io[WS(os, 8)] = T1t + T1p; + T1C = FNMS(KP433883739, T1A, KP781831482 * T1z) - (KP974927912 * T1B); + T1y = FMA(KP623489801, Ts, Tp) + FNMA(KP900968867, Ty, KP222520933 * Tv); + ro[WS(os, 6)] = T1y - T1C; + ro[WS(os, 8)] = T1y + T1C; + } + { + E T1v, T1u, T1E, T1D; + T1v = FMA(KP433883739, T1q, KP781831482 * T1s) - (KP974927912 * T1r); + T1u = FMA(KP623489801, T1o, T1f) + FNMA(KP222520933, T1l, KP900968867 * T1i); + io[WS(os, 4)] = T1u - T1v; + io[WS(os, 10)] = T1v + T1u; + T1E = FMA(KP433883739, T1z, KP781831482 * T1B) - (KP974927912 * T1A); + T1D = FMA(KP623489801, Tv, Tp) + FNMA(KP222520933, Ty, KP900968867 * Ts); + ro[WS(os, 4)] = T1D - T1E; + ro[WS(os, 10)] = T1D + T1E; + } + { + E T1w, T1x, T1G, T1F; + T1w = FMA(KP974927912, T1q, KP433883739 * T1s) + (KP781831482 * T1r); + T1x = FMA(KP623489801, T1l, T1f) + FNMA(KP900968867, T1o, KP222520933 * T1i); + io[WS(os, 2)] = T1w + T1x; + io[WS(os, 12)] = T1x - T1w; + T1G = FMA(KP974927912, T1z, KP433883739 * T1B) + (KP781831482 * T1A); + T1F = FMA(KP623489801, Ty, Tp) + FNMA(KP900968867, Tv, KP222520933 * Ts); + ro[WS(os, 12)] = T1F - T1G; + ro[WS(os, 2)] = T1F + T1G; + } + } + } +} + +static const kdft_desc desc = { 14, "n1_14", {100, 24, 48, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_14) (planner *p) { + X(kdft_register) (p, n1_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,580 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * 
it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 15 -name n1_15 -include n.h */ + +/* + * This function contains 156 FP additions, 84 FP multiplications, + * (or, 72 additions, 0 multiplications, 84 fused multiply/add), + * 75 stack variables, 6 constants, and 60 memory accesses + */ +#include "n.h" + +static void n1_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(60, is), MAKE_VOLATILE_STRIDE(60, os)) { + E T1r, T1g, T14, T13; + { + E T5, T2l, Tx, TV, T1z, T1X, T2s, Tr, T24, TT, T2e, T2n, T1Z, T1Q, 
T1B; + E T11, T1H, TW, T2t, Tg, TX, T25, TI, T2h, T2m, T1Y, T1T, T1A; + { + E T1, T1v, T2, T3, Tu, Tv, TZ, T10; + T1 = ri[0]; + T1v = ii[0]; + T2 = ri[WS(is, 5)]; + T3 = ri[WS(is, 10)]; + Tu = ii[WS(is, 5)]; + Tv = ii[WS(is, 10)]; + { + E T1k, Tm, TM, TJ, Tl, T2c, T1j, T1m, TP, T1p, Tp, TQ; + { + E Th, T1h, TK, TL, Tk, Tn, To, T1i; + { + E Ti, Tj, T1y, T4; + Th = ri[WS(is, 6)]; + T1y = T3 - T2; + T4 = T2 + T3; + { + E T1w, Tw, Tt, T1x; + T1w = Tu + Tv; + Tw = Tu - Tv; + Ti = ri[WS(is, 11)]; + T5 = T1 + T4; + Tt = FNMS(KP500000000, T4, T1); + T2l = T1v + T1w; + T1x = FNMS(KP500000000, T1w, T1v); + Tx = FNMS(KP866025403, Tw, Tt); + TV = FMA(KP866025403, Tw, Tt); + T1z = FMA(KP866025403, T1y, T1x); + T1X = FNMS(KP866025403, T1y, T1x); + Tj = ri[WS(is, 1)]; + } + T1h = ii[WS(is, 6)]; + TK = ii[WS(is, 11)]; + TL = ii[WS(is, 1)]; + Tk = Ti + Tj; + T1k = Tj - Ti; + } + Tm = ri[WS(is, 9)]; + TM = TK - TL; + T1i = TK + TL; + TJ = FNMS(KP500000000, Tk, Th); + Tl = Th + Tk; + Tn = ri[WS(is, 14)]; + To = ri[WS(is, 4)]; + T2c = T1h + T1i; + T1j = FNMS(KP500000000, T1i, T1h); + T1m = ii[WS(is, 9)]; + TP = ii[WS(is, 14)]; + T1p = To - Tn; + Tp = Tn + To; + TQ = ii[WS(is, 4)]; + } + { + E TN, TS, T1o, T2d; + { + E TO, T1n, TR, Tq; + TN = FNMS(KP866025403, TM, TJ); + TZ = FMA(KP866025403, TM, TJ); + TO = FNMS(KP500000000, Tp, Tm); + Tq = Tm + Tp; + T1n = TP + TQ; + TR = TP - TQ; + T2s = Tl - Tq; + Tr = Tl + Tq; + T10 = FMA(KP866025403, TR, TO); + TS = FNMS(KP866025403, TR, TO); + T1o = FNMS(KP500000000, T1n, T1m); + T2d = T1m + T1n; + } + { + E T1O, T1l, T1P, T1q; + T1O = FNMS(KP866025403, T1k, T1j); + T1l = FMA(KP866025403, T1k, T1j); + T24 = TN - TS; + TT = TN + TS; + T1P = FNMS(KP866025403, T1p, T1o); + T1q = FMA(KP866025403, T1p, T1o); + T2e = T2c - T2d; + T2n = T2c + T2d; + T1Z = T1O + T1P; + T1Q = T1O - T1P; + T1r = T1l - T1q; + T1B = T1l + T1q; + } + } + } + { + E T19, Tb, TB, Ty, Ta, T2f, T18, T1b, TE, T1e, Te, TF; + { + E T6, T16, Tz, TA, T9, T7, T8, Tc, Td, T17; + T6 = 
ri[WS(is, 3)]; + T7 = ri[WS(is, 8)]; + T11 = TZ + T10; + T1H = TZ - T10; + T8 = ri[WS(is, 13)]; + T16 = ii[WS(is, 3)]; + Tz = ii[WS(is, 8)]; + TA = ii[WS(is, 13)]; + T9 = T7 + T8; + T19 = T8 - T7; + Tb = ri[WS(is, 12)]; + TB = Tz - TA; + T17 = Tz + TA; + Ty = FNMS(KP500000000, T9, T6); + Ta = T6 + T9; + Tc = ri[WS(is, 2)]; + Td = ri[WS(is, 7)]; + T2f = T16 + T17; + T18 = FNMS(KP500000000, T17, T16); + T1b = ii[WS(is, 12)]; + TE = ii[WS(is, 2)]; + T1e = Td - Tc; + Te = Tc + Td; + TF = ii[WS(is, 7)]; + } + { + E TC, TH, T1d, T2g; + { + E TD, T1c, TG, Tf; + TC = FNMS(KP866025403, TB, Ty); + TW = FMA(KP866025403, TB, Ty); + TD = FNMS(KP500000000, Te, Tb); + Tf = Tb + Te; + T1c = TE + TF; + TG = TE - TF; + T2t = Ta - Tf; + Tg = Ta + Tf; + TX = FMA(KP866025403, TG, TD); + TH = FNMS(KP866025403, TG, TD); + T1d = FNMS(KP500000000, T1c, T1b); + T2g = T1b + T1c; + } + { + E T1R, T1a, T1S, T1f; + T1R = FNMS(KP866025403, T19, T18); + T1a = FMA(KP866025403, T19, T18); + T25 = TC - TH; + TI = TC + TH; + T1S = FNMS(KP866025403, T1e, T1d); + T1f = FMA(KP866025403, T1e, T1d); + T2h = T2f - T2g; + T2m = T2f + T2g; + T1Y = T1R + T1S; + T1T = T1R - T1S; + T1g = T1a - T1f; + T1A = T1a + T1f; + } + } + } + } + { + E TY, T1G, T1M, T1L, T2a, T29, Ts, T22, T21, T20; + T2a = Tg - Tr; + Ts = Tg + Tr; + TY = TW + TX; + T1G = TW - TX; + T29 = FNMS(KP250000000, Ts, T5); + ro[0] = T5 + Ts; + { + E T2q, T2p, T2o, TU; + T2o = T2m + T2n; + T2q = T2m - T2n; + { + E T2k, T2i, T2b, T2j; + T2k = FMA(KP618033988, T2e, T2h); + T2i = FNMS(KP618033988, T2h, T2e); + T2b = FNMS(KP559016994, T2a, T29); + T2j = FMA(KP559016994, T2a, T29); + ro[WS(os, 3)] = FMA(KP951056516, T2i, T2b); + ro[WS(os, 12)] = FNMS(KP951056516, T2i, T2b); + ro[WS(os, 6)] = FMA(KP951056516, T2k, T2j); + ro[WS(os, 9)] = FNMS(KP951056516, T2k, T2j); + T2p = FNMS(KP250000000, T2o, T2l); + } + io[0] = T2l + T2o; + TU = TI + TT; + T1M = TI - TT; + { + E T2r, T2v, T2w, T2u; + T2r = FNMS(KP559016994, T2q, T2p); + T2v = FMA(KP559016994, T2q, 
T2p); + T2w = FMA(KP618033988, T2s, T2t); + T2u = FNMS(KP618033988, T2t, T2s); + io[WS(os, 9)] = FMA(KP951056516, T2w, T2v); + io[WS(os, 6)] = FNMS(KP951056516, T2w, T2v); + io[WS(os, 12)] = FMA(KP951056516, T2u, T2r); + io[WS(os, 3)] = FNMS(KP951056516, T2u, T2r); + T1L = FNMS(KP250000000, TU, Tx); + } + ro[WS(os, 5)] = Tx + TU; + } + T20 = T1Y + T1Z; + T22 = T1Y - T1Z; + { + E T1N, T1V, T1W, T1U; + T1N = FNMS(KP559016994, T1M, T1L); + T1V = FMA(KP559016994, T1M, T1L); + T1W = FMA(KP618033988, T1Q, T1T); + T1U = FNMS(KP618033988, T1T, T1Q); + ro[WS(os, 11)] = FMA(KP951056516, T1W, T1V); + ro[WS(os, 14)] = FNMS(KP951056516, T1W, T1V); + ro[WS(os, 8)] = FMA(KP951056516, T1U, T1N); + ro[WS(os, 2)] = FNMS(KP951056516, T1U, T1N); + T21 = FNMS(KP250000000, T20, T1X); + } + io[WS(os, 5)] = T1X + T20; + { + E T1E, T1D, T1C, T12; + T1C = T1A + T1B; + T1E = T1A - T1B; + { + E T23, T27, T28, T26; + T23 = FNMS(KP559016994, T22, T21); + T27 = FMA(KP559016994, T22, T21); + T28 = FMA(KP618033988, T24, T25); + T26 = FNMS(KP618033988, T25, T24); + io[WS(os, 14)] = FMA(KP951056516, T28, T27); + io[WS(os, 11)] = FNMS(KP951056516, T28, T27); + io[WS(os, 8)] = FNMS(KP951056516, T26, T23); + io[WS(os, 2)] = FMA(KP951056516, T26, T23); + T1D = FNMS(KP250000000, T1C, T1z); + } + io[WS(os, 10)] = T1z + T1C; + T12 = TY + T11; + T14 = TY - T11; + { + E T1F, T1J, T1K, T1I; + T1F = FMA(KP559016994, T1E, T1D); + T1J = FNMS(KP559016994, T1E, T1D); + T1K = FNMS(KP618033988, T1G, T1H); + T1I = FMA(KP618033988, T1H, T1G); + io[WS(os, 13)] = FNMS(KP951056516, T1K, T1J); + io[WS(os, 7)] = FMA(KP951056516, T1K, T1J); + io[WS(os, 4)] = FMA(KP951056516, T1I, T1F); + io[WS(os, 1)] = FNMS(KP951056516, T1I, T1F); + T13 = FNMS(KP250000000, T12, TV); + } + ro[WS(os, 10)] = TV + T12; + } + } + } + { + E T1t, T15, T1s, T1u; + T1t = FNMS(KP559016994, T14, T13); + T15 = FMA(KP559016994, T14, T13); + T1s = FMA(KP618033988, T1r, T1g); + T1u = FNMS(KP618033988, T1g, T1r); + ro[WS(os, 13)] = FMA(KP951056516, T1u, 
T1t); + ro[WS(os, 7)] = FNMS(KP951056516, T1u, T1t); + ro[WS(os, 1)] = FMA(KP951056516, T1s, T15); + ro[WS(os, 4)] = FNMS(KP951056516, T1s, T15); + } + } + } +} + +static const kdft_desc desc = { 15, "n1_15", {72, 0, 84, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_15) (planner *p) { + X(kdft_register) (p, n1_15, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 15 -name n1_15 -include n.h */ + +/* + * This function contains 156 FP additions, 56 FP multiplications, + * (or, 128 additions, 28 multiplications, 28 fused multiply/add), + * 69 stack variables, 6 constants, and 60 memory accesses + */ +#include "n.h" + +static void n1_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(60, is), MAKE_VOLATILE_STRIDE(60, os)) { + E T5, T2l, Tx, TV, T1C, T20, Tl, Tq, Tr, TN, TS, TT, T2c, T2d, T2n; + E T1O, T1P, T22, T1l, T1q, T1w, TZ, T10, T11, Ta, Tf, Tg, TC, TH, TI; + E T2f, T2g, T2m, T1R, T1S, T21, T1a, T1f, T1v, TW, TX, TY; + { + E T1, T1z, T4, T1y, Tw, T1A, Tt, T1B; + T1 = ri[0]; + T1z = ii[0]; + { + E T2, T3, Tu, Tv; + T2 = ri[WS(is, 5)]; + T3 = ri[WS(is, 10)]; + T4 = T2 + T3; + T1y = KP866025403 * (T3 - T2); + Tu = ii[WS(is, 5)]; + Tv = ii[WS(is, 10)]; + Tw = KP866025403 * (Tu - Tv); + T1A = Tu + Tv; + } + T5 = T1 + T4; + T2l = T1z + T1A; + Tt = FNMS(KP500000000, T4, T1); + Tx = Tt - Tw; + TV = Tt + Tw; + T1B = 
FNMS(KP500000000, T1A, T1z); + T1C = T1y + T1B; + T20 = T1B - T1y; + } + { + E Th, Tk, TJ, T1h, T1i, T1j, TM, T1k, Tm, Tp, TO, T1m, T1n, T1o, TR; + E T1p; + { + E Ti, Tj, TK, TL; + Th = ri[WS(is, 6)]; + Ti = ri[WS(is, 11)]; + Tj = ri[WS(is, 1)]; + Tk = Ti + Tj; + TJ = FNMS(KP500000000, Tk, Th); + T1h = KP866025403 * (Tj - Ti); + T1i = ii[WS(is, 6)]; + TK = ii[WS(is, 11)]; + TL = ii[WS(is, 1)]; + T1j = TK + TL; + TM = KP866025403 * (TK - TL); + T1k = FNMS(KP500000000, T1j, T1i); + } + { + E Tn, To, TP, TQ; + Tm = ri[WS(is, 9)]; + Tn = ri[WS(is, 14)]; + To = ri[WS(is, 4)]; + Tp = Tn + To; + TO = FNMS(KP500000000, Tp, Tm); + T1m = KP866025403 * (To - Tn); + T1n = ii[WS(is, 9)]; + TP = ii[WS(is, 14)]; + TQ = ii[WS(is, 4)]; + T1o = TP + TQ; + TR = KP866025403 * (TP - TQ); + T1p = FNMS(KP500000000, T1o, T1n); + } + Tl = Th + Tk; + Tq = Tm + Tp; + Tr = Tl + Tq; + TN = TJ - TM; + TS = TO - TR; + TT = TN + TS; + T2c = T1i + T1j; + T2d = T1n + T1o; + T2n = T2c + T2d; + T1O = T1k - T1h; + T1P = T1p - T1m; + T22 = T1O + T1P; + T1l = T1h + T1k; + T1q = T1m + T1p; + T1w = T1l + T1q; + TZ = TJ + TM; + T10 = TO + TR; + T11 = TZ + T10; + } + { + E T6, T9, Ty, T16, T17, T18, TB, T19, Tb, Te, TD, T1b, T1c, T1d, TG; + E T1e; + { + E T7, T8, Tz, TA; + T6 = ri[WS(is, 3)]; + T7 = ri[WS(is, 8)]; + T8 = ri[WS(is, 13)]; + T9 = T7 + T8; + Ty = FNMS(KP500000000, T9, T6); + T16 = KP866025403 * (T8 - T7); + T17 = ii[WS(is, 3)]; + Tz = ii[WS(is, 8)]; + TA = ii[WS(is, 13)]; + T18 = Tz + TA; + TB = KP866025403 * (Tz - TA); + T19 = FNMS(KP500000000, T18, T17); + } + { + E Tc, Td, TE, TF; + Tb = ri[WS(is, 12)]; + Tc = ri[WS(is, 2)]; + Td = ri[WS(is, 7)]; + Te = Tc + Td; + TD = FNMS(KP500000000, Te, Tb); + T1b = KP866025403 * (Td - Tc); + T1c = ii[WS(is, 12)]; + TE = ii[WS(is, 2)]; + TF = ii[WS(is, 7)]; + T1d = TE + TF; + TG = KP866025403 * (TE - TF); + T1e = FNMS(KP500000000, T1d, T1c); + } + Ta = T6 + T9; + Tf = Tb + Te; + Tg = Ta + Tf; + TC = Ty - TB; + TH = TD - TG; + TI = TC + TH; + T2f = T17 + 
T18; + T2g = T1c + T1d; + T2m = T2f + T2g; + T1R = T19 - T16; + T1S = T1e - T1b; + T21 = T1R + T1S; + T1a = T16 + T19; + T1f = T1b + T1e; + T1v = T1a + T1f; + TW = Ty + TB; + TX = TD + TG; + TY = TW + TX; + } + { + E T2a, Ts, T29, T2i, T2k, T2e, T2h, T2j, T2b; + T2a = KP559016994 * (Tg - Tr); + Ts = Tg + Tr; + T29 = FNMS(KP250000000, Ts, T5); + T2e = T2c - T2d; + T2h = T2f - T2g; + T2i = FNMS(KP587785252, T2h, KP951056516 * T2e); + T2k = FMA(KP951056516, T2h, KP587785252 * T2e); + ro[0] = T5 + Ts; + T2j = T2a + T29; + ro[WS(os, 9)] = T2j - T2k; + ro[WS(os, 6)] = T2j + T2k; + T2b = T29 - T2a; + ro[WS(os, 12)] = T2b - T2i; + ro[WS(os, 3)] = T2b + T2i; + } + { + E T2q, T2o, T2p, T2u, T2w, T2s, T2t, T2v, T2r; + T2q = KP559016994 * (T2m - T2n); + T2o = T2m + T2n; + T2p = FNMS(KP250000000, T2o, T2l); + T2s = Tl - Tq; + T2t = Ta - Tf; + T2u = FNMS(KP587785252, T2t, KP951056516 * T2s); + T2w = FMA(KP951056516, T2t, KP587785252 * T2s); + io[0] = T2l + T2o; + T2v = T2q + T2p; + io[WS(os, 6)] = T2v - T2w; + io[WS(os, 9)] = T2w + T2v; + T2r = T2p - T2q; + io[WS(os, 3)] = T2r - T2u; + io[WS(os, 12)] = T2u + T2r; + } + { + E T1M, TU, T1L, T1U, T1W, T1Q, T1T, T1V, T1N; + T1M = KP559016994 * (TI - TT); + TU = TI + TT; + T1L = FNMS(KP250000000, TU, Tx); + T1Q = T1O - T1P; + T1T = T1R - T1S; + T1U = FNMS(KP587785252, T1T, KP951056516 * T1Q); + T1W = FMA(KP951056516, T1T, KP587785252 * T1Q); + ro[WS(os, 5)] = Tx + TU; + T1V = T1M + T1L; + ro[WS(os, 14)] = T1V - T1W; + ro[WS(os, 11)] = T1V + T1W; + T1N = T1L - T1M; + ro[WS(os, 2)] = T1N - T1U; + ro[WS(os, 8)] = T1N + T1U; + } + { + E T25, T23, T24, T1Z, T28, T1X, T1Y, T27, T26; + T25 = KP559016994 * (T21 - T22); + T23 = T21 + T22; + T24 = FNMS(KP250000000, T23, T20); + T1X = TN - TS; + T1Y = TC - TH; + T1Z = FNMS(KP587785252, T1Y, KP951056516 * T1X); + T28 = FMA(KP951056516, T1Y, KP587785252 * T1X); + io[WS(os, 5)] = T20 + T23; + T27 = T25 + T24; + io[WS(os, 11)] = T27 - T28; + io[WS(os, 14)] = T28 + T27; + T26 = T24 - T25; + 
io[WS(os, 2)] = T1Z + T26; + io[WS(os, 8)] = T26 - T1Z; + } + { + E T1x, T1D, T1E, T1I, T1J, T1G, T1H, T1K, T1F; + T1x = KP559016994 * (T1v - T1w); + T1D = T1v + T1w; + T1E = FNMS(KP250000000, T1D, T1C); + T1G = TW - TX; + T1H = TZ - T10; + T1I = FMA(KP951056516, T1G, KP587785252 * T1H); + T1J = FNMS(KP587785252, T1G, KP951056516 * T1H); + io[WS(os, 10)] = T1C + T1D; + T1K = T1E - T1x; + io[WS(os, 7)] = T1J + T1K; + io[WS(os, 13)] = T1K - T1J; + T1F = T1x + T1E; + io[WS(os, 1)] = T1F - T1I; + io[WS(os, 4)] = T1I + T1F; + } + { + E T13, T12, T14, T1s, T1u, T1g, T1r, T1t, T15; + T13 = KP559016994 * (TY - T11); + T12 = TY + T11; + T14 = FNMS(KP250000000, T12, TV); + T1g = T1a - T1f; + T1r = T1l - T1q; + T1s = FMA(KP951056516, T1g, KP587785252 * T1r); + T1u = FNMS(KP587785252, T1g, KP951056516 * T1r); + ro[WS(os, 10)] = TV + T12; + T1t = T14 - T13; + ro[WS(os, 7)] = T1t - T1u; + ro[WS(os, 13)] = T1t + T1u; + T15 = T13 + T14; + ro[WS(os, 4)] = T15 - T1s; + ro[WS(os, 1)] = T15 + T1s; + } + } + } +} + +static const kdft_desc desc = { 15, "n1_15", {128, 28, 28, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_15) (planner *p) { + X(kdft_register) (p, n1_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include n.h */ + +/* + * This function contains 144 FP additions, 40 FP multiplications, + * (or, 104 additions, 0 multiplications, 40 fused multiply/add), + * 82 stack variables, 3 constants, and 64 memory accesses + */ +#include "n.h" + +static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + E T1z, T1L, T1M, T1N, T1P, T1J, T1K, T1G, T1O, T1Q; + { + E T1l, T1H, T1R, T7, T1x, TN, TC, T25, T1E, T1b, T1Z, Tt, T2h, T22, T1D; + E T1g, T1n, TQ, Te, T26, TT, T1m, TJ, T1S, Tj, T11, Ti, T1V, TZ, Tk; + E T12, T13; + { + E Tq, T1c, Tp, T20, T1a, Tr, T1d, T1e; + { + E T4, TL, T3, T1k, Ty, T5, Tz, TA; + { + E T1, T2, Tw, Tx; + T1 = ri[0]; + T2 = ri[WS(is, 8)]; + Tw = ii[0]; + Tx = ii[WS(is, 8)]; + T4 = ri[WS(is, 4)]; + TL = T1 - T2; + T3 = T1 + T2; + T1k = Tw - Tx; + Ty = Tw + 
Tx; + T5 = ri[WS(is, 12)]; + Tz = ii[WS(is, 4)]; + TA = ii[WS(is, 12)]; + } + { + E Tn, To, T18, T19; + Tn = ri[WS(is, 15)]; + { + E T1j, T6, TM, TB; + T1j = T4 - T5; + T6 = T4 + T5; + TM = Tz - TA; + TB = Tz + TA; + T1l = T1j + T1k; + T1H = T1k - T1j; + T1R = T3 - T6; + T7 = T3 + T6; + T1x = TL + TM; + TN = TL - TM; + TC = Ty + TB; + T25 = Ty - TB; + To = ri[WS(is, 7)]; + } + T18 = ii[WS(is, 15)]; + T19 = ii[WS(is, 7)]; + Tq = ri[WS(is, 3)]; + T1c = Tn - To; + Tp = Tn + To; + T20 = T18 + T19; + T1a = T18 - T19; + Tr = ri[WS(is, 11)]; + T1d = ii[WS(is, 3)]; + T1e = ii[WS(is, 11)]; + } + } + { + E Tb, TP, Ta, TO, TF, Tc, TG, TH; + { + E T8, T9, TD, TE; + T8 = ri[WS(is, 2)]; + { + E T17, Ts, T21, T1f; + T17 = Tq - Tr; + Ts = Tq + Tr; + T21 = T1d + T1e; + T1f = T1d - T1e; + T1E = T1a - T17; + T1b = T17 + T1a; + T1Z = Tp - Ts; + Tt = Tp + Ts; + T2h = T20 + T21; + T22 = T20 - T21; + T1D = T1c + T1f; + T1g = T1c - T1f; + T9 = ri[WS(is, 10)]; + } + TD = ii[WS(is, 2)]; + TE = ii[WS(is, 10)]; + Tb = ri[WS(is, 14)]; + TP = T8 - T9; + Ta = T8 + T9; + TO = TD - TE; + TF = TD + TE; + Tc = ri[WS(is, 6)]; + TG = ii[WS(is, 14)]; + TH = ii[WS(is, 6)]; + } + { + E TR, Td, TS, TI; + T1n = TP + TO; + TQ = TO - TP; + TR = Tb - Tc; + Td = Tb + Tc; + TS = TG - TH; + TI = TG + TH; + Te = Ta + Td; + T26 = Td - Ta; + TT = TR + TS; + T1m = TR - TS; + TJ = TF + TI; + T1S = TF - TI; + } + } + { + E Tg, Th, TX, TY; + Tg = ri[WS(is, 1)]; + Th = ri[WS(is, 9)]; + TX = ii[WS(is, 1)]; + TY = ii[WS(is, 9)]; + Tj = ri[WS(is, 5)]; + T11 = Tg - Th; + Ti = Tg + Th; + T1V = TX + TY; + TZ = TX - TY; + Tk = ri[WS(is, 13)]; + T12 = ii[WS(is, 5)]; + T13 = ii[WS(is, 13)]; + } + } + { + E T2f, T1B, T10, T1U, T1X, T1A, T15, Tv, TK, T2i; + { + E Tf, Tu, T2j, T2k, T2g; + T2f = T7 - Te; + Tf = T7 + Te; + { + E TW, Tl, T1W, T14, Tm; + TW = Tj - Tk; + Tl = Tj + Tk; + T1W = T12 + T13; + T14 = T12 - T13; + T1B = TZ - TW; + T10 = TW + TZ; + T1U = Ti - Tl; + Tm = Ti + Tl; + T2g = T1V + T1W; + T1X = T1V - T1W; + T1A = T11 
+ T14; + T15 = T11 - T14; + Tu = Tm + Tt; + Tv = Tt - Tm; + } + TK = TC - TJ; + T2j = TC + TJ; + T2k = T2g + T2h; + T2i = T2g - T2h; + ro[0] = Tf + Tu; + ro[WS(os, 8)] = Tf - Tu; + io[0] = T2j + T2k; + io[WS(os, 8)] = T2j - T2k; + } + { + E T29, T1T, T27, T2d, T2a, T2b, T28, T24, T1Y, T23; + T29 = T1R - T1S; + T1T = T1R + T1S; + io[WS(os, 12)] = TK - Tv; + io[WS(os, 4)] = Tv + TK; + ro[WS(os, 4)] = T2f + T2i; + ro[WS(os, 12)] = T2f - T2i; + T27 = T25 - T26; + T2d = T26 + T25; + T2a = T1X - T1U; + T1Y = T1U + T1X; + T23 = T1Z - T22; + T2b = T1Z + T22; + T28 = T23 - T1Y; + T24 = T1Y + T23; + { + E T1I, TV, T1v, T1y, T1t, T1s, T1r, T1p, T1q, T1i; + { + E T1o, T2e, T2c, TU, T16, T1h; + T1I = TQ + TT; + TU = TQ - TT; + io[WS(os, 14)] = FNMS(KP707106781, T28, T27); + io[WS(os, 6)] = FMA(KP707106781, T28, T27); + ro[WS(os, 2)] = FMA(KP707106781, T24, T1T); + ro[WS(os, 10)] = FNMS(KP707106781, T24, T1T); + T2e = T2a + T2b; + T2c = T2a - T2b; + TV = FMA(KP707106781, TU, TN); + T1v = FNMS(KP707106781, TU, TN); + io[WS(os, 10)] = FNMS(KP707106781, T2e, T2d); + io[WS(os, 2)] = FMA(KP707106781, T2e, T2d); + ro[WS(os, 6)] = FMA(KP707106781, T2c, T29); + ro[WS(os, 14)] = FNMS(KP707106781, T2c, T29); + T1o = T1m - T1n; + T1y = T1n + T1m; + T1t = FNMS(KP414213562, T10, T15); + T16 = FMA(KP414213562, T15, T10); + T1h = FNMS(KP414213562, T1g, T1b); + T1s = FMA(KP414213562, T1b, T1g); + T1r = FMA(KP707106781, T1o, T1l); + T1p = FNMS(KP707106781, T1o, T1l); + T1q = T16 + T1h; + T1i = T16 - T1h; + } + { + E T1w, T1u, T1C, T1F; + io[WS(os, 15)] = FMA(KP923879532, T1q, T1p); + io[WS(os, 7)] = FNMS(KP923879532, T1q, T1p); + ro[WS(os, 3)] = FMA(KP923879532, T1i, TV); + ro[WS(os, 11)] = FNMS(KP923879532, T1i, TV); + T1w = T1t + T1s; + T1u = T1s - T1t; + T1z = FMA(KP707106781, T1y, T1x); + T1L = FNMS(KP707106781, T1y, T1x); + ro[WS(os, 15)] = FMA(KP923879532, T1w, T1v); + ro[WS(os, 7)] = FNMS(KP923879532, T1w, T1v); + io[WS(os, 3)] = FMA(KP923879532, T1u, T1r); + io[WS(os, 11)] = 
FNMS(KP923879532, T1u, T1r); + T1M = FNMS(KP414213562, T1A, T1B); + T1C = FMA(KP414213562, T1B, T1A); + T1F = FNMS(KP414213562, T1E, T1D); + T1N = FMA(KP414213562, T1D, T1E); + T1P = FMA(KP707106781, T1I, T1H); + T1J = FNMS(KP707106781, T1I, T1H); + T1K = T1F - T1C; + T1G = T1C + T1F; + } + } + } + } + } + io[WS(os, 5)] = FMA(KP923879532, T1K, T1J); + io[WS(os, 13)] = FNMS(KP923879532, T1K, T1J); + ro[WS(os, 1)] = FMA(KP923879532, T1G, T1z); + ro[WS(os, 9)] = FNMS(KP923879532, T1G, T1z); + T1O = T1M - T1N; + T1Q = T1M + T1N; + io[WS(os, 1)] = FMA(KP923879532, T1Q, T1P); + io[WS(os, 9)] = FNMS(KP923879532, T1Q, T1P); + ro[WS(os, 5)] = FMA(KP923879532, T1O, T1L); + ro[WS(os, 13)] = FNMS(KP923879532, T1O, T1L); + } + } +} + +static const kdft_desc desc = { 16, "n1_16", {104, 0, 40, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_16) (planner *p) { + X(kdft_register) (p, n1_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include n.h */ + +/* + * This function contains 144 FP additions, 24 FP multiplications, + * (or, 136 additions, 16 multiplications, 8 fused multiply/add), + * 50 stack variables, 3 constants, and 64 memory accesses + */ +#include "n.h" + +static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z; + E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B; + E T1U, T1A; + { + E T3, TL, Ty, T1k, T6, T1j, TB, TM; + { + E T1, T2, Tw, Tx; + T1 = ri[0]; 
+ T2 = ri[WS(is, 8)]; + T3 = T1 + T2; + TL = T1 - T2; + Tw = ii[0]; + Tx = ii[WS(is, 8)]; + Ty = Tw + Tx; + T1k = Tw - Tx; + } + { + E T4, T5, Tz, TA; + T4 = ri[WS(is, 4)]; + T5 = ri[WS(is, 12)]; + T6 = T4 + T5; + T1j = T4 - T5; + Tz = ii[WS(is, 4)]; + TA = ii[WS(is, 12)]; + TB = Tz + TA; + TM = Tz - TA; + } + T7 = T3 + T6; + T1R = T3 - T6; + T25 = Ty - TB; + TC = Ty + TB; + TN = TL - TM; + T1x = TL + TM; + T1H = T1k - T1j; + T1l = T1j + T1k; + } + { + E Tp, T17, T1f, T20, Ts, T1c, T1a, T21; + { + E Tn, To, T1d, T1e; + Tn = ri[WS(is, 15)]; + To = ri[WS(is, 7)]; + Tp = Tn + To; + T17 = Tn - To; + T1d = ii[WS(is, 15)]; + T1e = ii[WS(is, 7)]; + T1f = T1d - T1e; + T20 = T1d + T1e; + } + { + E Tq, Tr, T18, T19; + Tq = ri[WS(is, 3)]; + Tr = ri[WS(is, 11)]; + Ts = Tq + Tr; + T1c = Tq - Tr; + T18 = ii[WS(is, 3)]; + T19 = ii[WS(is, 11)]; + T1a = T18 - T19; + T21 = T18 + T19; + } + Tt = Tp + Ts; + T22 = T20 - T21; + T2h = T20 + T21; + T1b = T17 - T1a; + T1g = T1c + T1f; + T1E = T1f - T1c; + T1Z = Tp - Ts; + T1D = T17 + T1a; + } + { + E Ta, TP, TF, TO, Td, TR, TI, TS; + { + E T8, T9, TD, TE; + T8 = ri[WS(is, 2)]; + T9 = ri[WS(is, 10)]; + Ta = T8 + T9; + TP = T8 - T9; + TD = ii[WS(is, 2)]; + TE = ii[WS(is, 10)]; + TF = TD + TE; + TO = TD - TE; + } + { + E Tb, Tc, TG, TH; + Tb = ri[WS(is, 14)]; + Tc = ri[WS(is, 6)]; + Td = Tb + Tc; + TR = Tb - Tc; + TG = ii[WS(is, 14)]; + TH = ii[WS(is, 6)]; + TI = TG + TH; + TS = TG - TH; + } + Te = Ta + Td; + T1S = TF - TI; + T26 = Td - Ta; + TJ = TF + TI; + TQ = TO - TP; + T1m = TR - TS; + T1n = TP + TO; + TT = TR + TS; + } + { + E Ti, T11, TZ, T1V, Tl, TW, T14, T1W; + { + E Tg, Th, TX, TY; + Tg = ri[WS(is, 1)]; + Th = ri[WS(is, 9)]; + Ti = Tg + Th; + T11 = Tg - Th; + TX = ii[WS(is, 1)]; + TY = ii[WS(is, 9)]; + TZ = TX - TY; + T1V = TX + TY; + } + { + E Tj, Tk, T12, T13; + Tj = ri[WS(is, 5)]; + Tk = ri[WS(is, 13)]; + Tl = Tj + Tk; + TW = Tj - Tk; + T12 = ii[WS(is, 5)]; + T13 = ii[WS(is, 13)]; + T14 = T12 - T13; + T1W = T12 + T13; + } + Tm = 
Ti + Tl; + T1X = T1V - T1W; + T2g = T1V + T1W; + T10 = TW + TZ; + T15 = T11 - T14; + T1B = T11 + T14; + T1U = Ti - Tl; + T1A = TZ - TW; + } + { + E Tf, Tu, T2j, T2k; + Tf = T7 + Te; + Tu = Tm + Tt; + ro[WS(os, 8)] = Tf - Tu; + ro[0] = Tf + Tu; + T2j = TC + TJ; + T2k = T2g + T2h; + io[WS(os, 8)] = T2j - T2k; + io[0] = T2j + T2k; + } + { + E Tv, TK, T2f, T2i; + Tv = Tt - Tm; + TK = TC - TJ; + io[WS(os, 4)] = Tv + TK; + io[WS(os, 12)] = TK - Tv; + T2f = T7 - Te; + T2i = T2g - T2h; + ro[WS(os, 12)] = T2f - T2i; + ro[WS(os, 4)] = T2f + T2i; + } + { + E T1T, T27, T24, T28, T1Y, T23; + T1T = T1R + T1S; + T27 = T25 - T26; + T1Y = T1U + T1X; + T23 = T1Z - T22; + T24 = KP707106781 * (T1Y + T23); + T28 = KP707106781 * (T23 - T1Y); + ro[WS(os, 10)] = T1T - T24; + io[WS(os, 6)] = T27 + T28; + ro[WS(os, 2)] = T1T + T24; + io[WS(os, 14)] = T27 - T28; + } + { + E T29, T2d, T2c, T2e, T2a, T2b; + T29 = T1R - T1S; + T2d = T26 + T25; + T2a = T1X - T1U; + T2b = T1Z + T22; + T2c = KP707106781 * (T2a - T2b); + T2e = KP707106781 * (T2a + T2b); + ro[WS(os, 14)] = T29 - T2c; + io[WS(os, 2)] = T2d + T2e; + ro[WS(os, 6)] = T29 + T2c; + io[WS(os, 10)] = T2d - T2e; + } + { + E TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o; + TU = KP707106781 * (TQ - TT); + TV = TN + TU; + T1r = TN - TU; + T1o = KP707106781 * (T1m - T1n); + T1p = T1l - T1o; + T1v = T1l + T1o; + { + E T16, T1h, T1s, T1t; + T16 = FMA(KP923879532, T10, KP382683432 * T15); + T1h = FNMS(KP923879532, T1g, KP382683432 * T1b); + T1i = T16 + T1h; + T1q = T1h - T16; + T1s = FNMS(KP923879532, T15, KP382683432 * T10); + T1t = FMA(KP382683432, T1g, KP923879532 * T1b); + T1u = T1s - T1t; + T1w = T1s + T1t; + } + ro[WS(os, 11)] = TV - T1i; + io[WS(os, 11)] = T1v - T1w; + ro[WS(os, 3)] = TV + T1i; + io[WS(os, 3)] = T1v + T1w; + io[WS(os, 15)] = T1p - T1q; + ro[WS(os, 15)] = T1r - T1u; + io[WS(os, 7)] = T1p + T1q; + ro[WS(os, 7)] = T1r + T1u; + } + { + E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I; + T1y = KP707106781 * (T1n + T1m); + T1z 
= T1x + T1y; + T1L = T1x - T1y; + T1I = KP707106781 * (TQ + TT); + T1J = T1H - T1I; + T1P = T1H + T1I; + { + E T1C, T1F, T1M, T1N; + T1C = FMA(KP382683432, T1A, KP923879532 * T1B); + T1F = FNMS(KP382683432, T1E, KP923879532 * T1D); + T1G = T1C + T1F; + T1K = T1F - T1C; + T1M = FNMS(KP382683432, T1B, KP923879532 * T1A); + T1N = FMA(KP923879532, T1E, KP382683432 * T1D); + T1O = T1M - T1N; + T1Q = T1M + T1N; + } + ro[WS(os, 9)] = T1z - T1G; + io[WS(os, 9)] = T1P - T1Q; + ro[WS(os, 1)] = T1z + T1G; + io[WS(os, 1)] = T1P + T1Q; + io[WS(os, 13)] = T1J - T1K; + ro[WS(os, 13)] = T1L - T1O; + io[WS(os, 5)] = T1J + T1K; + ro[WS(os, 5)] = T1L + T1O; + } + } + } +} + +static const kdft_desc desc = { 16, "n1_16", {136, 16, 8, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_16) (planner *p) { + X(kdft_register) (p, n1_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -name n1_2 -include n.h */ + +/* + * This function contains 4 FP additions, 0 FP multiplications, + * (or, 4 additions, 0 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 8 memory accesses + */ +#include "n.h" + +static void n1_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + E T1, T2, T3, T4; + T1 = ri[0]; + T2 = ri[WS(is, 1)]; + T3 = ii[0]; + T4 = ii[WS(is, 1)]; + ro[0] = T1 + T2; + ro[WS(os, 1)] = T1 - T2; + io[0] = T3 + T4; + io[WS(os, 1)] = T3 - T4; + } + } +} + +static const kdft_desc desc = { 2, "n1_2", {4, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_2) (planner *p) { + X(kdft_register) (p, n1_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 2 -name n1_2 -include n.h */ + +/* + * This function contains 4 FP additions, 0 FP multiplications, + * (or, 4 additions, 0 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 8 memory accesses + */ +#include "n.h" + +static void n1_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, 
MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + E T1, T2, T3, T4; + T1 = ri[0]; + T2 = ri[WS(is, 1)]; + ro[WS(os, 1)] = T1 - T2; + ro[0] = T1 + T2; + T3 = ii[0]; + T4 = ii[WS(is, 1)]; + io[WS(os, 1)] = T3 - T4; + io[0] = T3 + T4; + } + } +} + +static const kdft_desc desc = { 2, "n1_2", {4, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_2) (planner *p) { + X(kdft_register) (p, n1_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include n.h */ + +/* + * This function contains 208 FP additions, 72 FP multiplications, + * (or, 136 additions, 0 multiplications, 72 fused multiply/add), + * 86 stack variables, 4 constants, and 80 memory accesses + */ +#include "n.h" + +static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) { + E T1Y, T1Z, T1W, T1V; + { + E T1d, TP, TD, T7, T3b, T2N, T2f, T1R, T2U, TB, T2P, T2A, T3d, T37, T3j; + E TJ, T2n, T1b, T1T, T1y, T2b, T2h, T1j, T2V, Tm, T2O, T2H, T3c, T34, T1e; + E T1f, T3i, TG, T2m, T10, T1S, T1J, T28, T2g; + { + E T4, T1N, T3, T2L, TN, T5, T1O, T1P, T1h, T1i; + { + E T1, T2, TL, TM; + T1 = ri[0]; + T2 = ri[WS(is, 10)]; + TL = ii[0]; + TM = ii[WS(is, 10)]; + T4 = ri[WS(is, 5)]; + T1N = T1 - T2; + T3 = T1 + T2; + T2L = TL + TM; + TN = TL - TM; + T5 = ri[WS(is, 15)]; + T1O = ii[WS(is, 5)]; + T1P = ii[WS(is, 15)]; + } + { + E T1o, Tp, T2u, T13, T14, Ts, T2v, T1r, Tx, T1t, Tw, T2x, T18, Ty, T1u; + E T1v; + { + E Tq, Tr, T1p, T1q; + { + E Tn, 
To, T11, T12; + Tn = ri[WS(is, 8)]; + { + E TO, T6, T2M, T1Q; + TO = T4 - T5; + T6 = T4 + T5; + T2M = T1O + T1P; + T1Q = T1O - T1P; + T1d = TO + TN; + TP = TN - TO; + TD = T3 + T6; + T7 = T3 - T6; + T3b = T2L + T2M; + T2N = T2L - T2M; + T2f = T1N + T1Q; + T1R = T1N - T1Q; + To = ri[WS(is, 18)]; + } + T11 = ii[WS(is, 8)]; + T12 = ii[WS(is, 18)]; + Tq = ri[WS(is, 13)]; + T1o = Tn - To; + Tp = Tn + To; + T2u = T11 + T12; + T13 = T11 - T12; + Tr = ri[WS(is, 3)]; + T1p = ii[WS(is, 13)]; + T1q = ii[WS(is, 3)]; + } + { + E Tu, Tv, T16, T17; + Tu = ri[WS(is, 12)]; + T14 = Tq - Tr; + Ts = Tq + Tr; + T2v = T1p + T1q; + T1r = T1p - T1q; + Tv = ri[WS(is, 2)]; + T16 = ii[WS(is, 12)]; + T17 = ii[WS(is, 2)]; + Tx = ri[WS(is, 17)]; + T1t = Tu - Tv; + Tw = Tu + Tv; + T2x = T16 + T17; + T18 = T16 - T17; + Ty = ri[WS(is, 7)]; + T1u = ii[WS(is, 17)]; + T1v = ii[WS(is, 7)]; + } + } + { + E TH, T19, T1w, TI; + { + E Tt, T2w, T35, TA, T2z, T36, Tz, T2y; + TH = Tp + Ts; + Tt = Tp - Ts; + T19 = Tx - Ty; + Tz = Tx + Ty; + T2y = T1u + T1v; + T1w = T1u - T1v; + T2w = T2u - T2v; + T35 = T2u + T2v; + TI = Tw + Tz; + TA = Tw - Tz; + T2z = T2x - T2y; + T36 = T2x + T2y; + T2U = Tt - TA; + TB = Tt + TA; + T2P = T2w + T2z; + T2A = T2w - T2z; + T3d = T35 + T36; + T37 = T35 - T36; + } + { + E T1s, T29, T1x, T2a, T15, T1a; + T15 = T13 - T14; + T1h = T14 + T13; + T1i = T19 + T18; + T1a = T18 - T19; + T1s = T1o - T1r; + T29 = T1o + T1r; + T3j = TH - TI; + TJ = TH + TI; + T1x = T1t - T1w; + T2a = T1t + T1w; + T2n = T15 - T1a; + T1b = T15 + T1a; + T1T = T1s + T1x; + T1y = T1s - T1x; + T2b = T29 - T2a; + T2h = T29 + T2a; + } + } + } + { + E Ta, T1z, T2B, TS, TT, Td, T2C, T1C, Ti, T1E, Th, T2E, TX, Tj, T1F; + E T1G; + { + E Tb, Tc, T1A, T1B; + { + E TQ, TR, T8, T9; + T8 = ri[WS(is, 4)]; + T9 = ri[WS(is, 14)]; + T1j = T1h + T1i; + T1Y = T1h - T1i; + TQ = ii[WS(is, 4)]; + TR = ii[WS(is, 14)]; + Ta = T8 + T9; + T1z = T8 - T9; + Tb = ri[WS(is, 9)]; + T2B = TQ + TR; + TS = TQ - TR; + Tc = ri[WS(is, 19)]; + T1A = 
ii[WS(is, 9)]; + T1B = ii[WS(is, 19)]; + } + { + E Tf, Tg, TV, TW; + Tf = ri[WS(is, 16)]; + TT = Tb - Tc; + Td = Tb + Tc; + T2C = T1A + T1B; + T1C = T1A - T1B; + Tg = ri[WS(is, 6)]; + TV = ii[WS(is, 16)]; + TW = ii[WS(is, 6)]; + Ti = ri[WS(is, 1)]; + T1E = Tf - Tg; + Th = Tf + Tg; + T2E = TV + TW; + TX = TV - TW; + Tj = ri[WS(is, 11)]; + T1F = ii[WS(is, 1)]; + T1G = ii[WS(is, 11)]; + } + } + { + E TE, TY, T1H, TF; + { + E Te, T2D, T32, Tl, T2G, T33, Tk, T2F; + TE = Ta + Td; + Te = Ta - Td; + TY = Ti - Tj; + Tk = Ti + Tj; + T2F = T1F + T1G; + T1H = T1F - T1G; + T2D = T2B - T2C; + T32 = T2B + T2C; + TF = Th + Tk; + Tl = Th - Tk; + T2G = T2E - T2F; + T33 = T2E + T2F; + T2V = Te - Tl; + Tm = Te + Tl; + T2O = T2D + T2G; + T2H = T2D - T2G; + T3c = T32 + T33; + T34 = T32 - T33; + } + { + E T1D, T26, T1I, T27, TU, TZ; + TU = TS - TT; + T1e = TT + TS; + T1f = TY + TX; + TZ = TX - TY; + T1D = T1z - T1C; + T26 = T1z + T1C; + T3i = TE - TF; + TG = TE + TF; + T1I = T1E - T1H; + T27 = T1E + T1H; + T2m = TU - TZ; + T10 = TU + TZ; + T1S = T1D + T1I; + T1J = T1D - T1I; + T28 = T26 - T27; + T2g = T26 + T27; + } + } + } + } + { + E T1g, T3g, T3f, T2S, T2R, T2k, T2j; + { + E T2s, T2r, TC, T2Q; + T2s = Tm - TB; + TC = Tm + TB; + T1g = T1e + T1f; + T1Z = T1e - T1f; + T2r = FNMS(KP250000000, TC, T7); + ro[WS(os, 10)] = T7 + TC; + T2Q = T2O + T2P; + T2S = T2O - T2P; + { + E T2K, T2I, T2t, T2J; + T2K = FMA(KP618033988, T2A, T2H); + T2I = FNMS(KP618033988, T2H, T2A); + T2t = FNMS(KP559016994, T2s, T2r); + T2J = FMA(KP559016994, T2s, T2r); + ro[WS(os, 18)] = FMA(KP951056516, T2I, T2t); + ro[WS(os, 2)] = FNMS(KP951056516, T2I, T2t); + ro[WS(os, 6)] = FMA(KP951056516, T2K, T2J); + ro[WS(os, 14)] = FNMS(KP951056516, T2K, T2J); + T2R = FNMS(KP250000000, T2Q, T2N); + } + io[WS(os, 10)] = T2N + T2Q; + } + { + E T30, T2Z, TK, T3e; + TK = TG + TJ; + T30 = TG - TJ; + { + E T2T, T2X, T2Y, T2W; + T2T = FNMS(KP559016994, T2S, T2R); + T2X = FMA(KP559016994, T2S, T2R); + T2Y = FMA(KP618033988, T2U, T2V); 
+ T2W = FNMS(KP618033988, T2V, T2U); + io[WS(os, 14)] = FMA(KP951056516, T2Y, T2X); + io[WS(os, 6)] = FNMS(KP951056516, T2Y, T2X); + io[WS(os, 18)] = FNMS(KP951056516, T2W, T2T); + io[WS(os, 2)] = FMA(KP951056516, T2W, T2T); + T2Z = FNMS(KP250000000, TK, TD); + } + ro[0] = TD + TK; + T3e = T3c + T3d; + T3g = T3c - T3d; + { + E T31, T39, T3a, T38; + T31 = FMA(KP559016994, T30, T2Z); + T39 = FNMS(KP559016994, T30, T2Z); + T3a = FNMS(KP618033988, T34, T37); + T38 = FMA(KP618033988, T37, T34); + ro[WS(os, 8)] = FMA(KP951056516, T3a, T39); + ro[WS(os, 12)] = FNMS(KP951056516, T3a, T39); + ro[WS(os, 16)] = FMA(KP951056516, T38, T31); + ro[WS(os, 4)] = FNMS(KP951056516, T38, T31); + T3f = FNMS(KP250000000, T3e, T3b); + } + io[0] = T3b + T3e; + } + { + E T24, T23, T1c, T2i; + T1c = T10 + T1b; + T24 = T10 - T1b; + { + E T3h, T3l, T3m, T3k; + T3h = FMA(KP559016994, T3g, T3f); + T3l = FNMS(KP559016994, T3g, T3f); + T3m = FNMS(KP618033988, T3i, T3j); + T3k = FMA(KP618033988, T3j, T3i); + io[WS(os, 12)] = FMA(KP951056516, T3m, T3l); + io[WS(os, 8)] = FNMS(KP951056516, T3m, T3l); + io[WS(os, 16)] = FNMS(KP951056516, T3k, T3h); + io[WS(os, 4)] = FMA(KP951056516, T3k, T3h); + T23 = FNMS(KP250000000, T1c, TP); + } + io[WS(os, 5)] = TP + T1c; + T2i = T2g + T2h; + T2k = T2g - T2h; + { + E T25, T2d, T2e, T2c; + T25 = FMA(KP559016994, T24, T23); + T2d = FNMS(KP559016994, T24, T23); + T2e = FNMS(KP618033988, T28, T2b); + T2c = FMA(KP618033988, T2b, T28); + io[WS(os, 17)] = FMA(KP951056516, T2e, T2d); + io[WS(os, 13)] = FNMS(KP951056516, T2e, T2d); + io[WS(os, 9)] = FMA(KP951056516, T2c, T25); + io[WS(os, 1)] = FNMS(KP951056516, T2c, T25); + T2j = FNMS(KP250000000, T2i, T2f); + } + ro[WS(os, 5)] = T2f + T2i; + } + { + E T1m, T1l, T1k, T1U; + T1k = T1g + T1j; + T1m = T1g - T1j; + { + E T2l, T2p, T2q, T2o; + T2l = FMA(KP559016994, T2k, T2j); + T2p = FNMS(KP559016994, T2k, T2j); + T2q = FNMS(KP618033988, T2m, T2n); + T2o = FMA(KP618033988, T2n, T2m); + ro[WS(os, 17)] = FNMS(KP951056516, 
T2q, T2p); + ro[WS(os, 13)] = FMA(KP951056516, T2q, T2p); + ro[WS(os, 9)] = FNMS(KP951056516, T2o, T2l); + ro[WS(os, 1)] = FMA(KP951056516, T2o, T2l); + T1l = FNMS(KP250000000, T1k, T1d); + } + io[WS(os, 15)] = T1d + T1k; + T1U = T1S + T1T; + T1W = T1S - T1T; + { + E T1n, T1L, T1M, T1K; + T1n = FNMS(KP559016994, T1m, T1l); + T1L = FMA(KP559016994, T1m, T1l); + T1M = FMA(KP618033988, T1y, T1J); + T1K = FNMS(KP618033988, T1J, T1y); + io[WS(os, 19)] = FMA(KP951056516, T1M, T1L); + io[WS(os, 11)] = FNMS(KP951056516, T1M, T1L); + io[WS(os, 7)] = FMA(KP951056516, T1K, T1n); + io[WS(os, 3)] = FNMS(KP951056516, T1K, T1n); + T1V = FNMS(KP250000000, T1U, T1R); + } + ro[WS(os, 15)] = T1R + T1U; + } + } + } + { + E T21, T1X, T20, T22; + T21 = FMA(KP559016994, T1W, T1V); + T1X = FNMS(KP559016994, T1W, T1V); + T20 = FNMS(KP618033988, T1Z, T1Y); + T22 = FMA(KP618033988, T1Y, T1Z); + ro[WS(os, 19)] = FNMS(KP951056516, T22, T21); + ro[WS(os, 11)] = FMA(KP951056516, T22, T21); + ro[WS(os, 7)] = FNMS(KP951056516, T20, T1X); + ro[WS(os, 3)] = FMA(KP951056516, T20, T1X); + } + } + } +} + +static const kdft_desc desc = { 20, "n1_20", {136, 0, 72, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_20) (planner *p) { + X(kdft_register) (p, n1_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include n.h */ + +/* + * This function contains 208 FP additions, 48 FP multiplications, + * (or, 184 additions, 24 multiplications, 24 fused multiply/add), + * 81 stack variables, 4 constants, and 80 memory accesses + */ +#include "n.h" + +static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, 
+0.559016994374947424102293417182819058860154590); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) { + E T7, T2Q, T3h, TD, TP, T1U, T2l, T1d, Tt, TA, TB, T2w, T2z, T2S, T35; + E T36, T3f, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1W, T29, T2a, T2j, T1h; + E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2R, T32, T33, T3e, TE, TF, TG, TU; + E TZ, T10, T1D, T1I, T1V, T26, T27, T2i, T1e, T1f, T1g; + { + E T3, T1Q, TN, T2O, T6, TO, T1T, T2P; + { + E T1, T2, TL, TM; + T1 = ri[0]; + T2 = ri[WS(is, 10)]; + T3 = T1 + T2; + T1Q = T1 - T2; + TL = ii[0]; + TM = ii[WS(is, 10)]; + TN = TL - TM; + T2O = TL + TM; + } + { + E T4, T5, T1R, T1S; + T4 = ri[WS(is, 5)]; + T5 = ri[WS(is, 15)]; + T6 = T4 + T5; + TO = T4 - T5; + T1R = ii[WS(is, 5)]; + T1S = ii[WS(is, 15)]; + T1T = T1R - T1S; + T2P = T1R + T1S; + } + T7 = T3 - T6; + T2Q = T2O - T2P; + T3h = T2O + T2P; + TD = T3 + T6; + TP = TN - TO; + T1U = T1Q - T1T; + T2l = T1Q + T1T; + T1d = TO + TN; + } + { + E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w; + E T2y; + { + E Tn, To, T11, T12; + Tn = ri[WS(is, 8)]; + To = ri[WS(is, 18)]; + Tp = Tn + To; + T1o = Tn - To; + T11 = ii[WS(is, 8)]; + T12 = ii[WS(is, 18)]; + T13 = T11 - T12; + T2u = T11 + T12; + } + { + E Tq, Tr, T1p, T1q; + Tq = ri[WS(is, 13)]; + Tr = ri[WS(is, 3)]; + Ts = Tq + Tr; + T14 = Tq - Tr; + T1p = ii[WS(is, 13)]; + T1q = ii[WS(is, 3)]; + T1r = T1p - T1q; + T2v = T1p + T1q; + } + { + E Tu, Tv, T16, T17; + Tu = ri[WS(is, 12)]; + Tv = ri[WS(is, 2)]; + Tw = Tu + Tv; + T1t = Tu - Tv; + T16 = ii[WS(is, 12)]; + T17 = ii[WS(is, 2)]; + T18 = T16 - T17; + T2x = T16 + T17; + } + { + E Tx, Ty, T1u, T1v; + Tx = ri[WS(is, 17)]; + Ty = ri[WS(is, 7)]; + Tz = Tx + Ty; + T19 = Tx - Ty; + T1u = ii[WS(is, 17)]; + T1v = ii[WS(is, 7)]; + T1w = T1u - T1v; + T2y = T1u + T1v; + } + Tt = Tp - Ts; + TA = Tw - Tz; + TB = Tt + TA; + T2w = T2u - T2v; + T2z = T2x - T2y; + T2S = 
T2w + T2z; + T35 = T2u + T2v; + T36 = T2x + T2y; + T3f = T35 + T36; + TH = Tp + Ts; + TI = Tw + Tz; + TJ = TH + TI; + T15 = T13 - T14; + T1a = T18 - T19; + T1b = T15 + T1a; + T1s = T1o - T1r; + T1x = T1t - T1w; + T1W = T1s + T1x; + T29 = T1o + T1r; + T2a = T1t + T1w; + T2j = T29 + T2a; + T1h = T14 + T13; + T1i = T19 + T18; + T1j = T1h + T1i; + } + { + E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H; + E T2F; + { + E T8, T9, TQ, TR; + T8 = ri[WS(is, 4)]; + T9 = ri[WS(is, 14)]; + Ta = T8 + T9; + T1z = T8 - T9; + TQ = ii[WS(is, 4)]; + TR = ii[WS(is, 14)]; + TS = TQ - TR; + T2B = TQ + TR; + } + { + E Tb, Tc, T1A, T1B; + Tb = ri[WS(is, 9)]; + Tc = ri[WS(is, 19)]; + Td = Tb + Tc; + TT = Tb - Tc; + T1A = ii[WS(is, 9)]; + T1B = ii[WS(is, 19)]; + T1C = T1A - T1B; + T2C = T1A + T1B; + } + { + E Tf, Tg, TV, TW; + Tf = ri[WS(is, 16)]; + Tg = ri[WS(is, 6)]; + Th = Tf + Tg; + T1E = Tf - Tg; + TV = ii[WS(is, 16)]; + TW = ii[WS(is, 6)]; + TX = TV - TW; + T2E = TV + TW; + } + { + E Ti, Tj, T1F, T1G; + Ti = ri[WS(is, 1)]; + Tj = ri[WS(is, 11)]; + Tk = Ti + Tj; + TY = Ti - Tj; + T1F = ii[WS(is, 1)]; + T1G = ii[WS(is, 11)]; + T1H = T1F - T1G; + T2F = T1F + T1G; + } + Te = Ta - Td; + Tl = Th - Tk; + Tm = Te + Tl; + T2D = T2B - T2C; + T2G = T2E - T2F; + T2R = T2D + T2G; + T32 = T2B + T2C; + T33 = T2E + T2F; + T3e = T32 + T33; + TE = Ta + Td; + TF = Th + Tk; + TG = TE + TF; + TU = TS - TT; + TZ = TX - TY; + T10 = TU + TZ; + T1D = T1z - T1C; + T1I = T1E - T1H; + T1V = T1D + T1I; + T26 = T1z + T1C; + T27 = T1E + T1H; + T2i = T26 + T27; + T1e = TT + TS; + T1f = TY + TX; + T1g = T1e + T1f; + } + { + E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t; + T2s = KP559016994 * (Tm - TB); + TC = Tm + TB; + T2r = FNMS(KP250000000, TC, T7); + T2A = T2w - T2z; + T2H = T2D - T2G; + T2I = FNMS(KP587785252, T2H, KP951056516 * T2A); + T2K = FMA(KP951056516, T2H, KP587785252 * T2A); + ro[WS(os, 10)] = T7 + TC; + T2J = T2s + T2r; + ro[WS(os, 14)] = T2J - T2K; + ro[WS(os, 6)] = T2J + T2K; + 
T2t = T2r - T2s; + ro[WS(os, 2)] = T2t - T2I; + ro[WS(os, 18)] = T2t + T2I; + } + { + E T2V, T2T, T2U, T2N, T2Y, T2L, T2M, T2X, T2W; + T2V = KP559016994 * (T2R - T2S); + T2T = T2R + T2S; + T2U = FNMS(KP250000000, T2T, T2Q); + T2L = Tt - TA; + T2M = Te - Tl; + T2N = FNMS(KP587785252, T2M, KP951056516 * T2L); + T2Y = FMA(KP951056516, T2M, KP587785252 * T2L); + io[WS(os, 10)] = T2Q + T2T; + T2X = T2V + T2U; + io[WS(os, 6)] = T2X - T2Y; + io[WS(os, 14)] = T2Y + T2X; + T2W = T2U - T2V; + io[WS(os, 2)] = T2N + T2W; + io[WS(os, 18)] = T2W - T2N; + } + { + E T2Z, TK, T30, T38, T3a, T34, T37, T39, T31; + T2Z = KP559016994 * (TG - TJ); + TK = TG + TJ; + T30 = FNMS(KP250000000, TK, TD); + T34 = T32 - T33; + T37 = T35 - T36; + T38 = FMA(KP951056516, T34, KP587785252 * T37); + T3a = FNMS(KP587785252, T34, KP951056516 * T37); + ro[0] = TD + TK; + T39 = T30 - T2Z; + ro[WS(os, 12)] = T39 - T3a; + ro[WS(os, 8)] = T39 + T3a; + T31 = T2Z + T30; + ro[WS(os, 4)] = T31 - T38; + ro[WS(os, 16)] = T31 + T38; + } + { + E T3g, T3i, T3j, T3d, T3m, T3b, T3c, T3l, T3k; + T3g = KP559016994 * (T3e - T3f); + T3i = T3e + T3f; + T3j = FNMS(KP250000000, T3i, T3h); + T3b = TE - TF; + T3c = TH - TI; + T3d = FMA(KP951056516, T3b, KP587785252 * T3c); + T3m = FNMS(KP587785252, T3b, KP951056516 * T3c); + io[0] = T3h + T3i; + T3l = T3j - T3g; + io[WS(os, 8)] = T3l - T3m; + io[WS(os, 12)] = T3m + T3l; + T3k = T3g + T3j; + io[WS(os, 4)] = T3d + T3k; + io[WS(os, 16)] = T3k - T3d; + } + { + E T23, T1c, T24, T2c, T2e, T28, T2b, T2d, T25; + T23 = KP559016994 * (T10 - T1b); + T1c = T10 + T1b; + T24 = FNMS(KP250000000, T1c, TP); + T28 = T26 - T27; + T2b = T29 - T2a; + T2c = FMA(KP951056516, T28, KP587785252 * T2b); + T2e = FNMS(KP587785252, T28, KP951056516 * T2b); + io[WS(os, 5)] = TP + T1c; + T2d = T24 - T23; + io[WS(os, 13)] = T2d - T2e; + io[WS(os, 17)] = T2d + T2e; + T25 = T23 + T24; + io[WS(os, 1)] = T25 - T2c; + io[WS(os, 9)] = T25 + T2c; + } + { + E T2k, T2m, T2n, T2h, T2p, T2f, T2g, T2q, T2o; + T2k = 
KP559016994 * (T2i - T2j); + T2m = T2i + T2j; + T2n = FNMS(KP250000000, T2m, T2l); + T2f = TU - TZ; + T2g = T15 - T1a; + T2h = FMA(KP951056516, T2f, KP587785252 * T2g); + T2p = FNMS(KP587785252, T2f, KP951056516 * T2g); + ro[WS(os, 5)] = T2l + T2m; + T2q = T2n - T2k; + ro[WS(os, 13)] = T2p + T2q; + ro[WS(os, 17)] = T2q - T2p; + T2o = T2k + T2n; + ro[WS(os, 1)] = T2h + T2o; + ro[WS(os, 9)] = T2o - T2h; + } + { + E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n; + T1m = KP559016994 * (T1g - T1j); + T1k = T1g + T1j; + T1l = FNMS(KP250000000, T1k, T1d); + T1y = T1s - T1x; + T1J = T1D - T1I; + T1K = FNMS(KP587785252, T1J, KP951056516 * T1y); + T1M = FMA(KP951056516, T1J, KP587785252 * T1y); + io[WS(os, 15)] = T1d + T1k; + T1L = T1m + T1l; + io[WS(os, 11)] = T1L - T1M; + io[WS(os, 19)] = T1L + T1M; + T1n = T1l - T1m; + io[WS(os, 3)] = T1n - T1K; + io[WS(os, 7)] = T1n + T1K; + } + { + E T1Z, T1X, T1Y, T1P, T21, T1N, T1O, T22, T20; + T1Z = KP559016994 * (T1V - T1W); + T1X = T1V + T1W; + T1Y = FNMS(KP250000000, T1X, T1U); + T1N = T1h - T1i; + T1O = T1e - T1f; + T1P = FNMS(KP587785252, T1O, KP951056516 * T1N); + T21 = FMA(KP951056516, T1O, KP587785252 * T1N); + ro[WS(os, 15)] = T1U + T1X; + T22 = T1Z + T1Y; + ro[WS(os, 11)] = T21 + T22; + ro[WS(os, 19)] = T22 - T21; + T20 = T1Y - T1Z; + ro[WS(os, 3)] = T1P + T20; + ro[WS(os, 7)] = T20 - T1P; + } + } + } +} + +static const kdft_desc desc = { 20, "n1_20", {184, 24, 24, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_20) (planner *p) { + X(kdft_register) (p, n1_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1207 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it 
under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -name n1_25 -include n.h */ + +/* + * This function contains 352 FP additions, 268 FP multiplications, + * (or, 84 additions, 0 multiplications, 268 fused multiply/add), + * 164 stack variables, 47 constants, and 100 memory accesses + */ +#include "n.h" + +static void n1_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP943557151, 
+0.943557151597354104399655195398983005179443399); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP062914667, +0.062914667253649757225485955897349402364686947); + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP833417178, +0.833417178328688677408962550243238843138996060); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + 
DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(100, is), MAKE_VOLATILE_STRIDE(100, os)) { + E T3Y, T3U, T3W, T42, T44, T3X, T3R, T3V, T3Z, T43; + { + E T4Q, T1U, T9, T3b, T45, T3e, T46, T1D, T4P, T1R, Ts, T1K, T18, T1E, T4z; + E T5f, T3z, T22, T4s, T5b, T3C, T2o, T3D, T2h, T4p, T5c, T4w, T5e, T3A, T29; + E T2z, T2y, TL, T1L, T1r, T1F, T4a, T57, T3v, T2x, T4k, T55, T3s, T2T, T2D; + E T4c, T3t, T2M, T4h, T54, T1v, T1C, T1Q; + { + E T1, T2, T3, T5, T6; + T1 = ri[0]; + T2 = ri[WS(is, 5)]; + T3 = ri[WS(is, 20)]; + T5 = ri[WS(is, 10)]; + T6 = ri[WS(is, 15)]; + { + E T3a, T3c, T1y, T1z, T1A, T39, T4, T1S, T1B, T3d; + T1v = ii[0]; + T4 = T2 + T3; + T1S = T2 - T3; + { + E T7, T1T, T8, T1w, T1x; + T7 = T5 + T6; + T1T = T5 - T6; + T1w = ii[WS(is, 5)]; + T1x = ii[WS(is, 20)]; + T4Q = FNMS(KP618033988, T1S, T1T); + T1U = FMA(KP618033988, T1T, T1S); + T8 = T4 + T7; + T3a = T4 - T7; + T3c = T1w - T1x; + T1y = T1w + T1x; + T1z = ii[WS(is, 10)]; + T1A = ii[WS(is, 15)]; + T39 = FNMS(KP250000000, T8, T1); + T9 = T1 + T8; + } + T1B = T1z + T1A; + T3d = T1z - T1A; + T3b = FMA(KP559016994, T3a, T39); + T45 = FNMS(KP559016994, T3a, T39); + T3e = FMA(KP618033988, T3d, T3c); + T46 = FNMS(KP618033988, T3c, T3d); + T1C = T1y + T1B; + T1Q = T1y - T1B; + } + } + { + E T24, T23, T28, T4v; + { + E Ta, TQ, Tj, 
TZ, T1Z, T20, Th, T26, T27, T1X, TX, T2l, T2m, Tq, T2c; + E T2e, T12, T15, T2f, T1P, TT, TW; + Ta = ri[WS(is, 1)]; + T1P = FNMS(KP250000000, T1C, T1v); + T1D = T1v + T1C; + TQ = ii[WS(is, 1)]; + Tj = ri[WS(is, 4)]; + T4P = FNMS(KP559016994, T1Q, T1P); + T1R = FMA(KP559016994, T1Q, T1P); + TZ = ii[WS(is, 4)]; + { + E Tb, Tc, Te, Tf; + Tb = ri[WS(is, 6)]; + Tc = ri[WS(is, 21)]; + Te = ri[WS(is, 11)]; + Tf = ri[WS(is, 16)]; + { + E TR, Td, Tg, TS, TU, TV; + TR = ii[WS(is, 6)]; + T1Z = Tc - Tb; + Td = Tb + Tc; + T20 = Tf - Te; + Tg = Te + Tf; + TS = ii[WS(is, 21)]; + TU = ii[WS(is, 11)]; + TV = ii[WS(is, 16)]; + Th = Td + Tg; + T24 = Td - Tg; + T26 = TR - TS; + TT = TR + TS; + TW = TU + TV; + T27 = TV - TU; + } + } + { + E Tk, Tl, Tn, To; + Tk = ri[WS(is, 9)]; + T1X = TT - TW; + TX = TT + TW; + Tl = ri[WS(is, 24)]; + Tn = ri[WS(is, 14)]; + To = ri[WS(is, 19)]; + { + E T10, Tm, Tp, T11, T13, T14; + T10 = ii[WS(is, 9)]; + T2l = Tl - Tk; + Tm = Tk + Tl; + T2m = To - Tn; + Tp = Tn + To; + T11 = ii[WS(is, 24)]; + T13 = ii[WS(is, 14)]; + T14 = ii[WS(is, 19)]; + Tq = Tm + Tp; + T2c = Tm - Tp; + T2e = T11 - T10; + T12 = T10 + T11; + T15 = T13 + T14; + T2f = T14 - T13; + } + } + { + E T2j, T2b, T1W, T21, T4y, T2i; + { + E Ti, T16, Tr, TY, T17; + T23 = FNMS(KP250000000, Th, Ta); + Ti = Ta + Th; + T2j = T15 - T12; + T16 = T12 + T15; + Tr = Tj + Tq; + T2b = FMS(KP250000000, Tq, Tj); + T1W = FNMS(KP250000000, TX, TQ); + TY = TQ + TX; + T21 = FMA(KP618033988, T20, T1Z); + T4y = FNMS(KP618033988, T1Z, T20); + T2i = FNMS(KP250000000, T16, TZ); + T17 = TZ + T16; + Ts = Ti + Tr; + T1K = Ti - Tr; + T18 = TY - T17; + T1E = TY + T17; + } + { + E T2n, T4r, T4x, T1Y; + T2n = FMA(KP618033988, T2m, T2l); + T4r = FNMS(KP618033988, T2l, T2m); + T4x = FNMS(KP559016994, T1X, T1W); + T1Y = FMA(KP559016994, T1X, T1W); + { + E T4o, T2g, T2d, T4n, T4q, T2k; + T4o = FNMS(KP618033988, T2e, T2f); + T2g = FMA(KP618033988, T2f, T2e); + T4z = FMA(KP951056516, T4y, T4x); + T5f = FNMS(KP951056516, T4y, T4x); 
+ T3z = FNMS(KP951056516, T21, T1Y); + T22 = FMA(KP951056516, T21, T1Y); + T4q = FMA(KP559016994, T2j, T2i); + T2k = FNMS(KP559016994, T2j, T2i); + T4s = FMA(KP951056516, T4r, T4q); + T5b = FNMS(KP951056516, T4r, T4q); + T3C = FNMS(KP951056516, T2n, T2k); + T2o = FMA(KP951056516, T2n, T2k); + T2d = FNMS(KP559016994, T2c, T2b); + T4n = FMA(KP559016994, T2c, T2b); + T28 = FNMS(KP618033988, T27, T26); + T4v = FMA(KP618033988, T26, T27); + T3D = FNMS(KP951056516, T2g, T2d); + T2h = FMA(KP951056516, T2g, T2d); + T4p = FMA(KP951056516, T4o, T4n); + T5c = FNMS(KP951056516, T4o, T4n); + } + } + } + } + { + E Tt, T19, TC, T1i, T2u, T2v, TA, T2B, T2C, T2s, T1g, T2J, T2K, TJ, T2O; + E T2Q, T1l, T1o, T2R; + { + E T4u, T25, T1c, T1f; + Tt = ri[WS(is, 2)]; + T19 = ii[WS(is, 2)]; + TC = ri[WS(is, 3)]; + T4u = FNMS(KP559016994, T24, T23); + T25 = FMA(KP559016994, T24, T23); + T1i = ii[WS(is, 3)]; + { + E Tu, Tv, Tx, Ty; + Tu = ri[WS(is, 7)]; + T4w = FNMS(KP951056516, T4v, T4u); + T5e = FMA(KP951056516, T4v, T4u); + T3A = FNMS(KP951056516, T28, T25); + T29 = FMA(KP951056516, T28, T25); + Tv = ri[WS(is, 22)]; + Tx = ri[WS(is, 12)]; + Ty = ri[WS(is, 17)]; + { + E T1a, Tw, Tz, T1b, T1d, T1e; + T1a = ii[WS(is, 7)]; + T2u = Tv - Tu; + Tw = Tu + Tv; + T2v = Ty - Tx; + Tz = Tx + Ty; + T1b = ii[WS(is, 22)]; + T1d = ii[WS(is, 12)]; + T1e = ii[WS(is, 17)]; + TA = Tw + Tz; + T2z = Tz - Tw; + T2B = T1b - T1a; + T1c = T1a + T1b; + T1f = T1d + T1e; + T2C = T1d - T1e; + } + } + { + E TD, TE, TG, TH; + TD = ri[WS(is, 8)]; + T2s = T1f - T1c; + T1g = T1c + T1f; + TE = ri[WS(is, 23)]; + TG = ri[WS(is, 13)]; + TH = ri[WS(is, 18)]; + { + E T1j, TF, TI, T1k, T1m, T1n; + T1j = ii[WS(is, 8)]; + T2J = TD - TE; + TF = TD + TE; + T2K = TG - TH; + TI = TG + TH; + T1k = ii[WS(is, 23)]; + T1m = ii[WS(is, 13)]; + T1n = ii[WS(is, 18)]; + TJ = TF + TI; + T2O = TI - TF; + T2Q = T1k - T1j; + T1l = T1j + T1k; + T1o = T1m + T1n; + T2R = T1n - T1m; + } + } + } + { + E T2H, T2N, T2r, T2w, T49, T2G; + { + E TB, T1p, TK, 
T1h, T1q; + T2y = FNMS(KP250000000, TA, Tt); + TB = Tt + TA; + T2H = T1o - T1l; + T1p = T1l + T1o; + TK = TC + TJ; + T2N = FNMS(KP250000000, TJ, TC); + T2r = FNMS(KP250000000, T1g, T19); + T1h = T19 + T1g; + T2w = FMA(KP618033988, T2v, T2u); + T49 = FNMS(KP618033988, T2u, T2v); + T2G = FNMS(KP250000000, T1p, T1i); + T1q = T1i + T1p; + TL = TB + TK; + T1L = TB - TK; + T1r = T1h - T1q; + T1F = T1h + T1q; + } + { + E T2S, T4j, T48, T2t; + T2S = FMA(KP618033988, T2R, T2Q); + T4j = FNMS(KP618033988, T2Q, T2R); + T48 = FMA(KP559016994, T2s, T2r); + T2t = FNMS(KP559016994, T2s, T2r); + { + E T4g, T2L, T2I, T4f, T4i, T2P; + T4g = FNMS(KP618033988, T2J, T2K); + T2L = FMA(KP618033988, T2K, T2J); + T4a = FMA(KP951056516, T49, T48); + T57 = FNMS(KP951056516, T49, T48); + T3v = FNMS(KP951056516, T2w, T2t); + T2x = FMA(KP951056516, T2w, T2t); + T4i = FMA(KP559016994, T2O, T2N); + T2P = FNMS(KP559016994, T2O, T2N); + T4k = FNMS(KP951056516, T4j, T4i); + T55 = FMA(KP951056516, T4j, T4i); + T3s = FMA(KP951056516, T2S, T2P); + T2T = FNMS(KP951056516, T2S, T2P); + T2I = FNMS(KP559016994, T2H, T2G); + T4f = FMA(KP559016994, T2H, T2G); + T2D = FNMS(KP618033988, T2C, T2B); + T4c = FMA(KP618033988, T2B, T2C); + T3t = FMA(KP951056516, T2L, T2I); + T2M = FNMS(KP951056516, T2L, T2I); + T4h = FNMS(KP951056516, T4g, T4f); + T54 = FMA(KP951056516, T4g, T4f); + } + } + } + } + } + { + E T4d, T58, T3w, T3H, T3r, T3k, T36, T38, T3o, T3q, T3j, T2Z, T37; + { + E T2E, T1s, T1u, TP, T1t; + { + E TM, TO, TN, T4b, T2A; + TM = Ts + TL; + TO = Ts - TL; + T4b = FMA(KP559016994, T2z, T2y); + T2A = FNMS(KP559016994, T2z, T2y); + TN = FNMS(KP250000000, TM, T9); + T4d = FMA(KP951056516, T4c, T4b); + T58 = FNMS(KP951056516, T4c, T4b); + T3w = FMA(KP951056516, T2D, T2A); + T2E = FNMS(KP951056516, T2D, T2A); + T1s = FMA(KP618033988, T1r, T18); + T1u = FNMS(KP618033988, T18, T1r); + ro[0] = T9 + TM; + TP = FMA(KP559016994, TO, TN); + T1t = FNMS(KP559016994, TO, TN); + } + { + E T1J, T1N, T1M, T1O, T1G, T1I, T1H; 
+ T1G = T1E + T1F; + T1I = T1E - T1F; + ro[WS(os, 15)] = FMA(KP951056516, T1u, T1t); + ro[WS(os, 10)] = FNMS(KP951056516, T1u, T1t); + ro[WS(os, 5)] = FMA(KP951056516, T1s, TP); + ro[WS(os, 20)] = FNMS(KP951056516, T1s, TP); + T1H = FNMS(KP250000000, T1G, T1D); + io[0] = T1D + T1G; + T1J = FMA(KP559016994, T1I, T1H); + T1N = FNMS(KP559016994, T1I, T1H); + T1M = FMA(KP618033988, T1L, T1K); + T1O = FNMS(KP618033988, T1K, T1L); + { + E T1V, T3f, T3m, T3n, T2W, T2Y, T32, T3g, T3h, T35, T3i, T2X; + T3H = FMA(KP951056516, T1U, T1R); + T1V = FNMS(KP951056516, T1U, T1R); + T3f = FMA(KP951056516, T3e, T3b); + T3r = FNMS(KP951056516, T3e, T3b); + io[WS(os, 15)] = FNMS(KP951056516, T1O, T1N); + io[WS(os, 10)] = FMA(KP951056516, T1O, T1N); + io[WS(os, 20)] = FMA(KP951056516, T1M, T1J); + io[WS(os, 5)] = FNMS(KP951056516, T1M, T1J); + { + E T30, T2a, T2p, T31, T33, T2F, T2U, T34, T2q, T2V; + T30 = FMA(KP256756360, T22, T29); + T2a = FNMS(KP256756360, T29, T22); + T2p = FMA(KP634619297, T2o, T2h); + T31 = FNMS(KP634619297, T2h, T2o); + T33 = FMA(KP549754652, T2x, T2E); + T2F = FNMS(KP549754652, T2E, T2x); + T2U = FNMS(KP939062505, T2T, T2M); + T34 = FMA(KP939062505, T2M, T2T); + T3m = FNMS(KP871714437, T2p, T2a); + T2q = FMA(KP871714437, T2p, T2a); + T3n = FNMS(KP831864738, T2U, T2F); + T2V = FMA(KP831864738, T2U, T2F); + T2W = FMA(KP904730450, T2V, T2q); + T2Y = FNMS(KP904730450, T2V, T2q); + T32 = FNMS(KP871714437, T31, T30); + T3g = FMA(KP871714437, T31, T30); + T3h = FMA(KP831864738, T34, T33); + T35 = FNMS(KP831864738, T34, T33); + } + io[WS(os, 1)] = FMA(KP968583161, T2W, T1V); + T3i = FMA(KP904730450, T3h, T3g); + T3k = FNMS(KP904730450, T3h, T3g); + T36 = FMA(KP559154169, T35, T32); + T38 = FNMS(KP683113946, T32, T35); + ro[WS(os, 1)] = FMA(KP968583161, T3i, T3f); + T2X = FNMS(KP242145790, T2W, T1V); + T3o = FMA(KP559154169, T3n, T3m); + T3q = FNMS(KP683113946, T3m, T3n); + T3j = FNMS(KP242145790, T3i, T3f); + T2Z = FMA(KP541454447, T2Y, T2X); + T37 = FNMS(KP541454447, 
T2Y, T2X); + } + } + } + { + E T47, T4R, T5A, T5w, T5y, T5E, T5G, T5z, T5t, T5x; + { + E T53, T5j, T5u, T5v, T5i, T5D, T5m, T5p, T5C, T3p, T3l, T5s, T5q, T5r; + T47 = FMA(KP951056516, T46, T45); + T53 = FNMS(KP951056516, T46, T45); + T3p = FNMS(KP541454447, T3k, T3j); + T3l = FMA(KP541454447, T3k, T3j); + io[WS(os, 16)] = FNMS(KP833417178, T38, T37); + io[WS(os, 11)] = FMA(KP833417178, T38, T37); + io[WS(os, 21)] = FMA(KP921177326, T36, T2Z); + io[WS(os, 6)] = FNMS(KP921177326, T36, T2Z); + ro[WS(os, 11)] = FNMS(KP833417178, T3q, T3p); + ro[WS(os, 16)] = FMA(KP833417178, T3q, T3p); + ro[WS(os, 21)] = FNMS(KP921177326, T3o, T3l); + ro[WS(os, 6)] = FMA(KP921177326, T3o, T3l); + T5j = FMA(KP951056516, T4Q, T4P); + T4R = FNMS(KP951056516, T4Q, T4P); + { + E T5k, T56, T59, T5l, T5n, T5d, T5g, T5o, T5a, T5h; + T5k = FNMS(KP062914667, T54, T55); + T56 = FMA(KP062914667, T55, T54); + T59 = FMA(KP634619297, T58, T57); + T5l = FNMS(KP634619297, T57, T58); + T5n = FNMS(KP470564281, T5b, T5c); + T5d = FMA(KP470564281, T5c, T5b); + T5g = FMA(KP549754652, T5f, T5e); + T5o = FNMS(KP549754652, T5e, T5f); + T5u = FNMS(KP845997307, T59, T56); + T5a = FMA(KP845997307, T59, T56); + T5v = FNMS(KP968479752, T5g, T5d); + T5h = FMA(KP968479752, T5g, T5d); + T5i = FMA(KP906616052, T5h, T5a); + T5A = FNMS(KP906616052, T5h, T5a); + T5D = FNMS(KP845997307, T5l, T5k); + T5m = FMA(KP845997307, T5l, T5k); + T5p = FMA(KP968479752, T5o, T5n); + T5C = FNMS(KP968479752, T5o, T5n); + } + ro[WS(os, 2)] = FMA(KP998026728, T5i, T53); + T5s = FMA(KP906616052, T5p, T5m); + T5q = FNMS(KP906616052, T5p, T5m); + T5w = FNMS(KP560319534, T5v, T5u); + T5y = FMA(KP681693190, T5u, T5v); + T5E = FNMS(KP681693190, T5D, T5C); + T5G = FMA(KP560319534, T5C, T5D); + T5r = FMA(KP249506682, T5q, T5j); + io[WS(os, 2)] = FNMS(KP998026728, T5q, T5j); + T5z = FNMS(KP249506682, T5i, T53); + T5t = FNMS(KP557913902, T5s, T5r); + T5x = FMA(KP557913902, T5s, T5r); + } + { + E T4W, T4M, T4O, T50, T52, T4V, T4F, T4N; + { + E T4Y, 
T4Z, T4C, T4E, T4I, T4T, T4S, T4L, T5F, T5B, T4U, T4D; + T5F = FMA(KP557913902, T5A, T5z); + T5B = FNMS(KP557913902, T5A, T5z); + io[WS(os, 7)] = FMA(KP860541664, T5y, T5x); + io[WS(os, 22)] = FNMS(KP860541664, T5y, T5x); + io[WS(os, 17)] = FMA(KP949179823, T5w, T5t); + io[WS(os, 12)] = FNMS(KP949179823, T5w, T5t); + ro[WS(os, 12)] = FNMS(KP949179823, T5G, T5F); + ro[WS(os, 17)] = FMA(KP949179823, T5G, T5F); + ro[WS(os, 7)] = FNMS(KP860541664, T5E, T5B); + ro[WS(os, 22)] = FMA(KP860541664, T5E, T5B); + { + E T4J, T4e, T4l, T4K, T4G, T4t, T4A, T4H, T4m, T4B; + T4J = FNMS(KP062914667, T4a, T4d); + T4e = FMA(KP062914667, T4d, T4a); + T4l = FNMS(KP827271945, T4k, T4h); + T4K = FMA(KP827271945, T4h, T4k); + T4G = FNMS(KP126329378, T4p, T4s); + T4t = FMA(KP126329378, T4s, T4p); + T4A = FMA(KP939062505, T4z, T4w); + T4H = FNMS(KP939062505, T4w, T4z); + T4Y = FNMS(KP772036680, T4l, T4e); + T4m = FMA(KP772036680, T4l, T4e); + T4Z = FNMS(KP734762448, T4A, T4t); + T4B = FMA(KP734762448, T4A, T4t); + T4C = FMA(KP994076283, T4B, T4m); + T4E = FNMS(KP994076283, T4B, T4m); + T4I = FMA(KP734762448, T4H, T4G); + T4T = FNMS(KP734762448, T4H, T4G); + T4S = FMA(KP772036680, T4K, T4J); + T4L = FNMS(KP772036680, T4K, T4J); + } + ro[WS(os, 3)] = FMA(KP998026728, T4C, T47); + T4U = FMA(KP994076283, T4T, T4S); + T4W = FNMS(KP994076283, T4T, T4S); + T4M = FNMS(KP621716863, T4L, T4I); + T4O = FMA(KP614372930, T4I, T4L); + io[WS(os, 3)] = FNMS(KP998026728, T4U, T4R); + T4D = FNMS(KP249506682, T4C, T47); + T50 = FMA(KP614372930, T4Z, T4Y); + T52 = FNMS(KP621716863, T4Y, T4Z); + T4V = FMA(KP249506682, T4U, T4R); + T4F = FNMS(KP557913902, T4E, T4D); + T4N = FMA(KP557913902, T4E, T4D); + } + { + E T3S, T3T, T3G, T41, T3K, T3N, T40, T51, T4X, T3Q, T3O, T3P; + T51 = FMA(KP557913902, T4W, T4V); + T4X = FNMS(KP557913902, T4W, T4V); + ro[WS(os, 18)] = FNMS(KP949179823, T4O, T4N); + ro[WS(os, 13)] = FMA(KP949179823, T4O, T4N); + ro[WS(os, 8)] = FMA(KP943557151, T4M, T4F); + ro[WS(os, 23)] = 
FNMS(KP943557151, T4M, T4F); + io[WS(os, 8)] = FMA(KP943557151, T52, T51); + io[WS(os, 23)] = FNMS(KP943557151, T52, T51); + io[WS(os, 18)] = FNMS(KP949179823, T50, T4X); + io[WS(os, 13)] = FMA(KP949179823, T50, T4X); + { + E T3I, T3u, T3x, T3J, T3L, T3B, T3E, T3M, T3y, T3F; + T3I = FMA(KP126329378, T3s, T3t); + T3u = FNMS(KP126329378, T3t, T3s); + T3x = FNMS(KP470564281, T3w, T3v); + T3J = FMA(KP470564281, T3v, T3w); + T3L = FNMS(KP634619297, T3z, T3A); + T3B = FMA(KP634619297, T3A, T3z); + T3E = FNMS(KP827271945, T3D, T3C); + T3M = FMA(KP827271945, T3C, T3D); + T3S = FMA(KP912018591, T3x, T3u); + T3y = FNMS(KP912018591, T3x, T3u); + T3T = FMA(KP912575812, T3E, T3B); + T3F = FNMS(KP912575812, T3E, T3B); + T3G = FNMS(KP851038619, T3F, T3y); + T3Y = FMA(KP851038619, T3F, T3y); + T41 = FNMS(KP912018591, T3J, T3I); + T3K = FMA(KP912018591, T3J, T3I); + T3N = FMA(KP912575812, T3M, T3L); + T40 = FNMS(KP912575812, T3M, T3L); + } + ro[WS(os, 4)] = FNMS(KP992114701, T3G, T3r); + T3Q = FNMS(KP851038619, T3N, T3K); + T3O = FMA(KP851038619, T3N, T3K); + T3U = FNMS(KP525970792, T3T, T3S); + T3W = FMA(KP726211448, T3S, T3T); + T42 = FNMS(KP726211448, T41, T40); + T44 = FMA(KP525970792, T40, T41); + T3P = FMA(KP248028675, T3O, T3H); + io[WS(os, 4)] = FNMS(KP992114701, T3O, T3H); + T3X = FMA(KP248028675, T3G, T3r); + T3R = FNMS(KP554608978, T3Q, T3P); + T3V = FMA(KP554608978, T3Q, T3P); + } + } + } + } + } + T3Z = FMA(KP554608978, T3Y, T3X); + T43 = FNMS(KP554608978, T3Y, T3X); + io[WS(os, 9)] = FNMS(KP803003575, T3W, T3V); + io[WS(os, 24)] = FMA(KP803003575, T3W, T3V); + io[WS(os, 19)] = FNMS(KP943557151, T3U, T3R); + io[WS(os, 14)] = FMA(KP943557151, T3U, T3R); + ro[WS(os, 14)] = FNMS(KP943557151, T44, T43); + ro[WS(os, 19)] = FMA(KP943557151, T44, T43); + ro[WS(os, 24)] = FMA(KP803003575, T42, T3Z); + ro[WS(os, 9)] = FNMS(KP803003575, T42, T3Z); + } + } +} + +static const kdft_desc desc = { 25, "n1_25", {84, 0, 268, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_25) (planner 
*p) { + X(kdft_register) (p, n1_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 25 -name n1_25 -include n.h */ + +/* + * This function contains 352 FP additions, 184 FP multiplications, + * (or, 260 additions, 92 multiplications, 92 fused multiply/add), + * 101 stack variables, 20 constants, and 100 memory accesses + */ +#include "n.h" + +static void n1_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro 
+ ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(100, is), MAKE_VOLATILE_STRIDE(100, os)) { + E T9, T4u, T2T, TP, T3H, TW, T5y, T3I, T2Q, T4v, Ti, Tr, Ts, T5m, T5n; + E T5v, T18, T4G, T34, T3M, T1G, T4J, T38, T3T, T1v, T4K, T37, T3W, T1j, T4H; + E T35, T3P, TB, TK, TL, T5p, T5q, T5w, T1T, T4N, T3c, T41, T2r, T4Q, T3e; + E T4b, T2g, T4R, T3f, T48, T24, T4O, T3b, T44; + { + E T1, T4, T7, T8, T2S, T2R, TN, TO; + T1 = ri[0]; + { + E T2, T3, T5, T6; + T2 = ri[WS(is, 5)]; + T3 = ri[WS(is, 20)]; + T4 = T2 + T3; + T5 = ri[WS(is, 10)]; + T6 = ri[WS(is, 15)]; + T7 = T5 + T6; + T8 = T4 + T7; + T2S = T5 - T6; + T2R = T2 - T3; + } + T9 = T1 + T8; + T4u = FNMS(KP587785252, T2R, KP951056516 * T2S); + T2T = FMA(KP951056516, T2R, KP587785252 * T2S); + TN = KP559016994 * (T4 - T7); + TO = FNMS(KP250000000, T8, T1); + TP = TN + TO; + T3H = TO - TN; + } + { + E T2N, T2K, T2L, TS, T2O, TV, T2M, T2P; + T2N = ii[0]; + { + E TQ, TR, TT, TU; + TQ = ii[WS(is, 5)]; + TR = ii[WS(is, 20)]; + T2K = TQ + TR; + TT = ii[WS(is, 10)]; + TU = ii[WS(is, 15)]; + T2L = TT + TU; + TS = TQ - TR; + T2O = T2K + T2L; + TV = TT - TU; + } + TW = FMA(KP951056516, TS, KP587785252 * TV); + T5y = T2N + T2O; + T3I = FNMS(KP587785252, TS, KP951056516 * TV); + T2M = KP559016994 * (T2K - T2L); + T2P = FNMS(KP250000000, T2O, T2N); + T2Q = T2M + T2P; + T4v = T2P - T2M; + } + { + E Ta, T1c, Tj, T1z, Th, T1h, TY, T1g, T13, T1d, T16, T1b, Tq, T1E, T1l; + E T1D, T1q, T1A, T1t, T1y; + Ta = ri[WS(is, 1)]; + T1c = ii[WS(is, 1)]; + Tj = ri[WS(is, 4)]; + T1z = ii[WS(is, 4)]; + { + E Tb, Tc, Td, Te, Tf, Tg; + Tb = ri[WS(is, 6)]; + Tc = ri[WS(is, 21)]; + Td = Tb + Tc; + Te = ri[WS(is, 11)]; + Tf = ri[WS(is, 16)]; + Tg = Te + Tf; + Th = Td + Tg; + T1h = Te - Tf; + TY = KP559016994 * (Td - Tg); + T1g = Tb - Tc; + } + { + E T11, T12, T19, T14, T15, T1a; + T11 = ii[WS(is, 6)]; + T12 = ii[WS(is, 21)]; + T19 = T11 + T12; + T14 = ii[WS(is, 11)]; + T15 = ii[WS(is, 16)]; + T1a = T14 + T15; + T13 = T11 - T12; + T1d = T19 + T1a; + T16 = T14 - T15; 
+ T1b = KP559016994 * (T19 - T1a); + } + { + E Tk, Tl, Tm, Tn, To, Tp; + Tk = ri[WS(is, 9)]; + Tl = ri[WS(is, 24)]; + Tm = Tk + Tl; + Tn = ri[WS(is, 14)]; + To = ri[WS(is, 19)]; + Tp = Tn + To; + Tq = Tm + Tp; + T1E = Tn - To; + T1l = KP559016994 * (Tm - Tp); + T1D = Tk - Tl; + } + { + E T1o, T1p, T1w, T1r, T1s, T1x; + T1o = ii[WS(is, 9)]; + T1p = ii[WS(is, 24)]; + T1w = T1o + T1p; + T1r = ii[WS(is, 14)]; + T1s = ii[WS(is, 19)]; + T1x = T1r + T1s; + T1q = T1o - T1p; + T1A = T1w + T1x; + T1t = T1r - T1s; + T1y = KP559016994 * (T1w - T1x); + } + Ti = Ta + Th; + Tr = Tj + Tq; + Ts = Ti + Tr; + T5m = T1c + T1d; + T5n = T1z + T1A; + T5v = T5m + T5n; + { + E T17, T3L, T10, T3K, TZ; + T17 = FMA(KP951056516, T13, KP587785252 * T16); + T3L = FNMS(KP587785252, T13, KP951056516 * T16); + TZ = FNMS(KP250000000, Th, Ta); + T10 = TY + TZ; + T3K = TZ - TY; + T18 = T10 + T17; + T4G = T3K + T3L; + T34 = T10 - T17; + T3M = T3K - T3L; + } + { + E T1F, T3R, T1C, T3S, T1B; + T1F = FMA(KP951056516, T1D, KP587785252 * T1E); + T3R = FNMS(KP587785252, T1D, KP951056516 * T1E); + T1B = FNMS(KP250000000, T1A, T1z); + T1C = T1y + T1B; + T3S = T1B - T1y; + T1G = T1C - T1F; + T4J = T3S - T3R; + T38 = T1F + T1C; + T3T = T3R + T3S; + } + { + E T1u, T3V, T1n, T3U, T1m; + T1u = FMA(KP951056516, T1q, KP587785252 * T1t); + T3V = FNMS(KP587785252, T1q, KP951056516 * T1t); + T1m = FNMS(KP250000000, Tq, Tj); + T1n = T1l + T1m; + T3U = T1m - T1l; + T1v = T1n + T1u; + T4K = T3U + T3V; + T37 = T1n - T1u; + T3W = T3U - T3V; + } + { + E T1i, T3N, T1f, T3O, T1e; + T1i = FMA(KP951056516, T1g, KP587785252 * T1h); + T3N = FNMS(KP587785252, T1g, KP951056516 * T1h); + T1e = FNMS(KP250000000, T1d, T1c); + T1f = T1b + T1e; + T3O = T1e - T1b; + T1j = T1f - T1i; + T4H = T3O - T3N; + T35 = T1i + T1f; + T3P = T3N + T3O; + } + } + { + E Tt, T1X, TC, T2k, TA, T22, T1J, T21, T1O, T1Y, T1R, T1W, TJ, T2p, T26; + E T2o, T2b, T2l, T2e, T2j; + Tt = ri[WS(is, 2)]; + T1X = ii[WS(is, 2)]; + TC = ri[WS(is, 3)]; + T2k = ii[WS(is, 
3)]; + { + E Tu, Tv, Tw, Tx, Ty, Tz; + Tu = ri[WS(is, 7)]; + Tv = ri[WS(is, 22)]; + Tw = Tu + Tv; + Tx = ri[WS(is, 12)]; + Ty = ri[WS(is, 17)]; + Tz = Tx + Ty; + TA = Tw + Tz; + T22 = Tx - Ty; + T1J = KP559016994 * (Tw - Tz); + T21 = Tu - Tv; + } + { + E T1M, T1N, T1U, T1P, T1Q, T1V; + T1M = ii[WS(is, 7)]; + T1N = ii[WS(is, 22)]; + T1U = T1M + T1N; + T1P = ii[WS(is, 12)]; + T1Q = ii[WS(is, 17)]; + T1V = T1P + T1Q; + T1O = T1M - T1N; + T1Y = T1U + T1V; + T1R = T1P - T1Q; + T1W = KP559016994 * (T1U - T1V); + } + { + E TD, TE, TF, TG, TH, TI; + TD = ri[WS(is, 8)]; + TE = ri[WS(is, 23)]; + TF = TD + TE; + TG = ri[WS(is, 13)]; + TH = ri[WS(is, 18)]; + TI = TG + TH; + TJ = TF + TI; + T2p = TG - TH; + T26 = KP559016994 * (TF - TI); + T2o = TD - TE; + } + { + E T29, T2a, T2h, T2c, T2d, T2i; + T29 = ii[WS(is, 8)]; + T2a = ii[WS(is, 23)]; + T2h = T29 + T2a; + T2c = ii[WS(is, 13)]; + T2d = ii[WS(is, 18)]; + T2i = T2c + T2d; + T2b = T29 - T2a; + T2l = T2h + T2i; + T2e = T2c - T2d; + T2j = KP559016994 * (T2h - T2i); + } + TB = Tt + TA; + TK = TC + TJ; + TL = TB + TK; + T5p = T1X + T1Y; + T5q = T2k + T2l; + T5w = T5p + T5q; + { + E T1S, T40, T1L, T3Z, T1K; + T1S = FMA(KP951056516, T1O, KP587785252 * T1R); + T40 = FNMS(KP587785252, T1O, KP951056516 * T1R); + T1K = FNMS(KP250000000, TA, Tt); + T1L = T1J + T1K; + T3Z = T1K - T1J; + T1T = T1L + T1S; + T4N = T3Z + T40; + T3c = T1L - T1S; + T41 = T3Z - T40; + } + { + E T2q, T49, T2n, T4a, T2m; + T2q = FMA(KP951056516, T2o, KP587785252 * T2p); + T49 = FNMS(KP587785252, T2o, KP951056516 * T2p); + T2m = FNMS(KP250000000, T2l, T2k); + T2n = T2j + T2m; + T4a = T2m - T2j; + T2r = T2n - T2q; + T4Q = T4a - T49; + T3e = T2q + T2n; + T4b = T49 + T4a; + } + { + E T2f, T47, T28, T46, T27; + T2f = FMA(KP951056516, T2b, KP587785252 * T2e); + T47 = FNMS(KP587785252, T2b, KP951056516 * T2e); + T27 = FNMS(KP250000000, TJ, TC); + T28 = T26 + T27; + T46 = T27 - T26; + T2g = T28 + T2f; + T4R = T46 + T47; + T3f = T28 - T2f; + T48 = T46 - T47; + } + { + E 
T23, T42, T20, T43, T1Z; + T23 = FMA(KP951056516, T21, KP587785252 * T22); + T42 = FNMS(KP587785252, T21, KP951056516 * T22); + T1Z = FNMS(KP250000000, T1Y, T1X); + T20 = T1W + T1Z; + T43 = T1Z - T1W; + T24 = T20 - T23; + T4O = T43 - T42; + T3b = T23 + T20; + T44 = T42 + T43; + } + } + { + E T5j, TM, T5k, T5s, T5u, T5o, T5r, T5t, T5l; + T5j = KP559016994 * (Ts - TL); + TM = Ts + TL; + T5k = FNMS(KP250000000, TM, T9); + T5o = T5m - T5n; + T5r = T5p - T5q; + T5s = FMA(KP951056516, T5o, KP587785252 * T5r); + T5u = FNMS(KP587785252, T5o, KP951056516 * T5r); + ro[0] = T9 + TM; + T5t = T5k - T5j; + ro[WS(os, 10)] = T5t - T5u; + ro[WS(os, 15)] = T5t + T5u; + T5l = T5j + T5k; + ro[WS(os, 20)] = T5l - T5s; + ro[WS(os, 5)] = T5l + T5s; + } + { + E T5x, T5z, T5A, T5E, T5F, T5C, T5D, T5G, T5B; + T5x = KP559016994 * (T5v - T5w); + T5z = T5v + T5w; + T5A = FNMS(KP250000000, T5z, T5y); + T5C = Ti - Tr; + T5D = TB - TK; + T5E = FMA(KP951056516, T5C, KP587785252 * T5D); + T5F = FNMS(KP587785252, T5C, KP951056516 * T5D); + io[0] = T5y + T5z; + T5G = T5A - T5x; + io[WS(os, 10)] = T5F + T5G; + io[WS(os, 15)] = T5G - T5F; + T5B = T5x + T5A; + io[WS(os, 5)] = T5B - T5E; + io[WS(os, 20)] = T5E + T5B; + } + { + E TX, T2U, T2u, T2Z, T2v, T2Y, T2A, T2V, T2D, T2J; + TX = TP + TW; + T2U = T2Q - T2T; + { + E T1k, T1H, T1I, T25, T2s, T2t; + T1k = FMA(KP968583161, T18, KP248689887 * T1j); + T1H = FMA(KP535826794, T1v, KP844327925 * T1G); + T1I = T1k + T1H; + T25 = FMA(KP876306680, T1T, KP481753674 * T24); + T2s = FMA(KP728968627, T2g, KP684547105 * T2r); + T2t = T25 + T2s; + T2u = T1I + T2t; + T2Z = T25 - T2s; + T2v = KP559016994 * (T1I - T2t); + T2Y = T1k - T1H; + } + { + E T2y, T2z, T2H, T2B, T2C, T2I; + T2y = FNMS(KP248689887, T18, KP968583161 * T1j); + T2z = FNMS(KP844327925, T1v, KP535826794 * T1G); + T2H = T2y + T2z; + T2B = FNMS(KP481753674, T1T, KP876306680 * T24); + T2C = FNMS(KP684547105, T2g, KP728968627 * T2r); + T2I = T2B + T2C; + T2A = T2y - T2z; + T2V = T2H + T2I; + T2D = T2B - 
T2C; + T2J = KP559016994 * (T2H - T2I); + } + ro[WS(os, 1)] = TX + T2u; + io[WS(os, 1)] = T2U + T2V; + { + E T2E, T2G, T2x, T2F, T2w; + T2E = FMA(KP951056516, T2A, KP587785252 * T2D); + T2G = FNMS(KP587785252, T2A, KP951056516 * T2D); + T2w = FNMS(KP250000000, T2u, TX); + T2x = T2v + T2w; + T2F = T2w - T2v; + ro[WS(os, 21)] = T2x - T2E; + ro[WS(os, 16)] = T2F + T2G; + ro[WS(os, 6)] = T2x + T2E; + ro[WS(os, 11)] = T2F - T2G; + } + { + E T30, T31, T2X, T32, T2W; + T30 = FMA(KP951056516, T2Y, KP587785252 * T2Z); + T31 = FNMS(KP587785252, T2Y, KP951056516 * T2Z); + T2W = FNMS(KP250000000, T2V, T2U); + T2X = T2J + T2W; + T32 = T2W - T2J; + io[WS(os, 6)] = T2X - T30; + io[WS(os, 16)] = T32 - T31; + io[WS(os, 21)] = T30 + T2X; + io[WS(os, 11)] = T31 + T32; + } + } + { + E T4F, T52, T4U, T5b, T56, T57, T51, T5f, T53, T5e; + T4F = T3H + T3I; + T52 = T4v - T4u; + { + E T4I, T4L, T4M, T4P, T4S, T4T; + T4I = FMA(KP728968627, T4G, KP684547105 * T4H); + T4L = FNMS(KP992114701, T4K, KP125333233 * T4J); + T4M = T4I + T4L; + T4P = FMA(KP062790519, T4N, KP998026728 * T4O); + T4S = FNMS(KP637423989, T4R, KP770513242 * T4Q); + T4T = T4P + T4S; + T4U = T4M + T4T; + T5b = KP559016994 * (T4M - T4T); + T56 = T4I - T4L; + T57 = T4P - T4S; + } + { + E T4V, T4W, T4X, T4Y, T4Z, T50; + T4V = FNMS(KP684547105, T4G, KP728968627 * T4H); + T4W = FMA(KP125333233, T4K, KP992114701 * T4J); + T4X = T4V - T4W; + T4Y = FNMS(KP998026728, T4N, KP062790519 * T4O); + T4Z = FMA(KP770513242, T4R, KP637423989 * T4Q); + T50 = T4Y - T4Z; + T51 = KP559016994 * (T4X - T50); + T5f = T4Y + T4Z; + T53 = T4X + T50; + T5e = T4V + T4W; + } + ro[WS(os, 3)] = T4F + T4U; + io[WS(os, 3)] = T52 + T53; + { + E T58, T59, T55, T5a, T54; + T58 = FMA(KP951056516, T56, KP587785252 * T57); + T59 = FNMS(KP587785252, T56, KP951056516 * T57); + T54 = FNMS(KP250000000, T53, T52); + T55 = T51 + T54; + T5a = T54 - T51; + io[WS(os, 8)] = T55 - T58; + io[WS(os, 18)] = T5a - T59; + io[WS(os, 23)] = T58 + T55; + io[WS(os, 13)] = T59 + T5a; + 
} + { + E T5g, T5i, T5d, T5h, T5c; + T5g = FMA(KP951056516, T5e, KP587785252 * T5f); + T5i = FNMS(KP587785252, T5e, KP951056516 * T5f); + T5c = FNMS(KP250000000, T4U, T4F); + T5d = T5b + T5c; + T5h = T5c - T5b; + ro[WS(os, 23)] = T5d - T5g; + ro[WS(os, 18)] = T5h + T5i; + ro[WS(os, 8)] = T5d + T5g; + ro[WS(os, 13)] = T5h - T5i; + } + } + { + E T3J, T4w, T4e, T4B, T4f, T4A, T4k, T4x, T4n, T4t; + T3J = T3H - T3I; + T4w = T4u + T4v; + { + E T3Q, T3X, T3Y, T45, T4c, T4d; + T3Q = FMA(KP876306680, T3M, KP481753674 * T3P); + T3X = FNMS(KP425779291, T3W, KP904827052 * T3T); + T3Y = T3Q + T3X; + T45 = FMA(KP535826794, T41, KP844327925 * T44); + T4c = FMA(KP062790519, T48, KP998026728 * T4b); + T4d = T45 + T4c; + T4e = T3Y + T4d; + T4B = T45 - T4c; + T4f = KP559016994 * (T3Y - T4d); + T4A = T3Q - T3X; + } + { + E T4i, T4j, T4r, T4l, T4m, T4s; + T4i = FNMS(KP481753674, T3M, KP876306680 * T3P); + T4j = FMA(KP904827052, T3W, KP425779291 * T3T); + T4r = T4i - T4j; + T4l = FNMS(KP844327925, T41, KP535826794 * T44); + T4m = FNMS(KP998026728, T48, KP062790519 * T4b); + T4s = T4l + T4m; + T4k = T4i + T4j; + T4x = T4r + T4s; + T4n = T4l - T4m; + T4t = KP559016994 * (T4r - T4s); + } + ro[WS(os, 2)] = T3J + T4e; + io[WS(os, 2)] = T4w + T4x; + { + E T4o, T4q, T4h, T4p, T4g; + T4o = FMA(KP951056516, T4k, KP587785252 * T4n); + T4q = FNMS(KP587785252, T4k, KP951056516 * T4n); + T4g = FNMS(KP250000000, T4e, T3J); + T4h = T4f + T4g; + T4p = T4g - T4f; + ro[WS(os, 22)] = T4h - T4o; + ro[WS(os, 17)] = T4p + T4q; + ro[WS(os, 7)] = T4h + T4o; + ro[WS(os, 12)] = T4p - T4q; + } + { + E T4C, T4D, T4z, T4E, T4y; + T4C = FMA(KP951056516, T4A, KP587785252 * T4B); + T4D = FNMS(KP587785252, T4A, KP951056516 * T4B); + T4y = FNMS(KP250000000, T4x, T4w); + T4z = T4t + T4y; + T4E = T4y - T4t; + io[WS(os, 7)] = T4z - T4C; + io[WS(os, 17)] = T4E - T4D; + io[WS(os, 22)] = T4C + T4z; + io[WS(os, 12)] = T4D + T4E; + } + } + { + E T33, T3j, T3i, T3z, T3r, T3s, T3q, T3D, T3v, T3C; + T33 = TP - TW; + T3j = T2T + 
T2Q; + { + E T36, T39, T3a, T3d, T3g, T3h; + T36 = FMA(KP535826794, T34, KP844327925 * T35); + T39 = FMA(KP637423989, T37, KP770513242 * T38); + T3a = T36 - T39; + T3d = FNMS(KP425779291, T3c, KP904827052 * T3b); + T3g = FNMS(KP992114701, T3f, KP125333233 * T3e); + T3h = T3d + T3g; + T3i = T3a + T3h; + T3z = KP559016994 * (T3a - T3h); + T3r = T3d - T3g; + T3s = T36 + T39; + } + { + E T3k, T3l, T3m, T3n, T3o, T3p; + T3k = FNMS(KP844327925, T34, KP535826794 * T35); + T3l = FNMS(KP637423989, T38, KP770513242 * T37); + T3m = T3k + T3l; + T3n = FMA(KP904827052, T3c, KP425779291 * T3b); + T3o = FMA(KP125333233, T3f, KP992114701 * T3e); + T3p = T3n + T3o; + T3q = T3m - T3p; + T3D = T3o - T3n; + T3v = KP559016994 * (T3m + T3p); + T3C = T3k - T3l; + } + ro[WS(os, 4)] = T33 + T3i; + io[WS(os, 4)] = T3j + T3q; + { + E T3t, T3y, T3w, T3x, T3u; + T3t = FNMS(KP587785252, T3s, KP951056516 * T3r); + T3y = FMA(KP951056516, T3s, KP587785252 * T3r); + T3u = FNMS(KP250000000, T3q, T3j); + T3w = T3u - T3v; + T3x = T3u + T3v; + io[WS(os, 14)] = T3t + T3w; + io[WS(os, 24)] = T3y + T3x; + io[WS(os, 19)] = T3w - T3t; + io[WS(os, 9)] = T3x - T3y; + } + { + E T3E, T3G, T3B, T3F, T3A; + T3E = FMA(KP951056516, T3C, KP587785252 * T3D); + T3G = FNMS(KP587785252, T3C, KP951056516 * T3D); + T3A = FNMS(KP250000000, T3i, T33); + T3B = T3z + T3A; + T3F = T3A - T3z; + ro[WS(os, 24)] = T3B - T3E; + ro[WS(os, 19)] = T3F + T3G; + ro[WS(os, 9)] = T3B + T3E; + ro[WS(os, 14)] = T3F - T3G; + } + } + } + } +} + +static const kdft_desc desc = { 25, "n1_25", {260, 92, 92, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_25) (planner *p) { + X(kdft_register) (p, n1_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 
Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 3 -name n1_3 -include n.h */ + +/* + * This function contains 12 FP additions, 6 FP multiplications, + * (or, 6 additions, 0 multiplications, 6 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "n.h" + +static void n1_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + E T1, T9, T2, T3, T6, T7; + T1 = ri[0]; + T9 = ii[0]; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 2)]; + T6 = ii[WS(is, 1)]; + T7 = ii[WS(is, 2)]; + { + E T4, Tc, T8, Ta, T5, Tb; + T4 = T2 + T3; + Tc = T3 - T2; + T8 = T6 - T7; + Ta = T6 + T7; + T5 = 
FNMS(KP500000000, T4, T1); + ro[0] = T1 + T4; + Tb = FNMS(KP500000000, Ta, T9); + io[0] = T9 + Ta; + ro[WS(os, 1)] = FMA(KP866025403, T8, T5); + ro[WS(os, 2)] = FNMS(KP866025403, T8, T5); + io[WS(os, 2)] = FNMS(KP866025403, Tc, Tb); + io[WS(os, 1)] = FMA(KP866025403, Tc, Tb); + } + } + } +} + +static const kdft_desc desc = { 3, "n1_3", {6, 0, 6, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_3) (planner *p) { + X(kdft_register) (p, n1_3, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 3 -name n1_3 -include n.h */ + +/* + * This function contains 12 FP additions, 4 FP multiplications, + * (or, 10 additions, 2 multiplications, 2 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "n.h" + +static void n1_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + E T1, Ta, T4, T9, T8, Tb, T5, Tc; + T1 = ri[0]; + Ta = ii[0]; + { + E T2, T3, T6, T7; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 2)]; + T4 = T2 + T3; + T9 = KP866025403 * (T3 - T2); + T6 = ii[WS(is, 1)]; + T7 = ii[WS(is, 2)]; + T8 = KP866025403 * (T6 - T7); + Tb = T6 + T7; + } + ro[0] = T1 + T4; + io[0] = Ta + Tb; + T5 = FNMS(KP500000000, T4, T1); + ro[WS(os, 2)] = T5 - T8; + ro[WS(os, 1)] = T5 + T8; + Tc = FNMS(KP500000000, Tb, Ta); + io[WS(os, 1)] = T9 + Tc; + io[WS(os, 2)] = Tc - T9; + } + } +} + +static const kdft_desc desc = { 3, "n1_3", {10, 2, 2, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_3) (planner *p) { + X(kdft_register) (p, n1_3, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1291 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name n1_32 -include n.h */ + +/* + * This function contains 372 FP additions, 136 FP multiplications, + * (or, 236 additions, 0 multiplications, 136 fused multiply/add), + * 136 stack variables, 7 constants, and 128 memory accesses + */ +#include "n.h" + +static void n1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); 
+ DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + E T3g, T3f, T3n, T3b, T3r, T3l, T3o, T3e, T3h, T3p; + { + E T2T, T3T, T4r, T7, T3t, T1z, T18, T4Z, Te, T50, T4s, T1f, T2W, T3u, T3U; + E T1G, Tm, T1n, T3X, T3y, T2Z, T1O, T53, T4w, Tt, T1u, T3W, T3B, T2Y, T1V; + E T52, T4z, T3O, T2t, T3L, T2K, T5F, TZ, T5I, T5X, T4R, T5k, T3M, T2E, T5j; + E T4W, T3P, T2N, T3H, T22, T3E, T2j, T4H, T4K, T5A, TK, T5D, T5W, T2k, T2l; + E T4G, T5h, T3F, T2d; + { + E Tj, T1L, Ti, T1I, T1j, Tk, T1k, T1l; + { + E T4, T1x, T3, T2R, T14, T5, T15, T16, T1C, T1F; + { + E T1, T2, T12, T13; + T1 = ri[0]; + T2 = ri[WS(is, 16)]; + T12 = ii[0]; + T13 = ii[WS(is, 16)]; + T4 = ri[WS(is, 8)]; + T1x = T1 - T2; + T3 = T1 + T2; + T2R = T12 - T13; + T14 = T12 + T13; + T5 = ri[WS(is, 24)]; + T15 = ii[WS(is, 8)]; + T16 = ii[WS(is, 24)]; + } + { + E Tb, T1A, Ta, T1B, T1b, Tc, T1c, T1d; + { + E T8, T9, T19, T1a; + T8 = ri[WS(is, 4)]; + { + E T2S, T6, T1y, T17; + T2S = T4 - T5; + T6 = T4 + T5; + T1y = T15 - T16; + T17 = T15 + T16; + T2T = T2R - T2S; + T3T = T2S + T2R; + T4r = T3 - T6; + T7 = T3 + T6; + T3t = T1x - T1y; + T1z = T1x + T1y; + T18 = T14 + T17; + T4Z = T14 - T17; + T9 = ri[WS(is, 20)]; + } + T19 = ii[WS(is, 4)]; + T1a = ii[WS(is, 20)]; + Tb = ri[WS(is, 28)]; + T1A = T8 - T9; + Ta = T8 + T9; + T1B = T19 - T1a; + T1b = T19 + T1a; + Tc = ri[WS(is, 12)]; + T1c = ii[WS(is, 28)]; + T1d = ii[WS(is, 12)]; + } + { + E T2U, T1D, Td, T1E, T1e, T2V; + T1C = T1A + T1B; + T2U = T1B - T1A; + T1D = Tb - Tc; + Td = Tb + Tc; + T1E = T1c - T1d; + T1e = T1c + T1d; + Te = Ta + Td; + T50 = Td - Ta; + T1F = T1D - T1E; + T2V = T1D + T1E; + T4s = T1b - T1e; + T1f = T1b + T1e; + T2W = T2U + T2V; + T3u = 
T2U - T2V; + } + } + { + E Tg, Th, T1h, T1i; + Tg = ri[WS(is, 2)]; + T3U = T1F - T1C; + T1G = T1C + T1F; + Th = ri[WS(is, 18)]; + T1h = ii[WS(is, 2)]; + T1i = ii[WS(is, 18)]; + Tj = ri[WS(is, 10)]; + T1L = Tg - Th; + Ti = Tg + Th; + T1I = T1h - T1i; + T1j = T1h + T1i; + Tk = ri[WS(is, 26)]; + T1k = ii[WS(is, 10)]; + T1l = ii[WS(is, 26)]; + } + } + { + E Tq, T1S, Tp, T1P, T1q, Tr, T1r, T1s; + { + E Tn, To, T1o, T1p, T1J, Tl; + Tn = ri[WS(is, 30)]; + T1J = Tj - Tk; + Tl = Tj + Tk; + { + E T1M, T1m, T3w, T1K; + T1M = T1k - T1l; + T1m = T1k + T1l; + T3w = T1J + T1I; + T1K = T1I - T1J; + { + E T4v, T3x, T1N, T4u; + T4v = Ti - Tl; + Tm = Ti + Tl; + T3x = T1L - T1M; + T1N = T1L + T1M; + T4u = T1j - T1m; + T1n = T1j + T1m; + T3X = FNMS(KP414213562, T3w, T3x); + T3y = FMA(KP414213562, T3x, T3w); + T2Z = FMA(KP414213562, T1K, T1N); + T1O = FNMS(KP414213562, T1N, T1K); + T53 = T4v + T4u; + T4w = T4u - T4v; + To = ri[WS(is, 14)]; + } + } + T1o = ii[WS(is, 30)]; + T1p = ii[WS(is, 14)]; + Tq = ri[WS(is, 6)]; + T1S = Tn - To; + Tp = Tn + To; + T1P = T1o - T1p; + T1q = T1o + T1p; + Tr = ri[WS(is, 22)]; + T1r = ii[WS(is, 6)]; + T1s = ii[WS(is, 22)]; + } + { + E T4S, T4V, T2L, T2M; + { + E T2G, TN, T4N, T2r, T2s, TQ, T4O, T2J, TV, T2x, TU, T4T, T2w, TW, T2A; + E T2B; + { + E TO, TP, T2H, T2I; + { + E TL, TM, T2p, T2q, T1Q, Ts; + TL = ri[WS(is, 31)]; + T1Q = Tq - Tr; + Ts = Tq + Tr; + { + E T1T, T1t, T3z, T1R; + T1T = T1r - T1s; + T1t = T1r + T1s; + T3z = T1Q + T1P; + T1R = T1P - T1Q; + { + E T4x, T3A, T1U, T4y; + T4x = Tp - Ts; + Tt = Tp + Ts; + T3A = T1S - T1T; + T1U = T1S + T1T; + T4y = T1q - T1t; + T1u = T1q + T1t; + T3W = FMA(KP414213562, T3z, T3A); + T3B = FNMS(KP414213562, T3A, T3z); + T2Y = FNMS(KP414213562, T1R, T1U); + T1V = FMA(KP414213562, T1U, T1R); + T52 = T4x - T4y; + T4z = T4x + T4y; + TM = ri[WS(is, 15)]; + } + } + T2p = ii[WS(is, 31)]; + T2q = ii[WS(is, 15)]; + TO = ri[WS(is, 7)]; + T2G = TL - TM; + TN = TL + TM; + T4N = T2p + T2q; + T2r = T2p - T2q; + TP = 
ri[WS(is, 23)]; + T2H = ii[WS(is, 7)]; + T2I = ii[WS(is, 23)]; + } + { + E TS, TT, T2u, T2v; + TS = ri[WS(is, 3)]; + T2s = TO - TP; + TQ = TO + TP; + T4O = T2H + T2I; + T2J = T2H - T2I; + TT = ri[WS(is, 19)]; + T2u = ii[WS(is, 3)]; + T2v = ii[WS(is, 19)]; + TV = ri[WS(is, 27)]; + T2x = TS - TT; + TU = TS + TT; + T4T = T2u + T2v; + T2w = T2u - T2v; + TW = ri[WS(is, 11)]; + T2A = ii[WS(is, 27)]; + T2B = ii[WS(is, 11)]; + } + } + { + E T2z, T4U, T2C, TR, TY, T4Q, TX; + T3O = T2s + T2r; + T2t = T2r - T2s; + T2z = TV - TW; + TX = TV + TW; + T4U = T2A + T2B; + T2C = T2A - T2B; + T3L = T2G - T2J; + T2K = T2G + T2J; + T4S = TN - TQ; + TR = TN + TQ; + TY = TU + TX; + T4Q = TX - TU; + { + E T4P, T5G, T5H, T2y, T2D; + T4P = T4N - T4O; + T5G = T4N + T4O; + T5H = T4T + T4U; + T4V = T4T - T4U; + T5F = TR - TY; + TZ = TR + TY; + T5I = T5G - T5H; + T5X = T5G + T5H; + T2L = T2x + T2w; + T2y = T2w - T2x; + T2D = T2z + T2C; + T2M = T2z - T2C; + T4R = T4P - T4Q; + T5k = T4Q + T4P; + T3M = T2D - T2y; + T2E = T2y + T2D; + } + } + } + { + E T2f, Ty, T4C, T20, T21, TB, T4D, T2i, TG, T26, TF, T4I, T25, TH, T29; + E T2a; + { + E Tz, TA, T2g, T2h; + { + E Tw, Tx, T1Y, T1Z; + Tw = ri[WS(is, 1)]; + T5j = T4S + T4V; + T4W = T4S - T4V; + T3P = T2L - T2M; + T2N = T2L + T2M; + Tx = ri[WS(is, 17)]; + T1Y = ii[WS(is, 1)]; + T1Z = ii[WS(is, 17)]; + Tz = ri[WS(is, 9)]; + T2f = Tw - Tx; + Ty = Tw + Tx; + T4C = T1Y + T1Z; + T20 = T1Y - T1Z; + TA = ri[WS(is, 25)]; + T2g = ii[WS(is, 9)]; + T2h = ii[WS(is, 25)]; + } + { + E TD, TE, T23, T24; + TD = ri[WS(is, 5)]; + T21 = Tz - TA; + TB = Tz + TA; + T4D = T2g + T2h; + T2i = T2g - T2h; + TE = ri[WS(is, 21)]; + T23 = ii[WS(is, 5)]; + T24 = ii[WS(is, 21)]; + TG = ri[WS(is, 29)]; + T26 = TD - TE; + TF = TD + TE; + T4I = T23 + T24; + T25 = T23 - T24; + TH = ri[WS(is, 13)]; + T29 = ii[WS(is, 29)]; + T2a = ii[WS(is, 13)]; + } + } + { + E T28, T4J, T2b, TC, TJ, T4F, TI; + T3H = T21 + T20; + T22 = T20 - T21; + T28 = TG - TH; + TI = TG + TH; + T4J = T29 + T2a; + T2b = 
T29 - T2a; + T3E = T2f - T2i; + T2j = T2f + T2i; + T4H = Ty - TB; + TC = Ty + TB; + TJ = TF + TI; + T4F = TI - TF; + { + E T4E, T5B, T5C, T27, T2c; + T4E = T4C - T4D; + T5B = T4C + T4D; + T5C = T4I + T4J; + T4K = T4I - T4J; + T5A = TC - TJ; + TK = TC + TJ; + T5D = T5B - T5C; + T5W = T5B + T5C; + T2k = T26 + T25; + T27 = T25 - T26; + T2c = T28 + T2b; + T2l = T28 - T2b; + T4G = T4E - T4F; + T5h = T4F + T4E; + T3F = T2c - T27; + T2d = T27 + T2c; + } + } + } + } + } + } + { + E T3I, T2m, Tv, T60, T11, T10, T5Z, T1w; + { + E T5f, T5w, T5q, T5m, T5v, T5p; + { + E T5d, T5g, T5o, T4B, T5a, T5n, T5e, T56, T4Y, T57, T55; + { + E T4X, T4M, T5b, T5c, T51, T54; + { + E T4t, T4A, T58, T59, T4L; + T5d = T4r + T4s; + T4t = T4r - T4s; + T5g = T4H + T4K; + T4L = T4H - T4K; + T3I = T2k - T2l; + T2m = T2k + T2l; + T4A = T4w - T4z; + T5o = T4w + T4z; + T4X = FNMS(KP414213562, T4W, T4R); + T58 = FMA(KP414213562, T4R, T4W); + T59 = FNMS(KP414213562, T4G, T4L); + T4M = FMA(KP414213562, T4L, T4G); + T5b = FNMS(KP707106781, T4A, T4t); + T4B = FMA(KP707106781, T4A, T4t); + T5c = T59 + T58; + T5a = T58 - T59; + T5n = T50 + T4Z; + T51 = T4Z - T50; + T54 = T52 - T53; + T5e = T53 + T52; + } + ro[WS(os, 14)] = FNMS(KP923879532, T5c, T5b); + T56 = T4M + T4X; + T4Y = T4M - T4X; + T57 = FMA(KP707106781, T54, T51); + T55 = FNMS(KP707106781, T54, T51); + ro[WS(os, 30)] = FMA(KP923879532, T5c, T5b); + } + ro[WS(os, 6)] = FMA(KP923879532, T4Y, T4B); + ro[WS(os, 22)] = FNMS(KP923879532, T4Y, T4B); + io[WS(os, 6)] = FMA(KP923879532, T5a, T57); + io[WS(os, 22)] = FNMS(KP923879532, T5a, T57); + io[WS(os, 30)] = FMA(KP923879532, T56, T55); + io[WS(os, 14)] = FNMS(KP923879532, T56, T55); + { + E T5i, T5l, T5r, T5u, T5s, T5t; + T5i = FMA(KP414213562, T5h, T5g); + T5s = FNMS(KP414213562, T5g, T5h); + T5t = FMA(KP414213562, T5j, T5k); + T5l = FNMS(KP414213562, T5k, T5j); + T5r = FNMS(KP707106781, T5e, T5d); + T5f = FMA(KP707106781, T5e, T5d); + T5w = T5s + T5t; + T5u = T5s - T5t; + ro[WS(os, 26)] = 
FNMS(KP923879532, T5u, T5r); + T5q = T5l - T5i; + T5m = T5i + T5l; + T5v = FMA(KP707106781, T5o, T5n); + T5p = FNMS(KP707106781, T5o, T5n); + ro[WS(os, 10)] = FMA(KP923879532, T5u, T5r); + } + } + ro[WS(os, 2)] = FMA(KP923879532, T5m, T5f); + ro[WS(os, 18)] = FNMS(KP923879532, T5m, T5f); + io[WS(os, 2)] = FMA(KP923879532, T5w, T5v); + io[WS(os, 18)] = FNMS(KP923879532, T5w, T5v); + io[WS(os, 10)] = FMA(KP923879532, T5q, T5p); + io[WS(os, 26)] = FNMS(KP923879532, T5q, T5p); + { + E Tf, T1v, T5z, T5U, T1g, Tu, T5O, T5K, T5T, T5N, T5V, T5Y; + { + E T5E, T5J, T5P, T5S, T5L, T5M; + { + E T5x, T5y, T5Q, T5R; + Tf = T7 + Te; + T5x = T7 - Te; + T5y = T1n - T1u; + T1v = T1n + T1u; + T5E = T5A + T5D; + T5Q = T5D - T5A; + T5R = T5F + T5I; + T5J = T5F - T5I; + T5P = T5x - T5y; + T5z = T5x + T5y; + T5U = T5Q + T5R; + T5S = T5Q - T5R; + T1g = T18 + T1f; + T5L = T18 - T1f; + T5M = Tt - Tm; + Tu = Tm + Tt; + } + ro[WS(os, 28)] = FNMS(KP707106781, T5S, T5P); + T5O = T5J - T5E; + T5K = T5E + T5J; + T5T = T5M + T5L; + T5N = T5L - T5M; + ro[WS(os, 12)] = FMA(KP707106781, T5S, T5P); + } + ro[WS(os, 4)] = FMA(KP707106781, T5K, T5z); + ro[WS(os, 20)] = FNMS(KP707106781, T5K, T5z); + io[WS(os, 4)] = FMA(KP707106781, T5U, T5T); + io[WS(os, 20)] = FNMS(KP707106781, T5U, T5T); + io[WS(os, 12)] = FMA(KP707106781, T5O, T5N); + io[WS(os, 28)] = FNMS(KP707106781, T5O, T5N); + T5V = Tf - Tu; + Tv = Tf + Tu; + T60 = T5W + T5X; + T5Y = T5W - T5X; + ro[WS(os, 8)] = T5V + T5Y; + T11 = TZ - TK; + T10 = TK + TZ; + T5Z = T1g + T1v; + T1w = T1g - T1v; + ro[WS(os, 24)] = T5V - T5Y; + } + } + ro[0] = Tv + T10; + ro[WS(os, 16)] = Tv - T10; + io[0] = T5Z + T60; + io[WS(os, 16)] = T5Z - T60; + io[WS(os, 24)] = T1w - T11; + io[WS(os, 8)] = T11 + T1w; + { + E T39, T3k, T3j, T3a, T3d, T3c, T47, T4i, T4h, T41, T3D, T48, T4b, T4a, T4e; + E T3N, T45, T3Z, T42, T3K, T3Q, T4d; + { + E T2e, T37, T1X, T33, T31, T2n, T2F, T2O; + { + E T1H, T1W, T2X, T30; + T39 = FMA(KP707106781, T1G, T1z); + T1H = FNMS(KP707106781, T1G, 
T1z); + T1W = T1O - T1V; + T3k = T1O + T1V; + T3j = FMA(KP707106781, T2W, T2T); + T2X = FNMS(KP707106781, T2W, T2T); + T30 = T2Y - T2Z; + T3a = T2Z + T2Y; + T3d = FMA(KP707106781, T2d, T22); + T2e = FNMS(KP707106781, T2d, T22); + T37 = FNMS(KP923879532, T1W, T1H); + T1X = FMA(KP923879532, T1W, T1H); + T33 = FMA(KP923879532, T30, T2X); + T31 = FNMS(KP923879532, T30, T2X); + T2n = FNMS(KP707106781, T2m, T2j); + T3c = FMA(KP707106781, T2m, T2j); + T3g = FMA(KP707106781, T2E, T2t); + T2F = FNMS(KP707106781, T2E, T2t); + T2O = FNMS(KP707106781, T2N, T2K); + T3f = FMA(KP707106781, T2N, T2K); + } + { + E T3V, T3Y, T3G, T3J; + { + E T3v, T35, T2o, T34, T2P, T3C; + T47 = FNMS(KP707106781, T3u, T3t); + T3v = FMA(KP707106781, T3u, T3t); + T35 = FNMS(KP668178637, T2e, T2n); + T2o = FMA(KP668178637, T2n, T2e); + T34 = FMA(KP668178637, T2F, T2O); + T2P = FNMS(KP668178637, T2O, T2F); + T3C = T3y - T3B; + T4i = T3y + T3B; + T4h = FNMS(KP707106781, T3U, T3T); + T3V = FMA(KP707106781, T3U, T3T); + { + E T38, T36, T32, T2Q; + T38 = T35 + T34; + T36 = T34 - T35; + T32 = T2o + T2P; + T2Q = T2o - T2P; + T41 = FNMS(KP923879532, T3C, T3v); + T3D = FMA(KP923879532, T3C, T3v); + ro[WS(os, 29)] = FMA(KP831469612, T38, T37); + ro[WS(os, 13)] = FNMS(KP831469612, T38, T37); + io[WS(os, 5)] = FMA(KP831469612, T36, T33); + io[WS(os, 21)] = FNMS(KP831469612, T36, T33); + io[WS(os, 29)] = FMA(KP831469612, T32, T31); + io[WS(os, 13)] = FNMS(KP831469612, T32, T31); + ro[WS(os, 5)] = FMA(KP831469612, T2Q, T1X); + ro[WS(os, 21)] = FNMS(KP831469612, T2Q, T1X); + T3Y = T3W - T3X; + T48 = T3X + T3W; + } + } + T4b = FMA(KP707106781, T3F, T3E); + T3G = FNMS(KP707106781, T3F, T3E); + T3J = FNMS(KP707106781, T3I, T3H); + T4a = FMA(KP707106781, T3I, T3H); + T4e = FMA(KP707106781, T3M, T3L); + T3N = FNMS(KP707106781, T3M, T3L); + T45 = FMA(KP923879532, T3Y, T3V); + T3Z = FNMS(KP923879532, T3Y, T3V); + T42 = FNMS(KP668178637, T3G, T3J); + T3K = FMA(KP668178637, T3J, T3G); + T3Q = FNMS(KP707106781, T3P, T3O); + 
T4d = FMA(KP707106781, T3P, T3O); + } + } + { + E T4p, T49, T4l, T4j, T4n, T4c, T43, T3R, T4m, T4f; + T43 = FMA(KP668178637, T3N, T3Q); + T3R = FNMS(KP668178637, T3Q, T3N); + T4p = FMA(KP923879532, T48, T47); + T49 = FNMS(KP923879532, T48, T47); + { + E T44, T46, T40, T3S; + T44 = T42 - T43; + T46 = T42 + T43; + T40 = T3R - T3K; + T3S = T3K + T3R; + ro[WS(os, 11)] = FMA(KP831469612, T44, T41); + ro[WS(os, 27)] = FNMS(KP831469612, T44, T41); + io[WS(os, 3)] = FMA(KP831469612, T46, T45); + io[WS(os, 19)] = FNMS(KP831469612, T46, T45); + io[WS(os, 11)] = FMA(KP831469612, T40, T3Z); + io[WS(os, 27)] = FNMS(KP831469612, T40, T3Z); + ro[WS(os, 3)] = FMA(KP831469612, T3S, T3D); + ro[WS(os, 19)] = FNMS(KP831469612, T3S, T3D); + } + T4l = FNMS(KP923879532, T4i, T4h); + T4j = FMA(KP923879532, T4i, T4h); + T4n = FNMS(KP198912367, T4a, T4b); + T4c = FMA(KP198912367, T4b, T4a); + T4m = FMA(KP198912367, T4d, T4e); + T4f = FNMS(KP198912367, T4e, T4d); + T3n = FNMS(KP923879532, T3a, T39); + T3b = FMA(KP923879532, T3a, T39); + { + E T4q, T4o, T4k, T4g; + T4q = T4n + T4m; + T4o = T4m - T4n; + T4k = T4c + T4f; + T4g = T4c - T4f; + ro[WS(os, 31)] = FMA(KP980785280, T4q, T4p); + ro[WS(os, 15)] = FNMS(KP980785280, T4q, T4p); + io[WS(os, 7)] = FMA(KP980785280, T4o, T4l); + io[WS(os, 23)] = FNMS(KP980785280, T4o, T4l); + io[WS(os, 31)] = FMA(KP980785280, T4k, T4j); + io[WS(os, 15)] = FNMS(KP980785280, T4k, T4j); + ro[WS(os, 7)] = FMA(KP980785280, T4g, T49); + ro[WS(os, 23)] = FNMS(KP980785280, T4g, T49); + } + T3r = FMA(KP923879532, T3k, T3j); + T3l = FNMS(KP923879532, T3k, T3j); + T3o = FNMS(KP198912367, T3c, T3d); + T3e = FMA(KP198912367, T3d, T3c); + } + } + } + } + T3h = FNMS(KP198912367, T3g, T3f); + T3p = FMA(KP198912367, T3f, T3g); + { + E T3s, T3q, T3i, T3m; + T3s = T3o + T3p; + T3q = T3o - T3p; + T3i = T3e + T3h; + T3m = T3h - T3e; + ro[WS(os, 9)] = FMA(KP980785280, T3q, T3n); + ro[WS(os, 25)] = FNMS(KP980785280, T3q, T3n); + io[WS(os, 1)] = FMA(KP980785280, T3s, T3r); + 
io[WS(os, 17)] = FNMS(KP980785280, T3s, T3r); + io[WS(os, 9)] = FMA(KP980785280, T3m, T3l); + io[WS(os, 25)] = FNMS(KP980785280, T3m, T3l); + ro[WS(os, 1)] = FMA(KP980785280, T3i, T3b); + ro[WS(os, 17)] = FNMS(KP980785280, T3i, T3b); + } + } + } +} + +static const kdft_desc desc = { 32, "n1_32", {236, 0, 136, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_32) (planner *p) { + X(kdft_register) (p, n1_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 32 -name n1_32 -include n.h */ + +/* + * This function contains 372 FP additions, 84 FP multiplications, + * (or, 340 additions, 52 multiplications, 32 fused multiply/add), + * 100 stack variables, 7 constants, and 128 memory accesses + */ +#include "n.h" + +static void n1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + E T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G; + E T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W; + E T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5j, T2E; + E T3P, T4W, T5k, T2N, T3M, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D; + E T4G, T5g, T2d, T3F, T4L, T5h, T2m, T3I; + { + E T3, T1x, T14, T2S, T6, T2R, T17, T1y; + { + E T1, T2, T12, T13; + T1 = ri[0]; + 
T2 = ri[WS(is, 16)]; + T3 = T1 + T2; + T1x = T1 - T2; + T12 = ii[0]; + T13 = ii[WS(is, 16)]; + T14 = T12 + T13; + T2S = T12 - T13; + } + { + E T4, T5, T15, T16; + T4 = ri[WS(is, 8)]; + T5 = ri[WS(is, 24)]; + T6 = T4 + T5; + T2R = T4 - T5; + T15 = ii[WS(is, 8)]; + T16 = ii[WS(is, 24)]; + T17 = T15 + T16; + T1y = T15 - T16; + } + T7 = T3 + T6; + T4r = T3 - T6; + T4Z = T14 - T17; + T18 = T14 + T17; + T1z = T1x - T1y; + T3t = T1x + T1y; + T3T = T2S - T2R; + T2T = T2R + T2S; + } + { + E Ta, T1B, T1b, T1A, Td, T1D, T1e, T1E; + { + E T8, T9, T19, T1a; + T8 = ri[WS(is, 4)]; + T9 = ri[WS(is, 20)]; + Ta = T8 + T9; + T1B = T8 - T9; + T19 = ii[WS(is, 4)]; + T1a = ii[WS(is, 20)]; + T1b = T19 + T1a; + T1A = T19 - T1a; + } + { + E Tb, Tc, T1c, T1d; + Tb = ri[WS(is, 28)]; + Tc = ri[WS(is, 12)]; + Td = Tb + Tc; + T1D = Tb - Tc; + T1c = ii[WS(is, 28)]; + T1d = ii[WS(is, 12)]; + T1e = T1c + T1d; + T1E = T1c - T1d; + } + Te = Ta + Td; + T1f = T1b + T1e; + T50 = Td - Ta; + T4s = T1b - T1e; + { + E T2U, T2V, T1C, T1F; + T2U = T1D - T1E; + T2V = T1B + T1A; + T2W = KP707106781 * (T2U - T2V); + T3u = KP707106781 * (T2V + T2U); + T1C = T1A - T1B; + T1F = T1D + T1E; + T1G = KP707106781 * (T1C - T1F); + T3U = KP707106781 * (T1C + T1F); + } + } + { + E Ti, T1L, T1j, T1J, Tl, T1I, T1m, T1M, T1K, T1N; + { + E Tg, Th, T1h, T1i; + Tg = ri[WS(is, 2)]; + Th = ri[WS(is, 18)]; + Ti = Tg + Th; + T1L = Tg - Th; + T1h = ii[WS(is, 2)]; + T1i = ii[WS(is, 18)]; + T1j = T1h + T1i; + T1J = T1h - T1i; + } + { + E Tj, Tk, T1k, T1l; + Tj = ri[WS(is, 10)]; + Tk = ri[WS(is, 26)]; + Tl = Tj + Tk; + T1I = Tj - Tk; + T1k = ii[WS(is, 10)]; + T1l = ii[WS(is, 26)]; + T1m = T1k + T1l; + T1M = T1k - T1l; + } + Tm = Ti + Tl; + T1n = T1j + T1m; + T1K = T1I + T1J; + T1N = T1L - T1M; + T1O = FNMS(KP923879532, T1N, KP382683432 * T1K); + T2Z = FMA(KP923879532, T1K, KP382683432 * T1N); + { + E T3w, T3x, T4u, T4v; + T3w = T1J - T1I; + T3x = T1L + T1M; + T3y = FNMS(KP382683432, T3x, KP923879532 * T3w); + T3X = FMA(KP382683432, 
T3w, KP923879532 * T3x); + T4u = T1j - T1m; + T4v = Ti - Tl; + T4w = T4u - T4v; + T53 = T4v + T4u; + } + } + { + E Tp, T1S, T1q, T1Q, Ts, T1P, T1t, T1T, T1R, T1U; + { + E Tn, To, T1o, T1p; + Tn = ri[WS(is, 30)]; + To = ri[WS(is, 14)]; + Tp = Tn + To; + T1S = Tn - To; + T1o = ii[WS(is, 30)]; + T1p = ii[WS(is, 14)]; + T1q = T1o + T1p; + T1Q = T1o - T1p; + } + { + E Tq, Tr, T1r, T1s; + Tq = ri[WS(is, 6)]; + Tr = ri[WS(is, 22)]; + Ts = Tq + Tr; + T1P = Tq - Tr; + T1r = ii[WS(is, 6)]; + T1s = ii[WS(is, 22)]; + T1t = T1r + T1s; + T1T = T1r - T1s; + } + Tt = Tp + Ts; + T1u = T1q + T1t; + T1R = T1P + T1Q; + T1U = T1S - T1T; + T1V = FMA(KP382683432, T1R, KP923879532 * T1U); + T2Y = FNMS(KP923879532, T1R, KP382683432 * T1U); + { + E T3z, T3A, T4x, T4y; + T3z = T1Q - T1P; + T3A = T1S + T1T; + T3B = FMA(KP923879532, T3z, KP382683432 * T3A); + T3W = FNMS(KP382683432, T3z, KP923879532 * T3A); + T4x = Tp - Ts; + T4y = T1q - T1t; + T4z = T4x + T4y; + T52 = T4x - T4y; + } + } + { + E TN, T2p, T2J, T4S, TQ, T2G, T2s, T4T, TU, T2x, T2w, T4O, TX, T2z, T2C; + E T4P; + { + E TL, TM, T2H, T2I; + TL = ri[WS(is, 31)]; + TM = ri[WS(is, 15)]; + TN = TL + TM; + T2p = TL - TM; + T2H = ii[WS(is, 31)]; + T2I = ii[WS(is, 15)]; + T2J = T2H - T2I; + T4S = T2H + T2I; + } + { + E TO, TP, T2q, T2r; + TO = ri[WS(is, 7)]; + TP = ri[WS(is, 23)]; + TQ = TO + TP; + T2G = TO - TP; + T2q = ii[WS(is, 7)]; + T2r = ii[WS(is, 23)]; + T2s = T2q - T2r; + T4T = T2q + T2r; + } + { + E TS, TT, T2u, T2v; + TS = ri[WS(is, 3)]; + TT = ri[WS(is, 19)]; + TU = TS + TT; + T2x = TS - TT; + T2u = ii[WS(is, 3)]; + T2v = ii[WS(is, 19)]; + T2w = T2u - T2v; + T4O = T2u + T2v; + } + { + E TV, TW, T2A, T2B; + TV = ri[WS(is, 27)]; + TW = ri[WS(is, 11)]; + TX = TV + TW; + T2z = TV - TW; + T2A = ii[WS(is, 27)]; + T2B = ii[WS(is, 11)]; + T2C = T2A - T2B; + T4P = T2A + T2B; + } + T2t = T2p - T2s; + T3L = T2p + T2s; + T3O = T2J - T2G; + T2K = T2G + T2J; + TR = TN + TQ; + TY = TU + TX; + T5F = TR - TY; + { + E T4N, T4Q, T2y, T2D; + T5G = 
T4S + T4T; + T5H = T4O + T4P; + T5I = T5G - T5H; + T4N = TN - TQ; + T4Q = T4O - T4P; + T4R = T4N - T4Q; + T5j = T4N + T4Q; + T2y = T2w - T2x; + T2D = T2z + T2C; + T2E = KP707106781 * (T2y - T2D); + T3P = KP707106781 * (T2y + T2D); + { + E T4U, T4V, T2L, T2M; + T4U = T4S - T4T; + T4V = TX - TU; + T4W = T4U - T4V; + T5k = T4V + T4U; + T2L = T2z - T2C; + T2M = T2x + T2w; + T2N = KP707106781 * (T2L - T2M); + T3M = KP707106781 * (T2M + T2L); + } + } + } + { + E Ty, T2f, T21, T4C, TB, T1Y, T2i, T4D, TF, T28, T2b, T4I, TI, T23, T26; + E T4J; + { + E Tw, Tx, T1Z, T20; + Tw = ri[WS(is, 1)]; + Tx = ri[WS(is, 17)]; + Ty = Tw + Tx; + T2f = Tw - Tx; + T1Z = ii[WS(is, 1)]; + T20 = ii[WS(is, 17)]; + T21 = T1Z - T20; + T4C = T1Z + T20; + } + { + E Tz, TA, T2g, T2h; + Tz = ri[WS(is, 9)]; + TA = ri[WS(is, 25)]; + TB = Tz + TA; + T1Y = Tz - TA; + T2g = ii[WS(is, 9)]; + T2h = ii[WS(is, 25)]; + T2i = T2g - T2h; + T4D = T2g + T2h; + } + { + E TD, TE, T29, T2a; + TD = ri[WS(is, 5)]; + TE = ri[WS(is, 21)]; + TF = TD + TE; + T28 = TD - TE; + T29 = ii[WS(is, 5)]; + T2a = ii[WS(is, 21)]; + T2b = T29 - T2a; + T4I = T29 + T2a; + } + { + E TG, TH, T24, T25; + TG = ri[WS(is, 29)]; + TH = ri[WS(is, 13)]; + TI = TG + TH; + T23 = TG - TH; + T24 = ii[WS(is, 29)]; + T25 = ii[WS(is, 13)]; + T26 = T24 - T25; + T4J = T24 + T25; + } + T22 = T1Y + T21; + T3E = T2f + T2i; + T3H = T21 - T1Y; + T2j = T2f - T2i; + TC = Ty + TB; + TJ = TF + TI; + T5A = TC - TJ; + { + E T4E, T4F, T27, T2c; + T5B = T4C + T4D; + T5C = T4I + T4J; + T5D = T5B - T5C; + T4E = T4C - T4D; + T4F = TI - TF; + T4G = T4E - T4F; + T5g = T4F + T4E; + T27 = T23 - T26; + T2c = T28 + T2b; + T2d = KP707106781 * (T27 - T2c); + T3F = KP707106781 * (T2c + T27); + { + E T4H, T4K, T2k, T2l; + T4H = Ty - TB; + T4K = T4I - T4J; + T4L = T4H - T4K; + T5h = T4H + T4K; + T2k = T2b - T28; + T2l = T23 + T26; + T2m = KP707106781 * (T2k - T2l); + T3I = KP707106781 * (T2k + T2l); + } + } + } + { + E T4B, T57, T5a, T5c, T4Y, T56, T55, T5b; + { + E T4t, T4A, T58, 
T59; + T4t = T4r - T4s; + T4A = KP707106781 * (T4w - T4z); + T4B = T4t + T4A; + T57 = T4t - T4A; + T58 = FNMS(KP923879532, T4L, KP382683432 * T4G); + T59 = FMA(KP382683432, T4W, KP923879532 * T4R); + T5a = T58 - T59; + T5c = T58 + T59; + } + { + E T4M, T4X, T51, T54; + T4M = FMA(KP923879532, T4G, KP382683432 * T4L); + T4X = FNMS(KP923879532, T4W, KP382683432 * T4R); + T4Y = T4M + T4X; + T56 = T4X - T4M; + T51 = T4Z - T50; + T54 = KP707106781 * (T52 - T53); + T55 = T51 - T54; + T5b = T51 + T54; + } + ro[WS(os, 22)] = T4B - T4Y; + io[WS(os, 22)] = T5b - T5c; + ro[WS(os, 6)] = T4B + T4Y; + io[WS(os, 6)] = T5b + T5c; + io[WS(os, 30)] = T55 - T56; + ro[WS(os, 30)] = T57 - T5a; + io[WS(os, 14)] = T55 + T56; + ro[WS(os, 14)] = T57 + T5a; + } + { + E T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v; + { + E T5d, T5e, T5s, T5t; + T5d = T4r + T4s; + T5e = KP707106781 * (T53 + T52); + T5f = T5d + T5e; + T5r = T5d - T5e; + T5s = FNMS(KP382683432, T5h, KP923879532 * T5g); + T5t = FMA(KP923879532, T5k, KP382683432 * T5j); + T5u = T5s - T5t; + T5w = T5s + T5t; + } + { + E T5i, T5l, T5n, T5o; + T5i = FMA(KP382683432, T5g, KP923879532 * T5h); + T5l = FNMS(KP382683432, T5k, KP923879532 * T5j); + T5m = T5i + T5l; + T5q = T5l - T5i; + T5n = T50 + T4Z; + T5o = KP707106781 * (T4w + T4z); + T5p = T5n - T5o; + T5v = T5n + T5o; + } + ro[WS(os, 18)] = T5f - T5m; + io[WS(os, 18)] = T5v - T5w; + ro[WS(os, 2)] = T5f + T5m; + io[WS(os, 2)] = T5v + T5w; + io[WS(os, 26)] = T5p - T5q; + ro[WS(os, 26)] = T5r - T5u; + io[WS(os, 10)] = T5p + T5q; + ro[WS(os, 10)] = T5r + T5u; + } + { + E T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T; + { + E T5x, T5y, T5Q, T5R; + T5x = T7 - Te; + T5y = T1n - T1u; + T5z = T5x + T5y; + T5P = T5x - T5y; + T5Q = T5D - T5A; + T5R = T5F + T5I; + T5S = KP707106781 * (T5Q - T5R); + T5U = KP707106781 * (T5Q + T5R); + } + { + E T5E, T5J, T5L, T5M; + T5E = T5A + T5D; + T5J = T5F - T5I; + T5K = KP707106781 * (T5E + T5J); + T5O = KP707106781 * (T5J - T5E); + T5L = T18 - T1f; + T5M = Tt - Tm; + 
T5N = T5L - T5M; + T5T = T5M + T5L; + } + ro[WS(os, 20)] = T5z - T5K; + io[WS(os, 20)] = T5T - T5U; + ro[WS(os, 4)] = T5z + T5K; + io[WS(os, 4)] = T5T + T5U; + io[WS(os, 28)] = T5N - T5O; + ro[WS(os, 28)] = T5P - T5S; + io[WS(os, 12)] = T5N + T5O; + ro[WS(os, 12)] = T5P + T5S; + } + { + E Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z; + { + E Tf, Tu, T5W, T5X; + Tf = T7 + Te; + Tu = Tm + Tt; + Tv = Tf + Tu; + T5V = Tf - Tu; + T5W = T5B + T5C; + T5X = T5G + T5H; + T5Y = T5W - T5X; + T60 = T5W + T5X; + } + { + E TK, TZ, T1g, T1v; + TK = TC + TJ; + TZ = TR + TY; + T10 = TK + TZ; + T11 = TZ - TK; + T1g = T18 + T1f; + T1v = T1n + T1u; + T1w = T1g - T1v; + T5Z = T1g + T1v; + } + ro[WS(os, 16)] = Tv - T10; + io[WS(os, 16)] = T5Z - T60; + ro[0] = Tv + T10; + io[0] = T5Z + T60; + io[WS(os, 8)] = T11 + T1w; + ro[WS(os, 8)] = T5V + T5Y; + io[WS(os, 24)] = T1w - T11; + ro[WS(os, 24)] = T5V - T5Y; + } + { + E T1X, T33, T31, T37, T2o, T34, T2P, T35; + { + E T1H, T1W, T2X, T30; + T1H = T1z - T1G; + T1W = T1O - T1V; + T1X = T1H + T1W; + T33 = T1H - T1W; + T2X = T2T - T2W; + T30 = T2Y - T2Z; + T31 = T2X - T30; + T37 = T2X + T30; + } + { + E T2e, T2n, T2F, T2O; + T2e = T22 - T2d; + T2n = T2j - T2m; + T2o = FMA(KP980785280, T2e, KP195090322 * T2n); + T34 = FNMS(KP980785280, T2n, KP195090322 * T2e); + T2F = T2t - T2E; + T2O = T2K - T2N; + T2P = FNMS(KP980785280, T2O, KP195090322 * T2F); + T35 = FMA(KP195090322, T2O, KP980785280 * T2F); + } + { + E T2Q, T38, T32, T36; + T2Q = T2o + T2P; + ro[WS(os, 23)] = T1X - T2Q; + ro[WS(os, 7)] = T1X + T2Q; + T38 = T34 + T35; + io[WS(os, 23)] = T37 - T38; + io[WS(os, 7)] = T37 + T38; + T32 = T2P - T2o; + io[WS(os, 31)] = T31 - T32; + io[WS(os, 15)] = T31 + T32; + T36 = T34 - T35; + ro[WS(os, 31)] = T33 - T36; + ro[WS(os, 15)] = T33 + T36; + } + } + { + E T3D, T41, T3Z, T45, T3K, T42, T3R, T43; + { + E T3v, T3C, T3V, T3Y; + T3v = T3t - T3u; + T3C = T3y - T3B; + T3D = T3v + T3C; + T41 = T3v - T3C; + T3V = T3T - T3U; + T3Y = T3W - T3X; + T3Z = T3V - T3Y; + 
T45 = T3V + T3Y; + } + { + E T3G, T3J, T3N, T3Q; + T3G = T3E - T3F; + T3J = T3H - T3I; + T3K = FMA(KP555570233, T3G, KP831469612 * T3J); + T42 = FNMS(KP831469612, T3G, KP555570233 * T3J); + T3N = T3L - T3M; + T3Q = T3O - T3P; + T3R = FNMS(KP831469612, T3Q, KP555570233 * T3N); + T43 = FMA(KP831469612, T3N, KP555570233 * T3Q); + } + { + E T3S, T46, T40, T44; + T3S = T3K + T3R; + ro[WS(os, 21)] = T3D - T3S; + ro[WS(os, 5)] = T3D + T3S; + T46 = T42 + T43; + io[WS(os, 21)] = T45 - T46; + io[WS(os, 5)] = T45 + T46; + T40 = T3R - T3K; + io[WS(os, 29)] = T3Z - T40; + io[WS(os, 13)] = T3Z + T40; + T44 = T42 - T43; + ro[WS(os, 29)] = T41 - T44; + ro[WS(os, 13)] = T41 + T44; + } + } + { + E T49, T4l, T4j, T4p, T4c, T4m, T4f, T4n; + { + E T47, T48, T4h, T4i; + T47 = T3t + T3u; + T48 = T3X + T3W; + T49 = T47 + T48; + T4l = T47 - T48; + T4h = T3T + T3U; + T4i = T3y + T3B; + T4j = T4h - T4i; + T4p = T4h + T4i; + } + { + E T4a, T4b, T4d, T4e; + T4a = T3E + T3F; + T4b = T3H + T3I; + T4c = FMA(KP980785280, T4a, KP195090322 * T4b); + T4m = FNMS(KP195090322, T4a, KP980785280 * T4b); + T4d = T3L + T3M; + T4e = T3O + T3P; + T4f = FNMS(KP195090322, T4e, KP980785280 * T4d); + T4n = FMA(KP195090322, T4d, KP980785280 * T4e); + } + { + E T4g, T4q, T4k, T4o; + T4g = T4c + T4f; + ro[WS(os, 17)] = T49 - T4g; + ro[WS(os, 1)] = T49 + T4g; + T4q = T4m + T4n; + io[WS(os, 17)] = T4p - T4q; + io[WS(os, 1)] = T4p + T4q; + T4k = T4f - T4c; + io[WS(os, 25)] = T4j - T4k; + io[WS(os, 9)] = T4j + T4k; + T4o = T4m - T4n; + ro[WS(os, 25)] = T4l - T4o; + ro[WS(os, 9)] = T4l + T4o; + } + } + { + E T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p; + { + E T39, T3a, T3j, T3k; + T39 = T1z + T1G; + T3a = T2Z + T2Y; + T3b = T39 + T3a; + T3n = T39 - T3a; + T3j = T2T + T2W; + T3k = T1O + T1V; + T3l = T3j - T3k; + T3r = T3j + T3k; + } + { + E T3c, T3d, T3f, T3g; + T3c = T22 + T2d; + T3d = T2j + T2m; + T3e = FMA(KP555570233, T3c, KP831469612 * T3d); + T3o = FNMS(KP555570233, T3d, KP831469612 * T3c); + T3f = T2t + T2E; + T3g = 
T2K + T2N; + T3h = FNMS(KP555570233, T3g, KP831469612 * T3f); + T3p = FMA(KP831469612, T3g, KP555570233 * T3f); + } + { + E T3i, T3s, T3m, T3q; + T3i = T3e + T3h; + ro[WS(os, 19)] = T3b - T3i; + ro[WS(os, 3)] = T3b + T3i; + T3s = T3o + T3p; + io[WS(os, 19)] = T3r - T3s; + io[WS(os, 3)] = T3r + T3s; + T3m = T3h - T3e; + io[WS(os, 27)] = T3l - T3m; + io[WS(os, 11)] = T3l + T3m; + T3q = T3o - T3p; + ro[WS(os, 27)] = T3n - T3q; + ro[WS(os, 11)] = T3n + T3q; + } + } + } + } +} + +static const kdft_desc desc = { 32, "n1_32", {340, 52, 32, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_32) (planner *p) { + X(kdft_register) (p, n1_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -name n1_4 -include n.h */ + +/* + * This function contains 16 FP additions, 0 FP multiplications, + * (or, 16 additions, 0 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "n.h" + +static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + E T4, Tb, T3, Tf, T9, T5, Tc, Td; + { + E T1, T2, T7, T8; + T1 = ri[0]; + T2 = ri[WS(is, 2)]; + T7 = ii[0]; + T8 = ii[WS(is, 2)]; + T4 = ri[WS(is, 1)]; + Tb = T1 - T2; + T3 = T1 + T2; + Tf = T7 + T8; + T9 = T7 - T8; + T5 = ri[WS(is, 3)]; + Tc = ii[WS(is, 1)]; + Td = ii[WS(is, 3)]; + } + { + E T6, Ta, Te, Tg; + T6 = T4 + T5; + Ta = T4 - T5; + Te = Tc - Td; + Tg = Tc + Td; + io[WS(os, 3)] = Ta + T9; + io[WS(os, 1)] = T9 - Ta; + ro[0] = T3 + T6; + ro[WS(os, 2)] = T3 - T6; + io[0] = Tf + Tg; + io[WS(os, 2)] = Tf - Tg; + ro[WS(os, 3)] = Tb - Te; + ro[WS(os, 1)] = Tb + Te; + } + } + } +} + +static const kdft_desc desc = { 4, "n1_4", {16, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_4) (planner *p) { + X(kdft_register) (p, n1_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 4 -name n1_4 -include n.h */ + +/* + * This function contains 16 FP 
additions, 0 FP multiplications, + * (or, 16 additions, 0 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "n.h" + +static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + E T3, Tb, T9, Tf, T6, Ta, Te, Tg; + { + E T1, T2, T7, T8; + T1 = ri[0]; + T2 = ri[WS(is, 2)]; + T3 = T1 + T2; + Tb = T1 - T2; + T7 = ii[0]; + T8 = ii[WS(is, 2)]; + T9 = T7 - T8; + Tf = T7 + T8; + } + { + E T4, T5, Tc, Td; + T4 = ri[WS(is, 1)]; + T5 = ri[WS(is, 3)]; + T6 = T4 + T5; + Ta = T4 - T5; + Tc = ii[WS(is, 1)]; + Td = ii[WS(is, 3)]; + Te = Tc - Td; + Tg = Tc + Td; + } + ro[WS(os, 2)] = T3 - T6; + io[WS(os, 2)] = Tf - Tg; + ro[0] = T3 + T6; + io[0] = Tf + Tg; + io[WS(os, 1)] = T9 - Ta; + ro[WS(os, 1)] = Tb + Te; + io[WS(os, 3)] = Ta + T9; + ro[WS(os, 3)] = Tb - Te; + } + } +} + +static const kdft_desc desc = { 4, "n1_4", {16, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_4) (planner *p) { + X(kdft_register) (p, n1_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 5 -name n1_5 -include n.h */ + +/* + * This function contains 32 FP additions, 18 FP multiplications, + * (or, 14 additions, 0 multiplications, 18 fused multiply/add), + * 37 stack variables, 4 constants, and 20 memory accesses + */ +#include "n.h" + +static void n1_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { + E Tq, Ti, Tk, Tu, Tw, Tp, Tb, Tj, Tr, Tv; + { + E T1, Tl, Ts, Tt, T8, Ta, Te, Tm, Tn, Th, To, T9; + T1 = ri[0]; + Tl = ii[0]; + { + E T2, T3, T5, T6; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 4)]; + T5 = ri[WS(is, 2)]; + T6 = ri[WS(is, 3)]; + { + E Tc, T4, T7, Td, Tf, Tg; + Tc = ii[WS(is, 1)]; + Ts = T2 - T3; + T4 = T2 + T3; + Tt = T5 - T6; + T7 = T5 + T6; + Td = ii[WS(is, 4)]; + Tf = ii[WS(is, 2)]; + Tg = ii[WS(is, 
3)]; + T8 = T4 + T7; + Ta = T4 - T7; + Te = Tc - Td; + Tm = Tc + Td; + Tn = Tf + Tg; + Th = Tf - Tg; + } + } + ro[0] = T1 + T8; + To = Tm + Tn; + Tq = Tm - Tn; + Ti = FMA(KP618033988, Th, Te); + Tk = FNMS(KP618033988, Te, Th); + io[0] = Tl + To; + T9 = FNMS(KP250000000, T8, T1); + Tu = FMA(KP618033988, Tt, Ts); + Tw = FNMS(KP618033988, Ts, Tt); + Tp = FNMS(KP250000000, To, Tl); + Tb = FMA(KP559016994, Ta, T9); + Tj = FNMS(KP559016994, Ta, T9); + } + Tr = FMA(KP559016994, Tq, Tp); + Tv = FNMS(KP559016994, Tq, Tp); + ro[WS(os, 2)] = FNMS(KP951056516, Tk, Tj); + ro[WS(os, 3)] = FMA(KP951056516, Tk, Tj); + ro[WS(os, 1)] = FMA(KP951056516, Ti, Tb); + ro[WS(os, 4)] = FNMS(KP951056516, Ti, Tb); + io[WS(os, 2)] = FMA(KP951056516, Tw, Tv); + io[WS(os, 3)] = FNMS(KP951056516, Tw, Tv); + io[WS(os, 4)] = FMA(KP951056516, Tu, Tr); + io[WS(os, 1)] = FNMS(KP951056516, Tu, Tr); + } + } +} + +static const kdft_desc desc = { 5, "n1_5", {14, 0, 18, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_5) (planner *p) { + X(kdft_register) (p, n1_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 5 -name n1_5 -include n.h */ + +/* + * This function contains 32 FP additions, 12 FP multiplications, + * (or, 26 additions, 6 multiplications, 6 fused multiply/add), + * 21 stack variables, 4 constants, and 20 memory accesses + */ +#include "n.h" + +static void n1_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { + E T1, To, T8, Tt, T9, Ts, 
Te, Tp, Th, Tn; + T1 = ri[0]; + To = ii[0]; + { + E T2, T3, T4, T5, T6, T7; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 4)]; + T4 = T2 + T3; + T5 = ri[WS(is, 2)]; + T6 = ri[WS(is, 3)]; + T7 = T5 + T6; + T8 = T4 + T7; + Tt = T5 - T6; + T9 = KP559016994 * (T4 - T7); + Ts = T2 - T3; + } + { + E Tc, Td, Tl, Tf, Tg, Tm; + Tc = ii[WS(is, 1)]; + Td = ii[WS(is, 4)]; + Tl = Tc + Td; + Tf = ii[WS(is, 2)]; + Tg = ii[WS(is, 3)]; + Tm = Tf + Tg; + Te = Tc - Td; + Tp = Tl + Tm; + Th = Tf - Tg; + Tn = KP559016994 * (Tl - Tm); + } + ro[0] = T1 + T8; + io[0] = To + Tp; + { + E Ti, Tk, Tb, Tj, Ta; + Ti = FMA(KP951056516, Te, KP587785252 * Th); + Tk = FNMS(KP587785252, Te, KP951056516 * Th); + Ta = FNMS(KP250000000, T8, T1); + Tb = T9 + Ta; + Tj = Ta - T9; + ro[WS(os, 4)] = Tb - Ti; + ro[WS(os, 3)] = Tj + Tk; + ro[WS(os, 1)] = Tb + Ti; + ro[WS(os, 2)] = Tj - Tk; + } + { + E Tu, Tv, Tr, Tw, Tq; + Tu = FMA(KP951056516, Ts, KP587785252 * Tt); + Tv = FNMS(KP587785252, Ts, KP951056516 * Tt); + Tq = FNMS(KP250000000, Tp, To); + Tr = Tn + Tq; + Tw = Tq - Tn; + io[WS(os, 1)] = Tr - Tu; + io[WS(os, 3)] = Tw - Tv; + io[WS(os, 4)] = Tu + Tr; + io[WS(os, 2)] = Tv + Tw; + } + } + } +} + +static const kdft_desc desc = { 5, "n1_5", {26, 6, 6, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_5) (planner *p) { + X(kdft_register) (p, n1_5, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include n.h */ + +/* + * This function contains 36 FP additions, 12 FP multiplications, + * (or, 24 additions, 0 multiplications, 12 fused multiply/add), + * 30 stack variables, 2 constants, and 24 memory accesses + */ +#include "n.h" + +static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + E TA, Tz; + { + E Tb, T3, Tx, Tp, Tj, Te, Ts, Ta, Tu, Ti, Tk; + { + E T1, T2, Tn, To; + T1 = ri[0]; + T2 = ri[WS(is, 3)]; + Tn = ii[0]; + To = ii[WS(is, 3)]; + { + E T4, T5, T7, T8; + T4 = ri[WS(is, 2)]; + Tb = T1 + T2; + T3 = T1 - T2; + Tx = Tn + To; + Tp = Tn - To; + T5 = ri[WS(is, 5)]; + T7 = ri[WS(is, 4)]; + T8 = ri[WS(is, 1)]; + { + E Tg, Tc, T6, Td, T9, Th; + Tg = ii[WS(is, 2)]; + Tc = T4 + T5; + T6 = T4 - T5; + Td = T7 + T8; + T9 = T7 - T8; + Th = ii[WS(is, 5)]; + Tj = ii[WS(is, 4)]; + Te = Tc + Td; + TA = Td - Tc; + Ts = T9 - T6; + Ta = T6 
+ T9; + Tu = Tg + Th; + Ti = Tg - Th; + Tk = ii[WS(is, 1)]; + } + } + } + ro[WS(os, 3)] = T3 + Ta; + ro[0] = Tb + Te; + { + E Tf, Tv, Tl, Ty, Tr; + Tf = FNMS(KP500000000, Ta, T3); + Tv = Tj + Tk; + Tl = Tj - Tk; + { + E Tt, Tw, Tq, Tm; + Tt = FNMS(KP500000000, Te, Tb); + Ty = Tu + Tv; + Tw = Tu - Tv; + Tq = Ti + Tl; + Tm = Ti - Tl; + io[0] = Tx + Ty; + ro[WS(os, 1)] = FMA(KP866025403, Tm, Tf); + ro[WS(os, 5)] = FNMS(KP866025403, Tm, Tf); + Tr = FNMS(KP500000000, Tq, Tp); + io[WS(os, 3)] = Tp + Tq; + ro[WS(os, 2)] = FNMS(KP866025403, Tw, Tt); + ro[WS(os, 4)] = FMA(KP866025403, Tw, Tt); + } + io[WS(os, 5)] = FNMS(KP866025403, Ts, Tr); + io[WS(os, 1)] = FMA(KP866025403, Ts, Tr); + Tz = FNMS(KP500000000, Ty, Tx); + } + } + io[WS(os, 4)] = FMA(KP866025403, TA, Tz); + io[WS(os, 2)] = FNMS(KP866025403, TA, Tz); + } + } +} + +static const kdft_desc desc = { 6, "n1_6", {24, 0, 12, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_6) (planner *p) { + X(kdft_register) (p, n1_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include n.h */ + +/* + * This function contains 36 FP additions, 8 FP multiplications, + * (or, 32 additions, 4 multiplications, 4 fused multiply/add), + * 23 stack variables, 2 constants, and 24 memory accesses + */ +#include "n.h" + +static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + E T3, Tb, Tq, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tr; + E Ty; + { + E T1, T2, To, Tp; + T1 = ri[0]; + T2 = ri[WS(is, 3)]; + T3 = T1 - T2; + Tb = T1 + T2; + To = ii[0]; + Tp = ii[WS(is, 3)]; + Tq = To - Tp; + Tx = To 
+ Tp; + } + { + E T4, T5, T7, T8; + T4 = ri[WS(is, 2)]; + T5 = ri[WS(is, 5)]; + T6 = T4 - T5; + Tc = T4 + T5; + T7 = ri[WS(is, 4)]; + T8 = ri[WS(is, 1)]; + T9 = T7 - T8; + Td = T7 + T8; + } + Ta = T6 + T9; + Te = Tc + Td; + { + E Tg, Th, Tj, Tk; + Tg = ii[WS(is, 2)]; + Th = ii[WS(is, 5)]; + Ti = Tg - Th; + Tu = Tg + Th; + Tj = ii[WS(is, 4)]; + Tk = ii[WS(is, 1)]; + Tl = Tj - Tk; + Tv = Tj + Tk; + } + Tr = Ti + Tl; + Ty = Tu + Tv; + ro[WS(os, 3)] = T3 + Ta; + io[WS(os, 3)] = Tq + Tr; + ro[0] = Tb + Te; + io[0] = Tx + Ty; + { + E Tf, Tm, Tn, Ts; + Tf = FNMS(KP500000000, Ta, T3); + Tm = KP866025403 * (Ti - Tl); + ro[WS(os, 5)] = Tf - Tm; + ro[WS(os, 1)] = Tf + Tm; + Tn = KP866025403 * (T9 - T6); + Ts = FNMS(KP500000000, Tr, Tq); + io[WS(os, 1)] = Tn + Ts; + io[WS(os, 5)] = Ts - Tn; + } + { + E Tt, Tw, Tz, TA; + Tt = FNMS(KP500000000, Te, Tb); + Tw = KP866025403 * (Tu - Tv); + ro[WS(os, 2)] = Tt - Tw; + ro[WS(os, 4)] = Tt + Tw; + Tz = FNMS(KP500000000, Ty, Tx); + TA = KP866025403 * (Td - Tc); + io[WS(os, 2)] = Tz - TA; + io[WS(os, 4)] = TA + Tz; + } + } + } +} + +static const kdft_desc desc = { 6, "n1_6", {32, 4, 4, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_6) (planner *p) { + X(kdft_register) (p, n1_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2981 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name n1_64 -include n.h */ + +/* + * This function contains 912 FP additions, 392 FP multiplications, + * (or, 520 additions, 0 multiplications, 392 fused multiply/add), + * 202 stack variables, 15 constants, and 256 memory accesses + */ +#include "n.h" + +static void n1_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, 
+0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + E T9b, T9e; + { + E T7B, T37, T5Z, T8F, Td9, Tf, TcB, TbB, T7C, T62, TdH, T2i, Tcb, Tah, T8G; + E T3e, Tu, TdI, Tak, TbC, TbD, Tan, Tda, T2x, T65, T3m, T8I, T7G, T8J, T7J; + E T64, T3t, Tdd, TK, Tce, Tas, Tcf, Tav, Tdc, T2N, T6G, T3G, T9k, T7O, T9l; + E T7R, T6H, T3N, TdA, T1L, Tct, Tbs, Teo, Tdx, T6Y, T5j, T6V, T5Q, T9z, T8y; + E Tcw, Tbb, T9C, T8n, Tdf, TZ, Tch, Taz, Tci, TaC, Tdg, T32, T6J, T3Z, T9n; + E T7V, T9o, T7Y, T6K, T46, Tdp, T1g, Tcm, Tb1, Tej, Tdm, T6R, T4q, T6O, T4X; + E T9s, T8f, Tcp, TaK, T9v, T84, Tdn, T1v, Tcq, Tb4, Tek, Tds, T6P, T4N, T6S; + E T50, T9w, T8i, Tcn, TaV, T9t, T8b, Tdy, T20, Tcx, Tbv, Tep, TdD, T8q, T6W; + E T5G, T6Z, T5T, T8t, T9D, T8B, Tcu, Tbm, T8l, T8m; + { + E T3s, T3p, T3M, T3J; + { + E Taf, T3d, T3a, Tag; + { + E T35, T3, T5Y, T26, T5X, T6, T36, T29, Tb, T39, Ta, T38, T2d, Tc, T2e; + E T2f; + { + E T4, T5, T27, T28; + { + E T1, T2, T24, T25; + T1 = ri[0]; + T2 = ri[WS(is, 32)]; + T24 = ii[0]; + T25 = ii[WS(is, 32)]; + T4 = ri[WS(is, 16)]; + T35 = T1 - T2; + T3 = T1 + T2; + T5Y = T24 - T25; + T26 = T24 + T25; + T5 = ri[WS(is, 48)]; + T27 = ii[WS(is, 16)]; + T28 = ii[WS(is, 48)]; + } + { + E T8, T9, T2b, T2c; + T8 = ri[WS(is, 8)]; + T5X = T4 - T5; + T6 = T4 + T5; + T36 = T27 - T28; + T29 = T27 + T28; + T9 = ri[WS(is, 40)]; + T2b = ii[WS(is, 8)]; + T2c = ii[WS(is, 40)]; + Tb = ri[WS(is, 56)]; + T39 = T8 - T9; + Ta = T8 + T9; + T38 = T2b - T2c; + T2d = T2b + T2c; + Tc = ri[WS(is, 24)]; + T2e = ii[WS(is, 56)]; + T2f = ii[WS(is, 24)]; + } + } + { + E T3b, T3c, T2g, T7, Te, Tbz, Td; + T7B = T35 + T36; + T37 = T35 - T36; + T3b = Tb - Tc; + Td = Tb + Tc; + T3c = T2e - T2f; + T2g = 
T2e + T2f; + T5Z = T5X + T5Y; + T8F = T5Y - T5X; + Taf = T3 - T6; + T7 = T3 + T6; + Te = Ta + Td; + Tbz = Td - Ta; + { + E T2a, T60, T61, TbA, T2h; + TbA = T26 - T29; + T2a = T26 + T29; + T3d = T3b + T3c; + T60 = T3b - T3c; + Td9 = T7 - Te; + Tf = T7 + Te; + TcB = TbA - Tbz; + TbB = Tbz + TbA; + T61 = T39 + T38; + T3a = T38 - T39; + T2h = T2d + T2g; + Tag = T2d - T2g; + T7C = T61 + T60; + T62 = T60 - T61; + TdH = T2a - T2h; + T2i = T2a + T2h; + } + } + } + { + E T3j, Ti, T3h, T2l, T3g, Tl, T3k, T2o, Tq, T3q, Tp, T3o, T2s, Tr, T2t; + E T2u; + { + E Tj, Tk, T2m, T2n; + { + E Tg, Th, T2j, T2k; + Tg = ri[WS(is, 4)]; + Tcb = Taf - Tag; + Tah = Taf + Tag; + T8G = T3a + T3d; + T3e = T3a - T3d; + Th = ri[WS(is, 36)]; + T2j = ii[WS(is, 4)]; + T2k = ii[WS(is, 36)]; + Tj = ri[WS(is, 20)]; + T3j = Tg - Th; + Ti = Tg + Th; + T3h = T2j - T2k; + T2l = T2j + T2k; + Tk = ri[WS(is, 52)]; + T2m = ii[WS(is, 20)]; + T2n = ii[WS(is, 52)]; + } + { + E Tn, To, T2q, T2r; + Tn = ri[WS(is, 60)]; + T3g = Tj - Tk; + Tl = Tj + Tk; + T3k = T2m - T2n; + T2o = T2m + T2n; + To = ri[WS(is, 28)]; + T2q = ii[WS(is, 60)]; + T2r = ii[WS(is, 28)]; + Tq = ri[WS(is, 12)]; + T3q = Tn - To; + Tp = Tn + To; + T3o = T2q - T2r; + T2s = T2q + T2r; + Tr = ri[WS(is, 44)]; + T2t = ii[WS(is, 12)]; + T2u = ii[WS(is, 44)]; + } + } + { + E T3n, T3r, T2p, T2w; + { + E Tai, Tm, T2v, Tal, Tt, Taj, Ts, Tam; + Tai = Ti - Tl; + Tm = Ti + Tl; + T3n = Tq - Tr; + Ts = Tq + Tr; + T3r = T2t - T2u; + T2v = T2t + T2u; + Tal = Tp - Ts; + Tt = Tp + Ts; + Taj = T2l - T2o; + T2p = T2l + T2o; + Tam = T2s - T2v; + T2w = T2s + T2v; + Tu = Tm + Tt; + TdI = Tt - Tm; + Tak = Tai + Taj; + TbC = Taj - Tai; + TbD = Tal + Tam; + Tan = Tal - Tam; + } + { + E T7F, T7E, T3i, T3l, T7H, T7I; + T7F = T3h - T3g; + T3i = T3g + T3h; + T3l = T3j - T3k; + T7E = T3j + T3k; + Tda = T2p - T2w; + T2x = T2p + T2w; + T65 = FNMS(KP414213562, T3i, T3l); + T3m = FMA(KP414213562, T3l, T3i); + T3s = T3q - T3r; + T7H = T3q + T3r; + T7I = T3o - T3n; + T3p = T3n + T3o; 
+ T8I = FNMS(KP414213562, T7E, T7F); + T7G = FMA(KP414213562, T7F, T7E); + T8J = FMA(KP414213562, T7H, T7I); + T7J = FNMS(KP414213562, T7I, T7H); + } + } + } + } + { + E T3H, Ty, T3x, T2B, T3w, TB, T3I, T2E, TI, T2L, T3z, TF, T3E, T3K, T2I; + E T3A; + { + E T2z, T2A, Tz, TA, Tw, Tx, T2C, T2D; + Tw = ri[WS(is, 2)]; + Tx = ri[WS(is, 34)]; + T2z = ii[WS(is, 2)]; + T64 = FMA(KP414213562, T3p, T3s); + T3t = FNMS(KP414213562, T3s, T3p); + T3H = Tw - Tx; + Ty = Tw + Tx; + T2A = ii[WS(is, 34)]; + Tz = ri[WS(is, 18)]; + TA = ri[WS(is, 50)]; + T2C = ii[WS(is, 18)]; + T3x = T2z - T2A; + T2B = T2z + T2A; + T3w = Tz - TA; + TB = Tz + TA; + T2D = ii[WS(is, 50)]; + { + E T2J, T3C, T2K, TG, TH; + TG = ri[WS(is, 58)]; + TH = ri[WS(is, 26)]; + T2J = ii[WS(is, 58)]; + T3I = T2C - T2D; + T2E = T2C + T2D; + T3C = TG - TH; + TI = TG + TH; + T2K = ii[WS(is, 26)]; + { + E T2G, T2H, TD, TE, T3D; + TD = ri[WS(is, 10)]; + TE = ri[WS(is, 42)]; + T3D = T2J - T2K; + T2L = T2J + T2K; + T2G = ii[WS(is, 10)]; + T3z = TD - TE; + TF = TD + TE; + T2H = ii[WS(is, 42)]; + T3E = T3C - T3D; + T3K = T3C + T3D; + T2I = T2G + T2H; + T3A = T2G - T2H; + } + } + } + { + E T3L, T3B, T2F, T2M; + { + E Tat, Taq, Tar, TC, TJ, Tau; + Tat = Ty - TB; + TC = Ty + TB; + TJ = TF + TI; + Taq = TI - TF; + T3L = T3A - T3z; + T3B = T3z + T3A; + Tdd = TC - TJ; + TK = TC + TJ; + Tar = T2B - T2E; + T2F = T2B + T2E; + Tau = T2I - T2L; + T2M = T2I + T2L; + Tce = Tar - Taq; + Tas = Taq + Tar; + Tcf = Tat - Tau; + Tav = Tat + Tau; + } + { + E T7M, T7Q, T7N, T3y, T3F, T7P; + T7M = T3x - T3w; + T3y = T3w + T3x; + T3F = T3B - T3E; + T7Q = T3B + T3E; + Tdc = T2F - T2M; + T2N = T2F + T2M; + T6G = FMA(KP707106781, T3F, T3y); + T3G = FNMS(KP707106781, T3F, T3y); + T7N = T3L + T3K; + T3M = T3K - T3L; + T3J = T3H - T3I; + T7P = T3H + T3I; + T9k = FNMS(KP707106781, T7N, T7M); + T7O = FMA(KP707106781, T7N, T7M); + T9l = FNMS(KP707106781, T7Q, T7P); + T7R = FMA(KP707106781, T7Q, T7P); + } + } + } + { + E T5I, T1z, Tb8, T56, T53, T1C, Tb9, 
T5L, T1J, Tbq, T58, T1G, T5N, T5h, Tbp; + E T5b; + { + E T54, T55, T1A, T1B, T1x, T1y, T5J, T5K; + T1x = ri[WS(is, 63)]; + T1y = ri[WS(is, 31)]; + T54 = ii[WS(is, 63)]; + T6H = FMA(KP707106781, T3M, T3J); + T3N = FNMS(KP707106781, T3M, T3J); + T5I = T1x - T1y; + T1z = T1x + T1y; + T55 = ii[WS(is, 31)]; + T1A = ri[WS(is, 15)]; + T1B = ri[WS(is, 47)]; + T5J = ii[WS(is, 15)]; + Tb8 = T54 + T55; + T56 = T54 - T55; + T53 = T1A - T1B; + T1C = T1A + T1B; + T5K = ii[WS(is, 47)]; + { + E T5e, T5d, T5f, T1H, T1I; + T1H = ri[WS(is, 55)]; + T1I = ri[WS(is, 23)]; + T5e = ii[WS(is, 55)]; + Tb9 = T5J + T5K; + T5L = T5J - T5K; + T5d = T1H - T1I; + T1J = T1H + T1I; + T5f = ii[WS(is, 23)]; + { + E T59, T5a, T1E, T1F, T5g; + T1E = ri[WS(is, 7)]; + T1F = ri[WS(is, 39)]; + T5g = T5e - T5f; + Tbq = T5e + T5f; + T59 = ii[WS(is, 7)]; + T58 = T1E - T1F; + T1G = T1E + T1F; + T5a = ii[WS(is, 39)]; + T5N = T5d + T5g; + T5h = T5d - T5g; + Tbp = T59 + T5a; + T5b = T59 - T5a; + } + } + } + { + E Tb7, T5O, Tba, T57, T5i, T8x, T8w, T5M, T5P; + { + E Tbo, T5c, Tbr, Tdw, T1D, T1K, Tdv; + Tbo = T1z - T1C; + T1D = T1z + T1C; + T1K = T1G + T1J; + Tb7 = T1J - T1G; + T5c = T58 + T5b; + T5O = T5b - T58; + TdA = T1D - T1K; + T1L = T1D + T1K; + Tbr = Tbp - Tbq; + Tdw = Tbp + Tbq; + Tba = Tb8 - Tb9; + Tdv = Tb8 + Tb9; + T8l = T56 - T53; + T57 = T53 + T56; + Tct = Tbo - Tbr; + Tbs = Tbo + Tbr; + Teo = Tdv + Tdw; + Tdx = Tdv - Tdw; + T5i = T5c - T5h; + T8x = T5c + T5h; + } + T8w = T5I + T5L; + T5M = T5I - T5L; + T5P = T5N - T5O; + T8m = T5O + T5N; + T6Y = FMA(KP707106781, T5i, T57); + T5j = FNMS(KP707106781, T5i, T57); + T6V = FMA(KP707106781, T5P, T5M); + T5Q = FNMS(KP707106781, T5P, T5M); + T9z = FNMS(KP707106781, T8x, T8w); + T8y = FMA(KP707106781, T8x, T8w); + Tcw = Tba - Tb7; + Tbb = Tb7 + Tba; + } + } + } + { + E T82, T83, T45, T42, T87, T8a; + { + E T40, TN, T3Q, T2Q, T3P, TQ, T41, T2T, TX, T30, T3S, TU, T3X, T43, T2X; + E T3T; + { + E T2O, T2P, TO, TP, TL, TM, T2R, T2S; + TL = ri[WS(is, 62)]; + TM = 
ri[WS(is, 30)]; + T2O = ii[WS(is, 62)]; + T9C = FNMS(KP707106781, T8m, T8l); + T8n = FMA(KP707106781, T8m, T8l); + T40 = TL - TM; + TN = TL + TM; + T2P = ii[WS(is, 30)]; + TO = ri[WS(is, 14)]; + TP = ri[WS(is, 46)]; + T2R = ii[WS(is, 14)]; + T3Q = T2O - T2P; + T2Q = T2O + T2P; + T3P = TO - TP; + TQ = TO + TP; + T2S = ii[WS(is, 46)]; + { + E T2Y, T3V, T2Z, TV, TW; + TV = ri[WS(is, 54)]; + TW = ri[WS(is, 22)]; + T2Y = ii[WS(is, 54)]; + T41 = T2R - T2S; + T2T = T2R + T2S; + T3V = TV - TW; + TX = TV + TW; + T2Z = ii[WS(is, 22)]; + { + E T2V, T2W, TS, TT, T3W; + TS = ri[WS(is, 6)]; + TT = ri[WS(is, 38)]; + T3W = T2Y - T2Z; + T30 = T2Y + T2Z; + T2V = ii[WS(is, 6)]; + T3S = TS - TT; + TU = TS + TT; + T2W = ii[WS(is, 38)]; + T3X = T3V - T3W; + T43 = T3V + T3W; + T2X = T2V + T2W; + T3T = T2V - T2W; + } + } + } + { + E T44, T3U, T2U, T31; + { + E TaA, Tax, Tay, TR, TY, TaB; + TaA = TN - TQ; + TR = TN + TQ; + TY = TU + TX; + Tax = TX - TU; + T44 = T3T - T3S; + T3U = T3S + T3T; + Tdf = TR - TY; + TZ = TR + TY; + Tay = T2Q - T2T; + T2U = T2Q + T2T; + TaB = T2X - T30; + T31 = T2X + T30; + Tch = Tay - Tax; + Taz = Tax + Tay; + Tci = TaA - TaB; + TaC = TaA + TaB; + } + { + E T7T, T7X, T7U, T3R, T3Y, T7W; + T7T = T3Q - T3P; + T3R = T3P + T3Q; + T3Y = T3U - T3X; + T7X = T3U + T3X; + Tdg = T2U - T31; + T32 = T2U + T31; + T6J = FMA(KP707106781, T3Y, T3R); + T3Z = FNMS(KP707106781, T3Y, T3R); + T7U = T44 + T43; + T45 = T43 - T44; + T42 = T40 - T41; + T7W = T40 + T41; + T9n = FNMS(KP707106781, T7U, T7T); + T7V = FMA(KP707106781, T7U, T7T); + T9o = FNMS(KP707106781, T7X, T7W); + T7Y = FMA(KP707106781, T7X, T7W); + } + } + } + { + E T4P, T14, TaH, T4d, T4a, T17, TaI, T4S, T1e, TaZ, T4f, T1b, T4U, T4o, TaY; + E T4i; + { + E T4b, T4c, T15, T16, T12, T13, T4Q, T4R; + T12 = ri[WS(is, 1)]; + T13 = ri[WS(is, 33)]; + T4b = ii[WS(is, 1)]; + T6K = FMA(KP707106781, T45, T42); + T46 = FNMS(KP707106781, T45, T42); + T4P = T12 - T13; + T14 = T12 + T13; + T4c = ii[WS(is, 33)]; + T15 = ri[WS(is, 17)]; + 
T16 = ri[WS(is, 49)]; + T4Q = ii[WS(is, 17)]; + TaH = T4b + T4c; + T4d = T4b - T4c; + T4a = T15 - T16; + T17 = T15 + T16; + T4R = ii[WS(is, 49)]; + { + E T4l, T4k, T4m, T1c, T1d; + T1c = ri[WS(is, 57)]; + T1d = ri[WS(is, 25)]; + T4l = ii[WS(is, 57)]; + TaI = T4Q + T4R; + T4S = T4Q - T4R; + T4k = T1c - T1d; + T1e = T1c + T1d; + T4m = ii[WS(is, 25)]; + { + E T4g, T4h, T19, T1a, T4n; + T19 = ri[WS(is, 9)]; + T1a = ri[WS(is, 41)]; + T4n = T4l - T4m; + TaZ = T4l + T4m; + T4g = ii[WS(is, 9)]; + T4f = T19 - T1a; + T1b = T19 + T1a; + T4h = ii[WS(is, 41)]; + T4U = T4k + T4n; + T4o = T4k - T4n; + TaY = T4g + T4h; + T4i = T4g - T4h; + } + } + } + { + E TaG, T4V, TaJ, T4e, T4p, T8e, T8d, T4T, T4W; + { + E TaX, T4j, Tb0, Tdl, T18, T1f, Tdk; + TaX = T14 - T17; + T18 = T14 + T17; + T1f = T1b + T1e; + TaG = T1e - T1b; + T4j = T4f + T4i; + T4V = T4i - T4f; + Tdp = T18 - T1f; + T1g = T18 + T1f; + Tb0 = TaY - TaZ; + Tdl = TaY + TaZ; + TaJ = TaH - TaI; + Tdk = TaH + TaI; + T82 = T4d - T4a; + T4e = T4a + T4d; + Tcm = TaX - Tb0; + Tb1 = TaX + Tb0; + Tej = Tdk + Tdl; + Tdm = Tdk - Tdl; + T4p = T4j - T4o; + T8e = T4j + T4o; + } + T8d = T4P + T4S; + T4T = T4P - T4S; + T4W = T4U - T4V; + T83 = T4V + T4U; + T6R = FMA(KP707106781, T4p, T4e); + T4q = FNMS(KP707106781, T4p, T4e); + T6O = FMA(KP707106781, T4W, T4T); + T4X = FNMS(KP707106781, T4W, T4T); + T9s = FNMS(KP707106781, T8e, T8d); + T8f = FMA(KP707106781, T8e, T8d); + Tcp = TaJ - TaG; + TaK = TaG + TaJ; + } + } + { + E T85, T4L, TaO, T1n, Tdq, TaN, T86, T4G, T4r, T1q, T4s, TaR, T4z, T4w, T1t; + E T4t; + { + E T4C, T1j, T4D, TaL, T4K, T4H, T1m, T4E; + { + E T4I, T4J, T1h, T1i, T1k, T1l; + T1h = ri[WS(is, 5)]; + T1i = ri[WS(is, 37)]; + T4I = ii[WS(is, 5)]; + T9v = FNMS(KP707106781, T83, T82); + T84 = FMA(KP707106781, T83, T82); + T4C = T1h - T1i; + T1j = T1h + T1i; + T4J = ii[WS(is, 37)]; + T1k = ri[WS(is, 21)]; + T1l = ri[WS(is, 53)]; + T4D = ii[WS(is, 21)]; + TaL = T4I + T4J; + T4K = T4I - T4J; + T4H = T1k - T1l; + T1m = T1k + T1l; + T4E 
= ii[WS(is, 53)]; + } + { + E T4x, T4y, T1r, T1s; + { + E T1o, T4F, TaM, T1p; + T1o = ri[WS(is, 61)]; + T85 = T4K - T4H; + T4L = T4H + T4K; + TaO = T1j - T1m; + T1n = T1j + T1m; + T4F = T4D - T4E; + TaM = T4D + T4E; + T1p = ri[WS(is, 29)]; + T4x = ii[WS(is, 61)]; + Tdq = TaL + TaM; + TaN = TaL - TaM; + T86 = T4C + T4F; + T4G = T4C - T4F; + T4r = T1o - T1p; + T1q = T1o + T1p; + T4y = ii[WS(is, 29)]; + } + T1r = ri[WS(is, 13)]; + T1s = ri[WS(is, 45)]; + T4s = ii[WS(is, 13)]; + TaR = T4x + T4y; + T4z = T4x - T4y; + T4w = T1r - T1s; + T1t = T1r + T1s; + T4t = ii[WS(is, 45)]; + } + } + { + E T88, TaP, T89, TaU, T4Z, T4B, T4M, T4Y, T8g, T8h; + { + E T4A, Tb2, Tdr, T4v, Tb3; + { + E TaQ, T1u, T4u, TaS, TaT; + T88 = T4z - T4w; + T4A = T4w + T4z; + TaQ = T1q - T1t; + T1u = T1q + T1t; + T4u = T4s - T4t; + TaS = T4s + T4t; + Tb2 = TaO + TaN; + TaP = TaN - TaO; + Tdr = TaR + TaS; + TaT = TaR - TaS; + T89 = T4r + T4u; + T4v = T4r - T4u; + Tdn = T1u - T1n; + T1v = T1n + T1u; + Tb3 = TaQ - TaT; + TaU = TaQ + TaT; + } + T4Z = FNMS(KP414213562, T4v, T4A); + T4B = FMA(KP414213562, T4A, T4v); + Tcq = Tb2 - Tb3; + Tb4 = Tb2 + Tb3; + Tek = Tdq + Tdr; + Tds = Tdq - Tdr; + T4M = FNMS(KP414213562, T4L, T4G); + T4Y = FMA(KP414213562, T4G, T4L); + } + T87 = FNMS(KP414213562, T86, T85); + T8g = FMA(KP414213562, T85, T86); + T6P = T4M + T4B; + T4N = T4B - T4M; + T6S = T4Y + T4Z; + T50 = T4Y - T4Z; + T8h = FNMS(KP414213562, T88, T89); + T8a = FMA(KP414213562, T89, T88); + T9w = T8g - T8h; + T8i = T8g + T8h; + Tcn = TaU - TaP; + TaV = TaP + TaU; + } + } + { + E T8o, T5E, Tbf, T1S, TdB, Tbe, T8p, T5z, T5k, T1V, T5l, Tbi, T5s, T5p, T1Y; + E T5m; + { + E T5v, T1O, T5w, Tbc, T5D, T5A, T1R, T5x; + { + E T5B, T5C, T1M, T1N, T1P, T1Q; + T1M = ri[WS(is, 3)]; + T1N = ri[WS(is, 35)]; + T5B = ii[WS(is, 3)]; + T9t = T8a - T87; + T8b = T87 + T8a; + T5v = T1M - T1N; + T1O = T1M + T1N; + T5C = ii[WS(is, 35)]; + T1P = ri[WS(is, 19)]; + T1Q = ri[WS(is, 51)]; + T5w = ii[WS(is, 19)]; + Tbc = T5B + T5C; + T5D = 
T5B - T5C; + T5A = T1P - T1Q; + T1R = T1P + T1Q; + T5x = ii[WS(is, 51)]; + } + { + E T5q, T5r, T1W, T1X; + { + E T1T, T5y, Tbd, T1U; + T1T = ri[WS(is, 59)]; + T8o = T5D - T5A; + T5E = T5A + T5D; + Tbf = T1O - T1R; + T1S = T1O + T1R; + T5y = T5w - T5x; + Tbd = T5w + T5x; + T1U = ri[WS(is, 27)]; + T5q = ii[WS(is, 59)]; + TdB = Tbc + Tbd; + Tbe = Tbc - Tbd; + T8p = T5v + T5y; + T5z = T5v - T5y; + T5k = T1T - T1U; + T1V = T1T + T1U; + T5r = ii[WS(is, 27)]; + } + T1W = ri[WS(is, 11)]; + T1X = ri[WS(is, 43)]; + T5l = ii[WS(is, 11)]; + Tbi = T5q + T5r; + T5s = T5q - T5r; + T5p = T1W - T1X; + T1Y = T1W + T1X; + T5m = ii[WS(is, 43)]; + } + } + { + E T8r, Tbg, T8s, Tbl, T5S, T5u, T5F, T5R, T8z, T8A; + { + E T5t, Tbt, TdC, T5o, Tbu; + { + E Tbh, T1Z, T5n, Tbj, Tbk; + T8r = T5s - T5p; + T5t = T5p + T5s; + Tbh = T1V - T1Y; + T1Z = T1V + T1Y; + T5n = T5l - T5m; + Tbj = T5l + T5m; + Tbt = Tbf + Tbe; + Tbg = Tbe - Tbf; + TdC = Tbi + Tbj; + Tbk = Tbi - Tbj; + T8s = T5k + T5n; + T5o = T5k - T5n; + Tdy = T1Z - T1S; + T20 = T1S + T1Z; + Tbu = Tbh - Tbk; + Tbl = Tbh + Tbk; + } + T5S = FNMS(KP414213562, T5o, T5t); + T5u = FMA(KP414213562, T5t, T5o); + Tcx = Tbt - Tbu; + Tbv = Tbt + Tbu; + Tep = TdB + TdC; + TdD = TdB - TdC; + T5F = FNMS(KP414213562, T5E, T5z); + T5R = FMA(KP414213562, T5z, T5E); + } + T8q = FNMS(KP414213562, T8p, T8o); + T8z = FMA(KP414213562, T8o, T8p); + T6W = T5F + T5u; + T5G = T5u - T5F; + T6Z = T5R + T5S; + T5T = T5R - T5S; + T8A = FNMS(KP414213562, T8r, T8s); + T8t = FMA(KP414213562, T8s, T8r); + T9D = T8z - T8A; + T8B = T8z + T8A; + Tcu = Tbl - Tbg; + Tbm = Tbg + Tbl; + } + } + } + { + E T9A, T8u, TbE, Tao, Td7, Td8; + { + E Teq, Ten, Tex, Teh, TeB, Tev, Tey, Tem, Te9, Tec; + { + E Tef, Teu, Tel, T11, Tei, Tet, T2y, TeI, T23, T22, T33, Teg, TeD, TeG, T34; + E TeH; + { + E TeE, TeF, Tv, T10, T1w, T21; + Tef = Tf - Tu; + Tv = Tf + Tu; + T10 = TK + TZ; + Teu = TZ - TK; + Tel = Tej - Tek; + TeE = Tej + Tek; + T9A = T8t - T8q; + T8u = T8q + T8t; + TeD = Tv - T10; + 
T11 = Tv + T10; + TeF = Teo + Tep; + Teq = Teo - Tep; + Tei = T1g - T1v; + T1w = T1g + T1v; + T21 = T1L + T20; + Ten = T1L - T20; + Tet = T2i - T2x; + T2y = T2i + T2x; + TeI = TeE + TeF; + TeG = TeE - TeF; + T23 = T21 - T1w; + T22 = T1w + T21; + T33 = T2N + T32; + Teg = T2N - T32; + } + ro[WS(os, 16)] = TeD + TeG; + ro[WS(os, 48)] = TeD - TeG; + ro[0] = T11 + T22; + ro[WS(os, 32)] = T11 - T22; + T34 = T2y - T33; + TeH = T2y + T33; + io[0] = TeH + TeI; + io[WS(os, 32)] = TeH - TeI; + io[WS(os, 48)] = T34 - T23; + io[WS(os, 16)] = T23 + T34; + Tex = Tef - Teg; + Teh = Tef + Teg; + TeB = Teu + Tet; + Tev = Tet - Teu; + Tey = Tel - Tei; + Tem = Tei + Tel; + } + { + E TdV, Tdb, TdJ, Te5, TdE, Tdz, Te6, Tdi, Teb, Te3, TdZ, TdY, TdW, TdM, TdR; + E Tdu; + { + E TdL, Tde, Tdh, TdK, Tez, Ter; + TdV = Td9 + Tda; + Tdb = Td9 - Tda; + TdJ = TdH - TdI; + Te5 = TdI + TdH; + Tez = Ten + Teq; + Ter = Ten - Teq; + TdL = Tdd + Tdc; + Tde = Tdc - Tdd; + { + E TeA, TeC, Tew, Tes; + TeA = Tey - Tez; + TeC = Tey + Tez; + Tew = Ter - Tem; + Tes = Tem + Ter; + ro[WS(os, 24)] = FMA(KP707106781, TeA, Tex); + ro[WS(os, 56)] = FNMS(KP707106781, TeA, Tex); + io[WS(os, 8)] = FMA(KP707106781, TeC, TeB); + io[WS(os, 40)] = FNMS(KP707106781, TeC, TeB); + io[WS(os, 24)] = FMA(KP707106781, Tew, Tev); + io[WS(os, 56)] = FNMS(KP707106781, Tew, Tev); + ro[WS(os, 8)] = FMA(KP707106781, Tes, Teh); + ro[WS(os, 40)] = FNMS(KP707106781, Tes, Teh); + Tdh = Tdf + Tdg; + TdK = Tdf - Tdg; + } + { + E Te1, Te2, Tdo, Tdt; + TdE = TdA - TdD; + Te1 = TdA + TdD; + Te2 = Tdy + Tdx; + Tdz = Tdx - Tdy; + Te6 = Tde + Tdh; + Tdi = Tde - Tdh; + Teb = FMA(KP414213562, Te1, Te2); + Te3 = FNMS(KP414213562, Te2, Te1); + TdZ = Tdn + Tdm; + Tdo = Tdm - Tdn; + Tdt = Tdp - Tds; + TdY = Tdp + Tds; + TdW = TdL + TdK; + TdM = TdK - TdL; + TdR = FNMS(KP414213562, Tdo, Tdt); + Tdu = FMA(KP414213562, Tdt, Tdo); + } + } + { + E TdT, Tea, Te0, TdU; + { + E Tdj, TdQ, TdF, TdP, TdN, TdS, TdO, TdG; + TdT = FNMS(KP707106781, Tdi, Tdb); + Tdj 
= FMA(KP707106781, Tdi, Tdb); + Tea = FNMS(KP414213562, TdY, TdZ); + Te0 = FMA(KP414213562, TdZ, TdY); + TdQ = FMA(KP414213562, Tdz, TdE); + TdF = FNMS(KP414213562, TdE, Tdz); + TdP = FMA(KP707106781, TdM, TdJ); + TdN = FNMS(KP707106781, TdM, TdJ); + TdS = TdQ - TdR; + TdU = TdR + TdQ; + TdO = Tdu + TdF; + TdG = Tdu - TdF; + io[WS(os, 12)] = FMA(KP923879532, TdS, TdP); + io[WS(os, 44)] = FNMS(KP923879532, TdS, TdP); + ro[WS(os, 12)] = FMA(KP923879532, TdG, Tdj); + ro[WS(os, 44)] = FNMS(KP923879532, TdG, Tdj); + io[WS(os, 60)] = FMA(KP923879532, TdO, TdN); + io[WS(os, 28)] = FNMS(KP923879532, TdO, TdN); + } + { + E Te8, Te7, Ted, Tee, TdX, Te4; + Te9 = FNMS(KP707106781, TdW, TdV); + TdX = FMA(KP707106781, TdW, TdV); + Te4 = Te0 + Te3; + Te8 = Te3 - Te0; + Te7 = FNMS(KP707106781, Te6, Te5); + Ted = FMA(KP707106781, Te6, Te5); + ro[WS(os, 60)] = FMA(KP923879532, TdU, TdT); + ro[WS(os, 28)] = FNMS(KP923879532, TdU, TdT); + ro[WS(os, 4)] = FMA(KP923879532, Te4, TdX); + ro[WS(os, 36)] = FNMS(KP923879532, Te4, TdX); + Tee = Tea + Teb; + Tec = Tea - Teb; + io[WS(os, 4)] = FMA(KP923879532, Tee, Ted); + io[WS(os, 36)] = FNMS(KP923879532, Tee, Ted); + io[WS(os, 20)] = FMA(KP923879532, Te8, Te7); + io[WS(os, 52)] = FNMS(KP923879532, Te8, Te7); + } + } + } + { + E TcP, Tcd, TcZ, TcD, Tcy, Tcv, TcT, Td0, Tck, Td4, TcX, TcS, TcK, Tcs, TcQ; + E TcG; + { + E TcF, Tcg, Tcj, TcE, TcV, TcW, Tcc, TcC, Tco, Tcr; + TbE = TbC + TbD; + Tcc = TbC - TbD; + TcC = Tan - Tak; + Tao = Tak + Tan; + TcF = FNMS(KP414213562, Tce, Tcf); + Tcg = FMA(KP414213562, Tcf, Tce); + ro[WS(os, 20)] = FMA(KP923879532, Tec, Te9); + ro[WS(os, 52)] = FNMS(KP923879532, Tec, Te9); + TcP = FNMS(KP707106781, Tcc, Tcb); + Tcd = FMA(KP707106781, Tcc, Tcb); + TcZ = FNMS(KP707106781, TcC, TcB); + TcD = FMA(KP707106781, TcC, TcB); + Tcj = FNMS(KP414213562, Tci, Tch); + TcE = FMA(KP414213562, Tch, Tci); + Tcy = FNMS(KP707106781, Tcx, Tcw); + TcV = FMA(KP707106781, Tcx, Tcw); + TcW = FMA(KP707106781, Tcu, Tct); + Tcv = 
FNMS(KP707106781, Tcu, Tct); + TcT = FMA(KP707106781, Tcn, Tcm); + Tco = FNMS(KP707106781, Tcn, Tcm); + Td0 = Tcg + Tcj; + Tck = Tcg - Tcj; + Td4 = FMA(KP198912367, TcV, TcW); + TcX = FNMS(KP198912367, TcW, TcV); + Tcr = FNMS(KP707106781, Tcq, Tcp); + TcS = FMA(KP707106781, Tcq, Tcp); + TcK = FNMS(KP668178637, Tco, Tcr); + Tcs = FMA(KP668178637, Tcr, Tco); + TcQ = TcF + TcE; + TcG = TcE - TcF; + } + { + E TcJ, Td5, TcU, TcM; + { + E Tcl, TcL, Tcz, TcN, TcH, TcO, TcI, TcA; + TcJ = FNMS(KP923879532, Tck, Tcd); + Tcl = FMA(KP923879532, Tck, Tcd); + Td5 = FNMS(KP198912367, TcS, TcT); + TcU = FMA(KP198912367, TcT, TcS); + TcL = FMA(KP668178637, Tcv, Tcy); + Tcz = FNMS(KP668178637, Tcy, Tcv); + TcN = FMA(KP923879532, TcG, TcD); + TcH = FNMS(KP923879532, TcG, TcD); + TcO = TcK + TcL; + TcM = TcK - TcL; + TcI = Tcz - Tcs; + TcA = Tcs + Tcz; + io[WS(os, 6)] = FMA(KP831469612, TcO, TcN); + io[WS(os, 38)] = FNMS(KP831469612, TcO, TcN); + ro[WS(os, 6)] = FMA(KP831469612, TcA, Tcl); + ro[WS(os, 38)] = FNMS(KP831469612, TcA, Tcl); + io[WS(os, 22)] = FMA(KP831469612, TcI, TcH); + io[WS(os, 54)] = FNMS(KP831469612, TcI, TcH); + } + { + E Td2, Td1, Td3, Td6, TcR, TcY; + Td7 = FMA(KP923879532, TcQ, TcP); + TcR = FNMS(KP923879532, TcQ, TcP); + TcY = TcU - TcX; + Td2 = TcU + TcX; + Td1 = FMA(KP923879532, Td0, TcZ); + Td3 = FNMS(KP923879532, Td0, TcZ); + ro[WS(os, 22)] = FMA(KP831469612, TcM, TcJ); + ro[WS(os, 54)] = FNMS(KP831469612, TcM, TcJ); + ro[WS(os, 14)] = FMA(KP980785280, TcY, TcR); + ro[WS(os, 46)] = FNMS(KP980785280, TcY, TcR); + Td6 = Td4 - Td5; + Td8 = Td5 + Td4; + io[WS(os, 14)] = FMA(KP980785280, Td6, Td3); + io[WS(os, 46)] = FNMS(KP980785280, Td6, Td3); + io[WS(os, 62)] = FMA(KP980785280, Td2, Td1); + io[WS(os, 30)] = FNMS(KP980785280, Td2, Td1); + } + } + } + } + { + E T3f, T66, T63, T3u, T7z, T7A, Tc5, Tc8; + { + E TbR, Tap, Tc1, TbF, Tbw, Tbn, TbV, Tc2, TaE, Tc7, TbZ, TbU, TbN, Tb6, TbS; + E TbI; + { + E TbH, Taw, TaD, TbG, TbX, TbY, TaW, Tb5; + TbH = 
FMA(KP414213562, Tas, Tav); + Taw = FNMS(KP414213562, Tav, Tas); + ro[WS(os, 62)] = FMA(KP980785280, Td8, Td7); + ro[WS(os, 30)] = FNMS(KP980785280, Td8, Td7); + TbR = FMA(KP707106781, Tao, Tah); + Tap = FNMS(KP707106781, Tao, Tah); + Tc1 = FMA(KP707106781, TbE, TbB); + TbF = FNMS(KP707106781, TbE, TbB); + TaD = FMA(KP414213562, TaC, Taz); + TbG = FNMS(KP414213562, Taz, TaC); + Tbw = FNMS(KP707106781, Tbv, Tbs); + TbX = FMA(KP707106781, Tbv, Tbs); + TbY = FMA(KP707106781, Tbm, Tbb); + Tbn = FNMS(KP707106781, Tbm, Tbb); + TbV = FMA(KP707106781, TaV, TaK); + TaW = FNMS(KP707106781, TaV, TaK); + Tc2 = Taw + TaD; + TaE = Taw - TaD; + Tc7 = FMA(KP198912367, TbX, TbY); + TbZ = FNMS(KP198912367, TbY, TbX); + Tb5 = FNMS(KP707106781, Tb4, Tb1); + TbU = FMA(KP707106781, Tb4, Tb1); + TbN = FNMS(KP668178637, TaW, Tb5); + Tb6 = FMA(KP668178637, Tb5, TaW); + TbS = TbH + TbG; + TbI = TbG - TbH; + } + { + E TbP, Tc6, TbW, TbQ; + { + E TaF, TbM, Tbx, TbL, TbJ, TbO, TbK, Tby; + TbP = FNMS(KP923879532, TaE, Tap); + TaF = FMA(KP923879532, TaE, Tap); + Tc6 = FNMS(KP198912367, TbU, TbV); + TbW = FMA(KP198912367, TbV, TbU); + TbM = FMA(KP668178637, Tbn, Tbw); + Tbx = FNMS(KP668178637, Tbw, Tbn); + TbL = FMA(KP923879532, TbI, TbF); + TbJ = FNMS(KP923879532, TbI, TbF); + TbO = TbM - TbN; + TbQ = TbN + TbM; + TbK = Tb6 + Tbx; + Tby = Tb6 - Tbx; + io[WS(os, 10)] = FMA(KP831469612, TbO, TbL); + io[WS(os, 42)] = FNMS(KP831469612, TbO, TbL); + ro[WS(os, 10)] = FMA(KP831469612, Tby, TaF); + ro[WS(os, 42)] = FNMS(KP831469612, Tby, TaF); + io[WS(os, 58)] = FMA(KP831469612, TbK, TbJ); + io[WS(os, 26)] = FNMS(KP831469612, TbK, TbJ); + } + { + E Tc4, Tc3, Tc9, Tca, TbT, Tc0; + Tc5 = FNMS(KP923879532, TbS, TbR); + TbT = FMA(KP923879532, TbS, TbR); + Tc0 = TbW + TbZ; + Tc4 = TbZ - TbW; + Tc3 = FNMS(KP923879532, Tc2, Tc1); + Tc9 = FMA(KP923879532, Tc2, Tc1); + ro[WS(os, 58)] = FMA(KP831469612, TbQ, TbP); + ro[WS(os, 26)] = FNMS(KP831469612, TbQ, TbP); + ro[WS(os, 2)] = FMA(KP980785280, Tc0, TbT); + 
ro[WS(os, 34)] = FNMS(KP980785280, Tc0, TbT); + Tca = Tc6 + Tc7; + Tc8 = Tc6 - Tc7; + io[WS(os, 2)] = FMA(KP980785280, Tca, Tc9); + io[WS(os, 34)] = FNMS(KP980785280, Tca, Tc9); + io[WS(os, 18)] = FMA(KP980785280, Tc4, Tc3); + io[WS(os, 50)] = FNMS(KP980785280, Tc4, Tc3); + } + } + } + { + E T7h, T6F, T70, T6X, T7x, T7m, T7w, T7p, T7s, T6M, T7c, T6U, T7r, T75, T7i; + E T78; + { + E T6T, T6Q, T77, T6I, T6L, T76, T73, T74; + { + E T7k, T7l, T6D, T6E, T7n, T7o; + T3f = FMA(KP707106781, T3e, T37); + T6D = FNMS(KP707106781, T3e, T37); + T6E = T65 + T64; + T66 = T64 - T65; + T6T = FNMS(KP923879532, T6S, T6R); + T7k = FMA(KP923879532, T6S, T6R); + ro[WS(os, 18)] = FMA(KP980785280, Tc8, Tc5); + ro[WS(os, 50)] = FNMS(KP980785280, Tc8, Tc5); + T7h = FMA(KP923879532, T6E, T6D); + T6F = FNMS(KP923879532, T6E, T6D); + T7l = FMA(KP923879532, T6P, T6O); + T6Q = FNMS(KP923879532, T6P, T6O); + T70 = FNMS(KP923879532, T6Z, T6Y); + T7n = FMA(KP923879532, T6Z, T6Y); + T7o = FMA(KP923879532, T6W, T6V); + T6X = FNMS(KP923879532, T6W, T6V); + T77 = FNMS(KP198912367, T6G, T6H); + T6I = FMA(KP198912367, T6H, T6G); + T7x = FNMS(KP098491403, T7k, T7l); + T7m = FMA(KP098491403, T7l, T7k); + T7w = FMA(KP098491403, T7n, T7o); + T7p = FNMS(KP098491403, T7o, T7n); + T6L = FNMS(KP198912367, T6K, T6J); + T76 = FMA(KP198912367, T6J, T6K); + } + T63 = FMA(KP707106781, T62, T5Z); + T73 = FNMS(KP707106781, T62, T5Z); + T7s = T6I + T6L; + T6M = T6I - T6L; + T7c = FNMS(KP820678790, T6Q, T6T); + T6U = FMA(KP820678790, T6T, T6Q); + T74 = T3m + T3t; + T3u = T3m - T3t; + T7r = FMA(KP923879532, T74, T73); + T75 = FNMS(KP923879532, T74, T73); + T7i = T77 + T76; + T78 = T76 - T77; + } + { + E T7b, T6N, T7f, T79, T71, T7d; + T7b = FNMS(KP980785280, T6M, T6F); + T6N = FMA(KP980785280, T6M, T6F); + T7f = FMA(KP980785280, T78, T75); + T79 = FNMS(KP980785280, T78, T75); + T71 = FNMS(KP820678790, T70, T6X); + T7d = FMA(KP820678790, T6X, T70); + { + E T7u, T7t, T7v, T7y, T7j, T7q; + T7z = FMA(KP980785280, T7i, T7h); + 
T7j = FNMS(KP980785280, T7i, T7h); + T7q = T7m - T7p; + T7u = T7m + T7p; + { + E T7g, T7e, T72, T7a; + T7g = T7c + T7d; + T7e = T7c - T7d; + T72 = T6U + T71; + T7a = T71 - T6U; + ro[WS(os, 23)] = FMA(KP773010453, T7e, T7b); + ro[WS(os, 55)] = FNMS(KP773010453, T7e, T7b); + io[WS(os, 7)] = FMA(KP773010453, T7g, T7f); + io[WS(os, 39)] = FNMS(KP773010453, T7g, T7f); + io[WS(os, 23)] = FMA(KP773010453, T7a, T79); + io[WS(os, 55)] = FNMS(KP773010453, T7a, T79); + ro[WS(os, 7)] = FMA(KP773010453, T72, T6N); + ro[WS(os, 39)] = FNMS(KP773010453, T72, T6N); + ro[WS(os, 47)] = FNMS(KP995184726, T7q, T7j); + ro[WS(os, 15)] = FMA(KP995184726, T7q, T7j); + } + T7t = FMA(KP980785280, T7s, T7r); + T7v = FNMS(KP980785280, T7s, T7r); + T7y = T7w - T7x; + T7A = T7x + T7w; + io[WS(os, 15)] = FMA(KP995184726, T7y, T7v); + io[WS(os, 47)] = FNMS(KP995184726, T7y, T7v); + io[WS(os, 63)] = FMA(KP995184726, T7u, T7t); + io[WS(os, 31)] = FNMS(KP995184726, T7u, T7t); + } + } + } + { + E T7D, T8K, T8H, T7K, Tad, Tae, T6x, T6A; + { + E T9V, T9j, T9E, T9B, Tab, Ta0, Taa, Ta3, Ta6, T9q, T9Q, T9y, Ta5, T9J, T9W; + E T9M; + { + E T9x, T9u, T9L, T9m, T9p, T9K, T9H, T9I; + { + E T9Y, T9Z, T9h, T9i, Ta1, Ta2; + T7D = FMA(KP707106781, T7C, T7B); + T9h = FNMS(KP707106781, T7C, T7B); + T9i = T8I - T8J; + T8K = T8I + T8J; + T9x = FNMS(KP923879532, T9w, T9v); + T9Y = FMA(KP923879532, T9w, T9v); + ro[WS(os, 63)] = FMA(KP995184726, T7A, T7z); + ro[WS(os, 31)] = FNMS(KP995184726, T7A, T7z); + T9V = FNMS(KP923879532, T9i, T9h); + T9j = FMA(KP923879532, T9i, T9h); + T9Z = FMA(KP923879532, T9t, T9s); + T9u = FNMS(KP923879532, T9t, T9s); + T9E = FNMS(KP923879532, T9D, T9C); + Ta1 = FMA(KP923879532, T9D, T9C); + Ta2 = FMA(KP923879532, T9A, T9z); + T9B = FNMS(KP923879532, T9A, T9z); + T9L = FNMS(KP668178637, T9k, T9l); + T9m = FMA(KP668178637, T9l, T9k); + Tab = FNMS(KP303346683, T9Y, T9Z); + Ta0 = FMA(KP303346683, T9Z, T9Y); + Taa = FMA(KP303346683, Ta1, Ta2); + Ta3 = FNMS(KP303346683, Ta2, Ta1); + T9p = 
FNMS(KP668178637, T9o, T9n); + T9K = FMA(KP668178637, T9n, T9o); + } + T8H = FMA(KP707106781, T8G, T8F); + T9H = FNMS(KP707106781, T8G, T8F); + Ta6 = T9m + T9p; + T9q = T9m - T9p; + T9Q = FNMS(KP534511135, T9u, T9x); + T9y = FMA(KP534511135, T9x, T9u); + T9I = T7J - T7G; + T7K = T7G + T7J; + Ta5 = FNMS(KP923879532, T9I, T9H); + T9J = FMA(KP923879532, T9I, T9H); + T9W = T9L + T9K; + T9M = T9K - T9L; + } + { + E T9P, T9r, T9T, T9N, T9F, T9R; + T9P = FNMS(KP831469612, T9q, T9j); + T9r = FMA(KP831469612, T9q, T9j); + T9T = FMA(KP831469612, T9M, T9J); + T9N = FNMS(KP831469612, T9M, T9J); + T9F = FNMS(KP534511135, T9E, T9B); + T9R = FMA(KP534511135, T9B, T9E); + { + E Ta8, Ta7, Ta9, Tac, T9X, Ta4; + Tad = FMA(KP831469612, T9W, T9V); + T9X = FNMS(KP831469612, T9W, T9V); + Ta4 = Ta0 - Ta3; + Ta8 = Ta0 + Ta3; + { + E T9U, T9S, T9G, T9O; + T9U = T9Q + T9R; + T9S = T9Q - T9R; + T9G = T9y + T9F; + T9O = T9F - T9y; + ro[WS(os, 21)] = FMA(KP881921264, T9S, T9P); + ro[WS(os, 53)] = FNMS(KP881921264, T9S, T9P); + io[WS(os, 5)] = FMA(KP881921264, T9U, T9T); + io[WS(os, 37)] = FNMS(KP881921264, T9U, T9T); + io[WS(os, 21)] = FMA(KP881921264, T9O, T9N); + io[WS(os, 53)] = FNMS(KP881921264, T9O, T9N); + ro[WS(os, 5)] = FMA(KP881921264, T9G, T9r); + ro[WS(os, 37)] = FNMS(KP881921264, T9G, T9r); + ro[WS(os, 45)] = FNMS(KP956940335, Ta4, T9X); + ro[WS(os, 13)] = FMA(KP956940335, Ta4, T9X); + } + Ta7 = FMA(KP831469612, Ta6, Ta5); + Ta9 = FNMS(KP831469612, Ta6, Ta5); + Tac = Taa - Tab; + Tae = Tab + Taa; + io[WS(os, 13)] = FMA(KP956940335, Tac, Ta9); + io[WS(os, 45)] = FNMS(KP956940335, Tac, Ta9); + io[WS(os, 61)] = FMA(KP956940335, Ta8, Ta7); + io[WS(os, 29)] = FNMS(KP956940335, Ta8, Ta7); + } + } + } + { + E T6j, T3v, T5U, T5H, T6y, T6o, T6z, T6r, T6u, T48, T6f, T52, T6t, T67, T6k; + E T6a; + { + E T51, T4O, T69, T3O, T47, T68; + { + E T6m, T6n, T6p, T6q; + T51 = FNMS(KP923879532, T50, T4X); + T6m = FMA(KP923879532, T50, T4X); + ro[WS(os, 61)] = FMA(KP956940335, Tae, Tad); + ro[WS(os, 
29)] = FNMS(KP956940335, Tae, Tad); + T6j = FMA(KP923879532, T3u, T3f); + T3v = FNMS(KP923879532, T3u, T3f); + T6n = FMA(KP923879532, T4N, T4q); + T4O = FNMS(KP923879532, T4N, T4q); + T5U = FNMS(KP923879532, T5T, T5Q); + T6p = FMA(KP923879532, T5T, T5Q); + T6q = FMA(KP923879532, T5G, T5j); + T5H = FNMS(KP923879532, T5G, T5j); + T69 = FMA(KP668178637, T3G, T3N); + T3O = FNMS(KP668178637, T3N, T3G); + T6y = FNMS(KP303346683, T6m, T6n); + T6o = FMA(KP303346683, T6n, T6m); + T6z = FMA(KP303346683, T6p, T6q); + T6r = FNMS(KP303346683, T6q, T6p); + T47 = FMA(KP668178637, T46, T3Z); + T68 = FNMS(KP668178637, T3Z, T46); + } + T6u = T3O + T47; + T48 = T3O - T47; + T6f = FNMS(KP534511135, T4O, T51); + T52 = FMA(KP534511135, T51, T4O); + T6t = FMA(KP923879532, T66, T63); + T67 = FNMS(KP923879532, T66, T63); + T6k = T69 + T68; + T6a = T68 - T69; + } + { + E T6h, T49, T6d, T6b, T5V, T6e; + T6h = FNMS(KP831469612, T48, T3v); + T49 = FMA(KP831469612, T48, T3v); + T6d = FMA(KP831469612, T6a, T67); + T6b = FNMS(KP831469612, T6a, T67); + T5V = FNMS(KP534511135, T5U, T5H); + T6e = FMA(KP534511135, T5H, T5U); + { + E T6w, T6v, T6B, T6C, T6l, T6s; + T6x = FNMS(KP831469612, T6k, T6j); + T6l = FMA(KP831469612, T6k, T6j); + T6s = T6o + T6r; + T6w = T6r - T6o; + { + E T6g, T6i, T5W, T6c; + T6g = T6e - T6f; + T6i = T6f + T6e; + T5W = T52 - T5V; + T6c = T52 + T5V; + ro[WS(os, 59)] = FMA(KP881921264, T6i, T6h); + ro[WS(os, 27)] = FNMS(KP881921264, T6i, T6h); + io[WS(os, 11)] = FMA(KP881921264, T6g, T6d); + io[WS(os, 43)] = FNMS(KP881921264, T6g, T6d); + io[WS(os, 59)] = FMA(KP881921264, T6c, T6b); + io[WS(os, 27)] = FNMS(KP881921264, T6c, T6b); + ro[WS(os, 11)] = FMA(KP881921264, T5W, T49); + ro[WS(os, 43)] = FNMS(KP881921264, T5W, T49); + ro[WS(os, 35)] = FNMS(KP956940335, T6s, T6l); + ro[WS(os, 3)] = FMA(KP956940335, T6s, T6l); + } + T6v = FNMS(KP831469612, T6u, T6t); + T6B = FMA(KP831469612, T6u, T6t); + T6C = T6y + T6z; + T6A = T6y - T6z; + io[WS(os, 3)] = FMA(KP956940335, T6C, T6B); + 
io[WS(os, 35)] = FNMS(KP956940335, T6C, T6B); + io[WS(os, 19)] = FMA(KP956940335, T6w, T6v); + io[WS(os, 51)] = FNMS(KP956940335, T6w, T6v); + } + } + } + { + E T8X, T7L, T8C, T8v, T9c, T92, T9d, T95, T98, T80, T8T, T8k, T97, T8L, T8Y; + E T8O; + { + E T8j, T8c, T8N, T7S, T7Z, T8M; + { + E T90, T91, T93, T94; + T8j = FNMS(KP923879532, T8i, T8f); + T90 = FMA(KP923879532, T8i, T8f); + ro[WS(os, 19)] = FMA(KP956940335, T6A, T6x); + ro[WS(os, 51)] = FNMS(KP956940335, T6A, T6x); + T8X = FMA(KP923879532, T7K, T7D); + T7L = FNMS(KP923879532, T7K, T7D); + T91 = FMA(KP923879532, T8b, T84); + T8c = FNMS(KP923879532, T8b, T84); + T8C = FNMS(KP923879532, T8B, T8y); + T93 = FMA(KP923879532, T8B, T8y); + T94 = FMA(KP923879532, T8u, T8n); + T8v = FNMS(KP923879532, T8u, T8n); + T8N = FMA(KP198912367, T7O, T7R); + T7S = FNMS(KP198912367, T7R, T7O); + T9c = FNMS(KP098491403, T90, T91); + T92 = FMA(KP098491403, T91, T90); + T9d = FMA(KP098491403, T93, T94); + T95 = FNMS(KP098491403, T94, T93); + T7Z = FMA(KP198912367, T7Y, T7V); + T8M = FNMS(KP198912367, T7V, T7Y); + } + T98 = T7S + T7Z; + T80 = T7S - T7Z; + T8T = FNMS(KP820678790, T8c, T8j); + T8k = FMA(KP820678790, T8j, T8c); + T97 = FMA(KP923879532, T8K, T8H); + T8L = FNMS(KP923879532, T8K, T8H); + T8Y = T8N + T8M; + T8O = T8M - T8N; + } + { + E T8V, T81, T8R, T8P, T8D, T8S; + T8V = FNMS(KP980785280, T80, T7L); + T81 = FMA(KP980785280, T80, T7L); + T8R = FMA(KP980785280, T8O, T8L); + T8P = FNMS(KP980785280, T8O, T8L); + T8D = FNMS(KP820678790, T8C, T8v); + T8S = FMA(KP820678790, T8v, T8C); + { + E T9a, T99, T9f, T9g, T8Z, T96; + T9b = FNMS(KP980785280, T8Y, T8X); + T8Z = FMA(KP980785280, T8Y, T8X); + T96 = T92 + T95; + T9a = T95 - T92; + { + E T8U, T8W, T8E, T8Q; + T8U = T8S - T8T; + T8W = T8T + T8S; + T8E = T8k - T8D; + T8Q = T8k + T8D; + ro[WS(os, 57)] = FMA(KP773010453, T8W, T8V); + ro[WS(os, 25)] = FNMS(KP773010453, T8W, T8V); + io[WS(os, 9)] = FMA(KP773010453, T8U, T8R); + io[WS(os, 41)] = FNMS(KP773010453, T8U, T8R); + 
io[WS(os, 57)] = FMA(KP773010453, T8Q, T8P); + io[WS(os, 25)] = FNMS(KP773010453, T8Q, T8P); + ro[WS(os, 9)] = FMA(KP773010453, T8E, T81); + ro[WS(os, 41)] = FNMS(KP773010453, T8E, T81); + ro[WS(os, 33)] = FNMS(KP995184726, T96, T8Z); + ro[WS(os, 1)] = FMA(KP995184726, T96, T8Z); + } + T99 = FNMS(KP980785280, T98, T97); + T9f = FMA(KP980785280, T98, T97); + T9g = T9c + T9d; + T9e = T9c - T9d; + io[WS(os, 1)] = FMA(KP995184726, T9g, T9f); + io[WS(os, 33)] = FNMS(KP995184726, T9g, T9f); + io[WS(os, 17)] = FMA(KP995184726, T9a, T99); + io[WS(os, 49)] = FNMS(KP995184726, T9a, T99); + } + } + } + } + } + } + } + ro[WS(os, 17)] = FMA(KP995184726, T9e, T9b); + ro[WS(os, 49)] = FNMS(KP995184726, T9e, T9b); + } + } +} +/* Descriptor registered alongside the n1_64 kernel: first field 64, name string "n1_64". NOTE(review): {520, 0, 392, 0} presumably holds the operation counts (adds, muls, fused multiply/adds, other) of the HAVE_FMA kernel, and the trailing zeros presumably mean "no constraint" -- confirm against the kdft_desc declaration. */ +static const kdft_desc desc = { 64, "n1_64", {520, 0, 392, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registers the n1_64 kernel and its descriptor with planner p by calling X(kdft_register). */ +void X(codelet_n1_64) (planner *p) { + X(kdft_register) (p, n1_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 64 -name n1_64 -include n.h */ + +/* + * This function contains 912 FP additions, 248 FP multiplications, + * (or, 808 additions, 144 multiplications, 104 fused multiply/add), + * 172 stack variables, 15 constants, and 256 memory accesses + */ +#include "n.h" + +static void n1_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP831469612, 
+0.831469612302545237078788377617905756738560812); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + E T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e; + E T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I; + E T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R; + E T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C; + E Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V; + E T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O; + E T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50; + E T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z; + E T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D; + { + E T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g; + E T3c; + { + E T1, T2, T24, T25; + T1 = ri[0]; + T2 = ri[WS(is, 32)]; + T3 = T1 + T2; + T35 = T1 - T2; + T24 = ii[0]; + T25 = ii[WS(is, 32)]; + T26 = T24 + T25; + T5Y = T24 - T25; + } + { + E T4, T5, T27, T28; + T4 = ri[WS(is, 16)]; + T5 = ri[WS(is, 48)]; + T6 = T4 + T5; + T5X = T4 - T5; + T27 = ii[WS(is, 16)]; + T28 = ii[WS(is, 48)]; + T29 = T27 + T28; + T36 = T27 - T28; + } + { + E T8, T9, T2b, T2c; + T8 = ri[WS(is, 8)]; + T9 = ri[WS(is, 40)]; + Ta = T8 + T9; + T39 = T8 - T9; + T2b = ii[WS(is, 8)]; + T2c = ii[WS(is, 40)]; + T2d = T2b + T2c; + T38 = T2b - T2c; + } + { + E Tb, Tc, T2e, T2f; + Tb = 
ri[WS(is, 56)]; + Tc = ri[WS(is, 24)]; + Td = Tb + Tc; + T3b = Tb - Tc; + T2e = ii[WS(is, 56)]; + T2f = ii[WS(is, 24)]; + T2g = T2e + T2f; + T3c = T2e - T2f; + } + { + E T7, Te, T2a, T2h; + T37 = T35 - T36; + T7B = T35 + T36; + T8F = T5Y - T5X; + T5Z = T5X + T5Y; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + Td9 = T7 - Te; + { + E Tbz, TbA, T60, T61; + Tbz = T26 - T29; + TbA = Td - Ta; + TbB = Tbz - TbA; + TcB = TbA + Tbz; + T60 = T3b - T3c; + T61 = T39 + T38; + T62 = KP707106781 * (T60 - T61); + T7C = KP707106781 * (T61 + T60); + } + T2a = T26 + T29; + T2h = T2d + T2g; + T2i = T2a + T2h; + TdH = T2a - T2h; + { + E Taf, Tag, T3a, T3d; + Taf = T3 - T6; + Tag = T2d - T2g; + Tah = Taf - Tag; + Tcb = Taf + Tag; + T3a = T38 - T39; + T3d = T3b + T3c; + T3e = KP707106781 * (T3a - T3d); + T8G = KP707106781 * (T3a + T3d); + } + } + } + { + E Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v; + E T3r; + { + E Tg, Th, T2j, T2k; + Tg = ri[WS(is, 4)]; + Th = ri[WS(is, 36)]; + Ti = Tg + Th; + T3j = Tg - Th; + T2j = ii[WS(is, 4)]; + T2k = ii[WS(is, 36)]; + T2l = T2j + T2k; + T3h = T2j - T2k; + } + { + E Tj, Tk, T2m, T2n; + Tj = ri[WS(is, 20)]; + Tk = ri[WS(is, 52)]; + Tl = Tj + Tk; + T3g = Tj - Tk; + T2m = ii[WS(is, 20)]; + T2n = ii[WS(is, 52)]; + T2o = T2m + T2n; + T3k = T2m - T2n; + } + { + E Tn, To, T2q, T2r; + Tn = ri[WS(is, 60)]; + To = ri[WS(is, 28)]; + Tp = Tn + To; + T3q = Tn - To; + T2q = ii[WS(is, 60)]; + T2r = ii[WS(is, 28)]; + T2s = T2q + T2r; + T3o = T2q - T2r; + } + { + E Tq, Tr, T2t, T2u; + Tq = ri[WS(is, 12)]; + Tr = ri[WS(is, 44)]; + Ts = Tq + Tr; + T3n = Tq - Tr; + T2t = ii[WS(is, 12)]; + T2u = ii[WS(is, 44)]; + T2v = T2t + T2u; + T3r = T2t - T2u; + } + { + E Tm, Tt, Tai, Taj; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + TdI = Tt - Tm; + Tai = T2l - T2o; + Taj = Ti - Tl; + Tak = Tai - Taj; + TbD = Taj + Tai; + } + { + E Tal, Tam, T2p, T2w; + Tal = Tp - Ts; + Tam = T2s - T2v; + Tan = Tal + Tam; + TbC = Tal - Tam; + T2p = T2l + T2o; + 
T2w = T2s + T2v; + T2x = T2p + T2w; + Tda = T2p - T2w; + } + { + E T3i, T3l, T7E, T7F; + T3i = T3g + T3h; + T3l = T3j - T3k; + T3m = FNMS(KP923879532, T3l, KP382683432 * T3i); + T65 = FMA(KP923879532, T3i, KP382683432 * T3l); + T7E = T3h - T3g; + T7F = T3j + T3k; + T7G = FNMS(KP382683432, T7F, KP923879532 * T7E); + T8J = FMA(KP382683432, T7E, KP923879532 * T7F); + } + { + E T7H, T7I, T3p, T3s; + T7H = T3o - T3n; + T7I = T3q + T3r; + T7J = FMA(KP923879532, T7H, KP382683432 * T7I); + T8I = FNMS(KP382683432, T7H, KP923879532 * T7I); + T3p = T3n + T3o; + T3s = T3q - T3r; + T3t = FMA(KP382683432, T3p, KP923879532 * T3s); + T64 = FNMS(KP923879532, T3p, KP382683432 * T3s); + } + } + { + E Ty, T3H, T2B, T3x, TB, T3w, T2E, T3I, TI, T3L, T2L, T3B, TF, T3K, T2I; + E T3E; + { + E Tw, Tx, T2C, T2D; + Tw = ri[WS(is, 2)]; + Tx = ri[WS(is, 34)]; + Ty = Tw + Tx; + T3H = Tw - Tx; + { + E T2z, T2A, Tz, TA; + T2z = ii[WS(is, 2)]; + T2A = ii[WS(is, 34)]; + T2B = T2z + T2A; + T3x = T2z - T2A; + Tz = ri[WS(is, 18)]; + TA = ri[WS(is, 50)]; + TB = Tz + TA; + T3w = Tz - TA; + } + T2C = ii[WS(is, 18)]; + T2D = ii[WS(is, 50)]; + T2E = T2C + T2D; + T3I = T2C - T2D; + { + E TG, TH, T3z, T2J, T2K, T3A; + TG = ri[WS(is, 58)]; + TH = ri[WS(is, 26)]; + T3z = TG - TH; + T2J = ii[WS(is, 58)]; + T2K = ii[WS(is, 26)]; + T3A = T2J - T2K; + TI = TG + TH; + T3L = T3z + T3A; + T2L = T2J + T2K; + T3B = T3z - T3A; + } + { + E TD, TE, T3C, T2G, T2H, T3D; + TD = ri[WS(is, 10)]; + TE = ri[WS(is, 42)]; + T3C = TD - TE; + T2G = ii[WS(is, 10)]; + T2H = ii[WS(is, 42)]; + T3D = T2G - T2H; + TF = TD + TE; + T3K = T3D - T3C; + T2I = T2G + T2H; + T3E = T3C + T3D; + } + } + { + E TC, TJ, Taq, Tar; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + Tdd = TC - TJ; + Taq = T2B - T2E; + Tar = TI - TF; + Tas = Taq - Tar; + Tce = Tar + Taq; + } + { + E Tat, Tau, T2F, T2M; + Tat = Ty - TB; + Tau = T2I - T2L; + Tav = Tat - Tau; + Tcf = Tat + Tau; + T2F = T2B + T2E; + T2M = T2I + T2L; + T2N = T2F + T2M; + Tdc = T2F - T2M; + } + { 
+ E T3y, T3F, T7M, T7N; + T3y = T3w + T3x; + T3F = KP707106781 * (T3B - T3E); + T3G = T3y - T3F; + T6G = T3y + T3F; + T7M = T3x - T3w; + T7N = KP707106781 * (T3K + T3L); + T7O = T7M - T7N; + T9k = T7M + T7N; + } + { + E T7P, T7Q, T3J, T3M; + T7P = T3H + T3I; + T7Q = KP707106781 * (T3E + T3B); + T7R = T7P - T7Q; + T9l = T7P + T7Q; + T3J = T3H - T3I; + T3M = KP707106781 * (T3K - T3L); + T3N = T3J - T3M; + T6H = T3J + T3M; + } + } + { + E T1z, T53, T5L, Tbo, T1C, T5I, T56, Tbp, T1J, Tb9, T5h, T5N, T1G, Tb8, T5c; + E T5O; + { + E T1x, T1y, T54, T55; + T1x = ri[WS(is, 63)]; + T1y = ri[WS(is, 31)]; + T1z = T1x + T1y; + T53 = T1x - T1y; + { + E T5J, T5K, T1A, T1B; + T5J = ii[WS(is, 63)]; + T5K = ii[WS(is, 31)]; + T5L = T5J - T5K; + Tbo = T5J + T5K; + T1A = ri[WS(is, 15)]; + T1B = ri[WS(is, 47)]; + T1C = T1A + T1B; + T5I = T1A - T1B; + } + T54 = ii[WS(is, 15)]; + T55 = ii[WS(is, 47)]; + T56 = T54 - T55; + Tbp = T54 + T55; + { + E T1H, T1I, T5d, T5e, T5f, T5g; + T1H = ri[WS(is, 55)]; + T1I = ri[WS(is, 23)]; + T5d = T1H - T1I; + T5e = ii[WS(is, 55)]; + T5f = ii[WS(is, 23)]; + T5g = T5e - T5f; + T1J = T1H + T1I; + Tb9 = T5e + T5f; + T5h = T5d + T5g; + T5N = T5d - T5g; + } + { + E T1E, T1F, T5b, T58, T59, T5a; + T1E = ri[WS(is, 7)]; + T1F = ri[WS(is, 39)]; + T5b = T1E - T1F; + T58 = ii[WS(is, 7)]; + T59 = ii[WS(is, 39)]; + T5a = T58 - T59; + T1G = T1E + T1F; + Tb8 = T58 + T59; + T5c = T5a - T5b; + T5O = T5b + T5a; + } + } + { + E T1D, T1K, Tbq, Tbr; + T1D = T1z + T1C; + T1K = T1G + T1J; + T1L = T1D + T1K; + Tdv = T1D - T1K; + Tbq = Tbo - Tbp; + Tbr = T1J - T1G; + Tbs = Tbq - Tbr; + Tcw = Tbr + Tbq; + } + { + E TdA, TdB, T57, T5i; + TdA = Tbo + Tbp; + TdB = Tb8 + Tb9; + TdC = TdA - TdB; + Teo = TdA + TdB; + T57 = T53 - T56; + T5i = KP707106781 * (T5c - T5h); + T5j = T57 - T5i; + T6V = T57 + T5i; + } + { + E T5M, T5P, T8w, T8x; + T5M = T5I + T5L; + T5P = KP707106781 * (T5N - T5O); + T5Q = T5M - T5P; + T6Y = T5M + T5P; + T8w = T5L - T5I; + T8x = KP707106781 * (T5c + T5h); + T8y = 
T8w - T8x; + T9C = T8w + T8x; + } + { + E Tb7, Tba, T8l, T8m; + Tb7 = T1z - T1C; + Tba = Tb8 - Tb9; + Tbb = Tb7 - Tba; + Tct = Tb7 + Tba; + T8l = T53 + T56; + T8m = KP707106781 * (T5O + T5N); + T8n = T8l - T8m; + T9z = T8l + T8m; + } + } + { + E TN, T40, T2Q, T3Q, TQ, T3P, T2T, T41, TX, T44, T30, T3U, TU, T43, T2X; + E T3X; + { + E TL, TM, T2R, T2S; + TL = ri[WS(is, 62)]; + TM = ri[WS(is, 30)]; + TN = TL + TM; + T40 = TL - TM; + { + E T2O, T2P, TO, TP; + T2O = ii[WS(is, 62)]; + T2P = ii[WS(is, 30)]; + T2Q = T2O + T2P; + T3Q = T2O - T2P; + TO = ri[WS(is, 14)]; + TP = ri[WS(is, 46)]; + TQ = TO + TP; + T3P = TO - TP; + } + T2R = ii[WS(is, 14)]; + T2S = ii[WS(is, 46)]; + T2T = T2R + T2S; + T41 = T2R - T2S; + { + E TV, TW, T3S, T2Y, T2Z, T3T; + TV = ri[WS(is, 54)]; + TW = ri[WS(is, 22)]; + T3S = TV - TW; + T2Y = ii[WS(is, 54)]; + T2Z = ii[WS(is, 22)]; + T3T = T2Y - T2Z; + TX = TV + TW; + T44 = T3S + T3T; + T30 = T2Y + T2Z; + T3U = T3S - T3T; + } + { + E TS, TT, T3V, T2V, T2W, T3W; + TS = ri[WS(is, 6)]; + TT = ri[WS(is, 38)]; + T3V = TS - TT; + T2V = ii[WS(is, 6)]; + T2W = ii[WS(is, 38)]; + T3W = T2V - T2W; + TU = TS + TT; + T43 = T3W - T3V; + T2X = T2V + T2W; + T3X = T3V + T3W; + } + } + { + E TR, TY, Tax, Tay; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + Tdf = TR - TY; + Tax = T2Q - T2T; + Tay = TX - TU; + Taz = Tax - Tay; + Tch = Tay + Tax; + } + { + E TaA, TaB, T2U, T31; + TaA = TN - TQ; + TaB = T2X - T30; + TaC = TaA - TaB; + Tci = TaA + TaB; + T2U = T2Q + T2T; + T31 = T2X + T30; + T32 = T2U + T31; + Tdg = T2U - T31; + } + { + E T3R, T3Y, T7T, T7U; + T3R = T3P + T3Q; + T3Y = KP707106781 * (T3U - T3X); + T3Z = T3R - T3Y; + T6J = T3R + T3Y; + T7T = T40 + T41; + T7U = KP707106781 * (T3X + T3U); + T7V = T7T - T7U; + T9n = T7T + T7U; + } + { + E T7W, T7X, T42, T45; + T7W = T3Q - T3P; + T7X = KP707106781 * (T43 + T44); + T7Y = T7W - T7X; + T9o = T7W + T7X; + T42 = T40 - T41; + T45 = KP707106781 * (T43 - T44); + T46 = T42 - T45; + T6K = T42 + T45; + } + } + { + E T14, 
T4P, T4d, TaG, T17, T4a, T4S, TaH, T1e, TaZ, T4j, T4V, T1b, TaY, T4o; + E T4U; + { + E T12, T13, T4Q, T4R; + T12 = ri[WS(is, 1)]; + T13 = ri[WS(is, 33)]; + T14 = T12 + T13; + T4P = T12 - T13; + { + E T4b, T4c, T15, T16; + T4b = ii[WS(is, 1)]; + T4c = ii[WS(is, 33)]; + T4d = T4b - T4c; + TaG = T4b + T4c; + T15 = ri[WS(is, 17)]; + T16 = ri[WS(is, 49)]; + T17 = T15 + T16; + T4a = T15 - T16; + } + T4Q = ii[WS(is, 17)]; + T4R = ii[WS(is, 49)]; + T4S = T4Q - T4R; + TaH = T4Q + T4R; + { + E T1c, T1d, T4f, T4g, T4h, T4i; + T1c = ri[WS(is, 57)]; + T1d = ri[WS(is, 25)]; + T4f = T1c - T1d; + T4g = ii[WS(is, 57)]; + T4h = ii[WS(is, 25)]; + T4i = T4g - T4h; + T1e = T1c + T1d; + TaZ = T4g + T4h; + T4j = T4f - T4i; + T4V = T4f + T4i; + } + { + E T19, T1a, T4k, T4l, T4m, T4n; + T19 = ri[WS(is, 9)]; + T1a = ri[WS(is, 41)]; + T4k = T19 - T1a; + T4l = ii[WS(is, 9)]; + T4m = ii[WS(is, 41)]; + T4n = T4l - T4m; + T1b = T19 + T1a; + TaY = T4l + T4m; + T4o = T4k + T4n; + T4U = T4n - T4k; + } + } + { + E T18, T1f, TaX, Tb0; + T18 = T14 + T17; + T1f = T1b + T1e; + T1g = T18 + T1f; + Tdp = T18 - T1f; + TaX = T14 - T17; + Tb0 = TaY - TaZ; + Tb1 = TaX - Tb0; + Tcm = TaX + Tb0; + } + { + E Tdk, Tdl, T4e, T4p; + Tdk = TaG + TaH; + Tdl = TaY + TaZ; + Tdm = Tdk - Tdl; + Tej = Tdk + Tdl; + T4e = T4a + T4d; + T4p = KP707106781 * (T4j - T4o); + T4q = T4e - T4p; + T6R = T4e + T4p; + } + { + E T4T, T4W, T8d, T8e; + T4T = T4P - T4S; + T4W = KP707106781 * (T4U - T4V); + T4X = T4T - T4W; + T6O = T4T + T4W; + T8d = T4P + T4S; + T8e = KP707106781 * (T4o + T4j); + T8f = T8d - T8e; + T9s = T8d + T8e; + } + { + E TaI, TaJ, T82, T83; + TaI = TaG - TaH; + TaJ = T1e - T1b; + TaK = TaI - TaJ; + Tcp = TaJ + TaI; + T82 = T4d - T4a; + T83 = KP707106781 * (T4U + T4V); + T84 = T82 - T83; + T9v = T82 + T83; + } + } + { + E T1j, TaR, T1m, TaS, T4G, T4L, TaT, TaQ, T89, T88, T1q, TaM, T1t, TaN, T4v; + E T4A, TaO, TaL, T86, T85; + { + E T4H, T4F, T4C, T4K; + { + E T1h, T1i, T4D, T4E; + T1h = ri[WS(is, 5)]; + T1i = ri[WS(is, 
37)]; + T1j = T1h + T1i; + T4H = T1h - T1i; + T4D = ii[WS(is, 5)]; + T4E = ii[WS(is, 37)]; + T4F = T4D - T4E; + TaR = T4D + T4E; + } + { + E T1k, T1l, T4I, T4J; + T1k = ri[WS(is, 21)]; + T1l = ri[WS(is, 53)]; + T1m = T1k + T1l; + T4C = T1k - T1l; + T4I = ii[WS(is, 21)]; + T4J = ii[WS(is, 53)]; + T4K = T4I - T4J; + TaS = T4I + T4J; + } + T4G = T4C + T4F; + T4L = T4H - T4K; + TaT = TaR - TaS; + TaQ = T1j - T1m; + T89 = T4H + T4K; + T88 = T4F - T4C; + } + { + E T4r, T4z, T4w, T4u; + { + E T1o, T1p, T4x, T4y; + T1o = ri[WS(is, 61)]; + T1p = ri[WS(is, 29)]; + T1q = T1o + T1p; + T4r = T1o - T1p; + T4x = ii[WS(is, 61)]; + T4y = ii[WS(is, 29)]; + T4z = T4x - T4y; + TaM = T4x + T4y; + } + { + E T1r, T1s, T4s, T4t; + T1r = ri[WS(is, 13)]; + T1s = ri[WS(is, 45)]; + T1t = T1r + T1s; + T4w = T1r - T1s; + T4s = ii[WS(is, 13)]; + T4t = ii[WS(is, 45)]; + T4u = T4s - T4t; + TaN = T4s + T4t; + } + T4v = T4r - T4u; + T4A = T4w + T4z; + TaO = TaM - TaN; + TaL = T1q - T1t; + T86 = T4z - T4w; + T85 = T4r + T4u; + } + { + E T1n, T1u, Tb2, Tb3; + T1n = T1j + T1m; + T1u = T1q + T1t; + T1v = T1n + T1u; + Tdn = T1u - T1n; + Tb2 = TaT - TaQ; + Tb3 = TaL + TaO; + Tb4 = KP707106781 * (Tb2 - Tb3); + Tcq = KP707106781 * (Tb2 + Tb3); + } + { + E Tdq, Tdr, T4B, T4M; + Tdq = TaR + TaS; + Tdr = TaM + TaN; + Tds = Tdq - Tdr; + Tek = Tdq + Tdr; + T4B = FNMS(KP923879532, T4A, KP382683432 * T4v); + T4M = FMA(KP923879532, T4G, KP382683432 * T4L); + T4N = T4B - T4M; + T6P = T4M + T4B; + } + { + E T4Y, T4Z, T8g, T8h; + T4Y = FNMS(KP923879532, T4L, KP382683432 * T4G); + T4Z = FMA(KP382683432, T4A, KP923879532 * T4v); + T50 = T4Y - T4Z; + T6S = T4Y + T4Z; + T8g = FNMS(KP382683432, T89, KP923879532 * T88); + T8h = FMA(KP923879532, T86, KP382683432 * T85); + T8i = T8g - T8h; + T9w = T8g + T8h; + } + { + E TaP, TaU, T87, T8a; + TaP = TaL - TaO; + TaU = TaQ + TaT; + TaV = KP707106781 * (TaP - TaU); + Tcn = KP707106781 * (TaU + TaP); + T87 = FNMS(KP382683432, T86, KP923879532 * T85); + T8a = FMA(KP382683432, T88, 
KP923879532 * T89); + T8b = T87 - T8a; + T9t = T8a + T87; + } + } + { + E T1O, Tbc, T1R, Tbd, T5o, T5t, Tbf, Tbe, T8p, T8o, T1V, Tbi, T1Y, Tbj, T5z; + E T5E, Tbk, Tbh, T8s, T8r; + { + E T5p, T5n, T5k, T5s; + { + E T1M, T1N, T5l, T5m; + T1M = ri[WS(is, 3)]; + T1N = ri[WS(is, 35)]; + T1O = T1M + T1N; + T5p = T1M - T1N; + T5l = ii[WS(is, 3)]; + T5m = ii[WS(is, 35)]; + T5n = T5l - T5m; + Tbc = T5l + T5m; + } + { + E T1P, T1Q, T5q, T5r; + T1P = ri[WS(is, 19)]; + T1Q = ri[WS(is, 51)]; + T1R = T1P + T1Q; + T5k = T1P - T1Q; + T5q = ii[WS(is, 19)]; + T5r = ii[WS(is, 51)]; + T5s = T5q - T5r; + Tbd = T5q + T5r; + } + T5o = T5k + T5n; + T5t = T5p - T5s; + Tbf = T1O - T1R; + Tbe = Tbc - Tbd; + T8p = T5p + T5s; + T8o = T5n - T5k; + } + { + E T5A, T5y, T5v, T5D; + { + E T1T, T1U, T5w, T5x; + T1T = ri[WS(is, 59)]; + T1U = ri[WS(is, 27)]; + T1V = T1T + T1U; + T5A = T1T - T1U; + T5w = ii[WS(is, 59)]; + T5x = ii[WS(is, 27)]; + T5y = T5w - T5x; + Tbi = T5w + T5x; + } + { + E T1W, T1X, T5B, T5C; + T1W = ri[WS(is, 11)]; + T1X = ri[WS(is, 43)]; + T1Y = T1W + T1X; + T5v = T1W - T1X; + T5B = ii[WS(is, 11)]; + T5C = ii[WS(is, 43)]; + T5D = T5B - T5C; + Tbj = T5B + T5C; + } + T5z = T5v + T5y; + T5E = T5A - T5D; + Tbk = Tbi - Tbj; + Tbh = T1V - T1Y; + T8s = T5A + T5D; + T8r = T5y - T5v; + } + { + E T1S, T1Z, Tbt, Tbu; + T1S = T1O + T1R; + T1Z = T1V + T1Y; + T20 = T1S + T1Z; + TdD = T1Z - T1S; + Tbt = Tbh - Tbk; + Tbu = Tbf + Tbe; + Tbv = KP707106781 * (Tbt - Tbu); + Tcu = KP707106781 * (Tbu + Tbt); + } + { + E Tdw, Tdx, T5u, T5F; + Tdw = Tbc + Tbd; + Tdx = Tbi + Tbj; + Tdy = Tdw - Tdx; + Tep = Tdw + Tdx; + T5u = FNMS(KP923879532, T5t, KP382683432 * T5o); + T5F = FMA(KP382683432, T5z, KP923879532 * T5E); + T5G = T5u - T5F; + T6Z = T5u + T5F; + } + { + E T5R, T5S, T8z, T8A; + T5R = FNMS(KP923879532, T5z, KP382683432 * T5E); + T5S = FMA(KP923879532, T5o, KP382683432 * T5t); + T5T = T5R - T5S; + T6W = T5S + T5R; + T8z = FNMS(KP382683432, T8r, KP923879532 * T8s); + T8A = FMA(KP382683432, T8o, 
KP923879532 * T8p); + T8B = T8z - T8A; + T9A = T8A + T8z; + } + { + E Tbg, Tbl, T8q, T8t; + Tbg = Tbe - Tbf; + Tbl = Tbh + Tbk; + Tbm = KP707106781 * (Tbg - Tbl); + Tcx = KP707106781 * (Tbg + Tbl); + T8q = FNMS(KP382683432, T8p, KP923879532 * T8o); + T8t = FMA(KP923879532, T8r, KP382683432 * T8s); + T8u = T8q - T8t; + T9D = T8q + T8t; + } + } + { + E T11, TeD, TeG, TeI, T22, T23, T34, TeH; + { + E Tv, T10, TeE, TeF; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + TeD = Tv - T10; + TeE = Tej + Tek; + TeF = Teo + Tep; + TeG = TeE - TeF; + TeI = TeE + TeF; + } + { + E T1w, T21, T2y, T33; + T1w = T1g + T1v; + T21 = T1L + T20; + T22 = T1w + T21; + T23 = T21 - T1w; + T2y = T2i + T2x; + T33 = T2N + T32; + T34 = T2y - T33; + TeH = T2y + T33; + } + ro[WS(os, 32)] = T11 - T22; + io[WS(os, 32)] = TeH - TeI; + ro[0] = T11 + T22; + io[0] = TeH + TeI; + io[WS(os, 16)] = T23 + T34; + ro[WS(os, 16)] = TeD + TeG; + io[WS(os, 48)] = T34 - T23; + ro[WS(os, 48)] = TeD - TeG; + } + { + E Teh, Tex, Tev, TeB, Tem, Tey, Ter, Tez; + { + E Tef, Teg, Tet, Teu; + Tef = Tf - Tu; + Teg = T2N - T32; + Teh = Tef + Teg; + Tex = Tef - Teg; + Tet = T2i - T2x; + Teu = TZ - TK; + Tev = Tet - Teu; + TeB = Teu + Tet; + } + { + E Tei, Tel, Ten, Teq; + Tei = T1g - T1v; + Tel = Tej - Tek; + Tem = Tei + Tel; + Tey = Tel - Tei; + Ten = T1L - T20; + Teq = Teo - Tep; + Ter = Ten - Teq; + Tez = Ten + Teq; + } + { + E Tes, TeC, Tew, TeA; + Tes = KP707106781 * (Tem + Ter); + ro[WS(os, 40)] = Teh - Tes; + ro[WS(os, 8)] = Teh + Tes; + TeC = KP707106781 * (Tey + Tez); + io[WS(os, 40)] = TeB - TeC; + io[WS(os, 8)] = TeB + TeC; + Tew = KP707106781 * (Ter - Tem); + io[WS(os, 56)] = Tev - Tew; + io[WS(os, 24)] = Tev + Tew; + TeA = KP707106781 * (Tey - Tez); + ro[WS(os, 56)] = Tex - TeA; + ro[WS(os, 24)] = Tex + TeA; + } + } + { + E Tdb, TdV, Te5, TdJ, Tdi, Te6, Te3, Teb, TdM, TdW, Tdu, TdQ, Te0, Tea, TdF; + E TdR; + { + E Tde, Tdh, Tdo, Tdt; + Tdb = Td9 - Tda; + TdV = Td9 + Tda; + Te5 = TdI + TdH; + TdJ = TdH - 
TdI; + Tde = Tdc - Tdd; + Tdh = Tdf + Tdg; + Tdi = KP707106781 * (Tde - Tdh); + Te6 = KP707106781 * (Tde + Tdh); + { + E Te1, Te2, TdK, TdL; + Te1 = Tdv + Tdy; + Te2 = TdD + TdC; + Te3 = FNMS(KP382683432, Te2, KP923879532 * Te1); + Teb = FMA(KP923879532, Te2, KP382683432 * Te1); + TdK = Tdf - Tdg; + TdL = Tdd + Tdc; + TdM = KP707106781 * (TdK - TdL); + TdW = KP707106781 * (TdL + TdK); + } + Tdo = Tdm - Tdn; + Tdt = Tdp - Tds; + Tdu = FMA(KP923879532, Tdo, KP382683432 * Tdt); + TdQ = FNMS(KP923879532, Tdt, KP382683432 * Tdo); + { + E TdY, TdZ, Tdz, TdE; + TdY = Tdn + Tdm; + TdZ = Tdp + Tds; + Te0 = FMA(KP382683432, TdY, KP923879532 * TdZ); + Tea = FNMS(KP382683432, TdZ, KP923879532 * TdY); + Tdz = Tdv - Tdy; + TdE = TdC - TdD; + TdF = FNMS(KP923879532, TdE, KP382683432 * Tdz); + TdR = FMA(KP382683432, TdE, KP923879532 * Tdz); + } + } + { + E Tdj, TdG, TdT, TdU; + Tdj = Tdb + Tdi; + TdG = Tdu + TdF; + ro[WS(os, 44)] = Tdj - TdG; + ro[WS(os, 12)] = Tdj + TdG; + TdT = TdJ + TdM; + TdU = TdQ + TdR; + io[WS(os, 44)] = TdT - TdU; + io[WS(os, 12)] = TdT + TdU; + } + { + E TdN, TdO, TdP, TdS; + TdN = TdJ - TdM; + TdO = TdF - Tdu; + io[WS(os, 60)] = TdN - TdO; + io[WS(os, 28)] = TdN + TdO; + TdP = Tdb - Tdi; + TdS = TdQ - TdR; + ro[WS(os, 60)] = TdP - TdS; + ro[WS(os, 28)] = TdP + TdS; + } + { + E TdX, Te4, Ted, Tee; + TdX = TdV + TdW; + Te4 = Te0 + Te3; + ro[WS(os, 36)] = TdX - Te4; + ro[WS(os, 4)] = TdX + Te4; + Ted = Te5 + Te6; + Tee = Tea + Teb; + io[WS(os, 36)] = Ted - Tee; + io[WS(os, 4)] = Ted + Tee; + } + { + E Te7, Te8, Te9, Tec; + Te7 = Te5 - Te6; + Te8 = Te3 - Te0; + io[WS(os, 52)] = Te7 - Te8; + io[WS(os, 20)] = Te7 + Te8; + Te9 = TdV - TdW; + Tec = Tea - Teb; + ro[WS(os, 52)] = Te9 - Tec; + ro[WS(os, 20)] = Te9 + Tec; + } + } + { + E Tcd, TcP, TcD, TcZ, Tck, Td0, TcX, Td5, Tcs, TcK, TcG, TcQ, TcU, Td4, Tcz; + E TcL, Tcc, TcC; + Tcc = KP707106781 * (TbD + TbC); + Tcd = Tcb - Tcc; + TcP = Tcb + Tcc; + TcC = KP707106781 * (Tak + Tan); + TcD = TcB - TcC; + TcZ = TcB 
+ TcC; + { + E Tcg, Tcj, TcV, TcW; + Tcg = FNMS(KP382683432, Tcf, KP923879532 * Tce); + Tcj = FMA(KP923879532, Tch, KP382683432 * Tci); + Tck = Tcg - Tcj; + Td0 = Tcg + Tcj; + TcV = Tct + Tcu; + TcW = Tcw + Tcx; + TcX = FNMS(KP195090322, TcW, KP980785280 * TcV); + Td5 = FMA(KP195090322, TcV, KP980785280 * TcW); + } + { + E Tco, Tcr, TcE, TcF; + Tco = Tcm - Tcn; + Tcr = Tcp - Tcq; + Tcs = FMA(KP555570233, Tco, KP831469612 * Tcr); + TcK = FNMS(KP831469612, Tco, KP555570233 * Tcr); + TcE = FNMS(KP382683432, Tch, KP923879532 * Tci); + TcF = FMA(KP382683432, Tce, KP923879532 * Tcf); + TcG = TcE - TcF; + TcQ = TcF + TcE; + } + { + E TcS, TcT, Tcv, Tcy; + TcS = Tcm + Tcn; + TcT = Tcp + Tcq; + TcU = FMA(KP980785280, TcS, KP195090322 * TcT); + Td4 = FNMS(KP195090322, TcS, KP980785280 * TcT); + Tcv = Tct - Tcu; + Tcy = Tcw - Tcx; + Tcz = FNMS(KP831469612, Tcy, KP555570233 * Tcv); + TcL = FMA(KP831469612, Tcv, KP555570233 * Tcy); + } + { + E Tcl, TcA, TcN, TcO; + Tcl = Tcd + Tck; + TcA = Tcs + Tcz; + ro[WS(os, 42)] = Tcl - TcA; + ro[WS(os, 10)] = Tcl + TcA; + TcN = TcD + TcG; + TcO = TcK + TcL; + io[WS(os, 42)] = TcN - TcO; + io[WS(os, 10)] = TcN + TcO; + } + { + E TcH, TcI, TcJ, TcM; + TcH = TcD - TcG; + TcI = Tcz - Tcs; + io[WS(os, 58)] = TcH - TcI; + io[WS(os, 26)] = TcH + TcI; + TcJ = Tcd - Tck; + TcM = TcK - TcL; + ro[WS(os, 58)] = TcJ - TcM; + ro[WS(os, 26)] = TcJ + TcM; + } + { + E TcR, TcY, Td7, Td8; + TcR = TcP + TcQ; + TcY = TcU + TcX; + ro[WS(os, 34)] = TcR - TcY; + ro[WS(os, 2)] = TcR + TcY; + Td7 = TcZ + Td0; + Td8 = Td4 + Td5; + io[WS(os, 34)] = Td7 - Td8; + io[WS(os, 2)] = Td7 + Td8; + } + { + E Td1, Td2, Td3, Td6; + Td1 = TcZ - Td0; + Td2 = TcX - TcU; + io[WS(os, 50)] = Td1 - Td2; + io[WS(os, 18)] = Td1 + Td2; + Td3 = TcP - TcQ; + Td6 = Td4 - Td5; + ro[WS(os, 50)] = Td3 - Td6; + ro[WS(os, 18)] = Td3 + Td6; + } + } + { + E Tap, TbR, TbF, Tc1, TaE, Tc2, TbZ, Tc7, Tb6, TbM, TbI, TbS, TbW, Tc6, Tbx; + E TbN, Tao, TbE; + Tao = KP707106781 * (Tak - Tan); + Tap = Tah 
- Tao; + TbR = Tah + Tao; + TbE = KP707106781 * (TbC - TbD); + TbF = TbB - TbE; + Tc1 = TbB + TbE; + { + E Taw, TaD, TbX, TbY; + Taw = FNMS(KP923879532, Tav, KP382683432 * Tas); + TaD = FMA(KP382683432, Taz, KP923879532 * TaC); + TaE = Taw - TaD; + Tc2 = Taw + TaD; + TbX = Tbb + Tbm; + TbY = Tbs + Tbv; + TbZ = FNMS(KP555570233, TbY, KP831469612 * TbX); + Tc7 = FMA(KP831469612, TbY, KP555570233 * TbX); + } + { + E TaW, Tb5, TbG, TbH; + TaW = TaK - TaV; + Tb5 = Tb1 - Tb4; + Tb6 = FMA(KP980785280, TaW, KP195090322 * Tb5); + TbM = FNMS(KP980785280, Tb5, KP195090322 * TaW); + TbG = FNMS(KP923879532, Taz, KP382683432 * TaC); + TbH = FMA(KP923879532, Tas, KP382683432 * Tav); + TbI = TbG - TbH; + TbS = TbH + TbG; + } + { + E TbU, TbV, Tbn, Tbw; + TbU = TaK + TaV; + TbV = Tb1 + Tb4; + TbW = FMA(KP555570233, TbU, KP831469612 * TbV); + Tc6 = FNMS(KP555570233, TbV, KP831469612 * TbU); + Tbn = Tbb - Tbm; + Tbw = Tbs - Tbv; + Tbx = FNMS(KP980785280, Tbw, KP195090322 * Tbn); + TbN = FMA(KP195090322, Tbw, KP980785280 * Tbn); + } + { + E TaF, Tby, TbP, TbQ; + TaF = Tap + TaE; + Tby = Tb6 + Tbx; + ro[WS(os, 46)] = TaF - Tby; + ro[WS(os, 14)] = TaF + Tby; + TbP = TbF + TbI; + TbQ = TbM + TbN; + io[WS(os, 46)] = TbP - TbQ; + io[WS(os, 14)] = TbP + TbQ; + } + { + E TbJ, TbK, TbL, TbO; + TbJ = TbF - TbI; + TbK = Tbx - Tb6; + io[WS(os, 62)] = TbJ - TbK; + io[WS(os, 30)] = TbJ + TbK; + TbL = Tap - TaE; + TbO = TbM - TbN; + ro[WS(os, 62)] = TbL - TbO; + ro[WS(os, 30)] = TbL + TbO; + } + { + E TbT, Tc0, Tc9, Tca; + TbT = TbR + TbS; + Tc0 = TbW + TbZ; + ro[WS(os, 38)] = TbT - Tc0; + ro[WS(os, 6)] = TbT + Tc0; + Tc9 = Tc1 + Tc2; + Tca = Tc6 + Tc7; + io[WS(os, 38)] = Tc9 - Tca; + io[WS(os, 6)] = Tc9 + Tca; + } + { + E Tc3, Tc4, Tc5, Tc8; + Tc3 = Tc1 - Tc2; + Tc4 = TbZ - TbW; + io[WS(os, 54)] = Tc3 - Tc4; + io[WS(os, 22)] = Tc3 + Tc4; + Tc5 = TbR - TbS; + Tc8 = Tc6 - Tc7; + ro[WS(os, 54)] = Tc5 - Tc8; + ro[WS(os, 22)] = Tc5 + Tc8; + } + } + { + E T6F, T7h, T7m, T7w, T7p, T7x, T6M, T7s, T6U, 
T7c, T75, T7r, T78, T7i, T71; + E T7d; + { + E T6D, T6E, T7k, T7l; + T6D = T37 + T3e; + T6E = T65 + T64; + T6F = T6D - T6E; + T7h = T6D + T6E; + T7k = T6O + T6P; + T7l = T6R + T6S; + T7m = FMA(KP956940335, T7k, KP290284677 * T7l); + T7w = FNMS(KP290284677, T7k, KP956940335 * T7l); + } + { + E T7n, T7o, T6I, T6L; + T7n = T6V + T6W; + T7o = T6Y + T6Z; + T7p = FNMS(KP290284677, T7o, KP956940335 * T7n); + T7x = FMA(KP290284677, T7n, KP956940335 * T7o); + T6I = FNMS(KP555570233, T6H, KP831469612 * T6G); + T6L = FMA(KP831469612, T6J, KP555570233 * T6K); + T6M = T6I - T6L; + T7s = T6I + T6L; + } + { + E T6Q, T6T, T73, T74; + T6Q = T6O - T6P; + T6T = T6R - T6S; + T6U = FMA(KP471396736, T6Q, KP881921264 * T6T); + T7c = FNMS(KP881921264, T6Q, KP471396736 * T6T); + T73 = T5Z + T62; + T74 = T3m + T3t; + T75 = T73 - T74; + T7r = T73 + T74; + } + { + E T76, T77, T6X, T70; + T76 = FNMS(KP555570233, T6J, KP831469612 * T6K); + T77 = FMA(KP555570233, T6G, KP831469612 * T6H); + T78 = T76 - T77; + T7i = T77 + T76; + T6X = T6V - T6W; + T70 = T6Y - T6Z; + T71 = FNMS(KP881921264, T70, KP471396736 * T6X); + T7d = FMA(KP881921264, T6X, KP471396736 * T70); + } + { + E T6N, T72, T7f, T7g; + T6N = T6F + T6M; + T72 = T6U + T71; + ro[WS(os, 43)] = T6N - T72; + ro[WS(os, 11)] = T6N + T72; + T7f = T75 + T78; + T7g = T7c + T7d; + io[WS(os, 43)] = T7f - T7g; + io[WS(os, 11)] = T7f + T7g; + } + { + E T79, T7a, T7b, T7e; + T79 = T75 - T78; + T7a = T71 - T6U; + io[WS(os, 59)] = T79 - T7a; + io[WS(os, 27)] = T79 + T7a; + T7b = T6F - T6M; + T7e = T7c - T7d; + ro[WS(os, 59)] = T7b - T7e; + ro[WS(os, 27)] = T7b + T7e; + } + { + E T7j, T7q, T7z, T7A; + T7j = T7h + T7i; + T7q = T7m + T7p; + ro[WS(os, 35)] = T7j - T7q; + ro[WS(os, 3)] = T7j + T7q; + T7z = T7r + T7s; + T7A = T7w + T7x; + io[WS(os, 35)] = T7z - T7A; + io[WS(os, 3)] = T7z + T7A; + } + { + E T7t, T7u, T7v, T7y; + T7t = T7r - T7s; + T7u = T7p - T7m; + io[WS(os, 51)] = T7t - T7u; + io[WS(os, 19)] = T7t + T7u; + T7v = T7h - T7i; + T7y = T7w - T7x; 
+ ro[WS(os, 51)] = T7v - T7y; + ro[WS(os, 19)] = T7v + T7y; + } + } + { + E T9j, T9V, Ta0, Taa, Ta3, Tab, T9q, Ta6, T9y, T9Q, T9J, Ta5, T9M, T9W, T9F; + E T9R; + { + E T9h, T9i, T9Y, T9Z; + T9h = T7B + T7C; + T9i = T8J + T8I; + T9j = T9h - T9i; + T9V = T9h + T9i; + T9Y = T9s + T9t; + T9Z = T9v + T9w; + Ta0 = FMA(KP995184726, T9Y, KP098017140 * T9Z); + Taa = FNMS(KP098017140, T9Y, KP995184726 * T9Z); + } + { + E Ta1, Ta2, T9m, T9p; + Ta1 = T9z + T9A; + Ta2 = T9C + T9D; + Ta3 = FNMS(KP098017140, Ta2, KP995184726 * Ta1); + Tab = FMA(KP098017140, Ta1, KP995184726 * Ta2); + T9m = FNMS(KP195090322, T9l, KP980785280 * T9k); + T9p = FMA(KP195090322, T9n, KP980785280 * T9o); + T9q = T9m - T9p; + Ta6 = T9m + T9p; + } + { + E T9u, T9x, T9H, T9I; + T9u = T9s - T9t; + T9x = T9v - T9w; + T9y = FMA(KP634393284, T9u, KP773010453 * T9x); + T9Q = FNMS(KP773010453, T9u, KP634393284 * T9x); + T9H = T8F + T8G; + T9I = T7G + T7J; + T9J = T9H - T9I; + Ta5 = T9H + T9I; + } + { + E T9K, T9L, T9B, T9E; + T9K = FNMS(KP195090322, T9o, KP980785280 * T9n); + T9L = FMA(KP980785280, T9l, KP195090322 * T9k); + T9M = T9K - T9L; + T9W = T9L + T9K; + T9B = T9z - T9A; + T9E = T9C - T9D; + T9F = FNMS(KP773010453, T9E, KP634393284 * T9B); + T9R = FMA(KP773010453, T9B, KP634393284 * T9E); + } + { + E T9r, T9G, T9T, T9U; + T9r = T9j + T9q; + T9G = T9y + T9F; + ro[WS(os, 41)] = T9r - T9G; + ro[WS(os, 9)] = T9r + T9G; + T9T = T9J + T9M; + T9U = T9Q + T9R; + io[WS(os, 41)] = T9T - T9U; + io[WS(os, 9)] = T9T + T9U; + } + { + E T9N, T9O, T9P, T9S; + T9N = T9J - T9M; + T9O = T9F - T9y; + io[WS(os, 57)] = T9N - T9O; + io[WS(os, 25)] = T9N + T9O; + T9P = T9j - T9q; + T9S = T9Q - T9R; + ro[WS(os, 57)] = T9P - T9S; + ro[WS(os, 25)] = T9P + T9S; + } + { + E T9X, Ta4, Tad, Tae; + T9X = T9V + T9W; + Ta4 = Ta0 + Ta3; + ro[WS(os, 33)] = T9X - Ta4; + ro[WS(os, 1)] = T9X + Ta4; + Tad = Ta5 + Ta6; + Tae = Taa + Tab; + io[WS(os, 33)] = Tad - Tae; + io[WS(os, 1)] = Tad + Tae; + } + { + E Ta7, Ta8, Ta9, Tac; + Ta7 = Ta5 - 
Ta6; + Ta8 = Ta3 - Ta0; + io[WS(os, 49)] = Ta7 - Ta8; + io[WS(os, 17)] = Ta7 + Ta8; + Ta9 = T9V - T9W; + Tac = Taa - Tab; + ro[WS(os, 49)] = Ta9 - Tac; + ro[WS(os, 17)] = Ta9 + Tac; + } + } + { + E T3v, T6j, T6o, T6y, T6r, T6z, T48, T6u, T52, T6e, T67, T6t, T6a, T6k, T5V; + E T6f; + { + E T3f, T3u, T6m, T6n; + T3f = T37 - T3e; + T3u = T3m - T3t; + T3v = T3f - T3u; + T6j = T3f + T3u; + T6m = T4q + T4N; + T6n = T4X + T50; + T6o = FMA(KP634393284, T6m, KP773010453 * T6n); + T6y = FNMS(KP634393284, T6n, KP773010453 * T6m); + } + { + E T6p, T6q, T3O, T47; + T6p = T5j + T5G; + T6q = T5Q + T5T; + T6r = FNMS(KP634393284, T6q, KP773010453 * T6p); + T6z = FMA(KP773010453, T6q, KP634393284 * T6p); + T3O = FNMS(KP980785280, T3N, KP195090322 * T3G); + T47 = FMA(KP195090322, T3Z, KP980785280 * T46); + T48 = T3O - T47; + T6u = T3O + T47; + } + { + E T4O, T51, T63, T66; + T4O = T4q - T4N; + T51 = T4X - T50; + T52 = FMA(KP995184726, T4O, KP098017140 * T51); + T6e = FNMS(KP995184726, T51, KP098017140 * T4O); + T63 = T5Z - T62; + T66 = T64 - T65; + T67 = T63 - T66; + T6t = T63 + T66; + } + { + E T68, T69, T5H, T5U; + T68 = FNMS(KP980785280, T3Z, KP195090322 * T46); + T69 = FMA(KP980785280, T3G, KP195090322 * T3N); + T6a = T68 - T69; + T6k = T69 + T68; + T5H = T5j - T5G; + T5U = T5Q - T5T; + T5V = FNMS(KP995184726, T5U, KP098017140 * T5H); + T6f = FMA(KP098017140, T5U, KP995184726 * T5H); + } + { + E T49, T5W, T6h, T6i; + T49 = T3v + T48; + T5W = T52 + T5V; + ro[WS(os, 47)] = T49 - T5W; + ro[WS(os, 15)] = T49 + T5W; + T6h = T67 + T6a; + T6i = T6e + T6f; + io[WS(os, 47)] = T6h - T6i; + io[WS(os, 15)] = T6h + T6i; + } + { + E T6b, T6c, T6d, T6g; + T6b = T67 - T6a; + T6c = T5V - T52; + io[WS(os, 63)] = T6b - T6c; + io[WS(os, 31)] = T6b + T6c; + T6d = T3v - T48; + T6g = T6e - T6f; + ro[WS(os, 63)] = T6d - T6g; + ro[WS(os, 31)] = T6d + T6g; + } + { + E T6l, T6s, T6B, T6C; + T6l = T6j + T6k; + T6s = T6o + T6r; + ro[WS(os, 39)] = T6l - T6s; + ro[WS(os, 7)] = T6l + T6s; + T6B = T6t + T6u; + 
T6C = T6y + T6z; + io[WS(os, 39)] = T6B - T6C; + io[WS(os, 7)] = T6B + T6C; + } + { + E T6v, T6w, T6x, T6A; + T6v = T6t - T6u; + T6w = T6r - T6o; + io[WS(os, 55)] = T6v - T6w; + io[WS(os, 23)] = T6v + T6w; + T6x = T6j - T6k; + T6A = T6y - T6z; + ro[WS(os, 55)] = T6x - T6A; + ro[WS(os, 23)] = T6x + T6A; + } + } + { + E T7L, T8X, T92, T9c, T95, T9d, T80, T98, T8k, T8S, T8L, T97, T8O, T8Y, T8D; + E T8T; + { + E T7D, T7K, T90, T91; + T7D = T7B - T7C; + T7K = T7G - T7J; + T7L = T7D - T7K; + T8X = T7D + T7K; + T90 = T84 + T8b; + T91 = T8f + T8i; + T92 = FMA(KP471396736, T90, KP881921264 * T91); + T9c = FNMS(KP471396736, T91, KP881921264 * T90); + } + { + E T93, T94, T7S, T7Z; + T93 = T8n + T8u; + T94 = T8y + T8B; + T95 = FNMS(KP471396736, T94, KP881921264 * T93); + T9d = FMA(KP881921264, T94, KP471396736 * T93); + T7S = FNMS(KP831469612, T7R, KP555570233 * T7O); + T7Z = FMA(KP831469612, T7V, KP555570233 * T7Y); + T80 = T7S - T7Z; + T98 = T7S + T7Z; + } + { + E T8c, T8j, T8H, T8K; + T8c = T84 - T8b; + T8j = T8f - T8i; + T8k = FMA(KP956940335, T8c, KP290284677 * T8j); + T8S = FNMS(KP956940335, T8j, KP290284677 * T8c); + T8H = T8F - T8G; + T8K = T8I - T8J; + T8L = T8H - T8K; + T97 = T8H + T8K; + } + { + E T8M, T8N, T8v, T8C; + T8M = FNMS(KP831469612, T7Y, KP555570233 * T7V); + T8N = FMA(KP555570233, T7R, KP831469612 * T7O); + T8O = T8M - T8N; + T8Y = T8N + T8M; + T8v = T8n - T8u; + T8C = T8y - T8B; + T8D = FNMS(KP956940335, T8C, KP290284677 * T8v); + T8T = FMA(KP290284677, T8C, KP956940335 * T8v); + } + { + E T81, T8E, T8V, T8W; + T81 = T7L + T80; + T8E = T8k + T8D; + ro[WS(os, 45)] = T81 - T8E; + ro[WS(os, 13)] = T81 + T8E; + T8V = T8L + T8O; + T8W = T8S + T8T; + io[WS(os, 45)] = T8V - T8W; + io[WS(os, 13)] = T8V + T8W; + } + { + E T8P, T8Q, T8R, T8U; + T8P = T8L - T8O; + T8Q = T8D - T8k; + io[WS(os, 61)] = T8P - T8Q; + io[WS(os, 29)] = T8P + T8Q; + T8R = T7L - T80; + T8U = T8S - T8T; + ro[WS(os, 61)] = T8R - T8U; + ro[WS(os, 29)] = T8R + T8U; + } + { + E T8Z, T96, T9f, 
T9g; + T8Z = T8X + T8Y; + T96 = T92 + T95; + ro[WS(os, 37)] = T8Z - T96; + ro[WS(os, 5)] = T8Z + T96; + T9f = T97 + T98; + T9g = T9c + T9d; + io[WS(os, 37)] = T9f - T9g; + io[WS(os, 5)] = T9f + T9g; + } + { + E T99, T9a, T9b, T9e; + T99 = T97 - T98; + T9a = T95 - T92; + io[WS(os, 53)] = T99 - T9a; + io[WS(os, 21)] = T99 + T9a; + T9b = T8X - T8Y; + T9e = T9c - T9d; + ro[WS(os, 53)] = T9b - T9e; + ro[WS(os, 21)] = T9b + T9e; + } + } + } + } +} +/* Descriptor handed to the planner by the registration function below. NOTE(review): the leading 64 and the {808, 144, 104, 0} tuple look like the transform size and the (additions, multiplications, fused multiply/add, other) operation counts -- confirm against the kdft_desc definition. */ +static const kdft_desc desc = { 64, "n1_64", {808, 144, 104, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registers the n1_64 codelet and its descriptor with planner p through X(kdft_register). */ +void X(codelet_n1_64) (planner *p) { + X(kdft_register) (p, n1_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 7 -name n1_7 -include n.h */ + +/* + * This function contains 60 FP additions, 42 FP multiplications, + * (or, 18 additions, 0 multiplications, 42 fused multiply/add), + * 51 stack variables, 6 constants, and 28 memory accesses + */ +#include "n.h" +/* n1_7, HAVE_FMA build. Each of the v loop iterations reads ri[0], ii[0] and ri/ii[WS(is, 1..6)], and writes ro[0], io[0] and ro/io[WS(os, 1..6)]; after each iteration ri and ii advance by ivs, ro and io by ovs. NOTE(review): the generator flags above (gen_notw, -n 7) indicate a size-7 no-twiddle DFT -- confirm. */ +static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + E Tz, TP, Ty, TK, TN, TE, Tw, TF; + { + E T1, TI, T4, TG, Ta, TT, Tp, TH, T7, Tk, TJ, TO, Tu, Tb, TB; + E Tg, Tl, Th, Ti; + T1 = ri[0]; + Tz = ii[0]; + { + E T5, T6, Te, Tf; + { + E T2, T3, T8, T9; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 6)]; + T8 = ri[WS(is, 3)]; + T9 = ri[WS(is, 4)]; + T5 = ri[WS(is, 2)]; + TI = T3 - T2; + T4 = T2 + T3; + TG = T9 - T8; + Ta = T8 + T9; + T6 = ri[WS(is, 5)]; + } + Te = ii[WS(is, 2)]; + TT = FMA(KP554958132, TG, TI); + Tp = FNMS(KP356895867, T4, Ta); + TH = T6 - T5; + T7 = T5 + T6; + Tf =
ii[WS(is, 5)]; + Tk = ii[WS(is, 3)]; + TJ = FNMS(KP554958132, TI, TH); + TO = FMA(KP554958132, TH, TG); + Tu = FNMS(KP356895867, Ta, T7); + Tb = FNMS(KP356895867, T7, T4); + TB = Te + Tf; + Tg = Te - Tf; + Tl = ii[WS(is, 4)]; + Th = ii[WS(is, 1)]; + Ti = ii[WS(is, 6)]; + } + { + E Tm, TA, Tj, TD, Ts, TL, Tx, TU, To, TR, Td, TM, Tv; + { + E TC, TQ, Tn, Tc; + ro[0] = T1 + T4 + T7 + Ta; /* sum of the seven ri samples read above */ + TC = Tk + Tl; + Tm = Tk - Tl; + TA = Th + Ti; + Tj = Th - Ti; + TD = FNMS(KP356895867, TC, TB); + Ts = FMA(KP554958132, Tg, Tm); + TL = FNMS(KP356895867, TA, TC); + TQ = FNMS(KP356895867, TB, TA); + Tx = FNMS(KP554958132, Tj, Tg); + Tn = FMA(KP554958132, Tm, Tj); + io[0] = Tz + TA + TB + TC; /* sum of the seven ii samples read above */ + Tc = FNMS(KP692021471, Tb, Ta); + TU = FMA(KP801937735, TT, TH); + To = FMA(KP801937735, Tn, Tg); + TR = FNMS(KP692021471, TQ, TC); + Td = FNMS(KP900968867, Tc, T1); + } + { + E Tt, Tr, TS, Tq; + Tt = FNMS(KP801937735, Ts, Tj); + Tq = FNMS(KP692021471, Tp, T7); + TS = FNMS(KP900968867, TR, Tz); + ro[WS(os, 1)] = FMA(KP974927912, To, Td); + ro[WS(os, 6)] = FNMS(KP974927912, To, Td); + Tr = FNMS(KP900968867, Tq, T1); + io[WS(os, 6)] = FNMS(KP974927912, TU, TS); + io[WS(os, 1)] = FMA(KP974927912, TU, TS); + TP = FNMS(KP801937735, TO, TI); + ro[WS(os, 2)] = FMA(KP974927912, Tt, Tr); + ro[WS(os, 5)] = FNMS(KP974927912, Tt, Tr); + TM = FNMS(KP692021471, TL, TB); + } + Ty = FNMS(KP801937735, Tx, Tm); + Tv = FNMS(KP692021471, Tu, T4); + TK = FNMS(KP801937735, TJ, TG); + TN = FNMS(KP900968867, TM, Tz); + TE = FNMS(KP692021471, TD, TA); + Tw = FNMS(KP900968867, Tv, T1); + } + } + io[WS(os, 5)] = FNMS(KP974927912, TP, TN); + io[WS(os, 2)] = FMA(KP974927912, TP, TN); + TF = FNMS(KP900968867, TE, Tz); + ro[WS(os, 3)] = FMA(KP974927912, Ty, Tw); + ro[WS(os, 4)] = FNMS(KP974927912, Ty, Tw); + io[WS(os, 4)] = FNMS(KP974927912, TK, TF); + io[WS(os, 3)] = FMA(KP974927912, TK, TF); + } + } +} +/* Descriptor for the HAVE_FMA build; {18, 0, 42, 0} repeats the 18 additions, 0 multiplications and 42 fused multiply/add quoted in the header comment above. */ +static const kdft_desc desc = { 7, "n1_7", {18, 0, 42, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registers n1_7 and its descriptor with planner p through X(kdft_register). */ +void X(codelet_n1_7) (planner *p) {
+ X(kdft_register) (p, n1_7, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 7 -name n1_7 -include n.h */ + +/* + * This function contains 60 FP additions, 36 FP multiplications, + * (or, 36 additions, 12 multiplications, 24 fused multiply/add), + * 25 stack variables, 6 constants, and 28 memory accesses + */ +#include "n.h" +/* n1_7, build without HAVE_FMA. Each of the v loop iterations reads ri/ii at 0 and WS(is, 1..6), writes ro/io at 0 and WS(os, 1..6), then advances ri, ii by ivs and ro, io by ovs. Inputs are first combined into pairwise sums and differences (indices 1 and 6, 2 and 5, 3 and 4) before the constant-weighted output stages. */ +static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + E T1, Tu, T4, Tq, Te, Tx, T7, Ts, Tk, Tv, Ta, Tr, Th, Tw; + T1 = ri[0]; + Tu = ii[0]; + { + E T2, T3, Tc, Td; + T2 = ri[WS(is, 1)]; + T3 = ri[WS(is, 6)]; + T4 = T2 + T3; + Tq = T3 - T2; + Tc = ii[WS(is, 1)]; + Td = ii[WS(is, 6)]; + Te = Tc - Td; + Tx = Tc + Td; + } + { + E T5, T6, Ti, Tj; + T5 = ri[WS(is, 2)]; + T6 = ri[WS(is, 5)]; + T7 = T5 + T6; + Ts = T6 - T5; + Ti = ii[WS(is, 2)]; + Tj = ii[WS(is, 5)]; + Tk = Ti - Tj; + Tv = Ti + Tj; + } + { + E T8, T9, Tf, Tg; + T8 = ri[WS(is, 3)]; + T9 = ri[WS(is, 4)]; + Ta = T8 + T9; + Tr = T9 - T8; + Tf = ii[WS(is, 3)]; + Tg = ii[WS(is, 4)]; + Th = Tf - Tg; + Tw = Tf + Tg; + } + ro[0] = T1 + T4 + T7 + Ta; /* sum of the seven ri samples read above */ + io[0] = Tu + Tx + Tv + Tw; /* sum of the seven ii samples read above */ + { + E Tl, Tb, TB, TC; + Tl = FNMS(KP781831482, Th, KP974927912 * Te) - (KP433883739 * Tk); + Tb = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4); + ro[WS(os, 5)] = Tb - Tl; + ro[WS(os, 2)]
= Tb + Tl; + TB = FNMS(KP781831482, Tr, KP974927912 * Tq) - (KP433883739 * Ts); + TC = FMA(KP623489801, Tw, Tu) + FNMA(KP900968867, Tv, KP222520933 * Tx); + io[WS(os, 2)] = TB + TC; + io[WS(os, 5)] = TC - TB; + } + { + E Tn, Tm, Tz, TA; + Tn = FMA(KP781831482, Te, KP974927912 * Tk) + (KP433883739 * Th); + Tm = FMA(KP623489801, T4, T1) + FNMA(KP900968867, Ta, KP222520933 * T7); + ro[WS(os, 6)] = Tm - Tn; + ro[WS(os, 1)] = Tm + Tn; + Tz = FMA(KP781831482, Tq, KP974927912 * Ts) + (KP433883739 * Tr); + TA = FMA(KP623489801, Tx, Tu) + FNMA(KP900968867, Tw, KP222520933 * Tv); + io[WS(os, 1)] = Tz + TA; + io[WS(os, 6)] = TA - Tz; + } + { + E Tp, To, Tt, Ty; + Tp = FMA(KP433883739, Te, KP974927912 * Th) - (KP781831482 * Tk); + To = FMA(KP623489801, T7, T1) + FNMA(KP222520933, Ta, KP900968867 * T4); + ro[WS(os, 4)] = To - Tp; + ro[WS(os, 3)] = To + Tp; + Tt = FMA(KP433883739, Tq, KP974927912 * Tr) - (KP781831482 * Ts); + Ty = FMA(KP623489801, Tv, Tu) + FNMA(KP222520933, Tw, KP900968867 * Tx); + io[WS(os, 3)] = Tt + Ty; + io[WS(os, 4)] = Ty - Tt; + } + } + } +} +/* Descriptor for the build without HAVE_FMA; {36, 12, 24, 0} repeats the 36 additions, 12 multiplications and 24 fused multiply/add quoted in the header comment above. */ +static const kdft_desc desc = { 7, "n1_7", {36, 12, 24, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registers n1_7 and its descriptor with planner p through X(kdft_register). */ +void X(codelet_n1_7) (planner *p) { + X(kdft_register) (p, n1_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -name n1_8 -include n.h */ + +/* + * This function contains 52 FP additions, 8 FP multiplications, + * (or, 44 additions, 0 multiplications, 8 fused multiply/add), + * 36 stack variables, 1 constants, and 32 memory accesses + */ +#include "n.h" + +static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + E TF, TE, TD, TI; + { + E Tn, T3, TC, Ti, TB, T6, To, Tl, Td, TN, Tz, TH, Ta, Tq, Tt; + E TM; + { + E T4, T5, Tj, Tk; + { + E T1, T2, Tg, Th; + T1 = ri[0]; + T2 = ri[WS(is, 4)]; + Tg = ii[0]; + Th = ii[WS(is, 4)]; + T4 = ri[WS(is, 2)]; + Tn = T1 - T2; + T3 = T1 + T2; + TC = Tg - Th; + Ti = Tg + Th; + T5 = ri[WS(is, 6)]; + } + Tj = ii[WS(is, 2)]; + Tk = ii[WS(is, 6)]; + { + E Tb, Tc, Tw, Tx; + Tb = ri[WS(is, 7)]; + TB = T4 - T5; + T6 = T4 + T5; + To = Tj - Tk; + Tl = Tj + Tk; + Tc = ri[WS(is, 3)]; + Tw = ii[WS(is, 7)]; + Tx = ii[WS(is, 3)]; + { + E T8, Tv, Ty, T9, Tr, Ts; + T8 = ri[WS(is, 1)]; + Td = Tb + Tc; + Tv = Tb 
- Tc; + TN = Tw + Tx; + Ty = Tw - Tx; + T9 = ri[WS(is, 5)]; + Tr = ii[WS(is, 1)]; + Ts = ii[WS(is, 5)]; + Tz = Tv - Ty; + TH = Tv + Ty; + Ta = T8 + T9; + Tq = T8 - T9; + Tt = Tr - Ts; + TM = Tr + Ts; + } + } + } + { + E TL, TG, Tu, Tf, Tm, TO; + { + E T7, Te, TP, TQ; + TL = T3 - T6; + T7 = T3 + T6; + TG = Tt - Tq; + Tu = Tq + Tt; + Te = Ta + Td; + Tf = Td - Ta; + Tm = Ti - Tl; + TP = Ti + Tl; + TQ = TM + TN; + TO = TM - TN; + ro[0] = T7 + Te; + ro[WS(os, 4)] = T7 - Te; + io[0] = TP + TQ; + io[WS(os, 4)] = TP - TQ; + } + { + E Tp, TA, TJ, TK; + TF = Tn - To; + Tp = Tn + To; + io[WS(os, 6)] = Tm - Tf; + io[WS(os, 2)] = Tf + Tm; + ro[WS(os, 2)] = TL + TO; + ro[WS(os, 6)] = TL - TO; + TA = Tu + Tz; + TE = Tz - Tu; + TD = TB + TC; + TJ = TC - TB; + TK = TG + TH; + TI = TG - TH; + ro[WS(os, 1)] = FMA(KP707106781, TA, Tp); + ro[WS(os, 5)] = FNMS(KP707106781, TA, Tp); + io[WS(os, 1)] = FMA(KP707106781, TK, TJ); + io[WS(os, 5)] = FNMS(KP707106781, TK, TJ); + } + } + } + io[WS(os, 3)] = FMA(KP707106781, TE, TD); + io[WS(os, 7)] = FNMS(KP707106781, TE, TD); + ro[WS(os, 3)] = FMA(KP707106781, TI, TF); + ro[WS(os, 7)] = FNMS(KP707106781, TI, TF); + } + } +} + +static const kdft_desc desc = { 8, "n1_8", {44, 0, 8, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_8) (planner *p) { + X(kdft_register) (p, n1_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 8 -name n1_8 -include n.h */ + +/* + * This function contains 52 FP additions, 4 FP multiplications, + * (or, 52 additions, 4 multiplications, 0 fused multiply/add), + * 28 stack variables, 1 constants, and 32 memory accesses + */ +#include "n.h" + +static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, 
MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + E T3, Tn, Ti, TC, T6, TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu; + E TG; + { + E T1, T2, Tj, Tk; + T1 = ri[0]; + T2 = ri[WS(is, 4)]; + T3 = T1 + T2; + Tn = T1 - T2; + { + E Tg, Th, T4, T5; + Tg = ii[0]; + Th = ii[WS(is, 4)]; + Ti = Tg + Th; + TC = Tg - Th; + T4 = ri[WS(is, 2)]; + T5 = ri[WS(is, 6)]; + T6 = T4 + T5; + TB = T4 - T5; + } + Tj = ii[WS(is, 2)]; + Tk = ii[WS(is, 6)]; + Tl = Tj + Tk; + To = Tj - Tk; + { + E Tb, Tc, Tv, Tw, Tx, Ty; + Tb = ri[WS(is, 7)]; + Tc = ri[WS(is, 3)]; + Tv = Tb - Tc; + Tw = ii[WS(is, 7)]; + Tx = ii[WS(is, 3)]; + Ty = Tw - Tx; + Td = Tb + Tc; + TN = Tw + Tx; + Tz = Tv - Ty; + TH = Tv + Ty; + } + { + E T8, T9, Tq, Tr, Ts, Tt; + T8 = ri[WS(is, 1)]; + T9 = ri[WS(is, 5)]; + Tq = T8 - T9; + Tr = ii[WS(is, 1)]; + Ts = ii[WS(is, 5)]; + Tt = Tr - Ts; + Ta = T8 + T9; + TM = Tr + Ts; + Tu = Tq + Tt; + TG = Tt - Tq; + } + } + { + E T7, Te, TP, TQ; + T7 = T3 + T6; + Te = Ta + Td; + ro[WS(os, 4)] = T7 - Te; + ro[0] = T7 + Te; + TP = Ti + Tl; + TQ = TM + TN; + io[WS(os, 4)] = TP - TQ; + io[0] = TP + TQ; + } + { + E Tf, Tm, TL, TO; + Tf = Td - Ta; + Tm = Ti - Tl; + io[WS(os, 2)] = Tf + Tm; + io[WS(os, 6)] = Tm - Tf; + TL = T3 - T6; + TO = TM - TN; + ro[WS(os, 6)] = TL - TO; + ro[WS(os, 2)] = TL + TO; + } + { + E Tp, TA, TJ, TK; + Tp = Tn + To; + TA = KP707106781 * (Tu + Tz); + ro[WS(os, 5)] = Tp - TA; + ro[WS(os, 1)] = Tp + TA; + TJ = TC - TB; + TK = KP707106781 * (TG + TH); + io[WS(os, 5)] = TJ - TK; + io[WS(os, 1)] = TJ + TK; + } + { + E TD, TE, TF, TI; + TD = TB + TC; + TE = KP707106781 * (Tz - Tu); + io[WS(os, 7)] = TD - TE; + io[WS(os, 3)] = TD + TE; + TF = Tn - To; + TI = KP707106781 * (TG - TH); + ro[WS(os, 7)] = TF - TI; + ro[WS(os, 3)] = TF + TI; + } + } + } +} + +static const kdft_desc desc = { 8, "n1_8", {52, 4, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_8) (planner *p) { + X(kdft_register) (p, n1_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 
26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/n1_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -name n1_9 -include n.h */ + +/* + * This function contains 80 FP additions, 56 FP multiplications, + * (or, 24 additions, 0 multiplications, 56 fused multiply/add), + * 59 stack variables, 10 constants, and 36 memory accesses + */ +#include "n.h" + +static void n1_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP954188894, +0.954188894138671133499268364187245676532219158); + DK(KP363970234, +0.363970234266202361351047882776834043890471784); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP984807753, 
+0.984807753012208059366743024589523013670643252); + DK(KP492403876, +0.492403876506104029683371512294761506835321626); + DK(KP777861913, +0.777861913430206160028177977318626690410586096); + DK(KP839099631, +0.839099631177280011763127298123181364687434283); + DK(KP176326980, +0.176326980708464973471090386868618986121633062); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(36, is), MAKE_VOLATILE_STRIDE(36, os)) { + E T17, TV, T14, TY, T11, T15; + { + E Tm, TM, TL, T5, Tl, T1f, Tb, Tt, Ta, T1c, TI, TX, TF, TW, Tc; + E Td, Tp, Tq; + { + E T1, Th, Ti, Tj, T4, T2, T3; + T1 = ri[0]; + T2 = ri[WS(is, 3)]; + T3 = ri[WS(is, 6)]; + Th = ii[0]; + Ti = ii[WS(is, 3)]; + Tj = ii[WS(is, 6)]; + T4 = T2 + T3; + Tm = T3 - T2; + { + E T6, Tz, T7, T8, TA, TB, Tk; + T6 = ri[WS(is, 1)]; + TM = Ti - Tj; + Tk = Ti + Tj; + TL = FNMS(KP500000000, T4, T1); + T5 = T1 + T4; + Tz = ii[WS(is, 1)]; + Tl = FNMS(KP500000000, Tk, Th); + T1f = Th + Tk; + T7 = ri[WS(is, 4)]; + T8 = ri[WS(is, 7)]; + TA = ii[WS(is, 4)]; + TB = ii[WS(is, 7)]; + { + E TE, T9, TH, TC, TG, TD; + Tb = ri[WS(is, 2)]; + TE = T7 - T8; + T9 = T7 + T8; + TH = TB - TA; + TC = TA + TB; + Tt = ii[WS(is, 2)]; + Ta = T6 + T9; + TG = FNMS(KP500000000, T9, T6); + T1c = Tz + TC; + TD = FNMS(KP500000000, TC, Tz); + TI = FNMS(KP866025403, TH, TG); + TX = FMA(KP866025403, TH, TG); + TF = FNMS(KP866025403, TE, TD); + TW = FMA(KP866025403, TE, TD); + Tc = ri[WS(is, 5)]; + Td = ri[WS(is, 8)]; + Tp = ii[WS(is, 5)]; + Tq = ii[WS(is, 8)]; + } + } + } + { + E Tn, TN, TZ, T10, TO, Ty, TJ, TP; + { + E Tw, Te, Tu, Tr; + T17 = FNMS(KP866025403, Tm, Tl); + Tn = FMA(KP866025403, Tm, Tl); + Tw = Td - Tc; + Te = Tc + Td; + Tu = Tp + Tq; + Tr = Tp - Tq; + TN = FMA(KP866025403, TM, TL); + TV = FNMS(KP866025403, TM, TL); + { + E Tf, To, T1d, Tv; 
+ Tf = Tb + Te; + To = FNMS(KP500000000, Te, Tb); + T1d = Tt + Tu; + Tv = FNMS(KP500000000, Tu, Tt); + { + E Ts, Tg, T1i, Tx; + Ts = FMA(KP866025403, Tr, To); + TZ = FNMS(KP866025403, Tr, To); + Tg = Ta + Tf; + T1i = Tf - Ta; + Tx = FMA(KP866025403, Tw, Tv); + T10 = FNMS(KP866025403, Tw, Tv); + { + E T1e, T1g, T1b, T1h; + T1e = T1c - T1d; + T1g = T1c + T1d; + ro[0] = T5 + Tg; + T1b = FNMS(KP500000000, Tg, T5); + io[0] = T1f + T1g; + T1h = FNMS(KP500000000, T1g, T1f); + TO = FMA(KP176326980, Ts, Tx); + Ty = FNMS(KP176326980, Tx, Ts); + ro[WS(os, 6)] = FNMS(KP866025403, T1e, T1b); + ro[WS(os, 3)] = FMA(KP866025403, T1e, T1b); + io[WS(os, 6)] = FNMS(KP866025403, T1i, T1h); + io[WS(os, 3)] = FMA(KP866025403, T1i, T1h); + TJ = FNMS(KP839099631, TI, TF); + TP = FMA(KP839099631, TF, TI); + } + } + } + } + { + E TS, TK, TU, TQ, TT, TR; + TS = FMA(KP777861913, TJ, Ty); + TK = FNMS(KP777861913, TJ, Ty); + TU = FNMS(KP777861913, TP, TO); + TQ = FMA(KP777861913, TP, TO); + TT = FMA(KP492403876, TK, Tn); + io[WS(os, 1)] = FNMS(KP984807753, TK, Tn); + TR = FNMS(KP492403876, TQ, TN); + ro[WS(os, 1)] = FMA(KP984807753, TQ, TN); + io[WS(os, 4)] = FMA(KP852868531, TU, TT); + io[WS(os, 7)] = FNMS(KP852868531, TU, TT); + ro[WS(os, 7)] = FNMS(KP852868531, TS, TR); + ro[WS(os, 4)] = FMA(KP852868531, TS, TR); + T14 = FNMS(KP176326980, TW, TX); + TY = FMA(KP176326980, TX, TW); + T11 = FNMS(KP363970234, T10, TZ); + T15 = FMA(KP363970234, TZ, T10); + } + } + } + { + E T12, T1a, T16, T18, T13, T19; + T12 = FNMS(KP954188894, T11, TY); + T1a = FMA(KP954188894, T11, TY); + T16 = FNMS(KP954188894, T15, T14); + T18 = FMA(KP954188894, T15, T14); + T13 = FNMS(KP492403876, T12, TV); + ro[WS(os, 2)] = FMA(KP984807753, T12, TV); + T19 = FMA(KP492403876, T18, T17); + io[WS(os, 2)] = FNMS(KP984807753, T18, T17); + ro[WS(os, 8)] = FMA(KP852868531, T16, T13); + ro[WS(os, 5)] = FNMS(KP852868531, T16, T13); + io[WS(os, 8)] = FMA(KP852868531, T1a, T19); + io[WS(os, 5)] = FNMS(KP852868531, T1a, T19); + } + } 
+ } +} + +static const kdft_desc desc = { 9, "n1_9", {24, 0, 56, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_9) (planner *p) { + X(kdft_register) (p, n1_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 9 -name n1_9 -include n.h */ + +/* + * This function contains 80 FP additions, 40 FP multiplications, + * (or, 60 additions, 20 multiplications, 20 fused multiply/add), + * 39 stack variables, 8 constants, and 36 memory accesses + */ +#include "n.h" + +static void n1_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP342020143, +0.342020143325668733044099614682259580763083368); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(36, is), MAKE_VOLATILE_STRIDE(36, os)) { + E T5, TO, Th, Tk, T1g, TR, Ta, T1c, Tq, TW, Tv, TX, Tf, T1d, TB; + E T10, TG, TZ; + { + E T1, T2, T3, T4; + T1 = ri[0]; + T2 = ri[WS(is, 3)]; + T3 = ri[WS(is, 6)]; + T4 = T2 + T3; + T5 = T1 + T4; + TO = KP866025403 * (T3 - T2); + Th = FNMS(KP500000000, T4, T1); + } + { + E TP, Ti, Tj, TQ; + TP = ii[0]; + Ti = ii[WS(is, 3)]; + Tj = ii[WS(is, 6)]; + TQ = Ti + Tj; + Tk = KP866025403 * (Ti - Tj); + T1g = TP + TQ; + TR = FNMS(KP500000000, TQ, TP); + } + { + E T6, Ts, T9, Tr, Tp, Tt, Tm, Tu; + T6 = ri[WS(is, 1)]; + Ts = ii[WS(is, 1)]; + { + E T7, T8, Tn, To; + T7 = ri[WS(is, 4)]; + T8 = ri[WS(is, 7)]; + T9 = T7 + T8; + Tr = 
KP866025403 * (T8 - T7); + Tn = ii[WS(is, 4)]; + To = ii[WS(is, 7)]; + Tp = KP866025403 * (Tn - To); + Tt = Tn + To; + } + Ta = T6 + T9; + T1c = Ts + Tt; + Tm = FNMS(KP500000000, T9, T6); + Tq = Tm + Tp; + TW = Tm - Tp; + Tu = FNMS(KP500000000, Tt, Ts); + Tv = Tr + Tu; + TX = Tu - Tr; + } + { + E Tb, TD, Te, TC, TA, TE, Tx, TF; + Tb = ri[WS(is, 2)]; + TD = ii[WS(is, 2)]; + { + E Tc, Td, Ty, Tz; + Tc = ri[WS(is, 5)]; + Td = ri[WS(is, 8)]; + Te = Tc + Td; + TC = KP866025403 * (Td - Tc); + Ty = ii[WS(is, 5)]; + Tz = ii[WS(is, 8)]; + TA = KP866025403 * (Ty - Tz); + TE = Ty + Tz; + } + Tf = Tb + Te; + T1d = TD + TE; + Tx = FNMS(KP500000000, Te, Tb); + TB = Tx + TA; + T10 = Tx - TA; + TF = FNMS(KP500000000, TE, TD); + TG = TC + TF; + TZ = TF - TC; + } + { + E T1e, Tg, T1b, T1f, T1h, T1i; + T1e = KP866025403 * (T1c - T1d); + Tg = Ta + Tf; + T1b = FNMS(KP500000000, Tg, T5); + ro[0] = T5 + Tg; + ro[WS(os, 3)] = T1b + T1e; + ro[WS(os, 6)] = T1b - T1e; + T1f = KP866025403 * (Tf - Ta); + T1h = T1c + T1d; + T1i = FNMS(KP500000000, T1h, T1g); + io[WS(os, 3)] = T1f + T1i; + io[0] = T1g + T1h; + io[WS(os, 6)] = T1i - T1f; + } + { + E Tl, TS, TI, TN, TM, TT, TJ, TU; + Tl = Th + Tk; + TS = TO + TR; + { + E Tw, TH, TK, TL; + Tw = FMA(KP766044443, Tq, KP642787609 * Tv); + TH = FMA(KP173648177, TB, KP984807753 * TG); + TI = Tw + TH; + TN = KP866025403 * (TH - Tw); + TK = FNMS(KP642787609, Tq, KP766044443 * Tv); + TL = FNMS(KP984807753, TB, KP173648177 * TG); + TM = KP866025403 * (TK - TL); + TT = TK + TL; + } + ro[WS(os, 1)] = Tl + TI; + io[WS(os, 1)] = TS + TT; + TJ = FNMS(KP500000000, TI, Tl); + ro[WS(os, 7)] = TJ - TM; + ro[WS(os, 4)] = TJ + TM; + TU = FNMS(KP500000000, TT, TS); + io[WS(os, 4)] = TN + TU; + io[WS(os, 7)] = TU - TN; + } + { + E TV, T14, T12, T13, T17, T1a, T18, T19; + TV = Th - Tk; + T14 = TR - TO; + { + E TY, T11, T15, T16; + TY = FMA(KP173648177, TW, KP984807753 * TX); + T11 = FNMS(KP939692620, T10, KP342020143 * TZ); + T12 = TY + T11; + T13 = KP866025403 * (T11 - 
TY); + T15 = FNMS(KP984807753, TW, KP173648177 * TX); + T16 = FMA(KP342020143, T10, KP939692620 * TZ); + T17 = T15 - T16; + T1a = KP866025403 * (T15 + T16); + } + ro[WS(os, 2)] = TV + T12; + io[WS(os, 2)] = T14 + T17; + T18 = FNMS(KP500000000, T17, T14); + io[WS(os, 5)] = T13 + T18; + io[WS(os, 8)] = T18 - T13; + T19 = FNMS(KP500000000, T12, TV); + ro[WS(os, 8)] = T19 - T1a; + ro[WS(os, 5)] = T19 + T1a; + } + } + } +} + +static const kdft_desc desc = { 9, "n1_9", {60, 20, 20, 0}, &GENUS, 0, 0, 0, 0 }; + +void X(codelet_n1_9) (planner *p) { + X(kdft_register) (p, n1_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include q.h */ + +/* + * This function contains 12 FP additions, 8 FP multiplications, + * (or, 8 additions, 4 multiplications, 4 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "q.h" + +static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 2); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T9, T6, T5; + { + E T1, T2, T7, T8, Tb, T4, Tc, Th, Ti, Te, Tj, Td, Tg; + T1 = rio[0]; + T2 = rio[WS(rs, 1)]; + T7 = iio[0]; + T8 = iio[WS(rs, 1)]; + Tb = rio[WS(vs, 1)]; + T4 = T1 - T2; + Tc = rio[WS(vs, 1) + WS(rs, 1)]; + T9 = T7 - T8; + Th = iio[WS(vs, 1)]; + Ti = iio[WS(vs, 1) + WS(rs, 1)]; + Te = Tb - Tc; + rio[0] = T1 + T2; + iio[0] = T7 + T8; + Tj = Th - Ti; + rio[WS(rs, 1)] = Tb + Tc; + iio[WS(rs, 1)] = Th + Ti; + Td = W[0]; + Tg = W[1]; + { + E T3, Tk, Tf, Ta; + T3 = W[0]; + T6 = W[1]; + Tk = Td * Tj; + Tf = Td * Te; + Ta = T3 * T9; + T5 = T3 * T4; + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Tg, Te, Tk); + rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tg, Tj, Tf); + iio[WS(vs, 1)] = FNMS(T6, T4, Ta); + } + } + rio[WS(vs, 1)] = FMA(T6, T9, T5); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 2}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, {8, 4, 4, 0}, 0, 0, 0 }; + +void 
X(codelet_q1_2) (planner *p) { + X(kdft_difsq_register) (p, q1_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include q.h */ + +/* + * This function contains 12 FP additions, 8 FP multiplications, + * (or, 8 additions, 4 multiplications, 4 fused multiply/add), + * 17 stack variables, 0 constants, and 16 memory accesses + */ +#include "q.h" + +static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 2); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T1, T2, T4, T6, T7, T8, T9, Ta, Tc, Te, Tf, Tg; + T1 = rio[0]; + T2 = rio[WS(rs, 1)]; + T4 = T1 - T2; + T6 = iio[0]; + T7 = iio[WS(rs, 1)]; + T8 = T6 - T7; + T9 = rio[WS(vs, 1)]; + Ta = rio[WS(vs, 1) + WS(rs, 1)]; + Tc = T9 - Ta; + Te = iio[WS(vs, 1)]; + Tf = iio[WS(vs, 1) + WS(rs, 1)]; + Tg = Te - Tf; + rio[0] = T1 + T2; + iio[0] = T6 + T7; + rio[WS(rs, 1)] = T9 + Ta; + iio[WS(rs, 1)] = Te + Tf; + { + E Tb, Td, T3, T5; + Tb = W[0]; + Td = W[1]; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tb, Tc, Td * Tg); + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Td, Tc, Tb * Tg); + T3 = W[0]; + T5 = W[1]; + rio[WS(vs, 1)] = FMA(T3, T4, T5 * T8); + iio[WS(vs, 1)] = FNMS(T5, T4, T3 * T8); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 2}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, {8, 4, 4, 0}, 0, 0, 0 }; + +void X(codelet_q1_2) (planner *p) { + X(kdft_difsq_register) (p, q1_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 
2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:00 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 3 -name q1_3 -include q.h */ + +/* + * This function contains 48 FP additions, 42 FP multiplications, + * (or, 18 additions, 12 multiplications, 30 fused multiply/add), + * 56 stack variables, 2 constants, and 36 memory accesses + */ +#include "q.h" + +static void q1_3(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E Tk, Tn, Tm, To, Tl; + { + E T1, Td, T4, Tg, Tp, T9, Te, T6, Tf, TB, TE, Ts, TZ, Tu, Tx; + E TC, TN, TO, TD, TV, T10, TP, Tq, Tr; + { + E T2, T3, T7, T8; + T1 = rio[0]; + T2 = rio[WS(rs, 1)]; + T3 = rio[WS(rs, 2)]; + Td = iio[0]; + T7 
= iio[WS(rs, 1)]; + T8 = iio[WS(rs, 2)]; + T4 = T2 + T3; + Tg = T3 - T2; + Tp = rio[WS(vs, 1)]; + T9 = T7 - T8; + Te = T7 + T8; + T6 = FNMS(KP500000000, T4, T1); + Tq = rio[WS(vs, 1) + WS(rs, 1)]; + Tr = rio[WS(vs, 1) + WS(rs, 2)]; + Tf = FNMS(KP500000000, Te, Td); + } + { + E Tv, Tw, TT, TU; + TB = iio[WS(vs, 1)]; + Tv = iio[WS(vs, 1) + WS(rs, 1)]; + TE = Tr - Tq; + Ts = Tq + Tr; + Tw = iio[WS(vs, 1) + WS(rs, 2)]; + TZ = iio[WS(vs, 2)]; + TT = iio[WS(vs, 2) + WS(rs, 1)]; + Tu = FNMS(KP500000000, Ts, Tp); + Tx = Tv - Tw; + TC = Tv + Tw; + TU = iio[WS(vs, 2) + WS(rs, 2)]; + TN = rio[WS(vs, 2)]; + TO = rio[WS(vs, 2) + WS(rs, 1)]; + TD = FNMS(KP500000000, TC, TB); + TV = TT - TU; + T10 = TT + TU; + TP = rio[WS(vs, 2) + WS(rs, 2)]; + } + { + E T11, T12, TS, TQ; + rio[0] = T1 + T4; + iio[0] = Td + Te; + T11 = FNMS(KP500000000, T10, TZ); + T12 = TP - TO; + TQ = TO + TP; + rio[WS(rs, 1)] = Tp + Ts; + iio[WS(rs, 1)] = TB + TC; + iio[WS(rs, 2)] = TZ + T10; + TS = FNMS(KP500000000, TQ, TN); + rio[WS(rs, 2)] = TN + TQ; + { + E TW, T13, Ty, TI, TL, TF, TH, TK; + { + E Ta, Th, T5, Tc; + Tk = FNMS(KP866025403, T9, T6); + Ta = FMA(KP866025403, T9, T6); + Th = FMA(KP866025403, Tg, Tf); + Tn = FNMS(KP866025403, Tg, Tf); + T5 = W[0]; + Tc = W[1]; + { + E T16, T19, T18, T1a, T17, Ti, Tb, T15; + TW = FMA(KP866025403, TV, TS); + T16 = FNMS(KP866025403, TV, TS); + T19 = FNMS(KP866025403, T12, T11); + T13 = FMA(KP866025403, T12, T11); + Ti = T5 * Th; + Tb = T5 * Ta; + T15 = W[2]; + T18 = W[3]; + iio[WS(vs, 1)] = FNMS(Tc, Ta, Ti); + rio[WS(vs, 1)] = FMA(Tc, Th, Tb); + T1a = T15 * T19; + T17 = T15 * T16; + Ty = FMA(KP866025403, Tx, Tu); + TI = FNMS(KP866025403, Tx, Tu); + TL = FNMS(KP866025403, TE, TD); + TF = FMA(KP866025403, TE, TD); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T18, T16, T1a); + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T18, T19, T17); + TH = W[2]; + TK = W[3]; + } + } + { + E TA, TG, Tz, TM, TJ, Tt; + TM = TH * TL; + TJ = TH * TI; + Tt = W[0]; + TA = W[1]; + iio[WS(vs, 2) + WS(rs, 1)] = 
FNMS(TK, TI, TM); + rio[WS(vs, 2) + WS(rs, 1)] = FMA(TK, TL, TJ); + TG = Tt * TF; + Tz = Tt * Ty; + { + E TR, TY, T14, TX, Tj; + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TA, Ty, TG); + rio[WS(vs, 1) + WS(rs, 1)] = FMA(TA, TF, Tz); + TR = W[0]; + TY = W[1]; + T14 = TR * T13; + TX = TR * TW; + Tj = W[2]; + Tm = W[3]; + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TY, TW, T14); + rio[WS(vs, 1) + WS(rs, 2)] = FMA(TY, T13, TX); + To = Tj * Tn; + Tl = Tj * Tk; + } + } + } + } + } + iio[WS(vs, 2)] = FNMS(Tm, Tk, To); + rio[WS(vs, 2)] = FMA(Tm, Tn, Tl); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 3, "q1_3", twinstr, &GENUS, {18, 12, 30, 0}, 0, 0, 0 }; + +void X(codelet_q1_3) (planner *p) { + X(kdft_difsq_register) (p, q1_3, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 3 -name q1_3 -include q.h */ + +/* + * This function contains 48 FP additions, 36 FP multiplications, + * (or, 30 additions, 18 multiplications, 18 fused multiply/add), + * 35 stack variables, 2 constants, and 36 memory accesses + */ +#include "q.h" + +static void q1_3(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T1, T4, T6, Tc, Td, Te, T9, Tf, Tl, To, Tq, Tw, Tx, Ty, Tt; + E Tz, TR, TS, TN, TT, TF, TI, TK, TQ; + { + E T2, T3, Tr, Ts; + T1 = rio[0]; + T2 = rio[WS(rs, 1)]; + T3 = rio[WS(rs, 2)]; + T4 = T2 + T3; + T6 = FNMS(KP500000000, T4, T1); + Tc = KP866025403 * (T3 - T2); + { + E T7, T8, Tm, Tn; + Td = iio[0]; + T7 = iio[WS(rs, 1)]; + T8 = iio[WS(rs, 2)]; + Te = T7 + T8; + T9 = KP866025403 * (T7 - T8); + 
Tf = FNMS(KP500000000, Te, Td); + Tl = rio[WS(vs, 1)]; + Tm = rio[WS(vs, 1) + WS(rs, 1)]; + Tn = rio[WS(vs, 1) + WS(rs, 2)]; + To = Tm + Tn; + Tq = FNMS(KP500000000, To, Tl); + Tw = KP866025403 * (Tn - Tm); + } + Tx = iio[WS(vs, 1)]; + Tr = iio[WS(vs, 1) + WS(rs, 1)]; + Ts = iio[WS(vs, 1) + WS(rs, 2)]; + Ty = Tr + Ts; + Tt = KP866025403 * (Tr - Ts); + Tz = FNMS(KP500000000, Ty, Tx); + { + E TL, TM, TG, TH; + TR = iio[WS(vs, 2)]; + TL = iio[WS(vs, 2) + WS(rs, 1)]; + TM = iio[WS(vs, 2) + WS(rs, 2)]; + TS = TL + TM; + TN = KP866025403 * (TL - TM); + TT = FNMS(KP500000000, TS, TR); + TF = rio[WS(vs, 2)]; + TG = rio[WS(vs, 2) + WS(rs, 1)]; + TH = rio[WS(vs, 2) + WS(rs, 2)]; + TI = TG + TH; + TK = FNMS(KP500000000, TI, TF); + TQ = KP866025403 * (TH - TG); + } + } + rio[0] = T1 + T4; + iio[0] = Td + Te; + rio[WS(rs, 1)] = Tl + To; + iio[WS(rs, 1)] = Tx + Ty; + iio[WS(rs, 2)] = TR + TS; + rio[WS(rs, 2)] = TF + TI; + { + E Ta, Tg, T5, Tb; + Ta = T6 + T9; + Tg = Tc + Tf; + T5 = W[0]; + Tb = W[1]; + rio[WS(vs, 1)] = FMA(T5, Ta, Tb * Tg); + iio[WS(vs, 1)] = FNMS(Tb, Ta, T5 * Tg); + } + { + E TW, TY, TV, TX; + TW = TK - TN; + TY = TT - TQ; + TV = W[2]; + TX = W[3]; + rio[WS(vs, 2) + WS(rs, 2)] = FMA(TV, TW, TX * TY); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(TX, TW, TV * TY); + } + { + E TC, TE, TB, TD; + TC = Tq - Tt; + TE = Tz - Tw; + TB = W[2]; + TD = W[3]; + rio[WS(vs, 2) + WS(rs, 1)] = FMA(TB, TC, TD * TE); + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TD, TC, TB * TE); + } + { + E Tu, TA, Tp, Tv; + Tu = Tq + Tt; + TA = Tw + Tz; + Tp = W[0]; + Tv = W[1]; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tp, Tu, Tv * TA); + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Tv, Tu, Tp * TA); + } + { + E TO, TU, TJ, TP; + TO = TK + TN; + TU = TQ + TT; + TJ = W[0]; + TP = W[1]; + rio[WS(vs, 1) + WS(rs, 2)] = FMA(TJ, TO, TP * TU); + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TP, TO, TJ * TU); + } + { + E Ti, Tk, Th, Tj; + Ti = T6 - T9; + Tk = Tf - Tc; + Th = W[2]; + Tj = W[3]; + rio[WS(vs, 2)] = FMA(Th, Ti, Tj * Tk); + iio[WS(vs, 
2)] = FNMS(Tj, Ti, Th * Tk); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 3, "q1_3", twinstr, &GENUS, {30, 18, 18, 0}, 0, 0, 0 }; + +void X(codelet_q1_3) (planner *p) { + X(kdft_difsq_register) (p, q1_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,518 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 4 -name q1_4 -include q.h */ + +/* + * This function contains 88 FP additions, 48 FP multiplications, + * (or, 64 additions, 24 multiplications, 24 fused multiply/add), + * 76 stack variables, 0 constants, and 64 memory accesses + */ +#include "q.h" + +static void q1_4(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T1X, T1S, T1L, T1Y, T1R; + { + E T3, Tf, Tv, Ti, Tw, Tx, T6, Tm, Tc, Ts, T1T, T1H, T29, T1W, T2a; + E T2b, T1K, T20, T1Q, T26, TN, TB, T13, TQ, T14, T15, TE, TU, TK, T10; + E T1l, T19, T1a, T1h, T1B, T1o, T1C, T1b, T1D, T1e, T1c; + { + E T1I, T1P, T1J, T1M; + { + E Tb, T4, T5, T8; + { + E T1, T2, T9, Ta, Tg, Th; + T1 = rio[0]; + T2 = rio[WS(rs, 2)]; + T9 = iio[0]; + Ta = iio[WS(rs, 2)]; + Tg = iio[WS(rs, 1)]; + T3 = T1 + T2; + Tf = T1 - T2; + Th = iio[WS(rs, 3)]; + Tv = T9 + Ta; + Tb = T9 - Ta; + T4 = rio[WS(rs, 1)]; + Ti = Tg - Th; + Tw = Tg + Th; + T5 = rio[WS(rs, 3)]; + } + Tx = Tv - Tw; + T8 = T4 - T5; + T6 = T4 + T5; + { + E T1N, T1O, T1F, T1G, T1U, T1V; + T1F = rio[WS(vs, 3)]; + T1G = rio[WS(vs, 3) + WS(rs, 2)]; + Tm = Tb - T8; + Tc = T8 + Tb; + Ts = T3 - T6; + T1T = T1F - T1G; + T1H = T1F + T1G; + T1N = iio[WS(vs, 3)]; + T1O = iio[WS(vs, 3) + WS(rs, 2)]; + T1U = iio[WS(vs, 3) + WS(rs, 1)]; + T1V = iio[WS(vs, 3) 
+ WS(rs, 3)]; + T1I = rio[WS(vs, 3) + WS(rs, 1)]; + T1P = T1N - T1O; + T29 = T1N + T1O; + T1W = T1U - T1V; + T2a = T1U + T1V; + T1J = rio[WS(vs, 3) + WS(rs, 3)]; + } + } + T2b = T29 - T2a; + T1M = T1I - T1J; + T1K = T1I + T1J; + { + E TC, TJ, TD, TG; + { + E TH, TI, Tz, TA, TO, TP; + Tz = rio[WS(vs, 1)]; + TA = rio[WS(vs, 1) + WS(rs, 2)]; + T20 = T1P - T1M; + T1Q = T1M + T1P; + T26 = T1H - T1K; + TN = Tz - TA; + TB = Tz + TA; + TH = iio[WS(vs, 1)]; + TI = iio[WS(vs, 1) + WS(rs, 2)]; + TO = iio[WS(vs, 1) + WS(rs, 1)]; + TP = iio[WS(vs, 1) + WS(rs, 3)]; + TC = rio[WS(vs, 1) + WS(rs, 1)]; + TJ = TH - TI; + T13 = TH + TI; + TQ = TO - TP; + T14 = TO + TP; + TD = rio[WS(vs, 1) + WS(rs, 3)]; + } + T15 = T13 - T14; + TG = TC - TD; + TE = TC + TD; + { + E T1f, T1g, T17, T18, T1m, T1n; + T17 = rio[WS(vs, 2)]; + T18 = rio[WS(vs, 2) + WS(rs, 2)]; + TU = TJ - TG; + TK = TG + TJ; + T10 = TB - TE; + T1l = T17 - T18; + T19 = T17 + T18; + T1f = iio[WS(vs, 2)]; + T1g = iio[WS(vs, 2) + WS(rs, 2)]; + T1m = iio[WS(vs, 2) + WS(rs, 1)]; + T1n = iio[WS(vs, 2) + WS(rs, 3)]; + T1a = rio[WS(vs, 2) + WS(rs, 1)]; + T1h = T1f - T1g; + T1B = T1f + T1g; + T1o = T1m - T1n; + T1C = T1m + T1n; + T1b = rio[WS(vs, 2) + WS(rs, 3)]; + } + } + } + T1D = T1B - T1C; + T1e = T1a - T1b; + T1c = T1a + T1b; + { + E T1s, T1i, T1y, T28, T27, Tr, Tu; + rio[0] = T3 + T6; + iio[0] = Tv + Tw; + T1s = T1h - T1e; + T1i = T1e + T1h; + T1y = T19 - T1c; + rio[WS(rs, 1)] = TB + TE; + iio[WS(rs, 1)] = T13 + T14; + rio[WS(rs, 2)] = T19 + T1c; + iio[WS(rs, 2)] = T1B + T1C; + iio[WS(rs, 3)] = T29 + T2a; + rio[WS(rs, 3)] = T1H + T1K; + Tr = W[2]; + Tu = W[3]; + { + E T25, Ty, Tt, T2c; + T25 = W[2]; + T28 = W[3]; + Ty = Tr * Tx; + Tt = Tr * Ts; + T2c = T25 * T2b; + T27 = T25 * T26; + iio[WS(vs, 2)] = FNMS(Tu, Ts, Ty); + rio[WS(vs, 2)] = FMA(Tu, Tx, Tt); + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T28, T26, T2c); + } + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T28, T2b, T27); + { + E Tp, T1v, T23, T22, T1Z, TR, TM, TF; + { + E T1A, T1z, TZ, T12; 
+ TZ = W[2]; + T12 = W[3]; + { + E T1x, T16, T11, T1E; + T1x = W[2]; + T1A = W[3]; + T16 = TZ * T15; + T11 = TZ * T10; + T1E = T1x * T1D; + T1z = T1x * T1y; + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T12, T10, T16); + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T12, T15, T11); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T1A, T1y, T1E); + } + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T1A, T1D, T1z); + { + E Tj, Te, T7, T1p, T1k, T1j; + Tp = Tf + Ti; + Tj = Tf - Ti; + Te = W[5]; + T7 = W[4]; + { + E T1d, T1q, Tk, Td; + T1p = T1l - T1o; + T1v = T1l + T1o; + T1k = W[5]; + Tk = Te * Tc; + Td = T7 * Tc; + T1d = W[4]; + T1q = T1k * T1i; + rio[WS(vs, 3)] = FMA(T7, Tj, Tk); + iio[WS(vs, 3)] = FNMS(Te, Tj, Td); + T1j = T1d * T1i; + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T1d, T1p, T1q); + } + T23 = T1T + T1W; + T1X = T1T - T1W; + T22 = W[1]; + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T1k, T1p, T1j); + T1Z = W[0]; + } + } + { + E TX, TW, TT, TY, TV, T24, T21; + TX = TN + TQ; + TR = TN - TQ; + T24 = T22 * T20; + TW = W[1]; + T21 = T1Z * T20; + TT = W[0]; + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T1Z, T23, T24); + TY = TW * TU; + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T22, T23, T21); + TV = TT * TU; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(TT, TX, TY); + TM = W[5]; + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TW, TX, TV); + TF = W[4]; + } + { + E To, Tl, Tq, Tn, TS, TL; + TS = TM * TK; + To = W[1]; + TL = TF * TK; + Tl = W[0]; + rio[WS(vs, 3) + WS(rs, 1)] = FMA(TF, TR, TS); + Tq = To * Tm; + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(TM, TR, TL); + Tn = Tl * Tm; + { + E T1u, T1r, T1w, T1t; + rio[WS(vs, 1)] = FMA(Tl, Tp, Tq); + T1u = W[1]; + iio[WS(vs, 1)] = FNMS(To, Tp, Tn); + T1r = W[0]; + T1w = T1u * T1s; + T1S = W[5]; + T1t = T1r * T1s; + T1L = W[4]; + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1r, T1v, T1w); + T1Y = T1S * T1Q; + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T1u, T1v, T1t); + T1R = T1L * T1Q; + } + } + } + } + } + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T1L, T1X, T1Y); + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T1S, T1X, T1R); + } + } +} + +static const tw_instr twinstr[] = { 
+ {TW_FULL, 0, 4}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 4, "q1_4", twinstr, &GENUS, {64, 24, 24, 0}, 0, 0, 0 }; + +void X(codelet_q1_4) (planner *p) { + X(kdft_difsq_register) (p, q1_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 4 -name q1_4 -include q.h */ + +/* + * This function contains 88 FP additions, 48 FP multiplications, + * (or, 64 additions, 24 multiplications, 24 fused multiply/add), + * 37 stack variables, 0 constants, and 64 memory accesses + */ +#include "q.h" + +static void q1_4(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T3, Te, Tb, Tq, T6, T8, Th, Tr, Tv, TG, TD, TS, Ty, TA, TJ; + E TT, TX, T18, T15, T1k, T10, T12, T1b, T1l, T1p, T1A, T1x, T1M, T1s, T1u; + E T1D, T1N; + { + E T1, T2, T9, Ta; + T1 = rio[0]; + T2 = rio[WS(rs, 2)]; + T3 = T1 + T2; + Te = T1 - T2; + T9 = iio[0]; + Ta = iio[WS(rs, 2)]; + Tb = T9 - Ta; + Tq = T9 + Ta; + } + { + E T4, T5, Tf, Tg; + T4 = rio[WS(rs, 1)]; + T5 = rio[WS(rs, 3)]; + T6 = T4 + T5; + T8 = T4 - T5; + Tf = iio[WS(rs, 1)]; + Tg = iio[WS(rs, 3)]; + Th = Tf - Tg; + Tr = Tf + Tg; + } + { + E Tt, Tu, TB, TC; + Tt = rio[WS(vs, 1)]; + Tu = rio[WS(vs, 1) + WS(rs, 2)]; + Tv = Tt + Tu; + TG = Tt - Tu; + TB = iio[WS(vs, 1)]; + TC = iio[WS(vs, 1) + WS(rs, 2)]; + TD = TB - TC; + TS = TB + TC; + } + { + E Tw, Tx, TH, TI; + Tw = rio[WS(vs, 1) + WS(rs, 1)]; + Tx = rio[WS(vs, 1) + WS(rs, 3)]; + Ty = Tw + Tx; + TA = Tw - Tx; + TH = iio[WS(vs, 1) + WS(rs, 1)]; + TI = iio[WS(vs, 1) + WS(rs, 3)]; + TJ = TH - TI; + TT = TH + TI; + } + { + E TV, TW, T13, T14; + TV = rio[WS(vs, 2)]; + TW = rio[WS(vs, 2) + WS(rs, 2)]; + TX = TV + TW; + T18 = TV - TW; + T13 = iio[WS(vs, 2)]; + T14 = iio[WS(vs, 2) + WS(rs, 2)]; + 
T15 = T13 - T14; + T1k = T13 + T14; + } + { + E TY, TZ, T19, T1a; + TY = rio[WS(vs, 2) + WS(rs, 1)]; + TZ = rio[WS(vs, 2) + WS(rs, 3)]; + T10 = TY + TZ; + T12 = TY - TZ; + T19 = iio[WS(vs, 2) + WS(rs, 1)]; + T1a = iio[WS(vs, 2) + WS(rs, 3)]; + T1b = T19 - T1a; + T1l = T19 + T1a; + } + { + E T1n, T1o, T1v, T1w; + T1n = rio[WS(vs, 3)]; + T1o = rio[WS(vs, 3) + WS(rs, 2)]; + T1p = T1n + T1o; + T1A = T1n - T1o; + T1v = iio[WS(vs, 3)]; + T1w = iio[WS(vs, 3) + WS(rs, 2)]; + T1x = T1v - T1w; + T1M = T1v + T1w; + } + { + E T1q, T1r, T1B, T1C; + T1q = rio[WS(vs, 3) + WS(rs, 1)]; + T1r = rio[WS(vs, 3) + WS(rs, 3)]; + T1s = T1q + T1r; + T1u = T1q - T1r; + T1B = iio[WS(vs, 3) + WS(rs, 1)]; + T1C = iio[WS(vs, 3) + WS(rs, 3)]; + T1D = T1B - T1C; + T1N = T1B + T1C; + } + rio[0] = T3 + T6; + iio[0] = Tq + Tr; + rio[WS(rs, 1)] = Tv + Ty; + iio[WS(rs, 1)] = TS + TT; + rio[WS(rs, 2)] = TX + T10; + iio[WS(rs, 2)] = T1k + T1l; + iio[WS(rs, 3)] = T1M + T1N; + rio[WS(rs, 3)] = T1p + T1s; + { + E Tc, Ti, T7, Td; + Tc = T8 + Tb; + Ti = Te - Th; + T7 = W[4]; + Td = W[5]; + iio[WS(vs, 3)] = FNMS(Td, Ti, T7 * Tc); + rio[WS(vs, 3)] = FMA(Td, Tc, T7 * Ti); + } + { + E T1K, T1O, T1J, T1L; + T1K = T1p - T1s; + T1O = T1M - T1N; + T1J = W[2]; + T1L = W[3]; + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T1J, T1K, T1L * T1O); + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T1L, T1K, T1J * T1O); + } + { + E Tk, Tm, Tj, Tl; + Tk = Tb - T8; + Tm = Te + Th; + Tj = W[0]; + Tl = W[1]; + iio[WS(vs, 1)] = FNMS(Tl, Tm, Tj * Tk); + rio[WS(vs, 1)] = FMA(Tl, Tk, Tj * Tm); + } + { + E To, Ts, Tn, Tp; + To = T3 - T6; + Ts = Tq - Tr; + Tn = W[2]; + Tp = W[3]; + rio[WS(vs, 2)] = FMA(Tn, To, Tp * Ts); + iio[WS(vs, 2)] = FNMS(Tp, To, Tn * Ts); + } + { + E T16, T1c, T11, T17; + T16 = T12 + T15; + T1c = T18 - T1b; + T11 = W[4]; + T17 = W[5]; + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T17, T1c, T11 * T16); + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T17, T16, T11 * T1c); + } + { + E T1G, T1I, T1F, T1H; + T1G = T1x - T1u; + T1I = T1A + T1D; + T1F = W[0]; + T1H = 
W[1]; + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T1H, T1I, T1F * T1G); + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T1H, T1G, T1F * T1I); + } + { + E TQ, TU, TP, TR; + TQ = Tv - Ty; + TU = TS - TT; + TP = W[2]; + TR = W[3]; + rio[WS(vs, 2) + WS(rs, 1)] = FMA(TP, TQ, TR * TU); + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TR, TQ, TP * TU); + } + { + E T1e, T1g, T1d, T1f; + T1e = T15 - T12; + T1g = T18 + T1b; + T1d = W[0]; + T1f = W[1]; + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T1f, T1g, T1d * T1e); + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1f, T1e, T1d * T1g); + } + { + E T1i, T1m, T1h, T1j; + T1i = TX - T10; + T1m = T1k - T1l; + T1h = W[2]; + T1j = W[3]; + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T1h, T1i, T1j * T1m); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T1j, T1i, T1h * T1m); + } + { + E T1y, T1E, T1t, T1z; + T1y = T1u + T1x; + T1E = T1A - T1D; + T1t = W[4]; + T1z = W[5]; + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T1z, T1E, T1t * T1y); + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T1z, T1y, T1t * T1E); + } + { + E TM, TO, TL, TN; + TM = TD - TA; + TO = TG + TJ; + TL = W[0]; + TN = W[1]; + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TN, TO, TL * TM); + rio[WS(vs, 1) + WS(rs, 1)] = FMA(TN, TM, TL * TO); + } + { + E TE, TK, Tz, TF; + TE = TA + TD; + TK = TG - TJ; + Tz = W[4]; + TF = W[5]; + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(TF, TK, Tz * TE); + rio[WS(vs, 3) + WS(rs, 1)] = FMA(TF, TE, Tz * TK); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 4}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 4, "q1_4", twinstr, &GENUS, {64, 24, 24, 0}, 0, 0, 0 }; + +void X(codelet_q1_4) (planner *p) { + X(kdft_difsq_register) (p, q1_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,983 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:00 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 5 -name q1_5 -include q.h */ + +/* + * This function contains 200 FP additions, 170 FP multiplications, + * (or, 70 additions, 40 multiplications, 130 fused multiply/add), + * 104 stack variables, 4 constants, and 100 memory accesses + */ +#include "q.h" + +static void q1_5(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T1x, T1w, T1v; + { + E T1, Tn, TM, Tw, Tb, T8, Ta, TV, Tq, Ts, TH, Tj, Tr, T1h, T1q; + E T1G, T12, T15, T1P, T14, T1k, T1m, T1B, T1d, T1l, T2b, T2k, 
T2A, T1W, T1Z; + E T3Z, T1Y, T2e, T2g, T2v, T27, T2f, T3D, T42, T44, T4j, T3V, T43, T2J, T48; + E T4o, T3K, T3N, T35, T3M, T2V, T3e, T3u, T2Q, T2T, T37, T30, T2S, T2W; + { + E T1Q, T2j, T1V, T1R; + { + E Tp, Ti, Td, Te; + { + E T5, T6, T2, T3, T7, Tv; + T1 = rio[0]; + T5 = rio[WS(rs, 2)]; + T6 = rio[WS(rs, 3)]; + T2 = rio[WS(rs, 1)]; + T3 = rio[WS(rs, 4)]; + Tn = iio[0]; + T7 = T5 + T6; + Tv = T5 - T6; + { + E T4, Tu, Tg, Th; + T4 = T2 + T3; + Tu = T2 - T3; + Tg = iio[WS(rs, 2)]; + Th = iio[WS(rs, 3)]; + TM = FNMS(KP618033988, Tu, Tv); + Tw = FMA(KP618033988, Tv, Tu); + Tb = T4 - T7; + T8 = T4 + T7; + Tp = Tg + Th; + Ti = Tg - Th; + Ta = FNMS(KP250000000, T8, T1); + Td = iio[WS(rs, 1)]; + Te = iio[WS(rs, 4)]; + } + } + { + E TW, T1p, T11, TX; + TV = rio[WS(vs, 1)]; + { + E TZ, T10, Tf, To; + TZ = rio[WS(vs, 1) + WS(rs, 2)]; + T10 = rio[WS(vs, 1) + WS(rs, 3)]; + Tf = Td - Te; + To = Td + Te; + TW = rio[WS(vs, 1) + WS(rs, 1)]; + T1p = TZ - T10; + T11 = TZ + T10; + Tq = To + Tp; + Ts = To - Tp; + TH = FNMS(KP618033988, Tf, Ti); + Tj = FMA(KP618033988, Ti, Tf); + Tr = FNMS(KP250000000, Tq, Tn); + TX = rio[WS(vs, 1) + WS(rs, 4)]; + } + { + E T17, T1j, T1c, T18; + T1h = iio[WS(vs, 1)]; + { + E T1a, T1b, TY, T1o; + T1a = iio[WS(vs, 1) + WS(rs, 2)]; + T1b = iio[WS(vs, 1) + WS(rs, 3)]; + TY = TW + TX; + T1o = TW - TX; + T17 = iio[WS(vs, 1) + WS(rs, 1)]; + T1j = T1a + T1b; + T1c = T1a - T1b; + T1q = FMA(KP618033988, T1p, T1o); + T1G = FNMS(KP618033988, T1o, T1p); + T12 = TY + T11; + T15 = TY - T11; + T18 = iio[WS(vs, 1) + WS(rs, 4)]; + } + T1P = rio[WS(vs, 2)]; + T14 = FNMS(KP250000000, T12, TV); + { + E T1T, T1i, T19, T1U; + T1T = rio[WS(vs, 2) + WS(rs, 2)]; + T1i = T17 + T18; + T19 = T17 - T18; + T1U = rio[WS(vs, 2) + WS(rs, 3)]; + T1Q = rio[WS(vs, 2) + WS(rs, 1)]; + T1k = T1i + T1j; + T1m = T1i - T1j; + T1B = FNMS(KP618033988, T19, T1c); + T1d = FMA(KP618033988, T1c, T19); + T2j = T1T - T1U; + T1V = T1T + T1U; + T1l = FNMS(KP250000000, T1k, T1h); + T1R = rio[WS(vs, 2) + 
WS(rs, 4)]; + } + } + } + } + { + E T3P, T41, T3U, T3Q; + { + E T21, T2d, T26, T22; + T2b = iio[WS(vs, 2)]; + { + E T24, T25, T1S, T2i; + T24 = iio[WS(vs, 2) + WS(rs, 2)]; + T25 = iio[WS(vs, 2) + WS(rs, 3)]; + T1S = T1Q + T1R; + T2i = T1Q - T1R; + T21 = iio[WS(vs, 2) + WS(rs, 1)]; + T2d = T24 + T25; + T26 = T24 - T25; + T2k = FMA(KP618033988, T2j, T2i); + T2A = FNMS(KP618033988, T2i, T2j); + T1W = T1S + T1V; + T1Z = T1S - T1V; + T22 = iio[WS(vs, 2) + WS(rs, 4)]; + } + T3Z = iio[WS(vs, 4)]; + T1Y = FNMS(KP250000000, T1W, T1P); + { + E T3S, T2c, T23, T3T; + T3S = iio[WS(vs, 4) + WS(rs, 2)]; + T2c = T21 + T22; + T23 = T21 - T22; + T3T = iio[WS(vs, 4) + WS(rs, 3)]; + T3P = iio[WS(vs, 4) + WS(rs, 1)]; + T2e = T2c + T2d; + T2g = T2c - T2d; + T2v = FNMS(KP618033988, T23, T26); + T27 = FMA(KP618033988, T26, T23); + T41 = T3S + T3T; + T3U = T3S - T3T; + T2f = FNMS(KP250000000, T2e, T2b); + T3Q = iio[WS(vs, 4) + WS(rs, 4)]; + } + } + { + E T3E, T47, T3J, T3F; + T3D = rio[WS(vs, 4)]; + { + E T3H, T3I, T3R, T40; + T3H = rio[WS(vs, 4) + WS(rs, 2)]; + T3I = rio[WS(vs, 4) + WS(rs, 3)]; + T3R = T3P - T3Q; + T40 = T3P + T3Q; + T3E = rio[WS(vs, 4) + WS(rs, 1)]; + T47 = T3H - T3I; + T3J = T3H + T3I; + T42 = T40 + T41; + T44 = T40 - T41; + T4j = FNMS(KP618033988, T3R, T3U); + T3V = FMA(KP618033988, T3U, T3R); + T43 = FNMS(KP250000000, T42, T3Z); + T3F = rio[WS(vs, 4) + WS(rs, 4)]; + } + { + E T2K, T3d, T2P, T2L; + T2J = rio[WS(vs, 3)]; + { + E T2N, T2O, T3G, T46; + T2N = rio[WS(vs, 3) + WS(rs, 2)]; + T2O = rio[WS(vs, 3) + WS(rs, 3)]; + T3G = T3E + T3F; + T46 = T3E - T3F; + T2K = rio[WS(vs, 3) + WS(rs, 1)]; + T3d = T2N - T2O; + T2P = T2N + T2O; + T48 = FMA(KP618033988, T47, T46); + T4o = FNMS(KP618033988, T46, T47); + T3K = T3G + T3J; + T3N = T3G - T3J; + T2L = rio[WS(vs, 3) + WS(rs, 4)]; + } + T35 = iio[WS(vs, 3)]; + T3M = FNMS(KP250000000, T3K, T3D); + { + E T2Y, T3c, T2M, T2Z; + T2Y = iio[WS(vs, 3) + WS(rs, 2)]; + T3c = T2K - T2L; + T2M = T2K + T2L; + T2Z = iio[WS(vs, 3) + WS(rs, 
3)]; + T2V = iio[WS(vs, 3) + WS(rs, 1)]; + T3e = FMA(KP618033988, T3d, T3c); + T3u = FNMS(KP618033988, T3c, T3d); + T2Q = T2M + T2P; + T2T = T2M - T2P; + T37 = T2Y + T2Z; + T30 = T2Y - T2Z; + T2S = FNMS(KP250000000, T2Q, T2J); + T2W = iio[WS(vs, 3) + WS(rs, 4)]; + } + } + } + } + } + { + E T3a, T31, T3p, T39, T2X, T36, T38; + rio[0] = T1 + T8; + iio[0] = Tn + Tq; + rio[WS(rs, 1)] = TV + T12; + T2X = T2V - T2W; + T36 = T2V + T2W; + iio[WS(rs, 1)] = T1h + T1k; + rio[WS(rs, 2)] = T1P + T1W; + T3a = T36 - T37; + T38 = T36 + T37; + T31 = FMA(KP618033988, T30, T2X); + T3p = FNMS(KP618033988, T2X, T30); + T39 = FNMS(KP250000000, T38, T35); + iio[WS(rs, 2)] = T2b + T2e; + iio[WS(rs, 4)] = T3Z + T42; + rio[WS(rs, 4)] = T3D + T3K; + rio[WS(rs, 3)] = T2J + T2Q; + iio[WS(rs, 3)] = T35 + T38; + { + E T3O, T45, T2r, T2q, T2p, TT, TS, TR; + { + E TG, TL, TD, TC, TB, Tc, Tt; + TG = FNMS(KP559016994, Tb, Ta); + Tc = FMA(KP559016994, Tb, Ta); + Tt = FMA(KP559016994, Ts, Tr); + TL = FNMS(KP559016994, Ts, Tr); + { + E T9, Tm, Tk, TA, Tx; + T9 = W[0]; + Tm = W[1]; + Tk = FMA(KP951056516, Tj, Tc); + TA = FNMS(KP951056516, Tj, Tc); + Tx = FNMS(KP951056516, Tw, Tt); + TD = FMA(KP951056516, Tw, Tt); + { + E Tz, Tl, Ty, TE; + Tz = W[6]; + Tl = T9 * Tk; + TC = W[7]; + Ty = T9 * Tx; + TE = Tz * TD; + TB = Tz * TA; + rio[WS(vs, 1)] = FMA(Tm, Tx, Tl); + iio[WS(vs, 1)] = FNMS(Tm, Tk, Ty); + iio[WS(vs, 4)] = FNMS(TC, TA, TE); + } + } + rio[WS(vs, 4)] = FMA(TC, TD, TB); + { + E TF, TK, TI, TQ, TN; + TF = W[2]; + TK = W[3]; + TI = FNMS(KP951056516, TH, TG); + TQ = FMA(KP951056516, TH, TG); + TN = FMA(KP951056516, TM, TL); + TT = FNMS(KP951056516, TM, TL); + { + E TP, TJ, TO, TU; + TP = W[4]; + TJ = TF * TI; + TS = W[5]; + TO = TF * TN; + TU = TP * TT; + TR = TP * TQ; + rio[WS(vs, 2)] = FMA(TK, TN, TJ); + iio[WS(vs, 2)] = FNMS(TK, TI, TO); + iio[WS(vs, 3)] = FNMS(TS, TQ, TU); + } + } + } + rio[WS(vs, 3)] = FMA(TS, TT, TR); + { + E T20, T2h, T2H, T2G, T2F, T2u, T2z; + T20 = FMA(KP559016994, T1Z, 
T1Y); + T2u = FNMS(KP559016994, T1Z, T1Y); + T2z = FNMS(KP559016994, T2g, T2f); + T2h = FMA(KP559016994, T2g, T2f); + { + E T2t, T2y, T2w, T2E, T2B; + T2t = W[2]; + T2y = W[3]; + T2w = FNMS(KP951056516, T2v, T2u); + T2E = FMA(KP951056516, T2v, T2u); + T2B = FMA(KP951056516, T2A, T2z); + T2H = FNMS(KP951056516, T2A, T2z); + { + E T2D, T2x, T2C, T2I; + T2D = W[4]; + T2x = T2t * T2w; + T2G = W[5]; + T2C = T2t * T2B; + T2I = T2D * T2H; + T2F = T2D * T2E; + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T2y, T2B, T2x); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T2y, T2w, T2C); + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2G, T2E, T2I); + } + } + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2G, T2H, T2F); + { + E T4v, T4u, T4t, T4i, T4n; + T3O = FMA(KP559016994, T3N, T3M); + T4i = FNMS(KP559016994, T3N, T3M); + T4n = FNMS(KP559016994, T44, T43); + T45 = FMA(KP559016994, T44, T43); + { + E T4h, T4m, T4k, T4s, T4p; + T4h = W[2]; + T4m = W[3]; + T4k = FNMS(KP951056516, T4j, T4i); + T4s = FMA(KP951056516, T4j, T4i); + T4p = FMA(KP951056516, T4o, T4n); + T4v = FNMS(KP951056516, T4o, T4n); + { + E T4r, T4l, T4q, T4w; + T4r = W[4]; + T4l = T4h * T4k; + T4u = W[5]; + T4q = T4h * T4p; + T4w = T4r * T4v; + T4t = T4r * T4s; + rio[WS(vs, 2) + WS(rs, 4)] = FMA(T4m, T4p, T4l); + iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T4m, T4k, T4q); + iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T4u, T4s, T4w); + } + } + rio[WS(vs, 3) + WS(rs, 4)] = FMA(T4u, T4v, T4t); + { + E T1X, T2a, T28, T2o, T2l; + T1X = W[0]; + T2a = W[1]; + T28 = FMA(KP951056516, T27, T20); + T2o = FNMS(KP951056516, T27, T20); + T2l = FNMS(KP951056516, T2k, T2h); + T2r = FMA(KP951056516, T2k, T2h); + { + E T2n, T29, T2m, T2s; + T2n = W[6]; + T29 = T1X * T28; + T2q = W[7]; + T2m = T1X * T2l; + T2s = T2n * T2r; + T2p = T2n * T2o; + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T2a, T2l, T29); + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T2a, T28, T2m); + iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T2q, T2o, T2s); + } + } + } + } + rio[WS(vs, 4) + WS(rs, 2)] = FMA(T2q, T2r, T2p); + { + E T3B, T3A, T3z, T4f, T4e, T4d; 
+ { + E T3o, T3t, T3l, T3k, T3j, T2U, T3b; + T3o = FNMS(KP559016994, T2T, T2S); + T2U = FMA(KP559016994, T2T, T2S); + T3b = FMA(KP559016994, T3a, T39); + T3t = FNMS(KP559016994, T3a, T39); + { + E T2R, T34, T32, T3i, T3f; + T2R = W[0]; + T34 = W[1]; + T32 = FMA(KP951056516, T31, T2U); + T3i = FNMS(KP951056516, T31, T2U); + T3f = FNMS(KP951056516, T3e, T3b); + T3l = FMA(KP951056516, T3e, T3b); + { + E T3h, T33, T3g, T3m; + T3h = W[6]; + T33 = T2R * T32; + T3k = W[7]; + T3g = T2R * T3f; + T3m = T3h * T3l; + T3j = T3h * T3i; + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T34, T3f, T33); + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T34, T32, T3g); + iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T3k, T3i, T3m); + } + } + rio[WS(vs, 4) + WS(rs, 3)] = FMA(T3k, T3l, T3j); + { + E T3n, T3s, T3q, T3y, T3v; + T3n = W[2]; + T3s = W[3]; + T3q = FNMS(KP951056516, T3p, T3o); + T3y = FMA(KP951056516, T3p, T3o); + T3v = FMA(KP951056516, T3u, T3t); + T3B = FNMS(KP951056516, T3u, T3t); + { + E T3x, T3r, T3w, T3C; + T3x = W[4]; + T3r = T3n * T3q; + T3A = W[5]; + T3w = T3n * T3v; + T3C = T3x * T3B; + T3z = T3x * T3y; + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T3s, T3v, T3r); + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T3s, T3q, T3w); + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T3A, T3y, T3C); + } + } + } + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T3A, T3B, T3z); + { + E T3L, T3Y, T3W, T4c, T49; + T3L = W[0]; + T3Y = W[1]; + T3W = FMA(KP951056516, T3V, T3O); + T4c = FNMS(KP951056516, T3V, T3O); + T49 = FNMS(KP951056516, T48, T45); + T4f = FMA(KP951056516, T48, T45); + { + E T4b, T3X, T4a, T4g; + T4b = W[6]; + T3X = T3L * T3W; + T4e = W[7]; + T4a = T3L * T49; + T4g = T4b * T4f; + T4d = T4b * T4c; + rio[WS(vs, 1) + WS(rs, 4)] = FMA(T3Y, T49, T3X); + iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T3Y, T3W, T4a); + iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T4e, T4c, T4g); + } + } + rio[WS(vs, 4) + WS(rs, 4)] = FMA(T4e, T4f, T4d); + { + E T16, T1n, T1N, T1M, T1L, T1A, T1F; + T16 = FMA(KP559016994, T15, T14); + T1A = FNMS(KP559016994, T15, T14); + T1F = FNMS(KP559016994, T1m, 
T1l); + T1n = FMA(KP559016994, T1m, T1l); + { + E T1z, T1E, T1C, T1K, T1H; + T1z = W[2]; + T1E = W[3]; + T1C = FNMS(KP951056516, T1B, T1A); + T1K = FMA(KP951056516, T1B, T1A); + T1H = FMA(KP951056516, T1G, T1F); + T1N = FNMS(KP951056516, T1G, T1F); + { + E T1J, T1D, T1I, T1O; + T1J = W[4]; + T1D = T1z * T1C; + T1M = W[5]; + T1I = T1z * T1H; + T1O = T1J * T1N; + T1L = T1J * T1K; + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1E, T1H, T1D); + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T1E, T1C, T1I); + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1M, T1K, T1O); + } + } + rio[WS(vs, 3) + WS(rs, 1)] = FMA(T1M, T1N, T1L); + { + E T13, T1g, T1e, T1u, T1r; + T13 = W[0]; + T1g = W[1]; + T1e = FMA(KP951056516, T1d, T16); + T1u = FNMS(KP951056516, T1d, T16); + T1r = FNMS(KP951056516, T1q, T1n); + T1x = FMA(KP951056516, T1q, T1n); + { + E T1t, T1f, T1s, T1y; + T1t = W[6]; + T1f = T13 * T1e; + T1w = W[7]; + T1s = T13 * T1r; + T1y = T1t * T1x; + T1v = T1t * T1u; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(T1g, T1r, T1f); + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T1g, T1e, T1s); + iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T1w, T1u, T1y); + } + } + } + } + } + } + } + rio[WS(vs, 4) + WS(rs, 1)] = FMA(T1w, T1x, T1v); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 5}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 5, "q1_5", twinstr, &GENUS, {70, 40, 130, 0}, 0, 0, 0 }; + +void X(codelet_q1_5) (planner *p) { + X(kdft_difsq_register) (p, q1_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 5 -name q1_5 -include q.h */ + +/* + * This function contains 200 FP additions, 140 FP multiplications, + * (or, 130 additions, 70 multiplications, 70 fused multiply/add), + * 75 stack variables, 4 constants, and 100 memory accesses + */ +#include "q.h" + +static void q1_5(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + 
DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T1, Ta, TG, Tv, T8, Tb, Tp, Tj, TD, To, Tq, Tr, TN, TW, T1s; + E T1h, TU, TX, T1b, T15, T1p, T1a, T1c, T1d, T1z, T1I, T2e, T23, T1G, T1J; + E T1X, T1R, T2b, T1W, T1Y, T1Z, T3v, T3p, T3J, T3u, T3w, T3x, T37, T3g, T3M; + E T3B, T3e, T3h, T2l, T2u, T30, T2P, T2s, T2v, T2J, T2D, T2X, T2I, T2K, T2L; + { + E T7, Tu, T4, Tt; + T1 = rio[0]; + { + E T5, T6, T2, T3; + T5 = rio[WS(rs, 2)]; + T6 = rio[WS(rs, 3)]; + T7 = T5 + T6; + Tu = T5 - T6; + T2 = rio[WS(rs, 1)]; + T3 = rio[WS(rs, 4)]; + T4 = T2 + T3; + Tt = T2 - T3; + } + Ta = KP559016994 * (T4 - T7); + TG = FNMS(KP587785252, Tt, KP951056516 * Tu); + Tv = FMA(KP951056516, Tt, KP587785252 * Tu); + T8 = T4 + T7; + Tb = FNMS(KP250000000, T8, T1); + } + { + E Ti, Tn, Tf, Tm; + Tp = iio[0]; + { + E Tg, Th, Td, Te; + Tg = iio[WS(rs, 2)]; + Th = iio[WS(rs, 3)]; + Ti = Tg - Th; + Tn = Tg + Th; + Td = iio[WS(rs, 1)]; + Te = iio[WS(rs, 4)]; + Tf = Td - Te; + Tm = Td + Te; + } + Tj = FMA(KP951056516, Tf, KP587785252 * Ti); + TD = FNMS(KP587785252, Tf, KP951056516 * Ti); + To = KP559016994 * (Tm - Tn); + Tq = Tm + Tn; + Tr = FNMS(KP250000000, Tq, Tp); + } + { + E TT, T1g, TQ, T1f; + TN = rio[WS(vs, 1)]; + { + E TR, TS, TO, TP; + TR = rio[WS(vs, 1) + WS(rs, 2)]; + TS = rio[WS(vs, 1) + WS(rs, 3)]; + TT = TR + TS; + T1g = TR - TS; + TO = rio[WS(vs, 1) + WS(rs, 1)]; + TP = rio[WS(vs, 1) + WS(rs, 4)]; + TQ = TO + TP; + T1f = TO - TP; + } + TW = KP559016994 * (TQ - TT); + T1s = FNMS(KP587785252, T1f, KP951056516 * T1g); + T1h = FMA(KP951056516, T1f, KP587785252 * T1g); + TU = TQ + TT; + TX = FNMS(KP250000000, TU, TN); + } + { + E T14, T19, T11, T18; + T1b = iio[WS(vs, 
1)]; + { + E T12, T13, TZ, T10; + T12 = iio[WS(vs, 1) + WS(rs, 2)]; + T13 = iio[WS(vs, 1) + WS(rs, 3)]; + T14 = T12 - T13; + T19 = T12 + T13; + TZ = iio[WS(vs, 1) + WS(rs, 1)]; + T10 = iio[WS(vs, 1) + WS(rs, 4)]; + T11 = TZ - T10; + T18 = TZ + T10; + } + T15 = FMA(KP951056516, T11, KP587785252 * T14); + T1p = FNMS(KP587785252, T11, KP951056516 * T14); + T1a = KP559016994 * (T18 - T19); + T1c = T18 + T19; + T1d = FNMS(KP250000000, T1c, T1b); + } + { + E T1F, T22, T1C, T21; + T1z = rio[WS(vs, 2)]; + { + E T1D, T1E, T1A, T1B; + T1D = rio[WS(vs, 2) + WS(rs, 2)]; + T1E = rio[WS(vs, 2) + WS(rs, 3)]; + T1F = T1D + T1E; + T22 = T1D - T1E; + T1A = rio[WS(vs, 2) + WS(rs, 1)]; + T1B = rio[WS(vs, 2) + WS(rs, 4)]; + T1C = T1A + T1B; + T21 = T1A - T1B; + } + T1I = KP559016994 * (T1C - T1F); + T2e = FNMS(KP587785252, T21, KP951056516 * T22); + T23 = FMA(KP951056516, T21, KP587785252 * T22); + T1G = T1C + T1F; + T1J = FNMS(KP250000000, T1G, T1z); + } + { + E T1Q, T1V, T1N, T1U; + T1X = iio[WS(vs, 2)]; + { + E T1O, T1P, T1L, T1M; + T1O = iio[WS(vs, 2) + WS(rs, 2)]; + T1P = iio[WS(vs, 2) + WS(rs, 3)]; + T1Q = T1O - T1P; + T1V = T1O + T1P; + T1L = iio[WS(vs, 2) + WS(rs, 1)]; + T1M = iio[WS(vs, 2) + WS(rs, 4)]; + T1N = T1L - T1M; + T1U = T1L + T1M; + } + T1R = FMA(KP951056516, T1N, KP587785252 * T1Q); + T2b = FNMS(KP587785252, T1N, KP951056516 * T1Q); + T1W = KP559016994 * (T1U - T1V); + T1Y = T1U + T1V; + T1Z = FNMS(KP250000000, T1Y, T1X); + } + { + E T3o, T3t, T3l, T3s; + T3v = iio[WS(vs, 4)]; + { + E T3m, T3n, T3j, T3k; + T3m = iio[WS(vs, 4) + WS(rs, 2)]; + T3n = iio[WS(vs, 4) + WS(rs, 3)]; + T3o = T3m - T3n; + T3t = T3m + T3n; + T3j = iio[WS(vs, 4) + WS(rs, 1)]; + T3k = iio[WS(vs, 4) + WS(rs, 4)]; + T3l = T3j - T3k; + T3s = T3j + T3k; + } + T3p = FMA(KP951056516, T3l, KP587785252 * T3o); + T3J = FNMS(KP587785252, T3l, KP951056516 * T3o); + T3u = KP559016994 * (T3s - T3t); + T3w = T3s + T3t; + T3x = FNMS(KP250000000, T3w, T3v); + } + { + E T3d, T3A, T3a, T3z; + T37 = rio[WS(vs, 
4)]; + { + E T3b, T3c, T38, T39; + T3b = rio[WS(vs, 4) + WS(rs, 2)]; + T3c = rio[WS(vs, 4) + WS(rs, 3)]; + T3d = T3b + T3c; + T3A = T3b - T3c; + T38 = rio[WS(vs, 4) + WS(rs, 1)]; + T39 = rio[WS(vs, 4) + WS(rs, 4)]; + T3a = T38 + T39; + T3z = T38 - T39; + } + T3g = KP559016994 * (T3a - T3d); + T3M = FNMS(KP587785252, T3z, KP951056516 * T3A); + T3B = FMA(KP951056516, T3z, KP587785252 * T3A); + T3e = T3a + T3d; + T3h = FNMS(KP250000000, T3e, T37); + } + { + E T2r, T2O, T2o, T2N; + T2l = rio[WS(vs, 3)]; + { + E T2p, T2q, T2m, T2n; + T2p = rio[WS(vs, 3) + WS(rs, 2)]; + T2q = rio[WS(vs, 3) + WS(rs, 3)]; + T2r = T2p + T2q; + T2O = T2p - T2q; + T2m = rio[WS(vs, 3) + WS(rs, 1)]; + T2n = rio[WS(vs, 3) + WS(rs, 4)]; + T2o = T2m + T2n; + T2N = T2m - T2n; + } + T2u = KP559016994 * (T2o - T2r); + T30 = FNMS(KP587785252, T2N, KP951056516 * T2O); + T2P = FMA(KP951056516, T2N, KP587785252 * T2O); + T2s = T2o + T2r; + T2v = FNMS(KP250000000, T2s, T2l); + } + { + E T2C, T2H, T2z, T2G; + T2J = iio[WS(vs, 3)]; + { + E T2A, T2B, T2x, T2y; + T2A = iio[WS(vs, 3) + WS(rs, 2)]; + T2B = iio[WS(vs, 3) + WS(rs, 3)]; + T2C = T2A - T2B; + T2H = T2A + T2B; + T2x = iio[WS(vs, 3) + WS(rs, 1)]; + T2y = iio[WS(vs, 3) + WS(rs, 4)]; + T2z = T2x - T2y; + T2G = T2x + T2y; + } + T2D = FMA(KP951056516, T2z, KP587785252 * T2C); + T2X = FNMS(KP587785252, T2z, KP951056516 * T2C); + T2I = KP559016994 * (T2G - T2H); + T2K = T2G + T2H; + T2L = FNMS(KP250000000, T2K, T2J); + } + rio[0] = T1 + T8; + iio[0] = Tp + Tq; + rio[WS(rs, 1)] = TN + TU; + iio[WS(rs, 1)] = T1b + T1c; + rio[WS(rs, 2)] = T1z + T1G; + iio[WS(rs, 2)] = T1X + T1Y; + iio[WS(rs, 4)] = T3v + T3w; + rio[WS(rs, 4)] = T37 + T3e; + rio[WS(rs, 3)] = T2l + T2s; + iio[WS(rs, 3)] = T2J + T2K; + { + E Tk, Ty, Tw, TA, Tc, Ts; + Tc = Ta + Tb; + Tk = Tc + Tj; + Ty = Tc - Tj; + Ts = To + Tr; + Tw = Ts - Tv; + TA = Tv + Ts; + { + E T9, Tl, Tx, Tz; + T9 = W[0]; + Tl = W[1]; + rio[WS(vs, 1)] = FMA(T9, Tk, Tl * Tw); + iio[WS(vs, 1)] = FNMS(Tl, Tk, T9 * Tw); + Tx = 
W[6]; + Tz = W[7]; + rio[WS(vs, 4)] = FMA(Tx, Ty, Tz * TA); + iio[WS(vs, 4)] = FNMS(Tz, Ty, Tx * TA); + } + } + { + E TE, TK, TI, TM, TC, TH; + TC = Tb - Ta; + TE = TC - TD; + TK = TC + TD; + TH = Tr - To; + TI = TG + TH; + TM = TH - TG; + { + E TB, TF, TJ, TL; + TB = W[2]; + TF = W[3]; + rio[WS(vs, 2)] = FMA(TB, TE, TF * TI); + iio[WS(vs, 2)] = FNMS(TF, TE, TB * TI); + TJ = W[4]; + TL = W[5]; + rio[WS(vs, 3)] = FMA(TJ, TK, TL * TM); + iio[WS(vs, 3)] = FNMS(TL, TK, TJ * TM); + } + } + { + E T2c, T2i, T2g, T2k, T2a, T2f; + T2a = T1J - T1I; + T2c = T2a - T2b; + T2i = T2a + T2b; + T2f = T1Z - T1W; + T2g = T2e + T2f; + T2k = T2f - T2e; + { + E T29, T2d, T2h, T2j; + T29 = W[2]; + T2d = W[3]; + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T29, T2c, T2d * T2g); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T2d, T2c, T29 * T2g); + T2h = W[4]; + T2j = W[5]; + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2h, T2i, T2j * T2k); + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2j, T2i, T2h * T2k); + } + } + { + E T3K, T3Q, T3O, T3S, T3I, T3N; + T3I = T3h - T3g; + T3K = T3I - T3J; + T3Q = T3I + T3J; + T3N = T3x - T3u; + T3O = T3M + T3N; + T3S = T3N - T3M; + { + E T3H, T3L, T3P, T3R; + T3H = W[2]; + T3L = W[3]; + rio[WS(vs, 2) + WS(rs, 4)] = FMA(T3H, T3K, T3L * T3O); + iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T3L, T3K, T3H * T3O); + T3P = W[4]; + T3R = W[5]; + rio[WS(vs, 3) + WS(rs, 4)] = FMA(T3P, T3Q, T3R * T3S); + iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T3R, T3Q, T3P * T3S); + } + } + { + E T1S, T26, T24, T28, T1K, T20; + T1K = T1I + T1J; + T1S = T1K + T1R; + T26 = T1K - T1R; + T20 = T1W + T1Z; + T24 = T20 - T23; + T28 = T23 + T20; + { + E T1H, T1T, T25, T27; + T1H = W[0]; + T1T = W[1]; + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1H, T1S, T1T * T24); + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T1T, T1S, T1H * T24); + T25 = W[6]; + T27 = W[7]; + rio[WS(vs, 4) + WS(rs, 2)] = FMA(T25, T26, T27 * T28); + iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T27, T26, T25 * T28); + } + } + { + E T2E, T2S, T2Q, T2U, T2w, T2M; + T2w = T2u + T2v; + T2E = T2w + T2D; + T2S = T2w - 
T2D; + T2M = T2I + T2L; + T2Q = T2M - T2P; + T2U = T2P + T2M; + { + E T2t, T2F, T2R, T2T; + T2t = W[0]; + T2F = W[1]; + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T2t, T2E, T2F * T2Q); + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T2F, T2E, T2t * T2Q); + T2R = W[6]; + T2T = W[7]; + rio[WS(vs, 4) + WS(rs, 3)] = FMA(T2R, T2S, T2T * T2U); + iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T2T, T2S, T2R * T2U); + } + } + { + E T2Y, T34, T32, T36, T2W, T31; + T2W = T2v - T2u; + T2Y = T2W - T2X; + T34 = T2W + T2X; + T31 = T2L - T2I; + T32 = T30 + T31; + T36 = T31 - T30; + { + E T2V, T2Z, T33, T35; + T2V = W[2]; + T2Z = W[3]; + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T2V, T2Y, T2Z * T32); + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T2Z, T2Y, T2V * T32); + T33 = W[4]; + T35 = W[5]; + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T33, T34, T35 * T36); + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T35, T34, T33 * T36); + } + } + { + E T3q, T3E, T3C, T3G, T3i, T3y; + T3i = T3g + T3h; + T3q = T3i + T3p; + T3E = T3i - T3p; + T3y = T3u + T3x; + T3C = T3y - T3B; + T3G = T3B + T3y; + { + E T3f, T3r, T3D, T3F; + T3f = W[0]; + T3r = W[1]; + rio[WS(vs, 1) + WS(rs, 4)] = FMA(T3f, T3q, T3r * T3C); + iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T3r, T3q, T3f * T3C); + T3D = W[6]; + T3F = W[7]; + rio[WS(vs, 4) + WS(rs, 4)] = FMA(T3D, T3E, T3F * T3G); + iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T3F, T3E, T3D * T3G); + } + } + { + E T1q, T1w, T1u, T1y, T1o, T1t; + T1o = TX - TW; + T1q = T1o - T1p; + T1w = T1o + T1p; + T1t = T1d - T1a; + T1u = T1s + T1t; + T1y = T1t - T1s; + { + E T1n, T1r, T1v, T1x; + T1n = W[2]; + T1r = W[3]; + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1n, T1q, T1r * T1u); + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T1r, T1q, T1n * T1u); + T1v = W[4]; + T1x = W[5]; + rio[WS(vs, 3) + WS(rs, 1)] = FMA(T1v, T1w, T1x * T1y); + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1x, T1w, T1v * T1y); + } + } + { + E T16, T1k, T1i, T1m, TY, T1e; + TY = TW + TX; + T16 = TY + T15; + T1k = TY - T15; + T1e = T1a + T1d; + T1i = T1e - T1h; + T1m = T1h + T1e; + { + E TV, T17, T1j, T1l; + TV = W[0]; + T17 = 
W[1]; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(TV, T16, T17 * T1i); + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T17, T16, TV * T1i); + T1j = W[6]; + T1l = W[7]; + rio[WS(vs, 4) + WS(rs, 1)] = FMA(T1j, T1k, T1l * T1m); + iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T1l, T1k, T1j * T1m); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 5}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 5, "q1_5", twinstr, &GENUS, {130, 70, 70, 0}, 0, 0, 0 }; + +void X(codelet_q1_5) (planner *p) { + X(kdft_difsq_register) (p, q1_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1313 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:01 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 6 -name q1_6 -include q.h */ + +/* + * This function contains 276 FP additions, 192 FP multiplications, + * (or, 144 additions, 60 multiplications, 132 fused multiply/add), + * 129 stack variables, 2 constants, and 144 memory accesses + */ +#include "q.h" + +static void q1_6(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 10); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T4c, T4f, T4e, T4g, T4d; + { + E T3, Tw, Ta, TW, Tg, TG, TM, TT, TU, TP, Tn, T17, TV, TJ, Tv; + E T1A, T1e, T20, T1k, T1K, T1Q, T1X, T1Y, T1T, T1r, T1Z, T1N, T1z, T31, T32; + E T2X, T2v, T2b, T33, T2R, T2D, T2E, T2i, T34, T3f, T2o, T2O, T2U, T3I, T3m; + E T48, T3s, T3S, T3Y, T45, T46, T41, T3z, T4j, T47, T3V, T3H, T4M, T4q, T5c; + E T4w, T4W, T52, T59, T5a, T55, T4D, T5b, T4Z, T4L, T6d, T5r, T6e, T69, T5H; + E T5w, T5n, T6f, T63, T5P, T5s, T5o, T5p; + { + E T2f, T2k, T2g, T2c, T2d; + { + E T1b, T1g, T1c, T18, T19; + { + E T4, Tc, Te, T9, T5; + { + E T1, T2, T7, T8; + T1 = rio[0]; + T2 = rio[WS(rs, 3)]; + T7 = rio[WS(rs, 4)]; + T8 = rio[WS(rs, 1)]; + T4 = rio[WS(rs, 2)]; + Tc = T1 - T2; + T3 = T1 + T2; + Te = T7 - T8; + T9 = T7 + T8; + T5 = rio[WS(rs, 5)]; + } + { + E TN, 
Tj, Tk, Tl, Tt, Th, Ti; + Th = iio[WS(rs, 2)]; + Ti = iio[WS(rs, 5)]; + { + E Tr, Ts, Td, T6, Tf; + Tr = iio[0]; + Td = T4 - T5; + T6 = T4 + T5; + TN = Th + Ti; + Tj = Th - Ti; + Tf = Td + Te; + Tw = Te - Td; + Ta = T6 + T9; + TW = T9 - T6; + Tg = FNMS(KP500000000, Tf, Tc); + TG = Tc + Tf; + Ts = iio[WS(rs, 3)]; + TM = FNMS(KP500000000, Ta, T3); + Tk = iio[WS(rs, 4)]; + Tl = iio[WS(rs, 1)]; + Tt = Tr - Ts; + TT = Tr + Ts; + } + { + E T15, TO, Tm, T16, Tu; + T15 = rio[WS(vs, 1)]; + TO = Tk + Tl; + Tm = Tk - Tl; + T16 = rio[WS(vs, 1) + WS(rs, 3)]; + T1b = rio[WS(vs, 1) + WS(rs, 4)]; + TU = TN + TO; + TP = TN - TO; + Tu = Tj + Tm; + Tn = Tj - Tm; + T1g = T15 - T16; + T17 = T15 + T16; + TV = FNMS(KP500000000, TU, TT); + TJ = Tt + Tu; + Tv = FNMS(KP500000000, Tu, Tt); + T1c = rio[WS(vs, 1) + WS(rs, 1)]; + T18 = rio[WS(vs, 1) + WS(rs, 2)]; + T19 = rio[WS(vs, 1) + WS(rs, 5)]; + } + } + } + { + E T1v, T1R, T1n, T1w, T1o, T1p; + { + E T1l, T1i, T1d, T1h, T1a, T1m, T1j; + T1l = iio[WS(vs, 1) + WS(rs, 2)]; + T1i = T1b - T1c; + T1d = T1b + T1c; + T1h = T18 - T19; + T1a = T18 + T19; + T1m = iio[WS(vs, 1) + WS(rs, 5)]; + T1v = iio[WS(vs, 1)]; + T1j = T1h + T1i; + T1A = T1i - T1h; + T1e = T1a + T1d; + T20 = T1d - T1a; + T1R = T1l + T1m; + T1n = T1l - T1m; + T1k = FNMS(KP500000000, T1j, T1g); + T1K = T1g + T1j; + T1Q = FNMS(KP500000000, T1e, T17); + T1w = iio[WS(vs, 1) + WS(rs, 3)]; + T1o = iio[WS(vs, 1) + WS(rs, 4)]; + T1p = iio[WS(vs, 1) + WS(rs, 1)]; + } + { + E T2z, T2V, T2r, T2A, T2s, T2t; + { + E T2p, T1x, T1S, T1q, T2q, T1y; + T2p = iio[WS(vs, 2) + WS(rs, 2)]; + T1X = T1v + T1w; + T1x = T1v - T1w; + T1S = T1o + T1p; + T1q = T1o - T1p; + T2q = iio[WS(vs, 2) + WS(rs, 5)]; + T2z = iio[WS(vs, 2)]; + T1Y = T1R + T1S; + T1T = T1R - T1S; + T1y = T1n + T1q; + T1r = T1n - T1q; + T2V = T2p + T2q; + T2r = T2p - T2q; + T1Z = FNMS(KP500000000, T1Y, T1X); + T1N = T1x + T1y; + T1z = FNMS(KP500000000, T1y, T1x); + T2A = iio[WS(vs, 2) + WS(rs, 3)]; + T2s = iio[WS(vs, 2) + WS(rs, 4)]; + T2t 
= iio[WS(vs, 2) + WS(rs, 1)]; + } + { + E T29, T2B, T2W, T2u, T2a, T2C; + T29 = rio[WS(vs, 2)]; + T31 = T2z + T2A; + T2B = T2z - T2A; + T2W = T2s + T2t; + T2u = T2s - T2t; + T2a = rio[WS(vs, 2) + WS(rs, 3)]; + T2f = rio[WS(vs, 2) + WS(rs, 4)]; + T32 = T2V + T2W; + T2X = T2V - T2W; + T2C = T2r + T2u; + T2v = T2r - T2u; + T2k = T29 - T2a; + T2b = T29 + T2a; + T33 = FNMS(KP500000000, T32, T31); + T2R = T2B + T2C; + T2D = FNMS(KP500000000, T2C, T2B); + T2g = rio[WS(vs, 2) + WS(rs, 1)]; + T2c = rio[WS(vs, 2) + WS(rs, 2)]; + T2d = rio[WS(vs, 2) + WS(rs, 5)]; + } + } + } + } + { + E T4n, T4s, T4o, T4k, T4l; + { + E T3j, T3o, T3k, T3g, T3h; + { + E T3d, T2m, T2h, T2l, T2e, T3e, T2n; + T3d = rio[WS(vs, 3)]; + T2m = T2f - T2g; + T2h = T2f + T2g; + T2l = T2c - T2d; + T2e = T2c + T2d; + T3e = rio[WS(vs, 3) + WS(rs, 3)]; + T3j = rio[WS(vs, 3) + WS(rs, 4)]; + T2n = T2l + T2m; + T2E = T2m - T2l; + T2i = T2e + T2h; + T34 = T2h - T2e; + T3o = T3d - T3e; + T3f = T3d + T3e; + T2o = FNMS(KP500000000, T2n, T2k); + T2O = T2k + T2n; + T2U = FNMS(KP500000000, T2i, T2b); + T3k = rio[WS(vs, 3) + WS(rs, 1)]; + T3g = rio[WS(vs, 3) + WS(rs, 2)]; + T3h = rio[WS(vs, 3) + WS(rs, 5)]; + } + { + E T3D, T3Z, T3v, T3E, T3w, T3x; + { + E T3t, T3q, T3l, T3p, T3i, T3u, T3r; + T3t = iio[WS(vs, 3) + WS(rs, 2)]; + T3q = T3j - T3k; + T3l = T3j + T3k; + T3p = T3g - T3h; + T3i = T3g + T3h; + T3u = iio[WS(vs, 3) + WS(rs, 5)]; + T3D = iio[WS(vs, 3)]; + T3r = T3p + T3q; + T3I = T3q - T3p; + T3m = T3i + T3l; + T48 = T3l - T3i; + T3Z = T3t + T3u; + T3v = T3t - T3u; + T3s = FNMS(KP500000000, T3r, T3o); + T3S = T3o + T3r; + T3Y = FNMS(KP500000000, T3m, T3f); + T3E = iio[WS(vs, 3) + WS(rs, 3)]; + T3w = iio[WS(vs, 3) + WS(rs, 4)]; + T3x = iio[WS(vs, 3) + WS(rs, 1)]; + } + { + E T4h, T3F, T40, T3y, T4i, T3G; + T4h = rio[WS(vs, 4)]; + T45 = T3D + T3E; + T3F = T3D - T3E; + T40 = T3w + T3x; + T3y = T3w - T3x; + T4i = rio[WS(vs, 4) + WS(rs, 3)]; + T4n = rio[WS(vs, 4) + WS(rs, 4)]; + T46 = T3Z + T40; + T41 = T3Z - T40; + 
T3G = T3v + T3y; + T3z = T3v - T3y; + T4s = T4h - T4i; + T4j = T4h + T4i; + T47 = FNMS(KP500000000, T46, T45); + T3V = T3F + T3G; + T3H = FNMS(KP500000000, T3G, T3F); + T4o = rio[WS(vs, 4) + WS(rs, 1)]; + T4k = rio[WS(vs, 4) + WS(rs, 2)]; + T4l = rio[WS(vs, 4) + WS(rs, 5)]; + } + } + } + { + E T4H, T53, T4z, T4I, T4A, T4B; + { + E T4x, T4u, T4p, T4t, T4m, T4y, T4v; + T4x = iio[WS(vs, 4) + WS(rs, 2)]; + T4u = T4n - T4o; + T4p = T4n + T4o; + T4t = T4k - T4l; + T4m = T4k + T4l; + T4y = iio[WS(vs, 4) + WS(rs, 5)]; + T4H = iio[WS(vs, 4)]; + T4v = T4t + T4u; + T4M = T4u - T4t; + T4q = T4m + T4p; + T5c = T4p - T4m; + T53 = T4x + T4y; + T4z = T4x - T4y; + T4w = FNMS(KP500000000, T4v, T4s); + T4W = T4s + T4v; + T52 = FNMS(KP500000000, T4q, T4j); + T4I = iio[WS(vs, 4) + WS(rs, 3)]; + T4A = iio[WS(vs, 4) + WS(rs, 4)]; + T4B = iio[WS(vs, 4) + WS(rs, 1)]; + } + { + E T5L, T67, T5D, T5M, T5E, T5F; + { + E T5B, T4J, T54, T4C, T5C, T4K; + T5B = iio[WS(vs, 5) + WS(rs, 2)]; + T59 = T4H + T4I; + T4J = T4H - T4I; + T54 = T4A + T4B; + T4C = T4A - T4B; + T5C = iio[WS(vs, 5) + WS(rs, 5)]; + T5L = iio[WS(vs, 5)]; + T5a = T53 + T54; + T55 = T53 - T54; + T4K = T4z + T4C; + T4D = T4z - T4C; + T67 = T5B + T5C; + T5D = T5B - T5C; + T5b = FNMS(KP500000000, T5a, T59); + T4Z = T4J + T4K; + T4L = FNMS(KP500000000, T4K, T4J); + T5M = iio[WS(vs, 5) + WS(rs, 3)]; + T5E = iio[WS(vs, 5) + WS(rs, 4)]; + T5F = iio[WS(vs, 5) + WS(rs, 1)]; + } + { + E T5l, T5N, T68, T5G, T5m, T5O; + T5l = rio[WS(vs, 5)]; + T6d = T5L + T5M; + T5N = T5L - T5M; + T68 = T5E + T5F; + T5G = T5E - T5F; + T5m = rio[WS(vs, 5) + WS(rs, 3)]; + T5r = rio[WS(vs, 5) + WS(rs, 4)]; + T6e = T67 + T68; + T69 = T67 - T68; + T5O = T5D + T5G; + T5H = T5D - T5G; + T5w = T5l - T5m; + T5n = T5l + T5m; + T6f = FNMS(KP500000000, T6e, T6d); + T63 = T5N + T5O; + T5P = FNMS(KP500000000, T5O, T5N); + T5s = rio[WS(vs, 5) + WS(rs, 1)]; + T5o = rio[WS(vs, 5) + WS(rs, 2)]; + T5p = rio[WS(vs, 5) + WS(rs, 5)]; + } + } + } + } + } + { + E T6a, T6h, T5I, T5R, 
T65, T6c; + { + E T5Q, T5u, T6g, T5A, T60, T66; + { + E T5y, T5t, T5x, T5q, T5z; + rio[0] = T3 + Ta; + T5y = T5r - T5s; + T5t = T5r + T5s; + T5x = T5o - T5p; + T5q = T5o + T5p; + iio[0] = TT + TU; + rio[WS(rs, 1)] = T17 + T1e; + T5z = T5x + T5y; + T5Q = T5y - T5x; + T5u = T5q + T5t; + T6g = T5t - T5q; + T5A = FNMS(KP500000000, T5z, T5w); + T60 = T5w + T5z; + iio[WS(rs, 1)] = T1X + T1Y; + T66 = FNMS(KP500000000, T5u, T5n); + rio[WS(rs, 2)] = T2b + T2i; + } + iio[WS(rs, 2)] = T31 + T32; + iio[WS(rs, 4)] = T59 + T5a; + rio[WS(rs, 4)] = T4j + T4q; + rio[WS(rs, 3)] = T3f + T3m; + iio[WS(rs, 3)] = T45 + T46; + { + E TA, TD, TQ, T10, T13, TX, TZ, T12; + rio[WS(rs, 5)] = T5n + T5u; + iio[WS(rs, 5)] = T6d + T6e; + { + E To, Tx, Tb, Tq; + TA = FNMS(KP866025403, Tn, Tg); + To = FMA(KP866025403, Tn, Tg); + Tx = FMA(KP866025403, Tw, Tv); + TD = FNMS(KP866025403, Tw, Tv); + Tb = W[0]; + Tq = W[1]; + { + E TI, TK, TH, Ty, Tp, TF; + Ty = Tb * Tx; + Tp = Tb * To; + TF = W[4]; + TI = W[5]; + iio[WS(vs, 1)] = FNMS(Tq, To, Ty); + rio[WS(vs, 1)] = FMA(Tq, Tx, Tp); + TK = TF * TJ; + TH = TF * TG; + TQ = FNMS(KP866025403, TP, TM); + T10 = FMA(KP866025403, TP, TM); + T13 = FMA(KP866025403, TW, TV); + TX = FNMS(KP866025403, TW, TV); + iio[WS(vs, 3)] = FNMS(TI, TG, TK); + rio[WS(vs, 3)] = FMA(TI, TJ, TH); + TZ = W[6]; + T12 = W[7]; + } + } + { + E TC, TE, TB, TL, TS; + { + E T62, T64, T61, T14, T11, T5Z; + T14 = TZ * T13; + T11 = TZ * T10; + T5Z = W[4]; + T62 = W[5]; + iio[WS(vs, 4)] = FNMS(T12, T10, T14); + rio[WS(vs, 4)] = FMA(T12, T13, T11); + T64 = T5Z * T63; + T61 = T5Z * T60; + { + E T6k, T6n, T6j, T6m, T6o, T6l, Tz; + T6a = FNMS(KP866025403, T69, T66); + T6k = FMA(KP866025403, T69, T66); + T6n = FMA(KP866025403, T6g, T6f); + T6h = FNMS(KP866025403, T6g, T6f); + iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T62, T60, T64); + rio[WS(vs, 3) + WS(rs, 5)] = FMA(T62, T63, T61); + T6j = W[6]; + T6m = W[7]; + T6o = T6j * T6n; + T6l = T6j * T6k; + Tz = W[8]; + TC = W[9]; + iio[WS(vs, 4) + WS(rs, 5)] = 
FNMS(T6m, T6k, T6o); + rio[WS(vs, 4) + WS(rs, 5)] = FMA(T6m, T6n, T6l); + TE = Tz * TD; + TB = Tz * TA; + } + } + iio[WS(vs, 5)] = FNMS(TC, TA, TE); + rio[WS(vs, 5)] = FMA(TC, TD, TB); + TL = W[2]; + TS = W[3]; + { + E T5U, T5X, T5W, T5Y, T5V, TY, TR, T5T; + T5I = FMA(KP866025403, T5H, T5A); + T5U = FNMS(KP866025403, T5H, T5A); + T5X = FNMS(KP866025403, T5Q, T5P); + T5R = FMA(KP866025403, T5Q, T5P); + TY = TL * TX; + TR = TL * TQ; + T5T = W[8]; + T5W = W[9]; + iio[WS(vs, 2)] = FNMS(TS, TQ, TY); + rio[WS(vs, 2)] = FMA(TS, TX, TR); + T5Y = T5T * T5X; + T5V = T5T * T5U; + iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T5W, T5U, T5Y); + rio[WS(vs, 5) + WS(rs, 5)] = FMA(T5W, T5X, T5V); + T65 = W[2]; + T6c = W[3]; + } + } + } + } + { + E T5g, T5j, T5f, T5i; + { + E T1E, T1H, T3M, T3P, T56, T5d, T58, T5e, T57; + { + E T1s, T1B, T1f, T1u; + { + E T5K, T5S, T5J, T6i, T6b, T5v; + T6i = T65 * T6h; + T6b = T65 * T6a; + T5v = W[0]; + T5K = W[1]; + iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T6c, T6a, T6i); + rio[WS(vs, 2) + WS(rs, 5)] = FMA(T6c, T6h, T6b); + T5S = T5v * T5R; + T5J = T5v * T5I; + T1E = FNMS(KP866025403, T1r, T1k); + T1s = FMA(KP866025403, T1r, T1k); + T1B = FMA(KP866025403, T1A, T1z); + T1H = FNMS(KP866025403, T1A, T1z); + iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T5K, T5I, T5S); + rio[WS(vs, 1) + WS(rs, 5)] = FMA(T5K, T5R, T5J); + T1f = W[0]; + T1u = W[1]; + } + { + E T3U, T3W, T3T, T1C, T1t, T3R; + T1C = T1f * T1B; + T1t = T1f * T1s; + T3R = W[4]; + T3U = W[5]; + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T1u, T1s, T1C); + rio[WS(vs, 1) + WS(rs, 1)] = FMA(T1u, T1B, T1t); + T3W = T3R * T3V; + T3T = T3R * T3S; + { + E T3A, T3J, T3n, T3C, T3K, T3B, T51; + T3M = FNMS(KP866025403, T3z, T3s); + T3A = FMA(KP866025403, T3z, T3s); + T3J = FMA(KP866025403, T3I, T3H); + T3P = FNMS(KP866025403, T3I, T3H); + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T3U, T3S, T3W); + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T3U, T3V, T3T); + T3n = W[0]; + T3C = W[1]; + T5g = FMA(KP866025403, T55, T52); + T56 = FNMS(KP866025403, T55, T52); + T5d 
= FNMS(KP866025403, T5c, T5b); + T5j = FMA(KP866025403, T5c, T5b); + T3K = T3n * T3J; + T3B = T3n * T3A; + T51 = W[2]; + T58 = W[3]; + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T3C, T3A, T3K); + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T3C, T3J, T3B); + T5e = T51 * T5d; + T57 = T51 * T56; + } + } + } + { + E T38, T3b, T3O, T3Q, T3N, T37, T3a; + { + E T2Y, T35, T2T, T30, T36, T2Z, T3L; + T38 = FMA(KP866025403, T2X, T2U); + T2Y = FNMS(KP866025403, T2X, T2U); + T35 = FNMS(KP866025403, T34, T33); + T3b = FMA(KP866025403, T34, T33); + iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T58, T56, T5e); + rio[WS(vs, 2) + WS(rs, 4)] = FMA(T58, T5d, T57); + T2T = W[2]; + T30 = W[3]; + T36 = T2T * T35; + T2Z = T2T * T2Y; + T3L = W[8]; + T3O = W[9]; + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T30, T2Y, T36); + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T30, T35, T2Z); + T3Q = T3L * T3P; + T3N = T3L * T3M; + } + iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T3O, T3M, T3Q); + rio[WS(vs, 5) + WS(rs, 3)] = FMA(T3O, T3P, T3N); + T37 = W[6]; + T3a = W[7]; + { + E T1G, T1I, T1F, T3c, T39, T1D; + T3c = T37 * T3b; + T39 = T37 * T38; + T1D = W[8]; + T1G = W[9]; + iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T3a, T38, T3c); + rio[WS(vs, 4) + WS(rs, 2)] = FMA(T3a, T3b, T39); + T1I = T1D * T1H; + T1F = T1D * T1E; + iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T1G, T1E, T1I); + rio[WS(vs, 5) + WS(rs, 1)] = FMA(T1G, T1H, T1F); + T5f = W[6]; + T5i = W[7]; + } + } + } + { + E T4Q, T4T, T2I, T2w, T2F, T2L, T2y, T2G, T2x, T4V, T4Y; + { + E T1M, T1O, T1L, T5k, T5h, T1J; + T5k = T5f * T5j; + T5h = T5f * T5g; + T1J = W[4]; + T1M = W[5]; + iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T5i, T5g, T5k); + rio[WS(vs, 4) + WS(rs, 4)] = FMA(T5i, T5j, T5h); + T1O = T1J * T1N; + T1L = T1J * T1K; + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1M, T1K, T1O); + rio[WS(vs, 3) + WS(rs, 1)] = FMA(T1M, T1N, T1L); + T4V = W[4]; + T4Y = W[5]; + } + { + E T4E, T4N, T4G, T4O, T4F, T50, T4X, T4r; + T4Q = FNMS(KP866025403, T4D, T4w); + T4E = FMA(KP866025403, T4D, T4w); + T4N = FMA(KP866025403, T4M, T4L); + T4T = 
FNMS(KP866025403, T4M, T4L); + T50 = T4V * T4Z; + T4X = T4V * T4W; + T4r = W[0]; + T4G = W[1]; + iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T4Y, T4W, T50); + rio[WS(vs, 3) + WS(rs, 4)] = FMA(T4Y, T4Z, T4X); + T4O = T4r * T4N; + T4F = T4r * T4E; + { + E T2N, T2Q, T2S, T2P, T2j; + iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T4G, T4E, T4O); + rio[WS(vs, 1) + WS(rs, 4)] = FMA(T4G, T4N, T4F); + T2N = W[4]; + T2Q = W[5]; + T2I = FNMS(KP866025403, T2v, T2o); + T2w = FMA(KP866025403, T2v, T2o); + T2F = FMA(KP866025403, T2E, T2D); + T2L = FNMS(KP866025403, T2E, T2D); + T2S = T2N * T2R; + T2P = T2N * T2O; + T2j = W[0]; + T2y = W[1]; + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2Q, T2O, T2S); + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2Q, T2R, T2P); + T2G = T2j * T2F; + T2x = T2j * T2w; + } + } + { + E T1U, T21, T2H, T2K; + { + E T24, T27, T23, T26; + T1U = FNMS(KP866025403, T1T, T1Q); + T24 = FMA(KP866025403, T1T, T1Q); + T27 = FMA(KP866025403, T20, T1Z); + T21 = FNMS(KP866025403, T20, T1Z); + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T2y, T2w, T2G); + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T2y, T2F, T2x); + T23 = W[6]; + T26 = W[7]; + { + E T42, T49, T44, T4a, T43, T28, T25, T3X; + T4c = FMA(KP866025403, T41, T3Y); + T42 = FNMS(KP866025403, T41, T3Y); + T49 = FNMS(KP866025403, T48, T47); + T4f = FMA(KP866025403, T48, T47); + T28 = T23 * T27; + T25 = T23 * T24; + T3X = W[2]; + T44 = W[3]; + iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T26, T24, T28); + rio[WS(vs, 4) + WS(rs, 1)] = FMA(T26, T27, T25); + T4a = T3X * T49; + T43 = T3X * T42; + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T44, T42, T4a); + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T44, T49, T43); + T2H = W[8]; + T2K = W[9]; + } + } + { + E T4S, T4U, T4R, T2M, T2J, T4P; + T2M = T2H * T2L; + T2J = T2H * T2I; + T4P = W[8]; + T4S = W[9]; + iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T2K, T2I, T2M); + rio[WS(vs, 5) + WS(rs, 2)] = FMA(T2K, T2L, T2J); + T4U = T4P * T4T; + T4R = T4P * T4Q; + { + E T1P, T1W, T22, T1V, T4b; + iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T4S, T4Q, T4U); + rio[WS(vs, 5) + WS(rs, 4)] = 
FMA(T4S, T4T, T4R); + T1P = W[2]; + T1W = W[3]; + T22 = T1P * T21; + T1V = T1P * T1U; + T4b = W[6]; + T4e = W[7]; + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T1W, T1U, T22); + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1W, T21, T1V); + T4g = T4b * T4f; + T4d = T4b * T4c; + } + } + } + } + } + } + } + iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T4e, T4c, T4g); + rio[WS(vs, 4) + WS(rs, 3)] = FMA(T4e, T4f, T4d); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 6}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 6, "q1_6", twinstr, &GENUS, {144, 60, 132, 0}, 0, 0, 0 }; + +void X(codelet_q1_6) (planner *p) { + X(kdft_difsq_register) (p, q1_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 6 -name q1_6 -include q.h */ + +/* + * This function contains 276 FP additions, 168 FP multiplications, + * (or, 192 additions, 84 multiplications, 84 fused multiply/add), + * 85 stack variables, 2 constants, and 144 memory accesses + */ +#include "q.h" + +static void q1_6(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + (mb * 10); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T3, Tc, Tt, TM, TX, T16, T1n, T1G, T2h, T2A, T1R, T20, T2L, T2U, T3b; + E T3u, T3F, T3O, T45, T4o, T4Z, T5i, T4z, T4I, Ta, TP, Tf, Tq, Tn, TN; + E Tu, TJ, T14, T1J, T19, T1k, T1h, T1H, T1o, T1D, T2b, T2B, T2i, T2x, T1Y; + E T2D, T23, T2e, T2S, T3x, T2X, T38, T35, T3v, T3c, T3r, T3M, T4r, T3R, T42; + E T3Z, T4p, T46, T4l, T4T, T5j, T50, T5f, T4G, T5l, T4L, T4W; + { + E T1, T2, T1l, T1m; + T1 = rio[0]; + T2 = rio[WS(rs, 3)]; + T3 = T1 + T2; + Tc = T1 - T2; + { + E Tr, Ts, TV, TW; + Tr = iio[0]; + Ts = iio[WS(rs, 3)]; + Tt = Tr - Ts; + 
TM = Tr + Ts; + TV = rio[WS(vs, 1)]; + TW = rio[WS(vs, 1) + WS(rs, 3)]; + TX = TV + TW; + T16 = TV - TW; + } + T1l = iio[WS(vs, 1)]; + T1m = iio[WS(vs, 1) + WS(rs, 3)]; + T1n = T1l - T1m; + T1G = T1l + T1m; + { + E T2f, T2g, T1P, T1Q; + T2f = iio[WS(vs, 2)]; + T2g = iio[WS(vs, 2) + WS(rs, 3)]; + T2h = T2f - T2g; + T2A = T2f + T2g; + T1P = rio[WS(vs, 2)]; + T1Q = rio[WS(vs, 2) + WS(rs, 3)]; + T1R = T1P + T1Q; + T20 = T1P - T1Q; + } + } + { + E T2J, T2K, T43, T44; + T2J = rio[WS(vs, 3)]; + T2K = rio[WS(vs, 3) + WS(rs, 3)]; + T2L = T2J + T2K; + T2U = T2J - T2K; + { + E T39, T3a, T3D, T3E; + T39 = iio[WS(vs, 3)]; + T3a = iio[WS(vs, 3) + WS(rs, 3)]; + T3b = T39 - T3a; + T3u = T39 + T3a; + T3D = rio[WS(vs, 4)]; + T3E = rio[WS(vs, 4) + WS(rs, 3)]; + T3F = T3D + T3E; + T3O = T3D - T3E; + } + T43 = iio[WS(vs, 4)]; + T44 = iio[WS(vs, 4) + WS(rs, 3)]; + T45 = T43 - T44; + T4o = T43 + T44; + { + E T4X, T4Y, T4x, T4y; + T4X = iio[WS(vs, 5)]; + T4Y = iio[WS(vs, 5) + WS(rs, 3)]; + T4Z = T4X - T4Y; + T5i = T4X + T4Y; + T4x = rio[WS(vs, 5)]; + T4y = rio[WS(vs, 5) + WS(rs, 3)]; + T4z = T4x + T4y; + T4I = T4x - T4y; + } + } + { + E T6, Td, T9, Te; + { + E T4, T5, T7, T8; + T4 = rio[WS(rs, 2)]; + T5 = rio[WS(rs, 5)]; + T6 = T4 + T5; + Td = T4 - T5; + T7 = rio[WS(rs, 4)]; + T8 = rio[WS(rs, 1)]; + T9 = T7 + T8; + Te = T7 - T8; + } + Ta = T6 + T9; + TP = KP866025403 * (T9 - T6); + Tf = Td + Te; + Tq = KP866025403 * (Te - Td); + } + { + E Tj, TH, Tm, TI; + { + E Th, Ti, Tk, Tl; + Th = iio[WS(rs, 2)]; + Ti = iio[WS(rs, 5)]; + Tj = Th - Ti; + TH = Th + Ti; + Tk = iio[WS(rs, 4)]; + Tl = iio[WS(rs, 1)]; + Tm = Tk - Tl; + TI = Tk + Tl; + } + Tn = KP866025403 * (Tj - Tm); + TN = TH + TI; + Tu = Tj + Tm; + TJ = KP866025403 * (TH - TI); + } + { + E T10, T17, T13, T18; + { + E TY, TZ, T11, T12; + TY = rio[WS(vs, 1) + WS(rs, 2)]; + TZ = rio[WS(vs, 1) + WS(rs, 5)]; + T10 = TY + TZ; + T17 = TY - TZ; + T11 = rio[WS(vs, 1) + WS(rs, 4)]; + T12 = rio[WS(vs, 1) + WS(rs, 1)]; + T13 = T11 + T12; + T18 = T11 
- T12; + } + T14 = T10 + T13; + T1J = KP866025403 * (T13 - T10); + T19 = T17 + T18; + T1k = KP866025403 * (T18 - T17); + } + { + E T1d, T1B, T1g, T1C; + { + E T1b, T1c, T1e, T1f; + T1b = iio[WS(vs, 1) + WS(rs, 2)]; + T1c = iio[WS(vs, 1) + WS(rs, 5)]; + T1d = T1b - T1c; + T1B = T1b + T1c; + T1e = iio[WS(vs, 1) + WS(rs, 4)]; + T1f = iio[WS(vs, 1) + WS(rs, 1)]; + T1g = T1e - T1f; + T1C = T1e + T1f; + } + T1h = KP866025403 * (T1d - T1g); + T1H = T1B + T1C; + T1o = T1d + T1g; + T1D = KP866025403 * (T1B - T1C); + } + { + E T27, T2v, T2a, T2w; + { + E T25, T26, T28, T29; + T25 = iio[WS(vs, 2) + WS(rs, 2)]; + T26 = iio[WS(vs, 2) + WS(rs, 5)]; + T27 = T25 - T26; + T2v = T25 + T26; + T28 = iio[WS(vs, 2) + WS(rs, 4)]; + T29 = iio[WS(vs, 2) + WS(rs, 1)]; + T2a = T28 - T29; + T2w = T28 + T29; + } + T2b = KP866025403 * (T27 - T2a); + T2B = T2v + T2w; + T2i = T27 + T2a; + T2x = KP866025403 * (T2v - T2w); + } + { + E T1U, T21, T1X, T22; + { + E T1S, T1T, T1V, T1W; + T1S = rio[WS(vs, 2) + WS(rs, 2)]; + T1T = rio[WS(vs, 2) + WS(rs, 5)]; + T1U = T1S + T1T; + T21 = T1S - T1T; + T1V = rio[WS(vs, 2) + WS(rs, 4)]; + T1W = rio[WS(vs, 2) + WS(rs, 1)]; + T1X = T1V + T1W; + T22 = T1V - T1W; + } + T1Y = T1U + T1X; + T2D = KP866025403 * (T1X - T1U); + T23 = T21 + T22; + T2e = KP866025403 * (T22 - T21); + } + { + E T2O, T2V, T2R, T2W; + { + E T2M, T2N, T2P, T2Q; + T2M = rio[WS(vs, 3) + WS(rs, 2)]; + T2N = rio[WS(vs, 3) + WS(rs, 5)]; + T2O = T2M + T2N; + T2V = T2M - T2N; + T2P = rio[WS(vs, 3) + WS(rs, 4)]; + T2Q = rio[WS(vs, 3) + WS(rs, 1)]; + T2R = T2P + T2Q; + T2W = T2P - T2Q; + } + T2S = T2O + T2R; + T3x = KP866025403 * (T2R - T2O); + T2X = T2V + T2W; + T38 = KP866025403 * (T2W - T2V); + } + { + E T31, T3p, T34, T3q; + { + E T2Z, T30, T32, T33; + T2Z = iio[WS(vs, 3) + WS(rs, 2)]; + T30 = iio[WS(vs, 3) + WS(rs, 5)]; + T31 = T2Z - T30; + T3p = T2Z + T30; + T32 = iio[WS(vs, 3) + WS(rs, 4)]; + T33 = iio[WS(vs, 3) + WS(rs, 1)]; + T34 = T32 - T33; + T3q = T32 + T33; + } + T35 = KP866025403 * (T31 - 
T34); + T3v = T3p + T3q; + T3c = T31 + T34; + T3r = KP866025403 * (T3p - T3q); + } + { + E T3I, T3P, T3L, T3Q; + { + E T3G, T3H, T3J, T3K; + T3G = rio[WS(vs, 4) + WS(rs, 2)]; + T3H = rio[WS(vs, 4) + WS(rs, 5)]; + T3I = T3G + T3H; + T3P = T3G - T3H; + T3J = rio[WS(vs, 4) + WS(rs, 4)]; + T3K = rio[WS(vs, 4) + WS(rs, 1)]; + T3L = T3J + T3K; + T3Q = T3J - T3K; + } + T3M = T3I + T3L; + T4r = KP866025403 * (T3L - T3I); + T3R = T3P + T3Q; + T42 = KP866025403 * (T3Q - T3P); + } + { + E T3V, T4j, T3Y, T4k; + { + E T3T, T3U, T3W, T3X; + T3T = iio[WS(vs, 4) + WS(rs, 2)]; + T3U = iio[WS(vs, 4) + WS(rs, 5)]; + T3V = T3T - T3U; + T4j = T3T + T3U; + T3W = iio[WS(vs, 4) + WS(rs, 4)]; + T3X = iio[WS(vs, 4) + WS(rs, 1)]; + T3Y = T3W - T3X; + T4k = T3W + T3X; + } + T3Z = KP866025403 * (T3V - T3Y); + T4p = T4j + T4k; + T46 = T3V + T3Y; + T4l = KP866025403 * (T4j - T4k); + } + { + E T4P, T5d, T4S, T5e; + { + E T4N, T4O, T4Q, T4R; + T4N = iio[WS(vs, 5) + WS(rs, 2)]; + T4O = iio[WS(vs, 5) + WS(rs, 5)]; + T4P = T4N - T4O; + T5d = T4N + T4O; + T4Q = iio[WS(vs, 5) + WS(rs, 4)]; + T4R = iio[WS(vs, 5) + WS(rs, 1)]; + T4S = T4Q - T4R; + T5e = T4Q + T4R; + } + T4T = KP866025403 * (T4P - T4S); + T5j = T5d + T5e; + T50 = T4P + T4S; + T5f = KP866025403 * (T5d - T5e); + } + { + E T4C, T4J, T4F, T4K; + { + E T4A, T4B, T4D, T4E; + T4A = rio[WS(vs, 5) + WS(rs, 2)]; + T4B = rio[WS(vs, 5) + WS(rs, 5)]; + T4C = T4A + T4B; + T4J = T4A - T4B; + T4D = rio[WS(vs, 5) + WS(rs, 4)]; + T4E = rio[WS(vs, 5) + WS(rs, 1)]; + T4F = T4D + T4E; + T4K = T4D - T4E; + } + T4G = T4C + T4F; + T5l = KP866025403 * (T4F - T4C); + T4L = T4J + T4K; + T4W = KP866025403 * (T4K - T4J); + } + rio[0] = T3 + Ta; + iio[0] = TM + TN; + rio[WS(rs, 1)] = TX + T14; + iio[WS(rs, 1)] = T1G + T1H; + rio[WS(rs, 3)] = T2L + T2S; + rio[WS(rs, 2)] = T1R + T1Y; + iio[WS(rs, 2)] = T2A + T2B; + iio[WS(rs, 3)] = T3u + T3v; + iio[WS(rs, 4)] = T4o + T4p; + iio[WS(rs, 5)] = T5i + T5j; + rio[WS(rs, 5)] = T4z + T4G; + rio[WS(rs, 4)] = T3F + T3M; + { + E 
T1w, T1y, T1v, T1x; + T1w = T16 + T19; + T1y = T1n + T1o; + T1v = W[4]; + T1x = W[5]; + rio[WS(vs, 3) + WS(rs, 1)] = FMA(T1v, T1w, T1x * T1y); + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1x, T1w, T1v * T1y); + } + { + E T58, T5a, T57, T59; + T58 = T4I + T4L; + T5a = T4Z + T50; + T57 = W[4]; + T59 = W[5]; + rio[WS(vs, 3) + WS(rs, 5)] = FMA(T57, T58, T59 * T5a); + iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T59, T58, T57 * T5a); + } + { + E TC, TE, TB, TD; + TC = Tc + Tf; + TE = Tt + Tu; + TB = W[4]; + TD = W[5]; + rio[WS(vs, 3)] = FMA(TB, TC, TD * TE); + iio[WS(vs, 3)] = FNMS(TD, TC, TB * TE); + } + { + E T4e, T4g, T4d, T4f; + T4e = T3O + T3R; + T4g = T45 + T46; + T4d = W[4]; + T4f = W[5]; + rio[WS(vs, 3) + WS(rs, 4)] = FMA(T4d, T4e, T4f * T4g); + iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T4f, T4e, T4d * T4g); + } + { + E T3k, T3m, T3j, T3l; + T3k = T2U + T2X; + T3m = T3b + T3c; + T3j = W[4]; + T3l = W[5]; + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T3j, T3k, T3l * T3m); + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T3l, T3k, T3j * T3m); + } + { + E T2q, T2s, T2p, T2r; + T2q = T20 + T23; + T2s = T2h + T2i; + T2p = W[4]; + T2r = W[5]; + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2p, T2q, T2r * T2s); + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2r, T2q, T2p * T2s); + } + { + E T5g, T5o, T5m, T5q, T5c, T5k; + T5c = FNMS(KP500000000, T4G, T4z); + T5g = T5c - T5f; + T5o = T5c + T5f; + T5k = FNMS(KP500000000, T5j, T5i); + T5m = T5k - T5l; + T5q = T5l + T5k; + { + E T5b, T5h, T5n, T5p; + T5b = W[2]; + T5h = W[3]; + rio[WS(vs, 2) + WS(rs, 5)] = FMA(T5b, T5g, T5h * T5m); + iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T5h, T5g, T5b * T5m); + T5n = W[6]; + T5p = W[7]; + rio[WS(vs, 4) + WS(rs, 5)] = FMA(T5n, T5o, T5p * T5q); + iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T5p, T5o, T5n * T5q); + } + } + { + E To, Ty, Tw, TA, Tg, Tv; + Tg = FNMS(KP500000000, Tf, Tc); + To = Tg + Tn; + Ty = Tg - Tn; + Tv = FNMS(KP500000000, Tu, Tt); + Tw = Tq + Tv; + TA = Tv - Tq; + { + E Tb, Tp, Tx, Tz; + Tb = W[0]; + Tp = W[1]; + rio[WS(vs, 1)] = FMA(Tb, To, Tp * Tw); + 
iio[WS(vs, 1)] = FNMS(Tp, To, Tb * Tw); + Tx = W[8]; + Tz = W[9]; + rio[WS(vs, 5)] = FMA(Tx, Ty, Tz * TA); + iio[WS(vs, 5)] = FNMS(Tz, Ty, Tx * TA); + } + } + { + E T36, T3g, T3e, T3i, T2Y, T3d; + T2Y = FNMS(KP500000000, T2X, T2U); + T36 = T2Y + T35; + T3g = T2Y - T35; + T3d = FNMS(KP500000000, T3c, T3b); + T3e = T38 + T3d; + T3i = T3d - T38; + { + E T2T, T37, T3f, T3h; + T2T = W[0]; + T37 = W[1]; + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T2T, T36, T37 * T3e); + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T37, T36, T2T * T3e); + T3f = W[8]; + T3h = W[9]; + rio[WS(vs, 5) + WS(rs, 3)] = FMA(T3f, T3g, T3h * T3i); + iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T3h, T3g, T3f * T3i); + } + } + { + E T2y, T2G, T2E, T2I, T2u, T2C; + T2u = FNMS(KP500000000, T1Y, T1R); + T2y = T2u - T2x; + T2G = T2u + T2x; + T2C = FNMS(KP500000000, T2B, T2A); + T2E = T2C - T2D; + T2I = T2D + T2C; + { + E T2t, T2z, T2F, T2H; + T2t = W[2]; + T2z = W[3]; + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T2t, T2y, T2z * T2E); + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T2z, T2y, T2t * T2E); + T2F = W[6]; + T2H = W[7]; + rio[WS(vs, 4) + WS(rs, 2)] = FMA(T2F, T2G, T2H * T2I); + iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T2H, T2G, T2F * T2I); + } + } + { + E T3s, T3A, T3y, T3C, T3o, T3w; + T3o = FNMS(KP500000000, T2S, T2L); + T3s = T3o - T3r; + T3A = T3o + T3r; + T3w = FNMS(KP500000000, T3v, T3u); + T3y = T3w - T3x; + T3C = T3x + T3w; + { + E T3n, T3t, T3z, T3B; + T3n = W[2]; + T3t = W[3]; + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T3n, T3s, T3t * T3y); + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T3t, T3s, T3n * T3y); + T3z = W[6]; + T3B = W[7]; + rio[WS(vs, 4) + WS(rs, 3)] = FMA(T3z, T3A, T3B * T3C); + iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T3B, T3A, T3z * T3C); + } + } + { + E T1E, T1M, T1K, T1O, T1A, T1I; + T1A = FNMS(KP500000000, T14, TX); + T1E = T1A - T1D; + T1M = T1A + T1D; + T1I = FNMS(KP500000000, T1H, T1G); + T1K = T1I - T1J; + T1O = T1J + T1I; + { + E T1z, T1F, T1L, T1N; + T1z = W[2]; + T1F = W[3]; + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1z, T1E, T1F * T1K); + iio[WS(vs, 
2) + WS(rs, 1)] = FNMS(T1F, T1E, T1z * T1K); + T1L = W[6]; + T1N = W[7]; + rio[WS(vs, 4) + WS(rs, 1)] = FMA(T1L, T1M, T1N * T1O); + iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T1N, T1M, T1L * T1O); + } + } + { + E T4m, T4u, T4s, T4w, T4i, T4q; + T4i = FNMS(KP500000000, T3M, T3F); + T4m = T4i - T4l; + T4u = T4i + T4l; + T4q = FNMS(KP500000000, T4p, T4o); + T4s = T4q - T4r; + T4w = T4r + T4q; + { + E T4h, T4n, T4t, T4v; + T4h = W[2]; + T4n = W[3]; + rio[WS(vs, 2) + WS(rs, 4)] = FMA(T4h, T4m, T4n * T4s); + iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T4n, T4m, T4h * T4s); + T4t = W[6]; + T4v = W[7]; + rio[WS(vs, 4) + WS(rs, 4)] = FMA(T4t, T4u, T4v * T4w); + iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T4v, T4u, T4t * T4w); + } + } + { + E TK, TS, TQ, TU, TG, TO; + TG = FNMS(KP500000000, Ta, T3); + TK = TG - TJ; + TS = TG + TJ; + TO = FNMS(KP500000000, TN, TM); + TQ = TO - TP; + TU = TP + TO; + { + E TF, TL, TR, TT; + TF = W[2]; + TL = W[3]; + rio[WS(vs, 2)] = FMA(TF, TK, TL * TQ); + iio[WS(vs, 2)] = FNMS(TL, TK, TF * TQ); + TR = W[6]; + TT = W[7]; + rio[WS(vs, 4)] = FMA(TR, TS, TT * TU); + iio[WS(vs, 4)] = FNMS(TT, TS, TR * TU); + } + } + { + E T2c, T2m, T2k, T2o, T24, T2j; + T24 = FNMS(KP500000000, T23, T20); + T2c = T24 + T2b; + T2m = T24 - T2b; + T2j = FNMS(KP500000000, T2i, T2h); + T2k = T2e + T2j; + T2o = T2j - T2e; + { + E T1Z, T2d, T2l, T2n; + T1Z = W[0]; + T2d = W[1]; + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1Z, T2c, T2d * T2k); + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T2d, T2c, T1Z * T2k); + T2l = W[8]; + T2n = W[9]; + rio[WS(vs, 5) + WS(rs, 2)] = FMA(T2l, T2m, T2n * T2o); + iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T2n, T2m, T2l * T2o); + } + } + { + E T40, T4a, T48, T4c, T3S, T47; + T3S = FNMS(KP500000000, T3R, T3O); + T40 = T3S + T3Z; + T4a = T3S - T3Z; + T47 = FNMS(KP500000000, T46, T45); + T48 = T42 + T47; + T4c = T47 - T42; + { + E T3N, T41, T49, T4b; + T3N = W[0]; + T41 = W[1]; + rio[WS(vs, 1) + WS(rs, 4)] = FMA(T3N, T40, T41 * T48); + iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T41, T40, T3N * T48); + T49 = 
W[8]; + T4b = W[9]; + rio[WS(vs, 5) + WS(rs, 4)] = FMA(T49, T4a, T4b * T4c); + iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T4b, T4a, T49 * T4c); + } + } + { + E T1i, T1s, T1q, T1u, T1a, T1p; + T1a = FNMS(KP500000000, T19, T16); + T1i = T1a + T1h; + T1s = T1a - T1h; + T1p = FNMS(KP500000000, T1o, T1n); + T1q = T1k + T1p; + T1u = T1p - T1k; + { + E T15, T1j, T1r, T1t; + T15 = W[0]; + T1j = W[1]; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(T15, T1i, T1j * T1q); + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T1j, T1i, T15 * T1q); + T1r = W[8]; + T1t = W[9]; + rio[WS(vs, 5) + WS(rs, 1)] = FMA(T1r, T1s, T1t * T1u); + iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T1t, T1s, T1r * T1u); + } + } + { + E T4U, T54, T52, T56, T4M, T51; + T4M = FNMS(KP500000000, T4L, T4I); + T4U = T4M + T4T; + T54 = T4M - T4T; + T51 = FNMS(KP500000000, T50, T4Z); + T52 = T4W + T51; + T56 = T51 - T4W; + { + E T4H, T4V, T53, T55; + T4H = W[0]; + T4V = W[1]; + rio[WS(vs, 1) + WS(rs, 5)] = FMA(T4H, T4U, T4V * T52); + iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T4V, T4U, T4H * T52); + T53 = W[8]; + T55 = W[9]; + rio[WS(vs, 5) + WS(rs, 5)] = FMA(T53, T54, T55 * T56); + iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T55, T54, T53 * T56); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 6}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 6, "q1_6", twinstr, &GENUS, {192, 84, 84, 0}, 0, 0, 0 }; + +void X(codelet_q1_6) (planner *p) { + X(kdft_difsq_register) (p, q1_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/q1_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2396 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either 
version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:59 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h */ + +/* + * This function contains 528 FP additions, 288 FP multiplications, + * (or, 352 additions, 112 multiplications, 176 fused multiply/add), + * 190 stack variables, 1 constants, and 256 memory accesses + */ +#include "q.h" + +static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T9C, T9N, T9l, T9E, T9D, T9O; + { + E TV, Tk, T1d, T7, T18, T1t, TQ, TD, T5t, T4S, T5L, T4F, T5G, T61, T5o; + E T5b, T6Z, T6o, T7h, T6b, T7c, T7x, T6U, T6H, Tbx, TaW, TbP, TaJ, TbK, Tc5; + E Tbs, Tbf, T2r, T1Q, T2J, T1D, T2E, T2Z, T2m, T29, T3X, T3m, T4f, T39, T4a; + E T4v, T3S, T3F, T8v, T7U, T8N, T7H, T8I, T93, T8q, T8d, Ta1, T9q, Taj, T9d; + E Tae, Taz, T9W, T9J, Te, T19, T1u, T1g, Tv, TR, TG, TW, T5H, T4M, T5O; + E T62, T5p, T53, T5u, T5e, T6i, T7d, T7y, T7k, T6z, T6V, T6K, T70, TbL, TaQ; + E TbS, Tc6, Tbt, Tb7, Tby, 
Tbi, T1K, T2F, T30, T2M, T21, T2n, T2c, T2s, T4b; + E T3g, T4i, T4w, T3T, T3x, T3Y, T3I, T7O, T8J, T94, T8Q, T85, T8r, T8g, T8w; + E Tak, T9r, T9K, T9A, Taf, T9k, Tal, T9u; + { + E T9a, T9F, T99, Tac, T9p, T9b, T9G, T9H; + { + E TaG, Tbb, TaF, TbI, TaV, TaH, Tbc, Tbd; + { + E T4C, T57, T4B, T5E, T4R, T4D, T58, T59; + { + E T4, Tz, T3, T16, Tj, T5, TA, TB; + { + E T1, T2, Th, Ti; + T1 = rio[0]; + T2 = rio[WS(rs, 4)]; + Th = iio[0]; + Ti = iio[WS(rs, 4)]; + T4 = rio[WS(rs, 2)]; + Tz = T1 - T2; + T3 = T1 + T2; + T16 = Th + Ti; + Tj = Th - Ti; + T5 = rio[WS(rs, 6)]; + TA = iio[WS(rs, 2)]; + TB = iio[WS(rs, 6)]; + } + { + E T4z, T4A, T4P, T4Q; + T4z = rio[WS(vs, 3)]; + { + E Tg, T6, T17, TC; + Tg = T4 - T5; + T6 = T4 + T5; + T17 = TA + TB; + TC = TA - TB; + TV = Tj - Tg; + Tk = Tg + Tj; + T1d = T3 - T6; + T7 = T3 + T6; + T18 = T16 - T17; + T1t = T16 + T17; + TQ = Tz + TC; + TD = Tz - TC; + T4A = rio[WS(vs, 3) + WS(rs, 4)]; + } + T4P = iio[WS(vs, 3)]; + T4Q = iio[WS(vs, 3) + WS(rs, 4)]; + T4C = rio[WS(vs, 3) + WS(rs, 2)]; + T57 = T4z - T4A; + T4B = T4z + T4A; + T5E = T4P + T4Q; + T4R = T4P - T4Q; + T4D = rio[WS(vs, 3) + WS(rs, 6)]; + T58 = iio[WS(vs, 3) + WS(rs, 2)]; + T59 = iio[WS(vs, 3) + WS(rs, 6)]; + } + } + { + E T68, T6D, T67, T7a, T6n, T69, T6E, T6F; + { + E T65, T66, T6l, T6m; + T65 = rio[WS(vs, 4)]; + { + E T4O, T4E, T5F, T5a; + T4O = T4C - T4D; + T4E = T4C + T4D; + T5F = T58 + T59; + T5a = T58 - T59; + T5t = T4R - T4O; + T4S = T4O + T4R; + T5L = T4B - T4E; + T4F = T4B + T4E; + T5G = T5E - T5F; + T61 = T5E + T5F; + T5o = T57 + T5a; + T5b = T57 - T5a; + T66 = rio[WS(vs, 4) + WS(rs, 4)]; + } + T6l = iio[WS(vs, 4)]; + T6m = iio[WS(vs, 4) + WS(rs, 4)]; + T68 = rio[WS(vs, 4) + WS(rs, 2)]; + T6D = T65 - T66; + T67 = T65 + T66; + T7a = T6l + T6m; + T6n = T6l - T6m; + T69 = rio[WS(vs, 4) + WS(rs, 6)]; + T6E = iio[WS(vs, 4) + WS(rs, 2)]; + T6F = iio[WS(vs, 4) + WS(rs, 6)]; + } + { + E TaD, TaE, TaT, TaU; + TaD = rio[WS(vs, 7)]; + { + E T6k, T6a, T7b, T6G; + T6k = T68 - 
T69; + T6a = T68 + T69; + T7b = T6E + T6F; + T6G = T6E - T6F; + T6Z = T6n - T6k; + T6o = T6k + T6n; + T7h = T67 - T6a; + T6b = T67 + T6a; + T7c = T7a - T7b; + T7x = T7a + T7b; + T6U = T6D + T6G; + T6H = T6D - T6G; + TaE = rio[WS(vs, 7) + WS(rs, 4)]; + } + TaT = iio[WS(vs, 7)]; + TaU = iio[WS(vs, 7) + WS(rs, 4)]; + TaG = rio[WS(vs, 7) + WS(rs, 2)]; + Tbb = TaD - TaE; + TaF = TaD + TaE; + TbI = TaT + TaU; + TaV = TaT - TaU; + TaH = rio[WS(vs, 7) + WS(rs, 6)]; + Tbc = iio[WS(vs, 7) + WS(rs, 2)]; + Tbd = iio[WS(vs, 7) + WS(rs, 6)]; + } + } + } + { + E T36, T3B, T35, T48, T3l, T37, T3C, T3D; + { + E T1A, T25, T1z, T2C, T1P, T1B, T26, T27; + { + E T1x, T1y, T1N, T1O; + T1x = rio[WS(vs, 1)]; + { + E TaS, TaI, TbJ, Tbe; + TaS = TaG - TaH; + TaI = TaG + TaH; + TbJ = Tbc + Tbd; + Tbe = Tbc - Tbd; + Tbx = TaV - TaS; + TaW = TaS + TaV; + TbP = TaF - TaI; + TaJ = TaF + TaI; + TbK = TbI - TbJ; + Tc5 = TbI + TbJ; + Tbs = Tbb + Tbe; + Tbf = Tbb - Tbe; + T1y = rio[WS(vs, 1) + WS(rs, 4)]; + } + T1N = iio[WS(vs, 1)]; + T1O = iio[WS(vs, 1) + WS(rs, 4)]; + T1A = rio[WS(vs, 1) + WS(rs, 2)]; + T25 = T1x - T1y; + T1z = T1x + T1y; + T2C = T1N + T1O; + T1P = T1N - T1O; + T1B = rio[WS(vs, 1) + WS(rs, 6)]; + T26 = iio[WS(vs, 1) + WS(rs, 2)]; + T27 = iio[WS(vs, 1) + WS(rs, 6)]; + } + { + E T33, T34, T3j, T3k; + T33 = rio[WS(vs, 2)]; + { + E T1M, T1C, T2D, T28; + T1M = T1A - T1B; + T1C = T1A + T1B; + T2D = T26 + T27; + T28 = T26 - T27; + T2r = T1P - T1M; + T1Q = T1M + T1P; + T2J = T1z - T1C; + T1D = T1z + T1C; + T2E = T2C - T2D; + T2Z = T2C + T2D; + T2m = T25 + T28; + T29 = T25 - T28; + T34 = rio[WS(vs, 2) + WS(rs, 4)]; + } + T3j = iio[WS(vs, 2)]; + T3k = iio[WS(vs, 2) + WS(rs, 4)]; + T36 = rio[WS(vs, 2) + WS(rs, 2)]; + T3B = T33 - T34; + T35 = T33 + T34; + T48 = T3j + T3k; + T3l = T3j - T3k; + T37 = rio[WS(vs, 2) + WS(rs, 6)]; + T3C = iio[WS(vs, 2) + WS(rs, 2)]; + T3D = iio[WS(vs, 2) + WS(rs, 6)]; + } + } + { + E T7E, T89, T7D, T8G, T7T, T7F, T8a, T8b; + { + E T7B, T7C, T7R, T7S; + T7B = 
rio[WS(vs, 5)]; + { + E T3i, T38, T49, T3E; + T3i = T36 - T37; + T38 = T36 + T37; + T49 = T3C + T3D; + T3E = T3C - T3D; + T3X = T3l - T3i; + T3m = T3i + T3l; + T4f = T35 - T38; + T39 = T35 + T38; + T4a = T48 - T49; + T4v = T48 + T49; + T3S = T3B + T3E; + T3F = T3B - T3E; + T7C = rio[WS(vs, 5) + WS(rs, 4)]; + } + T7R = iio[WS(vs, 5)]; + T7S = iio[WS(vs, 5) + WS(rs, 4)]; + T7E = rio[WS(vs, 5) + WS(rs, 2)]; + T89 = T7B - T7C; + T7D = T7B + T7C; + T8G = T7R + T7S; + T7T = T7R - T7S; + T7F = rio[WS(vs, 5) + WS(rs, 6)]; + T8a = iio[WS(vs, 5) + WS(rs, 2)]; + T8b = iio[WS(vs, 5) + WS(rs, 6)]; + } + { + E T97, T98, T9n, T9o; + T97 = rio[WS(vs, 6)]; + { + E T7Q, T7G, T8H, T8c; + T7Q = T7E - T7F; + T7G = T7E + T7F; + T8H = T8a + T8b; + T8c = T8a - T8b; + T8v = T7T - T7Q; + T7U = T7Q + T7T; + T8N = T7D - T7G; + T7H = T7D + T7G; + T8I = T8G - T8H; + T93 = T8G + T8H; + T8q = T89 + T8c; + T8d = T89 - T8c; + T98 = rio[WS(vs, 6) + WS(rs, 4)]; + } + T9n = iio[WS(vs, 6)]; + T9o = iio[WS(vs, 6) + WS(rs, 4)]; + T9a = rio[WS(vs, 6) + WS(rs, 2)]; + T9F = T97 - T98; + T99 = T97 + T98; + Tac = T9n + T9o; + T9p = T9n - T9o; + T9b = rio[WS(vs, 6) + WS(rs, 6)]; + T9G = iio[WS(vs, 6) + WS(rs, 2)]; + T9H = iio[WS(vs, 6) + WS(rs, 6)]; + } + } + } + } + { + E TbQ, TaX, Tbg, Tb6, TbR, Tb0; + { + E T5M, T4T, T5c, T52, T5N, T4W; + { + E Tu, TE, TF, Tp; + { + E Tb, Tq, Ta, T1e, Tt, Tc, Tm, Tn; + { + E T8, T9, Tr, Ts; + T8 = rio[WS(rs, 1)]; + { + E T9m, T9c, Tad, T9I; + T9m = T9a - T9b; + T9c = T9a + T9b; + Tad = T9G + T9H; + T9I = T9G - T9H; + Ta1 = T9p - T9m; + T9q = T9m + T9p; + Taj = T99 - T9c; + T9d = T99 + T9c; + Tae = Tac - Tad; + Taz = Tac + Tad; + T9W = T9F + T9I; + T9J = T9F - T9I; + T9 = rio[WS(rs, 5)]; + } + Tr = iio[WS(rs, 1)]; + Ts = iio[WS(rs, 5)]; + Tb = rio[WS(rs, 7)]; + Tq = T8 - T9; + Ta = T8 + T9; + T1e = Tr + Ts; + Tt = Tr - Ts; + Tc = rio[WS(rs, 3)]; + Tm = iio[WS(rs, 7)]; + Tn = iio[WS(rs, 3)]; + } + { + E Tl, Td, T1f, To; + Tu = Tq + Tt; + TE = Tt - Tq; + Tl = Tb - Tc; + Td = 
Tb + Tc; + T1f = Tm + Tn; + To = Tm - Tn; + Te = Ta + Td; + T19 = Td - Ta; + T1u = T1e + T1f; + T1g = T1e - T1f; + TF = Tl + To; + Tp = Tl - To; + } + } + { + E T4I, T4Y, T4U, T51, T4L, T4V; + { + E T4Z, T50, T4G, T4H, T4J, T4K; + T4G = rio[WS(vs, 3) + WS(rs, 1)]; + T4H = rio[WS(vs, 3) + WS(rs, 5)]; + Tv = Tp - Tu; + TR = Tu + Tp; + TG = TE - TF; + TW = TE + TF; + T4I = T4G + T4H; + T4Y = T4G - T4H; + T4Z = iio[WS(vs, 3) + WS(rs, 1)]; + T50 = iio[WS(vs, 3) + WS(rs, 5)]; + T4J = rio[WS(vs, 3) + WS(rs, 7)]; + T4K = rio[WS(vs, 3) + WS(rs, 3)]; + T4U = iio[WS(vs, 3) + WS(rs, 7)]; + T51 = T4Z - T50; + T5M = T4Z + T50; + T4L = T4J + T4K; + T4T = T4J - T4K; + T4V = iio[WS(vs, 3) + WS(rs, 3)]; + } + T5c = T51 - T4Y; + T52 = T4Y + T51; + T5H = T4L - T4I; + T4M = T4I + T4L; + T5N = T4U + T4V; + T4W = T4U - T4V; + } + } + { + E T7i, T6p, T6y, T6I, T6s, T7j; + { + E T6e, T6u, T6q, T6x, T6h, T6r; + { + E T6v, T6w, T6f, T6g; + { + E T4X, T5d, T6c, T6d; + T6c = rio[WS(vs, 4) + WS(rs, 1)]; + T6d = rio[WS(vs, 4) + WS(rs, 5)]; + T5O = T5M - T5N; + T62 = T5M + T5N; + T4X = T4T - T4W; + T5d = T4T + T4W; + T6e = T6c + T6d; + T6u = T6c - T6d; + T5p = T52 + T4X; + T53 = T4X - T52; + T5u = T5c + T5d; + T5e = T5c - T5d; + T6v = iio[WS(vs, 4) + WS(rs, 1)]; + T6w = iio[WS(vs, 4) + WS(rs, 5)]; + } + T6f = rio[WS(vs, 4) + WS(rs, 7)]; + T6g = rio[WS(vs, 4) + WS(rs, 3)]; + T6q = iio[WS(vs, 4) + WS(rs, 7)]; + T7i = T6v + T6w; + T6x = T6v - T6w; + T6p = T6f - T6g; + T6h = T6f + T6g; + T6r = iio[WS(vs, 4) + WS(rs, 3)]; + } + T6y = T6u + T6x; + T6I = T6x - T6u; + T6i = T6e + T6h; + T7d = T6h - T6e; + T6s = T6q - T6r; + T7j = T6q + T6r; + } + { + E Tb2, TaM, TaY, Tb5, TaP, TaZ; + { + E Tb3, Tb4, TaN, TaO; + { + E T6J, T6t, TaK, TaL; + TaK = rio[WS(vs, 7) + WS(rs, 1)]; + TaL = rio[WS(vs, 7) + WS(rs, 5)]; + T7y = T7i + T7j; + T7k = T7i - T7j; + T6J = T6p + T6s; + T6t = T6p - T6s; + Tb2 = TaK - TaL; + TaM = TaK + TaL; + T6z = T6t - T6y; + T6V = T6y + T6t; + T6K = T6I - T6J; + T70 = T6I + T6J; + Tb3 = 
iio[WS(vs, 7) + WS(rs, 1)]; + Tb4 = iio[WS(vs, 7) + WS(rs, 5)]; + } + TaN = rio[WS(vs, 7) + WS(rs, 7)]; + TaO = rio[WS(vs, 7) + WS(rs, 3)]; + TaY = iio[WS(vs, 7) + WS(rs, 7)]; + Tb5 = Tb3 - Tb4; + TbQ = Tb3 + Tb4; + TaP = TaN + TaO; + TaX = TaN - TaO; + TaZ = iio[WS(vs, 7) + WS(rs, 3)]; + } + Tbg = Tb5 - Tb2; + Tb6 = Tb2 + Tb5; + TbL = TaP - TaM; + TaQ = TaM + TaP; + TbR = TaY + TaZ; + Tb0 = TaY - TaZ; + } + } + } + { + E T4g, T3n, T3G, T3w, T4h, T3q; + { + E T2K, T1R, T20, T2a, T1U, T2L; + { + E T1G, T1W, T1S, T1Z, T1J, T1T; + { + E T1X, T1Y, T1H, T1I; + { + E Tb1, Tbh, T1E, T1F; + T1E = rio[WS(vs, 1) + WS(rs, 1)]; + T1F = rio[WS(vs, 1) + WS(rs, 5)]; + TbS = TbQ - TbR; + Tc6 = TbQ + TbR; + Tb1 = TaX - Tb0; + Tbh = TaX + Tb0; + T1G = T1E + T1F; + T1W = T1E - T1F; + Tbt = Tb6 + Tb1; + Tb7 = Tb1 - Tb6; + Tby = Tbg + Tbh; + Tbi = Tbg - Tbh; + T1X = iio[WS(vs, 1) + WS(rs, 1)]; + T1Y = iio[WS(vs, 1) + WS(rs, 5)]; + } + T1H = rio[WS(vs, 1) + WS(rs, 7)]; + T1I = rio[WS(vs, 1) + WS(rs, 3)]; + T1S = iio[WS(vs, 1) + WS(rs, 7)]; + T2K = T1X + T1Y; + T1Z = T1X - T1Y; + T1R = T1H - T1I; + T1J = T1H + T1I; + T1T = iio[WS(vs, 1) + WS(rs, 3)]; + } + T20 = T1W + T1Z; + T2a = T1Z - T1W; + T1K = T1G + T1J; + T2F = T1J - T1G; + T1U = T1S - T1T; + T2L = T1S + T1T; + } + { + E T3s, T3c, T3o, T3v, T3f, T3p; + { + E T3t, T3u, T3d, T3e; + { + E T2b, T1V, T3a, T3b; + T3a = rio[WS(vs, 2) + WS(rs, 1)]; + T3b = rio[WS(vs, 2) + WS(rs, 5)]; + T30 = T2K + T2L; + T2M = T2K - T2L; + T2b = T1R + T1U; + T1V = T1R - T1U; + T3s = T3a - T3b; + T3c = T3a + T3b; + T21 = T1V - T20; + T2n = T20 + T1V; + T2c = T2a - T2b; + T2s = T2a + T2b; + T3t = iio[WS(vs, 2) + WS(rs, 1)]; + T3u = iio[WS(vs, 2) + WS(rs, 5)]; + } + T3d = rio[WS(vs, 2) + WS(rs, 7)]; + T3e = rio[WS(vs, 2) + WS(rs, 3)]; + T3o = iio[WS(vs, 2) + WS(rs, 7)]; + T3v = T3t - T3u; + T4g = T3t + T3u; + T3f = T3d + T3e; + T3n = T3d - T3e; + T3p = iio[WS(vs, 2) + WS(rs, 3)]; + } + T3G = T3v - T3s; + T3w = T3s + T3v; + T4b = T3f - T3c; + T3g = T3c + T3f; 
+ T4h = T3o + T3p; + T3q = T3o - T3p; + } + } + { + E T8O, T7V, T84, T8e, T7Y, T8P; + { + E T7K, T80, T7W, T83, T7N, T7X; + { + E T81, T82, T7L, T7M; + { + E T3r, T3H, T7I, T7J; + T7I = rio[WS(vs, 5) + WS(rs, 1)]; + T7J = rio[WS(vs, 5) + WS(rs, 5)]; + T4i = T4g - T4h; + T4w = T4g + T4h; + T3r = T3n - T3q; + T3H = T3n + T3q; + T7K = T7I + T7J; + T80 = T7I - T7J; + T3T = T3w + T3r; + T3x = T3r - T3w; + T3Y = T3G + T3H; + T3I = T3G - T3H; + T81 = iio[WS(vs, 5) + WS(rs, 1)]; + T82 = iio[WS(vs, 5) + WS(rs, 5)]; + } + T7L = rio[WS(vs, 5) + WS(rs, 7)]; + T7M = rio[WS(vs, 5) + WS(rs, 3)]; + T7W = iio[WS(vs, 5) + WS(rs, 7)]; + T8O = T81 + T82; + T83 = T81 - T82; + T7V = T7L - T7M; + T7N = T7L + T7M; + T7X = iio[WS(vs, 5) + WS(rs, 3)]; + } + T84 = T80 + T83; + T8e = T83 - T80; + T7O = T7K + T7N; + T8J = T7N - T7K; + T7Y = T7W - T7X; + T8P = T7W + T7X; + } + { + E T9w, T9g, T9s, T9z, T9j, T9t; + { + E T9x, T9y, T9h, T9i; + { + E T8f, T7Z, T9e, T9f; + T9e = rio[WS(vs, 6) + WS(rs, 1)]; + T9f = rio[WS(vs, 6) + WS(rs, 5)]; + T94 = T8O + T8P; + T8Q = T8O - T8P; + T8f = T7V + T7Y; + T7Z = T7V - T7Y; + T9w = T9e - T9f; + T9g = T9e + T9f; + T85 = T7Z - T84; + T8r = T84 + T7Z; + T8g = T8e - T8f; + T8w = T8e + T8f; + T9x = iio[WS(vs, 6) + WS(rs, 1)]; + T9y = iio[WS(vs, 6) + WS(rs, 5)]; + } + T9h = rio[WS(vs, 6) + WS(rs, 7)]; + T9i = rio[WS(vs, 6) + WS(rs, 3)]; + T9s = iio[WS(vs, 6) + WS(rs, 7)]; + T9z = T9x - T9y; + Tak = T9x + T9y; + T9j = T9h + T9i; + T9r = T9h - T9i; + T9t = iio[WS(vs, 6) + WS(rs, 3)]; + } + T9K = T9z - T9w; + T9A = T9w + T9z; + Taf = T9j - T9g; + T9k = T9g + T9j; + Tal = T9s + T9t; + T9u = T9s - T9t; + } + } + } + } + } + { + E T9X, T9B, Ta2, T9M, T2T, T2Q, TbT, TbH, TbO, TbN, TbU; + { + E Tam, TaA, T9v, T9L; + rio[0] = T7 + Te; + iio[0] = T1t + T1u; + Tam = Tak - Tal; + TaA = Tak + Tal; + T9v = T9r - T9u; + T9L = T9r + T9u; + rio[WS(rs, 1)] = T1D + T1K; + iio[WS(rs, 1)] = T2Z + T30; + T9X = T9A + T9v; + T9B = T9v - T9A; + Ta2 = T9K + T9L; + T9M = T9K - T9L; + 
rio[WS(rs, 2)] = T39 + T3g; + iio[WS(rs, 2)] = T4v + T4w; + rio[WS(rs, 3)] = T4F + T4M; + iio[WS(rs, 3)] = T61 + T62; + rio[WS(rs, 4)] = T6b + T6i; + iio[WS(rs, 4)] = T7x + T7y; + rio[WS(rs, 5)] = T7H + T7O; + iio[WS(rs, 5)] = T93 + T94; + rio[WS(rs, 6)] = T9d + T9k; + iio[WS(rs, 6)] = Taz + TaA; + rio[WS(rs, 7)] = TaJ + TaQ; + iio[WS(rs, 7)] = Tc5 + Tc6; + { + E T10, T13, T1h, T1a, Tat, Taq, TbC, TbF, TbE, TbG, TbD; + { + E T1q, T1v, T1s, T1w, T1r; + { + E T2N, T2B, T2I, T2H, T2O; + { + E TS, TX, TP, TU, T2G, TY, TT; + T10 = FMA(KP707106781, TR, TQ); + TS = FNMS(KP707106781, TR, TQ); + TX = FNMS(KP707106781, TW, TV); + T13 = FMA(KP707106781, TW, TV); + TP = W[8]; + TU = W[9]; + T2T = T2J + T2M; + T2N = T2J - T2M; + T2G = T2E - T2F; + T2Q = T2F + T2E; + TY = TP * TX; + TT = TP * TS; + T2B = W[10]; + T2I = W[11]; + iio[WS(vs, 5)] = FNMS(TU, TS, TY); + rio[WS(vs, 5)] = FMA(TU, TX, TT); + T2H = T2B * T2G; + T2O = T2I * T2G; + } + { + E T1n, T1k, T1j, T1m, T1l, T1o, T1p; + T1h = T1d - T1g; + T1n = T1d + T1g; + T1k = T19 + T18; + T1a = T18 - T19; + iio[WS(vs, 6) + WS(rs, 1)] = FNMS(T2I, T2N, T2H); + rio[WS(vs, 6) + WS(rs, 1)] = FMA(T2B, T2N, T2O); + T1j = W[2]; + T1m = W[3]; + T1q = T7 - Te; + T1v = T1t - T1u; + T1l = T1j * T1k; + T1o = T1m * T1k; + T1p = W[6]; + T1s = W[7]; + iio[WS(vs, 2)] = FNMS(T1m, T1n, T1l); + rio[WS(vs, 2)] = FMA(T1j, T1n, T1o); + T1w = T1p * T1v; + T1r = T1p * T1q; + } + } + { + E Tc2, Tc7, Tc4, Tc8, Tc3; + { + E Tan, Tag, Tab, Tai, Tah, Tao, Tc1; + Tat = Taj + Tam; + Tan = Taj - Tam; + Tag = Tae - Taf; + Taq = Taf + Tae; + iio[WS(vs, 4)] = FNMS(T1s, T1q, T1w); + rio[WS(vs, 4)] = FMA(T1s, T1v, T1r); + Tab = W[10]; + Tai = W[11]; + Tc2 = TaJ - TaQ; + Tc7 = Tc5 - Tc6; + Tah = Tab * Tag; + Tao = Tai * Tag; + Tc1 = W[6]; + Tc4 = W[7]; + iio[WS(vs, 6) + WS(rs, 6)] = FNMS(Tai, Tan, Tah); + rio[WS(vs, 6) + WS(rs, 6)] = FMA(Tab, Tan, Tao); + Tc8 = Tc1 * Tc7; + Tc3 = Tc1 * Tc2; + } + { + E Tbu, Tbz, Tbr, Tbw, TbA, Tbv, TbB; + TbC = FMA(KP707106781, Tbt, 
Tbs); + Tbu = FNMS(KP707106781, Tbt, Tbs); + Tbz = FNMS(KP707106781, Tby, Tbx); + TbF = FMA(KP707106781, Tby, Tbx); + iio[WS(vs, 4) + WS(rs, 7)] = FNMS(Tc4, Tc2, Tc8); + rio[WS(vs, 4) + WS(rs, 7)] = FMA(Tc4, Tc7, Tc3); + Tbr = W[8]; + Tbw = W[9]; + TbA = Tbr * Tbz; + Tbv = Tbr * Tbu; + TbB = W[0]; + TbE = W[1]; + iio[WS(vs, 5) + WS(rs, 7)] = FNMS(Tbw, Tbu, TbA); + rio[WS(vs, 5) + WS(rs, 7)] = FMA(Tbw, Tbz, Tbv); + TbG = TbB * TbF; + TbD = TbB * TbC; + } + } + } + { + E T2o, T2t, T2q, T2u, T2p; + { + E T2w, T2z, T2y, T2A, T2x; + { + E TZ, T12, T14, T11, T2v; + iio[WS(vs, 1) + WS(rs, 7)] = FNMS(TbE, TbC, TbG); + rio[WS(vs, 1) + WS(rs, 7)] = FMA(TbE, TbF, TbD); + TZ = W[0]; + T12 = W[1]; + T2o = FNMS(KP707106781, T2n, T2m); + T2w = FMA(KP707106781, T2n, T2m); + T2z = FMA(KP707106781, T2s, T2r); + T2t = FNMS(KP707106781, T2s, T2r); + T14 = TZ * T13; + T11 = TZ * T10; + T2v = W[0]; + T2y = W[1]; + iio[WS(vs, 1)] = FNMS(T12, T10, T14); + rio[WS(vs, 1)] = FMA(T12, T13, T11); + T2A = T2v * T2z; + T2x = T2v * T2w; + } + { + E T15, T1c, T1b, T1i, T2l; + iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T2y, T2w, T2A); + rio[WS(vs, 1) + WS(rs, 1)] = FMA(T2y, T2z, T2x); + T15 = W[10]; + T1c = W[11]; + T1b = T15 * T1a; + T1i = T1c * T1a; + T2l = W[8]; + T2q = W[9]; + iio[WS(vs, 6)] = FNMS(T1c, T1h, T1b); + rio[WS(vs, 6)] = FMA(T15, T1h, T1i); + T2u = T2l * T2t; + T2p = T2l * T2o; + } + } + { + E TbZ, TbM, TbV, TbY, TbX, Tc0; + { + E Tap, Tas, TbW, Tar, Tau; + iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T2q, T2o, T2u); + rio[WS(vs, 5) + WS(rs, 1)] = FMA(T2q, T2t, T2p); + Tap = W[2]; + Tas = W[3]; + TbT = TbP - TbS; + TbZ = TbP + TbS; + TbW = TbL + TbK; + TbM = TbK - TbL; + Tar = Tap * Taq; + Tau = Tas * Taq; + TbV = W[2]; + TbY = W[3]; + iio[WS(vs, 2) + WS(rs, 6)] = FNMS(Tas, Tat, Tar); + rio[WS(vs, 2) + WS(rs, 6)] = FMA(Tap, Tat, Tau); + TbX = TbV * TbW; + Tc0 = TbY * TbW; + } + { + E Taw, TaB, Tav, Tay, TaC, Tax; + Taw = T9d - T9k; + TaB = Taz - TaA; + iio[WS(vs, 2) + WS(rs, 7)] = FNMS(TbY, TbZ, TbX); 
+ rio[WS(vs, 2) + WS(rs, 7)] = FMA(TbV, TbZ, Tc0); + Tav = W[6]; + Tay = W[7]; + TaC = Tav * TaB; + Tax = Tav * Taw; + TbH = W[10]; + TbO = W[11]; + iio[WS(vs, 4) + WS(rs, 6)] = FNMS(Tay, Taw, TaC); + rio[WS(vs, 4) + WS(rs, 6)] = FMA(Tay, TaB, Tax); + TbN = TbH * TbM; + TbU = TbO * TbM; + } + } + } + } + } + { + E T5q, T5v, T8R, T8K, T90, T95, T92, T96, T91; + { + E T3U, T3Z, T74, T77, T9Y, Ta3, T7l, T7e, T8X, T8T, T8W, T8V, T8Y; + { + E T5y, T5B, T5A, T5C, T5z; + { + E T5Y, T63, T60, T64, T5Z; + { + E T2P, T2S, T2R, T2U, T5X; + iio[WS(vs, 6) + WS(rs, 7)] = FNMS(TbO, TbT, TbN); + rio[WS(vs, 6) + WS(rs, 7)] = FMA(TbH, TbT, TbU); + T2P = W[2]; + T2S = W[3]; + T5Y = T4F - T4M; + T63 = T61 - T62; + T2R = T2P * T2Q; + T2U = T2S * T2Q; + T5X = W[6]; + T60 = W[7]; + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T2S, T2T, T2R); + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T2P, T2T, T2U); + T64 = T5X * T63; + T5Z = T5X * T5Y; + } + { + E T42, T45, T41, T44, T46, T43, T5x; + T3U = FNMS(KP707106781, T3T, T3S); + T42 = FMA(KP707106781, T3T, T3S); + T45 = FMA(KP707106781, T3Y, T3X); + T3Z = FNMS(KP707106781, T3Y, T3X); + iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T60, T5Y, T64); + rio[WS(vs, 4) + WS(rs, 3)] = FMA(T60, T63, T5Z); + T41 = W[0]; + T44 = W[1]; + T5q = FNMS(KP707106781, T5p, T5o); + T5y = FMA(KP707106781, T5p, T5o); + T5B = FMA(KP707106781, T5u, T5t); + T5v = FNMS(KP707106781, T5u, T5t); + T46 = T41 * T45; + T43 = T41 * T42; + T5x = W[0]; + T5A = W[1]; + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T44, T42, T46); + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T44, T45, T43); + T5C = T5x * T5B; + T5z = T5x * T5y; + } + } + { + E Ta6, Ta9, Ta8, Taa, Ta7; + { + E T6W, T71, T6T, T6Y, T72, T6X, Ta5; + T74 = FMA(KP707106781, T6V, T6U); + T6W = FNMS(KP707106781, T6V, T6U); + T71 = FNMS(KP707106781, T70, T6Z); + T77 = FMA(KP707106781, T70, T6Z); + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T5A, T5y, T5C); + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T5A, T5B, T5z); + T6T = W[8]; + T6Y = W[9]; + T9Y = FNMS(KP707106781, T9X, T9W); + Ta6 = 
FMA(KP707106781, T9X, T9W); + Ta9 = FMA(KP707106781, Ta2, Ta1); + Ta3 = FNMS(KP707106781, Ta2, Ta1); + T72 = T6T * T71; + T6X = T6T * T6W; + Ta5 = W[0]; + Ta8 = W[1]; + iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T6Y, T6W, T72); + rio[WS(vs, 5) + WS(rs, 4)] = FMA(T6Y, T71, T6X); + Taa = Ta5 * Ta9; + Ta7 = Ta5 * Ta6; + } + { + E T7r, T7o, T7n, T7q, T8U, T7p, T7s; + T7l = T7h - T7k; + T7r = T7h + T7k; + T7o = T7d + T7c; + T7e = T7c - T7d; + iio[WS(vs, 1) + WS(rs, 6)] = FNMS(Ta8, Ta6, Taa); + rio[WS(vs, 1) + WS(rs, 6)] = FMA(Ta8, Ta9, Ta7); + T7n = W[2]; + T7q = W[3]; + T8R = T8N - T8Q; + T8X = T8N + T8Q; + T8U = T8J + T8I; + T8K = T8I - T8J; + T7p = T7n * T7o; + T7s = T7q * T7o; + T8T = W[2]; + T8W = W[3]; + iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T7q, T7r, T7p); + rio[WS(vs, 2) + WS(rs, 4)] = FMA(T7n, T7r, T7s); + T8V = T8T * T8U; + T8Y = T8W * T8U; + } + } + } + { + E T5P, T5D, T5K, T5J, T5Q, Ta0, Ta4, T9Z; + { + E T5V, T5I, T5R, T5U, T5T, T5W; + { + E T2W, T31, T2V, T2Y, T5S, T32, T2X; + T2W = T1D - T1K; + T31 = T2Z - T30; + iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T8W, T8X, T8V); + rio[WS(vs, 2) + WS(rs, 5)] = FMA(T8T, T8X, T8Y); + T2V = W[6]; + T2Y = W[7]; + T5P = T5L - T5O; + T5V = T5L + T5O; + T5S = T5H + T5G; + T5I = T5G - T5H; + T32 = T2V * T31; + T2X = T2V * T2W; + T5R = W[2]; + T5U = W[3]; + iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2Y, T2W, T32); + rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2Y, T31, T2X); + T5T = T5R * T5S; + T5W = T5U * T5S; + } + { + E T3R, T3W, T40, T3V; + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T5U, T5V, T5T); + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T5R, T5V, T5W); + T3R = W[8]; + T3W = W[9]; + T40 = T3R * T3Z; + T3V = T3R * T3U; + T5D = W[10]; + T5K = W[11]; + iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3W, T3U, T40); + rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3W, T3Z, T3V); + T5J = T5D * T5I; + T5Q = T5K * T5I; + } + } + { + E T73, T76, T78, T75, T9V; + iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T5K, T5P, T5J); + rio[WS(vs, 6) + WS(rs, 3)] = FMA(T5D, T5P, T5Q); + T73 = W[0]; + T76 = W[1]; + T78 = T73 * 
T77; + T75 = T73 * T74; + T9V = W[8]; + Ta0 = W[9]; + iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T76, T74, T78); + rio[WS(vs, 1) + WS(rs, 4)] = FMA(T76, T77, T75); + Ta4 = T9V * Ta3; + T9Z = T9V * T9Y; + } + { + E T79, T7g, T7f, T7m, T8Z; + iio[WS(vs, 5) + WS(rs, 6)] = FNMS(Ta0, T9Y, Ta4); + rio[WS(vs, 5) + WS(rs, 6)] = FMA(Ta0, Ta3, T9Z); + T79 = W[10]; + T7g = W[11]; + T90 = T7H - T7O; + T95 = T93 - T94; + T7f = T79 * T7e; + T7m = T7g * T7e; + T8Z = W[6]; + T92 = W[7]; + iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T7g, T7l, T7f); + rio[WS(vs, 6) + WS(rs, 4)] = FMA(T79, T7l, T7m); + T96 = T8Z * T95; + T91 = T8Z * T90; + } + } + } + { + E T8A, T8D, T8C, T8E, T8B; + { + E T4s, T4x, T4u, T4y, T4t; + { + E T4p, T4m, T5s, T5w, T5r; + { + E T4j, T4c, T47, T4e, T4d, T4k, T5n; + T4p = T4f + T4i; + T4j = T4f - T4i; + T4c = T4a - T4b; + T4m = T4b + T4a; + iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T92, T90, T96); + rio[WS(vs, 4) + WS(rs, 5)] = FMA(T92, T95, T91); + T47 = W[10]; + T4e = W[11]; + T4d = T47 * T4c; + T4k = T4e * T4c; + T5n = W[8]; + T5s = W[9]; + iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T4e, T4j, T4d); + rio[WS(vs, 6) + WS(rs, 2)] = FMA(T47, T4j, T4k); + T5w = T5n * T5v; + T5r = T5n * T5q; + } + { + E T4l, T4o, T4n, T4q, T4r; + iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T5s, T5q, T5w); + rio[WS(vs, 5) + WS(rs, 3)] = FMA(T5s, T5v, T5r); + T4l = W[2]; + T4o = W[3]; + T4s = T39 - T3g; + T4x = T4v - T4w; + T4n = T4l * T4m; + T4q = T4o * T4m; + T4r = W[6]; + T4u = W[7]; + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T4o, T4p, T4n); + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T4l, T4p, T4q); + T4y = T4r * T4x; + T4t = T4r * T4s; + } + } + { + E T8F, T8M, T8L, T8S; + { + E T7u, T7z, T7t, T7w, T7A, T7v; + T7u = T6b - T6i; + T7z = T7x - T7y; + iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T4u, T4s, T4y); + rio[WS(vs, 4) + WS(rs, 2)] = FMA(T4u, T4x, T4t); + T7t = W[6]; + T7w = W[7]; + T7A = T7t * T7z; + T7v = T7t * T7u; + T8F = W[10]; + T8M = W[11]; + iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T7w, T7u, T7A); + rio[WS(vs, 4) + WS(rs, 4)] = FMA(T7w, T7z, 
T7v); + T8L = T8F * T8K; + T8S = T8M * T8K; + } + { + E T8s, T8x, T8p, T8u, T8y, T8t, T8z; + T8A = FMA(KP707106781, T8r, T8q); + T8s = FNMS(KP707106781, T8r, T8q); + T8x = FNMS(KP707106781, T8w, T8v); + T8D = FMA(KP707106781, T8w, T8v); + iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T8M, T8R, T8L); + rio[WS(vs, 6) + WS(rs, 5)] = FMA(T8F, T8R, T8S); + T8p = W[8]; + T8u = W[9]; + T8y = T8p * T8x; + T8t = T8p * T8s; + T8z = W[0]; + T8C = W[1]; + iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T8u, T8s, T8y); + rio[WS(vs, 5) + WS(rs, 5)] = FMA(T8u, T8x, T8t); + T8E = T8z * T8D; + T8B = T8z * T8A; + } + } + } + { + E T3y, T3J, T3h, T3A, T3z, T3K; + { + E T54, T5f, T4N, T56, T55, T5g; + { + E Tw, TH, Tf, Ty, Tx, TI; + { + E TN, TJ, TM, TL, TO, TK; + TK = FMA(KP707106781, Tv, Tk); + Tw = FNMS(KP707106781, Tv, Tk); + iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T8C, T8A, T8E); + rio[WS(vs, 1) + WS(rs, 5)] = FMA(T8C, T8D, T8B); + TH = FNMS(KP707106781, TG, TD); + TN = FMA(KP707106781, TG, TD); + TJ = W[4]; + TM = W[5]; + Tf = W[12]; + TL = TJ * TK; + TO = TM * TK; + Ty = W[13]; + Tx = Tf * Tw; + iio[WS(vs, 3)] = FNMS(TM, TN, TL); + rio[WS(vs, 3)] = FMA(TJ, TN, TO); + } + TI = Ty * Tw; + iio[WS(vs, 7)] = FNMS(Ty, TH, Tx); + { + E T5h, T5l, T5k, T5j, T5m, T5i; + T5i = FMA(KP707106781, T53, T4S); + T54 = FNMS(KP707106781, T53, T4S); + rio[WS(vs, 7)] = FMA(Tf, TH, TI); + T5h = W[4]; + T5f = FNMS(KP707106781, T5e, T5b); + T5l = FMA(KP707106781, T5e, T5b); + T5k = W[5]; + T5j = T5h * T5i; + T4N = W[12]; + T5m = T5k * T5i; + T56 = W[13]; + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T5k, T5l, T5j); + T55 = T4N * T54; + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T5h, T5l, T5m); + } + } + T5g = T56 * T54; + { + E T22, T2d, T1L, T24, T23, T2e; + { + E T2j, T2f, T2i, T2h, T2k, T2g; + iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T56, T5f, T55); + T22 = FNMS(KP707106781, T21, T1Q); + T2g = FMA(KP707106781, T21, T1Q); + rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4N, T5f, T5g); + T2d = FNMS(KP707106781, T2c, T29); + T2j = FMA(KP707106781, T2c, T29); + T2f = W[4]; 
+ T2i = W[5]; + T1L = W[12]; + T2h = T2f * T2g; + T2k = T2i * T2g; + T24 = W[13]; + T23 = T1L * T22; + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T2i, T2j, T2h); + rio[WS(vs, 3) + WS(rs, 1)] = FMA(T2f, T2j, T2k); + } + T2e = T24 * T22; + iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T24, T2d, T23); + { + E T3L, T3P, T3O, T3N, T3Q, T3M; + T3M = FMA(KP707106781, T3x, T3m); + T3y = FNMS(KP707106781, T3x, T3m); + rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1L, T2d, T2e); + T3L = W[4]; + T3J = FNMS(KP707106781, T3I, T3F); + T3P = FMA(KP707106781, T3I, T3F); + T3O = W[5]; + T3N = T3L * T3M; + T3h = W[12]; + T3Q = T3O * T3M; + T3A = W[13]; + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3O, T3P, T3N); + T3z = T3h * T3y; + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3L, T3P, T3Q); + } + } + } + T3K = T3A * T3y; + { + E Tb8, Tbj, TaR, Tba, Tb9, Tbk; + { + E T6A, T6L, T6j, T6C, T6B, T6M; + { + E T6R, T6N, T6Q, T6P, T6S, T6O; + iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T3A, T3J, T3z); + T6A = FNMS(KP707106781, T6z, T6o); + T6O = FMA(KP707106781, T6z, T6o); + rio[WS(vs, 7) + WS(rs, 2)] = FMA(T3h, T3J, T3K); + T6L = FNMS(KP707106781, T6K, T6H); + T6R = FMA(KP707106781, T6K, T6H); + T6N = W[4]; + T6Q = W[5]; + T6j = W[12]; + T6P = T6N * T6O; + T6S = T6Q * T6O; + T6C = W[13]; + T6B = T6j * T6A; + iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T6Q, T6R, T6P); + rio[WS(vs, 3) + WS(rs, 4)] = FMA(T6N, T6R, T6S); + } + T6M = T6C * T6A; + iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T6C, T6L, T6B); + { + E Tbl, Tbp, Tbo, Tbn, Tbq, Tbm; + Tbm = FMA(KP707106781, Tb7, TaW); + Tb8 = FNMS(KP707106781, Tb7, TaW); + rio[WS(vs, 7) + WS(rs, 4)] = FMA(T6j, T6L, T6M); + Tbl = W[4]; + Tbj = FNMS(KP707106781, Tbi, Tbf); + Tbp = FMA(KP707106781, Tbi, Tbf); + Tbo = W[5]; + Tbn = Tbl * Tbm; + TaR = W[12]; + Tbq = Tbo * Tbm; + Tba = W[13]; + iio[WS(vs, 3) + WS(rs, 7)] = FNMS(Tbo, Tbp, Tbn); + Tb9 = TaR * Tb8; + rio[WS(vs, 3) + WS(rs, 7)] = FMA(Tbl, Tbp, Tbq); + } + } + Tbk = Tba * Tb8; + { + E T86, T8h, T7P, T88, T87, T8i; + { + E T8n, T8j, T8m, T8l, T8o, T8k; + iio[WS(vs, 7) + WS(rs, 
7)] = FNMS(Tba, Tbj, Tb9); + T86 = FNMS(KP707106781, T85, T7U); + T8k = FMA(KP707106781, T85, T7U); + rio[WS(vs, 7) + WS(rs, 7)] = FMA(TaR, Tbj, Tbk); + T8h = FNMS(KP707106781, T8g, T8d); + T8n = FMA(KP707106781, T8g, T8d); + T8j = W[4]; + T8m = W[5]; + T7P = W[12]; + T8l = T8j * T8k; + T8o = T8m * T8k; + T88 = W[13]; + T87 = T7P * T86; + iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T8m, T8n, T8l); + rio[WS(vs, 3) + WS(rs, 5)] = FMA(T8j, T8n, T8o); + } + T8i = T88 * T86; + iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T88, T8h, T87); + { + E T9P, T9T, T9S, T9R, T9U, T9Q; + T9Q = FMA(KP707106781, T9B, T9q); + T9C = FNMS(KP707106781, T9B, T9q); + rio[WS(vs, 7) + WS(rs, 5)] = FMA(T7P, T8h, T8i); + T9P = W[4]; + T9N = FNMS(KP707106781, T9M, T9J); + T9T = FMA(KP707106781, T9M, T9J); + T9S = W[5]; + T9R = T9P * T9Q; + T9l = W[12]; + T9U = T9S * T9Q; + T9E = W[13]; + iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T9S, T9T, T9R); + T9D = T9l * T9C; + rio[WS(vs, 3) + WS(rs, 6)] = FMA(T9P, T9T, T9U); + } + } + } + } + } + } + } + } + T9O = T9E * T9C; + iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T9E, T9N, T9D); + rio[WS(vs, 7) + WS(rs, 6)] = FMA(T9l, T9N, T9O); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 8}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {352, 112, 176, 0}, 0, 0, 0 }; + +void X(codelet_q1_8) (planner *p) { + X(kdft_difsq_register) (p, q1_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h */ + +/* + * This function contains 528 FP additions, 256 FP multiplications, + * (or, 416 additions, 144 multiplications, 112 fused multiply/add), + * 142 stack variables, 1 constants, and 256 memory accesses + */ +#include "q.h" + +static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 
14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(0, vs)) { + E T7, T14, T1g, Tk, TC, TQ, T10, TM, T1w, T2p, T2z, T1H, T1M, T1W, T2j; + E T1V, T7R, T8O, T90, T84, T8m, T8A, T8K, T8w, T9g, Ta9, Taj, T9r, T9w, T9G; + E Ta3, T9F, Te, T17, T1h, Tp, Tu, TE, T11, TD, T1p, T2m, T2y, T1C, T1U; + E T28, T2i, T24, T7Y, T8R, T91, T89, T8e, T8o, T8L, T8n, T99, Ta6, Tai, T9m; + E T9E, T9S, Ta2, T9O, T2H, T3E, T3Q, T2U, T3c, T3q, T3A, T3m, T46, T4Z, T59; + E T4h, T4m, T4w, T4T, T4v, T5h, T6e, T6q, T5u, T5M, T60, T6a, T5W, T6G, T7z; + E T7J, T6R, T6W, T76, T7t, T75, T2O, T3H, T3R, T2Z, T34, T3e, T3B, T3d, T3Z; + E T4W, T58, T4c, T4u, T4I, T4S, T4E, T5o, T6h, T6r, T5z, T5E, T5O, T6b, T5N; + E T6z, T7w, T7I, T6M, T74, T7i, T7s, T7e; + { + E T3, Ty, Tj, TY, T6, Tg, TB, TZ; + { + E T1, T2, Th, Ti; + T1 = rio[0]; + T2 = rio[WS(rs, 4)]; + T3 = T1 + T2; + Ty = T1 - T2; + Th = iio[0]; + Ti = iio[WS(rs, 4)]; + Tj = Th - Ti; + TY = Th + Ti; + } + { + E T4, T5, Tz, TA; + T4 = rio[WS(rs, 2)]; + T5 = rio[WS(rs, 6)]; + T6 = T4 + T5; + Tg = T4 - T5; + Tz = iio[WS(rs, 2)]; + TA = iio[WS(rs, 6)]; + TB = Tz - TA; + TZ = Tz + TA; + } + T7 = T3 + T6; + T14 = T3 - T6; + T1g = TY + TZ; + Tk = Tg + Tj; + TC = Ty - TB; + TQ = Tj - Tg; + T10 = TY - TZ; + TM = Ty + TB; + } + { + E T1s, T1I, T1L, T2n, T1v, T1D, T1G, T2o; + { + E T1q, T1r, T1J, T1K; + T1q = rio[WS(vs, 1) + WS(rs, 1)]; + T1r = rio[WS(vs, 1) + WS(rs, 5)]; + T1s = T1q + T1r; + T1I = T1q - T1r; + T1J = iio[WS(vs, 1) + WS(rs, 1)]; + T1K = iio[WS(vs, 1) + WS(rs, 5)]; + T1L = T1J - T1K; + T2n = T1J + T1K; + } + { + E T1t, T1u, T1E, T1F; + T1t = rio[WS(vs, 1) + WS(rs, 7)]; + T1u = rio[WS(vs, 1) + WS(rs, 3)]; + T1v = T1t + T1u; + T1D = T1t - T1u; + T1E = iio[WS(vs, 1) + WS(rs, 7)]; + T1F = iio[WS(vs, 1) + WS(rs, 3)]; + T1G = T1E - T1F; + T2o = T1E + T1F; + } + T1w = T1s + T1v; + T2p = T2n - T2o; + T2z = T2n + T2o; + T1H = T1D - T1G; + T1M = T1I + T1L; + T1W = T1D + T1G; + T2j = T1v 
- T1s; + T1V = T1L - T1I; + } + { + E T7N, T8i, T83, T8I, T7Q, T80, T8l, T8J; + { + E T7L, T7M, T81, T82; + T7L = rio[WS(vs, 6)]; + T7M = rio[WS(vs, 6) + WS(rs, 4)]; + T7N = T7L + T7M; + T8i = T7L - T7M; + T81 = iio[WS(vs, 6)]; + T82 = iio[WS(vs, 6) + WS(rs, 4)]; + T83 = T81 - T82; + T8I = T81 + T82; + } + { + E T7O, T7P, T8j, T8k; + T7O = rio[WS(vs, 6) + WS(rs, 2)]; + T7P = rio[WS(vs, 6) + WS(rs, 6)]; + T7Q = T7O + T7P; + T80 = T7O - T7P; + T8j = iio[WS(vs, 6) + WS(rs, 2)]; + T8k = iio[WS(vs, 6) + WS(rs, 6)]; + T8l = T8j - T8k; + T8J = T8j + T8k; + } + T7R = T7N + T7Q; + T8O = T7N - T7Q; + T90 = T8I + T8J; + T84 = T80 + T83; + T8m = T8i - T8l; + T8A = T83 - T80; + T8K = T8I - T8J; + T8w = T8i + T8l; + } + { + E T9c, T9s, T9v, Ta7, T9f, T9n, T9q, Ta8; + { + E T9a, T9b, T9t, T9u; + T9a = rio[WS(vs, 7) + WS(rs, 1)]; + T9b = rio[WS(vs, 7) + WS(rs, 5)]; + T9c = T9a + T9b; + T9s = T9a - T9b; + T9t = iio[WS(vs, 7) + WS(rs, 1)]; + T9u = iio[WS(vs, 7) + WS(rs, 5)]; + T9v = T9t - T9u; + Ta7 = T9t + T9u; + } + { + E T9d, T9e, T9o, T9p; + T9d = rio[WS(vs, 7) + WS(rs, 7)]; + T9e = rio[WS(vs, 7) + WS(rs, 3)]; + T9f = T9d + T9e; + T9n = T9d - T9e; + T9o = iio[WS(vs, 7) + WS(rs, 7)]; + T9p = iio[WS(vs, 7) + WS(rs, 3)]; + T9q = T9o - T9p; + Ta8 = T9o + T9p; + } + T9g = T9c + T9f; + Ta9 = Ta7 - Ta8; + Taj = Ta7 + Ta8; + T9r = T9n - T9q; + T9w = T9s + T9v; + T9G = T9n + T9q; + Ta3 = T9f - T9c; + T9F = T9v - T9s; + } + { + E Ta, Tq, Tt, T15, Td, Tl, To, T16; + { + E T8, T9, Tr, Ts; + T8 = rio[WS(rs, 1)]; + T9 = rio[WS(rs, 5)]; + Ta = T8 + T9; + Tq = T8 - T9; + Tr = iio[WS(rs, 1)]; + Ts = iio[WS(rs, 5)]; + Tt = Tr - Ts; + T15 = Tr + Ts; + } + { + E Tb, Tc, Tm, Tn; + Tb = rio[WS(rs, 7)]; + Tc = rio[WS(rs, 3)]; + Td = Tb + Tc; + Tl = Tb - Tc; + Tm = iio[WS(rs, 7)]; + Tn = iio[WS(rs, 3)]; + To = Tm - Tn; + T16 = Tm + Tn; + } + Te = Ta + Td; + T17 = T15 - T16; + T1h = T15 + T16; + Tp = Tl - To; + Tu = Tq + Tt; + TE = Tl + To; + T11 = Td - Ta; + TD = Tt - Tq; + } + { + E T1l, T1Q, T1B, 
T2g, T1o, T1y, T1T, T2h; + { + E T1j, T1k, T1z, T1A; + T1j = rio[WS(vs, 1)]; + T1k = rio[WS(vs, 1) + WS(rs, 4)]; + T1l = T1j + T1k; + T1Q = T1j - T1k; + T1z = iio[WS(vs, 1)]; + T1A = iio[WS(vs, 1) + WS(rs, 4)]; + T1B = T1z - T1A; + T2g = T1z + T1A; + } + { + E T1m, T1n, T1R, T1S; + T1m = rio[WS(vs, 1) + WS(rs, 2)]; + T1n = rio[WS(vs, 1) + WS(rs, 6)]; + T1o = T1m + T1n; + T1y = T1m - T1n; + T1R = iio[WS(vs, 1) + WS(rs, 2)]; + T1S = iio[WS(vs, 1) + WS(rs, 6)]; + T1T = T1R - T1S; + T2h = T1R + T1S; + } + T1p = T1l + T1o; + T2m = T1l - T1o; + T2y = T2g + T2h; + T1C = T1y + T1B; + T1U = T1Q - T1T; + T28 = T1B - T1y; + T2i = T2g - T2h; + T24 = T1Q + T1T; + } + { + E T7U, T8a, T8d, T8P, T7X, T85, T88, T8Q; + { + E T7S, T7T, T8b, T8c; + T7S = rio[WS(vs, 6) + WS(rs, 1)]; + T7T = rio[WS(vs, 6) + WS(rs, 5)]; + T7U = T7S + T7T; + T8a = T7S - T7T; + T8b = iio[WS(vs, 6) + WS(rs, 1)]; + T8c = iio[WS(vs, 6) + WS(rs, 5)]; + T8d = T8b - T8c; + T8P = T8b + T8c; + } + { + E T7V, T7W, T86, T87; + T7V = rio[WS(vs, 6) + WS(rs, 7)]; + T7W = rio[WS(vs, 6) + WS(rs, 3)]; + T7X = T7V + T7W; + T85 = T7V - T7W; + T86 = iio[WS(vs, 6) + WS(rs, 7)]; + T87 = iio[WS(vs, 6) + WS(rs, 3)]; + T88 = T86 - T87; + T8Q = T86 + T87; + } + T7Y = T7U + T7X; + T8R = T8P - T8Q; + T91 = T8P + T8Q; + T89 = T85 - T88; + T8e = T8a + T8d; + T8o = T85 + T88; + T8L = T7X - T7U; + T8n = T8d - T8a; + } + { + E T95, T9A, T9l, Ta0, T98, T9i, T9D, Ta1; + { + E T93, T94, T9j, T9k; + T93 = rio[WS(vs, 7)]; + T94 = rio[WS(vs, 7) + WS(rs, 4)]; + T95 = T93 + T94; + T9A = T93 - T94; + T9j = iio[WS(vs, 7)]; + T9k = iio[WS(vs, 7) + WS(rs, 4)]; + T9l = T9j - T9k; + Ta0 = T9j + T9k; + } + { + E T96, T97, T9B, T9C; + T96 = rio[WS(vs, 7) + WS(rs, 2)]; + T97 = rio[WS(vs, 7) + WS(rs, 6)]; + T98 = T96 + T97; + T9i = T96 - T97; + T9B = iio[WS(vs, 7) + WS(rs, 2)]; + T9C = iio[WS(vs, 7) + WS(rs, 6)]; + T9D = T9B - T9C; + Ta1 = T9B + T9C; + } + T99 = T95 + T98; + Ta6 = T95 - T98; + Tai = Ta0 + Ta1; + T9m = T9i + T9l; + T9E = T9A - T9D; + T9S = 
T9l - T9i; + Ta2 = Ta0 - Ta1; + T9O = T9A + T9D; + } + { + E T2D, T38, T2T, T3y, T2G, T2Q, T3b, T3z; + { + E T2B, T2C, T2R, T2S; + T2B = rio[WS(vs, 2)]; + T2C = rio[WS(vs, 2) + WS(rs, 4)]; + T2D = T2B + T2C; + T38 = T2B - T2C; + T2R = iio[WS(vs, 2)]; + T2S = iio[WS(vs, 2) + WS(rs, 4)]; + T2T = T2R - T2S; + T3y = T2R + T2S; + } + { + E T2E, T2F, T39, T3a; + T2E = rio[WS(vs, 2) + WS(rs, 2)]; + T2F = rio[WS(vs, 2) + WS(rs, 6)]; + T2G = T2E + T2F; + T2Q = T2E - T2F; + T39 = iio[WS(vs, 2) + WS(rs, 2)]; + T3a = iio[WS(vs, 2) + WS(rs, 6)]; + T3b = T39 - T3a; + T3z = T39 + T3a; + } + T2H = T2D + T2G; + T3E = T2D - T2G; + T3Q = T3y + T3z; + T2U = T2Q + T2T; + T3c = T38 - T3b; + T3q = T2T - T2Q; + T3A = T3y - T3z; + T3m = T38 + T3b; + } + { + E T42, T4i, T4l, T4X, T45, T4d, T4g, T4Y; + { + E T40, T41, T4j, T4k; + T40 = rio[WS(vs, 3) + WS(rs, 1)]; + T41 = rio[WS(vs, 3) + WS(rs, 5)]; + T42 = T40 + T41; + T4i = T40 - T41; + T4j = iio[WS(vs, 3) + WS(rs, 1)]; + T4k = iio[WS(vs, 3) + WS(rs, 5)]; + T4l = T4j - T4k; + T4X = T4j + T4k; + } + { + E T43, T44, T4e, T4f; + T43 = rio[WS(vs, 3) + WS(rs, 7)]; + T44 = rio[WS(vs, 3) + WS(rs, 3)]; + T45 = T43 + T44; + T4d = T43 - T44; + T4e = iio[WS(vs, 3) + WS(rs, 7)]; + T4f = iio[WS(vs, 3) + WS(rs, 3)]; + T4g = T4e - T4f; + T4Y = T4e + T4f; + } + T46 = T42 + T45; + T4Z = T4X - T4Y; + T59 = T4X + T4Y; + T4h = T4d - T4g; + T4m = T4i + T4l; + T4w = T4d + T4g; + T4T = T45 - T42; + T4v = T4l - T4i; + } + { + E T5d, T5I, T5t, T68, T5g, T5q, T5L, T69; + { + E T5b, T5c, T5r, T5s; + T5b = rio[WS(vs, 4)]; + T5c = rio[WS(vs, 4) + WS(rs, 4)]; + T5d = T5b + T5c; + T5I = T5b - T5c; + T5r = iio[WS(vs, 4)]; + T5s = iio[WS(vs, 4) + WS(rs, 4)]; + T5t = T5r - T5s; + T68 = T5r + T5s; + } + { + E T5e, T5f, T5J, T5K; + T5e = rio[WS(vs, 4) + WS(rs, 2)]; + T5f = rio[WS(vs, 4) + WS(rs, 6)]; + T5g = T5e + T5f; + T5q = T5e - T5f; + T5J = iio[WS(vs, 4) + WS(rs, 2)]; + T5K = iio[WS(vs, 4) + WS(rs, 6)]; + T5L = T5J - T5K; + T69 = T5J + T5K; + } + T5h = T5d + T5g; + T6e = 
T5d - T5g; + T6q = T68 + T69; + T5u = T5q + T5t; + T5M = T5I - T5L; + T60 = T5t - T5q; + T6a = T68 - T69; + T5W = T5I + T5L; + } + { + E T6C, T6S, T6V, T7x, T6F, T6N, T6Q, T7y; + { + E T6A, T6B, T6T, T6U; + T6A = rio[WS(vs, 5) + WS(rs, 1)]; + T6B = rio[WS(vs, 5) + WS(rs, 5)]; + T6C = T6A + T6B; + T6S = T6A - T6B; + T6T = iio[WS(vs, 5) + WS(rs, 1)]; + T6U = iio[WS(vs, 5) + WS(rs, 5)]; + T6V = T6T - T6U; + T7x = T6T + T6U; + } + { + E T6D, T6E, T6O, T6P; + T6D = rio[WS(vs, 5) + WS(rs, 7)]; + T6E = rio[WS(vs, 5) + WS(rs, 3)]; + T6F = T6D + T6E; + T6N = T6D - T6E; + T6O = iio[WS(vs, 5) + WS(rs, 7)]; + T6P = iio[WS(vs, 5) + WS(rs, 3)]; + T6Q = T6O - T6P; + T7y = T6O + T6P; + } + T6G = T6C + T6F; + T7z = T7x - T7y; + T7J = T7x + T7y; + T6R = T6N - T6Q; + T6W = T6S + T6V; + T76 = T6N + T6Q; + T7t = T6F - T6C; + T75 = T6V - T6S; + } + { + E T2K, T30, T33, T3F, T2N, T2V, T2Y, T3G; + { + E T2I, T2J, T31, T32; + T2I = rio[WS(vs, 2) + WS(rs, 1)]; + T2J = rio[WS(vs, 2) + WS(rs, 5)]; + T2K = T2I + T2J; + T30 = T2I - T2J; + T31 = iio[WS(vs, 2) + WS(rs, 1)]; + T32 = iio[WS(vs, 2) + WS(rs, 5)]; + T33 = T31 - T32; + T3F = T31 + T32; + } + { + E T2L, T2M, T2W, T2X; + T2L = rio[WS(vs, 2) + WS(rs, 7)]; + T2M = rio[WS(vs, 2) + WS(rs, 3)]; + T2N = T2L + T2M; + T2V = T2L - T2M; + T2W = iio[WS(vs, 2) + WS(rs, 7)]; + T2X = iio[WS(vs, 2) + WS(rs, 3)]; + T2Y = T2W - T2X; + T3G = T2W + T2X; + } + T2O = T2K + T2N; + T3H = T3F - T3G; + T3R = T3F + T3G; + T2Z = T2V - T2Y; + T34 = T30 + T33; + T3e = T2V + T2Y; + T3B = T2N - T2K; + T3d = T33 - T30; + } + { + E T3V, T4q, T4b, T4Q, T3Y, T48, T4t, T4R; + { + E T3T, T3U, T49, T4a; + T3T = rio[WS(vs, 3)]; + T3U = rio[WS(vs, 3) + WS(rs, 4)]; + T3V = T3T + T3U; + T4q = T3T - T3U; + T49 = iio[WS(vs, 3)]; + T4a = iio[WS(vs, 3) + WS(rs, 4)]; + T4b = T49 - T4a; + T4Q = T49 + T4a; + } + { + E T3W, T3X, T4r, T4s; + T3W = rio[WS(vs, 3) + WS(rs, 2)]; + T3X = rio[WS(vs, 3) + WS(rs, 6)]; + T3Y = T3W + T3X; + T48 = T3W - T3X; + T4r = iio[WS(vs, 3) + WS(rs, 2)]; + 
T4s = iio[WS(vs, 3) + WS(rs, 6)]; + T4t = T4r - T4s; + T4R = T4r + T4s; + } + T3Z = T3V + T3Y; + T4W = T3V - T3Y; + T58 = T4Q + T4R; + T4c = T48 + T4b; + T4u = T4q - T4t; + T4I = T4b - T48; + T4S = T4Q - T4R; + T4E = T4q + T4t; + } + { + E T5k, T5A, T5D, T6f, T5n, T5v, T5y, T6g; + { + E T5i, T5j, T5B, T5C; + T5i = rio[WS(vs, 4) + WS(rs, 1)]; + T5j = rio[WS(vs, 4) + WS(rs, 5)]; + T5k = T5i + T5j; + T5A = T5i - T5j; + T5B = iio[WS(vs, 4) + WS(rs, 1)]; + T5C = iio[WS(vs, 4) + WS(rs, 5)]; + T5D = T5B - T5C; + T6f = T5B + T5C; + } + { + E T5l, T5m, T5w, T5x; + T5l = rio[WS(vs, 4) + WS(rs, 7)]; + T5m = rio[WS(vs, 4) + WS(rs, 3)]; + T5n = T5l + T5m; + T5v = T5l - T5m; + T5w = iio[WS(vs, 4) + WS(rs, 7)]; + T5x = iio[WS(vs, 4) + WS(rs, 3)]; + T5y = T5w - T5x; + T6g = T5w + T5x; + } + T5o = T5k + T5n; + T6h = T6f - T6g; + T6r = T6f + T6g; + T5z = T5v - T5y; + T5E = T5A + T5D; + T5O = T5v + T5y; + T6b = T5n - T5k; + T5N = T5D - T5A; + } + { + E T6v, T70, T6L, T7q, T6y, T6I, T73, T7r; + { + E T6t, T6u, T6J, T6K; + T6t = rio[WS(vs, 5)]; + T6u = rio[WS(vs, 5) + WS(rs, 4)]; + T6v = T6t + T6u; + T70 = T6t - T6u; + T6J = iio[WS(vs, 5)]; + T6K = iio[WS(vs, 5) + WS(rs, 4)]; + T6L = T6J - T6K; + T7q = T6J + T6K; + } + { + E T6w, T6x, T71, T72; + T6w = rio[WS(vs, 5) + WS(rs, 2)]; + T6x = rio[WS(vs, 5) + WS(rs, 6)]; + T6y = T6w + T6x; + T6I = T6w - T6x; + T71 = iio[WS(vs, 5) + WS(rs, 2)]; + T72 = iio[WS(vs, 5) + WS(rs, 6)]; + T73 = T71 - T72; + T7r = T71 + T72; + } + T6z = T6v + T6y; + T7w = T6v - T6y; + T7I = T7q + T7r; + T6M = T6I + T6L; + T74 = T70 - T73; + T7i = T6L - T6I; + T7s = T7q - T7r; + T7e = T70 + T73; + } + rio[0] = T7 + Te; + iio[0] = T1g + T1h; + rio[WS(rs, 1)] = T1p + T1w; + iio[WS(rs, 1)] = T2y + T2z; + rio[WS(rs, 3)] = T3Z + T46; + rio[WS(rs, 2)] = T2H + T2O; + iio[WS(rs, 2)] = T3Q + T3R; + iio[WS(rs, 3)] = T58 + T59; + rio[WS(rs, 6)] = T7R + T7Y; + iio[WS(rs, 6)] = T90 + T91; + iio[WS(rs, 5)] = T7I + T7J; + rio[WS(rs, 5)] = T6z + T6G; + iio[WS(rs, 4)] = T6q + T6r; + 
rio[WS(rs, 4)] = T5h + T5o; + rio[WS(rs, 7)] = T99 + T9g; + iio[WS(rs, 7)] = Tai + Taj; + { + E T12, T18, TX, T13; + T12 = T10 - T11; + T18 = T14 - T17; + TX = W[10]; + T13 = W[11]; + iio[WS(vs, 6)] = FNMS(T13, T18, TX * T12); + rio[WS(vs, 6)] = FMA(T13, T12, TX * T18); + } + { + E Tag, Tak, Taf, Tah; + Tag = T99 - T9g; + Tak = Tai - Taj; + Taf = W[6]; + Tah = W[7]; + rio[WS(vs, 4) + WS(rs, 7)] = FMA(Taf, Tag, Tah * Tak); + iio[WS(vs, 4) + WS(rs, 7)] = FNMS(Tah, Tag, Taf * Tak); + } + { + E T8M, T8S, T8H, T8N; + T8M = T8K - T8L; + T8S = T8O - T8R; + T8H = W[10]; + T8N = W[11]; + iio[WS(vs, 6) + WS(rs, 6)] = FNMS(T8N, T8S, T8H * T8M); + rio[WS(vs, 6) + WS(rs, 6)] = FMA(T8N, T8M, T8H * T8S); + } + { + E T2k, T2q, T2f, T2l; + T2k = T2i - T2j; + T2q = T2m - T2p; + T2f = W[10]; + T2l = W[11]; + iio[WS(vs, 6) + WS(rs, 1)] = FNMS(T2l, T2q, T2f * T2k); + rio[WS(vs, 6) + WS(rs, 1)] = FMA(T2l, T2k, T2f * T2q); + } + { + E Ta4, Taa, T9Z, Ta5; + Ta4 = Ta2 - Ta3; + Taa = Ta6 - Ta9; + T9Z = W[10]; + Ta5 = W[11]; + iio[WS(vs, 6) + WS(rs, 7)] = FNMS(Ta5, Taa, T9Z * Ta4); + rio[WS(vs, 6) + WS(rs, 7)] = FMA(Ta5, Ta4, T9Z * Taa); + } + { + E T8Y, T92, T8X, T8Z; + T8Y = T7R - T7Y; + T92 = T90 - T91; + T8X = W[6]; + T8Z = W[7]; + rio[WS(vs, 4) + WS(rs, 6)] = FMA(T8X, T8Y, T8Z * T92); + iio[WS(vs, 4) + WS(rs, 6)] = FNMS(T8Z, T8Y, T8X * T92); + } + { + E T2w, T2A, T2v, T2x; + T2w = T1p - T1w; + T2A = T2y - T2z; + T2v = W[6]; + T2x = W[7]; + rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2v, T2w, T2x * T2A); + iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2x, T2w, T2v * T2A); + } + { + E Tac, Tae, Tab, Tad; + Tac = Ta3 + Ta2; + Tae = Ta6 + Ta9; + Tab = W[2]; + Tad = W[3]; + iio[WS(vs, 2) + WS(rs, 7)] = FNMS(Tad, Tae, Tab * Tac); + rio[WS(vs, 2) + WS(rs, 7)] = FMA(Tad, Tac, Tab * Tae); + } + { + E T8U, T8W, T8T, T8V; + T8U = T8L + T8K; + T8W = T8O + T8R; + T8T = W[2]; + T8V = W[3]; + iio[WS(vs, 2) + WS(rs, 6)] = FNMS(T8V, T8W, T8T * T8U); + rio[WS(vs, 2) + WS(rs, 6)] = FMA(T8V, T8U, T8T * T8W); + } + { + E T1a, 
T1c, T19, T1b; + T1a = T11 + T10; + T1c = T14 + T17; + T19 = W[2]; + T1b = W[3]; + iio[WS(vs, 2)] = FNMS(T1b, T1c, T19 * T1a); + rio[WS(vs, 2)] = FMA(T1b, T1a, T19 * T1c); + } + { + E T1e, T1i, T1d, T1f; + T1e = T7 - Te; + T1i = T1g - T1h; + T1d = W[6]; + T1f = W[7]; + rio[WS(vs, 4)] = FMA(T1d, T1e, T1f * T1i); + iio[WS(vs, 4)] = FNMS(T1f, T1e, T1d * T1i); + } + { + E T2s, T2u, T2r, T2t; + T2s = T2j + T2i; + T2u = T2m + T2p; + T2r = W[2]; + T2t = W[3]; + iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T2t, T2u, T2r * T2s); + rio[WS(vs, 2) + WS(rs, 1)] = FMA(T2t, T2s, T2r * T2u); + } + { + E T3C, T3I, T3x, T3D; + T3C = T3A - T3B; + T3I = T3E - T3H; + T3x = W[10]; + T3D = W[11]; + iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T3D, T3I, T3x * T3C); + rio[WS(vs, 6) + WS(rs, 2)] = FMA(T3D, T3C, T3x * T3I); + } + { + E T4U, T50, T4P, T4V; + T4U = T4S - T4T; + T50 = T4W - T4Z; + T4P = W[10]; + T4V = W[11]; + iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T4V, T50, T4P * T4U); + rio[WS(vs, 6) + WS(rs, 3)] = FMA(T4V, T4U, T4P * T50); + } + { + E T56, T5a, T55, T57; + T56 = T3Z - T46; + T5a = T58 - T59; + T55 = W[6]; + T57 = W[7]; + rio[WS(vs, 4) + WS(rs, 3)] = FMA(T55, T56, T57 * T5a); + iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T57, T56, T55 * T5a); + } + { + E T6o, T6s, T6n, T6p; + T6o = T5h - T5o; + T6s = T6q - T6r; + T6n = W[6]; + T6p = W[7]; + rio[WS(vs, 4) + WS(rs, 4)] = FMA(T6n, T6o, T6p * T6s); + iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T6p, T6o, T6n * T6s); + } + { + E T7u, T7A, T7p, T7v; + T7u = T7s - T7t; + T7A = T7w - T7z; + T7p = W[10]; + T7v = W[11]; + iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T7v, T7A, T7p * T7u); + rio[WS(vs, 6) + WS(rs, 5)] = FMA(T7v, T7u, T7p * T7A); + } + { + E T6c, T6i, T67, T6d; + T6c = T6a - T6b; + T6i = T6e - T6h; + T67 = W[10]; + T6d = W[11]; + iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T6d, T6i, T67 * T6c); + rio[WS(vs, 6) + WS(rs, 4)] = FMA(T6d, T6c, T67 * T6i); + } + { + E T7G, T7K, T7F, T7H; + T7G = T6z - T6G; + T7K = T7I - T7J; + T7F = W[6]; + T7H = W[7]; + rio[WS(vs, 4) + WS(rs, 5)] = FMA(T7F, 
T7G, T7H * T7K); + iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T7H, T7G, T7F * T7K); + } + { + E T3O, T3S, T3N, T3P; + T3O = T2H - T2O; + T3S = T3Q - T3R; + T3N = W[6]; + T3P = W[7]; + rio[WS(vs, 4) + WS(rs, 2)] = FMA(T3N, T3O, T3P * T3S); + iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T3P, T3O, T3N * T3S); + } + { + E T3K, T3M, T3J, T3L; + T3K = T3B + T3A; + T3M = T3E + T3H; + T3J = W[2]; + T3L = W[3]; + iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T3L, T3M, T3J * T3K); + rio[WS(vs, 2) + WS(rs, 2)] = FMA(T3L, T3K, T3J * T3M); + } + { + E T7C, T7E, T7B, T7D; + T7C = T7t + T7s; + T7E = T7w + T7z; + T7B = W[2]; + T7D = W[3]; + iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T7D, T7E, T7B * T7C); + rio[WS(vs, 2) + WS(rs, 5)] = FMA(T7D, T7C, T7B * T7E); + } + { + E T6k, T6m, T6j, T6l; + T6k = T6b + T6a; + T6m = T6e + T6h; + T6j = W[2]; + T6l = W[3]; + iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T6l, T6m, T6j * T6k); + rio[WS(vs, 2) + WS(rs, 4)] = FMA(T6l, T6k, T6j * T6m); + } + { + E T52, T54, T51, T53; + T52 = T4T + T4S; + T54 = T4W + T4Z; + T51 = W[2]; + T53 = W[3]; + iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T53, T54, T51 * T52); + rio[WS(vs, 2) + WS(rs, 3)] = FMA(T53, T52, T51 * T54); + } + { + E T5G, T5S, T5Q, T5U, T5F, T5P; + T5F = KP707106781 * (T5z - T5E); + T5G = T5u - T5F; + T5S = T5u + T5F; + T5P = KP707106781 * (T5N - T5O); + T5Q = T5M - T5P; + T5U = T5M + T5P; + { + E T5p, T5H, T5R, T5T; + T5p = W[12]; + T5H = W[13]; + iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T5H, T5Q, T5p * T5G); + rio[WS(vs, 7) + WS(rs, 4)] = FMA(T5H, T5G, T5p * T5Q); + T5R = W[4]; + T5T = W[5]; + iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T5T, T5U, T5R * T5S); + rio[WS(vs, 3) + WS(rs, 4)] = FMA(T5T, T5S, T5R * T5U); + } + } + { + E Tw, TI, TG, TK, Tv, TF; + Tv = KP707106781 * (Tp - Tu); + Tw = Tk - Tv; + TI = Tk + Tv; + TF = KP707106781 * (TD - TE); + TG = TC - TF; + TK = TC + TF; + { + E Tf, Tx, TH, TJ; + Tf = W[12]; + Tx = W[13]; + iio[WS(vs, 7)] = FNMS(Tx, TG, Tf * Tw); + rio[WS(vs, 7)] = FMA(Tx, Tw, Tf * TG); + TH = W[4]; + TJ = W[5]; + iio[WS(vs, 3)] = 
FNMS(TJ, TK, TH * TI); + rio[WS(vs, 3)] = FMA(TJ, TI, TH * TK); + } + } + { + E T9Q, T9W, T9U, T9Y, T9P, T9T; + T9P = KP707106781 * (T9w + T9r); + T9Q = T9O - T9P; + T9W = T9O + T9P; + T9T = KP707106781 * (T9F + T9G); + T9U = T9S - T9T; + T9Y = T9S + T9T; + { + E T9N, T9R, T9V, T9X; + T9N = W[8]; + T9R = W[9]; + rio[WS(vs, 5) + WS(rs, 7)] = FMA(T9N, T9Q, T9R * T9U); + iio[WS(vs, 5) + WS(rs, 7)] = FNMS(T9R, T9Q, T9N * T9U); + T9V = W[0]; + T9X = W[1]; + rio[WS(vs, 1) + WS(rs, 7)] = FMA(T9V, T9W, T9X * T9Y); + iio[WS(vs, 1) + WS(rs, 7)] = FNMS(T9X, T9W, T9V * T9Y); + } + } + { + E T36, T3i, T3g, T3k, T35, T3f; + T35 = KP707106781 * (T2Z - T34); + T36 = T2U - T35; + T3i = T2U + T35; + T3f = KP707106781 * (T3d - T3e); + T3g = T3c - T3f; + T3k = T3c + T3f; + { + E T2P, T37, T3h, T3j; + T2P = W[12]; + T37 = W[13]; + iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T37, T3g, T2P * T36); + rio[WS(vs, 7) + WS(rs, 2)] = FMA(T37, T36, T2P * T3g); + T3h = W[4]; + T3j = W[5]; + iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3j, T3k, T3h * T3i); + rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3j, T3i, T3h * T3k); + } + } + { + E T5Y, T64, T62, T66, T5X, T61; + T5X = KP707106781 * (T5E + T5z); + T5Y = T5W - T5X; + T64 = T5W + T5X; + T61 = KP707106781 * (T5N + T5O); + T62 = T60 - T61; + T66 = T60 + T61; + { + E T5V, T5Z, T63, T65; + T5V = W[8]; + T5Z = W[9]; + rio[WS(vs, 5) + WS(rs, 4)] = FMA(T5V, T5Y, T5Z * T62); + iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T5Z, T5Y, T5V * T62); + T63 = W[0]; + T65 = W[1]; + rio[WS(vs, 1) + WS(rs, 4)] = FMA(T63, T64, T65 * T66); + iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T65, T64, T63 * T66); + } + } + { + E T7g, T7m, T7k, T7o, T7f, T7j; + T7f = KP707106781 * (T6W + T6R); + T7g = T7e - T7f; + T7m = T7e + T7f; + T7j = KP707106781 * (T75 + T76); + T7k = T7i - T7j; + T7o = T7i + T7j; + { + E T7d, T7h, T7l, T7n; + T7d = W[8]; + T7h = W[9]; + rio[WS(vs, 5) + WS(rs, 5)] = FMA(T7d, T7g, T7h * T7k); + iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T7h, T7g, T7d * T7k); + T7l = W[0]; + T7n = W[1]; + rio[WS(vs, 1) + WS(rs, 
5)] = FMA(T7l, T7m, T7n * T7o); + iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T7n, T7m, T7l * T7o); + } + } + { + E T8g, T8s, T8q, T8u, T8f, T8p; + T8f = KP707106781 * (T89 - T8e); + T8g = T84 - T8f; + T8s = T84 + T8f; + T8p = KP707106781 * (T8n - T8o); + T8q = T8m - T8p; + T8u = T8m + T8p; + { + E T7Z, T8h, T8r, T8t; + T7Z = W[12]; + T8h = W[13]; + iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T8h, T8q, T7Z * T8g); + rio[WS(vs, 7) + WS(rs, 6)] = FMA(T8h, T8g, T7Z * T8q); + T8r = W[4]; + T8t = W[5]; + iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T8t, T8u, T8r * T8s); + rio[WS(vs, 3) + WS(rs, 6)] = FMA(T8t, T8s, T8r * T8u); + } + } + { + E T4G, T4M, T4K, T4O, T4F, T4J; + T4F = KP707106781 * (T4m + T4h); + T4G = T4E - T4F; + T4M = T4E + T4F; + T4J = KP707106781 * (T4v + T4w); + T4K = T4I - T4J; + T4O = T4I + T4J; + { + E T4D, T4H, T4L, T4N; + T4D = W[8]; + T4H = W[9]; + rio[WS(vs, 5) + WS(rs, 3)] = FMA(T4D, T4G, T4H * T4K); + iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T4H, T4G, T4D * T4K); + T4L = W[0]; + T4N = W[1]; + rio[WS(vs, 1) + WS(rs, 3)] = FMA(T4L, T4M, T4N * T4O); + iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T4N, T4M, T4L * T4O); + } + } + { + E TO, TU, TS, TW, TN, TR; + TN = KP707106781 * (Tu + Tp); + TO = TM - TN; + TU = TM + TN; + TR = KP707106781 * (TD + TE); + TS = TQ - TR; + TW = TQ + TR; + { + E TL, TP, TT, TV; + TL = W[8]; + TP = W[9]; + rio[WS(vs, 5)] = FMA(TL, TO, TP * TS); + iio[WS(vs, 5)] = FNMS(TP, TO, TL * TS); + TT = W[0]; + TV = W[1]; + rio[WS(vs, 1)] = FMA(TT, TU, TV * TW); + iio[WS(vs, 1)] = FNMS(TV, TU, TT * TW); + } + } + { + E T26, T2c, T2a, T2e, T25, T29; + T25 = KP707106781 * (T1M + T1H); + T26 = T24 - T25; + T2c = T24 + T25; + T29 = KP707106781 * (T1V + T1W); + T2a = T28 - T29; + T2e = T28 + T29; + { + E T23, T27, T2b, T2d; + T23 = W[8]; + T27 = W[9]; + rio[WS(vs, 5) + WS(rs, 1)] = FMA(T23, T26, T27 * T2a); + iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T27, T26, T23 * T2a); + T2b = W[0]; + T2d = W[1]; + rio[WS(vs, 1) + WS(rs, 1)] = FMA(T2b, T2c, T2d * T2e); + iio[WS(vs, 1) + WS(rs, 1)] = 
FNMS(T2d, T2c, T2b * T2e); + } + } + { + E T9y, T9K, T9I, T9M, T9x, T9H; + T9x = KP707106781 * (T9r - T9w); + T9y = T9m - T9x; + T9K = T9m + T9x; + T9H = KP707106781 * (T9F - T9G); + T9I = T9E - T9H; + T9M = T9E + T9H; + { + E T9h, T9z, T9J, T9L; + T9h = W[12]; + T9z = W[13]; + iio[WS(vs, 7) + WS(rs, 7)] = FNMS(T9z, T9I, T9h * T9y); + rio[WS(vs, 7) + WS(rs, 7)] = FMA(T9z, T9y, T9h * T9I); + T9J = W[4]; + T9L = W[5]; + iio[WS(vs, 3) + WS(rs, 7)] = FNMS(T9L, T9M, T9J * T9K); + rio[WS(vs, 3) + WS(rs, 7)] = FMA(T9L, T9K, T9J * T9M); + } + } + { + E T6Y, T7a, T78, T7c, T6X, T77; + T6X = KP707106781 * (T6R - T6W); + T6Y = T6M - T6X; + T7a = T6M + T6X; + T77 = KP707106781 * (T75 - T76); + T78 = T74 - T77; + T7c = T74 + T77; + { + E T6H, T6Z, T79, T7b; + T6H = W[12]; + T6Z = W[13]; + iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T6Z, T78, T6H * T6Y); + rio[WS(vs, 7) + WS(rs, 5)] = FMA(T6Z, T6Y, T6H * T78); + T79 = W[4]; + T7b = W[5]; + iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T7b, T7c, T79 * T7a); + rio[WS(vs, 3) + WS(rs, 5)] = FMA(T7b, T7a, T79 * T7c); + } + } + { + E T1O, T20, T1Y, T22, T1N, T1X; + T1N = KP707106781 * (T1H - T1M); + T1O = T1C - T1N; + T20 = T1C + T1N; + T1X = KP707106781 * (T1V - T1W); + T1Y = T1U - T1X; + T22 = T1U + T1X; + { + E T1x, T1P, T1Z, T21; + T1x = W[12]; + T1P = W[13]; + iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T1P, T1Y, T1x * T1O); + rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1P, T1O, T1x * T1Y); + T1Z = W[4]; + T21 = W[5]; + iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T21, T22, T1Z * T20); + rio[WS(vs, 3) + WS(rs, 1)] = FMA(T21, T20, T1Z * T22); + } + } + { + E T4o, T4A, T4y, T4C, T4n, T4x; + T4n = KP707106781 * (T4h - T4m); + T4o = T4c - T4n; + T4A = T4c + T4n; + T4x = KP707106781 * (T4v - T4w); + T4y = T4u - T4x; + T4C = T4u + T4x; + { + E T47, T4p, T4z, T4B; + T47 = W[12]; + T4p = W[13]; + iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T4p, T4y, T47 * T4o); + rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4p, T4o, T47 * T4y); + T4z = W[4]; + T4B = W[5]; + iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T4B, T4C, T4z * 
T4A); + rio[WS(vs, 3) + WS(rs, 3)] = FMA(T4B, T4A, T4z * T4C); + } + } + { + E T3o, T3u, T3s, T3w, T3n, T3r; + T3n = KP707106781 * (T34 + T2Z); + T3o = T3m - T3n; + T3u = T3m + T3n; + T3r = KP707106781 * (T3d + T3e); + T3s = T3q - T3r; + T3w = T3q + T3r; + { + E T3l, T3p, T3t, T3v; + T3l = W[8]; + T3p = W[9]; + rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3l, T3o, T3p * T3s); + iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3p, T3o, T3l * T3s); + T3t = W[0]; + T3v = W[1]; + rio[WS(vs, 1) + WS(rs, 2)] = FMA(T3t, T3u, T3v * T3w); + iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T3v, T3u, T3t * T3w); + } + } + { + E T8y, T8E, T8C, T8G, T8x, T8B; + T8x = KP707106781 * (T8e + T89); + T8y = T8w - T8x; + T8E = T8w + T8x; + T8B = KP707106781 * (T8n + T8o); + T8C = T8A - T8B; + T8G = T8A + T8B; + { + E T8v, T8z, T8D, T8F; + T8v = W[8]; + T8z = W[9]; + rio[WS(vs, 5) + WS(rs, 6)] = FMA(T8v, T8y, T8z * T8C); + iio[WS(vs, 5) + WS(rs, 6)] = FNMS(T8z, T8y, T8v * T8C); + T8D = W[0]; + T8F = W[1]; + rio[WS(vs, 1) + WS(rs, 6)] = FMA(T8D, T8E, T8F * T8G); + iio[WS(vs, 1) + WS(rs, 6)] = FNMS(T8F, T8E, T8D * T8G); + } + } + } + } +} + /* Twiddle instruction list that desc hands to the planner. NOTE(review): the q1_8 loop above steps W by 14 per iteration and reads W[0]..W[13]; presumably {TW_FULL, 0, 8} describes that layout -- confirm against the tw_instr declaration. */ +static const tw_instr twinstr[] = { + {TW_FULL, 0, 8}, + {TW_NEXT, 1, 0} +}; + /* Codelet descriptor: first field 8, name "q1_8", the twiddle list above and &GENUS. NOTE(review): {416, 144, 112, 0} looks like the generator's operation counts (additions, multiplications, fused multiply/adds, other) -- confirm against the ct_desc declaration. */ +static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {416, 144, 112, 0}, 0, 0, 0 }; + /* Registration hook: passes the q1_8 kernel and its descriptor to the planner p through kdft_difsq_register. */ +void X(codelet_q1_8) (planner *p) { + X(kdft_difsq_register) (p, q1_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -name t1_10 -include t.h */ + +/* + * This function contains 102 FP additions, 72 FP multiplications, + * (or, 48 additions, 18 multiplications, 54 fused multiply/add), + * 70 stack variables, 4 constants, and 40 memory accesses + */ +#include "t.h" + /* t1_10, HAVE_FMA variant: size-10 twiddle codelet working in place. For every m in [mb, me) it reads the ten ri/ii values at WS(rs, 0) .. WS(rs, 9), combines slot k (k = 1..9) with the twiddle pair W[2k-2], W[2k-1], and stores ten results back into the same ri/ii slots. ri and ii advance by ms and W by 18 per iteration. NOTE(review): FMA and FNMS are macros defined outside this file; presumably FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b, which would make each twiddle step a complex multiply by (W[2k-2] - i*W[2k-1]) -- confirm. */ +static void t1_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + /* Constants by value: KP951056516 = sin(2*pi/5), KP559016994 = sqrt(5)/4, KP250000000 = 1/4, KP618033988 = (sqrt(5)-1)/2. */ DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + /* W is pre-advanced by mb * 18 so that iteration m starts at its own group of 18 twiddle values. MAKE_VOLATILE_STRIDE is an external macro whose effect is not visible here. */ for (m = mb, W = W + (mb * 18); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) { + E T1X, T21, T20, T22; + { + E T23, T1U, T8, T12, T1y, T25, T1P, T1H, T1Y, T18, T10, T2b, T1K, T1O, T15; + E T1Z, T2a, Tz, T24, T1n; + { + E T1, T1T, T3, T6, T2, T5; + T1 = ri[0]; + T1T = ii[0]; + T3 = ri[WS(rs, 5)]; + T6 = ii[WS(rs, 5)]; + T2 = W[8]; + T5 = W[9]; + { + E T1w, TY, T1s, T1F, TM, T16, T1u, TS; + { + E TF, T1p, TO, TR, T1r, TL, TN, TQ, T1t, TP; + { + E TU, TX, TT, TW; + { + E TB, TE, T1R, T4, TA, TD; + TB = ri[WS(rs, 4)]; + TE = ii[WS(rs, 4)]; +
T1R = T2 * T6; + T4 = T2 * T3; + TA = W[6]; + TD = W[7]; + { + E T1S, T7, T1o, TC; + T1S = FNMS(T5, T3, T1R); + T7 = FMA(T5, T6, T4); + T1o = TA * TE; + TC = TA * TB; + T23 = T1T - T1S; + T1U = T1S + T1T; + T8 = T1 - T7; + T12 = T1 + T7; + TF = FMA(TD, TE, TC); + T1p = FNMS(TD, TB, T1o); + } + } + TU = ri[WS(rs, 1)]; + TX = ii[WS(rs, 1)]; + TT = W[0]; + TW = W[1]; + { + E TH, TK, TJ, T1q, TI, T1v, TV, TG; + TH = ri[WS(rs, 9)]; + TK = ii[WS(rs, 9)]; + T1v = TT * TX; + TV = TT * TU; + TG = W[16]; + TJ = W[17]; + T1w = FNMS(TW, TU, T1v); + TY = FMA(TW, TX, TV); + T1q = TG * TK; + TI = TG * TH; + TO = ri[WS(rs, 6)]; + TR = ii[WS(rs, 6)]; + T1r = FNMS(TJ, TH, T1q); + TL = FMA(TJ, TK, TI); + TN = W[10]; + TQ = W[11]; + } + } + T1s = T1p - T1r; + T1F = T1p + T1r; + TM = TF - TL; + T16 = TF + TL; + T1t = TN * TR; + TP = TN * TO; + T1u = FNMS(TQ, TO, T1t); + TS = FMA(TQ, TR, TP); + } + { + E T1e, Te, T1l, Tx, Tn, Tq, Tp, T1g, Tk, T1i, To; + { + E Tt, Tw, Tv, T1k, Tu; + { + E Ta, Td, T9, Tc, T1d, Tb, Ts; + Ta = ri[WS(rs, 2)]; + Td = ii[WS(rs, 2)]; + { + E T1G, T1x, TZ, T17; + T1G = T1u + T1w; + T1x = T1u - T1w; + TZ = TS - TY; + T17 = TS + TY; + T1y = T1s - T1x; + T25 = T1s + T1x; + T1P = T1F + T1G; + T1H = T1F - T1G; + T1Y = T16 - T17; + T18 = T16 + T17; + T10 = TM + TZ; + T2b = TM - TZ; + T9 = W[2]; + } + Tc = W[3]; + Tt = ri[WS(rs, 3)]; + Tw = ii[WS(rs, 3)]; + T1d = T9 * Td; + Tb = T9 * Ta; + Ts = W[4]; + Tv = W[5]; + T1e = FNMS(Tc, Ta, T1d); + Te = FMA(Tc, Td, Tb); + T1k = Ts * Tw; + Tu = Ts * Tt; + } + { + E Tg, Tj, Tf, Ti, T1f, Th, Tm; + Tg = ri[WS(rs, 7)]; + Tj = ii[WS(rs, 7)]; + T1l = FNMS(Tv, Tt, T1k); + Tx = FMA(Tv, Tw, Tu); + Tf = W[12]; + Ti = W[13]; + Tn = ri[WS(rs, 8)]; + Tq = ii[WS(rs, 8)]; + T1f = Tf * Tj; + Th = Tf * Tg; + Tm = W[14]; + Tp = W[15]; + T1g = FNMS(Ti, Tg, T1f); + Tk = FMA(Ti, Tj, Th); + T1i = Tm * Tq; + To = Tm * Tn; + } + } + { + E T1h, T1I, Tl, T13, T1j, Tr; + T1h = T1e - T1g; + T1I = T1e + T1g; + Tl = Te - Tk; + T13 = Te + Tk; + T1j =
FNMS(Tp, Tn, T1i); + Tr = FMA(Tp, Tq, To); + { + E T1m, T1J, T14, Ty; + T1m = T1j - T1l; + T1J = T1j + T1l; + T14 = Tr + Tx; + Ty = Tr - Tx; + T1K = T1I - T1J; + T1O = T1I + T1J; + T15 = T13 + T14; + T1Z = T13 - T14; + T2a = Tl - Ty; + Tz = Tl + Ty; + T24 = T1h + T1m; + T1n = T1h - T1m; + } + } + } + } + } + /* Output stage: all ten input slots have been read by this point, and every ri/ii store from here to the end of the loop body overwrites one of them. */ { + E T2c, T2e, T29, T2d; + { + E T1b, T11, T26, T28, T27; + T1b = Tz - T10; + T11 = Tz + T10; + T26 = T24 + T25; + T28 = T24 - T25; + { + E T1B, T1z, T1a, T1A, T1c; + T1B = FNMS(KP618033988, T1n, T1y); + T1z = FMA(KP618033988, T1y, T1n); + ri[WS(rs, 5)] = T8 + T11; + T1a = FNMS(KP250000000, T11, T8); + T1A = FNMS(KP559016994, T1b, T1a); + T1c = FMA(KP559016994, T1b, T1a); + T27 = FNMS(KP250000000, T26, T23); + T2c = FMA(KP618033988, T2b, T2a); + T2e = FNMS(KP618033988, T2a, T2b); + ri[WS(rs, 1)] = FMA(KP951056516, T1z, T1c); + ri[WS(rs, 9)] = FNMS(KP951056516, T1z, T1c); + ri[WS(rs, 3)] = FMA(KP951056516, T1B, T1A); + ri[WS(rs, 7)] = FNMS(KP951056516, T1B, T1A); + } + ii[WS(rs, 5)] = T26 + T23; + T29 = FMA(KP559016994, T28, T27); + T2d = FNMS(KP559016994, T28, T27); + } + { + E T1E, T1M, T1L, T1N, T19, T1D, T1C, T1Q, T1W, T1V; + T19 = T15 + T18; + T1D = T15 - T18; + ii[WS(rs, 7)] = FMA(KP951056516, T2e, T2d); + ii[WS(rs, 3)] = FNMS(KP951056516, T2e, T2d); + ii[WS(rs, 9)] = FMA(KP951056516, T2c, T29); + ii[WS(rs, 1)] = FNMS(KP951056516, T2c, T29); + T1C = FNMS(KP250000000, T19, T12); + ri[0] = T12 + T19; + T1E = FNMS(KP559016994, T1D, T1C); + T1M = FMA(KP559016994, T1D, T1C); + T1L = FNMS(KP618033988, T1K, T1H); + T1N = FMA(KP618033988, T1H, T1K); + T1Q = T1O + T1P; + T1W = T1O - T1P; + ri[WS(rs, 6)] = FMA(KP951056516, T1N, T1M); + ri[WS(rs, 4)] = FNMS(KP951056516, T1N, T1M); + ri[WS(rs, 8)] = FMA(KP951056516, T1L, T1E); + ri[WS(rs, 2)] = FNMS(KP951056516, T1L, T1E); + T1V = FNMS(KP250000000, T1Q, T1U); + ii[0] = T1Q + T1U; + T1X = FNMS(KP559016994, T1W, T1V); + T21 = FMA(KP559016994, T1W, T1V); + T20 = FNMS(KP618033988, T1Z, T1Y); + T22 = FMA(KP618033988, T1Y,
T1Z); + } + } + } + ii[WS(rs, 6)] = FNMS(KP951056516, T22, T21); + ii[WS(rs, 4)] = FMA(KP951056516, T22, T21); + ii[WS(rs, 8)] = FNMS(KP951056516, T20, T1X); + ii[WS(rs, 2)] = FMA(KP951056516, T20, T1X); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 10}, + {TW_NEXT, 1, 0} +}; + /* Descriptor for the FMA variant: first field 10, name "t1_10". The vector {48, 18, 54, 0} repeats the generator header's counts of 48 additions, 18 multiplications and 54 fused multiply/adds. */ +static const ct_desc desc = { 10, "t1_10", twinstr, &GENUS, {48, 18, 54, 0}, 0, 0, 0 }; + /* Registration hook: passes this t1_10 kernel and its descriptor to the planner p through kdft_dit_register. */ +void X(codelet_t1_10) (planner *p) { + X(kdft_dit_register) (p, t1_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 10 -name t1_10 -include t.h */ + +/* + * This function contains 102 FP additions, 60 FP multiplications, + * (or, 72 additions, 30 multiplications, 30 fused multiply/add), + * 45 stack variables, 4 constants, and 40 memory accesses + */ +#include "t.h" + /* t1_10, variant built when HAVE_FMA is not defined: same signature and same in-place access pattern as the FMA variant (ten ri/ii slots at WS(rs, 0) .. WS(rs, 9), slot k paired with W[2k-2], W[2k-1], ri/ii stepping by ms and W by 18 per iteration), but emitted without the instruction reordering options, so loads, twiddle products and stores appear in separate braces. NOTE(review): FMA/FNMS are external macros; presumably FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm. */ +static void t1_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + /* Constants by value: KP587785252 = sin(pi/5), KP951056516 = sin(2*pi/5), KP250000000 = 1/4, KP559016994 = sqrt(5)/4. */ DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + /* W is pre-advanced by mb * 18 so that iteration m starts at its own group of 18 twiddle values. */ for (m = mb, W = W + (mb * 18); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) { + E T7, T1O, TT, T1C, TF, TQ, TR, T1o, T1p, T1y, TX, TY, TZ, T1d, T1g; + E T1M, Ti, Tt, Tu, T1r, T1s, T1x, TU, TV, TW, T16, T19, T1L; + /* Slots 0 and 5: slot 5 is combined with W[8], W[9]; T7/T1O hold slot 0 minus the twiddled slot 5, TT/T1C hold their sum. */ { + E T1, T1B, T6, T1A; + T1 = ri[0]; + T1B = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 5)]; + T5 = ii[WS(rs, 5)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T1A = FNMS(T4, T3, T2 * T5); + } + T7 = T1 - T6; + T1O = T1B - T1A; + TT = T1 + T6; + T1C = T1A + T1B; + } + /* Slots 4, 1, 9 and 6: each is combined with its twiddle pair, then the four results are folded into sums and differences. */ { + E Tz, T1b, TP, T1f, TE, T1c, TK, T1e; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 4)]; + Ty = ii[WS(rs, 4)]; + Tv = W[6]; + Tx = W[7]; + Tz = FMA(Tv, Tw, Tx * Ty); + T1b = FNMS(Tx, Tw, Tv * Ty); + } + { + E TM, TO, TL, TN; + TM =
ri[WS(rs, 1)]; + TO = ii[WS(rs, 1)]; + TL = W[0]; + TN = W[1]; + TP = FMA(TL, TM, TN * TO); + T1f = FNMS(TN, TM, TL * TO); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 9)]; + TD = ii[WS(rs, 9)]; + TA = W[16]; + TC = W[17]; + TE = FMA(TA, TB, TC * TD); + T1c = FNMS(TC, TB, TA * TD); + } + { + E TH, TJ, TG, TI; + TH = ri[WS(rs, 6)]; + TJ = ii[WS(rs, 6)]; + TG = W[10]; + TI = W[11]; + TK = FMA(TG, TH, TI * TJ); + T1e = FNMS(TI, TH, TG * TJ); + } + TF = Tz - TE; + TQ = TK - TP; + TR = TF + TQ; + T1o = T1b + T1c; + T1p = T1e + T1f; + T1y = T1o + T1p; + TX = Tz + TE; + TY = TK + TP; + TZ = TX + TY; + T1d = T1b - T1c; + T1g = T1e - T1f; + T1M = T1d + T1g; + } + /* Slots 2, 3, 7 and 8: same treatment as the previous group. */ { + E Tc, T14, Ts, T18, Th, T15, Tn, T17; + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 2)]; + Tb = ii[WS(rs, 2)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + T14 = FNMS(Ta, T9, T8 * Tb); + } + { + E Tp, Tr, To, Tq; + Tp = ri[WS(rs, 3)]; + Tr = ii[WS(rs, 3)]; + To = W[4]; + Tq = W[5]; + Ts = FMA(To, Tp, Tq * Tr); + T18 = FNMS(Tq, Tp, To * Tr); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 7)]; + Tg = ii[WS(rs, 7)]; + Td = W[12]; + Tf = W[13]; + Th = FMA(Td, Te, Tf * Tg); + T15 = FNMS(Tf, Te, Td * Tg); + } + { + E Tk, Tm, Tj, Tl; + Tk = ri[WS(rs, 8)]; + Tm = ii[WS(rs, 8)]; + Tj = W[14]; + Tl = W[15]; + Tn = FMA(Tj, Tk, Tl * Tm); + T17 = FNMS(Tl, Tk, Tj * Tm); + } + Ti = Tc - Th; + Tt = Tn - Ts; + Tu = Ti + Tt; + T1r = T14 + T15; + T1s = T17 + T18; + T1x = T1r + T1s; + TU = Tc + Th; + TV = Tn + Ts; + TW = TU + TV; + T16 = T14 - T15; + T19 = T17 - T18; + T1L = T16 + T19; + } + /* Stores ri at slots 5, 7, 3, 9 and 1. */ { + E T11, TS, T12, T1i, T1k, T1a, T1h, T1j, T13; + T11 = KP559016994 * (Tu - TR); + TS = Tu + TR; + T12 = FNMS(KP250000000, TS, T7); + T1a = T16 - T19; + T1h = T1d - T1g; + T1i = FMA(KP951056516, T1a, KP587785252 * T1h); + T1k = FNMS(KP587785252, T1a, KP951056516 * T1h); + ri[WS(rs, 5)] = T7 + TS; + T1j = T12 - T11; + ri[WS(rs, 7)] = T1j - T1k; + ri[WS(rs, 3)] = T1j + T1k; + T13 = T11 + T12; + ri[WS(rs, 9)] = T13 - T1i; + ri[WS(rs, 1)] =
T13 + T1i; + } + /* Stores ii at slots 5, 3, 7, 1 and 9. */ { + E T1N, T1P, T1Q, T1U, T1W, T1S, T1T, T1V, T1R; + T1N = KP559016994 * (T1L - T1M); + T1P = T1L + T1M; + T1Q = FNMS(KP250000000, T1P, T1O); + T1S = Ti - Tt; + T1T = TF - TQ; + T1U = FMA(KP951056516, T1S, KP587785252 * T1T); + T1W = FNMS(KP587785252, T1S, KP951056516 * T1T); + ii[WS(rs, 5)] = T1P + T1O; + T1V = T1Q - T1N; + ii[WS(rs, 3)] = T1V - T1W; + ii[WS(rs, 7)] = T1W + T1V; + T1R = T1N + T1Q; + ii[WS(rs, 1)] = T1R - T1U; + ii[WS(rs, 9)] = T1U + T1R; + } + /* Stores ri at slots 0, 4, 6, 2 and 8. */ { + E T1m, T10, T1l, T1u, T1w, T1q, T1t, T1v, T1n; + T1m = KP559016994 * (TW - TZ); + T10 = TW + TZ; + T1l = FNMS(KP250000000, T10, TT); + T1q = T1o - T1p; + T1t = T1r - T1s; + T1u = FNMS(KP587785252, T1t, KP951056516 * T1q); + T1w = FMA(KP951056516, T1t, KP587785252 * T1q); + ri[0] = TT + T10; + T1v = T1m + T1l; + ri[WS(rs, 4)] = T1v - T1w; + ri[WS(rs, 6)] = T1v + T1w; + T1n = T1l - T1m; + ri[WS(rs, 2)] = T1n - T1u; + ri[WS(rs, 8)] = T1n + T1u; + } + /* Stores ii at slots 0, 4, 6, 2 and 8. */ { + E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I; + T1H = KP559016994 * (T1x - T1y); + T1z = T1x + T1y; + T1G = FNMS(KP250000000, T1z, T1C); + T1D = TX - TY; + T1E = TU - TV; + T1F = FNMS(KP587785252, T1E, KP951056516 * T1D); + T1J = FMA(KP951056516, T1E, KP587785252 * T1D); + ii[0] = T1z + T1C; + T1K = T1H + T1G; + ii[WS(rs, 4)] = T1J + T1K; + ii[WS(rs, 6)] = T1K - T1J; + T1I = T1G - T1H; + ii[WS(rs, 2)] = T1F + T1I; + ii[WS(rs, 8)] = T1I - T1F; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 10}, + {TW_NEXT, 1, 0} +}; + /* Descriptor for the non-FMA variant: first field 10, name "t1_10". The vector {72, 30, 30, 0} repeats the generator header's counts of 72 additions, 30 multiplications and 30 fused multiply/adds. */ +static const ct_desc desc = { 10, "t1_10", twinstr, &GENUS, {72, 30, 30, 0}, 0, 0, 0 }; + /* Registration hook: passes this t1_10 kernel and its descriptor to the planner p through kdft_dit_register. */ +void X(codelet_t1_10) (planner *p) { + X(kdft_dit_register) (p, t1_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 
Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name t1_12 -include t.h */ + +/* + * This function contains 118 FP additions, 68 FP multiplications, + * (or, 72 additions, 22 multiplications, 46 fused multiply/add), + * 84 stack variables, 2 constants, and 48 memory accesses + */ +#include "t.h" + +static void t1_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 22); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { + E T2B, T2C; + { + E T1, T2i, T2e, Tl, T1Y, T10, T1S, TG, T2f, T1s, T2r, Ty, T1Z, T1H, T21; + E T1d, TI, TL, T2h, T1l, T2o, Te, TJ, T1w, TO, TR, TN, TK, TQ; + { + E TW, TZ, TY, T1X, TX; + T1 = ri[0]; + T2i = ii[0]; + { + E Th, Tk, Tg, Tj, T2d, Ti, TV; + Th = ri[WS(rs, 6)]; + Tk = ii[WS(rs, 6)]; + Tg = 
W[10]; + Tj = W[11]; + TW = ri[WS(rs, 9)]; + TZ = ii[WS(rs, 9)]; + T2d = Tg * Tk; + Ti = Tg * Th; + TV = W[16]; + TY = W[17]; + T2e = FNMS(Tj, Th, T2d); + Tl = FMA(Tj, Tk, Ti); + T1X = TV * TZ; + TX = TV * TW; + } + { + E Tn, Tq, Tt, T1o, To, Tw, Ts, Tp, Tv; + { + E TC, TF, TB, TE, T1R, TD, Tm; + TC = ri[WS(rs, 3)]; + TF = ii[WS(rs, 3)]; + T1Y = FNMS(TY, TW, T1X); + T10 = FMA(TY, TZ, TX); + TB = W[4]; + TE = W[5]; + Tn = ri[WS(rs, 10)]; + Tq = ii[WS(rs, 10)]; + T1R = TB * TF; + TD = TB * TC; + Tm = W[18]; + Tt = ri[WS(rs, 2)]; + T1S = FNMS(TE, TC, T1R); + TG = FMA(TE, TF, TD); + T1o = Tm * Tq; + To = Tm * Tn; + Tw = ii[WS(rs, 2)]; + Ts = W[2]; + Tp = W[19]; + Tv = W[3]; + } + { + E T12, T15, T13, T1D, T18, T1b, T17, T14, T1a; + { + E T1p, Tr, T1r, Tx, T1q, Tu, T11; + T12 = ri[WS(rs, 1)]; + T1q = Ts * Tw; + Tu = Ts * Tt; + T1p = FNMS(Tp, Tn, T1o); + Tr = FMA(Tp, Tq, To); + T1r = FNMS(Tv, Tt, T1q); + Tx = FMA(Tv, Tw, Tu); + T15 = ii[WS(rs, 1)]; + T11 = W[0]; + T2f = T1p + T1r; + T1s = T1p - T1r; + T2r = Tx - Tr; + Ty = Tr + Tx; + T13 = T11 * T12; + T1D = T11 * T15; + } + T18 = ri[WS(rs, 5)]; + T1b = ii[WS(rs, 5)]; + T17 = W[8]; + T14 = W[1]; + T1a = W[9]; + { + E T3, T6, T4, T1h, T9, Tc, T8, T5, Tb; + { + E T1E, T16, T1G, T1c, T1F, T19, T2; + T3 = ri[WS(rs, 4)]; + T1F = T17 * T1b; + T19 = T17 * T18; + T1E = FNMS(T14, T12, T1D); + T16 = FMA(T14, T15, T13); + T1G = FNMS(T1a, T18, T1F); + T1c = FMA(T1a, T1b, T19); + T6 = ii[WS(rs, 4)]; + T2 = W[6]; + T1Z = T1E + T1G; + T1H = T1E - T1G; + T21 = T1c - T16; + T1d = T16 + T1c; + T4 = T2 * T3; + T1h = T2 * T6; + } + T9 = ri[WS(rs, 8)]; + Tc = ii[WS(rs, 8)]; + T8 = W[14]; + T5 = W[7]; + Tb = W[15]; + { + E T1i, T7, T1k, Td, T1j, Ta, TH; + TI = ri[WS(rs, 7)]; + T1j = T8 * Tc; + Ta = T8 * T9; + T1i = FNMS(T5, T3, T1h); + T7 = FMA(T5, T6, T4); + T1k = FNMS(Tb, T9, T1j); + Td = FMA(Tb, Tc, Ta); + TL = ii[WS(rs, 7)]; + TH = W[12]; + T2h = T1i + T1k; + T1l = T1i - T1k; + T2o = Td - T7; + Te = T7 + Td; + TJ = TH * TI; + T1w = TH * 
TL; + } + TO = ri[WS(rs, 11)]; + TR = ii[WS(rs, 11)]; + TN = W[20]; + TK = W[13]; + TQ = W[21]; + } + } + } + } + { + E T1g, T1n, T2q, T1A, T1V, T28, TA, T2n, T1v, T1C, T1U, T29, T2m, T2k, T2l; + E T1f, T2a, T20; + { + E T2g, T1T, TT, T2j, TU, T1e; + { + E Tf, T1x, TM, T1z, TS, Tz, T1y, TP; + T1g = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T1y = TN * TR; + TP = TN * TO; + T1x = FNMS(TK, TI, T1w); + TM = FMA(TK, TL, TJ); + T1z = FNMS(TQ, TO, T1y); + TS = FMA(TQ, TR, TP); + Tz = Tl + Ty; + T1n = FNMS(KP500000000, Ty, Tl); + T2q = FNMS(KP500000000, T2f, T2e); + T2g = T2e + T2f; + T1T = T1x + T1z; + T1A = T1x - T1z; + T1V = TS - TM; + TT = TM + TS; + T28 = Tf - Tz; + TA = Tf + Tz; + T2j = T2h + T2i; + T2n = FNMS(KP500000000, T2h, T2i); + } + T1v = FNMS(KP500000000, TT, TG); + TU = TG + TT; + T1e = T10 + T1d; + T1C = FNMS(KP500000000, T1d, T10); + T1U = FNMS(KP500000000, T1T, T1S); + T29 = T1S + T1T; + T2m = T2j - T2g; + T2k = T2g + T2j; + T2l = TU - T1e; + T1f = TU + T1e; + T2a = T1Y + T1Z; + T20 = FNMS(KP500000000, T1Z, T1Y); + } + { + E T1m, T1K, T2y, T2p, T2x, T2s, T1L, T1t, T1B, T1N, T2c, T2b; + ii[WS(rs, 9)] = T2m - T2l; + ii[WS(rs, 3)] = T2l + T2m; + ri[0] = TA + T1f; + ri[WS(rs, 6)] = TA - T1f; + T2c = T29 + T2a; + T2b = T29 - T2a; + T1m = FNMS(KP866025403, T1l, T1g); + T1K = FMA(KP866025403, T1l, T1g); + ii[0] = T2c + T2k; + ii[WS(rs, 6)] = T2k - T2c; + ri[WS(rs, 9)] = T28 + T2b; + ri[WS(rs, 3)] = T28 - T2b; + T2y = FNMS(KP866025403, T2o, T2n); + T2p = FMA(KP866025403, T2o, T2n); + T2x = FNMS(KP866025403, T2r, T2q); + T2s = FMA(KP866025403, T2r, T2q); + T1L = FMA(KP866025403, T1s, T1n); + T1t = FNMS(KP866025403, T1s, T1n); + T1B = FNMS(KP866025403, T1A, T1v); + T1N = FMA(KP866025403, T1A, T1v); + { + E T24, T27, T1Q, T2u, T23, T2v, T2w, T2t; + { + E T1u, T1W, T22, T1O, T1I, T2z, T2A, T25, T26, T1M, T1J, T1P; + T24 = T1m - T1t; + T1u = T1m + T1t; + T25 = FNMS(KP866025403, T1V, T1U); + T1W = FMA(KP866025403, T1V, T1U); + T26 = FNMS(KP866025403, T21, T20); + 
T22 = FMA(KP866025403, T21, T20); + T1O = FMA(KP866025403, T1H, T1C); + T1I = FNMS(KP866025403, T1H, T1C); + T2z = T2x + T2y; + T2B = T2y - T2x; + T27 = T25 - T26; + T2A = T25 + T26; + T1M = T1K + T1L; + T1Q = T1K - T1L; + T2C = T1B - T1I; + T1J = T1B + T1I; + T1P = T1N + T1O; + T2u = T1N - T1O; + ii[WS(rs, 8)] = T2A + T2z; + ii[WS(rs, 2)] = T2z - T2A; + ri[WS(rs, 8)] = T1u + T1J; + ri[WS(rs, 2)] = T1u - T1J; + ri[WS(rs, 10)] = T1M - T1P; + ri[WS(rs, 4)] = T1M + T1P; + T23 = T1W - T22; + T2v = T1W + T22; + T2w = T2s + T2p; + T2t = T2p - T2s; + } + ii[WS(rs, 10)] = T2w - T2v; + ii[WS(rs, 4)] = T2v + T2w; + ri[WS(rs, 1)] = T1Q + T23; + ri[WS(rs, 7)] = T1Q - T23; + ii[WS(rs, 7)] = T2u + T2t; + ii[WS(rs, 1)] = T2t - T2u; + ri[WS(rs, 5)] = T24 + T27; + ri[WS(rs, 11)] = T24 - T27; + } + } + } + } + ii[WS(rs, 11)] = T2C + T2B; + ii[WS(rs, 5)] = T2B - T2C; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 12}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 12, "t1_12", twinstr, &GENUS, {72, 22, 46, 0}, 0, 0, 0 }; + +void X(codelet_t1_12) (planner *p) { + X(kdft_dit_register) (p, t1_12, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 12 -name t1_12 -include t.h */ + +/* + * This function contains 118 FP additions, 60 FP multiplications, + * (or, 88 additions, 30 multiplications, 30 fused multiply/add), + * 47 stack variables, 2 constants, and 48 memory accesses + */ +#include "t.h" + +static void t1_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + (mb * 22); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { + E T1, T1W, T18, T21, Tc, T15, T1V, T22, TR, T1E, T1o, T1D, T12, T1l, T1F; + E T1G, Ti, T1S, T1d, T24, Tt, T1a, T1T, T25, TA, T1z, 
T1j, T1y, TL, T1g; + E T1A, T1B; + { + E T6, T16, Tb, T17; + T1 = ri[0]; + T1W = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 4)]; + T5 = ii[WS(rs, 4)]; + T2 = W[6]; + T4 = W[7]; + T6 = FMA(T2, T3, T4 * T5); + T16 = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 8)]; + Ta = ii[WS(rs, 8)]; + T7 = W[14]; + T9 = W[15]; + Tb = FMA(T7, T8, T9 * Ta); + T17 = FNMS(T9, T8, T7 * Ta); + } + T18 = KP866025403 * (T16 - T17); + T21 = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + T15 = FNMS(KP500000000, Tc, T1); + T1V = T16 + T17; + T22 = FNMS(KP500000000, T1V, T1W); + } + { + E T11, T1n, TW, T1m; + { + E TO, TQ, TN, TP; + TO = ri[WS(rs, 9)]; + TQ = ii[WS(rs, 9)]; + TN = W[16]; + TP = W[17]; + TR = FMA(TN, TO, TP * TQ); + T1E = FNMS(TP, TO, TN * TQ); + } + { + E TY, T10, TX, TZ; + TY = ri[WS(rs, 5)]; + T10 = ii[WS(rs, 5)]; + TX = W[8]; + TZ = W[9]; + T11 = FMA(TX, TY, TZ * T10); + T1n = FNMS(TZ, TY, TX * T10); + } + { + E TT, TV, TS, TU; + TT = ri[WS(rs, 1)]; + TV = ii[WS(rs, 1)]; + TS = W[0]; + TU = W[1]; + TW = FMA(TS, TT, TU * TV); + T1m = FNMS(TU, TT, TS * TV); + } + T1o = KP866025403 * (T1m - T1n); + T1D = KP866025403 * (T11 - TW); + T12 = TW + T11; + T1l = FNMS(KP500000000, T12, TR); + T1F = T1m + T1n; + T1G = FNMS(KP500000000, T1F, T1E); + } + { + E Ts, T1c, Tn, T1b; + { + E Tf, Th, Te, Tg; + Tf = ri[WS(rs, 6)]; + Th = ii[WS(rs, 6)]; + Te = W[10]; + Tg = W[11]; + Ti = FMA(Te, Tf, Tg * Th); + T1S = FNMS(Tg, Tf, Te * Th); + } + { + E Tp, Tr, To, Tq; + Tp = ri[WS(rs, 2)]; + Tr = ii[WS(rs, 2)]; + To = W[2]; + Tq = W[3]; + Ts = FMA(To, Tp, Tq * Tr); + T1c = FNMS(Tq, Tp, To * Tr); + } + { + E Tk, Tm, Tj, Tl; + Tk = ri[WS(rs, 10)]; + Tm = ii[WS(rs, 10)]; + Tj = W[18]; + Tl = W[19]; + Tn = FMA(Tj, Tk, Tl * Tm); + T1b = FNMS(Tl, Tk, Tj * Tm); + } + T1d = KP866025403 * (T1b - T1c); + T24 = KP866025403 * (Ts - Tn); + Tt = Tn + Ts; + T1a = FNMS(KP500000000, Tt, Ti); + T1T = T1b + T1c; + T25 = FNMS(KP500000000, T1T, T1S); + } + { + E TK, T1i, TF, T1h; + { + E Tx, 
Tz, Tw, Ty; + Tx = ri[WS(rs, 3)]; + Tz = ii[WS(rs, 3)]; + Tw = W[4]; + Ty = W[5]; + TA = FMA(Tw, Tx, Ty * Tz); + T1z = FNMS(Ty, Tx, Tw * Tz); + } + { + E TH, TJ, TG, TI; + TH = ri[WS(rs, 11)]; + TJ = ii[WS(rs, 11)]; + TG = W[20]; + TI = W[21]; + TK = FMA(TG, TH, TI * TJ); + T1i = FNMS(TI, TH, TG * TJ); + } + { + E TC, TE, TB, TD; + TC = ri[WS(rs, 7)]; + TE = ii[WS(rs, 7)]; + TB = W[12]; + TD = W[13]; + TF = FMA(TB, TC, TD * TE); + T1h = FNMS(TD, TC, TB * TE); + } + T1j = KP866025403 * (T1h - T1i); + T1y = KP866025403 * (TK - TF); + TL = TF + TK; + T1g = FNMS(KP500000000, TL, TA); + T1A = T1h + T1i; + T1B = FNMS(KP500000000, T1A, T1z); + } + { + E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R; + { + E Td, Tu, T1U, T1X; + Td = T1 + Tc; + Tu = Ti + Tt; + Tv = Td + Tu; + T1N = Td - Tu; + T1U = T1S + T1T; + T1X = T1V + T1W; + T1Y = T1U + T1X; + T20 = T1X - T1U; + } + { + E TM, T13, T1O, T1P; + TM = TA + TL; + T13 = TR + T12; + T14 = TM + T13; + T1Z = TM - T13; + T1O = T1z + T1A; + T1P = T1E + T1F; + T1Q = T1O - T1P; + T1R = T1O + T1P; + } + ri[WS(rs, 6)] = Tv - T14; + ii[WS(rs, 6)] = T1Y - T1R; + ri[0] = Tv + T14; + ii[0] = T1R + T1Y; + ri[WS(rs, 3)] = T1N - T1Q; + ii[WS(rs, 3)] = T1Z + T20; + ri[WS(rs, 9)] = T1N + T1Q; + ii[WS(rs, 9)] = T20 - T1Z; + } + { + E T1t, T1x, T27, T2a, T1w, T28, T1I, T29; + { + E T1r, T1s, T23, T26; + T1r = T15 + T18; + T1s = T1a + T1d; + T1t = T1r + T1s; + T1x = T1r - T1s; + T23 = T21 + T22; + T26 = T24 + T25; + T27 = T23 - T26; + T2a = T26 + T23; + } + { + E T1u, T1v, T1C, T1H; + T1u = T1g + T1j; + T1v = T1l + T1o; + T1w = T1u + T1v; + T28 = T1u - T1v; + T1C = T1y + T1B; + T1H = T1D + T1G; + T1I = T1C - T1H; + T29 = T1C + T1H; + } + ri[WS(rs, 10)] = T1t - T1w; + ii[WS(rs, 10)] = T2a - T29; + ri[WS(rs, 4)] = T1t + T1w; + ii[WS(rs, 4)] = T29 + T2a; + ri[WS(rs, 7)] = T1x - T1I; + ii[WS(rs, 7)] = T28 + T27; + ri[WS(rs, 1)] = T1x + T1I; + ii[WS(rs, 1)] = T27 - T28; + } + { + E T1f, T1J, T2d, T2f, T1q, T2g, T1M, T2e; + { + E T19, T1e, T2b, T2c; + T19 = 
T15 - T18; + T1e = T1a - T1d; + T1f = T19 + T1e; + T1J = T19 - T1e; + T2b = T25 - T24; + T2c = T22 - T21; + T2d = T2b + T2c; + T2f = T2c - T2b; + } + { + E T1k, T1p, T1K, T1L; + T1k = T1g - T1j; + T1p = T1l - T1o; + T1q = T1k + T1p; + T2g = T1k - T1p; + T1K = T1B - T1y; + T1L = T1G - T1D; + T1M = T1K - T1L; + T2e = T1K + T1L; + } + ri[WS(rs, 2)] = T1f - T1q; + ii[WS(rs, 2)] = T2d - T2e; + ri[WS(rs, 8)] = T1f + T1q; + ii[WS(rs, 8)] = T2e + T2d; + ri[WS(rs, 11)] = T1J - T1M; + ii[WS(rs, 11)] = T2g + T2f; + ri[WS(rs, 5)] = T1J + T1M; + ii[WS(rs, 5)] = T2f - T2g; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 12}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 12, "t1_12", twinstr, &GENUS, {88, 30, 30, 0}, 0, 0, 0 }; + +void X(codelet_t1_12) (planner *p) { + X(kdft_dit_register) (p, t1_12, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,801 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 15 -name t1_15 -include t.h */ + +/* + * This function contains 184 FP additions, 140 FP multiplications, + * (or, 72 additions, 28 multiplications, 112 fused multiply/add), + * 89 stack variables, 6 constants, and 60 memory accesses + */ +#include "t.h" + +static void t1_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 28); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) { + E T2d, T2O, T2Q, T2m, T2k, T2l, T2P, T2n; + { + E T1G, T3u, T3k, T3t, T1B, Tf, T37, T1y, T2V, T2M, T2a, T2i, T39, Tz, T2X; + E T2t, T1O, T2e, T3a, TT, T10, T2Y, T2z, T1V, T2f, T2C, T12, T15, T14, T21; + E T1c, T1Y, T13; + { + E T2I, T1k, T1m, T1p, T1o, T28, T1w, T25, T1n; + { + E T1, T3j, T9, Tc, Tb, T1D, T7, T1E, Ta, T1j, T1i, T1h; + T1 = ri[0]; + T3j = ii[0]; + { + E T3, T6, T2, T5, T1C, T4, T8; + T3 = ri[WS(rs, 5)]; + T6 = ii[WS(rs, 5)]; + T2 = W[8]; + T5 = W[9]; + T9 = ri[WS(rs, 10)]; + Tc = ii[WS(rs, 10)]; + T1C = T2 * T6; + T4 = T2 * T3; + T8 = W[18]; + Tb = 
W[19]; + T1D = FNMS(T5, T3, T1C); + T7 = FMA(T5, T6, T4); + T1E = T8 * Tc; + Ta = T8 * T9; + } + { + E T1g, T1F, Td, T1f, T3i, Te, T2H; + T1g = ri[WS(rs, 9)]; + T1j = ii[WS(rs, 9)]; + T1F = FNMS(Tb, T9, T1E); + Td = FMA(Tb, Tc, Ta); + T1f = W[16]; + T1i = W[17]; + T1G = T1D - T1F; + T3i = T1D + T1F; + T3u = Td - T7; + Te = T7 + Td; + T2H = T1f * T1j; + T1h = T1f * T1g; + T3k = T3i + T3j; + T3t = FNMS(KP500000000, T3i, T3j); + T1B = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T2I = FNMS(T1i, T1g, T2H); + } + T1k = FMA(T1i, T1j, T1h); + { + E T1s, T1v, T1r, T1u, T27, T1t, T1l; + T1s = ri[WS(rs, 4)]; + T1v = ii[WS(rs, 4)]; + T1r = W[6]; + T1u = W[7]; + T1m = ri[WS(rs, 14)]; + T1p = ii[WS(rs, 14)]; + T27 = T1r * T1v; + T1t = T1r * T1s; + T1l = W[26]; + T1o = W[27]; + T28 = FNMS(T1u, T1s, T27); + T1w = FMA(T1u, T1v, T1t); + T25 = T1l * T1p; + T1n = T1l * T1m; + } + } + { + E Tl, T2p, Tn, Tq, Tp, T1M, Tx, T1J, To; + { + E Th, Tk, T26, T1q, Tg, Tj; + Th = ri[WS(rs, 3)]; + Tk = ii[WS(rs, 3)]; + T26 = FNMS(T1o, T1m, T25); + T1q = FMA(T1o, T1p, T1n); + Tg = W[4]; + Tj = W[5]; + { + E T29, T2J, T1x, T2L; + T29 = T26 - T28; + T2J = T26 + T28; + T1x = T1q + T1w; + T2L = T1w - T1q; + { + E T2o, Ti, T2K, T24; + T2o = Tg * Tk; + Ti = Tg * Th; + T2K = FNMS(KP500000000, T2J, T2I); + T37 = T2I + T2J; + T24 = FNMS(KP500000000, T1x, T1k); + T1y = T1k + T1x; + Tl = FMA(Tj, Tk, Ti); + T2V = FNMS(KP866025403, T2L, T2K); + T2M = FMA(KP866025403, T2L, T2K); + T2a = FNMS(KP866025403, T29, T24); + T2i = FMA(KP866025403, T29, T24); + T2p = FNMS(Tj, Th, T2o); + } + } + } + { + E Tt, Tw, Ts, Tv, T1L, Tu, Tm; + Tt = ri[WS(rs, 13)]; + Tw = ii[WS(rs, 13)]; + Ts = W[24]; + Tv = W[25]; + Tn = ri[WS(rs, 8)]; + Tq = ii[WS(rs, 8)]; + T1L = Ts * Tw; + Tu = Ts * Tt; + Tm = W[14]; + Tp = W[15]; + T1M = FNMS(Tv, Tt, T1L); + Tx = FMA(Tv, Tw, Tu); + T1J = Tm * Tq; + To = Tm * Tn; + } + { + E TF, T2v, TH, TK, TJ, T1T, TR, T1Q, TI; + { + E TB, TE, T1K, Tr, TA, TD; + TB = ri[WS(rs, 12)]; + TE = ii[WS(rs, 12)]; 
+ T1K = FNMS(Tp, Tn, T1J); + Tr = FMA(Tp, Tq, To); + TA = W[22]; + TD = W[23]; + { + E T1N, T2q, Ty, T2s; + T1N = T1K - T1M; + T2q = T1K + T1M; + Ty = Tr + Tx; + T2s = Tx - Tr; + { + E T2u, TC, T2r, T1I; + T2u = TA * TE; + TC = TA * TB; + T2r = FNMS(KP500000000, T2q, T2p); + T39 = T2p + T2q; + T1I = FNMS(KP500000000, Ty, Tl); + Tz = Tl + Ty; + TF = FMA(TD, TE, TC); + T2X = FNMS(KP866025403, T2s, T2r); + T2t = FMA(KP866025403, T2s, T2r); + T1O = FNMS(KP866025403, T1N, T1I); + T2e = FMA(KP866025403, T1N, T1I); + T2v = FNMS(TD, TB, T2u); + } + } + } + { + E TN, TQ, TM, TP, T1S, TO, TG; + TN = ri[WS(rs, 7)]; + TQ = ii[WS(rs, 7)]; + TM = W[12]; + TP = W[13]; + TH = ri[WS(rs, 2)]; + TK = ii[WS(rs, 2)]; + T1S = TM * TQ; + TO = TM * TN; + TG = W[2]; + TJ = W[3]; + T1T = FNMS(TP, TN, T1S); + TR = FMA(TP, TQ, TO); + T1Q = TG * TK; + TI = TG * TH; + } + { + E TW, TZ, T1R, TL, TV, TY; + TW = ri[WS(rs, 6)]; + TZ = ii[WS(rs, 6)]; + T1R = FNMS(TJ, TH, T1Q); + TL = FMA(TJ, TK, TI); + TV = W[10]; + TY = W[11]; + { + E T1U, T2w, TS, T2y; + T1U = T1R - T1T; + T2w = T1R + T1T; + TS = TL + TR; + T2y = TR - TL; + { + E T2B, TX, T2x, T1P; + T2B = TV * TZ; + TX = TV * TW; + T2x = FNMS(KP500000000, T2w, T2v); + T3a = T2v + T2w; + T1P = FNMS(KP500000000, TS, TF); + TT = TF + TS; + T10 = FMA(TY, TZ, TX); + T2Y = FNMS(KP866025403, T2y, T2x); + T2z = FMA(KP866025403, T2y, T2x); + T1V = FNMS(KP866025403, T1U, T1P); + T2f = FMA(KP866025403, T1U, T1P); + T2C = FNMS(TY, TW, T2B); + } + } + } + { + E T18, T1b, T17, T1a, T20, T19, T11; + T18 = ri[WS(rs, 1)]; + T1b = ii[WS(rs, 1)]; + T17 = W[0]; + T1a = W[1]; + T12 = ri[WS(rs, 11)]; + T15 = ii[WS(rs, 11)]; + T20 = T17 * T1b; + T19 = T17 * T18; + T11 = W[20]; + T14 = W[21]; + T21 = FNMS(T1a, T18, T20); + T1c = FMA(T1a, T1b, T19); + T1Y = T11 * T15; + T13 = T11 * T12; + } + } + } + } + { + E T2G, T2h, T3J, T3I, T32, T30, T1H, T1W, T3P, T3O, T2b; + { + E T3f, T3b, T1Z, T16, T3p, TU; + T3f = T39 + T3a; + T3b = T39 - T3a; + T1Z = FNMS(T14, T12, T1Y); + 
T16 = FMA(T14, T15, T13); + T3p = Tz - TT; + TU = Tz + TT; + { + E T3g, T2U, T23, T3c, T3e, T3q, T3s, T1A, T34, T3r, T3n; + { + E T22, T1d, T2F, T2E, T36, T2D; + T22 = T1Z - T21; + T2D = T1Z + T21; + T1d = T16 + T1c; + T2F = T1c - T16; + T2E = FNMS(KP500000000, T2D, T2C); + T36 = T2C + T2D; + { + E T1e, T1X, T38, T1z, T3o; + T1e = T10 + T1d; + T1X = FNMS(KP500000000, T1d, T10); + T38 = T36 - T37; + T3g = T36 + T37; + T2G = FMA(KP866025403, T2F, T2E); + T2U = FNMS(KP866025403, T2F, T2E); + T1z = T1e + T1y; + T3o = T1e - T1y; + T2h = FMA(KP866025403, T22, T1X); + T23 = FNMS(KP866025403, T22, T1X); + T3c = FNMS(KP618033988, T3b, T38); + T3e = FMA(KP618033988, T38, T3b); + T3q = FNMS(KP618033988, T3p, T3o); + T3s = FMA(KP618033988, T3o, T3p); + T1A = TU + T1z; + T34 = TU - T1z; + } + } + { + E T2W, T33, T3m, T3h, T2Z, T3d, T35, T3l; + T3J = T2U + T2V; + T2W = T2U - T2V; + ri[0] = Tf + T1A; + T33 = FNMS(KP250000000, T1A, Tf); + T3m = T3f - T3g; + T3h = T3f + T3g; + T2Z = T2X - T2Y; + T3I = T2X + T2Y; + T3d = FMA(KP559016994, T34, T33); + T35 = FNMS(KP559016994, T34, T33); + ii[0] = T3h + T3k; + T3l = FNMS(KP250000000, T3h, T3k); + ri[WS(rs, 3)] = FMA(KP951056516, T3c, T35); + ri[WS(rs, 12)] = FNMS(KP951056516, T3c, T35); + ri[WS(rs, 6)] = FMA(KP951056516, T3e, T3d); + ri[WS(rs, 9)] = FNMS(KP951056516, T3e, T3d); + T3r = FMA(KP559016994, T3m, T3l); + T3n = FNMS(KP559016994, T3m, T3l); + T32 = FMA(KP618033988, T2W, T2Z); + T30 = FNMS(KP618033988, T2Z, T2W); + } + ii[WS(rs, 12)] = FMA(KP951056516, T3q, T3n); + ii[WS(rs, 3)] = FNMS(KP951056516, T3q, T3n); + ii[WS(rs, 9)] = FMA(KP951056516, T3s, T3r); + ii[WS(rs, 6)] = FNMS(KP951056516, T3s, T3r); + T2d = FMA(KP866025403, T1G, T1B); + T1H = FNMS(KP866025403, T1G, T1B); + T1W = T1O + T1V; + T3P = T1O - T1V; + T3O = T23 - T2a; + T2b = T23 + T2a; + } + } + { + E T3H, T3v, T2S, T3Q, T3S, T2R, T2c; + T3H = FNMS(KP866025403, T3u, T3t); + T3v = FMA(KP866025403, T3u, T3t); + T2c = T1W + T2b; + T2S = T1W - T2b; + T3Q = 
FNMS(KP618033988, T3P, T3O); + T3S = FMA(KP618033988, T3O, T3P); + ri[WS(rs, 5)] = T1H + T2c; + T2R = FNMS(KP250000000, T2c, T1H); + { + E T2g, T2j, T3G, T3E, T2A, T2N, T3y, T3A, T3M, T3L, T3z, T3F, T3B; + { + E T3C, T3D, T31, T2T, T3K; + T2g = T2e + T2f; + T3C = T2e - T2f; + T3D = T2h - T2i; + T2j = T2h + T2i; + T31 = FMA(KP559016994, T2S, T2R); + T2T = FNMS(KP559016994, T2S, T2R); + T3K = T3I + T3J; + T3M = T3I - T3J; + ri[WS(rs, 8)] = FMA(KP951056516, T30, T2T); + ri[WS(rs, 2)] = FNMS(KP951056516, T30, T2T); + ri[WS(rs, 11)] = FMA(KP951056516, T32, T31); + ri[WS(rs, 14)] = FNMS(KP951056516, T32, T31); + ii[WS(rs, 5)] = T3K + T3H; + T3L = FNMS(KP250000000, T3K, T3H); + T3G = FNMS(KP618033988, T3C, T3D); + T3E = FMA(KP618033988, T3D, T3C); + } + { + E T3N, T3R, T3w, T3x; + T3N = FNMS(KP559016994, T3M, T3L); + T3R = FMA(KP559016994, T3M, T3L); + T3w = T2t + T2z; + T2A = T2t - T2z; + T2N = T2G - T2M; + T3x = T2G + T2M; + ii[WS(rs, 8)] = FNMS(KP951056516, T3Q, T3N); + ii[WS(rs, 2)] = FMA(KP951056516, T3Q, T3N); + ii[WS(rs, 14)] = FMA(KP951056516, T3S, T3R); + ii[WS(rs, 11)] = FNMS(KP951056516, T3S, T3R); + T3y = T3w + T3x; + T3A = T3w - T3x; + } + ii[WS(rs, 10)] = T3y + T3v; + T3z = FNMS(KP250000000, T3y, T3v); + T2O = FMA(KP618033988, T2N, T2A); + T2Q = FNMS(KP618033988, T2A, T2N); + T3F = FNMS(KP559016994, T3A, T3z); + T3B = FMA(KP559016994, T3A, T3z); + ii[WS(rs, 4)] = FMA(KP951056516, T3E, T3B); + ii[WS(rs, 1)] = FNMS(KP951056516, T3E, T3B); + ii[WS(rs, 13)] = FNMS(KP951056516, T3G, T3F); + ii[WS(rs, 7)] = FMA(KP951056516, T3G, T3F); + T2m = T2g - T2j; + T2k = T2g + T2j; + } + } + } + } + ri[WS(rs, 10)] = T2d + T2k; + T2l = FNMS(KP250000000, T2k, T2d); + T2P = FNMS(KP559016994, T2m, T2l); + T2n = FMA(KP559016994, T2m, T2l); + ri[WS(rs, 1)] = FMA(KP951056516, T2O, T2n); + ri[WS(rs, 4)] = FNMS(KP951056516, T2O, T2n); + ri[WS(rs, 13)] = FMA(KP951056516, T2Q, T2P); + ri[WS(rs, 7)] = FNMS(KP951056516, T2Q, T2P); + } + } +} + +static const tw_instr twinstr[] = { + 
{TW_FULL, 0, 15}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 15, "t1_15", twinstr, &GENUS, {72, 28, 112, 0}, 0, 0, 0 }; + +void X(codelet_t1_15) (planner *p) { + X(kdft_dit_register) (p, t1_15, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 15 -name t1_15 -include t.h */ + +/* + * This function contains 184 FP additions, 112 FP multiplications, + * (or, 128 additions, 56 multiplications, 56 fused multiply/add), + * 65 stack variables, 6 constants, and 60 memory accesses + */ +#include "t.h" + +static void t1_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + (mb * 28); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) { + E T1q, T34, Td, T1n, T2S, T35, T13, T1k, T1l, T2E, T2F, T2O, T1H, T1T, T2k; + E T2t, T2f, T2s, T1M, T1U, Tu, TL, TM, T2H, T2I, T2N, T1w, T1Q, T29, T2w; + E T24, T2v, T1B, T1R; + { + E T1, T2R, T6, T1o, Tb, T1p, Tc, T2Q; + T1 = ri[0]; + T2R = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 5)]; + T5 = ii[WS(rs, 5)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T1o = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 10)]; + Ta = ii[WS(rs, 10)]; + T7 = W[18]; + T9 = W[19]; + Tb = FMA(T7, T8, T9 * Ta); + T1p = FNMS(T9, T8, T7 * Ta); + } + T1q = KP866025403 * (T1o - T1p); + T34 = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + Td = T1 + Tc; + T1n = FNMS(KP500000000, Tc, T1); + T2Q = T1o + T1p; + T2S = T2Q + T2R; + T35 = 
FNMS(KP500000000, T2Q, T2R); + } + { + E TR, T2c, T18, T2h, TW, T1E, T11, T1F, T12, T2d, T1d, T1J, T1i, T1K, T1j; + E T2i; + { + E TO, TQ, TN, TP; + TO = ri[WS(rs, 6)]; + TQ = ii[WS(rs, 6)]; + TN = W[10]; + TP = W[11]; + TR = FMA(TN, TO, TP * TQ); + T2c = FNMS(TP, TO, TN * TQ); + } + { + E T15, T17, T14, T16; + T15 = ri[WS(rs, 9)]; + T17 = ii[WS(rs, 9)]; + T14 = W[16]; + T16 = W[17]; + T18 = FMA(T14, T15, T16 * T17); + T2h = FNMS(T16, T15, T14 * T17); + } + { + E TT, TV, TS, TU; + TT = ri[WS(rs, 11)]; + TV = ii[WS(rs, 11)]; + TS = W[20]; + TU = W[21]; + TW = FMA(TS, TT, TU * TV); + T1E = FNMS(TU, TT, TS * TV); + } + { + E TY, T10, TX, TZ; + TY = ri[WS(rs, 1)]; + T10 = ii[WS(rs, 1)]; + TX = W[0]; + TZ = W[1]; + T11 = FMA(TX, TY, TZ * T10); + T1F = FNMS(TZ, TY, TX * T10); + } + T12 = TW + T11; + T2d = T1E + T1F; + { + E T1a, T1c, T19, T1b; + T1a = ri[WS(rs, 14)]; + T1c = ii[WS(rs, 14)]; + T19 = W[26]; + T1b = W[27]; + T1d = FMA(T19, T1a, T1b * T1c); + T1J = FNMS(T1b, T1a, T19 * T1c); + } + { + E T1f, T1h, T1e, T1g; + T1f = ri[WS(rs, 4)]; + T1h = ii[WS(rs, 4)]; + T1e = W[6]; + T1g = W[7]; + T1i = FMA(T1e, T1f, T1g * T1h); + T1K = FNMS(T1g, T1f, T1e * T1h); + } + T1j = T1d + T1i; + T2i = T1J + T1K; + { + E T1D, T1G, T2g, T2j; + T13 = TR + T12; + T1k = T18 + T1j; + T1l = T13 + T1k; + T2E = T2c + T2d; + T2F = T2h + T2i; + T2O = T2E + T2F; + T1D = FNMS(KP500000000, T12, TR); + T1G = KP866025403 * (T1E - T1F); + T1H = T1D - T1G; + T1T = T1D + T1G; + T2g = KP866025403 * (T1i - T1d); + T2j = FNMS(KP500000000, T2i, T2h); + T2k = T2g + T2j; + T2t = T2j - T2g; + { + E T2b, T2e, T1I, T1L; + T2b = KP866025403 * (T11 - TW); + T2e = FNMS(KP500000000, T2d, T2c); + T2f = T2b + T2e; + T2s = T2e - T2b; + T1I = FNMS(KP500000000, T1j, T18); + T1L = KP866025403 * (T1J - T1K); + T1M = T1I - T1L; + T1U = T1I + T1L; + } + } + } + { + E Ti, T21, Tz, T26, Tn, T1t, Ts, T1u, Tt, T22, TE, T1y, TJ, T1z, TK; + E T27; + { + E Tf, Th, Te, Tg; + Tf = ri[WS(rs, 3)]; + Th = ii[WS(rs, 3)]; + Te = W[4]; + 
Tg = W[5]; + Ti = FMA(Te, Tf, Tg * Th); + T21 = FNMS(Tg, Tf, Te * Th); + } + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 12)]; + Ty = ii[WS(rs, 12)]; + Tv = W[22]; + Tx = W[23]; + Tz = FMA(Tv, Tw, Tx * Ty); + T26 = FNMS(Tx, Tw, Tv * Ty); + } + { + E Tk, Tm, Tj, Tl; + Tk = ri[WS(rs, 8)]; + Tm = ii[WS(rs, 8)]; + Tj = W[14]; + Tl = W[15]; + Tn = FMA(Tj, Tk, Tl * Tm); + T1t = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = ri[WS(rs, 13)]; + Tr = ii[WS(rs, 13)]; + To = W[24]; + Tq = W[25]; + Ts = FMA(To, Tp, Tq * Tr); + T1u = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn + Ts; + T22 = T1t + T1u; + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 2)]; + TD = ii[WS(rs, 2)]; + TA = W[2]; + TC = W[3]; + TE = FMA(TA, TB, TC * TD); + T1y = FNMS(TC, TB, TA * TD); + } + { + E TG, TI, TF, TH; + TG = ri[WS(rs, 7)]; + TI = ii[WS(rs, 7)]; + TF = W[12]; + TH = W[13]; + TJ = FMA(TF, TG, TH * TI); + T1z = FNMS(TH, TG, TF * TI); + } + TK = TE + TJ; + T27 = T1y + T1z; + { + E T1s, T1v, T25, T28; + Tu = Ti + Tt; + TL = Tz + TK; + TM = Tu + TL; + T2H = T21 + T22; + T2I = T26 + T27; + T2N = T2H + T2I; + T1s = FNMS(KP500000000, Tt, Ti); + T1v = KP866025403 * (T1t - T1u); + T1w = T1s - T1v; + T1Q = T1s + T1v; + T25 = KP866025403 * (TJ - TE); + T28 = FNMS(KP500000000, T27, T26); + T29 = T25 + T28; + T2w = T28 - T25; + { + E T20, T23, T1x, T1A; + T20 = KP866025403 * (Ts - Tn); + T23 = FNMS(KP500000000, T22, T21); + T24 = T20 + T23; + T2v = T23 - T20; + T1x = FNMS(KP500000000, TK, Tz); + T1A = KP866025403 * (T1y - T1z); + T1B = T1x - T1A; + T1R = T1x + T1A; + } + } + } + { + E T2C, T1m, T2B, T2K, T2M, T2G, T2J, T2L, T2D; + T2C = KP559016994 * (TM - T1l); + T1m = TM + T1l; + T2B = FNMS(KP250000000, T1m, Td); + T2G = T2E - T2F; + T2J = T2H - T2I; + T2K = FNMS(KP587785252, T2J, KP951056516 * T2G); + T2M = FMA(KP951056516, T2J, KP587785252 * T2G); + ri[0] = Td + T1m; + T2L = T2C + T2B; + ri[WS(rs, 9)] = T2L - T2M; + ri[WS(rs, 6)] = T2L + T2M; + T2D = T2B - T2C; + ri[WS(rs, 12)] = T2D - T2K; + ri[WS(rs, 3)] = T2D 
+ T2K; + } + { + E T2U, T2P, T2T, T2Y, T30, T2W, T2X, T2Z, T2V; + T2U = KP559016994 * (T2N - T2O); + T2P = T2N + T2O; + T2T = FNMS(KP250000000, T2P, T2S); + T2W = T13 - T1k; + T2X = Tu - TL; + T2Y = FNMS(KP587785252, T2X, KP951056516 * T2W); + T30 = FMA(KP951056516, T2X, KP587785252 * T2W); + ii[0] = T2P + T2S; + T2Z = T2U + T2T; + ii[WS(rs, 6)] = T2Z - T30; + ii[WS(rs, 9)] = T30 + T2Z; + T2V = T2T - T2U; + ii[WS(rs, 3)] = T2V - T2Y; + ii[WS(rs, 12)] = T2Y + T2V; + } + { + E T2y, T2A, T1r, T1O, T2p, T2q, T2z, T2r; + { + E T2u, T2x, T1C, T1N; + T2u = T2s - T2t; + T2x = T2v - T2w; + T2y = FNMS(KP587785252, T2x, KP951056516 * T2u); + T2A = FMA(KP951056516, T2x, KP587785252 * T2u); + T1r = T1n - T1q; + T1C = T1w + T1B; + T1N = T1H + T1M; + T1O = T1C + T1N; + T2p = FNMS(KP250000000, T1O, T1r); + T2q = KP559016994 * (T1C - T1N); + } + ri[WS(rs, 5)] = T1r + T1O; + T2z = T2q + T2p; + ri[WS(rs, 14)] = T2z - T2A; + ri[WS(rs, 11)] = T2z + T2A; + T2r = T2p - T2q; + ri[WS(rs, 2)] = T2r - T2y; + ri[WS(rs, 8)] = T2r + T2y; + } + { + E T3h, T3q, T3i, T3l, T3m, T3n, T3p, T3o; + { + E T3f, T3g, T3j, T3k; + T3f = T1H - T1M; + T3g = T1w - T1B; + T3h = FNMS(KP587785252, T3g, KP951056516 * T3f); + T3q = FMA(KP951056516, T3g, KP587785252 * T3f); + T3i = T35 - T34; + T3j = T2v + T2w; + T3k = T2s + T2t; + T3l = T3j + T3k; + T3m = FNMS(KP250000000, T3l, T3i); + T3n = KP559016994 * (T3j - T3k); + } + ii[WS(rs, 5)] = T3l + T3i; + T3p = T3n + T3m; + ii[WS(rs, 11)] = T3p - T3q; + ii[WS(rs, 14)] = T3q + T3p; + T3o = T3m - T3n; + ii[WS(rs, 2)] = T3h + T3o; + ii[WS(rs, 8)] = T3o - T3h; + } + { + E T3c, T3d, T36, T37, T33, T38, T3e, T39; + { + E T3a, T3b, T31, T32; + T3a = T1Q - T1R; + T3b = T1T - T1U; + T3c = FMA(KP951056516, T3a, KP587785252 * T3b); + T3d = FNMS(KP587785252, T3a, KP951056516 * T3b); + T36 = T34 + T35; + T31 = T24 + T29; + T32 = T2f + T2k; + T37 = T31 + T32; + T33 = KP559016994 * (T31 - T32); + T38 = FNMS(KP250000000, T37, T36); + } + ii[WS(rs, 10)] = T37 + T36; + T3e = T38 - T33; 
+ ii[WS(rs, 7)] = T3d + T3e; + ii[WS(rs, 13)] = T3e - T3d; + T39 = T33 + T38; + ii[WS(rs, 1)] = T39 - T3c; + ii[WS(rs, 4)] = T3c + T39; + } + { + E T2m, T2o, T1P, T1W, T1X, T1Y, T2n, T1Z; + { + E T2a, T2l, T1S, T1V; + T2a = T24 - T29; + T2l = T2f - T2k; + T2m = FMA(KP951056516, T2a, KP587785252 * T2l); + T2o = FNMS(KP587785252, T2a, KP951056516 * T2l); + T1P = T1n + T1q; + T1S = T1Q + T1R; + T1V = T1T + T1U; + T1W = T1S + T1V; + T1X = KP559016994 * (T1S - T1V); + T1Y = FNMS(KP250000000, T1W, T1P); + } + ri[WS(rs, 10)] = T1P + T1W; + T2n = T1Y - T1X; + ri[WS(rs, 7)] = T2n - T2o; + ri[WS(rs, 13)] = T2n + T2o; + T1Z = T1X + T1Y; + ri[WS(rs, 4)] = T1Z - T2m; + ri[WS(rs, 1)] = T1Z + T2m; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 15}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 15, "t1_15", twinstr, &GENUS, {128, 56, 56, 0}, 0, 0, 0 }; + +void X(codelet_t1_15) (planner *p) { + X(kdft_dit_register) (p, t1_15, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,785 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -name t1_16 -include t.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 104 additions, 30 multiplications, 70 fused multiply/add), + * 97 stack variables, 3 constants, and 64 memory accesses + */ +#include "t.h" + +static void t1_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 30); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) { + E T3G, T3F; + { + E T3z, T3o, T8, T1I, T2o, T35, T2r, T1s, T2w, T36, T2p, T1F, T3k, T1N, T3A; + E Tl, T1T, T2V, T1U, Tz, T29, T30, T2c, T11, TB, TE, T2h, T31, T2a, T1e; + E TC, T1X, TH, TK, TG, TD, TJ; + { + E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti; + { + E T1h, T1k, T1n, T2k, T1i, T1q, T1m, T1j, T1p; + { + E T1, T3n, T3, T6, T2, T5; + T1 = ri[0]; + T3n = ii[0]; + T3 = ri[WS(rs, 8)]; + T6 = ii[WS(rs, 8)]; + T2 = W[14]; + T5 = W[15]; + { + E T3l, T4, T1g, T3m, T7; + T1h = ri[WS(rs, 15)]; + T1k = ii[WS(rs, 15)]; + T3l = T2 * T6; + T4 = T2 * T3; + T1g = W[28]; + T1n = ri[WS(rs, 7)]; + T3m = FNMS(T5, T3, T3l); + T7 = FMA(T5, T6, T4); + T2k = T1g * T1k; + T1i = T1g * T1h; + T3z = T3n - T3m; + T3o = T3m + T3n; + T8 = T1 + T7; + T1I = T1 - T7; + T1q = ii[WS(rs, 7)]; + T1m = W[12]; + 
} + T1j = W[29]; + T1p = W[13]; + } + { + E T1u, T1x, T1v, T2s, T1A, T1D, T1z, T1w, T1C; + { + E T2l, T1l, T2n, T1r, T2m, T1o, T1t; + T1u = ri[WS(rs, 3)]; + T2m = T1m * T1q; + T1o = T1m * T1n; + T2l = FNMS(T1j, T1h, T2k); + T1l = FMA(T1j, T1k, T1i); + T2n = FNMS(T1p, T1n, T2m); + T1r = FMA(T1p, T1q, T1o); + T1x = ii[WS(rs, 3)]; + T1t = W[4]; + T2o = T2l - T2n; + T35 = T2l + T2n; + T2r = T1l - T1r; + T1s = T1l + T1r; + T1v = T1t * T1u; + T2s = T1t * T1x; + } + T1A = ri[WS(rs, 11)]; + T1D = ii[WS(rs, 11)]; + T1z = W[20]; + T1w = W[5]; + T1C = W[21]; + { + E T2t, T1y, T2v, T1E, T2u, T1B, T9; + Ta = ri[WS(rs, 4)]; + T2u = T1z * T1D; + T1B = T1z * T1A; + T2t = FNMS(T1w, T1u, T2s); + T1y = FMA(T1w, T1x, T1v); + T2v = FNMS(T1C, T1A, T2u); + T1E = FMA(T1C, T1D, T1B); + Td = ii[WS(rs, 4)]; + T9 = W[6]; + T2w = T2t - T2v; + T36 = T2t + T2v; + T2p = T1y - T1E; + T1F = T1y + T1E; + Tb = T9 * Ta; + T1J = T9 * Td; + } + Tg = ri[WS(rs, 12)]; + Tj = ii[WS(rs, 12)]; + Tf = W[22]; + Tc = W[7]; + Ti = W[23]; + } + } + { + E TQ, TT, TR, T25, TW, TZ, TV, TS, TY; + { + E To, Tr, Tp, T1P, Tu, Tx, Tt, Tq, Tw; + { + E T1K, Te, T1M, Tk, T1L, Th, Tn; + To = ri[WS(rs, 2)]; + T1L = Tf * Tj; + Th = Tf * Tg; + T1K = FNMS(Tc, Ta, T1J); + Te = FMA(Tc, Td, Tb); + T1M = FNMS(Ti, Tg, T1L); + Tk = FMA(Ti, Tj, Th); + Tr = ii[WS(rs, 2)]; + Tn = W[2]; + T3k = T1K + T1M; + T1N = T1K - T1M; + T3A = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T1P = Tn * Tr; + } + Tu = ri[WS(rs, 10)]; + Tx = ii[WS(rs, 10)]; + Tt = W[18]; + Tq = W[3]; + Tw = W[19]; + { + E T1Q, Ts, T1S, Ty, T1R, Tv, TP; + TQ = ri[WS(rs, 1)]; + T1R = Tt * Tx; + Tv = Tt * Tu; + T1Q = FNMS(Tq, To, T1P); + Ts = FMA(Tq, Tr, Tp); + T1S = FNMS(Tw, Tu, T1R); + Ty = FMA(Tw, Tx, Tv); + TT = ii[WS(rs, 1)]; + TP = W[0]; + T1T = T1Q - T1S; + T2V = T1Q + T1S; + T1U = Ts - Ty; + Tz = Ts + Ty; + TR = TP * TQ; + T25 = TP * TT; + } + TW = ri[WS(rs, 9)]; + TZ = ii[WS(rs, 9)]; + TV = W[16]; + TS = W[1]; + TY = W[17]; + } + { + E T13, T16, T14, T2d, T19, T1c, T18, 
T15, T1b; + { + E T26, TU, T28, T10, T27, TX, T12; + T13 = ri[WS(rs, 5)]; + T27 = TV * TZ; + TX = TV * TW; + T26 = FNMS(TS, TQ, T25); + TU = FMA(TS, TT, TR); + T28 = FNMS(TY, TW, T27); + T10 = FMA(TY, TZ, TX); + T16 = ii[WS(rs, 5)]; + T12 = W[8]; + T29 = T26 - T28; + T30 = T26 + T28; + T2c = TU - T10; + T11 = TU + T10; + T14 = T12 * T13; + T2d = T12 * T16; + } + T19 = ri[WS(rs, 13)]; + T1c = ii[WS(rs, 13)]; + T18 = W[24]; + T15 = W[9]; + T1b = W[25]; + { + E T2e, T17, T2g, T1d, T2f, T1a, TA; + TB = ri[WS(rs, 14)]; + T2f = T18 * T1c; + T1a = T18 * T19; + T2e = FNMS(T15, T13, T2d); + T17 = FMA(T15, T16, T14); + T2g = FNMS(T1b, T19, T2f); + T1d = FMA(T1b, T1c, T1a); + TE = ii[WS(rs, 14)]; + TA = W[26]; + T2h = T2e - T2g; + T31 = T2e + T2g; + T2a = T17 - T1d; + T1e = T17 + T1d; + TC = TA * TB; + T1X = TA * TE; + } + TH = ri[WS(rs, 6)]; + TK = ii[WS(rs, 6)]; + TG = W[10]; + TD = W[27]; + TJ = W[11]; + } + } + } + { + E T2U, T3u, T2Z, T21, T1W, T34, T2X, T3f, T32, T3t, T1H, T3q, T3e, TO, T3g; + E T37, T3r, T3s, T3h, T3i; + { + E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI; + T2U = T8 - Tl; + Tm = T8 + Tl; + T1Z = TG * TK; + TI = TG * TH; + T1Y = FNMS(TD, TB, T1X); + TF = FMA(TD, TE, TC); + T20 = FNMS(TJ, TH, T1Z); + TL = FMA(TJ, TK, TI); + T3p = T3k + T3o; + T3u = T3o - T3k; + { + E T1f, TM, T1G, T3j, T2W, TN; + T2Z = T11 - T1e; + T1f = T11 + T1e; + T21 = T1Y - T20; + T2W = T1Y + T20; + T1W = TF - TL; + TM = TF + TL; + T1G = T1s + T1F; + T34 = T1s - T1F; + T2X = T2V - T2W; + T3j = T2V + T2W; + T3f = T30 + T31; + T32 = T30 - T31; + T3t = TM - Tz; + TN = Tz + TM; + T3r = T1G - T1f; + T1H = T1f + T1G; + T3s = T3p - T3j; + T3q = T3j + T3p; + T3e = Tm - TN; + TO = Tm + TN; + T3g = T35 + T36; + T37 = T35 - T36; + } + } + ii[WS(rs, 12)] = T3s - T3r; + ii[WS(rs, 4)] = T3r + T3s; + ri[0] = TO + T1H; + ri[WS(rs, 8)] = TO - T1H; + T3h = T3f - T3g; + T3i = T3f + T3g; + { + E T3a, T2Y, T3x, T3v, T3b, T33; + ii[0] = T3i + T3q; + ii[WS(rs, 8)] = T3q - T3i; + ri[WS(rs, 4)] = T3e + T3h; + 
ri[WS(rs, 12)] = T3e - T3h; + T3a = T2U - T2X; + T2Y = T2U + T2X; + T3x = T3u - T3t; + T3v = T3t + T3u; + T3b = T32 - T2Z; + T33 = T2Z + T32; + { + E T2E, T1O, T3B, T3H, T2x, T2q, T3C, T23, T2S, T2O, T2K, T2J, T3I, T2H, T2B; + E T2j; + { + E T2F, T1V, T22, T2G, T3c, T38; + T2E = T1I + T1N; + T1O = T1I - T1N; + T3B = T3z - T3A; + T3H = T3A + T3z; + T3c = T34 + T37; + T38 = T34 - T37; + T2F = T1U + T1T; + T1V = T1T - T1U; + { + E T3d, T3w, T3y, T39; + T3d = T3b - T3c; + T3w = T3b + T3c; + T3y = T38 - T33; + T39 = T33 + T38; + ri[WS(rs, 6)] = FMA(KP707106781, T3d, T3a); + ri[WS(rs, 14)] = FNMS(KP707106781, T3d, T3a); + ii[WS(rs, 10)] = FNMS(KP707106781, T3w, T3v); + ii[WS(rs, 2)] = FMA(KP707106781, T3w, T3v); + ii[WS(rs, 14)] = FNMS(KP707106781, T3y, T3x); + ii[WS(rs, 6)] = FMA(KP707106781, T3y, T3x); + ri[WS(rs, 2)] = FMA(KP707106781, T39, T2Y); + ri[WS(rs, 10)] = FNMS(KP707106781, T39, T2Y); + T22 = T1W + T21; + T2G = T1W - T21; + } + { + E T2M, T2N, T2b, T2i; + T2x = T2r - T2w; + T2M = T2r + T2w; + T2N = T2o - T2p; + T2q = T2o + T2p; + T3C = T1V + T22; + T23 = T1V - T22; + T2S = FMA(KP414213562, T2M, T2N); + T2O = FNMS(KP414213562, T2N, T2M); + T2K = T29 - T2a; + T2b = T29 + T2a; + T2i = T2c - T2h; + T2J = T2c + T2h; + T3I = T2G - T2F; + T2H = T2F + T2G; + T2B = FNMS(KP414213562, T2b, T2i); + T2j = FMA(KP414213562, T2i, T2b); + } + } + { + E T2R, T2L, T3L, T3M; + { + E T2A, T24, T2C, T2y, T3J, T3K, T2D, T2z; + T2A = FNMS(KP707106781, T23, T1O); + T24 = FMA(KP707106781, T23, T1O); + T2R = FNMS(KP414213562, T2J, T2K); + T2L = FMA(KP414213562, T2K, T2J); + T2C = FMA(KP414213562, T2q, T2x); + T2y = FNMS(KP414213562, T2x, T2q); + T3J = FMA(KP707106781, T3I, T3H); + T3L = FNMS(KP707106781, T3I, T3H); + T3K = T2C - T2B; + T2D = T2B + T2C; + T3M = T2j + T2y; + T2z = T2j - T2y; + ii[WS(rs, 11)] = FNMS(KP923879532, T3K, T3J); + ii[WS(rs, 3)] = FMA(KP923879532, T3K, T3J); + ri[WS(rs, 3)] = FMA(KP923879532, T2z, T24); + ri[WS(rs, 11)] = FNMS(KP923879532, T2z, T24); + ri[WS(rs, 
15)] = FMA(KP923879532, T2D, T2A); + ri[WS(rs, 7)] = FNMS(KP923879532, T2D, T2A); + } + { + E T2Q, T3D, T3E, T2T, T2I, T2P; + T2Q = FNMS(KP707106781, T2H, T2E); + T2I = FMA(KP707106781, T2H, T2E); + T2P = T2L + T2O; + T3G = T2O - T2L; + T3F = FNMS(KP707106781, T3C, T3B); + T3D = FMA(KP707106781, T3C, T3B); + ii[WS(rs, 15)] = FMA(KP923879532, T3M, T3L); + ii[WS(rs, 7)] = FNMS(KP923879532, T3M, T3L); + ri[WS(rs, 1)] = FMA(KP923879532, T2P, T2I); + ri[WS(rs, 9)] = FNMS(KP923879532, T2P, T2I); + T3E = T2R + T2S; + T2T = T2R - T2S; + ii[WS(rs, 9)] = FNMS(KP923879532, T3E, T3D); + ii[WS(rs, 1)] = FMA(KP923879532, T3E, T3D); + ri[WS(rs, 5)] = FMA(KP923879532, T2T, T2Q); + ri[WS(rs, 13)] = FNMS(KP923879532, T2T, T2Q); + } + } + } + } + } + } + ii[WS(rs, 13)] = FNMS(KP923879532, T3G, T3F); + ii[WS(rs, 5)] = FMA(KP923879532, T3G, T3F); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 16}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 16, "t1_16", twinstr, &GENUS, {104, 30, 70, 0}, 0, 0, 0 }; + +void X(codelet_t1_16) (planner *p) { + X(kdft_dit_register) (p, t1_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 16 -name t1_16 -include t.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 52 stack variables, 3 constants, and 64 memory accesses + */ +#include "t.h" + +static void t1_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 30); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) { + E T7, T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H; + E T2d, 
T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2j, T24, T2k, TS, T13, T2w, T2x; + E T2y, T2z, T1O, T2g, T1T, T2h; + { + E T1, T2T, T6, T2S; + T1 = ri[0]; + T2T = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 8)]; + T5 = ii[WS(rs, 8)]; + T2 = W[14]; + T4 = W[15]; + T6 = FMA(T2, T3, T4 * T5); + T2S = FNMS(T4, T3, T2 * T5); + } + T7 = T1 + T6; + T37 = T2T - T2S; + T1t = T1 - T6; + T2U = T2S + T2T; + } + { + E Tc, T1u, Th, T1v; + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 4)]; + Tb = ii[WS(rs, 4)]; + T8 = W[6]; + Ta = W[7]; + Tc = FMA(T8, T9, Ta * Tb); + T1u = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 12)]; + Tg = ii[WS(rs, 12)]; + Td = W[22]; + Tf = W[23]; + Th = FMA(Td, Te, Tf * Tg); + T1v = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc + Th; + T38 = Tc - Th; + T1w = T1u - T1v; + T2R = T1u + T1v; + } + { + E To, T1y, Tt, T1z, T1A, T1B; + { + E Tl, Tn, Tk, Tm; + Tl = ri[WS(rs, 2)]; + Tn = ii[WS(rs, 2)]; + Tk = W[2]; + Tm = W[3]; + To = FMA(Tk, Tl, Tm * Tn); + T1y = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = ri[WS(rs, 10)]; + Ts = ii[WS(rs, 10)]; + Tp = W[18]; + Tr = W[19]; + Tt = FMA(Tp, Tq, Tr * Ts); + T1z = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T2s = T1y + T1z; + T1A = T1y - T1z; + T1B = To - Tt; + T1C = T1A - T1B; + T2c = T1B + T1A; + } + { + E Tz, T1E, TE, T1F, T1D, T1G; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 14)]; + Ty = ii[WS(rs, 14)]; + Tv = W[26]; + Tx = W[27]; + Tz = FMA(Tv, Tw, Tx * Ty); + T1E = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 6)]; + TD = ii[WS(rs, 6)]; + TA = W[10]; + TC = W[11]; + TE = FMA(TA, TB, TC * TD); + T1F = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T2t = T1E + T1F; + T1D = Tz - TE; + T1G = T1E - T1F; + T1H = T1D + T1G; + T2d = T1D - T1G; + } + { + E T19, T20, T1p, T1X, T1e, T21, T1k, T1W; + { + E T16, T18, T15, T17; + T16 = ri[WS(rs, 15)]; + T18 = ii[WS(rs, 15)]; + T15 = W[28]; + T17 = W[29]; + T19 = FMA(T15, T16, T17 * T18); + T20 = FNMS(T17, T16, T15 * T18); + } + { + E T1m, T1o, 
T1l, T1n; + T1m = ri[WS(rs, 11)]; + T1o = ii[WS(rs, 11)]; + T1l = W[20]; + T1n = W[21]; + T1p = FMA(T1l, T1m, T1n * T1o); + T1X = FNMS(T1n, T1m, T1l * T1o); + } + { + E T1b, T1d, T1a, T1c; + T1b = ri[WS(rs, 7)]; + T1d = ii[WS(rs, 7)]; + T1a = W[12]; + T1c = W[13]; + T1e = FMA(T1a, T1b, T1c * T1d); + T21 = FNMS(T1c, T1b, T1a * T1d); + } + { + E T1h, T1j, T1g, T1i; + T1h = ri[WS(rs, 3)]; + T1j = ii[WS(rs, 3)]; + T1g = W[4]; + T1i = W[5]; + T1k = FMA(T1g, T1h, T1i * T1j); + T1W = FNMS(T1i, T1h, T1g * T1j); + } + T1f = T19 + T1e; + T1q = T1k + T1p; + T2B = T1f - T1q; + T2C = T20 + T21; + T2D = T1W + T1X; + T2E = T2C - T2D; + { + E T1V, T1Y, T22, T23; + T1V = T19 - T1e; + T1Y = T1W - T1X; + T1Z = T1V - T1Y; + T2j = T1V + T1Y; + T22 = T20 - T21; + T23 = T1k - T1p; + T24 = T22 + T23; + T2k = T22 - T23; + } + } + { + E TM, T1K, T12, T1R, TR, T1L, TX, T1Q; + { + E TJ, TL, TI, TK; + TJ = ri[WS(rs, 1)]; + TL = ii[WS(rs, 1)]; + TI = W[0]; + TK = W[1]; + TM = FMA(TI, TJ, TK * TL); + T1K = FNMS(TK, TJ, TI * TL); + } + { + E TZ, T11, TY, T10; + TZ = ri[WS(rs, 13)]; + T11 = ii[WS(rs, 13)]; + TY = W[24]; + T10 = W[25]; + T12 = FMA(TY, TZ, T10 * T11); + T1R = FNMS(T10, TZ, TY * T11); + } + { + E TO, TQ, TN, TP; + TO = ri[WS(rs, 9)]; + TQ = ii[WS(rs, 9)]; + TN = W[16]; + TP = W[17]; + TR = FMA(TN, TO, TP * TQ); + T1L = FNMS(TP, TO, TN * TQ); + } + { + E TU, TW, TT, TV; + TU = ri[WS(rs, 5)]; + TW = ii[WS(rs, 5)]; + TT = W[8]; + TV = W[9]; + TX = FMA(TT, TU, TV * TW); + T1Q = FNMS(TV, TU, TT * TW); + } + TS = TM + TR; + T13 = TX + T12; + T2w = TS - T13; + T2x = T1K + T1L; + T2y = T1Q + T1R; + T2z = T2x - T2y; + { + E T1M, T1N, T1P, T1S; + T1M = T1K - T1L; + T1N = TX - T12; + T1O = T1M + T1N; + T2g = T1M - T1N; + T1P = TM - TR; + T1S = T1Q - T1R; + T1T = T1P - T1S; + T2h = T1P + T1S; + } + } + { + E T1J, T27, T3g, T3i, T26, T3h, T2a, T3d; + { + E T1x, T1I, T3e, T3f; + T1x = T1t - T1w; + T1I = KP707106781 * (T1C - T1H); + T1J = T1x + T1I; + T27 = T1x - T1I; + T3e = KP707106781 * (T2d - 
T2c); + T3f = T38 + T37; + T3g = T3e + T3f; + T3i = T3f - T3e; + } + { + E T1U, T25, T28, T29; + T1U = FMA(KP923879532, T1O, KP382683432 * T1T); + T25 = FNMS(KP923879532, T24, KP382683432 * T1Z); + T26 = T1U + T25; + T3h = T25 - T1U; + T28 = FNMS(KP923879532, T1T, KP382683432 * T1O); + T29 = FMA(KP382683432, T24, KP923879532 * T1Z); + T2a = T28 - T29; + T3d = T28 + T29; + } + ri[WS(rs, 11)] = T1J - T26; + ii[WS(rs, 11)] = T3g - T3d; + ri[WS(rs, 3)] = T1J + T26; + ii[WS(rs, 3)] = T3d + T3g; + ri[WS(rs, 15)] = T27 - T2a; + ii[WS(rs, 15)] = T3i - T3h; + ri[WS(rs, 7)] = T27 + T2a; + ii[WS(rs, 7)] = T3h + T3i; + } + { + E T2v, T2H, T32, T34, T2G, T33, T2K, T2Z; + { + E T2r, T2u, T30, T31; + T2r = T7 - Ti; + T2u = T2s - T2t; + T2v = T2r + T2u; + T2H = T2r - T2u; + T30 = TF - Tu; + T31 = T2U - T2R; + T32 = T30 + T31; + T34 = T31 - T30; + } + { + E T2A, T2F, T2I, T2J; + T2A = T2w + T2z; + T2F = T2B - T2E; + T2G = KP707106781 * (T2A + T2F); + T33 = KP707106781 * (T2F - T2A); + T2I = T2z - T2w; + T2J = T2B + T2E; + T2K = KP707106781 * (T2I - T2J); + T2Z = KP707106781 * (T2I + T2J); + } + ri[WS(rs, 10)] = T2v - T2G; + ii[WS(rs, 10)] = T32 - T2Z; + ri[WS(rs, 2)] = T2v + T2G; + ii[WS(rs, 2)] = T2Z + T32; + ri[WS(rs, 14)] = T2H - T2K; + ii[WS(rs, 14)] = T34 - T33; + ri[WS(rs, 6)] = T2H + T2K; + ii[WS(rs, 6)] = T33 + T34; + } + { + E T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35; + { + E T2b, T2e, T36, T39; + T2b = T1t + T1w; + T2e = KP707106781 * (T2c + T2d); + T2f = T2b + T2e; + T2n = T2b - T2e; + T36 = KP707106781 * (T1C + T1H); + T39 = T37 - T38; + T3a = T36 + T39; + T3c = T39 - T36; + } + { + E T2i, T2l, T2o, T2p; + T2i = FMA(KP382683432, T2g, KP923879532 * T2h); + T2l = FNMS(KP382683432, T2k, KP923879532 * T2j); + T2m = T2i + T2l; + T3b = T2l - T2i; + T2o = FNMS(KP382683432, T2h, KP923879532 * T2g); + T2p = FMA(KP923879532, T2k, KP382683432 * T2j); + T2q = T2o - T2p; + T35 = T2o + T2p; + } + ri[WS(rs, 9)] = T2f - T2m; + ii[WS(rs, 9)] = T3a - T35; + ri[WS(rs, 1)] = T2f + T2m; + 
ii[WS(rs, 1)] = T35 + T3a; + ri[WS(rs, 13)] = T2n - T2q; + ii[WS(rs, 13)] = T3c - T3b; + ri[WS(rs, 5)] = T2n + T2q; + ii[WS(rs, 5)] = T3b + T3c; + } + { + E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P; + { + E Tj, TG, T2Q, T2V; + Tj = T7 + Ti; + TG = Tu + TF; + TH = Tj + TG; + T2L = Tj - TG; + T2Q = T2s + T2t; + T2V = T2R + T2U; + T2W = T2Q + T2V; + T2Y = T2V - T2Q; + } + { + E T14, T1r, T2M, T2N; + T14 = TS + T13; + T1r = T1f + T1q; + T1s = T14 + T1r; + T2X = T1r - T14; + T2M = T2x + T2y; + T2N = T2C + T2D; + T2O = T2M - T2N; + T2P = T2M + T2N; + } + ri[WS(rs, 8)] = TH - T1s; + ii[WS(rs, 8)] = T2W - T2P; + ri[0] = TH + T1s; + ii[0] = T2P + T2W; + ri[WS(rs, 12)] = T2L - T2O; + ii[WS(rs, 12)] = T2Y - T2X; + ri[WS(rs, 4)] = T2L + T2O; + ii[WS(rs, 4)] = T2X + T2Y; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 16}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 16, "t1_16", twinstr, &GENUS, {136, 46, 38, 0}, 0, 0, 0 }; + +void X(codelet_t1_16) (planner *p) { + X(kdft_dit_register) (p, t1_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -name t1_2 -include t.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "t.h" + +static void t1_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 2); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs)) { + E T1, Ta, T3, T6, T2, T5; + T1 = ri[0]; + Ta = ii[0]; + T3 = ri[WS(rs, 1)]; + T6 = ii[WS(rs, 1)]; + T2 = W[0]; + T5 = W[1]; + { + E T8, T4, T9, T7; + T8 = T2 * T6; + T4 = T2 * T3; + T9 = FNMS(T5, T3, T8); + T7 = FMA(T5, T6, T4); + ii[0] = T9 + Ta; + ii[WS(rs, 1)] = Ta - T9; + ri[0] = T1 + T7; + ri[WS(rs, 1)] = T1 - T7; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 2}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 2, "t1_2", twinstr, &GENUS, {4, 2, 2, 0}, 0, 0, 0 }; + +void X(codelet_t1_2) (planner *p) { + X(kdft_dit_register) (p, t1_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 2 -name t1_2 -include t.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory accesses + */ +#include "t.h" + +static void t1_2(R *ri, R *ii, const R *W, stride 
rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 2); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs)) { + E T1, T8, T6, T7; + T1 = ri[0]; + T8 = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 1)]; + T5 = ii[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + T7 = FNMS(T4, T3, T2 * T5); + } + ri[WS(rs, 1)] = T1 - T6; + ii[WS(rs, 1)] = T8 - T7; + ri[0] = T1 + T6; + ii[0] = T7 + T8; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 2}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 2, "t1_2", twinstr, &GENUS, {4, 2, 2, 0}, 0, 0, 0 }; + +void X(codelet_t1_2) (planner *p) { + X(kdft_dit_register) (p, t1_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1029 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -name t1_20 -include t.h */ + +/* + * This function contains 246 FP additions, 148 FP multiplications, + * (or, 136 additions, 38 multiplications, 110 fused multiply/add), + * 97 stack variables, 4 constants, and 80 memory accesses + */ +#include "t.h" + +static void t1_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 38); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 38, MAKE_VOLATILE_STRIDE(40, rs)) { + E T4P, T4Y, T50, T4U, T4S, T4T, T4Z, T4V; + { + E T4N, T4r, T8, T2i, T4n, T2n, T4O, Tl, T2v, T3v, T40, T4b, TN, T2b, T3F; + E T3i, T2R, T3z, T3W, T4f, T27, T2f, T3J, T3a, T2K, T3y, T3T, T4e, T1G, T2e; + E T3I, T33, T2C, T3w, T43, T4c, T1e, T2c, T3G, T3p; + { + E T1, T4q, T3, T6, T2, T5; + T1 = ri[0]; + T4q = ii[0]; + T3 = ri[WS(rs, 10)]; + T6 = ii[WS(rs, 10)]; + T2 = W[18]; + T5 = W[19]; + { + E Ta, Td, Tg, T2j, Tb, Tj, Tf, Tc, Ti; + { + E T4o, T4, T9, T4p, T7; + Ta = ri[WS(rs, 5)]; + Td = ii[WS(rs, 5)]; + T4o = T2 * T6; + T4 = T2 * T3; + T9 = W[8]; + Tg = ri[WS(rs, 15)]; + T4p = FNMS(T5, T3, T4o); + T7 = FMA(T5, T6, T4); + T2j = T9 * Td; + Tb = T9 * Ta; + T4N = T4q - T4p; + T4r = T4p + T4q; + T8 = T1 + 
T7; + T2i = T1 - T7; + Tj = ii[WS(rs, 15)]; + Tf = W[28]; + } + Tc = W[9]; + Ti = W[29]; + { + E T3d, Ts, T2t, TL, TB, TE, TD, T3f, Ty, T2q, TC; + { + E TH, TK, TJ, T2s, TI; + { + E To, Tr, Tp, T3c, Tq, TG; + { + E T2k, Te, T2m, Tk, T2l, Th, Tn; + To = ri[WS(rs, 4)]; + T2l = Tf * Tj; + Th = Tf * Tg; + T2k = FNMS(Tc, Ta, T2j); + Te = FMA(Tc, Td, Tb); + T2m = FNMS(Ti, Tg, T2l); + Tk = FMA(Ti, Tj, Th); + Tr = ii[WS(rs, 4)]; + Tn = W[6]; + T4n = T2k + T2m; + T2n = T2k - T2m; + T4O = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T3c = Tn * Tr; + } + Tq = W[7]; + TH = ri[WS(rs, 19)]; + TK = ii[WS(rs, 19)]; + TG = W[36]; + T3d = FNMS(Tq, To, T3c); + Ts = FMA(Tq, Tr, Tp); + TJ = W[37]; + T2s = TG * TK; + TI = TG * TH; + } + { + E Tu, Tx, Tt, Tw, T3e, Tv, TA; + Tu = ri[WS(rs, 14)]; + Tx = ii[WS(rs, 14)]; + T2t = FNMS(TJ, TH, T2s); + TL = FMA(TJ, TK, TI); + Tt = W[26]; + Tw = W[27]; + TB = ri[WS(rs, 9)]; + TE = ii[WS(rs, 9)]; + T3e = Tt * Tx; + Tv = Tt * Tu; + TA = W[16]; + TD = W[17]; + T3f = FNMS(Tw, Tu, T3e); + Ty = FMA(Tw, Tx, Tv); + T2q = TA * TE; + TC = TA * TB; + } + } + { + E T3g, T3Y, Tz, T2p, T2r, TF; + T3g = T3d - T3f; + T3Y = T3d + T3f; + Tz = Ts + Ty; + T2p = Ts - Ty; + T2r = FNMS(TD, TB, T2q); + TF = FMA(TD, TE, TC); + { + E T3Z, T2u, T3h, TM; + T3Z = T2r + T2t; + T2u = T2r - T2t; + T3h = TF - TL; + TM = TF + TL; + T2v = T2p - T2u; + T3v = T2p + T2u; + T40 = T3Y - T3Z; + T4b = T3Y + T3Z; + TN = Tz - TM; + T2b = Tz + TM; + T3F = T3g - T3h; + T3i = T3g + T3h; + } + } + } + } + } + { + E T35, T1M, T2P, T25, T1V, T1Y, T1X, T37, T1S, T2M, T1W; + { + E T21, T24, T23, T2O, T22; + { + E T1I, T1L, T1H, T1K, T34, T1J, T20; + T1I = ri[WS(rs, 12)]; + T1L = ii[WS(rs, 12)]; + T1H = W[22]; + T1K = W[23]; + T21 = ri[WS(rs, 7)]; + T24 = ii[WS(rs, 7)]; + T34 = T1H * T1L; + T1J = T1H * T1I; + T20 = W[12]; + T23 = W[13]; + T35 = FNMS(T1K, T1I, T34); + T1M = FMA(T1K, T1L, T1J); + T2O = T20 * T24; + T22 = T20 * T21; + } + { + E T1O, T1R, T1N, T1Q, T36, T1P, T1U; + T1O = ri[WS(rs, 2)]; 
+ T1R = ii[WS(rs, 2)]; + T2P = FNMS(T23, T21, T2O); + T25 = FMA(T23, T24, T22); + T1N = W[2]; + T1Q = W[3]; + T1V = ri[WS(rs, 17)]; + T1Y = ii[WS(rs, 17)]; + T36 = T1N * T1R; + T1P = T1N * T1O; + T1U = W[32]; + T1X = W[33]; + T37 = FNMS(T1Q, T1O, T36); + T1S = FMA(T1Q, T1R, T1P); + T2M = T1U * T1Y; + T1W = T1U * T1V; + } + } + { + E T38, T3U, T1T, T2L, T2N, T1Z; + T38 = T35 - T37; + T3U = T35 + T37; + T1T = T1M + T1S; + T2L = T1M - T1S; + T2N = FNMS(T1X, T1V, T2M); + T1Z = FMA(T1X, T1Y, T1W); + { + E T3V, T2Q, T39, T26; + T3V = T2N + T2P; + T2Q = T2N - T2P; + T39 = T1Z - T25; + T26 = T1Z + T25; + T2R = T2L - T2Q; + T3z = T2L + T2Q; + T3W = T3U - T3V; + T4f = T3U + T3V; + T27 = T1T - T26; + T2f = T1T + T26; + T3J = T38 - T39; + T3a = T38 + T39; + } + } + } + { + E T2Y, T1l, T2I, T1E, T1u, T1x, T1w, T30, T1r, T2F, T1v; + { + E T1A, T1D, T1C, T2H, T1B; + { + E T1h, T1k, T1g, T1j, T2X, T1i, T1z; + T1h = ri[WS(rs, 8)]; + T1k = ii[WS(rs, 8)]; + T1g = W[14]; + T1j = W[15]; + T1A = ri[WS(rs, 3)]; + T1D = ii[WS(rs, 3)]; + T2X = T1g * T1k; + T1i = T1g * T1h; + T1z = W[4]; + T1C = W[5]; + T2Y = FNMS(T1j, T1h, T2X); + T1l = FMA(T1j, T1k, T1i); + T2H = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T2Z, T1o, T1t; + T1n = ri[WS(rs, 18)]; + T1q = ii[WS(rs, 18)]; + T2I = FNMS(T1C, T1A, T2H); + T1E = FMA(T1C, T1D, T1B); + T1m = W[34]; + T1p = W[35]; + T1u = ri[WS(rs, 13)]; + T1x = ii[WS(rs, 13)]; + T2Z = T1m * T1q; + T1o = T1m * T1n; + T1t = W[24]; + T1w = W[25]; + T30 = FNMS(T1p, T1n, T2Z); + T1r = FMA(T1p, T1q, T1o); + T2F = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T31, T3R, T1s, T2E, T2G, T1y; + T31 = T2Y - T30; + T3R = T2Y + T30; + T1s = T1l + T1r; + T2E = T1l - T1r; + T2G = FNMS(T1w, T1u, T2F); + T1y = FMA(T1w, T1x, T1v); + { + E T3S, T2J, T32, T1F; + T3S = T2G + T2I; + T2J = T2G - T2I; + T32 = T1y - T1E; + T1F = T1y + T1E; + T2K = T2E - T2J; + T3y = T2E + T2J; + T3T = T3R - T3S; + T4e = T3R + T3S; + T1G = T1s - T1F; + T2e = T1s + T1F; + T3I = T31 - T32; + 
T33 = T31 + T32; + } + } + } + { + E T3k, TT, T2A, T1c, T12, T15, T14, T3m, TZ, T2x, T13; + { + E T18, T1b, T1a, T2z, T19; + { + E TP, TS, TO, TR, T3j, TQ, T17; + TP = ri[WS(rs, 16)]; + TS = ii[WS(rs, 16)]; + TO = W[30]; + TR = W[31]; + T18 = ri[WS(rs, 11)]; + T1b = ii[WS(rs, 11)]; + T3j = TO * TS; + TQ = TO * TP; + T17 = W[20]; + T1a = W[21]; + T3k = FNMS(TR, TP, T3j); + TT = FMA(TR, TS, TQ); + T2z = T17 * T1b; + T19 = T17 * T18; + } + { + E TV, TY, TU, TX, T3l, TW, T11; + TV = ri[WS(rs, 6)]; + TY = ii[WS(rs, 6)]; + T2A = FNMS(T1a, T18, T2z); + T1c = FMA(T1a, T1b, T19); + TU = W[10]; + TX = W[11]; + T12 = ri[WS(rs, 1)]; + T15 = ii[WS(rs, 1)]; + T3l = TU * TY; + TW = TU * TV; + T11 = W[0]; + T14 = W[1]; + T3m = FNMS(TX, TV, T3l); + TZ = FMA(TX, TY, TW); + T2x = T11 * T15; + T13 = T11 * T12; + } + } + { + E T3n, T41, T10, T2w, T2y, T16; + T3n = T3k - T3m; + T41 = T3k + T3m; + T10 = TT + TZ; + T2w = TT - TZ; + T2y = FNMS(T14, T12, T2x); + T16 = FMA(T14, T15, T13); + { + E T42, T2B, T3o, T1d; + T42 = T2y + T2A; + T2B = T2y - T2A; + T3o = T16 - T1c; + T1d = T16 + T1c; + T2C = T2w - T2B; + T3w = T2w + T2B; + T43 = T41 - T42; + T4c = T41 + T42; + T1e = T10 - T1d; + T2c = T10 + T1d; + T3G = T3n - T3o; + T3p = T3n + T3o; + } + } + } + { + E T4s, T4k, T4l, T4h, T4j, T49, T4y, T4A, T48; + { + E T4D, T4C, T2a, T47, T45, T4B, T4M, T4K, T46, T3Q; + { + E Tm, T1f, T4J, T4I, T28, T3X, T44, T29, T3P, T3O; + T4D = T3T + T3W; + T3X = T3T - T3W; + T44 = T40 - T43; + T4C = T40 + T43; + T2a = T8 + Tl; + Tm = T8 - Tl; + T1f = TN + T1e; + T4J = TN - T1e; + T4I = T1G - T27; + T28 = T1G + T27; + T47 = FMA(KP618033988, T3X, T44); + T45 = FNMS(KP618033988, T44, T3X); + T29 = T1f + T28; + T3P = T1f - T28; + T4B = T4r - T4n; + T4s = T4n + T4r; + ri[WS(rs, 10)] = Tm + T29; + T3O = FNMS(KP250000000, T29, Tm); + T4M = FMA(KP618033988, T4I, T4J); + T4K = FNMS(KP618033988, T4J, T4I); + T46 = FMA(KP559016994, T3P, T3O); + T3Q = FNMS(KP559016994, T3P, T3O); + } + { + E T2d, T4w, T4x, T2g, T2h; + { + 
E T4d, T4G, T4F, T4g, T4E, T4L, T4H; + T4k = T4b + T4c; + T4d = T4b - T4c; + T4G = T4C - T4D; + T4E = T4C + T4D; + ri[WS(rs, 18)] = FMA(KP951056516, T45, T3Q); + ri[WS(rs, 2)] = FNMS(KP951056516, T45, T3Q); + ri[WS(rs, 6)] = FMA(KP951056516, T47, T46); + ri[WS(rs, 14)] = FNMS(KP951056516, T47, T46); + ii[WS(rs, 10)] = T4E + T4B; + T4F = FNMS(KP250000000, T4E, T4B); + T4g = T4e - T4f; + T4l = T4e + T4f; + T2d = T2b + T2c; + T4w = T2b - T2c; + T4L = FMA(KP559016994, T4G, T4F); + T4H = FNMS(KP559016994, T4G, T4F); + T4h = FMA(KP618033988, T4g, T4d); + T4j = FNMS(KP618033988, T4d, T4g); + ii[WS(rs, 18)] = FNMS(KP951056516, T4K, T4H); + ii[WS(rs, 2)] = FMA(KP951056516, T4K, T4H); + ii[WS(rs, 14)] = FMA(KP951056516, T4M, T4L); + ii[WS(rs, 6)] = FNMS(KP951056516, T4M, T4L); + T4x = T2e - T2f; + T2g = T2e + T2f; + } + T2h = T2d + T2g; + T49 = T2d - T2g; + T4y = FMA(KP618033988, T4x, T4w); + T4A = FNMS(KP618033988, T4w, T4x); + ri[0] = T2a + T2h; + T48 = FNMS(KP250000000, T2h, T2a); + } + } + { + E T3u, T51, T5a, T5c, T56, T54; + { + E T53, T52, T3t, T3r, T2o, T59, T58, T2T, T2V, T4u, T4t, T2U, T3s, T2W; + { + E T3b, T3q, T4i, T4a, T4m; + T53 = T33 + T3a; + T3b = T33 - T3a; + T3q = T3i - T3p; + T52 = T3i + T3p; + T4i = FNMS(KP559016994, T49, T48); + T4a = FMA(KP559016994, T49, T48); + T4m = T4k + T4l; + T4u = T4k - T4l; + ri[WS(rs, 16)] = FMA(KP951056516, T4h, T4a); + ri[WS(rs, 4)] = FNMS(KP951056516, T4h, T4a); + ri[WS(rs, 8)] = FMA(KP951056516, T4j, T4i); + ri[WS(rs, 12)] = FNMS(KP951056516, T4j, T4i); + ii[0] = T4m + T4s; + T4t = FNMS(KP250000000, T4m, T4s); + T3t = FMA(KP618033988, T3b, T3q); + T3r = FNMS(KP618033988, T3q, T3b); + } + T3u = T2i + T2n; + T2o = T2i - T2n; + { + E T4v, T4z, T2D, T2S; + T4v = FMA(KP559016994, T4u, T4t); + T4z = FNMS(KP559016994, T4u, T4t); + T2D = T2v + T2C; + T59 = T2v - T2C; + T58 = T2K - T2R; + T2S = T2K + T2R; + ii[WS(rs, 16)] = FNMS(KP951056516, T4y, T4v); + ii[WS(rs, 4)] = FMA(KP951056516, T4y, T4v); + ii[WS(rs, 12)] = 
FMA(KP951056516, T4A, T4z); + ii[WS(rs, 8)] = FNMS(KP951056516, T4A, T4z); + T2T = T2D + T2S; + T2V = T2D - T2S; + } + ri[WS(rs, 15)] = T2o + T2T; + T2U = FNMS(KP250000000, T2T, T2o); + T51 = T4O + T4N; + T4P = T4N - T4O; + T5a = FNMS(KP618033988, T59, T58); + T5c = FMA(KP618033988, T58, T59); + T3s = FMA(KP559016994, T2V, T2U); + T2W = FNMS(KP559016994, T2V, T2U); + ri[WS(rs, 7)] = FNMS(KP951056516, T3r, T2W); + ri[WS(rs, 3)] = FMA(KP951056516, T3r, T2W); + ri[WS(rs, 19)] = FNMS(KP951056516, T3t, T3s); + ri[WS(rs, 11)] = FMA(KP951056516, T3t, T3s); + T56 = T52 - T53; + T54 = T52 + T53; + } + { + E T4Q, T4R, T3N, T3L, T4W, T4X, T3B, T3D, T3H, T3K, T55, T3C, T3M, T3E; + T4Q = T3F + T3G; + T3H = T3F - T3G; + T3K = T3I - T3J; + T4R = T3I + T3J; + ii[WS(rs, 15)] = T54 + T51; + T55 = FNMS(KP250000000, T54, T51); + T3N = FNMS(KP618033988, T3H, T3K); + T3L = FMA(KP618033988, T3K, T3H); + { + E T57, T5b, T3x, T3A; + T57 = FNMS(KP559016994, T56, T55); + T5b = FMA(KP559016994, T56, T55); + T3x = T3v + T3w; + T4W = T3v - T3w; + T4X = T3y - T3z; + T3A = T3y + T3z; + ii[WS(rs, 7)] = FMA(KP951056516, T5a, T57); + ii[WS(rs, 3)] = FNMS(KP951056516, T5a, T57); + ii[WS(rs, 19)] = FMA(KP951056516, T5c, T5b); + ii[WS(rs, 11)] = FNMS(KP951056516, T5c, T5b); + T3B = T3x + T3A; + T3D = T3x - T3A; + } + ri[WS(rs, 5)] = T3u + T3B; + T3C = FNMS(KP250000000, T3B, T3u); + T4Y = FMA(KP618033988, T4X, T4W); + T50 = FNMS(KP618033988, T4W, T4X); + T3M = FNMS(KP559016994, T3D, T3C); + T3E = FMA(KP559016994, T3D, T3C); + ri[WS(rs, 9)] = FNMS(KP951056516, T3L, T3E); + ri[WS(rs, 1)] = FMA(KP951056516, T3L, T3E); + ri[WS(rs, 17)] = FNMS(KP951056516, T3N, T3M); + ri[WS(rs, 13)] = FMA(KP951056516, T3N, T3M); + T4U = T4Q - T4R; + T4S = T4Q + T4R; + } + } + } + } + ii[WS(rs, 5)] = T4S + T4P; + T4T = FNMS(KP250000000, T4S, T4P); + T4Z = FNMS(KP559016994, T4U, T4T); + T4V = FMA(KP559016994, T4U, T4T); + ii[WS(rs, 9)] = FMA(KP951056516, T4Y, T4V); + ii[WS(rs, 1)] = FNMS(KP951056516, T4Y, T4V); + ii[WS(rs, 
17)] = FMA(KP951056516, T50, T4Z); + ii[WS(rs, 13)] = FNMS(KP951056516, T50, T4Z); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 20}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 20, "t1_20", twinstr, &GENUS, {136, 38, 110, 0}, 0, 0, 0 }; + +void X(codelet_t1_20) (planner *p) { + X(kdft_dit_register) (p, t1_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 20 -name t1_20 -include t.h */ + +/* + * This function contains 246 FP additions, 124 FP multiplications, + * (or, 184 additions, 62 multiplications, 62 fused multiply/add), + * 85 stack variables, 4 constants, and 80 memory accesses + */ +#include "t.h" + +static void t1_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + (mb * 38); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 38, MAKE_VOLATILE_STRIDE(40, rs)) { + E Tj, T1R, T4g, T4p, T2q, T37, T3Q, T42, T1r, T1O, T1P, T3i, T3l, T44, T3D; + E T3E, T3K, T1V, T1W, T1X, T23, T28, T4r, T2W, T2X, T4c, T33, T34, T35, T2G; + E T2L, T2M, TG, T13, T14, T3p, T3s, T43, T3A, T3B, T3J, T1S, T1T, T1U, T2e; + E T2j, T4q, T2T, T2U, T4b, T30, T31, T32, T2v, T2A, T2B; + { + E T1, T3O, T6, T3N, Tc, T2n, Th, T2o; + T1 = ri[0]; + T3O = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 10)]; + T5 = ii[WS(rs, 10)]; + T2 = W[18]; + T4 = W[19]; + T6 = FMA(T2, T3, T4 * T5); + T3N = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 5)]; + Tb = ii[WS(rs, 5)]; + T8 = W[8]; + Ta = W[9]; + Tc = FMA(T8, T9, Ta * Tb); + T2n = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 15)]; + Tg = ii[WS(rs, 15)]; + Td = W[28]; 
+ Tf = W[29]; + Th = FMA(Td, Te, Tf * Tg); + T2o = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, T4e, T4f; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 - Ti; + T1R = T7 + Ti; + T4e = T3O - T3N; + T4f = Tc - Th; + T4g = T4e - T4f; + T4p = T4f + T4e; + } + { + E T2m, T2p, T3M, T3P; + T2m = T1 - T6; + T2p = T2n - T2o; + T2q = T2m - T2p; + T37 = T2m + T2p; + T3M = T2n + T2o; + T3P = T3N + T3O; + T3Q = T3M + T3P; + T42 = T3P - T3M; + } + } + { + E T1f, T3g, T21, T2C, T1N, T3k, T27, T2K, T1q, T3h, T22, T2F, T1C, T3j, T26; + E T2H; + { + E T19, T1Z, T1e, T20; + { + E T16, T18, T15, T17; + T16 = ri[WS(rs, 8)]; + T18 = ii[WS(rs, 8)]; + T15 = W[14]; + T17 = W[15]; + T19 = FMA(T15, T16, T17 * T18); + T1Z = FNMS(T17, T16, T15 * T18); + } + { + E T1b, T1d, T1a, T1c; + T1b = ri[WS(rs, 18)]; + T1d = ii[WS(rs, 18)]; + T1a = W[34]; + T1c = W[35]; + T1e = FMA(T1a, T1b, T1c * T1d); + T20 = FNMS(T1c, T1b, T1a * T1d); + } + T1f = T19 + T1e; + T3g = T1Z + T20; + T21 = T1Z - T20; + T2C = T19 - T1e; + } + { + E T1H, T2I, T1M, T2J; + { + E T1E, T1G, T1D, T1F; + T1E = ri[WS(rs, 17)]; + T1G = ii[WS(rs, 17)]; + T1D = W[32]; + T1F = W[33]; + T1H = FMA(T1D, T1E, T1F * T1G); + T2I = FNMS(T1F, T1E, T1D * T1G); + } + { + E T1J, T1L, T1I, T1K; + T1J = ri[WS(rs, 7)]; + T1L = ii[WS(rs, 7)]; + T1I = W[12]; + T1K = W[13]; + T1M = FMA(T1I, T1J, T1K * T1L); + T2J = FNMS(T1K, T1J, T1I * T1L); + } + T1N = T1H + T1M; + T3k = T2I + T2J; + T27 = T1H - T1M; + T2K = T2I - T2J; + } + { + E T1k, T2D, T1p, T2E; + { + E T1h, T1j, T1g, T1i; + T1h = ri[WS(rs, 13)]; + T1j = ii[WS(rs, 13)]; + T1g = W[24]; + T1i = W[25]; + T1k = FMA(T1g, T1h, T1i * T1j); + T2D = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1m, T1o, T1l, T1n; + T1m = ri[WS(rs, 3)]; + T1o = ii[WS(rs, 3)]; + T1l = W[4]; + T1n = W[5]; + T1p = FMA(T1l, T1m, T1n * T1o); + T2E = FNMS(T1n, T1m, T1l * T1o); + } + T1q = T1k + T1p; + T3h = T2D + T2E; + T22 = T1k - T1p; + T2F = T2D - T2E; + } + { + E T1w, T24, T1B, T25; + { + E T1t, T1v, T1s, T1u; + T1t = ri[WS(rs, 12)]; + 
T1v = ii[WS(rs, 12)]; + T1s = W[22]; + T1u = W[23]; + T1w = FMA(T1s, T1t, T1u * T1v); + T24 = FNMS(T1u, T1t, T1s * T1v); + } + { + E T1y, T1A, T1x, T1z; + T1y = ri[WS(rs, 2)]; + T1A = ii[WS(rs, 2)]; + T1x = W[2]; + T1z = W[3]; + T1B = FMA(T1x, T1y, T1z * T1A); + T25 = FNMS(T1z, T1y, T1x * T1A); + } + T1C = T1w + T1B; + T3j = T24 + T25; + T26 = T24 - T25; + T2H = T1w - T1B; + } + T1r = T1f - T1q; + T1O = T1C - T1N; + T1P = T1r + T1O; + T3i = T3g - T3h; + T3l = T3j - T3k; + T44 = T3i + T3l; + T3D = T3g + T3h; + T3E = T3j + T3k; + T3K = T3D + T3E; + T1V = T1f + T1q; + T1W = T1C + T1N; + T1X = T1V + T1W; + T23 = T21 + T22; + T28 = T26 + T27; + T4r = T23 + T28; + T2W = T21 - T22; + T2X = T26 - T27; + T4c = T2W + T2X; + T33 = T2C + T2F; + T34 = T2H + T2K; + T35 = T33 + T34; + T2G = T2C - T2F; + T2L = T2H - T2K; + T2M = T2G + T2L; + } + { + E Tu, T3n, T2c, T2r, T12, T3r, T2i, T2z, TF, T3o, T2d, T2u, TR, T3q, T2h; + E T2w; + { + E To, T2a, Tt, T2b; + { + E Tl, Tn, Tk, Tm; + Tl = ri[WS(rs, 4)]; + Tn = ii[WS(rs, 4)]; + Tk = W[6]; + Tm = W[7]; + To = FMA(Tk, Tl, Tm * Tn); + T2a = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = ri[WS(rs, 14)]; + Ts = ii[WS(rs, 14)]; + Tp = W[26]; + Tr = W[27]; + Tt = FMA(Tp, Tq, Tr * Ts); + T2b = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T3n = T2a + T2b; + T2c = T2a - T2b; + T2r = To - Tt; + } + { + E TW, T2x, T11, T2y; + { + E TT, TV, TS, TU; + TT = ri[WS(rs, 1)]; + TV = ii[WS(rs, 1)]; + TS = W[0]; + TU = W[1]; + TW = FMA(TS, TT, TU * TV); + T2x = FNMS(TU, TT, TS * TV); + } + { + E TY, T10, TX, TZ; + TY = ri[WS(rs, 11)]; + T10 = ii[WS(rs, 11)]; + TX = W[20]; + TZ = W[21]; + T11 = FMA(TX, TY, TZ * T10); + T2y = FNMS(TZ, TY, TX * T10); + } + T12 = TW + T11; + T3r = T2x + T2y; + T2i = TW - T11; + T2z = T2x - T2y; + } + { + E Tz, T2s, TE, T2t; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 9)]; + Ty = ii[WS(rs, 9)]; + Tv = W[16]; + Tx = W[17]; + Tz = FMA(Tv, Tw, Tx * Ty); + T2s = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = 
ri[WS(rs, 19)]; + TD = ii[WS(rs, 19)]; + TA = W[36]; + TC = W[37]; + TE = FMA(TA, TB, TC * TD); + T2t = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T3o = T2s + T2t; + T2d = Tz - TE; + T2u = T2s - T2t; + } + { + E TL, T2f, TQ, T2g; + { + E TI, TK, TH, TJ; + TI = ri[WS(rs, 16)]; + TK = ii[WS(rs, 16)]; + TH = W[30]; + TJ = W[31]; + TL = FMA(TH, TI, TJ * TK); + T2f = FNMS(TJ, TI, TH * TK); + } + { + E TN, TP, TM, TO; + TN = ri[WS(rs, 6)]; + TP = ii[WS(rs, 6)]; + TM = W[10]; + TO = W[11]; + TQ = FMA(TM, TN, TO * TP); + T2g = FNMS(TO, TN, TM * TP); + } + TR = TL + TQ; + T3q = T2f + T2g; + T2h = T2f - T2g; + T2w = TL - TQ; + } + TG = Tu - TF; + T13 = TR - T12; + T14 = TG + T13; + T3p = T3n - T3o; + T3s = T3q - T3r; + T43 = T3p + T3s; + T3A = T3n + T3o; + T3B = T3q + T3r; + T3J = T3A + T3B; + T1S = Tu + TF; + T1T = TR + T12; + T1U = T1S + T1T; + T2e = T2c + T2d; + T2j = T2h + T2i; + T4q = T2e + T2j; + T2T = T2c - T2d; + T2U = T2h - T2i; + T4b = T2T + T2U; + T30 = T2r + T2u; + T31 = T2w + T2z; + T32 = T30 + T31; + T2v = T2r - T2u; + T2A = T2w - T2z; + T2B = T2v + T2A; + } + { + E T3e, T1Q, T3d, T3u, T3w, T3m, T3t, T3v, T3f; + T3e = KP559016994 * (T14 - T1P); + T1Q = T14 + T1P; + T3d = FNMS(KP250000000, T1Q, Tj); + T3m = T3i - T3l; + T3t = T3p - T3s; + T3u = FNMS(KP587785252, T3t, KP951056516 * T3m); + T3w = FMA(KP951056516, T3t, KP587785252 * T3m); + ri[WS(rs, 10)] = Tj + T1Q; + T3v = T3e + T3d; + ri[WS(rs, 14)] = T3v - T3w; + ri[WS(rs, 6)] = T3v + T3w; + T3f = T3d - T3e; + ri[WS(rs, 2)] = T3f - T3u; + ri[WS(rs, 18)] = T3f + T3u; + } + { + E T47, T45, T46, T41, T4a, T3Z, T40, T49, T48; + T47 = KP559016994 * (T43 - T44); + T45 = T43 + T44; + T46 = FNMS(KP250000000, T45, T42); + T3Z = T1r - T1O; + T40 = TG - T13; + T41 = FNMS(KP587785252, T40, KP951056516 * T3Z); + T4a = FMA(KP951056516, T40, KP587785252 * T3Z); + ii[WS(rs, 10)] = T45 + T42; + T49 = T47 + T46; + ii[WS(rs, 6)] = T49 - T4a; + ii[WS(rs, 14)] = T4a + T49; + T48 = T46 - T47; + ii[WS(rs, 2)] = T41 + T48; + 
ii[WS(rs, 18)] = T48 - T41; + } + { + E T3x, T1Y, T3y, T3G, T3I, T3C, T3F, T3H, T3z; + T3x = KP559016994 * (T1U - T1X); + T1Y = T1U + T1X; + T3y = FNMS(KP250000000, T1Y, T1R); + T3C = T3A - T3B; + T3F = T3D - T3E; + T3G = FMA(KP951056516, T3C, KP587785252 * T3F); + T3I = FNMS(KP587785252, T3C, KP951056516 * T3F); + ri[0] = T1R + T1Y; + T3H = T3y - T3x; + ri[WS(rs, 12)] = T3H - T3I; + ri[WS(rs, 8)] = T3H + T3I; + T3z = T3x + T3y; + ri[WS(rs, 4)] = T3z - T3G; + ri[WS(rs, 16)] = T3z + T3G; + } + { + E T3U, T3L, T3V, T3T, T3Y, T3R, T3S, T3X, T3W; + T3U = KP559016994 * (T3J - T3K); + T3L = T3J + T3K; + T3V = FNMS(KP250000000, T3L, T3Q); + T3R = T1S - T1T; + T3S = T1V - T1W; + T3T = FMA(KP951056516, T3R, KP587785252 * T3S); + T3Y = FNMS(KP587785252, T3R, KP951056516 * T3S); + ii[0] = T3L + T3Q; + T3X = T3V - T3U; + ii[WS(rs, 8)] = T3X - T3Y; + ii[WS(rs, 12)] = T3Y + T3X; + T3W = T3U + T3V; + ii[WS(rs, 4)] = T3T + T3W; + ii[WS(rs, 16)] = T3W - T3T; + } + { + E T2P, T2N, T2O, T2l, T2R, T29, T2k, T2S, T2Q; + T2P = KP559016994 * (T2B - T2M); + T2N = T2B + T2M; + T2O = FNMS(KP250000000, T2N, T2q); + T29 = T23 - T28; + T2k = T2e - T2j; + T2l = FNMS(KP587785252, T2k, KP951056516 * T29); + T2R = FMA(KP951056516, T2k, KP587785252 * T29); + ri[WS(rs, 15)] = T2q + T2N; + T2S = T2P + T2O; + ri[WS(rs, 11)] = T2R + T2S; + ri[WS(rs, 19)] = T2S - T2R; + T2Q = T2O - T2P; + ri[WS(rs, 3)] = T2l + T2Q; + ri[WS(rs, 7)] = T2Q - T2l; + } + { + E T4u, T4s, T4t, T4y, T4A, T4w, T4x, T4z, T4v; + T4u = KP559016994 * (T4q - T4r); + T4s = T4q + T4r; + T4t = FNMS(KP250000000, T4s, T4p); + T4w = T2G - T2L; + T4x = T2v - T2A; + T4y = FNMS(KP587785252, T4x, KP951056516 * T4w); + T4A = FMA(KP951056516, T4x, KP587785252 * T4w); + ii[WS(rs, 15)] = T4s + T4p; + T4z = T4u + T4t; + ii[WS(rs, 11)] = T4z - T4A; + ii[WS(rs, 19)] = T4A + T4z; + T4v = T4t - T4u; + ii[WS(rs, 3)] = T4v - T4y; + ii[WS(rs, 7)] = T4y + T4v; + } + { + E T36, T38, T39, T2Z, T3b, T2V, T2Y, T3c, T3a; + T36 = KP559016994 * (T32 - T35); + T38 
= T32 + T35; + T39 = FNMS(KP250000000, T38, T37); + T2V = T2T - T2U; + T2Y = T2W - T2X; + T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y); + T3b = FNMS(KP587785252, T2V, KP951056516 * T2Y); + ri[WS(rs, 5)] = T37 + T38; + T3c = T39 - T36; + ri[WS(rs, 13)] = T3b + T3c; + ri[WS(rs, 17)] = T3c - T3b; + T3a = T36 + T39; + ri[WS(rs, 1)] = T2Z + T3a; + ri[WS(rs, 9)] = T3a - T2Z; + } + { + E T4d, T4h, T4i, T4m, T4o, T4k, T4l, T4n, T4j; + T4d = KP559016994 * (T4b - T4c); + T4h = T4b + T4c; + T4i = FNMS(KP250000000, T4h, T4g); + T4k = T30 - T31; + T4l = T33 - T34; + T4m = FMA(KP951056516, T4k, KP587785252 * T4l); + T4o = FNMS(KP587785252, T4k, KP951056516 * T4l); + ii[WS(rs, 5)] = T4h + T4g; + T4n = T4i - T4d; + ii[WS(rs, 13)] = T4n - T4o; + ii[WS(rs, 17)] = T4o + T4n; + T4j = T4d + T4i; + ii[WS(rs, 1)] = T4j - T4m; + ii[WS(rs, 9)] = T4m + T4j; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 20}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 20, "t1_20", twinstr, &GENUS, {184, 62, 62, 0}, 0, 0, 0 }; + +void X(codelet_t1_20) (planner *p) { + X(kdft_dit_register) (p, t1_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1561 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:53 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -name t1_25 -include t.h */ + +/* + * This function contains 400 FP additions, 364 FP multiplications, + * (or, 84 additions, 48 multiplications, 316 fused multiply/add), + * 181 stack variables, 47 constants, and 100 memory accesses + */ +#include "t.h" + +static void t1_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP062914667, 
+0.062914667253649757225485955897349402364686947); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP943557151, +0.943557151597354104399655195398983005179443399); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP833417178, +0.833417178328688677408962550243238843138996060); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + 
DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 48); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { + E T7I, T6Q, T6O, T7O, T7M, T7H, T6P, T6H, T7J, T7N; + { + E T78, T5G, T3Y, T3M, T7C, T7c, T77, T6Y, Tt, T3L, T5T, T4P, T5Q, T4W, T3G; + E T2G, T5P, T4T, T5S, T4M, T65, T45, T68, T4c, T2Z, T11, T67, T49, T64, T42; + E T5Y, T4r, T61, T4k, T3d, T1z, T60, T4h, T5X, T4o, T3g, T1G, T3q, T4z, T4G; + E T26, T3i, T1M, T3k, T1S; + { + E T3u, T2e, T3E, T4O, T4V, T2E, T3w, T2k, T3y, T2q; + { + E T1, T6X, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta; + { + E T3, T6, T2, T5; + T1 = ri[0]; + T6X = ii[0]; + T3 = ri[WS(rs, 5)]; + T6 = ii[WS(rs, 5)]; + T2 = W[8]; + T5 = W[9]; + { + E Tm, Tp, To, T3V, Tn, T3O, T4, Tl; + Tm = ri[WS(rs, 15)]; + Tp = ii[WS(rs, 15)]; + T3O = T2 * T6; + T4 = T2 * T3; + Tl = W[28]; + To = W[29]; + T3P = FNMS(T5, T3, T3O); + T7 = FMA(T5, T6, T4); + T3V = Tl * Tp; + Tn = Tl * Tm; + { + E Tg, Tj, Tf, Ti, T3T, Th, T8; + Tg = ri[WS(rs, 10)]; + Tj = ii[WS(rs, 10)]; + T3W = FNMS(To, Tm, T3V); + Tq = FMA(To, Tp, Tn); + Tf = W[18]; + Ti = W[19]; + T9 = ri[WS(rs, 20)]; + Tc = ii[WS(rs, 20)]; + T3T = Tf * Tj; + Th = Tf * Tg; + T8 = W[38]; + Tb = W[39]; + T3U = FNMS(Ti, Tg, T3T); + Tk = FMA(Ti, Tj, Th); + T3Q = T8 * Tc; + Ta = T8 * T9; + } + } + } + { + E T6V, T3X, T7b, Tr, T3R, Td; + T6V = T3U + T3W; + T3X = T3U - T3W; + T7b = Tk - Tq; + Tr = Tk + Tq; + T3R = FNMS(Tb, T9, T3Q); + Td = FMA(Tb, Tc, Ta); + { + E T3S, T7a, Te, T6W, T6U, Ts; + T3S = T3P - T3R; + T6U = T3P + T3R; + T7a = T7 - Td; + Te = T7 + Td; + T78 = T6U - T6V; + T6W = T6U + T6V; + T5G = FNMS(KP618033988, T3S, T3X); + T3Y = FMA(KP618033988, T3X, T3S); + T3M = Te - Tr; + Ts = Te + Tr; + T7C = FNMS(KP618033988, T7a, T7b); + T7c = FMA(KP618033988, T7b, T7a); + T77 = 
FNMS(KP250000000, T6W, T6X); + T6Y = T6W + T6X; + Tt = T1 + Ts; + T3L = FNMS(KP250000000, Ts, T1); + } + } + } + { + E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n; + { + E T2a, T2d, T29, T2c; + T2a = ri[WS(rs, 3)]; + T2d = ii[WS(rs, 3)]; + T29 = W[4]; + T2c = W[5]; + { + E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f; + T2t = ri[WS(rs, 13)]; + T2w = ii[WS(rs, 13)]; + T3t = T29 * T2d; + T2b = T29 * T2a; + T2s = W[24]; + T2z = ri[WS(rs, 18)]; + T3u = FNMS(T2c, T2a, T3t); + T2e = FMA(T2c, T2d, T2b); + T3A = T2s * T2w; + T2u = T2s * T2t; + T2C = ii[WS(rs, 18)]; + T2y = W[34]; + T2v = W[25]; + T2B = W[35]; + { + E T3B, T2x, T3D, T2D, T3C, T2A; + T2g = ri[WS(rs, 8)]; + T3C = T2y * T2C; + T2A = T2y * T2z; + T3B = FNMS(T2v, T2t, T3A); + T2x = FMA(T2v, T2w, T2u); + T3D = FNMS(T2B, T2z, T3C); + T2D = FMA(T2B, T2C, T2A); + T2j = ii[WS(rs, 8)]; + T2f = W[14]; + T3E = T3B + T3D; + T4O = T3D - T3B; + T4V = T2x - T2D; + T2E = T2x + T2D; + } + T2m = ri[WS(rs, 23)]; + T3v = T2f * T2j; + T2h = T2f * T2g; + T2p = ii[WS(rs, 23)]; + T2l = W[44]; + T2i = W[15]; + T2o = W[45]; + } + } + T3x = T2l * T2p; + T2n = T2l * T2m; + T3w = FNMS(T2i, T2g, T3v); + T2k = FMA(T2i, T2j, T2h); + T3y = FNMS(T2o, T2m, T3x); + T2q = FMA(T2o, T2p, T2n); + } + { + E T2N, Tz, T2X, T44, T4b, TZ, T2P, TF, T2R, TL; + { + E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI; + { + E Tv, Ty, Tu, Tx; + { + E T4S, T4L, T4R, T4K, T4N, T3z; + Tv = ri[WS(rs, 1)]; + T4N = T3y - T3w; + T3z = T3w + T3y; + { + E T4U, T2r, T3F, T2F; + T4U = T2k - T2q; + T2r = T2k + T2q; + T5T = FNMS(KP618033988, T4N, T4O); + T4P = FMA(KP618033988, T4O, T4N); + T3F = T3z + T3E; + T4S = T3E - T3z; + T5Q = FNMS(KP618033988, T4U, T4V); + T4W = FMA(KP618033988, T4V, T4U); + T2F = T2r + T2E; + T4L = T2E - T2r; + T3G = T3u + T3F; + T4R = FNMS(KP250000000, T3F, T3u); + T2G = T2e + T2F; + T4K = FNMS(KP250000000, T2F, T2e); + Ty = ii[WS(rs, 1)]; + } + T5P = FMA(KP559016994, T4S, T4R); + T4T = FNMS(KP559016994, T4S, T4R); + T5S 
= FMA(KP559016994, T4L, T4K); + T4M = FNMS(KP559016994, T4L, T4K); + Tu = W[0]; + } + Tx = W[1]; + { + E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA; + TO = ri[WS(rs, 11)]; + TR = ii[WS(rs, 11)]; + T2M = Tu * Ty; + Tw = Tu * Tv; + TN = W[20]; + TU = ri[WS(rs, 16)]; + T2N = FNMS(Tx, Tv, T2M); + Tz = FMA(Tx, Ty, Tw); + T2T = TN * TR; + TP = TN * TO; + TX = ii[WS(rs, 16)]; + TT = W[30]; + TQ = W[21]; + TW = W[31]; + { + E T2U, TS, T2W, TY, T2V, TV; + TB = ri[WS(rs, 6)]; + T2V = TT * TX; + TV = TT * TU; + T2U = FNMS(TQ, TO, T2T); + TS = FMA(TQ, TR, TP); + T2W = FNMS(TW, TU, T2V); + TY = FMA(TW, TX, TV); + TE = ii[WS(rs, 6)]; + TA = W[10]; + T2X = T2U + T2W; + T44 = T2W - T2U; + T4b = TY - TS; + TZ = TS + TY; + } + TH = ri[WS(rs, 21)]; + T2O = TA * TE; + TC = TA * TB; + TK = ii[WS(rs, 21)]; + TG = W[40]; + TD = W[11]; + TJ = W[41]; + } + } + T2Q = TG * TK; + TI = TG * TH; + T2P = FNMS(TD, TB, T2O); + TF = FMA(TD, TE, TC); + T2R = FNMS(TJ, TH, T2Q); + TL = FMA(TJ, TK, TI); + } + { + E T31, T17, T3b, T4q, T4j, T1x, T33, T1d, T35, T1j; + { + E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g; + { + E T13, T16, T12, T15; + { + E T48, T41, T47, T40, T43, T2S; + T13 = ri[WS(rs, 4)]; + T43 = T2P - T2R; + T2S = T2P + T2R; + { + E T4a, TM, T2Y, T10; + T4a = TL - TF; + TM = TF + TL; + T65 = FMA(KP618033988, T43, T44); + T45 = FNMS(KP618033988, T44, T43); + T2Y = T2S + T2X; + T48 = T2S - T2X; + T68 = FNMS(KP618033988, T4a, T4b); + T4c = FMA(KP618033988, T4b, T4a); + T10 = TM + TZ; + T41 = TM - TZ; + T2Z = T2N + T2Y; + T47 = FNMS(KP250000000, T2Y, T2N); + T11 = Tz + T10; + T40 = FNMS(KP250000000, T10, Tz); + T16 = ii[WS(rs, 4)]; + } + T67 = FNMS(KP559016994, T48, T47); + T49 = FMA(KP559016994, T48, T47); + T64 = FNMS(KP559016994, T41, T40); + T42 = FMA(KP559016994, T41, T40); + T12 = W[6]; + } + T15 = W[7]; + { + E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18; + T1m = ri[WS(rs, 14)]; + T1p = ii[WS(rs, 14)]; + T30 = T12 * T16; + T14 = T12 * T13; + 
T1l = W[26]; + T1s = ri[WS(rs, 19)]; + T31 = FNMS(T15, T13, T30); + T17 = FMA(T15, T16, T14); + T37 = T1l * T1p; + T1n = T1l * T1m; + T1v = ii[WS(rs, 19)]; + T1r = W[36]; + T1o = W[27]; + T1u = W[37]; + { + E T38, T1q, T3a, T1w, T39, T1t; + T19 = ri[WS(rs, 9)]; + T39 = T1r * T1v; + T1t = T1r * T1s; + T38 = FNMS(T1o, T1m, T37); + T1q = FMA(T1o, T1p, T1n); + T3a = FNMS(T1u, T1s, T39); + T1w = FMA(T1u, T1v, T1t); + T1c = ii[WS(rs, 9)]; + T18 = W[16]; + T3b = T38 + T3a; + T4q = T3a - T38; + T4j = T1w - T1q; + T1x = T1q + T1w; + } + T1f = ri[WS(rs, 24)]; + T32 = T18 * T1c; + T1a = T18 * T19; + T1i = ii[WS(rs, 24)]; + T1e = W[46]; + T1b = W[17]; + T1h = W[47]; + } + } + T34 = T1e * T1i; + T1g = T1e * T1f; + T33 = FNMS(T1b, T19, T32); + T1d = FMA(T1b, T1c, T1a); + T35 = FNMS(T1h, T1f, T34); + T1j = FMA(T1h, T1i, T1g); + } + { + E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P; + { + E T1C, T1F, T1B, T1E; + { + E T4g, T4n, T4f, T4m, T4p, T36; + T1C = ri[WS(rs, 2)]; + T4p = T35 - T33; + T36 = T33 + T35; + { + E T4i, T1k, T3c, T1y; + T4i = T1j - T1d; + T1k = T1d + T1j; + T5Y = FNMS(KP618033988, T4p, T4q); + T4r = FMA(KP618033988, T4q, T4p); + T3c = T36 + T3b; + T4g = T3b - T36; + T61 = FNMS(KP618033988, T4i, T4j); + T4k = FMA(KP618033988, T4j, T4i); + T1y = T1k + T1x; + T4n = T1k - T1x; + T3d = T31 + T3c; + T4f = FNMS(KP250000000, T3c, T31); + T1z = T17 + T1y; + T4m = FNMS(KP250000000, T1y, T17); + T1F = ii[WS(rs, 2)]; + } + T60 = FMA(KP559016994, T4g, T4f); + T4h = FNMS(KP559016994, T4g, T4f); + T5X = FNMS(KP559016994, T4n, T4m); + T4o = FMA(KP559016994, T4n, T4m); + T1B = W[2]; + } + T1E = W[3]; + { + E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H; + T1V = ri[WS(rs, 12)]; + T1Y = ii[WS(rs, 12)]; + T3f = T1B * T1F; + T1D = T1B * T1C; + T1U = W[22]; + T21 = ri[WS(rs, 17)]; + T3g = FNMS(T1E, T1C, T3f); + T1G = FMA(T1E, T1F, T1D); + T3m = T1U * T1Y; + T1W = T1U * T1V; + T24 = ii[WS(rs, 17)]; + T20 = W[32]; + T1X = W[23]; + T23 = W[33]; + { + E T3n, 
T1Z, T3p, T25, T3o, T22; + T1I = ri[WS(rs, 7)]; + T3o = T20 * T24; + T22 = T20 * T21; + T3n = FNMS(T1X, T1V, T3m); + T1Z = FMA(T1X, T1Y, T1W); + T3p = FNMS(T23, T21, T3o); + T25 = FMA(T23, T24, T22); + T1L = ii[WS(rs, 7)]; + T1H = W[12]; + T3q = T3n + T3p; + T4z = T3n - T3p; + T4G = T25 - T1Z; + T26 = T1Z + T25; + } + T1O = ri[WS(rs, 22)]; + T3h = T1H * T1L; + T1J = T1H * T1I; + T1R = ii[WS(rs, 22)]; + T1N = W[42]; + T1K = W[13]; + T1Q = W[43]; + } + } + T3j = T1N * T1R; + T1P = T1N * T1O; + T3i = FNMS(T1K, T1I, T3h); + T1M = FMA(T1K, T1L, T1J); + T3k = FNMS(T1Q, T1O, T3j); + T1S = FMA(T1Q, T1R, T1P); + } + } + } + } + { + E T6R, T5M, T4A, T5J, T4H, T6S, T5I, T4E, T5L, T4x, T3K, T3I, T2K, T74, T76; + E T2J; + { + E T1A, T72, T73, T2H, T28, T2I; + { + E T3e, T4D, T4w, T4C, T4v, T3H, T4y, T3l; + T6R = T2Z + T3d; + T3e = T2Z - T3d; + T4y = T3k - T3i; + T3l = T3i + T3k; + { + E T4F, T1T, T3r, T27, T3s; + T4F = T1S - T1M; + T1T = T1M + T1S; + T5M = FMA(KP618033988, T4y, T4z); + T4A = FNMS(KP618033988, T4z, T4y); + T3r = T3l + T3q; + T4D = T3q - T3l; + T5J = FNMS(KP618033988, T4F, T4G); + T4H = FMA(KP618033988, T4G, T4F); + T27 = T1T + T26; + T4w = T26 - T1T; + T3s = T3g + T3r; + T4C = FNMS(KP250000000, T3r, T3g); + T28 = T1G + T27; + T4v = FNMS(KP250000000, T27, T1G); + T3H = T3s - T3G; + T6S = T3s + T3G; + } + T5I = FMA(KP559016994, T4D, T4C); + T4E = FNMS(KP559016994, T4D, T4C); + T5L = FMA(KP559016994, T4w, T4v); + T4x = FNMS(KP559016994, T4w, T4v); + T3K = FNMS(KP618033988, T3e, T3H); + T3I = FMA(KP618033988, T3H, T3e); + } + T1A = T11 + T1z; + T72 = T11 - T1z; + T73 = T28 - T2G; + T2H = T28 + T2G; + T2I = T1A + T2H; + T2K = T1A - T2H; + T74 = FMA(KP618033988, T73, T72); + T76 = FNMS(KP618033988, T72, T73); + ri[0] = Tt + T2I; + T2J = FNMS(KP250000000, T2I, Tt); + } + { + E T5F, T7B, T7u, T5E, T5C, T7A, T7y, T7t, T5D, T5v; + { + E T3Z, T5d, T7p, T7d, T5m, T5l, T56, T7k, T59, T7l, T5z, T5g, T7g, T7i, T52; + E T50, T5x, T5q, T5A, T5j, T70, T6Z, T3N; + T5F = 
FNMS(KP559016994, T3M, T3L); + T3N = FMA(KP559016994, T3M, T3L); + { + E T79, T3J, T2L, T6T; + T79 = FMA(KP559016994, T78, T77); + T7B = FNMS(KP559016994, T78, T77); + T3J = FNMS(KP559016994, T2K, T2J); + T2L = FMA(KP559016994, T2K, T2J); + T6T = T6R + T6S; + T70 = T6R - T6S; + T3Z = FMA(KP951056516, T3Y, T3N); + T5d = FNMS(KP951056516, T3Y, T3N); + ri[WS(rs, 5)] = FMA(KP951056516, T3I, T2L); + ri[WS(rs, 20)] = FNMS(KP951056516, T3I, T2L); + ri[WS(rs, 15)] = FMA(KP951056516, T3K, T3J); + ri[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J); + ii[0] = T6T + T6Y; + T6Z = FNMS(KP250000000, T6T, T6Y); + T7p = FMA(KP951056516, T7c, T79); + T7d = FNMS(KP951056516, T7c, T79); + } + { + E T5e, T54, T4e, T5f, T5o, T5p, T5i, T4B, T58, T4Y, T55, T4t, T4I, T5h; + { + E T4Q, T4X, T4l, T4s; + { + E T46, T71, T75, T4d; + T5m = FNMS(KP951056516, T45, T42); + T46 = FMA(KP951056516, T45, T42); + T71 = FMA(KP559016994, T70, T6Z); + T75 = FNMS(KP559016994, T70, T6Z); + T4d = FMA(KP951056516, T4c, T49); + T5l = FNMS(KP951056516, T4c, T49); + T5e = FMA(KP951056516, T4P, T4M); + T4Q = FNMS(KP951056516, T4P, T4M); + ii[WS(rs, 20)] = FMA(KP951056516, T74, T71); + ii[WS(rs, 5)] = FNMS(KP951056516, T74, T71); + ii[WS(rs, 15)] = FNMS(KP951056516, T76, T75); + ii[WS(rs, 10)] = FMA(KP951056516, T76, T75); + T54 = FNMS(KP256756360, T46, T4d); + T4e = FMA(KP256756360, T4d, T46); + T4X = FNMS(KP951056516, T4W, T4T); + T5f = FMA(KP951056516, T4W, T4T); + } + T5o = FNMS(KP951056516, T4k, T4h); + T4l = FMA(KP951056516, T4k, T4h); + T4s = FNMS(KP951056516, T4r, T4o); + T5p = FMA(KP951056516, T4r, T4o); + T5i = FMA(KP951056516, T4A, T4x); + T4B = FNMS(KP951056516, T4A, T4x); + T58 = FNMS(KP939062505, T4Q, T4X); + T4Y = FMA(KP939062505, T4X, T4Q); + T55 = FNMS(KP634619297, T4l, T4s); + T4t = FMA(KP634619297, T4s, T4l); + T4I = FMA(KP951056516, T4H, T4E); + T5h = FNMS(KP951056516, T4H, T4E); + } + { + E T7e, T4u, T57, T4J, T7f, T4Z; + T7e = FNMS(KP871714437, T55, T54); + T56 = FMA(KP871714437, T55, T54); + T4u = 
FMA(KP871714437, T4t, T4e); + T7k = FNMS(KP871714437, T4t, T4e); + T57 = FNMS(KP549754652, T4B, T4I); + T4J = FMA(KP549754652, T4I, T4B); + T7f = FMA(KP831864738, T58, T57); + T59 = FNMS(KP831864738, T58, T57); + T4Z = FMA(KP831864738, T4Y, T4J); + T7l = FNMS(KP831864738, T4Y, T4J); + T5z = FMA(KP126329378, T5e, T5f); + T5g = FNMS(KP126329378, T5f, T5e); + T7g = FMA(KP904730450, T7f, T7e); + T7i = FNMS(KP904730450, T7f, T7e); + T52 = FNMS(KP904730450, T4Z, T4u); + T50 = FMA(KP904730450, T4Z, T4u); + } + T5x = FNMS(KP827271945, T5o, T5p); + T5q = FMA(KP827271945, T5p, T5o); + T5A = FMA(KP470564281, T5h, T5i); + T5j = FNMS(KP470564281, T5i, T5h); + } + { + E T7q, T5B, T5k, T7x, T5w, T5n; + ri[WS(rs, 1)] = FMA(KP968583161, T50, T3Z); + T7q = FMA(KP912018591, T5A, T5z); + T5B = FNMS(KP912018591, T5A, T5z); + T5k = FNMS(KP912018591, T5j, T5g); + T7x = FMA(KP912018591, T5j, T5g); + T5w = FNMS(KP634619297, T5l, T5m); + T5n = FMA(KP634619297, T5m, T5l); + ii[WS(rs, 1)] = FMA(KP968583161, T7g, T7d); + { + E T5y, T7w, T7s, T5s, T5u, T7o, T7m, T7n, T7j, T5t; + { + E T5c, T5a, T51, T7r, T5r, T53, T5b, T7h; + T5c = FNMS(KP683113946, T56, T59); + T5a = FMA(KP559154169, T59, T56); + T7r = FNMS(KP912575812, T5x, T5w); + T5y = FMA(KP912575812, T5x, T5w); + T5r = FNMS(KP912575812, T5q, T5n); + T7w = FMA(KP912575812, T5q, T5n); + T7s = FMA(KP851038619, T7r, T7q); + T7u = FNMS(KP851038619, T7r, T7q); + T5s = FNMS(KP851038619, T5r, T5k); + T5u = FMA(KP851038619, T5r, T5k); + T51 = FNMS(KP242145790, T50, T3Z); + ii[WS(rs, 4)] = FNMS(KP992114701, T7s, T7p); + ri[WS(rs, 4)] = FNMS(KP992114701, T5s, T5d); + T7o = FNMS(KP683113946, T7k, T7l); + T7m = FMA(KP559154169, T7l, T7k); + T53 = FMA(KP541454447, T52, T51); + T5b = FNMS(KP541454447, T52, T51); + T7h = FNMS(KP242145790, T7g, T7d); + ri[WS(rs, 11)] = FNMS(KP833417178, T5c, T5b); + ri[WS(rs, 16)] = FMA(KP833417178, T5c, T5b); + ri[WS(rs, 21)] = FNMS(KP921177326, T5a, T53); + ri[WS(rs, 6)] = FMA(KP921177326, T5a, T53); + T7n = 
FNMS(KP541454447, T7i, T7h); + T7j = FMA(KP541454447, T7i, T7h); + } + T5E = FMA(KP525970792, T5y, T5B); + T5C = FNMS(KP726211448, T5B, T5y); + ii[WS(rs, 21)] = FMA(KP921177326, T7m, T7j); + ii[WS(rs, 6)] = FNMS(KP921177326, T7m, T7j); + ii[WS(rs, 11)] = FMA(KP833417178, T7o, T7n); + ii[WS(rs, 16)] = FNMS(KP833417178, T7o, T7n); + T5t = FMA(KP248028675, T5s, T5d); + T7A = FNMS(KP525970792, T7w, T7x); + T7y = FMA(KP726211448, T7x, T7w); + T7t = FMA(KP248028675, T7s, T7p); + T5D = FNMS(KP554608978, T5u, T5t); + T5v = FMA(KP554608978, T5u, T5t); + } + } + } + { + E T5H, T6p, T7P, T7D, T6y, T6x, T6l, T7X, T6i, T7W, T6L, T6s, T7S, T7U, T6e; + E T6c, T6J, T6C, T6M, T6v, T7z, T7v; + ri[WS(rs, 14)] = FNMS(KP943557151, T5E, T5D); + ri[WS(rs, 19)] = FMA(KP943557151, T5E, T5D); + ri[WS(rs, 24)] = FMA(KP803003575, T5C, T5v); + ri[WS(rs, 9)] = FNMS(KP803003575, T5C, T5v); + T7z = FNMS(KP554608978, T7u, T7t); + T7v = FMA(KP554608978, T7u, T7t); + T5H = FMA(KP951056516, T5G, T5F); + T6p = FNMS(KP951056516, T5G, T5F); + ii[WS(rs, 14)] = FMA(KP943557151, T7A, T7z); + ii[WS(rs, 19)] = FNMS(KP943557151, T7A, T7z); + ii[WS(rs, 24)] = FMA(KP803003575, T7y, T7v); + ii[WS(rs, 9)] = FNMS(KP803003575, T7y, T7v); + { + E T6t, T6u, T6A, T6j, T5O, T6B, T6q, T6r, T5Z, T6h, T6a, T6k, T5V, T62; + { + E T66, T69, T5K, T5N, T5R, T5U; + T6t = FNMS(KP951056516, T5J, T5I); + T5K = FMA(KP951056516, T5J, T5I); + T5N = FMA(KP951056516, T5M, T5L); + T6u = FNMS(KP951056516, T5M, T5L); + T6A = FMA(KP951056516, T65, T64); + T66 = FNMS(KP951056516, T65, T64); + T7P = FNMS(KP951056516, T7C, T7B); + T7D = FMA(KP951056516, T7C, T7B); + T6j = FNMS(KP062914667, T5K, T5N); + T5O = FMA(KP062914667, T5N, T5K); + T69 = FMA(KP951056516, T68, T67); + T6B = FNMS(KP951056516, T68, T67); + T6q = FMA(KP951056516, T5Q, T5P); + T5R = FNMS(KP951056516, T5Q, T5P); + T5U = FNMS(KP951056516, T5T, T5S); + T6r = FMA(KP951056516, T5T, T5S); + T6y = FMA(KP951056516, T5Y, T5X); + T5Z = FNMS(KP951056516, T5Y, T5X); + T6h = 
FNMS(KP939062505, T66, T69); + T6a = FMA(KP939062505, T69, T66); + T6k = FMA(KP827271945, T5R, T5U); + T5V = FNMS(KP827271945, T5U, T5R); + T62 = FMA(KP951056516, T61, T60); + T6x = FNMS(KP951056516, T61, T60); + } + { + E T7Q, T5W, T6g, T63, T7R, T6b; + T7Q = FMA(KP772036680, T6k, T6j); + T6l = FNMS(KP772036680, T6k, T6j); + T5W = FMA(KP772036680, T5V, T5O); + T7X = FNMS(KP772036680, T5V, T5O); + T6g = FMA(KP126329378, T5Z, T62); + T63 = FNMS(KP126329378, T62, T5Z); + T7R = FNMS(KP734762448, T6h, T6g); + T6i = FMA(KP734762448, T6h, T6g); + T6b = FNMS(KP734762448, T6a, T63); + T7W = FMA(KP734762448, T6a, T63); + T6L = FNMS(KP062914667, T6q, T6r); + T6s = FMA(KP062914667, T6r, T6q); + T7S = FMA(KP994076283, T7R, T7Q); + T7U = FNMS(KP994076283, T7R, T7Q); + T6e = FMA(KP994076283, T6b, T5W); + T6c = FNMS(KP994076283, T6b, T5W); + } + T6J = FNMS(KP549754652, T6A, T6B); + T6C = FMA(KP549754652, T6B, T6A); + T6M = FNMS(KP634619297, T6t, T6u); + T6v = FMA(KP634619297, T6u, T6t); + } + { + E T7E, T6N, T6w, T7L, T6I, T6z; + ri[WS(rs, 3)] = FMA(KP998026728, T6c, T5H); + T7E = FMA(KP845997307, T6M, T6L); + T6N = FNMS(KP845997307, T6M, T6L); + T6w = FMA(KP845997307, T6v, T6s); + T7L = FNMS(KP845997307, T6v, T6s); + T6I = FMA(KP470564281, T6x, T6y); + T6z = FNMS(KP470564281, T6y, T6x); + ii[WS(rs, 3)] = FNMS(KP998026728, T7S, T7P); + { + E T6K, T7K, T7G, T6E, T6G, T80, T7Y, T7Z, T7V, T6F; + { + E T6o, T6m, T6d, T7F, T6D, T6f, T6n, T7T; + T6o = FMA(KP614372930, T6i, T6l); + T6m = FNMS(KP621716863, T6l, T6i); + T7F = FNMS(KP968479752, T6J, T6I); + T6K = FMA(KP968479752, T6J, T6I); + T6D = FMA(KP968479752, T6C, T6z); + T7K = FNMS(KP968479752, T6C, T6z); + T7G = FMA(KP906616052, T7F, T7E); + T7I = FNMS(KP906616052, T7F, T7E); + T6E = FMA(KP906616052, T6D, T6w); + T6G = FNMS(KP906616052, T6D, T6w); + T6d = FNMS(KP249506682, T6c, T5H); + ii[WS(rs, 2)] = FNMS(KP998026728, T7G, T7D); + ri[WS(rs, 2)] = FMA(KP998026728, T6E, T6p); + T80 = FNMS(KP614372930, T7W, T7X); + T7Y = 
FMA(KP621716863, T7X, T7W); + T6f = FNMS(KP557913902, T6e, T6d); + T6n = FMA(KP557913902, T6e, T6d); + T7T = FMA(KP249506682, T7S, T7P); + ri[WS(rs, 18)] = FNMS(KP949179823, T6o, T6n); + ri[WS(rs, 13)] = FMA(KP949179823, T6o, T6n); + ri[WS(rs, 8)] = FMA(KP943557151, T6m, T6f); + ri[WS(rs, 23)] = FNMS(KP943557151, T6m, T6f); + T7Z = FNMS(KP557913902, T7U, T7T); + T7V = FMA(KP557913902, T7U, T7T); + } + T6Q = FNMS(KP560319534, T6K, T6N); + T6O = FMA(KP681693190, T6N, T6K); + ii[WS(rs, 23)] = FMA(KP943557151, T7Y, T7V); + ii[WS(rs, 8)] = FNMS(KP943557151, T7Y, T7V); + ii[WS(rs, 13)] = FMA(KP949179823, T80, T7Z); + ii[WS(rs, 18)] = FNMS(KP949179823, T80, T7Z); + T6F = FNMS(KP249506682, T6E, T6p); + T7O = FNMS(KP560319534, T7K, T7L); + T7M = FMA(KP681693190, T7L, T7K); + T7H = FMA(KP249506682, T7G, T7D); + T6P = FMA(KP557913902, T6G, T6F); + T6H = FNMS(KP557913902, T6G, T6F); + } + } + } + } + } + } + ri[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P); + ri[WS(rs, 17)] = FMA(KP949179823, T6Q, T6P); + ri[WS(rs, 7)] = FMA(KP860541664, T6O, T6H); + ri[WS(rs, 22)] = FNMS(KP860541664, T6O, T6H); + T7J = FMA(KP557913902, T7I, T7H); + T7N = FNMS(KP557913902, T7I, T7H); + ii[WS(rs, 12)] = FNMS(KP949179823, T7O, T7N); + ii[WS(rs, 17)] = FMA(KP949179823, T7O, T7N); + ii[WS(rs, 22)] = FNMS(KP860541664, T7M, T7J); + ii[WS(rs, 7)] = FMA(KP860541664, T7M, T7J); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 25}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 25, "t1_25", twinstr, &GENUS, {84, 48, 316, 0}, 0, 0, 0 }; + +void X(codelet_t1_25) (planner *p) { + X(kdft_dit_register) (p, t1_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 25 -name t1_25 -include t.h */ + +/* + * This function contains 400 FP additions, 280 FP multiplications, + * (or, 260 additions, 140 multiplications, 140 fused multiply/add), + * 101 stack variables, 20 constants, and 100 memory accesses + */ 
+#include "t.h" + +static void t1_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + (mb * 48); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { + E T1, T6b, T2l, T6o, To, T2m, T6a, T6p, T6t, T6S, T2u, T4I, T2i, T60, T3O; + E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t; + E T4h, T4P, T1l, T5X, T33, T5w, T4j, T4W, T3e, T5v, T4k, T4T, T1P, T5Z, T3r; + E T5z, T4o, T51, T3C, T5A, T4n, T54; + { + E T6, T2o, Tb, T2p, Tc, T68, Th, T2r, Tm, T2s, Tn, T69; + T1 = ri[0]; + T6b = ii[0]; + { + E T3, T5, 
T2, T4; + T3 = ri[WS(rs, 5)]; + T5 = ii[WS(rs, 5)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T2o = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 20)]; + Ta = ii[WS(rs, 20)]; + T7 = W[38]; + T9 = W[39]; + Tb = FMA(T7, T8, T9 * Ta); + T2p = FNMS(T9, T8, T7 * Ta); + } + Tc = T6 + Tb; + T68 = T2o + T2p; + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 10)]; + Tg = ii[WS(rs, 10)]; + Td = W[18]; + Tf = W[19]; + Th = FMA(Td, Te, Tf * Tg); + T2r = FNMS(Tf, Te, Td * Tg); + } + { + E Tj, Tl, Ti, Tk; + Tj = ri[WS(rs, 15)]; + Tl = ii[WS(rs, 15)]; + Ti = W[28]; + Tk = W[29]; + Tm = FMA(Ti, Tj, Tk * Tl); + T2s = FNMS(Tk, Tj, Ti * Tl); + } + Tn = Th + Tm; + T69 = T2r + T2s; + T2l = KP559016994 * (Tc - Tn); + T6o = KP559016994 * (T68 - T69); + To = Tc + Tn; + T2m = FNMS(KP250000000, To, T1); + T6a = T68 + T69; + T6p = FNMS(KP250000000, T6a, T6b); + { + E T6r, T6s, T2q, T2t; + T6r = T6 - Tb; + T6s = Th - Tm; + T6t = FMA(KP951056516, T6r, KP587785252 * T6s); + T6S = FNMS(KP587785252, T6r, KP951056516 * T6s); + T2q = T2o - T2p; + T2t = T2r - T2s; + T2u = FMA(KP951056516, T2q, KP587785252 * T2t); + T4I = FNMS(KP587785252, T2q, KP951056516 * T2t); + } + } + { + E T1U, T3S, T3J, T3M, T3X, T3W, T3P, T3Q, T3T, T25, T2g, T2h; + { + E T1R, T1T, T1Q, T1S; + T1R = ri[WS(rs, 3)]; + T1T = ii[WS(rs, 3)]; + T1Q = W[4]; + T1S = W[5]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T3S = FNMS(T1S, T1R, T1Q * T1T); + } + { + E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K; + { + E T1W, T1Y, T1V, T1X; + T1W = ri[WS(rs, 8)]; + T1Y = ii[WS(rs, 8)]; + T1V = W[14]; + T1X = W[15]; + T1Z = FMA(T1V, T1W, T1X * T1Y); + T3H = FNMS(T1X, T1W, T1V * T1Y); + } + { + E T2c, T2e, T2b, T2d; + T2c = ri[WS(rs, 18)]; + T2e = ii[WS(rs, 18)]; + T2b = W[34]; + T2d = W[35]; + T2f = FMA(T2b, T2c, T2d * T2e); + T3L = FNMS(T2d, T2c, T2b * T2e); + } + { + E T21, T23, T20, T22; + T21 = ri[WS(rs, 23)]; + T23 = ii[WS(rs, 23)]; + T20 = W[44]; + T22 = W[45]; + T24 = FMA(T20, T21, T22 * T23); + T3I = FNMS(T22, T21, T20 * 
T23); + } + { + E T27, T29, T26, T28; + T27 = ri[WS(rs, 13)]; + T29 = ii[WS(rs, 13)]; + T26 = W[24]; + T28 = W[25]; + T2a = FMA(T26, T27, T28 * T29); + T3K = FNMS(T28, T27, T26 * T29); + } + T3J = T3H - T3I; + T3M = T3K - T3L; + T3X = T2a - T2f; + T3W = T1Z - T24; + T3P = T3H + T3I; + T3Q = T3K + T3L; + T3T = T3P + T3Q; + T25 = T1Z + T24; + T2g = T2a + T2f; + T2h = T25 + T2g; + } + T2i = T1U + T2h; + T60 = T3S + T3T; + { + E T3N, T57, T3G, T56, T3E, T3F; + T3N = FMA(KP951056516, T3J, KP587785252 * T3M); + T57 = FNMS(KP587785252, T3J, KP951056516 * T3M); + T3E = KP559016994 * (T25 - T2g); + T3F = FNMS(KP250000000, T2h, T1U); + T3G = T3E + T3F; + T56 = T3F - T3E; + T3O = T3G + T3N; + T5D = T56 + T57; + T4r = T3G - T3N; + T58 = T56 - T57; + } + { + E T3Y, T59, T3V, T5a, T3R, T3U; + T3Y = FMA(KP951056516, T3W, KP587785252 * T3X); + T59 = FNMS(KP587785252, T3W, KP951056516 * T3X); + T3R = KP559016994 * (T3P - T3Q); + T3U = FNMS(KP250000000, T3T, T3S); + T3V = T3R + T3U; + T5a = T3U - T3R; + T3Z = T3V - T3Y; + T5C = T5a - T59; + T4q = T3Y + T3V; + T5b = T59 + T5a; + } + } + { + E Tu, T2K, T2B, T2E, T2P, T2O, T2H, T2I, T2L, TF, TQ, TR; + { + E Tr, Tt, Tq, Ts; + Tr = ri[WS(rs, 1)]; + Tt = ii[WS(rs, 1)]; + Tq = W[0]; + Ts = W[1]; + Tu = FMA(Tq, Tr, Ts * Tt); + T2K = FNMS(Ts, Tr, Tq * Tt); + } + { + E Tz, T2z, TP, T2D, TE, T2A, TK, T2C; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 6)]; + Ty = ii[WS(rs, 6)]; + Tv = W[10]; + Tx = W[11]; + Tz = FMA(Tv, Tw, Tx * Ty); + T2z = FNMS(Tx, Tw, Tv * Ty); + } + { + E TM, TO, TL, TN; + TM = ri[WS(rs, 16)]; + TO = ii[WS(rs, 16)]; + TL = W[30]; + TN = W[31]; + TP = FMA(TL, TM, TN * TO); + T2D = FNMS(TN, TM, TL * TO); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 21)]; + TD = ii[WS(rs, 21)]; + TA = W[40]; + TC = W[41]; + TE = FMA(TA, TB, TC * TD); + T2A = FNMS(TC, TB, TA * TD); + } + { + E TH, TJ, TG, TI; + TH = ri[WS(rs, 11)]; + TJ = ii[WS(rs, 11)]; + TG = W[20]; + TI = W[21]; + TK = FMA(TG, TH, TI * TJ); + T2C = FNMS(TI, TH, TG * TJ); + } + 
T2B = T2z - T2A; + T2E = T2C - T2D; + T2P = TK - TP; + T2O = Tz - TE; + T2H = T2z + T2A; + T2I = T2C + T2D; + T2L = T2H + T2I; + TF = Tz + TE; + TQ = TK + TP; + TR = TF + TQ; + } + TS = Tu + TR; + T5W = T2K + T2L; + { + E T2F, T4L, T2y, T4K, T2w, T2x; + T2F = FMA(KP951056516, T2B, KP587785252 * T2E); + T4L = FNMS(KP587785252, T2B, KP951056516 * T2E); + T2w = KP559016994 * (TF - TQ); + T2x = FNMS(KP250000000, TR, Tu); + T2y = T2w + T2x; + T4K = T2x - T2w; + T2G = T2y + T2F; + T5s = T4K + T4L; + T4g = T2y - T2F; + T4M = T4K - T4L; + } + { + E T2Q, T4N, T2N, T4O, T2J, T2M; + T2Q = FMA(KP951056516, T2O, KP587785252 * T2P); + T4N = FNMS(KP587785252, T2O, KP951056516 * T2P); + T2J = KP559016994 * (T2H - T2I); + T2M = FNMS(KP250000000, T2L, T2K); + T2N = T2J + T2M; + T4O = T2M - T2J; + T2R = T2N - T2Q; + T5t = T4O - T4N; + T4h = T2Q + T2N; + T4P = T4N + T4O; + } + } + { + E TX, T37, T2Y, T31, T3c, T3b, T34, T35, T38, T18, T1j, T1k; + { + E TU, TW, TT, TV; + TU = ri[WS(rs, 4)]; + TW = ii[WS(rs, 4)]; + TT = W[6]; + TV = W[7]; + TX = FMA(TT, TU, TV * TW); + T37 = FNMS(TV, TU, TT * TW); + } + { + E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z; + { + E TZ, T11, TY, T10; + TZ = ri[WS(rs, 9)]; + T11 = ii[WS(rs, 9)]; + TY = W[16]; + T10 = W[17]; + T12 = FMA(TY, TZ, T10 * T11); + T2W = FNMS(T10, TZ, TY * T11); + } + { + E T1f, T1h, T1e, T1g; + T1f = ri[WS(rs, 19)]; + T1h = ii[WS(rs, 19)]; + T1e = W[36]; + T1g = W[37]; + T1i = FMA(T1e, T1f, T1g * T1h); + T30 = FNMS(T1g, T1f, T1e * T1h); + } + { + E T14, T16, T13, T15; + T14 = ri[WS(rs, 24)]; + T16 = ii[WS(rs, 24)]; + T13 = W[46]; + T15 = W[47]; + T17 = FMA(T13, T14, T15 * T16); + T2X = FNMS(T15, T14, T13 * T16); + } + { + E T1a, T1c, T19, T1b; + T1a = ri[WS(rs, 14)]; + T1c = ii[WS(rs, 14)]; + T19 = W[26]; + T1b = W[27]; + T1d = FMA(T19, T1a, T1b * T1c); + T2Z = FNMS(T1b, T1a, T19 * T1c); + } + T2Y = T2W - T2X; + T31 = T2Z - T30; + T3c = T1d - T1i; + T3b = T12 - T17; + T34 = T2W + T2X; + T35 = T2Z + T30; + T38 = T34 + T35; + T18 = T12 + 
T17; + T1j = T1d + T1i; + T1k = T18 + T1j; + } + T1l = TX + T1k; + T5X = T37 + T38; + { + E T32, T4V, T2V, T4U, T2T, T2U; + T32 = FMA(KP951056516, T2Y, KP587785252 * T31); + T4V = FNMS(KP587785252, T2Y, KP951056516 * T31); + T2T = KP559016994 * (T18 - T1j); + T2U = FNMS(KP250000000, T1k, TX); + T2V = T2T + T2U; + T4U = T2U - T2T; + T33 = T2V + T32; + T5w = T4U + T4V; + T4j = T2V - T32; + T4W = T4U - T4V; + } + { + E T3d, T4R, T3a, T4S, T36, T39; + T3d = FMA(KP951056516, T3b, KP587785252 * T3c); + T4R = FNMS(KP587785252, T3b, KP951056516 * T3c); + T36 = KP559016994 * (T34 - T35); + T39 = FNMS(KP250000000, T38, T37); + T3a = T36 + T39; + T4S = T39 - T36; + T3e = T3a - T3d; + T5v = T4S - T4R; + T4k = T3d + T3a; + T4T = T4R + T4S; + } + } + { + E T1r, T3v, T3m, T3p, T3A, T3z, T3s, T3t, T3w, T1C, T1N, T1O; + { + E T1o, T1q, T1n, T1p; + T1o = ri[WS(rs, 2)]; + T1q = ii[WS(rs, 2)]; + T1n = W[2]; + T1p = W[3]; + T1r = FMA(T1n, T1o, T1p * T1q); + T3v = FNMS(T1p, T1o, T1n * T1q); + } + { + E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n; + { + E T1t, T1v, T1s, T1u; + T1t = ri[WS(rs, 7)]; + T1v = ii[WS(rs, 7)]; + T1s = W[12]; + T1u = W[13]; + T1w = FMA(T1s, T1t, T1u * T1v); + T3k = FNMS(T1u, T1t, T1s * T1v); + } + { + E T1J, T1L, T1I, T1K; + T1J = ri[WS(rs, 17)]; + T1L = ii[WS(rs, 17)]; + T1I = W[32]; + T1K = W[33]; + T1M = FMA(T1I, T1J, T1K * T1L); + T3o = FNMS(T1K, T1J, T1I * T1L); + } + { + E T1y, T1A, T1x, T1z; + T1y = ri[WS(rs, 22)]; + T1A = ii[WS(rs, 22)]; + T1x = W[42]; + T1z = W[43]; + T1B = FMA(T1x, T1y, T1z * T1A); + T3l = FNMS(T1z, T1y, T1x * T1A); + } + { + E T1E, T1G, T1D, T1F; + T1E = ri[WS(rs, 12)]; + T1G = ii[WS(rs, 12)]; + T1D = W[22]; + T1F = W[23]; + T1H = FMA(T1D, T1E, T1F * T1G); + T3n = FNMS(T1F, T1E, T1D * T1G); + } + T3m = T3k - T3l; + T3p = T3n - T3o; + T3A = T1H - T1M; + T3z = T1w - T1B; + T3s = T3k + T3l; + T3t = T3n + T3o; + T3w = T3s + T3t; + T1C = T1w + T1B; + T1N = T1H + T1M; + T1O = T1C + T1N; + } + T1P = T1r + T1O; + T5Z = T3v + T3w; + { + E T3q, T50, 
T3j, T4Z, T3h, T3i; + T3q = FMA(KP951056516, T3m, KP587785252 * T3p); + T50 = FNMS(KP587785252, T3m, KP951056516 * T3p); + T3h = KP559016994 * (T1C - T1N); + T3i = FNMS(KP250000000, T1O, T1r); + T3j = T3h + T3i; + T4Z = T3i - T3h; + T3r = T3j + T3q; + T5z = T4Z + T50; + T4o = T3j - T3q; + T51 = T4Z - T50; + } + { + E T3B, T52, T3y, T53, T3u, T3x; + T3B = FMA(KP951056516, T3z, KP587785252 * T3A); + T52 = FNMS(KP587785252, T3z, KP951056516 * T3A); + T3u = KP559016994 * (T3s - T3t); + T3x = FNMS(KP250000000, T3w, T3v); + T3y = T3u + T3x; + T53 = T3x - T3u; + T3C = T3y - T3B; + T5A = T53 - T52; + T4n = T3B + T3y; + T54 = T52 + T53; + } + } + { + E T62, T64, Tp, T2k, T5T, T5U, T63, T5V; + { + E T5Y, T61, T1m, T2j; + T5Y = T5W - T5X; + T61 = T5Z - T60; + T62 = FMA(KP951056516, T5Y, KP587785252 * T61); + T64 = FNMS(KP587785252, T5Y, KP951056516 * T61); + Tp = T1 + To; + T1m = TS + T1l; + T2j = T1P + T2i; + T2k = T1m + T2j; + T5T = KP559016994 * (T1m - T2j); + T5U = FNMS(KP250000000, T2k, Tp); + } + ri[0] = Tp + T2k; + T63 = T5U - T5T; + ri[WS(rs, 10)] = T63 - T64; + ri[WS(rs, 15)] = T63 + T64; + T5V = T5T + T5U; + ri[WS(rs, 20)] = T5V - T62; + ri[WS(rs, 5)] = T5V + T62; + } + { + E T6i, T6j, T6c, T67, T6d, T6e, T6k, T6f; + { + E T6g, T6h, T65, T66; + T6g = TS - T1l; + T6h = T1P - T2i; + T6i = FMA(KP951056516, T6g, KP587785252 * T6h); + T6j = FNMS(KP587785252, T6g, KP951056516 * T6h); + T6c = T6a + T6b; + T65 = T5W + T5X; + T66 = T5Z + T60; + T67 = T65 + T66; + T6d = KP559016994 * (T65 - T66); + T6e = FNMS(KP250000000, T67, T6c); + } + ii[0] = T67 + T6c; + T6k = T6e - T6d; + ii[WS(rs, 10)] = T6j + T6k; + ii[WS(rs, 15)] = T6k - T6j; + T6f = T6d + T6e; + ii[WS(rs, 5)] = T6f - T6i; + ii[WS(rs, 20)] = T6i + T6f; + } + { + E T2v, T4f, T6u, T6G, T42, T6z, T43, T6y, T4A, T6H, T4D, T6F, T4u, T6L, T4v; + E T6K, T48, T6v, T4b, T6n, T2n, T6q; + T2n = T2l + T2m; + T2v = T2n + T2u; + T4f = T2n - T2u; + T6q = T6o + T6p; + T6u = T6q - T6t; + T6G = T6t + T6q; + { + E T2S, T3f, T3g, T3D, 
T40, T41; + T2S = FMA(KP968583161, T2G, KP248689887 * T2R); + T3f = FMA(KP535826794, T33, KP844327925 * T3e); + T3g = T2S + T3f; + T3D = FMA(KP876306680, T3r, KP481753674 * T3C); + T40 = FMA(KP728968627, T3O, KP684547105 * T3Z); + T41 = T3D + T40; + T42 = T3g + T41; + T6z = T3D - T40; + T43 = KP559016994 * (T3g - T41); + T6y = T2S - T3f; + } + { + E T4y, T4z, T6D, T4B, T4C, T6E; + T4y = FNMS(KP844327925, T4g, KP535826794 * T4h); + T4z = FNMS(KP637423989, T4k, KP770513242 * T4j); + T6D = T4y + T4z; + T4B = FMA(KP125333233, T4r, KP992114701 * T4q); + T4C = FMA(KP904827052, T4o, KP425779291 * T4n); + T6E = T4C + T4B; + T4A = T4y - T4z; + T6H = KP559016994 * (T6D + T6E); + T4D = T4B - T4C; + T6F = T6D - T6E; + } + { + E T4i, T4l, T4m, T4p, T4s, T4t; + T4i = FMA(KP535826794, T4g, KP844327925 * T4h); + T4l = FMA(KP637423989, T4j, KP770513242 * T4k); + T4m = T4i - T4l; + T4p = FNMS(KP425779291, T4o, KP904827052 * T4n); + T4s = FNMS(KP992114701, T4r, KP125333233 * T4q); + T4t = T4p + T4s; + T4u = T4m + T4t; + T6L = T4p - T4s; + T4v = KP559016994 * (T4m - T4t); + T6K = T4i + T4l; + } + { + E T46, T47, T6l, T49, T4a, T6m; + T46 = FNMS(KP248689887, T2G, KP968583161 * T2R); + T47 = FNMS(KP844327925, T33, KP535826794 * T3e); + T6l = T46 + T47; + T49 = FNMS(KP481753674, T3r, KP876306680 * T3C); + T4a = FNMS(KP684547105, T3O, KP728968627 * T3Z); + T6m = T49 + T4a; + T48 = T46 - T47; + T6v = KP559016994 * (T6l - T6m); + T4b = T49 - T4a; + T6n = T6l + T6m; + } + ri[WS(rs, 1)] = T2v + T42; + ii[WS(rs, 1)] = T6n + T6u; + ri[WS(rs, 4)] = T4f + T4u; + ii[WS(rs, 4)] = T6F + T6G; + { + E T4c, T4e, T45, T4d, T44; + T4c = FMA(KP951056516, T48, KP587785252 * T4b); + T4e = FNMS(KP587785252, T48, KP951056516 * T4b); + T44 = FNMS(KP250000000, T42, T2v); + T45 = T43 + T44; + T4d = T44 - T43; + ri[WS(rs, 21)] = T45 - T4c; + ri[WS(rs, 16)] = T4d + T4e; + ri[WS(rs, 6)] = T45 + T4c; + ri[WS(rs, 11)] = T4d - T4e; + } + { + E T6A, T6B, T6x, T6C, T6w; + T6A = FMA(KP951056516, T6y, KP587785252 * T6z); 
+ T6B = FNMS(KP587785252, T6y, KP951056516 * T6z); + T6w = FNMS(KP250000000, T6n, T6u); + T6x = T6v + T6w; + T6C = T6w - T6v; + ii[WS(rs, 6)] = T6x - T6A; + ii[WS(rs, 16)] = T6C - T6B; + ii[WS(rs, 21)] = T6A + T6x; + ii[WS(rs, 11)] = T6B + T6C; + } + { + E T4E, T4G, T4x, T4F, T4w; + T4E = FMA(KP951056516, T4A, KP587785252 * T4D); + T4G = FNMS(KP587785252, T4A, KP951056516 * T4D); + T4w = FNMS(KP250000000, T4u, T4f); + T4x = T4v + T4w; + T4F = T4w - T4v; + ri[WS(rs, 24)] = T4x - T4E; + ri[WS(rs, 19)] = T4F + T4G; + ri[WS(rs, 9)] = T4x + T4E; + ri[WS(rs, 14)] = T4F - T4G; + } + { + E T6M, T6N, T6J, T6O, T6I; + T6M = FMA(KP951056516, T6K, KP587785252 * T6L); + T6N = FNMS(KP587785252, T6K, KP951056516 * T6L); + T6I = FNMS(KP250000000, T6F, T6G); + T6J = T6H + T6I; + T6O = T6I - T6H; + ii[WS(rs, 9)] = T6J - T6M; + ii[WS(rs, 19)] = T6O - T6N; + ii[WS(rs, 24)] = T6M + T6J; + ii[WS(rs, 14)] = T6N + T6O; + } + } + { + E T4J, T5r, T6U, T76, T5e, T6Z, T5f, T6Y, T5M, T77, T5P, T75, T5G, T7b, T5H; + E T7a, T5k, T6V, T5n, T6R, T4H, T6T; + T4H = T2m - T2l; + T4J = T4H - T4I; + T5r = T4H + T4I; + T6T = T6p - T6o; + T6U = T6S + T6T; + T76 = T6T - T6S; + { + E T4Q, T4X, T4Y, T55, T5c, T5d; + T4Q = FMA(KP876306680, T4M, KP481753674 * T4P); + T4X = FNMS(KP425779291, T4W, KP904827052 * T4T); + T4Y = T4Q + T4X; + T55 = FMA(KP535826794, T51, KP844327925 * T54); + T5c = FMA(KP062790519, T58, KP998026728 * T5b); + T5d = T55 + T5c; + T5e = T4Y + T5d; + T6Z = T55 - T5c; + T5f = KP559016994 * (T4Y - T5d); + T6Y = T4Q - T4X; + } + { + E T5K, T5L, T73, T5N, T5O, T74; + T5K = FNMS(KP684547105, T5s, KP728968627 * T5t); + T5L = FMA(KP125333233, T5w, KP992114701 * T5v); + T73 = T5K - T5L; + T5N = FNMS(KP998026728, T5z, KP062790519 * T5A); + T5O = FMA(KP770513242, T5D, KP637423989 * T5C); + T74 = T5N - T5O; + T5M = T5K + T5L; + T77 = KP559016994 * (T73 - T74); + T5P = T5N + T5O; + T75 = T73 + T74; + } + { + E T5u, T5x, T5y, T5B, T5E, T5F; + T5u = FMA(KP728968627, T5s, KP684547105 * T5t); + T5x = 
FNMS(KP992114701, T5w, KP125333233 * T5v); + T5y = T5u + T5x; + T5B = FMA(KP062790519, T5z, KP998026728 * T5A); + T5E = FNMS(KP637423989, T5D, KP770513242 * T5C); + T5F = T5B + T5E; + T5G = T5y + T5F; + T7b = T5B - T5E; + T5H = KP559016994 * (T5y - T5F); + T7a = T5u - T5x; + } + { + E T5i, T5j, T6P, T5l, T5m, T6Q; + T5i = FNMS(KP481753674, T4M, KP876306680 * T4P); + T5j = FMA(KP904827052, T4W, KP425779291 * T4T); + T6P = T5i - T5j; + T5l = FNMS(KP844327925, T51, KP535826794 * T54); + T5m = FNMS(KP998026728, T58, KP062790519 * T5b); + T6Q = T5l + T5m; + T5k = T5i + T5j; + T6V = KP559016994 * (T6P - T6Q); + T5n = T5l - T5m; + T6R = T6P + T6Q; + } + ri[WS(rs, 2)] = T4J + T5e; + ii[WS(rs, 2)] = T6R + T6U; + ri[WS(rs, 3)] = T5r + T5G; + ii[WS(rs, 3)] = T75 + T76; + { + E T5o, T5q, T5h, T5p, T5g; + T5o = FMA(KP951056516, T5k, KP587785252 * T5n); + T5q = FNMS(KP587785252, T5k, KP951056516 * T5n); + T5g = FNMS(KP250000000, T5e, T4J); + T5h = T5f + T5g; + T5p = T5g - T5f; + ri[WS(rs, 22)] = T5h - T5o; + ri[WS(rs, 17)] = T5p + T5q; + ri[WS(rs, 7)] = T5h + T5o; + ri[WS(rs, 12)] = T5p - T5q; + } + { + E T70, T71, T6X, T72, T6W; + T70 = FMA(KP951056516, T6Y, KP587785252 * T6Z); + T71 = FNMS(KP587785252, T6Y, KP951056516 * T6Z); + T6W = FNMS(KP250000000, T6R, T6U); + T6X = T6V + T6W; + T72 = T6W - T6V; + ii[WS(rs, 7)] = T6X - T70; + ii[WS(rs, 17)] = T72 - T71; + ii[WS(rs, 22)] = T70 + T6X; + ii[WS(rs, 12)] = T71 + T72; + } + { + E T5Q, T5S, T5J, T5R, T5I; + T5Q = FMA(KP951056516, T5M, KP587785252 * T5P); + T5S = FNMS(KP587785252, T5M, KP951056516 * T5P); + T5I = FNMS(KP250000000, T5G, T5r); + T5J = T5H + T5I; + T5R = T5I - T5H; + ri[WS(rs, 23)] = T5J - T5Q; + ri[WS(rs, 18)] = T5R + T5S; + ri[WS(rs, 8)] = T5J + T5Q; + ri[WS(rs, 13)] = T5R - T5S; + } + { + E T7c, T7d, T79, T7e, T78; + T7c = FMA(KP951056516, T7a, KP587785252 * T7b); + T7d = FNMS(KP587785252, T7a, KP951056516 * T7b); + T78 = FNMS(KP250000000, T75, T76); + T79 = T77 + T78; + T7e = T78 - T77; + ii[WS(rs, 8)] = T79 - 
T7c; + ii[WS(rs, 18)] = T7e - T7d; + ii[WS(rs, 23)] = T7c + T79; + ii[WS(rs, 13)] = T7d + T7e; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 25}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 25, "t1_25", twinstr, &GENUS, {260, 140, 140, 0}, 0, 0, 0 }; + +void X(codelet_t1_25) (planner *p) { + X(kdft_dit_register) (p, t1_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 3 -name t1_3 -include t.h */ + +/* + * This function contains 16 FP additions, 14 FP multiplications, + * (or, 6 additions, 4 multiplications, 10 fused multiply/add), + * 21 stack variables, 2 constants, and 12 memory accesses + */ +#include "t.h" + +static void t1_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs)) { + E T1, Tm, T9, Tc, Tb, Th, T7, Ti, Ta, Tj, Td; + T1 = ri[0]; + Tm = ii[0]; + { + E T3, T6, T2, T5, Tg, T4, T8; + T3 = ri[WS(rs, 1)]; + T6 = ii[WS(rs, 1)]; + T2 = W[0]; + T5 = W[1]; + T9 = ri[WS(rs, 2)]; + Tc = ii[WS(rs, 2)]; + Tg = T2 * T6; + T4 = T2 * T3; + T8 = W[2]; + Tb = W[3]; + Th = FNMS(T5, T3, Tg); + T7 = FMA(T5, T6, T4); + Ti = T8 * Tc; + Ta = T8 * T9; + } + Tj = FNMS(Tb, T9, Ti); + Td = FMA(Tb, Tc, Ta); + { + E Tk, Te, To, Tn, Tl, Tf; + Tk = Th - Tj; + Tl = Th + Tj; + Te = T7 + Td; + To = Td - T7; + ii[0] = Tl + Tm; + Tn = FNMS(KP500000000, Tl, Tm); + ri[0] = T1 + Te; + Tf = FNMS(KP500000000, Te, T1); + ii[WS(rs, 1)] = FMA(KP866025403, To, Tn); + ii[WS(rs, 2)] = FNMS(KP866025403, To, Tn); + ri[WS(rs, 2)] = FNMS(KP866025403, Tk, Tf); + ri[WS(rs, 1)] = FMA(KP866025403, Tk, Tf); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 
0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 3, "t1_3", twinstr, &GENUS, {6, 4, 10, 0}, 0, 0, 0 }; + +void X(codelet_t1_3) (planner *p) { + X(kdft_dit_register) (p, t1_3, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 3 -name t1_3 -include t.h */ + +/* + * This function contains 16 FP additions, 12 FP multiplications, + * (or, 10 additions, 6 multiplications, 6 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "t.h" + +static void t1_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs)) { + E T1, Ti, T6, Te, Tb, Tf, Tc, Th; + T1 = ri[0]; + Ti = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 1)]; + T5 = ii[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + Te = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 2)]; + Ta = ii[WS(rs, 2)]; + T7 = W[2]; + T9 = W[3]; + Tb = FMA(T7, T8, T9 * Ta); + Tf = FNMS(T9, T8, T7 * Ta); + } + Tc = T6 + Tb; + Th = Te + Tf; + ri[0] = T1 + Tc; + ii[0] = Th + Ti; + { + E Td, Tg, Tj, Tk; + Td = FNMS(KP500000000, Tc, T1); + Tg = KP866025403 * (Te - Tf); + ri[WS(rs, 2)] = Td - Tg; + ri[WS(rs, 1)] = Td + Tg; + Tj = KP866025403 * (Tb - T6); + Tk = FNMS(KP500000000, Th, Ti); + ii[WS(rs, 1)] = Tj + Tk; + ii[WS(rs, 2)] = Tk - Tj; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 3, "t1_3", twinstr, &GENUS, {10, 6, 6, 0}, 0, 0, 0 }; + +void X(codelet_t1_3) (planner *p) { + X(kdft_dit_register) (p, t1_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1771 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name t1_32 -include t.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 236 additions, 62 multiplications, 198 fused multiply/add), + * 135 stack variables, 7 constants, and 128 memory accesses + */ +#include "t.h" + +static void t1_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, 
+0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 62); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { + E T90, T8Z; + { + E T8x, T87, T8, T3w, T83, T3B, T8y, Tl, T6F, Tz, T3J, T5T, T6G, TM, T3Q; + E T5U, T46, T5Y, T7D, T6L, T5X, T3Z, T6M, T1f, T7E, T6R, T60, T4e, T6O, T1G; + E T61, T4l, T78, T7N, T54, T6f, T32, T7b, T6c, T5r, T6X, T7I, T4v, T68, T29; + E T70, T65, T4S, T5s, T5b, T7O, T7e, T79, T3t, T5t, T5i, T4H, T2y, T4A, T71; + E T2m, T4B, T4F, T2s; + { + E T44, T1d, T3X, T6J, T11, T40, T42, T17, T5h, T5c; + { + E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti; + { + E T1, T86, T3, T6, T2, T5; + T1 = ri[0]; + T86 = ii[0]; + T3 = ri[WS(rs, 16)]; + T6 = ii[WS(rs, 16)]; + T2 = W[30]; + T5 = W[31]; + { + E T84, T4, T9, T85, T7; + Ta = ri[WS(rs, 8)]; + Td = ii[WS(rs, 8)]; + T84 = T2 * T6; + T4 = T2 * T3; + T9 = W[14]; + Tg = ri[WS(rs, 24)]; + T85 = FNMS(T5, T3, T84); + T7 = FMA(T5, T6, T4); + T3x = T9 * Td; + Tb = T9 * Ta; + T8x = T86 - T85; + T87 = T85 + T86; + T8 = T1 + T7; + T3w = T1 - T7; + Tj = ii[WS(rs, 24)]; + Tf = W[46]; + } + Tc = W[15]; + Ti = W[47]; + } + { + E Tu, Tx, T3F, Ts, Tw, T3G, Tv; + { + E To, Tr, Tp, T3E, Tq, Tt; + { + E T3y, Te, T3A, Tk, T3z, Th, Tn; + To = ri[WS(rs, 4)]; + T3z = Tf * Tj; + Th = Tf * Tg; + T3y = FNMS(Tc, Ta, T3x); + Te = FMA(Tc, Td, Tb); + T3A = FNMS(Ti, Tg, T3z); + Tk = FMA(Ti, Tj, Th); + Tr = ii[WS(rs, 4)]; + Tn = W[6]; + T83 = T3y + T3A; + T3B = T3y - T3A; + T8y = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T3E = Tn * Tr; + } + Tq = W[7]; + Tu = ri[WS(rs, 20)]; + Tx = ii[WS(rs, 20)]; + Tt = W[38]; + T3F = FNMS(Tq, To, T3E); + Ts = FMA(Tq, Tr, Tp); + Tw = W[39]; + T3G = Tt * Tx; + Tv = Tt * Tu; + } + { + E T3M, TF, TH, TK, TG, TJ, TE, TD, TC; + { + E TB, T3H, Ty, TA, T3I, T3D, T3L; + TB = ri[WS(rs, 28)]; + TE = 
ii[WS(rs, 28)]; + T3H = FNMS(Tw, Tu, T3G); + Ty = FMA(Tw, Tx, Tv); + TA = W[54]; + TD = W[55]; + T6F = T3F + T3H; + T3I = T3F - T3H; + Tz = Ts + Ty; + T3D = Ts - Ty; + T3L = TA * TE; + TC = TA * TB; + T3J = T3D + T3I; + T5T = T3I - T3D; + T3M = FNMS(TD, TB, T3L); + } + TF = FMA(TD, TE, TC); + TH = ri[WS(rs, 12)]; + TK = ii[WS(rs, 12)]; + TG = W[22]; + TJ = W[23]; + { + E TU, T3U, T13, T16, T3W, T10, T12, T15, T41, T14; + { + E T19, T1c, T18, T1b, T3P, T3K; + { + E TQ, TT, T3N, TI, TP, TS; + TQ = ri[WS(rs, 2)]; + TT = ii[WS(rs, 2)]; + T3N = TG * TK; + TI = TG * TH; + TP = W[2]; + TS = W[3]; + { + E T3O, TL, T3T, TR; + T3O = FNMS(TJ, TH, T3N); + TL = FMA(TJ, TK, TI); + T3T = TP * TT; + TR = TP * TQ; + T6G = T3M + T3O; + T3P = T3M - T3O; + TM = TF + TL; + T3K = TF - TL; + TU = FMA(TS, TT, TR); + T3U = FNMS(TS, TQ, T3T); + } + } + T3Q = T3K - T3P; + T5U = T3K + T3P; + T19 = ri[WS(rs, 26)]; + T1c = ii[WS(rs, 26)]; + T18 = W[50]; + T1b = W[51]; + { + E TW, TZ, TY, T3V, TX, T43, T1a, TV; + TW = ri[WS(rs, 18)]; + TZ = ii[WS(rs, 18)]; + T43 = T18 * T1c; + T1a = T18 * T19; + TV = W[34]; + TY = W[35]; + T44 = FNMS(T1b, T19, T43); + T1d = FMA(T1b, T1c, T1a); + T3V = TV * TZ; + TX = TV * TW; + T13 = ri[WS(rs, 10)]; + T16 = ii[WS(rs, 10)]; + T3W = FNMS(TY, TW, T3V); + T10 = FMA(TY, TZ, TX); + T12 = W[18]; + T15 = W[19]; + } + } + T3X = T3U - T3W; + T6J = T3U + T3W; + T11 = TU + T10; + T40 = TU - T10; + T41 = T12 * T16; + T14 = T12 * T13; + T42 = FNMS(T15, T13, T41); + T17 = FMA(T15, T16, T14); + } + } + } + } + { + E T49, T1l, T4j, T1E, T1u, T1x, T1w, T4b, T1r, T4g, T1v; + { + E T1A, T1D, T1C, T4i, T1B; + { + E T1h, T1k, T1g, T1j, T48, T1i, T1z; + T1h = ri[WS(rs, 30)]; + T1k = ii[WS(rs, 30)]; + { + E T6K, T45, T1e, T3Y; + T6K = T42 + T44; + T45 = T42 - T44; + T1e = T17 + T1d; + T3Y = T17 - T1d; + T46 = T40 + T45; + T5Y = T40 - T45; + T7D = T6J + T6K; + T6L = T6J - T6K; + T5X = T3X + T3Y; + T3Z = T3X - T3Y; + T6M = T11 - T1e; + T1f = T11 + T1e; + T1g = W[58]; + } + T1j = W[59]; + 
T1A = ri[WS(rs, 22)]; + T1D = ii[WS(rs, 22)]; + T48 = T1g * T1k; + T1i = T1g * T1h; + T1z = W[42]; + T1C = W[43]; + T49 = FNMS(T1j, T1h, T48); + T1l = FMA(T1j, T1k, T1i); + T4i = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T4a, T1o, T1t; + T1n = ri[WS(rs, 14)]; + T1q = ii[WS(rs, 14)]; + T4j = FNMS(T1C, T1A, T4i); + T1E = FMA(T1C, T1D, T1B); + T1m = W[26]; + T1p = W[27]; + T1u = ri[WS(rs, 6)]; + T1x = ii[WS(rs, 6)]; + T4a = T1m * T1q; + T1o = T1m * T1n; + T1t = W[10]; + T1w = W[11]; + T4b = FNMS(T1p, T1n, T4a); + T1r = FMA(T1p, T1q, T1o); + T4g = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T4c, T6P, T1s, T4f, T4h, T1y; + T4c = T49 - T4b; + T6P = T49 + T4b; + T1s = T1l + T1r; + T4f = T1l - T1r; + T4h = FNMS(T1w, T1u, T4g); + T1y = FMA(T1w, T1x, T1v); + { + E T4k, T6Q, T4d, T1F; + T4k = T4h - T4j; + T6Q = T4h + T4j; + T4d = T1y - T1E; + T1F = T1y + T1E; + T7E = T6P + T6Q; + T6R = T6P - T6Q; + T60 = T4c + T4d; + T4e = T4c - T4d; + T6O = T1s - T1F; + T1G = T1s + T1F; + T61 = T4f - T4k; + T4l = T4f + T4k; + } + } + } + { + E T4Z, T2H, T5p, T30, T2Q, T2T, T2S, T51, T2N, T5m, T2R; + { + E T2W, T2Z, T2Y, T5o, T2X; + { + E T2D, T2G, T2C, T2F, T4Y, T2E, T2V; + T2D = ri[WS(rs, 31)]; + T2G = ii[WS(rs, 31)]; + T2C = W[60]; + T2F = W[61]; + T2W = ri[WS(rs, 23)]; + T2Z = ii[WS(rs, 23)]; + T4Y = T2C * T2G; + T2E = T2C * T2D; + T2V = W[44]; + T2Y = W[45]; + T4Z = FNMS(T2F, T2D, T4Y); + T2H = FMA(T2F, T2G, T2E); + T5o = T2V * T2Z; + T2X = T2V * T2W; + } + { + E T2J, T2M, T2I, T2L, T50, T2K, T2P; + T2J = ri[WS(rs, 15)]; + T2M = ii[WS(rs, 15)]; + T5p = FNMS(T2Y, T2W, T5o); + T30 = FMA(T2Y, T2Z, T2X); + T2I = W[28]; + T2L = W[29]; + T2Q = ri[WS(rs, 7)]; + T2T = ii[WS(rs, 7)]; + T50 = T2I * T2M; + T2K = T2I * T2J; + T2P = W[12]; + T2S = W[13]; + T51 = FNMS(T2L, T2J, T50); + T2N = FMA(T2L, T2M, T2K); + T5m = T2P * T2T; + T2R = T2P * T2Q; + } + } + { + E T52, T76, T2O, T5l, T5n, T2U; + T52 = T4Z - T51; + T76 = T4Z + T51; + T2O = T2H + T2N; + T5l = T2H - T2N; + T5n = 
FNMS(T2S, T2Q, T5m); + T2U = FMA(T2S, T2T, T2R); + { + E T5q, T77, T53, T31; + T5q = T5n - T5p; + T77 = T5n + T5p; + T53 = T2U - T30; + T31 = T2U + T30; + T78 = T76 - T77; + T7N = T76 + T77; + T54 = T52 - T53; + T6f = T52 + T53; + T32 = T2O + T31; + T7b = T2O - T31; + T6c = T5l - T5q; + T5r = T5l + T5q; + } + } + } + { + E T4q, T1O, T4Q, T27, T1X, T20, T1Z, T4s, T1U, T4N, T1Y; + { + E T23, T26, T25, T4P, T24; + { + E T1K, T1N, T1J, T1M, T4p, T1L, T22; + T1K = ri[WS(rs, 1)]; + T1N = ii[WS(rs, 1)]; + T1J = W[0]; + T1M = W[1]; + T23 = ri[WS(rs, 25)]; + T26 = ii[WS(rs, 25)]; + T4p = T1J * T1N; + T1L = T1J * T1K; + T22 = W[48]; + T25 = W[49]; + T4q = FNMS(T1M, T1K, T4p); + T1O = FMA(T1M, T1N, T1L); + T4P = T22 * T26; + T24 = T22 * T23; + } + { + E T1Q, T1T, T1P, T1S, T4r, T1R, T1W; + T1Q = ri[WS(rs, 17)]; + T1T = ii[WS(rs, 17)]; + T4Q = FNMS(T25, T23, T4P); + T27 = FMA(T25, T26, T24); + T1P = W[32]; + T1S = W[33]; + T1X = ri[WS(rs, 9)]; + T20 = ii[WS(rs, 9)]; + T4r = T1P * T1T; + T1R = T1P * T1Q; + T1W = W[16]; + T1Z = W[17]; + T4s = FNMS(T1S, T1Q, T4r); + T1U = FMA(T1S, T1T, T1R); + T4N = T1W * T20; + T1Y = T1W * T1X; + } + } + { + E T4t, T6V, T1V, T4M, T4O, T21; + T4t = T4q - T4s; + T6V = T4q + T4s; + T1V = T1O + T1U; + T4M = T1O - T1U; + T4O = FNMS(T1Z, T1X, T4N); + T21 = FMA(T1Z, T20, T1Y); + { + E T4R, T6W, T4u, T28; + T4R = T4O - T4Q; + T6W = T4O + T4Q; + T4u = T21 - T27; + T28 = T21 + T27; + T6X = T6V - T6W; + T7I = T6V + T6W; + T4v = T4t - T4u; + T68 = T4t + T4u; + T29 = T1V + T28; + T70 = T1V - T28; + T65 = T4M - T4R; + T4S = T4M + T4R; + } + } + } + { + E T56, T38, T5g, T3r, T3h, T3k, T3j, T58, T3e, T5d, T3i; + { + E T3n, T3q, T3p, T5f, T3o; + { + E T34, T37, T33, T36, T55, T35, T3m; + T34 = ri[WS(rs, 3)]; + T37 = ii[WS(rs, 3)]; + T33 = W[4]; + T36 = W[5]; + T3n = ri[WS(rs, 11)]; + T3q = ii[WS(rs, 11)]; + T55 = T33 * T37; + T35 = T33 * T34; + T3m = W[20]; + T3p = W[21]; + T56 = FNMS(T36, T34, T55); + T38 = FMA(T36, T37, T35); + T5f = T3m * T3q; + T3o = T3m * 
T3n; + } + { + E T3a, T3d, T39, T3c, T57, T3b, T3g; + T3a = ri[WS(rs, 19)]; + T3d = ii[WS(rs, 19)]; + T5g = FNMS(T3p, T3n, T5f); + T3r = FMA(T3p, T3q, T3o); + T39 = W[36]; + T3c = W[37]; + T3h = ri[WS(rs, 27)]; + T3k = ii[WS(rs, 27)]; + T57 = T39 * T3d; + T3b = T39 * T3a; + T3g = W[52]; + T3j = W[53]; + T58 = FNMS(T3c, T3a, T57); + T3e = FMA(T3c, T3d, T3b); + T5d = T3g * T3k; + T3i = T3g * T3h; + } + } + { + E T59, T7c, T3f, T5a, T5e, T3l, T7d, T3s; + T59 = T56 - T58; + T7c = T56 + T58; + T3f = T38 + T3e; + T5a = T38 - T3e; + T5e = FNMS(T3j, T3h, T5d); + T3l = FMA(T3j, T3k, T3i); + T5h = T5e - T5g; + T7d = T5e + T5g; + T3s = T3l + T3r; + T5c = T3l - T3r; + T5s = T5a + T59; + T5b = T59 - T5a; + T7O = T7c + T7d; + T7e = T7c - T7d; + T79 = T3s - T3f; + T3t = T3f + T3s; + } + } + { + E T4x, T2f, T2o, T2r, T4z, T2l, T2n, T2q, T4E, T2p; + { + E T2u, T2x, T2t, T2w; + { + E T2b, T2e, T2d, T4w, T2c, T2a; + T2b = ri[WS(rs, 5)]; + T2e = ii[WS(rs, 5)]; + T2a = W[8]; + T5t = T5c - T5h; + T5i = T5c + T5h; + T2d = W[9]; + T4w = T2a * T2e; + T2c = T2a * T2b; + T2u = ri[WS(rs, 13)]; + T2x = ii[WS(rs, 13)]; + T4x = FNMS(T2d, T2b, T4w); + T2f = FMA(T2d, T2e, T2c); + T2t = W[24]; + T2w = W[25]; + } + { + E T2h, T2k, T2j, T4y, T2i, T4G, T2v, T2g; + T2h = ri[WS(rs, 21)]; + T2k = ii[WS(rs, 21)]; + T4G = T2t * T2x; + T2v = T2t * T2u; + T2g = W[40]; + T2j = W[41]; + T4H = FNMS(T2w, T2u, T4G); + T2y = FMA(T2w, T2x, T2v); + T4y = T2g * T2k; + T2i = T2g * T2h; + T2o = ri[WS(rs, 29)]; + T2r = ii[WS(rs, 29)]; + T4z = FNMS(T2j, T2h, T4y); + T2l = FMA(T2j, T2k, T2i); + T2n = W[56]; + T2q = W[57]; + } + } + T4A = T4x - T4z; + T71 = T4x + T4z; + T2m = T2f + T2l; + T4B = T2f - T2l; + T4E = T2n * T2r; + T2p = T2n * T2o; + T4F = FNMS(T2q, T2o, T4E); + T2s = FMA(T2q, T2r, T2p); + } + } + { + E T4T, T4C, T4J, T4U, T7y, T8q, T8p, T7B; + { + E T6E, T8j, T73, T6Y, T6H, T8k, T8i, T8h; + { + E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A; + E T7K, T7P, T7W, T1I; + { + E T7X, T7Y, 
T7J, T82, T88; + { + E Tm, T4I, T72, T4D, T2z, TN; + T6E = T8 - Tl; + Tm = T8 + Tl; + T4T = T4B + T4A; + T4C = T4A - T4B; + T4I = T4F - T4H; + T72 = T4F + T4H; + T4D = T2s - T2y; + T2z = T2s + T2y; + TN = Tz + TM; + T8j = TM - Tz; + T73 = T71 - T72; + T7J = T71 + T72; + T4J = T4D + T4I; + T4U = T4D - T4I; + T2A = T2m + T2z; + T6Y = T2z - T2m; + T7C = Tm - TN; + TO = Tm + TN; + } + T7K = T7I - T7J; + T7X = T7I + T7J; + T7Y = T7N + T7O; + T7P = T7N - T7O; + T6H = T6F - T6G; + T82 = T6F + T6G; + T88 = T83 + T87; + T8k = T87 - T83; + T80 = T7X + T7Y; + T7Z = T7X - T7Y; + T8e = T88 - T82; + T89 = T82 + T88; + } + { + E T7H, T7M, T2B, T3u; + T7H = T29 - T2A; + T2B = T29 + T2A; + T3u = T32 + T3t; + T7M = T32 - T3t; + T8d = T1G - T1f; + T1H = T1f + T1G; + T8b = T3u - T2B; + T3v = T2B + T3u; + T7T = T7K - T7H; + T7L = T7H + T7K; + T7U = T7M + T7P; + T7Q = T7M - T7P; + } + T7W = TO - T1H; + T1I = TO + T1H; + { + E T7S, T8f, T8g, T7V; + { + E T7R, T8c, T8a, T7G, T81, T7F; + T8i = T7Q - T7L; + T7R = T7L + T7Q; + T81 = T7D + T7E; + T7F = T7D - T7E; + ri[0] = T1I + T3v; + ri[WS(rs, 16)] = T1I - T3v; + ri[WS(rs, 8)] = T7W + T7Z; + ri[WS(rs, 24)] = T7W - T7Z; + T8c = T89 - T81; + T8a = T81 + T89; + T7G = T7C + T7F; + T7S = T7C - T7F; + T8h = T8e - T8d; + T8f = T8d + T8e; + ii[WS(rs, 24)] = T8c - T8b; + ii[WS(rs, 8)] = T8b + T8c; + ii[WS(rs, 16)] = T8a - T80; + ii[0] = T80 + T8a; + ri[WS(rs, 4)] = FMA(KP707106781, T7R, T7G); + ri[WS(rs, 20)] = FNMS(KP707106781, T7R, T7G); + T8g = T7T + T7U; + T7V = T7T - T7U; + } + ii[WS(rs, 20)] = FNMS(KP707106781, T8g, T8f); + ii[WS(rs, 4)] = FMA(KP707106781, T8g, T8f); + ri[WS(rs, 12)] = FMA(KP707106781, T7V, T7S); + ri[WS(rs, 28)] = FNMS(KP707106781, T7V, T7S); + } + } + { + E T7f, T7m, T6I, T7a, T7A, T7w, T8r, T8l, T8m, T6T, T7j, T75, T8s, T7p, T7z; + E T7t; + { + E T7n, T6N, T6S, T7o, T7u, T7v; + T7f = T7b - T7e; + T7u = T7b + T7e; + ii[WS(rs, 28)] = FNMS(KP707106781, T8i, T8h); + ii[WS(rs, 12)] = FMA(KP707106781, T8i, T8h); + T7m = T6E + 
T6H; + T6I = T6E - T6H; + T7v = T78 + T79; + T7a = T78 - T79; + T7n = T6M + T6L; + T6N = T6L - T6M; + T7A = FMA(KP414213562, T7u, T7v); + T7w = FNMS(KP414213562, T7v, T7u); + T8r = T8k - T8j; + T8l = T8j + T8k; + T6S = T6O + T6R; + T7o = T6O - T6R; + { + E T7s, T7r, T6Z, T74; + T7s = T6X + T6Y; + T6Z = T6X - T6Y; + T74 = T70 - T73; + T7r = T70 + T73; + T8m = T6N + T6S; + T6T = T6N - T6S; + T7j = FNMS(KP414213562, T6Z, T74); + T75 = FMA(KP414213562, T74, T6Z); + T8s = T7o - T7n; + T7p = T7n + T7o; + T7z = FNMS(KP414213562, T7r, T7s); + T7t = FMA(KP414213562, T7s, T7r); + } + } + { + E T7i, T6U, T8t, T8v, T7k, T7g; + T7i = FNMS(KP707106781, T6T, T6I); + T6U = FMA(KP707106781, T6T, T6I); + T8t = FMA(KP707106781, T8s, T8r); + T8v = FNMS(KP707106781, T8s, T8r); + T7k = FMA(KP414213562, T7a, T7f); + T7g = FNMS(KP414213562, T7f, T7a); + { + E T7q, T7x, T8n, T8o; + T7y = FNMS(KP707106781, T7p, T7m); + T7q = FMA(KP707106781, T7p, T7m); + { + E T7l, T8u, T8w, T7h; + T7l = T7j + T7k; + T8u = T7k - T7j; + T8w = T75 + T7g; + T7h = T75 - T7g; + ri[WS(rs, 30)] = FMA(KP923879532, T7l, T7i); + ri[WS(rs, 14)] = FNMS(KP923879532, T7l, T7i); + ii[WS(rs, 22)] = FNMS(KP923879532, T8u, T8t); + ii[WS(rs, 6)] = FMA(KP923879532, T8u, T8t); + ii[WS(rs, 30)] = FMA(KP923879532, T8w, T8v); + ii[WS(rs, 14)] = FNMS(KP923879532, T8w, T8v); + ri[WS(rs, 6)] = FMA(KP923879532, T7h, T6U); + ri[WS(rs, 22)] = FNMS(KP923879532, T7h, T6U); + T7x = T7t + T7w; + T8q = T7w - T7t; + } + T8p = FNMS(KP707106781, T8m, T8l); + T8n = FMA(KP707106781, T8m, T8l); + T8o = T7z + T7A; + T7B = T7z - T7A; + ri[WS(rs, 2)] = FMA(KP923879532, T7x, T7q); + ri[WS(rs, 18)] = FNMS(KP923879532, T7x, T7q); + ii[WS(rs, 18)] = FNMS(KP923879532, T8o, T8n); + ii[WS(rs, 2)] = FMA(KP923879532, T8o, T8n); + } + } + } + } + { + E T5S, T8O, T8N, T5V, T6d, T6g, T66, T69, T8G, T8F; + { + E T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T5k, T5L, T5u, T4K, T4V; + { + E T5D, T5E, T8z, T8A, T5j; + { + E T3C, T3R, T47, T4m; + T5S = T3w - T3B; + T3C = 
T3w + T3B; + ri[WS(rs, 10)] = FMA(KP923879532, T7B, T7y); + ri[WS(rs, 26)] = FNMS(KP923879532, T7B, T7y); + ii[WS(rs, 26)] = FNMS(KP923879532, T8q, T8p); + ii[WS(rs, 10)] = FMA(KP923879532, T8q, T8p); + T3R = T3J + T3Q; + T8O = T3Q - T3J; + T5D = FMA(KP414213562, T3Z, T46); + T47 = FNMS(KP414213562, T46, T3Z); + T4m = FMA(KP414213562, T4l, T4e); + T5E = FNMS(KP414213562, T4e, T4l); + T8N = T8y + T8x; + T8z = T8x - T8y; + T5C = FMA(KP707106781, T3R, T3C); + T3S = FNMS(KP707106781, T3R, T3C); + T8C = T47 + T4m; + T4n = T47 - T4m; + T8A = T5T + T5U; + T5V = T5T - T5U; + } + T6d = T5i - T5b; + T5j = T5b + T5i; + T8H = FNMS(KP707106781, T8A, T8z); + T8B = FMA(KP707106781, T8A, T8z); + T8I = T5E - T5D; + T5F = T5D + T5E; + T5k = FNMS(KP707106781, T5j, T54); + T5L = FMA(KP707106781, T5j, T54); + T5u = T5s + T5t; + T6g = T5s - T5t; + T66 = T4J - T4C; + T4K = T4C + T4J; + T4V = T4T + T4U; + T69 = T4T - T4U; + } + { + E T5M, T5Q, T5J, T5P, T8L, T8M; + { + E T5y, T4o, T5A, T5w, T5z, T4X, T8J, T5K, T5v, T8K, T5B, T5x; + T5y = FNMS(KP923879532, T4n, T3S); + T4o = FMA(KP923879532, T4n, T3S); + T5K = FMA(KP707106781, T5u, T5r); + T5v = FNMS(KP707106781, T5u, T5r); + { + E T5I, T4L, T5H, T4W; + T5I = FMA(KP707106781, T4K, T4v); + T4L = FNMS(KP707106781, T4K, T4v); + T5H = FMA(KP707106781, T4V, T4S); + T4W = FNMS(KP707106781, T4V, T4S); + T5M = FNMS(KP198912367, T5L, T5K); + T5Q = FMA(KP198912367, T5K, T5L); + T5A = FMA(KP668178637, T5k, T5v); + T5w = FNMS(KP668178637, T5v, T5k); + T5J = FMA(KP198912367, T5I, T5H); + T5P = FNMS(KP198912367, T5H, T5I); + T5z = FNMS(KP668178637, T4L, T4W); + T4X = FMA(KP668178637, T4W, T4L); + } + T8J = FMA(KP923879532, T8I, T8H); + T8L = FNMS(KP923879532, T8I, T8H); + T8K = T5A - T5z; + T5B = T5z + T5A; + T8M = T4X + T5w; + T5x = T4X - T5w; + ii[WS(rs, 21)] = FNMS(KP831469612, T8K, T8J); + ii[WS(rs, 5)] = FMA(KP831469612, T8K, T8J); + ri[WS(rs, 5)] = FMA(KP831469612, T5x, T4o); + ri[WS(rs, 21)] = FNMS(KP831469612, T5x, T4o); + ri[WS(rs, 29)] = 
FMA(KP831469612, T5B, T5y); + ri[WS(rs, 13)] = FNMS(KP831469612, T5B, T5y); + } + { + E T5O, T8D, T8E, T5R, T5G, T5N; + T5O = FNMS(KP923879532, T5F, T5C); + T5G = FMA(KP923879532, T5F, T5C); + T5N = T5J + T5M; + T8G = T5M - T5J; + T8F = FNMS(KP923879532, T8C, T8B); + T8D = FMA(KP923879532, T8C, T8B); + ii[WS(rs, 29)] = FMA(KP831469612, T8M, T8L); + ii[WS(rs, 13)] = FNMS(KP831469612, T8M, T8L); + ri[WS(rs, 1)] = FMA(KP980785280, T5N, T5G); + ri[WS(rs, 17)] = FNMS(KP980785280, T5N, T5G); + T8E = T5P + T5Q; + T5R = T5P - T5Q; + ii[WS(rs, 17)] = FNMS(KP980785280, T8E, T8D); + ii[WS(rs, 1)] = FMA(KP980785280, T8E, T8D); + ri[WS(rs, 9)] = FMA(KP980785280, T5R, T5O); + ri[WS(rs, 25)] = FNMS(KP980785280, T5R, T5O); + } + } + } + { + E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T67, T6u, T6y, T6C, T6m, T6i; + { + E T6p, T5Z, T62, T6q; + T6p = FNMS(KP414213562, T5X, T5Y); + T5Z = FMA(KP414213562, T5Y, T5X); + ii[WS(rs, 25)] = FNMS(KP980785280, T8G, T8F); + ii[WS(rs, 9)] = FMA(KP980785280, T8G, T8F); + T6o = FNMS(KP707106781, T5V, T5S); + T5W = FMA(KP707106781, T5V, T5S); + T62 = FNMS(KP414213562, T61, T60); + T6q = FMA(KP414213562, T60, T61); + T8W = T5Z + T62; + T63 = T5Z - T62; + T8V = FNMS(KP707106781, T8O, T8N); + T8P = FMA(KP707106781, T8O, T8N); + { + E T6x, T6e, T6w, T6h; + T8Q = T6q - T6p; + T6r = T6p + T6q; + T6x = FMA(KP707106781, T6d, T6c); + T6e = FNMS(KP707106781, T6d, T6c); + T6w = FMA(KP707106781, T6g, T6f); + T6h = FNMS(KP707106781, T6g, T6f); + T67 = FNMS(KP707106781, T66, T65); + T6u = FMA(KP707106781, T66, T65); + T6y = FNMS(KP198912367, T6x, T6w); + T6C = FMA(KP198912367, T6w, T6x); + T6m = FMA(KP668178637, T6e, T6h); + T6i = FNMS(KP668178637, T6h, T6e); + } + } + { + E T6k, T64, T8R, T8T, T6t, T6a; + T6k = FNMS(KP923879532, T63, T5W); + T64 = FMA(KP923879532, T63, T5W); + T8R = FMA(KP923879532, T8Q, T8P); + T8T = FNMS(KP923879532, T8Q, T8P); + T6t = FMA(KP707106781, T69, T68); + T6a = FNMS(KP707106781, T69, T68); + { + E T6A, T8X, T8Y, T6D; + { + E T6s, 
T6B, T6l, T6b, T6z, T6v; + T6A = FMA(KP923879532, T6r, T6o); + T6s = FNMS(KP923879532, T6r, T6o); + T6v = FMA(KP198912367, T6u, T6t); + T6B = FNMS(KP198912367, T6t, T6u); + T6l = FNMS(KP668178637, T67, T6a); + T6b = FMA(KP668178637, T6a, T67); + T6z = T6v - T6y; + T90 = T6v + T6y; + T8Z = FMA(KP923879532, T8W, T8V); + T8X = FNMS(KP923879532, T8W, T8V); + { + E T6n, T8S, T8U, T6j; + T6n = T6l - T6m; + T8S = T6l + T6m; + T8U = T6i - T6b; + T6j = T6b + T6i; + ri[WS(rs, 7)] = FMA(KP980785280, T6z, T6s); + ri[WS(rs, 23)] = FNMS(KP980785280, T6z, T6s); + ri[WS(rs, 11)] = FMA(KP831469612, T6n, T6k); + ri[WS(rs, 27)] = FNMS(KP831469612, T6n, T6k); + ii[WS(rs, 19)] = FNMS(KP831469612, T8S, T8R); + ii[WS(rs, 3)] = FMA(KP831469612, T8S, T8R); + ii[WS(rs, 27)] = FNMS(KP831469612, T8U, T8T); + ii[WS(rs, 11)] = FMA(KP831469612, T8U, T8T); + ri[WS(rs, 3)] = FMA(KP831469612, T6j, T64); + ri[WS(rs, 19)] = FNMS(KP831469612, T6j, T64); + T8Y = T6C - T6B; + T6D = T6B + T6C; + } + } + ii[WS(rs, 23)] = FNMS(KP980785280, T8Y, T8X); + ii[WS(rs, 7)] = FMA(KP980785280, T8Y, T8X); + ri[WS(rs, 31)] = FMA(KP980785280, T6D, T6A); + ri[WS(rs, 15)] = FNMS(KP980785280, T6D, T6A); + } + } + } + } + } + } + ii[WS(rs, 31)] = FMA(KP980785280, T90, T8Z); + ii[WS(rs, 15)] = FNMS(KP980785280, T90, T8Z); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 32}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 32, "t1_32", twinstr, &GENUS, {236, 62, 198, 0}, 0, 0, 0 }; + +void X(codelet_t1_32) (planner *p) { + X(kdft_dit_register) (p, t1_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 32 -name t1_32 -include t.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 96 stack variables, 7 constants, and 128 memory accesses + */ +#include "t.h" + +static void t1_32(R *ri, R *ii, const R *W, stride rs, INT mb, 
INT me, INT ms) +{ + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 62); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { + E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41; + E T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U; + E T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x; + E T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P; + E T4m, T5h, T4v, T5e; + { + E T1, T76, T6, T75, Tc, T32, Th, T33; + T1 = ri[0]; + T76 = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 16)]; + T5 = ii[WS(rs, 16)]; + T2 = W[30]; + T4 = W[31]; + T6 = FMA(T2, T3, T4 * T5); + T75 = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 8)]; + Tb = ii[WS(rs, 8)]; + T8 = W[14]; + Ta = W[15]; + Tc = FMA(T8, T9, Ta * Tb); + T32 = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 24)]; + Tg = ii[WS(rs, 24)]; + Td = W[46]; + Tf = W[47]; + Th = FMA(Td, Te, Tf * Tg); + T33 = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, T7A, T7B; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 + Ti; + T5F = T7 - Ti; + T7A = T76 - T75; + T7B = Tc - Th; + T7C = T7A - T7B; + T7Q = T7B + T7A; + } + { + E T31, T34, T74, T77; + T31 = T1 - T6; + T34 = T32 - T33; + T35 = T31 - T34; + T4T = T31 + T34; + T74 = T32 + T33; + T77 = T75 + T76; + T78 = T74 + T77; + T7m = T77 - T74; + } + } + { + E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y; + { + E T1v, T1x, T1u, T1w; + T1v = ri[WS(rs, 1)]; + T1x = 
ii[WS(rs, 1)]; + T1u = W[0]; + T1w = W[1]; + T1y = FMA(T1u, T1v, T1w * T1x); + T3G = FNMS(T1w, T1v, T1u * T1x); + } + { + E T1L, T1N, T1K, T1M; + T1L = ri[WS(rs, 25)]; + T1N = ii[WS(rs, 25)]; + T1K = W[48]; + T1M = W[49]; + T1O = FMA(T1K, T1L, T1M * T1N); + T3Z = FNMS(T1M, T1L, T1K * T1N); + } + { + E T1A, T1C, T1z, T1B; + T1A = ri[WS(rs, 17)]; + T1C = ii[WS(rs, 17)]; + T1z = W[32]; + T1B = W[33]; + T1D = FMA(T1z, T1A, T1B * T1C); + T3H = FNMS(T1B, T1A, T1z * T1C); + } + { + E T1G, T1I, T1F, T1H; + T1G = ri[WS(rs, 9)]; + T1I = ii[WS(rs, 9)]; + T1F = W[16]; + T1H = W[17]; + T1J = FMA(T1F, T1G, T1H * T1I); + T3Y = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1E, T1P, T5W, T5X; + T1E = T1y + T1D; + T1P = T1J + T1O; + T1Q = T1E + T1P; + T61 = T1E - T1P; + T5W = T3G + T3H; + T5X = T3Y + T3Z; + T5Y = T5W - T5X; + T6J = T5W + T5X; + } + { + E T3I, T3J, T3X, T40; + T3I = T3G - T3H; + T3J = T1J - T1O; + T3K = T3I + T3J; + T59 = T3I - T3J; + T3X = T1y - T1D; + T40 = T3Y - T3Z; + T41 = T3X - T40; + T56 = T3X + T40; + } + } + { + E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48; + { + E T2g, T2i, T2f, T2h; + T2g = ri[WS(rs, 31)]; + T2i = ii[WS(rs, 31)]; + T2f = W[60]; + T2h = W[61]; + T2j = FMA(T2f, T2g, T2h * T2i); + T4o = FNMS(T2h, T2g, T2f * T2i); + } + { + E T2w, T2y, T2v, T2x; + T2w = ri[WS(rs, 23)]; + T2y = ii[WS(rs, 23)]; + T2v = W[44]; + T2x = W[45]; + T2z = FMA(T2v, T2w, T2x * T2y); + T49 = FNMS(T2x, T2w, T2v * T2y); + } + { + E T2l, T2n, T2k, T2m; + T2l = ri[WS(rs, 15)]; + T2n = ii[WS(rs, 15)]; + T2k = W[28]; + T2m = W[29]; + T2o = FMA(T2k, T2l, T2m * T2n); + T4p = FNMS(T2m, T2l, T2k * T2n); + } + { + E T2r, T2t, T2q, T2s; + T2r = ri[WS(rs, 7)]; + T2t = ii[WS(rs, 7)]; + T2q = W[12]; + T2s = W[13]; + T2u = FMA(T2q, T2r, T2s * T2t); + T48 = FNMS(T2s, T2r, T2q * T2t); + } + { + E T2p, T2A, T6c, T6d; + T2p = T2j + T2o; + T2A = T2u + T2z; + T2B = T2p + T2A; + T67 = T2p - T2A; + T6c = T4o + T4p; + T6d = T48 + T49; + T6e = T6c - T6d; + T6O = T6c + T6d; + } + { + E T47, T4a, T4q, T4r; + 
T47 = T2j - T2o; + T4a = T48 - T49; + T4b = T47 - T4a; + T5d = T47 + T4a; + T4q = T4o - T4p; + T4r = T2u - T2z; + T4s = T4q + T4r; + T5g = T4q - T4r; + } + } + { + E To, T36, TE, T3d, Tt, T37, Tz, T3c; + { + E Tl, Tn, Tk, Tm; + Tl = ri[WS(rs, 4)]; + Tn = ii[WS(rs, 4)]; + Tk = W[6]; + Tm = W[7]; + To = FMA(Tk, Tl, Tm * Tn); + T36 = FNMS(Tm, Tl, Tk * Tn); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 12)]; + TD = ii[WS(rs, 12)]; + TA = W[22]; + TC = W[23]; + TE = FMA(TA, TB, TC * TD); + T3d = FNMS(TC, TB, TA * TD); + } + { + E Tq, Ts, Tp, Tr; + Tq = ri[WS(rs, 20)]; + Ts = ii[WS(rs, 20)]; + Tp = W[38]; + Tr = W[39]; + Tt = FMA(Tp, Tq, Tr * Ts); + T37 = FNMS(Tr, Tq, Tp * Ts); + } + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 28)]; + Ty = ii[WS(rs, 28)]; + Tv = W[54]; + Tx = W[55]; + Tz = FMA(Tv, Tw, Tx * Ty); + T3c = FNMS(Tx, Tw, Tv * Ty); + } + { + E Tu, TF, T5G, T5H; + Tu = To + Tt; + TF = Tz + TE; + TG = Tu + TF; + T7l = TF - Tu; + T5G = T36 + T37; + T5H = T3c + T3d; + T5I = T5G - T5H; + T73 = T5G + T5H; + } + { + E T38, T39, T3b, T3e; + T38 = T36 - T37; + T39 = To - Tt; + T3a = T38 - T39; + T4U = T39 + T38; + T3b = Tz - TE; + T3e = T3c - T3d; + T3f = T3b + T3e; + T4V = T3b - T3e; + } + } + { + E TM, T3i, T12, T3p, TR, T3j, TX, T3o; + { + E TJ, TL, TI, TK; + TJ = ri[WS(rs, 2)]; + TL = ii[WS(rs, 2)]; + TI = W[2]; + TK = W[3]; + TM = FMA(TI, TJ, TK * TL); + T3i = FNMS(TK, TJ, TI * TL); + } + { + E TZ, T11, TY, T10; + TZ = ri[WS(rs, 26)]; + T11 = ii[WS(rs, 26)]; + TY = W[50]; + T10 = W[51]; + T12 = FMA(TY, TZ, T10 * T11); + T3p = FNMS(T10, TZ, TY * T11); + } + { + E TO, TQ, TN, TP; + TO = ri[WS(rs, 18)]; + TQ = ii[WS(rs, 18)]; + TN = W[34]; + TP = W[35]; + TR = FMA(TN, TO, TP * TQ); + T3j = FNMS(TP, TO, TN * TQ); + } + { + E TU, TW, TT, TV; + TU = ri[WS(rs, 10)]; + TW = ii[WS(rs, 10)]; + TT = W[18]; + TV = W[19]; + TX = FMA(TT, TU, TV * TW); + T3o = FNMS(TV, TU, TT * TW); + } + { + E TS, T13, T5K, T5L; + TS = TM + TR; + T13 = TX + T12; + T14 = TS + T13; + T5N = TS - T13; + 
T5K = T3i + T3j; + T5L = T3o + T3p; + T5M = T5K - T5L; + T6E = T5K + T5L; + } + { + E T3k, T3l, T3n, T3q; + T3k = T3i - T3j; + T3l = TX - T12; + T3m = T3k + T3l; + T4Y = T3k - T3l; + T3n = TM - TR; + T3q = T3o - T3p; + T3r = T3n - T3q; + T4Z = T3n + T3q; + } + } + { + E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z; + { + E T16, T18, T15, T17; + T16 = ri[WS(rs, 30)]; + T18 = ii[WS(rs, 30)]; + T15 = W[58]; + T17 = W[59]; + T19 = FMA(T15, T16, T17 * T18); + T3t = FNMS(T17, T16, T15 * T18); + } + { + E T1m, T1o, T1l, T1n; + T1m = ri[WS(rs, 22)]; + T1o = ii[WS(rs, 22)]; + T1l = W[42]; + T1n = W[43]; + T1p = FMA(T1l, T1m, T1n * T1o); + T3A = FNMS(T1n, T1m, T1l * T1o); + } + { + E T1b, T1d, T1a, T1c; + T1b = ri[WS(rs, 14)]; + T1d = ii[WS(rs, 14)]; + T1a = W[26]; + T1c = W[27]; + T1e = FMA(T1a, T1b, T1c * T1d); + T3u = FNMS(T1c, T1b, T1a * T1d); + } + { + E T1h, T1j, T1g, T1i; + T1h = ri[WS(rs, 6)]; + T1j = ii[WS(rs, 6)]; + T1g = W[10]; + T1i = W[11]; + T1k = FMA(T1g, T1h, T1i * T1j); + T3z = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1f, T1q, T5Q, T5R; + T1f = T19 + T1e; + T1q = T1k + T1p; + T1r = T1f + T1q; + T5P = T1f - T1q; + T5Q = T3t + T3u; + T5R = T3z + T3A; + T5S = T5Q - T5R; + T6F = T5Q + T5R; + } + { + E T3v, T3w, T3y, T3B; + T3v = T3t - T3u; + T3w = T1k - T1p; + T3x = T3v + T3w; + T51 = T3v - T3w; + T3y = T19 - T1e; + T3B = T3z - T3A; + T3C = T3y - T3B; + T52 = T3y + T3B; + } + } + { + E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O; + { + E T1S, T1U, T1R, T1T; + T1S = ri[WS(rs, 5)]; + T1U = ii[WS(rs, 5)]; + T1R = W[8]; + T1T = W[9]; + T1V = FMA(T1R, T1S, T1T * T1U); + T3R = FNMS(T1T, T1S, T1R * T1U); + } + { + E T1X, T1Z, T1W, T1Y; + T1X = ri[WS(rs, 21)]; + T1Z = ii[WS(rs, 21)]; + T1W = W[40]; + T1Y = W[41]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T3S = FNMS(T1Y, T1X, T1W * T1Z); + } + T3Q = T1V - T20; + T3T = T3R - T3S; + { + E T23, T25, T22, T24; + T23 = ri[WS(rs, 29)]; + T25 = ii[WS(rs, 29)]; + T22 = W[56]; + T24 = W[57]; + T26 = FMA(T22, T23, T24 * T25); + 
T3M = FNMS(T24, T23, T22 * T25); + } + { + E T28, T2a, T27, T29; + T28 = ri[WS(rs, 13)]; + T2a = ii[WS(rs, 13)]; + T27 = W[24]; + T29 = W[25]; + T2b = FMA(T27, T28, T29 * T2a); + T3N = FNMS(T29, T28, T27 * T2a); + } + T3L = T26 - T2b; + T3O = T3M - T3N; + { + E T21, T2c, T62, T63; + T21 = T1V + T20; + T2c = T26 + T2b; + T2d = T21 + T2c; + T5Z = T2c - T21; + T62 = T3R + T3S; + T63 = T3M + T3N; + T64 = T62 - T63; + T6K = T62 + T63; + } + { + E T3P, T3U, T42, T43; + T3P = T3L - T3O; + T3U = T3Q + T3T; + T3V = KP707106781 * (T3P - T3U); + T57 = KP707106781 * (T3U + T3P); + T42 = T3T - T3Q; + T43 = T3L + T3O; + T44 = KP707106781 * (T42 - T43); + T5a = KP707106781 * (T42 + T43); + } + } + { + E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k; + { + E T2D, T2F, T2C, T2E; + T2D = ri[WS(rs, 3)]; + T2F = ii[WS(rs, 3)]; + T2C = W[4]; + T2E = W[5]; + T2G = FMA(T2C, T2D, T2E * T2F); + T4c = FNMS(T2E, T2D, T2C * T2F); + } + { + E T2I, T2K, T2H, T2J; + T2I = ri[WS(rs, 19)]; + T2K = ii[WS(rs, 19)]; + T2H = W[36]; + T2J = W[37]; + T2L = FMA(T2H, T2I, T2J * T2K); + T4d = FNMS(T2J, T2I, T2H * T2K); + } + T4e = T4c - T4d; + T4f = T2G - T2L; + { + E T2O, T2Q, T2N, T2P; + T2O = ri[WS(rs, 27)]; + T2Q = ii[WS(rs, 27)]; + T2N = W[52]; + T2P = W[53]; + T2R = FMA(T2N, T2O, T2P * T2Q); + T4i = FNMS(T2P, T2O, T2N * T2Q); + } + { + E T2T, T2V, T2S, T2U; + T2T = ri[WS(rs, 11)]; + T2V = ii[WS(rs, 11)]; + T2S = W[20]; + T2U = W[21]; + T2W = FMA(T2S, T2T, T2U * T2V); + T4j = FNMS(T2U, T2T, T2S * T2V); + } + T4h = T2R - T2W; + T4k = T4i - T4j; + { + E T2M, T2X, T68, T69; + T2M = T2G + T2L; + T2X = T2R + T2W; + T2Y = T2M + T2X; + T6f = T2X - T2M; + T68 = T4c + T4d; + T69 = T4i + T4j; + T6a = T68 - T69; + T6P = T68 + T69; + } + { + E T4g, T4l, T4t, T4u; + T4g = T4e - T4f; + T4l = T4h + T4k; + T4m = KP707106781 * (T4g - T4l); + T5h = KP707106781 * (T4g + T4l); + T4t = T4h - T4k; + T4u = T4f + T4e; + T4v = KP707106781 * (T4t - T4u); + T5e = KP707106781 * (T4u + T4t); + } + } + { + E T1t, T6X, 
T7a, T7c, T30, T7b, T70, T71; + { + E TH, T1s, T72, T79; + TH = Tj + TG; + T1s = T14 + T1r; + T1t = TH + T1s; + T6X = TH - T1s; + T72 = T6E + T6F; + T79 = T73 + T78; + T7a = T72 + T79; + T7c = T79 - T72; + } + { + E T2e, T2Z, T6Y, T6Z; + T2e = T1Q + T2d; + T2Z = T2B + T2Y; + T30 = T2e + T2Z; + T7b = T2Z - T2e; + T6Y = T6J + T6K; + T6Z = T6O + T6P; + T70 = T6Y - T6Z; + T71 = T6Y + T6Z; + } + ri[WS(rs, 16)] = T1t - T30; + ii[WS(rs, 16)] = T7a - T71; + ri[0] = T1t + T30; + ii[0] = T71 + T7a; + ri[WS(rs, 24)] = T6X - T70; + ii[WS(rs, 24)] = T7c - T7b; + ri[WS(rs, 8)] = T6X + T70; + ii[WS(rs, 8)] = T7b + T7c; + } + { + E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V; + { + E T6D, T6G, T7e, T7f; + T6D = Tj - TG; + T6G = T6E - T6F; + T6H = T6D + T6G; + T6T = T6D - T6G; + T7e = T1r - T14; + T7f = T78 - T73; + T7g = T7e + T7f; + T7i = T7f - T7e; + } + { + E T6I, T6L, T6N, T6Q; + T6I = T1Q - T2d; + T6L = T6J - T6K; + T6M = T6I + T6L; + T6U = T6L - T6I; + T6N = T2B - T2Y; + T6Q = T6O - T6P; + T6R = T6N - T6Q; + T6V = T6N + T6Q; + } + { + E T6S, T7d, T6W, T7h; + T6S = KP707106781 * (T6M + T6R); + ri[WS(rs, 20)] = T6H - T6S; + ri[WS(rs, 4)] = T6H + T6S; + T7d = KP707106781 * (T6U + T6V); + ii[WS(rs, 4)] = T7d + T7g; + ii[WS(rs, 20)] = T7g - T7d; + T6W = KP707106781 * (T6U - T6V); + ri[WS(rs, 28)] = T6T - T6W; + ri[WS(rs, 12)] = T6T + T6W; + T7h = KP707106781 * (T6R - T6M); + ii[WS(rs, 12)] = T7h + T7i; + ii[WS(rs, 28)] = T7i - T7h; + } + } + { + E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h; + E T6l; + { + E T5O, T5T, T60, T65; + T5J = T5F - T5I; + T7n = T7l + T7m; + T7t = T7m - T7l; + T6n = T5F + T5I; + T5O = T5M - T5N; + T5T = T5P + T5S; + T5U = KP707106781 * (T5O - T5T); + T7k = KP707106781 * (T5O + T5T); + { + E T6v, T6w, T6o, T6p; + T6v = T67 + T6a; + T6w = T6e + T6f; + T6x = FNMS(KP382683432, T6w, KP923879532 * T6v); + T6B = FMA(KP923879532, T6w, KP382683432 * T6v); + T6o = T5N + T5M; + T6p = T5P - T5S; + T6q = KP707106781 * (T6o + T6p); + T7s = 
KP707106781 * (T6p - T6o); + } + T60 = T5Y - T5Z; + T65 = T61 - T64; + T66 = FMA(KP923879532, T60, KP382683432 * T65); + T6k = FNMS(KP923879532, T65, KP382683432 * T60); + { + E T6s, T6t, T6b, T6g; + T6s = T5Y + T5Z; + T6t = T61 + T64; + T6u = FMA(KP382683432, T6s, KP923879532 * T6t); + T6A = FNMS(KP382683432, T6t, KP923879532 * T6s); + T6b = T67 - T6a; + T6g = T6e - T6f; + T6h = FNMS(KP923879532, T6g, KP382683432 * T6b); + T6l = FMA(KP382683432, T6g, KP923879532 * T6b); + } + } + { + E T5V, T6i, T7r, T7u; + T5V = T5J + T5U; + T6i = T66 + T6h; + ri[WS(rs, 22)] = T5V - T6i; + ri[WS(rs, 6)] = T5V + T6i; + T7r = T6k + T6l; + T7u = T7s + T7t; + ii[WS(rs, 6)] = T7r + T7u; + ii[WS(rs, 22)] = T7u - T7r; + } + { + E T6j, T6m, T7v, T7w; + T6j = T5J - T5U; + T6m = T6k - T6l; + ri[WS(rs, 30)] = T6j - T6m; + ri[WS(rs, 14)] = T6j + T6m; + T7v = T6h - T66; + T7w = T7t - T7s; + ii[WS(rs, 14)] = T7v + T7w; + ii[WS(rs, 30)] = T7w - T7v; + } + { + E T6r, T6y, T7j, T7o; + T6r = T6n + T6q; + T6y = T6u + T6x; + ri[WS(rs, 18)] = T6r - T6y; + ri[WS(rs, 2)] = T6r + T6y; + T7j = T6A + T6B; + T7o = T7k + T7n; + ii[WS(rs, 2)] = T7j + T7o; + ii[WS(rs, 18)] = T7o - T7j; + } + { + E T6z, T6C, T7p, T7q; + T6z = T6n - T6q; + T6C = T6A - T6B; + ri[WS(rs, 26)] = T6z - T6C; + ri[WS(rs, 10)] = T6z + T6C; + T7p = T6x - T6u; + T7q = T7n - T7k; + ii[WS(rs, 10)] = T7p + T7q; + ii[WS(rs, 26)] = T7q - T7p; + } + } + { + E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x; + E T4B, T3g, T7P; + T3g = KP707106781 * (T3a - T3f); + T3h = T35 - T3g; + T4D = T35 + T3g; + T7P = KP707106781 * (T4V - T4U); + T7R = T7P + T7Q; + T7X = T7Q - T7P; + { + E T3s, T3D, T4L, T4M; + T3s = FNMS(KP923879532, T3r, KP382683432 * T3m); + T3D = FMA(KP382683432, T3x, KP923879532 * T3C); + T3E = T3s - T3D; + T7O = T3s + T3D; + T4L = T4b + T4m; + T4M = T4s + T4v; + T4N = FNMS(KP555570233, T4M, KP831469612 * T4L); + T4R = FMA(KP831469612, T4M, KP555570233 * T4L); + } + { + E T3W, T45, T4E, T4F; + T3W = T3K - 
T3V; + T45 = T41 - T44; + T46 = FMA(KP980785280, T3W, KP195090322 * T45); + T4A = FNMS(KP980785280, T45, KP195090322 * T3W); + T4E = FMA(KP923879532, T3m, KP382683432 * T3r); + T4F = FNMS(KP923879532, T3x, KP382683432 * T3C); + T4G = T4E + T4F; + T7W = T4F - T4E; + } + { + E T4I, T4J, T4n, T4w; + T4I = T3K + T3V; + T4J = T41 + T44; + T4K = FMA(KP555570233, T4I, KP831469612 * T4J); + T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I); + T4n = T4b - T4m; + T4w = T4s - T4v; + T4x = FNMS(KP980785280, T4w, KP195090322 * T4n); + T4B = FMA(KP195090322, T4w, KP980785280 * T4n); + } + { + E T3F, T4y, T7V, T7Y; + T3F = T3h + T3E; + T4y = T46 + T4x; + ri[WS(rs, 23)] = T3F - T4y; + ri[WS(rs, 7)] = T3F + T4y; + T7V = T4A + T4B; + T7Y = T7W + T7X; + ii[WS(rs, 7)] = T7V + T7Y; + ii[WS(rs, 23)] = T7Y - T7V; + } + { + E T4z, T4C, T7Z, T80; + T4z = T3h - T3E; + T4C = T4A - T4B; + ri[WS(rs, 31)] = T4z - T4C; + ri[WS(rs, 15)] = T4z + T4C; + T7Z = T4x - T46; + T80 = T7X - T7W; + ii[WS(rs, 15)] = T7Z + T80; + ii[WS(rs, 31)] = T80 - T7Z; + } + { + E T4H, T4O, T7N, T7S; + T4H = T4D + T4G; + T4O = T4K + T4N; + ri[WS(rs, 19)] = T4H - T4O; + ri[WS(rs, 3)] = T4H + T4O; + T7N = T4Q + T4R; + T7S = T7O + T7R; + ii[WS(rs, 3)] = T7N + T7S; + ii[WS(rs, 19)] = T7S - T7N; + } + { + E T4P, T4S, T7T, T7U; + T4P = T4D - T4G; + T4S = T4Q - T4R; + ri[WS(rs, 27)] = T4P - T4S; + ri[WS(rs, 11)] = T4P + T4S; + T7T = T4N - T4K; + T7U = T7R - T7O; + ii[WS(rs, 11)] = T7T + T7U; + ii[WS(rs, 27)] = T7U - T7T; + } + } + { + E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j; + E T5n, T4W, T7z; + T4W = KP707106781 * (T4U + T4V); + T4X = T4T - T4W; + T5p = T4T + T4W; + T7z = KP707106781 * (T3a + T3f); + T7D = T7z + T7C; + T7J = T7C - T7z; + { + E T50, T53, T5x, T5y; + T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y); + T53 = FMA(KP923879532, T51, KP382683432 * T52); + T54 = T50 - T53; + T7y = T50 + T53; + T5x = T5d + T5e; + T5y = T5g + T5h; + T5z = FNMS(KP195090322, T5y, KP980785280 * T5x); + T5D 
= FMA(KP195090322, T5x, KP980785280 * T5y); + } + { + E T58, T5b, T5q, T5r; + T58 = T56 - T57; + T5b = T59 - T5a; + T5c = FMA(KP555570233, T58, KP831469612 * T5b); + T5m = FNMS(KP831469612, T58, KP555570233 * T5b); + T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z); + T5r = FNMS(KP382683432, T51, KP923879532 * T52); + T5s = T5q + T5r; + T7I = T5r - T5q; + } + { + E T5u, T5v, T5f, T5i; + T5u = T56 + T57; + T5v = T59 + T5a; + T5w = FMA(KP980785280, T5u, KP195090322 * T5v); + T5C = FNMS(KP195090322, T5u, KP980785280 * T5v); + T5f = T5d - T5e; + T5i = T5g - T5h; + T5j = FNMS(KP831469612, T5i, KP555570233 * T5f); + T5n = FMA(KP831469612, T5f, KP555570233 * T5i); + } + { + E T55, T5k, T7H, T7K; + T55 = T4X + T54; + T5k = T5c + T5j; + ri[WS(rs, 21)] = T55 - T5k; + ri[WS(rs, 5)] = T55 + T5k; + T7H = T5m + T5n; + T7K = T7I + T7J; + ii[WS(rs, 5)] = T7H + T7K; + ii[WS(rs, 21)] = T7K - T7H; + } + { + E T5l, T5o, T7L, T7M; + T5l = T4X - T54; + T5o = T5m - T5n; + ri[WS(rs, 29)] = T5l - T5o; + ri[WS(rs, 13)] = T5l + T5o; + T7L = T5j - T5c; + T7M = T7J - T7I; + ii[WS(rs, 13)] = T7L + T7M; + ii[WS(rs, 29)] = T7M - T7L; + } + { + E T5t, T5A, T7x, T7E; + T5t = T5p + T5s; + T5A = T5w + T5z; + ri[WS(rs, 17)] = T5t - T5A; + ri[WS(rs, 1)] = T5t + T5A; + T7x = T5C + T5D; + T7E = T7y + T7D; + ii[WS(rs, 1)] = T7x + T7E; + ii[WS(rs, 17)] = T7E - T7x; + } + { + E T5B, T5E, T7F, T7G; + T5B = T5p - T5s; + T5E = T5C - T5D; + ri[WS(rs, 25)] = T5B - T5E; + ri[WS(rs, 9)] = T5B + T5E; + T7F = T5z - T5w; + T7G = T7D - T7y; + ii[WS(rs, 9)] = T7F + T7G; + ii[WS(rs, 25)] = T7G - T7F; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 32}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 32, "t1_32", twinstr, &GENUS, {340, 114, 94, 0}, 0, 0, 0 }; + +void X(codelet_t1_32) (planner *p) { + X(kdft_dit_register) (p, t1_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -name t1_4 -include t.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 31 stack variables, 0 constants, and 16 memory accesses + */ +#include "t.h" + +static void t1_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs)) { + E To, Te, Tm, T8, Tw, Tx, Tq, Tk; + { + E T1, Tv, Tu, T7, Tg, Tj, Tf, Ti, Tp, Th; + T1 = ri[0]; + Tv = ii[0]; + { + E T3, T6, T2, T5; + T3 = ri[WS(rs, 2)]; + T6 = ii[WS(rs, 2)]; + T2 = W[2]; + T5 = W[3]; + { + E Ta, Td, Tc, Tn, Tb, Tt, T4, T9; + Ta = ri[WS(rs, 1)]; + 
Td = ii[WS(rs, 1)]; + Tt = T2 * T6; + T4 = T2 * T3; + T9 = W[0]; + Tc = W[1]; + Tu = FNMS(T5, T3, Tt); + T7 = FMA(T5, T6, T4); + Tn = T9 * Td; + Tb = T9 * Ta; + Tg = ri[WS(rs, 3)]; + Tj = ii[WS(rs, 3)]; + To = FNMS(Tc, Ta, Tn); + Te = FMA(Tc, Td, Tb); + Tf = W[4]; + Ti = W[5]; + } + } + Tm = T1 - T7; + T8 = T1 + T7; + Tw = Tu + Tv; + Tx = Tv - Tu; + Tp = Tf * Tj; + Th = Tf * Tg; + Tq = FNMS(Ti, Tg, Tp); + Tk = FMA(Ti, Tj, Th); + } + { + E Ts, Tr, Tl, Ty; + Ts = To + Tq; + Tr = To - Tq; + Tl = Te + Tk; + Ty = Te - Tk; + ri[WS(rs, 1)] = Tm + Tr; + ri[WS(rs, 3)] = Tm - Tr; + ii[WS(rs, 2)] = Tw - Ts; + ii[0] = Ts + Tw; + ii[WS(rs, 3)] = Ty + Tx; + ii[WS(rs, 1)] = Tx - Ty; + ri[0] = T8 + Tl; + ri[WS(rs, 2)] = T8 - Tl; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 4}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 4, "t1_4", twinstr, &GENUS, {16, 6, 6, 0}, 0, 0, 0 }; + +void X(codelet_t1_4) (planner *p) { + X(kdft_dit_register) (p, t1_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 4 -name t1_4 -include t.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "t.h" + +static void t1_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs)) { + E T1, Tp, T6, To, Tc, Tk, Th, Tl; + T1 = ri[0]; + Tp = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 2)]; + T5 = ii[WS(rs, 2)]; + T2 = W[2]; + T4 = W[3]; + T6 = FMA(T2, T3, T4 * T5); + To = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 1)]; + Tb = ii[WS(rs, 1)]; + T8 = W[0]; + Ta = W[1]; + Tc = FMA(T8, T9, Ta * Tb); + Tk = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 
3)]; + Tg = ii[WS(rs, 3)]; + Td = W[4]; + Tf = W[5]; + Th = FMA(Td, Te, Tf * Tg); + Tl = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, Tn, Tq; + T7 = T1 + T6; + Ti = Tc + Th; + ri[WS(rs, 2)] = T7 - Ti; + ri[0] = T7 + Ti; + Tn = Tk + Tl; + Tq = To + Tp; + ii[0] = Tn + Tq; + ii[WS(rs, 2)] = Tq - Tn; + } + { + E Tj, Tm, Tr, Ts; + Tj = T1 - T6; + Tm = Tk - Tl; + ri[WS(rs, 3)] = Tj - Tm; + ri[WS(rs, 1)] = Tj + Tm; + Tr = Tp - To; + Ts = Tc - Th; + ii[WS(rs, 1)] = Tr - Ts; + ii[WS(rs, 3)] = Ts + Tr; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 4}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 4, "t1_4", twinstr, &GENUS, {16, 6, 6, 0}, 0, 0, 0 }; + +void X(codelet_t1_4) (planner *p) { + X(kdft_dit_register) (p, t1_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 5 -name t1_5 -include t.h */ + +/* + * This function contains 40 FP additions, 34 FP multiplications, + * (or, 14 additions, 8 multiplications, 26 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t.h" + +static void t1_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) { + E T1, TM, TJ, TA, TQ, Te, TC, Tk, TE, Tq; + { + E Tg, Tj, Tm, TB, Th, Tp, Tl, Ti, To, TD, Tn; + T1 = ri[0]; + TM = ii[0]; + { + E T9, Tc, Ty, Ta, Tb, Tx, T7, Tf, Tz, Td; + { + E T3, T6, T8, Tw, T4, T2, T5; + T3 = ri[WS(rs, 1)]; + T6 = ii[WS(rs, 1)]; + T2 = W[0]; + T9 = ri[WS(rs, 4)]; + Tc = ii[WS(rs, 4)]; + T8 = W[6]; + Tw = T2 * T6; + T4 = T2 * T3; + T5 = W[1]; + Ty = T8 * Tc; + Ta = T8 * T9; + Tb = W[7]; + Tx = FNMS(T5, T3, Tw); + T7 = FMA(T5, T6, T4); + } + Tg = ri[WS(rs, 2)]; + Tz = FNMS(Tb, T9, Ty); + Td = FMA(Tb, Tc, Ta); + Tj = ii[WS(rs, 2)]; + Tf = W[2]; + TJ = Tx + Tz; + TA = Tx - Tz; + TQ = T7 - Td; + Te = T7 + Td; + Tm = ri[WS(rs, 3)]; + TB = Tf * Tj; + Th = Tf * Tg; + Tp = ii[WS(rs, 3)]; + Tl = W[4]; + 
Ti = W[3]; + To = W[5]; + } + TD = Tl * Tp; + Tn = Tl * Tm; + TC = FNMS(Ti, Tg, TB); + Tk = FMA(Ti, Tj, Th); + TE = FNMS(To, Tm, TD); + Tq = FMA(To, Tp, Tn); + } + { + E TG, TI, TO, TS, TU, Tu, TN, Tt, TK, TF; + TK = TC + TE; + TF = TC - TE; + { + E Tr, TR, TL, Ts; + Tr = Tk + Tq; + TR = Tk - Tq; + TG = FMA(KP618033988, TF, TA); + TI = FNMS(KP618033988, TA, TF); + TO = TJ - TK; + TL = TJ + TK; + TS = FMA(KP618033988, TR, TQ); + TU = FNMS(KP618033988, TQ, TR); + Tu = Te - Tr; + Ts = Te + Tr; + ii[0] = TL + TM; + TN = FNMS(KP250000000, TL, TM); + ri[0] = T1 + Ts; + Tt = FNMS(KP250000000, Ts, T1); + } + { + E TT, TP, TH, Tv; + TT = FNMS(KP559016994, TO, TN); + TP = FMA(KP559016994, TO, TN); + TH = FNMS(KP559016994, Tu, Tt); + Tv = FMA(KP559016994, Tu, Tt); + ii[WS(rs, 4)] = FMA(KP951056516, TS, TP); + ii[WS(rs, 1)] = FNMS(KP951056516, TS, TP); + ii[WS(rs, 3)] = FNMS(KP951056516, TU, TT); + ii[WS(rs, 2)] = FMA(KP951056516, TU, TT); + ri[WS(rs, 1)] = FMA(KP951056516, TG, Tv); + ri[WS(rs, 4)] = FNMS(KP951056516, TG, Tv); + ri[WS(rs, 3)] = FMA(KP951056516, TI, TH); + ri[WS(rs, 2)] = FNMS(KP951056516, TI, TH); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 5}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 5, "t1_5", twinstr, &GENUS, {14, 8, 26, 0}, 0, 0, 0 }; + +void X(codelet_t1_5) (planner *p) { + X(kdft_dit_register) (p, t1_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 5 -name t1_5 -include t.h */ + +/* + * This function contains 40 FP additions, 28 FP multiplications, + * (or, 26 additions, 14 multiplications, 14 fused multiply/add), + * 29 stack variables, 4 constants, and 20 memory accesses + */ +#include "t.h" + +static void t1_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + 
DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) { + E T1, TE, Tu, Tx, TJ, TI, TB, TC, TD, Tc, Tn, To; + T1 = ri[0]; + TE = ii[0]; + { + E T6, Ts, Tm, Tw, Tb, Tt, Th, Tv; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 1)]; + T5 = ii[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + Ts = FNMS(T4, T3, T2 * T5); + } + { + E Tj, Tl, Ti, Tk; + Tj = ri[WS(rs, 3)]; + Tl = ii[WS(rs, 3)]; + Ti = W[4]; + Tk = W[5]; + Tm = FMA(Ti, Tj, Tk * Tl); + Tw = FNMS(Tk, Tj, Ti * Tl); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 4)]; + Ta = ii[WS(rs, 4)]; + T7 = W[6]; + T9 = W[7]; + Tb = FMA(T7, T8, T9 * Ta); + Tt = FNMS(T9, T8, T7 * Ta); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 2)]; + Tg = ii[WS(rs, 2)]; + Td = W[2]; + Tf = W[3]; + Th = FMA(Td, Te, Tf * Tg); + Tv = FNMS(Tf, Te, Td * Tg); + } + Tu = Ts - Tt; + Tx = Tv - Tw; + TJ = Th - Tm; + TI = T6 - Tb; + TB = Ts + Tt; + TC = Tv + Tw; + TD = TB + TC; + Tc = T6 + Tb; + Tn = Th + Tm; + To = Tc + Tn; + } + ri[0] = T1 + To; + ii[0] = TD + TE; + { + E Ty, TA, Tr, Tz, Tp, Tq; + Ty = FMA(KP951056516, Tu, KP587785252 * Tx); + TA = FNMS(KP587785252, Tu, KP951056516 * Tx); + Tp = KP559016994 * (Tc - Tn); + Tq = FNMS(KP250000000, To, T1); + Tr = Tp + Tq; + Tz = Tq - Tp; + ri[WS(rs, 4)] = Tr - Ty; + ri[WS(rs, 3)] = Tz + TA; + ri[WS(rs, 1)] = Tr + Ty; + ri[WS(rs, 2)] = Tz - TA; + } + { + E TK, TL, TH, TM, TF, TG; + TK = FMA(KP951056516, TI, KP587785252 * TJ); + TL = FNMS(KP587785252, TI, KP951056516 * TJ); + TF = KP559016994 * (TB - TC); + TG = FNMS(KP250000000, TD, TE); + TH = TF + TG; + TM = TG - TF; + ii[WS(rs, 1)] = TH - TK; + ii[WS(rs, 3)] = TM - TL; + ii[WS(rs, 4)] = TK + TH; + ii[WS(rs, 2)] = TL + TM; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 5}, + {TW_NEXT, 1, 0} +}; + +static 
const ct_desc desc = { 5, "t1_5", twinstr, &GENUS, {26, 14, 14, 0}, 0, 0, 0 }; + +void X(codelet_t1_5) (planner *p) { + X(kdft_dit_register) (p, t1_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include t.h */ + +/* + * This function contains 46 FP additions, 32 FP multiplications, + * (or, 24 additions, 10 multiplications, 22 fused multiply/add), + * 47 stack variables, 2 constants, and 24 memory accesses + */ +#include "t.h" + +static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { + E TY, TU, T10, TZ; + { + E T1, TX, TW, T7, Tn, Tq, TJ, TR, TB, Tl, To, TK, Tt, Tw, Ts; + E Tp, Tv; + T1 = ri[0]; + TX = ii[0]; + { + E T3, T6, T2, T5; + T3 = ri[WS(rs, 3)]; + T6 = ii[WS(rs, 3)]; + T2 = W[4]; + T5 = W[5]; + { + E Ta, Td, Tg, TF, Tb, Tj, Tf, Tc, Ti, TV, T4, T9; + Ta = ri[WS(rs, 2)]; + Td = ii[WS(rs, 2)]; + TV = T2 * T6; + T4 = T2 * T3; + T9 = W[2]; + Tg = ri[WS(rs, 5)]; + TW = FNMS(T5, T3, TV); + T7 = FMA(T5, T6, T4); + TF = T9 * Td; + Tb = T9 * Ta; + Tj = ii[WS(rs, 5)]; + Tf = W[8]; + Tc = W[3]; + Ti = W[9]; + { + E TG, Te, TI, Tk, TH, Th, Tm; + Tn = ri[WS(rs, 4)]; + TH = Tf * Tj; + Th = Tf * Tg; + TG = FNMS(Tc, Ta, TF); + Te = FMA(Tc, Td, Tb); + TI = FNMS(Ti, Tg, TH); + Tk = FMA(Ti, Tj, Th); + Tq = ii[WS(rs, 4)]; + Tm = W[6]; + TJ = TG - TI; + TR = TG + TI; + TB = Te + Tk; + Tl = Te - Tk; + To = Tm * Tn; + TK = Tm * Tq; + } + Tt 
= ri[WS(rs, 1)]; + Tw = ii[WS(rs, 1)]; + Ts = W[0]; + Tp = W[7]; + Tv = W[1]; + } + } + { + E TA, T8, TL, Tr, TN, Tx, T11, TM, Tu; + TA = T1 + T7; + T8 = T1 - T7; + TM = Ts * Tw; + Tu = Ts * Tt; + TL = FNMS(Tp, Tn, TK); + Tr = FMA(Tp, Tq, To); + TN = FNMS(Tv, Tt, TM); + Tx = FMA(Tv, Tw, Tu); + T11 = TX - TW; + TY = TW + TX; + { + E TP, TT, TD, TE, TQ, Tz, T14, T13; + { + E TO, TS, TC, Ty, T12; + TO = TL - TN; + TS = TL + TN; + TC = Tr + Tx; + Ty = Tr - Tx; + T12 = TJ + TO; + TP = TJ - TO; + TT = TR - TS; + TU = TR + TS; + Tz = Tl + Ty; + T14 = Ty - Tl; + ii[WS(rs, 3)] = T12 + T11; + T13 = FNMS(KP500000000, T12, T11); + T10 = TC - TB; + TD = TB + TC; + } + ri[WS(rs, 3)] = T8 + Tz; + TE = FNMS(KP500000000, Tz, T8); + ii[WS(rs, 5)] = FNMS(KP866025403, T14, T13); + ii[WS(rs, 1)] = FMA(KP866025403, T14, T13); + TQ = FNMS(KP500000000, TD, TA); + ri[WS(rs, 5)] = FNMS(KP866025403, TP, TE); + ri[WS(rs, 1)] = FMA(KP866025403, TP, TE); + ri[0] = TA + TD; + ri[WS(rs, 4)] = FMA(KP866025403, TT, TQ); + ri[WS(rs, 2)] = FNMS(KP866025403, TT, TQ); + } + } + } + ii[0] = TU + TY; + TZ = FNMS(KP500000000, TU, TY); + ii[WS(rs, 2)] = FNMS(KP866025403, T10, TZ); + ii[WS(rs, 4)] = FMA(KP866025403, T10, TZ); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 6}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 6, "t1_6", twinstr, &GENUS, {24, 10, 22, 0}, 0, 0, 0 }; + +void X(codelet_t1_6) (planner *p) { + X(kdft_dit_register) (p, t1_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include t.h */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 23 stack variables, 2 constants, and 24 memory accesses + */ +#include "t.h" + +static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + 
DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { + E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC; + { + E T1, TN, T6, TM; + T1 = ri[0]; + TN = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 3)]; + T5 = ii[WS(rs, 3)]; + T2 = W[4]; + T4 = W[5]; + T6 = FMA(T2, T3, T4 * T5); + TM = FNMS(T4, T3, T2 * T5); + } + T7 = T1 - T6; + TS = TN - TM; + Tv = T1 + T6; + TO = TM + TN; + } + { + E Tn, TD, Ts, TE; + { + E Tk, Tm, Tj, Tl; + Tk = ri[WS(rs, 4)]; + Tm = ii[WS(rs, 4)]; + Tj = W[6]; + Tl = W[7]; + Tn = FMA(Tj, Tk, Tl * Tm); + TD = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = ri[WS(rs, 1)]; + Tr = ii[WS(rs, 1)]; + To = W[0]; + Tq = W[1]; + Ts = FMA(To, Tp, Tq * Tr); + TE = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn - Ts; + TJ = TD + TE; + Tx = Tn + Ts; + TF = TD - TE; + } + { + E Tc, TA, Th, TB; + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 2)]; + Tb = ii[WS(rs, 2)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + TA = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 5)]; + Tg = ii[WS(rs, 5)]; + Td = W[8]; + Tf = W[9]; + Th = FMA(Td, Te, Tf * Tg); + TB = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc - Th; + TI = TA + TB; + Tw = Tc + Th; + TC = TA - TB; + } + { + E TG, Tu, Tz, TR, TT, TU; + TG = KP866025403 * (TC - TF); + Tu = Ti + Tt; + Tz = FNMS(KP500000000, Tu, T7); + ri[WS(rs, 3)] = T7 + Tu; + ri[WS(rs, 1)] = Tz + TG; + ri[WS(rs, 5)] = Tz - TG; + TR = KP866025403 * (Tt - Ti); + TT = TC + TF; + TU = FNMS(KP500000000, TT, TS); + ii[WS(rs, 1)] = TR + TU; + ii[WS(rs, 3)] = TT + TS; + ii[WS(rs, 5)] = TU - TR; + } + { + E TK, Ty, TH, TQ, TL, TP; + TK = KP866025403 * (TI - TJ); + Ty = Tw + Tx; + TH = FNMS(KP500000000, Ty, Tv); + ri[0] = Tv + Ty; + ri[WS(rs, 4)] = TH + TK; + ri[WS(rs, 2)] = TH - TK; + TQ = KP866025403 * (Tx - Tw); + TL = TI + TJ; + TP = FNMS(KP500000000, TL, TO); + ii[0] = TL + TO; + 
ii[WS(rs, 4)] = TQ + TP; + ii[WS(rs, 2)] = TP - TQ; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 6}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 6, "t1_6", twinstr, &GENUS, {32, 14, 14, 0}, 0, 0, 0 }; + +void X(codelet_t1_6) (planner *p) { + X(kdft_dit_register) (p, t1_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3975 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name t1_64 -include t.h */ + +/* + * This function contains 1038 FP additions, 644 FP multiplications, + * (or, 520 additions, 126 multiplications, 518 fused multiply/add), + * 228 stack variables, 15 constants, and 256 memory accesses + */ +#include "t.h" + +static void t1_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + (mb * 126); m < me; m = m + 1, ri = ri + ms, ii = ii 
+ ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E TeI, Tkk, Tkj, TeL; + { + E TiV, Tjm, T7e, TcA, TjR, Tkl, Tm, TeM, TeZ, Ths, T7Q, TcJ, T1G, TeW, TcI; + E T7X, Tf5, Thv, T87, TcN, T29, Tf8, TcQ, T8u, TfU, ThS, Taq, Tdm, T5K, Tg9; + E Tdx, Tbj, TcB, T7l, TiP, TeP, Tjl, TN, TcC, T7s, T7I, TcF, TeU, Thr, T7B; + E TcG, T1f, TeR, Tfg, ThB, T8G, TcU, T32, Tfj, TcX, T93, Tft, ThH, T9h, Td3; + E T3X, TfI, Tde, Taa, Thw, Tfb, Tf6, T2A, T8x, TcO, T8m, TcR, Tfm, ThC, T3t; + E Tfh, T96, TcV, T8V, TcY, ThI, TfL, Tfu, T4o, Tad, Td4, T9w, Tdf, Tgc, ThT; + E T6b, TfV, Tbm, Tdn, TaF, Tdy, ThN, T4Q, TfN, TfA, Taf, Ta1, Td8, Tdh, ThO; + E T5h, TfO, TfF, Tag, T9M, Tdb, Tdi, ThY, T6D, Tge, Tg1, Tbo, Tba, Tdr, TdA; + E TaN, Tdt, Tg5, ThZ, Tg2, T74, Tds, TaU; + { + E T7a, Te, T78, T8, TjP, TiU, T7c, Tk; + { + E T1, TiT, TiS, T7, Tg, Tj, Tf, Ti, T7b, Th; + T1 = ri[0]; + TiT = ii[0]; + { + E T3, T6, T2, T5; + T3 = ri[WS(rs, 32)]; + T6 = ii[WS(rs, 32)]; + T2 = W[62]; + T5 = W[63]; + { + E Ta, Td, Tc, T79, Tb, TiR, T4, T9; + Ta = ri[WS(rs, 16)]; + Td = ii[WS(rs, 16)]; + TiR = T2 * T6; + T4 = T2 * T3; + T9 = W[30]; + Tc = W[31]; + TiS = FNMS(T5, T3, TiR); + T7 = FMA(T5, T6, T4); + T79 = T9 * Td; + Tb = T9 * Ta; + Tg = ri[WS(rs, 48)]; + Tj = ii[WS(rs, 48)]; + T7a = FNMS(Tc, Ta, T79); + Te = FMA(Tc, Td, Tb); + Tf = W[94]; + Ti = W[95]; + } + } + T78 = T1 - T7; + T8 = T1 + T7; + TjP = TiT - TiS; + TiU = TiS + TiT; + T7b = Tf * Tj; + Th = Tf * Tg; + T7c = FNMS(Ti, Tg, T7b); + Tk = FMA(Ti, Tj, Th); + } + { + E T7L, T1l, T7V, T1E, T1u, T1x, T1w, T7N, T1r, T7S, T1v; + { + E T1A, T1D, T1C, T7U, T1B; + { + E T1h, T1k, T1g, T1j, T7K, T1i, T1z; + T1h = ri[WS(rs, 60)]; + T1k = ii[WS(rs, 60)]; + { + E T7d, TiQ, Tl, TjQ; + T7d = T7a - T7c; + TiQ = T7a + T7c; + Tl = Te + Tk; + TjQ = Te - Tk; + TiV = TiQ + TiU; + Tjm = TiU - TiQ; + T7e = T78 - T7d; + TcA = T78 + T7d; + TjR = TjP - TjQ; + Tkl = TjQ + TjP; + Tm = T8 + Tl; + TeM = T8 - Tl; + T1g = W[118]; + } + T1j = W[119]; + T1A = ri[WS(rs, 44)]; + T1D 
= ii[WS(rs, 44)]; + T7K = T1g * T1k; + T1i = T1g * T1h; + T1z = W[86]; + T1C = W[87]; + T7L = FNMS(T1j, T1h, T7K); + T1l = FMA(T1j, T1k, T1i); + T7U = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T7M, T1o, T1t; + T1n = ri[WS(rs, 28)]; + T1q = ii[WS(rs, 28)]; + T7V = FNMS(T1C, T1A, T7U); + T1E = FMA(T1C, T1D, T1B); + T1m = W[54]; + T1p = W[55]; + T1u = ri[WS(rs, 12)]; + T1x = ii[WS(rs, 12)]; + T7M = T1m * T1q; + T1o = T1m * T1n; + T1t = W[22]; + T1w = W[23]; + T7N = FNMS(T1p, T1n, T7M); + T1r = FMA(T1p, T1q, T1o); + T7S = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T7O, TeX, T1s, T7R, T7T, T1y; + T7O = T7L - T7N; + TeX = T7L + T7N; + T1s = T1l + T1r; + T7R = T1l - T1r; + T7T = FNMS(T1w, T1u, T7S); + T1y = FMA(T1w, T1x, T1v); + { + E T7W, TeY, T7P, T1F; + T7W = T7T - T7V; + TeY = T7T + T7V; + T7P = T1y - T1E; + T1F = T1y + T1E; + TeZ = TeX - TeY; + Ths = TeX + TeY; + T7Q = T7O + T7P; + TcJ = T7O - T7P; + T1G = T1s + T1F; + TeW = T1s - T1F; + TcI = T7R + T7W; + T7X = T7R - T7W; + } + } + } + } + { + E T82, T1O, T8s, T27, T1X, T20, T1Z, T84, T1U, T8p, T1Y; + { + E T23, T26, T25, T8r, T24; + { + E T1K, T1N, T1J, T1M, T81, T1L, T22; + T1K = ri[WS(rs, 2)]; + T1N = ii[WS(rs, 2)]; + T1J = W[2]; + T1M = W[3]; + T23 = ri[WS(rs, 50)]; + T26 = ii[WS(rs, 50)]; + T81 = T1J * T1N; + T1L = T1J * T1K; + T22 = W[98]; + T25 = W[99]; + T82 = FNMS(T1M, T1K, T81); + T1O = FMA(T1M, T1N, T1L); + T8r = T22 * T26; + T24 = T22 * T23; + } + { + E T1Q, T1T, T1P, T1S, T83, T1R, T1W; + T1Q = ri[WS(rs, 34)]; + T1T = ii[WS(rs, 34)]; + T8s = FNMS(T25, T23, T8r); + T27 = FMA(T25, T26, T24); + T1P = W[66]; + T1S = W[67]; + T1X = ri[WS(rs, 18)]; + T20 = ii[WS(rs, 18)]; + T83 = T1P * T1T; + T1R = T1P * T1Q; + T1W = W[34]; + T1Z = W[35]; + T84 = FNMS(T1S, T1Q, T83); + T1U = FMA(T1S, T1T, T1R); + T8p = T1W * T20; + T1Y = T1W * T1X; + } + } + { + E T85, Tf3, T1V, T8o, T8q, T21; + T85 = T82 - T84; + Tf3 = T82 + T84; + T1V = T1O + T1U; + T8o = T1O - T1U; + T8q = FNMS(T1Z, T1X, T8p); + T21 
= FMA(T1Z, T20, T1Y); + { + E T8t, Tf4, T86, T28; + T8t = T8q - T8s; + Tf4 = T8q + T8s; + T86 = T21 - T27; + T28 = T21 + T27; + Tf5 = Tf3 - Tf4; + Thv = Tf3 + Tf4; + T87 = T85 + T86; + TcN = T85 - T86; + T29 = T1V + T28; + Tf8 = T1V - T28; + TcQ = T8o + T8t; + T8u = T8o - T8t; + } + } + } + { + E Tal, T5p, Tbh, T5I, T5y, T5B, T5A, Tan, T5v, Tbe, T5z; + { + E T5E, T5H, T5G, Tbg, T5F; + { + E T5l, T5o, T5k, T5n, Tak, T5m, T5D; + T5l = ri[WS(rs, 63)]; + T5o = ii[WS(rs, 63)]; + T5k = W[124]; + T5n = W[125]; + T5E = ri[WS(rs, 47)]; + T5H = ii[WS(rs, 47)]; + Tak = T5k * T5o; + T5m = T5k * T5l; + T5D = W[92]; + T5G = W[93]; + Tal = FNMS(T5n, T5l, Tak); + T5p = FMA(T5n, T5o, T5m); + Tbg = T5D * T5H; + T5F = T5D * T5E; + } + { + E T5r, T5u, T5q, T5t, Tam, T5s, T5x; + T5r = ri[WS(rs, 31)]; + T5u = ii[WS(rs, 31)]; + Tbh = FNMS(T5G, T5E, Tbg); + T5I = FMA(T5G, T5H, T5F); + T5q = W[60]; + T5t = W[61]; + T5y = ri[WS(rs, 15)]; + T5B = ii[WS(rs, 15)]; + Tam = T5q * T5u; + T5s = T5q * T5r; + T5x = W[28]; + T5A = W[29]; + Tan = FNMS(T5t, T5r, Tam); + T5v = FMA(T5t, T5u, T5s); + Tbe = T5x * T5B; + T5z = T5x * T5y; + } + } + { + E Tao, TfS, T5w, Tbd, Tbf, T5C; + Tao = Tal - Tan; + TfS = Tal + Tan; + T5w = T5p + T5v; + Tbd = T5p - T5v; + Tbf = FNMS(T5A, T5y, Tbe); + T5C = FMA(T5A, T5B, T5z); + { + E Tbi, TfT, Tap, T5J; + Tbi = Tbf - Tbh; + TfT = Tbf + Tbh; + Tap = T5C - T5I; + T5J = T5C + T5I; + TfU = TfS - TfT; + ThS = TfS + TfT; + Taq = Tao + Tap; + Tdm = Tao - Tap; + T5K = T5w + T5J; + Tg9 = T5w - T5J; + Tdx = Tbd + Tbi; + Tbj = Tbd - Tbi; + } + } + } + { + E T7G, T1d, T7z, TeS, T11, T7C, T7E, T17, T7r, T7m; + { + E T7g, Ts, T7q, TL, TB, TE, TD, T7i, Ty, T7n, TC; + { + E TH, TK, TJ, T7p, TI; + { + E To, Tr, Tn, Tq, T7f, Tp, TG; + To = ri[WS(rs, 8)]; + Tr = ii[WS(rs, 8)]; + Tn = W[14]; + Tq = W[15]; + TH = ri[WS(rs, 24)]; + TK = ii[WS(rs, 24)]; + T7f = Tn * Tr; + Tp = Tn * To; + TG = W[46]; + TJ = W[47]; + T7g = FNMS(Tq, To, T7f); + Ts = FMA(Tq, Tr, Tp); + T7p = TG * TK; + TI = TG * 
TH; + } + { + E Tu, Tx, Tt, Tw, T7h, Tv, TA; + Tu = ri[WS(rs, 40)]; + Tx = ii[WS(rs, 40)]; + T7q = FNMS(TJ, TH, T7p); + TL = FMA(TJ, TK, TI); + Tt = W[78]; + Tw = W[79]; + TB = ri[WS(rs, 56)]; + TE = ii[WS(rs, 56)]; + T7h = Tt * Tx; + Tv = Tt * Tu; + TA = W[110]; + TD = W[111]; + T7i = FNMS(Tw, Tu, T7h); + Ty = FMA(Tw, Tx, Tv); + T7n = TA * TE; + TC = TA * TB; + } + } + { + E T7j, TeN, Tz, T7k, T7o, TF, TeO, TM; + T7j = T7g - T7i; + TeN = T7g + T7i; + Tz = Ts + Ty; + T7k = Ts - Ty; + T7o = FNMS(TD, TB, T7n); + TF = FMA(TD, TE, TC); + T7r = T7o - T7q; + TeO = T7o + T7q; + TM = TF + TL; + T7m = TF - TL; + TcB = T7k + T7j; + T7l = T7j - T7k; + TiP = TeN + TeO; + TeP = TeN - TeO; + Tjl = TM - Tz; + TN = Tz + TM; + } + } + { + E T7w, TU, T13, T16, T7y, T10, T12, T15, T7D, T14; + { + E T19, T1c, T18, T1b; + { + E TQ, TT, TS, T7v, TR, TP; + TQ = ri[WS(rs, 4)]; + TT = ii[WS(rs, 4)]; + TP = W[6]; + TcC = T7m - T7r; + T7s = T7m + T7r; + TS = W[7]; + T7v = TP * TT; + TR = TP * TQ; + T19 = ri[WS(rs, 52)]; + T1c = ii[WS(rs, 52)]; + T7w = FNMS(TS, TQ, T7v); + TU = FMA(TS, TT, TR); + T18 = W[102]; + T1b = W[103]; + } + { + E TW, TZ, TY, T7x, TX, T7F, T1a, TV; + TW = ri[WS(rs, 36)]; + TZ = ii[WS(rs, 36)]; + T7F = T18 * T1c; + T1a = T18 * T19; + TV = W[70]; + TY = W[71]; + T7G = FNMS(T1b, T19, T7F); + T1d = FMA(T1b, T1c, T1a); + T7x = TV * TZ; + TX = TV * TW; + T13 = ri[WS(rs, 20)]; + T16 = ii[WS(rs, 20)]; + T7y = FNMS(TY, TW, T7x); + T10 = FMA(TY, TZ, TX); + T12 = W[38]; + T15 = W[39]; + } + } + T7z = T7w - T7y; + TeS = T7w + T7y; + T11 = TU + T10; + T7C = TU - T10; + T7D = T12 * T16; + T14 = T12 * T13; + T7E = FNMS(T15, T13, T7D); + T17 = FMA(T15, T16, T14); + } + { + E T8B, T2H, T91, T30, T2Q, T2T, T2S, T8D, T2N, T8Y, T2R; + { + E T2W, T2Z, T2Y, T90, T2X; + { + E T2D, T2G, T2C, T2F, T8A, T2E, T2V; + T2D = ri[WS(rs, 62)]; + T2G = ii[WS(rs, 62)]; + { + E TeT, T7H, T1e, T7A; + TeT = T7E + T7G; + T7H = T7E - T7G; + T1e = T17 + T1d; + T7A = T17 - T1d; + T7I = T7C - T7H; + TcF = T7C + 
T7H; + TeU = TeS - TeT; + Thr = TeS + TeT; + T7B = T7z + T7A; + TcG = T7z - T7A; + T1f = T11 + T1e; + TeR = T11 - T1e; + T2C = W[122]; + } + T2F = W[123]; + T2W = ri[WS(rs, 46)]; + T2Z = ii[WS(rs, 46)]; + T8A = T2C * T2G; + T2E = T2C * T2D; + T2V = W[90]; + T2Y = W[91]; + T8B = FNMS(T2F, T2D, T8A); + T2H = FMA(T2F, T2G, T2E); + T90 = T2V * T2Z; + T2X = T2V * T2W; + } + { + E T2J, T2M, T2I, T2L, T8C, T2K, T2P; + T2J = ri[WS(rs, 30)]; + T2M = ii[WS(rs, 30)]; + T91 = FNMS(T2Y, T2W, T90); + T30 = FMA(T2Y, T2Z, T2X); + T2I = W[58]; + T2L = W[59]; + T2Q = ri[WS(rs, 14)]; + T2T = ii[WS(rs, 14)]; + T8C = T2I * T2M; + T2K = T2I * T2J; + T2P = W[26]; + T2S = W[27]; + T8D = FNMS(T2L, T2J, T8C); + T2N = FMA(T2L, T2M, T2K); + T8Y = T2P * T2T; + T2R = T2P * T2Q; + } + } + { + E T8E, Tfe, T2O, T8X, T8Z, T2U; + T8E = T8B - T8D; + Tfe = T8B + T8D; + T2O = T2H + T2N; + T8X = T2H - T2N; + T8Z = FNMS(T2S, T2Q, T8Y); + T2U = FMA(T2S, T2T, T2R); + { + E T92, Tff, T8F, T31; + T92 = T8Z - T91; + Tff = T8Z + T91; + T8F = T2U - T30; + T31 = T2U + T30; + Tfg = Tfe - Tff; + ThB = Tfe + Tff; + T8G = T8E + T8F; + TcU = T8E - T8F; + T32 = T2O + T31; + Tfj = T2O - T31; + TcX = T8X + T92; + T93 = T8X - T92; + } + } + } + { + E T9c, T3C, Ta8, T3V, T3L, T3O, T3N, T9e, T3I, Ta5, T3M; + { + E T3R, T3U, T3T, Ta7, T3S; + { + E T3y, T3B, T3x, T3A, T9b, T3z, T3Q; + T3y = ri[WS(rs, 1)]; + T3B = ii[WS(rs, 1)]; + T3x = W[0]; + T3A = W[1]; + T3R = ri[WS(rs, 49)]; + T3U = ii[WS(rs, 49)]; + T9b = T3x * T3B; + T3z = T3x * T3y; + T3Q = W[96]; + T3T = W[97]; + T9c = FNMS(T3A, T3y, T9b); + T3C = FMA(T3A, T3B, T3z); + Ta7 = T3Q * T3U; + T3S = T3Q * T3R; + } + { + E T3E, T3H, T3D, T3G, T9d, T3F, T3K; + T3E = ri[WS(rs, 33)]; + T3H = ii[WS(rs, 33)]; + Ta8 = FNMS(T3T, T3R, Ta7); + T3V = FMA(T3T, T3U, T3S); + T3D = W[64]; + T3G = W[65]; + T3L = ri[WS(rs, 17)]; + T3O = ii[WS(rs, 17)]; + T9d = T3D * T3H; + T3F = T3D * T3E; + T3K = W[32]; + T3N = W[33]; + T9e = FNMS(T3G, T3E, T9d); + T3I = FMA(T3G, T3H, T3F); + Ta5 = T3K * 
T3O; + T3M = T3K * T3L; + } + } + { + E T9f, Tfr, T3J, Ta4, Ta6, T3P; + T9f = T9c - T9e; + Tfr = T9c + T9e; + T3J = T3C + T3I; + Ta4 = T3C - T3I; + Ta6 = FNMS(T3N, T3L, Ta5); + T3P = FMA(T3N, T3O, T3M); + { + E Ta9, Tfs, T9g, T3W; + Ta9 = Ta6 - Ta8; + Tfs = Ta6 + Ta8; + T9g = T3P - T3V; + T3W = T3P + T3V; + Tft = Tfr - Tfs; + ThH = Tfr + Tfs; + T9h = T9f + T9g; + Td3 = T9f - T9g; + T3X = T3J + T3W; + TfI = T3J - T3W; + Tde = Ta4 + Ta9; + Taa = Ta4 - Ta9; + } + } + } + } + { + E TaC, T69, Taw, Tga, T5X, Tar, TaA, T63; + { + E T8S, T3r, T8M, Tfk, T3f, T8H, T8Q, T3l; + { + E T8k, T8f, T8w, T8e; + { + E T8a, T2f, T8j, T2y, T2o, T2r, T2q, T8c, T2l, T8g, T2p; + { + E T2u, T2x, T2w, T8i, T2v; + { + E T2b, T2e, T2a, T2d, T89, T2c, T2t; + T2b = ri[WS(rs, 10)]; + T2e = ii[WS(rs, 10)]; + T2a = W[18]; + T2d = W[19]; + T2u = ri[WS(rs, 26)]; + T2x = ii[WS(rs, 26)]; + T89 = T2a * T2e; + T2c = T2a * T2b; + T2t = W[50]; + T2w = W[51]; + T8a = FNMS(T2d, T2b, T89); + T2f = FMA(T2d, T2e, T2c); + T8i = T2t * T2x; + T2v = T2t * T2u; + } + { + E T2h, T2k, T2g, T2j, T8b, T2i, T2n; + T2h = ri[WS(rs, 42)]; + T2k = ii[WS(rs, 42)]; + T8j = FNMS(T2w, T2u, T8i); + T2y = FMA(T2w, T2x, T2v); + T2g = W[82]; + T2j = W[83]; + T2o = ri[WS(rs, 58)]; + T2r = ii[WS(rs, 58)]; + T8b = T2g * T2k; + T2i = T2g * T2h; + T2n = W[114]; + T2q = W[115]; + T8c = FNMS(T2j, T2h, T8b); + T2l = FMA(T2j, T2k, T2i); + T8g = T2n * T2r; + T2p = T2n * T2o; + } + } + { + E T8d, Tf9, T2m, T88, T8h, T2s, Tfa, T2z; + T8d = T8a - T8c; + Tf9 = T8a + T8c; + T2m = T2f + T2l; + T88 = T2f - T2l; + T8h = FNMS(T2q, T2o, T8g); + T2s = FMA(T2q, T2r, T2p); + T8k = T8h - T8j; + Tfa = T8h + T8j; + T2z = T2s + T2y; + T8f = T2s - T2y; + T8w = T8d - T88; + T8e = T88 + T8d; + Thw = Tf9 + Tfa; + Tfb = Tf9 - Tfa; + Tf6 = T2z - T2m; + T2A = T2m + T2z; + } + } + { + E T38, T8J, T3h, T3k, T8L, T3e, T3g, T3j, T8P, T3i; + { + E T3n, T3q, T3m, T3p; + { + E T34, T37, T33, T8v, T8l, T36, T8I, T35; + T34 = ri[WS(rs, 6)]; + T37 = ii[WS(rs, 6)]; + T33 = 
W[10]; + T8v = T8f + T8k; + T8l = T8f - T8k; + T36 = W[11]; + T8I = T33 * T37; + T35 = T33 * T34; + T8x = T8v - T8w; + TcO = T8w + T8v; + T8m = T8e - T8l; + TcR = T8e + T8l; + T38 = FMA(T36, T37, T35); + T8J = FNMS(T36, T34, T8I); + } + T3n = ri[WS(rs, 22)]; + T3q = ii[WS(rs, 22)]; + T3m = W[42]; + T3p = W[43]; + { + E T3a, T3d, T3c, T8K, T3b, T8R, T3o, T39; + T3a = ri[WS(rs, 38)]; + T3d = ii[WS(rs, 38)]; + T8R = T3m * T3q; + T3o = T3m * T3n; + T39 = W[74]; + T3c = W[75]; + T8S = FNMS(T3p, T3n, T8R); + T3r = FMA(T3p, T3q, T3o); + T8K = T39 * T3d; + T3b = T39 * T3a; + T3h = ri[WS(rs, 54)]; + T3k = ii[WS(rs, 54)]; + T8L = FNMS(T3c, T3a, T8K); + T3e = FMA(T3c, T3d, T3b); + T3g = W[106]; + T3j = W[107]; + } + } + T8M = T8J - T8L; + Tfk = T8J + T8L; + T3f = T38 + T3e; + T8H = T38 - T3e; + T8P = T3g * T3k; + T3i = T3g * T3h; + T8Q = FNMS(T3j, T3h, T8P); + T3l = FMA(T3j, T3k, T3i); + } + } + { + E T9u, T9p, Tac, T9o; + { + E T9k, T43, T9t, T4m, T4c, T4f, T4e, T9m, T49, T9q, T4d; + { + E T4i, T4l, T4k, T9s, T4j; + { + E T3Z, T42, T3Y, T41, T9j, T40, T4h; + { + E T95, T8N, T8T, Tfl, T8O, T3s, T8U, T94; + T3Z = ri[WS(rs, 9)]; + T95 = T8M - T8H; + T8N = T8H + T8M; + T8T = T8Q - T8S; + Tfl = T8Q + T8S; + T8O = T3l - T3r; + T3s = T3l + T3r; + T42 = ii[WS(rs, 9)]; + Tfm = Tfk - Tfl; + ThC = Tfk + Tfl; + T8U = T8O - T8T; + T94 = T8O + T8T; + T3t = T3f + T3s; + Tfh = T3s - T3f; + T96 = T94 - T95; + TcV = T95 + T94; + T8V = T8N - T8U; + TcY = T8N + T8U; + T3Y = W[16]; + } + T41 = W[17]; + T4i = ri[WS(rs, 25)]; + T4l = ii[WS(rs, 25)]; + T9j = T3Y * T42; + T40 = T3Y * T3Z; + T4h = W[48]; + T4k = W[49]; + T9k = FNMS(T41, T3Z, T9j); + T43 = FMA(T41, T42, T40); + T9s = T4h * T4l; + T4j = T4h * T4i; + } + { + E T45, T48, T44, T47, T9l, T46, T4b; + T45 = ri[WS(rs, 41)]; + T48 = ii[WS(rs, 41)]; + T9t = FNMS(T4k, T4i, T9s); + T4m = FMA(T4k, T4l, T4j); + T44 = W[80]; + T47 = W[81]; + T4c = ri[WS(rs, 57)]; + T4f = ii[WS(rs, 57)]; + T9l = T44 * T48; + T46 = T44 * T45; + T4b = W[112]; + T4e = 
W[113]; + T9m = FNMS(T47, T45, T9l); + T49 = FMA(T47, T48, T46); + T9q = T4b * T4f; + T4d = T4b * T4c; + } + } + { + E T9n, TfJ, T4a, T9i, T9r, T4g, TfK, T4n; + T9n = T9k - T9m; + TfJ = T9k + T9m; + T4a = T43 + T49; + T9i = T43 - T49; + T9r = FNMS(T4e, T4c, T9q); + T4g = FMA(T4e, T4f, T4d); + T9u = T9r - T9t; + TfK = T9r + T9t; + T4n = T4g + T4m; + T9p = T4g - T4m; + Tac = T9n - T9i; + T9o = T9i + T9n; + ThI = TfJ + TfK; + TfL = TfJ - TfK; + Tfu = T4n - T4a; + T4o = T4a + T4n; + } + } + { + E T5Q, Tat, T5Z, T62, Tav, T5W, T5Y, T61, Taz, T60; + { + E T65, T68, T64, T67; + { + E T5M, T5P, T5L, Tab, T9v, T5O, Tas, T5N; + T5M = ri[WS(rs, 7)]; + T5P = ii[WS(rs, 7)]; + T5L = W[12]; + Tab = T9p + T9u; + T9v = T9p - T9u; + T5O = W[13]; + Tas = T5L * T5P; + T5N = T5L * T5M; + Tad = Tab - Tac; + Td4 = Tac + Tab; + T9w = T9o - T9v; + Tdf = T9o + T9v; + T5Q = FMA(T5O, T5P, T5N); + Tat = FNMS(T5O, T5M, Tas); + } + T65 = ri[WS(rs, 23)]; + T68 = ii[WS(rs, 23)]; + T64 = W[44]; + T67 = W[45]; + { + E T5S, T5V, T5U, Tau, T5T, TaB, T66, T5R; + T5S = ri[WS(rs, 39)]; + T5V = ii[WS(rs, 39)]; + TaB = T64 * T68; + T66 = T64 * T65; + T5R = W[76]; + T5U = W[77]; + TaC = FNMS(T67, T65, TaB); + T69 = FMA(T67, T68, T66); + Tau = T5R * T5V; + T5T = T5R * T5S; + T5Z = ri[WS(rs, 55)]; + T62 = ii[WS(rs, 55)]; + Tav = FNMS(T5U, T5S, Tau); + T5W = FMA(T5U, T5V, T5T); + T5Y = W[108]; + T61 = W[109]; + } + } + Taw = Tat - Tav; + Tga = Tat + Tav; + T5X = T5Q + T5W; + Tar = T5Q - T5W; + Taz = T5Y * T62; + T60 = T5Y * T5Z; + TaA = FNMS(T61, T5Z, Taz); + T63 = FMA(T61, T62, T60); + } + } + } + { + E T9E, Tda, TfE, TfB, Td9, T9L; + { + E T9T, Td7, Tfy, Tfz, Td6, Ta0; + { + E T9V, T4v, T9R, T4O, T4E, T4H, T4G, T9X, T4B, T9O, T4F; + { + E T4K, T4N, T4M, T9Q, T4L; + { + E T4r, T4u, T4q, T4t, T9U, T4s, T4J; + { + E Tbl, Tax, TaD, Tgb, Tay, T6a, TaE, Tbk; + T4r = ri[WS(rs, 5)]; + Tbl = Taw - Tar; + Tax = Tar + Taw; + TaD = TaA - TaC; + Tgb = TaA + TaC; + Tay = T63 - T69; + T6a = T63 + T69; + T4u = ii[WS(rs, 
5)]; + Tgc = Tga - Tgb; + ThT = Tga + Tgb; + TaE = Tay - TaD; + Tbk = Tay + TaD; + T6b = T5X + T6a; + TfV = T6a - T5X; + Tbm = Tbk - Tbl; + Tdn = Tbl + Tbk; + TaF = Tax - TaE; + Tdy = Tax + TaE; + T4q = W[8]; + } + T4t = W[9]; + T4K = ri[WS(rs, 53)]; + T4N = ii[WS(rs, 53)]; + T9U = T4q * T4u; + T4s = T4q * T4r; + T4J = W[104]; + T4M = W[105]; + T9V = FNMS(T4t, T4r, T9U); + T4v = FMA(T4t, T4u, T4s); + T9Q = T4J * T4N; + T4L = T4J * T4K; + } + { + E T4x, T4A, T4w, T4z, T9W, T4y, T4D; + T4x = ri[WS(rs, 37)]; + T4A = ii[WS(rs, 37)]; + T9R = FNMS(T4M, T4K, T9Q); + T4O = FMA(T4M, T4N, T4L); + T4w = W[72]; + T4z = W[73]; + T4E = ri[WS(rs, 21)]; + T4H = ii[WS(rs, 21)]; + T9W = T4w * T4A; + T4y = T4w * T4x; + T4D = W[40]; + T4G = W[41]; + T9X = FNMS(T4z, T4x, T9W); + T4B = FMA(T4z, T4A, T4y); + T9O = T4D * T4H; + T4F = T4D * T4E; + } + } + { + E T9Y, Tfw, T4C, T9N, T9P, T4I; + T9Y = T9V - T9X; + Tfw = T9V + T9X; + T4C = T4v + T4B; + T9N = T4v - T4B; + T9P = FNMS(T4G, T4E, T9O); + T4I = FMA(T4G, T4H, T4F); + { + E Tfx, T9S, T9Z, T4P; + Tfx = T9P + T9R; + T9S = T9P - T9R; + T9Z = T4I - T4O; + T4P = T4I + T4O; + T9T = T9N - T9S; + Td7 = T9N + T9S; + Tfy = Tfw - Tfx; + ThN = Tfw + Tfx; + Tfz = T4C - T4P; + T4Q = T4C + T4P; + Td6 = T9Y - T9Z; + Ta0 = T9Y + T9Z; + } + } + } + { + E T9G, T4W, T9C, T5f, T55, T58, T57, T9I, T52, T9z, T56; + { + E T5b, T5e, T5d, T9B, T5c; + { + E T4S, T4V, T4R, T4U, T9F, T4T, T5a; + T4S = ri[WS(rs, 61)]; + TfN = Tfz + Tfy; + TfA = Tfy - Tfz; + Taf = FMA(KP414213562, T9T, Ta0); + Ta1 = FNMS(KP414213562, Ta0, T9T); + Td8 = FNMS(KP414213562, Td7, Td6); + Tdh = FMA(KP414213562, Td6, Td7); + T4V = ii[WS(rs, 61)]; + T4R = W[120]; + T4U = W[121]; + T5b = ri[WS(rs, 45)]; + T5e = ii[WS(rs, 45)]; + T9F = T4R * T4V; + T4T = T4R * T4S; + T5a = W[88]; + T5d = W[89]; + T9G = FNMS(T4U, T4S, T9F); + T4W = FMA(T4U, T4V, T4T); + T9B = T5a * T5e; + T5c = T5a * T5b; + } + { + E T4Y, T51, T4X, T50, T9H, T4Z, T54; + T4Y = ri[WS(rs, 29)]; + T51 = ii[WS(rs, 29)]; + T9C = 
FNMS(T5d, T5b, T9B); + T5f = FMA(T5d, T5e, T5c); + T4X = W[56]; + T50 = W[57]; + T55 = ri[WS(rs, 13)]; + T58 = ii[WS(rs, 13)]; + T9H = T4X * T51; + T4Z = T4X * T4Y; + T54 = W[24]; + T57 = W[25]; + T9I = FNMS(T50, T4Y, T9H); + T52 = FMA(T50, T51, T4Z); + T9z = T54 * T58; + T56 = T54 * T55; + } + } + { + E T9J, TfC, T53, T9y, T9A, T59; + T9J = T9G - T9I; + TfC = T9G + T9I; + T53 = T4W + T52; + T9y = T4W - T52; + T9A = FNMS(T57, T55, T9z); + T59 = FMA(T57, T58, T56); + { + E TfD, T9D, T9K, T5g; + TfD = T9A + T9C; + T9D = T9A - T9C; + T9K = T59 - T5f; + T5g = T59 + T5f; + T9E = T9y - T9D; + Tda = T9y + T9D; + TfE = TfC - TfD; + ThO = TfC + TfD; + TfB = T53 - T5g; + T5h = T53 + T5g; + Td9 = T9J - T9K; + T9L = T9J + T9K; + } + } + } + } + { + E Tb2, Tdq, TfZ, Tg0, Tdp, Tb9; + { + E Tb4, T6i, Tb0, T6B, T6r, T6u, T6t, Tb6, T6o, TaX, T6s; + { + E T6x, T6A, T6z, TaZ, T6y; + { + E T6e, T6h, T6d, T6g, Tb3, T6f, T6w; + T6e = ri[WS(rs, 3)]; + TfO = TfB - TfE; + TfF = TfB + TfE; + Tag = FNMS(KP414213562, T9E, T9L); + T9M = FMA(KP414213562, T9L, T9E); + Tdb = FMA(KP414213562, Tda, Td9); + Tdi = FNMS(KP414213562, Td9, Tda); + T6h = ii[WS(rs, 3)]; + T6d = W[4]; + T6g = W[5]; + T6x = ri[WS(rs, 51)]; + T6A = ii[WS(rs, 51)]; + Tb3 = T6d * T6h; + T6f = T6d * T6e; + T6w = W[100]; + T6z = W[101]; + Tb4 = FNMS(T6g, T6e, Tb3); + T6i = FMA(T6g, T6h, T6f); + TaZ = T6w * T6A; + T6y = T6w * T6x; + } + { + E T6k, T6n, T6j, T6m, Tb5, T6l, T6q; + T6k = ri[WS(rs, 35)]; + T6n = ii[WS(rs, 35)]; + Tb0 = FNMS(T6z, T6x, TaZ); + T6B = FMA(T6z, T6A, T6y); + T6j = W[68]; + T6m = W[69]; + T6r = ri[WS(rs, 19)]; + T6u = ii[WS(rs, 19)]; + Tb5 = T6j * T6n; + T6l = T6j * T6k; + T6q = W[36]; + T6t = W[37]; + Tb6 = FNMS(T6m, T6k, Tb5); + T6o = FMA(T6m, T6n, T6l); + TaX = T6q * T6u; + T6s = T6q * T6r; + } + } + { + E Tb7, TfX, T6p, TaW, TaY, T6v; + Tb7 = Tb4 - Tb6; + TfX = Tb4 + Tb6; + T6p = T6i + T6o; + TaW = T6i - T6o; + TaY = FNMS(T6t, T6r, TaX); + T6v = FMA(T6t, T6u, T6s); + { + E TfY, Tb1, Tb8, T6C; + TfY = 
TaY + Tb0; + Tb1 = TaY - Tb0; + Tb8 = T6v - T6B; + T6C = T6v + T6B; + Tb2 = TaW - Tb1; + Tdq = TaW + Tb1; + TfZ = TfX - TfY; + ThY = TfX + TfY; + Tg0 = T6p - T6C; + T6D = T6p + T6C; + Tdp = Tb7 - Tb8; + Tb9 = Tb7 + Tb8; + } + } + } + { + E TaP, T6J, TaL, T72, T6S, T6V, T6U, TaR, T6P, TaI, T6T; + { + E T6Y, T71, T70, TaK, T6Z; + { + E T6F, T6I, T6E, T6H, TaO, T6G, T6X; + T6F = ri[WS(rs, 59)]; + Tge = Tg0 + TfZ; + Tg1 = TfZ - Tg0; + Tbo = FMA(KP414213562, Tb2, Tb9); + Tba = FNMS(KP414213562, Tb9, Tb2); + Tdr = FNMS(KP414213562, Tdq, Tdp); + TdA = FMA(KP414213562, Tdp, Tdq); + T6I = ii[WS(rs, 59)]; + T6E = W[116]; + T6H = W[117]; + T6Y = ri[WS(rs, 43)]; + T71 = ii[WS(rs, 43)]; + TaO = T6E * T6I; + T6G = T6E * T6F; + T6X = W[84]; + T70 = W[85]; + TaP = FNMS(T6H, T6F, TaO); + T6J = FMA(T6H, T6I, T6G); + TaK = T6X * T71; + T6Z = T6X * T6Y; + } + { + E T6L, T6O, T6K, T6N, TaQ, T6M, T6R; + T6L = ri[WS(rs, 27)]; + T6O = ii[WS(rs, 27)]; + TaL = FNMS(T70, T6Y, TaK); + T72 = FMA(T70, T71, T6Z); + T6K = W[52]; + T6N = W[53]; + T6S = ri[WS(rs, 11)]; + T6V = ii[WS(rs, 11)]; + TaQ = T6K * T6O; + T6M = T6K * T6L; + T6R = W[20]; + T6U = W[21]; + TaR = FNMS(T6N, T6L, TaQ); + T6P = FMA(T6N, T6O, T6M); + TaI = T6R * T6V; + T6T = T6R * T6S; + } + } + { + E TaS, Tg3, T6Q, TaH, TaJ, T6W; + TaS = TaP - TaR; + Tg3 = TaP + TaR; + T6Q = T6J + T6P; + TaH = T6J - T6P; + TaJ = FNMS(T6U, T6S, TaI); + T6W = FMA(T6U, T6V, T6T); + { + E Tg4, TaM, TaT, T73; + Tg4 = TaJ + TaL; + TaM = TaJ - TaL; + TaT = T6W - T72; + T73 = T6W + T72; + TaN = TaH - TaM; + Tdt = TaH + TaM; + Tg5 = Tg3 - Tg4; + ThZ = Tg3 + Tg4; + Tg2 = T6Q - T73; + T74 = T6Q + T73; + Tds = TaS - TaT; + TaU = TaS + TaT; + } + } + } + } + } + } + { + E Tgf, Tg6, Tbp, TaV, Tdu, TdB, Tje, Tjd, TjO, TjN; + { + E Thq, Tj7, Thy, ThA, Tht, Tj8, Thx, ThD, ThX, ThV, ThU, Ti0, ThM, ThK, ThJ; + E ThP, TiI, TiZ, TiL, Tj0; + { + E Tio, T1I, Tj1, T3v, Tj2, TiX, TiN, Tir, T76, TiK, TiC, TiG, T5j, Tit, Tiw; + E TiJ; + { + E TiO, TiW, Tip, Tiq; + { + E TO, 
T1H, T2B, T3u; + Thq = Tm - TN; + TO = Tm + TN; + Tgf = Tg2 - Tg5; + Tg6 = Tg2 + Tg5; + Tbp = FNMS(KP414213562, TaN, TaU); + TaV = FMA(KP414213562, TaU, TaN); + Tdu = FMA(KP414213562, Tdt, Tds); + TdB = FNMS(KP414213562, Tds, Tdt); + T1H = T1f + T1G; + Tj7 = T1G - T1f; + Thy = T29 - T2A; + T2B = T29 + T2A; + T3u = T32 + T3t; + ThA = T32 - T3t; + Tht = Thr - Ths; + TiO = Thr + Ths; + Tio = TO - T1H; + T1I = TO + T1H; + Tj1 = T3u - T2B; + T3v = T2B + T3u; + TiW = TiP + TiV; + Tj8 = TiV - TiP; + } + Thx = Thv - Thw; + Tip = Thv + Thw; + Tiq = ThB + ThC; + ThD = ThB - ThC; + { + E T6c, T75, Tiz, TiA; + ThX = T5K - T6b; + T6c = T5K + T6b; + Tj2 = TiW - TiO; + TiX = TiO + TiW; + TiN = Tip + Tiq; + Tir = Tip - Tiq; + T75 = T6D + T74; + ThV = T74 - T6D; + ThU = ThS - ThT; + Tiz = ThS + ThT; + TiA = ThY + ThZ; + Ti0 = ThY - ThZ; + { + E T4p, Tiy, TiB, T5i, Tiu, Tiv; + ThM = T3X - T4o; + T4p = T3X + T4o; + T76 = T6c + T75; + Tiy = T6c - T75; + TiK = Tiz + TiA; + TiB = Tiz - TiA; + T5i = T4Q + T5h; + ThK = T5h - T4Q; + ThJ = ThH - ThI; + Tiu = ThH + ThI; + Tiv = ThN + ThO; + ThP = ThN - ThO; + TiC = Tiy - TiB; + TiG = Tiy + TiB; + T5j = T4p + T5i; + Tit = T4p - T5i; + Tiw = Tiu - Tiv; + TiJ = Tiu + Tiv; + } + } + } + { + E TiE, Tis, TiD, Tj6, Tj5, Tj3, Tj4, TiH; + { + E T3w, TiF, Tix, T77, TiM, TiY; + TiI = T1I - T3v; + T3w = T1I + T3v; + TiF = Tiw - Tit; + Tix = Tit + Tiw; + T77 = T5j + T76; + TiZ = T76 - T5j; + TiL = TiJ - TiK; + TiM = TiJ + TiK; + TiY = TiN + TiX; + Tj0 = TiX - TiN; + TiE = Tio - Tir; + Tis = Tio + Tir; + ri[0] = T3w + T77; + ri[WS(rs, 32)] = T3w - T77; + ii[WS(rs, 32)] = TiY - TiM; + ii[0] = TiM + TiY; + TiD = Tix + TiC; + Tj6 = TiC - Tix; + Tj5 = Tj2 - Tj1; + Tj3 = Tj1 + Tj2; + Tj4 = TiF + TiG; + TiH = TiF - TiG; + } + ri[WS(rs, 8)] = FMA(KP707106781, TiD, Tis); + ri[WS(rs, 40)] = FNMS(KP707106781, TiD, Tis); + ii[WS(rs, 40)] = FNMS(KP707106781, Tj4, Tj3); + ii[WS(rs, 8)] = FMA(KP707106781, Tj4, Tj3); + ri[WS(rs, 24)] = FMA(KP707106781, TiH, TiE); + 
ri[WS(rs, 56)] = FNMS(KP707106781, TiH, TiE); + ii[WS(rs, 56)] = FNMS(KP707106781, Tj6, Tj5); + ii[WS(rs, 24)] = FMA(KP707106781, Tj6, Tj5); + } + } + { + E Ti8, Thu, Tjf, Tj9, Tib, Tjg, Tja, ThF, Tih, ThW, Tif, Til, Ti5, ThR; + ri[WS(rs, 16)] = TiI + TiL; + ri[WS(rs, 48)] = TiI - TiL; + ii[WS(rs, 48)] = Tj0 - TiZ; + ii[WS(rs, 16)] = TiZ + Tj0; + Ti8 = Thq + Tht; + Thu = Thq - Tht; + Tjf = Tj8 - Tj7; + Tj9 = Tj7 + Tj8; + { + E Tie, ThL, Tid, ThQ; + { + E Ti9, Thz, Tia, ThE; + Ti9 = Thy + Thx; + Thz = Thx - Thy; + Tia = ThA - ThD; + ThE = ThA + ThD; + Tib = Ti9 + Tia; + Tjg = Tia - Ti9; + Tja = Thz + ThE; + ThF = Thz - ThE; + Tie = ThJ + ThK; + ThL = ThJ - ThK; + } + Tid = ThM + ThP; + ThQ = ThM - ThP; + Tih = ThU + ThV; + ThW = ThU - ThV; + Tif = FMA(KP414213562, Tie, Tid); + Til = FNMS(KP414213562, Tid, Tie); + Ti5 = FNMS(KP414213562, ThL, ThQ); + ThR = FMA(KP414213562, ThQ, ThL); + } + { + E Ti4, ThG, Tjh, Tjj, Tig, Ti1; + Ti4 = FNMS(KP707106781, ThF, Thu); + ThG = FMA(KP707106781, ThF, Thu); + Tjh = FMA(KP707106781, Tjg, Tjf); + Tjj = FNMS(KP707106781, Tjg, Tjf); + Tig = ThX + Ti0; + Ti1 = ThX - Ti0; + { + E Tik, Tjb, Tjc, Tin; + { + E Tic, Tim, Ti6, Ti2, Tij, Tii; + Tik = FNMS(KP707106781, Tib, Ti8); + Tic = FMA(KP707106781, Tib, Ti8); + Tii = FNMS(KP414213562, Tih, Tig); + Tim = FMA(KP414213562, Tig, Tih); + Ti6 = FMA(KP414213562, ThW, Ti1); + Ti2 = FNMS(KP414213562, Ti1, ThW); + Tij = Tif + Tii; + Tje = Tii - Tif; + Tjd = FNMS(KP707106781, Tja, Tj9); + Tjb = FMA(KP707106781, Tja, Tj9); + { + E Ti7, Tji, Tjk, Ti3; + Ti7 = Ti5 + Ti6; + Tji = Ti6 - Ti5; + Tjk = ThR + Ti2; + Ti3 = ThR - Ti2; + ri[WS(rs, 4)] = FMA(KP923879532, Tij, Tic); + ri[WS(rs, 36)] = FNMS(KP923879532, Tij, Tic); + ri[WS(rs, 60)] = FMA(KP923879532, Ti7, Ti4); + ri[WS(rs, 28)] = FNMS(KP923879532, Ti7, Ti4); + ii[WS(rs, 44)] = FNMS(KP923879532, Tji, Tjh); + ii[WS(rs, 12)] = FMA(KP923879532, Tji, Tjh); + ii[WS(rs, 60)] = FMA(KP923879532, Tjk, Tjj); + ii[WS(rs, 28)] = FNMS(KP923879532, Tjk, Tjj); 
+ ri[WS(rs, 12)] = FMA(KP923879532, Ti3, ThG); + ri[WS(rs, 44)] = FNMS(KP923879532, Ti3, ThG); + Tjc = Til + Tim; + Tin = Til - Tim; + } + } + ii[WS(rs, 36)] = FNMS(KP923879532, Tjc, Tjb); + ii[WS(rs, 4)] = FMA(KP923879532, Tjc, Tjb); + ri[WS(rs, 20)] = FMA(KP923879532, Tin, Tik); + ri[WS(rs, 52)] = FNMS(KP923879532, Tin, Tik); + } + } + } + } + { + E TjD, TjJ, Tgo, Tf2, Tjp, Tjv, Tha, TgI, Tgd, Tgr, Tjw, Tjq, Tfp, Tgg, Thk; + E Tho, Th8, Th4, Tgv, TgB, Tgl, TfR, TjE, Thd, TjK, TgP, Tgx, Tg8, Thh, Thn; + E Th7, TgX; + { + E TgJ, TgK, TgM, TgN, Tg7, TfW, Th1, Thj, Th0, Th2; + { + E TgE, TeQ, TjB, Tjn, TgF, TgG, TjC, Tf1, TeV, Tf0; + TgE = TeM - TeP; + TeQ = TeM + TeP; + TjB = Tjm - Tjl; + Tjn = Tjl + Tjm; + TgF = TeU - TeR; + TeV = TeR + TeU; + ii[WS(rs, 52)] = FNMS(KP923879532, Tje, Tjd); + ii[WS(rs, 20)] = FMA(KP923879532, Tje, Tjd); + Tf0 = TeW - TeZ; + TgG = TeW + TeZ; + TjC = Tf0 - TeV; + Tf1 = TeV + Tf0; + { + E Tfi, Tgp, Tfd, Tfn; + { + E Tf7, Tjo, TgH, Tfc; + TgJ = Tf5 - Tf6; + Tf7 = Tf5 + Tf6; + TjD = FMA(KP707106781, TjC, TjB); + TjJ = FNMS(KP707106781, TjC, TjB); + Tgo = FMA(KP707106781, Tf1, TeQ); + Tf2 = FNMS(KP707106781, Tf1, TeQ); + Tjo = TgF + TgG; + TgH = TgF - TgG; + Tfc = Tf8 + Tfb; + TgK = Tf8 - Tfb; + TgM = Tfg - Tfh; + Tfi = Tfg + Tfh; + Tjp = FMA(KP707106781, Tjo, Tjn); + Tjv = FNMS(KP707106781, Tjo, Tjn); + Tha = FNMS(KP707106781, TgH, TgE); + TgI = FMA(KP707106781, TgH, TgE); + Tgp = FMA(KP414213562, Tf7, Tfc); + Tfd = FNMS(KP414213562, Tfc, Tf7); + Tfn = Tfj + Tfm; + TgN = Tfj - Tfm; + } + { + E TgY, TgZ, Tgq, Tfo; + Tgd = Tg9 + Tgc; + TgY = Tg9 - Tgc; + TgZ = Tg6 - Tg1; + Tg7 = Tg1 + Tg6; + TfW = TfU + TfV; + Th1 = TfU - TfV; + Tgq = FNMS(KP414213562, Tfi, Tfn); + Tfo = FMA(KP414213562, Tfn, Tfi); + Thj = FMA(KP707106781, TgZ, TgY); + Th0 = FNMS(KP707106781, TgZ, TgY); + Tgr = Tgp + Tgq; + Tjw = Tgq - Tgp; + Tjq = Tfd + Tfo; + Tfp = Tfd - Tfo; + Th2 = Tge - Tgf; + Tgg = Tge + Tgf; + } + } + } + { + E TgU, TgS, TgR, TgV, Thb, TgL; + { + E 
TfM, Tgu, TfH, TfP, Tgt, TfQ; + { + E Tfv, TfG, Thi, Th3; + TgU = Tft - Tfu; + Tfv = Tft + Tfu; + TfG = TfA + TfF; + TgS = TfF - TfA; + TgR = TfI - TfL; + TfM = TfI + TfL; + Thi = FMA(KP707106781, Th2, Th1); + Th3 = FNMS(KP707106781, Th2, Th1); + Tgu = FMA(KP707106781, TfG, Tfv); + TfH = FNMS(KP707106781, TfG, Tfv); + Thk = FNMS(KP198912367, Thj, Thi); + Tho = FMA(KP198912367, Thi, Thj); + Th8 = FMA(KP668178637, Th0, Th3); + Th4 = FNMS(KP668178637, Th3, Th0); + TfP = TfN + TfO; + TgV = TfN - TfO; + } + Tgt = FMA(KP707106781, TfP, TfM); + TfQ = FNMS(KP707106781, TfP, TfM); + Thb = FNMS(KP414213562, TgJ, TgK); + TgL = FMA(KP414213562, TgK, TgJ); + Tgv = FMA(KP198912367, Tgu, Tgt); + TgB = FNMS(KP198912367, Tgt, Tgu); + Tgl = FNMS(KP668178637, TfH, TfQ); + TfR = FMA(KP668178637, TfQ, TfH); + } + { + E Thg, TgT, Thc, TgO, Thf, TgW; + Thc = FMA(KP414213562, TgM, TgN); + TgO = FNMS(KP414213562, TgN, TgM); + Thg = FMA(KP707106781, TgS, TgR); + TgT = FNMS(KP707106781, TgS, TgR); + TjE = Thc - Thb; + Thd = Thb + Thc; + TjK = TgL + TgO; + TgP = TgL - TgO; + Thf = FMA(KP707106781, TgV, TgU); + TgW = FNMS(KP707106781, TgV, TgU); + Tgx = FMA(KP707106781, Tg7, TfW); + Tg8 = FNMS(KP707106781, Tg7, TfW); + Thh = FMA(KP198912367, Thg, Thf); + Thn = FNMS(KP198912367, Thf, Thg); + Th7 = FNMS(KP668178637, TgT, TgW); + TgX = FMA(KP668178637, TgW, TgT); + } + } + } + { + E Tju, Tjt, TjI, TjH; + { + E Tgk, Tfq, Tjx, Tjz, Tgw, Tgh; + Tgk = FNMS(KP923879532, Tfp, Tf2); + Tfq = FMA(KP923879532, Tfp, Tf2); + Tjx = FMA(KP923879532, Tjw, Tjv); + Tjz = FNMS(KP923879532, Tjw, Tjv); + Tgw = FMA(KP707106781, Tgg, Tgd); + Tgh = FNMS(KP707106781, Tgg, Tgd); + { + E TgA, Tjr, Tjs, TgD; + { + E Tgs, TgC, Tgm, Tgi, Tgz, Tgy; + TgA = FNMS(KP923879532, Tgr, Tgo); + Tgs = FMA(KP923879532, Tgr, Tgo); + Tgy = FNMS(KP198912367, Tgx, Tgw); + TgC = FMA(KP198912367, Tgw, Tgx); + Tgm = FMA(KP668178637, Tg8, Tgh); + Tgi = FNMS(KP668178637, Tgh, Tg8); + Tgz = Tgv + Tgy; + Tju = Tgy - Tgv; + Tjt = FNMS(KP923879532, 
Tjq, Tjp); + Tjr = FMA(KP923879532, Tjq, Tjp); + { + E Tgn, Tjy, TjA, Tgj; + Tgn = Tgl + Tgm; + Tjy = Tgm - Tgl; + TjA = TfR + Tgi; + Tgj = TfR - Tgi; + ri[WS(rs, 2)] = FMA(KP980785280, Tgz, Tgs); + ri[WS(rs, 34)] = FNMS(KP980785280, Tgz, Tgs); + ri[WS(rs, 58)] = FMA(KP831469612, Tgn, Tgk); + ri[WS(rs, 26)] = FNMS(KP831469612, Tgn, Tgk); + ii[WS(rs, 42)] = FNMS(KP831469612, Tjy, Tjx); + ii[WS(rs, 10)] = FMA(KP831469612, Tjy, Tjx); + ii[WS(rs, 58)] = FMA(KP831469612, TjA, Tjz); + ii[WS(rs, 26)] = FNMS(KP831469612, TjA, Tjz); + ri[WS(rs, 10)] = FMA(KP831469612, Tgj, Tfq); + ri[WS(rs, 42)] = FNMS(KP831469612, Tgj, Tfq); + Tjs = TgB + TgC; + TgD = TgB - TgC; + } + } + ii[WS(rs, 34)] = FNMS(KP980785280, Tjs, Tjr); + ii[WS(rs, 2)] = FMA(KP980785280, Tjs, Tjr); + ri[WS(rs, 18)] = FMA(KP980785280, TgD, TgA); + ri[WS(rs, 50)] = FNMS(KP980785280, TgD, TgA); + } + } + { + E Th6, TjF, TjG, Th9, TgQ, Th5; + Th6 = FNMS(KP923879532, TgP, TgI); + TgQ = FMA(KP923879532, TgP, TgI); + Th5 = TgX + Th4; + TjI = Th4 - TgX; + TjH = FNMS(KP923879532, TjE, TjD); + TjF = FMA(KP923879532, TjE, TjD); + ii[WS(rs, 50)] = FNMS(KP980785280, Tju, Tjt); + ii[WS(rs, 18)] = FMA(KP980785280, Tju, Tjt); + ri[WS(rs, 6)] = FMA(KP831469612, Th5, TgQ); + ri[WS(rs, 38)] = FNMS(KP831469612, Th5, TgQ); + TjG = Th7 + Th8; + Th9 = Th7 - Th8; + ii[WS(rs, 38)] = FNMS(KP831469612, TjG, TjF); + ii[WS(rs, 6)] = FMA(KP831469612, TjG, TjF); + ri[WS(rs, 22)] = FMA(KP831469612, Th9, Th6); + ri[WS(rs, 54)] = FNMS(KP831469612, Th9, Th6); + } + { + E Thm, TjL, TjM, Thp, The, Thl; + Thm = FMA(KP923879532, Thd, Tha); + The = FNMS(KP923879532, Thd, Tha); + Thl = Thh - Thk; + TjO = Thh + Thk; + TjN = FMA(KP923879532, TjK, TjJ); + TjL = FNMS(KP923879532, TjK, TjJ); + ii[WS(rs, 54)] = FNMS(KP831469612, TjI, TjH); + ii[WS(rs, 22)] = FMA(KP831469612, TjI, TjH); + ri[WS(rs, 14)] = FMA(KP980785280, Thl, The); + ri[WS(rs, 46)] = FNMS(KP980785280, Thl, The); + TjM = Tho - Thn; + Thp = Thn + Tho; + ii[WS(rs, 46)] = FNMS(KP980785280, 
TjM, TjL); + ii[WS(rs, 14)] = FMA(KP980785280, TjM, TjL); + ri[WS(rs, 62)] = FMA(KP980785280, Thp, Thm); + ri[WS(rs, 30)] = FNMS(KP980785280, Thp, Thm); + } + } + } + { + E TjS, TcD, Tcw, TkO, TkN, Tcz; + { + E TbB, Tkw, Tkq, T99, TbF, TbL, Tbv, Taj, Tcu, Tcy, Tci, Tce, Tcr, Tcx, Tch; + E Tc7, TkE, Tcn, TkK, TbZ, TbP, T7J, TbO, T7u, TkB, Tkn, TbI, TbM, Tbw, Tbs; + E T7Y, TbQ; + { + E TbT, TbU, TbW, TbX, Tc4, Tc2, Tc1, Tc5, Tbn, Tbb, TaG, Tcb, Tct, Tca, Tcc; + E Tbq, Tcl, TbV; + { + E T8W, Tbz, T8z, T97, T8n, T8y; + TbT = FMA(KP707106781, T8m, T87); + T8n = FNMS(KP707106781, T8m, T87); + T8y = FNMS(KP707106781, T8x, T8u); + TbU = FMA(KP707106781, T8x, T8u); + TbW = FMA(KP707106781, T8V, T8G); + T8W = FNMS(KP707106781, T8V, T8G); + ii[WS(rs, 62)] = FMA(KP980785280, TjO, TjN); + ii[WS(rs, 30)] = FNMS(KP980785280, TjO, TjN); + Tbz = FMA(KP668178637, T8n, T8y); + T8z = FNMS(KP668178637, T8y, T8n); + T97 = FNMS(KP707106781, T96, T93); + TbX = FMA(KP707106781, T96, T93); + { + E Tae, TbE, Ta3, Tah; + { + E T9x, Ta2, TbA, T98; + Tc4 = FMA(KP707106781, T9w, T9h); + T9x = FNMS(KP707106781, T9w, T9h); + Ta2 = T9M - Ta1; + Tc2 = Ta1 + T9M; + Tc1 = FMA(KP707106781, Tad, Taa); + Tae = FNMS(KP707106781, Tad, Taa); + TbA = FNMS(KP668178637, T8W, T97); + T98 = FMA(KP668178637, T97, T8W); + TbE = FMA(KP923879532, Ta2, T9x); + Ta3 = FNMS(KP923879532, Ta2, T9x); + TbB = Tbz + TbA; + Tkw = TbA - Tbz; + Tkq = T8z + T98; + T99 = T8z - T98; + Tah = Taf - Tag; + Tc5 = Taf + Tag; + } + { + E Tc8, Tc9, TbD, Tai; + Tbn = FNMS(KP707106781, Tbm, Tbj); + Tc8 = FMA(KP707106781, Tbm, Tbj); + Tc9 = Tba + TaV; + Tbb = TaV - Tba; + TaG = FNMS(KP707106781, TaF, Taq); + Tcb = FMA(KP707106781, TaF, Taq); + TbD = FMA(KP923879532, Tah, Tae); + Tai = FNMS(KP923879532, Tah, Tae); + Tct = FMA(KP923879532, Tc9, Tc8); + Tca = FNMS(KP923879532, Tc9, Tc8); + TbF = FMA(KP303346683, TbE, TbD); + TbL = FNMS(KP303346683, TbD, TbE); + Tbv = FNMS(KP534511135, Ta3, Tai); + Taj = FMA(KP534511135, Tai, Ta3); + Tcc = Tbo 
+ Tbp; + Tbq = Tbo - Tbp; + } + } + } + { + E Tcq, Tc3, Tcs, Tcd, Tcp, Tc6; + Tcs = FMA(KP923879532, Tcc, Tcb); + Tcd = FNMS(KP923879532, Tcc, Tcb); + Tcq = FMA(KP923879532, Tc2, Tc1); + Tc3 = FNMS(KP923879532, Tc2, Tc1); + Tcu = FNMS(KP098491403, Tct, Tcs); + Tcy = FMA(KP098491403, Tcs, Tct); + Tci = FMA(KP820678790, Tca, Tcd); + Tce = FNMS(KP820678790, Tcd, Tca); + Tcp = FMA(KP923879532, Tc5, Tc4); + Tc6 = FNMS(KP923879532, Tc5, Tc4); + Tcl = FNMS(KP198912367, TbT, TbU); + TbV = FMA(KP198912367, TbU, TbT); + Tcr = FMA(KP098491403, Tcq, Tcp); + Tcx = FNMS(KP098491403, Tcp, Tcq); + Tch = FNMS(KP820678790, Tc3, Tc6); + Tc7 = FMA(KP820678790, Tc6, Tc3); + } + { + E TbH, Tbc, Tcm, TbY; + Tcm = FMA(KP198912367, TbW, TbX); + TbY = FNMS(KP198912367, TbX, TbW); + TbH = FMA(KP923879532, Tbb, TaG); + Tbc = FNMS(KP923879532, Tbb, TaG); + TkE = Tcm - Tcl; + Tcn = Tcl + Tcm; + TkK = TbV + TbY; + TbZ = TbV - TbY; + { + E T7t, Tkm, TbG, Tbr; + TjS = T7l + T7s; + T7t = T7l - T7s; + Tkm = TcC - TcB; + TcD = TcB + TcC; + TbP = FNMS(KP414213562, T7B, T7I); + T7J = FMA(KP414213562, T7I, T7B); + TbG = FMA(KP923879532, Tbq, Tbn); + Tbr = FNMS(KP923879532, Tbq, Tbn); + TbO = FNMS(KP707106781, T7t, T7e); + T7u = FMA(KP707106781, T7t, T7e); + TkB = FNMS(KP707106781, Tkm, Tkl); + Tkn = FMA(KP707106781, Tkm, Tkl); + TbI = FNMS(KP303346683, TbH, TbG); + TbM = FMA(KP303346683, TbG, TbH); + Tbw = FMA(KP534511135, Tbc, Tbr); + Tbs = FNMS(KP534511135, Tbr, Tbc); + T7Y = FNMS(KP414213562, T7X, T7Q); + TbQ = FMA(KP414213562, T7Q, T7X); + } + } + } + { + E TkJ, TkD, Tck, TbS, TbK, Tku, Tkt, TbN; + { + E TkA, Tby, Tkp, Tbu, Tkz, Tbx; + { + E Tbt, T9a, Tkx, Tky, Tkv; + TkA = Taj + Tbs; + Tbt = Taj - Tbs; + { + E TkC, T7Z, Tko, TbR, T80; + TkC = T7J + T7Y; + T7Z = T7J - T7Y; + Tko = TbQ - TbP; + TbR = TbP + TbQ; + TkJ = FMA(KP923879532, TkC, TkB); + TkD = FNMS(KP923879532, TkC, TkB); + Tby = FMA(KP923879532, T7Z, T7u); + T80 = FNMS(KP923879532, T7Z, T7u); + Tkv = FNMS(KP923879532, Tko, Tkn); + Tkp = 
FMA(KP923879532, Tko, Tkn); + Tck = FMA(KP923879532, TbR, TbO); + TbS = FNMS(KP923879532, TbR, TbO); + T9a = FMA(KP831469612, T99, T80); + Tbu = FNMS(KP831469612, T99, T80); + } + Tkz = FNMS(KP831469612, Tkw, Tkv); + Tkx = FMA(KP831469612, Tkw, Tkv); + Tky = Tbw - Tbv; + Tbx = Tbv + Tbw; + ri[WS(rs, 11)] = FMA(KP881921264, Tbt, T9a); + ri[WS(rs, 43)] = FNMS(KP881921264, Tbt, T9a); + ii[WS(rs, 43)] = FNMS(KP881921264, Tky, Tkx); + ii[WS(rs, 11)] = FMA(KP881921264, Tky, Tkx); + } + { + E TbC, TbJ, Tkr, Tks; + TbK = FNMS(KP831469612, TbB, Tby); + TbC = FMA(KP831469612, TbB, Tby); + ri[WS(rs, 59)] = FMA(KP881921264, Tbx, Tbu); + ri[WS(rs, 27)] = FNMS(KP881921264, Tbx, Tbu); + ii[WS(rs, 59)] = FMA(KP881921264, TkA, Tkz); + ii[WS(rs, 27)] = FNMS(KP881921264, TkA, Tkz); + TbJ = TbF + TbI; + Tku = TbI - TbF; + Tkt = FNMS(KP831469612, Tkq, Tkp); + Tkr = FMA(KP831469612, Tkq, Tkp); + Tks = TbL + TbM; + TbN = TbL - TbM; + ri[WS(rs, 3)] = FMA(KP956940335, TbJ, TbC); + ri[WS(rs, 35)] = FNMS(KP956940335, TbJ, TbC); + ii[WS(rs, 35)] = FNMS(KP956940335, Tks, Tkr); + ii[WS(rs, 3)] = FMA(KP956940335, Tks, Tkr); + } + } + { + E Tcg, TkI, TkH, Tcj; + { + E Tc0, Tcf, TkF, TkG; + Tcg = FNMS(KP980785280, TbZ, TbS); + Tc0 = FMA(KP980785280, TbZ, TbS); + ri[WS(rs, 19)] = FMA(KP956940335, TbN, TbK); + ri[WS(rs, 51)] = FNMS(KP956940335, TbN, TbK); + ii[WS(rs, 51)] = FNMS(KP956940335, Tku, Tkt); + ii[WS(rs, 19)] = FMA(KP956940335, Tku, Tkt); + Tcf = Tc7 + Tce; + TkI = Tce - Tc7; + TkH = FNMS(KP980785280, TkE, TkD); + TkF = FMA(KP980785280, TkE, TkD); + TkG = Tch + Tci; + Tcj = Tch - Tci; + ri[WS(rs, 7)] = FMA(KP773010453, Tcf, Tc0); + ri[WS(rs, 39)] = FNMS(KP773010453, Tcf, Tc0); + ii[WS(rs, 39)] = FNMS(KP773010453, TkG, TkF); + ii[WS(rs, 7)] = FMA(KP773010453, TkG, TkF); + } + { + E Tco, Tcv, TkL, TkM; + Tcw = FMA(KP980785280, Tcn, Tck); + Tco = FNMS(KP980785280, Tcn, Tck); + ri[WS(rs, 23)] = FMA(KP773010453, Tcj, Tcg); + ri[WS(rs, 55)] = FNMS(KP773010453, Tcj, Tcg); + ii[WS(rs, 55)] = 
FNMS(KP773010453, TkI, TkH); + ii[WS(rs, 23)] = FMA(KP773010453, TkI, TkH); + Tcv = Tcr - Tcu; + TkO = Tcr + Tcu; + TkN = FMA(KP980785280, TkK, TkJ); + TkL = FNMS(KP980785280, TkK, TkJ); + TkM = Tcy - Tcx; + Tcz = Tcx + Tcy; + ri[WS(rs, 15)] = FMA(KP995184726, Tcv, Tco); + ri[WS(rs, 47)] = FNMS(KP995184726, Tcv, Tco); + ii[WS(rs, 47)] = FNMS(KP995184726, TkM, TkL); + ii[WS(rs, 15)] = FMA(KP995184726, TkM, TkL); + } + } + } + } + { + E TdN, Tk2, TjW, Td1, TdR, TdX, TdH, Tdl, TeG, TeK, Teu, Teq, TeD, TeJ, Tet; + E Tej, Tka, Tez, Tkg, Teb, Te1, TcH, Te0, TcE, Tk7, TjT, TdU, TdY, TdI, TdE; + E TcK, Te2; + { + E Te5, Te6, Te8, Te9, Teg, Tee, Ted, Teh, Tdz, Tdv, Tdo, Ten, TeF, Tem, Teo; + E TdC, Tex, Te7; + { + E TcP, TcS, TcW, TcZ; + Te5 = FNMS(KP707106781, TcO, TcN); + TcP = FMA(KP707106781, TcO, TcN); + ri[WS(rs, 63)] = FMA(KP995184726, Tcz, Tcw); + ri[WS(rs, 31)] = FNMS(KP995184726, Tcz, Tcw); + ii[WS(rs, 63)] = FMA(KP995184726, TkO, TkN); + ii[WS(rs, 31)] = FNMS(KP995184726, TkO, TkN); + TcS = FMA(KP707106781, TcR, TcQ); + Te6 = FNMS(KP707106781, TcR, TcQ); + Te8 = FNMS(KP707106781, TcV, TcU); + TcW = FMA(KP707106781, TcV, TcU); + TcZ = FMA(KP707106781, TcY, TcX); + Te9 = FNMS(KP707106781, TcY, TcX); + { + E Tdg, TdQ, Tdd, Tdj; + { + E Td5, TdL, TcT, TdM, Td0, Tdc; + Teg = FNMS(KP707106781, Td4, Td3); + Td5 = FMA(KP707106781, Td4, Td3); + TdL = FMA(KP198912367, TcP, TcS); + TcT = FNMS(KP198912367, TcS, TcP); + TdM = FNMS(KP198912367, TcW, TcZ); + Td0 = FMA(KP198912367, TcZ, TcW); + Tdc = Td8 + Tdb; + Tee = Tdb - Td8; + Ted = FNMS(KP707106781, Tdf, Tde); + Tdg = FMA(KP707106781, Tdf, Tde); + TdN = TdL + TdM; + Tk2 = TdM - TdL; + TjW = TcT + Td0; + Td1 = TcT - Td0; + TdQ = FMA(KP923879532, Tdc, Td5); + Tdd = FNMS(KP923879532, Tdc, Td5); + Tdj = Tdh + Tdi; + Teh = Tdh - Tdi; + } + { + E Tek, Tel, TdP, Tdk; + Tdz = FMA(KP707106781, Tdy, Tdx); + Tek = FNMS(KP707106781, Tdy, Tdx); + Tel = Tdu - Tdr; + Tdv = Tdr + Tdu; + Tdo = FMA(KP707106781, Tdn, Tdm); + Ten = 
FNMS(KP707106781, Tdn, Tdm); + TdP = FMA(KP923879532, Tdj, Tdg); + Tdk = FNMS(KP923879532, Tdj, Tdg); + TeF = FMA(KP923879532, Tel, Tek); + Tem = FNMS(KP923879532, Tel, Tek); + TdR = FMA(KP098491403, TdQ, TdP); + TdX = FNMS(KP098491403, TdP, TdQ); + TdH = FNMS(KP820678790, Tdd, Tdk); + Tdl = FMA(KP820678790, Tdk, Tdd); + Teo = TdA - TdB; + TdC = TdA + TdB; + } + } + } + { + E TeC, Tef, TeE, Tep, TeB, Tei; + TeE = FMA(KP923879532, Teo, Ten); + Tep = FNMS(KP923879532, Teo, Ten); + TeC = FMA(KP923879532, Tee, Ted); + Tef = FNMS(KP923879532, Tee, Ted); + TeG = FNMS(KP303346683, TeF, TeE); + TeK = FMA(KP303346683, TeE, TeF); + Teu = FMA(KP534511135, Tem, Tep); + Teq = FNMS(KP534511135, Tep, Tem); + TeB = FMA(KP923879532, Teh, Teg); + Tei = FNMS(KP923879532, Teh, Teg); + Tex = FNMS(KP668178637, Te5, Te6); + Te7 = FMA(KP668178637, Te6, Te5); + TeD = FMA(KP303346683, TeC, TeB); + TeJ = FNMS(KP303346683, TeB, TeC); + Tet = FNMS(KP534511135, Tef, Tei); + Tej = FMA(KP534511135, Tei, Tef); + } + { + E TdT, Tdw, Tey, Tea, TdS, TdD; + Tey = FMA(KP668178637, Te8, Te9); + Tea = FNMS(KP668178637, Te9, Te8); + TdT = FMA(KP923879532, Tdv, Tdo); + Tdw = FNMS(KP923879532, Tdv, Tdo); + Tka = Tey - Tex; + Tez = Tex + Tey; + Tkg = Te7 + Tea; + Teb = Te7 - Tea; + Te1 = FNMS(KP414213562, TcF, TcG); + TcH = FMA(KP414213562, TcG, TcF); + TdS = FMA(KP923879532, TdC, Tdz); + TdD = FNMS(KP923879532, TdC, Tdz); + Te0 = FNMS(KP707106781, TcD, TcA); + TcE = FMA(KP707106781, TcD, TcA); + Tk7 = FNMS(KP707106781, TjS, TjR); + TjT = FMA(KP707106781, TjS, TjR); + TdU = FNMS(KP098491403, TdT, TdS); + TdY = FMA(KP098491403, TdS, TdT); + TdI = FMA(KP820678790, Tdw, TdD); + TdE = FNMS(KP820678790, TdD, Tdw); + TcK = FNMS(KP414213562, TcJ, TcI); + Te2 = FMA(KP414213562, TcI, TcJ); + } + } + { + E Tkf, Tk9, Tew, Te4, TdW, Tk0, TjZ, TdZ; + { + E Tk6, TdK, TjV, TdG, Tk5, TdJ; + { + E TdF, Td2, Tk3, Tk4, Tk1; + Tk6 = Tdl + TdE; + TdF = Tdl - TdE; + { + E Tk8, TcL, TjU, Te3, TcM; + Tk8 = TcK - TcH; + TcL = TcH + 
TcK; + TjU = Te1 + Te2; + Te3 = Te1 - Te2; + Tkf = FNMS(KP923879532, Tk8, Tk7); + Tk9 = FMA(KP923879532, Tk8, Tk7); + TdK = FMA(KP923879532, TcL, TcE); + TcM = FNMS(KP923879532, TcL, TcE); + Tk1 = FNMS(KP923879532, TjU, TjT); + TjV = FMA(KP923879532, TjU, TjT); + Tew = FNMS(KP923879532, Te3, Te0); + Te4 = FMA(KP923879532, Te3, Te0); + Td2 = FMA(KP980785280, Td1, TcM); + TdG = FNMS(KP980785280, Td1, TcM); + } + Tk5 = FNMS(KP980785280, Tk2, Tk1); + Tk3 = FMA(KP980785280, Tk2, Tk1); + Tk4 = TdI - TdH; + TdJ = TdH + TdI; + ri[WS(rs, 9)] = FMA(KP773010453, TdF, Td2); + ri[WS(rs, 41)] = FNMS(KP773010453, TdF, Td2); + ii[WS(rs, 41)] = FNMS(KP773010453, Tk4, Tk3); + ii[WS(rs, 9)] = FMA(KP773010453, Tk4, Tk3); + } + { + E TdO, TdV, TjX, TjY; + TdW = FNMS(KP980785280, TdN, TdK); + TdO = FMA(KP980785280, TdN, TdK); + ri[WS(rs, 57)] = FMA(KP773010453, TdJ, TdG); + ri[WS(rs, 25)] = FNMS(KP773010453, TdJ, TdG); + ii[WS(rs, 57)] = FMA(KP773010453, Tk6, Tk5); + ii[WS(rs, 25)] = FNMS(KP773010453, Tk6, Tk5); + TdV = TdR + TdU; + Tk0 = TdU - TdR; + TjZ = FNMS(KP980785280, TjW, TjV); + TjX = FMA(KP980785280, TjW, TjV); + TjY = TdX + TdY; + TdZ = TdX - TdY; + ri[WS(rs, 1)] = FMA(KP995184726, TdV, TdO); + ri[WS(rs, 33)] = FNMS(KP995184726, TdV, TdO); + ii[WS(rs, 33)] = FNMS(KP995184726, TjY, TjX); + ii[WS(rs, 1)] = FMA(KP995184726, TjY, TjX); + } + } + { + E Tes, Tke, Tkd, Tev; + { + E Tec, Ter, Tkb, Tkc; + Tes = FNMS(KP831469612, Teb, Te4); + Tec = FMA(KP831469612, Teb, Te4); + ri[WS(rs, 17)] = FMA(KP995184726, TdZ, TdW); + ri[WS(rs, 49)] = FNMS(KP995184726, TdZ, TdW); + ii[WS(rs, 49)] = FNMS(KP995184726, Tk0, TjZ); + ii[WS(rs, 17)] = FMA(KP995184726, Tk0, TjZ); + Ter = Tej + Teq; + Tke = Teq - Tej; + Tkd = FNMS(KP831469612, Tka, Tk9); + Tkb = FMA(KP831469612, Tka, Tk9); + Tkc = Tet + Teu; + Tev = Tet - Teu; + ri[WS(rs, 5)] = FMA(KP881921264, Ter, Tec); + ri[WS(rs, 37)] = FNMS(KP881921264, Ter, Tec); + ii[WS(rs, 37)] = FNMS(KP881921264, Tkc, Tkb); + ii[WS(rs, 5)] = FMA(KP881921264, 
Tkc, Tkb); + } + { + E TeA, TeH, Tkh, Tki; + TeI = FMA(KP831469612, Tez, Tew); + TeA = FNMS(KP831469612, Tez, Tew); + ri[WS(rs, 21)] = FMA(KP881921264, Tev, Tes); + ri[WS(rs, 53)] = FNMS(KP881921264, Tev, Tes); + ii[WS(rs, 53)] = FNMS(KP881921264, Tke, Tkd); + ii[WS(rs, 21)] = FMA(KP881921264, Tke, Tkd); + TeH = TeD - TeG; + Tkk = TeD + TeG; + Tkj = FMA(KP831469612, Tkg, Tkf); + Tkh = FNMS(KP831469612, Tkg, Tkf); + Tki = TeK - TeJ; + TeL = TeJ + TeK; + ri[WS(rs, 13)] = FMA(KP956940335, TeH, TeA); + ri[WS(rs, 45)] = FNMS(KP956940335, TeH, TeA); + ii[WS(rs, 45)] = FNMS(KP956940335, Tki, Tkh); + ii[WS(rs, 13)] = FMA(KP956940335, Tki, Tkh); + } + } + } + } + } + } + } + ri[WS(rs, 61)] = FMA(KP956940335, TeL, TeI); + ri[WS(rs, 29)] = FNMS(KP956940335, TeL, TeI); + ii[WS(rs, 61)] = FMA(KP956940335, Tkk, Tkj); + ii[WS(rs, 29)] = FNMS(KP956940335, Tkk, Tkj); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 64}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 64, "t1_64", twinstr, &GENUS, {520, 126, 518, 0}, 0, 0, 0 }; + +void X(codelet_t1_64) (planner *p) { + X(kdft_dit_register) (p, t1_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 64 -name t1_64 -include t.h */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 176 stack variables, 15 constants, and 256 memory accesses + */ +#include "t.h" + +static void t1_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP773010453, 
+0.773010453362736960810906609758469800971041293); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 126); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tj, TcL, ThT, Tin, T6b, Taz, TgT, Thn, TG, Thm, TcO, TgO, T6m, ThQ, TaC; + E Tim, T14, Tfq, T6y, T9O, TaG, Tc0, TcU, TeE, T1r, Tfr, T6J, T9P, TaJ, Tc1; + E TcZ, TeF, T1Q, T2d, Tfx, Tfu, Tfv, Tfw, T6Q, TaM, Tdb, TeJ, T71, TaQ, T7a; + E TaN, Td6, TeI, T77, TaP, T2B, T2Y, Tfz, TfA, TfB, TfC, T7h, TaW, Tdm, TeM; + E T7s, TaU, T7B, TaX, Tdh, TeL, T7y, TaT, T5j, TfR, Tec, Tf0, TfY, Tgy, T8D; + E Tbl, T8O, Tbx, T9l, Tbm, TdV, TeX, T9i, Tbw, T3M, TfL, TdL, TeQ, TfI, Tgt; + E T7K, Tb2, T7V, Tbe, T8s, Tb3, Tdu, TeT, T8p, Tbd, T4x, TfJ, TdE, TdM, TfO; + E Tgu, T87, T8v, T8i, T8u, Tba, Tbg, Tdz, TdN, Tb7, Tbh, T64, TfZ, Te5, Ted; + E TfU, Tgz, T90, T9o, T9b, T9n, Tbt, Tbz, Te0, Tee, Tbq, TbA; + { + E T1, TgR, T6, TgQ, Tc, T68, Th, T69; + T1 = ri[0]; + TgR = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 32)]; + T5 = ii[WS(rs, 32)]; + T2 = W[62]; + T4 = W[63]; + T6 = FMA(T2, T3, T4 * T5); + TgQ = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 16)]; + Tb = ii[WS(rs, 16)]; + T8 = W[30]; + Ta = W[31]; + Tc = FMA(T8, T9, Ta * Tb); + T68 = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 48)]; + Tg = ii[WS(rs, 48)]; + Td = W[94]; + Tf = 
W[95]; + Th = FMA(Td, Te, Tf * Tg); + T69 = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, ThR, ThS; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 + Ti; + TcL = T7 - Ti; + ThR = TgR - TgQ; + ThS = Tc - Th; + ThT = ThR - ThS; + Tin = ThS + ThR; + } + { + E T67, T6a, TgP, TgS; + T67 = T1 - T6; + T6a = T68 - T69; + T6b = T67 - T6a; + Taz = T67 + T6a; + TgP = T68 + T69; + TgS = TgQ + TgR; + TgT = TgP + TgS; + Thn = TgS - TgP; + } + } + { + E To, T6c, Tt, T6d, T6e, T6f, Tz, T6i, TE, T6j, T6h, T6k; + { + E Tl, Tn, Tk, Tm; + Tl = ri[WS(rs, 8)]; + Tn = ii[WS(rs, 8)]; + Tk = W[14]; + Tm = W[15]; + To = FMA(Tk, Tl, Tm * Tn); + T6c = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = ri[WS(rs, 40)]; + Ts = ii[WS(rs, 40)]; + Tp = W[78]; + Tr = W[79]; + Tt = FMA(Tp, Tq, Tr * Ts); + T6d = FNMS(Tr, Tq, Tp * Ts); + } + T6e = T6c - T6d; + T6f = To - Tt; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 56)]; + Ty = ii[WS(rs, 56)]; + Tv = W[110]; + Tx = W[111]; + Tz = FMA(Tv, Tw, Tx * Ty); + T6i = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 24)]; + TD = ii[WS(rs, 24)]; + TA = W[46]; + TC = W[47]; + TE = FMA(TA, TB, TC * TD); + T6j = FNMS(TC, TB, TA * TD); + } + T6h = Tz - TE; + T6k = T6i - T6j; + { + E Tu, TF, TcM, TcN; + Tu = To + Tt; + TF = Tz + TE; + TG = Tu + TF; + Thm = TF - Tu; + TcM = T6c + T6d; + TcN = T6i + T6j; + TcO = TcM - TcN; + TgO = TcM + TcN; + } + { + E T6g, T6l, TaA, TaB; + T6g = T6e - T6f; + T6l = T6h + T6k; + T6m = KP707106781 * (T6g - T6l); + ThQ = KP707106781 * (T6g + T6l); + TaA = T6f + T6e; + TaB = T6h - T6k; + TaC = KP707106781 * (TaA + TaB); + Tim = KP707106781 * (TaB - TaA); + } + } + { + E TS, TcQ, T6q, T6t, T13, TcR, T6r, T6w, T6s, T6x; + { + E TM, T6o, TR, T6p; + { + E TJ, TL, TI, TK; + TJ = ri[WS(rs, 4)]; + TL = ii[WS(rs, 4)]; + TI = W[6]; + TK = W[7]; + TM = FMA(TI, TJ, TK * TL); + T6o = FNMS(TK, TJ, TI * TL); + } + { + E TO, TQ, TN, TP; + TO = ri[WS(rs, 36)]; + TQ = ii[WS(rs, 36)]; + TN = W[70]; + TP = W[71]; + TR = FMA(TN, TO, TP * 
TQ); + T6p = FNMS(TP, TO, TN * TQ); + } + TS = TM + TR; + TcQ = T6o + T6p; + T6q = T6o - T6p; + T6t = TM - TR; + } + { + E TX, T6u, T12, T6v; + { + E TU, TW, TT, TV; + TU = ri[WS(rs, 20)]; + TW = ii[WS(rs, 20)]; + TT = W[38]; + TV = W[39]; + TX = FMA(TT, TU, TV * TW); + T6u = FNMS(TV, TU, TT * TW); + } + { + E TZ, T11, TY, T10; + TZ = ri[WS(rs, 52)]; + T11 = ii[WS(rs, 52)]; + TY = W[102]; + T10 = W[103]; + T12 = FMA(TY, TZ, T10 * T11); + T6v = FNMS(T10, TZ, TY * T11); + } + T13 = TX + T12; + TcR = T6u + T6v; + T6r = TX - T12; + T6w = T6u - T6v; + } + T14 = TS + T13; + Tfq = TcQ + TcR; + T6s = T6q + T6r; + T6x = T6t - T6w; + T6y = FNMS(KP923879532, T6x, KP382683432 * T6s); + T9O = FMA(KP923879532, T6s, KP382683432 * T6x); + { + E TaE, TaF, TcS, TcT; + TaE = T6q - T6r; + TaF = T6t + T6w; + TaG = FNMS(KP382683432, TaF, KP923879532 * TaE); + Tc0 = FMA(KP382683432, TaE, KP923879532 * TaF); + TcS = TcQ - TcR; + TcT = TS - T13; + TcU = TcS - TcT; + TeE = TcT + TcS; + } + } + { + E T1f, TcW, T6B, T6E, T1q, TcX, T6C, T6H, T6D, T6I; + { + E T19, T6z, T1e, T6A; + { + E T16, T18, T15, T17; + T16 = ri[WS(rs, 60)]; + T18 = ii[WS(rs, 60)]; + T15 = W[118]; + T17 = W[119]; + T19 = FMA(T15, T16, T17 * T18); + T6z = FNMS(T17, T16, T15 * T18); + } + { + E T1b, T1d, T1a, T1c; + T1b = ri[WS(rs, 28)]; + T1d = ii[WS(rs, 28)]; + T1a = W[54]; + T1c = W[55]; + T1e = FMA(T1a, T1b, T1c * T1d); + T6A = FNMS(T1c, T1b, T1a * T1d); + } + T1f = T19 + T1e; + TcW = T6z + T6A; + T6B = T6z - T6A; + T6E = T19 - T1e; + } + { + E T1k, T6F, T1p, T6G; + { + E T1h, T1j, T1g, T1i; + T1h = ri[WS(rs, 12)]; + T1j = ii[WS(rs, 12)]; + T1g = W[22]; + T1i = W[23]; + T1k = FMA(T1g, T1h, T1i * T1j); + T6F = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1m, T1o, T1l, T1n; + T1m = ri[WS(rs, 44)]; + T1o = ii[WS(rs, 44)]; + T1l = W[86]; + T1n = W[87]; + T1p = FMA(T1l, T1m, T1n * T1o); + T6G = FNMS(T1n, T1m, T1l * T1o); + } + T1q = T1k + T1p; + TcX = T6F + T6G; + T6C = T1k - T1p; + T6H = T6F - T6G; + } + T1r = T1f + T1q; + Tfr = 
TcW + TcX; + T6D = T6B + T6C; + T6I = T6E - T6H; + T6J = FMA(KP382683432, T6D, KP923879532 * T6I); + T9P = FNMS(KP923879532, T6D, KP382683432 * T6I); + { + E TaH, TaI, TcV, TcY; + TaH = T6B - T6C; + TaI = T6E + T6H; + TaJ = FMA(KP923879532, TaH, KP382683432 * TaI); + Tc1 = FNMS(KP382683432, TaH, KP923879532 * TaI); + TcV = T1f - T1q; + TcY = TcW - TcX; + TcZ = TcV + TcY; + TeF = TcV - TcY; + } + } + { + E T1y, T6M, T1D, T6N, T1E, Td2, T1J, T74, T1O, T75, T1P, Td3, T21, Td8, T6W; + E T6Z, T2c, Td9, T6R, T6U; + { + E T1v, T1x, T1u, T1w; + T1v = ri[WS(rs, 2)]; + T1x = ii[WS(rs, 2)]; + T1u = W[2]; + T1w = W[3]; + T1y = FMA(T1u, T1v, T1w * T1x); + T6M = FNMS(T1w, T1v, T1u * T1x); + } + { + E T1A, T1C, T1z, T1B; + T1A = ri[WS(rs, 34)]; + T1C = ii[WS(rs, 34)]; + T1z = W[66]; + T1B = W[67]; + T1D = FMA(T1z, T1A, T1B * T1C); + T6N = FNMS(T1B, T1A, T1z * T1C); + } + T1E = T1y + T1D; + Td2 = T6M + T6N; + { + E T1G, T1I, T1F, T1H; + T1G = ri[WS(rs, 18)]; + T1I = ii[WS(rs, 18)]; + T1F = W[34]; + T1H = W[35]; + T1J = FMA(T1F, T1G, T1H * T1I); + T74 = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1L, T1N, T1K, T1M; + T1L = ri[WS(rs, 50)]; + T1N = ii[WS(rs, 50)]; + T1K = W[98]; + T1M = W[99]; + T1O = FMA(T1K, T1L, T1M * T1N); + T75 = FNMS(T1M, T1L, T1K * T1N); + } + T1P = T1J + T1O; + Td3 = T74 + T75; + { + E T1V, T6X, T20, T6Y; + { + E T1S, T1U, T1R, T1T; + T1S = ri[WS(rs, 10)]; + T1U = ii[WS(rs, 10)]; + T1R = W[18]; + T1T = W[19]; + T1V = FMA(T1R, T1S, T1T * T1U); + T6X = FNMS(T1T, T1S, T1R * T1U); + } + { + E T1X, T1Z, T1W, T1Y; + T1X = ri[WS(rs, 42)]; + T1Z = ii[WS(rs, 42)]; + T1W = W[82]; + T1Y = W[83]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T6Y = FNMS(T1Y, T1X, T1W * T1Z); + } + T21 = T1V + T20; + Td8 = T6X + T6Y; + T6W = T1V - T20; + T6Z = T6X - T6Y; + } + { + E T26, T6S, T2b, T6T; + { + E T23, T25, T22, T24; + T23 = ri[WS(rs, 58)]; + T25 = ii[WS(rs, 58)]; + T22 = W[114]; + T24 = W[115]; + T26 = FMA(T22, T23, T24 * T25); + T6S = FNMS(T24, T23, T22 * T25); + } + { + E T28, T2a, T27, 
T29; + T28 = ri[WS(rs, 26)]; + T2a = ii[WS(rs, 26)]; + T27 = W[50]; + T29 = W[51]; + T2b = FMA(T27, T28, T29 * T2a); + T6T = FNMS(T29, T28, T27 * T2a); + } + T2c = T26 + T2b; + Td9 = T6S + T6T; + T6R = T26 - T2b; + T6U = T6S - T6T; + } + T1Q = T1E + T1P; + T2d = T21 + T2c; + Tfx = T1Q - T2d; + Tfu = Td2 + Td3; + Tfv = Td8 + Td9; + Tfw = Tfu - Tfv; + { + E T6O, T6P, Td7, Tda; + T6O = T6M - T6N; + T6P = T1J - T1O; + T6Q = T6O + T6P; + TaM = T6O - T6P; + Td7 = T1E - T1P; + Tda = Td8 - Td9; + Tdb = Td7 - Tda; + TeJ = Td7 + Tda; + } + { + E T6V, T70, T78, T79; + T6V = T6R - T6U; + T70 = T6W + T6Z; + T71 = KP707106781 * (T6V - T70); + TaQ = KP707106781 * (T70 + T6V); + T78 = T6Z - T6W; + T79 = T6R + T6U; + T7a = KP707106781 * (T78 - T79); + TaN = KP707106781 * (T78 + T79); + } + { + E Td4, Td5, T73, T76; + Td4 = Td2 - Td3; + Td5 = T2c - T21; + Td6 = Td4 - Td5; + TeI = Td4 + Td5; + T73 = T1y - T1D; + T76 = T74 - T75; + T77 = T73 - T76; + TaP = T73 + T76; + } + } + { + E T2j, T7d, T2o, T7e, T2p, Tdd, T2u, T7v, T2z, T7w, T2A, Tde, T2M, Tdj, T7n; + E T7q, T2X, Tdk, T7i, T7l; + { + E T2g, T2i, T2f, T2h; + T2g = ri[WS(rs, 62)]; + T2i = ii[WS(rs, 62)]; + T2f = W[122]; + T2h = W[123]; + T2j = FMA(T2f, T2g, T2h * T2i); + T7d = FNMS(T2h, T2g, T2f * T2i); + } + { + E T2l, T2n, T2k, T2m; + T2l = ri[WS(rs, 30)]; + T2n = ii[WS(rs, 30)]; + T2k = W[58]; + T2m = W[59]; + T2o = FMA(T2k, T2l, T2m * T2n); + T7e = FNMS(T2m, T2l, T2k * T2n); + } + T2p = T2j + T2o; + Tdd = T7d + T7e; + { + E T2r, T2t, T2q, T2s; + T2r = ri[WS(rs, 14)]; + T2t = ii[WS(rs, 14)]; + T2q = W[26]; + T2s = W[27]; + T2u = FMA(T2q, T2r, T2s * T2t); + T7v = FNMS(T2s, T2r, T2q * T2t); + } + { + E T2w, T2y, T2v, T2x; + T2w = ri[WS(rs, 46)]; + T2y = ii[WS(rs, 46)]; + T2v = W[90]; + T2x = W[91]; + T2z = FMA(T2v, T2w, T2x * T2y); + T7w = FNMS(T2x, T2w, T2v * T2y); + } + T2A = T2u + T2z; + Tde = T7v + T7w; + { + E T2G, T7o, T2L, T7p; + { + E T2D, T2F, T2C, T2E; + T2D = ri[WS(rs, 6)]; + T2F = ii[WS(rs, 6)]; + T2C = W[10]; + T2E 
= W[11]; + T2G = FMA(T2C, T2D, T2E * T2F); + T7o = FNMS(T2E, T2D, T2C * T2F); + } + { + E T2I, T2K, T2H, T2J; + T2I = ri[WS(rs, 38)]; + T2K = ii[WS(rs, 38)]; + T2H = W[74]; + T2J = W[75]; + T2L = FMA(T2H, T2I, T2J * T2K); + T7p = FNMS(T2J, T2I, T2H * T2K); + } + T2M = T2G + T2L; + Tdj = T7o + T7p; + T7n = T2G - T2L; + T7q = T7o - T7p; + } + { + E T2R, T7j, T2W, T7k; + { + E T2O, T2Q, T2N, T2P; + T2O = ri[WS(rs, 54)]; + T2Q = ii[WS(rs, 54)]; + T2N = W[106]; + T2P = W[107]; + T2R = FMA(T2N, T2O, T2P * T2Q); + T7j = FNMS(T2P, T2O, T2N * T2Q); + } + { + E T2T, T2V, T2S, T2U; + T2T = ri[WS(rs, 22)]; + T2V = ii[WS(rs, 22)]; + T2S = W[42]; + T2U = W[43]; + T2W = FMA(T2S, T2T, T2U * T2V); + T7k = FNMS(T2U, T2T, T2S * T2V); + } + T2X = T2R + T2W; + Tdk = T7j + T7k; + T7i = T2R - T2W; + T7l = T7j - T7k; + } + T2B = T2p + T2A; + T2Y = T2M + T2X; + Tfz = T2B - T2Y; + TfA = Tdd + Tde; + TfB = Tdj + Tdk; + TfC = TfA - TfB; + { + E T7f, T7g, Tdi, Tdl; + T7f = T7d - T7e; + T7g = T2u - T2z; + T7h = T7f + T7g; + TaW = T7f - T7g; + Tdi = T2p - T2A; + Tdl = Tdj - Tdk; + Tdm = Tdi - Tdl; + TeM = Tdi + Tdl; + } + { + E T7m, T7r, T7z, T7A; + T7m = T7i - T7l; + T7r = T7n + T7q; + T7s = KP707106781 * (T7m - T7r); + TaU = KP707106781 * (T7r + T7m); + T7z = T7q - T7n; + T7A = T7i + T7l; + T7B = KP707106781 * (T7z - T7A); + TaX = KP707106781 * (T7z + T7A); + } + { + E Tdf, Tdg, T7u, T7x; + Tdf = Tdd - Tde; + Tdg = T2X - T2M; + Tdh = Tdf - Tdg; + TeL = Tdf + Tdg; + T7u = T2j - T2o; + T7x = T7v - T7w; + T7y = T7u - T7x; + TaT = T7u + T7x; + } + } + { + E T4D, T9e, T4I, T9f, T4J, Te8, T4O, T8A, T4T, T8B, T4U, Te9, T56, TdS, T8G; + E T8H, T5h, TdT, T8J, T8M; + { + E T4A, T4C, T4z, T4B; + T4A = ri[WS(rs, 63)]; + T4C = ii[WS(rs, 63)]; + T4z = W[124]; + T4B = W[125]; + T4D = FMA(T4z, T4A, T4B * T4C); + T9e = FNMS(T4B, T4A, T4z * T4C); + } + { + E T4F, T4H, T4E, T4G; + T4F = ri[WS(rs, 31)]; + T4H = ii[WS(rs, 31)]; + T4E = W[60]; + T4G = W[61]; + T4I = FMA(T4E, T4F, T4G * T4H); + T9f = FNMS(T4G, T4F, 
T4E * T4H); + } + T4J = T4D + T4I; + Te8 = T9e + T9f; + { + E T4L, T4N, T4K, T4M; + T4L = ri[WS(rs, 15)]; + T4N = ii[WS(rs, 15)]; + T4K = W[28]; + T4M = W[29]; + T4O = FMA(T4K, T4L, T4M * T4N); + T8A = FNMS(T4M, T4L, T4K * T4N); + } + { + E T4Q, T4S, T4P, T4R; + T4Q = ri[WS(rs, 47)]; + T4S = ii[WS(rs, 47)]; + T4P = W[92]; + T4R = W[93]; + T4T = FMA(T4P, T4Q, T4R * T4S); + T8B = FNMS(T4R, T4Q, T4P * T4S); + } + T4U = T4O + T4T; + Te9 = T8A + T8B; + { + E T50, T8E, T55, T8F; + { + E T4X, T4Z, T4W, T4Y; + T4X = ri[WS(rs, 7)]; + T4Z = ii[WS(rs, 7)]; + T4W = W[12]; + T4Y = W[13]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T8E = FNMS(T4Y, T4X, T4W * T4Z); + } + { + E T52, T54, T51, T53; + T52 = ri[WS(rs, 39)]; + T54 = ii[WS(rs, 39)]; + T51 = W[76]; + T53 = W[77]; + T55 = FMA(T51, T52, T53 * T54); + T8F = FNMS(T53, T52, T51 * T54); + } + T56 = T50 + T55; + TdS = T8E + T8F; + T8G = T8E - T8F; + T8H = T50 - T55; + } + { + E T5b, T8K, T5g, T8L; + { + E T58, T5a, T57, T59; + T58 = ri[WS(rs, 55)]; + T5a = ii[WS(rs, 55)]; + T57 = W[108]; + T59 = W[109]; + T5b = FMA(T57, T58, T59 * T5a); + T8K = FNMS(T59, T58, T57 * T5a); + } + { + E T5d, T5f, T5c, T5e; + T5d = ri[WS(rs, 23)]; + T5f = ii[WS(rs, 23)]; + T5c = W[44]; + T5e = W[45]; + T5g = FMA(T5c, T5d, T5e * T5f); + T8L = FNMS(T5e, T5d, T5c * T5f); + } + T5h = T5b + T5g; + TdT = T8K + T8L; + T8J = T5b - T5g; + T8M = T8K - T8L; + } + { + E T4V, T5i, Tea, Teb; + T4V = T4J + T4U; + T5i = T56 + T5h; + T5j = T4V + T5i; + TfR = T4V - T5i; + Tea = Te8 - Te9; + Teb = T5h - T56; + Tec = Tea - Teb; + Tf0 = Tea + Teb; + } + { + E TfW, TfX, T8z, T8C; + TfW = Te8 + Te9; + TfX = TdS + TdT; + TfY = TfW - TfX; + Tgy = TfW + TfX; + T8z = T4D - T4I; + T8C = T8A - T8B; + T8D = T8z - T8C; + Tbl = T8z + T8C; + } + { + E T8I, T8N, T9j, T9k; + T8I = T8G - T8H; + T8N = T8J + T8M; + T8O = KP707106781 * (T8I - T8N); + Tbx = KP707106781 * (T8I + T8N); + T9j = T8J - T8M; + T9k = T8H + T8G; + T9l = KP707106781 * (T9j - T9k); + Tbm = KP707106781 * (T9k + T9j); + } + 
{ + E TdR, TdU, T9g, T9h; + TdR = T4J - T4U; + TdU = TdS - TdT; + TdV = TdR - TdU; + TeX = TdR + TdU; + T9g = T9e - T9f; + T9h = T4O - T4T; + T9i = T9g + T9h; + Tbw = T9g - T9h; + } + } + { + E T36, T7G, T3b, T7H, T3c, Tdq, T3h, T8m, T3m, T8n, T3n, Tdr, T3z, TdI, T7Q; + E T7T, T3K, TdJ, T7L, T7O; + { + E T33, T35, T32, T34; + T33 = ri[WS(rs, 1)]; + T35 = ii[WS(rs, 1)]; + T32 = W[0]; + T34 = W[1]; + T36 = FMA(T32, T33, T34 * T35); + T7G = FNMS(T34, T33, T32 * T35); + } + { + E T38, T3a, T37, T39; + T38 = ri[WS(rs, 33)]; + T3a = ii[WS(rs, 33)]; + T37 = W[64]; + T39 = W[65]; + T3b = FMA(T37, T38, T39 * T3a); + T7H = FNMS(T39, T38, T37 * T3a); + } + T3c = T36 + T3b; + Tdq = T7G + T7H; + { + E T3e, T3g, T3d, T3f; + T3e = ri[WS(rs, 17)]; + T3g = ii[WS(rs, 17)]; + T3d = W[32]; + T3f = W[33]; + T3h = FMA(T3d, T3e, T3f * T3g); + T8m = FNMS(T3f, T3e, T3d * T3g); + } + { + E T3j, T3l, T3i, T3k; + T3j = ri[WS(rs, 49)]; + T3l = ii[WS(rs, 49)]; + T3i = W[96]; + T3k = W[97]; + T3m = FMA(T3i, T3j, T3k * T3l); + T8n = FNMS(T3k, T3j, T3i * T3l); + } + T3n = T3h + T3m; + Tdr = T8m + T8n; + { + E T3t, T7R, T3y, T7S; + { + E T3q, T3s, T3p, T3r; + T3q = ri[WS(rs, 9)]; + T3s = ii[WS(rs, 9)]; + T3p = W[16]; + T3r = W[17]; + T3t = FMA(T3p, T3q, T3r * T3s); + T7R = FNMS(T3r, T3q, T3p * T3s); + } + { + E T3v, T3x, T3u, T3w; + T3v = ri[WS(rs, 41)]; + T3x = ii[WS(rs, 41)]; + T3u = W[80]; + T3w = W[81]; + T3y = FMA(T3u, T3v, T3w * T3x); + T7S = FNMS(T3w, T3v, T3u * T3x); + } + T3z = T3t + T3y; + TdI = T7R + T7S; + T7Q = T3t - T3y; + T7T = T7R - T7S; + } + { + E T3E, T7M, T3J, T7N; + { + E T3B, T3D, T3A, T3C; + T3B = ri[WS(rs, 57)]; + T3D = ii[WS(rs, 57)]; + T3A = W[112]; + T3C = W[113]; + T3E = FMA(T3A, T3B, T3C * T3D); + T7M = FNMS(T3C, T3B, T3A * T3D); + } + { + E T3G, T3I, T3F, T3H; + T3G = ri[WS(rs, 25)]; + T3I = ii[WS(rs, 25)]; + T3F = W[48]; + T3H = W[49]; + T3J = FMA(T3F, T3G, T3H * T3I); + T7N = FNMS(T3H, T3G, T3F * T3I); + } + T3K = T3E + T3J; + TdJ = T7M + T7N; + T7L = T3E - T3J; + 
T7O = T7M - T7N; + } + { + E T3o, T3L, TdH, TdK; + T3o = T3c + T3n; + T3L = T3z + T3K; + T3M = T3o + T3L; + TfL = T3o - T3L; + TdH = T3c - T3n; + TdK = TdI - TdJ; + TdL = TdH - TdK; + TeQ = TdH + TdK; + } + { + E TfG, TfH, T7I, T7J; + TfG = Tdq + Tdr; + TfH = TdI + TdJ; + TfI = TfG - TfH; + Tgt = TfG + TfH; + T7I = T7G - T7H; + T7J = T3h - T3m; + T7K = T7I + T7J; + Tb2 = T7I - T7J; + } + { + E T7P, T7U, T8q, T8r; + T7P = T7L - T7O; + T7U = T7Q + T7T; + T7V = KP707106781 * (T7P - T7U); + Tbe = KP707106781 * (T7U + T7P); + T8q = T7T - T7Q; + T8r = T7L + T7O; + T8s = KP707106781 * (T8q - T8r); + Tb3 = KP707106781 * (T8q + T8r); + } + { + E Tds, Tdt, T8l, T8o; + Tds = Tdq - Tdr; + Tdt = T3K - T3z; + Tdu = Tds - Tdt; + TeT = Tds + Tdt; + T8l = T36 - T3b; + T8o = T8m - T8n; + T8p = T8l - T8o; + Tbd = T8l + T8o; + } + } + { + E T3X, TdB, T8a, T8d, T4v, Tdx, T80, T85, T48, TdC, T8b, T8g, T4k, Tdw, T7X; + E T84; + { + E T3R, T88, T3W, T89; + { + E T3O, T3Q, T3N, T3P; + T3O = ri[WS(rs, 5)]; + T3Q = ii[WS(rs, 5)]; + T3N = W[8]; + T3P = W[9]; + T3R = FMA(T3N, T3O, T3P * T3Q); + T88 = FNMS(T3P, T3O, T3N * T3Q); + } + { + E T3T, T3V, T3S, T3U; + T3T = ri[WS(rs, 37)]; + T3V = ii[WS(rs, 37)]; + T3S = W[72]; + T3U = W[73]; + T3W = FMA(T3S, T3T, T3U * T3V); + T89 = FNMS(T3U, T3T, T3S * T3V); + } + T3X = T3R + T3W; + TdB = T88 + T89; + T8a = T88 - T89; + T8d = T3R - T3W; + } + { + E T4p, T7Y, T4u, T7Z; + { + E T4m, T4o, T4l, T4n; + T4m = ri[WS(rs, 13)]; + T4o = ii[WS(rs, 13)]; + T4l = W[24]; + T4n = W[25]; + T4p = FMA(T4l, T4m, T4n * T4o); + T7Y = FNMS(T4n, T4m, T4l * T4o); + } + { + E T4r, T4t, T4q, T4s; + T4r = ri[WS(rs, 45)]; + T4t = ii[WS(rs, 45)]; + T4q = W[88]; + T4s = W[89]; + T4u = FMA(T4q, T4r, T4s * T4t); + T7Z = FNMS(T4s, T4r, T4q * T4t); + } + T4v = T4p + T4u; + Tdx = T7Y + T7Z; + T80 = T7Y - T7Z; + T85 = T4p - T4u; + } + { + E T42, T8e, T47, T8f; + { + E T3Z, T41, T3Y, T40; + T3Z = ri[WS(rs, 21)]; + T41 = ii[WS(rs, 21)]; + T3Y = W[40]; + T40 = W[41]; + T42 = FMA(T3Y, 
T3Z, T40 * T41); + T8e = FNMS(T40, T3Z, T3Y * T41); + } + { + E T44, T46, T43, T45; + T44 = ri[WS(rs, 53)]; + T46 = ii[WS(rs, 53)]; + T43 = W[104]; + T45 = W[105]; + T47 = FMA(T43, T44, T45 * T46); + T8f = FNMS(T45, T44, T43 * T46); + } + T48 = T42 + T47; + TdC = T8e + T8f; + T8b = T42 - T47; + T8g = T8e - T8f; + } + { + E T4e, T82, T4j, T83; + { + E T4b, T4d, T4a, T4c; + T4b = ri[WS(rs, 61)]; + T4d = ii[WS(rs, 61)]; + T4a = W[120]; + T4c = W[121]; + T4e = FMA(T4a, T4b, T4c * T4d); + T82 = FNMS(T4c, T4b, T4a * T4d); + } + { + E T4g, T4i, T4f, T4h; + T4g = ri[WS(rs, 29)]; + T4i = ii[WS(rs, 29)]; + T4f = W[56]; + T4h = W[57]; + T4j = FMA(T4f, T4g, T4h * T4i); + T83 = FNMS(T4h, T4g, T4f * T4i); + } + T4k = T4e + T4j; + Tdw = T82 + T83; + T7X = T4e - T4j; + T84 = T82 - T83; + } + { + E T49, T4w, TdA, TdD; + T49 = T3X + T48; + T4w = T4k + T4v; + T4x = T49 + T4w; + TfJ = T4w - T49; + TdA = T3X - T48; + TdD = TdB - TdC; + TdE = TdA + TdD; + TdM = TdD - TdA; + } + { + E TfM, TfN, T81, T86; + TfM = TdB + TdC; + TfN = Tdw + Tdx; + TfO = TfM - TfN; + Tgu = TfM + TfN; + T81 = T7X - T80; + T86 = T84 + T85; + T87 = FNMS(KP923879532, T86, KP382683432 * T81); + T8v = FMA(KP382683432, T86, KP923879532 * T81); + } + { + E T8c, T8h, Tb8, Tb9; + T8c = T8a + T8b; + T8h = T8d - T8g; + T8i = FMA(KP923879532, T8c, KP382683432 * T8h); + T8u = FNMS(KP923879532, T8h, KP382683432 * T8c); + Tb8 = T8a - T8b; + Tb9 = T8d + T8g; + Tba = FMA(KP382683432, Tb8, KP923879532 * Tb9); + Tbg = FNMS(KP382683432, Tb9, KP923879532 * Tb8); + } + { + E Tdv, Tdy, Tb5, Tb6; + Tdv = T4k - T4v; + Tdy = Tdw - Tdx; + Tdz = Tdv - Tdy; + TdN = Tdv + Tdy; + Tb5 = T7X + T80; + Tb6 = T84 - T85; + Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb5); + Tbh = FMA(KP923879532, Tb6, KP382683432 * Tb5); + } + } + { + E T5u, TdW, T8S, T8V, T62, Te3, T94, T99, T5F, TdX, T8T, T8Y, T5R, Te2, T93; + E T96; + { + E T5o, T8Q, T5t, T8R; + { + E T5l, T5n, T5k, T5m; + T5l = ri[WS(rs, 3)]; + T5n = ii[WS(rs, 3)]; + T5k = W[4]; + T5m = W[5]; 
+ T5o = FMA(T5k, T5l, T5m * T5n); + T8Q = FNMS(T5m, T5l, T5k * T5n); + } + { + E T5q, T5s, T5p, T5r; + T5q = ri[WS(rs, 35)]; + T5s = ii[WS(rs, 35)]; + T5p = W[68]; + T5r = W[69]; + T5t = FMA(T5p, T5q, T5r * T5s); + T8R = FNMS(T5r, T5q, T5p * T5s); + } + T5u = T5o + T5t; + TdW = T8Q + T8R; + T8S = T8Q - T8R; + T8V = T5o - T5t; + } + { + E T5W, T97, T61, T98; + { + E T5T, T5V, T5S, T5U; + T5T = ri[WS(rs, 11)]; + T5V = ii[WS(rs, 11)]; + T5S = W[20]; + T5U = W[21]; + T5W = FMA(T5S, T5T, T5U * T5V); + T97 = FNMS(T5U, T5T, T5S * T5V); + } + { + E T5Y, T60, T5X, T5Z; + T5Y = ri[WS(rs, 43)]; + T60 = ii[WS(rs, 43)]; + T5X = W[84]; + T5Z = W[85]; + T61 = FMA(T5X, T5Y, T5Z * T60); + T98 = FNMS(T5Z, T5Y, T5X * T60); + } + T62 = T5W + T61; + Te3 = T97 + T98; + T94 = T5W - T61; + T99 = T97 - T98; + } + { + E T5z, T8W, T5E, T8X; + { + E T5w, T5y, T5v, T5x; + T5w = ri[WS(rs, 19)]; + T5y = ii[WS(rs, 19)]; + T5v = W[36]; + T5x = W[37]; + T5z = FMA(T5v, T5w, T5x * T5y); + T8W = FNMS(T5x, T5w, T5v * T5y); + } + { + E T5B, T5D, T5A, T5C; + T5B = ri[WS(rs, 51)]; + T5D = ii[WS(rs, 51)]; + T5A = W[100]; + T5C = W[101]; + T5E = FMA(T5A, T5B, T5C * T5D); + T8X = FNMS(T5C, T5B, T5A * T5D); + } + T5F = T5z + T5E; + TdX = T8W + T8X; + T8T = T5z - T5E; + T8Y = T8W - T8X; + } + { + E T5L, T91, T5Q, T92; + { + E T5I, T5K, T5H, T5J; + T5I = ri[WS(rs, 59)]; + T5K = ii[WS(rs, 59)]; + T5H = W[116]; + T5J = W[117]; + T5L = FMA(T5H, T5I, T5J * T5K); + T91 = FNMS(T5J, T5I, T5H * T5K); + } + { + E T5N, T5P, T5M, T5O; + T5N = ri[WS(rs, 27)]; + T5P = ii[WS(rs, 27)]; + T5M = W[52]; + T5O = W[53]; + T5Q = FMA(T5M, T5N, T5O * T5P); + T92 = FNMS(T5O, T5N, T5M * T5P); + } + T5R = T5L + T5Q; + Te2 = T91 + T92; + T93 = T91 - T92; + T96 = T5L - T5Q; + } + { + E T5G, T63, Te1, Te4; + T5G = T5u + T5F; + T63 = T5R + T62; + T64 = T5G + T63; + TfZ = T63 - T5G; + Te1 = T5R - T62; + Te4 = Te2 - Te3; + Te5 = Te1 + Te4; + Ted = Te1 - Te4; + } + { + E TfS, TfT, T8U, T8Z; + TfS = TdW + TdX; + TfT = Te2 + Te3; + TfU = TfS - 
TfT; + Tgz = TfS + TfT; + T8U = T8S + T8T; + T8Z = T8V - T8Y; + T90 = FNMS(KP923879532, T8Z, KP382683432 * T8U); + T9o = FMA(KP923879532, T8U, KP382683432 * T8Z); + } + { + E T95, T9a, Tbr, Tbs; + T95 = T93 + T94; + T9a = T96 - T99; + T9b = FMA(KP382683432, T95, KP923879532 * T9a); + T9n = FNMS(KP923879532, T95, KP382683432 * T9a); + Tbr = T93 - T94; + Tbs = T96 + T99; + Tbt = FMA(KP923879532, Tbr, KP382683432 * Tbs); + Tbz = FNMS(KP382683432, Tbr, KP923879532 * Tbs); + } + { + E TdY, TdZ, Tbo, Tbp; + TdY = TdW - TdX; + TdZ = T5u - T5F; + Te0 = TdY - TdZ; + Tee = TdZ + TdY; + Tbo = T8S - T8T; + Tbp = T8V + T8Y; + Tbq = FNMS(KP382683432, Tbp, KP923879532 * Tbo); + TbA = FMA(KP382683432, Tbo, KP923879532 * Tbp); + } + } + { + E T1t, Tgn, TgK, TgL, TgV, Th1, T30, Th0, T66, TgX, Tgw, TgE, TgB, TgF, Tgq; + E TgM; + { + E TH, T1s, TgI, TgJ; + TH = Tj + TG; + T1s = T14 + T1r; + T1t = TH + T1s; + Tgn = TH - T1s; + TgI = Tgt + Tgu; + TgJ = Tgy + Tgz; + TgK = TgI - TgJ; + TgL = TgI + TgJ; + } + { + E TgN, TgU, T2e, T2Z; + TgN = Tfq + Tfr; + TgU = TgO + TgT; + TgV = TgN + TgU; + Th1 = TgU - TgN; + T2e = T1Q + T2d; + T2Z = T2B + T2Y; + T30 = T2e + T2Z; + Th0 = T2Z - T2e; + } + { + E T4y, T65, Tgs, Tgv; + T4y = T3M + T4x; + T65 = T5j + T64; + T66 = T4y + T65; + TgX = T65 - T4y; + Tgs = T3M - T4x; + Tgv = Tgt - Tgu; + Tgw = Tgs + Tgv; + TgE = Tgv - Tgs; + } + { + E Tgx, TgA, Tgo, Tgp; + Tgx = T5j - T64; + TgA = Tgy - Tgz; + TgB = Tgx - TgA; + TgF = Tgx + TgA; + Tgo = Tfu + Tfv; + Tgp = TfA + TfB; + Tgq = Tgo - Tgp; + TgM = Tgo + Tgp; + } + { + E T31, TgW, TgH, TgY; + T31 = T1t + T30; + ri[WS(rs, 32)] = T31 - T66; + ri[0] = T31 + T66; + TgW = TgM + TgV; + ii[0] = TgL + TgW; + ii[WS(rs, 32)] = TgW - TgL; + TgH = T1t - T30; + ri[WS(rs, 48)] = TgH - TgK; + ri[WS(rs, 16)] = TgH + TgK; + TgY = TgV - TgM; + ii[WS(rs, 16)] = TgX + TgY; + ii[WS(rs, 48)] = TgY - TgX; + } + { + E Tgr, TgC, TgZ, Th2; + Tgr = Tgn + Tgq; + TgC = KP707106781 * (Tgw + TgB); + ri[WS(rs, 40)] = Tgr - TgC; + 
ri[WS(rs, 8)] = Tgr + TgC; + TgZ = KP707106781 * (TgE + TgF); + Th2 = Th0 + Th1; + ii[WS(rs, 8)] = TgZ + Th2; + ii[WS(rs, 40)] = Th2 - TgZ; + } + { + E TgD, TgG, Th3, Th4; + TgD = Tgn - Tgq; + TgG = KP707106781 * (TgE - TgF); + ri[WS(rs, 56)] = TgD - TgG; + ri[WS(rs, 24)] = TgD + TgG; + Th3 = KP707106781 * (TgB - Tgw); + Th4 = Th1 - Th0; + ii[WS(rs, 24)] = Th3 + Th4; + ii[WS(rs, 56)] = Th4 - Th3; + } + } + { + E Tft, Tg7, Tgh, Tgl, Th9, Thf, TfE, Th6, TfQ, Tg4, Tga, The, Tge, Tgk, Tg1; + E Tg5; + { + E Tfp, Tfs, Tgf, Tgg; + Tfp = Tj - TG; + Tfs = Tfq - Tfr; + Tft = Tfp - Tfs; + Tg7 = Tfp + Tfs; + Tgf = TfR + TfU; + Tgg = TfY + TfZ; + Tgh = FNMS(KP382683432, Tgg, KP923879532 * Tgf); + Tgl = FMA(KP923879532, Tgg, KP382683432 * Tgf); + } + { + E Th7, Th8, Tfy, TfD; + Th7 = T1r - T14; + Th8 = TgT - TgO; + Th9 = Th7 + Th8; + Thf = Th8 - Th7; + Tfy = Tfw - Tfx; + TfD = Tfz + TfC; + TfE = KP707106781 * (Tfy - TfD); + Th6 = KP707106781 * (Tfy + TfD); + } + { + E TfK, TfP, Tg8, Tg9; + TfK = TfI - TfJ; + TfP = TfL - TfO; + TfQ = FMA(KP923879532, TfK, KP382683432 * TfP); + Tg4 = FNMS(KP923879532, TfP, KP382683432 * TfK); + Tg8 = Tfx + Tfw; + Tg9 = Tfz - TfC; + Tga = KP707106781 * (Tg8 + Tg9); + The = KP707106781 * (Tg9 - Tg8); + } + { + E Tgc, Tgd, TfV, Tg0; + Tgc = TfI + TfJ; + Tgd = TfL + TfO; + Tge = FMA(KP382683432, Tgc, KP923879532 * Tgd); + Tgk = FNMS(KP382683432, Tgd, KP923879532 * Tgc); + TfV = TfR - TfU; + Tg0 = TfY - TfZ; + Tg1 = FNMS(KP923879532, Tg0, KP382683432 * TfV); + Tg5 = FMA(KP382683432, Tg0, KP923879532 * TfV); + } + { + E TfF, Tg2, Thd, Thg; + TfF = Tft + TfE; + Tg2 = TfQ + Tg1; + ri[WS(rs, 44)] = TfF - Tg2; + ri[WS(rs, 12)] = TfF + Tg2; + Thd = Tg4 + Tg5; + Thg = The + Thf; + ii[WS(rs, 12)] = Thd + Thg; + ii[WS(rs, 44)] = Thg - Thd; + } + { + E Tg3, Tg6, Thh, Thi; + Tg3 = Tft - TfE; + Tg6 = Tg4 - Tg5; + ri[WS(rs, 60)] = Tg3 - Tg6; + ri[WS(rs, 28)] = Tg3 + Tg6; + Thh = Tg1 - TfQ; + Thi = Thf - The; + ii[WS(rs, 28)] = Thh + Thi; + ii[WS(rs, 60)] = Thi - 
Thh; + } + { + E Tgb, Tgi, Th5, Tha; + Tgb = Tg7 + Tga; + Tgi = Tge + Tgh; + ri[WS(rs, 36)] = Tgb - Tgi; + ri[WS(rs, 4)] = Tgb + Tgi; + Th5 = Tgk + Tgl; + Tha = Th6 + Th9; + ii[WS(rs, 4)] = Th5 + Tha; + ii[WS(rs, 36)] = Tha - Th5; + } + { + E Tgj, Tgm, Thb, Thc; + Tgj = Tg7 - Tga; + Tgm = Tgk - Tgl; + ri[WS(rs, 52)] = Tgj - Tgm; + ri[WS(rs, 20)] = Tgj + Tgm; + Thb = Tgh - Tge; + Thc = Th9 - Th6; + ii[WS(rs, 20)] = Thb + Thc; + ii[WS(rs, 52)] = Thc - Thb; + } + } + { + E Td1, Ten, Tdo, ThA, ThD, ThJ, Teq, ThI, Teh, TeB, Tel, Tex, TdQ, TeA, Tek; + E Teu; + { + E TcP, Td0, Teo, Tep; + TcP = TcL - TcO; + Td0 = KP707106781 * (TcU - TcZ); + Td1 = TcP - Td0; + Ten = TcP + Td0; + { + E Tdc, Tdn, ThB, ThC; + Tdc = FNMS(KP923879532, Tdb, KP382683432 * Td6); + Tdn = FMA(KP382683432, Tdh, KP923879532 * Tdm); + Tdo = Tdc - Tdn; + ThA = Tdc + Tdn; + ThB = KP707106781 * (TeF - TeE); + ThC = Thn - Thm; + ThD = ThB + ThC; + ThJ = ThC - ThB; + } + Teo = FMA(KP923879532, Td6, KP382683432 * Tdb); + Tep = FNMS(KP923879532, Tdh, KP382683432 * Tdm); + Teq = Teo + Tep; + ThI = Tep - Teo; + { + E Te7, Tev, Teg, Tew, Te6, Tef; + Te6 = KP707106781 * (Te0 - Te5); + Te7 = TdV - Te6; + Tev = TdV + Te6; + Tef = KP707106781 * (Ted - Tee); + Teg = Tec - Tef; + Tew = Tec + Tef; + Teh = FNMS(KP980785280, Teg, KP195090322 * Te7); + TeB = FMA(KP831469612, Tew, KP555570233 * Tev); + Tel = FMA(KP195090322, Teg, KP980785280 * Te7); + Tex = FNMS(KP555570233, Tew, KP831469612 * Tev); + } + { + E TdG, Tes, TdP, Tet, TdF, TdO; + TdF = KP707106781 * (Tdz - TdE); + TdG = Tdu - TdF; + Tes = Tdu + TdF; + TdO = KP707106781 * (TdM - TdN); + TdP = TdL - TdO; + Tet = TdL + TdO; + TdQ = FMA(KP980785280, TdG, KP195090322 * TdP); + TeA = FNMS(KP555570233, Tet, KP831469612 * Tes); + Tek = FNMS(KP980785280, TdP, KP195090322 * TdG); + Teu = FMA(KP555570233, Tes, KP831469612 * Tet); + } + } + { + E Tdp, Tei, ThH, ThK; + Tdp = Td1 + Tdo; + Tei = TdQ + Teh; + ri[WS(rs, 46)] = Tdp - Tei; + ri[WS(rs, 14)] = Tdp + Tei; + ThH = 
Tek + Tel; + ThK = ThI + ThJ; + ii[WS(rs, 14)] = ThH + ThK; + ii[WS(rs, 46)] = ThK - ThH; + } + { + E Tej, Tem, ThL, ThM; + Tej = Td1 - Tdo; + Tem = Tek - Tel; + ri[WS(rs, 62)] = Tej - Tem; + ri[WS(rs, 30)] = Tej + Tem; + ThL = Teh - TdQ; + ThM = ThJ - ThI; + ii[WS(rs, 30)] = ThL + ThM; + ii[WS(rs, 62)] = ThM - ThL; + } + { + E Ter, Tey, Thz, ThE; + Ter = Ten + Teq; + Tey = Teu + Tex; + ri[WS(rs, 38)] = Ter - Tey; + ri[WS(rs, 6)] = Ter + Tey; + Thz = TeA + TeB; + ThE = ThA + ThD; + ii[WS(rs, 6)] = Thz + ThE; + ii[WS(rs, 38)] = ThE - Thz; + } + { + E Tez, TeC, ThF, ThG; + Tez = Ten - Teq; + TeC = TeA - TeB; + ri[WS(rs, 54)] = Tez - TeC; + ri[WS(rs, 22)] = Tez + TeC; + ThF = Tex - Teu; + ThG = ThD - ThA; + ii[WS(rs, 22)] = ThF + ThG; + ii[WS(rs, 54)] = ThG - ThF; + } + } + { + E TeH, Tf9, TeO, Thk, Thp, Thv, Tfc, Thu, Tf3, Tfn, Tf7, Tfj, TeW, Tfm, Tf6; + E Tfg; + { + E TeD, TeG, Tfa, Tfb; + TeD = TcL + TcO; + TeG = KP707106781 * (TeE + TeF); + TeH = TeD - TeG; + Tf9 = TeD + TeG; + { + E TeK, TeN, Thl, Tho; + TeK = FNMS(KP382683432, TeJ, KP923879532 * TeI); + TeN = FMA(KP923879532, TeL, KP382683432 * TeM); + TeO = TeK - TeN; + Thk = TeK + TeN; + Thl = KP707106781 * (TcU + TcZ); + Tho = Thm + Thn; + Thp = Thl + Tho; + Thv = Tho - Thl; + } + Tfa = FMA(KP382683432, TeI, KP923879532 * TeJ); + Tfb = FNMS(KP382683432, TeL, KP923879532 * TeM); + Tfc = Tfa + Tfb; + Thu = Tfb - Tfa; + { + E TeZ, Tfh, Tf2, Tfi, TeY, Tf1; + TeY = KP707106781 * (Tee + Ted); + TeZ = TeX - TeY; + Tfh = TeX + TeY; + Tf1 = KP707106781 * (Te0 + Te5); + Tf2 = Tf0 - Tf1; + Tfi = Tf0 + Tf1; + Tf3 = FNMS(KP831469612, Tf2, KP555570233 * TeZ); + Tfn = FMA(KP195090322, Tfh, KP980785280 * Tfi); + Tf7 = FMA(KP831469612, TeZ, KP555570233 * Tf2); + Tfj = FNMS(KP195090322, Tfi, KP980785280 * Tfh); + } + { + E TeS, Tfe, TeV, Tff, TeR, TeU; + TeR = KP707106781 * (TdE + Tdz); + TeS = TeQ - TeR; + Tfe = TeQ + TeR; + TeU = KP707106781 * (TdM + TdN); + TeV = TeT - TeU; + Tff = TeT + TeU; + TeW = FMA(KP555570233, TeS, 
KP831469612 * TeV); + Tfm = FNMS(KP195090322, Tfe, KP980785280 * Tff); + Tf6 = FNMS(KP831469612, TeS, KP555570233 * TeV); + Tfg = FMA(KP980785280, Tfe, KP195090322 * Tff); + } + } + { + E TeP, Tf4, Tht, Thw; + TeP = TeH + TeO; + Tf4 = TeW + Tf3; + ri[WS(rs, 42)] = TeP - Tf4; + ri[WS(rs, 10)] = TeP + Tf4; + Tht = Tf6 + Tf7; + Thw = Thu + Thv; + ii[WS(rs, 10)] = Tht + Thw; + ii[WS(rs, 42)] = Thw - Tht; + } + { + E Tf5, Tf8, Thx, Thy; + Tf5 = TeH - TeO; + Tf8 = Tf6 - Tf7; + ri[WS(rs, 58)] = Tf5 - Tf8; + ri[WS(rs, 26)] = Tf5 + Tf8; + Thx = Tf3 - TeW; + Thy = Thv - Thu; + ii[WS(rs, 26)] = Thx + Thy; + ii[WS(rs, 58)] = Thy - Thx; + } + { + E Tfd, Tfk, Thj, Thq; + Tfd = Tf9 + Tfc; + Tfk = Tfg + Tfj; + ri[WS(rs, 34)] = Tfd - Tfk; + ri[WS(rs, 2)] = Tfd + Tfk; + Thj = Tfm + Tfn; + Thq = Thk + Thp; + ii[WS(rs, 2)] = Thj + Thq; + ii[WS(rs, 34)] = Thq - Thj; + } + { + E Tfl, Tfo, Thr, Ths; + Tfl = Tf9 - Tfc; + Tfo = Tfm - Tfn; + ri[WS(rs, 50)] = Tfl - Tfo; + ri[WS(rs, 18)] = Tfl + Tfo; + Thr = Tfj - Tfg; + Ths = Thp - Thk; + ii[WS(rs, 18)] = Thr + Ths; + ii[WS(rs, 50)] = Ths - Thr; + } + } + { + E T6L, T9x, TiD, TiJ, T7E, TiI, T9A, TiA, T8y, T9K, T9u, T9E, T9r, T9L, T9v; + E T9H; + { + E T6n, T6K, TiB, TiC; + T6n = T6b - T6m; + T6K = T6y - T6J; + T6L = T6n - T6K; + T9x = T6n + T6K; + TiB = T9P - T9O; + TiC = Tin - Tim; + TiD = TiB + TiC; + TiJ = TiC - TiB; + } + { + E T7c, T9y, T7D, T9z; + { + E T72, T7b, T7t, T7C; + T72 = T6Q - T71; + T7b = T77 - T7a; + T7c = FNMS(KP980785280, T7b, KP195090322 * T72); + T9y = FMA(KP980785280, T72, KP195090322 * T7b); + T7t = T7h - T7s; + T7C = T7y - T7B; + T7D = FMA(KP195090322, T7t, KP980785280 * T7C); + T9z = FNMS(KP980785280, T7t, KP195090322 * T7C); + } + T7E = T7c - T7D; + TiI = T9z - T9y; + T9A = T9y + T9z; + TiA = T7c + T7D; + } + { + E T8k, T9C, T8x, T9D; + { + E T7W, T8j, T8t, T8w; + T7W = T7K - T7V; + T8j = T87 - T8i; + T8k = T7W - T8j; + T9C = T7W + T8j; + T8t = T8p - T8s; + T8w = T8u - T8v; + T8x = T8t - T8w; + T9D = T8t + T8w; + } 
+ T8y = FMA(KP995184726, T8k, KP098017140 * T8x); + T9K = FNMS(KP634393284, T9D, KP773010453 * T9C); + T9u = FNMS(KP995184726, T8x, KP098017140 * T8k); + T9E = FMA(KP634393284, T9C, KP773010453 * T9D); + } + { + E T9d, T9F, T9q, T9G; + { + E T8P, T9c, T9m, T9p; + T8P = T8D - T8O; + T9c = T90 - T9b; + T9d = T8P - T9c; + T9F = T8P + T9c; + T9m = T9i - T9l; + T9p = T9n - T9o; + T9q = T9m - T9p; + T9G = T9m + T9p; + } + T9r = FNMS(KP995184726, T9q, KP098017140 * T9d); + T9L = FMA(KP773010453, T9G, KP634393284 * T9F); + T9v = FMA(KP098017140, T9q, KP995184726 * T9d); + T9H = FNMS(KP634393284, T9G, KP773010453 * T9F); + } + { + E T7F, T9s, TiH, TiK; + T7F = T6L + T7E; + T9s = T8y + T9r; + ri[WS(rs, 47)] = T7F - T9s; + ri[WS(rs, 15)] = T7F + T9s; + TiH = T9u + T9v; + TiK = TiI + TiJ; + ii[WS(rs, 15)] = TiH + TiK; + ii[WS(rs, 47)] = TiK - TiH; + } + { + E T9t, T9w, TiL, TiM; + T9t = T6L - T7E; + T9w = T9u - T9v; + ri[WS(rs, 63)] = T9t - T9w; + ri[WS(rs, 31)] = T9t + T9w; + TiL = T9r - T8y; + TiM = TiJ - TiI; + ii[WS(rs, 31)] = TiL + TiM; + ii[WS(rs, 63)] = TiM - TiL; + } + { + E T9B, T9I, Tiz, TiE; + T9B = T9x + T9A; + T9I = T9E + T9H; + ri[WS(rs, 39)] = T9B - T9I; + ri[WS(rs, 7)] = T9B + T9I; + Tiz = T9K + T9L; + TiE = TiA + TiD; + ii[WS(rs, 7)] = Tiz + TiE; + ii[WS(rs, 39)] = TiE - Tiz; + } + { + E T9J, T9M, TiF, TiG; + T9J = T9x - T9A; + T9M = T9K - T9L; + ri[WS(rs, 55)] = T9J - T9M; + ri[WS(rs, 23)] = T9J + T9M; + TiF = T9H - T9E; + TiG = TiD - TiA; + ii[WS(rs, 23)] = TiF + TiG; + ii[WS(rs, 55)] = TiG - TiF; + } + } + { + E TaL, TbJ, Ti9, Tif, Tb0, Tie, TbM, Ti6, Tbk, TbW, TbG, TbQ, TbD, TbX, TbH; + E TbT; + { + E TaD, TaK, Ti7, Ti8; + TaD = Taz - TaC; + TaK = TaG - TaJ; + TaL = TaD - TaK; + TbJ = TaD + TaK; + Ti7 = Tc1 - Tc0; + Ti8 = ThT - ThQ; + Ti9 = Ti7 + Ti8; + Tif = Ti8 - Ti7; + } + { + E TaS, TbK, TaZ, TbL; + { + E TaO, TaR, TaV, TaY; + TaO = TaM - TaN; + TaR = TaP - TaQ; + TaS = FNMS(KP831469612, TaR, KP555570233 * TaO); + TbK = FMA(KP555570233, TaR, 
KP831469612 * TaO); + TaV = TaT - TaU; + TaY = TaW - TaX; + TaZ = FMA(KP831469612, TaV, KP555570233 * TaY); + TbL = FNMS(KP831469612, TaY, KP555570233 * TaV); + } + Tb0 = TaS - TaZ; + Tie = TbL - TbK; + TbM = TbK + TbL; + Ti6 = TaS + TaZ; + } + { + E Tbc, TbO, Tbj, TbP; + { + E Tb4, Tbb, Tbf, Tbi; + Tb4 = Tb2 - Tb3; + Tbb = Tb7 - Tba; + Tbc = Tb4 - Tbb; + TbO = Tb4 + Tbb; + Tbf = Tbd - Tbe; + Tbi = Tbg - Tbh; + Tbj = Tbf - Tbi; + TbP = Tbf + Tbi; + } + Tbk = FMA(KP956940335, Tbc, KP290284677 * Tbj); + TbW = FNMS(KP471396736, TbP, KP881921264 * TbO); + TbG = FNMS(KP956940335, Tbj, KP290284677 * Tbc); + TbQ = FMA(KP471396736, TbO, KP881921264 * TbP); + } + { + E Tbv, TbR, TbC, TbS; + { + E Tbn, Tbu, Tby, TbB; + Tbn = Tbl - Tbm; + Tbu = Tbq - Tbt; + Tbv = Tbn - Tbu; + TbR = Tbn + Tbu; + Tby = Tbw - Tbx; + TbB = Tbz - TbA; + TbC = Tby - TbB; + TbS = Tby + TbB; + } + TbD = FNMS(KP956940335, TbC, KP290284677 * Tbv); + TbX = FMA(KP881921264, TbS, KP471396736 * TbR); + TbH = FMA(KP290284677, TbC, KP956940335 * Tbv); + TbT = FNMS(KP471396736, TbS, KP881921264 * TbR); + } + { + E Tb1, TbE, Tid, Tig; + Tb1 = TaL + Tb0; + TbE = Tbk + TbD; + ri[WS(rs, 45)] = Tb1 - TbE; + ri[WS(rs, 13)] = Tb1 + TbE; + Tid = TbG + TbH; + Tig = Tie + Tif; + ii[WS(rs, 13)] = Tid + Tig; + ii[WS(rs, 45)] = Tig - Tid; + } + { + E TbF, TbI, Tih, Tii; + TbF = TaL - Tb0; + TbI = TbG - TbH; + ri[WS(rs, 61)] = TbF - TbI; + ri[WS(rs, 29)] = TbF + TbI; + Tih = TbD - Tbk; + Tii = Tif - Tie; + ii[WS(rs, 29)] = Tih + Tii; + ii[WS(rs, 61)] = Tii - Tih; + } + { + E TbN, TbU, Ti5, Tia; + TbN = TbJ + TbM; + TbU = TbQ + TbT; + ri[WS(rs, 37)] = TbN - TbU; + ri[WS(rs, 5)] = TbN + TbU; + Ti5 = TbW + TbX; + Tia = Ti6 + Ti9; + ii[WS(rs, 5)] = Ti5 + Tia; + ii[WS(rs, 37)] = Tia - Ti5; + } + { + E TbV, TbY, Tib, Tic; + TbV = TbJ - TbM; + TbY = TbW - TbX; + ri[WS(rs, 53)] = TbV - TbY; + ri[WS(rs, 21)] = TbV + TbY; + Tib = TbT - TbQ; + Tic = Ti9 - Ti6; + ii[WS(rs, 21)] = Tib + Tic; + ii[WS(rs, 53)] = Tic - Tib; + } + } + { + 
E Tc3, Tcv, ThV, Ti1, Tca, Ti0, Tcy, ThO, Tci, TcI, Tcs, TcC, Tcp, TcJ, Tct; + E TcF; + { + E TbZ, Tc2, ThP, ThU; + TbZ = Taz + TaC; + Tc2 = Tc0 + Tc1; + Tc3 = TbZ - Tc2; + Tcv = TbZ + Tc2; + ThP = TaG + TaJ; + ThU = ThQ + ThT; + ThV = ThP + ThU; + Ti1 = ThU - ThP; + } + { + E Tc6, Tcw, Tc9, Tcx; + { + E Tc4, Tc5, Tc7, Tc8; + Tc4 = TaM + TaN; + Tc5 = TaP + TaQ; + Tc6 = FNMS(KP195090322, Tc5, KP980785280 * Tc4); + Tcw = FMA(KP980785280, Tc5, KP195090322 * Tc4); + Tc7 = TaT + TaU; + Tc8 = TaW + TaX; + Tc9 = FMA(KP195090322, Tc7, KP980785280 * Tc8); + Tcx = FNMS(KP195090322, Tc8, KP980785280 * Tc7); + } + Tca = Tc6 - Tc9; + Ti0 = Tcx - Tcw; + Tcy = Tcw + Tcx; + ThO = Tc6 + Tc9; + } + { + E Tce, TcA, Tch, TcB; + { + E Tcc, Tcd, Tcf, Tcg; + Tcc = Tbd + Tbe; + Tcd = Tba + Tb7; + Tce = Tcc - Tcd; + TcA = Tcc + Tcd; + Tcf = Tb2 + Tb3; + Tcg = Tbg + Tbh; + Tch = Tcf - Tcg; + TcB = Tcf + Tcg; + } + Tci = FMA(KP634393284, Tce, KP773010453 * Tch); + TcI = FNMS(KP098017140, TcA, KP995184726 * TcB); + Tcs = FNMS(KP773010453, Tce, KP634393284 * Tch); + TcC = FMA(KP995184726, TcA, KP098017140 * TcB); + } + { + E Tcl, TcD, Tco, TcE; + { + E Tcj, Tck, Tcm, Tcn; + Tcj = Tbl + Tbm; + Tck = TbA + Tbz; + Tcl = Tcj - Tck; + TcD = Tcj + Tck; + Tcm = Tbw + Tbx; + Tcn = Tbq + Tbt; + Tco = Tcm - Tcn; + TcE = Tcm + Tcn; + } + Tcp = FNMS(KP773010453, Tco, KP634393284 * Tcl); + TcJ = FMA(KP098017140, TcD, KP995184726 * TcE); + Tct = FMA(KP773010453, Tcl, KP634393284 * Tco); + TcF = FNMS(KP098017140, TcE, KP995184726 * TcD); + } + { + E Tcb, Tcq, ThZ, Ti2; + Tcb = Tc3 + Tca; + Tcq = Tci + Tcp; + ri[WS(rs, 41)] = Tcb - Tcq; + ri[WS(rs, 9)] = Tcb + Tcq; + ThZ = Tcs + Tct; + Ti2 = Ti0 + Ti1; + ii[WS(rs, 9)] = ThZ + Ti2; + ii[WS(rs, 41)] = Ti2 - ThZ; + } + { + E Tcr, Tcu, Ti3, Ti4; + Tcr = Tc3 - Tca; + Tcu = Tcs - Tct; + ri[WS(rs, 57)] = Tcr - Tcu; + ri[WS(rs, 25)] = Tcr + Tcu; + Ti3 = Tcp - Tci; + Ti4 = Ti1 - Ti0; + ii[WS(rs, 25)] = Ti3 + Ti4; + ii[WS(rs, 57)] = Ti4 - Ti3; + } + { + E Tcz, TcG, 
ThN, ThW; + Tcz = Tcv + Tcy; + TcG = TcC + TcF; + ri[WS(rs, 33)] = Tcz - TcG; + ri[WS(rs, 1)] = Tcz + TcG; + ThN = TcI + TcJ; + ThW = ThO + ThV; + ii[WS(rs, 1)] = ThN + ThW; + ii[WS(rs, 33)] = ThW - ThN; + } + { + E TcH, TcK, ThX, ThY; + TcH = Tcv - Tcy; + TcK = TcI - TcJ; + ri[WS(rs, 49)] = TcH - TcK; + ri[WS(rs, 17)] = TcH + TcK; + ThX = TcF - TcC; + ThY = ThV - ThO; + ii[WS(rs, 17)] = ThX + ThY; + ii[WS(rs, 49)] = ThY - ThX; + } + } + { + E T9R, Taj, Tip, Tiv, T9Y, Tiu, Tam, Tik, Ta6, Taw, Tag, Taq, Tad, Tax, Tah; + E Tat; + { + E T9N, T9Q, Til, Tio; + T9N = T6b + T6m; + T9Q = T9O + T9P; + T9R = T9N - T9Q; + Taj = T9N + T9Q; + Til = T6y + T6J; + Tio = Tim + Tin; + Tip = Til + Tio; + Tiv = Tio - Til; + } + { + E T9U, Tak, T9X, Tal; + { + E T9S, T9T, T9V, T9W; + T9S = T6Q + T71; + T9T = T77 + T7a; + T9U = FNMS(KP555570233, T9T, KP831469612 * T9S); + Tak = FMA(KP555570233, T9S, KP831469612 * T9T); + T9V = T7h + T7s; + T9W = T7y + T7B; + T9X = FMA(KP831469612, T9V, KP555570233 * T9W); + Tal = FNMS(KP555570233, T9V, KP831469612 * T9W); + } + T9Y = T9U - T9X; + Tiu = Tal - Tak; + Tam = Tak + Tal; + Tik = T9U + T9X; + } + { + E Ta2, Tao, Ta5, Tap; + { + E Ta0, Ta1, Ta3, Ta4; + Ta0 = T8p + T8s; + Ta1 = T8i + T87; + Ta2 = Ta0 - Ta1; + Tao = Ta0 + Ta1; + Ta3 = T7K + T7V; + Ta4 = T8u + T8v; + Ta5 = Ta3 - Ta4; + Tap = Ta3 + Ta4; + } + Ta6 = FMA(KP471396736, Ta2, KP881921264 * Ta5); + Taw = FNMS(KP290284677, Tao, KP956940335 * Tap); + Tag = FNMS(KP881921264, Ta2, KP471396736 * Ta5); + Taq = FMA(KP956940335, Tao, KP290284677 * Tap); + } + { + E Ta9, Tar, Tac, Tas; + { + E Ta7, Ta8, Taa, Tab; + Ta7 = T8D + T8O; + Ta8 = T9o + T9n; + Ta9 = Ta7 - Ta8; + Tar = Ta7 + Ta8; + Taa = T9i + T9l; + Tab = T90 + T9b; + Tac = Taa - Tab; + Tas = Taa + Tab; + } + Tad = FNMS(KP881921264, Tac, KP471396736 * Ta9); + Tax = FMA(KP290284677, Tar, KP956940335 * Tas); + Tah = FMA(KP881921264, Ta9, KP471396736 * Tac); + Tat = FNMS(KP290284677, Tas, KP956940335 * Tar); + } + { + E T9Z, Tae, Tit, Tiw; + 
T9Z = T9R + T9Y; + Tae = Ta6 + Tad; + ri[WS(rs, 43)] = T9Z - Tae; + ri[WS(rs, 11)] = T9Z + Tae; + Tit = Tag + Tah; + Tiw = Tiu + Tiv; + ii[WS(rs, 11)] = Tit + Tiw; + ii[WS(rs, 43)] = Tiw - Tit; + } + { + E Taf, Tai, Tix, Tiy; + Taf = T9R - T9Y; + Tai = Tag - Tah; + ri[WS(rs, 59)] = Taf - Tai; + ri[WS(rs, 27)] = Taf + Tai; + Tix = Tad - Ta6; + Tiy = Tiv - Tiu; + ii[WS(rs, 27)] = Tix + Tiy; + ii[WS(rs, 59)] = Tiy - Tix; + } + { + E Tan, Tau, Tij, Tiq; + Tan = Taj + Tam; + Tau = Taq + Tat; + ri[WS(rs, 35)] = Tan - Tau; + ri[WS(rs, 3)] = Tan + Tau; + Tij = Taw + Tax; + Tiq = Tik + Tip; + ii[WS(rs, 3)] = Tij + Tiq; + ii[WS(rs, 35)] = Tiq - Tij; + } + { + E Tav, Tay, Tir, Tis; + Tav = Taj - Tam; + Tay = Taw - Tax; + ri[WS(rs, 51)] = Tav - Tay; + ri[WS(rs, 19)] = Tav + Tay; + Tir = Tat - Taq; + Tis = Tip - Tik; + ii[WS(rs, 19)] = Tir + Tis; + ii[WS(rs, 51)] = Tis - Tir; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 64}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 64, "t1_64", twinstr, &GENUS, {808, 270, 230, 0}, 0, 0, 0 }; + +void X(codelet_t1_64) (planner *p) { + X(kdft_dit_register) (p, t1_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 7 -name t1_7 -include t.h */ + +/* + * This function contains 72 FP additions, 66 FP multiplications, + * (or, 18 additions, 12 multiplications, 54 fused multiply/add), + * 66 stack variables, 6 constants, and 28 memory accesses + */ +#include "t.h" + +static void t1_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT m; + for (m = mb, W = W + (mb * 12); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) { + E T1c, T19, T1i, T18, T16, T1q, T1t, T1r, T1u, T1s; + { + E T1, TR, T1h, Te, Tt, Tw, T1a, TM, T1g, Tr, Tu, TS, Tz, TC, Ty; + E Tv, TB; + T1 = ri[0]; + T1c = ii[0]; + { + E T9, Tc, TP, Ta, Tb, TO, T7; + { + E T3, T6, T8, TN, T4, T2, T5; + T3 = ri[WS(rs, 1)]; + T6 = ii[WS(rs, 1)]; + T2 = W[0]; + T9 = ri[WS(rs, 6)]; + Tc = ii[WS(rs, 6)]; + T8 = W[10]; + TN = T2 * T6; + T4 = T2 * T3; + T5 = W[1]; + TP = T8 * Tc; + Ta = T8 * T9; + Tb = W[11]; + TO = FNMS(T5, T3, TN); + T7 = FMA(T5, T6, T4); + } + { + E Tg, Tj, Th, TI, Tm, 
Tp, Tl, Ti, To, TQ, Td, Tf; + Tg = ri[WS(rs, 2)]; + TQ = FNMS(Tb, T9, TP); + Td = FMA(Tb, Tc, Ta); + Tj = ii[WS(rs, 2)]; + Tf = W[2]; + T19 = TO + TQ; + TR = TO - TQ; + T1h = Td - T7; + Te = T7 + Td; + Th = Tf * Tg; + TI = Tf * Tj; + Tm = ri[WS(rs, 5)]; + Tp = ii[WS(rs, 5)]; + Tl = W[8]; + Ti = W[3]; + To = W[9]; + { + E TJ, Tk, TL, Tq, TK, Tn, Ts; + Tt = ri[WS(rs, 3)]; + TK = Tl * Tp; + Tn = Tl * Tm; + TJ = FNMS(Ti, Tg, TI); + Tk = FMA(Ti, Tj, Th); + TL = FNMS(To, Tm, TK); + Tq = FMA(To, Tp, Tn); + Tw = ii[WS(rs, 3)]; + Ts = W[4]; + T1a = TJ + TL; + TM = TJ - TL; + T1g = Tq - Tk; + Tr = Tk + Tq; + Tu = Ts * Tt; + TS = Ts * Tw; + } + Tz = ri[WS(rs, 4)]; + TC = ii[WS(rs, 4)]; + Ty = W[6]; + Tv = W[5]; + TB = W[7]; + } + } + { + E TF, TT, Tx, TV, TD, T1d, TU, TA; + TF = FNMS(KP356895867, Tr, Te); + TU = Ty * TC; + TA = Ty * Tz; + TT = FNMS(Tv, Tt, TS); + Tx = FMA(Tv, Tw, Tu); + TV = FNMS(TB, Tz, TU); + TD = FMA(TB, TC, TA); + T1d = FNMS(KP356895867, T1a, T19); + { + E T1b, T15, T17, TW; + T17 = FNMS(KP554958132, TR, TM); + T1b = TT + TV; + TW = TT - TV; + { + E TE, T1l, T1e, T12; + T1i = TD - Tx; + TE = Tx + TD; + T1l = FNMS(KP356895867, T19, T1b); + T1e = FNMS(KP692021471, T1d, T1b); + ii[0] = T19 + T1a + T1b + T1c; + T12 = FMA(KP554958132, TM, TW); + { + E TX, T1o, T1j, T14; + TX = FMA(KP554958132, TW, TR); + T1o = FMA(KP554958132, T1g, T1i); + T1j = FMA(KP554958132, T1i, T1h); + T14 = FNMS(KP356895867, TE, Tr); + { + E TZ, TG, T1m, T1f; + TZ = FNMS(KP356895867, Te, TE); + TG = FNMS(KP692021471, TF, TE); + ri[0] = T1 + Te + Tr + TE; + T1m = FNMS(KP692021471, T1l, T1a); + T1f = FNMS(KP900968867, T1e, T1c); + { + E T13, TY, T1p, T1k; + T13 = FNMS(KP801937735, T12, TR); + TY = FMA(KP801937735, TX, TM); + T1p = FNMS(KP801937735, T1o, T1h); + T1k = FMA(KP801937735, T1j, T1g); + T15 = FNMS(KP692021471, T14, Te); + { + E T10, TH, T1n, T11; + T10 = FNMS(KP692021471, TZ, Tr); + TH = FNMS(KP900968867, TG, T1); + T1n = FNMS(KP900968867, T1m, T1c); + ii[WS(rs, 6)] = 
FNMS(KP974927912, T1k, T1f); + ii[WS(rs, 1)] = FMA(KP974927912, T1k, T1f); + T11 = FNMS(KP900968867, T10, T1); + ri[WS(rs, 1)] = FMA(KP974927912, TY, TH); + ri[WS(rs, 6)] = FNMS(KP974927912, TY, TH); + ii[WS(rs, 5)] = FNMS(KP974927912, T1p, T1n); + ii[WS(rs, 2)] = FMA(KP974927912, T1p, T1n); + ri[WS(rs, 2)] = FMA(KP974927912, T13, T11); + ri[WS(rs, 5)] = FNMS(KP974927912, T13, T11); + T18 = FNMS(KP801937735, T17, TW); + } + } + } + } + } + T16 = FNMS(KP900968867, T15, T1); + T1q = FNMS(KP356895867, T1b, T1a); + T1t = FNMS(KP554958132, T1h, T1g); + } + } + } + ri[WS(rs, 3)] = FMA(KP974927912, T18, T16); + ri[WS(rs, 4)] = FNMS(KP974927912, T18, T16); + T1r = FNMS(KP692021471, T1q, T19); + T1u = FNMS(KP801937735, T1t, T1i); + T1s = FNMS(KP900968867, T1r, T1c); + ii[WS(rs, 4)] = FNMS(KP974927912, T1u, T1s); + ii[WS(rs, 3)] = FMA(KP974927912, T1u, T1s); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 7}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 7, "t1_7", twinstr, &GENUS, {18, 12, 54, 0}, 0, 0, 0 }; + +void X(codelet_t1_7) (planner *p) { + X(kdft_dit_register) (p, t1_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 7 -name t1_7 -include t.h */ + +/* + * This function contains 72 FP additions, 60 FP multiplications, + * (or, 36 additions, 24 multiplications, 36 fused multiply/add), + * 29 stack variables, 6 constants, and 28 memory accesses + */ +#include "t.h" + +static void t1_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + 
INT m; + for (m = mb, W = W + (mb * 12); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) { + E T1, TR, Tc, TS, TC, TO, Tn, TT, TI, TP, Ty, TU, TF, TQ; + T1 = ri[0]; + TR = ii[0]; + { + E T6, TA, Tb, TB; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 1)]; + T5 = ii[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + TA = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 6)]; + Ta = ii[WS(rs, 6)]; + T7 = W[10]; + T9 = W[11]; + Tb = FMA(T7, T8, T9 * Ta); + TB = FNMS(T9, T8, T7 * Ta); + } + Tc = T6 + Tb; + TS = Tb - T6; + TC = TA - TB; + TO = TA + TB; + } + { + E Th, TG, Tm, TH; + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 2)]; + Tg = ii[WS(rs, 2)]; + Td = W[2]; + Tf = W[3]; + Th = FMA(Td, Te, Tf * Tg); + TG = FNMS(Tf, Te, Td * Tg); + } + { + E Tj, Tl, Ti, Tk; + Tj = ri[WS(rs, 5)]; + Tl = ii[WS(rs, 5)]; + Ti = W[8]; + Tk = W[9]; + Tm = FMA(Ti, Tj, Tk * Tl); + TH = FNMS(Tk, Tj, Ti * Tl); + } + Tn = Th + Tm; + TT = Tm - Th; + TI = TG - TH; + TP = TG + TH; + } + { + E Ts, TD, Tx, TE; + { + E Tp, Tr, To, Tq; + Tp = ri[WS(rs, 3)]; + Tr = ii[WS(rs, 3)]; + To = W[4]; + Tq = W[5]; + Ts = FMA(To, Tp, Tq * Tr); + TD = FNMS(Tq, Tp, To * Tr); + } + { + E Tu, Tw, Tt, Tv; + Tu = ri[WS(rs, 4)]; + Tw = ii[WS(rs, 4)]; + Tt = W[6]; + Tv = W[7]; + Tx = FMA(Tt, Tu, Tv * Tw); + TE = FNMS(Tv, Tu, Tt * Tw); + } + Ty = Ts + Tx; + TU = Tx - Ts; + TF = TD - TE; + TQ = TD + TE; + } + ri[0] = T1 + Tc + Tn + Ty; + ii[0] = TO + TP + TQ + TR; + { + E TJ, Tz, TX, TY; + TJ = FNMS(KP781831482, TF, KP974927912 * TC) - (KP433883739 * TI); + Tz = FMA(KP623489801, Ty, T1) + FNMA(KP900968867, Tn, KP222520933 * Tc); + ri[WS(rs, 5)] = Tz - TJ; + ri[WS(rs, 2)] = Tz + TJ; + TX = FNMS(KP781831482, TU, KP974927912 * TS) - (KP433883739 * TT); + TY = FMA(KP623489801, TQ, TR) + FNMA(KP900968867, TP, KP222520933 * TO); + ii[WS(rs, 2)] = TX + TY; + ii[WS(rs, 5)] = TY - TX; + } + { + E TL, TK, TV, TW; + TL = FMA(KP781831482, TC, KP974927912 * TI) + 
(KP433883739 * TF); + TK = FMA(KP623489801, Tc, T1) + FNMA(KP900968867, Ty, KP222520933 * Tn); + ri[WS(rs, 6)] = TK - TL; + ri[WS(rs, 1)] = TK + TL; + TV = FMA(KP781831482, TS, KP974927912 * TT) + (KP433883739 * TU); + TW = FMA(KP623489801, TO, TR) + FNMA(KP900968867, TQ, KP222520933 * TP); + ii[WS(rs, 1)] = TV + TW; + ii[WS(rs, 6)] = TW - TV; + } + { + E TN, TM, TZ, T10; + TN = FMA(KP433883739, TC, KP974927912 * TF) - (KP781831482 * TI); + TM = FMA(KP623489801, Tn, T1) + FNMA(KP222520933, Ty, KP900968867 * Tc); + ri[WS(rs, 4)] = TM - TN; + ri[WS(rs, 3)] = TM + TN; + TZ = FMA(KP433883739, TS, KP974927912 * TU) - (KP781831482 * TT); + T10 = FMA(KP623489801, TP, TR) + FNMA(KP222520933, TQ, KP900968867 * TO); + ii[WS(rs, 3)] = TZ + T10; + ii[WS(rs, 4)] = T10 - TZ; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 7}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 7, "t1_7", twinstr, &GENUS, {36, 24, 36, 0}, 0, 0, 0 }; + +void X(codelet_t1_7) (planner *p) { + X(kdft_dit_register) (p, t1_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -name t1_8 -include t.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 44 additions, 14 multiplications, 22 fused multiply/add), + * 61 stack variables, 1 constants, and 32 memory accesses + */ +#include "t.h" + +static void t1_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 14); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) { + E T1g, T1f, T1e, Tm, T1q, T1o, T1p, TN, T1h, T1i; + { + E T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, T17, TM, T12, Tu, TW; + E Tp, Tx, Tt, Tq, Tw; + { + E T3, T6, T2, T5; + T1 = ri[0]; + T1m = ii[0]; + T3 = ri[WS(rs, 4)]; + T6 = ii[WS(rs, 4)]; + T2 = W[6]; + T5 = W[7]; + { + E Ta, Td, T9, Tc; + { + E Tg, Tj, Ti, TR, Th, T1k, T4, Tf; + Tg = ri[WS(rs, 6)]; + Tj = ii[WS(rs, 6)]; + T1k = T2 * T6; + T4 = T2 * T3; + Tf = W[10]; + Ti = W[11]; + T1l = FNMS(T5, T3, T1k); + T7 = FMA(T5, T6, T4); + TR = Tf * Tj; + Th = Tf * Tg; + Ta = ri[WS(rs, 2)]; + Td = ii[WS(rs, 2)]; + TS = FNMS(Ti, Tg, TR); + Tk = FMA(Ti, Tj, Th); + T9 = W[2]; + Tc = W[3]; + } + { + E TB, TE, TH, T13, TC, TK, TG, TD, TJ, TP, Tb, TA, Tn; + TB = ri[WS(rs, 7)]; + TE = ii[WS(rs, 7)]; + TP = T9 * Td; + Tb = T9 * Ta; + TA = W[12]; + TH = ri[WS(rs, 3)]; + TQ = FNMS(Tc, Ta, TP); + Te = FMA(Tc, Td, Tb); + T13 = TA * TE; + TC = TA * TB; + TK = ii[WS(rs, 3)]; + TG 
= W[4]; + TD = W[13]; + TJ = W[5]; + { + E T14, TF, T16, TL, T15, TI; + To = ri[WS(rs, 1)]; + T15 = TG * TK; + TI = TG * TH; + T14 = FNMS(TD, TB, T13); + TF = FMA(TD, TE, TC); + T16 = FNMS(TJ, TH, T15); + TL = FMA(TJ, TK, TI); + Tr = ii[WS(rs, 1)]; + Tn = W[0]; + T17 = T14 - T16; + T1g = T14 + T16; + TM = TF + TL; + T12 = TF - TL; + } + Tu = ri[WS(rs, 5)]; + TW = Tn * Tr; + Tp = Tn * To; + Tx = ii[WS(rs, 5)]; + Tt = W[8]; + Tq = W[1]; + Tw = W[9]; + } + } + } + { + E T8, T1j, T1n, Tz, T1a, TU, Tl, T1b, T1c, T1v, T1t, T1w, T19, T1u, T1d; + { + E T1r, T10, TV, T1s, T11, T18; + { + E TO, TX, Ts, TZ, Ty, TT, TY, Tv; + T8 = T1 + T7; + TO = T1 - T7; + TY = Tt * Tx; + Tv = Tt * Tu; + TX = FNMS(Tq, To, TW); + Ts = FMA(Tq, Tr, Tp); + TZ = FNMS(Tw, Tu, TY); + Ty = FMA(Tw, Tx, Tv); + TT = TQ - TS; + T1j = TQ + TS; + T1n = T1l + T1m; + T1r = T1m - T1l; + T10 = TX - TZ; + T1f = TX + TZ; + Tz = Ts + Ty; + TV = Ts - Ty; + T1a = TO - TT; + TU = TO + TT; + T1s = Te - Tk; + Tl = Te + Tk; + } + T1b = T10 - TV; + T11 = TV + T10; + T18 = T12 - T17; + T1c = T12 + T17; + T1v = T1s + T1r; + T1t = T1r - T1s; + T1w = T18 - T11; + T19 = T11 + T18; + } + ii[WS(rs, 3)] = FMA(KP707106781, T1w, T1v); + ii[WS(rs, 7)] = FNMS(KP707106781, T1w, T1v); + ri[WS(rs, 1)] = FMA(KP707106781, T19, TU); + ri[WS(rs, 5)] = FNMS(KP707106781, T19, TU); + T1u = T1b + T1c; + T1d = T1b - T1c; + ii[WS(rs, 1)] = FMA(KP707106781, T1u, T1t); + ii[WS(rs, 5)] = FNMS(KP707106781, T1u, T1t); + ri[WS(rs, 3)] = FMA(KP707106781, T1d, T1a); + ri[WS(rs, 7)] = FNMS(KP707106781, T1d, T1a); + T1e = T8 - Tl; + Tm = T8 + Tl; + T1q = T1n - T1j; + T1o = T1j + T1n; + T1p = TM - Tz; + TN = Tz + TM; + } + } + ii[WS(rs, 2)] = T1p + T1q; + ii[WS(rs, 6)] = T1q - T1p; + ri[0] = Tm + TN; + ri[WS(rs, 4)] = Tm - TN; + T1h = T1f - T1g; + T1i = T1f + T1g; + ii[0] = T1i + T1o; + ii[WS(rs, 4)] = T1o - T1i; + ri[WS(rs, 2)] = T1e + T1h; + ri[WS(rs, 6)] = T1e - T1h; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 8}, + {TW_NEXT, 1, 0} 
+}; + +static const ct_desc desc = { 8, "t1_8", twinstr, &GENUS, {44, 14, 22, 0}, 0, 0, 0 }; + +void X(codelet_t1_8) (planner *p) { + X(kdft_dit_register) (p, t1_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 8 -name t1_8 -include t.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 28 stack variables, 1 constants, and 32 memory accesses + */ +#include "t.h" + +static void t1_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 14); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) { + E T7, T1e, TH, T19, TF, T13, TR, TU, Ti, T1f, TK, T16, Tu, T12, TM; + E TP; + { + E T1, T18, T6, T17; + T1 = ri[0]; + T18 = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 4)]; + T5 = ii[WS(rs, 4)]; + T2 = W[6]; + T4 = W[7]; + T6 = FMA(T2, T3, T4 * T5); + T17 = FNMS(T4, T3, T2 * T5); + } + T7 = T1 + T6; + T1e = T18 - T17; + TH = T1 - T6; + T19 = T17 + T18; + } + { + E Tz, TS, TE, TT; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 7)]; + Ty = ii[WS(rs, 7)]; + Tv = W[12]; + Tx = W[13]; + Tz = FMA(Tv, Tw, Tx * Ty); + TS = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 3)]; + TD = ii[WS(rs, 3)]; + TA = W[4]; + TC = W[5]; + TE = FMA(TA, TB, TC * TD); + TT = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T13 = TS + TT; + TR = Tz - TE; + TU = TS - TT; + } + { + E Tc, TI, Th, TJ; + { + E T9, Tb, T8, Ta; + T9 = ri[WS(rs, 2)]; + Tb = ii[WS(rs, 2)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + TI = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = ri[WS(rs, 6)]; + Tg = ii[WS(rs, 6)]; + Td = W[10]; + Tf = W[11]; + Th = FMA(Td, Te, Tf * Tg); + TJ = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc + Th; + T1f = Tc - Th; + TK = TI - 
TJ; + T16 = TI + TJ; + } + { + E To, TN, Tt, TO; + { + E Tl, Tn, Tk, Tm; + Tl = ri[WS(rs, 1)]; + Tn = ii[WS(rs, 1)]; + Tk = W[0]; + Tm = W[1]; + To = FMA(Tk, Tl, Tm * Tn); + TN = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = ri[WS(rs, 5)]; + Ts = ii[WS(rs, 5)]; + Tp = W[8]; + Tr = W[9]; + Tt = FMA(Tp, Tq, Tr * Ts); + TO = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T12 = TN + TO; + TM = To - Tt; + TP = TN - TO; + } + { + E Tj, TG, T1b, T1c; + Tj = T7 + Ti; + TG = Tu + TF; + ri[WS(rs, 4)] = Tj - TG; + ri[0] = Tj + TG; + { + E T15, T1a, T11, T14; + T15 = T12 + T13; + T1a = T16 + T19; + ii[0] = T15 + T1a; + ii[WS(rs, 4)] = T1a - T15; + T11 = T7 - Ti; + T14 = T12 - T13; + ri[WS(rs, 6)] = T11 - T14; + ri[WS(rs, 2)] = T11 + T14; + } + T1b = TF - Tu; + T1c = T19 - T16; + ii[WS(rs, 2)] = T1b + T1c; + ii[WS(rs, 6)] = T1c - T1b; + { + E TX, T1g, T10, T1d, TY, TZ; + TX = TH - TK; + T1g = T1e - T1f; + TY = TP - TM; + TZ = TR + TU; + T10 = KP707106781 * (TY - TZ); + T1d = KP707106781 * (TY + TZ); + ri[WS(rs, 7)] = TX - T10; + ii[WS(rs, 5)] = T1g - T1d; + ri[WS(rs, 3)] = TX + T10; + ii[WS(rs, 1)] = T1d + T1g; + } + { + E TL, T1i, TW, T1h, TQ, TV; + TL = TH + TK; + T1i = T1f + T1e; + TQ = TM + TP; + TV = TR - TU; + TW = KP707106781 * (TQ + TV); + T1h = KP707106781 * (TV - TQ); + ri[WS(rs, 5)] = TL - TW; + ii[WS(rs, 7)] = T1i - T1h; + ri[WS(rs, 1)] = TL + TW; + ii[WS(rs, 3)] = T1h + T1i; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 8}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 8, "t1_8", twinstr, &GENUS, {52, 18, 14, 0}, 0, 0, 0 }; + +void X(codelet_t1_8) (planner *p) { + X(kdft_dit_register) (p, t1_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t1_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 
2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include t.h */ + +/* + * This function contains 96 FP additions, 88 FP multiplications, + * (or, 24 additions, 16 multiplications, 72 fused multiply/add), + * 72 stack variables, 10 constants, and 36 memory accesses + */ +#include "t.h" + +static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP954188894, +0.954188894138671133499268364187245676532219158); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP363970234, +0.363970234266202361351047882776834043890471784); + DK(KP492403876, +0.492403876506104029683371512294761506835321626); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP777861913, +0.777861913430206160028177977318626690410586096); + DK(KP839099631, +0.839099631177280011763127298123181364687434283); + DK(KP176326980, +0.176326980708464973471090386868618986121633062); + DK(KP866025403, 
+0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { + E T1K, T24, T1H, T23; + { + E T1, T1R, T1Q, T10, T1W, Te, TB, T1l, T1r, T1q, T1M, TE, T1g, Tz, T12; + E TC, TH, TK, T17, TR, TG, TJ, TD; + T1 = ri[0]; + T1R = ii[0]; + { + E T9, Tc, TY, Ta, Tb, TX, T7; + { + E T3, T6, T8, TW, T4, T2, T5; + T3 = ri[WS(rs, 3)]; + T6 = ii[WS(rs, 3)]; + T2 = W[4]; + T9 = ri[WS(rs, 6)]; + Tc = ii[WS(rs, 6)]; + T8 = W[10]; + TW = T2 * T6; + T4 = T2 * T3; + T5 = W[5]; + TY = T8 * Tc; + Ta = T8 * T9; + Tb = W[11]; + TX = FNMS(T5, T3, TW); + T7 = FMA(T5, T6, T4); + } + { + E Th, Tk, Ti, T1n, Tn, Tq, Tp, T1i, Tx, T1j, To, Tj, TZ, Td, Tg; + E TA, Tl, Ty; + Th = ri[WS(rs, 1)]; + TZ = FNMS(Tb, T9, TY); + Td = FMA(Tb, Tc, Ta); + Tk = ii[WS(rs, 1)]; + Tg = W[0]; + T1Q = TX + TZ; + T10 = TX - TZ; + T1W = Td - T7; + Te = T7 + Td; + Ti = Tg * Th; + T1n = Tg * Tk; + { + E Tt, Tw, Ts, Tv, T1h, Tu, Tm; + Tt = ri[WS(rs, 7)]; + Tw = ii[WS(rs, 7)]; + Ts = W[12]; + Tv = W[13]; + Tn = ri[WS(rs, 4)]; + Tq = ii[WS(rs, 4)]; + T1h = Ts * Tw; + Tu = Ts * Tt; + Tm = W[6]; + Tp = W[7]; + T1i = FNMS(Tv, Tt, T1h); + Tx = FMA(Tv, Tw, Tu); + T1j = Tm * Tq; + To = Tm * Tn; + } + Tj = W[1]; + TB = ri[WS(rs, 2)]; + { + E T1k, Tr, T1o, T1p; + T1k = FNMS(Tp, Tn, T1j); + Tr = FMA(Tp, Tq, To); + T1o = FNMS(Tj, Th, T1n); + Tl = FMA(Tj, Tk, Ti); + T1p = T1k + T1i; + T1l = T1i - T1k; + Ty = Tr + Tx; + T1r = Tr - Tx; + T1q = FNMS(KP500000000, T1p, T1o); + T1M = T1o + T1p; + TE = ii[WS(rs, 2)]; + } + T1g = FNMS(KP500000000, Ty, Tl); + Tz = Tl + Ty; + TA = W[2]; + { + E TN, TQ, TP, T16, TO, TM; + TN = ri[WS(rs, 8)]; + TQ = ii[WS(rs, 8)]; + TM = W[14]; + T12 = TA * TE; + TC = TA * TB; + TP = W[15]; + T16 = TM * TQ; + TO = TM * TN; + TH = ri[WS(rs, 5)]; + TK = ii[WS(rs, 5)]; + T17 = FNMS(TP, TN, T16); + TR = FMA(TP, TQ, TO); + 
TG = W[8]; + TJ = W[9]; + } + TD = W[3]; + } + } + { + E TV, Tf, T1S, T1V, T1d, T1a, T19, T1N, TT, T1c; + { + E T13, TF, T15, TL, T14, TI, TS, T18; + TV = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T14 = TG * TK; + TI = TG * TH; + T13 = FNMS(TD, TB, T12); + TF = FMA(TD, TE, TC); + T15 = FNMS(TJ, TH, T14); + TL = FMA(TJ, TK, TI); + T1S = T1Q + T1R; + T1V = FNMS(KP500000000, T1Q, T1R); + T18 = T15 + T17; + T1d = T15 - T17; + TS = TL + TR; + T1a = TR - TL; + T19 = FNMS(KP500000000, T18, T13); + T1N = T13 + T18; + TT = TF + TS; + T1c = FNMS(KP500000000, TS, TF); + } + { + E T11, T1z, T1E, T1D, T21, T1X, T1I, T1C, T1Y, T1y, T20, T1u, T1U, TU; + T1U = TT - Tz; + TU = Tz + TT; + { + E T1P, T1O, T1L, T1T; + T1P = T1M + T1N; + T1O = T1M - T1N; + T11 = FMA(KP866025403, T10, TV); + T1z = FNMS(KP866025403, T10, TV); + T1L = FNMS(KP500000000, TU, Tf); + ri[0] = Tf + TU; + T1T = FNMS(KP500000000, T1P, T1S); + ii[0] = T1P + T1S; + ri[WS(rs, 3)] = FMA(KP866025403, T1O, T1L); + ri[WS(rs, 6)] = FNMS(KP866025403, T1O, T1L); + ii[WS(rs, 6)] = FNMS(KP866025403, T1U, T1T); + ii[WS(rs, 3)] = FMA(KP866025403, T1U, T1T); + } + { + E T1B, T1m, T1w, T1f, T1s, T1A, T1b, T1e, T1x, T1t; + T1E = FNMS(KP866025403, T1a, T19); + T1b = FMA(KP866025403, T1a, T19); + T1e = FMA(KP866025403, T1d, T1c); + T1D = FNMS(KP866025403, T1d, T1c); + T1B = FMA(KP866025403, T1l, T1g); + T1m = FNMS(KP866025403, T1l, T1g); + T21 = FNMS(KP866025403, T1W, T1V); + T1X = FMA(KP866025403, T1W, T1V); + T1w = FNMS(KP176326980, T1b, T1e); + T1f = FMA(KP176326980, T1e, T1b); + T1s = FNMS(KP866025403, T1r, T1q); + T1A = FMA(KP866025403, T1r, T1q); + T1x = FNMS(KP839099631, T1m, T1s); + T1t = FMA(KP839099631, T1s, T1m); + T1I = FNMS(KP176326980, T1A, T1B); + T1C = FMA(KP176326980, T1B, T1A); + T1Y = FNMS(KP777861913, T1x, T1w); + T1y = FMA(KP777861913, T1x, T1w); + T20 = FNMS(KP777861913, T1t, T1f); + T1u = FMA(KP777861913, T1t, T1f); + } + { + E T22, T1G, T1Z, T1F, T1J, T1v; + ii[WS(rs, 1)] = FNMS(KP984807753, T1Y, T1X); + 
T1v = FNMS(KP492403876, T1u, T11); + ri[WS(rs, 1)] = FMA(KP984807753, T1u, T11); + T1F = FNMS(KP363970234, T1E, T1D); + T1J = FMA(KP363970234, T1D, T1E); + ri[WS(rs, 4)] = FMA(KP852868531, T1y, T1v); + ri[WS(rs, 7)] = FNMS(KP852868531, T1y, T1v); + T1K = FNMS(KP954188894, T1J, T1I); + T22 = FMA(KP954188894, T1J, T1I); + T1G = FNMS(KP954188894, T1F, T1C); + T24 = FMA(KP954188894, T1F, T1C); + T1Z = FMA(KP492403876, T1Y, T1X); + ii[WS(rs, 2)] = FNMS(KP984807753, T22, T21); + ri[WS(rs, 2)] = FMA(KP984807753, T1G, T1z); + T1H = FNMS(KP492403876, T1G, T1z); + ii[WS(rs, 7)] = FNMS(KP852868531, T20, T1Z); + ii[WS(rs, 4)] = FMA(KP852868531, T20, T1Z); + T23 = FMA(KP492403876, T22, T21); + } + } + } + } + ri[WS(rs, 8)] = FMA(KP852868531, T1K, T1H); + ri[WS(rs, 5)] = FNMS(KP852868531, T1K, T1H); + ii[WS(rs, 8)] = FMA(KP852868531, T24, T23); + ii[WS(rs, 5)] = FNMS(KP852868531, T24, T23); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 9}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 9, "t1_9", twinstr, &GENUS, {24, 16, 72, 0}, 0, 0, 0 }; + +void X(codelet_t1_9) (planner *p) { + X(kdft_dit_register) (p, t1_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include t.h */ + +/* + * This function contains 96 FP additions, 72 FP multiplications, + * (or, 60 additions, 36 multiplications, 36 fused multiply/add), + * 41 stack variables, 8 constants, and 36 memory accesses + */ +#include "t.h" + +static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP342020143, +0.342020143325668733044099614682259580763083368); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP766044443, 
+0.766044443118978035202392650555416673935832457); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { + E T1, T1B, TQ, T1G, Tc, TN, T1A, T1H, TL, T1x, T17, T1o, T1c, T1n, Tu; + E T1w, TW, T1k, T11, T1l; + { + E T6, TO, Tb, TP; + T1 = ri[0]; + T1B = ii[0]; + { + E T3, T5, T2, T4; + T3 = ri[WS(rs, 3)]; + T5 = ii[WS(rs, 3)]; + T2 = W[4]; + T4 = W[5]; + T6 = FMA(T2, T3, T4 * T5); + TO = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = ri[WS(rs, 6)]; + Ta = ii[WS(rs, 6)]; + T7 = W[10]; + T9 = W[11]; + Tb = FMA(T7, T8, T9 * Ta); + TP = FNMS(T9, T8, T7 * Ta); + } + TQ = KP866025403 * (TO - TP); + T1G = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + TN = FNMS(KP500000000, Tc, T1); + T1A = TO + TP; + T1H = FNMS(KP500000000, T1A, T1B); + } + { + E Tz, T19, TE, T14, TJ, T15, TK, T1a; + { + E Tw, Ty, Tv, Tx; + Tw = ri[WS(rs, 2)]; + Ty = ii[WS(rs, 2)]; + Tv = W[2]; + Tx = W[3]; + Tz = FMA(Tv, Tw, Tx * Ty); + T19 = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = ri[WS(rs, 5)]; + TD = ii[WS(rs, 5)]; + TA = W[8]; + TC = W[9]; + TE = FMA(TA, TB, TC * TD); + T14 = FNMS(TC, TB, TA * TD); + } + { + E TG, TI, TF, TH; + TG = ri[WS(rs, 8)]; + TI = ii[WS(rs, 8)]; + TF = W[14]; + TH = W[15]; + TJ = FMA(TF, TG, TH * TI); + T15 = FNMS(TH, TG, TF * TI); + } + TK = TE + TJ; + T1a = T14 + T15; + TL = Tz + TK; + T1x = T19 + T1a; + { + E T13, T16, T18, T1b; + T13 = FNMS(KP500000000, TK, Tz); + T16 = KP866025403 * (T14 - T15); + T17 = T13 + T16; + T1o = T13 - T16; + T18 = KP866025403 * (TJ - TE); + T1b = FNMS(KP500000000, T1a, T19); + T1c = T18 + T1b; + T1n = T1b - T18; + } + } + { + E Ti, TY, Tn, TT, Ts, TU, Tt, TZ; + { + E Tf, Th, Te, Tg; + Tf = ri[WS(rs, 1)]; + Th = ii[WS(rs, 1)]; + Te = W[0]; + Tg = W[1]; + Ti = FMA(Te, Tf, Tg * Th); + TY = FNMS(Tg, Tf, Te * 
Th); + } + { + E Tk, Tm, Tj, Tl; + Tk = ri[WS(rs, 4)]; + Tm = ii[WS(rs, 4)]; + Tj = W[6]; + Tl = W[7]; + Tn = FMA(Tj, Tk, Tl * Tm); + TT = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = ri[WS(rs, 7)]; + Tr = ii[WS(rs, 7)]; + To = W[12]; + Tq = W[13]; + Ts = FMA(To, Tp, Tq * Tr); + TU = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn + Ts; + TZ = TT + TU; + Tu = Ti + Tt; + T1w = TY + TZ; + { + E TS, TV, TX, T10; + TS = FNMS(KP500000000, Tt, Ti); + TV = KP866025403 * (TT - TU); + TW = TS + TV; + T1k = TS - TV; + TX = KP866025403 * (Ts - Tn); + T10 = FNMS(KP500000000, TZ, TY); + T11 = TX + T10; + T1l = T10 - TX; + } + } + { + E T1y, Td, TM, T1v; + T1y = KP866025403 * (T1w - T1x); + Td = T1 + Tc; + TM = Tu + TL; + T1v = FNMS(KP500000000, TM, Td); + ri[0] = Td + TM; + ri[WS(rs, 3)] = T1v + T1y; + ri[WS(rs, 6)] = T1v - T1y; + } + { + E T1D, T1z, T1C, T1E; + T1D = KP866025403 * (TL - Tu); + T1z = T1w + T1x; + T1C = T1A + T1B; + T1E = FNMS(KP500000000, T1z, T1C); + ii[0] = T1z + T1C; + ii[WS(rs, 6)] = T1E - T1D; + ii[WS(rs, 3)] = T1D + T1E; + } + { + E TR, T1I, T1e, T1J, T1i, T1F, T1f, T1K; + TR = TN + TQ; + T1I = T1G + T1H; + { + E T12, T1d, T1g, T1h; + T12 = FMA(KP766044443, TW, KP642787609 * T11); + T1d = FMA(KP173648177, T17, KP984807753 * T1c); + T1e = T12 + T1d; + T1J = KP866025403 * (T1d - T12); + T1g = FNMS(KP642787609, TW, KP766044443 * T11); + T1h = FNMS(KP984807753, T17, KP173648177 * T1c); + T1i = KP866025403 * (T1g - T1h); + T1F = T1g + T1h; + } + ri[WS(rs, 1)] = TR + T1e; + ii[WS(rs, 1)] = T1F + T1I; + T1f = FNMS(KP500000000, T1e, TR); + ri[WS(rs, 7)] = T1f - T1i; + ri[WS(rs, 4)] = T1f + T1i; + T1K = FNMS(KP500000000, T1F, T1I); + ii[WS(rs, 4)] = T1J + T1K; + ii[WS(rs, 7)] = T1K - T1J; + } + { + E T1j, T1M, T1q, T1N, T1u, T1L, T1r, T1O; + T1j = TN - TQ; + T1M = T1H - T1G; + { + E T1m, T1p, T1s, T1t; + T1m = FMA(KP173648177, T1k, KP984807753 * T1l); + T1p = FNMS(KP939692620, T1o, KP342020143 * T1n); + T1q = T1m + T1p; + T1N = KP866025403 * (T1p - T1m); + T1s = 
FNMS(KP984807753, T1k, KP173648177 * T1l); + T1t = FMA(KP342020143, T1o, KP939692620 * T1n); + T1u = KP866025403 * (T1s + T1t); + T1L = T1s - T1t; + } + ri[WS(rs, 2)] = T1j + T1q; + ii[WS(rs, 2)] = T1L + T1M; + T1r = FNMS(KP500000000, T1q, T1j); + ri[WS(rs, 8)] = T1r - T1u; + ri[WS(rs, 5)] = T1r + T1u; + T1O = FNMS(KP500000000, T1L, T1M); + ii[WS(rs, 5)] = T1N + T1O; + ii[WS(rs, 8)] = T1O - T1N; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 0, 9}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 9, "t1_9", twinstr, &GENUS, {60, 36, 36, 0}, 0, 0, 0 }; + +void X(codelet_t1_9) (planner *p) { + X(kdft_dit_register) (p, t1_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,509 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 10 -name t2_10 -include t.h */ + +/* + * This function contains 114 FP additions, 94 FP multiplications, + * (or, 48 additions, 28 multiplications, 66 fused multiply/add), + * 85 stack variables, 4 constants, and 40 memory accesses + */ +#include "t.h" + +static void t2_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(20, rs)) { + E T27, T2b, T2a, T2c; + { + E T2, T3, T8, Tc, T5, T4, TX, T11, TE, T6, TB, TA; + T2 = W[0]; + T3 = W[2]; + T8 = W[4]; + Tc = W[5]; + T5 = W[1]; + T4 = T2 * T3; + TX = T3 * T8; + TA = T2 * T8; + T11 = T3 * Tc; + TE = T2 * Tc; + T6 = W[3]; + TB = FMA(T5, Tc, TA); + { + E T2d, T24, T1c, Tk, T1i, T28, T2l, T1a, T2f, T1I, T1R, T1Z, TL, T1v, T1d; + E Tz, T1S, T1r, TH, T1t; + { + E T1, TF, TY, T12, Tl, T7, T23, To, Tb, Te, Ti, Th, Td, Tw, Ts; + E Ta; + T1 = ri[0]; + TF = FNMS(T5, T8, TE); + TY = FMA(T6, Tc, TX); + T12 = FNMS(T6, T8, T11); + Tl = FMA(T5, T6, T4); + T7 = FNMS(T5, T6, T4); + Ta = T2 * T6; + T23 = ii[0]; + { + E Tg, T9, Tv, Tr; + Tg = T7 * Tc; + T9 = T7 * T8; + Tv = Tl 
* Tc; + Tr = Tl * T8; + To = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + Te = ri[WS(rs, 5)]; + Ti = ii[WS(rs, 5)]; + Th = FNMS(Tb, T8, Tg); + Td = FMA(Tb, Tc, T9); + Tw = FNMS(To, T8, Tv); + Ts = FMA(To, Tc, Tr); + } + { + E T18, T1G, T1g, TW, T1P, T1C, T14, T1E; + { + E TR, T1z, TV, T1B, TZ, T13, T15, T17, T10, T1D; + { + E TO, TQ, TP, T22, Tj, T1y, T21, Tf; + TO = ri[WS(rs, 4)]; + T21 = Td * Ti; + Tf = Td * Te; + TQ = ii[WS(rs, 4)]; + TP = T7 * TO; + T22 = FNMS(Th, Te, T21); + Tj = FMA(Th, Ti, Tf); + T1y = T7 * TQ; + TR = FMA(Tb, TQ, TP); + T2d = T23 - T22; + T24 = T22 + T23; + T1c = T1 + Tj; + Tk = T1 - Tj; + T1z = FNMS(Tb, TO, T1y); + } + T15 = ri[WS(rs, 1)]; + T17 = ii[WS(rs, 1)]; + { + E TS, TU, T16, T1F, TT, T1A; + TS = ri[WS(rs, 9)]; + TU = ii[WS(rs, 9)]; + T16 = T2 * T15; + T1F = T2 * T17; + TT = T8 * TS; + T1A = T8 * TU; + T18 = FMA(T5, T17, T16); + T1G = FNMS(T5, T15, T1F); + TV = FMA(Tc, TU, TT); + T1B = FNMS(Tc, TS, T1A); + } + TZ = ri[WS(rs, 6)]; + T13 = ii[WS(rs, 6)]; + T1g = TR + TV; + TW = TR - TV; + T1P = T1z + T1B; + T1C = T1z - T1B; + T10 = TY * TZ; + T1D = TY * T13; + T14 = FMA(T12, T13, T10); + T1E = FNMS(T12, TZ, T1D); + } + { + E Tq, T1o, Ty, TC, TG, T1q, TD, T1s; + { + E TI, TK, Tt, T1p; + { + E Tm, T1n, Tp, Tn; + Tm = ri[WS(rs, 2)]; + Tp = ii[WS(rs, 2)]; + { + E T19, T1h, T1Q, T1H; + T19 = T14 - T18; + T1h = T14 + T18; + T1Q = T1E + T1G; + T1H = T1E - T1G; + Tn = Tl * Tm; + T1i = T1g + T1h; + T28 = T1g - T1h; + T2l = TW - T19; + T1a = TW + T19; + T2f = T1C + T1H; + T1I = T1C - T1H; + T1R = T1P - T1Q; + T1Z = T1P + T1Q; + T1n = Tl * Tp; + } + Tq = FMA(To, Tp, Tn); + TI = ri[WS(rs, 3)]; + TK = ii[WS(rs, 3)]; + T1o = FNMS(To, Tm, T1n); + } + { + E Tx, Tu, TJ, T1u; + Tt = ri[WS(rs, 7)]; + TJ = T3 * TI; + T1u = T3 * TK; + Tx = ii[WS(rs, 7)]; + Tu = Ts * Tt; + TL = FMA(T6, TK, TJ); + T1v = FNMS(T6, TI, T1u); + T1p = Ts * Tx; + Ty = FMA(Tw, Tx, Tu); + } + TC = ri[WS(rs, 8)]; + TG = ii[WS(rs, 8)]; + T1q = FNMS(Tw, Tt, T1p); + } + T1d = Tq + Ty; + 
Tz = Tq - Ty; + TD = TB * TC; + T1s = TB * TG; + T1S = T1o + T1q; + T1r = T1o - T1q; + TH = FMA(TF, TG, TD); + T1t = FNMS(TF, TC, T1s); + } + } + } + { + E T1f, T29, T1Y, T1U, T2j, T2n, T2m, T2o; + { + E T2k, T2e, T1l, T1L, T1J, T1k, T1b, T1e, TM; + T1e = TH + TL; + TM = TH - TL; + { + E T1w, T1T, TN, T1x; + T1w = T1t - T1v; + T1T = T1t + T1v; + T1f = T1d + T1e; + T29 = T1d - T1e; + T2k = Tz - TM; + TN = Tz + TM; + T1x = T1r - T1w; + T2e = T1r + T1w; + T1Y = T1S + T1T; + T1U = T1S - T1T; + T1l = TN - T1a; + T1b = TN + T1a; + T1L = FNMS(KP618033988, T1x, T1I); + T1J = FMA(KP618033988, T1I, T1x); + } + T1k = FNMS(KP250000000, T1b, Tk); + ri[WS(rs, 5)] = Tk + T1b; + { + E T2g, T2i, T2h, T1K, T1m; + T2g = T2e + T2f; + T2i = T2e - T2f; + T1K = FNMS(KP559016994, T1l, T1k); + T1m = FMA(KP559016994, T1l, T1k); + T2h = FNMS(KP250000000, T2g, T2d); + ri[WS(rs, 1)] = FMA(KP951056516, T1J, T1m); + ri[WS(rs, 9)] = FNMS(KP951056516, T1J, T1m); + ri[WS(rs, 3)] = FMA(KP951056516, T1L, T1K); + ri[WS(rs, 7)] = FNMS(KP951056516, T1L, T1K); + ii[WS(rs, 5)] = T2g + T2d; + T2j = FMA(KP559016994, T2i, T2h); + T2n = FNMS(KP559016994, T2i, T2h); + T2m = FMA(KP618033988, T2l, T2k); + T2o = FNMS(KP618033988, T2k, T2l); + } + } + { + E T1O, T1W, T1V, T1X, T1j, T1N, T1M, T20, T26, T25; + T1j = T1f + T1i; + T1N = T1f - T1i; + ii[WS(rs, 7)] = FMA(KP951056516, T2o, T2n); + ii[WS(rs, 3)] = FNMS(KP951056516, T2o, T2n); + ii[WS(rs, 9)] = FMA(KP951056516, T2m, T2j); + ii[WS(rs, 1)] = FNMS(KP951056516, T2m, T2j); + T1M = FNMS(KP250000000, T1j, T1c); + ri[0] = T1c + T1j; + T1O = FNMS(KP559016994, T1N, T1M); + T1W = FMA(KP559016994, T1N, T1M); + T1V = FNMS(KP618033988, T1U, T1R); + T1X = FMA(KP618033988, T1R, T1U); + T20 = T1Y + T1Z; + T26 = T1Y - T1Z; + ri[WS(rs, 6)] = FMA(KP951056516, T1X, T1W); + ri[WS(rs, 4)] = FNMS(KP951056516, T1X, T1W); + ri[WS(rs, 8)] = FMA(KP951056516, T1V, T1O); + ri[WS(rs, 2)] = FNMS(KP951056516, T1V, T1O); + T25 = FNMS(KP250000000, T20, T24); + ii[0] = T20 + T24; + T27 = 
FNMS(KP559016994, T26, T25); + T2b = FMA(KP559016994, T26, T25); + T2a = FNMS(KP618033988, T29, T28); + T2c = FMA(KP618033988, T28, T29); + } + } + } + } + ii[WS(rs, 6)] = FNMS(KP951056516, T2c, T2b); + ii[WS(rs, 4)] = FMA(KP951056516, T2c, T2b); + ii[WS(rs, 8)] = FNMS(KP951056516, T2a, T27); + ii[WS(rs, 2)] = FMA(KP951056516, T2a, T27); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 10, "t2_10", twinstr, &GENUS, {48, 28, 66, 0}, 0, 0, 0 }; + +void X(codelet_t2_10) (planner *p) { + X(kdft_dit_register) (p, t2_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 10 -name t2_10 -include t.h */ + +/* + * This function contains 114 FP additions, 80 FP multiplications, + * (or, 76 additions, 42 multiplications, 38 fused multiply/add), + * 63 stack variables, 4 constants, and 40 memory accesses + */ +#include "t.h" + +static void t2_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(20, rs)) { + E T2, T5, T3, T6, T8, Tm, Tc, Tk, T9, Td, Te, TM, TO, Tg, Tp; + E Tv, Tx, Tr; + { + E T4, Tb, T7, Ta; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tb = T5 * T3; + T7 = T5 * T6; + Ta = T2 * T6; + T8 = T4 - T7; + Tm = Ta - Tb; + Tc = Ta + Tb; + Tk = T4 + T7; + T9 = W[4]; + Td = W[5]; + Te = FMA(T8, T9, Tc * Td); + TM = FMA(T3, T9, T6 * Td); + TO = FNMS(T6, T9, T3 * Td); + Tg = FNMS(Tc, T9, T8 * Td); + Tp = FMA(Tk, T9, 
Tm * Td); + Tv = FMA(T2, T9, T5 * Td); + Tx = FNMS(T5, T9, T2 * Td); + Tr = FNMS(Tm, T9, Tk * Td); + } + { + E Tj, T1S, TX, T1G, TL, TU, TV, T1s, T1t, T1C, T11, T12, T13, T1h, T1k; + E T1Q, Tu, TD, TE, T1v, T1w, T1B, TY, TZ, T10, T1a, T1d, T1P; + { + E T1, T1F, Ti, T1E, Tf, Th; + T1 = ri[0]; + T1F = ii[0]; + Tf = ri[WS(rs, 5)]; + Th = ii[WS(rs, 5)]; + Ti = FMA(Te, Tf, Tg * Th); + T1E = FNMS(Tg, Tf, Te * Th); + Tj = T1 - Ti; + T1S = T1F - T1E; + TX = T1 + Ti; + T1G = T1E + T1F; + } + { + E TH, T1f, TT, T1j, TK, T1g, TQ, T1i; + { + E TF, TG, TR, TS; + TF = ri[WS(rs, 4)]; + TG = ii[WS(rs, 4)]; + TH = FMA(T8, TF, Tc * TG); + T1f = FNMS(Tc, TF, T8 * TG); + TR = ri[WS(rs, 1)]; + TS = ii[WS(rs, 1)]; + TT = FMA(T2, TR, T5 * TS); + T1j = FNMS(T5, TR, T2 * TS); + } + { + E TI, TJ, TN, TP; + TI = ri[WS(rs, 9)]; + TJ = ii[WS(rs, 9)]; + TK = FMA(T9, TI, Td * TJ); + T1g = FNMS(Td, TI, T9 * TJ); + TN = ri[WS(rs, 6)]; + TP = ii[WS(rs, 6)]; + TQ = FMA(TM, TN, TO * TP); + T1i = FNMS(TO, TN, TM * TP); + } + TL = TH - TK; + TU = TQ - TT; + TV = TL + TU; + T1s = T1f + T1g; + T1t = T1i + T1j; + T1C = T1s + T1t; + T11 = TH + TK; + T12 = TQ + TT; + T13 = T11 + T12; + T1h = T1f - T1g; + T1k = T1i - T1j; + T1Q = T1h + T1k; + } + { + E To, T18, TC, T1c, Tt, T19, Tz, T1b; + { + E Tl, Tn, TA, TB; + Tl = ri[WS(rs, 2)]; + Tn = ii[WS(rs, 2)]; + To = FMA(Tk, Tl, Tm * Tn); + T18 = FNMS(Tm, Tl, Tk * Tn); + TA = ri[WS(rs, 3)]; + TB = ii[WS(rs, 3)]; + TC = FMA(T3, TA, T6 * TB); + T1c = FNMS(T6, TA, T3 * TB); + } + { + E Tq, Ts, Tw, Ty; + Tq = ri[WS(rs, 7)]; + Ts = ii[WS(rs, 7)]; + Tt = FMA(Tp, Tq, Tr * Ts); + T19 = FNMS(Tr, Tq, Tp * Ts); + Tw = ri[WS(rs, 8)]; + Ty = ii[WS(rs, 8)]; + Tz = FMA(Tv, Tw, Tx * Ty); + T1b = FNMS(Tx, Tw, Tv * Ty); + } + Tu = To - Tt; + TD = Tz - TC; + TE = Tu + TD; + T1v = T18 + T19; + T1w = T1b + T1c; + T1B = T1v + T1w; + TY = To + Tt; + TZ = Tz + TC; + T10 = TY + TZ; + T1a = T18 - T19; + T1d = T1b - T1c; + T1P = T1a + T1d; + } + { + E T15, TW, T16, T1m, T1o, T1e, T1l, T1n, 
T17; + T15 = KP559016994 * (TE - TV); + TW = TE + TV; + T16 = FNMS(KP250000000, TW, Tj); + T1e = T1a - T1d; + T1l = T1h - T1k; + T1m = FMA(KP951056516, T1e, KP587785252 * T1l); + T1o = FNMS(KP587785252, T1e, KP951056516 * T1l); + ri[WS(rs, 5)] = Tj + TW; + T1n = T16 - T15; + ri[WS(rs, 7)] = T1n - T1o; + ri[WS(rs, 3)] = T1n + T1o; + T17 = T15 + T16; + ri[WS(rs, 9)] = T17 - T1m; + ri[WS(rs, 1)] = T17 + T1m; + } + { + E T1R, T1T, T1U, T1Y, T20, T1W, T1X, T1Z, T1V; + T1R = KP559016994 * (T1P - T1Q); + T1T = T1P + T1Q; + T1U = FNMS(KP250000000, T1T, T1S); + T1W = Tu - TD; + T1X = TL - TU; + T1Y = FMA(KP951056516, T1W, KP587785252 * T1X); + T20 = FNMS(KP587785252, T1W, KP951056516 * T1X); + ii[WS(rs, 5)] = T1T + T1S; + T1Z = T1U - T1R; + ii[WS(rs, 3)] = T1Z - T20; + ii[WS(rs, 7)] = T20 + T1Z; + T1V = T1R + T1U; + ii[WS(rs, 1)] = T1V - T1Y; + ii[WS(rs, 9)] = T1Y + T1V; + } + { + E T1q, T14, T1p, T1y, T1A, T1u, T1x, T1z, T1r; + T1q = KP559016994 * (T10 - T13); + T14 = T10 + T13; + T1p = FNMS(KP250000000, T14, TX); + T1u = T1s - T1t; + T1x = T1v - T1w; + T1y = FNMS(KP587785252, T1x, KP951056516 * T1u); + T1A = FMA(KP951056516, T1x, KP587785252 * T1u); + ri[0] = TX + T14; + T1z = T1q + T1p; + ri[WS(rs, 4)] = T1z - T1A; + ri[WS(rs, 6)] = T1z + T1A; + T1r = T1p - T1q; + ri[WS(rs, 2)] = T1r - T1y; + ri[WS(rs, 8)] = T1r + T1y; + } + { + E T1L, T1D, T1K, T1J, T1N, T1H, T1I, T1O, T1M; + T1L = KP559016994 * (T1B - T1C); + T1D = T1B + T1C; + T1K = FNMS(KP250000000, T1D, T1G); + T1H = T11 - T12; + T1I = TY - TZ; + T1J = FNMS(KP587785252, T1I, KP951056516 * T1H); + T1N = FMA(KP951056516, T1I, KP587785252 * T1H); + ii[0] = T1D + T1G; + T1O = T1L + T1K; + ii[WS(rs, 4)] = T1N + T1O; + ii[WS(rs, 6)] = T1O - T1N; + T1M = T1K - T1L; + ii[WS(rs, 2)] = T1J + T1M; + ii[WS(rs, 8)] = T1M - T1J; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 10, "t2_10", twinstr, &GENUS, {76, 
42, 38, 0}, 0, 0, 0 }; + +void X(codelet_t2_10) (planner *p) { + X(kdft_dit_register) (p, t2_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,827 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -name t2_16 -include t.h */ + +/* + * This function contains 196 FP additions, 134 FP multiplications, + * (or, 104 additions, 42 multiplications, 92 fused multiply/add), + * 100 stack variables, 3 constants, and 64 memory accesses + */ +#include "t.h" + +static void t2_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, 
+0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { + E T3S, T3R; + { + E T2, Tf, TM, TO, T3, Tg, TN, TS, T4, Tp, T6, T5, Th; + T2 = W[0]; + Tf = W[2]; + TM = W[6]; + TO = W[7]; + T3 = W[4]; + Tg = T2 * Tf; + TN = T2 * TM; + TS = T2 * TO; + T4 = T2 * T3; + Tp = Tf * T3; + T6 = W[5]; + T5 = W[1]; + Th = W[3]; + { + E TZ, Te, T1U, T3A, T3L, T2D, T1G, T2A, T3h, T1R, T2B, T2I, T3i, Tx, T3M; + E T1Z, T3w, TL, T26, T25, T37, T1d, T2o, T2l, T3c, T1s, T2m, T2t, T3d, TX; + E T10, TV, T2a, TY, T2b; + { + E TF, TP, TT, Tq, TW, Tz, Tu, TI, TC, T1m, T1f, T1p, T1j, Tr, Ts; + E Tv, To, T1W; + { + E Ti, Tm, T1L, T1O, T1D, T1A, T1x, T2y, T1F, T2x; + { + E T1, T7, Tb, T3z, T8, T1z, T9, Tc; + { + E T1i, T1e, T1C, T1y, Tt, Ta, Tl; + T1 = ri[0]; + Tt = Tf * T6; + Ta = T2 * T6; + T7 = FMA(T5, T6, T4); + TF = FNMS(T5, T6, T4); + TP = FMA(T5, TO, TN); + TT = FNMS(T5, TM, TS); + Tq = FNMS(Th, T6, Tp); + TW = FMA(Th, T6, Tp); + Tz = FMA(T5, Th, Tg); + Ti = FNMS(T5, Th, Tg); + Tl = T2 * Th; + Tu = FMA(Th, T3, Tt); + TZ = FNMS(Th, T3, Tt); + TI = FMA(T5, T3, Ta); + Tb = FNMS(T5, T3, Ta); + T1i = Ti * T6; + T1e = Ti * T3; + T1C = Tz * T6; + T1y = Tz * T3; + Tm = FMA(T5, Tf, Tl); + TC = FNMS(T5, Tf, Tl); + T3z = ii[0]; + T8 = ri[WS(rs, 8)]; + T1m = FNMS(Tm, T6, T1e); + T1f = FMA(Tm, T6, T1e); + T1p = FMA(Tm, T3, T1i); + T1j = FNMS(Tm, T3, T1i); + T1L = FNMS(TC, T6, T1y); + T1z = FMA(TC, T6, T1y); + T1O = FMA(TC, T3, T1C); + T1D = FNMS(TC, T3, T1C); + T9 = T7 * T8; + Tc = ii[WS(rs, 8)]; + } + { + E T1u, T1w, T1v, T2w, T3y, T1B, T1E, Td, T3x; + T1u = ri[WS(rs, 15)]; + T1w = ii[WS(rs, 15)]; + T1A = ri[WS(rs, 7)]; + Td = FMA(Tb, Tc, T9); + T3x = T7 * Tc; + T1v = TM * T1u; + T2w = TM * T1w; + Te = T1 + Td; + T1U = T1 - Td; + T3y = FNMS(Tb, T8, T3x); + T1B = T1z * T1A; + T1E = ii[WS(rs, 7)]; + T1x = 
FMA(TO, T1w, T1v); + T3A = T3y + T3z; + T3L = T3z - T3y; + T2y = T1z * T1E; + T1F = FMA(T1D, T1E, T1B); + T2x = FNMS(TO, T1u, T2w); + } + } + { + E T1H, T1I, T1J, T1M, T1P, T2z; + T1H = ri[WS(rs, 3)]; + T2z = FNMS(T1D, T1A, T2y); + T2D = T1x - T1F; + T1G = T1x + T1F; + T1I = Tf * T1H; + T2A = T2x - T2z; + T3h = T2x + T2z; + T1J = ii[WS(rs, 3)]; + T1M = ri[WS(rs, 11)]; + T1P = ii[WS(rs, 11)]; + { + E Tj, Tk, Tn, T1V; + { + E T1K, T2F, T1Q, T2H, T2E, T1N, T2G; + Tj = ri[WS(rs, 4)]; + T1K = FMA(Th, T1J, T1I); + T2E = Tf * T1J; + T1N = T1L * T1M; + T2G = T1L * T1P; + Tk = Ti * Tj; + T2F = FNMS(Th, T1H, T2E); + T1Q = FMA(T1O, T1P, T1N); + T2H = FNMS(T1O, T1M, T2G); + Tn = ii[WS(rs, 4)]; + Tr = ri[WS(rs, 12)]; + T1R = T1K + T1Q; + T2B = T1K - T1Q; + T2I = T2F - T2H; + T3i = T2F + T2H; + T1V = Ti * Tn; + Ts = Tq * Tr; + Tv = ii[WS(rs, 12)]; + } + To = FMA(Tm, Tn, Tk); + T1W = FNMS(Tm, Tj, T1V); + } + } + } + { + E T19, T1b, T18, T2i, T1a, T2j; + { + E TE, T22, TK, T24; + { + E TA, TD, TB, T21, TG, TJ, TH, T23, T1Y, Tw, T1X; + TA = ri[WS(rs, 2)]; + Tw = FMA(Tu, Tv, Ts); + T1X = Tq * Tv; + TD = ii[WS(rs, 2)]; + TB = Tz * TA; + Tx = To + Tw; + T3M = To - Tw; + T1Y = FNMS(Tu, Tr, T1X); + T21 = Tz * TD; + TG = ri[WS(rs, 10)]; + TJ = ii[WS(rs, 10)]; + T1Z = T1W - T1Y; + T3w = T1W + T1Y; + TH = TF * TG; + T23 = TF * TJ; + TE = FMA(TC, TD, TB); + T22 = FNMS(TC, TA, T21); + TK = FMA(TI, TJ, TH); + T24 = FNMS(TI, TG, T23); + } + { + E T15, T17, T16, T2h; + T15 = ri[WS(rs, 1)]; + T17 = ii[WS(rs, 1)]; + TL = TE + TK; + T26 = TE - TK; + T25 = T22 - T24; + T37 = T22 + T24; + T16 = T2 * T15; + T2h = T2 * T17; + T19 = ri[WS(rs, 9)]; + T1b = ii[WS(rs, 9)]; + T18 = FMA(T5, T17, T16); + T2i = FNMS(T5, T15, T2h); + T1a = T3 * T19; + T2j = T3 * T1b; + } + } + { + E T1n, T1q, T1l, T2q, T1o, T2r; + { + E T1g, T1k, T1h, T2p, T1c, T2k; + T1g = ri[WS(rs, 5)]; + T1k = ii[WS(rs, 5)]; + T1c = FMA(T6, T1b, T1a); + T2k = FNMS(T6, T19, T2j); + T1h = T1f * T1g; + T2p = T1f * T1k; + T1d = T18 + T1c; + T2o 
= T18 - T1c; + T2l = T2i - T2k; + T3c = T2i + T2k; + T1n = ri[WS(rs, 13)]; + T1q = ii[WS(rs, 13)]; + T1l = FMA(T1j, T1k, T1h); + T2q = FNMS(T1j, T1g, T2p); + T1o = T1m * T1n; + T2r = T1m * T1q; + } + { + E TQ, TU, TR, T29, T1r, T2s; + TQ = ri[WS(rs, 14)]; + TU = ii[WS(rs, 14)]; + T1r = FMA(T1p, T1q, T1o); + T2s = FNMS(T1p, T1n, T2r); + TR = TP * TQ; + T29 = TP * TU; + T1s = T1l + T1r; + T2m = T1l - T1r; + T2t = T2q - T2s; + T3d = T2q + T2s; + TX = ri[WS(rs, 6)]; + T10 = ii[WS(rs, 6)]; + TV = FMA(TT, TU, TR); + T2a = FNMS(TT, TQ, T29); + TY = TW * TX; + T2b = TW * T10; + } + } + } + } + { + E T36, T3G, T3b, T3g, T28, T2d, T3F, T39, T3e, T3q, T3C, T3j, T3u, T3t; + { + E T3D, T1T, T3r, T14, T3E, T3s; + { + E Ty, T3B, T11, T2c, T13, T3v; + T36 = Te - Tx; + Ty = Te + Tx; + T3B = T3w + T3A; + T3G = T3A - T3w; + T11 = FMA(TZ, T10, TY); + T2c = FNMS(TZ, TX, T2b); + { + E T1t, T1S, T12, T38; + T3b = T1d - T1s; + T1t = T1d + T1s; + T1S = T1G + T1R; + T3g = T1G - T1R; + T12 = TV + T11; + T28 = TV - T11; + T2d = T2a - T2c; + T38 = T2a + T2c; + T3D = T1S - T1t; + T1T = T1t + T1S; + T13 = TL + T12; + T3F = T12 - TL; + T39 = T37 - T38; + T3v = T37 + T38; + } + T3e = T3c - T3d; + T3r = T3c + T3d; + T3q = Ty - T13; + T14 = Ty + T13; + T3E = T3B - T3v; + T3C = T3v + T3B; + T3s = T3h + T3i; + T3j = T3h - T3i; + } + ri[WS(rs, 8)] = T14 - T1T; + ri[0] = T14 + T1T; + ii[WS(rs, 12)] = T3E - T3D; + T3u = T3r + T3s; + T3t = T3r - T3s; + ii[WS(rs, 4)] = T3D + T3E; + } + { + E T3m, T3a, T3J, T3H; + ii[0] = T3u + T3C; + ii[WS(rs, 8)] = T3C - T3u; + ri[WS(rs, 4)] = T3q + T3t; + ri[WS(rs, 12)] = T3q - T3t; + T3m = T36 - T39; + T3a = T36 + T39; + T3J = T3G - T3F; + T3H = T3F + T3G; + { + E T2Q, T20, T3N, T3T, T2J, T2C, T3O, T2f, T34, T30, T2W, T2V, T3U, T2T, T2N; + E T2v; + { + E T2R, T27, T2e, T2S; + { + E T3n, T3f, T3o, T3k; + T2Q = T1U + T1Z; + T20 = T1U - T1Z; + T3n = T3e - T3b; + T3f = T3b + T3e; + T3o = T3g + T3j; + T3k = T3g - T3j; + T3N = T3L - T3M; + T3T = T3M + T3L; + { + E T3p, T3I, 
T3K, T3l; + T3p = T3n - T3o; + T3I = T3n + T3o; + T3K = T3k - T3f; + T3l = T3f + T3k; + ri[WS(rs, 6)] = FMA(KP707106781, T3p, T3m); + ri[WS(rs, 14)] = FNMS(KP707106781, T3p, T3m); + ii[WS(rs, 10)] = FNMS(KP707106781, T3I, T3H); + ii[WS(rs, 2)] = FMA(KP707106781, T3I, T3H); + ii[WS(rs, 14)] = FNMS(KP707106781, T3K, T3J); + ii[WS(rs, 6)] = FMA(KP707106781, T3K, T3J); + ri[WS(rs, 2)] = FMA(KP707106781, T3l, T3a); + ri[WS(rs, 10)] = FNMS(KP707106781, T3l, T3a); + T2R = T26 + T25; + T27 = T25 - T26; + T2e = T28 + T2d; + T2S = T28 - T2d; + } + } + { + E T2Y, T2Z, T2n, T2u; + T2J = T2D - T2I; + T2Y = T2D + T2I; + T2Z = T2A - T2B; + T2C = T2A + T2B; + T3O = T27 + T2e; + T2f = T27 - T2e; + T34 = FMA(KP414213562, T2Y, T2Z); + T30 = FNMS(KP414213562, T2Z, T2Y); + T2W = T2l - T2m; + T2n = T2l + T2m; + T2u = T2o - T2t; + T2V = T2o + T2t; + T3U = T2S - T2R; + T2T = T2R + T2S; + T2N = FNMS(KP414213562, T2n, T2u); + T2v = FMA(KP414213562, T2u, T2n); + } + } + { + E T33, T2X, T3X, T3Y; + { + E T2M, T2g, T2O, T2K, T3V, T3W, T2P, T2L; + T2M = FNMS(KP707106781, T2f, T20); + T2g = FMA(KP707106781, T2f, T20); + T33 = FNMS(KP414213562, T2V, T2W); + T2X = FMA(KP414213562, T2W, T2V); + T2O = FMA(KP414213562, T2C, T2J); + T2K = FNMS(KP414213562, T2J, T2C); + T3V = FMA(KP707106781, T3U, T3T); + T3X = FNMS(KP707106781, T3U, T3T); + T3W = T2O - T2N; + T2P = T2N + T2O; + T3Y = T2v + T2K; + T2L = T2v - T2K; + ii[WS(rs, 11)] = FNMS(KP923879532, T3W, T3V); + ii[WS(rs, 3)] = FMA(KP923879532, T3W, T3V); + ri[WS(rs, 3)] = FMA(KP923879532, T2L, T2g); + ri[WS(rs, 11)] = FNMS(KP923879532, T2L, T2g); + ri[WS(rs, 15)] = FMA(KP923879532, T2P, T2M); + ri[WS(rs, 7)] = FNMS(KP923879532, T2P, T2M); + } + { + E T32, T3P, T3Q, T35, T2U, T31; + T32 = FNMS(KP707106781, T2T, T2Q); + T2U = FMA(KP707106781, T2T, T2Q); + T31 = T2X + T30; + T3S = T30 - T2X; + T3R = FNMS(KP707106781, T3O, T3N); + T3P = FMA(KP707106781, T3O, T3N); + ii[WS(rs, 15)] = FMA(KP923879532, T3Y, T3X); + ii[WS(rs, 7)] = FNMS(KP923879532, T3Y, 
T3X); + ri[WS(rs, 1)] = FMA(KP923879532, T31, T2U); + ri[WS(rs, 9)] = FNMS(KP923879532, T31, T2U); + T3Q = T33 + T34; + T35 = T33 - T34; + ii[WS(rs, 9)] = FNMS(KP923879532, T3Q, T3P); + ii[WS(rs, 1)] = FMA(KP923879532, T3Q, T3P); + ri[WS(rs, 5)] = FMA(KP923879532, T35, T32); + ri[WS(rs, 13)] = FNMS(KP923879532, T35, T32); + } + } + } + } + } + } + } + ii[WS(rs, 13)] = FNMS(KP923879532, T3S, T3R); + ii[WS(rs, 5)] = FMA(KP923879532, T3S, T3R); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 15}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 16, "t2_16", twinstr, &GENUS, {104, 42, 92, 0}, 0, 0, 0 }; + +void X(codelet_t2_16) (planner *p) { + X(kdft_dit_register) (p, t2_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -name t2_16 -include t.h */ + +/* + * This function contains 196 FP additions, 108 FP multiplications, + * (or, 156 additions, 68 multiplications, 40 fused multiply/add), + * 82 stack variables, 3 constants, and 64 memory accesses + */ +#include "t.h" + +static void t2_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { + E T2, T5, Tg, Ti, Tk, To, TE, TC, T6, T3, T8, TW, TJ, Tt, TU; + E Tc, Tx, TH, TN, TO, TP, TR, T1f, T1k, T1b, T1i, T1y, T1H, T1u, T1F; + { + E T7, Tv, Ta, Ts, T4, Tw, Tb, Tr; + { + E Th, Tn, Tj, Tm; + T2 = W[0]; + T5 = W[1]; + Tg = W[2]; + Ti = W[3]; + Th = T2 * Tg; + Tn = T5 * Tg; + Tj = T5 * Ti; + Tm = T2 * Ti; + Tk = Th - Tj; + To = Tm + Tn; + TE = Tm - Tn; + TC = Th + Tj; + T6 = 
W[5]; + T7 = T5 * T6; + Tv = Tg * T6; + Ta = T2 * T6; + Ts = Ti * T6; + T3 = W[4]; + T4 = T2 * T3; + Tw = Ti * T3; + Tb = T5 * T3; + Tr = Tg * T3; + } + T8 = T4 + T7; + TW = Tv - Tw; + TJ = Ta + Tb; + Tt = Tr - Ts; + TU = Tr + Ts; + Tc = Ta - Tb; + Tx = Tv + Tw; + TH = T4 - T7; + TN = W[6]; + TO = W[7]; + TP = FMA(T2, TN, T5 * TO); + TR = FNMS(T5, TN, T2 * TO); + { + E T1d, T1e, T19, T1a; + T1d = Tk * T6; + T1e = To * T3; + T1f = T1d - T1e; + T1k = T1d + T1e; + T19 = Tk * T3; + T1a = To * T6; + T1b = T19 + T1a; + T1i = T19 - T1a; + } + { + E T1w, T1x, T1s, T1t; + T1w = TC * T6; + T1x = TE * T3; + T1y = T1w - T1x; + T1H = T1w + T1x; + T1s = TC * T3; + T1t = TE * T6; + T1u = T1s + T1t; + T1F = T1s - T1t; + } + } + { + E Tf, T3r, T1N, T3e, TA, T3s, T1Q, T3b, TM, T2M, T1W, T2w, TZ, T2N, T21; + E T2x, T1B, T1K, T2V, T2W, T2X, T2Y, T2j, T2D, T2o, T2E, T18, T1n, T2Q, T2R; + E T2S, T2T, T28, T2A, T2d, T2B; + { + E T1, T3d, Te, T3c, T9, Td; + T1 = ri[0]; + T3d = ii[0]; + T9 = ri[WS(rs, 8)]; + Td = ii[WS(rs, 8)]; + Te = FMA(T8, T9, Tc * Td); + T3c = FNMS(Tc, T9, T8 * Td); + Tf = T1 + Te; + T3r = T3d - T3c; + T1N = T1 - Te; + T3e = T3c + T3d; + } + { + E Tq, T1O, Tz, T1P; + { + E Tl, Tp, Tu, Ty; + Tl = ri[WS(rs, 4)]; + Tp = ii[WS(rs, 4)]; + Tq = FMA(Tk, Tl, To * Tp); + T1O = FNMS(To, Tl, Tk * Tp); + Tu = ri[WS(rs, 12)]; + Ty = ii[WS(rs, 12)]; + Tz = FMA(Tt, Tu, Tx * Ty); + T1P = FNMS(Tx, Tu, Tt * Ty); + } + TA = Tq + Tz; + T3s = Tq - Tz; + T1Q = T1O - T1P; + T3b = T1O + T1P; + } + { + E TG, T1S, TL, T1T, T1U, T1V; + { + E TD, TF, TI, TK; + TD = ri[WS(rs, 2)]; + TF = ii[WS(rs, 2)]; + TG = FMA(TC, TD, TE * TF); + T1S = FNMS(TE, TD, TC * TF); + TI = ri[WS(rs, 10)]; + TK = ii[WS(rs, 10)]; + TL = FMA(TH, TI, TJ * TK); + T1T = FNMS(TJ, TI, TH * TK); + } + TM = TG + TL; + T2M = T1S + T1T; + T1U = T1S - T1T; + T1V = TG - TL; + T1W = T1U - T1V; + T2w = T1V + T1U; + } + { + E TT, T1Y, TY, T1Z, T1X, T20; + { + E TQ, TS, TV, TX; + TQ = ri[WS(rs, 14)]; + TS = ii[WS(rs, 14)]; + TT = 
FMA(TP, TQ, TR * TS); + T1Y = FNMS(TR, TQ, TP * TS); + TV = ri[WS(rs, 6)]; + TX = ii[WS(rs, 6)]; + TY = FMA(TU, TV, TW * TX); + T1Z = FNMS(TW, TV, TU * TX); + } + TZ = TT + TY; + T2N = T1Y + T1Z; + T1X = TT - TY; + T20 = T1Y - T1Z; + T21 = T1X + T20; + T2x = T1X - T20; + } + { + E T1r, T2k, T1J, T2h, T1A, T2l, T1E, T2g; + { + E T1p, T1q, T1G, T1I; + T1p = ri[WS(rs, 15)]; + T1q = ii[WS(rs, 15)]; + T1r = FMA(TN, T1p, TO * T1q); + T2k = FNMS(TO, T1p, TN * T1q); + T1G = ri[WS(rs, 11)]; + T1I = ii[WS(rs, 11)]; + T1J = FMA(T1F, T1G, T1H * T1I); + T2h = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1v, T1z, T1C, T1D; + T1v = ri[WS(rs, 7)]; + T1z = ii[WS(rs, 7)]; + T1A = FMA(T1u, T1v, T1y * T1z); + T2l = FNMS(T1y, T1v, T1u * T1z); + T1C = ri[WS(rs, 3)]; + T1D = ii[WS(rs, 3)]; + T1E = FMA(Tg, T1C, Ti * T1D); + T2g = FNMS(Ti, T1C, Tg * T1D); + } + T1B = T1r + T1A; + T1K = T1E + T1J; + T2V = T1B - T1K; + T2W = T2k + T2l; + T2X = T2g + T2h; + T2Y = T2W - T2X; + { + E T2f, T2i, T2m, T2n; + T2f = T1r - T1A; + T2i = T2g - T2h; + T2j = T2f - T2i; + T2D = T2f + T2i; + T2m = T2k - T2l; + T2n = T1E - T1J; + T2o = T2m + T2n; + T2E = T2m - T2n; + } + } + { + E T14, T24, T1m, T2b, T17, T25, T1h, T2a; + { + E T12, T13, T1j, T1l; + T12 = ri[WS(rs, 1)]; + T13 = ii[WS(rs, 1)]; + T14 = FMA(T2, T12, T5 * T13); + T24 = FNMS(T5, T12, T2 * T13); + T1j = ri[WS(rs, 13)]; + T1l = ii[WS(rs, 13)]; + T1m = FMA(T1i, T1j, T1k * T1l); + T2b = FNMS(T1k, T1j, T1i * T1l); + } + { + E T15, T16, T1c, T1g; + T15 = ri[WS(rs, 9)]; + T16 = ii[WS(rs, 9)]; + T17 = FMA(T3, T15, T6 * T16); + T25 = FNMS(T6, T15, T3 * T16); + T1c = ri[WS(rs, 5)]; + T1g = ii[WS(rs, 5)]; + T1h = FMA(T1b, T1c, T1f * T1g); + T2a = FNMS(T1f, T1c, T1b * T1g); + } + T18 = T14 + T17; + T1n = T1h + T1m; + T2Q = T18 - T1n; + T2R = T24 + T25; + T2S = T2a + T2b; + T2T = T2R - T2S; + { + E T26, T27, T29, T2c; + T26 = T24 - T25; + T27 = T1h - T1m; + T28 = T26 + T27; + T2A = T26 - T27; + T29 = T14 - T17; + T2c = T2a - T2b; + T2d = T29 - T2c; + T2B = T29 + 
T2c; + } + } + { + E T23, T2r, T3A, T3C, T2q, T3B, T2u, T3x; + { + E T1R, T22, T3y, T3z; + T1R = T1N - T1Q; + T22 = KP707106781 * (T1W - T21); + T23 = T1R + T22; + T2r = T1R - T22; + T3y = KP707106781 * (T2x - T2w); + T3z = T3s + T3r; + T3A = T3y + T3z; + T3C = T3z - T3y; + } + { + E T2e, T2p, T2s, T2t; + T2e = FMA(KP923879532, T28, KP382683432 * T2d); + T2p = FNMS(KP923879532, T2o, KP382683432 * T2j); + T2q = T2e + T2p; + T3B = T2p - T2e; + T2s = FNMS(KP923879532, T2d, KP382683432 * T28); + T2t = FMA(KP382683432, T2o, KP923879532 * T2j); + T2u = T2s - T2t; + T3x = T2s + T2t; + } + ri[WS(rs, 11)] = T23 - T2q; + ii[WS(rs, 11)] = T3A - T3x; + ri[WS(rs, 3)] = T23 + T2q; + ii[WS(rs, 3)] = T3x + T3A; + ri[WS(rs, 15)] = T2r - T2u; + ii[WS(rs, 15)] = T3C - T3B; + ri[WS(rs, 7)] = T2r + T2u; + ii[WS(rs, 7)] = T3B + T3C; + } + { + E T2P, T31, T3m, T3o, T30, T3n, T34, T3j; + { + E T2L, T2O, T3k, T3l; + T2L = Tf - TA; + T2O = T2M - T2N; + T2P = T2L + T2O; + T31 = T2L - T2O; + T3k = TZ - TM; + T3l = T3e - T3b; + T3m = T3k + T3l; + T3o = T3l - T3k; + } + { + E T2U, T2Z, T32, T33; + T2U = T2Q + T2T; + T2Z = T2V - T2Y; + T30 = KP707106781 * (T2U + T2Z); + T3n = KP707106781 * (T2Z - T2U); + T32 = T2T - T2Q; + T33 = T2V + T2Y; + T34 = KP707106781 * (T32 - T33); + T3j = KP707106781 * (T32 + T33); + } + ri[WS(rs, 10)] = T2P - T30; + ii[WS(rs, 10)] = T3m - T3j; + ri[WS(rs, 2)] = T2P + T30; + ii[WS(rs, 2)] = T3j + T3m; + ri[WS(rs, 14)] = T31 - T34; + ii[WS(rs, 14)] = T3o - T3n; + ri[WS(rs, 6)] = T31 + T34; + ii[WS(rs, 6)] = T3n + T3o; + } + { + E T2z, T2H, T3u, T3w, T2G, T3v, T2K, T3p; + { + E T2v, T2y, T3q, T3t; + T2v = T1N + T1Q; + T2y = KP707106781 * (T2w + T2x); + T2z = T2v + T2y; + T2H = T2v - T2y; + T3q = KP707106781 * (T1W + T21); + T3t = T3r - T3s; + T3u = T3q + T3t; + T3w = T3t - T3q; + } + { + E T2C, T2F, T2I, T2J; + T2C = FMA(KP382683432, T2A, KP923879532 * T2B); + T2F = FNMS(KP382683432, T2E, KP923879532 * T2D); + T2G = T2C + T2F; + T3v = T2F - T2C; + T2I = FNMS(KP382683432, 
T2B, KP923879532 * T2A); + T2J = FMA(KP923879532, T2E, KP382683432 * T2D); + T2K = T2I - T2J; + T3p = T2I + T2J; + } + ri[WS(rs, 9)] = T2z - T2G; + ii[WS(rs, 9)] = T3u - T3p; + ri[WS(rs, 1)] = T2z + T2G; + ii[WS(rs, 1)] = T3p + T3u; + ri[WS(rs, 13)] = T2H - T2K; + ii[WS(rs, 13)] = T3w - T3v; + ri[WS(rs, 5)] = T2H + T2K; + ii[WS(rs, 5)] = T3v + T3w; + } + { + E T11, T35, T3g, T3i, T1M, T3h, T38, T39; + { + E TB, T10, T3a, T3f; + TB = Tf + TA; + T10 = TM + TZ; + T11 = TB + T10; + T35 = TB - T10; + T3a = T2M + T2N; + T3f = T3b + T3e; + T3g = T3a + T3f; + T3i = T3f - T3a; + } + { + E T1o, T1L, T36, T37; + T1o = T18 + T1n; + T1L = T1B + T1K; + T1M = T1o + T1L; + T3h = T1L - T1o; + T36 = T2R + T2S; + T37 = T2W + T2X; + T38 = T36 - T37; + T39 = T36 + T37; + } + ri[WS(rs, 8)] = T11 - T1M; + ii[WS(rs, 8)] = T3g - T39; + ri[0] = T11 + T1M; + ii[0] = T39 + T3g; + ri[WS(rs, 12)] = T35 - T38; + ii[WS(rs, 12)] = T3i - T3h; + ri[WS(rs, 4)] = T35 + T38; + ii[WS(rs, 4)] = T3h + T3i; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 15}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 16, "t2_16", twinstr, &GENUS, {156, 68, 40, 0}, 0, 0, 0 }; + +void X(codelet_t2_16) (planner *p) { + X(kdft_dit_register) (p, t2_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1064 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -name t2_20 -include t.h */ + +/* + * This function contains 276 FP additions, 198 FP multiplications, + * (or, 136 additions, 58 multiplications, 140 fused multiply/add), + * 142 stack variables, 4 constants, and 80 memory accesses + */ +#include "t.h" + +static void t2_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) { + E T59, T5i, T5k, T5e, T5c, T5d, T5j, T5f; + { + E T2, Th, Tf, T6, T5, Tl, T1p, T1n, Ti, T3, Tt, Tv, T24, T1f, T1D; + E Tb, T1P, Tm, T21, T1b, T7, T1A, Tw, T1H, T13, TA, T1L, T17, T1S, Tq; + E T1o, T2g, T1t, T2c, TO, TK; + { + E T1e, Ta, Tk, Tg; + T2 = W[0]; + Th = W[3]; + Tf = W[2]; + T6 = W[5]; + T5 = W[1]; + Tk = T2 * Th; + Tg = T2 * Tf; + T1e = Tf * T6; + Ta = T2 * T6; + Tl = FMA(T5, Tf, Tk); + T1p = FNMS(T5, Tf, Tk); + 
T1n = FMA(T5, Th, Tg); + Ti = FNMS(T5, Th, Tg); + T3 = W[4]; + Tt = W[6]; + Tv = W[7]; + { + E Tp, Tj, TN, TJ; + Tp = Ti * T6; + T24 = FMA(Th, T3, T1e); + T1f = FNMS(Th, T3, T1e); + T1D = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + Tj = Ti * T3; + { + E T1a, T4, Tu, T1G; + T1a = Tf * T3; + T4 = T2 * T3; + Tu = Ti * Tt; + T1G = T2 * Tt; + { + E T12, Tz, T1K, T16; + T12 = Tf * Tt; + Tz = Ti * Tv; + T1K = T2 * Tv; + T16 = Tf * Tv; + T1P = FNMS(Tl, T6, Tj); + Tm = FMA(Tl, T6, Tj); + T21 = FNMS(Th, T6, T1a); + T1b = FMA(Th, T6, T1a); + T7 = FNMS(T5, T6, T4); + T1A = FMA(T5, T6, T4); + Tw = FMA(Tl, Tv, Tu); + T1H = FMA(T5, Tv, T1G); + T13 = FMA(Th, Tv, T12); + TA = FNMS(Tl, Tt, Tz); + T1L = FNMS(T5, Tt, T1K); + T17 = FNMS(Th, Tt, T16); + T1S = FMA(Tl, T3, Tp); + Tq = FNMS(Tl, T3, Tp); + } + } + T1o = T1n * T3; + T2g = T1n * Tv; + TN = Tm * Tv; + TJ = Tm * Tt; + T1t = T1n * T6; + T2c = T1n * Tt; + TO = FNMS(Tq, Tt, TN); + TK = FMA(Tq, Tv, TJ); + } + } + { + E Te, T2C, T4L, T57, T58, TD, T2H, T4H, T3C, T3Z, T11, T2v, T2P, T3P, T4k; + E T4v, T3u, T43, T2r, T2z, T3b, T3T, T4g, T4z, T3n, T42, T20, T2y, T34, T3S; + E T4d, T4y, T1c, T19, T1d, T3E, T1w, T2U, T1g, T1j, T1l; + { + E T2d, T2h, T2k, T1q, T1u, T2n, TL, TI, TM, T3x, TZ, T2N, TP, TS, TU; + { + E T1, T4K, T8, T9, Tc; + T1 = ri[0]; + T4K = ii[0]; + T8 = ri[WS(rs, 10)]; + T2d = FMA(T1p, Tv, T2c); + T2h = FNMS(T1p, Tt, T2g); + T2k = FMA(T1p, T6, T1o); + T1q = FNMS(T1p, T6, T1o); + T1u = FMA(T1p, T3, T1t); + T2n = FNMS(T1p, T3, T1t); + T9 = T7 * T8; + Tc = ii[WS(rs, 10)]; + { + E Tx, Ts, T2F, TC, T2E; + { + E Tn, Tr, To, T2D, T4J, Ty, TB, Td, T4I; + Tn = ri[WS(rs, 5)]; + Tr = ii[WS(rs, 5)]; + Tx = ri[WS(rs, 15)]; + Td = FMA(Tb, Tc, T9); + T4I = T7 * Tc; + To = Tm * Tn; + T2D = Tm * Tr; + Te = T1 + Td; + T2C = T1 - Td; + T4J = FNMS(Tb, T8, T4I); + Ty = Tw * Tx; + TB = ii[WS(rs, 15)]; + Ts = FMA(Tq, Tr, To); + T4L = T4J + T4K; + T57 = T4K - T4J; + T2F = Tw * TB; + TC = FMA(TA, TB, Ty); + T2E = FNMS(Tq, Tn, T2D); + } + { + E 
TF, TG, TH, TW, TY, T2G, T3w, TX, T2M; + TF = ri[WS(rs, 4)]; + T2G = FNMS(TA, Tx, T2F); + T58 = Ts - TC; + TD = Ts + TC; + TG = Ti * TF; + T2H = T2E - T2G; + T4H = T2E + T2G; + TH = ii[WS(rs, 4)]; + TW = ri[WS(rs, 19)]; + TY = ii[WS(rs, 19)]; + TL = ri[WS(rs, 14)]; + TI = FMA(Tl, TH, TG); + T3w = Ti * TH; + TX = Tt * TW; + T2M = Tt * TY; + TM = TK * TL; + T3x = FNMS(Tl, TF, T3w); + TZ = FMA(Tv, TY, TX); + T2N = FNMS(Tv, TW, T2M); + TP = ii[WS(rs, 14)]; + TS = ri[WS(rs, 9)]; + TU = ii[WS(rs, 9)]; + } + } + } + { + E T27, T26, T28, T3p, T2p, T39, T29, T2e, T2i; + { + E T22, T23, T25, T2l, T2o, T3o, T2m, T38; + { + E TR, T2J, T3z, TV, T2L, T4i, T3A; + T22 = ri[WS(rs, 12)]; + { + E TQ, T3y, TT, T2K; + TQ = FMA(TO, TP, TM); + T3y = TK * TP; + TT = T3 * TS; + T2K = T3 * TU; + TR = TI + TQ; + T2J = TI - TQ; + T3z = FNMS(TO, TL, T3y); + TV = FMA(T6, TU, TT); + T2L = FNMS(T6, TS, T2K); + T23 = T21 * T22; + } + T4i = T3x + T3z; + T3A = T3x - T3z; + { + E T10, T3B, T4j, T2O; + T10 = TV + TZ; + T3B = TV - TZ; + T4j = T2L + T2N; + T2O = T2L - T2N; + T3C = T3A + T3B; + T3Z = T3A - T3B; + T11 = TR - T10; + T2v = TR + T10; + T2P = T2J - T2O; + T3P = T2J + T2O; + T4k = T4i - T4j; + T4v = T4i + T4j; + T25 = ii[WS(rs, 12)]; + } + } + T2l = ri[WS(rs, 7)]; + T2o = ii[WS(rs, 7)]; + T27 = ri[WS(rs, 2)]; + T26 = FMA(T24, T25, T23); + T3o = T21 * T25; + T2m = T2k * T2l; + T38 = T2k * T2o; + T28 = T1n * T27; + T3p = FNMS(T24, T22, T3o); + T2p = FMA(T2n, T2o, T2m); + T39 = FNMS(T2n, T2l, T38); + T29 = ii[WS(rs, 2)]; + T2e = ri[WS(rs, 17)]; + T2i = ii[WS(rs, 17)]; + } + { + E T1I, T1F, T1J, T3i, T1Y, T32, T1M, T1Q, T1T; + { + E T1B, T1C, T1E, T1V, T1X, T3h, T1W, T31; + { + E T2b, T35, T3r, T2j, T37, T4e, T3s; + T1B = ri[WS(rs, 8)]; + { + E T2a, T3q, T2f, T36; + T2a = FMA(T1p, T29, T28); + T3q = T1n * T29; + T2f = T2d * T2e; + T36 = T2d * T2i; + T2b = T26 + T2a; + T35 = T26 - T2a; + T3r = FNMS(T1p, T27, T3q); + T2j = FMA(T2h, T2i, T2f); + T37 = FNMS(T2h, T2e, T36); + T1C = T1A * T1B; + } + T4e 
= T3p + T3r; + T3s = T3p - T3r; + { + E T2q, T3t, T4f, T3a; + T2q = T2j + T2p; + T3t = T2j - T2p; + T4f = T37 + T39; + T3a = T37 - T39; + T3u = T3s + T3t; + T43 = T3s - T3t; + T2r = T2b - T2q; + T2z = T2b + T2q; + T3b = T35 - T3a; + T3T = T35 + T3a; + T4g = T4e - T4f; + T4z = T4e + T4f; + T1E = ii[WS(rs, 8)]; + } + } + T1V = ri[WS(rs, 3)]; + T1X = ii[WS(rs, 3)]; + T1I = ri[WS(rs, 18)]; + T1F = FMA(T1D, T1E, T1C); + T3h = T1A * T1E; + T1W = Tf * T1V; + T31 = Tf * T1X; + T1J = T1H * T1I; + T3i = FNMS(T1D, T1B, T3h); + T1Y = FMA(Th, T1X, T1W); + T32 = FNMS(Th, T1V, T31); + T1M = ii[WS(rs, 18)]; + T1Q = ri[WS(rs, 13)]; + T1T = ii[WS(rs, 13)]; + } + { + E T14, T15, T18, T1r, T1v, T3D, T1s, T2T; + { + E T1O, T2Y, T3k, T1U, T30, T4b, T3l; + T14 = ri[WS(rs, 16)]; + { + E T1N, T3j, T1R, T2Z; + T1N = FMA(T1L, T1M, T1J); + T3j = T1H * T1M; + T1R = T1P * T1Q; + T2Z = T1P * T1T; + T1O = T1F + T1N; + T2Y = T1F - T1N; + T3k = FNMS(T1L, T1I, T3j); + T1U = FMA(T1S, T1T, T1R); + T30 = FNMS(T1S, T1Q, T2Z); + T15 = T13 * T14; + } + T4b = T3i + T3k; + T3l = T3i - T3k; + { + E T1Z, T3m, T4c, T33; + T1Z = T1U + T1Y; + T3m = T1U - T1Y; + T4c = T30 + T32; + T33 = T30 - T32; + T3n = T3l + T3m; + T42 = T3l - T3m; + T20 = T1O - T1Z; + T2y = T1O + T1Z; + T34 = T2Y - T33; + T3S = T2Y + T33; + T4d = T4b - T4c; + T4y = T4b + T4c; + T18 = ii[WS(rs, 16)]; + } + } + T1r = ri[WS(rs, 11)]; + T1v = ii[WS(rs, 11)]; + T1c = ri[WS(rs, 6)]; + T19 = FMA(T17, T18, T15); + T3D = T13 * T18; + T1s = T1q * T1r; + T2T = T1q * T1v; + T1d = T1b * T1c; + T3E = FNMS(T17, T14, T3D); + T1w = FMA(T1u, T1v, T1s); + T2U = FNMS(T1u, T1r, T2T); + T1g = ii[WS(rs, 6)]; + T1j = ri[WS(rs, 1)]; + T1l = ii[WS(rs, 1)]; + } + } + } + } + { + E T3J, T40, T2W, T3Q, T4M, T4E, T4F, T4U, T4S; + { + E T4X, T2u, T2w, T4w, T4W, T4r, T4p, T54, T56, T4V, T4a, T4q; + { + E T4h, TE, T4n, T53, T1z, T2s, T52; + { + E T1i, T2Q, T3G, T1m, T2S, T4l, T3H; + T4h = T4d - T4g; + T4X = T4d + T4g; + { + E T1h, T3F, T1k, T2R; + T1h = FMA(T1f, T1g, T1d); + 
T3F = T1b * T1g; + T1k = T2 * T1j; + T2R = T2 * T1l; + T1i = T19 + T1h; + T2Q = T19 - T1h; + T3G = FNMS(T1f, T1c, T3F); + T1m = FMA(T5, T1l, T1k); + T2S = FNMS(T5, T1j, T2R); + } + TE = Te - TD; + T2u = Te + TD; + T4l = T3E + T3G; + T3H = T3E - T3G; + { + E T1x, T3I, T4m, T2V, T1y; + T1x = T1m + T1w; + T3I = T1m - T1w; + T4m = T2S + T2U; + T2V = T2S - T2U; + T3J = T3H + T3I; + T40 = T3H - T3I; + T1y = T1i - T1x; + T2w = T1i + T1x; + T2W = T2Q - T2V; + T3Q = T2Q + T2V; + T4n = T4l - T4m; + T4w = T4l + T4m; + T53 = T11 - T1y; + T1z = T11 + T1y; + T2s = T20 + T2r; + T52 = T20 - T2r; + } + } + { + E T49, T48, T4o, T2t; + T4o = T4k - T4n; + T4W = T4k + T4n; + T49 = T1z - T2s; + T2t = T1z + T2s; + T4r = FMA(KP618033988, T4h, T4o); + T4p = FNMS(KP618033988, T4o, T4h); + T54 = FNMS(KP618033988, T53, T52); + T56 = FMA(KP618033988, T52, T53); + ri[WS(rs, 10)] = TE + T2t; + T48 = FNMS(KP250000000, T2t, TE); + T4V = T4L - T4H; + T4M = T4H + T4L; + T4a = FNMS(KP559016994, T49, T48); + T4q = FMA(KP559016994, T49, T48); + } + } + { + E T2x, T4Q, T4B, T4D, T4R, T2A, T51, T55; + { + E T4x, T50, T4Y, T4A, T4Z; + T4E = T4v + T4w; + T4x = T4v - T4w; + ri[WS(rs, 18)] = FMA(KP951056516, T4p, T4a); + ri[WS(rs, 2)] = FNMS(KP951056516, T4p, T4a); + ri[WS(rs, 6)] = FMA(KP951056516, T4r, T4q); + ri[WS(rs, 14)] = FNMS(KP951056516, T4r, T4q); + T50 = T4W - T4X; + T4Y = T4W + T4X; + T4A = T4y - T4z; + T4F = T4y + T4z; + T2x = T2v + T2w; + T4Q = T2v - T2w; + ii[WS(rs, 10)] = T4Y + T4V; + T4Z = FNMS(KP250000000, T4Y, T4V); + T4B = FMA(KP618033988, T4A, T4x); + T4D = FNMS(KP618033988, T4x, T4A); + T4R = T2y - T2z; + T2A = T2y + T2z; + T51 = FNMS(KP559016994, T50, T4Z); + T55 = FMA(KP559016994, T50, T4Z); + } + { + E T4t, T4s, T2B, T4u, T4C; + T2B = T2x + T2A; + T4t = T2x - T2A; + ii[WS(rs, 18)] = FNMS(KP951056516, T54, T51); + ii[WS(rs, 2)] = FMA(KP951056516, T54, T51); + ii[WS(rs, 14)] = FMA(KP951056516, T56, T55); + ii[WS(rs, 6)] = FNMS(KP951056516, T56, T55); + ri[0] = T2u + T2B; + T4s = 
FNMS(KP250000000, T2B, T2u); + T4u = FMA(KP559016994, T4t, T4s); + T4C = FNMS(KP559016994, T4t, T4s); + T4U = FNMS(KP618033988, T4Q, T4R); + T4S = FMA(KP618033988, T4R, T4Q); + ri[WS(rs, 16)] = FMA(KP951056516, T4B, T4u); + ri[WS(rs, 4)] = FNMS(KP951056516, T4B, T4u); + ri[WS(rs, 8)] = FMA(KP951056516, T4D, T4C); + ri[WS(rs, 12)] = FNMS(KP951056516, T4D, T4C); + } + } + } + { + E T3O, T5u, T5w, T5l, T5q, T5o; + { + E T5n, T5m, T2I, T4O, T3N, T3L, T2X, T5t, T4N, T5s, T3c, T3v, T3K, T4G; + T5n = T3n + T3u; + T3v = T3n - T3u; + T3K = T3C - T3J; + T5m = T3C + T3J; + T3O = T2C + T2H; + T2I = T2C - T2H; + T4O = T4E - T4F; + T4G = T4E + T4F; + T3N = FMA(KP618033988, T3v, T3K); + T3L = FNMS(KP618033988, T3K, T3v); + T2X = T2P + T2W; + T5t = T2P - T2W; + ii[0] = T4G + T4M; + T4N = FNMS(KP250000000, T4G, T4M); + T5s = T34 - T3b; + T3c = T34 + T3b; + { + E T3f, T3e, T4P, T4T, T3d, T3M, T3g; + T4P = FMA(KP559016994, T4O, T4N); + T4T = FNMS(KP559016994, T4O, T4N); + T3f = T2X - T3c; + T3d = T2X + T3c; + ii[WS(rs, 16)] = FNMS(KP951056516, T4S, T4P); + ii[WS(rs, 4)] = FMA(KP951056516, T4S, T4P); + ii[WS(rs, 12)] = FMA(KP951056516, T4U, T4T); + ii[WS(rs, 8)] = FNMS(KP951056516, T4U, T4T); + ri[WS(rs, 15)] = T2I + T3d; + T3e = FNMS(KP250000000, T3d, T2I); + T5u = FNMS(KP618033988, T5t, T5s); + T5w = FMA(KP618033988, T5s, T5t); + T5l = T58 + T57; + T59 = T57 - T58; + T3M = FMA(KP559016994, T3f, T3e); + T3g = FNMS(KP559016994, T3f, T3e); + ri[WS(rs, 7)] = FNMS(KP951056516, T3L, T3g); + ri[WS(rs, 3)] = FMA(KP951056516, T3L, T3g); + ri[WS(rs, 19)] = FNMS(KP951056516, T3N, T3M); + ri[WS(rs, 11)] = FMA(KP951056516, T3N, T3M); + T5q = T5m - T5n; + T5o = T5m + T5n; + } + } + { + E T5a, T5b, T47, T45, T5g, T5h, T3V, T3X, T41, T44, T5p, T3W, T46, T3Y; + T5a = T3Z + T40; + T41 = T3Z - T40; + T44 = T42 - T43; + T5b = T42 + T43; + ii[WS(rs, 15)] = T5o + T5l; + T5p = FNMS(KP250000000, T5o, T5l); + T47 = FNMS(KP618033988, T41, T44); + T45 = FMA(KP618033988, T44, T41); + { + E T5r, T5v, T3R, T3U; 
+ T5r = FNMS(KP559016994, T5q, T5p); + T5v = FMA(KP559016994, T5q, T5p); + T3R = T3P + T3Q; + T5g = T3P - T3Q; + T5h = T3S - T3T; + T3U = T3S + T3T; + ii[WS(rs, 7)] = FMA(KP951056516, T5u, T5r); + ii[WS(rs, 3)] = FNMS(KP951056516, T5u, T5r); + ii[WS(rs, 19)] = FMA(KP951056516, T5w, T5v); + ii[WS(rs, 11)] = FNMS(KP951056516, T5w, T5v); + T3V = T3R + T3U; + T3X = T3R - T3U; + } + ri[WS(rs, 5)] = T3O + T3V; + T3W = FNMS(KP250000000, T3V, T3O); + T5i = FMA(KP618033988, T5h, T5g); + T5k = FNMS(KP618033988, T5g, T5h); + T46 = FNMS(KP559016994, T3X, T3W); + T3Y = FMA(KP559016994, T3X, T3W); + ri[WS(rs, 9)] = FNMS(KP951056516, T45, T3Y); + ri[WS(rs, 1)] = FMA(KP951056516, T45, T3Y); + ri[WS(rs, 17)] = FNMS(KP951056516, T47, T46); + ri[WS(rs, 13)] = FMA(KP951056516, T47, T46); + T5e = T5a - T5b; + T5c = T5a + T5b; + } + } + } + } + } + ii[WS(rs, 5)] = T5c + T59; + T5d = FNMS(KP250000000, T5c, T59); + T5j = FNMS(KP559016994, T5e, T5d); + T5f = FMA(KP559016994, T5e, T5d); + ii[WS(rs, 9)] = FMA(KP951056516, T5i, T5f); + ii[WS(rs, 1)] = FNMS(KP951056516, T5i, T5f); + ii[WS(rs, 17)] = FMA(KP951056516, T5k, T5j); + ii[WS(rs, 13)] = FNMS(KP951056516, T5k, T5j); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 19}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 20, "t2_20", twinstr, &GENUS, {136, 58, 140, 0}, 0, 0, 0 }; + +void X(codelet_t2_20) (planner *p) { + X(kdft_dit_register) (p, t2_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -name t2_20 -include t.h */ + +/* + * This function contains 276 FP additions, 164 FP multiplications, + * (or, 204 additions, 92 multiplications, 72 fused multiply/add), + * 123 stack variables, 4 constants, and 80 memory accesses + */ +#include "t.h" + +static void t2_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + 
DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) { + E T2, T5, Tg, Ti, Tk, To, T1h, T1f, T6, T3, T8, T14, T1Q, Tc, T1O; + E T1v, T18, T1t, T1n, T24, T1j, T22, Tq, Tu, T1E, T1G, Tx, Ty, Tz, TJ; + E T1Z, TB, T1X, T1A, TZ, TL, T1y, TX; + { + E T7, T16, Ta, T13, T4, T17, Tb, T12; + { + E Th, Tn, Tj, Tm; + T2 = W[0]; + T5 = W[1]; + Tg = W[2]; + Ti = W[3]; + Th = T2 * Tg; + Tn = T5 * Tg; + Tj = T5 * Ti; + Tm = T2 * Ti; + Tk = Th - Tj; + To = Tm + Tn; + T1h = Tm - Tn; + T1f = Th + Tj; + T6 = W[5]; + T7 = T5 * T6; + T16 = Tg * T6; + Ta = T2 * T6; + T13 = Ti * T6; + T3 = W[4]; + T4 = T2 * T3; + T17 = Ti * T3; + Tb = T5 * T3; + T12 = Tg * T3; + } + T8 = T4 - T7; + T14 = T12 + T13; + T1Q = T16 + T17; + Tc = Ta + Tb; + T1O = T12 - T13; + T1v = Ta - Tb; + T18 = T16 - T17; + T1t = T4 + T7; + { + E T1l, T1m, T1g, T1i; + T1l = T1f * T6; + T1m = T1h * T3; + T1n = T1l + T1m; + T24 = T1l - T1m; + T1g = T1f * T3; + T1i = T1h * T6; + T1j = T1g - T1i; + T22 = T1g + T1i; + { + E Tl, Tp, Ts, Tt; + Tl = Tk * T3; + Tp = To * T6; + Tq = Tl + Tp; + Ts = Tk * T6; + Tt = To * T3; + Tu = Ts - Tt; + T1E = Tl - Tp; + T1G = Ts + Tt; + Tx = W[6]; + Ty = W[7]; + Tz = FMA(Tk, Tx, To * Ty); + TJ = FMA(Tq, Tx, Tu * Ty); + T1Z = FNMS(T1h, Tx, T1f * Ty); + TB = FNMS(To, Tx, Tk * Ty); + T1X = FMA(T1f, Tx, T1h * Ty); + T1A = FNMS(T5, Tx, T2 * Ty); + TZ = FNMS(Ti, Tx, Tg * Ty); + TL = FNMS(Tu, Tx, Tq * Ty); + T1y = FMA(T2, Tx, T5 * Ty); + TX = FMA(Tg, Tx, Ti * Ty); + } + } + } + { + E TF, T2b, T4A, T4J, T2K, T3r, T4a, T4m, T1N, T28, T29, T3C, T3F, T4o, T3X; + E T3Y, T44, T2f, T2g, T2h, T2n, T2s, T4L, T3g, T3h, T4w, T3n, T3o, T3p, T30; + E T35, T36, TW, 
T1r, T1s, T3J, T3M, T4n, T3U, T3V, T43, T2c, T2d, T2e, T2y; + E T2D, T4K, T3d, T3e, T4v, T3k, T3l, T3m, T2P, T2U, T2V; + { + E T1, T48, Te, T47, Tw, T2H, TD, T2I, T9, Td; + T1 = ri[0]; + T48 = ii[0]; + T9 = ri[WS(rs, 10)]; + Td = ii[WS(rs, 10)]; + Te = FMA(T8, T9, Tc * Td); + T47 = FNMS(Tc, T9, T8 * Td); + { + E Tr, Tv, TA, TC; + Tr = ri[WS(rs, 5)]; + Tv = ii[WS(rs, 5)]; + Tw = FMA(Tq, Tr, Tu * Tv); + T2H = FNMS(Tu, Tr, Tq * Tv); + TA = ri[WS(rs, 15)]; + TC = ii[WS(rs, 15)]; + TD = FMA(Tz, TA, TB * TC); + T2I = FNMS(TB, TA, Tz * TC); + } + { + E Tf, TE, T4y, T4z; + Tf = T1 + Te; + TE = Tw + TD; + TF = Tf - TE; + T2b = Tf + TE; + T4y = T48 - T47; + T4z = Tw - TD; + T4A = T4y - T4z; + T4J = T4z + T4y; + } + { + E T2G, T2J, T46, T49; + T2G = T1 - Te; + T2J = T2H - T2I; + T2K = T2G - T2J; + T3r = T2G + T2J; + T46 = T2H + T2I; + T49 = T47 + T48; + T4a = T46 + T49; + T4m = T49 - T46; + } + } + { + E T1D, T3A, T2l, T2W, T27, T3E, T2r, T34, T1M, T3B, T2m, T2Z, T1W, T3D, T2q; + E T31; + { + E T1x, T2j, T1C, T2k; + { + E T1u, T1w, T1z, T1B; + T1u = ri[WS(rs, 8)]; + T1w = ii[WS(rs, 8)]; + T1x = FMA(T1t, T1u, T1v * T1w); + T2j = FNMS(T1v, T1u, T1t * T1w); + T1z = ri[WS(rs, 18)]; + T1B = ii[WS(rs, 18)]; + T1C = FMA(T1y, T1z, T1A * T1B); + T2k = FNMS(T1A, T1z, T1y * T1B); + } + T1D = T1x + T1C; + T3A = T2j + T2k; + T2l = T2j - T2k; + T2W = T1x - T1C; + } + { + E T21, T32, T26, T33; + { + E T1Y, T20, T23, T25; + T1Y = ri[WS(rs, 17)]; + T20 = ii[WS(rs, 17)]; + T21 = FMA(T1X, T1Y, T1Z * T20); + T32 = FNMS(T1Z, T1Y, T1X * T20); + T23 = ri[WS(rs, 7)]; + T25 = ii[WS(rs, 7)]; + T26 = FMA(T22, T23, T24 * T25); + T33 = FNMS(T24, T23, T22 * T25); + } + T27 = T21 + T26; + T3E = T32 + T33; + T2r = T21 - T26; + T34 = T32 - T33; + } + { + E T1I, T2X, T1L, T2Y; + { + E T1F, T1H, T1J, T1K; + T1F = ri[WS(rs, 13)]; + T1H = ii[WS(rs, 13)]; + T1I = FMA(T1E, T1F, T1G * T1H); + T2X = FNMS(T1G, T1F, T1E * T1H); + T1J = ri[WS(rs, 3)]; + T1K = ii[WS(rs, 3)]; + T1L = FMA(Tg, T1J, Ti * T1K); + T2Y = 
FNMS(Ti, T1J, Tg * T1K); + } + T1M = T1I + T1L; + T3B = T2X + T2Y; + T2m = T1I - T1L; + T2Z = T2X - T2Y; + } + { + E T1S, T2o, T1V, T2p; + { + E T1P, T1R, T1T, T1U; + T1P = ri[WS(rs, 12)]; + T1R = ii[WS(rs, 12)]; + T1S = FMA(T1O, T1P, T1Q * T1R); + T2o = FNMS(T1Q, T1P, T1O * T1R); + T1T = ri[WS(rs, 2)]; + T1U = ii[WS(rs, 2)]; + T1V = FMA(T1f, T1T, T1h * T1U); + T2p = FNMS(T1h, T1T, T1f * T1U); + } + T1W = T1S + T1V; + T3D = T2o + T2p; + T2q = T2o - T2p; + T31 = T1S - T1V; + } + T1N = T1D - T1M; + T28 = T1W - T27; + T29 = T1N + T28; + T3C = T3A - T3B; + T3F = T3D - T3E; + T4o = T3C + T3F; + T3X = T3A + T3B; + T3Y = T3D + T3E; + T44 = T3X + T3Y; + T2f = T1D + T1M; + T2g = T1W + T27; + T2h = T2f + T2g; + T2n = T2l + T2m; + T2s = T2q + T2r; + T4L = T2n + T2s; + T3g = T2l - T2m; + T3h = T2q - T2r; + T4w = T3g + T3h; + T3n = T2W + T2Z; + T3o = T31 + T34; + T3p = T3n + T3o; + T30 = T2W - T2Z; + T35 = T31 - T34; + T36 = T30 + T35; + } + { + E TO, T3H, T2w, T2L, T1q, T3L, T2C, T2T, TV, T3I, T2x, T2O, T1b, T3K, T2B; + E T2Q; + { + E TI, T2u, TN, T2v; + { + E TG, TH, TK, TM; + TG = ri[WS(rs, 4)]; + TH = ii[WS(rs, 4)]; + TI = FMA(Tk, TG, To * TH); + T2u = FNMS(To, TG, Tk * TH); + TK = ri[WS(rs, 14)]; + TM = ii[WS(rs, 14)]; + TN = FMA(TJ, TK, TL * TM); + T2v = FNMS(TL, TK, TJ * TM); + } + TO = TI + TN; + T3H = T2u + T2v; + T2w = T2u - T2v; + T2L = TI - TN; + } + { + E T1e, T2R, T1p, T2S; + { + E T1c, T1d, T1k, T1o; + T1c = ri[WS(rs, 1)]; + T1d = ii[WS(rs, 1)]; + T1e = FMA(T2, T1c, T5 * T1d); + T2R = FNMS(T5, T1c, T2 * T1d); + T1k = ri[WS(rs, 11)]; + T1o = ii[WS(rs, 11)]; + T1p = FMA(T1j, T1k, T1n * T1o); + T2S = FNMS(T1n, T1k, T1j * T1o); + } + T1q = T1e + T1p; + T3L = T2R + T2S; + T2C = T1e - T1p; + T2T = T2R - T2S; + } + { + E TR, T2M, TU, T2N; + { + E TP, TQ, TS, TT; + TP = ri[WS(rs, 9)]; + TQ = ii[WS(rs, 9)]; + TR = FMA(T3, TP, T6 * TQ); + T2M = FNMS(T6, TP, T3 * TQ); + TS = ri[WS(rs, 19)]; + TT = ii[WS(rs, 19)]; + TU = FMA(Tx, TS, Ty * TT); + T2N = FNMS(Ty, TS, Tx * TT); + 
} + TV = TR + TU; + T3I = T2M + T2N; + T2x = TR - TU; + T2O = T2M - T2N; + } + { + E T11, T2z, T1a, T2A; + { + E TY, T10, T15, T19; + TY = ri[WS(rs, 16)]; + T10 = ii[WS(rs, 16)]; + T11 = FMA(TX, TY, TZ * T10); + T2z = FNMS(TZ, TY, TX * T10); + T15 = ri[WS(rs, 6)]; + T19 = ii[WS(rs, 6)]; + T1a = FMA(T14, T15, T18 * T19); + T2A = FNMS(T18, T15, T14 * T19); + } + T1b = T11 + T1a; + T3K = T2z + T2A; + T2B = T2z - T2A; + T2Q = T11 - T1a; + } + TW = TO - TV; + T1r = T1b - T1q; + T1s = TW + T1r; + T3J = T3H - T3I; + T3M = T3K - T3L; + T4n = T3J + T3M; + T3U = T3H + T3I; + T3V = T3K + T3L; + T43 = T3U + T3V; + T2c = TO + TV; + T2d = T1b + T1q; + T2e = T2c + T2d; + T2y = T2w + T2x; + T2D = T2B + T2C; + T4K = T2y + T2D; + T3d = T2w - T2x; + T3e = T2B - T2C; + T4v = T3d + T3e; + T3k = T2L + T2O; + T3l = T2Q + T2T; + T3m = T3k + T3l; + T2P = T2L - T2O; + T2U = T2Q - T2T; + T2V = T2P + T2U; + } + { + E T3y, T2a, T3x, T3O, T3Q, T3G, T3N, T3P, T3z; + T3y = KP559016994 * (T1s - T29); + T2a = T1s + T29; + T3x = FNMS(KP250000000, T2a, TF); + T3G = T3C - T3F; + T3N = T3J - T3M; + T3O = FNMS(KP587785252, T3N, KP951056516 * T3G); + T3Q = FMA(KP951056516, T3N, KP587785252 * T3G); + ri[WS(rs, 10)] = TF + T2a; + T3P = T3y + T3x; + ri[WS(rs, 14)] = T3P - T3Q; + ri[WS(rs, 6)] = T3P + T3Q; + T3z = T3x - T3y; + ri[WS(rs, 2)] = T3z - T3O; + ri[WS(rs, 18)] = T3z + T3O; + } + { + E T4r, T4p, T4q, T4l, T4u, T4j, T4k, T4t, T4s; + T4r = KP559016994 * (T4n - T4o); + T4p = T4n + T4o; + T4q = FNMS(KP250000000, T4p, T4m); + T4j = T1N - T28; + T4k = TW - T1r; + T4l = FNMS(KP587785252, T4k, KP951056516 * T4j); + T4u = FMA(KP951056516, T4k, KP587785252 * T4j); + ii[WS(rs, 10)] = T4p + T4m; + T4t = T4r + T4q; + ii[WS(rs, 6)] = T4t - T4u; + ii[WS(rs, 14)] = T4u + T4t; + T4s = T4q - T4r; + ii[WS(rs, 2)] = T4l + T4s; + ii[WS(rs, 18)] = T4s - T4l; + } + { + E T3R, T2i, T3S, T40, T42, T3W, T3Z, T41, T3T; + T3R = KP559016994 * (T2e - T2h); + T2i = T2e + T2h; + T3S = FNMS(KP250000000, T2i, T2b); + T3W = T3U - 
T3V; + T3Z = T3X - T3Y; + T40 = FMA(KP951056516, T3W, KP587785252 * T3Z); + T42 = FNMS(KP587785252, T3W, KP951056516 * T3Z); + ri[0] = T2b + T2i; + T41 = T3S - T3R; + ri[WS(rs, 12)] = T41 - T42; + ri[WS(rs, 8)] = T41 + T42; + T3T = T3R + T3S; + ri[WS(rs, 4)] = T3T - T40; + ri[WS(rs, 16)] = T3T + T40; + } + { + E T4e, T45, T4f, T4d, T4i, T4b, T4c, T4h, T4g; + T4e = KP559016994 * (T43 - T44); + T45 = T43 + T44; + T4f = FNMS(KP250000000, T45, T4a); + T4b = T2c - T2d; + T4c = T2f - T2g; + T4d = FMA(KP951056516, T4b, KP587785252 * T4c); + T4i = FNMS(KP587785252, T4b, KP951056516 * T4c); + ii[0] = T45 + T4a; + T4h = T4f - T4e; + ii[WS(rs, 8)] = T4h - T4i; + ii[WS(rs, 12)] = T4i + T4h; + T4g = T4e + T4f; + ii[WS(rs, 4)] = T4d + T4g; + ii[WS(rs, 16)] = T4g - T4d; + } + { + E T39, T37, T38, T2F, T3b, T2t, T2E, T3c, T3a; + T39 = KP559016994 * (T2V - T36); + T37 = T2V + T36; + T38 = FNMS(KP250000000, T37, T2K); + T2t = T2n - T2s; + T2E = T2y - T2D; + T2F = FNMS(KP587785252, T2E, KP951056516 * T2t); + T3b = FMA(KP951056516, T2E, KP587785252 * T2t); + ri[WS(rs, 15)] = T2K + T37; + T3c = T39 + T38; + ri[WS(rs, 11)] = T3b + T3c; + ri[WS(rs, 19)] = T3c - T3b; + T3a = T38 - T39; + ri[WS(rs, 3)] = T2F + T3a; + ri[WS(rs, 7)] = T3a - T2F; + } + { + E T4O, T4M, T4N, T4S, T4U, T4Q, T4R, T4T, T4P; + T4O = KP559016994 * (T4K - T4L); + T4M = T4K + T4L; + T4N = FNMS(KP250000000, T4M, T4J); + T4Q = T30 - T35; + T4R = T2P - T2U; + T4S = FNMS(KP587785252, T4R, KP951056516 * T4Q); + T4U = FMA(KP951056516, T4R, KP587785252 * T4Q); + ii[WS(rs, 15)] = T4M + T4J; + T4T = T4O + T4N; + ii[WS(rs, 11)] = T4T - T4U; + ii[WS(rs, 19)] = T4U + T4T; + T4P = T4N - T4O; + ii[WS(rs, 3)] = T4P - T4S; + ii[WS(rs, 7)] = T4S + T4P; + } + { + E T3q, T3s, T3t, T3j, T3v, T3f, T3i, T3w, T3u; + T3q = KP559016994 * (T3m - T3p); + T3s = T3m + T3p; + T3t = FNMS(KP250000000, T3s, T3r); + T3f = T3d - T3e; + T3i = T3g - T3h; + T3j = FMA(KP951056516, T3f, KP587785252 * T3i); + T3v = FNMS(KP587785252, T3f, KP951056516 * T3i); 
+ ri[WS(rs, 5)] = T3r + T3s; + T3w = T3t - T3q; + ri[WS(rs, 13)] = T3v + T3w; + ri[WS(rs, 17)] = T3w - T3v; + T3u = T3q + T3t; + ri[WS(rs, 1)] = T3j + T3u; + ri[WS(rs, 9)] = T3u - T3j; + } + { + E T4x, T4B, T4C, T4G, T4I, T4E, T4F, T4H, T4D; + T4x = KP559016994 * (T4v - T4w); + T4B = T4v + T4w; + T4C = FNMS(KP250000000, T4B, T4A); + T4E = T3k - T3l; + T4F = T3n - T3o; + T4G = FMA(KP951056516, T4E, KP587785252 * T4F); + T4I = FNMS(KP587785252, T4E, KP951056516 * T4F); + ii[WS(rs, 5)] = T4B + T4A; + T4H = T4C - T4x; + ii[WS(rs, 13)] = T4H - T4I; + ii[WS(rs, 17)] = T4I + T4H; + T4D = T4x + T4C; + ii[WS(rs, 1)] = T4D - T4G; + ii[WS(rs, 9)] = T4G + T4D; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 19}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 20, "t2_20", twinstr, &GENUS, {204, 92, 72, 0}, 0, 0, 0 }; + +void X(codelet_t2_20) (planner *p) { + X(kdft_dit_register) (p, t2_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1619 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:57 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include t.h */ + +/* + * This function contains 440 FP additions, 434 FP multiplications, + * (or, 84 additions, 78 multiplications, 356 fused multiply/add), + * 215 stack variables, 47 constants, and 100 memory accesses + */ +#include "t.h" + +static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) /* Size-25 twiddle codelet, FMA variant (compiled only when HAVE_FMA is defined). For each m in [mb, me) it loads ri/ii[WS(rs, k)] for k = 0..24, combines inputs 1..24 with factors derived from the 8 reals W[0..7] (input 0 is used as loaded), and stores 25 results back into the same ri/ii slots. ri and ii advance by ms and W by 8 per iteration; W is first offset by mb * 8. NOTE(review): reads as complex twiddle multiplies assuming FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b in the included headers -- confirm. */ +{ + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP062914667,
+0.062914667253649757225485955897349402364686947); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP943557151, +0.943557151597354104399655195398983005179443399); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP833417178, +0.833417178328688677408962550243238843138996060); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); +
DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) { /* one size-25 butterfly per iteration */ + E T8c, T7k, T7i, T8i, T8g, T8b, T7j, T7b, T8d, T8h; + { + E T2, T8, T3, T6, Tk, Tv, TS, T4, Ta, TD, T2L, T10, Tm, T5, Tc; + T2 = W[0]; /* W[0..7] are the only twiddle values loaded; the products below derive the factors applied to the other inputs */ + T8 = W[4]; + T3 = W[2]; + T6 = W[3]; + Tk = W[6]; + Tv = T2 * T8; + TS = T3 * T8; + T4 = T2 * T3; + Ta = T2 * T6; + TD = T8 * Tk; + T2L = T2 * Tk; + T10 = T3 * Tk; + Tm = W[7]; + T5 = W[1]; + Tc = W[5]; + { + E T7G, T86, T4s, T6a, T4g, TN, T4f, T7C, T7s, T7B, T5q, T6k, T3a, T5j, T6n; + E T6m, T5g, T4a, T5n, T6j, T6C, T4G, T6z, T4z, T1v, T3t, T6y, T4w, T6B, T4D; + E T6v, T4O, T6s, T4V, T21, T3H, T6r, T4S, T6u, T4L, T26, T3K, T5a, T2A, T3U; + E T53, T2c, T3M, T2k, T3O; + { + E T11, T1b, Tb, T19, T7, T2m, TT, T15, T2Q, TX, T2p, T1g, T2a, T2e, T2i; + E T27, T1c, T1O, T1K, T1q, T1m, T2x, T2t, T1W, T1S, T2G, T3Y, T2N, T5p, T38; + E T48, T5i, T2K, T40, T2S, T41; + { + E T2M, T1j, T1l, T2X, T2U, T35, T31, T7r, T7p, T7o, T2O, T2R; + { + E T1, Tj, T4j, TK, T4q, TC, T4o, Tt, T4l; + { + E TE, Tw, TI, TA, Th, Tr, Tn, Td, Te, Ti, T14, T2P, TH, Tx, TB; + T1 = ri[0]; + T11 = FMA(T6, Tm, T10); + T14 = T3 * Tm; + T2P = T2 * Tm; + TH = T8 * Tm; + T2M = FMA(T5, Tm, T2L); + T1b = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + T19 = FMA(T5, T6, T4); + T7 = FNMS(T5, T6, T4); + T2m = FNMS(T6, Tc, TS); + TT = FMA(T6, Tc, TS); + TE = FMA(Tc, Tm, TD); + T1j = FMA(T5, Tc, Tv); + Tw = FNMS(T5, Tc, Tv); + { + E TW, Tz, T1f, T2d; + TW = T3 * Tc; + Tz = T2 * Tc; + T15 = FNMS(T6, Tk, T14); + T2Q = FNMS(T5, Tk, T2P); + TI = FNMS(Tc, Tk, TH); + T1f = T19 * Tc; + T2d = T19 * Tk; + { + E T2h, T1a, Tg, Tq; + T2h = T19 * Tm; + T1a = T19 * T8; + Tg = T7 * Tc; + Tq = T7 * Tm; + { + E Tl, T9, T1p, T1k; + Tl = T7 * Tk; + T9 = T7 * T8; +
T1p = T1j * Tm; + T1k = T1j * Tk; + { + E T34, T30, T1N, T1J; + T34 = TT * Tm; + T30 = TT * Tk; + T1N = Tw * Tm; + T1J = Tw * Tk; + TX = FNMS(T6, T8, TW); + T2p = FMA(T6, T8, TW); + TA = FMA(T5, T8, Tz); + T1l = FNMS(T5, T8, Tz); + T1g = FMA(T1b, T8, T1f); + T2a = FNMS(T1b, T8, T1f); + T2e = FMA(T1b, Tm, T2d); + T2i = FNMS(T1b, Tk, T2h); + T27 = FMA(T1b, Tc, T1a); + T1c = FNMS(T1b, Tc, T1a); + T2X = FMA(Tb, T8, Tg); + Th = FNMS(Tb, T8, Tg); + Tr = FNMS(Tb, Tk, Tq); + Tn = FMA(Tb, Tm, Tl); + Td = FMA(Tb, Tc, T9); + T2U = FNMS(Tb, Tc, T9); + T35 = FNMS(TX, Tk, T34); + T31 = FMA(TX, Tm, T30); + T1O = FNMS(TA, Tk, T1N); + T1K = FMA(TA, Tm, T1J); + T1q = FNMS(T1l, Tk, T1p); + T1m = FMA(T1l, Tm, T1k); + { + E T2w, T2s, T1V, T1R; + T2w = T27 * Tm; + T2s = T27 * Tk; + T1V = Td * Tm; + T1R = Td * Tk; + T2x = FNMS(T2a, Tk, T2w); + T2t = FMA(T2a, Tm, T2s); + T1W = FNMS(Th, Tk, T1V); + T1S = FMA(Th, Tm, T1R); + T7r = ii[0]; + Te = ri[WS(rs, 5)]; + Ti = ii[WS(rs, 5)]; + } + } + } + } + } + { + E TF, TJ, Tf, T4i, TG, T4p; + TF = ri[WS(rs, 15)]; + TJ = ii[WS(rs, 15)]; + Tf = Td * Te; + T4i = Td * Ti; + TG = TE * TF; + T4p = TE * TJ; + Tj = FMA(Th, Ti, Tf); + T4j = FNMS(Th, Te, T4i); + TK = FMA(TI, TJ, TG); + T4q = FNMS(TI, TF, T4p); + } + Tx = ri[WS(rs, 10)]; + TB = ii[WS(rs, 10)]; + { + E To, Ts, Ty, T4n, Tp, T4k; + To = ri[WS(rs, 20)]; + Ts = ii[WS(rs, 20)]; + Ty = Tw * Tx; + T4n = Tw * TB; + Tp = Tn * To; + T4k = Tn * Ts; + TC = FMA(TA, TB, Ty); + T4o = FNMS(TA, Tx, T4n); + Tt = FMA(Tr, Ts, Tp); + T4l = FNMS(Tr, To, T4k); + } + } + { + E TL, T7F, T4r, Tu, T7E, T4m, TM; + TL = TC + TK; + T7F = TC - TK; + T4r = T4o - T4q; + T7p = T4o + T4q; + Tu = Tj + Tt; + T7E = Tj - Tt; + T4m = T4j - T4l; + T7o = T4j + T4l; + T7G = FMA(KP618033988, T7F, T7E); + T86 = FNMS(KP618033988, T7E, T7F); + T4s = FMA(KP618033988, T4r, T4m); + T6a = FNMS(KP618033988, T4m, T4r); + T4g = Tu - TL; + TM = Tu + TL; + TN = T1 + TM; + T4f = FNMS(KP250000000, TM, T1); + } + } + { + E T2D, T2F, T7q, T2E, T3X; +
T2D = ri[WS(rs, 3)]; + T2F = ii[WS(rs, 3)]; + T7C = T7o - T7p; + T7q = T7o + T7p; + T2E = T3 * T2D; + T3X = T3 * T2F; + { + E T2V, T2W, T2Y, T32, T36; + T2V = ri[WS(rs, 13)]; + T7s = T7q + T7r; + T7B = FNMS(KP250000000, T7q, T7r); + T2G = FMA(T6, T2F, T2E); + T3Y = FNMS(T6, T2D, T3X); + T2W = T2U * T2V; + T2Y = ii[WS(rs, 13)]; + T32 = ri[WS(rs, 18)]; + T36 = ii[WS(rs, 18)]; + { + E T2H, T2I, T2J, T3Z; + { + E T2Z, T45, T37, T47, T44, T33, T46; + T2H = ri[WS(rs, 8)]; + T2Z = FMA(T2X, T2Y, T2W); + T44 = T2U * T2Y; + T33 = T31 * T32; + T46 = T31 * T36; + T2I = T1j * T2H; + T45 = FNMS(T2X, T2V, T44); + T37 = FMA(T35, T36, T33); + T47 = FNMS(T35, T32, T46); + T2J = ii[WS(rs, 8)]; + T2N = ri[WS(rs, 23)]; + T5p = T2Z - T37; + T38 = T2Z + T37; + T48 = T45 + T47; + T5i = T47 - T45; + T3Z = T1j * T2J; + T2O = T2M * T2N; + T2R = ii[WS(rs, 23)]; + } + T2K = FMA(T1l, T2J, T2I); + T40 = FNMS(T1l, T2H, T3Z); + } + } + } + T2S = FMA(T2Q, T2R, T2O); + T41 = T2M * T2R; + } + { + E TR, T3h, T1t, T4F, T3r, T4y, TZ, T3j, T17, T3l; + { + E T12, T16, T13, T3k; + { + E TO, TP, T5m, T5l, TQ; + { + E T2T, T5o, T42, T5f, T39; + TO = ri[WS(rs, 1)]; + T2T = T2K + T2S; + T5o = T2K - T2S; + T42 = FNMS(T2Q, T2N, T41); + TP = T2 * TO; + T5q = FMA(KP618033988, T5p, T5o); + T6k = FNMS(KP618033988, T5o, T5p); + T5f = T38 - T2T; + T39 = T2T + T38; + { + E T43, T5h, T5e, T49; + T43 = T40 + T42; + T5h = T42 - T40; + T5e = FNMS(KP250000000, T39, T2G); + T3a = T2G + T39; + T5j = FMA(KP618033988, T5i, T5h); + T6n = FNMS(KP618033988, T5h, T5i); + T5m = T48 - T43; + T49 = T43 + T48; + T6m = FMA(KP559016994, T5f, T5e); + T5g = FNMS(KP559016994, T5f, T5e); + T5l = FNMS(KP250000000, T49, T3Y); + T4a = T3Y + T49; + TQ = ii[WS(rs, 1)]; + } + } + { + E T1n, T1r, T1i, T1o, T3o, T3p; + { + E T1d, T1h, T1e, T3n, T3g; + T1d = ri[WS(rs, 11)]; + T1h = ii[WS(rs, 11)]; + T5n = FNMS(KP559016994, T5m, T5l); + T6j = FMA(KP559016994, T5m, T5l); + TR = FMA(T5, TQ, TP); + T3g = T2 * TQ; + T1e = T1c * T1d; + T3n = T1c * T1h; +
T1n = ri[WS(rs, 16)]; + T3h = FNMS(T5, TO, T3g); + T1r = ii[WS(rs, 16)]; + T1i = FMA(T1g, T1h, T1e); + T1o = T1m * T1n; + T3o = FNMS(T1g, T1d, T3n); + T3p = T1m * T1r; + } + { + E TU, TY, TV, T3i, T3q, T1s; + TU = ri[WS(rs, 6)]; + T1s = FMA(T1q, T1r, T1o); + TY = ii[WS(rs, 6)]; + T3q = FNMS(T1q, T1n, T3p); + TV = TT * TU; + T1t = T1i + T1s; + T4F = T1s - T1i; + T3i = TT * TY; + T3r = T3o + T3q; + T4y = T3q - T3o; + T12 = ri[WS(rs, 21)]; + T16 = ii[WS(rs, 21)]; + TZ = FMA(TX, TY, TV); + T3j = FNMS(TX, TU, T3i); + T13 = T11 * T12; + T3k = T11 * T16; + } + } + } + T17 = FMA(T15, T16, T13); + T3l = FNMS(T15, T12, T3k); + } + { + E T1z, T3v, T4N, T1Z, T3F, T4U, T1D, T3x, T1H, T3z; + { + E T1E, T1G, T1F, T3y; + { + E T1w, T1y, T1x, T4v, T4C, T4u, T4B, T3u, T18, T4E; + T1w = ri[WS(rs, 4)]; + T1y = ii[WS(rs, 4)]; + T18 = TZ + T17; + T4E = T17 - TZ; + { + E T3m, T4x, T1u, T3s; + T3m = T3j + T3l; + T4x = T3j - T3l; + T1x = T7 * T1w; + T6C = FNMS(KP618033988, T4E, T4F); + T4G = FMA(KP618033988, T4F, T4E); + T1u = T18 + T1t; + T4v = T18 - T1t; + T6z = FMA(KP618033988, T4x, T4y); + T4z = FNMS(KP618033988, T4y, T4x); + T3s = T3m + T3r; + T4C = T3m - T3r; + T1v = TR + T1u; + T4u = FNMS(KP250000000, T1u, TR); + T3t = T3h + T3s; + T4B = FNMS(KP250000000, T3s, T3h); + T3u = T7 * T1y; + } + T6y = FNMS(KP559016994, T4v, T4u); + T4w = FMA(KP559016994, T4v, T4u); + T6B = FNMS(KP559016994, T4C, T4B); + T4D = FMA(KP559016994, T4C, T4B); + T1z = FMA(Tb, T1y, T1x); + T3v = FNMS(Tb, T1w, T3u); + } + { + E T1Q, T3C, T1Y, T3E; + { + E T1L, T1P, T1T, T1X, T1M, T3B, T1U, T3D; + T1L = ri[WS(rs, 14)]; + T1P = ii[WS(rs, 14)]; + T1T = ri[WS(rs, 19)]; + T1X = ii[WS(rs, 19)]; + T1M = T1K * T1L; + T3B = T1K * T1P; + T1U = T1S * T1T; + T3D = T1S * T1X; + T1Q = FMA(T1O, T1P, T1M); + T3C = FNMS(T1O, T1L, T3B); + T1Y = FMA(T1W, T1X, T1U); + T3E = FNMS(T1W, T1T, T3D); + } + { + E T1A, T1C, T1B, T3w; + T1A = ri[WS(rs, 9)]; + T1C = ii[WS(rs, 9)]; + T4N = T1Y - T1Q; + T1Z = T1Q + T1Y; + T3F = T3C + T3E; + T4U
= T3E - T3C; + T1B = T8 * T1A; + T3w = T8 * T1C; + T1E = ri[WS(rs, 24)]; + T1G = ii[WS(rs, 24)]; + T1D = FMA(Tc, T1C, T1B); + T3x = FNMS(Tc, T1A, T3w); + T1F = Tk * T1E; + T3y = Tk * T1G; + } + } + T1H = FMA(Tm, T1G, T1F); + T3z = FNMS(Tm, T1E, T3y); + } + { + E T2f, T2j, T2g, T3N; + { + E T23, T25, T24, T4R, T4K, T4Q, T4J, T3J, T1I, T4M; + T23 = ri[WS(rs, 2)]; + T25 = ii[WS(rs, 2)]; + T1I = T1D + T1H; + T4M = T1H - T1D; + { + E T3A, T4T, T20, T3G; + T3A = T3x + T3z; + T4T = T3z - T3x; + T24 = T19 * T23; + T6v = FNMS(KP618033988, T4M, T4N); + T4O = FMA(KP618033988, T4N, T4M); + T20 = T1I + T1Z; + T4R = T1I - T1Z; + T6s = FNMS(KP618033988, T4T, T4U); + T4V = FMA(KP618033988, T4U, T4T); + T3G = T3A + T3F; + T4K = T3F - T3A; + T21 = T1z + T20; + T4Q = FNMS(KP250000000, T20, T1z); + T3H = T3v + T3G; + T4J = FNMS(KP250000000, T3G, T3v); + T3J = T19 * T25; + } + T6r = FNMS(KP559016994, T4R, T4Q); + T4S = FMA(KP559016994, T4R, T4Q); + T6u = FMA(KP559016994, T4K, T4J); + T4L = FNMS(KP559016994, T4K, T4J); + T26 = FMA(T1b, T25, T24); + T3K = FNMS(T1b, T23, T3J); + } + { + E T2r, T3R, T2z, T3T; + { + E T2n, T2q, T2u, T2y, T2o, T3Q, T2v, T3S; + T2n = ri[WS(rs, 12)]; + T2q = ii[WS(rs, 12)]; + T2u = ri[WS(rs, 17)]; + T2y = ii[WS(rs, 17)]; + T2o = T2m * T2n; + T3Q = T2m * T2q; + T2v = T2t * T2u; + T3S = T2t * T2y; + T2r = FMA(T2p, T2q, T2o); + T3R = FNMS(T2p, T2n, T3Q); + T2z = FMA(T2x, T2y, T2v); + T3T = FNMS(T2x, T2u, T3S); + } + { + E T28, T2b, T29, T3L; + T28 = ri[WS(rs, 7)]; + T2b = ii[WS(rs, 7)]; + T5a = T2z - T2r; + T2A = T2r + T2z; + T3U = T3R + T3T; + T53 = T3R - T3T; + T29 = T27 * T28; + T3L = T27 * T2b; + T2f = ri[WS(rs, 22)]; + T2j = ii[WS(rs, 22)]; + T2c = FMA(T2a, T2b, T29); + T3M = FNMS(T2a, T28, T3L); + T2g = T2e * T2f; + T3N = T2e * T2j; + } + } + T2k = FMA(T2i, T2j, T2g); + T3O = FNMS(T2i, T2f, T3N); + } + } + } + } + { + E T7l, T5b, T6d, T54, T6g, T51, T6f, T7m, T6c, T58, T4e, T4c, T7A, T7y, T4d; + E T3f; + { + E T7w, T22, T7x, T3b, T3I, T3c, T3e, T3d; + T7l =
T3t + T3H; + T3I = T3t - T3H; + { + E T2l, T59, T3P, T52; + T2l = T2c + T2k; + T59 = T2k - T2c; + T3P = T3M + T3O; + T52 = T3O - T3M; + T5b = FMA(KP618033988, T5a, T59); + T6d = FNMS(KP618033988, T59, T5a); + { + E T50, T2B, T57, T3V; + T50 = T2A - T2l; + T2B = T2l + T2A; + T54 = FNMS(KP618033988, T53, T52); + T6g = FMA(KP618033988, T52, T53); + T57 = T3U - T3P; + T3V = T3P + T3U; + { + E T4Z, T2C, T56, T3W, T4b; + T4Z = FNMS(KP250000000, T2B, T26); + T2C = T26 + T2B; + T56 = FNMS(KP250000000, T3V, T3K); + T3W = T3K + T3V; + T7w = T1v - T21; + T22 = T1v + T21; + T51 = FNMS(KP559016994, T50, T4Z); + T6f = FMA(KP559016994, T50, T4Z); + T4b = T3W - T4a; + T7m = T3W + T4a; + T6c = FMA(KP559016994, T57, T56); + T58 = FNMS(KP559016994, T57, T56); + T7x = T2C - T3a; + T3b = T2C + T3a; + T4e = FNMS(KP618033988, T3I, T4b); + T4c = FMA(KP618033988, T4b, T3I); + } + } + } + T3c = T22 + T3b; + T3e = T22 - T3b; + ri[0] = TN + T3c; /* first store of the iteration; results are written back to the ri/ii slots the inputs were loaded from */ + T3d = FNMS(KP250000000, T3c, TN); + T7A = FNMS(KP618033988, T7w, T7x); + T7y = FMA(KP618033988, T7x, T7w); + T4d = FNMS(KP559016994, T3e, T3d); + T3f = FMA(KP559016994, T3e, T3d); + } + { + E T69, T85, T7Y, T68, T66, T84, T82, T7X, T67, T5Z; + { + E T4t, T5H, T5Q, T7T, T7H, T5P, T5M, T5L, T5A, T7O, T5D, T7P, T7K, T7M, T5u; + E T5w, T5K, T63, T61, T5U, T7D, T7z, T7v; + { + E T7u, T7t, T4h, T7n; + T69 = FNMS(KP559016994, T4g, T4f); + T4h = FMA(KP559016994, T4g, T4f); + T7u = T7l - T7m; + T7n = T7l + T7m; + ri[WS(rs, 5)] = FMA(KP951056516, T4c, T3f); + ri[WS(rs, 20)] = FNMS(KP951056516, T4c, T3f); + ri[WS(rs, 15)] = FMA(KP951056516, T4e, T4d); + ri[WS(rs, 10)] = FNMS(KP951056516, T4e, T4d); + ii[0] = T7n + T7s; + T7t = FNMS(KP250000000, T7n, T7s); + T4t = FMA(KP951056516, T4s, T4h); + T5H = FNMS(KP951056516, T4s, T4h); + T7D = FMA(KP559016994, T7C, T7B); + T85 = FNMS(KP559016994, T7C, T7B); + T7z = FNMS(KP559016994, T7u, T7t); + T7v = FMA(KP559016994, T7u, T7t); + } + { + E T5I, T5J, T5S, T4P, T5y, T4I, T5C, T5s, T4W, T5T, T55, T5c; + { + E T4A, T4H,
T5k, T5r; + T5Q = FNMS(KP951056516, T4z, T4w); + T4A = FMA(KP951056516, T4z, T4w); + T7T = FMA(KP951056516, T7G, T7D); + T7H = FNMS(KP951056516, T7G, T7D); + ii[WS(rs, 20)] = FMA(KP951056516, T7y, T7v); + ii[WS(rs, 5)] = FNMS(KP951056516, T7y, T7v); + ii[WS(rs, 15)] = FNMS(KP951056516, T7A, T7z); + ii[WS(rs, 10)] = FMA(KP951056516, T7A, T7z); + T4H = FMA(KP951056516, T4G, T4D); + T5P = FNMS(KP951056516, T4G, T4D); + T5I = FMA(KP951056516, T5j, T5g); + T5k = FNMS(KP951056516, T5j, T5g); + T5r = FNMS(KP951056516, T5q, T5n); + T5J = FMA(KP951056516, T5q, T5n); + T5S = FNMS(KP951056516, T4O, T4L); + T4P = FMA(KP951056516, T4O, T4L); + T5y = FNMS(KP256756360, T4A, T4H); + T4I = FMA(KP256756360, T4H, T4A); + T5C = FNMS(KP939062505, T5k, T5r); + T5s = FMA(KP939062505, T5r, T5k); + T4W = FNMS(KP951056516, T4V, T4S); + T5T = FMA(KP951056516, T4V, T4S); + T5M = FMA(KP951056516, T54, T51); + T55 = FNMS(KP951056516, T54, T51); + T5c = FMA(KP951056516, T5b, T58); + T5L = FNMS(KP951056516, T5b, T58); + } + { + E T4Y, T5t, T5z, T4X; + T5z = FNMS(KP634619297, T4P, T4W); + T4X = FMA(KP634619297, T4W, T4P); + { + E T5B, T5d, T7I, T7J; + T5B = FNMS(KP549754652, T55, T5c); + T5d = FMA(KP549754652, T5c, T55); + T7I = FNMS(KP871714437, T5z, T5y); + T5A = FMA(KP871714437, T5z, T5y); + T4Y = FMA(KP871714437, T4X, T4I); + T7O = FNMS(KP871714437, T4X, T4I); + T7J = FMA(KP831864738, T5C, T5B); + T5D = FNMS(KP831864738, T5C, T5B); + T5t = FMA(KP831864738, T5s, T5d); + T7P = FNMS(KP831864738, T5s, T5d); + T7K = FMA(KP904730450, T7J, T7I); + T7M = FNMS(KP904730450, T7J, T7I); + } + T5u = FMA(KP904730450, T5t, T4Y); + T5w = FNMS(KP904730450, T5t, T4Y); + } + T5K = FNMS(KP126329378, T5J, T5I); + T63 = FMA(KP126329378, T5I, T5J); + T61 = FNMS(KP827271945, T5S, T5T); + T5U = FMA(KP827271945, T5T, T5S); + } + { + E T65, T81, T62, T80, T7W, T5W, T5Y; + { + E T5O, T5V, T64, T5N; + ri[WS(rs, 1)] = FMA(KP968583161, T5u, T4t); + T64 = FMA(KP470564281, T5L, T5M); + T5N = FNMS(KP470564281, T5M, T5L); + { +
E T60, T5R, T7U, T7V; + T60 = FNMS(KP634619297, T5P, T5Q); + T5R = FMA(KP634619297, T5Q, T5P); + T7U = FMA(KP912018591, T64, T63); + T65 = FNMS(KP912018591, T64, T63); + T5O = FNMS(KP912018591, T5N, T5K); + T81 = FMA(KP912018591, T5N, T5K); + T7V = FNMS(KP912575812, T61, T60); + T62 = FMA(KP912575812, T61, T60); + T5V = FNMS(KP912575812, T5U, T5R); + T80 = FMA(KP912575812, T5U, T5R); + T7W = FMA(KP851038619, T7V, T7U); + T7Y = FNMS(KP851038619, T7V, T7U); + ii[WS(rs, 1)] = FMA(KP968583161, T7K, T7H); + } + T5W = FNMS(KP851038619, T5V, T5O); + T5Y = FMA(KP851038619, T5V, T5O); + } + { + E T5G, T5E, T7S, T7Q, T7L, T5F, T5x, T5v, T5X, T7R, T7N; + T5G = FNMS(KP683113946, T5A, T5D); + T5E = FMA(KP559154169, T5D, T5A); + ii[WS(rs, 4)] = FNMS(KP992114701, T7W, T7T); + ri[WS(rs, 4)] = FNMS(KP992114701, T5W, T5H); + T5v = FNMS(KP242145790, T5u, T4t); + T7S = FNMS(KP683113946, T7O, T7P); + T7Q = FMA(KP559154169, T7P, T7O); + T7L = FNMS(KP242145790, T7K, T7H); + T5F = FNMS(KP541454447, T5w, T5v); + T5x = FMA(KP541454447, T5w, T5v); + T68 = FMA(KP525970792, T62, T65); + T66 = FNMS(KP726211448, T65, T62); + ri[WS(rs, 11)] = FNMS(KP833417178, T5G, T5F); + ri[WS(rs, 16)] = FMA(KP833417178, T5G, T5F); + ri[WS(rs, 21)] = FNMS(KP921177326, T5E, T5x); + ri[WS(rs, 6)] = FMA(KP921177326, T5E, T5x); + T7R = FNMS(KP541454447, T7M, T7L); + T7N = FMA(KP541454447, T7M, T7L); + T5X = FMA(KP248028675, T5W, T5H); + ii[WS(rs, 11)] = FMA(KP833417178, T7S, T7R); + ii[WS(rs, 16)] = FNMS(KP833417178, T7S, T7R); + ii[WS(rs, 21)] = FMA(KP921177326, T7Q, T7N); + ii[WS(rs, 6)] = FNMS(KP921177326, T7Q, T7N); + T84 = FNMS(KP525970792, T80, T81); + T82 = FMA(KP726211448, T81, T80); + T7X = FMA(KP248028675, T7W, T7T); + T67 = FNMS(KP554608978, T5Y, T5X); + T5Z = FMA(KP554608978, T5Y, T5X); + } + } + } + { + E T6b, T6T, T8j, T87, T72, T71, T6P, T8r, T6M, T8q, T7f, T6W, T8m, T8o, T6I; + E T6G, T7d, T76, T7g, T6Z, T83, T7Z; + ri[WS(rs, 14)] = FNMS(KP943557151, T68, T67); + ri[WS(rs, 19)] = FMA(KP943557151,
T68, T67); + ri[WS(rs, 24)] = FMA(KP803003575, T66, T5Z); + ri[WS(rs, 9)] = FNMS(KP803003575, T66, T5Z); + T83 = FNMS(KP554608978, T7Y, T7X); + T7Z = FMA(KP554608978, T7Y, T7X); + T6b = FMA(KP951056516, T6a, T69); + T6T = FNMS(KP951056516, T6a, T69); + ii[WS(rs, 14)] = FMA(KP943557151, T84, T83); + ii[WS(rs, 19)] = FNMS(KP943557151, T84, T83); + ii[WS(rs, 24)] = FMA(KP803003575, T82, T7Z); + ii[WS(rs, 9)] = FNMS(KP803003575, T82, T7Z); + { + E T6X, T6Y, T74, T6N, T6i, T75, T6U, T6V, T6t, T6L, T6E, T6O, T6p, T6w; + { + E T6A, T6D, T6e, T6h, T6l, T6o; + T6X = FNMS(KP951056516, T6d, T6c); + T6e = FMA(KP951056516, T6d, T6c); + T6h = FMA(KP951056516, T6g, T6f); + T6Y = FNMS(KP951056516, T6g, T6f); + T74 = FMA(KP951056516, T6z, T6y); + T6A = FNMS(KP951056516, T6z, T6y); + T8j = FNMS(KP951056516, T86, T85); + T87 = FMA(KP951056516, T86, T85); + T6N = FNMS(KP062914667, T6e, T6h); + T6i = FMA(KP062914667, T6h, T6e); + T6D = FMA(KP951056516, T6C, T6B); + T75 = FNMS(KP951056516, T6C, T6B); + T6U = FMA(KP951056516, T6k, T6j); + T6l = FNMS(KP951056516, T6k, T6j); + T6o = FNMS(KP951056516, T6n, T6m); + T6V = FMA(KP951056516, T6n, T6m); + T72 = FMA(KP951056516, T6s, T6r); + T6t = FNMS(KP951056516, T6s, T6r); + T6L = FNMS(KP939062505, T6A, T6D); + T6E = FMA(KP939062505, T6D, T6A); + T6O = FMA(KP827271945, T6l, T6o); + T6p = FNMS(KP827271945, T6o, T6l); + T6w = FMA(KP951056516, T6v, T6u); + T71 = FNMS(KP951056516, T6v, T6u); + } + { + E T8k, T6q, T6K, T6x, T8l, T6F; + T8k = FMA(KP772036680, T6O, T6N); + T6P = FNMS(KP772036680, T6O, T6N); + T6q = FMA(KP772036680, T6p, T6i); + T8r = FNMS(KP772036680, T6p, T6i); + T6K = FMA(KP126329378, T6t, T6w); + T6x = FNMS(KP126329378, T6w, T6t); + T8l = FNMS(KP734762448, T6L, T6K); + T6M = FMA(KP734762448, T6L, T6K); + T6F = FNMS(KP734762448, T6E, T6x); + T8q = FMA(KP734762448, T6E, T6x); + T7f = FNMS(KP062914667, T6U, T6V); + T6W = FMA(KP062914667, T6V, T6U); + T8m = FMA(KP994076283, T8l, T8k); + T8o = FNMS(KP994076283, T8l, T8k); + T6I =
FMA(KP994076283, T6F, T6q); + T6G = FNMS(KP994076283, T6F, T6q); + } + T7d = FNMS(KP549754652, T74, T75); + T76 = FMA(KP549754652, T75, T74); + T7g = FNMS(KP634619297, T6X, T6Y); + T6Z = FMA(KP634619297, T6Y, T6X); + } + { + E T88, T7h, T70, T8f, T7c, T73; + ri[WS(rs, 3)] = FMA(KP998026728, T6G, T6b); + T88 = FMA(KP845997307, T7g, T7f); + T7h = FNMS(KP845997307, T7g, T7f); + T70 = FMA(KP845997307, T6Z, T6W); + T8f = FNMS(KP845997307, T6Z, T6W); + T7c = FMA(KP470564281, T71, T72); + T73 = FNMS(KP470564281, T72, T71); + ii[WS(rs, 3)] = FNMS(KP998026728, T8m, T8j); + { + E T7e, T8e, T8a, T78, T7a, T8u, T8s, T8t, T8p, T79; + { + E T6S, T6Q, T6H, T89, T77, T6J, T6R, T8n; + T6S = FMA(KP614372930, T6M, T6P); + T6Q = FNMS(KP621716863, T6P, T6M); + T89 = FNMS(KP968479752, T7d, T7c); + T7e = FMA(KP968479752, T7d, T7c); + T77 = FMA(KP968479752, T76, T73); + T8e = FNMS(KP968479752, T76, T73); + T8a = FMA(KP906616052, T89, T88); + T8c = FNMS(KP906616052, T89, T88); + T78 = FMA(KP906616052, T77, T70); + T7a = FNMS(KP906616052, T77, T70); + T6H = FNMS(KP249506682, T6G, T6b); + ii[WS(rs, 2)] = FNMS(KP998026728, T8a, T87); + ri[WS(rs, 2)] = FMA(KP998026728, T78, T6T); + T8u = FNMS(KP614372930, T8q, T8r); + T8s = FMA(KP621716863, T8r, T8q); + T6J = FNMS(KP557913902, T6I, T6H); + T6R = FMA(KP557913902, T6I, T6H); + T8n = FMA(KP249506682, T8m, T8j); + ri[WS(rs, 18)] = FNMS(KP949179823, T6S, T6R); + ri[WS(rs, 13)] = FMA(KP949179823, T6S, T6R); + ri[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J); + ri[WS(rs, 23)] = FNMS(KP943557151, T6Q, T6J); + T8t = FNMS(KP557913902, T8o, T8n); + T8p = FMA(KP557913902, T8o, T8n); + } + T7k = FNMS(KP560319534, T7e, T7h); + T7i = FMA(KP681693190, T7h, T7e); + ii[WS(rs, 23)] = FMA(KP943557151, T8s, T8p); + ii[WS(rs, 8)] = FNMS(KP943557151, T8s, T8p); + ii[WS(rs, 13)] = FMA(KP949179823, T8u, T8t); + ii[WS(rs, 18)] = FNMS(KP949179823, T8u, T8t); + T79 = FNMS(KP249506682, T78, T6T); + T8i = FNMS(KP560319534, T8e, T8f); + T8g = FMA(KP681693190, T8f, T8e); + T8b =
FMA(KP249506682, T8a, T87); + T7j = FMA(KP557913902, T7a, T79); + T7b = FNMS(KP557913902, T7a, T79); + } + } + } + } + } + } + } + ri[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j); + ri[WS(rs, 17)] = FMA(KP949179823, T7k, T7j); + ri[WS(rs, 7)] = FMA(KP860541664, T7i, T7b); + ri[WS(rs, 22)] = FNMS(KP860541664, T7i, T7b); + T8d = FMA(KP557913902, T8c, T8b); + T8h = FNMS(KP557913902, T8c, T8b); + ii[WS(rs, 12)] = FNMS(KP949179823, T8i, T8h); + ii[WS(rs, 17)] = FMA(KP949179823, T8i, T8h); + ii[WS(rs, 22)] = FNMS(KP860541664, T8g, T8d); + ii[WS(rs, 7)] = FMA(KP860541664, T8g, T8d); + } + } +} + +static const tw_instr twinstr[] = { /* four TW_CEXP entries (1, 3, 9, 24) ended by TW_NEXT; these match the W[0..7] pairs that t2_25 applies directly to inputs 1, 3, 9 and 24. NOTE(review): exact field meanings come from the FFTW headers -- confirm there. */ + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 24}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {84, 78, 356, 0}, 0, 0, 0 }; /* size 25, codelet name, twiddle table; 84/78/356 repeat the add/mul/fused counts from the generator header comment above */ + +void X(codelet_t2_25) (planner *p) { /* hands t2_25 and its descriptor to planner p through kdft_dit_register */ + X(kdft_dit_register) (p, t2_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include t.h */ + +/* + * This function contains 440 FP additions, 340 FP multiplications, + * (or, 280 additions, 180 multiplications, 160 fused multiply/add), + * 149 stack variables, 20 constants, and 100 memory accesses + */ +#include "t.h" + +static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP684547105,
+0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) { + E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g; + E TB, T1f, TV, T1Q, Tg, T1S, Tk, T18, T2s, T1c, T2q, Tn, To, Tp, Tr; + E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V; + E T1X, T1z, T1j; + { + E Tw, TT, Tz, TQ, Tv, TU, TA, TP; + { + E T4, Tc, T7, Tb; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tc = T5 * T3; + T7 = T5 * T6; + Tb = T2 * T6; + T8 = T4 - T7; + Td = Tb + Tc; + T16 = Tb - Tc; + T14 = T4 + T7; + Te = W[5]; + Tw = T5 * Te; + TT = T3 * Te; + Tz = T2 * Te; + TQ = T6 * Te; + T9 = W[4]; + Tv = T2 * T9; + TU = T6 * T9; + TA = T5 * T9; + TP = T3 * T9; + } + T21 = TP - TQ; + T23 = TT + TU; + { + E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj; + Tx = Tv - Tw; + TR = TP + TQ; + T1g = Tz - TA; + TB = Tz + TA; + T1f = Tv + Tw; + TV = TT - TU; + T15 = T14 * T9; + T17 = T16 * Te; + T1Q = T15 + T17; + Ta = T8 * T9; + Tf = Td * Te; + Tg = Ta + Tf; + T1a = T14 * Te; + T1b = T16 * T9; + T1S = T1a - T1b; + Ti = T8 * Te; + Tj = Td * T9; + Tk = Ti - Tj; + T18 = T15 - T17; + T2s = Ti + 
Tj; + T1c = T1a + T1b; + T2q = Ta - Tf; + Tn = W[6]; + To = W[7]; + Tp = FMA(T8, Tn, Td * To); + Tr = FNMS(Td, Tn, T8 * To); + T28 = FNMS(T1S, Tn, T1Q * To); + T2x = FNMS(TV, Tn, TR * To); + TY = FMA(T3, Tn, T6 * To); + T2k = FMA(T2, Tn, T5 * To); + T2m = FNMS(T5, Tn, T2 * To); + T2v = FMA(TR, Tn, TV * To); + TG = FNMS(Te, Tn, T9 * To); + TE = FMA(T9, Tn, Te * To); + T10 = FNMS(T6, Tn, T3 * To); + T1h = FMA(T1f, Tn, T1g * To); + T1E = FMA(Tg, Tn, Tk * To); + T26 = FMA(T1Q, Tn, T1S * To); + T1B = FNMS(TB, Tn, Tx * To); + T1G = FNMS(Tk, Tn, Tg * To); + T1V = FMA(T14, Tn, T16 * To); + T1X = FNMS(T16, Tn, T14 * To); + T1z = FMA(Tx, Tn, TB * To); + T1j = FNMS(T1g, Tn, T1f * To); + } + } + { + E T1, T6v, T2F, T6I, TK, T2G, T6u, T6J, T6N, T7c, T2O, T52, T2C, T6k, T48; + E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N; + E T4B, T59, T1L, T6h, T3n, T5Q, T4D, T5g, T3y, T5P, T4E, T5d, T2d, T6j, T3L; + E T5T, T4I, T5l, T3W, T5U, T4H, T5o; + { + E Tm, T2I, Tt, T2J, Tu, T6s, TD, T2L, TI, T2M, TJ, T6t; + T1 = ri[0]; + T6v = ii[0]; + { + E Th, Tl, Tq, Ts; + Th = ri[WS(rs, 5)]; + Tl = ii[WS(rs, 5)]; + Tm = FMA(Tg, Th, Tk * Tl); + T2I = FNMS(Tk, Th, Tg * Tl); + Tq = ri[WS(rs, 20)]; + Ts = ii[WS(rs, 20)]; + Tt = FMA(Tp, Tq, Tr * Ts); + T2J = FNMS(Tr, Tq, Tp * Ts); + } + Tu = Tm + Tt; + T6s = T2I + T2J; + { + E Ty, TC, TF, TH; + Ty = ri[WS(rs, 10)]; + TC = ii[WS(rs, 10)]; + TD = FMA(Tx, Ty, TB * TC); + T2L = FNMS(TB, Ty, Tx * TC); + TF = ri[WS(rs, 15)]; + TH = ii[WS(rs, 15)]; + TI = FMA(TE, TF, TG * TH); + T2M = FNMS(TG, TF, TE * TH); + } + TJ = TD + TI; + T6t = T2L + T2M; + T2F = KP559016994 * (Tu - TJ); + T6I = KP559016994 * (T6s - T6t); + TK = Tu + TJ; + T2G = FNMS(KP250000000, TK, T1); + T6u = T6s + T6t; + T6J = FNMS(KP250000000, T6u, T6v); + { + E T6L, T6M, T2K, T2N; + T6L = Tm - Tt; + T6M = TD - TI; + T6N = FMA(KP951056516, T6L, KP587785252 * T6M); + T7c = FNMS(KP587785252, T6L, KP951056516 * T6M); + T2K = T2I - T2J; + T2N = T2L - T2M; + T2O = 
FMA(KP951056516, T2K, KP587785252 * T2N); + T52 = FNMS(KP587785252, T2K, KP951056516 * T2N); + } + } + { + E T2g, T4c, T43, T46, T4h, T4g, T49, T4a, T4d, T2p, T2A, T2B, T2e, T2f; + T2e = ri[WS(rs, 3)]; + T2f = ii[WS(rs, 3)]; + T2g = FMA(T3, T2e, T6 * T2f); + T4c = FNMS(T6, T2e, T3 * T2f); + { + E T2j, T41, T2z, T45, T2o, T42, T2u, T44; + { + E T2h, T2i, T2w, T2y; + T2h = ri[WS(rs, 8)]; + T2i = ii[WS(rs, 8)]; + T2j = FMA(T1f, T2h, T1g * T2i); + T41 = FNMS(T1g, T2h, T1f * T2i); + T2w = ri[WS(rs, 18)]; + T2y = ii[WS(rs, 18)]; + T2z = FMA(T2v, T2w, T2x * T2y); + T45 = FNMS(T2x, T2w, T2v * T2y); + } + { + E T2l, T2n, T2r, T2t; + T2l = ri[WS(rs, 23)]; + T2n = ii[WS(rs, 23)]; + T2o = FMA(T2k, T2l, T2m * T2n); + T42 = FNMS(T2m, T2l, T2k * T2n); + T2r = ri[WS(rs, 13)]; + T2t = ii[WS(rs, 13)]; + T2u = FMA(T2q, T2r, T2s * T2t); + T44 = FNMS(T2s, T2r, T2q * T2t); + } + T43 = T41 - T42; + T46 = T44 - T45; + T4h = T2u - T2z; + T4g = T2j - T2o; + T49 = T41 + T42; + T4a = T44 + T45; + T4d = T49 + T4a; + T2p = T2j + T2o; + T2A = T2u + T2z; + T2B = T2p + T2A; + } + T2C = T2g + T2B; + T6k = T4c + T4d; + { + E T47, T5r, T40, T5q, T3Y, T3Z; + T47 = FMA(KP951056516, T43, KP587785252 * T46); + T5r = FNMS(KP587785252, T43, KP951056516 * T46); + T3Y = KP559016994 * (T2p - T2A); + T3Z = FNMS(KP250000000, T2B, T2g); + T40 = T3Y + T3Z; + T5q = T3Z - T3Y; + T48 = T40 + T47; + T5X = T5q + T5r; + T4L = T40 - T47; + T5s = T5q - T5r; + } + { + E T4i, T5t, T4f, T5u, T4b, T4e; + T4i = FMA(KP951056516, T4g, KP587785252 * T4h); + T5t = FNMS(KP587785252, T4g, KP951056516 * T4h); + T4b = KP559016994 * (T49 - T4a); + T4e = FNMS(KP250000000, T4d, T4c); + T4f = T4b + T4e; + T5u = T4e - T4b; + T4j = T4f - T4i; + T5W = T5u - T5t; + T4K = T4i + T4f; + T5v = T5t + T5u; + } + } + { + E TO, T34, T2V, T2Y, T39, T38, T31, T32, T35, T13, T1m, T1n, TM, TN; + TM = ri[WS(rs, 1)]; + TN = ii[WS(rs, 1)]; + TO = FMA(T2, TM, T5 * TN); + T34 = FNMS(T5, TM, T2 * TN); + { + E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W; + { + E TS, 
TW, T1i, T1k; + TS = ri[WS(rs, 6)]; + TW = ii[WS(rs, 6)]; + TX = FMA(TR, TS, TV * TW); + T2T = FNMS(TV, TS, TR * TW); + T1i = ri[WS(rs, 16)]; + T1k = ii[WS(rs, 16)]; + T1l = FMA(T1h, T1i, T1j * T1k); + T2X = FNMS(T1j, T1i, T1h * T1k); + } + { + E TZ, T11, T19, T1d; + TZ = ri[WS(rs, 21)]; + T11 = ii[WS(rs, 21)]; + T12 = FMA(TY, TZ, T10 * T11); + T2U = FNMS(T10, TZ, TY * T11); + T19 = ri[WS(rs, 11)]; + T1d = ii[WS(rs, 11)]; + T1e = FMA(T18, T19, T1c * T1d); + T2W = FNMS(T1c, T19, T18 * T1d); + } + T2V = T2T - T2U; + T2Y = T2W - T2X; + T39 = T1e - T1l; + T38 = TX - T12; + T31 = T2T + T2U; + T32 = T2W + T2X; + T35 = T31 + T32; + T13 = TX + T12; + T1m = T1e + T1l; + T1n = T13 + T1m; + } + T1o = TO + T1n; + T6g = T34 + T35; + { + E T2Z, T55, T2S, T54, T2Q, T2R; + T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y); + T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y); + T2Q = KP559016994 * (T13 - T1m); + T2R = FNMS(KP250000000, T1n, TO); + T2S = T2Q + T2R; + T54 = T2R - T2Q; + T30 = T2S + T2Z; + T5M = T54 + T55; + T4A = T2S - T2Z; + T56 = T54 - T55; + } + { + E T3a, T57, T37, T58, T33, T36; + T3a = FMA(KP951056516, T38, KP587785252 * T39); + T57 = FNMS(KP587785252, T38, KP951056516 * T39); + T33 = KP559016994 * (T31 - T32); + T36 = FNMS(KP250000000, T35, T34); + T37 = T33 + T36; + T58 = T36 - T33; + T3b = T37 - T3a; + T5N = T58 - T57; + T4B = T3a + T37; + T59 = T57 + T58; + } + } + { + E T1r, T3r, T3i, T3l, T3w, T3v, T3o, T3p, T3s, T1y, T1J, T1K, T1p, T1q; + T1p = ri[WS(rs, 4)]; + T1q = ii[WS(rs, 4)]; + T1r = FMA(T8, T1p, Td * T1q); + T3r = FNMS(Td, T1p, T8 * T1q); + { + E T1u, T3g, T1I, T3k, T1x, T3h, T1D, T3j; + { + E T1s, T1t, T1F, T1H; + T1s = ri[WS(rs, 9)]; + T1t = ii[WS(rs, 9)]; + T1u = FMA(T9, T1s, Te * T1t); + T3g = FNMS(Te, T1s, T9 * T1t); + T1F = ri[WS(rs, 19)]; + T1H = ii[WS(rs, 19)]; + T1I = FMA(T1E, T1F, T1G * T1H); + T3k = FNMS(T1G, T1F, T1E * T1H); + } + { + E T1v, T1w, T1A, T1C; + T1v = ri[WS(rs, 24)]; + T1w = ii[WS(rs, 24)]; + T1x = FMA(Tn, T1v, To * T1w); + T3h 
= FNMS(To, T1v, Tn * T1w); + T1A = ri[WS(rs, 14)]; + T1C = ii[WS(rs, 14)]; + T1D = FMA(T1z, T1A, T1B * T1C); + T3j = FNMS(T1B, T1A, T1z * T1C); + } + T3i = T3g - T3h; + T3l = T3j - T3k; + T3w = T1D - T1I; + T3v = T1u - T1x; + T3o = T3g + T3h; + T3p = T3j + T3k; + T3s = T3o + T3p; + T1y = T1u + T1x; + T1J = T1D + T1I; + T1K = T1y + T1J; + } + T1L = T1r + T1K; + T6h = T3r + T3s; + { + E T3m, T5f, T3f, T5e, T3d, T3e; + T3m = FMA(KP951056516, T3i, KP587785252 * T3l); + T5f = FNMS(KP587785252, T3i, KP951056516 * T3l); + T3d = KP559016994 * (T1y - T1J); + T3e = FNMS(KP250000000, T1K, T1r); + T3f = T3d + T3e; + T5e = T3e - T3d; + T3n = T3f + T3m; + T5Q = T5e + T5f; + T4D = T3f - T3m; + T5g = T5e - T5f; + } + { + E T3x, T5b, T3u, T5c, T3q, T3t; + T3x = FMA(KP951056516, T3v, KP587785252 * T3w); + T5b = FNMS(KP587785252, T3v, KP951056516 * T3w); + T3q = KP559016994 * (T3o - T3p); + T3t = FNMS(KP250000000, T3s, T3r); + T3u = T3q + T3t; + T5c = T3t - T3q; + T3y = T3u - T3x; + T5P = T5c - T5b; + T4E = T3x + T3u; + T5d = T5b + T5c; + } + } + { + E T1P, T3P, T3G, T3J, T3U, T3T, T3M, T3N, T3Q, T20, T2b, T2c, T1N, T1O; + T1N = ri[WS(rs, 2)]; + T1O = ii[WS(rs, 2)]; + T1P = FMA(T14, T1N, T16 * T1O); + T3P = FNMS(T16, T1N, T14 * T1O); + { + E T1U, T3E, T2a, T3I, T1Z, T3F, T25, T3H; + { + E T1R, T1T, T27, T29; + T1R = ri[WS(rs, 7)]; + T1T = ii[WS(rs, 7)]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T3E = FNMS(T1S, T1R, T1Q * T1T); + T27 = ri[WS(rs, 17)]; + T29 = ii[WS(rs, 17)]; + T2a = FMA(T26, T27, T28 * T29); + T3I = FNMS(T28, T27, T26 * T29); + } + { + E T1W, T1Y, T22, T24; + T1W = ri[WS(rs, 22)]; + T1Y = ii[WS(rs, 22)]; + T1Z = FMA(T1V, T1W, T1X * T1Y); + T3F = FNMS(T1X, T1W, T1V * T1Y); + T22 = ri[WS(rs, 12)]; + T24 = ii[WS(rs, 12)]; + T25 = FMA(T21, T22, T23 * T24); + T3H = FNMS(T23, T22, T21 * T24); + } + T3G = T3E - T3F; + T3J = T3H - T3I; + T3U = T25 - T2a; + T3T = T1U - T1Z; + T3M = T3E + T3F; + T3N = T3H + T3I; + T3Q = T3M + T3N; + T20 = T1U + T1Z; + T2b = T25 + T2a; + T2c = T20 + 
T2b; + } + T2d = T1P + T2c; + T6j = T3P + T3Q; + { + E T3K, T5k, T3D, T5j, T3B, T3C; + T3K = FMA(KP951056516, T3G, KP587785252 * T3J); + T5k = FNMS(KP587785252, T3G, KP951056516 * T3J); + T3B = KP559016994 * (T20 - T2b); + T3C = FNMS(KP250000000, T2c, T1P); + T3D = T3B + T3C; + T5j = T3C - T3B; + T3L = T3D + T3K; + T5T = T5j + T5k; + T4I = T3D - T3K; + T5l = T5j - T5k; + } + { + E T3V, T5m, T3S, T5n, T3O, T3R; + T3V = FMA(KP951056516, T3T, KP587785252 * T3U); + T5m = FNMS(KP587785252, T3T, KP951056516 * T3U); + T3O = KP559016994 * (T3M - T3N); + T3R = FNMS(KP250000000, T3Q, T3P); + T3S = T3O + T3R; + T5n = T3R - T3O; + T3W = T3S - T3V; + T5U = T5n - T5m; + T4H = T3V + T3S; + T5o = T5m + T5n; + } + } + { + E T6m, T6o, TL, T2E, T6d, T6e, T6n, T6f; + { + E T6i, T6l, T1M, T2D; + T6i = T6g - T6h; + T6l = T6j - T6k; + T6m = FMA(KP951056516, T6i, KP587785252 * T6l); + T6o = FNMS(KP587785252, T6i, KP951056516 * T6l); + TL = T1 + TK; + T1M = T1o + T1L; + T2D = T2d + T2C; + T2E = T1M + T2D; + T6d = KP559016994 * (T1M - T2D); + T6e = FNMS(KP250000000, T2E, TL); + } + ri[0] = TL + T2E; + T6n = T6e - T6d; + ri[WS(rs, 10)] = T6n - T6o; + ri[WS(rs, 15)] = T6n + T6o; + T6f = T6d + T6e; + ri[WS(rs, 20)] = T6f - T6m; + ri[WS(rs, 5)] = T6f + T6m; + } + { + E T6C, T6D, T6w, T6r, T6x, T6y, T6E, T6z; + { + E T6A, T6B, T6p, T6q; + T6A = T1o - T1L; + T6B = T2d - T2C; + T6C = FMA(KP951056516, T6A, KP587785252 * T6B); + T6D = FNMS(KP587785252, T6A, KP951056516 * T6B); + T6w = T6u + T6v; + T6p = T6g + T6h; + T6q = T6j + T6k; + T6r = T6p + T6q; + T6x = KP559016994 * (T6p - T6q); + T6y = FNMS(KP250000000, T6r, T6w); + } + ii[0] = T6r + T6w; + T6E = T6y - T6x; + ii[WS(rs, 10)] = T6D + T6E; + ii[WS(rs, 15)] = T6E - T6D; + T6z = T6x + T6y; + ii[WS(rs, 5)] = T6z - T6C; + ii[WS(rs, 20)] = T6C + T6z; + } + { + E T2P, T4z, T6O, T70, T4m, T6T, T4n, T6S, T4U, T71, T4X, T6Z, T4O, T75, T4P; + E T74, T4s, T6P, T4v, T6H, T2H, T6K; + T2H = T2F + T2G; + T2P = T2H + T2O; + T4z = T2H - T2O; + T6K = T6I + T6J; 
+ T6O = T6K - T6N; + T70 = T6N + T6K; + { + E T3c, T3z, T3A, T3X, T4k, T4l; + T3c = FMA(KP968583161, T30, KP248689887 * T3b); + T3z = FMA(KP535826794, T3n, KP844327925 * T3y); + T3A = T3c + T3z; + T3X = FMA(KP876306680, T3L, KP481753674 * T3W); + T4k = FMA(KP728968627, T48, KP684547105 * T4j); + T4l = T3X + T4k; + T4m = T3A + T4l; + T6T = T3X - T4k; + T4n = KP559016994 * (T3A - T4l); + T6S = T3c - T3z; + } + { + E T4S, T4T, T6X, T4V, T4W, T6Y; + T4S = FNMS(KP844327925, T4A, KP535826794 * T4B); + T4T = FNMS(KP637423989, T4E, KP770513242 * T4D); + T6X = T4S + T4T; + T4V = FMA(KP125333233, T4L, KP992114701 * T4K); + T4W = FMA(KP904827052, T4I, KP425779291 * T4H); + T6Y = T4W + T4V; + T4U = T4S - T4T; + T71 = KP559016994 * (T6X + T6Y); + T4X = T4V - T4W; + T6Z = T6X - T6Y; + } + { + E T4C, T4F, T4G, T4J, T4M, T4N; + T4C = FMA(KP535826794, T4A, KP844327925 * T4B); + T4F = FMA(KP637423989, T4D, KP770513242 * T4E); + T4G = T4C - T4F; + T4J = FNMS(KP425779291, T4I, KP904827052 * T4H); + T4M = FNMS(KP992114701, T4L, KP125333233 * T4K); + T4N = T4J + T4M; + T4O = T4G + T4N; + T75 = T4J - T4M; + T4P = KP559016994 * (T4G - T4N); + T74 = T4C + T4F; + } + { + E T4q, T4r, T6F, T4t, T4u, T6G; + T4q = FNMS(KP248689887, T30, KP968583161 * T3b); + T4r = FNMS(KP844327925, T3n, KP535826794 * T3y); + T6F = T4q + T4r; + T4t = FNMS(KP481753674, T3L, KP876306680 * T3W); + T4u = FNMS(KP684547105, T48, KP728968627 * T4j); + T6G = T4t + T4u; + T4s = T4q - T4r; + T6P = KP559016994 * (T6F - T6G); + T4v = T4t - T4u; + T6H = T6F + T6G; + } + ri[WS(rs, 1)] = T2P + T4m; + ii[WS(rs, 1)] = T6H + T6O; + ri[WS(rs, 4)] = T4z + T4O; + ii[WS(rs, 4)] = T6Z + T70; + { + E T4w, T4y, T4p, T4x, T4o; + T4w = FMA(KP951056516, T4s, KP587785252 * T4v); + T4y = FNMS(KP587785252, T4s, KP951056516 * T4v); + T4o = FNMS(KP250000000, T4m, T2P); + T4p = T4n + T4o; + T4x = T4o - T4n; + ri[WS(rs, 21)] = T4p - T4w; + ri[WS(rs, 16)] = T4x + T4y; + ri[WS(rs, 6)] = T4p + T4w; + ri[WS(rs, 11)] = T4x - T4y; + } + { + E T6U, T6V, 
T6R, T6W, T6Q; + T6U = FMA(KP951056516, T6S, KP587785252 * T6T); + T6V = FNMS(KP587785252, T6S, KP951056516 * T6T); + T6Q = FNMS(KP250000000, T6H, T6O); + T6R = T6P + T6Q; + T6W = T6Q - T6P; + ii[WS(rs, 6)] = T6R - T6U; + ii[WS(rs, 16)] = T6W - T6V; + ii[WS(rs, 21)] = T6U + T6R; + ii[WS(rs, 11)] = T6V + T6W; + } + { + E T4Y, T50, T4R, T4Z, T4Q; + T4Y = FMA(KP951056516, T4U, KP587785252 * T4X); + T50 = FNMS(KP587785252, T4U, KP951056516 * T4X); + T4Q = FNMS(KP250000000, T4O, T4z); + T4R = T4P + T4Q; + T4Z = T4Q - T4P; + ri[WS(rs, 24)] = T4R - T4Y; + ri[WS(rs, 19)] = T4Z + T50; + ri[WS(rs, 9)] = T4R + T4Y; + ri[WS(rs, 14)] = T4Z - T50; + } + { + E T76, T77, T73, T78, T72; + T76 = FMA(KP951056516, T74, KP587785252 * T75); + T77 = FNMS(KP587785252, T74, KP951056516 * T75); + T72 = FNMS(KP250000000, T6Z, T70); + T73 = T71 + T72; + T78 = T72 - T71; + ii[WS(rs, 9)] = T73 - T76; + ii[WS(rs, 19)] = T78 - T77; + ii[WS(rs, 24)] = T76 + T73; + ii[WS(rs, 14)] = T77 + T78; + } + } + { + E T53, T5L, T7e, T7q, T5y, T7j, T5z, T7i, T66, T7r, T69, T7p, T60, T7v, T61; + E T7u, T5E, T7f, T5H, T7b, T51, T7d; + T51 = T2G - T2F; + T53 = T51 - T52; + T5L = T51 + T52; + T7d = T6J - T6I; + T7e = T7c + T7d; + T7q = T7d - T7c; + { + E T5a, T5h, T5i, T5p, T5w, T5x; + T5a = FMA(KP876306680, T56, KP481753674 * T59); + T5h = FNMS(KP425779291, T5g, KP904827052 * T5d); + T5i = T5a + T5h; + T5p = FMA(KP535826794, T5l, KP844327925 * T5o); + T5w = FMA(KP062790519, T5s, KP998026728 * T5v); + T5x = T5p + T5w; + T5y = T5i + T5x; + T7j = T5p - T5w; + T5z = KP559016994 * (T5i - T5x); + T7i = T5a - T5h; + } + { + E T64, T65, T7n, T67, T68, T7o; + T64 = FNMS(KP684547105, T5M, KP728968627 * T5N); + T65 = FMA(KP125333233, T5Q, KP992114701 * T5P); + T7n = T64 - T65; + T67 = FNMS(KP998026728, T5T, KP062790519 * T5U); + T68 = FMA(KP770513242, T5X, KP637423989 * T5W); + T7o = T67 - T68; + T66 = T64 + T65; + T7r = KP559016994 * (T7n - T7o); + T69 = T67 + T68; + T7p = T7n + T7o; + } + { + E T5O, T5R, T5S, T5V, T5Y, 
T5Z; + T5O = FMA(KP728968627, T5M, KP684547105 * T5N); + T5R = FNMS(KP992114701, T5Q, KP125333233 * T5P); + T5S = T5O + T5R; + T5V = FMA(KP062790519, T5T, KP998026728 * T5U); + T5Y = FNMS(KP637423989, T5X, KP770513242 * T5W); + T5Z = T5V + T5Y; + T60 = T5S + T5Z; + T7v = T5V - T5Y; + T61 = KP559016994 * (T5S - T5Z); + T7u = T5O - T5R; + } + { + E T5C, T5D, T79, T5F, T5G, T7a; + T5C = FNMS(KP481753674, T56, KP876306680 * T59); + T5D = FMA(KP904827052, T5g, KP425779291 * T5d); + T79 = T5C - T5D; + T5F = FNMS(KP844327925, T5l, KP535826794 * T5o); + T5G = FNMS(KP998026728, T5s, KP062790519 * T5v); + T7a = T5F + T5G; + T5E = T5C + T5D; + T7f = KP559016994 * (T79 - T7a); + T5H = T5F - T5G; + T7b = T79 + T7a; + } + ri[WS(rs, 2)] = T53 + T5y; + ii[WS(rs, 2)] = T7b + T7e; + ri[WS(rs, 3)] = T5L + T60; + ii[WS(rs, 3)] = T7p + T7q; + { + E T5I, T5K, T5B, T5J, T5A; + T5I = FMA(KP951056516, T5E, KP587785252 * T5H); + T5K = FNMS(KP587785252, T5E, KP951056516 * T5H); + T5A = FNMS(KP250000000, T5y, T53); + T5B = T5z + T5A; + T5J = T5A - T5z; + ri[WS(rs, 22)] = T5B - T5I; + ri[WS(rs, 17)] = T5J + T5K; + ri[WS(rs, 7)] = T5B + T5I; + ri[WS(rs, 12)] = T5J - T5K; + } + { + E T7k, T7l, T7h, T7m, T7g; + T7k = FMA(KP951056516, T7i, KP587785252 * T7j); + T7l = FNMS(KP587785252, T7i, KP951056516 * T7j); + T7g = FNMS(KP250000000, T7b, T7e); + T7h = T7f + T7g; + T7m = T7g - T7f; + ii[WS(rs, 7)] = T7h - T7k; + ii[WS(rs, 17)] = T7m - T7l; + ii[WS(rs, 22)] = T7k + T7h; + ii[WS(rs, 12)] = T7l + T7m; + } + { + E T6a, T6c, T63, T6b, T62; + T6a = FMA(KP951056516, T66, KP587785252 * T69); + T6c = FNMS(KP587785252, T66, KP951056516 * T69); + T62 = FNMS(KP250000000, T60, T5L); + T63 = T61 + T62; + T6b = T62 - T61; + ri[WS(rs, 23)] = T63 - T6a; + ri[WS(rs, 18)] = T6b + T6c; + ri[WS(rs, 8)] = T63 + T6a; + ri[WS(rs, 13)] = T6b - T6c; + } + { + E T7w, T7x, T7t, T7y, T7s; + T7w = FMA(KP951056516, T7u, KP587785252 * T7v); + T7x = FNMS(KP587785252, T7u, KP951056516 * T7v); + T7s = FNMS(KP250000000, T7p, T7q); 
+ T7t = T7r + T7s; + T7y = T7s - T7r; + ii[WS(rs, 8)] = T7t - T7w; + ii[WS(rs, 18)] = T7y - T7x; + ii[WS(rs, 23)] = T7w + T7t; + ii[WS(rs, 13)] = T7x + T7y; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 24}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {280, 180, 160, 0}, 0, 0, 0 }; + +void X(codelet_t2_25) (planner *p) { + X(kdft_dit_register) (p, t2_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1844 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include t.h */ + +/* + * This function contains 488 FP additions, 350 FP multiplications, + * (or, 236 additions, 98 multiplications, 252 fused multiply/add), + * 181 stack variables, 7 constants, and 128 memory accesses + */ +#include "t.h" + +static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T9A, T9z; + { + E T2, T8, T3, T6, Te, Tr, T18, T4, Ta, Tz, T1n, T10, Ti, T5, Tc; + T2 = W[0]; + T8 = W[4]; + T3 = W[2]; + T6 = W[3]; + Te = W[6]; + Tr = T2 * T8; + T18 = T3 * T8; + T4 = T2 * T3; + Ta = T2 * T6; + Tz = T3 * Te; + T1n = T8 * Te; + T10 = T2 * Te; + Ti = W[7]; + T5 = W[1]; + Tc = W[5]; + { + E T34, T31, T2X, T2T, Tq, T46, T8H, T97, TH, T98, T4b, T8D, TZ, T7f, T4j; + E T6t, T1g, T7g, T4q, T6u, T4z, T6x, T1J, T7m, T7l, T8d, T6y, T4G, T2k, T7o; + E T7r, 
T8e, T6A, T4O, T6B, T4V, T6P, T5E, T7L, T3G, T6M, T61, T8n, T7I, T6I; + E T55, T7A, T2N, T6F, T5s, T8i, T7x, T5L, T62, T43, T7J, T5S, T63, T7O, T8o; + E T2U, T2R, T2V, T57, T3a, T5h, T2Y, T32, T35; + { + E T1K, T23, T1N, T26, T2b, T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J; + E T2F, Td, TP, T3r, T3n, T2w, T2s, T3Q, T3M, T1Z, T1V, T2g, T2c; + { + E T11, T1C, TM, Tb, TJ, T7, T1o, T19, T1w, T1F, T15, T1s, T1d, T1z, TW; + E TS, Ty, T48, TG, T4a; + { + E T1, TA, Ts, TE, Tw, Tn, Tj, T8G, Tk, To, T14; + T1 = ri[0]; + TA = FMA(T6, Ti, Tz); + T1K = FNMS(T6, Ti, Tz); + T14 = T2 * Ti; + { + E T1r, TD, T1c, Tv; + T1r = T8 * Ti; + TD = T3 * Ti; + T11 = FNMS(T5, Ti, T10); + T1C = FMA(T5, Ti, T10); + TM = FMA(T5, T3, Ta); + Tb = FNMS(T5, T3, Ta); + TJ = FNMS(T5, T6, T4); + T7 = FMA(T5, T6, T4); + T1o = FMA(Tc, Ti, T1n); + T23 = FMA(T6, Tc, T18); + T19 = FNMS(T6, Tc, T18); + T1w = FNMS(T5, Tc, Tr); + Ts = FMA(T5, Tc, Tr); + T1c = T3 * Tc; + Tv = T2 * Tc; + T1F = FNMS(T5, Te, T14); + T15 = FMA(T5, Te, T14); + T1s = FNMS(Tc, Te, T1r); + T1N = FMA(T6, Te, TD); + TE = FNMS(T6, Te, TD); + { + E T1T, T3i, T3e, T1Q; + T1T = TJ * Tc; + T3i = TJ * Ti; + T3e = TJ * Te; + T1Q = TJ * T8; + { + E Tg, T2I, T2E, T9; + Tg = T7 * Tc; + T2I = T7 * Ti; + T2E = T7 * Te; + T9 = T7 * T8; + { + E T3q, T3m, T2v, T2r; + T3q = T19 * Ti; + T3m = T19 * Te; + T2v = T1w * Ti; + T2r = T1w * Te; + { + E T2W, T2S, T3P, T3L; + T2W = T23 * Ti; + T2S = T23 * Te; + T3P = Ts * Ti; + T3L = Ts * Te; + T26 = FNMS(T6, T8, T1c); + T1d = FMA(T6, T8, T1c); + T1z = FMA(T5, T8, Tv); + Tw = FNMS(T5, T8, Tv); + T2b = FNMS(TM, T8, T1T); + T1U = FMA(TM, T8, T1T); + T3C = FNMS(TM, Te, T3i); + T3j = FMA(TM, Te, T3i); + T3z = FMA(TM, Ti, T3e); + T3f = FNMS(TM, Ti, T3e); + T1R = FNMS(TM, Tc, T1Q); + T29 = FMA(TM, Tc, T1Q); + TR = FNMS(Tb, T8, Tg); + Th = FMA(Tb, T8, Tg); + T34 = FMA(Tb, Te, T2I); + T2J = FNMS(Tb, Te, T2I); + T31 = FNMS(Tb, Ti, T2E); + T2F = FMA(Tb, Ti, T2E); + Td = FNMS(Tb, Tc, T9); + TP = FMA(Tb, Tc, T9); + T2X = 
FNMS(T26, Te, T2W); + T2T = FMA(T26, Ti, T2S); + T3r = FNMS(T1d, Te, T3q); + T3n = FMA(T1d, Ti, T3m); + T2w = FNMS(T1z, Te, T2v); + T2s = FMA(T1z, Ti, T2r); + T3Q = FNMS(Tw, Te, T3P); + T3M = FMA(Tw, Ti, T3L); + { + E T1Y, T1S, T2f, T2a; + T1Y = T1R * Ti; + T1S = T1R * Te; + T2f = T29 * Ti; + T2a = T29 * Te; + { + E Tm, Tf, TV, TQ; + Tm = Td * Ti; + Tf = Td * Te; + TV = TP * Ti; + TQ = TP * Te; + T1Z = FNMS(T1U, Te, T1Y); + T1V = FMA(T1U, Ti, T1S); + T2g = FNMS(T2b, Te, T2f); + T2c = FMA(T2b, Ti, T2a); + Tn = FNMS(Th, Te, Tm); + Tj = FMA(Th, Ti, Tf); + TW = FNMS(TR, Te, TV); + TS = FMA(TR, Ti, TQ); + T8G = ii[0]; + } + } + } + } + } + } + } + Tk = ri[WS(rs, 16)]; + To = ii[WS(rs, 16)]; + { + E Tt, Tx, Tu, T47, TB, TF, TC, T49; + { + E Tl, T8E, Tp, T8F; + Tt = ri[WS(rs, 8)]; + Tx = ii[WS(rs, 8)]; + Tl = Tj * Tk; + T8E = Tj * To; + Tu = Ts * Tt; + T47 = Ts * Tx; + Tp = FMA(Tn, To, Tl); + T8F = FNMS(Tn, Tk, T8E); + TB = ri[WS(rs, 24)]; + TF = ii[WS(rs, 24)]; + Tq = T1 + Tp; + T46 = T1 - Tp; + T8H = T8F + T8G; + T97 = T8G - T8F; + TC = TA * TB; + T49 = TA * TF; + } + Ty = FMA(Tw, Tx, Tu); + T48 = FNMS(Tw, Tt, T47); + TG = FMA(TE, TF, TC); + T4a = FNMS(TE, TB, T49); + } + } + { + E TT, TX, TO, T4f, TU, T4g; + { + E TK, TN, TL, T4e; + TK = ri[WS(rs, 4)]; + TN = ii[WS(rs, 4)]; + TH = Ty + TG; + T98 = Ty - TG; + T4b = T48 - T4a; + T8D = T48 + T4a; + TL = TJ * TK; + T4e = TJ * TN; + TT = ri[WS(rs, 20)]; + TX = ii[WS(rs, 20)]; + TO = FMA(TM, TN, TL); + T4f = FNMS(TM, TK, T4e); + TU = TS * TT; + T4g = TS * TX; + } + { + E T17, T4m, T1a, T1e, T4d, T4i; + { + E T12, T16, TY, T4h, T13, T4l; + T12 = ri[WS(rs, 28)]; + T16 = ii[WS(rs, 28)]; + TY = FMA(TW, TX, TU); + T4h = FNMS(TW, TT, T4g); + T13 = T11 * T12; + T4l = T11 * T16; + TZ = TO + TY; + T4d = TO - TY; + T7f = T4f + T4h; + T4i = T4f - T4h; + T17 = FMA(T15, T16, T13); + T4m = FNMS(T15, T12, T4l); + } + T4j = T4d + T4i; + T6t = T4i - T4d; + T1a = ri[WS(rs, 12)]; + T1e = ii[WS(rs, 12)]; + { + E T1m, T4u, T1H, T4E, T1x, T1A, 
T1u, T4w, T1y, T4B; + { + E T1D, T1G, T1E, T4D; + { + E T1f, T4o, T4k, T4p; + { + E T1j, T1l, T1b, T4n, T1k, T4t; + T1j = ri[WS(rs, 2)]; + T1l = ii[WS(rs, 2)]; + T1b = T19 * T1a; + T4n = T19 * T1e; + T1k = T7 * T1j; + T4t = T7 * T1l; + T1f = FMA(T1d, T1e, T1b); + T4o = FNMS(T1d, T1a, T4n); + T1m = FMA(Tb, T1l, T1k); + T4u = FNMS(Tb, T1j, T4t); + } + T1g = T17 + T1f; + T4k = T17 - T1f; + T7g = T4m + T4o; + T4p = T4m - T4o; + T1D = ri[WS(rs, 26)]; + T1G = ii[WS(rs, 26)]; + T4q = T4k - T4p; + T6u = T4k + T4p; + T1E = T1C * T1D; + T4D = T1C * T1G; + } + { + E T1p, T1t, T1q, T4v; + T1p = ri[WS(rs, 18)]; + T1t = ii[WS(rs, 18)]; + T1H = FMA(T1F, T1G, T1E); + T4E = FNMS(T1F, T1D, T4D); + T1q = T1o * T1p; + T4v = T1o * T1t; + T1x = ri[WS(rs, 10)]; + T1A = ii[WS(rs, 10)]; + T1u = FMA(T1s, T1t, T1q); + T4w = FNMS(T1s, T1p, T4v); + T1y = T1w * T1x; + T4B = T1w * T1A; + } + } + { + E T4A, T1v, T7j, T4x, T1B, T4C; + T4A = T1m - T1u; + T1v = T1m + T1u; + T7j = T4u + T4w; + T4x = T4u - T4w; + T1B = FMA(T1z, T1A, T1y); + T4C = FNMS(T1z, T1x, T4B); + { + E T1I, T4y, T4F, T7k; + T1I = T1B + T1H; + T4y = T1B - T1H; + T4F = T4C - T4E; + T7k = T4C + T4E; + T4z = T4x - T4y; + T6x = T4x + T4y; + T1J = T1v + T1I; + T7m = T1v - T1I; + T7l = T7j - T7k; + T8d = T7j + T7k; + T6y = T4A - T4F; + T4G = T4A + T4F; + } + } + } + } + } + } + { + E T5Z, T3u, T5V, T5C, T7G, T5D, T3F, T5X, T4P, T4U; + { + E T1P, T4J, T2i, T4T, T21, T4L, T28, T4R; + { + E T1L, T1O, T1W, T20; + T1L = ri[WS(rs, 30)]; + T1O = ii[WS(rs, 30)]; + { + E T2d, T2h, T1M, T4I, T2e, T4S; + T2d = ri[WS(rs, 22)]; + T2h = ii[WS(rs, 22)]; + T1M = T1K * T1L; + T4I = T1K * T1O; + T2e = T2c * T2d; + T4S = T2c * T2h; + T1P = FMA(T1N, T1O, T1M); + T4J = FNMS(T1N, T1L, T4I); + T2i = FMA(T2g, T2h, T2e); + T4T = FNMS(T2g, T2d, T4S); + } + T1W = ri[WS(rs, 14)]; + T20 = ii[WS(rs, 14)]; + { + E T24, T27, T1X, T4K, T25, T4Q; + T24 = ri[WS(rs, 6)]; + T27 = ii[WS(rs, 6)]; + T1X = T1V * T1W; + T4K = T1V * T20; + T25 = T23 * T24; + T4Q = T23 * T27; + 
T21 = FMA(T1Z, T20, T1X); + T4L = FNMS(T1Z, T1W, T4K); + T28 = FMA(T26, T27, T25); + T4R = FNMS(T26, T24, T4Q); + } + } + { + E T22, T7p, T4M, T4N, T2j, T7q; + T4P = T1P - T21; + T22 = T1P + T21; + T7p = T4J + T4L; + T4M = T4J - T4L; + T4N = T28 - T2i; + T2j = T28 + T2i; + T7q = T4R + T4T; + T4U = T4R - T4T; + T2k = T22 + T2j; + T7o = T22 - T2j; + T7r = T7p - T7q; + T8e = T7p + T7q; + T6A = T4M + T4N; + T4O = T4M - T4N; + } + } + { + E T3l, T5z, T3E, T3v, T3t, T3w, T3x, T5B, T3A, T3B, T3D, T3y, T5W; + { + E T3g, T3k, T3h, T5y; + T3g = ri[WS(rs, 31)]; + T3k = ii[WS(rs, 31)]; + T3A = ri[WS(rs, 23)]; + T6B = T4P - T4U; + T4V = T4P + T4U; + T3h = T3f * T3g; + T5y = T3f * T3k; + T3B = T3z * T3A; + T3D = ii[WS(rs, 23)]; + T3l = FMA(T3j, T3k, T3h); + T5z = FNMS(T3j, T3g, T5y); + } + { + E T3o, T5Y, T3s, T3p, T5A; + T3o = ri[WS(rs, 15)]; + T3E = FMA(T3C, T3D, T3B); + T5Y = T3z * T3D; + T3s = ii[WS(rs, 15)]; + T3p = T3n * T3o; + T3v = ri[WS(rs, 7)]; + T5Z = FNMS(T3C, T3A, T5Y); + T5A = T3n * T3s; + T3t = FMA(T3r, T3s, T3p); + T3w = TP * T3v; + T3x = ii[WS(rs, 7)]; + T5B = FNMS(T3r, T3o, T5A); + } + T3u = T3l + T3t; + T5V = T3l - T3t; + T3y = FMA(TR, T3x, T3w); + T5W = TP * T3x; + T5C = T5z - T5B; + T7G = T5z + T5B; + T5D = T3y - T3E; + T3F = T3y + T3E; + T5X = FNMS(TR, T3v, T5W); + } + { + E T2L, T5q, T5m, T2z, T7v, T53, T2D, T5o; + { + E T2q, T50, T2y, T2A, T2C, T52, T2B, T5n; + { + E T2G, T2K, T2n, T4Z, T2t, T51; + { + E T2o, T2p, T60, T7H; + T2n = ri[WS(rs, 1)]; + T6P = T5C + T5D; + T5E = T5C - T5D; + T7L = T3u - T3F; + T3G = T3u + T3F; + T60 = T5X - T5Z; + T7H = T5X + T5Z; + T2o = T2 * T2n; + T2p = ii[WS(rs, 1)]; + T6M = T5V - T60; + T61 = T5V + T60; + T8n = T7G + T7H; + T7I = T7G - T7H; + T4Z = T2 * T2p; + T2q = FMA(T5, T2p, T2o); + } + T2G = ri[WS(rs, 25)]; + T2K = ii[WS(rs, 25)]; + T50 = FNMS(T5, T2n, T4Z); + { + E T2x, T2u, T2H, T5p; + T2t = ri[WS(rs, 17)]; + T2H = T2F * T2G; + T5p = T2F * T2K; + T2x = ii[WS(rs, 17)]; + T2u = T2s * T2t; + T2L = FMA(T2J, T2K, T2H); + 
T5q = FNMS(T2J, T2G, T5p); + T51 = T2s * T2x; + T2y = FMA(T2w, T2x, T2u); + } + T2A = ri[WS(rs, 9)]; + T2C = ii[WS(rs, 9)]; + T52 = FNMS(T2w, T2t, T51); + } + T5m = T2q - T2y; + T2z = T2q + T2y; + T2B = T8 * T2A; + T5n = T8 * T2C; + T7v = T50 + T52; + T53 = T50 - T52; + T2D = FMA(Tc, T2C, T2B); + T5o = FNMS(Tc, T2A, T5n); + } + { + E T3N, T3K, T3O, T5G, T41, T5Q, T3R, T3U, T3W; + { + E T3H, T3I, T3J, T3Y, T40, T5F, T3Z, T5P; + T3H = ri[WS(rs, 3)]; + { + E T54, T2M, T5r, T7w; + T54 = T2D - T2L; + T2M = T2D + T2L; + T5r = T5o - T5q; + T7w = T5o + T5q; + T6I = T53 + T54; + T55 = T53 - T54; + T7A = T2z - T2M; + T2N = T2z + T2M; + T6F = T5m - T5r; + T5s = T5m + T5r; + T8i = T7v + T7w; + T7x = T7v - T7w; + T3I = T3 * T3H; + } + T3J = ii[WS(rs, 3)]; + T3Y = ri[WS(rs, 11)]; + T40 = ii[WS(rs, 11)]; + T3N = ri[WS(rs, 19)]; + T3K = FMA(T6, T3J, T3I); + T5F = T3 * T3J; + T3Z = Td * T3Y; + T5P = Td * T40; + T3O = T3M * T3N; + T5G = FNMS(T6, T3H, T5F); + T41 = FMA(Th, T40, T3Z); + T5Q = FNMS(Th, T3Y, T5P); + T3R = ii[WS(rs, 19)]; + T3U = ri[WS(rs, 27)]; + T3W = ii[WS(rs, 27)]; + } + { + E T2O, T2P, T2Q, T37, T39, T56, T38, T5g; + { + E T3T, T5K, T5I, T3X, T5O, T7M, T5J; + T2O = ri[WS(rs, 5)]; + { + E T3S, T5H, T3V, T5N; + T3S = FMA(T3Q, T3R, T3O); + T5H = T3M * T3R; + T3V = Te * T3U; + T5N = Te * T3W; + T3T = T3K + T3S; + T5K = T3K - T3S; + T5I = FNMS(T3Q, T3N, T5H); + T3X = FMA(Ti, T3W, T3V); + T5O = FNMS(Ti, T3U, T5N); + T2P = T29 * T2O; + } + T7M = T5G + T5I; + T5J = T5G - T5I; + { + E T42, T5M, T7N, T5R; + T42 = T3X + T41; + T5M = T3X - T41; + T7N = T5O + T5Q; + T5R = T5O - T5Q; + T5L = T5J - T5K; + T62 = T5K + T5J; + T43 = T3T + T42; + T7J = T42 - T3T; + T5S = T5M + T5R; + T63 = T5M - T5R; + T7O = T7M - T7N; + T8o = T7M + T7N; + T2Q = ii[WS(rs, 5)]; + } + } + T37 = ri[WS(rs, 13)]; + T39 = ii[WS(rs, 13)]; + T2U = ri[WS(rs, 21)]; + T2R = FMA(T2b, T2Q, T2P); + T56 = T29 * T2Q; + T38 = T1R * T37; + T5g = T1R * T39; + T2V = T2T * T2U; + T57 = FNMS(T2b, T2O, T56); + T3a = 
FMA(T1U, T39, T38); + T5h = FNMS(T1U, T37, T5g); + T2Y = ii[WS(rs, 21)]; + T32 = ri[WS(rs, 29)]; + T35 = ii[WS(rs, 29)]; + } + } + } + } + } + { + E T5c, T5t, T5j, T5u, T88, T90, T8Z, T8b; + { + E T7e, T8T, T7y, T7D, T7h, T8U, T8S, T8R; + { + E T8c, T1i, T8A, T8z, T8O, T8J, T8N, T2l, T8L, T45, T8t, T8l, T8u, T8q, T3c; + E T8k, T8p, T8w, T2m; + { + E T8x, T8y, T8j, T8C, T8I; + { + E TI, T30, T5b, T59, T36, T5f, T1h, T7B, T5a; + TI = Tq + TH; + T7e = Tq - TH; + { + E T2Z, T58, T33, T5e; + T2Z = FMA(T2X, T2Y, T2V); + T58 = T2T * T2Y; + T33 = T31 * T32; + T5e = T31 * T35; + T30 = T2R + T2Z; + T5b = T2R - T2Z; + T59 = FNMS(T2X, T2U, T58); + T36 = FMA(T34, T35, T33); + T5f = FNMS(T34, T32, T5e); + T1h = TZ + T1g; + T8T = T1g - TZ; + } + T7B = T57 + T59; + T5a = T57 - T59; + { + E T3b, T5d, T7C, T5i; + T3b = T36 + T3a; + T5d = T36 - T3a; + T7C = T5f + T5h; + T5i = T5f - T5h; + T5c = T5a - T5b; + T5t = T5b + T5a; + T3c = T30 + T3b; + T7y = T3b - T30; + T5j = T5d + T5i; + T5u = T5d - T5i; + T7D = T7B - T7C; + T8j = T7B + T7C; + T8c = TI - T1h; + T1i = TI + T1h; + } + } + T8k = T8i - T8j; + T8x = T8i + T8j; + T8y = T8n + T8o; + T8p = T8n - T8o; + T7h = T7f - T7g; + T8C = T7f + T7g; + T8I = T8D + T8H; + T8U = T8H - T8D; + T8A = T8x + T8y; + T8z = T8x - T8y; + T8O = T8I - T8C; + T8J = T8C + T8I; + } + { + E T8h, T8m, T3d, T44; + T8h = T2N - T3c; + T3d = T2N + T3c; + T44 = T3G + T43; + T8m = T3G - T43; + T8N = T2k - T1J; + T2l = T1J + T2k; + T8L = T44 - T3d; + T45 = T3d + T44; + T8t = T8k - T8h; + T8l = T8h + T8k; + T8u = T8m + T8p; + T8q = T8m - T8p; + } + T8w = T1i - T2l; + T2m = T1i + T2l; + { + E T8s, T8P, T8Q, T8v; + { + E T8r, T8M, T8K, T8g, T8B, T8f; + T8S = T8q - T8l; + T8r = T8l + T8q; + T8B = T8d + T8e; + T8f = T8d - T8e; + ri[0] = T2m + T45; + ri[WS(rs, 16)] = T2m - T45; + ri[WS(rs, 8)] = T8w + T8z; + ri[WS(rs, 24)] = T8w - T8z; + T8M = T8J - T8B; + T8K = T8B + T8J; + T8g = T8c + T8f; + T8s = T8c - T8f; + T8R = T8O - T8N; + T8P = T8N + T8O; + ii[WS(rs, 24)] = T8M - 
T8L; + ii[WS(rs, 8)] = T8L + T8M; + ii[WS(rs, 16)] = T8K - T8A; + ii[0] = T8A + T8K; + ri[WS(rs, 4)] = FMA(KP707106781, T8r, T8g); + ri[WS(rs, 20)] = FNMS(KP707106781, T8r, T8g); + T8Q = T8t + T8u; + T8v = T8t - T8u; + } + ii[WS(rs, 20)] = FNMS(KP707106781, T8Q, T8P); + ii[WS(rs, 4)] = FMA(KP707106781, T8Q, T8P); + ri[WS(rs, 12)] = FMA(KP707106781, T8v, T8s); + ri[WS(rs, 28)] = FNMS(KP707106781, T8v, T8s); + } + } + { + E T7P, T7W, T7i, T7K, T8a, T86, T91, T8V, T8W, T7t, T7T, T7F, T92, T7Z, T89; + E T83; + { + E T7X, T7n, T7s, T7Y, T84, T85; + T7P = T7L - T7O; + T84 = T7L + T7O; + ii[WS(rs, 28)] = FNMS(KP707106781, T8S, T8R); + ii[WS(rs, 12)] = FMA(KP707106781, T8S, T8R); + T7W = T7e + T7h; + T7i = T7e - T7h; + T85 = T7I + T7J; + T7K = T7I - T7J; + T7X = T7m + T7l; + T7n = T7l - T7m; + T8a = FMA(KP414213562, T84, T85); + T86 = FNMS(KP414213562, T85, T84); + T91 = T8U - T8T; + T8V = T8T + T8U; + T7s = T7o + T7r; + T7Y = T7o - T7r; + { + E T82, T81, T7z, T7E; + T82 = T7x + T7y; + T7z = T7x - T7y; + T7E = T7A - T7D; + T81 = T7A + T7D; + T8W = T7n + T7s; + T7t = T7n - T7s; + T7T = FNMS(KP414213562, T7z, T7E); + T7F = FMA(KP414213562, T7E, T7z); + T92 = T7Y - T7X; + T7Z = T7X + T7Y; + T89 = FNMS(KP414213562, T81, T82); + T83 = FMA(KP414213562, T82, T81); + } + } + { + E T7S, T7u, T93, T95, T7U, T7Q; + T7S = FNMS(KP707106781, T7t, T7i); + T7u = FMA(KP707106781, T7t, T7i); + T93 = FMA(KP707106781, T92, T91); + T95 = FNMS(KP707106781, T92, T91); + T7U = FMA(KP414213562, T7K, T7P); + T7Q = FNMS(KP414213562, T7P, T7K); + { + E T80, T87, T8X, T8Y; + T88 = FNMS(KP707106781, T7Z, T7W); + T80 = FMA(KP707106781, T7Z, T7W); + { + E T7V, T94, T96, T7R; + T7V = T7T + T7U; + T94 = T7U - T7T; + T96 = T7F + T7Q; + T7R = T7F - T7Q; + ri[WS(rs, 30)] = FMA(KP923879532, T7V, T7S); + ri[WS(rs, 14)] = FNMS(KP923879532, T7V, T7S); + ii[WS(rs, 22)] = FNMS(KP923879532, T94, T93); + ii[WS(rs, 6)] = FMA(KP923879532, T94, T93); + ii[WS(rs, 30)] = FMA(KP923879532, T96, T95); + ii[WS(rs, 14)] = 
FNMS(KP923879532, T96, T95); + ri[WS(rs, 6)] = FMA(KP923879532, T7R, T7u); + ri[WS(rs, 22)] = FNMS(KP923879532, T7R, T7u); + T87 = T83 + T86; + T90 = T86 - T83; + } + T8Z = FNMS(KP707106781, T8W, T8V); + T8X = FMA(KP707106781, T8W, T8V); + T8Y = T89 + T8a; + T8b = T89 - T8a; + ri[WS(rs, 2)] = FMA(KP923879532, T87, T80); + ri[WS(rs, 18)] = FNMS(KP923879532, T87, T80); + ii[WS(rs, 18)] = FNMS(KP923879532, T8Y, T8X); + ii[WS(rs, 2)] = FMA(KP923879532, T8Y, T8X); + } + } + } + } + { + E T6s, T9o, T9n, T6v, T6N, T6Q, T6G, T6J, T9g, T9f; + { + E T6c, T4s, T9c, T4X, T9h, T9b, T9i, T6f, T5U, T6l, T64, T5k, T5v; + { + E T6d, T6e, T99, T9a, T5T; + { + E T4c, T4r, T4H, T4W; + T6s = T46 - T4b; + T4c = T46 + T4b; + ri[WS(rs, 10)] = FMA(KP923879532, T8b, T88); + ri[WS(rs, 26)] = FNMS(KP923879532, T8b, T88); + ii[WS(rs, 26)] = FNMS(KP923879532, T90, T8Z); + ii[WS(rs, 10)] = FMA(KP923879532, T90, T8Z); + T4r = T4j + T4q; + T9o = T4q - T4j; + T6d = FMA(KP414213562, T4z, T4G); + T4H = FNMS(KP414213562, T4G, T4z); + T4W = FMA(KP414213562, T4V, T4O); + T6e = FNMS(KP414213562, T4O, T4V); + T9n = T98 + T97; + T99 = T97 - T98; + T6c = FMA(KP707106781, T4r, T4c); + T4s = FNMS(KP707106781, T4r, T4c); + T9c = T4H + T4W; + T4X = T4H - T4W; + T9a = T6t + T6u; + T6v = T6t - T6u; + } + T6N = T5S - T5L; + T5T = T5L + T5S; + T9h = FNMS(KP707106781, T9a, T99); + T9b = FMA(KP707106781, T9a, T99); + T9i = T6e - T6d; + T6f = T6d + T6e; + T5U = FNMS(KP707106781, T5T, T5E); + T6l = FMA(KP707106781, T5T, T5E); + T64 = T62 + T63; + T6Q = T62 - T63; + T6G = T5j - T5c; + T5k = T5c + T5j; + T5v = T5t + T5u; + T6J = T5t - T5u; + } + { + E T6m, T6q, T6j, T6p, T9l, T9m; + { + E T68, T4Y, T6a, T66, T69, T5x, T9j, T6k, T65, T9k, T6b, T67; + T68 = FNMS(KP923879532, T4X, T4s); + T4Y = FMA(KP923879532, T4X, T4s); + T6k = FMA(KP707106781, T64, T61); + T65 = FNMS(KP707106781, T64, T61); + { + E T6i, T5l, T6h, T5w; + T6i = FMA(KP707106781, T5k, T55); + T5l = FNMS(KP707106781, T5k, T55); + T6h = FMA(KP707106781, T5v, 
T5s); + T5w = FNMS(KP707106781, T5v, T5s); + T6m = FNMS(KP198912367, T6l, T6k); + T6q = FMA(KP198912367, T6k, T6l); + T6a = FMA(KP668178637, T5U, T65); + T66 = FNMS(KP668178637, T65, T5U); + T6j = FMA(KP198912367, T6i, T6h); + T6p = FNMS(KP198912367, T6h, T6i); + T69 = FNMS(KP668178637, T5l, T5w); + T5x = FMA(KP668178637, T5w, T5l); + } + T9j = FMA(KP923879532, T9i, T9h); + T9l = FNMS(KP923879532, T9i, T9h); + T9k = T6a - T69; + T6b = T69 + T6a; + T9m = T5x + T66; + T67 = T5x - T66; + ii[WS(rs, 21)] = FNMS(KP831469612, T9k, T9j); + ii[WS(rs, 5)] = FMA(KP831469612, T9k, T9j); + ri[WS(rs, 5)] = FMA(KP831469612, T67, T4Y); + ri[WS(rs, 21)] = FNMS(KP831469612, T67, T4Y); + ri[WS(rs, 29)] = FMA(KP831469612, T6b, T68); + ri[WS(rs, 13)] = FNMS(KP831469612, T6b, T68); + } + { + E T6o, T9d, T9e, T6r, T6g, T6n; + T6o = FNMS(KP923879532, T6f, T6c); + T6g = FMA(KP923879532, T6f, T6c); + T6n = T6j + T6m; + T9g = T6m - T6j; + T9f = FNMS(KP923879532, T9c, T9b); + T9d = FMA(KP923879532, T9c, T9b); + ii[WS(rs, 29)] = FMA(KP831469612, T9m, T9l); + ii[WS(rs, 13)] = FNMS(KP831469612, T9m, T9l); + ri[WS(rs, 1)] = FMA(KP980785280, T6n, T6g); + ri[WS(rs, 17)] = FNMS(KP980785280, T6n, T6g); + T9e = T6p + T6q; + T6r = T6p - T6q; + ii[WS(rs, 17)] = FNMS(KP980785280, T9e, T9d); + ii[WS(rs, 1)] = FMA(KP980785280, T9e, T9d); + ri[WS(rs, 9)] = FMA(KP980785280, T6r, T6o); + ri[WS(rs, 25)] = FNMS(KP980785280, T6r, T6o); + } + } + } + { + E T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T6H, T74, T78, T7c, T6W, T6S; + { + E T6Z, T6z, T6C, T70; + T6Z = FNMS(KP414213562, T6x, T6y); + T6z = FMA(KP414213562, T6y, T6x); + ii[WS(rs, 25)] = FNMS(KP980785280, T9g, T9f); + ii[WS(rs, 9)] = FMA(KP980785280, T9g, T9f); + T6Y = FNMS(KP707106781, T6v, T6s); + T6w = FMA(KP707106781, T6v, T6s); + T6C = FNMS(KP414213562, T6B, T6A); + T70 = FMA(KP414213562, T6A, T6B); + T9w = T6z + T6C; + T6D = T6z - T6C; + T9v = FNMS(KP707106781, T9o, T9n); + T9p = FMA(KP707106781, T9o, T9n); + { + E T77, T6O, T76, T6R; + T9q = T70 - 
T6Z; + T71 = T6Z + T70; + T77 = FMA(KP707106781, T6N, T6M); + T6O = FNMS(KP707106781, T6N, T6M); + T76 = FMA(KP707106781, T6Q, T6P); + T6R = FNMS(KP707106781, T6Q, T6P); + T6H = FNMS(KP707106781, T6G, T6F); + T74 = FMA(KP707106781, T6G, T6F); + T78 = FNMS(KP198912367, T77, T76); + T7c = FMA(KP198912367, T76, T77); + T6W = FMA(KP668178637, T6O, T6R); + T6S = FNMS(KP668178637, T6R, T6O); + } + } + { + E T6U, T6E, T9r, T9t, T73, T6K; + T6U = FNMS(KP923879532, T6D, T6w); + T6E = FMA(KP923879532, T6D, T6w); + T9r = FMA(KP923879532, T9q, T9p); + T9t = FNMS(KP923879532, T9q, T9p); + T73 = FMA(KP707106781, T6J, T6I); + T6K = FNMS(KP707106781, T6J, T6I); + { + E T7a, T9x, T9y, T7d; + { + E T72, T7b, T6V, T6L, T79, T75; + T7a = FMA(KP923879532, T71, T6Y); + T72 = FNMS(KP923879532, T71, T6Y); + T75 = FMA(KP198912367, T74, T73); + T7b = FNMS(KP198912367, T73, T74); + T6V = FNMS(KP668178637, T6H, T6K); + T6L = FMA(KP668178637, T6K, T6H); + T79 = T75 - T78; + T9A = T75 + T78; + T9z = FMA(KP923879532, T9w, T9v); + T9x = FNMS(KP923879532, T9w, T9v); + { + E T6X, T9s, T9u, T6T; + T6X = T6V - T6W; + T9s = T6V + T6W; + T9u = T6S - T6L; + T6T = T6L + T6S; + ri[WS(rs, 7)] = FMA(KP980785280, T79, T72); + ri[WS(rs, 23)] = FNMS(KP980785280, T79, T72); + ri[WS(rs, 11)] = FMA(KP831469612, T6X, T6U); + ri[WS(rs, 27)] = FNMS(KP831469612, T6X, T6U); + ii[WS(rs, 19)] = FNMS(KP831469612, T9s, T9r); + ii[WS(rs, 3)] = FMA(KP831469612, T9s, T9r); + ii[WS(rs, 27)] = FNMS(KP831469612, T9u, T9t); + ii[WS(rs, 11)] = FMA(KP831469612, T9u, T9t); + ri[WS(rs, 3)] = FMA(KP831469612, T6T, T6E); + ri[WS(rs, 19)] = FNMS(KP831469612, T6T, T6E); + T9y = T7c - T7b; + T7d = T7b + T7c; + } + } + ii[WS(rs, 23)] = FNMS(KP980785280, T9y, T9x); + ii[WS(rs, 7)] = FMA(KP980785280, T9y, T9x); + ri[WS(rs, 31)] = FMA(KP980785280, T7d, T7a); + ri[WS(rs, 15)] = FNMS(KP980785280, T7d, T7a); + } + } + } + } + } + } + } + ii[WS(rs, 31)] = FMA(KP980785280, T9A, T9z); + ii[WS(rs, 15)] = FNMS(KP980785280, T9A, T9z); + } + } +} + 
+static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 27}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 32, "t2_32", twinstr, &GENUS, {236, 98, 252, 0}, 0, 0, 0 }; + +void X(codelet_t2_32) (planner *p) { + X(kdft_dit_register) (p, t2_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include t.h */ + +/* + * This function contains 488 FP additions, 280 FP multiplications, + * (or, 376 additions, 168 multiplications, 112 fused multiply/add), + * 158 stack variables, 7 constants, and 128 memory accesses + */ +#include "t.h" + +static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y; + E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d; + E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C; + E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25; + E T1S, T23; + { + E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF; + E T10; + { + E T4, Tc, T7, Tb; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tc = T5 * T3; + T7 = T5 * T6; + Tb = T2 * T6; + T8 = T4 + T7; + TM = T4 - T7; + TO 
= Tb + Tc; + Td = Tb - Tc; + T9 = W[4]; + Ts = T2 * T9; + T1d = T6 * T9; + Tx = T5 * T9; + T18 = T3 * T9; + Te = W[5]; + Tt = T5 * Te; + T1c = T3 * Te; + Tw = T2 * Te; + T19 = T6 * Te; + Th = W[6]; + TB = T3 * Th; + T14 = T5 * Th; + TG = T6 * Th; + TZ = T2 * Th; + Tl = W[7]; + TC = T6 * Tl; + T13 = T2 * Tl; + TF = T3 * Tl; + T10 = T5 * Tl; + } + TD = TB + TC; + TH = TF - TG; + T1y = TZ + T10; + T1H = TF + TG; + T15 = T13 + T14; + T1A = T13 - T14; + T11 = TZ - T10; + T1F = TB - TC; + T1n = FMA(T9, Th, Te * Tl); + T1p = FNMS(Te, Th, T9 * Tl); + { + E T2o, T2p, T2s, T2t; + T2o = T8 * Th; + T2p = Td * Tl; + T2q = T2o + T2p; + T2I = T2o - T2p; + T2s = T8 * Tl; + T2t = Td * Th; + T2u = T2s - T2t; + T2K = T2s + T2t; + } + { + E T2T, T2U, T2X, T2Y; + T2T = TM * Th; + T2U = TO * Tl; + T2V = T2T - T2U; + T3b = T2T + T2U; + T2X = TM * Tl; + T2Y = TO * Th; + T2Z = T2X + T2Y; + T3d = T2X - T2Y; + Tu = Ts + Tt; + Ty = Tw - Tx; + T3l = FMA(Tu, Th, Ty * Tl); + T3n = FNMS(Ty, Th, Tu * Tl); + } + T1t = Ts - Tt; + T1v = Tw + Tx; + T2f = FMA(T1t, Th, T1v * Tl); + T2h = FNMS(T1v, Th, T1t * Tl); + T1a = T18 - T19; + T1e = T1c + T1d; + T32 = FMA(T1a, Th, T1e * Tl); + T34 = FNMS(T1e, Th, T1a * Tl); + T1W = T18 + T19; + T1Y = T1c - T1d; + T2C = FMA(T1W, Th, T1Y * Tl); + T2E = FNMS(T1Y, Th, T1W * Tl); + { + E Ta, Tf, Ti, Tj; + Ta = T8 * T9; + Tf = Td * Te; + Tg = Ta - Tf; + TR = Ta + Tf; + Ti = T8 * Te; + Tj = Td * T9; + Tk = Ti + Tj; + TS = Ti - Tj; + } + Tm = FMA(Tg, Th, Tk * Tl); + TV = FNMS(TS, Th, TR * Tl); + To = FNMS(Tk, Th, Tg * Tl); + TT = FMA(TR, Th, TS * Tl); + { + E T1K, T1L, T1N, T1O; + T1K = TM * T9; + T1L = TO * Te; + T1M = T1K - T1L; + T21 = T1K + T1L; + T1N = TM * Te; + T1O = TO * T9; + T1P = T1N + T1O; + T22 = T1N - T1O; + } + T1Q = FMA(T1M, Th, T1P * Tl); + T25 = FNMS(T22, Th, T21 * Tl); + T1S = FNMS(T1P, Th, T1M * Tl); + T23 = FMA(T21, Th, T22 * Tl); + } + { + E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B; + E T5G, T3h, T6H, T6O, T7o, T4L, 
T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u; + E T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47; + E T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p; + E T4W, T5R, T55, T5O; + { + E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp; + T1 = ri[0]; + T7G = ii[0]; + Tn = ri[WS(rs, 16)]; + Tp = ii[WS(rs, 16)]; + Tq = FMA(Tm, Tn, To * Tp); + T7F = FNMS(To, Tn, Tm * Tp); + { + E Tv, Tz, TE, TI; + Tv = ri[WS(rs, 8)]; + Tz = ii[WS(rs, 8)]; + TA = FMA(Tu, Tv, Ty * Tz); + T3C = FNMS(Ty, Tv, Tu * Tz); + TE = ri[WS(rs, 24)]; + TI = ii[WS(rs, 24)]; + TJ = FMA(TD, TE, TH * TI); + T3D = FNMS(TH, TE, TD * TI); + } + { + E Tr, TK, T8a, T8b; + Tr = T1 + Tq; + TK = TA + TJ; + TL = Tr + TK; + T6f = Tr - TK; + T8a = T7G - T7F; + T8b = TA - TJ; + T8c = T8a - T8b; + T8q = T8b + T8a; + } + { + E T3B, T3E, T7E, T7H; + T3B = T1 - Tq; + T3E = T3C - T3D; + T3F = T3B - T3E; + T5t = T3B + T3E; + T7E = T3C + T3D; + T7H = T7F + T7G; + T7I = T7E + T7H; + T7W = T7H - T7E; + } + } + { + E T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y; + { + E T2c, T2d, T2r, T2v; + T2c = ri[WS(rs, 1)]; + T2d = ii[WS(rs, 1)]; + T2e = FMA(T2, T2c, T5 * T2d); + T4g = FNMS(T5, T2c, T2 * T2d); + T2r = ri[WS(rs, 25)]; + T2v = ii[WS(rs, 25)]; + T2w = FMA(T2q, T2r, T2u * T2v); + T4z = FNMS(T2u, T2r, T2q * T2v); + } + { + E T2g, T2i, T2l, T2m; + T2g = ri[WS(rs, 17)]; + T2i = ii[WS(rs, 17)]; + T2j = FMA(T2f, T2g, T2h * T2i); + T4h = FNMS(T2h, T2g, T2f * T2i); + T2l = ri[WS(rs, 9)]; + T2m = ii[WS(rs, 9)]; + T2n = FMA(T9, T2l, Te * T2m); + T4y = FNMS(Te, T2l, T9 * T2m); + } + { + E T2k, T2x, T6w, T6x; + T2k = T2e + T2j; + T2x = T2n + T2w; + T2y = T2k + T2x; + T6B = T2k - T2x; + T6w = T4g + T4h; + T6x = T4y + T4z; + T6y = T6w - T6x; + T7j = T6w + T6x; + } + { + E T4i, T4j, T4x, T4A; + T4i = T4g - T4h; + T4j = T2n - T2w; + T4k = T4i + T4j; + T5J = T4i - T4j; + T4x = T2e - T2j; + T4A = T4y - T4z; + T4B = T4x - T4A; + T5G = T4x + T4A; + } + } + { + E T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I; + { 
+ E T2W, T30, T3c, T3e; + T2W = ri[WS(rs, 31)]; + T30 = ii[WS(rs, 31)]; + T31 = FMA(T2V, T2W, T2Z * T30); + T4Y = FNMS(T2Z, T2W, T2V * T30); + T3c = ri[WS(rs, 23)]; + T3e = ii[WS(rs, 23)]; + T3f = FMA(T3b, T3c, T3d * T3e); + T4J = FNMS(T3d, T3c, T3b * T3e); + } + { + E T33, T35, T38, T39; + T33 = ri[WS(rs, 15)]; + T35 = ii[WS(rs, 15)]; + T36 = FMA(T32, T33, T34 * T35); + T4Z = FNMS(T34, T33, T32 * T35); + T38 = ri[WS(rs, 7)]; + T39 = ii[WS(rs, 7)]; + T3a = FMA(TR, T38, TS * T39); + T4I = FNMS(TS, T38, TR * T39); + } + { + E T37, T3g, T6M, T6N; + T37 = T31 + T36; + T3g = T3a + T3f; + T3h = T37 + T3g; + T6H = T37 - T3g; + T6M = T4Y + T4Z; + T6N = T4I + T4J; + T6O = T6M - T6N; + T7o = T6M + T6N; + } + { + E T4H, T4K, T50, T51; + T4H = T31 - T36; + T4K = T4I - T4J; + T4L = T4H - T4K; + T5N = T4H + T4K; + T50 = T4Y - T4Z; + T51 = T3a - T3f; + T52 = T50 + T51; + T5Q = T50 - T51; + } + } + { + E TQ, T3G, T1g, T3N, TX, T3H, T17, T3M; + { + E TN, TP, T1b, T1f; + TN = ri[WS(rs, 4)]; + TP = ii[WS(rs, 4)]; + TQ = FMA(TM, TN, TO * TP); + T3G = FNMS(TO, TN, TM * TP); + T1b = ri[WS(rs, 12)]; + T1f = ii[WS(rs, 12)]; + T1g = FMA(T1a, T1b, T1e * T1f); + T3N = FNMS(T1e, T1b, T1a * T1f); + } + { + E TU, TW, T12, T16; + TU = ri[WS(rs, 20)]; + TW = ii[WS(rs, 20)]; + TX = FMA(TT, TU, TV * TW); + T3H = FNMS(TV, TU, TT * TW); + T12 = ri[WS(rs, 28)]; + T16 = ii[WS(rs, 28)]; + T17 = FMA(T11, T12, T15 * T16); + T3M = FNMS(T15, T12, T11 * T16); + } + { + E TY, T1h, T6g, T6h; + TY = TQ + TX; + T1h = T17 + T1g; + T1i = TY + T1h; + T7V = T1h - TY; + T6g = T3G + T3H; + T6h = T3M + T3N; + T6i = T6g - T6h; + T7D = T6g + T6h; + } + { + E T3I, T3J, T3L, T3O; + T3I = T3G - T3H; + T3J = TQ - TX; + T3K = T3I - T3J; + T5u = T3J + T3I; + T3L = T17 - T1g; + T3O = T3M - T3N; + T3P = T3L + T3O; + T5v = T3L - T3O; + } + } + { + E T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y; + { + E T1k, T1l, T1z, T1B; + T1k = ri[WS(rs, 2)]; + T1l = ii[WS(rs, 2)]; + T1m = FMA(T8, T1k, Td * T1l); + T3S = FNMS(Td, T1k, T8 * T1l); + 
T1z = ri[WS(rs, 26)]; + T1B = ii[WS(rs, 26)]; + T1C = FMA(T1y, T1z, T1A * T1B); + T3Z = FNMS(T1A, T1z, T1y * T1B); + } + { + E T1o, T1q, T1u, T1w; + T1o = ri[WS(rs, 18)]; + T1q = ii[WS(rs, 18)]; + T1r = FMA(T1n, T1o, T1p * T1q); + T3T = FNMS(T1p, T1o, T1n * T1q); + T1u = ri[WS(rs, 10)]; + T1w = ii[WS(rs, 10)]; + T1x = FMA(T1t, T1u, T1v * T1w); + T3Y = FNMS(T1v, T1u, T1t * T1w); + } + { + E T1s, T1D, T6k, T6l; + T1s = T1m + T1r; + T1D = T1x + T1C; + T1E = T1s + T1D; + T6n = T1s - T1D; + T6k = T3S + T3T; + T6l = T3Y + T3Z; + T6m = T6k - T6l; + T7e = T6k + T6l; + } + { + E T3U, T3V, T3X, T40; + T3U = T3S - T3T; + T3V = T1x - T1C; + T3W = T3U + T3V; + T5y = T3U - T3V; + T3X = T1m - T1r; + T40 = T3Y - T3Z; + T41 = T3X - T40; + T5z = T3X + T40; + } + } + { + E T1J, T43, T27, T4a, T1U, T44, T20, T49; + { + E T1G, T1I, T24, T26; + T1G = ri[WS(rs, 30)]; + T1I = ii[WS(rs, 30)]; + T1J = FMA(T1F, T1G, T1H * T1I); + T43 = FNMS(T1H, T1G, T1F * T1I); + T24 = ri[WS(rs, 22)]; + T26 = ii[WS(rs, 22)]; + T27 = FMA(T23, T24, T25 * T26); + T4a = FNMS(T25, T24, T23 * T26); + } + { + E T1R, T1T, T1X, T1Z; + T1R = ri[WS(rs, 14)]; + T1T = ii[WS(rs, 14)]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T44 = FNMS(T1S, T1R, T1Q * T1T); + T1X = ri[WS(rs, 6)]; + T1Z = ii[WS(rs, 6)]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T49 = FNMS(T1Y, T1X, T1W * T1Z); + } + { + E T1V, T28, T6q, T6r; + T1V = T1J + T1U; + T28 = T20 + T27; + T29 = T1V + T28; + T6p = T1V - T28; + T6q = T43 + T44; + T6r = T49 + T4a; + T6s = T6q - T6r; + T7f = T6q + T6r; + } + { + E T45, T46, T48, T4b; + T45 = T43 - T44; + T46 = T20 - T27; + T47 = T45 + T46; + T5B = T45 - T46; + T48 = T1J - T1U; + T4b = T49 - T4a; + T4c = T48 - T4b; + T5C = T48 + T4b; + } + } + { + E T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o; + { + E T2z, T2A, T2D, T2F; + T2z = ri[WS(rs, 5)]; + T2A = ii[WS(rs, 5)]; + T2B = FMA(T21, T2z, T22 * T2A); + T4r = FNMS(T22, T2z, T21 * T2A); + T2D = ri[WS(rs, 21)]; + T2F = ii[WS(rs, 21)]; + T2G = FMA(T2C, T2D, T2E * T2F); 
+ T4s = FNMS(T2E, T2D, T2C * T2F); + } + T4q = T2B - T2G; + T4t = T4r - T4s; + { + E T2J, T2L, T2N, T2O; + T2J = ri[WS(rs, 29)]; + T2L = ii[WS(rs, 29)]; + T2M = FMA(T2I, T2J, T2K * T2L); + T4m = FNMS(T2K, T2J, T2I * T2L); + T2N = ri[WS(rs, 13)]; + T2O = ii[WS(rs, 13)]; + T2P = FMA(T1M, T2N, T1P * T2O); + T4n = FNMS(T1P, T2N, T1M * T2O); + } + T4l = T2M - T2P; + T4o = T4m - T4n; + { + E T2H, T2Q, T6C, T6D; + T2H = T2B + T2G; + T2Q = T2M + T2P; + T2R = T2H + T2Q; + T6z = T2Q - T2H; + T6C = T4r + T4s; + T6D = T4m + T4n; + T6E = T6C - T6D; + T7k = T6C + T6D; + } + { + E T4p, T4u, T4C, T4D; + T4p = T4l - T4o; + T4u = T4q + T4t; + T4v = KP707106781 * (T4p - T4u); + T5H = KP707106781 * (T4u + T4p); + T4C = T4t - T4q; + T4D = T4l + T4o; + T4E = KP707106781 * (T4C - T4D); + T5K = KP707106781 * (T4C + T4D); + } + } + { + E T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U; + { + E T3i, T3j, T3m, T3o; + T3i = ri[WS(rs, 3)]; + T3j = ii[WS(rs, 3)]; + T3k = FMA(T3, T3i, T6 * T3j); + T4M = FNMS(T6, T3i, T3 * T3j); + T3m = ri[WS(rs, 19)]; + T3o = ii[WS(rs, 19)]; + T3p = FMA(T3l, T3m, T3n * T3o); + T4N = FNMS(T3n, T3m, T3l * T3o); + } + T4O = T4M - T4N; + T4P = T3k - T3p; + { + E T3r, T3s, T3u, T3v; + T3r = ri[WS(rs, 27)]; + T3s = ii[WS(rs, 27)]; + T3t = FMA(Th, T3r, Tl * T3s); + T4S = FNMS(Tl, T3r, Th * T3s); + T3u = ri[WS(rs, 11)]; + T3v = ii[WS(rs, 11)]; + T3w = FMA(Tg, T3u, Tk * T3v); + T4T = FNMS(Tk, T3u, Tg * T3v); + } + T4R = T3t - T3w; + T4U = T4S - T4T; + { + E T3q, T3x, T6I, T6J; + T3q = T3k + T3p; + T3x = T3t + T3w; + T3y = T3q + T3x; + T6P = T3x - T3q; + T6I = T4M + T4N; + T6J = T4S + T4T; + T6K = T6I - T6J; + T7p = T6I + T6J; + } + { + E T4Q, T4V, T53, T54; + T4Q = T4O - T4P; + T4V = T4R + T4U; + T4W = KP707106781 * (T4Q - T4V); + T5R = KP707106781 * (T4Q + T4V); + T53 = T4R - T4U; + T54 = T4P + T4O; + T55 = KP707106781 * (T53 - T54); + T5O = KP707106781 * (T54 + T53); + } + } + { + E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B; + { + E T1j, T2a, T7C, T7J; + T1j = 
TL + T1i; + T2a = T1E + T29; + T2b = T1j + T2a; + T7x = T1j - T2a; + T7C = T7e + T7f; + T7J = T7D + T7I; + T7K = T7C + T7J; + T7M = T7J - T7C; + } + { + E T2S, T3z, T7y, T7z; + T2S = T2y + T2R; + T3z = T3h + T3y; + T3A = T2S + T3z; + T7L = T3z - T2S; + T7y = T7j + T7k; + T7z = T7o + T7p; + T7A = T7y - T7z; + T7B = T7y + T7z; + } + ri[WS(rs, 16)] = T2b - T3A; + ii[WS(rs, 16)] = T7K - T7B; + ri[0] = T2b + T3A; + ii[0] = T7B + T7K; + ri[WS(rs, 24)] = T7x - T7A; + ii[WS(rs, 24)] = T7M - T7L; + ri[WS(rs, 8)] = T7x + T7A; + ii[WS(rs, 8)] = T7L + T7M; + } + { + E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v; + { + E T7d, T7g, T7O, T7P; + T7d = TL - T1i; + T7g = T7e - T7f; + T7h = T7d + T7g; + T7t = T7d - T7g; + T7O = T29 - T1E; + T7P = T7I - T7D; + T7Q = T7O + T7P; + T7S = T7P - T7O; + } + { + E T7i, T7l, T7n, T7q; + T7i = T2y - T2R; + T7l = T7j - T7k; + T7m = T7i + T7l; + T7u = T7l - T7i; + T7n = T3h - T3y; + T7q = T7o - T7p; + T7r = T7n - T7q; + T7v = T7n + T7q; + } + { + E T7s, T7N, T7w, T7R; + T7s = KP707106781 * (T7m + T7r); + ri[WS(rs, 20)] = T7h - T7s; + ri[WS(rs, 4)] = T7h + T7s; + T7N = KP707106781 * (T7u + T7v); + ii[WS(rs, 4)] = T7N + T7Q; + ii[WS(rs, 20)] = T7Q - T7N; + T7w = KP707106781 * (T7u - T7v); + ri[WS(rs, 28)] = T7t - T7w; + ri[WS(rs, 12)] = T7t + T7w; + T7R = KP707106781 * (T7r - T7m); + ii[WS(rs, 12)] = T7R + T7S; + ii[WS(rs, 28)] = T7S - T7R; + } + } + { + E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R; + E T6V; + { + E T6o, T6t, T6A, T6F; + T6j = T6f - T6i; + T7X = T7V + T7W; + T83 = T7W - T7V; + T6X = T6f + T6i; + T6o = T6m - T6n; + T6t = T6p + T6s; + T6u = KP707106781 * (T6o - T6t); + T7U = KP707106781 * (T6o + T6t); + { + E T75, T76, T6Y, T6Z; + T75 = T6H + T6K; + T76 = T6O + T6P; + T77 = FNMS(KP382683432, T76, KP923879532 * T75); + T7b = FMA(KP923879532, T76, KP382683432 * T75); + T6Y = T6n + T6m; + T6Z = T6p - T6s; + T70 = KP707106781 * (T6Y + T6Z); + T82 = KP707106781 * (T6Z - T6Y); + } + T6A = T6y - T6z; + T6F = T6B - 
T6E; + T6G = FMA(KP923879532, T6A, KP382683432 * T6F); + T6U = FNMS(KP923879532, T6F, KP382683432 * T6A); + { + E T72, T73, T6L, T6Q; + T72 = T6y + T6z; + T73 = T6B + T6E; + T74 = FMA(KP382683432, T72, KP923879532 * T73); + T7a = FNMS(KP382683432, T73, KP923879532 * T72); + T6L = T6H - T6K; + T6Q = T6O - T6P; + T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L); + T6V = FMA(KP382683432, T6Q, KP923879532 * T6L); + } + } + { + E T6v, T6S, T81, T84; + T6v = T6j + T6u; + T6S = T6G + T6R; + ri[WS(rs, 22)] = T6v - T6S; + ri[WS(rs, 6)] = T6v + T6S; + T81 = T6U + T6V; + T84 = T82 + T83; + ii[WS(rs, 6)] = T81 + T84; + ii[WS(rs, 22)] = T84 - T81; + } + { + E T6T, T6W, T85, T86; + T6T = T6j - T6u; + T6W = T6U - T6V; + ri[WS(rs, 30)] = T6T - T6W; + ri[WS(rs, 14)] = T6T + T6W; + T85 = T6R - T6G; + T86 = T83 - T82; + ii[WS(rs, 14)] = T85 + T86; + ii[WS(rs, 30)] = T86 - T85; + } + { + E T71, T78, T7T, T7Y; + T71 = T6X + T70; + T78 = T74 + T77; + ri[WS(rs, 18)] = T71 - T78; + ri[WS(rs, 2)] = T71 + T78; + T7T = T7a + T7b; + T7Y = T7U + T7X; + ii[WS(rs, 2)] = T7T + T7Y; + ii[WS(rs, 18)] = T7Y - T7T; + } + { + E T79, T7c, T7Z, T80; + T79 = T6X - T70; + T7c = T7a - T7b; + ri[WS(rs, 26)] = T79 - T7c; + ri[WS(rs, 10)] = T79 + T7c; + T7Z = T77 - T74; + T80 = T7X - T7U; + ii[WS(rs, 10)] = T7Z + T80; + ii[WS(rs, 26)] = T80 - T7Z; + } + } + { + E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57; + E T5b, T3Q, T8p; + T3Q = KP707106781 * (T3K - T3P); + T3R = T3F - T3Q; + T5d = T3F + T3Q; + T8p = KP707106781 * (T5v - T5u); + T8r = T8p + T8q; + T8x = T8q - T8p; + { + E T42, T4d, T5l, T5m; + T42 = FNMS(KP923879532, T41, KP382683432 * T3W); + T4d = FMA(KP382683432, T47, KP923879532 * T4c); + T4e = T42 - T4d; + T8o = T42 + T4d; + T5l = T4L + T4W; + T5m = T52 + T55; + T5n = FNMS(KP555570233, T5m, KP831469612 * T5l); + T5r = FMA(KP831469612, T5m, KP555570233 * T5l); + } + { + E T4w, T4F, T5e, T5f; + T4w = T4k - T4v; + T4F = T4B - T4E; + T4G = FMA(KP980785280, T4w, KP195090322 * 
T4F); + T5a = FNMS(KP980785280, T4F, KP195090322 * T4w); + T5e = FMA(KP923879532, T3W, KP382683432 * T41); + T5f = FNMS(KP923879532, T47, KP382683432 * T4c); + T5g = T5e + T5f; + T8w = T5f - T5e; + } + { + E T5i, T5j, T4X, T56; + T5i = T4k + T4v; + T5j = T4B + T4E; + T5k = FMA(KP555570233, T5i, KP831469612 * T5j); + T5q = FNMS(KP555570233, T5j, KP831469612 * T5i); + T4X = T4L - T4W; + T56 = T52 - T55; + T57 = FNMS(KP980785280, T56, KP195090322 * T4X); + T5b = FMA(KP195090322, T56, KP980785280 * T4X); + } + { + E T4f, T58, T8v, T8y; + T4f = T3R + T4e; + T58 = T4G + T57; + ri[WS(rs, 23)] = T4f - T58; + ri[WS(rs, 7)] = T4f + T58; + T8v = T5a + T5b; + T8y = T8w + T8x; + ii[WS(rs, 7)] = T8v + T8y; + ii[WS(rs, 23)] = T8y - T8v; + } + { + E T59, T5c, T8z, T8A; + T59 = T3R - T4e; + T5c = T5a - T5b; + ri[WS(rs, 31)] = T59 - T5c; + ri[WS(rs, 15)] = T59 + T5c; + T8z = T57 - T4G; + T8A = T8x - T8w; + ii[WS(rs, 15)] = T8z + T8A; + ii[WS(rs, 31)] = T8A - T8z; + } + { + E T5h, T5o, T8n, T8s; + T5h = T5d + T5g; + T5o = T5k + T5n; + ri[WS(rs, 19)] = T5h - T5o; + ri[WS(rs, 3)] = T5h + T5o; + T8n = T5q + T5r; + T8s = T8o + T8r; + ii[WS(rs, 3)] = T8n + T8s; + ii[WS(rs, 19)] = T8s - T8n; + } + { + E T5p, T5s, T8t, T8u; + T5p = T5d - T5g; + T5s = T5q - T5r; + ri[WS(rs, 27)] = T5p - T5s; + ri[WS(rs, 11)] = T5p + T5s; + T8t = T5n - T5k; + T8u = T8r - T8o; + ii[WS(rs, 11)] = T8t + T8u; + ii[WS(rs, 27)] = T8u - T8t; + } + } + { + E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T; + E T5X, T5w, T89; + T5w = KP707106781 * (T5u + T5v); + T5x = T5t - T5w; + T5Z = T5t + T5w; + T89 = KP707106781 * (T3K + T3P); + T8d = T89 + T8c; + T8j = T8c - T89; + { + E T5A, T5D, T67, T68; + T5A = FNMS(KP382683432, T5z, KP923879532 * T5y); + T5D = FMA(KP923879532, T5B, KP382683432 * T5C); + T5E = T5A - T5D; + T88 = T5A + T5D; + T67 = T5N + T5O; + T68 = T5Q + T5R; + T69 = FNMS(KP195090322, T68, KP980785280 * T67); + T6d = FMA(KP195090322, T67, KP980785280 * T68); + } + { + E T5I, T5L, 
T60, T61; + T5I = T5G - T5H; + T5L = T5J - T5K; + T5M = FMA(KP555570233, T5I, KP831469612 * T5L); + T5W = FNMS(KP831469612, T5I, KP555570233 * T5L); + T60 = FMA(KP382683432, T5y, KP923879532 * T5z); + T61 = FNMS(KP382683432, T5B, KP923879532 * T5C); + T62 = T60 + T61; + T8i = T61 - T60; + } + { + E T64, T65, T5P, T5S; + T64 = T5G + T5H; + T65 = T5J + T5K; + T66 = FMA(KP980785280, T64, KP195090322 * T65); + T6c = FNMS(KP195090322, T64, KP980785280 * T65); + T5P = T5N - T5O; + T5S = T5Q - T5R; + T5T = FNMS(KP831469612, T5S, KP555570233 * T5P); + T5X = FMA(KP831469612, T5P, KP555570233 * T5S); + } + { + E T5F, T5U, T8h, T8k; + T5F = T5x + T5E; + T5U = T5M + T5T; + ri[WS(rs, 21)] = T5F - T5U; + ri[WS(rs, 5)] = T5F + T5U; + T8h = T5W + T5X; + T8k = T8i + T8j; + ii[WS(rs, 5)] = T8h + T8k; + ii[WS(rs, 21)] = T8k - T8h; + } + { + E T5V, T5Y, T8l, T8m; + T5V = T5x - T5E; + T5Y = T5W - T5X; + ri[WS(rs, 29)] = T5V - T5Y; + ri[WS(rs, 13)] = T5V + T5Y; + T8l = T5T - T5M; + T8m = T8j - T8i; + ii[WS(rs, 13)] = T8l + T8m; + ii[WS(rs, 29)] = T8m - T8l; + } + { + E T63, T6a, T87, T8e; + T63 = T5Z + T62; + T6a = T66 + T69; + ri[WS(rs, 17)] = T63 - T6a; + ri[WS(rs, 1)] = T63 + T6a; + T87 = T6c + T6d; + T8e = T88 + T8d; + ii[WS(rs, 1)] = T87 + T8e; + ii[WS(rs, 17)] = T8e - T87; + } + { + E T6b, T6e, T8f, T8g; + T6b = T5Z - T62; + T6e = T6c - T6d; + ri[WS(rs, 25)] = T6b - T6e; + ri[WS(rs, 9)] = T6b + T6e; + T8f = T69 - T66; + T8g = T8d - T88; + ii[WS(rs, 9)] = T8f + T8g; + ii[WS(rs, 25)] = T8g - T8f; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 27}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 32, "t2_32", twinstr, &GENUS, {376, 168, 112, 0}, 0, 0, 0 }; + +void X(codelet_t2_32) (planner *p) { + X(kdft_dit_register) (p, t2_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -name t2_4 -include t.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 33 stack variables, 0 constants, and 16 memory accesses + */ +#include "t.h" + +static void t2_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(8, rs)) { + E Ti, Tq, To, Te, Ty, Tz, Tm, Ts; + { + E T2, T6, T3, T5; + T2 = W[0]; + T6 = W[3]; + T3 = W[2]; + T5 = W[1]; + { + E T1, Tx, Td, Tw, Tj, Tl, Ta, T4, Tk, Tr; + T1 = ri[0]; + Ta = T2 * T6; + T4 = T2 * T3; + Tx = ii[0]; + { + E T8, Tb, 
T7, Tc; + T8 = ri[WS(rs, 2)]; + Tb = FNMS(T5, T3, Ta); + T7 = FMA(T5, T6, T4); + Tc = ii[WS(rs, 2)]; + { + E Tf, Th, T9, Tv, Tg, Tp; + Tf = ri[WS(rs, 1)]; + Th = ii[WS(rs, 1)]; + T9 = T7 * T8; + Tv = T7 * Tc; + Tg = T2 * Tf; + Tp = T2 * Th; + Td = FMA(Tb, Tc, T9); + Tw = FNMS(Tb, T8, Tv); + Ti = FMA(T5, Th, Tg); + Tq = FNMS(T5, Tf, Tp); + } + Tj = ri[WS(rs, 3)]; + Tl = ii[WS(rs, 3)]; + } + To = T1 - Td; + Te = T1 + Td; + Ty = Tw + Tx; + Tz = Tx - Tw; + Tk = T3 * Tj; + Tr = T3 * Tl; + Tm = FMA(T6, Tl, Tk); + Ts = FNMS(T6, Tj, Tr); + } + } + { + E Tn, TA, Tu, Tt; + Tn = Ti + Tm; + TA = Ti - Tm; + Tu = Tq + Ts; + Tt = Tq - Ts; + ii[WS(rs, 3)] = TA + Tz; + ii[WS(rs, 1)] = Tz - TA; + ri[0] = Te + Tn; + ri[WS(rs, 2)] = Te - Tn; + ri[WS(rs, 1)] = To + Tt; + ri[WS(rs, 3)] = To - Tt; + ii[WS(rs, 2)] = Ty - Tu; + ii[0] = Tu + Ty; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 4, "t2_4", twinstr, &GENUS, {16, 8, 8, 0}, 0, 0, 0 }; + +void X(codelet_t2_4) (planner *p) { + X(kdft_dit_register) (p, t2_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -name t2_4 -include t.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "t.h" + +static void t2_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(8, rs)) { + E T2, T4, T3, T5, T6, T8; + T2 = W[0]; + T4 = W[1]; + T3 = W[2]; + T5 = W[3]; + T6 = FMA(T2, T3, T4 * T5); + T8 = FNMS(T4, T3, T2 * T5); + { + E T1, Tp, Ta, To, Te, Tk, Th, Tl, T7, T9; + T1 = ri[0]; + Tp = ii[0]; + T7 = ri[WS(rs, 2)]; + T9 = ii[WS(rs, 2)]; 
+ Ta = FMA(T6, T7, T8 * T9); + To = FNMS(T8, T7, T6 * T9); + { + E Tc, Td, Tf, Tg; + Tc = ri[WS(rs, 1)]; + Td = ii[WS(rs, 1)]; + Te = FMA(T2, Tc, T4 * Td); + Tk = FNMS(T4, Tc, T2 * Td); + Tf = ri[WS(rs, 3)]; + Tg = ii[WS(rs, 3)]; + Th = FMA(T3, Tf, T5 * Tg); + Tl = FNMS(T5, Tf, T3 * Tg); + } + { + E Tb, Ti, Tn, Tq; + Tb = T1 + Ta; + Ti = Te + Th; + ri[WS(rs, 2)] = Tb - Ti; + ri[0] = Tb + Ti; + Tn = Tk + Tl; + Tq = To + Tp; + ii[0] = Tn + Tq; + ii[WS(rs, 2)] = Tq - Tn; + } + { + E Tj, Tm, Tr, Ts; + Tj = T1 - Ta; + Tm = Tk - Tl; + ri[WS(rs, 3)] = Tj - Tm; + ri[WS(rs, 1)] = Tj + Tm; + Tr = Tp - To; + Ts = Te - Th; + ii[WS(rs, 1)] = Tr - Ts; + ii[WS(rs, 3)] = Ts + Tr; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 4, "t2_4", twinstr, &GENUS, {16, 8, 8, 0}, 0, 0, 0 }; + +void X(codelet_t2_4) (planner *p) { + X(kdft_dit_register) (p, t2_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -name t2_5 -include t.h */ + +/* + * This function contains 44 FP additions, 40 FP multiplications, + * (or, 14 additions, 10 multiplications, 30 fused multiply/add), + * 47 stack variables, 4 constants, and 20 memory accesses + */ +#include "t.h" + +static void t2_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) { + E Ta, T1, TO, Tp, TS, Ti, TL, TC, To, TE, Ts, TF, T2, T8, T5; + E TT, Tt, TG; + T2 = W[0]; + Ta = W[3]; + T8 = W[2]; + T5 = W[1]; + { + E Tq, Tr, Te, T9; + T1 = ri[0]; + Te = T2 * Ta; + T9 = T2 * T8; + TO = ii[0]; + { + E T3, Tf, Tm, Tj, Tb, T4, T6, Tc, Tg; + T3 = ri[WS(rs, 1)]; + Tf = FMA(T5, T8, Te); + Tm = FNMS(T5, T8, Te); + Tj = FMA(T5, Ta, T9); + Tb = FNMS(T5, Ta, T9); + T4 = T2 * T3; + T6 = ii[WS(rs, 1)]; + Tc = ri[WS(rs, 4)]; + Tg = ii[WS(rs, 4)]; + { + E Tk, Tl, Tn, TD; + { + E T7, Tz, Th, TB, Ty, Td, TA; + Tk = ri[WS(rs, 2)]; + T7 = FMA(T5, T6, T4); + Ty = T2 * T6; + Td = Tb * Tc; + TA = Tb * Tg; + Tl = Tj * Tk; + Tz = FNMS(T5, T3, Ty); + Th = FMA(Tf, Tg, Td); + TB = 
FNMS(Tf, Tc, TA); + Tn = ii[WS(rs, 2)]; + Tp = ri[WS(rs, 3)]; + TS = T7 - Th; + Ti = T7 + Th; + TL = Tz + TB; + TC = Tz - TB; + TD = Tj * Tn; + Tq = T8 * Tp; + Tr = ii[WS(rs, 3)]; + } + To = FMA(Tm, Tn, Tl); + TE = FNMS(Tm, Tk, TD); + } + } + Ts = FMA(Ta, Tr, Tq); + TF = T8 * Tr; + } + TT = To - Ts; + Tt = To + Ts; + TG = FNMS(Ta, Tp, TF); + { + E TU, TW, TV, TR, Tw, Tu; + TU = FMA(KP618033988, TT, TS); + TW = FNMS(KP618033988, TS, TT); + Tw = Ti - Tt; + Tu = Ti + Tt; + { + E TM, TH, Tv, TI, TK; + TM = TE + TG; + TH = TE - TG; + ri[0] = T1 + Tu; + Tv = FNMS(KP250000000, Tu, T1); + TI = FMA(KP618033988, TH, TC); + TK = FNMS(KP618033988, TC, TH); + { + E TQ, TN, TJ, Tx, TP; + TQ = TL - TM; + TN = TL + TM; + TJ = FNMS(KP559016994, Tw, Tv); + Tx = FMA(KP559016994, Tw, Tv); + ii[0] = TN + TO; + TP = FNMS(KP250000000, TN, TO); + ri[WS(rs, 1)] = FMA(KP951056516, TI, Tx); + ri[WS(rs, 4)] = FNMS(KP951056516, TI, Tx); + ri[WS(rs, 3)] = FMA(KP951056516, TK, TJ); + ri[WS(rs, 2)] = FNMS(KP951056516, TK, TJ); + TV = FNMS(KP559016994, TQ, TP); + TR = FMA(KP559016994, TQ, TP); + } + } + ii[WS(rs, 4)] = FMA(KP951056516, TU, TR); + ii[WS(rs, 1)] = FNMS(KP951056516, TU, TR); + ii[WS(rs, 3)] = FNMS(KP951056516, TW, TV); + ii[WS(rs, 2)] = FMA(KP951056516, TW, TV); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 5, "t2_5", twinstr, &GENUS, {14, 10, 30, 0}, 0, 0, 0 }; + +void X(codelet_t2_5) (planner *p) { + X(kdft_dit_register) (p, t2_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -name t2_5 -include t.h */ + +/* + * This function contains 44 FP additions, 32 FP multiplications, + * (or, 30 additions, 18 multiplications, 14 fused multiply/add), + * 37 stack variables, 4 constants, and 20 memory accesses + */ +#include "t.h" + +static void t2_5(R *ri, R *ii, const R 
*W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) { + E T2, T4, T7, T9, Tb, Tl, Tf, Tj; + { + E T8, Te, Ta, Td; + T2 = W[0]; + T4 = W[1]; + T7 = W[2]; + T9 = W[3]; + T8 = T2 * T7; + Te = T4 * T7; + Ta = T4 * T9; + Td = T2 * T9; + Tb = T8 - Ta; + Tl = Td - Te; + Tf = Td + Te; + Tj = T8 + Ta; + } + { + E T1, TI, Ty, TB, TN, TM, TF, TG, TH, Ti, Tr, Ts; + T1 = ri[0]; + TI = ii[0]; + { + E T6, Tw, Tq, TA, Th, Tx, Tn, Tz; + { + E T3, T5, To, Tp; + T3 = ri[WS(rs, 1)]; + T5 = ii[WS(rs, 1)]; + T6 = FMA(T2, T3, T4 * T5); + Tw = FNMS(T4, T3, T2 * T5); + To = ri[WS(rs, 3)]; + Tp = ii[WS(rs, 3)]; + Tq = FMA(T7, To, T9 * Tp); + TA = FNMS(T9, To, T7 * Tp); + } + { + E Tc, Tg, Tk, Tm; + Tc = ri[WS(rs, 4)]; + Tg = ii[WS(rs, 4)]; + Th = FMA(Tb, Tc, Tf * Tg); + Tx = FNMS(Tf, Tc, Tb * Tg); + Tk = ri[WS(rs, 2)]; + Tm = ii[WS(rs, 2)]; + Tn = FMA(Tj, Tk, Tl * Tm); + Tz = FNMS(Tl, Tk, Tj * Tm); + } + Ty = Tw - Tx; + TB = Tz - TA; + TN = Tn - Tq; + TM = T6 - Th; + TF = Tw + Tx; + TG = Tz + TA; + TH = TF + TG; + Ti = T6 + Th; + Tr = Tn + Tq; + Ts = Ti + Tr; + } + ri[0] = T1 + Ts; + ii[0] = TH + TI; + { + E TC, TE, Tv, TD, Tt, Tu; + TC = FMA(KP951056516, Ty, KP587785252 * TB); + TE = FNMS(KP587785252, Ty, KP951056516 * TB); + Tt = KP559016994 * (Ti - Tr); + Tu = FNMS(KP250000000, Ts, T1); + Tv = Tt + Tu; + TD = Tu - Tt; + ri[WS(rs, 4)] = Tv - TC; + ri[WS(rs, 3)] = TD + TE; + ri[WS(rs, 1)] = Tv + TC; + ri[WS(rs, 2)] = TD - TE; + } + { + E TO, TP, TL, TQ, TJ, TK; + TO = FMA(KP951056516, TM, KP587785252 * TN); + TP = FNMS(KP587785252, TM, KP951056516 * TN); + TJ = KP559016994 * (TF - TG); + TK = 
FNMS(KP250000000, TH, TI); + TL = TJ + TK; + TQ = TK - TJ; + ii[WS(rs, 1)] = TL - TO; + ii[WS(rs, 3)] = TQ - TP; + ii[WS(rs, 4)] = TO + TL; + ii[WS(rs, 2)] = TP + TQ; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 5, "t2_5", twinstr, &GENUS, {30, 18, 14, 0}, 0, 0, 0 }; + +void X(codelet_t2_5) (planner *p) { + X(kdft_dit_register) (p, t2_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,4096 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include t.h */ + +/* + * This function contains 1154 FP additions, 840 FP multiplications, + * (or, 520 additions, 206 multiplications, 634 fused multiply/add), + * 349 stack variables, 15 constants, and 256 memory accesses + */ +#include "t.h" + +static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + (mb * 10); m < me; 
m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tg0, TlC, TlB, Tg3; + { + E T2, T3, Tc, T8, Te, T5, T6, T14, T3d, T3i, TJ, T7, Tr, T3g, TG; + E T10, T3a, TL, TP, Tb, Tt, T17, Td, Ti, T3N, T3R, T1i, Tu, T1I, T2U; + E T1t, T3U, T5O, T48, T2u, T7B, TK, T79, T3D, T2h, T2l, T3G, T1x, T3X, T2d; + E T1M, T2X, T4B, T4x, T3j, T4T, T29, T5s, T81, T5w, T7X, T7N, T7h, T64, T6a; + E T6e, T7l, T60, T7R, T6h, T5A, T7o, T6J, T6k, T5E, T6N, T7r, T6x, T6t, T7c; + E TO, T2x, T7E, TU, TQ, T2C, T2y, T5R, T4b, T4c, T4g, T4W, T3m, T3r, T3n; + E T1k, Tx, Ty, T4p, T4s, TC, T23, T1Z, T19, Th, T31, T35, T1e, T44, T41; + E T1a, T6W, T70, T55, T59, T3v, T3z, Tf, T1R, T2N, T2Q, T1V, T1p, T1l, Tm; + { + E T1H, T1s, T2g, Tg, Tw, TH, T2t, T47, T3h, T3M, T4w, T28, T3Q, T4A, T2c; + E Ts; + { + E T4, T13, TI, TF, TZ, Ta, T9; + T2 = W[0]; + T3 = W[2]; + Tc = W[5]; + T8 = W[4]; + Te = W[6]; + T4 = T2 * T3; + T13 = T2 * Tc; + TI = T3 * Tc; + TF = T3 * T8; + T1H = T8 * Te; + TZ = T2 * T8; + T5 = W[1]; + T6 = W[3]; + T1s = T3 * Te; + T2g = T2 * Te; + T14 = FNMS(T5, T8, T13); + T3d = FMA(T5, T8, T13); + T3i = FNMS(T6, T8, TI); + TJ = FMA(T6, T8, TI); + T7 = FNMS(T5, T6, T4); + Tr = FMA(T5, T6, T4); + Ta = T2 * T6; + Tg = T7 * Tc; + Tw = Tr * Tc; + T3g = FMA(T6, Tc, TF); + TG = FNMS(T6, Tc, TF); + T10 = FMA(T5, Tc, TZ); + T3a = FNMS(T5, Tc, TZ); + TH = TG * Te; + T2t = T10 * Te; + T47 = T3a * Te; + T3h = T3g * Te; + TL = W[8]; + TP = W[9]; + T9 = T7 * T8; + Tb = FMA(T5, T3, Ta); + Tt = FNMS(T5, T3, Ta); + T3M = T2 * TL; + T4w = T8 * TL; + T28 = T3 * TL; + T3Q = T2 * TP; + T4A = T8 * TP; + T2c = T3 * TP; + T17 = FNMS(Tb, Tc, T9); + Td = FMA(Tb, Tc, T9); + Ts = Tr * T8; + Ti = W[7]; + } + { + E T5r, T80, T1L, T2k, T1w, T5z, T2B, T2v; + T3N = FMA(T5, TP, T3M); + T3R = FNMS(T5, TL, T3Q); + T1i = FMA(Tt, Tc, Ts); + Tu = FNMS(Tt, Tc, Ts); + T1I = FNMS(Tc, Ti, T1H); + T2U = FMA(Tc, Ti, T1H); + T1t = FMA(T6, Ti, T1s); + T3U = FNMS(T6, Ti, T1s); + T5O = FNMS(T3d, Ti, T47); + 
T48 = FMA(T3d, Ti, T47); + T2u = FMA(T14, Ti, T2t); + T7B = FNMS(T14, Ti, T2t); + T1L = T8 * Ti; + T2k = T2 * Ti; + T1w = T3 * Ti; + TK = FMA(TJ, Ti, TH); + T79 = FNMS(TJ, Ti, TH); + T3D = FMA(T5, Ti, T2g); + T2h = FNMS(T5, Ti, T2g); + T2l = FMA(T5, Te, T2k); + T3G = FNMS(T5, Te, T2k); + T1x = FNMS(T6, Te, T1w); + T3X = FMA(T6, Te, T1w); + T2d = FNMS(T6, TL, T2c); + T1M = FMA(Tc, Te, T1L); + T2X = FNMS(Tc, Te, T1L); + T4B = FNMS(Tc, TL, T4A); + T4x = FMA(Tc, TP, T4w); + T3j = FMA(T3i, Ti, T3h); + T4T = FNMS(T3i, Ti, T3h); + T29 = FMA(T6, TP, T28); + T5r = T3g * TL; + T80 = T7 * TP; + { + E T7M, T7g, T63, T5v, T7W; + T5v = T3g * TP; + T7W = T7 * TL; + T5s = FMA(T3i, TP, T5r); + T81 = FNMS(Tb, TL, T80); + T5w = FNMS(T3i, TL, T5v); + T7X = FMA(Tb, TP, T7W); + T7M = TG * TL; + T7g = T10 * TL; + T63 = T3a * TP; + { + E T6d, T7k, T69, T5Z, T7Q; + T69 = Tr * TL; + T7N = FMA(TJ, TP, T7M); + T7h = FMA(T14, TP, T7g); + T64 = FNMS(T3d, TL, T63); + T6a = FMA(Tt, TP, T69); + T6d = Tr * TP; + T7k = T10 * TP; + T5Z = T3a * TL; + T7Q = TG * TP; + T6e = FNMS(Tt, TL, T6d); + T7l = FNMS(T14, TL, T7k); + T60 = FMA(T3d, TP, T5Z); + T7R = FNMS(TJ, TL, T7Q); + T5z = Tr * Te; + } + } + { + E T6I, T5D, T6M, T6s, T6w; + T6I = T7 * Te; + T5D = Tr * Ti; + T6M = T7 * Ti; + T6h = FNMS(Tt, Ti, T5z); + T5A = FMA(Tt, Ti, T5z); + T7o = FMA(Tb, Ti, T6I); + T6J = FNMS(Tb, Ti, T6I); + T6k = FMA(Tt, Te, T5D); + T5E = FNMS(Tt, Te, T5D); + T6N = FMA(Tb, Te, T6M); + T7r = FNMS(Tb, Te, T6M); + T6s = T2U * TL; + T6w = T2U * TP; + { + E TN, TT, TM, T2w; + TN = TG * Ti; + T2w = T10 * Ti; + T6x = FNMS(T2X, TL, T6w); + T6t = FMA(T2X, TP, T6s); + T7c = FMA(TJ, Te, TN); + TO = FNMS(TJ, Te, TN); + TT = TK * TP; + TM = TK * TL; + T2x = FNMS(T14, Te, T2w); + T7E = FMA(T14, Te, T2w); + TU = FNMS(TO, TL, TT); + TQ = FMA(TO, TP, TM); + T2B = T2u * TP; + T2v = T2u * TL; + } + } + { + E T1Y, T22, Tv, TB; + { + E T49, T4f, T4a, T3l, T3q, T3k; + T4a = T3a * Ti; + T2C = FNMS(T2x, TL, T2B); + T2y = FMA(T2x, TP, T2v); + T5R = 
FMA(T3d, Te, T4a); + T4b = FNMS(T3d, Te, T4a); + T49 = T48 * TL; + T4f = T48 * TP; + T3l = T3g * Ti; + T4c = FMA(T4b, TP, T49); + T4g = FNMS(T4b, TL, T4f); + T4W = FMA(T3i, Te, T3l); + T3m = FNMS(T3i, Te, T3l); + T1Y = Tu * TL; + T3q = T3j * TP; + T3k = T3j * TL; + T22 = Tu * TP; + Tv = Tu * Te; + T3r = FNMS(T3m, TL, T3q); + T3n = FMA(T3m, TP, T3k); + TB = Tu * Ti; + T1k = FNMS(Tt, T8, Tw); + Tx = FMA(Tt, T8, Tw); + } + { + E T30, T34, T18, T1d; + T30 = T17 * TL; + T34 = T17 * TP; + T18 = T17 * Te; + Ty = FMA(Tx, Ti, Tv); + T4p = FNMS(Tx, Ti, Tv); + T4s = FMA(Tx, Te, TB); + TC = FNMS(Tx, Te, TB); + T23 = FNMS(Tx, TL, T22); + T1Z = FMA(Tx, TP, T1Y); + T1d = T17 * Ti; + T19 = FMA(Tb, T8, Tg); + Th = FNMS(Tb, T8, Tg); + { + E T1j, T1o, T1Q, T1U; + T1j = T1i * TL; + { + E T6V, T6Z, T54, T58; + T6V = Ty * TL; + T6Z = Ty * TP; + T31 = FMA(T19, TP, T30); + T35 = FNMS(T19, TL, T34); + T1e = FMA(T19, Te, T1d); + T44 = FNMS(T19, Te, T1d); + T41 = FMA(T19, Ti, T18); + T1a = FNMS(T19, Ti, T18); + T6W = FMA(TC, TP, T6V); + T70 = FNMS(TC, TL, T6Z); + T1o = T1i * TP; + T54 = T41 * TL; + T58 = T41 * TP; + T1Q = T1i * Te; + T1U = T1i * Ti; + T55 = FMA(T44, TP, T54); + T59 = FNMS(T44, TL, T58); + } + T3v = Td * TL; + T3z = Td * TP; + Tf = Td * Te; + T1R = FMA(T1k, Ti, T1Q); + T2N = FNMS(T1k, Ti, T1Q); + T2Q = FMA(T1k, Te, T1U); + T1V = FNMS(T1k, Te, T1U); + T1p = FNMS(T1k, TL, T1o); + T1l = FMA(T1k, TP, T1j); + Tm = Td * Ti; + } + } + } + } + } + { + E Tl9, TlD, TY, Tg4, T8w, TdS, TkE, Tkd, T2G, Tge, Tgh, TiK, Te1, T98, Te0; + E T9f, Te5, T9p, Tgq, T39, Te8, T9M, TiN, Tgn, TeE, TbI, Thr, T74, TeP, TcB; + E Tja, Thc, T8D, TdT, T1B, TkD, T8K, TdU, Tg7, Tk7, T8T, TdY, T27, Tg9, T90; + E TdX, Tgc, TiJ, T9Y, Tec, T4k, TgB, Tal, Tef, Tgy, TiT, Taz, Tel, T5d, Th0; + E Tbs, Tew, TgL, TiZ, T3K, Tgo, Tgt, TiO, T9P, Te6, T9E, Te9, T4L, Tgz, TgE; + E TiU, Tao, Ted, Tad, Teg, T5I, TgM, Th3, Tj0, Tbv, Tem, TaO, Tex, T7v, Thd; + E Thu, Tjb, TcE, TeF, TbX, TeQ, T68, Tj5, Tez, Teq, Tbj, Tbx, TgS, 
Th5, T6B; + E Tj6, TeA, Tet, Tb4, Tby, TgX, Th6, T7V, Tjg, TeS, TeJ, Tcs, TcG, Thj, Thw; + E T84, T83, T85, Tc7, T8k, Tc3, T86, T89, T8b; + { + E T3w, T3A, T4H, T4E, T8e, T8i, T5j, T5n, T4U, T4S, T4V, Tau, T5b, Tbq, T4X; + E T50, T52; + { + E T72, Tcz, Tcv, T6Q, Tha, TbG, T6U, Tcx, T99, T9e; + { + E T1, Tkb, Tp, Tka, TR, TV, TE, T8s, TS, T8t; + { + E Tn, Tj, T8d, T8h, T5i, T5m; + T1 = ri[0]; + T8d = T1R * TL; + T8h = T1R * TP; + T3w = FMA(Th, TP, T3v); + T3A = FNMS(Th, TL, T3z); + Tn = FMA(Th, Te, Tm); + T4H = FNMS(Th, Te, Tm); + T4E = FMA(Th, Ti, Tf); + Tj = FNMS(Th, Ti, Tf); + T8e = FMA(T1V, TP, T8d); + T8i = FNMS(T1V, TL, T8h); + Tkb = ii[0]; + T5i = T4E * TL; + T5m = T4E * TP; + { + E Tk, To, Tl, Tk9; + Tk = ri[WS(rs, 32)]; + To = ii[WS(rs, 32)]; + T5j = FMA(T4H, TP, T5i); + T5n = FNMS(T4H, TL, T5m); + Tl = Tj * Tk; + Tk9 = Tj * To; + { + E Tz, TD, TA, T8r; + Tz = ri[WS(rs, 16)]; + TD = ii[WS(rs, 16)]; + Tp = FMA(Tn, To, Tl); + Tka = FNMS(Tn, Tk, Tk9); + TA = Ty * Tz; + T8r = Ty * TD; + TR = ri[WS(rs, 48)]; + TV = ii[WS(rs, 48)]; + TE = FMA(TC, TD, TA); + T8s = FNMS(TC, Tz, T8r); + TS = TQ * TR; + T8t = TQ * TV; + } + } + } + { + E T8q, Tq, Tl7, Tkc, TW, T8u; + T8q = T1 - Tp; + Tq = T1 + Tp; + Tl7 = Tkb - Tka; + Tkc = Tka + Tkb; + TW = FMA(TU, TV, TS); + T8u = FNMS(TU, TR, T8t); + { + E TX, Tl8, T8v, Tk8; + TX = TE + TW; + Tl8 = TE - TW; + T8v = T8s - T8u; + Tk8 = T8s + T8u; + Tl9 = Tl7 - Tl8; + TlD = Tl8 + Tl7; + TY = Tq + TX; + Tg4 = Tq - TX; + T8w = T8q - T8v; + TdS = T8q + T8v; + TkE = Tkc - Tk8; + Tkd = Tk8 + Tkc; + } + } + } + { + E T2f, T93, T2E, T9d, T2n, T95, T2s, T9b; + { + E T2a, T2e, T2i, T2m; + T2a = ri[WS(rs, 60)]; + T2e = ii[WS(rs, 60)]; + { + E T2z, T2D, T2b, T92, T2A, T9c; + T2z = ri[WS(rs, 44)]; + T2D = ii[WS(rs, 44)]; + T2b = T29 * T2a; + T92 = T29 * T2e; + T2A = T2y * T2z; + T9c = T2y * T2D; + T2f = FMA(T2d, T2e, T2b); + T93 = FNMS(T2d, T2a, T92); + T2E = FMA(T2C, T2D, T2A); + T9d = FNMS(T2C, T2z, T9c); + } + T2i = ri[WS(rs, 28)]; + T2m = 
ii[WS(rs, 28)]; + { + E T2p, T2r, T2j, T94, T2q, T9a; + T2p = ri[WS(rs, 12)]; + T2r = ii[WS(rs, 12)]; + T2j = T2h * T2i; + T94 = T2h * T2m; + T2q = TG * T2p; + T9a = TG * T2r; + T2n = FMA(T2l, T2m, T2j); + T95 = FNMS(T2l, T2i, T94); + T2s = FMA(TJ, T2r, T2q); + T9b = FNMS(TJ, T2p, T9a); + } + } + { + E T2o, Tgf, T96, T97, T2F, Tgg; + T99 = T2f - T2n; + T2o = T2f + T2n; + Tgf = T93 + T95; + T96 = T93 - T95; + T97 = T2s - T2E; + T2F = T2s + T2E; + Tgg = T9b + T9d; + T9e = T9b - T9d; + T2G = T2o + T2F; + Tge = T2o - T2F; + Tgh = Tgf - Tgg; + TiK = Tgf + Tgg; + Te1 = T96 - T97; + T98 = T96 + T97; + } + } + { + E T9K, T2T, T9G, T9n, Tgl, T9o, T38, T9I; + { + E T2M, T9k, T37, T2V, T2S, T2W, T2Y, T9m, T32, T33, T36, T2Z, T9H; + { + E T2J, T2L, T2K, T9j; + T2J = ri[WS(rs, 2)]; + T2L = ii[WS(rs, 2)]; + T32 = ri[WS(rs, 50)]; + Te0 = T99 + T9e; + T9f = T99 - T9e; + T2K = Tr * T2J; + T9j = Tr * T2L; + T33 = T31 * T32; + T36 = ii[WS(rs, 50)]; + T2M = FMA(Tt, T2L, T2K); + T9k = FNMS(Tt, T2J, T9j); + } + { + E T2O, T9J, T2R, T2P, T9l; + T2O = ri[WS(rs, 34)]; + T37 = FMA(T35, T36, T33); + T9J = T31 * T36; + T2R = ii[WS(rs, 34)]; + T2P = T2N * T2O; + T2V = ri[WS(rs, 18)]; + T9K = FNMS(T35, T32, T9J); + T9l = T2N * T2R; + T2S = FMA(T2Q, T2R, T2P); + T2W = T2U * T2V; + T2Y = ii[WS(rs, 18)]; + T9m = FNMS(T2Q, T2O, T9l); + } + T2T = T2M + T2S; + T9G = T2M - T2S; + T2Z = FMA(T2X, T2Y, T2W); + T9H = T2U * T2Y; + T9n = T9k - T9m; + Tgl = T9k + T9m; + T9o = T2Z - T37; + T38 = T2Z + T37; + T9I = FNMS(T2X, T2V, T9H); + } + { + E T6H, TbD, T6P, T6R, T6T, TbF, T6S, Tcw; + { + E T6X, T71, T6E, TbC, T6K, TbE; + { + E T6F, T6G, T9L, Tgm; + T6E = ri[WS(rs, 63)]; + Te5 = T9n - T9o; + T9p = T9n + T9o; + Tgq = T2T - T38; + T39 = T2T + T38; + T9L = T9I - T9K; + Tgm = T9I + T9K; + T6F = TL * T6E; + T6G = ii[WS(rs, 63)]; + Te8 = T9G + T9L; + T9M = T9G - T9L; + TiN = Tgl + Tgm; + Tgn = Tgl - Tgm; + TbC = TL * T6G; + T6H = FMA(TP, T6G, T6F); + } + T6X = ri[WS(rs, 47)]; + T71 = ii[WS(rs, 47)]; + TbD = 
FNMS(TP, T6E, TbC); + { + E T6O, T6L, T6Y, Tcy; + T6K = ri[WS(rs, 31)]; + T6Y = T6W * T6X; + Tcy = T6W * T71; + T6O = ii[WS(rs, 31)]; + T6L = T6J * T6K; + T72 = FMA(T70, T71, T6Y); + Tcz = FNMS(T70, T6X, Tcy); + TbE = T6J * T6O; + T6P = FMA(T6N, T6O, T6L); + } + T6R = ri[WS(rs, 15)]; + T6T = ii[WS(rs, 15)]; + TbF = FNMS(T6N, T6K, TbE); + } + Tcv = T6H - T6P; + T6Q = T6H + T6P; + T6S = TK * T6R; + Tcw = TK * T6T; + Tha = TbD + TbF; + TbG = TbD - TbF; + T6U = FMA(TO, T6T, T6S); + Tcx = FNMS(TO, T6R, Tcw); + } + } + { + E T1J, T1G, T1K, T8O, T25, T8Y, T1N, T1S, T1W; + { + E T1b, T16, T1c, T8y, T1z, T8I, T1f, T1m, T1q; + { + E T11, T12, T15, T1u, T1y, T8x, T1v, T8H; + T11 = ri[WS(rs, 8)]; + { + E TbH, T73, TcA, Thb; + TbH = T6U - T72; + T73 = T6U + T72; + TcA = Tcx - Tcz; + Thb = Tcx + Tcz; + TeE = TbG - TbH; + TbI = TbG + TbH; + Thr = T6Q - T73; + T74 = T6Q + T73; + TeP = Tcv + TcA; + TcB = Tcv - TcA; + Tja = Tha + Thb; + Thc = Tha - Thb; + T12 = T10 * T11; + } + T15 = ii[WS(rs, 8)]; + T1u = ri[WS(rs, 24)]; + T1y = ii[WS(rs, 24)]; + T1b = ri[WS(rs, 40)]; + T16 = FMA(T14, T15, T12); + T8x = T10 * T15; + T1v = T1t * T1u; + T8H = T1t * T1y; + T1c = T1a * T1b; + T8y = FNMS(T14, T11, T8x); + T1z = FMA(T1x, T1y, T1v); + T8I = FNMS(T1x, T1u, T8H); + T1f = ii[WS(rs, 40)]; + T1m = ri[WS(rs, 56)]; + T1q = ii[WS(rs, 56)]; + } + { + E T1D, T1E, T1F, T20, T24, T8N, T21, T8X; + { + E T1h, T8C, T8A, T1r, T8G, Tg5, T8B; + T1D = ri[WS(rs, 4)]; + { + E T1g, T8z, T1n, T8F; + T1g = FMA(T1e, T1f, T1c); + T8z = T1a * T1f; + T1n = T1l * T1m; + T8F = T1l * T1q; + T1h = T16 + T1g; + T8C = T16 - T1g; + T8A = FNMS(T1e, T1b, T8z); + T1r = FMA(T1p, T1q, T1n); + T8G = FNMS(T1p, T1m, T8F); + T1E = T7 * T1D; + } + Tg5 = T8y + T8A; + T8B = T8y - T8A; + { + E T1A, T8E, Tg6, T8J; + T1A = T1r + T1z; + T8E = T1r - T1z; + Tg6 = T8G + T8I; + T8J = T8G - T8I; + T8D = T8B - T8C; + TdT = T8C + T8B; + T1B = T1h + T1A; + TkD = T1A - T1h; + T8K = T8E + T8J; + TdU = T8E - T8J; + Tg7 = Tg5 - Tg6; + Tk7 = Tg5 + 
Tg6; + T1F = ii[WS(rs, 4)]; + } + } + T20 = ri[WS(rs, 52)]; + T24 = ii[WS(rs, 52)]; + T1J = ri[WS(rs, 36)]; + T1G = FMA(Tb, T1F, T1E); + T8N = T7 * T1F; + T21 = T1Z * T20; + T8X = T1Z * T24; + T1K = T1I * T1J; + T8O = FNMS(Tb, T1D, T8N); + T25 = FMA(T23, T24, T21); + T8Y = FNMS(T23, T20, T8X); + T1N = ii[WS(rs, 36)]; + T1S = ri[WS(rs, 20)]; + T1W = ii[WS(rs, 20)]; + } + } + { + E T3V, T3T, T3W, T9T, T4i, Taj, T3Y, T42, T45; + { + E T3O, T3P, T3S, T4d, T4h, T9S, T4e, Tai; + { + E T1P, T8U, T8Q, T1X, T8W, Tga, T8R; + T3O = ri[WS(rs, 62)]; + { + E T1O, T8P, T1T, T8V; + T1O = FMA(T1M, T1N, T1K); + T8P = T1I * T1N; + T1T = T1R * T1S; + T8V = T1R * T1W; + T1P = T1G + T1O; + T8U = T1G - T1O; + T8Q = FNMS(T1M, T1J, T8P); + T1X = FMA(T1V, T1W, T1T); + T8W = FNMS(T1V, T1S, T8V); + T3P = T3N * T3O; + } + Tga = T8O + T8Q; + T8R = T8O - T8Q; + { + E T26, T8S, Tgb, T8Z; + T26 = T1X + T25; + T8S = T1X - T25; + Tgb = T8W + T8Y; + T8Z = T8W - T8Y; + T8T = T8R + T8S; + TdY = T8R - T8S; + T27 = T1P + T26; + Tg9 = T1P - T26; + T90 = T8U - T8Z; + TdX = T8U + T8Z; + Tgc = Tga - Tgb; + TiJ = Tga + Tgb; + T3S = ii[WS(rs, 62)]; + } + } + T4d = ri[WS(rs, 46)]; + T4h = ii[WS(rs, 46)]; + T3V = ri[WS(rs, 30)]; + T3T = FMA(T3R, T3S, T3P); + T9S = T3N * T3S; + T4e = T4c * T4d; + Tai = T4c * T4h; + T3W = T3U * T3V; + T9T = FNMS(T3R, T3O, T9S); + T4i = FMA(T4g, T4h, T4e); + Taj = FNMS(T4g, T4d, Tai); + T3Y = ii[WS(rs, 30)]; + T42 = ri[WS(rs, 14)]; + T45 = ii[WS(rs, 14)]; + } + { + E T4P, T4Q, T4R, T56, T5a, Tat, T57, Tbp; + { + E T40, Taf, T9V, T46, Tah, Tgw, T9W; + T4P = ri[WS(rs, 1)]; + { + E T3Z, T9U, T43, Tag; + T3Z = FMA(T3X, T3Y, T3W); + T9U = T3U * T3Y; + T43 = T41 * T42; + Tag = T41 * T45; + T40 = T3T + T3Z; + Taf = T3T - T3Z; + T9V = FNMS(T3X, T3V, T9U); + T46 = FMA(T44, T45, T43); + Tah = FNMS(T44, T42, Tag); + T4Q = T2 * T4P; + } + Tgw = T9T + T9V; + T9W = T9T - T9V; + { + E T4j, T9X, Tgx, Tak; + T4j = T46 + T4i; + T9X = T46 - T4i; + Tgx = Tah + Taj; + Tak = Tah - Taj; + T9Y = T9W + 
T9X; + Tec = T9W - T9X; + T4k = T40 + T4j; + TgB = T40 - T4j; + Tal = Taf - Tak; + Tef = Taf + Tak; + Tgy = Tgw - Tgx; + TiT = Tgw + Tgx; + T4R = ii[WS(rs, 1)]; + } + } + T56 = ri[WS(rs, 49)]; + T5a = ii[WS(rs, 49)]; + T4U = ri[WS(rs, 33)]; + T4S = FMA(T5, T4R, T4Q); + Tat = T2 * T4R; + T57 = T55 * T56; + Tbp = T55 * T5a; + T4V = T4T * T4U; + Tau = FNMS(T5, T4P, Tat); + T5b = FMA(T59, T5a, T57); + Tbq = FNMS(T59, T56, Tbp); + T4X = ii[WS(rs, 33)]; + T50 = ri[WS(rs, 17)]; + T52 = ii[WS(rs, 17)]; + } + } + } + } + { + E T7a, T78, T7b, TbL, T7t, TbU, T7d, T7i, T7m; + { + E T4q, T4o, T4r, Ta1, T4J, Taa, T4t, T4y, T4C; + { + E T3o, T3f, T3p, T9s, T3I, T9B, T3s, T3x, T3B; + { + E T3b, T3c, T3e, T3E, T3H, T9r, T3F, T9A; + { + E T4Z, Tbm, Taw, T53, Tbo, TgJ, Tax; + T3b = ri[WS(rs, 10)]; + { + E T4Y, Tav, T51, Tbn; + T4Y = FMA(T4W, T4X, T4V); + Tav = T4T * T4X; + T51 = T48 * T50; + Tbn = T48 * T52; + T4Z = T4S + T4Y; + Tbm = T4S - T4Y; + Taw = FNMS(T4W, T4U, Tav); + T53 = FMA(T4b, T52, T51); + Tbo = FNMS(T4b, T50, Tbn); + T3c = T3a * T3b; + } + TgJ = Tau + Taw; + Tax = Tau - Taw; + { + E T5c, Tay, TgK, Tbr; + T5c = T53 + T5b; + Tay = T53 - T5b; + TgK = Tbo + Tbq; + Tbr = Tbo - Tbq; + Taz = Tax + Tay; + Tel = Tax - Tay; + T5d = T4Z + T5c; + Th0 = T4Z - T5c; + Tbs = Tbm - Tbr; + Tew = Tbm + Tbr; + TgL = TgJ - TgK; + TiZ = TgJ + TgK; + T3e = ii[WS(rs, 10)]; + } + } + T3E = ri[WS(rs, 26)]; + T3H = ii[WS(rs, 26)]; + T3o = ri[WS(rs, 42)]; + T3f = FMA(T3d, T3e, T3c); + T9r = T3a * T3e; + T3F = T3D * T3E; + T9A = T3D * T3H; + T3p = T3n * T3o; + T9s = FNMS(T3d, T3b, T9r); + T3I = FMA(T3G, T3H, T3F); + T9B = FNMS(T3G, T3E, T9A); + T3s = ii[WS(rs, 42)]; + T3x = ri[WS(rs, 58)]; + T3B = ii[WS(rs, 58)]; + } + { + E T4l, T4m, T4n, T4F, T4I, Ta0, T4G, Ta9; + { + E T3u, T9q, T9u, T3C, T9z, Tgr, T9v; + T4l = ri[WS(rs, 6)]; + { + E T3t, T9t, T3y, T9y; + T3t = FMA(T3r, T3s, T3p); + T9t = T3n * T3s; + T3y = T3w * T3x; + T9y = T3w * T3B; + T3u = T3f + T3t; + T9q = T3f - T3t; + T9u = FNMS(T3r, 
T3o, T9t); + T3C = FMA(T3A, T3B, T3y); + T9z = FNMS(T3A, T3x, T9y); + T4m = T3g * T4l; + } + Tgr = T9s + T9u; + T9v = T9s - T9u; + { + E T3J, T9x, Tgs, T9C; + T3J = T3C + T3I; + T9x = T3C - T3I; + Tgs = T9z + T9B; + T9C = T9z - T9B; + { + E T9w, T9O, T9D, T9N; + T9w = T9q + T9v; + T9O = T9v - T9q; + T3K = T3u + T3J; + Tgo = T3J - T3u; + T9D = T9x - T9C; + T9N = T9x + T9C; + Tgt = Tgr - Tgs; + TiO = Tgr + Tgs; + T9P = T9N - T9O; + Te6 = T9O + T9N; + T9E = T9w - T9D; + Te9 = T9w + T9D; + T4n = ii[WS(rs, 6)]; + } + } + } + T4F = ri[WS(rs, 22)]; + T4I = ii[WS(rs, 22)]; + T4q = ri[WS(rs, 38)]; + T4o = FMA(T3i, T4n, T4m); + Ta0 = T3g * T4n; + T4G = T4E * T4F; + Ta9 = T4E * T4I; + T4r = T4p * T4q; + Ta1 = FNMS(T3i, T4l, Ta0); + T4J = FMA(T4H, T4I, T4G); + Taa = FNMS(T4H, T4F, Ta9); + T4t = ii[WS(rs, 38)]; + T4y = ri[WS(rs, 54)]; + T4C = ii[WS(rs, 54)]; + } + } + { + E T5k, T5h, T5l, TaC, T5G, TaL, T5o, T5t, T5x; + { + E T5e, T5f, T5g, T5B, T5F, TaB, T5C, TaK; + { + E T4v, T9Z, Ta3, T4D, Ta8, TgC, Ta4; + T5e = ri[WS(rs, 9)]; + { + E T4u, Ta2, T4z, Ta7; + T4u = FMA(T4s, T4t, T4r); + Ta2 = T4p * T4t; + T4z = T4x * T4y; + Ta7 = T4x * T4C; + T4v = T4o + T4u; + T9Z = T4o - T4u; + Ta3 = FNMS(T4s, T4q, Ta2); + T4D = FMA(T4B, T4C, T4z); + Ta8 = FNMS(T4B, T4y, Ta7); + T5f = T8 * T5e; + } + TgC = Ta1 + Ta3; + Ta4 = Ta1 - Ta3; + { + E T4K, Ta6, TgD, Tab; + T4K = T4D + T4J; + Ta6 = T4D - T4J; + TgD = Ta8 + Taa; + Tab = Ta8 - Taa; + { + E Ta5, Tan, Tac, Tam; + Ta5 = T9Z + Ta4; + Tan = Ta4 - T9Z; + T4L = T4v + T4K; + Tgz = T4K - T4v; + Tac = Ta6 - Tab; + Tam = Ta6 + Tab; + TgE = TgC - TgD; + TiU = TgC + TgD; + Tao = Tam - Tan; + Ted = Tan + Tam; + Tad = Ta5 - Tac; + Teg = Ta5 + Tac; + T5g = ii[WS(rs, 9)]; + } + } + } + T5B = ri[WS(rs, 25)]; + T5F = ii[WS(rs, 25)]; + T5k = ri[WS(rs, 41)]; + T5h = FMA(Tc, T5g, T5f); + TaB = T8 * T5g; + T5C = T5A * T5B; + TaK = T5A * T5F; + T5l = T5j * T5k; + TaC = FNMS(Tc, T5e, TaB); + T5G = FMA(T5E, T5F, T5C); + TaL = FNMS(T5E, T5B, TaK); + T5o = 
ii[WS(rs, 41)]; + T5t = ri[WS(rs, 57)]; + T5x = ii[WS(rs, 57)]; + } + { + E T75, T76, T77, T7p, T7s, TbK, T7q, TbT; + { + E T5q, TaA, TaE, T5y, TaJ, Th1, TaF; + T75 = ri[WS(rs, 7)]; + { + E T5p, TaD, T5u, TaI; + T5p = FMA(T5n, T5o, T5l); + TaD = T5j * T5o; + T5u = T5s * T5t; + TaI = T5s * T5x; + T5q = T5h + T5p; + TaA = T5h - T5p; + TaE = FNMS(T5n, T5k, TaD); + T5y = FMA(T5w, T5x, T5u); + TaJ = FNMS(T5w, T5t, TaI); + T76 = T1i * T75; + } + Th1 = TaC + TaE; + TaF = TaC - TaE; + { + E T5H, TaH, Th2, TaM; + T5H = T5y + T5G; + TaH = T5y - T5G; + Th2 = TaJ + TaL; + TaM = TaJ - TaL; + { + E TaG, Tbu, TaN, Tbt; + TaG = TaA + TaF; + Tbu = TaF - TaA; + T5I = T5q + T5H; + TgM = T5H - T5q; + TaN = TaH - TaM; + Tbt = TaH + TaM; + Th3 = Th1 - Th2; + Tj0 = Th1 + Th2; + Tbv = Tbt - Tbu; + Tem = Tbu + Tbt; + TaO = TaG - TaN; + Tex = TaG + TaN; + T77 = ii[WS(rs, 7)]; + } + } + } + T7p = ri[WS(rs, 23)]; + T7s = ii[WS(rs, 23)]; + T7a = ri[WS(rs, 39)]; + T78 = FMA(T1k, T77, T76); + TbK = T1i * T77; + T7q = T7o * T7p; + TbT = T7o * T7s; + T7b = T79 * T7a; + TbL = FNMS(T1k, T75, TbK); + T7t = FMA(T7r, T7s, T7q); + TbU = FNMS(T7r, T7p, TbT); + T7d = ii[WS(rs, 39)]; + T7i = ri[WS(rs, 55)]; + T7m = ii[WS(rs, 55)]; + } + } + } + { + E T6i, T6g, T6j, TaY, T6z, TaU, T6l, T6o, T6q; + { + E T5P, T5N, T5Q, Tbd, T66, Tb9, T5S, T5V, T5X; + { + E T5K, T5L, T5M, T61, T65, Tbc, T62, Tb8; + { + E T7f, TbJ, TbN, T7n, TbS, Ths, TbO; + T5K = ri[WS(rs, 5)]; + { + E T7e, TbM, T7j, TbR; + T7e = FMA(T7c, T7d, T7b); + TbM = T79 * T7d; + T7j = T7h * T7i; + TbR = T7h * T7m; + T7f = T78 + T7e; + TbJ = T78 - T7e; + TbN = FNMS(T7c, T7a, TbM); + T7n = FMA(T7l, T7m, T7j); + TbS = FNMS(T7l, T7i, TbR); + T5L = Td * T5K; + } + Ths = TbL + TbN; + TbO = TbL - TbN; + { + E T7u, TbQ, Tht, TbV; + T7u = T7n + T7t; + TbQ = T7n - T7t; + Tht = TbS + TbU; + TbV = TbS - TbU; + { + E TbP, TcD, TbW, TcC; + TbP = TbJ + TbO; + TcD = TbO - TbJ; + T7v = T7f + T7u; + Thd = T7u - T7f; + TbW = TbQ - TbV; + TcC = TbQ + TbV; + Thu = Ths - 
Tht; + Tjb = Ths + Tht; + TcE = TcC - TcD; + TeF = TcD + TcC; + TbX = TbP - TbW; + TeQ = TbP + TbW; + T5M = ii[WS(rs, 5)]; + } + } + } + T61 = ri[WS(rs, 53)]; + T65 = ii[WS(rs, 53)]; + T5P = ri[WS(rs, 37)]; + T5N = FMA(Th, T5M, T5L); + Tbc = Td * T5M; + T62 = T60 * T61; + Tb8 = T60 * T65; + T5Q = T5O * T5P; + Tbd = FNMS(Th, T5K, Tbc); + T66 = FMA(T64, T65, T62); + Tb9 = FNMS(T64, T61, Tb8); + T5S = ii[WS(rs, 37)]; + T5V = ri[WS(rs, 21)]; + T5X = ii[WS(rs, 21)]; + } + { + E T6b, T6c, T6f, T6u, T6y, TaX, T6v, TaT; + { + E T5U, Tb5, Tbf, T5Y, Tb7; + T6b = ri[WS(rs, 61)]; + { + E T5T, Tbe, T5W, Tb6; + T5T = FMA(T5R, T5S, T5Q); + Tbe = T5O * T5S; + T5W = T3j * T5V; + Tb6 = T3j * T5X; + T5U = T5N + T5T; + Tb5 = T5N - T5T; + Tbf = FNMS(T5R, T5P, Tbe); + T5Y = FMA(T3m, T5X, T5W); + Tb7 = FNMS(T3m, T5V, Tb6); + T6c = T6a * T6b; + } + { + E TgO, Tbg, T67, Tbh; + TgO = Tbd + Tbf; + Tbg = Tbd - Tbf; + T67 = T5Y + T66; + Tbh = T5Y - T66; + { + E TgP, Tba, Tbi, Teo; + TgP = Tb7 + Tb9; + Tba = Tb7 - Tb9; + Tbi = Tbg + Tbh; + Teo = Tbg - Tbh; + { + E TgR, Tbb, Tep, TgQ; + TgR = T5U - T67; + T68 = T5U + T67; + Tbb = Tb5 - Tba; + Tep = Tb5 + Tba; + TgQ = TgO - TgP; + Tj5 = TgO + TgP; + Tez = FMA(KP414213562, Teo, Tep); + Teq = FNMS(KP414213562, Tep, Teo); + Tbj = FNMS(KP414213562, Tbi, Tbb); + Tbx = FMA(KP414213562, Tbb, Tbi); + TgS = TgQ - TgR; + Th5 = TgR + TgQ; + T6f = ii[WS(rs, 61)]; + } + } + } + } + T6u = ri[WS(rs, 45)]; + T6y = ii[WS(rs, 45)]; + T6i = ri[WS(rs, 29)]; + T6g = FMA(T6e, T6f, T6c); + TaX = T6a * T6f; + T6v = T6t * T6u; + TaT = T6t * T6y; + T6j = T6h * T6i; + TaY = FNMS(T6e, T6b, TaX); + T6z = FMA(T6x, T6y, T6v); + TaU = FNMS(T6x, T6u, TaT); + T6l = ii[WS(rs, 29)]; + T6o = ri[WS(rs, 13)]; + T6q = ii[WS(rs, 13)]; + } + } + { + E T7C, T7A, T7D, Tcm, T7T, Tci, T7F, T7I, T7K; + { + E T7x, T7y, T7z, T7O, T7S, Tcl, T7P, Tch; + { + E T6n, TaQ, Tb0, T6r, TaS; + T7x = ri[WS(rs, 3)]; + { + E T6m, TaZ, T6p, TaR; + T6m = FMA(T6k, T6l, T6j); + TaZ = T6h * T6l; + T6p = T17 * 
T6o; + TaR = T17 * T6q; + T6n = T6g + T6m; + TaQ = T6g - T6m; + Tb0 = FNMS(T6k, T6i, TaZ); + T6r = FMA(T19, T6q, T6p); + TaS = FNMS(T19, T6o, TaR); + T7y = T3 * T7x; + } + { + E TgU, Tb1, T6A, Tb2; + TgU = TaY + Tb0; + Tb1 = TaY - Tb0; + T6A = T6r + T6z; + Tb2 = T6r - T6z; + { + E TgV, TaV, Tb3, Ter; + TgV = TaS + TaU; + TaV = TaS - TaU; + Tb3 = Tb1 + Tb2; + Ter = Tb1 - Tb2; + { + E TgT, TaW, Tes, TgW; + TgT = T6n - T6A; + T6B = T6n + T6A; + TaW = TaQ - TaV; + Tes = TaQ + TaV; + TgW = TgU - TgV; + Tj6 = TgU + TgV; + TeA = FNMS(KP414213562, Ter, Tes); + Tet = FMA(KP414213562, Tes, Ter); + Tb4 = FMA(KP414213562, Tb3, TaW); + Tby = FNMS(KP414213562, TaW, Tb3); + TgX = TgT + TgW; + Th6 = TgT - TgW; + T7z = ii[WS(rs, 3)]; + } + } + } + } + T7O = ri[WS(rs, 51)]; + T7S = ii[WS(rs, 51)]; + T7C = ri[WS(rs, 35)]; + T7A = FMA(T6, T7z, T7y); + Tcl = T3 * T7z; + T7P = T7N * T7O; + Tch = T7N * T7S; + T7D = T7B * T7C; + Tcm = FNMS(T6, T7x, Tcl); + T7T = FMA(T7R, T7S, T7P); + Tci = FNMS(T7R, T7O, Tch); + T7F = ii[WS(rs, 35)]; + T7I = ri[WS(rs, 19)]; + T7K = ii[WS(rs, 19)]; + } + { + E T7Y, T7Z, T82, T8f, T8j, Tc6, T8g, Tc2; + { + E T7H, Tce, Tco, T7L, Tcg; + T7Y = ri[WS(rs, 59)]; + { + E T7G, Tcn, T7J, Tcf; + T7G = FMA(T7E, T7F, T7D); + Tcn = T7B * T7F; + T7J = T2u * T7I; + Tcf = T2u * T7K; + T7H = T7A + T7G; + Tce = T7A - T7G; + Tco = FNMS(T7E, T7C, Tcn); + T7L = FMA(T2x, T7K, T7J); + Tcg = FNMS(T2x, T7I, Tcf); + T7Z = T7X * T7Y; + } + { + E Thf, Tcp, T7U, Tcq; + Thf = Tcm + Tco; + Tcp = Tcm - Tco; + T7U = T7L + T7T; + Tcq = T7L - T7T; + { + E Thg, Tcj, Tcr, TeH; + Thg = Tcg + Tci; + Tcj = Tcg - Tci; + Tcr = Tcp + Tcq; + TeH = Tcp - Tcq; + { + E Thi, Tck, TeI, Thh; + Thi = T7H - T7U; + T7V = T7H + T7U; + Tck = Tce - Tcj; + TeI = Tce + Tcj; + Thh = Thf - Thg; + Tjg = Thf + Thg; + TeS = FMA(KP414213562, TeH, TeI); + TeJ = FNMS(KP414213562, TeI, TeH); + Tcs = FNMS(KP414213562, Tcr, Tck); + TcG = FMA(KP414213562, Tck, Tcr); + Thj = Thh - Thi; + Thw = Thi + Thh; + T82 = ii[WS(rs, 
59)]; + } + } + } + } + T8f = ri[WS(rs, 43)]; + T8j = ii[WS(rs, 43)]; + T84 = ri[WS(rs, 27)]; + T83 = FMA(T81, T82, T7Z); + Tc6 = T7X * T82; + T8g = T8e * T8f; + Tc2 = T8e * T8j; + T85 = Te * T84; + Tc7 = FNMS(T81, T7Y, Tc6); + T8k = FMA(T8i, T8j, T8g); + Tc3 = FNMS(T8i, T8f, Tc2); + T86 = ii[WS(rs, 27)]; + T89 = ri[WS(rs, 11)]; + T8b = ii[WS(rs, 11)]; + } + } + } + } + } + { + E TeT, TeM, Tcd, TcH, Tho, Thx, Tkw, Tkv, Tl6, Tl5; + { + E TiI, Tkp, TiQ, TiS, TiL, Tkq, TiP, TiV, Tjf, Tjd, Tjc, Tji, Tj4, Tj2, Tj1; + E Tj7, Tkh, Tki; + { + E TjG, T2I, Tkj, T4N, Tkk, Tkf, Tk5, TjJ, T8o, Tk2, TjL, T6D, TjY, TjU, Tk1; + E TjO; + { + E T8m, Tjh, T3L, T4M, Tk6, Tke, TjH, TjI; + { + E T1C, T88, TbZ, Tc9, T8c, Tc1, T2H; + T1C = TY + T1B; + TiI = TY - T1B; + { + E T87, Tc8, T8a, Tc0; + T87 = FMA(Ti, T86, T85); + Tc8 = Te * T86; + T8a = Tu * T89; + Tc0 = Tu * T8b; + T88 = T83 + T87; + TbZ = T83 - T87; + Tc9 = FNMS(Ti, T84, Tc8); + T8c = FMA(Tx, T8b, T8a); + Tc1 = FNMS(Tx, T89, Tc0); + T2H = T27 + T2G; + Tkp = T2G - T27; + } + { + E Thl, Tca, T8l, Tcb; + Thl = Tc7 + Tc9; + Tca = Tc7 - Tc9; + T8l = T8c + T8k; + Tcb = T8c - T8k; + { + E Thm, Tc4, Tcc, TeK; + Thm = Tc1 + Tc3; + Tc4 = Tc1 - Tc3; + Tcc = Tca + Tcb; + TeK = Tca - Tcb; + { + E Thk, Tc5, TeL, Thn; + Thk = T88 - T8l; + T8m = T88 + T8l; + Tc5 = TbZ - Tc4; + TeL = TbZ + Tc4; + Thn = Thl - Thm; + Tjh = Thl + Thm; + TeT = FNMS(KP414213562, TeK, TeL); + TeM = FMA(KP414213562, TeL, TeK); + Tcd = FMA(KP414213562, Tcc, Tc5); + TcH = FNMS(KP414213562, Tc5, Tcc); + Tho = Thk + Thn; + Thx = Thk - Thn; + TjG = T1C - T2H; + T2I = T1C + T2H; + } + } + } + } + TiQ = T39 - T3K; + T3L = T39 + T3K; + T4M = T4k + T4L; + TiS = T4k - T4L; + TiL = TiJ - TiK; + Tk6 = TiJ + TiK; + Tke = Tk7 + Tkd; + Tkq = Tkd - Tk7; + TiP = TiN - TiO; + TjH = TiN + TiO; + Tkj = T4M - T3L; + T4N = T3L + T4M; + Tkk = Tke - Tk6; + Tkf = Tk6 + Tke; + TjI = TiT + TiU; + TiV = TiT - TiU; + { + E TjR, TjQ, TjS, T7w, T8n; + Tjf = T74 - T7v; + T7w = T74 + T7v; + T8n = 
T7V + T8m; + Tjd = T8m - T7V; + Tjc = Tja - Tjb; + TjR = Tja + Tjb; + Tk5 = TjH + TjI; + TjJ = TjH - TjI; + TjQ = T7w - T8n; + T8o = T7w + T8n; + Tji = Tjg - Tjh; + TjS = Tjg + Tjh; + { + E TjM, TjN, T5J, T6C, TjT; + Tj4 = T5d - T5I; + T5J = T5d + T5I; + T6C = T68 + T6B; + Tj2 = T6B - T68; + TjT = TjR - TjS; + Tk2 = TjR + TjS; + Tj1 = TiZ - Tj0; + TjM = TiZ + Tj0; + TjL = T5J - T6C; + T6D = T5J + T6C; + Tj7 = Tj5 - Tj6; + TjN = Tj5 + Tj6; + TjY = TjQ + TjT; + TjU = TjQ - TjT; + Tk1 = TjM + TjN; + TjO = TjM - TjN; + } + } + } + { + E Tk0, Tk3, TjW, Tko, Tkn, Tkl, Tkm, TjZ; + { + E TjP, TjX, Tk4, Tkg, T4O, T8p, TjK, TjV; + Tk0 = T2I - T4N; + T4O = T2I + T4N; + T8p = T6D + T8o; + Tkh = T8o - T6D; + TjP = TjL + TjO; + TjX = TjO - TjL; + Tk3 = Tk1 - Tk2; + Tk4 = Tk1 + Tk2; + ri[0] = T4O + T8p; + ri[WS(rs, 32)] = T4O - T8p; + Tkg = Tk5 + Tkf; + Tki = Tkf - Tk5; + TjW = TjG - TjJ; + TjK = TjG + TjJ; + TjV = TjP + TjU; + Tko = TjU - TjP; + Tkn = Tkk - Tkj; + Tkl = Tkj + Tkk; + ii[WS(rs, 32)] = Tkg - Tk4; + ii[0] = Tk4 + Tkg; + ri[WS(rs, 8)] = FMA(KP707106781, TjV, TjK); + ri[WS(rs, 40)] = FNMS(KP707106781, TjV, TjK); + Tkm = TjX + TjY; + TjZ = TjX - TjY; + } + ii[WS(rs, 40)] = FNMS(KP707106781, Tkm, Tkl); + ii[WS(rs, 8)] = FMA(KP707106781, Tkm, Tkl); + ri[WS(rs, 24)] = FMA(KP707106781, TjZ, TjW); + ri[WS(rs, 56)] = FNMS(KP707106781, TjZ, TjW); + ii[WS(rs, 56)] = FNMS(KP707106781, Tko, Tkn); + ii[WS(rs, 24)] = FMA(KP707106781, Tko, Tkn); + ri[WS(rs, 16)] = Tk0 + Tk3; + ri[WS(rs, 48)] = Tk0 - Tk3; + } + } + { + E Tjq, TiM, Tkx, Tkr, Tjt, Tky, Tks, TiX, Tjz, Tje, Tjx, TjD, Tjn, Tj9, Tjr; + E TiR; + ii[WS(rs, 48)] = Tki - Tkh; + ii[WS(rs, 16)] = Tkh + Tki; + Tjq = TiI + TiL; + TiM = TiI - TiL; + Tkx = Tkq - Tkp; + Tkr = Tkp + Tkq; + Tjr = TiQ + TiP; + TiR = TiP - TiQ; + { + E Tjw, Tj3, Tjs, TiW, Tjv, Tj8; + Tjs = TiS - TiV; + TiW = TiS + TiV; + Tjw = Tj1 + Tj2; + Tj3 = Tj1 - Tj2; + Tjt = Tjr + Tjs; + Tky = Tjs - Tjr; + Tks = TiR + TiW; + TiX = TiR - TiW; + Tjv = Tj4 + Tj7; + 
Tj8 = Tj4 - Tj7; + Tjz = Tjc + Tjd; + Tje = Tjc - Tjd; + Tjx = FMA(KP414213562, Tjw, Tjv); + TjD = FNMS(KP414213562, Tjv, Tjw); + Tjn = FNMS(KP414213562, Tj3, Tj8); + Tj9 = FMA(KP414213562, Tj8, Tj3); + } + { + E Tjm, TiY, Tkz, TkB, Tjy, Tjj; + Tjm = FNMS(KP707106781, TiX, TiM); + TiY = FMA(KP707106781, TiX, TiM); + Tkz = FMA(KP707106781, Tky, Tkx); + TkB = FNMS(KP707106781, Tky, Tkx); + Tjy = Tjf + Tji; + Tjj = Tjf - Tji; + { + E TjC, Tkt, Tku, TjF; + { + E Tju, TjE, Tjo, Tjk, TjB, TjA; + TjC = FNMS(KP707106781, Tjt, Tjq); + Tju = FMA(KP707106781, Tjt, Tjq); + TjA = FNMS(KP414213562, Tjz, Tjy); + TjE = FMA(KP414213562, Tjy, Tjz); + Tjo = FMA(KP414213562, Tje, Tjj); + Tjk = FNMS(KP414213562, Tjj, Tje); + TjB = Tjx + TjA; + Tkw = TjA - Tjx; + Tkv = FNMS(KP707106781, Tks, Tkr); + Tkt = FMA(KP707106781, Tks, Tkr); + { + E Tjp, TkA, TkC, Tjl; + Tjp = Tjn + Tjo; + TkA = Tjo - Tjn; + TkC = Tj9 + Tjk; + Tjl = Tj9 - Tjk; + ri[WS(rs, 4)] = FMA(KP923879532, TjB, Tju); + ri[WS(rs, 36)] = FNMS(KP923879532, TjB, Tju); + ri[WS(rs, 60)] = FMA(KP923879532, Tjp, Tjm); + ri[WS(rs, 28)] = FNMS(KP923879532, Tjp, Tjm); + ii[WS(rs, 44)] = FNMS(KP923879532, TkA, Tkz); + ii[WS(rs, 12)] = FMA(KP923879532, TkA, Tkz); + ii[WS(rs, 60)] = FMA(KP923879532, TkC, TkB); + ii[WS(rs, 28)] = FNMS(KP923879532, TkC, TkB); + ri[WS(rs, 12)] = FMA(KP923879532, Tjl, TiY); + ri[WS(rs, 44)] = FNMS(KP923879532, Tjl, TiY); + Tku = TjD + TjE; + TjF = TjD - TjE; + } + } + ii[WS(rs, 36)] = FNMS(KP923879532, Tku, Tkt); + ii[WS(rs, 4)] = FMA(KP923879532, Tku, Tkt); + ri[WS(rs, 20)] = FMA(KP923879532, TjF, TjC); + ri[WS(rs, 52)] = FNMS(KP923879532, TjF, TjC); + } + } + } + } + { + E TkV, Tl1, ThG, Tgk, TkH, TkN, Tis, Ti0, Thv, ThJ, TkO, TkI, TgH, Thy, TiC; + E TiG, Tiq, Tim, ThN, ThT, ThD, Th9, TkW, Tiv, Tl2, Ti7, ThP, Thq, Tiz, TiF; + E Tip, Tif; + { + E Ti1, Ti2, Ti4, Ti5, Thp, The, Tij, TiB, Tii, Tik; + { + E ThW, Tg8, TkT, TkF, ThX, ThY, TkU, Tgj, Tgd, Tgi; + ThW = Tg4 - Tg7; + Tg8 = Tg4 + Tg7; + TkT = TkE - 
TkD; + TkF = TkD + TkE; + ThX = Tgc - Tg9; + Tgd = Tg9 + Tgc; + ii[WS(rs, 52)] = FNMS(KP923879532, Tkw, Tkv); + ii[WS(rs, 20)] = FMA(KP923879532, Tkw, Tkv); + Tgi = Tge - Tgh; + ThY = Tge + Tgh; + TkU = Tgi - Tgd; + Tgj = Tgd + Tgi; + { + E TgA, ThH, Tgv, TgF; + { + E Tgp, TkG, ThZ, Tgu; + Ti1 = Tgn - Tgo; + Tgp = Tgn + Tgo; + TkV = FMA(KP707106781, TkU, TkT); + Tl1 = FNMS(KP707106781, TkU, TkT); + ThG = FMA(KP707106781, Tgj, Tg8); + Tgk = FNMS(KP707106781, Tgj, Tg8); + TkG = ThX + ThY; + ThZ = ThX - ThY; + Tgu = Tgq + Tgt; + Ti2 = Tgq - Tgt; + Ti4 = Tgy - Tgz; + TgA = Tgy + Tgz; + TkH = FMA(KP707106781, TkG, TkF); + TkN = FNMS(KP707106781, TkG, TkF); + Tis = FNMS(KP707106781, ThZ, ThW); + Ti0 = FMA(KP707106781, ThZ, ThW); + ThH = FMA(KP414213562, Tgp, Tgu); + Tgv = FNMS(KP414213562, Tgu, Tgp); + TgF = TgB + TgE; + Ti5 = TgB - TgE; + } + { + E Tig, Tih, ThI, TgG; + Thv = Thr + Thu; + Tig = Thr - Thu; + Tih = Tho - Thj; + Thp = Thj + Tho; + The = Thc + Thd; + Tij = Thc - Thd; + ThI = FNMS(KP414213562, TgA, TgF); + TgG = FMA(KP414213562, TgF, TgA); + TiB = FMA(KP707106781, Tih, Tig); + Tii = FNMS(KP707106781, Tih, Tig); + ThJ = ThH + ThI; + TkO = ThI - ThH; + TkI = Tgv + TgG; + TgH = Tgv - TgG; + Tik = Thw - Thx; + Thy = Thw + Thx; + } + } + } + { + E Tic, Tia, Ti9, Tid, Tit, Ti3; + { + E Th4, ThM, TgZ, Th7, ThL, Th8; + { + E TgN, TgY, TiA, Til; + Tic = TgL - TgM; + TgN = TgL + TgM; + TgY = TgS + TgX; + Tia = TgX - TgS; + Ti9 = Th0 - Th3; + Th4 = Th0 + Th3; + TiA = FMA(KP707106781, Tik, Tij); + Til = FNMS(KP707106781, Tik, Tij); + ThM = FMA(KP707106781, TgY, TgN); + TgZ = FNMS(KP707106781, TgY, TgN); + TiC = FNMS(KP198912367, TiB, TiA); + TiG = FMA(KP198912367, TiA, TiB); + Tiq = FMA(KP668178637, Tii, Til); + Tim = FNMS(KP668178637, Til, Tii); + Th7 = Th5 + Th6; + Tid = Th5 - Th6; + } + ThL = FMA(KP707106781, Th7, Th4); + Th8 = FNMS(KP707106781, Th7, Th4); + Tit = FNMS(KP414213562, Ti1, Ti2); + Ti3 = FMA(KP414213562, Ti2, Ti1); + ThN = FMA(KP198912367, ThM, ThL); + 
ThT = FNMS(KP198912367, ThL, ThM); + ThD = FNMS(KP668178637, TgZ, Th8); + Th9 = FMA(KP668178637, Th8, TgZ); + } + { + E Tiy, Tib, Tiu, Ti6, Tix, Tie; + Tiu = FMA(KP414213562, Ti4, Ti5); + Ti6 = FNMS(KP414213562, Ti5, Ti4); + Tiy = FMA(KP707106781, Tia, Ti9); + Tib = FNMS(KP707106781, Tia, Ti9); + TkW = Tiu - Tit; + Tiv = Tit + Tiu; + Tl2 = Ti3 + Ti6; + Ti7 = Ti3 - Ti6; + Tix = FMA(KP707106781, Tid, Tic); + Tie = FNMS(KP707106781, Tid, Tic); + ThP = FMA(KP707106781, Thp, The); + Thq = FNMS(KP707106781, Thp, The); + Tiz = FMA(KP198912367, Tiy, Tix); + TiF = FNMS(KP198912367, Tix, Tiy); + Tip = FNMS(KP668178637, Tib, Tie); + Tif = FMA(KP668178637, Tie, Tib); + } + } + } + { + E TkM, TkL, Tl0, TkZ; + { + E ThC, TgI, TkP, TkR, ThO, Thz; + ThC = FNMS(KP923879532, TgH, Tgk); + TgI = FMA(KP923879532, TgH, Tgk); + TkP = FMA(KP923879532, TkO, TkN); + TkR = FNMS(KP923879532, TkO, TkN); + ThO = FMA(KP707106781, Thy, Thv); + Thz = FNMS(KP707106781, Thy, Thv); + { + E ThS, TkJ, TkK, ThV; + { + E ThK, ThU, ThE, ThA, ThR, ThQ; + ThS = FNMS(KP923879532, ThJ, ThG); + ThK = FMA(KP923879532, ThJ, ThG); + ThQ = FNMS(KP198912367, ThP, ThO); + ThU = FMA(KP198912367, ThO, ThP); + ThE = FMA(KP668178637, Thq, Thz); + ThA = FNMS(KP668178637, Thz, Thq); + ThR = ThN + ThQ; + TkM = ThQ - ThN; + TkL = FNMS(KP923879532, TkI, TkH); + TkJ = FMA(KP923879532, TkI, TkH); + { + E ThF, TkQ, TkS, ThB; + ThF = ThD + ThE; + TkQ = ThE - ThD; + TkS = Th9 + ThA; + ThB = Th9 - ThA; + ri[WS(rs, 2)] = FMA(KP980785280, ThR, ThK); + ri[WS(rs, 34)] = FNMS(KP980785280, ThR, ThK); + ri[WS(rs, 58)] = FMA(KP831469612, ThF, ThC); + ri[WS(rs, 26)] = FNMS(KP831469612, ThF, ThC); + ii[WS(rs, 42)] = FNMS(KP831469612, TkQ, TkP); + ii[WS(rs, 10)] = FMA(KP831469612, TkQ, TkP); + ii[WS(rs, 58)] = FMA(KP831469612, TkS, TkR); + ii[WS(rs, 26)] = FNMS(KP831469612, TkS, TkR); + ri[WS(rs, 10)] = FMA(KP831469612, ThB, TgI); + ri[WS(rs, 42)] = FNMS(KP831469612, ThB, TgI); + TkK = ThT + ThU; + ThV = ThT - ThU; + } + } + ii[WS(rs, 34)] = 
FNMS(KP980785280, TkK, TkJ); + ii[WS(rs, 2)] = FMA(KP980785280, TkK, TkJ); + ri[WS(rs, 18)] = FMA(KP980785280, ThV, ThS); + ri[WS(rs, 50)] = FNMS(KP980785280, ThV, ThS); + } + } + { + E Tio, TkX, TkY, Tir, Ti8, Tin; + Tio = FNMS(KP923879532, Ti7, Ti0); + Ti8 = FMA(KP923879532, Ti7, Ti0); + Tin = Tif + Tim; + Tl0 = Tim - Tif; + TkZ = FNMS(KP923879532, TkW, TkV); + TkX = FMA(KP923879532, TkW, TkV); + ii[WS(rs, 50)] = FNMS(KP980785280, TkM, TkL); + ii[WS(rs, 18)] = FMA(KP980785280, TkM, TkL); + ri[WS(rs, 6)] = FMA(KP831469612, Tin, Ti8); + ri[WS(rs, 38)] = FNMS(KP831469612, Tin, Ti8); + TkY = Tip + Tiq; + Tir = Tip - Tiq; + ii[WS(rs, 38)] = FNMS(KP831469612, TkY, TkX); + ii[WS(rs, 6)] = FMA(KP831469612, TkY, TkX); + ri[WS(rs, 22)] = FMA(KP831469612, Tir, Tio); + ri[WS(rs, 54)] = FNMS(KP831469612, Tir, Tio); + } + { + E TiE, Tl3, Tl4, TiH, Tiw, TiD; + TiE = FMA(KP923879532, Tiv, Tis); + Tiw = FNMS(KP923879532, Tiv, Tis); + TiD = Tiz - TiC; + Tl6 = Tiz + TiC; + Tl5 = FMA(KP923879532, Tl2, Tl1); + Tl3 = FNMS(KP923879532, Tl2, Tl1); + ii[WS(rs, 54)] = FNMS(KP831469612, Tl0, TkZ); + ii[WS(rs, 22)] = FMA(KP831469612, Tl0, TkZ); + ri[WS(rs, 14)] = FMA(KP980785280, TiD, Tiw); + ri[WS(rs, 46)] = FNMS(KP980785280, TiD, Tiw); + Tl4 = TiG - TiF; + TiH = TiF + TiG; + ii[WS(rs, 46)] = FNMS(KP980785280, Tl4, Tl3); + ii[WS(rs, 14)] = FMA(KP980785280, Tl4, Tl3); + ri[WS(rs, 62)] = FMA(KP980785280, TiH, TiE); + ri[WS(rs, 30)] = FNMS(KP980785280, TiH, TiE); + } + } + } + { + E Tla, TdV, TdO, Tm6, Tm5, TdR; + { + E TcT, TlO, TlI, Tar, TcX, Td3, TcN, TbB, TdM, TdQ, TdA, Tdw, TdJ, TdP, Tdz; + E Tdp, TlW, TdF, Tm2, Tdh, Td7, T91, Td6, T8M, TlT, TlF, Td0, Td4, TcO, TcK; + E T9g, Td8; + { + E Tdb, Tdc, Tde, Tdf, Tdm, Tdk, Tdj, Tdn, TcF, Tct, TbY, Tdt, TdL, Tds, Tdu; + E TcI, TdD, Tdd; + { + E Tae, TcR, T9R, Tap, T9F, T9Q; + Tdb = FMA(KP707106781, T9E, T9p); + T9F = FNMS(KP707106781, T9E, T9p); + T9Q = FNMS(KP707106781, T9P, T9M); + Tdc = FMA(KP707106781, T9P, T9M); + Tde = FMA(KP707106781, 
Tad, T9Y); + Tae = FNMS(KP707106781, Tad, T9Y); + ii[WS(rs, 62)] = FMA(KP980785280, Tl6, Tl5); + ii[WS(rs, 30)] = FNMS(KP980785280, Tl6, Tl5); + TcR = FMA(KP668178637, T9F, T9Q); + T9R = FNMS(KP668178637, T9Q, T9F); + Tap = FNMS(KP707106781, Tao, Tal); + Tdf = FMA(KP707106781, Tao, Tal); + { + E Tbw, TcW, Tbl, Tbz; + { + E TaP, Tbk, TcS, Taq; + Tdm = FMA(KP707106781, TaO, Taz); + TaP = FNMS(KP707106781, TaO, Taz); + Tbk = Tb4 - Tbj; + Tdk = Tbj + Tb4; + Tdj = FMA(KP707106781, Tbv, Tbs); + Tbw = FNMS(KP707106781, Tbv, Tbs); + TcS = FNMS(KP668178637, Tae, Tap); + Taq = FMA(KP668178637, Tap, Tae); + TcW = FMA(KP923879532, Tbk, TaP); + Tbl = FNMS(KP923879532, Tbk, TaP); + TcT = TcR + TcS; + TlO = TcS - TcR; + TlI = T9R + Taq; + Tar = T9R - Taq; + Tbz = Tbx - Tby; + Tdn = Tbx + Tby; + } + { + E Tdq, Tdr, TcV, TbA; + TcF = FNMS(KP707106781, TcE, TcB); + Tdq = FMA(KP707106781, TcE, TcB); + Tdr = Tcs + Tcd; + Tct = Tcd - Tcs; + TbY = FNMS(KP707106781, TbX, TbI); + Tdt = FMA(KP707106781, TbX, TbI); + TcV = FMA(KP923879532, Tbz, Tbw); + TbA = FNMS(KP923879532, Tbz, Tbw); + TdL = FMA(KP923879532, Tdr, Tdq); + Tds = FNMS(KP923879532, Tdr, Tdq); + TcX = FMA(KP303346683, TcW, TcV); + Td3 = FNMS(KP303346683, TcV, TcW); + TcN = FNMS(KP534511135, Tbl, TbA); + TbB = FMA(KP534511135, TbA, Tbl); + Tdu = TcG + TcH; + TcI = TcG - TcH; + } + } + } + { + E TdI, Tdl, TdK, Tdv, TdH, Tdo; + TdK = FMA(KP923879532, Tdu, Tdt); + Tdv = FNMS(KP923879532, Tdu, Tdt); + TdI = FMA(KP923879532, Tdk, Tdj); + Tdl = FNMS(KP923879532, Tdk, Tdj); + TdM = FNMS(KP098491403, TdL, TdK); + TdQ = FMA(KP098491403, TdK, TdL); + TdA = FMA(KP820678790, Tds, Tdv); + Tdw = FNMS(KP820678790, Tdv, Tds); + TdH = FMA(KP923879532, Tdn, Tdm); + Tdo = FNMS(KP923879532, Tdn, Tdm); + TdD = FNMS(KP198912367, Tdb, Tdc); + Tdd = FMA(KP198912367, Tdc, Tdb); + TdJ = FMA(KP098491403, TdI, TdH); + TdP = FNMS(KP098491403, TdH, TdI); + Tdz = FNMS(KP820678790, Tdl, Tdo); + Tdp = FMA(KP820678790, Tdo, Tdl); + } + { + E TcZ, Tcu, TdE, 
Tdg; + TdE = FMA(KP198912367, Tde, Tdf); + Tdg = FNMS(KP198912367, Tdf, Tde); + TcZ = FMA(KP923879532, Tct, TbY); + Tcu = FNMS(KP923879532, Tct, TbY); + TlW = TdE - TdD; + TdF = TdD + TdE; + Tm2 = Tdd + Tdg; + Tdh = Tdd - Tdg; + { + E T8L, TlE, TcY, TcJ; + Tla = T8D + T8K; + T8L = T8D - T8K; + TlE = TdU - TdT; + TdV = TdT + TdU; + Td7 = FNMS(KP414213562, T8T, T90); + T91 = FMA(KP414213562, T90, T8T); + TcY = FMA(KP923879532, TcI, TcF); + TcJ = FNMS(KP923879532, TcI, TcF); + Td6 = FNMS(KP707106781, T8L, T8w); + T8M = FMA(KP707106781, T8L, T8w); + TlT = FNMS(KP707106781, TlE, TlD); + TlF = FMA(KP707106781, TlE, TlD); + Td0 = FNMS(KP303346683, TcZ, TcY); + Td4 = FMA(KP303346683, TcY, TcZ); + TcO = FMA(KP534511135, Tcu, TcJ); + TcK = FNMS(KP534511135, TcJ, Tcu); + T9g = FNMS(KP414213562, T9f, T98); + Td8 = FMA(KP414213562, T98, T9f); + } + } + } + { + E Tm1, TlV, TdC, Tda, Td2, TlM, TlL, Td5; + { + E TlS, TcQ, TlH, TcM, TlR, TcP; + { + E TcL, Tas, TlP, TlQ, TlN; + TlS = TbB + TcK; + TcL = TbB - TcK; + { + E TlU, T9h, TlG, Td9, T9i; + TlU = T91 + T9g; + T9h = T91 - T9g; + TlG = Td8 - Td7; + Td9 = Td7 + Td8; + Tm1 = FMA(KP923879532, TlU, TlT); + TlV = FNMS(KP923879532, TlU, TlT); + TcQ = FMA(KP923879532, T9h, T8M); + T9i = FNMS(KP923879532, T9h, T8M); + TlN = FNMS(KP923879532, TlG, TlF); + TlH = FMA(KP923879532, TlG, TlF); + TdC = FMA(KP923879532, Td9, Td6); + Tda = FNMS(KP923879532, Td9, Td6); + Tas = FMA(KP831469612, Tar, T9i); + TcM = FNMS(KP831469612, Tar, T9i); + } + TlR = FNMS(KP831469612, TlO, TlN); + TlP = FMA(KP831469612, TlO, TlN); + TlQ = TcO - TcN; + TcP = TcN + TcO; + ri[WS(rs, 11)] = FMA(KP881921264, TcL, Tas); + ri[WS(rs, 43)] = FNMS(KP881921264, TcL, Tas); + ii[WS(rs, 43)] = FNMS(KP881921264, TlQ, TlP); + ii[WS(rs, 11)] = FMA(KP881921264, TlQ, TlP); + } + { + E TcU, Td1, TlJ, TlK; + Td2 = FNMS(KP831469612, TcT, TcQ); + TcU = FMA(KP831469612, TcT, TcQ); + ri[WS(rs, 59)] = FMA(KP881921264, TcP, TcM); + ri[WS(rs, 27)] = FNMS(KP881921264, TcP, TcM); + 
ii[WS(rs, 59)] = FMA(KP881921264, TlS, TlR); + ii[WS(rs, 27)] = FNMS(KP881921264, TlS, TlR); + Td1 = TcX + Td0; + TlM = Td0 - TcX; + TlL = FNMS(KP831469612, TlI, TlH); + TlJ = FMA(KP831469612, TlI, TlH); + TlK = Td3 + Td4; + Td5 = Td3 - Td4; + ri[WS(rs, 3)] = FMA(KP956940335, Td1, TcU); + ri[WS(rs, 35)] = FNMS(KP956940335, Td1, TcU); + ii[WS(rs, 35)] = FNMS(KP956940335, TlK, TlJ); + ii[WS(rs, 3)] = FMA(KP956940335, TlK, TlJ); + } + } + { + E Tdy, Tm0, TlZ, TdB; + { + E Tdi, Tdx, TlX, TlY; + Tdy = FNMS(KP980785280, Tdh, Tda); + Tdi = FMA(KP980785280, Tdh, Tda); + ri[WS(rs, 19)] = FMA(KP956940335, Td5, Td2); + ri[WS(rs, 51)] = FNMS(KP956940335, Td5, Td2); + ii[WS(rs, 51)] = FNMS(KP956940335, TlM, TlL); + ii[WS(rs, 19)] = FMA(KP956940335, TlM, TlL); + Tdx = Tdp + Tdw; + Tm0 = Tdw - Tdp; + TlZ = FNMS(KP980785280, TlW, TlV); + TlX = FMA(KP980785280, TlW, TlV); + TlY = Tdz + TdA; + TdB = Tdz - TdA; + ri[WS(rs, 7)] = FMA(KP773010453, Tdx, Tdi); + ri[WS(rs, 39)] = FNMS(KP773010453, Tdx, Tdi); + ii[WS(rs, 39)] = FNMS(KP773010453, TlY, TlX); + ii[WS(rs, 7)] = FMA(KP773010453, TlY, TlX); + } + { + E TdG, TdN, Tm3, Tm4; + TdO = FMA(KP980785280, TdF, TdC); + TdG = FNMS(KP980785280, TdF, TdC); + ri[WS(rs, 23)] = FMA(KP773010453, TdB, Tdy); + ri[WS(rs, 55)] = FNMS(KP773010453, TdB, Tdy); + ii[WS(rs, 55)] = FNMS(KP773010453, Tm0, TlZ); + ii[WS(rs, 23)] = FMA(KP773010453, Tm0, TlZ); + TdN = TdJ - TdM; + Tm6 = TdJ + TdM; + Tm5 = FMA(KP980785280, Tm2, Tm1); + Tm3 = FNMS(KP980785280, Tm2, Tm1); + Tm4 = TdQ - TdP; + TdR = TdP + TdQ; + ri[WS(rs, 15)] = FMA(KP995184726, TdN, TdG); + ri[WS(rs, 47)] = FNMS(KP995184726, TdN, TdG); + ii[WS(rs, 47)] = FNMS(KP995184726, Tm4, Tm3); + ii[WS(rs, 15)] = FMA(KP995184726, Tm4, Tm3); + } + } + } + } + { + E Tf5, Tlk, Tle, Tej, Tf9, Tff, TeZ, TeD, TfY, Tg2, TfM, TfI, TfV, Tg1, TfL; + E TfB, Tls, TfR, Tly, Tft, Tfj, TdZ, Tfi, TdW, Tlp, Tlb, Tfc, Tfg, Tf0, TeW; + E Te2, Tfk; + { + E Tfn, Tfo, Tfq, Tfr, Tfy, Tfw, Tfv, Tfz, TeR, TeN, TeG, TfF, TfX, TfE, 
TfG; + E TeU, TfP, Tfp; + { + E Te7, Tea, Tee, Teh; + Tfn = FNMS(KP707106781, Te6, Te5); + Te7 = FMA(KP707106781, Te6, Te5); + ri[WS(rs, 63)] = FMA(KP995184726, TdR, TdO); + ri[WS(rs, 31)] = FNMS(KP995184726, TdR, TdO); + ii[WS(rs, 63)] = FMA(KP995184726, Tm6, Tm5); + ii[WS(rs, 31)] = FNMS(KP995184726, Tm6, Tm5); + Tea = FMA(KP707106781, Te9, Te8); + Tfo = FNMS(KP707106781, Te9, Te8); + Tfq = FNMS(KP707106781, Ted, Tec); + Tee = FMA(KP707106781, Ted, Tec); + Teh = FMA(KP707106781, Teg, Tef); + Tfr = FNMS(KP707106781, Teg, Tef); + { + E Tey, Tf8, Tev, TeB; + { + E Ten, Tf3, Teb, Tf4, Tei, Teu; + Tfy = FNMS(KP707106781, Tem, Tel); + Ten = FMA(KP707106781, Tem, Tel); + Tf3 = FMA(KP198912367, Te7, Tea); + Teb = FNMS(KP198912367, Tea, Te7); + Tf4 = FNMS(KP198912367, Tee, Teh); + Tei = FMA(KP198912367, Teh, Tee); + Teu = Teq + Tet; + Tfw = Tet - Teq; + Tfv = FNMS(KP707106781, Tex, Tew); + Tey = FMA(KP707106781, Tex, Tew); + Tf5 = Tf3 + Tf4; + Tlk = Tf4 - Tf3; + Tle = Teb + Tei; + Tej = Teb - Tei; + Tf8 = FMA(KP923879532, Teu, Ten); + Tev = FNMS(KP923879532, Teu, Ten); + TeB = Tez + TeA; + Tfz = Tez - TeA; + } + { + E TfC, TfD, Tf7, TeC; + TeR = FMA(KP707106781, TeQ, TeP); + TfC = FNMS(KP707106781, TeQ, TeP); + TfD = TeM - TeJ; + TeN = TeJ + TeM; + TeG = FMA(KP707106781, TeF, TeE); + TfF = FNMS(KP707106781, TeF, TeE); + Tf7 = FMA(KP923879532, TeB, Tey); + TeC = FNMS(KP923879532, TeB, Tey); + TfX = FMA(KP923879532, TfD, TfC); + TfE = FNMS(KP923879532, TfD, TfC); + Tf9 = FMA(KP098491403, Tf8, Tf7); + Tff = FNMS(KP098491403, Tf7, Tf8); + TeZ = FNMS(KP820678790, Tev, TeC); + TeD = FMA(KP820678790, TeC, Tev); + TfG = TeS - TeT; + TeU = TeS + TeT; + } + } + } + { + E TfU, Tfx, TfW, TfH, TfT, TfA; + TfW = FMA(KP923879532, TfG, TfF); + TfH = FNMS(KP923879532, TfG, TfF); + TfU = FMA(KP923879532, Tfw, Tfv); + Tfx = FNMS(KP923879532, Tfw, Tfv); + TfY = FNMS(KP303346683, TfX, TfW); + Tg2 = FMA(KP303346683, TfW, TfX); + TfM = FMA(KP534511135, TfE, TfH); + TfI = FNMS(KP534511135, TfH, 
TfE); + TfT = FMA(KP923879532, Tfz, Tfy); + TfA = FNMS(KP923879532, Tfz, Tfy); + TfP = FNMS(KP668178637, Tfn, Tfo); + Tfp = FMA(KP668178637, Tfo, Tfn); + TfV = FMA(KP303346683, TfU, TfT); + Tg1 = FNMS(KP303346683, TfT, TfU); + TfL = FNMS(KP534511135, Tfx, TfA); + TfB = FMA(KP534511135, TfA, Tfx); + } + { + E Tfb, TeO, TfQ, Tfs, Tfa, TeV; + TfQ = FMA(KP668178637, Tfq, Tfr); + Tfs = FNMS(KP668178637, Tfr, Tfq); + Tfb = FMA(KP923879532, TeN, TeG); + TeO = FNMS(KP923879532, TeN, TeG); + Tls = TfQ - TfP; + TfR = TfP + TfQ; + Tly = Tfp + Tfs; + Tft = Tfp - Tfs; + Tfj = FNMS(KP414213562, TdX, TdY); + TdZ = FMA(KP414213562, TdY, TdX); + Tfa = FMA(KP923879532, TeU, TeR); + TeV = FNMS(KP923879532, TeU, TeR); + Tfi = FNMS(KP707106781, TdV, TdS); + TdW = FMA(KP707106781, TdV, TdS); + Tlp = FNMS(KP707106781, Tla, Tl9); + Tlb = FMA(KP707106781, Tla, Tl9); + Tfc = FNMS(KP098491403, Tfb, Tfa); + Tfg = FMA(KP098491403, Tfa, Tfb); + Tf0 = FMA(KP820678790, TeO, TeV); + TeW = FNMS(KP820678790, TeV, TeO); + Te2 = FNMS(KP414213562, Te1, Te0); + Tfk = FMA(KP414213562, Te0, Te1); + } + } + { + E Tlx, Tlr, TfO, Tfm, Tfe, Tli, Tlh, Tfh; + { + E Tlo, Tf2, Tld, TeY, Tln, Tf1; + { + E TeX, Tek, Tll, Tlm, Tlj; + Tlo = TeD + TeW; + TeX = TeD - TeW; + { + E Tlq, Te3, Tlc, Tfl, Te4; + Tlq = Te2 - TdZ; + Te3 = TdZ + Te2; + Tlc = Tfj + Tfk; + Tfl = Tfj - Tfk; + Tlx = FNMS(KP923879532, Tlq, Tlp); + Tlr = FMA(KP923879532, Tlq, Tlp); + Tf2 = FMA(KP923879532, Te3, TdW); + Te4 = FNMS(KP923879532, Te3, TdW); + Tlj = FNMS(KP923879532, Tlc, Tlb); + Tld = FMA(KP923879532, Tlc, Tlb); + TfO = FNMS(KP923879532, Tfl, Tfi); + Tfm = FMA(KP923879532, Tfl, Tfi); + Tek = FMA(KP980785280, Tej, Te4); + TeY = FNMS(KP980785280, Tej, Te4); + } + Tln = FNMS(KP980785280, Tlk, Tlj); + Tll = FMA(KP980785280, Tlk, Tlj); + Tlm = Tf0 - TeZ; + Tf1 = TeZ + Tf0; + ri[WS(rs, 9)] = FMA(KP773010453, TeX, Tek); + ri[WS(rs, 41)] = FNMS(KP773010453, TeX, Tek); + ii[WS(rs, 41)] = FNMS(KP773010453, Tlm, Tll); + ii[WS(rs, 9)] = 
FMA(KP773010453, Tlm, Tll); + } + { + E Tf6, Tfd, Tlf, Tlg; + Tfe = FNMS(KP980785280, Tf5, Tf2); + Tf6 = FMA(KP980785280, Tf5, Tf2); + ri[WS(rs, 57)] = FMA(KP773010453, Tf1, TeY); + ri[WS(rs, 25)] = FNMS(KP773010453, Tf1, TeY); + ii[WS(rs, 57)] = FMA(KP773010453, Tlo, Tln); + ii[WS(rs, 25)] = FNMS(KP773010453, Tlo, Tln); + Tfd = Tf9 + Tfc; + Tli = Tfc - Tf9; + Tlh = FNMS(KP980785280, Tle, Tld); + Tlf = FMA(KP980785280, Tle, Tld); + Tlg = Tff + Tfg; + Tfh = Tff - Tfg; + ri[WS(rs, 1)] = FMA(KP995184726, Tfd, Tf6); + ri[WS(rs, 33)] = FNMS(KP995184726, Tfd, Tf6); + ii[WS(rs, 33)] = FNMS(KP995184726, Tlg, Tlf); + ii[WS(rs, 1)] = FMA(KP995184726, Tlg, Tlf); + } + } + { + E TfK, Tlw, Tlv, TfN; + { + E Tfu, TfJ, Tlt, Tlu; + TfK = FNMS(KP831469612, Tft, Tfm); + Tfu = FMA(KP831469612, Tft, Tfm); + ri[WS(rs, 17)] = FMA(KP995184726, Tfh, Tfe); + ri[WS(rs, 49)] = FNMS(KP995184726, Tfh, Tfe); + ii[WS(rs, 49)] = FNMS(KP995184726, Tli, Tlh); + ii[WS(rs, 17)] = FMA(KP995184726, Tli, Tlh); + TfJ = TfB + TfI; + Tlw = TfI - TfB; + Tlv = FNMS(KP831469612, Tls, Tlr); + Tlt = FMA(KP831469612, Tls, Tlr); + Tlu = TfL + TfM; + TfN = TfL - TfM; + ri[WS(rs, 5)] = FMA(KP881921264, TfJ, Tfu); + ri[WS(rs, 37)] = FNMS(KP881921264, TfJ, Tfu); + ii[WS(rs, 37)] = FNMS(KP881921264, Tlu, Tlt); + ii[WS(rs, 5)] = FMA(KP881921264, Tlu, Tlt); + } + { + E TfS, TfZ, Tlz, TlA; + Tg0 = FMA(KP831469612, TfR, TfO); + TfS = FNMS(KP831469612, TfR, TfO); + ri[WS(rs, 21)] = FMA(KP881921264, TfN, TfK); + ri[WS(rs, 53)] = FNMS(KP881921264, TfN, TfK); + ii[WS(rs, 53)] = FNMS(KP881921264, Tlw, Tlv); + ii[WS(rs, 21)] = FMA(KP881921264, Tlw, Tlv); + TfZ = TfV - TfY; + TlC = TfV + TfY; + TlB = FMA(KP831469612, Tly, Tlx); + Tlz = FNMS(KP831469612, Tly, Tlx); + TlA = Tg2 - Tg1; + Tg3 = Tg1 + Tg2; + ri[WS(rs, 13)] = FMA(KP956940335, TfZ, TfS); + ri[WS(rs, 45)] = FNMS(KP956940335, TfZ, TfS); + ii[WS(rs, 45)] = FNMS(KP956940335, TlA, Tlz); + ii[WS(rs, 13)] = FMA(KP956940335, TlA, Tlz); + } + } + } + } + } + } + } + } + 
ri[WS(rs, 61)] = FMA(KP956940335, Tg3, Tg0); + ri[WS(rs, 29)] = FNMS(KP956940335, Tg3, Tg0); + ii[WS(rs, 61)] = FMA(KP956940335, TlC, TlB); + ii[WS(rs, 29)] = FNMS(KP956940335, TlC, TlB); + } + } +} + +/* Twiddle instruction table for t2_64: five TW_CEXP entries (third field 1, 3, 9, 27, 63) closed by one TW_NEXT entry. The non-FMA t2_64 kernel reads W[0]..W[9] and advances W by 10 per iteration, i.e. two values per TW_CEXP entry; presumably each entry is one complex twiddle -- confirm against the tw_instr definition. */ static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 27}, + {TW_CEXP, 0, 63}, + {TW_NEXT, 1, 0} +}; + +/* Descriptor passed to the registration call: first field 64 (matches the -n 64 generator option), the name "t2_64", the twinstr table, &GENUS, and a four-number block {520, 206, 634, 0}. NOTE(review): the four numbers look like operation counts (adds, muls, fmas, other) for this FMA variant -- confirm against the ct_desc definition; the meaning of the three trailing zeros is not visible here. */ static const ct_desc desc = { 64, "t2_64", twinstr, &GENUS, {520, 206, 634, 0}, 0, 0, 0 }; + +/* Registration entry point: hands the t2_64 kernel and its descriptor to X(kdft_dit_register) for planner p. */ void X(codelet_t2_64) (planner *p) { + X(kdft_dit_register) (p, t2_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include t.h */ + +/* + * This function contains 1154 FP additions, 660 FP multiplications, + * (or, 880 additions, 386 multiplications, 274 fused multiply/add), + * 302 stack variables, 15 constants, and 256 memory accesses + */ +#include "t.h" + +static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, 
+0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(128, rs)) { + E T2, T5, T3, T6, Te, T9, TP, T3e, T1e, T39, T3c, TT, T1a, T37, T8; + E Tw, Td, Ty, Tm, Th, T1C, T3K, T1V, T3x, T3I, T1G, T1R, T3v, T2m, T2q; + E T5Y, T6u, T53, T5B, T62, T6w, T57, T5D, T2V, T2X, Tg, TE, T3Y, T3V, T3j; + E Tl, TA, T3g, T1j, T1t, TV, T2C, T2z, T1u, TZ, T1h, To, T1p, T6j, T6H; + E Ts, T1l, T6l, T6F, T2P, T4b, T4x, T5i, T2R, T49, T4z, T5g, TG, T4k, T4m; + E TK, T21, T3O, T3Q, T25, TW, T10, T11, T79, T6X, T5M, T6b, T1v, T30, T69; + E T77, T13, T2F, T2D, T6p, T6O, T1x, T2a, T2f, T6V, T28, T6r, T2h, T6Q, T32; + E T5K, T5w, T4G, T4Q, T3m, T4h, T4I, T5y, T3k, T4f, T41, T4S, T4Y, T3q, T3D; + E T3F, T5r, T3s, T4W, T3Z, T5p; + { + E Ta, Tj, Tx, TC, Tf, Tk, Tz, TD, T1B, T1E, T2o, T2l, T1T, T1Q, T1A; + E T1F, T2p, T2k, T1U, T1P; + { + E T4, T1d, T19, Tb, T1c, T7, Tc, T18, TR, TO, TS, TN; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + Te = W[5]; + T9 = W[4]; + T4 = T2 * T3; + T1d = T5 * T9; + T19 = T5 * Te; + Tb = T2 * T6; + T1c = T2 * Te; + T7 = T5 * T6; + Tc = T5 * T3; + T18 = T2 * T9; + TR = T3 * Te; + TO = T6 * Te; + TS = T6 * T9; + TN = T3 * T9; + TP = TN - TO; + T3e = TR - TS; + T1e = T1c - T1d; + T39 = T1c + T1d; + T3c = TN + TO; + TT = TR + TS; + T1a = T18 + T19; + T37 = T18 - T19; + T8 = T4 - T7; + Ta = T8 * T9; + Tj = T8 * Te; + Tw = T4 + T7; + Tx = Tw * T9; + TC = Tw * Te; + Td = Tb + Tc; + Tf = Td * Te; + Tk = Td * T9; + Ty = Tb - Tc; + Tz = Ty * Te; + TD = Ty * T9; + Tm = W[7]; + T1B = T6 * Tm; + T1E = T3 * Tm; + T2o = T2 * Tm; + T2l = T5 * Tm; + T1T = T9 * Tm; + T1Q = Te * Tm; + Th = W[6]; + T1A = T3 * Th; + T1F = T6 * Th; + T2p = T5 * Th; + T2k = T2 * Th; + T1U = Te * Th; + T1P = T9 * Th; + } + T1C = T1A + T1B; + T3K = T1E + T1F; + T1V = T1T + T1U; + T3x = T2o - T2p; + T3I = T1A - 
T1B; + T1G = T1E - T1F; + T1R = T1P - T1Q; + { + E T5W, T5X, T55, T56; + T3v = T2k + T2l; + T2m = T2k - T2l; + T2q = T2o + T2p; + T5W = T8 * Th; + T5X = Td * Tm; + T5Y = T5W - T5X; + T6u = T5W + T5X; + { + E T51, T52, T60, T61; + T51 = Tw * Th; + T52 = Ty * Tm; + T53 = T51 + T52; + T5B = T51 - T52; + T60 = T8 * Tm; + T61 = Td * Th; + T62 = T60 + T61; + T6w = T60 - T61; + } + T55 = Tw * Tm; + T56 = Ty * Th; + T57 = T55 - T56; + T5D = T55 + T56; + { + E Ti, Tq, TF, TJ, T3W, T3X, T3T, T3U, T3h, T3i, Tn, Tr, TB, TI, T3d; + E T3f, T1k, T1o, T1Z, T23, TQ, TU, T2A, T2B, T2x, T2y, T20, T24, TX, TY; + E T1i, T1n; + T2V = T1P + T1Q; + T2X = T1T - T1U; + Tg = Ta + Tf; + Ti = Tg * Th; + Tq = Tg * Tm; + TE = TC + TD; + TF = TE * Tm; + TJ = TE * Th; + T3W = T37 * Tm; + T3X = T39 * Th; + T3Y = T3W - T3X; + T3T = T37 * Th; + T3U = T39 * Tm; + T3V = T3T + T3U; + T3h = T3c * Tm; + T3i = T3e * Th; + T3j = T3h - T3i; + Tl = Tj - Tk; + Tn = Tl * Tm; + Tr = Tl * Th; + TA = Tx - Tz; + TB = TA * Th; + TI = TA * Tm; + T3d = T3c * Th; + T3f = T3e * Tm; + T3g = T3d + T3f; + T1j = Tj + Tk; + T1k = T1j * Tm; + T1o = T1j * Th; + T1t = Tx + Tz; + T1Z = T1t * Th; + T23 = T1t * Tm; + TQ = TP * Th; + TU = TT * Tm; + TV = TQ + TU; + T2A = T1a * Tm; + T2B = T1e * Th; + T2C = T2A - T2B; + T2x = T1a * Th; + T2y = T1e * Tm; + T2z = T2x + T2y; + T1u = TC - TD; + T20 = T1u * Tm; + T24 = T1u * Th; + TX = TP * Tm; + TY = TT * Th; + TZ = TX - TY; + T1h = Ta - Tf; + T1i = T1h * Th; + T1n = T1h * Tm; + To = Ti - Tn; + T1p = T1n + T1o; + T6j = TQ - TU; + T6H = T2A + T2B; + Ts = Tq + Tr; + T1l = T1i - T1k; + T6l = TX + TY; + T6F = T2x - T2y; + T2P = T1Z - T20; + T4b = TI + TJ; + T4x = T3d - T3f; + T5i = T3W + T3X; + T2R = T23 + T24; + T49 = TB - TF; + T4z = T3h + T3i; + T5g = T3T - T3U; + TG = TB + TF; + T4k = Ti + Tn; + T4m = Tq - Tr; + TK = TI - TJ; + T21 = T1Z + T20; + T3O = T1i + T1k; + T3Q = T1n - T1o; + T25 = T23 - T24; + TW = W[8]; + T10 = W[9]; + T11 = FMA(TV, TW, TZ * T10); + T79 = FNMS(T25, TW, T21 * 
T10); + T6X = FNMS(Td, TW, T8 * T10); + T5M = FNMS(T2X, TW, T2V * T10); + T6b = FNMS(TK, TW, TG * T10); + T1v = FMA(T1t, TW, T1u * T10); + T30 = FMA(T1h, TW, T1j * T10); + T69 = FMA(TG, TW, TK * T10); + T77 = FMA(T21, TW, T25 * T10); + T13 = FNMS(TZ, TW, TV * T10); + T2F = FNMS(T2C, TW, T2z * T10); + T2D = FMA(T2z, TW, T2C * T10); + T6p = FMA(T1a, TW, T1e * T10); + T6O = FMA(TP, TW, TT * T10); + T1x = FNMS(T1u, TW, T1t * T10); + T2a = FNMS(TE, TW, TA * T10); + T2f = FMA(T3, TW, T6 * T10); + T6V = FMA(T8, TW, Td * T10); + T28 = FMA(TA, TW, TE * T10); + T6r = FNMS(T1e, TW, T1a * T10); + T2h = FNMS(T6, TW, T3 * T10); + T6Q = FNMS(TT, TW, TP * T10); + T32 = FNMS(T1j, TW, T1h * T10); + T5K = FMA(T2V, TW, T2X * T10); + T5w = FMA(Tw, TW, Ty * T10); + T4G = FMA(T3O, TW, T3Q * T10); + T4Q = FMA(T4k, TW, T4m * T10); + T3m = FNMS(T3j, TW, T3g * T10); + T4h = FNMS(Te, TW, T9 * T10); + T4I = FNMS(T3Q, TW, T3O * T10); + T5y = FNMS(Ty, TW, Tw * T10); + T3k = FMA(T3g, TW, T3j * T10); + T4f = FMA(T9, TW, Te * T10); + T41 = FNMS(T3Y, TW, T3V * T10); + T4S = FNMS(T4m, TW, T4k * T10); + T4Y = FNMS(T3e, TW, T3c * T10); + T3q = FMA(Tg, TW, Tl * T10); + T3D = FMA(T2, TW, T5 * T10); + T3F = FNMS(T5, TW, T2 * T10); + T5r = FNMS(T39, TW, T37 * T10); + T3s = FNMS(Tl, TW, Tg * T10); + T4W = FMA(T3c, TW, T3e * T10); + T3Z = FMA(T3V, TW, T3Y * T10); + T5p = FMA(T37, TW, T39 * T10); + } + } + } + { + E T17, TdV, Tj3, Tjx, T7l, TbJ, Ti3, Tix, T1K, Tiw, TdY, ThY, T7w, Tj0, TbM; + E Tjw, T2e, TgA, T7I, TaY, TbQ, Tda, Te4, TfO, T2J, TgB, T7T, TaZ, TbT, Tdb; + E Te9, TfP, T36, T3B, TgH, TgE, TgF, TgG, T80, TbW, Tel, TfT, T8b, Tc0, T8k; + E TbX, Teg, TfS, T8h, TbZ, T45, T4q, TgJ, TgK, TgL, TgM, T8r, Tc6, Tew, TfW; + E T8C, Tc4, T8L, Tc7, Ter, TfV, T8I, Tc3, T6B, Th1, Tfm, Tga, Th8, ThI, T9N; + E Tcv, T9Y, TcH, Tav, Tcw, Tf5, Tg7, Tas, TcG, T5c, TgV, TeV, Tg0, TgS, ThD; + E T8U, Tcc, T95, Tco, T9C, Tcd, TeE, Tg3, T9z, Tcn, T5R, TgT, TeO, TeW, TgY; + E ThE, T9h, T9F, T9s, T9E, Tck, Tcq, TeJ, TeX, Tch, 
Tcr, T7e, Th9, Tff, Tfn; + E Th4, ThJ, Taa, Tay, Tal, Tax, TcD, TcJ, Tfa, Tfo, TcA, TcK; + { + E T1, Ti1, Tu, Ti0, TM, T7i, T15, T7j, Tp, Tt; + T1 = ri[0]; + Ti1 = ii[0]; + Tp = ri[WS(rs, 32)]; + Tt = ii[WS(rs, 32)]; + Tu = FMA(To, Tp, Ts * Tt); + Ti0 = FNMS(Ts, Tp, To * Tt); + { + E TH, TL, T12, T14; + TH = ri[WS(rs, 16)]; + TL = ii[WS(rs, 16)]; + TM = FMA(TG, TH, TK * TL); + T7i = FNMS(TK, TH, TG * TL); + T12 = ri[WS(rs, 48)]; + T14 = ii[WS(rs, 48)]; + T15 = FMA(T11, T12, T13 * T14); + T7j = FNMS(T13, T12, T11 * T14); + } + { + E Tv, T16, Tj1, Tj2; + Tv = T1 + Tu; + T16 = TM + T15; + T17 = Tv + T16; + TdV = Tv - T16; + Tj1 = Ti1 - Ti0; + Tj2 = TM - T15; + Tj3 = Tj1 - Tj2; + Tjx = Tj2 + Tj1; + } + { + E T7h, T7k, ThZ, Ti2; + T7h = T1 - Tu; + T7k = T7i - T7j; + T7l = T7h - T7k; + TbJ = T7h + T7k; + ThZ = T7i + T7j; + Ti2 = Ti0 + Ti1; + Ti3 = ThZ + Ti2; + Tix = Ti2 - ThZ; + } + } + { + E T1g, T7m, T1r, T7n, T7o, T7p, T1z, T7s, T1I, T7t, T7r, T7u; + { + E T1b, T1f, T1m, T1q; + T1b = ri[WS(rs, 8)]; + T1f = ii[WS(rs, 8)]; + T1g = FMA(T1a, T1b, T1e * T1f); + T7m = FNMS(T1e, T1b, T1a * T1f); + T1m = ri[WS(rs, 40)]; + T1q = ii[WS(rs, 40)]; + T1r = FMA(T1l, T1m, T1p * T1q); + T7n = FNMS(T1p, T1m, T1l * T1q); + } + T7o = T7m - T7n; + T7p = T1g - T1r; + { + E T1w, T1y, T1D, T1H; + T1w = ri[WS(rs, 56)]; + T1y = ii[WS(rs, 56)]; + T1z = FMA(T1v, T1w, T1x * T1y); + T7s = FNMS(T1x, T1w, T1v * T1y); + T1D = ri[WS(rs, 24)]; + T1H = ii[WS(rs, 24)]; + T1I = FMA(T1C, T1D, T1G * T1H); + T7t = FNMS(T1G, T1D, T1C * T1H); + } + T7r = T1z - T1I; + T7u = T7s - T7t; + { + E T1s, T1J, TdW, TdX; + T1s = T1g + T1r; + T1J = T1z + T1I; + T1K = T1s + T1J; + Tiw = T1J - T1s; + TdW = T7m + T7n; + TdX = T7s + T7t; + TdY = TdW - TdX; + ThY = TdW + TdX; + } + { + E T7q, T7v, TbK, TbL; + T7q = T7o - T7p; + T7v = T7r + T7u; + T7w = KP707106781 * (T7q - T7v); + Tj0 = KP707106781 * (T7q + T7v); + TbK = T7p + T7o; + TbL = T7r - T7u; + TbM = KP707106781 * (TbK + TbL); + Tjw = KP707106781 * (TbL - TbK); + } + 
} + { + E T1Y, Te0, T7A, T7D, T2d, Te1, T7B, T7G, T7C, T7H; + { + E T1O, T7y, T1X, T7z; + { + E T1M, T1N, T1S, T1W; + T1M = ri[WS(rs, 4)]; + T1N = ii[WS(rs, 4)]; + T1O = FMA(T8, T1M, Td * T1N); + T7y = FNMS(Td, T1M, T8 * T1N); + T1S = ri[WS(rs, 36)]; + T1W = ii[WS(rs, 36)]; + T1X = FMA(T1R, T1S, T1V * T1W); + T7z = FNMS(T1V, T1S, T1R * T1W); + } + T1Y = T1O + T1X; + Te0 = T7y + T7z; + T7A = T7y - T7z; + T7D = T1O - T1X; + } + { + E T27, T7E, T2c, T7F; + { + E T22, T26, T29, T2b; + T22 = ri[WS(rs, 20)]; + T26 = ii[WS(rs, 20)]; + T27 = FMA(T21, T22, T25 * T26); + T7E = FNMS(T25, T22, T21 * T26); + T29 = ri[WS(rs, 52)]; + T2b = ii[WS(rs, 52)]; + T2c = FMA(T28, T29, T2a * T2b); + T7F = FNMS(T2a, T29, T28 * T2b); + } + T2d = T27 + T2c; + Te1 = T7E + T7F; + T7B = T27 - T2c; + T7G = T7E - T7F; + } + T2e = T1Y + T2d; + TgA = Te0 + Te1; + T7C = T7A + T7B; + T7H = T7D - T7G; + T7I = FNMS(KP923879532, T7H, KP382683432 * T7C); + TaY = FMA(KP923879532, T7C, KP382683432 * T7H); + { + E TbO, TbP, Te2, Te3; + TbO = T7A - T7B; + TbP = T7D + T7G; + TbQ = FNMS(KP382683432, TbP, KP923879532 * TbO); + Tda = FMA(KP382683432, TbO, KP923879532 * TbP); + Te2 = Te0 - Te1; + Te3 = T1Y - T2d; + Te4 = Te2 - Te3; + TfO = Te3 + Te2; + } + } + { + E T2t, Te6, T7L, T7O, T2I, Te7, T7M, T7R, T7N, T7S; + { + E T2j, T7J, T2s, T7K; + { + E T2g, T2i, T2n, T2r; + T2g = ri[WS(rs, 60)]; + T2i = ii[WS(rs, 60)]; + T2j = FMA(T2f, T2g, T2h * T2i); + T7J = FNMS(T2h, T2g, T2f * T2i); + T2n = ri[WS(rs, 28)]; + T2r = ii[WS(rs, 28)]; + T2s = FMA(T2m, T2n, T2q * T2r); + T7K = FNMS(T2q, T2n, T2m * T2r); + } + T2t = T2j + T2s; + Te6 = T7J + T7K; + T7L = T7J - T7K; + T7O = T2j - T2s; + } + { + E T2w, T7P, T2H, T7Q; + { + E T2u, T2v, T2E, T2G; + T2u = ri[WS(rs, 12)]; + T2v = ii[WS(rs, 12)]; + T2w = FMA(TP, T2u, TT * T2v); + T7P = FNMS(TT, T2u, TP * T2v); + T2E = ri[WS(rs, 44)]; + T2G = ii[WS(rs, 44)]; + T2H = FMA(T2D, T2E, T2F * T2G); + T7Q = FNMS(T2F, T2E, T2D * T2G); + } + T2I = T2w + T2H; + Te7 = T7P + T7Q; + T7M = 
T2w - T2H; + T7R = T7P - T7Q; + } + T2J = T2t + T2I; + TgB = Te6 + Te7; + T7N = T7L + T7M; + T7S = T7O - T7R; + T7T = FMA(KP382683432, T7N, KP923879532 * T7S); + TaZ = FNMS(KP923879532, T7N, KP382683432 * T7S); + { + E TbR, TbS, Te5, Te8; + TbR = T7L - T7M; + TbS = T7O + T7R; + TbT = FMA(KP923879532, TbR, KP382683432 * TbS); + Tdb = FNMS(KP382683432, TbR, KP923879532 * TbS); + Te5 = T2t - T2I; + Te8 = Te6 - Te7; + Te9 = Te5 + Te8; + TfP = Te5 - Te8; + } + } + { + E T2O, T7W, T2T, T7X, T2U, Tec, T2Z, T8e, T34, T8f, T35, Ted, T3p, Tei, T86; + E T89, T3A, Tej, T81, T84; + { + E T2M, T2N, T2Q, T2S; + T2M = ri[WS(rs, 2)]; + T2N = ii[WS(rs, 2)]; + T2O = FMA(Tw, T2M, Ty * T2N); + T7W = FNMS(Ty, T2M, Tw * T2N); + T2Q = ri[WS(rs, 34)]; + T2S = ii[WS(rs, 34)]; + T2T = FMA(T2P, T2Q, T2R * T2S); + T7X = FNMS(T2R, T2Q, T2P * T2S); + } + T2U = T2O + T2T; + Tec = T7W + T7X; + { + E T2W, T2Y, T31, T33; + T2W = ri[WS(rs, 18)]; + T2Y = ii[WS(rs, 18)]; + T2Z = FMA(T2V, T2W, T2X * T2Y); + T8e = FNMS(T2X, T2W, T2V * T2Y); + T31 = ri[WS(rs, 50)]; + T33 = ii[WS(rs, 50)]; + T34 = FMA(T30, T31, T32 * T33); + T8f = FNMS(T32, T31, T30 * T33); + } + T35 = T2Z + T34; + Ted = T8e + T8f; + { + E T3b, T87, T3o, T88; + { + E T38, T3a, T3l, T3n; + T38 = ri[WS(rs, 10)]; + T3a = ii[WS(rs, 10)]; + T3b = FMA(T37, T38, T39 * T3a); + T87 = FNMS(T39, T38, T37 * T3a); + T3l = ri[WS(rs, 42)]; + T3n = ii[WS(rs, 42)]; + T3o = FMA(T3k, T3l, T3m * T3n); + T88 = FNMS(T3m, T3l, T3k * T3n); + } + T3p = T3b + T3o; + Tei = T87 + T88; + T86 = T3b - T3o; + T89 = T87 - T88; + } + { + E T3u, T82, T3z, T83; + { + E T3r, T3t, T3w, T3y; + T3r = ri[WS(rs, 58)]; + T3t = ii[WS(rs, 58)]; + T3u = FMA(T3q, T3r, T3s * T3t); + T82 = FNMS(T3s, T3r, T3q * T3t); + T3w = ri[WS(rs, 26)]; + T3y = ii[WS(rs, 26)]; + T3z = FMA(T3v, T3w, T3x * T3y); + T83 = FNMS(T3x, T3w, T3v * T3y); + } + T3A = T3u + T3z; + Tej = T82 + T83; + T81 = T3u - T3z; + T84 = T82 - T83; + } + T36 = T2U + T35; + T3B = T3p + T3A; + TgH = T36 - T3B; + TgE = Tec + Ted; 
+ TgF = Tei + Tej; + TgG = TgE - TgF; + { + E T7Y, T7Z, Teh, Tek; + T7Y = T7W - T7X; + T7Z = T2Z - T34; + T80 = T7Y + T7Z; + TbW = T7Y - T7Z; + Teh = T2U - T35; + Tek = Tei - Tej; + Tel = Teh - Tek; + TfT = Teh + Tek; + } + { + E T85, T8a, T8i, T8j; + T85 = T81 - T84; + T8a = T86 + T89; + T8b = KP707106781 * (T85 - T8a); + Tc0 = KP707106781 * (T8a + T85); + T8i = T89 - T86; + T8j = T81 + T84; + T8k = KP707106781 * (T8i - T8j); + TbX = KP707106781 * (T8i + T8j); + } + { + E Tee, Tef, T8d, T8g; + Tee = Tec - Ted; + Tef = T3A - T3p; + Teg = Tee - Tef; + TfS = Tee + Tef; + T8d = T2O - T2T; + T8g = T8e - T8f; + T8h = T8d - T8g; + TbZ = T8d + T8g; + } + } + { + E T3H, T8n, T3M, T8o, T3N, Ten, T3S, T8F, T43, T8G, T44, Teo, T4e, Tet, T8x; + E T8A, T4p, Teu, T8s, T8v; + { + E T3E, T3G, T3J, T3L; + T3E = ri[WS(rs, 62)]; + T3G = ii[WS(rs, 62)]; + T3H = FMA(T3D, T3E, T3F * T3G); + T8n = FNMS(T3F, T3E, T3D * T3G); + T3J = ri[WS(rs, 30)]; + T3L = ii[WS(rs, 30)]; + T3M = FMA(T3I, T3J, T3K * T3L); + T8o = FNMS(T3K, T3J, T3I * T3L); + } + T3N = T3H + T3M; + Ten = T8n + T8o; + { + E T3P, T3R, T40, T42; + T3P = ri[WS(rs, 14)]; + T3R = ii[WS(rs, 14)]; + T3S = FMA(T3O, T3P, T3Q * T3R); + T8F = FNMS(T3Q, T3P, T3O * T3R); + T40 = ri[WS(rs, 46)]; + T42 = ii[WS(rs, 46)]; + T43 = FMA(T3Z, T40, T41 * T42); + T8G = FNMS(T41, T40, T3Z * T42); + } + T44 = T3S + T43; + Teo = T8F + T8G; + { + E T48, T8y, T4d, T8z; + { + E T46, T47, T4a, T4c; + T46 = ri[WS(rs, 6)]; + T47 = ii[WS(rs, 6)]; + T48 = FMA(T3c, T46, T3e * T47); + T8y = FNMS(T3e, T46, T3c * T47); + T4a = ri[WS(rs, 38)]; + T4c = ii[WS(rs, 38)]; + T4d = FMA(T49, T4a, T4b * T4c); + T8z = FNMS(T4b, T4a, T49 * T4c); + } + T4e = T48 + T4d; + Tet = T8y + T8z; + T8x = T48 - T4d; + T8A = T8y - T8z; + } + { + E T4j, T8t, T4o, T8u; + { + E T4g, T4i, T4l, T4n; + T4g = ri[WS(rs, 54)]; + T4i = ii[WS(rs, 54)]; + T4j = FMA(T4f, T4g, T4h * T4i); + T8t = FNMS(T4h, T4g, T4f * T4i); + T4l = ri[WS(rs, 22)]; + T4n = ii[WS(rs, 22)]; + T4o = FMA(T4k, T4l, T4m * 
T4n); + T8u = FNMS(T4m, T4l, T4k * T4n); + } + T4p = T4j + T4o; + Teu = T8t + T8u; + T8s = T4j - T4o; + T8v = T8t - T8u; + } + T45 = T3N + T44; + T4q = T4e + T4p; + TgJ = T45 - T4q; + TgK = Ten + Teo; + TgL = Tet + Teu; + TgM = TgK - TgL; + { + E T8p, T8q, Tes, Tev; + T8p = T8n - T8o; + T8q = T3S - T43; + T8r = T8p + T8q; + Tc6 = T8p - T8q; + Tes = T3N - T44; + Tev = Tet - Teu; + Tew = Tes - Tev; + TfW = Tes + Tev; + } + { + E T8w, T8B, T8J, T8K; + T8w = T8s - T8v; + T8B = T8x + T8A; + T8C = KP707106781 * (T8w - T8B); + Tc4 = KP707106781 * (T8B + T8w); + T8J = T8A - T8x; + T8K = T8s + T8v; + T8L = KP707106781 * (T8J - T8K); + Tc7 = KP707106781 * (T8J + T8K); + } + { + E Tep, Teq, T8E, T8H; + Tep = Ten - Teo; + Teq = T4p - T4e; + Ter = Tep - Teq; + TfV = Tep + Teq; + T8E = T3H - T3M; + T8H = T8F - T8G; + T8I = T8E - T8H; + Tc3 = T8E + T8H; + } + } + { + E T5V, Tao, T64, Tap, T65, Tfi, T68, T9K, T6d, T9L, T6e, Tfj, T6o, Tf2, T9Q; + E T9R, T6z, Tf3, T9T, T9W; + { + E T5T, T5U, T5Z, T63; + T5T = ri[WS(rs, 63)]; + T5U = ii[WS(rs, 63)]; + T5V = FMA(TW, T5T, T10 * T5U); + Tao = FNMS(T10, T5T, TW * T5U); + T5Z = ri[WS(rs, 31)]; + T63 = ii[WS(rs, 31)]; + T64 = FMA(T5Y, T5Z, T62 * T63); + Tap = FNMS(T62, T5Z, T5Y * T63); + } + T65 = T5V + T64; + Tfi = Tao + Tap; + { + E T66, T67, T6a, T6c; + T66 = ri[WS(rs, 15)]; + T67 = ii[WS(rs, 15)]; + T68 = FMA(TV, T66, TZ * T67); + T9K = FNMS(TZ, T66, TV * T67); + T6a = ri[WS(rs, 47)]; + T6c = ii[WS(rs, 47)]; + T6d = FMA(T69, T6a, T6b * T6c); + T9L = FNMS(T6b, T6a, T69 * T6c); + } + T6e = T68 + T6d; + Tfj = T9K + T9L; + { + E T6i, T9O, T6n, T9P; + { + E T6g, T6h, T6k, T6m; + T6g = ri[WS(rs, 7)]; + T6h = ii[WS(rs, 7)]; + T6i = FMA(T1t, T6g, T1u * T6h); + T9O = FNMS(T1u, T6g, T1t * T6h); + T6k = ri[WS(rs, 39)]; + T6m = ii[WS(rs, 39)]; + T6n = FMA(T6j, T6k, T6l * T6m); + T9P = FNMS(T6l, T6k, T6j * T6m); + } + T6o = T6i + T6n; + Tf2 = T9O + T9P; + T9Q = T9O - T9P; + T9R = T6i - T6n; + } + { + E T6t, T9U, T6y, T9V; + { + E T6q, T6s, T6v, 
T6x; + T6q = ri[WS(rs, 55)]; + T6s = ii[WS(rs, 55)]; + T6t = FMA(T6p, T6q, T6r * T6s); + T9U = FNMS(T6r, T6q, T6p * T6s); + T6v = ri[WS(rs, 23)]; + T6x = ii[WS(rs, 23)]; + T6y = FMA(T6u, T6v, T6w * T6x); + T9V = FNMS(T6w, T6v, T6u * T6x); + } + T6z = T6t + T6y; + Tf3 = T9U + T9V; + T9T = T6t - T6y; + T9W = T9U - T9V; + } + { + E T6f, T6A, Tfk, Tfl; + T6f = T65 + T6e; + T6A = T6o + T6z; + T6B = T6f + T6A; + Th1 = T6f - T6A; + Tfk = Tfi - Tfj; + Tfl = T6z - T6o; + Tfm = Tfk - Tfl; + Tga = Tfk + Tfl; + } + { + E Th6, Th7, T9J, T9M; + Th6 = Tfi + Tfj; + Th7 = Tf2 + Tf3; + Th8 = Th6 - Th7; + ThI = Th6 + Th7; + T9J = T5V - T64; + T9M = T9K - T9L; + T9N = T9J - T9M; + Tcv = T9J + T9M; + } + { + E T9S, T9X, Tat, Tau; + T9S = T9Q - T9R; + T9X = T9T + T9W; + T9Y = KP707106781 * (T9S - T9X); + TcH = KP707106781 * (T9S + T9X); + Tat = T9T - T9W; + Tau = T9R + T9Q; + Tav = KP707106781 * (Tat - Tau); + Tcw = KP707106781 * (Tau + Tat); + } + { + E Tf1, Tf4, Taq, Tar; + Tf1 = T65 - T6e; + Tf4 = Tf2 - Tf3; + Tf5 = Tf1 - Tf4; + Tg7 = Tf1 + Tf4; + Taq = Tao - Tap; + Tar = T68 - T6d; + Tas = Taq + Tar; + TcG = Taq - Tar; + } + } + { + E T4w, T8Q, T4B, T8R, T4C, TeA, T4F, T9w, T4K, T9x, T4L, TeB, T4V, TeS, T90; + E T93, T5a, TeT, T8V, T8Y; + { + E T4u, T4v, T4y, T4A; + T4u = ri[WS(rs, 1)]; + T4v = ii[WS(rs, 1)]; + T4w = FMA(T2, T4u, T5 * T4v); + T8Q = FNMS(T5, T4u, T2 * T4v); + T4y = ri[WS(rs, 33)]; + T4A = ii[WS(rs, 33)]; + T4B = FMA(T4x, T4y, T4z * T4A); + T8R = FNMS(T4z, T4y, T4x * T4A); + } + T4C = T4w + T4B; + TeA = T8Q + T8R; + { + E T4D, T4E, T4H, T4J; + T4D = ri[WS(rs, 17)]; + T4E = ii[WS(rs, 17)]; + T4F = FMA(T3V, T4D, T3Y * T4E); + T9w = FNMS(T3Y, T4D, T3V * T4E); + T4H = ri[WS(rs, 49)]; + T4J = ii[WS(rs, 49)]; + T4K = FMA(T4G, T4H, T4I * T4J); + T9x = FNMS(T4I, T4H, T4G * T4J); + } + T4L = T4F + T4K; + TeB = T9w + T9x; + { + E T4P, T91, T4U, T92; + { + E T4N, T4O, T4R, T4T; + T4N = ri[WS(rs, 9)]; + T4O = ii[WS(rs, 9)]; + T4P = FMA(T9, T4N, Te * T4O); + T91 = FNMS(Te, T4N, T9 
* T4O); + T4R = ri[WS(rs, 41)]; + T4T = ii[WS(rs, 41)]; + T4U = FMA(T4Q, T4R, T4S * T4T); + T92 = FNMS(T4S, T4R, T4Q * T4T); + } + T4V = T4P + T4U; + TeS = T91 + T92; + T90 = T4P - T4U; + T93 = T91 - T92; + } + { + E T50, T8W, T59, T8X; + { + E T4X, T4Z, T54, T58; + T4X = ri[WS(rs, 57)]; + T4Z = ii[WS(rs, 57)]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T8W = FNMS(T4Y, T4X, T4W * T4Z); + T54 = ri[WS(rs, 25)]; + T58 = ii[WS(rs, 25)]; + T59 = FMA(T53, T54, T57 * T58); + T8X = FNMS(T57, T54, T53 * T58); + } + T5a = T50 + T59; + TeT = T8W + T8X; + T8V = T50 - T59; + T8Y = T8W - T8X; + } + { + E T4M, T5b, TeR, TeU; + T4M = T4C + T4L; + T5b = T4V + T5a; + T5c = T4M + T5b; + TgV = T4M - T5b; + TeR = T4C - T4L; + TeU = TeS - TeT; + TeV = TeR - TeU; + Tg0 = TeR + TeU; + } + { + E TgQ, TgR, T8S, T8T; + TgQ = TeA + TeB; + TgR = TeS + TeT; + TgS = TgQ - TgR; + ThD = TgQ + TgR; + T8S = T8Q - T8R; + T8T = T4F - T4K; + T8U = T8S + T8T; + Tcc = T8S - T8T; + } + { + E T8Z, T94, T9A, T9B; + T8Z = T8V - T8Y; + T94 = T90 + T93; + T95 = KP707106781 * (T8Z - T94); + Tco = KP707106781 * (T94 + T8Z); + T9A = T93 - T90; + T9B = T8V + T8Y; + T9C = KP707106781 * (T9A - T9B); + Tcd = KP707106781 * (T9A + T9B); + } + { + E TeC, TeD, T9v, T9y; + TeC = TeA - TeB; + TeD = T5a - T4V; + TeE = TeC - TeD; + Tg3 = TeC + TeD; + T9v = T4w - T4B; + T9y = T9w - T9x; + T9z = T9v - T9y; + Tcn = T9v + T9y; + } + } + { + E T5l, TeL, T9k, T9n, T5P, TeH, T9a, T9f, T5u, TeM, T9l, T9q, T5G, TeG, T97; + E T9e; + { + E T5f, T9i, T5k, T9j; + { + E T5d, T5e, T5h, T5j; + T5d = ri[WS(rs, 5)]; + T5e = ii[WS(rs, 5)]; + T5f = FMA(Tg, T5d, Tl * T5e); + T9i = FNMS(Tl, T5d, Tg * T5e); + T5h = ri[WS(rs, 37)]; + T5j = ii[WS(rs, 37)]; + T5k = FMA(T5g, T5h, T5i * T5j); + T9j = FNMS(T5i, T5h, T5g * T5j); + } + T5l = T5f + T5k; + TeL = T9i + T9j; + T9k = T9i - T9j; + T9n = T5f - T5k; + } + { + E T5J, T98, T5O, T99; + { + E T5H, T5I, T5L, T5N; + T5H = ri[WS(rs, 13)]; + T5I = ii[WS(rs, 13)]; + T5J = FMA(T1h, T5H, T1j * T5I); + T98 = 
FNMS(T1j, T5H, T1h * T5I); + T5L = ri[WS(rs, 45)]; + T5N = ii[WS(rs, 45)]; + T5O = FMA(T5K, T5L, T5M * T5N); + T99 = FNMS(T5M, T5L, T5K * T5N); + } + T5P = T5J + T5O; + TeH = T98 + T99; + T9a = T98 - T99; + T9f = T5J - T5O; + } + { + E T5o, T9o, T5t, T9p; + { + E T5m, T5n, T5q, T5s; + T5m = ri[WS(rs, 21)]; + T5n = ii[WS(rs, 21)]; + T5o = FMA(T3g, T5m, T3j * T5n); + T9o = FNMS(T3j, T5m, T3g * T5n); + T5q = ri[WS(rs, 53)]; + T5s = ii[WS(rs, 53)]; + T5t = FMA(T5p, T5q, T5r * T5s); + T9p = FNMS(T5r, T5q, T5p * T5s); + } + T5u = T5o + T5t; + TeM = T9o + T9p; + T9l = T5o - T5t; + T9q = T9o - T9p; + } + { + E T5A, T9c, T5F, T9d; + { + E T5x, T5z, T5C, T5E; + T5x = ri[WS(rs, 61)]; + T5z = ii[WS(rs, 61)]; + T5A = FMA(T5w, T5x, T5y * T5z); + T9c = FNMS(T5y, T5x, T5w * T5z); + T5C = ri[WS(rs, 29)]; + T5E = ii[WS(rs, 29)]; + T5F = FMA(T5B, T5C, T5D * T5E); + T9d = FNMS(T5D, T5C, T5B * T5E); + } + T5G = T5A + T5F; + TeG = T9c + T9d; + T97 = T5A - T5F; + T9e = T9c - T9d; + } + { + E T5v, T5Q, TeK, TeN; + T5v = T5l + T5u; + T5Q = T5G + T5P; + T5R = T5v + T5Q; + TgT = T5Q - T5v; + TeK = T5l - T5u; + TeN = TeL - TeM; + TeO = TeK + TeN; + TeW = TeN - TeK; + } + { + E TgW, TgX, T9b, T9g; + TgW = TeL + TeM; + TgX = TeG + TeH; + TgY = TgW - TgX; + ThE = TgW + TgX; + T9b = T97 - T9a; + T9g = T9e + T9f; + T9h = FNMS(KP923879532, T9g, KP382683432 * T9b); + T9F = FMA(KP382683432, T9g, KP923879532 * T9b); + } + { + E T9m, T9r, Tci, Tcj; + T9m = T9k + T9l; + T9r = T9n - T9q; + T9s = FMA(KP923879532, T9m, KP382683432 * T9r); + T9E = FNMS(KP923879532, T9r, KP382683432 * T9m); + Tci = T9k - T9l; + Tcj = T9n + T9q; + Tck = FMA(KP382683432, Tci, KP923879532 * Tcj); + Tcq = FNMS(KP382683432, Tcj, KP923879532 * Tci); + } + { + E TeF, TeI, Tcf, Tcg; + TeF = T5G - T5P; + TeI = TeG - TeH; + TeJ = TeF - TeI; + TeX = TeF + TeI; + Tcf = T97 + T9a; + Tcg = T9e - T9f; + Tch = FNMS(KP382683432, Tcg, KP923879532 * Tcf); + Tcr = FMA(KP923879532, Tcg, KP382683432 * Tcf); + } + } + { + E T6K, Tf6, Ta2, Ta5, 
T7c, Tfd, Tae, Taj, T6T, Tf7, Ta3, Ta8, T73, Tfc, Tad; + E Tag; + { + E T6E, Ta0, T6J, Ta1; + { + E T6C, T6D, T6G, T6I; + T6C = ri[WS(rs, 3)]; + T6D = ii[WS(rs, 3)]; + T6E = FMA(T3, T6C, T6 * T6D); + Ta0 = FNMS(T6, T6C, T3 * T6D); + T6G = ri[WS(rs, 35)]; + T6I = ii[WS(rs, 35)]; + T6J = FMA(T6F, T6G, T6H * T6I); + Ta1 = FNMS(T6H, T6G, T6F * T6I); + } + T6K = T6E + T6J; + Tf6 = Ta0 + Ta1; + Ta2 = Ta0 - Ta1; + Ta5 = T6E - T6J; + } + { + E T76, Tah, T7b, Tai; + { + E T74, T75, T78, T7a; + T74 = ri[WS(rs, 11)]; + T75 = ii[WS(rs, 11)]; + T76 = FMA(TA, T74, TE * T75); + Tah = FNMS(TE, T74, TA * T75); + T78 = ri[WS(rs, 43)]; + T7a = ii[WS(rs, 43)]; + T7b = FMA(T77, T78, T79 * T7a); + Tai = FNMS(T79, T78, T77 * T7a); + } + T7c = T76 + T7b; + Tfd = Tah + Tai; + Tae = T76 - T7b; + Taj = Tah - Tai; + } + { + E T6N, Ta6, T6S, Ta7; + { + E T6L, T6M, T6P, T6R; + T6L = ri[WS(rs, 19)]; + T6M = ii[WS(rs, 19)]; + T6N = FMA(T2z, T6L, T2C * T6M); + Ta6 = FNMS(T2C, T6L, T2z * T6M); + T6P = ri[WS(rs, 51)]; + T6R = ii[WS(rs, 51)]; + T6S = FMA(T6O, T6P, T6Q * T6R); + Ta7 = FNMS(T6Q, T6P, T6O * T6R); + } + T6T = T6N + T6S; + Tf7 = Ta6 + Ta7; + Ta3 = T6N - T6S; + Ta8 = Ta6 - Ta7; + } + { + E T6Z, Tab, T72, Tac; + { + E T6W, T6Y, T70, T71; + T6W = ri[WS(rs, 59)]; + T6Y = ii[WS(rs, 59)]; + T6Z = FMA(T6V, T6W, T6X * T6Y); + Tab = FNMS(T6X, T6W, T6V * T6Y); + T70 = ri[WS(rs, 27)]; + T71 = ii[WS(rs, 27)]; + T72 = FMA(Th, T70, Tm * T71); + Tac = FNMS(Tm, T70, Th * T71); + } + T73 = T6Z + T72; + Tfc = Tab + Tac; + Tad = Tab - Tac; + Tag = T6Z - T72; + } + { + E T6U, T7d, Tfb, Tfe; + T6U = T6K + T6T; + T7d = T73 + T7c; + T7e = T6U + T7d; + Th9 = T7d - T6U; + Tfb = T73 - T7c; + Tfe = Tfc - Tfd; + Tff = Tfb + Tfe; + Tfn = Tfb - Tfe; + } + { + E Th2, Th3, Ta4, Ta9; + Th2 = Tf6 + Tf7; + Th3 = Tfc + Tfd; + Th4 = Th2 - Th3; + ThJ = Th2 + Th3; + Ta4 = Ta2 + Ta3; + Ta9 = Ta5 - Ta8; + Taa = FNMS(KP923879532, Ta9, KP382683432 * Ta4); + Tay = FMA(KP923879532, Ta4, KP382683432 * Ta9); + } + { + E Taf, Tak, TcB, 
TcC; + Taf = Tad + Tae; + Tak = Tag - Taj; + Tal = FMA(KP382683432, Taf, KP923879532 * Tak); + Tax = FNMS(KP923879532, Taf, KP382683432 * Tak); + TcB = Tad - Tae; + TcC = Tag + Taj; + TcD = FMA(KP923879532, TcB, KP382683432 * TcC); + TcJ = FNMS(KP382683432, TcB, KP923879532 * TcC); + } + { + E Tf8, Tf9, Tcy, Tcz; + Tf8 = Tf6 - Tf7; + Tf9 = T6K - T6T; + Tfa = Tf8 - Tf9; + Tfo = Tf9 + Tf8; + Tcy = Ta2 - Ta3; + Tcz = Ta5 + Ta8; + TcA = FNMS(KP382683432, Tcz, KP923879532 * Tcy); + TcK = FMA(KP382683432, Tcy, KP923879532 * Tcz); + } + } + { + E T2L, Thx, ThU, ThV, Ti5, Tib, T4s, Tia, T7g, Ti7, ThG, ThO, ThL, ThP, ThA; + E ThW; + { + E T1L, T2K, ThS, ThT; + T1L = T17 + T1K; + T2K = T2e + T2J; + T2L = T1L + T2K; + Thx = T1L - T2K; + ThS = ThD + ThE; + ThT = ThI + ThJ; + ThU = ThS - ThT; + ThV = ThS + ThT; + } + { + E ThX, Ti4, T3C, T4r; + ThX = TgA + TgB; + Ti4 = ThY + Ti3; + Ti5 = ThX + Ti4; + Tib = Ti4 - ThX; + T3C = T36 + T3B; + T4r = T45 + T4q; + T4s = T3C + T4r; + Tia = T4r - T3C; + } + { + E T5S, T7f, ThC, ThF; + T5S = T5c + T5R; + T7f = T6B + T7e; + T7g = T5S + T7f; + Ti7 = T7f - T5S; + ThC = T5c - T5R; + ThF = ThD - ThE; + ThG = ThC + ThF; + ThO = ThF - ThC; + } + { + E ThH, ThK, Thy, Thz; + ThH = T6B - T7e; + ThK = ThI - ThJ; + ThL = ThH - ThK; + ThP = ThH + ThK; + Thy = TgE + TgF; + Thz = TgK + TgL; + ThA = Thy - Thz; + ThW = Thy + Thz; + } + { + E T4t, Ti6, ThR, Ti8; + T4t = T2L + T4s; + ri[WS(rs, 32)] = T4t - T7g; + ri[0] = T4t + T7g; + Ti6 = ThW + Ti5; + ii[0] = ThV + Ti6; + ii[WS(rs, 32)] = Ti6 - ThV; + ThR = T2L - T4s; + ri[WS(rs, 48)] = ThR - ThU; + ri[WS(rs, 16)] = ThR + ThU; + Ti8 = Ti5 - ThW; + ii[WS(rs, 16)] = Ti7 + Ti8; + ii[WS(rs, 48)] = Ti8 - Ti7; + } + { + E ThB, ThM, Ti9, Tic; + ThB = Thx + ThA; + ThM = KP707106781 * (ThG + ThL); + ri[WS(rs, 40)] = ThB - ThM; + ri[WS(rs, 8)] = ThB + ThM; + Ti9 = KP707106781 * (ThO + ThP); + Tic = Tia + Tib; + ii[WS(rs, 8)] = Ti9 + Tic; + ii[WS(rs, 40)] = Tic - Ti9; + } + { + E ThN, ThQ, Tid, Tie; + ThN = Thx - 
ThA; + ThQ = KP707106781 * (ThO - ThP); + ri[WS(rs, 56)] = ThN - ThQ; + ri[WS(rs, 24)] = ThN + ThQ; + Tid = KP707106781 * (ThL - ThG); + Tie = Tib - Tia; + ii[WS(rs, 24)] = Tid + Tie; + ii[WS(rs, 56)] = Tie - Tid; + } + } + { + E TgD, Thh, Thr, Thv, Tij, Tip, TgO, Tig, Th0, The, Thk, Tio, Tho, Thu, Thb; + E Thf; + { + E Tgz, TgC, Thp, Thq; + Tgz = T17 - T1K; + TgC = TgA - TgB; + TgD = Tgz - TgC; + Thh = Tgz + TgC; + Thp = Th1 + Th4; + Thq = Th8 + Th9; + Thr = FNMS(KP382683432, Thq, KP923879532 * Thp); + Thv = FMA(KP923879532, Thq, KP382683432 * Thp); + } + { + E Tih, Tii, TgI, TgN; + Tih = T2J - T2e; + Tii = Ti3 - ThY; + Tij = Tih + Tii; + Tip = Tii - Tih; + TgI = TgG - TgH; + TgN = TgJ + TgM; + TgO = KP707106781 * (TgI - TgN); + Tig = KP707106781 * (TgI + TgN); + } + { + E TgU, TgZ, Thi, Thj; + TgU = TgS - TgT; + TgZ = TgV - TgY; + Th0 = FMA(KP923879532, TgU, KP382683432 * TgZ); + The = FNMS(KP923879532, TgZ, KP382683432 * TgU); + Thi = TgH + TgG; + Thj = TgJ - TgM; + Thk = KP707106781 * (Thi + Thj); + Tio = KP707106781 * (Thj - Thi); + } + { + E Thm, Thn, Th5, Tha; + Thm = TgS + TgT; + Thn = TgV + TgY; + Tho = FMA(KP382683432, Thm, KP923879532 * Thn); + Thu = FNMS(KP382683432, Thn, KP923879532 * Thm); + Th5 = Th1 - Th4; + Tha = Th8 - Th9; + Thb = FNMS(KP923879532, Tha, KP382683432 * Th5); + Thf = FMA(KP382683432, Tha, KP923879532 * Th5); + } + { + E TgP, Thc, Tin, Tiq; + TgP = TgD + TgO; + Thc = Th0 + Thb; + ri[WS(rs, 44)] = TgP - Thc; + ri[WS(rs, 12)] = TgP + Thc; + Tin = The + Thf; + Tiq = Tio + Tip; + ii[WS(rs, 12)] = Tin + Tiq; + ii[WS(rs, 44)] = Tiq - Tin; + } + { + E Thd, Thg, Tir, Tis; + Thd = TgD - TgO; + Thg = The - Thf; + ri[WS(rs, 60)] = Thd - Thg; + ri[WS(rs, 28)] = Thd + Thg; + Tir = Thb - Th0; + Tis = Tip - Tio; + ii[WS(rs, 28)] = Tir + Tis; + ii[WS(rs, 60)] = Tis - Tir; + } + { + E Thl, Ths, Tif, Tik; + Thl = Thh + Thk; + Ths = Tho + Thr; + ri[WS(rs, 36)] = Thl - Ths; + ri[WS(rs, 4)] = Thl + Ths; + Tif = Thu + Thv; + Tik = Tig + Tij; + ii[WS(rs, 
4)] = Tif + Tik; + ii[WS(rs, 36)] = Tik - Tif; + } + { + E Tht, Thw, Til, Tim; + Tht = Thh - Thk; + Thw = Thu - Thv; + ri[WS(rs, 52)] = Tht - Thw; + ri[WS(rs, 20)] = Tht + Thw; + Til = Thr - Tho; + Tim = Tij - Tig; + ii[WS(rs, 20)] = Til + Tim; + ii[WS(rs, 52)] = Tim - Til; + } + } + { + E Teb, Tfx, Tey, TiK, TiN, TiT, TfA, TiS, Tfr, TfL, Tfv, TfH, Tf0, TfK, Tfu; + E TfE; + { + E TdZ, Tea, Tfy, Tfz; + TdZ = TdV - TdY; + Tea = KP707106781 * (Te4 - Te9); + Teb = TdZ - Tea; + Tfx = TdZ + Tea; + { + E Tem, Tex, TiL, TiM; + Tem = FNMS(KP923879532, Tel, KP382683432 * Teg); + Tex = FMA(KP382683432, Ter, KP923879532 * Tew); + Tey = Tem - Tex; + TiK = Tem + Tex; + TiL = KP707106781 * (TfP - TfO); + TiM = Tix - Tiw; + TiN = TiL + TiM; + TiT = TiM - TiL; + } + Tfy = FMA(KP923879532, Teg, KP382683432 * Tel); + Tfz = FNMS(KP923879532, Ter, KP382683432 * Tew); + TfA = Tfy + Tfz; + TiS = Tfz - Tfy; + { + E Tfh, TfF, Tfq, TfG, Tfg, Tfp; + Tfg = KP707106781 * (Tfa - Tff); + Tfh = Tf5 - Tfg; + TfF = Tf5 + Tfg; + Tfp = KP707106781 * (Tfn - Tfo); + Tfq = Tfm - Tfp; + TfG = Tfm + Tfp; + Tfr = FNMS(KP980785280, Tfq, KP195090322 * Tfh); + TfL = FMA(KP831469612, TfG, KP555570233 * TfF); + Tfv = FMA(KP195090322, Tfq, KP980785280 * Tfh); + TfH = FNMS(KP555570233, TfG, KP831469612 * TfF); + } + { + E TeQ, TfC, TeZ, TfD, TeP, TeY; + TeP = KP707106781 * (TeJ - TeO); + TeQ = TeE - TeP; + TfC = TeE + TeP; + TeY = KP707106781 * (TeW - TeX); + TeZ = TeV - TeY; + TfD = TeV + TeY; + Tf0 = FMA(KP980785280, TeQ, KP195090322 * TeZ); + TfK = FNMS(KP555570233, TfD, KP831469612 * TfC); + Tfu = FNMS(KP980785280, TeZ, KP195090322 * TeQ); + TfE = FMA(KP555570233, TfC, KP831469612 * TfD); + } + } + { + E Tez, Tfs, TiR, TiU; + Tez = Teb + Tey; + Tfs = Tf0 + Tfr; + ri[WS(rs, 46)] = Tez - Tfs; + ri[WS(rs, 14)] = Tez + Tfs; + TiR = Tfu + Tfv; + TiU = TiS + TiT; + ii[WS(rs, 14)] = TiR + TiU; + ii[WS(rs, 46)] = TiU - TiR; + } + { + E Tft, Tfw, TiV, TiW; + Tft = Teb - Tey; + Tfw = Tfu - Tfv; + ri[WS(rs, 62)] = Tft - 
Tfw; + ri[WS(rs, 30)] = Tft + Tfw; + TiV = Tfr - Tf0; + TiW = TiT - TiS; + ii[WS(rs, 30)] = TiV + TiW; + ii[WS(rs, 62)] = TiW - TiV; + } + { + E TfB, TfI, TiJ, TiO; + TfB = Tfx + TfA; + TfI = TfE + TfH; + ri[WS(rs, 38)] = TfB - TfI; + ri[WS(rs, 6)] = TfB + TfI; + TiJ = TfK + TfL; + TiO = TiK + TiN; + ii[WS(rs, 6)] = TiJ + TiO; + ii[WS(rs, 38)] = TiO - TiJ; + } + { + E TfJ, TfM, TiP, TiQ; + TfJ = Tfx - TfA; + TfM = TfK - TfL; + ri[WS(rs, 54)] = TfJ - TfM; + ri[WS(rs, 22)] = TfJ + TfM; + TiP = TfH - TfE; + TiQ = TiN - TiK; + ii[WS(rs, 22)] = TiP + TiQ; + ii[WS(rs, 54)] = TiQ - TiP; + } + } + { + E TfR, Tgj, TfY, Tiu, Tiz, TiF, Tgm, TiE, Tgd, Tgx, Tgh, Tgt, Tg6, Tgw, Tgg; + E Tgq; + { + E TfN, TfQ, Tgk, Tgl; + TfN = TdV + TdY; + TfQ = KP707106781 * (TfO + TfP); + TfR = TfN - TfQ; + Tgj = TfN + TfQ; + { + E TfU, TfX, Tiv, Tiy; + TfU = FNMS(KP382683432, TfT, KP923879532 * TfS); + TfX = FMA(KP923879532, TfV, KP382683432 * TfW); + TfY = TfU - TfX; + Tiu = TfU + TfX; + Tiv = KP707106781 * (Te4 + Te9); + Tiy = Tiw + Tix; + Tiz = Tiv + Tiy; + TiF = Tiy - Tiv; + } + Tgk = FMA(KP382683432, TfS, KP923879532 * TfT); + Tgl = FNMS(KP382683432, TfV, KP923879532 * TfW); + Tgm = Tgk + Tgl; + TiE = Tgl - Tgk; + { + E Tg9, Tgr, Tgc, Tgs, Tg8, Tgb; + Tg8 = KP707106781 * (Tfo + Tfn); + Tg9 = Tg7 - Tg8; + Tgr = Tg7 + Tg8; + Tgb = KP707106781 * (Tfa + Tff); + Tgc = Tga - Tgb; + Tgs = Tga + Tgb; + Tgd = FNMS(KP831469612, Tgc, KP555570233 * Tg9); + Tgx = FMA(KP195090322, Tgr, KP980785280 * Tgs); + Tgh = FMA(KP831469612, Tg9, KP555570233 * Tgc); + Tgt = FNMS(KP195090322, Tgs, KP980785280 * Tgr); + } + { + E Tg2, Tgo, Tg5, Tgp, Tg1, Tg4; + Tg1 = KP707106781 * (TeO + TeJ); + Tg2 = Tg0 - Tg1; + Tgo = Tg0 + Tg1; + Tg4 = KP707106781 * (TeW + TeX); + Tg5 = Tg3 - Tg4; + Tgp = Tg3 + Tg4; + Tg6 = FMA(KP555570233, Tg2, KP831469612 * Tg5); + Tgw = FNMS(KP195090322, Tgo, KP980785280 * Tgp); + Tgg = FNMS(KP831469612, Tg2, KP555570233 * Tg5); + Tgq = FMA(KP980785280, Tgo, KP195090322 * Tgp); + } + } + { + 
E TfZ, Tge, TiD, TiG; + TfZ = TfR + TfY; + Tge = Tg6 + Tgd; + ri[WS(rs, 42)] = TfZ - Tge; + ri[WS(rs, 10)] = TfZ + Tge; + TiD = Tgg + Tgh; + TiG = TiE + TiF; + ii[WS(rs, 10)] = TiD + TiG; + ii[WS(rs, 42)] = TiG - TiD; + } + { + E Tgf, Tgi, TiH, TiI; + Tgf = TfR - TfY; + Tgi = Tgg - Tgh; + ri[WS(rs, 58)] = Tgf - Tgi; + ri[WS(rs, 26)] = Tgf + Tgi; + TiH = Tgd - Tg6; + TiI = TiF - TiE; + ii[WS(rs, 26)] = TiH + TiI; + ii[WS(rs, 58)] = TiI - TiH; + } + { + E Tgn, Tgu, Tit, TiA; + Tgn = Tgj + Tgm; + Tgu = Tgq + Tgt; + ri[WS(rs, 34)] = Tgn - Tgu; + ri[WS(rs, 2)] = Tgn + Tgu; + Tit = Tgw + Tgx; + TiA = Tiu + Tiz; + ii[WS(rs, 2)] = Tit + TiA; + ii[WS(rs, 34)] = TiA - Tit; + } + { + E Tgv, Tgy, TiB, TiC; + Tgv = Tgj - Tgm; + Tgy = Tgw - Tgx; + ri[WS(rs, 50)] = Tgv - Tgy; + ri[WS(rs, 18)] = Tgv + Tgy; + TiB = Tgt - Tgq; + TiC = Tiz - Tiu; + ii[WS(rs, 18)] = TiB + TiC; + ii[WS(rs, 50)] = TiC - TiB; + } + } + { + E T7V, TaH, TjN, TjT, T8O, TjS, TaK, TjK, T9I, TaU, TaE, TaO, TaB, TaV, TaF; + E TaR; + { + E T7x, T7U, TjL, TjM; + T7x = T7l - T7w; + T7U = T7I - T7T; + T7V = T7x - T7U; + TaH = T7x + T7U; + TjL = TaZ - TaY; + TjM = Tjx - Tjw; + TjN = TjL + TjM; + TjT = TjM - TjL; + } + { + E T8m, TaI, T8N, TaJ; + { + E T8c, T8l, T8D, T8M; + T8c = T80 - T8b; + T8l = T8h - T8k; + T8m = FNMS(KP980785280, T8l, KP195090322 * T8c); + TaI = FMA(KP980785280, T8c, KP195090322 * T8l); + T8D = T8r - T8C; + T8M = T8I - T8L; + T8N = FMA(KP195090322, T8D, KP980785280 * T8M); + TaJ = FNMS(KP980785280, T8D, KP195090322 * T8M); + } + T8O = T8m - T8N; + TjS = TaJ - TaI; + TaK = TaI + TaJ; + TjK = T8m + T8N; + } + { + E T9u, TaM, T9H, TaN; + { + E T96, T9t, T9D, T9G; + T96 = T8U - T95; + T9t = T9h - T9s; + T9u = T96 - T9t; + TaM = T96 + T9t; + T9D = T9z - T9C; + T9G = T9E - T9F; + T9H = T9D - T9G; + TaN = T9D + T9G; + } + T9I = FMA(KP995184726, T9u, KP098017140 * T9H); + TaU = FNMS(KP634393284, TaN, KP773010453 * TaM); + TaE = FNMS(KP995184726, T9H, KP098017140 * T9u); + TaO = FMA(KP634393284, TaM, 
KP773010453 * TaN); + } + { + E Tan, TaP, TaA, TaQ; + { + E T9Z, Tam, Taw, Taz; + T9Z = T9N - T9Y; + Tam = Taa - Tal; + Tan = T9Z - Tam; + TaP = T9Z + Tam; + Taw = Tas - Tav; + Taz = Tax - Tay; + TaA = Taw - Taz; + TaQ = Taw + Taz; + } + TaB = FNMS(KP995184726, TaA, KP098017140 * Tan); + TaV = FMA(KP773010453, TaQ, KP634393284 * TaP); + TaF = FMA(KP098017140, TaA, KP995184726 * Tan); + TaR = FNMS(KP634393284, TaQ, KP773010453 * TaP); + } + { + E T8P, TaC, TjR, TjU; + T8P = T7V + T8O; + TaC = T9I + TaB; + ri[WS(rs, 47)] = T8P - TaC; + ri[WS(rs, 15)] = T8P + TaC; + TjR = TaE + TaF; + TjU = TjS + TjT; + ii[WS(rs, 15)] = TjR + TjU; + ii[WS(rs, 47)] = TjU - TjR; + } + { + E TaD, TaG, TjV, TjW; + TaD = T7V - T8O; + TaG = TaE - TaF; + ri[WS(rs, 63)] = TaD - TaG; + ri[WS(rs, 31)] = TaD + TaG; + TjV = TaB - T9I; + TjW = TjT - TjS; + ii[WS(rs, 31)] = TjV + TjW; + ii[WS(rs, 63)] = TjW - TjV; + } + { + E TaL, TaS, TjJ, TjO; + TaL = TaH + TaK; + TaS = TaO + TaR; + ri[WS(rs, 39)] = TaL - TaS; + ri[WS(rs, 7)] = TaL + TaS; + TjJ = TaU + TaV; + TjO = TjK + TjN; + ii[WS(rs, 7)] = TjJ + TjO; + ii[WS(rs, 39)] = TjO - TjJ; + } + { + E TaT, TaW, TjP, TjQ; + TaT = TaH - TaK; + TaW = TaU - TaV; + ri[WS(rs, 55)] = TaT - TaW; + ri[WS(rs, 23)] = TaT + TaW; + TjP = TaR - TaO; + TjQ = TjN - TjK; + ii[WS(rs, 23)] = TjP + TjQ; + ii[WS(rs, 55)] = TjQ - TjP; + } + } + { + E TbV, TcT, Tjj, Tjp, Tca, Tjo, TcW, Tjg, Tcu, Td6, TcQ, Td0, TcN, Td7, TcR; + E Td3; + { + E TbN, TbU, Tjh, Tji; + TbN = TbJ - TbM; + TbU = TbQ - TbT; + TbV = TbN - TbU; + TcT = TbN + TbU; + Tjh = Tdb - Tda; + Tji = Tj3 - Tj0; + Tjj = Tjh + Tji; + Tjp = Tji - Tjh; + } + { + E Tc2, TcU, Tc9, TcV; + { + E TbY, Tc1, Tc5, Tc8; + TbY = TbW - TbX; + Tc1 = TbZ - Tc0; + Tc2 = FNMS(KP831469612, Tc1, KP555570233 * TbY); + TcU = FMA(KP555570233, Tc1, KP831469612 * TbY); + Tc5 = Tc3 - Tc4; + Tc8 = Tc6 - Tc7; + Tc9 = FMA(KP831469612, Tc5, KP555570233 * Tc8); + TcV = FNMS(KP831469612, Tc8, KP555570233 * Tc5); + } + Tca = Tc2 - Tc9; + Tjo = 
TcV - TcU; + TcW = TcU + TcV; + Tjg = Tc2 + Tc9; + } + { + E Tcm, TcY, Tct, TcZ; + { + E Tce, Tcl, Tcp, Tcs; + Tce = Tcc - Tcd; + Tcl = Tch - Tck; + Tcm = Tce - Tcl; + TcY = Tce + Tcl; + Tcp = Tcn - Tco; + Tcs = Tcq - Tcr; + Tct = Tcp - Tcs; + TcZ = Tcp + Tcs; + } + Tcu = FMA(KP956940335, Tcm, KP290284677 * Tct); + Td6 = FNMS(KP471396736, TcZ, KP881921264 * TcY); + TcQ = FNMS(KP956940335, Tct, KP290284677 * Tcm); + Td0 = FMA(KP471396736, TcY, KP881921264 * TcZ); + } + { + E TcF, Td1, TcM, Td2; + { + E Tcx, TcE, TcI, TcL; + Tcx = Tcv - Tcw; + TcE = TcA - TcD; + TcF = Tcx - TcE; + Td1 = Tcx + TcE; + TcI = TcG - TcH; + TcL = TcJ - TcK; + TcM = TcI - TcL; + Td2 = TcI + TcL; + } + TcN = FNMS(KP956940335, TcM, KP290284677 * TcF); + Td7 = FMA(KP881921264, Td2, KP471396736 * Td1); + TcR = FMA(KP290284677, TcM, KP956940335 * TcF); + Td3 = FNMS(KP471396736, Td2, KP881921264 * Td1); + } + { + E Tcb, TcO, Tjn, Tjq; + Tcb = TbV + Tca; + TcO = Tcu + TcN; + ri[WS(rs, 45)] = Tcb - TcO; + ri[WS(rs, 13)] = Tcb + TcO; + Tjn = TcQ + TcR; + Tjq = Tjo + Tjp; + ii[WS(rs, 13)] = Tjn + Tjq; + ii[WS(rs, 45)] = Tjq - Tjn; + } + { + E TcP, TcS, Tjr, Tjs; + TcP = TbV - Tca; + TcS = TcQ - TcR; + ri[WS(rs, 61)] = TcP - TcS; + ri[WS(rs, 29)] = TcP + TcS; + Tjr = TcN - Tcu; + Tjs = Tjp - Tjo; + ii[WS(rs, 29)] = Tjr + Tjs; + ii[WS(rs, 61)] = Tjs - Tjr; + } + { + E TcX, Td4, Tjf, Tjk; + TcX = TcT + TcW; + Td4 = Td0 + Td3; + ri[WS(rs, 37)] = TcX - Td4; + ri[WS(rs, 5)] = TcX + Td4; + Tjf = Td6 + Td7; + Tjk = Tjg + Tjj; + ii[WS(rs, 5)] = Tjf + Tjk; + ii[WS(rs, 37)] = Tjk - Tjf; + } + { + E Td5, Td8, Tjl, Tjm; + Td5 = TcT - TcW; + Td8 = Td6 - Td7; + ri[WS(rs, 53)] = Td5 - Td8; + ri[WS(rs, 21)] = Td5 + Td8; + Tjl = Td3 - Td0; + Tjm = Tjj - Tjg; + ii[WS(rs, 21)] = Tjl + Tjm; + ii[WS(rs, 53)] = Tjm - Tjl; + } + } + { + E Tdd, TdF, Tj5, Tjb, Tdk, Tja, TdI, TiY, Tds, TdS, TdC, TdM, Tdz, TdT, TdD; + E TdP; + { + E Td9, Tdc, TiZ, Tj4; + Td9 = TbJ + TbM; + Tdc = Tda + Tdb; + Tdd = Td9 - Tdc; + TdF = Td9 + Tdc; 
+ TiZ = TbQ + TbT; + Tj4 = Tj0 + Tj3; + Tj5 = TiZ + Tj4; + Tjb = Tj4 - TiZ; + } + { + E Tdg, TdG, Tdj, TdH; + { + E Tde, Tdf, Tdh, Tdi; + Tde = TbW + TbX; + Tdf = TbZ + Tc0; + Tdg = FNMS(KP195090322, Tdf, KP980785280 * Tde); + TdG = FMA(KP980785280, Tdf, KP195090322 * Tde); + Tdh = Tc3 + Tc4; + Tdi = Tc6 + Tc7; + Tdj = FMA(KP195090322, Tdh, KP980785280 * Tdi); + TdH = FNMS(KP195090322, Tdi, KP980785280 * Tdh); + } + Tdk = Tdg - Tdj; + Tja = TdH - TdG; + TdI = TdG + TdH; + TiY = Tdg + Tdj; + } + { + E Tdo, TdK, Tdr, TdL; + { + E Tdm, Tdn, Tdp, Tdq; + Tdm = Tcn + Tco; + Tdn = Tck + Tch; + Tdo = Tdm - Tdn; + TdK = Tdm + Tdn; + Tdp = Tcc + Tcd; + Tdq = Tcq + Tcr; + Tdr = Tdp - Tdq; + TdL = Tdp + Tdq; + } + Tds = FMA(KP634393284, Tdo, KP773010453 * Tdr); + TdS = FNMS(KP098017140, TdK, KP995184726 * TdL); + TdC = FNMS(KP773010453, Tdo, KP634393284 * Tdr); + TdM = FMA(KP995184726, TdK, KP098017140 * TdL); + } + { + E Tdv, TdN, Tdy, TdO; + { + E Tdt, Tdu, Tdw, Tdx; + Tdt = Tcv + Tcw; + Tdu = TcK + TcJ; + Tdv = Tdt - Tdu; + TdN = Tdt + Tdu; + Tdw = TcG + TcH; + Tdx = TcA + TcD; + Tdy = Tdw - Tdx; + TdO = Tdw + Tdx; + } + Tdz = FNMS(KP773010453, Tdy, KP634393284 * Tdv); + TdT = FMA(KP098017140, TdN, KP995184726 * TdO); + TdD = FMA(KP773010453, Tdv, KP634393284 * Tdy); + TdP = FNMS(KP098017140, TdO, KP995184726 * TdN); + } + { + E Tdl, TdA, Tj9, Tjc; + Tdl = Tdd + Tdk; + TdA = Tds + Tdz; + ri[WS(rs, 41)] = Tdl - TdA; + ri[WS(rs, 9)] = Tdl + TdA; + Tj9 = TdC + TdD; + Tjc = Tja + Tjb; + ii[WS(rs, 9)] = Tj9 + Tjc; + ii[WS(rs, 41)] = Tjc - Tj9; + } + { + E TdB, TdE, Tjd, Tje; + TdB = Tdd - Tdk; + TdE = TdC - TdD; + ri[WS(rs, 57)] = TdB - TdE; + ri[WS(rs, 25)] = TdB + TdE; + Tjd = Tdz - Tds; + Tje = Tjb - Tja; + ii[WS(rs, 25)] = Tjd + Tje; + ii[WS(rs, 57)] = Tje - Tjd; + } + { + E TdJ, TdQ, TiX, Tj6; + TdJ = TdF + TdI; + TdQ = TdM + TdP; + ri[WS(rs, 33)] = TdJ - TdQ; + ri[WS(rs, 1)] = TdJ + TdQ; + TiX = TdS + TdT; + Tj6 = TiY + Tj5; + ii[WS(rs, 1)] = TiX + Tj6; + ii[WS(rs, 33)] = 
Tj6 - TiX; + } + { + E TdR, TdU, Tj7, Tj8; + TdR = TdF - TdI; + TdU = TdS - TdT; + ri[WS(rs, 49)] = TdR - TdU; + ri[WS(rs, 17)] = TdR + TdU; + Tj7 = TdP - TdM; + Tj8 = Tj5 - TiY; + ii[WS(rs, 17)] = Tj7 + Tj8; + ii[WS(rs, 49)] = Tj8 - Tj7; + } + } + { + E Tb1, Tbt, Tjz, TjF, Tb8, TjE, Tbw, Tju, Tbg, TbG, Tbq, TbA, Tbn, TbH, Tbr; + E TbD; + { + E TaX, Tb0, Tjv, Tjy; + TaX = T7l + T7w; + Tb0 = TaY + TaZ; + Tb1 = TaX - Tb0; + Tbt = TaX + Tb0; + Tjv = T7I + T7T; + Tjy = Tjw + Tjx; + Tjz = Tjv + Tjy; + TjF = Tjy - Tjv; + } + { + E Tb4, Tbu, Tb7, Tbv; + { + E Tb2, Tb3, Tb5, Tb6; + Tb2 = T80 + T8b; + Tb3 = T8h + T8k; + Tb4 = FNMS(KP555570233, Tb3, KP831469612 * Tb2); + Tbu = FMA(KP555570233, Tb2, KP831469612 * Tb3); + Tb5 = T8r + T8C; + Tb6 = T8I + T8L; + Tb7 = FMA(KP831469612, Tb5, KP555570233 * Tb6); + Tbv = FNMS(KP555570233, Tb5, KP831469612 * Tb6); + } + Tb8 = Tb4 - Tb7; + TjE = Tbv - Tbu; + Tbw = Tbu + Tbv; + Tju = Tb4 + Tb7; + } + { + E Tbc, Tby, Tbf, Tbz; + { + E Tba, Tbb, Tbd, Tbe; + Tba = T9z + T9C; + Tbb = T9s + T9h; + Tbc = Tba - Tbb; + Tby = Tba + Tbb; + Tbd = T8U + T95; + Tbe = T9E + T9F; + Tbf = Tbd - Tbe; + Tbz = Tbd + Tbe; + } + Tbg = FMA(KP471396736, Tbc, KP881921264 * Tbf); + TbG = FNMS(KP290284677, Tby, KP956940335 * Tbz); + Tbq = FNMS(KP881921264, Tbc, KP471396736 * Tbf); + TbA = FMA(KP956940335, Tby, KP290284677 * Tbz); + } + { + E Tbj, TbB, Tbm, TbC; + { + E Tbh, Tbi, Tbk, Tbl; + Tbh = T9N + T9Y; + Tbi = Tay + Tax; + Tbj = Tbh - Tbi; + TbB = Tbh + Tbi; + Tbk = Tas + Tav; + Tbl = Taa + Tal; + Tbm = Tbk - Tbl; + TbC = Tbk + Tbl; + } + Tbn = FNMS(KP881921264, Tbm, KP471396736 * Tbj); + TbH = FMA(KP290284677, TbB, KP956940335 * TbC); + Tbr = FMA(KP881921264, Tbj, KP471396736 * Tbm); + TbD = FNMS(KP290284677, TbC, KP956940335 * TbB); + } + { + E Tb9, Tbo, TjD, TjG; + Tb9 = Tb1 + Tb8; + Tbo = Tbg + Tbn; + ri[WS(rs, 43)] = Tb9 - Tbo; + ri[WS(rs, 11)] = Tb9 + Tbo; + TjD = Tbq + Tbr; + TjG = TjE + TjF; + ii[WS(rs, 11)] = TjD + TjG; + ii[WS(rs, 43)] = TjG - 
TjD; + } + { + E Tbp, Tbs, TjH, TjI; + Tbp = Tb1 - Tb8; + Tbs = Tbq - Tbr; + ri[WS(rs, 59)] = Tbp - Tbs; + ri[WS(rs, 27)] = Tbp + Tbs; + TjH = Tbn - Tbg; + TjI = TjF - TjE; + ii[WS(rs, 27)] = TjH + TjI; + ii[WS(rs, 59)] = TjI - TjH; + } + { + E Tbx, TbE, Tjt, TjA; + Tbx = Tbt + Tbw; + TbE = TbA + TbD; + ri[WS(rs, 35)] = Tbx - TbE; + ri[WS(rs, 3)] = Tbx + TbE; + Tjt = TbG + TbH; + TjA = Tju + Tjz; + ii[WS(rs, 3)] = Tjt + TjA; + ii[WS(rs, 35)] = TjA - Tjt; + } + { + E TbF, TbI, TjB, TjC; + TbF = Tbt - Tbw; + TbI = TbG - TbH; + ri[WS(rs, 51)] = TbF - TbI; + ri[WS(rs, 19)] = TbF + TbI; + TjB = TbD - TbA; + TjC = Tjz - Tju; + ii[WS(rs, 19)] = TjB + TjC; + ii[WS(rs, 51)] = TjC - TjB; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 9}, + {TW_CEXP, 0, 27}, + {TW_CEXP, 0, 63}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 64, "t2_64", twinstr, &GENUS, {880, 386, 274, 0}, 0, 0, 0 }; + +void X(codelet_t2_64) (planner *p) { + X(kdft_dit_register) (p, t2_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/codelets/t2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:45:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -name t2_8 -include t.h */ + +/* + * This function contains 74 FP additions, 50 FP multiplications, + * (or, 44 additions, 20 multiplications, 30 fused multiply/add), + * 64 stack variables, 1 constants, and 32 memory accesses + */ +#include "t.h" + +static void t2_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E TS, T1m, TJ, T1l, T1k, Tw, T1w, T1u; + { + E T2, T3, Tl, Tn, T5, T4, Tm, Tr, T6; + T2 = W[0]; + T3 = W[2]; + Tl = W[4]; + Tn = W[5]; + T5 = W[1]; + T4 = T2 * T3; + Tm = T2 * Tl; + Tr = T2 * Tn; + T6 = W[3]; + { + E T1, T1s, TG, Td, T1r, Tu, TY, Tk, TW, T18, T1d, TD, TH, TA, T13; + E TE, T14; + { + E To, Ts, Tf, T7, T8, Ti, Tb, T9, Tc, TC, Ta, TF, TB, Tg, Th; + E Tj; + T1 = ri[0]; + To = FMA(T5, Tn, Tm); + Ts = FNMS(T5, Tl, Tr); + Tf = FMA(T5, T6, T4); + T7 = FNMS(T5, T6, T4); + Ta = T2 * T6; + T1s = ii[0]; + T8 = ri[WS(rs, 4)]; + TF = Tf * Tn; + TB = Tf * Tl; + Ti = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + T9 = T7 * T8; + Tc = ii[WS(rs, 4)]; + TG = FNMS(Ti, Tl, TF); + TC = FMA(Ti, Tn, TB); + { + E Tp, T1q, Tt, Tq, TX; + Tp = ri[WS(rs, 6)]; + Td = FMA(Tb, Tc, T9); + T1q = T7 * Tc; + Tt = ii[WS(rs, 6)]; + Tq = To * Tp; + Tg = ri[WS(rs, 2)]; + T1r = FNMS(Tb, T8, T1q); + TX = To * 
Tt; + Tu = FMA(Ts, Tt, Tq); + Th = Tf * Tg; + Tj = ii[WS(rs, 2)]; + TY = FNMS(Ts, Tp, TX); + } + { + E TO, TQ, TN, TP, T1a, T1b; + { + E TK, TM, TL, T19, TV; + TK = ri[WS(rs, 7)]; + TM = ii[WS(rs, 7)]; + Tk = FMA(Ti, Tj, Th); + TV = Tf * Tj; + TL = Tl * TK; + T19 = Tl * TM; + TO = ri[WS(rs, 3)]; + TW = FNMS(Ti, Tg, TV); + TQ = ii[WS(rs, 3)]; + TN = FMA(Tn, TM, TL); + TP = T3 * TO; + T1a = FNMS(Tn, TK, T19); + T1b = T3 * TQ; + } + { + E Tx, Tz, Ty, T12, T1c, TR; + Tx = ri[WS(rs, 1)]; + TR = FMA(T6, TQ, TP); + Tz = ii[WS(rs, 1)]; + T1c = FNMS(T6, TO, T1b); + Ty = T2 * Tx; + T18 = TN - TR; + TS = TN + TR; + T12 = T2 * Tz; + T1d = T1a - T1c; + T1m = T1a + T1c; + TD = ri[WS(rs, 5)]; + TH = ii[WS(rs, 5)]; + TA = FMA(T5, Tz, Ty); + T13 = FNMS(T5, Tx, T12); + TE = TC * TD; + T14 = TC * TH; + } + } + } + { + E Te, T1p, T1t, Tv; + { + E T1g, T10, T1z, T1B, T1A, T1j, T1C, T1f; + { + E T1x, T11, T16, T1y; + { + E TU, TZ, TI, T15; + Te = T1 + Td; + TU = T1 - Td; + TZ = TW - TY; + T1p = TW + TY; + TI = FMA(TG, TH, TE); + T15 = FNMS(TG, TD, T14); + T1t = T1r + T1s; + T1x = T1s - T1r; + T1g = TU - TZ; + T10 = TU + TZ; + T11 = TA - TI; + TJ = TA + TI; + T1l = T13 + T15; + T16 = T13 - T15; + T1y = Tk - Tu; + Tv = Tk + Tu; + } + { + E T1i, T1e, T17, T1h; + T1i = T18 + T1d; + T1e = T18 - T1d; + T17 = T11 + T16; + T1h = T16 - T11; + T1z = T1x - T1y; + T1B = T1y + T1x; + T1A = T1h + T1i; + T1j = T1h - T1i; + T1C = T1e - T17; + T1f = T17 + T1e; + } + } + ri[WS(rs, 7)] = FNMS(KP707106781, T1j, T1g); + ii[WS(rs, 7)] = FNMS(KP707106781, T1C, T1B); + ri[WS(rs, 1)] = FMA(KP707106781, T1f, T10); + ri[WS(rs, 5)] = FNMS(KP707106781, T1f, T10); + ii[WS(rs, 1)] = FMA(KP707106781, T1A, T1z); + ii[WS(rs, 5)] = FNMS(KP707106781, T1A, T1z); + ri[WS(rs, 3)] = FMA(KP707106781, T1j, T1g); + ii[WS(rs, 3)] = FMA(KP707106781, T1C, T1B); + } + T1k = Te - Tv; + Tw = Te + Tv; + T1w = T1t - T1p; + T1u = T1p + T1t; + } + } + } + { + E TT, T1v, T1n, T1o; + TT = TJ + TS; + T1v = TS - TJ; + T1n = T1l - T1m; + T1o = 
T1l + T1m; + ii[WS(rs, 2)] = T1v + T1w; + ii[WS(rs, 6)] = T1w - T1v; + ri[0] = Tw + TT; + ri[WS(rs, 4)] = Tw - TT; + ii[0] = T1o + T1u; + ii[WS(rs, 4)] = T1u - T1o; + ri[WS(rs, 2)] = T1k + T1n; + ri[WS(rs, 6)] = T1k - T1n; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 7}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 8, "t2_8", twinstr, &GENUS, {44, 20, 30, 0}, 0, 0, 0 }; + +void X(codelet_t2_8) (planner *p) { + X(kdft_dit_register) (p, t2_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -name t2_8 -include t.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 42 stack variables, 1 constants, and 32 memory accesses + */ +#include "t.h" + +static void t2_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E T2, T5, T3, T6, T8, Tc, Tg, Ti, Tl, Tm, Tn, Tz, Tp, Tx; + { + E T4, Tb, T7, Ta; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tb = T5 * T3; + T7 = T5 * T6; + Ta = T2 * T6; + T8 = T4 - T7; + Tc = Ta + Tb; + Tg = T4 + T7; + Ti = Ta - Tb; + Tl = W[4]; + Tm = W[5]; + Tn = FMA(T2, Tl, T5 * Tm); + Tz = FNMS(Ti, Tl, Tg * Tm); + Tp = FNMS(T5, Tl, T2 * Tm); + Tx = FMA(Tg, Tl, Ti * Tm); + } + { + E Tf, T1i, TL, T1d, TJ, T17, TV, TY, Ts, T1j, TO, T1a, TC, T16, TQ; + E TT; + { + E T1, T1c, Te, T1b, T9, Td; + T1 = ri[0]; + T1c = ii[0]; + T9 = ri[WS(rs, 4)]; + Td = ii[WS(rs, 4)]; + Te = FMA(T8, T9, Tc * Td); + T1b = FNMS(Tc, T9, T8 * Td); + Tf = T1 + Te; + T1i = T1c - T1b; + TL = T1 - Te; + T1d = T1b + T1c; + } + { + E TF, TW, TI, TX; + { + E TD, TE, 
TG, TH; + TD = ri[WS(rs, 7)]; + TE = ii[WS(rs, 7)]; + TF = FMA(Tl, TD, Tm * TE); + TW = FNMS(Tm, TD, Tl * TE); + TG = ri[WS(rs, 3)]; + TH = ii[WS(rs, 3)]; + TI = FMA(T3, TG, T6 * TH); + TX = FNMS(T6, TG, T3 * TH); + } + TJ = TF + TI; + T17 = TW + TX; + TV = TF - TI; + TY = TW - TX; + } + { + E Tk, TM, Tr, TN; + { + E Th, Tj, To, Tq; + Th = ri[WS(rs, 2)]; + Tj = ii[WS(rs, 2)]; + Tk = FMA(Tg, Th, Ti * Tj); + TM = FNMS(Ti, Th, Tg * Tj); + To = ri[WS(rs, 6)]; + Tq = ii[WS(rs, 6)]; + Tr = FMA(Tn, To, Tp * Tq); + TN = FNMS(Tp, To, Tn * Tq); + } + Ts = Tk + Tr; + T1j = Tk - Tr; + TO = TM - TN; + T1a = TM + TN; + } + { + E Tw, TR, TB, TS; + { + E Tu, Tv, Ty, TA; + Tu = ri[WS(rs, 1)]; + Tv = ii[WS(rs, 1)]; + Tw = FMA(T2, Tu, T5 * Tv); + TR = FNMS(T5, Tu, T2 * Tv); + Ty = ri[WS(rs, 5)]; + TA = ii[WS(rs, 5)]; + TB = FMA(Tx, Ty, Tz * TA); + TS = FNMS(Tz, Ty, Tx * TA); + } + TC = Tw + TB; + T16 = TR + TS; + TQ = Tw - TB; + TT = TR - TS; + } + { + E Tt, TK, T1f, T1g; + Tt = Tf + Ts; + TK = TC + TJ; + ri[WS(rs, 4)] = Tt - TK; + ri[0] = Tt + TK; + { + E T19, T1e, T15, T18; + T19 = T16 + T17; + T1e = T1a + T1d; + ii[0] = T19 + T1e; + ii[WS(rs, 4)] = T1e - T19; + T15 = Tf - Ts; + T18 = T16 - T17; + ri[WS(rs, 6)] = T15 - T18; + ri[WS(rs, 2)] = T15 + T18; + } + T1f = TJ - TC; + T1g = T1d - T1a; + ii[WS(rs, 2)] = T1f + T1g; + ii[WS(rs, 6)] = T1g - T1f; + { + E T11, T1k, T14, T1h, T12, T13; + T11 = TL - TO; + T1k = T1i - T1j; + T12 = TT - TQ; + T13 = TV + TY; + T14 = KP707106781 * (T12 - T13); + T1h = KP707106781 * (T12 + T13); + ri[WS(rs, 7)] = T11 - T14; + ii[WS(rs, 5)] = T1k - T1h; + ri[WS(rs, 3)] = T11 + T14; + ii[WS(rs, 1)] = T1h + T1k; + } + { + E TP, T1m, T10, T1l, TU, TZ; + TP = TL + TO; + T1m = T1j + T1i; + TU = TQ + TT; + TZ = TV - TY; + T10 = KP707106781 * (TU + TZ); + T1l = KP707106781 * (TZ - TU); + ri[WS(rs, 5)] = TP - T10; + ii[WS(rs, 7)] = T1m - T1l; + ri[WS(rs, 1)] = TP + T10; + ii[WS(rs, 3)] = T1l + T1m; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + 
{TW_CEXP, 0, 1}, + {TW_CEXP, 0, 3}, + {TW_CEXP, 0, 7}, + {TW_NEXT, 1, 0} +}; + +static const ct_desc desc = { 8, "t2_8", twinstr, &GENUS, {56, 26, 18, 0}, 0, 0, 0 }; + +void X(codelet_t2_8) (planner *p) { + X(kdft_dit_register) (p, t2_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1 @@ +#include "t.h" /* same stuff, no need to duplicate */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/n.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/n.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-dft.h" +#include "n.h" + +static int okp(const kdft_desc *d, + const R *ri, const R *ii, + const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr) +{ /* applicability predicate: returns nonzero only when every stride the descriptor pins (a nonzero d->is, d->os, d->ivs or d->ovs) equals the corresponding requested stride; a zero descriptor field places no constraint */ + UNUSED(ri); UNUSED(ii); UNUSED(ro); UNUSED(io); UNUSED(vl); UNUSED(plnr); /* the data pointers, vl and plnr take no part in the decision */ + return (1 + && (!d->is || (d->is == is)) + && (!d->os || (d->os == os)) + && (!d->ivs || (d->ivs == ivs)) + && (!d->ovs || (d->ovs == ovs)) + ); +} + +const kdft_genus GENUS = { okp, 1 }; /* genus record pairing the okp predicate with the value 1; presumably the vector length of these scalar codelets -- TODO confirm against the kdft_genus declaration */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/n.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/n.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(dft_n_genus) +extern const kdft_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/q.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/q.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1 @@ +#include "t.h" /* same stuff, no need to duplicate */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/t.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/t.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-dft.h" +#include "t.h" + +static int okp(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ /* applicability predicate: returns nonzero only when every stride the descriptor pins (a nonzero d->rs, d->vs or d->ms) equals the corresponding requested stride; a zero descriptor field places no constraint */ + UNUSED(rio); UNUSED(iio); UNUSED(m); UNUSED(mb); UNUSED(me); UNUSED(plnr); /* the data pointers, m, mb, me and plnr take no part in the decision */ + return (1 + && (!d->rs || (d->rs == rs)) + && (!d->vs || (d->vs == vs)) + && (!d->ms || (d->ms == ms)) + ); +} + +const ct_genus GENUS = { okp, 1 }; /* genus record pairing the okp predicate with the value 1; presumably the vector length of these scalar codelets -- TODO confirm against the ct_genus declaration */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/scalar/t.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/scalar/t.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(dft_t_genus) +extern const ct_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +SUBDIRS = common sse2 avx altivec neon +EXTRA_DIST = n1b.h n1f.h n2b.h n2f.h n2s.h q1b.h q1f.h t1b.h t1bu.h \ +t1f.h t1fu.h t2b.h t2f.h t3b.h t3f.h ts.h codlist.mk simd.mk diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,642 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = dft/simd +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + 
$(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings 
from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = 
@C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = 
@am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = common sse2 avx altivec neon +EXTRA_DIST = n1b.h n1f.h n2b.h n2f.h n2s.h q1b.h q1f.h t1b.h t1bu.h \ +t1f.h t1fu.h t2b.h t2f.h t3b.h t3f.h ts.h codlist.mk simd.mk + +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/simd/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/simd/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck 
installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CFLAGS = $(ALTIVEC_CFLAGS) +SIMD_HEADER=simd-altivec.h + +include $(top_srcdir)/dft/simd/codlist.mk +include $(top_srcdir)/dft/simd/simd.mk + +if HAVE_ALTIVEC + +BUILT_SOURCES = $(EXTRA_DIST) +noinst_LTLIBRARIES = libdft_altivec_codelets.la +libdft_altivec_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,967 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of DFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. 
It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/dft/simd/codlist.mk \ + $(top_srcdir)/dft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = dft/simd/altivec +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_altivec_codelets_la_LIBADD = +am__libdft_altivec_codelets_la_SOURCES_DIST = n1fv_2.c n1fv_3.c \ + n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c \ + n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ + n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c \ + n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ + n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c \ + n1bv_15.c n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c \ + n1bv_25.c n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c \ + n2fv_12.c n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c 
n2fv_20.c \ + n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ + n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c \ + n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c \ + t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c \ + t1fuv_10.c t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c \ + t1fv_7.c t1fv_8.c t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c \ + t1fv_16.c t1fv_32.c t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c \ + t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c \ + t2fv_10.c t2fv_20.c t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c \ + t3fv_32.c t3fv_5.c t3fv_10.c t3fv_20.c t3fv_25.c t1buv_2.c \ + t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c \ + t1buv_9.c t1buv_10.c t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c \ + t1bv_6.c t1bv_7.c t1bv_8.c t1bv_9.c t1bv_10.c t1bv_12.c \ + t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c t1bv_20.c t1bv_25.c \ + t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ + t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c t3bv_4.c t3bv_8.c \ + t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c t3bv_20.c t3bv_25.c \ + t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c t2sv_4.c \ + t2sv_8.c t2sv_16.c t2sv_32.c q1fv_2.c q1fv_4.c q1fv_5.c \ + q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c genus.c codlist.c +am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ + n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ + n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ + n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo +am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ + n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ + n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ + n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo +am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ + n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ + n2fv_20.lo +am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ + n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo 
n2bv_64.lo \ + n2bv_20.lo +am__objects_5 = n2sv_4.lo n2sv_8.lo n2sv_16.lo n2sv_32.lo n2sv_64.lo +am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ + t1fuv_7.lo t1fuv_8.lo t1fuv_9.lo t1fuv_10.lo +am__objects_7 = t1fv_2.lo t1fv_3.lo t1fv_4.lo t1fv_5.lo t1fv_6.lo \ + t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo +am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo +am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ + t3fv_10.lo t3fv_20.lo t3fv_25.lo +am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ + t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo +am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ + t1bv_7.lo t1bv_8.lo t1bv_9.lo t1bv_10.lo t1bv_12.lo t1bv_15.lo \ + t1bv_16.lo t1bv_32.lo t1bv_64.lo t1bv_20.lo t1bv_25.lo +am__objects_12 = t2bv_2.lo t2bv_4.lo t2bv_8.lo t2bv_16.lo t2bv_32.lo \ + t2bv_64.lo t2bv_5.lo t2bv_10.lo t2bv_20.lo t2bv_25.lo +am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ + t3bv_10.lo t3bv_20.lo t3bv_25.lo +am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo +am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo +am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) $(am__objects_9) \ + $(am__objects_10) $(am__objects_11) $(am__objects_12) \ + $(am__objects_13) $(am__objects_14) $(am__objects_15) \ + $(am__objects_16) $(am__objects_17) +am__objects_19 = $(am__objects_18) genus.lo codlist.lo +@HAVE_ALTIVEC_TRUE@am__objects_20 = $(am__objects_19) +@HAVE_ALTIVEC_TRUE@am_libdft_altivec_codelets_la_OBJECTS = \ +@HAVE_ALTIVEC_TRUE@ $(am__objects_20) 
+libdft_altivec_codelets_la_OBJECTS = \ + $(am_libdft_altivec_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_ALTIVEC_TRUE@am_libdft_altivec_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_altivec_codelets_la_SOURCES) +DIST_SOURCES = $(am__libdft_altivec_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. 
+am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ 
+OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = 
@sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(ALTIVEC_CFLAGS) +SIMD_HEADER = simd-altivec.h + +########################################################################### +# n1fv_ is a hard-coded FFTW_FORWARD FFT of size , using SIMD +N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ +n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c + + +# as above, with restricted input vector stride +N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c + + +# as above, but FFTW_BACKWARD +N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ +n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ +n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c + +N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ +n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c + + +# split-complex codelets +N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c + +########################################################################### +# t1fv_ is a "twiddle" FFT of size , implementing a radix-r DIT step +# for an FFTW_FORWARD transform, using SIMD +T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ +t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ +t1fv_20.c t1fv_25.c + + +# same as t1fv_*, but with different twiddle storage scheme +T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c + +T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ +t3fv_20.c t3fv_25.c + +T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ +t1fuv_8.c t1fuv_9.c t1fuv_10.c + + +# as above, but FFTW_BACKWARD 
+T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ +t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \ +t1bv_20.c t1bv_25.c + + +# same as t1bv_*, but with different twiddle storage scheme +T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ +t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c + +T3B = t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \ +t3bv_20.c t3bv_25.c + +T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \ +t1buv_8.c t1buv_9.c t1buv_10.c + + +# split-complex codelets +T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c +T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c + +########################################################################### +# q1fv_ is twiddle FFTW_FORWARD FFTs of size (DIF step), +# where the output is transposed, using SIMD. This is used for +# in-place transposes in sizes that are divisible by ^2. These +# codelets have size ~ ^2, so you should probably not use +# bigger than 8 or so. 
+Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c + +# as above, but FFTW_BACKWARD +Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c + +########################################################################### +SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \ +$(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B) + +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/dft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_ALTIVEC_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_ALTIVEC_TRUE@noinst_LTLIBRARIES = libdft_altivec_codelets.la +@HAVE_ALTIVEC_TRUE@libdft_altivec_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/simd/altivec/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/simd/altivec/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft_altivec_codelets.la: $(libdft_altivec_codelets_la_OBJECTS) $(libdft_altivec_codelets_la_DEPENDENCIES) $(EXTRA_libdft_altivec_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_libdft_altivec_codelets_la_rpath) $(libdft_altivec_codelets_la_OBJECTS) $(libdft_altivec_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n1bv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || 
unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! 
*/"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2sv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/n2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/n2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/q1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/q1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1buv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1buv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fuv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fuv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1sv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t1sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t1sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/altivec/t3fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/t3fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CFLAGS = $(AVX_CFLAGS) +SIMD_HEADER=simd-avx.h + +include $(top_srcdir)/dft/simd/codlist.mk +include $(top_srcdir)/dft/simd/simd.mk + +if HAVE_AVX + +BUILT_SOURCES = $(EXTRA_DIST) +noinst_LTLIBRARIES = libdft_avx_codelets.la +libdft_avx_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,965 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of DFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/dft/simd/codlist.mk \ + $(top_srcdir)/dft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = dft/simd/avx +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_avx_codelets_la_LIBADD = +am__libdft_avx_codelets_la_SOURCES_DIST = n1fv_2.c n1fv_3.c n1fv_4.c \ + n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c n1fv_10.c \ + n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c n1fv_16.c \ + n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c n1bv_2.c \ + n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c n1bv_9.c \ + n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ + n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c \ + n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ + n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c n2bv_2.c \ + n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c n2bv_14.c \ + n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c n2sv_8.c \ + n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c t1fuv_4.c \ + t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c t1fuv_10.c \ + t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ + t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c \ + t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c t2fv_4.c t2fv_8.c \ + t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c t2fv_10.c t2fv_20.c \ + t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c \ + t3fv_10.c t3fv_20.c t3fv_25.c t1buv_2.c 
t1buv_3.c t1buv_4.c \ + t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c t1buv_9.c t1buv_10.c \ + t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ + t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c \ + t1bv_64.c t1bv_20.c t1bv_25.c t2bv_2.c t2bv_4.c t2bv_8.c \ + t2bv_16.c t2bv_32.c t2bv_64.c t2bv_5.c t2bv_10.c t2bv_20.c \ + t2bv_25.c t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c \ + t3bv_10.c t3bv_20.c t3bv_25.c t1sv_2.c t1sv_4.c t1sv_8.c \ + t1sv_16.c t1sv_32.c t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c \ + q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c \ + q1bv_8.c genus.c codlist.c +am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ + n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ + n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ + n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo +am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ + n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ + n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ + n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo +am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ + n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ + n2fv_20.lo +am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ + n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo n2bv_64.lo \ + n2bv_20.lo +am__objects_5 = n2sv_4.lo n2sv_8.lo n2sv_16.lo n2sv_32.lo n2sv_64.lo +am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ + t1fuv_7.lo t1fuv_8.lo t1fuv_9.lo t1fuv_10.lo +am__objects_7 = t1fv_2.lo t1fv_3.lo t1fv_4.lo t1fv_5.lo t1fv_6.lo \ + t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo +am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo +am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ + t3fv_10.lo t3fv_20.lo 
t3fv_25.lo +am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ + t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo +am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ + t1bv_7.lo t1bv_8.lo t1bv_9.lo t1bv_10.lo t1bv_12.lo t1bv_15.lo \ + t1bv_16.lo t1bv_32.lo t1bv_64.lo t1bv_20.lo t1bv_25.lo +am__objects_12 = t2bv_2.lo t2bv_4.lo t2bv_8.lo t2bv_16.lo t2bv_32.lo \ + t2bv_64.lo t2bv_5.lo t2bv_10.lo t2bv_20.lo t2bv_25.lo +am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ + t3bv_10.lo t3bv_20.lo t3bv_25.lo +am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo +am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo +am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) $(am__objects_9) \ + $(am__objects_10) $(am__objects_11) $(am__objects_12) \ + $(am__objects_13) $(am__objects_14) $(am__objects_15) \ + $(am__objects_16) $(am__objects_17) +am__objects_19 = $(am__objects_18) genus.lo codlist.lo +@HAVE_AVX_TRUE@am__objects_20 = $(am__objects_19) +@HAVE_AVX_TRUE@am_libdft_avx_codelets_la_OBJECTS = $(am__objects_20) +libdft_avx_codelets_la_OBJECTS = $(am_libdft_avx_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_AVX_TRUE@am_libdft_avx_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles 
+am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_avx_codelets_la_SOURCES) +DIST_SOURCES = $(am__libdft_avx_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(AVX_CFLAGS) +SIMD_HEADER = simd-avx.h + +########################################################################### +# 
n1fv_<n> is a hard-coded FFTW_FORWARD FFT of size <n>, using SIMD +N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ +n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c + + +# as above, with restricted input vector stride +N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c + + +# as above, but FFTW_BACKWARD +N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ +n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ +n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c + +N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ +n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c + + +# split-complex codelets +N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c + +########################################################################### +# t1fv_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step +# for an FFTW_FORWARD transform, using SIMD +T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ +t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ +t1fv_20.c t1fv_25.c + + +# same as t1fv_*, but with different twiddle storage scheme +T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c + +T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ +t3fv_20.c t3fv_25.c + +T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ +t1fuv_8.c t1fuv_9.c t1fuv_10.c + + +# as above, but FFTW_BACKWARD +T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ +t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \ +t1bv_20.c t1bv_25.c + + +# same as t1bv_*, but with different twiddle storage scheme +T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ +t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c + +T3B = t3bv_4.c t3bv_8.c
t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \ +t3bv_20.c t3bv_25.c + +T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \ +t1buv_8.c t1buv_9.c t1buv_10.c + + +# split-complex codelets +T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c +T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c + +########################################################################### +# q1fv_<r> is <r> twiddle FFTW_FORWARD FFTs of size <r> (DIF step), +# where the output is transposed, using SIMD. This is used for +# in-place transposes in sizes that are divisible by <r>^2. These +# codelets have size ~ <r>^2, so you should probably not use <r> +# bigger than 8 or so. +Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c + +# as above, but FFTW_BACKWARD +Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c + +########################################################################### +SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \ +$(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B) + +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/dft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_AVX_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_AVX_TRUE@noinst_LTLIBRARIES = libdft_avx_codelets.la +@HAVE_AVX_TRUE@libdft_avx_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/simd/avx/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/simd/avx/Makefile +.PRECIOUS: Makefile +Makefile:
$(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft_avx_codelets.la: $(libdft_avx_codelets_la_OBJECTS) $(libdft_avx_codelets_la_DEPENDENCIES) $(EXTRA_libdft_avx_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_libdft_avx_codelets_la_rpath) $(libdft_avx_codelets_la_OBJECTS) $(libdft_avx_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_128.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || 
unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! 
*/"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2sv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/n2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/n2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/q1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/q1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1buv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1buv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fuv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fuv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1sv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t1sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t1sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/avx/t3fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/t3fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/codlist.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/codlist.mk Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,79 @@ +# This file contains a standard list of DFT SIMD codelets. 
It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +########################################################################### +# n1fv_<n> is a hard-coded FFTW_FORWARD FFT of size <n>, using SIMD +N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ +n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c + +# as above, with restricted input vector stride +N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c + +# as above, but FFTW_BACKWARD +N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ +n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ +n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c + +N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ +n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c + +# split-complex codelets +N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c + +########################################################################### +# t1fv_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step +# for an FFTW_FORWARD transform, using SIMD +T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ +t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ +t1fv_20.c t1fv_25.c + +# same as t1fv_*, but with different twiddle storage scheme +T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \
+t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c +T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ +t3fv_20.c t3fv_25.c +T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ +t1fuv_8.c t1fuv_9.c t1fuv_10.c + +# as above, but FFTW_BACKWARD +T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ +t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \ +t1bv_20.c t1bv_25.c + +# same as t1bv_*, but with different twiddle storage scheme +T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ +t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c +T3B = t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \ +t3bv_20.c t3bv_25.c +T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \ +t1buv_8.c t1buv_9.c t1buv_10.c + +# split-complex codelets +T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c +T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c + +########################################################################### +# q1fv_<r> is <r> twiddle FFTW_FORWARD FFTs of size <r> (DIF step), +# where the output is transposed, using SIMD. This is used for +# in-place transposes in sizes that are divisible by <r>^2. These +# codelets have size ~ <r>^2, so you should probably not use <r> +# bigger than 8 or so.
+Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c + +# as above, but FFTW_BACKWARD +Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c + +########################################################################### +SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \ +$(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,73 @@ +# include the list of codelets + +include $(top_srcdir)/dft/simd/codlist.mk + +ALL_CODELETS = $(SIMD_CODELETS) +BUILT_SOURCES= $(SIMD_CODELETS) $(CODLIST) +EXTRA_DIST = $(BUILT_SOURCES) genus.c +INCLUDE_SIMD_HEADER="\#include SIMD_HEADER" +XRENAME=XSIMD +SOLVTAB_NAME = XSIMD(solvtab_dft) + +# include special rules for regenerating codelets. +include $(top_srcdir)/support/Makefile.codelets + +if MAINTAINER_MODE + +GFLAGS = -simd $(FLAGS_COMMON) -pipeline-latency 8 +FLAGS_T2S=-twiddle-log3 -precompute-twiddles +FLAGS_T3=-twiddle-log3 -precompute-twiddles -no-generate-bytw + +n1fv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -n $* -name n1fv_$* -include "n1f.h") | $(ADD_DATE) | $(INDENT) >$@ + +n2fv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -n $* -name n2fv_$* -with-ostride 2 -include "n2f.h" -store-multiple 2) | $(ADD_DATE) | $(INDENT) >$@ + +n1bv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -sign 1 -n $* -name n1bv_$* -include "n1b.h") | $(ADD_DATE) | $(INDENT) >$@ + +n2bv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -sign 1 -n $* -name n2bv_$* -with-ostride 2 -include "n2b.h" -store-multiple 2) | $(ADD_DATE) | $(INDENT) >$@ + +n2sv_%.c: $(CODELET_DEPS) $(GEN_NOTW) + ($(PRELUDE_COMMANDS_DFT); 
$(TWOVERS) $(GEN_NOTW) $(GFLAGS) -n $* -name n2sv_$* -with-ostride 1 -include "n2s.h" -store-multiple 4) | $(ADD_DATE) | $(INDENT) >$@ + +t1fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1fv_$* -include "t1f.h") | $(ADD_DATE) | $(INDENT) >$@ + +t1fuv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1fuv_$* -include "t1fu.h") | $(ADD_DATE) | $(INDENT) >$@ + +t2fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t2fv_$* -include "t2f.h") | $(ADD_DATE) | $(INDENT) >$@ + +t3fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) $(FLAGS_T3) -n $* -name t3fv_$* -include "t3f.h") | $(ADD_DATE) | $(INDENT) >$@ + +t1bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1bv_$* -include "t1b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +t1buv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1buv_$* -include "t1bu.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +t2bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t2bv_$* -include "t2b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +t3bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) $(FLAGS_T3) -n $* -name t3bv_$* -include "t3b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +t1sv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(GFLAGS) -n $* -name t1sv_$* -include "ts.h") | $(ADD_DATE) | $(INDENT) >$@ + +t2sv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(GFLAGS) $(FLAGS_T2S) -n $* -name t2sv_$* -include "ts.h") | $(ADD_DATE) | $(INDENT) >$@ 
+ +q1fv_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ_C) $(GFLAGS) -n $* -dif -name q1fv_$* -include "q1f.h") | $(ADD_DATE) | $(INDENT) >$@ + +q1bv_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ_C) + ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ_C) $(GFLAGS) -n $* -dif -name q1bv_$* -include "q1b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,692 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# include the list of codelets + +# This file contains a standard list of DFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . 
+VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/dft/simd/codlist.mk \ + 
$(top_srcdir)/support/Makefile.codelets $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am +subdir = dft/simd/common +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ 
+DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ 
+am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +########################################################################### +# n1fv_<n> is a hard-coded FFTW_FORWARD FFT of size <n>, using SIMD +N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ +n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c + + +# as above, with restricted input vector stride +N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c + + +# as above, but FFTW_BACKWARD +N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ +n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ +n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c + +N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ +n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c + + +# split-complex codelets +N2S =
n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c + +########################################################################### +# t1fv_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step +# for an FFTW_FORWARD transform, using SIMD +T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ +t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ +t1fv_20.c t1fv_25.c + + +# same as t1fv_*, but with different twiddle storage scheme +T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c + +T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ +t3fv_20.c t3fv_25.c + +T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ +t1fuv_8.c t1fuv_9.c t1fuv_10.c + + +# as above, but FFTW_BACKWARD +T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ +t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \ +t1bv_20.c t1bv_25.c + + +# same as t1bv_*, but with different twiddle storage scheme +T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ +t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c + +T3B = t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \ +t3bv_20.c t3bv_25.c + +T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \ +t1buv_8.c t1buv_9.c t1buv_10.c + + +# split-complex codelets +T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c +T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c + +########################################################################### +# q1fv_<r> is <r> twiddle FFTW_FORWARD FFTs of size <r> (DIF step), +# where the output is transposed, using SIMD. This is used for +# in-place transposes in sizes that are divisible by <r>^2. These +# codelets have size ~ <r>^2, so you should probably not use <r> +# bigger than 8 or so.
+Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c + +# as above, but FFTW_BACKWARD +Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c + +########################################################################### +SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \ +$(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B) + +ALL_CODELETS = $(SIMD_CODELETS) +BUILT_SOURCES = $(SIMD_CODELETS) $(CODLIST) +EXTRA_DIST = $(BUILT_SOURCES) genus.c +INCLUDE_SIMD_HEADER = "\#include SIMD_HEADER" +XRENAME = XSIMD +SOLVTAB_NAME = XSIMD(solvtab_dft) +CODLIST = codlist.c +CODELET_NAME = codelet_ +@MAINTAINER_MODE_TRUE@INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +@MAINTAINER_MODE_TRUE@TWOVERS = sh ${top_srcdir}/support/twovers.sh +@MAINTAINER_MODE_TRUE@GENFFTDIR = ${top_builddir}/genfft +@MAINTAINER_MODE_TRUE@GEN_NOTW = ${GENFFTDIR}/gen_notw.native +@MAINTAINER_MODE_TRUE@GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE_C = ${GENFFTDIR}/gen_twiddle_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +@MAINTAINER_MODE_TRUE@GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +@MAINTAINER_MODE_TRUE@GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +@MAINTAINER_MODE_TRUE@GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +@MAINTAINER_MODE_TRUE@GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +@MAINTAINER_MODE_TRUE@GEN_R2R = ${GENFFTDIR}/gen_r2r.native +@MAINTAINER_MODE_TRUE@PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +@MAINTAINER_MODE_TRUE@PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +@MAINTAINER_MODE_TRUE@ADD_DATE = sed -e s/@DATE@/"`date`"/ +@MAINTAINER_MODE_TRUE@COPYRIGHT = ${top_srcdir}/COPYRIGHT 
+@MAINTAINER_MODE_TRUE@CODELET_DEPS = $(COPYRIGHT) $(PRELUDE) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_DFT = cat $(COPYRIGHT) $(PRELUDE_DFT) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_RDFT = cat $(COPYRIGHT) $(PRELUDE_RDFT) +@MAINTAINER_MODE_TRUE@FLAGS_COMMON = -compact -variables 4 +@MAINTAINER_MODE_TRUE@DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +@MAINTAINER_MODE_TRUE@RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# include special rules for regenerating codelets. +@MAINTAINER_MODE_TRUE@GFLAGS = -simd $(FLAGS_COMMON) -pipeline-latency 8 +@MAINTAINER_MODE_TRUE@FLAGS_T2S = -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_T3 = -twiddle-log3 -precompute-twiddles -no-generate-bytw +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/support/Makefile.codelets $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/simd/common/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/simd/common/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/support/Makefile.codelets: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic \ + maintainer-clean-local mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am + + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; \ + echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + 
echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? +maintainer-clean-local: + rm -f $(CODLIST) + +# cancel the hideous builtin rules that cause an infinite loop +@MAINTAINER_MODE_TRUE@%: %.o +@MAINTAINER_MODE_TRUE@%: %.s +@MAINTAINER_MODE_TRUE@%: %.c +@MAINTAINER_MODE_TRUE@%: %.S + +@MAINTAINER_MODE_TRUE@n1fv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -n $* -name n1fv_$* -include "n1f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@n2fv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -n $* -name n2fv_$* -with-ostride 2 -include "n2f.h" -store-multiple 2) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@n1bv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -sign 1 -n $* -name n1bv_$* -include "n1b.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@n2bv_%.c: $(CODELET_DEPS) $(GEN_NOTW_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW_C) $(GFLAGS) -sign 1 -n $* -name n2bv_$* -with-ostride 2 -include "n2b.h" -store-multiple 2) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@n2sv_%.c: $(CODELET_DEPS) $(GEN_NOTW) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_NOTW) $(GFLAGS) -n $* -name n2sv_$* -with-ostride 1 -include "n2s.h" -store-multiple 4) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t1fv_%.c: $(CODELET_DEPS) 
$(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1fv_$* -include "t1f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t1fuv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1fuv_$* -include "t1fu.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t2fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t2fv_$* -include "t2f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t3fv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) $(FLAGS_T3) -n $* -name t3fv_$* -include "t3f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t1bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1bv_$* -include "t1b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t1buv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t1buv_$* -include "t1bu.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t2bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) -n $* -name t2bv_$* -include "t2b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t3bv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE_C) $(GFLAGS) $(FLAGS_T3) -n $* -name t3bv_$* -include "t3b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t1sv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(GFLAGS) -n $* -name t1sv_$* -include "ts.h") | 
$(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@t2sv_%.c: $(CODELET_DEPS) $(GEN_TWIDDLE) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDDLE) $(GFLAGS) $(FLAGS_T2S) -n $* -name t2sv_$* -include "ts.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@q1fv_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ_C) $(GFLAGS) -n $* -dif -name q1fv_$* -include "q1f.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@q1bv_%.c: $(CODELET_DEPS) $(GEN_TWIDSQ_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_DFT); $(TWOVERS) $(GEN_TWIDSQ_C) $(GFLAGS) -n $* -dif -name q1bv_$* -include "q1b.h" -sign 1) | $(ADD_DATE) | $(INDENT) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,349 @@ +#include "ifftw.h" +#include SIMD_HEADER + +extern void XSIMD(codelet_n1fv_2)(planner *); +extern void XSIMD(codelet_n1fv_3)(planner *); +extern void XSIMD(codelet_n1fv_4)(planner *); +extern void XSIMD(codelet_n1fv_5)(planner *); +extern void XSIMD(codelet_n1fv_6)(planner *); +extern void XSIMD(codelet_n1fv_7)(planner *); +extern void XSIMD(codelet_n1fv_8)(planner *); +extern void XSIMD(codelet_n1fv_9)(planner *); +extern void XSIMD(codelet_n1fv_10)(planner *); +extern void XSIMD(codelet_n1fv_11)(planner *); +extern void XSIMD(codelet_n1fv_12)(planner *); +extern void XSIMD(codelet_n1fv_13)(planner *); +extern void XSIMD(codelet_n1fv_14)(planner *); +extern void XSIMD(codelet_n1fv_15)(planner *); +extern void XSIMD(codelet_n1fv_16)(planner *); +extern void XSIMD(codelet_n1fv_32)(planner *); +extern void XSIMD(codelet_n1fv_64)(planner *); +extern void XSIMD(codelet_n1fv_128)(planner 
*); +extern void XSIMD(codelet_n1fv_20)(planner *); +extern void XSIMD(codelet_n1fv_25)(planner *); +extern void XSIMD(codelet_n1bv_2)(planner *); +extern void XSIMD(codelet_n1bv_3)(planner *); +extern void XSIMD(codelet_n1bv_4)(planner *); +extern void XSIMD(codelet_n1bv_5)(planner *); +extern void XSIMD(codelet_n1bv_6)(planner *); +extern void XSIMD(codelet_n1bv_7)(planner *); +extern void XSIMD(codelet_n1bv_8)(planner *); +extern void XSIMD(codelet_n1bv_9)(planner *); +extern void XSIMD(codelet_n1bv_10)(planner *); +extern void XSIMD(codelet_n1bv_11)(planner *); +extern void XSIMD(codelet_n1bv_12)(planner *); +extern void XSIMD(codelet_n1bv_13)(planner *); +extern void XSIMD(codelet_n1bv_14)(planner *); +extern void XSIMD(codelet_n1bv_15)(planner *); +extern void XSIMD(codelet_n1bv_16)(planner *); +extern void XSIMD(codelet_n1bv_32)(planner *); +extern void XSIMD(codelet_n1bv_64)(planner *); +extern void XSIMD(codelet_n1bv_128)(planner *); +extern void XSIMD(codelet_n1bv_20)(planner *); +extern void XSIMD(codelet_n1bv_25)(planner *); +extern void XSIMD(codelet_n2fv_2)(planner *); +extern void XSIMD(codelet_n2fv_4)(planner *); +extern void XSIMD(codelet_n2fv_6)(planner *); +extern void XSIMD(codelet_n2fv_8)(planner *); +extern void XSIMD(codelet_n2fv_10)(planner *); +extern void XSIMD(codelet_n2fv_12)(planner *); +extern void XSIMD(codelet_n2fv_14)(planner *); +extern void XSIMD(codelet_n2fv_16)(planner *); +extern void XSIMD(codelet_n2fv_32)(planner *); +extern void XSIMD(codelet_n2fv_64)(planner *); +extern void XSIMD(codelet_n2fv_20)(planner *); +extern void XSIMD(codelet_n2bv_2)(planner *); +extern void XSIMD(codelet_n2bv_4)(planner *); +extern void XSIMD(codelet_n2bv_6)(planner *); +extern void XSIMD(codelet_n2bv_8)(planner *); +extern void XSIMD(codelet_n2bv_10)(planner *); +extern void XSIMD(codelet_n2bv_12)(planner *); +extern void XSIMD(codelet_n2bv_14)(planner *); +extern void XSIMD(codelet_n2bv_16)(planner *); +extern void 
XSIMD(codelet_n2bv_32)(planner *); +extern void XSIMD(codelet_n2bv_64)(planner *); +extern void XSIMD(codelet_n2bv_20)(planner *); +extern void XSIMD(codelet_n2sv_4)(planner *); +extern void XSIMD(codelet_n2sv_8)(planner *); +extern void XSIMD(codelet_n2sv_16)(planner *); +extern void XSIMD(codelet_n2sv_32)(planner *); +extern void XSIMD(codelet_n2sv_64)(planner *); +extern void XSIMD(codelet_t1fuv_2)(planner *); +extern void XSIMD(codelet_t1fuv_3)(planner *); +extern void XSIMD(codelet_t1fuv_4)(planner *); +extern void XSIMD(codelet_t1fuv_5)(planner *); +extern void XSIMD(codelet_t1fuv_6)(planner *); +extern void XSIMD(codelet_t1fuv_7)(planner *); +extern void XSIMD(codelet_t1fuv_8)(planner *); +extern void XSIMD(codelet_t1fuv_9)(planner *); +extern void XSIMD(codelet_t1fuv_10)(planner *); +extern void XSIMD(codelet_t1fv_2)(planner *); +extern void XSIMD(codelet_t1fv_3)(planner *); +extern void XSIMD(codelet_t1fv_4)(planner *); +extern void XSIMD(codelet_t1fv_5)(planner *); +extern void XSIMD(codelet_t1fv_6)(planner *); +extern void XSIMD(codelet_t1fv_7)(planner *); +extern void XSIMD(codelet_t1fv_8)(planner *); +extern void XSIMD(codelet_t1fv_9)(planner *); +extern void XSIMD(codelet_t1fv_10)(planner *); +extern void XSIMD(codelet_t1fv_12)(planner *); +extern void XSIMD(codelet_t1fv_15)(planner *); +extern void XSIMD(codelet_t1fv_16)(planner *); +extern void XSIMD(codelet_t1fv_32)(planner *); +extern void XSIMD(codelet_t1fv_64)(planner *); +extern void XSIMD(codelet_t1fv_20)(planner *); +extern void XSIMD(codelet_t1fv_25)(planner *); +extern void XSIMD(codelet_t2fv_2)(planner *); +extern void XSIMD(codelet_t2fv_4)(planner *); +extern void XSIMD(codelet_t2fv_8)(planner *); +extern void XSIMD(codelet_t2fv_16)(planner *); +extern void XSIMD(codelet_t2fv_32)(planner *); +extern void XSIMD(codelet_t2fv_64)(planner *); +extern void XSIMD(codelet_t2fv_5)(planner *); +extern void XSIMD(codelet_t2fv_10)(planner *); +extern void XSIMD(codelet_t2fv_20)(planner *); +extern 
void XSIMD(codelet_t2fv_25)(planner *); +extern void XSIMD(codelet_t3fv_4)(planner *); +extern void XSIMD(codelet_t3fv_8)(planner *); +extern void XSIMD(codelet_t3fv_16)(planner *); +extern void XSIMD(codelet_t3fv_32)(planner *); +extern void XSIMD(codelet_t3fv_5)(planner *); +extern void XSIMD(codelet_t3fv_10)(planner *); +extern void XSIMD(codelet_t3fv_20)(planner *); +extern void XSIMD(codelet_t3fv_25)(planner *); +extern void XSIMD(codelet_t1buv_2)(planner *); +extern void XSIMD(codelet_t1buv_3)(planner *); +extern void XSIMD(codelet_t1buv_4)(planner *); +extern void XSIMD(codelet_t1buv_5)(planner *); +extern void XSIMD(codelet_t1buv_6)(planner *); +extern void XSIMD(codelet_t1buv_7)(planner *); +extern void XSIMD(codelet_t1buv_8)(planner *); +extern void XSIMD(codelet_t1buv_9)(planner *); +extern void XSIMD(codelet_t1buv_10)(planner *); +extern void XSIMD(codelet_t1bv_2)(planner *); +extern void XSIMD(codelet_t1bv_3)(planner *); +extern void XSIMD(codelet_t1bv_4)(planner *); +extern void XSIMD(codelet_t1bv_5)(planner *); +extern void XSIMD(codelet_t1bv_6)(planner *); +extern void XSIMD(codelet_t1bv_7)(planner *); +extern void XSIMD(codelet_t1bv_8)(planner *); +extern void XSIMD(codelet_t1bv_9)(planner *); +extern void XSIMD(codelet_t1bv_10)(planner *); +extern void XSIMD(codelet_t1bv_12)(planner *); +extern void XSIMD(codelet_t1bv_15)(planner *); +extern void XSIMD(codelet_t1bv_16)(planner *); +extern void XSIMD(codelet_t1bv_32)(planner *); +extern void XSIMD(codelet_t1bv_64)(planner *); +extern void XSIMD(codelet_t1bv_20)(planner *); +extern void XSIMD(codelet_t1bv_25)(planner *); +extern void XSIMD(codelet_t2bv_2)(planner *); +extern void XSIMD(codelet_t2bv_4)(planner *); +extern void XSIMD(codelet_t2bv_8)(planner *); +extern void XSIMD(codelet_t2bv_16)(planner *); +extern void XSIMD(codelet_t2bv_32)(planner *); +extern void XSIMD(codelet_t2bv_64)(planner *); +extern void XSIMD(codelet_t2bv_5)(planner *); +extern void XSIMD(codelet_t2bv_10)(planner *); 
+extern void XSIMD(codelet_t2bv_20)(planner *); +extern void XSIMD(codelet_t2bv_25)(planner *); +extern void XSIMD(codelet_t3bv_4)(planner *); +extern void XSIMD(codelet_t3bv_8)(planner *); +extern void XSIMD(codelet_t3bv_16)(planner *); +extern void XSIMD(codelet_t3bv_32)(planner *); +extern void XSIMD(codelet_t3bv_5)(planner *); +extern void XSIMD(codelet_t3bv_10)(planner *); +extern void XSIMD(codelet_t3bv_20)(planner *); +extern void XSIMD(codelet_t3bv_25)(planner *); +extern void XSIMD(codelet_t1sv_2)(planner *); +extern void XSIMD(codelet_t1sv_4)(planner *); +extern void XSIMD(codelet_t1sv_8)(planner *); +extern void XSIMD(codelet_t1sv_16)(planner *); +extern void XSIMD(codelet_t1sv_32)(planner *); +extern void XSIMD(codelet_t2sv_4)(planner *); +extern void XSIMD(codelet_t2sv_8)(planner *); +extern void XSIMD(codelet_t2sv_16)(planner *); +extern void XSIMD(codelet_t2sv_32)(planner *); +extern void XSIMD(codelet_q1fv_2)(planner *); +extern void XSIMD(codelet_q1fv_4)(planner *); +extern void XSIMD(codelet_q1fv_5)(planner *); +extern void XSIMD(codelet_q1fv_8)(planner *); +extern void XSIMD(codelet_q1bv_2)(planner *); +extern void XSIMD(codelet_q1bv_4)(planner *); +extern void XSIMD(codelet_q1bv_5)(planner *); +extern void XSIMD(codelet_q1bv_8)(planner *); + + +extern const solvtab XSIMD(solvtab_dft); +const solvtab XSIMD(solvtab_dft) = { + SOLVTAB(XSIMD(codelet_n1fv_2)), + SOLVTAB(XSIMD(codelet_n1fv_3)), + SOLVTAB(XSIMD(codelet_n1fv_4)), + SOLVTAB(XSIMD(codelet_n1fv_5)), + SOLVTAB(XSIMD(codelet_n1fv_6)), + SOLVTAB(XSIMD(codelet_n1fv_7)), + SOLVTAB(XSIMD(codelet_n1fv_8)), + SOLVTAB(XSIMD(codelet_n1fv_9)), + SOLVTAB(XSIMD(codelet_n1fv_10)), + SOLVTAB(XSIMD(codelet_n1fv_11)), + SOLVTAB(XSIMD(codelet_n1fv_12)), + SOLVTAB(XSIMD(codelet_n1fv_13)), + SOLVTAB(XSIMD(codelet_n1fv_14)), + SOLVTAB(XSIMD(codelet_n1fv_15)), + SOLVTAB(XSIMD(codelet_n1fv_16)), + SOLVTAB(XSIMD(codelet_n1fv_32)), + SOLVTAB(XSIMD(codelet_n1fv_64)), + SOLVTAB(XSIMD(codelet_n1fv_128)), + 
SOLVTAB(XSIMD(codelet_n1fv_20)), + SOLVTAB(XSIMD(codelet_n1fv_25)), + SOLVTAB(XSIMD(codelet_n1bv_2)), + SOLVTAB(XSIMD(codelet_n1bv_3)), + SOLVTAB(XSIMD(codelet_n1bv_4)), + SOLVTAB(XSIMD(codelet_n1bv_5)), + SOLVTAB(XSIMD(codelet_n1bv_6)), + SOLVTAB(XSIMD(codelet_n1bv_7)), + SOLVTAB(XSIMD(codelet_n1bv_8)), + SOLVTAB(XSIMD(codelet_n1bv_9)), + SOLVTAB(XSIMD(codelet_n1bv_10)), + SOLVTAB(XSIMD(codelet_n1bv_11)), + SOLVTAB(XSIMD(codelet_n1bv_12)), + SOLVTAB(XSIMD(codelet_n1bv_13)), + SOLVTAB(XSIMD(codelet_n1bv_14)), + SOLVTAB(XSIMD(codelet_n1bv_15)), + SOLVTAB(XSIMD(codelet_n1bv_16)), + SOLVTAB(XSIMD(codelet_n1bv_32)), + SOLVTAB(XSIMD(codelet_n1bv_64)), + SOLVTAB(XSIMD(codelet_n1bv_128)), + SOLVTAB(XSIMD(codelet_n1bv_20)), + SOLVTAB(XSIMD(codelet_n1bv_25)), + SOLVTAB(XSIMD(codelet_n2fv_2)), + SOLVTAB(XSIMD(codelet_n2fv_4)), + SOLVTAB(XSIMD(codelet_n2fv_6)), + SOLVTAB(XSIMD(codelet_n2fv_8)), + SOLVTAB(XSIMD(codelet_n2fv_10)), + SOLVTAB(XSIMD(codelet_n2fv_12)), + SOLVTAB(XSIMD(codelet_n2fv_14)), + SOLVTAB(XSIMD(codelet_n2fv_16)), + SOLVTAB(XSIMD(codelet_n2fv_32)), + SOLVTAB(XSIMD(codelet_n2fv_64)), + SOLVTAB(XSIMD(codelet_n2fv_20)), + SOLVTAB(XSIMD(codelet_n2bv_2)), + SOLVTAB(XSIMD(codelet_n2bv_4)), + SOLVTAB(XSIMD(codelet_n2bv_6)), + SOLVTAB(XSIMD(codelet_n2bv_8)), + SOLVTAB(XSIMD(codelet_n2bv_10)), + SOLVTAB(XSIMD(codelet_n2bv_12)), + SOLVTAB(XSIMD(codelet_n2bv_14)), + SOLVTAB(XSIMD(codelet_n2bv_16)), + SOLVTAB(XSIMD(codelet_n2bv_32)), + SOLVTAB(XSIMD(codelet_n2bv_64)), + SOLVTAB(XSIMD(codelet_n2bv_20)), + SOLVTAB(XSIMD(codelet_n2sv_4)), + SOLVTAB(XSIMD(codelet_n2sv_8)), + SOLVTAB(XSIMD(codelet_n2sv_16)), + SOLVTAB(XSIMD(codelet_n2sv_32)), + SOLVTAB(XSIMD(codelet_n2sv_64)), + SOLVTAB(XSIMD(codelet_t1fuv_2)), + SOLVTAB(XSIMD(codelet_t1fuv_3)), + SOLVTAB(XSIMD(codelet_t1fuv_4)), + SOLVTAB(XSIMD(codelet_t1fuv_5)), + SOLVTAB(XSIMD(codelet_t1fuv_6)), + SOLVTAB(XSIMD(codelet_t1fuv_7)), + SOLVTAB(XSIMD(codelet_t1fuv_8)), + SOLVTAB(XSIMD(codelet_t1fuv_9)), + 
SOLVTAB(XSIMD(codelet_t1fuv_10)), + SOLVTAB(XSIMD(codelet_t1fv_2)), + SOLVTAB(XSIMD(codelet_t1fv_3)), + SOLVTAB(XSIMD(codelet_t1fv_4)), + SOLVTAB(XSIMD(codelet_t1fv_5)), + SOLVTAB(XSIMD(codelet_t1fv_6)), + SOLVTAB(XSIMD(codelet_t1fv_7)), + SOLVTAB(XSIMD(codelet_t1fv_8)), + SOLVTAB(XSIMD(codelet_t1fv_9)), + SOLVTAB(XSIMD(codelet_t1fv_10)), + SOLVTAB(XSIMD(codelet_t1fv_12)), + SOLVTAB(XSIMD(codelet_t1fv_15)), + SOLVTAB(XSIMD(codelet_t1fv_16)), + SOLVTAB(XSIMD(codelet_t1fv_32)), + SOLVTAB(XSIMD(codelet_t1fv_64)), + SOLVTAB(XSIMD(codelet_t1fv_20)), + SOLVTAB(XSIMD(codelet_t1fv_25)), + SOLVTAB(XSIMD(codelet_t2fv_2)), + SOLVTAB(XSIMD(codelet_t2fv_4)), + SOLVTAB(XSIMD(codelet_t2fv_8)), + SOLVTAB(XSIMD(codelet_t2fv_16)), + SOLVTAB(XSIMD(codelet_t2fv_32)), + SOLVTAB(XSIMD(codelet_t2fv_64)), + SOLVTAB(XSIMD(codelet_t2fv_5)), + SOLVTAB(XSIMD(codelet_t2fv_10)), + SOLVTAB(XSIMD(codelet_t2fv_20)), + SOLVTAB(XSIMD(codelet_t2fv_25)), + SOLVTAB(XSIMD(codelet_t3fv_4)), + SOLVTAB(XSIMD(codelet_t3fv_8)), + SOLVTAB(XSIMD(codelet_t3fv_16)), + SOLVTAB(XSIMD(codelet_t3fv_32)), + SOLVTAB(XSIMD(codelet_t3fv_5)), + SOLVTAB(XSIMD(codelet_t3fv_10)), + SOLVTAB(XSIMD(codelet_t3fv_20)), + SOLVTAB(XSIMD(codelet_t3fv_25)), + SOLVTAB(XSIMD(codelet_t1buv_2)), + SOLVTAB(XSIMD(codelet_t1buv_3)), + SOLVTAB(XSIMD(codelet_t1buv_4)), + SOLVTAB(XSIMD(codelet_t1buv_5)), + SOLVTAB(XSIMD(codelet_t1buv_6)), + SOLVTAB(XSIMD(codelet_t1buv_7)), + SOLVTAB(XSIMD(codelet_t1buv_8)), + SOLVTAB(XSIMD(codelet_t1buv_9)), + SOLVTAB(XSIMD(codelet_t1buv_10)), + SOLVTAB(XSIMD(codelet_t1bv_2)), + SOLVTAB(XSIMD(codelet_t1bv_3)), + SOLVTAB(XSIMD(codelet_t1bv_4)), + SOLVTAB(XSIMD(codelet_t1bv_5)), + SOLVTAB(XSIMD(codelet_t1bv_6)), + SOLVTAB(XSIMD(codelet_t1bv_7)), + SOLVTAB(XSIMD(codelet_t1bv_8)), + SOLVTAB(XSIMD(codelet_t1bv_9)), + SOLVTAB(XSIMD(codelet_t1bv_10)), + SOLVTAB(XSIMD(codelet_t1bv_12)), + SOLVTAB(XSIMD(codelet_t1bv_15)), + SOLVTAB(XSIMD(codelet_t1bv_16)), + SOLVTAB(XSIMD(codelet_t1bv_32)), + 
SOLVTAB(XSIMD(codelet_t1bv_64)), + SOLVTAB(XSIMD(codelet_t1bv_20)), + SOLVTAB(XSIMD(codelet_t1bv_25)), + SOLVTAB(XSIMD(codelet_t2bv_2)), + SOLVTAB(XSIMD(codelet_t2bv_4)), + SOLVTAB(XSIMD(codelet_t2bv_8)), + SOLVTAB(XSIMD(codelet_t2bv_16)), + SOLVTAB(XSIMD(codelet_t2bv_32)), + SOLVTAB(XSIMD(codelet_t2bv_64)), + SOLVTAB(XSIMD(codelet_t2bv_5)), + SOLVTAB(XSIMD(codelet_t2bv_10)), + SOLVTAB(XSIMD(codelet_t2bv_20)), + SOLVTAB(XSIMD(codelet_t2bv_25)), + SOLVTAB(XSIMD(codelet_t3bv_4)), + SOLVTAB(XSIMD(codelet_t3bv_8)), + SOLVTAB(XSIMD(codelet_t3bv_16)), + SOLVTAB(XSIMD(codelet_t3bv_32)), + SOLVTAB(XSIMD(codelet_t3bv_5)), + SOLVTAB(XSIMD(codelet_t3bv_10)), + SOLVTAB(XSIMD(codelet_t3bv_20)), + SOLVTAB(XSIMD(codelet_t3bv_25)), + SOLVTAB(XSIMD(codelet_t1sv_2)), + SOLVTAB(XSIMD(codelet_t1sv_4)), + SOLVTAB(XSIMD(codelet_t1sv_8)), + SOLVTAB(XSIMD(codelet_t1sv_16)), + SOLVTAB(XSIMD(codelet_t1sv_32)), + SOLVTAB(XSIMD(codelet_t2sv_4)), + SOLVTAB(XSIMD(codelet_t2sv_8)), + SOLVTAB(XSIMD(codelet_t2sv_16)), + SOLVTAB(XSIMD(codelet_t2sv_32)), + SOLVTAB(XSIMD(codelet_q1fv_2)), + SOLVTAB(XSIMD(codelet_q1fv_4)), + SOLVTAB(XSIMD(codelet_q1fv_5)), + SOLVTAB(XSIMD(codelet_q1fv_8)), + SOLVTAB(XSIMD(codelet_q1bv_2)), + SOLVTAB(XSIMD(codelet_q1bv_4)), + SOLVTAB(XSIMD(codelet_q1bv_5)), + SOLVTAB(XSIMD(codelet_q1bv_8)), + SOLVTAB_END +}; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-dft.h" +#include SIMD_HEADER + +#define EXTERN_CONST(t, x) extern const t x; const t x + +static int n1b_okp(const kdft_desc *d, + const R *ri, const R *ii, const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr) +{ + return (1 + && ALIGNED(ii) + && ALIGNED(io) + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(is) + && SIMD_STRIDE_OK(os) + && SIMD_VSTRIDE_OK(ivs) + && SIMD_VSTRIDE_OK(ovs) + && ri == ii + 1 + && ro == io + 1 + && (vl % VL) == 0 + && (!d->is || (d->is == is)) + && (!d->os || (d->os == os)) + && (!d->ivs || (d->ivs == ivs)) + && (!d->ovs || (d->ovs == ovs)) + ); +} + +EXTERN_CONST(kdft_genus, XSIMD(dft_n1bsimd_genus)) = { n1b_okp, VL }; + +static int n1f_okp(const kdft_desc *d, + const R *ri, const R *ii, const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr) +{ + return (1 + && ALIGNED(ri) + && ALIGNED(ro) + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(is) + && SIMD_STRIDE_OK(os) + && SIMD_VSTRIDE_OK(ivs) + && SIMD_VSTRIDE_OK(ovs) + && ii == ri + 1 + && io == ro + 1 + && (vl % VL) == 0 + && (!d->is || (d->is == is)) + && (!d->os || (d->os == os)) + && (!d->ivs || (d->ivs == ivs)) + && (!d->ovs || (d->ovs == ovs)) + ); +} + +EXTERN_CONST(kdft_genus, XSIMD(dft_n1fsimd_genus)) = { n1f_okp, VL }; + +static int n2b_okp(const kdft_desc *d, + const R *ri, const R *ii, const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr) +{ + return (1 + && ALIGNEDA(ii) + && 
ALIGNEDA(io) + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OKA(is) + && SIMD_VSTRIDE_OKA(ivs) + && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */ + && SIMD_STRIDE_OKPAIR(ovs) + && ri == ii + 1 + && ro == io + 1 + && (vl % VL) == 0 + && (!d->is || (d->is == is)) + && (!d->os || (d->os == os)) + && (!d->ivs || (d->ivs == ivs)) + && (!d->ovs || (d->ovs == ovs)) + ); +} + +EXTERN_CONST(kdft_genus, XSIMD(dft_n2bsimd_genus)) = { n2b_okp, VL }; + +static int n2f_okp(const kdft_desc *d, + const R *ri, const R *ii, const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr) +{ + return (1 + && ALIGNEDA(ri) + && ALIGNEDA(ro) + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OKA(is) + && SIMD_VSTRIDE_OKA(ivs) + && SIMD_VSTRIDE_OKA(os) /* os == 2 enforced by codelet */ + && SIMD_STRIDE_OKPAIR(ovs) + && ii == ri + 1 + && io == ro + 1 + && (vl % VL) == 0 + && (!d->is || (d->is == is)) + && (!d->os || (d->os == os)) + && (!d->ivs || (d->ivs == ivs)) + && (!d->ovs || (d->ovs == ovs)) + ); +} + +EXTERN_CONST(kdft_genus, XSIMD(dft_n2fsimd_genus)) = { n2f_okp, VL }; + +static int n2s_okp(const kdft_desc *d, + const R *ri, const R *ii, const R *ro, const R *io, + INT is, INT os, INT vl, INT ivs, INT ovs, + const planner *plnr) +{ + return (1 + && !NO_SIMDP(plnr) + && ALIGNEDA(ri) + && ALIGNEDA(ii) + && ALIGNEDA(ro) + && ALIGNEDA(io) + && SIMD_STRIDE_OKA(is) + && ivs == 1 + && os == 1 + && SIMD_STRIDE_OKA(ovs) + && (vl % (2 * VL)) == 0 + && (!d->is || (d->is == is)) + && (!d->os || (d->os == os)) + && (!d->ivs || (d->ivs == ivs)) + && (!d->ovs || (d->ovs == ovs)) + ); +} + +EXTERN_CONST(kdft_genus, XSIMD(dft_n2ssimd_genus)) = { n2s_okp, 2 * VL }; + +static int q1b_okp(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return (1 + && ALIGNED(iio) + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(rs) + && SIMD_STRIDE_OK(vs) + && SIMD_VSTRIDE_OK(ms) + && rio == iio + 1 + && (m % VL) == 0 + && (mb % VL) 
== 0 + && (me % VL) == 0 + && (!d->rs || (d->rs == rs)) + && (!d->vs || (d->vs == vs)) + && (!d->ms || (d->ms == ms)) + ); +} +EXTERN_CONST(ct_genus, XSIMD(dft_q1bsimd_genus)) = { q1b_okp, VL }; + +static int q1f_okp(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return (1 + && ALIGNED(rio) + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(rs) + && SIMD_STRIDE_OK(vs) + && SIMD_VSTRIDE_OK(ms) + && iio == rio + 1 + && (m % VL) == 0 + && (mb % VL) == 0 + && (me % VL) == 0 + && (!d->rs || (d->rs == rs)) + && (!d->vs || (d->vs == vs)) + && (!d->ms || (d->ms == ms)) + ); +} +EXTERN_CONST(ct_genus, XSIMD(dft_q1fsimd_genus)) = { q1f_okp, VL }; + +static int t_okp_common(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + UNUSED(rio); UNUSED(iio); + return (1 + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OKA(rs) + && SIMD_VSTRIDE_OKA(ms) + && (m % VL) == 0 + && (mb % VL) == 0 + && (me % VL) == 0 + && (!d->rs || (d->rs == rs)) + && (!d->vs || (d->vs == vs)) + && (!d->ms || (d->ms == ms)) + ); +} + +static int t_okp_commonu(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + UNUSED(rio); UNUSED(iio); UNUSED(m); + return (1 + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(rs) + && SIMD_VSTRIDE_OK(ms) + && (mb % VL) == 0 + && (me % VL) == 0 + && (!d->rs || (d->rs == rs)) + && (!d->vs || (d->vs == vs)) + && (!d->ms || (d->ms == ms)) + ); +} + +static int t_okp_t1f(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr) + && iio == rio + 1 + && ALIGNEDA(rio); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_t1fsimd_genus)) = { t_okp_t1f, VL }; + +static int t_okp_t1fu(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + 
const planner *plnr) +{ + return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr) + && iio == rio + 1 + && ALIGNED(rio); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_t1fusimd_genus)) = { t_okp_t1fu, VL }; + +static int t_okp_t1b(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return t_okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr) + && rio == iio + 1 + && ALIGNEDA(iio); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_t1bsimd_genus)) = { t_okp_t1b, VL }; + +static int t_okp_t1bu(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return t_okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr) + && rio == iio + 1 + && ALIGNED(iio); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_t1busimd_genus)) = { t_okp_t1bu, VL }; + +/* use t2* codelets only when n = m*radix is small, because + t2* codelets use ~2n twiddle factors (instead of ~n) */ +static int small_enough(const ct_desc *d, INT m) +{ + return m * d->radix <= 16384; +} + +static int t_okp_t2f(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return t_okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr) + && small_enough(d, m); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_t2fsimd_genus)) = { t_okp_t2f, VL }; + +static int t_okp_t2b(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + return t_okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr) + && small_enough(d, m); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_t2bsimd_genus)) = { t_okp_t2b, VL }; + +static int ts_okp(const ct_desc *d, + const R *rio, const R *iio, + INT rs, INT vs, INT m, INT mb, INT me, INT ms, + const planner *plnr) +{ + UNUSED(rio); + UNUSED(iio); + return (1 + && !NO_SIMDP(plnr) + && ALIGNEDA(rio) + && ALIGNEDA(iio) + && SIMD_STRIDE_OKA(rs) + && ms == 1 + && (m % (2 * VL)) == 0 + 
&& (mb % (2 * VL)) == 0 + && (me % (2 * VL)) == 0 + && (!d->rs || (d->rs == rs)) + && (!d->vs || (d->vs == vs)) + && (!d->ms || (d->ms == ms)) + ); +} + +EXTERN_CONST(ct_genus, XSIMD(dft_tssimd_genus)) = { ts_okp, 2 * VL }; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 10 -name n1bv_10 -include n1b.h */ + +/* + * This function contains 42 FP additions, 22 FP multiplications, + * (or, 24 additions, 4 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "n1b.h" + +static void n1bv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { + V Tb, Tr, T3, Ts, T6, Tw, Tg, Tt, T9, Tc, T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V T4, T5, Te, Tf, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tr = VADD(T1, T2); + T3 = VSUB(T1, T2); + Ts = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tw = VADD(Te, Tf); 
+ Tg = VSUB(Te, Tf); + Tt = VADD(T7, T8); + T9 = VSUB(T7, T8); + Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + } + { + V TD, Tu, Tm, Ta, Td, Tv; + TD = VSUB(Ts, Tt); + Tu = VADD(Ts, Tt); + Tm = VSUB(T6, T9); + Ta = VADD(T6, T9); + Td = VSUB(Tb, Tc); + Tv = VADD(Tb, Tc); + { + V TC, Tx, Tn, Th; + TC = VSUB(Tv, Tw); + Tx = VADD(Tv, Tw); + Tn = VSUB(Td, Tg); + Th = VADD(Td, Tg); + { + V Ty, TA, TE, TG, Ti, Tk, To, Tq, Tz, Tj; + Ty = VADD(Tu, Tx); + TA = VSUB(Tu, Tx); + TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC)); + TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD)); + Ti = VADD(Ta, Th); + Tk = VSUB(Ta, Th); + To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm)); + Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn)); + Tz = VFNMS(LDK(KP250000000), Ty, Tr); + ST(&(xo[0]), VADD(Tr, Ty), ovs, &(xo[0])); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + ST(&(xo[WS(os, 5)]), VADD(T3, Ti), ovs, &(xo[WS(os, 1)])); + { + V TB, TF, Tl, Tp; + TB = VFNMS(LDK(KP559016994), TA, Tz); + TF = VFMA(LDK(KP559016994), TA, Tz); + Tl = VFMA(LDK(KP559016994), Tk, Tj); + Tp = VFNMS(LDK(KP559016994), Tk, Tj); + ST(&(xo[WS(os, 4)]), VFNMSI(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFMAI(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(TE, TB), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFNMSI(TE, TB), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(Tq, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(Tq, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(To, Tl), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(To, Tl), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n1bv_10"), {24, 4, 18, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_10) (planner *p) { + X(kdft_register) (p, n1bv_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 10 -name n1bv_10 -include 
n1b.h */ + +/* + * This function contains 42 FP additions, 12 FP multiplications, + * (or, 36 additions, 6 multiplications, 6 fused multiply/add), + * 33 stack variables, 4 constants, and 20 memory accesses + */ +#include "n1b.h" + +static void n1bv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { + V Tl, Ty, T7, Te, Tw, Tt, Tz, TA, TB, Tg, Th, Tm, Tj, Tk; + Tj = LD(&(xi[0]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = VSUB(Tj, Tk); + Ty = VADD(Tj, Tk); + { + V T3, Tr, Td, Tv, T6, Ts, Ta, Tu; + { + V T1, T2, Tb, Tc; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Tr = VADD(T1, T2); + Tb = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + Tv = VADD(Tb, Tc); + } + { + V T4, T5, T8, T9; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Ts = VADD(T4, T5); + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + Tu = VADD(T8, T9); + } + T7 = VSUB(T3, T6); + Te = VSUB(Ta, Td); + Tw = VSUB(Tu, Tv); + Tt = VSUB(Tr, Ts); + Tz = VADD(Tr, Ts); + TA = VADD(Tu, Tv); + TB = VADD(Tz, TA); + Tg = VADD(T3, T6); + Th = VADD(Ta, Td); + Tm = VADD(Tg, Th); + } + ST(&(xo[WS(os, 5)]), VADD(Tl, Tm), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Ty, TB), ovs, &(xo[0])); + { + V Tf, Tq, To, 
Tp, Ti, Tn; + Tf = VBYI(VFMA(LDK(KP951056516), T7, VMUL(LDK(KP587785252), Te))); + Tq = VBYI(VFNMS(LDK(KP951056516), Te, VMUL(LDK(KP587785252), T7))); + Ti = VMUL(LDK(KP559016994), VSUB(Tg, Th)); + Tn = VFNMS(LDK(KP250000000), Tm, Tl); + To = VADD(Ti, Tn); + Tp = VSUB(Tn, Ti); + ST(&(xo[WS(os, 1)]), VADD(Tf, To), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(Tq, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VSUB(To, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VSUB(Tp, Tq), ovs, &(xo[WS(os, 1)])); + } + { + V Tx, TG, TE, TF, TC, TD; + Tx = VBYI(VFNMS(LDK(KP951056516), Tw, VMUL(LDK(KP587785252), Tt))); + TG = VBYI(VFMA(LDK(KP951056516), Tt, VMUL(LDK(KP587785252), Tw))); + TC = VFNMS(LDK(KP250000000), TB, Ty); + TD = VMUL(LDK(KP559016994), VSUB(Tz, TA)); + TE = VSUB(TC, TD); + TF = VADD(TD, TC); + ST(&(xo[WS(os, 2)]), VADD(Tx, TE), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VADD(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VSUB(TE, Tx), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VSUB(TF, TG), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n1bv_10"), {36, 6, 6, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_10) (planner *p) { + X(kdft_register) (p, n1bv_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 11 -name n1bv_11 -include n1b.h */ + +/* + * This function contains 70 FP additions, 60 FP multiplications, + * (or, 15 additions, 5 multiplications, 55 fused multiply/add), + * 67 stack variables, 11 constants, and 22 memory accesses + */ +#include "n1b.h" + +static void n1bv_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP959492973, +0.959492973614497389890368057066327699062454848); + DVK(KP876768831, +0.876768831002589333891339807079336796764054852); + DVK(KP918985947, +0.918985947228994779780736114132655398124909697); + DVK(KP989821441, +0.989821441880932732376092037776718787376519372); + DVK(KP778434453, +0.778434453334651800608337670740821884709317477); + DVK(KP830830026, +0.830830026003772851058548298459246407048009821); + DVK(KP372785597, +0.372785597771792209609773152906148328659002598); + DVK(KP634356270, +0.634356270682424498893150776899916060542806975); + DVK(KP715370323, +0.715370323453429719112414662767260662417897278); + DVK(KP342584725, +0.342584725681637509502641509861112333758894680); + DVK(KP521108558, +0.521108558113202722944698153526659300680427422); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + 
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(22, is), MAKE_VOLATILE_STRIDE(22, os)) { + V T1, Tb, T4, Tq, Tg, Tm, T7, Tp, Ta, To, Tc, T11; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + { + V T2, T3, Te, Tf; + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tf = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T5, T6, T8, T9; + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tq = VSUB(T2, T3); + Tg = VADD(Te, Tf); + Tm = VSUB(Te, Tf); + T7 = VADD(T5, T6); + Tp = VSUB(T5, T6); + Ta = VADD(T8, T9); + To = VSUB(T8, T9); + Tc = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + } + } + T11 = VFMA(LDK(KP521108558), Tm, Tq); + { + V TA, TS, TE, TW, Td, Tn, Ts, Tw, Tr, Tv, TT, TF; + Tr = VFNMS(LDK(KP521108558), Tq, Tp); + Tv = VFNMS(LDK(KP342584725), T7, Tg); + TA = VFMA(LDK(KP715370323), To, Tq); + TS = VFMA(LDK(KP521108558), To, Tm); + TE = VFNMS(LDK(KP342584725), T4, Ta); + TW = VFNMS(LDK(KP342584725), Ta, T7); + Td = VADD(Tb, Tc); + Tn = VSUB(Tb, Tc); + Ts = VFNMS(LDK(KP715370323), Tr, To); + Tw = VFNMS(LDK(KP634356270), Tv, T4); + TT = VFNMS(LDK(KP715370323), TS, Tp); + TF = VFNMS(LDK(KP634356270), TE, Tg); + { + V Tu, TV, TD, TL, T14, TP, TZ, Tj, Tz, TI, TB, TJ, TM; + TB = VFMA(LDK(KP372785597), Tn, TA); + TJ = VFNMS(LDK(KP521108558), Tp, Tn); + { + V T12, TN, TX, Th; + T12 = VFMA(LDK(KP715370323), T11, Tn); + ST(&(xo[0]), VADD(Tg, VADD(Td, VADD(Ta, VADD(T7, VADD(T4, T1))))), ovs, &(xo[0])); + TN = VFNMS(LDK(KP342584725), Td, T4); + TX = VFNMS(LDK(KP634356270), TW, Td); + Th = VFNMS(LDK(KP342584725), Tg, Td); + { + V Tt, Tx, TU, TG; + Tt = VFNMS(LDK(KP830830026), Ts, Tn); + Tx = VFNMS(LDK(KP778434453), Tw, Ta); + TU = 
VFMA(LDK(KP830830026), TT, Tq); + TG = VFNMS(LDK(KP778434453), TF, Td); + { + V TC, TK, T13, TO; + TC = VFNMS(LDK(KP830830026), TB, Tm); + TK = VFMA(LDK(KP715370323), TJ, Tm); + T13 = VFMA(LDK(KP830830026), T12, Tp); + TO = VFNMS(LDK(KP634356270), TN, T7); + { + V TY, Ti, Ty, TH; + TY = VFNMS(LDK(KP778434453), TX, T4); + Ti = VFNMS(LDK(KP634356270), Th, Ta); + Tu = VMUL(LDK(KP989821441), VFNMS(LDK(KP918985947), Tt, Tm)); + Ty = VFNMS(LDK(KP876768831), Tx, Td); + TV = VMUL(LDK(KP989821441), VFNMS(LDK(KP918985947), TU, Tn)); + TH = VFNMS(LDK(KP876768831), TG, T7); + TD = VMUL(LDK(KP989821441), VFMA(LDK(KP918985947), TC, Tp)); + TL = VFNMS(LDK(KP830830026), TK, To); + T14 = VMUL(LDK(KP989821441), VFMA(LDK(KP918985947), T13, To)); + TP = VFNMS(LDK(KP778434453), TO, Tg); + TZ = VFNMS(LDK(KP876768831), TY, Tg); + Tj = VFNMS(LDK(KP778434453), Ti, T7); + Tz = VFNMS(LDK(KP959492973), Ty, T1); + TI = VFNMS(LDK(KP959492973), TH, T1); + } + } + } + } + TM = VMUL(LDK(KP989821441), VFNMS(LDK(KP918985947), TL, Tq)); + { + V TQ, T10, Tk, TR, Tl; + TQ = VFNMS(LDK(KP876768831), TP, Ta); + T10 = VFNMS(LDK(KP959492973), TZ, T1); + Tk = VFNMS(LDK(KP876768831), Tj, T4); + ST(&(xo[WS(os, 7)]), VFMAI(TD, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(TD, Tz), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFNMSI(TM, TI), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(TM, TI), ovs, &(xo[WS(os, 1)])); + TR = VFNMS(LDK(KP959492973), TQ, T1); + ST(&(xo[WS(os, 10)]), VFNMSI(T14, T10), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(T14, T10), ovs, &(xo[WS(os, 1)])); + Tl = VFNMS(LDK(KP959492973), Tk, T1); + ST(&(xo[WS(os, 9)]), VFMAI(TV, TR), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(TV, TR), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Tu, Tl), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFMAI(Tu, Tl), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 11, XSIMD_STRING("n1bv_11"), {15, 5, 55, 0}, &GENUS, 0, 0, 0, 0 }; + +void 
XSIMD(codelet_n1bv_11) (planner *p) { + X(kdft_register) (p, n1bv_11, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 11 -name n1bv_11 -include n1b.h */ + +/* + * This function contains 70 FP additions, 50 FP multiplications, + * (or, 30 additions, 10 multiplications, 40 fused multiply/add), + * 32 stack variables, 10 constants, and 22 memory accesses + */ +#include "n1b.h" + +static void n1bv_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP959492973, +0.959492973614497389890368057066327699062454848); + DVK(KP654860733, +0.654860733945285064056925072466293553183791199); + DVK(KP142314838, +0.142314838273285140443792668616369668791051361); + DVK(KP415415013, +0.415415013001886425529274149229623203524004910); + DVK(KP841253532, +0.841253532831181168861811648919367717513292498); + DVK(KP540640817, +0.540640817455597582107635954318691695431770608); + DVK(KP909631995, +0.909631995354518371411715383079028460060241051); + DVK(KP989821441, +0.989821441880932732376092037776718787376519372); + DVK(KP755749574, +0.755749574354258283774035843972344420179717445); + DVK(KP281732556, +0.281732556841429697711417915346616899035777899); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(22, is), MAKE_VOLATILE_STRIDE(22, os)) { + V Th, T3, Tm, Tf, Ti, Tc, Tj, T9, Tk, T6, Tl, Ta, Tb, Ts, Tt; + Th = LD(&(xi[0]), ivs, &(xi[0])); + { + V T1, T2, Td, Te; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + Tm = VADD(T1, T2); + Td = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tf = VSUB(Td, Te); + Ti = VADD(Td, Te); + } + Ta = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tc = 
VSUB(Ta, Tb); + Tj = VADD(Ta, Tb); + { + V T7, T8, T4, T5; + T7 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + Tk = VADD(T7, T8); + T4 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + Tl = VADD(T4, T5); + } + ST(&(xo[0]), VADD(Th, VADD(Tm, VADD(Ti, VADD(Tl, VADD(Tj, Tk))))), ovs, &(xo[0])); + { + V Tg, Tn, Tu, Tv; + Tg = VBYI(VFMA(LDK(KP281732556), T3, VFMA(LDK(KP755749574), T6, VFNMS(LDK(KP909631995), Tc, VFNMS(LDK(KP540640817), Tf, VMUL(LDK(KP989821441), T9)))))); + Tn = VFMA(LDK(KP841253532), Ti, VFMA(LDK(KP415415013), Tj, VFNMS(LDK(KP142314838), Tk, VFNMS(LDK(KP654860733), Tl, VFNMS(LDK(KP959492973), Tm, Th))))); + ST(&(xo[WS(os, 5)]), VADD(Tg, Tn), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VSUB(Tn, Tg), ovs, &(xo[0])); + Tu = VBYI(VFMA(LDK(KP755749574), T3, VFMA(LDK(KP540640817), T6, VFNMS(LDK(KP909631995), T9, VFNMS(LDK(KP989821441), Tf, VMUL(LDK(KP281732556), Tc)))))); + Tv = VFMA(LDK(KP841253532), Tl, VFMA(LDK(KP415415013), Tk, VFNMS(LDK(KP959492973), Tj, VFNMS(LDK(KP142314838), Ti, VFNMS(LDK(KP654860733), Tm, Th))))); + ST(&(xo[WS(os, 4)]), VADD(Tu, Tv), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VSUB(Tv, Tu), ovs, &(xo[WS(os, 1)])); + } + Ts = VBYI(VFMA(LDK(KP909631995), T3, VFNMS(LDK(KP540640817), T9, VFNMS(LDK(KP989821441), Tc, VFNMS(LDK(KP281732556), T6, VMUL(LDK(KP755749574), Tf)))))); + Tt = VFMA(LDK(KP415415013), Tm, VFMA(LDK(KP841253532), Tk, VFNMS(LDK(KP142314838), Tj, VFNMS(LDK(KP959492973), Tl, VFNMS(LDK(KP654860733), Ti, Th))))); + ST(&(xo[WS(os, 2)]), VADD(Ts, Tt), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VSUB(Tt, Ts), ovs, &(xo[WS(os, 1)])); + { + V Tq, Tr, To, Tp; + Tq = VBYI(VFMA(LDK(KP540640817), T3, VFMA(LDK(KP909631995), Tf, VFMA(LDK(KP989821441), T6, VFMA(LDK(KP755749574), Tc, VMUL(LDK(KP281732556), T9)))))); + Tr = VFMA(LDK(KP841253532), Tm, VFMA(LDK(KP415415013), Ti, VFNMS(LDK(KP959492973), Tk, 
VFNMS(LDK(KP654860733), Tj, VFNMS(LDK(KP142314838), Tl, Th))))); + ST(&(xo[WS(os, 1)]), VADD(Tq, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VSUB(Tr, Tq), ovs, &(xo[0])); + To = VBYI(VFMA(LDK(KP989821441), T3, VFMA(LDK(KP540640817), Tc, VFNMS(LDK(KP909631995), T6, VFNMS(LDK(KP281732556), Tf, VMUL(LDK(KP755749574), T9)))))); + Tp = VFMA(LDK(KP415415013), Tl, VFMA(LDK(KP841253532), Tj, VFNMS(LDK(KP654860733), Tk, VFNMS(LDK(KP959492973), Ti, VFNMS(LDK(KP142314838), Tm, Th))))); + ST(&(xo[WS(os, 3)]), VADD(To, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VSUB(Tp, To), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 11, XSIMD_STRING("n1bv_11"), {30, 10, 40, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_11) (planner *p) { + X(kdft_register) (p, n1bv_11, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 12 -name n1bv_12 -include n1b.h */ + +/* + * This function contains 48 FP additions, 20 FP multiplications, + * (or, 30 additions, 2 multiplications, 18 fused multiply/add), + * 49 stack variables, 2 constants, and 24 memory accesses + */ +#include "n1b.h" + +static void n1bv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T1, T6, Tc, Th, Td, Te, Ti, Tz, T4, TA, T9, Tj, Tf, Tw; + { + V T2, T3, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tz = VSUB(T2, T3); + T4 = VADD(T2, T3); + TA = VSUB(T7, T8); + T9 = VADD(T7, T8); + Tj = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + } + Tf = 
VADD(Td, Te); + Tw = VSUB(Td, Te); + { + V T5, Tp, TJ, TB, Ta, Tq, Tk, Tx, Tg, Ts; + T5 = VADD(T1, T4); + Tp = VFNMS(LDK(KP500000000), T4, T1); + TJ = VSUB(Tz, TA); + TB = VADD(Tz, TA); + Ta = VADD(T6, T9); + Tq = VFNMS(LDK(KP500000000), T9, T6); + Tk = VADD(Ti, Tj); + Tx = VSUB(Tj, Ti); + Tg = VADD(Tc, Tf); + Ts = VFNMS(LDK(KP500000000), Tf, Tc); + { + V Tr, TF, Tb, Tn, TG, Ty, Tl, Tt; + Tr = VADD(Tp, Tq); + TF = VSUB(Tp, Tq); + Tb = VSUB(T5, Ta); + Tn = VADD(T5, Ta); + TG = VADD(Tw, Tx); + Ty = VSUB(Tw, Tx); + Tl = VADD(Th, Tk); + Tt = VFNMS(LDK(KP500000000), Tk, Th); + { + V TC, TE, TH, TL, Tu, TI, Tm, To; + TC = VMUL(LDK(KP866025403), VSUB(Ty, TB)); + TE = VMUL(LDK(KP866025403), VADD(TB, Ty)); + TH = VFNMS(LDK(KP866025403), TG, TF); + TL = VFMA(LDK(KP866025403), TG, TF); + Tu = VADD(Ts, Tt); + TI = VSUB(Ts, Tt); + Tm = VSUB(Tg, Tl); + To = VADD(Tg, Tl); + { + V TK, TM, Tv, TD; + TK = VFMA(LDK(KP866025403), TJ, TI); + TM = VFNMS(LDK(KP866025403), TJ, TI); + Tv = VSUB(Tr, Tu); + TD = VADD(Tr, Tu); + ST(&(xo[0]), VADD(Tn, To), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VSUB(Tn, To), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VFMAI(Tm, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(Tm, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFMAI(TM, TL), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(TM, TL), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(TK, TH), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFNMSI(TE, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(TE, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(TC, Tv), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFNMSI(TC, Tv), ovs, &(xo[0])); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, XSIMD_STRING("n1bv_12"), {30, 2, 18, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_12) (planner *p) { + X(kdft_register) (p, n1bv_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: 
../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 12 -name n1bv_12 -include n1b.h */ + +/* + * This function contains 48 FP additions, 8 FP multiplications, + * (or, 44 additions, 4 multiplications, 4 fused multiply/add), + * 27 stack variables, 2 constants, and 24 memory accesses + */ +#include "n1b.h" + +static void n1bv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T5, Ta, TG, TF, Ty, Tm, Ti, Tp, TJ, TI, Tx, Ts; + { + V T1, T6, T4, Tk, T9, Tl; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T2, T3, T7, T8; + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tk = VSUB(T2, T3); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = VADD(T7, T8); + Tl = VSUB(T7, T8); + } + T5 = VFNMS(LDK(KP500000000), T4, T1); + Ta = VFNMS(LDK(KP500000000), T9, T6); + TG = VADD(T6, T9); + TF = VADD(T1, T4); + Ty = VADD(Tk, Tl); + Tm = VMUL(LDK(KP866025403), VSUB(Tk, Tl)); + } + { + V Tn, Tq, Te, To, Th, Tr; + Tn = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tq = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + { + V Tc, Td, Tf, Tg; + Tc = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Te = VSUB(Tc, Td); + To = VADD(Tc, Td); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Th = VSUB(Tf, Tg); + Tr = VADD(Tf, Tg); + } + Ti = VMUL(LDK(KP866025403), VSUB(Te, Th)); + Tp = VFNMS(LDK(KP500000000), To, Tn); + TJ = 
VADD(Tq, Tr); + TI = VADD(Tn, To); + Tx = VADD(Te, Th); + Ts = VFNMS(LDK(KP500000000), Tr, Tq); + } + { + V TH, TK, TL, TM; + TH = VSUB(TF, TG); + TK = VBYI(VSUB(TI, TJ)); + ST(&(xo[WS(os, 3)]), VSUB(TH, TK), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(TH, TK), ovs, &(xo[WS(os, 1)])); + TL = VADD(TF, TG); + TM = VADD(TI, TJ); + ST(&(xo[WS(os, 6)]), VSUB(TL, TM), ovs, &(xo[0])); + ST(&(xo[0]), VADD(TL, TM), ovs, &(xo[0])); + } + { + V Tj, Tv, Tu, Tw, Tb, Tt; + Tb = VSUB(T5, Ta); + Tj = VSUB(Tb, Ti); + Tv = VADD(Tb, Ti); + Tt = VSUB(Tp, Ts); + Tu = VBYI(VADD(Tm, Tt)); + Tw = VBYI(VSUB(Tt, Tm)); + ST(&(xo[WS(os, 11)]), VSUB(Tj, Tu), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(Tv, Tw), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(Tj, Tu), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VSUB(Tv, Tw), ovs, &(xo[WS(os, 1)])); + } + { + V Tz, TD, TC, TE, TA, TB; + Tz = VBYI(VMUL(LDK(KP866025403), VSUB(Tx, Ty))); + TD = VBYI(VMUL(LDK(KP866025403), VADD(Ty, Tx))); + TA = VADD(T5, Ta); + TB = VADD(Tp, Ts); + TC = VSUB(TA, TB); + TE = VADD(TA, TB); + ST(&(xo[WS(os, 2)]), VADD(Tz, TC), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VSUB(TE, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VSUB(TC, Tz), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(TD, TE), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, XSIMD_STRING("n1bv_12"), {44, 4, 4, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_12) (planner *p) { + X(kdft_register) (p, n1bv_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3527 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:53 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 128 -name n1bv_128 -include n1b.h */ + +/* + * This function contains 1082 FP additions, 642 FP multiplications, + * (or, 440 additions, 0 multiplications, 642 fused multiply/add), + * 295 stack variables, 31 constants, and 256 memory accesses + */ +#include "n1b.h" + +static void n1bv_128(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP903989293, +0.903989293123443331586200297230537048710132025); + DVK(KP941544065, +0.941544065183020778412509402599502357185589796); + DVK(KP357805721, +0.357805721314524104672487743774474392487532769); + DVK(KP472964775, +0.472964775891319928124438237972992463904131113); + DVK(KP857728610, +0.857728610000272069902269984284770137042490799); + DVK(KP970031253, +0.970031253194543992603984207286100251456865962); + DVK(KP250486960, +0.250486960191305461595702160124721208578685568); + DVK(KP998795456, +0.998795456205172392714771604759100694443203615); + DVK(KP740951125, +0.740951125354959091175616897495162729728955309); + DVK(KP599376933, 
+0.599376933681923766271389869014404232837890546); + DVK(KP906347169, +0.906347169019147157946142717268914412664134293); + DVK(KP049126849, +0.049126849769467254105343321271313617079695752); + DVK(KP989176509, +0.989176509964780973451673738016243063983689533); + DVK(KP803207531, +0.803207531480644909806676512963141923879569427); + DVK(KP741650546, +0.741650546272035369581266691172079863842265220); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP148335987, +0.148335987538347428753676511486911367000625355); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + V T6a, T5J, T6b, T5K, T6B, T6C, T6J, T6A, T6o, T6j, T6r, T68, T6d, T5O, T5R; + V T6e, T6D, T6K; + { + V Tad, TcZ, T6Z, T8T, T4U, Tr, Tfq, TgG, Ted, Tgf, Td0, Tcc, T9k, T84, Tb6; + V Tbt, Td8, TdK, TeK, Tgq, TeV, Tgt, T7q, T94, T3p, T5X, T7B, T97, T2G, T5U; + V TbD, Tc0, Tdf, 
TdN, Tf5, Tgx, Tfg, TgA, T7J, T9b, T4E, T64, T7U, T9e, T3V; + V T61, Td2, Td3, T85, T72, T4V, TI, Tcd, Tas, TgH, Tek, Tgg, Tft, T86, T75; + V T4W, TZ, TaI, Tcf, Tdo, TdG, Tgi, Tet, Tgj, Teq, T8X, T7a, T5M, T1B, T8W; + V T7d, T5N, T1s, TaX, Tcg, Tdr, TdH, Tgl, TeC, Tgm, Tez, T90, T7h, T5P, T2c; + V T8Z, T7k, T5Q, T23, T3Y, T49, TdL, Tdb, Tbu, Tbl, Tgu, TeR, Tgr, TeY, Tf6; + V TbG, T5V, T3s, T5Y, T3f, T95, T7E, T98, T7x, T4g, T4f, T4q, TbH, T41, TbI; + V T44, T4h, T4j, T4k, Tf9, TbN; + { + V Tu, TF, Ty, TL, TW, Tah, Tx, Tag, Tee, Tz, TM, TN, Teh, Tan, TP; + V TQ; + { + V TeG, T2A, Tbq, TeT, Tbp, TeH, T3m, T2x, Td6, T7o, T2q, T3l, T7z, Tbr, T2D; + V T82, T83; + { + V Ta7, T3, Ta8, T4O, Taa, Tab, Ta, T4P, Te, Tc6, Th, Tc7, Tl, Tc9, Tca; + V To; + { + V T1, T2, T4M, T4N; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 64)]), ivs, &(xi[0])); + T4M = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T4N = LD(&(xi[WS(is, 96)]), ivs, &(xi[0])); + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 80)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 112)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + { + V Tc, T6, T9, Td, Tf, Tg; + Tc = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Ta7 = VADD(T1, T2); + T3 = VSUB(T1, T2); + Ta8 = VADD(T4M, T4N); + T4O = VSUB(T4M, T4N); + Taa = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tab = VADD(T7, T8); + T9 = VSUB(T7, T8); + Td = LD(&(xi[WS(is, 72)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 104)]), ivs, &(xi[0])); + { + V Tj, Tk, Tm, Tn; + Tj = LD(&(xi[WS(is, 120)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 88)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + Ta = VADD(T6, T9); + T4P = VSUB(T6, T9); + Te = VSUB(Tc, Td); + Tc6 = VADD(Tc, Td); + Th = VSUB(Tf, Tg); + Tc7 = VADD(Tf, Tg); + Tl = VSUB(Tj, Tk); + Tc9 = VADD(Tj, Tk); + Tca = VADD(Tn, Tm); + To = VSUB(Tm, Tn); + } + } + } + } + { + V T6X, Tb, 
Te9, Ta9, Tc8, Tea, T4R, Ti, Tfo, Tac, Tp, T4S, Tcb, Teb, T4Q; + T6X = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + Te9 = VSUB(Ta7, Ta8); + Ta9 = VADD(Ta7, Ta8); + Tc8 = VADD(Tc6, Tc7); + Tea = VSUB(Tc6, Tc7); + T4R = VFMA(LDK(KP414213562), Te, Th); + Ti = VFNMS(LDK(KP414213562), Th, Te); + Tfo = VSUB(Taa, Tab); + Tac = VADD(Taa, Tab); + Tp = VFNMS(LDK(KP414213562), To, Tl); + T4S = VFMA(LDK(KP414213562), Tl, To); + Tcb = VADD(Tc9, Tca); + Teb = VSUB(Tc9, Tca); + T4Q = VFMA(LDK(KP707106781), T4P, T4O); + T82 = VFNMS(LDK(KP707106781), T4P, T4O); + { + V T4T, T6Y, Tq, Tfp, Tec; + T4T = VSUB(T4R, T4S); + T6Y = VADD(T4R, T4S); + T83 = VSUB(Ti, Tp); + Tq = VADD(Ti, Tp); + Tfp = VSUB(Tea, Teb); + Tec = VADD(Tea, Teb); + Tad = VSUB(Ta9, Tac); + TcZ = VADD(Ta9, Tac); + T6Z = VFMA(LDK(KP923879532), T6Y, T6X); + T8T = VFNMS(LDK(KP923879532), T6Y, T6X); + T4U = VFMA(LDK(KP923879532), T4T, T4Q); + T6a = VFNMS(LDK(KP923879532), T4T, T4Q); + Tr = VFMA(LDK(KP923879532), Tq, Tb); + T5J = VFNMS(LDK(KP923879532), Tq, Tb); + Tfq = VFMA(LDK(KP707106781), Tfp, Tfo); + TgG = VFNMS(LDK(KP707106781), Tfp, Tfo); + Ted = VFMA(LDK(KP707106781), Tec, Te9); + Tgf = VFNMS(LDK(KP707106781), Tec, Te9); + Td0 = VADD(Tc8, Tcb); + Tcc = VSUB(Tc8, Tcb); + } + } + } + { + V T2i, T3j, Tb2, T2B, Tb5, T3k, T2p, T2C; + { + V T2m, Tb0, Tb1, Tb3, T2l, T2n; + { + V T2g, T2h, T3h, T3i, T2j, T2k; + T2g = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2h = LD(&(xi[WS(is, 65)]), ivs, &(xi[WS(is, 1)])); + T3h = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T3i = LD(&(xi[WS(is, 97)]), ivs, &(xi[WS(is, 1)])); + T2j = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T2k = LD(&(xi[WS(is, 81)]), ivs, &(xi[WS(is, 1)])); + T2m = LD(&(xi[WS(is, 113)]), ivs, &(xi[WS(is, 1)])); + T9k = VFMA(LDK(KP923879532), T83, T82); + T84 = VFNMS(LDK(KP923879532), T83, T82); + T2i = VSUB(T2g, T2h); + Tb0 = VADD(T2g, T2h); + T3j = VSUB(T3h, T3i); + Tb1 = VADD(T3h, T3i); + Tb3 = VADD(T2j, T2k); + T2l = 
VSUB(T2j, T2k); + T2n = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + } + { + V T2r, T2s, T2u, T2v; + T2r = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T2s = LD(&(xi[WS(is, 73)]), ivs, &(xi[WS(is, 1)])); + T2u = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T2v = LD(&(xi[WS(is, 105)]), ivs, &(xi[WS(is, 1)])); + TeG = VSUB(Tb0, Tb1); + Tb2 = VADD(Tb0, Tb1); + { + V T2y, T2z, Tb4, T2o, Tbn, T2t, Tbo, T2w; + T2y = LD(&(xi[WS(is, 121)]), ivs, &(xi[WS(is, 1)])); + T2z = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + Tb4 = VADD(T2m, T2n); + T2o = VSUB(T2m, T2n); + Tbn = VADD(T2r, T2s); + T2t = VSUB(T2r, T2s); + Tbo = VADD(T2u, T2v); + T2w = VSUB(T2u, T2v); + T2B = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + T2A = VSUB(T2y, T2z); + Tbq = VADD(T2y, T2z); + TeT = VSUB(Tb3, Tb4); + Tb5 = VADD(Tb3, Tb4); + T3k = VSUB(T2l, T2o); + T2p = VADD(T2l, T2o); + Tbp = VADD(Tbn, Tbo); + TeH = VSUB(Tbn, Tbo); + T3m = VFMA(LDK(KP414213562), T2t, T2w); + T2x = VFNMS(LDK(KP414213562), T2w, T2t); + T2C = LD(&(xi[WS(is, 89)]), ivs, &(xi[WS(is, 1)])); + } + } + } + Td6 = VADD(Tb2, Tb5); + Tb6 = VSUB(Tb2, Tb5); + T7o = VFNMS(LDK(KP707106781), T2p, T2i); + T2q = VFMA(LDK(KP707106781), T2p, T2i); + T3l = VFMA(LDK(KP707106781), T3k, T3j); + T7z = VFNMS(LDK(KP707106781), T3k, T3j); + Tbr = VADD(T2B, T2C); + T2D = VSUB(T2B, T2C); + } + { + V Tf1, Tfe, Tf2, TbZ, T3M, T4B, Tdd, T3F, T7H, T4A, T7S, TbW, Tf3, T4C, T3T; + { + V T3x, T4y, Tbz, T3Q, TbC, T4z, T3E, T3R, T3P, TbU, TbV, T3S; + { + V T3y, T3z, T3B, T3C; + { + V T3v, T3w, T4w, T4x; + T3v = LD(&(xi[WS(is, 127)]), ivs, &(xi[WS(is, 1)])); + T3w = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T4w = LD(&(xi[WS(is, 95)]), ivs, &(xi[WS(is, 1)])); + T4x = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T3y = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + { + V Tbs, TeI, T3n, T2E, Tbx; + Tbs = VADD(Tbq, Tbr); + TeI = VSUB(Tbq, Tbr); + T3n = VFNMS(LDK(KP414213562), T2A, T2D); + T2E = VFMA(LDK(KP414213562), T2D, T2A); + T3x = VSUB(T3v, 
T3w); + Tbx = VADD(T3v, T3w); + { + V Tby, Td7, TeJ, TeU; + T4y = VSUB(T4w, T4x); + Tby = VADD(T4x, T4w); + Td7 = VADD(Tbp, Tbs); + Tbt = VSUB(Tbp, Tbs); + TeJ = VADD(TeH, TeI); + TeU = VSUB(TeH, TeI); + { + V T7p, T3o, T7A, T2F; + T7p = VSUB(T3m, T3n); + T3o = VADD(T3m, T3n); + T7A = VSUB(T2x, T2E); + T2F = VADD(T2x, T2E); + Tbz = VADD(Tbx, Tby); + Tf1 = VSUB(Tbx, Tby); + Td8 = VADD(Td6, Td7); + TdK = VSUB(Td6, Td7); + TeK = VFMA(LDK(KP707106781), TeJ, TeG); + Tgq = VFNMS(LDK(KP707106781), TeJ, TeG); + TeV = VFMA(LDK(KP707106781), TeU, TeT); + Tgt = VFNMS(LDK(KP707106781), TeU, TeT); + T7q = VFMA(LDK(KP923879532), T7p, T7o); + T94 = VFNMS(LDK(KP923879532), T7p, T7o); + T3p = VFMA(LDK(KP923879532), T3o, T3l); + T5X = VFNMS(LDK(KP923879532), T3o, T3l); + T7B = VFNMS(LDK(KP923879532), T7A, T7z); + T97 = VFMA(LDK(KP923879532), T7A, T7z); + T2G = VFMA(LDK(KP923879532), T2F, T2q); + T5U = VFNMS(LDK(KP923879532), T2F, T2q); + T3z = LD(&(xi[WS(is, 79)]), ivs, &(xi[WS(is, 1)])); + } + } + } + T3B = LD(&(xi[WS(is, 111)]), ivs, &(xi[WS(is, 1)])); + T3C = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + } + { + V T3G, T3H, T3J, T3K; + T3G = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3H = LD(&(xi[WS(is, 71)]), ivs, &(xi[WS(is, 1)])); + T3J = LD(&(xi[WS(is, 103)]), ivs, &(xi[WS(is, 1)])); + T3K = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + { + V T3N, T3A, TbA, T3D, TbB, T3I, TbX, T3L, TbY, T3O; + T3N = LD(&(xi[WS(is, 119)]), ivs, &(xi[WS(is, 1)])); + T3A = VSUB(T3y, T3z); + TbA = VADD(T3y, T3z); + T3D = VSUB(T3B, T3C); + TbB = VADD(T3B, T3C); + T3I = VSUB(T3G, T3H); + TbX = VADD(T3G, T3H); + T3L = VSUB(T3J, T3K); + TbY = VADD(T3K, T3J); + T3O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T3Q = LD(&(xi[WS(is, 87)]), ivs, &(xi[WS(is, 1)])); + Tfe = VSUB(TbB, TbA); + TbC = VADD(TbA, TbB); + T4z = VSUB(T3D, T3A); + T3E = VADD(T3A, T3D); + T3R = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + Tf2 = VSUB(TbX, TbY); + TbZ = VADD(TbX, TbY); + T3M = VFMA(LDK(KP414213562), 
T3L, T3I); + T4B = VFNMS(LDK(KP414213562), T3I, T3L); + T3P = VSUB(T3N, T3O); + TbU = VADD(T3N, T3O); + } + } + } + Tdd = VADD(Tbz, TbC); + TbD = VSUB(Tbz, TbC); + TbV = VADD(T3R, T3Q); + T3S = VSUB(T3Q, T3R); + T3F = VFMA(LDK(KP707106781), T3E, T3x); + T7H = VFNMS(LDK(KP707106781), T3E, T3x); + T4A = VFMA(LDK(KP707106781), T4z, T4y); + T7S = VFNMS(LDK(KP707106781), T4z, T4y); + TbW = VADD(TbU, TbV); + Tf3 = VSUB(TbU, TbV); + T4C = VFMA(LDK(KP414213562), T3P, T3S); + T3T = VFNMS(LDK(KP414213562), T3S, T3P); + } + { + V TD, Tae, TE, TJ, TK, TU, TV; + { + V Ts, Tt, Tde, Tf4, Tff; + Ts = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 68)]), ivs, &(xi[0])); + TD = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Tde = VADD(TbZ, TbW); + Tc0 = VSUB(TbW, TbZ); + Tf4 = VADD(Tf2, Tf3); + Tff = VSUB(Tf3, Tf2); + { + V T7I, T4D, T7T, T3U; + T7I = VSUB(T4C, T4B); + T4D = VADD(T4B, T4C); + T7T = VSUB(T3T, T3M); + T3U = VADD(T3M, T3T); + Tae = VADD(Ts, Tt); + Tu = VSUB(Ts, Tt); + Tdf = VADD(Tdd, Tde); + TdN = VSUB(Tdd, Tde); + Tf5 = VFMA(LDK(KP707106781), Tf4, Tf1); + Tgx = VFNMS(LDK(KP707106781), Tf4, Tf1); + Tfg = VFMA(LDK(KP707106781), Tff, Tfe); + TgA = VFNMS(LDK(KP707106781), Tff, Tfe); + T7J = VFMA(LDK(KP923879532), T7I, T7H); + T9b = VFNMS(LDK(KP923879532), T7I, T7H); + T4E = VFMA(LDK(KP923879532), T4D, T4A); + T64 = VFNMS(LDK(KP923879532), T4D, T4A); + T7U = VFNMS(LDK(KP923879532), T7T, T7S); + T9e = VFMA(LDK(KP923879532), T7T, T7S); + T3V = VFMA(LDK(KP923879532), T3U, T3F); + T61 = VFNMS(LDK(KP923879532), T3U, T3F); + TE = LD(&(xi[WS(is, 100)]), ivs, &(xi[0])); + } + } + TJ = LD(&(xi[WS(is, 124)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + TU = LD(&(xi[WS(is, 92)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + { + V Tal, Tam, Tv, Tw, Taf; + Tv = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 84)]), ivs, &(xi[0])); + Taf = VADD(TD, TE); + TF = VSUB(TD, TE); + Ty = LD(&(xi[WS(is, 116)]), ivs, &(xi[0])); + TL = 
VSUB(TJ, TK); + Tal = VADD(TJ, TK); + TW = VSUB(TU, TV); + Tam = VADD(TV, TU); + Tah = VADD(Tv, Tw); + Tx = VSUB(Tv, Tw); + Tag = VADD(Tae, Taf); + Tee = VSUB(Tae, Taf); + Tz = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + TM = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 76)]), ivs, &(xi[0])); + Teh = VSUB(Tal, Tam); + Tan = VADD(Tal, Tam); + TP = LD(&(xi[WS(is, 108)]), ivs, &(xi[0])); + TQ = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + } + } + } + } + { + V Tev, TeA, Tdp, TaP, Tew, TaV, T1U, T29, T7f, T1N, T28, T7i, Tex, TaS, T21; + V T2a; + { + V Tem, Ter, Ten, TaD, T1j, T1y, TaA, Tdm, T1c, T78, T7b, T1x, TaG, Teo, T1z; + V T1q; + { + V T14, T1v, Taw, Taz, T1b, T1w, T1n, T1o, T1m, TaE, TaF, T1p; + { + V Tau, Tav, T15, T16, T18, T19; + { + V T12, Tai, TA, Tao, TO, T13; + T12 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tai = VADD(Ty, Tz); + TA = VSUB(Ty, Tz); + Tao = VADD(TM, TN); + TO = VSUB(TM, TN); + T13 = LD(&(xi[WS(is, 66)]), ivs, &(xi[0])); + { + V T1t, Tap, TR, Taj, Tef, TG, TB, T1u; + T1t = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + Tap = VADD(TP, TQ); + TR = VSUB(TP, TQ); + Taj = VADD(Tah, Tai); + Tef = VSUB(Tah, Tai); + TG = VSUB(Tx, TA); + TB = VADD(Tx, TA); + Tau = VADD(T12, T13); + T14 = VSUB(T12, T13); + T1u = LD(&(xi[WS(is, 98)]), ivs, &(xi[0])); + { + V Taq, Tei, TX, TS, Tak; + Taq = VADD(Tao, Tap); + Tei = VSUB(Tap, Tao); + TX = VSUB(TR, TO); + TS = VADD(TO, TR); + Tak = VSUB(Tag, Taj); + Td2 = VADD(Tag, Taj); + { + V Teg, Tfr, T71, TH; + Teg = VFNMS(LDK(KP414213562), Tef, Tee); + Tfr = VFMA(LDK(KP414213562), Tee, Tef); + T71 = VFNMS(LDK(KP707106781), TG, TF); + TH = VFMA(LDK(KP707106781), TG, TF); + { + V T70, TC, Tar, Tej, Tfs; + T70 = VFNMS(LDK(KP707106781), TB, Tu); + TC = VFMA(LDK(KP707106781), TB, Tu); + Tar = VSUB(Tan, Taq); + Td3 = VADD(Tan, Taq); + Tej = VFNMS(LDK(KP414213562), Tei, Teh); + Tfs = VFMA(LDK(KP414213562), Teh, Tei); + { + V T74, TY, T73, TT; + T74 = VFNMS(LDK(KP707106781), TX, TW); + TY = VFMA(LDK(KP707106781), TX, TW); + T73 
= VFNMS(LDK(KP707106781), TS, TL); + TT = VFMA(LDK(KP707106781), TS, TL); + T85 = VFNMS(LDK(KP668178637), T70, T71); + T72 = VFMA(LDK(KP668178637), T71, T70); + T4V = VFMA(LDK(KP198912367), TC, TH); + TI = VFNMS(LDK(KP198912367), TH, TC); + Tcd = VSUB(Tak, Tar); + Tas = VADD(Tak, Tar); + TgH = VSUB(Teg, Tej); + Tek = VADD(Teg, Tej); + Tgg = VADD(Tfr, Tfs); + Tft = VSUB(Tfr, Tfs); + T86 = VFNMS(LDK(KP668178637), T73, T74); + T75 = VFMA(LDK(KP668178637), T74, T73); + T4W = VFMA(LDK(KP198912367), TT, TY); + TZ = VFNMS(LDK(KP198912367), TY, TT); + Tav = VADD(T1t, T1u); + T1v = VSUB(T1t, T1u); + } + } + } + } + } + } + T15 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + T16 = LD(&(xi[WS(is, 82)]), ivs, &(xi[0])); + T18 = LD(&(xi[WS(is, 114)]), ivs, &(xi[0])); + T19 = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + { + V T1d, T1e, T1g, T1h, Tax, T17, Tay, T1a; + T1d = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Taw = VADD(Tau, Tav); + Tem = VSUB(Tau, Tav); + T1e = LD(&(xi[WS(is, 74)]), ivs, &(xi[0])); + T1g = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T1h = LD(&(xi[WS(is, 106)]), ivs, &(xi[0])); + Tax = VADD(T15, T16); + T17 = VSUB(T15, T16); + Tay = VADD(T18, T19); + T1a = VSUB(T18, T19); + { + V T1k, T1f, TaB, T1i, TaC, T1l; + T1k = LD(&(xi[WS(is, 122)]), ivs, &(xi[0])); + T1f = VSUB(T1d, T1e); + TaB = VADD(T1d, T1e); + T1i = VSUB(T1g, T1h); + TaC = VADD(T1g, T1h); + T1l = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Taz = VADD(Tax, Tay); + Ter = VSUB(Tax, Tay); + T1b = VADD(T17, T1a); + T1w = VSUB(T17, T1a); + T1n = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + T1o = LD(&(xi[WS(is, 90)]), ivs, &(xi[0])); + Ten = VSUB(TaB, TaC); + TaD = VADD(TaB, TaC); + T1j = VFNMS(LDK(KP414213562), T1i, T1f); + T1y = VFMA(LDK(KP414213562), T1f, T1i); + T1m = VSUB(T1k, T1l); + TaE = VADD(T1k, T1l); + } + } + } + TaA = VSUB(Taw, Taz); + Tdm = VADD(Taw, Taz); + TaF = VADD(T1n, T1o); + T1p = VSUB(T1n, T1o); + T1c = VFMA(LDK(KP707106781), T1b, T14); + T78 = VFNMS(LDK(KP707106781), T1b, T14); + T7b = 
VFNMS(LDK(KP707106781), T1w, T1v); + T1x = VFMA(LDK(KP707106781), T1w, T1v); + TaG = VADD(TaE, TaF); + Teo = VSUB(TaE, TaF); + T1z = VFNMS(LDK(KP414213562), T1m, T1p); + T1q = VFMA(LDK(KP414213562), T1p, T1m); + } + { + V T1F, T26, T1Q, TaT, TaL, TaO, T27, T1M, T1Y, T1Z, TaU, T1T, TaQ, T1X, T20; + V TaR; + { + V T24, TaJ, T25, T1G, T1H, T1J, T1K, T1D, T1E; + T1D = LD(&(xi[WS(is, 126)]), ivs, &(xi[0])); + T1E = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + T24 = LD(&(xi[WS(is, 94)]), ivs, &(xi[0])); + { + V TaH, Tdn, Tes, Tep; + TaH = VSUB(TaD, TaG); + Tdn = VADD(TaD, TaG); + Tes = VSUB(Ten, Teo); + Tep = VADD(Ten, Teo); + { + V T79, T1A, T7c, T1r; + T79 = VSUB(T1y, T1z); + T1A = VADD(T1y, T1z); + T7c = VSUB(T1j, T1q); + T1r = VADD(T1j, T1q); + TaJ = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + TaI = VFNMS(LDK(KP414213562), TaH, TaA); + Tcf = VFMA(LDK(KP414213562), TaA, TaH); + Tdo = VADD(Tdm, Tdn); + TdG = VSUB(Tdm, Tdn); + Tgi = VFNMS(LDK(KP707106781), Tes, Ter); + Tet = VFMA(LDK(KP707106781), Tes, Ter); + Tgj = VFNMS(LDK(KP707106781), Tep, Tem); + Teq = VFMA(LDK(KP707106781), Tep, Tem); + T8X = VFNMS(LDK(KP923879532), T79, T78); + T7a = VFMA(LDK(KP923879532), T79, T78); + T5M = VFNMS(LDK(KP923879532), T1A, T1x); + T1B = VFMA(LDK(KP923879532), T1A, T1x); + T8W = VFMA(LDK(KP923879532), T7c, T7b); + T7d = VFNMS(LDK(KP923879532), T7c, T7b); + T5N = VFNMS(LDK(KP923879532), T1r, T1c); + T1s = VFMA(LDK(KP923879532), T1r, T1c); + T25 = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + } + } + T1G = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T1H = LD(&(xi[WS(is, 78)]), ivs, &(xi[0])); + T1J = LD(&(xi[WS(is, 110)]), ivs, &(xi[0])); + T1K = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + { + V T1R, T1I, TaM, T1L, TaN, T1S, T1O, T1P, TaK, T1V, T1W; + T1O = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1P = LD(&(xi[WS(is, 70)]), ivs, &(xi[0])); + T26 = VSUB(T24, T25); + TaK = VADD(T25, T24); + T1R = LD(&(xi[WS(is, 102)]), ivs, &(xi[0])); + T1I = VSUB(T1G, T1H); + TaM = VADD(T1G, T1H); + T1L = VSUB(T1J, T1K); + 
TaN = VADD(T1J, T1K); + T1Q = VSUB(T1O, T1P); + TaT = VADD(T1O, T1P); + Tev = VSUB(TaJ, TaK); + TaL = VADD(TaJ, TaK); + T1S = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + T1V = LD(&(xi[WS(is, 118)]), ivs, &(xi[0])); + T1W = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + TeA = VSUB(TaN, TaM); + TaO = VADD(TaM, TaN); + T27 = VSUB(T1L, T1I); + T1M = VADD(T1I, T1L); + T1Y = LD(&(xi[WS(is, 86)]), ivs, &(xi[0])); + T1Z = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TaU = VADD(T1S, T1R); + T1T = VSUB(T1R, T1S); + TaQ = VADD(T1V, T1W); + T1X = VSUB(T1V, T1W); + } + } + Tdp = VADD(TaL, TaO); + TaP = VSUB(TaL, TaO); + T20 = VSUB(T1Y, T1Z); + TaR = VADD(T1Z, T1Y); + Tew = VSUB(TaT, TaU); + TaV = VADD(TaT, TaU); + T1U = VFMA(LDK(KP414213562), T1T, T1Q); + T29 = VFNMS(LDK(KP414213562), T1Q, T1T); + T7f = VFNMS(LDK(KP707106781), T1M, T1F); + T1N = VFMA(LDK(KP707106781), T1M, T1F); + T28 = VFMA(LDK(KP707106781), T27, T26); + T7i = VFNMS(LDK(KP707106781), T27, T26); + Tex = VSUB(TaQ, TaR); + TaS = VADD(TaQ, TaR); + T21 = VFNMS(LDK(KP414213562), T20, T1X); + T2a = VFMA(LDK(KP414213562), T1X, T20); + } + } + { + V T2J, T2U, T30, T3b, TeL, Tb9, TeO, Tbg, T2M, Tba, T2P, Tbb, T34, Tbh, T33; + V T35; + { + V T2H, T2I, T2S, T2T, T2Y, T2Z, T39, T3a; + T2H = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V Tdq, TaW, Tey, TeB; + Tdq = VADD(TaV, TaS); + TaW = VSUB(TaS, TaV); + Tey = VADD(Tew, Tex); + TeB = VSUB(Tex, Tew); + { + V T2b, T7g, T22, T7j; + T2b = VADD(T29, T2a); + T7g = VSUB(T2a, T29); + T22 = VADD(T1U, T21); + T7j = VSUB(T21, T1U); + TaX = VFNMS(LDK(KP414213562), TaW, TaP); + Tcg = VFMA(LDK(KP414213562), TaP, TaW); + Tdr = VADD(Tdp, Tdq); + TdH = VSUB(Tdp, Tdq); + Tgl = VFNMS(LDK(KP707106781), TeB, TeA); + TeC = VFMA(LDK(KP707106781), TeB, TeA); + Tgm = VFNMS(LDK(KP707106781), Tey, Tev); + Tez = VFMA(LDK(KP707106781), Tey, Tev); + T90 = VFNMS(LDK(KP923879532), T7g, T7f); + T7h = VFMA(LDK(KP923879532), T7g, T7f); + T5P = VFNMS(LDK(KP923879532), T2b, T28); + T2c = VFMA(LDK(KP923879532), T2b, 
T28); + T8Z = VFMA(LDK(KP923879532), T7j, T7i); + T7k = VFNMS(LDK(KP923879532), T7j, T7i); + T5Q = VFNMS(LDK(KP923879532), T22, T1N); + T23 = VFMA(LDK(KP923879532), T22, T1N); + T2I = LD(&(xi[WS(is, 69)]), ivs, &(xi[WS(is, 1)])); + } + } + T2S = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T2T = LD(&(xi[WS(is, 101)]), ivs, &(xi[WS(is, 1)])); + T2Y = LD(&(xi[WS(is, 125)]), ivs, &(xi[WS(is, 1)])); + T2Z = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T39 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T3a = LD(&(xi[WS(is, 93)]), ivs, &(xi[WS(is, 1)])); + { + V T2K, Tbe, Tbf, T2L, T2N, T2O, Tb7, Tb8, T31, T32; + T2K = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T2J = VSUB(T2H, T2I); + Tb7 = VADD(T2H, T2I); + T2U = VSUB(T2S, T2T); + Tb8 = VADD(T2S, T2T); + T30 = VSUB(T2Y, T2Z); + Tbe = VADD(T2Y, T2Z); + T3b = VSUB(T39, T3a); + Tbf = VADD(T39, T3a); + T2L = LD(&(xi[WS(is, 85)]), ivs, &(xi[WS(is, 1)])); + T2N = LD(&(xi[WS(is, 117)]), ivs, &(xi[WS(is, 1)])); + T2O = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + TeL = VSUB(Tb7, Tb8); + Tb9 = VADD(Tb7, Tb8); + T31 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T32 = LD(&(xi[WS(is, 77)]), ivs, &(xi[WS(is, 1)])); + TeO = VSUB(Tbe, Tbf); + Tbg = VADD(Tbe, Tbf); + T2M = VSUB(T2K, T2L); + Tba = VADD(T2K, T2L); + T2P = VSUB(T2N, T2O); + Tbb = VADD(T2N, T2O); + T34 = LD(&(xi[WS(is, 109)]), ivs, &(xi[WS(is, 1)])); + Tbh = VADD(T31, T32); + T33 = VSUB(T31, T32); + T35 = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + } + } + { + V T4d, T4e, T4o, T4p; + { + V T2X, T3q, T7t, T7C, T3r, T3e, T7D, T7w; + { + V T47, TbE, Tbd, Td9, TeW, TeN, T7s, T2W, T7r, T2R, TeP, Tbj, T37, T3c, T48; + { + V T3W, T3X, TeM, Tbc, T2Q, T2V, Tbi, T36; + T3W = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3X = LD(&(xi[WS(is, 67)]), ivs, &(xi[WS(is, 1)])); + TeM = VSUB(Tba, Tbb); + Tbc = VADD(Tba, Tbb); + T2Q = VADD(T2M, T2P); + T2V = VSUB(T2M, T2P); + T47 = LD(&(xi[WS(is, 99)]), ivs, &(xi[WS(is, 1)])); + Tbi = VADD(T34, T35); + T36 = 
VSUB(T34, T35); + TbE = VADD(T3W, T3X); + T3Y = VSUB(T3W, T3X); + Tbd = VSUB(Tb9, Tbc); + Td9 = VADD(Tb9, Tbc); + TeW = VFMA(LDK(KP414213562), TeL, TeM); + TeN = VFNMS(LDK(KP414213562), TeM, TeL); + T7s = VFNMS(LDK(KP707106781), T2V, T2U); + T2W = VFMA(LDK(KP707106781), T2V, T2U); + T7r = VFNMS(LDK(KP707106781), T2Q, T2J); + T2R = VFMA(LDK(KP707106781), T2Q, T2J); + TeP = VSUB(Tbh, Tbi); + Tbj = VADD(Tbh, Tbi); + T37 = VADD(T33, T36); + T3c = VSUB(T33, T36); + T48 = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + } + T2X = VFNMS(LDK(KP198912367), T2W, T2R); + T3q = VFMA(LDK(KP198912367), T2R, T2W); + T7t = VFMA(LDK(KP668178637), T7s, T7r); + T7C = VFNMS(LDK(KP668178637), T7r, T7s); + { + V Tbk, Tda, TeX, TeQ; + Tbk = VSUB(Tbg, Tbj); + Tda = VADD(Tbg, Tbj); + TeX = VFNMS(LDK(KP414213562), TeO, TeP); + TeQ = VFMA(LDK(KP414213562), TeP, TeO); + { + V T7v, T3d, T7u, T38, TbF; + T7v = VFNMS(LDK(KP707106781), T3c, T3b); + T3d = VFMA(LDK(KP707106781), T3c, T3b); + T7u = VFNMS(LDK(KP707106781), T37, T30); + T38 = VFMA(LDK(KP707106781), T37, T30); + T49 = VSUB(T47, T48); + TbF = VADD(T48, T47); + TdL = VSUB(Td9, Tda); + Tdb = VADD(Td9, Tda); + Tbu = VSUB(Tbd, Tbk); + Tbl = VADD(Tbd, Tbk); + Tgu = VSUB(TeN, TeQ); + TeR = VADD(TeN, TeQ); + Tgr = VSUB(TeW, TeX); + TeY = VADD(TeW, TeX); + T3r = VFNMS(LDK(KP198912367), T38, T3d); + T3e = VFMA(LDK(KP198912367), T3d, T38); + T7D = VFMA(LDK(KP668178637), T7u, T7v); + T7w = VFNMS(LDK(KP668178637), T7v, T7u); + Tf6 = VSUB(TbE, TbF); + TbG = VADD(TbE, TbF); + } + } + } + T4d = LD(&(xi[WS(is, 123)]), ivs, &(xi[WS(is, 1)])); + T5V = VSUB(T3q, T3r); + T3s = VADD(T3q, T3r); + T5Y = VSUB(T2X, T3e); + T3f = VADD(T2X, T3e); + T95 = VSUB(T7D, T7C); + T7E = VADD(T7C, T7D); + T98 = VSUB(T7t, T7w); + T7x = VADD(T7t, T7w); + T4e = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T4o = LD(&(xi[WS(is, 91)]), ivs, &(xi[WS(is, 1)])); + T4p = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + } + { + V T3Z, T40, T42, T43, TbL, TbM; + T3Z = LD(&(xi[WS(is, 
19)]), ivs, &(xi[WS(is, 1)])); + T40 = LD(&(xi[WS(is, 83)]), ivs, &(xi[WS(is, 1)])); + T42 = LD(&(xi[WS(is, 115)]), ivs, &(xi[WS(is, 1)])); + T43 = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T4g = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4f = VSUB(T4d, T4e); + TbL = VADD(T4d, T4e); + T4q = VSUB(T4o, T4p); + TbM = VADD(T4p, T4o); + TbH = VADD(T3Z, T40); + T41 = VSUB(T3Z, T40); + TbI = VADD(T42, T43); + T44 = VSUB(T42, T43); + T4h = LD(&(xi[WS(is, 75)]), ivs, &(xi[WS(is, 1)])); + T4j = LD(&(xi[WS(is, 107)]), ivs, &(xi[WS(is, 1)])); + T4k = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + Tf9 = VSUB(TbL, TbM); + TbN = VADD(TbL, TbM); + } + } + } + } + } + { + V TgB, Tgy, T62, T4H, T65, T4u, T9c, T7X, T9f, T7Q, Tg0, Tga, TfF, TeF, TfT; + V TfU, TfP, Tg7, TfI, Tfy, Tfz, Tf0, TfA, Tfl, Tg1, TfS; + { + V Tc1, TbS, Tfc, Tfj, TdX, Te5, TdZ, TdR, Te7, Te3, TdU, Te4; + { + V TdF, TdS, Tdx, Td5, TdO, TdE, TdC, Tdt, Tdk; + { + V Tdc, TdA, T4F, T4c, T7V, T7M, T4G, T4t, T7W, T7P, TdB, Tdj; + { + V Td1, Tdg, TbK, Tf8, Tfh, T4b, T7L, T46, T7K, TbQ, Tfa, T4r, T4m, Td4; + TdF = VSUB(TcZ, Td0); + Td1 = VADD(TcZ, Td0); + { + V TbJ, Tf7, T4a, T45; + TbJ = VADD(TbH, TbI); + Tf7 = VSUB(TbI, TbH); + T4a = VSUB(T44, T41); + T45 = VADD(T41, T44); + { + V TbO, T4i, TbP, T4l; + TbO = VADD(T4g, T4h); + T4i = VSUB(T4g, T4h); + TbP = VADD(T4j, T4k); + T4l = VSUB(T4j, T4k); + Tdg = VADD(TbG, TbJ); + TbK = VSUB(TbG, TbJ); + Tf8 = VFMA(LDK(KP414213562), Tf7, Tf6); + Tfh = VFNMS(LDK(KP414213562), Tf6, Tf7); + T4b = VFMA(LDK(KP707106781), T4a, T49); + T7L = VFNMS(LDK(KP707106781), T4a, T49); + T46 = VFMA(LDK(KP707106781), T45, T3Y); + T7K = VFNMS(LDK(KP707106781), T45, T3Y); + TbQ = VADD(TbO, TbP); + Tfa = VSUB(TbP, TbO); + T4r = VSUB(T4l, T4i); + T4m = VADD(T4i, T4l); + Td4 = VADD(Td2, Td3); + TdS = VSUB(Td2, Td3); + } + } + Tdc = VSUB(Td8, Tdb); + TdA = VADD(Td8, Tdb); + T4F = VFNMS(LDK(KP198912367), T46, T4b); + T4c = VFMA(LDK(KP198912367), T4b, T46); + T7V = VFMA(LDK(KP668178637), T7K, 
T7L); + T7M = VFNMS(LDK(KP668178637), T7L, T7K); + { + V Tdh, TbR, Tfb, Tfi; + Tdh = VADD(TbN, TbQ); + TbR = VSUB(TbN, TbQ); + Tfb = VFNMS(LDK(KP414213562), Tfa, Tf9); + Tfi = VFMA(LDK(KP414213562), Tf9, Tfa); + { + V T4s, T7O, T4n, T7N, Tdi; + T4s = VFMA(LDK(KP707106781), T4r, T4q); + T7O = VFNMS(LDK(KP707106781), T4r, T4q); + T4n = VFMA(LDK(KP707106781), T4m, T4f); + T7N = VFNMS(LDK(KP707106781), T4m, T4f); + Tdx = VADD(Td1, Td4); + Td5 = VSUB(Td1, Td4); + TdO = VSUB(Tdh, Tdg); + Tdi = VADD(Tdg, Tdh); + Tc1 = VSUB(TbR, TbK); + TbS = VADD(TbK, TbR); + TgB = VSUB(Tfb, Tf8); + Tfc = VADD(Tf8, Tfb); + Tgy = VSUB(Tfi, Tfh); + Tfj = VADD(Tfh, Tfi); + T4G = VFMA(LDK(KP198912367), T4n, T4s); + T4t = VFNMS(LDK(KP198912367), T4s, T4n); + T7W = VFNMS(LDK(KP668178637), T7N, T7O); + T7P = VFMA(LDK(KP668178637), T7O, T7N); + TdB = VADD(Tdf, Tdi); + Tdj = VSUB(Tdf, Tdi); + } + } + } + T62 = VSUB(T4G, T4F); + T4H = VADD(T4F, T4G); + T65 = VSUB(T4t, T4c); + T4u = VADD(T4c, T4t); + T9c = VSUB(T7V, T7W); + T7X = VADD(T7V, T7W); + T9f = VSUB(T7P, T7M); + T7Q = VADD(T7M, T7P); + TdE = VADD(TdA, TdB); + TdC = VSUB(TdA, TdB); + Tdt = VSUB(Tdc, Tdj); + Tdk = VADD(Tdc, Tdj); + } + { + V TdT, Tdl, Tdv, TdJ, Te1, Te2, TdQ, Tdz, TdD, Tdu, Tdw; + { + V TdI, TdM, TdV, TdW, TdP, Tds, Tdy; + TdI = VADD(TdG, TdH); + TdT = VSUB(TdG, TdH); + TdM = VFNMS(LDK(KP414213562), TdL, TdK); + TdV = VFMA(LDK(KP414213562), TdK, TdL); + TdW = VFMA(LDK(KP414213562), TdN, TdO); + TdP = VFNMS(LDK(KP414213562), TdO, TdN); + Tdl = VFNMS(LDK(KP707106781), Tdk, Td5); + Tdv = VFMA(LDK(KP707106781), Tdk, Td5); + Tds = VSUB(Tdo, Tdr); + Tdy = VADD(Tdo, Tdr); + TdJ = VFMA(LDK(KP707106781), TdI, TdF); + Te1 = VFNMS(LDK(KP707106781), TdI, TdF); + TdX = VSUB(TdV, TdW); + Te2 = VADD(TdV, TdW); + Te5 = VSUB(TdM, TdP); + TdQ = VADD(TdM, TdP); + Tdz = VSUB(Tdx, Tdy); + TdD = VADD(Tdx, Tdy); + Tdu = VFNMS(LDK(KP707106781), Tdt, Tds); + Tdw = VFMA(LDK(KP707106781), Tdt, Tds); + } + TdZ = VFMA(LDK(KP923879532), TdQ, TdJ); + TdR = 
VFNMS(LDK(KP923879532), TdQ, TdJ); + Te7 = VFMA(LDK(KP923879532), Te2, Te1); + Te3 = VFNMS(LDK(KP923879532), Te2, Te1); + ST(&(xo[0]), VADD(TdD, TdE), ovs, &(xo[0])); + ST(&(xo[WS(os, 64)]), VSUB(TdD, TdE), ovs, &(xo[0])); + ST(&(xo[WS(os, 32)]), VFMAI(TdC, Tdz), ovs, &(xo[0])); + ST(&(xo[WS(os, 96)]), VFNMSI(TdC, Tdz), ovs, &(xo[0])); + ST(&(xo[WS(os, 112)]), VFNMSI(Tdw, Tdv), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VFMAI(Tdw, Tdv), ovs, &(xo[0])); + ST(&(xo[WS(os, 80)]), VFMAI(Tdu, Tdl), ovs, &(xo[0])); + ST(&(xo[WS(os, 48)]), VFNMSI(Tdu, Tdl), ovs, &(xo[0])); + TdU = VFMA(LDK(KP707106781), TdT, TdS); + Te4 = VFNMS(LDK(KP707106781), TdT, TdS); + } + } + { + V Tcx, TcJ, TcI, Tcy, TcA, Tbm, Tcp, TaZ, Tcs, Tci, Tbv, TcB, TcD, TbT, Tc2; + V TcE, Tat, TaY; + Tcx = VFNMS(LDK(KP707106781), Tas, Tad); + Tat = VFMA(LDK(KP707106781), Tas, Tad); + TaY = VADD(TaI, TaX); + TcJ = VSUB(TaI, TaX); + { + V Tce, Tch, Te8, Te6, TdY, Te0; + TcI = VFNMS(LDK(KP707106781), Tcd, Tcc); + Tce = VFMA(LDK(KP707106781), Tcd, Tcc); + Tch = VSUB(Tcf, Tcg); + Tcy = VADD(Tcf, Tcg); + Te8 = VFNMS(LDK(KP923879532), Te5, Te4); + Te6 = VFMA(LDK(KP923879532), Te5, Te4); + TdY = VFNMS(LDK(KP923879532), TdX, TdU); + Te0 = VFMA(LDK(KP923879532), TdX, TdU); + TcA = VFNMS(LDK(KP707106781), Tbl, Tb6); + Tbm = VFMA(LDK(KP707106781), Tbl, Tb6); + Tcp = VFNMS(LDK(KP923879532), TaY, Tat); + TaZ = VFMA(LDK(KP923879532), TaY, Tat); + Tcs = VFNMS(LDK(KP923879532), Tch, Tce); + Tci = VFMA(LDK(KP923879532), Tch, Tce); + ST(&(xo[WS(os, 88)]), VFNMSI(Te6, Te3), ovs, &(xo[0])); + ST(&(xo[WS(os, 40)]), VFMAI(Te6, Te3), ovs, &(xo[0])); + ST(&(xo[WS(os, 104)]), VFMAI(Te8, Te7), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VFNMSI(Te8, Te7), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(Te0, TdZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 120)]), VFNMSI(Te0, TdZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 72)]), VFMAI(TdY, TdR), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VFNMSI(TdY, TdR), ovs, &(xo[0])); + Tbv = VFMA(LDK(KP707106781), Tbu, Tbt); + 
TcB = VFNMS(LDK(KP707106781), Tbu, Tbt); + TcD = VFNMS(LDK(KP707106781), TbS, TbD); + TbT = VFMA(LDK(KP707106781), TbS, TbD); + Tc2 = VFMA(LDK(KP707106781), Tc1, Tc0); + TcE = VFNMS(LDK(KP707106781), Tc1, Tc0); + } + { + V TcR, Tcz, TcU, TcK, Tcq, Tcl, Tct, Tc4; + { + V Tcj, Tbw, Tck, Tc3; + Tcj = VFMA(LDK(KP198912367), Tbm, Tbv); + Tbw = VFNMS(LDK(KP198912367), Tbv, Tbm); + Tck = VFMA(LDK(KP198912367), TbT, Tc2); + Tc3 = VFNMS(LDK(KP198912367), Tc2, TbT); + TcR = VFNMS(LDK(KP923879532), Tcy, Tcx); + Tcz = VFMA(LDK(KP923879532), Tcy, Tcx); + TcU = VFMA(LDK(KP923879532), TcJ, TcI); + TcK = VFNMS(LDK(KP923879532), TcJ, TcI); + Tcq = VADD(Tcj, Tck); + Tcl = VSUB(Tcj, Tck); + Tct = VSUB(Tbw, Tc3); + Tc4 = VADD(Tbw, Tc3); + } + { + V TfN, Tel, TfY, Tfu, Tfv, Tfw, TcT, TcX, TcQ, TcO, TcW, TcY, TcP, TcH, TfZ; + V TeE; + { + V Teu, TcS, TcN, TcV, TcG, TeD; + TfN = VFNMS(LDK(KP923879532), Tek, Ted); + Tel = VFMA(LDK(KP923879532), Tek, Ted); + { + V TcL, TcC, Tcr, Tcv; + TcL = VFNMS(LDK(KP668178637), TcA, TcB); + TcC = VFMA(LDK(KP668178637), TcB, TcA); + Tcr = VFNMS(LDK(KP980785280), Tcq, Tcp); + Tcv = VFMA(LDK(KP980785280), Tcq, Tcp); + { + V Tco, Tcm, Tcu, Tcw; + Tco = VFMA(LDK(KP980785280), Tcl, Tci); + Tcm = VFNMS(LDK(KP980785280), Tcl, Tci); + Tcu = VFMA(LDK(KP980785280), Tct, Tcs); + Tcw = VFNMS(LDK(KP980785280), Tct, Tcs); + { + V Tcn, Tc5, TcM, TcF; + Tcn = VFMA(LDK(KP980785280), Tc4, TaZ); + Tc5 = VFNMS(LDK(KP980785280), Tc4, TaZ); + TcM = VFNMS(LDK(KP668178637), TcD, TcE); + TcF = VFMA(LDK(KP668178637), TcE, TcD); + TfY = VFNMS(LDK(KP923879532), Tft, Tfq); + Tfu = VFMA(LDK(KP923879532), Tft, Tfq); + Tfv = VFMA(LDK(KP198912367), Teq, Tet); + Teu = VFNMS(LDK(KP198912367), Tet, Teq); + ST(&(xo[WS(os, 92)]), VFNMSI(Tcu, Tcr), ovs, &(xo[0])); + ST(&(xo[WS(os, 36)]), VFMAI(Tcu, Tcr), ovs, &(xo[0])); + ST(&(xo[WS(os, 100)]), VFMAI(Tcw, Tcv), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VFNMSI(Tcw, Tcv), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(Tco, Tcn), ovs, &(xo[0])); + 
ST(&(xo[WS(os, 124)]), VFNMSI(Tco, Tcn), ovs, &(xo[0])); + ST(&(xo[WS(os, 68)]), VFMAI(Tcm, Tc5), ovs, &(xo[0])); + ST(&(xo[WS(os, 60)]), VFNMSI(Tcm, Tc5), ovs, &(xo[0])); + TcS = VADD(TcL, TcM); + TcN = VSUB(TcL, TcM); + TcV = VSUB(TcC, TcF); + TcG = VADD(TcC, TcF); + TeD = VFNMS(LDK(KP198912367), TeC, Tez); + Tfw = VFMA(LDK(KP198912367), Tez, TeC); + } + } + } + TcT = VFMA(LDK(KP831469612), TcS, TcR); + TcX = VFNMS(LDK(KP831469612), TcS, TcR); + TcQ = VFMA(LDK(KP831469612), TcN, TcK); + TcO = VFNMS(LDK(KP831469612), TcN, TcK); + TcW = VFNMS(LDK(KP831469612), TcV, TcU); + TcY = VFMA(LDK(KP831469612), TcV, TcU); + TcP = VFMA(LDK(KP831469612), TcG, Tcz); + TcH = VFNMS(LDK(KP831469612), TcG, Tcz); + TfZ = VSUB(Teu, TeD); + TeE = VADD(Teu, TeD); + } + { + V TfQ, TeS, TfO, Tfx, TeZ, TfR, Tfd, Tfk; + TfQ = VFNMS(LDK(KP923879532), TeR, TeK); + TeS = VFMA(LDK(KP923879532), TeR, TeK); + ST(&(xo[WS(os, 84)]), VFMAI(TcW, TcT), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VFNMSI(TcW, TcT), ovs, &(xo[0])); + ST(&(xo[WS(os, 108)]), VFNMSI(TcY, TcX), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VFMAI(TcY, TcX), ovs, &(xo[0])); + ST(&(xo[WS(os, 116)]), VFMAI(TcQ, TcP), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(TcQ, TcP), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VFMAI(TcO, TcH), ovs, &(xo[0])); + ST(&(xo[WS(os, 76)]), VFNMSI(TcO, TcH), ovs, &(xo[0])); + Tg0 = VFNMS(LDK(KP980785280), TfZ, TfY); + Tga = VFMA(LDK(KP980785280), TfZ, TfY); + TfF = VFNMS(LDK(KP980785280), TeE, Tel); + TeF = VFMA(LDK(KP980785280), TeE, Tel); + TfO = VADD(Tfv, Tfw); + Tfx = VSUB(Tfv, Tfw); + TeZ = VFMA(LDK(KP923879532), TeY, TeV); + TfR = VFNMS(LDK(KP923879532), TeY, TeV); + TfT = VFNMS(LDK(KP923879532), Tfc, Tf5); + Tfd = VFMA(LDK(KP923879532), Tfc, Tf5); + Tfk = VFMA(LDK(KP923879532), Tfj, Tfg); + TfU = VFNMS(LDK(KP923879532), Tfj, Tfg); + TfP = VFMA(LDK(KP980785280), TfO, TfN); + Tg7 = VFNMS(LDK(KP980785280), TfO, TfN); + TfI = VFNMS(LDK(KP980785280), Tfx, Tfu); + Tfy = VFMA(LDK(KP980785280), Tfx, Tfu); + 
Tfz = VFMA(LDK(KP098491403), TeS, TeZ); + Tf0 = VFNMS(LDK(KP098491403), TeZ, TeS); + TfA = VFMA(LDK(KP098491403), Tfd, Tfk); + Tfl = VFNMS(LDK(KP098491403), Tfk, Tfd); + Tg1 = VFNMS(LDK(KP820678790), TfQ, TfR); + TfS = VFMA(LDK(KP820678790), TfR, TfQ); + } + } + } + } + } + { + V T8x, T8y, T8F, T8w, T8k, T8f, T8n, T80, T9l, T76, T87, T8U, T89, T7e, T7l; + V T8a; + { + V The, Tho, TgT, Tgp, Th7, Th8, Thf, Th6, Th3, Thl, TgW, TgM, TgU, TgP, TgX; + V TgE; + { + V Th1, TgI, TgJ, TgK; + { + V Tgh, Thc, Tgk, TfG, TfB, TfJ, Tfm, Tg2, TfV, Tgn, TfL, TfH; + Th1 = VFMA(LDK(KP923879532), Tgg, Tgf); + Tgh = VFNMS(LDK(KP923879532), Tgg, Tgf); + Thc = VFNMS(LDK(KP923879532), TgH, TgG); + TgI = VFMA(LDK(KP923879532), TgH, TgG); + TgJ = VFMA(LDK(KP668178637), Tgi, Tgj); + Tgk = VFNMS(LDK(KP668178637), Tgj, Tgi); + TfG = VADD(Tfz, TfA); + TfB = VSUB(Tfz, TfA); + TfJ = VSUB(Tf0, Tfl); + Tfm = VADD(Tf0, Tfl); + Tg2 = VFNMS(LDK(KP820678790), TfT, TfU); + TfV = VFMA(LDK(KP820678790), TfU, TfT); + Tgn = VFNMS(LDK(KP668178637), Tgm, Tgl); + TgK = VFMA(LDK(KP668178637), Tgl, Tgm); + TfL = VFMA(LDK(KP995184726), TfG, TfF); + TfH = VFNMS(LDK(KP995184726), TfG, TfF); + { + V TfE, TfC, TfM, TfK; + TfE = VFMA(LDK(KP995184726), TfB, Tfy); + TfC = VFNMS(LDK(KP995184726), TfB, Tfy); + TfM = VFNMS(LDK(KP995184726), TfJ, TfI); + TfK = VFMA(LDK(KP995184726), TfJ, TfI); + { + V TfD, Tfn, Tg8, Tg3; + TfD = VFMA(LDK(KP995184726), Tfm, TeF); + Tfn = VFNMS(LDK(KP995184726), Tfm, TeF); + Tg8 = VADD(Tg1, Tg2); + Tg3 = VSUB(Tg1, Tg2); + { + V Tgb, TfW, Thd, Tgo; + Tgb = VSUB(TfS, TfV); + TfW = VADD(TfS, TfV); + Thd = VSUB(Tgk, Tgn); + Tgo = VADD(Tgk, Tgn); + ST(&(xo[WS(os, 98)]), VFMAI(TfM, TfL), ovs, &(xo[0])); + ST(&(xo[WS(os, 30)]), VFNMSI(TfM, TfL), ovs, &(xo[0])); + ST(&(xo[WS(os, 94)]), VFNMSI(TfK, TfH), ovs, &(xo[0])); + ST(&(xo[WS(os, 34)]), VFMAI(TfK, TfH), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(TfE, TfD), ovs, &(xo[0])); + ST(&(xo[WS(os, 126)]), VFNMSI(TfE, TfD), ovs, &(xo[0])); + 
ST(&(xo[WS(os, 66)]), VFMAI(TfC, Tfn), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VFNMSI(TfC, Tfn), ovs, &(xo[0])); + { + V Tgd, Tg9, Tg6, Tg4; + Tgd = VFNMS(LDK(KP773010453), Tg8, Tg7); + Tg9 = VFMA(LDK(KP773010453), Tg8, Tg7); + Tg6 = VFMA(LDK(KP773010453), Tg3, Tg0); + Tg4 = VFNMS(LDK(KP773010453), Tg3, Tg0); + { + V Tge, Tgc, Tg5, TfX; + Tge = VFMA(LDK(KP773010453), Tgb, Tga); + Tgc = VFNMS(LDK(KP773010453), Tgb, Tga); + Tg5 = VFMA(LDK(KP773010453), TfW, TfP); + TfX = VFNMS(LDK(KP773010453), TfW, TfP); + The = VFMA(LDK(KP831469612), Thd, Thc); + Tho = VFNMS(LDK(KP831469612), Thd, Thc); + TgT = VFMA(LDK(KP831469612), Tgo, Tgh); + Tgp = VFNMS(LDK(KP831469612), Tgo, Tgh); + ST(&(xo[WS(os, 110)]), VFNMSI(Tge, Tgd), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(Tge, Tgd), ovs, &(xo[0])); + ST(&(xo[WS(os, 82)]), VFMAI(Tgc, Tg9), ovs, &(xo[0])); + ST(&(xo[WS(os, 46)]), VFNMSI(Tgc, Tg9), ovs, &(xo[0])); + ST(&(xo[WS(os, 114)]), VFMAI(Tg6, Tg5), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(Tg6, Tg5), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VFMAI(Tg4, TfX), ovs, &(xo[0])); + ST(&(xo[WS(os, 78)]), VFNMSI(Tg4, TfX), ovs, &(xo[0])); + } + } + } + } + } + } + { + V Th4, Tgs, Tgv, Th5, Tgz, TgC, Th2, TgL; + Th4 = VFMA(LDK(KP923879532), Tgr, Tgq); + Tgs = VFNMS(LDK(KP923879532), Tgr, Tgq); + Tgv = VFMA(LDK(KP923879532), Tgu, Tgt); + Th5 = VFNMS(LDK(KP923879532), Tgu, Tgt); + Th7 = VFMA(LDK(KP923879532), Tgy, Tgx); + Tgz = VFNMS(LDK(KP923879532), Tgy, Tgx); + TgC = VFMA(LDK(KP923879532), TgB, TgA); + Th8 = VFNMS(LDK(KP923879532), TgB, TgA); + Th2 = VADD(TgJ, TgK); + TgL = VSUB(TgJ, TgK); + { + V TgN, Tgw, TgO, TgD; + TgN = VFMA(LDK(KP534511135), Tgs, Tgv); + Tgw = VFNMS(LDK(KP534511135), Tgv, Tgs); + TgO = VFMA(LDK(KP534511135), Tgz, TgC); + TgD = VFNMS(LDK(KP534511135), TgC, Tgz); + Thf = VFNMS(LDK(KP303346683), Th4, Th5); + Th6 = VFMA(LDK(KP303346683), Th5, Th4); + Th3 = VFMA(LDK(KP831469612), Th2, Th1); + Thl = VFNMS(LDK(KP831469612), Th2, Th1); + TgW = 
VFNMS(LDK(KP831469612), TgL, TgI); + TgM = VFMA(LDK(KP831469612), TgL, TgI); + TgU = VADD(TgN, TgO); + TgP = VSUB(TgN, TgO); + TgX = VSUB(Tgw, TgD); + TgE = VADD(Tgw, TgD); + } + } + } + { + V T8u, T8v, T7R, T8d, T7G, Thm, Thh, Thp, Tha, T7Y, Thr, Thn; + { + V T7y, T7F, TgZ, TgV; + T8u = VFNMS(LDK(KP831469612), T7x, T7q); + T7y = VFMA(LDK(KP831469612), T7x, T7q); + T7F = VFMA(LDK(KP831469612), T7E, T7B); + T8v = VFNMS(LDK(KP831469612), T7E, T7B); + T8x = VFNMS(LDK(KP831469612), T7Q, T7J); + T7R = VFMA(LDK(KP831469612), T7Q, T7J); + TgZ = VFMA(LDK(KP881921264), TgU, TgT); + TgV = VFNMS(LDK(KP881921264), TgU, TgT); + { + V TgS, TgQ, Th0, TgY; + TgS = VFMA(LDK(KP881921264), TgP, TgM); + TgQ = VFNMS(LDK(KP881921264), TgP, TgM); + Th0 = VFNMS(LDK(KP881921264), TgX, TgW); + TgY = VFMA(LDK(KP881921264), TgX, TgW); + { + V TgR, TgF, Thg, Th9; + TgR = VFMA(LDK(KP881921264), TgE, Tgp); + TgF = VFNMS(LDK(KP881921264), TgE, Tgp); + Thg = VFNMS(LDK(KP303346683), Th7, Th8); + Th9 = VFMA(LDK(KP303346683), Th8, Th7); + T8d = VFNMS(LDK(KP148335987), T7y, T7F); + T7G = VFMA(LDK(KP148335987), T7F, T7y); + ST(&(xo[WS(os, 106)]), VFMAI(Th0, TgZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VFNMSI(Th0, TgZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 86)]), VFNMSI(TgY, TgV), ovs, &(xo[0])); + ST(&(xo[WS(os, 42)]), VFMAI(TgY, TgV), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(TgS, TgR), ovs, &(xo[0])); + ST(&(xo[WS(os, 118)]), VFNMSI(TgS, TgR), ovs, &(xo[0])); + ST(&(xo[WS(os, 74)]), VFMAI(TgQ, TgF), ovs, &(xo[0])); + ST(&(xo[WS(os, 54)]), VFNMSI(TgQ, TgF), ovs, &(xo[0])); + Thm = VADD(Thf, Thg); + Thh = VSUB(Thf, Thg); + Thp = VSUB(Th6, Th9); + Tha = VADD(Th6, Th9); + T7Y = VFMA(LDK(KP831469612), T7X, T7U); + T8y = VFNMS(LDK(KP831469612), T7X, T7U); + } + } + } + Thr = VFNMS(LDK(KP956940335), Thm, Thl); + Thn = VFMA(LDK(KP956940335), Thm, Thl); + { + V Thk, Thi, Ths, Thq; + Thk = VFMA(LDK(KP956940335), Thh, The); + Thi = VFNMS(LDK(KP956940335), Thh, The); + Ths = VFMA(LDK(KP956940335), Thp, Tho); + 
Thq = VFNMS(LDK(KP956940335), Thp, Tho); + { + V Thj, Thb, T8e, T7Z; + Thj = VFMA(LDK(KP956940335), Tha, Th3); + Thb = VFNMS(LDK(KP956940335), Tha, Th3); + T8e = VFNMS(LDK(KP148335987), T7R, T7Y); + T7Z = VFMA(LDK(KP148335987), T7Y, T7R); + T8F = VFMA(LDK(KP741650546), T8u, T8v); + T8w = VFNMS(LDK(KP741650546), T8v, T8u); + ST(&(xo[WS(os, 102)]), VFNMSI(Ths, Thr), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VFMAI(Ths, Thr), ovs, &(xo[0])); + ST(&(xo[WS(os, 90)]), VFMAI(Thq, Thn), ovs, &(xo[0])); + ST(&(xo[WS(os, 38)]), VFNMSI(Thq, Thn), ovs, &(xo[0])); + ST(&(xo[WS(os, 122)]), VFMAI(Thk, Thj), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Thk, Thj), ovs, &(xo[0])); + ST(&(xo[WS(os, 58)]), VFMAI(Thi, Thb), ovs, &(xo[0])); + ST(&(xo[WS(os, 70)]), VFNMSI(Thi, Thb), ovs, &(xo[0])); + T8k = VADD(T8d, T8e); + T8f = VSUB(T8d, T8e); + T8n = VSUB(T7G, T7Z); + T80 = VADD(T7G, T7Z); + } + } + T9l = VSUB(T72, T75); + T76 = VADD(T72, T75); + T87 = VSUB(T85, T86); + T8U = VADD(T85, T86); + T89 = VFNMS(LDK(KP303346683), T7a, T7d); + T7e = VFMA(LDK(KP303346683), T7d, T7a); + T7l = VFMA(LDK(KP303346683), T7k, T7h); + T8a = VFNMS(LDK(KP303346683), T7h, T7k); + } + } + { + V T11, T5h, T5a, T55, T5d, T4K, T5C, T5x, T5F, T5q, T4X, T4Z, T1C, T2d, T50; + { + V T5k, T3g, T3t, T5l, T5n, T4v, T4I, T5o, T8G, T8z; + T5k = VFNMS(LDK(KP980785280), T3f, T2G); + T3g = VFMA(LDK(KP980785280), T3f, T2G); + T8G = VFMA(LDK(KP741650546), T8x, T8y); + T8z = VFNMS(LDK(KP741650546), T8y, T8x); + { + V T8r, T77, T8C, T88; + T8r = VFNMS(LDK(KP831469612), T76, T6Z); + T77 = VFMA(LDK(KP831469612), T76, T6Z); + T8C = VFNMS(LDK(KP831469612), T87, T84); + T88 = VFMA(LDK(KP831469612), T87, T84); + { + V T8D, T7m, T8s, T8b; + T8D = VSUB(T7e, T7l); + T7m = VADD(T7e, T7l); + T8s = VADD(T89, T8a); + T8b = VSUB(T89, T8a); + { + V T8M, T8H, T8P, T8A; + T8M = VADD(T8F, T8G); + T8H = VSUB(T8F, T8G); + T8P = VSUB(T8w, T8z); + T8A = VADD(T8w, T8z); + { + V T8E, T8O, T8j, T7n; + T8E = VFMA(LDK(KP956940335), T8D, T8C); + T8O = 
VFNMS(LDK(KP956940335), T8D, T8C); + T8j = VFNMS(LDK(KP956940335), T7m, T77); + T7n = VFMA(LDK(KP956940335), T7m, T77); + { + V T8t, T8L, T8m, T8c; + T8t = VFNMS(LDK(KP956940335), T8s, T8r); + T8L = VFMA(LDK(KP956940335), T8s, T8r); + T8m = VFNMS(LDK(KP956940335), T8b, T88); + T8c = VFMA(LDK(KP956940335), T8b, T88); + { + V T8K, T8I, T8S, T8Q; + T8K = VFMA(LDK(KP803207531), T8H, T8E); + T8I = VFNMS(LDK(KP803207531), T8H, T8E); + T8S = VFNMS(LDK(KP803207531), T8P, T8O); + T8Q = VFMA(LDK(KP803207531), T8P, T8O); + { + V T8p, T8l, T8h, T81; + T8p = VFNMS(LDK(KP989176509), T8k, T8j); + T8l = VFMA(LDK(KP989176509), T8k, T8j); + T8h = VFMA(LDK(KP989176509), T80, T7n); + T81 = VFNMS(LDK(KP989176509), T80, T7n); + { + V T8J, T8B, T8R, T8N; + T8J = VFMA(LDK(KP803207531), T8A, T8t); + T8B = VFNMS(LDK(KP803207531), T8A, T8t); + T8R = VFMA(LDK(KP803207531), T8M, T8L); + T8N = VFNMS(LDK(KP803207531), T8M, T8L); + { + V T8q, T8o, T8i, T8g; + T8q = VFMA(LDK(KP989176509), T8n, T8m); + T8o = VFNMS(LDK(KP989176509), T8n, T8m); + T8i = VFMA(LDK(KP989176509), T8f, T8c); + T8g = VFNMS(LDK(KP989176509), T8f, T8c); + ST(&(xo[WS(os, 13)]), VFMAI(T8K, T8J), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 115)]), VFNMSI(T8K, T8J), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 77)]), VFMAI(T8I, T8B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 51)]), VFNMSI(T8I, T8B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 109)]), VFMAI(T8S, T8R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFNMSI(T8S, T8R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 83)]), VFNMSI(T8Q, T8N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 45)]), VFMAI(T8Q, T8N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 99)]), VFNMSI(T8q, T8p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VFMAI(T8q, T8p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 93)]), VFMAI(T8o, T8l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 35)]), VFNMSI(T8o, T8l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 125)]), VFMAI(T8i, T8h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(T8i, 
T8h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 61)]), VFMAI(T8g, T81), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 67)]), VFNMSI(T8g, T81), ovs, &(xo[WS(os, 1)])); + T3t = VFMA(LDK(KP980785280), T3s, T3p); + T5l = VFNMS(LDK(KP980785280), T3s, T3p); + } + } + } + } + } + } + } + } + } + T5n = VFNMS(LDK(KP980785280), T4u, T3V); + T4v = VFMA(LDK(KP980785280), T4u, T3V); + T4I = VFMA(LDK(KP980785280), T4H, T4E); + T5o = VFNMS(LDK(KP980785280), T4H, T4E); + { + V T53, T3u, T54, T4J, T5v, T5m, T5w, T5p, T10; + T6b = VSUB(TI, TZ); + T10 = VADD(TI, TZ); + T53 = VFMA(LDK(KP049126849), T3g, T3t); + T3u = VFNMS(LDK(KP049126849), T3t, T3g); + T54 = VFMA(LDK(KP049126849), T4v, T4I); + T4J = VFNMS(LDK(KP049126849), T4I, T4v); + T5v = VFNMS(LDK(KP906347169), T5k, T5l); + T5m = VFMA(LDK(KP906347169), T5l, T5k); + T5w = VFNMS(LDK(KP906347169), T5n, T5o); + T5p = VFMA(LDK(KP906347169), T5o, T5n); + T11 = VFMA(LDK(KP980785280), T10, Tr); + T5h = VFNMS(LDK(KP980785280), T10, Tr); + T5a = VADD(T53, T54); + T55 = VSUB(T53, T54); + T5d = VSUB(T3u, T4J); + T4K = VADD(T3u, T4J); + T5C = VADD(T5v, T5w); + T5x = VSUB(T5v, T5w); + T5F = VSUB(T5m, T5p); + T5q = VADD(T5m, T5p); + T4X = VSUB(T4V, T4W); + T5K = VADD(T4V, T4W); + } + T4Z = VFMA(LDK(KP098491403), T1s, T1B); + T1C = VFNMS(LDK(KP098491403), T1B, T1s); + T2d = VFNMS(LDK(KP098491403), T2c, T23); + T50 = VFMA(LDK(KP098491403), T23, T2c); + } + { + V T9y, T9t, T9B, T9i, T9n, T9o, T9F, T8V, T9Q, T9m, T9R, T92, Ta0, T9V, Ta3; + V T9O; + { + V T9I, T9J, T9L, T9d, T5s, T4Y, T5t, T2e, T5i, T51, T9r, T9a, T9g, T9M, T96; + V T99; + T9I = VFMA(LDK(KP831469612), T95, T94); + T96 = VFNMS(LDK(KP831469612), T95, T94); + T99 = VFNMS(LDK(KP831469612), T98, T97); + T9J = VFMA(LDK(KP831469612), T98, T97); + T9L = VFMA(LDK(KP831469612), T9c, T9b); + T9d = VFNMS(LDK(KP831469612), T9c, T9b); + T5s = VFNMS(LDK(KP980785280), T4X, T4U); + T4Y = VFMA(LDK(KP980785280), T4X, T4U); + T5t = VSUB(T1C, T2d); + T2e = VADD(T1C, T2d); + T5i = VADD(T4Z, T50); + T51 = 
VSUB(T4Z, T50); + T9r = VFNMS(LDK(KP599376933), T96, T99); + T9a = VFMA(LDK(KP599376933), T99, T96); + T9g = VFNMS(LDK(KP831469612), T9f, T9e); + T9M = VFMA(LDK(KP831469612), T9f, T9e); + { + V T5u, T5E, T8Y, T91; + T5u = VFNMS(LDK(KP995184726), T5t, T5s); + T5E = VFMA(LDK(KP995184726), T5t, T5s); + { + V T59, T2f, T5j, T5B; + T59 = VFNMS(LDK(KP995184726), T2e, T11); + T2f = VFMA(LDK(KP995184726), T2e, T11); + T5j = VFMA(LDK(KP995184726), T5i, T5h); + T5B = VFNMS(LDK(KP995184726), T5i, T5h); + { + V T5c, T52, T9s, T9h; + T5c = VFNMS(LDK(KP995184726), T51, T4Y); + T52 = VFMA(LDK(KP995184726), T51, T4Y); + T9s = VFNMS(LDK(KP599376933), T9d, T9g); + T9h = VFMA(LDK(KP599376933), T9g, T9d); + { + V T5A, T5y, T5I, T5G; + T5A = VFMA(LDK(KP740951125), T5x, T5u); + T5y = VFNMS(LDK(KP740951125), T5x, T5u); + T5I = VFMA(LDK(KP740951125), T5F, T5E); + T5G = VFNMS(LDK(KP740951125), T5F, T5E); + { + V T5f, T5b, T57, T4L; + T5f = VFMA(LDK(KP998795456), T5a, T59); + T5b = VFNMS(LDK(KP998795456), T5a, T59); + T57 = VFMA(LDK(KP998795456), T4K, T2f); + T4L = VFNMS(LDK(KP998795456), T4K, T2f); + { + V T5z, T5r, T5H, T5D; + T5z = VFMA(LDK(KP740951125), T5q, T5j); + T5r = VFNMS(LDK(KP740951125), T5q, T5j); + T5H = VFNMS(LDK(KP740951125), T5C, T5B); + T5D = VFMA(LDK(KP740951125), T5C, T5B); + { + V T5g, T5e, T58, T56; + T5g = VFNMS(LDK(KP998795456), T5d, T5c); + T5e = VFMA(LDK(KP998795456), T5d, T5c); + T58 = VFMA(LDK(KP998795456), T55, T52); + T56 = VFNMS(LDK(KP998795456), T55, T52); + T9y = VADD(T9r, T9s); + T9t = VSUB(T9r, T9s); + T9B = VSUB(T9a, T9h); + T9i = VADD(T9a, T9h); + ST(&(xo[WS(os, 113)]), VFMAI(T5A, T5z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFNMSI(T5A, T5z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VFMAI(T5y, T5r), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 79)]), VFNMSI(T5y, T5r), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 111)]), VFNMSI(T5I, T5H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFMAI(T5I, T5H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 
81)]), VFMAI(T5G, T5D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 47)]), VFNMSI(T5G, T5D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 97)]), VFMAI(T5g, T5f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VFNMSI(T5g, T5f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 95)]), VFNMSI(T5e, T5b), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 33)]), VFMAI(T5e, T5b), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(T58, T57), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 127)]), VFNMSI(T58, T57), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 65)]), VFMAI(T56, T4L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 63)]), VFNMSI(T56, T4L), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + T9n = VFNMS(LDK(KP534511135), T8W, T8X); + T8Y = VFMA(LDK(KP534511135), T8X, T8W); + T91 = VFMA(LDK(KP534511135), T90, T8Z); + T9o = VFNMS(LDK(KP534511135), T8Z, T90); + { + V T9T, T9K, T9U, T9N; + T9T = VFMA(LDK(KP250486960), T9I, T9J); + T9K = VFNMS(LDK(KP250486960), T9J, T9I); + T9U = VFMA(LDK(KP250486960), T9L, T9M); + T9N = VFNMS(LDK(KP250486960), T9M, T9L); + T9F = VFNMS(LDK(KP831469612), T8U, T8T); + T8V = VFMA(LDK(KP831469612), T8U, T8T); + T9Q = VFMA(LDK(KP831469612), T9l, T9k); + T9m = VFNMS(LDK(KP831469612), T9l, T9k); + T9R = VSUB(T8Y, T91); + T92 = VADD(T8Y, T91); + Ta0 = VADD(T9T, T9U); + T9V = VSUB(T9T, T9U); + Ta3 = VSUB(T9K, T9N); + T9O = VADD(T9K, T9N); + } + } + } + { + V T6y, T6z, T63, T9Y, T9W, Ta6, Ta4, T9D, T9z, T9v, T9j, T6h, T60, T9H, T9Z; + V T9A, T9q, T66, T9X, T9P; + { + V T5W, T9S, Ta2, T9x, T93, T5Z, T9G, T9p; + T6y = VFMA(LDK(KP980785280), T5V, T5U); + T5W = VFNMS(LDK(KP980785280), T5V, T5U); + T9S = VFMA(LDK(KP881921264), T9R, T9Q); + Ta2 = VFNMS(LDK(KP881921264), T9R, T9Q); + T9x = VFNMS(LDK(KP881921264), T92, T8V); + T93 = VFMA(LDK(KP881921264), T92, T8V); + T5Z = VFMA(LDK(KP980785280), T5Y, T5X); + T6z = VFNMS(LDK(KP980785280), T5Y, T5X); + T6B = VFMA(LDK(KP980785280), T62, T61); + T63 = VFNMS(LDK(KP980785280), T62, T61); + T9G = VADD(T9n, T9o); + T9p = VSUB(T9n, T9o); + T9Y = 
VFMA(LDK(KP970031253), T9V, T9S); + T9W = VFNMS(LDK(KP970031253), T9V, T9S); + Ta6 = VFNMS(LDK(KP970031253), Ta3, Ta2); + Ta4 = VFMA(LDK(KP970031253), Ta3, Ta2); + T9D = VFNMS(LDK(KP857728610), T9y, T9x); + T9z = VFMA(LDK(KP857728610), T9y, T9x); + T9v = VFMA(LDK(KP857728610), T9i, T93); + T9j = VFNMS(LDK(KP857728610), T9i, T93); + T6h = VFMA(LDK(KP472964775), T5W, T5Z); + T60 = VFNMS(LDK(KP472964775), T5Z, T5W); + T9H = VFMA(LDK(KP881921264), T9G, T9F); + T9Z = VFNMS(LDK(KP881921264), T9G, T9F); + T9A = VFMA(LDK(KP881921264), T9p, T9m); + T9q = VFNMS(LDK(KP881921264), T9p, T9m); + T66 = VFMA(LDK(KP980785280), T65, T64); + T6C = VFNMS(LDK(KP980785280), T65, T64); + } + T9X = VFMA(LDK(KP970031253), T9O, T9H); + T9P = VFNMS(LDK(KP970031253), T9O, T9H); + { + V Ta5, Ta1, T9E, T9C; + Ta5 = VFMA(LDK(KP970031253), Ta0, T9Z); + Ta1 = VFNMS(LDK(KP970031253), Ta0, T9Z); + T9E = VFMA(LDK(KP857728610), T9B, T9A); + T9C = VFNMS(LDK(KP857728610), T9B, T9A); + { + V T9w, T9u, T6i, T67; + T9w = VFMA(LDK(KP857728610), T9t, T9q); + T9u = VFNMS(LDK(KP857728610), T9t, T9q); + T6i = VFMA(LDK(KP472964775), T63, T66); + T67 = VFNMS(LDK(KP472964775), T66, T63); + T6J = VFNMS(LDK(KP357805721), T6y, T6z); + T6A = VFMA(LDK(KP357805721), T6z, T6y); + ST(&(xo[WS(os, 5)]), VFMAI(T9Y, T9X), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 123)]), VFNMSI(T9Y, T9X), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 69)]), VFMAI(T9W, T9P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VFNMSI(T9W, T9P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 101)]), VFMAI(Ta6, Ta5), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VFNMSI(Ta6, Ta5), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 91)]), VFNMSI(Ta4, Ta1), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 37)]), VFMAI(Ta4, Ta1), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 107)]), VFNMSI(T9E, T9D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 21)]), VFMAI(T9E, T9D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 85)]), VFMAI(T9C, T9z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 43)]), VFNMSI(T9C, 
T9z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 117)]), VFMAI(T9w, T9v), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(T9w, T9v), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VFMAI(T9u, T9j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 75)]), VFNMSI(T9u, T9j), ovs, &(xo[WS(os, 1)])); + T6o = VADD(T6h, T6i); + T6j = VSUB(T6h, T6i); + T6r = VSUB(T60, T67); + T68 = VADD(T60, T67); + } + } + T6d = VFMA(LDK(KP820678790), T5M, T5N); + T5O = VFNMS(LDK(KP820678790), T5N, T5M); + T5R = VFNMS(LDK(KP820678790), T5Q, T5P); + T6e = VFMA(LDK(KP820678790), T5P, T5Q); + } + } + } + } + } + } + T6D = VFMA(LDK(KP357805721), T6C, T6B); + T6K = VFNMS(LDK(KP357805721), T6B, T6C); + { + V T5L, T6v, T6c, T6G; + T5L = VFNMS(LDK(KP980785280), T5K, T5J); + T6v = VFMA(LDK(KP980785280), T5K, T5J); + T6c = VFMA(LDK(KP980785280), T6b, T6a); + T6G = VFNMS(LDK(KP980785280), T6b, T6a); + { + V T5S, T6H, T6f, T6w; + T5S = VADD(T5O, T5R); + T6H = VSUB(T5O, T5R); + T6f = VSUB(T6d, T6e); + T6w = VADD(T6d, T6e); + { + V T6L, T6Q, T6E, T6T; + T6L = VSUB(T6J, T6K); + T6Q = VADD(T6J, T6K); + T6E = VADD(T6A, T6D); + T6T = VSUB(T6A, T6D); + { + V T6S, T6I, T5T, T6n; + T6S = VFNMS(LDK(KP773010453), T6H, T6G); + T6I = VFMA(LDK(KP773010453), T6H, T6G); + T5T = VFNMS(LDK(KP773010453), T5S, T5L); + T6n = VFMA(LDK(KP773010453), T5S, T5L); + { + V T6P, T6x, T6g, T6q; + T6P = VFNMS(LDK(KP773010453), T6w, T6v); + T6x = VFMA(LDK(KP773010453), T6w, T6v); + T6g = VFMA(LDK(KP773010453), T6f, T6c); + T6q = VFNMS(LDK(KP773010453), T6f, T6c); + { + V T6M, T6O, T6U, T6W; + T6M = VFNMS(LDK(KP941544065), T6L, T6I); + T6O = VFMA(LDK(KP941544065), T6L, T6I); + T6U = VFNMS(LDK(KP941544065), T6T, T6S); + T6W = VFMA(LDK(KP941544065), T6T, T6S); + { + V T6p, T6t, T69, T6l; + T6p = VFNMS(LDK(KP903989293), T6o, T6n); + T6t = VFMA(LDK(KP903989293), T6o, T6n); + T69 = VFNMS(LDK(KP903989293), T68, T5T); + T6l = VFMA(LDK(KP903989293), T68, T5T); + { + V T6F, T6N, T6R, T6V; + T6F = VFNMS(LDK(KP941544065), T6E, T6x); + T6N = 
VFMA(LDK(KP941544065), T6E, T6x); + T6R = VFMA(LDK(KP941544065), T6Q, T6P); + T6V = VFNMS(LDK(KP941544065), T6Q, T6P); + { + V T6s, T6u, T6k, T6m; + T6s = VFMA(LDK(KP903989293), T6r, T6q); + T6u = VFNMS(LDK(KP903989293), T6r, T6q); + T6k = VFNMS(LDK(KP903989293), T6j, T6g); + T6m = VFMA(LDK(KP903989293), T6j, T6g); + ST(&(xo[WS(os, 121)]), VFMAI(T6O, T6N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(T6O, T6N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 57)]), VFMAI(T6M, T6F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 71)]), VFNMSI(T6M, T6F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 103)]), VFNMSI(T6W, T6V), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VFMAI(T6W, T6V), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 89)]), VFMAI(T6U, T6R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 39)]), VFNMSI(T6U, T6R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 105)]), VFMAI(T6u, T6t), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 23)]), VFNMSI(T6u, T6t), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 87)]), VFNMSI(T6s, T6p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 41)]), VFMAI(T6s, T6p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFMAI(T6m, T6l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 119)]), VFNMSI(T6m, T6l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 73)]), VFMAI(T6k, T69), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VFNMSI(T6k, T69), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + } + } + } + } + } + VLEAVE(); +} +/* Descriptor for this codelet; it is passed to kdft_register together with n1bv_128 by the function below. The leading 128 presumably is the transform size and {440, 0, 642, 0} the operation counts -- TODO confirm against the kdft_desc declaration. */ +static const kdft_desc desc = { 128, XSIMD_STRING("n1bv_128"), {440, 0, 642, 0}, &GENUS, 0, 0, 0, 0 }; +/* Hands n1bv_128 and its descriptor to kdft_register for the planner p. */ +void XSIMD(codelet_n1bv_128) (planner *p) { + X(kdft_register) (p, n1bv_128, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 128 -name n1bv_128 -include n1b.h */ + +/* + * This function contains 1082 FP additions, 330 FP multiplications, + * (or, 938 additions, 186 multiplications, 144 fused multiply/add), + * 194 stack variables, 31 constants, and 256 memory 
accesses + */ +#include "n1b.h" + +static void n1bv_128(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP146730474, +0.146730474455361751658850129646717819706215317); + DVK(KP989176509, +0.989176509964780973451673738016243063983689533); + DVK(KP595699304, +0.595699304492433343467036528829969889511926338); + DVK(KP803207531, +0.803207531480644909806676512963141923879569427); + DVK(KP049067674, +0.049067674327418014254954976942682658314745363); + DVK(KP998795456, +0.998795456205172392714771604759100694443203615); + DVK(KP671558954, +0.671558954847018400625376850427421803228750632); + DVK(KP740951125, +0.740951125354959091175616897495162729728955309); + DVK(KP514102744, +0.514102744193221726593693838968815772608049120); + DVK(KP857728610, +0.857728610000272069902269984284770137042490799); + DVK(KP242980179, +0.242980179903263889948274162077471118320990783); + DVK(KP970031253, +0.970031253194543992603984207286100251456865962); + DVK(KP427555093, +0.427555093430282094320966856888798534304578629); + DVK(KP903989293, +0.903989293123443331586200297230537048710132025); + DVK(KP336889853, +0.336889853392220050689253212619147570477766780); + DVK(KP941544065, +0.941544065183020778412509402599502357185589796); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP555570233, 
+0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + V T49, T6e, Tev, TgK, TfA, TgL, T4U, T5J, T7R, T9o, Tah, TdG, Tcw, TdB, T84; + V T8T, Tfk, Tfo, T1G, T64, Tgs, Th6, T2p, T62, T7t, T9c, Tce, Tdm, T7i, T9e; + V Tc8, Tdp, TgF, TgG, T4q, T4V, TeC, Tfx, T4H, T4W, T7X, T86, Tcr, TdH, T7U; + V T85, Taw, TdC, Tf3, Tf7, Tr, T5X, Tgl, Th3, T1a, T5V, T7a, T95, TbD, Tdf; + V T6Z, T97, Tbx, Tdi, Tgy, Tgz, TgA, TaN, Tdv, TeK, Tfu, T2W, T5M, T35, T5N; + V T7F, T8X, TaI, Tdu, T7C, T8W, TgB, TgC, TgD, Tb4, Tdy, TeR, Tfv, T3x, T5P; + V T3G, T5Q, T7M, T90, TaZ, Tdx, T7J, T8Z, Tbm, Tdg, TbG, Tdj, Tgo, Th4, Tf0; + V Tf8, T76, T98, T7d, T94, T10, T5Y, T1d, T5U, TbX, Tdn, Tch, Tdq, Tgv, Th7; + V Tfh, Tfp, T7p, T9f, T7w, T9b, T2f, T65, T2s, T61; + { + V T47, Ta8, T4O, Ta7, T44, Tcu, T4P, Tct, Taa, Tab, T3P, Tac, T4R, Tad, Tae; + V T3W, Taf, T4S; + { + V T45, T46, T4M, T4N; + T45 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T46 = LD(&(xi[WS(is, 96)]), ivs, &(xi[0])); + T47 = VSUB(T45, T46); + Ta8 = VADD(T45, T46); + T4M = LD(&(xi[0]), ivs, &(xi[0])); + T4N = LD(&(xi[WS(is, 64)]), ivs, &(xi[0])); + T4O = VSUB(T4M, T4N); + Ta7 = VADD(T4M, T4N); + } + { + V T3Y, T3Z, T40, T41, T42, T43; + T3Y = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T3Z = LD(&(xi[WS(is, 80)]), ivs, &(xi[0])); + T40 = VSUB(T3Y, T3Z); + T41 = LD(&(xi[WS(is, 112)]), ivs, &(xi[0])); + T42 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T43 = VSUB(T41, T42); + T44 = VMUL(LDK(KP707106781), VSUB(T40, T43)); + Tcu = VADD(T41, T42); + T4P = VMUL(LDK(KP707106781), 
VADD(T40, T43)); + Tct = VADD(T3Y, T3Z); + } + { + V T3L, T3O, T3S, T3V; + { + V T3J, T3K, T3M, T3N; + T3J = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T3K = LD(&(xi[WS(is, 72)]), ivs, &(xi[0])); + T3L = VSUB(T3J, T3K); + Taa = VADD(T3J, T3K); + T3M = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T3N = LD(&(xi[WS(is, 104)]), ivs, &(xi[0])); + T3O = VSUB(T3M, T3N); + Tab = VADD(T3M, T3N); + } + T3P = VFNMS(LDK(KP382683432), T3O, VMUL(LDK(KP923879532), T3L)); + Tac = VSUB(Taa, Tab); + T4R = VFMA(LDK(KP382683432), T3L, VMUL(LDK(KP923879532), T3O)); + { + V T3Q, T3R, T3T, T3U; + T3Q = LD(&(xi[WS(is, 120)]), ivs, &(xi[0])); + T3R = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T3S = VSUB(T3Q, T3R); + Tad = VADD(T3Q, T3R); + T3T = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T3U = LD(&(xi[WS(is, 88)]), ivs, &(xi[0])); + T3V = VSUB(T3T, T3U); + Tae = VADD(T3T, T3U); + } + T3W = VFMA(LDK(KP923879532), T3S, VMUL(LDK(KP382683432), T3V)); + Taf = VSUB(Tad, Tae); + T4S = VFNMS(LDK(KP382683432), T3S, VMUL(LDK(KP923879532), T3V)); + } + { + V T3X, T48, Tet, Teu; + T3X = VSUB(T3P, T3W); + T48 = VSUB(T44, T47); + T49 = VSUB(T3X, T48); + T6e = VADD(T48, T3X); + Tet = VADD(Ta7, Ta8); + Teu = VADD(Tct, Tcu); + Tev = VSUB(Tet, Teu); + TgK = VADD(Tet, Teu); + } + { + V Tfy, Tfz, T4Q, T4T; + Tfy = VADD(Taa, Tab); + Tfz = VADD(Tad, Tae); + TfA = VSUB(Tfy, Tfz); + TgL = VADD(Tfy, Tfz); + T4Q = VSUB(T4O, T4P); + T4T = VSUB(T4R, T4S); + T4U = VSUB(T4Q, T4T); + T5J = VADD(T4Q, T4T); + } + { + V T7P, T7Q, Ta9, Tag; + T7P = VADD(T4R, T4S); + T7Q = VADD(T47, T44); + T7R = VSUB(T7P, T7Q); + T9o = VADD(T7Q, T7P); + Ta9 = VSUB(Ta7, Ta8); + Tag = VMUL(LDK(KP707106781), VADD(Tac, Taf)); + Tah = VSUB(Ta9, Tag); + TdG = VADD(Ta9, Tag); + } + { + V Tcs, Tcv, T82, T83; + Tcs = VMUL(LDK(KP707106781), VSUB(Tac, Taf)); + Tcv = VSUB(Tct, Tcu); + Tcw = VSUB(Tcs, Tcv); + TdB = VADD(Tcv, Tcs); + T82 = VADD(T4O, T4P); + T83 = VADD(T3P, T3W); + T84 = VSUB(T82, T83); + T8T = VADD(T82, T83); + } + } + { + V Tca, Tcb, T1i, Tfm, T2n, 
Tc5, Tc6, T1p, Tfn, T2k, T1x, Tfi, T2h, Tc0, T1E; + V Tfj, T2i, Tc3, T1l, T1o, Tcc, Tcd; + { + V T1g, T1h, T2l, T2m; + T1g = LD(&(xi[WS(is, 127)]), ivs, &(xi[WS(is, 1)])); + T1h = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + Tca = VADD(T1g, T1h); + T2l = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T2m = LD(&(xi[WS(is, 95)]), ivs, &(xi[WS(is, 1)])); + Tcb = VADD(T2l, T2m); + T1i = VSUB(T1g, T1h); + Tfm = VADD(Tca, Tcb); + T2n = VSUB(T2l, T2m); + } + { + V T1j, T1k, T1m, T1n; + T1j = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1k = LD(&(xi[WS(is, 79)]), ivs, &(xi[WS(is, 1)])); + T1l = VSUB(T1j, T1k); + Tc5 = VADD(T1j, T1k); + T1m = LD(&(xi[WS(is, 111)]), ivs, &(xi[WS(is, 1)])); + T1n = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + T1o = VSUB(T1m, T1n); + Tc6 = VADD(T1m, T1n); + } + T1p = VMUL(LDK(KP707106781), VADD(T1l, T1o)); + Tfn = VADD(Tc5, Tc6); + T2k = VMUL(LDK(KP707106781), VSUB(T1l, T1o)); + { + V T1t, TbY, T1w, TbZ; + { + V T1r, T1s, T1u, T1v; + T1r = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T1s = LD(&(xi[WS(is, 71)]), ivs, &(xi[WS(is, 1)])); + T1t = VSUB(T1r, T1s); + TbY = VADD(T1r, T1s); + T1u = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + T1v = LD(&(xi[WS(is, 103)]), ivs, &(xi[WS(is, 1)])); + T1w = VSUB(T1u, T1v); + TbZ = VADD(T1u, T1v); + } + T1x = VFMA(LDK(KP382683432), T1t, VMUL(LDK(KP923879532), T1w)); + Tfi = VADD(TbY, TbZ); + T2h = VFNMS(LDK(KP382683432), T1w, VMUL(LDK(KP923879532), T1t)); + Tc0 = VSUB(TbY, TbZ); + } + { + V T1A, Tc2, T1D, Tc1; + { + V T1y, T1z, T1B, T1C; + T1y = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1z = LD(&(xi[WS(is, 87)]), ivs, &(xi[WS(is, 1)])); + T1A = VSUB(T1y, T1z); + Tc2 = VADD(T1y, T1z); + T1B = LD(&(xi[WS(is, 119)]), ivs, &(xi[WS(is, 1)])); + T1C = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1D = VSUB(T1B, T1C); + Tc1 = VADD(T1B, T1C); + } + T1E = VFNMS(LDK(KP382683432), T1D, VMUL(LDK(KP923879532), T1A)); + Tfj = VADD(Tc1, Tc2); + T2i = VFMA(LDK(KP923879532), T1D, 
VMUL(LDK(KP382683432), T1A)); + Tc3 = VSUB(Tc1, Tc2); + } + Tfk = VSUB(Tfi, Tfj); + Tfo = VSUB(Tfm, Tfn); + { + V T1q, T1F, Tgq, Tgr; + T1q = VSUB(T1i, T1p); + T1F = VSUB(T1x, T1E); + T1G = VSUB(T1q, T1F); + T64 = VADD(T1q, T1F); + Tgq = VADD(Tfm, Tfn); + Tgr = VADD(Tfi, Tfj); + Tgs = VSUB(Tgq, Tgr); + Th6 = VADD(Tgq, Tgr); + } + { + V T2j, T2o, T7r, T7s; + T2j = VSUB(T2h, T2i); + T2o = VSUB(T2k, T2n); + T2p = VSUB(T2j, T2o); + T62 = VADD(T2o, T2j); + T7r = VADD(T1x, T1E); + T7s = VADD(T2n, T2k); + T7t = VSUB(T7r, T7s); + T9c = VADD(T7s, T7r); + } + Tcc = VSUB(Tca, Tcb); + Tcd = VMUL(LDK(KP707106781), VADD(Tc0, Tc3)); + Tce = VSUB(Tcc, Tcd); + Tdm = VADD(Tcc, Tcd); + { + V T7g, T7h, Tc4, Tc7; + T7g = VADD(T1i, T1p); + T7h = VADD(T2h, T2i); + T7i = VSUB(T7g, T7h); + T9e = VADD(T7g, T7h); + Tc4 = VMUL(LDK(KP707106781), VSUB(Tc0, Tc3)); + Tc7 = VSUB(Tc5, Tc6); + Tc8 = VSUB(Tc4, Tc7); + Tdp = VADD(Tc7, Tc4); + } + } + { + V T4c, Tew, T4o, Tak, T4A, Tez, T4E, Tau, T4j, Tex, T4l, Tan, T4x, TeA, T4F; + V Tar, Tcp, Tcq; + { + V T4a, T4b, Tai, T4m, T4n, Taj; + T4a = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T4b = LD(&(xi[WS(is, 68)]), ivs, &(xi[0])); + Tai = VADD(T4a, T4b); + T4m = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + T4n = LD(&(xi[WS(is, 100)]), ivs, &(xi[0])); + Taj = VADD(T4m, T4n); + T4c = VSUB(T4a, T4b); + Tew = VADD(Tai, Taj); + T4o = VSUB(T4m, T4n); + Tak = VSUB(Tai, Taj); + } + { + V T4y, T4z, Tat, T4C, T4D, Tas; + T4y = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T4z = LD(&(xi[WS(is, 92)]), ivs, &(xi[0])); + Tat = VADD(T4y, T4z); + T4C = LD(&(xi[WS(is, 124)]), ivs, &(xi[0])); + T4D = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tas = VADD(T4C, T4D); + T4A = VSUB(T4y, T4z); + Tez = VADD(Tas, Tat); + T4E = VSUB(T4C, T4D); + Tau = VSUB(Tas, Tat); + } + { + V T4f, Tal, T4i, Tam; + { + V T4d, T4e, T4g, T4h; + T4d = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T4e = LD(&(xi[WS(is, 84)]), ivs, &(xi[0])); + T4f = VSUB(T4d, T4e); + Tal = VADD(T4d, T4e); + T4g = LD(&(xi[WS(is, 116)]), 
ivs, &(xi[0])); + T4h = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + T4i = VSUB(T4g, T4h); + Tam = VADD(T4g, T4h); + } + T4j = VMUL(LDK(KP707106781), VADD(T4f, T4i)); + Tex = VADD(Tal, Tam); + T4l = VMUL(LDK(KP707106781), VSUB(T4f, T4i)); + Tan = VSUB(Tal, Tam); + } + { + V T4t, Tap, T4w, Taq; + { + V T4r, T4s, T4u, T4v; + T4r = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T4s = LD(&(xi[WS(is, 76)]), ivs, &(xi[0])); + T4t = VSUB(T4r, T4s); + Tap = VADD(T4r, T4s); + T4u = LD(&(xi[WS(is, 108)]), ivs, &(xi[0])); + T4v = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + T4w = VSUB(T4u, T4v); + Taq = VADD(T4u, T4v); + } + T4x = VMUL(LDK(KP707106781), VSUB(T4t, T4w)); + TeA = VADD(Tap, Taq); + T4F = VMUL(LDK(KP707106781), VADD(T4t, T4w)); + Tar = VSUB(Tap, Taq); + } + TgF = VADD(Tew, Tex); + TgG = VADD(Tez, TeA); + { + V T4k, T4p, Tey, TeB; + T4k = VSUB(T4c, T4j); + T4p = VSUB(T4l, T4o); + T4q = VFNMS(LDK(KP555570233), T4p, VMUL(LDK(KP831469612), T4k)); + T4V = VFMA(LDK(KP831469612), T4p, VMUL(LDK(KP555570233), T4k)); + Tey = VSUB(Tew, Tex); + TeB = VSUB(Tez, TeA); + TeC = VMUL(LDK(KP707106781), VADD(Tey, TeB)); + Tfx = VMUL(LDK(KP707106781), VSUB(Tey, TeB)); + } + { + V T4B, T4G, T7V, T7W; + T4B = VSUB(T4x, T4A); + T4G = VSUB(T4E, T4F); + T4H = VFMA(LDK(KP555570233), T4B, VMUL(LDK(KP831469612), T4G)); + T4W = VFNMS(LDK(KP555570233), T4G, VMUL(LDK(KP831469612), T4B)); + T7V = VADD(T4A, T4x); + T7W = VADD(T4E, T4F); + T7X = VFMA(LDK(KP195090322), T7V, VMUL(LDK(KP980785280), T7W)); + T86 = VFNMS(LDK(KP195090322), T7W, VMUL(LDK(KP980785280), T7V)); + } + Tcp = VFNMS(LDK(KP382683432), Tan, VMUL(LDK(KP923879532), Tak)); + Tcq = VFMA(LDK(KP923879532), Tau, VMUL(LDK(KP382683432), Tar)); + Tcr = VSUB(Tcp, Tcq); + TdH = VADD(Tcp, Tcq); + { + V T7S, T7T, Tao, Tav; + T7S = VADD(T4c, T4j); + T7T = VADD(T4o, T4l); + T7U = VFNMS(LDK(KP195090322), T7T, VMUL(LDK(KP980785280), T7S)); + T85 = VFMA(LDK(KP980785280), T7T, VMUL(LDK(KP195090322), T7S)); + Tao = VFMA(LDK(KP382683432), Tak, VMUL(LDK(KP923879532), 
Tan)); + Tav = VFNMS(LDK(KP382683432), Tau, VMUL(LDK(KP923879532), Tar)); + Taw = VSUB(Tao, Tav); + TdC = VADD(Tao, Tav); + } + } + { + V Tbz, TbA, T3, Tf5, T18, Tbu, Tbv, Ta, Tf6, T15, Ti, Tf1, T12, Tbp, Tp; + V Tf2, T13, Tbs, T6, T9, TbB, TbC; + { + V T1, T2, T16, T17; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 65)]), ivs, &(xi[WS(is, 1)])); + Tbz = VADD(T1, T2); + T16 = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T17 = LD(&(xi[WS(is, 97)]), ivs, &(xi[WS(is, 1)])); + TbA = VADD(T16, T17); + T3 = VSUB(T1, T2); + Tf5 = VADD(Tbz, TbA); + T18 = VSUB(T16, T17); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 81)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tbu = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 113)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + Tbv = VADD(T7, T8); + } + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tf6 = VADD(Tbu, Tbv); + T15 = VMUL(LDK(KP707106781), VSUB(T6, T9)); + { + V Te, Tbn, Th, Tbo; + { + V Tc, Td, Tf, Tg; + Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 73)]), ivs, &(xi[WS(is, 1)])); + Te = VSUB(Tc, Td); + Tbn = VADD(Tc, Td); + Tf = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 105)]), ivs, &(xi[WS(is, 1)])); + Th = VSUB(Tf, Tg); + Tbo = VADD(Tf, Tg); + } + Ti = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + Tf1 = VADD(Tbn, Tbo); + T12 = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + Tbp = VSUB(Tbn, Tbo); + } + { + V Tl, Tbr, To, Tbq; + { + V Tj, Tk, Tm, Tn; + Tj = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + Tk = LD(&(xi[WS(is, 89)]), ivs, &(xi[WS(is, 1)])); + Tl = VSUB(Tj, Tk); + Tbr = VADD(Tj, Tk); + Tm = LD(&(xi[WS(is, 121)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + To = VSUB(Tm, Tn); + Tbq = VADD(Tm, Tn); + } + Tp = VFNMS(LDK(KP382683432), To, VMUL(LDK(KP923879532), Tl)); + 
Tf2 = VADD(Tbq, Tbr); + T13 = VFMA(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + Tbs = VSUB(Tbq, Tbr); + } + Tf3 = VSUB(Tf1, Tf2); + Tf7 = VSUB(Tf5, Tf6); + { + V Tb, Tq, Tgj, Tgk; + Tb = VSUB(T3, Ta); + Tq = VSUB(Ti, Tp); + Tr = VSUB(Tb, Tq); + T5X = VADD(Tb, Tq); + Tgj = VADD(Tf5, Tf6); + Tgk = VADD(Tf1, Tf2); + Tgl = VSUB(Tgj, Tgk); + Th3 = VADD(Tgj, Tgk); + } + { + V T14, T19, T78, T79; + T14 = VSUB(T12, T13); + T19 = VSUB(T15, T18); + T1a = VSUB(T14, T19); + T5V = VADD(T19, T14); + T78 = VADD(Ti, Tp); + T79 = VADD(T18, T15); + T7a = VSUB(T78, T79); + T95 = VADD(T79, T78); + } + TbB = VSUB(Tbz, TbA); + TbC = VMUL(LDK(KP707106781), VADD(Tbp, Tbs)); + TbD = VSUB(TbB, TbC); + Tdf = VADD(TbB, TbC); + { + V T6X, T6Y, Tbt, Tbw; + T6X = VADD(T3, Ta); + T6Y = VADD(T12, T13); + T6Z = VSUB(T6X, T6Y); + T97 = VADD(T6X, T6Y); + Tbt = VMUL(LDK(KP707106781), VSUB(Tbp, Tbs)); + Tbw = VSUB(Tbu, Tbv); + Tbx = VSUB(Tbt, Tbw); + Tdi = VADD(Tbw, Tbt); + } + } + { + V TaK, TaJ, T2U, TeE, T2Z, TaF, TaG, T2R, TeF, T30, T2C, TeH, T32, TaA, T2J; + V TeI, T33, TaD, T2N, T2Q, TaL, TaM; + { + V T2S, T2T, T2X, T2Y; + T2S = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + T2T = LD(&(xi[WS(is, 98)]), ivs, &(xi[0])); + TaK = VADD(T2S, T2T); + T2X = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2Y = LD(&(xi[WS(is, 66)]), ivs, &(xi[0])); + TaJ = VADD(T2X, T2Y); + T2U = VSUB(T2S, T2T); + TeE = VADD(TaJ, TaK); + T2Z = VSUB(T2X, T2Y); + } + { + V T2L, T2M, T2O, T2P; + T2L = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + T2M = LD(&(xi[WS(is, 82)]), ivs, &(xi[0])); + T2N = VSUB(T2L, T2M); + TaF = VADD(T2L, T2M); + T2O = LD(&(xi[WS(is, 114)]), ivs, &(xi[0])); + T2P = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + T2Q = VSUB(T2O, T2P); + TaG = VADD(T2O, T2P); + } + T2R = VMUL(LDK(KP707106781), VSUB(T2N, T2Q)); + TeF = VADD(TaF, TaG); + T30 = VMUL(LDK(KP707106781), VADD(T2N, T2Q)); + { + V T2y, Tay, T2B, Taz; + { + V T2w, T2x, T2z, T2A; + T2w = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T2x = LD(&(xi[WS(is, 74)]), ivs, 
&(xi[0])); + T2y = VSUB(T2w, T2x); + Tay = VADD(T2w, T2x); + T2z = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T2A = LD(&(xi[WS(is, 106)]), ivs, &(xi[0])); + T2B = VSUB(T2z, T2A); + Taz = VADD(T2z, T2A); + } + T2C = VFNMS(LDK(KP382683432), T2B, VMUL(LDK(KP923879532), T2y)); + TeH = VADD(Tay, Taz); + T32 = VFMA(LDK(KP382683432), T2y, VMUL(LDK(KP923879532), T2B)); + TaA = VSUB(Tay, Taz); + } + { + V T2F, TaB, T2I, TaC; + { + V T2D, T2E, T2G, T2H; + T2D = LD(&(xi[WS(is, 122)]), ivs, &(xi[0])); + T2E = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + T2F = VSUB(T2D, T2E); + TaB = VADD(T2D, T2E); + T2G = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + T2H = LD(&(xi[WS(is, 90)]), ivs, &(xi[0])); + T2I = VSUB(T2G, T2H); + TaC = VADD(T2G, T2H); + } + T2J = VFMA(LDK(KP923879532), T2F, VMUL(LDK(KP382683432), T2I)); + TeI = VADD(TaB, TaC); + T33 = VFNMS(LDK(KP382683432), T2F, VMUL(LDK(KP923879532), T2I)); + TaD = VSUB(TaB, TaC); + } + Tgy = VADD(TeE, TeF); + Tgz = VADD(TeH, TeI); + TgA = VSUB(Tgy, Tgz); + TaL = VSUB(TaJ, TaK); + TaM = VMUL(LDK(KP707106781), VADD(TaA, TaD)); + TaN = VSUB(TaL, TaM); + Tdv = VADD(TaL, TaM); + { + V TeG, TeJ, T2K, T2V; + TeG = VSUB(TeE, TeF); + TeJ = VSUB(TeH, TeI); + TeK = VFMA(LDK(KP382683432), TeG, VMUL(LDK(KP923879532), TeJ)); + Tfu = VFNMS(LDK(KP382683432), TeJ, VMUL(LDK(KP923879532), TeG)); + T2K = VSUB(T2C, T2J); + T2V = VSUB(T2R, T2U); + T2W = VSUB(T2K, T2V); + T5M = VADD(T2V, T2K); + } + { + V T31, T34, T7D, T7E; + T31 = VSUB(T2Z, T30); + T34 = VSUB(T32, T33); + T35 = VSUB(T31, T34); + T5N = VADD(T31, T34); + T7D = VADD(T32, T33); + T7E = VADD(T2U, T2R); + T7F = VSUB(T7D, T7E); + T8X = VADD(T7E, T7D); + } + { + V TaE, TaH, T7A, T7B; + TaE = VMUL(LDK(KP707106781), VSUB(TaA, TaD)); + TaH = VSUB(TaF, TaG); + TaI = VSUB(TaE, TaH); + Tdu = VADD(TaH, TaE); + T7A = VADD(T2Z, T30); + T7B = VADD(T2C, T2J); + T7C = VSUB(T7A, T7B); + T8W = VADD(T7A, T7B); + } + } + { + V Tb1, Tb0, T3v, TeO, T3A, TaW, TaX, T3s, TeP, T3B, T3d, TeL, T3D, TaR, T3k; + V TeM, T3E, TaU, T3o, 
T3r, Tb2, Tb3; + { + V T3t, T3u, T3y, T3z; + T3t = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + T3u = LD(&(xi[WS(is, 94)]), ivs, &(xi[0])); + Tb1 = VADD(T3t, T3u); + T3y = LD(&(xi[WS(is, 126)]), ivs, &(xi[0])); + T3z = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + Tb0 = VADD(T3y, T3z); + T3v = VSUB(T3t, T3u); + TeO = VADD(Tb0, Tb1); + T3A = VSUB(T3y, T3z); + } + { + V T3m, T3n, T3p, T3q; + T3m = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T3n = LD(&(xi[WS(is, 78)]), ivs, &(xi[0])); + T3o = VSUB(T3m, T3n); + TaW = VADD(T3m, T3n); + T3p = LD(&(xi[WS(is, 110)]), ivs, &(xi[0])); + T3q = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + T3r = VSUB(T3p, T3q); + TaX = VADD(T3p, T3q); + } + T3s = VMUL(LDK(KP707106781), VSUB(T3o, T3r)); + TeP = VADD(TaW, TaX); + T3B = VMUL(LDK(KP707106781), VADD(T3o, T3r)); + { + V T39, TaP, T3c, TaQ; + { + V T37, T38, T3a, T3b; + T37 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T38 = LD(&(xi[WS(is, 70)]), ivs, &(xi[0])); + T39 = VSUB(T37, T38); + TaP = VADD(T37, T38); + T3a = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + T3b = LD(&(xi[WS(is, 102)]), ivs, &(xi[0])); + T3c = VSUB(T3a, T3b); + TaQ = VADD(T3a, T3b); + } + T3d = VFNMS(LDK(KP382683432), T3c, VMUL(LDK(KP923879532), T39)); + TeL = VADD(TaP, TaQ); + T3D = VFMA(LDK(KP382683432), T39, VMUL(LDK(KP923879532), T3c)); + TaR = VSUB(TaP, TaQ); + } + { + V T3g, TaS, T3j, TaT; + { + V T3e, T3f, T3h, T3i; + T3e = LD(&(xi[WS(is, 118)]), ivs, &(xi[0])); + T3f = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + T3g = VSUB(T3e, T3f); + TaS = VADD(T3e, T3f); + T3h = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + T3i = LD(&(xi[WS(is, 86)]), ivs, &(xi[0])); + T3j = VSUB(T3h, T3i); + TaT = VADD(T3h, T3i); + } + T3k = VFMA(LDK(KP923879532), T3g, VMUL(LDK(KP382683432), T3j)); + TeM = VADD(TaS, TaT); + T3E = VFNMS(LDK(KP382683432), T3g, VMUL(LDK(KP923879532), T3j)); + TaU = VSUB(TaS, TaT); + } + TgB = VADD(TeO, TeP); + TgC = VADD(TeL, TeM); + TgD = VSUB(TgB, TgC); + Tb2 = VSUB(Tb0, Tb1); + Tb3 = VMUL(LDK(KP707106781), VADD(TaR, TaU)); + Tb4 = VSUB(Tb2, 
Tb3); + Tdy = VADD(Tb2, Tb3); + { + V TeN, TeQ, T3l, T3w; + TeN = VSUB(TeL, TeM); + TeQ = VSUB(TeO, TeP); + TeR = VFNMS(LDK(KP382683432), TeQ, VMUL(LDK(KP923879532), TeN)); + Tfv = VFMA(LDK(KP923879532), TeQ, VMUL(LDK(KP382683432), TeN)); + T3l = VSUB(T3d, T3k); + T3w = VSUB(T3s, T3v); + T3x = VSUB(T3l, T3w); + T5P = VADD(T3w, T3l); + } + { + V T3C, T3F, T7K, T7L; + T3C = VSUB(T3A, T3B); + T3F = VSUB(T3D, T3E); + T3G = VSUB(T3C, T3F); + T5Q = VADD(T3C, T3F); + T7K = VADD(T3A, T3B); + T7L = VADD(T3d, T3k); + T7M = VSUB(T7K, T7L); + T90 = VADD(T7K, T7L); + } + { + V TaV, TaY, T7H, T7I; + TaV = VMUL(LDK(KP707106781), VSUB(TaR, TaU)); + TaY = VSUB(TaW, TaX); + TaZ = VSUB(TaV, TaY); + Tdx = VADD(TaY, TaV); + T7H = VADD(T3D, T3E); + T7I = VADD(T3v, T3s); + T7J = VSUB(T7H, T7I); + T8Z = VADD(T7I, T7H); + } + } + { + V TB, TeU, TF, Tba, TS, TeX, TW, Tbh, Ty, TeV, TG, Tbd, TP, TeY, TX; + V Tbk; + { + V Tz, TA, Tb9, TD, TE, Tb8; + Tz = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + TA = LD(&(xi[WS(is, 101)]), ivs, &(xi[WS(is, 1)])); + Tb9 = VADD(Tz, TA); + TD = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + TE = LD(&(xi[WS(is, 69)]), ivs, &(xi[WS(is, 1)])); + Tb8 = VADD(TD, TE); + TB = VSUB(Tz, TA); + TeU = VADD(Tb8, Tb9); + TF = VSUB(TD, TE); + Tba = VSUB(Tb8, Tb9); + } + { + V TQ, TR, Tbg, TU, TV, Tbf; + TQ = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + TR = LD(&(xi[WS(is, 93)]), ivs, &(xi[WS(is, 1)])); + Tbg = VADD(TQ, TR); + TU = LD(&(xi[WS(is, 125)]), ivs, &(xi[WS(is, 1)])); + TV = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + Tbf = VADD(TU, TV); + TS = VSUB(TQ, TR); + TeX = VADD(Tbf, Tbg); + TW = VSUB(TU, TV); + Tbh = VSUB(Tbf, Tbg); + } + { + V Tu, Tbb, Tx, Tbc; + { + V Ts, Tt, Tv, Tw; + Ts = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 85)]), ivs, &(xi[WS(is, 1)])); + Tu = VSUB(Ts, Tt); + Tbb = VADD(Ts, Tt); + Tv = LD(&(xi[WS(is, 117)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + Tx = VSUB(Tv, Tw); + 
Tbc = VADD(Tv, Tw); + } + Ty = VMUL(LDK(KP707106781), VSUB(Tu, Tx)); + TeV = VADD(Tbb, Tbc); + TG = VMUL(LDK(KP707106781), VADD(Tu, Tx)); + Tbd = VSUB(Tbb, Tbc); + } + { + V TL, Tbi, TO, Tbj; + { + V TJ, TK, TM, TN; + TJ = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + TK = LD(&(xi[WS(is, 77)]), ivs, &(xi[WS(is, 1)])); + TL = VSUB(TJ, TK); + Tbi = VADD(TJ, TK); + TM = LD(&(xi[WS(is, 109)]), ivs, &(xi[WS(is, 1)])); + TN = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + TO = VSUB(TM, TN); + Tbj = VADD(TM, TN); + } + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + TeY = VADD(Tbi, Tbj); + TX = VMUL(LDK(KP707106781), VADD(TL, TO)); + Tbk = VSUB(Tbi, Tbj); + } + { + V Tbe, Tbl, TeW, TeZ; + Tbe = VFNMS(LDK(KP382683432), Tbd, VMUL(LDK(KP923879532), Tba)); + Tbl = VFMA(LDK(KP923879532), Tbh, VMUL(LDK(KP382683432), Tbk)); + Tbm = VSUB(Tbe, Tbl); + Tdg = VADD(Tbe, Tbl); + { + V TbE, TbF, Tgm, Tgn; + TbE = VFMA(LDK(KP382683432), Tba, VMUL(LDK(KP923879532), Tbd)); + TbF = VFNMS(LDK(KP382683432), Tbh, VMUL(LDK(KP923879532), Tbk)); + TbG = VSUB(TbE, TbF); + Tdj = VADD(TbE, TbF); + Tgm = VADD(TeU, TeV); + Tgn = VADD(TeX, TeY); + Tgo = VSUB(Tgm, Tgn); + Th4 = VADD(Tgm, Tgn); + } + TeW = VSUB(TeU, TeV); + TeZ = VSUB(TeX, TeY); + Tf0 = VMUL(LDK(KP707106781), VSUB(TeW, TeZ)); + Tf8 = VMUL(LDK(KP707106781), VADD(TeW, TeZ)); + { + V T72, T7b, T75, T7c; + { + V T70, T71, T73, T74; + T70 = VADD(TB, Ty); + T71 = VADD(TF, TG); + T72 = VFMA(LDK(KP980785280), T70, VMUL(LDK(KP195090322), T71)); + T7b = VFNMS(LDK(KP195090322), T70, VMUL(LDK(KP980785280), T71)); + T73 = VADD(TS, TP); + T74 = VADD(TW, TX); + T75 = VFNMS(LDK(KP195090322), T74, VMUL(LDK(KP980785280), T73)); + T7c = VFMA(LDK(KP195090322), T73, VMUL(LDK(KP980785280), T74)); + } + T76 = VSUB(T72, T75); + T98 = VADD(T7b, T7c); + T7d = VSUB(T7b, T7c); + T94 = VADD(T72, T75); + } + { + V TI, T1b, TZ, T1c; + { + V TC, TH, TT, TY; + TC = VSUB(Ty, TB); + TH = VSUB(TF, TG); + TI = VFMA(LDK(KP831469612), TC, VMUL(LDK(KP555570233), TH)); + T1b = 
VFNMS(LDK(KP555570233), TC, VMUL(LDK(KP831469612), TH)); + TT = VSUB(TP, TS); + TY = VSUB(TW, TX); + TZ = VFNMS(LDK(KP555570233), TY, VMUL(LDK(KP831469612), TT)); + T1c = VFMA(LDK(KP555570233), TT, VMUL(LDK(KP831469612), TY)); + } + T10 = VSUB(TI, TZ); + T5Y = VADD(T1b, T1c); + T1d = VSUB(T1b, T1c); + T5U = VADD(TI, TZ); + } + } + } + { + V T1Q, Tfb, T1U, TbL, T27, Tfe, T2b, TbS, T1N, Tfc, T1V, TbO, T24, Tff, T2c; + V TbV; + { + V T1O, T1P, TbK, T1S, T1T, TbJ; + T1O = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 99)]), ivs, &(xi[WS(is, 1)])); + TbK = VADD(T1O, T1P); + T1S = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1T = LD(&(xi[WS(is, 67)]), ivs, &(xi[WS(is, 1)])); + TbJ = VADD(T1S, T1T); + T1Q = VSUB(T1O, T1P); + Tfb = VADD(TbJ, TbK); + T1U = VSUB(T1S, T1T); + TbL = VSUB(TbJ, TbK); + } + { + V T25, T26, TbR, T29, T2a, TbQ; + T25 = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T26 = LD(&(xi[WS(is, 91)]), ivs, &(xi[WS(is, 1)])); + TbR = VADD(T25, T26); + T29 = LD(&(xi[WS(is, 123)]), ivs, &(xi[WS(is, 1)])); + T2a = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + TbQ = VADD(T29, T2a); + T27 = VSUB(T25, T26); + Tfe = VADD(TbQ, TbR); + T2b = VSUB(T29, T2a); + TbS = VSUB(TbQ, TbR); + } + { + V T1J, TbM, T1M, TbN; + { + V T1H, T1I, T1K, T1L; + T1H = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1I = LD(&(xi[WS(is, 83)]), ivs, &(xi[WS(is, 1)])); + T1J = VSUB(T1H, T1I); + TbM = VADD(T1H, T1I); + T1K = LD(&(xi[WS(is, 115)]), ivs, &(xi[WS(is, 1)])); + T1L = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T1M = VSUB(T1K, T1L); + TbN = VADD(T1K, T1L); + } + T1N = VMUL(LDK(KP707106781), VSUB(T1J, T1M)); + Tfc = VADD(TbM, TbN); + T1V = VMUL(LDK(KP707106781), VADD(T1J, T1M)); + TbO = VSUB(TbM, TbN); + } + { + V T20, TbT, T23, TbU; + { + V T1Y, T1Z, T21, T22; + T1Y = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T1Z = LD(&(xi[WS(is, 75)]), ivs, &(xi[WS(is, 1)])); + T20 = VSUB(T1Y, T1Z); + TbT = VADD(T1Y, T1Z); + T21 = LD(&(xi[WS(is, 107)]), ivs, 
&(xi[WS(is, 1)])); + T22 = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T23 = VSUB(T21, T22); + TbU = VADD(T21, T22); + } + T24 = VMUL(LDK(KP707106781), VSUB(T20, T23)); + Tff = VADD(TbT, TbU); + T2c = VMUL(LDK(KP707106781), VADD(T20, T23)); + TbV = VSUB(TbT, TbU); + } + { + V TbP, TbW, Tfd, Tfg; + TbP = VFNMS(LDK(KP382683432), TbO, VMUL(LDK(KP923879532), TbL)); + TbW = VFMA(LDK(KP923879532), TbS, VMUL(LDK(KP382683432), TbV)); + TbX = VSUB(TbP, TbW); + Tdn = VADD(TbP, TbW); + { + V Tcf, Tcg, Tgt, Tgu; + Tcf = VFMA(LDK(KP382683432), TbL, VMUL(LDK(KP923879532), TbO)); + Tcg = VFNMS(LDK(KP382683432), TbS, VMUL(LDK(KP923879532), TbV)); + Tch = VSUB(Tcf, Tcg); + Tdq = VADD(Tcf, Tcg); + Tgt = VADD(Tfb, Tfc); + Tgu = VADD(Tfe, Tff); + Tgv = VSUB(Tgt, Tgu); + Th7 = VADD(Tgt, Tgu); + } + Tfd = VSUB(Tfb, Tfc); + Tfg = VSUB(Tfe, Tff); + Tfh = VMUL(LDK(KP707106781), VSUB(Tfd, Tfg)); + Tfp = VMUL(LDK(KP707106781), VADD(Tfd, Tfg)); + { + V T7l, T7u, T7o, T7v; + { + V T7j, T7k, T7m, T7n; + T7j = VADD(T1Q, T1N); + T7k = VADD(T1U, T1V); + T7l = VFMA(LDK(KP980785280), T7j, VMUL(LDK(KP195090322), T7k)); + T7u = VFNMS(LDK(KP195090322), T7j, VMUL(LDK(KP980785280), T7k)); + T7m = VADD(T27, T24); + T7n = VADD(T2b, T2c); + T7o = VFNMS(LDK(KP195090322), T7n, VMUL(LDK(KP980785280), T7m)); + T7v = VFMA(LDK(KP195090322), T7m, VMUL(LDK(KP980785280), T7n)); + } + T7p = VSUB(T7l, T7o); + T9f = VADD(T7u, T7v); + T7w = VSUB(T7u, T7v); + T9b = VADD(T7l, T7o); + } + { + V T1X, T2q, T2e, T2r; + { + V T1R, T1W, T28, T2d; + T1R = VSUB(T1N, T1Q); + T1W = VSUB(T1U, T1V); + T1X = VFMA(LDK(KP831469612), T1R, VMUL(LDK(KP555570233), T1W)); + T2q = VFNMS(LDK(KP555570233), T1R, VMUL(LDK(KP831469612), T1W)); + T28 = VSUB(T24, T27); + T2d = VSUB(T2b, T2c); + T2e = VFNMS(LDK(KP555570233), T2d, VMUL(LDK(KP831469612), T28)); + T2r = VFMA(LDK(KP555570233), T28, VMUL(LDK(KP831469612), T2d)); + } + T2f = VSUB(T1X, T2e); + T65 = VADD(T2q, T2r); + T2s = VSUB(T2q, T2r); + T61 = VADD(T1X, T2e); + } + } + } + { + V Tgx, 
TgW, TgR, TgZ, TgI, TgY, TgO, TgV; + { + V Tgp, Tgw, TgP, TgQ; + Tgp = VFNMS(LDK(KP382683432), Tgo, VMUL(LDK(KP923879532), Tgl)); + Tgw = VFMA(LDK(KP923879532), Tgs, VMUL(LDK(KP382683432), Tgv)); + Tgx = VSUB(Tgp, Tgw); + TgW = VADD(Tgp, Tgw); + TgP = VFMA(LDK(KP382683432), Tgl, VMUL(LDK(KP923879532), Tgo)); + TgQ = VFNMS(LDK(KP382683432), Tgs, VMUL(LDK(KP923879532), Tgv)); + TgR = VSUB(TgP, TgQ); + TgZ = VADD(TgP, TgQ); + } + { + V TgE, TgH, TgM, TgN; + TgE = VMUL(LDK(KP707106781), VSUB(TgA, TgD)); + TgH = VSUB(TgF, TgG); + TgI = VSUB(TgE, TgH); + TgY = VADD(TgH, TgE); + TgM = VSUB(TgK, TgL); + TgN = VMUL(LDK(KP707106781), VADD(TgA, TgD)); + TgO = VSUB(TgM, TgN); + TgV = VADD(TgM, TgN); + } + { + V TgJ, TgS, Th1, Th2; + TgJ = VBYI(VSUB(Tgx, TgI)); + TgS = VSUB(TgO, TgR); + ST(&(xo[WS(os, 40)]), VADD(TgJ, TgS), ovs, &(xo[0])); + ST(&(xo[WS(os, 88)]), VSUB(TgS, TgJ), ovs, &(xo[0])); + Th1 = VSUB(TgV, TgW); + Th2 = VBYI(VSUB(TgZ, TgY)); + ST(&(xo[WS(os, 72)]), VSUB(Th1, Th2), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VADD(Th1, Th2), ovs, &(xo[0])); + } + { + V TgT, TgU, TgX, Th0; + TgT = VBYI(VADD(TgI, Tgx)); + TgU = VADD(TgO, TgR); + ST(&(xo[WS(os, 24)]), VADD(TgT, TgU), ovs, &(xo[0])); + ST(&(xo[WS(os, 104)]), VSUB(TgU, TgT), ovs, &(xo[0])); + TgX = VADD(TgV, TgW); + Th0 = VBYI(VADD(TgY, TgZ)); + ST(&(xo[WS(os, 120)]), VSUB(TgX, Th0), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(TgX, Th0), ovs, &(xo[0])); + } + } + { + V Th9, Thh, Thq, Ths, Thc, Thm, Thg, Thl, Thn, Thr; + { + V Th5, Th8, Tho, Thp; + Th5 = VSUB(Th3, Th4); + Th8 = VSUB(Th6, Th7); + Th9 = VMUL(LDK(KP707106781), VSUB(Th5, Th8)); + Thh = VMUL(LDK(KP707106781), VADD(Th5, Th8)); + Tho = VADD(Th3, Th4); + Thp = VADD(Th6, Th7); + Thq = VBYI(VSUB(Tho, Thp)); + Ths = VADD(Tho, Thp); + } + { + V Tha, Thb, The, Thf; + Tha = VADD(Tgy, Tgz); + Thb = VADD(TgB, TgC); + Thc = VSUB(Tha, Thb); + Thm = VADD(Tha, Thb); + The = VADD(TgK, TgL); + Thf = VADD(TgF, TgG); + Thg = VSUB(The, Thf); + Thl = VADD(The, Thf); + } + Thn 
= VSUB(Thl, Thm); + ST(&(xo[WS(os, 96)]), VSUB(Thn, Thq), ovs, &(xo[0])); + ST(&(xo[WS(os, 32)]), VADD(Thn, Thq), ovs, &(xo[0])); + Thr = VADD(Thl, Thm); + ST(&(xo[WS(os, 64)]), VSUB(Thr, Ths), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Thr, Ths), ovs, &(xo[0])); + { + V Thd, Thi, Thj, Thk; + Thd = VBYI(VSUB(Th9, Thc)); + Thi = VSUB(Thg, Thh); + ST(&(xo[WS(os, 48)]), VADD(Thd, Thi), ovs, &(xo[0])); + ST(&(xo[WS(os, 80)]), VSUB(Thi, Thd), ovs, &(xo[0])); + Thj = VBYI(VADD(Thc, Th9)); + Thk = VADD(Thg, Thh); + ST(&(xo[WS(os, 16)]), VADD(Thj, Thk), ovs, &(xo[0])); + ST(&(xo[WS(os, 112)]), VSUB(Thk, Thj), ovs, &(xo[0])); + } + } + { + V TeT, TfM, TfC, TfK, Tfs, TfN, TfF, TfJ; + { + V TeD, TeS, Tfw, TfB; + TeD = VSUB(Tev, TeC); + TeS = VSUB(TeK, TeR); + TeT = VSUB(TeD, TeS); + TfM = VADD(TeD, TeS); + Tfw = VSUB(Tfu, Tfv); + TfB = VSUB(Tfx, TfA); + TfC = VSUB(Tfw, TfB); + TfK = VADD(TfB, Tfw); + { + V Tfa, TfD, Tfr, TfE; + { + V Tf4, Tf9, Tfl, Tfq; + Tf4 = VSUB(Tf0, Tf3); + Tf9 = VSUB(Tf7, Tf8); + Tfa = VFMA(LDK(KP831469612), Tf4, VMUL(LDK(KP555570233), Tf9)); + TfD = VFNMS(LDK(KP555570233), Tf4, VMUL(LDK(KP831469612), Tf9)); + Tfl = VSUB(Tfh, Tfk); + Tfq = VSUB(Tfo, Tfp); + Tfr = VFNMS(LDK(KP555570233), Tfq, VMUL(LDK(KP831469612), Tfl)); + TfE = VFMA(LDK(KP555570233), Tfl, VMUL(LDK(KP831469612), Tfq)); + } + Tfs = VSUB(Tfa, Tfr); + TfN = VADD(TfD, TfE); + TfF = VSUB(TfD, TfE); + TfJ = VADD(Tfa, Tfr); + } + } + { + V Tft, TfG, TfP, TfQ; + Tft = VADD(TeT, Tfs); + TfG = VBYI(VADD(TfC, TfF)); + ST(&(xo[WS(os, 108)]), VSUB(Tft, TfG), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VADD(Tft, TfG), ovs, &(xo[0])); + TfP = VBYI(VADD(TfK, TfJ)); + TfQ = VADD(TfM, TfN); + ST(&(xo[WS(os, 12)]), VADD(TfP, TfQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 116)]), VSUB(TfQ, TfP), ovs, &(xo[0])); + } + { + V TfH, TfI, TfL, TfO; + TfH = VSUB(TeT, Tfs); + TfI = VBYI(VSUB(TfF, TfC)); + ST(&(xo[WS(os, 84)]), VSUB(TfH, TfI), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VADD(TfH, TfI), ovs, &(xo[0])); + TfL = 
VBYI(VSUB(TfJ, TfK)); + TfO = VSUB(TfM, TfN); + ST(&(xo[WS(os, 52)]), VADD(TfL, TfO), ovs, &(xo[0])); + ST(&(xo[WS(os, 76)]), VSUB(TfO, TfL), ovs, &(xo[0])); + } + } + { + V TfT, Tge, Tg4, Tgc, Tg0, Tgf, Tg7, Tgb; + { + V TfR, TfS, Tg2, Tg3; + TfR = VADD(Tev, TeC); + TfS = VADD(Tfu, Tfv); + TfT = VSUB(TfR, TfS); + Tge = VADD(TfR, TfS); + Tg2 = VADD(TeK, TeR); + Tg3 = VADD(TfA, Tfx); + Tg4 = VSUB(Tg2, Tg3); + Tgc = VADD(Tg3, Tg2); + { + V TfW, Tg5, TfZ, Tg6; + { + V TfU, TfV, TfX, TfY; + TfU = VADD(Tf3, Tf0); + TfV = VADD(Tf7, Tf8); + TfW = VFMA(LDK(KP980785280), TfU, VMUL(LDK(KP195090322), TfV)); + Tg5 = VFNMS(LDK(KP195090322), TfU, VMUL(LDK(KP980785280), TfV)); + TfX = VADD(Tfk, Tfh); + TfY = VADD(Tfo, Tfp); + TfZ = VFNMS(LDK(KP195090322), TfY, VMUL(LDK(KP980785280), TfX)); + Tg6 = VFMA(LDK(KP195090322), TfX, VMUL(LDK(KP980785280), TfY)); + } + Tg0 = VSUB(TfW, TfZ); + Tgf = VADD(Tg5, Tg6); + Tg7 = VSUB(Tg5, Tg6); + Tgb = VADD(TfW, TfZ); + } + } + { + V Tg1, Tg8, Tgh, Tgi; + Tg1 = VADD(TfT, Tg0); + Tg8 = VBYI(VADD(Tg4, Tg7)); + ST(&(xo[WS(os, 100)]), VSUB(Tg1, Tg8), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VADD(Tg1, Tg8), ovs, &(xo[0])); + Tgh = VBYI(VADD(Tgc, Tgb)); + Tgi = VADD(Tge, Tgf); + ST(&(xo[WS(os, 4)]), VADD(Tgh, Tgi), ovs, &(xo[0])); + ST(&(xo[WS(os, 124)]), VSUB(Tgi, Tgh), ovs, &(xo[0])); + } + { + V Tg9, Tga, Tgd, Tgg; + Tg9 = VSUB(TfT, Tg0); + Tga = VBYI(VSUB(Tg7, Tg4)); + ST(&(xo[WS(os, 92)]), VSUB(Tg9, Tga), ovs, &(xo[0])); + ST(&(xo[WS(os, 36)]), VADD(Tg9, Tga), ovs, &(xo[0])); + Tgd = VBYI(VSUB(Tgb, Tgc)); + Tgg = VSUB(Tge, Tgf); + ST(&(xo[WS(os, 60)]), VADD(Tgd, Tgg), ovs, &(xo[0])); + ST(&(xo[WS(os, 68)]), VSUB(Tgg, Tgd), ovs, &(xo[0])); + } + } + { + V Tb7, Td8, TcI, Td0, Tcy, Tda, TcG, TcP, Tck, TcJ, TcB, TcF, TcW, Tdb, Td3; + V Td7; + { + V Tax, TcZ, Tb6, TcY, TaO, Tb5; + Tax = VSUB(Tah, Taw); + TcZ = VADD(Tcw, Tcr); + TaO = VFMA(LDK(KP831469612), TaI, VMUL(LDK(KP555570233), TaN)); + Tb5 = VFNMS(LDK(KP555570233), Tb4, VMUL(LDK(KP831469612), 
TaZ)); + Tb6 = VSUB(TaO, Tb5); + TcY = VADD(TaO, Tb5); + Tb7 = VSUB(Tax, Tb6); + Td8 = VADD(TcZ, TcY); + TcI = VADD(Tax, Tb6); + Td0 = VSUB(TcY, TcZ); + } + { + V Tcx, TcN, Tco, TcO, Tcm, Tcn; + Tcx = VSUB(Tcr, Tcw); + TcN = VADD(Tah, Taw); + Tcm = VFNMS(LDK(KP555570233), TaI, VMUL(LDK(KP831469612), TaN)); + Tcn = VFMA(LDK(KP555570233), TaZ, VMUL(LDK(KP831469612), Tb4)); + Tco = VSUB(Tcm, Tcn); + TcO = VADD(Tcm, Tcn); + Tcy = VSUB(Tco, Tcx); + Tda = VADD(TcN, TcO); + TcG = VADD(Tcx, Tco); + TcP = VSUB(TcN, TcO); + } + { + V TbI, Tcz, Tcj, TcA; + { + V Tby, TbH, Tc9, Tci; + Tby = VSUB(Tbm, Tbx); + TbH = VSUB(TbD, TbG); + TbI = VFMA(LDK(KP881921264), Tby, VMUL(LDK(KP471396736), TbH)); + Tcz = VFNMS(LDK(KP471396736), Tby, VMUL(LDK(KP881921264), TbH)); + Tc9 = VSUB(TbX, Tc8); + Tci = VSUB(Tce, Tch); + Tcj = VFNMS(LDK(KP471396736), Tci, VMUL(LDK(KP881921264), Tc9)); + TcA = VFMA(LDK(KP471396736), Tc9, VMUL(LDK(KP881921264), Tci)); + } + Tck = VSUB(TbI, Tcj); + TcJ = VADD(Tcz, TcA); + TcB = VSUB(Tcz, TcA); + TcF = VADD(TbI, Tcj); + } + { + V TcS, Td1, TcV, Td2; + { + V TcQ, TcR, TcT, TcU; + TcQ = VADD(Tbx, Tbm); + TcR = VADD(TbD, TbG); + TcS = VFMA(LDK(KP956940335), TcQ, VMUL(LDK(KP290284677), TcR)); + Td1 = VFNMS(LDK(KP290284677), TcQ, VMUL(LDK(KP956940335), TcR)); + TcT = VADD(Tc8, TbX); + TcU = VADD(Tce, Tch); + TcV = VFNMS(LDK(KP290284677), TcU, VMUL(LDK(KP956940335), TcT)); + Td2 = VFMA(LDK(KP290284677), TcT, VMUL(LDK(KP956940335), TcU)); + } + TcW = VSUB(TcS, TcV); + Tdb = VADD(Td1, Td2); + Td3 = VSUB(Td1, Td2); + Td7 = VADD(TcS, TcV); + } + { + V Tcl, TcC, Td9, Tdc; + Tcl = VADD(Tb7, Tck); + TcC = VBYI(VADD(Tcy, TcB)); + ST(&(xo[WS(os, 106)]), VSUB(Tcl, TcC), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VADD(Tcl, TcC), ovs, &(xo[0])); + Td9 = VBYI(VSUB(Td7, Td8)); + Tdc = VSUB(Tda, Tdb); + ST(&(xo[WS(os, 58)]), VADD(Td9, Tdc), ovs, &(xo[0])); + ST(&(xo[WS(os, 70)]), VSUB(Tdc, Td9), ovs, &(xo[0])); + } + { + V Tdd, Tde, TcD, TcE; + Tdd = VBYI(VADD(Td8, Td7)); + Tde = 
VADD(Tda, Tdb); + ST(&(xo[WS(os, 6)]), VADD(Tdd, Tde), ovs, &(xo[0])); + ST(&(xo[WS(os, 122)]), VSUB(Tde, Tdd), ovs, &(xo[0])); + TcD = VSUB(Tb7, Tck); + TcE = VBYI(VSUB(TcB, Tcy)); + ST(&(xo[WS(os, 86)]), VSUB(TcD, TcE), ovs, &(xo[0])); + ST(&(xo[WS(os, 42)]), VADD(TcD, TcE), ovs, &(xo[0])); + } + { + V TcH, TcK, TcX, Td4; + TcH = VBYI(VSUB(TcF, TcG)); + TcK = VSUB(TcI, TcJ); + ST(&(xo[WS(os, 54)]), VADD(TcH, TcK), ovs, &(xo[0])); + ST(&(xo[WS(os, 74)]), VSUB(TcK, TcH), ovs, &(xo[0])); + TcX = VADD(TcP, TcW); + Td4 = VBYI(VADD(Td0, Td3)); + ST(&(xo[WS(os, 102)]), VSUB(TcX, Td4), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VADD(TcX, Td4), ovs, &(xo[0])); + } + { + V Td5, Td6, TcL, TcM; + Td5 = VSUB(TcP, TcW); + Td6 = VBYI(VSUB(Td3, Td0)); + ST(&(xo[WS(os, 90)]), VSUB(Td5, Td6), ovs, &(xo[0])); + ST(&(xo[WS(os, 38)]), VADD(Td5, Td6), ovs, &(xo[0])); + TcL = VBYI(VADD(TcG, TcF)); + TcM = VADD(TcI, TcJ); + ST(&(xo[WS(os, 10)]), VADD(TcL, TcM), ovs, &(xo[0])); + ST(&(xo[WS(os, 118)]), VSUB(TcM, TcL), ovs, &(xo[0])); + } + } + { + V TdE, Tel, TdW, Tee, TdM, Teo, TdT, Tea, Tdt, TdX, TdP, TdU, Te7, Tep, Teh; + V Tem; + { + V TdD, Tec, TdA, Ted, Tdw, Tdz; + TdD = VADD(TdB, TdC); + Tec = VSUB(TdG, TdH); + Tdw = VFMA(LDK(KP980785280), Tdu, VMUL(LDK(KP195090322), Tdv)); + Tdz = VFNMS(LDK(KP195090322), Tdy, VMUL(LDK(KP980785280), Tdx)); + TdA = VADD(Tdw, Tdz); + Ted = VSUB(Tdw, Tdz); + TdE = VSUB(TdA, TdD); + Tel = VADD(Tec, Ted); + TdW = VADD(TdD, TdA); + Tee = VSUB(Tec, Ted); + } + { + V TdI, Te9, TdL, Te8, TdJ, TdK; + TdI = VADD(TdG, TdH); + Te9 = VSUB(TdC, TdB); + TdJ = VFNMS(LDK(KP195090322), Tdu, VMUL(LDK(KP980785280), Tdv)); + TdK = VFMA(LDK(KP195090322), Tdx, VMUL(LDK(KP980785280), Tdy)); + TdL = VADD(TdJ, TdK); + Te8 = VSUB(TdJ, TdK); + TdM = VSUB(TdI, TdL); + Teo = VADD(Te9, Te8); + TdT = VADD(TdI, TdL); + Tea = VSUB(Te8, Te9); + } + { + V Tdl, TdN, Tds, TdO; + { + V Tdh, Tdk, Tdo, Tdr; + Tdh = VADD(Tdf, Tdg); + Tdk = VADD(Tdi, Tdj); + Tdl = VFNMS(LDK(KP098017140), Tdk, 
VMUL(LDK(KP995184726), Tdh)); + TdN = VFMA(LDK(KP098017140), Tdh, VMUL(LDK(KP995184726), Tdk)); + Tdo = VADD(Tdm, Tdn); + Tdr = VADD(Tdp, Tdq); + Tds = VFMA(LDK(KP995184726), Tdo, VMUL(LDK(KP098017140), Tdr)); + TdO = VFNMS(LDK(KP098017140), Tdo, VMUL(LDK(KP995184726), Tdr)); + } + Tdt = VSUB(Tdl, Tds); + TdX = VADD(TdN, TdO); + TdP = VSUB(TdN, TdO); + TdU = VADD(Tdl, Tds); + } + { + V Te3, Tef, Te6, Teg; + { + V Te1, Te2, Te4, Te5; + Te1 = VSUB(Tdf, Tdg); + Te2 = VSUB(Tdj, Tdi); + Te3 = VFNMS(LDK(KP634393284), Te2, VMUL(LDK(KP773010453), Te1)); + Tef = VFMA(LDK(KP634393284), Te1, VMUL(LDK(KP773010453), Te2)); + Te4 = VSUB(Tdm, Tdn); + Te5 = VSUB(Tdq, Tdp); + Te6 = VFMA(LDK(KP773010453), Te4, VMUL(LDK(KP634393284), Te5)); + Teg = VFNMS(LDK(KP634393284), Te4, VMUL(LDK(KP773010453), Te5)); + } + Te7 = VSUB(Te3, Te6); + Tep = VADD(Tef, Teg); + Teh = VSUB(Tef, Teg); + Tem = VADD(Te3, Te6); + } + { + V TdF, TdQ, Ten, Teq; + TdF = VBYI(VSUB(Tdt, TdE)); + TdQ = VSUB(TdM, TdP); + ST(&(xo[WS(os, 34)]), VADD(TdF, TdQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 94)]), VSUB(TdQ, TdF), ovs, &(xo[0])); + Ten = VADD(Tel, Tem); + Teq = VBYI(VADD(Teo, Tep)); + ST(&(xo[WS(os, 114)]), VSUB(Ten, Teq), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VADD(Ten, Teq), ovs, &(xo[0])); + } + { + V Ter, Tes, TdR, TdS; + Ter = VSUB(Tel, Tem); + Tes = VBYI(VSUB(Tep, Teo)); + ST(&(xo[WS(os, 78)]), VSUB(Ter, Tes), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VADD(Ter, Tes), ovs, &(xo[0])); + TdR = VBYI(VADD(TdE, Tdt)); + TdS = VADD(TdM, TdP); + ST(&(xo[WS(os, 30)]), VADD(TdR, TdS), ovs, &(xo[0])); + ST(&(xo[WS(os, 98)]), VSUB(TdS, TdR), ovs, &(xo[0])); + } + { + V TdV, TdY, Teb, Tei; + TdV = VADD(TdT, TdU); + TdY = VBYI(VADD(TdW, TdX)); + ST(&(xo[WS(os, 126)]), VSUB(TdV, TdY), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(TdV, TdY), ovs, &(xo[0])); + Teb = VBYI(VSUB(Te7, Tea)); + Tei = VSUB(Tee, Teh); + ST(&(xo[WS(os, 46)]), VADD(Teb, Tei), ovs, &(xo[0])); + ST(&(xo[WS(os, 82)]), VSUB(Tei, Teb), ovs, &(xo[0])); + } + { 
+ V Tej, Tek, TdZ, Te0; + Tej = VBYI(VADD(Tea, Te7)); + Tek = VADD(Tee, Teh); + ST(&(xo[WS(os, 18)]), VADD(Tej, Tek), ovs, &(xo[0])); + ST(&(xo[WS(os, 110)]), VSUB(Tek, Tej), ovs, &(xo[0])); + TdZ = VSUB(TdT, TdU); + Te0 = VBYI(VSUB(TdX, TdW)); + ST(&(xo[WS(os, 66)]), VSUB(TdZ, Te0), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VADD(TdZ, Te0), ovs, &(xo[0])); + } + } + { + V T7z, T8n, T8f, T8k, T8x, T8P, T8H, T8M, T80, T8L, T8O, T8c, T8j, T8A, T8E; + V T8m; + { + V T7f, T8d, T7y, T8e; + { + V T77, T7e, T7q, T7x; + T77 = VADD(T6Z, T76); + T7e = VADD(T7a, T7d); + T7f = VFNMS(LDK(KP336889853), T7e, VMUL(LDK(KP941544065), T77)); + T8d = VFMA(LDK(KP336889853), T77, VMUL(LDK(KP941544065), T7e)); + T7q = VADD(T7i, T7p); + T7x = VADD(T7t, T7w); + T7y = VFMA(LDK(KP941544065), T7q, VMUL(LDK(KP336889853), T7x)); + T8e = VFNMS(LDK(KP336889853), T7q, VMUL(LDK(KP941544065), T7x)); + } + T7z = VSUB(T7f, T7y); + T8n = VADD(T8d, T8e); + T8f = VSUB(T8d, T8e); + T8k = VADD(T7f, T7y); + } + { + V T8t, T8F, T8w, T8G; + { + V T8r, T8s, T8u, T8v; + T8r = VSUB(T6Z, T76); + T8s = VSUB(T7d, T7a); + T8t = VFNMS(LDK(KP427555093), T8s, VMUL(LDK(KP903989293), T8r)); + T8F = VFMA(LDK(KP427555093), T8r, VMUL(LDK(KP903989293), T8s)); + T8u = VSUB(T7i, T7p); + T8v = VSUB(T7w, T7t); + T8w = VFMA(LDK(KP903989293), T8u, VMUL(LDK(KP427555093), T8v)); + T8G = VFNMS(LDK(KP427555093), T8u, VMUL(LDK(KP903989293), T8v)); + } + T8x = VSUB(T8t, T8w); + T8P = VADD(T8F, T8G); + T8H = VSUB(T8F, T8G); + T8M = VADD(T8t, T8w); + } + { + V T7Z, T8z, T88, T8C, T7O, T8D, T8b, T8y, T7Y, T87; + T7Y = VSUB(T7U, T7X); + T7Z = VADD(T7R, T7Y); + T8z = VSUB(T7Y, T7R); + T87 = VSUB(T85, T86); + T88 = VADD(T84, T87); + T8C = VSUB(T84, T87); + { + V T7G, T7N, T89, T8a; + T7G = VFMA(LDK(KP634393284), T7C, VMUL(LDK(KP773010453), T7F)); + T7N = VFNMS(LDK(KP634393284), T7M, VMUL(LDK(KP773010453), T7J)); + T7O = VADD(T7G, T7N); + T8D = VSUB(T7G, T7N); + T89 = VFNMS(LDK(KP634393284), T7F, VMUL(LDK(KP773010453), T7C)); + T8a = 
VFMA(LDK(KP773010453), T7M, VMUL(LDK(KP634393284), T7J)); + T8b = VADD(T89, T8a); + T8y = VSUB(T89, T8a); + } + T80 = VSUB(T7O, T7Z); + T8L = VADD(T8C, T8D); + T8O = VADD(T8z, T8y); + T8c = VSUB(T88, T8b); + T8j = VADD(T88, T8b); + T8A = VSUB(T8y, T8z); + T8E = VSUB(T8C, T8D); + T8m = VADD(T7Z, T7O); + } + { + V T81, T8g, T8N, T8Q; + T81 = VBYI(VSUB(T7z, T80)); + T8g = VSUB(T8c, T8f); + ST(&(xo[WS(os, 39)]), VADD(T81, T8g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 89)]), VSUB(T8g, T81), ovs, &(xo[WS(os, 1)])); + T8N = VADD(T8L, T8M); + T8Q = VBYI(VADD(T8O, T8P)); + ST(&(xo[WS(os, 119)]), VSUB(T8N, T8Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(T8N, T8Q), ovs, &(xo[WS(os, 1)])); + } + { + V T8R, T8S, T8h, T8i; + T8R = VSUB(T8L, T8M); + T8S = VBYI(VSUB(T8P, T8O)); + ST(&(xo[WS(os, 73)]), VSUB(T8R, T8S), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VADD(T8R, T8S), ovs, &(xo[WS(os, 1)])); + T8h = VBYI(VADD(T80, T7z)); + T8i = VADD(T8c, T8f); + ST(&(xo[WS(os, 25)]), VADD(T8h, T8i), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 103)]), VSUB(T8i, T8h), ovs, &(xo[WS(os, 1)])); + } + { + V T8l, T8o, T8B, T8I; + T8l = VADD(T8j, T8k); + T8o = VBYI(VADD(T8m, T8n)); + ST(&(xo[WS(os, 121)]), VSUB(T8l, T8o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(T8l, T8o), ovs, &(xo[WS(os, 1)])); + T8B = VBYI(VSUB(T8x, T8A)); + T8I = VSUB(T8E, T8H); + ST(&(xo[WS(os, 41)]), VADD(T8B, T8I), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 87)]), VSUB(T8I, T8B), ovs, &(xo[WS(os, 1)])); + } + { + V T8J, T8K, T8p, T8q; + T8J = VBYI(VADD(T8A, T8x)); + T8K = VADD(T8E, T8H); + ST(&(xo[WS(os, 23)]), VADD(T8J, T8K), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 105)]), VSUB(T8K, T8J), ovs, &(xo[WS(os, 1)])); + T8p = VSUB(T8j, T8k); + T8q = VBYI(VSUB(T8n, T8m)); + ST(&(xo[WS(os, 71)]), VSUB(T8p, T8q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 57)]), VADD(T8p, T8q), ovs, &(xo[WS(os, 1)])); + } + } + { + V T2v, T5d, T55, T5a, T5n, T5F, T5x, T5C, T4K, T5B, T5E, T52, T59, T5q, T5u; + V T5c; + { + V 
T1f, T53, T2u, T54; + { + V T11, T1e, T2g, T2t; + T11 = VADD(Tr, T10); + T1e = VADD(T1a, T1d); + T1f = VFNMS(LDK(KP242980179), T1e, VMUL(LDK(KP970031253), T11)); + T53 = VFMA(LDK(KP242980179), T11, VMUL(LDK(KP970031253), T1e)); + T2g = VADD(T1G, T2f); + T2t = VADD(T2p, T2s); + T2u = VFMA(LDK(KP970031253), T2g, VMUL(LDK(KP242980179), T2t)); + T54 = VFNMS(LDK(KP242980179), T2g, VMUL(LDK(KP970031253), T2t)); + } + T2v = VSUB(T1f, T2u); + T5d = VADD(T53, T54); + T55 = VSUB(T53, T54); + T5a = VADD(T1f, T2u); + } + { + V T5j, T5v, T5m, T5w; + { + V T5h, T5i, T5k, T5l; + T5h = VSUB(Tr, T10); + T5i = VSUB(T1d, T1a); + T5j = VFNMS(LDK(KP514102744), T5i, VMUL(LDK(KP857728610), T5h)); + T5v = VFMA(LDK(KP514102744), T5h, VMUL(LDK(KP857728610), T5i)); + T5k = VSUB(T1G, T2f); + T5l = VSUB(T2s, T2p); + T5m = VFMA(LDK(KP857728610), T5k, VMUL(LDK(KP514102744), T5l)); + T5w = VFNMS(LDK(KP514102744), T5k, VMUL(LDK(KP857728610), T5l)); + } + T5n = VSUB(T5j, T5m); + T5F = VADD(T5v, T5w); + T5x = VSUB(T5v, T5w); + T5C = VADD(T5j, T5m); + } + { + V T4J, T5p, T4Y, T5s, T3I, T5t, T51, T5o, T4I, T4X; + T4I = VSUB(T4q, T4H); + T4J = VADD(T49, T4I); + T5p = VSUB(T4I, T49); + T4X = VSUB(T4V, T4W); + T4Y = VADD(T4U, T4X); + T5s = VSUB(T4U, T4X); + { + V T36, T3H, T4Z, T50; + T36 = VFMA(LDK(KP881921264), T2W, VMUL(LDK(KP471396736), T35)); + T3H = VFNMS(LDK(KP471396736), T3G, VMUL(LDK(KP881921264), T3x)); + T3I = VADD(T36, T3H); + T5t = VSUB(T36, T3H); + T4Z = VFNMS(LDK(KP471396736), T2W, VMUL(LDK(KP881921264), T35)); + T50 = VFMA(LDK(KP471396736), T3x, VMUL(LDK(KP881921264), T3G)); + T51 = VADD(T4Z, T50); + T5o = VSUB(T4Z, T50); + } + T4K = VSUB(T3I, T4J); + T5B = VADD(T5s, T5t); + T5E = VADD(T5p, T5o); + T52 = VSUB(T4Y, T51); + T59 = VADD(T4Y, T51); + T5q = VSUB(T5o, T5p); + T5u = VSUB(T5s, T5t); + T5c = VADD(T4J, T3I); + } + { + V T4L, T56, T5D, T5G; + T4L = VBYI(VSUB(T2v, T4K)); + T56 = VSUB(T52, T55); + ST(&(xo[WS(os, 37)]), VADD(T4L, T56), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 91)]), 
VSUB(T56, T4L), ovs, &(xo[WS(os, 1)])); + T5D = VADD(T5B, T5C); + T5G = VBYI(VADD(T5E, T5F)); + ST(&(xo[WS(os, 117)]), VSUB(T5D, T5G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VADD(T5D, T5G), ovs, &(xo[WS(os, 1)])); + } + { + V T5H, T5I, T57, T58; + T5H = VSUB(T5B, T5C); + T5I = VBYI(VSUB(T5F, T5E)); + ST(&(xo[WS(os, 75)]), VSUB(T5H, T5I), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VADD(T5H, T5I), ovs, &(xo[WS(os, 1)])); + T57 = VBYI(VADD(T4K, T2v)); + T58 = VADD(T52, T55); + ST(&(xo[WS(os, 27)]), VADD(T57, T58), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 101)]), VSUB(T58, T57), ovs, &(xo[WS(os, 1)])); + } + { + V T5b, T5e, T5r, T5y; + T5b = VADD(T59, T5a); + T5e = VBYI(VADD(T5c, T5d)); + ST(&(xo[WS(os, 123)]), VSUB(T5b, T5e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(T5b, T5e), ovs, &(xo[WS(os, 1)])); + T5r = VBYI(VSUB(T5n, T5q)); + T5y = VSUB(T5u, T5x); + ST(&(xo[WS(os, 43)]), VADD(T5r, T5y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 85)]), VSUB(T5y, T5r), ovs, &(xo[WS(os, 1)])); + } + { + V T5z, T5A, T5f, T5g; + T5z = VBYI(VADD(T5q, T5n)); + T5A = VADD(T5u, T5x); + ST(&(xo[WS(os, 21)]), VADD(T5z, T5A), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 107)]), VSUB(T5A, T5z), ovs, &(xo[WS(os, 1)])); + T5f = VSUB(T59, T5a); + T5g = VBYI(VSUB(T5d, T5c)); + ST(&(xo[WS(os, 69)]), VSUB(T5f, T5g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VADD(T5f, T5g), ovs, &(xo[WS(os, 1)])); + } + } + { + V T9i, T9B, T9t, T9x, T9O, Ta3, T9V, T9Z, T93, Ta0, Ta2, T9q, T9y, T9H, T9S; + V T9A; + { + V T9a, T9r, T9h, T9s; + { + V T96, T99, T9d, T9g; + T96 = VSUB(T94, T95); + T99 = VSUB(T97, T98); + T9a = VFMA(LDK(KP740951125), T96, VMUL(LDK(KP671558954), T99)); + T9r = VFNMS(LDK(KP671558954), T96, VMUL(LDK(KP740951125), T99)); + T9d = VSUB(T9b, T9c); + T9g = VSUB(T9e, T9f); + T9h = VFNMS(LDK(KP671558954), T9g, VMUL(LDK(KP740951125), T9d)); + T9s = VFMA(LDK(KP671558954), T9d, VMUL(LDK(KP740951125), T9g)); + } + T9i = VSUB(T9a, T9h); + T9B = VADD(T9r, T9s); + T9t = 
VSUB(T9r, T9s); + T9x = VADD(T9a, T9h); + } + { + V T9K, T9T, T9N, T9U; + { + V T9I, T9J, T9L, T9M; + T9I = VADD(T95, T94); + T9J = VADD(T97, T98); + T9K = VFMA(LDK(KP998795456), T9I, VMUL(LDK(KP049067674), T9J)); + T9T = VFNMS(LDK(KP049067674), T9I, VMUL(LDK(KP998795456), T9J)); + T9L = VADD(T9c, T9b); + T9M = VADD(T9e, T9f); + T9N = VFNMS(LDK(KP049067674), T9M, VMUL(LDK(KP998795456), T9L)); + T9U = VFMA(LDK(KP049067674), T9L, VMUL(LDK(KP998795456), T9M)); + } + T9O = VSUB(T9K, T9N); + Ta3 = VADD(T9T, T9U); + T9V = VSUB(T9T, T9U); + T9Z = VADD(T9K, T9N); + } + { + V T8V, T9F, T9p, T9R, T92, T9Q, T9m, T9G, T8U, T9n; + T8U = VADD(T7U, T7X); + T8V = VSUB(T8T, T8U); + T9F = VADD(T8T, T8U); + T9n = VADD(T85, T86); + T9p = VSUB(T9n, T9o); + T9R = VADD(T9o, T9n); + { + V T8Y, T91, T9k, T9l; + T8Y = VFMA(LDK(KP098017140), T8W, VMUL(LDK(KP995184726), T8X)); + T91 = VFNMS(LDK(KP098017140), T90, VMUL(LDK(KP995184726), T8Z)); + T92 = VSUB(T8Y, T91); + T9Q = VADD(T8Y, T91); + T9k = VFNMS(LDK(KP098017140), T8X, VMUL(LDK(KP995184726), T8W)); + T9l = VFMA(LDK(KP995184726), T90, VMUL(LDK(KP098017140), T8Z)); + T9m = VSUB(T9k, T9l); + T9G = VADD(T9k, T9l); + } + T93 = VSUB(T8V, T92); + Ta0 = VADD(T9R, T9Q); + Ta2 = VADD(T9F, T9G); + T9q = VSUB(T9m, T9p); + T9y = VADD(T9p, T9m); + T9H = VSUB(T9F, T9G); + T9S = VSUB(T9Q, T9R); + T9A = VADD(T8V, T92); + } + { + V T9j, T9u, Ta1, Ta4; + T9j = VADD(T93, T9i); + T9u = VBYI(VADD(T9q, T9t)); + ST(&(xo[WS(os, 111)]), VSUB(T9j, T9u), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VADD(T9j, T9u), ovs, &(xo[WS(os, 1)])); + Ta1 = VBYI(VSUB(T9Z, Ta0)); + Ta4 = VSUB(Ta2, Ta3); + ST(&(xo[WS(os, 63)]), VADD(Ta1, Ta4), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 65)]), VSUB(Ta4, Ta1), ovs, &(xo[WS(os, 1)])); + } + { + V Ta5, Ta6, T9v, T9w; + Ta5 = VBYI(VADD(Ta0, T9Z)); + Ta6 = VADD(Ta2, Ta3); + ST(&(xo[WS(os, 1)]), VADD(Ta5, Ta6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 127)]), VSUB(Ta6, Ta5), ovs, &(xo[WS(os, 1)])); + T9v = VSUB(T93, T9i); + T9w = 
VBYI(VSUB(T9t, T9q)); + ST(&(xo[WS(os, 81)]), VSUB(T9v, T9w), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 47)]), VADD(T9v, T9w), ovs, &(xo[WS(os, 1)])); + } + { + V T9z, T9C, T9P, T9W; + T9z = VBYI(VSUB(T9x, T9y)); + T9C = VSUB(T9A, T9B); + ST(&(xo[WS(os, 49)]), VADD(T9z, T9C), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 79)]), VSUB(T9C, T9z), ovs, &(xo[WS(os, 1)])); + T9P = VADD(T9H, T9O); + T9W = VBYI(VADD(T9S, T9V)); + ST(&(xo[WS(os, 97)]), VSUB(T9P, T9W), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VADD(T9P, T9W), ovs, &(xo[WS(os, 1)])); + } + { + V T9X, T9Y, T9D, T9E; + T9X = VSUB(T9H, T9O); + T9Y = VBYI(VSUB(T9V, T9S)); + ST(&(xo[WS(os, 95)]), VSUB(T9X, T9Y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 33)]), VADD(T9X, T9Y), ovs, &(xo[WS(os, 1)])); + T9D = VBYI(VADD(T9y, T9x)); + T9E = VADD(T9A, T9B); + ST(&(xo[WS(os, 15)]), VADD(T9D, T9E), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 113)]), VSUB(T9E, T9D), ovs, &(xo[WS(os, 1)])); + } + } + { + V T68, T6r, T6j, T6n, T6E, T6T, T6L, T6P, T5T, T6Q, T6S, T6g, T6o, T6x, T6I; + V T6q; + { + V T60, T6h, T67, T6i; + { + V T5W, T5Z, T63, T66; + T5W = VSUB(T5U, T5V); + T5Z = VSUB(T5X, T5Y); + T60 = VFMA(LDK(KP803207531), T5W, VMUL(LDK(KP595699304), T5Z)); + T6h = VFNMS(LDK(KP595699304), T5W, VMUL(LDK(KP803207531), T5Z)); + T63 = VSUB(T61, T62); + T66 = VSUB(T64, T65); + T67 = VFNMS(LDK(KP595699304), T66, VMUL(LDK(KP803207531), T63)); + T6i = VFMA(LDK(KP595699304), T63, VMUL(LDK(KP803207531), T66)); + } + T68 = VSUB(T60, T67); + T6r = VADD(T6h, T6i); + T6j = VSUB(T6h, T6i); + T6n = VADD(T60, T67); + } + { + V T6A, T6J, T6D, T6K; + { + V T6y, T6z, T6B, T6C; + T6y = VADD(T5V, T5U); + T6z = VADD(T5X, T5Y); + T6A = VFMA(LDK(KP989176509), T6y, VMUL(LDK(KP146730474), T6z)); + T6J = VFNMS(LDK(KP146730474), T6y, VMUL(LDK(KP989176509), T6z)); + T6B = VADD(T62, T61); + T6C = VADD(T64, T65); + T6D = VFNMS(LDK(KP146730474), T6C, VMUL(LDK(KP989176509), T6B)); + T6K = VFMA(LDK(KP146730474), T6B, VMUL(LDK(KP989176509), T6C)); + } + T6E = 
VSUB(T6A, T6D); + T6T = VADD(T6J, T6K); + T6L = VSUB(T6J, T6K); + T6P = VADD(T6A, T6D); + } + { + V T5L, T6v, T6f, T6H, T5S, T6G, T6c, T6w, T5K, T6d; + T5K = VADD(T4q, T4H); + T5L = VSUB(T5J, T5K); + T6v = VADD(T5J, T5K); + T6d = VADD(T4V, T4W); + T6f = VSUB(T6d, T6e); + T6H = VADD(T6e, T6d); + { + V T5O, T5R, T6a, T6b; + T5O = VFMA(LDK(KP956940335), T5M, VMUL(LDK(KP290284677), T5N)); + T5R = VFNMS(LDK(KP290284677), T5Q, VMUL(LDK(KP956940335), T5P)); + T5S = VSUB(T5O, T5R); + T6G = VADD(T5O, T5R); + T6a = VFNMS(LDK(KP290284677), T5M, VMUL(LDK(KP956940335), T5N)); + T6b = VFMA(LDK(KP290284677), T5P, VMUL(LDK(KP956940335), T5Q)); + T6c = VSUB(T6a, T6b); + T6w = VADD(T6a, T6b); + } + T5T = VSUB(T5L, T5S); + T6Q = VADD(T6H, T6G); + T6S = VADD(T6v, T6w); + T6g = VSUB(T6c, T6f); + T6o = VADD(T6f, T6c); + T6x = VSUB(T6v, T6w); + T6I = VSUB(T6G, T6H); + T6q = VADD(T5L, T5S); + } + { + V T69, T6k, T6R, T6U; + T69 = VADD(T5T, T68); + T6k = VBYI(VADD(T6g, T6j)); + ST(&(xo[WS(os, 109)]), VSUB(T69, T6k), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VADD(T69, T6k), ovs, &(xo[WS(os, 1)])); + T6R = VBYI(VSUB(T6P, T6Q)); + T6U = VSUB(T6S, T6T); + ST(&(xo[WS(os, 61)]), VADD(T6R, T6U), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 67)]), VSUB(T6U, T6R), ovs, &(xo[WS(os, 1)])); + } + { + V T6V, T6W, T6l, T6m; + T6V = VBYI(VADD(T6Q, T6P)); + T6W = VADD(T6S, T6T); + ST(&(xo[WS(os, 3)]), VADD(T6V, T6W), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 125)]), VSUB(T6W, T6V), ovs, &(xo[WS(os, 1)])); + T6l = VSUB(T5T, T68); + T6m = VBYI(VSUB(T6j, T6g)); + ST(&(xo[WS(os, 83)]), VSUB(T6l, T6m), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 45)]), VADD(T6l, T6m), ovs, &(xo[WS(os, 1)])); + } + { + V T6p, T6s, T6F, T6M; + T6p = VBYI(VSUB(T6n, T6o)); + T6s = VSUB(T6q, T6r); + ST(&(xo[WS(os, 51)]), VADD(T6p, T6s), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 77)]), VSUB(T6s, T6p), ovs, &(xo[WS(os, 1)])); + T6F = VADD(T6x, T6E); + T6M = VBYI(VADD(T6I, T6L)); + ST(&(xo[WS(os, 99)]), VSUB(T6F, T6M), ovs, &(xo[WS(os, 
1)])); + ST(&(xo[WS(os, 29)]), VADD(T6F, T6M), ovs, &(xo[WS(os, 1)])); + } + { + V T6N, T6O, T6t, T6u; + T6N = VSUB(T6x, T6E); + T6O = VBYI(VSUB(T6L, T6I)); + ST(&(xo[WS(os, 93)]), VSUB(T6N, T6O), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 35)]), VADD(T6N, T6O), ovs, &(xo[WS(os, 1)])); + T6t = VBYI(VADD(T6o, T6n)); + T6u = VADD(T6q, T6r); + ST(&(xo[WS(os, 13)]), VADD(T6t, T6u), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 115)]), VSUB(T6u, T6t), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 128, XSIMD_STRING("n1bv_128"), {938, 186, 144, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_128) (planner *p) { + X(kdft_register) (p, n1bv_128, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include n1b.h */ + +/* + * This function contains 88 FP additions, 63 FP multiplications, + * (or, 31 additions, 6 multiplications, 57 fused multiply/add), + * 96 stack variables, 23 constants, and 26 memory accesses + */ +#include "n1b.h" + +static void n1bv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP904176221, +0.904176221990848204433795481776887926501523162); + DVK(KP575140729, +0.575140729474003121368385547455453388461001608); + DVK(KP300462606, +0.300462606288665774426601772289207995520941381); + DVK(KP516520780, +0.516520780623489722840901288569017135705033622); + DVK(KP522026385, +0.522026385161275033714027226654165028300441940); + DVK(KP957805992, +0.957805992594665126462521754605754580515587217); + DVK(KP600477271, +0.600477271932665282925769253334763009352012849); + DVK(KP251768516, +0.251768516431883313623436926934233488546674281); + DVK(KP503537032, +0.503537032863766627246873853868466977093348562); + DVK(KP769338817, +0.769338817572980603471413688209101117038278899); + DVK(KP859542535, +0.859542535098774820163672132761689612766401925); + DVK(KP581704778, +0.581704778510515730456870384989698884939833902); + DVK(KP853480001, +0.853480001859823990758994934970528322872359049); + DVK(KP083333333, +0.083333333333333333333333333333333333333333333); + DVK(KP226109445, +0.226109445035782405468510155372505010481906348); + DVK(KP301479260, 
+0.301479260047709873958013540496673347309208464); + DVK(KP686558370, +0.686558370781754340655719594850823015421401653); + DVK(KP514918778, +0.514918778086315755491789696138117261566051239); + DVK(KP038632954, +0.038632954644348171955506895830342264440241080); + DVK(KP612264650, +0.612264650376756543746494474777125408779395514); + DVK(KP302775637, +0.302775637731994646559610633735247973125648287); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) { + V T1, T7, T2, Tg, Tf, TN, Th, Tq, Ta, Tj, T5, Tr, Tk; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + { + V Td, Te, T8, T9, T3, T4; + Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T4 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tf = VADD(Td, Te); + TN = VSUB(Td, Te); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tq = VSUB(T8, T9); + Ta = VADD(T8, T9); + Tj = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T5 = VADD(T3, T4); + Tr = VSUB(T4, T3); + Tk = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + } + { + V Tt, Ti, Ty, Tb, Ts, TQ, Tx, T6, Tu, Tl; + Tt = VSUB(Tg, Th); + Ti = VADD(Tg, Th); + Ty = VFMS(LDK(KP500000000), Ta, T7); + Tb = VADD(T7, Ta); + Ts = VSUB(Tq, Tr); + TQ = VADD(Tr, Tq); + Tx = VFNMS(LDK(KP500000000), T5, T2); + T6 = VADD(T2, T5); + Tu = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + { + V TK, Tz, Tc, TX, Tv, TO, TL, Tm; + TK = VADD(Tx, Ty); + Tz = VSUB(Tx, Ty); + Tc = VADD(T6, Tb); + TX = VSUB(T6, Tb); + Tv = 
VSUB(Tt, Tu); + TO = VADD(Tt, Tu); + TL = VSUB(Ti, Tl); + Tm = VADD(Ti, Tl); + { + V TF, Tw, TP, TY, TT, TM, TA, Tn; + TF = VSUB(Ts, Tv); + Tw = VADD(Ts, Tv); + TP = VFNMS(LDK(KP500000000), TO, TN); + TY = VADD(TN, TO); + TT = VFNMS(LDK(KP866025403), TL, TK); + TM = VFMA(LDK(KP866025403), TL, TK); + TA = VFNMS(LDK(KP500000000), Tm, Tf); + Tn = VADD(Tf, Tm); + { + V T1f, T1n, TI, T18, T1k, T1c, TD, T17, T10, T1m, T16, T1e, TU, TR; + TU = VFNMS(LDK(KP866025403), TQ, TP); + TR = VFMA(LDK(KP866025403), TQ, TP); + { + V TZ, T15, TE, TB; + TZ = VFMA(LDK(KP302775637), TY, TX); + T15 = VFNMS(LDK(KP302775637), TX, TY); + TE = VSUB(Tz, TA); + TB = VADD(Tz, TA); + { + V TH, To, TV, T13; + TH = VSUB(Tc, Tn); + To = VADD(Tc, Tn); + TV = VFNMS(LDK(KP612264650), TU, TT); + T13 = VFMA(LDK(KP612264650), TT, TU); + { + V TS, T12, TG, T1b; + TS = VFNMS(LDK(KP038632954), TR, TM); + T12 = VFMA(LDK(KP038632954), TM, TR); + TG = VFNMS(LDK(KP514918778), TF, TE); + T1b = VFMA(LDK(KP686558370), TE, TF); + { + V TC, T1a, Tp, TW, T14; + TC = VFMA(LDK(KP301479260), TB, Tw); + T1a = VFNMS(LDK(KP226109445), Tw, TB); + Tp = VFNMS(LDK(KP083333333), To, T1); + ST(&(xo[0]), VADD(T1, To), ovs, &(xo[0])); + T1f = VFMA(LDK(KP853480001), TV, TS); + TW = VFNMS(LDK(KP853480001), TV, TS); + T1n = VFMA(LDK(KP853480001), T13, T12); + T14 = VFNMS(LDK(KP853480001), T13, T12); + TI = VFMA(LDK(KP581704778), TH, TG); + T18 = VFNMS(LDK(KP859542535), TG, TH); + T1k = VFMA(LDK(KP769338817), T1b, T1a); + T1c = VFNMS(LDK(KP769338817), T1b, T1a); + TD = VFMA(LDK(KP503537032), TC, Tp); + T17 = VFNMS(LDK(KP251768516), TC, Tp); + T10 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), TZ, TW)); + T1m = VFNMS(LDK(KP522026385), TW, TZ); + T16 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), T15, T14)); + T1e = VFNMS(LDK(KP522026385), T14, T15); + } + } + } + } + { + V T1o, T1q, T1g, T1i, T1d, T1h, T1l, T1p; + { + V T11, TJ, T19, T1j; + T11 = VFMA(LDK(KP516520780), TI, TD); + TJ = VFNMS(LDK(KP516520780), TI, TD); + T19 = 
VFMA(LDK(KP300462606), T18, T17); + T1j = VFNMS(LDK(KP300462606), T18, T17); + T1o = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1n, T1m)); + T1q = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1n, T1m)); + T1g = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1f, T1e)); + T1i = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1f, T1e)); + ST(&(xo[WS(os, 12)]), VFMAI(T16, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFNMSI(T16, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFNMSI(T10, TJ), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFMAI(T10, TJ), ovs, &(xo[WS(os, 1)])); + T1d = VFNMS(LDK(KP503537032), T1c, T19); + T1h = VFMA(LDK(KP503537032), T1c, T19); + T1l = VFNMS(LDK(KP503537032), T1k, T1j); + T1p = VFMA(LDK(KP503537032), T1k, T1j); + } + ST(&(xo[WS(os, 9)]), VFNMSI(T1g, T1d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(T1g, T1d), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(T1i, T1h), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFNMSI(T1i, T1h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(T1o, T1l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFMAI(T1o, T1l), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VFNMSI(T1q, T1p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFMAI(T1q, T1p), ovs, &(xo[0])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_13) (planner *p) { + X(kdft_register) (p, n1bv_13, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include n1b.h */ + +/* + * This function contains 88 FP additions, 34 FP multiplications, + * (or, 69 additions, 15 multiplications, 19 fused multiply/add), + * 60 stack variables, 20 constants, and 26 memory accesses + */ +#include "n1b.h" + +static void n1bv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + 
DVK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DVK(KP083333333, +0.083333333333333333333333333333333333333333333); + DVK(KP075902986, +0.075902986037193865983102897245103540356428373); + DVK(KP251768516, +0.251768516431883313623436926934233488546674281); + DVK(KP132983124, +0.132983124607418643793760531921092974399165133); + DVK(KP258260390, +0.258260390311744861420450644284508567852516811); + DVK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DVK(KP300238635, +0.300238635966332641462884626667381504676006424); + DVK(KP011599105, +0.011599105605768290721655456654083252189827041); + DVK(KP256247671, +0.256247671582936600958684654061725059144125175); + DVK(KP156891391, +0.156891391051584611046832726756003269660212636); + DVK(KP174138601, +0.174138601152135905005660794929264742616964676); + DVK(KP575140729, +0.575140729474003121368385547455453388461001608); + DVK(KP503537032, +0.503537032863766627246873853868466977093348562); + DVK(KP113854479, +0.113854479055790798974654345867655310534642560); + DVK(KP265966249, +0.265966249214837287587521063842185948798330267); + DVK(KP387390585, +0.387390585467617292130675966426762851778775217); + DVK(KP300462606, +0.300462606288665774426601772289207995520941381); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) { + V TW, Tb, Tm, Ts, TB, TR, TX, TK, TU, Tz, TC, TN, TT; + TW = LD(&(xi[0]), ivs, &(xi[0])); + { + V Te, TH, Ta, Tu, Tp, T5, Tt, To, Th, Tw, Tk, Tx, Tl, TI, Tc; + V Td, Tq, Tr; + Tc = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Te = VSUB(Tc, Td); + TH = VADD(Tc, Td); + { + V T6, T7, T8, T9; + T6 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T7 = 
LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T9 = VADD(T7, T8); + Ta = VADD(T6, T9); + Tu = VFNMS(LDK(KP500000000), T9, T6); + Tp = VSUB(T7, T8); + } + { + V T1, T2, T3, T4; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T4 = VADD(T2, T3); + T5 = VADD(T1, T4); + Tt = VFNMS(LDK(KP500000000), T4, T1); + To = VSUB(T2, T3); + } + { + V Tf, Tg, Ti, Tj; + Tf = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + Tw = VADD(Tf, Tg); + Ti = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tk = VSUB(Ti, Tj); + Tx = VADD(Ti, Tj); + } + Tl = VADD(Th, Tk); + TI = VADD(Tw, Tx); + Tb = VSUB(T5, Ta); + Tm = VADD(Te, Tl); + Tq = VMUL(LDK(KP866025403), VSUB(To, Tp)); + Tr = VFNMS(LDK(KP500000000), Tl, Te); + Ts = VADD(Tq, Tr); + TB = VSUB(Tq, Tr); + { + V TP, TQ, TG, TJ; + TP = VADD(T5, Ta); + TQ = VADD(TH, TI); + TR = VMUL(LDK(KP300462606), VSUB(TP, TQ)); + TX = VADD(TP, TQ); + TG = VADD(Tt, Tu); + TJ = VFNMS(LDK(KP500000000), TI, TH); + TK = VSUB(TG, TJ); + TU = VADD(TG, TJ); + } + { + V Tv, Ty, TL, TM; + Tv = VSUB(Tt, Tu); + Ty = VMUL(LDK(KP866025403), VSUB(Tw, Tx)); + Tz = VSUB(Tv, Ty); + TC = VADD(Tv, Ty); + TL = VADD(To, Tp); + TM = VSUB(Th, Tk); + TN = VSUB(TL, TM); + TT = VADD(TL, TM); + } + } + ST(&(xo[0]), VADD(TW, TX), ovs, &(xo[0])); + { + V T1c, T1n, T11, T14, T17, T1k, Tn, TE, T18, T1j, TS, T1m, TZ, T1f, TA; + V TD; + { + V T1a, T1b, T12, T13; + T1a = VFMA(LDK(KP387390585), TN, VMUL(LDK(KP265966249), TK)); + T1b = VFNMS(LDK(KP503537032), TU, VMUL(LDK(KP113854479), TT)); + T1c = VSUB(T1a, T1b); + T1n = VADD(T1a, T1b); + T11 = VFMA(LDK(KP575140729), Tb, VMUL(LDK(KP174138601), Tm)); + T12 = VFNMS(LDK(KP256247671), Tz, VMUL(LDK(KP156891391), Ts)); + T13 = VFMA(LDK(KP011599105), TB, VMUL(LDK(KP300238635), TC)); + T14 = 
VADD(T12, T13); + T17 = VSUB(T11, T14); + T1k = VMUL(LDK(KP1_732050807), VSUB(T12, T13)); + } + Tn = VFNMS(LDK(KP575140729), Tm, VMUL(LDK(KP174138601), Tb)); + TA = VFMA(LDK(KP256247671), Ts, VMUL(LDK(KP156891391), Tz)); + TD = VFNMS(LDK(KP011599105), TC, VMUL(LDK(KP300238635), TB)); + TE = VADD(TA, TD); + T18 = VMUL(LDK(KP1_732050807), VSUB(TD, TA)); + T1j = VSUB(Tn, TE); + { + V TO, T1e, TV, TY, T1d; + TO = VFNMS(LDK(KP132983124), TN, VMUL(LDK(KP258260390), TK)); + T1e = VSUB(TR, TO); + TV = VFMA(LDK(KP251768516), TT, VMUL(LDK(KP075902986), TU)); + TY = VFNMS(LDK(KP083333333), TX, TW); + T1d = VSUB(TY, TV); + TS = VFMA(LDK(KP2_000000000), TO, TR); + T1m = VADD(T1e, T1d); + TZ = VFMA(LDK(KP2_000000000), TV, TY); + T1f = VSUB(T1d, T1e); + } + { + V TF, T10, T1l, T1o; + TF = VBYI(VFMA(LDK(KP2_000000000), TE, Tn)); + T10 = VADD(TS, TZ); + ST(&(xo[WS(os, 1)]), VADD(TF, T10), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 12)]), VSUB(T10, TF), ovs, &(xo[0])); + { + V T15, T16, T1p, T1q; + T15 = VBYI(VFMA(LDK(KP2_000000000), T14, T11)); + T16 = VSUB(TZ, TS); + ST(&(xo[WS(os, 5)]), VADD(T15, T16), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VSUB(T16, T15), ovs, &(xo[0])); + T1p = VADD(T1n, T1m); + T1q = VBYI(VADD(T1j, T1k)); + ST(&(xo[WS(os, 4)]), VSUB(T1p, T1q), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VADD(T1q, T1p), ovs, &(xo[WS(os, 1)])); + } + T1l = VBYI(VSUB(T1j, T1k)); + T1o = VSUB(T1m, T1n); + ST(&(xo[WS(os, 3)]), VADD(T1l, T1o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VSUB(T1o, T1l), ovs, &(xo[0])); + { + V T1h, T1i, T19, T1g; + T1h = VBYI(VADD(T18, T17)); + T1i = VSUB(T1f, T1c); + ST(&(xo[WS(os, 6)]), VADD(T1h, T1i), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VSUB(T1i, T1h), ovs, &(xo[WS(os, 1)])); + T19 = VBYI(VSUB(T17, T18)); + T1g = VADD(T1c, T1f); + ST(&(xo[WS(os, 2)]), VADD(T19, T1g), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VSUB(T1g, T19), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 13, 
XSIMD_STRING("n1bv_13"), {69, 15, 19, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_13) (planner *p) { + X(kdft_register) (p, n1bv_13, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n1bv_14 -include n1b.h */ + +/* + * This function contains 74 FP additions, 48 FP multiplications, + * (or, 32 additions, 6 multiplications, 42 fused multiply/add), + * 63 stack variables, 6 constants, and 28 memory accesses + */ +#include "n1b.h" + +static void n1bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V TH, T3, TP, Tn, Ta, Tu, TU, TK, TO, Tk, TM, Tg, TL, Td, T1; + V T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V Ti, TI, T6, TJ, T9, Tj, Te, Tf, Tb, Tc; + { + V T4, T5, T7, T8, Tl, Tm; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = LD(&(xi[WS(is, 8)]), ivs, 
&(xi[0])); + Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TH = VADD(T1, T2); + T3 = VSUB(T1, T2); + TI = VADD(T4, T5); + T6 = VSUB(T4, T5); + TJ = VADD(T7, T8); + T9 = VSUB(T7, T8); + TP = VADD(Tl, Tm); + Tn = VSUB(Tl, Tm); + Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + Ta = VADD(T6, T9); + Tu = VSUB(T6, T9); + TU = VSUB(TI, TJ); + TK = VADD(TI, TJ); + TO = VADD(Ti, Tj); + Tk = VSUB(Ti, Tj); + TM = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + TL = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + } + { + V T13, TG, TY, T18, TB, Tw, TT, Tz, T11, T16, TE, Tr, TV, TQ; + TV = VSUB(TP, TO); + TQ = VADD(TO, TP); + { + V Ts, To, TW, TN; + Ts = VSUB(Tk, Tn); + To = VADD(Tk, Tn); + TW = VSUB(TM, TL); + TN = VADD(TL, TM); + { + V Tt, Th, TR, T12; + Tt = VSUB(Td, Tg); + Th = VADD(Td, Tg); + TR = VFNMS(LDK(KP356895867), TK, TQ); + T12 = VFNMS(LDK(KP554958132), TV, TU); + { + V Tx, TF, TZ, T14; + Tx = VFNMS(LDK(KP356895867), Ta, To); + TF = VFMA(LDK(KP554958132), Ts, Tu); + ST(&(xo[0]), VADD(TH, VADD(TK, VADD(TN, TQ))), ovs, &(xo[0])); + TZ = VFNMS(LDK(KP356895867), TN, TK); + T14 = VFNMS(LDK(KP356895867), TQ, TN); + { + V TX, T17, TC, Tp; + TX = VFMA(LDK(KP554958132), TW, TV); + T17 = VFMA(LDK(KP554958132), TU, TW); + ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)])); + TC = VFNMS(LDK(KP356895867), Th, Ta); + Tp = VFNMS(LDK(KP356895867), To, Th); + { + V TA, Tv, TS, Ty; + TA = VFMA(LDK(KP554958132), Tt, Ts); + Tv = VFNMS(LDK(KP554958132), Tu, Tt); + TS = VFNMS(LDK(KP692021471), TR, TN); + T13 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T12, TW)); + Ty = VFNMS(LDK(KP692021471), Tx, Th); + TG = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TF, Tt)); + { + V T10, T15, TD, Tq; + T10 = VFNMS(LDK(KP692021471), TZ, TQ); 
+ T15 = VFNMS(LDK(KP692021471), T14, TK); + TY = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TX, TU)); + T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV)); + TD = VFNMS(LDK(KP692021471), TC, To); + Tq = VFNMS(LDK(KP692021471), Tp, Ta); + TB = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TA, Tu)); + Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts)); + TT = VFNMS(LDK(KP900968867), TS, TH); + Tz = VFNMS(LDK(KP900968867), Ty, T3); + T11 = VFNMS(LDK(KP900968867), T10, TH); + T16 = VFNMS(LDK(KP900968867), T15, TH); + TE = VFNMS(LDK(KP900968867), TD, T3); + Tr = VFNMS(LDK(KP900968867), Tq, T3); + } + } + } + } + } + } + ST(&(xo[WS(os, 2)]), VFMAI(TY, TT), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(TY, TT), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VFMAI(TB, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(TB, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFMAI(T13, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFNMSI(T13, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T18, T16), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFNMSI(T18, T16), ovs, &(xo[0])); + ST(&(xo[WS(os, 13)]), VFNMSI(TG, TE), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(TG, TE), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(Tw, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFMAI(Tw, Tr), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n1bv_14"), {32, 6, 42, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_14) (planner *p) { + X(kdft_register) (p, n1bv_14, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n1bv_14 -include n1b.h */ + +/* + * This function contains 74 FP additions, 36 FP multiplications, + * (or, 50 additions, 12 multiplications, 24 fused multiply/add), + * 33 stack variables, 6 constants, and 28 memory accesses + */ +#include "n1b.h" + +static void 
n1bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V Tp, Ty, Tl, TL, Tq, TE, T7, TJ, Ts, TB, Te, TK, Tr, TH, Tn; + V To; + Tn = LD(&(xi[0]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tp = VSUB(Tn, To); + Ty = VADD(Tn, To); + { + V Th, TC, Tk, TD; + { + V Tf, Tg, Ti, Tj; + Tf = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Th = VSUB(Tf, Tg); + TC = VADD(Tf, Tg); + Ti = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tk = VSUB(Ti, Tj); + TD = VADD(Ti, Tj); + } + Tl = VSUB(Th, Tk); + TL = VSUB(TD, TC); + Tq = VADD(Th, Tk); + TE = VADD(TC, TD); + } + { + V T3, Tz, T6, TA; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Tz = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + TA = VADD(T4, T5); + } + T7 = VSUB(T3, T6); + TJ = VSUB(Tz, TA); + Ts = VADD(T3, T6); + TB = VADD(Tz, TA); + } + { + V Ta, TF, Td, TG; + { + V T8, T9, Tb, Tc; + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + TF = VADD(T8, T9); + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = 
LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TG = VADD(Tb, Tc); + } + Te = VSUB(Ta, Td); + TK = VSUB(TG, TF); + Tr = VADD(Ta, Td); + TH = VADD(TF, TG); + } + ST(&(xo[WS(os, 7)]), VADD(Tp, VADD(Ts, VADD(Tq, Tr))), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Ty, VADD(TB, VADD(TE, TH))), ovs, &(xo[0])); + { + V Tm, Tt, TQ, TP; + Tm = VBYI(VFMA(LDK(KP433883739), T7, VFNMS(LDK(KP781831482), Tl, VMUL(LDK(KP974927912), Te)))); + Tt = VFMA(LDK(KP623489801), Tq, VFNMS(LDK(KP222520933), Tr, VFNMS(LDK(KP900968867), Ts, Tp))); + ST(&(xo[WS(os, 3)]), VADD(Tm, Tt), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VSUB(Tt, Tm), ovs, &(xo[WS(os, 1)])); + TQ = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK)))); + TP = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TB, Ty))); + ST(&(xo[WS(os, 12)]), VSUB(TP, TQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(TP, TQ), ovs, &(xo[0])); + } + { + V Tu, Tv, TM, TI; + Tu = VBYI(VFMA(LDK(KP781831482), T7, VFMA(LDK(KP974927912), Tl, VMUL(LDK(KP433883739), Te)))); + Tv = VFMA(LDK(KP623489801), Ts, VFNMS(LDK(KP900968867), Tr, VFNMS(LDK(KP222520933), Tq, Tp))); + ST(&(xo[WS(os, 1)]), VADD(Tu, Tv), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VSUB(Tv, Tu), ovs, &(xo[WS(os, 1)])); + TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ)))); + TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TE, Ty))); + ST(&(xo[WS(os, 6)]), VSUB(TI, TM), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(TI, TM), ovs, &(xo[0])); + } + { + V TO, TN, Tx, Tw; + TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL)))); + TN = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP222520933), TH, VFNMS(LDK(KP900968867), TB, Ty))); + ST(&(xo[WS(os, 4)]), VSUB(TN, TO), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VADD(TN, TO), ovs, &(xo[0])); + Tx = VBYI(VFNMS(LDK(KP781831482), Te, 
VFNMS(LDK(KP433883739), Tl, VMUL(LDK(KP974927912), T7)))); + Tw = VFMA(LDK(KP623489801), Tr, VFNMS(LDK(KP900968867), Tq, VFNMS(LDK(KP222520933), Ts, Tp))); + ST(&(xo[WS(os, 5)]), VSUB(Tw, Tx), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(Tx, Tw), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n1bv_14"), {50, 12, 24, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_14) (planner *p) { + X(kdft_register) (p, n1bv_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 15 -name n1bv_15 -include n1b.h */ + +/* + * This function contains 78 FP additions, 49 FP multiplications, + * (or, 36 additions, 7 multiplications, 42 fused multiply/add), + * 78 stack variables, 8 constants, and 30 memory accesses + */ +#include "n1b.h" + +static void n1bv_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP910592997, +0.910592997310029334643087372129977886038870291); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(30, is), MAKE_VOLATILE_STRIDE(30, os)) { + V Tb, TH, Tw, TA, Th, T11, T5, Ti, T12, Ta, Tx, Te, Tq, T16, Tj; + V T1, T2, T3; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + { + V T6, T7, T8, Tm, Tn, To; + T6 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), 
ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + { + V T4, Tc, T9, Td, Tp; + Tb = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + TH = VSUB(T2, T3); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tw = VSUB(T7, T8); + T9 = VADD(T7, T8); + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tp = VADD(Tn, To); + TA = VSUB(Tn, To); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T11 = VADD(T1, T4); + T5 = VFNMS(LDK(KP500000000), T4, T1); + Ti = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T12 = VADD(T6, T9); + Ta = VFNMS(LDK(KP500000000), T9, T6); + Tx = VSUB(Tc, Td); + Te = VADD(Tc, Td); + Tq = VFNMS(LDK(KP500000000), Tp, Tm); + T16 = VADD(Tm, Tp); + Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + } + } + { + V TI, Ty, T13, Tf, Tz, Tk; + TI = VADD(Tw, Tx); + Ty = VSUB(Tw, Tx); + T13 = VADD(Tb, Te); + Tf = VFNMS(LDK(KP500000000), Te, Tb); + Tz = VSUB(Ti, Tj); + Tk = VADD(Ti, Tj); + { + V T1d, T14, Tg, TE, TJ, TB, T15, Tl; + T1d = VSUB(T12, T13); + T14 = VADD(T12, T13); + Tg = VADD(Ta, Tf); + TE = VSUB(Ta, Tf); + TJ = VADD(Tz, TA); + TB = VSUB(Tz, TA); + T15 = VADD(Th, Tk); + Tl = VFNMS(LDK(KP500000000), Tk, Th); + { + V TM, TK, TS, TC, T1c, T17, Tr, TF, TL, T10; + TM = VSUB(TI, TJ); + TK = VADD(TI, TJ); + TS = VFNMS(LDK(KP618033988), Ty, TB); + TC = VFMA(LDK(KP618033988), TB, Ty); + T1c = VSUB(T15, T16); + T17 = VADD(T15, T16); + Tr = VADD(Tl, Tq); + TF = VSUB(Tl, Tq); + TL = VFNMS(LDK(KP250000000), TK, TH); + T10 = VMUL(LDK(KP866025403), VADD(TH, TK)); + { + V T1g, T1e, T1a, Tu, Ts, TU, TG, TV, TN, T19, T18, Tt, TZ; + T1g = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1c, T1d)); + T1e = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1d, T1c)); + T18 = VADD(T14, T17); + T1a = VSUB(T14, T17); + Tu = VSUB(Tg, Tr); + Ts = VADD(Tg, Tr); + TU = VFNMS(LDK(KP618033988), TE, TF); + TG = 
VFMA(LDK(KP618033988), TF, TE); + TV = VFNMS(LDK(KP559016994), TM, TL); + TN = VFMA(LDK(KP559016994), TM, TL); + ST(&(xo[0]), VADD(T11, T18), ovs, &(xo[0])); + T19 = VFNMS(LDK(KP250000000), T18, T11); + Tt = VFNMS(LDK(KP250000000), Ts, T5); + TZ = VADD(T5, Ts); + { + V TW, TY, TQ, TO, T1b, T1f, TR, Tv; + TW = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), TV, TU)); + TY = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), TV, TU)); + TQ = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), TN, TG)); + TO = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), TN, TG)); + T1b = VFNMS(LDK(KP559016994), T1a, T19); + T1f = VFMA(LDK(KP559016994), T1a, T19); + TR = VFNMS(LDK(KP559016994), Tu, Tt); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + ST(&(xo[WS(os, 10)]), VFMAI(T10, TZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(T10, TZ), ovs, &(xo[WS(os, 1)])); + { + V TT, TX, TP, TD; + ST(&(xo[WS(os, 12)]), VFNMSI(T1e, T1b), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(T1e, T1b), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(T1g, T1f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFMAI(T1g, T1f), ovs, &(xo[0])); + TT = VFNMS(LDK(KP823639103), TS, TR); + TX = VFMA(LDK(KP823639103), TS, TR); + TP = VFMA(LDK(KP823639103), TC, Tv); + TD = VFNMS(LDK(KP823639103), TC, Tv); + ST(&(xo[WS(os, 13)]), VFMAI(TW, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(TW, TT), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(TY, TX), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VFNMSI(TY, TX), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFMAI(TQ, TP), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(TQ, TP), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(TO, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(TO, TD), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 15, XSIMD_STRING("n1bv_15"), {36, 7, 42, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_15) (planner *p) { + X(kdft_register) (p, n1bv_15, &desc); +} + +#else /* HAVE_FMA */ 
+ +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 15 -name n1bv_15 -include n1b.h */ + +/* + * This function contains 78 FP additions, 25 FP multiplications, + * (or, 64 additions, 11 multiplications, 14 fused multiply/add), + * 55 stack variables, 10 constants, and 30 memory accesses + */ +#include "n1b.h" + +static void n1bv_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP216506350, +0.216506350946109661690930792688234045867850657); + DVK(KP509036960, +0.509036960455127183450980863393907648510733164); + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP484122918, +0.484122918275927110647408174972799951354115213); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(30, is), MAKE_VOLATILE_STRIDE(30, os)) { + V Ti, T11, TH, Ts, TL, TM, Tz, TC, TD, TI, T12, T13, T14, T15, T16; + V T17, Tf, Tj, TZ, T10; + { + V TF, Tg, Th, TG; + TF = LD(&(xi[0]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TG = VADD(Tg, Th); + Ti = VSUB(Tg, Th); + T11 = VADD(TF, TG); + TH = VFNMS(LDK(KP500000000), TG, TF); + } + { + V Tm, Tn, T3, To, Tw, Tx, Td, Ty, Tp, Tq, T6, Tr, Tt, Tu, Ta; + V Tv, T7, Te; + { + V T1, T2, Tb, Tc; + Tm = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 
1)])); + Tn = VADD(T1, T2); + T3 = VSUB(T1, T2); + To = VFNMS(LDK(KP500000000), Tn, Tm); + Tw = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tx = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + Ty = VFNMS(LDK(KP500000000), Tx, Tw); + } + { + V T4, T5, T8, T9; + Tp = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tq = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tr = VFNMS(LDK(KP500000000), Tq, Tp); + Tt = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tu = VADD(T8, T9); + Ta = VSUB(T8, T9); + Tv = VFNMS(LDK(KP500000000), Tu, Tt); + } + Ts = VSUB(To, Tr); + TL = VSUB(T3, T6); + TM = VSUB(Ta, Td); + Tz = VSUB(Tv, Ty); + TC = VADD(To, Tr); + TD = VADD(Tv, Ty); + TI = VADD(TC, TD); + T12 = VADD(Tm, Tn); + T13 = VADD(Tp, Tq); + T14 = VADD(T12, T13); + T15 = VADD(Tt, Tu); + T16 = VADD(Tw, Tx); + T17 = VADD(T15, T16); + T7 = VADD(T3, T6); + Te = VADD(Ta, Td); + Tf = VMUL(LDK(KP484122918), VSUB(T7, Te)); + Tj = VADD(T7, Te); + } + TZ = VADD(TH, TI); + T10 = VBYI(VMUL(LDK(KP866025403), VADD(Ti, Tj))); + ST(&(xo[WS(os, 5)]), VSUB(TZ, T10), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VADD(T10, TZ), ovs, &(xo[0])); + { + V T1a, T18, T19, T1e, T1f, T1c, T1d, T1g, T1b; + T1a = VMUL(LDK(KP559016994), VSUB(T14, T17)); + T18 = VADD(T14, T17); + T19 = VFNMS(LDK(KP250000000), T18, T11); + T1c = VSUB(T12, T13); + T1d = VSUB(T15, T16); + T1e = VBYI(VFNMS(LDK(KP951056516), T1d, VMUL(LDK(KP587785252), T1c))); + T1f = VBYI(VFMA(LDK(KP951056516), T1c, VMUL(LDK(KP587785252), T1d))); + ST(&(xo[0]), VADD(T11, T18), ovs, &(xo[0])); + T1g = VADD(T1a, T19); + ST(&(xo[WS(os, 6)]), VADD(T1f, T1g), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VSUB(T1g, T1f), ovs, &(xo[WS(os, 1)])); + T1b = VSUB(T19, T1a); + ST(&(xo[WS(os, 3)]), VSUB(T1b, T1e), 
ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 12)]), VADD(T1e, T1b), ovs, &(xo[0])); + } + { + V TA, TN, TU, TS, Tl, TR, TK, TV, Tk, TE, TJ; + TA = VFMA(LDK(KP951056516), Ts, VMUL(LDK(KP587785252), Tz)); + TN = VFMA(LDK(KP823639103), TL, VMUL(LDK(KP509036960), TM)); + TU = VFNMS(LDK(KP823639103), TM, VMUL(LDK(KP509036960), TL)); + TS = VFNMS(LDK(KP951056516), Tz, VMUL(LDK(KP587785252), Ts)); + Tk = VFNMS(LDK(KP216506350), Tj, VMUL(LDK(KP866025403), Ti)); + Tl = VADD(Tf, Tk); + TR = VSUB(Tf, Tk); + TE = VMUL(LDK(KP559016994), VSUB(TC, TD)); + TJ = VFNMS(LDK(KP250000000), TI, TH); + TK = VADD(TE, TJ); + TV = VSUB(TJ, TE); + { + V TB, TO, TX, TY; + TB = VBYI(VADD(Tl, TA)); + TO = VSUB(TK, TN); + ST(&(xo[WS(os, 1)]), VADD(TB, TO), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 14)]), VSUB(TO, TB), ovs, &(xo[0])); + TX = VBYI(VSUB(TS, TR)); + TY = VSUB(TV, TU); + ST(&(xo[WS(os, 7)]), VADD(TX, TY), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VSUB(TY, TX), ovs, &(xo[0])); + } + { + V TP, TQ, TT, TW; + TP = VBYI(VSUB(Tl, TA)); + TQ = VADD(TN, TK); + ST(&(xo[WS(os, 4)]), VADD(TP, TQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VSUB(TQ, TP), ovs, &(xo[WS(os, 1)])); + TT = VBYI(VADD(TR, TS)); + TW = VADD(TU, TV); + ST(&(xo[WS(os, 2)]), VADD(TT, TW), ovs, &(xo[0])); + ST(&(xo[WS(os, 13)]), VSUB(TW, TT), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 15, XSIMD_STRING("n1bv_15"), {64, 11, 14, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_15) (planner *p) { + X(kdft_register) (p, n1bv_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify 
+ * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:53 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 16 -name n1bv_16 -include n1b.h */ + +/* + * This function contains 72 FP additions, 34 FP multiplications, + * (or, 38 additions, 0 multiplications, 34 fused multiply/add), + * 54 stack variables, 3 constants, and 32 memory accesses + */ +#include "n1b.h" + +static void n1bv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V T7, Tu, TF, TB, T13, TL, TO, TX, TC, Te, TP, Th, TQ, Tk, TW; + V T16; + { + V TH, TU, Tz, Tf, TK, TV, TA, TM, Ta, TN, Td, Tg, Ti, Tj; + { + V T1, T2, T4, T5, To, Tp, Tr, Ts; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 8)]), 
ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tp = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tr = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T8, TI, Tq, TJ, Tt, T9, Tb, Tc, T3, T6; + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + TH = VSUB(T1, T2); + T3 = VADD(T1, T2); + TU = VSUB(T4, T5); + T6 = VADD(T4, T5); + TI = VSUB(To, Tp); + Tq = VADD(To, Tp); + TJ = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T7 = VSUB(T3, T6); + Tz = VADD(T3, T6); + Tf = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TK = VADD(TI, TJ); + TV = VSUB(TI, TJ); + TA = VADD(Tq, Tt); + Tu = VSUB(Tq, Tt); + TM = VSUB(T8, T9); + Ta = VADD(T8, T9); + TN = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + Tg = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + } + TF = VADD(Tz, TA); + TB = VSUB(Tz, TA); + T13 = VFNMS(LDK(KP707106781), TK, TH); + TL = VFMA(LDK(KP707106781), TK, TH); + TO = VFNMS(LDK(KP414213562), TN, TM); + TX = VFMA(LDK(KP414213562), TM, TN); + TC = VADD(Ta, Td); + Te = VSUB(Ta, Td); + TP = VSUB(Tf, Tg); + Th = VADD(Tf, Tg); + TQ = VSUB(Tj, Ti); + Tk = VADD(Ti, Tj); + TW = VFMA(LDK(KP707106781), TV, TU); + T16 = VFNMS(LDK(KP707106781), TV, TU); + } + { + V TY, TR, Tl, TD; + TY = VFMA(LDK(KP414213562), TP, TQ); + TR = VFNMS(LDK(KP414213562), TQ, TP); + Tl = VSUB(Th, Tk); + TD = VADD(Th, Tk); + { + V TS, T17, TZ, T14; + TS = VADD(TO, TR); + T17 = VSUB(TO, TR); + TZ = VSUB(TX, TY); + T14 = VADD(TX, TY); + { + V TE, TG, Tm, Tv; + TE = VSUB(TC, TD); + TG = VADD(TC, TD); + Tm = VADD(Te, Tl); + Tv = VSUB(Te, Tl); + { + V T18, T1a, TT, T11; + T18 = VFMA(LDK(KP923879532), T17, T16); + T1a = 
VFNMS(LDK(KP923879532), T17, T16); + TT = VFNMS(LDK(KP923879532), TS, TL); + T11 = VFMA(LDK(KP923879532), TS, TL); + { + V T15, T19, T10, T12; + T15 = VFNMS(LDK(KP923879532), T14, T13); + T19 = VFMA(LDK(KP923879532), T14, T13); + T10 = VFNMS(LDK(KP923879532), TZ, TW); + T12 = VFMA(LDK(KP923879532), TZ, TW); + ST(&(xo[0]), VADD(TF, TG), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VSUB(TF, TG), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(TE, TB), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(TE, TB), ovs, &(xo[0])); + { + V Tw, Ty, Tn, Tx; + Tw = VFNMS(LDK(KP707106781), Tv, Tu); + Ty = VFMA(LDK(KP707106781), Tv, Tu); + Tn = VFNMS(LDK(KP707106781), Tm, T7); + Tx = VFMA(LDK(KP707106781), Tm, T7); + ST(&(xo[WS(os, 3)]), VFNMSI(T1a, T19), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFMAI(T1a, T19), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(T18, T15), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFMAI(T18, T15), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFNMSI(T12, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(T12, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFMAI(T10, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(T10, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFMAI(Ty, Tx), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(Ty, Tx), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(Tw, Tn), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Tw, Tn), ovs, &(xo[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n1bv_16"), {38, 0, 34, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_16) (planner *p) { + X(kdft_register) (p, n1bv_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 16 -name n1bv_16 -include n1b.h */ + +/* + * This function contains 72 FP additions, 12 FP multiplications, + * (or, 68 additions, 8 multiplications, 4 fused multiply/add), + * 30 stack 
variables, 3 constants, and 32 memory accesses + */ +#include "n1b.h" + +static void n1bv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V Tp, T13, Tu, TY, Tm, T14, Tv, TU, T7, T16, Tx, TN, Te, T17, Ty; + V TQ; + { + V Tn, To, TX, Ts, Tt, TW; + Tn = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TX = VADD(Tn, To); + Ts = LD(&(xi[0]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TW = VADD(Ts, Tt); + Tp = VSUB(Tn, To); + T13 = VADD(TW, TX); + Tu = VSUB(Ts, Tt); + TY = VSUB(TW, TX); + } + { + V Ti, TS, Tl, TT; + { + V Tg, Th, Tj, Tk; + Tg = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Ti = VSUB(Tg, Th); + TS = VADD(Tg, Th); + Tj = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + TT = VADD(Tj, Tk); + } + Tm = VMUL(LDK(KP707106781), VSUB(Ti, Tl)); + T14 = VADD(TS, TT); + Tv = VMUL(LDK(KP707106781), VADD(Ti, Tl)); + TU = VSUB(TS, TT); + } + { + V T3, TL, T6, TM; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + TL = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + TM = VADD(T4, T5); + } + T7 = VFNMS(LDK(KP382683432), T6, VMUL(LDK(KP923879532), T3)); + T16 = VADD(TL, TM); + Tx = VFMA(LDK(KP382683432), T3, VMUL(LDK(KP923879532), T6)); + TN = VSUB(TL, TM); + } + { + V Ta, TO, Td, TP; + { + 
V T8, T9, Tb, Tc; + T8 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + TO = VADD(T8, T9); + Tb = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TP = VADD(Tb, Tc); + } + Te = VFMA(LDK(KP923879532), Ta, VMUL(LDK(KP382683432), Td)); + T17 = VADD(TO, TP); + Ty = VFNMS(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), Td)); + TQ = VSUB(TO, TP); + } + { + V T15, T18, T19, T1a; + T15 = VSUB(T13, T14); + T18 = VBYI(VSUB(T16, T17)); + ST(&(xo[WS(os, 12)]), VSUB(T15, T18), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(T15, T18), ovs, &(xo[0])); + T19 = VADD(T13, T14); + T1a = VADD(T16, T17); + ST(&(xo[WS(os, 8)]), VSUB(T19, T1a), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T19, T1a), ovs, &(xo[0])); + } + { + V TV, T11, T10, T12, TR, TZ; + TR = VMUL(LDK(KP707106781), VSUB(TN, TQ)); + TV = VBYI(VSUB(TR, TU)); + T11 = VBYI(VADD(TU, TR)); + TZ = VMUL(LDK(KP707106781), VADD(TN, TQ)); + T10 = VSUB(TY, TZ); + T12 = VADD(TY, TZ); + ST(&(xo[WS(os, 6)]), VADD(TV, T10), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VSUB(T12, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VSUB(T10, TV), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(T11, T12), ovs, &(xo[0])); + } + { + V Tr, TB, TA, TC; + { + V Tf, Tq, Tw, Tz; + Tf = VSUB(T7, Te); + Tq = VSUB(Tm, Tp); + Tr = VBYI(VSUB(Tf, Tq)); + TB = VBYI(VADD(Tq, Tf)); + Tw = VSUB(Tu, Tv); + Tz = VSUB(Tx, Ty); + TA = VSUB(Tw, Tz); + TC = VADD(Tw, Tz); + } + ST(&(xo[WS(os, 5)]), VADD(Tr, TA), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VSUB(TC, TB), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VSUB(TA, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(TB, TC), ovs, &(xo[WS(os, 1)])); + } + { + V TF, TJ, TI, TK; + { + V TD, TE, TG, TH; + TD = VADD(Tu, Tv); + TE = VADD(T7, Te); + TF = VADD(TD, TE); + TJ = VSUB(TD, TE); + TG = VADD(Tp, Tm); + TH = VADD(Tx, Ty); + TI = VBYI(VADD(TG, TH)); + TK = VBYI(VSUB(TH, TG)); + } 
+ ST(&(xo[WS(os, 15)]), VSUB(TF, TI), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(TJ, TK), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(TF, TI), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VSUB(TJ, TK), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n1bv_16"), {68, 8, 4, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_16) (planner *p) { + X(kdft_register) (p, n1bv_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 2 -name n1bv_2 -include n1b.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "n1b.h" + +static void n1bv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + ST(&(xo[0]), VADD(T1, T2), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VSUB(T1, T2), ovs, &(xo[WS(os, 1)])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n1bv_2"), {2, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_2) (planner *p) { + X(kdft_register) (p, n1bv_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 2 -name n1bv_2 -include n1b.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "n1b.h" + +static void n1bv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, 
INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + ST(&(xo[WS(os, 1)]), VSUB(T1, T2), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(T1, T2), ovs, &(xo[0])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n1bv_2"), {2, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_2) (planner *p) { + X(kdft_register) (p, n1bv_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 20 -name n1bv_20 -include n1b.h */ + +/* + * This function contains 104 FP additions, 50 FP multiplications, + * (or, 58 additions, 4 multiplications, 46 fused multiply/add), + * 71 stack variables, 4 constants, and 40 memory accesses + */ +#include "n1b.h" + +static void n1bv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V TS, TA, TN, TV, TK, TU, TR, Tl; + { + V T3, TE, T1r, T13, Ta, TL, Tz, TG, Ts, TF, Th, TM, T1u, T1C, T1n; + V T1a, T1m, T1h, T1x, T1D, Tk, Ti; + { + V T1, T2, TC, TD; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TC = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + TD = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + { + V T14, T6, T1c, Tv, Tm, T1f, Ty, T17, T9, Tn, Tp, T1b, Td, Tq, Te; + V Tf, T15, To; + { + V Tw, Tx, T7, T8, Tb, Tc; + { + V T4, T5, Tt, Tu, T11, T12; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tt = 
LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tu = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + T11 = VADD(T1, T2); + TE = VSUB(TC, TD); + T12 = VADD(TC, TD); + T14 = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1c = VADD(Tt, Tu); + Tv = VSUB(Tt, Tu); + Tx = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1r = VADD(T11, T12); + T13 = VSUB(T11, T12); + } + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1f = VADD(Tw, Tx); + Ty = VSUB(Tw, Tx); + T17 = VADD(T7, T8); + T9 = VSUB(T7, T8); + Tn = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tp = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1b = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + Tq = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + } + Ta = VADD(T6, T9); + TL = VSUB(T6, T9); + T15 = VADD(Tm, Tn); + To = VSUB(Tm, Tn); + Tz = VSUB(Tv, Ty); + TG = VADD(Tv, Ty); + { + V T1d, T1v, T18, Tr, T1e, Tg, T16, T1s; + T1d = VSUB(T1b, T1c); + T1v = VADD(T1b, T1c); + T18 = VADD(Tp, Tq); + Tr = VSUB(Tp, Tq); + T1e = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + T16 = VSUB(T14, T15); + T1s = VADD(T14, T15); + { + V T1t, T19, T1w, T1g; + T1t = VADD(T17, T18); + T19 = VSUB(T17, T18); + Ts = VSUB(To, Tr); + TF = VADD(To, Tr); + T1w = VADD(T1e, T1f); + T1g = VSUB(T1e, T1f); + Th = VADD(Td, Tg); + TM = VSUB(Td, Tg); + T1u = VADD(T1s, T1t); + T1C = VSUB(T1s, T1t); + T1n = VSUB(T16, T19); + T1a = VADD(T16, T19); + T1m = VSUB(T1d, T1g); + T1h = VADD(T1d, T1g); + T1x = VADD(T1v, T1w); + T1D = VSUB(T1v, T1w); + } + } + } + } + Tk = VSUB(Ta, Th); + Ti = VADD(Ta, Th); + { + V TJ, T1k, T1A, TZ, Tj, T1E, T1G, TI, T10, T1j, T1z, T1i, T1y, TH; + TJ = VSUB(TF, TG); + TH = VADD(TF, TG); + T1i = VADD(T1a, T1h); + T1k = 
VSUB(T1a, T1h); + T1y = VADD(T1u, T1x); + T1A = VSUB(T1u, T1x); + TZ = VADD(T3, Ti); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + T1E = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1D, T1C)); + T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1C, T1D)); + TI = VFNMS(LDK(KP250000000), TH, TE); + T10 = VADD(TE, TH); + T1j = VFNMS(LDK(KP250000000), T1i, T13); + ST(&(xo[0]), VADD(T1r, T1y), ovs, &(xo[0])); + T1z = VFNMS(LDK(KP250000000), T1y, T1r); + ST(&(xo[WS(os, 10)]), VADD(T13, T1i), ovs, &(xo[0])); + { + V T1p, T1l, T1o, T1q, T1F, T1B; + TS = VFNMS(LDK(KP618033988), Ts, Tz); + TA = VFMA(LDK(KP618033988), Tz, Ts); + TN = VFMA(LDK(KP618033988), TM, TL); + TV = VFNMS(LDK(KP618033988), TL, TM); + ST(&(xo[WS(os, 5)]), VFMAI(T10, TZ), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFNMSI(T10, TZ), ovs, &(xo[WS(os, 1)])); + T1p = VFMA(LDK(KP559016994), T1k, T1j); + T1l = VFNMS(LDK(KP559016994), T1k, T1j); + T1o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1n, T1m)); + T1q = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1m, T1n)); + T1F = VFNMS(LDK(KP559016994), T1A, T1z); + T1B = VFMA(LDK(KP559016994), T1A, T1z); + ST(&(xo[WS(os, 14)]), VFNMSI(T1q, T1p), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFMAI(T1q, T1p), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(T1o, T1l), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFNMSI(T1o, T1l), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VFMAI(T1E, T1B), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFNMSI(T1E, T1B), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(T1G, T1F), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(T1G, T1F), ovs, &(xo[0])); + TK = VFMA(LDK(KP559016994), TJ, TI); + TU = VFNMS(LDK(KP559016994), TJ, TI); + TR = VFNMS(LDK(KP559016994), Tk, Tj); + Tl = VFMA(LDK(KP559016994), Tk, Tj); + } + } + } + { + V TY, TW, TO, TQ, TB, TP, TX, TT; + TY = VFMA(LDK(KP951056516), TV, TU); + TW = VFNMS(LDK(KP951056516), TV, TU); + TO = VFMA(LDK(KP951056516), TN, TK); + TQ = VFNMS(LDK(KP951056516), TN, TK); + TB = VFNMS(LDK(KP951056516), TA, Tl); 
+ TP = VFMA(LDK(KP951056516), TA, Tl); + TX = VFNMS(LDK(KP951056516), TS, TR); + TT = VFMA(LDK(KP951056516), TS, TR); + ST(&(xo[WS(os, 9)]), VFMAI(TQ, TP), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(TQ, TP), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(TO, TB), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFNMSI(TO, TB), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFMAI(TW, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(TW, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFMAI(TY, TX), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(TY, TX), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n1bv_20"), {58, 4, 46, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_20) (planner *p) { + X(kdft_register) (p, n1bv_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 20 -name n1bv_20 -include n1b.h */ + +/* + * This function contains 104 FP additions, 24 FP multiplications, + * (or, 92 additions, 12 multiplications, 12 fused multiply/add), + * 53 stack variables, 4 constants, and 40 memory accesses + */ +#include "n1b.h" + +static void n1bv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V T3, T1y, TH, T1i, Ts, TL, TM, Tz, T13, T16, T1j, T1u, T1v, T1w, T1r; + V T1s, T1t, T1a, T1d, T1k, Ti, Tk, TE, TI, TZ, T10; + { + V T1, T2, T1g, TF, TG, T1h; + T1 = 
LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T1g = VADD(T1, T2); + TF = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + TG = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1h = VADD(TF, TG); + T3 = VSUB(T1, T2); + T1y = VADD(T1g, T1h); + TH = VSUB(TF, TG); + T1i = VSUB(T1g, T1h); + } + { + V T6, T11, Tv, T19, Ty, T1c, T9, T14, Td, T18, To, T12, Tr, T15, Tg; + V T1b; + { + V T4, T5, Tt, Tu; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T11 = VADD(T4, T5); + Tt = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tu = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tv = VSUB(Tt, Tu); + T19 = VADD(Tt, Tu); + } + { + V Tw, Tx, T7, T8; + Tw = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + Tx = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ty = VSUB(Tw, Tx); + T1c = VADD(Tw, Tx); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T14 = VADD(T7, T8); + } + { + V Tb, Tc, Tm, Tn; + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Td = VSUB(Tb, Tc); + T18 = VADD(Tb, Tc); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + To = VSUB(Tm, Tn); + T12 = VADD(Tm, Tn); + } + { + V Tp, Tq, Te, Tf; + Tp = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tq = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tr = VSUB(Tp, Tq); + T15 = VADD(Tp, Tq); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tg = VSUB(Te, Tf); + T1b = VADD(Te, Tf); + } + Ts = VSUB(To, Tr); + TL = VSUB(T6, T9); + TM = VSUB(Td, Tg); + Tz = VSUB(Tv, Ty); + T13 = VSUB(T11, T12); + T16 = VSUB(T14, T15); + T1j = VADD(T13, T16); + T1u = VADD(T18, T19); + T1v = VADD(T1b, T1c); + T1w = VADD(T1u, T1v); + T1r = VADD(T11, T12); + T1s = VADD(T14, T15); + T1t = VADD(T1r, T1s); + T1a = VSUB(T18, T19); + T1d = VSUB(T1b, T1c); + T1k = VADD(T1a, 
T1d); + { + V Ta, Th, TC, TD; + Ta = VADD(T6, T9); + Th = VADD(Td, Tg); + Ti = VADD(Ta, Th); + Tk = VMUL(LDK(KP559016994), VSUB(Ta, Th)); + TC = VADD(To, Tr); + TD = VADD(Tv, Ty); + TE = VMUL(LDK(KP559016994), VSUB(TC, TD)); + TI = VADD(TC, TD); + } + } + TZ = VADD(T3, Ti); + T10 = VBYI(VADD(TH, TI)); + ST(&(xo[WS(os, 15)]), VSUB(TZ, T10), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(TZ, T10), ovs, &(xo[WS(os, 1)])); + { + V T1x, T1z, T1A, T1E, T1G, T1C, T1D, T1F, T1B; + T1x = VMUL(LDK(KP559016994), VSUB(T1t, T1w)); + T1z = VADD(T1t, T1w); + T1A = VFNMS(LDK(KP250000000), T1z, T1y); + T1C = VSUB(T1r, T1s); + T1D = VSUB(T1u, T1v); + T1E = VBYI(VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D))); + T1G = VBYI(VFNMS(LDK(KP951056516), T1D, VMUL(LDK(KP587785252), T1C))); + ST(&(xo[0]), VADD(T1y, T1z), ovs, &(xo[0])); + T1F = VSUB(T1A, T1x); + ST(&(xo[WS(os, 8)]), VSUB(T1F, T1G), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VADD(T1G, T1F), ovs, &(xo[0])); + T1B = VADD(T1x, T1A); + ST(&(xo[WS(os, 4)]), VSUB(T1B, T1E), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VADD(T1E, T1B), ovs, &(xo[0])); + } + { + V T1n, T1l, T1m, T1f, T1p, T17, T1e, T1q, T1o; + T1n = VMUL(LDK(KP559016994), VSUB(T1j, T1k)); + T1l = VADD(T1j, T1k); + T1m = VFNMS(LDK(KP250000000), T1l, T1i); + T17 = VSUB(T13, T16); + T1e = VSUB(T1a, T1d); + T1f = VBYI(VFNMS(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T17))); + T1p = VBYI(VFMA(LDK(KP951056516), T17, VMUL(LDK(KP587785252), T1e))); + ST(&(xo[WS(os, 10)]), VADD(T1i, T1l), ovs, &(xo[0])); + T1q = VADD(T1n, T1m); + ST(&(xo[WS(os, 6)]), VADD(T1p, T1q), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VSUB(T1q, T1p), ovs, &(xo[0])); + T1o = VSUB(T1m, T1n); + ST(&(xo[WS(os, 2)]), VADD(T1f, T1o), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VSUB(T1o, T1f), ovs, &(xo[0])); + } + { + V TA, TN, TU, TS, TK, TV, Tl, TR, TJ, Tj; + TA = VFNMS(LDK(KP951056516), Tz, VMUL(LDK(KP587785252), Ts)); + TN = VFNMS(LDK(KP951056516), TM, VMUL(LDK(KP587785252), TL)); + TU = 
VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TM)); + TS = VFMA(LDK(KP951056516), Ts, VMUL(LDK(KP587785252), Tz)); + TJ = VFNMS(LDK(KP250000000), TI, TH); + TK = VSUB(TE, TJ); + TV = VADD(TE, TJ); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + Tl = VSUB(Tj, Tk); + TR = VADD(Tk, Tj); + { + V TB, TO, TX, TY; + TB = VSUB(Tl, TA); + TO = VBYI(VSUB(TK, TN)); + ST(&(xo[WS(os, 17)]), VSUB(TB, TO), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(TB, TO), ovs, &(xo[WS(os, 1)])); + TX = VADD(TR, TS); + TY = VBYI(VSUB(TV, TU)); + ST(&(xo[WS(os, 11)]), VSUB(TX, TY), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(TX, TY), ovs, &(xo[WS(os, 1)])); + } + { + V TP, TQ, TT, TW; + TP = VADD(Tl, TA); + TQ = VBYI(VADD(TN, TK)); + ST(&(xo[WS(os, 13)]), VSUB(TP, TQ), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(TP, TQ), ovs, &(xo[WS(os, 1)])); + TT = VSUB(TR, TS); + TW = VBYI(VADD(TU, TV)); + ST(&(xo[WS(os, 19)]), VSUB(TT, TW), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(TT, TW), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n1bv_20"), {92, 12, 12, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_20) (planner *p) { + X(kdft_register) (p, n1bv_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,798 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 25 -name n1bv_25 -include n1b.h */ + +/* + * This function contains 224 FP additions, 193 FP multiplications, + * (or, 43 additions, 12 multiplications, 181 fused multiply/add), + * 215 stack variables, 67 constants, and 50 memory accesses + */ +#include "n1b.h" +/* n1bv_25, HAVE_FMA build: machine-generated SIMD codelet of size 25 (genfft options -sign 1 -n 25 above). Data is read through xi = ii and written through xo = io; ri and ro are not referenced in the body. Every loop pass loads xi[WS(is, k)] and stores xo[WS(os, k)] for k = 0..24, counting i down from v in steps of VL while xi and xo advance by VL * ivs and VL * ovs; VLEAVE() runs once after the loop. */ +static void n1bv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP831864738,
+0.831864738706457140726048799369896829771167132); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, +0.447417479732227551498980015410057305749330693); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP522616830, +0.522616830205754336872861364785224694908468440); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP912575812,
+0.912575812670962425556968549836277086778922727); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io;
+ for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) { + V T1g, T1k, T1I, T24, T2a, T1G, T1A, T1l, T1B, T1H, T1d; + { + V T2z, T1q, Ta, T9, T3n, Ty, Tl, T2O, T2W, T2l, T2s, TV, T1i, T1K, T1S; + V T3z, T3t, Tk, T3o, Tp, T2g, T2N, T2V, T2o, T2t, T1a, T1j, T1J, T1R, Tz; + V Tt, TA, Tw; + { + V T1, T5, T6, T2, T3; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + { + V TH, TW, TK, TS, T10, T8, TN, TT, T17, TZ, T11; + TH = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TW = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V TI, TJ, TL, T7, T1p, T4, T1o, TM, TX, TY; + TI = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TJ = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TL = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T7 = VADD(T5, T6); + T1p = VSUB(T5, T6); + T4 = VADD(T2, T3); + T1o = VSUB(T2, T3); + TM = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TX = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TK = VADD(TI, TJ); + TS = VSUB(TI, TJ); + TY = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T10 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T2z = VFNMS(LDK(KP618033988), T1o, T1p); + T1q = VFMA(LDK(KP618033988), T1p, T1o); + Ta = VSUB(T4, T7); + T8 = VADD(T4, T7); + TN = VADD(TL, TM); + TT = VSUB(TM, TL); + T17 = VSUB(TX, TY); + TZ = VADD(TX, TY); + T11 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + } + { + V Tc, T2m, T19, Tn, To, Tr, Tj, T16, T2n, Ts, Tu, Tv; + { + V TU, T2j, TO, TQ, T12, T18; + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T9 = VFNMS(LDK(KP250000000), T8, T1); + T3n = VADD(T1, T8); + TU = VFNMS(LDK(KP618033988), TT, TS); + T2j = VFMA(LDK(KP618033988), TS, TT); + TO = VADD(TK, TN); + TQ = VSUB(TN, TK); + T12 = VADD(T10, T11); + T18 = VSUB(T10, T11); + Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + { + V T3r,
T15, T13, Tf, Ti, T2k, TR, TP, T3s, T14; + { + V Td, Te, Tg, Th; + Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + TP = VFNMS(LDK(KP250000000), TO, TH); + T3r = VADD(TH, TO); + T2m = VFNMS(LDK(KP618033988), T17, T18); + T19 = VFMA(LDK(KP618033988), T18, T17); + T15 = VSUB(T12, TZ); + T13 = VADD(TZ, T12); + Tf = VADD(Td, Te); + Tn = VSUB(Td, Te); + To = VSUB(Th, Tg); + Ti = VADD(Tg, Th); + } + T2k = VFMA(LDK(KP559016994), TQ, TP); + TR = VFNMS(LDK(KP559016994), TQ, TP); + Tr = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T3s = VADD(TW, T13); + T14 = VFNMS(LDK(KP250000000), T13, TW); + Tj = VADD(Tf, Ti); + Tl = VSUB(Tf, Ti); + T2O = VFNMS(LDK(KP667278218), T2k, T2j); + T2W = VFMA(LDK(KP603558818), T2j, T2k); + T2l = VFMA(LDK(KP066152395), T2k, T2j); + T2s = VFNMS(LDK(KP059835404), T2j, T2k); + TV = VFNMS(LDK(KP522847744), TU, TR); + T1i = VFMA(LDK(KP578046249), TR, TU); + T1K = VFNMS(LDK(KP494780565), TR, TU); + T1S = VFMA(LDK(KP447533225), TU, TR); + T16 = VFNMS(LDK(KP559016994), T15, T14); + T2n = VFMA(LDK(KP559016994), T15, T14); + T3z = VSUB(T3r, T3s); + T3t = VADD(T3r, T3s); + Ts = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tu = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + } + } + Tk = VFNMS(LDK(KP250000000), Tj, Tc); + T3o = VADD(Tc, Tj); + Tp = VFNMS(LDK(KP618033988), To, Tn); + T2g = VFMA(LDK(KP618033988), Tn, To); + T2N = VFMA(LDK(KP066152395), T2n, T2m); + T2V = VFNMS(LDK(KP059835404), T2m, T2n); + T2o = VFMA(LDK(KP869845200), T2n, T2m); + T2t = VFNMS(LDK(KP786782374), T2m, T2n); + T1a = VFNMS(LDK(KP893101515), T19, T16); + T1j = VFMA(LDK(KP987388751), T16, T19); + T1J = VFNMS(LDK(KP120146378), T19, T16); + T1R = VFMA(LDK(KP132830569), T16, T19); + Tz = VADD(Ts, Tr); + Tt = VSUB(Tr, Ts); + TA = VADD(Tv, Tu); + Tw = VSUB(Tu, Tv); + } + } + } + { + V T2p, T2I, T2u,
T2C, Tx, T2d, T2X, T34, T2P, T3b, T2b, Tb, T2Q, T2Z, T2h; + V T2w, Tq, T1e, T1M, T1U, TE, T2c, T3q, T3y; + T2p = VFNMS(LDK(KP772036680), T2o, T2l); + T2I = VFMA(LDK(KP772036680), T2o, T2l); + T2u = VFMA(LDK(KP772036680), T2t, T2s); + T2C = VFNMS(LDK(KP772036680), T2t, T2s); + { + V TD, TB, Tm, T2f, T3p, TC; + Tx = VFMA(LDK(KP618033988), Tw, Tt); + T2d = VFNMS(LDK(KP618033988), Tt, Tw); + TD = VSUB(Tz, TA); + TB = VADD(Tz, TA); + Tm = VFMA(LDK(KP559016994), Tl, Tk); + T2f = VFNMS(LDK(KP559016994), Tl, Tk); + T2X = VFMA(LDK(KP845997307), T2W, T2V); + T34 = VFNMS(LDK(KP845997307), T2W, T2V); + T2P = VFNMS(LDK(KP845997307), T2O, T2N); + T3b = VFMA(LDK(KP845997307), T2O, T2N); + T2b = VFNMS(LDK(KP559016994), Ta, T9); + Tb = VFMA(LDK(KP559016994), Ta, T9); + T3p = VADD(Ty, TB); + TC = VFMS(LDK(KP250000000), TB, Ty); + T2Q = VFNMS(LDK(KP522847744), T2g, T2f); + T2Z = VFMA(LDK(KP578046249), T2f, T2g); + T2h = VFMA(LDK(KP893101515), T2g, T2f); + T2w = VFNMS(LDK(KP987388751), T2f, T2g); + Tq = VFNMS(LDK(KP244189809), Tp, Tm); + T1e = VFMA(LDK(KP269969613), Tm, Tp); + T1M = VFMA(LDK(KP667278218), Tm, Tp); + T1U = VFNMS(LDK(KP603558818), Tp, Tm); + TE = VFNMS(LDK(KP559016994), TD, TC); + T2c = VFMA(LDK(KP559016994), TD, TC); + T3q = VADD(T3o, T3p); + T3y = VSUB(T3o, T3p); + } + { + V T1Z, T25, T1P, T22, T1X, TG, T1b, T28, T1t, T1y, T1x, T1E, T1Q, T1Y; + { + V T26, T1L, T1T, TF, T1f, T1W, T3m, T3g, T2M, T2G, T39, T3j, T21, T1O, T20; + V T27; + T26 = VFMA(LDK(KP867381224), T1K, T1J); + T1L = VFNMS(LDK(KP867381224), T1K, T1J); + T20 = VFNMS(LDK(KP958953096), T1S, T1R); + T1T = VFMA(LDK(KP958953096), T1S, T1R); + { + V T2R, T2Y, T2e, T2v, T1N, T1V; + T2R = VFNMS(LDK(KP494780565), T2c, T2d); + T2Y = VFMA(LDK(KP447533225), T2d, T2c); + T2e = VFMA(LDK(KP120146378), T2d, T2c); + T2v = VFNMS(LDK(KP132830569), T2c, T2d); + TF = VFNMS(LDK(KP667278218), TE, Tx); + T1f = VFMA(LDK(KP603558818), Tx, TE); + T1N = VFMA(LDK(KP869845200), TE, Tx); + T1V = VFNMS(LDK(KP786782374), Tx, TE); + { + V
T3A, T3C, T3w, T3u; + T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y)); + T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z)); + T3w = VSUB(T3q, T3t); + T3u = VADD(T3q, T3t); + { + V T2B, T2x, T2H, T2i; + T2B = VFMA(LDK(KP734762448), T2w, T2v); + T2x = VFNMS(LDK(KP734762448), T2w, T2v); + T2H = VFNMS(LDK(KP734762448), T2h, T2e); + T2i = VFMA(LDK(KP734762448), T2h, T2e); + { + V T30, T35, T3c, T2S, T3v; + T30 = VFNMS(LDK(KP921078979), T2Z, T2Y); + T35 = VFMA(LDK(KP921078979), T2Z, T2Y); + T3c = VFMA(LDK(KP982009705), T2R, T2Q); + T2S = VFNMS(LDK(KP982009705), T2R, T2Q); + T1W = VFMA(LDK(KP912575812), T1V, T1U); + T1Z = VFNMS(LDK(KP912575812), T1V, T1U); + T1O = VFMA(LDK(KP912575812), T1N, T1M); + T25 = VFNMS(LDK(KP912575812), T1N, T1M); + ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0])); + T3v = VFNMS(LDK(KP250000000), T3u, T3n); + { + V T2y, T2J, T2q, T2D; + T2y = VFMA(LDK(KP945422727), T2x, T2u); + T2J = VFMA(LDK(KP522616830), T2x, T2I); + T2q = VFMA(LDK(KP956723877), T2p, T2i); + T2D = VFNMS(LDK(KP522616830), T2i, T2C); + { + V T3e, T31, T36, T2T; + T3e = VFMA(LDK(KP906616052), T30, T2X); + T31 = VFNMS(LDK(KP906616052), T30, T2X); + T36 = VFNMS(LDK(KP923225144), T2S, T2P); + T2T = VFMA(LDK(KP923225144), T2S, T2P); + { + V T3k, T3d, T3x, T3B; + T3k = VFNMS(LDK(KP669429328), T3b, T3c); + T3d = VFMA(LDK(KP570584518), T3c, T3b); + T3x = VFMA(LDK(KP559016994), T3w, T3v); + T3B = VFNMS(LDK(KP559016994), T3w, T3v); + { + V T2A, T2K, T2r, T2E; + T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y)); + T2K = VFNMS(LDK(KP690983005), T2J, T2u); + T2r = VFMA(LDK(KP992114701), T2q, T2b); + T2E = VFMA(LDK(KP763932022), T2D, T2p); + { + V T32, T3a, T37, T3h; + T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31)); + T3a = VFMA(LDK(KP262346850), T31, T2z); + T37 = VFNMS(LDK(KP997675361), T36, T35); + T3h = VFNMS(LDK(KP904508497), T36, T34); + { + V T2U, T33, T3l, T3f; + T2U = VFMA(LDK(KP949179823), T2T, T2b); + T33 =
VFNMS(LDK(KP237294955), T2T, T2b); + T3l = VFNMS(LDK(KP669429328), T3e, T3k); + T3f = VFMA(LDK(KP618033988), T3e, T3d); + ST(&(xo[WS(os, 20)]), VFNMSI(T3A, T3x), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFMAI(T3A, T3x), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFMAI(T3C, T3B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VFNMSI(T3C, T3B), ovs, &(xo[0])); + { + V T2L, T2F, T38, T3i; + T2L = VFMA(LDK(KP855719849), T2K, T2H); + ST(&(xo[WS(os, 3)]), VFMAI(T2A, T2r), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 22)]), VFNMSI(T2A, T2r), ovs, &(xo[0])); + T2F = VFNMS(LDK(KP855719849), T2E, T2B); + T38 = VFMA(LDK(KP560319534), T37, T34); + T3i = VFNMS(LDK(KP681693190), T3h, T35); + ST(&(xo[WS(os, 2)]), VFMAI(T32, T2U), ovs, &(xo[0])); + ST(&(xo[WS(os, 23)]), VFNMSI(T32, T2U), ovs, &(xo[WS(os, 1)])); + T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a)); + T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a)); + T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z)); + T2G = VFMA(LDK(KP897376177), T2F, T2b); + T39 = VFNMS(LDK(KP949179823), T38, T33); + T3j = VFNMS(LDK(KP860541664), T3i, T33); + T21 = VFMA(LDK(KP447417479), T1O, T20); + } + } + } + } + } + } + } + } + } + } + } + T1P = VFNMS(LDK(KP809385824), T1O, T1L); + ST(&(xo[WS(os, 17)]), VFNMSI(T2M, T2G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFMAI(T2M, T2G), ovs, &(xo[0])); + ST(&(xo[WS(os, 13)]), VFMAI(T3g, T39), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 12)]), VFNMSI(T3g, T39), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(T3m, T3j), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VFNMSI(T3m, T3j), ovs, &(xo[WS(os, 1)])); + T22 = VFMA(LDK(KP690983005), T21, T1L); + T27 = VFMA(LDK(KP447417479), T1W, T26); + T1X = VFMA(LDK(KP894834959), T1W, T1T); + { + V T1r, T1s, T1v, T1w; + T1r = VFNMS(LDK(KP916574801), T1f, T1e); + T1g = VFMA(LDK(KP916574801), T1f, T1e); + T1k = VFNMS(LDK(KP831864738), T1j, T1i); + T1s = VFMA(LDK(KP831864738), T1j, T1i); + T1v = VFNMS(LDK(KP829049696), TF, Tq); +
TG = VFMA(LDK(KP829049696), TF, Tq); + T1b = VFMA(LDK(KP831864738), T1a, TV); + T1w = VFNMS(LDK(KP831864738), T1a, TV); + T28 = VFNMS(LDK(KP763932022), T27, T1T); + T1t = VFMA(LDK(KP904730450), T1s, T1r); + T1y = VFNMS(LDK(KP904730450), T1s, T1r); + T1x = VFMA(LDK(KP559154169), T1w, T1v); + T1E = VFNMS(LDK(KP683113946), T1v, T1w); + } + } + T1Q = VFNMS(LDK(KP992114701), T1P, Tb); + T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q)); + { + V T1u, T1F, T1z, T1h, T1c, T23, T29; + T23 = VFNMS(LDK(KP999544308), T22, T1Z); + T29 = VFNMS(LDK(KP999544308), T28, T25); + T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q)); + T1u = VFNMS(LDK(KP242145790), T1t, T1q); + T1F = VFMA(LDK(KP617882369), T1y, T1E); + T1z = VFMA(LDK(KP559016994), T1y, T1x); + T1h = VFNMS(LDK(KP904730450), T1b, TG); + T1c = VFMA(LDK(KP904730450), T1b, TG); + ST(&(xo[WS(os, 21)]), VFMAI(T1Y, T1Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(T1Y, T1Q), ovs, &(xo[0])); + T24 = VFNMS(LDK(KP803003575), T23, Tb); + T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q)); + T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u)); + T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u)); + T1l = VFNMS(LDK(KP904730450), T1k, T1h); + T1B = VADD(T1g, T1h); + T1H = VFMA(LDK(KP968583161), T1c, Tb); + T1d = VFNMS(LDK(KP242145790), T1c, Tb); + } + } + } + } + ST(&(xo[WS(os, 16)]), VFMAI(T2a, T24), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VFNMSI(T2a, T24), ovs, &(xo[WS(os, 1)])); + { + V T1m, T1C, T1n, T1D; + T1m = VFNMS(LDK(KP618033988), T1l, T1g); + T1C = VFNMS(LDK(KP683113946), T1B, T1k); + ST(&(xo[WS(os, 24)]), VFNMSI(T1I, T1H), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(T1I, T1H), ovs, &(xo[WS(os, 1)])); + T1n = VFNMS(LDK(KP876091699), T1m, T1d); + T1D = VFMA(LDK(KP792626838), T1C, T1d); + ST(&(xo[WS(os, 19)]), VFNMSI(T1A, T1n), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFMAI(T1A, T1n), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(T1G, T1D), ovs,
&(xo[0])); + ST(&(xo[WS(os, 11)]), VFMAI(T1G, T1D), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} +/* Codelet descriptor: size 25, name string n1bv_25; the leading entries of {43, 12, 181, 0} equal the addition, multiplication and fused multiply/add counts quoted in the header comment of this variant. */ +static const kdft_desc desc = { 25, XSIMD_STRING("n1bv_25"), {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registration hook: passes n1bv_25 and its descriptor to X(kdft_register) for planner p. */ +void XSIMD(codelet_n1bv_25) (planner *p) { + X(kdft_register) (p, n1bv_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 25 -name n1bv_25 -include n1b.h */ + +/* + * This function contains 224 FP additions, 140 FP multiplications, + * (or, 147 additions, 63 multiplications, 77 fused multiply/add), + * 115 stack variables, 40 constants, and 50 memory accesses + */ +#include "n1b.h" +/* n1bv_25, build without HAVE_FMA: same interface and the same 25 loads and 25 stores per loop pass as the HAVE_FMA variant, generated without the -fma option (see the Generated by line above); constants are combined with explicit VMUL, VFMA, VFNMS and VBYI calls. Only ii and io carry data (xi = ii, xo = io). */ +static void n1bv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP248689887,
+0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) { + V T1b, T2o, T1v, T1e, T2W, T2P, T2Q, T2U, T11, T27, TY, T26, T12, T2f, T1j; + V T28, TM, T24, TJ, T23, TN, T2e, T1i, T25,
T2M, T2N, T2T, Tm, T1W, Tt; + V T1X, Tu, T20, Tw, T1Y, T7, T1U, Te, T1T, Tf, T21, Tx, T1V; + { + V T1c, T1a, T1t, T17, T1r; + T1c = LD(&(xi[0]), ivs, &(xi[0])); + { + V T18, T19, T15, T16; + T18 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T19 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1a = VADD(T18, T19); + T1t = VSUB(T18, T19); + T15 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T16 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T17 = VADD(T15, T16); + T1r = VSUB(T15, T16); + } + { + V T2n, T1s, T1u, T1d; + T1b = VMUL(LDK(KP559016994), VSUB(T17, T1a)); + T2n = VMUL(LDK(KP587785252), T1r); + T2o = VFNMS(LDK(KP951056516), T1t, T2n); + T1s = VMUL(LDK(KP951056516), T1r); + T1u = VMUL(LDK(KP587785252), T1t); + T1v = VADD(T1s, T1u); + T1d = VADD(T17, T1a); + T1e = VFNMS(LDK(KP250000000), T1d, T1c); + T2W = VADD(T1c, T1d); + } + } + { + V TG, TV, TF, TL, TH, TK, TU, T10, TW, TZ, TX, TI; + TG = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V Tz, TA, TB, TC, TD, TE; + Tz = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TA = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TB = VADD(Tz, TA); + TC = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TD = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TE = VADD(TC, TD); + TF = VMUL(LDK(KP559016994), VSUB(TB, TE)); + TL = VSUB(TC, TD); + TH = VADD(TB, TE); + TK = VSUB(Tz, TA); + } + { + V TO, TP, TQ, TR, TS, TT; + TO = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TP = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + TQ = VADD(TO, TP); + TR = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + TS = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + TT = VADD(TR, TS); + TU = VMUL(LDK(KP559016994), VSUB(TQ, TT)); + T10 = VSUB(TR, TS); + TW = VADD(TQ, TT); + TZ = VSUB(TO, TP); + } + T2P = VADD(TG, TH); + T2Q = VADD(TV, TW); + T2U = VADD(T2P, T2Q); + T11 = VFMA(LDK(KP475528258), TZ, VMUL(LDK(KP293892626), T10)); + T27 = VFNMS(LDK(KP475528258), T10, VMUL(LDK(KP293892626), TZ)); + TX = VFNMS(LDK(KP250000000), TW, TV); +
TY = VADD(TU, TX); + T26 = VSUB(TX, TU); + T12 = VFNMS(LDK(KP1_369094211), T11, VMUL(LDK(KP728968627), TY)); + T2f = VFMA(LDK(KP125581039), T27, VMUL(LDK(KP998026728), T26)); + T1j = VFMA(LDK(KP1_457937254), T11, VMUL(LDK(KP684547105), TY)); + T28 = VFNMS(LDK(KP1_996053456), T27, VMUL(LDK(KP062790519), T26)); + TM = VFMA(LDK(KP475528258), TK, VMUL(LDK(KP293892626), TL)); + T24 = VFNMS(LDK(KP475528258), TL, VMUL(LDK(KP293892626), TK)); + TI = VFNMS(LDK(KP250000000), TH, TG); + TJ = VADD(TF, TI); + T23 = VSUB(TI, TF); + TN = VFNMS(LDK(KP963507348), TM, VMUL(LDK(KP876306680), TJ)); + T2e = VFMA(LDK(KP1_071653589), T24, VMUL(LDK(KP844327925), T23)); + T1i = VFMA(LDK(KP1_752613360), TM, VMUL(LDK(KP481753674), TJ)); + T25 = VFNMS(LDK(KP1_688655851), T24, VMUL(LDK(KP535826794), T23)); + } + { + V Tb, Tq, T3, Tc, T6, Ta, Ti, Tr, Tl, Tp, Ts, Td; + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + { + V T1, T2, T8, T4, T5, T9; + T1 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T8 = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T9 = VADD(T4, T5); + T3 = VSUB(T1, T2); + Tc = VADD(T8, T9); + T6 = VSUB(T4, T5); + Ta = VMUL(LDK(KP559016994), VSUB(T8, T9)); + } + { + V Tg, Th, Tn, Tj, Tk, To; + Tg = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tn = VADD(Tg, Th); + Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tk = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + To = VADD(Tj, Tk); + Ti = VSUB(Tg, Th); + Tr = VADD(Tn, To); + Tl = VSUB(Tj, Tk); + Tp = VMUL(LDK(KP559016994), VSUB(Tn, To)); + } + T2M = VADD(Tq, Tr); + T2N = VADD(Tb, Tc); + T2T = VADD(T2M, T2N); + Tm = VFMA(LDK(KP475528258), Ti, VMUL(LDK(KP293892626), Tl)); + T1W = VFNMS(LDK(KP475528258), Tl, VMUL(LDK(KP293892626), Ti)); + Ts = VFNMS(LDK(KP250000000), Tr, Tq); + Tt = VADD(Tp, Ts); + T1X = VSUB(Ts, Tp); + Tu =
VFMA(LDK(KP1_937166322), Tm, VMUL(LDK(KP248689887), Tt)); + T20 = VFNMS(LDK(KP963507348), T1W, VMUL(LDK(KP876306680), T1X)); + Tw = VFNMS(LDK(KP497379774), Tm, VMUL(LDK(KP968583161), Tt)); + T1Y = VFMA(LDK(KP1_752613360), T1W, VMUL(LDK(KP481753674), T1X)); + T7 = VFMA(LDK(KP475528258), T3, VMUL(LDK(KP293892626), T6)); + T1U = VFNMS(LDK(KP475528258), T6, VMUL(LDK(KP293892626), T3)); + Td = VFNMS(LDK(KP250000000), Tc, Tb); + Te = VADD(Ta, Td); + T1T = VSUB(Td, Ta); + Tf = VFMA(LDK(KP1_071653589), T7, VMUL(LDK(KP844327925), Te)); + T21 = VFMA(LDK(KP1_809654104), T1U, VMUL(LDK(KP425779291), T1T)); + Tx = VFNMS(LDK(KP1_688655851), T7, VMUL(LDK(KP535826794), Te)); + T1V = VFNMS(LDK(KP851558583), T1U, VMUL(LDK(KP904827052), T1T)); + } + { + V T2V, T2X, T2Y, T2S, T30, T2O, T2R, T31, T2Z; + T2V = VMUL(LDK(KP559016994), VSUB(T2T, T2U)); + T2X = VADD(T2T, T2U); + T2Y = VFNMS(LDK(KP250000000), T2X, T2W); + T2O = VSUB(T2M, T2N); + T2R = VSUB(T2P, T2Q); + T2S = VBYI(VFMA(LDK(KP951056516), T2O, VMUL(LDK(KP587785252), T2R))); + T30 = VBYI(VFNMS(LDK(KP951056516), T2R, VMUL(LDK(KP587785252), T2O))); + ST(&(xo[0]), VADD(T2W, T2X), ovs, &(xo[0])); + T31 = VSUB(T2Y, T2V); + ST(&(xo[WS(os, 10)]), VADD(T30, T31), ovs, &(xo[0])); + ST(&(xo[WS(os, 15)]), VSUB(T31, T30), ovs, &(xo[WS(os, 1)])); + T2Z = VADD(T2V, T2Y); + ST(&(xo[WS(os, 5)]), VADD(T2S, T2Z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 20)]), VSUB(T2Z, T2S), ovs, &(xo[0])); + } + { + V T1Z, T2i, T2j, T2g, T2w, T2x, T2y, T2G, T2H, T2I, T2D, T2E, T2F, T2z, T2A; + V T2B, T2p, T2m, T2q, T2b, T2c, T2a, T2d, T2h, T2r; + T1Z = VSUB(T1V, T1Y); + T2i = VADD(T20, T21); + T2j = VSUB(T25, T28); + T2g = VSUB(T2e, T2f); + T2w = VFMA(LDK(KP1_369094211), T1W, VMUL(LDK(KP728968627), T1X)); + T2x = VFNMS(LDK(KP992114701), T1T, VMUL(LDK(KP250666467), T1U)); + T2y = VADD(T2w, T2x); + T2G = VFNMS(LDK(KP125581039), T24, VMUL(LDK(KP998026728), T23)); + T2H = VFMA(LDK(KP1_274847979), T27, VMUL(LDK(KP770513242), T26)); + T2I = VADD(T2G, T2H); + T2D =
VFNMS(LDK(KP1_457937254), T1W, VMUL(LDK(KP684547105), T1X)); + T2E = VFMA(LDK(KP1_984229402), T1U, VMUL(LDK(KP125333233), T1T)); + T2F = VADD(T2D, T2E); + T2z = VFMA(LDK(KP1_996053456), T24, VMUL(LDK(KP062790519), T23)); + T2A = VFNMS(LDK(KP637423989), T26, VMUL(LDK(KP1_541026485), T27)); + T2B = VADD(T2z, T2A); + { + V T2k, T2l, T22, T29; + T2k = VADD(T1Y, T1V); + T2l = VADD(T2e, T2f); + T2p = VADD(T2k, T2l); + T2m = VMUL(LDK(KP559016994), VSUB(T2k, T2l)); + T2q = VFNMS(LDK(KP250000000), T2p, T2o); + T2b = VSUB(T1e, T1b); + T22 = VSUB(T20, T21); + T29 = VADD(T25, T28); + T2c = VADD(T22, T29); + T2a = VMUL(LDK(KP559016994), VSUB(T22, T29)); + T2d = VFNMS(LDK(KP250000000), T2c, T2b); + } + { + V T2u, T2v, T2C, T2J; + T2u = VADD(T2b, T2c); + T2v = VBYI(VADD(T2o, T2p)); + ST(&(xo[WS(os, 23)]), VSUB(T2u, T2v), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VADD(T2u, T2v), ovs, &(xo[0])); + T2C = VADD(T2b, VADD(T2y, T2B)); + T2J = VBYI(VSUB(VADD(T2F, T2I), T2o)); + ST(&(xo[WS(os, 22)]), VSUB(T2C, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VADD(T2C, T2J), ovs, &(xo[WS(os, 1)])); + } + T2h = VFMA(LDK(KP951056516), T1Z, VADD(T2a, VFNMS(LDK(KP587785252), T2g, T2d))); + T2r = VBYI(VADD(VFMA(LDK(KP951056516), T2i, VMUL(LDK(KP587785252), T2j)), VADD(T2m, T2q))); + ST(&(xo[WS(os, 18)]), VSUB(T2h, T2r), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VADD(T2h, T2r), ovs, &(xo[WS(os, 1)])); + { + V T2s, T2t, T2K, T2L; + T2s = VFMA(LDK(KP587785252), T1Z, VFMA(LDK(KP951056516), T2g, VSUB(T2d, T2a))); + T2t = VBYI(VADD(VFNMS(LDK(KP951056516), T2j, VMUL(LDK(KP587785252), T2i)), VSUB(T2q, T2m))); + ST(&(xo[WS(os, 13)]), VSUB(T2s, T2t), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 12)]), VADD(T2s, T2t), ovs, &(xo[0])); + T2K = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T2w, T2x), VFMA(LDK(KP309016994), T2F, VFNMS(LDK(KP809016994), T2I, VMUL(LDK(KP587785252), VSUB(T2z, T2A))))), T2o)); + T2L = VFMA(LDK(KP309016994), T2y, VFMA(LDK(KP951056516), VSUB(T2E, T2D), VFMA(LDK(KP587785252), VSUB(T2H, T2G),
VFNMS(LDK(KP809016994), T2B, T2b)))); + ST(&(xo[WS(os, 8)]), VADD(T2K, T2L), ovs, &(xo[0])); + ST(&(xo[WS(os, 17)]), VSUB(T2L, T2K), ovs, &(xo[WS(os, 1)])); + } + } + { + V Tv, T1m, T1n, T1k, T1D, T1E, T1F, T1N, T1O, T1P, T1K, T1L, T1M, T1G, T1H; + V T1I, T1w, T1q, T1x, T1f, T1g, T14, T1h, T1l, T1y; + Tv = VSUB(Tf, Tu); + T1m = VSUB(Tw, Tx); + T1n = VSUB(TN, T12); + T1k = VSUB(T1i, T1j); + T1D = VFMA(LDK(KP1_688655851), Tm, VMUL(LDK(KP535826794), Tt)); + T1E = VFMA(LDK(KP1_541026485), T7, VMUL(LDK(KP637423989), Te)); + T1F = VSUB(T1D, T1E); + T1N = VFMA(LDK(KP851558583), TM, VMUL(LDK(KP904827052), TJ)); + T1O = VFMA(LDK(KP1_984229402), T11, VMUL(LDK(KP125333233), TY)); + T1P = VADD(T1N, T1O); + T1K = VFNMS(LDK(KP1_071653589), Tm, VMUL(LDK(KP844327925), Tt)); + T1L = VFNMS(LDK(KP770513242), Te, VMUL(LDK(KP1_274847979), T7)); + T1M = VADD(T1K, T1L); + T1G = VFNMS(LDK(KP425779291), TJ, VMUL(LDK(KP1_809654104), TM)); + T1H = VFNMS(LDK(KP992114701), TY, VMUL(LDK(KP250666467), T11)); + T1I = VADD(T1G, T1H); + { + V T1o, T1p, Ty, T13; + T1o = VADD(Tu, Tf); + T1p = VADD(T1i, T1j); + T1w = VADD(T1o, T1p); + T1q = VMUL(LDK(KP559016994), VSUB(T1o, T1p)); + T1x = VFNMS(LDK(KP250000000), T1w, T1v); + T1f = VADD(T1b, T1e); + Ty = VADD(Tw, Tx); + T13 = VADD(TN, T12); + T1g = VADD(Ty, T13); + T14 = VMUL(LDK(KP559016994), VSUB(Ty, T13)); + T1h = VFNMS(LDK(KP250000000), T1g, T1f); + } + { + V T1B, T1C, T1J, T1Q; + T1B = VADD(T1f, T1g); + T1C = VBYI(VADD(T1v, T1w)); + ST(&(xo[WS(os, 24)]), VSUB(T1B, T1C), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(T1B, T1C), ovs, &(xo[WS(os, 1)])); + T1J = VADD(T1f, VADD(T1F, T1I)); + T1Q = VBYI(VSUB(VADD(T1M, T1P), T1v)); + ST(&(xo[WS(os, 21)]), VSUB(T1J, T1Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VADD(T1J, T1Q), ovs, &(xo[0])); + } + T1l = VFMA(LDK(KP951056516), Tv, VADD(T14, VFNMS(LDK(KP587785252), T1k, T1h))); + T1y = VBYI(VADD(VFMA(LDK(KP951056516), T1m, VMUL(LDK(KP587785252), T1n)), VADD(T1q, T1x))); + ST(&(xo[WS(os, 19)]), VSUB(T1l,
T1y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VADD(T1l, T1y), ovs, &(xo[0])); + { + V T1z, T1A, T1R, T1S; + T1z = VFMA(LDK(KP587785252), Tv, VFMA(LDK(KP951056516), T1k, VSUB(T1h, T14))); + T1A = VBYI(VADD(VFNMS(LDK(KP951056516), T1n, VMUL(LDK(KP587785252), T1m)), VSUB(T1x, T1q))); + ST(&(xo[WS(os, 14)]), VSUB(T1z, T1A), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VADD(T1z, T1A), ovs, &(xo[WS(os, 1)])); + T1R = VBYI(VSUB(VFMA(LDK(KP309016994), T1M, VFMA(LDK(KP951056516), VADD(T1D, T1E), VFNMS(LDK(KP809016994), T1P, VMUL(LDK(KP587785252), VSUB(T1G, T1H))))), T1v)); + T1S = VFMA(LDK(KP951056516), VSUB(T1L, T1K), VFMA(LDK(KP309016994), T1F, VFMA(LDK(KP587785252), VSUB(T1O, T1N), VFNMS(LDK(KP809016994), T1I, T1f)))); + ST(&(xo[WS(os, 9)]), VADD(T1R, T1S), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 16)]), VSUB(T1S, T1R), ovs, &(xo[0])); + } + } + } + } + VLEAVE(); +} +/* Codelet descriptor: size 25, name string n1bv_25; the leading entries of {147, 63, 77, 0} equal the addition, multiplication and fused multiply/add counts quoted in the header comment of this variant. */ +static const kdft_desc desc = { 25, XSIMD_STRING("n1bv_25"), {147, 63, 77, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registration hook: passes n1bv_25 and its descriptor to X(kdft_register) for planner p. */ +void XSIMD(codelet_n1bv_25) (planner *p) { + X(kdft_register) (p, n1bv_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 3 -name n1bv_3 -include n1b.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 3 additions, 1 multiplications, 3 fused multiply/add), + * 11 stack variables, 2 constants, and 6 memory accesses + */ +#include "n1b.h" +/* n1bv_3, HAVE_FMA build: machine-generated SIMD codelet of size 3 (genfft options -sign 1 -n 3 above). Data is read through xi = ii and written through xo = io; ri and ro are not referenced in the body. Every loop pass loads xi[0], xi[WS(is, 1)] and xi[WS(is, 2)] into T1, T2, T3, forms T4 = VADD(T2, T3), T6 = KP866025403 times VSUB(T2, T3) and T5 = VFNMS(LDK(KP500000000), T4, T1), then stores VADD(T1, T4) to xo[0], VFMAI(T6, T5) to xo[WS(os, 1)] and VFNMSI(T6, T5) to xo[WS(os, 2)]. The loop counts i down from v in steps of VL; VLEAVE() runs once after it. */ +static void n1bv_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(6, is), MAKE_VOLATILE_STRIDE(6, os)) { + V T1, T2, T3, T6, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = VMUL(LDK(KP866025403), VSUB(T2, T3)); + T4 = VADD(T2, T3); + T5 = VFNMS(LDK(KP500000000), T4, T1); + ST(&(xo[0]), VADD(T1, T4), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFNMSI(T6, T5), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(T6, T5), ovs, &(xo[WS(os, 1)])); + } + } + VLEAVE(); +} +/* Codelet descriptor: size 3, name string n1bv_3; the leading entries of {3, 1, 3, 0} equal the addition, multiplication and fused multiply/add counts quoted in the header comment of this variant. */ +static const kdft_desc desc = { 3, XSIMD_STRING("n1bv_3"), {3, 1, 3, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registration hook: passes n1bv_3 and its descriptor to X(kdft_register) for planner p. */ +void XSIMD(codelet_n1bv_3) (planner *p) { + X(kdft_register) (p, n1bv_3, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency
8 -sign 1 -n 3 -name n1bv_3 -include n1b.h */ + +/* + * This function contains 6 FP additions, 2 FP multiplications, + * (or, 5 additions, 1 multiplications, 1 fused multiply/add), + * 11 stack variables, 2 constants, and 6 memory accesses + */ +#include "n1b.h" + +static void n1bv_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(6, is), MAKE_VOLATILE_STRIDE(6, os)) { + V T4, T3, T5, T1, T2, T6; + T4 = LD(&(xi[0]), ivs, &(xi[0])); + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T3 = VBYI(VMUL(LDK(KP866025403), VSUB(T1, T2))); + T5 = VADD(T1, T2); + ST(&(xo[0]), VADD(T4, T5), ovs, &(xo[0])); + T6 = VFNMS(LDK(KP500000000), T5, T4); + ST(&(xo[WS(os, 1)]), VADD(T3, T6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VSUB(T6, T3), ovs, &(xo[0])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 3, XSIMD_STRING("n1bv_3"), {5, 1, 1, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_3) (planner *p) { + X(kdft_register) (p, n1bv_3, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:53 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 32 -name n1bv_32 -include n1b.h */ + +/* + * This function contains 186 FP additions, 98 FP multiplications, + * (or, 88 additions, 0 multiplications, 98 fused multiply/add), + * 104 stack variables, 7 constants, and 64 memory accesses + */ +#include "n1b.h" + +static void n1bv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T1h, Tr, T1a, T1k, TI, T1b, T1L, T1P, T1I, T1G, T1O, T1Q, T1H, T1z, T1c; + V TZ; + { + V T2x, T1T, T2K, T1W, T1p, Tb, T1A, T16, Tu, TF, T2O, 
T2H, T2b, T2t, TY; + V T1w, TT, T1v, T20, T2C, Tj, Te, T2e, To, T2i, T23, T2D, TB, TG, Th; + V T2f, Tk; + { + V TL, TW, TP, TQ, T2F, T27, T28, TO; + { + V T1, T2, T12, T13, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T12 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T13 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + { + V TM, T25, T26, TN; + { + V TJ, T3, T14, T1U, T6, T1V, T9, TK, TU, TV, T1R, T1S, Ta, T15; + TJ = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1R = VADD(T1, T2); + T3 = VSUB(T1, T2); + T1S = VADD(T12, T13); + T14 = VSUB(T12, T13); + T1U = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1V = VADD(T7, T8); + T9 = VSUB(T7, T8); + TK = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TU = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T2x = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + TV = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TM = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T2K = VSUB(T1U, T1V); + T1W = VADD(T1U, T1V); + Ta = VADD(T6, T9); + T15 = VSUB(T6, T9); + T25 = VADD(TJ, TK); + TL = VSUB(TJ, TK); + T26 = VADD(TV, TU); + TW = VSUB(TU, TV); + TN = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TP = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1p = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + T1A = VFNMS(LDK(KP707106781), T15, T14); + T16 = VFMA(LDK(KP707106781), T15, T14); + TQ = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + T2F = VSUB(T25, T26); + T27 = VADD(T25, T26); + T28 = VADD(TM, TN); + TO = VSUB(TM, TN); + } + } + { + V Ty, T21, Tx, Tz, T1Y, T1Z; + { + V Ts, Tt, TD, T29, TR, TE, Tv, Tw; + Ts = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TD = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T29 = VADD(TP, TQ); + TR = VSUB(TP, 
TQ); + TE = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1Y = VADD(Ts, Tt); + Tu = VSUB(Ts, Tt); + { + V T2G, T2a, TX, TS; + T2G = VSUB(T29, T28); + T2a = VADD(T28, T29); + TX = VSUB(TR, TO); + TS = VADD(TO, TR); + T1Z = VADD(TD, TE); + TF = VSUB(TD, TE); + T21 = VADD(Tv, Tw); + Tx = VSUB(Tv, Tw); + T2O = VFMA(LDK(KP414213562), T2F, T2G); + T2H = VFNMS(LDK(KP414213562), T2G, T2F); + T2b = VSUB(T27, T2a); + T2t = VADD(T27, T2a); + TY = VFMA(LDK(KP707106781), TX, TW); + T1w = VFNMS(LDK(KP707106781), TX, TW); + TT = VFMA(LDK(KP707106781), TS, TL); + T1v = VFNMS(LDK(KP707106781), TS, TL); + Tz = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + } + } + T20 = VADD(T1Y, T1Z); + T2C = VSUB(T1Y, T1Z); + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V Tf, TA, T22, Tg; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + T22 = VADD(Ty, Tz); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T2e = VADD(Tc, Td); + To = VSUB(Tm, Tn); + T2i = VADD(Tn, Tm); + T23 = VADD(T21, T22); + T2D = VSUB(T21, T22); + TB = VADD(Tx, TA); + TG = VSUB(Tx, TA); + Th = VSUB(Tf, Tg); + T2f = VADD(Tf, Tg); + Tk = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + } + } + } + } + { + V T1t, TH, T1s, TC, T2P, T2U, T2n, T2d, T2w, T2u, T1q, T19, T1B, Tq, T2W; + V T2M, T2B, T2T, T2v, T2r, T2o, T2m, T2X, T2I; + { + V T1X, T2p, T2E, T2N, T2s, T2y, T2g, T17, Ti, T2h, Tl, T2c, T2l, T24; + T1X = VSUB(T1T, T1W); + T2p = VADD(T1T, T1W); + T2E = VFNMS(LDK(KP414213562), T2D, T2C); + T2N = VFMA(LDK(KP414213562), T2C, T2D); + T2s = VADD(T20, T23); + T24 = VSUB(T20, T23); + T1t = VFNMS(LDK(KP707106781), TG, TF); + TH = 
VFMA(LDK(KP707106781), TG, TF); + T1s = VFNMS(LDK(KP707106781), TB, Tu); + TC = VFMA(LDK(KP707106781), TB, Tu); + T2y = VSUB(T2e, T2f); + T2g = VADD(T2e, T2f); + T17 = VFMA(LDK(KP414213562), Te, Th); + Ti = VFNMS(LDK(KP414213562), Th, Te); + T2h = VADD(Tj, Tk); + Tl = VSUB(Tj, Tk); + T2c = VADD(T24, T2b); + T2l = VSUB(T24, T2b); + { + V T2L, T2A, T2q, T2k; + T2P = VSUB(T2N, T2O); + T2U = VADD(T2N, T2O); + { + V T2z, T2j, T18, Tp; + T2z = VSUB(T2h, T2i); + T2j = VADD(T2h, T2i); + T18 = VFMA(LDK(KP414213562), Tl, To); + Tp = VFNMS(LDK(KP414213562), To, Tl); + T2n = VFMA(LDK(KP707106781), T2c, T1X); + T2d = VFNMS(LDK(KP707106781), T2c, T1X); + T2w = VADD(T2s, T2t); + T2u = VSUB(T2s, T2t); + T2L = VSUB(T2y, T2z); + T2A = VADD(T2y, T2z); + T2q = VADD(T2g, T2j); + T2k = VSUB(T2g, T2j); + T1q = VADD(T17, T18); + T19 = VSUB(T17, T18); + T1B = VSUB(Ti, Tp); + Tq = VADD(Ti, Tp); + } + T2W = VFNMS(LDK(KP707106781), T2L, T2K); + T2M = VFMA(LDK(KP707106781), T2L, T2K); + T2B = VFMA(LDK(KP707106781), T2A, T2x); + T2T = VFNMS(LDK(KP707106781), T2A, T2x); + T2v = VADD(T2p, T2q); + T2r = VSUB(T2p, T2q); + T2o = VFMA(LDK(KP707106781), T2l, T2k); + T2m = VFNMS(LDK(KP707106781), T2l, T2k); + T2X = VSUB(T2E, T2H); + T2I = VADD(T2E, T2H); + } + } + { + V T2V, T2Z, T2Y, T30, T2R, T2J; + T2V = VFNMS(LDK(KP923879532), T2U, T2T); + T2Z = VFMA(LDK(KP923879532), T2U, T2T); + ST(&(xo[WS(os, 16)]), VSUB(T2v, T2w), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T2v, T2w), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(T2u, T2r), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VFNMSI(T2u, T2r), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T2o, T2n), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VFNMSI(T2o, T2n), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VFMAI(T2m, T2d), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(T2m, T2d), ovs, &(xo[0])); + T2Y = VFMA(LDK(KP923879532), T2X, T2W); + T30 = VFNMS(LDK(KP923879532), T2X, T2W); + T2R = VFMA(LDK(KP923879532), T2I, T2B); + T2J = VFNMS(LDK(KP923879532), T2I, T2B); + { + V T1J, 
T1r, T1C, T1M, T2S, T2Q, T1u, T1D, T1E, T1x; + T1J = VFNMS(LDK(KP923879532), T1q, T1p); + T1r = VFMA(LDK(KP923879532), T1q, T1p); + T1C = VFNMS(LDK(KP923879532), T1B, T1A); + T1M = VFMA(LDK(KP923879532), T1B, T1A); + ST(&(xo[WS(os, 6)]), VFNMSI(T30, T2Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VFMAI(T30, T2Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VFNMSI(T2Y, T2V), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(T2Y, T2V), ovs, &(xo[0])); + T2S = VFMA(LDK(KP923879532), T2P, T2M); + T2Q = VFNMS(LDK(KP923879532), T2P, T2M); + T1u = VFMA(LDK(KP668178637), T1t, T1s); + T1D = VFNMS(LDK(KP668178637), T1s, T1t); + T1E = VFNMS(LDK(KP668178637), T1v, T1w); + T1x = VFMA(LDK(KP668178637), T1w, T1v); + { + V T1K, T1F, T1N, T1y; + T1h = VFNMS(LDK(KP923879532), Tq, Tb); + Tr = VFMA(LDK(KP923879532), Tq, Tb); + ST(&(xo[WS(os, 30)]), VFNMSI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(T2Q, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(T2Q, T2J), ovs, &(xo[0])); + T1K = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + T1N = VSUB(T1u, T1x); + T1y = VADD(T1u, T1x); + T1a = VFMA(LDK(KP923879532), T19, T16); + T1k = VFNMS(LDK(KP923879532), T19, T16); + TI = VFNMS(LDK(KP198912367), TH, TC); + T1b = VFMA(LDK(KP198912367), TC, TH); + T1L = VFMA(LDK(KP831469612), T1K, T1J); + T1P = VFNMS(LDK(KP831469612), T1K, T1J); + T1I = VFMA(LDK(KP831469612), T1F, T1C); + T1G = VFNMS(LDK(KP831469612), T1F, T1C); + T1O = VFNMS(LDK(KP831469612), T1N, T1M); + T1Q = VFMA(LDK(KP831469612), T1N, T1M); + T1H = VFMA(LDK(KP831469612), T1y, T1r); + T1z = VFNMS(LDK(KP831469612), T1y, T1r); + T1c = VFMA(LDK(KP198912367), TT, TY); + TZ = VFNMS(LDK(KP198912367), TY, TT); + } + } + } + } + } + { + V T1d, T1i, T10, T1l; + ST(&(xo[WS(os, 21)]), VFMAI(T1O, T1L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(T1O, T1L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VFNMSI(T1Q, T1P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFMAI(T1Q, 
T1P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VFMAI(T1I, T1H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(T1I, T1H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFMAI(T1G, T1z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFNMSI(T1G, T1z), ovs, &(xo[WS(os, 1)])); + T1d = VSUB(T1b, T1c); + T1i = VADD(T1b, T1c); + T10 = VADD(TI, TZ); + T1l = VSUB(TI, TZ); + { + V T1n, T1j, T1e, T1g, T1o, T1m, T11, T1f; + T1n = VFMA(LDK(KP980785280), T1i, T1h); + T1j = VFNMS(LDK(KP980785280), T1i, T1h); + T1e = VFNMS(LDK(KP980785280), T1d, T1a); + T1g = VFMA(LDK(KP980785280), T1d, T1a); + T1o = VFNMS(LDK(KP980785280), T1l, T1k); + T1m = VFMA(LDK(KP980785280), T1l, T1k); + T11 = VFNMS(LDK(KP980785280), T10, Tr); + T1f = VFMA(LDK(KP980785280), T10, Tr); + ST(&(xo[WS(os, 23)]), VFNMSI(T1m, T1j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFMAI(T1m, T1j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VFMAI(T1o, T1n), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(T1o, T1n), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(T1g, T1f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VFNMSI(T1g, T1f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFMAI(T1e, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFNMSI(T1e, T11), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n1bv_32"), {88, 0, 98, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_32) (planner *p) { + X(kdft_register) (p, n1bv_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 32 -name n1bv_32 -include n1b.h */ + +/* + * This function contains 186 FP additions, 42 FP multiplications, + * (or, 170 additions, 26 multiplications, 16 fused multiply/add), + * 58 stack variables, 7 constants, and 64 memory accesses + */ +#include "n1b.h" + +static void n1bv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT 
v, INT ivs, INT ovs) +{ + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T2f, T2k, T2N, T2M, T19, T1B, Tb, T1p, TT, T1v, TY, T1w, T2E, T2F, T2G; + V T24, T2o, TC, T1s, TH, T1t, T2B, T2C, T2D, T1X, T2n, T2I, T2J, Tq, T1A; + V T14, T1q, T2c, T2l; + { + V T3, T2i, T18, T2j, T6, T2d, T9, T2e, T15, Ta; + { + V T1, T2, T16, T17; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T2i = VADD(T1, T2); + T16 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T17 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T18 = VSUB(T16, T17); + T2j = VADD(T16, T17); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T2d = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T2e = VADD(T7, T8); + } + T2f = VSUB(T2d, T2e); + T2k = VSUB(T2i, T2j); + T2N = VADD(T2d, T2e); + T2M = VADD(T2i, T2j); + T15 = VMUL(LDK(KP707106781), VSUB(T6, T9)); + T19 = VSUB(T15, T18); + T1B = VADD(T18, T15); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VSUB(T3, Ta); + T1p = VADD(T3, Ta); + } + { + V TL, T21, TW, T1Y, TO, T22, TS, T1Z; + { + V TJ, TK, TU, TV; + TJ = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + TK = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TL = VSUB(TJ, 
TK); + T21 = VADD(TJ, TK); + TU = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + TV = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TW = VSUB(TU, TV); + T1Y = VADD(TU, TV); + } + { + V TM, TN, TQ, TR; + TM = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + TN = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + TO = VSUB(TM, TN); + T22 = VADD(TM, TN); + TQ = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TR = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + TS = VSUB(TQ, TR); + T1Z = VADD(TQ, TR); + } + { + V TP, TX, T20, T23; + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + TT = VSUB(TP, TS); + T1v = VADD(TS, TP); + TX = VMUL(LDK(KP707106781), VADD(TL, TO)); + TY = VSUB(TW, TX); + T1w = VADD(TW, TX); + T2E = VADD(T1Y, T1Z); + T2F = VADD(T21, T22); + T2G = VSUB(T2E, T2F); + T20 = VSUB(T1Y, T1Z); + T23 = VSUB(T21, T22); + T24 = VFMA(LDK(KP923879532), T20, VMUL(LDK(KP382683432), T23)); + T2o = VFNMS(LDK(KP382683432), T20, VMUL(LDK(KP923879532), T23)); + } + } + { + V Tu, T1U, TF, T1R, Tx, T1V, TB, T1S; + { + V Ts, Tt, TD, TE; + Ts = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tu = VSUB(Ts, Tt); + T1U = VADD(Ts, Tt); + TD = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + TE = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TF = VSUB(TD, TE); + T1R = VADD(TD, TE); + } + { + V Tv, Tw, Tz, TA; + Tv = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tx = VSUB(Tv, Tw); + T1V = VADD(Tv, Tw); + Tz = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + TA = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + TB = VSUB(Tz, TA); + T1S = VADD(Tz, TA); + } + { + V Ty, TG, T1T, T1W; + Ty = VMUL(LDK(KP707106781), VSUB(Tu, Tx)); + TC = VSUB(Ty, TB); + T1s = VADD(TB, Ty); + TG = VMUL(LDK(KP707106781), VADD(Tu, Tx)); + TH = VSUB(TF, TG); + T1t = VADD(TF, TG); + T2B = VADD(T1R, T1S); + T2C = VADD(T1U, T1V); + T2D = VSUB(T2B, T2C); + T1T = VSUB(T1R, T1S); + T1W = VSUB(T1U, T1V); + T1X = 
VFNMS(LDK(KP382683432), T1W, VMUL(LDK(KP923879532), T1T)); + T2n = VFMA(LDK(KP382683432), T1T, VMUL(LDK(KP923879532), T1W)); + } + } + { + V Te, T26, To, T29, Th, T27, Tl, T2a, Ti, Tp; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T26 = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T29 = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T27 = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T2a = VADD(Tj, Tk); + } + T2I = VADD(T26, T27); + T2J = VADD(T29, T2a); + Ti = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + Tp = VFNMS(LDK(KP382683432), To, VMUL(LDK(KP923879532), Tl)); + Tq = VSUB(Ti, Tp); + T1A = VADD(Ti, Tp); + { + V T12, T13, T28, T2b; + T12 = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + T13 = VFMA(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T14 = VSUB(T12, T13); + T1q = VADD(T12, T13); + T28 = VSUB(T26, T27); + T2b = VSUB(T29, T2a); + T2c = VMUL(LDK(KP707106781), VSUB(T28, T2b)); + T2l = VMUL(LDK(KP707106781), VADD(T28, T2b)); + } + } + { + V T2L, T2R, T2Q, T2S; + { + V T2H, T2K, T2O, T2P; + T2H = VMUL(LDK(KP707106781), VSUB(T2D, T2G)); + T2K = VSUB(T2I, T2J); + T2L = VBYI(VSUB(T2H, T2K)); + T2R = VBYI(VADD(T2K, T2H)); + T2O = VSUB(T2M, T2N); + T2P = VMUL(LDK(KP707106781), VADD(T2D, T2G)); + T2Q = VSUB(T2O, T2P); + T2S = VADD(T2O, T2P); + } + ST(&(xo[WS(os, 12)]), VADD(T2L, T2Q), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VSUB(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VSUB(T2Q, T2L), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(T2R, T2S), ovs, &(xo[0])); + } + { + V T2h, T2r, T2q, T2s; + { + V T25, T2g, T2m, T2p; + T25 = VSUB(T1X, T24); + T2g = VSUB(T2c, T2f); + T2h = 
VBYI(VSUB(T25, T2g)); + T2r = VBYI(VADD(T2g, T25)); + T2m = VSUB(T2k, T2l); + T2p = VSUB(T2n, T2o); + T2q = VSUB(T2m, T2p); + T2s = VADD(T2m, T2p); + } + ST(&(xo[WS(os, 10)]), VADD(T2h, T2q), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VSUB(T2s, T2r), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VSUB(T2q, T2h), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VADD(T2r, T2s), ovs, &(xo[0])); + } + { + V T2V, T2Z, T2Y, T30; + { + V T2T, T2U, T2W, T2X; + T2T = VADD(T2M, T2N); + T2U = VADD(T2I, T2J); + T2V = VSUB(T2T, T2U); + T2Z = VADD(T2T, T2U); + T2W = VADD(T2B, T2C); + T2X = VADD(T2E, T2F); + T2Y = VBYI(VSUB(T2W, T2X)); + T30 = VADD(T2W, T2X); + } + ST(&(xo[WS(os, 24)]), VSUB(T2V, T2Y), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T2Z, T30), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(T2V, T2Y), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VSUB(T2Z, T30), ovs, &(xo[0])); + } + { + V T2v, T2z, T2y, T2A; + { + V T2t, T2u, T2w, T2x; + T2t = VADD(T2k, T2l); + T2u = VADD(T1X, T24); + T2v = VADD(T2t, T2u); + T2z = VSUB(T2t, T2u); + T2w = VADD(T2f, T2c); + T2x = VADD(T2n, T2o); + T2y = VBYI(VADD(T2w, T2x)); + T2A = VBYI(VSUB(T2x, T2w)); + } + ST(&(xo[WS(os, 30)]), VSUB(T2v, T2y), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VADD(T2z, T2A), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(T2v, T2y), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VSUB(T2z, T2A), ovs, &(xo[0])); + } + { + V T1r, T1C, T1M, T1K, T1F, T1N, T1y, T1J; + T1r = VSUB(T1p, T1q); + T1C = VSUB(T1A, T1B); + T1M = VADD(T1p, T1q); + T1K = VADD(T1B, T1A); + { + V T1D, T1E, T1u, T1x; + T1D = VFNMS(LDK(KP195090322), T1s, VMUL(LDK(KP980785280), T1t)); + T1E = VFMA(LDK(KP195090322), T1v, VMUL(LDK(KP980785280), T1w)); + T1F = VSUB(T1D, T1E); + T1N = VADD(T1D, T1E); + T1u = VFMA(LDK(KP980785280), T1s, VMUL(LDK(KP195090322), T1t)); + T1x = VFNMS(LDK(KP195090322), T1w, VMUL(LDK(KP980785280), T1v)); + T1y = VSUB(T1u, T1x); + T1J = VADD(T1u, T1x); + } + { + V T1z, T1G, T1P, T1Q; + T1z = VADD(T1r, T1y); + T1G = VBYI(VADD(T1C, T1F)); + ST(&(xo[WS(os, 25)]), VSUB(T1z, 
T1G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(T1z, T1G), ovs, &(xo[WS(os, 1)])); + T1P = VBYI(VADD(T1K, T1J)); + T1Q = VADD(T1M, T1N); + ST(&(xo[WS(os, 1)]), VADD(T1P, T1Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VSUB(T1Q, T1P), ovs, &(xo[WS(os, 1)])); + } + { + V T1H, T1I, T1L, T1O; + T1H = VSUB(T1r, T1y); + T1I = VBYI(VSUB(T1F, T1C)); + ST(&(xo[WS(os, 23)]), VSUB(T1H, T1I), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(T1H, T1I), ovs, &(xo[WS(os, 1)])); + T1L = VBYI(VSUB(T1J, T1K)); + T1O = VSUB(T1M, T1N); + ST(&(xo[WS(os, 15)]), VADD(T1L, T1O), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VSUB(T1O, T1L), ovs, &(xo[WS(os, 1)])); + } + } + { + V Tr, T1a, T1k, T1i, T1d, T1l, T10, T1h; + Tr = VSUB(Tb, Tq); + T1a = VSUB(T14, T19); + T1k = VADD(Tb, Tq); + T1i = VADD(T19, T14); + { + V T1b, T1c, TI, TZ; + T1b = VFNMS(LDK(KP555570233), TC, VMUL(LDK(KP831469612), TH)); + T1c = VFMA(LDK(KP555570233), TT, VMUL(LDK(KP831469612), TY)); + T1d = VSUB(T1b, T1c); + T1l = VADD(T1b, T1c); + TI = VFMA(LDK(KP831469612), TC, VMUL(LDK(KP555570233), TH)); + TZ = VFNMS(LDK(KP555570233), TY, VMUL(LDK(KP831469612), TT)); + T10 = VSUB(TI, TZ); + T1h = VADD(TI, TZ); + } + { + V T11, T1e, T1n, T1o; + T11 = VADD(Tr, T10); + T1e = VBYI(VADD(T1a, T1d)); + ST(&(xo[WS(os, 27)]), VSUB(T11, T1e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(T11, T1e), ovs, &(xo[WS(os, 1)])); + T1n = VBYI(VADD(T1i, T1h)); + T1o = VADD(T1k, T1l); + ST(&(xo[WS(os, 3)]), VADD(T1n, T1o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VSUB(T1o, T1n), ovs, &(xo[WS(os, 1)])); + } + { + V T1f, T1g, T1j, T1m; + T1f = VSUB(Tr, T10); + T1g = VBYI(VSUB(T1d, T1a)); + ST(&(xo[WS(os, 21)]), VSUB(T1f, T1g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VADD(T1f, T1g), ovs, &(xo[WS(os, 1)])); + T1j = VBYI(VSUB(T1h, T1i)); + T1m = VSUB(T1k, T1l); + ST(&(xo[WS(os, 13)]), VADD(T1j, T1m), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VSUB(T1m, T1j), ovs, &(xo[WS(os, 1)])); + } + } + } + } + 
VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n1bv_32"), {170, 26, 16, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_32) (planner *p) { + X(kdft_register) (p, n1bv_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 4 -name n1bv_4 -include n1b.h */ + +/* + * This function contains 8 FP additions, 2 FP multiplications, + * (or, 6 additions, 0 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "n1b.h" + +static void n1bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V T3, T7, T6, T8; + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T6 = VSUB(T4, T5); + T8 = VADD(T4, T5); + ST(&(xo[WS(os, 2)]), VSUB(T7, T8), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T7, T8), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(T6, T3), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(T6, T3), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n1bv_4"), {6, 0, 2, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_4) (planner *p) { + X(kdft_register) (p, n1bv_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 4 -name n1bv_4 
-include n1b.h */ + +/* + * This function contains 8 FP additions, 0 FP multiplications, + * (or, 8 additions, 0 multiplications, 0 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "n1b.h" + +static void n1bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T3, T7, T6, T8; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VBYI(VSUB(T4, T5)); + T8 = VADD(T4, T5); + } + ST(&(xo[WS(os, 3)]), VSUB(T3, T6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(T7, T8), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(T3, T6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VSUB(T7, T8), ovs, &(xo[0])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n1bv_4"), {8, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_4) (planner *p) { + X(kdft_register) (p, n1bv_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 5 -name n1bv_5 -include n1b.h */ + +/* + * This function contains 16 FP additions, 11 FP multiplications, + * (or, 7 additions, 2 multiplications, 9 fused multiply/add), + * 23 stack variables, 4 constants, and 10 memory accesses + */ +#include "n1b.h" + +static void n1bv_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(10, is), MAKE_VOLATILE_STRIDE(10, os)) { + V T1, T2, T3, T5, T6; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V Tc, T4, Td, T7; + Tc = VSUB(T2, T3); + T4 = VADD(T2, T3); + Td = VSUB(T5, T6); + T7 = VADD(T5, T6); + 
{ + V Tg, Te, Ta, T8, T9, Tf, Tb; + Tg = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tc, Td)); + Te = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Td, Tc)); + Ta = VSUB(T4, T7); + T8 = VADD(T4, T7); + T9 = VFNMS(LDK(KP250000000), T8, T1); + ST(&(xo[0]), VADD(T1, T8), ovs, &(xo[0])); + Tf = VFNMS(LDK(KP559016994), Ta, T9); + Tb = VFMA(LDK(KP559016994), Ta, T9); + ST(&(xo[WS(os, 2)]), VFNMSI(Tg, Tf), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(Tg, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(Te, Tb), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(Te, Tb), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 5, XSIMD_STRING("n1bv_5"), {7, 2, 9, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_5) (planner *p) { + X(kdft_register) (p, n1bv_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 5 -name n1bv_5 -include n1b.h */ + +/* + * This function contains 16 FP additions, 6 FP multiplications, + * (or, 13 additions, 3 multiplications, 3 fused multiply/add), + * 18 stack variables, 4 constants, and 10 memory accesses + */ +#include "n1b.h" + +static void n1bv_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(10, is), MAKE_VOLATILE_STRIDE(10, os)) { + V Tb, T3, Tc, T6, Ta; + Tb = LD(&(xi[0]), ivs, &(xi[0])); + { + V T1, T2, T8, T4, T5, T9; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = 
VADD(T1, T2); + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = VADD(T4, T5); + T3 = VSUB(T1, T2); + Tc = VADD(T8, T9); + T6 = VSUB(T4, T5); + Ta = VMUL(LDK(KP559016994), VSUB(T8, T9)); + } + ST(&(xo[0]), VADD(Tb, Tc), ovs, &(xo[0])); + { + V T7, Tf, Te, Tg, Td; + T7 = VBYI(VFMA(LDK(KP951056516), T3, VMUL(LDK(KP587785252), T6))); + Tf = VBYI(VFNMS(LDK(KP951056516), T6, VMUL(LDK(KP587785252), T3))); + Td = VFNMS(LDK(KP250000000), Tc, Tb); + Te = VADD(Ta, Td); + Tg = VSUB(Td, Ta); + ST(&(xo[WS(os, 1)]), VADD(T7, Te), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VSUB(Tg, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VSUB(Te, T7), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(Tf, Tg), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 5, XSIMD_STRING("n1bv_5"), {13, 3, 3, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_5) (planner *p) { + X(kdft_register) (p, n1bv_5, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 6 -name n1bv_6 -include n1b.h */ + +/* + * This function contains 18 FP additions, 8 FP multiplications, + * (or, 12 additions, 2 multiplications, 6 fused multiply/add), + * 23 stack variables, 2 constants, and 12 memory accesses + */ +#include "n1b.h" + +static void n1bv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V T1, T2, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + { + V T3, Td, T6, Te, T9, Tf; + T3 = VSUB(T1, T2); + Td = VADD(T1, T2); + T6 = VSUB(T4, T5); + Te = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); + { + V Tg, Ti, Ta, Tc, Th, Tb; + Tg = VADD(Te, Tf); + Ti = VMUL(LDK(KP866025403), VSUB(Te, Tf)); + Ta = VADD(T6, T9); + Tc = VMUL(LDK(KP866025403), VSUB(T6, T9)); + Th = VFNMS(LDK(KP500000000), Tg, Td); + ST(&(xo[0]), VADD(Td, Tg), ovs, &(xo[0])); + Tb = VFNMS(LDK(KP500000000), Ta, T3); 
+ ST(&(xo[WS(os, 3)]), VADD(T3, Ta), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(Ti, Th), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFNMSI(Ti, Th), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(Tc, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(Tc, Tb), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n1bv_6"), {12, 2, 6, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_6) (planner *p) { + X(kdft_register) (p, n1bv_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 6 -name n1bv_6 -include n1b.h */ + +/* + * This function contains 18 FP additions, 4 FP multiplications, + * (or, 16 additions, 2 multiplications, 2 fused multiply/add), + * 19 stack variables, 2 constants, and 12 memory accesses + */ +#include "n1b.h" + +static void n1bv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V Ta, Td, T3, Te, T6, Tf, Tb, Tg, T8, T9; + T8 = LD(&(xi[0]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + Td = VADD(T8, T9); + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Te = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tf = VADD(T4, T5); + } + Tb = VADD(T3, T6); + Tg = VADD(Te, Tf); + ST(&(xo[WS(os, 3)]), VADD(Ta, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Td, Tg), ovs, &(xo[0])); + { + V T7, Tc, Th, 
Ti; + T7 = VBYI(VMUL(LDK(KP866025403), VSUB(T3, T6))); + Tc = VFNMS(LDK(KP500000000), Tb, Ta); + ST(&(xo[WS(os, 1)]), VADD(T7, Tc), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VSUB(Tc, T7), ovs, &(xo[WS(os, 1)])); + Th = VFNMS(LDK(KP500000000), Tg, Td); + Ti = VBYI(VMUL(LDK(KP866025403), VSUB(Te, Tf))); + ST(&(xo[WS(os, 2)]), VSUB(Th, Ti), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(Ti, Th), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n1bv_6"), {16, 2, 2, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_6) (planner *p) { + X(kdft_register) (p, n1bv_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1568 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:53 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 64 -name n1bv_64 -include n1b.h */ + +/* + * This function contains 456 FP additions, 258 FP multiplications, + * (or, 198 additions, 0 multiplications, 258 fused multiply/add), + * 168 stack variables, 15 constants, and 128 memory accesses + */ +#include "n1b.h" + +static void n1bv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + 
R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T5T, T5S, T5X, T65, T5Z, T5R, T67, T63, T5U, T64; + { + V T7, T26, T5k, T6A, T47, T69, T2V, T3z, T6B, T4e, T6a, T5n, T3M, T2Y, T27; + V Tm, T3A, T3i, T29, TC, T5p, T4o, T6D, T6e, T3l, T3B, TR, T2a, T4x, T5q; + V T6h, T6E, T39, T3H, T3I, T3c, T5N, T57, T72, T6w, T5O, T5e, T71, T6t, T2y; + V T1W, T2x, T1N, T33, T34, T3E, T32, T1p, T2v, T1g, T2u, T4M, T5K, T6p, T6Z; + V T6m, T6Y, T5L, T4T; + { + V T4g, T4l, T3g, Tu, Tx, T4h, TA, T4i; + { + V T1, T2, T23, T24, T4, T5, T20, T21; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T23 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T24 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T20 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T21 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + { + V Ta, T48, Tk, T4c, T49, Td, Tf, Tg; + { + V T8, T43, T3, T45, T25, T5i, T6, T44, T22, T9, Ti, Tj, Tb, Tc; + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T43 = VSUB(T1, T2); + T3 = VADD(T1, T2); + T45 = VSUB(T23, T24); + T25 = VADD(T23, T24); + T5i = VSUB(T4, T5); + T6 = VADD(T4, T5); + T44 = VSUB(T20, T21); + T22 = VADD(T20, T21); + T9 = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Ti = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + { + V T2T, T46, T5j, T2U; + T7 = VSUB(T3, T6); + T2T = VADD(T3, T6); + T46 = VADD(T44, T45); + T5j = VSUB(T44, T45); + T26 = VSUB(T22, T25); + T2U = VADD(T22, T25); + Ta = VADD(T8, T9); + T48 = VSUB(T8, T9); + Tk = VADD(Ti, Tj); + T4c = VSUB(Tj, Ti); + T5k = VFMA(LDK(KP707106781), T5j, T5i); + T6A = VFNMS(LDK(KP707106781), T5j, T5i); + T47 = VFMA(LDK(KP707106781), T46, T43); + T69 = VFNMS(LDK(KP707106781), T46, T43); + T2V 
= VADD(T2T, T2U); + T3z = VSUB(T2T, T2U); + T49 = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + } + Tf = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + } + { + V Te, T2W, T5l, T4a, Tq, Tt, Tv, Tw, T5m, T4d, Tl, T2X, Ty, Tz, To; + V Tp; + To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tp = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + { + V Th, T4b, Tr, Ts; + Tr = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + Te = VSUB(Ta, Td); + T2W = VADD(Ta, Td); + T5l = VFMA(LDK(KP414213562), T48, T49); + T4a = VFNMS(LDK(KP414213562), T49, T48); + Th = VADD(Tf, Tg); + T4b = VSUB(Tf, Tg); + Tq = VADD(To, Tp); + T4g = VSUB(To, Tp); + T4l = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + Tv = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T5m = VFMA(LDK(KP414213562), T4b, T4c); + T4d = VFNMS(LDK(KP414213562), T4c, T4b); + Tl = VSUB(Th, Tk); + T2X = VADD(Th, Tk); + Ty = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + } + T3g = VADD(Tq, Tt); + Tu = VSUB(Tq, Tt); + Tx = VADD(Tv, Tw); + T4h = VSUB(Tv, Tw); + T6B = VSUB(T4a, T4d); + T4e = VADD(T4a, T4d); + T6a = VADD(T5l, T5m); + T5n = VSUB(T5l, T5m); + T3M = VSUB(T2W, T2X); + T2Y = VADD(T2W, T2X); + T27 = VSUB(Te, Tl); + Tm = VADD(Te, Tl); + TA = VADD(Ty, Tz); + T4i = VSUB(Ty, Tz); + } + } + } + { + V TK, T4p, T4u, T4k, T6d, T4n, T6c, TL, TN, TO, T3j, TJ, TF, TI; + { + V TD, TE, TG, TH; + TD = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TE = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + TG = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TH = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + { + V T3h, TB, T4j, T4m; + T3h = VADD(Tx, TA); + TB = VSUB(Tx, TA); + T4j = VADD(T4h, T4i); + T4m = VSUB(T4h, T4i); + T4p = VSUB(TD, TE); + TF = VADD(TD, TE); + T4u = VSUB(TH, TG); + TI = VADD(TG, TH); + T3A = VSUB(T3g, T3h); + T3i = VADD(T3g, T3h); + T29 = VFMA(LDK(KP414213562), Tu, TB); + TC = 
VFNMS(LDK(KP414213562), TB, Tu); + T4k = VFMA(LDK(KP707106781), T4j, T4g); + T6d = VFNMS(LDK(KP707106781), T4j, T4g); + T4n = VFMA(LDK(KP707106781), T4m, T4l); + T6c = VFNMS(LDK(KP707106781), T4m, T4l); + TL = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + } + TN = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TO = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + } + T3j = VADD(TF, TI); + TJ = VSUB(TF, TI); + { + V T3a, T1E, T52, T5b, T1x, T4Z, T6r, T6u, T5a, T1U, T55, T5c, T1L, T3b; + { + V T4V, T1t, T58, T1w, T1Q, T1T, T1I, T4Y, T59, T1J, T53, T1H; + { + V T1r, TM, T4r, TP, T4q, T1s, T1u, T1v; + T1r = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T5p = VFMA(LDK(KP198912367), T4k, T4n); + T4o = VFNMS(LDK(KP198912367), T4n, T4k); + T6D = VFMA(LDK(KP668178637), T6c, T6d); + T6e = VFNMS(LDK(KP668178637), T6d, T6c); + TM = VADD(TK, TL); + T4r = VSUB(TK, TL); + TP = VADD(TN, TO); + T4q = VSUB(TN, TO); + T1s = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1v = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + { + V T1R, T4X, T6g, T4t, T6f, T4w, T1S, T1O, T1P; + T1O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1R = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V T3k, TQ, T4s, T4v; + T3k = VADD(TP, TM); + TQ = VSUB(TM, TP); + T4s = VADD(T4q, T4r); + T4v = VSUB(T4r, T4q); + T4V = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + T58 = VSUB(T1v, T1u); + T1w = VADD(T1u, T1v); + T4X = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + T3l = VADD(T3j, T3k); + T3B = VSUB(T3j, T3k); + TR = VFNMS(LDK(KP414213562), TQ, TJ); + T2a = VFMA(LDK(KP414213562), TJ, TQ); + T6g = VFNMS(LDK(KP707106781), T4s, T4p); + T4t = VFMA(LDK(KP707106781), T4s, T4p); + T6f = VFNMS(LDK(KP707106781), T4v, T4u); + T4w = VFMA(LDK(KP707106781), T4v, T4u); + T1S = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + } + { + V T4W, T1A, T50, T51, T1D, T1F, T1G; + { + V T1y, T1z, T1B, T1C; + T1y = LD(&(xi[WS(is, 3)]), ivs, 
&(xi[WS(is, 1)])); + T1z = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1B = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1C = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T4x = VFNMS(LDK(KP198912367), T4w, T4t); + T5q = VFMA(LDK(KP198912367), T4t, T4w); + T6h = VFNMS(LDK(KP668178637), T6g, T6f); + T6E = VFMA(LDK(KP668178637), T6f, T6g); + T4W = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + T1A = VADD(T1y, T1z); + T50 = VSUB(T1y, T1z); + T51 = VSUB(T1C, T1B); + T1D = VADD(T1B, T1C); + } + T1F = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1G = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1I = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4Y = VADD(T4W, T4X); + T59 = VSUB(T4X, T4W); + T1J = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T3a = VADD(T1A, T1D); + T1E = VSUB(T1A, T1D); + T52 = VFMA(LDK(KP414213562), T51, T50); + T5b = VFNMS(LDK(KP414213562), T50, T51); + T53 = VSUB(T1F, T1G); + T1H = VADD(T1F, T1G); + } + } + } + { + V T37, T54, T1K, T38; + T1x = VSUB(T1t, T1w); + T37 = VADD(T1t, T1w); + T4Z = VFMA(LDK(KP707106781), T4Y, T4V); + T6r = VFNMS(LDK(KP707106781), T4Y, T4V); + T54 = VSUB(T1J, T1I); + T1K = VADD(T1I, T1J); + T6u = VFNMS(LDK(KP707106781), T59, T58); + T5a = VFMA(LDK(KP707106781), T59, T58); + T38 = VADD(T1T, T1Q); + T1U = VSUB(T1Q, T1T); + T55 = VFNMS(LDK(KP414213562), T54, T53); + T5c = VFMA(LDK(KP414213562), T53, T54); + T1L = VSUB(T1H, T1K); + T3b = VADD(T1H, T1K); + T39 = VADD(T37, T38); + T3H = VSUB(T37, T38); + } + } + { + V T4A, TW, T4N, TZ, T1j, T1m, T4O, T4D, T13, T4F, T16, T4G, T1a, T4I, T4J; + V T1d; + { + V TU, TV, TX, TY, T56, T6v; + TU = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T56 = VADD(T52, T55); + T6v = VSUB(T55, T52); + { + V T5d, T6s, T1V, T1M; + T5d = VADD(T5b, T5c); + T6s = VSUB(T5c, T5b); + T1V = VSUB(T1L, T1E); + T1M = VADD(T1E, T1L); + T3I = VSUB(T3b, T3a); + T3c = VADD(T3a, T3b); + T5N = VFNMS(LDK(KP923879532), T56, T4Z); + T57 = VFMA(LDK(KP923879532), T56, T4Z); + T72 = VFNMS(LDK(KP923879532), 
T6v, T6u); + T6w = VFMA(LDK(KP923879532), T6v, T6u); + T5O = VFNMS(LDK(KP923879532), T5d, T5a); + T5e = VFMA(LDK(KP923879532), T5d, T5a); + T71 = VFMA(LDK(KP923879532), T6s, T6r); + T6t = VFNMS(LDK(KP923879532), T6s, T6r); + T2y = VFNMS(LDK(KP707106781), T1V, T1U); + T1W = VFMA(LDK(KP707106781), T1V, T1U); + T2x = VFNMS(LDK(KP707106781), T1M, T1x); + T1N = VFMA(LDK(KP707106781), T1M, T1x); + TV = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + } + TX = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TY = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + { + V T1h, T1i, T1k, T1l; + T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + { + V T11, T4B, T4C, T12, T14, T15; + T11 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T4A = VSUB(TU, TV); + TW = VADD(TU, TV); + T4N = VSUB(TX, TY); + TZ = VADD(TX, TY); + T1j = VADD(T1h, T1i); + T4B = VSUB(T1h, T1i); + T1m = VADD(T1k, T1l); + T4C = VSUB(T1k, T1l); + T12 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T14 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T15 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + { + V T18, T19, T1b, T1c; + T18 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T19 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1b = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T1c = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T4O = VSUB(T4B, T4C); + T4D = VADD(T4B, T4C); + T13 = VADD(T11, T12); + T4F = VSUB(T11, T12); + T16 = VADD(T14, T15); + T4G = VSUB(T14, T15); + T1a = VADD(T18, T19); + T4I = VSUB(T18, T19); + T4J = VSUB(T1b, T1c); + T1d = VADD(T1b, T1c); + } + } + } + } + { + V T30, T10, T6k, T4E, T4Q, T4H, T17, T6n, T4P, T1e, T4K, T4R, T1n, T31; + T30 = VADD(TW, TZ); + T10 = VSUB(TW, TZ); + T6k = VFNMS(LDK(KP707106781), T4D, T4A); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4Q = VFMA(LDK(KP414213562), T4F, T4G); + T4H = 
VFNMS(LDK(KP414213562), T4G, T4F); + T33 = VADD(T13, T16); + T17 = VSUB(T13, T16); + T6n = VFNMS(LDK(KP707106781), T4O, T4N); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T34 = VADD(T1a, T1d); + T1e = VSUB(T1a, T1d); + T4K = VFMA(LDK(KP414213562), T4J, T4I); + T4R = VFNMS(LDK(KP414213562), T4I, T4J); + T1n = VSUB(T1j, T1m); + T31 = VADD(T1j, T1m); + { + V T1f, T1o, T6o, T4L, T4S, T6l; + T1f = VADD(T17, T1e); + T1o = VSUB(T17, T1e); + T6o = VSUB(T4H, T4K); + T4L = VADD(T4H, T4K); + T4S = VADD(T4Q, T4R); + T6l = VSUB(T4Q, T4R); + T3E = VSUB(T30, T31); + T32 = VADD(T30, T31); + T1p = VFMA(LDK(KP707106781), T1o, T1n); + T2v = VFNMS(LDK(KP707106781), T1o, T1n); + T1g = VFMA(LDK(KP707106781), T1f, T10); + T2u = VFNMS(LDK(KP707106781), T1f, T10); + T4M = VFMA(LDK(KP923879532), T4L, T4E); + T5K = VFNMS(LDK(KP923879532), T4L, T4E); + T6p = VFMA(LDK(KP923879532), T6o, T6n); + T6Z = VFNMS(LDK(KP923879532), T6o, T6n); + T6m = VFNMS(LDK(KP923879532), T6l, T6k); + T6Y = VFMA(LDK(KP923879532), T6l, T6k); + T5L = VFNMS(LDK(KP923879532), T4S, T4P); + T4T = VFMA(LDK(KP923879532), T4S, T4P); + } + } + } + } + } + } + { + V T6b, T6F, T7f, T6X, T70, T79, T7a, T73, T6C, T76, T77, T6i; + { + V T2Z, T3r, T3s, T3m, T3d, T3v; + T2Z = VSUB(T2V, T2Y); + T3r = VADD(T2V, T2Y); + T3s = VADD(T3i, T3l); + T3m = VSUB(T3i, T3l); + T3d = VSUB(T39, T3c); + T3v = VADD(T39, T3c); + { + V T3x, T3t, T3Q, T3J, T3D, T3V, T3G, T3P, T3u, T36, T3O, T3Y, T6V, T6W; + { + V T3N, T3C, T3F, T35; + T3N = VSUB(T3A, T3B); + T3C = VADD(T3A, T3B); + T3F = VSUB(T33, T34); + T35 = VADD(T33, T34); + T3x = VADD(T3r, T3s); + T3t = VSUB(T3r, T3s); + T3Q = VFMA(LDK(KP414213562), T3H, T3I); + T3J = VFNMS(LDK(KP414213562), T3I, T3H); + T3D = VFMA(LDK(KP707106781), T3C, T3z); + T3V = VFNMS(LDK(KP707106781), T3C, T3z); + T3G = VFNMS(LDK(KP414213562), T3F, T3E); + T3P = VFMA(LDK(KP414213562), T3E, T3F); + T3u = VADD(T32, T35); + T36 = VSUB(T32, T35); + T3O = VFMA(LDK(KP707106781), T3N, T3M); + T3Y = VFNMS(LDK(KP707106781), T3N, 
T3M); + } + T6b = VFNMS(LDK(KP923879532), T6a, T69); + T6V = VFMA(LDK(KP923879532), T6a, T69); + T6W = VADD(T6D, T6E); + T6F = VSUB(T6D, T6E); + { + V T3R, T3W, T3K, T3Z; + T3R = VSUB(T3P, T3Q); + T3W = VADD(T3P, T3Q); + T3K = VADD(T3G, T3J); + T3Z = VSUB(T3G, T3J); + { + V T3e, T3n, T3w, T3y; + T3e = VADD(T36, T3d); + T3n = VSUB(T36, T3d); + T3w = VSUB(T3u, T3v); + T3y = VADD(T3u, T3v); + { + V T41, T3X, T3S, T3U; + T41 = VFMA(LDK(KP923879532), T3W, T3V); + T3X = VFNMS(LDK(KP923879532), T3W, T3V); + T3S = VFNMS(LDK(KP923879532), T3R, T3O); + T3U = VFMA(LDK(KP923879532), T3R, T3O); + { + V T42, T40, T3L, T3T; + T42 = VFNMS(LDK(KP923879532), T3Z, T3Y); + T40 = VFMA(LDK(KP923879532), T3Z, T3Y); + T3L = VFNMS(LDK(KP923879532), T3K, T3D); + T3T = VFMA(LDK(KP923879532), T3K, T3D); + { + V T3o, T3q, T3f, T3p; + T3o = VFNMS(LDK(KP707106781), T3n, T3m); + T3q = VFMA(LDK(KP707106781), T3n, T3m); + T3f = VFNMS(LDK(KP707106781), T3e, T2Z); + T3p = VFMA(LDK(KP707106781), T3e, T2Z); + ST(&(xo[WS(os, 32)]), VSUB(T3x, T3y), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T3x, T3y), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VFMAI(T3w, T3t), ovs, &(xo[0])); + ST(&(xo[WS(os, 48)]), VFNMSI(T3w, T3t), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VFNMSI(T40, T3X), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VFMAI(T40, T3X), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VFMAI(T42, T41), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(T42, T41), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T3U, T3T), ovs, &(xo[0])); + ST(&(xo[WS(os, 60)]), VFNMSI(T3U, T3T), ovs, &(xo[0])); + ST(&(xo[WS(os, 36)]), VFMAI(T3S, T3L), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VFNMSI(T3S, T3L), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VFNMSI(T3q, T3p), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(T3q, T3p), ovs, &(xo[0])); + ST(&(xo[WS(os, 40)]), VFMAI(T3o, T3f), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VFNMSI(T3o, T3f), ovs, &(xo[0])); + T7f = VFNMS(LDK(KP831469612), T6W, T6V); + T6X = VFMA(LDK(KP831469612), T6W, T6V); + } + } + } + } + } + 
T70 = VFMA(LDK(KP303346683), T6Z, T6Y); + T79 = VFNMS(LDK(KP303346683), T6Y, T6Z); + T7a = VFNMS(LDK(KP303346683), T71, T72); + T73 = VFMA(LDK(KP303346683), T72, T71); + T6C = VFMA(LDK(KP923879532), T6B, T6A); + T76 = VFNMS(LDK(KP923879532), T6B, T6A); + T77 = VSUB(T6e, T6h); + T6i = VADD(T6e, T6h); + } + } + { + V T2r, T2D, T2C, T2s, T5H, T5o, T5v, T5D, T5r, T5I, T5x, T5h, T5F, T5B; + { + V TT, T2f, T2n, T1Y, T28, T2b, T2l, T2p, T2j, T2k; + { + V T1q, T2d, T7h, T7l, T2e, T1X, T75, T7d, T7m, T7k, T7c, T7e, Tn, TS; + T2r = VFNMS(LDK(KP707106781), Tm, T7); + Tn = VFMA(LDK(KP707106781), Tm, T7); + TS = VADD(TC, TR); + T2D = VSUB(TC, TR); + { + V T7b, T7j, T74, T7i, T78, T7g; + T1q = VFNMS(LDK(KP198912367), T1p, T1g); + T2d = VFMA(LDK(KP198912367), T1g, T1p); + T7g = VADD(T79, T7a); + T7b = VSUB(T79, T7a); + T7j = VSUB(T70, T73); + T74 = VADD(T70, T73); + T7i = VFNMS(LDK(KP831469612), T77, T76); + T78 = VFMA(LDK(KP831469612), T77, T76); + T2j = VFNMS(LDK(KP923879532), TS, Tn); + TT = VFMA(LDK(KP923879532), TS, Tn); + T7h = VFMA(LDK(KP956940335), T7g, T7f); + T7l = VFNMS(LDK(KP956940335), T7g, T7f); + T2e = VFMA(LDK(KP198912367), T1N, T1W); + T1X = VFNMS(LDK(KP198912367), T1W, T1N); + T75 = VFNMS(LDK(KP956940335), T74, T6X); + T7d = VFMA(LDK(KP956940335), T74, T6X); + T7m = VFMA(LDK(KP956940335), T7j, T7i); + T7k = VFNMS(LDK(KP956940335), T7j, T7i); + T7c = VFNMS(LDK(KP956940335), T7b, T78); + T7e = VFMA(LDK(KP956940335), T7b, T78); + } + T2k = VADD(T2d, T2e); + T2f = VSUB(T2d, T2e); + ST(&(xo[WS(os, 45)]), VFMAI(T7k, T7h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFNMSI(T7k, T7h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 51)]), VFNMSI(T7m, T7l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFMAI(T7m, T7l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 61)]), VFMAI(T7e, T7d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(T7e, T7d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VFMAI(T7c, T75), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 35)]), VFNMSI(T7c, 
T75), ovs, &(xo[WS(os, 1)])); + T2n = VSUB(T1q, T1X); + T1Y = VADD(T1q, T1X); + T2C = VFNMS(LDK(KP707106781), T27, T26); + T28 = VFMA(LDK(KP707106781), T27, T26); + T2b = VSUB(T29, T2a); + T2s = VADD(T29, T2a); + } + T2l = VFNMS(LDK(KP980785280), T2k, T2j); + T2p = VFMA(LDK(KP980785280), T2k, T2j); + { + V T5z, T4z, T5A, T5g; + { + V T4f, T4y, T1Z, T2h, T4U, T5t, T2m, T2c, T5u, T5f; + T5H = VFNMS(LDK(KP923879532), T4e, T47); + T4f = VFMA(LDK(KP923879532), T4e, T47); + T4y = VADD(T4o, T4x); + T5T = VSUB(T4o, T4x); + T1Z = VFNMS(LDK(KP980785280), T1Y, TT); + T2h = VFMA(LDK(KP980785280), T1Y, TT); + T4U = VFNMS(LDK(KP098491403), T4T, T4M); + T5t = VFMA(LDK(KP098491403), T4M, T4T); + T2m = VFNMS(LDK(KP923879532), T2b, T28); + T2c = VFMA(LDK(KP923879532), T2b, T28); + T5u = VFMA(LDK(KP098491403), T57, T5e); + T5f = VFNMS(LDK(KP098491403), T5e, T57); + T5z = VFNMS(LDK(KP980785280), T4y, T4f); + T4z = VFMA(LDK(KP980785280), T4y, T4f); + T5S = VFNMS(LDK(KP923879532), T5n, T5k); + T5o = VFMA(LDK(KP923879532), T5n, T5k); + { + V T2o, T2q, T2i, T2g; + T2o = VFMA(LDK(KP980785280), T2n, T2m); + T2q = VFNMS(LDK(KP980785280), T2n, T2m); + T2i = VFMA(LDK(KP980785280), T2f, T2c); + T2g = VFNMS(LDK(KP980785280), T2f, T2c); + T5A = VADD(T5t, T5u); + T5v = VSUB(T5t, T5u); + T5D = VSUB(T4U, T5f); + T5g = VADD(T4U, T5f); + ST(&(xo[WS(os, 46)]), VFNMSI(T2o, T2l), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(T2o, T2l), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VFMAI(T2q, T2p), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(T2q, T2p), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(T2i, T2h), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VFNMSI(T2i, T2h), ovs, &(xo[0])); + ST(&(xo[WS(os, 34)]), VFMAI(T2g, T1Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 30)]), VFNMSI(T2g, T1Z), ovs, &(xo[0])); + T5r = VSUB(T5p, T5q); + T5I = VADD(T5p, T5q); + } + } + T5x = VFMA(LDK(KP995184726), T5g, T4z); + T5h = VFNMS(LDK(KP995184726), T5g, T4z); + T5F = VFMA(LDK(KP995184726), T5A, T5z); + T5B = VFNMS(LDK(KP995184726), T5A, 
T5z); + } + } + { + V T6J, T6R, T6L, T6z, T6T, T6P; + { + V T6N, T6j, T6O, T6y; + { + V T6q, T6H, T5C, T5s, T6I, T6x; + T6q = VFNMS(LDK(KP534511135), T6p, T6m); + T6H = VFMA(LDK(KP534511135), T6m, T6p); + T5C = VFNMS(LDK(KP980785280), T5r, T5o); + T5s = VFMA(LDK(KP980785280), T5r, T5o); + T6I = VFMA(LDK(KP534511135), T6t, T6w); + T6x = VFNMS(LDK(KP534511135), T6w, T6t); + T6N = VFMA(LDK(KP831469612), T6i, T6b); + T6j = VFNMS(LDK(KP831469612), T6i, T6b); + { + V T5E, T5G, T5y, T5w; + T5E = VFMA(LDK(KP995184726), T5D, T5C); + T5G = VFNMS(LDK(KP995184726), T5D, T5C); + T5y = VFMA(LDK(KP995184726), T5v, T5s); + T5w = VFNMS(LDK(KP995184726), T5v, T5s); + T6O = VADD(T6H, T6I); + T6J = VSUB(T6H, T6I); + T6R = VSUB(T6q, T6x); + T6y = VADD(T6q, T6x); + ST(&(xo[WS(os, 47)]), VFNMSI(T5E, T5B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFMAI(T5E, T5B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VFMAI(T5G, T5F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFNMSI(T5G, T5F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(T5y, T5x), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 63)]), VFNMSI(T5y, T5x), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 33)]), VFMAI(T5w, T5h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VFNMSI(T5w, T5h), ovs, &(xo[WS(os, 1)])); + } + } + T6L = VFMA(LDK(KP881921264), T6y, T6j); + T6z = VFNMS(LDK(KP881921264), T6y, T6j); + T6T = VFMA(LDK(KP881921264), T6O, T6N); + T6P = VFNMS(LDK(KP881921264), T6O, T6N); + } + { + V T2H, T2P, T2J, T2B, T2R, T2N; + { + V T2L, T2t, T2M, T2A; + { + V T2w, T2F, T6Q, T6G, T2G, T2z; + T2w = VFMA(LDK(KP668178637), T2v, T2u); + T2F = VFNMS(LDK(KP668178637), T2u, T2v); + T6Q = VFNMS(LDK(KP831469612), T6F, T6C); + T6G = VFMA(LDK(KP831469612), T6F, T6C); + T2G = VFNMS(LDK(KP668178637), T2x, T2y); + T2z = VFMA(LDK(KP668178637), T2y, T2x); + T2L = VFNMS(LDK(KP923879532), T2s, T2r); + T2t = VFMA(LDK(KP923879532), T2s, T2r); + { + V T6S, T6U, T6M, T6K; + T6S = VFMA(LDK(KP881921264), T6R, T6Q); + T6U = 
VFNMS(LDK(KP881921264), T6R, T6Q); + T6M = VFMA(LDK(KP881921264), T6J, T6G); + T6K = VFNMS(LDK(KP881921264), T6J, T6G); + T2M = VADD(T2F, T2G); + T2H = VSUB(T2F, T2G); + T2P = VSUB(T2w, T2z); + T2A = VADD(T2w, T2z); + ST(&(xo[WS(os, 43)]), VFNMSI(T6S, T6P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 21)]), VFMAI(T6S, T6P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VFMAI(T6U, T6T), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(T6U, T6T), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFMAI(T6M, T6L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VFNMSI(T6M, T6L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 37)]), VFMAI(T6K, T6z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VFNMSI(T6K, T6z), ovs, &(xo[WS(os, 1)])); + } + } + T2J = VFMA(LDK(KP831469612), T2A, T2t); + T2B = VFNMS(LDK(KP831469612), T2A, T2t); + T2R = VFNMS(LDK(KP831469612), T2M, T2L); + T2N = VFMA(LDK(KP831469612), T2M, T2L); + } + { + V T61, T5J, T62, T5Q; + { + V T5M, T5V, T2O, T2E, T5W, T5P; + T5M = VFMA(LDK(KP820678790), T5L, T5K); + T5V = VFNMS(LDK(KP820678790), T5K, T5L); + T2O = VFMA(LDK(KP923879532), T2D, T2C); + T2E = VFNMS(LDK(KP923879532), T2D, T2C); + T5W = VFNMS(LDK(KP820678790), T5N, T5O); + T5P = VFMA(LDK(KP820678790), T5O, T5N); + T61 = VFNMS(LDK(KP980785280), T5I, T5H); + T5J = VFMA(LDK(KP980785280), T5I, T5H); + { + V T2Q, T2S, T2K, T2I; + T2Q = VFNMS(LDK(KP831469612), T2P, T2O); + T2S = VFMA(LDK(KP831469612), T2P, T2O); + T2K = VFMA(LDK(KP831469612), T2H, T2E); + T2I = VFNMS(LDK(KP831469612), T2H, T2E); + T62 = VADD(T5V, T5W); + T5X = VSUB(T5V, T5W); + T65 = VSUB(T5M, T5P); + T5Q = VADD(T5M, T5P); + ST(&(xo[WS(os, 42)]), VFMAI(T2Q, T2N), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VFNMSI(T2Q, T2N), ovs, &(xo[0])); + ST(&(xo[WS(os, 54)]), VFNMSI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 58)]), VFMAI(T2K, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(T2K, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VFMAI(T2I, 
T2B), ovs, &(xo[0])); + ST(&(xo[WS(os, 38)]), VFNMSI(T2I, T2B), ovs, &(xo[0])); + } + } + T5Z = VFMA(LDK(KP773010453), T5Q, T5J); + T5R = VFNMS(LDK(KP773010453), T5Q, T5J); + T67 = VFNMS(LDK(KP773010453), T62, T61); + T63 = VFMA(LDK(KP773010453), T62, T61); + } + } + } + } + } + } + T5U = VFNMS(LDK(KP980785280), T5T, T5S); + T64 = VFMA(LDK(KP980785280), T5T, T5S); + { + V T68, T66, T5Y, T60; + T68 = VFMA(LDK(KP773010453), T65, T64); + T66 = VFNMS(LDK(KP773010453), T65, T64); + T5Y = VFNMS(LDK(KP773010453), T5X, T5U); + T60 = VFMA(LDK(KP773010453), T5X, T5U); + ST(&(xo[WS(os, 41)]), VFMAI(T66, T63), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 23)]), VFNMSI(T66, T63), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VFNMSI(T68, T67), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFMAI(T68, T67), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 57)]), VFMAI(T60, T5Z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFNMSI(T60, T5Z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VFMAI(T5Y, T5R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 39)]), VFNMSI(T5Y, T5R), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n1bv_64"), {198, 0, 258, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_64) (planner *p) { + X(kdft_register) (p, n1bv_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 64 -name n1bv_64 -include n1b.h */ + +/* + * This function contains 456 FP additions, 124 FP multiplications, + * (or, 404 additions, 72 multiplications, 52 fused multiply/add), + * 108 stack variables, 15 constants, and 128 memory accesses + */ +#include "n1b.h" + +static void n1bv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP290284677, 
+0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T4p, T5u, Tb, T3A, T2q, T3v, T6G, T78, Tq, T3w, T6B, T79, T2l, T3B, T4w; + V T5r, TI, T2g, T6u, T74, T3q, T3D, T4E, T5o, TZ, T2h, T6x, T75, T3t, T3E; + V T4L, T5p, T23, T2N, T6m, T70, T6p, T71, T2c, T2O, T3i, T3Y, T5f, T5R, T5k; + V T5S, T3l, T3Z, T1s, T2K, T6f, T6X, T6i, T6Y, T1B, T2L, T3b, T3V, T4Y, T5O; + V T53, T5P, T3e, T3W; + { + V T3, T4n, T2p, T4o, T6, T5s, T9, T5t; + { + V T1, T2, T2n, T2o; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T4n = VADD(T1, T2); + T2n = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T2o = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T2p = VSUB(T2n, T2o); + T4o = VADD(T2n, T2o); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T5s = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 24)]), 
ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T5t = VADD(T7, T8); + } + T4p = VSUB(T4n, T4o); + T5u = VSUB(T5s, T5t); + { + V Ta, T2m, T6E, T6F; + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VSUB(T3, Ta); + T3A = VADD(T3, Ta); + T2m = VMUL(LDK(KP707106781), VSUB(T6, T9)); + T2q = VSUB(T2m, T2p); + T3v = VADD(T2p, T2m); + T6E = VADD(T4n, T4o); + T6F = VADD(T5s, T5t); + T6G = VSUB(T6E, T6F); + T78 = VADD(T6E, T6F); + } + } + { + V Te, T4q, To, T4t, Th, T4r, Tl, T4u; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T4q = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T4t = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T4r = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T4u = VADD(Tj, Tk); + } + { + V Ti, Tp, T6z, T6A; + Ti = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + Tp = VFNMS(LDK(KP382683432), To, VMUL(LDK(KP923879532), Tl)); + Tq = VSUB(Ti, Tp); + T3w = VADD(Ti, Tp); + T6z = VADD(T4q, T4r); + T6A = VADD(T4t, T4u); + T6B = VSUB(T6z, T6A); + T79 = VADD(T6z, T6A); + } + { + V T2j, T2k, T4s, T4v; + T2j = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + T2k = VFMA(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T2l = VSUB(T2j, T2k); + T3B = VADD(T2j, T2k); + T4s = VSUB(T4q, T4r); + T4v = VSUB(T4t, T4u); + T4w = VMUL(LDK(KP707106781), VADD(T4s, T4v)); + T5r = VMUL(LDK(KP707106781), VSUB(T4s, T4v)); + } + } + { + V TB, T4z, TF, T4y, Ty, T4C, TG, T4B; + { + V Tz, TA, TD, TE; + Tz = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + TA = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + TB = VSUB(Tz, TA); + T4z = VADD(Tz, TA); + TD = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TE = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + 
TF = VSUB(TD, TE); + T4y = VADD(TD, TE); + { + V Ts, Tt, Tu, Tv, Tw, Tx; + Ts = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + Tu = VSUB(Ts, Tt); + Tv = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Tx = VSUB(Tv, Tw); + Ty = VMUL(LDK(KP707106781), VSUB(Tu, Tx)); + T4C = VADD(Tv, Tw); + TG = VMUL(LDK(KP707106781), VADD(Tu, Tx)); + T4B = VADD(Ts, Tt); + } + } + { + V TC, TH, T6s, T6t; + TC = VSUB(Ty, TB); + TH = VSUB(TF, TG); + TI = VFMA(LDK(KP831469612), TC, VMUL(LDK(KP555570233), TH)); + T2g = VFNMS(LDK(KP555570233), TC, VMUL(LDK(KP831469612), TH)); + T6s = VADD(T4y, T4z); + T6t = VADD(T4B, T4C); + T6u = VSUB(T6s, T6t); + T74 = VADD(T6s, T6t); + } + { + V T3o, T3p, T4A, T4D; + T3o = VADD(TB, Ty); + T3p = VADD(TF, TG); + T3q = VFMA(LDK(KP980785280), T3o, VMUL(LDK(KP195090322), T3p)); + T3D = VFNMS(LDK(KP195090322), T3o, VMUL(LDK(KP980785280), T3p)); + T4A = VSUB(T4y, T4z); + T4D = VSUB(T4B, T4C); + T4E = VFMA(LDK(KP382683432), T4A, VMUL(LDK(KP923879532), T4D)); + T5o = VFNMS(LDK(KP382683432), T4D, VMUL(LDK(KP923879532), T4A)); + } + } + { + V TS, T4J, TW, T4I, TP, T4G, TX, T4F; + { + V TQ, TR, TU, TV; + TQ = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TR = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TS = VSUB(TQ, TR); + T4J = VADD(TQ, TR); + TU = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + TW = VSUB(TU, TV); + T4I = VADD(TU, TV); + { + V TJ, TK, TL, TM, TN, TO; + TJ = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + TL = VSUB(TJ, TK); + TM = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TO = VSUB(TM, TN); + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + T4G = VADD(TM, TN); + TX = VMUL(LDK(KP707106781), VADD(TL, TO)); + T4F = VADD(TJ, TK); + } + } + { + V TT, TY, T6v, T6w; + TT = VSUB(TP, TS); + TY = VSUB(TW, TX); + TZ = VFNMS(LDK(KP555570233), TY, VMUL(LDK(KP831469612), TT)); + T2h = 
VFMA(LDK(KP555570233), TT, VMUL(LDK(KP831469612), TY)); + T6v = VADD(T4I, T4J); + T6w = VADD(T4F, T4G); + T6x = VSUB(T6v, T6w); + T75 = VADD(T6v, T6w); + } + { + V T3r, T3s, T4H, T4K; + T3r = VADD(TS, TP); + T3s = VADD(TW, TX); + T3t = VFNMS(LDK(KP195090322), T3s, VMUL(LDK(KP980785280), T3r)); + T3E = VFMA(LDK(KP195090322), T3r, VMUL(LDK(KP980785280), T3s)); + T4H = VSUB(T4F, T4G); + T4K = VSUB(T4I, T4J); + T4L = VFNMS(LDK(KP382683432), T4K, VMUL(LDK(KP923879532), T4H)); + T5p = VFMA(LDK(KP923879532), T4K, VMUL(LDK(KP382683432), T4H)); + } + } + { + V T21, T5h, T26, T5g, T1Y, T5d, T27, T5c, T55, T56, T1J, T57, T29, T58, T59; + V T1Q, T5a, T2a; + { + V T1Z, T20, T24, T25; + T1Z = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T20 = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + T21 = VSUB(T1Z, T20); + T5h = VADD(T1Z, T20); + T24 = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T25 = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T26 = VSUB(T24, T25); + T5g = VADD(T24, T25); + } + { + V T1S, T1T, T1U, T1V, T1W, T1X; + T1S = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T1T = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + T1U = VSUB(T1S, T1T); + T1V = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1W = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1X = VSUB(T1V, T1W); + T1Y = VMUL(LDK(KP707106781), VSUB(T1U, T1X)); + T5d = VADD(T1V, T1W); + T27 = VMUL(LDK(KP707106781), VADD(T1U, T1X)); + T5c = VADD(T1S, T1T); + } + { + V T1F, T1I, T1M, T1P; + { + V T1D, T1E, T1G, T1H; + T1D = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1E = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1F = VSUB(T1D, T1E); + T55 = VADD(T1D, T1E); + T1G = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1H = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T1I = VSUB(T1G, T1H); + T56 = VADD(T1G, T1H); + } + T1J = VFNMS(LDK(KP382683432), T1I, VMUL(LDK(KP923879532), T1F)); + T57 = VSUB(T55, T56); + T29 = VFMA(LDK(KP382683432), T1F, VMUL(LDK(KP923879532), T1I)); + { + V T1K, T1L, T1N, 
T1O; + T1K = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1L = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1M = VSUB(T1K, T1L); + T58 = VADD(T1K, T1L); + T1N = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T1O = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T1P = VSUB(T1N, T1O); + T59 = VADD(T1N, T1O); + } + T1Q = VFMA(LDK(KP923879532), T1M, VMUL(LDK(KP382683432), T1P)); + T5a = VSUB(T58, T59); + T2a = VFNMS(LDK(KP382683432), T1M, VMUL(LDK(KP923879532), T1P)); + } + { + V T1R, T22, T6k, T6l; + T1R = VSUB(T1J, T1Q); + T22 = VSUB(T1Y, T21); + T23 = VSUB(T1R, T22); + T2N = VADD(T22, T1R); + T6k = VADD(T5g, T5h); + T6l = VADD(T5c, T5d); + T6m = VSUB(T6k, T6l); + T70 = VADD(T6k, T6l); + } + { + V T6n, T6o, T28, T2b; + T6n = VADD(T55, T56); + T6o = VADD(T58, T59); + T6p = VSUB(T6n, T6o); + T71 = VADD(T6n, T6o); + T28 = VSUB(T26, T27); + T2b = VSUB(T29, T2a); + T2c = VSUB(T28, T2b); + T2O = VADD(T28, T2b); + } + { + V T3g, T3h, T5b, T5e; + T3g = VADD(T26, T27); + T3h = VADD(T1J, T1Q); + T3i = VADD(T3g, T3h); + T3Y = VSUB(T3g, T3h); + T5b = VMUL(LDK(KP707106781), VSUB(T57, T5a)); + T5e = VSUB(T5c, T5d); + T5f = VSUB(T5b, T5e); + T5R = VADD(T5e, T5b); + } + { + V T5i, T5j, T3j, T3k; + T5i = VSUB(T5g, T5h); + T5j = VMUL(LDK(KP707106781), VADD(T57, T5a)); + T5k = VSUB(T5i, T5j); + T5S = VADD(T5i, T5j); + T3j = VADD(T21, T1Y); + T3k = VADD(T29, T2a); + T3l = VADD(T3j, T3k); + T3Z = VSUB(T3k, T3j); + } + } + { + V T1q, T50, T1v, T4Z, T1n, T4W, T1w, T4V, T4O, T4P, T18, T4Q, T1y, T4R, T4S; + V T1f, T4T, T1z; + { + V T1o, T1p, T1t, T1u; + T1o = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T1p = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + T1q = VSUB(T1o, T1p); + T50 = VADD(T1o, T1p); + T1t = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T1v = VSUB(T1t, T1u); + T4Z = VADD(T1t, T1u); + } + { + V T1h, T1i, T1j, T1k, T1l, T1m; + T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 41)]), 
ivs, &(xi[WS(is, 1)])); + T1j = VSUB(T1h, T1i); + T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + T1m = VSUB(T1k, T1l); + T1n = VMUL(LDK(KP707106781), VSUB(T1j, T1m)); + T4W = VADD(T1k, T1l); + T1w = VMUL(LDK(KP707106781), VADD(T1j, T1m)); + T4V = VADD(T1h, T1i); + } + { + V T14, T17, T1b, T1e; + { + V T12, T13, T15, T16; + T12 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T13 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T14 = VSUB(T12, T13); + T4O = VADD(T12, T13); + T15 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T16 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + T17 = VSUB(T15, T16); + T4P = VADD(T15, T16); + } + T18 = VFNMS(LDK(KP382683432), T17, VMUL(LDK(KP923879532), T14)); + T4Q = VSUB(T4O, T4P); + T1y = VFMA(LDK(KP382683432), T14, VMUL(LDK(KP923879532), T17)); + { + V T19, T1a, T1c, T1d; + T19 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T1a = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1b = VSUB(T19, T1a); + T4R = VADD(T19, T1a); + T1c = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T1d = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T1e = VSUB(T1c, T1d); + T4S = VADD(T1c, T1d); + } + T1f = VFMA(LDK(KP923879532), T1b, VMUL(LDK(KP382683432), T1e)); + T4T = VSUB(T4R, T4S); + T1z = VFNMS(LDK(KP382683432), T1b, VMUL(LDK(KP923879532), T1e)); + } + { + V T1g, T1r, T6d, T6e; + T1g = VSUB(T18, T1f); + T1r = VSUB(T1n, T1q); + T1s = VSUB(T1g, T1r); + T2K = VADD(T1r, T1g); + T6d = VADD(T4Z, T50); + T6e = VADD(T4V, T4W); + T6f = VSUB(T6d, T6e); + T6X = VADD(T6d, T6e); + } + { + V T6g, T6h, T1x, T1A; + T6g = VADD(T4O, T4P); + T6h = VADD(T4R, T4S); + T6i = VSUB(T6g, T6h); + T6Y = VADD(T6g, T6h); + T1x = VSUB(T1v, T1w); + T1A = VSUB(T1y, T1z); + T1B = VSUB(T1x, T1A); + T2L = VADD(T1x, T1A); + } + { + V T39, T3a, T4U, T4X; + T39 = VADD(T1v, T1w); + T3a = VADD(T18, T1f); + T3b = VADD(T39, T3a); + T3V = VSUB(T39, T3a); + T4U = VMUL(LDK(KP707106781), VSUB(T4Q, T4T)); + T4X = VSUB(T4V, 
T4W); + T4Y = VSUB(T4U, T4X); + T5O = VADD(T4X, T4U); + } + { + V T51, T52, T3c, T3d; + T51 = VSUB(T4Z, T50); + T52 = VMUL(LDK(KP707106781), VADD(T4Q, T4T)); + T53 = VSUB(T51, T52); + T5P = VADD(T51, T52); + T3c = VADD(T1q, T1n); + T3d = VADD(T1y, T1z); + T3e = VADD(T3c, T3d); + T3W = VSUB(T3d, T3c); + } + } + { + V T7h, T7l, T7k, T7m; + { + V T7f, T7g, T7i, T7j; + T7f = VADD(T78, T79); + T7g = VADD(T74, T75); + T7h = VSUB(T7f, T7g); + T7l = VADD(T7f, T7g); + T7i = VADD(T6X, T6Y); + T7j = VADD(T70, T71); + T7k = VBYI(VSUB(T7i, T7j)); + T7m = VADD(T7i, T7j); + } + ST(&(xo[WS(os, 48)]), VSUB(T7h, T7k), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T7l, T7m), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VADD(T7h, T7k), ovs, &(xo[0])); + ST(&(xo[WS(os, 32)]), VSUB(T7l, T7m), ovs, &(xo[0])); + } + { + V T76, T7a, T73, T7b, T6Z, T72; + T76 = VSUB(T74, T75); + T7a = VSUB(T78, T79); + T6Z = VSUB(T6X, T6Y); + T72 = VSUB(T70, T71); + T73 = VMUL(LDK(KP707106781), VSUB(T6Z, T72)); + T7b = VMUL(LDK(KP707106781), VADD(T6Z, T72)); + { + V T77, T7c, T7d, T7e; + T77 = VBYI(VSUB(T73, T76)); + T7c = VSUB(T7a, T7b); + ST(&(xo[WS(os, 24)]), VADD(T77, T7c), ovs, &(xo[0])); + ST(&(xo[WS(os, 40)]), VSUB(T7c, T77), ovs, &(xo[0])); + T7d = VBYI(VADD(T76, T73)); + T7e = VADD(T7a, T7b); + ST(&(xo[WS(os, 8)]), VADD(T7d, T7e), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VSUB(T7e, T7d), ovs, &(xo[0])); + } + } + { + V T6C, T6S, T6I, T6P, T6r, T6Q, T6L, T6T, T6y, T6H; + T6y = VMUL(LDK(KP707106781), VSUB(T6u, T6x)); + T6C = VSUB(T6y, T6B); + T6S = VADD(T6B, T6y); + T6H = VMUL(LDK(KP707106781), VADD(T6u, T6x)); + T6I = VSUB(T6G, T6H); + T6P = VADD(T6G, T6H); + { + V T6j, T6q, T6J, T6K; + T6j = VFNMS(LDK(KP382683432), T6i, VMUL(LDK(KP923879532), T6f)); + T6q = VFMA(LDK(KP923879532), T6m, VMUL(LDK(KP382683432), T6p)); + T6r = VSUB(T6j, T6q); + T6Q = VADD(T6j, T6q); + T6J = VFMA(LDK(KP382683432), T6f, VMUL(LDK(KP923879532), T6i)); + T6K = VFNMS(LDK(KP382683432), T6m, VMUL(LDK(KP923879532), T6p)); + T6L = VSUB(T6J, T6K); 
+ T6T = VADD(T6J, T6K); + } + { + V T6D, T6M, T6V, T6W; + T6D = VBYI(VSUB(T6r, T6C)); + T6M = VSUB(T6I, T6L); + ST(&(xo[WS(os, 20)]), VADD(T6D, T6M), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VSUB(T6M, T6D), ovs, &(xo[0])); + T6V = VSUB(T6P, T6Q); + T6W = VBYI(VSUB(T6T, T6S)); + ST(&(xo[WS(os, 36)]), VSUB(T6V, T6W), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VADD(T6V, T6W), ovs, &(xo[0])); + } + { + V T6N, T6O, T6R, T6U; + T6N = VBYI(VADD(T6C, T6r)); + T6O = VADD(T6I, T6L); + ST(&(xo[WS(os, 12)]), VADD(T6N, T6O), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VSUB(T6O, T6N), ovs, &(xo[0])); + T6R = VADD(T6P, T6Q); + T6U = VBYI(VADD(T6S, T6T)); + ST(&(xo[WS(os, 60)]), VSUB(T6R, T6U), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(T6R, T6U), ovs, &(xo[0])); + } + } + { + V T5N, T68, T61, T69, T5U, T65, T5Y, T66; + { + V T5L, T5M, T5Z, T60; + T5L = VADD(T4p, T4w); + T5M = VADD(T5o, T5p); + T5N = VSUB(T5L, T5M); + T68 = VADD(T5L, T5M); + T5Z = VFNMS(LDK(KP195090322), T5O, VMUL(LDK(KP980785280), T5P)); + T60 = VFMA(LDK(KP195090322), T5R, VMUL(LDK(KP980785280), T5S)); + T61 = VSUB(T5Z, T60); + T69 = VADD(T5Z, T60); + } + { + V T5Q, T5T, T5W, T5X; + T5Q = VFMA(LDK(KP980785280), T5O, VMUL(LDK(KP195090322), T5P)); + T5T = VFNMS(LDK(KP195090322), T5S, VMUL(LDK(KP980785280), T5R)); + T5U = VSUB(T5Q, T5T); + T65 = VADD(T5Q, T5T); + T5W = VADD(T4E, T4L); + T5X = VADD(T5u, T5r); + T5Y = VSUB(T5W, T5X); + T66 = VADD(T5X, T5W); + } + { + V T5V, T62, T6b, T6c; + T5V = VADD(T5N, T5U); + T62 = VBYI(VADD(T5Y, T61)); + ST(&(xo[WS(os, 50)]), VSUB(T5V, T62), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VADD(T5V, T62), ovs, &(xo[0])); + T6b = VBYI(VADD(T66, T65)); + T6c = VADD(T68, T69); + ST(&(xo[WS(os, 2)]), VADD(T6b, T6c), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VSUB(T6c, T6b), ovs, &(xo[0])); + } + { + V T63, T64, T67, T6a; + T63 = VSUB(T5N, T5U); + T64 = VBYI(VSUB(T61, T5Y)); + ST(&(xo[WS(os, 46)]), VSUB(T63, T64), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VADD(T63, T64), ovs, &(xo[0])); + T67 = 
VBYI(VSUB(T65, T66)); + T6a = VSUB(T68, T69); + ST(&(xo[WS(os, 30)]), VADD(T67, T6a), ovs, &(xo[0])); + ST(&(xo[WS(os, 34)]), VSUB(T6a, T67), ovs, &(xo[0])); + } + } + { + V T11, T2C, T2v, T2D, T2e, T2z, T2s, T2A; + { + V Tr, T10, T2t, T2u; + Tr = VSUB(Tb, Tq); + T10 = VSUB(TI, TZ); + T11 = VSUB(Tr, T10); + T2C = VADD(Tr, T10); + T2t = VFNMS(LDK(KP471396736), T1s, VMUL(LDK(KP881921264), T1B)); + T2u = VFMA(LDK(KP471396736), T23, VMUL(LDK(KP881921264), T2c)); + T2v = VSUB(T2t, T2u); + T2D = VADD(T2t, T2u); + } + { + V T1C, T2d, T2i, T2r; + T1C = VFMA(LDK(KP881921264), T1s, VMUL(LDK(KP471396736), T1B)); + T2d = VFNMS(LDK(KP471396736), T2c, VMUL(LDK(KP881921264), T23)); + T2e = VSUB(T1C, T2d); + T2z = VADD(T1C, T2d); + T2i = VSUB(T2g, T2h); + T2r = VSUB(T2l, T2q); + T2s = VSUB(T2i, T2r); + T2A = VADD(T2r, T2i); + } + { + V T2f, T2w, T2F, T2G; + T2f = VADD(T11, T2e); + T2w = VBYI(VADD(T2s, T2v)); + ST(&(xo[WS(os, 53)]), VSUB(T2f, T2w), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VADD(T2f, T2w), ovs, &(xo[WS(os, 1)])); + T2F = VBYI(VADD(T2A, T2z)); + T2G = VADD(T2C, T2D); + ST(&(xo[WS(os, 5)]), VADD(T2F, T2G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VSUB(T2G, T2F), ovs, &(xo[WS(os, 1)])); + } + { + V T2x, T2y, T2B, T2E; + T2x = VSUB(T11, T2e); + T2y = VBYI(VSUB(T2v, T2s)); + ST(&(xo[WS(os, 43)]), VSUB(T2x, T2y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 21)]), VADD(T2x, T2y), ovs, &(xo[WS(os, 1)])); + T2B = VBYI(VSUB(T2z, T2A)); + T2E = VSUB(T2C, T2D); + ST(&(xo[WS(os, 27)]), VADD(T2B, T2E), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 37)]), VSUB(T2E, T2B), ovs, &(xo[WS(os, 1)])); + } + } + { + V T3n, T3O, T3J, T3R, T3y, T3Q, T3G, T3N; + { + V T3f, T3m, T3H, T3I; + T3f = VFNMS(LDK(KP098017140), T3e, VMUL(LDK(KP995184726), T3b)); + T3m = VFMA(LDK(KP995184726), T3i, VMUL(LDK(KP098017140), T3l)); + T3n = VSUB(T3f, T3m); + T3O = VADD(T3f, T3m); + T3H = VFMA(LDK(KP098017140), T3b, VMUL(LDK(KP995184726), T3e)); + T3I = VFNMS(LDK(KP098017140), T3i, 
VMUL(LDK(KP995184726), T3l)); + T3J = VSUB(T3H, T3I); + T3R = VADD(T3H, T3I); + } + { + V T3u, T3x, T3C, T3F; + T3u = VADD(T3q, T3t); + T3x = VADD(T3v, T3w); + T3y = VSUB(T3u, T3x); + T3Q = VADD(T3x, T3u); + T3C = VADD(T3A, T3B); + T3F = VADD(T3D, T3E); + T3G = VSUB(T3C, T3F); + T3N = VADD(T3C, T3F); + } + { + V T3z, T3K, T3T, T3U; + T3z = VBYI(VSUB(T3n, T3y)); + T3K = VSUB(T3G, T3J); + ST(&(xo[WS(os, 17)]), VADD(T3z, T3K), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 47)]), VSUB(T3K, T3z), ovs, &(xo[WS(os, 1)])); + T3T = VSUB(T3N, T3O); + T3U = VBYI(VSUB(T3R, T3Q)); + ST(&(xo[WS(os, 33)]), VSUB(T3T, T3U), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VADD(T3T, T3U), ovs, &(xo[WS(os, 1)])); + } + { + V T3L, T3M, T3P, T3S; + T3L = VBYI(VADD(T3y, T3n)); + T3M = VADD(T3G, T3J); + ST(&(xo[WS(os, 15)]), VADD(T3L, T3M), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VSUB(T3M, T3L), ovs, &(xo[WS(os, 1)])); + T3P = VADD(T3N, T3O); + T3S = VBYI(VADD(T3Q, T3R)); + ST(&(xo[WS(os, 63)]), VSUB(T3P, T3S), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(T3P, T3S), ovs, &(xo[WS(os, 1)])); + } + } + { + V T4N, T5G, T5z, T5H, T5m, T5D, T5w, T5E; + { + V T4x, T4M, T5x, T5y; + T4x = VSUB(T4p, T4w); + T4M = VSUB(T4E, T4L); + T4N = VSUB(T4x, T4M); + T5G = VADD(T4x, T4M); + T5x = VFNMS(LDK(KP555570233), T4Y, VMUL(LDK(KP831469612), T53)); + T5y = VFMA(LDK(KP555570233), T5f, VMUL(LDK(KP831469612), T5k)); + T5z = VSUB(T5x, T5y); + T5H = VADD(T5x, T5y); + } + { + V T54, T5l, T5q, T5v; + T54 = VFMA(LDK(KP831469612), T4Y, VMUL(LDK(KP555570233), T53)); + T5l = VFNMS(LDK(KP555570233), T5k, VMUL(LDK(KP831469612), T5f)); + T5m = VSUB(T54, T5l); + T5D = VADD(T54, T5l); + T5q = VSUB(T5o, T5p); + T5v = VSUB(T5r, T5u); + T5w = VSUB(T5q, T5v); + T5E = VADD(T5v, T5q); + } + { + V T5n, T5A, T5J, T5K; + T5n = VADD(T4N, T5m); + T5A = VBYI(VADD(T5w, T5z)); + ST(&(xo[WS(os, 54)]), VSUB(T5n, T5A), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VADD(T5n, T5A), ovs, &(xo[0])); + T5J = VBYI(VADD(T5E, T5D)); + T5K = 
VADD(T5G, T5H); + ST(&(xo[WS(os, 6)]), VADD(T5J, T5K), ovs, &(xo[0])); + ST(&(xo[WS(os, 58)]), VSUB(T5K, T5J), ovs, &(xo[0])); + } + { + V T5B, T5C, T5F, T5I; + T5B = VSUB(T4N, T5m); + T5C = VBYI(VSUB(T5z, T5w)); + ST(&(xo[WS(os, 42)]), VSUB(T5B, T5C), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VADD(T5B, T5C), ovs, &(xo[0])); + T5F = VBYI(VSUB(T5D, T5E)); + T5I = VSUB(T5G, T5H); + ST(&(xo[WS(os, 26)]), VADD(T5F, T5I), ovs, &(xo[0])); + ST(&(xo[WS(os, 38)]), VSUB(T5I, T5F), ovs, &(xo[0])); + } + } + { + V T2J, T34, T2X, T35, T2Q, T31, T2U, T32; + { + V T2H, T2I, T2V, T2W; + T2H = VADD(Tb, Tq); + T2I = VADD(T2g, T2h); + T2J = VSUB(T2H, T2I); + T34 = VADD(T2H, T2I); + T2V = VFNMS(LDK(KP290284677), T2K, VMUL(LDK(KP956940335), T2L)); + T2W = VFMA(LDK(KP290284677), T2N, VMUL(LDK(KP956940335), T2O)); + T2X = VSUB(T2V, T2W); + T35 = VADD(T2V, T2W); + } + { + V T2M, T2P, T2S, T2T; + T2M = VFMA(LDK(KP956940335), T2K, VMUL(LDK(KP290284677), T2L)); + T2P = VFNMS(LDK(KP290284677), T2O, VMUL(LDK(KP956940335), T2N)); + T2Q = VSUB(T2M, T2P); + T31 = VADD(T2M, T2P); + T2S = VADD(TI, TZ); + T2T = VADD(T2q, T2l); + T2U = VSUB(T2S, T2T); + T32 = VADD(T2T, T2S); + } + { + V T2R, T2Y, T37, T38; + T2R = VADD(T2J, T2Q); + T2Y = VBYI(VADD(T2U, T2X)); + ST(&(xo[WS(os, 51)]), VSUB(T2R, T2Y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VADD(T2R, T2Y), ovs, &(xo[WS(os, 1)])); + T37 = VBYI(VADD(T32, T31)); + T38 = VADD(T34, T35); + ST(&(xo[WS(os, 3)]), VADD(T37, T38), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 61)]), VSUB(T38, T37), ovs, &(xo[WS(os, 1)])); + } + { + V T2Z, T30, T33, T36; + T2Z = VSUB(T2J, T2Q); + T30 = VBYI(VSUB(T2X, T2U)); + ST(&(xo[WS(os, 45)]), VSUB(T2Z, T30), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VADD(T2Z, T30), ovs, &(xo[WS(os, 1)])); + T33 = VBYI(VSUB(T31, T32)); + T36 = VSUB(T34, T35); + ST(&(xo[WS(os, 29)]), VADD(T33, T36), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 35)]), VSUB(T36, T33), ovs, &(xo[WS(os, 1)])); + } + } + { + V T41, T4g, T4b, T4j, T44, T4i, T48, 
T4f; + { + V T3X, T40, T49, T4a; + T3X = VFNMS(LDK(KP634393284), T3W, VMUL(LDK(KP773010453), T3V)); + T40 = VFMA(LDK(KP773010453), T3Y, VMUL(LDK(KP634393284), T3Z)); + T41 = VSUB(T3X, T40); + T4g = VADD(T3X, T40); + T49 = VFMA(LDK(KP634393284), T3V, VMUL(LDK(KP773010453), T3W)); + T4a = VFNMS(LDK(KP634393284), T3Y, VMUL(LDK(KP773010453), T3Z)); + T4b = VSUB(T49, T4a); + T4j = VADD(T49, T4a); + } + { + V T42, T43, T46, T47; + T42 = VSUB(T3D, T3E); + T43 = VSUB(T3w, T3v); + T44 = VSUB(T42, T43); + T4i = VADD(T43, T42); + T46 = VSUB(T3A, T3B); + T47 = VSUB(T3q, T3t); + T48 = VSUB(T46, T47); + T4f = VADD(T46, T47); + } + { + V T45, T4c, T4l, T4m; + T45 = VBYI(VSUB(T41, T44)); + T4c = VSUB(T48, T4b); + ST(&(xo[WS(os, 23)]), VADD(T45, T4c), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 41)]), VSUB(T4c, T45), ovs, &(xo[WS(os, 1)])); + T4l = VSUB(T4f, T4g); + T4m = VBYI(VSUB(T4j, T4i)); + ST(&(xo[WS(os, 39)]), VSUB(T4l, T4m), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VADD(T4l, T4m), ovs, &(xo[WS(os, 1)])); + } + { + V T4d, T4e, T4h, T4k; + T4d = VBYI(VADD(T44, T41)); + T4e = VADD(T48, T4b); + ST(&(xo[WS(os, 9)]), VADD(T4d, T4e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VSUB(T4e, T4d), ovs, &(xo[WS(os, 1)])); + T4h = VADD(T4f, T4g); + T4k = VBYI(VADD(T4i, T4j)); + ST(&(xo[WS(os, 57)]), VSUB(T4h, T4k), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(T4h, T4k), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n1bv_64"), {404, 72, 52, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_64) (planner *p) { + X(kdft_register) (p, n1bv_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of 
Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 7 -name n1bv_7 -include n1b.h */ + +/* + * This function contains 30 FP additions, 24 FP multiplications, + * (or, 9 additions, 3 multiplications, 21 fused multiply/add), + * 37 stack variables, 6 constants, and 14 memory accesses + */ +#include "n1b.h" + +static void n1bv_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), 
MAKE_VOLATILE_STRIDE(14, is), MAKE_VOLATILE_STRIDE(14, os)) { + V T1, T2, T3, T8, T9, T5, T6; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V Tg, T4, Te, Ta, Tf, T7; + Tg = VSUB(T2, T3); + T4 = VADD(T2, T3); + Te = VSUB(T8, T9); + Ta = VADD(T8, T9); + Tf = VSUB(T5, T6); + T7 = VADD(T5, T6); + { + V Tr, Tj, Tm, Th, To, Tb; + Tr = VFMA(LDK(KP554958132), Te, Tg); + Tj = VFNMS(LDK(KP356895867), T4, Ta); + Tm = VFMA(LDK(KP554958132), Tf, Te); + Th = VFNMS(LDK(KP554958132), Tg, Tf); + ST(&(xo[0]), VADD(T1, VADD(T4, VADD(T7, Ta))), ovs, &(xo[0])); + To = VFNMS(LDK(KP356895867), T7, T4); + Tb = VFNMS(LDK(KP356895867), Ta, T7); + { + V Ts, Tk, Tn, Ti; + Ts = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Tr, Tf)); + Tk = VFNMS(LDK(KP692021471), Tj, T7); + Tn = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tm, Tg)); + Ti = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Th, Te)); + { + V Tp, Tc, Tl, Tq, Td; + Tp = VFNMS(LDK(KP692021471), To, Ta); + Tc = VFNMS(LDK(KP692021471), Tb, T4); + Tl = VFNMS(LDK(KP900968867), Tk, T1); + Tq = VFNMS(LDK(KP900968867), Tp, T1); + Td = VFNMS(LDK(KP900968867), Tc, T1); + ST(&(xo[WS(os, 5)]), VFNMSI(Tn, Tl), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFMAI(Tn, Tl), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Ts, Tq), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(Ts, Tq), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(Ti, Td), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(Ti, Td), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 7, XSIMD_STRING("n1bv_7"), {9, 3, 21, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_7) (planner *p) { + X(kdft_register) (p, n1bv_7, &desc); +} + +#else /* HAVE_FMA */ + +/* 
Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 7 -name n1bv_7 -include n1b.h */ + +/* + * This function contains 30 FP additions, 18 FP multiplications, + * (or, 18 additions, 6 multiplications, 12 fused multiply/add), + * 24 stack variables, 6 constants, and 14 memory accesses + */ +#include "n1b.h" + +static void n1bv_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(14, is), MAKE_VOLATILE_STRIDE(14, os)) { + V Tb, T9, Tc, T3, Te, T6, Td, T7, T8, Ti, Tj; + Tb = LD(&(xi[0]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + Tc = VADD(T7, T8); + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + Te = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + Td = VADD(T4, T5); + } + ST(&(xo[0]), VADD(Tb, VADD(Te, VADD(Tc, Td))), ovs, &(xo[0])); + Ti = VBYI(VFNMS(LDK(KP781831482), T6, VFNMS(LDK(KP433883739), T9, VMUL(LDK(KP974927912), T3)))); + Tj = VFMA(LDK(KP623489801), Td, VFNMS(LDK(KP900968867), Tc, VFNMS(LDK(KP222520933), Te, Tb))); + ST(&(xo[WS(os, 2)]), VADD(Ti, Tj), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VSUB(Tj, Ti), ovs, &(xo[WS(os, 1)])); + { + 
V Ta, Tf, Tg, Th; + Ta = VBYI(VFMA(LDK(KP433883739), T3, VFNMS(LDK(KP781831482), T9, VMUL(LDK(KP974927912), T6)))); + Tf = VFMA(LDK(KP623489801), Tc, VFNMS(LDK(KP222520933), Td, VFNMS(LDK(KP900968867), Te, Tb))); + ST(&(xo[WS(os, 3)]), VADD(Ta, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VSUB(Tf, Ta), ovs, &(xo[0])); + Tg = VBYI(VFMA(LDK(KP781831482), T3, VFMA(LDK(KP974927912), T9, VMUL(LDK(KP433883739), T6)))); + Th = VFMA(LDK(KP623489801), Te, VFNMS(LDK(KP900968867), Td, VFNMS(LDK(KP222520933), Tc, Tb))); + ST(&(xo[WS(os, 1)]), VADD(Tg, Th), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VSUB(Th, Tg), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 7, XSIMD_STRING("n1bv_7"), {18, 6, 12, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_7) (planner *p) { + X(kdft_register) (p, n1bv_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 8 -name n1bv_8 -include n1b.h */ + +/* + * This function contains 26 FP additions, 10 FP multiplications, + * (or, 16 additions, 0 multiplications, 10 fused multiply/add), + * 30 stack variables, 1 constants, and 16 memory accesses + */ +#include "n1b.h" + +static void n1bv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T1, T2, Tc, Td, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V T3, Tj, Te, Tk, T6, Tm, T9, Tn, Tp, Tl; + T3 = VSUB(T1, T2); + Tj = VADD(T1, T2); + Te = VSUB(Tc, Td); + Tk = VADD(Tc, Td); + T6 = VSUB(T4, T5); + Tm = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tn = VADD(T7, T8); + Tp = VADD(Tj, Tk); + Tl = VSUB(Tj, Tk); + { + V Tq, To, Ta, Tf; + Tq = VADD(Tm, Tn); + To = VSUB(Tm, Tn); + Ta = VADD(T6, T9); + Tf = VSUB(T6, T9); + { + V Tg, Ti, Tb, Th; + ST(&(xo[WS(os, 
2)]), VFMAI(To, Tl), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(To, Tl), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Tp, Tq), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VSUB(Tp, Tq), ovs, &(xo[0])); + Tg = VFNMS(LDK(KP707106781), Tf, Te); + Ti = VFMA(LDK(KP707106781), Tf, Te); + Tb = VFNMS(LDK(KP707106781), Ta, T3); + Th = VFMA(LDK(KP707106781), Ta, T3); + ST(&(xo[WS(os, 7)]), VFNMSI(Ti, Th), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(Ti, Th), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFMAI(Tg, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFNMSI(Tg, Tb), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n1bv_8"), {16, 0, 10, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_8) (planner *p) { + X(kdft_register) (p, n1bv_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 8 -name n1bv_8 -include n1b.h */ + +/* + * This function contains 26 FP additions, 2 FP multiplications, + * (or, 26 additions, 2 multiplications, 0 fused multiply/add), + * 22 stack variables, 1 constants, and 16 memory accesses + */ +#include "n1b.h" + +static void n1bv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V Ta, Tk, Te, Tj, T7, Tn, Tf, Tm; + { + V T8, T9, Tc, Td; + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Ta = VSUB(T8, T9); + Tk = VADD(T8, T9); + Tc = LD(&(xi[0]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + Tj = VADD(Tc, Td); + { + V T1, T2, T3, T4, T5, T6; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = 
LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + T4 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + T7 = VMUL(LDK(KP707106781), VSUB(T3, T6)); + Tn = VADD(T4, T5); + Tf = VMUL(LDK(KP707106781), VADD(T3, T6)); + Tm = VADD(T1, T2); + } + } + { + V Tb, Tg, Tp, Tq; + Tb = VBYI(VSUB(T7, Ta)); + Tg = VSUB(Te, Tf); + ST(&(xo[WS(os, 3)]), VADD(Tb, Tg), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VSUB(Tg, Tb), ovs, &(xo[WS(os, 1)])); + Tp = VADD(Tj, Tk); + Tq = VADD(Tm, Tn); + ST(&(xo[WS(os, 4)]), VSUB(Tp, Tq), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Tp, Tq), ovs, &(xo[0])); + } + { + V Th, Ti, Tl, To; + Th = VBYI(VADD(Ta, T7)); + Ti = VADD(Te, Tf); + ST(&(xo[WS(os, 1)]), VADD(Th, Ti), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VSUB(Ti, Th), ovs, &(xo[WS(os, 1)])); + Tl = VSUB(Tj, Tk); + To = VBYI(VSUB(Tm, Tn)); + ST(&(xo[WS(os, 6)]), VSUB(Tl, To), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(Tl, To), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n1bv_8"), {26, 2, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_8) (planner *p) { + X(kdft_register) (p, n1bv_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 9 -name n1bv_9 -include n1b.h */ + +/* + * This function contains 46 FP additions, 38 FP multiplications, + * (or, 12 additions, 4 multiplications, 34 fused multiply/add), + * 68 stack variables, 19 constants, and 18 memory accesses + */ +#include "n1b.h" + +static void n1bv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP907603734, +0.907603734547952313649323976213898122064543220); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP666666666, +0.666666666666666666666666666666666666666666667); + DVK(KP879385241, +0.879385241571816768108218554649462939872416269); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP826351822, +0.826351822333069651148283373230685203999624323); + DVK(KP347296355, +0.347296355333860697703433253538629592000751354); + DVK(KP898197570, +0.898197570222573798468955502359086394667167570); + DVK(KP673648177, +0.673648177666930348851716626769314796000375677); + DVK(KP420276625, +0.420276625461206169731530603237061658838781920); + DVK(KP866025403, 
+0.866025403784438646763723170752936183471402627); + DVK(KP586256827, +0.586256827714544512072145703099641959914944179); + DVK(KP968908795, +0.968908795874236621082202410917456709164223497); + DVK(KP726681596, +0.726681596905677465811651808188092531873167623); + DVK(KP439692620, +0.439692620785908384054109277324731469936208134); + DVK(KP203604859, +0.203604859554852403062088995281827210665664861); + DVK(KP152703644, +0.152703644666139302296566746461370407999248646); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(18, is), MAKE_VOLATILE_STRIDE(18, os)) { + V T1, T2, T3, T6, Tf, T7, T8, Tb, Tc, Tp, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tp = VSUB(T2, T3); + T4 = VADD(T2, T3); + { + V Te, T9, Tg, Td, TF, T5; + Te = VSUB(T8, T7); + T9 = VADD(T7, T8); + Tg = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + TF = VADD(T1, T4); + T5 = VFNMS(LDK(KP500000000), T4, T1); + { + V Ta, TH, Th, TG; + Ta = VFNMS(LDK(KP500000000), T9, T6); + TH = VADD(T6, T9); + Th = VFNMS(LDK(KP500000000), Tg, Tf); + TG = VADD(Tf, Tg); + { + V Tr, Tu, Tm, Tv, Ts, Ti, TI, TK; + Tr = VFNMS(LDK(KP152703644), Te, Ta); + Tu = VFMA(LDK(KP203604859), Ta, Te); + Tm = VFNMS(LDK(KP439692620), Td, Ta); + Tv = VFNMS(LDK(KP726681596), Td, Th); + Ts = VFMA(LDK(KP968908795), Th, Td); + Ti = VFNMS(LDK(KP586256827), Th, Te); + TI = VADD(TG, TH); + TK = VMUL(LDK(KP866025403), VSUB(TG, TH)); + { + V Tt, TA, Tw, Tz, Tj, TJ, To, TE, Tn; + Tn = VFNMS(LDK(KP420276625), Tm, Te); 
+ Tt = VFNMS(LDK(KP673648177), Ts, Tr); + TA = VFMA(LDK(KP673648177), Ts, Tr); + Tw = VFMA(LDK(KP898197570), Tv, Tu); + Tz = VFNMS(LDK(KP898197570), Tv, Tu); + Tj = VFNMS(LDK(KP347296355), Ti, Td); + ST(&(xo[0]), VADD(TI, TF), ovs, &(xo[0])); + TJ = VFNMS(LDK(KP500000000), TI, TF); + To = VFNMS(LDK(KP826351822), Tn, Th); + TE = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), Tp, TA)); + { + V TB, TD, Tx, Tk, Tq, TC, Ty, Tl; + TB = VFMA(LDK(KP666666666), TA, Tz); + TD = VFMA(LDK(KP852868531), Tw, T5); + Tx = VFNMS(LDK(KP500000000), Tw, Tt); + Tk = VFNMS(LDK(KP907603734), Tj, Ta); + ST(&(xo[WS(os, 6)]), VFNMSI(TK, TJ), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(TK, TJ), ovs, &(xo[WS(os, 1)])); + Tq = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), Tp, To)); + TC = VMUL(LDK(KP866025403), VFNMS(LDK(KP852868531), TB, Tp)); + ST(&(xo[WS(os, 8)]), VFNMSI(TE, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(TE, TD), ovs, &(xo[WS(os, 1)])); + Ty = VFMA(LDK(KP852868531), Tx, T5); + Tl = VFNMS(LDK(KP939692620), Tk, T5); + ST(&(xo[WS(os, 5)]), VFNMSI(TC, Ty), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(TC, Ty), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(Tq, Tl), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VFNMSI(Tq, Tl), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 9, XSIMD_STRING("n1bv_9"), {12, 4, 34, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_9) (planner *p) { + X(kdft_register) (p, n1bv_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 9 -name n1bv_9 -include n1b.h */ + +/* + * This function contains 46 FP additions, 26 FP multiplications, + * (or, 30 additions, 10 multiplications, 16 fused multiply/add), + * 41 stack variables, 14 constants, and 18 memory accesses + */ +#include "n1b.h" + +static void n1bv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + 
DVK(KP342020143, +0.342020143325668733044099614682259580763083368); + DVK(KP813797681, +0.813797681349373692844693217248393223289101568); + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP296198132, +0.296198132726023843175338011893050938967728390); + DVK(KP642787609, +0.642787609686539326322643409907263432907559884); + DVK(KP663413948, +0.663413948168938396205421319635891297216863310); + DVK(KP556670399, +0.556670399226419366452912952047023132968291906); + DVK(KP766044443, +0.766044443118978035202392650555416673935832457); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP150383733, +0.150383733180435296639271897612501926072238258); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP173648177, +0.173648177666930348851716626769314796000375677); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(18, is), MAKE_VOLATILE_STRIDE(18, os)) { + V T5, Ty, Tm, Ti, Tw, Th, Tj, To, Tb, Tv, Ta, Tc, Tn; + { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + T5 = VFNMS(LDK(KP500000000), T4, T1); + Ty = VADD(T1, T4); + Tm = VMUL(LDK(KP866025403), VSUB(T2, T3)); + } + { + V Td, Tg, Te, Tf; + Td = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tf = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tg = VADD(Te, Tf); + Ti = VSUB(Te, Tf); + Tw = VADD(Td, Tg); + Th = VFNMS(LDK(KP500000000), Tg, Td); + Tj = VFNMS(LDK(KP852868531), Ti, VMUL(LDK(KP173648177), Th)); + To = VFMA(LDK(KP150383733), Ti, VMUL(LDK(KP984807753), Th)); + } + { + V T6, T9, T7, T8; + T6 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = 
LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T9 = VADD(T7, T8); + Tb = VSUB(T7, T8); + Tv = VADD(T6, T9); + Ta = VFNMS(LDK(KP500000000), T9, T6); + Tc = VFNMS(LDK(KP556670399), Tb, VMUL(LDK(KP766044443), Ta)); + Tn = VFMA(LDK(KP663413948), Tb, VMUL(LDK(KP642787609), Ta)); + } + { + V Tx, Tz, TA, Tt, Tu; + Tx = VBYI(VMUL(LDK(KP866025403), VSUB(Tv, Tw))); + Tz = VADD(Tv, Tw); + TA = VFNMS(LDK(KP500000000), Tz, Ty); + ST(&(xo[WS(os, 3)]), VADD(Tx, TA), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Ty, Tz), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VSUB(TA, Tx), ovs, &(xo[0])); + Tt = VFMA(LDK(KP852868531), Tb, VFMA(LDK(KP173648177), Ta, VFMA(LDK(KP296198132), Ti, VFNMS(LDK(KP939692620), Th, T5)))); + Tu = VBYI(VSUB(VFMA(LDK(KP984807753), Ta, VFMA(LDK(KP813797681), Ti, VFNMS(LDK(KP150383733), Tb, VMUL(LDK(KP342020143), Th)))), Tm)); + ST(&(xo[WS(os, 7)]), VSUB(Tt, Tu), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VADD(Tt, Tu), ovs, &(xo[0])); + { + V Tl, Ts, Tq, Tr, Tk, Tp; + Tk = VADD(Tc, Tj); + Tl = VADD(T5, Tk); + Ts = VFMA(LDK(KP866025403), VSUB(To, Tn), VFNMS(LDK(KP500000000), Tk, T5)); + Tp = VADD(Tn, To); + Tq = VBYI(VADD(Tm, Tp)); + Tr = VBYI(VADD(Tm, VFNMS(LDK(KP500000000), Tp, VMUL(LDK(KP866025403), VSUB(Tc, Tj))))); + ST(&(xo[WS(os, 8)]), VSUB(Tl, Tq), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VSUB(Ts, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(Tl, Tq), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VADD(Tr, Ts), ovs, &(xo[0])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 9, XSIMD_STRING("n1bv_9"), {30, 10, 16, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1bv_9) (planner *p) { + X(kdft_register) (p, n1bv_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,232 @@ +/* + * 
Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name n1fv_10 -include n1f.h */ + +/* + * This function contains 42 FP additions, 22 FP multiplications, + * (or, 24 additions, 4 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "n1f.h" + +static void n1fv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, 
os)) { + V Tb, Tr, T3, Ts, T6, Tw, Tg, Tt, T9, Tc, T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V T4, T5, Te, Tf, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tr = VADD(T1, T2); + T3 = VSUB(T1, T2); + Ts = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tw = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + Tt = VADD(T7, T8); + T9 = VSUB(T7, T8); + Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + } + { + V TD, Tu, Tm, Ta, Td, Tv; + TD = VSUB(Ts, Tt); + Tu = VADD(Ts, Tt); + Tm = VSUB(T6, T9); + Ta = VADD(T6, T9); + Td = VSUB(Tb, Tc); + Tv = VADD(Tb, Tc); + { + V TC, Tx, Tn, Th; + TC = VSUB(Tv, Tw); + Tx = VADD(Tv, Tw); + Tn = VSUB(Td, Tg); + Th = VADD(Td, Tg); + { + V Ty, TA, TE, TG, Ti, Tk, To, Tq, Tz, Tj; + Ty = VADD(Tu, Tx); + TA = VSUB(Tu, Tx); + TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC)); + TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD)); + Ti = VADD(Ta, Th); + Tk = VSUB(Ta, Th); + To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm)); + Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn)); + Tz = VFNMS(LDK(KP250000000), Ty, Tr); + ST(&(xo[0]), VADD(Tr, Ty), ovs, &(xo[0])); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + ST(&(xo[WS(os, 5)]), VADD(T3, Ti), ovs, &(xo[WS(os, 1)])); + { + V TB, TF, Tl, Tp; + TB = VFNMS(LDK(KP559016994), TA, Tz); + TF = VFMA(LDK(KP559016994), TA, Tz); + Tl = VFMA(LDK(KP559016994), Tk, Tj); + Tp = VFNMS(LDK(KP559016994), Tk, Tj); + ST(&(xo[WS(os, 4)]), VFMAI(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFNMSI(TE, TB), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(TE, TB), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), 
VFNMSI(Tq, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(Tq, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFMAI(To, Tl), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(To, Tl), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n1fv_10"), {24, 4, 18, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_10) (planner *p) { + X(kdft_register) (p, n1fv_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name n1fv_10 -include n1f.h */ + +/* + * This function contains 42 FP additions, 12 FP multiplications, + * (or, 36 additions, 6 multiplications, 6 fused multiply/add), + * 33 stack variables, 4 constants, and 20 memory accesses + */ +#include "n1f.h" + +static void n1fv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { + V Ti, Ty, Tm, Tn, Tw, Tt, Tz, TA, TB, T7, Te, Tj, Tg, Th; + Tg = LD(&(xi[0]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Ti = VSUB(Tg, Th); + Ty = VADD(Tg, Th); + { + V T3, Tu, Td, Ts, T6, Tv, Ta, Tr; + { + V T1, T2, Tb, Tc; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Tu = VADD(T1, T2); + Tb = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + Ts = VADD(Tb, Tc); + } + { + V T4, T5, T8, T9; + T4 = 
LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tv = VADD(T4, T5); + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + Tr = VADD(T8, T9); + } + Tm = VSUB(T3, T6); + Tn = VSUB(Ta, Td); + Tw = VSUB(Tu, Tv); + Tt = VSUB(Tr, Ts); + Tz = VADD(Tu, Tv); + TA = VADD(Tr, Ts); + TB = VADD(Tz, TA); + T7 = VADD(T3, T6); + Te = VADD(Ta, Td); + Tj = VADD(T7, Te); + } + ST(&(xo[WS(os, 5)]), VADD(Ti, Tj), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Ty, TB), ovs, &(xo[0])); + { + V To, Tq, Tl, Tp, Tf, Tk; + To = VBYI(VFMA(LDK(KP951056516), Tm, VMUL(LDK(KP587785252), Tn))); + Tq = VBYI(VFNMS(LDK(KP587785252), Tm, VMUL(LDK(KP951056516), Tn))); + Tf = VMUL(LDK(KP559016994), VSUB(T7, Te)); + Tk = VFNMS(LDK(KP250000000), Tj, Ti); + Tl = VADD(Tf, Tk); + Tp = VSUB(Tk, Tf); + ST(&(xo[WS(os, 1)]), VSUB(Tl, To), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(Tq, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(To, Tl), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VSUB(Tp, Tq), ovs, &(xo[WS(os, 1)])); + } + { + V Tx, TF, TE, TG, TC, TD; + Tx = VBYI(VFNMS(LDK(KP587785252), Tw, VMUL(LDK(KP951056516), Tt))); + TF = VBYI(VFMA(LDK(KP951056516), Tw, VMUL(LDK(KP587785252), Tt))); + TC = VFNMS(LDK(KP250000000), TB, Ty); + TD = VMUL(LDK(KP559016994), VSUB(Tz, TA)); + TE = VSUB(TC, TD); + TG = VADD(TD, TC); + ST(&(xo[WS(os, 2)]), VADD(Tx, TE), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VSUB(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VSUB(TE, Tx), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(TF, TG), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n1fv_10"), {36, 6, 6, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_10) (planner *p) { + X(kdft_register) (p, n1fv_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_11.c --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 11 -name n1fv_11 -include n1f.h */ + +/* + * This function contains 70 FP additions, 60 FP multiplications, + * (or, 15 additions, 5 multiplications, 55 fused multiply/add), + * 67 stack variables, 11 constants, and 22 memory accesses + */ +#include "n1f.h" + +static void n1fv_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP959492973, +0.959492973614497389890368057066327699062454848); + DVK(KP876768831, +0.876768831002589333891339807079336796764054852); + DVK(KP918985947, +0.918985947228994779780736114132655398124909697); + DVK(KP989821441, +0.989821441880932732376092037776718787376519372); + DVK(KP778434453, 
+0.778434453334651800608337670740821884709317477); + DVK(KP830830026, +0.830830026003772851058548298459246407048009821); + DVK(KP372785597, +0.372785597771792209609773152906148328659002598); + DVK(KP634356270, +0.634356270682424498893150776899916060542806975); + DVK(KP715370323, +0.715370323453429719112414662767260662417897278); + DVK(KP342584725, +0.342584725681637509502641509861112333758894680); + DVK(KP521108558, +0.521108558113202722944698153526659300680427422); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(22, is), MAKE_VOLATILE_STRIDE(22, os)) { + V T1, Tb, T4, Tp, Tg, Tq, T7, Tn, Ta, Tm, Tc, Tr; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + { + V T2, T3, Te, Tf; + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tf = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T5, T6, T8, T9; + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tp = VSUB(T3, T2); + Tg = VADD(Te, Tf); + Tq = VSUB(Tf, Te); + T7 = VADD(T5, T6); + Tn = VSUB(T6, T5); + Ta = VADD(T8, T9); + Tm = VSUB(T9, T8); + Tc = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + } + } + Tr = VFMA(LDK(KP521108558), Tq, Tp); + { + V TS, TE, Th, Td, To, T12, TO, TB, T11, TN, TA, TF; + T11 = VFNMS(LDK(KP521108558), Tp, Tn); + TN = VFNMS(LDK(KP342584725), T7, Tg); + TA = VFMA(LDK(KP521108558), Tm, Tq); + TS = VFMA(LDK(KP715370323), Tm, Tp); + TE = VFNMS(LDK(KP342584725), T4, Ta); + Th = VFNMS(LDK(KP342584725), Ta, T7); + Td = VADD(Tb, Tc); + To = VSUB(Tc, Tb); + T12 = VFNMS(LDK(KP715370323), T11, Tm); + TO = VFNMS(LDK(KP634356270), TN, T4); + TB = VFNMS(LDK(KP715370323), TA, Tn); + TF = VFNMS(LDK(KP634356270), TE, 
Tg); + { + V T14, TD, TV, Tu, TY, Tx, Tk, TR, TI, TM, TJ, TT, Ts; + TJ = VFNMS(LDK(KP521108558), Tn, To); + TT = VFMA(LDK(KP372785597), To, TS); + Ts = VFMA(LDK(KP715370323), Tr, To); + ST(&(xo[0]), VADD(T1, VADD(T4, VADD(T7, VADD(Ta, VADD(Td, Tg))))), ovs, &(xo[0])); + { + V TW, Tv, Ti, T13; + TW = VFNMS(LDK(KP342584725), Tg, Td); + Tv = VFNMS(LDK(KP342584725), Td, T4); + Ti = VFNMS(LDK(KP634356270), Th, Td); + T13 = VFNMS(LDK(KP830830026), T12, To); + { + V TP, TC, TG, TK; + TP = VFNMS(LDK(KP778434453), TO, Ta); + TC = VFMA(LDK(KP830830026), TB, Tp); + TG = VFNMS(LDK(KP778434453), TF, Td); + TK = VFMA(LDK(KP715370323), TJ, Tq); + { + V TU, Tt, TX, Tw; + TU = VFNMS(LDK(KP830830026), TT, Tq); + Tt = VFMA(LDK(KP830830026), Ts, Tn); + TX = VFNMS(LDK(KP634356270), TW, Ta); + Tw = VFNMS(LDK(KP634356270), Tv, T7); + { + V Tj, TQ, TH, TL; + Tj = VFNMS(LDK(KP778434453), Ti, T4); + T14 = VMUL(LDK(KP989821441), VFNMS(LDK(KP918985947), T13, Tq)); + TQ = VFNMS(LDK(KP876768831), TP, Td); + TD = VMUL(LDK(KP989821441), VFNMS(LDK(KP918985947), TC, To)); + TH = VFNMS(LDK(KP876768831), TG, T7); + TL = VFNMS(LDK(KP830830026), TK, Tm); + TV = VMUL(LDK(KP989821441), VFMA(LDK(KP918985947), TU, Tn)); + Tu = VMUL(LDK(KP989821441), VFMA(LDK(KP918985947), Tt, Tm)); + TY = VFNMS(LDK(KP778434453), TX, T7); + Tx = VFNMS(LDK(KP778434453), Tw, Tg); + Tk = VFNMS(LDK(KP876768831), Tj, Tg); + TR = VFNMS(LDK(KP959492973), TQ, T1); + TI = VFNMS(LDK(KP959492973), TH, T1); + TM = VMUL(LDK(KP989821441), VFNMS(LDK(KP918985947), TL, Tp)); + } + } + } + } + { + V TZ, Ty, Tl, T10, Tz; + TZ = VFNMS(LDK(KP876768831), TY, T4); + Ty = VFNMS(LDK(KP876768831), Tx, Ta); + Tl = VFNMS(LDK(KP959492973), Tk, T1); + ST(&(xo[WS(os, 7)]), VFMAI(TV, TR), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(TV, TR), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(TM, TI), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFNMSI(TM, TI), ovs, &(xo[0])); + T10 = VFNMS(LDK(KP959492973), TZ, T1); + Tz = VFNMS(LDK(KP959492973), Ty, 
T1); + ST(&(xo[WS(os, 1)]), VFMAI(Tu, Tl), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VFNMSI(Tu, Tl), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFMAI(T14, T10), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFNMSI(T14, T10), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VFMAI(TD, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(TD, Tz), ovs, &(xo[0])); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 11, XSIMD_STRING("n1fv_11"), {15, 5, 55, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_11) (planner *p) { + X(kdft_register) (p, n1fv_11, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 11 -name n1fv_11 -include n1f.h */ + +/* + * This function contains 70 FP additions, 50 FP multiplications, + * (or, 30 additions, 10 multiplications, 40 fused multiply/add), + * 32 stack variables, 10 constants, and 22 memory accesses + */ +#include "n1f.h" + +static void n1fv_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP654860733, +0.654860733945285064056925072466293553183791199); + DVK(KP142314838, +0.142314838273285140443792668616369668791051361); + DVK(KP959492973, +0.959492973614497389890368057066327699062454848); + DVK(KP415415013, +0.415415013001886425529274149229623203524004910); + DVK(KP841253532, +0.841253532831181168861811648919367717513292498); + DVK(KP989821441, +0.989821441880932732376092037776718787376519372); + DVK(KP909631995, +0.909631995354518371411715383079028460060241051); + DVK(KP281732556, +0.281732556841429697711417915346616899035777899); + DVK(KP540640817, +0.540640817455597582107635954318691695431770608); + DVK(KP755749574, +0.755749574354258283774035843972344420179717445); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(22, is), MAKE_VOLATILE_STRIDE(22, os)) { + V T1, T4, 
Ti, Tg, Tl, Td, Tk, Ta, Tj, T7, Tm, Tb, Tc, Tt, Ts; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + { + V T2, T3, Te, Tf; + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Ti = VSUB(T3, T2); + Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tf = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tg = VADD(Te, Tf); + Tl = VSUB(Tf, Te); + } + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Td = VADD(Tb, Tc); + Tk = VSUB(Tc, Tb); + { + V T8, T9, T5, T6; + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Ta = VADD(T8, T9); + Tj = VSUB(T9, T8); + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T7 = VADD(T5, T6); + Tm = VSUB(T6, T5); + } + ST(&(xo[0]), VADD(T1, VADD(T4, VADD(T7, VADD(Ta, VADD(Td, Tg))))), ovs, &(xo[0])); + { + V Tn, Th, Tv, Tu; + Tn = VBYI(VFMA(LDK(KP755749574), Ti, VFMA(LDK(KP540640817), Tj, VFNMS(LDK(KP909631995), Tl, VFNMS(LDK(KP989821441), Tm, VMUL(LDK(KP281732556), Tk)))))); + Th = VFMA(LDK(KP841253532), Ta, VFMA(LDK(KP415415013), Tg, VFNMS(LDK(KP959492973), Td, VFNMS(LDK(KP142314838), T7, VFNMS(LDK(KP654860733), T4, T1))))); + ST(&(xo[WS(os, 7)]), VSUB(Th, Tn), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VADD(Th, Tn), ovs, &(xo[0])); + Tv = VBYI(VFMA(LDK(KP281732556), Ti, VFMA(LDK(KP755749574), Tj, VFNMS(LDK(KP909631995), Tk, VFNMS(LDK(KP540640817), Tm, VMUL(LDK(KP989821441), Tl)))))); + Tu = VFMA(LDK(KP841253532), T7, VFMA(LDK(KP415415013), Td, VFNMS(LDK(KP142314838), Tg, VFNMS(LDK(KP654860733), Ta, VFNMS(LDK(KP959492973), T4, T1))))); + ST(&(xo[WS(os, 6)]), VSUB(Tu, Tv), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VADD(Tu, Tv), ovs, &(xo[WS(os, 1)])); + } + Tt = VBYI(VFMA(LDK(KP989821441), Ti, VFMA(LDK(KP540640817), Tk, VFNMS(LDK(KP909631995), Tj, VFNMS(LDK(KP281732556), Tm, VMUL(LDK(KP755749574), Tl)))))); + Ts = VFMA(LDK(KP415415013), Ta, 
VFMA(LDK(KP841253532), Td, VFNMS(LDK(KP654860733), Tg, VFNMS(LDK(KP959492973), T7, VFNMS(LDK(KP142314838), T4, T1))))); + ST(&(xo[WS(os, 8)]), VSUB(Ts, Tt), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VADD(Ts, Tt), ovs, &(xo[WS(os, 1)])); + { + V Tr, Tq, Tp, To; + Tr = VBYI(VFMA(LDK(KP540640817), Ti, VFMA(LDK(KP909631995), Tm, VFMA(LDK(KP989821441), Tj, VFMA(LDK(KP755749574), Tk, VMUL(LDK(KP281732556), Tl)))))); + Tq = VFMA(LDK(KP841253532), T4, VFMA(LDK(KP415415013), T7, VFNMS(LDK(KP959492973), Tg, VFNMS(LDK(KP654860733), Td, VFNMS(LDK(KP142314838), Ta, T1))))); + ST(&(xo[WS(os, 10)]), VSUB(Tq, Tr), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(Tq, Tr), ovs, &(xo[WS(os, 1)])); + Tp = VBYI(VFMA(LDK(KP909631995), Ti, VFNMS(LDK(KP540640817), Tl, VFNMS(LDK(KP989821441), Tk, VFNMS(LDK(KP281732556), Tj, VMUL(LDK(KP755749574), Tm)))))); + To = VFMA(LDK(KP415415013), T4, VFMA(LDK(KP841253532), Tg, VFNMS(LDK(KP142314838), Td, VFNMS(LDK(KP959492973), Ta, VFNMS(LDK(KP654860733), T7, T1))))); + ST(&(xo[WS(os, 9)]), VSUB(To, Tp), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VADD(To, Tp), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 11, XSIMD_STRING("n1fv_11"), {30, 10, 40, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_11) (planner *p) { + X(kdft_register) (p, n1fv_11, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name n1fv_12 -include n1f.h */ + +/* + * This function contains 48 FP additions, 20 FP multiplications, + * (or, 30 additions, 2 multiplications, 18 fused multiply/add), + * 49 stack variables, 2 constants, and 24 memory accesses + */ +#include "n1f.h" + +static void n1fv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T1, T6, Tk, Tn, Tc, Td, Tf, Tr, T4, Ts, T9, Tg, Te, Tl; + { + V T2, T3, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Td = 
LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tr = VSUB(T3, T2); + T4 = VADD(T2, T3); + Ts = VSUB(T8, T7); + T9 = VADD(T7, T8); + Tg = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + } + Te = VSUB(Tc, Td); + Tl = VADD(Td, Tc); + { + V T5, TF, TB, Tt, Ta, TG, Th, To, Tm, TI; + T5 = VFNMS(LDK(KP500000000), T4, T1); + TF = VADD(T1, T4); + TB = VADD(Tr, Ts); + Tt = VSUB(Tr, Ts); + Ta = VFNMS(LDK(KP500000000), T9, T6); + TG = VADD(T6, T9); + Th = VSUB(Tf, Tg); + To = VADD(Tf, Tg); + Tm = VFNMS(LDK(KP500000000), Tl, Tk); + TI = VADD(Tk, Tl); + { + V TH, TL, Tb, Tx, TJ, Tp, Ti, TA; + TH = VSUB(TF, TG); + TL = VADD(TF, TG); + Tb = VSUB(T5, Ta); + Tx = VADD(T5, Ta); + TJ = VADD(Tn, To); + Tp = VFNMS(LDK(KP500000000), To, Tn); + Ti = VADD(Te, Th); + TA = VSUB(Te, Th); + { + V Tq, Ty, TK, TM; + Tq = VSUB(Tm, Tp); + Ty = VADD(Tm, Tp); + TK = VSUB(TI, TJ); + TM = VADD(TI, TJ); + { + V TC, TE, Tj, Tv; + TC = VMUL(LDK(KP866025403), VSUB(TA, TB)); + TE = VMUL(LDK(KP866025403), VADD(TB, TA)); + Tj = VFMA(LDK(KP866025403), Ti, Tb); + Tv = VFNMS(LDK(KP866025403), Ti, Tb); + { + V Tz, TD, Tu, Tw; + Tz = VSUB(Tx, Ty); + TD = VADD(Tx, Ty); + Tu = VFNMS(LDK(KP866025403), Tt, Tq); + Tw = VFMA(LDK(KP866025403), Tt, Tq); + ST(&(xo[0]), VADD(TL, TM), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VSUB(TL, TM), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(TK, TH), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(TE, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFNMSI(TE, TD), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFNMSI(TC, Tz), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(TC, Tz), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(Tw, Tv), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(Tw, Tv), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFMAI(Tu, Tj), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(Tu, Tj), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + } + VLEAVE(); 
+} + +static const kdft_desc desc = { 12, XSIMD_STRING("n1fv_12"), {30, 2, 18, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_12) (planner *p) { + X(kdft_register) (p, n1fv_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name n1fv_12 -include n1f.h */ + +/* + * This function contains 48 FP additions, 8 FP multiplications, + * (or, 44 additions, 4 multiplications, 4 fused multiply/add), + * 27 stack variables, 2 constants, and 24 memory accesses + */ +#include "n1f.h" + +static void n1fv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T5, Ta, TJ, Ty, Tq, Tp, Tg, Tl, TI, TA, Tz, Tu; + { + V T1, T6, T4, Tw, T9, Tx; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T2, T3, T7, T8; + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tw = VSUB(T3, T2); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = VADD(T7, T8); + Tx = VSUB(T8, T7); + } + T5 = VADD(T1, T4); + Ta = VADD(T6, T9); + TJ = VADD(Tw, Tx); + Ty = VMUL(LDK(KP866025403), VSUB(Tw, Tx)); + Tq = VFNMS(LDK(KP500000000), T9, T6); + Tp = VFNMS(LDK(KP500000000), T4, T1); + } + { + V Tc, Th, Tf, Ts, Tk, Tt; + Tc = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + { + V Td, Te, Ti, Tj; + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tf = VADD(Td, Te); + Ts = VSUB(Te, Td); + Ti = 
LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tk = VADD(Ti, Tj); + Tt = VSUB(Tj, Ti); + } + Tg = VADD(Tc, Tf); + Tl = VADD(Th, Tk); + TI = VADD(Ts, Tt); + TA = VFNMS(LDK(KP500000000), Tk, Th); + Tz = VFNMS(LDK(KP500000000), Tf, Tc); + Tu = VMUL(LDK(KP866025403), VSUB(Ts, Tt)); + } + { + V Tb, Tm, Tn, To; + Tb = VSUB(T5, Ta); + Tm = VBYI(VSUB(Tg, Tl)); + ST(&(xo[WS(os, 9)]), VSUB(Tb, Tm), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(Tb, Tm), ovs, &(xo[WS(os, 1)])); + Tn = VADD(T5, Ta); + To = VADD(Tg, Tl); + ST(&(xo[WS(os, 6)]), VSUB(Tn, To), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Tn, To), ovs, &(xo[0])); + } + { + V Tv, TE, TC, TD, Tr, TB; + Tr = VSUB(Tp, Tq); + Tv = VSUB(Tr, Tu); + TE = VADD(Tr, Tu); + TB = VSUB(Tz, TA); + TC = VBYI(VADD(Ty, TB)); + TD = VBYI(VSUB(Ty, TB)); + ST(&(xo[WS(os, 5)]), VSUB(Tv, TC), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VSUB(TE, TD), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(TC, Tv), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(TD, TE), ovs, &(xo[WS(os, 1)])); + } + { + V TK, TM, TH, TL, TF, TG; + TK = VBYI(VMUL(LDK(KP866025403), VSUB(TI, TJ))); + TM = VBYI(VMUL(LDK(KP866025403), VADD(TJ, TI))); + TF = VADD(Tp, Tq); + TG = VADD(Tz, TA); + TH = VSUB(TF, TG); + TL = VADD(TF, TG); + ST(&(xo[WS(os, 10)]), VSUB(TH, TK), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(TL, TM), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(TH, TK), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VSUB(TL, TM), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, XSIMD_STRING("n1fv_12"), {44, 4, 4, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_12) (planner *p) { + X(kdft_register) (p, n1fv_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3527 @@ 
+/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 128 -name n1fv_128 -include n1f.h */ + +/* + * This function contains 1082 FP additions, 642 FP multiplications, + * (or, 440 additions, 0 multiplications, 642 fused multiply/add), + * 295 stack variables, 31 constants, and 256 memory accesses + */ +#include "n1f.h" + +static void n1fv_128(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP903989293, +0.903989293123443331586200297230537048710132025); + DVK(KP941544065, +0.941544065183020778412509402599502357185589796); + DVK(KP357805721, +0.357805721314524104672487743774474392487532769); + DVK(KP472964775, +0.472964775891319928124438237972992463904131113); + DVK(KP857728610, +0.857728610000272069902269984284770137042490799); + DVK(KP970031253, +0.970031253194543992603984207286100251456865962); + DVK(KP250486960, 
+0.250486960191305461595702160124721208578685568); + DVK(KP998795456, +0.998795456205172392714771604759100694443203615); + DVK(KP740951125, +0.740951125354959091175616897495162729728955309); + DVK(KP599376933, +0.599376933681923766271389869014404232837890546); + DVK(KP906347169, +0.906347169019147157946142717268914412664134293); + DVK(KP049126849, +0.049126849769467254105343321271313617079695752); + DVK(KP989176509, +0.989176509964780973451673738016243063983689533); + DVK(KP803207531, +0.803207531480644909806676512963141923879569427); + DVK(KP741650546, +0.741650546272035369581266691172079863842265220); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP148335987, +0.148335987538347428753676511486911367000625355); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + V T6a, T5J, T6b, T5K, T6B, T6C, T6J, T6A, T6o, T6j, T6r, T68, T6e, 
T5O, T5R; + V T6d, T6D, T6K; + { + V Tad, TcZ, T6Z, T8T, T4U, Tr, Tfq, TgG, Ted, Tgf, Td0, Tcc, T9k, T84, Tb6; + V Tbt, Td8, TdK, TeK, Tgq, TeV, Tgt, T7q, T94, T3p, T5X, T7B, T97, T2G, T5U; + V TbD, Tc0, Tdf, TdN, Tf5, Tgx, Tfg, TgA, T7J, T9b, T4E, T64, T7U, T9e, T3V; + V T61, Td2, Td3, T85, T72, T4V, TI, Tcd, Tas, TgH, Tek, Tgg, Tft, T86, T75; + V T4W, TZ, TaI, Tcg, Tdr, TdG, Tgi, Tet, Tgj, Teq, T8X, T7a, T5M, T1B, T8W; + V T7d, T5N, T1s, TaX, Tcf, Tdo, TdH, Tgl, TeC, Tgm, Tez, T90, T7h, T5P, T2c; + V T8Z, T7k, T5Q, T23, T3Y, T49, TdL, Tdb, Tbu, Tbl, Tgu, TeR, Tgr, TeY, Tf6; + V TbG, T5V, T3s, T5Y, T3f, T95, T7E, T98, T7x, T4g, T4f, T4q, TbH, T41, TbI; + V T44, T4h, T4j, T4k, Tf9, TbN; + { + V Tu, TF, Ty, TL, TW, Tah, Tx, Tag, Tee, Tz, TM, TN, Teh, Tan, TP; + V TQ; + { + V TeG, T2A, Tbq, TeT, Tbp, TeH, T3m, T2x, Td6, T7o, T2q, T3l, T7z, Tbr, T2D; + V T82, T83; + { + V Ta7, T3, Ta8, T4O, Taa, Tab, Ta, T4P, Te, Tc9, Th, Tca, Tl, Tc6, Tc7; + V To; + { + V T1, T2, T4M, T4N; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 64)]), ivs, &(xi[0])); + T4M = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T4N = LD(&(xi[WS(is, 96)]), ivs, &(xi[0])); + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 80)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 112)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + { + V Tc, T6, T9, Td, Tf, Tg; + Tc = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Ta7 = VADD(T1, T2); + T3 = VSUB(T1, T2); + Ta8 = VADD(T4M, T4N); + T4O = VSUB(T4M, T4N); + Taa = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tab = VADD(T7, T8); + T9 = VSUB(T7, T8); + Td = LD(&(xi[WS(is, 72)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 104)]), ivs, &(xi[0])); + { + V Tj, Tk, Tm, Tn; + Tj = LD(&(xi[WS(is, 120)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 88)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + Ta = VADD(T6, T9); + T4P = VSUB(T9, 
T6); + Te = VSUB(Tc, Td); + Tc9 = VADD(Tc, Td); + Th = VSUB(Tf, Tg); + Tca = VADD(Tf, Tg); + Tl = VSUB(Tj, Tk); + Tc6 = VADD(Tj, Tk); + Tc7 = VADD(Tn, Tm); + To = VSUB(Tm, Tn); + } + } + } + } + { + V T6X, Tb, Te9, Ta9, Tcb, Tea, T4R, Ti, Tfo, Tac, Tp, T4S, Tc8, Teb, T4Q; + T6X = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + Te9 = VSUB(Ta7, Ta8); + Ta9 = VADD(Ta7, Ta8); + Tcb = VADD(Tc9, Tca); + Tea = VSUB(Tc9, Tca); + T4R = VFMA(LDK(KP414213562), Te, Th); + Ti = VFNMS(LDK(KP414213562), Th, Te); + Tfo = VSUB(Tab, Taa); + Tac = VADD(Taa, Tab); + Tp = VFNMS(LDK(KP414213562), To, Tl); + T4S = VFMA(LDK(KP414213562), Tl, To); + Tc8 = VADD(Tc6, Tc7); + Teb = VSUB(Tc6, Tc7); + T4Q = VFNMS(LDK(KP707106781), T4P, T4O); + T82 = VFMA(LDK(KP707106781), T4P, T4O); + { + V T4T, T6Y, Tq, Tfp, Tec; + T4T = VSUB(T4R, T4S); + T6Y = VADD(T4R, T4S); + T83 = VSUB(Tp, Ti); + Tq = VADD(Ti, Tp); + Tfp = VSUB(Teb, Tea); + Tec = VADD(Tea, Teb); + Tad = VSUB(Ta9, Tac); + TcZ = VADD(Ta9, Tac); + T6Z = VFMA(LDK(KP923879532), T6Y, T6X); + T8T = VFNMS(LDK(KP923879532), T6Y, T6X); + T4U = VFMA(LDK(KP923879532), T4T, T4Q); + T6a = VFNMS(LDK(KP923879532), T4T, T4Q); + Tr = VFMA(LDK(KP923879532), Tq, Tb); + T5J = VFNMS(LDK(KP923879532), Tq, Tb); + Tfq = VFMA(LDK(KP707106781), Tfp, Tfo); + TgG = VFNMS(LDK(KP707106781), Tfp, Tfo); + Ted = VFMA(LDK(KP707106781), Tec, Te9); + Tgf = VFNMS(LDK(KP707106781), Tec, Te9); + Td0 = VADD(Tcb, Tc8); + Tcc = VSUB(Tc8, Tcb); + } + } + } + { + V T2i, T3j, Tb2, T2B, Tb5, T3k, T2p, T2C; + { + V T2m, Tb0, Tb1, Tb3, T2l, T2n; + { + V T2g, T2h, T3h, T3i, T2j, T2k; + T2g = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2h = LD(&(xi[WS(is, 65)]), ivs, &(xi[WS(is, 1)])); + T3h = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T3i = LD(&(xi[WS(is, 97)]), ivs, &(xi[WS(is, 1)])); + T2j = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T2k = LD(&(xi[WS(is, 81)]), ivs, &(xi[WS(is, 1)])); + T2m = LD(&(xi[WS(is, 113)]), ivs, &(xi[WS(is, 1)])); + T9k = 
VFNMS(LDK(KP923879532), T83, T82); + T84 = VFMA(LDK(KP923879532), T83, T82); + T2i = VSUB(T2g, T2h); + Tb0 = VADD(T2g, T2h); + T3j = VSUB(T3h, T3i); + Tb1 = VADD(T3h, T3i); + Tb3 = VADD(T2j, T2k); + T2l = VSUB(T2j, T2k); + T2n = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + } + { + V T2r, T2s, T2u, T2v; + T2r = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T2s = LD(&(xi[WS(is, 73)]), ivs, &(xi[WS(is, 1)])); + T2u = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T2v = LD(&(xi[WS(is, 105)]), ivs, &(xi[WS(is, 1)])); + TeG = VSUB(Tb0, Tb1); + Tb2 = VADD(Tb0, Tb1); + { + V T2y, T2z, Tb4, T2o, Tbn, T2t, Tbo, T2w; + T2y = LD(&(xi[WS(is, 121)]), ivs, &(xi[WS(is, 1)])); + T2z = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + Tb4 = VADD(T2m, T2n); + T2o = VSUB(T2m, T2n); + Tbn = VADD(T2r, T2s); + T2t = VSUB(T2r, T2s); + Tbo = VADD(T2u, T2v); + T2w = VSUB(T2u, T2v); + T2B = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + T2A = VSUB(T2y, T2z); + Tbq = VADD(T2y, T2z); + TeT = VSUB(Tb3, Tb4); + Tb5 = VADD(Tb3, Tb4); + T3k = VSUB(T2l, T2o); + T2p = VADD(T2l, T2o); + Tbp = VADD(Tbn, Tbo); + TeH = VSUB(Tbn, Tbo); + T3m = VFMA(LDK(KP414213562), T2t, T2w); + T2x = VFNMS(LDK(KP414213562), T2w, T2t); + T2C = LD(&(xi[WS(is, 89)]), ivs, &(xi[WS(is, 1)])); + } + } + } + Td6 = VADD(Tb2, Tb5); + Tb6 = VSUB(Tb2, Tb5); + T7o = VFNMS(LDK(KP707106781), T2p, T2i); + T2q = VFMA(LDK(KP707106781), T2p, T2i); + T3l = VFMA(LDK(KP707106781), T3k, T3j); + T7z = VFNMS(LDK(KP707106781), T3k, T3j); + Tbr = VADD(T2B, T2C); + T2D = VSUB(T2B, T2C); + } + { + V Tf1, Tfe, Tf2, TbZ, T3M, T4B, Tdd, T3F, T7H, T4A, T7S, TbW, Tf3, T4C, T3T; + { + V T3x, T4y, Tbz, T3Q, TbC, T4z, T3E, T3R, T3P, TbU, TbV, T3S; + { + V T3y, T3z, T3B, T3C; + { + V T3v, T3w, T4w, T4x; + T3v = LD(&(xi[WS(is, 127)]), ivs, &(xi[WS(is, 1)])); + T3w = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T4w = LD(&(xi[WS(is, 95)]), ivs, &(xi[WS(is, 1)])); + T4x = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T3y = LD(&(xi[WS(is, 15)]), ivs, 
&(xi[WS(is, 1)])); + { + V Tbs, TeI, T3n, T2E, Tbx; + Tbs = VADD(Tbq, Tbr); + TeI = VSUB(Tbq, Tbr); + T3n = VFNMS(LDK(KP414213562), T2A, T2D); + T2E = VFMA(LDK(KP414213562), T2D, T2A); + T3x = VSUB(T3v, T3w); + Tbx = VADD(T3v, T3w); + { + V Tby, Td7, TeJ, TeU; + T4y = VSUB(T4w, T4x); + Tby = VADD(T4x, T4w); + Td7 = VADD(Tbp, Tbs); + Tbt = VSUB(Tbp, Tbs); + TeJ = VADD(TeH, TeI); + TeU = VSUB(TeH, TeI); + { + V T7p, T3o, T7A, T2F; + T7p = VSUB(T3m, T3n); + T3o = VADD(T3m, T3n); + T7A = VSUB(T2x, T2E); + T2F = VADD(T2x, T2E); + Tbz = VADD(Tbx, Tby); + Tf1 = VSUB(Tbx, Tby); + Td8 = VADD(Td6, Td7); + TdK = VSUB(Td6, Td7); + TeK = VFMA(LDK(KP707106781), TeJ, TeG); + Tgq = VFNMS(LDK(KP707106781), TeJ, TeG); + TeV = VFMA(LDK(KP707106781), TeU, TeT); + Tgt = VFNMS(LDK(KP707106781), TeU, TeT); + T7q = VFMA(LDK(KP923879532), T7p, T7o); + T94 = VFNMS(LDK(KP923879532), T7p, T7o); + T3p = VFMA(LDK(KP923879532), T3o, T3l); + T5X = VFNMS(LDK(KP923879532), T3o, T3l); + T7B = VFNMS(LDK(KP923879532), T7A, T7z); + T97 = VFMA(LDK(KP923879532), T7A, T7z); + T2G = VFMA(LDK(KP923879532), T2F, T2q); + T5U = VFNMS(LDK(KP923879532), T2F, T2q); + T3z = LD(&(xi[WS(is, 79)]), ivs, &(xi[WS(is, 1)])); + } + } + } + T3B = LD(&(xi[WS(is, 111)]), ivs, &(xi[WS(is, 1)])); + T3C = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + } + { + V T3G, T3H, T3J, T3K; + T3G = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3H = LD(&(xi[WS(is, 71)]), ivs, &(xi[WS(is, 1)])); + T3J = LD(&(xi[WS(is, 103)]), ivs, &(xi[WS(is, 1)])); + T3K = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + { + V T3N, T3A, TbA, T3D, TbB, T3I, TbX, T3L, TbY, T3O; + T3N = LD(&(xi[WS(is, 119)]), ivs, &(xi[WS(is, 1)])); + T3A = VSUB(T3y, T3z); + TbA = VADD(T3y, T3z); + T3D = VSUB(T3B, T3C); + TbB = VADD(T3B, T3C); + T3I = VSUB(T3G, T3H); + TbX = VADD(T3G, T3H); + T3L = VSUB(T3J, T3K); + TbY = VADD(T3K, T3J); + T3O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T3Q = LD(&(xi[WS(is, 87)]), ivs, &(xi[WS(is, 1)])); + Tfe = VSUB(TbB, TbA); + 
TbC = VADD(TbA, TbB); + T4z = VSUB(T3D, T3A); + T3E = VADD(T3A, T3D); + T3R = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + Tf2 = VSUB(TbX, TbY); + TbZ = VADD(TbX, TbY); + T3M = VFMA(LDK(KP414213562), T3L, T3I); + T4B = VFNMS(LDK(KP414213562), T3I, T3L); + T3P = VSUB(T3N, T3O); + TbU = VADD(T3N, T3O); + } + } + } + Tdd = VADD(Tbz, TbC); + TbD = VSUB(Tbz, TbC); + TbV = VADD(T3R, T3Q); + T3S = VSUB(T3Q, T3R); + T3F = VFMA(LDK(KP707106781), T3E, T3x); + T7H = VFNMS(LDK(KP707106781), T3E, T3x); + T4A = VFMA(LDK(KP707106781), T4z, T4y); + T7S = VFNMS(LDK(KP707106781), T4z, T4y); + TbW = VADD(TbU, TbV); + Tf3 = VSUB(TbU, TbV); + T4C = VFMA(LDK(KP414213562), T3P, T3S); + T3T = VFNMS(LDK(KP414213562), T3S, T3P); + } + { + V TD, Tae, TE, TJ, TK, TU, TV; + { + V Ts, Tt, Tde, Tf4, Tff; + Ts = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 68)]), ivs, &(xi[0])); + TD = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Tde = VADD(TbZ, TbW); + Tc0 = VSUB(TbW, TbZ); + Tf4 = VADD(Tf2, Tf3); + Tff = VSUB(Tf3, Tf2); + { + V T7I, T4D, T7T, T3U; + T7I = VSUB(T4C, T4B); + T4D = VADD(T4B, T4C); + T7T = VSUB(T3T, T3M); + T3U = VADD(T3M, T3T); + Tae = VADD(Ts, Tt); + Tu = VSUB(Ts, Tt); + Tdf = VADD(Tdd, Tde); + TdN = VSUB(Tdd, Tde); + Tf5 = VFMA(LDK(KP707106781), Tf4, Tf1); + Tgx = VFNMS(LDK(KP707106781), Tf4, Tf1); + Tfg = VFMA(LDK(KP707106781), Tff, Tfe); + TgA = VFNMS(LDK(KP707106781), Tff, Tfe); + T7J = VFMA(LDK(KP923879532), T7I, T7H); + T9b = VFNMS(LDK(KP923879532), T7I, T7H); + T4E = VFMA(LDK(KP923879532), T4D, T4A); + T64 = VFNMS(LDK(KP923879532), T4D, T4A); + T7U = VFNMS(LDK(KP923879532), T7T, T7S); + T9e = VFMA(LDK(KP923879532), T7T, T7S); + T3V = VFMA(LDK(KP923879532), T3U, T3F); + T61 = VFNMS(LDK(KP923879532), T3U, T3F); + TE = LD(&(xi[WS(is, 100)]), ivs, &(xi[0])); + } + } + TJ = LD(&(xi[WS(is, 124)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + TU = LD(&(xi[WS(is, 92)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + { + V Tal, Tam, Tv, 
Tw, Taf; + Tv = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 84)]), ivs, &(xi[0])); + Taf = VADD(TD, TE); + TF = VSUB(TD, TE); + Ty = LD(&(xi[WS(is, 116)]), ivs, &(xi[0])); + TL = VSUB(TJ, TK); + Tal = VADD(TJ, TK); + TW = VSUB(TU, TV); + Tam = VADD(TV, TU); + Tah = VADD(Tv, Tw); + Tx = VSUB(Tv, Tw); + Tag = VADD(Tae, Taf); + Tee = VSUB(Tae, Taf); + Tz = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + TM = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 76)]), ivs, &(xi[0])); + Teh = VSUB(Tal, Tam); + Tan = VADD(Tal, Tam); + TP = LD(&(xi[WS(is, 108)]), ivs, &(xi[0])); + TQ = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + } + } + } + } + { + V Tev, TeA, Tdm, TaP, Tew, TaV, T1U, T29, T7f, T1N, T28, T7i, Tex, TaS, T21; + V T2a; + { + V Tem, Ter, Ten, TaD, T1j, T1y, TaA, Tdp, T1c, T78, T7b, T1x, TaG, Teo, T1z; + V T1q; + { + V T14, T1v, Taw, Taz, T1b, T1w, T1n, T1o, T1m, TaE, TaF, T1p; + { + V Tau, Tav, T15, T16, T18, T19; + { + V T12, Tai, TA, Tao, TO, T13; + T12 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tai = VADD(Ty, Tz); + TA = VSUB(Ty, Tz); + Tao = VADD(TM, TN); + TO = VSUB(TM, TN); + T13 = LD(&(xi[WS(is, 66)]), ivs, &(xi[0])); + { + V T1t, Tap, TR, Taj, Tef, TG, TB, T1u; + T1t = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + Tap = VADD(TP, TQ); + TR = VSUB(TP, TQ); + Taj = VADD(Tah, Tai); + Tef = VSUB(Tah, Tai); + TG = VSUB(Tx, TA); + TB = VADD(Tx, TA); + Tau = VADD(T12, T13); + T14 = VSUB(T12, T13); + T1u = LD(&(xi[WS(is, 98)]), ivs, &(xi[0])); + { + V Taq, Tei, TX, TS, Tak; + Taq = VADD(Tao, Tap); + Tei = VSUB(Tap, Tao); + TX = VSUB(TR, TO); + TS = VADD(TO, TR); + Tak = VSUB(Tag, Taj); + Td2 = VADD(Tag, Taj); + { + V Teg, Tfs, T71, TH; + Teg = VFNMS(LDK(KP414213562), Tef, Tee); + Tfs = VFMA(LDK(KP414213562), Tee, Tef); + T71 = VFNMS(LDK(KP707106781), TG, TF); + TH = VFMA(LDK(KP707106781), TG, TF); + { + V T70, TC, Tar, Tej, Tfr; + T70 = VFNMS(LDK(KP707106781), TB, Tu); + TC = VFMA(LDK(KP707106781), TB, Tu); + Tar = VSUB(Tan, Taq); + Td3 = VADD(Tan, Taq); + 
Tej = VFNMS(LDK(KP414213562), Tei, Teh); + Tfr = VFMA(LDK(KP414213562), Teh, Tei); + { + V T74, TY, T73, TT; + T74 = VFNMS(LDK(KP707106781), TX, TW); + TY = VFMA(LDK(KP707106781), TX, TW); + T73 = VFNMS(LDK(KP707106781), TS, TL); + TT = VFMA(LDK(KP707106781), TS, TL); + T85 = VFNMS(LDK(KP668178637), T70, T71); + T72 = VFMA(LDK(KP668178637), T71, T70); + T4V = VFMA(LDK(KP198912367), TC, TH); + TI = VFNMS(LDK(KP198912367), TH, TC); + Tcd = VSUB(Tar, Tak); + Tas = VADD(Tak, Tar); + TgH = VSUB(Tej, Teg); + Tek = VADD(Teg, Tej); + Tgg = VADD(Tfs, Tfr); + Tft = VSUB(Tfr, Tfs); + T86 = VFNMS(LDK(KP668178637), T73, T74); + T75 = VFMA(LDK(KP668178637), T74, T73); + T4W = VFMA(LDK(KP198912367), TT, TY); + TZ = VFNMS(LDK(KP198912367), TY, TT); + Tav = VADD(T1t, T1u); + T1v = VSUB(T1t, T1u); + } + } + } + } + } + } + T15 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + T16 = LD(&(xi[WS(is, 82)]), ivs, &(xi[0])); + T18 = LD(&(xi[WS(is, 114)]), ivs, &(xi[0])); + T19 = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + { + V T1d, T1e, T1g, T1h, Tax, T17, Tay, T1a; + T1d = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Taw = VADD(Tau, Tav); + Tem = VSUB(Tau, Tav); + T1e = LD(&(xi[WS(is, 74)]), ivs, &(xi[0])); + T1g = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T1h = LD(&(xi[WS(is, 106)]), ivs, &(xi[0])); + Tax = VADD(T15, T16); + T17 = VSUB(T15, T16); + Tay = VADD(T18, T19); + T1a = VSUB(T18, T19); + { + V T1k, T1f, TaB, T1i, TaC, T1l; + T1k = LD(&(xi[WS(is, 122)]), ivs, &(xi[0])); + T1f = VSUB(T1d, T1e); + TaB = VADD(T1d, T1e); + T1i = VSUB(T1g, T1h); + TaC = VADD(T1g, T1h); + T1l = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Taz = VADD(Tax, Tay); + Ter = VSUB(Tax, Tay); + T1b = VADD(T17, T1a); + T1w = VSUB(T17, T1a); + T1n = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + T1o = LD(&(xi[WS(is, 90)]), ivs, &(xi[0])); + Ten = VSUB(TaB, TaC); + TaD = VADD(TaB, TaC); + T1j = VFNMS(LDK(KP414213562), T1i, T1f); + T1y = VFMA(LDK(KP414213562), T1f, T1i); + T1m = VSUB(T1k, T1l); + TaE = VADD(T1k, T1l); + } + } + } + TaA = 
VSUB(Taw, Taz); + Tdp = VADD(Taw, Taz); + TaF = VADD(T1n, T1o); + T1p = VSUB(T1n, T1o); + T1c = VFMA(LDK(KP707106781), T1b, T14); + T78 = VFNMS(LDK(KP707106781), T1b, T14); + T7b = VFNMS(LDK(KP707106781), T1w, T1v); + T1x = VFMA(LDK(KP707106781), T1w, T1v); + TaG = VADD(TaE, TaF); + Teo = VSUB(TaE, TaF); + T1z = VFNMS(LDK(KP414213562), T1m, T1p); + T1q = VFMA(LDK(KP414213562), T1p, T1m); + } + { + V T1F, T26, T1Q, TaT, TaL, TaO, T27, T1M, T1Y, T1Z, TaU, T1T, TaQ, T1X, T20; + V TaR; + { + V T24, TaJ, T25, T1G, T1H, T1J, T1K, T1D, T1E; + T1D = LD(&(xi[WS(is, 126)]), ivs, &(xi[0])); + T1E = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + T24 = LD(&(xi[WS(is, 94)]), ivs, &(xi[0])); + { + V TaH, Tdq, Tes, Tep; + TaH = VSUB(TaD, TaG); + Tdq = VADD(TaD, TaG); + Tes = VSUB(Ten, Teo); + Tep = VADD(Ten, Teo); + { + V T79, T1A, T7c, T1r; + T79 = VSUB(T1y, T1z); + T1A = VADD(T1y, T1z); + T7c = VSUB(T1j, T1q); + T1r = VADD(T1j, T1q); + TaJ = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + TaI = VFNMS(LDK(KP414213562), TaH, TaA); + Tcg = VFMA(LDK(KP414213562), TaA, TaH); + Tdr = VADD(Tdp, Tdq); + TdG = VSUB(Tdp, Tdq); + Tgi = VFNMS(LDK(KP707106781), Tes, Ter); + Tet = VFMA(LDK(KP707106781), Tes, Ter); + Tgj = VFNMS(LDK(KP707106781), Tep, Tem); + Teq = VFMA(LDK(KP707106781), Tep, Tem); + T8X = VFNMS(LDK(KP923879532), T79, T78); + T7a = VFMA(LDK(KP923879532), T79, T78); + T5M = VFNMS(LDK(KP923879532), T1A, T1x); + T1B = VFMA(LDK(KP923879532), T1A, T1x); + T8W = VFMA(LDK(KP923879532), T7c, T7b); + T7d = VFNMS(LDK(KP923879532), T7c, T7b); + T5N = VFNMS(LDK(KP923879532), T1r, T1c); + T1s = VFMA(LDK(KP923879532), T1r, T1c); + T25 = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + } + } + T1G = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T1H = LD(&(xi[WS(is, 78)]), ivs, &(xi[0])); + T1J = LD(&(xi[WS(is, 110)]), ivs, &(xi[0])); + T1K = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + { + V T1R, T1I, TaM, T1L, TaN, T1S, T1O, T1P, TaK, T1V, T1W; + T1O = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1P = LD(&(xi[WS(is, 70)]), ivs, 
&(xi[0])); + T26 = VSUB(T24, T25); + TaK = VADD(T25, T24); + T1R = LD(&(xi[WS(is, 102)]), ivs, &(xi[0])); + T1I = VSUB(T1G, T1H); + TaM = VADD(T1G, T1H); + T1L = VSUB(T1J, T1K); + TaN = VADD(T1J, T1K); + T1Q = VSUB(T1O, T1P); + TaT = VADD(T1O, T1P); + Tev = VSUB(TaJ, TaK); + TaL = VADD(TaJ, TaK); + T1S = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + T1V = LD(&(xi[WS(is, 118)]), ivs, &(xi[0])); + T1W = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + TeA = VSUB(TaN, TaM); + TaO = VADD(TaM, TaN); + T27 = VSUB(T1L, T1I); + T1M = VADD(T1I, T1L); + T1Y = LD(&(xi[WS(is, 86)]), ivs, &(xi[0])); + T1Z = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TaU = VADD(T1S, T1R); + T1T = VSUB(T1R, T1S); + TaQ = VADD(T1V, T1W); + T1X = VSUB(T1V, T1W); + } + } + Tdm = VADD(TaL, TaO); + TaP = VSUB(TaL, TaO); + T20 = VSUB(T1Y, T1Z); + TaR = VADD(T1Z, T1Y); + Tew = VSUB(TaT, TaU); + TaV = VADD(TaT, TaU); + T1U = VFMA(LDK(KP414213562), T1T, T1Q); + T29 = VFNMS(LDK(KP414213562), T1Q, T1T); + T7f = VFNMS(LDK(KP707106781), T1M, T1F); + T1N = VFMA(LDK(KP707106781), T1M, T1F); + T28 = VFMA(LDK(KP707106781), T27, T26); + T7i = VFNMS(LDK(KP707106781), T27, T26); + Tex = VSUB(TaQ, TaR); + TaS = VADD(TaQ, TaR); + T21 = VFNMS(LDK(KP414213562), T20, T1X); + T2a = VFMA(LDK(KP414213562), T1X, T20); + } + } + { + V T2J, T2U, T30, T3b, TeL, Tb9, TeO, Tbg, T2M, Tba, T2P, Tbb, T34, Tbh, T33; + V T35; + { + V T2H, T2I, T2S, T2T, T2Y, T2Z, T39, T3a; + T2H = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V Tdn, TaW, Tey, TeB; + Tdn = VADD(TaV, TaS); + TaW = VSUB(TaS, TaV); + Tey = VADD(Tew, Tex); + TeB = VSUB(Tex, Tew); + { + V T2b, T7g, T22, T7j; + T2b = VADD(T29, T2a); + T7g = VSUB(T2a, T29); + T22 = VADD(T1U, T21); + T7j = VSUB(T21, T1U); + TaX = VFNMS(LDK(KP414213562), TaW, TaP); + Tcf = VFMA(LDK(KP414213562), TaP, TaW); + Tdo = VADD(Tdm, Tdn); + TdH = VSUB(Tdm, Tdn); + Tgl = VFNMS(LDK(KP707106781), TeB, TeA); + TeC = VFMA(LDK(KP707106781), TeB, TeA); + Tgm = VFNMS(LDK(KP707106781), Tey, Tev); + Tez = 
VFMA(LDK(KP707106781), Tey, Tev); + T90 = VFNMS(LDK(KP923879532), T7g, T7f); + T7h = VFMA(LDK(KP923879532), T7g, T7f); + T5P = VFNMS(LDK(KP923879532), T2b, T28); + T2c = VFMA(LDK(KP923879532), T2b, T28); + T8Z = VFMA(LDK(KP923879532), T7j, T7i); + T7k = VFNMS(LDK(KP923879532), T7j, T7i); + T5Q = VFNMS(LDK(KP923879532), T22, T1N); + T23 = VFMA(LDK(KP923879532), T22, T1N); + T2I = LD(&(xi[WS(is, 69)]), ivs, &(xi[WS(is, 1)])); + } + } + T2S = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T2T = LD(&(xi[WS(is, 101)]), ivs, &(xi[WS(is, 1)])); + T2Y = LD(&(xi[WS(is, 125)]), ivs, &(xi[WS(is, 1)])); + T2Z = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T39 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T3a = LD(&(xi[WS(is, 93)]), ivs, &(xi[WS(is, 1)])); + { + V T2K, Tbe, Tbf, T2L, T2N, T2O, Tb7, Tb8, T31, T32; + T2K = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T2J = VSUB(T2H, T2I); + Tb7 = VADD(T2H, T2I); + T2U = VSUB(T2S, T2T); + Tb8 = VADD(T2S, T2T); + T30 = VSUB(T2Y, T2Z); + Tbe = VADD(T2Y, T2Z); + T3b = VSUB(T39, T3a); + Tbf = VADD(T39, T3a); + T2L = LD(&(xi[WS(is, 85)]), ivs, &(xi[WS(is, 1)])); + T2N = LD(&(xi[WS(is, 117)]), ivs, &(xi[WS(is, 1)])); + T2O = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + TeL = VSUB(Tb7, Tb8); + Tb9 = VADD(Tb7, Tb8); + T31 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T32 = LD(&(xi[WS(is, 77)]), ivs, &(xi[WS(is, 1)])); + TeO = VSUB(Tbe, Tbf); + Tbg = VADD(Tbe, Tbf); + T2M = VSUB(T2K, T2L); + Tba = VADD(T2K, T2L); + T2P = VSUB(T2N, T2O); + Tbb = VADD(T2N, T2O); + T34 = LD(&(xi[WS(is, 109)]), ivs, &(xi[WS(is, 1)])); + Tbh = VADD(T31, T32); + T33 = VSUB(T31, T32); + T35 = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + } + } + { + V T4d, T4e, T4o, T4p; + { + V T2X, T3q, T7t, T7C, T3r, T3e, T7D, T7w; + { + V T47, TbE, Tbd, Td9, TeW, TeN, T7s, T2W, T7r, T2R, TeP, Tbj, T37, T3c, T48; + { + V T3W, T3X, TeM, Tbc, T2Q, T2V, Tbi, T36; + T3W = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3X = LD(&(xi[WS(is, 67)]), ivs, 
&(xi[WS(is, 1)])); + TeM = VSUB(Tba, Tbb); + Tbc = VADD(Tba, Tbb); + T2Q = VADD(T2M, T2P); + T2V = VSUB(T2M, T2P); + T47 = LD(&(xi[WS(is, 99)]), ivs, &(xi[WS(is, 1)])); + Tbi = VADD(T34, T35); + T36 = VSUB(T34, T35); + TbE = VADD(T3W, T3X); + T3Y = VSUB(T3W, T3X); + Tbd = VSUB(Tb9, Tbc); + Td9 = VADD(Tb9, Tbc); + TeW = VFMA(LDK(KP414213562), TeL, TeM); + TeN = VFNMS(LDK(KP414213562), TeM, TeL); + T7s = VFNMS(LDK(KP707106781), T2V, T2U); + T2W = VFMA(LDK(KP707106781), T2V, T2U); + T7r = VFNMS(LDK(KP707106781), T2Q, T2J); + T2R = VFMA(LDK(KP707106781), T2Q, T2J); + TeP = VSUB(Tbh, Tbi); + Tbj = VADD(Tbh, Tbi); + T37 = VADD(T33, T36); + T3c = VSUB(T33, T36); + T48 = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + } + T2X = VFNMS(LDK(KP198912367), T2W, T2R); + T3q = VFMA(LDK(KP198912367), T2R, T2W); + T7t = VFMA(LDK(KP668178637), T7s, T7r); + T7C = VFNMS(LDK(KP668178637), T7r, T7s); + { + V Tbk, Tda, TeX, TeQ; + Tbk = VSUB(Tbg, Tbj); + Tda = VADD(Tbg, Tbj); + TeX = VFNMS(LDK(KP414213562), TeO, TeP); + TeQ = VFMA(LDK(KP414213562), TeP, TeO); + { + V T7v, T3d, T7u, T38, TbF; + T7v = VFNMS(LDK(KP707106781), T3c, T3b); + T3d = VFMA(LDK(KP707106781), T3c, T3b); + T7u = VFNMS(LDK(KP707106781), T37, T30); + T38 = VFMA(LDK(KP707106781), T37, T30); + T49 = VSUB(T47, T48); + TbF = VADD(T48, T47); + TdL = VSUB(Td9, Tda); + Tdb = VADD(Td9, Tda); + Tbu = VSUB(Tbd, Tbk); + Tbl = VADD(Tbd, Tbk); + Tgu = VSUB(TeN, TeQ); + TeR = VADD(TeN, TeQ); + Tgr = VSUB(TeW, TeX); + TeY = VADD(TeW, TeX); + T3r = VFNMS(LDK(KP198912367), T38, T3d); + T3e = VFMA(LDK(KP198912367), T3d, T38); + T7D = VFMA(LDK(KP668178637), T7u, T7v); + T7w = VFNMS(LDK(KP668178637), T7v, T7u); + Tf6 = VSUB(TbE, TbF); + TbG = VADD(TbE, TbF); + } + } + } + T4d = LD(&(xi[WS(is, 123)]), ivs, &(xi[WS(is, 1)])); + T5V = VSUB(T3q, T3r); + T3s = VADD(T3q, T3r); + T5Y = VSUB(T2X, T3e); + T3f = VADD(T2X, T3e); + T95 = VSUB(T7D, T7C); + T7E = VADD(T7C, T7D); + T98 = VSUB(T7t, T7w); + T7x = VADD(T7t, T7w); + T4e = LD(&(xi[WS(is, 
59)]), ivs, &(xi[WS(is, 1)])); + T4o = LD(&(xi[WS(is, 91)]), ivs, &(xi[WS(is, 1)])); + T4p = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + } + { + V T3Z, T40, T42, T43, TbL, TbM; + T3Z = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T40 = LD(&(xi[WS(is, 83)]), ivs, &(xi[WS(is, 1)])); + T42 = LD(&(xi[WS(is, 115)]), ivs, &(xi[WS(is, 1)])); + T43 = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T4g = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4f = VSUB(T4d, T4e); + TbL = VADD(T4d, T4e); + T4q = VSUB(T4o, T4p); + TbM = VADD(T4p, T4o); + TbH = VADD(T3Z, T40); + T41 = VSUB(T3Z, T40); + TbI = VADD(T42, T43); + T44 = VSUB(T42, T43); + T4h = LD(&(xi[WS(is, 75)]), ivs, &(xi[WS(is, 1)])); + T4j = LD(&(xi[WS(is, 107)]), ivs, &(xi[WS(is, 1)])); + T4k = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + Tf9 = VSUB(TbL, TbM); + TbN = VADD(TbL, TbM); + } + } + } + } + } + { + V TgB, Tgy, T62, T4H, T65, T4u, T9c, T7X, T9f, T7Q, Tg0, Tga, TfF, TeF, TfT; + V TfU, TfP, Tg7, TfI, Tfy, TfA, Tf0, Tfz, Tfl, Tg2, TfS; + { + V Tc1, TbS, Tfc, Tfj, TdX, Te5, TdZ, TdR, Te7, Te3, TdU, Te4; + { + V TdF, TdS, Tdx, Td5, TdO, TdE, TdC, Tdt, Tdk; + { + V Tdc, TdA, T4F, T4c, T7V, T7M, T4G, T4t, T7W, T7P, TdB, Tdj; + { + V Td1, Tdg, TbK, Tf8, Tfh, T4b, T7L, T46, T7K, TbQ, Tfa, T4r, T4m, Td4; + TdF = VSUB(TcZ, Td0); + Td1 = VADD(TcZ, Td0); + { + V TbJ, Tf7, T4a, T45; + TbJ = VADD(TbH, TbI); + Tf7 = VSUB(TbI, TbH); + T4a = VSUB(T44, T41); + T45 = VADD(T41, T44); + { + V TbO, T4i, TbP, T4l; + TbO = VADD(T4g, T4h); + T4i = VSUB(T4g, T4h); + TbP = VADD(T4j, T4k); + T4l = VSUB(T4j, T4k); + Tdg = VADD(TbG, TbJ); + TbK = VSUB(TbG, TbJ); + Tf8 = VFMA(LDK(KP414213562), Tf7, Tf6); + Tfh = VFNMS(LDK(KP414213562), Tf6, Tf7); + T4b = VFMA(LDK(KP707106781), T4a, T49); + T7L = VFNMS(LDK(KP707106781), T4a, T49); + T46 = VFMA(LDK(KP707106781), T45, T3Y); + T7K = VFNMS(LDK(KP707106781), T45, T3Y); + TbQ = VADD(TbO, TbP); + Tfa = VSUB(TbP, TbO); + T4r = VSUB(T4l, T4i); + T4m = VADD(T4i, T4l); + Td4 = VADD(Td2, 
Td3); + TdS = VSUB(Td3, Td2); + } + } + Tdc = VSUB(Td8, Tdb); + TdA = VADD(Td8, Tdb); + T4F = VFNMS(LDK(KP198912367), T46, T4b); + T4c = VFMA(LDK(KP198912367), T4b, T46); + T7V = VFMA(LDK(KP668178637), T7K, T7L); + T7M = VFNMS(LDK(KP668178637), T7L, T7K); + { + V Tdh, TbR, Tfb, Tfi; + Tdh = VADD(TbN, TbQ); + TbR = VSUB(TbN, TbQ); + Tfb = VFNMS(LDK(KP414213562), Tfa, Tf9); + Tfi = VFMA(LDK(KP414213562), Tf9, Tfa); + { + V T4s, T7O, T4n, T7N, Tdi; + T4s = VFMA(LDK(KP707106781), T4r, T4q); + T7O = VFNMS(LDK(KP707106781), T4r, T4q); + T4n = VFMA(LDK(KP707106781), T4m, T4f); + T7N = VFNMS(LDK(KP707106781), T4m, T4f); + Tdx = VADD(Td1, Td4); + Td5 = VSUB(Td1, Td4); + TdO = VSUB(Tdh, Tdg); + Tdi = VADD(Tdg, Tdh); + Tc1 = VSUB(TbR, TbK); + TbS = VADD(TbK, TbR); + TgB = VSUB(Tfb, Tf8); + Tfc = VADD(Tf8, Tfb); + Tgy = VSUB(Tfi, Tfh); + Tfj = VADD(Tfh, Tfi); + T4G = VFMA(LDK(KP198912367), T4n, T4s); + T4t = VFNMS(LDK(KP198912367), T4s, T4n); + T7W = VFNMS(LDK(KP668178637), T7N, T7O); + T7P = VFMA(LDK(KP668178637), T7O, T7N); + TdB = VADD(Tdf, Tdi); + Tdj = VSUB(Tdf, Tdi); + } + } + } + T62 = VSUB(T4G, T4F); + T4H = VADD(T4F, T4G); + T65 = VSUB(T4t, T4c); + T4u = VADD(T4c, T4t); + T9c = VSUB(T7V, T7W); + T7X = VADD(T7V, T7W); + T9f = VSUB(T7P, T7M); + T7Q = VADD(T7M, T7P); + TdE = VSUB(TdB, TdA); + TdC = VADD(TdA, TdB); + Tdt = VSUB(Tdj, Tdc); + Tdk = VADD(Tdc, Tdj); + } + { + V TdT, Tdl, Tdv, TdJ, Te1, Te2, TdQ, Tdz, TdD, Tdu, Tdw; + { + V TdI, TdP, TdV, TdW, TdM, Tds, Tdy; + TdI = VADD(TdG, TdH); + TdT = VSUB(TdH, TdG); + TdP = VFNMS(LDK(KP414213562), TdO, TdN); + TdV = VFMA(LDK(KP414213562), TdN, TdO); + TdW = VFMA(LDK(KP414213562), TdK, TdL); + TdM = VFNMS(LDK(KP414213562), TdL, TdK); + Tdl = VFNMS(LDK(KP707106781), Tdk, Td5); + Tdv = VFMA(LDK(KP707106781), Tdk, Td5); + Tds = VSUB(Tdo, Tdr); + Tdy = VADD(Tdr, Tdo); + TdJ = VFMA(LDK(KP707106781), TdI, TdF); + Te1 = VFNMS(LDK(KP707106781), TdI, TdF); + TdX = VSUB(TdV, TdW); + Te2 = VADD(TdW, TdV); + Te5 = VSUB(TdP, TdM); + 
TdQ = VADD(TdM, TdP); + Tdz = VADD(Tdx, Tdy); + TdD = VSUB(Tdx, Tdy); + Tdu = VFNMS(LDK(KP707106781), Tdt, Tds); + Tdw = VFMA(LDK(KP707106781), Tdt, Tds); + } + TdZ = VFMA(LDK(KP923879532), TdQ, TdJ); + TdR = VFNMS(LDK(KP923879532), TdQ, TdJ); + Te7 = VFMA(LDK(KP923879532), Te2, Te1); + Te3 = VFNMS(LDK(KP923879532), Te2, Te1); + ST(&(xo[WS(os, 32)]), VFMAI(TdE, TdD), ovs, &(xo[0])); + ST(&(xo[WS(os, 96)]), VFNMSI(TdE, TdD), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Tdz, TdC), ovs, &(xo[0])); + ST(&(xo[WS(os, 64)]), VSUB(Tdz, TdC), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VFMAI(Tdw, Tdv), ovs, &(xo[0])); + ST(&(xo[WS(os, 112)]), VFNMSI(Tdw, Tdv), ovs, &(xo[0])); + ST(&(xo[WS(os, 80)]), VFMAI(Tdu, Tdl), ovs, &(xo[0])); + ST(&(xo[WS(os, 48)]), VFNMSI(Tdu, Tdl), ovs, &(xo[0])); + TdU = VFMA(LDK(KP707106781), TdT, TdS); + Te4 = VFNMS(LDK(KP707106781), TdT, TdS); + } + } + { + V Tcx, TcJ, TcI, Tcy, TcA, Tbm, Tcp, TaZ, Tcs, Tci, Tbv, TcB, TcD, TbT, Tc2; + V TcE, Tat, TaY; + Tcx = VFNMS(LDK(KP707106781), Tas, Tad); + Tat = VFMA(LDK(KP707106781), Tas, Tad); + TaY = VADD(TaI, TaX); + TcJ = VSUB(TaX, TaI); + { + V Tce, Tch, Te8, Te6, TdY, Te0; + TcI = VFNMS(LDK(KP707106781), Tcd, Tcc); + Tce = VFMA(LDK(KP707106781), Tcd, Tcc); + Tch = VSUB(Tcf, Tcg); + Tcy = VADD(Tcg, Tcf); + Te8 = VFNMS(LDK(KP923879532), Te5, Te4); + Te6 = VFMA(LDK(KP923879532), Te5, Te4); + TdY = VFNMS(LDK(KP923879532), TdX, TdU); + Te0 = VFMA(LDK(KP923879532), TdX, TdU); + TcA = VFNMS(LDK(KP707106781), Tbl, Tb6); + Tbm = VFMA(LDK(KP707106781), Tbl, Tb6); + Tcp = VFNMS(LDK(KP923879532), TaY, Tat); + TaZ = VFMA(LDK(KP923879532), TaY, Tat); + Tcs = VFNMS(LDK(KP923879532), Tch, Tce); + Tci = VFMA(LDK(KP923879532), Tch, Tce); + ST(&(xo[WS(os, 88)]), VFNMSI(Te6, Te3), ovs, &(xo[0])); + ST(&(xo[WS(os, 40)]), VFMAI(Te6, Te3), ovs, &(xo[0])); + ST(&(xo[WS(os, 104)]), VFMAI(Te8, Te7), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VFNMSI(Te8, Te7), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(Te0, TdZ), ovs, &(xo[0])); + 
ST(&(xo[WS(os, 120)]), VFNMSI(Te0, TdZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 72)]), VFMAI(TdY, TdR), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VFNMSI(TdY, TdR), ovs, &(xo[0])); + Tbv = VFMA(LDK(KP707106781), Tbu, Tbt); + TcB = VFNMS(LDK(KP707106781), Tbu, Tbt); + TcD = VFNMS(LDK(KP707106781), TbS, TbD); + TbT = VFMA(LDK(KP707106781), TbS, TbD); + Tc2 = VFMA(LDK(KP707106781), Tc1, Tc0); + TcE = VFNMS(LDK(KP707106781), Tc1, Tc0); + } + { + V TcR, Tcz, TcU, TcK, Tcq, Tcl, Tct, Tc4; + { + V Tck, Tbw, Tcj, Tc3; + Tck = VFMA(LDK(KP198912367), Tbm, Tbv); + Tbw = VFNMS(LDK(KP198912367), Tbv, Tbm); + Tcj = VFMA(LDK(KP198912367), TbT, Tc2); + Tc3 = VFNMS(LDK(KP198912367), Tc2, TbT); + TcR = VFNMS(LDK(KP923879532), Tcy, Tcx); + Tcz = VFMA(LDK(KP923879532), Tcy, Tcx); + TcU = VFMA(LDK(KP923879532), TcJ, TcI); + TcK = VFNMS(LDK(KP923879532), TcJ, TcI); + Tcq = VADD(Tck, Tcj); + Tcl = VSUB(Tcj, Tck); + Tct = VSUB(Tc3, Tbw); + Tc4 = VADD(Tbw, Tc3); + } + { + V TfN, Tel, TfY, Tfu, Tfw, Tfv, TcT, TcX, TcQ, TcO, TcW, TcY, TcP, TcH, TfZ; + V TeE; + { + V Teu, TcS, TcN, TcV, TcG, TeD; + TfN = VFNMS(LDK(KP923879532), Tek, Ted); + Tel = VFMA(LDK(KP923879532), Tek, Ted); + { + V TcM, TcC, Tcr, Tcv; + TcM = VFNMS(LDK(KP668178637), TcA, TcB); + TcC = VFMA(LDK(KP668178637), TcB, TcA); + Tcr = VFNMS(LDK(KP980785280), Tcq, Tcp); + Tcv = VFMA(LDK(KP980785280), Tcq, Tcp); + { + V Tco, Tcm, Tcu, Tcw; + Tco = VFMA(LDK(KP980785280), Tcl, Tci); + Tcm = VFNMS(LDK(KP980785280), Tcl, Tci); + Tcu = VFMA(LDK(KP980785280), Tct, Tcs); + Tcw = VFNMS(LDK(KP980785280), Tct, Tcs); + { + V Tcn, Tc5, TcL, TcF; + Tcn = VFMA(LDK(KP980785280), Tc4, TaZ); + Tc5 = VFNMS(LDK(KP980785280), Tc4, TaZ); + TcL = VFNMS(LDK(KP668178637), TcD, TcE); + TcF = VFMA(LDK(KP668178637), TcE, TcD); + TfY = VFNMS(LDK(KP923879532), Tft, Tfq); + Tfu = VFMA(LDK(KP923879532), Tft, Tfq); + Tfw = VFMA(LDK(KP198912367), Teq, Tet); + Teu = VFNMS(LDK(KP198912367), Tet, Teq); + ST(&(xo[WS(os, 92)]), VFNMSI(Tcu, Tcr), ovs, &(xo[0])); + 
ST(&(xo[WS(os, 36)]), VFMAI(Tcu, Tcr), ovs, &(xo[0])); + ST(&(xo[WS(os, 100)]), VFMAI(Tcw, Tcv), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VFNMSI(Tcw, Tcv), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(Tco, Tcn), ovs, &(xo[0])); + ST(&(xo[WS(os, 124)]), VFNMSI(Tco, Tcn), ovs, &(xo[0])); + ST(&(xo[WS(os, 68)]), VFMAI(Tcm, Tc5), ovs, &(xo[0])); + ST(&(xo[WS(os, 60)]), VFNMSI(Tcm, Tc5), ovs, &(xo[0])); + TcS = VADD(TcM, TcL); + TcN = VSUB(TcL, TcM); + TcV = VSUB(TcF, TcC); + TcG = VADD(TcC, TcF); + TeD = VFNMS(LDK(KP198912367), TeC, Tez); + Tfv = VFMA(LDK(KP198912367), Tez, TeC); + } + } + } + TcT = VFMA(LDK(KP831469612), TcS, TcR); + TcX = VFNMS(LDK(KP831469612), TcS, TcR); + TcQ = VFMA(LDK(KP831469612), TcN, TcK); + TcO = VFNMS(LDK(KP831469612), TcN, TcK); + TcW = VFNMS(LDK(KP831469612), TcV, TcU); + TcY = VFMA(LDK(KP831469612), TcV, TcU); + TcP = VFMA(LDK(KP831469612), TcG, Tcz); + TcH = VFNMS(LDK(KP831469612), TcG, Tcz); + TfZ = VSUB(TeD, Teu); + TeE = VADD(Teu, TeD); + } + { + V TfQ, TeS, TfO, Tfx, TeZ, TfR, Tfd, Tfk; + TfQ = VFNMS(LDK(KP923879532), TeR, TeK); + TeS = VFMA(LDK(KP923879532), TeR, TeK); + ST(&(xo[WS(os, 84)]), VFMAI(TcW, TcT), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VFNMSI(TcW, TcT), ovs, &(xo[0])); + ST(&(xo[WS(os, 108)]), VFNMSI(TcY, TcX), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VFMAI(TcY, TcX), ovs, &(xo[0])); + ST(&(xo[WS(os, 116)]), VFMAI(TcQ, TcP), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(TcQ, TcP), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VFMAI(TcO, TcH), ovs, &(xo[0])); + ST(&(xo[WS(os, 76)]), VFNMSI(TcO, TcH), ovs, &(xo[0])); + Tg0 = VFNMS(LDK(KP980785280), TfZ, TfY); + Tga = VFMA(LDK(KP980785280), TfZ, TfY); + TfF = VFNMS(LDK(KP980785280), TeE, Tel); + TeF = VFMA(LDK(KP980785280), TeE, Tel); + TfO = VADD(Tfw, Tfv); + Tfx = VSUB(Tfv, Tfw); + TeZ = VFMA(LDK(KP923879532), TeY, TeV); + TfR = VFNMS(LDK(KP923879532), TeY, TeV); + TfT = VFNMS(LDK(KP923879532), Tfc, Tf5); + Tfd = VFMA(LDK(KP923879532), Tfc, Tf5); + Tfk = VFMA(LDK(KP923879532), 
Tfj, Tfg); + TfU = VFNMS(LDK(KP923879532), Tfj, Tfg); + TfP = VFMA(LDK(KP980785280), TfO, TfN); + Tg7 = VFNMS(LDK(KP980785280), TfO, TfN); + TfI = VFNMS(LDK(KP980785280), Tfx, Tfu); + Tfy = VFMA(LDK(KP980785280), Tfx, Tfu); + TfA = VFMA(LDK(KP098491403), TeS, TeZ); + Tf0 = VFNMS(LDK(KP098491403), TeZ, TeS); + Tfz = VFMA(LDK(KP098491403), Tfd, Tfk); + Tfl = VFNMS(LDK(KP098491403), Tfk, Tfd); + Tg2 = VFNMS(LDK(KP820678790), TfQ, TfR); + TfS = VFMA(LDK(KP820678790), TfR, TfQ); + } + } + } + } + } + { + V T8x, T8y, T8F, T8w, T8k, T8f, T8n, T80, T9l, T76, T87, T8U, T89, T7e, T7l; + V T8a; + { + V The, Tho, TgT, Tgp, Th7, Th8, Thg, Th6, Th3, Thl, TgW, TgM, TgU, TgP, TgX; + V TgE; + { + V Th1, TgI, TgK, TgJ; + { + V Tgh, Thc, Tgk, TfG, TfB, TfJ, Tfm, Tg1, TfV, Tgn, TfL, TfH; + Th1 = VFMA(LDK(KP923879532), Tgg, Tgf); + Tgh = VFNMS(LDK(KP923879532), Tgg, Tgf); + Thc = VFNMS(LDK(KP923879532), TgH, TgG); + TgI = VFMA(LDK(KP923879532), TgH, TgG); + TgK = VFMA(LDK(KP668178637), Tgi, Tgj); + Tgk = VFNMS(LDK(KP668178637), Tgj, Tgi); + TfG = VADD(TfA, Tfz); + TfB = VSUB(Tfz, TfA); + TfJ = VSUB(Tfl, Tf0); + Tfm = VADD(Tf0, Tfl); + Tg1 = VFNMS(LDK(KP820678790), TfT, TfU); + TfV = VFMA(LDK(KP820678790), TfU, TfT); + Tgn = VFNMS(LDK(KP668178637), Tgm, Tgl); + TgJ = VFMA(LDK(KP668178637), Tgl, Tgm); + TfL = VFMA(LDK(KP995184726), TfG, TfF); + TfH = VFNMS(LDK(KP995184726), TfG, TfF); + { + V TfE, TfC, TfM, TfK; + TfE = VFMA(LDK(KP995184726), TfB, Tfy); + TfC = VFNMS(LDK(KP995184726), TfB, Tfy); + TfM = VFNMS(LDK(KP995184726), TfJ, TfI); + TfK = VFMA(LDK(KP995184726), TfJ, TfI); + { + V TfD, Tfn, Tg8, Tg3; + TfD = VFMA(LDK(KP995184726), Tfm, TeF); + Tfn = VFNMS(LDK(KP995184726), Tfm, TeF); + Tg8 = VADD(Tg2, Tg1); + Tg3 = VSUB(Tg1, Tg2); + { + V Tgb, TfW, Thd, Tgo; + Tgb = VSUB(TfV, TfS); + TfW = VADD(TfS, TfV); + Thd = VSUB(Tgn, Tgk); + Tgo = VADD(Tgk, Tgn); + ST(&(xo[WS(os, 98)]), VFMAI(TfM, TfL), ovs, &(xo[0])); + ST(&(xo[WS(os, 30)]), VFNMSI(TfM, TfL), ovs, &(xo[0])); + ST(&(xo[WS(os, 
94)]), VFNMSI(TfK, TfH), ovs, &(xo[0])); + ST(&(xo[WS(os, 34)]), VFMAI(TfK, TfH), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(TfE, TfD), ovs, &(xo[0])); + ST(&(xo[WS(os, 126)]), VFNMSI(TfE, TfD), ovs, &(xo[0])); + ST(&(xo[WS(os, 66)]), VFMAI(TfC, Tfn), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VFNMSI(TfC, Tfn), ovs, &(xo[0])); + { + V Tgd, Tg9, Tg6, Tg4; + Tgd = VFNMS(LDK(KP773010453), Tg8, Tg7); + Tg9 = VFMA(LDK(KP773010453), Tg8, Tg7); + Tg6 = VFMA(LDK(KP773010453), Tg3, Tg0); + Tg4 = VFNMS(LDK(KP773010453), Tg3, Tg0); + { + V Tge, Tgc, Tg5, TfX; + Tge = VFMA(LDK(KP773010453), Tgb, Tga); + Tgc = VFNMS(LDK(KP773010453), Tgb, Tga); + Tg5 = VFMA(LDK(KP773010453), TfW, TfP); + TfX = VFNMS(LDK(KP773010453), TfW, TfP); + The = VFMA(LDK(KP831469612), Thd, Thc); + Tho = VFNMS(LDK(KP831469612), Thd, Thc); + TgT = VFMA(LDK(KP831469612), Tgo, Tgh); + Tgp = VFNMS(LDK(KP831469612), Tgo, Tgh); + ST(&(xo[WS(os, 110)]), VFNMSI(Tge, Tgd), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(Tge, Tgd), ovs, &(xo[0])); + ST(&(xo[WS(os, 82)]), VFMAI(Tgc, Tg9), ovs, &(xo[0])); + ST(&(xo[WS(os, 46)]), VFNMSI(Tgc, Tg9), ovs, &(xo[0])); + ST(&(xo[WS(os, 114)]), VFMAI(Tg6, Tg5), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(Tg6, Tg5), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VFMAI(Tg4, TfX), ovs, &(xo[0])); + ST(&(xo[WS(os, 78)]), VFNMSI(Tg4, TfX), ovs, &(xo[0])); + } + } + } + } + } + } + { + V Th4, Tgs, Tgv, Th5, Tgz, TgC, Th2, TgL; + Th4 = VFMA(LDK(KP923879532), Tgr, Tgq); + Tgs = VFNMS(LDK(KP923879532), Tgr, Tgq); + Tgv = VFMA(LDK(KP923879532), Tgu, Tgt); + Th5 = VFNMS(LDK(KP923879532), Tgu, Tgt); + Th7 = VFMA(LDK(KP923879532), Tgy, Tgx); + Tgz = VFNMS(LDK(KP923879532), Tgy, Tgx); + TgC = VFMA(LDK(KP923879532), TgB, TgA); + Th8 = VFNMS(LDK(KP923879532), TgB, TgA); + Th2 = VADD(TgK, TgJ); + TgL = VSUB(TgJ, TgK); + { + V TgO, Tgw, TgN, TgD; + TgO = VFMA(LDK(KP534511135), Tgs, Tgv); + Tgw = VFNMS(LDK(KP534511135), Tgv, Tgs); + TgN = VFMA(LDK(KP534511135), Tgz, TgC); + TgD = 
VFNMS(LDK(KP534511135), TgC, Tgz); + Thg = VFNMS(LDK(KP303346683), Th4, Th5); + Th6 = VFMA(LDK(KP303346683), Th5, Th4); + Th3 = VFMA(LDK(KP831469612), Th2, Th1); + Thl = VFNMS(LDK(KP831469612), Th2, Th1); + TgW = VFNMS(LDK(KP831469612), TgL, TgI); + TgM = VFMA(LDK(KP831469612), TgL, TgI); + TgU = VADD(TgO, TgN); + TgP = VSUB(TgN, TgO); + TgX = VSUB(TgD, Tgw); + TgE = VADD(Tgw, TgD); + } + } + } + { + V T8u, T8v, T7R, T8d, T7G, Thm, Thh, Thp, Tha, T7Y, Thr, Thn; + { + V T7y, T7F, TgZ, TgV; + T8u = VFNMS(LDK(KP831469612), T7x, T7q); + T7y = VFMA(LDK(KP831469612), T7x, T7q); + T7F = VFMA(LDK(KP831469612), T7E, T7B); + T8v = VFNMS(LDK(KP831469612), T7E, T7B); + T8x = VFNMS(LDK(KP831469612), T7Q, T7J); + T7R = VFMA(LDK(KP831469612), T7Q, T7J); + TgZ = VFMA(LDK(KP881921264), TgU, TgT); + TgV = VFNMS(LDK(KP881921264), TgU, TgT); + { + V TgS, TgQ, Th0, TgY; + TgS = VFMA(LDK(KP881921264), TgP, TgM); + TgQ = VFNMS(LDK(KP881921264), TgP, TgM); + Th0 = VFNMS(LDK(KP881921264), TgX, TgW); + TgY = VFMA(LDK(KP881921264), TgX, TgW); + { + V TgR, TgF, Thf, Th9; + TgR = VFMA(LDK(KP881921264), TgE, Tgp); + TgF = VFNMS(LDK(KP881921264), TgE, Tgp); + Thf = VFNMS(LDK(KP303346683), Th7, Th8); + Th9 = VFMA(LDK(KP303346683), Th8, Th7); + T8d = VFNMS(LDK(KP148335987), T7y, T7F); + T7G = VFMA(LDK(KP148335987), T7F, T7y); + ST(&(xo[WS(os, 106)]), VFMAI(Th0, TgZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VFNMSI(Th0, TgZ), ovs, &(xo[0])); + ST(&(xo[WS(os, 86)]), VFNMSI(TgY, TgV), ovs, &(xo[0])); + ST(&(xo[WS(os, 42)]), VFMAI(TgY, TgV), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(TgS, TgR), ovs, &(xo[0])); + ST(&(xo[WS(os, 118)]), VFNMSI(TgS, TgR), ovs, &(xo[0])); + ST(&(xo[WS(os, 74)]), VFMAI(TgQ, TgF), ovs, &(xo[0])); + ST(&(xo[WS(os, 54)]), VFNMSI(TgQ, TgF), ovs, &(xo[0])); + Thm = VADD(Thg, Thf); + Thh = VSUB(Thf, Thg); + Thp = VSUB(Th9, Th6); + Tha = VADD(Th6, Th9); + T7Y = VFMA(LDK(KP831469612), T7X, T7U); + T8y = VFNMS(LDK(KP831469612), T7X, T7U); + } + } + } + Thr = 
VFNMS(LDK(KP956940335), Thm, Thl); + Thn = VFMA(LDK(KP956940335), Thm, Thl); + { + V Thk, Thi, Ths, Thq; + Thk = VFMA(LDK(KP956940335), Thh, The); + Thi = VFNMS(LDK(KP956940335), Thh, The); + Ths = VFMA(LDK(KP956940335), Thp, Tho); + Thq = VFNMS(LDK(KP956940335), Thp, Tho); + { + V Thj, Thb, T8e, T7Z; + Thj = VFMA(LDK(KP956940335), Tha, Th3); + Thb = VFNMS(LDK(KP956940335), Tha, Th3); + T8e = VFNMS(LDK(KP148335987), T7R, T7Y); + T7Z = VFMA(LDK(KP148335987), T7Y, T7R); + T8F = VFMA(LDK(KP741650546), T8u, T8v); + T8w = VFNMS(LDK(KP741650546), T8v, T8u); + ST(&(xo[WS(os, 102)]), VFNMSI(Ths, Thr), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VFMAI(Ths, Thr), ovs, &(xo[0])); + ST(&(xo[WS(os, 90)]), VFMAI(Thq, Thn), ovs, &(xo[0])); + ST(&(xo[WS(os, 38)]), VFNMSI(Thq, Thn), ovs, &(xo[0])); + ST(&(xo[WS(os, 122)]), VFMAI(Thk, Thj), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Thk, Thj), ovs, &(xo[0])); + ST(&(xo[WS(os, 58)]), VFMAI(Thi, Thb), ovs, &(xo[0])); + ST(&(xo[WS(os, 70)]), VFNMSI(Thi, Thb), ovs, &(xo[0])); + T8k = VADD(T8d, T8e); + T8f = VSUB(T8d, T8e); + T8n = VSUB(T7Z, T7G); + T80 = VADD(T7G, T7Z); + } + } + T9l = VSUB(T75, T72); + T76 = VADD(T72, T75); + T87 = VSUB(T85, T86); + T8U = VADD(T85, T86); + T89 = VFNMS(LDK(KP303346683), T7a, T7d); + T7e = VFMA(LDK(KP303346683), T7d, T7a); + T7l = VFMA(LDK(KP303346683), T7k, T7h); + T8a = VFNMS(LDK(KP303346683), T7h, T7k); + } + } + { + V T11, T5h, T5a, T55, T5d, T4K, T5C, T5x, T5F, T5q, T4X, T4Z, T1C, T2d, T50; + { + V T5k, T3g, T3t, T5l, T5n, T4v, T4I, T5o, T8G, T8z; + T5k = VFNMS(LDK(KP980785280), T3f, T2G); + T3g = VFMA(LDK(KP980785280), T3f, T2G); + T8G = VFMA(LDK(KP741650546), T8x, T8y); + T8z = VFNMS(LDK(KP741650546), T8y, T8x); + { + V T8r, T77, T8C, T88; + T8r = VFNMS(LDK(KP831469612), T76, T6Z); + T77 = VFMA(LDK(KP831469612), T76, T6Z); + T8C = VFNMS(LDK(KP831469612), T87, T84); + T88 = VFMA(LDK(KP831469612), T87, T84); + { + V T8D, T7m, T8s, T8b; + T8D = VSUB(T7l, T7e); + T7m = VADD(T7e, T7l); + T8s = VADD(T89, 
T8a); + T8b = VSUB(T89, T8a); + { + V T8M, T8H, T8P, T8A; + T8M = VADD(T8F, T8G); + T8H = VSUB(T8F, T8G); + T8P = VSUB(T8z, T8w); + T8A = VADD(T8w, T8z); + { + V T8E, T8O, T8j, T7n; + T8E = VFNMS(LDK(KP956940335), T8D, T8C); + T8O = VFMA(LDK(KP956940335), T8D, T8C); + T8j = VFNMS(LDK(KP956940335), T7m, T77); + T7n = VFMA(LDK(KP956940335), T7m, T77); + { + V T8t, T8L, T8m, T8c; + T8t = VFNMS(LDK(KP956940335), T8s, T8r); + T8L = VFMA(LDK(KP956940335), T8s, T8r); + T8m = VFNMS(LDK(KP956940335), T8b, T88); + T8c = VFMA(LDK(KP956940335), T8b, T88); + { + V T8K, T8I, T8S, T8Q; + T8K = VFMA(LDK(KP803207531), T8H, T8E); + T8I = VFNMS(LDK(KP803207531), T8H, T8E); + T8S = VFMA(LDK(KP803207531), T8P, T8O); + T8Q = VFNMS(LDK(KP803207531), T8P, T8O); + { + V T8p, T8l, T8h, T81; + T8p = VFNMS(LDK(KP989176509), T8k, T8j); + T8l = VFMA(LDK(KP989176509), T8k, T8j); + T8h = VFMA(LDK(KP989176509), T80, T7n); + T81 = VFNMS(LDK(KP989176509), T80, T7n); + { + V T8J, T8B, T8R, T8N; + T8J = VFMA(LDK(KP803207531), T8A, T8t); + T8B = VFNMS(LDK(KP803207531), T8A, T8t); + T8R = VFMA(LDK(KP803207531), T8M, T8L); + T8N = VFNMS(LDK(KP803207531), T8M, T8L); + { + V T8q, T8o, T8i, T8g; + T8q = VFNMS(LDK(KP989176509), T8n, T8m); + T8o = VFMA(LDK(KP989176509), T8n, T8m); + T8i = VFMA(LDK(KP989176509), T8f, T8c); + T8g = VFNMS(LDK(KP989176509), T8f, T8c); + ST(&(xo[WS(os, 115)]), VFMAI(T8K, T8J), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFNMSI(T8K, T8J), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 51)]), VFMAI(T8I, T8B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 77)]), VFNMSI(T8I, T8B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 109)]), VFNMSI(T8S, T8R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFMAI(T8S, T8R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 83)]), VFMAI(T8Q, T8N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 45)]), VFNMSI(T8Q, T8N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 99)]), VFMAI(T8q, T8p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VFNMSI(T8q, T8p), ovs, &(xo[WS(os, 1)])); 
+ ST(&(xo[WS(os, 93)]), VFNMSI(T8o, T8l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 35)]), VFMAI(T8o, T8l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFMAI(T8i, T8h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 125)]), VFNMSI(T8i, T8h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 67)]), VFMAI(T8g, T81), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 61)]), VFNMSI(T8g, T81), ovs, &(xo[WS(os, 1)])); + T3t = VFMA(LDK(KP980785280), T3s, T3p); + T5l = VFNMS(LDK(KP980785280), T3s, T3p); + } + } + } + } + } + } + } + } + } + T5n = VFNMS(LDK(KP980785280), T4u, T3V); + T4v = VFMA(LDK(KP980785280), T4u, T3V); + T4I = VFMA(LDK(KP980785280), T4H, T4E); + T5o = VFNMS(LDK(KP980785280), T4H, T4E); + { + V T53, T3u, T54, T4J, T5v, T5m, T5w, T5p, T10; + T6b = VSUB(TZ, TI); + T10 = VADD(TI, TZ); + T53 = VFMA(LDK(KP049126849), T3g, T3t); + T3u = VFNMS(LDK(KP049126849), T3t, T3g); + T54 = VFMA(LDK(KP049126849), T4v, T4I); + T4J = VFNMS(LDK(KP049126849), T4I, T4v); + T5v = VFNMS(LDK(KP906347169), T5k, T5l); + T5m = VFMA(LDK(KP906347169), T5l, T5k); + T5w = VFNMS(LDK(KP906347169), T5n, T5o); + T5p = VFMA(LDK(KP906347169), T5o, T5n); + T11 = VFMA(LDK(KP980785280), T10, Tr); + T5h = VFNMS(LDK(KP980785280), T10, Tr); + T5a = VADD(T53, T54); + T55 = VSUB(T53, T54); + T5d = VSUB(T4J, T3u); + T4K = VADD(T3u, T4J); + T5C = VADD(T5v, T5w); + T5x = VSUB(T5v, T5w); + T5F = VSUB(T5p, T5m); + T5q = VADD(T5m, T5p); + T4X = VSUB(T4V, T4W); + T5K = VADD(T4V, T4W); + } + T4Z = VFMA(LDK(KP098491403), T1s, T1B); + T1C = VFNMS(LDK(KP098491403), T1B, T1s); + T2d = VFNMS(LDK(KP098491403), T2c, T23); + T50 = VFMA(LDK(KP098491403), T23, T2c); + } + { + V T9y, T9t, T9B, T9i, T9o, T9n, T9F, T8V, T9Q, T9m, T9R, T92, Ta0, T9V, Ta3; + V T9O; + { + V T9I, T9J, T9L, T9d, T5s, T4Y, T5t, T2e, T5i, T51, T9r, T9a, T9g, T9M, T96; + V T99; + T9I = VFMA(LDK(KP831469612), T95, T94); + T96 = VFNMS(LDK(KP831469612), T95, T94); + T99 = VFNMS(LDK(KP831469612), T98, T97); + T9J = VFMA(LDK(KP831469612), T98, T97); + T9L = 
VFMA(LDK(KP831469612), T9c, T9b); + T9d = VFNMS(LDK(KP831469612), T9c, T9b); + T5s = VFNMS(LDK(KP980785280), T4X, T4U); + T4Y = VFMA(LDK(KP980785280), T4X, T4U); + T5t = VSUB(T2d, T1C); + T2e = VADD(T1C, T2d); + T5i = VADD(T4Z, T50); + T51 = VSUB(T4Z, T50); + T9r = VFNMS(LDK(KP599376933), T96, T99); + T9a = VFMA(LDK(KP599376933), T99, T96); + T9g = VFNMS(LDK(KP831469612), T9f, T9e); + T9M = VFMA(LDK(KP831469612), T9f, T9e); + { + V T5u, T5E, T8Y, T91; + T5u = VFMA(LDK(KP995184726), T5t, T5s); + T5E = VFNMS(LDK(KP995184726), T5t, T5s); + { + V T59, T2f, T5j, T5B; + T59 = VFNMS(LDK(KP995184726), T2e, T11); + T2f = VFMA(LDK(KP995184726), T2e, T11); + T5j = VFMA(LDK(KP995184726), T5i, T5h); + T5B = VFNMS(LDK(KP995184726), T5i, T5h); + { + V T5c, T52, T9s, T9h; + T5c = VFNMS(LDK(KP995184726), T51, T4Y); + T52 = VFMA(LDK(KP995184726), T51, T4Y); + T9s = VFNMS(LDK(KP599376933), T9d, T9g); + T9h = VFMA(LDK(KP599376933), T9g, T9d); + { + V T5A, T5y, T5I, T5G; + T5A = VFMA(LDK(KP740951125), T5x, T5u); + T5y = VFNMS(LDK(KP740951125), T5x, T5u); + T5I = VFNMS(LDK(KP740951125), T5F, T5E); + T5G = VFMA(LDK(KP740951125), T5F, T5E); + { + V T5f, T5b, T57, T4L; + T5f = VFMA(LDK(KP998795456), T5a, T59); + T5b = VFNMS(LDK(KP998795456), T5a, T59); + T57 = VFMA(LDK(KP998795456), T4K, T2f); + T4L = VFNMS(LDK(KP998795456), T4K, T2f); + { + V T5z, T5r, T5H, T5D; + T5z = VFMA(LDK(KP740951125), T5q, T5j); + T5r = VFNMS(LDK(KP740951125), T5q, T5j); + T5H = VFNMS(LDK(KP740951125), T5C, T5B); + T5D = VFMA(LDK(KP740951125), T5C, T5B); + { + V T5g, T5e, T58, T56; + T5g = VFMA(LDK(KP998795456), T5d, T5c); + T5e = VFNMS(LDK(KP998795456), T5d, T5c); + T58 = VFMA(LDK(KP998795456), T55, T52); + T56 = VFNMS(LDK(KP998795456), T55, T52); + T9y = VADD(T9r, T9s); + T9t = VSUB(T9r, T9s); + T9B = VSUB(T9h, T9a); + T9i = VADD(T9a, T9h); + ST(&(xo[WS(os, 15)]), VFMAI(T5A, T5z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 113)]), VFNMSI(T5A, T5z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 79)]), VFMAI(T5y, T5r), 
ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VFNMSI(T5y, T5r), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 111)]), VFMAI(T5I, T5H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFNMSI(T5I, T5H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 81)]), VFNMSI(T5G, T5D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 47)]), VFMAI(T5G, T5D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 97)]), VFNMSI(T5g, T5f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VFMAI(T5g, T5f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 95)]), VFMAI(T5e, T5b), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 33)]), VFNMSI(T5e, T5b), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 127)]), VFMAI(T58, T57), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(T58, T57), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 63)]), VFMAI(T56, T4L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 65)]), VFNMSI(T56, T4L), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + T9o = VFNMS(LDK(KP534511135), T8W, T8X); + T8Y = VFMA(LDK(KP534511135), T8X, T8W); + T91 = VFMA(LDK(KP534511135), T90, T8Z); + T9n = VFNMS(LDK(KP534511135), T8Z, T90); + { + V T9T, T9K, T9U, T9N; + T9T = VFMA(LDK(KP250486960), T9I, T9J); + T9K = VFNMS(LDK(KP250486960), T9J, T9I); + T9U = VFMA(LDK(KP250486960), T9L, T9M); + T9N = VFNMS(LDK(KP250486960), T9M, T9L); + T9F = VFNMS(LDK(KP831469612), T8U, T8T); + T8V = VFMA(LDK(KP831469612), T8U, T8T); + T9Q = VFNMS(LDK(KP831469612), T9l, T9k); + T9m = VFMA(LDK(KP831469612), T9l, T9k); + T9R = VSUB(T8Y, T91); + T92 = VADD(T8Y, T91); + Ta0 = VADD(T9T, T9U); + T9V = VSUB(T9T, T9U); + Ta3 = VSUB(T9N, T9K); + T9O = VADD(T9K, T9N); + } + } + } + { + V T6y, T6z, T63, T9Y, T9W, Ta6, Ta4, T9D, T9z, T9v, T9j, T6h, T60, T9H, T9Z; + V T9A, T9q, T66, T9X, T9P; + { + V T5W, T9S, Ta2, T9x, T93, T5Z, T9G, T9p; + T6y = VFMA(LDK(KP980785280), T5V, T5U); + T5W = VFNMS(LDK(KP980785280), T5V, T5U); + T9S = VFMA(LDK(KP881921264), T9R, T9Q); + Ta2 = VFNMS(LDK(KP881921264), T9R, T9Q); + T9x = VFNMS(LDK(KP881921264), T92, T8V); + T93 = VFMA(LDK(KP881921264), 
T92, T8V); + T5Z = VFMA(LDK(KP980785280), T5Y, T5X); + T6z = VFNMS(LDK(KP980785280), T5Y, T5X); + T6B = VFMA(LDK(KP980785280), T62, T61); + T63 = VFNMS(LDK(KP980785280), T62, T61); + T9G = VADD(T9o, T9n); + T9p = VSUB(T9n, T9o); + T9Y = VFMA(LDK(KP970031253), T9V, T9S); + T9W = VFNMS(LDK(KP970031253), T9V, T9S); + Ta6 = VFMA(LDK(KP970031253), Ta3, Ta2); + Ta4 = VFNMS(LDK(KP970031253), Ta3, Ta2); + T9D = VFNMS(LDK(KP857728610), T9y, T9x); + T9z = VFMA(LDK(KP857728610), T9y, T9x); + T9v = VFMA(LDK(KP857728610), T9i, T93); + T9j = VFNMS(LDK(KP857728610), T9i, T93); + T6h = VFMA(LDK(KP472964775), T5W, T5Z); + T60 = VFNMS(LDK(KP472964775), T5Z, T5W); + T9H = VFMA(LDK(KP881921264), T9G, T9F); + T9Z = VFNMS(LDK(KP881921264), T9G, T9F); + T9A = VFNMS(LDK(KP881921264), T9p, T9m); + T9q = VFMA(LDK(KP881921264), T9p, T9m); + T66 = VFMA(LDK(KP980785280), T65, T64); + T6C = VFNMS(LDK(KP980785280), T65, T64); + } + T9X = VFMA(LDK(KP970031253), T9O, T9H); + T9P = VFNMS(LDK(KP970031253), T9O, T9H); + { + V Ta5, Ta1, T9E, T9C; + Ta5 = VFMA(LDK(KP970031253), Ta0, T9Z); + Ta1 = VFNMS(LDK(KP970031253), Ta0, T9Z); + T9E = VFNMS(LDK(KP857728610), T9B, T9A); + T9C = VFMA(LDK(KP857728610), T9B, T9A); + { + V T9w, T9u, T6i, T67; + T9w = VFMA(LDK(KP857728610), T9t, T9q); + T9u = VFNMS(LDK(KP857728610), T9t, T9q); + T6i = VFMA(LDK(KP472964775), T63, T66); + T67 = VFNMS(LDK(KP472964775), T66, T63); + T6J = VFNMS(LDK(KP357805721), T6y, T6z); + T6A = VFMA(LDK(KP357805721), T6z, T6y); + ST(&(xo[WS(os, 123)]), VFMAI(T9Y, T9X), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(T9Y, T9X), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VFMAI(T9W, T9P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 69)]), VFNMSI(T9W, T9P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 101)]), VFNMSI(Ta6, Ta5), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VFMAI(Ta6, Ta5), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 91)]), VFMAI(Ta4, Ta1), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 37)]), VFNMSI(Ta4, Ta1), ovs, &(xo[WS(os, 
1)])); + ST(&(xo[WS(os, 107)]), VFMAI(T9E, T9D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 21)]), VFNMSI(T9E, T9D), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 85)]), VFNMSI(T9C, T9z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 43)]), VFMAI(T9C, T9z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFMAI(T9w, T9v), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 117)]), VFNMSI(T9w, T9v), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 75)]), VFMAI(T9u, T9j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VFNMSI(T9u, T9j), ovs, &(xo[WS(os, 1)])); + T6o = VADD(T6h, T6i); + T6j = VSUB(T6h, T6i); + T6r = VSUB(T67, T60); + T68 = VADD(T60, T67); + } + } + T6e = VFMA(LDK(KP820678790), T5M, T5N); + T5O = VFNMS(LDK(KP820678790), T5N, T5M); + T5R = VFNMS(LDK(KP820678790), T5Q, T5P); + T6d = VFMA(LDK(KP820678790), T5P, T5Q); + } + } + } + } + } + } + T6D = VFMA(LDK(KP357805721), T6C, T6B); + T6K = VFNMS(LDK(KP357805721), T6B, T6C); + { + V T5L, T6v, T6c, T6G; + T5L = VFNMS(LDK(KP980785280), T5K, T5J); + T6v = VFMA(LDK(KP980785280), T5K, T5J); + T6c = VFNMS(LDK(KP980785280), T6b, T6a); + T6G = VFMA(LDK(KP980785280), T6b, T6a); + { + V T5S, T6H, T6f, T6w; + T5S = VADD(T5O, T5R); + T6H = VSUB(T5O, T5R); + T6f = VSUB(T6d, T6e); + T6w = VADD(T6e, T6d); + { + V T6L, T6Q, T6E, T6T; + T6L = VSUB(T6J, T6K); + T6Q = VADD(T6J, T6K); + T6E = VADD(T6A, T6D); + T6T = VSUB(T6D, T6A); + { + V T6S, T6I, T5T, T6n; + T6S = VFNMS(LDK(KP773010453), T6H, T6G); + T6I = VFMA(LDK(KP773010453), T6H, T6G); + T5T = VFNMS(LDK(KP773010453), T5S, T5L); + T6n = VFMA(LDK(KP773010453), T5S, T5L); + { + V T6P, T6x, T6g, T6q; + T6P = VFNMS(LDK(KP773010453), T6w, T6v); + T6x = VFMA(LDK(KP773010453), T6w, T6v); + T6g = VFNMS(LDK(KP773010453), T6f, T6c); + T6q = VFMA(LDK(KP773010453), T6f, T6c); + { + V T6M, T6O, T6U, T6W; + T6M = VFNMS(LDK(KP941544065), T6L, T6I); + T6O = VFMA(LDK(KP941544065), T6L, T6I); + T6U = VFMA(LDK(KP941544065), T6T, T6S); + T6W = VFNMS(LDK(KP941544065), T6T, T6S); + { + V T6p, T6t, T69, T6l; + T6p = 
VFNMS(LDK(KP903989293), T6o, T6n); + T6t = VFMA(LDK(KP903989293), T6o, T6n); + T69 = VFNMS(LDK(KP903989293), T68, T5T); + T6l = VFMA(LDK(KP903989293), T68, T5T); + { + V T6F, T6N, T6R, T6V; + T6F = VFNMS(LDK(KP941544065), T6E, T6x); + T6N = VFMA(LDK(KP941544065), T6E, T6x); + T6R = VFMA(LDK(KP941544065), T6Q, T6P); + T6V = VFNMS(LDK(KP941544065), T6Q, T6P); + { + V T6s, T6u, T6k, T6m; + T6s = VFNMS(LDK(KP903989293), T6r, T6q); + T6u = VFMA(LDK(KP903989293), T6r, T6q); + T6k = VFNMS(LDK(KP903989293), T6j, T6g); + T6m = VFMA(LDK(KP903989293), T6j, T6g); + ST(&(xo[WS(os, 7)]), VFMAI(T6O, T6N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 121)]), VFNMSI(T6O, T6N), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 71)]), VFMAI(T6M, T6F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 57)]), VFNMSI(T6M, T6F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 103)]), VFMAI(T6W, T6V), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VFNMSI(T6W, T6V), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 89)]), VFNMSI(T6U, T6R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 39)]), VFMAI(T6U, T6R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 105)]), VFNMSI(T6u, T6t), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 23)]), VFMAI(T6u, T6t), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 87)]), VFMAI(T6s, T6p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 41)]), VFNMSI(T6s, T6p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 119)]), VFMAI(T6m, T6l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(T6m, T6l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VFMAI(T6k, T69), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 73)]), VFNMSI(T6k, T69), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + } + } + } + } + } + VLEAVE(); +} +/* Descriptor for the n1fv_128 codelet, handed to kdft_register below. Fields as written: 128, the name string "n1fv_128", the brace group {440, 0, 642, 0}, a pointer to GENUS, then four zeros. NOTE(review): 128 is presumably the transform size and {440, 0, 642, 0} presumably the operation counts (adds, muls, fmas, other) for this HAVE_FMA variant; the four zeros are presumably stride constraints -- confirm against the kdft_desc declaration. */ +static const kdft_desc desc = { 128, XSIMD_STRING("n1fv_128"), {440, 0, 642, 0}, &GENUS, 0, 0, 0, 0 }; +/* Registration entry point for this codelet: passes the planner p, the n1fv_128 kernel function and the address of desc to kdft_register. Does nothing else. */ +void XSIMD(codelet_n1fv_128) (planner *p) { + X(kdft_register) (p, n1fv_128, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 128 
-name n1fv_128 -include n1f.h */ + +/* + * This function contains 1082 FP additions, 330 FP multiplications, + * (or, 938 additions, 186 multiplications, 144 fused multiply/add), + * 194 stack variables, 31 constants, and 256 memory accesses + */ +#include "n1f.h" + +static void n1fv_128(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP941544065, +0.941544065183020778412509402599502357185589796); + DVK(KP336889853, +0.336889853392220050689253212619147570477766780); + DVK(KP903989293, +0.903989293123443331586200297230537048710132025); + DVK(KP427555093, +0.427555093430282094320966856888798534304578629); + DVK(KP970031253, +0.970031253194543992603984207286100251456865962); + DVK(KP242980179, +0.242980179903263889948274162077471118320990783); + DVK(KP857728610, +0.857728610000272069902269984284770137042490799); + DVK(KP514102744, +0.514102744193221726593693838968815772608049120); + DVK(KP671558954, +0.671558954847018400625376850427421803228750632); + DVK(KP740951125, +0.740951125354959091175616897495162729728955309); + DVK(KP049067674, +0.049067674327418014254954976942682658314745363); + DVK(KP998795456, +0.998795456205172392714771604759100694443203615); + DVK(KP595699304, +0.595699304492433343467036528829969889511926338); + DVK(KP803207531, +0.803207531480644909806676512963141923879569427); + DVK(KP146730474, +0.146730474455361751658850129646717819706215317); + DVK(KP989176509, +0.989176509964780973451673738016243063983689533); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + 
DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + V Tr, T5J, Ted, Tgf, Tfq, TgH, T4U, T6b, T6Z, T8T, Tad, TcZ, Tcc, Td0, T84; + V T9l, Tb6, Tbt, T2G, T5X, TeV, Tgr, T3p, T5V, T7B, T95, TeK, Tgt, T7q, T97; + V Td8, TdK, TbD, Tc0, T3V, T61, Tfg, TgB, T4E, T65, T7U, T9f, Tf5, Tgx, T7J; + V T9b, Tdf, TdN, Td2, Td3, TI, T4V, Tft, Tgg, TZ, T4W, T75, T86, Tek, TgG; + V T72, T85, Tas, Tcd, Tdp, Tdq, TdG, Teq, Tgm, Tet, Tgl, T1s, T5P, T1B, T5Q; + V T7d, T8Z, TaI, Tcf, T7a, T90, Tdm, Tdn, TdH, Tez, Tgi, TeC, Tgj, T23, T5N; + V T2c, T5M, T7k, T8X, TaX, Tcg, T7h, T8W, Tbl, Tbu, Tdb, TdL, TeY, Tgu, TeR; + V Tgq, T7x, T98, T7E, T94, T3f, T5Y, T3s, T5U, TbS, Tc1, Tdi, TdO, Tfj, Tgy; + V Tfc, TgA, T7Q, T9e, T7X, T9c, T4u, T64, T4H, T62; + { + V T3, Ta7, T4P, Ta8, Ta, Tab, T4M, Taa, Tc9, Tca, Ti, Tea, T4S, Tc6, Tc7; + V Tp, Teb, T4R; + { + V T1, T2, T4N, T4O; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 64)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + Ta7 = VADD(T1, T2); + T4N = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T4O = LD(&(xi[WS(is, 96)]), ivs, &(xi[0])); + T4P = VSUB(T4N, T4O); + Ta8 = VADD(T4N, T4O); + } + { + V T4, T5, T6, T7, T8, T9; + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 80)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T7 = LD(&(xi[WS(is, 
112)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tab = VADD(T7, T8); + T4M = VMUL(LDK(KP707106781), VSUB(T9, T6)); + Taa = VADD(T4, T5); + } + { + V Te, Th, Tl, To; + { + V Tc, Td, Tf, Tg; + Tc = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 72)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + Tc9 = VADD(Tc, Td); + Tf = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 104)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + Tca = VADD(Tf, Tg); + } + Ti = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + Tea = VSUB(Tc9, Tca); + T4S = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + { + V Tj, Tk, Tm, Tn; + Tj = LD(&(xi[WS(is, 120)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + Tc6 = VADD(Tj, Tk); + Tm = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 88)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + Tc7 = VADD(Tm, Tn); + } + Tp = VFMA(LDK(KP923879532), Tl, VMUL(LDK(KP382683432), To)); + Teb = VSUB(Tc6, Tc7); + T4R = VFNMS(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + } + { + V Tb, Tq, Te9, Tec; + Tb = VADD(T3, Ta); + Tq = VADD(Ti, Tp); + Tr = VADD(Tb, Tq); + T5J = VSUB(Tb, Tq); + Te9 = VSUB(Ta7, Ta8); + Tec = VMUL(LDK(KP707106781), VADD(Tea, Teb)); + Ted = VADD(Te9, Tec); + Tgf = VSUB(Te9, Tec); + } + { + V Tfo, Tfp, T4Q, T4T; + Tfo = VSUB(Tab, Taa); + Tfp = VMUL(LDK(KP707106781), VSUB(Teb, Tea)); + Tfq = VADD(Tfo, Tfp); + TgH = VSUB(Tfp, Tfo); + T4Q = VSUB(T4M, T4P); + T4T = VSUB(T4R, T4S); + T4U = VADD(T4Q, T4T); + T6b = VSUB(T4T, T4Q); + } + { + V T6X, T6Y, Ta9, Tac; + T6X = VSUB(T3, Ta); + T6Y = VADD(T4S, T4R); + T6Z = VADD(T6X, T6Y); + T8T = VSUB(T6X, T6Y); + Ta9 = VADD(Ta7, Ta8); + Tac = VADD(Taa, Tab); + Tad = VSUB(Ta9, Tac); + TcZ = VADD(Ta9, Tac); + } + { + V Tc8, Tcb, T82, T83; + Tc8 = VADD(Tc6, Tc7); + Tcb = VADD(Tc9, Tca); + Tcc = VSUB(Tc8, Tcb); + Td0 = VADD(Tcb, Tc8); + T82 = VADD(T4P, 
T4M); + T83 = VSUB(Tp, Ti); + T84 = VADD(T82, T83); + T9l = VSUB(T83, T82); + } + } + { + V Tb0, Tb1, T2i, Tb2, T3k, Tb3, Tb4, T2p, Tb5, T3h, T2x, TeH, T3n, Tbs, T2E; + V TeI, T3m, Tbp, T2l, T2o, TeG, TeJ; + { + V T2g, T2h, T3i, T3j; + T2g = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2h = LD(&(xi[WS(is, 65)]), ivs, &(xi[WS(is, 1)])); + Tb0 = VADD(T2g, T2h); + T3i = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T3j = LD(&(xi[WS(is, 97)]), ivs, &(xi[WS(is, 1)])); + Tb1 = VADD(T3i, T3j); + T2i = VSUB(T2g, T2h); + Tb2 = VADD(Tb0, Tb1); + T3k = VSUB(T3i, T3j); + } + { + V T2j, T2k, T2m, T2n; + T2j = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T2k = LD(&(xi[WS(is, 81)]), ivs, &(xi[WS(is, 1)])); + T2l = VSUB(T2j, T2k); + Tb3 = VADD(T2j, T2k); + T2m = LD(&(xi[WS(is, 113)]), ivs, &(xi[WS(is, 1)])); + T2n = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + T2o = VSUB(T2m, T2n); + Tb4 = VADD(T2m, T2n); + } + T2p = VMUL(LDK(KP707106781), VADD(T2l, T2o)); + Tb5 = VADD(Tb3, Tb4); + T3h = VMUL(LDK(KP707106781), VSUB(T2o, T2l)); + { + V T2t, Tbq, T2w, Tbr; + { + V T2r, T2s, T2u, T2v; + T2r = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T2s = LD(&(xi[WS(is, 73)]), ivs, &(xi[WS(is, 1)])); + T2t = VSUB(T2r, T2s); + Tbq = VADD(T2r, T2s); + T2u = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T2v = LD(&(xi[WS(is, 105)]), ivs, &(xi[WS(is, 1)])); + T2w = VSUB(T2u, T2v); + Tbr = VADD(T2u, T2v); + } + T2x = VFNMS(LDK(KP382683432), T2w, VMUL(LDK(KP923879532), T2t)); + TeH = VSUB(Tbq, Tbr); + T3n = VFMA(LDK(KP382683432), T2t, VMUL(LDK(KP923879532), T2w)); + Tbs = VADD(Tbq, Tbr); + } + { + V T2A, Tbn, T2D, Tbo; + { + V T2y, T2z, T2B, T2C; + T2y = LD(&(xi[WS(is, 121)]), ivs, &(xi[WS(is, 1)])); + T2z = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T2A = VSUB(T2y, T2z); + Tbn = VADD(T2y, T2z); + T2B = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + T2C = LD(&(xi[WS(is, 89)]), ivs, &(xi[WS(is, 1)])); + T2D = VSUB(T2B, T2C); + Tbo = VADD(T2B, T2C); + } + T2E = VFMA(LDK(KP923879532), 
T2A, VMUL(LDK(KP382683432), T2D)); + TeI = VSUB(Tbn, Tbo); + T3m = VFNMS(LDK(KP923879532), T2D, VMUL(LDK(KP382683432), T2A)); + Tbp = VADD(Tbn, Tbo); + } + Tb6 = VSUB(Tb2, Tb5); + Tbt = VSUB(Tbp, Tbs); + { + V T2q, T2F, TeT, TeU; + T2q = VADD(T2i, T2p); + T2F = VADD(T2x, T2E); + T2G = VADD(T2q, T2F); + T5X = VSUB(T2q, T2F); + TeT = VSUB(Tb4, Tb3); + TeU = VMUL(LDK(KP707106781), VSUB(TeI, TeH)); + TeV = VADD(TeT, TeU); + Tgr = VSUB(TeU, TeT); + } + { + V T3l, T3o, T7z, T7A; + T3l = VSUB(T3h, T3k); + T3o = VSUB(T3m, T3n); + T3p = VADD(T3l, T3o); + T5V = VSUB(T3o, T3l); + T7z = VADD(T3k, T3h); + T7A = VSUB(T2E, T2x); + T7B = VADD(T7z, T7A); + T95 = VSUB(T7A, T7z); + } + TeG = VSUB(Tb0, Tb1); + TeJ = VMUL(LDK(KP707106781), VADD(TeH, TeI)); + TeK = VADD(TeG, TeJ); + Tgt = VSUB(TeG, TeJ); + { + V T7o, T7p, Td6, Td7; + T7o = VSUB(T2i, T2p); + T7p = VADD(T3n, T3m); + T7q = VADD(T7o, T7p); + T97 = VSUB(T7o, T7p); + Td6 = VADD(Tb2, Tb5); + Td7 = VADD(Tbs, Tbp); + Td8 = VADD(Td6, Td7); + TdK = VSUB(Td6, Td7); + } + } + { + V Tbx, Tby, T3x, Tbz, T4z, TbA, TbB, T3E, TbC, T4w, T3M, Tf2, T4C, TbZ, T3T; + V Tf3, T4B, TbW, T3A, T3D, Tf1, Tf4; + { + V T3v, T3w, T4x, T4y; + T3v = LD(&(xi[WS(is, 127)]), ivs, &(xi[WS(is, 1)])); + T3w = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + Tbx = VADD(T3v, T3w); + T4x = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T4y = LD(&(xi[WS(is, 95)]), ivs, &(xi[WS(is, 1)])); + Tby = VADD(T4x, T4y); + T3x = VSUB(T3v, T3w); + Tbz = VADD(Tbx, Tby); + T4z = VSUB(T4x, T4y); + } + { + V T3y, T3z, T3B, T3C; + T3y = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T3z = LD(&(xi[WS(is, 79)]), ivs, &(xi[WS(is, 1)])); + T3A = VSUB(T3y, T3z); + TbA = VADD(T3y, T3z); + T3B = LD(&(xi[WS(is, 111)]), ivs, &(xi[WS(is, 1)])); + T3C = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + T3D = VSUB(T3B, T3C); + TbB = VADD(T3B, T3C); + } + T3E = VMUL(LDK(KP707106781), VADD(T3A, T3D)); + TbC = VADD(TbA, TbB); + T4w = VMUL(LDK(KP707106781), VSUB(T3D, T3A)); + { + V T3I, TbX, 
T3L, TbY; + { + V T3G, T3H, T3J, T3K; + T3G = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3H = LD(&(xi[WS(is, 71)]), ivs, &(xi[WS(is, 1)])); + T3I = VSUB(T3G, T3H); + TbX = VADD(T3G, T3H); + T3J = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + T3K = LD(&(xi[WS(is, 103)]), ivs, &(xi[WS(is, 1)])); + T3L = VSUB(T3J, T3K); + TbY = VADD(T3J, T3K); + } + T3M = VFNMS(LDK(KP382683432), T3L, VMUL(LDK(KP923879532), T3I)); + Tf2 = VSUB(TbX, TbY); + T4C = VFMA(LDK(KP382683432), T3I, VMUL(LDK(KP923879532), T3L)); + TbZ = VADD(TbX, TbY); + } + { + V T3P, TbU, T3S, TbV; + { + V T3N, T3O, T3Q, T3R; + T3N = LD(&(xi[WS(is, 119)]), ivs, &(xi[WS(is, 1)])); + T3O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T3P = VSUB(T3N, T3O); + TbU = VADD(T3N, T3O); + T3Q = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T3R = LD(&(xi[WS(is, 87)]), ivs, &(xi[WS(is, 1)])); + T3S = VSUB(T3Q, T3R); + TbV = VADD(T3Q, T3R); + } + T3T = VFMA(LDK(KP923879532), T3P, VMUL(LDK(KP382683432), T3S)); + Tf3 = VSUB(TbU, TbV); + T4B = VFNMS(LDK(KP923879532), T3S, VMUL(LDK(KP382683432), T3P)); + TbW = VADD(TbU, TbV); + } + TbD = VSUB(Tbz, TbC); + Tc0 = VSUB(TbW, TbZ); + { + V T3F, T3U, Tfe, Tff; + T3F = VADD(T3x, T3E); + T3U = VADD(T3M, T3T); + T3V = VADD(T3F, T3U); + T61 = VSUB(T3F, T3U); + Tfe = VSUB(TbB, TbA); + Tff = VMUL(LDK(KP707106781), VSUB(Tf3, Tf2)); + Tfg = VADD(Tfe, Tff); + TgB = VSUB(Tff, Tfe); + } + { + V T4A, T4D, T7S, T7T; + T4A = VSUB(T4w, T4z); + T4D = VSUB(T4B, T4C); + T4E = VADD(T4A, T4D); + T65 = VSUB(T4D, T4A); + T7S = VADD(T4z, T4w); + T7T = VSUB(T3T, T3M); + T7U = VADD(T7S, T7T); + T9f = VSUB(T7T, T7S); + } + Tf1 = VSUB(Tbx, Tby); + Tf4 = VMUL(LDK(KP707106781), VADD(Tf2, Tf3)); + Tf5 = VADD(Tf1, Tf4); + Tgx = VSUB(Tf1, Tf4); + { + V T7H, T7I, Tdd, Tde; + T7H = VSUB(T3x, T3E); + T7I = VADD(T4C, T4B); + T7J = VADD(T7H, T7I); + T9b = VSUB(T7H, T7I); + Tdd = VADD(Tbz, TbC); + Tde = VADD(TbZ, TbW); + Tdf = VADD(Tdd, Tde); + TdN = VSUB(Tdd, Tde); + } + } + { + V Tu, Tee, TG, Tag, TL, Teh, 
TX, Tan, TB, Tef, TD, Taj, TS, Tei, TU; + V Taq, Teg, Tej; + { + V Ts, Tt, Tae, TE, TF, Taf; + Ts = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 68)]), ivs, &(xi[0])); + Tae = VADD(Ts, Tt); + TE = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + TF = LD(&(xi[WS(is, 100)]), ivs, &(xi[0])); + Taf = VADD(TE, TF); + Tu = VSUB(Ts, Tt); + Tee = VSUB(Tae, Taf); + TG = VSUB(TE, TF); + Tag = VADD(Tae, Taf); + } + { + V TJ, TK, Tal, TV, TW, Tam; + TJ = LD(&(xi[WS(is, 124)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tal = VADD(TJ, TK); + TV = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + TW = LD(&(xi[WS(is, 92)]), ivs, &(xi[0])); + Tam = VADD(TV, TW); + TL = VSUB(TJ, TK); + Teh = VSUB(Tal, Tam); + TX = VSUB(TV, TW); + Tan = VADD(Tal, Tam); + } + { + V Tx, Tah, TA, Tai; + { + V Tv, Tw, Ty, Tz; + Tv = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 84)]), ivs, &(xi[0])); + Tx = VSUB(Tv, Tw); + Tah = VADD(Tv, Tw); + Ty = LD(&(xi[WS(is, 116)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + Tai = VADD(Ty, Tz); + } + TB = VMUL(LDK(KP707106781), VADD(Tx, TA)); + Tef = VSUB(Tai, Tah); + TD = VMUL(LDK(KP707106781), VSUB(TA, Tx)); + Taj = VADD(Tah, Tai); + } + { + V TO, Tao, TR, Tap; + { + V TM, TN, TP, TQ; + TM = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 76)]), ivs, &(xi[0])); + TO = VSUB(TM, TN); + Tao = VADD(TM, TN); + TP = LD(&(xi[WS(is, 108)]), ivs, &(xi[0])); + TQ = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + TR = VSUB(TP, TQ); + Tap = VADD(TP, TQ); + } + TS = VMUL(LDK(KP707106781), VADD(TO, TR)); + Tei = VSUB(Tap, Tao); + TU = VMUL(LDK(KP707106781), VSUB(TR, TO)); + Taq = VADD(Tao, Tap); + } + Td2 = VADD(Tag, Taj); + Td3 = VADD(Tan, Taq); + { + V TC, TH, Tfr, Tfs; + TC = VADD(Tu, TB); + TH = VSUB(TD, TG); + TI = VFMA(LDK(KP980785280), TC, VMUL(LDK(KP195090322), TH)); + T4V = VFNMS(LDK(KP195090322), TC, VMUL(LDK(KP980785280), TH)); + Tfr = VFNMS(LDK(KP382683432), Tee, VMUL(LDK(KP923879532), 
Tef)); + Tfs = VFMA(LDK(KP382683432), Teh, VMUL(LDK(KP923879532), Tei)); + Tft = VADD(Tfr, Tfs); + Tgg = VSUB(Tfs, Tfr); + } + { + V TT, TY, T73, T74; + TT = VADD(TL, TS); + TY = VSUB(TU, TX); + TZ = VFNMS(LDK(KP195090322), TY, VMUL(LDK(KP980785280), TT)); + T4W = VFMA(LDK(KP195090322), TT, VMUL(LDK(KP980785280), TY)); + T73 = VSUB(TL, TS); + T74 = VADD(TX, TU); + T75 = VFNMS(LDK(KP555570233), T74, VMUL(LDK(KP831469612), T73)); + T86 = VFMA(LDK(KP555570233), T73, VMUL(LDK(KP831469612), T74)); + } + Teg = VFMA(LDK(KP923879532), Tee, VMUL(LDK(KP382683432), Tef)); + Tej = VFNMS(LDK(KP382683432), Tei, VMUL(LDK(KP923879532), Teh)); + Tek = VADD(Teg, Tej); + TgG = VSUB(Tej, Teg); + { + V T70, T71, Tak, Tar; + T70 = VSUB(Tu, TB); + T71 = VADD(TG, TD); + T72 = VFMA(LDK(KP831469612), T70, VMUL(LDK(KP555570233), T71)); + T85 = VFNMS(LDK(KP555570233), T70, VMUL(LDK(KP831469612), T71)); + Tak = VSUB(Tag, Taj); + Tar = VSUB(Tan, Taq); + Tas = VMUL(LDK(KP707106781), VADD(Tak, Tar)); + Tcd = VMUL(LDK(KP707106781), VSUB(Tar, Tak)); + } + } + { + V Tav, Tau, T1b, Taw, T1v, Tay, Tax, T18, Taz, T1w, T1j, Teo, T1z, TaD, T1q; + V Ten, T1y, TaG, T14, T17, Tem, Tep; + { + V T19, T1a, T1t, T1u; + T19 = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + T1a = LD(&(xi[WS(is, 98)]), ivs, &(xi[0])); + Tav = VADD(T19, T1a); + T1t = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T1u = LD(&(xi[WS(is, 66)]), ivs, &(xi[0])); + Tau = VADD(T1t, T1u); + T1b = VSUB(T19, T1a); + Taw = VADD(Tau, Tav); + T1v = VSUB(T1t, T1u); + } + { + V T12, T13, T15, T16; + T12 = LD(&(xi[WS(is, 114)]), ivs, &(xi[0])); + T13 = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + T14 = VSUB(T12, T13); + Tay = VADD(T12, T13); + T15 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + T16 = LD(&(xi[WS(is, 82)]), ivs, &(xi[0])); + T17 = VSUB(T15, T16); + Tax = VADD(T15, T16); + } + T18 = VMUL(LDK(KP707106781), VSUB(T14, T17)); + Taz = VADD(Tax, Tay); + T1w = VMUL(LDK(KP707106781), VADD(T17, T14)); + { + V T1f, TaB, T1i, TaC; + { + V T1d, T1e, T1g, T1h; + T1d = 
LD(&(xi[WS(is, 122)]), ivs, &(xi[0])); + T1e = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + T1f = VSUB(T1d, T1e); + TaB = VADD(T1d, T1e); + T1g = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + T1h = LD(&(xi[WS(is, 90)]), ivs, &(xi[0])); + T1i = VSUB(T1g, T1h); + TaC = VADD(T1g, T1h); + } + T1j = VFNMS(LDK(KP923879532), T1i, VMUL(LDK(KP382683432), T1f)); + Teo = VSUB(TaB, TaC); + T1z = VFMA(LDK(KP923879532), T1f, VMUL(LDK(KP382683432), T1i)); + TaD = VADD(TaB, TaC); + } + { + V T1m, TaE, T1p, TaF; + { + V T1k, T1l, T1n, T1o; + T1k = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T1l = LD(&(xi[WS(is, 74)]), ivs, &(xi[0])); + T1m = VSUB(T1k, T1l); + TaE = VADD(T1k, T1l); + T1n = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T1o = LD(&(xi[WS(is, 106)]), ivs, &(xi[0])); + T1p = VSUB(T1n, T1o); + TaF = VADD(T1n, T1o); + } + T1q = VFMA(LDK(KP382683432), T1m, VMUL(LDK(KP923879532), T1p)); + Ten = VSUB(TaE, TaF); + T1y = VFNMS(LDK(KP382683432), T1p, VMUL(LDK(KP923879532), T1m)); + TaG = VADD(TaE, TaF); + } + Tdp = VADD(Taw, Taz); + Tdq = VADD(TaG, TaD); + TdG = VSUB(Tdp, Tdq); + Tem = VSUB(Tau, Tav); + Tep = VMUL(LDK(KP707106781), VADD(Ten, Teo)); + Teq = VADD(Tem, Tep); + Tgm = VSUB(Tem, Tep); + { + V Ter, Tes, T1c, T1r; + Ter = VSUB(Tay, Tax); + Tes = VMUL(LDK(KP707106781), VSUB(Teo, Ten)); + Tet = VADD(Ter, Tes); + Tgl = VSUB(Tes, Ter); + T1c = VSUB(T18, T1b); + T1r = VSUB(T1j, T1q); + T1s = VADD(T1c, T1r); + T5P = VSUB(T1r, T1c); + } + { + V T1x, T1A, T7b, T7c; + T1x = VADD(T1v, T1w); + T1A = VADD(T1y, T1z); + T1B = VADD(T1x, T1A); + T5Q = VSUB(T1x, T1A); + T7b = VADD(T1b, T18); + T7c = VSUB(T1z, T1y); + T7d = VADD(T7b, T7c); + T8Z = VSUB(T7c, T7b); + } + { + V TaA, TaH, T78, T79; + TaA = VSUB(Taw, Taz); + TaH = VSUB(TaD, TaG); + TaI = VFMA(LDK(KP923879532), TaA, VMUL(LDK(KP382683432), TaH)); + Tcf = VFNMS(LDK(KP382683432), TaA, VMUL(LDK(KP923879532), TaH)); + T78 = VSUB(T1v, T1w); + T79 = VADD(T1q, T1j); + T7a = VADD(T78, T79); + T90 = VSUB(T78, T79); + } + } + { + V TaJ, TaK, T1F, TaL, T27, 
TaM, TaN, T1M, TaO, T24, T1U, Tew, T2a, TaV, T21; + V Tex, T29, TaS, T1I, T1L, Tev, Tey; + { + V T1D, T1E, T25, T26; + T1D = LD(&(xi[WS(is, 126)]), ivs, &(xi[0])); + T1E = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TaJ = VADD(T1D, T1E); + T25 = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + T26 = LD(&(xi[WS(is, 94)]), ivs, &(xi[0])); + TaK = VADD(T25, T26); + T1F = VSUB(T1D, T1E); + TaL = VADD(TaJ, TaK); + T27 = VSUB(T25, T26); + } + { + V T1G, T1H, T1J, T1K; + T1G = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T1H = LD(&(xi[WS(is, 78)]), ivs, &(xi[0])); + T1I = VSUB(T1G, T1H); + TaM = VADD(T1G, T1H); + T1J = LD(&(xi[WS(is, 110)]), ivs, &(xi[0])); + T1K = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + T1L = VSUB(T1J, T1K); + TaN = VADD(T1J, T1K); + } + T1M = VMUL(LDK(KP707106781), VADD(T1I, T1L)); + TaO = VADD(TaM, TaN); + T24 = VMUL(LDK(KP707106781), VSUB(T1L, T1I)); + { + V T1Q, TaT, T1T, TaU; + { + V T1O, T1P, T1R, T1S; + T1O = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1P = LD(&(xi[WS(is, 70)]), ivs, &(xi[0])); + T1Q = VSUB(T1O, T1P); + TaT = VADD(T1O, T1P); + T1R = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + T1S = LD(&(xi[WS(is, 102)]), ivs, &(xi[0])); + T1T = VSUB(T1R, T1S); + TaU = VADD(T1R, T1S); + } + T1U = VFNMS(LDK(KP382683432), T1T, VMUL(LDK(KP923879532), T1Q)); + Tew = VSUB(TaT, TaU); + T2a = VFMA(LDK(KP382683432), T1Q, VMUL(LDK(KP923879532), T1T)); + TaV = VADD(TaT, TaU); + } + { + V T1X, TaQ, T20, TaR; + { + V T1V, T1W, T1Y, T1Z; + T1V = LD(&(xi[WS(is, 118)]), ivs, &(xi[0])); + T1W = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + T1X = VSUB(T1V, T1W); + TaQ = VADD(T1V, T1W); + T1Y = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + T1Z = LD(&(xi[WS(is, 86)]), ivs, &(xi[0])); + T20 = VSUB(T1Y, T1Z); + TaR = VADD(T1Y, T1Z); + } + T21 = VFMA(LDK(KP923879532), T1X, VMUL(LDK(KP382683432), T20)); + Tex = VSUB(TaQ, TaR); + T29 = VFNMS(LDK(KP923879532), T20, VMUL(LDK(KP382683432), T1X)); + TaS = VADD(TaQ, TaR); + } + Tdm = VADD(TaL, TaO); + Tdn = VADD(TaV, TaS); + TdH = VSUB(Tdm, Tdn); + Tev = VSUB(TaJ, 
TaK); + Tey = VMUL(LDK(KP707106781), VADD(Tew, Tex)); + Tez = VADD(Tev, Tey); + Tgi = VSUB(Tev, Tey); + { + V TeA, TeB, T1N, T22; + TeA = VSUB(TaN, TaM); + TeB = VMUL(LDK(KP707106781), VSUB(Tex, Tew)); + TeC = VADD(TeA, TeB); + Tgj = VSUB(TeB, TeA); + T1N = VADD(T1F, T1M); + T22 = VADD(T1U, T21); + T23 = VADD(T1N, T22); + T5N = VSUB(T1N, T22); + } + { + V T28, T2b, T7i, T7j; + T28 = VSUB(T24, T27); + T2b = VSUB(T29, T2a); + T2c = VADD(T28, T2b); + T5M = VSUB(T2b, T28); + T7i = VADD(T27, T24); + T7j = VSUB(T21, T1U); + T7k = VADD(T7i, T7j); + T8X = VSUB(T7j, T7i); + } + { + V TaP, TaW, T7f, T7g; + TaP = VSUB(TaL, TaO); + TaW = VSUB(TaS, TaV); + TaX = VFNMS(LDK(KP382683432), TaW, VMUL(LDK(KP923879532), TaP)); + Tcg = VFMA(LDK(KP382683432), TaP, VMUL(LDK(KP923879532), TaW)); + T7f = VSUB(T1F, T1M); + T7g = VADD(T2a, T29); + T7h = VADD(T7f, T7g); + T8W = VSUB(T7f, T7g); + } + } + { + V T2J, TeL, T2V, Tb9, T30, TeO, T3c, Tbg, T2Q, TeM, T2S, Tbc, T37, TeP, T39; + V Tbj; + { + V T2H, T2I, Tb7, T2T, T2U, Tb8; + T2H = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T2I = LD(&(xi[WS(is, 69)]), ivs, &(xi[WS(is, 1)])); + Tb7 = VADD(T2H, T2I); + T2T = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T2U = LD(&(xi[WS(is, 101)]), ivs, &(xi[WS(is, 1)])); + Tb8 = VADD(T2T, T2U); + T2J = VSUB(T2H, T2I); + TeL = VSUB(Tb7, Tb8); + T2V = VSUB(T2T, T2U); + Tb9 = VADD(Tb7, Tb8); + } + { + V T2Y, T2Z, Tbe, T3a, T3b, Tbf; + T2Y = LD(&(xi[WS(is, 125)]), ivs, &(xi[WS(is, 1)])); + T2Z = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + Tbe = VADD(T2Y, T2Z); + T3a = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T3b = LD(&(xi[WS(is, 93)]), ivs, &(xi[WS(is, 1)])); + Tbf = VADD(T3a, T3b); + T30 = VSUB(T2Y, T2Z); + TeO = VSUB(Tbe, Tbf); + T3c = VSUB(T3a, T3b); + Tbg = VADD(Tbe, Tbf); + } + { + V T2M, Tba, T2P, Tbb; + { + V T2K, T2L, T2N, T2O; + T2K = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T2L = LD(&(xi[WS(is, 85)]), ivs, &(xi[WS(is, 1)])); + T2M = VSUB(T2K, T2L); + Tba = VADD(T2K, T2L); + 
T2N = LD(&(xi[WS(is, 117)]), ivs, &(xi[WS(is, 1)])); + T2O = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + T2P = VSUB(T2N, T2O); + Tbb = VADD(T2N, T2O); + } + T2Q = VMUL(LDK(KP707106781), VADD(T2M, T2P)); + TeM = VSUB(Tbb, Tba); + T2S = VMUL(LDK(KP707106781), VSUB(T2P, T2M)); + Tbc = VADD(Tba, Tbb); + } + { + V T33, Tbh, T36, Tbi; + { + V T31, T32, T34, T35; + T31 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T32 = LD(&(xi[WS(is, 77)]), ivs, &(xi[WS(is, 1)])); + T33 = VSUB(T31, T32); + Tbh = VADD(T31, T32); + T34 = LD(&(xi[WS(is, 109)]), ivs, &(xi[WS(is, 1)])); + T35 = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T36 = VSUB(T34, T35); + Tbi = VADD(T34, T35); + } + T37 = VMUL(LDK(KP707106781), VADD(T33, T36)); + TeP = VSUB(Tbi, Tbh); + T39 = VMUL(LDK(KP707106781), VSUB(T36, T33)); + Tbj = VADD(Tbh, Tbi); + } + { + V Tbd, Tbk, TeN, TeQ; + Tbd = VSUB(Tb9, Tbc); + Tbk = VSUB(Tbg, Tbj); + Tbl = VMUL(LDK(KP707106781), VADD(Tbd, Tbk)); + Tbu = VMUL(LDK(KP707106781), VSUB(Tbk, Tbd)); + { + V Td9, Tda, TeW, TeX; + Td9 = VADD(Tb9, Tbc); + Tda = VADD(Tbg, Tbj); + Tdb = VADD(Td9, Tda); + TdL = VSUB(Tda, Td9); + TeW = VFNMS(LDK(KP382683432), TeL, VMUL(LDK(KP923879532), TeM)); + TeX = VFMA(LDK(KP382683432), TeO, VMUL(LDK(KP923879532), TeP)); + TeY = VADD(TeW, TeX); + Tgu = VSUB(TeX, TeW); + } + TeN = VFMA(LDK(KP923879532), TeL, VMUL(LDK(KP382683432), TeM)); + TeQ = VFNMS(LDK(KP382683432), TeP, VMUL(LDK(KP923879532), TeO)); + TeR = VADD(TeN, TeQ); + Tgq = VSUB(TeQ, TeN); + { + V T7t, T7C, T7w, T7D; + { + V T7r, T7s, T7u, T7v; + T7r = VSUB(T2J, T2Q); + T7s = VADD(T2V, T2S); + T7t = VFMA(LDK(KP831469612), T7r, VMUL(LDK(KP555570233), T7s)); + T7C = VFNMS(LDK(KP555570233), T7r, VMUL(LDK(KP831469612), T7s)); + T7u = VSUB(T30, T37); + T7v = VADD(T3c, T39); + T7w = VFNMS(LDK(KP555570233), T7v, VMUL(LDK(KP831469612), T7u)); + T7D = VFMA(LDK(KP555570233), T7u, VMUL(LDK(KP831469612), T7v)); + } + T7x = VADD(T7t, T7w); + T98 = VSUB(T7D, T7C); + T7E = VADD(T7C, T7D); + T94 = 
VSUB(T7w, T7t); + } + { + V T2X, T3q, T3e, T3r; + { + V T2R, T2W, T38, T3d; + T2R = VADD(T2J, T2Q); + T2W = VSUB(T2S, T2V); + T2X = VFMA(LDK(KP980785280), T2R, VMUL(LDK(KP195090322), T2W)); + T3q = VFNMS(LDK(KP195090322), T2R, VMUL(LDK(KP980785280), T2W)); + T38 = VADD(T30, T37); + T3d = VSUB(T39, T3c); + T3e = VFNMS(LDK(KP195090322), T3d, VMUL(LDK(KP980785280), T38)); + T3r = VFMA(LDK(KP195090322), T38, VMUL(LDK(KP980785280), T3d)); + } + T3f = VADD(T2X, T3e); + T5Y = VSUB(T3r, T3q); + T3s = VADD(T3q, T3r); + T5U = VSUB(T3e, T2X); + } + } + } + { + V T3Y, Tf6, T4a, TbG, T4f, Tf9, T4r, TbN, T45, Tf7, T47, TbJ, T4m, Tfa, T4o; + V TbQ; + { + V T3W, T3X, TbE, T48, T49, TbF; + T3W = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3X = LD(&(xi[WS(is, 67)]), ivs, &(xi[WS(is, 1)])); + TbE = VADD(T3W, T3X); + T48 = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T49 = LD(&(xi[WS(is, 99)]), ivs, &(xi[WS(is, 1)])); + TbF = VADD(T48, T49); + T3Y = VSUB(T3W, T3X); + Tf6 = VSUB(TbE, TbF); + T4a = VSUB(T48, T49); + TbG = VADD(TbE, TbF); + } + { + V T4d, T4e, TbL, T4p, T4q, TbM; + T4d = LD(&(xi[WS(is, 123)]), ivs, &(xi[WS(is, 1)])); + T4e = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + TbL = VADD(T4d, T4e); + T4p = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T4q = LD(&(xi[WS(is, 91)]), ivs, &(xi[WS(is, 1)])); + TbM = VADD(T4p, T4q); + T4f = VSUB(T4d, T4e); + Tf9 = VSUB(TbL, TbM); + T4r = VSUB(T4p, T4q); + TbN = VADD(TbL, TbM); + } + { + V T41, TbH, T44, TbI; + { + V T3Z, T40, T42, T43; + T3Z = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T40 = LD(&(xi[WS(is, 83)]), ivs, &(xi[WS(is, 1)])); + T41 = VSUB(T3Z, T40); + TbH = VADD(T3Z, T40); + T42 = LD(&(xi[WS(is, 115)]), ivs, &(xi[WS(is, 1)])); + T43 = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T44 = VSUB(T42, T43); + TbI = VADD(T42, T43); + } + T45 = VMUL(LDK(KP707106781), VADD(T41, T44)); + Tf7 = VSUB(TbI, TbH); + T47 = VMUL(LDK(KP707106781), VSUB(T44, T41)); + TbJ = VADD(TbH, TbI); + } + { + V T4i, TbO, T4l, TbP; + { 
+ V T4g, T4h, T4j, T4k; + T4g = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4h = LD(&(xi[WS(is, 75)]), ivs, &(xi[WS(is, 1)])); + T4i = VSUB(T4g, T4h); + TbO = VADD(T4g, T4h); + T4j = LD(&(xi[WS(is, 107)]), ivs, &(xi[WS(is, 1)])); + T4k = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T4l = VSUB(T4j, T4k); + TbP = VADD(T4j, T4k); + } + T4m = VMUL(LDK(KP707106781), VADD(T4i, T4l)); + Tfa = VSUB(TbP, TbO); + T4o = VMUL(LDK(KP707106781), VSUB(T4l, T4i)); + TbQ = VADD(TbO, TbP); + } + { + V TbK, TbR, Tf8, Tfb; + TbK = VSUB(TbG, TbJ); + TbR = VSUB(TbN, TbQ); + TbS = VMUL(LDK(KP707106781), VADD(TbK, TbR)); + Tc1 = VMUL(LDK(KP707106781), VSUB(TbR, TbK)); + { + V Tdg, Tdh, Tfh, Tfi; + Tdg = VADD(TbG, TbJ); + Tdh = VADD(TbN, TbQ); + Tdi = VADD(Tdg, Tdh); + TdO = VSUB(Tdh, Tdg); + Tfh = VFNMS(LDK(KP382683432), Tf6, VMUL(LDK(KP923879532), Tf7)); + Tfi = VFMA(LDK(KP382683432), Tf9, VMUL(LDK(KP923879532), Tfa)); + Tfj = VADD(Tfh, Tfi); + Tgy = VSUB(Tfi, Tfh); + } + Tf8 = VFMA(LDK(KP923879532), Tf6, VMUL(LDK(KP382683432), Tf7)); + Tfb = VFNMS(LDK(KP382683432), Tfa, VMUL(LDK(KP923879532), Tf9)); + Tfc = VADD(Tf8, Tfb); + TgA = VSUB(Tfb, Tf8); + { + V T7M, T7V, T7P, T7W; + { + V T7K, T7L, T7N, T7O; + T7K = VSUB(T3Y, T45); + T7L = VADD(T4a, T47); + T7M = VFMA(LDK(KP831469612), T7K, VMUL(LDK(KP555570233), T7L)); + T7V = VFNMS(LDK(KP555570233), T7K, VMUL(LDK(KP831469612), T7L)); + T7N = VSUB(T4f, T4m); + T7O = VADD(T4r, T4o); + T7P = VFNMS(LDK(KP555570233), T7O, VMUL(LDK(KP831469612), T7N)); + T7W = VFMA(LDK(KP555570233), T7N, VMUL(LDK(KP831469612), T7O)); + } + T7Q = VADD(T7M, T7P); + T9e = VSUB(T7P, T7M); + T7X = VADD(T7V, T7W); + T9c = VSUB(T7W, T7V); + } + { + V T4c, T4F, T4t, T4G; + { + V T46, T4b, T4n, T4s; + T46 = VADD(T3Y, T45); + T4b = VSUB(T47, T4a); + T4c = VFMA(LDK(KP980785280), T46, VMUL(LDK(KP195090322), T4b)); + T4F = VFNMS(LDK(KP195090322), T46, VMUL(LDK(KP980785280), T4b)); + T4n = VADD(T4f, T4m); + T4s = VSUB(T4o, T4r); + T4t = VFNMS(LDK(KP195090322), T4s, 
VMUL(LDK(KP980785280), T4n)); + T4G = VFMA(LDK(KP195090322), T4n, VMUL(LDK(KP980785280), T4s)); + } + T4u = VADD(T4c, T4t); + T64 = VSUB(T4t, T4c); + T4H = VADD(T4F, T4G); + T62 = VSUB(T4G, T4F); + } + } + } + { + V Td5, Tdx, TdC, TdE, Tdk, Tdt, Tds, Tdy, Tdz, TdD; + { + V Td1, Td4, TdA, TdB; + Td1 = VADD(TcZ, Td0); + Td4 = VADD(Td2, Td3); + Td5 = VSUB(Td1, Td4); + Tdx = VADD(Td1, Td4); + TdA = VADD(Td8, Tdb); + TdB = VADD(Tdf, Tdi); + TdC = VADD(TdA, TdB); + TdE = VBYI(VSUB(TdB, TdA)); + } + { + V Tdc, Tdj, Tdo, Tdr; + Tdc = VSUB(Td8, Tdb); + Tdj = VSUB(Tdf, Tdi); + Tdk = VMUL(LDK(KP707106781), VADD(Tdc, Tdj)); + Tdt = VMUL(LDK(KP707106781), VSUB(Tdj, Tdc)); + Tdo = VADD(Tdm, Tdn); + Tdr = VADD(Tdp, Tdq); + Tds = VSUB(Tdo, Tdr); + Tdy = VADD(Tdr, Tdo); + } + Tdz = VADD(Tdx, Tdy); + ST(&(xo[WS(os, 64)]), VSUB(Tdz, TdC), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Tdz, TdC), ovs, &(xo[0])); + TdD = VSUB(Tdx, Tdy); + ST(&(xo[WS(os, 96)]), VSUB(TdD, TdE), ovs, &(xo[0])); + ST(&(xo[WS(os, 32)]), VADD(TdD, TdE), ovs, &(xo[0])); + { + V Tdl, Tdu, Tdv, Tdw; + Tdl = VADD(Td5, Tdk); + Tdu = VBYI(VADD(Tds, Tdt)); + ST(&(xo[WS(os, 112)]), VSUB(Tdl, Tdu), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VADD(Tdl, Tdu), ovs, &(xo[0])); + Tdv = VSUB(Td5, Tdk); + Tdw = VBYI(VSUB(Tdt, Tds)); + ST(&(xo[WS(os, 80)]), VSUB(Tdv, Tdw), ovs, &(xo[0])); + ST(&(xo[WS(os, 48)]), VADD(Tdv, Tdw), ovs, &(xo[0])); + } + } + { + V TdJ, Te4, TdX, Te5, TdQ, Te1, TdU, Te2; + { + V TdF, TdI, TdV, TdW; + TdF = VSUB(TcZ, Td0); + TdI = VMUL(LDK(KP707106781), VADD(TdG, TdH)); + TdJ = VADD(TdF, TdI); + Te4 = VSUB(TdF, TdI); + TdV = VFNMS(LDK(KP382683432), TdK, VMUL(LDK(KP923879532), TdL)); + TdW = VFMA(LDK(KP382683432), TdN, VMUL(LDK(KP923879532), TdO)); + TdX = VADD(TdV, TdW); + Te5 = VSUB(TdW, TdV); + } + { + V TdM, TdP, TdS, TdT; + TdM = VFMA(LDK(KP923879532), TdK, VMUL(LDK(KP382683432), TdL)); + TdP = VFNMS(LDK(KP382683432), TdO, VMUL(LDK(KP923879532), TdN)); + TdQ = VADD(TdM, TdP); + Te1 = VSUB(TdP, TdM); + TdS = 
VSUB(Td3, Td2); + TdT = VMUL(LDK(KP707106781), VSUB(TdH, TdG)); + TdU = VADD(TdS, TdT); + Te2 = VSUB(TdT, TdS); + } + { + V TdR, TdY, Te7, Te8; + TdR = VADD(TdJ, TdQ); + TdY = VBYI(VADD(TdU, TdX)); + ST(&(xo[WS(os, 120)]), VSUB(TdR, TdY), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(TdR, TdY), ovs, &(xo[0])); + Te7 = VBYI(VADD(Te2, Te1)); + Te8 = VADD(Te4, Te5); + ST(&(xo[WS(os, 24)]), VADD(Te7, Te8), ovs, &(xo[0])); + ST(&(xo[WS(os, 104)]), VSUB(Te8, Te7), ovs, &(xo[0])); + } + { + V TdZ, Te0, Te3, Te6; + TdZ = VSUB(TdJ, TdQ); + Te0 = VBYI(VSUB(TdX, TdU)); + ST(&(xo[WS(os, 72)]), VSUB(TdZ, Te0), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VADD(TdZ, Te0), ovs, &(xo[0])); + Te3 = VBYI(VSUB(Te1, Te2)); + Te6 = VSUB(Te4, Te5); + ST(&(xo[WS(os, 40)]), VADD(Te3, Te6), ovs, &(xo[0])); + ST(&(xo[WS(os, 88)]), VSUB(Te6, Te3), ovs, &(xo[0])); + } + } + { + V TaZ, Tcs, Tci, Tcq, Tc4, Tct, Tcl, Tcp; + { + V Tat, TaY, Tce, Tch; + Tat = VADD(Tad, Tas); + TaY = VADD(TaI, TaX); + TaZ = VADD(Tat, TaY); + Tcs = VSUB(Tat, TaY); + Tce = VADD(Tcc, Tcd); + Tch = VADD(Tcf, Tcg); + Tci = VADD(Tce, Tch); + Tcq = VSUB(Tch, Tce); + { + V Tbw, Tcj, Tc3, Tck; + { + V Tbm, Tbv, TbT, Tc2; + Tbm = VADD(Tb6, Tbl); + Tbv = VADD(Tbt, Tbu); + Tbw = VFMA(LDK(KP980785280), Tbm, VMUL(LDK(KP195090322), Tbv)); + Tcj = VFNMS(LDK(KP195090322), Tbm, VMUL(LDK(KP980785280), Tbv)); + TbT = VADD(TbD, TbS); + Tc2 = VADD(Tc0, Tc1); + Tc3 = VFNMS(LDK(KP195090322), Tc2, VMUL(LDK(KP980785280), TbT)); + Tck = VFMA(LDK(KP195090322), TbT, VMUL(LDK(KP980785280), Tc2)); + } + Tc4 = VADD(Tbw, Tc3); + Tct = VSUB(Tck, Tcj); + Tcl = VADD(Tcj, Tck); + Tcp = VSUB(Tc3, Tbw); + } + } + { + V Tc5, Tcm, Tcv, Tcw; + Tc5 = VADD(TaZ, Tc4); + Tcm = VBYI(VADD(Tci, Tcl)); + ST(&(xo[WS(os, 124)]), VSUB(Tc5, Tcm), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(Tc5, Tcm), ovs, &(xo[0])); + Tcv = VBYI(VADD(Tcq, Tcp)); + Tcw = VADD(Tcs, Tct); + ST(&(xo[WS(os, 28)]), VADD(Tcv, Tcw), ovs, &(xo[0])); + ST(&(xo[WS(os, 100)]), VSUB(Tcw, Tcv), ovs, 
&(xo[0])); + } + { + V Tcn, Tco, Tcr, Tcu; + Tcn = VSUB(TaZ, Tc4); + Tco = VBYI(VSUB(Tcl, Tci)); + ST(&(xo[WS(os, 68)]), VSUB(Tcn, Tco), ovs, &(xo[0])); + ST(&(xo[WS(os, 60)]), VADD(Tcn, Tco), ovs, &(xo[0])); + Tcr = VBYI(VSUB(Tcp, Tcq)); + Tcu = VSUB(Tcs, Tct); + ST(&(xo[WS(os, 36)]), VADD(Tcr, Tcu), ovs, &(xo[0])); + ST(&(xo[WS(os, 92)]), VSUB(Tcu, Tcr), ovs, &(xo[0])); + } + } + { + V Tcz, TcU, TcK, TcS, TcG, TcV, TcN, TcR; + { + V Tcx, Tcy, TcI, TcJ; + Tcx = VSUB(Tad, Tas); + Tcy = VSUB(Tcg, Tcf); + Tcz = VADD(Tcx, Tcy); + TcU = VSUB(Tcx, Tcy); + TcI = VSUB(Tcd, Tcc); + TcJ = VSUB(TaX, TaI); + TcK = VADD(TcI, TcJ); + TcS = VSUB(TcJ, TcI); + { + V TcC, TcL, TcF, TcM; + { + V TcA, TcB, TcD, TcE; + TcA = VSUB(Tb6, Tbl); + TcB = VSUB(Tbu, Tbt); + TcC = VFMA(LDK(KP831469612), TcA, VMUL(LDK(KP555570233), TcB)); + TcL = VFNMS(LDK(KP555570233), TcA, VMUL(LDK(KP831469612), TcB)); + TcD = VSUB(TbD, TbS); + TcE = VSUB(Tc1, Tc0); + TcF = VFNMS(LDK(KP555570233), TcE, VMUL(LDK(KP831469612), TcD)); + TcM = VFMA(LDK(KP555570233), TcD, VMUL(LDK(KP831469612), TcE)); + } + TcG = VADD(TcC, TcF); + TcV = VSUB(TcM, TcL); + TcN = VADD(TcL, TcM); + TcR = VSUB(TcF, TcC); + } + } + { + V TcH, TcO, TcX, TcY; + TcH = VADD(Tcz, TcG); + TcO = VBYI(VADD(TcK, TcN)); + ST(&(xo[WS(os, 116)]), VSUB(TcH, TcO), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VADD(TcH, TcO), ovs, &(xo[0])); + TcX = VBYI(VADD(TcS, TcR)); + TcY = VADD(TcU, TcV); + ST(&(xo[WS(os, 20)]), VADD(TcX, TcY), ovs, &(xo[0])); + ST(&(xo[WS(os, 108)]), VSUB(TcY, TcX), ovs, &(xo[0])); + } + { + V TcP, TcQ, TcT, TcW; + TcP = VSUB(Tcz, TcG); + TcQ = VBYI(VSUB(TcN, TcK)); + ST(&(xo[WS(os, 76)]), VSUB(TcP, TcQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VADD(TcP, TcQ), ovs, &(xo[0])); + TcT = VBYI(VSUB(TcR, TcS)); + TcW = VSUB(TcU, TcV); + ST(&(xo[WS(os, 44)]), VADD(TcT, TcW), ovs, &(xo[0])); + ST(&(xo[WS(os, 84)]), VSUB(TcW, TcT), ovs, &(xo[0])); + } + } + { + V TeF, Tg8, TfI, Tg0, Tfy, Tga, TfG, TfP, Tfm, TfJ, TfB, TfF, TfW, Tgb, Tg3; + V 
Tg7; + { + V Tel, TfY, TeE, TfZ, Teu, TeD; + Tel = VADD(Ted, Tek); + TfY = VSUB(Tft, Tfq); + Teu = VFMA(LDK(KP980785280), Teq, VMUL(LDK(KP195090322), Tet)); + TeD = VFNMS(LDK(KP195090322), TeC, VMUL(LDK(KP980785280), Tez)); + TeE = VADD(Teu, TeD); + TfZ = VSUB(TeD, Teu); + TeF = VADD(Tel, TeE); + Tg8 = VSUB(TfZ, TfY); + TfI = VSUB(Tel, TeE); + Tg0 = VADD(TfY, TfZ); + } + { + V Tfu, TfN, Tfx, TfO, Tfv, Tfw; + Tfu = VADD(Tfq, Tft); + TfN = VSUB(Ted, Tek); + Tfv = VFNMS(LDK(KP195090322), Teq, VMUL(LDK(KP980785280), Tet)); + Tfw = VFMA(LDK(KP195090322), Tez, VMUL(LDK(KP980785280), TeC)); + Tfx = VADD(Tfv, Tfw); + TfO = VSUB(Tfw, Tfv); + Tfy = VADD(Tfu, Tfx); + Tga = VSUB(TfN, TfO); + TfG = VSUB(Tfx, Tfu); + TfP = VADD(TfN, TfO); + } + { + V Tf0, Tfz, Tfl, TfA; + { + V TeS, TeZ, Tfd, Tfk; + TeS = VADD(TeK, TeR); + TeZ = VADD(TeV, TeY); + Tf0 = VFMA(LDK(KP995184726), TeS, VMUL(LDK(KP098017140), TeZ)); + Tfz = VFNMS(LDK(KP098017140), TeS, VMUL(LDK(KP995184726), TeZ)); + Tfd = VADD(Tf5, Tfc); + Tfk = VADD(Tfg, Tfj); + Tfl = VFNMS(LDK(KP098017140), Tfk, VMUL(LDK(KP995184726), Tfd)); + TfA = VFMA(LDK(KP098017140), Tfd, VMUL(LDK(KP995184726), Tfk)); + } + Tfm = VADD(Tf0, Tfl); + TfJ = VSUB(TfA, Tfz); + TfB = VADD(Tfz, TfA); + TfF = VSUB(Tfl, Tf0); + } + { + V TfS, Tg1, TfV, Tg2; + { + V TfQ, TfR, TfT, TfU; + TfQ = VSUB(TeK, TeR); + TfR = VSUB(TeY, TeV); + TfS = VFMA(LDK(KP773010453), TfQ, VMUL(LDK(KP634393284), TfR)); + Tg1 = VFNMS(LDK(KP634393284), TfQ, VMUL(LDK(KP773010453), TfR)); + TfT = VSUB(Tf5, Tfc); + TfU = VSUB(Tfj, Tfg); + TfV = VFNMS(LDK(KP634393284), TfU, VMUL(LDK(KP773010453), TfT)); + Tg2 = VFMA(LDK(KP634393284), TfT, VMUL(LDK(KP773010453), TfU)); + } + TfW = VADD(TfS, TfV); + Tgb = VSUB(Tg2, Tg1); + Tg3 = VADD(Tg1, Tg2); + Tg7 = VSUB(TfV, TfS); + } + { + V Tfn, TfC, Tg9, Tgc; + Tfn = VADD(TeF, Tfm); + TfC = VBYI(VADD(Tfy, TfB)); + ST(&(xo[WS(os, 126)]), VSUB(Tfn, TfC), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(Tfn, TfC), ovs, &(xo[0])); + Tg9 = VBYI(VSUB(Tg7, 
Tg8)); + Tgc = VSUB(Tga, Tgb); + ST(&(xo[WS(os, 46)]), VADD(Tg9, Tgc), ovs, &(xo[0])); + ST(&(xo[WS(os, 82)]), VSUB(Tgc, Tg9), ovs, &(xo[0])); + } + { + V Tgd, Tge, TfD, TfE; + Tgd = VBYI(VADD(Tg8, Tg7)); + Tge = VADD(Tga, Tgb); + ST(&(xo[WS(os, 18)]), VADD(Tgd, Tge), ovs, &(xo[0])); + ST(&(xo[WS(os, 110)]), VSUB(Tge, Tgd), ovs, &(xo[0])); + TfD = VSUB(TeF, Tfm); + TfE = VBYI(VSUB(TfB, Tfy)); + ST(&(xo[WS(os, 66)]), VSUB(TfD, TfE), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VADD(TfD, TfE), ovs, &(xo[0])); + } + { + V TfH, TfK, TfX, Tg4; + TfH = VBYI(VSUB(TfF, TfG)); + TfK = VSUB(TfI, TfJ); + ST(&(xo[WS(os, 34)]), VADD(TfH, TfK), ovs, &(xo[0])); + ST(&(xo[WS(os, 94)]), VSUB(TfK, TfH), ovs, &(xo[0])); + TfX = VADD(TfP, TfW); + Tg4 = VBYI(VADD(Tg0, Tg3)); + ST(&(xo[WS(os, 114)]), VSUB(TfX, Tg4), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VADD(TfX, Tg4), ovs, &(xo[0])); + } + { + V Tg5, Tg6, TfL, TfM; + Tg5 = VSUB(TfP, TfW); + Tg6 = VBYI(VSUB(Tg3, Tg0)); + ST(&(xo[WS(os, 78)]), VSUB(Tg5, Tg6), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VADD(Tg5, Tg6), ovs, &(xo[0])); + TfL = VBYI(VADD(TfG, TfF)); + TfM = VADD(TfI, TfJ); + ST(&(xo[WS(os, 30)]), VADD(TfL, TfM), ovs, &(xo[0])); + ST(&(xo[WS(os, 98)]), VSUB(TfM, TfL), ovs, &(xo[0])); + } + } + { + V Tgp, Thm, TgW, The, TgM, Tho, TgU, Th3, TgE, TgX, TgP, TgT, Tha, Thp, Thh; + V Thl; + { + V Tgh, Thc, Tgo, Thd, Tgk, Tgn; + Tgh = VSUB(Tgf, Tgg); + Thc = VADD(TgH, TgG); + Tgk = VFMA(LDK(KP555570233), Tgi, VMUL(LDK(KP831469612), Tgj)); + Tgn = VFNMS(LDK(KP555570233), Tgm, VMUL(LDK(KP831469612), Tgl)); + Tgo = VSUB(Tgk, Tgn); + Thd = VADD(Tgn, Tgk); + Tgp = VADD(Tgh, Tgo); + Thm = VSUB(Thd, Thc); + TgW = VSUB(Tgh, Tgo); + The = VADD(Thc, Thd); + } + { + V TgI, Th1, TgL, Th2, TgJ, TgK; + TgI = VSUB(TgG, TgH); + Th1 = VADD(Tgf, Tgg); + TgJ = VFNMS(LDK(KP555570233), Tgj, VMUL(LDK(KP831469612), Tgi)); + TgK = VFMA(LDK(KP831469612), Tgm, VMUL(LDK(KP555570233), Tgl)); + TgL = VSUB(TgJ, TgK); + Th2 = VADD(TgK, TgJ); + TgM = VADD(TgI, TgL); + 
Tho = VSUB(Th1, Th2); + TgU = VSUB(TgL, TgI); + Th3 = VADD(Th1, Th2); + } + { + V Tgw, TgN, TgD, TgO; + { + V Tgs, Tgv, Tgz, TgC; + Tgs = VSUB(Tgq, Tgr); + Tgv = VSUB(Tgt, Tgu); + Tgw = VFMA(LDK(KP471396736), Tgs, VMUL(LDK(KP881921264), Tgv)); + TgN = VFNMS(LDK(KP471396736), Tgv, VMUL(LDK(KP881921264), Tgs)); + Tgz = VSUB(Tgx, Tgy); + TgC = VSUB(TgA, TgB); + TgD = VFNMS(LDK(KP471396736), TgC, VMUL(LDK(KP881921264), Tgz)); + TgO = VFMA(LDK(KP881921264), TgC, VMUL(LDK(KP471396736), Tgz)); + } + TgE = VADD(Tgw, TgD); + TgX = VSUB(TgO, TgN); + TgP = VADD(TgN, TgO); + TgT = VSUB(TgD, Tgw); + } + { + V Th6, Thf, Th9, Thg; + { + V Th4, Th5, Th7, Th8; + Th4 = VADD(Tgr, Tgq); + Th5 = VADD(Tgt, Tgu); + Th6 = VFMA(LDK(KP290284677), Th4, VMUL(LDK(KP956940335), Th5)); + Thf = VFNMS(LDK(KP290284677), Th5, VMUL(LDK(KP956940335), Th4)); + Th7 = VADD(Tgx, Tgy); + Th8 = VADD(TgB, TgA); + Th9 = VFNMS(LDK(KP290284677), Th8, VMUL(LDK(KP956940335), Th7)); + Thg = VFMA(LDK(KP956940335), Th8, VMUL(LDK(KP290284677), Th7)); + } + Tha = VADD(Th6, Th9); + Thp = VSUB(Thg, Thf); + Thh = VADD(Thf, Thg); + Thl = VSUB(Th9, Th6); + } + { + V TgF, TgQ, Thn, Thq; + TgF = VADD(Tgp, TgE); + TgQ = VBYI(VADD(TgM, TgP)); + ST(&(xo[WS(os, 118)]), VSUB(TgF, TgQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VADD(TgF, TgQ), ovs, &(xo[0])); + Thn = VBYI(VSUB(Thl, Thm)); + Thq = VSUB(Tho, Thp); + ST(&(xo[WS(os, 38)]), VADD(Thn, Thq), ovs, &(xo[0])); + ST(&(xo[WS(os, 90)]), VSUB(Thq, Thn), ovs, &(xo[0])); + } + { + V Thr, Ths, TgR, TgS; + Thr = VBYI(VADD(Thm, Thl)); + Ths = VADD(Tho, Thp); + ST(&(xo[WS(os, 26)]), VADD(Thr, Ths), ovs, &(xo[0])); + ST(&(xo[WS(os, 102)]), VSUB(Ths, Thr), ovs, &(xo[0])); + TgR = VSUB(Tgp, TgE); + TgS = VBYI(VSUB(TgP, TgM)); + ST(&(xo[WS(os, 74)]), VSUB(TgR, TgS), ovs, &(xo[0])); + ST(&(xo[WS(os, 54)]), VADD(TgR, TgS), ovs, &(xo[0])); + } + { + V TgV, TgY, Thb, Thi; + TgV = VBYI(VSUB(TgT, TgU)); + TgY = VSUB(TgW, TgX); + ST(&(xo[WS(os, 42)]), VADD(TgV, TgY), ovs, &(xo[0])); + 
ST(&(xo[WS(os, 86)]), VSUB(TgY, TgV), ovs, &(xo[0])); + Thb = VADD(Th3, Tha); + Thi = VBYI(VADD(The, Thh)); + ST(&(xo[WS(os, 122)]), VSUB(Thb, Thi), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VADD(Thb, Thi), ovs, &(xo[0])); + } + { + V Thj, Thk, TgZ, Th0; + Thj = VSUB(Th3, Tha); + Thk = VBYI(VSUB(Thh, The)); + ST(&(xo[WS(os, 70)]), VSUB(Thj, Thk), ovs, &(xo[0])); + ST(&(xo[WS(os, 58)]), VADD(Thj, Thk), ovs, &(xo[0])); + TgZ = VBYI(VADD(TgU, TgT)); + Th0 = VADD(TgW, TgX); + ST(&(xo[WS(os, 22)]), VADD(TgZ, Th0), ovs, &(xo[0])); + ST(&(xo[WS(os, 106)]), VSUB(Th0, TgZ), ovs, &(xo[0])); + } + } + { + V T80, T8n, T8f, T8j, T8A, T8P, T8H, T8L, T7n, T8M, T8O, T8c, T8k, T8t, T8E; + V T8m; + { + V T7G, T8d, T7Z, T8e; + { + V T7y, T7F, T7R, T7Y; + T7y = VADD(T7q, T7x); + T7F = VADD(T7B, T7E); + T7G = VFMA(LDK(KP989176509), T7y, VMUL(LDK(KP146730474), T7F)); + T8d = VFNMS(LDK(KP146730474), T7y, VMUL(LDK(KP989176509), T7F)); + T7R = VADD(T7J, T7Q); + T7Y = VADD(T7U, T7X); + T7Z = VFNMS(LDK(KP146730474), T7Y, VMUL(LDK(KP989176509), T7R)); + T8e = VFMA(LDK(KP146730474), T7R, VMUL(LDK(KP989176509), T7Y)); + } + T80 = VADD(T7G, T7Z); + T8n = VSUB(T8e, T8d); + T8f = VADD(T8d, T8e); + T8j = VSUB(T7Z, T7G); + } + { + V T8w, T8F, T8z, T8G; + { + V T8u, T8v, T8x, T8y; + T8u = VSUB(T7q, T7x); + T8v = VSUB(T7E, T7B); + T8w = VFMA(LDK(KP803207531), T8u, VMUL(LDK(KP595699304), T8v)); + T8F = VFNMS(LDK(KP595699304), T8u, VMUL(LDK(KP803207531), T8v)); + T8x = VSUB(T7J, T7Q); + T8y = VSUB(T7X, T7U); + T8z = VFNMS(LDK(KP595699304), T8y, VMUL(LDK(KP803207531), T8x)); + T8G = VFMA(LDK(KP595699304), T8x, VMUL(LDK(KP803207531), T8y)); + } + T8A = VADD(T8w, T8z); + T8P = VSUB(T8G, T8F); + T8H = VADD(T8F, T8G); + T8L = VSUB(T8z, T8w); + } + { + V T77, T8r, T88, T8C, T7m, T8D, T8b, T8s, T76, T87; + T76 = VADD(T72, T75); + T77 = VADD(T6Z, T76); + T8r = VSUB(T6Z, T76); + T87 = VADD(T85, T86); + T88 = VADD(T84, T87); + T8C = VSUB(T87, T84); + { + V T7e, T7l, T89, T8a; + T7e = VFMA(LDK(KP956940335), T7a, 
VMUL(LDK(KP290284677), T7d)); + T7l = VFNMS(LDK(KP290284677), T7k, VMUL(LDK(KP956940335), T7h)); + T7m = VADD(T7e, T7l); + T8D = VSUB(T7l, T7e); + T89 = VFNMS(LDK(KP290284677), T7a, VMUL(LDK(KP956940335), T7d)); + T8a = VFMA(LDK(KP290284677), T7h, VMUL(LDK(KP956940335), T7k)); + T8b = VADD(T89, T8a); + T8s = VSUB(T8a, T89); + } + T7n = VADD(T77, T7m); + T8M = VSUB(T8D, T8C); + T8O = VSUB(T8r, T8s); + T8c = VADD(T88, T8b); + T8k = VSUB(T8b, T88); + T8t = VADD(T8r, T8s); + T8E = VADD(T8C, T8D); + T8m = VSUB(T77, T7m); + } + { + V T81, T8g, T8N, T8Q; + T81 = VADD(T7n, T80); + T8g = VBYI(VADD(T8c, T8f)); + ST(&(xo[WS(os, 125)]), VSUB(T81, T8g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(T81, T8g), ovs, &(xo[WS(os, 1)])); + T8N = VBYI(VSUB(T8L, T8M)); + T8Q = VSUB(T8O, T8P); + ST(&(xo[WS(os, 45)]), VADD(T8N, T8Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 83)]), VSUB(T8Q, T8N), ovs, &(xo[WS(os, 1)])); + } + { + V T8R, T8S, T8h, T8i; + T8R = VBYI(VADD(T8M, T8L)); + T8S = VADD(T8O, T8P); + ST(&(xo[WS(os, 19)]), VADD(T8R, T8S), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 109)]), VSUB(T8S, T8R), ovs, &(xo[WS(os, 1)])); + T8h = VSUB(T7n, T80); + T8i = VBYI(VSUB(T8f, T8c)); + ST(&(xo[WS(os, 67)]), VSUB(T8h, T8i), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 61)]), VADD(T8h, T8i), ovs, &(xo[WS(os, 1)])); + } + { + V T8l, T8o, T8B, T8I; + T8l = VBYI(VSUB(T8j, T8k)); + T8o = VSUB(T8m, T8n); + ST(&(xo[WS(os, 35)]), VADD(T8l, T8o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 93)]), VSUB(T8o, T8l), ovs, &(xo[WS(os, 1)])); + T8B = VADD(T8t, T8A); + T8I = VBYI(VADD(T8E, T8H)); + ST(&(xo[WS(os, 115)]), VSUB(T8B, T8I), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VADD(T8B, T8I), ovs, &(xo[WS(os, 1)])); + } + { + V T8J, T8K, T8p, T8q; + T8J = VSUB(T8t, T8A); + T8K = VBYI(VSUB(T8H, T8E)); + ST(&(xo[WS(os, 77)]), VSUB(T8J, T8K), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 51)]), VADD(T8J, T8K), ovs, &(xo[WS(os, 1)])); + T8p = VBYI(VADD(T8k, T8j)); + T8q = VADD(T8m, T8n); + ST(&(xo[WS(os, 
29)]), VADD(T8p, T8q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 99)]), VSUB(T8q, T8p), ovs, &(xo[WS(os, 1)])); + } + } + { + V T4K, T5d, T55, T59, T5q, T5F, T5x, T5B, T2f, T5C, T5E, T52, T5a, T5j, T5u; + V T5c; + { + V T3u, T53, T4J, T54; + { + V T3g, T3t, T4v, T4I; + T3g = VADD(T2G, T3f); + T3t = VADD(T3p, T3s); + T3u = VFMA(LDK(KP998795456), T3g, VMUL(LDK(KP049067674), T3t)); + T53 = VFNMS(LDK(KP049067674), T3g, VMUL(LDK(KP998795456), T3t)); + T4v = VADD(T3V, T4u); + T4I = VADD(T4E, T4H); + T4J = VFNMS(LDK(KP049067674), T4I, VMUL(LDK(KP998795456), T4v)); + T54 = VFMA(LDK(KP049067674), T4v, VMUL(LDK(KP998795456), T4I)); + } + T4K = VADD(T3u, T4J); + T5d = VSUB(T54, T53); + T55 = VADD(T53, T54); + T59 = VSUB(T4J, T3u); + } + { + V T5m, T5v, T5p, T5w; + { + V T5k, T5l, T5n, T5o; + T5k = VSUB(T2G, T3f); + T5l = VSUB(T3s, T3p); + T5m = VFMA(LDK(KP740951125), T5k, VMUL(LDK(KP671558954), T5l)); + T5v = VFNMS(LDK(KP671558954), T5k, VMUL(LDK(KP740951125), T5l)); + T5n = VSUB(T3V, T4u); + T5o = VSUB(T4H, T4E); + T5p = VFNMS(LDK(KP671558954), T5o, VMUL(LDK(KP740951125), T5n)); + T5w = VFMA(LDK(KP671558954), T5n, VMUL(LDK(KP740951125), T5o)); + } + T5q = VADD(T5m, T5p); + T5F = VSUB(T5w, T5v); + T5x = VADD(T5v, T5w); + T5B = VSUB(T5p, T5m); + } + { + V T11, T5h, T4Y, T5s, T2e, T5t, T51, T5i, T10, T4X; + T10 = VADD(TI, TZ); + T11 = VADD(Tr, T10); + T5h = VSUB(Tr, T10); + T4X = VADD(T4V, T4W); + T4Y = VADD(T4U, T4X); + T5s = VSUB(T4X, T4U); + { + V T1C, T2d, T4Z, T50; + T1C = VFMA(LDK(KP098017140), T1s, VMUL(LDK(KP995184726), T1B)); + T2d = VFNMS(LDK(KP098017140), T2c, VMUL(LDK(KP995184726), T23)); + T2e = VADD(T1C, T2d); + T5t = VSUB(T2d, T1C); + T4Z = VFNMS(LDK(KP098017140), T1B, VMUL(LDK(KP995184726), T1s)); + T50 = VFMA(LDK(KP995184726), T2c, VMUL(LDK(KP098017140), T23)); + T51 = VADD(T4Z, T50); + T5i = VSUB(T50, T4Z); + } + T2f = VADD(T11, T2e); + T5C = VSUB(T5t, T5s); + T5E = VSUB(T5h, T5i); + T52 = VADD(T4Y, T51); + T5a = VSUB(T51, T4Y); + T5j = VADD(T5h, T5i); + T5u = 
VADD(T5s, T5t); + T5c = VSUB(T11, T2e); + } + { + V T4L, T56, T5D, T5G; + T4L = VADD(T2f, T4K); + T56 = VBYI(VADD(T52, T55)); + ST(&(xo[WS(os, 127)]), VSUB(T4L, T56), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(T4L, T56), ovs, &(xo[WS(os, 1)])); + T5D = VBYI(VSUB(T5B, T5C)); + T5G = VSUB(T5E, T5F); + ST(&(xo[WS(os, 47)]), VADD(T5D, T5G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 81)]), VSUB(T5G, T5D), ovs, &(xo[WS(os, 1)])); + } + { + V T5H, T5I, T57, T58; + T5H = VBYI(VADD(T5C, T5B)); + T5I = VADD(T5E, T5F); + ST(&(xo[WS(os, 17)]), VADD(T5H, T5I), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 111)]), VSUB(T5I, T5H), ovs, &(xo[WS(os, 1)])); + T57 = VSUB(T2f, T4K); + T58 = VBYI(VSUB(T55, T52)); + ST(&(xo[WS(os, 65)]), VSUB(T57, T58), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 63)]), VADD(T57, T58), ovs, &(xo[WS(os, 1)])); + } + { + V T5b, T5e, T5r, T5y; + T5b = VBYI(VSUB(T59, T5a)); + T5e = VSUB(T5c, T5d); + ST(&(xo[WS(os, 33)]), VADD(T5b, T5e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 95)]), VSUB(T5e, T5b), ovs, &(xo[WS(os, 1)])); + T5r = VADD(T5j, T5q); + T5y = VBYI(VADD(T5u, T5x)); + ST(&(xo[WS(os, 113)]), VSUB(T5r, T5y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VADD(T5r, T5y), ovs, &(xo[WS(os, 1)])); + } + { + V T5z, T5A, T5f, T5g; + T5z = VSUB(T5j, T5q); + T5A = VBYI(VSUB(T5x, T5u)); + ST(&(xo[WS(os, 79)]), VSUB(T5z, T5A), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VADD(T5z, T5A), ovs, &(xo[WS(os, 1)])); + T5f = VBYI(VADD(T5a, T59)); + T5g = VADD(T5c, T5d); + ST(&(xo[WS(os, 31)]), VADD(T5f, T5g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 97)]), VSUB(T5g, T5f), ovs, &(xo[WS(os, 1)])); + } + } + { + V T9i, T9B, T9t, T9x, T9O, Ta3, T9V, T9Z, T93, Ta0, Ta2, T9q, T9y, T9H, T9S; + V T9A; + { + V T9a, T9r, T9h, T9s; + { + V T96, T99, T9d, T9g; + T96 = VSUB(T94, T95); + T99 = VSUB(T97, T98); + T9a = VFMA(LDK(KP514102744), T96, VMUL(LDK(KP857728610), T99)); + T9r = VFNMS(LDK(KP514102744), T99, VMUL(LDK(KP857728610), T96)); + T9d = VSUB(T9b, T9c); + T9g = 
VSUB(T9e, T9f); + T9h = VFNMS(LDK(KP514102744), T9g, VMUL(LDK(KP857728610), T9d)); + T9s = VFMA(LDK(KP857728610), T9g, VMUL(LDK(KP514102744), T9d)); + } + T9i = VADD(T9a, T9h); + T9B = VSUB(T9s, T9r); + T9t = VADD(T9r, T9s); + T9x = VSUB(T9h, T9a); + } + { + V T9K, T9T, T9N, T9U; + { + V T9I, T9J, T9L, T9M; + T9I = VADD(T95, T94); + T9J = VADD(T97, T98); + T9K = VFMA(LDK(KP242980179), T9I, VMUL(LDK(KP970031253), T9J)); + T9T = VFNMS(LDK(KP242980179), T9J, VMUL(LDK(KP970031253), T9I)); + T9L = VADD(T9b, T9c); + T9M = VADD(T9f, T9e); + T9N = VFNMS(LDK(KP242980179), T9M, VMUL(LDK(KP970031253), T9L)); + T9U = VFMA(LDK(KP970031253), T9M, VMUL(LDK(KP242980179), T9L)); + } + T9O = VADD(T9K, T9N); + Ta3 = VSUB(T9U, T9T); + T9V = VADD(T9T, T9U); + T9Z = VSUB(T9N, T9K); + } + { + V T8V, T9F, T9m, T9Q, T92, T9R, T9p, T9G, T8U, T9k; + T8U = VSUB(T86, T85); + T8V = VSUB(T8T, T8U); + T9F = VADD(T8T, T8U); + T9k = VSUB(T75, T72); + T9m = VSUB(T9k, T9l); + T9Q = VADD(T9l, T9k); + { + V T8Y, T91, T9n, T9o; + T8Y = VFMA(LDK(KP471396736), T8W, VMUL(LDK(KP881921264), T8X)); + T91 = VFNMS(LDK(KP471396736), T90, VMUL(LDK(KP881921264), T8Z)); + T92 = VSUB(T8Y, T91); + T9R = VADD(T91, T8Y); + T9n = VFNMS(LDK(KP471396736), T8X, VMUL(LDK(KP881921264), T8W)); + T9o = VFMA(LDK(KP881921264), T90, VMUL(LDK(KP471396736), T8Z)); + T9p = VSUB(T9n, T9o); + T9G = VADD(T9o, T9n); + } + T93 = VADD(T8V, T92); + Ta0 = VSUB(T9R, T9Q); + Ta2 = VSUB(T9F, T9G); + T9q = VADD(T9m, T9p); + T9y = VSUB(T9p, T9m); + T9H = VADD(T9F, T9G); + T9S = VADD(T9Q, T9R); + T9A = VSUB(T8V, T92); + } + { + V T9j, T9u, Ta1, Ta4; + T9j = VADD(T93, T9i); + T9u = VBYI(VADD(T9q, T9t)); + ST(&(xo[WS(os, 117)]), VSUB(T9j, T9u), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VADD(T9j, T9u), ovs, &(xo[WS(os, 1)])); + Ta1 = VBYI(VSUB(T9Z, Ta0)); + Ta4 = VSUB(Ta2, Ta3); + ST(&(xo[WS(os, 37)]), VADD(Ta1, Ta4), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 91)]), VSUB(Ta4, Ta1), ovs, &(xo[WS(os, 1)])); + } + { + V Ta5, Ta6, T9v, T9w; + Ta5 = 
VBYI(VADD(Ta0, T9Z)); + Ta6 = VADD(Ta2, Ta3); + ST(&(xo[WS(os, 27)]), VADD(Ta5, Ta6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 101)]), VSUB(Ta6, Ta5), ovs, &(xo[WS(os, 1)])); + T9v = VSUB(T93, T9i); + T9w = VBYI(VSUB(T9t, T9q)); + ST(&(xo[WS(os, 75)]), VSUB(T9v, T9w), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VADD(T9v, T9w), ovs, &(xo[WS(os, 1)])); + } + { + V T9z, T9C, T9P, T9W; + T9z = VBYI(VSUB(T9x, T9y)); + T9C = VSUB(T9A, T9B); + ST(&(xo[WS(os, 43)]), VADD(T9z, T9C), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 85)]), VSUB(T9C, T9z), ovs, &(xo[WS(os, 1)])); + T9P = VADD(T9H, T9O); + T9W = VBYI(VADD(T9S, T9V)); + ST(&(xo[WS(os, 123)]), VSUB(T9P, T9W), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(T9P, T9W), ovs, &(xo[WS(os, 1)])); + } + { + V T9X, T9Y, T9D, T9E; + T9X = VSUB(T9H, T9O); + T9Y = VBYI(VSUB(T9V, T9S)); + ST(&(xo[WS(os, 69)]), VSUB(T9X, T9Y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VADD(T9X, T9Y), ovs, &(xo[WS(os, 1)])); + T9D = VBYI(VADD(T9y, T9x)); + T9E = VADD(T9A, T9B); + ST(&(xo[WS(os, 21)]), VADD(T9D, T9E), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 107)]), VSUB(T9E, T9D), ovs, &(xo[WS(os, 1)])); + } + } + { + V T68, T6r, T6j, T6n, T6E, T6T, T6L, T6P, T5T, T6Q, T6S, T6g, T6o, T6x, T6I; + V T6q; + { + V T60, T6h, T67, T6i; + { + V T5W, T5Z, T63, T66; + T5W = VSUB(T5U, T5V); + T5Z = VSUB(T5X, T5Y); + T60 = VFMA(LDK(KP427555093), T5W, VMUL(LDK(KP903989293), T5Z)); + T6h = VFNMS(LDK(KP427555093), T5Z, VMUL(LDK(KP903989293), T5W)); + T63 = VSUB(T61, T62); + T66 = VSUB(T64, T65); + T67 = VFNMS(LDK(KP427555093), T66, VMUL(LDK(KP903989293), T63)); + T6i = VFMA(LDK(KP903989293), T66, VMUL(LDK(KP427555093), T63)); + } + T68 = VADD(T60, T67); + T6r = VSUB(T6i, T6h); + T6j = VADD(T6h, T6i); + T6n = VSUB(T67, T60); + } + { + V T6A, T6J, T6D, T6K; + { + V T6y, T6z, T6B, T6C; + T6y = VADD(T5V, T5U); + T6z = VADD(T5X, T5Y); + T6A = VFMA(LDK(KP336889853), T6y, VMUL(LDK(KP941544065), T6z)); + T6J = VFNMS(LDK(KP336889853), T6z, 
VMUL(LDK(KP941544065), T6y)); + T6B = VADD(T61, T62); + T6C = VADD(T65, T64); + T6D = VFNMS(LDK(KP336889853), T6C, VMUL(LDK(KP941544065), T6B)); + T6K = VFMA(LDK(KP941544065), T6C, VMUL(LDK(KP336889853), T6B)); + } + T6E = VADD(T6A, T6D); + T6T = VSUB(T6K, T6J); + T6L = VADD(T6J, T6K); + T6P = VSUB(T6D, T6A); + } + { + V T5L, T6v, T6c, T6G, T5S, T6H, T6f, T6w, T5K, T6a; + T5K = VSUB(T4W, T4V); + T5L = VSUB(T5J, T5K); + T6v = VADD(T5J, T5K); + T6a = VSUB(TZ, TI); + T6c = VSUB(T6a, T6b); + T6G = VADD(T6b, T6a); + { + V T5O, T5R, T6d, T6e; + T5O = VFMA(LDK(KP773010453), T5M, VMUL(LDK(KP634393284), T5N)); + T5R = VFNMS(LDK(KP634393284), T5Q, VMUL(LDK(KP773010453), T5P)); + T5S = VSUB(T5O, T5R); + T6H = VADD(T5R, T5O); + T6d = VFNMS(LDK(KP634393284), T5M, VMUL(LDK(KP773010453), T5N)); + T6e = VFMA(LDK(KP634393284), T5P, VMUL(LDK(KP773010453), T5Q)); + T6f = VSUB(T6d, T6e); + T6w = VADD(T6e, T6d); + } + T5T = VADD(T5L, T5S); + T6Q = VSUB(T6H, T6G); + T6S = VSUB(T6v, T6w); + T6g = VADD(T6c, T6f); + T6o = VSUB(T6f, T6c); + T6x = VADD(T6v, T6w); + T6I = VADD(T6G, T6H); + T6q = VSUB(T5L, T5S); + } + { + V T69, T6k, T6R, T6U; + T69 = VADD(T5T, T68); + T6k = VBYI(VADD(T6g, T6j)); + ST(&(xo[WS(os, 119)]), VSUB(T69, T6k), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(T69, T6k), ovs, &(xo[WS(os, 1)])); + T6R = VBYI(VSUB(T6P, T6Q)); + T6U = VSUB(T6S, T6T); + ST(&(xo[WS(os, 39)]), VADD(T6R, T6U), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 89)]), VSUB(T6U, T6R), ovs, &(xo[WS(os, 1)])); + } + { + V T6V, T6W, T6l, T6m; + T6V = VBYI(VADD(T6Q, T6P)); + T6W = VADD(T6S, T6T); + ST(&(xo[WS(os, 25)]), VADD(T6V, T6W), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 103)]), VSUB(T6W, T6V), ovs, &(xo[WS(os, 1)])); + T6l = VSUB(T5T, T68); + T6m = VBYI(VSUB(T6j, T6g)); + ST(&(xo[WS(os, 73)]), VSUB(T6l, T6m), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VADD(T6l, T6m), ovs, &(xo[WS(os, 1)])); + } + { + V T6p, T6s, T6F, T6M; + T6p = VBYI(VSUB(T6n, T6o)); + T6s = VSUB(T6q, T6r); + ST(&(xo[WS(os, 
41)]), VADD(T6p, T6s), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 87)]), VSUB(T6s, T6p), ovs, &(xo[WS(os, 1)])); + T6F = VADD(T6x, T6E); + T6M = VBYI(VADD(T6I, T6L)); + ST(&(xo[WS(os, 121)]), VSUB(T6F, T6M), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(T6F, T6M), ovs, &(xo[WS(os, 1)])); + } + { + V T6N, T6O, T6t, T6u; + T6N = VSUB(T6x, T6E); + T6O = VBYI(VSUB(T6L, T6I)); + ST(&(xo[WS(os, 71)]), VSUB(T6N, T6O), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 57)]), VADD(T6N, T6O), ovs, &(xo[WS(os, 1)])); + T6t = VBYI(VADD(T6o, T6n)); + T6u = VADD(T6q, T6r); + ST(&(xo[WS(os, 23)]), VADD(T6t, T6u), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 105)]), VSUB(T6u, T6t), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 128, XSIMD_STRING("n1fv_128"), {938, 186, 144, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_128) (planner *p) { + X(kdft_register) (p, n1fv_128, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 13 -name n1fv_13 -include n1f.h */ + +/* + * This function contains 88 FP additions, 63 FP multiplications, + * (or, 31 additions, 6 multiplications, 57 fused multiply/add), + * 96 stack variables, 23 constants, and 26 memory accesses + */ +#include "n1f.h" + +static void n1fv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP904176221, +0.904176221990848204433795481776887926501523162); + DVK(KP575140729, +0.575140729474003121368385547455453388461001608); + DVK(KP300462606, +0.300462606288665774426601772289207995520941381); + DVK(KP516520780, +0.516520780623489722840901288569017135705033622); + DVK(KP522026385, +0.522026385161275033714027226654165028300441940); + DVK(KP957805992, +0.957805992594665126462521754605754580515587217); + DVK(KP600477271, +0.600477271932665282925769253334763009352012849); + DVK(KP251768516, +0.251768516431883313623436926934233488546674281); + DVK(KP503537032, +0.503537032863766627246873853868466977093348562); + DVK(KP769338817, +0.769338817572980603471413688209101117038278899); + DVK(KP859542535, +0.859542535098774820163672132761689612766401925); + DVK(KP581704778, +0.581704778510515730456870384989698884939833902); + DVK(KP853480001, +0.853480001859823990758994934970528322872359049); + DVK(KP083333333, +0.083333333333333333333333333333333333333333333); + DVK(KP226109445, +0.226109445035782405468510155372505010481906348); + DVK(KP301479260, 
+0.301479260047709873958013540496673347309208464); + DVK(KP686558370, +0.686558370781754340655719594850823015421401653); + DVK(KP514918778, +0.514918778086315755491789696138117261566051239); + DVK(KP038632954, +0.038632954644348171955506895830342264440241080); + DVK(KP612264650, +0.612264650376756543746494474777125408779395514); + DVK(KP302775637, +0.302775637731994646559610633735247973125648287); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) { + V T1, T7, T2, Tg, Tf, TN, Th, Tq, Ta, Tj, T5, Tr, Tk; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + { + V Td, Te, T8, T9, T3, T4; + Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T4 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tf = VADD(Td, Te); + TN = VSUB(Td, Te); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tq = VSUB(T8, T9); + Ta = VADD(T8, T9); + Tj = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T5 = VADD(T3, T4); + Tr = VSUB(T4, T3); + Tk = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + } + { + V Tt, Ti, Ty, Tb, Ts, TQ, Tx, T6, Tu, Tl; + Tt = VSUB(Tg, Th); + Ti = VADD(Tg, Th); + Ty = VFMS(LDK(KP500000000), Ta, T7); + Tb = VADD(T7, Ta); + Ts = VSUB(Tq, Tr); + TQ = VADD(Tr, Tq); + Tx = VFNMS(LDK(KP500000000), T5, T2); + T6 = VADD(T2, T5); + Tu = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + { + V TK, Tz, Tc, TX, Tv, TO, TL, Tm; + TK = VADD(Tx, Ty); + Tz = VSUB(Tx, Ty); + Tc = VADD(T6, Tb); + TX = VSUB(T6, Tb); + Tv = 
VSUB(Tt, Tu); + TO = VADD(Tt, Tu); + TL = VSUB(Ti, Tl); + Tm = VADD(Ti, Tl); + { + V TF, Tw, TP, TY, TT, TM, TA, Tn; + TF = VSUB(Ts, Tv); + Tw = VADD(Ts, Tv); + TP = VFNMS(LDK(KP500000000), TO, TN); + TY = VADD(TN, TO); + TT = VFNMS(LDK(KP866025403), TL, TK); + TM = VFMA(LDK(KP866025403), TL, TK); + TA = VFNMS(LDK(KP500000000), Tm, Tf); + Tn = VADD(Tf, Tm); + { + V T1f, T1n, TI, T18, T1k, T1c, TD, T17, T10, T1m, T16, T1e, TU, TR; + TU = VFNMS(LDK(KP866025403), TQ, TP); + TR = VFMA(LDK(KP866025403), TQ, TP); + { + V TZ, T15, TE, TB; + TZ = VFMA(LDK(KP302775637), TY, TX); + T15 = VFNMS(LDK(KP302775637), TX, TY); + TE = VSUB(Tz, TA); + TB = VADD(Tz, TA); + { + V TH, To, TV, T13; + TH = VSUB(Tc, Tn); + To = VADD(Tc, Tn); + TV = VFNMS(LDK(KP612264650), TU, TT); + T13 = VFMA(LDK(KP612264650), TT, TU); + { + V TS, T12, TG, T1b; + TS = VFNMS(LDK(KP038632954), TR, TM); + T12 = VFMA(LDK(KP038632954), TM, TR); + TG = VFNMS(LDK(KP514918778), TF, TE); + T1b = VFMA(LDK(KP686558370), TE, TF); + { + V TC, T1a, Tp, TW, T14; + TC = VFMA(LDK(KP301479260), TB, Tw); + T1a = VFNMS(LDK(KP226109445), Tw, TB); + Tp = VFNMS(LDK(KP083333333), To, T1); + ST(&(xo[0]), VADD(T1, To), ovs, &(xo[0])); + T1f = VFMA(LDK(KP853480001), TV, TS); + TW = VFNMS(LDK(KP853480001), TV, TS); + T1n = VFMA(LDK(KP853480001), T13, T12); + T14 = VFNMS(LDK(KP853480001), T13, T12); + TI = VFMA(LDK(KP581704778), TH, TG); + T18 = VFNMS(LDK(KP859542535), TG, TH); + T1k = VFMA(LDK(KP769338817), T1b, T1a); + T1c = VFNMS(LDK(KP769338817), T1b, T1a); + TD = VFMA(LDK(KP503537032), TC, Tp); + T17 = VFNMS(LDK(KP251768516), TC, Tp); + T10 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), TZ, TW)); + T1m = VFNMS(LDK(KP522026385), TW, TZ); + T16 = VMUL(LDK(KP600477271), VFMA(LDK(KP957805992), T15, T14)); + T1e = VFNMS(LDK(KP522026385), T14, T15); + } + } + } + } + { + V T1o, T1q, T1g, T1i, T1d, T1h, T1l, T1p; + { + V T11, TJ, T19, T1j; + T11 = VFMA(LDK(KP516520780), TI, TD); + TJ = VFNMS(LDK(KP516520780), TI, TD); + T19 = 
VFMA(LDK(KP300462606), T18, T17); + T1j = VFNMS(LDK(KP300462606), T18, T17); + T1o = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1n, T1m)); + T1q = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1n, T1m)); + T1g = VMUL(LDK(KP575140729), VFMA(LDK(KP904176221), T1f, T1e)); + T1i = VMUL(LDK(KP575140729), VFNMS(LDK(KP904176221), T1f, T1e)); + ST(&(xo[WS(os, 12)]), VFNMSI(T16, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(T16, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFMAI(T10, TJ), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(T10, TJ), ovs, &(xo[WS(os, 1)])); + T1d = VFNMS(LDK(KP503537032), T1c, T19); + T1h = VFMA(LDK(KP503537032), T1c, T19); + T1l = VFNMS(LDK(KP503537032), T1k, T1j); + T1p = VFMA(LDK(KP503537032), T1k, T1j); + } + ST(&(xo[WS(os, 9)]), VFMAI(T1g, T1d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(T1g, T1d), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFNMSI(T1i, T1h), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(T1i, T1h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T1o, T1l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFNMSI(T1o, T1l), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VFMAI(T1q, T1p), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(T1q, T1p), ovs, &(xo[0])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 13, XSIMD_STRING("n1fv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_13) (planner *p) { + X(kdft_register) (p, n1fv_13, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 13 -name n1fv_13 -include n1f.h */ + +/* + * This function contains 88 FP additions, 34 FP multiplications, + * (or, 69 additions, 15 multiplications, 19 fused multiply/add), + * 60 stack variables, 20 constants, and 26 memory accesses + */ +#include "n1f.h" + +static void n1fv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP2_000000000, 
+2.000000000000000000000000000000000000000000000); + DVK(KP083333333, +0.083333333333333333333333333333333333333333333); + DVK(KP075902986, +0.075902986037193865983102897245103540356428373); + DVK(KP251768516, +0.251768516431883313623436926934233488546674281); + DVK(KP132983124, +0.132983124607418643793760531921092974399165133); + DVK(KP258260390, +0.258260390311744861420450644284508567852516811); + DVK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DVK(KP300238635, +0.300238635966332641462884626667381504676006424); + DVK(KP011599105, +0.011599105605768290721655456654083252189827041); + DVK(KP156891391, +0.156891391051584611046832726756003269660212636); + DVK(KP256247671, +0.256247671582936600958684654061725059144125175); + DVK(KP174138601, +0.174138601152135905005660794929264742616964676); + DVK(KP575140729, +0.575140729474003121368385547455453388461001608); + DVK(KP503537032, +0.503537032863766627246873853868466977093348562); + DVK(KP113854479, +0.113854479055790798974654345867655310534642560); + DVK(KP265966249, +0.265966249214837287587521063842185948798330267); + DVK(KP387390585, +0.387390585467617292130675966426762851778775217); + DVK(KP300462606, +0.300462606288665774426601772289207995520941381); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(26, is), MAKE_VOLATILE_STRIDE(26, os)) { + V TW, Tb, Tm, Tu, TC, TR, TX, TK, TU, Tz, TB, TN, TT; + TW = LD(&(xi[0]), ivs, &(xi[0])); + { + V T3, TH, Tl, Tw, Tp, Tg, Tv, To, T6, Tr, T9, Ts, Ta, TI, T1; + V T2, Tq, Tt; + T1 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + TH = VADD(T1, T2); + { + V Th, Ti, Tj, Tk; + Th = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Ti = LD(&(xi[WS(is, 10)]), ivs, 
&(xi[0])); + Tj = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tk = VADD(Ti, Tj); + Tl = VADD(Th, Tk); + Tw = VSUB(Ti, Tj); + Tp = VFNMS(LDK(KP500000000), Tk, Th); + } + { + V Tc, Td, Te, Tf; + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tf = VADD(Td, Te); + Tg = VADD(Tc, Tf); + Tv = VSUB(Td, Te); + To = VFNMS(LDK(KP500000000), Tf, Tc); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + Tr = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + Ts = VADD(T7, T8); + } + Ta = VADD(T6, T9); + TI = VADD(Tr, Ts); + Tb = VADD(T3, Ta); + Tm = VSUB(Tg, Tl); + Tq = VSUB(To, Tp); + Tt = VMUL(LDK(KP866025403), VSUB(Tr, Ts)); + Tu = VADD(Tq, Tt); + TC = VSUB(Tq, Tt); + { + V TP, TQ, TG, TJ; + TP = VADD(Tg, Tl); + TQ = VADD(TH, TI); + TR = VMUL(LDK(KP300462606), VSUB(TP, TQ)); + TX = VADD(TP, TQ); + TG = VADD(To, Tp); + TJ = VFNMS(LDK(KP500000000), TI, TH); + TK = VSUB(TG, TJ); + TU = VADD(TG, TJ); + } + { + V Tx, Ty, TL, TM; + Tx = VMUL(LDK(KP866025403), VSUB(Tv, Tw)); + Ty = VFNMS(LDK(KP500000000), Ta, T3); + Tz = VSUB(Tx, Ty); + TB = VADD(Tx, Ty); + TL = VADD(Tv, Tw); + TM = VSUB(T6, T9); + TN = VSUB(TL, TM); + TT = VADD(TL, TM); + } + } + ST(&(xo[0]), VADD(TW, TX), ovs, &(xo[0])); + { + V T19, T1n, T14, T13, T1f, T1k, Tn, TE, T1e, T1j, TS, T1m, TZ, T1c, TA; + V TD; + { + V T17, T18, T11, T12; + T17 = VFMA(LDK(KP387390585), TN, VMUL(LDK(KP265966249), TK)); + T18 = VFNMS(LDK(KP503537032), TU, VMUL(LDK(KP113854479), TT)); + T19 = VSUB(T17, T18); + T1n = VADD(T17, T18); + T14 = VFMA(LDK(KP575140729), Tm, VMUL(LDK(KP174138601), Tb)); + T11 = VFNMS(LDK(KP156891391), TB, VMUL(LDK(KP256247671), TC)); + T12 = VFMA(LDK(KP011599105), Tz, VMUL(LDK(KP300238635), Tu)); + T13 = VSUB(T11, T12); + T1f = VADD(T14, 
T13); + T1k = VMUL(LDK(KP1_732050807), VADD(T11, T12)); + } + Tn = VFNMS(LDK(KP174138601), Tm, VMUL(LDK(KP575140729), Tb)); + TA = VFNMS(LDK(KP300238635), Tz, VMUL(LDK(KP011599105), Tu)); + TD = VFMA(LDK(KP256247671), TB, VMUL(LDK(KP156891391), TC)); + TE = VSUB(TA, TD); + T1e = VMUL(LDK(KP1_732050807), VADD(TD, TA)); + T1j = VSUB(Tn, TE); + { + V TO, T1b, TV, TY, T1a; + TO = VFNMS(LDK(KP132983124), TN, VMUL(LDK(KP258260390), TK)); + T1b = VSUB(TR, TO); + TV = VFMA(LDK(KP251768516), TT, VMUL(LDK(KP075902986), TU)); + TY = VFNMS(LDK(KP083333333), TX, TW); + T1a = VSUB(TY, TV); + TS = VFMA(LDK(KP2_000000000), TO, TR); + T1m = VADD(T1b, T1a); + TZ = VFMA(LDK(KP2_000000000), TV, TY); + T1c = VSUB(T1a, T1b); + } + { + V TF, T10, T1l, T1o; + TF = VBYI(VFMA(LDK(KP2_000000000), TE, Tn)); + T10 = VADD(TS, TZ); + ST(&(xo[WS(os, 1)]), VADD(TF, T10), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 12)]), VSUB(T10, TF), ovs, &(xo[0])); + { + V T15, T16, T1p, T1q; + T15 = VBYI(VFMS(LDK(KP2_000000000), T13, T14)); + T16 = VSUB(TZ, TS); + ST(&(xo[WS(os, 5)]), VADD(T15, T16), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VSUB(T16, T15), ovs, &(xo[0])); + T1p = VADD(T1n, T1m); + T1q = VBYI(VADD(T1j, T1k)); + ST(&(xo[WS(os, 4)]), VSUB(T1p, T1q), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VADD(T1q, T1p), ovs, &(xo[WS(os, 1)])); + } + T1l = VBYI(VSUB(T1j, T1k)); + T1o = VSUB(T1m, T1n); + ST(&(xo[WS(os, 3)]), VADD(T1l, T1o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VSUB(T1o, T1l), ovs, &(xo[0])); + { + V T1h, T1i, T1d, T1g; + T1h = VBYI(VSUB(T1e, T1f)); + T1i = VSUB(T1c, T19); + ST(&(xo[WS(os, 6)]), VADD(T1h, T1i), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VSUB(T1i, T1h), ovs, &(xo[WS(os, 1)])); + T1d = VADD(T19, T1c); + T1g = VBYI(VADD(T1e, T1f)); + ST(&(xo[WS(os, 2)]), VSUB(T1d, T1g), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VADD(T1g, T1d), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 13, XSIMD_STRING("n1fv_13"), {69, 15, 19, 0}, &GENUS, 0, 0, 
0, 0 }; + +void XSIMD(codelet_n1fv_13) (planner *p) { + X(kdft_register) (p, n1fv_13, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n1fv_14 -include n1f.h */ + +/* + * This function contains 74 FP additions, 48 FP multiplications, + * (or, 32 additions, 6 multiplications, 42 fused multiply/add), + * 63 stack variables, 6 constants, and 28 memory accesses + */ +#include "n1f.h" + +static void n1fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, 
+0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V TH, T3, TP, Tn, Ta, Ts, TW, TK, TO, Tk, TM, Tg, TL, Td, T1; + V T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V Ti, TI, T6, TJ, T9, Tj, Te, Tf, Tb, Tc; + { + V T4, T5, T7, T8, Tl, Tm; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TH = VADD(T1, T2); + T3 = VSUB(T1, T2); + TI = VADD(T4, T5); + T6 = VSUB(T4, T5); + TJ = VADD(T7, T8); + T9 = VSUB(T7, T8); + TP = VADD(Tl, Tm); + Tn = VSUB(Tl, Tm); + Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + Ta = VADD(T6, T9); + Ts = VSUB(T9, T6); + TW = VSUB(TJ, TI); + TK = VADD(TI, TJ); + TO = VADD(Ti, Tj); + Tk = VSUB(Ti, Tj); + TM = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + TL = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + } + { + V T18, TB, T13, TY, TG, Tw, T11, Tr, T16, TT, Tz, TE, TU, TQ; + TU = VSUB(TO, TP); + TQ = VADD(TO, TP); + { + V Tt, To, TV, TN; + Tt = VSUB(Tn, Tk); + To = VADD(Tk, Tn); + TV = VSUB(TL, TM); + TN = VADD(TL, TM); + { + V Tu, Th, TZ, T17; + Tu = VSUB(Tg, Td); 
+ Th = VADD(Td, Tg); + TZ = VFNMS(LDK(KP356895867), TK, TQ); + T17 = VFNMS(LDK(KP554958132), TU, TW); + { + V Tp, TA, T14, TR; + Tp = VFNMS(LDK(KP356895867), Ta, To); + TA = VFMA(LDK(KP554958132), Tt, Ts); + ST(&(xo[0]), VADD(TH, VADD(TK, VADD(TN, TQ))), ovs, &(xo[0])); + T14 = VFNMS(LDK(KP356895867), TN, TK); + TR = VFNMS(LDK(KP356895867), TQ, TN); + { + V T12, TX, Tx, TC; + T12 = VFMA(LDK(KP554958132), TV, TU); + TX = VFMA(LDK(KP554958132), TW, TV); + ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)])); + Tx = VFNMS(LDK(KP356895867), Th, Ta); + TC = VFNMS(LDK(KP356895867), To, Th); + { + V TF, Tv, T10, Tq; + TF = VFNMS(LDK(KP554958132), Ts, Tu); + Tv = VFMA(LDK(KP554958132), Tu, Tt); + T10 = VFNMS(LDK(KP692021471), TZ, TN); + T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV)); + Tq = VFNMS(LDK(KP692021471), Tp, Th); + TB = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TA, Tu)); + { + V T15, TS, Ty, TD; + T15 = VFNMS(LDK(KP692021471), T14, TQ); + TS = VFNMS(LDK(KP692021471), TR, TK); + T13 = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), T12, TW)); + TY = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TX, TU)); + Ty = VFNMS(LDK(KP692021471), Tx, To); + TD = VFNMS(LDK(KP692021471), TC, Ta); + TG = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TF, Tt)); + Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts)); + T11 = VFNMS(LDK(KP900968867), T10, TH); + Tr = VFNMS(LDK(KP900968867), Tq, T3); + T16 = VFNMS(LDK(KP900968867), T15, TH); + TT = VFNMS(LDK(KP900968867), TS, TH); + Tz = VFNMS(LDK(KP900968867), Ty, T3); + TE = VFNMS(LDK(KP900968867), TD, T3); + } + } + } + } + } + } + ST(&(xo[WS(os, 12)]), VFNMSI(T13, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(T13, T11), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VFMAI(Tw, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(Tw, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFNMSI(T18, T16), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFMAI(T18, T16), ovs, &(xo[0])); + 
ST(&(xo[WS(os, 10)]), VFNMSI(TY, TT), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(TY, TT), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(TB, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFNMSI(TB, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFMAI(TG, TE), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFNMSI(TG, TE), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n1fv_14"), {32, 6, 42, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_14) (planner *p) { + X(kdft_register) (p, n1fv_14, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n1fv_14 -include n1f.h */ + +/* + * This function contains 74 FP additions, 36 FP multiplications, + * (or, 50 additions, 12 multiplications, 24 fused multiply/add), + * 33 stack variables, 6 constants, and 28 memory accesses + */ +#include "n1f.h" + +static void n1fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V T3, Ty, To, TK, Tr, TE, Ta, TJ, Tq, TB, Th, TL, Ts, TH, T1; + V T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Ty = VADD(T1, T2); + { + V Tk, TC, Tn, TD; + { + V Ti, Tj, Tl, Tm; + Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tj = 
LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tk = VSUB(Ti, Tj); + TC = VADD(Ti, Tj); + Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tn = VSUB(Tl, Tm); + TD = VADD(Tl, Tm); + } + To = VADD(Tk, Tn); + TK = VSUB(TC, TD); + Tr = VSUB(Tn, Tk); + TE = VADD(TC, TD); + } + { + V T6, Tz, T9, TA; + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tz = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + TA = VADD(T7, T8); + } + Ta = VADD(T6, T9); + TJ = VSUB(TA, Tz); + Tq = VSUB(T9, T6); + TB = VADD(Tz, TA); + } + { + V Td, TF, Tg, TG; + { + V Tb, Tc, Te, Tf; + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TF = VADD(Tb, Tc); + Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tg = VSUB(Te, Tf); + TG = VADD(Te, Tf); + } + Th = VADD(Td, Tg); + TL = VSUB(TF, TG); + Ts = VSUB(Tg, Td); + TH = VADD(TF, TG); + } + ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Ty, VADD(TB, VADD(TH, TE))), ovs, &(xo[0])); + { + V Tt, Tp, TP, TQ; + Tt = VBYI(VFNMS(LDK(KP781831482), Tr, VFNMS(LDK(KP433883739), Ts, VMUL(LDK(KP974927912), Tq)))); + Tp = VFMA(LDK(KP623489801), To, VFNMS(LDK(KP900968867), Th, VFNMS(LDK(KP222520933), Ta, T3))); + ST(&(xo[WS(os, 5)]), VSUB(Tp, Tt), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(Tp, Tt), ovs, &(xo[WS(os, 1)])); + TP = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK)))); + TQ = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TB, Ty))); + ST(&(xo[WS(os, 2)]), VADD(TP, TQ), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VSUB(TQ, TP), ovs, &(xo[0])); + } + { + V Tv, Tu, TM, TI; + Tv = 
VBYI(VFMA(LDK(KP781831482), Tq, VFMA(LDK(KP974927912), Ts, VMUL(LDK(KP433883739), Tr)))); + Tu = VFMA(LDK(KP623489801), Ta, VFNMS(LDK(KP900968867), To, VFNMS(LDK(KP222520933), Th, T3))); + ST(&(xo[WS(os, 13)]), VSUB(Tu, Tv), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(Tu, Tv), ovs, &(xo[WS(os, 1)])); + TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ)))); + TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TH, Ty))); + ST(&(xo[WS(os, 6)]), VSUB(TI, TM), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(TM, TI), ovs, &(xo[0])); + } + { + V TO, TN, Tx, Tw; + TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL)))); + TN = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP222520933), TE, VFNMS(LDK(KP900968867), TB, Ty))); + ST(&(xo[WS(os, 4)]), VSUB(TN, TO), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VADD(TO, TN), ovs, &(xo[0])); + Tx = VBYI(VFMA(LDK(KP433883739), Tq, VFNMS(LDK(KP781831482), Ts, VMUL(LDK(KP974927912), Tr)))); + Tw = VFMA(LDK(KP623489801), Th, VFNMS(LDK(KP222520933), To, VFNMS(LDK(KP900968867), Ta, T3))); + ST(&(xo[WS(os, 11)]), VSUB(Tw, Tx), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(Tw, Tx), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n1fv_14"), {50, 12, 24, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_14) (planner *p) { + X(kdft_register) (p, n1fv_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published 
by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name n1fv_15 -include n1f.h */ + +/* + * This function contains 78 FP additions, 49 FP multiplications, + * (or, 36 additions, 7 multiplications, 42 fused multiply/add), + * 78 stack variables, 8 constants, and 30 memory accesses + */ +#include "n1f.h" + +static void n1fv_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP910592997, +0.910592997310029334643087372129977886038870291); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(30, is), 
MAKE_VOLATILE_STRIDE(30, os)) { + V Tb, TX, TM, TQ, Th, TB, T5, Ti, Ta, TC, TN, Te, TG, Tq, Tj; + V T1, T2, T3; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + { + V T6, T7, T8, Tm, Tn, To; + T6 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + { + V T4, Tc, T9, Td, Tp; + Tb = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + TX = VSUB(T3, T2); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TM = VSUB(T8, T7); + T9 = VADD(T7, T8); + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tp = VADD(Tn, To); + TQ = VSUB(To, Tn); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TB = VFNMS(LDK(KP500000000), T4, T1); + T5 = VADD(T1, T4); + Ti = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Ta = VADD(T6, T9); + TC = VFNMS(LDK(KP500000000), T9, T6); + TN = VSUB(Td, Tc); + Te = VADD(Tc, Td); + TG = VFNMS(LDK(KP500000000), Tp, Tm); + Tq = VADD(Tm, Tp); + Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + } + } + { + V TY, TO, Tf, TD, TP, Tk; + TY = VADD(TM, TN); + TO = VSUB(TM, TN); + Tf = VADD(Tb, Te); + TD = VFNMS(LDK(KP500000000), Te, Tb); + TP = VSUB(Tj, Ti); + Tk = VADD(Ti, Tj); + { + V Tx, Tg, TE, TU, TZ, TR, Tl, TF; + Tx = VSUB(Ta, Tf); + Tg = VADD(Ta, Tf); + TE = VADD(TC, TD); + TU = VSUB(TC, TD); + TZ = VADD(TP, TQ); + TR = VSUB(TP, TQ); + Tl = VADD(Th, Tk); + TF = VFNMS(LDK(KP500000000), Tk, Th); + { + V T12, T10, T18, TS, Tw, Tr, TH, TV, T11, T1g; + T12 = VSUB(TY, TZ); + T10 = VADD(TY, TZ); + T18 = VFNMS(LDK(KP618033988), TO, TR); + TS = VFMA(LDK(KP618033988), TR, TO); + Tw = VSUB(Tl, Tq); + Tr = VADD(Tl, Tq); + TH = VADD(TF, TG); + TV = VSUB(TF, TG); + T11 = VFNMS(LDK(KP250000000), T10, TX); + T1g = VMUL(LDK(KP866025403), VADD(TX, T10)); + 
{ + V TA, Ty, Tu, TK, TI, T1a, TW, T1b, T13, Tt, Ts, TJ, T1f; + TA = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tx)); + Ty = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tx, Tw)); + Ts = VADD(Tg, Tr); + Tu = VSUB(Tg, Tr); + TK = VSUB(TE, TH); + TI = VADD(TE, TH); + T1a = VFNMS(LDK(KP618033988), TU, TV); + TW = VFMA(LDK(KP618033988), TV, TU); + T1b = VFNMS(LDK(KP559016994), T12, T11); + T13 = VFMA(LDK(KP559016994), T12, T11); + ST(&(xo[0]), VADD(T5, Ts), ovs, &(xo[0])); + Tt = VFNMS(LDK(KP250000000), Ts, T5); + TJ = VFNMS(LDK(KP250000000), TI, TB); + T1f = VADD(TB, TI); + { + V T1c, T1e, T16, T14, Tv, Tz, T17, TL; + T1c = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T1b, T1a)); + T1e = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T1b, T1a)); + T16 = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T13, TW)); + T14 = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T13, TW)); + Tv = VFNMS(LDK(KP559016994), Tu, Tt); + Tz = VFMA(LDK(KP559016994), Tu, Tt); + T17 = VFNMS(LDK(KP559016994), TK, TJ); + TL = VFMA(LDK(KP559016994), TK, TJ); + ST(&(xo[WS(os, 10)]), VFMAI(T1g, T1f), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(T1g, T1f), ovs, &(xo[WS(os, 1)])); + { + V T19, T1d, T15, TT; + ST(&(xo[WS(os, 12)]), VFMAI(Ty, Tv), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFNMSI(Ty, Tv), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFMAI(TA, Tz), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFNMSI(TA, Tz), ovs, &(xo[0])); + T19 = VFMA(LDK(KP823639103), T18, T17); + T1d = VFNMS(LDK(KP823639103), T18, T17); + T15 = VFNMS(LDK(KP823639103), TS, TL); + TT = VFMA(LDK(KP823639103), TS, TL); + ST(&(xo[WS(os, 2)]), VFMAI(T1c, T19), ovs, &(xo[0])); + ST(&(xo[WS(os, 13)]), VFNMSI(T1c, T19), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T1e, T1d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFNMSI(T1e, T1d), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T16, T15), ovs, &(xo[0])); + ST(&(xo[WS(os, 11)]), VFNMSI(T16, T15), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 14)]), 
VFMAI(T14, TT), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFNMSI(T14, TT), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 15, XSIMD_STRING("n1fv_15"), {36, 7, 42, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_15) (planner *p) { + X(kdft_register) (p, n1fv_15, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name n1fv_15 -include n1f.h */ + +/* + * This function contains 78 FP additions, 25 FP multiplications, + * (or, 64 additions, 11 multiplications, 14 fused multiply/add), + * 55 stack variables, 10 constants, and 30 memory accesses + */ +#include "n1f.h" + +static void n1fv_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP216506350, +0.216506350946109661690930792688234045867850657); + DVK(KP509036960, +0.509036960455127183450980863393907648510733164); + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP484122918, +0.484122918275927110647408174972799951354115213); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(30, is), MAKE_VOLATILE_STRIDE(30, os)) { + V T5, T10, TB, TO, TU, TV, TR, Ta, Tf, Tg, Tl, Tq, Tr, TE, TH; + V TI, TZ, T11, T1f, T1g; + { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T4 = VADD(T2, 
T3); + T5 = VADD(T1, T4); + T10 = VSUB(T3, T2); + TB = VFNMS(LDK(KP500000000), T4, T1); + } + { + V T6, T9, TC, TP, Tm, Tp, TG, TN, Tb, Te, TD, TQ, Th, Tk, TF; + V TM, TX, TY; + { + V T7, T8, Tn, To; + T6 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T9 = VADD(T7, T8); + TC = VFNMS(LDK(KP500000000), T9, T6); + TP = VSUB(T8, T7); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tp = VADD(Tn, To); + TG = VFNMS(LDK(KP500000000), Tp, Tm); + TN = VSUB(To, Tn); + } + { + V Tc, Td, Ti, Tj; + Tb = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = VADD(Tc, Td); + TD = VFNMS(LDK(KP500000000), Te, Tb); + TQ = VSUB(Td, Tc); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Ti = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tk = VADD(Ti, Tj); + TF = VFNMS(LDK(KP500000000), Tk, Th); + TM = VSUB(Tj, Ti); + } + TO = VSUB(TM, TN); + TU = VSUB(TF, TG); + TV = VSUB(TC, TD); + TR = VSUB(TP, TQ); + Ta = VADD(T6, T9); + Tf = VADD(Tb, Te); + Tg = VADD(Ta, Tf); + Tl = VADD(Th, Tk); + Tq = VADD(Tm, Tp); + Tr = VADD(Tl, Tq); + TE = VADD(TC, TD); + TH = VADD(TF, TG); + TI = VADD(TE, TH); + TX = VADD(TP, TQ); + TY = VADD(TM, TN); + TZ = VMUL(LDK(KP484122918), VSUB(TX, TY)); + T11 = VADD(TX, TY); + } + T1f = VADD(TB, TI); + T1g = VBYI(VMUL(LDK(KP866025403), VADD(T10, T11))); + ST(&(xo[WS(os, 5)]), VSUB(T1f, T1g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VADD(T1f, T1g), ovs, &(xo[0])); + { + V Tu, Ts, Tt, Ty, TA, Tw, Tx, Tz, Tv; + Tu = VMUL(LDK(KP559016994), VSUB(Tg, Tr)); + Ts = VADD(Tg, Tr); + Tt = VFNMS(LDK(KP250000000), Ts, T5); + Tw = VSUB(Tl, Tq); + Tx = VSUB(Ta, Tf); + Ty = VBYI(VFNMS(LDK(KP587785252), Tx, VMUL(LDK(KP951056516), Tw))); + TA = 
VBYI(VFMA(LDK(KP951056516), Tx, VMUL(LDK(KP587785252), Tw))); + ST(&(xo[0]), VADD(T5, Ts), ovs, &(xo[0])); + Tz = VADD(Tu, Tt); + ST(&(xo[WS(os, 6)]), VSUB(Tz, TA), ovs, &(xo[0])); + ST(&(xo[WS(os, 9)]), VADD(TA, Tz), ovs, &(xo[WS(os, 1)])); + Tv = VSUB(Tt, Tu); + ST(&(xo[WS(os, 3)]), VSUB(Tv, Ty), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 12)]), VADD(Ty, Tv), ovs, &(xo[0])); + } + { + V TS, TW, T1b, T18, T13, T1a, TL, T17, T12, TJ, TK; + TS = VFNMS(LDK(KP509036960), TR, VMUL(LDK(KP823639103), TO)); + TW = VFNMS(LDK(KP587785252), TV, VMUL(LDK(KP951056516), TU)); + T1b = VFMA(LDK(KP951056516), TV, VMUL(LDK(KP587785252), TU)); + T18 = VFMA(LDK(KP823639103), TR, VMUL(LDK(KP509036960), TO)); + T12 = VFNMS(LDK(KP216506350), T11, VMUL(LDK(KP866025403), T10)); + T13 = VSUB(TZ, T12); + T1a = VADD(TZ, T12); + TJ = VFNMS(LDK(KP250000000), TI, TB); + TK = VMUL(LDK(KP559016994), VSUB(TE, TH)); + TL = VSUB(TJ, TK); + T17 = VADD(TK, TJ); + { + V TT, T14, T1d, T1e; + TT = VSUB(TL, TS); + T14 = VBYI(VSUB(TW, T13)); + ST(&(xo[WS(os, 8)]), VSUB(TT, T14), ovs, &(xo[0])); + ST(&(xo[WS(os, 7)]), VADD(TT, T14), ovs, &(xo[WS(os, 1)])); + T1d = VSUB(T17, T18); + T1e = VBYI(VADD(T1b, T1a)); + ST(&(xo[WS(os, 11)]), VSUB(T1d, T1e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VADD(T1d, T1e), ovs, &(xo[0])); + } + { + V T15, T16, T19, T1c; + T15 = VADD(TL, TS); + T16 = VBYI(VADD(TW, T13)); + ST(&(xo[WS(os, 13)]), VSUB(T15, T16), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VADD(T15, T16), ovs, &(xo[0])); + T19 = VADD(T17, T18); + T1c = VBYI(VSUB(T1a, T1b)); + ST(&(xo[WS(os, 14)]), VSUB(T19, T1c), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(T19, T1c), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 15, XSIMD_STRING("n1fv_15"), {64, 11, 14, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_15) (planner *p) { + X(kdft_register) (p, n1fv_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name n1fv_16 -include n1f.h */ + +/* + * This function contains 72 FP additions, 34 FP multiplications, + * (or, 38 additions, 0 multiplications, 34 fused multiply/add), + * 54 stack variables, 3 constants, and 32 memory accesses + */ +#include "n1f.h" + +static void n1fv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + 
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V T7, Tu, TF, TB, T13, TL, TO, TX, TC, Te, TP, Th, TQ, Tk, TW; + V T16; + { + V TH, TU, Tz, Tf, TK, TV, TA, TM, Ta, TN, Td, Tg, Ti, Tj; + { + V T1, T2, T4, T5, To, Tp, Tr, Ts; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tp = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tr = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + { + V T8, TJ, Tq, TI, Tt, T9, Tb, Tc, T3, T6; + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + TH = VSUB(T1, T2); + T3 = VADD(T1, T2); + TU = VSUB(T4, T5); + T6 = VADD(T4, T5); + TJ = VSUB(To, Tp); + Tq = VADD(To, Tp); + TI = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T7 = VSUB(T3, T6); + Tz = VADD(T3, T6); + Tf = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TK = VADD(TI, TJ); + TV = VSUB(TJ, TI); + TA = VADD(Tt, Tq); + Tu = VSUB(Tq, Tt); + TM = VSUB(T8, T9); + Ta = VADD(T8, T9); + TN = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + Tg = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + } + TF = VSUB(Tz, TA); + TB = VADD(Tz, TA); + T13 = VFNMS(LDK(KP707106781), TK, TH); + TL = VFMA(LDK(KP707106781), TK, TH); + TO = VFNMS(LDK(KP414213562), TN, TM); + TX = VFMA(LDK(KP414213562), TM, TN); + TC = VADD(Ta, Td); + Te = VSUB(Ta, Td); + TP = VSUB(Tf, Tg); + Th = VADD(Tf, Tg); + TQ = VSUB(Tj, Ti); + Tk = VADD(Ti, Tj); + TW = VFNMS(LDK(KP707106781), TV, TU); + T16 = VFMA(LDK(KP707106781), TV, TU); + } + { + V TY, TR, Tl, TD; + TY = VFMA(LDK(KP414213562), TP, TQ); + TR = 
VFNMS(LDK(KP414213562), TQ, TP); + Tl = VSUB(Th, Tk); + TD = VADD(Th, Tk); + { + V TS, T17, TZ, T14; + TS = VADD(TO, TR); + T17 = VSUB(TR, TO); + TZ = VSUB(TX, TY); + T14 = VADD(TX, TY); + { + V TE, TG, Tm, Tv; + TE = VADD(TC, TD); + TG = VSUB(TD, TC); + Tm = VADD(Te, Tl); + Tv = VSUB(Tl, Te); + { + V T18, T1a, TT, T11; + T18 = VFNMS(LDK(KP923879532), T17, T16); + T1a = VFMA(LDK(KP923879532), T17, T16); + TT = VFNMS(LDK(KP923879532), TS, TL); + T11 = VFMA(LDK(KP923879532), TS, TL); + { + V T15, T19, T10, T12; + T15 = VFNMS(LDK(KP923879532), T14, T13); + T19 = VFMA(LDK(KP923879532), T14, T13); + T10 = VFNMS(LDK(KP923879532), TZ, TW); + T12 = VFMA(LDK(KP923879532), TZ, TW); + ST(&(xo[WS(os, 4)]), VFMAI(TG, TF), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(TG, TF), ovs, &(xo[0])); + ST(&(xo[0]), VADD(TB, TE), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VSUB(TB, TE), ovs, &(xo[0])); + { + V Tw, Ty, Tn, Tx; + Tw = VFNMS(LDK(KP707106781), Tv, Tu); + Ty = VFMA(LDK(KP707106781), Tv, Tu); + Tn = VFNMS(LDK(KP707106781), Tm, T7); + Tx = VFMA(LDK(KP707106781), Tm, T7); + ST(&(xo[WS(os, 3)]), VFMAI(T1a, T19), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFNMSI(T1a, T19), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFMAI(T18, T15), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(T18, T15), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(T12, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFMAI(T12, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T10, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(T10, TT), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 14)]), VFNMSI(Ty, Tx), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(Ty, Tx), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(Tw, Tn), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Tw, Tn), ovs, &(xo[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n1fv_16"), {38, 0, 34, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_16) (planner 
*p) { + X(kdft_register) (p, n1fv_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name n1fv_16 -include n1f.h */ + +/* + * This function contains 72 FP additions, 12 FP multiplications, + * (or, 68 additions, 8 multiplications, 4 fused multiply/add), + * 30 stack variables, 3 constants, and 32 memory accesses + */ +#include "n1f.h" + +static void n1fv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V Tp, T13, Tu, TN, Tm, T14, Tv, TY, T7, T17, Ty, TT, Te, T16, Tx; + V TQ; + { + V Tn, To, TM, Ts, Tt, TL; + Tn = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TM = VADD(Tn, To); + Ts = LD(&(xi[0]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TL = VADD(Ts, Tt); + Tp = VSUB(Tn, To); + T13 = VADD(TL, TM); + Tu = VSUB(Ts, Tt); + TN = VSUB(TL, TM); + } + { + V Ti, TW, Tl, TX; + { + V Tg, Th, Tj, Tk; + Tg = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Ti = VSUB(Tg, Th); + TW = VADD(Tg, Th); + Tj = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + TX = VADD(Tj, Tk); + } + Tm = VMUL(LDK(KP707106781), VSUB(Ti, Tl)); + T14 = VADD(TX, TW); + Tv = VMUL(LDK(KP707106781), VADD(Tl, Ti)); + TY = VSUB(TW, TX); + } + { + V T3, TR, T6, TS; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + TR = 
VADD(T1, T2); + T4 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + TS = VADD(T4, T5); + } + T7 = VFNMS(LDK(KP923879532), T6, VMUL(LDK(KP382683432), T3)); + T17 = VADD(TR, TS); + Ty = VFMA(LDK(KP923879532), T3, VMUL(LDK(KP382683432), T6)); + TT = VSUB(TR, TS); + } + { + V Ta, TO, Td, TP; + { + V T8, T9, Tb, Tc; + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + TO = VADD(T8, T9); + Tb = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TP = VADD(Tb, Tc); + } + Te = VFMA(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), Td)); + T16 = VADD(TO, TP); + Tx = VFNMS(LDK(KP382683432), Td, VMUL(LDK(KP923879532), Ta)); + TQ = VSUB(TO, TP); + } + { + V T15, T18, T19, T1a; + T15 = VADD(T13, T14); + T18 = VADD(T16, T17); + ST(&(xo[WS(os, 8)]), VSUB(T15, T18), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T15, T18), ovs, &(xo[0])); + T19 = VSUB(T13, T14); + T1a = VBYI(VSUB(T17, T16)); + ST(&(xo[WS(os, 12)]), VSUB(T19, T1a), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(T19, T1a), ovs, &(xo[0])); + } + { + V TV, T11, T10, T12, TU, TZ; + TU = VMUL(LDK(KP707106781), VADD(TQ, TT)); + TV = VADD(TN, TU); + T11 = VSUB(TN, TU); + TZ = VMUL(LDK(KP707106781), VSUB(TT, TQ)); + T10 = VBYI(VADD(TY, TZ)); + T12 = VBYI(VSUB(TZ, TY)); + ST(&(xo[WS(os, 14)]), VSUB(TV, T10), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VADD(T11, T12), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(TV, T10), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VSUB(T11, T12), ovs, &(xo[0])); + } + { + V Tr, TB, TA, TC; + { + V Tf, Tq, Tw, Tz; + Tf = VSUB(T7, Te); + Tq = VSUB(Tm, Tp); + Tr = VBYI(VSUB(Tf, Tq)); + TB = VBYI(VADD(Tq, Tf)); + Tw = VADD(Tu, Tv); + Tz = VADD(Tx, Ty); + TA = VSUB(Tw, Tz); + TC = VADD(Tw, Tz); + } + ST(&(xo[WS(os, 7)]), VADD(Tr, TA), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VSUB(TC, TB), ovs, 
&(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VSUB(TA, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(TB, TC), ovs, &(xo[WS(os, 1)])); + } + { + V TF, TJ, TI, TK; + { + V TD, TE, TG, TH; + TD = VSUB(Tu, Tv); + TE = VADD(Te, T7); + TF = VADD(TD, TE); + TJ = VSUB(TD, TE); + TG = VADD(Tp, Tm); + TH = VSUB(Ty, Tx); + TI = VBYI(VADD(TG, TH)); + TK = VBYI(VSUB(TH, TG)); + } + ST(&(xo[WS(os, 13)]), VSUB(TF, TI), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(TJ, TK), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(TF, TI), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VSUB(TJ, TK), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n1fv_16"), {68, 8, 4, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_16) (planner *p) { + X(kdft_register) (p, n1fv_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name n1fv_2 -include n1f.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "n1f.h" + +static void n1fv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + ST(&(xo[0]), VADD(T1, T2), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VSUB(T1, T2), ovs, &(xo[WS(os, 1)])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n1fv_2"), {2, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_2) (planner *p) { + X(kdft_register) (p, n1fv_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name n1fv_2 -include n1f.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "n1f.h" + +static void n1fv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { 
+ INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + ST(&(xo[WS(os, 1)]), VSUB(T1, T2), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(T1, T2), ovs, &(xo[0])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n1fv_2"), {2, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_2) (planner *p) { + X(kdft_register) (p, n1fv_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n1fv_20 -include n1f.h */ + +/* + * This function contains 104 FP additions, 50 FP multiplications, + * (or, 58 additions, 4 multiplications, 46 fused multiply/add), + * 71 stack variables, 4 constants, and 40 memory accesses + */ +#include "n1f.h" + +static void n1fv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V TU, TI, TP, TX, TM, TW, TT, TF; + { + V T3, Tm, T1r, T13, Ta, TN, TH, TA, TG, Tt, Th, TO, T1u, T1C, T1n; + V T1a, T1m, T1h, T1x, T1D, TE, Ti; + { + V T1, T2, Tk, Tl; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + { + V T14, T6, T1c, Tw, Tn, T1f, Tz, T17, T9, To, Tq, T1b, Td, Tr, Te; + V Tf, T15, Tp; + { + V Tx, Ty, T7, T8, Tb, Tc; + { + V T4, T5, Tu, Tv, T11, T12; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tu = 
LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tx = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + T11 = VADD(T1, T2); + Tm = VSUB(Tk, Tl); + T12 = VADD(Tk, Tl); + T14 = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1c = VADD(Tu, Tv); + Tw = VSUB(Tu, Tv); + Ty = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1r = VADD(T11, T12); + T13 = VSUB(T11, T12); + } + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1f = VADD(Tx, Ty); + Tz = VSUB(Tx, Ty); + T17 = VADD(T7, T8); + T9 = VSUB(T7, T8); + To = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1b = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + Tr = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + } + Ta = VADD(T6, T9); + TN = VSUB(T6, T9); + T15 = VADD(Tn, To); + Tp = VSUB(Tn, To); + TH = VSUB(Tz, Tw); + TA = VADD(Tw, Tz); + { + V T1d, T1v, T18, Ts, T1e, Tg, T16, T1s; + T1d = VSUB(T1b, T1c); + T1v = VADD(T1b, T1c); + T18 = VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + T1e = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + T16 = VSUB(T14, T15); + T1s = VADD(T14, T15); + { + V T1t, T19, T1w, T1g; + T1t = VADD(T17, T18); + T19 = VSUB(T17, T18); + TG = VSUB(Ts, Tp); + Tt = VADD(Tp, Ts); + T1w = VADD(T1e, T1f); + T1g = VSUB(T1e, T1f); + Th = VADD(Td, Tg); + TO = VSUB(Td, Tg); + T1u = VADD(T1s, T1t); + T1C = VSUB(T1s, T1t); + T1n = VSUB(T16, T19); + T1a = VADD(T16, T19); + T1m = VSUB(T1d, T1g); + T1h = VADD(T1d, T1g); + T1x = VADD(T1v, T1w); + T1D = VSUB(T1v, T1w); + } + } + } + } + TE = VSUB(Ta, Th); + Ti = VADD(Ta, Th); + { + V TL, T1k, T1A, Tj, TD, T1E, T1G, TK, TC, T1j, T1z, T1i, T1y, TB; + TL = VSUB(TA, Tt); + TB = VADD(Tt, TA); + T1i = VADD(T1a, T1h); + T1k = 
VSUB(T1a, T1h); + T1y = VADD(T1u, T1x); + T1A = VSUB(T1u, T1x); + Tj = VADD(T3, Ti); + TD = VFNMS(LDK(KP250000000), Ti, T3); + T1E = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1D, T1C)); + T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1C, T1D)); + TK = VFNMS(LDK(KP250000000), TB, Tm); + TC = VADD(Tm, TB); + T1j = VFNMS(LDK(KP250000000), T1i, T13); + ST(&(xo[0]), VADD(T1r, T1y), ovs, &(xo[0])); + T1z = VFNMS(LDK(KP250000000), T1y, T1r); + ST(&(xo[WS(os, 10)]), VADD(T13, T1i), ovs, &(xo[0])); + { + V T1p, T1l, T1o, T1q, T1F, T1B; + TU = VFNMS(LDK(KP618033988), TG, TH); + TI = VFMA(LDK(KP618033988), TH, TG); + TP = VFMA(LDK(KP618033988), TO, TN); + TX = VFNMS(LDK(KP618033988), TN, TO); + ST(&(xo[WS(os, 15)]), VFMAI(TC, Tj), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(TC, Tj), ovs, &(xo[WS(os, 1)])); + T1p = VFMA(LDK(KP559016994), T1k, T1j); + T1l = VFNMS(LDK(KP559016994), T1k, T1j); + T1o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1n, T1m)); + T1q = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1m, T1n)); + T1F = VFNMS(LDK(KP559016994), T1A, T1z); + T1B = VFMA(LDK(KP559016994), T1A, T1z); + ST(&(xo[WS(os, 14)]), VFMAI(T1q, T1p), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(T1q, T1p), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFNMSI(T1o, T1l), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(T1o, T1l), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VFNMSI(T1E, T1B), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T1E, T1B), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFMAI(T1G, T1F), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFNMSI(T1G, T1F), ovs, &(xo[0])); + TM = VFNMS(LDK(KP559016994), TL, TK); + TW = VFMA(LDK(KP559016994), TL, TK); + TT = VFNMS(LDK(KP559016994), TE, TD); + TF = VFMA(LDK(KP559016994), TE, TD); + } + } + } + { + V T10, TY, TQ, TS, TJ, TR, TZ, TV; + T10 = VFMA(LDK(KP951056516), TX, TW); + TY = VFNMS(LDK(KP951056516), TX, TW); + TQ = VFMA(LDK(KP951056516), TP, TM); + TS = VFNMS(LDK(KP951056516), TP, TM); + TJ = VFMA(LDK(KP951056516), TI, TF); + 
TR = VFNMS(LDK(KP951056516), TI, TF); + TZ = VFMA(LDK(KP951056516), TU, TT); + TV = VFNMS(LDK(KP951056516), TU, TT); + ST(&(xo[WS(os, 11)]), VFMAI(TS, TR), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(TS, TR), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFMAI(TQ, TJ), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(TQ, TJ), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFMAI(TY, TV), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFNMSI(TY, TV), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T10, TZ), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFNMSI(T10, TZ), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n1fv_20"), {58, 4, 46, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_20) (planner *p) { + X(kdft_register) (p, n1fv_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n1fv_20 -include n1f.h */ + +/* + * This function contains 104 FP additions, 24 FP multiplications, + * (or, 92 additions, 12 multiplications, 12 fused multiply/add), + * 53 stack variables, 4 constants, and 40 memory accesses + */ +#include "n1f.h" + +static void n1fv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V T3, T1B, Tm, T1i, TG, TN, TO, TH, T13, T16, T1k, T1u, T1v, T1z, T1r; + V T1s, T1y, T1a, T1d, T1j, Ti, TD, TB, TL, Tj, TC; + { + V T1, T2, T1g, Tk, Tl, T1h; + T1 = LD(&(xi[0]), 
ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T1g = VADD(T1, T2); + Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1h = VADD(Tk, Tl); + T3 = VSUB(T1, T2); + T1B = VADD(T1g, T1h); + Tm = VSUB(Tk, Tl); + T1i = VSUB(T1g, T1h); + } + { + V T6, T18, Tw, T12, Tz, T15, T9, T1b, Td, T11, Tp, T19, Ts, T1c, Tg; + V T14; + { + V T4, T5, Tu, Tv; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T18 = VADD(T4, T5); + Tu = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tw = VSUB(Tu, Tv); + T12 = VADD(Tu, Tv); + } + { + V Tx, Ty, T7, T8; + Tx = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tz = VSUB(Tx, Ty); + T15 = VADD(Tx, Ty); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T1b = VADD(T7, T8); + } + { + V Tb, Tc, Tn, To; + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Td = VSUB(Tb, Tc); + T11 = VADD(Tb, Tc); + Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + To = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tp = VSUB(Tn, To); + T19 = VADD(Tn, To); + } + { + V Tq, Tr, Te, Tf; + Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tr = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Ts = VSUB(Tq, Tr); + T1c = VADD(Tq, Tr); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tg = VSUB(Te, Tf); + T14 = VADD(Te, Tf); + } + TG = VSUB(Ts, Tp); + TN = VSUB(T6, T9); + TO = VSUB(Td, Tg); + TH = VSUB(Tz, Tw); + T13 = VSUB(T11, T12); + T16 = VSUB(T14, T15); + T1k = VADD(T13, T16); + T1u = VADD(T11, T12); + T1v = VADD(T14, T15); + T1z = VADD(T1u, T1v); + T1r = VADD(T18, T19); + T1s = VADD(T1b, T1c); + T1y = VADD(T1r, T1s); + T1a = VSUB(T18, T19); + T1d = VSUB(T1b, T1c); + T1j = VADD(T1a, T1d); + { + V 
Ta, Th, Tt, TA; + Ta = VADD(T6, T9); + Th = VADD(Td, Tg); + Ti = VADD(Ta, Th); + TD = VMUL(LDK(KP559016994), VSUB(Ta, Th)); + Tt = VADD(Tp, Ts); + TA = VADD(Tw, Tz); + TB = VADD(Tt, TA); + TL = VMUL(LDK(KP559016994), VSUB(TA, Tt)); + } + } + Tj = VADD(T3, Ti); + TC = VBYI(VADD(Tm, TB)); + ST(&(xo[WS(os, 5)]), VSUB(Tj, TC), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VADD(Tj, TC), ovs, &(xo[WS(os, 1)])); + { + V T1A, T1C, T1D, T1x, T1G, T1t, T1w, T1F, T1E; + T1A = VMUL(LDK(KP559016994), VSUB(T1y, T1z)); + T1C = VADD(T1y, T1z); + T1D = VFNMS(LDK(KP250000000), T1C, T1B); + T1t = VSUB(T1r, T1s); + T1w = VSUB(T1u, T1v); + T1x = VBYI(VFMA(LDK(KP951056516), T1t, VMUL(LDK(KP587785252), T1w))); + T1G = VBYI(VFNMS(LDK(KP587785252), T1t, VMUL(LDK(KP951056516), T1w))); + ST(&(xo[0]), VADD(T1B, T1C), ovs, &(xo[0])); + T1F = VSUB(T1D, T1A); + ST(&(xo[WS(os, 8)]), VSUB(T1F, T1G), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VADD(T1G, T1F), ovs, &(xo[0])); + T1E = VADD(T1A, T1D); + ST(&(xo[WS(os, 4)]), VADD(T1x, T1E), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VSUB(T1E, T1x), ovs, &(xo[0])); + } + { + V T1n, T1l, T1m, T1f, T1q, T17, T1e, T1p, T1o; + T1n = VMUL(LDK(KP559016994), VSUB(T1j, T1k)); + T1l = VADD(T1j, T1k); + T1m = VFNMS(LDK(KP250000000), T1l, T1i); + T17 = VSUB(T13, T16); + T1e = VSUB(T1a, T1d); + T1f = VBYI(VFNMS(LDK(KP587785252), T1e, VMUL(LDK(KP951056516), T17))); + T1q = VBYI(VFMA(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T17))); + ST(&(xo[WS(os, 10)]), VADD(T1i, T1l), ovs, &(xo[0])); + T1p = VADD(T1n, T1m); + ST(&(xo[WS(os, 6)]), VSUB(T1p, T1q), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VADD(T1q, T1p), ovs, &(xo[0])); + T1o = VSUB(T1m, T1n); + ST(&(xo[WS(os, 2)]), VADD(T1f, T1o), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VSUB(T1o, T1f), ovs, &(xo[0])); + } + { + V TI, TP, TX, TU, TM, TW, TF, TT, TK, TE; + TI = VFMA(LDK(KP951056516), TG, VMUL(LDK(KP587785252), TH)); + TP = VFMA(LDK(KP951056516), TN, VMUL(LDK(KP587785252), TO)); + TX = VFNMS(LDK(KP587785252), TN, 
VMUL(LDK(KP951056516), TO)); + TU = VFNMS(LDK(KP587785252), TG, VMUL(LDK(KP951056516), TH)); + TK = VFMS(LDK(KP250000000), TB, Tm); + TM = VADD(TK, TL); + TW = VSUB(TL, TK); + TE = VFNMS(LDK(KP250000000), Ti, T3); + TF = VADD(TD, TE); + TT = VSUB(TE, TD); + { + V TJ, TQ, TZ, T10; + TJ = VADD(TF, TI); + TQ = VBYI(VSUB(TM, TP)); + ST(&(xo[WS(os, 19)]), VSUB(TJ, TQ), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(TJ, TQ), ovs, &(xo[WS(os, 1)])); + TZ = VADD(TT, TU); + T10 = VBYI(VADD(TX, TW)); + ST(&(xo[WS(os, 13)]), VSUB(TZ, T10), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(TZ, T10), ovs, &(xo[WS(os, 1)])); + } + { + V TR, TS, TV, TY; + TR = VSUB(TF, TI); + TS = VBYI(VADD(TP, TM)); + ST(&(xo[WS(os, 11)]), VSUB(TR, TS), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VADD(TR, TS), ovs, &(xo[WS(os, 1)])); + TV = VSUB(TT, TU); + TY = VBYI(VSUB(TW, TX)); + ST(&(xo[WS(os, 17)]), VSUB(TV, TY), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(TV, TY), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n1fv_20"), {92, 12, 12, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_20) (planner *p) { + X(kdft_register) (p, n1fv_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,793 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include n1f.h */ + +/* + * This function contains 224 FP additions, 193 FP multiplications, + * (or, 43 additions, 12 multiplications, 181 fused multiply/add), + * 215 stack variables, 67 constants, and 50 memory accesses + */ +#include "n1f.h" + +static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP831864738, 
+0.831864738706457140726048799369896829771167132); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, +0.447417479732227551498980015410057305749330693); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP522616830, +0.522616830205754336872861364785224694908468440); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP912575812, 
+0.912575812670962425556968549836277086778922727); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; 
+ for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) { + V T1g, T1k, T1I, T24, T2a, T1G, T1A, T1l, T1B, T1H, T1d; + { + V T2z, T1q, Ta, T9, T3n, Ty, Tl, T2O, T2W, T2l, T2s, TV, T1i, T1K, T1S; + V T3z, T3t, Tk, T3o, Tp, T2g, T2N, T2V, T2o, T2t, T1a, T1j, T1J, T1R, Tz; + V Tt, TA, Tw; + { + V T1, T5, T6, T2, T3; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + { + V TH, TW, TK, TS, T10, T8, TN, TT, T17, TZ, T11; + TH = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TW = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V TI, TJ, TL, T7, T1p, T4, T1o, TM, TX, TY; + TI = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TJ = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TL = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T7 = VADD(T5, T6); + T1p = VSUB(T5, T6); + T4 = VADD(T2, T3); + T1o = VSUB(T2, T3); + TM = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TX = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TK = VADD(TI, TJ); + TS = VSUB(TI, TJ); + TY = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T10 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T2z = VFNMS(LDK(KP618033988), T1o, T1p); + T1q = VFMA(LDK(KP618033988), T1p, T1o); + Ta = VSUB(T4, T7); + T8 = VADD(T4, T7); + TN = VADD(TL, TM); + TT = VSUB(TM, TL); + T17 = VSUB(TX, TY); + TZ = VADD(TX, TY); + T11 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + } + { + V Tc, T2m, T19, Tn, To, Tr, Tj, T16, T2n, Ts, Tu, Tv; + { + V TU, T2j, TO, TQ, T12, T18; + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T9 = VFNMS(LDK(KP250000000), T8, T1); + T3n = VADD(T1, T8); + TU = VFNMS(LDK(KP618033988), TT, TS); + T2j = VFMA(LDK(KP618033988), TS, TT); + TO = VADD(TK, TN); + TQ = VSUB(TN, TK); + T12 = VADD(T10, T11); + T18 = VSUB(T10, T11); + Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + { + V T3r, 
T15, T13, Tf, Ti, T2k, TR, TP, T3s, T14; + { + V Td, Te, Tg, Th; + Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + TP = VFNMS(LDK(KP250000000), TO, TH); + T3r = VADD(TH, TO); + T2m = VFNMS(LDK(KP618033988), T17, T18); + T19 = VFMA(LDK(KP618033988), T18, T17); + T15 = VSUB(T12, TZ); + T13 = VADD(TZ, T12); + Tf = VADD(Td, Te); + Tn = VSUB(Td, Te); + To = VSUB(Th, Tg); + Ti = VADD(Tg, Th); + } + T2k = VFMA(LDK(KP559016994), TQ, TP); + TR = VFNMS(LDK(KP559016994), TQ, TP); + Tr = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T3s = VADD(TW, T13); + T14 = VFNMS(LDK(KP250000000), T13, TW); + Tj = VADD(Tf, Ti); + Tl = VSUB(Tf, Ti); + T2O = VFNMS(LDK(KP667278218), T2k, T2j); + T2W = VFMA(LDK(KP603558818), T2j, T2k); + T2l = VFMA(LDK(KP066152395), T2k, T2j); + T2s = VFNMS(LDK(KP059835404), T2j, T2k); + TV = VFNMS(LDK(KP522847744), TU, TR); + T1i = VFMA(LDK(KP578046249), TR, TU); + T1K = VFNMS(LDK(KP494780565), TR, TU); + T1S = VFMA(LDK(KP447533225), TU, TR); + T16 = VFNMS(LDK(KP559016994), T15, T14); + T2n = VFMA(LDK(KP559016994), T15, T14); + T3z = VSUB(T3r, T3s); + T3t = VADD(T3r, T3s); + Ts = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tu = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + } + } + Tk = VFNMS(LDK(KP250000000), Tj, Tc); + T3o = VADD(Tc, Tj); + Tp = VFNMS(LDK(KP618033988), To, Tn); + T2g = VFMA(LDK(KP618033988), Tn, To); + T2N = VFMA(LDK(KP066152395), T2n, T2m); + T2V = VFNMS(LDK(KP059835404), T2m, T2n); + T2o = VFMA(LDK(KP869845200), T2n, T2m); + T2t = VFNMS(LDK(KP786782374), T2m, T2n); + T1a = VFNMS(LDK(KP893101515), T19, T16); + T1j = VFMA(LDK(KP987388751), T16, T19); + T1J = VFNMS(LDK(KP120146378), T19, T16); + T1R = VFMA(LDK(KP132830569), T16, T19); + Tz = VADD(Ts, Tr); + Tt = VSUB(Tr, Ts); + TA = VADD(Tv, Tu); + Tw = VSUB(Tu, Tv); + } + } + } + { + V T2p, T2I, T2u, 
T2C, Tx, T2d, T2X, T34, T2P, T3b, T2b, Tb, T2Q, T2Z, T2h; + V T2w, Tq, T1e, T1M, T1U, TE, T2c, T3q, T3y; + T2p = VFNMS(LDK(KP772036680), T2o, T2l); + T2I = VFMA(LDK(KP772036680), T2o, T2l); + T2u = VFMA(LDK(KP772036680), T2t, T2s); + T2C = VFNMS(LDK(KP772036680), T2t, T2s); + { + V TD, TB, Tm, T2f, T3p, TC; + Tx = VFMA(LDK(KP618033988), Tw, Tt); + T2d = VFNMS(LDK(KP618033988), Tt, Tw); + TD = VSUB(Tz, TA); + TB = VADD(Tz, TA); + Tm = VFMA(LDK(KP559016994), Tl, Tk); + T2f = VFNMS(LDK(KP559016994), Tl, Tk); + T2X = VFMA(LDK(KP845997307), T2W, T2V); + T34 = VFNMS(LDK(KP845997307), T2W, T2V); + T2P = VFNMS(LDK(KP845997307), T2O, T2N); + T3b = VFMA(LDK(KP845997307), T2O, T2N); + T2b = VFNMS(LDK(KP559016994), Ta, T9); + Tb = VFMA(LDK(KP559016994), Ta, T9); + T3p = VADD(Ty, TB); + TC = VFMS(LDK(KP250000000), TB, Ty); + T2Q = VFNMS(LDK(KP522847744), T2g, T2f); + T2Z = VFMA(LDK(KP578046249), T2f, T2g); + T2h = VFMA(LDK(KP893101515), T2g, T2f); + T2w = VFNMS(LDK(KP987388751), T2f, T2g); + Tq = VFNMS(LDK(KP244189809), Tp, Tm); + T1e = VFMA(LDK(KP269969613), Tm, Tp); + T1M = VFMA(LDK(KP667278218), Tm, Tp); + T1U = VFNMS(LDK(KP603558818), Tp, Tm); + TE = VFNMS(LDK(KP559016994), TD, TC); + T2c = VFMA(LDK(KP559016994), TD, TC); + T3q = VADD(T3o, T3p); + T3y = VSUB(T3o, T3p); + } + { + V T1Z, T25, T1P, T22, T1X, TG, T1b, T28, T1t, T1y, T1x, T1E, T1Q, T1Y; + { + V T26, T1L, T1T, TF, T1f, T1W, T3m, T3g, T2M, T2G, T39, T3j, T21, T1O, T20; + V T27; + T26 = VFMA(LDK(KP867381224), T1K, T1J); + T1L = VFNMS(LDK(KP867381224), T1K, T1J); + T20 = VFNMS(LDK(KP958953096), T1S, T1R); + T1T = VFMA(LDK(KP958953096), T1S, T1R); + { + V T2R, T2Y, T2e, T2v, T1N, T1V; + T2R = VFNMS(LDK(KP494780565), T2c, T2d); + T2Y = VFMA(LDK(KP447533225), T2d, T2c); + T2e = VFMA(LDK(KP120146378), T2d, T2c); + T2v = VFNMS(LDK(KP132830569), T2c, T2d); + TF = VFNMS(LDK(KP667278218), TE, Tx); + T1f = VFMA(LDK(KP603558818), Tx, TE); + T1N = VFMA(LDK(KP869845200), TE, Tx); + T1V = VFNMS(LDK(KP786782374), Tx, TE); + { + V 
T3A, T3C, T3w, T3u; + T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y)); + T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z)); + T3w = VSUB(T3q, T3t); + T3u = VADD(T3q, T3t); + { + V T2B, T2x, T2H, T2i; + T2B = VFMA(LDK(KP734762448), T2w, T2v); + T2x = VFNMS(LDK(KP734762448), T2w, T2v); + T2H = VFNMS(LDK(KP734762448), T2h, T2e); + T2i = VFMA(LDK(KP734762448), T2h, T2e); + { + V T30, T35, T3c, T2S, T3v; + T30 = VFNMS(LDK(KP921078979), T2Z, T2Y); + T35 = VFMA(LDK(KP921078979), T2Z, T2Y); + T3c = VFMA(LDK(KP982009705), T2R, T2Q); + T2S = VFNMS(LDK(KP982009705), T2R, T2Q); + T1W = VFMA(LDK(KP912575812), T1V, T1U); + T1Z = VFNMS(LDK(KP912575812), T1V, T1U); + T1O = VFMA(LDK(KP912575812), T1N, T1M); + T25 = VFNMS(LDK(KP912575812), T1N, T1M); + ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0])); + T3v = VFNMS(LDK(KP250000000), T3u, T3n); + { + V T2y, T2J, T2q, T2D; + T2y = VFMA(LDK(KP945422727), T2x, T2u); + T2J = VFMA(LDK(KP522616830), T2x, T2I); + T2q = VFMA(LDK(KP956723877), T2p, T2i); + T2D = VFNMS(LDK(KP522616830), T2i, T2C); + { + V T3e, T31, T36, T2T; + T3e = VFMA(LDK(KP906616052), T30, T2X); + T31 = VFNMS(LDK(KP906616052), T30, T2X); + T36 = VFNMS(LDK(KP923225144), T2S, T2P); + T2T = VFMA(LDK(KP923225144), T2S, T2P); + { + V T3k, T3d, T3x, T3B; + T3k = VFNMS(LDK(KP669429328), T3b, T3c); + T3d = VFMA(LDK(KP570584518), T3c, T3b); + T3x = VFMA(LDK(KP559016994), T3w, T3v); + T3B = VFNMS(LDK(KP559016994), T3w, T3v); + { + V T2A, T2K, T2r, T2E; + T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y)); + T2K = VFNMS(LDK(KP690983005), T2J, T2u); + T2r = VFMA(LDK(KP992114701), T2q, T2b); + T2E = VFMA(LDK(KP763932022), T2D, T2p); + { + V T32, T3a, T37, T3h; + T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31)); + T3a = VFMA(LDK(KP262346850), T31, T2z); + T37 = VFNMS(LDK(KP997675361), T36, T35); + T3h = VFNMS(LDK(KP904508497), T36, T34); + { + V T2U, T33, T3l, T3f; + T2U = VFMA(LDK(KP949179823), T2T, T2b); + T33 = 
VFNMS(LDK(KP237294955), T2T, T2b); + T3l = VFNMS(LDK(KP669429328), T3e, T3k); + T3f = VFMA(LDK(KP618033988), T3e, T3d); + ST(&(xo[WS(os, 20)]), VFMAI(T3A, T3x), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(T3A, T3x), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFNMSI(T3C, T3B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 10)]), VFMAI(T3C, T3B), ovs, &(xo[0])); + { + V T2L, T2F, T38, T3i; + T2L = VFMA(LDK(KP855719849), T2K, T2H); + ST(&(xo[WS(os, 22)]), VFMAI(T2A, T2r), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFNMSI(T2A, T2r), ovs, &(xo[WS(os, 1)])); + T2F = VFNMS(LDK(KP855719849), T2E, T2B); + T38 = VFMA(LDK(KP560319534), T37, T34); + T3i = VFNMS(LDK(KP681693190), T3h, T35); + ST(&(xo[WS(os, 23)]), VFMAI(T32, T2U), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(T32, T2U), ovs, &(xo[0])); + T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a)); + T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a)); + T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z)); + T2G = VFMA(LDK(KP897376177), T2F, T2b); + T39 = VFNMS(LDK(KP949179823), T38, T33); + T3j = VFNMS(LDK(KP860541664), T3i, T33); + T21 = VFMA(LDK(KP447417479), T1O, T20); + } + } + } + } + } + } + } + } + } + } + } + T1P = VFNMS(LDK(KP809385824), T1O, T1L); + ST(&(xo[WS(os, 17)]), VFMAI(T2M, T2G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 8)]), VFNMSI(T2M, T2G), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFMAI(T3g, T39), ovs, &(xo[0])); + ST(&(xo[WS(os, 13)]), VFNMSI(T3g, T39), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T3m, T3j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 18)]), VFNMSI(T3m, T3j), ovs, &(xo[0])); + T22 = VFMA(LDK(KP690983005), T21, T1L); + T27 = VFMA(LDK(KP447417479), T1W, T26); + T1X = VFMA(LDK(KP894834959), T1W, T1T); + { + V T1r, T1s, T1v, T1w; + T1r = VFNMS(LDK(KP916574801), T1f, T1e); + T1g = VFMA(LDK(KP916574801), T1f, T1e); + T1k = VFNMS(LDK(KP831864738), T1j, T1i); + T1s = VFMA(LDK(KP831864738), T1j, T1i); + T1v = VFNMS(LDK(KP829049696), TF, Tq); + 
TG = VFMA(LDK(KP829049696), TF, Tq); + T1b = VFMA(LDK(KP831864738), T1a, TV); + T1w = VFNMS(LDK(KP831864738), T1a, TV); + T28 = VFNMS(LDK(KP763932022), T27, T1T); + T1t = VFMA(LDK(KP904730450), T1s, T1r); + T1y = VFNMS(LDK(KP904730450), T1s, T1r); + T1x = VFMA(LDK(KP559154169), T1w, T1v); + T1E = VFNMS(LDK(KP683113946), T1v, T1w); + } + } + T1Q = VFNMS(LDK(KP992114701), T1P, Tb); + T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q)); + { + V T1u, T1F, T1z, T1h, T1c, T23, T29; + T23 = VFNMS(LDK(KP999544308), T22, T1Z); + T29 = VFNMS(LDK(KP999544308), T28, T25); + T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q)); + T1u = VFNMS(LDK(KP242145790), T1t, T1q); + T1F = VFMA(LDK(KP617882369), T1y, T1E); + T1z = VFMA(LDK(KP559016994), T1y, T1x); + T1h = VFNMS(LDK(KP904730450), T1b, TG); + T1c = VFMA(LDK(KP904730450), T1b, TG); + ST(&(xo[WS(os, 21)]), VFNMSI(T1Y, T1Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(T1Y, T1Q), ovs, &(xo[0])); + T24 = VFNMS(LDK(KP803003575), T23, Tb); + T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q)); + T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u)); + T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u)); + T1l = VFNMS(LDK(KP904730450), T1k, T1h); + T1B = VADD(T1g, T1h); + T1H = VFMA(LDK(KP968583161), T1c, Tb); + T1d = VFNMS(LDK(KP242145790), T1c, Tb); + } + } + } + } + ST(&(xo[WS(os, 9)]), VFMAI(T2a, T24), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 16)]), VFNMSI(T2a, T24), ovs, &(xo[0])); + { + V T1m, T1C, T1n, T1D; + T1m = VFNMS(LDK(KP618033988), T1l, T1g); + T1C = VFNMS(LDK(KP683113946), T1B, T1k); + ST(&(xo[WS(os, 24)]), VFMAI(T1I, T1H), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFNMSI(T1I, T1H), ovs, &(xo[WS(os, 1)])); + T1n = VFNMS(LDK(KP876091699), T1m, T1d); + T1D = VFMA(LDK(KP792626838), T1C, T1d); + ST(&(xo[WS(os, 19)]), VFMAI(T1A, T1n), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFNMSI(T1A, T1n), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFMAI(T1G, T1D), ovs, 
&(xo[0])); + ST(&(xo[WS(os, 11)]), VFNMSI(T1G, T1D), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +/* Descriptor for the HAVE_FMA build of n1fv_25: first field 25, name string "n1fv_25", count initializer {43, 12, 181, 0}. */ static const kdft_desc desc = { 25, XSIMD_STRING("n1fv_25"), {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 }; + +/* Hands n1fv_25 and its descriptor to the planner through X(kdft_register). */ void XSIMD(codelet_n1fv_25) (planner *p) { + X(kdft_register) (p, n1fv_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include n1f.h */ + +/* + * This function contains 224 FP additions, 140 FP multiplications, + * (or, 146 additions, 62 multiplications, 78 fused multiply/add), + * 115 stack variables, 40 constants, and 50 memory accesses + */ +#include "n1f.h" + +/* n1fv_25, variant compiled when HAVE_FMA is not defined. Each loop pass reads the 25 inputs xi[0] and xi[WS(is, k)] for k = 1..24 with LD, combines them using VADD, VSUB, VMUL, VFMA, VFNMS, VFMS and VBYI with the DVK constants declared below, and writes 25 results to xo[0] and xo[WS(os, k)] for k = 1..24 with ST. Parameters: ri and ro are the input and output base pointers (copied into xi and xo); is and os are the strides handed to WS(); v is the starting loop count, reduced by VL per pass; ivs and ovs scale the per-pass pointer step and are also passed to LD and ST. ii and io are not referenced in this body. NOTE(review): presumably the length-25 forward complex DFT, going by the function name and the genfft command line -- confirm against the FFTW codelet documentation. */ static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); 
+ DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + /* Loop counter i starts at v and drops by VL each pass while xi and xo advance by VL * ivs and VL * ovs. */ for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) { + V T7, T1g, T26, Ta, T2R, T2N, T2O, T2P, T19, T1Y, T16, T1Z, T1a, T2v, T1l; + V T2m, TU, T21, TR, T22, TV, T2u, T1k, T2l, T2K, T2L, T2M, TE, T1R, TB; + V T1S, TF, T2r, T1i, 
T2j, Tp, T1U, Tm, T1V, Tq, T2s, T1h, T2i; + { /* Loads inputs 0, 5, 10, 15, 20. */ + V T8, T6, T1f, T3, T1e, T25, T9; + T8 = LD(&(xi[0]), ivs, &(xi[0])); + { + V T4, T5, T1, T2; + T4 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T6 = VADD(T4, T5); + T1f = VSUB(T4, T5); + T1 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T3 = VADD(T1, T2); + T1e = VSUB(T1, T2); + } + T7 = VMUL(LDK(KP559016994), VSUB(T3, T6)); + T1g = VFMA(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T1f)); + T25 = VMUL(LDK(KP951056516), T1f); + T26 = VFNMS(LDK(KP587785252), T1e, T25); + T9 = VADD(T3, T6); + Ta = VFNMS(LDK(KP250000000), T9, T8); + T2R = VADD(T8, T9); + } + { /* Loads inputs 2, 7, 12, 17, 22 and 3, 8, 13, 18, 23. */ + V TO, T13, TN, TT, TP, TS, T12, T18, T14, T17, T15, TQ; + TO = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T13 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V TH, TI, TJ, TK, TL, TM; + TH = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TI = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TJ = VADD(TH, TI); + TK = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TL = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TM = VADD(TK, TL); + TN = VMUL(LDK(KP559016994), VSUB(TJ, TM)); + TT = VSUB(TK, TL); + TP = VADD(TJ, TM); + TS = VSUB(TH, TI); + } + { + V TW, TX, TY, TZ, T10, T11; + TW = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TX = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + TY = VADD(TW, TX); + TZ = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T10 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + T11 = VADD(TZ, T10); + T12 = VMUL(LDK(KP559016994), VSUB(TY, T11)); + T18 = VSUB(TZ, T10); + T14 = VADD(TY, T11); + T17 = VSUB(TW, TX); + } + T2N = VADD(TO, TP); + T2O = VADD(T13, T14); + T2P = VADD(T2N, T2O); + T19 = VFMA(LDK(KP475528258), T17, VMUL(LDK(KP293892626), T18)); + T1Y = VFNMS(LDK(KP293892626), T17, VMUL(LDK(KP475528258), T18)); + T15 = VFNMS(LDK(KP250000000), T14, T13); + T16 = VADD(T12, T15); + T1Z = VSUB(T15, T12); + T1a = VFNMS(LDK(KP1_369094211), T19, VMUL(LDK(KP728968627), T16)); 
+ T2v = VFMA(LDK(KP1_996053456), T1Y, VMUL(LDK(KP062790519), T1Z)); + T1l = VFMA(LDK(KP1_457937254), T19, VMUL(LDK(KP684547105), T16)); + T2m = VFNMS(LDK(KP998026728), T1Z, VMUL(LDK(KP125581039), T1Y)); + TU = VFMA(LDK(KP475528258), TS, VMUL(LDK(KP293892626), TT)); + T21 = VFNMS(LDK(KP293892626), TS, VMUL(LDK(KP475528258), TT)); + TQ = VFNMS(LDK(KP250000000), TP, TO); + TR = VADD(TN, TQ); + T22 = VSUB(TQ, TN); + TV = VFNMS(LDK(KP963507348), TU, VMUL(LDK(KP876306680), TR)); + T2u = VFMA(LDK(KP1_688655851), T21, VMUL(LDK(KP535826794), T22)); + T1k = VFMA(LDK(KP1_752613360), TU, VMUL(LDK(KP481753674), TR)); + T2l = VFNMS(LDK(KP844327925), T22, VMUL(LDK(KP1_071653589), T21)); + } + { /* Loads inputs 1, 6, 11, 16, 21 and 4, 9, 14, 19, 24. */ + V Tj, Ty, Ti, To, Tk, Tn, Tx, TD, Tz, TC, TA, Tl; + Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + { + V Tc, Td, Te, Tf, Tg, Th; + Tc = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Te = VADD(Tc, Td); + Tf = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + Th = VADD(Tf, Tg); + Ti = VMUL(LDK(KP559016994), VSUB(Te, Th)); + To = VSUB(Tf, Tg); + Tk = VADD(Te, Th); + Tn = VSUB(Tc, Td); + } + { + V Tr, Ts, Tt, Tu, Tv, Tw; + Tr = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ts = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + Tt = VADD(Tr, Ts); + Tu = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tv = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tw = VADD(Tu, Tv); + Tx = VMUL(LDK(KP559016994), VSUB(Tt, Tw)); + TD = VSUB(Tu, Tv); + Tz = VADD(Tt, Tw); + TC = VSUB(Tr, Ts); + } + T2K = VADD(Tj, Tk); + T2L = VADD(Ty, Tz); + T2M = VADD(T2K, T2L); + TE = VFMA(LDK(KP475528258), TC, VMUL(LDK(KP293892626), TD)); + T1R = VFNMS(LDK(KP293892626), TC, VMUL(LDK(KP475528258), TD)); + TA = VFNMS(LDK(KP250000000), Tz, Ty); + TB = VADD(Tx, TA); + T1S = VSUB(TA, Tx); + TF = VFNMS(LDK(KP1_688655851), TE, VMUL(LDK(KP535826794), TB)); + T2r = VFNMS(LDK(KP425779291), T1S, 
VMUL(LDK(KP1_809654104), T1R)); + T1i = VFMA(LDK(KP1_071653589), TE, VMUL(LDK(KP844327925), TB)); + T2j = VFMA(LDK(KP851558583), T1R, VMUL(LDK(KP904827052), T1S)); + Tp = VFMA(LDK(KP475528258), Tn, VMUL(LDK(KP293892626), To)); + T1U = VFNMS(LDK(KP293892626), Tn, VMUL(LDK(KP475528258), To)); + Tl = VFNMS(LDK(KP250000000), Tk, Tj); + Tm = VADD(Ti, Tl); + T1V = VSUB(Tl, Ti); + Tq = VFNMS(LDK(KP497379774), Tp, VMUL(LDK(KP968583161), Tm)); + T2s = VFMA(LDK(KP963507348), T1U, VMUL(LDK(KP876306680), T1V)); + T1h = VFMA(LDK(KP1_937166322), Tp, VMUL(LDK(KP248689887), Tm)); + T2i = VFNMS(LDK(KP481753674), T1V, VMUL(LDK(KP1_752613360), T1U)); + } + { /* Stores outputs 0, 5, 10, 15, 20. */ + V T2Q, T2S, T2T, T2X, T2Y, T2V, T2W, T2Z, T2U; + T2Q = VMUL(LDK(KP559016994), VSUB(T2M, T2P)); + T2S = VADD(T2M, T2P); + T2T = VFNMS(LDK(KP250000000), T2S, T2R); + T2V = VSUB(T2K, T2L); + T2W = VSUB(T2N, T2O); + T2X = VBYI(VFMA(LDK(KP951056516), T2V, VMUL(LDK(KP587785252), T2W))); + T2Y = VBYI(VFNMS(LDK(KP587785252), T2V, VMUL(LDK(KP951056516), T2W))); + ST(&(xo[0]), VADD(T2R, T2S), ovs, &(xo[0])); + T2Z = VSUB(T2T, T2Q); + ST(&(xo[WS(os, 10)]), VADD(T2Y, T2Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 15)]), VSUB(T2Z, T2Y), ovs, &(xo[WS(os, 1)])); + T2U = VADD(T2Q, T2T); + ST(&(xo[WS(os, 5)]), VSUB(T2U, T2X), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 20)]), VADD(T2X, T2U), ovs, &(xo[0])); + } + { /* Stores outputs 2, 3, 7, 8, 12, 13, 17, 18, 22, 23. */ + V T2t, T2y, T2z, T2w, T1T, T1W, T1X, T2c, T2d, T2e, T29, T2a, T2b, T20, T23; + V T24, T2p, T2o, T2q, T28, T2D, T2C, T2E, T2x, T2F; + T2t = VSUB(T2r, T2s); + T2y = VADD(T2i, T2j); + T2z = VSUB(T2l, T2m); + T2w = VSUB(T2u, T2v); + T1T = VFNMS(LDK(KP125333233), T1S, VMUL(LDK(KP1_984229402), T1R)); + T1W = VFMA(LDK(KP1_457937254), T1U, VMUL(LDK(KP684547105), T1V)); + T1X = VSUB(T1T, T1W); + T2c = VFNMS(LDK(KP1_996053456), T21, VMUL(LDK(KP062790519), T22)); + T2d = VFMA(LDK(KP1_541026485), T1Y, VMUL(LDK(KP637423989), T1Z)); + T2e = VSUB(T2c, T2d); + T29 = VFNMS(LDK(KP1_369094211), T1U, VMUL(LDK(KP728968627), T1V)); + T2a = VFMA(LDK(KP250666467), 
T1R, VMUL(LDK(KP992114701), T1S)); + T2b = VSUB(T29, T2a); + T20 = VFNMS(LDK(KP770513242), T1Z, VMUL(LDK(KP1_274847979), T1Y)); + T23 = VFMA(LDK(KP125581039), T21, VMUL(LDK(KP998026728), T22)); + T24 = VSUB(T20, T23); + { + V T2k, T2n, T2A, T2B; + T2k = VSUB(T2i, T2j); + T2n = VADD(T2l, T2m); + T2p = VADD(T2k, T2n); + T2o = VMUL(LDK(KP559016994), VSUB(T2k, T2n)); + T2q = VFNMS(LDK(KP250000000), T2p, T26); + T28 = VSUB(Ta, T7); + T2A = VADD(T2s, T2r); + T2B = VADD(T2u, T2v); + T2D = VADD(T2A, T2B); + T2C = VMUL(LDK(KP559016994), VSUB(T2A, T2B)); + T2E = VFNMS(LDK(KP250000000), T2D, T28); + } + { + V T2I, T2J, T27, T2f; + T2I = VBYI(VADD(T26, T2p)); + T2J = VADD(T28, T2D); + ST(&(xo[WS(os, 2)]), VADD(T2I, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 23)]), VSUB(T2J, T2I), ovs, &(xo[WS(os, 1)])); + T27 = VBYI(VSUB(VADD(T1X, T24), T26)); + T2f = VADD(T28, VADD(T2b, T2e)); + ST(&(xo[WS(os, 3)]), VADD(T27, T2f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 22)]), VSUB(T2f, T27), ovs, &(xo[0])); + } + T2x = VBYI(VADD(T2o, VADD(T2q, VFNMS(LDK(KP587785252), T2w, VMUL(LDK(KP951056516), T2t))))); + T2F = VFMA(LDK(KP951056516), T2y, VFMA(LDK(KP587785252), T2z, VADD(T2C, T2E))); + ST(&(xo[WS(os, 7)]), VADD(T2x, T2F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 18)]), VSUB(T2F, T2x), ovs, &(xo[0])); + { + V T2G, T2H, T2g, T2h; + T2G = VBYI(VADD(T2q, VSUB(VFMA(LDK(KP587785252), T2t, VMUL(LDK(KP951056516), T2w)), T2o))); + T2H = VFMA(LDK(KP587785252), T2y, VSUB(VFNMS(LDK(KP951056516), T2z, T2E), T2C)); + ST(&(xo[WS(os, 12)]), VADD(T2G, T2H), ovs, &(xo[0])); + ST(&(xo[WS(os, 13)]), VSUB(T2H, T2G), ovs, &(xo[WS(os, 1)])); + T2g = VFMA(LDK(KP309016994), T2b, VFNMS(LDK(KP809016994), T2e, VFNMS(LDK(KP587785252), VADD(T23, T20), VFNMS(LDK(KP951056516), VADD(T1W, T1T), T28)))); + T2h = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2c, T2d), VFNMS(LDK(KP809016994), T24, VFNMS(LDK(KP951056516), VADD(T29, T2a), VMUL(LDK(KP309016994), T1X)))), T26)); + ST(&(xo[WS(os, 17)]), VSUB(T2g, T2h), ovs, &(xo[WS(os, 
1)])); + ST(&(xo[WS(os, 8)]), VADD(T2g, T2h), ovs, &(xo[0])); + } + } + { /* Stores outputs 1, 4, 6, 9, 11, 14, 16, 19, 21, 24. */ + V T1p, T1u, T1w, T1q, T1B, T1C, T1D, T1L, T1M, T1N, T1I, T1J, T1K, T1E, T1F; + V T1G, T1n, T1r, T1s, Tb, T1c, T1v, T1x, T1t, T1y; + T1p = VSUB(TF, Tq); + T1u = VSUB(T1i, T1h); + T1w = VSUB(T1l, T1k); + T1q = VSUB(TV, T1a); + T1B = VFMA(LDK(KP1_688655851), Tp, VMUL(LDK(KP535826794), Tm)); + T1C = VFMA(LDK(KP1_541026485), TE, VMUL(LDK(KP637423989), TB)); + T1D = VSUB(T1B, T1C); + T1L = VFMA(LDK(KP851558583), TU, VMUL(LDK(KP904827052), TR)); + T1M = VFMA(LDK(KP1_984229402), T19, VMUL(LDK(KP125333233), T16)); + T1N = VADD(T1L, T1M); + T1I = VFNMS(LDK(KP844327925), Tm, VMUL(LDK(KP1_071653589), Tp)); + T1J = VFNMS(LDK(KP1_274847979), TE, VMUL(LDK(KP770513242), TB)); + T1K = VADD(T1I, T1J); + T1E = VFNMS(LDK(KP425779291), TR, VMUL(LDK(KP1_809654104), TU)); + T1F = VFNMS(LDK(KP992114701), T16, VMUL(LDK(KP250666467), T19)); + T1G = VADD(T1E, T1F); + { + V T1j, T1m, TG, T1b; + T1j = VADD(T1h, T1i); + T1m = VADD(T1k, T1l); + T1n = VADD(T1j, T1m); + T1r = VFMS(LDK(KP250000000), T1n, T1g); + T1s = VMUL(LDK(KP559016994), VSUB(T1m, T1j)); + Tb = VADD(T7, Ta); + TG = VADD(Tq, TF); + T1b = VADD(TV, T1a); + T1c = VADD(TG, T1b); + T1v = VFNMS(LDK(KP250000000), T1c, Tb); + T1x = VMUL(LDK(KP559016994), VSUB(TG, T1b)); + } + { + V T1d, T1o, T1H, T1O; + T1d = VADD(Tb, T1c); + T1o = VBYI(VADD(T1g, T1n)); + ST(&(xo[WS(os, 1)]), VSUB(T1d, T1o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 24)]), VADD(T1d, T1o), ovs, &(xo[0])); + T1H = VADD(Tb, VADD(T1D, T1G)); + T1O = VBYI(VADD(T1g, VSUB(T1K, T1N))); + ST(&(xo[WS(os, 21)]), VSUB(T1H, T1O), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VADD(T1H, T1O), ovs, &(xo[0])); + } + T1t = VBYI(VADD(VFMA(LDK(KP587785252), T1p, VMUL(LDK(KP951056516), T1q)), VSUB(T1r, T1s))); + T1y = VFMA(LDK(KP587785252), T1u, VFNMS(LDK(KP951056516), T1w, VSUB(T1v, T1x))); + ST(&(xo[WS(os, 11)]), VADD(T1t, T1y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 14)]), VSUB(T1y, T1t), ovs, &(xo[0])); + { + V 
T1z, T1A, T1P, T1Q; + T1z = VBYI(VADD(VFNMS(LDK(KP587785252), T1q, VMUL(LDK(KP951056516), T1p)), VADD(T1r, T1s))); + T1A = VFMA(LDK(KP951056516), T1u, VADD(T1x, VFMA(LDK(KP587785252), T1w, T1v))); + ST(&(xo[WS(os, 6)]), VADD(T1z, T1A), ovs, &(xo[0])); + ST(&(xo[WS(os, 19)]), VSUB(T1A, T1z), ovs, &(xo[WS(os, 1)])); + T1P = VBYI(VADD(T1g, VFMA(LDK(KP309016994), T1K, VFMA(LDK(KP587785252), VSUB(T1F, T1E), VFNMS(LDK(KP951056516), VADD(T1B, T1C), VMUL(LDK(KP809016994), T1N)))))); + T1Q = VFMA(LDK(KP309016994), T1D, VFMA(LDK(KP951056516), VSUB(T1I, T1J), VFMA(LDK(KP587785252), VSUB(T1M, T1L), VFNMS(LDK(KP809016994), T1G, Tb)))); + ST(&(xo[WS(os, 9)]), VADD(T1P, T1Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 16)]), VSUB(T1Q, T1P), ovs, &(xo[0])); + } + } + } + } + VLEAVE(); +} + +/* Descriptor for the non-FMA build of n1fv_25: first field 25, name string "n1fv_25", and counts {146, 62, 78, 0}, whose first three entries equal the additions / multiplications / fused multiply-add tally quoted in the comment preceding the function. */ static const kdft_desc desc = { 25, XSIMD_STRING("n1fv_25"), {146, 62, 78, 0}, &GENUS, 0, 0, 0, 0 }; + +/* Hands n1fv_25 and its descriptor to the planner through X(kdft_register). */ void XSIMD(codelet_n1fv_25) (planner *p) { + X(kdft_register) (p, n1fv_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name n1fv_3 -include n1f.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 3 additions, 1 multiplications, 3 fused multiply/add), + * 11 stack variables, 2 constants, and 6 memory accesses + */ +#include "n1f.h" + +/* n1fv_3, variant compiled when HAVE_FMA is defined. Each loop pass loads xi[0], xi[WS(is, 1)] and xi[WS(is, 2)] into T1, T2, T3 with LD; forms T4 = VADD(T2, T3), T5 = VFNMS(KP500000000, T4, T1) and T6 = VMUL(KP866025403, VSUB(T3, T2)); then stores VADD(T1, T4) to xo[0], VFMAI(T6, T5) to xo[WS(os, 1)] and VFNMSI(T6, T5) to xo[WS(os, 2)]. Parameters: ri and ro are the input and output base pointers (copied into xi and xo); is and os are the strides handed to WS(); v is the starting loop count, reduced by VL per pass; ivs and ovs scale the per-pass pointer step and are also passed to LD and ST. ii and io are not referenced in this body. NOTE(review): presumably the length-3 forward complex DFT, going by the name and the genfft command line -- confirm against the FFTW codelet documentation. */ static void n1fv_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(6, is), MAKE_VOLATILE_STRIDE(6, os)) { + V T1, T2, T3, T6, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = VMUL(LDK(KP866025403), VSUB(T3, T2)); + T4 = VADD(T2, T3); + T5 = VFNMS(LDK(KP500000000), T4, T1); + ST(&(xo[0]), VADD(T1, T4), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(T6, T5), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(T6, T5), ovs, &(xo[0])); + } + } + VLEAVE(); +} + +/* Descriptor for the HAVE_FMA build of n1fv_3: first field 3, name string "n1fv_3", and counts {3, 1, 3, 0}, whose first three entries equal the additions / multiplications / fused multiply-add tally quoted in the comment preceding the function. */ static const kdft_desc desc = { 3, XSIMD_STRING("n1fv_3"), {3, 1, 3, 0}, &GENUS, 0, 0, 0, 0 }; + +/* Hands n1fv_3 and its descriptor to the planner through X(kdft_register). */ void XSIMD(codelet_n1fv_3) (planner *p) { + X(kdft_register) (p, n1fv_3, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 
-name n1fv_3 -include n1f.h */ + +/* + * This function contains 6 FP additions, 2 FP multiplications, + * (or, 5 additions, 1 multiplications, 1 fused multiply/add), + * 11 stack variables, 2 constants, and 6 memory accesses + */ +#include "n1f.h" + +/* n1fv_3, variant compiled when HAVE_FMA is not defined. Each loop pass loads xi[0], xi[WS(is, 1)] and xi[WS(is, 2)] into T1, T2, T3 with LD; forms T4 = VADD(T2, T3), T6 = VBYI(VMUL(KP866025403, VSUB(T3, T2))) and T5 = VFNMS(KP500000000, T4, T1); then stores VADD(T1, T4) to xo[0], VADD(T5, T6) to xo[WS(os, 1)] and VSUB(T5, T6) to xo[WS(os, 2)]. Parameters: ri and ro are the input and output base pointers (copied into xi and xo); is and os are the strides handed to WS(); v is the starting loop count, reduced by VL per pass; ivs and ovs scale the per-pass pointer step and are also passed to LD and ST. ii and io are not referenced in this body. NOTE(review): presumably the length-3 forward complex DFT, going by the name and the genfft command line -- confirm against the FFTW codelet documentation. */ static void n1fv_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(6, is), MAKE_VOLATILE_STRIDE(6, os)) { + V T1, T4, T6, T2, T3, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + T6 = VBYI(VMUL(LDK(KP866025403), VSUB(T3, T2))); + ST(&(xo[0]), VADD(T1, T4), ovs, &(xo[0])); + T5 = VFNMS(LDK(KP500000000), T4, T1); + ST(&(xo[WS(os, 2)]), VSUB(T5, T6), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(T5, T6), ovs, &(xo[WS(os, 1)])); + } + } + VLEAVE(); +} + +/* Descriptor for the non-FMA build of n1fv_3: first field 3, name string "n1fv_3", and counts {5, 1, 1, 0}, whose first three entries equal the additions / multiplications / fused multiply-add tally quoted in the comment preceding the function. */ static const kdft_desc desc = { 3, XSIMD_STRING("n1fv_3"), {5, 1, 1, 0}, &GENUS, 0, 0, 0, 0 }; + +/* Hands n1fv_3 and its descriptor to the planner through X(kdft_register). */ void XSIMD(codelet_n1fv_3) (planner *p) { + X(kdft_register) (p, n1fv_3, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name n1fv_32 -include n1f.h */ + +/* + * This function contains 186 FP additions, 98 FP multiplications, + * (or, 88 additions, 0 multiplications, 98 fused multiply/add), + * 104 stack variables, 7 constants, and 64 memory accesses + */ +#include "n1f.h" + +/* n1fv_32, variant compiled when HAVE_FMA is defined. Each loop pass reads the 32 inputs xi[0] and xi[WS(is, k)] for k = 1..31 with LD, combines them using VADD, VSUB, VFMA and VFNMS with the seven DVK constants declared below, and writes 32 results to xo[0] and xo[WS(os, k)] for k = 1..31 with ST; outputs 0 and 16 are stored from VADD / VSUB and the remaining ones from VFMAI / VFNMSI. Parameters: ri and ro are the input and output base pointers (copied into xi and xo); is and os are the strides handed to WS(); v is the starting loop count, reduced by VL per pass; ivs and ovs scale the per-pass pointer step and are also passed to LD and ST. ii and io are not referenced in this body. NOTE(review): presumably the length-32 forward complex DFT, going by the name and the genfft command line -- confirm against the FFTW codelet documentation. */ static void n1fv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T1h, Tr, T1a, T1k, TI, T1b, T1L, T1P, T1I, T1G, T1O, T1Q, T1H, T1z, T1c; + V TZ; + { + V T2x, T1T, T2K, T1W, T1p, Tb, T1A, T16, Tu, TF, T2N, T2H, 
T2b, T2t, TY; + V T1w, TT, T1v, T20, T2C, Tj, Te, T2h, To, T2f, T23, T2D, TB, TG, Th; + V T2i, Tk; + { + V TL, TW, TP, TQ, T2F, T27, T28, TO; + { + V T1, T2, T12, T13, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T12 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T13 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + { + V TM, T25, T26, TN; + { + V TJ, T3, T14, T1U, T6, T1V, T9, TK, TU, TV, T1R, T1S, Ta, T15; + TJ = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1R = VADD(T1, T2); + T3 = VSUB(T1, T2); + T1S = VADD(T12, T13); + T14 = VSUB(T12, T13); + T1U = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1V = VADD(T7, T8); + T9 = VSUB(T7, T8); + TK = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TU = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T2x = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + TV = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TM = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T2K = VSUB(T1V, T1U); + T1W = VADD(T1U, T1V); + Ta = VADD(T6, T9); + T15 = VSUB(T9, T6); + T25 = VADD(TJ, TK); + TL = VSUB(TJ, TK); + T26 = VADD(TV, TU); + TW = VSUB(TU, TV); + TN = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TP = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1p = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + T1A = VFMA(LDK(KP707106781), T15, T14); + T16 = VFNMS(LDK(KP707106781), T15, T14); + TQ = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + T2F = VSUB(T25, T26); + T27 = VADD(T25, T26); + T28 = VADD(TM, TN); + TO = VSUB(TM, TN); + } + } + { + V Ty, T21, Tx, Tz, T1Y, T1Z; + { + V Ts, Tt, TD, T29, TR, TE, Tv, Tw; + Ts = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TD = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T29 = VADD(TP, TQ); + TR = VSUB(TP, TQ); 
+ TE = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1Y = VADD(Ts, Tt); + Tu = VSUB(Ts, Tt); + { + V T2G, T2a, TX, TS; + T2G = VSUB(T29, T28); + T2a = VADD(T28, T29); + TX = VSUB(TR, TO); + TS = VADD(TO, TR); + T1Z = VADD(TD, TE); + TF = VSUB(TD, TE); + T21 = VADD(Tv, Tw); + Tx = VSUB(Tv, Tw); + T2N = VFMA(LDK(KP414213562), T2F, T2G); + T2H = VFNMS(LDK(KP414213562), T2G, T2F); + T2b = VSUB(T27, T2a); + T2t = VADD(T27, T2a); + TY = VFMA(LDK(KP707106781), TX, TW); + T1w = VFNMS(LDK(KP707106781), TX, TW); + TT = VFMA(LDK(KP707106781), TS, TL); + T1v = VFNMS(LDK(KP707106781), TS, TL); + Tz = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + } + } + T20 = VADD(T1Y, T1Z); + T2C = VSUB(T1Y, T1Z); + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V Tf, TA, T22, Tg; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + T22 = VADD(Ty, Tz); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T2h = VADD(Tc, Td); + To = VSUB(Tm, Tn); + T2f = VADD(Tn, Tm); + T23 = VADD(T21, T22); + T2D = VSUB(T21, T22); + TB = VADD(Tx, TA); + TG = VSUB(Tx, TA); + Th = VSUB(Tf, Tg); + T2i = VADD(Tf, Tg); + Tk = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + } + } + } + } + { + V T1t, TH, T1s, TC, T2P, T2U, T2n, T2d, T2w, T2u, T1q, T19, T1B, Tq, T2W; + V T2M, T2B, T2T, T2v, T2r, T2o, T2m, T2X, T2I; + { + V T1X, T2p, T2E, T2O, T2s, T2y, T2j, T17, Ti, T2e, Tl, T2c, T2l, T24; + T1X = VSUB(T1T, T1W); + T2p = VADD(T1T, T1W); + T2E = VFNMS(LDK(KP414213562), T2D, T2C); + T2O = VFMA(LDK(KP414213562), T2C, T2D); + T2s = VADD(T20, T23); + T24 = VSUB(T20, T23); + T1t = VFNMS(LDK(KP707106781), TG, TF); + TH = VFMA(LDK(KP707106781), 
TG, TF); + T1s = VFNMS(LDK(KP707106781), TB, Tu); + TC = VFMA(LDK(KP707106781), TB, Tu); + T2y = VSUB(T2h, T2i); + T2j = VADD(T2h, T2i); + T17 = VFMA(LDK(KP414213562), Te, Th); + Ti = VFNMS(LDK(KP414213562), Th, Te); + T2e = VADD(Tj, Tk); + Tl = VSUB(Tj, Tk); + T2c = VADD(T24, T2b); + T2l = VSUB(T2b, T24); + { + V T2L, T2A, T2q, T2k; + T2P = VSUB(T2N, T2O); + T2U = VADD(T2O, T2N); + { + V T2z, T2g, T18, Tp; + T2z = VSUB(T2e, T2f); + T2g = VADD(T2e, T2f); + T18 = VFMA(LDK(KP414213562), Tl, To); + Tp = VFNMS(LDK(KP414213562), To, Tl); + T2n = VFMA(LDK(KP707106781), T2c, T1X); + T2d = VFNMS(LDK(KP707106781), T2c, T1X); + T2w = VSUB(T2t, T2s); + T2u = VADD(T2s, T2t); + T2L = VSUB(T2z, T2y); + T2A = VADD(T2y, T2z); + T2q = VADD(T2j, T2g); + T2k = VSUB(T2g, T2j); + T1q = VADD(T17, T18); + T19 = VSUB(T17, T18); + T1B = VSUB(Tp, Ti); + Tq = VADD(Ti, Tp); + } + T2W = VFNMS(LDK(KP707106781), T2L, T2K); + T2M = VFMA(LDK(KP707106781), T2L, T2K); + T2B = VFMA(LDK(KP707106781), T2A, T2x); + T2T = VFNMS(LDK(KP707106781), T2A, T2x); + T2v = VSUB(T2p, T2q); + T2r = VADD(T2p, T2q); + T2o = VFMA(LDK(KP707106781), T2l, T2k); + T2m = VFNMS(LDK(KP707106781), T2l, T2k); + T2X = VSUB(T2H, T2E); + T2I = VADD(T2E, T2H); + } + } + { + V T2V, T2Z, T2Y, T30, T2R, T2J; + T2V = VFNMS(LDK(KP923879532), T2U, T2T); + T2Z = VFMA(LDK(KP923879532), T2U, T2T); + ST(&(xo[WS(os, 24)]), VFNMSI(T2w, T2v), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VFMAI(T2w, T2v), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T2r, T2u), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VSUB(T2r, T2u), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VFNMSI(T2o, T2n), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T2o, T2n), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VFMAI(T2m, T2d), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(T2m, T2d), ovs, &(xo[0])); + T2Y = VFMA(LDK(KP923879532), T2X, T2W); + T30 = VFNMS(LDK(KP923879532), T2X, T2W); + T2R = VFMA(LDK(KP923879532), T2I, T2B); + T2J = VFNMS(LDK(KP923879532), T2I, T2B); + { + V T1J, T1r, T1C, T1M, T2S, T2Q, 
T1u, T1D, T1E, T1x; + T1J = VFNMS(LDK(KP923879532), T1q, T1p); + T1r = VFMA(LDK(KP923879532), T1q, T1p); + T1C = VFMA(LDK(KP923879532), T1B, T1A); + T1M = VFNMS(LDK(KP923879532), T1B, T1A); + ST(&(xo[WS(os, 6)]), VFNMSI(T30, T2Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VFMAI(T30, T2Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VFNMSI(T2Y, T2V), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(T2Y, T2V), ovs, &(xo[0])); + T2S = VFMA(LDK(KP923879532), T2P, T2M); + T2Q = VFNMS(LDK(KP923879532), T2P, T2M); + T1u = VFMA(LDK(KP668178637), T1t, T1s); + T1D = VFNMS(LDK(KP668178637), T1s, T1t); + T1E = VFNMS(LDK(KP668178637), T1v, T1w); + T1x = VFMA(LDK(KP668178637), T1w, T1v); + { + V T1K, T1F, T1N, T1y; + T1h = VFNMS(LDK(KP923879532), Tq, Tb); + Tr = VFMA(LDK(KP923879532), Tq, Tb); + ST(&(xo[WS(os, 30)]), VFNMSI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(T2Q, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(T2Q, T2J), ovs, &(xo[0])); + T1K = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + T1N = VSUB(T1x, T1u); + T1y = VADD(T1u, T1x); + T1a = VFMA(LDK(KP923879532), T19, T16); + T1k = VFNMS(LDK(KP923879532), T19, T16); + TI = VFNMS(LDK(KP198912367), TH, TC); + T1b = VFMA(LDK(KP198912367), TC, TH); + T1L = VFMA(LDK(KP831469612), T1K, T1J); + T1P = VFNMS(LDK(KP831469612), T1K, T1J); + T1I = VFMA(LDK(KP831469612), T1F, T1C); + T1G = VFNMS(LDK(KP831469612), T1F, T1C); + T1O = VFMA(LDK(KP831469612), T1N, T1M); + T1Q = VFNMS(LDK(KP831469612), T1N, T1M); + T1H = VFMA(LDK(KP831469612), T1y, T1r); + T1z = VFNMS(LDK(KP831469612), T1y, T1r); + T1c = VFMA(LDK(KP198912367), TT, TY); + TZ = VFNMS(LDK(KP198912367), TY, TT); + } + } + } + } + } + { + V T1d, T1i, T10, T1l; + ST(&(xo[WS(os, 21)]), VFNMSI(T1O, T1L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFMAI(T1O, T1L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VFMAI(T1Q, T1P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(T1Q, T1P), ovs, &(xo[WS(os, 
1)])); + ST(&(xo[WS(os, 3)]), VFMAI(T1I, T1H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VFNMSI(T1I, T1H), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFMAI(T1G, T1z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFNMSI(T1G, T1z), ovs, &(xo[WS(os, 1)])); + T1d = VSUB(T1b, T1c); + T1i = VADD(T1b, T1c); + T10 = VADD(TI, TZ); + T1l = VSUB(TZ, TI); + { + V T1n, T1j, T1e, T1g, T1o, T1m, T11, T1f; + T1n = VFMA(LDK(KP980785280), T1i, T1h); + T1j = VFNMS(LDK(KP980785280), T1i, T1h); + T1e = VFNMS(LDK(KP980785280), T1d, T1a); + T1g = VFMA(LDK(KP980785280), T1d, T1a); + T1o = VFMA(LDK(KP980785280), T1l, T1k); + T1m = VFNMS(LDK(KP980785280), T1l, T1k); + T11 = VFNMS(LDK(KP980785280), T10, Tr); + T1f = VFMA(LDK(KP980785280), T10, Tr); + ST(&(xo[WS(os, 23)]), VFMAI(T1m, T1j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(T1m, T1j), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VFNMSI(T1o, T1n), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T1o, T1n), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VFMAI(T1g, T1f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(T1g, T1f), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFMAI(T1e, T11), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFNMSI(T1e, T11), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +/* Descriptor for the HAVE_FMA build of n1fv_32: first field 32, name string "n1fv_32", and counts {88, 0, 98, 0}, whose first three entries equal the additions / multiplications / fused multiply-add tally quoted in the comment preceding the function. */ static const kdft_desc desc = { 32, XSIMD_STRING("n1fv_32"), {88, 0, 98, 0}, &GENUS, 0, 0, 0, 0 }; + +/* Hands n1fv_32 and its descriptor to the planner through X(kdft_register). */ void XSIMD(codelet_n1fv_32) (planner *p) { + X(kdft_register) (p, n1fv_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name n1fv_32 -include n1f.h */ + +/* + * This function contains 186 FP additions, 42 FP multiplications, + * (or, 170 additions, 26 multiplications, 16 fused multiply/add), + * 58 stack variables, 7 constants, and 64 memory accesses + */ +#include "n1f.h" + +static void n1fv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + 
DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T1T, T1W, T2K, T2x, T16, T1A, Tb, T1p, TT, T1v, TY, T1w, T27, T2a, T2b; + V T2H, T2O, TC, T1s, TH, T1t, T20, T23, T24, T2E, T2N, T2g, T2j, Tq, T1B; + V T19, T1q, T2A, T2L; + { + V T3, T1R, T15, T1S, T6, T1U, T9, T1V, T12, Ta; + { + V T1, T2, T13, T14; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T1R = VADD(T1, T2); + T13 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T14 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T15 = VSUB(T13, T14); + T1S = VADD(T13, T14); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T1U = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T1V = VADD(T7, T8); + } + T1T = VADD(T1R, T1S); + T1W = VADD(T1U, T1V); + T2K = VSUB(T1V, T1U); + T2x = VSUB(T1R, T1S); + T12 = VMUL(LDK(KP707106781), VSUB(T9, T6)); + T16 = VSUB(T12, T15); + T1A = VADD(T15, T12); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VADD(T3, Ta); + T1p = VSUB(T3, Ta); + } + { + V TL, T25, TX, T26, TO, T28, TR, T29; + { + V TJ, TK, TV, TW; + TJ = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + TK = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TL = VSUB(TJ, TK); + T25 = VADD(TJ, 
TK); + TV = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TW = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + TX = VSUB(TV, TW); + T26 = VADD(TV, TW); + } + { + V TM, TN, TP, TQ; + TM = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + TN = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TO = VSUB(TM, TN); + T28 = VADD(TM, TN); + TP = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + TQ = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + TR = VSUB(TP, TQ); + T29 = VADD(TP, TQ); + } + { + V TS, TU, T2F, T2G; + TS = VMUL(LDK(KP707106781), VADD(TO, TR)); + TT = VADD(TL, TS); + T1v = VSUB(TL, TS); + TU = VMUL(LDK(KP707106781), VSUB(TR, TO)); + TY = VSUB(TU, TX); + T1w = VADD(TX, TU); + T27 = VADD(T25, T26); + T2a = VADD(T28, T29); + T2b = VSUB(T27, T2a); + T2F = VSUB(T25, T26); + T2G = VSUB(T29, T28); + T2H = VFNMS(LDK(KP382683432), T2G, VMUL(LDK(KP923879532), T2F)); + T2O = VFMA(LDK(KP382683432), T2F, VMUL(LDK(KP923879532), T2G)); + } + } + { + V Tu, T1Y, TG, T1Z, Tx, T21, TA, T22; + { + V Ts, Tt, TE, TF; + Ts = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + Tu = VSUB(Ts, Tt); + T1Y = VADD(Ts, Tt); + TE = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + TF = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + TG = VSUB(TE, TF); + T1Z = VADD(TE, TF); + } + { + V Tv, Tw, Ty, Tz; + Tv = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tx = VSUB(Tv, Tw); + T21 = VADD(Tv, Tw); + Ty = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + Tz = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + TA = VSUB(Ty, Tz); + T22 = VADD(Ty, Tz); + } + { + V TB, TD, T2C, T2D; + TB = VMUL(LDK(KP707106781), VADD(Tx, TA)); + TC = VADD(Tu, TB); + T1s = VSUB(Tu, TB); + TD = VMUL(LDK(KP707106781), VSUB(TA, Tx)); + TH = VSUB(TD, TG); + T1t = VADD(TG, TD); + T20 = VADD(T1Y, T1Z); + T23 = VADD(T21, T22); + T24 = VSUB(T20, T23); + T2C = VSUB(T1Y, T1Z); + T2D = VSUB(T22, T21); + T2E = VFMA(LDK(KP923879532), T2C, 
VMUL(LDK(KP382683432), T2D)); + T2N = VFNMS(LDK(KP382683432), T2C, VMUL(LDK(KP923879532), T2D)); + } + } + { + V Te, T2h, To, T2f, Th, T2i, Tl, T2e, Ti, Tp; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T2h = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T2f = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T2i = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T2e = VADD(Tj, Tk); + } + T2g = VADD(T2e, T2f); + T2j = VADD(T2h, T2i); + Ti = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + Tp = VFMA(LDK(KP923879532), Tl, VMUL(LDK(KP382683432), To)); + Tq = VADD(Ti, Tp); + T1B = VSUB(Tp, Ti); + { + V T17, T18, T2y, T2z; + T17 = VFNMS(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T18 = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + T19 = VSUB(T17, T18); + T1q = VADD(T18, T17); + T2y = VSUB(T2h, T2i); + T2z = VSUB(T2e, T2f); + T2A = VMUL(LDK(KP707106781), VADD(T2y, T2z)); + T2L = VMUL(LDK(KP707106781), VSUB(T2z, T2y)); + } + } + { + V T2d, T2n, T2m, T2o; + { + V T1X, T2c, T2k, T2l; + T1X = VSUB(T1T, T1W); + T2c = VMUL(LDK(KP707106781), VADD(T24, T2b)); + T2d = VADD(T1X, T2c); + T2n = VSUB(T1X, T2c); + T2k = VSUB(T2g, T2j); + T2l = VMUL(LDK(KP707106781), VSUB(T2b, T24)); + T2m = VBYI(VADD(T2k, T2l)); + T2o = VBYI(VSUB(T2l, T2k)); + } + ST(&(xo[WS(os, 28)]), VSUB(T2d, T2m), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VADD(T2n, T2o), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(T2d, T2m), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VSUB(T2n, T2o), ovs, &(xo[0])); + } + { + V T2r, T2v, T2u, T2w; + { + V T2p, T2q, T2s, T2t; + T2p = VADD(T1T, T1W); + T2q = VADD(T2j, T2g); + T2r = VADD(T2p, T2q); + T2v = 
VSUB(T2p, T2q); + T2s = VADD(T20, T23); + T2t = VADD(T27, T2a); + T2u = VADD(T2s, T2t); + T2w = VBYI(VSUB(T2t, T2s)); + } + ST(&(xo[WS(os, 16)]), VSUB(T2r, T2u), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(T2v, T2w), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T2r, T2u), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VSUB(T2v, T2w), ovs, &(xo[0])); + } + { + V T2V, T2Z, T2Y, T30; + { + V T2T, T2U, T2W, T2X; + T2T = VSUB(T2H, T2E); + T2U = VSUB(T2L, T2K); + T2V = VBYI(VSUB(T2T, T2U)); + T2Z = VBYI(VADD(T2U, T2T)); + T2W = VSUB(T2x, T2A); + T2X = VSUB(T2O, T2N); + T2Y = VSUB(T2W, T2X); + T30 = VADD(T2W, T2X); + } + ST(&(xo[WS(os, 10)]), VADD(T2V, T2Y), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VSUB(T30, T2Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VSUB(T2Y, T2V), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VADD(T2Z, T30), ovs, &(xo[0])); + } + { + V T2J, T2R, T2Q, T2S; + { + V T2B, T2I, T2M, T2P; + T2B = VADD(T2x, T2A); + T2I = VADD(T2E, T2H); + T2J = VADD(T2B, T2I); + T2R = VSUB(T2B, T2I); + T2M = VADD(T2K, T2L); + T2P = VADD(T2N, T2O); + T2Q = VBYI(VADD(T2M, T2P)); + T2S = VBYI(VSUB(T2P, T2M)); + } + ST(&(xo[WS(os, 30)]), VSUB(T2J, T2Q), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VADD(T2R, T2S), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(T2J, T2Q), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VSUB(T2R, T2S), ovs, &(xo[0])); + } + { + V T1r, T1C, T1M, T1K, T1F, T1N, T1y, T1J; + T1r = VADD(T1p, T1q); + T1C = VADD(T1A, T1B); + T1M = VSUB(T1p, T1q); + T1K = VSUB(T1B, T1A); + { + V T1D, T1E, T1u, T1x; + T1D = VFNMS(LDK(KP555570233), T1s, VMUL(LDK(KP831469612), T1t)); + T1E = VFMA(LDK(KP555570233), T1v, VMUL(LDK(KP831469612), T1w)); + T1F = VADD(T1D, T1E); + T1N = VSUB(T1E, T1D); + T1u = VFMA(LDK(KP831469612), T1s, VMUL(LDK(KP555570233), T1t)); + T1x = VFNMS(LDK(KP555570233), T1w, VMUL(LDK(KP831469612), T1v)); + T1y = VADD(T1u, T1x); + T1J = VSUB(T1x, T1u); + } + { + V T1z, T1G, T1P, T1Q; + T1z = VADD(T1r, T1y); + T1G = VBYI(VADD(T1C, T1F)); + ST(&(xo[WS(os, 29)]), VSUB(T1z, T1G), ovs, &(xo[WS(os, 
1)])); + ST(&(xo[WS(os, 3)]), VADD(T1z, T1G), ovs, &(xo[WS(os, 1)])); + T1P = VBYI(VADD(T1K, T1J)); + T1Q = VADD(T1M, T1N); + ST(&(xo[WS(os, 5)]), VADD(T1P, T1Q), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VSUB(T1Q, T1P), ovs, &(xo[WS(os, 1)])); + } + { + V T1H, T1I, T1L, T1O; + T1H = VSUB(T1r, T1y); + T1I = VBYI(VSUB(T1F, T1C)); + ST(&(xo[WS(os, 19)]), VSUB(T1H, T1I), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VADD(T1H, T1I), ovs, &(xo[WS(os, 1)])); + T1L = VBYI(VSUB(T1J, T1K)); + T1O = VSUB(T1M, T1N); + ST(&(xo[WS(os, 11)]), VADD(T1L, T1O), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 21)]), VSUB(T1O, T1L), ovs, &(xo[WS(os, 1)])); + } + } + { + V Tr, T1a, T1k, T1i, T1d, T1l, T10, T1h; + Tr = VADD(Tb, Tq); + T1a = VADD(T16, T19); + T1k = VSUB(Tb, Tq); + T1i = VSUB(T19, T16); + { + V T1b, T1c, TI, TZ; + T1b = VFNMS(LDK(KP195090322), TC, VMUL(LDK(KP980785280), TH)); + T1c = VFMA(LDK(KP195090322), TT, VMUL(LDK(KP980785280), TY)); + T1d = VADD(T1b, T1c); + T1l = VSUB(T1c, T1b); + TI = VFMA(LDK(KP980785280), TC, VMUL(LDK(KP195090322), TH)); + TZ = VFNMS(LDK(KP195090322), TY, VMUL(LDK(KP980785280), TT)); + T10 = VADD(TI, TZ); + T1h = VSUB(TZ, TI); + } + { + V T11, T1e, T1n, T1o; + T11 = VADD(Tr, T10); + T1e = VBYI(VADD(T1a, T1d)); + ST(&(xo[WS(os, 31)]), VSUB(T11, T1e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(T11, T1e), ovs, &(xo[WS(os, 1)])); + T1n = VBYI(VADD(T1i, T1h)); + T1o = VADD(T1k, T1l); + ST(&(xo[WS(os, 7)]), VADD(T1n, T1o), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VSUB(T1o, T1n), ovs, &(xo[WS(os, 1)])); + } + { + V T1f, T1g, T1j, T1m; + T1f = VSUB(Tr, T10); + T1g = VBYI(VSUB(T1d, T1a)); + ST(&(xo[WS(os, 17)]), VSUB(T1f, T1g), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VADD(T1f, T1g), ovs, &(xo[WS(os, 1)])); + T1j = VBYI(VSUB(T1h, T1i)); + T1m = VSUB(T1k, T1l); + ST(&(xo[WS(os, 9)]), VADD(T1j, T1m), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 23)]), VSUB(T1m, T1j), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static 
const kdft_desc desc = { 32, XSIMD_STRING("n1fv_32"), {170, 26, 16, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_32) (planner *p) { + X(kdft_register) (p, n1fv_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name n1fv_4 -include n1f.h */ + +/* + * This function contains 8 FP additions, 2 FP multiplications, + * (or, 6 additions, 0 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "n1f.h" + +static void n1fv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V T3, T7, T6, T8; + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T6 = VSUB(T4, T5); + T8 = VADD(T4, T5); + ST(&(xo[WS(os, 2)]), VSUB(T7, T8), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T7, T8), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(T6, T3), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(T6, T3), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n1fv_4"), {6, 0, 2, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_4) (planner *p) { + X(kdft_register) (p, n1fv_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name n1fv_4 -include n1f.h 
*/ + +/* + * This function contains 8 FP additions, 0 FP multiplications, + * (or, 8 additions, 0 multiplications, 0 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "n1f.h" + +static void n1fv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T3, T7, T6, T8; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VBYI(VSUB(T4, T5)); + T8 = VADD(T4, T5); + } + ST(&(xo[WS(os, 1)]), VSUB(T3, T6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(T7, T8), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VADD(T3, T6), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VSUB(T7, T8), ovs, &(xo[0])); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n1fv_4"), {8, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_4) (planner *p) { + X(kdft_register) (p, n1fv_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name n1fv_5 -include n1f.h */ + +/* + * This function contains 16 FP additions, 11 FP multiplications, + * (or, 7 additions, 2 multiplications, 9 fused multiply/add), + * 23 stack variables, 4 constants, and 10 memory accesses + */ +#include "n1f.h" + +static void n1fv_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(10, is), MAKE_VOLATILE_STRIDE(10, os)) { + V T1, T2, T3, T5, T6; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V Tc, T4, Td, T7; + Tc = VSUB(T2, T3); + T4 = VADD(T2, T3); + Td = VSUB(T5, T6); + T7 = VADD(T5, T6); + { + V 
Tg, Te, Ta, T8, T9, Tf, Tb; + Tg = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tc, Td)); + Te = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Td, Tc)); + Ta = VSUB(T4, T7); + T8 = VADD(T4, T7); + T9 = VFNMS(LDK(KP250000000), T8, T1); + ST(&(xo[0]), VADD(T1, T8), ovs, &(xo[0])); + Tf = VFNMS(LDK(KP559016994), Ta, T9); + Tb = VFMA(LDK(KP559016994), Ta, T9); + ST(&(xo[WS(os, 2)]), VFMAI(Tg, Tf), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFNMSI(Tg, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(Te, Tb), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFNMSI(Te, Tb), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 5, XSIMD_STRING("n1fv_5"), {7, 2, 9, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_5) (planner *p) { + X(kdft_register) (p, n1fv_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name n1fv_5 -include n1f.h */ + +/* + * This function contains 16 FP additions, 6 FP multiplications, + * (or, 13 additions, 3 multiplications, 3 fused multiply/add), + * 18 stack variables, 4 constants, and 10 memory accesses + */ +#include "n1f.h" + +static void n1fv_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(10, is), MAKE_VOLATILE_STRIDE(10, os)) { + V T8, T7, Td, T9, Tc; + T8 = LD(&(xi[0]), ivs, &(xi[0])); + { + V T1, T2, T3, T4, T5, T6; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = VADD(T1, T2); + T4 = 
LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VADD(T4, T5); + T7 = VMUL(LDK(KP559016994), VSUB(T3, T6)); + Td = VSUB(T4, T5); + T9 = VADD(T3, T6); + Tc = VSUB(T1, T2); + } + ST(&(xo[0]), VADD(T8, T9), ovs, &(xo[0])); + { + V Te, Tf, Tb, Tg, Ta; + Te = VBYI(VFMA(LDK(KP951056516), Tc, VMUL(LDK(KP587785252), Td))); + Tf = VBYI(VFNMS(LDK(KP587785252), Tc, VMUL(LDK(KP951056516), Td))); + Ta = VFNMS(LDK(KP250000000), T9, T8); + Tb = VADD(T7, Ta); + Tg = VSUB(Ta, T7); + ST(&(xo[WS(os, 1)]), VSUB(Tb, Te), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VSUB(Tg, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VADD(Te, Tb), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(Tf, Tg), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 5, XSIMD_STRING("n1fv_5"), {13, 3, 3, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_5) (planner *p) { + X(kdft_register) (p, n1fv_5, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name n1fv_6 -include n1f.h */ + +/* + * This function contains 18 FP additions, 8 FP multiplications, + * (or, 12 additions, 2 multiplications, 6 fused multiply/add), + * 23 stack variables, 2 constants, and 12 memory accesses + */ +#include "n1f.h" + +static void n1fv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V T1, T2, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + { + V T3, Td, T6, Te, T9, Tf; + T3 = VSUB(T1, T2); + Td = VADD(T1, T2); + T6 = VSUB(T4, T5); + Te = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); + { + V Tg, Ti, Ta, Tc, Th, Tb; + Tg = VADD(Te, Tf); + Ti = VMUL(LDK(KP866025403), VSUB(Tf, Te)); + Ta = VADD(T6, T9); + Tc = VMUL(LDK(KP866025403), VSUB(T9, T6)); + Th = VFNMS(LDK(KP500000000), Tg, Td); + ST(&(xo[0]), VADD(Td, Tg), ovs, &(xo[0])); + Tb = VFNMS(LDK(KP500000000), Ta, T3); + 
ST(&(xo[WS(os, 3)]), VADD(T3, Ta), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFMAI(Ti, Th), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFNMSI(Ti, Th), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFMAI(Tc, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(Tc, Tb), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n1fv_6"), {12, 2, 6, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_6) (planner *p) { + X(kdft_register) (p, n1fv_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name n1fv_6 -include n1f.h */ + +/* + * This function contains 18 FP additions, 4 FP multiplications, + * (or, 16 additions, 2 multiplications, 2 fused multiply/add), + * 19 stack variables, 2 constants, and 12 memory accesses + */ +#include "n1f.h" + +static void n1fv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V T3, Td, T6, Te, T9, Tf, Ta, Tg, T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Td = VADD(T1, T2); + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Te = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); + } + Ta = VADD(T6, T9); + Tg = VADD(Te, Tf); + ST(&(xo[WS(os, 3)]), VADD(T3, Ta), ovs, &(xo[WS(os, 1)])); + ST(&(xo[0]), VADD(Td, Tg), ovs, &(xo[0])); + { + V Tb, Tc, Th, Ti; + Tb = 
VFNMS(LDK(KP500000000), Ta, T3); + Tc = VBYI(VMUL(LDK(KP866025403), VSUB(T9, T6))); + ST(&(xo[WS(os, 5)]), VSUB(Tb, Tc), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(Tb, Tc), ovs, &(xo[WS(os, 1)])); + Th = VFNMS(LDK(KP500000000), Tg, Td); + Ti = VBYI(VMUL(LDK(KP866025403), VSUB(Tf, Te))); + ST(&(xo[WS(os, 2)]), VSUB(Th, Ti), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(Th, Ti), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n1fv_6"), {16, 2, 2, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_6) (planner *p) { + X(kdft_register) (p, n1fv_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1568 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n1fv_64 -include n1f.h */ + +/* + * This function contains 456 FP additions, 258 FP multiplications, + * (or, 198 additions, 0 multiplications, 258 fused multiply/add), + * 168 stack variables, 15 constants, and 128 memory accesses + */ +#include "n1f.h" + +static void n1fv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + 
xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T5T, T5S, T5X, T65, T5Z, T5R, T67, T63, T5U, T64; + { + V T7, T26, T5k, T6A, T47, T69, T2V, T3z, T6B, T4e, T6a, T5n, T3M, T2Y, T27; + V Tm, T3A, T3l, T2a, TC, T5p, T4o, T6E, T6e, T3i, T3B, TR, T29, T4x, T5q; + V T6h, T6D, T39, T3H, T3I, T3c, T5N, T57, T72, T6w, T5O, T5e, T71, T6t, T2y; + V T1W, T2x, T1N, T33, T34, T3E, T32, T1p, T2v, T1g, T2u, T4M, T5K, T6p, T6Z; + V T6m, T6Y, T5L, T4T; + { + V T4g, T4l, T3j, Tu, Tx, T4h, TA, T4i; + { + V T1, T2, T23, T24, T4, T5, T20, T21; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T23 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T24 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T20 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T21 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + { + V Ta, T48, Tk, T4c, T49, Td, Tf, Tg; + { + V T8, T43, T3, T44, T25, T5i, T6, T45, T22, T9, Ti, Tj, Tb, Tc; + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T43 = VSUB(T1, T2); + T3 = VADD(T1, T2); + T44 = VSUB(T23, T24); + T25 = VADD(T23, T24); + T5i = VSUB(T4, T5); + T6 = VADD(T4, T5); + T45 = VSUB(T20, T21); + T22 = VADD(T20, T21); + T9 = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Ti = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + { + V T2T, T46, T5j, T2U; + T7 = VSUB(T3, T6); + T2T = VADD(T3, T6); + T46 = VADD(T44, T45); + T5j = VSUB(T45, T44); + T26 = VSUB(T22, T25); + T2U = VADD(T25, T22); + Ta = VADD(T8, T9); + T48 = VSUB(T8, T9); + Tk = VADD(Ti, Tj); + T4c = VSUB(Tj, Ti); + T5k = VFNMS(LDK(KP707106781), T5j, T5i); + T6A = VFMA(LDK(KP707106781), T5j, T5i); + T47 = VFMA(LDK(KP707106781), T46, T43); + T69 = VFNMS(LDK(KP707106781), T46, T43); + T2V = 
VADD(T2T, T2U); + T3z = VSUB(T2T, T2U); + T49 = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + } + Tf = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + } + { + V Te, T2W, T5l, T4a, Tq, Tt, Tv, Tw, T5m, T4d, Tl, T2X, Ty, Tz, To; + V Tp; + To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tp = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + { + V Th, T4b, Tr, Ts; + Tr = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + Te = VSUB(Ta, Td); + T2W = VADD(Ta, Td); + T5l = VFMA(LDK(KP414213562), T48, T49); + T4a = VFNMS(LDK(KP414213562), T49, T48); + Th = VADD(Tf, Tg); + T4b = VSUB(Tf, Tg); + Tq = VADD(To, Tp); + T4g = VSUB(To, Tp); + T4l = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + Tv = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T5m = VFMA(LDK(KP414213562), T4b, T4c); + T4d = VFNMS(LDK(KP414213562), T4c, T4b); + Tl = VSUB(Th, Tk); + T2X = VADD(Th, Tk); + Ty = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + } + T3j = VADD(Tq, Tt); + Tu = VSUB(Tq, Tt); + Tx = VADD(Tv, Tw); + T4h = VSUB(Tv, Tw); + T6B = VSUB(T4d, T4a); + T4e = VADD(T4a, T4d); + T6a = VADD(T5l, T5m); + T5n = VSUB(T5l, T5m); + T3M = VSUB(T2X, T2W); + T2Y = VADD(T2W, T2X); + T27 = VSUB(Tl, Te); + Tm = VADD(Te, Tl); + TA = VADD(Ty, Tz); + T4i = VSUB(Ty, Tz); + } + } + } + { + V TK, T4p, T4u, T4k, T6d, T4n, T6c, TL, TN, TO, T3g, TJ, TF, TI; + { + V TD, TE, TG, TH; + TD = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TE = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + TG = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TH = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + { + V T3k, TB, T4j, T4m; + T3k = VADD(Tx, TA); + TB = VSUB(Tx, TA); + T4j = VADD(T4h, T4i); + T4m = VSUB(T4h, T4i); + T4p = VSUB(TD, TE); + TF = VADD(TD, TE); + T4u = VSUB(TH, TG); + TI = VADD(TG, TH); + T3A = VSUB(T3j, T3k); + T3l = VADD(T3j, T3k); + T2a = VFMA(LDK(KP414213562), Tu, TB); + TC = 
VFNMS(LDK(KP414213562), TB, Tu); + T4k = VFMA(LDK(KP707106781), T4j, T4g); + T6d = VFNMS(LDK(KP707106781), T4j, T4g); + T4n = VFMA(LDK(KP707106781), T4m, T4l); + T6c = VFNMS(LDK(KP707106781), T4m, T4l); + TL = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + } + TN = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TO = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + } + T3g = VADD(TF, TI); + TJ = VSUB(TF, TI); + { + V T3a, T1E, T52, T5b, T1x, T4Z, T6r, T6u, T5a, T1U, T55, T5c, T1L, T3b; + { + V T4V, T1t, T58, T1w, T1Q, T1T, T1I, T4Y, T59, T1J, T53, T1H; + { + V T1r, TM, T4r, TP, T4q, T1s, T1u, T1v; + T1r = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T5p = VFMA(LDK(KP198912367), T4k, T4n); + T4o = VFNMS(LDK(KP198912367), T4n, T4k); + T6E = VFMA(LDK(KP668178637), T6c, T6d); + T6e = VFNMS(LDK(KP668178637), T6d, T6c); + TM = VADD(TK, TL); + T4r = VSUB(TK, TL); + TP = VADD(TN, TO); + T4q = VSUB(TN, TO); + T1s = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1v = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + { + V T1R, T4X, T6g, T4t, T6f, T4w, T1S, T1O, T1P; + T1O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1R = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V T3h, TQ, T4s, T4v; + T3h = VADD(TP, TM); + TQ = VSUB(TM, TP); + T4s = VADD(T4q, T4r); + T4v = VSUB(T4r, T4q); + T4V = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + T58 = VSUB(T1v, T1u); + T1w = VADD(T1u, T1v); + T4X = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + T3i = VADD(T3g, T3h); + T3B = VSUB(T3g, T3h); + TR = VFNMS(LDK(KP414213562), TQ, TJ); + T29 = VFMA(LDK(KP414213562), TJ, TQ); + T6g = VFNMS(LDK(KP707106781), T4s, T4p); + T4t = VFMA(LDK(KP707106781), T4s, T4p); + T6f = VFNMS(LDK(KP707106781), T4v, T4u); + T4w = VFMA(LDK(KP707106781), T4v, T4u); + T1S = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + } + { + V T4W, T1A, T50, T51, T1D, T1F, T1G; + { + V T1y, T1z, T1B, T1C; + T1y = LD(&(xi[WS(is, 3)]), ivs, 
&(xi[WS(is, 1)])); + T1z = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1B = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1C = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T4x = VFNMS(LDK(KP198912367), T4w, T4t); + T5q = VFMA(LDK(KP198912367), T4t, T4w); + T6h = VFNMS(LDK(KP668178637), T6g, T6f); + T6D = VFMA(LDK(KP668178637), T6f, T6g); + T4W = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + T1A = VADD(T1y, T1z); + T50 = VSUB(T1y, T1z); + T51 = VSUB(T1C, T1B); + T1D = VADD(T1B, T1C); + } + T1F = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1G = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1I = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4Y = VADD(T4W, T4X); + T59 = VSUB(T4X, T4W); + T1J = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T3a = VADD(T1A, T1D); + T1E = VSUB(T1A, T1D); + T52 = VFMA(LDK(KP414213562), T51, T50); + T5b = VFNMS(LDK(KP414213562), T50, T51); + T53 = VSUB(T1F, T1G); + T1H = VADD(T1F, T1G); + } + } + } + { + V T37, T54, T1K, T38; + T1x = VSUB(T1t, T1w); + T37 = VADD(T1t, T1w); + T4Z = VFMA(LDK(KP707106781), T4Y, T4V); + T6r = VFNMS(LDK(KP707106781), T4Y, T4V); + T54 = VSUB(T1J, T1I); + T1K = VADD(T1I, T1J); + T6u = VFNMS(LDK(KP707106781), T59, T58); + T5a = VFMA(LDK(KP707106781), T59, T58); + T38 = VADD(T1T, T1Q); + T1U = VSUB(T1Q, T1T); + T55 = VFNMS(LDK(KP414213562), T54, T53); + T5c = VFMA(LDK(KP414213562), T53, T54); + T1L = VSUB(T1H, T1K); + T3b = VADD(T1H, T1K); + T39 = VADD(T37, T38); + T3H = VSUB(T37, T38); + } + } + { + V T4A, TW, T4N, TZ, T1j, T1m, T4O, T4D, T13, T4F, T16, T4G, T1a, T4I, T4J; + V T1d; + { + V TU, TV, TX, TY, T56, T6v; + TU = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T56 = VADD(T52, T55); + T6v = VSUB(T55, T52); + { + V T5d, T6s, T1V, T1M; + T5d = VADD(T5b, T5c); + T6s = VSUB(T5c, T5b); + T1V = VSUB(T1L, T1E); + T1M = VADD(T1E, T1L); + T3I = VSUB(T3b, T3a); + T3c = VADD(T3a, T3b); + T5N = VFNMS(LDK(KP923879532), T56, T4Z); + T57 = VFMA(LDK(KP923879532), T56, T4Z); + T72 = VFNMS(LDK(KP923879532), 
T6v, T6u); + T6w = VFMA(LDK(KP923879532), T6v, T6u); + T5O = VFNMS(LDK(KP923879532), T5d, T5a); + T5e = VFMA(LDK(KP923879532), T5d, T5a); + T71 = VFMA(LDK(KP923879532), T6s, T6r); + T6t = VFNMS(LDK(KP923879532), T6s, T6r); + T2y = VFNMS(LDK(KP707106781), T1V, T1U); + T1W = VFMA(LDK(KP707106781), T1V, T1U); + T2x = VFNMS(LDK(KP707106781), T1M, T1x); + T1N = VFMA(LDK(KP707106781), T1M, T1x); + TV = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + } + TX = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TY = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + { + V T1h, T1i, T1k, T1l; + T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + { + V T11, T4B, T4C, T12, T14, T15; + T11 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T4A = VSUB(TU, TV); + TW = VADD(TU, TV); + T4N = VSUB(TX, TY); + TZ = VADD(TX, TY); + T1j = VADD(T1h, T1i); + T4B = VSUB(T1h, T1i); + T1m = VADD(T1k, T1l); + T4C = VSUB(T1k, T1l); + T12 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T14 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T15 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + { + V T18, T19, T1b, T1c; + T18 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T19 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1b = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T1c = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T4O = VSUB(T4B, T4C); + T4D = VADD(T4B, T4C); + T13 = VADD(T11, T12); + T4F = VSUB(T11, T12); + T16 = VADD(T14, T15); + T4G = VSUB(T14, T15); + T1a = VADD(T18, T19); + T4I = VSUB(T18, T19); + T4J = VSUB(T1b, T1c); + T1d = VADD(T1b, T1c); + } + } + } + } + { + V T30, T10, T6k, T4E, T4Q, T4H, T17, T6n, T4P, T1e, T4K, T4R, T1n, T31; + T30 = VADD(TW, TZ); + T10 = VSUB(TW, TZ); + T6k = VFNMS(LDK(KP707106781), T4D, T4A); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4Q = VFMA(LDK(KP414213562), T4F, T4G); + T4H = 
VFNMS(LDK(KP414213562), T4G, T4F); + T33 = VADD(T13, T16); + T17 = VSUB(T13, T16); + T6n = VFNMS(LDK(KP707106781), T4O, T4N); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T34 = VADD(T1a, T1d); + T1e = VSUB(T1a, T1d); + T4K = VFMA(LDK(KP414213562), T4J, T4I); + T4R = VFNMS(LDK(KP414213562), T4I, T4J); + T1n = VSUB(T1j, T1m); + T31 = VADD(T1j, T1m); + { + V T1f, T1o, T6o, T4L, T4S, T6l; + T1f = VADD(T17, T1e); + T1o = VSUB(T17, T1e); + T6o = VSUB(T4H, T4K); + T4L = VADD(T4H, T4K); + T4S = VADD(T4Q, T4R); + T6l = VSUB(T4Q, T4R); + T3E = VSUB(T30, T31); + T32 = VADD(T30, T31); + T1p = VFMA(LDK(KP707106781), T1o, T1n); + T2v = VFNMS(LDK(KP707106781), T1o, T1n); + T1g = VFMA(LDK(KP707106781), T1f, T10); + T2u = VFNMS(LDK(KP707106781), T1f, T10); + T4M = VFMA(LDK(KP923879532), T4L, T4E); + T5K = VFNMS(LDK(KP923879532), T4L, T4E); + T6p = VFMA(LDK(KP923879532), T6o, T6n); + T6Z = VFNMS(LDK(KP923879532), T6o, T6n); + T6m = VFNMS(LDK(KP923879532), T6l, T6k); + T6Y = VFMA(LDK(KP923879532), T6l, T6k); + T5L = VFNMS(LDK(KP923879532), T4S, T4P); + T4T = VFMA(LDK(KP923879532), T4S, T4P); + } + } + } + } + } + } + { + V T6b, T6F, T7f, T6X, T70, T79, T7a, T73, T6C, T76, T77, T6i; + { + V T2Z, T3r, T3s, T3m, T3d, T3v; + T2Z = VSUB(T2V, T2Y); + T3r = VADD(T2V, T2Y); + T3s = VADD(T3l, T3i); + T3m = VSUB(T3i, T3l); + T3d = VSUB(T39, T3c); + T3v = VADD(T39, T3c); + { + V T3x, T3t, T3P, T3J, T3D, T3V, T3Q, T3G, T36, T3u, T3Y, T3O, T6V, T6W; + { + V T3N, T3C, T3F, T35; + T3N = VSUB(T3B, T3A); + T3C = VADD(T3A, T3B); + T3F = VSUB(T33, T34); + T35 = VADD(T33, T34); + T3x = VSUB(T3r, T3s); + T3t = VADD(T3r, T3s); + T3P = VFMA(LDK(KP414213562), T3H, T3I); + T3J = VFNMS(LDK(KP414213562), T3I, T3H); + T3D = VFMA(LDK(KP707106781), T3C, T3z); + T3V = VFNMS(LDK(KP707106781), T3C, T3z); + T3Q = VFMA(LDK(KP414213562), T3E, T3F); + T3G = VFNMS(LDK(KP414213562), T3F, T3E); + T36 = VSUB(T32, T35); + T3u = VADD(T32, T35); + T3Y = VFNMS(LDK(KP707106781), T3N, T3M); + T3O = VFMA(LDK(KP707106781), T3N, 
T3M); + } + T6b = VFNMS(LDK(KP923879532), T6a, T69); + T6V = VFMA(LDK(KP923879532), T6a, T69); + T6W = VADD(T6E, T6D); + T6F = VSUB(T6D, T6E); + { + V T3K, T3Z, T3e, T3n; + T3K = VADD(T3G, T3J); + T3Z = VSUB(T3J, T3G); + T3e = VADD(T36, T3d); + T3n = VSUB(T3d, T36); + { + V T3w, T3y, T3R, T3W; + T3w = VADD(T3u, T3v); + T3y = VSUB(T3v, T3u); + T3R = VSUB(T3P, T3Q); + T3W = VADD(T3Q, T3P); + { + V T42, T40, T3L, T3T; + T42 = VFNMS(LDK(KP923879532), T3Z, T3Y); + T40 = VFMA(LDK(KP923879532), T3Z, T3Y); + T3L = VFNMS(LDK(KP923879532), T3K, T3D); + T3T = VFMA(LDK(KP923879532), T3K, T3D); + { + V T3o, T3q, T3f, T3p; + T3o = VFNMS(LDK(KP707106781), T3n, T3m); + T3q = VFMA(LDK(KP707106781), T3n, T3m); + T3f = VFNMS(LDK(KP707106781), T3e, T2Z); + T3p = VFMA(LDK(KP707106781), T3e, T2Z); + ST(&(xo[WS(os, 48)]), VFNMSI(T3y, T3x), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VFMAI(T3y, T3x), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T3t, T3w), ovs, &(xo[0])); + ST(&(xo[WS(os, 32)]), VSUB(T3t, T3w), ovs, &(xo[0])); + { + V T41, T3X, T3S, T3U; + T41 = VFMA(LDK(KP923879532), T3W, T3V); + T3X = VFNMS(LDK(KP923879532), T3W, T3V); + T3S = VFNMS(LDK(KP923879532), T3R, T3O); + T3U = VFMA(LDK(KP923879532), T3R, T3O); + ST(&(xo[WS(os, 8)]), VFMAI(T3q, T3p), ovs, &(xo[0])); + ST(&(xo[WS(os, 56)]), VFNMSI(T3q, T3p), ovs, &(xo[0])); + ST(&(xo[WS(os, 40)]), VFMAI(T3o, T3f), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VFNMSI(T3o, T3f), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VFNMSI(T40, T3X), ovs, &(xo[0])); + ST(&(xo[WS(os, 20)]), VFMAI(T40, T3X), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VFMAI(T42, T41), ovs, &(xo[0])); + ST(&(xo[WS(os, 12)]), VFNMSI(T42, T41), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VFMAI(T3U, T3T), ovs, &(xo[0])); + ST(&(xo[WS(os, 60)]), VFNMSI(T3U, T3T), ovs, &(xo[0])); + ST(&(xo[WS(os, 36)]), VFMAI(T3S, T3L), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VFNMSI(T3S, T3L), ovs, &(xo[0])); + T7f = VFNMS(LDK(KP831469612), T6W, T6V); + T6X = VFMA(LDK(KP831469612), T6W, T6V); + } + } + } + } + } + 
T70 = VFMA(LDK(KP303346683), T6Z, T6Y); + T79 = VFNMS(LDK(KP303346683), T6Y, T6Z); + T7a = VFNMS(LDK(KP303346683), T71, T72); + T73 = VFMA(LDK(KP303346683), T72, T71); + T6C = VFNMS(LDK(KP923879532), T6B, T6A); + T76 = VFMA(LDK(KP923879532), T6B, T6A); + T77 = VSUB(T6e, T6h); + T6i = VADD(T6e, T6h); + } + } + { + V T2r, T2D, T2C, T2s, T5H, T5o, T5v, T5D, T5r, T5I, T5x, T5h, T5F, T5B; + { + V TT, T2f, T2n, T1Y, T28, T2b, T2l, T2p, T2j, T2k; + { + V T1X, T2d, T7h, T7l, T2e, T1q, T75, T7d, T7m, T7k, T7c, T7e, Tn, TS; + T2r = VFNMS(LDK(KP707106781), Tm, T7); + Tn = VFMA(LDK(KP707106781), Tm, T7); + TS = VADD(TC, TR); + T2D = VSUB(TR, TC); + { + V T7b, T7j, T74, T7i, T78, T7g; + T1X = VFNMS(LDK(KP198912367), T1W, T1N); + T2d = VFMA(LDK(KP198912367), T1N, T1W); + T7g = VADD(T79, T7a); + T7b = VSUB(T79, T7a); + T7j = VSUB(T73, T70); + T74 = VADD(T70, T73); + T7i = VFNMS(LDK(KP831469612), T77, T76); + T78 = VFMA(LDK(KP831469612), T77, T76); + T2j = VFNMS(LDK(KP923879532), TS, Tn); + TT = VFMA(LDK(KP923879532), TS, Tn); + T7h = VFMA(LDK(KP956940335), T7g, T7f); + T7l = VFNMS(LDK(KP956940335), T7g, T7f); + T2e = VFMA(LDK(KP198912367), T1g, T1p); + T1q = VFNMS(LDK(KP198912367), T1p, T1g); + T75 = VFNMS(LDK(KP956940335), T74, T6X); + T7d = VFMA(LDK(KP956940335), T74, T6X); + T7m = VFNMS(LDK(KP956940335), T7j, T7i); + T7k = VFMA(LDK(KP956940335), T7j, T7i); + T7c = VFNMS(LDK(KP956940335), T7b, T78); + T7e = VFMA(LDK(KP956940335), T7b, T78); + } + T2k = VADD(T2e, T2d); + T2f = VSUB(T2d, T2e); + ST(&(xo[WS(os, 45)]), VFNMSI(T7k, T7h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 19)]), VFMAI(T7k, T7h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 51)]), VFMAI(T7m, T7l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 13)]), VFNMSI(T7m, T7l), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VFMAI(T7e, T7d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 61)]), VFNMSI(T7e, T7d), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 35)]), VFMAI(T7c, T75), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VFNMSI(T7c, 
T75), ovs, &(xo[WS(os, 1)])); + T2n = VSUB(T1X, T1q); + T1Y = VADD(T1q, T1X); + T2C = VFNMS(LDK(KP707106781), T27, T26); + T28 = VFMA(LDK(KP707106781), T27, T26); + T2b = VSUB(T29, T2a); + T2s = VADD(T2a, T29); + } + T2l = VFNMS(LDK(KP980785280), T2k, T2j); + T2p = VFMA(LDK(KP980785280), T2k, T2j); + { + V T5z, T4z, T5A, T5g; + { + V T4f, T4y, T1Z, T2h, T4U, T5t, T2m, T2c, T5u, T5f; + T5H = VFNMS(LDK(KP923879532), T4e, T47); + T4f = VFMA(LDK(KP923879532), T4e, T47); + T4y = VADD(T4o, T4x); + T5T = VSUB(T4x, T4o); + T1Z = VFNMS(LDK(KP980785280), T1Y, TT); + T2h = VFMA(LDK(KP980785280), T1Y, TT); + T4U = VFNMS(LDK(KP098491403), T4T, T4M); + T5t = VFMA(LDK(KP098491403), T4M, T4T); + T2m = VFNMS(LDK(KP923879532), T2b, T28); + T2c = VFMA(LDK(KP923879532), T2b, T28); + T5u = VFMA(LDK(KP098491403), T57, T5e); + T5f = VFNMS(LDK(KP098491403), T5e, T57); + T5z = VFNMS(LDK(KP980785280), T4y, T4f); + T4z = VFMA(LDK(KP980785280), T4y, T4f); + T5S = VFNMS(LDK(KP923879532), T5n, T5k); + T5o = VFMA(LDK(KP923879532), T5n, T5k); + { + V T2o, T2q, T2i, T2g; + T2o = VFMA(LDK(KP980785280), T2n, T2m); + T2q = VFNMS(LDK(KP980785280), T2n, T2m); + T2i = VFMA(LDK(KP980785280), T2f, T2c); + T2g = VFNMS(LDK(KP980785280), T2f, T2c); + T5A = VADD(T5t, T5u); + T5v = VSUB(T5t, T5u); + T5D = VSUB(T5f, T4U); + T5g = VADD(T4U, T5f); + ST(&(xo[WS(os, 46)]), VFNMSI(T2o, T2l), ovs, &(xo[0])); + ST(&(xo[WS(os, 18)]), VFMAI(T2o, T2l), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VFMAI(T2q, T2p), ovs, &(xo[0])); + ST(&(xo[WS(os, 14)]), VFNMSI(T2q, T2p), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(T2i, T2h), ovs, &(xo[0])); + ST(&(xo[WS(os, 62)]), VFNMSI(T2i, T2h), ovs, &(xo[0])); + ST(&(xo[WS(os, 34)]), VFMAI(T2g, T1Z), ovs, &(xo[0])); + ST(&(xo[WS(os, 30)]), VFNMSI(T2g, T1Z), ovs, &(xo[0])); + T5r = VSUB(T5p, T5q); + T5I = VADD(T5p, T5q); + } + } + T5x = VFMA(LDK(KP995184726), T5g, T4z); + T5h = VFNMS(LDK(KP995184726), T5g, T4z); + T5F = VFMA(LDK(KP995184726), T5A, T5z); + T5B = VFNMS(LDK(KP995184726), T5A, 
T5z); + } + } + { + V T6J, T6R, T6L, T6z, T6T, T6P; + { + V T6N, T6j, T6O, T6y; + { + V T6q, T6H, T5C, T5s, T6I, T6x; + T6q = VFNMS(LDK(KP534511135), T6p, T6m); + T6H = VFMA(LDK(KP534511135), T6m, T6p); + T5C = VFNMS(LDK(KP980785280), T5r, T5o); + T5s = VFMA(LDK(KP980785280), T5r, T5o); + T6I = VFMA(LDK(KP534511135), T6t, T6w); + T6x = VFNMS(LDK(KP534511135), T6w, T6t); + T6N = VFMA(LDK(KP831469612), T6i, T6b); + T6j = VFNMS(LDK(KP831469612), T6i, T6b); + { + V T5E, T5G, T5y, T5w; + T5E = VFNMS(LDK(KP995184726), T5D, T5C); + T5G = VFMA(LDK(KP995184726), T5D, T5C); + T5y = VFMA(LDK(KP995184726), T5v, T5s); + T5w = VFNMS(LDK(KP995184726), T5v, T5s); + T6O = VADD(T6H, T6I); + T6J = VSUB(T6H, T6I); + T6R = VSUB(T6x, T6q); + T6y = VADD(T6q, T6x); + ST(&(xo[WS(os, 47)]), VFMAI(T5E, T5B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 17)]), VFNMSI(T5E, T5B), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VFNMSI(T5G, T5F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 15)]), VFMAI(T5G, T5F), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 63)]), VFMAI(T5y, T5x), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(T5y, T5x), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VFMAI(T5w, T5h), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 33)]), VFNMSI(T5w, T5h), ovs, &(xo[WS(os, 1)])); + } + } + T6L = VFMA(LDK(KP881921264), T6y, T6j); + T6z = VFNMS(LDK(KP881921264), T6y, T6j); + T6T = VFMA(LDK(KP881921264), T6O, T6N); + T6P = VFNMS(LDK(KP881921264), T6O, T6N); + } + { + V T2H, T2P, T2J, T2B, T2R, T2N; + { + V T2L, T2t, T2M, T2A; + { + V T2z, T2F, T6Q, T6G, T2G, T2w; + T2z = VFMA(LDK(KP668178637), T2y, T2x); + T2F = VFNMS(LDK(KP668178637), T2x, T2y); + T6Q = VFMA(LDK(KP831469612), T6F, T6C); + T6G = VFNMS(LDK(KP831469612), T6F, T6C); + T2G = VFNMS(LDK(KP668178637), T2u, T2v); + T2w = VFMA(LDK(KP668178637), T2v, T2u); + T2L = VFNMS(LDK(KP923879532), T2s, T2r); + T2t = VFMA(LDK(KP923879532), T2s, T2r); + { + V T6S, T6U, T6M, T6K; + T6S = VFNMS(LDK(KP881921264), T6R, T6Q); + T6U = 
VFMA(LDK(KP881921264), T6R, T6Q); + T6M = VFMA(LDK(KP881921264), T6J, T6G); + T6K = VFNMS(LDK(KP881921264), T6J, T6G); + T2M = VADD(T2G, T2F); + T2H = VSUB(T2F, T2G); + T2P = VSUB(T2z, T2w); + T2A = VADD(T2w, T2z); + ST(&(xo[WS(os, 43)]), VFMAI(T6S, T6P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 21)]), VFNMSI(T6S, T6P), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VFNMSI(T6U, T6T), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 11)]), VFMAI(T6U, T6T), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 59)]), VFMAI(T6M, T6L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(T6M, T6L), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VFMAI(T6K, T6z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 37)]), VFNMSI(T6K, T6z), ovs, &(xo[WS(os, 1)])); + } + } + T2J = VFMA(LDK(KP831469612), T2A, T2t); + T2B = VFNMS(LDK(KP831469612), T2A, T2t); + T2R = VFNMS(LDK(KP831469612), T2M, T2L); + T2N = VFMA(LDK(KP831469612), T2M, T2L); + } + { + V T61, T5J, T62, T5Q; + { + V T5M, T5V, T2O, T2E, T5W, T5P; + T5M = VFMA(LDK(KP820678790), T5L, T5K); + T5V = VFNMS(LDK(KP820678790), T5K, T5L); + T2O = VFMA(LDK(KP923879532), T2D, T2C); + T2E = VFNMS(LDK(KP923879532), T2D, T2C); + T5W = VFNMS(LDK(KP820678790), T5N, T5O); + T5P = VFMA(LDK(KP820678790), T5O, T5N); + T61 = VFNMS(LDK(KP980785280), T5I, T5H); + T5J = VFMA(LDK(KP980785280), T5I, T5H); + { + V T2Q, T2S, T2K, T2I; + T2Q = VFNMS(LDK(KP831469612), T2P, T2O); + T2S = VFMA(LDK(KP831469612), T2P, T2O); + T2K = VFMA(LDK(KP831469612), T2H, T2E); + T2I = VFNMS(LDK(KP831469612), T2H, T2E); + T62 = VADD(T5V, T5W); + T5X = VSUB(T5V, T5W); + T65 = VSUB(T5P, T5M); + T5Q = VADD(T5M, T5P); + ST(&(xo[WS(os, 42)]), VFMAI(T2Q, T2N), ovs, &(xo[0])); + ST(&(xo[WS(os, 22)]), VFNMSI(T2Q, T2N), ovs, &(xo[0])); + ST(&(xo[WS(os, 54)]), VFNMSI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 10)]), VFMAI(T2S, T2R), ovs, &(xo[0])); + ST(&(xo[WS(os, 58)]), VFMAI(T2K, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(T2K, T2J), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VFMAI(T2I, 
T2B), ovs, &(xo[0])); + ST(&(xo[WS(os, 38)]), VFNMSI(T2I, T2B), ovs, &(xo[0])); + } + } + T5Z = VFMA(LDK(KP773010453), T5Q, T5J); + T5R = VFNMS(LDK(KP773010453), T5Q, T5J); + T67 = VFNMS(LDK(KP773010453), T62, T61); + T63 = VFMA(LDK(KP773010453), T62, T61); + } + } + } + } + } + } + T5U = VFMA(LDK(KP980785280), T5T, T5S); + T64 = VFNMS(LDK(KP980785280), T5T, T5S); + { + V T68, T66, T5Y, T60; + T68 = VFNMS(LDK(KP773010453), T65, T64); + T66 = VFMA(LDK(KP773010453), T65, T64); + T5Y = VFNMS(LDK(KP773010453), T5X, T5U); + T60 = VFMA(LDK(KP773010453), T5X, T5U); + ST(&(xo[WS(os, 41)]), VFNMSI(T66, T63), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 23)]), VFMAI(T66, T63), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VFMAI(T68, T67), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 9)]), VFNMSI(T68, T67), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(T60, T5Z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 57)]), VFNMSI(T60, T5Z), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 39)]), VFMAI(T5Y, T5R), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VFNMSI(T5Y, T5R), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n1fv_64"), {198, 0, 258, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_64) (planner *p) { + X(kdft_register) (p, n1fv_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n1fv_64 -include n1f.h */ + +/* + * This function contains 456 FP additions, 124 FP multiplications, + * (or, 404 additions, 72 multiplications, 52 fused multiply/add), + * 108 stack variables, 15 constants, and 128 memory accesses + */ +#include "n1f.h" + +static void n1fv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP098017140, 
+0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T4p, T5q, Tb, T39, T2n, T3A, T6f, T6T, Tq, T3B, T6i, T76, T2i, T3a, T4w; + V T5r, TI, T2p, T6C, T6V, T3h, T3E, T4L, T5u, TZ, T2q, T6F, T6U, T3e, T3D; + V T4E, T5t, T23, T2N, T6t, T71, T6w, T72, T2c, T2O, T3t, T41, T5f, T5R, T5k; + V T5S, T3w, T42, T1s, T2K, T6m, T6Y, T6p, T6Z, T1B, T2L, T3m, T3Y, T4Y, T5O; + V T53, T5P, T3p, T3Z; + { + V T3, T4n, T2m, T4o, T6, T5p, T9, T5o; + { + V T1, T2, T2k, T2l; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T4n = VADD(T1, T2); + T2k = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T2l = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T2m = VSUB(T2k, T2l); + T4o = VADD(T2k, T2l); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T5p = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 24)]), 
ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T5o = VADD(T7, T8); + } + T4p = VSUB(T4n, T4o); + T5q = VSUB(T5o, T5p); + { + V Ta, T2j, T6d, T6e; + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VADD(T3, Ta); + T39 = VSUB(T3, Ta); + T2j = VMUL(LDK(KP707106781), VSUB(T9, T6)); + T2n = VSUB(T2j, T2m); + T3A = VADD(T2m, T2j); + T6d = VADD(T4n, T4o); + T6e = VADD(T5p, T5o); + T6f = VADD(T6d, T6e); + T6T = VSUB(T6d, T6e); + } + } + { + V Te, T4q, To, T4u, Th, T4r, Tl, T4t; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T4q = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T4u = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T4r = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T4t = VADD(Tj, Tk); + } + { + V Ti, Tp, T6g, T6h; + Ti = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + Tp = VFMA(LDK(KP923879532), Tl, VMUL(LDK(KP382683432), To)); + Tq = VADD(Ti, Tp); + T3B = VSUB(Tp, Ti); + T6g = VADD(T4q, T4r); + T6h = VADD(T4t, T4u); + T6i = VADD(T6g, T6h); + T76 = VSUB(T6h, T6g); + } + { + V T2g, T2h, T4s, T4v; + T2g = VFNMS(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T2h = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + T2i = VSUB(T2g, T2h); + T3a = VADD(T2h, T2g); + T4s = VSUB(T4q, T4r); + T4v = VSUB(T4t, T4u); + T4w = VMUL(LDK(KP707106781), VADD(T4s, T4v)); + T5r = VMUL(LDK(KP707106781), VSUB(T4v, T4s)); + } + } + { + V Tu, T4F, TG, T4G, TB, T4J, TD, T4I; + { + V Ts, Tt, TE, TF; + Ts = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Tu = VSUB(Ts, Tt); + T4F = VADD(Ts, Tt); + TE = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TF = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); 
+ TG = VSUB(TE, TF); + T4G = VADD(TE, TF); + { + V Tv, Tw, Tx, Ty, Tz, TA; + Tv = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + Tx = VSUB(Tv, Tw); + Ty = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + TB = VMUL(LDK(KP707106781), VADD(Tx, TA)); + T4J = VADD(Tv, Tw); + TD = VMUL(LDK(KP707106781), VSUB(TA, Tx)); + T4I = VADD(Ty, Tz); + } + } + { + V TC, TH, T6A, T6B; + TC = VADD(Tu, TB); + TH = VSUB(TD, TG); + TI = VFMA(LDK(KP195090322), TC, VMUL(LDK(KP980785280), TH)); + T2p = VFNMS(LDK(KP195090322), TH, VMUL(LDK(KP980785280), TC)); + T6A = VADD(T4F, T4G); + T6B = VADD(T4J, T4I); + T6C = VADD(T6A, T6B); + T6V = VSUB(T6A, T6B); + } + { + V T3f, T3g, T4H, T4K; + T3f = VSUB(Tu, TB); + T3g = VADD(TG, TD); + T3h = VFNMS(LDK(KP555570233), T3g, VMUL(LDK(KP831469612), T3f)); + T3E = VFMA(LDK(KP555570233), T3f, VMUL(LDK(KP831469612), T3g)); + T4H = VSUB(T4F, T4G); + T4K = VSUB(T4I, T4J); + T4L = VFNMS(LDK(KP382683432), T4K, VMUL(LDK(KP923879532), T4H)); + T5u = VFMA(LDK(KP382683432), T4H, VMUL(LDK(KP923879532), T4K)); + } + } + { + V TS, T4z, TW, T4y, TP, T4C, TX, T4B; + { + V TQ, TR, TU, TV; + TQ = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + TR = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + TS = VSUB(TQ, TR); + T4z = VADD(TQ, TR); + TU = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + TW = VSUB(TU, TV); + T4y = VADD(TU, TV); + { + V TJ, TK, TL, TM, TN, TO; + TJ = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + TL = VSUB(TJ, TK); + TM = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + TO = VSUB(TM, TN); + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + T4C = VADD(TM, TN); + TX = VMUL(LDK(KP707106781), VADD(TO, TL)); + T4B = VADD(TJ, TK); + } + } + { + V TT, TY, T6D, T6E; + TT = VSUB(TP, TS); + TY = VADD(TW, TX); + TZ = VFNMS(LDK(KP195090322), TY, VMUL(LDK(KP980785280), TT)); + T2q = 
VFMA(LDK(KP980785280), TY, VMUL(LDK(KP195090322), TT)); + T6D = VADD(T4y, T4z); + T6E = VADD(T4C, T4B); + T6F = VADD(T6D, T6E); + T6U = VSUB(T6D, T6E); + } + { + V T3c, T3d, T4A, T4D; + T3c = VSUB(TW, TX); + T3d = VADD(TS, TP); + T3e = VFMA(LDK(KP831469612), T3c, VMUL(LDK(KP555570233), T3d)); + T3D = VFNMS(LDK(KP555570233), T3c, VMUL(LDK(KP831469612), T3d)); + T4A = VSUB(T4y, T4z); + T4D = VSUB(T4B, T4C); + T4E = VFMA(LDK(KP923879532), T4A, VMUL(LDK(KP382683432), T4D)); + T5t = VFNMS(LDK(KP382683432), T4A, VMUL(LDK(KP923879532), T4D)); + } + } + { + V T1F, T55, T2a, T56, T1M, T5h, T27, T5g, T58, T59, T1U, T5a, T25, T5b, T5c; + V T21, T5d, T24; + { + V T1D, T1E, T28, T29; + T1D = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T1E = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1F = VSUB(T1D, T1E); + T55 = VADD(T1D, T1E); + T28 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T29 = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + T2a = VSUB(T28, T29); + T56 = VADD(T28, T29); + } + { + V T1G, T1H, T1I, T1J, T1K, T1L; + T1G = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T1H = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + T1I = VSUB(T1G, T1H); + T1J = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1K = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1L = VSUB(T1J, T1K); + T1M = VMUL(LDK(KP707106781), VADD(T1I, T1L)); + T5h = VADD(T1G, T1H); + T27 = VMUL(LDK(KP707106781), VSUB(T1L, T1I)); + T5g = VADD(T1J, T1K); + } + { + V T1Q, T1T, T1X, T20; + { + V T1O, T1P, T1R, T1S; + T1O = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1Q = VSUB(T1O, T1P); + T58 = VADD(T1O, T1P); + T1R = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1S = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T1T = VSUB(T1R, T1S); + T59 = VADD(T1R, T1S); + } + T1U = VFNMS(LDK(KP382683432), T1T, VMUL(LDK(KP923879532), T1Q)); + T5a = VSUB(T58, T59); + T25 = VFMA(LDK(KP382683432), T1Q, VMUL(LDK(KP923879532), T1T)); + { + V T1V, T1W, T1Y, 
T1Z; + T1V = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1W = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1X = VSUB(T1V, T1W); + T5b = VADD(T1V, T1W); + T1Y = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T1Z = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T20 = VSUB(T1Y, T1Z); + T5c = VADD(T1Y, T1Z); + } + T21 = VFMA(LDK(KP923879532), T1X, VMUL(LDK(KP382683432), T20)); + T5d = VSUB(T5b, T5c); + T24 = VFNMS(LDK(KP923879532), T20, VMUL(LDK(KP382683432), T1X)); + } + { + V T1N, T22, T6r, T6s; + T1N = VADD(T1F, T1M); + T22 = VADD(T1U, T21); + T23 = VSUB(T1N, T22); + T2N = VADD(T1N, T22); + T6r = VADD(T55, T56); + T6s = VADD(T5h, T5g); + T6t = VADD(T6r, T6s); + T71 = VSUB(T6r, T6s); + } + { + V T6u, T6v, T26, T2b; + T6u = VADD(T58, T59); + T6v = VADD(T5b, T5c); + T6w = VADD(T6u, T6v); + T72 = VSUB(T6v, T6u); + T26 = VSUB(T24, T25); + T2b = VSUB(T27, T2a); + T2c = VSUB(T26, T2b); + T2O = VADD(T2b, T26); + } + { + V T3r, T3s, T57, T5e; + T3r = VSUB(T1F, T1M); + T3s = VADD(T25, T24); + T3t = VADD(T3r, T3s); + T41 = VSUB(T3r, T3s); + T57 = VSUB(T55, T56); + T5e = VMUL(LDK(KP707106781), VADD(T5a, T5d)); + T5f = VADD(T57, T5e); + T5R = VSUB(T57, T5e); + } + { + V T5i, T5j, T3u, T3v; + T5i = VSUB(T5g, T5h); + T5j = VMUL(LDK(KP707106781), VSUB(T5d, T5a)); + T5k = VADD(T5i, T5j); + T5S = VSUB(T5j, T5i); + T3u = VADD(T2a, T27); + T3v = VSUB(T21, T1U); + T3w = VADD(T3u, T3v); + T42 = VSUB(T3v, T3u); + } + } + { + V T1q, T4P, T1v, T4O, T1n, T50, T1w, T4Z, T4U, T4V, T18, T4W, T1z, T4R, T4S; + V T1f, T4T, T1y; + { + V T1o, T1p, T1t, T1u; + T1o = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T1p = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + T1q = VSUB(T1o, T1p); + T4P = VADD(T1o, T1p); + T1t = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T1v = VSUB(T1t, T1u); + T4O = VADD(T1t, T1u); + } + { + V T1h, T1i, T1j, T1k, T1l, T1m; + T1h = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 25)]), 
ivs, &(xi[WS(is, 1)])); + T1j = VSUB(T1h, T1i); + T1k = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1m = VSUB(T1k, T1l); + T1n = VMUL(LDK(KP707106781), VSUB(T1j, T1m)); + T50 = VADD(T1k, T1l); + T1w = VMUL(LDK(KP707106781), VADD(T1m, T1j)); + T4Z = VADD(T1h, T1i); + } + { + V T14, T17, T1b, T1e; + { + V T12, T13, T15, T16; + T12 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T13 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T14 = VSUB(T12, T13); + T4U = VADD(T12, T13); + T15 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T16 = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T17 = VSUB(T15, T16); + T4V = VADD(T15, T16); + } + T18 = VFNMS(LDK(KP923879532), T17, VMUL(LDK(KP382683432), T14)); + T4W = VSUB(T4U, T4V); + T1z = VFMA(LDK(KP923879532), T14, VMUL(LDK(KP382683432), T17)); + { + V T19, T1a, T1c, T1d; + T19 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T1a = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T1b = VSUB(T19, T1a); + T4R = VADD(T19, T1a); + T1c = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T1d = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + T1e = VSUB(T1c, T1d); + T4S = VADD(T1c, T1d); + } + T1f = VFMA(LDK(KP382683432), T1b, VMUL(LDK(KP923879532), T1e)); + T4T = VSUB(T4R, T4S); + T1y = VFNMS(LDK(KP382683432), T1e, VMUL(LDK(KP923879532), T1b)); + } + { + V T1g, T1r, T6k, T6l; + T1g = VSUB(T18, T1f); + T1r = VSUB(T1n, T1q); + T1s = VSUB(T1g, T1r); + T2K = VADD(T1r, T1g); + T6k = VADD(T4O, T4P); + T6l = VADD(T50, T4Z); + T6m = VADD(T6k, T6l); + T6Y = VSUB(T6k, T6l); + } + { + V T6n, T6o, T1x, T1A; + T6n = VADD(T4R, T4S); + T6o = VADD(T4U, T4V); + T6p = VADD(T6n, T6o); + T6Z = VSUB(T6o, T6n); + T1x = VADD(T1v, T1w); + T1A = VADD(T1y, T1z); + T1B = VSUB(T1x, T1A); + T2L = VADD(T1x, T1A); + } + { + V T3k, T3l, T4Q, T4X; + T3k = VSUB(T1v, T1w); + T3l = VADD(T1f, T18); + T3m = VADD(T3k, T3l); + T3Y = VSUB(T3k, T3l); + T4Q = VSUB(T4O, T4P); + T4X = VMUL(LDK(KP707106781), VADD(T4T, 
T4W)); + T4Y = VADD(T4Q, T4X); + T5O = VSUB(T4Q, T4X); + } + { + V T51, T52, T3n, T3o; + T51 = VSUB(T4Z, T50); + T52 = VMUL(LDK(KP707106781), VSUB(T4W, T4T)); + T53 = VADD(T51, T52); + T5P = VSUB(T52, T51); + T3n = VADD(T1q, T1n); + T3o = VSUB(T1z, T1y); + T3p = VADD(T3n, T3o); + T3Z = VSUB(T3o, T3n); + } + } + { + V T6N, T6R, T6Q, T6S; + { + V T6L, T6M, T6O, T6P; + T6L = VADD(T6f, T6i); + T6M = VADD(T6F, T6C); + T6N = VADD(T6L, T6M); + T6R = VSUB(T6L, T6M); + T6O = VADD(T6m, T6p); + T6P = VADD(T6t, T6w); + T6Q = VADD(T6O, T6P); + T6S = VBYI(VSUB(T6P, T6O)); + } + ST(&(xo[WS(os, 32)]), VSUB(T6N, T6Q), ovs, &(xo[0])); + ST(&(xo[WS(os, 16)]), VADD(T6R, T6S), ovs, &(xo[0])); + ST(&(xo[0]), VADD(T6N, T6Q), ovs, &(xo[0])); + ST(&(xo[WS(os, 48)]), VSUB(T6R, T6S), ovs, &(xo[0])); + } + { + V T6j, T6G, T6y, T6H, T6q, T6x; + T6j = VSUB(T6f, T6i); + T6G = VSUB(T6C, T6F); + T6q = VSUB(T6m, T6p); + T6x = VSUB(T6t, T6w); + T6y = VMUL(LDK(KP707106781), VADD(T6q, T6x)); + T6H = VMUL(LDK(KP707106781), VSUB(T6x, T6q)); + { + V T6z, T6I, T6J, T6K; + T6z = VADD(T6j, T6y); + T6I = VBYI(VADD(T6G, T6H)); + ST(&(xo[WS(os, 56)]), VSUB(T6z, T6I), ovs, &(xo[0])); + ST(&(xo[WS(os, 8)]), VADD(T6z, T6I), ovs, &(xo[0])); + T6J = VSUB(T6j, T6y); + T6K = VBYI(VSUB(T6H, T6G)); + ST(&(xo[WS(os, 40)]), VSUB(T6J, T6K), ovs, &(xo[0])); + ST(&(xo[WS(os, 24)]), VADD(T6J, T6K), ovs, &(xo[0])); + } + } + { + V T6X, T7i, T78, T7g, T74, T7f, T7b, T7j, T6W, T77; + T6W = VMUL(LDK(KP707106781), VADD(T6U, T6V)); + T6X = VADD(T6T, T6W); + T7i = VSUB(T6T, T6W); + T77 = VMUL(LDK(KP707106781), VSUB(T6V, T6U)); + T78 = VADD(T76, T77); + T7g = VSUB(T77, T76); + { + V T70, T73, T79, T7a; + T70 = VFMA(LDK(KP923879532), T6Y, VMUL(LDK(KP382683432), T6Z)); + T73 = VFNMS(LDK(KP382683432), T72, VMUL(LDK(KP923879532), T71)); + T74 = VADD(T70, T73); + T7f = VSUB(T73, T70); + T79 = VFNMS(LDK(KP382683432), T6Y, VMUL(LDK(KP923879532), T6Z)); + T7a = VFMA(LDK(KP382683432), T71, VMUL(LDK(KP923879532), T72)); + T7b = VADD(T79, 
T7a); + T7j = VSUB(T7a, T79); + } + { + V T75, T7c, T7l, T7m; + T75 = VADD(T6X, T74); + T7c = VBYI(VADD(T78, T7b)); + ST(&(xo[WS(os, 60)]), VSUB(T75, T7c), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(T75, T7c), ovs, &(xo[0])); + T7l = VBYI(VADD(T7g, T7f)); + T7m = VADD(T7i, T7j); + ST(&(xo[WS(os, 12)]), VADD(T7l, T7m), ovs, &(xo[0])); + ST(&(xo[WS(os, 52)]), VSUB(T7m, T7l), ovs, &(xo[0])); + } + { + V T7d, T7e, T7h, T7k; + T7d = VSUB(T6X, T74); + T7e = VBYI(VSUB(T7b, T78)); + ST(&(xo[WS(os, 36)]), VSUB(T7d, T7e), ovs, &(xo[0])); + ST(&(xo[WS(os, 28)]), VADD(T7d, T7e), ovs, &(xo[0])); + T7h = VBYI(VSUB(T7f, T7g)); + T7k = VSUB(T7i, T7j); + ST(&(xo[WS(os, 20)]), VADD(T7h, T7k), ovs, &(xo[0])); + ST(&(xo[WS(os, 44)]), VSUB(T7k, T7h), ovs, &(xo[0])); + } + } + { + V T5N, T68, T61, T69, T5U, T65, T5Y, T66; + { + V T5L, T5M, T5Z, T60; + T5L = VSUB(T4p, T4w); + T5M = VSUB(T5u, T5t); + T5N = VADD(T5L, T5M); + T68 = VSUB(T5L, T5M); + T5Z = VFNMS(LDK(KP555570233), T5O, VMUL(LDK(KP831469612), T5P)); + T60 = VFMA(LDK(KP555570233), T5R, VMUL(LDK(KP831469612), T5S)); + T61 = VADD(T5Z, T60); + T69 = VSUB(T60, T5Z); + } + { + V T5Q, T5T, T5W, T5X; + T5Q = VFMA(LDK(KP831469612), T5O, VMUL(LDK(KP555570233), T5P)); + T5T = VFNMS(LDK(KP555570233), T5S, VMUL(LDK(KP831469612), T5R)); + T5U = VADD(T5Q, T5T); + T65 = VSUB(T5T, T5Q); + T5W = VSUB(T5r, T5q); + T5X = VSUB(T4L, T4E); + T5Y = VADD(T5W, T5X); + T66 = VSUB(T5X, T5W); + } + { + V T5V, T62, T6b, T6c; + T5V = VADD(T5N, T5U); + T62 = VBYI(VADD(T5Y, T61)); + ST(&(xo[WS(os, 58)]), VSUB(T5V, T62), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VADD(T5V, T62), ovs, &(xo[0])); + T6b = VBYI(VADD(T66, T65)); + T6c = VADD(T68, T69); + ST(&(xo[WS(os, 10)]), VADD(T6b, T6c), ovs, &(xo[0])); + ST(&(xo[WS(os, 54)]), VSUB(T6c, T6b), ovs, &(xo[0])); + } + { + V T63, T64, T67, T6a; + T63 = VSUB(T5N, T5U); + T64 = VBYI(VSUB(T61, T5Y)); + ST(&(xo[WS(os, 38)]), VSUB(T63, T64), ovs, &(xo[0])); + ST(&(xo[WS(os, 26)]), VADD(T63, T64), ovs, &(xo[0])); + T67 = 
VBYI(VSUB(T65, T66)); + T6a = VSUB(T68, T69); + ST(&(xo[WS(os, 22)]), VADD(T67, T6a), ovs, &(xo[0])); + ST(&(xo[WS(os, 42)]), VSUB(T6a, T67), ovs, &(xo[0])); + } + } + { + V T11, T2C, T2v, T2D, T2e, T2z, T2s, T2A; + { + V Tr, T10, T2t, T2u; + Tr = VSUB(Tb, Tq); + T10 = VSUB(TI, TZ); + T11 = VADD(Tr, T10); + T2C = VSUB(Tr, T10); + T2t = VFNMS(LDK(KP634393284), T1B, VMUL(LDK(KP773010453), T1s)); + T2u = VFMA(LDK(KP773010453), T2c, VMUL(LDK(KP634393284), T23)); + T2v = VADD(T2t, T2u); + T2D = VSUB(T2u, T2t); + } + { + V T1C, T2d, T2o, T2r; + T1C = VFMA(LDK(KP634393284), T1s, VMUL(LDK(KP773010453), T1B)); + T2d = VFNMS(LDK(KP634393284), T2c, VMUL(LDK(KP773010453), T23)); + T2e = VADD(T1C, T2d); + T2z = VSUB(T2d, T1C); + T2o = VSUB(T2i, T2n); + T2r = VSUB(T2p, T2q); + T2s = VADD(T2o, T2r); + T2A = VSUB(T2r, T2o); + } + { + V T2f, T2w, T2F, T2G; + T2f = VADD(T11, T2e); + T2w = VBYI(VADD(T2s, T2v)); + ST(&(xo[WS(os, 57)]), VSUB(T2f, T2w), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VADD(T2f, T2w), ovs, &(xo[WS(os, 1)])); + T2F = VBYI(VADD(T2A, T2z)); + T2G = VADD(T2C, T2D); + ST(&(xo[WS(os, 9)]), VADD(T2F, T2G), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 55)]), VSUB(T2G, T2F), ovs, &(xo[WS(os, 1)])); + } + { + V T2x, T2y, T2B, T2E; + T2x = VSUB(T11, T2e); + T2y = VBYI(VSUB(T2v, T2s)); + ST(&(xo[WS(os, 39)]), VSUB(T2x, T2y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 25)]), VADD(T2x, T2y), ovs, &(xo[WS(os, 1)])); + T2B = VBYI(VSUB(T2z, T2A)); + T2E = VSUB(T2C, T2D); + ST(&(xo[WS(os, 23)]), VADD(T2B, T2E), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 41)]), VSUB(T2E, T2B), ovs, &(xo[WS(os, 1)])); + } + } + { + V T3j, T3Q, T3J, T3R, T3y, T3N, T3G, T3O; + { + V T3b, T3i, T3H, T3I; + T3b = VADD(T39, T3a); + T3i = VADD(T3e, T3h); + T3j = VADD(T3b, T3i); + T3Q = VSUB(T3b, T3i); + T3H = VFNMS(LDK(KP290284677), T3m, VMUL(LDK(KP956940335), T3p)); + T3I = VFMA(LDK(KP290284677), T3t, VMUL(LDK(KP956940335), T3w)); + T3J = VADD(T3H, T3I); + T3R = VSUB(T3I, T3H); + } + { + V T3q, T3x, T3C, 
T3F; + T3q = VFMA(LDK(KP956940335), T3m, VMUL(LDK(KP290284677), T3p)); + T3x = VFNMS(LDK(KP290284677), T3w, VMUL(LDK(KP956940335), T3t)); + T3y = VADD(T3q, T3x); + T3N = VSUB(T3x, T3q); + T3C = VADD(T3A, T3B); + T3F = VADD(T3D, T3E); + T3G = VADD(T3C, T3F); + T3O = VSUB(T3F, T3C); + } + { + V T3z, T3K, T3T, T3U; + T3z = VADD(T3j, T3y); + T3K = VBYI(VADD(T3G, T3J)); + ST(&(xo[WS(os, 61)]), VSUB(T3z, T3K), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(T3z, T3K), ovs, &(xo[WS(os, 1)])); + T3T = VBYI(VADD(T3O, T3N)); + T3U = VADD(T3Q, T3R); + ST(&(xo[WS(os, 13)]), VADD(T3T, T3U), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 51)]), VSUB(T3U, T3T), ovs, &(xo[WS(os, 1)])); + } + { + V T3L, T3M, T3P, T3S; + T3L = VSUB(T3j, T3y); + T3M = VBYI(VSUB(T3J, T3G)); + ST(&(xo[WS(os, 35)]), VSUB(T3L, T3M), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 29)]), VADD(T3L, T3M), ovs, &(xo[WS(os, 1)])); + T3P = VBYI(VSUB(T3N, T3O)); + T3S = VSUB(T3Q, T3R); + ST(&(xo[WS(os, 19)]), VADD(T3P, T3S), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 45)]), VSUB(T3S, T3P), ovs, &(xo[WS(os, 1)])); + } + } + { + V T4N, T5G, T5z, T5H, T5m, T5D, T5w, T5E; + { + V T4x, T4M, T5x, T5y; + T4x = VADD(T4p, T4w); + T4M = VADD(T4E, T4L); + T4N = VADD(T4x, T4M); + T5G = VSUB(T4x, T4M); + T5x = VFNMS(LDK(KP195090322), T4Y, VMUL(LDK(KP980785280), T53)); + T5y = VFMA(LDK(KP195090322), T5f, VMUL(LDK(KP980785280), T5k)); + T5z = VADD(T5x, T5y); + T5H = VSUB(T5y, T5x); + } + { + V T54, T5l, T5s, T5v; + T54 = VFMA(LDK(KP980785280), T4Y, VMUL(LDK(KP195090322), T53)); + T5l = VFNMS(LDK(KP195090322), T5k, VMUL(LDK(KP980785280), T5f)); + T5m = VADD(T54, T5l); + T5D = VSUB(T5l, T54); + T5s = VADD(T5q, T5r); + T5v = VADD(T5t, T5u); + T5w = VADD(T5s, T5v); + T5E = VSUB(T5v, T5s); + } + { + V T5n, T5A, T5J, T5K; + T5n = VADD(T4N, T5m); + T5A = VBYI(VADD(T5w, T5z)); + ST(&(xo[WS(os, 62)]), VSUB(T5n, T5A), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(T5n, T5A), ovs, &(xo[0])); + T5J = VBYI(VADD(T5E, T5D)); + T5K = VADD(T5G, T5H); + 
ST(&(xo[WS(os, 14)]), VADD(T5J, T5K), ovs, &(xo[0])); + ST(&(xo[WS(os, 50)]), VSUB(T5K, T5J), ovs, &(xo[0])); + } + { + V T5B, T5C, T5F, T5I; + T5B = VSUB(T4N, T5m); + T5C = VBYI(VSUB(T5z, T5w)); + ST(&(xo[WS(os, 34)]), VSUB(T5B, T5C), ovs, &(xo[0])); + ST(&(xo[WS(os, 30)]), VADD(T5B, T5C), ovs, &(xo[0])); + T5F = VBYI(VSUB(T5D, T5E)); + T5I = VSUB(T5G, T5H); + ST(&(xo[WS(os, 18)]), VADD(T5F, T5I), ovs, &(xo[0])); + ST(&(xo[WS(os, 46)]), VSUB(T5I, T5F), ovs, &(xo[0])); + } + } + { + V T2J, T34, T2X, T35, T2Q, T31, T2U, T32; + { + V T2H, T2I, T2V, T2W; + T2H = VADD(Tb, Tq); + T2I = VADD(T2q, T2p); + T2J = VADD(T2H, T2I); + T34 = VSUB(T2H, T2I); + T2V = VFNMS(LDK(KP098017140), T2L, VMUL(LDK(KP995184726), T2K)); + T2W = VFMA(LDK(KP995184726), T2O, VMUL(LDK(KP098017140), T2N)); + T2X = VADD(T2V, T2W); + T35 = VSUB(T2W, T2V); + } + { + V T2M, T2P, T2S, T2T; + T2M = VFMA(LDK(KP098017140), T2K, VMUL(LDK(KP995184726), T2L)); + T2P = VFNMS(LDK(KP098017140), T2O, VMUL(LDK(KP995184726), T2N)); + T2Q = VADD(T2M, T2P); + T31 = VSUB(T2P, T2M); + T2S = VADD(T2n, T2i); + T2T = VADD(TZ, TI); + T2U = VADD(T2S, T2T); + T32 = VSUB(T2T, T2S); + } + { + V T2R, T2Y, T37, T38; + T2R = VADD(T2J, T2Q); + T2Y = VBYI(VADD(T2U, T2X)); + ST(&(xo[WS(os, 63)]), VSUB(T2R, T2Y), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(T2R, T2Y), ovs, &(xo[WS(os, 1)])); + T37 = VBYI(VADD(T32, T31)); + T38 = VADD(T34, T35); + ST(&(xo[WS(os, 15)]), VADD(T37, T38), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 49)]), VSUB(T38, T37), ovs, &(xo[WS(os, 1)])); + } + { + V T2Z, T30, T33, T36; + T2Z = VSUB(T2J, T2Q); + T30 = VBYI(VSUB(T2X, T2U)); + ST(&(xo[WS(os, 33)]), VSUB(T2Z, T30), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 31)]), VADD(T2Z, T30), ovs, &(xo[WS(os, 1)])); + T33 = VBYI(VSUB(T31, T32)); + T36 = VSUB(T34, T35); + ST(&(xo[WS(os, 17)]), VADD(T33, T36), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 47)]), VSUB(T36, T33), ovs, &(xo[WS(os, 1)])); + } + } + { + V T3X, T4i, T4b, T4j, T44, T4f, T48, T4g; + { + V T3V, 
T3W, T49, T4a; + T3V = VSUB(T39, T3a); + T3W = VSUB(T3E, T3D); + T3X = VADD(T3V, T3W); + T4i = VSUB(T3V, T3W); + T49 = VFNMS(LDK(KP471396736), T3Y, VMUL(LDK(KP881921264), T3Z)); + T4a = VFMA(LDK(KP471396736), T41, VMUL(LDK(KP881921264), T42)); + T4b = VADD(T49, T4a); + T4j = VSUB(T4a, T49); + } + { + V T40, T43, T46, T47; + T40 = VFMA(LDK(KP881921264), T3Y, VMUL(LDK(KP471396736), T3Z)); + T43 = VFNMS(LDK(KP471396736), T42, VMUL(LDK(KP881921264), T41)); + T44 = VADD(T40, T43); + T4f = VSUB(T43, T40); + T46 = VSUB(T3B, T3A); + T47 = VSUB(T3h, T3e); + T48 = VADD(T46, T47); + T4g = VSUB(T47, T46); + } + { + V T45, T4c, T4l, T4m; + T45 = VADD(T3X, T44); + T4c = VBYI(VADD(T48, T4b)); + ST(&(xo[WS(os, 59)]), VSUB(T45, T4c), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VADD(T45, T4c), ovs, &(xo[WS(os, 1)])); + T4l = VBYI(VADD(T4g, T4f)); + T4m = VADD(T4i, T4j); + ST(&(xo[WS(os, 11)]), VADD(T4l, T4m), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 53)]), VSUB(T4m, T4l), ovs, &(xo[WS(os, 1)])); + } + { + V T4d, T4e, T4h, T4k; + T4d = VSUB(T3X, T44); + T4e = VBYI(VSUB(T4b, T48)); + ST(&(xo[WS(os, 37)]), VSUB(T4d, T4e), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 27)]), VADD(T4d, T4e), ovs, &(xo[WS(os, 1)])); + T4h = VBYI(VSUB(T4f, T4g)); + T4k = VSUB(T4i, T4j); + ST(&(xo[WS(os, 21)]), VADD(T4h, T4k), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 43)]), VSUB(T4k, T4h), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n1fv_64"), {404, 72, 52, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_64) (planner *p) { + X(kdft_register) (p, n1fv_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name n1fv_7 -include n1f.h */ + +/* + * This function contains 30 FP additions, 24 FP multiplications, + * (or, 9 additions, 3 multiplications, 21 fused multiply/add), + * 37 stack variables, 6 constants, and 14 memory accesses + */ +#include "n1f.h" + +static void n1fv_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(14, is), 
MAKE_VOLATILE_STRIDE(14, os)) { + V T1, T2, T3, T8, T9, T5, T6; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V Te, T4, Tf, Ta, Tg, T7; + Te = VSUB(T3, T2); + T4 = VADD(T2, T3); + Tf = VSUB(T9, T8); + Ta = VADD(T8, T9); + Tg = VSUB(T6, T5); + T7 = VADD(T5, T6); + { + V Tm, Tb, Tr, Th, Tj, To; + Tm = VFMA(LDK(KP554958132), Tf, Te); + Tb = VFNMS(LDK(KP356895867), T4, Ta); + Tr = VFNMS(LDK(KP554958132), Te, Tg); + Th = VFMA(LDK(KP554958132), Tg, Tf); + ST(&(xo[0]), VADD(T1, VADD(T4, VADD(T7, Ta))), ovs, &(xo[0])); + Tj = VFNMS(LDK(KP356895867), T7, T4); + To = VFNMS(LDK(KP356895867), Ta, T7); + { + V Tn, Tc, Ts, Ti; + Tn = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Tm, Tg)); + Tc = VFNMS(LDK(KP692021471), Tb, T7); + Ts = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tr, Tf)); + Ti = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Th, Te)); + { + V Tk, Tp, Td, Tl, Tq; + Tk = VFNMS(LDK(KP692021471), Tj, Ta); + Tp = VFNMS(LDK(KP692021471), To, T4); + Td = VFNMS(LDK(KP900968867), Tc, T1); + Tl = VFNMS(LDK(KP900968867), Tk, T1); + Tq = VFNMS(LDK(KP900968867), Tp, T1); + ST(&(xo[WS(os, 2)]), VFMAI(Ti, Td), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(Ti, Td), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFMAI(Tn, Tl), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VFNMSI(Tn, Tl), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(Ts, Tq), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 4)]), VFNMSI(Ts, Tq), ovs, &(xo[0])); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 7, XSIMD_STRING("n1fv_7"), {9, 3, 21, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_7) (planner *p) { + X(kdft_register) (p, n1fv_7, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: 
../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name n1fv_7 -include n1f.h */ + +/* + * This function contains 30 FP additions, 18 FP multiplications, + * (or, 18 additions, 6 multiplications, 12 fused multiply/add), + * 24 stack variables, 6 constants, and 14 memory accesses + */ +#include "n1f.h" + +static void n1fv_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(14, is), MAKE_VOLATILE_STRIDE(14, os)) { + V T1, Ta, Td, T4, Tc, T7, Te, T8, T9, Tj, Ti; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Ta = VADD(T8, T9); + Td = VSUB(T9, T8); + { + V T2, T3, T5, T6; + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tc = VSUB(T3, T2); + T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = VADD(T5, T6); + Te = VSUB(T6, T5); + } + ST(&(xo[0]), VADD(T1, VADD(T4, VADD(T7, Ta))), ovs, &(xo[0])); + Tj = VBYI(VFMA(LDK(KP433883739), Tc, VFNMS(LDK(KP781831482), Te, VMUL(LDK(KP974927912), Td)))); + Ti = VFMA(LDK(KP623489801), T7, VFNMS(LDK(KP222520933), Ta, VFNMS(LDK(KP900968867), T4, T1))); + ST(&(xo[WS(os, 4)]), VSUB(Ti, Tj), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VADD(Ti, Tj), ovs, &(xo[WS(os, 1)])); + { + V Tf, Tb, Th, Tg; + Tf 
= VBYI(VFNMS(LDK(KP781831482), Td, VFNMS(LDK(KP433883739), Te, VMUL(LDK(KP974927912), Tc)))); + Tb = VFMA(LDK(KP623489801), Ta, VFNMS(LDK(KP900968867), T7, VFNMS(LDK(KP222520933), T4, T1))); + ST(&(xo[WS(os, 5)]), VSUB(Tb, Tf), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VADD(Tb, Tf), ovs, &(xo[0])); + Th = VBYI(VFMA(LDK(KP781831482), Tc, VFMA(LDK(KP974927912), Te, VMUL(LDK(KP433883739), Td)))); + Tg = VFMA(LDK(KP623489801), T4, VFNMS(LDK(KP900968867), Ta, VFNMS(LDK(KP222520933), T7, T1))); + ST(&(xo[WS(os, 6)]), VSUB(Tg, Th), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(Tg, Th), ovs, &(xo[WS(os, 1)])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 7, XSIMD_STRING("n1fv_7"), {18, 6, 12, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_7) (planner *p) { + X(kdft_register) (p, n1fv_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name n1fv_8 -include n1f.h */ + +/* + * This function contains 26 FP additions, 10 FP multiplications, + * (or, 16 additions, 0 multiplications, 10 fused multiply/add), + * 30 stack variables, 1 constants, and 16 memory accesses + */ +#include "n1f.h" + +static void n1fv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T1, T2, Tc, Td, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V T3, Tj, Te, Tk, T6, Tm, T9, Tn, Tp, Tl; + T3 = VSUB(T1, T2); + Tj = VADD(T1, T2); + Te = VSUB(Tc, Td); + Tk = VADD(Tc, Td); + T6 = VSUB(T4, T5); + Tm = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tn = VADD(T7, T8); + Tp = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + { + V Tq, To, Ta, Tf; + Tq = VSUB(Tn, Tm); + To = VADD(Tm, Tn); + Ta = VADD(T6, T9); + Tf = VSUB(T9, T6); + { + V Tg, Ti, Tb, Th; + ST(&(xo[0]), VADD(Tl, To), 
ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VSUB(Tl, To), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VFMAI(Tq, Tp), ovs, &(xo[0])); + ST(&(xo[WS(os, 6)]), VFNMSI(Tq, Tp), ovs, &(xo[0])); + Tg = VFNMS(LDK(KP707106781), Tf, Te); + Ti = VFMA(LDK(KP707106781), Tf, Te); + Tb = VFMA(LDK(KP707106781), Ta, T3); + Th = VFNMS(LDK(KP707106781), Ta, T3); + ST(&(xo[WS(os, 3)]), VFMAI(Ti, Th), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VFNMSI(Ti, Th), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(Tg, Tb), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VFNMSI(Tg, Tb), ovs, &(xo[WS(os, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n1fv_8"), {16, 0, 10, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_8) (planner *p) { + X(kdft_register) (p, n1fv_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name n1fv_8 -include n1f.h */ + +/* + * This function contains 26 FP additions, 2 FP multiplications, + * (or, 26 additions, 2 multiplications, 0 fused multiply/add), + * 22 stack variables, 1 constants, and 16 memory accesses + */ +#include "n1f.h" + +static void n1fv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T3, Tj, Tf, Tk, Ta, Tn, Tc, Tm; + { + V T1, T2, Td, Te; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + Tj = VADD(T1, T2); + Td = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tf = VSUB(Td, Te); + Tk = VADD(Td, Te); + { + V T4, T5, T6, T7, T8, T9; + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 5)]), ivs, 
&(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tn = VADD(T7, T8); + Tc = VMUL(LDK(KP707106781), VSUB(T9, T6)); + Tm = VADD(T4, T5); + } + } + { + V Tb, Tg, Tp, Tq; + Tb = VADD(T3, Ta); + Tg = VBYI(VSUB(Tc, Tf)); + ST(&(xo[WS(os, 7)]), VSUB(Tb, Tg), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 1)]), VADD(Tb, Tg), ovs, &(xo[WS(os, 1)])); + Tp = VSUB(Tj, Tk); + Tq = VBYI(VSUB(Tn, Tm)); + ST(&(xo[WS(os, 6)]), VSUB(Tp, Tq), ovs, &(xo[0])); + ST(&(xo[WS(os, 2)]), VADD(Tp, Tq), ovs, &(xo[0])); + } + { + V Th, Ti, Tl, To; + Th = VSUB(T3, Ta); + Ti = VBYI(VADD(Tf, Tc)); + ST(&(xo[WS(os, 5)]), VSUB(Th, Ti), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 3)]), VADD(Th, Ti), ovs, &(xo[WS(os, 1)])); + Tl = VADD(Tj, Tk); + To = VADD(Tm, Tn); + ST(&(xo[WS(os, 4)]), VSUB(Tl, To), ovs, &(xo[0])); + ST(&(xo[0]), VADD(Tl, To), ovs, &(xo[0])); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n1fv_8"), {26, 2, 0, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_8) (planner *p) { + X(kdft_register) (p, n1fv_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name n1fv_9 -include n1f.h */ + +/* + * This function contains 46 FP additions, 38 FP multiplications, + * (or, 12 additions, 4 multiplications, 34 fused multiply/add), + * 68 stack variables, 19 constants, and 18 memory accesses + */ +#include "n1f.h" + +static void n1fv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP826351822, +0.826351822333069651148283373230685203999624323); + DVK(KP879385241, +0.879385241571816768108218554649462939872416269); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP666666666, +0.666666666666666666666666666666666666666666667); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP907603734, +0.907603734547952313649323976213898122064543220); + DVK(KP420276625, +0.420276625461206169731530603237061658838781920); + DVK(KP673648177, +0.673648177666930348851716626769314796000375677); + DVK(KP898197570, +0.898197570222573798468955502359086394667167570); + DVK(KP347296355, +0.347296355333860697703433253538629592000751354); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + 
DVK(KP439692620, +0.439692620785908384054109277324731469936208134); + DVK(KP203604859, +0.203604859554852403062088995281827210665664861); + DVK(KP152703644, +0.152703644666139302296566746461370407999248646); + DVK(KP586256827, +0.586256827714544512072145703099641959914944179); + DVK(KP968908795, +0.968908795874236621082202410917456709164223497); + DVK(KP726681596, +0.726681596905677465811651808188092531873167623); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(18, is), MAKE_VOLATILE_STRIDE(18, os)) { + V T1, T2, T3, T6, Tb, T7, T8, Tc, Td, Tv, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tv = VSUB(T3, T2); + T4 = VADD(T2, T3); + { + V Tl, T9, Tm, Te, Tj, T5; + Tl = VSUB(T7, T8); + T9 = VADD(T7, T8); + Tm = VSUB(Td, Tc); + Te = VADD(Tc, Td); + Tj = VFNMS(LDK(KP500000000), T4, T1); + T5 = VADD(T1, T4); + { + V Tn, Ta, Tk, Tf; + Tn = VFNMS(LDK(KP500000000), T9, T6); + Ta = VADD(T6, T9); + Tk = VFNMS(LDK(KP500000000), Te, Tb); + Tf = VADD(Tb, Te); + { + V Ty, TC, To, TB, Tx, Ts, Tg, Ti; + Ty = VFNMS(LDK(KP726681596), Tl, Tn); + TC = VFMA(LDK(KP968908795), Tn, Tl); + To = VFNMS(LDK(KP586256827), Tn, Tm); + TB = VFNMS(LDK(KP152703644), Tm, Tk); + Tx = VFMA(LDK(KP203604859), Tk, Tm); + Ts = VFNMS(LDK(KP439692620), Tl, Tk); + Tg = VADD(Ta, Tf); + Ti = VMUL(LDK(KP866025403), VSUB(Tf, Ta)); + { + V Tz, TI, TF, TD, Tt, Th, Tq, Tp; + Tp = VFNMS(LDK(KP347296355), To, Tl); + Tz = VFMA(LDK(KP898197570), Ty, Tx); + TI = 
VFNMS(LDK(KP898197570), Ty, Tx); + TF = VFNMS(LDK(KP673648177), TC, TB); + TD = VFMA(LDK(KP673648177), TC, TB); + Tt = VFNMS(LDK(KP420276625), Ts, Tm); + ST(&(xo[0]), VADD(T5, Tg), ovs, &(xo[0])); + Th = VFNMS(LDK(KP500000000), Tg, T5); + Tq = VFNMS(LDK(KP907603734), Tp, Tk); + { + V TA, TJ, TE, TG, Tu, Tr, TK, TH, Tw; + TA = VFMA(LDK(KP852868531), Tz, Tj); + TJ = VFMA(LDK(KP666666666), TD, TI); + TE = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), Tv, TD)); + TG = VFNMS(LDK(KP500000000), Tz, TF); + Tu = VFNMS(LDK(KP826351822), Tt, Tn); + ST(&(xo[WS(os, 6)]), VFNMSI(Ti, Th), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VFMAI(Ti, Th), ovs, &(xo[WS(os, 1)])); + Tr = VFNMS(LDK(KP939692620), Tq, Tj); + TK = VMUL(LDK(KP866025403), VFMA(LDK(KP852868531), TJ, Tv)); + ST(&(xo[WS(os, 8)]), VFMAI(TE, TA), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VFNMSI(TE, TA), ovs, &(xo[WS(os, 1)])); + TH = VFMA(LDK(KP852868531), TG, Tj); + Tw = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), Tv, Tu)); + ST(&(xo[WS(os, 4)]), VFMAI(TK, TH), ovs, &(xo[0])); + ST(&(xo[WS(os, 5)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 7)]), VFMAI(Tw, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VFNMSI(Tw, Tr), ovs, &(xo[0])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 9, XSIMD_STRING("n1fv_9"), {12, 4, 34, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_9) (planner *p) { + X(kdft_register) (p, n1fv_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name n1fv_9 -include n1f.h */ + +/* + * This function contains 46 FP additions, 26 FP multiplications, + * (or, 30 additions, 10 multiplications, 16 fused multiply/add), + * 41 stack variables, 14 constants, and 18 memory accesses + */ +#include "n1f.h" + +static void n1fv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP342020143, 
+0.342020143325668733044099614682259580763083368); + DVK(KP813797681, +0.813797681349373692844693217248393223289101568); + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP296198132, +0.296198132726023843175338011893050938967728390); + DVK(KP642787609, +0.642787609686539326322643409907263432907559884); + DVK(KP663413948, +0.663413948168938396205421319635891297216863310); + DVK(KP556670399, +0.556670399226419366452912952047023132968291906); + DVK(KP766044443, +0.766044443118978035202392650555416673935832457); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP150383733, +0.150383733180435296639271897612501926072238258); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP173648177, +0.173648177666930348851716626769314796000375677); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(18, is), MAKE_VOLATILE_STRIDE(18, os)) { + V T5, Ts, Tj, To, Tf, Tn, Tp, Tu, Tl, Ta, Tk, Tm, Tt; + { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + T5 = VADD(T1, T4); + Ts = VMUL(LDK(KP866025403), VSUB(T3, T2)); + Tj = VFNMS(LDK(KP500000000), T4, T1); + } + { + V Tb, Te, Tc, Td; + Tb = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Te = VADD(Tc, Td); + To = VSUB(Td, Tc); + Tf = VADD(Tb, Te); + Tn = VFNMS(LDK(KP500000000), Te, Tb); + Tp = VFMA(LDK(KP173648177), Tn, VMUL(LDK(KP852868531), To)); + Tu = VFNMS(LDK(KP984807753), Tn, VMUL(LDK(KP150383733), To)); + } + { + V T6, T9, T7, T8; + T6 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 
4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T9 = VADD(T7, T8); + Tl = VSUB(T8, T7); + Ta = VADD(T6, T9); + Tk = VFNMS(LDK(KP500000000), T9, T6); + Tm = VFMA(LDK(KP766044443), Tk, VMUL(LDK(KP556670399), Tl)); + Tt = VFNMS(LDK(KP642787609), Tk, VMUL(LDK(KP663413948), Tl)); + } + { + V Ti, Tg, Th, Tz, TA; + Ti = VBYI(VMUL(LDK(KP866025403), VSUB(Tf, Ta))); + Tg = VADD(Ta, Tf); + Th = VFNMS(LDK(KP500000000), Tg, T5); + ST(&(xo[0]), VADD(T5, Tg), ovs, &(xo[0])); + ST(&(xo[WS(os, 3)]), VADD(Th, Ti), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 6)]), VSUB(Th, Ti), ovs, &(xo[0])); + Tz = VFMA(LDK(KP173648177), Tk, VFNMS(LDK(KP296198132), To, VFNMS(LDK(KP939692620), Tn, VFNMS(LDK(KP852868531), Tl, Tj)))); + TA = VBYI(VSUB(VFNMS(LDK(KP342020143), Tn, VFNMS(LDK(KP150383733), Tl, VFNMS(LDK(KP984807753), Tk, VMUL(LDK(KP813797681), To)))), Ts)); + ST(&(xo[WS(os, 7)]), VSUB(Tz, TA), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 2)]), VADD(Tz, TA), ovs, &(xo[0])); + { + V Tr, Tx, Tw, Ty, Tq, Tv; + Tq = VADD(Tm, Tp); + Tr = VADD(Tj, Tq); + Tx = VFMA(LDK(KP866025403), VSUB(Tt, Tu), VFNMS(LDK(KP500000000), Tq, Tj)); + Tv = VADD(Tt, Tu); + Tw = VBYI(VADD(Ts, Tv)); + Ty = VBYI(VADD(Ts, VFNMS(LDK(KP500000000), Tv, VMUL(LDK(KP866025403), VSUB(Tp, Tm))))); + ST(&(xo[WS(os, 8)]), VSUB(Tr, Tw), ovs, &(xo[0])); + ST(&(xo[WS(os, 4)]), VADD(Tx, Ty), ovs, &(xo[0])); + ST(&(xo[WS(os, 1)]), VADD(Tw, Tr), ovs, &(xo[WS(os, 1)])); + ST(&(xo[WS(os, 5)]), VSUB(Tx, Ty), ovs, &(xo[WS(os, 1)])); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 9, XSIMD_STRING("n1fv_9"), {30, 10, 16, 0}, &GENUS, 0, 0, 0, 0 }; + +void XSIMD(codelet_n1fv_9) (planner *p) { + X(kdft_register) (p, n1fv_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,277 @@ +/* + * Copyright (c) 
2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:59 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 10 -name n2bv_10 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 42 FP additions, 22 FP multiplications, + * (or, 24 additions, 4 multiplications, 18 fused multiply/add), + * 53 stack variables, 4 constants, and 25 memory accesses + */ +#include "n2b.h" + +static void n2bv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, 
is), MAKE_VOLATILE_STRIDE(20, os)) { + V Tb, Tr, T3, Ts, T6, Tw, Tg, Tt, T9, Tc, T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V T4, T5, Te, Tf, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tr = VADD(T1, T2); + T3 = VSUB(T1, T2); + Ts = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tw = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + Tt = VADD(T7, T8); + T9 = VSUB(T7, T8); + Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + } + { + V TD, Tu, Tm, Ta, Td, Tv; + TD = VSUB(Ts, Tt); + Tu = VADD(Ts, Tt); + Tm = VSUB(T6, T9); + Ta = VADD(T6, T9); + Td = VSUB(Tb, Tc); + Tv = VADD(Tb, Tc); + { + V TC, Tx, Tn, Th; + TC = VSUB(Tv, Tw); + Tx = VADD(Tv, Tw); + Tn = VSUB(Td, Tg); + Th = VADD(Td, Tg); + { + V Ty, TA, TE, TG, Ti, Tk, To, Tq; + Ty = VADD(Tu, Tx); + TA = VSUB(Tu, Tx); + TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC)); + TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD)); + Ti = VADD(Ta, Th); + Tk = VSUB(Ta, Th); + To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm)); + Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn)); + { + V Tz, TH, Tj, TI; + Tz = VFNMS(LDK(KP250000000), Ty, Tr); + TH = VADD(Tr, Ty); + STM2(&(xo[0]), TH, ovs, &(xo[0])); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + TI = VADD(T3, Ti); + STM2(&(xo[10]), TI, ovs, &(xo[2])); + { + V TB, TF, Tl, Tp; + TB = VFNMS(LDK(KP559016994), TA, Tz); + TF = VFMA(LDK(KP559016994), TA, Tz); + Tl = VFMA(LDK(KP559016994), Tk, Tj); + Tp = VFNMS(LDK(KP559016994), Tk, Tj); + { + V TJ, TK, TL, TM; + TJ = VFNMSI(TG, TF); + STM2(&(xo[8]), TJ, ovs, &(xo[0])); + STN2(&(xo[8]), TJ, TI, ovs); + TK = VFMAI(TG, TF); + STM2(&(xo[12]), TK, ovs, &(xo[0])); + TL = 
VFMAI(TE, TB); + STM2(&(xo[16]), TL, ovs, &(xo[0])); + TM = VFNMSI(TE, TB); + STM2(&(xo[4]), TM, ovs, &(xo[0])); + { + V TN, TO, TP, TQ; + TN = VFMAI(Tq, Tp); + STM2(&(xo[6]), TN, ovs, &(xo[2])); + STN2(&(xo[4]), TM, TN, ovs); + TO = VFNMSI(Tq, Tp); + STM2(&(xo[14]), TO, ovs, &(xo[2])); + STN2(&(xo[12]), TK, TO, ovs); + TP = VFNMSI(To, Tl); + STM2(&(xo[18]), TP, ovs, &(xo[2])); + STN2(&(xo[16]), TL, TP, ovs); + TQ = VFMAI(To, Tl); + STM2(&(xo[2]), TQ, ovs, &(xo[2])); + STN2(&(xo[0]), TH, TQ, ovs); + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n2bv_10"), {24, 4, 18, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_10) (planner *p) { + X(kdft_register) (p, n2bv_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 10 -name n2bv_10 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 42 FP additions, 12 FP multiplications, + * (or, 36 additions, 6 multiplications, 6 fused multiply/add), + * 36 stack variables, 4 constants, and 25 memory accesses + */ +#include "n2b.h" + +static void n2bv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { + V Tl, Ty, T7, Te, Tw, Tt, Tz, TA, TB, Tg, Th, Tm, Tj, Tk; + Tj = LD(&(xi[0]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = VSUB(Tj, Tk); + Ty = VADD(Tj, Tk); + { + V T3, Tr, Td, Tv, T6, Ts, Ta, Tu; + { + 
V T1, T2, Tb, Tc; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Tr = VADD(T1, T2); + Tb = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + Tv = VADD(Tb, Tc); + } + { + V T4, T5, T8, T9; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Ts = VADD(T4, T5); + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + Tu = VADD(T8, T9); + } + T7 = VSUB(T3, T6); + Te = VSUB(Ta, Td); + Tw = VSUB(Tu, Tv); + Tt = VSUB(Tr, Ts); + Tz = VADD(Tr, Ts); + TA = VADD(Tu, Tv); + TB = VADD(Tz, TA); + Tg = VADD(T3, T6); + Th = VADD(Ta, Td); + Tm = VADD(Tg, Th); + } + { + V TH, TI, TK, TL, TM; + TH = VADD(Tl, Tm); + STM2(&(xo[10]), TH, ovs, &(xo[2])); + TI = VADD(Ty, TB); + STM2(&(xo[0]), TI, ovs, &(xo[0])); + { + V Tf, Tq, To, Tp, Ti, Tn, TJ; + Tf = VBYI(VFMA(LDK(KP951056516), T7, VMUL(LDK(KP587785252), Te))); + Tq = VBYI(VFNMS(LDK(KP951056516), Te, VMUL(LDK(KP587785252), T7))); + Ti = VMUL(LDK(KP559016994), VSUB(Tg, Th)); + Tn = VFNMS(LDK(KP250000000), Tm, Tl); + To = VADD(Ti, Tn); + Tp = VSUB(Tn, Ti); + TJ = VADD(Tf, To); + STM2(&(xo[2]), TJ, ovs, &(xo[2])); + STN2(&(xo[0]), TI, TJ, ovs); + TK = VADD(Tq, Tp); + STM2(&(xo[14]), TK, ovs, &(xo[2])); + TL = VSUB(To, Tf); + STM2(&(xo[18]), TL, ovs, &(xo[2])); + TM = VSUB(Tp, Tq); + STM2(&(xo[6]), TM, ovs, &(xo[2])); + } + { + V Tx, TG, TE, TF, TC, TD; + Tx = VBYI(VFNMS(LDK(KP951056516), Tw, VMUL(LDK(KP587785252), Tt))); + TG = VBYI(VFMA(LDK(KP951056516), Tt, VMUL(LDK(KP587785252), Tw))); + TC = VFNMS(LDK(KP250000000), TB, Ty); + TD = VMUL(LDK(KP559016994), VSUB(Tz, TA)); + TE = VSUB(TC, TD); + TF = VADD(TD, TC); + { + V TN, TO, TP, TQ; + TN = VADD(Tx, TE); + STM2(&(xo[4]), TN, ovs, &(xo[0])); + STN2(&(xo[4]), TN, TM, ovs); + TO = VADD(TG, TF); + STM2(&(xo[12]), TO, ovs, 
&(xo[0])); + STN2(&(xo[12]), TO, TK, ovs); + TP = VSUB(TE, Tx); + STM2(&(xo[16]), TP, ovs, &(xo[0])); + STN2(&(xo[16]), TP, TL, ovs); + TQ = VSUB(TF, TG); + STM2(&(xo[8]), TQ, ovs, &(xo[0])); + STN2(&(xo[8]), TQ, TH, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n2bv_10"), {36, 6, 6, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_10) (planner *p) { + X(kdft_register) (p, n2bv_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:59 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 12 -name n2bv_12 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 48 FP additions, 20 FP multiplications, + * (or, 30 additions, 2 multiplications, 18 fused multiply/add), + * 61 stack variables, 2 constants, and 30 memory accesses + */ +#include "n2b.h" + +static void n2bv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T1, T6, Tc, Th, Td, Te, Ti, Tz, T4, TA, T9, Tj, Tf, Tw; + { + V T2, T3, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tz = VSUB(T2, T3); + T4 = VADD(T2, T3); + TA = VSUB(T7, T8); + T9 = VADD(T7, T8); + Tj = LD(&(xi[WS(is, 5)]), 
ivs, &(xi[WS(is, 1)])); + } + Tf = VADD(Td, Te); + Tw = VSUB(Td, Te); + { + V T5, Tp, TJ, TB, Ta, Tq, Tk, Tx, Tg, Ts; + T5 = VADD(T1, T4); + Tp = VFNMS(LDK(KP500000000), T4, T1); + TJ = VSUB(Tz, TA); + TB = VADD(Tz, TA); + Ta = VADD(T6, T9); + Tq = VFNMS(LDK(KP500000000), T9, T6); + Tk = VADD(Ti, Tj); + Tx = VSUB(Tj, Ti); + Tg = VADD(Tc, Tf); + Ts = VFNMS(LDK(KP500000000), Tf, Tc); + { + V Tr, TF, Tb, Tn, TG, Ty, Tl, Tt; + Tr = VADD(Tp, Tq); + TF = VSUB(Tp, Tq); + Tb = VSUB(T5, Ta); + Tn = VADD(T5, Ta); + TG = VADD(Tw, Tx); + Ty = VSUB(Tw, Tx); + Tl = VADD(Th, Tk); + Tt = VFNMS(LDK(KP500000000), Tk, Th); + { + V TC, TE, TH, TL, Tu, TI, Tm, To; + TC = VMUL(LDK(KP866025403), VSUB(Ty, TB)); + TE = VMUL(LDK(KP866025403), VADD(TB, Ty)); + TH = VFNMS(LDK(KP866025403), TG, TF); + TL = VFMA(LDK(KP866025403), TG, TF); + Tu = VADD(Ts, Tt); + TI = VSUB(Ts, Tt); + Tm = VSUB(Tg, Tl); + To = VADD(Tg, Tl); + { + V TK, TM, Tv, TD; + TK = VFMA(LDK(KP866025403), TJ, TI); + TM = VFNMS(LDK(KP866025403), TJ, TI); + Tv = VSUB(Tr, Tu); + TD = VADD(Tr, Tu); + { + V TN, TO, TP, TQ; + TN = VADD(Tn, To); + STM2(&(xo[0]), TN, ovs, &(xo[0])); + TO = VSUB(Tn, To); + STM2(&(xo[12]), TO, ovs, &(xo[0])); + TP = VFMAI(Tm, Tb); + STM2(&(xo[18]), TP, ovs, &(xo[2])); + TQ = VFNMSI(Tm, Tb); + STM2(&(xo[6]), TQ, ovs, &(xo[2])); + { + V TR, TS, TT, TU; + TR = VFMAI(TM, TL); + STM2(&(xo[10]), TR, ovs, &(xo[2])); + TS = VFNMSI(TM, TL); + STM2(&(xo[14]), TS, ovs, &(xo[2])); + STN2(&(xo[12]), TO, TS, ovs); + TT = VFNMSI(TK, TH); + STM2(&(xo[22]), TT, ovs, &(xo[2])); + TU = VFMAI(TK, TH); + STM2(&(xo[2]), TU, ovs, &(xo[2])); + STN2(&(xo[0]), TN, TU, ovs); + { + V TV, TW, TX, TY; + TV = VFNMSI(TE, TD); + STM2(&(xo[16]), TV, ovs, &(xo[0])); + STN2(&(xo[16]), TV, TP, ovs); + TW = VFMAI(TE, TD); + STM2(&(xo[8]), TW, ovs, &(xo[0])); + STN2(&(xo[8]), TW, TR, ovs); + TX = VFMAI(TC, Tv); + STM2(&(xo[4]), TX, ovs, &(xo[0])); + STN2(&(xo[4]), TX, TQ, ovs); + TY = VFNMSI(TC, Tv); + STM2(&(xo[20]), TY, ovs, &(xo[0])); + 
STN2(&(xo[20]), TY, TT, ovs); + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, XSIMD_STRING("n2bv_12"), {30, 2, 18, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_12) (planner *p) { + X(kdft_register) (p, n2bv_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 12 -name n2bv_12 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 48 FP additions, 8 FP multiplications, + * (or, 44 additions, 4 multiplications, 4 fused multiply/add), + * 33 stack variables, 2 constants, and 30 memory accesses + */ +#include "n2b.h" + +static void n2bv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T5, Ta, TG, TF, Ty, Tm, Ti, Tp, TJ, TI, Tx, Ts; + { + V T1, T6, T4, Tk, T9, Tl; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T2, T3, T7, T8; + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tk = VSUB(T2, T3); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = VADD(T7, T8); + Tl = VSUB(T7, T8); + } + T5 = VFNMS(LDK(KP500000000), T4, T1); + Ta = VFNMS(LDK(KP500000000), T9, T6); + TG = VADD(T6, T9); + TF = VADD(T1, T4); + Ty = VADD(Tk, Tl); + Tm = VMUL(LDK(KP866025403), VSUB(Tk, Tl)); + } + { + V Tn, Tq, Te, To, Th, Tr; + Tn = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tq = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + { + V Tc, Td, Tf, Tg; + Tc = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 
1)])); + Td = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Te = VSUB(Tc, Td); + To = VADD(Tc, Td); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tg = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Th = VSUB(Tf, Tg); + Tr = VADD(Tf, Tg); + } + Ti = VMUL(LDK(KP866025403), VSUB(Te, Th)); + Tp = VFNMS(LDK(KP500000000), To, Tn); + TJ = VADD(Tq, Tr); + TI = VADD(Tn, To); + Tx = VADD(Te, Th); + Ts = VFNMS(LDK(KP500000000), Tr, Tq); + } + { + V TN, TO, TP, TQ, TR, TS; + { + V TH, TK, TL, TM; + TH = VSUB(TF, TG); + TK = VBYI(VSUB(TI, TJ)); + TN = VSUB(TH, TK); + STM2(&(xo[6]), TN, ovs, &(xo[2])); + TO = VADD(TH, TK); + STM2(&(xo[18]), TO, ovs, &(xo[2])); + TL = VADD(TF, TG); + TM = VADD(TI, TJ); + TP = VSUB(TL, TM); + STM2(&(xo[12]), TP, ovs, &(xo[0])); + TQ = VADD(TL, TM); + STM2(&(xo[0]), TQ, ovs, &(xo[0])); + } + { + V Tj, Tv, Tu, Tw, Tb, Tt, TT, TU; + Tb = VSUB(T5, Ta); + Tj = VSUB(Tb, Ti); + Tv = VADD(Tb, Ti); + Tt = VSUB(Tp, Ts); + Tu = VBYI(VADD(Tm, Tt)); + Tw = VBYI(VSUB(Tt, Tm)); + TR = VSUB(Tj, Tu); + STM2(&(xo[22]), TR, ovs, &(xo[2])); + TS = VADD(Tv, Tw); + STM2(&(xo[10]), TS, ovs, &(xo[2])); + TT = VADD(Tj, Tu); + STM2(&(xo[2]), TT, ovs, &(xo[2])); + STN2(&(xo[0]), TQ, TT, ovs); + TU = VSUB(Tv, Tw); + STM2(&(xo[14]), TU, ovs, &(xo[2])); + STN2(&(xo[12]), TP, TU, ovs); + } + { + V Tz, TD, TC, TE, TA, TB; + Tz = VBYI(VMUL(LDK(KP866025403), VSUB(Tx, Ty))); + TD = VBYI(VMUL(LDK(KP866025403), VADD(Ty, Tx))); + TA = VADD(T5, Ta); + TB = VADD(Tp, Ts); + TC = VSUB(TA, TB); + TE = VADD(TA, TB); + { + V TV, TW, TX, TY; + TV = VADD(Tz, TC); + STM2(&(xo[4]), TV, ovs, &(xo[0])); + STN2(&(xo[4]), TV, TN, ovs); + TW = VSUB(TE, TD); + STM2(&(xo[16]), TW, ovs, &(xo[0])); + STN2(&(xo[16]), TW, TO, ovs); + TX = VSUB(TC, Tz); + STM2(&(xo[20]), TX, ovs, &(xo[0])); + STN2(&(xo[20]), TX, TR, ovs); + TY = VADD(TD, TE); + STM2(&(xo[8]), TY, ovs, &(xo[0])); + STN2(&(xo[8]), TY, TS, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, 
XSIMD_STRING("n2bv_12"), {44, 4, 4, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_12) (planner *p) { + X(kdft_register) (p, n2bv_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:59 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n2bv_14 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 74 FP additions, 48 FP multiplications, + * (or, 32 additions, 6 multiplications, 42 fused multiply/add), + * 65 stack variables, 6 constants, and 35 memory accesses + */ +#include "n2b.h" + +static void n2bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V TH, T3, TP, Tn, Ta, Tu, TU, TK, TO, Tk, TM, Tg, TL, Td, T1; + V T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V Ti, TI, T6, TJ, T9, Tj, Te, Tf, Tb, Tc; + { + V T4, T5, T7, T8, Tl, Tm; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + 
Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TH = VADD(T1, T2); + T3 = VSUB(T1, T2); + TI = VADD(T4, T5); + T6 = VSUB(T4, T5); + TJ = VADD(T7, T8); + T9 = VSUB(T7, T8); + TP = VADD(Tl, Tm); + Tn = VSUB(Tl, Tm); + Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + Ta = VADD(T6, T9); + Tu = VSUB(T6, T9); + TU = VSUB(TI, TJ); + TK = VADD(TI, TJ); + TO = VADD(Ti, Tj); + Tk = VSUB(Ti, Tj); + TM = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + TL = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + } + { + V T19, T1a, T13, TG, TY, T18, TB, Tw, TT, Tz, T11, T16, TE, Tr, TV; + V TQ; + TV = VSUB(TP, TO); + TQ = VADD(TO, TP); + { + V Ts, To, TW, TN; + Ts = VSUB(Tk, Tn); + To = VADD(Tk, Tn); + TW = VSUB(TM, TL); + TN = VADD(TL, TM); + { + V Tt, Th, TR, T12; + Tt = VSUB(Td, Tg); + Th = VADD(Td, Tg); + TR = VFNMS(LDK(KP356895867), TK, TQ); + T12 = VFNMS(LDK(KP554958132), TV, TU); + { + V Tx, TF, TZ, T14; + Tx = VFNMS(LDK(KP356895867), Ta, To); + TF = VFMA(LDK(KP554958132), Ts, Tu); + T19 = VADD(TH, VADD(TK, VADD(TN, TQ))); + STM2(&(xo[0]), T19, ovs, &(xo[0])); + TZ = VFNMS(LDK(KP356895867), TN, TK); + T14 = VFNMS(LDK(KP356895867), TQ, TN); + { + V TX, T17, TC, Tp; + TX = VFMA(LDK(KP554958132), TW, TV); + T17 = VFMA(LDK(KP554958132), TU, TW); + T1a = VADD(T3, VADD(Ta, VADD(Th, To))); + STM2(&(xo[14]), T1a, ovs, &(xo[2])); + TC = VFNMS(LDK(KP356895867), Th, Ta); + Tp = VFNMS(LDK(KP356895867), To, Th); + { + V TA, Tv, TS, Ty; + TA = VFMA(LDK(KP554958132), Tt, Ts); + Tv = VFNMS(LDK(KP554958132), Tu, Tt); + TS = VFNMS(LDK(KP692021471), TR, TN); + T13 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T12, TW)); + Ty = VFNMS(LDK(KP692021471), Tx, Th); + TG = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TF, Tt)); + { + 
V T10, T15, TD, Tq; + T10 = VFNMS(LDK(KP692021471), TZ, TQ); + T15 = VFNMS(LDK(KP692021471), T14, TK); + TY = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TX, TU)); + T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV)); + TD = VFNMS(LDK(KP692021471), TC, To); + Tq = VFNMS(LDK(KP692021471), Tp, Ta); + TB = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TA, Tu)); + Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts)); + TT = VFNMS(LDK(KP900968867), TS, TH); + Tz = VFNMS(LDK(KP900968867), Ty, T3); + T11 = VFNMS(LDK(KP900968867), T10, TH); + T16 = VFNMS(LDK(KP900968867), T15, TH); + TE = VFNMS(LDK(KP900968867), TD, T3); + Tr = VFNMS(LDK(KP900968867), Tq, T3); + } + } + } + } + } + } + { + V T1b, T1c, T1d, T1e; + T1b = VFMAI(TY, TT); + STM2(&(xo[4]), T1b, ovs, &(xo[0])); + T1c = VFNMSI(TY, TT); + STM2(&(xo[24]), T1c, ovs, &(xo[0])); + T1d = VFMAI(TB, Tz); + STM2(&(xo[18]), T1d, ovs, &(xo[2])); + T1e = VFNMSI(TB, Tz); + STM2(&(xo[10]), T1e, ovs, &(xo[2])); + { + V T1f, T1g, T1h, T1i; + T1f = VFMAI(T13, T11); + STM2(&(xo[12]), T1f, ovs, &(xo[0])); + STN2(&(xo[12]), T1f, T1a, ovs); + T1g = VFNMSI(T13, T11); + STM2(&(xo[16]), T1g, ovs, &(xo[0])); + STN2(&(xo[16]), T1g, T1d, ovs); + T1h = VFMAI(T18, T16); + STM2(&(xo[8]), T1h, ovs, &(xo[0])); + STN2(&(xo[8]), T1h, T1e, ovs); + T1i = VFNMSI(T18, T16); + STM2(&(xo[20]), T1i, ovs, &(xo[0])); + { + V T1j, T1k, T1l, T1m; + T1j = VFNMSI(TG, TE); + STM2(&(xo[26]), T1j, ovs, &(xo[2])); + STN2(&(xo[24]), T1c, T1j, ovs); + T1k = VFMAI(TG, TE); + STM2(&(xo[2]), T1k, ovs, &(xo[2])); + STN2(&(xo[0]), T19, T1k, ovs); + T1l = VFNMSI(Tw, Tr); + STM2(&(xo[22]), T1l, ovs, &(xo[2])); + STN2(&(xo[20]), T1i, T1l, ovs); + T1m = VFMAI(Tw, Tr); + STM2(&(xo[6]), T1m, ovs, &(xo[2])); + STN2(&(xo[4]), T1b, T1m, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n2bv_14"), {32, 6, 42, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_14) (planner *p) { + X(kdft_register) (p, n2bv_14, 
&desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 14 -name n2bv_14 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 74 FP additions, 36 FP multiplications, + * (or, 50 additions, 12 multiplications, 24 fused multiply/add), + * 41 stack variables, 6 constants, and 35 memory accesses + */ +#include "n2b.h" + +static void n2bv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V Tp, Ty, Tl, TL, Tq, TE, T7, TJ, Ts, TB, Te, TK, Tr, TH, Tn; + V To; + Tn = LD(&(xi[0]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tp = VSUB(Tn, To); + Ty = VADD(Tn, To); + { + V Th, TC, Tk, TD; + { + V Tf, Tg, Ti, Tj; + Tf = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Th = VSUB(Tf, Tg); + TC = VADD(Tf, Tg); + Ti = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tk = VSUB(Ti, Tj); + TD = VADD(Ti, Tj); + } + Tl = VSUB(Th, Tk); + TL = VSUB(TD, TC); + Tq = VADD(Th, Tk); + TE = VADD(TC, TD); + } + { + V T3, Tz, T6, TA; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Tz = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 
12)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + TA = VADD(T4, T5); + } + T7 = VSUB(T3, T6); + TJ = VSUB(Tz, TA); + Ts = VADD(T3, T6); + TB = VADD(Tz, TA); + } + { + V Ta, TF, Td, TG; + { + V T8, T9, Tb, Tc; + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + TF = VADD(T8, T9); + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TG = VADD(Tb, Tc); + } + Te = VSUB(Ta, Td); + TK = VSUB(TG, TF); + Tr = VADD(Ta, Td); + TH = VADD(TF, TG); + } + { + V TR, TS, TU, TV; + TR = VADD(Tp, VADD(Ts, VADD(Tq, Tr))); + STM2(&(xo[14]), TR, ovs, &(xo[2])); + TS = VADD(Ty, VADD(TB, VADD(TE, TH))); + STM2(&(xo[0]), TS, ovs, &(xo[0])); + { + V TT, Tm, Tt, TQ, TP, TW; + Tm = VBYI(VFMA(LDK(KP433883739), T7, VFNMS(LDK(KP781831482), Tl, VMUL(LDK(KP974927912), Te)))); + Tt = VFMA(LDK(KP623489801), Tq, VFNMS(LDK(KP222520933), Tr, VFNMS(LDK(KP900968867), Ts, Tp))); + TT = VADD(Tm, Tt); + STM2(&(xo[6]), TT, ovs, &(xo[2])); + TU = VSUB(Tt, Tm); + STM2(&(xo[22]), TU, ovs, &(xo[2])); + TQ = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK)))); + TP = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TB, Ty))); + TV = VSUB(TP, TQ); + STM2(&(xo[24]), TV, ovs, &(xo[0])); + TW = VADD(TP, TQ); + STM2(&(xo[4]), TW, ovs, &(xo[0])); + STN2(&(xo[4]), TW, TT, ovs); + } + { + V T10, TM, TI, TZ; + { + V Tu, Tv, TX, TY; + Tu = VBYI(VFMA(LDK(KP781831482), T7, VFMA(LDK(KP974927912), Tl, VMUL(LDK(KP433883739), Te)))); + Tv = VFMA(LDK(KP623489801), Ts, VFNMS(LDK(KP900968867), Tr, VFNMS(LDK(KP222520933), Tq, Tp))); + TX = VADD(Tu, Tv); + STM2(&(xo[2]), TX, ovs, &(xo[2])); + STN2(&(xo[0]), TS, TX, ovs); + TY = VSUB(Tv, Tu); + STM2(&(xo[26]), TY, ovs, &(xo[2])); + STN2(&(xo[24]), TV, TY, ovs); + } + TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, 
VMUL(LDK(KP781831482), TJ)))); + TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TE, Ty))); + TZ = VSUB(TI, TM); + STM2(&(xo[12]), TZ, ovs, &(xo[0])); + STN2(&(xo[12]), TZ, TR, ovs); + T10 = VADD(TI, TM); + STM2(&(xo[16]), T10, ovs, &(xo[0])); + { + V T11, TO, TN, T12; + TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL)))); + TN = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP222520933), TH, VFNMS(LDK(KP900968867), TB, Ty))); + T11 = VSUB(TN, TO); + STM2(&(xo[8]), T11, ovs, &(xo[0])); + T12 = VADD(TN, TO); + STM2(&(xo[20]), T12, ovs, &(xo[0])); + STN2(&(xo[20]), T12, TU, ovs); + { + V Tx, Tw, T13, T14; + Tx = VBYI(VFNMS(LDK(KP781831482), Te, VFNMS(LDK(KP433883739), Tl, VMUL(LDK(KP974927912), T7)))); + Tw = VFMA(LDK(KP623489801), Tr, VFNMS(LDK(KP900968867), Tq, VFNMS(LDK(KP222520933), Ts, Tp))); + T13 = VSUB(Tw, Tx); + STM2(&(xo[10]), T13, ovs, &(xo[2])); + STN2(&(xo[8]), T11, T13, ovs); + T14 = VADD(Tx, Tw); + STM2(&(xo[18]), T14, ovs, &(xo[2])); + STN2(&(xo[16]), T10, T14, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n2bv_14"), {50, 12, 24, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_14) (planner *p) { + X(kdft_register) (p, n2bv_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:01 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 16 -name n2bv_16 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 72 FP additions, 34 FP multiplications, + * (or, 38 additions, 0 multiplications, 34 fused multiply/add), + * 62 stack variables, 3 constants, and 40 memory accesses + */ +#include "n2b.h" + +static void n2bv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V T7, Tu, TF, TB, T13, TL, TO, TX, TC, Te, TP, Th, TQ, Tk, TW; + V T16; + { + V TH, TU, Tz, Tf, TK, TV, TA, TM, Ta, TN, Td, Tg, Ti, Tj; + { + V T1, T2, T4, T5, To, Tp, Tr, Ts; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + 
Tp = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tr = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T8, TI, Tq, TJ, Tt, T9, Tb, Tc, T3, T6; + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + TH = VSUB(T1, T2); + T3 = VADD(T1, T2); + TU = VSUB(T4, T5); + T6 = VADD(T4, T5); + TI = VSUB(To, Tp); + Tq = VADD(To, Tp); + TJ = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T7 = VSUB(T3, T6); + Tz = VADD(T3, T6); + Tf = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TK = VADD(TI, TJ); + TV = VSUB(TI, TJ); + TA = VADD(Tq, Tt); + Tu = VSUB(Tq, Tt); + TM = VSUB(T8, T9); + Ta = VADD(T8, T9); + TN = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + Tg = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + } + TF = VADD(Tz, TA); + TB = VSUB(Tz, TA); + T13 = VFNMS(LDK(KP707106781), TK, TH); + TL = VFMA(LDK(KP707106781), TK, TH); + TO = VFNMS(LDK(KP414213562), TN, TM); + TX = VFMA(LDK(KP414213562), TM, TN); + TC = VADD(Ta, Td); + Te = VSUB(Ta, Td); + TP = VSUB(Tf, Tg); + Th = VADD(Tf, Tg); + TQ = VSUB(Tj, Ti); + Tk = VADD(Ti, Tj); + TW = VFMA(LDK(KP707106781), TV, TU); + T16 = VFNMS(LDK(KP707106781), TV, TU); + } + { + V TY, TR, Tl, TD; + TY = VFMA(LDK(KP414213562), TP, TQ); + TR = VFNMS(LDK(KP414213562), TQ, TP); + Tl = VSUB(Th, Tk); + TD = VADD(Th, Tk); + { + V TS, T17, TZ, T14; + TS = VADD(TO, TR); + T17 = VSUB(TO, TR); + TZ = VSUB(TX, TY); + T14 = VADD(TX, TY); + { + V TE, TG, Tm, Tv; + TE = VSUB(TC, TD); + TG = VADD(TC, TD); + Tm = VADD(Te, Tl); + Tv = VSUB(Te, Tl); + { + V T18, T1a, TT, T11; + T18 = VFMA(LDK(KP923879532), T17, T16); + T1a = VFNMS(LDK(KP923879532), T17, T16); + TT = VFNMS(LDK(KP923879532), TS, TL); + T11 = VFMA(LDK(KP923879532), TS, TL); + { + V T15, T19, T10, T12; + T15 = 
VFNMS(LDK(KP923879532), T14, T13); + T19 = VFMA(LDK(KP923879532), T14, T13); + T10 = VFNMS(LDK(KP923879532), TZ, TW); + T12 = VFMA(LDK(KP923879532), TZ, TW); + { + V T1b, T1c, T1d, T1e; + T1b = VADD(TF, TG); + STM2(&(xo[0]), T1b, ovs, &(xo[0])); + T1c = VSUB(TF, TG); + STM2(&(xo[16]), T1c, ovs, &(xo[0])); + T1d = VFMAI(TE, TB); + STM2(&(xo[8]), T1d, ovs, &(xo[0])); + T1e = VFNMSI(TE, TB); + STM2(&(xo[24]), T1e, ovs, &(xo[0])); + { + V Tw, Ty, Tn, Tx; + Tw = VFNMS(LDK(KP707106781), Tv, Tu); + Ty = VFMA(LDK(KP707106781), Tv, Tu); + Tn = VFNMS(LDK(KP707106781), Tm, T7); + Tx = VFMA(LDK(KP707106781), Tm, T7); + { + V T1f, T1g, T1h, T1i; + T1f = VFNMSI(T1a, T19); + STM2(&(xo[6]), T1f, ovs, &(xo[2])); + T1g = VFMAI(T1a, T19); + STM2(&(xo[26]), T1g, ovs, &(xo[2])); + STN2(&(xo[24]), T1e, T1g, ovs); + T1h = VFNMSI(T18, T15); + STM2(&(xo[22]), T1h, ovs, &(xo[2])); + T1i = VFMAI(T18, T15); + STM2(&(xo[10]), T1i, ovs, &(xo[2])); + STN2(&(xo[8]), T1d, T1i, ovs); + { + V T1j, T1k, T1l, T1m; + T1j = VFNMSI(T12, T11); + STM2(&(xo[30]), T1j, ovs, &(xo[2])); + T1k = VFMAI(T12, T11); + STM2(&(xo[2]), T1k, ovs, &(xo[2])); + STN2(&(xo[0]), T1b, T1k, ovs); + T1l = VFMAI(T10, TT); + STM2(&(xo[18]), T1l, ovs, &(xo[2])); + STN2(&(xo[16]), T1c, T1l, ovs); + T1m = VFNMSI(T10, TT); + STM2(&(xo[14]), T1m, ovs, &(xo[2])); + { + V T1n, T1o, T1p, T1q; + T1n = VFMAI(Ty, Tx); + STM2(&(xo[4]), T1n, ovs, &(xo[0])); + STN2(&(xo[4]), T1n, T1f, ovs); + T1o = VFNMSI(Ty, Tx); + STM2(&(xo[28]), T1o, ovs, &(xo[0])); + STN2(&(xo[28]), T1o, T1j, ovs); + T1p = VFMAI(Tw, Tn); + STM2(&(xo[20]), T1p, ovs, &(xo[0])); + STN2(&(xo[20]), T1p, T1h, ovs); + T1q = VFNMSI(Tw, Tn); + STM2(&(xo[12]), T1q, ovs, &(xo[0])); + STN2(&(xo[12]), T1q, T1m, ovs); + } + } + } + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n2bv_16"), {38, 0, 34, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_16) (planner *p) { + X(kdft_register) (p, n2bv_16, &desc); +} + +#else /* 
HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 16 -name n2bv_16 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 72 FP additions, 12 FP multiplications, + * (or, 68 additions, 8 multiplications, 4 fused multiply/add), + * 38 stack variables, 3 constants, and 40 memory accesses + */ +#include "n2b.h" + +static void n2bv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V Tp, T13, Tu, TY, Tm, T14, Tv, TU, T7, T16, Tx, TN, Te, T17, Ty; + V TQ; + { + V Tn, To, TX, Ts, Tt, TW; + Tn = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TX = VADD(Tn, To); + Ts = LD(&(xi[0]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TW = VADD(Ts, Tt); + Tp = VSUB(Tn, To); + T13 = VADD(TW, TX); + Tu = VSUB(Ts, Tt); + TY = VSUB(TW, TX); + } + { + V Ti, TS, Tl, TT; + { + V Tg, Th, Tj, Tk; + Tg = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Ti = VSUB(Tg, Th); + TS = VADD(Tg, Th); + Tj = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + TT = VADD(Tj, Tk); + } + Tm = VMUL(LDK(KP707106781), VSUB(Ti, Tl)); + T14 = VADD(TS, TT); + Tv = VMUL(LDK(KP707106781), VADD(Ti, Tl)); + TU = VSUB(TS, TT); + } + { + V T3, TL, T6, TM; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + TL = VADD(T1, T2); + T4 = 
LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + TM = VADD(T4, T5); + } + T7 = VFNMS(LDK(KP382683432), T6, VMUL(LDK(KP923879532), T3)); + T16 = VADD(TL, TM); + Tx = VFMA(LDK(KP382683432), T3, VMUL(LDK(KP923879532), T6)); + TN = VSUB(TL, TM); + } + { + V Ta, TO, Td, TP; + { + V T8, T9, Tb, Tc; + T8 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + TO = VADD(T8, T9); + Tb = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TP = VADD(Tb, Tc); + } + Te = VFMA(LDK(KP923879532), Ta, VMUL(LDK(KP382683432), Td)); + T17 = VADD(TO, TP); + Ty = VFNMS(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), Td)); + TQ = VSUB(TO, TP); + } + { + V T1b, T1c, T1d, T1e; + { + V T15, T18, T19, T1a; + T15 = VSUB(T13, T14); + T18 = VBYI(VSUB(T16, T17)); + T1b = VSUB(T15, T18); + STM2(&(xo[24]), T1b, ovs, &(xo[0])); + T1c = VADD(T15, T18); + STM2(&(xo[8]), T1c, ovs, &(xo[0])); + T19 = VADD(T13, T14); + T1a = VADD(T16, T17); + T1d = VSUB(T19, T1a); + STM2(&(xo[16]), T1d, ovs, &(xo[0])); + T1e = VADD(T19, T1a); + STM2(&(xo[0]), T1e, ovs, &(xo[0])); + } + { + V T1f, T1g, T1h, T1i; + { + V TV, T11, T10, T12, TR, TZ; + TR = VMUL(LDK(KP707106781), VSUB(TN, TQ)); + TV = VBYI(VSUB(TR, TU)); + T11 = VBYI(VADD(TU, TR)); + TZ = VMUL(LDK(KP707106781), VADD(TN, TQ)); + T10 = VSUB(TY, TZ); + T12 = VADD(TY, TZ); + T1f = VADD(TV, T10); + STM2(&(xo[12]), T1f, ovs, &(xo[0])); + T1g = VSUB(T12, T11); + STM2(&(xo[28]), T1g, ovs, &(xo[0])); + T1h = VSUB(T10, TV); + STM2(&(xo[20]), T1h, ovs, &(xo[0])); + T1i = VADD(T11, T12); + STM2(&(xo[4]), T1i, ovs, &(xo[0])); + } + { + V Tr, TB, TA, TC; + { + V Tf, Tq, Tw, Tz; + Tf = VSUB(T7, Te); + Tq = VSUB(Tm, Tp); + Tr = VBYI(VSUB(Tf, Tq)); + TB = VBYI(VADD(Tq, Tf)); + Tw = VSUB(Tu, Tv); + Tz = VSUB(Tx, Ty); + TA = VSUB(Tw, Tz); + TC = VADD(Tw, Tz); + } + { + V T1j, 
T1k, T1l, T1m; + T1j = VADD(Tr, TA); + STM2(&(xo[10]), T1j, ovs, &(xo[2])); + STN2(&(xo[8]), T1c, T1j, ovs); + T1k = VSUB(TC, TB); + STM2(&(xo[26]), T1k, ovs, &(xo[2])); + STN2(&(xo[24]), T1b, T1k, ovs); + T1l = VSUB(TA, Tr); + STM2(&(xo[22]), T1l, ovs, &(xo[2])); + STN2(&(xo[20]), T1h, T1l, ovs); + T1m = VADD(TB, TC); + STM2(&(xo[6]), T1m, ovs, &(xo[2])); + STN2(&(xo[4]), T1i, T1m, ovs); + } + } + { + V TF, TJ, TI, TK; + { + V TD, TE, TG, TH; + TD = VADD(Tu, Tv); + TE = VADD(T7, Te); + TF = VADD(TD, TE); + TJ = VSUB(TD, TE); + TG = VADD(Tp, Tm); + TH = VADD(Tx, Ty); + TI = VBYI(VADD(TG, TH)); + TK = VBYI(VSUB(TH, TG)); + } + { + V T1n, T1o, T1p, T1q; + T1n = VSUB(TF, TI); + STM2(&(xo[30]), T1n, ovs, &(xo[2])); + STN2(&(xo[28]), T1g, T1n, ovs); + T1o = VADD(TJ, TK); + STM2(&(xo[14]), T1o, ovs, &(xo[2])); + STN2(&(xo[12]), T1f, T1o, ovs); + T1p = VADD(TF, TI); + STM2(&(xo[2]), T1p, ovs, &(xo[2])); + STN2(&(xo[0]), T1e, T1p, ovs); + T1q = VSUB(TJ, TK); + STM2(&(xo[18]), T1q, ovs, &(xo[2])); + STN2(&(xo[16]), T1d, T1q, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n2bv_16"), {68, 8, 4, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_16) (planner *p) { + X(kdft_register) (p, n2bv_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 2 -name n2bv_2 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 7 stack variables, 0 constants, and 5 memory accesses + */ +#include "n2b.h" + +static void n2bv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = VADD(T1, T2); + STM2(&(xo[0]), T3, ovs, &(xo[0])); + T4 = VSUB(T1, T2); + STM2(&(xo[2]), T4, ovs, &(xo[2])); + STN2(&(xo[0]), T3, T4, ovs); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n2bv_2"), {2, 0, 0, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_2) (planner *p) { + X(kdft_register) (p, n2bv_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 2 -name 
n2bv_2 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 7 stack variables, 0 constants, and 5 memory accesses + */ +#include "n2b.h" + +static void n2bv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + STM2(&(xo[2]), T3, ovs, &(xo[2])); + T4 = VADD(T1, T2); + STM2(&(xo[0]), T4, ovs, &(xo[0])); + STN2(&(xo[0]), T4, T3, ovs); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n2bv_2"), {2, 0, 0, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_2) (planner *p) { + X(kdft_register) (p, n2bv_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:02 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 20 -name n2bv_20 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 104 FP additions, 50 FP multiplications, + * (or, 58 additions, 4 multiplications, 46 fused multiply/add), + * 79 stack variables, 4 constants, and 50 memory accesses + */ +#include "n2b.h" + +static void n2bv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V T1H, T1I, TS, TA, TN, TV, T1M, T1N, T1O, T1P, T1R, T1S, TK, TU, TR; + V Tl; + { + V T3, TE, T1r, T13, Ta, TL, Tz, TG, Ts, TF, Th, TM, T1u, T1C, T1n; + V T1a, T1m, T1h, T1x, T1D, Tk, Ti; + { + V T1, T2, TC, TD; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TC = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + TD = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + { + V T14, T6, T1c, Tv, Tm, T1f, Ty, T17, T9, Tn, Tp, T1b, Td, Tq, Te; + V Tf, T15, To; + { + V Tw, Tx, T7, T8, Tb, Tc; + { + V T4, T5, Tt, Tu, T11, T12; + T4 = 
LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tu = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + T11 = VADD(T1, T2); + TE = VSUB(TC, TD); + T12 = VADD(TC, TD); + T14 = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1c = VADD(Tt, Tu); + Tv = VSUB(Tt, Tu); + Tx = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1r = VADD(T11, T12); + T13 = VSUB(T11, T12); + } + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1f = VADD(Tw, Tx); + Ty = VSUB(Tw, Tx); + T17 = VADD(T7, T8); + T9 = VSUB(T7, T8); + Tn = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tp = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1b = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + Tq = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + } + Ta = VADD(T6, T9); + TL = VSUB(T6, T9); + T15 = VADD(Tm, Tn); + To = VSUB(Tm, Tn); + Tz = VSUB(Tv, Ty); + TG = VADD(Tv, Ty); + { + V T1d, T1v, T18, Tr, T1e, Tg, T16, T1s; + T1d = VSUB(T1b, T1c); + T1v = VADD(T1b, T1c); + T18 = VADD(Tp, Tq); + Tr = VSUB(Tp, Tq); + T1e = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + T16 = VSUB(T14, T15); + T1s = VADD(T14, T15); + { + V T1t, T19, T1w, T1g; + T1t = VADD(T17, T18); + T19 = VSUB(T17, T18); + Ts = VSUB(To, Tr); + TF = VADD(To, Tr); + T1w = VADD(T1e, T1f); + T1g = VSUB(T1e, T1f); + Th = VADD(Td, Tg); + TM = VSUB(Td, Tg); + T1u = VADD(T1s, T1t); + T1C = VSUB(T1s, T1t); + T1n = VSUB(T16, T19); + T1a = VADD(T16, T19); + T1m = VSUB(T1d, T1g); + T1h = VADD(T1d, T1g); + T1x = VADD(T1v, T1w); + T1D = VSUB(T1v, T1w); + } + } + } + } + Tk = VSUB(Ta, Th); + Ti = VADD(Ta, Th); + { + V TJ, T1k, T1A, TZ, Tj, T1E, T1G, TI, T10, T1j, T1z, 
T1i, T1y, TH; + TJ = VSUB(TF, TG); + TH = VADD(TF, TG); + T1i = VADD(T1a, T1h); + T1k = VSUB(T1a, T1h); + T1y = VADD(T1u, T1x); + T1A = VSUB(T1u, T1x); + TZ = VADD(T3, Ti); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + T1E = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1D, T1C)); + T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1C, T1D)); + TI = VFNMS(LDK(KP250000000), TH, TE); + T10 = VADD(TE, TH); + T1j = VFNMS(LDK(KP250000000), T1i, T13); + T1H = VADD(T1r, T1y); + STM2(&(xo[0]), T1H, ovs, &(xo[0])); + T1z = VFNMS(LDK(KP250000000), T1y, T1r); + T1I = VADD(T13, T1i); + STM2(&(xo[20]), T1I, ovs, &(xo[0])); + { + V T1J, T1K, T1p, T1l, T1o, T1q, T1F, T1B, T1L, T1Q; + TS = VFNMS(LDK(KP618033988), Ts, Tz); + TA = VFMA(LDK(KP618033988), Tz, Ts); + TN = VFMA(LDK(KP618033988), TM, TL); + TV = VFNMS(LDK(KP618033988), TL, TM); + T1J = VFMAI(T10, TZ); + STM2(&(xo[10]), T1J, ovs, &(xo[2])); + T1K = VFNMSI(T10, TZ); + STM2(&(xo[30]), T1K, ovs, &(xo[2])); + T1p = VFMA(LDK(KP559016994), T1k, T1j); + T1l = VFNMS(LDK(KP559016994), T1k, T1j); + T1o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1n, T1m)); + T1q = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1m, T1n)); + T1F = VFNMS(LDK(KP559016994), T1A, T1z); + T1B = VFMA(LDK(KP559016994), T1A, T1z); + T1L = VFNMSI(T1q, T1p); + STM2(&(xo[28]), T1L, ovs, &(xo[0])); + STN2(&(xo[28]), T1L, T1K, ovs); + T1M = VFMAI(T1q, T1p); + STM2(&(xo[12]), T1M, ovs, &(xo[0])); + T1N = VFMAI(T1o, T1l); + STM2(&(xo[36]), T1N, ovs, &(xo[0])); + T1O = VFNMSI(T1o, T1l); + STM2(&(xo[4]), T1O, ovs, &(xo[0])); + T1P = VFMAI(T1E, T1B); + STM2(&(xo[32]), T1P, ovs, &(xo[0])); + T1Q = VFNMSI(T1E, T1B); + STM2(&(xo[8]), T1Q, ovs, &(xo[0])); + STN2(&(xo[8]), T1Q, T1J, ovs); + T1R = VFNMSI(T1G, T1F); + STM2(&(xo[24]), T1R, ovs, &(xo[0])); + T1S = VFMAI(T1G, T1F); + STM2(&(xo[16]), T1S, ovs, &(xo[0])); + TK = VFMA(LDK(KP559016994), TJ, TI); + TU = VFNMS(LDK(KP559016994), TJ, TI); + TR = VFNMS(LDK(KP559016994), Tk, Tj); + Tl = VFMA(LDK(KP559016994), Tk, 
Tj); + } + } + } + { + V TY, TW, TO, TQ, TB, TP, TX, TT; + TY = VFMA(LDK(KP951056516), TV, TU); + TW = VFNMS(LDK(KP951056516), TV, TU); + TO = VFMA(LDK(KP951056516), TN, TK); + TQ = VFNMS(LDK(KP951056516), TN, TK); + TB = VFNMS(LDK(KP951056516), TA, Tl); + TP = VFMA(LDK(KP951056516), TA, Tl); + TX = VFNMS(LDK(KP951056516), TS, TR); + TT = VFMA(LDK(KP951056516), TS, TR); + { + V T1T, T1U, T1V, T1W; + T1T = VFMAI(TQ, TP); + STM2(&(xo[18]), T1T, ovs, &(xo[2])); + STN2(&(xo[16]), T1S, T1T, ovs); + T1U = VFNMSI(TQ, TP); + STM2(&(xo[22]), T1U, ovs, &(xo[2])); + STN2(&(xo[20]), T1I, T1U, ovs); + T1V = VFMAI(TO, TB); + STM2(&(xo[2]), T1V, ovs, &(xo[2])); + STN2(&(xo[0]), T1H, T1V, ovs); + T1W = VFNMSI(TO, TB); + STM2(&(xo[38]), T1W, ovs, &(xo[2])); + STN2(&(xo[36]), T1N, T1W, ovs); + { + V T1X, T1Y, T1Z, T20; + T1X = VFMAI(TW, TT); + STM2(&(xo[34]), T1X, ovs, &(xo[2])); + STN2(&(xo[32]), T1P, T1X, ovs); + T1Y = VFNMSI(TW, TT); + STM2(&(xo[6]), T1Y, ovs, &(xo[2])); + STN2(&(xo[4]), T1O, T1Y, ovs); + T1Z = VFMAI(TY, TX); + STM2(&(xo[26]), T1Z, ovs, &(xo[2])); + STN2(&(xo[24]), T1R, T1Z, ovs); + T20 = VFNMSI(TY, TX); + STM2(&(xo[14]), T20, ovs, &(xo[2])); + STN2(&(xo[12]), T1M, T20, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n2bv_20"), {58, 4, 46, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_20) (planner *p) { + X(kdft_register) (p, n2bv_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 20 -name n2bv_20 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 104 FP additions, 24 FP multiplications, + * (or, 92 additions, 12 multiplications, 12 fused multiply/add), + * 57 stack variables, 4 constants, and 50 memory accesses + */ +#include "n2b.h" + +static void n2bv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP587785252, 
+0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V T3, T1y, TH, T1i, Ts, TL, TM, Tz, T13, T16, T1j, T1u, T1v, T1w, T1r; + V T1s, T1t, T1a, T1d, T1k, Ti, Tk, TE, TI; + { + V T1, T2, T1g, TF, TG, T1h; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T1g = VADD(T1, T2); + TF = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + TG = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1h = VADD(TF, TG); + T3 = VSUB(T1, T2); + T1y = VADD(T1g, T1h); + TH = VSUB(TF, TG); + T1i = VSUB(T1g, T1h); + } + { + V T6, T11, Tv, T19, Ty, T1c, T9, T14, Td, T18, To, T12, Tr, T15, Tg; + V T1b; + { + V T4, T5, Tt, Tu; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T11 = VADD(T4, T5); + Tt = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tu = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tv = VSUB(Tt, Tu); + T19 = VADD(Tt, Tu); + } + { + V Tw, Tx, T7, T8; + Tw = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + Tx = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ty = VSUB(Tw, Tx); + T1c = VADD(Tw, Tx); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T14 = VADD(T7, T8); + } + { + V Tb, Tc, Tm, Tn; + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Td = VSUB(Tb, Tc); + T18 = VADD(Tb, Tc); + Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + To = VSUB(Tm, Tn); + T12 = VADD(Tm, Tn); + } + { + V Tp, Tq, Te, Tf; + Tp = LD(&(xi[WS(is, 1)]), ivs, 
&(xi[WS(is, 1)])); + Tq = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tr = VSUB(Tp, Tq); + T15 = VADD(Tp, Tq); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tg = VSUB(Te, Tf); + T1b = VADD(Te, Tf); + } + Ts = VSUB(To, Tr); + TL = VSUB(T6, T9); + TM = VSUB(Td, Tg); + Tz = VSUB(Tv, Ty); + T13 = VSUB(T11, T12); + T16 = VSUB(T14, T15); + T1j = VADD(T13, T16); + T1u = VADD(T18, T19); + T1v = VADD(T1b, T1c); + T1w = VADD(T1u, T1v); + T1r = VADD(T11, T12); + T1s = VADD(T14, T15); + T1t = VADD(T1r, T1s); + T1a = VSUB(T18, T19); + T1d = VSUB(T1b, T1c); + T1k = VADD(T1a, T1d); + { + V Ta, Th, TC, TD; + Ta = VADD(T6, T9); + Th = VADD(Td, Tg); + Ti = VADD(Ta, Th); + Tk = VMUL(LDK(KP559016994), VSUB(Ta, Th)); + TC = VADD(To, Tr); + TD = VADD(Tv, Ty); + TE = VMUL(LDK(KP559016994), VSUB(TC, TD)); + TI = VADD(TC, TD); + } + } + { + V T1H, T1J, T1K, T1L, T1N, T1I, TZ, T10; + TZ = VADD(T3, Ti); + T10 = VBYI(VADD(TH, TI)); + T1H = VSUB(TZ, T10); + STM2(&(xo[30]), T1H, ovs, &(xo[2])); + T1I = VADD(TZ, T10); + STM2(&(xo[10]), T1I, ovs, &(xo[2])); + { + V T1x, T1z, T1A, T1E, T1G, T1C, T1D, T1F, T1B, T1M; + T1x = VMUL(LDK(KP559016994), VSUB(T1t, T1w)); + T1z = VADD(T1t, T1w); + T1A = VFNMS(LDK(KP250000000), T1z, T1y); + T1C = VSUB(T1r, T1s); + T1D = VSUB(T1u, T1v); + T1E = VBYI(VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D))); + T1G = VBYI(VFNMS(LDK(KP951056516), T1D, VMUL(LDK(KP587785252), T1C))); + T1J = VADD(T1y, T1z); + STM2(&(xo[0]), T1J, ovs, &(xo[0])); + T1F = VSUB(T1A, T1x); + T1K = VSUB(T1F, T1G); + STM2(&(xo[16]), T1K, ovs, &(xo[0])); + T1L = VADD(T1G, T1F); + STM2(&(xo[24]), T1L, ovs, &(xo[0])); + T1B = VADD(T1x, T1A); + T1M = VSUB(T1B, T1E); + STM2(&(xo[8]), T1M, ovs, &(xo[0])); + STN2(&(xo[8]), T1M, T1I, ovs); + T1N = VADD(T1E, T1B); + STM2(&(xo[32]), T1N, ovs, &(xo[0])); + } + { + V T1O, T1P, T1R, T1S; + { + V T1n, T1l, T1m, T1f, T1p, T17, T1e, T1q, T1Q, T1o; + T1n = VMUL(LDK(KP559016994), VSUB(T1j, T1k)); + T1l = 
VADD(T1j, T1k); + T1m = VFNMS(LDK(KP250000000), T1l, T1i); + T17 = VSUB(T13, T16); + T1e = VSUB(T1a, T1d); + T1f = VBYI(VFNMS(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T17))); + T1p = VBYI(VFMA(LDK(KP951056516), T17, VMUL(LDK(KP587785252), T1e))); + T1O = VADD(T1i, T1l); + STM2(&(xo[20]), T1O, ovs, &(xo[0])); + T1q = VADD(T1n, T1m); + T1P = VADD(T1p, T1q); + STM2(&(xo[12]), T1P, ovs, &(xo[0])); + T1Q = VSUB(T1q, T1p); + STM2(&(xo[28]), T1Q, ovs, &(xo[0])); + STN2(&(xo[28]), T1Q, T1H, ovs); + T1o = VSUB(T1m, T1n); + T1R = VADD(T1f, T1o); + STM2(&(xo[4]), T1R, ovs, &(xo[0])); + T1S = VSUB(T1o, T1f); + STM2(&(xo[36]), T1S, ovs, &(xo[0])); + } + { + V TA, TN, TU, TS, TK, TV, Tl, TR, TJ, Tj; + TA = VFNMS(LDK(KP951056516), Tz, VMUL(LDK(KP587785252), Ts)); + TN = VFNMS(LDK(KP951056516), TM, VMUL(LDK(KP587785252), TL)); + TU = VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TM)); + TS = VFMA(LDK(KP951056516), Ts, VMUL(LDK(KP587785252), Tz)); + TJ = VFNMS(LDK(KP250000000), TI, TH); + TK = VSUB(TE, TJ); + TV = VADD(TE, TJ); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + Tl = VSUB(Tj, Tk); + TR = VADD(Tk, Tj); + { + V TB, TO, T1T, T1U; + TB = VSUB(Tl, TA); + TO = VBYI(VSUB(TK, TN)); + T1T = VSUB(TB, TO); + STM2(&(xo[34]), T1T, ovs, &(xo[2])); + STN2(&(xo[32]), T1N, T1T, ovs); + T1U = VADD(TB, TO); + STM2(&(xo[6]), T1U, ovs, &(xo[2])); + STN2(&(xo[4]), T1R, T1U, ovs); + } + { + V TX, TY, T1V, T1W; + TX = VADD(TR, TS); + TY = VBYI(VSUB(TV, TU)); + T1V = VSUB(TX, TY); + STM2(&(xo[22]), T1V, ovs, &(xo[2])); + STN2(&(xo[20]), T1O, T1V, ovs); + T1W = VADD(TX, TY); + STM2(&(xo[18]), T1W, ovs, &(xo[2])); + STN2(&(xo[16]), T1K, T1W, ovs); + } + { + V TP, TQ, T1X, T1Y; + TP = VADD(Tl, TA); + TQ = VBYI(VADD(TN, TK)); + T1X = VSUB(TP, TQ); + STM2(&(xo[26]), T1X, ovs, &(xo[2])); + STN2(&(xo[24]), T1L, T1X, ovs); + T1Y = VADD(TP, TQ); + STM2(&(xo[14]), T1Y, ovs, &(xo[2])); + STN2(&(xo[12]), T1P, T1Y, ovs); + } + { + V TT, TW, T1Z, T20; + TT = VSUB(TR, TS); + TW = VBYI(VADD(TU, TV)); + T1Z 
= VSUB(TT, TW); + STM2(&(xo[38]), T1Z, ovs, &(xo[2])); + STN2(&(xo[36]), T1S, T1Z, ovs); + T20 = VADD(TT, TW); + STM2(&(xo[2]), T20, ovs, &(xo[2])); + STN2(&(xo[0]), T1J, T20, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n2bv_20"), {92, 12, 12, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_20) (planner *p) { + X(kdft_register) (p, n2bv_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,823 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:02 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 32 -name n2bv_32 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 186 FP additions, 98 FP multiplications, + * (or, 88 additions, 0 multiplications, 98 fused multiply/add), + * 120 stack variables, 7 constants, and 80 memory accesses + */ +#include "n2b.h" + +static void n2bv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T31, T32, T33, T34, T35, T36, T37, T38, T39, T3a, T3b, T3c, T1h, Tr, T3d; + V T3e, T3f, T3g, T1a, T1k, TI, T1b, T1L, T1P, T1I, T1G, T1O, T1Q, T1H, T1z; + V T1c, TZ; + { + V T2x, T1T, T2K, T1W, T1p, Tb, T1A, T16, Tu, TF, T2O, T2H, T2b, T2t, TY; + V T1w, TT, T1v, T20, T2C, Tj, Te, T2e, To, T2i, T23, T2D, TB, TG, Th; + V T2f, Tk; + { + V TL, TW, TP, TQ, T2F, 
T27, T28, TO; + { + V T1, T2, T12, T13, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T12 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T13 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + { + V TM, T25, T26, TN; + { + V TJ, T3, T14, T1U, T6, T1V, T9, TK, TU, TV, T1R, T1S, Ta, T15; + TJ = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1R = VADD(T1, T2); + T3 = VSUB(T1, T2); + T1S = VADD(T12, T13); + T14 = VSUB(T12, T13); + T1U = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1V = VADD(T7, T8); + T9 = VSUB(T7, T8); + TK = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TU = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T2x = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + TV = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TM = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T2K = VSUB(T1U, T1V); + T1W = VADD(T1U, T1V); + Ta = VADD(T6, T9); + T15 = VSUB(T6, T9); + T25 = VADD(TJ, TK); + TL = VSUB(TJ, TK); + T26 = VADD(TV, TU); + TW = VSUB(TU, TV); + TN = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TP = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1p = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + T1A = VFNMS(LDK(KP707106781), T15, T14); + T16 = VFMA(LDK(KP707106781), T15, T14); + TQ = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + T2F = VSUB(T25, T26); + T27 = VADD(T25, T26); + T28 = VADD(TM, TN); + TO = VSUB(TM, TN); + } + } + { + V Ty, T21, Tx, Tz, T1Y, T1Z; + { + V Ts, Tt, TD, T29, TR, TE, Tv, Tw; + Ts = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TD = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T29 = VADD(TP, TQ); + TR = VSUB(TP, TQ); + TE = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 
21)]), ivs, &(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1Y = VADD(Ts, Tt); + Tu = VSUB(Ts, Tt); + { + V T2G, T2a, TX, TS; + T2G = VSUB(T29, T28); + T2a = VADD(T28, T29); + TX = VSUB(TR, TO); + TS = VADD(TO, TR); + T1Z = VADD(TD, TE); + TF = VSUB(TD, TE); + T21 = VADD(Tv, Tw); + Tx = VSUB(Tv, Tw); + T2O = VFMA(LDK(KP414213562), T2F, T2G); + T2H = VFNMS(LDK(KP414213562), T2G, T2F); + T2b = VSUB(T27, T2a); + T2t = VADD(T27, T2a); + TY = VFMA(LDK(KP707106781), TX, TW); + T1w = VFNMS(LDK(KP707106781), TX, TW); + TT = VFMA(LDK(KP707106781), TS, TL); + T1v = VFNMS(LDK(KP707106781), TS, TL); + Tz = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + } + } + T20 = VADD(T1Y, T1Z); + T2C = VSUB(T1Y, T1Z); + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V Tf, TA, T22, Tg; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + T22 = VADD(Ty, Tz); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T2e = VADD(Tc, Td); + To = VSUB(Tm, Tn); + T2i = VADD(Tn, Tm); + T23 = VADD(T21, T22); + T2D = VSUB(T21, T22); + TB = VADD(Tx, TA); + TG = VSUB(Tx, TA); + Th = VSUB(Tf, Tg); + T2f = VADD(Tf, Tg); + Tk = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + } + } + } + } + { + V T1t, TH, T1s, TC, T2P, T2U, T2n, T2d, T2w, T2u, T1q, T19, T1B, Tq, T2W; + V T2M, T2B, T2T, T2v, T2r, T2o, T2m, T2X, T2I; + { + V T1X, T2p, T2E, T2N, T2s, T2y, T2g, T17, Ti, T2h, Tl, T2c, T2l, T24; + T1X = VSUB(T1T, T1W); + T2p = VADD(T1T, T1W); + T2E = VFNMS(LDK(KP414213562), T2D, T2C); + T2N = VFMA(LDK(KP414213562), T2C, T2D); + T2s = VADD(T20, T23); + T24 = VSUB(T20, T23); + T1t = VFNMS(LDK(KP707106781), TG, TF); + TH = VFMA(LDK(KP707106781), TG, TF); + T1s = VFNMS(LDK(KP707106781), TB, Tu); + TC = VFMA(LDK(KP707106781), TB, Tu); + T2y = VSUB(T2e, T2f); + T2g = 
VADD(T2e, T2f); + T17 = VFMA(LDK(KP414213562), Te, Th); + Ti = VFNMS(LDK(KP414213562), Th, Te); + T2h = VADD(Tj, Tk); + Tl = VSUB(Tj, Tk); + T2c = VADD(T24, T2b); + T2l = VSUB(T24, T2b); + { + V T2L, T2A, T2q, T2k; + T2P = VSUB(T2N, T2O); + T2U = VADD(T2N, T2O); + { + V T2z, T2j, T18, Tp; + T2z = VSUB(T2h, T2i); + T2j = VADD(T2h, T2i); + T18 = VFMA(LDK(KP414213562), Tl, To); + Tp = VFNMS(LDK(KP414213562), To, Tl); + T2n = VFMA(LDK(KP707106781), T2c, T1X); + T2d = VFNMS(LDK(KP707106781), T2c, T1X); + T2w = VADD(T2s, T2t); + T2u = VSUB(T2s, T2t); + T2L = VSUB(T2y, T2z); + T2A = VADD(T2y, T2z); + T2q = VADD(T2g, T2j); + T2k = VSUB(T2g, T2j); + T1q = VADD(T17, T18); + T19 = VSUB(T17, T18); + T1B = VSUB(Ti, Tp); + Tq = VADD(Ti, Tp); + } + T2W = VFNMS(LDK(KP707106781), T2L, T2K); + T2M = VFMA(LDK(KP707106781), T2L, T2K); + T2B = VFMA(LDK(KP707106781), T2A, T2x); + T2T = VFNMS(LDK(KP707106781), T2A, T2x); + T2v = VADD(T2p, T2q); + T2r = VSUB(T2p, T2q); + T2o = VFMA(LDK(KP707106781), T2l, T2k); + T2m = VFNMS(LDK(KP707106781), T2l, T2k); + T2X = VSUB(T2E, T2H); + T2I = VADD(T2E, T2H); + } + } + { + V T2V, T2Z, T2Y, T30, T2R, T2J; + T2V = VFNMS(LDK(KP923879532), T2U, T2T); + T2Z = VFMA(LDK(KP923879532), T2U, T2T); + T31 = VSUB(T2v, T2w); + STM2(&(xo[32]), T31, ovs, &(xo[0])); + T32 = VADD(T2v, T2w); + STM2(&(xo[0]), T32, ovs, &(xo[0])); + T33 = VFMAI(T2u, T2r); + STM2(&(xo[16]), T33, ovs, &(xo[0])); + T34 = VFNMSI(T2u, T2r); + STM2(&(xo[48]), T34, ovs, &(xo[0])); + T35 = VFMAI(T2o, T2n); + STM2(&(xo[8]), T35, ovs, &(xo[0])); + T36 = VFNMSI(T2o, T2n); + STM2(&(xo[56]), T36, ovs, &(xo[0])); + T37 = VFMAI(T2m, T2d); + STM2(&(xo[40]), T37, ovs, &(xo[0])); + T38 = VFNMSI(T2m, T2d); + STM2(&(xo[24]), T38, ovs, &(xo[0])); + T2Y = VFMA(LDK(KP923879532), T2X, T2W); + T30 = VFNMS(LDK(KP923879532), T2X, T2W); + T2R = VFMA(LDK(KP923879532), T2I, T2B); + T2J = VFNMS(LDK(KP923879532), T2I, T2B); + { + V T1J, T1r, T1C, T1M, T2S, T2Q, T1u, T1D, T1E, T1x; + T1J = VFNMS(LDK(KP923879532), T1q, 
T1p); + T1r = VFMA(LDK(KP923879532), T1q, T1p); + T1C = VFNMS(LDK(KP923879532), T1B, T1A); + T1M = VFMA(LDK(KP923879532), T1B, T1A); + T39 = VFNMSI(T30, T2Z); + STM2(&(xo[12]), T39, ovs, &(xo[0])); + T3a = VFMAI(T30, T2Z); + STM2(&(xo[52]), T3a, ovs, &(xo[0])); + T3b = VFNMSI(T2Y, T2V); + STM2(&(xo[44]), T3b, ovs, &(xo[0])); + T3c = VFMAI(T2Y, T2V); + STM2(&(xo[20]), T3c, ovs, &(xo[0])); + T2S = VFMA(LDK(KP923879532), T2P, T2M); + T2Q = VFNMS(LDK(KP923879532), T2P, T2M); + T1u = VFMA(LDK(KP668178637), T1t, T1s); + T1D = VFNMS(LDK(KP668178637), T1s, T1t); + T1E = VFNMS(LDK(KP668178637), T1v, T1w); + T1x = VFMA(LDK(KP668178637), T1w, T1v); + { + V T1K, T1F, T1N, T1y; + T1h = VFNMS(LDK(KP923879532), Tq, Tb); + Tr = VFMA(LDK(KP923879532), Tq, Tb); + T3d = VFNMSI(T2S, T2R); + STM2(&(xo[60]), T3d, ovs, &(xo[0])); + T3e = VFMAI(T2S, T2R); + STM2(&(xo[4]), T3e, ovs, &(xo[0])); + T3f = VFMAI(T2Q, T2J); + STM2(&(xo[36]), T3f, ovs, &(xo[0])); + T3g = VFNMSI(T2Q, T2J); + STM2(&(xo[28]), T3g, ovs, &(xo[0])); + T1K = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + T1N = VSUB(T1u, T1x); + T1y = VADD(T1u, T1x); + T1a = VFMA(LDK(KP923879532), T19, T16); + T1k = VFNMS(LDK(KP923879532), T19, T16); + TI = VFNMS(LDK(KP198912367), TH, TC); + T1b = VFMA(LDK(KP198912367), TC, TH); + T1L = VFMA(LDK(KP831469612), T1K, T1J); + T1P = VFNMS(LDK(KP831469612), T1K, T1J); + T1I = VFMA(LDK(KP831469612), T1F, T1C); + T1G = VFNMS(LDK(KP831469612), T1F, T1C); + T1O = VFNMS(LDK(KP831469612), T1N, T1M); + T1Q = VFMA(LDK(KP831469612), T1N, T1M); + T1H = VFMA(LDK(KP831469612), T1y, T1r); + T1z = VFNMS(LDK(KP831469612), T1y, T1r); + T1c = VFMA(LDK(KP198912367), TT, TY); + TZ = VFNMS(LDK(KP198912367), TY, TT); + } + } + } + } + } + { + V T1d, T1i, T10, T1l; + { + V T3h, T3i, T3j, T3k; + T3h = VFMAI(T1O, T1L); + STM2(&(xo[42]), T3h, ovs, &(xo[2])); + STN2(&(xo[40]), T37, T3h, ovs); + T3i = VFNMSI(T1O, T1L); + STM2(&(xo[22]), T3i, ovs, &(xo[2])); + STN2(&(xo[20]), T3c, T3i, ovs); + T3j = VFNMSI(T1Q, T1P); + 
STM2(&(xo[54]), T3j, ovs, &(xo[2])); + STN2(&(xo[52]), T3a, T3j, ovs); + T3k = VFMAI(T1Q, T1P); + STM2(&(xo[10]), T3k, ovs, &(xo[2])); + STN2(&(xo[8]), T35, T3k, ovs); + { + V T3l, T3m, T3n, T3o; + T3l = VFMAI(T1I, T1H); + STM2(&(xo[58]), T3l, ovs, &(xo[2])); + STN2(&(xo[56]), T36, T3l, ovs); + T3m = VFNMSI(T1I, T1H); + STM2(&(xo[6]), T3m, ovs, &(xo[2])); + STN2(&(xo[4]), T3e, T3m, ovs); + T3n = VFMAI(T1G, T1z); + STM2(&(xo[26]), T3n, ovs, &(xo[2])); + STN2(&(xo[24]), T38, T3n, ovs); + T3o = VFNMSI(T1G, T1z); + STM2(&(xo[38]), T3o, ovs, &(xo[2])); + STN2(&(xo[36]), T3f, T3o, ovs); + T1d = VSUB(T1b, T1c); + T1i = VADD(T1b, T1c); + T10 = VADD(TI, TZ); + T1l = VSUB(TI, TZ); + } + } + { + V T1n, T1j, T1e, T1g, T1o, T1m, T11, T1f; + T1n = VFMA(LDK(KP980785280), T1i, T1h); + T1j = VFNMS(LDK(KP980785280), T1i, T1h); + T1e = VFNMS(LDK(KP980785280), T1d, T1a); + T1g = VFMA(LDK(KP980785280), T1d, T1a); + T1o = VFNMS(LDK(KP980785280), T1l, T1k); + T1m = VFMA(LDK(KP980785280), T1l, T1k); + T11 = VFNMS(LDK(KP980785280), T10, Tr); + T1f = VFMA(LDK(KP980785280), T10, Tr); + { + V T3p, T3q, T3r, T3s; + T3p = VFNMSI(T1m, T1j); + STM2(&(xo[46]), T3p, ovs, &(xo[2])); + STN2(&(xo[44]), T3b, T3p, ovs); + T3q = VFMAI(T1m, T1j); + STM2(&(xo[18]), T3q, ovs, &(xo[2])); + STN2(&(xo[16]), T33, T3q, ovs); + T3r = VFMAI(T1o, T1n); + STM2(&(xo[50]), T3r, ovs, &(xo[2])); + STN2(&(xo[48]), T34, T3r, ovs); + T3s = VFNMSI(T1o, T1n); + STM2(&(xo[14]), T3s, ovs, &(xo[2])); + STN2(&(xo[12]), T39, T3s, ovs); + { + V T3t, T3u, T3v, T3w; + T3t = VFMAI(T1g, T1f); + STM2(&(xo[2]), T3t, ovs, &(xo[2])); + STN2(&(xo[0]), T32, T3t, ovs); + T3u = VFNMSI(T1g, T1f); + STM2(&(xo[62]), T3u, ovs, &(xo[2])); + STN2(&(xo[60]), T3d, T3u, ovs); + T3v = VFMAI(T1e, T11); + STM2(&(xo[34]), T3v, ovs, &(xo[2])); + STN2(&(xo[32]), T31, T3v, ovs); + T3w = VFNMSI(T1e, T11); + STM2(&(xo[30]), T3w, ovs, &(xo[2])); + STN2(&(xo[28]), T3g, T3w, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, 
XSIMD_STRING("n2bv_32"), {88, 0, 98, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_32) (planner *p) { + X(kdft_register) (p, n2bv_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 32 -name n2bv_32 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 186 FP additions, 42 FP multiplications, + * (or, 170 additions, 26 multiplications, 16 fused multiply/add), + * 72 stack variables, 7 constants, and 80 memory accesses + */ +#include "n2b.h" + +static void n2bv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T2f, T2k, T2N, T2M, T19, T1B, Tb, T1p, TT, T1v, TY, T1w, T2E, T2F, T2G; + V T24, T2o, TC, T1s, TH, T1t, T2B, T2C, T2D, T1X, T2n, T2I, T2J, Tq, T1A; + V T14, T1q, T2c, T2l; + { + V T3, T2i, T18, T2j, T6, T2d, T9, T2e, T15, Ta; + { + V T1, T2, T16, T17; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T2i = VADD(T1, T2); + T16 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T17 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T18 = VSUB(T16, T17); + T2j = VADD(T16, T17); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 
20)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T2d = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T2e = VADD(T7, T8); + } + T2f = VSUB(T2d, T2e); + T2k = VSUB(T2i, T2j); + T2N = VADD(T2d, T2e); + T2M = VADD(T2i, T2j); + T15 = VMUL(LDK(KP707106781), VSUB(T6, T9)); + T19 = VSUB(T15, T18); + T1B = VADD(T18, T15); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VSUB(T3, Ta); + T1p = VADD(T3, Ta); + } + { + V TL, T21, TW, T1Y, TO, T22, TS, T1Z; + { + V TJ, TK, TU, TV; + TJ = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + TK = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TL = VSUB(TJ, TK); + T21 = VADD(TJ, TK); + TU = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + TV = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TW = VSUB(TU, TV); + T1Y = VADD(TU, TV); + } + { + V TM, TN, TQ, TR; + TM = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + TN = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + TO = VSUB(TM, TN); + T22 = VADD(TM, TN); + TQ = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TR = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + TS = VSUB(TQ, TR); + T1Z = VADD(TQ, TR); + } + { + V TP, TX, T20, T23; + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + TT = VSUB(TP, TS); + T1v = VADD(TS, TP); + TX = VMUL(LDK(KP707106781), VADD(TL, TO)); + TY = VSUB(TW, TX); + T1w = VADD(TW, TX); + T2E = VADD(T1Y, T1Z); + T2F = VADD(T21, T22); + T2G = VSUB(T2E, T2F); + T20 = VSUB(T1Y, T1Z); + T23 = VSUB(T21, T22); + T24 = VFMA(LDK(KP923879532), T20, VMUL(LDK(KP382683432), T23)); + T2o = VFNMS(LDK(KP382683432), T20, VMUL(LDK(KP923879532), T23)); + } + } + { + V Tu, T1U, TF, T1R, Tx, T1V, TB, T1S; + { + V Ts, Tt, TD, TE; + Ts = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tu = VSUB(Ts, Tt); + T1U = VADD(Ts, Tt); + TD = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + TE = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TF = VSUB(TD, TE); + T1R = 
VADD(TD, TE); + } + { + V Tv, Tw, Tz, TA; + Tv = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tx = VSUB(Tv, Tw); + T1V = VADD(Tv, Tw); + Tz = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + TA = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + TB = VSUB(Tz, TA); + T1S = VADD(Tz, TA); + } + { + V Ty, TG, T1T, T1W; + Ty = VMUL(LDK(KP707106781), VSUB(Tu, Tx)); + TC = VSUB(Ty, TB); + T1s = VADD(TB, Ty); + TG = VMUL(LDK(KP707106781), VADD(Tu, Tx)); + TH = VSUB(TF, TG); + T1t = VADD(TF, TG); + T2B = VADD(T1R, T1S); + T2C = VADD(T1U, T1V); + T2D = VSUB(T2B, T2C); + T1T = VSUB(T1R, T1S); + T1W = VSUB(T1U, T1V); + T1X = VFNMS(LDK(KP382683432), T1W, VMUL(LDK(KP923879532), T1T)); + T2n = VFMA(LDK(KP382683432), T1T, VMUL(LDK(KP923879532), T1W)); + } + } + { + V Te, T26, To, T29, Th, T27, Tl, T2a, Ti, Tp; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T26 = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T29 = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T27 = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T2a = VADD(Tj, Tk); + } + T2I = VADD(T26, T27); + T2J = VADD(T29, T2a); + Ti = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + Tp = VFNMS(LDK(KP382683432), To, VMUL(LDK(KP923879532), Tl)); + Tq = VSUB(Ti, Tp); + T1A = VADD(Ti, Tp); + { + V T12, T13, T28, T2b; + T12 = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + T13 = VFMA(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T14 = VSUB(T12, T13); + T1q = VADD(T12, T13); + T28 = VSUB(T26, T27); + T2b = VSUB(T29, T2a); + T2c = VMUL(LDK(KP707106781), VSUB(T28, T2b)); + T2l = VMUL(LDK(KP707106781), 
VADD(T28, T2b)); + } + } + { + V T31, T32, T33, T34, T35, T36, T37, T38, T39, T3a, T3b, T3c; + { + V T2L, T2R, T2Q, T2S; + { + V T2H, T2K, T2O, T2P; + T2H = VMUL(LDK(KP707106781), VSUB(T2D, T2G)); + T2K = VSUB(T2I, T2J); + T2L = VBYI(VSUB(T2H, T2K)); + T2R = VBYI(VADD(T2K, T2H)); + T2O = VSUB(T2M, T2N); + T2P = VMUL(LDK(KP707106781), VADD(T2D, T2G)); + T2Q = VSUB(T2O, T2P); + T2S = VADD(T2O, T2P); + } + T31 = VADD(T2L, T2Q); + STM2(&(xo[24]), T31, ovs, &(xo[0])); + T32 = VSUB(T2S, T2R); + STM2(&(xo[56]), T32, ovs, &(xo[0])); + T33 = VSUB(T2Q, T2L); + STM2(&(xo[40]), T33, ovs, &(xo[0])); + T34 = VADD(T2R, T2S); + STM2(&(xo[8]), T34, ovs, &(xo[0])); + } + { + V T2h, T2r, T2q, T2s; + { + V T25, T2g, T2m, T2p; + T25 = VSUB(T1X, T24); + T2g = VSUB(T2c, T2f); + T2h = VBYI(VSUB(T25, T2g)); + T2r = VBYI(VADD(T2g, T25)); + T2m = VSUB(T2k, T2l); + T2p = VSUB(T2n, T2o); + T2q = VSUB(T2m, T2p); + T2s = VADD(T2m, T2p); + } + T35 = VADD(T2h, T2q); + STM2(&(xo[20]), T35, ovs, &(xo[0])); + T36 = VSUB(T2s, T2r); + STM2(&(xo[52]), T36, ovs, &(xo[0])); + T37 = VSUB(T2q, T2h); + STM2(&(xo[44]), T37, ovs, &(xo[0])); + T38 = VADD(T2r, T2s); + STM2(&(xo[12]), T38, ovs, &(xo[0])); + } + { + V T2V, T2Z, T2Y, T30; + { + V T2T, T2U, T2W, T2X; + T2T = VADD(T2M, T2N); + T2U = VADD(T2I, T2J); + T2V = VSUB(T2T, T2U); + T2Z = VADD(T2T, T2U); + T2W = VADD(T2B, T2C); + T2X = VADD(T2E, T2F); + T2Y = VBYI(VSUB(T2W, T2X)); + T30 = VADD(T2W, T2X); + } + T39 = VSUB(T2V, T2Y); + STM2(&(xo[48]), T39, ovs, &(xo[0])); + T3a = VADD(T2Z, T30); + STM2(&(xo[0]), T3a, ovs, &(xo[0])); + T3b = VADD(T2V, T2Y); + STM2(&(xo[16]), T3b, ovs, &(xo[0])); + T3c = VSUB(T2Z, T30); + STM2(&(xo[32]), T3c, ovs, &(xo[0])); + } + { + V T3d, T3e, T3f, T3g; + { + V T2v, T2z, T2y, T2A; + { + V T2t, T2u, T2w, T2x; + T2t = VADD(T2k, T2l); + T2u = VADD(T1X, T24); + T2v = VADD(T2t, T2u); + T2z = VSUB(T2t, T2u); + T2w = VADD(T2f, T2c); + T2x = VADD(T2n, T2o); + T2y = VBYI(VADD(T2w, T2x)); + T2A = VBYI(VSUB(T2x, T2w)); + } + T3d = 
VSUB(T2v, T2y); + STM2(&(xo[60]), T3d, ovs, &(xo[0])); + T3e = VADD(T2z, T2A); + STM2(&(xo[28]), T3e, ovs, &(xo[0])); + T3f = VADD(T2v, T2y); + STM2(&(xo[4]), T3f, ovs, &(xo[0])); + T3g = VSUB(T2z, T2A); + STM2(&(xo[36]), T3g, ovs, &(xo[0])); + } + { + V T1r, T1C, T1M, T1K, T1F, T1N, T1y, T1J; + T1r = VSUB(T1p, T1q); + T1C = VSUB(T1A, T1B); + T1M = VADD(T1p, T1q); + T1K = VADD(T1B, T1A); + { + V T1D, T1E, T1u, T1x; + T1D = VFNMS(LDK(KP195090322), T1s, VMUL(LDK(KP980785280), T1t)); + T1E = VFMA(LDK(KP195090322), T1v, VMUL(LDK(KP980785280), T1w)); + T1F = VSUB(T1D, T1E); + T1N = VADD(T1D, T1E); + T1u = VFMA(LDK(KP980785280), T1s, VMUL(LDK(KP195090322), T1t)); + T1x = VFNMS(LDK(KP195090322), T1w, VMUL(LDK(KP980785280), T1v)); + T1y = VSUB(T1u, T1x); + T1J = VADD(T1u, T1x); + } + { + V T1z, T1G, T3h, T3i; + T1z = VADD(T1r, T1y); + T1G = VBYI(VADD(T1C, T1F)); + T3h = VSUB(T1z, T1G); + STM2(&(xo[50]), T3h, ovs, &(xo[2])); + STN2(&(xo[48]), T39, T3h, ovs); + T3i = VADD(T1z, T1G); + STM2(&(xo[14]), T3i, ovs, &(xo[2])); + STN2(&(xo[12]), T38, T3i, ovs); + } + { + V T1P, T1Q, T3j, T3k; + T1P = VBYI(VADD(T1K, T1J)); + T1Q = VADD(T1M, T1N); + T3j = VADD(T1P, T1Q); + STM2(&(xo[2]), T3j, ovs, &(xo[2])); + STN2(&(xo[0]), T3a, T3j, ovs); + T3k = VSUB(T1Q, T1P); + STM2(&(xo[62]), T3k, ovs, &(xo[2])); + STN2(&(xo[60]), T3d, T3k, ovs); + } + { + V T1H, T1I, T3l, T3m; + T1H = VSUB(T1r, T1y); + T1I = VBYI(VSUB(T1F, T1C)); + T3l = VSUB(T1H, T1I); + STM2(&(xo[46]), T3l, ovs, &(xo[2])); + STN2(&(xo[44]), T37, T3l, ovs); + T3m = VADD(T1H, T1I); + STM2(&(xo[18]), T3m, ovs, &(xo[2])); + STN2(&(xo[16]), T3b, T3m, ovs); + } + { + V T1L, T1O, T3n, T3o; + T1L = VBYI(VSUB(T1J, T1K)); + T1O = VSUB(T1M, T1N); + T3n = VADD(T1L, T1O); + STM2(&(xo[30]), T3n, ovs, &(xo[2])); + STN2(&(xo[28]), T3e, T3n, ovs); + T3o = VSUB(T1O, T1L); + STM2(&(xo[34]), T3o, ovs, &(xo[2])); + STN2(&(xo[32]), T3c, T3o, ovs); + } + } + { + V Tr, T1a, T1k, T1i, T1d, T1l, T10, T1h; + Tr = VSUB(Tb, Tq); + T1a = VSUB(T14, T19); 
+ T1k = VADD(Tb, Tq); + T1i = VADD(T19, T14); + { + V T1b, T1c, TI, TZ; + T1b = VFNMS(LDK(KP555570233), TC, VMUL(LDK(KP831469612), TH)); + T1c = VFMA(LDK(KP555570233), TT, VMUL(LDK(KP831469612), TY)); + T1d = VSUB(T1b, T1c); + T1l = VADD(T1b, T1c); + TI = VFMA(LDK(KP831469612), TC, VMUL(LDK(KP555570233), TH)); + TZ = VFNMS(LDK(KP555570233), TY, VMUL(LDK(KP831469612), TT)); + T10 = VSUB(TI, TZ); + T1h = VADD(TI, TZ); + } + { + V T11, T1e, T3p, T3q; + T11 = VADD(Tr, T10); + T1e = VBYI(VADD(T1a, T1d)); + T3p = VSUB(T11, T1e); + STM2(&(xo[54]), T3p, ovs, &(xo[2])); + STN2(&(xo[52]), T36, T3p, ovs); + T3q = VADD(T11, T1e); + STM2(&(xo[10]), T3q, ovs, &(xo[2])); + STN2(&(xo[8]), T34, T3q, ovs); + } + { + V T1n, T1o, T3r, T3s; + T1n = VBYI(VADD(T1i, T1h)); + T1o = VADD(T1k, T1l); + T3r = VADD(T1n, T1o); + STM2(&(xo[6]), T3r, ovs, &(xo[2])); + STN2(&(xo[4]), T3f, T3r, ovs); + T3s = VSUB(T1o, T1n); + STM2(&(xo[58]), T3s, ovs, &(xo[2])); + STN2(&(xo[56]), T32, T3s, ovs); + } + { + V T1f, T1g, T3t, T3u; + T1f = VSUB(Tr, T10); + T1g = VBYI(VSUB(T1d, T1a)); + T3t = VSUB(T1f, T1g); + STM2(&(xo[42]), T3t, ovs, &(xo[2])); + STN2(&(xo[40]), T33, T3t, ovs); + T3u = VADD(T1f, T1g); + STM2(&(xo[22]), T3u, ovs, &(xo[2])); + STN2(&(xo[20]), T35, T3u, ovs); + } + { + V T1j, T1m, T3v, T3w; + T1j = VBYI(VSUB(T1h, T1i)); + T1m = VSUB(T1k, T1l); + T3v = VADD(T1j, T1m); + STM2(&(xo[26]), T3v, ovs, &(xo[2])); + STN2(&(xo[24]), T31, T3v, ovs); + T3w = VSUB(T1m, T1j); + STM2(&(xo[38]), T3w, ovs, &(xo[2])); + STN2(&(xo[36]), T3g, T3w, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n2bv_32"), {170, 26, 16, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_32) (planner *p) { + X(kdft_register) (p, n2bv_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_4.c Tue Oct 06 13:08:39 2015 
+0100 @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 4 -name n2bv_4 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 8 FP additions, 2 FP multiplications, + * (or, 6 additions, 0 multiplications, 2 fused multiply/add), + * 15 stack variables, 0 constants, and 10 memory accesses + */ +#include "n2b.h" + +static void n2bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { 
+ V T3, T7, T6, T8; + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T6 = VSUB(T4, T5); + T8 = VADD(T4, T5); + { + V T9, Ta, Tb, Tc; + T9 = VSUB(T7, T8); + STM2(&(xo[4]), T9, ovs, &(xo[0])); + Ta = VADD(T7, T8); + STM2(&(xo[0]), Ta, ovs, &(xo[0])); + Tb = VFMAI(T6, T3); + STM2(&(xo[2]), Tb, ovs, &(xo[2])); + STN2(&(xo[0]), Ta, Tb, ovs); + Tc = VFNMSI(T6, T3); + STM2(&(xo[6]), Tc, ovs, &(xo[2])); + STN2(&(xo[4]), T9, Tc, ovs); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n2bv_4"), {6, 0, 2, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_4) (planner *p) { + X(kdft_register) (p, n2bv_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 4 -name n2bv_4 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 8 FP additions, 0 FP multiplications, + * (or, 8 additions, 0 multiplications, 0 fused multiply/add), + * 11 stack variables, 0 constants, and 10 memory accesses + */ +#include "n2b.h" + +static void n2bv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T3, T7, T6, T8; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VBYI(VSUB(T4, T5)); + T8 = VADD(T4, T5); + } + { + V T9, Ta, Tb, Tc; + T9 = VSUB(T3, T6); + STM2(&(xo[6]), T9, ovs, &(xo[2])); + Ta = VADD(T7, T8); + STM2(&(xo[0]), Ta, ovs, &(xo[0])); + Tb = VADD(T3, T6); + STM2(&(xo[2]), Tb, ovs, &(xo[2])); + STN2(&(xo[0]), Ta, Tb, ovs); + Tc = VSUB(T7, T8); + STM2(&(xo[4]), Tc, ovs, &(xo[0])); + STN2(&(xo[4]), 
Tc, T9, ovs); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n2bv_4"), {8, 0, 0, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_4) (planner *p) { + X(kdft_register) (p, n2bv_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 6 -name n2bv_6 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 18 FP additions, 8 FP multiplications, + * (or, 12 additions, 2 multiplications, 6 fused multiply/add), + * 29 stack variables, 2 constants, and 15 memory accesses + */ +#include "n2b.h" + +static void n2bv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V T1, T2, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + { + V T3, Td, T6, Te, T9, Tf; + T3 = VSUB(T1, T2); + Td = VADD(T1, T2); + T6 = VSUB(T4, T5); + Te = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); + { + V Tg, Ti, Ta, Tc; + Tg = VADD(Te, Tf); + Ti = VMUL(LDK(KP866025403), VSUB(Te, Tf)); + Ta = VADD(T6, T9); + Tc = VMUL(LDK(KP866025403), VSUB(T6, T9)); + { + V Th, Tj, Tb, Tk; + Th = VFNMS(LDK(KP500000000), Tg, Td); + Tj = VADD(Td, Tg); + 
STM2(&(xo[0]), Tj, ovs, &(xo[0])); + Tb = VFNMS(LDK(KP500000000), Ta, T3); + Tk = VADD(T3, Ta); + STM2(&(xo[6]), Tk, ovs, &(xo[2])); + { + V Tl, Tm, Tn, To; + Tl = VFMAI(Ti, Th); + STM2(&(xo[8]), Tl, ovs, &(xo[0])); + Tm = VFNMSI(Ti, Th); + STM2(&(xo[4]), Tm, ovs, &(xo[0])); + STN2(&(xo[4]), Tm, Tk, ovs); + Tn = VFNMSI(Tc, Tb); + STM2(&(xo[10]), Tn, ovs, &(xo[2])); + STN2(&(xo[8]), Tl, Tn, ovs); + To = VFMAI(Tc, Tb); + STM2(&(xo[2]), To, ovs, &(xo[2])); + STN2(&(xo[0]), Tj, To, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n2bv_6"), {12, 2, 6, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_6) (planner *p) { + X(kdft_register) (p, n2bv_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 6 -name n2bv_6 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 18 FP additions, 4 FP multiplications, + * (or, 16 additions, 2 multiplications, 2 fused multiply/add), + * 25 stack variables, 2 constants, and 15 memory accesses + */ +#include "n2b.h" + +static void n2bv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V Ta, Td, T3, Te, T6, Tf, Tb, Tg, T8, T9, Tj, Tk; + T8 = LD(&(xi[0]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + Td = VADD(T8, T9); + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Te = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 
1)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tf = VADD(T4, T5); + } + Tb = VADD(T3, T6); + Tg = VADD(Te, Tf); + Tj = VADD(Ta, Tb); + STM2(&(xo[6]), Tj, ovs, &(xo[2])); + Tk = VADD(Td, Tg); + STM2(&(xo[0]), Tk, ovs, &(xo[0])); + { + V Tm, T7, Tc, Tl; + T7 = VBYI(VMUL(LDK(KP866025403), VSUB(T3, T6))); + Tc = VFNMS(LDK(KP500000000), Tb, Ta); + Tl = VADD(T7, Tc); + STM2(&(xo[2]), Tl, ovs, &(xo[2])); + STN2(&(xo[0]), Tk, Tl, ovs); + Tm = VSUB(Tc, T7); + STM2(&(xo[10]), Tm, ovs, &(xo[2])); + { + V Th, Ti, Tn, To; + Th = VFNMS(LDK(KP500000000), Tg, Td); + Ti = VBYI(VMUL(LDK(KP866025403), VSUB(Te, Tf))); + Tn = VSUB(Th, Ti); + STM2(&(xo[4]), Tn, ovs, &(xo[0])); + STN2(&(xo[4]), Tn, Tj, ovs); + To = VADD(Ti, Th); + STM2(&(xo[8]), To, ovs, &(xo[0])); + STN2(&(xo[8]), To, Tm, ovs); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n2bv_6"), {16, 2, 2, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_6) (planner *p) { + X(kdft_register) (p, n2bv_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1815 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:02 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 64 -name n2bv_64 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 456 FP additions, 258 FP multiplications, + * (or, 198 additions, 0 multiplications, 258 fused multiply/add), + * 178 stack variables, 15 constants, and 160 memory accesses + */ +#include "n2b.h" + +static void n2bv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, 
+0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T7z, T7A, T7B, T7C, T5T, T5S, T5X, T65, T8a, T8b, T8e, T8g, T5Z, T5R, T67; + V T63, T5U, T64; + { + V T7, T26, T5k, T6A, T47, T69, T2V, T3z, T6B, T4e, T6a, T5n, T3M, T2Y, T27; + V Tm, T3A, T3i, T29, TC, T5p, T4o, T6D, T6e, T3l, T3B, TR, T2a, T4x, T5q; + V T6h, T6E, T39, T3H, T3I, T3c, T5N, T57, T72, T6w, T5O, T5e, T71, T6t, T2y; + V T1W, T2x, T1N, T33, T34, T3E, T32, T1p, T2v, T1g, T2u, T4M, T5K, T6p, T6Z; + V T6m, T6Y, T5L, T4T; + { + V T4g, T4l, T3g, Tu, Tx, T4h, TA, T4i; + { + V T1, T2, T23, T24, T4, T5, T20, T21; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T23 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T24 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T20 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T21 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + { + V Ta, T48, Tk, T4c, T49, Td, Tf, Tg; + { + V T8, T43, T3, T45, T25, T5i, T6, T44, T22, T9, Ti, Tj, Tb, Tc; + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T43 = VSUB(T1, T2); + T3 = VADD(T1, T2); + T45 = VSUB(T23, T24); + T25 = VADD(T23, T24); + T5i = VSUB(T4, T5); + T6 = VADD(T4, T5); + T44 = VSUB(T20, T21); + T22 = VADD(T20, T21); + T9 = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Ti = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + { + V T2T, T46, T5j, T2U; + T7 = VSUB(T3, T6); + T2T = VADD(T3, T6); + T46 = VADD(T44, T45); + T5j = VSUB(T44, T45); + T26 = VSUB(T22, T25); + T2U = VADD(T22, T25); + Ta = VADD(T8, T9); + T48 = VSUB(T8, T9); + Tk = VADD(Ti, Tj); + T4c = VSUB(Tj, Ti); + T5k = VFMA(LDK(KP707106781), T5j, T5i); + T6A = 
VFNMS(LDK(KP707106781), T5j, T5i); + T47 = VFMA(LDK(KP707106781), T46, T43); + T69 = VFNMS(LDK(KP707106781), T46, T43); + T2V = VADD(T2T, T2U); + T3z = VSUB(T2T, T2U); + T49 = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + } + Tf = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + } + { + V Te, T2W, T5l, T4a, Tq, Tt, Tv, Tw, T5m, T4d, Tl, T2X, Ty, Tz, To; + V Tp; + To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tp = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + { + V Th, T4b, Tr, Ts; + Tr = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + Te = VSUB(Ta, Td); + T2W = VADD(Ta, Td); + T5l = VFMA(LDK(KP414213562), T48, T49); + T4a = VFNMS(LDK(KP414213562), T49, T48); + Th = VADD(Tf, Tg); + T4b = VSUB(Tf, Tg); + Tq = VADD(To, Tp); + T4g = VSUB(To, Tp); + T4l = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + Tv = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T5m = VFMA(LDK(KP414213562), T4b, T4c); + T4d = VFNMS(LDK(KP414213562), T4c, T4b); + Tl = VSUB(Th, Tk); + T2X = VADD(Th, Tk); + Ty = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + } + T3g = VADD(Tq, Tt); + Tu = VSUB(Tq, Tt); + Tx = VADD(Tv, Tw); + T4h = VSUB(Tv, Tw); + T6B = VSUB(T4a, T4d); + T4e = VADD(T4a, T4d); + T6a = VADD(T5l, T5m); + T5n = VSUB(T5l, T5m); + T3M = VSUB(T2W, T2X); + T2Y = VADD(T2W, T2X); + T27 = VSUB(Te, Tl); + Tm = VADD(Te, Tl); + TA = VADD(Ty, Tz); + T4i = VSUB(Ty, Tz); + } + } + } + { + V TK, T4p, T4u, T4k, T6d, T4n, T6c, TL, TN, TO, T3j, TJ, TF, TI; + { + V TD, TE, TG, TH; + TD = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TE = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + TG = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TH = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + { + V T3h, TB, T4j, T4m; + T3h = VADD(Tx, TA); + TB = VSUB(Tx, TA); + T4j = VADD(T4h, T4i); + T4m = VSUB(T4h, T4i); + T4p = VSUB(TD, TE); + TF = VADD(TD, TE); + T4u = VSUB(TH, TG); + TI 
= VADD(TG, TH); + T3A = VSUB(T3g, T3h); + T3i = VADD(T3g, T3h); + T29 = VFMA(LDK(KP414213562), Tu, TB); + TC = VFNMS(LDK(KP414213562), TB, Tu); + T4k = VFMA(LDK(KP707106781), T4j, T4g); + T6d = VFNMS(LDK(KP707106781), T4j, T4g); + T4n = VFMA(LDK(KP707106781), T4m, T4l); + T6c = VFNMS(LDK(KP707106781), T4m, T4l); + TL = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + } + TN = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TO = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + } + T3j = VADD(TF, TI); + TJ = VSUB(TF, TI); + { + V T3a, T1E, T52, T5b, T1x, T4Z, T6r, T6u, T5a, T1U, T55, T5c, T1L, T3b; + { + V T4V, T1t, T58, T1w, T1Q, T1T, T1I, T4Y, T59, T1J, T53, T1H; + { + V T1r, TM, T4r, TP, T4q, T1s, T1u, T1v; + T1r = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T5p = VFMA(LDK(KP198912367), T4k, T4n); + T4o = VFNMS(LDK(KP198912367), T4n, T4k); + T6D = VFMA(LDK(KP668178637), T6c, T6d); + T6e = VFNMS(LDK(KP668178637), T6d, T6c); + TM = VADD(TK, TL); + T4r = VSUB(TK, TL); + TP = VADD(TN, TO); + T4q = VSUB(TN, TO); + T1s = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1v = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + { + V T1R, T4X, T6g, T4t, T6f, T4w, T1S, T1O, T1P; + T1O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1R = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V T3k, TQ, T4s, T4v; + T3k = VADD(TP, TM); + TQ = VSUB(TM, TP); + T4s = VADD(T4q, T4r); + T4v = VSUB(T4r, T4q); + T4V = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + T58 = VSUB(T1v, T1u); + T1w = VADD(T1u, T1v); + T4X = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + T3l = VADD(T3j, T3k); + T3B = VSUB(T3j, T3k); + TR = VFNMS(LDK(KP414213562), TQ, TJ); + T2a = VFMA(LDK(KP414213562), TJ, TQ); + T6g = VFNMS(LDK(KP707106781), T4s, T4p); + T4t = VFMA(LDK(KP707106781), T4s, T4p); + T6f = VFNMS(LDK(KP707106781), T4v, T4u); + T4w = VFMA(LDK(KP707106781), T4v, T4u); + T1S = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + } + { 
+ V T4W, T1A, T50, T51, T1D, T1F, T1G; + { + V T1y, T1z, T1B, T1C; + T1y = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1z = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1B = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1C = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T4x = VFNMS(LDK(KP198912367), T4w, T4t); + T5q = VFMA(LDK(KP198912367), T4t, T4w); + T6h = VFNMS(LDK(KP668178637), T6g, T6f); + T6E = VFMA(LDK(KP668178637), T6f, T6g); + T4W = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + T1A = VADD(T1y, T1z); + T50 = VSUB(T1y, T1z); + T51 = VSUB(T1C, T1B); + T1D = VADD(T1B, T1C); + } + T1F = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1G = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1I = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4Y = VADD(T4W, T4X); + T59 = VSUB(T4X, T4W); + T1J = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T3a = VADD(T1A, T1D); + T1E = VSUB(T1A, T1D); + T52 = VFMA(LDK(KP414213562), T51, T50); + T5b = VFNMS(LDK(KP414213562), T50, T51); + T53 = VSUB(T1F, T1G); + T1H = VADD(T1F, T1G); + } + } + } + { + V T37, T54, T1K, T38; + T1x = VSUB(T1t, T1w); + T37 = VADD(T1t, T1w); + T4Z = VFMA(LDK(KP707106781), T4Y, T4V); + T6r = VFNMS(LDK(KP707106781), T4Y, T4V); + T54 = VSUB(T1J, T1I); + T1K = VADD(T1I, T1J); + T6u = VFNMS(LDK(KP707106781), T59, T58); + T5a = VFMA(LDK(KP707106781), T59, T58); + T38 = VADD(T1T, T1Q); + T1U = VSUB(T1Q, T1T); + T55 = VFNMS(LDK(KP414213562), T54, T53); + T5c = VFMA(LDK(KP414213562), T53, T54); + T1L = VSUB(T1H, T1K); + T3b = VADD(T1H, T1K); + T39 = VADD(T37, T38); + T3H = VSUB(T37, T38); + } + } + { + V T4A, TW, T4N, TZ, T1j, T1m, T4O, T4D, T13, T4F, T16, T4G, T1a, T4I, T4J; + V T1d; + { + V TU, TV, TX, TY, T56, T6v; + TU = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T56 = VADD(T52, T55); + T6v = VSUB(T55, T52); + { + V T5d, T6s, T1V, T1M; + T5d = VADD(T5b, T5c); + T6s = VSUB(T5c, T5b); + T1V = VSUB(T1L, T1E); + T1M = VADD(T1E, T1L); + T3I = VSUB(T3b, T3a); + T3c = VADD(T3a, T3b); + T5N = 
VFNMS(LDK(KP923879532), T56, T4Z); + T57 = VFMA(LDK(KP923879532), T56, T4Z); + T72 = VFNMS(LDK(KP923879532), T6v, T6u); + T6w = VFMA(LDK(KP923879532), T6v, T6u); + T5O = VFNMS(LDK(KP923879532), T5d, T5a); + T5e = VFMA(LDK(KP923879532), T5d, T5a); + T71 = VFMA(LDK(KP923879532), T6s, T6r); + T6t = VFNMS(LDK(KP923879532), T6s, T6r); + T2y = VFNMS(LDK(KP707106781), T1V, T1U); + T1W = VFMA(LDK(KP707106781), T1V, T1U); + T2x = VFNMS(LDK(KP707106781), T1M, T1x); + T1N = VFMA(LDK(KP707106781), T1M, T1x); + TV = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + } + TX = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TY = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + { + V T1h, T1i, T1k, T1l; + T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + { + V T11, T4B, T4C, T12, T14, T15; + T11 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T4A = VSUB(TU, TV); + TW = VADD(TU, TV); + T4N = VSUB(TX, TY); + TZ = VADD(TX, TY); + T1j = VADD(T1h, T1i); + T4B = VSUB(T1h, T1i); + T1m = VADD(T1k, T1l); + T4C = VSUB(T1k, T1l); + T12 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T14 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T15 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + { + V T18, T19, T1b, T1c; + T18 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T19 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1b = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T1c = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T4O = VSUB(T4B, T4C); + T4D = VADD(T4B, T4C); + T13 = VADD(T11, T12); + T4F = VSUB(T11, T12); + T16 = VADD(T14, T15); + T4G = VSUB(T14, T15); + T1a = VADD(T18, T19); + T4I = VSUB(T18, T19); + T4J = VSUB(T1b, T1c); + T1d = VADD(T1b, T1c); + } + } + } + } + { + V T30, T10, T6k, T4E, T4Q, T4H, T17, T6n, T4P, T1e, T4K, T4R, T1n, T31; + T30 = VADD(TW, TZ); + T10 = VSUB(TW, TZ); + T6k = VFNMS(LDK(KP707106781), 
T4D, T4A); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4Q = VFMA(LDK(KP414213562), T4F, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4F); + T33 = VADD(T13, T16); + T17 = VSUB(T13, T16); + T6n = VFNMS(LDK(KP707106781), T4O, T4N); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T34 = VADD(T1a, T1d); + T1e = VSUB(T1a, T1d); + T4K = VFMA(LDK(KP414213562), T4J, T4I); + T4R = VFNMS(LDK(KP414213562), T4I, T4J); + T1n = VSUB(T1j, T1m); + T31 = VADD(T1j, T1m); + { + V T1f, T1o, T6o, T4L, T4S, T6l; + T1f = VADD(T17, T1e); + T1o = VSUB(T17, T1e); + T6o = VSUB(T4H, T4K); + T4L = VADD(T4H, T4K); + T4S = VADD(T4Q, T4R); + T6l = VSUB(T4Q, T4R); + T3E = VSUB(T30, T31); + T32 = VADD(T30, T31); + T1p = VFMA(LDK(KP707106781), T1o, T1n); + T2v = VFNMS(LDK(KP707106781), T1o, T1n); + T1g = VFMA(LDK(KP707106781), T1f, T10); + T2u = VFNMS(LDK(KP707106781), T1f, T10); + T4M = VFMA(LDK(KP923879532), T4L, T4E); + T5K = VFNMS(LDK(KP923879532), T4L, T4E); + T6p = VFMA(LDK(KP923879532), T6o, T6n); + T6Z = VFNMS(LDK(KP923879532), T6o, T6n); + T6m = VFNMS(LDK(KP923879532), T6l, T6k); + T6Y = VFMA(LDK(KP923879532), T6l, T6k); + T5L = VFNMS(LDK(KP923879532), T4S, T4P); + T4T = VFMA(LDK(KP923879532), T4S, T4P); + } + } + } + } + } + } + { + V T6b, T6F, T7n, T7o, T7p, T7q, T7r, T7s, T7t, T7u, T7v, T7w, T7x, T7y, T7f; + V T6X, T70, T79, T7a, T73, T6C, T76, T77, T6i; + { + V T2Z, T3r, T3s, T3m, T3d, T3v; + T2Z = VSUB(T2V, T2Y); + T3r = VADD(T2V, T2Y); + T3s = VADD(T3i, T3l); + T3m = VSUB(T3i, T3l); + T3d = VSUB(T39, T3c); + T3v = VADD(T39, T3c); + { + V T3x, T3t, T3Q, T3J, T3D, T3V, T3G, T3P, T3u, T36, T3O, T3Y, T6V, T6W; + { + V T3N, T3C, T3F, T35; + T3N = VSUB(T3A, T3B); + T3C = VADD(T3A, T3B); + T3F = VSUB(T33, T34); + T35 = VADD(T33, T34); + T3x = VADD(T3r, T3s); + T3t = VSUB(T3r, T3s); + T3Q = VFMA(LDK(KP414213562), T3H, T3I); + T3J = VFNMS(LDK(KP414213562), T3I, T3H); + T3D = VFMA(LDK(KP707106781), T3C, T3z); + T3V = VFNMS(LDK(KP707106781), T3C, T3z); + T3G = VFNMS(LDK(KP414213562), T3F, T3E); + T3P 
= VFMA(LDK(KP414213562), T3E, T3F); + T3u = VADD(T32, T35); + T36 = VSUB(T32, T35); + T3O = VFMA(LDK(KP707106781), T3N, T3M); + T3Y = VFNMS(LDK(KP707106781), T3N, T3M); + } + T6b = VFNMS(LDK(KP923879532), T6a, T69); + T6V = VFMA(LDK(KP923879532), T6a, T69); + T6W = VADD(T6D, T6E); + T6F = VSUB(T6D, T6E); + { + V T3R, T3W, T3K, T3Z; + T3R = VSUB(T3P, T3Q); + T3W = VADD(T3P, T3Q); + T3K = VADD(T3G, T3J); + T3Z = VSUB(T3G, T3J); + { + V T3e, T3n, T3w, T3y; + T3e = VADD(T36, T3d); + T3n = VSUB(T36, T3d); + T3w = VSUB(T3u, T3v); + T3y = VADD(T3u, T3v); + { + V T41, T3X, T3S, T3U; + T41 = VFMA(LDK(KP923879532), T3W, T3V); + T3X = VFNMS(LDK(KP923879532), T3W, T3V); + T3S = VFNMS(LDK(KP923879532), T3R, T3O); + T3U = VFMA(LDK(KP923879532), T3R, T3O); + { + V T42, T40, T3L, T3T; + T42 = VFNMS(LDK(KP923879532), T3Z, T3Y); + T40 = VFMA(LDK(KP923879532), T3Z, T3Y); + T3L = VFNMS(LDK(KP923879532), T3K, T3D); + T3T = VFMA(LDK(KP923879532), T3K, T3D); + { + V T3o, T3q, T3f, T3p; + T3o = VFNMS(LDK(KP707106781), T3n, T3m); + T3q = VFMA(LDK(KP707106781), T3n, T3m); + T3f = VFNMS(LDK(KP707106781), T3e, T2Z); + T3p = VFMA(LDK(KP707106781), T3e, T2Z); + T7n = VSUB(T3x, T3y); + STM2(&(xo[64]), T7n, ovs, &(xo[0])); + T7o = VADD(T3x, T3y); + STM2(&(xo[0]), T7o, ovs, &(xo[0])); + T7p = VFMAI(T3w, T3t); + STM2(&(xo[32]), T7p, ovs, &(xo[0])); + T7q = VFNMSI(T3w, T3t); + STM2(&(xo[96]), T7q, ovs, &(xo[0])); + T7r = VFNMSI(T40, T3X); + STM2(&(xo[88]), T7r, ovs, &(xo[0])); + T7s = VFMAI(T40, T3X); + STM2(&(xo[40]), T7s, ovs, &(xo[0])); + T7t = VFMAI(T42, T41); + STM2(&(xo[104]), T7t, ovs, &(xo[0])); + T7u = VFNMSI(T42, T41); + STM2(&(xo[24]), T7u, ovs, &(xo[0])); + T7v = VFMAI(T3U, T3T); + STM2(&(xo[8]), T7v, ovs, &(xo[0])); + T7w = VFNMSI(T3U, T3T); + STM2(&(xo[120]), T7w, ovs, &(xo[0])); + T7x = VFMAI(T3S, T3L); + STM2(&(xo[72]), T7x, ovs, &(xo[0])); + T7y = VFNMSI(T3S, T3L); + STM2(&(xo[56]), T7y, ovs, &(xo[0])); + T7z = VFNMSI(T3q, T3p); + STM2(&(xo[112]), T7z, ovs, &(xo[0])); + T7A = 
VFMAI(T3q, T3p); + STM2(&(xo[16]), T7A, ovs, &(xo[0])); + T7B = VFMAI(T3o, T3f); + STM2(&(xo[80]), T7B, ovs, &(xo[0])); + T7C = VFNMSI(T3o, T3f); + STM2(&(xo[48]), T7C, ovs, &(xo[0])); + T7f = VFNMS(LDK(KP831469612), T6W, T6V); + T6X = VFMA(LDK(KP831469612), T6W, T6V); + } + } + } + } + } + T70 = VFMA(LDK(KP303346683), T6Z, T6Y); + T79 = VFNMS(LDK(KP303346683), T6Y, T6Z); + T7a = VFNMS(LDK(KP303346683), T71, T72); + T73 = VFMA(LDK(KP303346683), T72, T71); + T6C = VFMA(LDK(KP923879532), T6B, T6A); + T76 = VFNMS(LDK(KP923879532), T6B, T6A); + T77 = VSUB(T6e, T6h); + T6i = VADD(T6e, T6h); + } + } + { + V T2r, T2D, T2C, T2s, T5H, T5o, T5v, T5D, T7L, T7O, T7Q, T7S, T5r, T5I, T5x; + V T5h, T5F, T5B; + { + V TT, T2f, T7E, T7F, T7I, T7K, T2n, T1Y, T28, T2b, T2l, T2p, T2j, T2k; + { + V T1q, T2d, T7h, T7l, T2e, T1X, T75, T7d, T7m, T7k, T7c, T7e, Tn, TS; + T2r = VFNMS(LDK(KP707106781), Tm, T7); + Tn = VFMA(LDK(KP707106781), Tm, T7); + TS = VADD(TC, TR); + T2D = VSUB(TC, TR); + { + V T7b, T7j, T74, T7i, T78, T7g; + T1q = VFNMS(LDK(KP198912367), T1p, T1g); + T2d = VFMA(LDK(KP198912367), T1g, T1p); + T7g = VADD(T79, T7a); + T7b = VSUB(T79, T7a); + T7j = VSUB(T70, T73); + T74 = VADD(T70, T73); + T7i = VFNMS(LDK(KP831469612), T77, T76); + T78 = VFMA(LDK(KP831469612), T77, T76); + T2j = VFNMS(LDK(KP923879532), TS, Tn); + TT = VFMA(LDK(KP923879532), TS, Tn); + T7h = VFMA(LDK(KP956940335), T7g, T7f); + T7l = VFNMS(LDK(KP956940335), T7g, T7f); + T2e = VFMA(LDK(KP198912367), T1N, T1W); + T1X = VFNMS(LDK(KP198912367), T1W, T1N); + T75 = VFNMS(LDK(KP956940335), T74, T6X); + T7d = VFMA(LDK(KP956940335), T74, T6X); + T7m = VFMA(LDK(KP956940335), T7j, T7i); + T7k = VFNMS(LDK(KP956940335), T7j, T7i); + T7c = VFNMS(LDK(KP956940335), T7b, T78); + T7e = VFMA(LDK(KP956940335), T7b, T78); + } + T2k = VADD(T2d, T2e); + T2f = VSUB(T2d, T2e); + { + V T7D, T7G, T7H, T7J; + T7D = VFMAI(T7k, T7h); + STM2(&(xo[90]), T7D, ovs, &(xo[2])); + STN2(&(xo[88]), T7r, T7D, ovs); + T7E = VFNMSI(T7k, T7h); + 
STM2(&(xo[38]), T7E, ovs, &(xo[2])); + T7F = VFNMSI(T7m, T7l); + STM2(&(xo[102]), T7F, ovs, &(xo[2])); + T7G = VFMAI(T7m, T7l); + STM2(&(xo[26]), T7G, ovs, &(xo[2])); + STN2(&(xo[24]), T7u, T7G, ovs); + T7H = VFMAI(T7e, T7d); + STM2(&(xo[122]), T7H, ovs, &(xo[2])); + STN2(&(xo[120]), T7w, T7H, ovs); + T7I = VFNMSI(T7e, T7d); + STM2(&(xo[6]), T7I, ovs, &(xo[2])); + T7J = VFMAI(T7c, T75); + STM2(&(xo[58]), T7J, ovs, &(xo[2])); + STN2(&(xo[56]), T7y, T7J, ovs); + T7K = VFNMSI(T7c, T75); + STM2(&(xo[70]), T7K, ovs, &(xo[2])); + T2n = VSUB(T1q, T1X); + T1Y = VADD(T1q, T1X); + } + T2C = VFNMS(LDK(KP707106781), T27, T26); + T28 = VFMA(LDK(KP707106781), T27, T26); + T2b = VSUB(T29, T2a); + T2s = VADD(T29, T2a); + } + T2l = VFNMS(LDK(KP980785280), T2k, T2j); + T2p = VFMA(LDK(KP980785280), T2k, T2j); + { + V T5z, T4z, T5A, T5g; + { + V T4f, T4y, T1Z, T2h, T4U, T5t, T2m, T2c, T5u, T5f; + T5H = VFNMS(LDK(KP923879532), T4e, T47); + T4f = VFMA(LDK(KP923879532), T4e, T47); + T4y = VADD(T4o, T4x); + T5T = VSUB(T4o, T4x); + T1Z = VFNMS(LDK(KP980785280), T1Y, TT); + T2h = VFMA(LDK(KP980785280), T1Y, TT); + T4U = VFNMS(LDK(KP098491403), T4T, T4M); + T5t = VFMA(LDK(KP098491403), T4M, T4T); + T2m = VFNMS(LDK(KP923879532), T2b, T28); + T2c = VFMA(LDK(KP923879532), T2b, T28); + T5u = VFMA(LDK(KP098491403), T57, T5e); + T5f = VFNMS(LDK(KP098491403), T5e, T57); + T5z = VFNMS(LDK(KP980785280), T4y, T4f); + T4z = VFMA(LDK(KP980785280), T4y, T4f); + T5S = VFNMS(LDK(KP923879532), T5n, T5k); + T5o = VFMA(LDK(KP923879532), T5n, T5k); + { + V T2o, T2q, T2i, T2g; + T2o = VFMA(LDK(KP980785280), T2n, T2m); + T2q = VFNMS(LDK(KP980785280), T2n, T2m); + T2i = VFMA(LDK(KP980785280), T2f, T2c); + T2g = VFNMS(LDK(KP980785280), T2f, T2c); + T5A = VADD(T5t, T5u); + T5v = VSUB(T5t, T5u); + T5D = VSUB(T4U, T5f); + T5g = VADD(T4U, T5f); + T7L = VFNMSI(T2o, T2l); + STM2(&(xo[92]), T7L, ovs, &(xo[0])); + { + V T7M, T7N, T7P, T7R; + T7M = VFMAI(T2o, T2l); + STM2(&(xo[36]), T7M, ovs, &(xo[0])); + STN2(&(xo[36]), 
T7M, T7E, ovs); + T7N = VFMAI(T2q, T2p); + STM2(&(xo[100]), T7N, ovs, &(xo[0])); + STN2(&(xo[100]), T7N, T7F, ovs); + T7O = VFNMSI(T2q, T2p); + STM2(&(xo[28]), T7O, ovs, &(xo[0])); + T7P = VFMAI(T2i, T2h); + STM2(&(xo[4]), T7P, ovs, &(xo[0])); + STN2(&(xo[4]), T7P, T7I, ovs); + T7Q = VFNMSI(T2i, T2h); + STM2(&(xo[124]), T7Q, ovs, &(xo[0])); + T7R = VFMAI(T2g, T1Z); + STM2(&(xo[68]), T7R, ovs, &(xo[0])); + STN2(&(xo[68]), T7R, T7K, ovs); + T7S = VFNMSI(T2g, T1Z); + STM2(&(xo[60]), T7S, ovs, &(xo[0])); + T5r = VSUB(T5p, T5q); + T5I = VADD(T5p, T5q); + } + } + } + T5x = VFMA(LDK(KP995184726), T5g, T4z); + T5h = VFNMS(LDK(KP995184726), T5g, T4z); + T5F = VFMA(LDK(KP995184726), T5A, T5z); + T5B = VFNMS(LDK(KP995184726), T5A, T5z); + } + } + { + V T6J, T6R, T6L, T6z, T6T, T6P; + { + V T6N, T6j, T6O, T6y; + { + V T6q, T6H, T5C, T5s, T6I, T6x; + T6q = VFNMS(LDK(KP534511135), T6p, T6m); + T6H = VFMA(LDK(KP534511135), T6m, T6p); + T5C = VFNMS(LDK(KP980785280), T5r, T5o); + T5s = VFMA(LDK(KP980785280), T5r, T5o); + T6I = VFMA(LDK(KP534511135), T6t, T6w); + T6x = VFNMS(LDK(KP534511135), T6w, T6t); + T6N = VFMA(LDK(KP831469612), T6i, T6b); + T6j = VFNMS(LDK(KP831469612), T6i, T6b); + { + V T5E, T5G, T5y, T5w; + T5E = VFMA(LDK(KP995184726), T5D, T5C); + T5G = VFNMS(LDK(KP995184726), T5D, T5C); + T5y = VFMA(LDK(KP995184726), T5v, T5s); + T5w = VFNMS(LDK(KP995184726), T5v, T5s); + T6O = VADD(T6H, T6I); + T6J = VSUB(T6H, T6I); + T6R = VSUB(T6q, T6x); + T6y = VADD(T6q, T6x); + { + V T7T, T7U, T7V, T7W; + T7T = VFNMSI(T5E, T5B); + STM2(&(xo[94]), T7T, ovs, &(xo[2])); + STN2(&(xo[92]), T7L, T7T, ovs); + T7U = VFMAI(T5E, T5B); + STM2(&(xo[34]), T7U, ovs, &(xo[2])); + STN2(&(xo[32]), T7p, T7U, ovs); + T7V = VFMAI(T5G, T5F); + STM2(&(xo[98]), T7V, ovs, &(xo[2])); + STN2(&(xo[96]), T7q, T7V, ovs); + T7W = VFNMSI(T5G, T5F); + STM2(&(xo[30]), T7W, ovs, &(xo[2])); + STN2(&(xo[28]), T7O, T7W, ovs); + { + V T7X, T7Y, T7Z, T80; + T7X = VFMAI(T5y, T5x); + STM2(&(xo[2]), T7X, ovs, &(xo[2])); + 
STN2(&(xo[0]), T7o, T7X, ovs); + T7Y = VFNMSI(T5y, T5x); + STM2(&(xo[126]), T7Y, ovs, &(xo[2])); + STN2(&(xo[124]), T7Q, T7Y, ovs); + T7Z = VFMAI(T5w, T5h); + STM2(&(xo[66]), T7Z, ovs, &(xo[2])); + STN2(&(xo[64]), T7n, T7Z, ovs); + T80 = VFNMSI(T5w, T5h); + STM2(&(xo[62]), T80, ovs, &(xo[2])); + STN2(&(xo[60]), T7S, T80, ovs); + } + } + } + } + T6L = VFMA(LDK(KP881921264), T6y, T6j); + T6z = VFNMS(LDK(KP881921264), T6y, T6j); + T6T = VFMA(LDK(KP881921264), T6O, T6N); + T6P = VFNMS(LDK(KP881921264), T6O, T6N); + } + { + V T2H, T2P, T81, T84, T86, T88, T2J, T2B, T2R, T2N; + { + V T2L, T2t, T2M, T2A; + { + V T2w, T2F, T6Q, T6G, T2G, T2z; + T2w = VFMA(LDK(KP668178637), T2v, T2u); + T2F = VFNMS(LDK(KP668178637), T2u, T2v); + T6Q = VFNMS(LDK(KP831469612), T6F, T6C); + T6G = VFMA(LDK(KP831469612), T6F, T6C); + T2G = VFNMS(LDK(KP668178637), T2x, T2y); + T2z = VFMA(LDK(KP668178637), T2y, T2x); + T2L = VFNMS(LDK(KP923879532), T2s, T2r); + T2t = VFMA(LDK(KP923879532), T2s, T2r); + { + V T6S, T6U, T6M, T6K; + T6S = VFMA(LDK(KP881921264), T6R, T6Q); + T6U = VFNMS(LDK(KP881921264), T6R, T6Q); + T6M = VFMA(LDK(KP881921264), T6J, T6G); + T6K = VFNMS(LDK(KP881921264), T6J, T6G); + T2M = VADD(T2F, T2G); + T2H = VSUB(T2F, T2G); + T2P = VSUB(T2w, T2z); + T2A = VADD(T2w, T2z); + T81 = VFNMSI(T6S, T6P); + STM2(&(xo[86]), T81, ovs, &(xo[2])); + { + V T82, T83, T85, T87; + T82 = VFMAI(T6S, T6P); + STM2(&(xo[42]), T82, ovs, &(xo[2])); + STN2(&(xo[40]), T7s, T82, ovs); + T83 = VFMAI(T6U, T6T); + STM2(&(xo[106]), T83, ovs, &(xo[2])); + STN2(&(xo[104]), T7t, T83, ovs); + T84 = VFNMSI(T6U, T6T); + STM2(&(xo[22]), T84, ovs, &(xo[2])); + T85 = VFMAI(T6M, T6L); + STM2(&(xo[10]), T85, ovs, &(xo[2])); + STN2(&(xo[8]), T7v, T85, ovs); + T86 = VFNMSI(T6M, T6L); + STM2(&(xo[118]), T86, ovs, &(xo[2])); + T87 = VFMAI(T6K, T6z); + STM2(&(xo[74]), T87, ovs, &(xo[2])); + STN2(&(xo[72]), T7x, T87, ovs); + T88 = VFNMSI(T6K, T6z); + STM2(&(xo[54]), T88, ovs, &(xo[2])); + } + } + } + T2J = 
VFMA(LDK(KP831469612), T2A, T2t); + T2B = VFNMS(LDK(KP831469612), T2A, T2t); + T2R = VFNMS(LDK(KP831469612), T2M, T2L); + T2N = VFMA(LDK(KP831469612), T2M, T2L); + } + { + V T61, T5J, T62, T5Q; + { + V T5M, T5V, T2O, T2E, T5W, T5P; + T5M = VFMA(LDK(KP820678790), T5L, T5K); + T5V = VFNMS(LDK(KP820678790), T5K, T5L); + T2O = VFMA(LDK(KP923879532), T2D, T2C); + T2E = VFNMS(LDK(KP923879532), T2D, T2C); + T5W = VFNMS(LDK(KP820678790), T5N, T5O); + T5P = VFMA(LDK(KP820678790), T5O, T5N); + T61 = VFNMS(LDK(KP980785280), T5I, T5H); + T5J = VFMA(LDK(KP980785280), T5I, T5H); + { + V T2Q, T2S, T2K, T2I; + T2Q = VFNMS(LDK(KP831469612), T2P, T2O); + T2S = VFMA(LDK(KP831469612), T2P, T2O); + T2K = VFMA(LDK(KP831469612), T2H, T2E); + T2I = VFNMS(LDK(KP831469612), T2H, T2E); + T62 = VADD(T5V, T5W); + T5X = VSUB(T5V, T5W); + T65 = VSUB(T5M, T5P); + T5Q = VADD(T5M, T5P); + { + V T89, T8c, T8d, T8f; + T89 = VFMAI(T2Q, T2N); + STM2(&(xo[84]), T89, ovs, &(xo[0])); + STN2(&(xo[84]), T89, T81, ovs); + T8a = VFNMSI(T2Q, T2N); + STM2(&(xo[44]), T8a, ovs, &(xo[0])); + T8b = VFNMSI(T2S, T2R); + STM2(&(xo[108]), T8b, ovs, &(xo[0])); + T8c = VFMAI(T2S, T2R); + STM2(&(xo[20]), T8c, ovs, &(xo[0])); + STN2(&(xo[20]), T8c, T84, ovs); + T8d = VFMAI(T2K, T2J); + STM2(&(xo[116]), T8d, ovs, &(xo[0])); + STN2(&(xo[116]), T8d, T86, ovs); + T8e = VFNMSI(T2K, T2J); + STM2(&(xo[12]), T8e, ovs, &(xo[0])); + T8f = VFMAI(T2I, T2B); + STM2(&(xo[52]), T8f, ovs, &(xo[0])); + STN2(&(xo[52]), T8f, T88, ovs); + T8g = VFNMSI(T2I, T2B); + STM2(&(xo[76]), T8g, ovs, &(xo[0])); + } + } + } + T5Z = VFMA(LDK(KP773010453), T5Q, T5J); + T5R = VFNMS(LDK(KP773010453), T5Q, T5J); + T67 = VFNMS(LDK(KP773010453), T62, T61); + T63 = VFMA(LDK(KP773010453), T62, T61); + } + } + } + } + } + } + T5U = VFNMS(LDK(KP980785280), T5T, T5S); + T64 = VFMA(LDK(KP980785280), T5T, T5S); + { + V T68, T66, T5Y, T60; + T68 = VFMA(LDK(KP773010453), T65, T64); + T66 = VFNMS(LDK(KP773010453), T65, T64); + T5Y = VFNMS(LDK(KP773010453), T5X, T5U); + 
T60 = VFMA(LDK(KP773010453), T5X, T5U); + { + V T8h, T8i, T8j, T8k; + T8h = VFMAI(T66, T63); + STM2(&(xo[82]), T8h, ovs, &(xo[2])); + STN2(&(xo[80]), T7B, T8h, ovs); + T8i = VFNMSI(T66, T63); + STM2(&(xo[46]), T8i, ovs, &(xo[2])); + STN2(&(xo[44]), T8a, T8i, ovs); + T8j = VFNMSI(T68, T67); + STM2(&(xo[110]), T8j, ovs, &(xo[2])); + STN2(&(xo[108]), T8b, T8j, ovs); + T8k = VFMAI(T68, T67); + STM2(&(xo[18]), T8k, ovs, &(xo[2])); + STN2(&(xo[16]), T7A, T8k, ovs); + { + V T8l, T8m, T8n, T8o; + T8l = VFMAI(T60, T5Z); + STM2(&(xo[114]), T8l, ovs, &(xo[2])); + STN2(&(xo[112]), T7z, T8l, ovs); + T8m = VFNMSI(T60, T5Z); + STM2(&(xo[14]), T8m, ovs, &(xo[2])); + STN2(&(xo[12]), T8e, T8m, ovs); + T8n = VFMAI(T5Y, T5R); + STM2(&(xo[50]), T8n, ovs, &(xo[2])); + STN2(&(xo[48]), T7C, T8n, ovs); + T8o = VFNMSI(T5Y, T5R); + STM2(&(xo[78]), T8o, ovs, &(xo[2])); + STN2(&(xo[76]), T8g, T8o, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n2bv_64"), {198, 0, 258, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_64) (planner *p) { + X(kdft_register) (p, n2bv_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 64 -name n2bv_64 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 456 FP additions, 124 FP multiplications, + * (or, 404 additions, 72 multiplications, 52 fused multiply/add), + * 128 stack variables, 15 constants, and 160 memory accesses + */ +#include "n2b.h" + +static void n2bv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP098017140, 
+0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ii; + xo = io; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T4p, T5u, Tb, T3A, T2q, T3v, T6G, T78, Tq, T3w, T6B, T79, T2l, T3B, T4w; + V T5r, TI, T2g, T6u, T74, T3q, T3D, T4E, T5o, TZ, T2h, T6x, T75, T3t, T3E; + V T4L, T5p, T23, T2N, T6m, T70, T6p, T71, T2c, T2O, T3i, T3Y, T5f, T5R, T5k; + V T5S, T3l, T3Z, T1s, T2K, T6f, T6X, T6i, T6Y, T1B, T2L, T3b, T3V, T4Y, T5O; + V T53, T5P, T3e, T3W; + { + V T3, T4n, T2p, T4o, T6, T5s, T9, T5t; + { + V T1, T2, T2n, T2o; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T4n = VADD(T1, T2); + T2n = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T2o = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T2p = VSUB(T2n, T2o); + T4o = VADD(T2n, T2o); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T5s = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T5t = VADD(T7, T8); + } + T4p = VSUB(T4n, T4o); + T5u = VSUB(T5s, T5t); + { + V Ta, T2m, T6E, T6F; + Ta 
= VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VSUB(T3, Ta); + T3A = VADD(T3, Ta); + T2m = VMUL(LDK(KP707106781), VSUB(T6, T9)); + T2q = VSUB(T2m, T2p); + T3v = VADD(T2p, T2m); + T6E = VADD(T4n, T4o); + T6F = VADD(T5s, T5t); + T6G = VSUB(T6E, T6F); + T78 = VADD(T6E, T6F); + } + } + { + V Te, T4q, To, T4t, Th, T4r, Tl, T4u; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T4q = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T4t = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T4r = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T4u = VADD(Tj, Tk); + } + { + V Ti, Tp, T6z, T6A; + Ti = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + Tp = VFNMS(LDK(KP382683432), To, VMUL(LDK(KP923879532), Tl)); + Tq = VSUB(Ti, Tp); + T3w = VADD(Ti, Tp); + T6z = VADD(T4q, T4r); + T6A = VADD(T4t, T4u); + T6B = VSUB(T6z, T6A); + T79 = VADD(T6z, T6A); + } + { + V T2j, T2k, T4s, T4v; + T2j = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + T2k = VFMA(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T2l = VSUB(T2j, T2k); + T3B = VADD(T2j, T2k); + T4s = VSUB(T4q, T4r); + T4v = VSUB(T4t, T4u); + T4w = VMUL(LDK(KP707106781), VADD(T4s, T4v)); + T5r = VMUL(LDK(KP707106781), VSUB(T4s, T4v)); + } + } + { + V TB, T4z, TF, T4y, Ty, T4C, TG, T4B; + { + V Tz, TA, TD, TE; + Tz = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + TA = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + TB = VSUB(Tz, TA); + T4z = VADD(Tz, TA); + TD = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TE = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + TF = VSUB(TD, TE); + T4y = VADD(TD, TE); + { + V Ts, Tt, Tu, Tv, Tw, Tx; + Ts = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 
42)]), ivs, &(xi[0])); + Tu = VSUB(Ts, Tt); + Tv = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Tx = VSUB(Tv, Tw); + Ty = VMUL(LDK(KP707106781), VSUB(Tu, Tx)); + T4C = VADD(Tv, Tw); + TG = VMUL(LDK(KP707106781), VADD(Tu, Tx)); + T4B = VADD(Ts, Tt); + } + } + { + V TC, TH, T6s, T6t; + TC = VSUB(Ty, TB); + TH = VSUB(TF, TG); + TI = VFMA(LDK(KP831469612), TC, VMUL(LDK(KP555570233), TH)); + T2g = VFNMS(LDK(KP555570233), TC, VMUL(LDK(KP831469612), TH)); + T6s = VADD(T4y, T4z); + T6t = VADD(T4B, T4C); + T6u = VSUB(T6s, T6t); + T74 = VADD(T6s, T6t); + } + { + V T3o, T3p, T4A, T4D; + T3o = VADD(TB, Ty); + T3p = VADD(TF, TG); + T3q = VFMA(LDK(KP980785280), T3o, VMUL(LDK(KP195090322), T3p)); + T3D = VFNMS(LDK(KP195090322), T3o, VMUL(LDK(KP980785280), T3p)); + T4A = VSUB(T4y, T4z); + T4D = VSUB(T4B, T4C); + T4E = VFMA(LDK(KP382683432), T4A, VMUL(LDK(KP923879532), T4D)); + T5o = VFNMS(LDK(KP382683432), T4D, VMUL(LDK(KP923879532), T4A)); + } + } + { + V TS, T4J, TW, T4I, TP, T4G, TX, T4F; + { + V TQ, TR, TU, TV; + TQ = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TR = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TS = VSUB(TQ, TR); + T4J = VADD(TQ, TR); + TU = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + TW = VSUB(TU, TV); + T4I = VADD(TU, TV); + { + V TJ, TK, TL, TM, TN, TO; + TJ = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + TL = VSUB(TJ, TK); + TM = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TO = VSUB(TM, TN); + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + T4G = VADD(TM, TN); + TX = VMUL(LDK(KP707106781), VADD(TL, TO)); + T4F = VADD(TJ, TK); + } + } + { + V TT, TY, T6v, T6w; + TT = VSUB(TP, TS); + TY = VSUB(TW, TX); + TZ = VFNMS(LDK(KP555570233), TY, VMUL(LDK(KP831469612), TT)); + T2h = VFMA(LDK(KP555570233), TT, VMUL(LDK(KP831469612), TY)); + T6v = VADD(T4I, T4J); + T6w = VADD(T4F, T4G); + T6x = VSUB(T6v, T6w); + T75 = 
VADD(T6v, T6w); + } + { + V T3r, T3s, T4H, T4K; + T3r = VADD(TS, TP); + T3s = VADD(TW, TX); + T3t = VFNMS(LDK(KP195090322), T3s, VMUL(LDK(KP980785280), T3r)); + T3E = VFMA(LDK(KP195090322), T3r, VMUL(LDK(KP980785280), T3s)); + T4H = VSUB(T4F, T4G); + T4K = VSUB(T4I, T4J); + T4L = VFNMS(LDK(KP382683432), T4K, VMUL(LDK(KP923879532), T4H)); + T5p = VFMA(LDK(KP923879532), T4K, VMUL(LDK(KP382683432), T4H)); + } + } + { + V T21, T5h, T26, T5g, T1Y, T5d, T27, T5c, T55, T56, T1J, T57, T29, T58, T59; + V T1Q, T5a, T2a; + { + V T1Z, T20, T24, T25; + T1Z = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T20 = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + T21 = VSUB(T1Z, T20); + T5h = VADD(T1Z, T20); + T24 = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T25 = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T26 = VSUB(T24, T25); + T5g = VADD(T24, T25); + } + { + V T1S, T1T, T1U, T1V, T1W, T1X; + T1S = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T1T = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + T1U = VSUB(T1S, T1T); + T1V = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1W = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1X = VSUB(T1V, T1W); + T1Y = VMUL(LDK(KP707106781), VSUB(T1U, T1X)); + T5d = VADD(T1V, T1W); + T27 = VMUL(LDK(KP707106781), VADD(T1U, T1X)); + T5c = VADD(T1S, T1T); + } + { + V T1F, T1I, T1M, T1P; + { + V T1D, T1E, T1G, T1H; + T1D = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1E = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1F = VSUB(T1D, T1E); + T55 = VADD(T1D, T1E); + T1G = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1H = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T1I = VSUB(T1G, T1H); + T56 = VADD(T1G, T1H); + } + T1J = VFNMS(LDK(KP382683432), T1I, VMUL(LDK(KP923879532), T1F)); + T57 = VSUB(T55, T56); + T29 = VFMA(LDK(KP382683432), T1F, VMUL(LDK(KP923879532), T1I)); + { + V T1K, T1L, T1N, T1O; + T1K = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1L = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1M = VSUB(T1K, T1L); + 
T58 = VADD(T1K, T1L); + T1N = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T1O = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T1P = VSUB(T1N, T1O); + T59 = VADD(T1N, T1O); + } + T1Q = VFMA(LDK(KP923879532), T1M, VMUL(LDK(KP382683432), T1P)); + T5a = VSUB(T58, T59); + T2a = VFNMS(LDK(KP382683432), T1M, VMUL(LDK(KP923879532), T1P)); + } + { + V T1R, T22, T6k, T6l; + T1R = VSUB(T1J, T1Q); + T22 = VSUB(T1Y, T21); + T23 = VSUB(T1R, T22); + T2N = VADD(T22, T1R); + T6k = VADD(T5g, T5h); + T6l = VADD(T5c, T5d); + T6m = VSUB(T6k, T6l); + T70 = VADD(T6k, T6l); + } + { + V T6n, T6o, T28, T2b; + T6n = VADD(T55, T56); + T6o = VADD(T58, T59); + T6p = VSUB(T6n, T6o); + T71 = VADD(T6n, T6o); + T28 = VSUB(T26, T27); + T2b = VSUB(T29, T2a); + T2c = VSUB(T28, T2b); + T2O = VADD(T28, T2b); + } + { + V T3g, T3h, T5b, T5e; + T3g = VADD(T26, T27); + T3h = VADD(T1J, T1Q); + T3i = VADD(T3g, T3h); + T3Y = VSUB(T3g, T3h); + T5b = VMUL(LDK(KP707106781), VSUB(T57, T5a)); + T5e = VSUB(T5c, T5d); + T5f = VSUB(T5b, T5e); + T5R = VADD(T5e, T5b); + } + { + V T5i, T5j, T3j, T3k; + T5i = VSUB(T5g, T5h); + T5j = VMUL(LDK(KP707106781), VADD(T57, T5a)); + T5k = VSUB(T5i, T5j); + T5S = VADD(T5i, T5j); + T3j = VADD(T21, T1Y); + T3k = VADD(T29, T2a); + T3l = VADD(T3j, T3k); + T3Z = VSUB(T3k, T3j); + } + } + { + V T1q, T50, T1v, T4Z, T1n, T4W, T1w, T4V, T4O, T4P, T18, T4Q, T1y, T4R, T4S; + V T1f, T4T, T1z; + { + V T1o, T1p, T1t, T1u; + T1o = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T1p = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + T1q = VSUB(T1o, T1p); + T50 = VADD(T1o, T1p); + T1t = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T1v = VSUB(T1t, T1u); + T4Z = VADD(T1t, T1u); + } + { + V T1h, T1i, T1j, T1k, T1l, T1m; + T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1j = VSUB(T1h, T1i); + T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 25)]), ivs, 
&(xi[WS(is, 1)])); + T1m = VSUB(T1k, T1l); + T1n = VMUL(LDK(KP707106781), VSUB(T1j, T1m)); + T4W = VADD(T1k, T1l); + T1w = VMUL(LDK(KP707106781), VADD(T1j, T1m)); + T4V = VADD(T1h, T1i); + } + { + V T14, T17, T1b, T1e; + { + V T12, T13, T15, T16; + T12 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T13 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T14 = VSUB(T12, T13); + T4O = VADD(T12, T13); + T15 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T16 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + T17 = VSUB(T15, T16); + T4P = VADD(T15, T16); + } + T18 = VFNMS(LDK(KP382683432), T17, VMUL(LDK(KP923879532), T14)); + T4Q = VSUB(T4O, T4P); + T1y = VFMA(LDK(KP382683432), T14, VMUL(LDK(KP923879532), T17)); + { + V T19, T1a, T1c, T1d; + T19 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T1a = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1b = VSUB(T19, T1a); + T4R = VADD(T19, T1a); + T1c = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T1d = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T1e = VSUB(T1c, T1d); + T4S = VADD(T1c, T1d); + } + T1f = VFMA(LDK(KP923879532), T1b, VMUL(LDK(KP382683432), T1e)); + T4T = VSUB(T4R, T4S); + T1z = VFNMS(LDK(KP382683432), T1b, VMUL(LDK(KP923879532), T1e)); + } + { + V T1g, T1r, T6d, T6e; + T1g = VSUB(T18, T1f); + T1r = VSUB(T1n, T1q); + T1s = VSUB(T1g, T1r); + T2K = VADD(T1r, T1g); + T6d = VADD(T4Z, T50); + T6e = VADD(T4V, T4W); + T6f = VSUB(T6d, T6e); + T6X = VADD(T6d, T6e); + } + { + V T6g, T6h, T1x, T1A; + T6g = VADD(T4O, T4P); + T6h = VADD(T4R, T4S); + T6i = VSUB(T6g, T6h); + T6Y = VADD(T6g, T6h); + T1x = VSUB(T1v, T1w); + T1A = VSUB(T1y, T1z); + T1B = VSUB(T1x, T1A); + T2L = VADD(T1x, T1A); + } + { + V T39, T3a, T4U, T4X; + T39 = VADD(T1v, T1w); + T3a = VADD(T18, T1f); + T3b = VADD(T39, T3a); + T3V = VSUB(T39, T3a); + T4U = VMUL(LDK(KP707106781), VSUB(T4Q, T4T)); + T4X = VSUB(T4V, T4W); + T4Y = VSUB(T4U, T4X); + T5O = VADD(T4X, T4U); + } + { + V T51, T52, T3c, T3d; + T51 = VSUB(T4Z, T50); + T52 = 
VMUL(LDK(KP707106781), VADD(T4Q, T4T)); + T53 = VSUB(T51, T52); + T5P = VADD(T51, T52); + T3c = VADD(T1q, T1n); + T3d = VADD(T1y, T1z); + T3e = VADD(T3c, T3d); + T3W = VSUB(T3d, T3c); + } + } + { + V T7n, T7o, T7p, T7q, T7r, T7s, T7t, T7u, T7v, T7w, T7x, T7y, T7z, T7A, T7B; + V T7C, T7D, T7E, T7F, T7G, T7H, T7I, T7J, T7K; + { + V T7h, T7l, T7k, T7m; + { + V T7f, T7g, T7i, T7j; + T7f = VADD(T78, T79); + T7g = VADD(T74, T75); + T7h = VSUB(T7f, T7g); + T7l = VADD(T7f, T7g); + T7i = VADD(T6X, T6Y); + T7j = VADD(T70, T71); + T7k = VBYI(VSUB(T7i, T7j)); + T7m = VADD(T7i, T7j); + } + T7n = VSUB(T7h, T7k); + STM2(&(xo[96]), T7n, ovs, &(xo[0])); + T7o = VADD(T7l, T7m); + STM2(&(xo[0]), T7o, ovs, &(xo[0])); + T7p = VADD(T7h, T7k); + STM2(&(xo[32]), T7p, ovs, &(xo[0])); + T7q = VSUB(T7l, T7m); + STM2(&(xo[64]), T7q, ovs, &(xo[0])); + } + { + V T76, T7a, T73, T7b, T6Z, T72; + T76 = VSUB(T74, T75); + T7a = VSUB(T78, T79); + T6Z = VSUB(T6X, T6Y); + T72 = VSUB(T70, T71); + T73 = VMUL(LDK(KP707106781), VSUB(T6Z, T72)); + T7b = VMUL(LDK(KP707106781), VADD(T6Z, T72)); + { + V T77, T7c, T7d, T7e; + T77 = VBYI(VSUB(T73, T76)); + T7c = VSUB(T7a, T7b); + T7r = VADD(T77, T7c); + STM2(&(xo[48]), T7r, ovs, &(xo[0])); + T7s = VSUB(T7c, T77); + STM2(&(xo[80]), T7s, ovs, &(xo[0])); + T7d = VBYI(VADD(T76, T73)); + T7e = VADD(T7a, T7b); + T7t = VADD(T7d, T7e); + STM2(&(xo[16]), T7t, ovs, &(xo[0])); + T7u = VSUB(T7e, T7d); + STM2(&(xo[112]), T7u, ovs, &(xo[0])); + } + } + { + V T6C, T6S, T6I, T6P, T6r, T6Q, T6L, T6T, T6y, T6H; + T6y = VMUL(LDK(KP707106781), VSUB(T6u, T6x)); + T6C = VSUB(T6y, T6B); + T6S = VADD(T6B, T6y); + T6H = VMUL(LDK(KP707106781), VADD(T6u, T6x)); + T6I = VSUB(T6G, T6H); + T6P = VADD(T6G, T6H); + { + V T6j, T6q, T6J, T6K; + T6j = VFNMS(LDK(KP382683432), T6i, VMUL(LDK(KP923879532), T6f)); + T6q = VFMA(LDK(KP923879532), T6m, VMUL(LDK(KP382683432), T6p)); + T6r = VSUB(T6j, T6q); + T6Q = VADD(T6j, T6q); + T6J = VFMA(LDK(KP382683432), T6f, VMUL(LDK(KP923879532), T6i)); + T6K = 
VFNMS(LDK(KP382683432), T6m, VMUL(LDK(KP923879532), T6p)); + T6L = VSUB(T6J, T6K); + T6T = VADD(T6J, T6K); + } + { + V T6D, T6M, T6V, T6W; + T6D = VBYI(VSUB(T6r, T6C)); + T6M = VSUB(T6I, T6L); + T7v = VADD(T6D, T6M); + STM2(&(xo[40]), T7v, ovs, &(xo[0])); + T7w = VSUB(T6M, T6D); + STM2(&(xo[88]), T7w, ovs, &(xo[0])); + T6V = VSUB(T6P, T6Q); + T6W = VBYI(VSUB(T6T, T6S)); + T7x = VSUB(T6V, T6W); + STM2(&(xo[72]), T7x, ovs, &(xo[0])); + T7y = VADD(T6V, T6W); + STM2(&(xo[56]), T7y, ovs, &(xo[0])); + } + { + V T6N, T6O, T6R, T6U; + T6N = VBYI(VADD(T6C, T6r)); + T6O = VADD(T6I, T6L); + T7z = VADD(T6N, T6O); + STM2(&(xo[24]), T7z, ovs, &(xo[0])); + T7A = VSUB(T6O, T6N); + STM2(&(xo[104]), T7A, ovs, &(xo[0])); + T6R = VADD(T6P, T6Q); + T6U = VBYI(VADD(T6S, T6T)); + T7B = VSUB(T6R, T6U); + STM2(&(xo[120]), T7B, ovs, &(xo[0])); + T7C = VADD(T6R, T6U); + STM2(&(xo[8]), T7C, ovs, &(xo[0])); + } + } + { + V T5N, T68, T61, T69, T5U, T65, T5Y, T66; + { + V T5L, T5M, T5Z, T60; + T5L = VADD(T4p, T4w); + T5M = VADD(T5o, T5p); + T5N = VSUB(T5L, T5M); + T68 = VADD(T5L, T5M); + T5Z = VFNMS(LDK(KP195090322), T5O, VMUL(LDK(KP980785280), T5P)); + T60 = VFMA(LDK(KP195090322), T5R, VMUL(LDK(KP980785280), T5S)); + T61 = VSUB(T5Z, T60); + T69 = VADD(T5Z, T60); + } + { + V T5Q, T5T, T5W, T5X; + T5Q = VFMA(LDK(KP980785280), T5O, VMUL(LDK(KP195090322), T5P)); + T5T = VFNMS(LDK(KP195090322), T5S, VMUL(LDK(KP980785280), T5R)); + T5U = VSUB(T5Q, T5T); + T65 = VADD(T5Q, T5T); + T5W = VADD(T4E, T4L); + T5X = VADD(T5u, T5r); + T5Y = VSUB(T5W, T5X); + T66 = VADD(T5X, T5W); + } + { + V T5V, T62, T6b, T6c; + T5V = VADD(T5N, T5U); + T62 = VBYI(VADD(T5Y, T61)); + T7D = VSUB(T5V, T62); + STM2(&(xo[100]), T7D, ovs, &(xo[0])); + T7E = VADD(T5V, T62); + STM2(&(xo[28]), T7E, ovs, &(xo[0])); + T6b = VBYI(VADD(T66, T65)); + T6c = VADD(T68, T69); + T7F = VADD(T6b, T6c); + STM2(&(xo[4]), T7F, ovs, &(xo[0])); + T7G = VSUB(T6c, T6b); + STM2(&(xo[124]), T7G, ovs, &(xo[0])); + } + { + V T63, T64, T67, T6a; + T63 = 
VSUB(T5N, T5U); + T64 = VBYI(VSUB(T61, T5Y)); + T7H = VSUB(T63, T64); + STM2(&(xo[92]), T7H, ovs, &(xo[0])); + T7I = VADD(T63, T64); + STM2(&(xo[36]), T7I, ovs, &(xo[0])); + T67 = VBYI(VSUB(T65, T66)); + T6a = VSUB(T68, T69); + T7J = VADD(T67, T6a); + STM2(&(xo[60]), T7J, ovs, &(xo[0])); + T7K = VSUB(T6a, T67); + STM2(&(xo[68]), T7K, ovs, &(xo[0])); + } + } + { + V T7M, T7O, T7P, T7R; + { + V T11, T2C, T2v, T2D, T2e, T2z, T2s, T2A; + { + V Tr, T10, T2t, T2u; + Tr = VSUB(Tb, Tq); + T10 = VSUB(TI, TZ); + T11 = VSUB(Tr, T10); + T2C = VADD(Tr, T10); + T2t = VFNMS(LDK(KP471396736), T1s, VMUL(LDK(KP881921264), T1B)); + T2u = VFMA(LDK(KP471396736), T23, VMUL(LDK(KP881921264), T2c)); + T2v = VSUB(T2t, T2u); + T2D = VADD(T2t, T2u); + } + { + V T1C, T2d, T2i, T2r; + T1C = VFMA(LDK(KP881921264), T1s, VMUL(LDK(KP471396736), T1B)); + T2d = VFNMS(LDK(KP471396736), T2c, VMUL(LDK(KP881921264), T23)); + T2e = VSUB(T1C, T2d); + T2z = VADD(T1C, T2d); + T2i = VSUB(T2g, T2h); + T2r = VSUB(T2l, T2q); + T2s = VSUB(T2i, T2r); + T2A = VADD(T2r, T2i); + } + { + V T2f, T2w, T7L, T2F, T2G, T7N; + T2f = VADD(T11, T2e); + T2w = VBYI(VADD(T2s, T2v)); + T7L = VSUB(T2f, T2w); + STM2(&(xo[106]), T7L, ovs, &(xo[2])); + STN2(&(xo[104]), T7A, T7L, ovs); + T7M = VADD(T2f, T2w); + STM2(&(xo[22]), T7M, ovs, &(xo[2])); + T2F = VBYI(VADD(T2A, T2z)); + T2G = VADD(T2C, T2D); + T7N = VADD(T2F, T2G); + STM2(&(xo[10]), T7N, ovs, &(xo[2])); + STN2(&(xo[8]), T7C, T7N, ovs); + T7O = VSUB(T2G, T2F); + STM2(&(xo[118]), T7O, ovs, &(xo[2])); + } + { + V T2x, T2y, T7Q, T2B, T2E, T7S; + T2x = VSUB(T11, T2e); + T2y = VBYI(VSUB(T2v, T2s)); + T7P = VSUB(T2x, T2y); + STM2(&(xo[86]), T7P, ovs, &(xo[2])); + T7Q = VADD(T2x, T2y); + STM2(&(xo[42]), T7Q, ovs, &(xo[2])); + STN2(&(xo[40]), T7v, T7Q, ovs); + T2B = VBYI(VSUB(T2z, T2A)); + T2E = VSUB(T2C, T2D); + T7R = VADD(T2B, T2E); + STM2(&(xo[54]), T7R, ovs, &(xo[2])); + T7S = VSUB(T2E, T2B); + STM2(&(xo[74]), T7S, ovs, &(xo[2])); + STN2(&(xo[72]), T7x, T7S, ovs); + } + } + { + V 
T3n, T3O, T3J, T3R, T3y, T3Q, T3G, T3N; + { + V T3f, T3m, T3H, T3I; + T3f = VFNMS(LDK(KP098017140), T3e, VMUL(LDK(KP995184726), T3b)); + T3m = VFMA(LDK(KP995184726), T3i, VMUL(LDK(KP098017140), T3l)); + T3n = VSUB(T3f, T3m); + T3O = VADD(T3f, T3m); + T3H = VFMA(LDK(KP098017140), T3b, VMUL(LDK(KP995184726), T3e)); + T3I = VFNMS(LDK(KP098017140), T3i, VMUL(LDK(KP995184726), T3l)); + T3J = VSUB(T3H, T3I); + T3R = VADD(T3H, T3I); + } + { + V T3u, T3x, T3C, T3F; + T3u = VADD(T3q, T3t); + T3x = VADD(T3v, T3w); + T3y = VSUB(T3u, T3x); + T3Q = VADD(T3x, T3u); + T3C = VADD(T3A, T3B); + T3F = VADD(T3D, T3E); + T3G = VSUB(T3C, T3F); + T3N = VADD(T3C, T3F); + } + { + V T3z, T3K, T7T, T7U; + T3z = VBYI(VSUB(T3n, T3y)); + T3K = VSUB(T3G, T3J); + T7T = VADD(T3z, T3K); + STM2(&(xo[34]), T7T, ovs, &(xo[2])); + STN2(&(xo[32]), T7p, T7T, ovs); + T7U = VSUB(T3K, T3z); + STM2(&(xo[94]), T7U, ovs, &(xo[2])); + STN2(&(xo[92]), T7H, T7U, ovs); + } + { + V T3T, T3U, T7V, T7W; + T3T = VSUB(T3N, T3O); + T3U = VBYI(VSUB(T3R, T3Q)); + T7V = VSUB(T3T, T3U); + STM2(&(xo[66]), T7V, ovs, &(xo[2])); + STN2(&(xo[64]), T7q, T7V, ovs); + T7W = VADD(T3T, T3U); + STM2(&(xo[62]), T7W, ovs, &(xo[2])); + STN2(&(xo[60]), T7J, T7W, ovs); + } + { + V T3L, T3M, T7X, T7Y; + T3L = VBYI(VADD(T3y, T3n)); + T3M = VADD(T3G, T3J); + T7X = VADD(T3L, T3M); + STM2(&(xo[30]), T7X, ovs, &(xo[2])); + STN2(&(xo[28]), T7E, T7X, ovs); + T7Y = VSUB(T3M, T3L); + STM2(&(xo[98]), T7Y, ovs, &(xo[2])); + STN2(&(xo[96]), T7n, T7Y, ovs); + } + { + V T3P, T3S, T7Z, T80; + T3P = VADD(T3N, T3O); + T3S = VBYI(VADD(T3Q, T3R)); + T7Z = VSUB(T3P, T3S); + STM2(&(xo[126]), T7Z, ovs, &(xo[2])); + STN2(&(xo[124]), T7G, T7Z, ovs); + T80 = VADD(T3P, T3S); + STM2(&(xo[2]), T80, ovs, &(xo[2])); + STN2(&(xo[0]), T7o, T80, ovs); + } + } + { + V T81, T83, T86, T88; + { + V T4N, T5G, T5z, T5H, T5m, T5D, T5w, T5E; + { + V T4x, T4M, T5x, T5y; + T4x = VSUB(T4p, T4w); + T4M = VSUB(T4E, T4L); + T4N = VSUB(T4x, T4M); + T5G = VADD(T4x, T4M); + T5x = 
VFNMS(LDK(KP555570233), T4Y, VMUL(LDK(KP831469612), T53)); + T5y = VFMA(LDK(KP555570233), T5f, VMUL(LDK(KP831469612), T5k)); + T5z = VSUB(T5x, T5y); + T5H = VADD(T5x, T5y); + } + { + V T54, T5l, T5q, T5v; + T54 = VFMA(LDK(KP831469612), T4Y, VMUL(LDK(KP555570233), T53)); + T5l = VFNMS(LDK(KP555570233), T5k, VMUL(LDK(KP831469612), T5f)); + T5m = VSUB(T54, T5l); + T5D = VADD(T54, T5l); + T5q = VSUB(T5o, T5p); + T5v = VSUB(T5r, T5u); + T5w = VSUB(T5q, T5v); + T5E = VADD(T5v, T5q); + } + { + V T5n, T5A, T82, T5J, T5K, T84; + T5n = VADD(T4N, T5m); + T5A = VBYI(VADD(T5w, T5z)); + T81 = VSUB(T5n, T5A); + STM2(&(xo[108]), T81, ovs, &(xo[0])); + T82 = VADD(T5n, T5A); + STM2(&(xo[20]), T82, ovs, &(xo[0])); + STN2(&(xo[20]), T82, T7M, ovs); + T5J = VBYI(VADD(T5E, T5D)); + T5K = VADD(T5G, T5H); + T83 = VADD(T5J, T5K); + STM2(&(xo[12]), T83, ovs, &(xo[0])); + T84 = VSUB(T5K, T5J); + STM2(&(xo[116]), T84, ovs, &(xo[0])); + STN2(&(xo[116]), T84, T7O, ovs); + } + { + V T5B, T5C, T85, T5F, T5I, T87; + T5B = VSUB(T4N, T5m); + T5C = VBYI(VSUB(T5z, T5w)); + T85 = VSUB(T5B, T5C); + STM2(&(xo[84]), T85, ovs, &(xo[0])); + STN2(&(xo[84]), T85, T7P, ovs); + T86 = VADD(T5B, T5C); + STM2(&(xo[44]), T86, ovs, &(xo[0])); + T5F = VBYI(VSUB(T5D, T5E)); + T5I = VSUB(T5G, T5H); + T87 = VADD(T5F, T5I); + STM2(&(xo[52]), T87, ovs, &(xo[0])); + STN2(&(xo[52]), T87, T7R, ovs); + T88 = VSUB(T5I, T5F); + STM2(&(xo[76]), T88, ovs, &(xo[0])); + } + } + { + V T2J, T34, T2X, T35, T2Q, T31, T2U, T32; + { + V T2H, T2I, T2V, T2W; + T2H = VADD(Tb, Tq); + T2I = VADD(T2g, T2h); + T2J = VSUB(T2H, T2I); + T34 = VADD(T2H, T2I); + T2V = VFNMS(LDK(KP290284677), T2K, VMUL(LDK(KP956940335), T2L)); + T2W = VFMA(LDK(KP290284677), T2N, VMUL(LDK(KP956940335), T2O)); + T2X = VSUB(T2V, T2W); + T35 = VADD(T2V, T2W); + } + { + V T2M, T2P, T2S, T2T; + T2M = VFMA(LDK(KP956940335), T2K, VMUL(LDK(KP290284677), T2L)); + T2P = VFNMS(LDK(KP290284677), T2O, VMUL(LDK(KP956940335), T2N)); + T2Q = VSUB(T2M, T2P); + T31 = VADD(T2M, T2P); + 
T2S = VADD(TI, TZ); + T2T = VADD(T2q, T2l); + T2U = VSUB(T2S, T2T); + T32 = VADD(T2T, T2S); + } + { + V T2R, T2Y, T89, T8a; + T2R = VADD(T2J, T2Q); + T2Y = VBYI(VADD(T2U, T2X)); + T89 = VSUB(T2R, T2Y); + STM2(&(xo[102]), T89, ovs, &(xo[2])); + STN2(&(xo[100]), T7D, T89, ovs); + T8a = VADD(T2R, T2Y); + STM2(&(xo[26]), T8a, ovs, &(xo[2])); + STN2(&(xo[24]), T7z, T8a, ovs); + } + { + V T37, T38, T8b, T8c; + T37 = VBYI(VADD(T32, T31)); + T38 = VADD(T34, T35); + T8b = VADD(T37, T38); + STM2(&(xo[6]), T8b, ovs, &(xo[2])); + STN2(&(xo[4]), T7F, T8b, ovs); + T8c = VSUB(T38, T37); + STM2(&(xo[122]), T8c, ovs, &(xo[2])); + STN2(&(xo[120]), T7B, T8c, ovs); + } + { + V T2Z, T30, T8d, T8e; + T2Z = VSUB(T2J, T2Q); + T30 = VBYI(VSUB(T2X, T2U)); + T8d = VSUB(T2Z, T30); + STM2(&(xo[90]), T8d, ovs, &(xo[2])); + STN2(&(xo[88]), T7w, T8d, ovs); + T8e = VADD(T2Z, T30); + STM2(&(xo[38]), T8e, ovs, &(xo[2])); + STN2(&(xo[36]), T7I, T8e, ovs); + } + { + V T33, T36, T8f, T8g; + T33 = VBYI(VSUB(T31, T32)); + T36 = VSUB(T34, T35); + T8f = VADD(T33, T36); + STM2(&(xo[58]), T8f, ovs, &(xo[2])); + STN2(&(xo[56]), T7y, T8f, ovs); + T8g = VSUB(T36, T33); + STM2(&(xo[70]), T8g, ovs, &(xo[2])); + STN2(&(xo[68]), T7K, T8g, ovs); + } + } + { + V T41, T4g, T4b, T4j, T44, T4i, T48, T4f; + { + V T3X, T40, T49, T4a; + T3X = VFNMS(LDK(KP634393284), T3W, VMUL(LDK(KP773010453), T3V)); + T40 = VFMA(LDK(KP773010453), T3Y, VMUL(LDK(KP634393284), T3Z)); + T41 = VSUB(T3X, T40); + T4g = VADD(T3X, T40); + T49 = VFMA(LDK(KP634393284), T3V, VMUL(LDK(KP773010453), T3W)); + T4a = VFNMS(LDK(KP634393284), T3Y, VMUL(LDK(KP773010453), T3Z)); + T4b = VSUB(T49, T4a); + T4j = VADD(T49, T4a); + } + { + V T42, T43, T46, T47; + T42 = VSUB(T3D, T3E); + T43 = VSUB(T3w, T3v); + T44 = VSUB(T42, T43); + T4i = VADD(T43, T42); + T46 = VSUB(T3A, T3B); + T47 = VSUB(T3q, T3t); + T48 = VSUB(T46, T47); + T4f = VADD(T46, T47); + } + { + V T45, T4c, T8h, T8i; + T45 = VBYI(VSUB(T41, T44)); + T4c = VSUB(T48, T4b); + T8h = VADD(T45, T4c); + 
STM2(&(xo[46]), T8h, ovs, &(xo[2])); + STN2(&(xo[44]), T86, T8h, ovs); + T8i = VSUB(T4c, T45); + STM2(&(xo[82]), T8i, ovs, &(xo[2])); + STN2(&(xo[80]), T7s, T8i, ovs); + } + { + V T4l, T4m, T8j, T8k; + T4l = VSUB(T4f, T4g); + T4m = VBYI(VSUB(T4j, T4i)); + T8j = VSUB(T4l, T4m); + STM2(&(xo[78]), T8j, ovs, &(xo[2])); + STN2(&(xo[76]), T88, T8j, ovs); + T8k = VADD(T4l, T4m); + STM2(&(xo[50]), T8k, ovs, &(xo[2])); + STN2(&(xo[48]), T7r, T8k, ovs); + } + { + V T4d, T4e, T8l, T8m; + T4d = VBYI(VADD(T44, T41)); + T4e = VADD(T48, T4b); + T8l = VADD(T4d, T4e); + STM2(&(xo[18]), T8l, ovs, &(xo[2])); + STN2(&(xo[16]), T7t, T8l, ovs); + T8m = VSUB(T4e, T4d); + STM2(&(xo[110]), T8m, ovs, &(xo[2])); + STN2(&(xo[108]), T81, T8m, ovs); + } + { + V T4h, T4k, T8n, T8o; + T4h = VADD(T4f, T4g); + T4k = VBYI(VADD(T4i, T4j)); + T8n = VSUB(T4h, T4k); + STM2(&(xo[114]), T8n, ovs, &(xo[2])); + STN2(&(xo[112]), T7u, T8n, ovs); + T8o = VADD(T4h, T4k); + STM2(&(xo[14]), T8o, ovs, &(xo[2])); + STN2(&(xo[12]), T83, T8o, ovs); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n2bv_64"), {404, 72, 52, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2bv_64) (planner *p) { + X(kdft_register) (p, n2bv_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:59 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA /* first of two alternative n2bv_8 definitions: this branch carries the code generated with the -fma flag, the #else branch the code generated without it */ + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 8 -name n2bv_8 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 26 FP additions, 10 FP multiplications, + * (or, 16 additions, 0 multiplications, 10 fused multiply/add), + * 38 stack variables, 1 constants, and 20 memory accesses + */ +#include "n2b.h" + +static void n2bv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ /* size-8 codelet body: inputs are read only through ii (via xi) and outputs written only through io (via xo); ri and ro are never referenced, and os appears only inside MAKE_VOLATILE_STRIDE */ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); /* constant consumed below through LDK(KP707106781); the literal equals 1/sqrt(2) */ + { + INT i; + const R *xi; + R *xo; + xi = ii; /* input cursor starts at the ii argument */ + xo = io; /* output cursor starts at the io argument */ + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { /* each pass lowers the remaining count i by VL and advances xi by VL*ivs and xo by VL*ovs */ + V T1, T2, Tc, Td, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); /* the eight loads above read xi[0] and xi[WS(is, k)] for k = 1..7 */ + { + V T3, Tj, Te, Tk, T6, Tm, T9, Tn, Tp, Tl; + T3 = VSUB(T1, T2); + Tj = VADD(T1, T2); + Te = VSUB(Tc, Td); + Tk = 
VADD(Tc, Td); + T6 = VSUB(T4, T5); + Tm = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tn = VADD(T7, T8); + Tp = VADD(Tj, Tk); + Tl = VSUB(Tj, Tk); + { + V Tq, To, Ta, Tf; + Tq = VADD(Tm, Tn); + To = VSUB(Tm, Tn); + Ta = VADD(T6, T9); + Tf = VSUB(T6, T9); + { + V Tr, Ts, Tt, Tu, Tg, Ti, Tb, Th; /* results go to the fixed even offsets xo[0]..xo[14]; NOTE(review): STM2 and STN2 are not defined in this file, but every STN2 call below names a base offset k and the two values already handed to STM2 for offsets k and k plus 2 -- confirm the actual store semantics in the SIMD headers */ + Tr = VFMAI(To, Tl); + STM2(&(xo[4]), Tr, ovs, &(xo[0])); + Ts = VFNMSI(To, Tl); + STM2(&(xo[12]), Ts, ovs, &(xo[0])); + Tt = VADD(Tp, Tq); + STM2(&(xo[0]), Tt, ovs, &(xo[0])); + Tu = VSUB(Tp, Tq); + STM2(&(xo[8]), Tu, ovs, &(xo[0])); + Tg = VFNMS(LDK(KP707106781), Tf, Te); + Ti = VFMA(LDK(KP707106781), Tf, Te); + Tb = VFNMS(LDK(KP707106781), Ta, T3); + Th = VFMA(LDK(KP707106781), Ta, T3); + { + V Tv, Tw, Tx, Ty; + Tv = VFNMSI(Ti, Th); + STM2(&(xo[14]), Tv, ovs, &(xo[2])); + STN2(&(xo[12]), Ts, Tv, ovs); + Tw = VFMAI(Ti, Th); + STM2(&(xo[2]), Tw, ovs, &(xo[2])); + STN2(&(xo[0]), Tt, Tw, ovs); + Tx = VFMAI(Tg, Tb); + STM2(&(xo[10]), Tx, ovs, &(xo[2])); + STN2(&(xo[8]), Tu, Tx, ovs); + Ty = VFNMSI(Tg, Tb); + STM2(&(xo[6]), Ty, ovs, &(xo[2])); + STN2(&(xo[4]), Tr, Ty, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n2bv_8"), {16, 0, 10, 0}, &GENUS, 0, 2, 0, 0 }; /* descriptor for size 8; the leading {16, 0, 10} repeats the "16 additions, 0 multiplications, 10 fused multiply/add" count stated in the comment above the function */ + +void XSIMD(codelet_n2bv_8) (planner *p) { + X(kdft_register) (p, n2bv_8, &desc); /* hands this codelet and its descriptor to planner p */ +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 8 -name n2bv_8 -with-ostride 2 -include n2b.h -store-multiple 2 */ + +/* + * This function contains 26 FP additions, 2 FP multiplications, + * (or, 26 additions, 2 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 20 memory accesses + */ +#include "n2b.h" + +static void n2bv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ /* same signature as the HAVE_FMA variant; inputs are again read only through ii and outputs written only through io, with ri and ro never referenced */ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); /* constant consumed below through LDK(KP707106781); the literal equals 1/sqrt(2) */ + { + INT i; + const R *xi; + R *xo; + xi = ii; /* input cursor starts at the ii argument */ + xo = io; /* output cursor starts at the io argument */ + for (i = v; i > 0; i = i - VL, xi = 
xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { /* each pass lowers the remaining count i by VL and advances xi by VL*ivs and xo by VL*ovs */ + V Ta, Tk, Te, Tj, T7, Tn, Tf, Tm, Tr, Tu; + { + V T8, T9, Tc, Td; + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Ta = VSUB(T8, T9); + Tk = VADD(T8, T9); + Tc = LD(&(xi[0]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + Tj = VADD(Tc, Td); + { + V T1, T2, T3, T4, T5, T6; + T1 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + T4 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + T7 = VMUL(LDK(KP707106781), VSUB(T3, T6)); + Tn = VADD(T4, T5); + Tf = VMUL(LDK(KP707106781), VADD(T3, T6)); + Tm = VADD(T1, T2); + } + } + { + V Ts, Tb, Tg, Tp, Tq, Tt; /* results go to the fixed even offsets xo[0]..xo[14]; NOTE(review): STM2 and STN2 are not defined in this file, but every STN2 call below names a base offset k and the two values already handed to STM2 for offsets k and k plus 2 -- confirm the actual store semantics in the SIMD headers */ + Tb = VBYI(VSUB(T7, Ta)); + Tg = VSUB(Te, Tf); + Tr = VADD(Tb, Tg); + STM2(&(xo[6]), Tr, ovs, &(xo[2])); + Ts = VSUB(Tg, Tb); + STM2(&(xo[10]), Ts, ovs, &(xo[2])); + Tp = VADD(Tj, Tk); + Tq = VADD(Tm, Tn); + Tt = VSUB(Tp, Tq); + STM2(&(xo[8]), Tt, ovs, &(xo[0])); + STN2(&(xo[8]), Tt, Ts, ovs); + Tu = VADD(Tp, Tq); + STM2(&(xo[0]), Tu, ovs, &(xo[0])); + } + { + V Tw, Th, Ti, Tv; + Th = VBYI(VADD(Ta, T7)); + Ti = VADD(Te, Tf); + Tv = VADD(Th, Ti); + STM2(&(xo[2]), Tv, ovs, &(xo[2])); + STN2(&(xo[0]), Tu, Tv, ovs); + Tw = VSUB(Ti, Th); + STM2(&(xo[14]), Tw, ovs, &(xo[2])); + { + V Tl, To, Tx, Ty; + Tl = VSUB(Tj, Tk); + To = VBYI(VSUB(Tm, Tn)); + Tx = VSUB(Tl, To); + STM2(&(xo[12]), Tx, ovs, &(xo[0])); + STN2(&(xo[12]), Tx, Tw, ovs); + Ty = VADD(Tl, To); + STM2(&(xo[4]), Ty, ovs, &(xo[0])); + STN2(&(xo[4]), Ty, Tr, ovs); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n2bv_8"), {26, 2, 0, 0}, &GENUS, 0, 2, 0, 0 }; /* descriptor for size 8; the leading {26, 2, 0} repeats the "26 additions, 2 multiplications, 0 fused multiply/add" count stated in the comment above the function */ + +void XSIMD(codelet_n2bv_8) (planner *p) { + X(kdft_register) (p, n2bv_8, &desc); /* hands this codelet and its descriptor to planner p */ +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29
fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:55 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA /* first of two alternative n2fv_10 definitions: this branch carries the code generated with the -fma flag, the #else branch the code generated without it */ + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name n2fv_10 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 42 FP additions, 22 FP multiplications, + * (or, 24 additions, 4 multiplications, 18 fused multiply/add), + * 53 stack variables, 4 constants, and 25 memory accesses + */ +#include "n2f.h" + +static void n2fv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ /* size-10 codelet body: inputs are read only through ri (via xi) and outputs written only through ro (via xo); ii and io are never referenced, and os appears only inside MAKE_VOLATILE_STRIDE */ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, 
+0.951056516295153572116439333379382143405698634); /* the four constants above are consumed through LDK(...) below; their literals equal sqrt(5)/4, 1/4, (sqrt(5)-1)/2 and sin(2*pi/5) respectively */ + { + INT i; + const R *xi; + R *xo; + xi = ri; /* input cursor starts at the ri argument */ + xo = ro; /* output cursor starts at the ro argument */ + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { /* each pass lowers the remaining count i by VL and advances xi by VL*ivs and xo by VL*ovs */ + V Tb, Tr, T3, Ts, T6, Tw, Tg, Tt, T9, Tc, T1, T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + { + V T4, T5, Te, Tf, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tr = VADD(T1, T2); + T3 = VSUB(T1, T2); + Ts = VADD(T4, T5); + T6 = VSUB(T4, T5); + Tw = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + Tt = VADD(T7, T8); + T9 = VSUB(T7, T8); + Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); /* with this load, xi[0] and xi[WS(is, k)] for k = 1..9 have each been read once */ + } + { + V TD, Tu, Tm, Ta, Td, Tv; + TD = VSUB(Ts, Tt); + Tu = VADD(Ts, Tt); + Tm = VSUB(T6, T9); + Ta = VADD(T6, T9); + Td = VSUB(Tb, Tc); + Tv = VADD(Tb, Tc); + { + V TC, Tx, Tn, Th; + TC = VSUB(Tv, Tw); + Tx = VADD(Tv, Tw); + Tn = VSUB(Td, Tg); + Th = VADD(Td, Tg); + { + V Ty, TA, TE, TG, Ti, Tk, To, Tq; + Ty = VADD(Tu, Tx); + TA = VSUB(Tu, Tx); + TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC)); + TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD)); + Ti = VADD(Ta, Th); + Tk = VSUB(Ta, Th); + To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm)); + Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn)); + { + V Tz, TH, Tj, TI; /* results go to the fixed even offsets xo[0]..xo[18]; NOTE(review): STM2 and STN2 are not defined in this file, but every STN2 call below names a base offset k and the two values already handed to STM2 for offsets k and k plus 2 -- confirm the actual store semantics in the SIMD headers */ + Tz = VFNMS(LDK(KP250000000), Ty, Tr); + TH = VADD(Tr, Ty); + STM2(&(xo[0]), TH, ovs, &(xo[0])); + Tj = VFNMS(LDK(KP250000000), Ti, T3); + TI = VADD(T3, Ti); + STM2(&(xo[10]), TI, ovs, &(xo[2])); + { + V TB, TF, Tl, Tp; + TB = VFNMS(LDK(KP559016994), TA, Tz); + TF = VFMA(LDK(KP559016994), TA, Tz); + Tl = VFMA(LDK(KP559016994), Tk, Tj); + Tp = 
VFNMS(LDK(KP559016994), Tk, Tj); + { + V TJ, TK, TL, TM; + TJ = VFMAI(TG, TF); + STM2(&(xo[8]), TJ, ovs, &(xo[0])); + STN2(&(xo[8]), TJ, TI, ovs); + TK = VFNMSI(TG, TF); + STM2(&(xo[12]), TK, ovs, &(xo[0])); + TL = VFNMSI(TE, TB); + STM2(&(xo[16]), TL, ovs, &(xo[0])); + TM = VFMAI(TE, TB); + STM2(&(xo[4]), TM, ovs, &(xo[0])); + { + V TN, TO, TP, TQ; + TN = VFNMSI(Tq, Tp); + STM2(&(xo[6]), TN, ovs, &(xo[2])); + STN2(&(xo[4]), TM, TN, ovs); + TO = VFMAI(Tq, Tp); + STM2(&(xo[14]), TO, ovs, &(xo[2])); + STN2(&(xo[12]), TK, TO, ovs); + TP = VFMAI(To, Tl); + STM2(&(xo[18]), TP, ovs, &(xo[2])); + STN2(&(xo[16]), TL, TP, ovs); + TQ = VFNMSI(To, Tl); + STM2(&(xo[2]), TQ, ovs, &(xo[2])); + STN2(&(xo[0]), TH, TQ, ovs); + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n2fv_10"), {24, 4, 18, 0}, &GENUS, 0, 2, 0, 0 }; /* descriptor for size 10; the leading {24, 4, 18} repeats the "24 additions, 4 multiplications, 18 fused multiply/add" count stated in the comment above the function */ + +void XSIMD(codelet_n2fv_10) (planner *p) { + X(kdft_register) (p, n2fv_10, &desc); /* hands this codelet and its descriptor to planner p */ +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name n2fv_10 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 42 FP additions, 12 FP multiplications, + * (or, 36 additions, 6 multiplications, 6 fused multiply/add), + * 36 stack variables, 4 constants, and 25 memory accesses + */ +#include "n2f.h" + +static void n2fv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ /* same signature as the HAVE_FMA variant; inputs are again read only through ri and outputs written only through ro, with ii and io never referenced */ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* the four constants above are consumed through LDK(...) below; their literals equal 1/4, sqrt(5)/4, sin(pi/5) and sin(2*pi/5) respectively */ + { + INT i; + const R *xi; + R *xo; + xi = ri; /* input cursor starts at the ri argument */ + xo = ro; /* output cursor starts at the ro argument */ + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) { /* each pass lowers the remaining count i by VL and advances xi by VL*ivs and xo by VL*ovs */ + V Ti, Ty, Tm, Tn, Tw, Tt, 
Tz, TA, TB, T7, Te, Tj, Tg, Th; + Tg = LD(&(xi[0]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Ti = VSUB(Tg, Th); + Ty = VADD(Tg, Th); + { + V T3, Tu, Td, Ts, T6, Tv, Ta, Tr; + { + V T1, T2, Tb, Tc; + T1 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Tu = VADD(T1, T2); + Tb = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + Ts = VADD(Tb, Tc); + } + { + V T4, T5, T8, T9; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tv = VADD(T4, T5); + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + Tr = VADD(T8, T9); + } + Tm = VSUB(T3, T6); + Tn = VSUB(Ta, Td); + Tw = VSUB(Tu, Tv); + Tt = VSUB(Tr, Ts); + Tz = VADD(Tu, Tv); + TA = VADD(Tr, Ts); + TB = VADD(Tz, TA); + T7 = VADD(T3, T6); + Te = VADD(Ta, Td); + Tj = VADD(T7, Te); + } + { + V TH, TI, TK, TL, TM; /* results go to the fixed even offsets xo[0]..xo[18]; NOTE(review): STM2 and STN2 are not defined in this file, but every STN2 call below names a base offset k and the two values already handed to STM2 for offsets k and k plus 2 -- confirm the actual store semantics in the SIMD headers */ + TH = VADD(Ti, Tj); + STM2(&(xo[10]), TH, ovs, &(xo[2])); + TI = VADD(Ty, TB); + STM2(&(xo[0]), TI, ovs, &(xo[0])); + { + V To, Tq, Tl, Tp, Tf, Tk, TJ; + To = VBYI(VFMA(LDK(KP951056516), Tm, VMUL(LDK(KP587785252), Tn))); + Tq = VBYI(VFNMS(LDK(KP587785252), Tm, VMUL(LDK(KP951056516), Tn))); + Tf = VMUL(LDK(KP559016994), VSUB(T7, Te)); + Tk = VFNMS(LDK(KP250000000), Tj, Ti); + Tl = VADD(Tf, Tk); + Tp = VSUB(Tk, Tf); + TJ = VSUB(Tl, To); + STM2(&(xo[2]), TJ, ovs, &(xo[2])); + STN2(&(xo[0]), TI, TJ, ovs); + TK = VADD(Tq, Tp); + STM2(&(xo[14]), TK, ovs, &(xo[2])); + TL = VADD(To, Tl); + STM2(&(xo[18]), TL, ovs, &(xo[2])); + TM = VSUB(Tp, Tq); + STM2(&(xo[6]), TM, ovs, &(xo[2])); + } + { + V Tx, TF, TE, TG, TC, TD; + Tx = VBYI(VFNMS(LDK(KP587785252), Tw, VMUL(LDK(KP951056516), Tt))); + TF = VBYI(VFMA(LDK(KP951056516), Tw, VMUL(LDK(KP587785252), Tt))); + TC = VFNMS(LDK(KP250000000), TB, Ty); + TD = VMUL(LDK(KP559016994), VSUB(Tz, TA)); +
TE = VSUB(TC, TD); + TG = VADD(TD, TC); + { + V TN, TO, TP, TQ; + TN = VADD(Tx, TE); + STM2(&(xo[4]), TN, ovs, &(xo[0])); + STN2(&(xo[4]), TN, TM, ovs); + TO = VSUB(TG, TF); + STM2(&(xo[12]), TO, ovs, &(xo[0])); + STN2(&(xo[12]), TO, TK, ovs); + TP = VSUB(TE, Tx); + STM2(&(xo[16]), TP, ovs, &(xo[0])); + STN2(&(xo[16]), TP, TL, ovs); + TQ = VADD(TF, TG); + STM2(&(xo[8]), TQ, ovs, &(xo[0])); + STN2(&(xo[8]), TQ, TH, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 10, XSIMD_STRING("n2fv_10"), {36, 6, 6, 0}, &GENUS, 0, 2, 0, 0 }; /* descriptor for size 10; the leading {36, 6, 6} repeats the "36 additions, 6 multiplications, 6 fused multiply/add" count stated in the comment above the function */ + +void XSIMD(codelet_n2fv_10) (planner *p) { + X(kdft_register) (p, n2fv_10, &desc); /* hands this codelet and its descriptor to planner p */ +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:55 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name n2fv_12 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 48 FP additions, 20 FP multiplications, + * (or, 30 additions, 2 multiplications, 18 fused multiply/add), + * 61 stack variables, 2 constants, and 30 memory accesses + */ +#include "n2f.h" + +static void n2fv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T1, T6, Tk, Tn, Tc, Td, Tf, Tr, T4, Ts, T9, Tg, Te, Tl; + { + V T2, T3, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tr = VSUB(T3, T2); + T4 = VADD(T2, T3); + Ts = VSUB(T8, T7); + T9 = VADD(T7, T8); + Tg = LD(&(xi[WS(is, 5)]), ivs, 
&(xi[WS(is, 1)])); + } + Te = VSUB(Tc, Td); + Tl = VADD(Td, Tc); + { + V T5, TF, TB, Tt, Ta, TG, Th, To, Tm, TI; + T5 = VFNMS(LDK(KP500000000), T4, T1); + TF = VADD(T1, T4); + TB = VADD(Tr, Ts); + Tt = VSUB(Tr, Ts); + Ta = VFNMS(LDK(KP500000000), T9, T6); + TG = VADD(T6, T9); + Th = VSUB(Tf, Tg); + To = VADD(Tf, Tg); + Tm = VFNMS(LDK(KP500000000), Tl, Tk); + TI = VADD(Tk, Tl); + { + V TH, TL, Tb, Tx, TJ, Tp, Ti, TA; + TH = VSUB(TF, TG); + TL = VADD(TF, TG); + Tb = VSUB(T5, Ta); + Tx = VADD(T5, Ta); + TJ = VADD(Tn, To); + Tp = VFNMS(LDK(KP500000000), To, Tn); + Ti = VADD(Te, Th); + TA = VSUB(Te, Th); + { + V Tq, Ty, TK, TM; + Tq = VSUB(Tm, Tp); + Ty = VADD(Tm, Tp); + TK = VSUB(TI, TJ); + TM = VADD(TI, TJ); + { + V TC, TE, Tj, Tv; + TC = VMUL(LDK(KP866025403), VSUB(TA, TB)); + TE = VMUL(LDK(KP866025403), VADD(TB, TA)); + Tj = VFMA(LDK(KP866025403), Ti, Tb); + Tv = VFNMS(LDK(KP866025403), Ti, Tb); + { + V Tz, TD, Tu, Tw; + Tz = VSUB(Tx, Ty); + TD = VADD(Tx, Ty); + Tu = VFNMS(LDK(KP866025403), Tt, Tq); + Tw = VFMA(LDK(KP866025403), Tt, Tq); + { + V TN, TO, TP, TQ; + TN = VADD(TL, TM); + STM2(&(xo[0]), TN, ovs, &(xo[0])); + TO = VSUB(TL, TM); + STM2(&(xo[12]), TO, ovs, &(xo[0])); + TP = VFMAI(TK, TH); + STM2(&(xo[6]), TP, ovs, &(xo[2])); + TQ = VFNMSI(TK, TH); + STM2(&(xo[18]), TQ, ovs, &(xo[2])); + { + V TR, TS, TT, TU; + TR = VFMAI(TE, TD); + STM2(&(xo[8]), TR, ovs, &(xo[0])); + TS = VFNMSI(TE, TD); + STM2(&(xo[16]), TS, ovs, &(xo[0])); + STN2(&(xo[16]), TS, TQ, ovs); + TT = VFNMSI(TC, Tz); + STM2(&(xo[20]), TT, ovs, &(xo[0])); + TU = VFMAI(TC, Tz); + STM2(&(xo[4]), TU, ovs, &(xo[0])); + STN2(&(xo[4]), TU, TP, ovs); + { + V TV, TW, TX, TY; + TV = VFNMSI(Tw, Tv); + STM2(&(xo[10]), TV, ovs, &(xo[2])); + STN2(&(xo[8]), TR, TV, ovs); + TW = VFMAI(Tw, Tv); + STM2(&(xo[14]), TW, ovs, &(xo[2])); + STN2(&(xo[12]), TO, TW, ovs); + TX = VFMAI(Tu, Tj); + STM2(&(xo[22]), TX, ovs, &(xo[2])); + STN2(&(xo[20]), TT, TX, ovs); + TY = VFNMSI(Tu, Tj); + STM2(&(xo[2]), TY, ovs, 
&(xo[2])); + STN2(&(xo[0]), TN, TY, ovs); + } + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, XSIMD_STRING("n2fv_12"), {30, 2, 18, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_12) (planner *p) { + X(kdft_register) (p, n2fv_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name n2fv_12 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 48 FP additions, 8 FP multiplications, + * (or, 44 additions, 4 multiplications, 4 fused multiply/add), + * 33 stack variables, 2 constants, and 30 memory accesses + */ +#include "n2f.h" + +static void n2fv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) { + V T5, Ta, TJ, Ty, Tq, Tp, Tg, Tl, TI, TA, Tz, Tu; + { + V T1, T6, T4, Tw, T9, Tx; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V T2, T3, T7, T8; + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = VADD(T2, T3); + Tw = VSUB(T3, T2); + T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T9 = VADD(T7, T8); + Tx = VSUB(T8, T7); + } + T5 = VADD(T1, T4); + Ta = VADD(T6, T9); + TJ = VADD(Tw, Tx); + Ty = VMUL(LDK(KP866025403), VSUB(Tw, Tx)); + Tq = VFNMS(LDK(KP500000000), T9, T6); + Tp = VFNMS(LDK(KP500000000), T4, T1); + } + { + V Tc, Th, Tf, Ts, Tk, Tt; + Tc = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Th = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + { + V Td, Te, Ti, Tj; + Td = LD(&(xi[WS(is, 7)]), ivs, 
&(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Tf = VADD(Td, Te); + Ts = VSUB(Te, Td); + Ti = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tk = VADD(Ti, Tj); + Tt = VSUB(Tj, Ti); + } + Tg = VADD(Tc, Tf); + Tl = VADD(Th, Tk); + TI = VADD(Ts, Tt); + TA = VFNMS(LDK(KP500000000), Tk, Th); + Tz = VFNMS(LDK(KP500000000), Tf, Tc); + Tu = VMUL(LDK(KP866025403), VSUB(Ts, Tt)); + } + { + V TN, TO, TP, TQ, TR, TS; + { + V Tb, Tm, Tn, To; + Tb = VSUB(T5, Ta); + Tm = VBYI(VSUB(Tg, Tl)); + TN = VSUB(Tb, Tm); + STM2(&(xo[18]), TN, ovs, &(xo[2])); + TO = VADD(Tb, Tm); + STM2(&(xo[6]), TO, ovs, &(xo[2])); + Tn = VADD(T5, Ta); + To = VADD(Tg, Tl); + TP = VSUB(Tn, To); + STM2(&(xo[12]), TP, ovs, &(xo[0])); + TQ = VADD(Tn, To); + STM2(&(xo[0]), TQ, ovs, &(xo[0])); + } + { + V Tv, TE, TC, TD, Tr, TB, TT, TU; + Tr = VSUB(Tp, Tq); + Tv = VSUB(Tr, Tu); + TE = VADD(Tr, Tu); + TB = VSUB(Tz, TA); + TC = VBYI(VADD(Ty, TB)); + TD = VBYI(VSUB(Ty, TB)); + TR = VSUB(Tv, TC); + STM2(&(xo[10]), TR, ovs, &(xo[2])); + TS = VSUB(TE, TD); + STM2(&(xo[22]), TS, ovs, &(xo[2])); + TT = VADD(TC, Tv); + STM2(&(xo[14]), TT, ovs, &(xo[2])); + STN2(&(xo[12]), TP, TT, ovs); + TU = VADD(TD, TE); + STM2(&(xo[2]), TU, ovs, &(xo[2])); + STN2(&(xo[0]), TQ, TU, ovs); + } + { + V TK, TM, TH, TL, TF, TG; + TK = VBYI(VMUL(LDK(KP866025403), VSUB(TI, TJ))); + TM = VBYI(VMUL(LDK(KP866025403), VADD(TJ, TI))); + TF = VADD(Tp, Tq); + TG = VADD(Tz, TA); + TH = VSUB(TF, TG); + TL = VADD(TF, TG); + { + V TV, TW, TX, TY; + TV = VSUB(TH, TK); + STM2(&(xo[20]), TV, ovs, &(xo[0])); + STN2(&(xo[20]), TV, TS, ovs); + TW = VADD(TL, TM); + STM2(&(xo[8]), TW, ovs, &(xo[0])); + STN2(&(xo[8]), TW, TR, ovs); + TX = VADD(TH, TK); + STM2(&(xo[4]), TX, ovs, &(xo[0])); + STN2(&(xo[4]), TX, TO, ovs); + TY = VSUB(TL, TM); + STM2(&(xo[16]), TY, ovs, &(xo[0])); + STN2(&(xo[16]), TY, TN, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 12, 
XSIMD_STRING("n2fv_12"), {44, 4, 4, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_12) (planner *p) { + X(kdft_register) (p, n2fv_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:55 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n2fv_14 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 74 FP additions, 48 FP multiplications, + * (or, 32 additions, 6 multiplications, 42 fused multiply/add), + * 65 stack variables, 6 constants, and 35 memory accesses + */ +#include "n2f.h" + +static void n2fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V TH, T3, TP, Tn, Ta, Ts, TW, TK, TO, Tk, TM, Tg, TL, Td, T1; + V T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V Ti, TI, T6, TJ, T9, Tj, Te, Tf, Tb, Tc; + { + V T4, T5, T7, T8, Tl, Tm; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = 
LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TH = VADD(T1, T2); + T3 = VSUB(T1, T2); + TI = VADD(T4, T5); + T6 = VSUB(T4, T5); + TJ = VADD(T7, T8); + T9 = VSUB(T7, T8); + TP = VADD(Tl, Tm); + Tn = VSUB(Tl, Tm); + Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + Ta = VADD(T6, T9); + Ts = VSUB(T9, T6); + TW = VSUB(TJ, TI); + TK = VADD(TI, TJ); + TO = VADD(Ti, Tj); + Tk = VSUB(Ti, Tj); + TM = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + TL = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + } + { + V T19, T1a, T18, TB, T13, TY, TG, Tw, T11, Tr, T16, TT, Tz, TE, TU; + V TQ; + TU = VSUB(TO, TP); + TQ = VADD(TO, TP); + { + V Tt, To, TV, TN; + Tt = VSUB(Tn, Tk); + To = VADD(Tk, Tn); + TV = VSUB(TL, TM); + TN = VADD(TL, TM); + { + V Tu, Th, TZ, T17; + Tu = VSUB(Tg, Td); + Th = VADD(Td, Tg); + TZ = VFNMS(LDK(KP356895867), TK, TQ); + T17 = VFNMS(LDK(KP554958132), TU, TW); + { + V Tp, TA, T14, TR; + Tp = VFNMS(LDK(KP356895867), Ta, To); + TA = VFMA(LDK(KP554958132), Tt, Ts); + T19 = VADD(TH, VADD(TK, VADD(TN, TQ))); + STM2(&(xo[0]), T19, ovs, &(xo[0])); + T14 = VFNMS(LDK(KP356895867), TN, TK); + TR = VFNMS(LDK(KP356895867), TQ, TN); + { + V T12, TX, Tx, TC; + T12 = VFMA(LDK(KP554958132), TV, TU); + TX = VFMA(LDK(KP554958132), TW, TV); + T1a = VADD(T3, VADD(Ta, VADD(Th, To))); + STM2(&(xo[14]), T1a, ovs, &(xo[2])); + Tx = VFNMS(LDK(KP356895867), Th, Ta); + TC = VFNMS(LDK(KP356895867), To, Th); + { + V TF, Tv, T10, Tq; + TF = VFNMS(LDK(KP554958132), Ts, Tu); + Tv = VFMA(LDK(KP554958132), Tu, Tt); + T10 = VFNMS(LDK(KP692021471), TZ, TN); + T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV)); + Tq = VFNMS(LDK(KP692021471), Tp, Th); + TB = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TA, Tu)); + { + V 
T15, TS, Ty, TD; + T15 = VFNMS(LDK(KP692021471), T14, TQ); + TS = VFNMS(LDK(KP692021471), TR, TK); + T13 = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), T12, TW)); + TY = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TX, TU)); + Ty = VFNMS(LDK(KP692021471), Tx, To); + TD = VFNMS(LDK(KP692021471), TC, Ta); + TG = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TF, Tt)); + Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts)); + T11 = VFNMS(LDK(KP900968867), T10, TH); + Tr = VFNMS(LDK(KP900968867), Tq, T3); + T16 = VFNMS(LDK(KP900968867), T15, TH); + TT = VFNMS(LDK(KP900968867), TS, TH); + Tz = VFNMS(LDK(KP900968867), Ty, T3); + TE = VFNMS(LDK(KP900968867), TD, T3); + } + } + } + } + } + } + { + V T1b, T1c, T1d, T1e; + T1b = VFNMSI(T13, T11); + STM2(&(xo[24]), T1b, ovs, &(xo[0])); + T1c = VFMAI(T13, T11); + STM2(&(xo[4]), T1c, ovs, &(xo[0])); + T1d = VFMAI(Tw, Tr); + STM2(&(xo[18]), T1d, ovs, &(xo[2])); + T1e = VFNMSI(Tw, Tr); + STM2(&(xo[10]), T1e, ovs, &(xo[2])); + { + V T1f, T1g, T1h, T1i; + T1f = VFNMSI(T18, T16); + STM2(&(xo[16]), T1f, ovs, &(xo[0])); + STN2(&(xo[16]), T1f, T1d, ovs); + T1g = VFMAI(T18, T16); + STM2(&(xo[12]), T1g, ovs, &(xo[0])); + STN2(&(xo[12]), T1g, T1a, ovs); + T1h = VFNMSI(TY, TT); + STM2(&(xo[20]), T1h, ovs, &(xo[0])); + T1i = VFMAI(TY, TT); + STM2(&(xo[8]), T1i, ovs, &(xo[0])); + STN2(&(xo[8]), T1i, T1e, ovs); + { + V T1j, T1k, T1l, T1m; + T1j = VFMAI(TB, Tz); + STM2(&(xo[2]), T1j, ovs, &(xo[2])); + STN2(&(xo[0]), T19, T1j, ovs); + T1k = VFNMSI(TB, Tz); + STM2(&(xo[26]), T1k, ovs, &(xo[2])); + STN2(&(xo[24]), T1b, T1k, ovs); + T1l = VFMAI(TG, TE); + STM2(&(xo[6]), T1l, ovs, &(xo[2])); + STN2(&(xo[4]), T1c, T1l, ovs); + T1m = VFNMSI(TG, TE); + STM2(&(xo[22]), T1m, ovs, &(xo[2])); + STN2(&(xo[20]), T1h, T1m, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n2fv_14"), {32, 6, 42, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_14) (planner *p) { + X(kdft_register) (p, n2fv_14, 
&desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n2fv_14 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 74 FP additions, 36 FP multiplications, + * (or, 50 additions, 12 multiplications, 24 fused multiply/add), + * 39 stack variables, 6 constants, and 35 memory accesses + */ +#include "n2f.h" + +static void n2fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) { + V T3, Ty, To, TK, Tr, TE, Ta, TJ, Tq, TB, Th, TL, Ts, TH, T1; + V T2; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Ty = VADD(T1, T2); + { + V Tk, TC, Tn, TD; + { + V Ti, Tj, Tl, Tm; + Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tk = VSUB(Ti, Tj); + TC = VADD(Ti, Tj); + Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tn = VSUB(Tl, Tm); + TD = VADD(Tl, Tm); + } + To = VADD(Tk, Tn); + TK = VSUB(TC, TD); + Tr = VSUB(Tn, Tk); + TE = VADD(TC, TD); + } + { + V T6, Tz, T9, TA; + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Tz = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 12)]), 
ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + TA = VADD(T7, T8); + } + Ta = VADD(T6, T9); + TJ = VSUB(TA, Tz); + Tq = VSUB(T9, T6); + TB = VADD(Tz, TA); + } + { + V Td, TF, Tg, TG; + { + V Tb, Tc, Te, Tf; + Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TF = VADD(Tb, Tc); + Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tg = VSUB(Te, Tf); + TG = VADD(Te, Tf); + } + Th = VADD(Td, Tg); + TL = VSUB(TF, TG); + Ts = VSUB(Tg, Td); + TH = VADD(TF, TG); + } + { + V TR, TS, TT, TU, TV, TW; + TR = VADD(T3, VADD(Ta, VADD(Th, To))); + STM2(&(xo[14]), TR, ovs, &(xo[2])); + TS = VADD(Ty, VADD(TB, VADD(TH, TE))); + STM2(&(xo[0]), TS, ovs, &(xo[0])); + { + V Tt, Tp, TP, TQ; + Tt = VBYI(VFNMS(LDK(KP781831482), Tr, VFNMS(LDK(KP433883739), Ts, VMUL(LDK(KP974927912), Tq)))); + Tp = VFMA(LDK(KP623489801), To, VFNMS(LDK(KP900968867), Th, VFNMS(LDK(KP222520933), Ta, T3))); + TT = VSUB(Tp, Tt); + STM2(&(xo[10]), TT, ovs, &(xo[2])); + TU = VADD(Tp, Tt); + STM2(&(xo[18]), TU, ovs, &(xo[2])); + TP = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK)))); + TQ = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TB, Ty))); + TV = VADD(TP, TQ); + STM2(&(xo[4]), TV, ovs, &(xo[0])); + TW = VSUB(TQ, TP); + STM2(&(xo[24]), TW, ovs, &(xo[0])); + } + { + V Tv, Tu, TX, TY; + Tv = VBYI(VFMA(LDK(KP781831482), Tq, VFMA(LDK(KP974927912), Ts, VMUL(LDK(KP433883739), Tr)))); + Tu = VFMA(LDK(KP623489801), Ta, VFNMS(LDK(KP900968867), To, VFNMS(LDK(KP222520933), Th, T3))); + TX = VSUB(Tu, Tv); + STM2(&(xo[26]), TX, ovs, &(xo[2])); + STN2(&(xo[24]), TW, TX, ovs); + TY = VADD(Tu, Tv); + STM2(&(xo[2]), TY, ovs, &(xo[2])); + STN2(&(xo[0]), TS, TY, ovs); + } + { + V TM, TI, TZ, T10; + TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ)))); + TI = 
VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TH, Ty))); + TZ = VSUB(TI, TM); + STM2(&(xo[12]), TZ, ovs, &(xo[0])); + STN2(&(xo[12]), TZ, TR, ovs); + T10 = VADD(TM, TI); + STM2(&(xo[16]), T10, ovs, &(xo[0])); + STN2(&(xo[16]), T10, TU, ovs); + } + { + V T12, TO, TN, T11; + TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL)))); + TN = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP222520933), TE, VFNMS(LDK(KP900968867), TB, Ty))); + T11 = VSUB(TN, TO); + STM2(&(xo[8]), T11, ovs, &(xo[0])); + STN2(&(xo[8]), T11, TT, ovs); + T12 = VADD(TO, TN); + STM2(&(xo[20]), T12, ovs, &(xo[0])); + { + V Tx, Tw, T13, T14; + Tx = VBYI(VFMA(LDK(KP433883739), Tq, VFNMS(LDK(KP781831482), Ts, VMUL(LDK(KP974927912), Tr)))); + Tw = VFMA(LDK(KP623489801), Th, VFNMS(LDK(KP222520933), To, VFNMS(LDK(KP900968867), Ta, T3))); + T13 = VSUB(Tw, Tx); + STM2(&(xo[22]), T13, ovs, &(xo[2])); + STN2(&(xo[20]), T12, T13, ovs); + T14 = VADD(Tw, Tx); + STM2(&(xo[6]), T14, ovs, &(xo[2])); + STN2(&(xo[4]), TV, T14, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 14, XSIMD_STRING("n2fv_14"), {50, 12, 24, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_14) (planner *p) { + X(kdft_register) (p, n2fv_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:57 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name n2fv_16 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 72 FP additions, 34 FP multiplications, + * (or, 38 additions, 0 multiplications, 34 fused multiply/add), + * 62 stack variables, 3 constants, and 40 memory accesses + */ +#include "n2f.h" + +static void n2fv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V T7, Tu, TF, TB, T13, TL, TO, TX, TC, Te, TP, Th, TQ, Tk, TW; + V T16; + { + V TH, TU, Tz, Tf, TK, TV, TA, TM, Ta, TN, Td, Tg, Ti, Tj; + { + V T1, T2, T4, T5, To, Tp, Tr, Ts; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tp = 
LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tr = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + { + V T8, TJ, Tq, TI, Tt, T9, Tb, Tc, T3, T6; + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + TH = VSUB(T1, T2); + T3 = VADD(T1, T2); + TU = VSUB(T4, T5); + T6 = VADD(T4, T5); + TJ = VSUB(To, Tp); + Tq = VADD(To, Tp); + TI = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Tb = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T7 = VSUB(T3, T6); + Tz = VADD(T3, T6); + Tf = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TK = VADD(TI, TJ); + TV = VSUB(TJ, TI); + TA = VADD(Tt, Tq); + Tu = VSUB(Tq, Tt); + TM = VSUB(T8, T9); + Ta = VADD(T8, T9); + TN = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + Tg = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Ti = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + } + TF = VSUB(Tz, TA); + TB = VADD(Tz, TA); + T13 = VFNMS(LDK(KP707106781), TK, TH); + TL = VFMA(LDK(KP707106781), TK, TH); + TO = VFNMS(LDK(KP414213562), TN, TM); + TX = VFMA(LDK(KP414213562), TM, TN); + TC = VADD(Ta, Td); + Te = VSUB(Ta, Td); + TP = VSUB(Tf, Tg); + Th = VADD(Tf, Tg); + TQ = VSUB(Tj, Ti); + Tk = VADD(Ti, Tj); + TW = VFNMS(LDK(KP707106781), TV, TU); + T16 = VFMA(LDK(KP707106781), TV, TU); + } + { + V TY, TR, Tl, TD; + TY = VFMA(LDK(KP414213562), TP, TQ); + TR = VFNMS(LDK(KP414213562), TQ, TP); + Tl = VSUB(Th, Tk); + TD = VADD(Th, Tk); + { + V TS, T17, TZ, T14; + TS = VADD(TO, TR); + T17 = VSUB(TR, TO); + TZ = VSUB(TX, TY); + T14 = VADD(TX, TY); + { + V TE, TG, Tm, Tv; + TE = VADD(TC, TD); + TG = VSUB(TD, TC); + Tm = VADD(Te, Tl); + Tv = VSUB(Tl, Te); + { + V T18, T1a, TT, T11; + T18 = VFNMS(LDK(KP923879532), T17, T16); + T1a = VFMA(LDK(KP923879532), T17, T16); + TT = VFNMS(LDK(KP923879532), TS, TL); + T11 = VFMA(LDK(KP923879532), TS, TL); + { + V T15, T19, T10, T12; + T15 = 
VFNMS(LDK(KP923879532), T14, T13); + T19 = VFMA(LDK(KP923879532), T14, T13); + T10 = VFNMS(LDK(KP923879532), TZ, TW); + T12 = VFMA(LDK(KP923879532), TZ, TW); + { + V T1b, T1c, T1d, T1e; + T1b = VFMAI(TG, TF); + STM2(&(xo[8]), T1b, ovs, &(xo[0])); + T1c = VFNMSI(TG, TF); + STM2(&(xo[24]), T1c, ovs, &(xo[0])); + T1d = VADD(TB, TE); + STM2(&(xo[0]), T1d, ovs, &(xo[0])); + T1e = VSUB(TB, TE); + STM2(&(xo[16]), T1e, ovs, &(xo[0])); + { + V Tw, Ty, Tn, Tx; + Tw = VFNMS(LDK(KP707106781), Tv, Tu); + Ty = VFMA(LDK(KP707106781), Tv, Tu); + Tn = VFNMS(LDK(KP707106781), Tm, T7); + Tx = VFMA(LDK(KP707106781), Tm, T7); + { + V T1f, T1g, T1h, T1i; + T1f = VFMAI(T1a, T19); + STM2(&(xo[6]), T1f, ovs, &(xo[2])); + T1g = VFNMSI(T1a, T19); + STM2(&(xo[26]), T1g, ovs, &(xo[2])); + STN2(&(xo[24]), T1c, T1g, ovs); + T1h = VFMAI(T18, T15); + STM2(&(xo[22]), T1h, ovs, &(xo[2])); + T1i = VFNMSI(T18, T15); + STM2(&(xo[10]), T1i, ovs, &(xo[2])); + STN2(&(xo[8]), T1b, T1i, ovs); + { + V T1j, T1k, T1l, T1m; + T1j = VFNMSI(T12, T11); + STM2(&(xo[2]), T1j, ovs, &(xo[2])); + STN2(&(xo[0]), T1d, T1j, ovs); + T1k = VFMAI(T12, T11); + STM2(&(xo[30]), T1k, ovs, &(xo[2])); + T1l = VFMAI(T10, TT); + STM2(&(xo[14]), T1l, ovs, &(xo[2])); + T1m = VFNMSI(T10, TT); + STM2(&(xo[18]), T1m, ovs, &(xo[2])); + STN2(&(xo[16]), T1e, T1m, ovs); + { + V T1n, T1o, T1p, T1q; + T1n = VFNMSI(Ty, Tx); + STM2(&(xo[28]), T1n, ovs, &(xo[0])); + STN2(&(xo[28]), T1n, T1k, ovs); + T1o = VFMAI(Ty, Tx); + STM2(&(xo[4]), T1o, ovs, &(xo[0])); + STN2(&(xo[4]), T1o, T1f, ovs); + T1p = VFMAI(Tw, Tn); + STM2(&(xo[20]), T1p, ovs, &(xo[0])); + STN2(&(xo[20]), T1p, T1h, ovs); + T1q = VFNMSI(Tw, Tn); + STM2(&(xo[12]), T1q, ovs, &(xo[0])); + STN2(&(xo[12]), T1q, T1l, ovs); + } + } + } + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n2fv_16"), {38, 0, 34, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_16) (planner *p) { + X(kdft_register) (p, n2fv_16, &desc); +} + +#else /* 
HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name n2fv_16 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 72 FP additions, 12 FP multiplications, + * (or, 68 additions, 8 multiplications, 4 fused multiply/add), + * 38 stack variables, 3 constants, and 40 memory accesses + */ +#include "n2f.h" + +static void n2fv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V Tp, T13, Tu, TN, Tm, T14, Tv, TY, T7, T17, Ty, TT, Te, T16, Tx; + V TQ; + { + V Tn, To, TM, Ts, Tt, TL; + Tn = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + To = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + TM = VADD(Tn, To); + Ts = LD(&(xi[0]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + TL = VADD(Ts, Tt); + Tp = VSUB(Tn, To); + T13 = VADD(TL, TM); + Tu = VSUB(Ts, Tt); + TN = VSUB(TL, TM); + } + { + V Ti, TW, Tl, TX; + { + V Tg, Th, Tj, Tk; + Tg = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Ti = VSUB(Tg, Th); + TW = VADD(Tg, Th); + Tj = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + TX = VADD(Tj, Tk); + } + Tm = VMUL(LDK(KP707106781), VSUB(Ti, Tl)); + T14 = VADD(TX, TW); + Tv = VMUL(LDK(KP707106781), VADD(Tl, Ti)); + TY = VSUB(TW, TX); + } + { + V T3, TR, T6, TS; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + TR = VADD(T1, T2); + T4 = 
LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + TS = VADD(T4, T5); + } + T7 = VFNMS(LDK(KP923879532), T6, VMUL(LDK(KP382683432), T3)); + T17 = VADD(TR, TS); + Ty = VFMA(LDK(KP923879532), T3, VMUL(LDK(KP382683432), T6)); + TT = VSUB(TR, TS); + } + { + V Ta, TO, Td, TP; + { + V T8, T9, Tb, Tc; + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T9 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + Ta = VSUB(T8, T9); + TO = VADD(T8, T9); + Tb = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tc = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Td = VSUB(Tb, Tc); + TP = VADD(Tb, Tc); + } + Te = VFMA(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), Td)); + T16 = VADD(TO, TP); + Tx = VFNMS(LDK(KP382683432), Td, VMUL(LDK(KP923879532), Ta)); + TQ = VSUB(TO, TP); + } + { + V T1b, T1c, T1d, T1e; + { + V T15, T18, T19, T1a; + T15 = VADD(T13, T14); + T18 = VADD(T16, T17); + T1b = VSUB(T15, T18); + STM2(&(xo[16]), T1b, ovs, &(xo[0])); + T1c = VADD(T15, T18); + STM2(&(xo[0]), T1c, ovs, &(xo[0])); + T19 = VSUB(T13, T14); + T1a = VBYI(VSUB(T17, T16)); + T1d = VSUB(T19, T1a); + STM2(&(xo[24]), T1d, ovs, &(xo[0])); + T1e = VADD(T19, T1a); + STM2(&(xo[8]), T1e, ovs, &(xo[0])); + } + { + V T1f, T1g, T1h, T1i; + { + V TV, T11, T10, T12, TU, TZ; + TU = VMUL(LDK(KP707106781), VADD(TQ, TT)); + TV = VADD(TN, TU); + T11 = VSUB(TN, TU); + TZ = VMUL(LDK(KP707106781), VSUB(TT, TQ)); + T10 = VBYI(VADD(TY, TZ)); + T12 = VBYI(VSUB(TZ, TY)); + T1f = VSUB(TV, T10); + STM2(&(xo[28]), T1f, ovs, &(xo[0])); + T1g = VADD(T11, T12); + STM2(&(xo[12]), T1g, ovs, &(xo[0])); + T1h = VADD(TV, T10); + STM2(&(xo[4]), T1h, ovs, &(xo[0])); + T1i = VSUB(T11, T12); + STM2(&(xo[20]), T1i, ovs, &(xo[0])); + } + { + V Tr, TB, TA, TC; + { + V Tf, Tq, Tw, Tz; + Tf = VSUB(T7, Te); + Tq = VSUB(Tm, Tp); + Tr = VBYI(VSUB(Tf, Tq)); + TB = VBYI(VADD(Tq, Tf)); + Tw = VADD(Tu, Tv); + Tz = VADD(Tx, Ty); + TA = VSUB(Tw, Tz); + TC = VADD(Tw, Tz); + } + { + V T1j, 
T1k, T1l, T1m; + T1j = VADD(Tr, TA); + STM2(&(xo[14]), T1j, ovs, &(xo[2])); + STN2(&(xo[12]), T1g, T1j, ovs); + T1k = VSUB(TC, TB); + STM2(&(xo[30]), T1k, ovs, &(xo[2])); + STN2(&(xo[28]), T1f, T1k, ovs); + T1l = VSUB(TA, Tr); + STM2(&(xo[18]), T1l, ovs, &(xo[2])); + STN2(&(xo[16]), T1b, T1l, ovs); + T1m = VADD(TB, TC); + STM2(&(xo[2]), T1m, ovs, &(xo[2])); + STN2(&(xo[0]), T1c, T1m, ovs); + } + } + { + V TF, TJ, TI, TK; + { + V TD, TE, TG, TH; + TD = VSUB(Tu, Tv); + TE = VADD(Te, T7); + TF = VADD(TD, TE); + TJ = VSUB(TD, TE); + TG = VADD(Tp, Tm); + TH = VSUB(Ty, Tx); + TI = VBYI(VADD(TG, TH)); + TK = VBYI(VSUB(TH, TG)); + } + { + V T1n, T1o, T1p, T1q; + T1n = VSUB(TF, TI); + STM2(&(xo[26]), T1n, ovs, &(xo[2])); + STN2(&(xo[24]), T1d, T1n, ovs); + T1o = VADD(TJ, TK); + STM2(&(xo[10]), T1o, ovs, &(xo[2])); + STN2(&(xo[8]), T1e, T1o, ovs); + T1p = VADD(TF, TI); + STM2(&(xo[6]), T1p, ovs, &(xo[2])); + STN2(&(xo[4]), T1h, T1p, ovs); + T1q = VSUB(TJ, TK); + STM2(&(xo[22]), T1q, ovs, &(xo[2])); + STN2(&(xo[20]), T1i, T1q, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n2fv_16"), {68, 8, 4, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_16) (planner *p) { + X(kdft_register) (p, n2fv_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name n2fv_2 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 7 stack variables, 0 constants, and 5 memory accesses + */ +#include "n2f.h" + +static void n2fv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = VADD(T1, T2); + STM2(&(xo[0]), T3, ovs, &(xo[0])); + T4 = VSUB(T1, T2); + STM2(&(xo[2]), T4, ovs, &(xo[2])); + STN2(&(xo[0]), T3, T4, ovs); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n2fv_2"), {2, 0, 0, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_2) (planner *p) { + X(kdft_register) (p, n2fv_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name n2fv_2 -with-ostride 
2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 7 stack variables, 0 constants, and 5 memory accesses + */ +#include "n2f.h" + +static void n2fv_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) { + V T1, T2, T3, T4; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + STM2(&(xo[2]), T3, ovs, &(xo[2])); + T4 = VADD(T1, T2); + STM2(&(xo[0]), T4, ovs, &(xo[0])); + STN2(&(xo[0]), T4, T3, ovs); + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 2, XSIMD_STRING("n2fv_2"), {2, 0, 0, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_2) (planner *p) { + X(kdft_register) (p, n2fv_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n2fv_20 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 104 FP additions, 50 FP multiplications, + * (or, 58 additions, 4 multiplications, 46 fused multiply/add), + * 79 stack variables, 4 constants, and 50 memory accesses + */ +#include "n2f.h" + +static void n2fv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V T1H, T1I, TU, TI, TP, TX, T1M, T1N, T1O, T1P, T1R, T1S, TM, TW, TT; + V TF; + { + V T3, Tm, T1r, T13, Ta, TN, TH, TA, TG, Tt, Th, TO, T1u, T1C, T1n; + V T1a, T1m, T1h, T1x, T1D, TE, Ti; + { + V T1, T2, Tk, Tl; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + { + V T14, T6, T1c, Tw, Tn, T1f, Tz, T17, T9, To, Tq, T1b, Td, Tr, Te; + V Tf, T15, Tp; + { + V Tx, Ty, T7, T8, Tb, Tc; + { + V T4, T5, Tu, Tv, T11, T12; + T4 = LD(&(xi[WS(is, 
4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tu = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tx = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + T11 = VADD(T1, T2); + Tm = VSUB(Tk, Tl); + T12 = VADD(Tk, Tl); + T14 = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1c = VADD(Tu, Tv); + Tw = VSUB(Tu, Tv); + Ty = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T1r = VADD(T11, T12); + T13 = VSUB(T11, T12); + } + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1f = VADD(Tx, Ty); + Tz = VSUB(Tx, Ty); + T17 = VADD(T7, T8); + T9 = VSUB(T7, T8); + To = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1b = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + Tr = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + } + Ta = VADD(T6, T9); + TN = VSUB(T6, T9); + T15 = VADD(Tn, To); + Tp = VSUB(Tn, To); + TH = VSUB(Tz, Tw); + TA = VADD(Tw, Tz); + { + V T1d, T1v, T18, Ts, T1e, Tg, T16, T1s; + T1d = VSUB(T1b, T1c); + T1v = VADD(T1b, T1c); + T18 = VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + T1e = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + T16 = VSUB(T14, T15); + T1s = VADD(T14, T15); + { + V T1t, T19, T1w, T1g; + T1t = VADD(T17, T18); + T19 = VSUB(T17, T18); + TG = VSUB(Ts, Tp); + Tt = VADD(Tp, Ts); + T1w = VADD(T1e, T1f); + T1g = VSUB(T1e, T1f); + Th = VADD(Td, Tg); + TO = VSUB(Td, Tg); + T1u = VADD(T1s, T1t); + T1C = VSUB(T1s, T1t); + T1n = VSUB(T16, T19); + T1a = VADD(T16, T19); + T1m = VSUB(T1d, T1g); + T1h = VADD(T1d, T1g); + T1x = VADD(T1v, T1w); + T1D = VSUB(T1v, T1w); + } + } + } + } + TE = VSUB(Ta, Th); + Ti = VADD(Ta, Th); + { + V TL, T1k, T1A, Tj, TD, T1E, T1G, TK, TC, T1j, T1z, T1i, T1y, TB; + 
TL = VSUB(TA, Tt); + TB = VADD(Tt, TA); + T1i = VADD(T1a, T1h); + T1k = VSUB(T1a, T1h); + T1y = VADD(T1u, T1x); + T1A = VSUB(T1u, T1x); + Tj = VADD(T3, Ti); + TD = VFNMS(LDK(KP250000000), Ti, T3); + T1E = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1D, T1C)); + T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1C, T1D)); + TK = VFNMS(LDK(KP250000000), TB, Tm); + TC = VADD(Tm, TB); + T1j = VFNMS(LDK(KP250000000), T1i, T13); + T1H = VADD(T1r, T1y); + STM2(&(xo[0]), T1H, ovs, &(xo[0])); + T1z = VFNMS(LDK(KP250000000), T1y, T1r); + T1I = VADD(T13, T1i); + STM2(&(xo[20]), T1I, ovs, &(xo[0])); + { + V T1J, T1K, T1p, T1l, T1o, T1q, T1F, T1B, T1L, T1Q; + TU = VFNMS(LDK(KP618033988), TG, TH); + TI = VFMA(LDK(KP618033988), TH, TG); + TP = VFMA(LDK(KP618033988), TO, TN); + TX = VFNMS(LDK(KP618033988), TN, TO); + T1J = VFMAI(TC, Tj); + STM2(&(xo[30]), T1J, ovs, &(xo[2])); + T1K = VFNMSI(TC, Tj); + STM2(&(xo[10]), T1K, ovs, &(xo[2])); + T1p = VFMA(LDK(KP559016994), T1k, T1j); + T1l = VFNMS(LDK(KP559016994), T1k, T1j); + T1o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1n, T1m)); + T1q = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1m, T1n)); + T1F = VFNMS(LDK(KP559016994), T1A, T1z); + T1B = VFMA(LDK(KP559016994), T1A, T1z); + T1L = VFMAI(T1q, T1p); + STM2(&(xo[28]), T1L, ovs, &(xo[0])); + STN2(&(xo[28]), T1L, T1J, ovs); + T1M = VFNMSI(T1q, T1p); + STM2(&(xo[12]), T1M, ovs, &(xo[0])); + T1N = VFNMSI(T1o, T1l); + STM2(&(xo[36]), T1N, ovs, &(xo[0])); + T1O = VFMAI(T1o, T1l); + STM2(&(xo[4]), T1O, ovs, &(xo[0])); + T1P = VFNMSI(T1E, T1B); + STM2(&(xo[32]), T1P, ovs, &(xo[0])); + T1Q = VFMAI(T1E, T1B); + STM2(&(xo[8]), T1Q, ovs, &(xo[0])); + STN2(&(xo[8]), T1Q, T1K, ovs); + T1R = VFMAI(T1G, T1F); + STM2(&(xo[24]), T1R, ovs, &(xo[0])); + T1S = VFNMSI(T1G, T1F); + STM2(&(xo[16]), T1S, ovs, &(xo[0])); + TM = VFNMS(LDK(KP559016994), TL, TK); + TW = VFMA(LDK(KP559016994), TL, TK); + TT = VFNMS(LDK(KP559016994), TE, TD); + TF = VFMA(LDK(KP559016994), TE, TD); + } + } + } + 
{ + V T10, TY, TQ, TS, TJ, TR, TZ, TV; + T10 = VFMA(LDK(KP951056516), TX, TW); + TY = VFNMS(LDK(KP951056516), TX, TW); + TQ = VFMA(LDK(KP951056516), TP, TM); + TS = VFNMS(LDK(KP951056516), TP, TM); + TJ = VFMA(LDK(KP951056516), TI, TF); + TR = VFNMS(LDK(KP951056516), TI, TF); + TZ = VFMA(LDK(KP951056516), TU, TT); + TV = VFNMS(LDK(KP951056516), TU, TT); + { + V T1T, T1U, T1V, T1W; + T1T = VFMAI(TS, TR); + STM2(&(xo[22]), T1T, ovs, &(xo[2])); + STN2(&(xo[20]), T1I, T1T, ovs); + T1U = VFNMSI(TS, TR); + STM2(&(xo[18]), T1U, ovs, &(xo[2])); + STN2(&(xo[16]), T1S, T1U, ovs); + T1V = VFMAI(TQ, TJ); + STM2(&(xo[38]), T1V, ovs, &(xo[2])); + STN2(&(xo[36]), T1N, T1V, ovs); + T1W = VFNMSI(TQ, TJ); + STM2(&(xo[2]), T1W, ovs, &(xo[2])); + STN2(&(xo[0]), T1H, T1W, ovs); + { + V T1X, T1Y, T1Z, T20; + T1X = VFMAI(TY, TV); + STM2(&(xo[6]), T1X, ovs, &(xo[2])); + STN2(&(xo[4]), T1O, T1X, ovs); + T1Y = VFNMSI(TY, TV); + STM2(&(xo[34]), T1Y, ovs, &(xo[2])); + STN2(&(xo[32]), T1P, T1Y, ovs); + T1Z = VFMAI(T10, TZ); + STM2(&(xo[14]), T1Z, ovs, &(xo[2])); + STN2(&(xo[12]), T1M, T1Z, ovs); + T20 = VFNMSI(T10, TZ); + STM2(&(xo[26]), T20, ovs, &(xo[2])); + STN2(&(xo[24]), T1R, T20, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n2fv_20"), {58, 4, 46, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_20) (planner *p) { + X(kdft_register) (p, n2fv_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n2fv_20 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 104 FP additions, 24 FP multiplications, + * (or, 92 additions, 12 multiplications, 12 fused multiply/add), + * 57 stack variables, 4 constants, and 50 memory accesses + */ +#include "n2f.h" + +static void n2fv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP587785252, 
+0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) { + V T3, T1B, Tm, T1i, TG, TN, TO, TH, T13, T16, T1k, T1u, T1v, T1z, T1r; + V T1s, T1y, T1a, T1d, T1j, Ti, TD, TB, TL; + { + V T1, T2, T1g, Tk, Tl, T1h; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + T1g = VADD(T1, T2); + Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tl = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1h = VADD(Tk, Tl); + T3 = VSUB(T1, T2); + T1B = VADD(T1g, T1h); + Tm = VSUB(Tk, Tl); + T1i = VSUB(T1g, T1h); + } + { + V T6, T18, Tw, T12, Tz, T15, T9, T1b, Td, T11, Tp, T19, Ts, T1c, Tg; + V T14; + { + V T4, T5, Tu, Tv; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T18 = VADD(T4, T5); + Tu = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + Tw = VSUB(Tu, Tv); + T12 = VADD(Tu, Tv); + } + { + V Tx, Ty, T7, T8; + Tx = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + Tz = VSUB(Tx, Ty); + T15 = VADD(Tx, Ty); + T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T1b = VADD(T7, T8); + } + { + V Tb, Tc, Tn, To; + Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Td = VSUB(Tb, Tc); + T11 = VADD(Tb, Tc); + Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + To = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + Tp = VSUB(Tn, To); + T19 = VADD(Tn, To); + } + { + V Tq, Tr, Te, Tf; + Tq = LD(&(xi[WS(is, 1)]), ivs, 
&(xi[WS(is, 1)])); + Tr = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + Ts = VSUB(Tq, Tr); + T1c = VADD(Tq, Tr); + Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tg = VSUB(Te, Tf); + T14 = VADD(Te, Tf); + } + TG = VSUB(Ts, Tp); + TN = VSUB(T6, T9); + TO = VSUB(Td, Tg); + TH = VSUB(Tz, Tw); + T13 = VSUB(T11, T12); + T16 = VSUB(T14, T15); + T1k = VADD(T13, T16); + T1u = VADD(T11, T12); + T1v = VADD(T14, T15); + T1z = VADD(T1u, T1v); + T1r = VADD(T18, T19); + T1s = VADD(T1b, T1c); + T1y = VADD(T1r, T1s); + T1a = VSUB(T18, T19); + T1d = VSUB(T1b, T1c); + T1j = VADD(T1a, T1d); + { + V Ta, Th, Tt, TA; + Ta = VADD(T6, T9); + Th = VADD(Td, Tg); + Ti = VADD(Ta, Th); + TD = VMUL(LDK(KP559016994), VSUB(Ta, Th)); + Tt = VADD(Tp, Ts); + TA = VADD(Tw, Tz); + TB = VADD(Tt, TA); + TL = VMUL(LDK(KP559016994), VSUB(TA, Tt)); + } + } + { + V T1I, T1J, T1K, T1L, T1N, T1H, Tj, TC; + Tj = VADD(T3, Ti); + TC = VBYI(VADD(Tm, TB)); + T1H = VSUB(Tj, TC); + STM2(&(xo[10]), T1H, ovs, &(xo[2])); + T1I = VADD(Tj, TC); + STM2(&(xo[30]), T1I, ovs, &(xo[2])); + { + V T1A, T1C, T1D, T1x, T1G, T1t, T1w, T1F, T1E, T1M; + T1A = VMUL(LDK(KP559016994), VSUB(T1y, T1z)); + T1C = VADD(T1y, T1z); + T1D = VFNMS(LDK(KP250000000), T1C, T1B); + T1t = VSUB(T1r, T1s); + T1w = VSUB(T1u, T1v); + T1x = VBYI(VFMA(LDK(KP951056516), T1t, VMUL(LDK(KP587785252), T1w))); + T1G = VBYI(VFNMS(LDK(KP587785252), T1t, VMUL(LDK(KP951056516), T1w))); + T1J = VADD(T1B, T1C); + STM2(&(xo[0]), T1J, ovs, &(xo[0])); + T1F = VSUB(T1D, T1A); + T1K = VSUB(T1F, T1G); + STM2(&(xo[16]), T1K, ovs, &(xo[0])); + T1L = VADD(T1G, T1F); + STM2(&(xo[24]), T1L, ovs, &(xo[0])); + T1E = VADD(T1A, T1D); + T1M = VADD(T1x, T1E); + STM2(&(xo[8]), T1M, ovs, &(xo[0])); + STN2(&(xo[8]), T1M, T1H, ovs); + T1N = VSUB(T1E, T1x); + STM2(&(xo[32]), T1N, ovs, &(xo[0])); + } + { + V T1O, T1P, T1R, T1S; + { + V T1n, T1l, T1m, T1f, T1q, T17, T1e, T1p, T1Q, T1o; + T1n = VMUL(LDK(KP559016994), VSUB(T1j, T1k)); + T1l = 
VADD(T1j, T1k); + T1m = VFNMS(LDK(KP250000000), T1l, T1i); + T17 = VSUB(T13, T16); + T1e = VSUB(T1a, T1d); + T1f = VBYI(VFNMS(LDK(KP587785252), T1e, VMUL(LDK(KP951056516), T17))); + T1q = VBYI(VFMA(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T17))); + T1O = VADD(T1i, T1l); + STM2(&(xo[20]), T1O, ovs, &(xo[0])); + T1p = VADD(T1n, T1m); + T1P = VSUB(T1p, T1q); + STM2(&(xo[12]), T1P, ovs, &(xo[0])); + T1Q = VADD(T1q, T1p); + STM2(&(xo[28]), T1Q, ovs, &(xo[0])); + STN2(&(xo[28]), T1Q, T1I, ovs); + T1o = VSUB(T1m, T1n); + T1R = VADD(T1f, T1o); + STM2(&(xo[4]), T1R, ovs, &(xo[0])); + T1S = VSUB(T1o, T1f); + STM2(&(xo[36]), T1S, ovs, &(xo[0])); + } + { + V TI, TP, TX, TU, TM, TW, TF, TT, TK, TE; + TI = VFMA(LDK(KP951056516), TG, VMUL(LDK(KP587785252), TH)); + TP = VFMA(LDK(KP951056516), TN, VMUL(LDK(KP587785252), TO)); + TX = VFNMS(LDK(KP587785252), TN, VMUL(LDK(KP951056516), TO)); + TU = VFNMS(LDK(KP587785252), TG, VMUL(LDK(KP951056516), TH)); + TK = VFMS(LDK(KP250000000), TB, Tm); + TM = VADD(TK, TL); + TW = VSUB(TL, TK); + TE = VFNMS(LDK(KP250000000), Ti, T3); + TF = VADD(TD, TE); + TT = VSUB(TE, TD); + { + V TJ, TQ, T1T, T1U; + TJ = VADD(TF, TI); + TQ = VBYI(VSUB(TM, TP)); + T1T = VSUB(TJ, TQ); + STM2(&(xo[38]), T1T, ovs, &(xo[2])); + STN2(&(xo[36]), T1S, T1T, ovs); + T1U = VADD(TJ, TQ); + STM2(&(xo[2]), T1U, ovs, &(xo[2])); + STN2(&(xo[0]), T1J, T1U, ovs); + } + { + V TZ, T10, T1V, T1W; + TZ = VADD(TT, TU); + T10 = VBYI(VADD(TX, TW)); + T1V = VSUB(TZ, T10); + STM2(&(xo[26]), T1V, ovs, &(xo[2])); + STN2(&(xo[24]), T1L, T1V, ovs); + T1W = VADD(TZ, T10); + STM2(&(xo[14]), T1W, ovs, &(xo[2])); + STN2(&(xo[12]), T1P, T1W, ovs); + } + { + V TR, TS, T1X, T1Y; + TR = VSUB(TF, TI); + TS = VBYI(VADD(TP, TM)); + T1X = VSUB(TR, TS); + STM2(&(xo[22]), T1X, ovs, &(xo[2])); + STN2(&(xo[20]), T1O, T1X, ovs); + T1Y = VADD(TR, TS); + STM2(&(xo[18]), T1Y, ovs, &(xo[2])); + STN2(&(xo[16]), T1K, T1Y, ovs); + } + { + V TV, TY, T1Z, T20; + TV = VSUB(TT, TU); + TY = VBYI(VSUB(TW, TX)); + 
T1Z = VSUB(TV, TY); + STM2(&(xo[34]), T1Z, ovs, &(xo[2])); + STN2(&(xo[32]), T1N, T1Z, ovs); + T20 = VADD(TV, TY); + STM2(&(xo[6]), T20, ovs, &(xo[2])); + STN2(&(xo[4]), T1R, T20, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 20, XSIMD_STRING("n2fv_20"), {92, 12, 12, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_20) (planner *p) { + X(kdft_register) (p, n2fv_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,823 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:57 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name n2fv_32 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 186 FP additions, 98 FP multiplications, + * (or, 88 additions, 0 multiplications, 98 fused multiply/add), + * 120 stack variables, 7 constants, and 80 memory accesses + */ +#include "n2f.h" + +static void n2fv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T31, T32, T33, T34, T35, T36, T37, T38, T39, T3a, T3b, T3c, T1h, Tr, T3d; + V T3e, T3f, T3g, T1a, T1k, TI, T1b, T1L, T1P, T1I, T1G, T1O, T1Q, T1H, T1z; + V T1c, TZ; + { + V T2x, T1T, T2K, T1W, T1p, Tb, T1A, T16, Tu, TF, T2N, T2H, T2b, T2t, TY; + V T1w, TT, T1v, T20, T2C, Tj, Te, T2h, To, T2f, T23, T2D, TB, TG, Th; + V T2i, Tk; + { + V TL, TW, TP, TQ, T2F, T27, T28, 
TO; + { + V T1, T2, T12, T13, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T12 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T13 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + { + V TM, T25, T26, TN; + { + V TJ, T3, T14, T1U, T6, T1V, T9, TK, TU, TV, T1R, T1S, Ta, T15; + TJ = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1R = VADD(T1, T2); + T3 = VSUB(T1, T2); + T1S = VADD(T12, T13); + T14 = VSUB(T12, T13); + T1U = VADD(T4, T5); + T6 = VSUB(T4, T5); + T1V = VADD(T7, T8); + T9 = VSUB(T7, T8); + TK = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TU = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T2x = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + TV = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TM = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T2K = VSUB(T1V, T1U); + T1W = VADD(T1U, T1V); + Ta = VADD(T6, T9); + T15 = VSUB(T9, T6); + T25 = VADD(TJ, TK); + TL = VSUB(TJ, TK); + T26 = VADD(TV, TU); + TW = VSUB(TU, TV); + TN = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TP = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1p = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + T1A = VFMA(LDK(KP707106781), T15, T14); + T16 = VFNMS(LDK(KP707106781), T15, T14); + TQ = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + } + T2F = VSUB(T25, T26); + T27 = VADD(T25, T26); + T28 = VADD(TM, TN); + TO = VSUB(TM, TN); + } + } + { + V Ty, T21, Tx, Tz, T1Y, T1Z; + { + V Ts, Tt, TD, T29, TR, TE, Tv, Tw; + Ts = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TD = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T29 = VADD(TP, TQ); + TR = VSUB(TP, TQ); + TE = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + Tv = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 21)]), ivs, 
&(xi[WS(is, 1)])); + Ty = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1Y = VADD(Ts, Tt); + Tu = VSUB(Ts, Tt); + { + V T2G, T2a, TX, TS; + T2G = VSUB(T29, T28); + T2a = VADD(T28, T29); + TX = VSUB(TR, TO); + TS = VADD(TO, TR); + T1Z = VADD(TD, TE); + TF = VSUB(TD, TE); + T21 = VADD(Tv, Tw); + Tx = VSUB(Tv, Tw); + T2N = VFMA(LDK(KP414213562), T2F, T2G); + T2H = VFNMS(LDK(KP414213562), T2G, T2F); + T2b = VSUB(T27, T2a); + T2t = VADD(T27, T2a); + TY = VFMA(LDK(KP707106781), TX, TW); + T1w = VFNMS(LDK(KP707106781), TX, TW); + TT = VFMA(LDK(KP707106781), TS, TL); + T1v = VFNMS(LDK(KP707106781), TS, TL); + Tz = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + } + } + T20 = VADD(T1Y, T1Z); + T2C = VSUB(T1Y, T1Z); + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Tm = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + { + V Tf, TA, T22, Tg; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + T22 = VADD(Ty, Tz); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T2h = VADD(Tc, Td); + To = VSUB(Tm, Tn); + T2f = VADD(Tn, Tm); + T23 = VADD(T21, T22); + T2D = VSUB(T21, T22); + TB = VADD(Tx, TA); + TG = VSUB(Tx, TA); + Th = VSUB(Tf, Tg); + T2i = VADD(Tf, Tg); + Tk = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + } + } + } + } + { + V T1t, TH, T1s, TC, T2P, T2U, T2n, T2d, T2w, T2u, T1q, T19, T1B, Tq, T2W; + V T2M, T2B, T2T, T2v, T2r, T2o, T2m, T2X, T2I; + { + V T1X, T2p, T2E, T2O, T2s, T2y, T2j, T17, Ti, T2e, Tl, T2c, T2l, T24; + T1X = VSUB(T1T, T1W); + T2p = VADD(T1T, T1W); + T2E = VFNMS(LDK(KP414213562), T2D, T2C); + T2O = VFMA(LDK(KP414213562), T2C, T2D); + T2s = VADD(T20, T23); + T24 = VSUB(T20, T23); + T1t = VFNMS(LDK(KP707106781), TG, TF); + TH = VFMA(LDK(KP707106781), TG, TF); + T1s = VFNMS(LDK(KP707106781), TB, Tu); + TC = VFMA(LDK(KP707106781), TB, Tu); + T2y = VSUB(T2h, T2i); + T2j = VADD(T2h, T2i); + 
T17 = VFMA(LDK(KP414213562), Te, Th); + Ti = VFNMS(LDK(KP414213562), Th, Te); + T2e = VADD(Tj, Tk); + Tl = VSUB(Tj, Tk); + T2c = VADD(T24, T2b); + T2l = VSUB(T2b, T24); + { + V T2L, T2A, T2q, T2k; + T2P = VSUB(T2N, T2O); + T2U = VADD(T2O, T2N); + { + V T2z, T2g, T18, Tp; + T2z = VSUB(T2e, T2f); + T2g = VADD(T2e, T2f); + T18 = VFMA(LDK(KP414213562), Tl, To); + Tp = VFNMS(LDK(KP414213562), To, Tl); + T2n = VFMA(LDK(KP707106781), T2c, T1X); + T2d = VFNMS(LDK(KP707106781), T2c, T1X); + T2w = VSUB(T2t, T2s); + T2u = VADD(T2s, T2t); + T2L = VSUB(T2z, T2y); + T2A = VADD(T2y, T2z); + T2q = VADD(T2j, T2g); + T2k = VSUB(T2g, T2j); + T1q = VADD(T17, T18); + T19 = VSUB(T17, T18); + T1B = VSUB(Tp, Ti); + Tq = VADD(Ti, Tp); + } + T2W = VFNMS(LDK(KP707106781), T2L, T2K); + T2M = VFMA(LDK(KP707106781), T2L, T2K); + T2B = VFMA(LDK(KP707106781), T2A, T2x); + T2T = VFNMS(LDK(KP707106781), T2A, T2x); + T2v = VSUB(T2p, T2q); + T2r = VADD(T2p, T2q); + T2o = VFMA(LDK(KP707106781), T2l, T2k); + T2m = VFNMS(LDK(KP707106781), T2l, T2k); + T2X = VSUB(T2H, T2E); + T2I = VADD(T2E, T2H); + } + } + { + V T2V, T2Z, T2Y, T30, T2R, T2J; + T2V = VFNMS(LDK(KP923879532), T2U, T2T); + T2Z = VFMA(LDK(KP923879532), T2U, T2T); + T31 = VFNMSI(T2w, T2v); + STM2(&(xo[48]), T31, ovs, &(xo[0])); + T32 = VFMAI(T2w, T2v); + STM2(&(xo[16]), T32, ovs, &(xo[0])); + T33 = VADD(T2r, T2u); + STM2(&(xo[0]), T33, ovs, &(xo[0])); + T34 = VSUB(T2r, T2u); + STM2(&(xo[32]), T34, ovs, &(xo[0])); + T35 = VFNMSI(T2o, T2n); + STM2(&(xo[56]), T35, ovs, &(xo[0])); + T36 = VFMAI(T2o, T2n); + STM2(&(xo[8]), T36, ovs, &(xo[0])); + T37 = VFMAI(T2m, T2d); + STM2(&(xo[40]), T37, ovs, &(xo[0])); + T38 = VFNMSI(T2m, T2d); + STM2(&(xo[24]), T38, ovs, &(xo[0])); + T2Y = VFMA(LDK(KP923879532), T2X, T2W); + T30 = VFNMS(LDK(KP923879532), T2X, T2W); + T2R = VFMA(LDK(KP923879532), T2I, T2B); + T2J = VFNMS(LDK(KP923879532), T2I, T2B); + { + V T1J, T1r, T1C, T1M, T2S, T2Q, T1u, T1D, T1E, T1x; + T1J = VFNMS(LDK(KP923879532), T1q, T1p); + T1r = 
VFMA(LDK(KP923879532), T1q, T1p); + T1C = VFMA(LDK(KP923879532), T1B, T1A); + T1M = VFNMS(LDK(KP923879532), T1B, T1A); + T39 = VFNMSI(T30, T2Z); + STM2(&(xo[12]), T39, ovs, &(xo[0])); + T3a = VFMAI(T30, T2Z); + STM2(&(xo[52]), T3a, ovs, &(xo[0])); + T3b = VFNMSI(T2Y, T2V); + STM2(&(xo[44]), T3b, ovs, &(xo[0])); + T3c = VFMAI(T2Y, T2V); + STM2(&(xo[20]), T3c, ovs, &(xo[0])); + T2S = VFMA(LDK(KP923879532), T2P, T2M); + T2Q = VFNMS(LDK(KP923879532), T2P, T2M); + T1u = VFMA(LDK(KP668178637), T1t, T1s); + T1D = VFNMS(LDK(KP668178637), T1s, T1t); + T1E = VFNMS(LDK(KP668178637), T1v, T1w); + T1x = VFMA(LDK(KP668178637), T1w, T1v); + { + V T1K, T1F, T1N, T1y; + T1h = VFNMS(LDK(KP923879532), Tq, Tb); + Tr = VFMA(LDK(KP923879532), Tq, Tb); + T3d = VFNMSI(T2S, T2R); + STM2(&(xo[60]), T3d, ovs, &(xo[0])); + T3e = VFMAI(T2S, T2R); + STM2(&(xo[4]), T3e, ovs, &(xo[0])); + T3f = VFMAI(T2Q, T2J); + STM2(&(xo[36]), T3f, ovs, &(xo[0])); + T3g = VFNMSI(T2Q, T2J); + STM2(&(xo[28]), T3g, ovs, &(xo[0])); + T1K = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + T1N = VSUB(T1x, T1u); + T1y = VADD(T1u, T1x); + T1a = VFMA(LDK(KP923879532), T19, T16); + T1k = VFNMS(LDK(KP923879532), T19, T16); + TI = VFNMS(LDK(KP198912367), TH, TC); + T1b = VFMA(LDK(KP198912367), TC, TH); + T1L = VFMA(LDK(KP831469612), T1K, T1J); + T1P = VFNMS(LDK(KP831469612), T1K, T1J); + T1I = VFMA(LDK(KP831469612), T1F, T1C); + T1G = VFNMS(LDK(KP831469612), T1F, T1C); + T1O = VFMA(LDK(KP831469612), T1N, T1M); + T1Q = VFNMS(LDK(KP831469612), T1N, T1M); + T1H = VFMA(LDK(KP831469612), T1y, T1r); + T1z = VFNMS(LDK(KP831469612), T1y, T1r); + T1c = VFMA(LDK(KP198912367), TT, TY); + TZ = VFNMS(LDK(KP198912367), TY, TT); + } + } + } + } + } + { + V T1d, T1i, T10, T1l; + { + V T3h, T3i, T3j, T3k; + T3h = VFNMSI(T1O, T1L); + STM2(&(xo[42]), T3h, ovs, &(xo[2])); + STN2(&(xo[40]), T37, T3h, ovs); + T3i = VFMAI(T1O, T1L); + STM2(&(xo[22]), T3i, ovs, &(xo[2])); + STN2(&(xo[20]), T3c, T3i, ovs); + T3j = VFMAI(T1Q, T1P); + STM2(&(xo[54]), T3j, 
ovs, &(xo[2])); + STN2(&(xo[52]), T3a, T3j, ovs); + T3k = VFNMSI(T1Q, T1P); + STM2(&(xo[10]), T3k, ovs, &(xo[2])); + STN2(&(xo[8]), T36, T3k, ovs); + { + V T3l, T3m, T3n, T3o; + T3l = VFMAI(T1I, T1H); + STM2(&(xo[6]), T3l, ovs, &(xo[2])); + STN2(&(xo[4]), T3e, T3l, ovs); + T3m = VFNMSI(T1I, T1H); + STM2(&(xo[58]), T3m, ovs, &(xo[2])); + STN2(&(xo[56]), T35, T3m, ovs); + T3n = VFMAI(T1G, T1z); + STM2(&(xo[38]), T3n, ovs, &(xo[2])); + STN2(&(xo[36]), T3f, T3n, ovs); + T3o = VFNMSI(T1G, T1z); + STM2(&(xo[26]), T3o, ovs, &(xo[2])); + STN2(&(xo[24]), T38, T3o, ovs); + T1d = VSUB(T1b, T1c); + T1i = VADD(T1b, T1c); + T10 = VADD(TI, TZ); + T1l = VSUB(TZ, TI); + } + } + { + V T1n, T1j, T1e, T1g, T1o, T1m, T11, T1f; + T1n = VFMA(LDK(KP980785280), T1i, T1h); + T1j = VFNMS(LDK(KP980785280), T1i, T1h); + T1e = VFNMS(LDK(KP980785280), T1d, T1a); + T1g = VFMA(LDK(KP980785280), T1d, T1a); + T1o = VFMA(LDK(KP980785280), T1l, T1k); + T1m = VFNMS(LDK(KP980785280), T1l, T1k); + T11 = VFNMS(LDK(KP980785280), T10, Tr); + T1f = VFMA(LDK(KP980785280), T10, Tr); + { + V T3p, T3q, T3r, T3s; + T3p = VFMAI(T1m, T1j); + STM2(&(xo[46]), T3p, ovs, &(xo[2])); + STN2(&(xo[44]), T3b, T3p, ovs); + T3q = VFNMSI(T1m, T1j); + STM2(&(xo[18]), T3q, ovs, &(xo[2])); + STN2(&(xo[16]), T32, T3q, ovs); + T3r = VFNMSI(T1o, T1n); + STM2(&(xo[50]), T3r, ovs, &(xo[2])); + STN2(&(xo[48]), T31, T3r, ovs); + T3s = VFMAI(T1o, T1n); + STM2(&(xo[14]), T3s, ovs, &(xo[2])); + STN2(&(xo[12]), T39, T3s, ovs); + { + V T3t, T3u, T3v, T3w; + T3t = VFMAI(T1g, T1f); + STM2(&(xo[62]), T3t, ovs, &(xo[2])); + STN2(&(xo[60]), T3d, T3t, ovs); + T3u = VFNMSI(T1g, T1f); + STM2(&(xo[2]), T3u, ovs, &(xo[2])); + STN2(&(xo[0]), T33, T3u, ovs); + T3v = VFMAI(T1e, T11); + STM2(&(xo[30]), T3v, ovs, &(xo[2])); + STN2(&(xo[28]), T3g, T3v, ovs); + T3w = VFNMSI(T1e, T11); + STM2(&(xo[34]), T3w, ovs, &(xo[2])); + STN2(&(xo[32]), T34, T3w, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n2fv_32"), 
{88, 0, 98, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_32) (planner *p) { + X(kdft_register) (p, n2fv_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name n2fv_32 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 186 FP additions, 42 FP multiplications, + * (or, 170 additions, 26 multiplications, 16 fused multiply/add), + * 72 stack variables, 7 constants, and 80 memory accesses + */ +#include "n2f.h" + +static void n2fv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T1T, T1W, T2K, T2x, T16, T1A, Tb, T1p, TT, T1v, TY, T1w, T27, T2a, T2b; + V T2H, T2O, TC, T1s, TH, T1t, T20, T23, T24, T2E, T2N, T2g, T2j, Tq, T1B; + V T19, T1q, T2A, T2L; + { + V T3, T1R, T15, T1S, T6, T1U, T9, T1V, T12, Ta; + { + V T1, T2, T13, T14; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T1R = VADD(T1, T2); + T13 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T14 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T15 = VSUB(T13, T14); + T1S = VADD(T13, T14); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + T6 = 
VSUB(T4, T5); + T1U = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T1V = VADD(T7, T8); + } + T1T = VADD(T1R, T1S); + T1W = VADD(T1U, T1V); + T2K = VSUB(T1V, T1U); + T2x = VSUB(T1R, T1S); + T12 = VMUL(LDK(KP707106781), VSUB(T9, T6)); + T16 = VSUB(T12, T15); + T1A = VADD(T15, T12); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VADD(T3, Ta); + T1p = VSUB(T3, Ta); + } + { + V TL, T25, TX, T26, TO, T28, TR, T29; + { + V TJ, TK, TV, TW; + TJ = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + TK = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + TL = VSUB(TJ, TK); + T25 = VADD(TJ, TK); + TV = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + TW = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + TX = VSUB(TV, TW); + T26 = VADD(TV, TW); + } + { + V TM, TN, TP, TQ; + TM = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + TN = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + TO = VSUB(TM, TN); + T28 = VADD(TM, TN); + TP = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + TQ = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + TR = VSUB(TP, TQ); + T29 = VADD(TP, TQ); + } + { + V TS, TU, T2F, T2G; + TS = VMUL(LDK(KP707106781), VADD(TO, TR)); + TT = VADD(TL, TS); + T1v = VSUB(TL, TS); + TU = VMUL(LDK(KP707106781), VSUB(TR, TO)); + TY = VSUB(TU, TX); + T1w = VADD(TX, TU); + T27 = VADD(T25, T26); + T2a = VADD(T28, T29); + T2b = VSUB(T27, T2a); + T2F = VSUB(T25, T26); + T2G = VSUB(T29, T28); + T2H = VFNMS(LDK(KP382683432), T2G, VMUL(LDK(KP923879532), T2F)); + T2O = VFMA(LDK(KP382683432), T2F, VMUL(LDK(KP923879532), T2G)); + } + } + { + V Tu, T1Y, TG, T1Z, Tx, T21, TA, T22; + { + V Ts, Tt, TE, TF; + Ts = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + Tt = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + Tu = VSUB(Ts, Tt); + T1Y = VADD(Ts, Tt); + TE = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + TF = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + TG = VSUB(TE, TF); + T1Z = VADD(TE, TF); + } + { + V Tv, Tw, Ty, 
Tz; + Tv = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + Tw = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + Tx = VSUB(Tv, Tw); + T21 = VADD(Tv, Tw); + Ty = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + Tz = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + TA = VSUB(Ty, Tz); + T22 = VADD(Ty, Tz); + } + { + V TB, TD, T2C, T2D; + TB = VMUL(LDK(KP707106781), VADD(Tx, TA)); + TC = VADD(Tu, TB); + T1s = VSUB(Tu, TB); + TD = VMUL(LDK(KP707106781), VSUB(TA, Tx)); + TH = VSUB(TD, TG); + T1t = VADD(TG, TD); + T20 = VADD(T1Y, T1Z); + T23 = VADD(T21, T22); + T24 = VSUB(T20, T23); + T2C = VSUB(T1Y, T1Z); + T2D = VSUB(T22, T21); + T2E = VFMA(LDK(KP923879532), T2C, VMUL(LDK(KP382683432), T2D)); + T2N = VFNMS(LDK(KP382683432), T2C, VMUL(LDK(KP923879532), T2D)); + } + } + { + V Te, T2h, To, T2f, Th, T2i, Tl, T2e, Ti, Tp; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T2h = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T2f = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T2i = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T2e = VADD(Tj, Tk); + } + T2g = VADD(T2e, T2f); + T2j = VADD(T2h, T2i); + Ti = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + Tp = VFMA(LDK(KP923879532), Tl, VMUL(LDK(KP382683432), To)); + Tq = VADD(Ti, Tp); + T1B = VSUB(Tp, Ti); + { + V T17, T18, T2y, T2z; + T17 = VFNMS(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T18 = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + T19 = VSUB(T17, T18); + T1q = VADD(T18, T17); + T2y = VSUB(T2h, T2i); + T2z = VSUB(T2e, T2f); + T2A = VMUL(LDK(KP707106781), VADD(T2y, T2z)); + T2L = VMUL(LDK(KP707106781), VSUB(T2z, T2y)); + } + } + { + V T31, T32, 
T33, T34, T35, T36, T37, T38, T39, T3a, T3b, T3c; + { + V T2d, T2n, T2m, T2o; + { + V T1X, T2c, T2k, T2l; + T1X = VSUB(T1T, T1W); + T2c = VMUL(LDK(KP707106781), VADD(T24, T2b)); + T2d = VADD(T1X, T2c); + T2n = VSUB(T1X, T2c); + T2k = VSUB(T2g, T2j); + T2l = VMUL(LDK(KP707106781), VSUB(T2b, T24)); + T2m = VBYI(VADD(T2k, T2l)); + T2o = VBYI(VSUB(T2l, T2k)); + } + T31 = VSUB(T2d, T2m); + STM2(&(xo[56]), T31, ovs, &(xo[0])); + T32 = VADD(T2n, T2o); + STM2(&(xo[24]), T32, ovs, &(xo[0])); + T33 = VADD(T2d, T2m); + STM2(&(xo[8]), T33, ovs, &(xo[0])); + T34 = VSUB(T2n, T2o); + STM2(&(xo[40]), T34, ovs, &(xo[0])); + } + { + V T2r, T2v, T2u, T2w; + { + V T2p, T2q, T2s, T2t; + T2p = VADD(T1T, T1W); + T2q = VADD(T2j, T2g); + T2r = VADD(T2p, T2q); + T2v = VSUB(T2p, T2q); + T2s = VADD(T20, T23); + T2t = VADD(T27, T2a); + T2u = VADD(T2s, T2t); + T2w = VBYI(VSUB(T2t, T2s)); + } + T35 = VSUB(T2r, T2u); + STM2(&(xo[32]), T35, ovs, &(xo[0])); + T36 = VADD(T2v, T2w); + STM2(&(xo[16]), T36, ovs, &(xo[0])); + T37 = VADD(T2r, T2u); + STM2(&(xo[0]), T37, ovs, &(xo[0])); + T38 = VSUB(T2v, T2w); + STM2(&(xo[48]), T38, ovs, &(xo[0])); + } + { + V T2V, T2Z, T2Y, T30; + { + V T2T, T2U, T2W, T2X; + T2T = VSUB(T2H, T2E); + T2U = VSUB(T2L, T2K); + T2V = VBYI(VSUB(T2T, T2U)); + T2Z = VBYI(VADD(T2U, T2T)); + T2W = VSUB(T2x, T2A); + T2X = VSUB(T2O, T2N); + T2Y = VSUB(T2W, T2X); + T30 = VADD(T2W, T2X); + } + T39 = VADD(T2V, T2Y); + STM2(&(xo[20]), T39, ovs, &(xo[0])); + T3a = VSUB(T30, T2Z); + STM2(&(xo[52]), T3a, ovs, &(xo[0])); + T3b = VSUB(T2Y, T2V); + STM2(&(xo[44]), T3b, ovs, &(xo[0])); + T3c = VADD(T2Z, T30); + STM2(&(xo[12]), T3c, ovs, &(xo[0])); + } + { + V T3d, T3e, T3f, T3g; + { + V T2J, T2R, T2Q, T2S; + { + V T2B, T2I, T2M, T2P; + T2B = VADD(T2x, T2A); + T2I = VADD(T2E, T2H); + T2J = VADD(T2B, T2I); + T2R = VSUB(T2B, T2I); + T2M = VADD(T2K, T2L); + T2P = VADD(T2N, T2O); + T2Q = VBYI(VADD(T2M, T2P)); + T2S = VBYI(VSUB(T2P, T2M)); + } + T3d = VSUB(T2J, T2Q); + STM2(&(xo[60]), T3d, ovs, 
&(xo[0])); + T3e = VADD(T2R, T2S); + STM2(&(xo[28]), T3e, ovs, &(xo[0])); + T3f = VADD(T2J, T2Q); + STM2(&(xo[4]), T3f, ovs, &(xo[0])); + T3g = VSUB(T2R, T2S); + STM2(&(xo[36]), T3g, ovs, &(xo[0])); + } + { + V T1r, T1C, T1M, T1K, T1F, T1N, T1y, T1J; + T1r = VADD(T1p, T1q); + T1C = VADD(T1A, T1B); + T1M = VSUB(T1p, T1q); + T1K = VSUB(T1B, T1A); + { + V T1D, T1E, T1u, T1x; + T1D = VFNMS(LDK(KP555570233), T1s, VMUL(LDK(KP831469612), T1t)); + T1E = VFMA(LDK(KP555570233), T1v, VMUL(LDK(KP831469612), T1w)); + T1F = VADD(T1D, T1E); + T1N = VSUB(T1E, T1D); + T1u = VFMA(LDK(KP831469612), T1s, VMUL(LDK(KP555570233), T1t)); + T1x = VFNMS(LDK(KP555570233), T1w, VMUL(LDK(KP831469612), T1v)); + T1y = VADD(T1u, T1x); + T1J = VSUB(T1x, T1u); + } + { + V T1z, T1G, T3h, T3i; + T1z = VADD(T1r, T1y); + T1G = VBYI(VADD(T1C, T1F)); + T3h = VSUB(T1z, T1G); + STM2(&(xo[58]), T3h, ovs, &(xo[2])); + STN2(&(xo[56]), T31, T3h, ovs); + T3i = VADD(T1z, T1G); + STM2(&(xo[6]), T3i, ovs, &(xo[2])); + STN2(&(xo[4]), T3f, T3i, ovs); + } + { + V T1P, T1Q, T3j, T3k; + T1P = VBYI(VADD(T1K, T1J)); + T1Q = VADD(T1M, T1N); + T3j = VADD(T1P, T1Q); + STM2(&(xo[10]), T3j, ovs, &(xo[2])); + STN2(&(xo[8]), T33, T3j, ovs); + T3k = VSUB(T1Q, T1P); + STM2(&(xo[54]), T3k, ovs, &(xo[2])); + STN2(&(xo[52]), T3a, T3k, ovs); + } + { + V T1H, T1I, T3l, T3m; + T1H = VSUB(T1r, T1y); + T1I = VBYI(VSUB(T1F, T1C)); + T3l = VSUB(T1H, T1I); + STM2(&(xo[38]), T3l, ovs, &(xo[2])); + STN2(&(xo[36]), T3g, T3l, ovs); + T3m = VADD(T1H, T1I); + STM2(&(xo[26]), T3m, ovs, &(xo[2])); + STN2(&(xo[24]), T32, T3m, ovs); + } + { + V T1L, T1O, T3n, T3o; + T1L = VBYI(VSUB(T1J, T1K)); + T1O = VSUB(T1M, T1N); + T3n = VADD(T1L, T1O); + STM2(&(xo[22]), T3n, ovs, &(xo[2])); + STN2(&(xo[20]), T39, T3n, ovs); + T3o = VSUB(T1O, T1L); + STM2(&(xo[42]), T3o, ovs, &(xo[2])); + STN2(&(xo[40]), T34, T3o, ovs); + } + } + { + V Tr, T1a, T1k, T1i, T1d, T1l, T10, T1h; + Tr = VADD(Tb, Tq); + T1a = VADD(T16, T19); + T1k = VSUB(Tb, Tq); + T1i = VSUB(T19, T16); 
+ { + V T1b, T1c, TI, TZ; + T1b = VFNMS(LDK(KP195090322), TC, VMUL(LDK(KP980785280), TH)); + T1c = VFMA(LDK(KP195090322), TT, VMUL(LDK(KP980785280), TY)); + T1d = VADD(T1b, T1c); + T1l = VSUB(T1c, T1b); + TI = VFMA(LDK(KP980785280), TC, VMUL(LDK(KP195090322), TH)); + TZ = VFNMS(LDK(KP195090322), TY, VMUL(LDK(KP980785280), TT)); + T10 = VADD(TI, TZ); + T1h = VSUB(TZ, TI); + } + { + V T11, T1e, T3p, T3q; + T11 = VADD(Tr, T10); + T1e = VBYI(VADD(T1a, T1d)); + T3p = VSUB(T11, T1e); + STM2(&(xo[62]), T3p, ovs, &(xo[2])); + STN2(&(xo[60]), T3d, T3p, ovs); + T3q = VADD(T11, T1e); + STM2(&(xo[2]), T3q, ovs, &(xo[2])); + STN2(&(xo[0]), T37, T3q, ovs); + } + { + V T1n, T1o, T3r, T3s; + T1n = VBYI(VADD(T1i, T1h)); + T1o = VADD(T1k, T1l); + T3r = VADD(T1n, T1o); + STM2(&(xo[14]), T3r, ovs, &(xo[2])); + STN2(&(xo[12]), T3c, T3r, ovs); + T3s = VSUB(T1o, T1n); + STM2(&(xo[50]), T3s, ovs, &(xo[2])); + STN2(&(xo[48]), T38, T3s, ovs); + } + { + V T1f, T1g, T3t, T3u; + T1f = VSUB(Tr, T10); + T1g = VBYI(VSUB(T1d, T1a)); + T3t = VSUB(T1f, T1g); + STM2(&(xo[34]), T3t, ovs, &(xo[2])); + STN2(&(xo[32]), T35, T3t, ovs); + T3u = VADD(T1f, T1g); + STM2(&(xo[30]), T3u, ovs, &(xo[2])); + STN2(&(xo[28]), T3e, T3u, ovs); + } + { + V T1j, T1m, T3v, T3w; + T1j = VBYI(VSUB(T1h, T1i)); + T1m = VSUB(T1k, T1l); + T3v = VADD(T1j, T1m); + STM2(&(xo[18]), T3v, ovs, &(xo[2])); + STN2(&(xo[16]), T36, T3v, ovs); + T3w = VSUB(T1m, T1j); + STM2(&(xo[46]), T3w, ovs, &(xo[2])); + STN2(&(xo[44]), T3b, T3w, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n2fv_32"), {170, 26, 16, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_32) (planner *p) { + X(kdft_register) (p, n2fv_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,138 @@ +/* + * Copyright (c) 
2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name n2fv_4 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 8 FP additions, 2 FP multiplications, + * (or, 6 additions, 0 multiplications, 2 fused multiply/add), + * 15 stack variables, 0 constants, and 10 memory accesses + */ +#include "n2f.h" + +static void n2fv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) /* HAVE_FMA build of the n2fv_4 kernel (registered below under a descriptor whose first field is 4). Input is read only through xi, which aliases ri, and output is written only through xo, which aliases ro; ii and io are never referenced in this body and os appears only in MAKE_VOLATILE_STRIDE. */ +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { /* each pass accounts for VL of the v requested iterations and advances xi by VL * ivs and xo by VL * ovs */ + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); /* T1, T2 hold inputs 0 and 2; T4, T5 hold inputs 1 and 3, each index taken through WS(is, k) */ + { + V T3, T7, T6, T8; + T3 = VSUB(T1, T2); + T7 = 
VADD(T1, T2); + T6 = VSUB(T4, T5); + T8 = VADD(T4, T5); /* T3 and T7 are the difference and sum of inputs 0 and 2; T6 and T8 are the difference and sum of inputs 1 and 3 */ + { + V T9, Ta, Tb, Tc; + T9 = VSUB(T7, T8); + STM2(&(xo[4]), T9, ovs, &(xo[0])); + Ta = VADD(T7, T8); + STM2(&(xo[0]), Ta, ovs, &(xo[0])); /* xo[4] receives T7 minus T8, xo[0] receives their sum */ + Tb = VFMAI(T6, T3); + STM2(&(xo[6]), Tb, ovs, &(xo[2])); + STN2(&(xo[4]), T9, Tb, ovs); + Tc = VFNMSI(T6, T3); + STM2(&(xo[2]), Tc, ovs, &(xo[2])); + STN2(&(xo[0]), Ta, Tc, ovs); /* xo[6] and xo[2] combine T3 with T6 through VFMAI and VFNMSI; presumably T3 plus or minus i times T6, by analogy with the VBYI form used in the non-FMA variant further down -- confirm against the SIMD macro definitions. Each STN2 is issued only after both members of its output pair have been produced; STM2/STN2 store semantics come from headers not visible here. */ + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 4, XSIMD_STRING("n2fv_4"), {6, 0, 2, 0}, &GENUS, 0, 2, 0, 0 }; /* {6, 0, 2, 0} mirrors the 6 additions, 0 multiplications, 2 fused multiply/add quoted in the comment above this variant; the meaning of the remaining fields is not visible here */ + +void XSIMD(codelet_n2fv_4) (planner *p) { + X(kdft_register) (p, n2fv_4, &desc); /* registration hook: hands this kernel and desc to the planner */ +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name n2fv_4 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 8 FP additions, 0 FP multiplications, + * (or, 8 additions, 0 multiplications, 0 fused multiply/add), + * 11 stack variables, 0 constants, and 10 memory accesses + */ +#include "n2f.h" + +static void n2fv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) /* Build without HAVE_FMA: same input loads and output slots as the variant above, but with an explicit VBYI call followed by plain VSUB and VADD where that variant uses VFNMSI and VFMAI. As above, ii and io are never referenced in this body. */ +{ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(8, is), MAKE_VOLATILE_STRIDE(8, os)) { + V T3, T7, T6, T8; + { + V T1, T2, T4, T5; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T7 = VADD(T1, T2); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T6 = VBYI(VSUB(T4, T5)); + T8 = VADD(T4, T5); + } + { + V T9, Ta, Tb, Tc; + T9 = VSUB(T3, T6); + STM2(&(xo[2]), T9, ovs, &(xo[2])); + Ta = VADD(T7, T8); + STM2(&(xo[0]), Ta, ovs, &(xo[0])); + STN2(&(xo[0]), Ta, T9, ovs); + Tb = VADD(T3, T6); + STM2(&(xo[6]), Tb, ovs, &(xo[2])); + Tc = VSUB(T7, T8); + STM2(&(xo[4]), Tc, ovs, &(xo[0])); + STN2(&(xo[4]), Tc, Tb, ovs); /* xo[2] receives T3 minus T6 and xo[6] their sum, where T6 is VBYI applied to input 1 minus input 3; xo[0] receives the sum of T7 and T8 and xo[4] their difference */ + } + } + } + VLEAVE(); +} + +static const 
kdft_desc desc = { 4, XSIMD_STRING("n2fv_4"), {8, 0, 0, 0}, &GENUS, 0, 2, 0, 0 }; /* {8, 0, 0, 0} mirrors the 8 additions, 0 multiplications, 0 fused multiply/add quoted in the comment above this variant */ + +void XSIMD(codelet_n2fv_4) (planner *p) { + X(kdft_register) (p, n2fv_4, &desc); /* registration hook: hands this kernel and desc to the planner */ +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name n2fv_6 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 18 FP additions, 8 FP multiplications, + * (or, 12 additions, 2 multiplications, 6 fused multiply/add), + * 29 stack variables, 2 constants, and 15 memory accesses + */ +#include "n2f.h" + +static void n2fv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) /* HAVE_FMA build of the n2fv_6 kernel (registered below under a descriptor whose first field is 6). Input is read only through xi, which aliases ri, and output is written only through xo, which aliases ro; ii and io are never referenced in this body and os appears only in MAKE_VOLATILE_STRIDE. */ +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); /* the two named constants: one half, and 0.8660254... which is numerically sqrt(3)/2 */ + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { /* each pass accounts for VL of the v requested iterations and advances xi by VL * ivs and xo by VL * ovs */ + V T1, T2, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); /* inputs are loaded as the pairs (0, 3), (2, 5) and (4, 1), each index taken through WS(is, k) */ + { + V T3, Td, T6, Te, T9, Tf; + T3 = VSUB(T1, T2); + Td = VADD(T1, T2); + T6 = VSUB(T4, T5); + Te = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); /* T3, T6, T9 are the three pair differences; Td, Te, Tf the three pair sums */ + { + V Tg, Ti, Ta, Tc; + Tg = VADD(Te, Tf); + Ti = VMUL(LDK(KP866025403), VSUB(Tf, Te)); + Ta = VADD(T6, T9); + Tc = VMUL(LDK(KP866025403), VSUB(T9, T6)); /* Tg and Ta add the second and third pair sums and differences respectively; Ti and Tc are the corresponding subtractions scaled by KP866025403 */ + { + V Th, Tj, Tb, Tk; + Th = VFNMS(LDK(KP500000000), Tg, Td); + Tj = VADD(Td, Tg); + STM2(&(xo[0]), Tj, 
ovs, &(xo[0])); + Tb = VFNMS(LDK(KP500000000), Ta, T3); + Tk = VADD(T3, Ta); + STM2(&(xo[6]), Tk, ovs, &(xo[2])); /* xo[0] receives the sum of Td and Tg, xo[6] the sum of T3 and Ta; Th and Tb are presumably Td minus half of Tg and T3 minus half of Ta (VFNMS is defined outside this view -- confirm) */ + { + V Tl, Tm, Tn, To; + Tl = VFMAI(Ti, Th); + STM2(&(xo[8]), Tl, ovs, &(xo[0])); + Tm = VFNMSI(Ti, Th); + STM2(&(xo[4]), Tm, ovs, &(xo[0])); + STN2(&(xo[4]), Tm, Tk, ovs); + Tn = VFMAI(Tc, Tb); + STM2(&(xo[2]), Tn, ovs, &(xo[2])); + STN2(&(xo[0]), Tj, Tn, ovs); + To = VFNMSI(Tc, Tb); + STM2(&(xo[10]), To, ovs, &(xo[2])); + STN2(&(xo[8]), Tl, To, ovs); /* xo[8] and xo[4] combine Th with Ti, xo[2] and xo[10] combine Tb with Tc, through VFMAI and VFNMSI; presumably base plus or minus i times term, by analogy with the VBYI form in the non-FMA variant further down -- confirm against the SIMD macro definitions. Each STN2 follows the STM2 of both members of its pair: (xo[4], xo[6]), (xo[0], xo[2]), (xo[8], xo[10]). */ + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n2fv_6"), {12, 2, 6, 0}, &GENUS, 0, 2, 0, 0 }; /* {12, 2, 6, 0} mirrors the 12 additions, 2 multiplications, 6 fused multiply/add quoted in the comment above this variant; the meaning of the remaining fields is not visible here */ + +void XSIMD(codelet_n2fv_6) (planner *p) { + X(kdft_register) (p, n2fv_6, &desc); /* registration hook: hands this kernel and desc to the planner */ +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name n2fv_6 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 18 FP additions, 4 FP multiplications, + * (or, 16 additions, 2 multiplications, 2 fused multiply/add), + * 25 stack variables, 2 constants, and 15 memory accesses + */ +#include "n2f.h" + +static void n2fv_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) /* Build without HAVE_FMA: same input pairs and output slots as the variant above, but with explicit VBYI calls followed by plain VSUB and VADD where that variant uses VFNMSI and VFMAI. As above, ii and io are never referenced in this body. */ +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(12, is), MAKE_VOLATILE_STRIDE(12, os)) { + V T3, Td, T6, Te, T9, Tf, Ta, Tg, T1, T2, Tj, Tk; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T3 = VSUB(T1, T2); + Td = VADD(T1, T2); + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + Te = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 
1)])); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); /* as in the FMA variant: T3, T6, T9 are the differences and Td, Te, Tf the sums of input pairs (0, 3), (2, 5), (4, 1) */ + } + Ta = VADD(T6, T9); + Tg = VADD(Te, Tf); + Tj = VADD(T3, Ta); + STM2(&(xo[6]), Tj, ovs, &(xo[2])); + Tk = VADD(Td, Tg); + STM2(&(xo[0]), Tk, ovs, &(xo[0])); /* xo[6] receives the sum of T3 and Ta, xo[0] the sum of Td and Tg */ + { + V Tl, Tb, Tc, Tm; + Tb = VFNMS(LDK(KP500000000), Ta, T3); + Tc = VBYI(VMUL(LDK(KP866025403), VSUB(T9, T6))); + Tl = VSUB(Tb, Tc); + STM2(&(xo[10]), Tl, ovs, &(xo[2])); + Tm = VADD(Tb, Tc); + STM2(&(xo[2]), Tm, ovs, &(xo[2])); + STN2(&(xo[0]), Tk, Tm, ovs); /* xo[10] receives Tb minus Tc and xo[2] their sum; the xo[10] value is kept in Tl for the later STN2 on the (xo[8], xo[10]) pair */ + { + V Th, Ti, Tn, To; + Th = VFNMS(LDK(KP500000000), Tg, Td); + Ti = VBYI(VMUL(LDK(KP866025403), VSUB(Tf, Te))); + Tn = VSUB(Th, Ti); + STM2(&(xo[4]), Tn, ovs, &(xo[0])); + STN2(&(xo[4]), Tn, Tj, ovs); + To = VADD(Th, Ti); + STM2(&(xo[8]), To, ovs, &(xo[0])); + STN2(&(xo[8]), To, Tl, ovs); /* xo[4] receives Th minus Ti and xo[8] their sum; STM2/STN2 store semantics come from headers not visible here */ + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 6, XSIMD_STRING("n2fv_6"), {16, 2, 2, 0}, &GENUS, 0, 2, 0, 0 }; /* {16, 2, 2, 0} mirrors the 16 additions, 2 multiplications, 2 fused multiply/add quoted in the comment above this variant */ + +void XSIMD(codelet_n2fv_6) (planner *p) { + X(kdft_register) (p, n2fv_6, &desc); /* registration hook: hands this kernel and desc to the planner */ +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1815 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:57 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2fv_64 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 456 FP additions, 258 FP multiplications, + * (or, 198 additions, 0 multiplications, 258 fused multiply/add), + * 178 stack variables, 15 constants, and 160 memory accesses + */ +#include "n2f.h" + +static void n2fv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { 
+ INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T7r, T7s, T7t, T7u, T5T, T5S, T5X, T65, T8a, T8b, T8e, T8g, T5Z, T5R, T67; + V T63, T5U, T64; + { + V T7, T26, T5k, T6A, T47, T69, T2V, T3z, T6B, T4e, T6a, T5n, T3M, T2Y, T27; + V Tm, T3A, T3l, T2a, TC, T5p, T4o, T6E, T6e, T3i, T3B, TR, T29, T4x, T5q; + V T6h, T6D, T39, T3H, T3I, T3c, T5N, T57, T72, T6w, T5O, T5e, T71, T6t, T2y; + V T1W, T2x, T1N, T33, T34, T3E, T32, T1p, T2v, T1g, T2u, T4M, T5K, T6p, T6Z; + V T6m, T6Y, T5L, T4T; + { + V T4g, T4l, T3j, Tu, Tx, T4h, TA, T4i; + { + V T1, T2, T23, T24, T4, T5, T20, T21; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T23 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T24 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T20 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T21 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + { + V Ta, T48, Tk, T4c, T49, Td, Tf, Tg; + { + V T8, T43, T3, T44, T25, T5i, T6, T45, T22, T9, Ti, Tj, Tb, Tc; + T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T43 = VSUB(T1, T2); + T3 = VADD(T1, T2); + T44 = VSUB(T23, T24); + T25 = VADD(T23, T24); + T5i = VSUB(T4, T5); + T6 = VADD(T4, T5); + T45 = VSUB(T20, T21); + T22 = VADD(T20, T21); + T9 = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Ti = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tj = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + Tb = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + { + V T2T, T46, T5j, T2U; + T7 = VSUB(T3, T6); + T2T = VADD(T3, T6); + T46 = VADD(T44, T45); + T5j = VSUB(T45, T44); + T26 = VSUB(T22, T25); + T2U = VADD(T25, T22); + Ta = VADD(T8, T9); + T48 = VSUB(T8, T9); + Tk = VADD(Ti, Tj); + T4c = VSUB(Tj, Ti); + T5k = VFNMS(LDK(KP707106781), T5j, T5i); + T6A = VFMA(LDK(KP707106781), T5j, T5i); + T47 = 
VFMA(LDK(KP707106781), T46, T43); + T69 = VFNMS(LDK(KP707106781), T46, T43); + T2V = VADD(T2T, T2U); + T3z = VSUB(T2T, T2U); + T49 = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + } + Tf = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + } + { + V Te, T2W, T5l, T4a, Tq, Tt, Tv, Tw, T5m, T4d, Tl, T2X, Ty, Tz, To; + V Tp; + To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Tp = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + { + V Th, T4b, Tr, Ts; + Tr = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + Ts = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + Te = VSUB(Ta, Td); + T2W = VADD(Ta, Td); + T5l = VFMA(LDK(KP414213562), T48, T49); + T4a = VFNMS(LDK(KP414213562), T49, T48); + Th = VADD(Tf, Tg); + T4b = VSUB(Tf, Tg); + Tq = VADD(To, Tp); + T4g = VSUB(To, Tp); + T4l = VSUB(Tr, Ts); + Tt = VADD(Tr, Ts); + Tv = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + T5m = VFMA(LDK(KP414213562), T4b, T4c); + T4d = VFNMS(LDK(KP414213562), T4c, T4b); + Tl = VSUB(Th, Tk); + T2X = VADD(Th, Tk); + Ty = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + } + T3j = VADD(Tq, Tt); + Tu = VSUB(Tq, Tt); + Tx = VADD(Tv, Tw); + T4h = VSUB(Tv, Tw); + T6B = VSUB(T4d, T4a); + T4e = VADD(T4a, T4d); + T6a = VADD(T5l, T5m); + T5n = VSUB(T5l, T5m); + T3M = VSUB(T2X, T2W); + T2Y = VADD(T2W, T2X); + T27 = VSUB(Tl, Te); + Tm = VADD(Te, Tl); + TA = VADD(Ty, Tz); + T4i = VSUB(Ty, Tz); + } + } + } + { + V TK, T4p, T4u, T4k, T6d, T4n, T6c, TL, TN, TO, T3g, TJ, TF, TI; + { + V TD, TE, TG, TH; + TD = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + TE = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + TG = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TH = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); + { + V T3k, TB, T4j, T4m; + T3k = VADD(Tx, TA); + TB = VSUB(Tx, TA); + T4j = VADD(T4h, T4i); + T4m = VSUB(T4h, T4i); + T4p = VSUB(TD, TE); + TF = VADD(TD, TE); + T4u = VSUB(TH, TG); + TI = VADD(TG, TH); + T3A = VSUB(T3j, T3k); + 
T3l = VADD(T3j, T3k); + T2a = VFMA(LDK(KP414213562), Tu, TB); + TC = VFNMS(LDK(KP414213562), TB, Tu); + T4k = VFMA(LDK(KP707106781), T4j, T4g); + T6d = VFNMS(LDK(KP707106781), T4j, T4g); + T4n = VFMA(LDK(KP707106781), T4m, T4l); + T6c = VFNMS(LDK(KP707106781), T4m, T4l); + TL = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + } + TN = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + TO = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + } + T3g = VADD(TF, TI); + TJ = VSUB(TF, TI); + { + V T3a, T1E, T52, T5b, T1x, T4Z, T6r, T6u, T5a, T1U, T55, T5c, T1L, T3b; + { + V T4V, T1t, T58, T1w, T1Q, T1T, T1I, T4Y, T59, T1J, T53, T1H; + { + V T1r, TM, T4r, TP, T4q, T1s, T1u, T1v; + T1r = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T5p = VFMA(LDK(KP198912367), T4k, T4n); + T4o = VFNMS(LDK(KP198912367), T4n, T4k); + T6E = VFMA(LDK(KP668178637), T6c, T6d); + T6e = VFNMS(LDK(KP668178637), T6d, T6c); + TM = VADD(TK, TL); + T4r = VSUB(TK, TL); + TP = VADD(TN, TO); + T4q = VSUB(TN, TO); + T1s = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T1v = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + { + V T1R, T4X, T6g, T4t, T6f, T4w, T1S, T1O, T1P; + T1O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1R = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + { + V T3h, TQ, T4s, T4v; + T3h = VADD(TP, TM); + TQ = VSUB(TM, TP); + T4s = VADD(T4q, T4r); + T4v = VSUB(T4r, T4q); + T4V = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + T58 = VSUB(T1v, T1u); + T1w = VADD(T1u, T1v); + T4X = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + T3i = VADD(T3g, T3h); + T3B = VSUB(T3g, T3h); + TR = VFNMS(LDK(KP414213562), TQ, TJ); + T29 = VFMA(LDK(KP414213562), TJ, TQ); + T6g = VFNMS(LDK(KP707106781), T4s, T4p); + T4t = VFMA(LDK(KP707106781), T4s, T4p); + T6f = VFNMS(LDK(KP707106781), T4v, T4u); + T4w = VFMA(LDK(KP707106781), T4v, T4u); + T1S = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + } + { + V T4W, T1A, T50, T51, T1D, T1F, T1G; + { 
+ V T1y, T1z, T1B, T1C; + T1y = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1z = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1B = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1C = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T4x = VFNMS(LDK(KP198912367), T4w, T4t); + T5q = VFMA(LDK(KP198912367), T4t, T4w); + T6h = VFNMS(LDK(KP668178637), T6g, T6f); + T6D = VFMA(LDK(KP668178637), T6f, T6g); + T4W = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + T1A = VADD(T1y, T1z); + T50 = VSUB(T1y, T1z); + T51 = VSUB(T1C, T1B); + T1D = VADD(T1B, T1C); + } + T1F = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1G = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1I = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); + T4Y = VADD(T4W, T4X); + T59 = VSUB(T4X, T4W); + T1J = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T3a = VADD(T1A, T1D); + T1E = VSUB(T1A, T1D); + T52 = VFMA(LDK(KP414213562), T51, T50); + T5b = VFNMS(LDK(KP414213562), T50, T51); + T53 = VSUB(T1F, T1G); + T1H = VADD(T1F, T1G); + } + } + } + { + V T37, T54, T1K, T38; + T1x = VSUB(T1t, T1w); + T37 = VADD(T1t, T1w); + T4Z = VFMA(LDK(KP707106781), T4Y, T4V); + T6r = VFNMS(LDK(KP707106781), T4Y, T4V); + T54 = VSUB(T1J, T1I); + T1K = VADD(T1I, T1J); + T6u = VFNMS(LDK(KP707106781), T59, T58); + T5a = VFMA(LDK(KP707106781), T59, T58); + T38 = VADD(T1T, T1Q); + T1U = VSUB(T1Q, T1T); + T55 = VFNMS(LDK(KP414213562), T54, T53); + T5c = VFMA(LDK(KP414213562), T53, T54); + T1L = VSUB(T1H, T1K); + T3b = VADD(T1H, T1K); + T39 = VADD(T37, T38); + T3H = VSUB(T37, T38); + } + } + { + V T4A, TW, T4N, TZ, T1j, T1m, T4O, T4D, T13, T4F, T16, T4G, T1a, T4I, T4J; + V T1d; + { + V TU, TV, TX, TY, T56, T6v; + TU = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T56 = VADD(T52, T55); + T6v = VSUB(T55, T52); + { + V T5d, T6s, T1V, T1M; + T5d = VADD(T5b, T5c); + T6s = VSUB(T5c, T5b); + T1V = VSUB(T1L, T1E); + T1M = VADD(T1E, T1L); + T3I = VSUB(T3b, T3a); + T3c = VADD(T3a, T3b); + T5N = VFNMS(LDK(KP923879532), T56, T4Z); + T57 = 
VFMA(LDK(KP923879532), T56, T4Z); + T72 = VFNMS(LDK(KP923879532), T6v, T6u); + T6w = VFMA(LDK(KP923879532), T6v, T6u); + T5O = VFNMS(LDK(KP923879532), T5d, T5a); + T5e = VFMA(LDK(KP923879532), T5d, T5a); + T71 = VFMA(LDK(KP923879532), T6s, T6r); + T6t = VFNMS(LDK(KP923879532), T6s, T6r); + T2y = VFNMS(LDK(KP707106781), T1V, T1U); + T1W = VFMA(LDK(KP707106781), T1V, T1U); + T2x = VFNMS(LDK(KP707106781), T1M, T1x); + T1N = VFMA(LDK(KP707106781), T1M, T1x); + TV = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + } + TX = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + TY = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + { + V T1h, T1i, T1k, T1l; + T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + { + V T11, T4B, T4C, T12, T14, T15; + T11 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T4A = VSUB(TU, TV); + TW = VADD(TU, TV); + T4N = VSUB(TX, TY); + TZ = VADD(TX, TY); + T1j = VADD(T1h, T1i); + T4B = VSUB(T1h, T1i); + T1m = VADD(T1k, T1l); + T4C = VSUB(T1k, T1l); + T12 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T14 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T15 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + { + V T18, T19, T1b, T1c; + T18 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T19 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T1b = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T1c = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T4O = VSUB(T4B, T4C); + T4D = VADD(T4B, T4C); + T13 = VADD(T11, T12); + T4F = VSUB(T11, T12); + T16 = VADD(T14, T15); + T4G = VSUB(T14, T15); + T1a = VADD(T18, T19); + T4I = VSUB(T18, T19); + T4J = VSUB(T1b, T1c); + T1d = VADD(T1b, T1c); + } + } + } + } + { + V T30, T10, T6k, T4E, T4Q, T4H, T17, T6n, T4P, T1e, T4K, T4R, T1n, T31; + T30 = VADD(TW, TZ); + T10 = VSUB(TW, TZ); + T6k = VFNMS(LDK(KP707106781), T4D, T4A); + T4E = VFMA(LDK(KP707106781), 
T4D, T4A); + T4Q = VFMA(LDK(KP414213562), T4F, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4F); + T33 = VADD(T13, T16); + T17 = VSUB(T13, T16); + T6n = VFNMS(LDK(KP707106781), T4O, T4N); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T34 = VADD(T1a, T1d); + T1e = VSUB(T1a, T1d); + T4K = VFMA(LDK(KP414213562), T4J, T4I); + T4R = VFNMS(LDK(KP414213562), T4I, T4J); + T1n = VSUB(T1j, T1m); + T31 = VADD(T1j, T1m); + { + V T1f, T1o, T6o, T4L, T4S, T6l; + T1f = VADD(T17, T1e); + T1o = VSUB(T17, T1e); + T6o = VSUB(T4H, T4K); + T4L = VADD(T4H, T4K); + T4S = VADD(T4Q, T4R); + T6l = VSUB(T4Q, T4R); + T3E = VSUB(T30, T31); + T32 = VADD(T30, T31); + T1p = VFMA(LDK(KP707106781), T1o, T1n); + T2v = VFNMS(LDK(KP707106781), T1o, T1n); + T1g = VFMA(LDK(KP707106781), T1f, T10); + T2u = VFNMS(LDK(KP707106781), T1f, T10); + T4M = VFMA(LDK(KP923879532), T4L, T4E); + T5K = VFNMS(LDK(KP923879532), T4L, T4E); + T6p = VFMA(LDK(KP923879532), T6o, T6n); + T6Z = VFNMS(LDK(KP923879532), T6o, T6n); + T6m = VFNMS(LDK(KP923879532), T6l, T6k); + T6Y = VFMA(LDK(KP923879532), T6l, T6k); + T5L = VFNMS(LDK(KP923879532), T4S, T4P); + T4T = VFMA(LDK(KP923879532), T4S, T4P); + } + } + } + } + } + } + { + V T6b, T6F, T7n, T7o, T7p, T7q, T7v, T7w, T7x, T7y, T7z, T7A, T7B, T7C, T7f; + V T6X, T70, T79, T7a, T73, T6C, T76, T77, T6i; + { + V T2Z, T3r, T3s, T3m, T3d, T3v; + T2Z = VSUB(T2V, T2Y); + T3r = VADD(T2V, T2Y); + T3s = VADD(T3l, T3i); + T3m = VSUB(T3i, T3l); + T3d = VSUB(T39, T3c); + T3v = VADD(T39, T3c); + { + V T3x, T3t, T3P, T3J, T3D, T3V, T3Q, T3G, T36, T3u, T3Y, T3O, T6V, T6W; + { + V T3N, T3C, T3F, T35; + T3N = VSUB(T3B, T3A); + T3C = VADD(T3A, T3B); + T3F = VSUB(T33, T34); + T35 = VADD(T33, T34); + T3x = VSUB(T3r, T3s); + T3t = VADD(T3r, T3s); + T3P = VFMA(LDK(KP414213562), T3H, T3I); + T3J = VFNMS(LDK(KP414213562), T3I, T3H); + T3D = VFMA(LDK(KP707106781), T3C, T3z); + T3V = VFNMS(LDK(KP707106781), T3C, T3z); + T3Q = VFMA(LDK(KP414213562), T3E, T3F); + T3G = VFNMS(LDK(KP414213562), T3F, T3E); + T36 
= VSUB(T32, T35); + T3u = VADD(T32, T35); + T3Y = VFNMS(LDK(KP707106781), T3N, T3M); + T3O = VFMA(LDK(KP707106781), T3N, T3M); + } + T6b = VFNMS(LDK(KP923879532), T6a, T69); + T6V = VFMA(LDK(KP923879532), T6a, T69); + T6W = VADD(T6E, T6D); + T6F = VSUB(T6D, T6E); + { + V T3K, T3Z, T3e, T3n; + T3K = VADD(T3G, T3J); + T3Z = VSUB(T3J, T3G); + T3e = VADD(T36, T3d); + T3n = VSUB(T3d, T36); + { + V T3w, T3y, T3R, T3W; + T3w = VADD(T3u, T3v); + T3y = VSUB(T3v, T3u); + T3R = VSUB(T3P, T3Q); + T3W = VADD(T3Q, T3P); + { + V T42, T40, T3L, T3T; + T42 = VFNMS(LDK(KP923879532), T3Z, T3Y); + T40 = VFMA(LDK(KP923879532), T3Z, T3Y); + T3L = VFNMS(LDK(KP923879532), T3K, T3D); + T3T = VFMA(LDK(KP923879532), T3K, T3D); + { + V T3o, T3q, T3f, T3p; + T3o = VFNMS(LDK(KP707106781), T3n, T3m); + T3q = VFMA(LDK(KP707106781), T3n, T3m); + T3f = VFNMS(LDK(KP707106781), T3e, T2Z); + T3p = VFMA(LDK(KP707106781), T3e, T2Z); + T7n = VFNMSI(T3y, T3x); + STM2(&(xo[96]), T7n, ovs, &(xo[0])); + T7o = VFMAI(T3y, T3x); + STM2(&(xo[32]), T7o, ovs, &(xo[0])); + T7p = VADD(T3t, T3w); + STM2(&(xo[0]), T7p, ovs, &(xo[0])); + T7q = VSUB(T3t, T3w); + STM2(&(xo[64]), T7q, ovs, &(xo[0])); + { + V T41, T3X, T3S, T3U; + T41 = VFMA(LDK(KP923879532), T3W, T3V); + T3X = VFNMS(LDK(KP923879532), T3W, T3V); + T3S = VFNMS(LDK(KP923879532), T3R, T3O); + T3U = VFMA(LDK(KP923879532), T3R, T3O); + T7r = VFMAI(T3q, T3p); + STM2(&(xo[16]), T7r, ovs, &(xo[0])); + T7s = VFNMSI(T3q, T3p); + STM2(&(xo[112]), T7s, ovs, &(xo[0])); + T7t = VFMAI(T3o, T3f); + STM2(&(xo[80]), T7t, ovs, &(xo[0])); + T7u = VFNMSI(T3o, T3f); + STM2(&(xo[48]), T7u, ovs, &(xo[0])); + T7v = VFNMSI(T40, T3X); + STM2(&(xo[88]), T7v, ovs, &(xo[0])); + T7w = VFMAI(T40, T3X); + STM2(&(xo[40]), T7w, ovs, &(xo[0])); + T7x = VFMAI(T42, T41); + STM2(&(xo[104]), T7x, ovs, &(xo[0])); + T7y = VFNMSI(T42, T41); + STM2(&(xo[24]), T7y, ovs, &(xo[0])); + T7z = VFMAI(T3U, T3T); + STM2(&(xo[8]), T7z, ovs, &(xo[0])); + T7A = VFNMSI(T3U, T3T); + STM2(&(xo[120]), T7A, ovs, 
&(xo[0])); + T7B = VFMAI(T3S, T3L); + STM2(&(xo[72]), T7B, ovs, &(xo[0])); + T7C = VFNMSI(T3S, T3L); + STM2(&(xo[56]), T7C, ovs, &(xo[0])); + T7f = VFNMS(LDK(KP831469612), T6W, T6V); + T6X = VFMA(LDK(KP831469612), T6W, T6V); + } + } + } + } + } + T70 = VFMA(LDK(KP303346683), T6Z, T6Y); + T79 = VFNMS(LDK(KP303346683), T6Y, T6Z); + T7a = VFNMS(LDK(KP303346683), T71, T72); + T73 = VFMA(LDK(KP303346683), T72, T71); + T6C = VFNMS(LDK(KP923879532), T6B, T6A); + T76 = VFMA(LDK(KP923879532), T6B, T6A); + T77 = VSUB(T6e, T6h); + T6i = VADD(T6e, T6h); + } + } + { + V T2r, T2D, T2C, T2s, T5H, T5o, T5v, T5D, T7L, T7O, T7Q, T7S, T5r, T5I, T5x; + V T5h, T5F, T5B; + { + V TT, T2f, T7E, T7F, T7H, T7J, T2n, T1Y, T28, T2b, T2l, T2p, T2j, T2k; + { + V T1X, T2d, T7h, T7l, T2e, T1q, T75, T7d, T7m, T7k, T7c, T7e, Tn, TS; + T2r = VFNMS(LDK(KP707106781), Tm, T7); + Tn = VFMA(LDK(KP707106781), Tm, T7); + TS = VADD(TC, TR); + T2D = VSUB(TR, TC); + { + V T7b, T7j, T74, T7i, T78, T7g; + T1X = VFNMS(LDK(KP198912367), T1W, T1N); + T2d = VFMA(LDK(KP198912367), T1N, T1W); + T7g = VADD(T79, T7a); + T7b = VSUB(T79, T7a); + T7j = VSUB(T73, T70); + T74 = VADD(T70, T73); + T7i = VFNMS(LDK(KP831469612), T77, T76); + T78 = VFMA(LDK(KP831469612), T77, T76); + T2j = VFNMS(LDK(KP923879532), TS, Tn); + TT = VFMA(LDK(KP923879532), TS, Tn); + T7h = VFMA(LDK(KP956940335), T7g, T7f); + T7l = VFNMS(LDK(KP956940335), T7g, T7f); + T2e = VFMA(LDK(KP198912367), T1g, T1p); + T1q = VFNMS(LDK(KP198912367), T1p, T1g); + T75 = VFNMS(LDK(KP956940335), T74, T6X); + T7d = VFMA(LDK(KP956940335), T74, T6X); + T7m = VFNMS(LDK(KP956940335), T7j, T7i); + T7k = VFMA(LDK(KP956940335), T7j, T7i); + T7c = VFNMS(LDK(KP956940335), T7b, T78); + T7e = VFMA(LDK(KP956940335), T7b, T78); + } + T2k = VADD(T2e, T2d); + T2f = VSUB(T2d, T2e); + { + V T7D, T7G, T7I, T7K; + T7D = VFNMSI(T7k, T7h); + STM2(&(xo[90]), T7D, ovs, &(xo[2])); + STN2(&(xo[88]), T7v, T7D, ovs); + T7E = VFMAI(T7k, T7h); + STM2(&(xo[38]), T7E, ovs, &(xo[2])); + T7F = 
VFMAI(T7m, T7l); + STM2(&(xo[102]), T7F, ovs, &(xo[2])); + T7G = VFNMSI(T7m, T7l); + STM2(&(xo[26]), T7G, ovs, &(xo[2])); + STN2(&(xo[24]), T7y, T7G, ovs); + T7H = VFMAI(T7e, T7d); + STM2(&(xo[6]), T7H, ovs, &(xo[2])); + T7I = VFNMSI(T7e, T7d); + STM2(&(xo[122]), T7I, ovs, &(xo[2])); + STN2(&(xo[120]), T7A, T7I, ovs); + T7J = VFMAI(T7c, T75); + STM2(&(xo[70]), T7J, ovs, &(xo[2])); + T7K = VFNMSI(T7c, T75); + STM2(&(xo[58]), T7K, ovs, &(xo[2])); + STN2(&(xo[56]), T7C, T7K, ovs); + T2n = VSUB(T1X, T1q); + T1Y = VADD(T1q, T1X); + } + T2C = VFNMS(LDK(KP707106781), T27, T26); + T28 = VFMA(LDK(KP707106781), T27, T26); + T2b = VSUB(T29, T2a); + T2s = VADD(T2a, T29); + } + T2l = VFNMS(LDK(KP980785280), T2k, T2j); + T2p = VFMA(LDK(KP980785280), T2k, T2j); + { + V T5z, T4z, T5A, T5g; + { + V T4f, T4y, T1Z, T2h, T4U, T5t, T2m, T2c, T5u, T5f; + T5H = VFNMS(LDK(KP923879532), T4e, T47); + T4f = VFMA(LDK(KP923879532), T4e, T47); + T4y = VADD(T4o, T4x); + T5T = VSUB(T4x, T4o); + T1Z = VFNMS(LDK(KP980785280), T1Y, TT); + T2h = VFMA(LDK(KP980785280), T1Y, TT); + T4U = VFNMS(LDK(KP098491403), T4T, T4M); + T5t = VFMA(LDK(KP098491403), T4M, T4T); + T2m = VFNMS(LDK(KP923879532), T2b, T28); + T2c = VFMA(LDK(KP923879532), T2b, T28); + T5u = VFMA(LDK(KP098491403), T57, T5e); + T5f = VFNMS(LDK(KP098491403), T5e, T57); + T5z = VFNMS(LDK(KP980785280), T4y, T4f); + T4z = VFMA(LDK(KP980785280), T4y, T4f); + T5S = VFNMS(LDK(KP923879532), T5n, T5k); + T5o = VFMA(LDK(KP923879532), T5n, T5k); + { + V T2o, T2q, T2i, T2g; + T2o = VFMA(LDK(KP980785280), T2n, T2m); + T2q = VFNMS(LDK(KP980785280), T2n, T2m); + T2i = VFMA(LDK(KP980785280), T2f, T2c); + T2g = VFNMS(LDK(KP980785280), T2f, T2c); + T5A = VADD(T5t, T5u); + T5v = VSUB(T5t, T5u); + T5D = VSUB(T5f, T4U); + T5g = VADD(T4U, T5f); + T7L = VFNMSI(T2o, T2l); + STM2(&(xo[92]), T7L, ovs, &(xo[0])); + { + V T7M, T7N, T7P, T7R; + T7M = VFMAI(T2o, T2l); + STM2(&(xo[36]), T7M, ovs, &(xo[0])); + STN2(&(xo[36]), T7M, T7E, ovs); + T7N = VFMAI(T2q, T2p); + 
STM2(&(xo[100]), T7N, ovs, &(xo[0])); + STN2(&(xo[100]), T7N, T7F, ovs); + T7O = VFNMSI(T2q, T2p); + STM2(&(xo[28]), T7O, ovs, &(xo[0])); + T7P = VFMAI(T2i, T2h); + STM2(&(xo[4]), T7P, ovs, &(xo[0])); + STN2(&(xo[4]), T7P, T7H, ovs); + T7Q = VFNMSI(T2i, T2h); + STM2(&(xo[124]), T7Q, ovs, &(xo[0])); + T7R = VFMAI(T2g, T1Z); + STM2(&(xo[68]), T7R, ovs, &(xo[0])); + STN2(&(xo[68]), T7R, T7J, ovs); + T7S = VFNMSI(T2g, T1Z); + STM2(&(xo[60]), T7S, ovs, &(xo[0])); + T5r = VSUB(T5p, T5q); + T5I = VADD(T5p, T5q); + } + } + } + T5x = VFMA(LDK(KP995184726), T5g, T4z); + T5h = VFNMS(LDK(KP995184726), T5g, T4z); + T5F = VFMA(LDK(KP995184726), T5A, T5z); + T5B = VFNMS(LDK(KP995184726), T5A, T5z); + } + } + { + V T6J, T6R, T6L, T6z, T6T, T6P; + { + V T6N, T6j, T6O, T6y; + { + V T6q, T6H, T5C, T5s, T6I, T6x; + T6q = VFNMS(LDK(KP534511135), T6p, T6m); + T6H = VFMA(LDK(KP534511135), T6m, T6p); + T5C = VFNMS(LDK(KP980785280), T5r, T5o); + T5s = VFMA(LDK(KP980785280), T5r, T5o); + T6I = VFMA(LDK(KP534511135), T6t, T6w); + T6x = VFNMS(LDK(KP534511135), T6w, T6t); + T6N = VFMA(LDK(KP831469612), T6i, T6b); + T6j = VFNMS(LDK(KP831469612), T6i, T6b); + { + V T5E, T5G, T5y, T5w; + T5E = VFNMS(LDK(KP995184726), T5D, T5C); + T5G = VFMA(LDK(KP995184726), T5D, T5C); + T5y = VFMA(LDK(KP995184726), T5v, T5s); + T5w = VFNMS(LDK(KP995184726), T5v, T5s); + T6O = VADD(T6H, T6I); + T6J = VSUB(T6H, T6I); + T6R = VSUB(T6x, T6q); + T6y = VADD(T6q, T6x); + { + V T7T, T7U, T7V, T7W; + T7T = VFMAI(T5E, T5B); + STM2(&(xo[94]), T7T, ovs, &(xo[2])); + STN2(&(xo[92]), T7L, T7T, ovs); + T7U = VFNMSI(T5E, T5B); + STM2(&(xo[34]), T7U, ovs, &(xo[2])); + STN2(&(xo[32]), T7o, T7U, ovs); + T7V = VFNMSI(T5G, T5F); + STM2(&(xo[98]), T7V, ovs, &(xo[2])); + STN2(&(xo[96]), T7n, T7V, ovs); + T7W = VFMAI(T5G, T5F); + STM2(&(xo[30]), T7W, ovs, &(xo[2])); + STN2(&(xo[28]), T7O, T7W, ovs); + { + V T7X, T7Y, T7Z, T80; + T7X = VFMAI(T5y, T5x); + STM2(&(xo[126]), T7X, ovs, &(xo[2])); + STN2(&(xo[124]), T7Q, T7X, ovs); + T7Y = 
VFNMSI(T5y, T5x); + STM2(&(xo[2]), T7Y, ovs, &(xo[2])); + STN2(&(xo[0]), T7p, T7Y, ovs); + T7Z = VFMAI(T5w, T5h); + STM2(&(xo[62]), T7Z, ovs, &(xo[2])); + STN2(&(xo[60]), T7S, T7Z, ovs); + T80 = VFNMSI(T5w, T5h); + STM2(&(xo[66]), T80, ovs, &(xo[2])); + STN2(&(xo[64]), T7q, T80, ovs); + } + } + } + } + T6L = VFMA(LDK(KP881921264), T6y, T6j); + T6z = VFNMS(LDK(KP881921264), T6y, T6j); + T6T = VFMA(LDK(KP881921264), T6O, T6N); + T6P = VFNMS(LDK(KP881921264), T6O, T6N); + } + { + V T2H, T2P, T81, T84, T85, T87, T2J, T2B, T2R, T2N; + { + V T2L, T2t, T2M, T2A; + { + V T2z, T2F, T6Q, T6G, T2G, T2w; + T2z = VFMA(LDK(KP668178637), T2y, T2x); + T2F = VFNMS(LDK(KP668178637), T2x, T2y); + T6Q = VFMA(LDK(KP831469612), T6F, T6C); + T6G = VFNMS(LDK(KP831469612), T6F, T6C); + T2G = VFNMS(LDK(KP668178637), T2u, T2v); + T2w = VFMA(LDK(KP668178637), T2v, T2u); + T2L = VFNMS(LDK(KP923879532), T2s, T2r); + T2t = VFMA(LDK(KP923879532), T2s, T2r); + { + V T6S, T6U, T6M, T6K; + T6S = VFNMS(LDK(KP881921264), T6R, T6Q); + T6U = VFMA(LDK(KP881921264), T6R, T6Q); + T6M = VFMA(LDK(KP881921264), T6J, T6G); + T6K = VFNMS(LDK(KP881921264), T6J, T6G); + T2M = VADD(T2G, T2F); + T2H = VSUB(T2F, T2G); + T2P = VSUB(T2z, T2w); + T2A = VADD(T2w, T2z); + T81 = VFMAI(T6S, T6P); + STM2(&(xo[86]), T81, ovs, &(xo[2])); + { + V T82, T83, T86, T88; + T82 = VFNMSI(T6S, T6P); + STM2(&(xo[42]), T82, ovs, &(xo[2])); + STN2(&(xo[40]), T7w, T82, ovs); + T83 = VFNMSI(T6U, T6T); + STM2(&(xo[106]), T83, ovs, &(xo[2])); + STN2(&(xo[104]), T7x, T83, ovs); + T84 = VFMAI(T6U, T6T); + STM2(&(xo[22]), T84, ovs, &(xo[2])); + T85 = VFMAI(T6M, T6L); + STM2(&(xo[118]), T85, ovs, &(xo[2])); + T86 = VFNMSI(T6M, T6L); + STM2(&(xo[10]), T86, ovs, &(xo[2])); + STN2(&(xo[8]), T7z, T86, ovs); + T87 = VFMAI(T6K, T6z); + STM2(&(xo[54]), T87, ovs, &(xo[2])); + T88 = VFNMSI(T6K, T6z); + STM2(&(xo[74]), T88, ovs, &(xo[2])); + STN2(&(xo[72]), T7B, T88, ovs); + } + } + } + T2J = VFMA(LDK(KP831469612), T2A, T2t); + T2B = 
VFNMS(LDK(KP831469612), T2A, T2t); + T2R = VFNMS(LDK(KP831469612), T2M, T2L); + T2N = VFMA(LDK(KP831469612), T2M, T2L); + } + { + V T61, T5J, T62, T5Q; + { + V T5M, T5V, T2O, T2E, T5W, T5P; + T5M = VFMA(LDK(KP820678790), T5L, T5K); + T5V = VFNMS(LDK(KP820678790), T5K, T5L); + T2O = VFMA(LDK(KP923879532), T2D, T2C); + T2E = VFNMS(LDK(KP923879532), T2D, T2C); + T5W = VFNMS(LDK(KP820678790), T5N, T5O); + T5P = VFMA(LDK(KP820678790), T5O, T5N); + T61 = VFNMS(LDK(KP980785280), T5I, T5H); + T5J = VFMA(LDK(KP980785280), T5I, T5H); + { + V T2Q, T2S, T2K, T2I; + T2Q = VFNMS(LDK(KP831469612), T2P, T2O); + T2S = VFMA(LDK(KP831469612), T2P, T2O); + T2K = VFMA(LDK(KP831469612), T2H, T2E); + T2I = VFNMS(LDK(KP831469612), T2H, T2E); + T62 = VADD(T5V, T5W); + T5X = VSUB(T5V, T5W); + T65 = VSUB(T5P, T5M); + T5Q = VADD(T5M, T5P); + { + V T89, T8c, T8d, T8f; + T89 = VFMAI(T2Q, T2N); + STM2(&(xo[84]), T89, ovs, &(xo[0])); + STN2(&(xo[84]), T89, T81, ovs); + T8a = VFNMSI(T2Q, T2N); + STM2(&(xo[44]), T8a, ovs, &(xo[0])); + T8b = VFNMSI(T2S, T2R); + STM2(&(xo[108]), T8b, ovs, &(xo[0])); + T8c = VFMAI(T2S, T2R); + STM2(&(xo[20]), T8c, ovs, &(xo[0])); + STN2(&(xo[20]), T8c, T84, ovs); + T8d = VFMAI(T2K, T2J); + STM2(&(xo[116]), T8d, ovs, &(xo[0])); + STN2(&(xo[116]), T8d, T85, ovs); + T8e = VFNMSI(T2K, T2J); + STM2(&(xo[12]), T8e, ovs, &(xo[0])); + T8f = VFMAI(T2I, T2B); + STM2(&(xo[52]), T8f, ovs, &(xo[0])); + STN2(&(xo[52]), T8f, T87, ovs); + T8g = VFNMSI(T2I, T2B); + STM2(&(xo[76]), T8g, ovs, &(xo[0])); + } + } + } + T5Z = VFMA(LDK(KP773010453), T5Q, T5J); + T5R = VFNMS(LDK(KP773010453), T5Q, T5J); + T67 = VFNMS(LDK(KP773010453), T62, T61); + T63 = VFMA(LDK(KP773010453), T62, T61); + } + } + } + } + } + } + T5U = VFMA(LDK(KP980785280), T5T, T5S); + T64 = VFNMS(LDK(KP980785280), T5T, T5S); + { + V T68, T66, T5Y, T60; + T68 = VFNMS(LDK(KP773010453), T65, T64); + T66 = VFMA(LDK(KP773010453), T65, T64); + T5Y = VFNMS(LDK(KP773010453), T5X, T5U); + T60 = VFMA(LDK(KP773010453), T5X, T5U); + { 
+ V T8h, T8i, T8j, T8k; + T8h = VFNMSI(T66, T63); + STM2(&(xo[82]), T8h, ovs, &(xo[2])); + STN2(&(xo[80]), T7t, T8h, ovs); + T8i = VFMAI(T66, T63); + STM2(&(xo[46]), T8i, ovs, &(xo[2])); + STN2(&(xo[44]), T8a, T8i, ovs); + T8j = VFMAI(T68, T67); + STM2(&(xo[110]), T8j, ovs, &(xo[2])); + STN2(&(xo[108]), T8b, T8j, ovs); + T8k = VFNMSI(T68, T67); + STM2(&(xo[18]), T8k, ovs, &(xo[2])); + STN2(&(xo[16]), T7r, T8k, ovs); + { + V T8l, T8m, T8n, T8o; + T8l = VFMAI(T60, T5Z); + STM2(&(xo[14]), T8l, ovs, &(xo[2])); + STN2(&(xo[12]), T8e, T8l, ovs); + T8m = VFNMSI(T60, T5Z); + STM2(&(xo[114]), T8m, ovs, &(xo[2])); + STN2(&(xo[112]), T7s, T8m, ovs); + T8n = VFMAI(T5Y, T5R); + STM2(&(xo[78]), T8n, ovs, &(xo[2])); + STN2(&(xo[76]), T8g, T8n, ovs); + T8o = VFNMSI(T5Y, T5R); + STM2(&(xo[50]), T8o, ovs, &(xo[2])); + STN2(&(xo[48]), T7u, T8o, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n2fv_64"), {198, 0, 258, 0}, &GENUS, 0, 2, 0, 0 }; /* Codelet descriptor handed to X(kdft_register) below; its first two fields are the value 64 and the name string "n2fv_64". NOTE(review): {198, 0, 258, 0} looks like an operation-count tuple for this variant - confirm against the kdft_desc declaration. */ + +void XSIMD(codelet_n2fv_64) (planner *p) { /* Registration entry point: passes the n2fv_64 kernel and desc to X(kdft_register) for planner p. */ + X(kdft_register) (p, n2fv_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2fv_64 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 456 FP additions, 124 FP multiplications, + * (or, 404 additions, 72 multiplications, 52 fused multiply/add), + * 128 stack variables, 15 constants, and 160 memory accesses + */ +#include "n2f.h" + +static void n2fv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, 
+0.956940335732208864935797886980269969482849206); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T4p, T5q, Tb, T39, T2n, T3A, T6f, T6T, Tq, T3B, T6i, T76, T2i, T3a, T4w; + V T5r, TI, T2p, T6C, T6V, T3h, T3E, T4L, T5u, TZ, T2q, T6F, T6U, T3e, T3D; + V T4E, T5t, T23, T2N, T6t, T71, T6w, T72, T2c, T2O, T3t, T41, T5f, T5R, T5k; + V T5S, T3w, T42, T1s, T2K, T6m, T6Y, T6p, T6Z, T1B, T2L, T3m, T3Y, T4Y, T5O; + V T53, T5P, T3p, T3Z; + { + V T3, T4n, T2m, T4o, T6, T5p, T9, T5o; + { + V T1, T2, T2k, T2l; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + T4n = VADD(T1, T2); + T2k = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); + T2l = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); + T2m = VSUB(T2k, T2l); + T4o = VADD(T2k, T2l); + } + { + V T4, T5, T7, T8; + T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); + T5 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); + T6 = VSUB(T4, T5); + T5p = VADD(T4, T5); + T7 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); + T8 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); + T9 = VSUB(T7, T8); + T5o = VADD(T7, T8); + } + T4p = VSUB(T4n, T4o); + T5q = VSUB(T5o, T5p); + { + V Ta, T2j, T6d, T6e; + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tb = VADD(T3, Ta); + T39 = 
VSUB(T3, Ta); + T2j = VMUL(LDK(KP707106781), VSUB(T9, T6)); + T2n = VSUB(T2j, T2m); + T3A = VADD(T2m, T2j); + T6d = VADD(T4n, T4o); + T6e = VADD(T5p, T5o); + T6f = VADD(T6d, T6e); + T6T = VSUB(T6d, T6e); + } + } + { + V Te, T4q, To, T4u, Th, T4r, Tl, T4t; + { + V Tc, Td, Tm, Tn; + Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); + Te = VSUB(Tc, Td); + T4q = VADD(Tc, Td); + Tm = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); + Tn = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); + To = VSUB(Tm, Tn); + T4u = VADD(Tm, Tn); + } + { + V Tf, Tg, Tj, Tk; + Tf = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); + Tg = LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); + Th = VSUB(Tf, Tg); + T4r = VADD(Tf, Tg); + Tj = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); + Tk = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); + Tl = VSUB(Tj, Tk); + T4t = VADD(Tj, Tk); + } + { + V Ti, Tp, T6g, T6h; + Ti = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te)); + Tp = VFMA(LDK(KP923879532), Tl, VMUL(LDK(KP382683432), To)); + Tq = VADD(Ti, Tp); + T3B = VSUB(Tp, Ti); + T6g = VADD(T4q, T4r); + T6h = VADD(T4t, T4u); + T6i = VADD(T6g, T6h); + T76 = VSUB(T6h, T6g); + } + { + V T2g, T2h, T4s, T4v; + T2g = VFNMS(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl)); + T2h = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th)); + T2i = VSUB(T2g, T2h); + T3a = VADD(T2h, T2g); + T4s = VSUB(T4q, T4r); + T4v = VSUB(T4t, T4u); + T4w = VMUL(LDK(KP707106781), VADD(T4s, T4v)); + T5r = VMUL(LDK(KP707106781), VSUB(T4v, T4s)); + } + } + { + V Tu, T4F, TG, T4G, TB, T4J, TD, T4I; + { + V Ts, Tt, TE, TF; + Ts = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); + Tt = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); + Tu = VSUB(Ts, Tt); + T4F = VADD(Ts, Tt); + TE = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); + TF = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); + TG = VSUB(TE, TF); + T4G = VADD(TE, TF); + { + V Tv, Tw, Tx, Ty, Tz, TA; + Tv = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tw = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); + Tx = VSUB(Tv, Tw); + Ty = LD(&(xi[WS(is, 
54)]), ivs, &(xi[0])); + Tz = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); + TA = VSUB(Ty, Tz); + TB = VMUL(LDK(KP707106781), VADD(Tx, TA)); + T4J = VADD(Tv, Tw); + TD = VMUL(LDK(KP707106781), VSUB(TA, Tx)); + T4I = VADD(Ty, Tz); + } + } + { + V TC, TH, T6A, T6B; + TC = VADD(Tu, TB); + TH = VSUB(TD, TG); + TI = VFMA(LDK(KP195090322), TC, VMUL(LDK(KP980785280), TH)); + T2p = VFNMS(LDK(KP195090322), TH, VMUL(LDK(KP980785280), TC)); + T6A = VADD(T4F, T4G); + T6B = VADD(T4J, T4I); + T6C = VADD(T6A, T6B); + T6V = VSUB(T6A, T6B); + } + { + V T3f, T3g, T4H, T4K; + T3f = VSUB(Tu, TB); + T3g = VADD(TG, TD); + T3h = VFNMS(LDK(KP555570233), T3g, VMUL(LDK(KP831469612), T3f)); + T3E = VFMA(LDK(KP555570233), T3f, VMUL(LDK(KP831469612), T3g)); + T4H = VSUB(T4F, T4G); + T4K = VSUB(T4I, T4J); + T4L = VFNMS(LDK(KP382683432), T4K, VMUL(LDK(KP923879532), T4H)); + T5u = VFMA(LDK(KP382683432), T4H, VMUL(LDK(KP923879532), T4K)); + } + } + { + V TS, T4z, TW, T4y, TP, T4C, TX, T4B; + { + V TQ, TR, TU, TV; + TQ = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); + TR = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); + TS = VSUB(TQ, TR); + T4z = VADD(TQ, TR); + TU = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + TV = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); + TW = VSUB(TU, TV); + T4y = VADD(TU, TV); + { + V TJ, TK, TL, TM, TN, TO; + TJ = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); + TK = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); + TL = VSUB(TJ, TK); + TM = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); + TN = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); + TO = VSUB(TM, TN); + TP = VMUL(LDK(KP707106781), VSUB(TL, TO)); + T4C = VADD(TM, TN); + TX = VMUL(LDK(KP707106781), VADD(TO, TL)); + T4B = VADD(TJ, TK); + } + } + { + V TT, TY, T6D, T6E; + TT = VSUB(TP, TS); + TY = VADD(TW, TX); + TZ = VFNMS(LDK(KP195090322), TY, VMUL(LDK(KP980785280), TT)); + T2q = VFMA(LDK(KP980785280), TY, VMUL(LDK(KP195090322), TT)); + T6D = VADD(T4y, T4z); + T6E = VADD(T4C, T4B); + T6F = VADD(T6D, T6E); + T6U = VSUB(T6D, T6E); + } + { + V T3c, T3d, T4A, T4D; + T3c = VSUB(TW, TX); + 
T3d = VADD(TS, TP); + T3e = VFMA(LDK(KP831469612), T3c, VMUL(LDK(KP555570233), T3d)); + T3D = VFNMS(LDK(KP555570233), T3c, VMUL(LDK(KP831469612), T3d)); + T4A = VSUB(T4y, T4z); + T4D = VSUB(T4B, T4C); + T4E = VFMA(LDK(KP923879532), T4A, VMUL(LDK(KP382683432), T4D)); + T5t = VFNMS(LDK(KP382683432), T4A, VMUL(LDK(KP923879532), T4D)); + } + } + { + V T1F, T55, T2a, T56, T1M, T5h, T27, T5g, T58, T59, T1U, T5a, T25, T5b, T5c; + V T21, T5d, T24; + { + V T1D, T1E, T28, T29; + T1D = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); + T1E = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); + T1F = VSUB(T1D, T1E); + T55 = VADD(T1D, T1E); + T28 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); + T29 = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); + T2a = VSUB(T28, T29); + T56 = VADD(T28, T29); + } + { + V T1G, T1H, T1I, T1J, T1K, T1L; + T1G = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T1H = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); + T1I = VSUB(T1G, T1H); + T1J = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); + T1K = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); + T1L = VSUB(T1J, T1K); + T1M = VMUL(LDK(KP707106781), VADD(T1I, T1L)); + T5h = VADD(T1G, T1H); + T27 = VMUL(LDK(KP707106781), VSUB(T1L, T1I)); + T5g = VADD(T1J, T1K); + } + { + V T1Q, T1T, T1X, T20; + { + V T1O, T1P, T1R, T1S; + T1O = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T1P = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); + T1Q = VSUB(T1O, T1P); + T58 = VADD(T1O, T1P); + T1R = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); + T1S = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); + T1T = VSUB(T1R, T1S); + T59 = VADD(T1R, T1S); + } + T1U = VFNMS(LDK(KP382683432), T1T, VMUL(LDK(KP923879532), T1Q)); + T5a = VSUB(T58, T59); + T25 = VFMA(LDK(KP382683432), T1Q, VMUL(LDK(KP923879532), T1T)); + { + V T1V, T1W, T1Y, T1Z; + T1V = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); + T1W = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); + T1X = VSUB(T1V, T1W); + T5b = VADD(T1V, T1W); + T1Y = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 
1)])); + T1Z = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); + T20 = VSUB(T1Y, T1Z); + T5c = VADD(T1Y, T1Z); + } + T21 = VFMA(LDK(KP923879532), T1X, VMUL(LDK(KP382683432), T20)); + T5d = VSUB(T5b, T5c); + T24 = VFNMS(LDK(KP923879532), T20, VMUL(LDK(KP382683432), T1X)); + } + { + V T1N, T22, T6r, T6s; + T1N = VADD(T1F, T1M); + T22 = VADD(T1U, T21); + T23 = VSUB(T1N, T22); + T2N = VADD(T1N, T22); + T6r = VADD(T55, T56); + T6s = VADD(T5h, T5g); + T6t = VADD(T6r, T6s); + T71 = VSUB(T6r, T6s); + } + { + V T6u, T6v, T26, T2b; + T6u = VADD(T58, T59); + T6v = VADD(T5b, T5c); + T6w = VADD(T6u, T6v); + T72 = VSUB(T6v, T6u); + T26 = VSUB(T24, T25); + T2b = VSUB(T27, T2a); + T2c = VSUB(T26, T2b); + T2O = VADD(T2b, T26); + } + { + V T3r, T3s, T57, T5e; + T3r = VSUB(T1F, T1M); + T3s = VADD(T25, T24); + T3t = VADD(T3r, T3s); + T41 = VSUB(T3r, T3s); + T57 = VSUB(T55, T56); + T5e = VMUL(LDK(KP707106781), VADD(T5a, T5d)); + T5f = VADD(T57, T5e); + T5R = VSUB(T57, T5e); + } + { + V T5i, T5j, T3u, T3v; + T5i = VSUB(T5g, T5h); + T5j = VMUL(LDK(KP707106781), VSUB(T5d, T5a)); + T5k = VADD(T5i, T5j); + T5S = VSUB(T5j, T5i); + T3u = VADD(T2a, T27); + T3v = VSUB(T21, T1U); + T3w = VADD(T3u, T3v); + T42 = VSUB(T3v, T3u); + } + } + { + V T1q, T4P, T1v, T4O, T1n, T50, T1w, T4Z, T4U, T4V, T18, T4W, T1z, T4R, T4S; + V T1f, T4T, T1y; + { + V T1o, T1p, T1t, T1u; + T1o = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); + T1p = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); + T1q = VSUB(T1o, T1p); + T4P = VADD(T1o, T1p); + T1t = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T1u = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); + T1v = VSUB(T1t, T1u); + T4O = VADD(T1t, T1u); + } + { + V T1h, T1i, T1j, T1k, T1l, T1m; + T1h = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); + T1i = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); + T1j = VSUB(T1h, T1i); + T1k = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); + T1l = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); + T1m = VSUB(T1k, T1l); + T1n = VMUL(LDK(KP707106781), 
VSUB(T1j, T1m)); + T50 = VADD(T1k, T1l); + T1w = VMUL(LDK(KP707106781), VADD(T1m, T1j)); + T4Z = VADD(T1h, T1i); + } + { + V T14, T17, T1b, T1e; + { + V T12, T13, T15, T16; + T12 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); + T13 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); + T14 = VSUB(T12, T13); + T4U = VADD(T12, T13); + T15 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); + T16 = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); + T17 = VSUB(T15, T16); + T4V = VADD(T15, T16); + } + T18 = VFNMS(LDK(KP923879532), T17, VMUL(LDK(KP382683432), T14)); + T4W = VSUB(T4U, T4V); + T1z = VFMA(LDK(KP923879532), T14, VMUL(LDK(KP382683432), T17)); + { + V T19, T1a, T1c, T1d; + T19 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T1a = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); + T1b = VSUB(T19, T1a); + T4R = VADD(T19, T1a); + T1c = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); + T1d = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); + T1e = VSUB(T1c, T1d); + T4S = VADD(T1c, T1d); + } + T1f = VFMA(LDK(KP382683432), T1b, VMUL(LDK(KP923879532), T1e)); + T4T = VSUB(T4R, T4S); + T1y = VFNMS(LDK(KP382683432), T1e, VMUL(LDK(KP923879532), T1b)); + } + { + V T1g, T1r, T6k, T6l; + T1g = VSUB(T18, T1f); + T1r = VSUB(T1n, T1q); + T1s = VSUB(T1g, T1r); + T2K = VADD(T1r, T1g); + T6k = VADD(T4O, T4P); + T6l = VADD(T50, T4Z); + T6m = VADD(T6k, T6l); + T6Y = VSUB(T6k, T6l); + } + { + V T6n, T6o, T1x, T1A; + T6n = VADD(T4R, T4S); + T6o = VADD(T4U, T4V); + T6p = VADD(T6n, T6o); + T6Z = VSUB(T6o, T6n); + T1x = VADD(T1v, T1w); + T1A = VADD(T1y, T1z); + T1B = VSUB(T1x, T1A); + T2L = VADD(T1x, T1A); + } + { + V T3k, T3l, T4Q, T4X; + T3k = VSUB(T1v, T1w); + T3l = VADD(T1f, T18); + T3m = VADD(T3k, T3l); + T3Y = VSUB(T3k, T3l); + T4Q = VSUB(T4O, T4P); + T4X = VMUL(LDK(KP707106781), VADD(T4T, T4W)); + T4Y = VADD(T4Q, T4X); + T5O = VSUB(T4Q, T4X); + } + { + V T51, T52, T3n, T3o; + T51 = VSUB(T4Z, T50); + T52 = VMUL(LDK(KP707106781), VSUB(T4W, T4T)); + T53 = VADD(T51, T52); + T5P = VSUB(T52, T51); + T3n 
= VADD(T1q, T1n); + T3o = VSUB(T1z, T1y); + T3p = VADD(T3n, T3o); + T3Z = VSUB(T3o, T3n); + } + } + { + V T7n, T7o, T7p, T7q, T7r, T7s, T7t, T7u, T7v, T7w, T7x, T7y, T7z, T7A, T7B; + V T7C, T7D, T7E, T7F, T7G, T7H, T7I, T7J, T7K; + { + V T6N, T6R, T6Q, T6S; + { + V T6L, T6M, T6O, T6P; + T6L = VADD(T6f, T6i); + T6M = VADD(T6F, T6C); + T6N = VADD(T6L, T6M); + T6R = VSUB(T6L, T6M); + T6O = VADD(T6m, T6p); + T6P = VADD(T6t, T6w); + T6Q = VADD(T6O, T6P); + T6S = VBYI(VSUB(T6P, T6O)); + } + T7n = VSUB(T6N, T6Q); + STM2(&(xo[64]), T7n, ovs, &(xo[0])); + T7o = VADD(T6R, T6S); + STM2(&(xo[32]), T7o, ovs, &(xo[0])); + T7p = VADD(T6N, T6Q); + STM2(&(xo[0]), T7p, ovs, &(xo[0])); + T7q = VSUB(T6R, T6S); + STM2(&(xo[96]), T7q, ovs, &(xo[0])); + } + { + V T6j, T6G, T6y, T6H, T6q, T6x; + T6j = VSUB(T6f, T6i); + T6G = VSUB(T6C, T6F); + T6q = VSUB(T6m, T6p); + T6x = VSUB(T6t, T6w); + T6y = VMUL(LDK(KP707106781), VADD(T6q, T6x)); + T6H = VMUL(LDK(KP707106781), VSUB(T6x, T6q)); + { + V T6z, T6I, T6J, T6K; + T6z = VADD(T6j, T6y); + T6I = VBYI(VADD(T6G, T6H)); + T7r = VSUB(T6z, T6I); + STM2(&(xo[112]), T7r, ovs, &(xo[0])); + T7s = VADD(T6z, T6I); + STM2(&(xo[16]), T7s, ovs, &(xo[0])); + T6J = VSUB(T6j, T6y); + T6K = VBYI(VSUB(T6H, T6G)); + T7t = VSUB(T6J, T6K); + STM2(&(xo[80]), T7t, ovs, &(xo[0])); + T7u = VADD(T6J, T6K); + STM2(&(xo[48]), T7u, ovs, &(xo[0])); + } + } + { + V T6X, T7i, T78, T7g, T74, T7f, T7b, T7j, T6W, T77; + T6W = VMUL(LDK(KP707106781), VADD(T6U, T6V)); + T6X = VADD(T6T, T6W); + T7i = VSUB(T6T, T6W); + T77 = VMUL(LDK(KP707106781), VSUB(T6V, T6U)); + T78 = VADD(T76, T77); + T7g = VSUB(T77, T76); + { + V T70, T73, T79, T7a; + T70 = VFMA(LDK(KP923879532), T6Y, VMUL(LDK(KP382683432), T6Z)); + T73 = VFNMS(LDK(KP382683432), T72, VMUL(LDK(KP923879532), T71)); + T74 = VADD(T70, T73); + T7f = VSUB(T73, T70); + T79 = VFNMS(LDK(KP382683432), T6Y, VMUL(LDK(KP923879532), T6Z)); + T7a = VFMA(LDK(KP382683432), T71, VMUL(LDK(KP923879532), T72)); + T7b = VADD(T79, T7a); + T7j = 
VSUB(T7a, T79); + } + { + V T75, T7c, T7l, T7m; + T75 = VADD(T6X, T74); + T7c = VBYI(VADD(T78, T7b)); + T7v = VSUB(T75, T7c); + STM2(&(xo[120]), T7v, ovs, &(xo[0])); + T7w = VADD(T75, T7c); + STM2(&(xo[8]), T7w, ovs, &(xo[0])); + T7l = VBYI(VADD(T7g, T7f)); + T7m = VADD(T7i, T7j); + T7x = VADD(T7l, T7m); + STM2(&(xo[24]), T7x, ovs, &(xo[0])); + T7y = VSUB(T7m, T7l); + STM2(&(xo[104]), T7y, ovs, &(xo[0])); + } + { + V T7d, T7e, T7h, T7k; + T7d = VSUB(T6X, T74); + T7e = VBYI(VSUB(T7b, T78)); + T7z = VSUB(T7d, T7e); + STM2(&(xo[72]), T7z, ovs, &(xo[0])); + T7A = VADD(T7d, T7e); + STM2(&(xo[56]), T7A, ovs, &(xo[0])); + T7h = VBYI(VSUB(T7f, T7g)); + T7k = VSUB(T7i, T7j); + T7B = VADD(T7h, T7k); + STM2(&(xo[40]), T7B, ovs, &(xo[0])); + T7C = VSUB(T7k, T7h); + STM2(&(xo[88]), T7C, ovs, &(xo[0])); + } + } + { + V T5N, T68, T61, T69, T5U, T65, T5Y, T66; + { + V T5L, T5M, T5Z, T60; + T5L = VSUB(T4p, T4w); + T5M = VSUB(T5u, T5t); + T5N = VADD(T5L, T5M); + T68 = VSUB(T5L, T5M); + T5Z = VFNMS(LDK(KP555570233), T5O, VMUL(LDK(KP831469612), T5P)); + T60 = VFMA(LDK(KP555570233), T5R, VMUL(LDK(KP831469612), T5S)); + T61 = VADD(T5Z, T60); + T69 = VSUB(T60, T5Z); + } + { + V T5Q, T5T, T5W, T5X; + T5Q = VFMA(LDK(KP831469612), T5O, VMUL(LDK(KP555570233), T5P)); + T5T = VFNMS(LDK(KP555570233), T5S, VMUL(LDK(KP831469612), T5R)); + T5U = VADD(T5Q, T5T); + T65 = VSUB(T5T, T5Q); + T5W = VSUB(T5r, T5q); + T5X = VSUB(T4L, T4E); + T5Y = VADD(T5W, T5X); + T66 = VSUB(T5X, T5W); + } + { + V T5V, T62, T6b, T6c; + T5V = VADD(T5N, T5U); + T62 = VBYI(VADD(T5Y, T61)); + T7D = VSUB(T5V, T62); + STM2(&(xo[116]), T7D, ovs, &(xo[0])); + T7E = VADD(T5V, T62); + STM2(&(xo[12]), T7E, ovs, &(xo[0])); + T6b = VBYI(VADD(T66, T65)); + T6c = VADD(T68, T69); + T7F = VADD(T6b, T6c); + STM2(&(xo[20]), T7F, ovs, &(xo[0])); + T7G = VSUB(T6c, T6b); + STM2(&(xo[108]), T7G, ovs, &(xo[0])); + } + { + V T63, T64, T67, T6a; + T63 = VSUB(T5N, T5U); + T64 = VBYI(VSUB(T61, T5Y)); + T7H = VSUB(T63, T64); + STM2(&(xo[76]), T7H, 
ovs, &(xo[0])); + T7I = VADD(T63, T64); + STM2(&(xo[52]), T7I, ovs, &(xo[0])); + T67 = VBYI(VSUB(T65, T66)); + T6a = VSUB(T68, T69); + T7J = VADD(T67, T6a); + STM2(&(xo[44]), T7J, ovs, &(xo[0])); + T7K = VSUB(T6a, T67); + STM2(&(xo[84]), T7K, ovs, &(xo[0])); + } + } + { + V T7U, T7W, T7X, T7Z; + { + V T11, T2C, T2v, T2D, T2e, T2z, T2s, T2A; + { + V Tr, T10, T2t, T2u; + Tr = VSUB(Tb, Tq); + T10 = VSUB(TI, TZ); + T11 = VADD(Tr, T10); + T2C = VSUB(Tr, T10); + T2t = VFNMS(LDK(KP634393284), T1B, VMUL(LDK(KP773010453), T1s)); + T2u = VFMA(LDK(KP773010453), T2c, VMUL(LDK(KP634393284), T23)); + T2v = VADD(T2t, T2u); + T2D = VSUB(T2u, T2t); + } + { + V T1C, T2d, T2o, T2r; + T1C = VFMA(LDK(KP634393284), T1s, VMUL(LDK(KP773010453), T1B)); + T2d = VFNMS(LDK(KP634393284), T2c, VMUL(LDK(KP773010453), T23)); + T2e = VADD(T1C, T2d); + T2z = VSUB(T2d, T1C); + T2o = VSUB(T2i, T2n); + T2r = VSUB(T2p, T2q); + T2s = VADD(T2o, T2r); + T2A = VSUB(T2r, T2o); + } + { + V T2f, T2w, T7L, T7M; + T2f = VADD(T11, T2e); + T2w = VBYI(VADD(T2s, T2v)); + T7L = VSUB(T2f, T2w); + STM2(&(xo[114]), T7L, ovs, &(xo[2])); + STN2(&(xo[112]), T7r, T7L, ovs); + T7M = VADD(T2f, T2w); + STM2(&(xo[14]), T7M, ovs, &(xo[2])); + STN2(&(xo[12]), T7E, T7M, ovs); + } + { + V T2F, T2G, T7N, T7O; + T2F = VBYI(VADD(T2A, T2z)); + T2G = VADD(T2C, T2D); + T7N = VADD(T2F, T2G); + STM2(&(xo[18]), T7N, ovs, &(xo[2])); + STN2(&(xo[16]), T7s, T7N, ovs); + T7O = VSUB(T2G, T2F); + STM2(&(xo[110]), T7O, ovs, &(xo[2])); + STN2(&(xo[108]), T7G, T7O, ovs); + } + { + V T2x, T2y, T7P, T7Q; + T2x = VSUB(T11, T2e); + T2y = VBYI(VSUB(T2v, T2s)); + T7P = VSUB(T2x, T2y); + STM2(&(xo[78]), T7P, ovs, &(xo[2])); + STN2(&(xo[76]), T7H, T7P, ovs); + T7Q = VADD(T2x, T2y); + STM2(&(xo[50]), T7Q, ovs, &(xo[2])); + STN2(&(xo[48]), T7u, T7Q, ovs); + } + { + V T2B, T2E, T7R, T7S; + T2B = VBYI(VSUB(T2z, T2A)); + T2E = VSUB(T2C, T2D); + T7R = VADD(T2B, T2E); + STM2(&(xo[46]), T7R, ovs, &(xo[2])); + STN2(&(xo[44]), T7J, T7R, ovs); + T7S = VSUB(T2E, T2B); 
+ STM2(&(xo[82]), T7S, ovs, &(xo[2])); + STN2(&(xo[80]), T7t, T7S, ovs); + } + } + { + V T3j, T3Q, T3J, T3R, T3y, T3N, T3G, T3O; + { + V T3b, T3i, T3H, T3I; + T3b = VADD(T39, T3a); + T3i = VADD(T3e, T3h); + T3j = VADD(T3b, T3i); + T3Q = VSUB(T3b, T3i); + T3H = VFNMS(LDK(KP290284677), T3m, VMUL(LDK(KP956940335), T3p)); + T3I = VFMA(LDK(KP290284677), T3t, VMUL(LDK(KP956940335), T3w)); + T3J = VADD(T3H, T3I); + T3R = VSUB(T3I, T3H); + } + { + V T3q, T3x, T3C, T3F; + T3q = VFMA(LDK(KP956940335), T3m, VMUL(LDK(KP290284677), T3p)); + T3x = VFNMS(LDK(KP290284677), T3w, VMUL(LDK(KP956940335), T3t)); + T3y = VADD(T3q, T3x); + T3N = VSUB(T3x, T3q); + T3C = VADD(T3A, T3B); + T3F = VADD(T3D, T3E); + T3G = VADD(T3C, T3F); + T3O = VSUB(T3F, T3C); + } + { + V T3z, T3K, T7T, T3T, T3U, T7V; + T3z = VADD(T3j, T3y); + T3K = VBYI(VADD(T3G, T3J)); + T7T = VSUB(T3z, T3K); + STM2(&(xo[122]), T7T, ovs, &(xo[2])); + STN2(&(xo[120]), T7v, T7T, ovs); + T7U = VADD(T3z, T3K); + STM2(&(xo[6]), T7U, ovs, &(xo[2])); + T3T = VBYI(VADD(T3O, T3N)); + T3U = VADD(T3Q, T3R); + T7V = VADD(T3T, T3U); + STM2(&(xo[26]), T7V, ovs, &(xo[2])); + STN2(&(xo[24]), T7x, T7V, ovs); + T7W = VSUB(T3U, T3T); + STM2(&(xo[102]), T7W, ovs, &(xo[2])); + } + { + V T3L, T3M, T7Y, T3P, T3S, T80; + T3L = VSUB(T3j, T3y); + T3M = VBYI(VSUB(T3J, T3G)); + T7X = VSUB(T3L, T3M); + STM2(&(xo[70]), T7X, ovs, &(xo[2])); + T7Y = VADD(T3L, T3M); + STM2(&(xo[58]), T7Y, ovs, &(xo[2])); + STN2(&(xo[56]), T7A, T7Y, ovs); + T3P = VBYI(VSUB(T3N, T3O)); + T3S = VSUB(T3Q, T3R); + T7Z = VADD(T3P, T3S); + STM2(&(xo[38]), T7Z, ovs, &(xo[2])); + T80 = VSUB(T3S, T3P); + STM2(&(xo[90]), T80, ovs, &(xo[2])); + STN2(&(xo[88]), T7C, T80, ovs); + } + } + { + V T81, T83, T86, T88; + { + V T4N, T5G, T5z, T5H, T5m, T5D, T5w, T5E; + { + V T4x, T4M, T5x, T5y; + T4x = VADD(T4p, T4w); + T4M = VADD(T4E, T4L); + T4N = VADD(T4x, T4M); + T5G = VSUB(T4x, T4M); + T5x = VFNMS(LDK(KP195090322), T4Y, VMUL(LDK(KP980785280), T53)); + T5y = VFMA(LDK(KP195090322), T5f, 
VMUL(LDK(KP980785280), T5k)); + T5z = VADD(T5x, T5y); + T5H = VSUB(T5y, T5x); + } + { + V T54, T5l, T5s, T5v; + T54 = VFMA(LDK(KP980785280), T4Y, VMUL(LDK(KP195090322), T53)); + T5l = VFNMS(LDK(KP195090322), T5k, VMUL(LDK(KP980785280), T5f)); + T5m = VADD(T54, T5l); + T5D = VSUB(T5l, T54); + T5s = VADD(T5q, T5r); + T5v = VADD(T5t, T5u); + T5w = VADD(T5s, T5v); + T5E = VSUB(T5v, T5s); + } + { + V T5n, T5A, T82, T5J, T5K, T84; + T5n = VADD(T4N, T5m); + T5A = VBYI(VADD(T5w, T5z)); + T81 = VSUB(T5n, T5A); + STM2(&(xo[124]), T81, ovs, &(xo[0])); + T82 = VADD(T5n, T5A); + STM2(&(xo[4]), T82, ovs, &(xo[0])); + STN2(&(xo[4]), T82, T7U, ovs); + T5J = VBYI(VADD(T5E, T5D)); + T5K = VADD(T5G, T5H); + T83 = VADD(T5J, T5K); + STM2(&(xo[28]), T83, ovs, &(xo[0])); + T84 = VSUB(T5K, T5J); + STM2(&(xo[100]), T84, ovs, &(xo[0])); + STN2(&(xo[100]), T84, T7W, ovs); + } + { + V T5B, T5C, T85, T5F, T5I, T87; + T5B = VSUB(T4N, T5m); + T5C = VBYI(VSUB(T5z, T5w)); + T85 = VSUB(T5B, T5C); + STM2(&(xo[68]), T85, ovs, &(xo[0])); + STN2(&(xo[68]), T85, T7X, ovs); + T86 = VADD(T5B, T5C); + STM2(&(xo[60]), T86, ovs, &(xo[0])); + T5F = VBYI(VSUB(T5D, T5E)); + T5I = VSUB(T5G, T5H); + T87 = VADD(T5F, T5I); + STM2(&(xo[36]), T87, ovs, &(xo[0])); + STN2(&(xo[36]), T87, T7Z, ovs); + T88 = VSUB(T5I, T5F); + STM2(&(xo[92]), T88, ovs, &(xo[0])); + } + } + { + V T2J, T34, T2X, T35, T2Q, T31, T2U, T32; + { + V T2H, T2I, T2V, T2W; + T2H = VADD(Tb, Tq); + T2I = VADD(T2q, T2p); + T2J = VADD(T2H, T2I); + T34 = VSUB(T2H, T2I); + T2V = VFNMS(LDK(KP098017140), T2L, VMUL(LDK(KP995184726), T2K)); + T2W = VFMA(LDK(KP995184726), T2O, VMUL(LDK(KP098017140), T2N)); + T2X = VADD(T2V, T2W); + T35 = VSUB(T2W, T2V); + } + { + V T2M, T2P, T2S, T2T; + T2M = VFMA(LDK(KP098017140), T2K, VMUL(LDK(KP995184726), T2L)); + T2P = VFNMS(LDK(KP098017140), T2O, VMUL(LDK(KP995184726), T2N)); + T2Q = VADD(T2M, T2P); + T31 = VSUB(T2P, T2M); + T2S = VADD(T2n, T2i); + T2T = VADD(TZ, TI); + T2U = VADD(T2S, T2T); + T32 = VSUB(T2T, T2S); + } + 
{ + V T2R, T2Y, T89, T8a; + T2R = VADD(T2J, T2Q); + T2Y = VBYI(VADD(T2U, T2X)); + T89 = VSUB(T2R, T2Y); + STM2(&(xo[126]), T89, ovs, &(xo[2])); + STN2(&(xo[124]), T81, T89, ovs); + T8a = VADD(T2R, T2Y); + STM2(&(xo[2]), T8a, ovs, &(xo[2])); + STN2(&(xo[0]), T7p, T8a, ovs); + } + { + V T37, T38, T8b, T8c; + T37 = VBYI(VADD(T32, T31)); + T38 = VADD(T34, T35); + T8b = VADD(T37, T38); + STM2(&(xo[30]), T8b, ovs, &(xo[2])); + STN2(&(xo[28]), T83, T8b, ovs); + T8c = VSUB(T38, T37); + STM2(&(xo[98]), T8c, ovs, &(xo[2])); + STN2(&(xo[96]), T7q, T8c, ovs); + } + { + V T2Z, T30, T8d, T8e; + T2Z = VSUB(T2J, T2Q); + T30 = VBYI(VSUB(T2X, T2U)); + T8d = VSUB(T2Z, T30); + STM2(&(xo[66]), T8d, ovs, &(xo[2])); + STN2(&(xo[64]), T7n, T8d, ovs); + T8e = VADD(T2Z, T30); + STM2(&(xo[62]), T8e, ovs, &(xo[2])); + STN2(&(xo[60]), T86, T8e, ovs); + } + { + V T33, T36, T8f, T8g; + T33 = VBYI(VSUB(T31, T32)); + T36 = VSUB(T34, T35); + T8f = VADD(T33, T36); + STM2(&(xo[34]), T8f, ovs, &(xo[2])); + STN2(&(xo[32]), T7o, T8f, ovs); + T8g = VSUB(T36, T33); + STM2(&(xo[94]), T8g, ovs, &(xo[2])); + STN2(&(xo[92]), T88, T8g, ovs); + } + } + { + V T3X, T4i, T4b, T4j, T44, T4f, T48, T4g; + { + V T3V, T3W, T49, T4a; + T3V = VSUB(T39, T3a); + T3W = VSUB(T3E, T3D); + T3X = VADD(T3V, T3W); + T4i = VSUB(T3V, T3W); + T49 = VFNMS(LDK(KP471396736), T3Y, VMUL(LDK(KP881921264), T3Z)); + T4a = VFMA(LDK(KP471396736), T41, VMUL(LDK(KP881921264), T42)); + T4b = VADD(T49, T4a); + T4j = VSUB(T4a, T49); + } + { + V T40, T43, T46, T47; + T40 = VFMA(LDK(KP881921264), T3Y, VMUL(LDK(KP471396736), T3Z)); + T43 = VFNMS(LDK(KP471396736), T42, VMUL(LDK(KP881921264), T41)); + T44 = VADD(T40, T43); + T4f = VSUB(T43, T40); + T46 = VSUB(T3B, T3A); + T47 = VSUB(T3h, T3e); + T48 = VADD(T46, T47); + T4g = VSUB(T47, T46); + } + { + V T45, T4c, T8h, T8i; + T45 = VADD(T3X, T44); + T4c = VBYI(VADD(T48, T4b)); + T8h = VSUB(T45, T4c); + STM2(&(xo[118]), T8h, ovs, &(xo[2])); + STN2(&(xo[116]), T7D, T8h, ovs); + T8i = VADD(T45, T4c); + 
STM2(&(xo[10]), T8i, ovs, &(xo[2])); + STN2(&(xo[8]), T7w, T8i, ovs); + } + { + V T4l, T4m, T8j, T8k; + T4l = VBYI(VADD(T4g, T4f)); + T4m = VADD(T4i, T4j); + T8j = VADD(T4l, T4m); + STM2(&(xo[22]), T8j, ovs, &(xo[2])); + STN2(&(xo[20]), T7F, T8j, ovs); + T8k = VSUB(T4m, T4l); + STM2(&(xo[106]), T8k, ovs, &(xo[2])); + STN2(&(xo[104]), T7y, T8k, ovs); + } + { + V T4d, T4e, T8l, T8m; + T4d = VSUB(T3X, T44); + T4e = VBYI(VSUB(T4b, T48)); + T8l = VSUB(T4d, T4e); + STM2(&(xo[74]), T8l, ovs, &(xo[2])); + STN2(&(xo[72]), T7z, T8l, ovs); + T8m = VADD(T4d, T4e); + STM2(&(xo[54]), T8m, ovs, &(xo[2])); + STN2(&(xo[52]), T7I, T8m, ovs); + } + { + V T4h, T4k, T8n, T8o; + T4h = VBYI(VSUB(T4f, T4g)); + T4k = VSUB(T4i, T4j); + T8n = VADD(T4h, T4k); + STM2(&(xo[42]), T8n, ovs, &(xo[2])); + STN2(&(xo[40]), T7B, T8n, ovs); + T8o = VSUB(T4k, T4h); + STM2(&(xo[86]), T8o, ovs, &(xo[2])); + STN2(&(xo[84]), T7K, T8o, ovs); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n2fv_64"), {404, 72, 52, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_64) (planner *p) { + X(kdft_register) (p, n2fv_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:46:55 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name n2fv_8 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 26 FP additions, 10 FP multiplications, + * (or, 16 additions, 0 multiplications, 10 fused multiply/add), + * 38 stack variables, 1 constants, and 20 memory accesses + */ +#include "n2f.h" + +static void n2fv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T1, T2, Tc, Td, T4, T5, T7, T8; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + { + V T3, Tj, Te, Tk, T6, Tm, T9, Tn, Tp, Tl; + T3 = VSUB(T1, T2); + Tj = VADD(T1, T2); + Te = VSUB(Tc, Td); + Tk = VADD(Tc, Td); + T6 = VSUB(T4, T5); + Tm = VADD(T4, T5); + T9 = VSUB(T7, T8); + Tn = VADD(T7, T8); + Tp = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + { + V Tq, To, Ta, Tf; + Tq = VSUB(Tn, Tm); + To = VADD(Tm, Tn); + 
Ta = VADD(T6, T9); + Tf = VSUB(T9, T6); + { + V Tr, Ts, Tt, Tu, Tg, Ti, Tb, Th; + Tr = VADD(Tl, To); + STM2(&(xo[0]), Tr, ovs, &(xo[0])); + Ts = VSUB(Tl, To); + STM2(&(xo[8]), Ts, ovs, &(xo[0])); + Tt = VFMAI(Tq, Tp); + STM2(&(xo[4]), Tt, ovs, &(xo[0])); + Tu = VFNMSI(Tq, Tp); + STM2(&(xo[12]), Tu, ovs, &(xo[0])); + Tg = VFNMS(LDK(KP707106781), Tf, Te); + Ti = VFMA(LDK(KP707106781), Tf, Te); + Tb = VFMA(LDK(KP707106781), Ta, T3); + Th = VFNMS(LDK(KP707106781), Ta, T3); + { + V Tv, Tw, Tx, Ty; + Tv = VFMAI(Ti, Th); + STM2(&(xo[6]), Tv, ovs, &(xo[2])); + STN2(&(xo[4]), Tt, Tv, ovs); + Tw = VFNMSI(Ti, Th); + STM2(&(xo[10]), Tw, ovs, &(xo[2])); + STN2(&(xo[8]), Ts, Tw, ovs); + Tx = VFMAI(Tg, Tb); + STM2(&(xo[14]), Tx, ovs, &(xo[2])); + STN2(&(xo[12]), Tu, Tx, ovs); + Ty = VFNMSI(Tg, Tb); + STM2(&(xo[2]), Ty, ovs, &(xo[2])); + STN2(&(xo[0]), Tr, Ty, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n2fv_8"), {16, 0, 10, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_8) (planner *p) { + X(kdft_register) (p, n2fv_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name n2fv_8 -with-ostride 2 -include n2f.h -store-multiple 2 */ + +/* + * This function contains 26 FP additions, 2 FP multiplications, + * (or, 26 additions, 2 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 20 memory accesses + */ +#include "n2f.h" + +static void n2fv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + const R *xi; + R *xo; + xi = ri; + xo = ro; + for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T3, Tj, Tf, Tk, Ta, Tn, Tc, Tm, Ts, Tu; + { + V T1, T2, Td, Te; + T1 = LD(&(xi[0]), ivs, &(xi[0])); + T2 = 
LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); + T3 = VSUB(T1, T2); + Tj = VADD(T1, T2); + Td = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); + Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); + Tf = VSUB(Td, Te); + Tk = VADD(Td, Te); + { + V T4, T5, T6, T7, T8, T9; + T4 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); + T5 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); + T6 = VSUB(T4, T5); + T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); + T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); + T9 = VSUB(T7, T8); + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tn = VADD(T7, T8); + Tc = VMUL(LDK(KP707106781), VSUB(T9, T6)); + Tm = VADD(T4, T5); + } + } + { + V Tr, Tb, Tg, Tp, Tq, Tt; + Tb = VADD(T3, Ta); + Tg = VBYI(VSUB(Tc, Tf)); + Tr = VSUB(Tb, Tg); + STM2(&(xo[14]), Tr, ovs, &(xo[2])); + Ts = VADD(Tb, Tg); + STM2(&(xo[2]), Ts, ovs, &(xo[2])); + Tp = VSUB(Tj, Tk); + Tq = VBYI(VSUB(Tn, Tm)); + Tt = VSUB(Tp, Tq); + STM2(&(xo[12]), Tt, ovs, &(xo[0])); + STN2(&(xo[12]), Tt, Tr, ovs); + Tu = VADD(Tp, Tq); + STM2(&(xo[4]), Tu, ovs, &(xo[0])); + } + { + V Tv, Th, Ti, Tw; + Th = VSUB(T3, Ta); + Ti = VBYI(VADD(Tf, Tc)); + Tv = VSUB(Th, Ti); + STM2(&(xo[10]), Tv, ovs, &(xo[2])); + Tw = VADD(Th, Ti); + STM2(&(xo[6]), Tw, ovs, &(xo[2])); + STN2(&(xo[4]), Tu, Tw, ovs); + { + V Tl, To, Tx, Ty; + Tl = VADD(Tj, Tk); + To = VADD(Tm, Tn); + Tx = VSUB(Tl, To); + STM2(&(xo[8]), Tx, ovs, &(xo[0])); + STN2(&(xo[8]), Tx, Tv, ovs); + Ty = VADD(Tl, To); + STM2(&(xo[0]), Ty, ovs, &(xo[0])); + STN2(&(xo[0]), Ty, Ts, ovs); + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n2fv_8"), {26, 2, 0, 0}, &GENUS, 0, 2, 0, 0 }; + +void XSIMD(codelet_n2fv_8) (planner *p) { + X(kdft_register) (p, n2fv_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,653 @@ +/* + * Copyright (c) 2003, 2007-14 
Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:03 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name n2sv_16 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 144 FP additions, 40 FP multiplications, + * (or, 104 additions, 0 multiplications, 40 fused multiply/add), + * 110 stack variables, 3 constants, and 72 memory accesses + */ +#include "n2s.h" + +static void n2sv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T2p, T2q, T2r, T2s, T2x, 
T2y, T2z, T2A, T1M, T1N, T1L, T1P, T2F, T2G, T2H; + V T2I, T1O, T1Q; + { + V T1l, T1H, T1R, T7, T1x, TN, TC, T25, T1E, T1b, T1Z, Tt, T2h, T22, T1D; + V T1g, T1n, TQ, T11, Ti, Te, T26, T1m, TT, T1S, TJ, TZ, T1V, TW, Tl; + V T12, T13; + { + V Tq, T1c, Tp, T20, T1a, Tr, T1d, T1e; + { + V T1, T2, Tw, Tx, T4, T5, Tz, TA; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); + Tw = LD(&(ii[0]), ivs, &(ii[0])); + Tx = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); + T4 = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); + Tz = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + TA = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); + { + V Tn, TL, T3, T1k, Ty, T1j, T6, TM, TB, To, T18, T19; + Tn = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); + TL = VSUB(T1, T2); + T3 = VADD(T1, T2); + T1k = VSUB(Tw, Tx); + Ty = VADD(Tw, Tx); + T1j = VSUB(T4, T5); + T6 = VADD(T4, T5); + TM = VSUB(Tz, TA); + TB = VADD(Tz, TA); + To = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + T18 = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); + T19 = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + Tq = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + T1l = VADD(T1j, T1k); + T1H = VSUB(T1k, T1j); + T1R = VSUB(T3, T6); + T7 = VADD(T3, T6); + T1x = VADD(TL, TM); + TN = VSUB(TL, TM); + TC = VADD(Ty, TB); + T25 = VSUB(Ty, TB); + T1c = VSUB(Tn, To); + Tp = VADD(Tn, To); + T20 = VADD(T18, T19); + T1a = VSUB(T18, T19); + Tr = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)])); + T1d = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + T1e = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)])); + } + } + { + V Tb, Ta, TF, Tc, TG, TH, TP, TO; + { + V T8, T9, TD, TE; + T8 = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + T9 = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); + TD = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + TE = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); + Tb = LD(&(ri[WS(is, 14)]), ivs, &(ri[0])); + { + V T17, Ts, T21, T1f; + T17 = VSUB(Tq, Tr); + Ts = VADD(Tq, Tr); + T21 = VADD(T1d, T1e); + T1f = VSUB(T1d, T1e); + TP = 
VSUB(T8, T9); + Ta = VADD(T8, T9); + TO = VSUB(TD, TE); + TF = VADD(TD, TE); + T1E = VSUB(T1a, T17); + T1b = VADD(T17, T1a); + T1Z = VSUB(Tp, Ts); + Tt = VADD(Tp, Ts); + T2h = VADD(T20, T21); + T22 = VSUB(T20, T21); + T1D = VADD(T1c, T1f); + T1g = VSUB(T1c, T1f); + Tc = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + } + TG = LD(&(ii[WS(is, 14)]), ivs, &(ii[0])); + TH = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + } + T1n = VADD(TP, TO); + TQ = VSUB(TO, TP); + { + V Tg, Th, TX, TR, Td, TS, TI, TY, Tj, Tk; + Tg = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + Th = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)])); + TX = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + TR = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + TS = VSUB(TG, TH); + TI = VADD(TG, TH); + TY = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)])); + Tj = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + T11 = VSUB(Tg, Th); + Ti = VADD(Tg, Th); + Tk = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)])); + Te = VADD(Ta, Td); + T26 = VSUB(Td, Ta); + T1m = VSUB(TR, TS); + TT = VADD(TR, TS); + T1S = VSUB(TF, TI); + TJ = VADD(TF, TI); + TZ = VSUB(TX, TY); + T1V = VADD(TX, TY); + TW = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + T12 = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + T13 = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)])); + } + } + } + { + V T2f, Tf, T2j, TK, Tm, T1U, T10, T1B, T14, T1W; + T2f = VSUB(T7, Te); + Tf = VADD(T7, Te); + T2j = VADD(TC, TJ); + TK = VSUB(TC, TJ); + Tm = VADD(Ti, Tl); + T1U = VSUB(Ti, Tl); + T10 = VADD(TW, TZ); + T1B = VSUB(TZ, TW); + T14 = VSUB(T12, T13); + T1W = VADD(T12, T13); + { + V T29, T1T, T27, T2d, T2b, T23, T15, T1A, T2l, T2m, T2n, T2o, T2i, T2k, T1Y; + V T2a; + { + V Tv, Tu, T1X, T2g; + T29 = VSUB(T1R, T1S); + T1T = VADD(T1R, T1S); + T27 = VSUB(T25, T26); + T2d = VADD(T26, T25); + T2b = VADD(T1Z, T22); + T23 = VSUB(T1Z, T22); + Tv = VSUB(Tt, Tm); + Tu = VADD(Tm, Tt); + T1X = VSUB(T1V, T1W); + T2g = VADD(T1V, T1W); + T15 = VSUB(T11, T14); + T1A = VADD(T11, T14); + T2l = VSUB(TK, Tv); + STM4(&(io[12]), T2l, ovs, 
&(io[0])); + T2m = VADD(Tv, TK); + STM4(&(io[4]), T2m, ovs, &(io[0])); + T2n = VADD(Tf, Tu); + STM4(&(ro[0]), T2n, ovs, &(ro[0])); + T2o = VSUB(Tf, Tu); + STM4(&(ro[8]), T2o, ovs, &(ro[0])); + T2i = VSUB(T2g, T2h); + T2k = VADD(T2g, T2h); + T1Y = VADD(T1U, T1X); + T2a = VSUB(T1X, T1U); + } + { + V T1I, T1y, T1t, T16, T1v, TV, T1r, T1p, T2t, T2u, T2v, T2w, T1h, T1s, TU; + V T1o; + T1I = VADD(TQ, TT); + TU = VSUB(TQ, TT); + T1o = VSUB(T1m, T1n); + T1y = VADD(T1n, T1m); + T1t = VFNMS(LDK(KP414213562), T10, T15); + T16 = VFMA(LDK(KP414213562), T15, T10); + T2p = VADD(T2f, T2i); + STM4(&(ro[4]), T2p, ovs, &(ro[0])); + T2q = VSUB(T2f, T2i); + STM4(&(ro[12]), T2q, ovs, &(ro[0])); + T2r = VADD(T2j, T2k); + STM4(&(io[0]), T2r, ovs, &(io[0])); + T2s = VSUB(T2j, T2k); + STM4(&(io[8]), T2s, ovs, &(io[0])); + { + V T28, T24, T2e, T2c; + T28 = VSUB(T23, T1Y); + T24 = VADD(T1Y, T23); + T2e = VADD(T2a, T2b); + T2c = VSUB(T2a, T2b); + T1v = VFNMS(LDK(KP707106781), TU, TN); + TV = VFMA(LDK(KP707106781), TU, TN); + T1r = VFMA(LDK(KP707106781), T1o, T1l); + T1p = VFNMS(LDK(KP707106781), T1o, T1l); + T2t = VFNMS(LDK(KP707106781), T28, T27); + STM4(&(io[14]), T2t, ovs, &(io[0])); + T2u = VFMA(LDK(KP707106781), T28, T27); + STM4(&(io[6]), T2u, ovs, &(io[0])); + T2v = VFMA(LDK(KP707106781), T24, T1T); + STM4(&(ro[2]), T2v, ovs, &(ro[0])); + T2w = VFNMS(LDK(KP707106781), T24, T1T); + STM4(&(ro[10]), T2w, ovs, &(ro[0])); + T2x = VFNMS(LDK(KP707106781), T2e, T2d); + STM4(&(io[10]), T2x, ovs, &(io[0])); + T2y = VFMA(LDK(KP707106781), T2e, T2d); + STM4(&(io[2]), T2y, ovs, &(io[0])); + T2z = VFMA(LDK(KP707106781), T2c, T29); + STM4(&(ro[6]), T2z, ovs, &(ro[0])); + T2A = VFNMS(LDK(KP707106781), T2c, T29); + STM4(&(ro[14]), T2A, ovs, &(ro[0])); + T1h = VFNMS(LDK(KP414213562), T1g, T1b); + T1s = VFMA(LDK(KP414213562), T1b, T1g); + } + { + V T1z, T1J, T1K, T1G, T2B, T2C, T2D, T2E, T1C, T1F; + T1M = VFNMS(LDK(KP414213562), T1A, T1B); + T1C = VFMA(LDK(KP414213562), T1B, T1A); + T1F = 
VFNMS(LDK(KP414213562), T1E, T1D); + T1N = VFMA(LDK(KP414213562), T1D, T1E); + { + V T1q, T1i, T1w, T1u; + T1q = VADD(T16, T1h); + T1i = VSUB(T16, T1h); + T1w = VADD(T1t, T1s); + T1u = VSUB(T1s, T1t); + T1L = VFNMS(LDK(KP707106781), T1y, T1x); + T1z = VFMA(LDK(KP707106781), T1y, T1x); + T1P = VFMA(LDK(KP707106781), T1I, T1H); + T1J = VFNMS(LDK(KP707106781), T1I, T1H); + T1K = VSUB(T1F, T1C); + T1G = VADD(T1C, T1F); + T2B = VFMA(LDK(KP923879532), T1q, T1p); + STM4(&(io[15]), T2B, ovs, &(io[1])); + T2C = VFNMS(LDK(KP923879532), T1q, T1p); + STM4(&(io[7]), T2C, ovs, &(io[1])); + T2D = VFMA(LDK(KP923879532), T1i, TV); + STM4(&(ro[3]), T2D, ovs, &(ro[1])); + T2E = VFNMS(LDK(KP923879532), T1i, TV); + STM4(&(ro[11]), T2E, ovs, &(ro[1])); + T2F = VFMA(LDK(KP923879532), T1w, T1v); + STM4(&(ro[15]), T2F, ovs, &(ro[1])); + T2G = VFNMS(LDK(KP923879532), T1w, T1v); + STM4(&(ro[7]), T2G, ovs, &(ro[1])); + T2H = VFMA(LDK(KP923879532), T1u, T1r); + STM4(&(io[3]), T2H, ovs, &(io[1])); + T2I = VFNMS(LDK(KP923879532), T1u, T1r); + STM4(&(io[11]), T2I, ovs, &(io[1])); + } + { + V T2J, T2K, T2L, T2M; + T2J = VFNMS(LDK(KP923879532), T1G, T1z); + STM4(&(ro[9]), T2J, ovs, &(ro[1])); + STN4(&(ro[8]), T2o, T2J, T2w, T2E, ovs); + T2K = VFMA(LDK(KP923879532), T1G, T1z); + STM4(&(ro[1]), T2K, ovs, &(ro[1])); + STN4(&(ro[0]), T2n, T2K, T2v, T2D, ovs); + T2L = VFNMS(LDK(KP923879532), T1K, T1J); + STM4(&(io[13]), T2L, ovs, &(io[1])); + STN4(&(io[12]), T2l, T2L, T2t, T2B, ovs); + T2M = VFMA(LDK(KP923879532), T1K, T1J); + STM4(&(io[5]), T2M, ovs, &(io[1])); + STN4(&(io[4]), T2m, T2M, T2u, T2C, ovs); + } + } + } + } + } + } + T1O = VSUB(T1M, T1N); + T1Q = VADD(T1M, T1N); + { + V T2N, T2O, T2P, T2Q; + T2N = VFMA(LDK(KP923879532), T1Q, T1P); + STM4(&(io[1]), T2N, ovs, &(io[1])); + STN4(&(io[0]), T2r, T2N, T2y, T2H, ovs); + T2O = VFNMS(LDK(KP923879532), T1Q, T1P); + STM4(&(io[9]), T2O, ovs, &(io[1])); + STN4(&(io[8]), T2s, T2O, T2x, T2I, ovs); + T2P = VFMA(LDK(KP923879532), T1O, T1L); + STM4(&(ro[5]), 
T2P, ovs, &(ro[1])); + STN4(&(ro[4]), T2p, T2P, T2z, T2G, ovs); + T2Q = VFNMS(LDK(KP923879532), T1O, T1L); + STM4(&(ro[13]), T2Q, ovs, &(ro[1])); + STN4(&(ro[12]), T2q, T2Q, T2A, T2F, ovs); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n2sv_16"), {104, 0, 40, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_16) (planner *p) { + X(kdft_register) (p, n2sv_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name n2sv_16 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 144 FP additions, 24 FP multiplications, + * (or, 136 additions, 16 multiplications, 8 fused multiply/add), + * 74 stack variables, 3 constants, and 72 memory accesses + */ +#include "n2s.h" + +static void n2sv_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) { + V T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z; + V T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B; + V T1U, T1A; + { + V T3, TL, Ty, T1k, T6, T1j, TB, TM; + { + V T1, T2, Tw, Tx; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); + T3 = VADD(T1, T2); + TL = VSUB(T1, T2); + Tw = LD(&(ii[0]), ivs, &(ii[0])); + Tx = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); + Ty = VADD(Tw, Tx); + T1k = VSUB(Tw, Tx); + } + { + V T4, T5, Tz, TA; + T4 = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); + T6 = VADD(T4, T5); + T1j = 
VSUB(T4, T5); + Tz = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + TA = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); + TB = VADD(Tz, TA); + TM = VSUB(Tz, TA); + } + T7 = VADD(T3, T6); + T1R = VSUB(T3, T6); + T25 = VSUB(Ty, TB); + TC = VADD(Ty, TB); + TN = VSUB(TL, TM); + T1x = VADD(TL, TM); + T1H = VSUB(T1k, T1j); + T1l = VADD(T1j, T1k); + } + { + V Tp, T17, T1f, T20, Ts, T1c, T1a, T21; + { + V Tn, To, T1d, T1e; + Tn = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); + To = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + Tp = VADD(Tn, To); + T17 = VSUB(Tn, To); + T1d = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); + T1e = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + T1f = VSUB(T1d, T1e); + T20 = VADD(T1d, T1e); + } + { + V Tq, Tr, T18, T19; + Tq = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + Tr = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)])); + Ts = VADD(Tq, Tr); + T1c = VSUB(Tq, Tr); + T18 = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + T19 = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)])); + T1a = VSUB(T18, T19); + T21 = VADD(T18, T19); + } + Tt = VADD(Tp, Ts); + T22 = VSUB(T20, T21); + T2h = VADD(T20, T21); + T1b = VSUB(T17, T1a); + T1g = VADD(T1c, T1f); + T1E = VSUB(T1f, T1c); + T1Z = VSUB(Tp, Ts); + T1D = VADD(T17, T1a); + } + { + V Ta, TP, TF, TO, Td, TR, TI, TS; + { + V T8, T9, TD, TE; + T8 = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + T9 = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); + Ta = VADD(T8, T9); + TP = VSUB(T8, T9); + TD = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + TE = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); + TF = VADD(TD, TE); + TO = VSUB(TD, TE); + } + { + V Tb, Tc, TG, TH; + Tb = LD(&(ri[WS(is, 14)]), ivs, &(ri[0])); + Tc = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + Td = VADD(Tb, Tc); + TR = VSUB(Tb, Tc); + TG = LD(&(ii[WS(is, 14)]), ivs, &(ii[0])); + TH = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + TI = VADD(TG, TH); + TS = VSUB(TG, TH); + } + Te = VADD(Ta, Td); + T1S = VSUB(TF, TI); + T26 = VSUB(Td, Ta); + TJ = VADD(TF, TI); + TQ = VSUB(TO, TP); + T1m = VSUB(TR, TS); + T1n = VADD(TP, 
TO); + TT = VADD(TR, TS); + } + { + V Ti, T11, TZ, T1V, Tl, TW, T14, T1W; + { + V Tg, Th, TX, TY; + Tg = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + Th = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)])); + Ti = VADD(Tg, Th); + T11 = VSUB(Tg, Th); + TX = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + TY = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)])); + TZ = VSUB(TX, TY); + T1V = VADD(TX, TY); + } + { + V Tj, Tk, T12, T13; + Tj = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + Tk = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)])); + Tl = VADD(Tj, Tk); + TW = VSUB(Tj, Tk); + T12 = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + T13 = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)])); + T14 = VSUB(T12, T13); + T1W = VADD(T12, T13); + } + Tm = VADD(Ti, Tl); + T1X = VSUB(T1V, T1W); + T2g = VADD(T1V, T1W); + T10 = VADD(TW, TZ); + T15 = VSUB(T11, T14); + T1B = VADD(T11, T14); + T1U = VSUB(Ti, Tl); + T1A = VSUB(TZ, TW); + } + { + V T2l, T2m, T2n, T2o, T2p, T2q, T2r, T2s; + { + V Tf, Tu, T2j, T2k; + Tf = VADD(T7, Te); + Tu = VADD(Tm, Tt); + T2l = VSUB(Tf, Tu); + STM4(&(ro[8]), T2l, ovs, &(ro[0])); + T2m = VADD(Tf, Tu); + STM4(&(ro[0]), T2m, ovs, &(ro[0])); + T2j = VADD(TC, TJ); + T2k = VADD(T2g, T2h); + T2n = VSUB(T2j, T2k); + STM4(&(io[8]), T2n, ovs, &(io[0])); + T2o = VADD(T2j, T2k); + STM4(&(io[0]), T2o, ovs, &(io[0])); + } + { + V Tv, TK, T2f, T2i; + Tv = VSUB(Tt, Tm); + TK = VSUB(TC, TJ); + T2p = VADD(Tv, TK); + STM4(&(io[4]), T2p, ovs, &(io[0])); + T2q = VSUB(TK, Tv); + STM4(&(io[12]), T2q, ovs, &(io[0])); + T2f = VSUB(T7, Te); + T2i = VSUB(T2g, T2h); + T2r = VSUB(T2f, T2i); + STM4(&(ro[12]), T2r, ovs, &(ro[0])); + T2s = VADD(T2f, T2i); + STM4(&(ro[4]), T2s, ovs, &(ro[0])); + } + { + V T2t, T2u, T2v, T2w, T2x, T2y, T2z, T2A; + { + V T1T, T27, T24, T28, T1Y, T23; + T1T = VADD(T1R, T1S); + T27 = VSUB(T25, T26); + T1Y = VADD(T1U, T1X); + T23 = VSUB(T1Z, T22); + T24 = VMUL(LDK(KP707106781), VADD(T1Y, T23)); + T28 = VMUL(LDK(KP707106781), VSUB(T23, T1Y)); + T2t = VSUB(T1T, T24); + 
STM4(&(ro[10]), T2t, ovs, &(ro[0])); + T2u = VADD(T27, T28); + STM4(&(io[6]), T2u, ovs, &(io[0])); + T2v = VADD(T1T, T24); + STM4(&(ro[2]), T2v, ovs, &(ro[0])); + T2w = VSUB(T27, T28); + STM4(&(io[14]), T2w, ovs, &(io[0])); + } + { + V T29, T2d, T2c, T2e, T2a, T2b; + T29 = VSUB(T1R, T1S); + T2d = VADD(T26, T25); + T2a = VSUB(T1X, T1U); + T2b = VADD(T1Z, T22); + T2c = VMUL(LDK(KP707106781), VSUB(T2a, T2b)); + T2e = VMUL(LDK(KP707106781), VADD(T2a, T2b)); + T2x = VSUB(T29, T2c); + STM4(&(ro[14]), T2x, ovs, &(ro[0])); + T2y = VADD(T2d, T2e); + STM4(&(io[2]), T2y, ovs, &(io[0])); + T2z = VADD(T29, T2c); + STM4(&(ro[6]), T2z, ovs, &(ro[0])); + T2A = VSUB(T2d, T2e); + STM4(&(io[10]), T2A, ovs, &(io[0])); + } + { + V T2B, T2C, T2D, T2E, T2F, T2G, T2H, T2I; + { + V TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o; + TU = VMUL(LDK(KP707106781), VSUB(TQ, TT)); + TV = VADD(TN, TU); + T1r = VSUB(TN, TU); + T1o = VMUL(LDK(KP707106781), VSUB(T1m, T1n)); + T1p = VSUB(T1l, T1o); + T1v = VADD(T1l, T1o); + { + V T16, T1h, T1s, T1t; + T16 = VFMA(LDK(KP923879532), T10, VMUL(LDK(KP382683432), T15)); + T1h = VFNMS(LDK(KP923879532), T1g, VMUL(LDK(KP382683432), T1b)); + T1i = VADD(T16, T1h); + T1q = VSUB(T1h, T16); + T1s = VFNMS(LDK(KP923879532), T15, VMUL(LDK(KP382683432), T10)); + T1t = VFMA(LDK(KP382683432), T1g, VMUL(LDK(KP923879532), T1b)); + T1u = VSUB(T1s, T1t); + T1w = VADD(T1s, T1t); + } + T2B = VSUB(TV, T1i); + STM4(&(ro[11]), T2B, ovs, &(ro[1])); + T2C = VSUB(T1v, T1w); + STM4(&(io[11]), T2C, ovs, &(io[1])); + T2D = VADD(TV, T1i); + STM4(&(ro[3]), T2D, ovs, &(ro[1])); + T2E = VADD(T1v, T1w); + STM4(&(io[3]), T2E, ovs, &(io[1])); + T2F = VSUB(T1p, T1q); + STM4(&(io[15]), T2F, ovs, &(io[1])); + T2G = VSUB(T1r, T1u); + STM4(&(ro[15]), T2G, ovs, &(ro[1])); + T2H = VADD(T1p, T1q); + STM4(&(io[7]), T2H, ovs, &(io[1])); + T2I = VADD(T1r, T1u); + STM4(&(ro[7]), T2I, ovs, &(ro[1])); + } + { + V T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I; + T1y = VMUL(LDK(KP707106781), VADD(T1n, 
T1m)); + T1z = VADD(T1x, T1y); + T1L = VSUB(T1x, T1y); + T1I = VMUL(LDK(KP707106781), VADD(TQ, TT)); + T1J = VSUB(T1H, T1I); + T1P = VADD(T1H, T1I); + { + V T1C, T1F, T1M, T1N; + T1C = VFMA(LDK(KP382683432), T1A, VMUL(LDK(KP923879532), T1B)); + T1F = VFNMS(LDK(KP382683432), T1E, VMUL(LDK(KP923879532), T1D)); + T1G = VADD(T1C, T1F); + T1K = VSUB(T1F, T1C); + T1M = VFNMS(LDK(KP382683432), T1B, VMUL(LDK(KP923879532), T1A)); + T1N = VFMA(LDK(KP923879532), T1E, VMUL(LDK(KP382683432), T1D)); + T1O = VSUB(T1M, T1N); + T1Q = VADD(T1M, T1N); + } + { + V T2J, T2K, T2L, T2M; + T2J = VSUB(T1z, T1G); + STM4(&(ro[9]), T2J, ovs, &(ro[1])); + STN4(&(ro[8]), T2l, T2J, T2t, T2B, ovs); + T2K = VSUB(T1P, T1Q); + STM4(&(io[9]), T2K, ovs, &(io[1])); + STN4(&(io[8]), T2n, T2K, T2A, T2C, ovs); + T2L = VADD(T1z, T1G); + STM4(&(ro[1]), T2L, ovs, &(ro[1])); + STN4(&(ro[0]), T2m, T2L, T2v, T2D, ovs); + T2M = VADD(T1P, T1Q); + STM4(&(io[1]), T2M, ovs, &(io[1])); + STN4(&(io[0]), T2o, T2M, T2y, T2E, ovs); + } + { + V T2N, T2O, T2P, T2Q; + T2N = VSUB(T1J, T1K); + STM4(&(io[13]), T2N, ovs, &(io[1])); + STN4(&(io[12]), T2q, T2N, T2w, T2F, ovs); + T2O = VSUB(T1L, T1O); + STM4(&(ro[13]), T2O, ovs, &(ro[1])); + STN4(&(ro[12]), T2r, T2O, T2x, T2G, ovs); + T2P = VADD(T1J, T1K); + STM4(&(io[5]), T2P, ovs, &(io[1])); + STN4(&(io[4]), T2p, T2P, T2u, T2H, ovs); + T2Q = VADD(T1L, T1O); + STM4(&(ro[5]), T2Q, ovs, &(ro[1])); + STN4(&(ro[4]), T2s, T2Q, T2z, T2I, ovs); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 16, XSIMD_STRING("n2sv_16"), {136, 16, 8, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_16) (planner *p) { + X(kdft_register) (p, n2sv_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1453 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright 
(c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:05 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name n2sv_32 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 372 FP additions, 136 FP multiplications, + * (or, 236 additions, 0 multiplications, 136 fused multiply/add), + * 194 stack variables, 7 constants, and 144 memory accesses + */ +#include "n2s.h" + +static void n2sv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, 
+0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T61, T62, T63, T64, T65, T66, T67, T68, T69, T6a, T6b, T6c, T6d, T6e, T6f; + V T6g, T6h, T6i, T6j, T6k, T6l, T6m, T6n, T6o, T6p, T6q, T6r, T6s, T6t, T6u; + V T6v, T6w, T3g, T3f, T6x, T6y, T6z, T6A, T6B, T6C, T6D, T6E, T4p, T49, T4l; + V T4j, T6F, T6G, T6H, T6I, T6J, T6K, T6L, T6M, T3n, T3b, T3r, T3l, T3o, T3e; + V T4q, T4o, T4k, T4g, T3h, T3p; + { + V T2T, T3T, T4r, T7, T3t, T1z, T18, T4Z, Te, T50, T1f, T4s, T1G, T3U, T2W; + V T3u, Tm, T1n, T3X, T3y, T2Z, T1O, T53, T4w, Tt, T1u, T3W, T3B, T2Y, T1V; + V T52, T4z, T3O, T2t, T3L, T2K, TZ, T5F, T4R, T5k, T5j, T4W, T5I, T5X, T2E; + V T3M, T2N, T3P, T3H, T22, T3E, T2j, T4G, T5h, TK, T5A, T5D, T5W, T2d, T3F; + V T4L, T5g, T3I, T2m; + { + V T1L, T1j, T1k, T1l, T4v, T1K, T3w; + { + V T1, T2, T12, T13, T4, T5, T15, T16; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0])); + T12 = LD(&(ii[0]), ivs, &(ii[0])); + T13 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0])); + T4 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 24)]), ivs, &(ri[0])); + T15 = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); + T16 = LD(&(ii[WS(is, 24)]), ivs, &(ii[0])); + { + V Tb, T1A, Ta, T1B, T1b, Tc, T1c, T1d; + { + V T8, T1x, T3, T2R, T14, T2S, T6, T1y, T17, T9, T19, T1a; + T8 = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + T1x = VSUB(T1, T2); + T3 = VADD(T1, T2); + T2R = VSUB(T12, T13); + T14 = VADD(T12, T13); + T2S = VSUB(T4, T5); + T6 = VADD(T4, T5); + T1y = VSUB(T15, T16); + T17 = VADD(T15, T16); + T9 = LD(&(ri[WS(is, 20)]), ivs, &(ri[0])); + T19 = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + T1a = LD(&(ii[WS(is, 20)]), ivs, &(ii[0])); + Tb = LD(&(ri[WS(is, 28)]), ivs, &(ri[0])); + T2T = VSUB(T2R, T2S); + T3T = VADD(T2S, T2R); + T4r = VSUB(T3, T6); + T7 = VADD(T3, 
T6); + T3t = VSUB(T1x, T1y); + T1z = VADD(T1x, T1y); + T18 = VADD(T14, T17); + T4Z = VSUB(T14, T17); + T1A = VSUB(T8, T9); + Ta = VADD(T8, T9); + T1B = VSUB(T19, T1a); + T1b = VADD(T19, T1a); + Tc = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); + T1c = LD(&(ii[WS(is, 28)]), ivs, &(ii[0])); + T1d = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); + } + { + V Ti, T1I, T1J, Tl; + { + V T1h, T1C, T2U, T1D, Td, T1E, T1e, T1i, Tg, Th; + Tg = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + Th = LD(&(ri[WS(is, 18)]), ivs, &(ri[0])); + T1h = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + T1C = VADD(T1A, T1B); + T2U = VSUB(T1B, T1A); + T1D = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + T1E = VSUB(T1c, T1d); + T1e = VADD(T1c, T1d); + T1L = VSUB(Tg, Th); + Ti = VADD(Tg, Th); + T1i = LD(&(ii[WS(is, 18)]), ivs, &(ii[0])); + { + V T2V, T1F, Tj, Tk; + Tj = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); + Tk = LD(&(ri[WS(is, 26)]), ivs, &(ri[0])); + Te = VADD(Ta, Td); + T50 = VSUB(Td, Ta); + T2V = VADD(T1D, T1E); + T1F = VSUB(T1D, T1E); + T1f = VADD(T1b, T1e); + T4s = VSUB(T1b, T1e); + T1j = VADD(T1h, T1i); + T1I = VSUB(T1h, T1i); + T1J = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + T1G = VADD(T1C, T1F); + T3U = VSUB(T1F, T1C); + T2W = VADD(T2U, T2V); + T3u = VSUB(T2U, T2V); + T1k = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); + T1l = LD(&(ii[WS(is, 26)]), ivs, &(ii[0])); + } + } + T4v = VSUB(Ti, Tl); + Tm = VADD(Ti, Tl); + T1K = VSUB(T1I, T1J); + T3w = VADD(T1J, T1I); + } + } + } + { + V T1r, T1S, T1q, T1s, T4x, T1R, T3z; + { + V Tp, T1P, T1Q, Ts; + { + V Tn, To, T1o, T1M, T1m, T1p; + Tn = LD(&(ri[WS(is, 30)]), ivs, &(ri[0])); + To = LD(&(ri[WS(is, 14)]), ivs, &(ri[0])); + T1o = LD(&(ii[WS(is, 30)]), ivs, &(ii[0])); + T1M = VSUB(T1k, T1l); + T1m = VADD(T1k, T1l); + T1p = LD(&(ii[WS(is, 14)]), ivs, &(ii[0])); + { + V Tq, Tr, T3x, T1N, T4u; + Tq = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + Tr = LD(&(ri[WS(is, 22)]), ivs, &(ri[0])); + T1r = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + T1S = VSUB(Tn, To); + Tp = VADD(Tn, To); + T3x = VSUB(T1L, T1M); + T1N = 
VADD(T1L, T1M); + T4u = VSUB(T1j, T1m); + T1n = VADD(T1j, T1m); + T1P = VSUB(T1o, T1p); + T1q = VADD(T1o, T1p); + T1Q = VSUB(Tq, Tr); + Ts = VADD(Tq, Tr); + T3X = VFNMS(LDK(KP414213562), T3w, T3x); + T3y = VFMA(LDK(KP414213562), T3x, T3w); + T2Z = VFMA(LDK(KP414213562), T1K, T1N); + T1O = VFNMS(LDK(KP414213562), T1N, T1K); + T53 = VADD(T4v, T4u); + T4w = VSUB(T4u, T4v); + T1s = LD(&(ii[WS(is, 22)]), ivs, &(ii[0])); + } + } + T4x = VSUB(Tp, Ts); + Tt = VADD(Tp, Ts); + T1R = VSUB(T1P, T1Q); + T3z = VADD(T1Q, T1P); + } + { + V T4S, T5G, T2y, T2L, T4V, T5H, T2D, T2M; + { + V T2G, TN, T4N, T2r, T2s, TQ, T2A, T4O, T2J, T2x, TU, T4T, T2w, T2z, TX; + V T2B, T2H, T2I, TR; + { + V TL, TM, T2p, T1T, T1t, T2q; + TL = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)])); + TM = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); + T2p = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)])); + T1T = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + T2q = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); + { + V TO, TP, T3A, T1U, T4y; + TO = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + TP = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)])); + T2H = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + T2G = VSUB(TL, TM); + TN = VADD(TL, TM); + T3A = VSUB(T1S, T1T); + T1U = VADD(T1S, T1T); + T4y = VSUB(T1q, T1t); + T1u = VADD(T1q, T1t); + T4N = VADD(T2p, T2q); + T2r = VSUB(T2p, T2q); + T2s = VSUB(TO, TP); + TQ = VADD(TO, TP); + T3W = VFMA(LDK(KP414213562), T3z, T3A); + T3B = VFNMS(LDK(KP414213562), T3A, T3z); + T2Y = VFNMS(LDK(KP414213562), T1R, T1U); + T1V = VFMA(LDK(KP414213562), T1U, T1R); + T52 = VSUB(T4x, T4y); + T4z = VADD(T4x, T4y); + T2I = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)])); + } + } + { + V TS, TT, T2u, T2v, TV, TW; + TS = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + TT = LD(&(ri[WS(is, 19)]), ivs, &(ri[WS(is, 1)])); + T2u = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + T2v = LD(&(ii[WS(is, 19)]), ivs, &(ii[WS(is, 1)])); + TV = LD(&(ri[WS(is, 27)]), ivs, &(ri[WS(is, 1)])); + TW = LD(&(ri[WS(is, 11)]), 
ivs, &(ri[WS(is, 1)])); + T2A = LD(&(ii[WS(is, 27)]), ivs, &(ii[WS(is, 1)])); + T4O = VADD(T2H, T2I); + T2J = VSUB(T2H, T2I); + T2x = VSUB(TS, TT); + TU = VADD(TS, TT); + T4T = VADD(T2u, T2v); + T2w = VSUB(T2u, T2v); + T2z = VSUB(TV, TW); + TX = VADD(TV, TW); + T2B = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)])); + } + T3O = VADD(T2s, T2r); + T2t = VSUB(T2r, T2s); + T3L = VSUB(T2G, T2J); + T2K = VADD(T2G, T2J); + T4S = VSUB(TN, TQ); + TR = VADD(TN, TQ); + { + V T4P, T4Q, TY, T4U, T2C; + T5G = VADD(T4N, T4O); + T4P = VSUB(T4N, T4O); + T4Q = VSUB(TX, TU); + TY = VADD(TU, TX); + T4U = VADD(T2A, T2B); + T2C = VSUB(T2A, T2B); + T2y = VSUB(T2w, T2x); + T2L = VADD(T2x, T2w); + TZ = VADD(TR, TY); + T5F = VSUB(TR, TY); + T4V = VSUB(T4T, T4U); + T5H = VADD(T4T, T4U); + T2D = VADD(T2z, T2C); + T2M = VSUB(T2z, T2C); + T4R = VSUB(T4P, T4Q); + T5k = VADD(T4Q, T4P); + } + } + { + V T2f, Ty, T23, T4C, T20, T21, TB, T4D, T2i, T26, TF, T24, TG, TH, T29; + V T2a; + { + V T1Y, T1Z, Tz, TA, T2g, T2h, Tw, Tx, TD, TE; + Tw = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + Tx = LD(&(ri[WS(is, 17)]), ivs, &(ri[WS(is, 1)])); + T5j = VADD(T4S, T4V); + T4W = VSUB(T4S, T4V); + T5I = VSUB(T5G, T5H); + T5X = VADD(T5G, T5H); + T2E = VADD(T2y, T2D); + T3M = VSUB(T2D, T2y); + T2N = VADD(T2L, T2M); + T3P = VSUB(T2L, T2M); + T2f = VSUB(Tw, Tx); + Ty = VADD(Tw, Tx); + T1Y = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + T1Z = LD(&(ii[WS(is, 17)]), ivs, &(ii[WS(is, 1)])); + Tz = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)])); + TA = LD(&(ri[WS(is, 25)]), ivs, &(ri[WS(is, 1)])); + T2g = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)])); + T2h = LD(&(ii[WS(is, 25)]), ivs, &(ii[WS(is, 1)])); + TD = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + TE = LD(&(ri[WS(is, 21)]), ivs, &(ri[WS(is, 1)])); + T23 = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + T4C = VADD(T1Y, T1Z); + T20 = VSUB(T1Y, T1Z); + T21 = VSUB(Tz, TA); + TB = VADD(Tz, TA); + T4D = VADD(T2g, T2h); + T2i = VSUB(T2g, T2h); + T26 = VSUB(TD, TE); + TF = 
VADD(TD, TE); + T24 = LD(&(ii[WS(is, 21)]), ivs, &(ii[WS(is, 1)])); + TG = LD(&(ri[WS(is, 29)]), ivs, &(ri[WS(is, 1)])); + TH = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)])); + T29 = LD(&(ii[WS(is, 29)]), ivs, &(ii[WS(is, 1)])); + T2a = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)])); + } + { + V T4I, T25, T28, TI, T4J, T2b, T4H, TC, T5B, T4E; + T3H = VADD(T21, T20); + T22 = VSUB(T20, T21); + T3E = VSUB(T2f, T2i); + T2j = VADD(T2f, T2i); + T4I = VADD(T23, T24); + T25 = VSUB(T23, T24); + T28 = VSUB(TG, TH); + TI = VADD(TG, TH); + T4J = VADD(T29, T2a); + T2b = VSUB(T29, T2a); + T4H = VSUB(Ty, TB); + TC = VADD(Ty, TB); + T5B = VADD(T4C, T4D); + T4E = VSUB(T4C, T4D); + { + V T27, T2k, TJ, T4F, T4K, T5C, T2c, T2l; + T27 = VSUB(T25, T26); + T2k = VADD(T26, T25); + TJ = VADD(TF, TI); + T4F = VSUB(TI, TF); + T4K = VSUB(T4I, T4J); + T5C = VADD(T4I, T4J); + T2c = VADD(T28, T2b); + T2l = VSUB(T28, T2b); + T4G = VSUB(T4E, T4F); + T5h = VADD(T4F, T4E); + TK = VADD(TC, TJ); + T5A = VSUB(TC, TJ); + T5D = VSUB(T5B, T5C); + T5W = VADD(T5B, T5C); + T2d = VADD(T27, T2c); + T3F = VSUB(T2c, T27); + T4L = VSUB(T4H, T4K); + T5g = VADD(T4H, T4K); + T3I = VSUB(T2k, T2l); + T2m = VADD(T2k, T2l); + } + } + } + } + } + } + { + V T1v, T1g, T5V, Tv, T60, T5Y, T11, T10; + { + V T5o, T5n, T5i, T5r, T5f, T5l, T5w, T5u; + { + V T5d, T4t, T4A, T4X, T58, T51, T4M, T59, T54, T5e, T5b, T4B; + T5d = VADD(T4r, T4s); + T4t = VSUB(T4r, T4s); + T4A = VSUB(T4w, T4z); + T5o = VADD(T4w, T4z); + T4X = VFNMS(LDK(KP414213562), T4W, T4R); + T58 = VFMA(LDK(KP414213562), T4R, T4W); + T5n = VADD(T50, T4Z); + T51 = VSUB(T4Z, T50); + T4M = VFMA(LDK(KP414213562), T4L, T4G); + T59 = VFNMS(LDK(KP414213562), T4G, T4L); + T54 = VSUB(T52, T53); + T5e = VADD(T53, T52); + T5b = VFNMS(LDK(KP707106781), T4A, T4t); + T4B = VFMA(LDK(KP707106781), T4A, T4t); + { + V T5s, T56, T4Y, T5c, T5a, T57, T55, T5t; + T5i = VFMA(LDK(KP414213562), T5h, T5g); + T5s = VFNMS(LDK(KP414213562), T5g, T5h); + T56 = VADD(T4M, T4X); + T4Y = VSUB(T4M, T4X); 
+ T5c = VADD(T59, T58); + T5a = VSUB(T58, T59); + T57 = VFMA(LDK(KP707106781), T54, T51); + T55 = VFNMS(LDK(KP707106781), T54, T51); + T5r = VFNMS(LDK(KP707106781), T5e, T5d); + T5f = VFMA(LDK(KP707106781), T5e, T5d); + T5t = VFMA(LDK(KP414213562), T5j, T5k); + T5l = VFNMS(LDK(KP414213562), T5k, T5j); + T61 = VFMA(LDK(KP923879532), T4Y, T4B); + STM4(&(ro[6]), T61, ovs, &(ro[0])); + T62 = VFNMS(LDK(KP923879532), T4Y, T4B); + STM4(&(ro[22]), T62, ovs, &(ro[0])); + T63 = VFMA(LDK(KP923879532), T5c, T5b); + STM4(&(ro[30]), T63, ovs, &(ro[0])); + T64 = VFNMS(LDK(KP923879532), T5c, T5b); + STM4(&(ro[14]), T64, ovs, &(ro[0])); + T65 = VFMA(LDK(KP923879532), T5a, T57); + STM4(&(io[6]), T65, ovs, &(io[0])); + T66 = VFNMS(LDK(KP923879532), T5a, T57); + STM4(&(io[22]), T66, ovs, &(io[0])); + T67 = VFMA(LDK(KP923879532), T56, T55); + STM4(&(io[30]), T67, ovs, &(io[0])); + T68 = VFNMS(LDK(KP923879532), T56, T55); + STM4(&(io[14]), T68, ovs, &(io[0])); + T5w = VADD(T5s, T5t); + T5u = VSUB(T5s, T5t); + } + } + { + V Tf, T5P, T5z, T5S, T5U, T5O, T5K, T5L, T5M, Tu, T5T, T5N; + { + V T5E, T5Q, T5q, T5m, T5v, T5p, T5R, T5J, T5x, T5y; + Tf = VADD(T7, Te); + T5x = VSUB(T7, Te); + T5y = VSUB(T1n, T1u); + T1v = VADD(T1n, T1u); + T69 = VFMA(LDK(KP923879532), T5u, T5r); + STM4(&(ro[10]), T69, ovs, &(ro[0])); + T6a = VFNMS(LDK(KP923879532), T5u, T5r); + STM4(&(ro[26]), T6a, ovs, &(ro[0])); + T5E = VADD(T5A, T5D); + T5Q = VSUB(T5D, T5A); + T5q = VSUB(T5l, T5i); + T5m = VADD(T5i, T5l); + T5v = VFMA(LDK(KP707106781), T5o, T5n); + T5p = VFNMS(LDK(KP707106781), T5o, T5n); + T5P = VSUB(T5x, T5y); + T5z = VADD(T5x, T5y); + T5R = VADD(T5F, T5I); + T5J = VSUB(T5F, T5I); + T6b = VFMA(LDK(KP923879532), T5m, T5f); + STM4(&(ro[2]), T6b, ovs, &(ro[0])); + T6c = VFNMS(LDK(KP923879532), T5m, T5f); + STM4(&(ro[18]), T6c, ovs, &(ro[0])); + T6d = VFMA(LDK(KP923879532), T5w, T5v); + STM4(&(io[2]), T6d, ovs, &(io[0])); + T6e = VFNMS(LDK(KP923879532), T5w, T5v); + STM4(&(io[18]), T6e, ovs, &(io[0])); + T6f = 
VFMA(LDK(KP923879532), T5q, T5p); + STM4(&(io[10]), T6f, ovs, &(io[0])); + T6g = VFNMS(LDK(KP923879532), T5q, T5p); + STM4(&(io[26]), T6g, ovs, &(io[0])); + T5S = VSUB(T5Q, T5R); + T5U = VADD(T5Q, T5R); + T5O = VSUB(T5J, T5E); + T5K = VADD(T5E, T5J); + T1g = VADD(T18, T1f); + T5L = VSUB(T18, T1f); + T5M = VSUB(Tt, Tm); + Tu = VADD(Tm, Tt); + } + T6h = VFMA(LDK(KP707106781), T5S, T5P); + STM4(&(ro[12]), T6h, ovs, &(ro[0])); + T6i = VFNMS(LDK(KP707106781), T5S, T5P); + STM4(&(ro[28]), T6i, ovs, &(ro[0])); + T6j = VFMA(LDK(KP707106781), T5K, T5z); + STM4(&(ro[4]), T6j, ovs, &(ro[0])); + T6k = VFNMS(LDK(KP707106781), T5K, T5z); + STM4(&(ro[20]), T6k, ovs, &(ro[0])); + T5T = VADD(T5M, T5L); + T5N = VSUB(T5L, T5M); + T5V = VSUB(Tf, Tu); + Tv = VADD(Tf, Tu); + T6l = VFMA(LDK(KP707106781), T5U, T5T); + STM4(&(io[4]), T6l, ovs, &(io[0])); + T6m = VFNMS(LDK(KP707106781), T5U, T5T); + STM4(&(io[20]), T6m, ovs, &(io[0])); + T6n = VFMA(LDK(KP707106781), T5O, T5N); + STM4(&(io[12]), T6n, ovs, &(io[0])); + T6o = VFNMS(LDK(KP707106781), T5O, T5N); + STM4(&(io[28]), T6o, ovs, &(io[0])); + T60 = VADD(T5W, T5X); + T5Y = VSUB(T5W, T5X); + T11 = VSUB(TZ, TK); + T10 = VADD(TK, TZ); + } + } + { + V T39, T3k, T3j, T3a, T1X, T37, T33, T31, T3d, T3c, T47, T4i, T4h, T48, T4b; + V T4a, T4e, T3N, T41, T3D, T45, T3Z, T38, T36, T32, T2Q, T42, T3K, T3Q, T4d; + { + V T2e, T2n, T2F, T2O, T1w, T5Z; + { + V T1H, T1W, T2X, T30; + T39 = VFMA(LDK(KP707106781), T1G, T1z); + T1H = VFNMS(LDK(KP707106781), T1G, T1z); + T1W = VSUB(T1O, T1V); + T3k = VADD(T1O, T1V); + T3j = VFMA(LDK(KP707106781), T2W, T2T); + T2X = VFNMS(LDK(KP707106781), T2W, T2T); + T30 = VSUB(T2Y, T2Z); + T3a = VADD(T2Z, T2Y); + T6p = VSUB(T5V, T5Y); + STM4(&(ro[24]), T6p, ovs, &(ro[0])); + T6q = VADD(T5V, T5Y); + STM4(&(ro[8]), T6q, ovs, &(ro[0])); + T6r = VADD(Tv, T10); + STM4(&(ro[0]), T6r, ovs, &(ro[0])); + T6s = VSUB(Tv, T10); + STM4(&(ro[16]), T6s, ovs, &(ro[0])); + T1w = VSUB(T1g, T1v); + T5Z = VADD(T1g, T1v); + T1X = 
VFMA(LDK(KP923879532), T1W, T1H); + T37 = VFNMS(LDK(KP923879532), T1W, T1H); + T33 = VFMA(LDK(KP923879532), T30, T2X); + T31 = VFNMS(LDK(KP923879532), T30, T2X); + } + T3d = VFMA(LDK(KP707106781), T2d, T22); + T2e = VFNMS(LDK(KP707106781), T2d, T22); + T2n = VFNMS(LDK(KP707106781), T2m, T2j); + T3c = VFMA(LDK(KP707106781), T2m, T2j); + T6t = VADD(T5Z, T60); + STM4(&(io[0]), T6t, ovs, &(io[0])); + T6u = VSUB(T5Z, T60); + STM4(&(io[16]), T6u, ovs, &(io[0])); + T6v = VSUB(T1w, T11); + STM4(&(io[24]), T6v, ovs, &(io[0])); + T6w = VADD(T11, T1w); + STM4(&(io[8]), T6w, ovs, &(io[0])); + T3g = VFMA(LDK(KP707106781), T2E, T2t); + T2F = VFNMS(LDK(KP707106781), T2E, T2t); + T2O = VFNMS(LDK(KP707106781), T2N, T2K); + T3f = VFMA(LDK(KP707106781), T2N, T2K); + { + V T3v, T35, T2o, T3C, T3V, T3Y; + T47 = VFNMS(LDK(KP707106781), T3u, T3t); + T3v = VFMA(LDK(KP707106781), T3u, T3t); + T35 = VFNMS(LDK(KP668178637), T2e, T2n); + T2o = VFMA(LDK(KP668178637), T2n, T2e); + T3C = VSUB(T3y, T3B); + T4i = VADD(T3y, T3B); + T4h = VFNMS(LDK(KP707106781), T3U, T3T); + T3V = VFMA(LDK(KP707106781), T3U, T3T); + T3Y = VSUB(T3W, T3X); + T48 = VADD(T3X, T3W); + { + V T3G, T34, T2P, T3J; + T4b = VFMA(LDK(KP707106781), T3F, T3E); + T3G = VFNMS(LDK(KP707106781), T3F, T3E); + T34 = VFMA(LDK(KP668178637), T2F, T2O); + T2P = VFNMS(LDK(KP668178637), T2O, T2F); + T3J = VFNMS(LDK(KP707106781), T3I, T3H); + T4a = VFMA(LDK(KP707106781), T3I, T3H); + T4e = VFMA(LDK(KP707106781), T3M, T3L); + T3N = VFNMS(LDK(KP707106781), T3M, T3L); + T41 = VFNMS(LDK(KP923879532), T3C, T3v); + T3D = VFMA(LDK(KP923879532), T3C, T3v); + T45 = VFMA(LDK(KP923879532), T3Y, T3V); + T3Z = VFNMS(LDK(KP923879532), T3Y, T3V); + T38 = VADD(T35, T34); + T36 = VSUB(T34, T35); + T32 = VADD(T2o, T2P); + T2Q = VSUB(T2o, T2P); + T42 = VFNMS(LDK(KP668178637), T3G, T3J); + T3K = VFMA(LDK(KP668178637), T3J, T3G); + T3Q = VFNMS(LDK(KP707106781), T3P, T3O); + T4d = VFMA(LDK(KP707106781), T3P, T3O); + } + } + } + { + V T4n, T4c, T43, T3R, T4m, T4f; 
+ T6x = VFMA(LDK(KP831469612), T38, T37); + STM4(&(ro[29]), T6x, ovs, &(ro[1])); + T6y = VFNMS(LDK(KP831469612), T38, T37); + STM4(&(ro[13]), T6y, ovs, &(ro[1])); + T6z = VFMA(LDK(KP831469612), T36, T33); + STM4(&(io[5]), T6z, ovs, &(io[1])); + T6A = VFNMS(LDK(KP831469612), T36, T33); + STM4(&(io[21]), T6A, ovs, &(io[1])); + T6B = VFMA(LDK(KP831469612), T32, T31); + STM4(&(io[29]), T6B, ovs, &(io[1])); + T6C = VFNMS(LDK(KP831469612), T32, T31); + STM4(&(io[13]), T6C, ovs, &(io[1])); + T6D = VFMA(LDK(KP831469612), T2Q, T1X); + STM4(&(ro[5]), T6D, ovs, &(ro[1])); + T6E = VFNMS(LDK(KP831469612), T2Q, T1X); + STM4(&(ro[21]), T6E, ovs, &(ro[1])); + T43 = VFMA(LDK(KP668178637), T3N, T3Q); + T3R = VFNMS(LDK(KP668178637), T3Q, T3N); + { + V T44, T46, T40, T3S; + T44 = VSUB(T42, T43); + T46 = VADD(T42, T43); + T40 = VSUB(T3R, T3K); + T3S = VADD(T3K, T3R); + T4p = VFMA(LDK(KP923879532), T48, T47); + T49 = VFNMS(LDK(KP923879532), T48, T47); + T4l = VFNMS(LDK(KP923879532), T4i, T4h); + T4j = VFMA(LDK(KP923879532), T4i, T4h); + T4n = VFNMS(LDK(KP198912367), T4a, T4b); + T4c = VFMA(LDK(KP198912367), T4b, T4a); + T6F = VFMA(LDK(KP831469612), T44, T41); + STM4(&(ro[11]), T6F, ovs, &(ro[1])); + T6G = VFNMS(LDK(KP831469612), T44, T41); + STM4(&(ro[27]), T6G, ovs, &(ro[1])); + T6H = VFMA(LDK(KP831469612), T46, T45); + STM4(&(io[3]), T6H, ovs, &(io[1])); + T6I = VFNMS(LDK(KP831469612), T46, T45); + STM4(&(io[19]), T6I, ovs, &(io[1])); + T6J = VFMA(LDK(KP831469612), T40, T3Z); + STM4(&(io[11]), T6J, ovs, &(io[1])); + T6K = VFNMS(LDK(KP831469612), T40, T3Z); + STM4(&(io[27]), T6K, ovs, &(io[1])); + T6L = VFMA(LDK(KP831469612), T3S, T3D); + STM4(&(ro[3]), T6L, ovs, &(ro[1])); + T6M = VFNMS(LDK(KP831469612), T3S, T3D); + STM4(&(ro[19]), T6M, ovs, &(ro[1])); + } + T4m = VFMA(LDK(KP198912367), T4d, T4e); + T4f = VFNMS(LDK(KP198912367), T4e, T4d); + T3n = VFNMS(LDK(KP923879532), T3a, T39); + T3b = VFMA(LDK(KP923879532), T3a, T39); + T3r = VFMA(LDK(KP923879532), T3k, T3j); + T3l = 
VFNMS(LDK(KP923879532), T3k, T3j); + T3o = VFNMS(LDK(KP198912367), T3c, T3d); + T3e = VFMA(LDK(KP198912367), T3d, T3c); + T4q = VADD(T4n, T4m); + T4o = VSUB(T4m, T4n); + T4k = VADD(T4c, T4f); + T4g = VSUB(T4c, T4f); + } + } + } + } + { + V T6N, T6O, T6P, T6Q; + T6N = VFMA(LDK(KP980785280), T4q, T4p); + STM4(&(ro[31]), T6N, ovs, &(ro[1])); + STN4(&(ro[28]), T6i, T6x, T63, T6N, ovs); + T6O = VFNMS(LDK(KP980785280), T4q, T4p); + STM4(&(ro[15]), T6O, ovs, &(ro[1])); + STN4(&(ro[12]), T6h, T6y, T64, T6O, ovs); + T6P = VFMA(LDK(KP980785280), T4o, T4l); + STM4(&(io[7]), T6P, ovs, &(io[1])); + STN4(&(io[4]), T6l, T6z, T65, T6P, ovs); + T6Q = VFNMS(LDK(KP980785280), T4o, T4l); + STM4(&(io[23]), T6Q, ovs, &(io[1])); + STN4(&(io[20]), T6m, T6A, T66, T6Q, ovs); + { + V T6R, T6S, T6T, T6U; + T6R = VFMA(LDK(KP980785280), T4k, T4j); + STM4(&(io[31]), T6R, ovs, &(io[1])); + STN4(&(io[28]), T6o, T6B, T67, T6R, ovs); + T6S = VFNMS(LDK(KP980785280), T4k, T4j); + STM4(&(io[15]), T6S, ovs, &(io[1])); + STN4(&(io[12]), T6n, T6C, T68, T6S, ovs); + T6T = VFMA(LDK(KP980785280), T4g, T49); + STM4(&(ro[7]), T6T, ovs, &(ro[1])); + STN4(&(ro[4]), T6j, T6D, T61, T6T, ovs); + T6U = VFNMS(LDK(KP980785280), T4g, T49); + STM4(&(ro[23]), T6U, ovs, &(ro[1])); + STN4(&(ro[20]), T6k, T6E, T62, T6U, ovs); + T3h = VFNMS(LDK(KP198912367), T3g, T3f); + T3p = VFMA(LDK(KP198912367), T3f, T3g); + } + } + { + V T3s, T3q, T3i, T3m; + T3s = VADD(T3o, T3p); + T3q = VSUB(T3o, T3p); + T3i = VADD(T3e, T3h); + T3m = VSUB(T3h, T3e); + { + V T6V, T6W, T6X, T6Y; + T6V = VFMA(LDK(KP980785280), T3q, T3n); + STM4(&(ro[9]), T6V, ovs, &(ro[1])); + STN4(&(ro[8]), T6q, T6V, T69, T6F, ovs); + T6W = VFNMS(LDK(KP980785280), T3q, T3n); + STM4(&(ro[25]), T6W, ovs, &(ro[1])); + STN4(&(ro[24]), T6p, T6W, T6a, T6G, ovs); + T6X = VFMA(LDK(KP980785280), T3s, T3r); + STM4(&(io[1]), T6X, ovs, &(io[1])); + STN4(&(io[0]), T6t, T6X, T6d, T6H, ovs); + T6Y = VFNMS(LDK(KP980785280), T3s, T3r); + STM4(&(io[17]), T6Y, ovs, &(io[1])); + 
STN4(&(io[16]), T6u, T6Y, T6e, T6I, ovs); + { + V T6Z, T70, T71, T72; + T6Z = VFMA(LDK(KP980785280), T3m, T3l); + STM4(&(io[9]), T6Z, ovs, &(io[1])); + STN4(&(io[8]), T6w, T6Z, T6f, T6J, ovs); + T70 = VFNMS(LDK(KP980785280), T3m, T3l); + STM4(&(io[25]), T70, ovs, &(io[1])); + STN4(&(io[24]), T6v, T70, T6g, T6K, ovs); + T71 = VFMA(LDK(KP980785280), T3i, T3b); + STM4(&(ro[1]), T71, ovs, &(ro[1])); + STN4(&(ro[0]), T6r, T71, T6b, T6L, ovs); + T72 = VFNMS(LDK(KP980785280), T3i, T3b); + STM4(&(ro[17]), T72, ovs, &(ro[1])); + STN4(&(ro[16]), T6s, T72, T6c, T6M, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n2sv_32"), {236, 0, 136, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_32) (planner *p) { + X(kdft_register) (p, n2sv_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name n2sv_32 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 372 FP additions, 84 FP multiplications, + * (or, 340 additions, 52 multiplications, 32 fused multiply/add), + * 130 stack variables, 7 constants, and 144 memory accesses + */ +#include "n2s.h" + +static void n2sv_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), 
MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) { + V T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G; + V T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W; + V T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5j, T2E; + V T3P, T4W, T5k, T2N, T3M, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D; + V T4G, T5g, T2d, T3F, T4L, T5h, T2m, T3I; + { + V T3, T1x, T14, T2S, T6, T2R, T17, T1y; + { + V T1, T2, T12, T13; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0])); + T3 = VADD(T1, T2); + T1x = VSUB(T1, T2); + T12 = LD(&(ii[0]), ivs, &(ii[0])); + T13 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0])); + T14 = VADD(T12, T13); + T2S = VSUB(T12, T13); + } + { + V T4, T5, T15, T16; + T4 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 24)]), ivs, &(ri[0])); + T6 = VADD(T4, T5); + T2R = VSUB(T4, T5); + T15 = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); + T16 = LD(&(ii[WS(is, 24)]), ivs, &(ii[0])); + T17 = VADD(T15, T16); + T1y = VSUB(T15, T16); + } + T7 = VADD(T3, T6); + T4r = VSUB(T3, T6); + T4Z = VSUB(T14, T17); + T18 = VADD(T14, T17); + T1z = VSUB(T1x, T1y); + T3t = VADD(T1x, T1y); + T3T = VSUB(T2S, T2R); + T2T = VADD(T2R, T2S); + } + { + V Ta, T1B, T1b, T1A, Td, T1D, T1e, T1E; + { + V T8, T9, T19, T1a; + T8 = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + T9 = LD(&(ri[WS(is, 20)]), ivs, &(ri[0])); + Ta = VADD(T8, T9); + T1B = VSUB(T8, T9); + T19 = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + T1a = LD(&(ii[WS(is, 20)]), ivs, &(ii[0])); + T1b = VADD(T19, T1a); + T1A = VSUB(T19, T1a); + } + { + V Tb, Tc, T1c, T1d; + Tb = LD(&(ri[WS(is, 28)]), ivs, &(ri[0])); + Tc = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); + Td = VADD(Tb, Tc); + T1D = VSUB(Tb, Tc); + T1c = LD(&(ii[WS(is, 28)]), ivs, &(ii[0])); + T1d = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); + T1e = VADD(T1c, T1d); + T1E = VSUB(T1c, T1d); + } + Te = VADD(Ta, Td); + T1f = VADD(T1b, T1e); + T50 = VSUB(Td, Ta); + T4s = VSUB(T1b, T1e); + { + V T2U, T2V, 
T1C, T1F; + T2U = VSUB(T1D, T1E); + T2V = VADD(T1B, T1A); + T2W = VMUL(LDK(KP707106781), VSUB(T2U, T2V)); + T3u = VMUL(LDK(KP707106781), VADD(T2V, T2U)); + T1C = VSUB(T1A, T1B); + T1F = VADD(T1D, T1E); + T1G = VMUL(LDK(KP707106781), VSUB(T1C, T1F)); + T3U = VMUL(LDK(KP707106781), VADD(T1C, T1F)); + } + } + { + V Ti, T1L, T1j, T1J, Tl, T1I, T1m, T1M, T1K, T1N; + { + V Tg, Th, T1h, T1i; + Tg = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + Th = LD(&(ri[WS(is, 18)]), ivs, &(ri[0])); + Ti = VADD(Tg, Th); + T1L = VSUB(Tg, Th); + T1h = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + T1i = LD(&(ii[WS(is, 18)]), ivs, &(ii[0])); + T1j = VADD(T1h, T1i); + T1J = VSUB(T1h, T1i); + } + { + V Tj, Tk, T1k, T1l; + Tj = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); + Tk = LD(&(ri[WS(is, 26)]), ivs, &(ri[0])); + Tl = VADD(Tj, Tk); + T1I = VSUB(Tj, Tk); + T1k = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); + T1l = LD(&(ii[WS(is, 26)]), ivs, &(ii[0])); + T1m = VADD(T1k, T1l); + T1M = VSUB(T1k, T1l); + } + Tm = VADD(Ti, Tl); + T1n = VADD(T1j, T1m); + T1K = VADD(T1I, T1J); + T1N = VSUB(T1L, T1M); + T1O = VFNMS(LDK(KP923879532), T1N, VMUL(LDK(KP382683432), T1K)); + T2Z = VFMA(LDK(KP923879532), T1K, VMUL(LDK(KP382683432), T1N)); + { + V T3w, T3x, T4u, T4v; + T3w = VSUB(T1J, T1I); + T3x = VADD(T1L, T1M); + T3y = VFNMS(LDK(KP382683432), T3x, VMUL(LDK(KP923879532), T3w)); + T3X = VFMA(LDK(KP382683432), T3w, VMUL(LDK(KP923879532), T3x)); + T4u = VSUB(T1j, T1m); + T4v = VSUB(Ti, Tl); + T4w = VSUB(T4u, T4v); + T53 = VADD(T4v, T4u); + } + } + { + V Tp, T1S, T1q, T1Q, Ts, T1P, T1t, T1T, T1R, T1U; + { + V Tn, To, T1o, T1p; + Tn = LD(&(ri[WS(is, 30)]), ivs, &(ri[0])); + To = LD(&(ri[WS(is, 14)]), ivs, &(ri[0])); + Tp = VADD(Tn, To); + T1S = VSUB(Tn, To); + T1o = LD(&(ii[WS(is, 30)]), ivs, &(ii[0])); + T1p = LD(&(ii[WS(is, 14)]), ivs, &(ii[0])); + T1q = VADD(T1o, T1p); + T1Q = VSUB(T1o, T1p); + } + { + V Tq, Tr, T1r, T1s; + Tq = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + Tr = LD(&(ri[WS(is, 22)]), ivs, &(ri[0])); + Ts = VADD(Tq, Tr); + 
T1P = VSUB(Tq, Tr); + T1r = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + T1s = LD(&(ii[WS(is, 22)]), ivs, &(ii[0])); + T1t = VADD(T1r, T1s); + T1T = VSUB(T1r, T1s); + } + Tt = VADD(Tp, Ts); + T1u = VADD(T1q, T1t); + T1R = VADD(T1P, T1Q); + T1U = VSUB(T1S, T1T); + T1V = VFMA(LDK(KP382683432), T1R, VMUL(LDK(KP923879532), T1U)); + T2Y = VFNMS(LDK(KP923879532), T1R, VMUL(LDK(KP382683432), T1U)); + { + V T3z, T3A, T4x, T4y; + T3z = VSUB(T1Q, T1P); + T3A = VADD(T1S, T1T); + T3B = VFMA(LDK(KP923879532), T3z, VMUL(LDK(KP382683432), T3A)); + T3W = VFNMS(LDK(KP382683432), T3z, VMUL(LDK(KP923879532), T3A)); + T4x = VSUB(Tp, Ts); + T4y = VSUB(T1q, T1t); + T4z = VADD(T4x, T4y); + T52 = VSUB(T4x, T4y); + } + } + { + V TN, T2p, T2J, T4S, TQ, T2G, T2s, T4T, TU, T2x, T2w, T4O, TX, T2z, T2C; + V T4P; + { + V TL, TM, T2H, T2I; + TL = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)])); + TM = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); + TN = VADD(TL, TM); + T2p = VSUB(TL, TM); + T2H = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)])); + T2I = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); + T2J = VSUB(T2H, T2I); + T4S = VADD(T2H, T2I); + } + { + V TO, TP, T2q, T2r; + TO = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + TP = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)])); + TQ = VADD(TO, TP); + T2G = VSUB(TO, TP); + T2q = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + T2r = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)])); + T2s = VSUB(T2q, T2r); + T4T = VADD(T2q, T2r); + } + { + V TS, TT, T2u, T2v; + TS = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + TT = LD(&(ri[WS(is, 19)]), ivs, &(ri[WS(is, 1)])); + TU = VADD(TS, TT); + T2x = VSUB(TS, TT); + T2u = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + T2v = LD(&(ii[WS(is, 19)]), ivs, &(ii[WS(is, 1)])); + T2w = VSUB(T2u, T2v); + T4O = VADD(T2u, T2v); + } + { + V TV, TW, T2A, T2B; + TV = LD(&(ri[WS(is, 27)]), ivs, &(ri[WS(is, 1)])); + TW = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)])); + TX = VADD(TV, TW); + T2z = VSUB(TV, TW); + T2A = LD(&(ii[WS(is, 27)]), ivs, 
&(ii[WS(is, 1)])); + T2B = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)])); + T2C = VSUB(T2A, T2B); + T4P = VADD(T2A, T2B); + } + T2t = VSUB(T2p, T2s); + T3L = VADD(T2p, T2s); + T3O = VSUB(T2J, T2G); + T2K = VADD(T2G, T2J); + TR = VADD(TN, TQ); + TY = VADD(TU, TX); + T5F = VSUB(TR, TY); + { + V T4N, T4Q, T2y, T2D; + T5G = VADD(T4S, T4T); + T5H = VADD(T4O, T4P); + T5I = VSUB(T5G, T5H); + T4N = VSUB(TN, TQ); + T4Q = VSUB(T4O, T4P); + T4R = VSUB(T4N, T4Q); + T5j = VADD(T4N, T4Q); + T2y = VSUB(T2w, T2x); + T2D = VADD(T2z, T2C); + T2E = VMUL(LDK(KP707106781), VSUB(T2y, T2D)); + T3P = VMUL(LDK(KP707106781), VADD(T2y, T2D)); + { + V T4U, T4V, T2L, T2M; + T4U = VSUB(T4S, T4T); + T4V = VSUB(TX, TU); + T4W = VSUB(T4U, T4V); + T5k = VADD(T4V, T4U); + T2L = VSUB(T2z, T2C); + T2M = VADD(T2x, T2w); + T2N = VMUL(LDK(KP707106781), VSUB(T2L, T2M)); + T3M = VMUL(LDK(KP707106781), VADD(T2M, T2L)); + } + } + } + { + V Ty, T2f, T21, T4C, TB, T1Y, T2i, T4D, TF, T28, T2b, T4I, TI, T23, T26; + V T4J; + { + V Tw, Tx, T1Z, T20; + Tw = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + Tx = LD(&(ri[WS(is, 17)]), ivs, &(ri[WS(is, 1)])); + Ty = VADD(Tw, Tx); + T2f = VSUB(Tw, Tx); + T1Z = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + T20 = LD(&(ii[WS(is, 17)]), ivs, &(ii[WS(is, 1)])); + T21 = VSUB(T1Z, T20); + T4C = VADD(T1Z, T20); + } + { + V Tz, TA, T2g, T2h; + Tz = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)])); + TA = LD(&(ri[WS(is, 25)]), ivs, &(ri[WS(is, 1)])); + TB = VADD(Tz, TA); + T1Y = VSUB(Tz, TA); + T2g = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)])); + T2h = LD(&(ii[WS(is, 25)]), ivs, &(ii[WS(is, 1)])); + T2i = VSUB(T2g, T2h); + T4D = VADD(T2g, T2h); + } + { + V TD, TE, T29, T2a; + TD = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + TE = LD(&(ri[WS(is, 21)]), ivs, &(ri[WS(is, 1)])); + TF = VADD(TD, TE); + T28 = VSUB(TD, TE); + T29 = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + T2a = LD(&(ii[WS(is, 21)]), ivs, &(ii[WS(is, 1)])); + T2b = VSUB(T29, T2a); + T4I = VADD(T29, T2a); + } + { + V TG, 
TH, T24, T25; + TG = LD(&(ri[WS(is, 29)]), ivs, &(ri[WS(is, 1)])); + TH = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)])); + TI = VADD(TG, TH); + T23 = VSUB(TG, TH); + T24 = LD(&(ii[WS(is, 29)]), ivs, &(ii[WS(is, 1)])); + T25 = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)])); + T26 = VSUB(T24, T25); + T4J = VADD(T24, T25); + } + T22 = VADD(T1Y, T21); + T3E = VADD(T2f, T2i); + T3H = VSUB(T21, T1Y); + T2j = VSUB(T2f, T2i); + TC = VADD(Ty, TB); + TJ = VADD(TF, TI); + T5A = VSUB(TC, TJ); + { + V T4E, T4F, T27, T2c; + T5B = VADD(T4C, T4D); + T5C = VADD(T4I, T4J); + T5D = VSUB(T5B, T5C); + T4E = VSUB(T4C, T4D); + T4F = VSUB(TI, TF); + T4G = VSUB(T4E, T4F); + T5g = VADD(T4F, T4E); + T27 = VSUB(T23, T26); + T2c = VADD(T28, T2b); + T2d = VMUL(LDK(KP707106781), VSUB(T27, T2c)); + T3F = VMUL(LDK(KP707106781), VADD(T2c, T27)); + { + V T4H, T4K, T2k, T2l; + T4H = VSUB(Ty, TB); + T4K = VSUB(T4I, T4J); + T4L = VSUB(T4H, T4K); + T5h = VADD(T4H, T4K); + T2k = VSUB(T2b, T28); + T2l = VADD(T23, T26); + T2m = VMUL(LDK(KP707106781), VSUB(T2k, T2l)); + T3I = VMUL(LDK(KP707106781), VADD(T2k, T2l)); + } + } + } + { + V T61, T62, T63, T64, T65, T66, T67, T68, T69, T6a, T6b, T6c, T6d, T6e, T6f; + V T6g, T6h, T6i, T6j, T6k, T6l, T6m, T6n, T6o, T6p, T6q, T6r, T6s, T6t, T6u; + V T6v, T6w; + { + V T4B, T57, T5a, T5c, T4Y, T56, T55, T5b; + { + V T4t, T4A, T58, T59; + T4t = VSUB(T4r, T4s); + T4A = VMUL(LDK(KP707106781), VSUB(T4w, T4z)); + T4B = VADD(T4t, T4A); + T57 = VSUB(T4t, T4A); + T58 = VFNMS(LDK(KP923879532), T4L, VMUL(LDK(KP382683432), T4G)); + T59 = VFMA(LDK(KP382683432), T4W, VMUL(LDK(KP923879532), T4R)); + T5a = VSUB(T58, T59); + T5c = VADD(T58, T59); + } + { + V T4M, T4X, T51, T54; + T4M = VFMA(LDK(KP923879532), T4G, VMUL(LDK(KP382683432), T4L)); + T4X = VFNMS(LDK(KP923879532), T4W, VMUL(LDK(KP382683432), T4R)); + T4Y = VADD(T4M, T4X); + T56 = VSUB(T4X, T4M); + T51 = VSUB(T4Z, T50); + T54 = VMUL(LDK(KP707106781), VSUB(T52, T53)); + T55 = VSUB(T51, T54); + T5b = VADD(T51, T54); + } + T61 = 
VSUB(T4B, T4Y); + STM4(&(ro[22]), T61, ovs, &(ro[0])); + T62 = VSUB(T5b, T5c); + STM4(&(io[22]), T62, ovs, &(io[0])); + T63 = VADD(T4B, T4Y); + STM4(&(ro[6]), T63, ovs, &(ro[0])); + T64 = VADD(T5b, T5c); + STM4(&(io[6]), T64, ovs, &(io[0])); + T65 = VSUB(T55, T56); + STM4(&(io[30]), T65, ovs, &(io[0])); + T66 = VSUB(T57, T5a); + STM4(&(ro[30]), T66, ovs, &(ro[0])); + T67 = VADD(T55, T56); + STM4(&(io[14]), T67, ovs, &(io[0])); + T68 = VADD(T57, T5a); + STM4(&(ro[14]), T68, ovs, &(ro[0])); + } + { + V T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v; + { + V T5d, T5e, T5s, T5t; + T5d = VADD(T4r, T4s); + T5e = VMUL(LDK(KP707106781), VADD(T53, T52)); + T5f = VADD(T5d, T5e); + T5r = VSUB(T5d, T5e); + T5s = VFNMS(LDK(KP382683432), T5h, VMUL(LDK(KP923879532), T5g)); + T5t = VFMA(LDK(KP923879532), T5k, VMUL(LDK(KP382683432), T5j)); + T5u = VSUB(T5s, T5t); + T5w = VADD(T5s, T5t); + } + { + V T5i, T5l, T5n, T5o; + T5i = VFMA(LDK(KP382683432), T5g, VMUL(LDK(KP923879532), T5h)); + T5l = VFNMS(LDK(KP382683432), T5k, VMUL(LDK(KP923879532), T5j)); + T5m = VADD(T5i, T5l); + T5q = VSUB(T5l, T5i); + T5n = VADD(T50, T4Z); + T5o = VMUL(LDK(KP707106781), VADD(T4w, T4z)); + T5p = VSUB(T5n, T5o); + T5v = VADD(T5n, T5o); + } + T69 = VSUB(T5f, T5m); + STM4(&(ro[18]), T69, ovs, &(ro[0])); + T6a = VSUB(T5v, T5w); + STM4(&(io[18]), T6a, ovs, &(io[0])); + T6b = VADD(T5f, T5m); + STM4(&(ro[2]), T6b, ovs, &(ro[0])); + T6c = VADD(T5v, T5w); + STM4(&(io[2]), T6c, ovs, &(io[0])); + T6d = VSUB(T5p, T5q); + STM4(&(io[26]), T6d, ovs, &(io[0])); + T6e = VSUB(T5r, T5u); + STM4(&(ro[26]), T6e, ovs, &(ro[0])); + T6f = VADD(T5p, T5q); + STM4(&(io[10]), T6f, ovs, &(io[0])); + T6g = VADD(T5r, T5u); + STM4(&(ro[10]), T6g, ovs, &(ro[0])); + } + { + V T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T; + { + V T5x, T5y, T5Q, T5R; + T5x = VSUB(T7, Te); + T5y = VSUB(T1n, T1u); + T5z = VADD(T5x, T5y); + T5P = VSUB(T5x, T5y); + T5Q = VSUB(T5D, T5A); + T5R = VADD(T5F, T5I); + T5S = VMUL(LDK(KP707106781), VSUB(T5Q, T5R)); + T5U = 
VMUL(LDK(KP707106781), VADD(T5Q, T5R)); + } + { + V T5E, T5J, T5L, T5M; + T5E = VADD(T5A, T5D); + T5J = VSUB(T5F, T5I); + T5K = VMUL(LDK(KP707106781), VADD(T5E, T5J)); + T5O = VMUL(LDK(KP707106781), VSUB(T5J, T5E)); + T5L = VSUB(T18, T1f); + T5M = VSUB(Tt, Tm); + T5N = VSUB(T5L, T5M); + T5T = VADD(T5M, T5L); + } + T6h = VSUB(T5z, T5K); + STM4(&(ro[20]), T6h, ovs, &(ro[0])); + T6i = VSUB(T5T, T5U); + STM4(&(io[20]), T6i, ovs, &(io[0])); + T6j = VADD(T5z, T5K); + STM4(&(ro[4]), T6j, ovs, &(ro[0])); + T6k = VADD(T5T, T5U); + STM4(&(io[4]), T6k, ovs, &(io[0])); + T6l = VSUB(T5N, T5O); + STM4(&(io[28]), T6l, ovs, &(io[0])); + T6m = VSUB(T5P, T5S); + STM4(&(ro[28]), T6m, ovs, &(ro[0])); + T6n = VADD(T5N, T5O); + STM4(&(io[12]), T6n, ovs, &(io[0])); + T6o = VADD(T5P, T5S); + STM4(&(ro[12]), T6o, ovs, &(ro[0])); + } + { + V Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z; + { + V Tf, Tu, T5W, T5X; + Tf = VADD(T7, Te); + Tu = VADD(Tm, Tt); + Tv = VADD(Tf, Tu); + T5V = VSUB(Tf, Tu); + T5W = VADD(T5B, T5C); + T5X = VADD(T5G, T5H); + T5Y = VSUB(T5W, T5X); + T60 = VADD(T5W, T5X); + } + { + V TK, TZ, T1g, T1v; + TK = VADD(TC, TJ); + TZ = VADD(TR, TY); + T10 = VADD(TK, TZ); + T11 = VSUB(TZ, TK); + T1g = VADD(T18, T1f); + T1v = VADD(T1n, T1u); + T1w = VSUB(T1g, T1v); + T5Z = VADD(T1g, T1v); + } + T6p = VSUB(Tv, T10); + STM4(&(ro[16]), T6p, ovs, &(ro[0])); + T6q = VSUB(T5Z, T60); + STM4(&(io[16]), T6q, ovs, &(io[0])); + T6r = VADD(Tv, T10); + STM4(&(ro[0]), T6r, ovs, &(ro[0])); + T6s = VADD(T5Z, T60); + STM4(&(io[0]), T6s, ovs, &(io[0])); + T6t = VADD(T11, T1w); + STM4(&(io[8]), T6t, ovs, &(io[0])); + T6u = VADD(T5V, T5Y); + STM4(&(ro[8]), T6u, ovs, &(ro[0])); + T6v = VSUB(T1w, T11); + STM4(&(io[24]), T6v, ovs, &(io[0])); + T6w = VSUB(T5V, T5Y); + STM4(&(ro[24]), T6w, ovs, &(ro[0])); + } + { + V T6x, T6y, T6z, T6A, T6B, T6C, T6D, T6E; + { + V T1X, T33, T31, T37, T2o, T34, T2P, T35; + { + V T1H, T1W, T2X, T30; + T1H = VSUB(T1z, T1G); + T1W = VSUB(T1O, T1V); + T1X = VADD(T1H, T1W); + T33 = 
VSUB(T1H, T1W); + T2X = VSUB(T2T, T2W); + T30 = VSUB(T2Y, T2Z); + T31 = VSUB(T2X, T30); + T37 = VADD(T2X, T30); + } + { + V T2e, T2n, T2F, T2O; + T2e = VSUB(T22, T2d); + T2n = VSUB(T2j, T2m); + T2o = VFMA(LDK(KP980785280), T2e, VMUL(LDK(KP195090322), T2n)); + T34 = VFNMS(LDK(KP980785280), T2n, VMUL(LDK(KP195090322), T2e)); + T2F = VSUB(T2t, T2E); + T2O = VSUB(T2K, T2N); + T2P = VFNMS(LDK(KP980785280), T2O, VMUL(LDK(KP195090322), T2F)); + T35 = VFMA(LDK(KP195090322), T2O, VMUL(LDK(KP980785280), T2F)); + } + { + V T2Q, T38, T32, T36; + T2Q = VADD(T2o, T2P); + T6x = VSUB(T1X, T2Q); + STM4(&(ro[23]), T6x, ovs, &(ro[1])); + T6y = VADD(T1X, T2Q); + STM4(&(ro[7]), T6y, ovs, &(ro[1])); + T38 = VADD(T34, T35); + T6z = VSUB(T37, T38); + STM4(&(io[23]), T6z, ovs, &(io[1])); + T6A = VADD(T37, T38); + STM4(&(io[7]), T6A, ovs, &(io[1])); + T32 = VSUB(T2P, T2o); + T6B = VSUB(T31, T32); + STM4(&(io[31]), T6B, ovs, &(io[1])); + T6C = VADD(T31, T32); + STM4(&(io[15]), T6C, ovs, &(io[1])); + T36 = VSUB(T34, T35); + T6D = VSUB(T33, T36); + STM4(&(ro[31]), T6D, ovs, &(ro[1])); + T6E = VADD(T33, T36); + STM4(&(ro[15]), T6E, ovs, &(ro[1])); + } + } + { + V T3D, T41, T3Z, T45, T3K, T42, T3R, T43; + { + V T3v, T3C, T3V, T3Y; + T3v = VSUB(T3t, T3u); + T3C = VSUB(T3y, T3B); + T3D = VADD(T3v, T3C); + T41 = VSUB(T3v, T3C); + T3V = VSUB(T3T, T3U); + T3Y = VSUB(T3W, T3X); + T3Z = VSUB(T3V, T3Y); + T45 = VADD(T3V, T3Y); + } + { + V T3G, T3J, T3N, T3Q; + T3G = VSUB(T3E, T3F); + T3J = VSUB(T3H, T3I); + T3K = VFMA(LDK(KP555570233), T3G, VMUL(LDK(KP831469612), T3J)); + T42 = VFNMS(LDK(KP831469612), T3G, VMUL(LDK(KP555570233), T3J)); + T3N = VSUB(T3L, T3M); + T3Q = VSUB(T3O, T3P); + T3R = VFNMS(LDK(KP831469612), T3Q, VMUL(LDK(KP555570233), T3N)); + T43 = VFMA(LDK(KP831469612), T3N, VMUL(LDK(KP555570233), T3Q)); + } + { + V T3S, T6F, T6G, T46, T6H, T6I; + T3S = VADD(T3K, T3R); + T6F = VSUB(T3D, T3S); + STM4(&(ro[21]), T6F, ovs, &(ro[1])); + STN4(&(ro[20]), T6h, T6F, T61, T6x, ovs); + T6G = VADD(T3D, 
T3S); + STM4(&(ro[5]), T6G, ovs, &(ro[1])); + STN4(&(ro[4]), T6j, T6G, T63, T6y, ovs); + T46 = VADD(T42, T43); + T6H = VSUB(T45, T46); + STM4(&(io[21]), T6H, ovs, &(io[1])); + STN4(&(io[20]), T6i, T6H, T62, T6z, ovs); + T6I = VADD(T45, T46); + STM4(&(io[5]), T6I, ovs, &(io[1])); + STN4(&(io[4]), T6k, T6I, T64, T6A, ovs); + } + { + V T40, T6J, T6K, T44, T6L, T6M; + T40 = VSUB(T3R, T3K); + T6J = VSUB(T3Z, T40); + STM4(&(io[29]), T6J, ovs, &(io[1])); + STN4(&(io[28]), T6l, T6J, T65, T6B, ovs); + T6K = VADD(T3Z, T40); + STM4(&(io[13]), T6K, ovs, &(io[1])); + STN4(&(io[12]), T6n, T6K, T67, T6C, ovs); + T44 = VSUB(T42, T43); + T6L = VSUB(T41, T44); + STM4(&(ro[29]), T6L, ovs, &(ro[1])); + STN4(&(ro[28]), T6m, T6L, T66, T6D, ovs); + T6M = VADD(T41, T44); + STM4(&(ro[13]), T6M, ovs, &(ro[1])); + STN4(&(ro[12]), T6o, T6M, T68, T6E, ovs); + } + } + } + { + V T6N, T6O, T6P, T6Q, T6R, T6S, T6T, T6U; + { + V T49, T4l, T4j, T4p, T4c, T4m, T4f, T4n; + { + V T47, T48, T4h, T4i; + T47 = VADD(T3t, T3u); + T48 = VADD(T3X, T3W); + T49 = VADD(T47, T48); + T4l = VSUB(T47, T48); + T4h = VADD(T3T, T3U); + T4i = VADD(T3y, T3B); + T4j = VSUB(T4h, T4i); + T4p = VADD(T4h, T4i); + } + { + V T4a, T4b, T4d, T4e; + T4a = VADD(T3E, T3F); + T4b = VADD(T3H, T3I); + T4c = VFMA(LDK(KP980785280), T4a, VMUL(LDK(KP195090322), T4b)); + T4m = VFNMS(LDK(KP195090322), T4a, VMUL(LDK(KP980785280), T4b)); + T4d = VADD(T3L, T3M); + T4e = VADD(T3O, T3P); + T4f = VFNMS(LDK(KP195090322), T4e, VMUL(LDK(KP980785280), T4d)); + T4n = VFMA(LDK(KP195090322), T4d, VMUL(LDK(KP980785280), T4e)); + } + { + V T4g, T4q, T4k, T4o; + T4g = VADD(T4c, T4f); + T6N = VSUB(T49, T4g); + STM4(&(ro[17]), T6N, ovs, &(ro[1])); + T6O = VADD(T49, T4g); + STM4(&(ro[1]), T6O, ovs, &(ro[1])); + T4q = VADD(T4m, T4n); + T6P = VSUB(T4p, T4q); + STM4(&(io[17]), T6P, ovs, &(io[1])); + T6Q = VADD(T4p, T4q); + STM4(&(io[1]), T6Q, ovs, &(io[1])); + T4k = VSUB(T4f, T4c); + T6R = VSUB(T4j, T4k); + STM4(&(io[25]), T6R, ovs, &(io[1])); + T6S = VADD(T4j, 
T4k); + STM4(&(io[9]), T6S, ovs, &(io[1])); + T4o = VSUB(T4m, T4n); + T6T = VSUB(T4l, T4o); + STM4(&(ro[25]), T6T, ovs, &(ro[1])); + T6U = VADD(T4l, T4o); + STM4(&(ro[9]), T6U, ovs, &(ro[1])); + } + } + { + V T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p; + { + V T39, T3a, T3j, T3k; + T39 = VADD(T1z, T1G); + T3a = VADD(T2Z, T2Y); + T3b = VADD(T39, T3a); + T3n = VSUB(T39, T3a); + T3j = VADD(T2T, T2W); + T3k = VADD(T1O, T1V); + T3l = VSUB(T3j, T3k); + T3r = VADD(T3j, T3k); + } + { + V T3c, T3d, T3f, T3g; + T3c = VADD(T22, T2d); + T3d = VADD(T2j, T2m); + T3e = VFMA(LDK(KP555570233), T3c, VMUL(LDK(KP831469612), T3d)); + T3o = VFNMS(LDK(KP555570233), T3d, VMUL(LDK(KP831469612), T3c)); + T3f = VADD(T2t, T2E); + T3g = VADD(T2K, T2N); + T3h = VFNMS(LDK(KP555570233), T3g, VMUL(LDK(KP831469612), T3f)); + T3p = VFMA(LDK(KP831469612), T3g, VMUL(LDK(KP555570233), T3f)); + } + { + V T3i, T6V, T6W, T3s, T6X, T6Y; + T3i = VADD(T3e, T3h); + T6V = VSUB(T3b, T3i); + STM4(&(ro[19]), T6V, ovs, &(ro[1])); + STN4(&(ro[16]), T6p, T6N, T69, T6V, ovs); + T6W = VADD(T3b, T3i); + STM4(&(ro[3]), T6W, ovs, &(ro[1])); + STN4(&(ro[0]), T6r, T6O, T6b, T6W, ovs); + T3s = VADD(T3o, T3p); + T6X = VSUB(T3r, T3s); + STM4(&(io[19]), T6X, ovs, &(io[1])); + STN4(&(io[16]), T6q, T6P, T6a, T6X, ovs); + T6Y = VADD(T3r, T3s); + STM4(&(io[3]), T6Y, ovs, &(io[1])); + STN4(&(io[0]), T6s, T6Q, T6c, T6Y, ovs); + } + { + V T3m, T6Z, T70, T3q, T71, T72; + T3m = VSUB(T3h, T3e); + T6Z = VSUB(T3l, T3m); + STM4(&(io[27]), T6Z, ovs, &(io[1])); + STN4(&(io[24]), T6v, T6R, T6d, T6Z, ovs); + T70 = VADD(T3l, T3m); + STM4(&(io[11]), T70, ovs, &(io[1])); + STN4(&(io[8]), T6t, T6S, T6f, T70, ovs); + T3q = VSUB(T3o, T3p); + T71 = VSUB(T3n, T3q); + STM4(&(ro[27]), T71, ovs, &(ro[1])); + STN4(&(ro[24]), T6w, T6T, T6e, T71, ovs); + T72 = VADD(T3n, T3q); + STM4(&(ro[11]), T72, ovs, &(ro[1])); + STN4(&(ro[8]), T6u, T6U, T6g, T72, ovs); + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 32, XSIMD_STRING("n2sv_32"), 
{340, 52, 32, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_32) (planner *p) { + X(kdft_register) (p, n2sv_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:03 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name n2sv_4 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 16 FP additions, 0 FP multiplications, + * (or, 16 additions, 0 multiplications, 0 fused multiply/add), + * 25 stack variables, 0 constants, and 18 memory accesses + */ +#include "n2s.h" +/* n2sv_4, HAVE_FMA schedule: 4-point complex DFT computed on SIMD vectors (V). Inputs are read from ri/ii at index 0 and at WS(is, 1..3); results go to ro/io at literal indices 0..3, so os is only handed to MAKE_VOLATILE_STRIDE. The loop counter starts at v and drops by 2*VL per pass while ri/ii advance by (2*VL)*ivs and ro/io by (2*VL)*ovs. */ +static void n2sv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T1, T2, T7, T8, T4, T5, Tc, Td; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + T7 = LD(&(ii[0]), ivs, &(ii[0])); + T8 = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + T4 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + T5 = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + Tc = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + Td = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); /* in this schedule all eight loads (real and imaginary parts of inputs 0..3) precede any arithmetic */ + { + V T3, Tb, T9, Tf, T6, Ta, Te, Tg; + T3 = VADD(T1, T2); + Tb = VSUB(T1, T2); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); + T6 = VADD(T4, T5); + Ta = VSUB(T4, T5); + Te = VSUB(Tc, Td); + Tg = VADD(Tc, Td); /* first stage done: T3,Tf are sums and Tb,T9 differences of inputs 0 and 2; T6,Tg are sums and Ta,Te differences of inputs 1 and 3 */ + { + V Th, Ti, Tj, Tk; /* second stage: ro[0]=T3+T6, ro[2]=T3-T6, ro[1]=Tb+Te, ro[3]=Tb-Te, io[0]=Tf+Tg, io[2]=Tf-Tg, io[1]=T9-Ta, io[3]=T9+Ta */ + Th = VADD(Ta, T9); + STM4(&(io[3]), Th, ovs, &(io[1])); + Ti = VSUB(T9, Ta); + STM4(&(io[1]), Ti, ovs, &(io[1])); + Tj = VADD(T3, T6); + STM4(&(ro[0]), Tj, ovs, &(ro[0])); + Tk = VSUB(T3, T6); + 
STM4(&(ro[2]), Tk, ovs, &(ro[0])); + { + V Tl, Tm, Tn, To; + Tl = VADD(Tf, Tg); + STM4(&(io[0]), Tl, ovs, &(io[0])); + Tm = VSUB(Tf, Tg); + STM4(&(io[2]), Tm, ovs, &(io[0])); + STN4(&(io[0]), Tl, Ti, Tm, Th, ovs); /* NOTE(review): STM4 and STN4 are macros defined outside this file (n2s.h or the SIMD headers). STN4 receives the four io values in index order 0..3, presumably as the grouped form of the store requested by -store-multiple 4 -- confirm which of the two macros actually writes memory */ + Tn = VSUB(Tb, Te); + STM4(&(ro[3]), Tn, ovs, &(ro[1])); + To = VADD(Tb, Te); + STM4(&(ro[1]), To, ovs, &(ro[1])); + STN4(&(ro[0]), Tj, To, Tk, Tn, ovs); /* same grouping for the four ro values, again in index order 0..3 */ + } + } + } + } + } + VLEAVE(); /* NOTE(review): VLEAVE is defined elsewhere; presumably SIMD state cleanup on exit -- confirm */ +} +/* Codelet descriptor: transform size 4, name n2sv_4, and {16, 0, 0, 0}, whose first three entries match the 16 additions / 0 multiplications / 0 fused multiply-add counts quoted in the generated comment. NOTE(review): the meaning of the fourth count and of the trailing 0, 1, 0, 0 fields comes from the kdft_desc declaration, not visible here -- confirm. */ +static const kdft_desc desc = { 4, XSIMD_STRING("n2sv_4"), {16, 0, 0, 0}, &GENUS, 0, 1, 0, 0 }; +/* Hands n2sv_4 and its descriptor to the planner p through kdft_register. */ +void XSIMD(codelet_n2sv_4) (planner *p) { + X(kdft_register) (p, n2sv_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name n2sv_4 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 16 FP additions, 0 FP multiplications, + * (or, 16 additions, 0 multiplications, 0 fused multiply/add), + * 17 stack variables, 0 constants, and 18 memory accesses + */ +#include "n2s.h" +/* n2sv_4, schedule used when HAVE_FMA is not defined: same signature, same loop header and the same eight output expressions as the HAVE_FMA variant; only the statement order and temporary scoping differ (each pair of loads is followed directly by its sum and difference). */ +static void n2sv_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + V T3, Tb, T9, Tf, T6, Ta, Te, Tg; + { + V T1, T2, T7, T8; /* inputs 0 and 2: T3,Tf = sums, Tb,T9 = differences */ + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + T3 = VADD(T1, T2); + Tb = VSUB(T1, T2); + T7 = LD(&(ii[0]), ivs, &(ii[0])); + T8 = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + T9 = VSUB(T7, T8); + Tf = VADD(T7, T8); + } + { + V T4, T5, Tc, Td; /* inputs 1 and 3: T6,Tg = sums, Ta,Te = differences */ + T4 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + T5 = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + T6 = VADD(T4, T5); + Ta = VSUB(T4, T5); + Tc = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + Td = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + Te = VSUB(Tc, Td); + Tg = VADD(Tc, Td); + } + { + V Th, Ti, Tj, Tk; /* second stage: ro[2]=T3-T6, io[2]=Tf-Tg, ro[0]=T3+T6, io[0]=Tf+Tg, io[1]=T9-Ta, ro[1]=Tb+Te, io[3]=Ta+T9, ro[3]=Tb-Te */ + Th 
= VSUB(T3, T6); + STM4(&(ro[2]), Th, ovs, &(ro[0])); + Ti = VSUB(Tf, Tg); + STM4(&(io[2]), Ti, ovs, &(io[0])); + Tj = VADD(T3, T6); + STM4(&(ro[0]), Tj, ovs, &(ro[0])); + Tk = VADD(Tf, Tg); + STM4(&(io[0]), Tk, ovs, &(io[0])); + { + V Tl, Tm, Tn, To; + Tl = VSUB(T9, Ta); + STM4(&(io[1]), Tl, ovs, &(io[1])); + Tm = VADD(Tb, Te); + STM4(&(ro[1]), Tm, ovs, &(ro[1])); + Tn = VADD(Ta, T9); + STM4(&(io[3]), Tn, ovs, &(io[1])); + STN4(&(io[0]), Tk, Tl, Ti, Tn, ovs); /* four io values passed in index order 0..3. NOTE(review): STM4/STN4 are defined outside this file -- confirm which one performs the store */ + To = VSUB(Tb, Te); + STM4(&(ro[3]), To, ovs, &(ro[1])); + STN4(&(ro[0]), Tj, Tm, Th, To, ovs); /* four ro values passed in index order 0..3 */ + } + } + } + } + VLEAVE(); +} +/* Codelet descriptor for the non-FMA build; field values are identical to the HAVE_FMA descriptor (size 4, name n2sv_4, counts {16, 0, 0, 0}). */ +static const kdft_desc desc = { 4, XSIMD_STRING("n2sv_4"), {16, 0, 0, 0}, &GENUS, 0, 1, 0, 0 }; +/* Hands n2sv_4 and its descriptor to the planner p through kdft_register. */ +void XSIMD(codelet_n2sv_4) (planner *p) { + X(kdft_register) (p, n2sv_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3303 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:07 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 912 FP additions, 392 FP multiplications, + * (or, 520 additions, 0 multiplications, 392 fused multiply/add), + * 310 stack variables, 15 constants, and 288 memory accesses + */ +#include "n2s.h" + +static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + 
INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + V TeJ, TeK, TeP, TeQ, TfH, TfI, TfJ, TfK, Tgj, Tgk, Tgv, Tgw, T9a, T99, T9e; + V T9b; + { + V T7B, T37, T5Z, T8F, TbB, TcB, Tf, Td9, T62, T7C, T2i, TdH, Tcb, Tah, T8G; + V T3e, Tak, TbC, T65, T3m, TdI, Tu, Tda, T2x, TbD, Tan, T8I, T7G, T8J, T7J; + V T64, T3t, Tas, Tce, TK, Tdd, Tav, Tcf, Tdc, T2N, T3G, T6G, T9k, T7O, T9l; + V T7R, T6H, T3N, T1L, TdA, Tdx, Teo, Tbs, Tct, T5Q, T6V, T8y, T9z, T5j, T6Y; + V Tbb, Tcw, T8n, T9C, Tch, Taz, Tdf, TZ, Tdg, T32, Tci, TaC, T6J, T3Z, T9n; + V T7V, T9o, T7Y, T6K, T46, Tdp, T1g, Tej, Tdm, Tcm, Tb1, Tcp, TaK, T6O, T4X; + V T9s, T8f, T6R, T4q, T9v, T84, Tdn, T1v, Tek, Tds, Tcn, TaV, Tcq, Tb4, T9t; + V T8b, T9w, T8i, T6S, T50, T6P, T4N, T5k, T1V, T1S, TdB, Tbi, T5s, Tbt, Tbg; + V T5F, T5R, T5p, T1Y, Tbj, T5n, T8z, T8q; + { + V Tba, T57, T8l, Tb7, T5M, T8w, T8m, T5P, T8x, T5i; + { + V T2p, T7F, T7E, Tal, T2w, Tam, T3s, T7H, T7I, T3p, T3d, T3a; + { + V T8, T35, T3, T5Y, T26, T5X, T6, T36, T29, T9, T2b, T2c, Tb, Tc, T2e; + V T2f; + { + V T1, T2, T24, T25, T4, T5, T27, T28; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0])); + T24 = LD(&(ii[0]), ivs, &(ii[0])); + T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0])); + T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0])); + T27 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0])); + T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0])); + T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); + T35 = VSUB(T1, T2); + T3 = VADD(T1, T2); + T5Y = VSUB(T24, T25); + T26 = VADD(T24, T25); + T5X = VSUB(T4, T5); + T6 = VADD(T4, T5); + T36 = VSUB(T27, T28); + T29 = VADD(T27, T28); + T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0])); + T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); + T2c = LD(&(ii[WS(is, 40)]), ivs, &(ii[0])); + Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0])); + Tc 
= LD(&(ri[WS(is, 24)]), ivs, &(ri[0])); + T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0])); + T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0])); + } + { + V T39, Ta, T38, T2d, T3b, Td, T3c, T2g, Taf, T7; + T7B = VADD(T35, T36); + T37 = VSUB(T35, T36); + T39 = VSUB(T8, T9); + Ta = VADD(T8, T9); + T38 = VSUB(T2b, T2c); + T2d = VADD(T2b, T2c); + T3b = VSUB(Tb, Tc); + Td = VADD(Tb, Tc); + T3c = VSUB(T2e, T2f); + T2g = VADD(T2e, T2f); + T5Z = VADD(T5X, T5Y); + T8F = VSUB(T5Y, T5X); + Taf = VSUB(T3, T6); + T7 = VADD(T3, T6); + { + V TbA, T2a, Te, Tbz, T60, T61, T2h, Tag; + TbA = VSUB(T26, T29); + T2a = VADD(T26, T29); + Te = VADD(Ta, Td); + Tbz = VSUB(Td, Ta); + T3d = VADD(T3b, T3c); + T60 = VSUB(T3b, T3c); + T61 = VADD(T39, T38); + T3a = VSUB(T38, T39); + T2h = VADD(T2d, T2g); + Tag = VSUB(T2d, T2g); + TbB = VADD(Tbz, TbA); + TcB = VSUB(TbA, Tbz); + Tf = VADD(T7, Te); + Td9 = VSUB(T7, Te); + T62 = VSUB(T60, T61); + T7C = VADD(T61, T60); + T2i = VADD(T2a, T2h); + TdH = VSUB(T2a, T2h); + Tcb = VSUB(Taf, Tag); + Tah = VADD(Taf, Tag); + } + } + } + { + V T3j, Ti, T3h, T2l, T3g, Tl, T2t, T3k, T2o, T3q, Tp, T3o, T2s, T3n, Ts; + V T2u, T2m, T2n; + { + V Tg, Th, T2j, T2k, Tj, Tk; + Tg = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0])); + T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0])); + Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0])); + Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0])); + T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0])); + T8G = VADD(T3a, T3d); + T3e = VSUB(T3a, T3d); + T3j = VSUB(Tg, Th); + Ti = VADD(Tg, Th); + T3h = VSUB(T2j, T2k); + T2l = VADD(T2j, T2k); + T3g = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + T2n = LD(&(ii[WS(is, 52)]), ivs, &(ii[0])); + } + { + V Tn, To, T2q, T2r, Tq, Tr; + Tn = LD(&(ri[WS(is, 60)]), ivs, &(ri[0])); + To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0])); + T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0])); + T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0])); + Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); + Tr = 
LD(&(ri[WS(is, 44)]), ivs, &(ri[0])); + T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); + T3k = VSUB(T2m, T2n); + T2o = VADD(T2m, T2n); + T3q = VSUB(Tn, To); + Tp = VADD(Tn, To); + T3o = VSUB(T2q, T2r); + T2s = VADD(T2q, T2r); + T3n = VSUB(Tq, Tr); + Ts = VADD(Tq, Tr); + T2u = LD(&(ii[WS(is, 44)]), ivs, &(ii[0])); + } + { + V Tai, Tm, Taj, T3r; + Tai = VSUB(Ti, Tl); + Tm = VADD(Ti, Tl); + T2p = VADD(T2l, T2o); + Taj = VSUB(T2l, T2o); + { + V T3i, T3l, Tt, T2v; + T7F = VSUB(T3h, T3g); + T3i = VADD(T3g, T3h); + T3l = VSUB(T3j, T3k); + T7E = VADD(T3j, T3k); + Tt = VADD(Tp, Ts); + Tal = VSUB(Tp, Ts); + T2v = VADD(T2t, T2u); + T3r = VSUB(T2t, T2u); + Tak = VADD(Tai, Taj); + TbC = VSUB(Taj, Tai); + T65 = VFNMS(LDK(KP414213562), T3i, T3l); + T3m = VFMA(LDK(KP414213562), T3l, T3i); + TdI = VSUB(Tt, Tm); + Tu = VADD(Tm, Tt); + T2w = VADD(T2s, T2v); + Tam = VSUB(T2s, T2v); + } + T3s = VSUB(T3q, T3r); + T7H = VADD(T3q, T3r); + T7I = VSUB(T3o, T3n); + T3p = VADD(T3n, T3o); + } + } + { + V T7M, T7Q, T7N, T3M, T3J, T7P; + { + V TG, T3H, Ty, T3x, T2B, T3w, TB, T3I, T2E, TH, T2J, T2K, TD, TE, T2G; + V T2H; + { + V Tw, Tx, T2z, T2A, Tz, TA, T2C, T2D; + Tw = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + Tda = VSUB(T2p, T2w); + T2x = VADD(T2p, T2w); + TbD = VADD(Tal, Tam); + Tan = VSUB(Tal, Tam); + T8I = VFNMS(LDK(KP414213562), T7E, T7F); + T7G = VFMA(LDK(KP414213562), T7F, T7E); + T8J = VFMA(LDK(KP414213562), T7H, T7I); + T7J = VFNMS(LDK(KP414213562), T7I, T7H); + T64 = VFMA(LDK(KP414213562), T3p, T3s); + T3t = VFNMS(LDK(KP414213562), T3s, T3p); + Tx = LD(&(ri[WS(is, 34)]), ivs, &(ri[0])); + T2z = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + T2A = LD(&(ii[WS(is, 34)]), ivs, &(ii[0])); + Tz = LD(&(ri[WS(is, 18)]), ivs, &(ri[0])); + TA = LD(&(ri[WS(is, 50)]), ivs, &(ri[0])); + T2C = LD(&(ii[WS(is, 18)]), ivs, &(ii[0])); + T2D = LD(&(ii[WS(is, 50)]), ivs, &(ii[0])); + TG = LD(&(ri[WS(is, 58)]), ivs, &(ri[0])); + T3H = VSUB(Tw, Tx); + Ty = VADD(Tw, Tx); + T3x = VSUB(T2z, T2A); + T2B = VADD(T2z, T2A); + 
T3w = VSUB(Tz, TA); + TB = VADD(Tz, TA); + T3I = VSUB(T2C, T2D); + T2E = VADD(T2C, T2D); + TH = LD(&(ri[WS(is, 26)]), ivs, &(ri[0])); + T2J = LD(&(ii[WS(is, 58)]), ivs, &(ii[0])); + T2K = LD(&(ii[WS(is, 26)]), ivs, &(ii[0])); + TD = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); + TE = LD(&(ri[WS(is, 42)]), ivs, &(ri[0])); + T2G = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); + T2H = LD(&(ii[WS(is, 42)]), ivs, &(ii[0])); + } + { + V Tat, TC, Tar, T2F, T3K, T3E, TJ, Taq, T2M, Tau, T3B, T3L, T3y, T3F; + { + V TI, T3C, T2L, T3D, TF, T3z, T2I, T3A; + Tat = VSUB(Ty, TB); + TC = VADD(Ty, TB); + TI = VADD(TG, TH); + T3C = VSUB(TG, TH); + T2L = VADD(T2J, T2K); + T3D = VSUB(T2J, T2K); + TF = VADD(TD, TE); + T3z = VSUB(TD, TE); + T2I = VADD(T2G, T2H); + T3A = VSUB(T2G, T2H); + Tar = VSUB(T2B, T2E); + T2F = VADD(T2B, T2E); + T3K = VADD(T3C, T3D); + T3E = VSUB(T3C, T3D); + TJ = VADD(TF, TI); + Taq = VSUB(TI, TF); + T2M = VADD(T2I, T2L); + Tau = VSUB(T2I, T2L); + T3B = VADD(T3z, T3A); + T3L = VSUB(T3A, T3z); + } + T7M = VSUB(T3x, T3w); + T3y = VADD(T3w, T3x); + Tas = VADD(Taq, Tar); + Tce = VSUB(Tar, Taq); + TK = VADD(TC, TJ); + Tdd = VSUB(TC, TJ); + Tav = VADD(Tat, Tau); + Tcf = VSUB(Tat, Tau); + T7Q = VADD(T3B, T3E); + T3F = VSUB(T3B, T3E); + Tdc = VSUB(T2F, T2M); + T2N = VADD(T2F, T2M); + T7N = VADD(T3L, T3K); + T3M = VSUB(T3K, T3L); + T3J = VSUB(T3H, T3I); + T7P = VADD(T3H, T3I); + T3G = VFNMS(LDK(KP707106781), T3F, T3y); + T6G = VFMA(LDK(KP707106781), T3F, T3y); + } + } + { + V T1H, T5I, T1z, Tb8, T56, T53, T1C, Tb9, T5L, T1I, T5e, T5f, T1E, T1F, T59; + V T5a; + { + V T1x, T1y, T54, T55, T1A, T1B, T5J, T5K; + T1x = LD(&(ri[WS(is, 63)]), ivs, &(ri[WS(is, 1)])); + T9k = VFNMS(LDK(KP707106781), T7N, T7M); + T7O = VFMA(LDK(KP707106781), T7N, T7M); + T9l = VFNMS(LDK(KP707106781), T7Q, T7P); + T7R = VFMA(LDK(KP707106781), T7Q, T7P); + T6H = VFMA(LDK(KP707106781), T3M, T3J); + T3N = VFNMS(LDK(KP707106781), T3M, T3J); + T1y = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)])); + T54 = LD(&(ii[WS(is, 
63)]), ivs, &(ii[WS(is, 1)])); + T55 = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)])); + T1A = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); + T1B = LD(&(ri[WS(is, 47)]), ivs, &(ri[WS(is, 1)])); + T5J = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); + T5K = LD(&(ii[WS(is, 47)]), ivs, &(ii[WS(is, 1)])); + T1H = LD(&(ri[WS(is, 55)]), ivs, &(ri[WS(is, 1)])); + T5I = VSUB(T1x, T1y); + T1z = VADD(T1x, T1y); + Tb8 = VADD(T54, T55); + T56 = VSUB(T54, T55); + T53 = VSUB(T1A, T1B); + T1C = VADD(T1A, T1B); + Tb9 = VADD(T5J, T5K); + T5L = VSUB(T5J, T5K); + T1I = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)])); + T5e = LD(&(ii[WS(is, 55)]), ivs, &(ii[WS(is, 1)])); + T5f = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)])); + T1E = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + T1F = LD(&(ri[WS(is, 39)]), ivs, &(ri[WS(is, 1)])); + T59 = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + T5a = LD(&(ii[WS(is, 39)]), ivs, &(ii[WS(is, 1)])); + } + { + V Tbo, T1D, Tdv, T5h, T5N, T1K, Tdw, Tbr, T5O, T5c; + { + V T1J, T5d, Tbq, T5g, T1G, T58, Tbp, T5b; + Tbo = VSUB(T1z, T1C); + T1D = VADD(T1z, T1C); + T1J = VADD(T1H, T1I); + T5d = VSUB(T1H, T1I); + Tbq = VADD(T5e, T5f); + T5g = VSUB(T5e, T5f); + T1G = VADD(T1E, T1F); + T58 = VSUB(T1E, T1F); + Tbp = VADD(T59, T5a); + T5b = VSUB(T59, T5a); + Tba = VSUB(Tb8, Tb9); + Tdv = VADD(Tb8, Tb9); + T57 = VADD(T53, T56); + T8l = VSUB(T56, T53); + T5h = VSUB(T5d, T5g); + T5N = VADD(T5d, T5g); + Tb7 = VSUB(T1J, T1G); + T1K = VADD(T1G, T1J); + Tdw = VADD(Tbp, Tbq); + Tbr = VSUB(Tbp, Tbq); + T5O = VSUB(T5b, T58); + T5c = VADD(T58, T5b); + } + T5M = VSUB(T5I, T5L); + T8w = VADD(T5I, T5L); + T1L = VADD(T1D, T1K); + TdA = VSUB(T1D, T1K); + Tdx = VSUB(Tdv, Tdw); + Teo = VADD(Tdv, Tdw); + Tbs = VADD(Tbo, Tbr); + Tct = VSUB(Tbo, Tbr); + T8m = VADD(T5O, T5N); + T5P = VSUB(T5N, T5O); + T8x = VADD(T5c, T5h); + T5i = VSUB(T5c, T5h); + } + } + } + } + { + V T4e, T82, T8d, T4T, T4W, T83, T4p, T8e; + { + V T7T, T3R, T42, T7W, T3Y, T7X, T45, T7U; + { + V T40, TN, T2Y, T3Q, T2Q, 
T3P, TQ, T41, T2T, T3V, TX, T2Z, TS, TT, T2V; + V T2W; + { + V T2O, T2P, TO, TP, TL, TM; + TL = LD(&(ri[WS(is, 62)]), ivs, &(ri[0])); + TM = LD(&(ri[WS(is, 30)]), ivs, &(ri[0])); + T5Q = VFNMS(LDK(KP707106781), T5P, T5M); + T6V = VFMA(LDK(KP707106781), T5P, T5M); + T8y = VFMA(LDK(KP707106781), T8x, T8w); + T9z = VFNMS(LDK(KP707106781), T8x, T8w); + T5j = VFNMS(LDK(KP707106781), T5i, T57); + T6Y = VFMA(LDK(KP707106781), T5i, T57); + Tbb = VADD(Tb7, Tba); + Tcw = VSUB(Tba, Tb7); + T8n = VFMA(LDK(KP707106781), T8m, T8l); + T9C = VFNMS(LDK(KP707106781), T8m, T8l); + T40 = VSUB(TL, TM); + TN = VADD(TL, TM); + T2O = LD(&(ii[WS(is, 62)]), ivs, &(ii[0])); + T2P = LD(&(ii[WS(is, 30)]), ivs, &(ii[0])); + TO = LD(&(ri[WS(is, 14)]), ivs, &(ri[0])); + TP = LD(&(ri[WS(is, 46)]), ivs, &(ri[0])); + { + V T2R, T2S, TV, TW; + T2R = LD(&(ii[WS(is, 14)]), ivs, &(ii[0])); + T2S = LD(&(ii[WS(is, 46)]), ivs, &(ii[0])); + TV = LD(&(ri[WS(is, 54)]), ivs, &(ri[0])); + TW = LD(&(ri[WS(is, 22)]), ivs, &(ri[0])); + T2Y = LD(&(ii[WS(is, 54)]), ivs, &(ii[0])); + T3Q = VSUB(T2O, T2P); + T2Q = VADD(T2O, T2P); + T3P = VSUB(TO, TP); + TQ = VADD(TO, TP); + T41 = VSUB(T2R, T2S); + T2T = VADD(T2R, T2S); + T3V = VSUB(TV, TW); + TX = VADD(TV, TW); + T2Z = LD(&(ii[WS(is, 22)]), ivs, &(ii[0])); + TS = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + TT = LD(&(ri[WS(is, 38)]), ivs, &(ri[0])); + T2V = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + T2W = LD(&(ii[WS(is, 38)]), ivs, &(ii[0])); + } + } + { + V TaA, TR, Tay, T2U, T3W, T30, TU, T3S, T2X, T3T; + TaA = VSUB(TN, TQ); + TR = VADD(TN, TQ); + Tay = VSUB(T2Q, T2T); + T2U = VADD(T2Q, T2T); + T3W = VSUB(T2Y, T2Z); + T30 = VADD(T2Y, T2Z); + TU = VADD(TS, TT); + T3S = VSUB(TS, TT); + T2X = VADD(T2V, T2W); + T3T = VSUB(T2V, T2W); + { + V T3X, T43, Tax, TY, T31, TaB, T3U, T44; + T7T = VSUB(T3Q, T3P); + T3R = VADD(T3P, T3Q); + T3X = VSUB(T3V, T3W); + T43 = VADD(T3V, T3W); + Tax = VSUB(TX, TU); + TY = VADD(TU, TX); + T31 = VADD(T2X, T30); + TaB = VSUB(T2X, T30); + T3U = VADD(T3S, 
T3T); + T44 = VSUB(T3T, T3S); + T42 = VSUB(T40, T41); + T7W = VADD(T40, T41); + Tch = VSUB(Tay, Tax); + Taz = VADD(Tax, Tay); + Tdf = VSUB(TR, TY); + TZ = VADD(TR, TY); + Tdg = VSUB(T2U, T31); + T32 = VADD(T2U, T31); + Tci = VSUB(TaA, TaB); + TaC = VADD(TaA, TaB); + T3Y = VSUB(T3U, T3X); + T7X = VADD(T3U, T3X); + T45 = VSUB(T43, T44); + T7U = VADD(T44, T43); + } + } + } + { + V T4P, T14, T4l, TaH, T4d, T4a, T17, TaI, T4S, T4k, T1e, T4m, T19, T1a, T4g; + V T4h; + { + V T4b, T4c, T15, T16, T12, T13; + T12 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + T13 = LD(&(ri[WS(is, 33)]), ivs, &(ri[WS(is, 1)])); + T4b = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + T6J = VFMA(LDK(KP707106781), T3Y, T3R); + T3Z = VFNMS(LDK(KP707106781), T3Y, T3R); + T9n = VFNMS(LDK(KP707106781), T7U, T7T); + T7V = VFMA(LDK(KP707106781), T7U, T7T); + T9o = VFNMS(LDK(KP707106781), T7X, T7W); + T7Y = VFMA(LDK(KP707106781), T7X, T7W); + T6K = VFMA(LDK(KP707106781), T45, T42); + T46 = VFNMS(LDK(KP707106781), T45, T42); + T4P = VSUB(T12, T13); + T14 = VADD(T12, T13); + T4c = LD(&(ii[WS(is, 33)]), ivs, &(ii[WS(is, 1)])); + T15 = LD(&(ri[WS(is, 17)]), ivs, &(ri[WS(is, 1)])); + T16 = LD(&(ri[WS(is, 49)]), ivs, &(ri[WS(is, 1)])); + { + V T4Q, T4R, T1c, T1d; + T4Q = LD(&(ii[WS(is, 17)]), ivs, &(ii[WS(is, 1)])); + T4R = LD(&(ii[WS(is, 49)]), ivs, &(ii[WS(is, 1)])); + T1c = LD(&(ri[WS(is, 57)]), ivs, &(ri[WS(is, 1)])); + T1d = LD(&(ri[WS(is, 25)]), ivs, &(ri[WS(is, 1)])); + T4l = LD(&(ii[WS(is, 57)]), ivs, &(ii[WS(is, 1)])); + TaH = VADD(T4b, T4c); + T4d = VSUB(T4b, T4c); + T4a = VSUB(T15, T16); + T17 = VADD(T15, T16); + TaI = VADD(T4Q, T4R); + T4S = VSUB(T4Q, T4R); + T4k = VSUB(T1c, T1d); + T1e = VADD(T1c, T1d); + T4m = LD(&(ii[WS(is, 25)]), ivs, &(ii[WS(is, 1)])); + T19 = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)])); + T1a = LD(&(ri[WS(is, 41)]), ivs, &(ri[WS(is, 1)])); + T4g = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)])); + T4h = LD(&(ii[WS(is, 41)]), ivs, &(ii[WS(is, 1)])); + } + } + { + V TaX, T18, 
T4n, TaZ, TaJ, Tdk, T1b, T4f, TaY, T4i; + TaX = VSUB(T14, T17); + T18 = VADD(T14, T17); + T4n = VSUB(T4l, T4m); + TaZ = VADD(T4l, T4m); + TaJ = VSUB(TaH, TaI); + Tdk = VADD(TaH, TaI); + T1b = VADD(T19, T1a); + T4f = VSUB(T19, T1a); + TaY = VADD(T4g, T4h); + T4i = VSUB(T4g, T4h); + T4e = VADD(T4a, T4d); + T82 = VSUB(T4d, T4a); + { + V T4U, T4o, T1f, TaG, Tdl, Tb0, T4V, T4j; + T8d = VADD(T4P, T4S); + T4T = VSUB(T4P, T4S); + T4U = VADD(T4k, T4n); + T4o = VSUB(T4k, T4n); + T1f = VADD(T1b, T1e); + TaG = VSUB(T1e, T1b); + Tdl = VADD(TaY, TaZ); + Tb0 = VSUB(TaY, TaZ); + T4V = VSUB(T4i, T4f); + T4j = VADD(T4f, T4i); + Tdp = VSUB(T18, T1f); + T1g = VADD(T18, T1f); + Tej = VADD(Tdk, Tdl); + Tdm = VSUB(Tdk, Tdl); + Tcm = VSUB(TaX, Tb0); + Tb1 = VADD(TaX, Tb0); + T4W = VSUB(T4U, T4V); + T83 = VADD(T4V, T4U); + T4p = VSUB(T4j, T4o); + T8e = VADD(T4j, T4o); + Tcp = VSUB(TaJ, TaG); + TaK = VADD(TaG, TaJ); + } + } + } + } + { + V T1n, Tdq, T4r, T1q, TaR, T4z, Tb2, TaP, T4M, T4Y, T4w, T1t, TaS, T4u, T8g; + V T87; + { + V T1r, T85, T4L, TaO, TaN, T86, T4G, T1s, T4s, T4t; + { + V T1h, T1i, T4I, T4J, T1k, T1l, T4D, T4E; + T1h = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + T6O = VFMA(LDK(KP707106781), T4W, T4T); + T4X = VFNMS(LDK(KP707106781), T4W, T4T); + T9s = VFNMS(LDK(KP707106781), T8e, T8d); + T8f = VFMA(LDK(KP707106781), T8e, T8d); + T6R = VFMA(LDK(KP707106781), T4p, T4e); + T4q = VFNMS(LDK(KP707106781), T4p, T4e); + T9v = VFNMS(LDK(KP707106781), T83, T82); + T84 = VFMA(LDK(KP707106781), T83, T82); + T1i = LD(&(ri[WS(is, 37)]), ivs, &(ri[WS(is, 1)])); + T4I = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + T4J = LD(&(ii[WS(is, 37)]), ivs, &(ii[WS(is, 1)])); + T1k = LD(&(ri[WS(is, 21)]), ivs, &(ri[WS(is, 1)])); + T1l = LD(&(ri[WS(is, 53)]), ivs, &(ri[WS(is, 1)])); + T4D = LD(&(ii[WS(is, 21)]), ivs, &(ii[WS(is, 1)])); + T4E = LD(&(ii[WS(is, 53)]), ivs, &(ii[WS(is, 1)])); + { + V T1o, T4C, T1j, TaL, T4K, T4H, T1m, TaM, T4F, T1p, T4x, T4y; + T1o = LD(&(ri[WS(is, 61)]), ivs, 
&(ri[WS(is, 1)])); + T4C = VSUB(T1h, T1i); + T1j = VADD(T1h, T1i); + TaL = VADD(T4I, T4J); + T4K = VSUB(T4I, T4J); + T4H = VSUB(T1k, T1l); + T1m = VADD(T1k, T1l); + TaM = VADD(T4D, T4E); + T4F = VSUB(T4D, T4E); + T1p = LD(&(ri[WS(is, 29)]), ivs, &(ri[WS(is, 1)])); + T4x = LD(&(ii[WS(is, 61)]), ivs, &(ii[WS(is, 1)])); + T4y = LD(&(ii[WS(is, 29)]), ivs, &(ii[WS(is, 1)])); + T1r = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)])); + T85 = VSUB(T4K, T4H); + T4L = VADD(T4H, T4K); + TaO = VSUB(T1j, T1m); + T1n = VADD(T1j, T1m); + Tdq = VADD(TaL, TaM); + TaN = VSUB(TaL, TaM); + T86 = VADD(T4C, T4F); + T4G = VSUB(T4C, T4F); + T4r = VSUB(T1o, T1p); + T1q = VADD(T1o, T1p); + TaR = VADD(T4x, T4y); + T4z = VSUB(T4x, T4y); + T1s = LD(&(ri[WS(is, 45)]), ivs, &(ri[WS(is, 1)])); + T4s = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)])); + T4t = LD(&(ii[WS(is, 45)]), ivs, &(ii[WS(is, 1)])); + } + } + Tb2 = VADD(TaO, TaN); + TaP = VSUB(TaN, TaO); + T4M = VFNMS(LDK(KP414213562), T4L, T4G); + T4Y = VFMA(LDK(KP414213562), T4G, T4L); + T4w = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + TaS = VADD(T4s, T4t); + T4u = VSUB(T4s, T4t); + T8g = VFMA(LDK(KP414213562), T85, T86); + T87 = VFNMS(LDK(KP414213562), T86, T85); + } + { + V T1W, T8o, T5E, Tbf, Tbe, T8p, T5z, T1X, T5l, T5m; + { + V T5B, T5v, T1O, T5C, T1P, T1Q, T5w, T5x; + { + V T1M, T88, T4A, T1u, TaQ, Tdr, TaT, T89, T4v, T1N, TaU, Tb3; + T1M = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + T88 = VSUB(T4z, T4w); + T4A = VADD(T4w, T4z); + T1u = VADD(T1q, T1t); + TaQ = VSUB(T1q, T1t); + Tdr = VADD(TaR, TaS); + TaT = VSUB(TaR, TaS); + T89 = VADD(T4r, T4u); + T4v = VSUB(T4r, T4u); + T1N = LD(&(ri[WS(is, 35)]), ivs, &(ri[WS(is, 1)])); + T5B = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + Tdn = VSUB(T1u, T1n); + T1v = VADD(T1n, T1u); + Tek = VADD(Tdq, Tdr); + Tds = VSUB(Tdq, Tdr); + TaU = VADD(TaQ, TaT); + Tb3 = VSUB(TaQ, TaT); + { + V T8a, T8h, T4Z, T4B; + T8a = VFMA(LDK(KP414213562), T89, T88); + T8h = VFNMS(LDK(KP414213562), T88, T89); + T4Z = 
VFNMS(LDK(KP414213562), T4v, T4A); + T4B = VFMA(LDK(KP414213562), T4A, T4v); + T5v = VSUB(T1M, T1N); + T1O = VADD(T1M, T1N); + Tcn = VSUB(TaU, TaP); + TaV = VADD(TaP, TaU); + Tcq = VSUB(Tb2, Tb3); + Tb4 = VADD(Tb2, Tb3); + T9t = VSUB(T8a, T87); + T8b = VADD(T87, T8a); + T9w = VSUB(T8g, T8h); + T8i = VADD(T8g, T8h); + T6S = VADD(T4Y, T4Z); + T50 = VSUB(T4Y, T4Z); + T6P = VADD(T4M, T4B); + T4N = VSUB(T4B, T4M); + T5C = LD(&(ii[WS(is, 35)]), ivs, &(ii[WS(is, 1)])); + } + } + T1P = LD(&(ri[WS(is, 19)]), ivs, &(ri[WS(is, 1)])); + T1Q = LD(&(ri[WS(is, 51)]), ivs, &(ri[WS(is, 1)])); + T5w = LD(&(ii[WS(is, 19)]), ivs, &(ii[WS(is, 1)])); + T5x = LD(&(ii[WS(is, 51)]), ivs, &(ii[WS(is, 1)])); + { + V T5q, Tbc, T5D, T5A, T1R, Tbd, T5y, T5r, T1T, T1U; + T1T = LD(&(ri[WS(is, 59)]), ivs, &(ri[WS(is, 1)])); + T1U = LD(&(ri[WS(is, 27)]), ivs, &(ri[WS(is, 1)])); + T5q = LD(&(ii[WS(is, 59)]), ivs, &(ii[WS(is, 1)])); + Tbc = VADD(T5B, T5C); + T5D = VSUB(T5B, T5C); + T5A = VSUB(T1P, T1Q); + T1R = VADD(T1P, T1Q); + Tbd = VADD(T5w, T5x); + T5y = VSUB(T5w, T5x); + T5k = VSUB(T1T, T1U); + T1V = VADD(T1T, T1U); + T5r = LD(&(ii[WS(is, 27)]), ivs, &(ii[WS(is, 1)])); + T1W = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)])); + T8o = VSUB(T5D, T5A); + T5E = VADD(T5A, T5D); + Tbf = VSUB(T1O, T1R); + T1S = VADD(T1O, T1R); + TdB = VADD(Tbc, Tbd); + Tbe = VSUB(Tbc, Tbd); + T8p = VADD(T5v, T5y); + T5z = VSUB(T5v, T5y); + Tbi = VADD(T5q, T5r); + T5s = VSUB(T5q, T5r); + T1X = LD(&(ri[WS(is, 43)]), ivs, &(ri[WS(is, 1)])); + T5l = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)])); + T5m = LD(&(ii[WS(is, 43)]), ivs, &(ii[WS(is, 1)])); + } + } + Tbt = VADD(Tbf, Tbe); + Tbg = VSUB(Tbe, Tbf); + T5F = VFNMS(LDK(KP414213562), T5E, T5z); + T5R = VFMA(LDK(KP414213562), T5z, T5E); + T5p = VSUB(T1W, T1X); + T1Y = VADD(T1W, T1X); + Tbj = VADD(T5l, T5m); + T5n = VSUB(T5l, T5m); + T8z = VFMA(LDK(KP414213562), T8o, T8p); + T8q = VFNMS(LDK(KP414213562), T8p, T8o); + } + } + } + } + { + V Tbm, Tbv, T9A, T8u, T9D, T8B, T6Z, T5T, 
T6W, T5G, TeL, TeM, TeN, TeO, TeR; + V TeS, TeT, TeU, TeV, TeW, TeX, TeY, TeZ, Tf0, Tf1, Tf2, Tf3, Tf4, Tf5, Tf6; + V Tf7, Tf8, Tf9, Tfa, Tfb, Tfc, TbE, Tao, Tfd, Tfe, Td7, Td8, Tff, Tfg, Tfh; + V Tfi, Tfj, Tfk, Tfl, Tfm, Tfn, Tfo, Tfp, Tfq, Tfr, Tfs; + { + V Tel, Tdy, TdD, Tcu, Tcx, Teq, Tei, Ten, Tex, Teh, TeB, Tev, Te9, Tec; + { + V Tef, Teu, TeE, TeD, T11, TeF, T1w, T21, Tet, T2y, T33, Teg, T20; + { + V Tv, T8r, T5t, T1Z, Tbh, TdC, Tbk, T8s, T5o, T10, Tep, Tbl, Tbu; + Tef = VSUB(Tf, Tu); + Tv = VADD(Tf, Tu); + T8r = VSUB(T5s, T5p); + T5t = VADD(T5p, T5s); + T1Z = VADD(T1V, T1Y); + Tbh = VSUB(T1V, T1Y); + TdC = VADD(Tbi, Tbj); + Tbk = VSUB(Tbi, Tbj); + T8s = VADD(T5k, T5n); + T5o = VSUB(T5k, T5n); + T10 = VADD(TK, TZ); + Teu = VSUB(TZ, TK); + Tel = VSUB(Tej, Tek); + TeE = VADD(Tej, Tek); + Tdy = VSUB(T1Z, T1S); + T20 = VADD(T1S, T1Z); + Tep = VADD(TdB, TdC); + TdD = VSUB(TdB, TdC); + Tbl = VADD(Tbh, Tbk); + Tbu = VSUB(Tbh, Tbk); + { + V T8t, T8A, T5S, T5u; + T8t = VFMA(LDK(KP414213562), T8s, T8r); + T8A = VFNMS(LDK(KP414213562), T8r, T8s); + T5S = VFNMS(LDK(KP414213562), T5o, T5t); + T5u = VFMA(LDK(KP414213562), T5t, T5o); + TeD = VSUB(Tv, T10); + T11 = VADD(Tv, T10); + Tcu = VSUB(Tbl, Tbg); + Tbm = VADD(Tbg, Tbl); + Tcx = VSUB(Tbt, Tbu); + Tbv = VADD(Tbt, Tbu); + T9A = VSUB(T8t, T8q); + T8u = VADD(T8q, T8t); + T9D = VSUB(T8z, T8A); + T8B = VADD(T8z, T8A); + T6Z = VADD(T5R, T5S); + T5T = VSUB(T5R, T5S); + T6W = VADD(T5F, T5u); + T5G = VSUB(T5u, T5F); + TeF = VADD(Teo, Tep); + Teq = VSUB(Teo, Tep); + } + } + Tei = VSUB(T1g, T1v); + T1w = VADD(T1g, T1v); + T21 = VADD(T1L, T20); + Ten = VSUB(T1L, T20); + Tet = VSUB(T2i, T2x); + T2y = VADD(T2i, T2x); + T33 = VADD(T2N, T32); + Teg = VSUB(T2N, T32); + { + V TeI, TeG, T23, T22, TeH, T34; + TeI = VADD(TeE, TeF); + TeG = VSUB(TeE, TeF); + T23 = VSUB(T21, T1w); + T22 = VADD(T1w, T21); + TeH = VADD(T2y, T33); + T34 = VSUB(T2y, T33); + Tex = VSUB(Tef, Teg); + Teh = VADD(Tef, Teg); + TeJ = VSUB(TeD, TeG); + STM4(&(ro[48]), 
TeJ, ovs, &(ro[0])); + TeK = VADD(TeD, TeG); + STM4(&(ro[16]), TeK, ovs, &(ro[0])); + TeL = VADD(T11, T22); + STM4(&(ro[0]), TeL, ovs, &(ro[0])); + TeM = VSUB(T11, T22); + STM4(&(ro[32]), TeM, ovs, &(ro[0])); + TeN = VADD(TeH, TeI); + STM4(&(io[0]), TeN, ovs, &(io[0])); + TeO = VSUB(TeH, TeI); + STM4(&(io[32]), TeO, ovs, &(io[0])); + TeP = VSUB(T34, T23); + STM4(&(io[48]), TeP, ovs, &(io[0])); + TeQ = VADD(T23, T34); + STM4(&(io[16]), TeQ, ovs, &(io[0])); + TeB = VADD(Teu, Tet); + Tev = VSUB(Tet, Teu); + } + } + { + V TdV, Tdb, TdJ, Te5, TdE, Tdz, TdZ, Tdo, Te6, Tdi, Teb, Te3, TdW, TdM, Tdt; + V TdY; + { + V TdL, Tde, Tey, Tem, Tez, Ter, Tdh, TdK, Te1, Te2; + TdV = VADD(Td9, Tda); + Tdb = VSUB(Td9, Tda); + TdJ = VSUB(TdH, TdI); + Te5 = VADD(TdI, TdH); + TdL = VADD(Tdd, Tdc); + Tde = VSUB(Tdc, Tdd); + Tey = VSUB(Tel, Tei); + Tem = VADD(Tei, Tel); + Tez = VADD(Ten, Teq); + Ter = VSUB(Ten, Teq); + Tdh = VADD(Tdf, Tdg); + TdK = VSUB(Tdf, Tdg); + TdE = VSUB(TdA, TdD); + Te1 = VADD(TdA, TdD); + Te2 = VADD(Tdy, Tdx); + Tdz = VSUB(Tdx, Tdy); + TdZ = VADD(Tdn, Tdm); + Tdo = VSUB(Tdm, Tdn); + { + V TeA, TeC, Tew, Tes; + TeA = VSUB(Tey, Tez); + TeC = VADD(Tey, Tez); + Tew = VSUB(Ter, Tem); + Tes = VADD(Tem, Ter); + Te6 = VADD(Tde, Tdh); + Tdi = VSUB(Tde, Tdh); + Teb = VFMA(LDK(KP414213562), Te1, Te2); + Te3 = VFNMS(LDK(KP414213562), Te2, Te1); + TdW = VADD(TdL, TdK); + TdM = VSUB(TdK, TdL); + TeR = VFMA(LDK(KP707106781), TeA, Tex); + STM4(&(ro[24]), TeR, ovs, &(ro[0])); + TeS = VFNMS(LDK(KP707106781), TeA, Tex); + STM4(&(ro[56]), TeS, ovs, &(ro[0])); + TeT = VFMA(LDK(KP707106781), TeC, TeB); + STM4(&(io[8]), TeT, ovs, &(io[0])); + TeU = VFNMS(LDK(KP707106781), TeC, TeB); + STM4(&(io[40]), TeU, ovs, &(io[0])); + TeV = VFMA(LDK(KP707106781), Tew, Tev); + STM4(&(io[24]), TeV, ovs, &(io[0])); + TeW = VFNMS(LDK(KP707106781), Tew, Tev); + STM4(&(io[56]), TeW, ovs, &(io[0])); + TeX = VFMA(LDK(KP707106781), Tes, Teh); + STM4(&(ro[8]), TeX, ovs, &(ro[0])); + TeY = 
VFNMS(LDK(KP707106781), Tes, Teh); + STM4(&(ro[40]), TeY, ovs, &(ro[0])); + Tdt = VSUB(Tdp, Tds); + TdY = VADD(Tdp, Tds); + } + } + { + V TdT, Tdj, TdP, TdN, TdR, Tdu, Tea, Te0, TdQ, TdF, TdX, Ted, Te7; + TdT = VFNMS(LDK(KP707106781), Tdi, Tdb); + Tdj = VFMA(LDK(KP707106781), Tdi, Tdb); + TdP = VFMA(LDK(KP707106781), TdM, TdJ); + TdN = VFNMS(LDK(KP707106781), TdM, TdJ); + TdR = VFNMS(LDK(KP414213562), Tdo, Tdt); + Tdu = VFMA(LDK(KP414213562), Tdt, Tdo); + Tea = VFNMS(LDK(KP414213562), TdY, TdZ); + Te0 = VFMA(LDK(KP414213562), TdZ, TdY); + TdQ = VFMA(LDK(KP414213562), Tdz, TdE); + TdF = VFNMS(LDK(KP414213562), TdE, Tdz); + Te9 = VFNMS(LDK(KP707106781), TdW, TdV); + TdX = VFMA(LDK(KP707106781), TdW, TdV); + Ted = VFMA(LDK(KP707106781), Te6, Te5); + Te7 = VFNMS(LDK(KP707106781), Te6, Te5); + { + V Tee, Te8, Te4, TdU, TdS, TdO, TdG; + Tee = VADD(Tea, Teb); + Tec = VSUB(Tea, Teb); + Te8 = VSUB(Te3, Te0); + Te4 = VADD(Te0, Te3); + TdU = VADD(TdR, TdQ); + TdS = VSUB(TdQ, TdR); + TdO = VADD(Tdu, TdF); + TdG = VSUB(Tdu, TdF); + TeZ = VFMA(LDK(KP923879532), Tee, Ted); + STM4(&(io[4]), TeZ, ovs, &(io[0])); + Tf0 = VFNMS(LDK(KP923879532), Tee, Ted); + STM4(&(io[36]), Tf0, ovs, &(io[0])); + Tf1 = VFMA(LDK(KP923879532), Te4, TdX); + STM4(&(ro[4]), Tf1, ovs, &(ro[0])); + Tf2 = VFNMS(LDK(KP923879532), Te4, TdX); + STM4(&(ro[36]), Tf2, ovs, &(ro[0])); + Tf3 = VFMA(LDK(KP923879532), TdU, TdT); + STM4(&(ro[60]), Tf3, ovs, &(ro[0])); + Tf4 = VFNMS(LDK(KP923879532), TdU, TdT); + STM4(&(ro[28]), Tf4, ovs, &(ro[0])); + Tf5 = VFMA(LDK(KP923879532), TdS, TdP); + STM4(&(io[12]), Tf5, ovs, &(io[0])); + Tf6 = VFNMS(LDK(KP923879532), TdS, TdP); + STM4(&(io[44]), Tf6, ovs, &(io[0])); + Tf7 = VFMA(LDK(KP923879532), TdO, TdN); + STM4(&(io[60]), Tf7, ovs, &(io[0])); + Tf8 = VFNMS(LDK(KP923879532), TdO, TdN); + STM4(&(io[28]), Tf8, ovs, &(io[0])); + Tf9 = VFMA(LDK(KP923879532), TdG, Tdj); + STM4(&(ro[12]), Tf9, ovs, &(ro[0])); + Tfa = VFNMS(LDK(KP923879532), TdG, Tdj); + STM4(&(ro[44]), Tfa, ovs, 
&(ro[0])); + Tfb = VFMA(LDK(KP923879532), Te8, Te7); + STM4(&(io[20]), Tfb, ovs, &(io[0])); + Tfc = VFNMS(LDK(KP923879532), Te8, Te7); + STM4(&(io[52]), Tfc, ovs, &(io[0])); + } + } + } + { + V TcF, TcE, Tcy, Tcv, TcT, Tco, TcP, Tcd, TcZ, TcD, Td0, Tck, Td4, TcX, Tcr; + V TcS; + { + V Tcc, TcC, Tcg, Tcj, TcV, TcW; + TbE = VADD(TbC, TbD); + Tcc = VSUB(TbC, TbD); + TcC = VSUB(Tan, Tak); + Tao = VADD(Tak, Tan); + TcF = VFNMS(LDK(KP414213562), Tce, Tcf); + Tcg = VFMA(LDK(KP414213562), Tcf, Tce); + Tcj = VFNMS(LDK(KP414213562), Tci, Tch); + TcE = VFMA(LDK(KP414213562), Tch, Tci); + Tcy = VFNMS(LDK(KP707106781), Tcx, Tcw); + TcV = VFMA(LDK(KP707106781), Tcx, Tcw); + TcW = VFMA(LDK(KP707106781), Tcu, Tct); + Tcv = VFNMS(LDK(KP707106781), Tcu, Tct); + TcT = VFMA(LDK(KP707106781), Tcn, Tcm); + Tco = VFNMS(LDK(KP707106781), Tcn, Tcm); + Tfd = VFMA(LDK(KP923879532), Tec, Te9); + STM4(&(ro[20]), Tfd, ovs, &(ro[0])); + Tfe = VFNMS(LDK(KP923879532), Tec, Te9); + STM4(&(ro[52]), Tfe, ovs, &(ro[0])); + TcP = VFNMS(LDK(KP707106781), Tcc, Tcb); + Tcd = VFMA(LDK(KP707106781), Tcc, Tcb); + TcZ = VFNMS(LDK(KP707106781), TcC, TcB); + TcD = VFMA(LDK(KP707106781), TcC, TcB); + Td0 = VADD(Tcg, Tcj); + Tck = VSUB(Tcg, Tcj); + Td4 = VFMA(LDK(KP198912367), TcV, TcW); + TcX = VFNMS(LDK(KP198912367), TcW, TcV); + Tcr = VFNMS(LDK(KP707106781), Tcq, Tcp); + TcS = VFMA(LDK(KP707106781), Tcq, Tcp); + } + { + V TcJ, Tcl, TcK, Tcs, TcQ, TcG, Td5, TcU, TcL, Tcz; + TcJ = VFNMS(LDK(KP923879532), Tck, Tcd); + Tcl = VFMA(LDK(KP923879532), Tck, Tcd); + TcK = VFNMS(LDK(KP668178637), Tco, Tcr); + Tcs = VFMA(LDK(KP668178637), Tcr, Tco); + TcQ = VADD(TcF, TcE); + TcG = VSUB(TcE, TcF); + Td5 = VFNMS(LDK(KP198912367), TcS, TcT); + TcU = VFMA(LDK(KP198912367), TcT, TcS); + TcL = VFMA(LDK(KP668178637), Tcv, Tcy); + Tcz = VFNMS(LDK(KP668178637), Tcy, Tcv); + { + V Td1, Td3, TcR, TcN, TcH, Td2, TcY, TcM, TcO, TcI, TcA, Td6; + Td1 = VFMA(LDK(KP923879532), Td0, TcZ); + Td3 = VFNMS(LDK(KP923879532), Td0, TcZ); + TcR = 
VFNMS(LDK(KP923879532), TcQ, TcP); + Td7 = VFMA(LDK(KP923879532), TcQ, TcP); + TcN = VFMA(LDK(KP923879532), TcG, TcD); + TcH = VFNMS(LDK(KP923879532), TcG, TcD); + Td2 = VADD(TcU, TcX); + TcY = VSUB(TcU, TcX); + TcM = VSUB(TcK, TcL); + TcO = VADD(TcK, TcL); + TcI = VSUB(Tcz, Tcs); + TcA = VADD(Tcs, Tcz); + Td6 = VSUB(Td4, Td5); + Td8 = VADD(Td5, Td4); + Tff = VFMA(LDK(KP980785280), TcY, TcR); + STM4(&(ro[14]), Tff, ovs, &(ro[0])); + Tfg = VFNMS(LDK(KP980785280), TcY, TcR); + STM4(&(ro[46]), Tfg, ovs, &(ro[0])); + Tfh = VFMA(LDK(KP831469612), TcM, TcJ); + STM4(&(ro[22]), Tfh, ovs, &(ro[0])); + Tfi = VFNMS(LDK(KP831469612), TcM, TcJ); + STM4(&(ro[54]), Tfi, ovs, &(ro[0])); + Tfj = VFMA(LDK(KP831469612), TcO, TcN); + STM4(&(io[6]), Tfj, ovs, &(io[0])); + Tfk = VFNMS(LDK(KP831469612), TcO, TcN); + STM4(&(io[38]), Tfk, ovs, &(io[0])); + Tfl = VFMA(LDK(KP831469612), TcI, TcH); + STM4(&(io[22]), Tfl, ovs, &(io[0])); + Tfm = VFNMS(LDK(KP831469612), TcI, TcH); + STM4(&(io[54]), Tfm, ovs, &(io[0])); + Tfn = VFMA(LDK(KP831469612), TcA, Tcl); + STM4(&(ro[6]), Tfn, ovs, &(ro[0])); + Tfo = VFNMS(LDK(KP831469612), TcA, Tcl); + STM4(&(ro[38]), Tfo, ovs, &(ro[0])); + Tfp = VFMA(LDK(KP980785280), Td6, Td3); + STM4(&(io[14]), Tfp, ovs, &(io[0])); + Tfq = VFNMS(LDK(KP980785280), Td6, Td3); + STM4(&(io[46]), Tfq, ovs, &(io[0])); + Tfr = VFNMS(LDK(KP980785280), Td2, Td1); + STM4(&(io[30]), Tfr, ovs, &(io[0])); + Tfs = VFMA(LDK(KP980785280), Td2, Td1); + STM4(&(io[62]), Tfs, ovs, &(io[0])); + } + } + } + } + { + V Tft, Tfu, Tfv, Tfw, Tfx, Tfy, Tfz, TfA, TfB, TfC, TfD, TfE, TfF, TfG, T3f; + V T66, T63, T3u, TfL, TfM, TfN, TfO, TfP, TfQ, TfR, TfS, TfT, TfU, TfV, TfW; + V TfX, TfY, TfZ, Tg0, Tc5, Tc8; + { + V TbH, TbG, Tbw, Tbn, TbV, TaW, TbR, Tap, Tc1, TbF, Tc2, TaE, Tc7, TbZ, Tb5; + V TbU; + { + V Taw, TaD, TbX, TbY; + TbH = VFMA(LDK(KP414213562), Tas, Tav); + Taw = VFNMS(LDK(KP414213562), Tav, Tas); + TaD = VFMA(LDK(KP414213562), TaC, Taz); + TbG = VFNMS(LDK(KP414213562), Taz, TaC); + 
Tbw = VFNMS(LDK(KP707106781), Tbv, Tbs); + TbX = VFMA(LDK(KP707106781), Tbv, Tbs); + TbY = VFMA(LDK(KP707106781), Tbm, Tbb); + Tbn = VFNMS(LDK(KP707106781), Tbm, Tbb); + TbV = VFMA(LDK(KP707106781), TaV, TaK); + TaW = VFNMS(LDK(KP707106781), TaV, TaK); + Tft = VFMA(LDK(KP980785280), Td8, Td7); + STM4(&(ro[62]), Tft, ovs, &(ro[0])); + Tfu = VFNMS(LDK(KP980785280), Td8, Td7); + STM4(&(ro[30]), Tfu, ovs, &(ro[0])); + TbR = VFMA(LDK(KP707106781), Tao, Tah); + Tap = VFNMS(LDK(KP707106781), Tao, Tah); + Tc1 = VFMA(LDK(KP707106781), TbE, TbB); + TbF = VFNMS(LDK(KP707106781), TbE, TbB); + Tc2 = VADD(Taw, TaD); + TaE = VSUB(Taw, TaD); + Tc7 = VFMA(LDK(KP198912367), TbX, TbY); + TbZ = VFNMS(LDK(KP198912367), TbY, TbX); + Tb5 = VFNMS(LDK(KP707106781), Tb4, Tb1); + TbU = VFMA(LDK(KP707106781), Tb4, Tb1); + } + { + V TbP, TaF, TbN, Tb6, TbS, TbI, Tc6, TbW, TbM, Tbx; + TbP = VFNMS(LDK(KP923879532), TaE, Tap); + TaF = VFMA(LDK(KP923879532), TaE, Tap); + TbN = VFNMS(LDK(KP668178637), TaW, Tb5); + Tb6 = VFMA(LDK(KP668178637), Tb5, TaW); + TbS = VADD(TbH, TbG); + TbI = VSUB(TbG, TbH); + Tc6 = VFNMS(LDK(KP198912367), TbU, TbV); + TbW = VFMA(LDK(KP198912367), TbV, TbU); + TbM = VFMA(LDK(KP668178637), Tbn, Tbw); + Tbx = VFNMS(LDK(KP668178637), Tbw, Tbn); + { + V Tc3, Tc9, TbT, TbL, TbJ, Tc4, Tc0, TbQ, TbO, TbK, Tby, Tca; + Tc3 = VFNMS(LDK(KP923879532), Tc2, Tc1); + Tc9 = VFMA(LDK(KP923879532), Tc2, Tc1); + TbT = VFMA(LDK(KP923879532), TbS, TbR); + Tc5 = VFNMS(LDK(KP923879532), TbS, TbR); + TbL = VFMA(LDK(KP923879532), TbI, TbF); + TbJ = VFNMS(LDK(KP923879532), TbI, TbF); + Tc4 = VSUB(TbZ, TbW); + Tc0 = VADD(TbW, TbZ); + TbQ = VADD(TbN, TbM); + TbO = VSUB(TbM, TbN); + TbK = VADD(Tb6, Tbx); + Tby = VSUB(Tb6, Tbx); + Tca = VADD(Tc6, Tc7); + Tc8 = VSUB(Tc6, Tc7); + Tfv = VFMA(LDK(KP980785280), Tc0, TbT); + STM4(&(ro[2]), Tfv, ovs, &(ro[0])); + Tfw = VFNMS(LDK(KP980785280), Tc0, TbT); + STM4(&(ro[34]), Tfw, ovs, &(ro[0])); + Tfx = VFMA(LDK(KP831469612), TbQ, TbP); + STM4(&(ro[58]), Tfx, 
ovs, &(ro[0])); + Tfy = VFNMS(LDK(KP831469612), TbQ, TbP); + STM4(&(ro[26]), Tfy, ovs, &(ro[0])); + Tfz = VFMA(LDK(KP831469612), TbO, TbL); + STM4(&(io[10]), Tfz, ovs, &(io[0])); + TfA = VFNMS(LDK(KP831469612), TbO, TbL); + STM4(&(io[42]), TfA, ovs, &(io[0])); + TfB = VFMA(LDK(KP831469612), TbK, TbJ); + STM4(&(io[58]), TfB, ovs, &(io[0])); + TfC = VFNMS(LDK(KP831469612), TbK, TbJ); + STM4(&(io[26]), TfC, ovs, &(io[0])); + TfD = VFMA(LDK(KP831469612), Tby, TaF); + STM4(&(ro[10]), TfD, ovs, &(ro[0])); + TfE = VFNMS(LDK(KP831469612), Tby, TaF); + STM4(&(ro[42]), TfE, ovs, &(ro[0])); + TfF = VFMA(LDK(KP980785280), Tca, Tc9); + STM4(&(io[2]), TfF, ovs, &(io[0])); + TfG = VFNMS(LDK(KP980785280), Tca, Tc9); + STM4(&(io[34]), TfG, ovs, &(io[0])); + TfH = VFNMS(LDK(KP980785280), Tc4, Tc3); + STM4(&(io[50]), TfH, ovs, &(io[0])); + TfI = VFMA(LDK(KP980785280), Tc4, Tc3); + STM4(&(io[18]), TfI, ovs, &(io[0])); + } + } + } + { + V T70, T6X, T7h, T6F, T7x, T7m, T7w, T7p, T7s, T6M, T7c, T6U, T7r, T75, T7i; + V T78, T7b, T6N; + { + V T6T, T6Q, T77, T6I, T6L, T76, T73, T74; + { + V T6D, T6E, T7k, T7l, T7n, T7o; + T3f = VFMA(LDK(KP707106781), T3e, T37); + T6D = VFNMS(LDK(KP707106781), T3e, T37); + T6E = VADD(T65, T64); + T66 = VSUB(T64, T65); + T6T = VFNMS(LDK(KP923879532), T6S, T6R); + T7k = VFMA(LDK(KP923879532), T6S, T6R); + T7l = VFMA(LDK(KP923879532), T6P, T6O); + T6Q = VFNMS(LDK(KP923879532), T6P, T6O); + T70 = VFNMS(LDK(KP923879532), T6Z, T6Y); + T7n = VFMA(LDK(KP923879532), T6Z, T6Y); + T7o = VFMA(LDK(KP923879532), T6W, T6V); + T6X = VFNMS(LDK(KP923879532), T6W, T6V); + T77 = VFNMS(LDK(KP198912367), T6G, T6H); + T6I = VFMA(LDK(KP198912367), T6H, T6G); + TfJ = VFMA(LDK(KP980785280), Tc8, Tc5); + STM4(&(ro[18]), TfJ, ovs, &(ro[0])); + TfK = VFNMS(LDK(KP980785280), Tc8, Tc5); + STM4(&(ro[50]), TfK, ovs, &(ro[0])); + T7h = VFMA(LDK(KP923879532), T6E, T6D); + T6F = VFNMS(LDK(KP923879532), T6E, T6D); + T7x = VFNMS(LDK(KP098491403), T7k, T7l); + T7m = VFMA(LDK(KP098491403), T7l, 
T7k); + T7w = VFMA(LDK(KP098491403), T7n, T7o); + T7p = VFNMS(LDK(KP098491403), T7o, T7n); + T6L = VFNMS(LDK(KP198912367), T6K, T6J); + T76 = VFMA(LDK(KP198912367), T6J, T6K); + } + T63 = VFMA(LDK(KP707106781), T62, T5Z); + T73 = VFNMS(LDK(KP707106781), T62, T5Z); + T74 = VADD(T3m, T3t); + T3u = VSUB(T3m, T3t); + T7s = VADD(T6I, T6L); + T6M = VSUB(T6I, T6L); + T7c = VFNMS(LDK(KP820678790), T6Q, T6T); + T6U = VFMA(LDK(KP820678790), T6T, T6Q); + T7r = VFMA(LDK(KP923879532), T74, T73); + T75 = VFNMS(LDK(KP923879532), T74, T73); + T7i = VADD(T77, T76); + T78 = VSUB(T76, T77); + } + T7b = VFNMS(LDK(KP980785280), T6M, T6F); + T6N = VFMA(LDK(KP980785280), T6M, T6F); + { + V T7u, T7q, T7v, T7t, T7A, T7y, T7j, T7z, T7f, T79, T71, T7d; + T7u = VADD(T7m, T7p); + T7q = VSUB(T7m, T7p); + T7v = VFNMS(LDK(KP980785280), T7s, T7r); + T7t = VFMA(LDK(KP980785280), T7s, T7r); + T7A = VADD(T7x, T7w); + T7y = VSUB(T7w, T7x); + T7j = VFNMS(LDK(KP980785280), T7i, T7h); + T7z = VFMA(LDK(KP980785280), T7i, T7h); + T7f = VFMA(LDK(KP980785280), T78, T75); + T79 = VFNMS(LDK(KP980785280), T78, T75); + T71 = VFNMS(LDK(KP820678790), T70, T6X); + T7d = VFMA(LDK(KP820678790), T6X, T70); + { + V T7g, T7e, T72, T7a; + TfL = VFMA(LDK(KP995184726), T7y, T7v); + STM4(&(io[15]), TfL, ovs, &(io[1])); + TfM = VFNMS(LDK(KP995184726), T7y, T7v); + STM4(&(io[47]), TfM, ovs, &(io[1])); + TfN = VFMA(LDK(KP995184726), T7q, T7j); + STM4(&(ro[15]), TfN, ovs, &(ro[1])); + TfO = VFNMS(LDK(KP995184726), T7q, T7j); + STM4(&(ro[47]), TfO, ovs, &(ro[1])); + T7g = VADD(T7c, T7d); + T7e = VSUB(T7c, T7d); + T72 = VADD(T6U, T71); + T7a = VSUB(T71, T6U); + TfP = VFNMS(LDK(KP995184726), T7u, T7t); + STM4(&(io[31]), TfP, ovs, &(io[1])); + TfQ = VFMA(LDK(KP995184726), T7u, T7t); + STM4(&(io[63]), TfQ, ovs, &(io[1])); + TfR = VFMA(LDK(KP773010453), T7e, T7b); + STM4(&(ro[23]), TfR, ovs, &(ro[1])); + TfS = VFNMS(LDK(KP773010453), T7e, T7b); + STM4(&(ro[55]), TfS, ovs, &(ro[1])); + TfT = VFMA(LDK(KP773010453), T7g, T7f); + 
STM4(&(io[7]), TfT, ovs, &(io[1])); + TfU = VFNMS(LDK(KP773010453), T7g, T7f); + STM4(&(io[39]), TfU, ovs, &(io[1])); + TfV = VFMA(LDK(KP773010453), T7a, T79); + STM4(&(io[23]), TfV, ovs, &(io[1])); + TfW = VFNMS(LDK(KP773010453), T7a, T79); + STM4(&(io[55]), TfW, ovs, &(io[1])); + TfX = VFMA(LDK(KP773010453), T72, T6N); + STM4(&(ro[7]), TfX, ovs, &(ro[1])); + TfY = VFNMS(LDK(KP773010453), T72, T6N); + STM4(&(ro[39]), TfY, ovs, &(ro[1])); + TfZ = VFNMS(LDK(KP995184726), T7A, T7z); + STM4(&(ro[31]), TfZ, ovs, &(ro[1])); + Tg0 = VFMA(LDK(KP995184726), T7A, T7z); + STM4(&(ro[63]), Tg0, ovs, &(ro[1])); + } + } + } + { + V T7D, T8K, T8H, T7K, Ta8, Ta7, Tae, Tad; + { + V T9x, T9u, T9E, T9B, T9L, T9K, T9V, T9j, Tab, Ta0, Taa, Ta3, Ta6, T9q, T9H; + V T9I; + { + V T9h, T9i, T9Y, T9Z, Ta1, Ta2, T9m, T9p; + T7D = VFMA(LDK(KP707106781), T7C, T7B); + T9h = VFNMS(LDK(KP707106781), T7C, T7B); + T9i = VSUB(T8I, T8J); + T8K = VADD(T8I, T8J); + T9x = VFNMS(LDK(KP923879532), T9w, T9v); + T9Y = VFMA(LDK(KP923879532), T9w, T9v); + T9Z = VFMA(LDK(KP923879532), T9t, T9s); + T9u = VFNMS(LDK(KP923879532), T9t, T9s); + T9E = VFNMS(LDK(KP923879532), T9D, T9C); + Ta1 = VFMA(LDK(KP923879532), T9D, T9C); + Ta2 = VFMA(LDK(KP923879532), T9A, T9z); + T9B = VFNMS(LDK(KP923879532), T9A, T9z); + T9L = VFNMS(LDK(KP668178637), T9k, T9l); + T9m = VFMA(LDK(KP668178637), T9l, T9k); + T9p = VFNMS(LDK(KP668178637), T9o, T9n); + T9K = VFMA(LDK(KP668178637), T9n, T9o); + T9V = VFNMS(LDK(KP923879532), T9i, T9h); + T9j = VFMA(LDK(KP923879532), T9i, T9h); + Tab = VFNMS(LDK(KP303346683), T9Y, T9Z); + Ta0 = VFMA(LDK(KP303346683), T9Z, T9Y); + Taa = VFMA(LDK(KP303346683), Ta1, Ta2); + Ta3 = VFNMS(LDK(KP303346683), Ta2, Ta1); + Ta6 = VADD(T9m, T9p); + T9q = VSUB(T9m, T9p); + T8H = VFMA(LDK(KP707106781), T8G, T8F); + T9H = VFNMS(LDK(KP707106781), T8G, T8F); + T9I = VSUB(T7J, T7G); + T7K = VADD(T7G, T7J); + } + { + V T9P, T9r, T9Q, T9y, Ta5, T9J, T9W, T9M, T9R, T9F; + T9P = VFNMS(LDK(KP831469612), T9q, T9j); + T9r = 
VFMA(LDK(KP831469612), T9q, T9j); + T9Q = VFNMS(LDK(KP534511135), T9u, T9x); + T9y = VFMA(LDK(KP534511135), T9x, T9u); + Ta5 = VFNMS(LDK(KP923879532), T9I, T9H); + T9J = VFMA(LDK(KP923879532), T9I, T9H); + T9W = VADD(T9L, T9K); + T9M = VSUB(T9K, T9L); + T9R = VFMA(LDK(KP534511135), T9B, T9E); + T9F = VFNMS(LDK(KP534511135), T9E, T9B); + { + V T9T, T9N, T9U, T9S, T9G, T9O; + { + V Ta4, Ta9, Tac, T9X; + Ta8 = VADD(Ta0, Ta3); + Ta4 = VSUB(Ta0, Ta3); + Ta9 = VFNMS(LDK(KP831469612), Ta6, Ta5); + Ta7 = VFMA(LDK(KP831469612), Ta6, Ta5); + Tae = VADD(Tab, Taa); + Tac = VSUB(Taa, Tab); + T9X = VFNMS(LDK(KP831469612), T9W, T9V); + Tad = VFMA(LDK(KP831469612), T9W, T9V); + T9T = VFMA(LDK(KP831469612), T9M, T9J); + T9N = VFNMS(LDK(KP831469612), T9M, T9J); + T9U = VADD(T9Q, T9R); + T9S = VSUB(T9Q, T9R); + T9G = VADD(T9y, T9F); + T9O = VSUB(T9F, T9y); + { + V Tg1, Tg2, Tg3, Tg4; + Tg1 = VFNMS(LDK(KP956940335), Tac, Ta9); + STM4(&(io[45]), Tg1, ovs, &(io[1])); + STN4(&(io[44]), Tf6, Tg1, Tfq, TfM, ovs); + Tg2 = VFMA(LDK(KP956940335), Ta4, T9X); + STM4(&(ro[13]), Tg2, ovs, &(ro[1])); + STN4(&(ro[12]), Tf9, Tg2, Tff, TfN, ovs); + Tg3 = VFNMS(LDK(KP956940335), Ta4, T9X); + STM4(&(ro[45]), Tg3, ovs, &(ro[1])); + STN4(&(ro[44]), Tfa, Tg3, Tfg, TfO, ovs); + Tg4 = VFMA(LDK(KP956940335), Tac, Ta9); + STM4(&(io[13]), Tg4, ovs, &(io[1])); + STN4(&(io[12]), Tf5, Tg4, Tfp, TfL, ovs); + } + } + { + V Tg5, Tg6, Tg7, Tg8; + Tg5 = VFMA(LDK(KP881921264), T9S, T9P); + STM4(&(ro[21]), Tg5, ovs, &(ro[1])); + STN4(&(ro[20]), Tfd, Tg5, Tfh, TfR, ovs); + Tg6 = VFNMS(LDK(KP881921264), T9S, T9P); + STM4(&(ro[53]), Tg6, ovs, &(ro[1])); + STN4(&(ro[52]), Tfe, Tg6, Tfi, TfS, ovs); + Tg7 = VFMA(LDK(KP881921264), T9U, T9T); + STM4(&(io[5]), Tg7, ovs, &(io[1])); + STN4(&(io[4]), TeZ, Tg7, Tfj, TfT, ovs); + Tg8 = VFNMS(LDK(KP881921264), T9U, T9T); + STM4(&(io[37]), Tg8, ovs, &(io[1])); + STN4(&(io[36]), Tf0, Tg8, Tfk, TfU, ovs); + { + V Tg9, Tga, Tgb, Tgc; + Tg9 = VFMA(LDK(KP881921264), T9O, T9N); + 
STM4(&(io[21]), Tg9, ovs, &(io[1])); + STN4(&(io[20]), Tfb, Tg9, Tfl, TfV, ovs); + Tga = VFNMS(LDK(KP881921264), T9O, T9N); + STM4(&(io[53]), Tga, ovs, &(io[1])); + STN4(&(io[52]), Tfc, Tga, Tfm, TfW, ovs); + Tgb = VFMA(LDK(KP881921264), T9G, T9r); + STM4(&(ro[5]), Tgb, ovs, &(ro[1])); + STN4(&(ro[4]), Tf1, Tgb, Tfn, TfX, ovs); + Tgc = VFNMS(LDK(KP881921264), T9G, T9r); + STM4(&(ro[37]), Tgc, ovs, &(ro[1])); + STN4(&(ro[36]), Tf2, Tgc, Tfo, TfY, ovs); + } + } + } + } + } + { + V Tgh, Tgi, Tgl, Tgm, Tgn, Tgo, Tgp, Tgq, Tgr, Tgs, Tgt, Tgu; + { + V T5U, T6j, T3v, T6y, T6o, T5H, T69, T68, T6z, T6r, T6u, T48, T6f, T52, T6t; + V T67, T6h, T49; + { + V T51, T4O, T6p, T6q, T3O, T47, T6m, T6n; + T51 = VFNMS(LDK(KP923879532), T50, T4X); + T6m = VFMA(LDK(KP923879532), T50, T4X); + T6n = VFMA(LDK(KP923879532), T4N, T4q); + T4O = VFNMS(LDK(KP923879532), T4N, T4q); + T5U = VFNMS(LDK(KP923879532), T5T, T5Q); + T6p = VFMA(LDK(KP923879532), T5T, T5Q); + { + V Tgd, Tge, Tgf, Tgg; + Tgd = VFMA(LDK(KP956940335), Ta8, Ta7); + STM4(&(io[61]), Tgd, ovs, &(io[1])); + STN4(&(io[60]), Tf7, Tgd, Tfs, TfQ, ovs); + Tge = VFNMS(LDK(KP956940335), Ta8, Ta7); + STM4(&(io[29]), Tge, ovs, &(io[1])); + STN4(&(io[28]), Tf8, Tge, Tfr, TfP, ovs); + Tgf = VFMA(LDK(KP956940335), Tae, Tad); + STM4(&(ro[61]), Tgf, ovs, &(ro[1])); + STN4(&(ro[60]), Tf3, Tgf, Tft, Tg0, ovs); + Tgg = VFNMS(LDK(KP956940335), Tae, Tad); + STM4(&(ro[29]), Tgg, ovs, &(ro[1])); + STN4(&(ro[28]), Tf4, Tgg, Tfu, TfZ, ovs); + T6j = VFMA(LDK(KP923879532), T3u, T3f); + T3v = VFNMS(LDK(KP923879532), T3u, T3f); + T6y = VFNMS(LDK(KP303346683), T6m, T6n); + T6o = VFMA(LDK(KP303346683), T6n, T6m); + T6q = VFMA(LDK(KP923879532), T5G, T5j); + T5H = VFNMS(LDK(KP923879532), T5G, T5j); + } + T69 = VFMA(LDK(KP668178637), T3G, T3N); + T3O = VFNMS(LDK(KP668178637), T3N, T3G); + T47 = VFMA(LDK(KP668178637), T46, T3Z); + T68 = VFNMS(LDK(KP668178637), T3Z, T46); + T6z = VFMA(LDK(KP303346683), T6p, T6q); + T6r = VFNMS(LDK(KP303346683), T6q, T6p); + T6u 
= VADD(T3O, T47); + T48 = VSUB(T3O, T47); + T6f = VFNMS(LDK(KP534511135), T4O, T51); + T52 = VFMA(LDK(KP534511135), T51, T4O); + T6t = VFMA(LDK(KP923879532), T66, T63); + T67 = VFNMS(LDK(KP923879532), T66, T63); + } + T6h = VFNMS(LDK(KP831469612), T48, T3v); + T49 = VFMA(LDK(KP831469612), T48, T3v); + { + V T6w, T6s, T6B, T6v, T6A, T6C, T6k, T6a, T6e, T5V; + T6w = VSUB(T6r, T6o); + T6s = VADD(T6o, T6r); + T6B = VFMA(LDK(KP831469612), T6u, T6t); + T6v = VFNMS(LDK(KP831469612), T6u, T6t); + T6A = VSUB(T6y, T6z); + T6C = VADD(T6y, T6z); + T6k = VADD(T69, T68); + T6a = VSUB(T68, T69); + T6e = VFMA(LDK(KP534511135), T5H, T5U); + T5V = VFNMS(LDK(KP534511135), T5U, T5H); + Tgh = VFMA(LDK(KP956940335), T6C, T6B); + STM4(&(io[3]), Tgh, ovs, &(io[1])); + Tgi = VFNMS(LDK(KP956940335), T6C, T6B); + STM4(&(io[35]), Tgi, ovs, &(io[1])); + { + V T6l, T6x, T6d, T6b; + T6l = VFMA(LDK(KP831469612), T6k, T6j); + T6x = VFNMS(LDK(KP831469612), T6k, T6j); + T6d = VFMA(LDK(KP831469612), T6a, T67); + T6b = VFNMS(LDK(KP831469612), T6a, T67); + { + V T6g, T6i, T5W, T6c; + T6g = VSUB(T6e, T6f); + T6i = VADD(T6f, T6e); + T5W = VSUB(T52, T5V); + T6c = VADD(T52, T5V); + Tgj = VFMA(LDK(KP956940335), T6w, T6v); + STM4(&(io[19]), Tgj, ovs, &(io[1])); + Tgk = VFNMS(LDK(KP956940335), T6w, T6v); + STM4(&(io[51]), Tgk, ovs, &(io[1])); + Tgl = VFMA(LDK(KP956940335), T6s, T6l); + STM4(&(ro[3]), Tgl, ovs, &(ro[1])); + Tgm = VFNMS(LDK(KP956940335), T6s, T6l); + STM4(&(ro[35]), Tgm, ovs, &(ro[1])); + Tgn = VFMA(LDK(KP881921264), T6i, T6h); + STM4(&(ro[59]), Tgn, ovs, &(ro[1])); + Tgo = VFNMS(LDK(KP881921264), T6i, T6h); + STM4(&(ro[27]), Tgo, ovs, &(ro[1])); + Tgp = VFMA(LDK(KP881921264), T6g, T6d); + STM4(&(io[11]), Tgp, ovs, &(io[1])); + Tgq = VFNMS(LDK(KP881921264), T6g, T6d); + STM4(&(io[43]), Tgq, ovs, &(io[1])); + Tgr = VFMA(LDK(KP881921264), T6c, T6b); + STM4(&(io[59]), Tgr, ovs, &(io[1])); + Tgs = VFNMS(LDK(KP881921264), T6c, T6b); + STM4(&(io[27]), Tgs, ovs, &(io[1])); + Tgt = 
VFMA(LDK(KP881921264), T5W, T49); + STM4(&(ro[11]), Tgt, ovs, &(ro[1])); + Tgu = VFNMS(LDK(KP881921264), T5W, T49); + STM4(&(ro[43]), Tgu, ovs, &(ro[1])); + Tgv = VFNMS(LDK(KP956940335), T6A, T6x); + STM4(&(ro[51]), Tgv, ovs, &(ro[1])); + Tgw = VFMA(LDK(KP956940335), T6A, T6x); + STM4(&(ro[19]), Tgw, ovs, &(ro[1])); + } + } + } + } + { + V T8j, T8c, T8C, T8v, T8N, T8M, T8X, T7L, T9c, T92, T9d, T95, T98, T80; + { + V T90, T91, T93, T94, T7S, T7Z; + T8j = VFNMS(LDK(KP923879532), T8i, T8f); + T90 = VFMA(LDK(KP923879532), T8i, T8f); + T91 = VFMA(LDK(KP923879532), T8b, T84); + T8c = VFNMS(LDK(KP923879532), T8b, T84); + T8C = VFNMS(LDK(KP923879532), T8B, T8y); + T93 = VFMA(LDK(KP923879532), T8B, T8y); + T94 = VFMA(LDK(KP923879532), T8u, T8n); + T8v = VFNMS(LDK(KP923879532), T8u, T8n); + T8N = VFMA(LDK(KP198912367), T7O, T7R); + T7S = VFNMS(LDK(KP198912367), T7R, T7O); + T7Z = VFMA(LDK(KP198912367), T7Y, T7V); + T8M = VFNMS(LDK(KP198912367), T7V, T7Y); + T8X = VFMA(LDK(KP923879532), T7K, T7D); + T7L = VFNMS(LDK(KP923879532), T7K, T7D); + T9c = VFNMS(LDK(KP098491403), T90, T91); + T92 = VFMA(LDK(KP098491403), T91, T90); + T9d = VFMA(LDK(KP098491403), T93, T94); + T95 = VFNMS(LDK(KP098491403), T94, T93); + T98 = VADD(T7S, T7Z); + T80 = VSUB(T7S, T7Z); + } + { + V T8V, T81, T8T, T8k, T97, T8L, T8Y, T8O, T8S, T8D; + T8V = VFNMS(LDK(KP980785280), T80, T7L); + T81 = VFMA(LDK(KP980785280), T80, T7L); + T8T = VFNMS(LDK(KP820678790), T8c, T8j); + T8k = VFMA(LDK(KP820678790), T8j, T8c); + T97 = VFMA(LDK(KP923879532), T8K, T8H); + T8L = VFNMS(LDK(KP923879532), T8K, T8H); + T8Y = VADD(T8N, T8M); + T8O = VSUB(T8M, T8N); + T8S = VFMA(LDK(KP820678790), T8v, T8C); + T8D = VFNMS(LDK(KP820678790), T8C, T8v); + { + V T8R, T8P, T8U, T8W, T8E, T8Q; + { + V T96, T9f, T9g, T8Z; + T9a = VSUB(T95, T92); + T96 = VADD(T92, T95); + T9f = VFMA(LDK(KP980785280), T98, T97); + T99 = VFNMS(LDK(KP980785280), T98, T97); + T9e = VSUB(T9c, T9d); + T9g = VADD(T9c, T9d); + T8Z = VFMA(LDK(KP980785280), T8Y, 
T8X); + T9b = VFNMS(LDK(KP980785280), T8Y, T8X); + T8R = VFMA(LDK(KP980785280), T8O, T8L); + T8P = VFNMS(LDK(KP980785280), T8O, T8L); + T8U = VSUB(T8S, T8T); + T8W = VADD(T8T, T8S); + T8E = VSUB(T8k, T8D); + T8Q = VADD(T8k, T8D); + { + V Tgx, Tgy, Tgz, TgA; + Tgx = VFNMS(LDK(KP995184726), T9g, T9f); + STM4(&(io[33]), Tgx, ovs, &(io[1])); + STN4(&(io[32]), TeO, Tgx, TfG, Tgi, ovs); + Tgy = VFMA(LDK(KP995184726), T96, T8Z); + STM4(&(ro[1]), Tgy, ovs, &(ro[1])); + STN4(&(ro[0]), TeL, Tgy, Tfv, Tgl, ovs); + Tgz = VFNMS(LDK(KP995184726), T96, T8Z); + STM4(&(ro[33]), Tgz, ovs, &(ro[1])); + STN4(&(ro[32]), TeM, Tgz, Tfw, Tgm, ovs); + TgA = VFMA(LDK(KP995184726), T9g, T9f); + STM4(&(io[1]), TgA, ovs, &(io[1])); + STN4(&(io[0]), TeN, TgA, TfF, Tgh, ovs); + } + } + { + V TgB, TgC, TgD, TgE; + TgB = VFMA(LDK(KP773010453), T8W, T8V); + STM4(&(ro[57]), TgB, ovs, &(ro[1])); + STN4(&(ro[56]), TeS, TgB, Tfx, Tgn, ovs); + TgC = VFNMS(LDK(KP773010453), T8W, T8V); + STM4(&(ro[25]), TgC, ovs, &(ro[1])); + STN4(&(ro[24]), TeR, TgC, Tfy, Tgo, ovs); + TgD = VFMA(LDK(KP773010453), T8U, T8R); + STM4(&(io[9]), TgD, ovs, &(io[1])); + STN4(&(io[8]), TeT, TgD, Tfz, Tgp, ovs); + TgE = VFNMS(LDK(KP773010453), T8U, T8R); + STM4(&(io[41]), TgE, ovs, &(io[1])); + STN4(&(io[40]), TeU, TgE, TfA, Tgq, ovs); + { + V TgF, TgG, TgH, TgI; + TgF = VFMA(LDK(KP773010453), T8Q, T8P); + STM4(&(io[57]), TgF, ovs, &(io[1])); + STN4(&(io[56]), TeW, TgF, TfB, Tgr, ovs); + TgG = VFNMS(LDK(KP773010453), T8Q, T8P); + STM4(&(io[25]), TgG, ovs, &(io[1])); + STN4(&(io[24]), TeV, TgG, TfC, Tgs, ovs); + TgH = VFMA(LDK(KP773010453), T8E, T81); + STM4(&(ro[9]), TgH, ovs, &(ro[1])); + STN4(&(ro[8]), TeX, TgH, TfD, Tgt, ovs); + TgI = VFNMS(LDK(KP773010453), T8E, T81); + STM4(&(ro[41]), TgI, ovs, &(ro[1])); + STN4(&(ro[40]), TeY, TgI, TfE, Tgu, ovs); + } + } + } + } + } + } + } + } + } + } + { + V TgJ, TgK, TgL, TgM; + TgJ = VFMA(LDK(KP995184726), T9a, T99); + STM4(&(io[17]), TgJ, ovs, &(io[1])); + STN4(&(io[16]), TeQ, TgJ, 
TfI, Tgj, ovs); + TgK = VFNMS(LDK(KP995184726), T9a, T99); + STM4(&(io[49]), TgK, ovs, &(io[1])); + STN4(&(io[48]), TeP, TgK, TfH, Tgk, ovs); + TgL = VFMA(LDK(KP995184726), T9e, T9b); + STM4(&(ro[17]), TgL, ovs, &(ro[1])); + STN4(&(ro[16]), TeK, TgL, TfJ, Tgw, ovs); + TgM = VFNMS(LDK(KP995184726), T9e, T9b); + STM4(&(ro[49]), TgM, ovs, &(ro[1])); + STN4(&(ro[48]), TeJ, TgM, TfK, Tgv, ovs); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n2sv_64"), {520, 0, 392, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_64) (planner *p) { + X(kdft_register) (p, n2sv_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 912 FP additions, 248 FP multiplications, + * (or, 808 additions, 144 multiplications, 104 fused multiply/add), + * 260 stack variables, 15 constants, and 288 memory accesses + */ +#include "n2s.h" + +static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + 
DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) { + V T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e; + V T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I; + V T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R; + V T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C; + V Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V; + V T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O; + V T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50; + V T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z; + V T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D; + { + V T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g; + V T3c; + { + V T1, T2, T24, T25; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0])); + T3 = VADD(T1, T2); + T35 = VSUB(T1, T2); + T24 = LD(&(ii[0]), ivs, &(ii[0])); + T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0])); + T26 = VADD(T24, T25); + T5Y = VSUB(T24, T25); + } + { + V T4, T5, T27, T28; + T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0])); + T6 = VADD(T4, T5); + T5X = VSUB(T4, T5); + T27 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0])); + T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0])); + T29 = VADD(T27, T28); + T36 = VSUB(T27, T28); + } + { + V T8, T9, T2b, T2c; + T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); + T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0])); + Ta = VADD(T8, T9); + T39 = VSUB(T8, T9); + T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); + T2c = 
LD(&(ii[WS(is, 40)]), ivs, &(ii[0])); + T2d = VADD(T2b, T2c); + T38 = VSUB(T2b, T2c); + } + { + V Tb, Tc, T2e, T2f; + Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0])); + Tc = LD(&(ri[WS(is, 24)]), ivs, &(ri[0])); + Td = VADD(Tb, Tc); + T3b = VSUB(Tb, Tc); + T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0])); + T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0])); + T2g = VADD(T2e, T2f); + T3c = VSUB(T2e, T2f); + } + { + V T7, Te, T2a, T2h; + T37 = VSUB(T35, T36); + T7B = VADD(T35, T36); + T8F = VSUB(T5Y, T5X); + T5Z = VADD(T5X, T5Y); + T7 = VADD(T3, T6); + Te = VADD(Ta, Td); + Tf = VADD(T7, Te); + Td9 = VSUB(T7, Te); + { + V Tbz, TbA, T60, T61; + Tbz = VSUB(T26, T29); + TbA = VSUB(Td, Ta); + TbB = VSUB(Tbz, TbA); + TcB = VADD(TbA, Tbz); + T60 = VSUB(T3b, T3c); + T61 = VADD(T39, T38); + T62 = VMUL(LDK(KP707106781), VSUB(T60, T61)); + T7C = VMUL(LDK(KP707106781), VADD(T61, T60)); + } + T2a = VADD(T26, T29); + T2h = VADD(T2d, T2g); + T2i = VADD(T2a, T2h); + TdH = VSUB(T2a, T2h); + { + V Taf, Tag, T3a, T3d; + Taf = VSUB(T3, T6); + Tag = VSUB(T2d, T2g); + Tah = VSUB(Taf, Tag); + Tcb = VADD(Taf, Tag); + T3a = VSUB(T38, T39); + T3d = VADD(T3b, T3c); + T3e = VMUL(LDK(KP707106781), VSUB(T3a, T3d)); + T8G = VMUL(LDK(KP707106781), VADD(T3a, T3d)); + } + } + } + { + V Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v; + V T3r; + { + V Tg, Th, T2j, T2k; + Tg = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0])); + Ti = VADD(Tg, Th); + T3j = VSUB(Tg, Th); + T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0])); + T2l = VADD(T2j, T2k); + T3h = VSUB(T2j, T2k); + } + { + V Tj, Tk, T2m, T2n; + Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0])); + Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0])); + Tl = VADD(Tj, Tk); + T3g = VSUB(Tj, Tk); + T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0])); + T2n = LD(&(ii[WS(is, 52)]), ivs, &(ii[0])); + T2o = VADD(T2m, T2n); + T3k = VSUB(T2m, T2n); + } + { + V Tn, To, T2q, T2r; + Tn = LD(&(ri[WS(is, 60)]), ivs, 
&(ri[0])); + To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0])); + Tp = VADD(Tn, To); + T3q = VSUB(Tn, To); + T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0])); + T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0])); + T2s = VADD(T2q, T2r); + T3o = VSUB(T2q, T2r); + } + { + V Tq, Tr, T2t, T2u; + Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); + Tr = LD(&(ri[WS(is, 44)]), ivs, &(ri[0])); + Ts = VADD(Tq, Tr); + T3n = VSUB(Tq, Tr); + T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); + T2u = LD(&(ii[WS(is, 44)]), ivs, &(ii[0])); + T2v = VADD(T2t, T2u); + T3r = VSUB(T2t, T2u); + } + { + V Tm, Tt, Tai, Taj; + Tm = VADD(Ti, Tl); + Tt = VADD(Tp, Ts); + Tu = VADD(Tm, Tt); + TdI = VSUB(Tt, Tm); + Tai = VSUB(T2l, T2o); + Taj = VSUB(Ti, Tl); + Tak = VSUB(Tai, Taj); + TbD = VADD(Taj, Tai); + } + { + V Tal, Tam, T2p, T2w; + Tal = VSUB(Tp, Ts); + Tam = VSUB(T2s, T2v); + Tan = VADD(Tal, Tam); + TbC = VSUB(Tal, Tam); + T2p = VADD(T2l, T2o); + T2w = VADD(T2s, T2v); + T2x = VADD(T2p, T2w); + Tda = VSUB(T2p, T2w); + } + { + V T3i, T3l, T7E, T7F; + T3i = VADD(T3g, T3h); + T3l = VSUB(T3j, T3k); + T3m = VFNMS(LDK(KP923879532), T3l, VMUL(LDK(KP382683432), T3i)); + T65 = VFMA(LDK(KP923879532), T3i, VMUL(LDK(KP382683432), T3l)); + T7E = VSUB(T3h, T3g); + T7F = VADD(T3j, T3k); + T7G = VFNMS(LDK(KP382683432), T7F, VMUL(LDK(KP923879532), T7E)); + T8J = VFMA(LDK(KP382683432), T7E, VMUL(LDK(KP923879532), T7F)); + } + { + V T7H, T7I, T3p, T3s; + T7H = VSUB(T3o, T3n); + T7I = VADD(T3q, T3r); + T7J = VFMA(LDK(KP923879532), T7H, VMUL(LDK(KP382683432), T7I)); + T8I = VFNMS(LDK(KP382683432), T7H, VMUL(LDK(KP923879532), T7I)); + T3p = VADD(T3n, T3o); + T3s = VSUB(T3q, T3r); + T3t = VFMA(LDK(KP382683432), T3p, VMUL(LDK(KP923879532), T3s)); + T64 = VFNMS(LDK(KP923879532), T3p, VMUL(LDK(KP382683432), T3s)); + } + } + { + V Ty, T3H, T2B, T3x, TB, T3w, T2E, T3I, TI, T3L, T2L, T3B, TF, T3K, T2I; + V T3E; + { + V Tw, Tx, T2C, T2D; + Tw = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + Tx = LD(&(ri[WS(is, 34)]), ivs, &(ri[0])); + Ty = VADD(Tw, Tx); + T3H 
= VSUB(Tw, Tx); + { + V T2z, T2A, Tz, TA; + T2z = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + T2A = LD(&(ii[WS(is, 34)]), ivs, &(ii[0])); + T2B = VADD(T2z, T2A); + T3x = VSUB(T2z, T2A); + Tz = LD(&(ri[WS(is, 18)]), ivs, &(ri[0])); + TA = LD(&(ri[WS(is, 50)]), ivs, &(ri[0])); + TB = VADD(Tz, TA); + T3w = VSUB(Tz, TA); + } + T2C = LD(&(ii[WS(is, 18)]), ivs, &(ii[0])); + T2D = LD(&(ii[WS(is, 50)]), ivs, &(ii[0])); + T2E = VADD(T2C, T2D); + T3I = VSUB(T2C, T2D); + { + V TG, TH, T3z, T2J, T2K, T3A; + TG = LD(&(ri[WS(is, 58)]), ivs, &(ri[0])); + TH = LD(&(ri[WS(is, 26)]), ivs, &(ri[0])); + T3z = VSUB(TG, TH); + T2J = LD(&(ii[WS(is, 58)]), ivs, &(ii[0])); + T2K = LD(&(ii[WS(is, 26)]), ivs, &(ii[0])); + T3A = VSUB(T2J, T2K); + TI = VADD(TG, TH); + T3L = VADD(T3z, T3A); + T2L = VADD(T2J, T2K); + T3B = VSUB(T3z, T3A); + } + { + V TD, TE, T3C, T2G, T2H, T3D; + TD = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); + TE = LD(&(ri[WS(is, 42)]), ivs, &(ri[0])); + T3C = VSUB(TD, TE); + T2G = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); + T2H = LD(&(ii[WS(is, 42)]), ivs, &(ii[0])); + T3D = VSUB(T2G, T2H); + TF = VADD(TD, TE); + T3K = VSUB(T3D, T3C); + T2I = VADD(T2G, T2H); + T3E = VADD(T3C, T3D); + } + } + { + V TC, TJ, Taq, Tar; + TC = VADD(Ty, TB); + TJ = VADD(TF, TI); + TK = VADD(TC, TJ); + Tdd = VSUB(TC, TJ); + Taq = VSUB(T2B, T2E); + Tar = VSUB(TI, TF); + Tas = VSUB(Taq, Tar); + Tce = VADD(Tar, Taq); + } + { + V Tat, Tau, T2F, T2M; + Tat = VSUB(Ty, TB); + Tau = VSUB(T2I, T2L); + Tav = VSUB(Tat, Tau); + Tcf = VADD(Tat, Tau); + T2F = VADD(T2B, T2E); + T2M = VADD(T2I, T2L); + T2N = VADD(T2F, T2M); + Tdc = VSUB(T2F, T2M); + } + { + V T3y, T3F, T7M, T7N; + T3y = VADD(T3w, T3x); + T3F = VMUL(LDK(KP707106781), VSUB(T3B, T3E)); + T3G = VSUB(T3y, T3F); + T6G = VADD(T3y, T3F); + T7M = VSUB(T3x, T3w); + T7N = VMUL(LDK(KP707106781), VADD(T3K, T3L)); + T7O = VSUB(T7M, T7N); + T9k = VADD(T7M, T7N); + } + { + V T7P, T7Q, T3J, T3M; + T7P = VADD(T3H, T3I); + T7Q = VMUL(LDK(KP707106781), VADD(T3E, T3B)); + T7R = 
VSUB(T7P, T7Q); + T9l = VADD(T7P, T7Q); + T3J = VSUB(T3H, T3I); + T3M = VMUL(LDK(KP707106781), VSUB(T3K, T3L)); + T3N = VSUB(T3J, T3M); + T6H = VADD(T3J, T3M); + } + } + { + V T1z, T53, T5L, Tbo, T1C, T5I, T56, Tbp, T1J, Tb9, T5h, T5N, T1G, Tb8, T5c; + V T5O; + { + V T1x, T1y, T54, T55; + T1x = LD(&(ri[WS(is, 63)]), ivs, &(ri[WS(is, 1)])); + T1y = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)])); + T1z = VADD(T1x, T1y); + T53 = VSUB(T1x, T1y); + { + V T5J, T5K, T1A, T1B; + T5J = LD(&(ii[WS(is, 63)]), ivs, &(ii[WS(is, 1)])); + T5K = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)])); + T5L = VSUB(T5J, T5K); + Tbo = VADD(T5J, T5K); + T1A = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); + T1B = LD(&(ri[WS(is, 47)]), ivs, &(ri[WS(is, 1)])); + T1C = VADD(T1A, T1B); + T5I = VSUB(T1A, T1B); + } + T54 = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); + T55 = LD(&(ii[WS(is, 47)]), ivs, &(ii[WS(is, 1)])); + T56 = VSUB(T54, T55); + Tbp = VADD(T54, T55); + { + V T1H, T1I, T5d, T5e, T5f, T5g; + T1H = LD(&(ri[WS(is, 55)]), ivs, &(ri[WS(is, 1)])); + T1I = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)])); + T5d = VSUB(T1H, T1I); + T5e = LD(&(ii[WS(is, 55)]), ivs, &(ii[WS(is, 1)])); + T5f = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)])); + T5g = VSUB(T5e, T5f); + T1J = VADD(T1H, T1I); + Tb9 = VADD(T5e, T5f); + T5h = VADD(T5d, T5g); + T5N = VSUB(T5d, T5g); + } + { + V T1E, T1F, T5b, T58, T59, T5a; + T1E = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + T1F = LD(&(ri[WS(is, 39)]), ivs, &(ri[WS(is, 1)])); + T5b = VSUB(T1E, T1F); + T58 = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + T59 = LD(&(ii[WS(is, 39)]), ivs, &(ii[WS(is, 1)])); + T5a = VSUB(T58, T59); + T1G = VADD(T1E, T1F); + Tb8 = VADD(T58, T59); + T5c = VSUB(T5a, T5b); + T5O = VADD(T5b, T5a); + } + } + { + V T1D, T1K, Tbq, Tbr; + T1D = VADD(T1z, T1C); + T1K = VADD(T1G, T1J); + T1L = VADD(T1D, T1K); + Tdv = VSUB(T1D, T1K); + Tbq = VSUB(Tbo, Tbp); + Tbr = VSUB(T1J, T1G); + Tbs = VSUB(Tbq, Tbr); + Tcw = VADD(Tbr, Tbq); + } + { + V TdA, TdB, T57, 
T5i; + TdA = VADD(Tbo, Tbp); + TdB = VADD(Tb8, Tb9); + TdC = VSUB(TdA, TdB); + Teo = VADD(TdA, TdB); + T57 = VSUB(T53, T56); + T5i = VMUL(LDK(KP707106781), VSUB(T5c, T5h)); + T5j = VSUB(T57, T5i); + T6V = VADD(T57, T5i); + } + { + V T5M, T5P, T8w, T8x; + T5M = VADD(T5I, T5L); + T5P = VMUL(LDK(KP707106781), VSUB(T5N, T5O)); + T5Q = VSUB(T5M, T5P); + T6Y = VADD(T5M, T5P); + T8w = VSUB(T5L, T5I); + T8x = VMUL(LDK(KP707106781), VADD(T5c, T5h)); + T8y = VSUB(T8w, T8x); + T9C = VADD(T8w, T8x); + } + { + V Tb7, Tba, T8l, T8m; + Tb7 = VSUB(T1z, T1C); + Tba = VSUB(Tb8, Tb9); + Tbb = VSUB(Tb7, Tba); + Tct = VADD(Tb7, Tba); + T8l = VADD(T53, T56); + T8m = VMUL(LDK(KP707106781), VADD(T5O, T5N)); + T8n = VSUB(T8l, T8m); + T9z = VADD(T8l, T8m); + } + } + { + V TN, T40, T2Q, T3Q, TQ, T3P, T2T, T41, TX, T44, T30, T3U, TU, T43, T2X; + V T3X; + { + V TL, TM, T2R, T2S; + TL = LD(&(ri[WS(is, 62)]), ivs, &(ri[0])); + TM = LD(&(ri[WS(is, 30)]), ivs, &(ri[0])); + TN = VADD(TL, TM); + T40 = VSUB(TL, TM); + { + V T2O, T2P, TO, TP; + T2O = LD(&(ii[WS(is, 62)]), ivs, &(ii[0])); + T2P = LD(&(ii[WS(is, 30)]), ivs, &(ii[0])); + T2Q = VADD(T2O, T2P); + T3Q = VSUB(T2O, T2P); + TO = LD(&(ri[WS(is, 14)]), ivs, &(ri[0])); + TP = LD(&(ri[WS(is, 46)]), ivs, &(ri[0])); + TQ = VADD(TO, TP); + T3P = VSUB(TO, TP); + } + T2R = LD(&(ii[WS(is, 14)]), ivs, &(ii[0])); + T2S = LD(&(ii[WS(is, 46)]), ivs, &(ii[0])); + T2T = VADD(T2R, T2S); + T41 = VSUB(T2R, T2S); + { + V TV, TW, T3S, T2Y, T2Z, T3T; + TV = LD(&(ri[WS(is, 54)]), ivs, &(ri[0])); + TW = LD(&(ri[WS(is, 22)]), ivs, &(ri[0])); + T3S = VSUB(TV, TW); + T2Y = LD(&(ii[WS(is, 54)]), ivs, &(ii[0])); + T2Z = LD(&(ii[WS(is, 22)]), ivs, &(ii[0])); + T3T = VSUB(T2Y, T2Z); + TX = VADD(TV, TW); + T44 = VADD(T3S, T3T); + T30 = VADD(T2Y, T2Z); + T3U = VSUB(T3S, T3T); + } + { + V TS, TT, T3V, T2V, T2W, T3W; + TS = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + TT = LD(&(ri[WS(is, 38)]), ivs, &(ri[0])); + T3V = VSUB(TS, TT); + T2V = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + T2W = 
LD(&(ii[WS(is, 38)]), ivs, &(ii[0])); + T3W = VSUB(T2V, T2W); + TU = VADD(TS, TT); + T43 = VSUB(T3W, T3V); + T2X = VADD(T2V, T2W); + T3X = VADD(T3V, T3W); + } + } + { + V TR, TY, Tax, Tay; + TR = VADD(TN, TQ); + TY = VADD(TU, TX); + TZ = VADD(TR, TY); + Tdf = VSUB(TR, TY); + Tax = VSUB(T2Q, T2T); + Tay = VSUB(TX, TU); + Taz = VSUB(Tax, Tay); + Tch = VADD(Tay, Tax); + } + { + V TaA, TaB, T2U, T31; + TaA = VSUB(TN, TQ); + TaB = VSUB(T2X, T30); + TaC = VSUB(TaA, TaB); + Tci = VADD(TaA, TaB); + T2U = VADD(T2Q, T2T); + T31 = VADD(T2X, T30); + T32 = VADD(T2U, T31); + Tdg = VSUB(T2U, T31); + } + { + V T3R, T3Y, T7T, T7U; + T3R = VADD(T3P, T3Q); + T3Y = VMUL(LDK(KP707106781), VSUB(T3U, T3X)); + T3Z = VSUB(T3R, T3Y); + T6J = VADD(T3R, T3Y); + T7T = VADD(T40, T41); + T7U = VMUL(LDK(KP707106781), VADD(T3X, T3U)); + T7V = VSUB(T7T, T7U); + T9n = VADD(T7T, T7U); + } + { + V T7W, T7X, T42, T45; + T7W = VSUB(T3Q, T3P); + T7X = VMUL(LDK(KP707106781), VADD(T43, T44)); + T7Y = VSUB(T7W, T7X); + T9o = VADD(T7W, T7X); + T42 = VSUB(T40, T41); + T45 = VMUL(LDK(KP707106781), VSUB(T43, T44)); + T46 = VSUB(T42, T45); + T6K = VADD(T42, T45); + } + } + { + V T14, T4P, T4d, TaG, T17, T4a, T4S, TaH, T1e, TaZ, T4j, T4V, T1b, TaY, T4o; + V T4U; + { + V T12, T13, T4Q, T4R; + T12 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + T13 = LD(&(ri[WS(is, 33)]), ivs, &(ri[WS(is, 1)])); + T14 = VADD(T12, T13); + T4P = VSUB(T12, T13); + { + V T4b, T4c, T15, T16; + T4b = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + T4c = LD(&(ii[WS(is, 33)]), ivs, &(ii[WS(is, 1)])); + T4d = VSUB(T4b, T4c); + TaG = VADD(T4b, T4c); + T15 = LD(&(ri[WS(is, 17)]), ivs, &(ri[WS(is, 1)])); + T16 = LD(&(ri[WS(is, 49)]), ivs, &(ri[WS(is, 1)])); + T17 = VADD(T15, T16); + T4a = VSUB(T15, T16); + } + T4Q = LD(&(ii[WS(is, 17)]), ivs, &(ii[WS(is, 1)])); + T4R = LD(&(ii[WS(is, 49)]), ivs, &(ii[WS(is, 1)])); + T4S = VSUB(T4Q, T4R); + TaH = VADD(T4Q, T4R); + { + V T1c, T1d, T4f, T4g, T4h, T4i; + T1c = LD(&(ri[WS(is, 57)]), ivs, 
&(ri[WS(is, 1)])); + T1d = LD(&(ri[WS(is, 25)]), ivs, &(ri[WS(is, 1)])); + T4f = VSUB(T1c, T1d); + T4g = LD(&(ii[WS(is, 57)]), ivs, &(ii[WS(is, 1)])); + T4h = LD(&(ii[WS(is, 25)]), ivs, &(ii[WS(is, 1)])); + T4i = VSUB(T4g, T4h); + T1e = VADD(T1c, T1d); + TaZ = VADD(T4g, T4h); + T4j = VSUB(T4f, T4i); + T4V = VADD(T4f, T4i); + } + { + V T19, T1a, T4k, T4l, T4m, T4n; + T19 = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)])); + T1a = LD(&(ri[WS(is, 41)]), ivs, &(ri[WS(is, 1)])); + T4k = VSUB(T19, T1a); + T4l = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)])); + T4m = LD(&(ii[WS(is, 41)]), ivs, &(ii[WS(is, 1)])); + T4n = VSUB(T4l, T4m); + T1b = VADD(T19, T1a); + TaY = VADD(T4l, T4m); + T4o = VADD(T4k, T4n); + T4U = VSUB(T4n, T4k); + } + } + { + V T18, T1f, TaX, Tb0; + T18 = VADD(T14, T17); + T1f = VADD(T1b, T1e); + T1g = VADD(T18, T1f); + Tdp = VSUB(T18, T1f); + TaX = VSUB(T14, T17); + Tb0 = VSUB(TaY, TaZ); + Tb1 = VSUB(TaX, Tb0); + Tcm = VADD(TaX, Tb0); + } + { + V Tdk, Tdl, T4e, T4p; + Tdk = VADD(TaG, TaH); + Tdl = VADD(TaY, TaZ); + Tdm = VSUB(Tdk, Tdl); + Tej = VADD(Tdk, Tdl); + T4e = VADD(T4a, T4d); + T4p = VMUL(LDK(KP707106781), VSUB(T4j, T4o)); + T4q = VSUB(T4e, T4p); + T6R = VADD(T4e, T4p); + } + { + V T4T, T4W, T8d, T8e; + T4T = VSUB(T4P, T4S); + T4W = VMUL(LDK(KP707106781), VSUB(T4U, T4V)); + T4X = VSUB(T4T, T4W); + T6O = VADD(T4T, T4W); + T8d = VADD(T4P, T4S); + T8e = VMUL(LDK(KP707106781), VADD(T4o, T4j)); + T8f = VSUB(T8d, T8e); + T9s = VADD(T8d, T8e); + } + { + V TaI, TaJ, T82, T83; + TaI = VSUB(TaG, TaH); + TaJ = VSUB(T1e, T1b); + TaK = VSUB(TaI, TaJ); + Tcp = VADD(TaJ, TaI); + T82 = VSUB(T4d, T4a); + T83 = VMUL(LDK(KP707106781), VADD(T4U, T4V)); + T84 = VSUB(T82, T83); + T9v = VADD(T82, T83); + } + } + { + V T1j, TaR, T1m, TaS, T4G, T4L, TaT, TaQ, T89, T88, T1q, TaM, T1t, TaN, T4v; + V T4A, TaO, TaL, T86, T85; + { + V T4H, T4F, T4C, T4K; + { + V T1h, T1i, T4D, T4E; + T1h = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + T1i = LD(&(ri[WS(is, 37)]), ivs, &(ri[WS(is, 
1)])); + T1j = VADD(T1h, T1i); + T4H = VSUB(T1h, T1i); + T4D = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + T4E = LD(&(ii[WS(is, 37)]), ivs, &(ii[WS(is, 1)])); + T4F = VSUB(T4D, T4E); + TaR = VADD(T4D, T4E); + } + { + V T1k, T1l, T4I, T4J; + T1k = LD(&(ri[WS(is, 21)]), ivs, &(ri[WS(is, 1)])); + T1l = LD(&(ri[WS(is, 53)]), ivs, &(ri[WS(is, 1)])); + T1m = VADD(T1k, T1l); + T4C = VSUB(T1k, T1l); + T4I = LD(&(ii[WS(is, 21)]), ivs, &(ii[WS(is, 1)])); + T4J = LD(&(ii[WS(is, 53)]), ivs, &(ii[WS(is, 1)])); + T4K = VSUB(T4I, T4J); + TaS = VADD(T4I, T4J); + } + T4G = VADD(T4C, T4F); + T4L = VSUB(T4H, T4K); + TaT = VSUB(TaR, TaS); + TaQ = VSUB(T1j, T1m); + T89 = VADD(T4H, T4K); + T88 = VSUB(T4F, T4C); + } + { + V T4r, T4z, T4w, T4u; + { + V T1o, T1p, T4x, T4y; + T1o = LD(&(ri[WS(is, 61)]), ivs, &(ri[WS(is, 1)])); + T1p = LD(&(ri[WS(is, 29)]), ivs, &(ri[WS(is, 1)])); + T1q = VADD(T1o, T1p); + T4r = VSUB(T1o, T1p); + T4x = LD(&(ii[WS(is, 61)]), ivs, &(ii[WS(is, 1)])); + T4y = LD(&(ii[WS(is, 29)]), ivs, &(ii[WS(is, 1)])); + T4z = VSUB(T4x, T4y); + TaM = VADD(T4x, T4y); + } + { + V T1r, T1s, T4s, T4t; + T1r = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)])); + T1s = LD(&(ri[WS(is, 45)]), ivs, &(ri[WS(is, 1)])); + T1t = VADD(T1r, T1s); + T4w = VSUB(T1r, T1s); + T4s = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)])); + T4t = LD(&(ii[WS(is, 45)]), ivs, &(ii[WS(is, 1)])); + T4u = VSUB(T4s, T4t); + TaN = VADD(T4s, T4t); + } + T4v = VSUB(T4r, T4u); + T4A = VADD(T4w, T4z); + TaO = VSUB(TaM, TaN); + TaL = VSUB(T1q, T1t); + T86 = VSUB(T4z, T4w); + T85 = VADD(T4r, T4u); + } + { + V T1n, T1u, Tb2, Tb3; + T1n = VADD(T1j, T1m); + T1u = VADD(T1q, T1t); + T1v = VADD(T1n, T1u); + Tdn = VSUB(T1u, T1n); + Tb2 = VSUB(TaT, TaQ); + Tb3 = VADD(TaL, TaO); + Tb4 = VMUL(LDK(KP707106781), VSUB(Tb2, Tb3)); + Tcq = VMUL(LDK(KP707106781), VADD(Tb2, Tb3)); + } + { + V Tdq, Tdr, T4B, T4M; + Tdq = VADD(TaR, TaS); + Tdr = VADD(TaM, TaN); + Tds = VSUB(Tdq, Tdr); + Tek = VADD(Tdq, Tdr); + T4B = VFNMS(LDK(KP923879532), 
T4A, VMUL(LDK(KP382683432), T4v)); + T4M = VFMA(LDK(KP923879532), T4G, VMUL(LDK(KP382683432), T4L)); + T4N = VSUB(T4B, T4M); + T6P = VADD(T4M, T4B); + } + { + V T4Y, T4Z, T8g, T8h; + T4Y = VFNMS(LDK(KP923879532), T4L, VMUL(LDK(KP382683432), T4G)); + T4Z = VFMA(LDK(KP382683432), T4A, VMUL(LDK(KP923879532), T4v)); + T50 = VSUB(T4Y, T4Z); + T6S = VADD(T4Y, T4Z); + T8g = VFNMS(LDK(KP382683432), T89, VMUL(LDK(KP923879532), T88)); + T8h = VFMA(LDK(KP923879532), T86, VMUL(LDK(KP382683432), T85)); + T8i = VSUB(T8g, T8h); + T9w = VADD(T8g, T8h); + } + { + V TaP, TaU, T87, T8a; + TaP = VSUB(TaL, TaO); + TaU = VADD(TaQ, TaT); + TaV = VMUL(LDK(KP707106781), VSUB(TaP, TaU)); + Tcn = VMUL(LDK(KP707106781), VADD(TaU, TaP)); + T87 = VFNMS(LDK(KP382683432), T86, VMUL(LDK(KP923879532), T85)); + T8a = VFMA(LDK(KP382683432), T88, VMUL(LDK(KP923879532), T89)); + T8b = VSUB(T87, T8a); + T9t = VADD(T8a, T87); + } + } + { + V T1O, Tbc, T1R, Tbd, T5o, T5t, Tbf, Tbe, T8p, T8o, T1V, Tbi, T1Y, Tbj, T5z; + V T5E, Tbk, Tbh, T8s, T8r; + { + V T5p, T5n, T5k, T5s; + { + V T1M, T1N, T5l, T5m; + T1M = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + T1N = LD(&(ri[WS(is, 35)]), ivs, &(ri[WS(is, 1)])); + T1O = VADD(T1M, T1N); + T5p = VSUB(T1M, T1N); + T5l = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + T5m = LD(&(ii[WS(is, 35)]), ivs, &(ii[WS(is, 1)])); + T5n = VSUB(T5l, T5m); + Tbc = VADD(T5l, T5m); + } + { + V T1P, T1Q, T5q, T5r; + T1P = LD(&(ri[WS(is, 19)]), ivs, &(ri[WS(is, 1)])); + T1Q = LD(&(ri[WS(is, 51)]), ivs, &(ri[WS(is, 1)])); + T1R = VADD(T1P, T1Q); + T5k = VSUB(T1P, T1Q); + T5q = LD(&(ii[WS(is, 19)]), ivs, &(ii[WS(is, 1)])); + T5r = LD(&(ii[WS(is, 51)]), ivs, &(ii[WS(is, 1)])); + T5s = VSUB(T5q, T5r); + Tbd = VADD(T5q, T5r); + } + T5o = VADD(T5k, T5n); + T5t = VSUB(T5p, T5s); + Tbf = VSUB(T1O, T1R); + Tbe = VSUB(Tbc, Tbd); + T8p = VADD(T5p, T5s); + T8o = VSUB(T5n, T5k); + } + { + V T5A, T5y, T5v, T5D; + { + V T1T, T1U, T5w, T5x; + T1T = LD(&(ri[WS(is, 59)]), ivs, &(ri[WS(is, 1)])); + T1U = 
LD(&(ri[WS(is, 27)]), ivs, &(ri[WS(is, 1)])); + T1V = VADD(T1T, T1U); + T5A = VSUB(T1T, T1U); + T5w = LD(&(ii[WS(is, 59)]), ivs, &(ii[WS(is, 1)])); + T5x = LD(&(ii[WS(is, 27)]), ivs, &(ii[WS(is, 1)])); + T5y = VSUB(T5w, T5x); + Tbi = VADD(T5w, T5x); + } + { + V T1W, T1X, T5B, T5C; + T1W = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)])); + T1X = LD(&(ri[WS(is, 43)]), ivs, &(ri[WS(is, 1)])); + T1Y = VADD(T1W, T1X); + T5v = VSUB(T1W, T1X); + T5B = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)])); + T5C = LD(&(ii[WS(is, 43)]), ivs, &(ii[WS(is, 1)])); + T5D = VSUB(T5B, T5C); + Tbj = VADD(T5B, T5C); + } + T5z = VADD(T5v, T5y); + T5E = VSUB(T5A, T5D); + Tbk = VSUB(Tbi, Tbj); + Tbh = VSUB(T1V, T1Y); + T8s = VADD(T5A, T5D); + T8r = VSUB(T5y, T5v); + } + { + V T1S, T1Z, Tbt, Tbu; + T1S = VADD(T1O, T1R); + T1Z = VADD(T1V, T1Y); + T20 = VADD(T1S, T1Z); + TdD = VSUB(T1Z, T1S); + Tbt = VSUB(Tbh, Tbk); + Tbu = VADD(Tbf, Tbe); + Tbv = VMUL(LDK(KP707106781), VSUB(Tbt, Tbu)); + Tcu = VMUL(LDK(KP707106781), VADD(Tbu, Tbt)); + } + { + V Tdw, Tdx, T5u, T5F; + Tdw = VADD(Tbc, Tbd); + Tdx = VADD(Tbi, Tbj); + Tdy = VSUB(Tdw, Tdx); + Tep = VADD(Tdw, Tdx); + T5u = VFNMS(LDK(KP923879532), T5t, VMUL(LDK(KP382683432), T5o)); + T5F = VFMA(LDK(KP382683432), T5z, VMUL(LDK(KP923879532), T5E)); + T5G = VSUB(T5u, T5F); + T6Z = VADD(T5u, T5F); + } + { + V T5R, T5S, T8z, T8A; + T5R = VFNMS(LDK(KP923879532), T5z, VMUL(LDK(KP382683432), T5E)); + T5S = VFMA(LDK(KP923879532), T5o, VMUL(LDK(KP382683432), T5t)); + T5T = VSUB(T5R, T5S); + T6W = VADD(T5S, T5R); + T8z = VFNMS(LDK(KP382683432), T8r, VMUL(LDK(KP923879532), T8s)); + T8A = VFMA(LDK(KP382683432), T8o, VMUL(LDK(KP923879532), T8p)); + T8B = VSUB(T8z, T8A); + T9A = VADD(T8A, T8z); + } + { + V Tbg, Tbl, T8q, T8t; + Tbg = VSUB(Tbe, Tbf); + Tbl = VADD(Tbh, Tbk); + Tbm = VMUL(LDK(KP707106781), VSUB(Tbg, Tbl)); + Tcx = VMUL(LDK(KP707106781), VADD(Tbg, Tbl)); + T8q = VFNMS(LDK(KP382683432), T8p, VMUL(LDK(KP923879532), T8o)); + T8t = VFMA(LDK(KP923879532), T8r, 
VMUL(LDK(KP382683432), T8s)); + T8u = VSUB(T8q, T8t); + T9D = VADD(T8q, T8t); + } + } + { + V TeJ, TeK, TeL, TeM, TeN, TeO, TeP, TeQ, TeR, TeS, TeT, TeU, TeV, TeW, TeX; + V TeY, TeZ, Tf0, Tf1, Tf2, Tf3, Tf4, Tf5, Tf6, Tf7, Tf8, Tf9, Tfa, Tfb, Tfc; + V Tfd, Tfe, Tff, Tfg, Tfh, Tfi, Tfj, Tfk, Tfl, Tfm, Tfn, Tfo, Tfp, Tfq, Tfr; + V Tfs, Tft, Tfu; + { + V T11, TeD, TeG, TeI, T22, T23, T34, TeH; + { + V Tv, T10, TeE, TeF; + Tv = VADD(Tf, Tu); + T10 = VADD(TK, TZ); + T11 = VADD(Tv, T10); + TeD = VSUB(Tv, T10); + TeE = VADD(Tej, Tek); + TeF = VADD(Teo, Tep); + TeG = VSUB(TeE, TeF); + TeI = VADD(TeE, TeF); + } + { + V T1w, T21, T2y, T33; + T1w = VADD(T1g, T1v); + T21 = VADD(T1L, T20); + T22 = VADD(T1w, T21); + T23 = VSUB(T21, T1w); + T2y = VADD(T2i, T2x); + T33 = VADD(T2N, T32); + T34 = VSUB(T2y, T33); + TeH = VADD(T2y, T33); + } + TeJ = VSUB(T11, T22); + STM4(&(ro[32]), TeJ, ovs, &(ro[0])); + TeK = VSUB(TeH, TeI); + STM4(&(io[32]), TeK, ovs, &(io[0])); + TeL = VADD(T11, T22); + STM4(&(ro[0]), TeL, ovs, &(ro[0])); + TeM = VADD(TeH, TeI); + STM4(&(io[0]), TeM, ovs, &(io[0])); + TeN = VADD(T23, T34); + STM4(&(io[16]), TeN, ovs, &(io[0])); + TeO = VADD(TeD, TeG); + STM4(&(ro[16]), TeO, ovs, &(ro[0])); + TeP = VSUB(T34, T23); + STM4(&(io[48]), TeP, ovs, &(io[0])); + TeQ = VSUB(TeD, TeG); + STM4(&(ro[48]), TeQ, ovs, &(ro[0])); + } + { + V Teh, Tex, Tev, TeB, Tem, Tey, Ter, Tez; + { + V Tef, Teg, Tet, Teu; + Tef = VSUB(Tf, Tu); + Teg = VSUB(T2N, T32); + Teh = VADD(Tef, Teg); + Tex = VSUB(Tef, Teg); + Tet = VSUB(T2i, T2x); + Teu = VSUB(TZ, TK); + Tev = VSUB(Tet, Teu); + TeB = VADD(Teu, Tet); + } + { + V Tei, Tel, Ten, Teq; + Tei = VSUB(T1g, T1v); + Tel = VSUB(Tej, Tek); + Tem = VADD(Tei, Tel); + Tey = VSUB(Tel, Tei); + Ten = VSUB(T1L, T20); + Teq = VSUB(Teo, Tep); + Ter = VSUB(Ten, Teq); + Tez = VADD(Ten, Teq); + } + { + V Tes, TeC, Tew, TeA; + Tes = VMUL(LDK(KP707106781), VADD(Tem, Ter)); + TeR = VSUB(Teh, Tes); + STM4(&(ro[40]), TeR, ovs, &(ro[0])); + TeS = VADD(Teh, Tes); + 
STM4(&(ro[8]), TeS, ovs, &(ro[0])); + TeC = VMUL(LDK(KP707106781), VADD(Tey, Tez)); + TeT = VSUB(TeB, TeC); + STM4(&(io[40]), TeT, ovs, &(io[0])); + TeU = VADD(TeB, TeC); + STM4(&(io[8]), TeU, ovs, &(io[0])); + Tew = VMUL(LDK(KP707106781), VSUB(Ter, Tem)); + TeV = VSUB(Tev, Tew); + STM4(&(io[56]), TeV, ovs, &(io[0])); + TeW = VADD(Tev, Tew); + STM4(&(io[24]), TeW, ovs, &(io[0])); + TeA = VMUL(LDK(KP707106781), VSUB(Tey, Tez)); + TeX = VSUB(Tex, TeA); + STM4(&(ro[56]), TeX, ovs, &(ro[0])); + TeY = VADD(Tex, TeA); + STM4(&(ro[24]), TeY, ovs, &(ro[0])); + } + } + { + V Tdb, TdV, Te5, TdJ, Tdi, Te6, Te3, Teb, TdM, TdW, Tdu, TdQ, Te0, Tea, TdF; + V TdR; + { + V Tde, Tdh, Tdo, Tdt; + Tdb = VSUB(Td9, Tda); + TdV = VADD(Td9, Tda); + Te5 = VADD(TdI, TdH); + TdJ = VSUB(TdH, TdI); + Tde = VSUB(Tdc, Tdd); + Tdh = VADD(Tdf, Tdg); + Tdi = VMUL(LDK(KP707106781), VSUB(Tde, Tdh)); + Te6 = VMUL(LDK(KP707106781), VADD(Tde, Tdh)); + { + V Te1, Te2, TdK, TdL; + Te1 = VADD(Tdv, Tdy); + Te2 = VADD(TdD, TdC); + Te3 = VFNMS(LDK(KP382683432), Te2, VMUL(LDK(KP923879532), Te1)); + Teb = VFMA(LDK(KP923879532), Te2, VMUL(LDK(KP382683432), Te1)); + TdK = VSUB(Tdf, Tdg); + TdL = VADD(Tdd, Tdc); + TdM = VMUL(LDK(KP707106781), VSUB(TdK, TdL)); + TdW = VMUL(LDK(KP707106781), VADD(TdL, TdK)); + } + Tdo = VSUB(Tdm, Tdn); + Tdt = VSUB(Tdp, Tds); + Tdu = VFMA(LDK(KP923879532), Tdo, VMUL(LDK(KP382683432), Tdt)); + TdQ = VFNMS(LDK(KP923879532), Tdt, VMUL(LDK(KP382683432), Tdo)); + { + V TdY, TdZ, Tdz, TdE; + TdY = VADD(Tdn, Tdm); + TdZ = VADD(Tdp, Tds); + Te0 = VFMA(LDK(KP382683432), TdY, VMUL(LDK(KP923879532), TdZ)); + Tea = VFNMS(LDK(KP382683432), TdZ, VMUL(LDK(KP923879532), TdY)); + Tdz = VSUB(Tdv, Tdy); + TdE = VSUB(TdC, TdD); + TdF = VFNMS(LDK(KP923879532), TdE, VMUL(LDK(KP382683432), Tdz)); + TdR = VFMA(LDK(KP382683432), TdE, VMUL(LDK(KP923879532), Tdz)); + } + } + { + V Tdj, TdG, TdT, TdU; + Tdj = VADD(Tdb, Tdi); + TdG = VADD(Tdu, TdF); + TeZ = VSUB(Tdj, TdG); + STM4(&(ro[44]), TeZ, ovs, &(ro[0])); 
+ Tf0 = VADD(Tdj, TdG); + STM4(&(ro[12]), Tf0, ovs, &(ro[0])); + TdT = VADD(TdJ, TdM); + TdU = VADD(TdQ, TdR); + Tf1 = VSUB(TdT, TdU); + STM4(&(io[44]), Tf1, ovs, &(io[0])); + Tf2 = VADD(TdT, TdU); + STM4(&(io[12]), Tf2, ovs, &(io[0])); + } + { + V TdN, TdO, TdP, TdS; + TdN = VSUB(TdJ, TdM); + TdO = VSUB(TdF, Tdu); + Tf3 = VSUB(TdN, TdO); + STM4(&(io[60]), Tf3, ovs, &(io[0])); + Tf4 = VADD(TdN, TdO); + STM4(&(io[28]), Tf4, ovs, &(io[0])); + TdP = VSUB(Tdb, Tdi); + TdS = VSUB(TdQ, TdR); + Tf5 = VSUB(TdP, TdS); + STM4(&(ro[60]), Tf5, ovs, &(ro[0])); + Tf6 = VADD(TdP, TdS); + STM4(&(ro[28]), Tf6, ovs, &(ro[0])); + } + { + V TdX, Te4, Ted, Tee; + TdX = VADD(TdV, TdW); + Te4 = VADD(Te0, Te3); + Tf7 = VSUB(TdX, Te4); + STM4(&(ro[36]), Tf7, ovs, &(ro[0])); + Tf8 = VADD(TdX, Te4); + STM4(&(ro[4]), Tf8, ovs, &(ro[0])); + Ted = VADD(Te5, Te6); + Tee = VADD(Tea, Teb); + Tf9 = VSUB(Ted, Tee); + STM4(&(io[36]), Tf9, ovs, &(io[0])); + Tfa = VADD(Ted, Tee); + STM4(&(io[4]), Tfa, ovs, &(io[0])); + } + { + V Te7, Te8, Te9, Tec; + Te7 = VSUB(Te5, Te6); + Te8 = VSUB(Te3, Te0); + Tfb = VSUB(Te7, Te8); + STM4(&(io[52]), Tfb, ovs, &(io[0])); + Tfc = VADD(Te7, Te8); + STM4(&(io[20]), Tfc, ovs, &(io[0])); + Te9 = VSUB(TdV, TdW); + Tec = VSUB(Tea, Teb); + Tfd = VSUB(Te9, Tec); + STM4(&(ro[52]), Tfd, ovs, &(ro[0])); + Tfe = VADD(Te9, Tec); + STM4(&(ro[20]), Tfe, ovs, &(ro[0])); + } + } + { + V Tcd, TcP, TcD, TcZ, Tck, Td0, TcX, Td5, Tcs, TcK, TcG, TcQ, TcU, Td4, Tcz; + V TcL, Tcc, TcC; + Tcc = VMUL(LDK(KP707106781), VADD(TbD, TbC)); + Tcd = VSUB(Tcb, Tcc); + TcP = VADD(Tcb, Tcc); + TcC = VMUL(LDK(KP707106781), VADD(Tak, Tan)); + TcD = VSUB(TcB, TcC); + TcZ = VADD(TcB, TcC); + { + V Tcg, Tcj, TcV, TcW; + Tcg = VFNMS(LDK(KP382683432), Tcf, VMUL(LDK(KP923879532), Tce)); + Tcj = VFMA(LDK(KP923879532), Tch, VMUL(LDK(KP382683432), Tci)); + Tck = VSUB(Tcg, Tcj); + Td0 = VADD(Tcg, Tcj); + TcV = VADD(Tct, Tcu); + TcW = VADD(Tcw, Tcx); + TcX = VFNMS(LDK(KP195090322), TcW, VMUL(LDK(KP980785280), 
TcV)); + Td5 = VFMA(LDK(KP195090322), TcV, VMUL(LDK(KP980785280), TcW)); + } + { + V Tco, Tcr, TcE, TcF; + Tco = VSUB(Tcm, Tcn); + Tcr = VSUB(Tcp, Tcq); + Tcs = VFMA(LDK(KP555570233), Tco, VMUL(LDK(KP831469612), Tcr)); + TcK = VFNMS(LDK(KP831469612), Tco, VMUL(LDK(KP555570233), Tcr)); + TcE = VFNMS(LDK(KP382683432), Tch, VMUL(LDK(KP923879532), Tci)); + TcF = VFMA(LDK(KP382683432), Tce, VMUL(LDK(KP923879532), Tcf)); + TcG = VSUB(TcE, TcF); + TcQ = VADD(TcF, TcE); + } + { + V TcS, TcT, Tcv, Tcy; + TcS = VADD(Tcm, Tcn); + TcT = VADD(Tcp, Tcq); + TcU = VFMA(LDK(KP980785280), TcS, VMUL(LDK(KP195090322), TcT)); + Td4 = VFNMS(LDK(KP195090322), TcS, VMUL(LDK(KP980785280), TcT)); + Tcv = VSUB(Tct, Tcu); + Tcy = VSUB(Tcw, Tcx); + Tcz = VFNMS(LDK(KP831469612), Tcy, VMUL(LDK(KP555570233), Tcv)); + TcL = VFMA(LDK(KP831469612), Tcv, VMUL(LDK(KP555570233), Tcy)); + } + { + V Tcl, TcA, TcN, TcO; + Tcl = VADD(Tcd, Tck); + TcA = VADD(Tcs, Tcz); + Tff = VSUB(Tcl, TcA); + STM4(&(ro[42]), Tff, ovs, &(ro[0])); + Tfg = VADD(Tcl, TcA); + STM4(&(ro[10]), Tfg, ovs, &(ro[0])); + TcN = VADD(TcD, TcG); + TcO = VADD(TcK, TcL); + Tfh = VSUB(TcN, TcO); + STM4(&(io[42]), Tfh, ovs, &(io[0])); + Tfi = VADD(TcN, TcO); + STM4(&(io[10]), Tfi, ovs, &(io[0])); + } + { + V TcH, TcI, TcJ, TcM; + TcH = VSUB(TcD, TcG); + TcI = VSUB(Tcz, Tcs); + Tfj = VSUB(TcH, TcI); + STM4(&(io[58]), Tfj, ovs, &(io[0])); + Tfk = VADD(TcH, TcI); + STM4(&(io[26]), Tfk, ovs, &(io[0])); + TcJ = VSUB(Tcd, Tck); + TcM = VSUB(TcK, TcL); + Tfl = VSUB(TcJ, TcM); + STM4(&(ro[58]), Tfl, ovs, &(ro[0])); + Tfm = VADD(TcJ, TcM); + STM4(&(ro[26]), Tfm, ovs, &(ro[0])); + } + { + V TcR, TcY, Td7, Td8; + TcR = VADD(TcP, TcQ); + TcY = VADD(TcU, TcX); + Tfn = VSUB(TcR, TcY); + STM4(&(ro[34]), Tfn, ovs, &(ro[0])); + Tfo = VADD(TcR, TcY); + STM4(&(ro[2]), Tfo, ovs, &(ro[0])); + Td7 = VADD(TcZ, Td0); + Td8 = VADD(Td4, Td5); + Tfp = VSUB(Td7, Td8); + STM4(&(io[34]), Tfp, ovs, &(io[0])); + Tfq = VADD(Td7, Td8); + STM4(&(io[2]), Tfq, ovs, &(io[0])); 
+ } + { + V Td1, Td2, Td3, Td6; + Td1 = VSUB(TcZ, Td0); + Td2 = VSUB(TcX, TcU); + Tfr = VSUB(Td1, Td2); + STM4(&(io[50]), Tfr, ovs, &(io[0])); + Tfs = VADD(Td1, Td2); + STM4(&(io[18]), Tfs, ovs, &(io[0])); + Td3 = VSUB(TcP, TcQ); + Td6 = VSUB(Td4, Td5); + Tft = VSUB(Td3, Td6); + STM4(&(ro[50]), Tft, ovs, &(ro[0])); + Tfu = VADD(Td3, Td6); + STM4(&(ro[18]), Tfu, ovs, &(ro[0])); + } + } + { + V Tfv, Tfw, Tfx, Tfy, Tfz, TfA, TfB, TfC, TfD, TfE, TfF, TfG, TfH, TfI, TfJ; + V TfK, TfL, TfM, TfN, TfO, TfP, TfQ, TfR, TfS, TfT, TfU, TfV, TfW, TfX, TfY; + V TfZ, Tg0; + { + V Tap, TbR, TbF, Tc1, TaE, Tc2, TbZ, Tc7, Tb6, TbM, TbI, TbS, TbW, Tc6, Tbx; + V TbN, Tao, TbE; + Tao = VMUL(LDK(KP707106781), VSUB(Tak, Tan)); + Tap = VSUB(Tah, Tao); + TbR = VADD(Tah, Tao); + TbE = VMUL(LDK(KP707106781), VSUB(TbC, TbD)); + TbF = VSUB(TbB, TbE); + Tc1 = VADD(TbB, TbE); + { + V Taw, TaD, TbX, TbY; + Taw = VFNMS(LDK(KP923879532), Tav, VMUL(LDK(KP382683432), Tas)); + TaD = VFMA(LDK(KP382683432), Taz, VMUL(LDK(KP923879532), TaC)); + TaE = VSUB(Taw, TaD); + Tc2 = VADD(Taw, TaD); + TbX = VADD(Tbb, Tbm); + TbY = VADD(Tbs, Tbv); + TbZ = VFNMS(LDK(KP555570233), TbY, VMUL(LDK(KP831469612), TbX)); + Tc7 = VFMA(LDK(KP831469612), TbY, VMUL(LDK(KP555570233), TbX)); + } + { + V TaW, Tb5, TbG, TbH; + TaW = VSUB(TaK, TaV); + Tb5 = VSUB(Tb1, Tb4); + Tb6 = VFMA(LDK(KP980785280), TaW, VMUL(LDK(KP195090322), Tb5)); + TbM = VFNMS(LDK(KP980785280), Tb5, VMUL(LDK(KP195090322), TaW)); + TbG = VFNMS(LDK(KP923879532), Taz, VMUL(LDK(KP382683432), TaC)); + TbH = VFMA(LDK(KP923879532), Tas, VMUL(LDK(KP382683432), Tav)); + TbI = VSUB(TbG, TbH); + TbS = VADD(TbH, TbG); + } + { + V TbU, TbV, Tbn, Tbw; + TbU = VADD(TaK, TaV); + TbV = VADD(Tb1, Tb4); + TbW = VFMA(LDK(KP555570233), TbU, VMUL(LDK(KP831469612), TbV)); + Tc6 = VFNMS(LDK(KP555570233), TbV, VMUL(LDK(KP831469612), TbU)); + Tbn = VSUB(Tbb, Tbm); + Tbw = VSUB(Tbs, Tbv); + Tbx = VFNMS(LDK(KP980785280), Tbw, VMUL(LDK(KP195090322), Tbn)); + TbN = 
VFMA(LDK(KP195090322), Tbw, VMUL(LDK(KP980785280), Tbn)); + } + { + V TaF, Tby, TbP, TbQ; + TaF = VADD(Tap, TaE); + Tby = VADD(Tb6, Tbx); + Tfv = VSUB(TaF, Tby); + STM4(&(ro[46]), Tfv, ovs, &(ro[0])); + Tfw = VADD(TaF, Tby); + STM4(&(ro[14]), Tfw, ovs, &(ro[0])); + TbP = VADD(TbF, TbI); + TbQ = VADD(TbM, TbN); + Tfx = VSUB(TbP, TbQ); + STM4(&(io[46]), Tfx, ovs, &(io[0])); + Tfy = VADD(TbP, TbQ); + STM4(&(io[14]), Tfy, ovs, &(io[0])); + } + { + V TbJ, TbK, TbL, TbO; + TbJ = VSUB(TbF, TbI); + TbK = VSUB(Tbx, Tb6); + Tfz = VSUB(TbJ, TbK); + STM4(&(io[62]), Tfz, ovs, &(io[0])); + TfA = VADD(TbJ, TbK); + STM4(&(io[30]), TfA, ovs, &(io[0])); + TbL = VSUB(Tap, TaE); + TbO = VSUB(TbM, TbN); + TfB = VSUB(TbL, TbO); + STM4(&(ro[62]), TfB, ovs, &(ro[0])); + TfC = VADD(TbL, TbO); + STM4(&(ro[30]), TfC, ovs, &(ro[0])); + } + { + V TbT, Tc0, Tc9, Tca; + TbT = VADD(TbR, TbS); + Tc0 = VADD(TbW, TbZ); + TfD = VSUB(TbT, Tc0); + STM4(&(ro[38]), TfD, ovs, &(ro[0])); + TfE = VADD(TbT, Tc0); + STM4(&(ro[6]), TfE, ovs, &(ro[0])); + Tc9 = VADD(Tc1, Tc2); + Tca = VADD(Tc6, Tc7); + TfF = VSUB(Tc9, Tca); + STM4(&(io[38]), TfF, ovs, &(io[0])); + TfG = VADD(Tc9, Tca); + STM4(&(io[6]), TfG, ovs, &(io[0])); + } + { + V Tc3, Tc4, Tc5, Tc8; + Tc3 = VSUB(Tc1, Tc2); + Tc4 = VSUB(TbZ, TbW); + TfH = VSUB(Tc3, Tc4); + STM4(&(io[54]), TfH, ovs, &(io[0])); + TfI = VADD(Tc3, Tc4); + STM4(&(io[22]), TfI, ovs, &(io[0])); + Tc5 = VSUB(TbR, TbS); + Tc8 = VSUB(Tc6, Tc7); + TfJ = VSUB(Tc5, Tc8); + STM4(&(ro[54]), TfJ, ovs, &(ro[0])); + TfK = VADD(Tc5, Tc8); + STM4(&(ro[22]), TfK, ovs, &(ro[0])); + } + } + { + V T6F, T7h, T7m, T7w, T7p, T7x, T6M, T7s, T6U, T7c, T75, T7r, T78, T7i, T71; + V T7d; + { + V T6D, T6E, T7k, T7l; + T6D = VADD(T37, T3e); + T6E = VADD(T65, T64); + T6F = VSUB(T6D, T6E); + T7h = VADD(T6D, T6E); + T7k = VADD(T6O, T6P); + T7l = VADD(T6R, T6S); + T7m = VFMA(LDK(KP956940335), T7k, VMUL(LDK(KP290284677), T7l)); + T7w = VFNMS(LDK(KP290284677), T7k, VMUL(LDK(KP956940335), T7l)); + } + { + V T7n, 
T7o, T6I, T6L; + T7n = VADD(T6V, T6W); + T7o = VADD(T6Y, T6Z); + T7p = VFNMS(LDK(KP290284677), T7o, VMUL(LDK(KP956940335), T7n)); + T7x = VFMA(LDK(KP290284677), T7n, VMUL(LDK(KP956940335), T7o)); + T6I = VFNMS(LDK(KP555570233), T6H, VMUL(LDK(KP831469612), T6G)); + T6L = VFMA(LDK(KP831469612), T6J, VMUL(LDK(KP555570233), T6K)); + T6M = VSUB(T6I, T6L); + T7s = VADD(T6I, T6L); + } + { + V T6Q, T6T, T73, T74; + T6Q = VSUB(T6O, T6P); + T6T = VSUB(T6R, T6S); + T6U = VFMA(LDK(KP471396736), T6Q, VMUL(LDK(KP881921264), T6T)); + T7c = VFNMS(LDK(KP881921264), T6Q, VMUL(LDK(KP471396736), T6T)); + T73 = VADD(T5Z, T62); + T74 = VADD(T3m, T3t); + T75 = VSUB(T73, T74); + T7r = VADD(T73, T74); + } + { + V T76, T77, T6X, T70; + T76 = VFNMS(LDK(KP555570233), T6J, VMUL(LDK(KP831469612), T6K)); + T77 = VFMA(LDK(KP555570233), T6G, VMUL(LDK(KP831469612), T6H)); + T78 = VSUB(T76, T77); + T7i = VADD(T77, T76); + T6X = VSUB(T6V, T6W); + T70 = VSUB(T6Y, T6Z); + T71 = VFNMS(LDK(KP881921264), T70, VMUL(LDK(KP471396736), T6X)); + T7d = VFMA(LDK(KP881921264), T6X, VMUL(LDK(KP471396736), T70)); + } + { + V T6N, T72, T7f, T7g; + T6N = VADD(T6F, T6M); + T72 = VADD(T6U, T71); + TfL = VSUB(T6N, T72); + STM4(&(ro[43]), TfL, ovs, &(ro[1])); + TfM = VADD(T6N, T72); + STM4(&(ro[11]), TfM, ovs, &(ro[1])); + T7f = VADD(T75, T78); + T7g = VADD(T7c, T7d); + TfN = VSUB(T7f, T7g); + STM4(&(io[43]), TfN, ovs, &(io[1])); + TfO = VADD(T7f, T7g); + STM4(&(io[11]), TfO, ovs, &(io[1])); + } + { + V T79, T7a, T7b, T7e; + T79 = VSUB(T75, T78); + T7a = VSUB(T71, T6U); + TfP = VSUB(T79, T7a); + STM4(&(io[59]), TfP, ovs, &(io[1])); + TfQ = VADD(T79, T7a); + STM4(&(io[27]), TfQ, ovs, &(io[1])); + T7b = VSUB(T6F, T6M); + T7e = VSUB(T7c, T7d); + TfR = VSUB(T7b, T7e); + STM4(&(ro[59]), TfR, ovs, &(ro[1])); + TfS = VADD(T7b, T7e); + STM4(&(ro[27]), TfS, ovs, &(ro[1])); + } + { + V T7j, T7q, T7z, T7A; + T7j = VADD(T7h, T7i); + T7q = VADD(T7m, T7p); + TfT = VSUB(T7j, T7q); + STM4(&(ro[35]), TfT, ovs, &(ro[1])); + TfU = 
VADD(T7j, T7q); + STM4(&(ro[3]), TfU, ovs, &(ro[1])); + T7z = VADD(T7r, T7s); + T7A = VADD(T7w, T7x); + TfV = VSUB(T7z, T7A); + STM4(&(io[35]), TfV, ovs, &(io[1])); + TfW = VADD(T7z, T7A); + STM4(&(io[3]), TfW, ovs, &(io[1])); + } + { + V T7t, T7u, T7v, T7y; + T7t = VSUB(T7r, T7s); + T7u = VSUB(T7p, T7m); + TfX = VSUB(T7t, T7u); + STM4(&(io[51]), TfX, ovs, &(io[1])); + TfY = VADD(T7t, T7u); + STM4(&(io[19]), TfY, ovs, &(io[1])); + T7v = VSUB(T7h, T7i); + T7y = VSUB(T7w, T7x); + TfZ = VSUB(T7v, T7y); + STM4(&(ro[51]), TfZ, ovs, &(ro[1])); + Tg0 = VADD(T7v, T7y); + STM4(&(ro[19]), Tg0, ovs, &(ro[1])); + } + } + { + V T9j, T9V, Ta0, Taa, Ta3, Tab, T9q, Ta6, T9y, T9Q, T9J, Ta5, T9M, T9W, T9F; + V T9R; + { + V T9h, T9i, T9Y, T9Z; + T9h = VADD(T7B, T7C); + T9i = VADD(T8J, T8I); + T9j = VSUB(T9h, T9i); + T9V = VADD(T9h, T9i); + T9Y = VADD(T9s, T9t); + T9Z = VADD(T9v, T9w); + Ta0 = VFMA(LDK(KP995184726), T9Y, VMUL(LDK(KP098017140), T9Z)); + Taa = VFNMS(LDK(KP098017140), T9Y, VMUL(LDK(KP995184726), T9Z)); + } + { + V Ta1, Ta2, T9m, T9p; + Ta1 = VADD(T9z, T9A); + Ta2 = VADD(T9C, T9D); + Ta3 = VFNMS(LDK(KP098017140), Ta2, VMUL(LDK(KP995184726), Ta1)); + Tab = VFMA(LDK(KP098017140), Ta1, VMUL(LDK(KP995184726), Ta2)); + T9m = VFNMS(LDK(KP195090322), T9l, VMUL(LDK(KP980785280), T9k)); + T9p = VFMA(LDK(KP195090322), T9n, VMUL(LDK(KP980785280), T9o)); + T9q = VSUB(T9m, T9p); + Ta6 = VADD(T9m, T9p); + } + { + V T9u, T9x, T9H, T9I; + T9u = VSUB(T9s, T9t); + T9x = VSUB(T9v, T9w); + T9y = VFMA(LDK(KP634393284), T9u, VMUL(LDK(KP773010453), T9x)); + T9Q = VFNMS(LDK(KP773010453), T9u, VMUL(LDK(KP634393284), T9x)); + T9H = VADD(T8F, T8G); + T9I = VADD(T7G, T7J); + T9J = VSUB(T9H, T9I); + Ta5 = VADD(T9H, T9I); + } + { + V T9K, T9L, T9B, T9E; + T9K = VFNMS(LDK(KP195090322), T9o, VMUL(LDK(KP980785280), T9n)); + T9L = VFMA(LDK(KP980785280), T9l, VMUL(LDK(KP195090322), T9k)); + T9M = VSUB(T9K, T9L); + T9W = VADD(T9L, T9K); + T9B = VSUB(T9z, T9A); + T9E = VSUB(T9C, T9D); + T9F = 
VFNMS(LDK(KP773010453), T9E, VMUL(LDK(KP634393284), T9B)); + T9R = VFMA(LDK(KP773010453), T9B, VMUL(LDK(KP634393284), T9E)); + } + { + V T9r, T9G, Tg1, Tg2; + T9r = VADD(T9j, T9q); + T9G = VADD(T9y, T9F); + Tg1 = VSUB(T9r, T9G); + STM4(&(ro[41]), Tg1, ovs, &(ro[1])); + STN4(&(ro[40]), TeR, Tg1, Tff, TfL, ovs); + Tg2 = VADD(T9r, T9G); + STM4(&(ro[9]), Tg2, ovs, &(ro[1])); + STN4(&(ro[8]), TeS, Tg2, Tfg, TfM, ovs); + } + { + V T9T, T9U, Tg3, Tg4; + T9T = VADD(T9J, T9M); + T9U = VADD(T9Q, T9R); + Tg3 = VSUB(T9T, T9U); + STM4(&(io[41]), Tg3, ovs, &(io[1])); + STN4(&(io[40]), TeT, Tg3, Tfh, TfN, ovs); + Tg4 = VADD(T9T, T9U); + STM4(&(io[9]), Tg4, ovs, &(io[1])); + STN4(&(io[8]), TeU, Tg4, Tfi, TfO, ovs); + } + { + V T9N, T9O, Tg5, Tg6; + T9N = VSUB(T9J, T9M); + T9O = VSUB(T9F, T9y); + Tg5 = VSUB(T9N, T9O); + STM4(&(io[57]), Tg5, ovs, &(io[1])); + STN4(&(io[56]), TeV, Tg5, Tfj, TfP, ovs); + Tg6 = VADD(T9N, T9O); + STM4(&(io[25]), Tg6, ovs, &(io[1])); + STN4(&(io[24]), TeW, Tg6, Tfk, TfQ, ovs); + } + { + V T9P, T9S, Tg7, Tg8; + T9P = VSUB(T9j, T9q); + T9S = VSUB(T9Q, T9R); + Tg7 = VSUB(T9P, T9S); + STM4(&(ro[57]), Tg7, ovs, &(ro[1])); + STN4(&(ro[56]), TeX, Tg7, Tfl, TfR, ovs); + Tg8 = VADD(T9P, T9S); + STM4(&(ro[25]), Tg8, ovs, &(ro[1])); + STN4(&(ro[24]), TeY, Tg8, Tfm, TfS, ovs); + } + { + V T9X, Ta4, Tg9, Tga; + T9X = VADD(T9V, T9W); + Ta4 = VADD(Ta0, Ta3); + Tg9 = VSUB(T9X, Ta4); + STM4(&(ro[33]), Tg9, ovs, &(ro[1])); + STN4(&(ro[32]), TeJ, Tg9, Tfn, TfT, ovs); + Tga = VADD(T9X, Ta4); + STM4(&(ro[1]), Tga, ovs, &(ro[1])); + STN4(&(ro[0]), TeL, Tga, Tfo, TfU, ovs); + } + { + V Tad, Tae, Tgb, Tgc; + Tad = VADD(Ta5, Ta6); + Tae = VADD(Taa, Tab); + Tgb = VSUB(Tad, Tae); + STM4(&(io[33]), Tgb, ovs, &(io[1])); + STN4(&(io[32]), TeK, Tgb, Tfp, TfV, ovs); + Tgc = VADD(Tad, Tae); + STM4(&(io[1]), Tgc, ovs, &(io[1])); + STN4(&(io[0]), TeM, Tgc, Tfq, TfW, ovs); + } + { + V Ta7, Ta8, Tgd, Tge; + Ta7 = VSUB(Ta5, Ta6); + Ta8 = VSUB(Ta3, Ta0); + Tgd = VSUB(Ta7, Ta8); + 
STM4(&(io[49]), Tgd, ovs, &(io[1])); + STN4(&(io[48]), TeP, Tgd, Tfr, TfX, ovs); + Tge = VADD(Ta7, Ta8); + STM4(&(io[17]), Tge, ovs, &(io[1])); + STN4(&(io[16]), TeN, Tge, Tfs, TfY, ovs); + } + { + V Ta9, Tac, Tgf, Tgg; + Ta9 = VSUB(T9V, T9W); + Tac = VSUB(Taa, Tab); + Tgf = VSUB(Ta9, Tac); + STM4(&(ro[49]), Tgf, ovs, &(ro[1])); + STN4(&(ro[48]), TeQ, Tgf, Tft, TfZ, ovs); + Tgg = VADD(Ta9, Tac); + STM4(&(ro[17]), Tgg, ovs, &(ro[1])); + STN4(&(ro[16]), TeO, Tgg, Tfu, Tg0, ovs); + } + } + { + V Tgh, Tgi, Tgj, Tgk, Tgl, Tgm, Tgn, Tgo, Tgp, Tgq, Tgr, Tgs, Tgt, Tgu, Tgv; + V Tgw; + { + V T3v, T6j, T6o, T6y, T6r, T6z, T48, T6u, T52, T6e, T67, T6t, T6a, T6k, T5V; + V T6f; + { + V T3f, T3u, T6m, T6n; + T3f = VSUB(T37, T3e); + T3u = VSUB(T3m, T3t); + T3v = VSUB(T3f, T3u); + T6j = VADD(T3f, T3u); + T6m = VADD(T4q, T4N); + T6n = VADD(T4X, T50); + T6o = VFMA(LDK(KP634393284), T6m, VMUL(LDK(KP773010453), T6n)); + T6y = VFNMS(LDK(KP634393284), T6n, VMUL(LDK(KP773010453), T6m)); + } + { + V T6p, T6q, T3O, T47; + T6p = VADD(T5j, T5G); + T6q = VADD(T5Q, T5T); + T6r = VFNMS(LDK(KP634393284), T6q, VMUL(LDK(KP773010453), T6p)); + T6z = VFMA(LDK(KP773010453), T6q, VMUL(LDK(KP634393284), T6p)); + T3O = VFNMS(LDK(KP980785280), T3N, VMUL(LDK(KP195090322), T3G)); + T47 = VFMA(LDK(KP195090322), T3Z, VMUL(LDK(KP980785280), T46)); + T48 = VSUB(T3O, T47); + T6u = VADD(T3O, T47); + } + { + V T4O, T51, T63, T66; + T4O = VSUB(T4q, T4N); + T51 = VSUB(T4X, T50); + T52 = VFMA(LDK(KP995184726), T4O, VMUL(LDK(KP098017140), T51)); + T6e = VFNMS(LDK(KP995184726), T51, VMUL(LDK(KP098017140), T4O)); + T63 = VSUB(T5Z, T62); + T66 = VSUB(T64, T65); + T67 = VSUB(T63, T66); + T6t = VADD(T63, T66); + } + { + V T68, T69, T5H, T5U; + T68 = VFNMS(LDK(KP980785280), T3Z, VMUL(LDK(KP195090322), T46)); + T69 = VFMA(LDK(KP980785280), T3G, VMUL(LDK(KP195090322), T3N)); + T6a = VSUB(T68, T69); + T6k = VADD(T69, T68); + T5H = VSUB(T5j, T5G); + T5U = VSUB(T5Q, T5T); + T5V = VFNMS(LDK(KP995184726), T5U, 
VMUL(LDK(KP098017140), T5H)); + T6f = VFMA(LDK(KP098017140), T5U, VMUL(LDK(KP995184726), T5H)); + } + { + V T49, T5W, T6h, T6i; + T49 = VADD(T3v, T48); + T5W = VADD(T52, T5V); + Tgh = VSUB(T49, T5W); + STM4(&(ro[47]), Tgh, ovs, &(ro[1])); + Tgi = VADD(T49, T5W); + STM4(&(ro[15]), Tgi, ovs, &(ro[1])); + T6h = VADD(T67, T6a); + T6i = VADD(T6e, T6f); + Tgj = VSUB(T6h, T6i); + STM4(&(io[47]), Tgj, ovs, &(io[1])); + Tgk = VADD(T6h, T6i); + STM4(&(io[15]), Tgk, ovs, &(io[1])); + } + { + V T6b, T6c, T6d, T6g; + T6b = VSUB(T67, T6a); + T6c = VSUB(T5V, T52); + Tgl = VSUB(T6b, T6c); + STM4(&(io[63]), Tgl, ovs, &(io[1])); + Tgm = VADD(T6b, T6c); + STM4(&(io[31]), Tgm, ovs, &(io[1])); + T6d = VSUB(T3v, T48); + T6g = VSUB(T6e, T6f); + Tgn = VSUB(T6d, T6g); + STM4(&(ro[63]), Tgn, ovs, &(ro[1])); + Tgo = VADD(T6d, T6g); + STM4(&(ro[31]), Tgo, ovs, &(ro[1])); + } + { + V T6l, T6s, T6B, T6C; + T6l = VADD(T6j, T6k); + T6s = VADD(T6o, T6r); + Tgp = VSUB(T6l, T6s); + STM4(&(ro[39]), Tgp, ovs, &(ro[1])); + Tgq = VADD(T6l, T6s); + STM4(&(ro[7]), Tgq, ovs, &(ro[1])); + T6B = VADD(T6t, T6u); + T6C = VADD(T6y, T6z); + Tgr = VSUB(T6B, T6C); + STM4(&(io[39]), Tgr, ovs, &(io[1])); + Tgs = VADD(T6B, T6C); + STM4(&(io[7]), Tgs, ovs, &(io[1])); + } + { + V T6v, T6w, T6x, T6A; + T6v = VSUB(T6t, T6u); + T6w = VSUB(T6r, T6o); + Tgt = VSUB(T6v, T6w); + STM4(&(io[55]), Tgt, ovs, &(io[1])); + Tgu = VADD(T6v, T6w); + STM4(&(io[23]), Tgu, ovs, &(io[1])); + T6x = VSUB(T6j, T6k); + T6A = VSUB(T6y, T6z); + Tgv = VSUB(T6x, T6A); + STM4(&(ro[55]), Tgv, ovs, &(ro[1])); + Tgw = VADD(T6x, T6A); + STM4(&(ro[23]), Tgw, ovs, &(ro[1])); + } + } + { + V T7L, T8X, T92, T9c, T95, T9d, T80, T98, T8k, T8S, T8L, T97, T8O, T8Y, T8D; + V T8T; + { + V T7D, T7K, T90, T91; + T7D = VSUB(T7B, T7C); + T7K = VSUB(T7G, T7J); + T7L = VSUB(T7D, T7K); + T8X = VADD(T7D, T7K); + T90 = VADD(T84, T8b); + T91 = VADD(T8f, T8i); + T92 = VFMA(LDK(KP471396736), T90, VMUL(LDK(KP881921264), T91)); + T9c = VFNMS(LDK(KP471396736), T91, 
VMUL(LDK(KP881921264), T90)); + } + { + V T93, T94, T7S, T7Z; + T93 = VADD(T8n, T8u); + T94 = VADD(T8y, T8B); + T95 = VFNMS(LDK(KP471396736), T94, VMUL(LDK(KP881921264), T93)); + T9d = VFMA(LDK(KP881921264), T94, VMUL(LDK(KP471396736), T93)); + T7S = VFNMS(LDK(KP831469612), T7R, VMUL(LDK(KP555570233), T7O)); + T7Z = VFMA(LDK(KP831469612), T7V, VMUL(LDK(KP555570233), T7Y)); + T80 = VSUB(T7S, T7Z); + T98 = VADD(T7S, T7Z); + } + { + V T8c, T8j, T8H, T8K; + T8c = VSUB(T84, T8b); + T8j = VSUB(T8f, T8i); + T8k = VFMA(LDK(KP956940335), T8c, VMUL(LDK(KP290284677), T8j)); + T8S = VFNMS(LDK(KP956940335), T8j, VMUL(LDK(KP290284677), T8c)); + T8H = VSUB(T8F, T8G); + T8K = VSUB(T8I, T8J); + T8L = VSUB(T8H, T8K); + T97 = VADD(T8H, T8K); + } + { + V T8M, T8N, T8v, T8C; + T8M = VFNMS(LDK(KP831469612), T7Y, VMUL(LDK(KP555570233), T7V)); + T8N = VFMA(LDK(KP555570233), T7R, VMUL(LDK(KP831469612), T7O)); + T8O = VSUB(T8M, T8N); + T8Y = VADD(T8N, T8M); + T8v = VSUB(T8n, T8u); + T8C = VSUB(T8y, T8B); + T8D = VFNMS(LDK(KP956940335), T8C, VMUL(LDK(KP290284677), T8v)); + T8T = VFMA(LDK(KP290284677), T8C, VMUL(LDK(KP956940335), T8v)); + } + { + V T81, T8E, Tgx, Tgy; + T81 = VADD(T7L, T80); + T8E = VADD(T8k, T8D); + Tgx = VSUB(T81, T8E); + STM4(&(ro[45]), Tgx, ovs, &(ro[1])); + STN4(&(ro[44]), TeZ, Tgx, Tfv, Tgh, ovs); + Tgy = VADD(T81, T8E); + STM4(&(ro[13]), Tgy, ovs, &(ro[1])); + STN4(&(ro[12]), Tf0, Tgy, Tfw, Tgi, ovs); + } + { + V T8V, T8W, Tgz, TgA; + T8V = VADD(T8L, T8O); + T8W = VADD(T8S, T8T); + Tgz = VSUB(T8V, T8W); + STM4(&(io[45]), Tgz, ovs, &(io[1])); + STN4(&(io[44]), Tf1, Tgz, Tfx, Tgj, ovs); + TgA = VADD(T8V, T8W); + STM4(&(io[13]), TgA, ovs, &(io[1])); + STN4(&(io[12]), Tf2, TgA, Tfy, Tgk, ovs); + } + { + V T8P, T8Q, TgB, TgC; + T8P = VSUB(T8L, T8O); + T8Q = VSUB(T8D, T8k); + TgB = VSUB(T8P, T8Q); + STM4(&(io[61]), TgB, ovs, &(io[1])); + STN4(&(io[60]), Tf3, TgB, Tfz, Tgl, ovs); + TgC = VADD(T8P, T8Q); + STM4(&(io[29]), TgC, ovs, &(io[1])); + STN4(&(io[28]), Tf4, TgC, TfA, 
Tgm, ovs); + } + { + V T8R, T8U, TgD, TgE; + T8R = VSUB(T7L, T80); + T8U = VSUB(T8S, T8T); + TgD = VSUB(T8R, T8U); + STM4(&(ro[61]), TgD, ovs, &(ro[1])); + STN4(&(ro[60]), Tf5, TgD, TfB, Tgn, ovs); + TgE = VADD(T8R, T8U); + STM4(&(ro[29]), TgE, ovs, &(ro[1])); + STN4(&(ro[28]), Tf6, TgE, TfC, Tgo, ovs); + } + { + V T8Z, T96, TgF, TgG; + T8Z = VADD(T8X, T8Y); + T96 = VADD(T92, T95); + TgF = VSUB(T8Z, T96); + STM4(&(ro[37]), TgF, ovs, &(ro[1])); + STN4(&(ro[36]), Tf7, TgF, TfD, Tgp, ovs); + TgG = VADD(T8Z, T96); + STM4(&(ro[5]), TgG, ovs, &(ro[1])); + STN4(&(ro[4]), Tf8, TgG, TfE, Tgq, ovs); + } + { + V T9f, T9g, TgH, TgI; + T9f = VADD(T97, T98); + T9g = VADD(T9c, T9d); + TgH = VSUB(T9f, T9g); + STM4(&(io[37]), TgH, ovs, &(io[1])); + STN4(&(io[36]), Tf9, TgH, TfF, Tgr, ovs); + TgI = VADD(T9f, T9g); + STM4(&(io[5]), TgI, ovs, &(io[1])); + STN4(&(io[4]), Tfa, TgI, TfG, Tgs, ovs); + } + { + V T99, T9a, TgJ, TgK; + T99 = VSUB(T97, T98); + T9a = VSUB(T95, T92); + TgJ = VSUB(T99, T9a); + STM4(&(io[53]), TgJ, ovs, &(io[1])); + STN4(&(io[52]), Tfb, TgJ, TfH, Tgt, ovs); + TgK = VADD(T99, T9a); + STM4(&(io[21]), TgK, ovs, &(io[1])); + STN4(&(io[20]), Tfc, TgK, TfI, Tgu, ovs); + } + { + V T9b, T9e, TgL, TgM; + T9b = VSUB(T8X, T8Y); + T9e = VSUB(T9c, T9d); + TgL = VSUB(T9b, T9e); + STM4(&(ro[53]), TgL, ovs, &(ro[1])); + STN4(&(ro[52]), Tfd, TgL, TfJ, Tgv, ovs); + TgM = VADD(T9b, T9e); + STM4(&(ro[21]), TgM, ovs, &(ro[1])); + STN4(&(ro[20]), Tfe, TgM, TfK, Tgw, ovs); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 64, XSIMD_STRING("n2sv_64"), {808, 144, 104, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_64) (planner *p) { + X(kdft_register) (p, n2sv_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/n2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2003, 
2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:03 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name n2sv_8 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 52 FP additions, 8 FP multiplications, + * (or, 44 additions, 0 multiplications, 8 fused multiply/add), + * 58 stack variables, 1 constants, and 36 memory accesses + */ +#include "n2s.h" + +static void n2sv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V TF, TJ, TD, TR, TS, TT, TU, TV, TW, TE, TX, TY, TK, TI, TZ; + V T10, T11, T12; + { + V Tb, Tn, T3, TC, Ti, TB, T6, To, Tl, Tc, Tw, Tx, T8, T9, Tr; + V Ts; + { + V 
T1, T2, Tg, Th, T4, T5, Tj, Tk; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + Tg = LD(&(ii[0]), ivs, &(ii[0])); + Th = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + T4 = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + Tj = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + Tk = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + Tb = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + Tn = VSUB(T1, T2); + T3 = VADD(T1, T2); + TC = VSUB(Tg, Th); + Ti = VADD(Tg, Th); + TB = VSUB(T4, T5); + T6 = VADD(T4, T5); + To = VSUB(Tj, Tk); + Tl = VADD(Tj, Tk); + Tc = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + Tw = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + Tx = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + T8 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + T9 = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + Tr = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + Ts = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + } + { + V TL, T7, TP, Tm, Tz, TH, Te, Tf, TO, TQ, TG, Tu, Tp, TA; + { + V Td, Tv, TN, Ty, Ta, Tq, TM, Tt; + TL = VSUB(T3, T6); + T7 = VADD(T3, T6); + Td = VADD(Tb, Tc); + Tv = VSUB(Tb, Tc); + TN = VADD(Tw, Tx); + Ty = VSUB(Tw, Tx); + Ta = VADD(T8, T9); + Tq = VSUB(T8, T9); + TM = VADD(Tr, Ts); + Tt = VSUB(Tr, Ts); + TP = VADD(Ti, Tl); + Tm = VSUB(Ti, Tl); + Tz = VSUB(Tv, Ty); + TH = VADD(Tv, Ty); + Te = VADD(Ta, Td); + Tf = VSUB(Td, Ta); + TO = VSUB(TM, TN); + TQ = VADD(TM, TN); + TG = VSUB(Tt, Tq); + Tu = VADD(Tq, Tt); + } + TF = VSUB(Tn, To); + Tp = VADD(Tn, To); + TJ = VSUB(TC, TB); + TD = VADD(TB, TC); + TR = VSUB(Tm, Tf); + STM4(&(io[6]), TR, ovs, &(io[0])); + TS = VADD(Tf, Tm); + STM4(&(io[2]), TS, ovs, &(io[0])); + TT = VADD(T7, Te); + STM4(&(ro[0]), TT, ovs, &(ro[0])); + TU = VSUB(T7, Te); + STM4(&(ro[4]), TU, ovs, &(ro[0])); + TV = VADD(TP, TQ); + STM4(&(io[0]), TV, ovs, &(io[0])); + TW = VSUB(TP, TQ); + STM4(&(io[4]), TW, ovs, &(io[0])); + TE = VSUB(Tz, Tu); + TA = VADD(Tu, Tz); + TX = VADD(TL, TO); + STM4(&(ro[2]), TX, 
ovs, &(ro[0])); + TY = VSUB(TL, TO); + STM4(&(ro[6]), TY, ovs, &(ro[0])); + TK = VADD(TG, TH); + TI = VSUB(TG, TH); + TZ = VFMA(LDK(KP707106781), TA, Tp); + STM4(&(ro[1]), TZ, ovs, &(ro[1])); + T10 = VFNMS(LDK(KP707106781), TA, Tp); + STM4(&(ro[5]), T10, ovs, &(ro[1])); + } + } + T11 = VFMA(LDK(KP707106781), TK, TJ); + STM4(&(io[1]), T11, ovs, &(io[1])); + T12 = VFNMS(LDK(KP707106781), TK, TJ); + STM4(&(io[5]), T12, ovs, &(io[1])); + { + V T13, T14, T15, T16; + T13 = VFMA(LDK(KP707106781), TE, TD); + STM4(&(io[3]), T13, ovs, &(io[1])); + STN4(&(io[0]), TV, T11, TS, T13, ovs); + T14 = VFNMS(LDK(KP707106781), TE, TD); + STM4(&(io[7]), T14, ovs, &(io[1])); + STN4(&(io[4]), TW, T12, TR, T14, ovs); + T15 = VFMA(LDK(KP707106781), TI, TF); + STM4(&(ro[3]), T15, ovs, &(ro[1])); + STN4(&(ro[0]), TT, TZ, TX, T15, ovs); + T16 = VFNMS(LDK(KP707106781), TI, TF); + STM4(&(ro[7]), T16, ovs, &(ro[1])); + STN4(&(ro[4]), TU, T10, TY, T16, ovs); + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n2sv_8"), {44, 0, 8, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_8) (planner *p) { + X(kdft_register) (p, n2sv_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_notw.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name n2sv_8 -with-ostride 1 -include n2s.h -store-multiple 4 */ + +/* + * This function contains 52 FP additions, 4 FP multiplications, + * (or, 52 additions, 4 multiplications, 0 fused multiply/add), + * 34 stack variables, 1 constants, and 36 memory accesses + */ +#include "n2s.h" + +static void n2sv_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) { + V T3, Tn, Ti, TC, T6, 
TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu; + V TG; + { + V T1, T2, Tj, Tk; + T1 = LD(&(ri[0]), ivs, &(ri[0])); + T2 = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); + T3 = VADD(T1, T2); + Tn = VSUB(T1, T2); + { + V Tg, Th, T4, T5; + Tg = LD(&(ii[0]), ivs, &(ii[0])); + Th = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); + Ti = VADD(Tg, Th); + TC = VSUB(Tg, Th); + T4 = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); + T5 = LD(&(ri[WS(is, 6)]), ivs, &(ri[0])); + T6 = VADD(T4, T5); + TB = VSUB(T4, T5); + } + Tj = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); + Tk = LD(&(ii[WS(is, 6)]), ivs, &(ii[0])); + Tl = VADD(Tj, Tk); + To = VSUB(Tj, Tk); + { + V Tb, Tc, Tv, Tw, Tx, Ty; + Tb = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); + Tc = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)])); + Tv = VSUB(Tb, Tc); + Tw = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); + Tx = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)])); + Ty = VSUB(Tw, Tx); + Td = VADD(Tb, Tc); + TN = VADD(Tw, Tx); + Tz = VSUB(Tv, Ty); + TH = VADD(Tv, Ty); + } + { + V T8, T9, Tq, Tr, Ts, Tt; + T8 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)])); + T9 = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)])); + Tq = VSUB(T8, T9); + Tr = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)])); + Ts = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)])); + Tt = VSUB(Tr, Ts); + Ta = VADD(T8, T9); + TM = VADD(Tr, Ts); + Tu = VADD(Tq, Tt); + TG = VSUB(Tt, Tq); + } + } + { + V TR, TS, TT, TU, TV, TW, TX, TY; + { + V T7, Te, TP, TQ; + T7 = VADD(T3, T6); + Te = VADD(Ta, Td); + TR = VSUB(T7, Te); + STM4(&(ro[4]), TR, ovs, &(ro[0])); + TS = VADD(T7, Te); + STM4(&(ro[0]), TS, ovs, &(ro[0])); + TP = VADD(Ti, Tl); + TQ = VADD(TM, TN); + TT = VSUB(TP, TQ); + STM4(&(io[4]), TT, ovs, &(io[0])); + TU = VADD(TP, TQ); + STM4(&(io[0]), TU, ovs, &(io[0])); + } + { + V Tf, Tm, TL, TO; + Tf = VSUB(Td, Ta); + Tm = VSUB(Ti, Tl); + TV = VADD(Tf, Tm); + STM4(&(io[2]), TV, ovs, &(io[0])); + TW = VSUB(Tm, Tf); + STM4(&(io[6]), TW, ovs, &(io[0])); + TL = VSUB(T3, T6); + TO = VSUB(TM, TN); + TX = VSUB(TL, TO); + STM4(&(ro[6]), TX, 
ovs, &(ro[0])); + TY = VADD(TL, TO); + STM4(&(ro[2]), TY, ovs, &(ro[0])); + } + { + V TZ, T10, T11, T12; + { + V Tp, TA, TJ, TK; + Tp = VADD(Tn, To); + TA = VMUL(LDK(KP707106781), VADD(Tu, Tz)); + TZ = VSUB(Tp, TA); + STM4(&(ro[5]), TZ, ovs, &(ro[1])); + T10 = VADD(Tp, TA); + STM4(&(ro[1]), T10, ovs, &(ro[1])); + TJ = VSUB(TC, TB); + TK = VMUL(LDK(KP707106781), VADD(TG, TH)); + T11 = VSUB(TJ, TK); + STM4(&(io[5]), T11, ovs, &(io[1])); + T12 = VADD(TJ, TK); + STM4(&(io[1]), T12, ovs, &(io[1])); + } + { + V TD, TE, T13, T14; + TD = VADD(TB, TC); + TE = VMUL(LDK(KP707106781), VSUB(Tz, Tu)); + T13 = VSUB(TD, TE); + STM4(&(io[7]), T13, ovs, &(io[1])); + STN4(&(io[4]), TT, T11, TW, T13, ovs); + T14 = VADD(TD, TE); + STM4(&(io[3]), T14, ovs, &(io[1])); + STN4(&(io[0]), TU, T12, TV, T14, ovs); + } + { + V TF, TI, T15, T16; + TF = VSUB(Tn, To); + TI = VMUL(LDK(KP707106781), VSUB(TG, TH)); + T15 = VSUB(TF, TI); + STM4(&(ro[7]), T15, ovs, &(ro[1])); + STN4(&(ro[4]), TR, TZ, TX, T15, ovs); + T16 = VADD(TF, TI); + STM4(&(ro[3]), T16, ovs, &(ro[1])); + STN4(&(ro[0]), TS, T10, TY, T16, ovs); + } + } + } + } + } + VLEAVE(); +} + +static const kdft_desc desc = { 8, XSIMD_STRING("n2sv_8"), {52, 4, 0, 0}, &GENUS, 0, 1, 0, 0 }; + +void XSIMD(codelet_n2sv_8) (planner *p) { + X(kdft_register) (p, n2sv_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1bv_2 -include q1b.h -sign 1 */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 6 additions, 4 multiplications, 0 fused multiply/add), + * 8 stack variables, 0 constants, and 8 memory accesses + */ +#include "q1b.h" + +static void q1bv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) { + V T1, T2, T4, T5, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); + T3 = BYTW(&(W[0]), VSUB(T1, T2)); + ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); + T6 = BYTW(&(W[0]), VSUB(T4, T5)); + ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = 
{ 2, XSIMD_STRING("q1bv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_2) (planner *p) { + X(kdft_difsq_register) (p, q1bv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1bv_2 -include q1b.h -sign 1 */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 6 additions, 4 multiplications, 0 fused multiply/add), + * 8 stack variables, 0 constants, and 8 memory accesses + */ +#include "q1b.h" + +static void q1bv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) { + V T1, T2, T3, T4, T5, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), VSUB(T1, T2)); + T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T6 = BYTW(&(W[0]), VSUB(T4, T5)); + ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("q1bv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_2) (planner *p) { + X(kdft_difsq_register) (p, q1bv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 
Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -dif -name q1bv_4 -include q1b.h -sign 1 */ + +/* + * This function contains 44 FP additions, 32 FP multiplications, + * (or, 36 additions, 24 multiplications, 8 fused multiply/add), + * 38 stack variables, 0 constants, and 32 memory accesses + */ +#include "q1b.h" + +static void q1bv_4(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, vs)) { + V Tb, Tm, Tx, TI; + { + V Tc, T9, T3, TG, TA, TH, TD, Ta, T6, Td, Tn, To, Tq, Tr, Tf; + V Tg; + { + V T1, T2, Ty, Tz, TB, TC, T4, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ty = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + Tz = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + TB = LD(&(x[WS(vs, 3) + 
WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T9 = VADD(T1, T2); + T3 = VSUB(T1, T2); + TG = VADD(Ty, Tz); + TA = VSUB(Ty, Tz); + TH = VADD(TB, TC); + TD = VSUB(TB, TC); + Ta = VADD(T4, T5); + T6 = VSUB(T4, T5); + Td = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Tn = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + To = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + Tq = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tr = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tf = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tg = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V Tk, Te, Tv, Tp, Tw, Ts, Tl, Th, T7, TE, Tu, TF; + ST(&(x[0]), VADD(T9, Ta), ms, &(x[0])); + Tk = VADD(Tc, Td); + Te = VSUB(Tc, Td); + Tv = VADD(Tn, To); + Tp = VSUB(Tn, To); + Tw = VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + Tl = VADD(Tf, Tg); + Th = VSUB(Tf, Tg); + ST(&(x[WS(rs, 3)]), VADD(TG, TH), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 4]), VFNMSI(T6, T3)); + TE = BYTW(&(W[TWVL * 4]), VFNMSI(TD, TA)); + { + V Tt, Ti, Tj, T8; + T8 = BYTW(&(W[0]), VFMAI(T6, T3)); + ST(&(x[WS(rs, 2)]), VADD(Tv, Tw), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 4]), VFNMSI(Ts, Tp)); + ST(&(x[WS(rs, 1)]), VADD(Tk, Tl), ms, &(x[WS(rs, 1)])); + Ti = BYTW(&(W[TWVL * 4]), VFNMSI(Th, Te)); + Tj = BYTW(&(W[0]), VFMAI(Th, Te)); + ST(&(x[WS(vs, 3)]), T7, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), TE, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 1)]), T8, ms, &(x[WS(vs, 1)])); + Tu = BYTW(&(W[0]), VFMAI(Ts, Tp)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), Tt, ms, &(x[WS(vs, 3)])); + TF = BYTW(&(W[0]), VFMAI(TD, TA)); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), Ti, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + 
ST(&(x[WS(vs, 1) + WS(rs, 1)]), Tj, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + Tb = BYTW(&(W[TWVL * 2]), VSUB(T9, Ta)); + Tm = BYTW(&(W[TWVL * 2]), VSUB(Tk, Tl)); + Tx = BYTW(&(W[TWVL * 2]), VSUB(Tv, Tw)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), Tu, ms, &(x[WS(vs, 1)])); + TI = BYTW(&(W[TWVL * 2]), VSUB(TG, TH)); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), TF, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + } + ST(&(x[WS(vs, 2)]), Tb, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("q1bv_4"), twinstr, &GENUS, {36, 24, 8, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_4) (planner *p) { + X(kdft_difsq_register) (p, q1bv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -dif -name q1bv_4 -include q1b.h -sign 1 */ + +/* + * This function contains 44 FP additions, 24 FP multiplications, + * (or, 44 additions, 24 multiplications, 0 fused multiply/add), + * 22 stack variables, 0 constants, and 32 memory accesses + */ +#include "q1b.h" + +static void q1bv_4(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, vs)) { + V T3, T9, TA, TG, TD, TH, T6, Ta, Te, Tk, Tp, Tv, Ts, Tw, Th; + V Tl; + { + V T1, T2, Ty, Tz; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = VSUB(T1, T2); + T9 = VADD(T1, T2); + Ty = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + Tz = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + TA = VSUB(Ty, Tz); + TG 
= VADD(Ty, Tz); + } + { + V TB, TC, T4, T5; + TB = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TD = VBYI(VSUB(TB, TC)); + TH = VADD(TB, TC); + T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T6 = VBYI(VSUB(T4, T5)); + Ta = VADD(T4, T5); + } + { + V Tc, Td, Tn, To; + Tc = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + Td = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Te = VSUB(Tc, Td); + Tk = VADD(Tc, Td); + Tn = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + To = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + Tp = VSUB(Tn, To); + Tv = VADD(Tn, To); + } + { + V Tq, Tr, Tf, Tg; + Tq = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tr = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Ts = VBYI(VSUB(Tq, Tr)); + Tw = VADD(Tq, Tr); + Tf = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tg = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Th = VBYI(VSUB(Tf, Tg)); + Tl = VADD(Tf, Tg); + } + ST(&(x[0]), VADD(T9, Ta), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(Tk, Tl), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(Tv, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(TG, TH), ms, &(x[WS(rs, 1)])); + { + V T7, Ti, Tt, TE; + T7 = BYTW(&(W[TWVL * 4]), VSUB(T3, T6)); + ST(&(x[WS(vs, 3)]), T7, ms, &(x[WS(vs, 3)])); + Ti = BYTW(&(W[TWVL * 4]), VSUB(Te, Th)); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), Ti, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + Tt = BYTW(&(W[TWVL * 4]), VSUB(Tp, Ts)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), Tt, ms, &(x[WS(vs, 3)])); + TE = BYTW(&(W[TWVL * 4]), VSUB(TA, TD)); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), TE, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V T8, Tj, Tu, TF; + T8 = BYTW(&(W[0]), VADD(T3, T6)); + ST(&(x[WS(vs, 1)]), T8, ms, &(x[WS(vs, 1)])); + Tj = BYTW(&(W[0]), VADD(Te, Th)); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Tj, ms, &(x[WS(vs, 1) + 
WS(rs, 1)])); + Tu = BYTW(&(W[0]), VADD(Tp, Ts)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), Tu, ms, &(x[WS(vs, 1)])); + TF = BYTW(&(W[0]), VADD(TA, TD)); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), TF, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V Tb, Tm, Tx, TI; + Tb = BYTW(&(W[TWVL * 2]), VSUB(T9, Ta)); + ST(&(x[WS(vs, 2)]), Tb, ms, &(x[WS(vs, 2)])); + Tm = BYTW(&(W[TWVL * 2]), VSUB(Tk, Tl)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tx = BYTW(&(W[TWVL * 2]), VSUB(Tv, Tw)); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)])); + TI = BYTW(&(W[TWVL * 2]), VSUB(TG, TH)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("q1bv_4"), twinstr, &GENUS, {44, 24, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_4) (planner *p) { + X(kdft_difsq_register) (p, q1bv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:58 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -dif -name q1bv_5 -include q1b.h -sign 1 */ + +/* + * This function contains 100 FP additions, 95 FP multiplications, + * (or, 55 additions, 50 multiplications, 45 fused multiply/add), + * 69 stack variables, 4 constants, and 50 memory accesses + */ +#include "q1b.h" + +static void q1bv_5(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(10, rs), MAKE_VOLATILE_STRIDE(10, vs)) { + V Te, T1w, Ty, TS, TW, Tb, T1t, Tv, T1g, T1c, TP, TV, T1f, T19, TY; + V TX; + { + V T1, T1j, Tl, Ti, Ta, T8, T1A, T1q, T1s, T9, TF, T1r, TZ, TR, TL; + V TC, Ts, Tu, TQ, TI, T15, T1b, T10, T11, Tt; + { + V T1n, T1o, T1k, T1l, T7, Td, T4, Tc; + { + V T5, T6, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T1j = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + T1n = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 4)])); + T1o = LD(&(x[WS(vs, 
4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1k = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1l = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T7 = VADD(T5, T6); + Td = VSUB(T5, T6); + T4 = VADD(T2, T3); + Tc = VSUB(T2, T3); + } + { + V Tm, Tn, Tr, Tx, T1v, T1p; + Tl = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T1v = VSUB(T1n, T1o); + T1p = VADD(T1n, T1o); + { + V T1u, T1m, Tp, Tq; + T1u = VSUB(T1k, T1l); + T1m = VADD(T1k, T1l); + Tp = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Ti = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tc, Td)); + Te = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Td, Tc)); + Ta = VSUB(T4, T7); + T8 = VADD(T4, T7); + Tq = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T1w = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1v, T1u)); + T1A = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1u, T1v)); + T1q = VADD(T1m, T1p); + T1s = VSUB(T1m, T1p); + Tm = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T9 = VFNMS(LDK(KP250000000), T8, T1); + Tn = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + Tr = VADD(Tp, Tq); + Tx = VSUB(Tp, Tq); + } + { + V TJ, TK, TG, Tw, To, TH, T13, T14; + TF = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + T1r = VFNMS(LDK(KP250000000), T1q, T1j); + TJ = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + TK = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + TG = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tw = VSUB(Tm, Tn); + To = VADD(Tm, Tn); + TH = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + TZ = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + T13 = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T14 = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TR = VSUB(TJ, TK); + TL = VADD(TJ, TK); + Ty = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tx, Tw)); + TC = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tw, Tx)); + Ts = VADD(To, 
Tr); + Tu = VSUB(To, Tr); + TQ = VSUB(TG, TH); + TI = VADD(TG, TH); + T15 = VADD(T13, T14); + T1b = VSUB(T13, T14); + T10 = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T11 = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + Tt = VFNMS(LDK(KP250000000), Ts, Tl); + } + } + } + { + V TO, T12, T1a, Th, T1z, TN, TM, T18, T17; + ST(&(x[0]), VADD(T1, T8), ms, &(x[0])); + TS = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TR, TQ)); + TW = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TQ, TR)); + TM = VADD(TI, TL); + TO = VSUB(TI, TL); + ST(&(x[WS(rs, 4)]), VADD(T1j, T1q), ms, &(x[0])); + T12 = VADD(T10, T11); + T1a = VSUB(T10, T11); + ST(&(x[WS(rs, 1)]), VADD(Tl, Ts), ms, &(x[WS(rs, 1)])); + Th = VFNMS(LDK(KP559016994), Ta, T9); + Tb = VFMA(LDK(KP559016994), Ta, T9); + T1t = VFMA(LDK(KP559016994), T1s, T1r); + T1z = VFNMS(LDK(KP559016994), T1s, T1r); + ST(&(x[WS(rs, 2)]), VADD(TF, TM), ms, &(x[0])); + TN = VFNMS(LDK(KP250000000), TM, TF); + { + V T16, Tk, Tj, T1C, T1B, TD, TE, TB; + TB = VFNMS(LDK(KP559016994), Tu, Tt); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + T1g = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1a, T1b)); + T1c = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1b, T1a)); + T18 = VSUB(T12, T15); + T16 = VADD(T12, T15); + Tk = BYTW(&(W[TWVL * 4]), VFMAI(Ti, Th)); + Tj = BYTW(&(W[TWVL * 2]), VFNMSI(Ti, Th)); + T1C = BYTW(&(W[TWVL * 4]), VFMAI(T1A, T1z)); + T1B = BYTW(&(W[TWVL * 2]), VFNMSI(T1A, T1z)); + TD = BYTW(&(W[TWVL * 2]), VFNMSI(TC, TB)); + TE = BYTW(&(W[TWVL * 4]), VFMAI(TC, TB)); + ST(&(x[WS(rs, 3)]), VADD(TZ, T16), ms, &(x[WS(rs, 1)])); + T17 = VFNMS(LDK(KP250000000), T16, TZ); + ST(&(x[WS(vs, 3)]), Tk, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 2)]), Tj, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T1C, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T1B, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), TD, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TE, ms, &(x[WS(vs, 3) 
+ WS(rs, 1)])); + } + TP = VFMA(LDK(KP559016994), TO, TN); + TV = VFNMS(LDK(KP559016994), TO, TN); + T1f = VFNMS(LDK(KP559016994), T18, T17); + T19 = VFMA(LDK(KP559016994), T18, T17); + } + } + TY = BYTW(&(W[TWVL * 4]), VFMAI(TW, TV)); + TX = BYTW(&(W[TWVL * 2]), VFNMSI(TW, TV)); + { + V T1i, T1h, TU, TT; + T1i = BYTW(&(W[TWVL * 4]), VFMAI(T1g, T1f)); + T1h = BYTW(&(W[TWVL * 2]), VFNMSI(T1g, T1f)); + TU = BYTW(&(W[TWVL * 6]), VFNMSI(TS, TP)); + TT = BYTW(&(W[0]), VFMAI(TS, TP)); + { + V Tg, Tf, TA, Tz; + Tg = BYTW(&(W[TWVL * 6]), VFNMSI(Te, Tb)); + Tf = BYTW(&(W[0]), VFMAI(Te, Tb)); + TA = BYTW(&(W[TWVL * 6]), VFNMSI(Ty, Tv)); + Tz = BYTW(&(W[0]), VFMAI(Ty, Tv)); + { + V T1e, T1d, T1y, T1x; + T1e = BYTW(&(W[TWVL * 6]), VFNMSI(T1c, T19)); + T1d = BYTW(&(W[0]), VFMAI(T1c, T19)); + T1y = BYTW(&(W[TWVL * 6]), VFNMSI(T1w, T1t)); + T1x = BYTW(&(W[0]), VFMAI(T1w, T1t)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), TY, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), TX, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1i, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T1h, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), TU, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), TT, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4)]), Tg, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 1)]), Tf, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), TA, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Tz, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T1e, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1d, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T1y, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T1x, ms, &(x[WS(vs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("q1bv_5"), twinstr, &GENUS, 
{55, 50, 45, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_5) (planner *p) { + X(kdft_difsq_register) (p, q1bv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -dif -name q1bv_5 -include q1b.h -sign 1 */ + +/* + * This function contains 100 FP additions, 70 FP multiplications, + * (or, 85 additions, 55 multiplications, 15 fused multiply/add), + * 44 stack variables, 4 constants, and 50 memory accesses + */ +#include "q1b.h" + +static void q1bv_5(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(10, rs), MAKE_VOLATILE_STRIDE(10, vs)) { + V Tb, T7, Th, Ta, Tc, Td, T1t, T1p, T1z, T1s, T1u, T1v, Tv, Tr, TB; + V Tu, Tw, Tx, TP, TL, TV, TO, TQ, TR, T19, T15, T1f, T18, T1a, T1b; + { + V T6, T9, T3, T8; + Tb = LD(&(x[0]), ms, &(x[0])); + { + V T4, T5, T1, T2; + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T6 = VSUB(T4, T5); + T9 = VADD(T4, T5); + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = VSUB(T1, T2); + T8 = VADD(T1, T2); + } + T7 = VBYI(VFMA(LDK(KP951056516), T3, VMUL(LDK(KP587785252), T6))); + Th = VBYI(VFNMS(LDK(KP951056516), T6, VMUL(LDK(KP587785252), T3))); + Ta = VMUL(LDK(KP559016994), VSUB(T8, T9)); + Tc = VADD(T8, T9); + Td = VFNMS(LDK(KP250000000), Tc, Tb); + } + { + V T1o, T1r, T1l, T1q; + T1t = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + { + V T1m, T1n, T1j, T1k; + T1m = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 
4)])); + T1n = LD(&(x[WS(vs, 4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1o = VSUB(T1m, T1n); + T1r = VADD(T1m, T1n); + T1j = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1k = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T1l = VSUB(T1j, T1k); + T1q = VADD(T1j, T1k); + } + T1p = VBYI(VFMA(LDK(KP951056516), T1l, VMUL(LDK(KP587785252), T1o))); + T1z = VBYI(VFNMS(LDK(KP951056516), T1o, VMUL(LDK(KP587785252), T1l))); + T1s = VMUL(LDK(KP559016994), VSUB(T1q, T1r)); + T1u = VADD(T1q, T1r); + T1v = VFNMS(LDK(KP250000000), T1u, T1t); + } + { + V Tq, Tt, Tn, Ts; + Tv = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + { + V To, Tp, Tl, Tm; + To = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Tp = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tq = VSUB(To, Tp); + Tt = VADD(To, Tp); + Tl = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tm = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + Tn = VSUB(Tl, Tm); + Ts = VADD(Tl, Tm); + } + Tr = VBYI(VFMA(LDK(KP951056516), Tn, VMUL(LDK(KP587785252), Tq))); + TB = VBYI(VFNMS(LDK(KP951056516), Tq, VMUL(LDK(KP587785252), Tn))); + Tu = VMUL(LDK(KP559016994), VSUB(Ts, Tt)); + Tw = VADD(Ts, Tt); + Tx = VFNMS(LDK(KP250000000), Tw, Tv); + } + { + V TK, TN, TH, TM; + TP = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + { + V TI, TJ, TF, TG; + TI = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + TJ = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + TK = VSUB(TI, TJ); + TN = VADD(TI, TJ); + TF = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + TG = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + TH = VSUB(TF, TG); + TM = VADD(TF, TG); + } + TL = VBYI(VFMA(LDK(KP951056516), TH, VMUL(LDK(KP587785252), TK))); + TV = VBYI(VFNMS(LDK(KP951056516), TK, VMUL(LDK(KP587785252), TH))); + TO = VMUL(LDK(KP559016994), VSUB(TM, TN)); + TQ = VADD(TM, TN); + TR = VFNMS(LDK(KP250000000), TQ, TP); + } + { + V T14, 
T17, T11, T16; + T19 = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + { + V T12, T13, TZ, T10; + T12 = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T13 = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T14 = VSUB(T12, T13); + T17 = VADD(T12, T13); + TZ = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T10 = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + T11 = VSUB(TZ, T10); + T16 = VADD(TZ, T10); + } + T15 = VBYI(VFMA(LDK(KP951056516), T11, VMUL(LDK(KP587785252), T14))); + T1f = VBYI(VFNMS(LDK(KP951056516), T14, VMUL(LDK(KP587785252), T11))); + T18 = VMUL(LDK(KP559016994), VSUB(T16, T17)); + T1a = VADD(T16, T17); + T1b = VFNMS(LDK(KP250000000), T1a, T19); + } + ST(&(x[0]), VADD(Tb, Tc), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1t, T1u), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(TP, TQ), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T19, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tv, Tw), ms, &(x[WS(rs, 1)])); + { + V Tj, Tk, Ti, T1B, T1C, T1A; + Ti = VSUB(Td, Ta); + Tj = BYTW(&(W[TWVL * 2]), VADD(Th, Ti)); + Tk = BYTW(&(W[TWVL * 4]), VSUB(Ti, Th)); + ST(&(x[WS(vs, 2)]), Tj, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3)]), Tk, ms, &(x[WS(vs, 3)])); + T1A = VSUB(T1v, T1s); + T1B = BYTW(&(W[TWVL * 2]), VADD(T1z, T1A)); + T1C = BYTW(&(W[TWVL * 4]), VSUB(T1A, T1z)); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T1B, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T1C, ms, &(x[WS(vs, 3)])); + } + { + V T1h, T1i, T1g, TD, TE, TC; + T1g = VSUB(T1b, T18); + T1h = BYTW(&(W[TWVL * 2]), VADD(T1f, T1g)); + T1i = BYTW(&(W[TWVL * 4]), VSUB(T1g, T1f)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T1h, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1i, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TC = VSUB(Tx, Tu); + TD = BYTW(&(W[TWVL * 2]), VADD(TB, TC)); + TE = BYTW(&(W[TWVL * 4]), VSUB(TC, TB)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), TD, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TE, ms, &(x[WS(vs, 3) 
+ WS(rs, 1)])); + } + { + V TX, TY, TW, TT, TU, TS; + TW = VSUB(TR, TO); + TX = BYTW(&(W[TWVL * 2]), VADD(TV, TW)); + TY = BYTW(&(W[TWVL * 4]), VSUB(TW, TV)); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), TX, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), TY, ms, &(x[WS(vs, 3)])); + TS = VADD(TO, TR); + TT = BYTW(&(W[0]), VADD(TL, TS)); + TU = BYTW(&(W[TWVL * 6]), VSUB(TS, TL)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), TT, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), TU, ms, &(x[WS(vs, 4)])); + } + { + V Tf, Tg, Te, Tz, TA, Ty; + Te = VADD(Ta, Td); + Tf = BYTW(&(W[0]), VADD(T7, Te)); + Tg = BYTW(&(W[TWVL * 6]), VSUB(Te, T7)); + ST(&(x[WS(vs, 1)]), Tf, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4)]), Tg, ms, &(x[WS(vs, 4)])); + Ty = VADD(Tu, Tx); + Tz = BYTW(&(W[0]), VADD(Tr, Ty)); + TA = BYTW(&(W[TWVL * 6]), VSUB(Ty, Tr)); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Tz, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), TA, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + } + { + V T1d, T1e, T1c, T1x, T1y, T1w; + T1c = VADD(T18, T1b); + T1d = BYTW(&(W[0]), VADD(T15, T1c)); + T1e = BYTW(&(W[TWVL * 6]), VSUB(T1c, T15)); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1d, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T1e, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1w = VADD(T1s, T1v); + T1x = BYTW(&(W[0]), VADD(T1p, T1w)); + T1y = BYTW(&(W[TWVL * 6]), VSUB(T1w, T1p)); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T1x, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T1y, ms, &(x[WS(vs, 4)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("q1bv_5"), twinstr, &GENUS, {85, 55, 15, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_5) (planner *p) { + X(kdft_difsq_register) (p, q1bv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/dft/simd/common/q1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,994 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:59 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -dif -name q1bv_8 -include q1b.h -sign 1 */ + +/* + * This function contains 264 FP additions, 192 FP multiplications, + * (or, 184 additions, 112 multiplications, 80 fused multiply/add), + * 121 stack variables, 1 constants, and 128 memory accesses + */ +#include "q1b.h" + +static void q1bv_8(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, vs)) { + V T42, T43, T1U, T1V, T2Y, T2Z, TT, TS, T45, T44; + { + V T3, Te, T1E, T1P, Tv, Tp, T26, 
T20, T2b, T2m, T3M, T2x, T2D, T3X, TA; + V TL, T48, T4e, T17, T12, TW, T1i, T2I, T1z, T1t, T2T, T3f, T3q, T34, T3a; + V T3H, T3B, Ts, Tw, Tf, Ta, T23, T27, T1Q, T1L, T2A, T2E, T2n, T2i, T4b; + V T4f, T3Y, T3T, TZ, T13, TM, TH, T35, T2L, T3j, T1w, T1A, T1j, T1e, T36; + V T2O, T3C, T3i, T3k; + { + V T3d, T32, T3e, T3o, T3p, T33; + { + V T2v, T2w, T3V, T46, T3W; + { + V T1, T2, Tc, Td, T1C, T1D, T1N, T1O; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1C = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + T1D = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + T1N = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T1O = LD(&(x[WS(vs, 3) + WS(rs, 6)]), ms, &(x[WS(vs, 3)])); + { + V T29, T1Y, T1Z, T2a, T2k, T2l, Tn, To, T3K, T3L; + T29 = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + T3 = VSUB(T1, T2); + Tn = VADD(T1, T2); + Te = VSUB(Tc, Td); + To = VADD(Tc, Td); + T1E = VSUB(T1C, T1D); + T1Y = VADD(T1C, T1D); + T1P = VSUB(T1N, T1O); + T1Z = VADD(T1N, T1O); + T2a = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T2k = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 4)])); + T2l = LD(&(x[WS(vs, 4) + WS(rs, 6)]), ms, &(x[WS(vs, 4)])); + Tv = VADD(Tn, To); + Tp = VSUB(Tn, To); + T3K = LD(&(x[WS(vs, 7)]), ms, &(x[WS(vs, 7)])); + T3L = LD(&(x[WS(vs, 7) + WS(rs, 4)]), ms, &(x[WS(vs, 7)])); + T26 = VADD(T1Y, T1Z); + T20 = VSUB(T1Y, T1Z); + T2v = VADD(T29, T2a); + T2b = VSUB(T29, T2a); + T2w = VADD(T2k, T2l); + T2m = VSUB(T2k, T2l); + T3V = LD(&(x[WS(vs, 7) + WS(rs, 2)]), ms, &(x[WS(vs, 7)])); + T46 = VADD(T3K, T3L); + T3M = VSUB(T3K, T3L); + T3W = LD(&(x[WS(vs, 7) + WS(rs, 6)]), ms, &(x[WS(vs, 7)])); + } + } + { + V T15, TU, T16, T1g, TV, T1h; + { + V Ty, Tz, TJ, TK, T47; + Ty = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + Tz = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + TJ = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + T2x = 
VSUB(T2v, T2w); + T2D = VADD(T2v, T2w); + TK = LD(&(x[WS(vs, 1) + WS(rs, 6)]), ms, &(x[WS(vs, 1)])); + T47 = VADD(T3V, T3W); + T3X = VSUB(T3V, T3W); + T15 = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + TA = VSUB(Ty, Tz); + TU = VADD(Ty, Tz); + T16 = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + T1g = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + TL = VSUB(TJ, TK); + TV = VADD(TJ, TK); + T48 = VSUB(T46, T47); + T4e = VADD(T46, T47); + T1h = LD(&(x[WS(vs, 2) + WS(rs, 6)]), ms, &(x[WS(vs, 2)])); + } + { + V T2G, T1r, T2H, T2R, T1s, T2S; + T2G = LD(&(x[WS(vs, 5)]), ms, &(x[WS(vs, 5)])); + T17 = VSUB(T15, T16); + T1r = VADD(T15, T16); + T2H = LD(&(x[WS(vs, 5) + WS(rs, 4)]), ms, &(x[WS(vs, 5)])); + T12 = VADD(TU, TV); + TW = VSUB(TU, TV); + T2R = LD(&(x[WS(vs, 5) + WS(rs, 2)]), ms, &(x[WS(vs, 5)])); + T1i = VSUB(T1g, T1h); + T1s = VADD(T1g, T1h); + T2S = LD(&(x[WS(vs, 5) + WS(rs, 6)]), ms, &(x[WS(vs, 5)])); + T3d = LD(&(x[WS(vs, 6)]), ms, &(x[WS(vs, 6)])); + T2I = VSUB(T2G, T2H); + T32 = VADD(T2G, T2H); + T3e = LD(&(x[WS(vs, 6) + WS(rs, 4)]), ms, &(x[WS(vs, 6)])); + T3o = LD(&(x[WS(vs, 6) + WS(rs, 2)]), ms, &(x[WS(vs, 6)])); + T3p = LD(&(x[WS(vs, 6) + WS(rs, 6)]), ms, &(x[WS(vs, 6)])); + T1z = VADD(T1r, T1s); + T1t = VSUB(T1r, T1s); + T33 = VADD(T2R, T2S); + T2T = VSUB(T2R, T2S); + } + } + } + { + V T2y, T2e, T3Q, T2z, T2h, T49, T3P, T3R; + { + V T6, Tq, T1I, Tr, T9, T21, T1H, T1J; + { + V T4, T3z, T3A, T5, T7, T8, T1F, T1G; + T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3f = VSUB(T3d, T3e); + T3z = VADD(T3d, T3e); + T3q = VSUB(T3o, T3p); + T3A = VADD(T3o, T3p); + T5 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T34 = VSUB(T32, T33); + T3a = VADD(T32, T33); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1F = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1G = LD(&(x[WS(vs, 3) + WS(rs, 5)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T3H = VADD(T3z, T3A); + T3B = VSUB(T3z, T3A); + 
T6 = VSUB(T4, T5); + Tq = VADD(T4, T5); + T1I = LD(&(x[WS(vs, 3) + WS(rs, 7)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + Tr = VADD(T7, T8); + T9 = VSUB(T7, T8); + T21 = VADD(T1F, T1G); + T1H = VSUB(T1F, T1G); + T1J = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V T2f, T22, T1K, T2g, T2c, T2d, T3N, T3O; + T2c = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2d = LD(&(x[WS(vs, 4) + WS(rs, 5)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2f = LD(&(x[WS(vs, 4) + WS(rs, 7)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + Ts = VSUB(Tq, Tr); + Tw = VADD(Tq, Tr); + Tf = VSUB(T6, T9); + Ta = VADD(T6, T9); + T22 = VADD(T1I, T1J); + T1K = VSUB(T1I, T1J); + T2y = VADD(T2c, T2d); + T2e = VSUB(T2c, T2d); + T2g = LD(&(x[WS(vs, 4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T3N = LD(&(x[WS(vs, 7) + WS(rs, 1)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3O = LD(&(x[WS(vs, 7) + WS(rs, 5)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3Q = LD(&(x[WS(vs, 7) + WS(rs, 7)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T23 = VSUB(T21, T22); + T27 = VADD(T21, T22); + T1Q = VSUB(T1H, T1K); + T1L = VADD(T1H, T1K); + T2z = VADD(T2f, T2g); + T2h = VSUB(T2f, T2g); + T49 = VADD(T3N, T3O); + T3P = VSUB(T3N, T3O); + T3R = LD(&(x[WS(vs, 7) + WS(rs, 3)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + } + } + { + V TX, TD, T1b, TY, TG, T1u, T1a, T1c; + { + V TE, T4a, T3S, TF, TB, TC, T18, T19; + TB = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 1) + WS(rs, 5)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TE = LD(&(x[WS(vs, 1) + WS(rs, 7)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T2A = VSUB(T2y, T2z); + T2E = VADD(T2y, T2z); + T2n = VSUB(T2e, T2h); + T2i = VADD(T2e, T2h); + T4a = VADD(T3Q, T3R); + T3S = VSUB(T3Q, T3R); + TX = VADD(TB, TC); + TD = VSUB(TB, TC); + TF = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T18 = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T19 = LD(&(x[WS(vs, 2) + WS(rs, 5)]), ms, &(x[WS(vs, 2) 
+ WS(rs, 1)])); + T1b = LD(&(x[WS(vs, 2) + WS(rs, 7)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T4b = VSUB(T49, T4a); + T4f = VADD(T49, T4a); + T3Y = VSUB(T3P, T3S); + T3T = VADD(T3P, T3S); + TY = VADD(TE, TF); + TG = VSUB(TE, TF); + T1u = VADD(T18, T19); + T1a = VSUB(T18, T19); + T1c = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + } + { + V T2M, T1v, T1d, T2N, T2J, T2K, T3g, T3h; + T2J = LD(&(x[WS(vs, 5) + WS(rs, 1)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2K = LD(&(x[WS(vs, 5) + WS(rs, 5)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2M = LD(&(x[WS(vs, 5) + WS(rs, 7)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + TZ = VSUB(TX, TY); + T13 = VADD(TX, TY); + TM = VSUB(TD, TG); + TH = VADD(TD, TG); + T1v = VADD(T1b, T1c); + T1d = VSUB(T1b, T1c); + T35 = VADD(T2J, T2K); + T2L = VSUB(T2J, T2K); + T2N = LD(&(x[WS(vs, 5) + WS(rs, 3)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T3g = LD(&(x[WS(vs, 6) + WS(rs, 1)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3h = LD(&(x[WS(vs, 6) + WS(rs, 5)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3j = LD(&(x[WS(vs, 6) + WS(rs, 7)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T1w = VSUB(T1u, T1v); + T1A = VADD(T1u, T1v); + T1j = VSUB(T1a, T1d); + T1e = VADD(T1a, T1d); + T36 = VADD(T2M, T2N); + T2O = VSUB(T2M, T2N); + T3C = VADD(T3g, T3h); + T3i = VSUB(T3g, T3h); + T3k = LD(&(x[WS(vs, 6) + WS(rs, 3)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + } + } + } + } + { + V T3b, T2U, T2P, T3I, T3r, T3m, T11, T25, T39, T4d; + { + V T37, T3E, T2B, T24; + { + V T3D, T3l, Tt, T4c; + ST(&(x[0]), VADD(Tv, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1z, T1A), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VADD(T4e, T4f), ms, &(x[WS(rs, 1)])); + T37 = VSUB(T35, T36); + T3b = VADD(T35, T36); + T2U = VSUB(T2L, T2O); + T2P = VADD(T2L, T2O); + T3D = VADD(T3j, T3k); + T3l = VSUB(T3j, T3k); + ST(&(x[WS(rs, 4)]), VADD(T2D, T2E), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T26, T27), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T12, T13), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T3a, 
T3b), ms, &(x[WS(rs, 1)])); + Tt = BYTW(&(W[TWVL * 10]), VFNMSI(Ts, Tp)); + T4c = BYTW(&(W[TWVL * 10]), VFNMSI(T4b, T48)); + T3E = VSUB(T3C, T3D); + T3I = VADD(T3C, T3D); + T3r = VSUB(T3i, T3l); + T3m = VADD(T3i, T3l); + T2B = BYTW(&(W[TWVL * 10]), VFNMSI(T2A, T2x)); + T24 = BYTW(&(W[TWVL * 10]), VFNMSI(T23, T20)); + ST(&(x[WS(vs, 6)]), Tt, ms, &(x[WS(vs, 6)])); + ST(&(x[WS(vs, 6) + WS(rs, 7)]), T4c, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + } + { + V T38, T1y, Tu, T10, T1x, T3F, T2C, T3G; + T10 = BYTW(&(W[TWVL * 10]), VFNMSI(TZ, TW)); + ST(&(x[WS(rs, 6)]), VADD(T3H, T3I), ms, &(x[0])); + T1x = BYTW(&(W[TWVL * 10]), VFNMSI(T1w, T1t)); + T3F = BYTW(&(W[TWVL * 10]), VFNMSI(T3E, T3B)); + ST(&(x[WS(vs, 6) + WS(rs, 4)]), T2B, ms, &(x[WS(vs, 6)])); + ST(&(x[WS(vs, 6) + WS(rs, 3)]), T24, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T38 = BYTW(&(W[TWVL * 10]), VFNMSI(T37, T34)); + T1y = BYTW(&(W[TWVL * 2]), VFMAI(T1w, T1t)); + ST(&(x[WS(vs, 6) + WS(rs, 1)]), T10, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + Tu = BYTW(&(W[TWVL * 2]), VFMAI(Ts, Tp)); + ST(&(x[WS(vs, 6) + WS(rs, 2)]), T1x, ms, &(x[WS(vs, 6)])); + ST(&(x[WS(vs, 6) + WS(rs, 6)]), T3F, ms, &(x[WS(vs, 6)])); + T2C = BYTW(&(W[TWVL * 2]), VFMAI(T2A, T2x)); + T3G = BYTW(&(W[TWVL * 2]), VFMAI(T3E, T3B)); + ST(&(x[WS(vs, 6) + WS(rs, 5)]), T38, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), T1y, ms, &(x[WS(vs, 2)])); + T11 = BYTW(&(W[TWVL * 2]), VFMAI(TZ, TW)); + ST(&(x[WS(vs, 2)]), Tu, ms, &(x[WS(vs, 2)])); + T25 = BYTW(&(W[TWVL * 2]), VFMAI(T23, T20)); + T39 = BYTW(&(W[TWVL * 2]), VFMAI(T37, T34)); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T2C, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 6)]), T3G, ms, &(x[WS(vs, 2)])); + T4d = BYTW(&(W[TWVL * 2]), VFMAI(T4b, T48)); + } + } + { + V Tj, Tk, T2r, T2j, T2o, T2s, Ti, Th, T1M, T1R, T41, T40; + { + V T3c, T4g, T3J, T2F, Tx, T1B; + Tx = BYTW(&(W[TWVL * 6]), VSUB(Tv, Tw)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), T11, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T1B = BYTW(&(W[TWVL * 6]), 
VSUB(T1z, T1A)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T25, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 5)]), T39, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T3c = BYTW(&(W[TWVL * 6]), VSUB(T3a, T3b)); + T4g = BYTW(&(W[TWVL * 6]), VSUB(T4e, T4f)); + ST(&(x[WS(vs, 2) + WS(rs, 7)]), T4d, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 4)]), Tx, ms, &(x[WS(vs, 4)])); + T3J = BYTW(&(W[TWVL * 6]), VSUB(T3H, T3I)); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), T1B, ms, &(x[WS(vs, 4)])); + T2F = BYTW(&(W[TWVL * 6]), VSUB(T2D, T2E)); + { + V T14, Tb, Tg, T28, T3U, T3Z; + T28 = BYTW(&(W[TWVL * 6]), VSUB(T26, T27)); + ST(&(x[WS(vs, 4) + WS(rs, 5)]), T3c, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 7)]), T4g, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T14 = BYTW(&(W[TWVL * 6]), VSUB(T12, T13)); + Tj = VFMA(LDK(KP707106781), Ta, T3); + Tb = VFNMS(LDK(KP707106781), Ta, T3); + ST(&(x[WS(vs, 4) + WS(rs, 6)]), T3J, ms, &(x[WS(vs, 4)])); + Tk = VFMA(LDK(KP707106781), Tf, Te); + Tg = VFNMS(LDK(KP707106781), Tf, Te); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T2F, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T28, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T3U = VFNMS(LDK(KP707106781), T3T, T3M); + T42 = VFMA(LDK(KP707106781), T3T, T3M); + T43 = VFMA(LDK(KP707106781), T3Y, T3X); + T3Z = VFNMS(LDK(KP707106781), T3Y, T3X); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), T14, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2r = VFMA(LDK(KP707106781), T2i, T2b); + T2j = VFNMS(LDK(KP707106781), T2i, T2b); + T2o = VFNMS(LDK(KP707106781), T2n, T2m); + T2s = VFMA(LDK(KP707106781), T2n, T2m); + Ti = BYTW(&(W[TWVL * 8]), VFMAI(Tg, Tb)); + Th = BYTW(&(W[TWVL * 4]), VFNMSI(Tg, Tb)); + T1U = VFMA(LDK(KP707106781), T1L, T1E); + T1M = VFNMS(LDK(KP707106781), T1L, T1E); + T1R = VFNMS(LDK(KP707106781), T1Q, T1P); + T1V = VFMA(LDK(KP707106781), T1Q, T1P); + T41 = BYTW(&(W[TWVL * 8]), VFMAI(T3Z, T3U)); + T40 = BYTW(&(W[TWVL * 4]), VFNMSI(T3Z, T3U)); + } + } + { + V TQ, TR, T1n, T1o, T3v, T3w; + { + V TI, TN, T1f, T1k, T3n, 
T3s; + { + V T1T, T1S, T2q, T2p; + TQ = VFMA(LDK(KP707106781), TH, TA); + TI = VFNMS(LDK(KP707106781), TH, TA); + T2q = BYTW(&(W[TWVL * 8]), VFMAI(T2o, T2j)); + T2p = BYTW(&(W[TWVL * 4]), VFNMSI(T2o, T2j)); + ST(&(x[WS(vs, 5)]), Ti, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3)]), Th, ms, &(x[WS(vs, 3)])); + T1T = BYTW(&(W[TWVL * 8]), VFMAI(T1R, T1M)); + T1S = BYTW(&(W[TWVL * 4]), VFNMSI(T1R, T1M)); + ST(&(x[WS(vs, 5) + WS(rs, 7)]), T41, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 7)]), T40, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 4)]), T2q, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T2p, ms, &(x[WS(vs, 3)])); + TN = VFNMS(LDK(KP707106781), TM, TL); + TR = VFMA(LDK(KP707106781), TM, TL); + T1n = VFMA(LDK(KP707106781), T1e, T17); + T1f = VFNMS(LDK(KP707106781), T1e, T17); + ST(&(x[WS(vs, 5) + WS(rs, 3)]), T1T, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1S, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1k = VFNMS(LDK(KP707106781), T1j, T1i); + T1o = VFMA(LDK(KP707106781), T1j, T1i); + T3v = VFMA(LDK(KP707106781), T3m, T3f); + T3n = VFNMS(LDK(KP707106781), T3m, T3f); + T3s = VFNMS(LDK(KP707106781), T3r, T3q); + T3w = VFMA(LDK(KP707106781), T3r, T3q); + } + { + V T2Q, TP, TO, T2V, T2X, T2W; + T2Y = VFMA(LDK(KP707106781), T2P, T2I); + T2Q = VFNMS(LDK(KP707106781), T2P, T2I); + TP = BYTW(&(W[TWVL * 8]), VFMAI(TN, TI)); + TO = BYTW(&(W[TWVL * 4]), VFNMSI(TN, TI)); + T2V = VFNMS(LDK(KP707106781), T2U, T2T); + T2Z = VFMA(LDK(KP707106781), T2U, T2T); + { + V T1m, T1l, T3u, T3t; + T1m = BYTW(&(W[TWVL * 8]), VFMAI(T1k, T1f)); + T1l = BYTW(&(W[TWVL * 4]), VFNMSI(T1k, T1f)); + T3u = BYTW(&(W[TWVL * 8]), VFMAI(T3s, T3n)); + T3t = BYTW(&(W[TWVL * 4]), VFNMSI(T3s, T3n)); + ST(&(x[WS(vs, 5) + WS(rs, 1)]), TP, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TO, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T2X = BYTW(&(W[TWVL * 8]), VFMAI(T2V, T2Q)); + T2W = BYTW(&(W[TWVL * 4]), VFNMSI(T2V, T2Q)); + 
ST(&(x[WS(vs, 5) + WS(rs, 2)]), T1m, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), T1l, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 6)]), T3u, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 6)]), T3t, ms, &(x[WS(vs, 3)])); + } + ST(&(x[WS(vs, 5) + WS(rs, 5)]), T2X, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 5)]), T2W, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + } + { + V T3y, T3x, T1q, T1p; + T1q = BYTW(&(W[TWVL * 12]), VFNMSI(T1o, T1n)); + T1p = BYTW(&(W[0]), VFMAI(T1o, T1n)); + { + V Tm, Tl, T2u, T2t; + Tm = BYTW(&(W[TWVL * 12]), VFNMSI(Tk, Tj)); + Tl = BYTW(&(W[0]), VFMAI(Tk, Tj)); + T2u = BYTW(&(W[TWVL * 12]), VFNMSI(T2s, T2r)); + T2t = BYTW(&(W[0]), VFMAI(T2s, T2r)); + ST(&(x[WS(vs, 7) + WS(rs, 2)]), T1q, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), T1p, ms, &(x[WS(vs, 1)])); + T3y = BYTW(&(W[TWVL * 12]), VFNMSI(T3w, T3v)); + T3x = BYTW(&(W[0]), VFMAI(T3w, T3v)); + ST(&(x[WS(vs, 7)]), Tm, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1)]), Tl, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 4)]), T2u, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T2t, ms, &(x[WS(vs, 1)])); + } + ST(&(x[WS(vs, 7) + WS(rs, 6)]), T3y, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 6)]), T3x, ms, &(x[WS(vs, 1)])); + TT = BYTW(&(W[TWVL * 12]), VFNMSI(TR, TQ)); + TS = BYTW(&(W[0]), VFMAI(TR, TQ)); + } + } + } + } + } + { + V T1X, T1W, T31, T30; + T1X = BYTW(&(W[TWVL * 12]), VFNMSI(T1V, T1U)); + T1W = BYTW(&(W[0]), VFMAI(T1V, T1U)); + T31 = BYTW(&(W[TWVL * 12]), VFNMSI(T2Z, T2Y)); + T30 = BYTW(&(W[0]), VFMAI(T2Z, T2Y)); + ST(&(x[WS(vs, 7) + WS(rs, 1)]), TT, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), TS, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T45 = BYTW(&(W[TWVL * 12]), VFNMSI(T43, T42)); + T44 = BYTW(&(W[0]), VFMAI(T43, T42)); + ST(&(x[WS(vs, 7) + WS(rs, 3)]), T1X, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1W, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 5)]), T31, 
ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 5)]), T30, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + ST(&(x[WS(vs, 7) + WS(rs, 7)]), T45, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 7)]), T44, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("q1bv_8"), twinstr, &GENUS, {184, 112, 80, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_8) (planner *p) { + X(kdft_difsq_register) (p, q1bv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -dif -name q1bv_8 -include q1b.h -sign 1 */ + +/* + * This function contains 264 FP additions, 128 FP multiplications, + * (or, 264 additions, 128 multiplications, 0 fused multiply/add), + * 77 stack variables, 1 constants, and 128 memory accesses + */ +#include "q1b.h" + +static void q1bv_8(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, vs)) { + V Ta, Tv, Te, Tp, T1L, T26, T1P, T20, T2i, T2D, T2m, T2x, T3T, T4e, T3X; + V T48, TH, T12, TL, TW, T1e, T1z, T1i, T1t, T2P, T3a, T2T, T34, T3m, T3H; + V T3q, T3B, T7, Tw, Tf, Ts, T1I, T27, T1Q, T23, T2f, T2E, T2n, T2A, T3Q; + V T4f, T3Y, T4b, TE, T13, TM, TZ, T1b, T1A, T1j, T1w, T2M, T3b, T2U, T37; + V T3j, T3I, T3r, T3E, T28, T14; + { + V T8, T9, To, Tc, Td, Tn; + T8 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + To = VADD(T8, T9); + Tc = LD(&(x[0]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = VADD(Tc, Td); + Ta = VSUB(T8, T9); + Tv = 
VADD(Tn, To); + Te = VSUB(Tc, Td); + Tp = VSUB(Tn, To); + } + { + V T1J, T1K, T1Z, T1N, T1O, T1Y; + T1J = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T1K = LD(&(x[WS(vs, 3) + WS(rs, 6)]), ms, &(x[WS(vs, 3)])); + T1Z = VADD(T1J, T1K); + T1N = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + T1O = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + T1Y = VADD(T1N, T1O); + T1L = VSUB(T1J, T1K); + T26 = VADD(T1Y, T1Z); + T1P = VSUB(T1N, T1O); + T20 = VSUB(T1Y, T1Z); + } + { + V T2g, T2h, T2w, T2k, T2l, T2v; + T2g = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 4)])); + T2h = LD(&(x[WS(vs, 4) + WS(rs, 6)]), ms, &(x[WS(vs, 4)])); + T2w = VADD(T2g, T2h); + T2k = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + T2l = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T2v = VADD(T2k, T2l); + T2i = VSUB(T2g, T2h); + T2D = VADD(T2v, T2w); + T2m = VSUB(T2k, T2l); + T2x = VSUB(T2v, T2w); + } + { + V T3R, T3S, T47, T3V, T3W, T46; + T3R = LD(&(x[WS(vs, 7) + WS(rs, 2)]), ms, &(x[WS(vs, 7)])); + T3S = LD(&(x[WS(vs, 7) + WS(rs, 6)]), ms, &(x[WS(vs, 7)])); + T47 = VADD(T3R, T3S); + T3V = LD(&(x[WS(vs, 7)]), ms, &(x[WS(vs, 7)])); + T3W = LD(&(x[WS(vs, 7) + WS(rs, 4)]), ms, &(x[WS(vs, 7)])); + T46 = VADD(T3V, T3W); + T3T = VSUB(T3R, T3S); + T4e = VADD(T46, T47); + T3X = VSUB(T3V, T3W); + T48 = VSUB(T46, T47); + } + { + V TF, TG, TV, TJ, TK, TU; + TF = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + TG = LD(&(x[WS(vs, 1) + WS(rs, 6)]), ms, &(x[WS(vs, 1)])); + TV = VADD(TF, TG); + TJ = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + TK = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + TU = VADD(TJ, TK); + TH = VSUB(TF, TG); + T12 = VADD(TU, TV); + TL = VSUB(TJ, TK); + TW = VSUB(TU, TV); + } + { + V T1c, T1d, T1s, T1g, T1h, T1r; + T1c = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + T1d = LD(&(x[WS(vs, 2) + WS(rs, 6)]), ms, &(x[WS(vs, 2)])); + T1s = VADD(T1c, T1d); + T1g = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + T1h = LD(&(x[WS(vs, 2) + WS(rs, 4)]), 
ms, &(x[WS(vs, 2)])); + T1r = VADD(T1g, T1h); + T1e = VSUB(T1c, T1d); + T1z = VADD(T1r, T1s); + T1i = VSUB(T1g, T1h); + T1t = VSUB(T1r, T1s); + } + { + V T2N, T2O, T33, T2R, T2S, T32; + T2N = LD(&(x[WS(vs, 5) + WS(rs, 2)]), ms, &(x[WS(vs, 5)])); + T2O = LD(&(x[WS(vs, 5) + WS(rs, 6)]), ms, &(x[WS(vs, 5)])); + T33 = VADD(T2N, T2O); + T2R = LD(&(x[WS(vs, 5)]), ms, &(x[WS(vs, 5)])); + T2S = LD(&(x[WS(vs, 5) + WS(rs, 4)]), ms, &(x[WS(vs, 5)])); + T32 = VADD(T2R, T2S); + T2P = VSUB(T2N, T2O); + T3a = VADD(T32, T33); + T2T = VSUB(T2R, T2S); + T34 = VSUB(T32, T33); + } + { + V T3k, T3l, T3A, T3o, T3p, T3z; + T3k = LD(&(x[WS(vs, 6) + WS(rs, 2)]), ms, &(x[WS(vs, 6)])); + T3l = LD(&(x[WS(vs, 6) + WS(rs, 6)]), ms, &(x[WS(vs, 6)])); + T3A = VADD(T3k, T3l); + T3o = LD(&(x[WS(vs, 6)]), ms, &(x[WS(vs, 6)])); + T3p = LD(&(x[WS(vs, 6) + WS(rs, 4)]), ms, &(x[WS(vs, 6)])); + T3z = VADD(T3o, T3p); + T3m = VSUB(T3k, T3l); + T3H = VADD(T3z, T3A); + T3q = VSUB(T3o, T3p); + T3B = VSUB(T3z, T3A); + } + { + V T3, Tq, T6, Tr; + { + V T1, T2, T4, T5; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T3 = VSUB(T1, T2); + Tq = VADD(T1, T2); + T4 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T6 = VSUB(T4, T5); + Tr = VADD(T4, T5); + } + T7 = VMUL(LDK(KP707106781), VSUB(T3, T6)); + Tw = VADD(Tq, Tr); + Tf = VMUL(LDK(KP707106781), VADD(T3, T6)); + Ts = VBYI(VSUB(Tq, Tr)); + } + { + V T1E, T21, T1H, T22; + { + V T1C, T1D, T1F, T1G; + T1C = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1D = LD(&(x[WS(vs, 3) + WS(rs, 5)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1E = VSUB(T1C, T1D); + T21 = VADD(T1C, T1D); + T1F = LD(&(x[WS(vs, 3) + WS(rs, 7)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1G = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1H = VSUB(T1F, T1G); + T22 = VADD(T1F, T1G); + } + T1I = VMUL(LDK(KP707106781), VSUB(T1E, T1H)); + T27 = VADD(T21, T22); + T1Q = 
VMUL(LDK(KP707106781), VADD(T1E, T1H)); + T23 = VBYI(VSUB(T21, T22)); + } + { + V T2b, T2y, T2e, T2z; + { + V T29, T2a, T2c, T2d; + T29 = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2a = LD(&(x[WS(vs, 4) + WS(rs, 5)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2b = VSUB(T29, T2a); + T2y = VADD(T29, T2a); + T2c = LD(&(x[WS(vs, 4) + WS(rs, 7)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2d = LD(&(x[WS(vs, 4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2e = VSUB(T2c, T2d); + T2z = VADD(T2c, T2d); + } + T2f = VMUL(LDK(KP707106781), VSUB(T2b, T2e)); + T2E = VADD(T2y, T2z); + T2n = VMUL(LDK(KP707106781), VADD(T2b, T2e)); + T2A = VBYI(VSUB(T2y, T2z)); + } + { + V T3M, T49, T3P, T4a; + { + V T3K, T3L, T3N, T3O; + T3K = LD(&(x[WS(vs, 7) + WS(rs, 1)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3L = LD(&(x[WS(vs, 7) + WS(rs, 5)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3M = VSUB(T3K, T3L); + T49 = VADD(T3K, T3L); + T3N = LD(&(x[WS(vs, 7) + WS(rs, 7)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3O = LD(&(x[WS(vs, 7) + WS(rs, 3)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3P = VSUB(T3N, T3O); + T4a = VADD(T3N, T3O); + } + T3Q = VMUL(LDK(KP707106781), VSUB(T3M, T3P)); + T4f = VADD(T49, T4a); + T3Y = VMUL(LDK(KP707106781), VADD(T3M, T3P)); + T4b = VBYI(VSUB(T49, T4a)); + } + { + V TA, TX, TD, TY; + { + V Ty, Tz, TB, TC; + Ty = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tz = LD(&(x[WS(vs, 1) + WS(rs, 5)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TA = VSUB(Ty, Tz); + TX = VADD(Ty, Tz); + TB = LD(&(x[WS(vs, 1) + WS(rs, 7)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TD = VSUB(TB, TC); + TY = VADD(TB, TC); + } + TE = VMUL(LDK(KP707106781), VSUB(TA, TD)); + T13 = VADD(TX, TY); + TM = VMUL(LDK(KP707106781), VADD(TA, TD)); + TZ = VBYI(VSUB(TX, TY)); + } + { + V T17, T1u, T1a, T1v; + { + V T15, T16, T18, T19; + T15 = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T16 = 
LD(&(x[WS(vs, 2) + WS(rs, 5)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T17 = VSUB(T15, T16); + T1u = VADD(T15, T16); + T18 = LD(&(x[WS(vs, 2) + WS(rs, 7)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T19 = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T1a = VSUB(T18, T19); + T1v = VADD(T18, T19); + } + T1b = VMUL(LDK(KP707106781), VSUB(T17, T1a)); + T1A = VADD(T1u, T1v); + T1j = VMUL(LDK(KP707106781), VADD(T17, T1a)); + T1w = VBYI(VSUB(T1u, T1v)); + } + { + V T2I, T35, T2L, T36; + { + V T2G, T2H, T2J, T2K; + T2G = LD(&(x[WS(vs, 5) + WS(rs, 1)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2H = LD(&(x[WS(vs, 5) + WS(rs, 5)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2I = VSUB(T2G, T2H); + T35 = VADD(T2G, T2H); + T2J = LD(&(x[WS(vs, 5) + WS(rs, 7)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2K = LD(&(x[WS(vs, 5) + WS(rs, 3)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2L = VSUB(T2J, T2K); + T36 = VADD(T2J, T2K); + } + T2M = VMUL(LDK(KP707106781), VSUB(T2I, T2L)); + T3b = VADD(T35, T36); + T2U = VMUL(LDK(KP707106781), VADD(T2I, T2L)); + T37 = VBYI(VSUB(T35, T36)); + } + { + V T3f, T3C, T3i, T3D; + { + V T3d, T3e, T3g, T3h; + T3d = LD(&(x[WS(vs, 6) + WS(rs, 1)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3e = LD(&(x[WS(vs, 6) + WS(rs, 5)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3f = VSUB(T3d, T3e); + T3C = VADD(T3d, T3e); + T3g = LD(&(x[WS(vs, 6) + WS(rs, 7)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3h = LD(&(x[WS(vs, 6) + WS(rs, 3)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3i = VSUB(T3g, T3h); + T3D = VADD(T3g, T3h); + } + T3j = VMUL(LDK(KP707106781), VSUB(T3f, T3i)); + T3I = VADD(T3C, T3D); + T3r = VMUL(LDK(KP707106781), VADD(T3f, T3i)); + T3E = VBYI(VSUB(T3C, T3D)); + } + ST(&(x[0]), VADD(Tv, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1z, T1A), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VADD(T3a, T3b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T4e, T4f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VADD(T3H, T3I), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T2D, T2E), ms, &(x[0])); + 
{ + V Tt, T4c, T2B, T24; + ST(&(x[WS(rs, 3)]), VADD(T26, T27), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T12, T13), ms, &(x[WS(rs, 1)])); + Tt = BYTW(&(W[TWVL * 10]), VSUB(Tp, Ts)); + ST(&(x[WS(vs, 6)]), Tt, ms, &(x[WS(vs, 6)])); + T4c = BYTW(&(W[TWVL * 10]), VSUB(T48, T4b)); + ST(&(x[WS(vs, 6) + WS(rs, 7)]), T4c, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T2B = BYTW(&(W[TWVL * 10]), VSUB(T2x, T2A)); + ST(&(x[WS(vs, 6) + WS(rs, 4)]), T2B, ms, &(x[WS(vs, 6)])); + T24 = BYTW(&(W[TWVL * 10]), VSUB(T20, T23)); + ST(&(x[WS(vs, 6) + WS(rs, 3)]), T24, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + } + { + V T10, T1x, T3F, T38, T1y, Tu; + T10 = BYTW(&(W[TWVL * 10]), VSUB(TW, TZ)); + ST(&(x[WS(vs, 6) + WS(rs, 1)]), T10, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T1x = BYTW(&(W[TWVL * 10]), VSUB(T1t, T1w)); + ST(&(x[WS(vs, 6) + WS(rs, 2)]), T1x, ms, &(x[WS(vs, 6)])); + T3F = BYTW(&(W[TWVL * 10]), VSUB(T3B, T3E)); + ST(&(x[WS(vs, 6) + WS(rs, 6)]), T3F, ms, &(x[WS(vs, 6)])); + T38 = BYTW(&(W[TWVL * 10]), VSUB(T34, T37)); + ST(&(x[WS(vs, 6) + WS(rs, 5)]), T38, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T1y = BYTW(&(W[TWVL * 2]), VADD(T1t, T1w)); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), T1y, ms, &(x[WS(vs, 2)])); + Tu = BYTW(&(W[TWVL * 2]), VADD(Tp, Ts)); + ST(&(x[WS(vs, 2)]), Tu, ms, &(x[WS(vs, 2)])); + } + { + V T2C, T3G, T11, T25, T39, T4d; + T2C = BYTW(&(W[TWVL * 2]), VADD(T2x, T2A)); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T2C, ms, &(x[WS(vs, 2)])); + T3G = BYTW(&(W[TWVL * 2]), VADD(T3B, T3E)); + ST(&(x[WS(vs, 2) + WS(rs, 6)]), T3G, ms, &(x[WS(vs, 2)])); + T11 = BYTW(&(W[TWVL * 2]), VADD(TW, TZ)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), T11, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T25 = BYTW(&(W[TWVL * 2]), VADD(T20, T23)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T25, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T39 = BYTW(&(W[TWVL * 2]), VADD(T34, T37)); + ST(&(x[WS(vs, 2) + WS(rs, 5)]), T39, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T4d = BYTW(&(W[TWVL * 2]), VADD(T48, T4b)); + ST(&(x[WS(vs, 2) + WS(rs, 7)]), T4d, ms, &(x[WS(vs, 2) + 
WS(rs, 1)])); + } + { + V Tx, T1B, T3c, T4g, T3J, T2F; + Tx = BYTW(&(W[TWVL * 6]), VSUB(Tv, Tw)); + ST(&(x[WS(vs, 4)]), Tx, ms, &(x[WS(vs, 4)])); + T1B = BYTW(&(W[TWVL * 6]), VSUB(T1z, T1A)); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), T1B, ms, &(x[WS(vs, 4)])); + T3c = BYTW(&(W[TWVL * 6]), VSUB(T3a, T3b)); + ST(&(x[WS(vs, 4) + WS(rs, 5)]), T3c, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T4g = BYTW(&(W[TWVL * 6]), VSUB(T4e, T4f)); + ST(&(x[WS(vs, 4) + WS(rs, 7)]), T4g, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T3J = BYTW(&(W[TWVL * 6]), VSUB(T3H, T3I)); + ST(&(x[WS(vs, 4) + WS(rs, 6)]), T3J, ms, &(x[WS(vs, 4)])); + T2F = BYTW(&(W[TWVL * 6]), VSUB(T2D, T2E)); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T2F, ms, &(x[WS(vs, 4)])); + } + T28 = BYTW(&(W[TWVL * 6]), VSUB(T26, T27)); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T28, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T14 = BYTW(&(W[TWVL * 6]), VSUB(T12, T13)); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), T14, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + { + V Th, Ti, Tb, Tg; + Tb = VBYI(VSUB(T7, Ta)); + Tg = VSUB(Te, Tf); + Th = BYTW(&(W[TWVL * 4]), VADD(Tb, Tg)); + Ti = BYTW(&(W[TWVL * 8]), VSUB(Tg, Tb)); + ST(&(x[WS(vs, 3)]), Th, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5)]), Ti, ms, &(x[WS(vs, 5)])); + } + { + V T40, T41, T3U, T3Z; + T3U = VBYI(VSUB(T3Q, T3T)); + T3Z = VSUB(T3X, T3Y); + T40 = BYTW(&(W[TWVL * 4]), VADD(T3U, T3Z)); + T41 = BYTW(&(W[TWVL * 8]), VSUB(T3Z, T3U)); + ST(&(x[WS(vs, 3) + WS(rs, 7)]), T40, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 7)]), T41, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + } + { + V T2p, T2q, T2j, T2o; + T2j = VBYI(VSUB(T2f, T2i)); + T2o = VSUB(T2m, T2n); + T2p = BYTW(&(W[TWVL * 4]), VADD(T2j, T2o)); + T2q = BYTW(&(W[TWVL * 8]), VSUB(T2o, T2j)); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T2p, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 4)]), T2q, ms, &(x[WS(vs, 5)])); + } + { + V T1S, T1T, T1M, T1R; + T1M = VBYI(VSUB(T1I, T1L)); + T1R = VSUB(T1P, T1Q); + T1S = BYTW(&(W[TWVL * 4]), VADD(T1M, T1R)); + T1T = BYTW(&(W[TWVL * 8]), VSUB(T1R, 
T1M)); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1S, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 3)]), T1T, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + } + { + V TO, TP, TI, TN; + TI = VBYI(VSUB(TE, TH)); + TN = VSUB(TL, TM); + TO = BYTW(&(W[TWVL * 4]), VADD(TI, TN)); + TP = BYTW(&(W[TWVL * 8]), VSUB(TN, TI)); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TO, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 1)]), TP, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + } + { + V T1l, T1m, T1f, T1k; + T1f = VBYI(VSUB(T1b, T1e)); + T1k = VSUB(T1i, T1j); + T1l = BYTW(&(W[TWVL * 4]), VADD(T1f, T1k)); + T1m = BYTW(&(W[TWVL * 8]), VSUB(T1k, T1f)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), T1l, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 2)]), T1m, ms, &(x[WS(vs, 5)])); + } + { + V T3t, T3u, T3n, T3s; + T3n = VBYI(VSUB(T3j, T3m)); + T3s = VSUB(T3q, T3r); + T3t = BYTW(&(W[TWVL * 4]), VADD(T3n, T3s)); + T3u = BYTW(&(W[TWVL * 8]), VSUB(T3s, T3n)); + ST(&(x[WS(vs, 3) + WS(rs, 6)]), T3t, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 6)]), T3u, ms, &(x[WS(vs, 5)])); + } + { + V T2W, T2X, T2Q, T2V; + T2Q = VBYI(VSUB(T2M, T2P)); + T2V = VSUB(T2T, T2U); + T2W = BYTW(&(W[TWVL * 4]), VADD(T2Q, T2V)); + T2X = BYTW(&(W[TWVL * 8]), VSUB(T2V, T2Q)); + ST(&(x[WS(vs, 3) + WS(rs, 5)]), T2W, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 5)]), T2X, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + } + { + V T1p, T1q, T1n, T1o; + T1n = VBYI(VADD(T1e, T1b)); + T1o = VADD(T1i, T1j); + T1p = BYTW(&(W[0]), VADD(T1n, T1o)); + T1q = BYTW(&(W[TWVL * 12]), VSUB(T1o, T1n)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), T1p, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 2)]), T1q, ms, &(x[WS(vs, 7)])); + } + { + V Tl, Tm, Tj, Tk; + Tj = VBYI(VADD(Ta, T7)); + Tk = VADD(Te, Tf); + Tl = BYTW(&(W[0]), VADD(Tj, Tk)); + Tm = BYTW(&(W[TWVL * 12]), VSUB(Tk, Tj)); + ST(&(x[WS(vs, 1)]), Tl, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7)]), Tm, ms, &(x[WS(vs, 7)])); + } + { + V T2t, T2u, T2r, T2s; + T2r = VBYI(VADD(T2i, T2f)); + T2s = 
VADD(T2m, T2n); + T2t = BYTW(&(W[0]), VADD(T2r, T2s)); + T2u = BYTW(&(W[TWVL * 12]), VSUB(T2s, T2r)); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T2t, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 4)]), T2u, ms, &(x[WS(vs, 7)])); + } + { + V T3x, T3y, T3v, T3w; + T3v = VBYI(VADD(T3m, T3j)); + T3w = VADD(T3q, T3r); + T3x = BYTW(&(W[0]), VADD(T3v, T3w)); + T3y = BYTW(&(W[TWVL * 12]), VSUB(T3w, T3v)); + ST(&(x[WS(vs, 1) + WS(rs, 6)]), T3x, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 6)]), T3y, ms, &(x[WS(vs, 7)])); + } + { + V TS, TT, TQ, TR; + TQ = VBYI(VADD(TH, TE)); + TR = VADD(TL, TM); + TS = BYTW(&(W[0]), VADD(TQ, TR)); + TT = BYTW(&(W[TWVL * 12]), VSUB(TR, TQ)); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), TS, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 1)]), TT, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + } + { + V T1W, T1X, T1U, T1V; + T1U = VBYI(VADD(T1L, T1I)); + T1V = VADD(T1P, T1Q); + T1W = BYTW(&(W[0]), VADD(T1U, T1V)); + T1X = BYTW(&(W[TWVL * 12]), VSUB(T1V, T1U)); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1W, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 3)]), T1X, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + } + { + V T30, T31, T2Y, T2Z; + T2Y = VBYI(VADD(T2P, T2M)); + T2Z = VADD(T2T, T2U); + T30 = BYTW(&(W[0]), VADD(T2Y, T2Z)); + T31 = BYTW(&(W[TWVL * 12]), VSUB(T2Z, T2Y)); + ST(&(x[WS(vs, 1) + WS(rs, 5)]), T30, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 5)]), T31, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + } + { + V T44, T45, T42, T43; + T42 = VBYI(VADD(T3T, T3Q)); + T43 = VADD(T3X, T3Y); + T44 = BYTW(&(W[0]), VADD(T42, T43)); + T45 = BYTW(&(W[TWVL * 12]), VSUB(T43, T42)); + ST(&(x[WS(vs, 1) + WS(rs, 7)]), T44, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 7)]), T45, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, 
XSIMD_STRING("q1bv_8"), twinstr, &GENUS, {264, 128, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1bv_8) (planner *p) { + X(kdft_difsq_register) (p, q1bv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1fv_2 -include q1f.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 6 additions, 4 multiplications, 0 fused multiply/add), + * 8 stack variables, 0 constants, and 8 memory accesses + */ +#include "q1f.h" + /* q1fv_2 (HAVE_FMA build): for each step of the m loop, loads a 2x2 tile of x addressed by the rs and vs strides, forms the sum and the difference of each vs-row pair, and stores them back into the same four slots. The two sums go to x[0] and x[WS(rs, 1)]; the two differences, after BYTWJ with W[0], go to the two slots at offset WS(vs, 1). Only ri is used as the data pointer; ii is not referenced in this body. NOTE(review): BYTWJ presumably multiplies by a conjugated twiddle factor; confirm against the SIMD support headers. */ +static void q1fv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; /* all loads and stores below go through x, i.e. through ri */ + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) { /* W is pre-offset for the starting index mb; each pass advances m by VL, x by VL * ms and W by TWVL * 2 */ + V T1, T2, T4, T5, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); /* T1, T2: first pair, vs offset 0 */ + T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); /* T4, T5: second pair, vs offset 1; all four loads precede the first store, which matters because the stores overwrite these same slots */ + ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); /* sum of the first pair */ + T3 = BYTWJ(&(W[0]), VSUB(T1, T2)); /* difference of the first pair, passed through BYTWJ with W[0] */ + ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); /* sum of the second pair is written at rs offset 1, vs offset 0 */ + T6 = BYTWJ(&(W[0]), VSUB(T4, T5)); /* difference of the second pair, same W[0] */ + ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); /* first-pair difference is written at vs offset 1, rs offset 0 */ + ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + } + VLEAVE(); /* macro supplied by the SIMD headers, invoked once after the loop; its effect is not visible in this file */ +} + +static const tw_instr twinstr[] = { /* twiddle instruction table referenced by desc; VTW and TW_NEXT are defined outside this file */ + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("q1fv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; /* 2 and {6, 4, 0, 0} agree with the -n 2 flag and the add / mul / fma counts quoted in the header comment; NOTE(review): the meaning of the remaining fields is set by the ct_desc declaration, which is not visible here */ + +void XSIMD(codelet_q1fv_2) (planner *p) { /* registration hook: hands q1fv_2 and desc to the planner through kdft_difsq_register */ + X(kdft_difsq_register) (p, q1fv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact
-variables 4 -pipeline-latency 8 -n 2 -dif -name q1fv_2 -include q1f.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 6 additions, 4 multiplications, 0 fused multiply/add), + * 8 stack variables, 0 constants, and 8 memory accesses + */ +#include "q1f.h" + /* q1fv_2 (build without HAVE_FMA): for each step of the m loop, loads a 2x2 tile of x addressed by the rs and vs strides, forms the sum and the difference of each vs-row pair, and stores them back into the same four slots. The two differences, after BYTWJ with W[0], go to the two slots at offset WS(vs, 1); the two sums go to x[0] and x[WS(rs, 1)]. Only ri is used as the data pointer; ii is not referenced in this body. NOTE(review): BYTWJ presumably multiplies by a conjugated twiddle factor; confirm against the SIMD support headers. */ +static void q1fv_2(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; /* all loads and stores below go through x, i.e. through ri */ + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(4, vs)) { /* W is pre-offset for the starting index mb; each pass advances m by VL, x by VL * ms and W by TWVL * 2 */ + V T1, T2, T3, T4, T5, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); /* T1, T2: first pair, vs offset 0 */ + T3 = BYTWJ(&(W[0]), VSUB(T1, T2)); /* difference of the first pair, passed through BYTWJ with W[0] */ + T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); /* T4, T5: second pair, vs offset 1; all four loads precede the first store, which matters because the stores overwrite these same slots */ + T6 = BYTWJ(&(W[0]), VSUB(T4, T5)); /* difference of the second pair, same W[0] */ + ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); /* first-pair difference is written at vs offset 1, rs offset 0 */ + ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); /* sum of the first pair */ + ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); /* sum of the second pair is written at rs offset 1, vs offset 0 */ + } + } + VLEAVE(); /* macro supplied by the SIMD headers, invoked once after the loop; its effect is not visible in this file */ +} + +static const tw_instr twinstr[] = { /* twiddle instruction table referenced by desc; VTW and TW_NEXT are defined outside this file */ + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("q1fv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; /* 2 and {6, 4, 0, 0} agree with the -n 2 flag and the add / mul / fma counts quoted in the header comment; NOTE(review): the meaning of the remaining fields is set by the ct_desc declaration, which is not visible here */ + +void XSIMD(codelet_q1fv_2) (planner *p) { /* registration hook: hands q1fv_2 and desc to the planner through kdft_difsq_register */ + X(kdft_difsq_register) (p, q1fv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + *
(at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -dif -name q1fv_4 -include q1f.h */ + +/* + * This function contains 44 FP additions, 32 FP multiplications, + * (or, 36 additions, 24 multiplications, 8 fused multiply/add), + * 38 stack variables, 0 constants, and 32 memory accesses + */ +#include "q1f.h" + +static void q1fv_4(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, vs)) { + V Tb, Tm, Tx, TI; + { + V Tc, T9, T3, TG, TA, TH, TD, Ta, T6, Td, Tn, To, Tq, Tr, Tf; + V Tg; + { + V T1, T2, Ty, Tz, TB, TC, T4, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ty = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + Tz = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + TB = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T9 
= VADD(T1, T2); + T3 = VSUB(T1, T2); + TG = VADD(Ty, Tz); + TA = VSUB(Ty, Tz); + TH = VADD(TB, TC); + TD = VSUB(TB, TC); + Ta = VADD(T4, T5); + T6 = VSUB(T4, T5); + Td = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Tn = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + To = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + Tq = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tr = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tf = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tg = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V Tk, Te, Tv, Tp, Tw, Ts, Tl, Th, T7, TE, Tu, TF; + ST(&(x[0]), VADD(T9, Ta), ms, &(x[0])); + Tk = VADD(Tc, Td); + Te = VSUB(Tc, Td); + Tv = VADD(Tn, To); + Tp = VSUB(Tn, To); + Tw = VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + Tl = VADD(Tf, Tg); + Th = VSUB(Tf, Tg); + ST(&(x[WS(rs, 3)]), VADD(TG, TH), ms, &(x[WS(rs, 1)])); + T7 = BYTWJ(&(W[0]), VFNMSI(T6, T3)); + TE = BYTWJ(&(W[0]), VFNMSI(TD, TA)); + { + V Tt, Ti, Tj, T8; + T8 = BYTWJ(&(W[TWVL * 4]), VFMAI(T6, T3)); + ST(&(x[WS(rs, 2)]), VADD(Tv, Tw), ms, &(x[0])); + Tt = BYTWJ(&(W[0]), VFNMSI(Ts, Tp)); + ST(&(x[WS(rs, 1)]), VADD(Tk, Tl), ms, &(x[WS(rs, 1)])); + Ti = BYTWJ(&(W[0]), VFNMSI(Th, Te)); + Tj = BYTWJ(&(W[TWVL * 4]), VFMAI(Th, Te)); + ST(&(x[WS(vs, 1)]), T7, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), TE, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 3)]), T8, ms, &(x[WS(vs, 3)])); + Tu = BYTWJ(&(W[TWVL * 4]), VFMAI(Ts, Tp)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), Tt, ms, &(x[WS(vs, 1)])); + TF = BYTWJ(&(W[TWVL * 4]), VFMAI(TD, TA)); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Ti, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), Tj, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + Tb = BYTWJ(&(W[TWVL * 2]), VSUB(T9, Ta)); + Tm = BYTWJ(&(W[TWVL * 2]), VSUB(Tk, Tl)); + Tx = BYTWJ(&(W[TWVL * 2]), VSUB(Tv, Tw)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), Tu, ms, &(x[WS(vs, 3)])); + TI = 
BYTWJ(&(W[TWVL * 2]), VSUB(TG, TH)); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), TF, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + } + ST(&(x[WS(vs, 2)]), Tb, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("q1fv_4"), twinstr, &GENUS, {36, 24, 8, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1fv_4) (planner *p) { + X(kdft_difsq_register) (p, q1fv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -dif -name q1fv_4 -include q1f.h */ + +/* + * This function contains 44 FP additions, 24 FP multiplications, + * (or, 44 additions, 24 multiplications, 0 fused multiply/add), + * 22 stack variables, 0 constants, and 32 memory accesses + */ +#include "q1f.h" + +static void q1fv_4(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, vs)) { + V T3, T9, TA, TG, TD, TH, T6, Ta, Te, Tk, Tp, Tv, Ts, Tw, Th; + V Tl; + { + V T1, T2, Ty, Tz; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = VSUB(T1, T2); + T9 = VADD(T1, T2); + Ty = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + Tz = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + TA = VSUB(Ty, Tz); + TG = VADD(Ty, Tz); + } + { + V TB, TC, T4, T5; + TB = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TD = VBYI(VSUB(TB, TC)); + TH = VADD(TB, TC); + T4 = LD(&(x[WS(rs, 1)]), ms, 
&(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T6 = VBYI(VSUB(T4, T5)); + Ta = VADD(T4, T5); + } + { + V Tc, Td, Tn, To; + Tc = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + Td = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Te = VSUB(Tc, Td); + Tk = VADD(Tc, Td); + Tn = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + To = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + Tp = VSUB(Tn, To); + Tv = VADD(Tn, To); + } + { + V Tq, Tr, Tf, Tg; + Tq = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tr = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Ts = VBYI(VSUB(Tq, Tr)); + Tw = VADD(Tq, Tr); + Tf = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tg = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Th = VBYI(VSUB(Tf, Tg)); + Tl = VADD(Tf, Tg); + } + ST(&(x[0]), VADD(T9, Ta), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(Tk, Tl), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(Tv, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(TG, TH), ms, &(x[WS(rs, 1)])); + { + V T7, Ti, Tt, TE; + T7 = BYTWJ(&(W[0]), VSUB(T3, T6)); + ST(&(x[WS(vs, 1)]), T7, ms, &(x[WS(vs, 1)])); + Ti = BYTWJ(&(W[0]), VSUB(Te, Th)); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Ti, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tt = BYTWJ(&(W[0]), VSUB(Tp, Ts)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), Tt, ms, &(x[WS(vs, 1)])); + TE = BYTWJ(&(W[0]), VSUB(TA, TD)); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), TE, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V T8, Tj, Tu, TF; + T8 = BYTWJ(&(W[TWVL * 4]), VADD(T3, T6)); + ST(&(x[WS(vs, 3)]), T8, ms, &(x[WS(vs, 3)])); + Tj = BYTWJ(&(W[TWVL * 4]), VADD(Te, Th)); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), Tj, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + Tu = BYTWJ(&(W[TWVL * 4]), VADD(Tp, Ts)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), Tu, ms, &(x[WS(vs, 3)])); + TF = BYTWJ(&(W[TWVL * 4]), VADD(TA, TD)); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), TF, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V Tb, Tm, Tx, TI; + Tb = 
BYTWJ(&(W[TWVL * 2]), VSUB(T9, Ta)); + ST(&(x[WS(vs, 2)]), Tb, ms, &(x[WS(vs, 2)])); + Tm = BYTWJ(&(W[TWVL * 2]), VSUB(Tk, Tl)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tx = BYTWJ(&(W[TWVL * 2]), VSUB(Tv, Tw)); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)])); + TI = BYTWJ(&(W[TWVL * 2]), VSUB(TG, TH)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("q1fv_4"), twinstr, &GENUS, {44, 24, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1fv_4) (planner *p) { + X(kdft_difsq_register) (p, q1fv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:56 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -dif -name q1fv_5 -include q1f.h */ + +/* + * This function contains 100 FP additions, 95 FP multiplications, + * (or, 55 additions, 50 multiplications, 45 fused multiply/add), + * 69 stack variables, 4 constants, and 50 memory accesses + */ +#include "q1f.h" + +static void q1fv_5(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(10, rs), MAKE_VOLATILE_STRIDE(10, vs)) { + V Te, T1w, Ty, TS, TW, Tb, T1t, Tv, T1g, T1c, TP, TV, T1f, T19, TY; + V TX; + { + V T1, T1j, Tl, Ti, Ta, T8, T1A, T1q, T1s, T9, TF, T1r, TZ, TR, TL; + V TC, Ts, Tu, TQ, TI, T15, T1b, T10, T11, Tt; + { + V T1n, T1o, T1k, T1l, T7, Td, T4, Tc; + { + V T5, T6, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T1j = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + T1n = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 4)])); + T1o = LD(&(x[WS(vs, 4) + 
WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1k = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1l = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T7 = VADD(T5, T6); + Td = VSUB(T5, T6); + T4 = VADD(T2, T3); + Tc = VSUB(T2, T3); + } + { + V Tm, Tn, Tr, Tx, T1v, T1p; + Tl = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + T1v = VSUB(T1n, T1o); + T1p = VADD(T1n, T1o); + { + V T1u, T1m, Tp, Tq; + T1u = VSUB(T1k, T1l); + T1m = VADD(T1k, T1l); + Tp = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Ti = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tc, Td)); + Te = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Td, Tc)); + Ta = VSUB(T4, T7); + T8 = VADD(T4, T7); + Tq = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T1w = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1v, T1u)); + T1A = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1u, T1v)); + T1q = VADD(T1m, T1p); + T1s = VSUB(T1m, T1p); + Tm = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T9 = VFNMS(LDK(KP250000000), T8, T1); + Tn = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + Tr = VADD(Tp, Tq); + Tx = VSUB(Tp, Tq); + } + { + V TJ, TK, TG, Tw, To, TH, T13, T14; + TF = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + T1r = VFNMS(LDK(KP250000000), T1q, T1j); + TJ = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + TK = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + TG = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + Tw = VSUB(Tm, Tn); + To = VADD(Tm, Tn); + TH = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + TZ = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + T13 = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T14 = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TR = VSUB(TJ, TK); + TL = VADD(TJ, TK); + Ty = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tx, Tw)); + TC = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tw, Tx)); + Ts = VADD(To, Tr); 
+ Tu = VSUB(To, Tr); + TQ = VSUB(TG, TH); + TI = VADD(TG, TH); + T15 = VADD(T13, T14); + T1b = VSUB(T13, T14); + T10 = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T11 = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + Tt = VFNMS(LDK(KP250000000), Ts, Tl); + } + } + } + { + V TO, T12, T1a, Th, T1z, TN, TM, T18, T17; + ST(&(x[0]), VADD(T1, T8), ms, &(x[0])); + TS = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TR, TQ)); + TW = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TQ, TR)); + TM = VADD(TI, TL); + TO = VSUB(TI, TL); + ST(&(x[WS(rs, 4)]), VADD(T1j, T1q), ms, &(x[0])); + T12 = VADD(T10, T11); + T1a = VSUB(T10, T11); + ST(&(x[WS(rs, 1)]), VADD(Tl, Ts), ms, &(x[WS(rs, 1)])); + Th = VFNMS(LDK(KP559016994), Ta, T9); + Tb = VFMA(LDK(KP559016994), Ta, T9); + T1t = VFMA(LDK(KP559016994), T1s, T1r); + T1z = VFNMS(LDK(KP559016994), T1s, T1r); + ST(&(x[WS(rs, 2)]), VADD(TF, TM), ms, &(x[0])); + TN = VFNMS(LDK(KP250000000), TM, TF); + { + V T16, Tk, Tj, T1C, T1B, TD, TE, TB; + TB = VFNMS(LDK(KP559016994), Tu, Tt); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + T1g = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1a, T1b)); + T1c = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1b, T1a)); + T18 = VSUB(T12, T15); + T16 = VADD(T12, T15); + Tk = BYTWJ(&(W[TWVL * 4]), VFNMSI(Ti, Th)); + Tj = BYTWJ(&(W[TWVL * 2]), VFMAI(Ti, Th)); + T1C = BYTWJ(&(W[TWVL * 4]), VFNMSI(T1A, T1z)); + T1B = BYTWJ(&(W[TWVL * 2]), VFMAI(T1A, T1z)); + TD = BYTWJ(&(W[TWVL * 2]), VFMAI(TC, TB)); + TE = BYTWJ(&(W[TWVL * 4]), VFNMSI(TC, TB)); + ST(&(x[WS(rs, 3)]), VADD(TZ, T16), ms, &(x[WS(rs, 1)])); + T17 = VFNMS(LDK(KP250000000), T16, TZ); + ST(&(x[WS(vs, 3)]), Tk, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 2)]), Tj, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T1C, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T1B, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), TD, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TE, ms, &(x[WS(vs, 3) 
+ WS(rs, 1)])); + } + TP = VFMA(LDK(KP559016994), TO, TN); + TV = VFNMS(LDK(KP559016994), TO, TN); + T1f = VFNMS(LDK(KP559016994), T18, T17); + T19 = VFMA(LDK(KP559016994), T18, T17); + } + } + TY = BYTWJ(&(W[TWVL * 4]), VFNMSI(TW, TV)); + TX = BYTWJ(&(W[TWVL * 2]), VFMAI(TW, TV)); + { + V T1i, T1h, TU, TT; + T1i = BYTWJ(&(W[TWVL * 4]), VFNMSI(T1g, T1f)); + T1h = BYTWJ(&(W[TWVL * 2]), VFMAI(T1g, T1f)); + TU = BYTWJ(&(W[TWVL * 6]), VFMAI(TS, TP)); + TT = BYTWJ(&(W[0]), VFNMSI(TS, TP)); + { + V Tg, Tf, TA, Tz; + Tg = BYTWJ(&(W[TWVL * 6]), VFMAI(Te, Tb)); + Tf = BYTWJ(&(W[0]), VFNMSI(Te, Tb)); + TA = BYTWJ(&(W[TWVL * 6]), VFMAI(Ty, Tv)); + Tz = BYTWJ(&(W[0]), VFNMSI(Ty, Tv)); + { + V T1e, T1d, T1y, T1x; + T1e = BYTWJ(&(W[TWVL * 6]), VFMAI(T1c, T19)); + T1d = BYTWJ(&(W[0]), VFNMSI(T1c, T19)); + T1y = BYTWJ(&(W[TWVL * 6]), VFMAI(T1w, T1t)); + T1x = BYTWJ(&(W[0]), VFNMSI(T1w, T1t)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), TY, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), TX, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1i, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T1h, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), TU, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), TT, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4)]), Tg, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 1)]), Tf, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), TA, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Tz, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T1e, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1d, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T1y, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T1x, ms, &(x[WS(vs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("q1fv_5"), 
twinstr, &GENUS, {55, 50, 45, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1fv_5) (planner *p) { + X(kdft_difsq_register) (p, q1fv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -dif -name q1fv_5 -include q1f.h */ + +/* + * This function contains 100 FP additions, 70 FP multiplications, + * (or, 85 additions, 55 multiplications, 15 fused multiply/add), + * 44 stack variables, 4 constants, and 50 memory accesses + */ +#include "q1f.h" + +static void q1fv_5(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(10, rs), MAKE_VOLATILE_STRIDE(10, vs)) { + V T8, T7, Th, Te, T9, Ta, T1q, T1p, T1z, T1w, T1r, T1s, Ts, Tr, TB; + V Ty, Tt, Tu, TM, TL, TV, TS, TN, TO, T16, T15, T1f, T1c, T17, T18; + { + V T6, Td, T3, Tc; + T8 = LD(&(x[0]), ms, &(x[0])); + { + V T4, T5, T1, T2; + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T6 = VADD(T4, T5); + Td = VSUB(T4, T5); + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = VADD(T1, T2); + Tc = VSUB(T1, T2); + } + T7 = VMUL(LDK(KP559016994), VSUB(T3, T6)); + Th = VBYI(VFNMS(LDK(KP587785252), Tc, VMUL(LDK(KP951056516), Td))); + Te = VBYI(VFMA(LDK(KP951056516), Tc, VMUL(LDK(KP587785252), Td))); + T9 = VADD(T3, T6); + Ta = VFNMS(LDK(KP250000000), T9, T8); + } + { + V T1o, T1v, T1l, T1u; + T1q = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + { + V T1m, T1n, T1j, T1k; + T1m = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, 
&(x[WS(vs, 4)])); + T1n = LD(&(x[WS(vs, 4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1o = VADD(T1m, T1n); + T1v = VSUB(T1m, T1n); + T1j = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1k = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T1l = VADD(T1j, T1k); + T1u = VSUB(T1j, T1k); + } + T1p = VMUL(LDK(KP559016994), VSUB(T1l, T1o)); + T1z = VBYI(VFNMS(LDK(KP587785252), T1u, VMUL(LDK(KP951056516), T1v))); + T1w = VBYI(VFMA(LDK(KP951056516), T1u, VMUL(LDK(KP587785252), T1v))); + T1r = VADD(T1l, T1o); + T1s = VFNMS(LDK(KP250000000), T1r, T1q); + } + { + V Tq, Tx, Tn, Tw; + Ts = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + { + V To, Tp, Tl, Tm; + To = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + Tp = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tq = VADD(To, Tp); + Tx = VSUB(To, Tp); + Tl = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + Tm = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + Tn = VADD(Tl, Tm); + Tw = VSUB(Tl, Tm); + } + Tr = VMUL(LDK(KP559016994), VSUB(Tn, Tq)); + TB = VBYI(VFNMS(LDK(KP587785252), Tw, VMUL(LDK(KP951056516), Tx))); + Ty = VBYI(VFMA(LDK(KP951056516), Tw, VMUL(LDK(KP587785252), Tx))); + Tt = VADD(Tn, Tq); + Tu = VFNMS(LDK(KP250000000), Tt, Ts); + } + { + V TK, TR, TH, TQ; + TM = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + { + V TI, TJ, TF, TG; + TI = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + TJ = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + TK = VADD(TI, TJ); + TR = VSUB(TI, TJ); + TF = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + TG = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + TH = VADD(TF, TG); + TQ = VSUB(TF, TG); + } + TL = VMUL(LDK(KP559016994), VSUB(TH, TK)); + TV = VBYI(VFNMS(LDK(KP587785252), TQ, VMUL(LDK(KP951056516), TR))); + TS = VBYI(VFMA(LDK(KP951056516), TQ, VMUL(LDK(KP587785252), TR))); + TN = VADD(TH, TK); + TO = VFNMS(LDK(KP250000000), TN, TM); + } + { 
+ V T14, T1b, T11, T1a; + T16 = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + { + V T12, T13, TZ, T10; + T12 = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T13 = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T14 = VADD(T12, T13); + T1b = VSUB(T12, T13); + TZ = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T10 = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + T11 = VADD(TZ, T10); + T1a = VSUB(TZ, T10); + } + T15 = VMUL(LDK(KP559016994), VSUB(T11, T14)); + T1f = VBYI(VFNMS(LDK(KP587785252), T1a, VMUL(LDK(KP951056516), T1b))); + T1c = VBYI(VFMA(LDK(KP951056516), T1a, VMUL(LDK(KP587785252), T1b))); + T17 = VADD(T11, T14); + T18 = VFNMS(LDK(KP250000000), T17, T16); + } + ST(&(x[0]), VADD(T8, T9), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1q, T1r), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(TM, TN), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T16, T17), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Ts, Tt), ms, &(x[WS(rs, 1)])); + { + V Tj, Tk, Ti, T1B, T1C, T1A; + Ti = VSUB(Ta, T7); + Tj = BYTWJ(&(W[TWVL * 2]), VADD(Th, Ti)); + Tk = BYTWJ(&(W[TWVL * 4]), VSUB(Ti, Th)); + ST(&(x[WS(vs, 2)]), Tj, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3)]), Tk, ms, &(x[WS(vs, 3)])); + T1A = VSUB(T1s, T1p); + T1B = BYTWJ(&(W[TWVL * 2]), VADD(T1z, T1A)); + T1C = BYTWJ(&(W[TWVL * 4]), VSUB(T1A, T1z)); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T1B, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T1C, ms, &(x[WS(vs, 3)])); + } + { + V T1h, T1i, T1g, TD, TE, TC; + T1g = VSUB(T18, T15); + T1h = BYTWJ(&(W[TWVL * 2]), VADD(T1f, T1g)); + T1i = BYTWJ(&(W[TWVL * 4]), VSUB(T1g, T1f)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T1h, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1i, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + TC = VSUB(Tu, Tr); + TD = BYTWJ(&(W[TWVL * 2]), VADD(TB, TC)); + TE = BYTWJ(&(W[TWVL * 4]), VSUB(TC, TB)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), TD, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TE, 
ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V TX, TY, TW, TT, TU, TP; + TW = VSUB(TO, TL); + TX = BYTWJ(&(W[TWVL * 2]), VADD(TV, TW)); + TY = BYTWJ(&(W[TWVL * 4]), VSUB(TW, TV)); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), TX, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), TY, ms, &(x[WS(vs, 3)])); + TP = VADD(TL, TO); + TT = BYTWJ(&(W[0]), VSUB(TP, TS)); + TU = BYTWJ(&(W[TWVL * 6]), VADD(TS, TP)); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), TT, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), TU, ms, &(x[WS(vs, 4)])); + } + { + V Tf, Tg, Tb, Tz, TA, Tv; + Tb = VADD(T7, Ta); + Tf = BYTWJ(&(W[0]), VSUB(Tb, Te)); + Tg = BYTWJ(&(W[TWVL * 6]), VADD(Te, Tb)); + ST(&(x[WS(vs, 1)]), Tf, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4)]), Tg, ms, &(x[WS(vs, 4)])); + Tv = VADD(Tr, Tu); + Tz = BYTWJ(&(W[0]), VSUB(Tv, Ty)); + TA = BYTWJ(&(W[TWVL * 6]), VADD(Ty, Tv)); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), Tz, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), TA, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + } + { + V T1d, T1e, T19, T1x, T1y, T1t; + T19 = VADD(T15, T18); + T1d = BYTWJ(&(W[0]), VSUB(T19, T1c)); + T1e = BYTWJ(&(W[TWVL * 6]), VADD(T1c, T19)); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1d, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T1e, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1t = VADD(T1p, T1s); + T1x = BYTWJ(&(W[0]), VSUB(T1t, T1w)); + T1y = BYTWJ(&(W[TWVL * 6]), VADD(T1w, T1t)); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T1x, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T1y, ms, &(x[WS(vs, 4)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("q1fv_5"), twinstr, &GENUS, {85, 55, 15, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1fv_5) (planner *p) { + X(kdft_difsq_register) (p, q1fv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_8.c --- /dev/null Thu Jan 
01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/q1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,991 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:57 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twidsq_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -dif -name q1fv_8 -include q1f.h */ + +/* + * This function contains 264 FP additions, 192 FP multiplications, + * (or, 184 additions, 112 multiplications, 80 fused multiply/add), + * 117 stack variables, 1 constants, and 128 memory accesses + */ +#include "q1f.h" + +static void q1fv_8(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, vs)) { + V T42, T43, T1U, T1V, T2Y, T2Z, TT, TS; + { + V T3, Te, T1E, T1P, Tu, 
Tp, T25, T20, T2b, T2m, T3M, T2x, T2C, T3X, TA; + V TL, T48, T4d, T17, T11, TW, T1i, T2I, T1y, T1t, T2T, T3f, T3q, T34, T39; + V T3G, T3B, Ts, Tv, Tf, Ta, T23, T26, T1Q, T1L, T2A, T2D, T2n, T2i, T4b; + V T4e, T3Y, T3T, TZ, T12, TM, TH, T35, T2L, T3j, T1w, T1z, T1j, T1e, T36; + V T2O, T3C, T3i, T3k; + { + V T3d, T32, T3e, T3o, T3p, T33; + { + V T2v, T2w, T3V, T46, T3W; + { + V T1, T2, Tc, Td, T1C, T1D, T1N, T1O; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1C = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + T1D = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + T1N = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T1O = LD(&(x[WS(vs, 3) + WS(rs, 6)]), ms, &(x[WS(vs, 3)])); + { + V T29, T1Y, T1Z, T2a, T2k, T2l, Tn, To, T3K, T3L; + T29 = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + T3 = VSUB(T1, T2); + Tn = VADD(T1, T2); + Te = VSUB(Tc, Td); + To = VADD(Tc, Td); + T1E = VSUB(T1C, T1D); + T1Y = VADD(T1C, T1D); + T1P = VSUB(T1N, T1O); + T1Z = VADD(T1N, T1O); + T2a = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T2k = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 4)])); + T2l = LD(&(x[WS(vs, 4) + WS(rs, 6)]), ms, &(x[WS(vs, 4)])); + Tu = VSUB(Tn, To); + Tp = VADD(Tn, To); + T3K = LD(&(x[WS(vs, 7)]), ms, &(x[WS(vs, 7)])); + T3L = LD(&(x[WS(vs, 7) + WS(rs, 4)]), ms, &(x[WS(vs, 7)])); + T25 = VSUB(T1Y, T1Z); + T20 = VADD(T1Y, T1Z); + T2v = VADD(T29, T2a); + T2b = VSUB(T29, T2a); + T2w = VADD(T2k, T2l); + T2m = VSUB(T2k, T2l); + T3V = LD(&(x[WS(vs, 7) + WS(rs, 2)]), ms, &(x[WS(vs, 7)])); + T46 = VADD(T3K, T3L); + T3M = VSUB(T3K, T3L); + T3W = LD(&(x[WS(vs, 7) + WS(rs, 6)]), ms, &(x[WS(vs, 7)])); + } + } + { + V T15, TU, T16, T1g, TV, T1h; + { + V Ty, Tz, TJ, TK, T47; + Ty = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + Tz = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + TJ = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + T2x 
= VADD(T2v, T2w); + T2C = VSUB(T2v, T2w); + TK = LD(&(x[WS(vs, 1) + WS(rs, 6)]), ms, &(x[WS(vs, 1)])); + T47 = VADD(T3V, T3W); + T3X = VSUB(T3V, T3W); + T15 = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + TA = VSUB(Ty, Tz); + TU = VADD(Ty, Tz); + T16 = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + T1g = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)])); + TL = VSUB(TJ, TK); + TV = VADD(TJ, TK); + T48 = VADD(T46, T47); + T4d = VSUB(T46, T47); + T1h = LD(&(x[WS(vs, 2) + WS(rs, 6)]), ms, &(x[WS(vs, 2)])); + } + { + V T2G, T1r, T2H, T2R, T1s, T2S; + T2G = LD(&(x[WS(vs, 5)]), ms, &(x[WS(vs, 5)])); + T17 = VSUB(T15, T16); + T1r = VADD(T15, T16); + T2H = LD(&(x[WS(vs, 5) + WS(rs, 4)]), ms, &(x[WS(vs, 5)])); + T11 = VSUB(TU, TV); + TW = VADD(TU, TV); + T2R = LD(&(x[WS(vs, 5) + WS(rs, 2)]), ms, &(x[WS(vs, 5)])); + T1i = VSUB(T1g, T1h); + T1s = VADD(T1g, T1h); + T2S = LD(&(x[WS(vs, 5) + WS(rs, 6)]), ms, &(x[WS(vs, 5)])); + T3d = LD(&(x[WS(vs, 6)]), ms, &(x[WS(vs, 6)])); + T2I = VSUB(T2G, T2H); + T32 = VADD(T2G, T2H); + T3e = LD(&(x[WS(vs, 6) + WS(rs, 4)]), ms, &(x[WS(vs, 6)])); + T3o = LD(&(x[WS(vs, 6) + WS(rs, 2)]), ms, &(x[WS(vs, 6)])); + T3p = LD(&(x[WS(vs, 6) + WS(rs, 6)]), ms, &(x[WS(vs, 6)])); + T1y = VSUB(T1r, T1s); + T1t = VADD(T1r, T1s); + T33 = VADD(T2R, T2S); + T2T = VSUB(T2R, T2S); + } + } + } + { + V T2y, T2e, T3Q, T2z, T2h, T49, T3P, T3R; + { + V T6, Tq, T1I, Tr, T9, T21, T1H, T1J; + { + V T4, T3z, T3A, T5, T7, T8, T1F, T1G; + T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3f = VSUB(T3d, T3e); + T3z = VADD(T3d, T3e); + T3q = VSUB(T3o, T3p); + T3A = VADD(T3o, T3p); + T5 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T34 = VADD(T32, T33); + T39 = VSUB(T32, T33); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1F = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1G = LD(&(x[WS(vs, 3) + WS(rs, 5)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T3G = VSUB(T3z, T3A); + T3B = VADD(T3z, T3A); 
+ T6 = VSUB(T4, T5); + Tq = VADD(T4, T5); + T1I = LD(&(x[WS(vs, 3) + WS(rs, 7)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + Tr = VADD(T7, T8); + T9 = VSUB(T7, T8); + T21 = VADD(T1F, T1G); + T1H = VSUB(T1F, T1G); + T1J = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V T2f, T22, T1K, T2g, T2c, T2d, T3N, T3O; + T2c = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2d = LD(&(x[WS(vs, 4) + WS(rs, 5)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2f = LD(&(x[WS(vs, 4) + WS(rs, 7)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + Ts = VADD(Tq, Tr); + Tv = VSUB(Tr, Tq); + Tf = VSUB(T9, T6); + Ta = VADD(T6, T9); + T22 = VADD(T1I, T1J); + T1K = VSUB(T1I, T1J); + T2y = VADD(T2c, T2d); + T2e = VSUB(T2c, T2d); + T2g = LD(&(x[WS(vs, 4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T3N = LD(&(x[WS(vs, 7) + WS(rs, 1)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3O = LD(&(x[WS(vs, 7) + WS(rs, 5)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3Q = LD(&(x[WS(vs, 7) + WS(rs, 7)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T23 = VADD(T21, T22); + T26 = VSUB(T22, T21); + T1Q = VSUB(T1K, T1H); + T1L = VADD(T1H, T1K); + T2z = VADD(T2f, T2g); + T2h = VSUB(T2f, T2g); + T49 = VADD(T3N, T3O); + T3P = VSUB(T3N, T3O); + T3R = LD(&(x[WS(vs, 7) + WS(rs, 3)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + } + } + { + V TX, TD, T1b, TY, TG, T1u, T1a, T1c; + { + V TE, T4a, T3S, TF, TB, TC, T18, T19; + TB = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 1) + WS(rs, 5)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TE = LD(&(x[WS(vs, 1) + WS(rs, 7)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T2A = VADD(T2y, T2z); + T2D = VSUB(T2z, T2y); + T2n = VSUB(T2h, T2e); + T2i = VADD(T2e, T2h); + T4a = VADD(T3Q, T3R); + T3S = VSUB(T3Q, T3R); + TX = VADD(TB, TC); + TD = VSUB(TB, TC); + TF = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T18 = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T19 = LD(&(x[WS(vs, 2) + WS(rs, 5)]), ms, &(x[WS(vs, 
2) + WS(rs, 1)])); + T1b = LD(&(x[WS(vs, 2) + WS(rs, 7)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T4b = VADD(T49, T4a); + T4e = VSUB(T4a, T49); + T3Y = VSUB(T3S, T3P); + T3T = VADD(T3P, T3S); + TY = VADD(TE, TF); + TG = VSUB(TE, TF); + T1u = VADD(T18, T19); + T1a = VSUB(T18, T19); + T1c = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + } + { + V T2M, T1v, T1d, T2N, T2J, T2K, T3g, T3h; + T2J = LD(&(x[WS(vs, 5) + WS(rs, 1)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2K = LD(&(x[WS(vs, 5) + WS(rs, 5)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2M = LD(&(x[WS(vs, 5) + WS(rs, 7)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + TZ = VADD(TX, TY); + T12 = VSUB(TY, TX); + TM = VSUB(TG, TD); + TH = VADD(TD, TG); + T1v = VADD(T1b, T1c); + T1d = VSUB(T1b, T1c); + T35 = VADD(T2J, T2K); + T2L = VSUB(T2J, T2K); + T2N = LD(&(x[WS(vs, 5) + WS(rs, 3)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T3g = LD(&(x[WS(vs, 6) + WS(rs, 1)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3h = LD(&(x[WS(vs, 6) + WS(rs, 5)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3j = LD(&(x[WS(vs, 6) + WS(rs, 7)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T1w = VADD(T1u, T1v); + T1z = VSUB(T1v, T1u); + T1j = VSUB(T1d, T1a); + T1e = VADD(T1a, T1d); + T36 = VADD(T2M, T2N); + T2O = VSUB(T2M, T2N); + T3C = VADD(T3g, T3h); + T3i = VSUB(T3g, T3h); + T3k = LD(&(x[WS(vs, 6) + WS(rs, 3)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + } + } + } + } + { + V T3a, T2U, T2P, T3H, T3r, T3m, T13, T27, T3b, T4f; + { + V T37, T3E, T2B, T24; + { + V T3D, T3l, Tt, T4c; + ST(&(x[0]), VADD(Tp, Ts), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1t, T1w), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VADD(T48, T4b), ms, &(x[WS(rs, 1)])); + T37 = VADD(T35, T36); + T3a = VSUB(T36, T35); + T2U = VSUB(T2O, T2L); + T2P = VADD(T2L, T2O); + T3D = VADD(T3j, T3k); + T3l = VSUB(T3j, T3k); + ST(&(x[WS(rs, 4)]), VADD(T2x, T2A), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T20, T23), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T34, T37), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TW, 
TZ), ms, &(x[WS(rs, 1)])); + Tt = BYTWJ(&(W[TWVL * 6]), VSUB(Tp, Ts)); + T4c = BYTWJ(&(W[TWVL * 6]), VSUB(T48, T4b)); + T3E = VADD(T3C, T3D); + T3H = VSUB(T3D, T3C); + T3r = VSUB(T3l, T3i); + T3m = VADD(T3i, T3l); + T2B = BYTWJ(&(W[TWVL * 6]), VSUB(T2x, T2A)); + T24 = BYTWJ(&(W[TWVL * 6]), VSUB(T20, T23)); + ST(&(x[WS(vs, 4)]), Tt, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 4) + WS(rs, 7)]), T4c, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VADD(T3B, T3E), ms, &(x[0])); + } + { + V T38, T1A, Tw, T10, T1x, T3F, T2E, T3I; + T10 = BYTWJ(&(W[TWVL * 6]), VSUB(TW, TZ)); + T1x = BYTWJ(&(W[TWVL * 6]), VSUB(T1t, T1w)); + T3F = BYTWJ(&(W[TWVL * 6]), VSUB(T3B, T3E)); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T2B, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T24, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T38 = BYTWJ(&(W[TWVL * 6]), VSUB(T34, T37)); + T1A = BYTWJ(&(W[TWVL * 10]), VFNMSI(T1z, T1y)); + Tw = BYTWJ(&(W[TWVL * 10]), VFNMSI(Tv, Tu)); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), T10, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), T1x, ms, &(x[WS(vs, 4)])); + ST(&(x[WS(vs, 4) + WS(rs, 6)]), T3F, ms, &(x[WS(vs, 4)])); + T2E = BYTWJ(&(W[TWVL * 10]), VFNMSI(T2D, T2C)); + T3I = BYTWJ(&(W[TWVL * 10]), VFNMSI(T3H, T3G)); + ST(&(x[WS(vs, 4) + WS(rs, 5)]), T38, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + ST(&(x[WS(vs, 6) + WS(rs, 2)]), T1A, ms, &(x[WS(vs, 6)])); + ST(&(x[WS(vs, 6)]), Tw, ms, &(x[WS(vs, 6)])); + T13 = BYTWJ(&(W[TWVL * 10]), VFNMSI(T12, T11)); + T27 = BYTWJ(&(W[TWVL * 10]), VFNMSI(T26, T25)); + T3b = BYTWJ(&(W[TWVL * 10]), VFNMSI(T3a, T39)); + ST(&(x[WS(vs, 6) + WS(rs, 4)]), T2E, ms, &(x[WS(vs, 6)])); + ST(&(x[WS(vs, 6) + WS(rs, 6)]), T3I, ms, &(x[WS(vs, 6)])); + T4f = BYTWJ(&(W[TWVL * 10]), VFNMSI(T4e, T4d)); + } + } + { + V Tj, Tk, T2r, T2j, Ti, Th, T2o, T2s, T1M, T1R, T41, T40; + { + V T3c, T4g, T3J, T2F, Tx, T1B; + Tx = BYTWJ(&(W[TWVL * 2]), VFMAI(Tv, Tu)); + T1B = BYTWJ(&(W[TWVL * 2]), VFMAI(T1z, T1y)); + ST(&(x[WS(vs, 6) + WS(rs, 1)]), T13, ms, 
&(x[WS(vs, 6) + WS(rs, 1)])); + ST(&(x[WS(vs, 6) + WS(rs, 3)]), T27, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + ST(&(x[WS(vs, 6) + WS(rs, 5)]), T3b, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3c = BYTWJ(&(W[TWVL * 2]), VFMAI(T3a, T39)); + T4g = BYTWJ(&(W[TWVL * 2]), VFMAI(T4e, T4d)); + ST(&(x[WS(vs, 6) + WS(rs, 7)]), T4f, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + ST(&(x[WS(vs, 2)]), Tx, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), T1B, ms, &(x[WS(vs, 2)])); + T3J = BYTWJ(&(W[TWVL * 2]), VFMAI(T3H, T3G)); + T2F = BYTWJ(&(W[TWVL * 2]), VFMAI(T2D, T2C)); + { + V T14, Tb, Tg, T28, T3U, T3Z; + T28 = BYTWJ(&(W[TWVL * 2]), VFMAI(T26, T25)); + ST(&(x[WS(vs, 2) + WS(rs, 5)]), T3c, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + ST(&(x[WS(vs, 2) + WS(rs, 7)]), T4g, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T14 = BYTWJ(&(W[TWVL * 2]), VFMAI(T12, T11)); + Tj = VFNMS(LDK(KP707106781), Ta, T3); + Tb = VFMA(LDK(KP707106781), Ta, T3); + Tg = VFNMS(LDK(KP707106781), Tf, Te); + Tk = VFMA(LDK(KP707106781), Tf, Te); + ST(&(x[WS(vs, 2) + WS(rs, 6)]), T3J, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T2F, ms, &(x[WS(vs, 2)])); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T28, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T3U = VFMA(LDK(KP707106781), T3T, T3M); + T42 = VFNMS(LDK(KP707106781), T3T, T3M); + T43 = VFMA(LDK(KP707106781), T3Y, T3X); + T3Z = VFNMS(LDK(KP707106781), T3Y, T3X); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), T14, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T2r = VFNMS(LDK(KP707106781), T2i, T2b); + T2j = VFMA(LDK(KP707106781), T2i, T2b); + Ti = BYTWJ(&(W[TWVL * 12]), VFMAI(Tg, Tb)); + Th = BYTWJ(&(W[0]), VFNMSI(Tg, Tb)); + T2o = VFNMS(LDK(KP707106781), T2n, T2m); + T2s = VFMA(LDK(KP707106781), T2n, T2m); + T1U = VFNMS(LDK(KP707106781), T1L, T1E); + T1M = VFMA(LDK(KP707106781), T1L, T1E); + T1R = VFNMS(LDK(KP707106781), T1Q, T1P); + T1V = VFMA(LDK(KP707106781), T1Q, T1P); + T41 = BYTWJ(&(W[TWVL * 12]), VFMAI(T3Z, T3U)); + T40 = BYTWJ(&(W[0]), VFNMSI(T3Z, T3U)); + } + } + { + V TQ, TR, T1n, T1o, T3v, T3w; + { + V T1f, 
T1k, T3n, TP, TO, T3s, T2Q, T2V; + { + V TI, T2q, T2p, T1T, T1S, TN; + TQ = VFNMS(LDK(KP707106781), TH, TA); + TI = VFMA(LDK(KP707106781), TH, TA); + ST(&(x[WS(vs, 7)]), Ti, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1)]), Th, ms, &(x[WS(vs, 1)])); + T2q = BYTWJ(&(W[TWVL * 12]), VFMAI(T2o, T2j)); + T2p = BYTWJ(&(W[0]), VFNMSI(T2o, T2j)); + T1T = BYTWJ(&(W[TWVL * 12]), VFMAI(T1R, T1M)); + T1S = BYTWJ(&(W[0]), VFNMSI(T1R, T1M)); + ST(&(x[WS(vs, 7) + WS(rs, 7)]), T41, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 7)]), T40, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TN = VFNMS(LDK(KP707106781), TM, TL); + TR = VFMA(LDK(KP707106781), TM, TL); + T1n = VFNMS(LDK(KP707106781), T1e, T17); + T1f = VFMA(LDK(KP707106781), T1e, T17); + ST(&(x[WS(vs, 7) + WS(rs, 4)]), T2q, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T2p, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 3)]), T1T, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1S, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T1k = VFNMS(LDK(KP707106781), T1j, T1i); + T1o = VFMA(LDK(KP707106781), T1j, T1i); + T3v = VFNMS(LDK(KP707106781), T3m, T3f); + T3n = VFMA(LDK(KP707106781), T3m, T3f); + TP = BYTWJ(&(W[TWVL * 12]), VFMAI(TN, TI)); + TO = BYTWJ(&(W[0]), VFNMSI(TN, TI)); + T3s = VFNMS(LDK(KP707106781), T3r, T3q); + T3w = VFMA(LDK(KP707106781), T3r, T3q); + } + T2Y = VFNMS(LDK(KP707106781), T2P, T2I); + T2Q = VFMA(LDK(KP707106781), T2P, T2I); + T2V = VFNMS(LDK(KP707106781), T2U, T2T); + T2Z = VFMA(LDK(KP707106781), T2U, T2T); + { + V T3u, T3t, T2X, T2W, T1m, T1l; + T1m = BYTWJ(&(W[TWVL * 12]), VFMAI(T1k, T1f)); + T1l = BYTWJ(&(W[0]), VFNMSI(T1k, T1f)); + ST(&(x[WS(vs, 7) + WS(rs, 1)]), TP, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), TO, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + T3u = BYTWJ(&(W[TWVL * 12]), VFMAI(T3s, T3n)); + T3t = BYTWJ(&(W[0]), VFNMSI(T3s, T3n)); + T2X = BYTWJ(&(W[TWVL * 12]), VFMAI(T2V, T2Q)); + T2W = BYTWJ(&(W[0]), VFNMSI(T2V, T2Q)); + ST(&(x[WS(vs, 7) + 
WS(rs, 2)]), T1m, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), T1l, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 6)]), T3u, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 6)]), T3t, ms, &(x[WS(vs, 1)])); + ST(&(x[WS(vs, 7) + WS(rs, 5)]), T2X, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 5)]), T2W, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + } + { + V T2u, T2t, T3y, T3x; + { + V T1q, T1p, Tm, Tl; + T1q = BYTWJ(&(W[TWVL * 4]), VFMAI(T1o, T1n)); + T1p = BYTWJ(&(W[TWVL * 8]), VFNMSI(T1o, T1n)); + Tm = BYTWJ(&(W[TWVL * 4]), VFMAI(Tk, Tj)); + Tl = BYTWJ(&(W[TWVL * 8]), VFNMSI(Tk, Tj)); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), T1q, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 2)]), T1p, ms, &(x[WS(vs, 5)])); + T2u = BYTWJ(&(W[TWVL * 4]), VFMAI(T2s, T2r)); + T2t = BYTWJ(&(W[TWVL * 8]), VFNMSI(T2s, T2r)); + T3y = BYTWJ(&(W[TWVL * 4]), VFMAI(T3w, T3v)); + T3x = BYTWJ(&(W[TWVL * 8]), VFNMSI(T3w, T3v)); + ST(&(x[WS(vs, 3)]), Tm, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5)]), Tl, ms, &(x[WS(vs, 5)])); + } + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T2u, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 4)]), T2t, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 6)]), T3y, ms, &(x[WS(vs, 3)])); + ST(&(x[WS(vs, 5) + WS(rs, 6)]), T3x, ms, &(x[WS(vs, 5)])); + TT = BYTWJ(&(W[TWVL * 4]), VFMAI(TR, TQ)); + TS = BYTWJ(&(W[TWVL * 8]), VFNMSI(TR, TQ)); + } + } + } + } + } + { + V T31, T30, T45, T44, T1X, T1W; + T1X = BYTWJ(&(W[TWVL * 4]), VFMAI(T1V, T1U)); + T1W = BYTWJ(&(W[TWVL * 8]), VFNMSI(T1V, T1U)); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TT, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 1)]), TS, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T31 = BYTWJ(&(W[TWVL * 4]), VFMAI(T2Z, T2Y)); + T30 = BYTWJ(&(W[TWVL * 8]), VFNMSI(T2Z, T2Y)); + T45 = BYTWJ(&(W[TWVL * 4]), VFMAI(T43, T42)); + T44 = BYTWJ(&(W[TWVL * 8]), VFNMSI(T43, T42)); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1X, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 3)]), T1W, ms, &(x[WS(vs, 5) + 
WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 5)]), T31, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 5)]), T30, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 7)]), T45, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + ST(&(x[WS(vs, 5) + WS(rs, 7)]), T44, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("q1fv_8"), twinstr, &GENUS, {184, 112, 80, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1fv_8) (planner *p) { + X(kdft_difsq_register) (p, q1fv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -dif -name q1fv_8 -include q1f.h */ + +/* + * This function contains 264 FP additions, 128 FP multiplications, + * (or, 264 additions, 128 multiplications, 0 fused multiply/add), + * 77 stack variables, 1 constants, and 128 memory accesses + */ +#include "q1f.h" + +static void q1fv_8(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, vs)) { + V T3, Tu, Tf, Tp, T1E, T25, T1Q, T20, T2b, T2C, T2n, T2x, T3M, T4d, T3Y; + V T48, TA, T11, TM, TW, T17, T1y, T1j, T1t, T2I, T39, T2U, T34, T3f, T3G; + V T3r, T3B, Ta, Tv, Tc, Ts, T1L, T26, T1N, T23, T2i, T2D, T2k, T2A, T3T; + V T4e, T3V, T4b, TH, T12, TJ, TZ, T1e, T1z, T1g, T1w, T2P, T3a, T2R, T37; + V T3m, T3H, T3o, T3E, T28, T14; + { + V T1, T2, Tn, Td, Te, To; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = VADD(T1, T2); + Td = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Te = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + To 
= VADD(Td, Te); + T3 = VSUB(T1, T2); + Tu = VSUB(Tn, To); + Tf = VSUB(Td, Te); + Tp = VADD(Tn, To); + } + { + V T1C, T1D, T1Y, T1O, T1P, T1Z; + T1C = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)])); + T1D = LD(&(x[WS(vs, 3) + WS(rs, 4)]), ms, &(x[WS(vs, 3)])); + T1Y = VADD(T1C, T1D); + T1O = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)])); + T1P = LD(&(x[WS(vs, 3) + WS(rs, 6)]), ms, &(x[WS(vs, 3)])); + T1Z = VADD(T1O, T1P); + T1E = VSUB(T1C, T1D); + T25 = VSUB(T1Y, T1Z); + T1Q = VSUB(T1O, T1P); + T20 = VADD(T1Y, T1Z); + } + { + V T29, T2a, T2v, T2l, T2m, T2w; + T29 = LD(&(x[WS(vs, 4)]), ms, &(x[WS(vs, 4)])); + T2a = LD(&(x[WS(vs, 4) + WS(rs, 4)]), ms, &(x[WS(vs, 4)])); + T2v = VADD(T29, T2a); + T2l = LD(&(x[WS(vs, 4) + WS(rs, 2)]), ms, &(x[WS(vs, 4)])); + T2m = LD(&(x[WS(vs, 4) + WS(rs, 6)]), ms, &(x[WS(vs, 4)])); + T2w = VADD(T2l, T2m); + T2b = VSUB(T29, T2a); + T2C = VSUB(T2v, T2w); + T2n = VSUB(T2l, T2m); + T2x = VADD(T2v, T2w); + } + { + V T3K, T3L, T46, T3W, T3X, T47; + T3K = LD(&(x[WS(vs, 7)]), ms, &(x[WS(vs, 7)])); + T3L = LD(&(x[WS(vs, 7) + WS(rs, 4)]), ms, &(x[WS(vs, 7)])); + T46 = VADD(T3K, T3L); + T3W = LD(&(x[WS(vs, 7) + WS(rs, 2)]), ms, &(x[WS(vs, 7)])); + T3X = LD(&(x[WS(vs, 7) + WS(rs, 6)]), ms, &(x[WS(vs, 7)])); + T47 = VADD(T3W, T3X); + T3M = VSUB(T3K, T3L); + T4d = VSUB(T46, T47); + T3Y = VSUB(T3W, T3X); + T48 = VADD(T46, T47); + } + { + V Ty, Tz, TU, TK, TL, TV; + Ty = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); + Tz = LD(&(x[WS(vs, 1) + WS(rs, 4)]), ms, &(x[WS(vs, 1)])); + TU = VADD(Ty, Tz); + TK = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)])); + TL = LD(&(x[WS(vs, 1) + WS(rs, 6)]), ms, &(x[WS(vs, 1)])); + TV = VADD(TK, TL); + TA = VSUB(Ty, Tz); + T11 = VSUB(TU, TV); + TM = VSUB(TK, TL); + TW = VADD(TU, TV); + } + { + V T15, T16, T1r, T1h, T1i, T1s; + T15 = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)])); + T16 = LD(&(x[WS(vs, 2) + WS(rs, 4)]), ms, &(x[WS(vs, 2)])); + T1r = VADD(T15, T16); + T1h = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 
2)])); + T1i = LD(&(x[WS(vs, 2) + WS(rs, 6)]), ms, &(x[WS(vs, 2)])); + T1s = VADD(T1h, T1i); + T17 = VSUB(T15, T16); + T1y = VSUB(T1r, T1s); + T1j = VSUB(T1h, T1i); + T1t = VADD(T1r, T1s); + } + { + V T2G, T2H, T32, T2S, T2T, T33; + T2G = LD(&(x[WS(vs, 5)]), ms, &(x[WS(vs, 5)])); + T2H = LD(&(x[WS(vs, 5) + WS(rs, 4)]), ms, &(x[WS(vs, 5)])); + T32 = VADD(T2G, T2H); + T2S = LD(&(x[WS(vs, 5) + WS(rs, 2)]), ms, &(x[WS(vs, 5)])); + T2T = LD(&(x[WS(vs, 5) + WS(rs, 6)]), ms, &(x[WS(vs, 5)])); + T33 = VADD(T2S, T2T); + T2I = VSUB(T2G, T2H); + T39 = VSUB(T32, T33); + T2U = VSUB(T2S, T2T); + T34 = VADD(T32, T33); + } + { + V T3d, T3e, T3z, T3p, T3q, T3A; + T3d = LD(&(x[WS(vs, 6)]), ms, &(x[WS(vs, 6)])); + T3e = LD(&(x[WS(vs, 6) + WS(rs, 4)]), ms, &(x[WS(vs, 6)])); + T3z = VADD(T3d, T3e); + T3p = LD(&(x[WS(vs, 6) + WS(rs, 2)]), ms, &(x[WS(vs, 6)])); + T3q = LD(&(x[WS(vs, 6) + WS(rs, 6)]), ms, &(x[WS(vs, 6)])); + T3A = VADD(T3p, T3q); + T3f = VSUB(T3d, T3e); + T3G = VSUB(T3z, T3A); + T3r = VSUB(T3p, T3q); + T3B = VADD(T3z, T3A); + } + { + V T6, Tq, T9, Tr; + { + V T4, T5, T7, T8; + T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T6 = VSUB(T4, T5); + Tq = VADD(T4, T5); + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = VSUB(T7, T8); + Tr = VADD(T7, T8); + } + Ta = VMUL(LDK(KP707106781), VADD(T6, T9)); + Tv = VBYI(VSUB(Tr, Tq)); + Tc = VMUL(LDK(KP707106781), VSUB(T9, T6)); + Ts = VADD(Tq, Tr); + } + { + V T1H, T21, T1K, T22; + { + V T1F, T1G, T1I, T1J; + T1F = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1G = LD(&(x[WS(vs, 3) + WS(rs, 5)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1H = VSUB(T1F, T1G); + T21 = VADD(T1F, T1G); + T1I = LD(&(x[WS(vs, 3) + WS(rs, 7)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1J = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)])); + T1K = VSUB(T1I, T1J); + T22 = VADD(T1I, T1J); + } + T1L = VMUL(LDK(KP707106781), 
VADD(T1H, T1K)); + T26 = VBYI(VSUB(T22, T21)); + T1N = VMUL(LDK(KP707106781), VSUB(T1K, T1H)); + T23 = VADD(T21, T22); + } + { + V T2e, T2y, T2h, T2z; + { + V T2c, T2d, T2f, T2g; + T2c = LD(&(x[WS(vs, 4) + WS(rs, 1)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2d = LD(&(x[WS(vs, 4) + WS(rs, 5)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2e = VSUB(T2c, T2d); + T2y = VADD(T2c, T2d); + T2f = LD(&(x[WS(vs, 4) + WS(rs, 7)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2g = LD(&(x[WS(vs, 4) + WS(rs, 3)]), ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2h = VSUB(T2f, T2g); + T2z = VADD(T2f, T2g); + } + T2i = VMUL(LDK(KP707106781), VADD(T2e, T2h)); + T2D = VBYI(VSUB(T2z, T2y)); + T2k = VMUL(LDK(KP707106781), VSUB(T2h, T2e)); + T2A = VADD(T2y, T2z); + } + { + V T3P, T49, T3S, T4a; + { + V T3N, T3O, T3Q, T3R; + T3N = LD(&(x[WS(vs, 7) + WS(rs, 1)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3O = LD(&(x[WS(vs, 7) + WS(rs, 5)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3P = VSUB(T3N, T3O); + T49 = VADD(T3N, T3O); + T3Q = LD(&(x[WS(vs, 7) + WS(rs, 7)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3R = LD(&(x[WS(vs, 7) + WS(rs, 3)]), ms, &(x[WS(vs, 7) + WS(rs, 1)])); + T3S = VSUB(T3Q, T3R); + T4a = VADD(T3Q, T3R); + } + T3T = VMUL(LDK(KP707106781), VADD(T3P, T3S)); + T4e = VBYI(VSUB(T4a, T49)); + T3V = VMUL(LDK(KP707106781), VSUB(T3S, T3P)); + T4b = VADD(T49, T4a); + } + { + V TD, TX, TG, TY; + { + V TB, TC, TE, TF; + TB = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TC = LD(&(x[WS(vs, 1) + WS(rs, 5)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TD = VSUB(TB, TC); + TX = VADD(TB, TC); + TE = LD(&(x[WS(vs, 1) + WS(rs, 7)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TF = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); + TG = VSUB(TE, TF); + TY = VADD(TE, TF); + } + TH = VMUL(LDK(KP707106781), VADD(TD, TG)); + T12 = VBYI(VSUB(TY, TX)); + TJ = VMUL(LDK(KP707106781), VSUB(TG, TD)); + TZ = VADD(TX, TY); + } + { + V T1a, T1u, T1d, T1v; + { + V T18, T19, T1b, T1c; + T18 = LD(&(x[WS(vs, 2) + WS(rs, 
1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T19 = LD(&(x[WS(vs, 2) + WS(rs, 5)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T1a = VSUB(T18, T19); + T1u = VADD(T18, T19); + T1b = LD(&(x[WS(vs, 2) + WS(rs, 7)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T1c = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T1d = VSUB(T1b, T1c); + T1v = VADD(T1b, T1c); + } + T1e = VMUL(LDK(KP707106781), VADD(T1a, T1d)); + T1z = VBYI(VSUB(T1v, T1u)); + T1g = VMUL(LDK(KP707106781), VSUB(T1d, T1a)); + T1w = VADD(T1u, T1v); + } + { + V T2L, T35, T2O, T36; + { + V T2J, T2K, T2M, T2N; + T2J = LD(&(x[WS(vs, 5) + WS(rs, 1)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2K = LD(&(x[WS(vs, 5) + WS(rs, 5)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2L = VSUB(T2J, T2K); + T35 = VADD(T2J, T2K); + T2M = LD(&(x[WS(vs, 5) + WS(rs, 7)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2N = LD(&(x[WS(vs, 5) + WS(rs, 3)]), ms, &(x[WS(vs, 5) + WS(rs, 1)])); + T2O = VSUB(T2M, T2N); + T36 = VADD(T2M, T2N); + } + T2P = VMUL(LDK(KP707106781), VADD(T2L, T2O)); + T3a = VBYI(VSUB(T36, T35)); + T2R = VMUL(LDK(KP707106781), VSUB(T2O, T2L)); + T37 = VADD(T35, T36); + } + { + V T3i, T3C, T3l, T3D; + { + V T3g, T3h, T3j, T3k; + T3g = LD(&(x[WS(vs, 6) + WS(rs, 1)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3h = LD(&(x[WS(vs, 6) + WS(rs, 5)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3i = VSUB(T3g, T3h); + T3C = VADD(T3g, T3h); + T3j = LD(&(x[WS(vs, 6) + WS(rs, 7)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3k = LD(&(x[WS(vs, 6) + WS(rs, 3)]), ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3l = VSUB(T3j, T3k); + T3D = VADD(T3j, T3k); + } + T3m = VMUL(LDK(KP707106781), VADD(T3i, T3l)); + T3H = VBYI(VSUB(T3D, T3C)); + T3o = VMUL(LDK(KP707106781), VSUB(T3l, T3i)); + T3E = VADD(T3C, T3D); + } + ST(&(x[0]), VADD(Tp, Ts), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1t, T1w), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VADD(T34, T37), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T48, T4b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VADD(T3B, T3E), ms, &(x[0])); + 
ST(&(x[WS(rs, 4)]), VADD(T2x, T2A), ms, &(x[0])); + { + V Tt, T4c, T2B, T24; + ST(&(x[WS(rs, 3)]), VADD(T20, T23), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TW, TZ), ms, &(x[WS(rs, 1)])); + Tt = BYTWJ(&(W[TWVL * 6]), VSUB(Tp, Ts)); + ST(&(x[WS(vs, 4)]), Tt, ms, &(x[WS(vs, 4)])); + T4c = BYTWJ(&(W[TWVL * 6]), VSUB(T48, T4b)); + ST(&(x[WS(vs, 4) + WS(rs, 7)]), T4c, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T2B = BYTWJ(&(W[TWVL * 6]), VSUB(T2x, T2A)); + ST(&(x[WS(vs, 4) + WS(rs, 4)]), T2B, ms, &(x[WS(vs, 4)])); + T24 = BYTWJ(&(W[TWVL * 6]), VSUB(T20, T23)); + ST(&(x[WS(vs, 4) + WS(rs, 3)]), T24, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + } + { + V T10, T1x, T3F, T38, T1A, Tw; + T10 = BYTWJ(&(W[TWVL * 6]), VSUB(TW, TZ)); + ST(&(x[WS(vs, 4) + WS(rs, 1)]), T10, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1x = BYTWJ(&(W[TWVL * 6]), VSUB(T1t, T1w)); + ST(&(x[WS(vs, 4) + WS(rs, 2)]), T1x, ms, &(x[WS(vs, 4)])); + T3F = BYTWJ(&(W[TWVL * 6]), VSUB(T3B, T3E)); + ST(&(x[WS(vs, 4) + WS(rs, 6)]), T3F, ms, &(x[WS(vs, 4)])); + T38 = BYTWJ(&(W[TWVL * 6]), VSUB(T34, T37)); + ST(&(x[WS(vs, 4) + WS(rs, 5)]), T38, ms, &(x[WS(vs, 4) + WS(rs, 1)])); + T1A = BYTWJ(&(W[TWVL * 10]), VSUB(T1y, T1z)); + ST(&(x[WS(vs, 6) + WS(rs, 2)]), T1A, ms, &(x[WS(vs, 6)])); + Tw = BYTWJ(&(W[TWVL * 10]), VSUB(Tu, Tv)); + ST(&(x[WS(vs, 6)]), Tw, ms, &(x[WS(vs, 6)])); + } + { + V T2E, T3I, T13, T27, T3b, T4f; + T2E = BYTWJ(&(W[TWVL * 10]), VSUB(T2C, T2D)); + ST(&(x[WS(vs, 6) + WS(rs, 4)]), T2E, ms, &(x[WS(vs, 6)])); + T3I = BYTWJ(&(W[TWVL * 10]), VSUB(T3G, T3H)); + ST(&(x[WS(vs, 6) + WS(rs, 6)]), T3I, ms, &(x[WS(vs, 6)])); + T13 = BYTWJ(&(W[TWVL * 10]), VSUB(T11, T12)); + ST(&(x[WS(vs, 6) + WS(rs, 1)]), T13, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T27 = BYTWJ(&(W[TWVL * 10]), VSUB(T25, T26)); + ST(&(x[WS(vs, 6) + WS(rs, 3)]), T27, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T3b = BYTWJ(&(W[TWVL * 10]), VSUB(T39, T3a)); + ST(&(x[WS(vs, 6) + WS(rs, 5)]), T3b, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + T4f = BYTWJ(&(W[TWVL * 10]), VSUB(T4d, 
T4e)); + ST(&(x[WS(vs, 6) + WS(rs, 7)]), T4f, ms, &(x[WS(vs, 6) + WS(rs, 1)])); + } + { + V Tx, T1B, T3c, T4g, T3J, T2F; + Tx = BYTWJ(&(W[TWVL * 2]), VADD(Tu, Tv)); + ST(&(x[WS(vs, 2)]), Tx, ms, &(x[WS(vs, 2)])); + T1B = BYTWJ(&(W[TWVL * 2]), VADD(T1y, T1z)); + ST(&(x[WS(vs, 2) + WS(rs, 2)]), T1B, ms, &(x[WS(vs, 2)])); + T3c = BYTWJ(&(W[TWVL * 2]), VADD(T39, T3a)); + ST(&(x[WS(vs, 2) + WS(rs, 5)]), T3c, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T4g = BYTWJ(&(W[TWVL * 2]), VADD(T4d, T4e)); + ST(&(x[WS(vs, 2) + WS(rs, 7)]), T4g, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T3J = BYTWJ(&(W[TWVL * 2]), VADD(T3G, T3H)); + ST(&(x[WS(vs, 2) + WS(rs, 6)]), T3J, ms, &(x[WS(vs, 2)])); + T2F = BYTWJ(&(W[TWVL * 2]), VADD(T2C, T2D)); + ST(&(x[WS(vs, 2) + WS(rs, 4)]), T2F, ms, &(x[WS(vs, 2)])); + } + T28 = BYTWJ(&(W[TWVL * 2]), VADD(T25, T26)); + ST(&(x[WS(vs, 2) + WS(rs, 3)]), T28, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + T14 = BYTWJ(&(W[TWVL * 2]), VADD(T11, T12)); + ST(&(x[WS(vs, 2) + WS(rs, 1)]), T14, ms, &(x[WS(vs, 2) + WS(rs, 1)])); + { + V Th, Ti, Tb, Tg; + Tb = VADD(T3, Ta); + Tg = VBYI(VSUB(Tc, Tf)); + Th = BYTWJ(&(W[TWVL * 12]), VSUB(Tb, Tg)); + Ti = BYTWJ(&(W[0]), VADD(Tb, Tg)); + ST(&(x[WS(vs, 7)]), Th, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1)]), Ti, ms, &(x[WS(vs, 1)])); + } + { + V T40, T41, T3U, T3Z; + T3U = VADD(T3M, T3T); + T3Z = VBYI(VSUB(T3V, T3Y)); + T40 = BYTWJ(&(W[TWVL * 12]), VSUB(T3U, T3Z)); + T41 = BYTWJ(&(W[0]), VADD(T3U, T3Z)); + ST(&(x[WS(vs, 7) + WS(rs, 7)]), T40, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 7)]), T41, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V T2p, T2q, T2j, T2o; + T2j = VADD(T2b, T2i); + T2o = VBYI(VSUB(T2k, T2n)); + T2p = BYTWJ(&(W[TWVL * 12]), VSUB(T2j, T2o)); + T2q = BYTWJ(&(W[0]), VADD(T2j, T2o)); + ST(&(x[WS(vs, 7) + WS(rs, 4)]), T2p, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 4)]), T2q, ms, &(x[WS(vs, 1)])); + } + { + V T1S, T1T, T1M, T1R; + T1M = VADD(T1E, T1L); + T1R = VBYI(VSUB(T1N, T1Q)); + T1S = BYTWJ(&(W[TWVL * 
12]), VSUB(T1M, T1R)); + T1T = BYTWJ(&(W[0]), VADD(T1M, T1R)); + ST(&(x[WS(vs, 7) + WS(rs, 3)]), T1S, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1T, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V TO, TP, TI, TN; + TI = VADD(TA, TH); + TN = VBYI(VSUB(TJ, TM)); + TO = BYTWJ(&(W[TWVL * 12]), VSUB(TI, TN)); + TP = BYTWJ(&(W[0]), VADD(TI, TN)); + ST(&(x[WS(vs, 7) + WS(rs, 1)]), TO, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 1)]), TP, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V T1l, T1m, T1f, T1k; + T1f = VADD(T17, T1e); + T1k = VBYI(VSUB(T1g, T1j)); + T1l = BYTWJ(&(W[TWVL * 12]), VSUB(T1f, T1k)); + T1m = BYTWJ(&(W[0]), VADD(T1f, T1k)); + ST(&(x[WS(vs, 7) + WS(rs, 2)]), T1l, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 2)]), T1m, ms, &(x[WS(vs, 1)])); + } + { + V T3t, T3u, T3n, T3s; + T3n = VADD(T3f, T3m); + T3s = VBYI(VSUB(T3o, T3r)); + T3t = BYTWJ(&(W[TWVL * 12]), VSUB(T3n, T3s)); + T3u = BYTWJ(&(W[0]), VADD(T3n, T3s)); + ST(&(x[WS(vs, 7) + WS(rs, 6)]), T3t, ms, &(x[WS(vs, 7)])); + ST(&(x[WS(vs, 1) + WS(rs, 6)]), T3u, ms, &(x[WS(vs, 1)])); + } + { + V T2W, T2X, T2Q, T2V; + T2Q = VADD(T2I, T2P); + T2V = VBYI(VSUB(T2R, T2U)); + T2W = BYTWJ(&(W[TWVL * 12]), VSUB(T2Q, T2V)); + T2X = BYTWJ(&(W[0]), VADD(T2Q, T2V)); + ST(&(x[WS(vs, 7) + WS(rs, 5)]), T2W, ms, &(x[WS(vs, 7) + WS(rs, 1)])); + ST(&(x[WS(vs, 1) + WS(rs, 5)]), T2X, ms, &(x[WS(vs, 1) + WS(rs, 1)])); + } + { + V T1p, T1q, T1n, T1o; + T1n = VSUB(T17, T1e); + T1o = VBYI(VADD(T1j, T1g)); + T1p = BYTWJ(&(W[TWVL * 8]), VSUB(T1n, T1o)); + T1q = BYTWJ(&(W[TWVL * 4]), VADD(T1n, T1o)); + ST(&(x[WS(vs, 5) + WS(rs, 2)]), T1p, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 2)]), T1q, ms, &(x[WS(vs, 3)])); + } + { + V Tl, Tm, Tj, Tk; + Tj = VSUB(T3, Ta); + Tk = VBYI(VADD(Tf, Tc)); + Tl = BYTWJ(&(W[TWVL * 8]), VSUB(Tj, Tk)); + Tm = BYTWJ(&(W[TWVL * 4]), VADD(Tj, Tk)); + ST(&(x[WS(vs, 5)]), Tl, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3)]), Tm, ms, &(x[WS(vs, 3)])); + } + { + V 
T2t, T2u, T2r, T2s; + T2r = VSUB(T2b, T2i); + T2s = VBYI(VADD(T2n, T2k)); + T2t = BYTWJ(&(W[TWVL * 8]), VSUB(T2r, T2s)); + T2u = BYTWJ(&(W[TWVL * 4]), VADD(T2r, T2s)); + ST(&(x[WS(vs, 5) + WS(rs, 4)]), T2t, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 4)]), T2u, ms, &(x[WS(vs, 3)])); + } + { + V T3x, T3y, T3v, T3w; + T3v = VSUB(T3f, T3m); + T3w = VBYI(VADD(T3r, T3o)); + T3x = BYTWJ(&(W[TWVL * 8]), VSUB(T3v, T3w)); + T3y = BYTWJ(&(W[TWVL * 4]), VADD(T3v, T3w)); + ST(&(x[WS(vs, 5) + WS(rs, 6)]), T3x, ms, &(x[WS(vs, 5)])); + ST(&(x[WS(vs, 3) + WS(rs, 6)]), T3y, ms, &(x[WS(vs, 3)])); + } + { + V TS, TT, TQ, TR; + TQ = VSUB(TA, TH); + TR = VBYI(VADD(TM, TJ)); + TS = BYTWJ(&(W[TWVL * 8]), VSUB(TQ, TR)); + TT = BYTWJ(&(W[TWVL * 4]), VADD(TQ, TR)); + ST(&(x[WS(vs, 5) + WS(rs, 1)]), TS, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 1)]), TT, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V T1W, T1X, T1U, T1V; + T1U = VSUB(T1E, T1L); + T1V = VBYI(VADD(T1Q, T1N)); + T1W = BYTWJ(&(W[TWVL * 8]), VSUB(T1U, T1V)); + T1X = BYTWJ(&(W[TWVL * 4]), VADD(T1U, T1V)); + ST(&(x[WS(vs, 5) + WS(rs, 3)]), T1W, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1X, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V T30, T31, T2Y, T2Z; + T2Y = VSUB(T2I, T2P); + T2Z = VBYI(VADD(T2U, T2R)); + T30 = BYTWJ(&(W[TWVL * 8]), VSUB(T2Y, T2Z)); + T31 = BYTWJ(&(W[TWVL * 4]), VADD(T2Y, T2Z)); + ST(&(x[WS(vs, 5) + WS(rs, 5)]), T30, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 5)]), T31, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + { + V T44, T45, T42, T43; + T42 = VSUB(T3M, T3T); + T43 = VBYI(VADD(T3Y, T3V)); + T44 = BYTWJ(&(W[TWVL * 8]), VSUB(T42, T43)); + T45 = BYTWJ(&(W[TWVL * 4]), VADD(T42, T43)); + ST(&(x[WS(vs, 5) + WS(rs, 7)]), T44, ms, &(x[WS(vs, 5) + WS(rs, 1)])); + ST(&(x[WS(vs, 3) + WS(rs, 7)]), T45, ms, &(x[WS(vs, 3) + WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + 
VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("q1fv_8"), twinstr, &GENUS, {264, 128, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_q1fv_8) (planner *p) { + X(kdft_difsq_register) (p, q1fv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:32 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1buv_10 -include t1bu.h -sign 1 */ + +/* + * This function contains 51 FP additions, 40 FP multiplications, + * (or, 33 additions, 22 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1bu.h" + +static void t1buv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Td, TA, T4, Ta, Tk, TE, Tp, TF, TB, T9, T1, T2, Tb; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V Tg, Tn, Ti, Tl; + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + { + V T6, T8, T5, Tc; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Th, To, Tj, Tm, T7; + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 8]), T2); + Th = BYTW(&(W[TWVL * 6]), Tg); + To = BYTW(&(W[0]), Tn); + Tj = BYTW(&(W[TWVL * 
16]), Ti); + Tm = BYTW(&(W[TWVL * 10]), Tl); + T6 = BYTW(&(W[TWVL * 2]), T5); + Td = BYTW(&(W[TWVL * 4]), Tc); + T8 = BYTW(&(W[TWVL * 12]), T7); + TA = VADD(T1, T3); + T4 = VSUB(T1, T3); + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tk = VSUB(Th, Tj); + TE = VADD(Th, Tj); + Tp = VSUB(Tm, To); + TF = VADD(Tm, To); + } + TB = VADD(T6, T8); + T9 = VSUB(T6, T8); + } + } + Tb = BYTW(&(W[TWVL * 14]), Ta); + { + V TL, TG, Tw, Tq, TC, Te; + TL = VSUB(TE, TF); + TG = VADD(TE, TF); + Tw = VSUB(Tk, Tp); + Tq = VADD(Tk, Tp); + TC = VADD(Tb, Td); + Te = VSUB(Tb, Td); + { + V TM, TD, Tv, Tf; + TM = VSUB(TB, TC); + TD = VADD(TB, TC); + Tv = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V TP, TN, TH, TJ, Tz, Tx, Tr, Tt, TI, Ts; + TP = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TL, TM)); + TN = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TM, TL)); + TH = VADD(TD, TG); + TJ = VSUB(TD, TG); + Tz = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tv, Tw)); + Tx = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tv)); + Tr = VADD(Tf, Tq); + Tt = VSUB(Tf, Tq); + ST(&(x[0]), VADD(TA, TH), ms, &(x[0])); + TI = VFNMS(LDK(KP250000000), TH, TA); + ST(&(x[WS(rs, 5)]), VADD(T4, Tr), ms, &(x[WS(rs, 1)])); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + { + V TK, TO, Tu, Ty; + TK = VFNMS(LDK(KP559016994), TJ, TI); + TO = VFMA(LDK(KP559016994), TJ, TI); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + Ty = VFNMS(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 8)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 
5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1buv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_10) (planner *p) { + X(kdft_dit_register) (p, t1buv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1buv_10 -include t1bu.h -sign 1 */ + +/* + * This function contains 51 FP additions, 30 FP multiplications, + * (or, 45 additions, 24 multiplications, 6 fused multiply/add), + * 32 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1bu.h" + +static void t1buv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Tu, TH, Tg, Tl, Tp, TD, TE, TJ, T5, Ta, To, TA, TB, TI, Tr; + V Tt, Ts; + Tr = LD(&(x[0]), ms, &(x[0])); + Ts = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tt = BYTW(&(W[TWVL * 8]), Ts); + Tu = VSUB(Tr, Tt); + TH = VADD(Tr, Tt); + { + V Td, Tk, Tf, Ti; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 6]), Tc); + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = BYTW(&(W[0]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTW(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 10]), Th); + } + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tp = VADD(Tg, Tl); + TD = VADD(Td, Tf); + TE = VADD(Ti, Tk); + TJ = VADD(TD, TE); + } + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = 
LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTW(&(W[TWVL * 2]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 14]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + To = VADD(T5, Ta); + TA = VADD(T2, T4); + TB = VADD(T7, T9); + TI = VADD(TA, TB); + } + { + V Tq, Tv, Tw, Tn, Tz, Tb, Tm, Ty, Tx; + Tq = VMUL(LDK(KP559016994), VSUB(To, Tp)); + Tv = VADD(To, Tp); + Tw = VFNMS(LDK(KP250000000), Tv, Tu); + Tb = VSUB(T5, Ta); + Tm = VSUB(Tg, Tl); + Tn = VBYI(VFMA(LDK(KP951056516), Tb, VMUL(LDK(KP587785252), Tm))); + Tz = VBYI(VFNMS(LDK(KP951056516), Tm, VMUL(LDK(KP587785252), Tb))); + ST(&(x[WS(rs, 5)]), VADD(Tu, Tv), ms, &(x[WS(rs, 1)])); + Ty = VSUB(Tw, Tq); + ST(&(x[WS(rs, 3)]), VSUB(Ty, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(Tz, Ty), ms, &(x[WS(rs, 1)])); + Tx = VADD(Tq, Tw); + ST(&(x[WS(rs, 1)]), VADD(Tn, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(Tx, Tn), ms, &(x[WS(rs, 1)])); + } + { + V TM, TK, TL, TG, TP, TC, TF, TO, TN; + TM = VMUL(LDK(KP559016994), VSUB(TI, TJ)); + TK = VADD(TI, TJ); + TL = VFNMS(LDK(KP250000000), TK, TH); + TC = VSUB(TA, TB); + TF = VSUB(TD, TE); + TG = VBYI(VFNMS(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TC))); + TP = VBYI(VFMA(LDK(KP951056516), TC, VMUL(LDK(KP587785252), TF))); + ST(&(x[0]), VADD(TH, TK), ms, &(x[0])); + TO = VADD(TM, TL); + ST(&(x[WS(rs, 4)]), VSUB(TO, TP), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(TP, TO), ms, &(x[0])); + TN = VSUB(TL, TM); + ST(&(x[WS(rs, 2)]), VADD(TG, TN), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TN, TG), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1buv_10"), 
twinstr, &GENUS, {45, 24, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_10) (planner *p) { + X(kdft_dit_register) (p, t1buv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:30 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1buv_2 -include t1bu.h -sign 1 */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1bu.h" + +static void t1buv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), T2); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1buv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_2) (planner *p) { + X(kdft_dit_register) (p, t1buv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1buv_2 -include t1bu.h -sign 1 */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1bu.h" + +static void t1buv_2(R *ri, R *ii, 
const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), T2); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1buv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_2) (planner *p) { + X(kdft_dit_register) (p, t1buv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:30 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1buv_3 -include t1bu.h -sign 1 */ + +/* + * This function contains 8 FP additions, 8 FP multiplications, + * (or, 5 additions, 5 multiplications, 3 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1bu.h" + +static void t1buv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T1, T2, T4; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, T8, T6, T7; + T3 = BYTW(&(W[0]), T2); + T5 = BYTW(&(W[TWVL * 2]), T4); + T8 = VMUL(LDK(KP866025403), VSUB(T3, T5)); + T6 = VADD(T3, T5); + T7 = VFNMS(LDK(KP500000000), T6, T1); + ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(T8, T7), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T8, T7), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1buv_3"), twinstr, &GENUS, {5, 5, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_3) (planner *p) { + X(kdft_dit_register) (p, t1buv_3, &desc); +} +#else 
/* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1buv_3 -include t1bu.h -sign 1 */ + +/* + * This function contains 8 FP additions, 6 FP multiplications, + * (or, 7 additions, 5 multiplications, 1 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1bu.h" + +static void t1buv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T6, T2, T4, T7, T1, T3, T5, T8; + T6 = LD(&(x[0]), ms, &(x[0])); + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 2]), T3); + T7 = VADD(T2, T4); + ST(&(x[0]), VADD(T6, T7), ms, &(x[0])); + T5 = VBYI(VMUL(LDK(KP866025403), VSUB(T2, T4))); + T8 = VFNMS(LDK(KP500000000), T7, T6); + ST(&(x[WS(rs, 1)]), VADD(T5, T8), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VSUB(T8, T5), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1buv_3"), twinstr, &GENUS, {7, 5, 1, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_3) (planner *p) { + X(kdft_dit_register) (p, t1buv_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute 
it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:30 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1buv_4 -include t1bu.h -sign 1 */ + +/* + * This function contains 11 FP additions, 8 FP multiplications, + * (or, 9 additions, 6 multiplications, 2 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1bu.h" + +static void t1buv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T7, T2, T5, T8, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 4]), T7); + T3 = BYTW(&(W[TWVL * 2]), T2); + T6 = BYTW(&(W[0]), T5); + { + V Ta, T4, Tb, T9; + Ta = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tb = VADD(T6, T8); + T9 = VSUB(T6, T8); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, 
&(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1buv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_4) (planner *p) { + X(kdft_dit_register) (p, t1buv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1buv_4 -include t1bu.h -sign 1 */ + +/* + * This function contains 11 FP additions, 6 FP multiplications, + * (or, 11 additions, 6 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1bu.h" + +static void t1buv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T3, T6, T7, T2, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 4]), T7); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 2]), T2); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTW(&(W[0]), T5); + { + V T4, T9, Ta, Tb; + T4 = VSUB(T1, T3); + T9 = VBYI(VSUB(T6, T8)); + ST(&(x[WS(rs, 3)]), VSUB(T4, T9), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T4, T9), ms, &(x[WS(rs, 1)])); + Ta = VADD(T1, T3); + Tb = VADD(T6, T8); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1buv_4"), twinstr, &GENUS, {11, 6, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_4) (planner *p) { 
+ X(kdft_dit_register) (p, t1buv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:30 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1buv_5 -include t1bu.h -sign 1 */ + +/* + * This function contains 20 FP additions, 19 FP multiplications, + * (or, 11 additions, 10 multiplications, 9 fused multiply/add), + * 26 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1bu.h" + +static void t1buv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T2, T9, T4, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, Ta, T5, T8; + T3 = BYTW(&(W[0]), T2); + Ta = BYTW(&(W[TWVL * 4]), T9); + T5 = BYTW(&(W[TWVL * 6]), T4); + T8 = BYTW(&(W[TWVL * 2]), T7); + { + V T6, Tg, Tb, Th; + T6 = VADD(T3, T5); + Tg = VSUB(T3, T5); + Tb = VADD(T8, Ta); + Th = VSUB(T8, Ta); + { + V Te, Tc, Tk, Ti, Td, Tj, Tf; + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Tk = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tg, Th)); + Ti = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Th, Tg)); + Td = VFNMS(LDK(KP250000000), Tc, T1); + ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); + Tj = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + ST(&(x[WS(rs, 2)]), VFNMSI(Tk, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Ti, Tf), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1buv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_5) (planner *p) { + X(kdft_dit_register) (p, t1buv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1buv_5 -include t1bu.h -sign 1 */ + +/* + * This function contains 20 FP additions, 14 FP multiplications, + * (or, 17 additions, 11 
multiplications, 3 fused multiply/add), + * 20 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1bu.h" + +static void t1buv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V Tf, T5, Ta, Tc, Td, Tg; + Tf = LD(&(x[0]), ms, &(x[0])); + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 6]), T3); + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 2]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tc = VADD(T2, T4); + Td = VADD(T7, T9); + Tg = VADD(Tc, Td); + } + ST(&(x[0]), VADD(Tf, Tg), ms, &(x[0])); + { + V Tb, Tj, Ti, Tk, Te, Th; + Tb = VBYI(VFMA(LDK(KP951056516), T5, VMUL(LDK(KP587785252), Ta))); + Tj = VBYI(VFNMS(LDK(KP951056516), Ta, VMUL(LDK(KP587785252), T5))); + Te = VMUL(LDK(KP559016994), VSUB(Tc, Td)); + Th = VFNMS(LDK(KP250000000), Tg, Tf); + Ti = VADD(Te, Th); + Tk = VSUB(Th, Te); + ST(&(x[WS(rs, 1)]), VADD(Tb, Ti), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VSUB(Ti, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tj, Tk), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1buv_5"), twinstr, &GENUS, {17, 11, 3, 0}, 0, 0, 0 }; + +void 
XSIMD(codelet_t1buv_5) (planner *p) { + X(kdft_dit_register) (p, t1buv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:31 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1buv_6 -include t1bu.h -sign 1 */ + +/* + * This function contains 23 FP additions, 18 FP multiplications, + * (or, 17 additions, 12 multiplications, 6 fused multiply/add), + * 27 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1bu.h" + +static void t1buv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); 
+ { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V T1, T2, Ta, Tc, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T3, Tb, Td, T6, T8; + T3 = BYTW(&(W[TWVL * 4]), T2); + Tb = BYTW(&(W[TWVL * 6]), Ta); + Td = BYTW(&(W[0]), Tc); + T6 = BYTW(&(W[TWVL * 2]), T5); + T8 = BYTW(&(W[TWVL * 8]), T7); + { + V Ti, T4, Tk, Te, Tj, T9; + Ti = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tk = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tj = VADD(T6, T8); + T9 = VSUB(T6, T8); + { + V Tl, Tn, Tf, Th, Tm, Tg; + Tl = VADD(Tj, Tk); + Tn = VMUL(LDK(KP866025403), VSUB(Tj, Tk)); + Tf = VADD(T9, Te); + Th = VMUL(LDK(KP866025403), VSUB(T9, Te)); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[WS(rs, 3)]), VADD(T4, Tf), ms, &(x[WS(rs, 1)])); + Tg = VFNMS(LDK(KP500000000), Tf, T4); + ST(&(x[WS(rs, 4)]), VFMAI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(Th, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Th, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1buv_6"), twinstr, &GENUS, {17, 12, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_6) (planner *p) { + X(kdft_dit_register) (p, t1buv_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1buv_6 -include t1bu.h -sign 1 */ + +/* + * This function contains 23 FP additions, 14 FP multiplications, + * (or, 21 
additions, 12 multiplications, 2 fused multiply/add), + * 19 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1bu.h" + +static void t1buv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V Tf, Ti, Ta, Tk, T5, Tj, Tc, Te, Td; + Tc = LD(&(x[0]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Te = BYTW(&(W[TWVL * 4]), Td); + Tf = VSUB(Tc, Te); + Ti = VADD(Tc, Te); + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 6]), T6); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[0]), T8); + Ta = VSUB(T7, T9); + Tk = VADD(T7, T9); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTW(&(W[TWVL * 2]), T1); + T3 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 8]), T3); + T5 = VSUB(T2, T4); + Tj = VADD(T2, T4); + } + { + V Tb, Tg, Th, Tn, Tl, Tm; + Tb = VBYI(VMUL(LDK(KP866025403), VSUB(T5, Ta))); + Tg = VADD(T5, Ta); + Th = VFNMS(LDK(KP500000000), Tg, Tf); + ST(&(x[WS(rs, 1)]), VADD(Tb, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(Tf, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Th, Tb), ms, &(x[WS(rs, 1)])); + Tn = VBYI(VMUL(LDK(KP866025403), VSUB(Tj, Tk))); + Tl = VADD(Tj, Tk); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[WS(rs, 2)]), VSUB(Tm, Tn), ms, &(x[0])); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(Tn, Tm), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1buv_6"), twinstr, &GENUS, {21, 12, 2, 0}, 
0, 0, 0 }; + +void XSIMD(codelet_t1buv_6) (planner *p) { + X(kdft_dit_register) (p, t1buv_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:31 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1buv_7 -include t1bu.h -sign 1 */ + +/* + * This function contains 36 FP additions, 36 FP multiplications, + * (or, 15 additions, 15 multiplications, 21 fused multiply/add), + * 42 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1bu.h" + +static void t1buv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, 
+0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V T1, T2, T4, Te, Tc, T9, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Te = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, Tf, Td, Ta, T8; + T3 = BYTW(&(W[0]), T2); + T5 = BYTW(&(W[TWVL * 10]), T4); + Tf = BYTW(&(W[TWVL * 6]), Te); + Td = BYTW(&(W[TWVL * 4]), Tc); + Ta = BYTW(&(W[TWVL * 8]), T9); + T8 = BYTW(&(W[TWVL * 2]), T7); + { + V T6, Tm, Tg, Tk, Tb, Tl; + T6 = VADD(T3, T5); + Tm = VSUB(T3, T5); + Tg = VADD(Td, Tf); + Tk = VSUB(Td, Tf); + Tb = VADD(T8, Ta); + Tl = VSUB(T8, Ta); + { + V Tp, Tx, Tu, Th, Ts, Tn, Tq, Ty; + Tp = VFNMS(LDK(KP356895867), T6, Tg); + Tx = VFMA(LDK(KP554958132), Tk, Tm); + ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0])); + Tu = VFNMS(LDK(KP356895867), Tb, T6); + Th = VFNMS(LDK(KP356895867), Tg, Tb); + Ts = VFMA(LDK(KP554958132), Tl, Tk); + Tn = VFNMS(LDK(KP554958132), Tm, Tl); + Tq = VFNMS(LDK(KP692021471), Tp, Tb); + Ty = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Tx, Tl)); + { + V Tv, Ti, Tt, To, Tr, Tw, Tj; + Tv = VFNMS(LDK(KP692021471), Tu, Tg); + Ti = VFNMS(LDK(KP692021471), Th, T6); + Tt = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Ts, Tm)); + To = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tn, Tk)); + Tr = VFNMS(LDK(KP900968867), Tq, T1); + Tw = VFNMS(LDK(KP900968867), Tv, T1); + Tj = 
VFNMS(LDK(KP900968867), Ti, T1); + ST(&(x[WS(rs, 5)]), VFNMSI(Tt, Tr), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFMAI(Tt, Tr), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Ty, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(To, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(To, Tj), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1buv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_7) (planner *p) { + X(kdft_dit_register) (p, t1buv_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1buv_7 -include t1bu.h -sign 1 */ + +/* + * This function contains 36 FP additions, 30 FP multiplications, + * (or, 24 additions, 18 multiplications, 12 fused multiply/add), + * 21 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1bu.h" + +static void t1buv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V Th, Tf, Ti, T5, Tk, Ta, Tj, To, Tp; + Th = LD(&(x[0]), ms, &(x[0])); + { + V Tc, Te, Tb, Td; + Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = BYTW(&(W[TWVL * 2]), Tb); + Td = 
LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Te = BYTW(&(W[TWVL * 8]), Td); + Tf = VSUB(Tc, Te); + Ti = VADD(Tc, Te); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 10]), T3); + T5 = VSUB(T2, T4); + Tk = VADD(T2, T4); + } + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 4]), T6); + T8 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T9 = BYTW(&(W[TWVL * 6]), T8); + Ta = VSUB(T7, T9); + Tj = VADD(T7, T9); + } + ST(&(x[0]), VADD(Th, VADD(Tk, VADD(Ti, Tj))), ms, &(x[0])); + To = VBYI(VFNMS(LDK(KP781831482), Ta, VFNMS(LDK(KP433883739), Tf, VMUL(LDK(KP974927912), T5)))); + Tp = VFMA(LDK(KP623489801), Tj, VFNMS(LDK(KP900968867), Ti, VFNMS(LDK(KP222520933), Tk, Th))); + ST(&(x[WS(rs, 2)]), VADD(To, Tp), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VSUB(Tp, To), ms, &(x[WS(rs, 1)])); + { + V Tg, Tl, Tm, Tn; + Tg = VBYI(VFMA(LDK(KP433883739), T5, VFNMS(LDK(KP781831482), Tf, VMUL(LDK(KP974927912), Ta)))); + Tl = VFMA(LDK(KP623489801), Ti, VFNMS(LDK(KP222520933), Tj, VFNMS(LDK(KP900968867), Tk, Th))); + ST(&(x[WS(rs, 3)]), VADD(Tg, Tl), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VSUB(Tl, Tg), ms, &(x[0])); + Tm = VBYI(VFMA(LDK(KP781831482), T5, VFMA(LDK(KP974927912), Tf, VMUL(LDK(KP433883739), Ta)))); + Tn = VFMA(LDK(KP623489801), Tk, VFNMS(LDK(KP900968867), Tj, VFNMS(LDK(KP222520933), Ti, Th))); + ST(&(x[WS(rs, 1)]), VADD(Tm, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VSUB(Tn, Tm), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1buv_7"), twinstr, &GENUS, {24, 18, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_7) (planner *p) { + X(kdft_dit_register) (p, t1buv_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:31 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1buv_8 -include t1bu.h -sign 1 */ + +/* + * This function contains 33 FP additions, 24 FP multiplications, + * (or, 23 additions, 14 multiplications, 10 fused multiply/add), + * 36 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1bu.h" + +static void t1buv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, Th, Tj, T5, T7, Ta, Tc; + T1 = 
LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Ti, Tk, T6, T8, Tb, Td; + T3 = BYTW(&(W[TWVL * 6]), T2); + Ti = BYTW(&(W[TWVL * 2]), Th); + Tk = BYTW(&(W[TWVL * 10]), Tj); + T6 = BYTW(&(W[0]), T5); + T8 = BYTW(&(W[TWVL * 8]), T7); + Tb = BYTW(&(W[TWVL * 12]), Ta); + Td = BYTW(&(W[TWVL * 4]), Tc); + { + V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts; + Tq = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tr = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + Tt = VADD(T6, T8); + T9 = VSUB(T6, T8); + Tu = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tw = VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + { + V Tx, Tv, Tm, Tf; + Tx = VADD(Tt, Tu); + Tv = VSUB(Tt, Tu); + Tm = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V Tp, Tn, To, Tg; + ST(&(x[0]), VADD(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(Tv, Ts), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tv, Ts), ms, &(x[0])); + Tp = VFMA(LDK(KP707106781), Tm, Tl); + Tn = VFNMS(LDK(KP707106781), Tm, Tl); + To = VFMA(LDK(KP707106781), Tf, T4); + Tg = VFNMS(LDK(KP707106781), Tf, T4); + ST(&(x[WS(rs, 1)]), VFMAI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(Tn, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tn, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1buv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_8) (planner *p) { + X(kdft_dit_register) (p, t1buv_8, &desc); +} +#else /* HAVE_FMA 
*/ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1buv_8 -include t1bu.h -sign 1 */ + +/* + * This function contains 33 FP additions, 16 FP multiplications, + * (or, 33 additions, 16 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1bu.h" + +static void t1buv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V Tl, Tq, Tg, Tr, T5, Tt, Ta, Tu, Ti, Tk, Tj; + Ti = LD(&(x[0]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tk = BYTW(&(W[TWVL * 6]), Tj); + Tl = VSUB(Ti, Tk); + Tq = VADD(Ti, Tk); + { + V Td, Tf, Tc, Te; + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 2]), Tc); + Te = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tf = BYTW(&(W[TWVL * 10]), Te); + Tg = VSUB(Td, Tf); + Tr = VADD(Td, Tf); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 8]), T3); + T5 = VSUB(T2, T4); + Tt = VADD(T2, T4); + } + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 12]), T6); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + Ta = VSUB(T7, T9); + Tu = VADD(T7, T9); + } + { + V Ts, Tv, Tw, Tx; + Ts = VSUB(Tq, Tr); + Tv = VBYI(VSUB(Tt, Tu)); + ST(&(x[WS(rs, 6)]), VSUB(Ts, Tv), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Ts, Tv), ms, &(x[0])); + Tw = VADD(Tq, Tr); + Tx = VADD(Tt, Tu); + ST(&(x[WS(rs, 4)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[0]), VADD(Tw, Tx), ms, &(x[0])); + { + V Th, To, Tn, Tp, Tb, Tm; + Tb = VMUL(LDK(KP707106781), VSUB(T5, Ta)); + Th = VBYI(VSUB(Tb, Tg)); + To 
= VBYI(VADD(Tg, Tb)); + Tm = VMUL(LDK(KP707106781), VADD(T5, Ta)); + Tn = VSUB(Tl, Tm); + Tp = VADD(Tl, Tm); + ST(&(x[WS(rs, 3)]), VADD(Th, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VSUB(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tn, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1buv_8"), twinstr, &GENUS, {33, 16, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_8) (planner *p) { + X(kdft_dit_register) (p, t1buv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1buv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:32 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1buv_9 -include t1bu.h -sign 1 */ + +/* + * This function contains 54 FP additions, 54 FP multiplications, + * (or, 20 additions, 20 multiplications, 34 fused multiply/add), + * 67 stack variables, 19 constants, and 18 memory accesses + */ +#include "t1bu.h" + +static void t1buv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP907603734, +0.907603734547952313649323976213898122064543220); + DVK(KP666666666, +0.666666666666666666666666666666666666666666667); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP879385241, +0.879385241571816768108218554649462939872416269); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP826351822, +0.826351822333069651148283373230685203999624323); + DVK(KP347296355, +0.347296355333860697703433253538629592000751354); + DVK(KP898197570, +0.898197570222573798468955502359086394667167570); + DVK(KP673648177, +0.673648177666930348851716626769314796000375677); + DVK(KP420276625, +0.420276625461206169731530603237061658838781920); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP586256827, +0.586256827714544512072145703099641959914944179); + DVK(KP968908795, +0.968908795874236621082202410917456709164223497); + DVK(KP726681596, +0.726681596905677465811651808188092531873167623); + DVK(KP439692620, 
+0.439692620785908384054109277324731469936208134); + DVK(KP203604859, +0.203604859554852403062088995281827210665664861); + DVK(KP152703644, +0.152703644666139302296566746461370407999248646); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T3, T5, T9, Tn, Tb, Td, Th, Tj, Tx, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T2, T4, T8, Tm; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V Ta, Tc, Tg, Ti; + Ta = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 4]), T2); + T5 = BYTW(&(W[TWVL * 10]), T4); + T9 = BYTW(&(W[TWVL * 2]), T8); + Tn = BYTW(&(W[0]), Tm); + Tb = BYTW(&(W[TWVL * 8]), Ta); + Td = BYTW(&(W[TWVL * 14]), Tc); + Th = BYTW(&(W[TWVL * 6]), Tg); + Tj = BYTW(&(W[TWVL * 12]), Ti); + } + } + Tx = VSUB(T3, T5); + T6 = VADD(T3, T5); + { + V Tl, Te, Tk, To, T7, TN; + Tl = VSUB(Td, Tb); + Te = VADD(Tb, Td); + Tk = VSUB(Th, Tj); + To = VADD(Th, Tj); + T7 = VFNMS(LDK(KP500000000), T6, T1); + TN = VADD(T1, T6); + { + V Tf, TP, Tp, TO; + Tf = VFNMS(LDK(KP500000000), Te, T9); + TP = VADD(T9, Te); + Tp = VFNMS(LDK(KP500000000), To, Tn); + TO = VADD(Tn, To); + { + V Tz, TC, Tu, TD, TA, Tq, TQ, TS; + Tz = VFNMS(LDK(KP152703644), Tl, Tf); + TC = VFMA(LDK(KP203604859), Tf, Tl); + Tu = VFNMS(LDK(KP439692620), Tk, Tf); + TD = VFNMS(LDK(KP726681596), Tk, Tp); + TA = VFMA(LDK(KP968908795), Tp, Tk); + Tq = VFNMS(LDK(KP586256827), Tp, Tl); + TQ = VADD(TO, TP); + TS = VMUL(LDK(KP866025403), VSUB(TO, TP)); + { + V TI, TB, TH, TE, Tr, TR, Tw, Tv; + Tv = VFNMS(LDK(KP420276625), Tu, Tl); + TI = 
VFMA(LDK(KP673648177), TA, Tz); + TB = VFNMS(LDK(KP673648177), TA, Tz); + TH = VFNMS(LDK(KP898197570), TD, TC); + TE = VFMA(LDK(KP898197570), TD, TC); + Tr = VFNMS(LDK(KP347296355), Tq, Tk); + ST(&(x[0]), VADD(TQ, TN), ms, &(x[0])); + TR = VFNMS(LDK(KP500000000), TQ, TN); + Tw = VFNMS(LDK(KP826351822), Tv, Tp); + { + V TM, TL, TF, TJ, Ts, Ty, TG, TK, Tt; + TM = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), Tx, TI)); + TL = VFMA(LDK(KP852868531), TE, T7); + TF = VFNMS(LDK(KP500000000), TE, TB); + TJ = VFMA(LDK(KP666666666), TI, TH); + Ts = VFNMS(LDK(KP907603734), Tr, Tf); + ST(&(x[WS(rs, 6)]), VFNMSI(TS, TR), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(TS, TR), ms, &(x[WS(rs, 1)])); + Ty = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), Tx, Tw)); + ST(&(x[WS(rs, 8)]), VFNMSI(TM, TL), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(TM, TL), ms, &(x[WS(rs, 1)])); + TG = VFMA(LDK(KP852868531), TF, T7); + TK = VMUL(LDK(KP866025403), VFNMS(LDK(KP852868531), TJ, Tx)); + Tt = VFNMS(LDK(KP939692620), Ts, T7); + ST(&(x[WS(rs, 5)]), VFNMSI(TK, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(TK, TG), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(Ty, Tt), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFNMSI(Ty, Tt), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1buv_9"), twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_9) (planner *p) { + X(kdft_dit_register) (p, t1buv_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1buv_9 -include t1bu.h -sign 1 */ + +/* + * This function contains 54 FP additions, 42 FP multiplications, + * (or, 38 additions, 26 multiplications, 16 fused multiply/add), + * 38 stack variables, 14 constants, and 18 
memory accesses + */ +#include "t1bu.h" + +static void t1buv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP296198132, +0.296198132726023843175338011893050938967728390); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP173648177, +0.173648177666930348851716626769314796000375677); + DVK(KP556670399, +0.556670399226419366452912952047023132968291906); + DVK(KP766044443, +0.766044443118978035202392650555416673935832457); + DVK(KP642787609, +0.642787609686539326322643409907263432907559884); + DVK(KP663413948, +0.663413948168938396205421319635891297216863310); + DVK(KP150383733, +0.150383733180435296639271897612501926072238258); + DVK(KP342020143, +0.342020143325668733044099614682259580763083368); + DVK(KP813797681, +0.813797681349373692844693217248393223289101568); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T6, Tu, Tg, Tf, TD, Tq, Tp, TE; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T3, T5, T2, T4; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 4]), T2); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = BYTW(&(W[TWVL * 10]), T4); + T6 = VADD(T3, T5); + Tu = VMUL(LDK(KP866025403), VSUB(T3, T5)); + } + { + V T9, Td, Tb, T8, Tc, Ta, Te; + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[0]), T8); + Tc = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 12]), Tc); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 6]), Ta); + Tg = VSUB(Tb, Td); + Te = VADD(Tb, Td); + Tf = VFNMS(LDK(KP500000000), Te, T9); + TD = VADD(T9, Te); + } + { + V Tj, Tn, Tl, 
Ti, Tm, Tk, To; + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = BYTW(&(W[TWVL * 2]), Ti); + Tm = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tn = BYTW(&(W[TWVL * 14]), Tm); + Tk = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tl = BYTW(&(W[TWVL * 8]), Tk); + Tq = VSUB(Tl, Tn); + To = VADD(Tl, Tn); + Tp = VFNMS(LDK(KP500000000), To, Tj); + TE = VADD(Tj, To); + } + { + V TF, TG, TH, TI; + TF = VBYI(VMUL(LDK(KP866025403), VSUB(TD, TE))); + TG = VADD(T1, T6); + TH = VADD(TD, TE); + TI = VFNMS(LDK(KP500000000), TH, TG); + ST(&(x[WS(rs, 3)]), VADD(TF, TI), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(TG, TH), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VSUB(TI, TF), ms, &(x[0])); + } + { + V TC, Tv, Tw, Tx, Th, Tr, Ts, T7, TB; + TC = VBYI(VSUB(VFMA(LDK(KP984807753), Tf, VFMA(LDK(KP813797681), Tq, VFNMS(LDK(KP150383733), Tg, VMUL(LDK(KP342020143), Tp)))), Tu)); + Tv = VFMA(LDK(KP663413948), Tg, VMUL(LDK(KP642787609), Tf)); + Tw = VFMA(LDK(KP150383733), Tq, VMUL(LDK(KP984807753), Tp)); + Tx = VADD(Tv, Tw); + Th = VFNMS(LDK(KP556670399), Tg, VMUL(LDK(KP766044443), Tf)); + Tr = VFNMS(LDK(KP852868531), Tq, VMUL(LDK(KP173648177), Tp)); + Ts = VADD(Th, Tr); + T7 = VFNMS(LDK(KP500000000), T6, T1); + TB = VFMA(LDK(KP852868531), Tg, VFMA(LDK(KP173648177), Tf, VFMA(LDK(KP296198132), Tq, VFNMS(LDK(KP939692620), Tp, T7)))); + ST(&(x[WS(rs, 7)]), VSUB(TB, TC), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(TB, TC), ms, &(x[0])); + { + V Tt, Ty, Tz, TA; + Tt = VADD(T7, Ts); + Ty = VBYI(VADD(Tu, Tx)); + ST(&(x[WS(rs, 8)]), VSUB(Tt, Ty), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(Tt, Ty), ms, &(x[WS(rs, 1)])); + Tz = VBYI(VADD(Tu, VFNMS(LDK(KP500000000), Tx, VMUL(LDK(KP866025403), VSUB(Th, Tr))))); + TA = VFMA(LDK(KP866025403), VSUB(Tw, Tv), VFNMS(LDK(KP500000000), Ts, T7)); + ST(&(x[WS(rs, 4)]), VADD(Tz, TA), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VSUB(TA, Tz), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 
5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1buv_9"), twinstr, &GENUS, {38, 26, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1buv_9) (planner *p) { + X(kdft_dit_register) (p, t1buv_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1bv_10 -include t1b.h -sign 1 */ + +/* + * This function contains 51 FP additions, 40 FP multiplications, + * (or, 33 additions, 22 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1b.h" + +static void t1bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Td, TA, T4, Ta, Tk, TE, Tp, TF, TB, T9, T1, T2, Tb; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V Tg, Tn, Ti, Tl; + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + { + V T6, T8, T5, Tc; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Th, To, Tj, Tm, T7; + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 8]), T2); + Th = BYTW(&(W[TWVL * 6]), Tg); + To = BYTW(&(W[0]), Tn); + Tj = BYTW(&(W[TWVL * 
16]), Ti); + Tm = BYTW(&(W[TWVL * 10]), Tl); + T6 = BYTW(&(W[TWVL * 2]), T5); + Td = BYTW(&(W[TWVL * 4]), Tc); + T8 = BYTW(&(W[TWVL * 12]), T7); + TA = VADD(T1, T3); + T4 = VSUB(T1, T3); + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tk = VSUB(Th, Tj); + TE = VADD(Th, Tj); + Tp = VSUB(Tm, To); + TF = VADD(Tm, To); + } + TB = VADD(T6, T8); + T9 = VSUB(T6, T8); + } + } + Tb = BYTW(&(W[TWVL * 14]), Ta); + { + V TL, TG, Tw, Tq, TC, Te; + TL = VSUB(TE, TF); + TG = VADD(TE, TF); + Tw = VSUB(Tk, Tp); + Tq = VADD(Tk, Tp); + TC = VADD(Tb, Td); + Te = VSUB(Tb, Td); + { + V TM, TD, Tv, Tf; + TM = VSUB(TB, TC); + TD = VADD(TB, TC); + Tv = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V TP, TN, TH, TJ, Tz, Tx, Tr, Tt, TI, Ts; + TP = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TL, TM)); + TN = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TM, TL)); + TH = VADD(TD, TG); + TJ = VSUB(TD, TG); + Tz = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tv, Tw)); + Tx = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tv)); + Tr = VADD(Tf, Tq); + Tt = VSUB(Tf, Tq); + ST(&(x[0]), VADD(TA, TH), ms, &(x[0])); + TI = VFNMS(LDK(KP250000000), TH, TA); + ST(&(x[WS(rs, 5)]), VADD(T4, Tr), ms, &(x[WS(rs, 1)])); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + { + V TK, TO, Tu, Ty; + TK = VFNMS(LDK(KP559016994), TJ, TI); + TO = VFMA(LDK(KP559016994), TJ, TI); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + Ty = VFNMS(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 8)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 
5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1bv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_10) (planner *p) { + X(kdft_dit_register) (p, t1bv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1bv_10 -include t1b.h -sign 1 */ + +/* + * This function contains 51 FP additions, 30 FP multiplications, + * (or, 45 additions, 24 multiplications, 6 fused multiply/add), + * 32 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1b.h" + +static void t1bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Tu, TH, Tg, Tl, Tp, TD, TE, TJ, T5, Ta, To, TA, TB, TI, Tr; + V Tt, Ts; + Tr = LD(&(x[0]), ms, &(x[0])); + Ts = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tt = BYTW(&(W[TWVL * 8]), Ts); + Tu = VSUB(Tr, Tt); + TH = VADD(Tr, Tt); + { + V Td, Tk, Tf, Ti; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 6]), Tc); + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = BYTW(&(W[0]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTW(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 10]), Th); + } + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tp = VADD(Tg, Tl); + TD = VADD(Td, Tf); + TE = VADD(Ti, Tk); + TJ = VADD(TD, TE); + } + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = 
LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTW(&(W[TWVL * 2]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 14]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + To = VADD(T5, Ta); + TA = VADD(T2, T4); + TB = VADD(T7, T9); + TI = VADD(TA, TB); + } + { + V Tq, Tv, Tw, Tn, Tz, Tb, Tm, Ty, Tx; + Tq = VMUL(LDK(KP559016994), VSUB(To, Tp)); + Tv = VADD(To, Tp); + Tw = VFNMS(LDK(KP250000000), Tv, Tu); + Tb = VSUB(T5, Ta); + Tm = VSUB(Tg, Tl); + Tn = VBYI(VFMA(LDK(KP951056516), Tb, VMUL(LDK(KP587785252), Tm))); + Tz = VBYI(VFNMS(LDK(KP951056516), Tm, VMUL(LDK(KP587785252), Tb))); + ST(&(x[WS(rs, 5)]), VADD(Tu, Tv), ms, &(x[WS(rs, 1)])); + Ty = VSUB(Tw, Tq); + ST(&(x[WS(rs, 3)]), VSUB(Ty, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(Tz, Ty), ms, &(x[WS(rs, 1)])); + Tx = VADD(Tq, Tw); + ST(&(x[WS(rs, 1)]), VADD(Tn, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(Tx, Tn), ms, &(x[WS(rs, 1)])); + } + { + V TM, TK, TL, TG, TP, TC, TF, TO, TN; + TM = VMUL(LDK(KP559016994), VSUB(TI, TJ)); + TK = VADD(TI, TJ); + TL = VFNMS(LDK(KP250000000), TK, TH); + TC = VSUB(TA, TB); + TF = VSUB(TD, TE); + TG = VBYI(VFNMS(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TC))); + TP = VBYI(VFMA(LDK(KP951056516), TC, VMUL(LDK(KP587785252), TF))); + ST(&(x[0]), VADD(TH, TK), ms, &(x[0])); + TO = VADD(TM, TL); + ST(&(x[WS(rs, 4)]), VSUB(TO, TP), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(TP, TO), ms, &(x[0])); + TN = VSUB(TL, TM); + ST(&(x[WS(rs, 2)]), VADD(TG, TN), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TN, TG), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1bv_10"), twinstr, 
&GENUS, {45, 24, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_10) (planner *p) { + X(kdft_dit_register) (p, t1bv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:34 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name t1bv_12 -include t1b.h -sign 1 */ + +/* + * This function contains 59 FP additions, 42 FP multiplications, + * (or, 41 additions, 24 multiplications, 18 fused multiply/add), + * 41 stack variables, 2 constants, and 24 memory accesses + */ +#include "t1b.h" + +static void t1bv_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 22)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(12, rs)) { + V TI, Ti, TA, T7, Tm, TE, Tw, Tk, Tf, TB, TU, TM; + { + V T9, TK, Tj, TL, Te; + { + V T1, T4, T2, Tp, Tt, Tr; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tp = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V T5, T3, Tq, Tu, Ts, Td, Tb, T8, Tc, Ta; + T8 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T5 = BYTW(&(W[TWVL * 14]), T4); + T3 = BYTW(&(W[TWVL * 6]), T2); + Tq = BYTW(&(W[TWVL * 16]), Tp); + Tu = BYTW(&(W[TWVL * 8]), Tt); + Ts = BYTW(&(W[0]), Tr); + T9 = BYTW(&(W[TWVL * 10]), T8); + Td = BYTW(&(W[TWVL * 2]), Tc); + Tb = BYTW(&(W[TWVL * 
18]), Ta); + { + V Th, T6, Tl, Tv; + Th = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TK = VSUB(T3, T5); + T6 = VADD(T3, T5); + Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tv = VADD(Ts, Tu); + TI = VSUB(Tu, Ts); + Tj = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TL = VSUB(Tb, Td); + Te = VADD(Tb, Td); + Ti = BYTW(&(W[TWVL * 4]), Th); + TA = VFNMS(LDK(KP500000000), T6, T1); + T7 = VADD(T1, T6); + Tm = BYTW(&(W[TWVL * 20]), Tl); + TE = VFNMS(LDK(KP500000000), Tv, Tq); + Tw = VADD(Tq, Tv); + } + } + } + Tk = BYTW(&(W[TWVL * 12]), Tj); + Tf = VADD(T9, Te); + TB = VFNMS(LDK(KP500000000), Te, T9); + TU = VSUB(TK, TL); + TM = VADD(TK, TL); + } + { + V Tn, TH, TC, TQ, Ty, Tg; + Tn = VADD(Tk, Tm); + TH = VSUB(Tk, Tm); + TC = VADD(TA, TB); + TQ = VSUB(TA, TB); + Ty = VADD(T7, Tf); + Tg = VSUB(T7, Tf); + { + V To, TD, TJ, TR; + To = VADD(Ti, Tn); + TD = VFNMS(LDK(KP500000000), Tn, Ti); + TJ = VSUB(TH, TI); + TR = VADD(TH, TI); + { + V TP, TN, TW, TS, TO, TG, TX, TV; + { + V Tz, Tx, TF, TT; + Tz = VADD(To, Tw); + Tx = VSUB(To, Tw); + TF = VADD(TD, TE); + TT = VSUB(TD, TE); + TP = VMUL(LDK(KP866025403), VADD(TM, TJ)); + TN = VMUL(LDK(KP866025403), VSUB(TJ, TM)); + TW = VFMA(LDK(KP866025403), TR, TQ); + TS = VFNMS(LDK(KP866025403), TR, TQ); + ST(&(x[WS(rs, 6)]), VSUB(Ty, Tz), ms, &(x[0])); + ST(&(x[0]), VADD(Ty, Tz), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFMAI(Tx, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tx, Tg), ms, &(x[WS(rs, 1)])); + TO = VADD(TC, TF); + TG = VSUB(TC, TF); + TX = VFNMS(LDK(KP866025403), TU, TT); + TV = VFMA(LDK(KP866025403), TU, TT); + } + ST(&(x[WS(rs, 8)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TG), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFNMSI(TN, TG), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFMAI(TX, TW), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(TX, TW), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(TV, TS), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), 
VFMAI(TV, TS), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 12, XSIMD_STRING("t1bv_12"), twinstr, &GENUS, {41, 24, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_12) (planner *p) { + X(kdft_dit_register) (p, t1bv_12, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name t1bv_12 -include t1b.h -sign 1 */ + +/* + * This function contains 59 FP additions, 30 FP multiplications, + * (or, 55 additions, 26 multiplications, 4 fused multiply/add), + * 28 stack variables, 2 constants, and 24 memory accesses + */ +#include "t1b.h" + +static void t1bv_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 22)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(12, rs)) { + V T1, Tt, T6, T7, TB, Tq, TC, TD, T9, Tu, Te, Tf, Tx, Tl, Ty; + V Tz; + { + V T5, T3, T4, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T5 = BYTW(&(W[TWVL * 14]), T4); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 6]), T2); + Tt = VSUB(T3, T5); + T6 = VADD(T3, T5); + T7 = VFNMS(LDK(KP500000000), T6, T1); + } + { + V Tn, Tp, Tm, TA, To; + Tm = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tn = BYTW(&(W[0]), Tm); + TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TB = BYTW(&(W[TWVL * 16]), TA); + To = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tp = BYTW(&(W[TWVL * 8]), To); + Tq = VSUB(Tn, Tp); + TC = VADD(Tn, Tp); + TD = VFNMS(LDK(KP500000000), TC, TB); + } + { + V 
Td, Tb, T8, Tc, Ta; + T8 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T9 = BYTW(&(W[TWVL * 10]), T8); + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 2]), Tc); + Ta = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 18]), Ta); + Tu = VSUB(Tb, Td); + Te = VADD(Tb, Td); + Tf = VFNMS(LDK(KP500000000), Te, T9); + } + { + V Ti, Tk, Th, Tw, Tj; + Th = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = BYTW(&(W[TWVL * 12]), Th); + Tw = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tx = BYTW(&(W[TWVL * 4]), Tw); + Tj = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tk = BYTW(&(W[TWVL * 20]), Tj); + Tl = VSUB(Ti, Tk); + Ty = VADD(Ti, Tk); + Tz = VFNMS(LDK(KP500000000), Ty, Tx); + } + { + V Ts, TG, TF, TH; + { + V Tg, Tr, Tv, TE; + Tg = VSUB(T7, Tf); + Tr = VMUL(LDK(KP866025403), VSUB(Tl, Tq)); + Ts = VSUB(Tg, Tr); + TG = VADD(Tg, Tr); + Tv = VMUL(LDK(KP866025403), VSUB(Tt, Tu)); + TE = VSUB(Tz, TD); + TF = VBYI(VADD(Tv, TE)); + TH = VBYI(VSUB(TE, Tv)); + } + ST(&(x[WS(rs, 11)]), VSUB(Ts, TF), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(TG, TH), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Ts, TF), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VSUB(TG, TH), ms, &(x[WS(rs, 1)])); + } + { + V TS, TW, TV, TX; + { + V TQ, TR, TT, TU; + TQ = VADD(T1, T6); + TR = VADD(T9, Te); + TS = VSUB(TQ, TR); + TW = VADD(TQ, TR); + TT = VADD(Tx, Ty); + TU = VADD(TB, TC); + TV = VBYI(VSUB(TT, TU)); + TX = VADD(TT, TU); + } + ST(&(x[WS(rs, 3)]), VSUB(TS, TV), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(TW, TX), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VADD(TS, TV), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VSUB(TW, TX), ms, &(x[0])); + } + { + V TK, TO, TN, TP; + { + V TI, TJ, TL, TM; + TI = VADD(Tl, Tq); + TJ = VADD(Tt, Tu); + TK = VBYI(VMUL(LDK(KP866025403), VSUB(TI, TJ))); + TO = VBYI(VMUL(LDK(KP866025403), VADD(TJ, TI))); + TL = VADD(T7, Tf); + TM = VADD(Tz, TD); + TN = VSUB(TL, TM); + TP = VADD(TL, TM); + } + ST(&(x[WS(rs, 2)]), VADD(TK, TN), ms, &(x[0])); + ST(&(x[WS(rs, 
8)]), VSUB(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(TO, TP), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 12, XSIMD_STRING("t1bv_12"), twinstr, &GENUS, {55, 26, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_12) (planner *p) { + X(kdft_dit_register) (p, t1bv_12, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:34 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name t1bv_15 -include t1b.h -sign 1 */ + +/* + * This function contains 92 FP additions, 77 FP multiplications, + * (or, 50 additions, 35 multiplications, 42 fused multiply/add), + * 81 stack variables, 8 constants, and 30 memory accesses + */ +#include "t1b.h" + +static void t1bv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP910592997, +0.910592997310029334643087372129977886038870291); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(15, rs)) { + V Tq, Ty, Th, TV, TK, Ts, T1f, T7, Tu, TA, TC, Tj, Tk, T1g, Tf; + { + V T1, T4, T2, T9, Te; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T8, Tp, Tx, Tg; + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tx = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 
1)])); + Tg = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tb, Td, Tr, T6, Tt, Tz, TB, Ti; + { + V T5, T3, Ta, Tc; + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T5 = BYTW(&(W[TWVL * 18]), T4); + T3 = BYTW(&(W[TWVL * 8]), T2); + T9 = BYTW(&(W[TWVL * 4]), T8); + Tq = BYTW(&(W[TWVL * 10]), Tp); + Ty = BYTW(&(W[TWVL * 16]), Tx); + Th = BYTW(&(W[TWVL * 22]), Tg); + Tb = BYTW(&(W[TWVL * 14]), Ta); + Td = BYTW(&(W[TWVL * 24]), Tc); + Tr = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TV = VSUB(T3, T5); + T6 = VADD(T3, T5); + Tt = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + } + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Te = VADD(Tb, Td); + TK = VSUB(Tb, Td); + Ts = BYTW(&(W[TWVL * 20]), Tr); + T1f = VADD(T1, T6); + T7 = VFNMS(LDK(KP500000000), T6, T1); + Tu = BYTW(&(W[0]), Tt); + TA = BYTW(&(W[TWVL * 26]), Tz); + TC = BYTW(&(W[TWVL * 6]), TB); + Tj = BYTW(&(W[TWVL * 2]), Ti); + Tk = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + } + } + T1g = VADD(T9, Te); + Tf = VFNMS(LDK(KP500000000), Te, T9); + } + { + V Tv, TN, TD, TO, Tl; + Tv = VADD(Ts, Tu); + TN = VSUB(Ts, Tu); + TD = VADD(TA, TC); + TO = VSUB(TA, TC); + Tl = BYTW(&(W[TWVL * 12]), Tk); + { + V Tw, T1j, TX, TP, TE, T1k, TL, Tm; + Tw = VFNMS(LDK(KP500000000), Tv, Tq); + T1j = VADD(Tq, Tv); + TX = VADD(TN, TO); + TP = VSUB(TN, TO); + TE = VFNMS(LDK(KP500000000), TD, Ty); + T1k = VADD(Ty, TD); + TL = VSUB(Tj, Tl); + Tm = VADD(Tj, Tl); + { + V TT, TF, T1q, T1l, TW, TM, T1h, Tn; + TT = VSUB(Tw, TE); + TF = VADD(Tw, TE); + T1q = VSUB(T1j, T1k); + T1l = VADD(T1j, T1k); + TW = VADD(TK, TL); + TM = VSUB(TK, TL); + T1h = VADD(Th, Tm); + Tn = VFNMS(LDK(KP500000000), Tm, Th); + { + V T10, TY, T16, TQ, T1r, T1i, TS, To, TZ, T1e; + T10 = VSUB(TW, TX); + TY = VADD(TW, TX); + T16 = VFNMS(LDK(KP618033988), TM, TP); + TQ = VFMA(LDK(KP618033988), TP, TM); + T1r = VSUB(T1g, T1h); + T1i = VADD(T1g, T1h); + TS = 
VSUB(Tf, Tn); + To = VADD(Tf, Tn); + TZ = VFNMS(LDK(KP250000000), TY, TV); + T1e = VMUL(LDK(KP866025403), VADD(TV, TY)); + { + V T1u, T1s, T1o, T18, TU, TG, TI, T19, T11, T1n, T1m; + T1u = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1q, T1r)); + T1s = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1r, T1q)); + T1m = VADD(T1i, T1l); + T1o = VSUB(T1i, T1l); + T18 = VFNMS(LDK(KP618033988), TS, TT); + TU = VFMA(LDK(KP618033988), TT, TS); + TG = VADD(To, TF); + TI = VSUB(To, TF); + T19 = VFNMS(LDK(KP559016994), T10, TZ); + T11 = VFMA(LDK(KP559016994), T10, TZ); + ST(&(x[0]), VADD(T1f, T1m), ms, &(x[0])); + T1n = VFNMS(LDK(KP250000000), T1m, T1f); + { + V T1a, T1c, T14, T12, T1p, T1t, T15, TJ, T1d, TH; + T1d = VADD(T7, TG); + TH = VFNMS(LDK(KP250000000), TG, T7); + T1a = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T19, T18)); + T1c = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T19, T18)); + T14 = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T11, TU)); + T12 = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T11, TU)); + T1p = VFNMS(LDK(KP559016994), T1o, T1n); + T1t = VFMA(LDK(KP559016994), T1o, T1n); + ST(&(x[WS(rs, 10)]), VFMAI(T1e, T1d), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(T1e, T1d), ms, &(x[WS(rs, 1)])); + T15 = VFNMS(LDK(KP559016994), TI, TH); + TJ = VFMA(LDK(KP559016994), TI, TH); + { + V T17, T1b, T13, TR; + ST(&(x[WS(rs, 12)]), VFNMSI(T1s, T1p), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(T1s, T1p), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1u, T1t), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFMAI(T1u, T1t), ms, &(x[0])); + T17 = VFNMS(LDK(KP823639103), T16, T15); + T1b = VFMA(LDK(KP823639103), T16, T15); + T13 = VFMA(LDK(KP823639103), TQ, TJ); + TR = VFNMS(LDK(KP823639103), TQ, TJ); + ST(&(x[WS(rs, 13)]), VFMAI(T1a, T17), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(T1a, T17), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T1c, T1b), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1c, T1b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), 
VFMAI(T14, T13), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(T14, T13), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T12, TR), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T12, TR), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 15, XSIMD_STRING("t1bv_15"), twinstr, &GENUS, {50, 35, 42, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_15) (planner *p) { + X(kdft_dit_register) (p, t1bv_15, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name t1bv_15 -include t1b.h -sign 1 */ + +/* + * This function contains 92 FP additions, 53 FP multiplications, + * (or, 78 additions, 39 multiplications, 14 fused multiply/add), + * 52 stack variables, 10 constants, and 30 memory accesses + */ +#include "t1b.h" + +static void t1bv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP216506350, +0.216506350946109661690930792688234045867850657); + DVK(KP484122918, +0.484122918275927110647408174972799951354115213); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP509036960, +0.509036960455127183450980863393907648510733164); + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + 
(VL * ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(15, rs)) { + V Ts, TV, T1f, TZ, T10, Tb, Tm, Tt, T1j, T1k, T1l, TI, TM, TR, Tz; + V TD, TQ, T1g, T1h, T1i; + { + V TT, Tr, Tp, Tq, To, TU; + TT = LD(&(x[0]), ms, &(x[0])); + Tq = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tr = BYTW(&(W[TWVL * 18]), Tq); + To = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tp = BYTW(&(W[TWVL * 8]), To); + Ts = VSUB(Tp, Tr); + TU = VADD(Tp, Tr); + TV = VFNMS(LDK(KP500000000), TU, TT); + T1f = VADD(TT, TU); + } + { + V Tx, TG, TK, TB, T5, Ty, Tg, TH, Tl, TL, Ta, TC; + { + V Tw, TF, TJ, TA; + Tw = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tx = BYTW(&(W[TWVL * 4]), Tw); + TF = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TG = BYTW(&(W[TWVL * 10]), TF); + TJ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TK = BYTW(&(W[TWVL * 16]), TJ); + TA = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TB = BYTW(&(W[TWVL * 22]), TA); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T2 = BYTW(&(W[TWVL * 14]), T1); + T3 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 24]), T3); + T5 = VSUB(T2, T4); + Ty = VADD(T2, T4); + } + { + V Td, Tf, Tc, Te; + Tc = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 20]), Tc); + Te = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tf = BYTW(&(W[0]), Te); + Tg = VSUB(Td, Tf); + TH = VADD(Td, Tf); + } + { + V Ti, Tk, Th, Tj; + Th = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 26]), Th); + Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tk = BYTW(&(W[TWVL * 6]), Tj); + Tl = VSUB(Ti, Tk); + TL = VADD(Ti, Tk); + } + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 2]), T6); + T8 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 12]), T8); + Ta = VSUB(T7, T9); + TC = VADD(T7, T9); + } + TZ = VSUB(T5, Ta); + T10 = VSUB(Tg, Tl); + Tb = VADD(T5, Ta); + Tm = VADD(Tg, Tl); + Tt = VADD(Tb, Tm); + T1j = VADD(TG, TH); + T1k = VADD(TK, TL); + T1l = VADD(T1j, T1k); + TI = 
VFNMS(LDK(KP500000000), TH, TG); + TM = VFNMS(LDK(KP500000000), TL, TK); + TR = VADD(TI, TM); + Tz = VFNMS(LDK(KP500000000), Ty, Tx); + TD = VFNMS(LDK(KP500000000), TC, TB); + TQ = VADD(Tz, TD); + T1g = VADD(Tx, Ty); + T1h = VADD(TB, TC); + T1i = VADD(T1g, T1h); + } + { + V T1o, T1m, T1n, T1s, T1t, T1q, T1r, T1u, T1p; + T1o = VMUL(LDK(KP559016994), VSUB(T1i, T1l)); + T1m = VADD(T1i, T1l); + T1n = VFNMS(LDK(KP250000000), T1m, T1f); + T1q = VSUB(T1g, T1h); + T1r = VSUB(T1j, T1k); + T1s = VBYI(VFNMS(LDK(KP951056516), T1r, VMUL(LDK(KP587785252), T1q))); + T1t = VBYI(VFMA(LDK(KP951056516), T1q, VMUL(LDK(KP587785252), T1r))); + ST(&(x[0]), VADD(T1f, T1m), ms, &(x[0])); + T1u = VADD(T1o, T1n); + ST(&(x[WS(rs, 6)]), VADD(T1t, T1u), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VSUB(T1u, T1t), ms, &(x[WS(rs, 1)])); + T1p = VSUB(T1n, T1o); + ST(&(x[WS(rs, 3)]), VSUB(T1p, T1s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 12)]), VADD(T1s, T1p), ms, &(x[0])); + } + { + V T11, T18, T1e, TO, T16, Tv, T15, TY, T1d, T19, TE, TN; + T11 = VFMA(LDK(KP823639103), TZ, VMUL(LDK(KP509036960), T10)); + T18 = VFNMS(LDK(KP823639103), T10, VMUL(LDK(KP509036960), TZ)); + T1e = VBYI(VMUL(LDK(KP866025403), VADD(Ts, Tt))); + TE = VSUB(Tz, TD); + TN = VSUB(TI, TM); + TO = VFMA(LDK(KP951056516), TE, VMUL(LDK(KP587785252), TN)); + T16 = VFNMS(LDK(KP951056516), TN, VMUL(LDK(KP587785252), TE)); + { + V Tn, Tu, TS, TW, TX; + Tn = VMUL(LDK(KP484122918), VSUB(Tb, Tm)); + Tu = VFNMS(LDK(KP216506350), Tt, VMUL(LDK(KP866025403), Ts)); + Tv = VADD(Tn, Tu); + T15 = VSUB(Tn, Tu); + TS = VMUL(LDK(KP559016994), VSUB(TQ, TR)); + TW = VADD(TQ, TR); + TX = VFNMS(LDK(KP250000000), TW, TV); + TY = VADD(TS, TX); + T1d = VADD(TV, TW); + T19 = VSUB(TX, TS); + } + { + V TP, T12, T1b, T1c; + ST(&(x[WS(rs, 5)]), VSUB(T1d, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VADD(T1e, T1d), ms, &(x[0])); + TP = VBYI(VADD(Tv, TO)); + T12 = VSUB(TY, T11); + ST(&(x[WS(rs, 1)]), VADD(TP, T12), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), 
VSUB(T12, TP), ms, &(x[0])); + T1b = VBYI(VSUB(T16, T15)); + T1c = VSUB(T19, T18); + ST(&(x[WS(rs, 7)]), VADD(T1b, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VSUB(T1c, T1b), ms, &(x[0])); + { + V T17, T1a, T13, T14; + T17 = VBYI(VADD(T15, T16)); + T1a = VADD(T18, T19); + ST(&(x[WS(rs, 2)]), VADD(T17, T1a), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VSUB(T1a, T17), ms, &(x[WS(rs, 1)])); + T13 = VBYI(VSUB(Tv, TO)); + T14 = VADD(T11, TY); + ST(&(x[WS(rs, 4)]), VADD(T13, T14), ms, &(x[0])); + ST(&(x[WS(rs, 11)]), VSUB(T14, T13), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 15, XSIMD_STRING("t1bv_15"), twinstr, &GENUS, {78, 39, 14, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_15) (planner *p) { + X(kdft_dit_register) (p, t1bv_15, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:34 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1bv_16 -include t1b.h -sign 1 */ + +/* + * This function contains 87 FP additions, 64 FP multiplications, + * (or, 53 additions, 30 multiplications, 34 fused multiply/add), + * 61 stack variables, 3 constants, and 32 memory accesses + */ +#include "t1b.h" + +static void t1bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TO, Ta, TJ, TP, T14, Tq, T1i, T10, T1b, T1l, T13, T1c, TR, Tl, T15; + V Tv; + { + V Tc, TW, T4, T19, T9, TD, TI, Tj, TZ, T1a, Te, Th, Tn, Tr, Tu; + V Tp; + { + V T1, T2, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tz, TG, TB, TE; + Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TE = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + { + V Ti, TX, TY, Td, Tg, Tm, Tt, To; + { + V T3, T6, T8, TA, TH, TC, TF, Tb; + Tb = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 14]), T2); + T6 = BYTW(&(W[TWVL * 6]), T5); + 
T8 = BYTW(&(W[TWVL * 22]), T7); + TA = BYTW(&(W[TWVL * 2]), Tz); + TH = BYTW(&(W[TWVL * 10]), TG); + TC = BYTW(&(W[TWVL * 18]), TB); + TF = BYTW(&(W[TWVL * 26]), TE); + Tc = BYTW(&(W[0]), Tb); + TW = VSUB(T1, T3); + T4 = VADD(T1, T3); + T19 = VSUB(T6, T8); + T9 = VADD(T6, T8); + Ti = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TD = VADD(TA, TC); + TX = VSUB(TA, TC); + TI = VADD(TF, TH); + TY = VSUB(TF, TH); + } + Td = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tg = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tm = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tj = BYTW(&(W[TWVL * 24]), Ti); + Tt = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + To = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TZ = VADD(TX, TY); + T1a = VSUB(TX, TY); + Te = BYTW(&(W[TWVL * 16]), Td); + Th = BYTW(&(W[TWVL * 8]), Tg); + Tn = BYTW(&(W[TWVL * 28]), Tm); + Tr = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tu = BYTW(&(W[TWVL * 20]), Tt); + Tp = BYTW(&(W[TWVL * 12]), To); + } + } + } + { + V Tf, T11, Tk, T12, Ts; + TO = VADD(T4, T9); + Ta = VSUB(T4, T9); + TJ = VSUB(TD, TI); + TP = VADD(TD, TI); + Tf = VADD(Tc, Te); + T11 = VSUB(Tc, Te); + Tk = VADD(Th, Tj); + T12 = VSUB(Th, Tj); + Ts = BYTW(&(W[TWVL * 4]), Tr); + T14 = VSUB(Tn, Tp); + Tq = VADD(Tn, Tp); + T1i = VFNMS(LDK(KP707106781), TZ, TW); + T10 = VFMA(LDK(KP707106781), TZ, TW); + T1b = VFMA(LDK(KP707106781), T1a, T19); + T1l = VFNMS(LDK(KP707106781), T1a, T19); + T13 = VFNMS(LDK(KP414213562), T12, T11); + T1c = VFMA(LDK(KP414213562), T11, T12); + TR = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + T15 = VSUB(Tu, Ts); + Tv = VADD(Ts, Tu); + } + } + { + V T1d, T16, TS, Tw, TU, TQ; + T1d = VFMA(LDK(KP414213562), T14, T15); + T16 = VFNMS(LDK(KP414213562), T15, T14); + TS = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + TU = VADD(TO, TP); + TQ = VSUB(TO, TP); + { + V T1e, T1j, T17, T1m; + T1e = VSUB(T1c, T1d); + T1j = VADD(T1c, T1d); + T17 = VADD(T13, T16); + T1m = VSUB(T13, T16); + { + V TV, TT, TK, Tx; + TV = VADD(TR, TS); + TT = VSUB(TR, TS); + TK = VSUB(Tl, 
Tw); + Tx = VADD(Tl, Tw); + { + V T1h, T1f, T1o, T1k; + T1h = VFMA(LDK(KP923879532), T1e, T1b); + T1f = VFNMS(LDK(KP923879532), T1e, T1b); + T1o = VFMA(LDK(KP923879532), T1j, T1i); + T1k = VFNMS(LDK(KP923879532), T1j, T1i); + { + V T1g, T18, T1p, T1n; + T1g = VFMA(LDK(KP923879532), T17, T10); + T18 = VFNMS(LDK(KP923879532), T17, T10); + T1p = VFNMS(LDK(KP923879532), T1m, T1l); + T1n = VFMA(LDK(KP923879532), T1m, T1l); + ST(&(x[WS(rs, 8)]), VSUB(TU, TV), ms, &(x[0])); + ST(&(x[0]), VADD(TU, TV), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TT, TQ), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(TT, TQ), ms, &(x[0])); + { + V TN, TL, TM, Ty; + TN = VFMA(LDK(KP707106781), TK, TJ); + TL = VFNMS(LDK(KP707106781), TK, TJ); + TM = VFMA(LDK(KP707106781), Tx, Ta); + Ty = VFNMS(LDK(KP707106781), Tx, Ta); + ST(&(x[WS(rs, 15)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t1bv_16"), twinstr, &GENUS, {53, 30, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_16) (planner *p) { + X(kdft_dit_register) (p, 
t1bv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1bv_16 -include t1b.h -sign 1 */ + +/* + * This function contains 87 FP additions, 42 FP multiplications, + * (or, 83 additions, 38 multiplications, 4 fused multiply/add), + * 36 stack variables, 3 constants, and 32 memory accesses + */ +#include "t1b.h" + +static void t1bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TJ, T1b, TD, T1c, T17, T18, Ty, TK, T10, T11, T12, Tb, TM, T13, T14; + V T15, Tm, TN, TG, TI, TH; + TG = LD(&(x[0]), ms, &(x[0])); + TH = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + TI = BYTW(&(W[TWVL * 14]), TH); + TJ = VSUB(TG, TI); + T1b = VADD(TG, TI); + { + V TA, TC, Tz, TB; + Tz = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 6]), Tz); + TB = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TC = BYTW(&(W[TWVL * 22]), TB); + TD = VSUB(TA, TC); + T1c = VADD(TA, TC); + } + { + V Tp, Tw, Tr, Tu, Ts, Tx; + { + V To, Tv, Tq, Tt; + To = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tp = BYTW(&(W[TWVL * 2]), To); + Tv = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tw = BYTW(&(W[TWVL * 10]), Tv); + Tq = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tr = BYTW(&(W[TWVL * 18]), Tq); + Tt = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tu = BYTW(&(W[TWVL * 26]), Tt); + } + T17 = VADD(Tp, Tr); + T18 = VADD(Tu, Tw); + Ts = VSUB(Tp, Tr); + Tx = VSUB(Tu, Tw); + Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx)); + TK = VMUL(LDK(KP707106781), VADD(Ts, Tx)); + } + { + V T2, T9, T4, T7, T5, Ta; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 
1)])); + T2 = BYTW(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 24]), T8); + T3 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 16]), T3); + T6 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 8]), T6); + } + T10 = VADD(T2, T4); + T11 = VADD(T7, T9); + T12 = VSUB(T10, T11); + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VFNMS(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), T5)); + TM = VFMA(LDK(KP382683432), T5, VMUL(LDK(KP923879532), Ta)); + } + { + V Td, Tk, Tf, Ti, Tg, Tl; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 28]), Tc); + Tj = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tk = BYTW(&(W[TWVL * 20]), Tj); + Te = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tf = BYTW(&(W[TWVL * 12]), Te); + Th = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ti = BYTW(&(W[TWVL * 4]), Th); + } + T13 = VADD(Td, Tf); + T14 = VADD(Ti, Tk); + T15 = VSUB(T13, T14); + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VFMA(LDK(KP923879532), Tg, VMUL(LDK(KP382683432), Tl)); + TN = VFNMS(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl)); + } + { + V T1a, T1g, T1f, T1h; + { + V T16, T19, T1d, T1e; + T16 = VMUL(LDK(KP707106781), VSUB(T12, T15)); + T19 = VSUB(T17, T18); + T1a = VBYI(VSUB(T16, T19)); + T1g = VBYI(VADD(T19, T16)); + T1d = VSUB(T1b, T1c); + T1e = VMUL(LDK(KP707106781), VADD(T12, T15)); + T1f = VSUB(T1d, T1e); + T1h = VADD(T1d, T1e); + } + ST(&(x[WS(rs, 6)]), VADD(T1a, T1f), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1h, T1g), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(T1f, T1a), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1g, T1h), ms, &(x[0])); + } + { + V T1k, T1o, T1n, T1p; + { + V T1i, T1j, T1l, T1m; + T1i = VADD(T1b, T1c); + T1j = VADD(T17, T18); + T1k = VSUB(T1i, T1j); + T1o = VADD(T1i, T1j); + T1l = VADD(T10, T11); + T1m = VADD(T13, T14); + T1n = VBYI(VSUB(T1l, T1m)); + T1p = VADD(T1l, T1m); + } + ST(&(x[WS(rs, 12)]), VSUB(T1k, T1n), ms, &(x[0])); + 
ST(&(x[0]), VADD(T1o, T1p), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1k, T1n), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(T1o, T1p), ms, &(x[0])); + } + { + V TF, TQ, TP, TR; + { + V Tn, TE, TL, TO; + Tn = VSUB(Tb, Tm); + TE = VSUB(Ty, TD); + TF = VBYI(VSUB(Tn, TE)); + TQ = VBYI(VADD(TE, Tn)); + TL = VSUB(TJ, TK); + TO = VSUB(TM, TN); + TP = VSUB(TL, TO); + TR = VADD(TL, TO); + } + ST(&(x[WS(rs, 5)]), VADD(TF, TP), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(TR, TQ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(TP, TF), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(TQ, TR), ms, &(x[WS(rs, 1)])); + } + { + V TU, TY, TX, TZ; + { + V TS, TT, TV, TW; + TS = VADD(TJ, TK); + TT = VADD(Tb, Tm); + TU = VADD(TS, TT); + TY = VSUB(TS, TT); + TV = VADD(TD, Ty); + TW = VADD(TM, TN); + TX = VBYI(VADD(TV, TW)); + TZ = VBYI(VSUB(TW, TV)); + } + ST(&(x[WS(rs, 15)]), VSUB(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(TY, TZ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(TY, TZ), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t1bv_16"), twinstr, &GENUS, {83, 38, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_16) (planner *p) { + X(kdft_dit_register) (p, t1bv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:32 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1bv_2 -include t1b.h -sign 1 */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1b.h" + +static void t1bv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), T2); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1bv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_2) (planner *p) { + 
X(kdft_dit_register) (p, t1bv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1bv_2 -include t1b.h -sign 1 */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1b.h" + +static void t1bv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), T2); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1bv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_2) (planner *p) { + X(kdft_dit_register) (p, t1bv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,519 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:35 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t1bv_20 -include t1b.h -sign 1 */ + +/* + * This function contains 123 FP additions, 88 FP multiplications, + * (or, 77 additions, 42 multiplications, 46 fused multiply/add), + * 68 stack variables, 4 constants, and 40 memory accesses + */ +#include "t1b.h" + +static void t1bv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, TX, T1m, T1K, T1y, Tk, Tf, T14, TQ, TZ, T1O, T1w, T1L, T1p, T1M; + V T1s, TF, TY, T1x, Tp; + { + V T1, TV, T2, TT; + T1 = LD(&(x[0]), ms, &(x[0])); + TV = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TT = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T9, T1n, TK, T1v, TP, Te, T1q, T1u, TB, TD, Tm, T1o, Tz, Tn, T1r; + V TE, To; + { + V TM, TO, Ta, 
Tc; + { + V T5, T7, TG, TI, T1k, T1l; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TI = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V TW, T3, TU, T6, T8, TH, TJ, TL, TN; + TL = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TW = BYTW(&(W[TWVL * 28]), TV); + T3 = BYTW(&(W[TWVL * 18]), T2); + TU = BYTW(&(W[TWVL * 8]), TT); + T6 = BYTW(&(W[TWVL * 6]), T5); + T8 = BYTW(&(W[TWVL * 26]), T7); + TH = BYTW(&(W[TWVL * 24]), TG); + TJ = BYTW(&(W[TWVL * 4]), TI); + TM = BYTW(&(W[TWVL * 32]), TL); + TN = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = VSUB(T1, T3); + T1k = VADD(T1, T3); + TX = VSUB(TU, TW); + T1l = VADD(TU, TW); + T9 = VSUB(T6, T8); + T1n = VADD(T6, T8); + TK = VSUB(TH, TJ); + T1v = VADD(TH, TJ); + TO = BYTW(&(W[TWVL * 12]), TN); + } + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1m = VSUB(T1k, T1l); + T1K = VADD(T1k, T1l); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + } + { + V Tb, Tx, Td, Th, Tj, Tw, Tg, Ti, Tv; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tv = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TP = VSUB(TM, TO); + T1y = VADD(TM, TO); + Tb = BYTW(&(W[TWVL * 30]), Ta); + Tx = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 10]), Tc); + Th = BYTW(&(W[TWVL * 14]), Tg); + Tj = BYTW(&(W[TWVL * 34]), Ti); + Tw = BYTW(&(W[TWVL * 16]), Tv); + { + V TA, TC, Ty, Tl; + TA = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TC = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 36]), Tx); + Te = VSUB(Tb, Td); + T1q = VADD(Tb, Td); + Tk = VSUB(Th, Tj); + T1u = VADD(Th, Tj); + TB = BYTW(&(W[0]), TA); + TD = BYTW(&(W[TWVL * 20]), TC); + Tm = BYTW(&(W[TWVL * 22]), Tl); + T1o = VADD(Tw, Ty); + Tz = VSUB(Tw, Ty); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + } + } + } + Tf = VADD(T9, Te); + T14 = VSUB(T9, Te); + TQ = VSUB(TK, TP); + TZ = VADD(TK, TP); + T1r = VADD(TB, TD); + 
TE = VSUB(TB, TD); + T1O = VADD(T1u, T1v); + T1w = VSUB(T1u, T1v); + To = BYTW(&(W[TWVL * 2]), Tn); + T1L = VADD(T1n, T1o); + T1p = VSUB(T1n, T1o); + T1M = VADD(T1q, T1r); + T1s = VSUB(T1q, T1r); + TF = VSUB(Tz, TE); + TY = VADD(Tz, TE); + T1x = VADD(Tm, To); + Tp = VSUB(Tm, To); + } + } + { + V T1V, T1N, T12, T1b, TR, T1G, T1t, T1z, T1P, Tq, T15, T11, T1j, T10; + T1V = VSUB(T1L, T1M); + T1N = VADD(T1L, T1M); + T12 = VSUB(TY, TZ); + T10 = VADD(TY, TZ); + T1b = VFNMS(LDK(KP618033988), TF, TQ); + TR = VFMA(LDK(KP618033988), TQ, TF); + T1G = VSUB(T1p, T1s); + T1t = VADD(T1p, T1s); + T1z = VSUB(T1x, T1y); + T1P = VADD(T1x, T1y); + Tq = VADD(Tk, Tp); + T15 = VSUB(Tk, Tp); + T11 = VFNMS(LDK(KP250000000), T10, TX); + T1j = VADD(TX, T10); + { + V T1J, T1H, T1D, T1Z, T1X, T1T, T1f, T1h, T19, T17, T1C, T1S, T1a, Tu, T1F; + V T1A; + T1F = VSUB(T1w, T1z); + T1A = VADD(T1w, T1z); + { + V T1W, T1Q, Tt, Tr; + T1W = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + Tt = VSUB(Tf, Tq); + Tr = VADD(Tf, Tq); + { + V T1e, T16, T1d, T13; + T1e = VFNMS(LDK(KP618033988), T14, T15); + T16 = VFMA(LDK(KP618033988), T15, T14); + T1d = VFNMS(LDK(KP559016994), T12, T11); + T13 = VFMA(LDK(KP559016994), T12, T11); + T1J = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1F, T1G)); + T1H = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1G, T1F)); + { + V T1B, T1R, Ts, T1i; + T1B = VADD(T1t, T1A); + T1D = VSUB(T1t, T1A); + T1Z = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1V, T1W)); + T1X = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1W, T1V)); + T1R = VADD(T1N, T1Q); + T1T = VSUB(T1N, T1Q); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + T1i = VADD(T4, Tr); + T1f = VFNMS(LDK(KP951056516), T1e, T1d); + T1h = VFMA(LDK(KP951056516), T1e, T1d); + T19 = VFNMS(LDK(KP951056516), T16, T13); + T17 = VFMA(LDK(KP951056516), T16, T13); + ST(&(x[WS(rs, 10)]), VADD(T1m, T1B), ms, &(x[0])); + T1C = VFNMS(LDK(KP250000000), T1B, T1m); + ST(&(x[0]), VADD(T1K, T1R), ms, &(x[0])); + T1S = VFNMS(LDK(KP250000000), T1R, T1K); + 
T1a = VFNMS(LDK(KP559016994), Tt, Ts); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 5)]), VFMAI(T1j, T1i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T1j, T1i), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T1E, T1I, T1U, T1Y; + T1E = VFNMS(LDK(KP559016994), T1D, T1C); + T1I = VFMA(LDK(KP559016994), T1D, T1C); + T1U = VFMA(LDK(KP559016994), T1T, T1S); + T1Y = VFNMS(LDK(KP559016994), T1T, T1S); + { + V T1c, T1g, T18, TS; + T1c = VFMA(LDK(KP951056516), T1b, T1a); + T1g = VFNMS(LDK(KP951056516), T1b, T1a); + T18 = VFMA(LDK(KP951056516), TR, Tu); + TS = VFNMS(LDK(KP951056516), TR, Tu); + ST(&(x[WS(rs, 18)]), VFMAI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFMAI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFMAI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 17)]), VFMAI(T1f, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T1f, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T19, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T19, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T17, TS), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T17, TS), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t1bv_20"), twinstr, &GENUS, {77, 42, 46, 0}, 0, 0, 0 }; + 
+void XSIMD(codelet_t1bv_20) (planner *p) { + X(kdft_dit_register) (p, t1bv_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t1bv_20 -include t1b.h -sign 1 */ + +/* + * This function contains 123 FP additions, 62 FP multiplications, + * (or, 111 additions, 50 multiplications, 12 fused multiply/add), + * 54 stack variables, 4 constants, and 40 memory accesses + */ +#include "t1b.h" + +static void t1bv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, T10, T1B, T1R, TF, T14, T15, TQ, Tf, Tq, Tr, T1N, T1O, T1P, T1t; + V T1w, T1D, TT, TU, T11, T1K, T1L, T1M, T1m, T1p, T1C, T1i, T1j; + { + V T1, TZ, T3, TX, TY, T2, TW, T1z, T1A; + T1 = LD(&(x[0]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + TZ = BYTW(&(W[TWVL * 28]), TY); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 18]), T2); + TW = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TX = BYTW(&(W[TWVL * 8]), TW); + T4 = VSUB(T1, T3); + T10 = VSUB(TX, TZ); + T1z = VADD(T1, T3); + T1A = VADD(TX, TZ); + T1B = VSUB(T1z, T1A); + T1R = VADD(T1z, T1A); + } + { + V T9, T1k, TK, T1s, TP, T1v, Te, T1n, Tk, T1r, Tz, T1l, TE, T1o, Tp; + V T1u; + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTW(&(W[TWVL * 6]), T5); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T8 = BYTW(&(W[TWVL * 26]), T7); + T9 = VSUB(T6, T8); + T1k = VADD(T6, T8); + } + { + V TH, TJ, TG, TI; + TG = LD(&(x[WS(rs, 13)]), ms, 
&(x[WS(rs, 1)])); + TH = BYTW(&(W[TWVL * 24]), TG); + TI = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TJ = BYTW(&(W[TWVL * 4]), TI); + TK = VSUB(TH, TJ); + T1s = VADD(TH, TJ); + } + { + V TM, TO, TL, TN; + TL = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TM = BYTW(&(W[TWVL * 32]), TL); + TN = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TO = BYTW(&(W[TWVL * 12]), TN); + TP = VSUB(TM, TO); + T1v = VADD(TM, TO); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 30]), Ta); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 10]), Tc); + Te = VSUB(Tb, Td); + T1n = VADD(Tb, Td); + } + { + V Th, Tj, Tg, Ti; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Th = BYTW(&(W[TWVL * 14]), Tg); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tj = BYTW(&(W[TWVL * 34]), Ti); + Tk = VSUB(Th, Tj); + T1r = VADD(Th, Tj); + } + { + V Tw, Ty, Tv, Tx; + Tv = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tw = BYTW(&(W[TWVL * 16]), Tv); + Tx = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Ty = BYTW(&(W[TWVL * 36]), Tx); + Tz = VSUB(Tw, Ty); + T1l = VADD(Tw, Ty); + } + { + V TB, TD, TA, TC; + TA = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TB = BYTW(&(W[0]), TA); + TC = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TD = BYTW(&(W[TWVL * 20]), TC); + TE = VSUB(TB, TD); + T1o = VADD(TB, TD); + } + { + V Tm, To, Tl, Tn; + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tm = BYTW(&(W[TWVL * 22]), Tl); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + To = BYTW(&(W[TWVL * 2]), Tn); + Tp = VSUB(Tm, To); + T1u = VADD(Tm, To); + } + TF = VSUB(Tz, TE); + T14 = VSUB(T9, Te); + T15 = VSUB(Tk, Tp); + TQ = VSUB(TK, TP); + Tf = VADD(T9, Te); + Tq = VADD(Tk, Tp); + Tr = VADD(Tf, Tq); + T1N = VADD(T1r, T1s); + T1O = VADD(T1u, T1v); + T1P = VADD(T1N, T1O); + T1t = VSUB(T1r, T1s); + T1w = VSUB(T1u, T1v); + T1D = VADD(T1t, T1w); + TT = VADD(Tz, TE); + TU = VADD(TK, TP); + T11 = VADD(TT, TU); + T1K = VADD(T1k, T1l); + T1L = VADD(T1n, T1o); + T1M = VADD(T1K, T1L); + T1m = VSUB(T1k, 
T1l); + T1p = VSUB(T1n, T1o); + T1C = VADD(T1m, T1p); + } + T1i = VADD(T4, Tr); + T1j = VBYI(VADD(T10, T11)); + ST(&(x[WS(rs, 15)]), VSUB(T1i, T1j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1i, T1j), ms, &(x[WS(rs, 1)])); + { + V T1Q, T1S, T1T, T1X, T1Z, T1V, T1W, T1Y, T1U; + T1Q = VMUL(LDK(KP559016994), VSUB(T1M, T1P)); + T1S = VADD(T1M, T1P); + T1T = VFNMS(LDK(KP250000000), T1S, T1R); + T1V = VSUB(T1K, T1L); + T1W = VSUB(T1N, T1O); + T1X = VBYI(VFMA(LDK(KP951056516), T1V, VMUL(LDK(KP587785252), T1W))); + T1Z = VBYI(VFNMS(LDK(KP951056516), T1W, VMUL(LDK(KP587785252), T1V))); + ST(&(x[0]), VADD(T1R, T1S), ms, &(x[0])); + T1Y = VSUB(T1T, T1Q); + ST(&(x[WS(rs, 8)]), VSUB(T1Y, T1Z), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T1Z, T1Y), ms, &(x[0])); + T1U = VADD(T1Q, T1T); + ST(&(x[WS(rs, 4)]), VSUB(T1U, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T1X, T1U), ms, &(x[0])); + } + { + V T1G, T1E, T1F, T1y, T1I, T1q, T1x, T1J, T1H; + T1G = VMUL(LDK(KP559016994), VSUB(T1C, T1D)); + T1E = VADD(T1C, T1D); + T1F = VFNMS(LDK(KP250000000), T1E, T1B); + T1q = VSUB(T1m, T1p); + T1x = VSUB(T1t, T1w); + T1y = VBYI(VFNMS(LDK(KP951056516), T1x, VMUL(LDK(KP587785252), T1q))); + T1I = VBYI(VFMA(LDK(KP951056516), T1q, VMUL(LDK(KP587785252), T1x))); + ST(&(x[WS(rs, 10)]), VADD(T1B, T1E), ms, &(x[0])); + T1J = VADD(T1G, T1F); + ST(&(x[WS(rs, 6)]), VADD(T1I, T1J), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1J, T1I), ms, &(x[0])); + T1H = VSUB(T1F, T1G); + ST(&(x[WS(rs, 2)]), VADD(T1y, T1H), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VSUB(T1H, T1y), ms, &(x[0])); + } + { + V TR, T16, T1d, T1b, T13, T1e, Tu, T1a; + TR = VFNMS(LDK(KP951056516), TQ, VMUL(LDK(KP587785252), TF)); + T16 = VFNMS(LDK(KP951056516), T15, VMUL(LDK(KP587785252), T14)); + T1d = VFMA(LDK(KP951056516), T14, VMUL(LDK(KP587785252), T15)); + T1b = VFMA(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TQ)); + { + V TV, T12, Ts, Tt; + TV = VMUL(LDK(KP559016994), VSUB(TT, TU)); + T12 = VFNMS(LDK(KP250000000), T11, T10); + T13 
= VSUB(TV, T12); + T1e = VADD(TV, T12); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + Tt = VMUL(LDK(KP559016994), VSUB(Tf, Tq)); + Tu = VSUB(Ts, Tt); + T1a = VADD(Tt, Ts); + } + { + V TS, T17, T1g, T1h; + TS = VSUB(Tu, TR); + T17 = VBYI(VSUB(T13, T16)); + ST(&(x[WS(rs, 17)]), VSUB(TS, T17), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(TS, T17), ms, &(x[WS(rs, 1)])); + T1g = VADD(T1a, T1b); + T1h = VBYI(VSUB(T1e, T1d)); + ST(&(x[WS(rs, 11)]), VSUB(T1g, T1h), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T1g, T1h), ms, &(x[WS(rs, 1)])); + } + { + V T18, T19, T1c, T1f; + T18 = VADD(Tu, TR); + T19 = VBYI(VADD(T16, T13)); + ST(&(x[WS(rs, 13)]), VSUB(T18, T19), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T18, T19), ms, &(x[WS(rs, 1)])); + T1c = VSUB(T1a, T1b); + T1f = VBYI(VADD(T1d, T1e)); + ST(&(x[WS(rs, 19)]), VSUB(T1c, T1f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1c, T1f), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t1bv_20"), twinstr, &GENUS, {111, 50, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_20) (planner *p) { + X(kdft_dit_register) (p, t1bv_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,934 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * 
the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:36 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1bv_25 -include t1b.h -sign 1 */ + +/* + * This function contains 248 FP additions, 241 FP multiplications, + * (or, 67 additions, 60 multiplications, 181 fused multiply/add), + * 208 stack variables, 67 constants, and 50 memory accesses + */ +#include "t1b.h" + +static void t1bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, 
+0.447417479732227551498980015410057305749330693); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP831864738, +0.831864738706457140726048799369896829771167132); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP522616830, 
+0.522616830205754336872861364785224694908468440); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP912575812, +0.912575812670962425556968549836277086778922727); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V T25, T1B, T2y, T1K, T2s, T23, T1S, T26, T20, T1X; + { + V T1O, T2X, Te, T3L, Td, T3Q, T3j, T3b, T2R, T2M, T2f, T27, T1y, T1H, T3M; + V TW, TR, TK, T2B, T3n, T3e, T2U, T2F, T2i, T2a, Tz, T1C, T3N, TQ, T11; + V T1b, T1c, T16; + { + V T1, T1g, T1i, T1p, T1k, T1m, Tb, T1N, T6, T1M; + { + V T7, T9, T2, T4, T1f, T1h, T1o; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + { + V T8, Ta, T3, T5, T1j; + T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 18]), T7); + Ta = BYTW(&(W[TWVL * 28]), T9); + T3 = BYTW(&(W[TWVL * 8]), T2); + T5 = BYTW(&(W[TWVL * 38]), T4); + T1g = BYTW(&(W[TWVL * 4]), T1f); + T1i = BYTW(&(W[TWVL * 14]), T1h); + T1p = BYTW(&(W[TWVL * 34]), T1o); + T1k = BYTW(&(W[TWVL * 44]), T1j); + T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tb = VADD(T8, Ta); + T1N = VSUB(T8, Ta); + T6 = VADD(T3, T5); + T1M = VSUB(T3, T5); + } + } + { + V T1v, T1l, Th, Tj, T1w, T1q, Tq, Tk, Tn, Tg; + Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V Tc, Ti, T1n, Tp; + Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1v = VSUB(T1i, T1k); + T1l = VADD(T1i, T1k); + T1n = BYTW(&(W[TWVL * 24]), T1m); + Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1O = VFMA(LDK(KP618033988), T1N, T1M); + T2X = VFNMS(LDK(KP618033988), T1M, T1N); + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Th = BYTW(&(W[0]), Tg); + Tj = BYTW(&(W[TWVL * 10]), Ti); + T1w = VSUB(T1n, T1p); + T1q = VADD(T1n, T1p); + Tq = BYTW(&(W[TWVL * 30]), Tp); + Tk = LD(&(x[WS(rs, 21)]), ms, 
&(x[WS(rs, 1)])); + T3L = VADD(T1, Tc); + Td = VFNMS(LDK(KP250000000), Tc, T1); + Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + } + { + V T1x, T2K, TM, TB, Tw, Tm, Tx, Tr, TI, T2L, T1u, TD, TF, TL; + TL = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + { + V T1t, Tl, To, TH, T1s, T1r, TA, TC; + TA = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1r = VADD(T1l, T1q); + T1t = VSUB(T1q, T1l); + T1x = VFMA(LDK(KP618033988), T1w, T1v); + T2K = VFNMS(LDK(KP618033988), T1v, T1w); + Tl = BYTW(&(W[TWVL * 40]), Tk); + To = BYTW(&(W[TWVL * 20]), Tn); + TM = BYTW(&(W[TWVL * 6]), TL); + TB = BYTW(&(W[TWVL * 46]), TA); + TH = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T1s = VFNMS(LDK(KP250000000), T1r, T1g); + T3Q = VADD(T1g, T1r); + TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tj, Tl); + Tm = VADD(Tj, Tl); + Tx = VSUB(Tq, To); + Tr = VADD(To, Tq); + TI = BYTW(&(W[TWVL * 26]), TH); + T2L = VFMA(LDK(KP559016994), T1t, T1s); + T1u = VFNMS(LDK(KP559016994), T1t, T1s); + TD = BYTW(&(W[TWVL * 16]), TC); + TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + } + { + V Tu, Ty, T2E, TE, TN, TG, Tt, TV, Ts; + TV = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ts = VADD(Tm, Tr); + Tu = VSUB(Tm, Tr); + Ty = VFNMS(LDK(KP618033988), Tx, Tw); + T2E = VFMA(LDK(KP618033988), Tw, Tx); + T3j = VFNMS(LDK(KP059835404), T2K, T2L); + T3b = VFMA(LDK(KP066152395), T2L, T2K); + T2R = VFNMS(LDK(KP786782374), T2K, T2L); + T2M = VFMA(LDK(KP869845200), T2L, T2K); + T2f = VFMA(LDK(KP132830569), T1u, T1x); + T27 = VFNMS(LDK(KP120146378), T1x, T1u); + T1y = VFNMS(LDK(KP893101515), T1x, T1u); + T1H = VFMA(LDK(KP987388751), T1u, T1x); + TE = VSUB(TB, TD); + TN = VADD(TD, TB); + TG = BYTW(&(W[TWVL * 36]), TF); + Tt = VFNMS(LDK(KP250000000), Ts, Th); + T3M = VADD(Th, Ts); + TW = BYTW(&(W[TWVL * 2]), TV); + { + V TJ, TO, Tv, T2D, TY, T15, T10, T13, TP; + { + V TX, T14, TZ, T12; + TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T12 = 
LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TJ = VSUB(TG, TI); + TO = VADD(TI, TG); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + T2D = VFNMS(LDK(KP559016994), Tu, Tt); + TY = BYTW(&(W[TWVL * 12]), TX); + T15 = BYTW(&(W[TWVL * 32]), T14); + T10 = BYTW(&(W[TWVL * 42]), TZ); + T13 = BYTW(&(W[TWVL * 22]), T12); + } + TP = VADD(TN, TO); + TR = VSUB(TN, TO); + TK = VFMA(LDK(KP618033988), TJ, TE); + T2B = VFNMS(LDK(KP618033988), TE, TJ); + T3n = VFMA(LDK(KP578046249), T2D, T2E); + T3e = VFNMS(LDK(KP522847744), T2E, T2D); + T2U = VFNMS(LDK(KP987388751), T2D, T2E); + T2F = VFMA(LDK(KP893101515), T2E, T2D); + T2i = VFNMS(LDK(KP603558818), Ty, Tv); + T2a = VFMA(LDK(KP667278218), Tv, Ty); + Tz = VFNMS(LDK(KP244189809), Ty, Tv); + T1C = VFMA(LDK(KP269969613), Tv, Ty); + T3N = VADD(TM, TP); + TQ = VFMS(LDK(KP250000000), TP, TM); + T11 = VADD(TY, T10); + T1b = VSUB(TY, T10); + T1c = VSUB(T15, T13); + T16 = VADD(T13, T15); + } + } + } + } + } + { + V T2z, Tf, T3W, T3O, T1d, T2H, T3m, T2j, T2b, TT, T1D, T2G, T35, T2V, T2Z; + V T3A, T3g, T2I, T1a, T3R, T3X; + T2z = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + { + V TS, T2A, T17, T19; + TS = VFNMS(LDK(KP559016994), TR, TQ); + T2A = VFMA(LDK(KP559016994), TR, TQ); + T3W = VSUB(T3M, T3N); + T3O = VADD(T3M, T3N); + T1d = VFNMS(LDK(KP618033988), T1c, T1b); + T2H = VFMA(LDK(KP618033988), T1b, T1c); + T17 = VADD(T11, T16); + T19 = VSUB(T16, T11); + { + V T3f, T2T, T2C, T18, T3P; + T3m = VFMA(LDK(KP447533225), T2B, T2A); + T3f = VFNMS(LDK(KP494780565), T2A, T2B); + T2T = VFNMS(LDK(KP132830569), T2A, T2B); + T2C = VFMA(LDK(KP120146378), T2B, T2A); + T2j = VFNMS(LDK(KP786782374), TK, TS); + T2b = VFMA(LDK(KP869845200), TS, TK); + TT = VFNMS(LDK(KP667278218), TS, TK); + T1D = VFMA(LDK(KP603558818), TK, TS); + T18 = VFNMS(LDK(KP250000000), T17, TW); + T3P = VADD(TW, T17); + T2G = VFMA(LDK(KP734762448), T2F, T2C); + T35 = VFNMS(LDK(KP734762448), T2F, T2C); + T2V = VFNMS(LDK(KP734762448), T2U, T2T); + T2Z = 
VFMA(LDK(KP734762448), T2U, T2T); + T3A = VFMA(LDK(KP982009705), T3f, T3e); + T3g = VFNMS(LDK(KP982009705), T3f, T3e); + T2I = VFMA(LDK(KP559016994), T19, T18); + T1a = VFNMS(LDK(KP559016994), T19, T18); + T3R = VADD(T3P, T3Q); + T3X = VSUB(T3P, T3Q); + } + } + { + V T2n, T2t, T1V, T22, T2l, T2d, T1Q, T1I, T2w, T1A, T1F, T2q; + { + V T2k, T1G, T28, T2g, T3K, T3E, T3a, T34, T3x, T3H, T2c, TU, T1T, T1U, T1z; + V T3o, T3t; + T2n = VFNMS(LDK(KP912575812), T2j, T2i); + T2k = VFMA(LDK(KP912575812), T2j, T2i); + T3o = VFNMS(LDK(KP921078979), T3n, T3m); + T3t = VFMA(LDK(KP921078979), T3n, T3m); + { + V T3c, T2Q, T2J, T3k, T1e; + T3c = VFNMS(LDK(KP667278218), T2I, T2H); + T2Q = VFNMS(LDK(KP059835404), T2H, T2I); + T2J = VFMA(LDK(KP066152395), T2I, T2H); + T3k = VFMA(LDK(KP603558818), T2H, T2I); + T1G = VFMA(LDK(KP578046249), T1a, T1d); + T1e = VFNMS(LDK(KP522847744), T1d, T1a); + T28 = VFNMS(LDK(KP494780565), T1a, T1d); + T2g = VFMA(LDK(KP447533225), T1d, T1a); + { + V T3U, T3S, T40, T3Y; + T3U = VSUB(T3O, T3R); + T3S = VADD(T3O, T3R); + T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X)); + T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W)); + { + V T3s, T3l, T2N, T36; + T3s = VFNMS(LDK(KP845997307), T3k, T3j); + T3l = VFMA(LDK(KP845997307), T3k, T3j); + T2N = VFNMS(LDK(KP772036680), T2M, T2J); + T36 = VFMA(LDK(KP772036680), T2M, T2J); + { + V T30, T2S, T3d, T3z, T3T; + T30 = VFNMS(LDK(KP772036680), T2R, T2Q); + T2S = VFMA(LDK(KP772036680), T2R, T2Q); + T3d = VFNMS(LDK(KP845997307), T3c, T3b); + T3z = VFMA(LDK(KP845997307), T3c, T3b); + ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0])); + T3T = VFNMS(LDK(KP250000000), T3S, T3L); + { + V T3C, T3p, T2O, T37; + T3C = VFMA(LDK(KP906616052), T3o, T3l); + T3p = VFNMS(LDK(KP906616052), T3o, T3l); + T2O = VFMA(LDK(KP956723877), T2N, T2G); + T37 = VFMA(LDK(KP522616830), T2V, T36); + { + V T31, T2W, T3u, T3h; + T31 = VFNMS(LDK(KP522616830), T2G, T30); + T2W = VFMA(LDK(KP945422727), T2V, T2S); + T3u = 
VFNMS(LDK(KP923225144), T3g, T3d); + T3h = VFMA(LDK(KP923225144), T3g, T3d); + { + V T3I, T3B, T3V, T3Z; + T3I = VFNMS(LDK(KP669429328), T3z, T3A); + T3B = VFMA(LDK(KP570584518), T3A, T3z); + T3V = VFMA(LDK(KP559016994), T3U, T3T); + T3Z = VFNMS(LDK(KP559016994), T3U, T3T); + { + V T3y, T3q, T2P, T38; + T3y = VFMA(LDK(KP262346850), T3p, T2X); + T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p)); + T2P = VFMA(LDK(KP992114701), T2O, T2z); + T38 = VFNMS(LDK(KP690983005), T37, T2S); + { + V T32, T2Y, T3v, T3F; + T32 = VFMA(LDK(KP763932022), T31, T2N); + T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W)); + T3v = VFNMS(LDK(KP997675361), T3u, T3t); + T3F = VFNMS(LDK(KP904508497), T3u, T3s); + { + V T3i, T3r, T3J, T3D; + T3i = VFMA(LDK(KP949179823), T3h, T2z); + T3r = VFNMS(LDK(KP237294955), T3h, T2z); + T3J = VFNMS(LDK(KP669429328), T3C, T3I); + T3D = VFMA(LDK(KP618033988), T3C, T3B); + ST(&(x[WS(rs, 20)]), VFNMSI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFMAI(T3Y, T3V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T40, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VFNMSI(T40, T3Z), ms, &(x[0])); + { + V T39, T33, T3w, T3G; + T39 = VFMA(LDK(KP855719849), T38, T35); + T33 = VFNMS(LDK(KP855719849), T32, T2Z); + ST(&(x[WS(rs, 3)]), VFMAI(T2Y, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 22)]), VFNMSI(T2Y, T2P), ms, &(x[0])); + T3w = VFMA(LDK(KP560319534), T3v, T3s); + T3G = VFNMS(LDK(KP681693190), T3F, T3t); + ST(&(x[WS(rs, 2)]), VFMAI(T3q, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 23)]), VFNMSI(T3q, T3i), ms, &(x[WS(rs, 1)])); + T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y)); + T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y)); + T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X)); + T34 = VFMA(LDK(KP897376177), T33, T2z); + T3x = VFNMS(LDK(KP949179823), T3w, T3r); + T3H = VFNMS(LDK(KP860541664), T3G, T3r); + T2t = VFNMS(LDK(KP912575812), T2b, T2a); + T2c = VFMA(LDK(KP912575812), T2b, T2a); + TU 
= VFMA(LDK(KP829049696), TT, Tz); + T1T = VFNMS(LDK(KP829049696), TT, Tz); + T1U = VFNMS(LDK(KP831864738), T1y, T1e); + T1z = VFMA(LDK(KP831864738), T1y, T1e); + } + } + } + } + } + } + } + } + } + } + } + { + V T2o, T2h, T29, T2u, T2v, T2p; + T2o = VFNMS(LDK(KP958953096), T2g, T2f); + T2h = VFMA(LDK(KP958953096), T2g, T2f); + ST(&(x[WS(rs, 17)]), VFNMSI(T3a, T34), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFMAI(T3a, T34), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFMAI(T3E, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 12)]), VFNMSI(T3E, T3x), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFNMSI(T3K, T3H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VFMAI(T3K, T3H), ms, &(x[0])); + T1V = VFMA(LDK(KP559154169), T1U, T1T); + T22 = VFNMS(LDK(KP683113946), T1T, T1U); + T29 = VFNMS(LDK(KP867381224), T28, T27); + T2u = VFMA(LDK(KP867381224), T28, T27); + T2l = VFMA(LDK(KP894834959), T2k, T2h); + T2v = VFMA(LDK(KP447417479), T2k, T2u); + T2d = VFNMS(LDK(KP809385824), T2c, T29); + T2p = VFMA(LDK(KP447417479), T2c, T2o); + T1Q = VFMA(LDK(KP831864738), T1H, T1G); + T1I = VFNMS(LDK(KP831864738), T1H, T1G); + T2w = VFNMS(LDK(KP763932022), T2v, T2h); + T1A = VFMA(LDK(KP904730450), T1z, TU); + T1F = VFNMS(LDK(KP904730450), T1z, TU); + T2q = VFMA(LDK(KP690983005), T2p, T29); + } + } + { + V T2e, T1E, T1P, T2m; + T2e = VFNMS(LDK(KP992114701), T2d, Tf); + T1E = VFMA(LDK(KP916574801), T1D, T1C); + T1P = VFNMS(LDK(KP916574801), T1D, T1C); + T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O)); + { + V T1J, T2r, T1R, T1W, T1Z, T2x; + T2x = VFNMS(LDK(KP999544308), T2w, T2t); + T1J = VFNMS(LDK(KP904730450), T1I, T1F); + T25 = VFMA(LDK(KP968583161), T1A, Tf); + T1B = VFNMS(LDK(KP242145790), T1A, Tf); + T2r = VFNMS(LDK(KP999544308), T2q, T2n); + T1R = VFMA(LDK(KP904730450), T1Q, T1P); + T1W = VFNMS(LDK(KP904730450), T1Q, T1P); + T1Z = VADD(T1E, T1F); + ST(&(x[WS(rs, 21)]), VFMAI(T2m, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(T2m, T2e), ms, &(x[0])); + T2y = 
VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O)); + T1K = VFNMS(LDK(KP618033988), T1J, T1E); + T2s = VFNMS(LDK(KP803003575), T2r, Tf); + T23 = VFMA(LDK(KP617882369), T1W, T22); + T1S = VFNMS(LDK(KP242145790), T1R, T1O); + T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O)); + T20 = VFNMS(LDK(KP683113946), T1Z, T1I); + T1X = VFMA(LDK(KP559016994), T1W, T1V); + } + } + } + } + } + { + V T1L, T24, T21, T1Y; + T1L = VFNMS(LDK(KP876091699), T1K, T1B); + ST(&(x[WS(rs, 16)]), VFMAI(T2y, T2s), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(T2y, T2s), ms, &(x[WS(rs, 1)])); + T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S)); + ST(&(x[WS(rs, 24)]), VFNMSI(T26, T25), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T26, T25), ms, &(x[WS(rs, 1)])); + T21 = VFMA(LDK(KP792626838), T20, T1B); + T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S)); + ST(&(x[WS(rs, 11)]), VFMAI(T24, T21), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFNMSI(T24, T21), ms, &(x[0])); + ST(&(x[WS(rs, 19)]), VFNMSI(T1Y, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFMAI(T1Y, T1L), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t1bv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_25) (planner *p) { + X(kdft_dit_register) (p, t1bv_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1bv_25 -include t1b.h -sign 1 */ + +/* + * This function contains 248 FP additions, 188 FP multiplications, + * (or, 171 additions, 
111 multiplications, 77 fused multiply/add), + * 100 stack variables, 40 constants, and 50 memory accesses + */ +#include "t1b.h" + +static void t1bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + 
DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V T1A, T1z, T1R, T1S, T1B, T1C, T1Q, T2L, T1l, T2v, T1i, T3e, T2u, Tb, T2i; + V Tj, T3b, T2h, Tv, T2k, TD, T3a, T2l, T11, T2s, TY, T3d, T2r; + { + V T1v, T1x, T1y, T1q, T1s, T1t, T1P; + T1A = LD(&(x[0]), ms, &(x[0])); + { + V T1u, T1w, T1p, T1r; + T1u = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T1v = BYTW(&(W[TWVL * 18]), T1u); + T1w = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1x = BYTW(&(W[TWVL * 28]), T1w); + T1y = VADD(T1v, T1x); + T1p = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1q = BYTW(&(W[TWVL * 8]), T1p); + T1r = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1s = BYTW(&(W[TWVL * 38]), T1r); + T1t = VADD(T1q, T1s); + } + T1z = VMUL(LDK(KP559016994), VSUB(T1t, T1y)); + T1R = VSUB(T1v, T1x); + T1S = VMUL(LDK(KP587785252), T1R); + T1B = VADD(T1t, T1y); + T1C = 
VFNMS(LDK(KP250000000), T1B, T1A); + T1P = VSUB(T1q, T1s); + T1Q = VMUL(LDK(KP951056516), T1P); + T2L = VMUL(LDK(KP587785252), T1P); + } + { + V T1f, T19, T1b, T1c, T14, T16, T17, T1e; + T1e = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1f = BYTW(&(W[TWVL * 4]), T1e); + { + V T18, T1a, T13, T15; + T18 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T19 = BYTW(&(W[TWVL * 24]), T18); + T1a = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1b = BYTW(&(W[TWVL * 34]), T1a); + T1c = VADD(T19, T1b); + T13 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T14 = BYTW(&(W[TWVL * 14]), T13); + T15 = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T16 = BYTW(&(W[TWVL * 44]), T15); + T17 = VADD(T14, T16); + } + { + V T1j, T1k, T1d, T1g, T1h; + T1j = VSUB(T14, T16); + T1k = VSUB(T19, T1b); + T1l = VFMA(LDK(KP475528258), T1j, VMUL(LDK(KP293892626), T1k)); + T2v = VFNMS(LDK(KP475528258), T1k, VMUL(LDK(KP293892626), T1j)); + T1d = VMUL(LDK(KP559016994), VSUB(T17, T1c)); + T1g = VADD(T17, T1c); + T1h = VFNMS(LDK(KP250000000), T1g, T1f); + T1i = VADD(T1d, T1h); + T3e = VADD(T1f, T1g); + T2u = VSUB(T1h, T1d); + } + } + { + V Tg, T7, T9, Td, T2, T4, Tc, Tf; + Tf = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tg = BYTW(&(W[TWVL * 6]), Tf); + { + V T6, T8, T1, T3; + T6 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 26]), T6); + T8 = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 36]), T8); + Td = VADD(T7, T9); + T1 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[TWVL * 16]), T1); + T3 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 46]), T3); + Tc = VADD(T2, T4); + } + { + V T5, Ta, Te, Th, Ti; + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VFMA(LDK(KP475528258), T5, VMUL(LDK(KP293892626), Ta)); + T2i = VFNMS(LDK(KP475528258), Ta, VMUL(LDK(KP293892626), T5)); + Te = VMUL(LDK(KP559016994), VSUB(Tc, Td)); + Th = VADD(Tc, Td); + Ti = VFNMS(LDK(KP250000000), Th, Tg); + Tj = VADD(Te, Ti); + T3b = VADD(Tg, Th); + T2h = VSUB(Ti, Te); + } + } + { + V TA, Tr, Tt, Tx, Tm, 
To, Tw, Tz; + Tz = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TA = BYTW(&(W[0]), Tz); + { + V Tq, Ts, Tl, Tn; + Tq = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tr = BYTW(&(W[TWVL * 20]), Tq); + Ts = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 30]), Ts); + Tx = VADD(Tr, Tt); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tm = BYTW(&(W[TWVL * 10]), Tl); + Tn = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + To = BYTW(&(W[TWVL * 40]), Tn); + Tw = VADD(Tm, To); + } + { + V Tp, Tu, Ty, TB, TC; + Tp = VSUB(Tm, To); + Tu = VSUB(Tr, Tt); + Tv = VFMA(LDK(KP475528258), Tp, VMUL(LDK(KP293892626), Tu)); + T2k = VFNMS(LDK(KP475528258), Tu, VMUL(LDK(KP293892626), Tp)); + Ty = VMUL(LDK(KP559016994), VSUB(Tw, Tx)); + TB = VADD(Tw, Tx); + TC = VFNMS(LDK(KP250000000), TB, TA); + TD = VADD(Ty, TC); + T3a = VADD(TA, TB); + T2l = VSUB(TC, Ty); + } + } + { + V TV, TP, TR, TS, TK, TM, TN, TU; + TU = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TV = BYTW(&(W[TWVL * 2]), TU); + { + V TO, TQ, TJ, TL; + TO = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TP = BYTW(&(W[TWVL * 22]), TO); + TQ = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TR = BYTW(&(W[TWVL * 32]), TQ); + TS = VADD(TP, TR); + TJ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TK = BYTW(&(W[TWVL * 12]), TJ); + TL = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TM = BYTW(&(W[TWVL * 42]), TL); + TN = VADD(TK, TM); + } + { + V TZ, T10, TT, TW, TX; + TZ = VSUB(TK, TM); + T10 = VSUB(TP, TR); + T11 = VFMA(LDK(KP475528258), TZ, VMUL(LDK(KP293892626), T10)); + T2s = VFNMS(LDK(KP475528258), T10, VMUL(LDK(KP293892626), TZ)); + TT = VMUL(LDK(KP559016994), VSUB(TN, TS)); + TW = VADD(TN, TS); + TX = VFNMS(LDK(KP250000000), TW, TV); + TY = VADD(TT, TX); + T3d = VADD(TV, TW); + T2r = VSUB(TX, TT); + } + } + { + V T3g, T3o, T3k, T3l, T3j, T3m, T3p, T3n; + { + V T3c, T3f, T3h, T3i; + T3c = VSUB(T3a, T3b); + T3f = VSUB(T3d, T3e); + T3g = VBYI(VFMA(LDK(KP951056516), T3c, VMUL(LDK(KP587785252), T3f))); + T3o = VBYI(VFNMS(LDK(KP951056516), T3f, VMUL(LDK(KP587785252), 
T3c))); + T3k = VADD(T1A, T1B); + T3h = VADD(T3a, T3b); + T3i = VADD(T3d, T3e); + T3l = VADD(T3h, T3i); + T3j = VMUL(LDK(KP559016994), VSUB(T3h, T3i)); + T3m = VFNMS(LDK(KP250000000), T3l, T3k); + } + ST(&(x[0]), VADD(T3k, T3l), ms, &(x[0])); + T3p = VSUB(T3m, T3j); + ST(&(x[WS(rs, 10)]), VADD(T3o, T3p), ms, &(x[0])); + ST(&(x[WS(rs, 15)]), VSUB(T3p, T3o), ms, &(x[WS(rs, 1)])); + T3n = VADD(T3j, T3m); + ST(&(x[WS(rs, 5)]), VADD(T3g, T3n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 20)]), VSUB(T3n, T3g), ms, &(x[0])); + } + { + V T2z, T2M, T2U, T2V, T2W, T34, T35, T36, T2X, T2Y, T2Z, T31, T32, T33, T2n; + V T2N, T2E, T2K, T2y, T2H, T2A, T2G, T38, T39; + T2z = VSUB(T1C, T1z); + T2M = VFNMS(LDK(KP951056516), T1R, T2L); + T2U = VFMA(LDK(KP1_369094211), T2k, VMUL(LDK(KP728968627), T2l)); + T2V = VFNMS(LDK(KP992114701), T2h, VMUL(LDK(KP250666467), T2i)); + T2W = VADD(T2U, T2V); + T34 = VFNMS(LDK(KP125581039), T2s, VMUL(LDK(KP998026728), T2r)); + T35 = VFMA(LDK(KP1_274847979), T2v, VMUL(LDK(KP770513242), T2u)); + T36 = VADD(T34, T35); + T2X = VFMA(LDK(KP1_996053456), T2s, VMUL(LDK(KP062790519), T2r)); + T2Y = VFNMS(LDK(KP637423989), T2u, VMUL(LDK(KP1_541026485), T2v)); + T2Z = VADD(T2X, T2Y); + T31 = VFNMS(LDK(KP1_457937254), T2k, VMUL(LDK(KP684547105), T2l)); + T32 = VFMA(LDK(KP1_984229402), T2i, VMUL(LDK(KP125333233), T2h)); + T33 = VADD(T31, T32); + { + V T2j, T2m, T2I, T2C, T2D, T2J; + T2j = VFNMS(LDK(KP851558583), T2i, VMUL(LDK(KP904827052), T2h)); + T2m = VFMA(LDK(KP1_752613360), T2k, VMUL(LDK(KP481753674), T2l)); + T2I = VADD(T2m, T2j); + T2C = VFMA(LDK(KP1_071653589), T2s, VMUL(LDK(KP844327925), T2r)); + T2D = VFMA(LDK(KP125581039), T2v, VMUL(LDK(KP998026728), T2u)); + T2J = VADD(T2C, T2D); + T2n = VSUB(T2j, T2m); + T2N = VADD(T2I, T2J); + T2E = VSUB(T2C, T2D); + T2K = VMUL(LDK(KP559016994), VSUB(T2I, T2J)); + } + { + V T2o, T2p, T2q, T2t, T2w, T2x; + T2o = VFNMS(LDK(KP963507348), T2k, VMUL(LDK(KP876306680), T2l)); + T2p = VFMA(LDK(KP1_809654104), T2i, 
VMUL(LDK(KP425779291), T2h)); + T2q = VSUB(T2o, T2p); + T2t = VFNMS(LDK(KP1_688655851), T2s, VMUL(LDK(KP535826794), T2r)); + T2w = VFNMS(LDK(KP1_996053456), T2v, VMUL(LDK(KP062790519), T2u)); + T2x = VADD(T2t, T2w); + T2y = VMUL(LDK(KP559016994), VSUB(T2q, T2x)); + T2H = VSUB(T2t, T2w); + T2A = VADD(T2q, T2x); + T2G = VADD(T2o, T2p); + } + { + V T2S, T2T, T30, T37; + T2S = VADD(T2z, T2A); + T2T = VBYI(VADD(T2M, T2N)); + ST(&(x[WS(rs, 23)]), VSUB(T2S, T2T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(T2S, T2T), ms, &(x[0])); + T30 = VADD(T2z, VADD(T2W, T2Z)); + T37 = VBYI(VSUB(VADD(T33, T36), T2M)); + ST(&(x[WS(rs, 22)]), VSUB(T30, T37), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T30, T37), ms, &(x[WS(rs, 1)])); + } + T38 = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T2U, T2V), VFMA(LDK(KP309016994), T33, VFNMS(LDK(KP809016994), T36, VMUL(LDK(KP587785252), VSUB(T2X, T2Y))))), T2M)); + T39 = VFMA(LDK(KP309016994), T2W, VFMA(LDK(KP951056516), VSUB(T32, T31), VFMA(LDK(KP587785252), VSUB(T35, T34), VFNMS(LDK(KP809016994), T2Z, T2z)))); + ST(&(x[WS(rs, 8)]), VADD(T38, T39), ms, &(x[0])); + ST(&(x[WS(rs, 17)]), VSUB(T39, T38), ms, &(x[WS(rs, 1)])); + { + V T2F, T2Q, T2P, T2R, T2B, T2O; + T2B = VFNMS(LDK(KP250000000), T2A, T2z); + T2F = VFMA(LDK(KP951056516), T2n, VADD(T2y, VFNMS(LDK(KP587785252), T2E, T2B))); + T2Q = VFMA(LDK(KP587785252), T2n, VFMA(LDK(KP951056516), T2E, VSUB(T2B, T2y))); + T2O = VFNMS(LDK(KP250000000), T2N, T2M); + T2P = VBYI(VADD(VFMA(LDK(KP951056516), T2G, VMUL(LDK(KP587785252), T2H)), VADD(T2K, T2O))); + T2R = VBYI(VADD(VFNMS(LDK(KP951056516), T2H, VMUL(LDK(KP587785252), T2G)), VSUB(T2O, T2K))); + ST(&(x[WS(rs, 18)]), VSUB(T2F, T2P), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T2Q, T2R), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VADD(T2F, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T2Q, T2R), ms, &(x[WS(rs, 1)])); + } + } + { + V T1D, T1T, T21, T22, T23, T2b, T2c, T2d, T24, T25, T26, T28, T29, T2a, TF; + V T1U, T1I, T1O, T1o, T1L, T1E, T1K, T2f, 
T2g; + T1D = VADD(T1z, T1C); + T1T = VADD(T1Q, T1S); + T21 = VFMA(LDK(KP1_688655851), Tv, VMUL(LDK(KP535826794), TD)); + T22 = VFMA(LDK(KP1_541026485), Tb, VMUL(LDK(KP637423989), Tj)); + T23 = VSUB(T21, T22); + T2b = VFMA(LDK(KP851558583), T11, VMUL(LDK(KP904827052), TY)); + T2c = VFMA(LDK(KP1_984229402), T1l, VMUL(LDK(KP125333233), T1i)); + T2d = VADD(T2b, T2c); + T24 = VFNMS(LDK(KP425779291), TY, VMUL(LDK(KP1_809654104), T11)); + T25 = VFNMS(LDK(KP992114701), T1i, VMUL(LDK(KP250666467), T1l)); + T26 = VADD(T24, T25); + T28 = VFNMS(LDK(KP1_071653589), Tv, VMUL(LDK(KP844327925), TD)); + T29 = VFNMS(LDK(KP770513242), Tj, VMUL(LDK(KP1_274847979), Tb)); + T2a = VADD(T28, T29); + { + V Tk, TE, T1M, T1G, T1H, T1N; + Tk = VFMA(LDK(KP1_071653589), Tb, VMUL(LDK(KP844327925), Tj)); + TE = VFMA(LDK(KP1_937166322), Tv, VMUL(LDK(KP248689887), TD)); + T1M = VADD(TE, Tk); + T1G = VFMA(LDK(KP1_752613360), T11, VMUL(LDK(KP481753674), TY)); + T1H = VFMA(LDK(KP1_457937254), T1l, VMUL(LDK(KP684547105), T1i)); + T1N = VADD(T1G, T1H); + TF = VSUB(Tk, TE); + T1U = VADD(T1M, T1N); + T1I = VSUB(T1G, T1H); + T1O = VMUL(LDK(KP559016994), VSUB(T1M, T1N)); + } + { + V TG, TH, TI, T12, T1m, T1n; + TG = VFNMS(LDK(KP497379774), Tv, VMUL(LDK(KP968583161), TD)); + TH = VFNMS(LDK(KP1_688655851), Tb, VMUL(LDK(KP535826794), Tj)); + TI = VADD(TG, TH); + T12 = VFNMS(LDK(KP963507348), T11, VMUL(LDK(KP876306680), TY)); + T1m = VFNMS(LDK(KP1_369094211), T1l, VMUL(LDK(KP728968627), T1i)); + T1n = VADD(T12, T1m); + T1o = VMUL(LDK(KP559016994), VSUB(TI, T1n)); + T1L = VSUB(T12, T1m); + T1E = VADD(TI, T1n); + T1K = VSUB(TG, TH); + } + { + V T1Z, T20, T27, T2e; + T1Z = VADD(T1D, T1E); + T20 = VBYI(VADD(T1T, T1U)); + ST(&(x[WS(rs, 24)]), VSUB(T1Z, T20), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T1Z, T20), ms, &(x[WS(rs, 1)])); + T27 = VADD(T1D, VADD(T23, T26)); + T2e = VBYI(VSUB(VADD(T2a, T2d), T1T)); + ST(&(x[WS(rs, 21)]), VSUB(T27, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T27, T2e), ms, &(x[0])); 
+ } + T2f = VBYI(VSUB(VFMA(LDK(KP309016994), T2a, VFMA(LDK(KP951056516), VADD(T21, T22), VFNMS(LDK(KP809016994), T2d, VMUL(LDK(KP587785252), VSUB(T24, T25))))), T1T)); + T2g = VFMA(LDK(KP951056516), VSUB(T29, T28), VFMA(LDK(KP309016994), T23, VFMA(LDK(KP587785252), VSUB(T2c, T2b), VFNMS(LDK(KP809016994), T26, T1D)))); + ST(&(x[WS(rs, 9)]), VADD(T2f, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VSUB(T2g, T2f), ms, &(x[0])); + { + V T1J, T1X, T1W, T1Y, T1F, T1V; + T1F = VFNMS(LDK(KP250000000), T1E, T1D); + T1J = VFMA(LDK(KP951056516), TF, VADD(T1o, VFNMS(LDK(KP587785252), T1I, T1F))); + T1X = VFMA(LDK(KP587785252), TF, VFMA(LDK(KP951056516), T1I, VSUB(T1F, T1o))); + T1V = VFNMS(LDK(KP250000000), T1U, T1T); + T1W = VBYI(VADD(VFMA(LDK(KP951056516), T1K, VMUL(LDK(KP587785252), T1L)), VADD(T1O, T1V))); + T1Y = VBYI(VADD(VFNMS(LDK(KP951056516), T1L, VMUL(LDK(KP587785252), T1K)), VSUB(T1V, T1O))); + ST(&(x[WS(rs, 19)]), VSUB(T1J, T1W), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T1X, T1Y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VADD(T1J, T1W), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1X, T1Y), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t1bv_25"), twinstr, &GENUS, {171, 111, 77, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_25) (planner *p) { + X(kdft_dit_register) (p, t1bv_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 
+1,125 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1bv_3 -include t1b.h -sign 1 */ + +/* + * This function contains 8 FP additions, 8 FP multiplications, + * (or, 5 additions, 5 multiplications, 3 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1b.h" + +static void t1bv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T1, T2, T4; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, T8, T6, T7; 
+ T3 = BYTW(&(W[0]), T2); + T5 = BYTW(&(W[TWVL * 2]), T4); + T8 = VMUL(LDK(KP866025403), VSUB(T3, T5)); + T6 = VADD(T3, T5); + T7 = VFNMS(LDK(KP500000000), T6, T1); + ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(T8, T7), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T8, T7), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1bv_3"), twinstr, &GENUS, {5, 5, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_3) (planner *p) { + X(kdft_dit_register) (p, t1bv_3, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1bv_3 -include t1b.h -sign 1 */ + +/* + * This function contains 8 FP additions, 6 FP multiplications, + * (or, 7 additions, 5 multiplications, 1 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1b.h" + +static void t1bv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T6, T2, T4, T7, T1, T3, T5, T8; + T6 = LD(&(x[0]), ms, &(x[0])); + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 2]), T3); + T7 = VADD(T2, T4); + ST(&(x[0]), VADD(T6, T7), ms, &(x[0])); + T5 = VBYI(VMUL(LDK(KP866025403), VSUB(T2, T4))); + T8 = VFNMS(LDK(KP500000000), T7, T6); + ST(&(x[WS(rs, 1)]), VADD(T5, T8), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VSUB(T8, T5), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} 
+}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1bv_3"), twinstr, &GENUS, {7, 5, 1, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_3) (planner *p) { + X(kdft_dit_register) (p, t1bv_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,865 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:35 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1bv_32 -include t1b.h -sign 1 */ + +/* + * This function contains 217 FP additions, 160 FP multiplications, + * (or, 119 additions, 62 multiplications, 98 fused multiply/add), + * 104 stack variables, 7 constants, and 64 memory accesses + */ +#include "t1b.h" + +static void t1bv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T26, T25, T2a, T2i, T24, T2c, T2g, T2k, T2h, T27; + { + V T4, T1z, T2o, T32, T2r, T3f, Tf, T1A, T34, T2O, T1D, TC, T33, T2L, T1C; + V Tr, T2C, T3a, T2F, T3b, T1r, T21, T1k, T20, TQ, TM, TS, TL, T2t, TJ; + V T10, T2u; + { + V Tt, T9, T2p, Te, T2q, TA, Tu, Tx; + { + V T1, T1x, T2, T1v; + T1 = LD(&(x[0]), ms, &(x[0])); + T1x = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1v = LD(&(x[WS(rs, 8)]), ms, 
&(x[0])); + { + V T5, Tc, T7, Ta, T2m, T2n; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + { + V T1y, T3, T1w, T6, Td, T8, Tb, Ts, Tz; + Ts = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T1y = BYTW(&(W[TWVL * 46]), T1x); + T3 = BYTW(&(W[TWVL * 30]), T2); + T1w = BYTW(&(W[TWVL * 14]), T1v); + T6 = BYTW(&(W[TWVL * 6]), T5); + Td = BYTW(&(W[TWVL * 22]), Tc); + T8 = BYTW(&(W[TWVL * 38]), T7); + Tb = BYTW(&(W[TWVL * 54]), Ta); + Tt = BYTW(&(W[TWVL * 58]), Ts); + Tz = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T4 = VSUB(T1, T3); + T2m = VADD(T1, T3); + T1z = VSUB(T1w, T1y); + T2n = VADD(T1w, T1y); + T9 = VSUB(T6, T8); + T2p = VADD(T6, T8); + Te = VSUB(Tb, Td); + T2q = VADD(Tb, Td); + TA = BYTW(&(W[TWVL * 10]), Tz); + } + Tu = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2o = VADD(T2m, T2n); + T32 = VSUB(T2m, T2n); + Tx = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + } + } + { + V Tv, To, Ty, Ti, Tj, Tm, Th; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2r = VADD(T2p, T2q); + T3f = VSUB(T2p, T2q); + Tf = VADD(T9, Te); + T1A = VSUB(T9, Te); + Tv = BYTW(&(W[TWVL * 26]), Tu); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 42]), Tx); + Ti = BYTW(&(W[TWVL * 2]), Th); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + { + V T1f, T1h, T1a, T1c, T18, T2A, T2B, T1p; + { + V T15, T17, T1o, T1m; + { + V Tw, T2M, Tp, T2N, TB, Tk, Tn, T1n, T14, T16; + T14 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T16 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tt, Tv); + T2M = VADD(Tt, Tv); + Tp = BYTW(&(W[TWVL * 50]), To); + T2N = VADD(TA, Ty); + TB = VSUB(Ty, TA); + Tk = BYTW(&(W[TWVL * 34]), Tj); + Tn = BYTW(&(W[TWVL * 18]), Tm); + T15 = BYTW(&(W[TWVL * 60]), T14); + T17 = BYTW(&(W[TWVL * 28]), T16); + T1n = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + { + V T2J, Tl, T2K, Tq, T1l; + T1l = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T34 = 
VSUB(T2M, T2N); + T2O = VADD(T2M, T2N); + T1D = VFMA(LDK(KP414213562), Tw, TB); + TC = VFNMS(LDK(KP414213562), TB, Tw); + T2J = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + T2K = VADD(Tn, Tp); + Tq = VSUB(Tn, Tp); + T1o = BYTW(&(W[TWVL * 12]), T1n); + T1m = BYTW(&(W[TWVL * 44]), T1l); + { + V T1e, T1g, T19, T1b; + T1e = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1g = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T19 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1b = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T33 = VSUB(T2J, T2K); + T2L = VADD(T2J, T2K); + T1C = VFMA(LDK(KP414213562), Tl, Tq); + Tr = VFNMS(LDK(KP414213562), Tq, Tl); + T1f = BYTW(&(W[TWVL * 52]), T1e); + T1h = BYTW(&(W[TWVL * 20]), T1g); + T1a = BYTW(&(W[TWVL * 4]), T19); + T1c = BYTW(&(W[TWVL * 36]), T1b); + } + } + } + T18 = VSUB(T15, T17); + T2A = VADD(T15, T17); + T2B = VADD(T1o, T1m); + T1p = VSUB(T1m, T1o); + } + { + V TG, TI, TZ, TX; + { + V T1i, T2E, T1d, T2D, TH, TY, TF; + TF = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T1i = VSUB(T1f, T1h); + T2E = VADD(T1f, T1h); + T1d = VSUB(T1a, T1c); + T2D = VADD(T1a, T1c); + TH = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TY = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T2C = VADD(T2A, T2B); + T3a = VSUB(T2A, T2B); + TG = BYTW(&(W[0]), TF); + { + V TW, T1j, T1q, TP, TR, TK; + TW = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T2F = VADD(T2D, T2E); + T3b = VSUB(T2E, T2D); + T1j = VADD(T1d, T1i); + T1q = VSUB(T1i, T1d); + TI = BYTW(&(W[TWVL * 32]), TH); + TZ = BYTW(&(W[TWVL * 48]), TY); + TP = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TX = BYTW(&(W[TWVL * 16]), TW); + TR = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TK = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1r = VFMA(LDK(KP707106781), T1q, T1p); + T21 = VFNMS(LDK(KP707106781), T1q, T1p); + T1k = VFMA(LDK(KP707106781), T1j, T18); + T20 = VFNMS(LDK(KP707106781), T1j, T18); + TQ = BYTW(&(W[TWVL * 56]), TP); + TM = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TS = BYTW(&(W[TWVL * 24]), TR); + TL = 
BYTW(&(W[TWVL * 8]), TK); + } + } + T2t = VADD(TG, TI); + TJ = VSUB(TG, TI); + T10 = VSUB(TX, TZ); + T2u = VADD(TX, TZ); + } + } + } + } + { + V T2s, TT, T2x, T2P, T2Y, T2G, T37, T2v, T2w, TO, T2W, T30, T2U, TN, T2V; + T2s = VSUB(T2o, T2r); + T2U = VADD(T2o, T2r); + TN = BYTW(&(W[TWVL * 40]), TM); + TT = VSUB(TQ, TS); + T2x = VADD(TQ, TS); + T2P = VSUB(T2L, T2O); + T2V = VADD(T2L, T2O); + T2Y = VADD(T2C, T2F); + T2G = VSUB(T2C, T2F); + T37 = VSUB(T2t, T2u); + T2v = VADD(T2t, T2u); + T2w = VADD(TL, TN); + TO = VSUB(TL, TN); + T2W = VSUB(T2U, T2V); + T30 = VADD(T2U, T2V); + { + V T1Y, T12, T1X, TV, T3n, T3t, T3m, T3q; + { + V T3o, T36, T3r, T3h, T3k, T3p, T3d, T3s, T2H, T2Q, T2Z, T31; + { + V T35, T3g, T38, T2y, T11, TU, T3c, T3j; + T35 = VADD(T33, T34); + T3g = VSUB(T33, T34); + T38 = VSUB(T2w, T2x); + T2y = VADD(T2w, T2x); + T11 = VSUB(TO, TT); + TU = VADD(TO, TT); + T3c = VFNMS(LDK(KP414213562), T3b, T3a); + T3j = VFMA(LDK(KP414213562), T3a, T3b); + T3o = VFNMS(LDK(KP707106781), T35, T32); + T36 = VFMA(LDK(KP707106781), T35, T32); + T3r = VFNMS(LDK(KP707106781), T3g, T3f); + T3h = VFMA(LDK(KP707106781), T3g, T3f); + { + V T3i, T39, T2z, T2X; + T3i = VFMA(LDK(KP414213562), T37, T38); + T39 = VFNMS(LDK(KP414213562), T38, T37); + T2z = VSUB(T2v, T2y); + T2X = VADD(T2v, T2y); + T1Y = VFNMS(LDK(KP707106781), T11, T10); + T12 = VFMA(LDK(KP707106781), T11, T10); + T1X = VFNMS(LDK(KP707106781), TU, TJ); + TV = VFMA(LDK(KP707106781), TU, TJ); + T3k = VSUB(T3i, T3j); + T3p = VADD(T3i, T3j); + T3d = VADD(T39, T3c); + T3s = VSUB(T39, T3c); + T2H = VADD(T2z, T2G); + T2Q = VSUB(T2z, T2G); + T2Z = VSUB(T2X, T2Y); + T31 = VADD(T2X, T2Y); + } + } + { + V T3v, T3u, T3l, T3e; + T3l = VFNMS(LDK(KP923879532), T3k, T3h); + T3n = VFMA(LDK(KP923879532), T3k, T3h); + T3t = VFMA(LDK(KP923879532), T3s, T3r); + T3v = VFNMS(LDK(KP923879532), T3s, T3r); + T3e = VFNMS(LDK(KP923879532), T3d, T36); + T3m = VFMA(LDK(KP923879532), T3d, T36); + { + V T2R, T2T, T2I, T2S; + T2R = 
VFNMS(LDK(KP707106781), T2Q, T2P); + T2T = VFMA(LDK(KP707106781), T2Q, T2P); + T2I = VFNMS(LDK(KP707106781), T2H, T2s); + T2S = VFMA(LDK(KP707106781), T2H, T2s); + ST(&(x[WS(rs, 16)]), VSUB(T30, T31), ms, &(x[0])); + ST(&(x[0]), VADD(T30, T31), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T2Z, T2W), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T2Z, T2W), ms, &(x[0])); + T3q = VFNMS(LDK(KP923879532), T3p, T3o); + T3u = VFMA(LDK(KP923879532), T3p, T3o); + ST(&(x[WS(rs, 18)]), VFMAI(T3l, T3e), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3l, T3e), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T2R, T2I), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T2R, T2I), ms, &(x[0])); + } + ST(&(x[WS(rs, 26)]), VFMAI(T3v, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3v, T3u), ms, &(x[0])); + } + } + { + V T1U, T13, T1s, TE, T1M, T1I, T1N, T1B, T1V, T1E; + { + V Tg, TD, T1G, T1H; + Tg = VFMA(LDK(KP707106781), Tf, T4); + T1U = VFNMS(LDK(KP707106781), Tf, T4); + T26 = VSUB(Tr, TC); + TD = VADD(Tr, TC); + T1G = VFMA(LDK(KP198912367), TV, T12); + T13 = VFNMS(LDK(KP198912367), T12, TV); + T1s = VFNMS(LDK(KP198912367), T1r, T1k); + T1H = VFMA(LDK(KP198912367), T1k, T1r); + ST(&(x[WS(rs, 2)]), VFMAI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3t, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3t, T3q), ms, &(x[0])); + TE = VFMA(LDK(KP923879532), TD, Tg); + T1M = VFNMS(LDK(KP923879532), TD, Tg); + T1I = VSUB(T1G, T1H); + T1N = VADD(T1G, T1H); + T1B = VFMA(LDK(KP707106781), T1A, T1z); + T25 = VFNMS(LDK(KP707106781), T1A, T1z); + T1V = VADD(T1C, T1D); + T1E = VSUB(T1C, T1D); + } + { + V T1W, T2e, T2f, T23; + { + V T28, T1Z, T1S, T1O, T1t, T1Q, T1F, T1P, T22, T29; + T28 = VFNMS(LDK(KP668178637), T1X, T1Y); + T1Z = VFMA(LDK(KP668178637), T1Y, T1X); + T1S = VFMA(LDK(KP980785280), T1N, T1M); + T1O = VFNMS(LDK(KP980785280), 
T1N, T1M); + T1t = VADD(T13, T1s); + T1Q = VSUB(T13, T1s); + T1F = VFMA(LDK(KP923879532), T1E, T1B); + T1P = VFNMS(LDK(KP923879532), T1E, T1B); + T1W = VFMA(LDK(KP923879532), T1V, T1U); + T2e = VFNMS(LDK(KP923879532), T1V, T1U); + T22 = VFMA(LDK(KP668178637), T21, T20); + T29 = VFNMS(LDK(KP668178637), T20, T21); + { + V T1K, T1u, T1R, T1T, T1L, T1J; + T1K = VFMA(LDK(KP980785280), T1t, TE); + T1u = VFNMS(LDK(KP980785280), T1t, TE); + T1R = VFMA(LDK(KP980785280), T1Q, T1P); + T1T = VFNMS(LDK(KP980785280), T1Q, T1P); + T1L = VFMA(LDK(KP980785280), T1I, T1F); + T1J = VFNMS(LDK(KP980785280), T1I, T1F); + T2f = VADD(T28, T29); + T2a = VSUB(T28, T29); + T23 = VADD(T1Z, T22); + T2i = VSUB(T1Z, T22); + ST(&(x[WS(rs, 23)]), VFNMSI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFMAI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFNMSI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFMAI(T1J, T1u), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T1J, T1u), ms, &(x[WS(rs, 1)])); + } + } + T24 = VFNMS(LDK(KP831469612), T23, T1W); + T2c = VFMA(LDK(KP831469612), T23, T1W); + T2g = VFMA(LDK(KP831469612), T2f, T2e); + T2k = VFNMS(LDK(KP831469612), T2f, T2e); + } + } + } + } + } + T2h = VFMA(LDK(KP923879532), T26, T25); + T27 = VFNMS(LDK(KP923879532), T26, T25); + { + V T2j, T2l, T2d, T2b; + T2j = VFNMS(LDK(KP831469612), T2i, T2h); + T2l = VFMA(LDK(KP831469612), T2i, T2h); + T2d = VFMA(LDK(KP831469612), T2a, T27); + T2b = VFNMS(LDK(KP831469612), T2a, T27); + ST(&(x[WS(rs, 21)]), VFMAI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFNMSI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFMAI(T2d, T2c), ms, &(x[WS(rs, 1)])); + 
ST(&(x[WS(rs, 3)]), VFNMSI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T2b, T24), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T2b, T24), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t1bv_32"), twinstr, &GENUS, {119, 62, 98, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_32) (planner *p) { + X(kdft_dit_register) (p, t1bv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1bv_32 -include t1b.h -sign 1 */ + +/* + * This function contains 217 FP additions, 104 FP multiplications, + * (or, 201 additions, 88 multiplications, 16 fused multiply/add), + * 59 stack variables, 7 constants, and 64 memory accesses + */ +#include "t1b.h" + +static void t1bv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = 
W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T4, T1D, T2P, T3h, Tf, T1y, T2K, T3i, TC, T1w, T2G, T3e, Tr, T1v, T2D; + V T3d, T1k, T20, T2y, T3a, T1r, T21, T2v, T39, TV, T1X, T2r, T37, T12, T1Y; + V T2o, T36; + { + V T1, T1C, T3, T1A, T1B, T2, T1z, T2N, T2O; + T1 = LD(&(x[0]), ms, &(x[0])); + T1B = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1C = BYTW(&(W[TWVL * 46]), T1B); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 30]), T2); + T1z = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1A = BYTW(&(W[TWVL * 14]), T1z); + T4 = VSUB(T1, T3); + T1D = VSUB(T1A, T1C); + T2N = VADD(T1, T3); + T2O = VADD(T1A, T1C); + T2P = VSUB(T2N, T2O); + T3h = VADD(T2N, T2O); + } + { + V T6, Td, T8, Tb; + { + V T5, Tc, T7, Ta; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTW(&(W[TWVL * 6]), T5); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 22]), Tc); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T8 = BYTW(&(W[TWVL * 38]), T7); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 54]), Ta); + } + { + V T9, Te, T2I, T2J; + T9 = VSUB(T6, T8); + Te = VSUB(Tb, Td); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + T1y = VMUL(LDK(KP707106781), VSUB(T9, Te)); + T2I = VADD(T6, T8); + T2J = VADD(Tb, Td); + T2K = VSUB(T2I, T2J); + T3i = VADD(T2I, T2J); + } + } + { + V Tt, TA, Tv, Ty; + { + V Ts, Tz, Tu, Tx; + Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 10]), Ts); + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 26]), Tz); + Tu = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + Tv = BYTW(&(W[TWVL * 42]), Tu); + Tx = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 58]), Tx); + } + { + V Tw, TB, T2E, T2F; + Tw = VSUB(Tt, Tv); + TB = VSUB(Ty, TA); + TC = VFNMS(LDK(KP382683432), TB, VMUL(LDK(KP923879532), Tw)); + T1w = VFMA(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T2E = VADD(Ty, TA); + T2F = VADD(Tt, Tv); + T2G = VSUB(T2E, T2F); + T3e = VADD(T2E, T2F); + } + } + { + V Ti, Tp, Tk, Tn; + { + V Th, To, Tj, Tm; + Th = 
LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 2]), Th); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Tp = BYTW(&(W[TWVL * 50]), To); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tk = BYTW(&(W[TWVL * 34]), Tj); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tn = BYTW(&(W[TWVL * 18]), Tm); + } + { + V Tl, Tq, T2B, T2C; + Tl = VSUB(Ti, Tk); + Tq = VSUB(Tn, Tp); + Tr = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + T1v = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + T2B = VADD(Ti, Tk); + T2C = VADD(Tn, Tp); + T2D = VSUB(T2B, T2C); + T3d = VADD(T2B, T2C); + } + } + { + V T1g, T1i, T1o, T1m, T1a, T1c, T1d, T15, T17, T18; + { + V T1f, T1h, T1n, T1l; + T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1g = BYTW(&(W[TWVL * 12]), T1f); + T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1i = BYTW(&(W[TWVL * 44]), T1h); + T1n = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1o = BYTW(&(W[TWVL * 28]), T1n); + T1l = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1m = BYTW(&(W[TWVL * 60]), T1l); + { + V T19, T1b, T14, T16; + T19 = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1a = BYTW(&(W[TWVL * 52]), T19); + T1b = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1c = BYTW(&(W[TWVL * 20]), T1b); + T1d = VSUB(T1a, T1c); + T14 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T15 = BYTW(&(W[TWVL * 4]), T14); + T16 = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T17 = BYTW(&(W[TWVL * 36]), T16); + T18 = VSUB(T15, T17); + } + } + { + V T1e, T1j, T2w, T2x; + T1e = VMUL(LDK(KP707106781), VSUB(T18, T1d)); + T1j = VSUB(T1g, T1i); + T1k = VSUB(T1e, T1j); + T20 = VADD(T1j, T1e); + T2w = VADD(T15, T17); + T2x = VADD(T1a, T1c); + T2y = VSUB(T2w, T2x); + T3a = VADD(T2w, T2x); + } + { + V T1p, T1q, T2t, T2u; + T1p = VSUB(T1m, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T18, T1d)); + T1r = VSUB(T1p, T1q); + T21 = VADD(T1p, T1q); + T2t = VADD(T1m, T1o); + T2u = VADD(T1g, T1i); + T2v = VSUB(T2t, T2u); + T39 = VADD(T2t, T2u); + } + } + { + V TR, TT, TZ, TX, TL, TN, TO, TG, 
TI, TJ; + { + V TQ, TS, TY, TW; + TQ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TR = BYTW(&(W[TWVL * 16]), TQ); + TS = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + TT = BYTW(&(W[TWVL * 48]), TS); + TY = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TZ = BYTW(&(W[TWVL * 32]), TY); + TW = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TX = BYTW(&(W[0]), TW); + { + V TK, TM, TF, TH; + TK = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TL = BYTW(&(W[TWVL * 56]), TK); + TM = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TN = BYTW(&(W[TWVL * 24]), TM); + TO = VSUB(TL, TN); + TF = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TG = BYTW(&(W[TWVL * 8]), TF); + TH = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TI = BYTW(&(W[TWVL * 40]), TH); + TJ = VSUB(TG, TI); + } + } + { + V TP, TU, T2p, T2q; + TP = VMUL(LDK(KP707106781), VSUB(TJ, TO)); + TU = VSUB(TR, TT); + TV = VSUB(TP, TU); + T1X = VADD(TU, TP); + T2p = VADD(TG, TI); + T2q = VADD(TL, TN); + T2r = VSUB(T2p, T2q); + T37 = VADD(T2p, T2q); + } + { + V T10, T11, T2m, T2n; + T10 = VSUB(TX, TZ); + T11 = VMUL(LDK(KP707106781), VADD(TJ, TO)); + T12 = VSUB(T10, T11); + T1Y = VADD(T10, T11); + T2m = VADD(TX, TZ); + T2n = VADD(TR, TT); + T2o = VSUB(T2m, T2n); + T36 = VADD(T2m, T2n); + } + } + { + V T3q, T3u, T3t, T3v; + { + V T3o, T3p, T3r, T3s; + T3o = VADD(T3h, T3i); + T3p = VADD(T3d, T3e); + T3q = VSUB(T3o, T3p); + T3u = VADD(T3o, T3p); + T3r = VADD(T36, T37); + T3s = VADD(T39, T3a); + T3t = VBYI(VSUB(T3r, T3s)); + T3v = VADD(T3r, T3s); + } + ST(&(x[WS(rs, 24)]), VSUB(T3q, T3t), ms, &(x[0])); + ST(&(x[0]), VADD(T3u, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T3q, T3t), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T3u, T3v), ms, &(x[0])); + } + { + V T3f, T3j, T3c, T3k, T38, T3b; + T3f = VSUB(T3d, T3e); + T3j = VSUB(T3h, T3i); + T38 = VSUB(T36, T37); + T3b = VSUB(T39, T3a); + T3c = VMUL(LDK(KP707106781), VSUB(T38, T3b)); + T3k = VMUL(LDK(KP707106781), VADD(T38, T3b)); + { + V T3g, T3l, T3m, T3n; + T3g = VBYI(VSUB(T3c, T3f)); + T3l 
= VSUB(T3j, T3k); + ST(&(x[WS(rs, 12)]), VADD(T3g, T3l), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VSUB(T3l, T3g), ms, &(x[0])); + T3m = VBYI(VADD(T3f, T3c)); + T3n = VADD(T3j, T3k); + ST(&(x[WS(rs, 4)]), VADD(T3m, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VSUB(T3n, T3m), ms, &(x[0])); + } + } + { + V T2L, T31, T2R, T2Y, T2A, T2Z, T2U, T32, T2H, T2Q; + T2H = VMUL(LDK(KP707106781), VSUB(T2D, T2G)); + T2L = VSUB(T2H, T2K); + T31 = VADD(T2K, T2H); + T2Q = VMUL(LDK(KP707106781), VADD(T2D, T2G)); + T2R = VSUB(T2P, T2Q); + T2Y = VADD(T2P, T2Q); + { + V T2s, T2z, T2S, T2T; + T2s = VFNMS(LDK(KP382683432), T2r, VMUL(LDK(KP923879532), T2o)); + T2z = VFMA(LDK(KP923879532), T2v, VMUL(LDK(KP382683432), T2y)); + T2A = VSUB(T2s, T2z); + T2Z = VADD(T2s, T2z); + T2S = VFMA(LDK(KP382683432), T2o, VMUL(LDK(KP923879532), T2r)); + T2T = VFNMS(LDK(KP382683432), T2v, VMUL(LDK(KP923879532), T2y)); + T2U = VSUB(T2S, T2T); + T32 = VADD(T2S, T2T); + } + { + V T2M, T2V, T34, T35; + T2M = VBYI(VSUB(T2A, T2L)); + T2V = VSUB(T2R, T2U); + ST(&(x[WS(rs, 10)]), VADD(T2M, T2V), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VSUB(T2V, T2M), ms, &(x[0])); + T34 = VSUB(T2Y, T2Z); + T35 = VBYI(VSUB(T32, T31)); + ST(&(x[WS(rs, 18)]), VSUB(T34, T35), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T34, T35), ms, &(x[0])); + } + { + V T2W, T2X, T30, T33; + T2W = VBYI(VADD(T2L, T2A)); + T2X = VADD(T2R, T2U); + ST(&(x[WS(rs, 6)]), VADD(T2W, T2X), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VSUB(T2X, T2W), ms, &(x[0])); + T30 = VADD(T2Y, T2Z); + T33 = VBYI(VADD(T31, T32)); + ST(&(x[WS(rs, 30)]), VSUB(T30, T33), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T30, T33), ms, &(x[0])); + } + } + { + V TE, T1P, T1I, T1Q, T1t, T1M, T1F, T1N; + { + V Tg, TD, T1G, T1H; + Tg = VSUB(T4, Tf); + TD = VSUB(Tr, TC); + TE = VSUB(Tg, TD); + T1P = VADD(Tg, TD); + T1G = VFNMS(LDK(KP555570233), TV, VMUL(LDK(KP831469612), T12)); + T1H = VFMA(LDK(KP555570233), T1k, VMUL(LDK(KP831469612), T1r)); + T1I = VSUB(T1G, T1H); + T1Q = VADD(T1G, T1H); + } + { + V T13, 
T1s, T1x, T1E; + T13 = VFMA(LDK(KP831469612), TV, VMUL(LDK(KP555570233), T12)); + T1s = VFNMS(LDK(KP555570233), T1r, VMUL(LDK(KP831469612), T1k)); + T1t = VSUB(T13, T1s); + T1M = VADD(T13, T1s); + T1x = VSUB(T1v, T1w); + T1E = VSUB(T1y, T1D); + T1F = VSUB(T1x, T1E); + T1N = VADD(T1E, T1x); + } + { + V T1u, T1J, T1S, T1T; + T1u = VADD(TE, T1t); + T1J = VBYI(VADD(T1F, T1I)); + ST(&(x[WS(rs, 27)]), VSUB(T1u, T1J), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1u, T1J), ms, &(x[WS(rs, 1)])); + T1S = VBYI(VADD(T1N, T1M)); + T1T = VADD(T1P, T1Q); + ST(&(x[WS(rs, 3)]), VADD(T1S, T1T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VSUB(T1T, T1S), ms, &(x[WS(rs, 1)])); + } + { + V T1K, T1L, T1O, T1R; + T1K = VSUB(TE, T1t); + T1L = VBYI(VSUB(T1I, T1F)); + ST(&(x[WS(rs, 21)]), VSUB(T1K, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T1K, T1L), ms, &(x[WS(rs, 1)])); + T1O = VBYI(VSUB(T1M, T1N)); + T1R = VSUB(T1P, T1Q); + ST(&(x[WS(rs, 13)]), VADD(T1O, T1R), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VSUB(T1R, T1O), ms, &(x[WS(rs, 1)])); + } + } + { + V T1W, T2h, T2a, T2i, T23, T2e, T27, T2f; + { + V T1U, T1V, T28, T29; + T1U = VADD(T4, Tf); + T1V = VADD(T1v, T1w); + T1W = VSUB(T1U, T1V); + T2h = VADD(T1U, T1V); + T28 = VFNMS(LDK(KP195090322), T1X, VMUL(LDK(KP980785280), T1Y)); + T29 = VFMA(LDK(KP195090322), T20, VMUL(LDK(KP980785280), T21)); + T2a = VSUB(T28, T29); + T2i = VADD(T28, T29); + } + { + V T1Z, T22, T25, T26; + T1Z = VFMA(LDK(KP980785280), T1X, VMUL(LDK(KP195090322), T1Y)); + T22 = VFNMS(LDK(KP195090322), T21, VMUL(LDK(KP980785280), T20)); + T23 = VSUB(T1Z, T22); + T2e = VADD(T1Z, T22); + T25 = VADD(Tr, TC); + T26 = VADD(T1D, T1y); + T27 = VSUB(T25, T26); + T2f = VADD(T26, T25); + } + { + V T24, T2b, T2k, T2l; + T24 = VADD(T1W, T23); + T2b = VBYI(VADD(T27, T2a)); + ST(&(x[WS(rs, 25)]), VSUB(T24, T2b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T24, T2b), ms, &(x[WS(rs, 1)])); + T2k = VBYI(VADD(T2f, T2e)); + T2l = VADD(T2h, T2i); + ST(&(x[WS(rs, 
1)]), VADD(T2k, T2l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VSUB(T2l, T2k), ms, &(x[WS(rs, 1)])); + } + { + V T2c, T2d, T2g, T2j; + T2c = VSUB(T1W, T23); + T2d = VBYI(VSUB(T2a, T27)); + ST(&(x[WS(rs, 23)]), VSUB(T2c, T2d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T2c, T2d), ms, &(x[WS(rs, 1)])); + T2g = VBYI(VSUB(T2e, T2f)); + T2j = VSUB(T2h, T2i); + ST(&(x[WS(rs, 15)]), VADD(T2g, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VSUB(T2j, T2g), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t1bv_32"), twinstr, &GENUS, {201, 88, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_32) (planner *p) { + X(kdft_dit_register) (p, t1bv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1bv_4 -include t1b.h -sign 1 */ + +/* + * This function contains 11 FP additions, 8 FP multiplications, + * (or, 9 additions, 6 multiplications, 2 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1b.h" + +static void t1bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T7, T2, T5, T8, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 4]), T7); + T3 = BYTW(&(W[TWVL * 2]), T2); + T6 = BYTW(&(W[0]), T5); + { + V Ta, T4, Tb, T9; + Ta = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tb = VADD(T6, T8); + T9 = VSUB(T6, T8); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), 
+ VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1bv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_4) (planner *p) { + X(kdft_dit_register) (p, t1bv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1bv_4 -include t1b.h -sign 1 */ + +/* + * This function contains 11 FP additions, 6 FP multiplications, + * (or, 11 additions, 6 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1b.h" + +static void t1bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T3, T6, T7, T2, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 4]), T7); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 2]), T2); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTW(&(W[0]), T5); + { + V T4, T9, Ta, Tb; + T4 = VSUB(T1, T3); + T9 = VBYI(VSUB(T6, T8)); + ST(&(x[WS(rs, 3)]), VSUB(T4, T9), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T4, T9), ms, &(x[WS(rs, 1)])); + Ta = VADD(T1, T3); + Tb = VADD(T6, T8); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1bv_4"), twinstr, &GENUS, {11, 6, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_4) (planner *p) { + X(kdft_dit_register) (p, t1bv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1bv_5 -include t1b.h -sign 1 */ + +/* + * This function contains 20 FP additions, 19 FP multiplications, + * (or, 11 additions, 10 multiplications, 9 fused multiply/add), + * 26 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1b.h" + +static void t1bv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x 
+ (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T2, T9, T4, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, Ta, T5, T8; + T3 = BYTW(&(W[0]), T2); + Ta = BYTW(&(W[TWVL * 4]), T9); + T5 = BYTW(&(W[TWVL * 6]), T4); + T8 = BYTW(&(W[TWVL * 2]), T7); + { + V T6, Tg, Tb, Th; + T6 = VADD(T3, T5); + Tg = VSUB(T3, T5); + Tb = VADD(T8, Ta); + Th = VSUB(T8, Ta); + { + V Te, Tc, Tk, Ti, Td, Tj, Tf; + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Tk = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tg, Th)); + Ti = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Th, Tg)); + Td = VFNMS(LDK(KP250000000), Tc, T1); + ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); + Tj = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + ST(&(x[WS(rs, 2)]), VFNMSI(Tk, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Ti, Tf), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1bv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_5) (planner *p) { + X(kdft_dit_register) (p, t1bv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1bv_5 -include t1b.h -sign 1 */ + +/* + * This function contains 20 FP additions, 14 FP multiplications, + * (or, 17 additions, 11 multiplications, 3 fused multiply/add), + * 20 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1b.h" + +static void t1bv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, 
+0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V Tf, T5, Ta, Tc, Td, Tg; + Tf = LD(&(x[0]), ms, &(x[0])); + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 6]), T3); + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 2]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tc = VADD(T2, T4); + Td = VADD(T7, T9); + Tg = VADD(Tc, Td); + } + ST(&(x[0]), VADD(Tf, Tg), ms, &(x[0])); + { + V Tb, Tj, Ti, Tk, Te, Th; + Tb = VBYI(VFMA(LDK(KP951056516), T5, VMUL(LDK(KP587785252), Ta))); + Tj = VBYI(VFNMS(LDK(KP951056516), Ta, VMUL(LDK(KP587785252), T5))); + Te = VMUL(LDK(KP559016994), VSUB(Tc, Td)); + Th = VFNMS(LDK(KP250000000), Tg, Tf); + Ti = VADD(Te, Th); + Tk = VSUB(Th, Te); + ST(&(x[WS(rs, 1)]), VADD(Tb, Ti), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VSUB(Ti, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tj, Tk), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1bv_5"), twinstr, &GENUS, {17, 11, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_5) (planner *p) { + X(kdft_dit_register) (p, t1bv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1bv_6 -include t1b.h -sign 1 */ + +/* + * This function contains 23 FP additions, 18 FP multiplications, + * (or, 17 additions, 12 multiplications, 6 fused multiply/add), + * 27 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1b.h" + +static void t1bv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V T1, T2, Ta, Tc, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = 
LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T3, Tb, Td, T6, T8; + T3 = BYTW(&(W[TWVL * 4]), T2); + Tb = BYTW(&(W[TWVL * 6]), Ta); + Td = BYTW(&(W[0]), Tc); + T6 = BYTW(&(W[TWVL * 2]), T5); + T8 = BYTW(&(W[TWVL * 8]), T7); + { + V Ti, T4, Tk, Te, Tj, T9; + Ti = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tk = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tj = VADD(T6, T8); + T9 = VSUB(T6, T8); + { + V Tl, Tn, Tf, Th, Tm, Tg; + Tl = VADD(Tj, Tk); + Tn = VMUL(LDK(KP866025403), VSUB(Tj, Tk)); + Tf = VADD(T9, Te); + Th = VMUL(LDK(KP866025403), VSUB(T9, Te)); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[WS(rs, 3)]), VADD(T4, Tf), ms, &(x[WS(rs, 1)])); + Tg = VFNMS(LDK(KP500000000), Tf, T4); + ST(&(x[WS(rs, 4)]), VFMAI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(Th, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Th, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1bv_6"), twinstr, &GENUS, {17, 12, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_6) (planner *p) { + X(kdft_dit_register) (p, t1bv_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1bv_6 -include t1b.h -sign 1 */ + +/* + * This function contains 23 FP additions, 14 FP multiplications, + * (or, 21 additions, 12 multiplications, 2 fused multiply/add), + * 19 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1b.h" + +static void t1bv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, 
+0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V Tf, Ti, Ta, Tk, T5, Tj, Tc, Te, Td; + Tc = LD(&(x[0]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Te = BYTW(&(W[TWVL * 4]), Td); + Tf = VSUB(Tc, Te); + Ti = VADD(Tc, Te); + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 6]), T6); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[0]), T8); + Ta = VSUB(T7, T9); + Tk = VADD(T7, T9); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTW(&(W[TWVL * 2]), T1); + T3 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 8]), T3); + T5 = VSUB(T2, T4); + Tj = VADD(T2, T4); + } + { + V Tb, Tg, Th, Tn, Tl, Tm; + Tb = VBYI(VMUL(LDK(KP866025403), VSUB(T5, Ta))); + Tg = VADD(T5, Ta); + Th = VFNMS(LDK(KP500000000), Tg, Tf); + ST(&(x[WS(rs, 1)]), VADD(Tb, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(Tf, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Th, Tb), ms, &(x[WS(rs, 1)])); + Tn = VBYI(VMUL(LDK(KP866025403), VSUB(Tj, Tk))); + Tl = VADD(Tj, Tk); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[WS(rs, 2)]), VSUB(Tm, Tn), ms, &(x[0])); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(Tn, Tm), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1bv_6"), twinstr, &GENUS, {21, 12, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_6) (planner *p) { + X(kdft_dit_register) (p, t1bv_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_64.c --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1877 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:35 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t1bv_64 -include t1b.h -sign 1 */ + +/* + * This function contains 519 FP additions, 384 FP multiplications, + * (or, 261 additions, 126 multiplications, 258 fused multiply/add), + * 187 stack variables, 15 constants, and 128 memory accesses + */ +#include "t1b.h" + +static void t1bv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + 
DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V T6L, T6M, T6O, T6P, T75, T6V, T5A, T6A, T72, T6K, T6t, T6D, T6w, T6B, T6h; + V T6E; + { + V Ta, T3U, T3V, T37, T7a, T58, T7B, T6l, T1v, T24, T5Q, T7o, T5F, T7l, T43; + V T4F, T2i, T2R, T6b, T7v, T60, T7s, T4a, T4I, T5u, T7h, T5x, T7g, T1i, T3b; + V T4m, T4C, T7e, T5l, T7d, T5o, T3a, TV, T4B, T4j, T3X, T3Y, T6o, T7b, T5f; + V T7C, Tx, T38, T2p, T61, T2n, T65, T2D, T7p, T5M, T7m, T5T, T4G, T46, T25; + V T1S, T2q, T2u, T2w; + { + V T5q, T10, T5v, T15, T1b, T5s, T1c, T1e; + { + V T1V, T1p, T5B, T5O, T1u, T1X, T20, T21; + { + V T1, T2, T7, T5, T32, T34, T2X, T2Z; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T32 = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + T34 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T2X = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T2Z = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + { + V T1m, T54, T6j, T36, T56, T31, T55, T1n, T1q, T1s, T4, T9; + { + V T3, T8, T6, T33, T35, T2Y, T30, T1l; + T1l = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 62]), T2); + T8 = BYTW(&(W[TWVL * 94]), 
T7); + T6 = BYTW(&(W[TWVL * 30]), T5); + T33 = BYTW(&(W[TWVL * 110]), T32); + T35 = BYTW(&(W[TWVL * 46]), T34); + T2Y = BYTW(&(W[TWVL * 14]), T2X); + T30 = BYTW(&(W[TWVL * 78]), T2Z); + T1m = BYTW(&(W[0]), T1l); + T54 = VSUB(T1, T3); + T4 = VADD(T1, T3); + T6j = VSUB(T6, T8); + T9 = VADD(T6, T8); + T36 = VADD(T33, T35); + T56 = VSUB(T33, T35); + T31 = VADD(T2Y, T30); + T55 = VSUB(T2Y, T30); + T1n = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + } + T1q = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1s = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + Ta = VSUB(T4, T9); + T3U = VADD(T4, T9); + { + V T57, T6k, T1o, T1r, T1t, T1W, T1U, T1Z; + T1U = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T3V = VADD(T31, T36); + T37 = VSUB(T31, T36); + T57 = VADD(T55, T56); + T6k = VSUB(T55, T56); + T1o = BYTW(&(W[TWVL * 64]), T1n); + T1r = BYTW(&(W[TWVL * 32]), T1q); + T1t = BYTW(&(W[TWVL * 96]), T1s); + T1V = BYTW(&(W[TWVL * 16]), T1U); + T1W = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T1Z = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T7a = VFNMS(LDK(KP707106781), T57, T54); + T58 = VFMA(LDK(KP707106781), T57, T54); + T7B = VFNMS(LDK(KP707106781), T6k, T6j); + T6l = VFMA(LDK(KP707106781), T6k, T6j); + T1p = VADD(T1m, T1o); + T5B = VSUB(T1m, T1o); + T5O = VSUB(T1r, T1t); + T1u = VADD(T1r, T1t); + T1X = BYTW(&(W[TWVL * 80]), T1W); + T20 = BYTW(&(W[TWVL * 112]), T1Z); + T21 = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T5W, T2N, T69, T2L, T5Y, T2P, T48, T2c, T2h; + { + V T41, T1Y, T5C, T22, T2d, T29, T2b, T2f, T28, T2a, T2H, T2J; + T28 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T2a = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1v = VSUB(T1p, T1u); + T41 = VADD(T1p, T1u); + T1Y = VADD(T1V, T1X); + T5C = VSUB(T1V, T1X); + T22 = BYTW(&(W[TWVL * 48]), T21); + T2d = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T29 = BYTW(&(W[TWVL * 124]), T28); + T2b = BYTW(&(W[TWVL * 60]), T2a); + T2f = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T2H = LD(&(x[WS(rs, 55)]), ms, 
&(x[WS(rs, 1)])); + T2J = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + { + V T23, T5D, T2e, T2g, T2I, T2K, T2M; + T2M = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T23 = VADD(T20, T22); + T5D = VSUB(T20, T22); + T2e = BYTW(&(W[TWVL * 28]), T2d); + T2c = VADD(T29, T2b); + T5W = VSUB(T29, T2b); + T2g = BYTW(&(W[TWVL * 92]), T2f); + T2I = BYTW(&(W[TWVL * 108]), T2H); + T2K = BYTW(&(W[TWVL * 44]), T2J); + T2N = BYTW(&(W[TWVL * 12]), T2M); + { + V T5E, T5P, T42, T2O; + T5E = VADD(T5C, T5D); + T5P = VSUB(T5C, T5D); + T24 = VSUB(T1Y, T23); + T42 = VADD(T1Y, T23); + T69 = VSUB(T2g, T2e); + T2h = VADD(T2e, T2g); + T2O = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2L = VADD(T2I, T2K); + T5Y = VSUB(T2I, T2K); + T5Q = VFMA(LDK(KP707106781), T5P, T5O); + T7o = VFNMS(LDK(KP707106781), T5P, T5O); + T5F = VFMA(LDK(KP707106781), T5E, T5B); + T7l = VFNMS(LDK(KP707106781), T5E, T5B); + T43 = VADD(T41, T42); + T4F = VSUB(T41, T42); + T2P = BYTW(&(W[TWVL * 76]), T2O); + } + } + } + T2i = VSUB(T2c, T2h); + T48 = VADD(T2c, T2h); + { + V TW, TY, T11, T2Q, T5X, T13; + TW = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T11 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2Q = VADD(T2N, T2P); + T5X = VSUB(T2N, T2P); + T13 = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + { + V T12, T5Z, T6a, T49, T14, T18, T1a; + { + V T17, T19, TX, TZ; + T17 = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + T19 = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TX = BYTW(&(W[TWVL * 122]), TW); + TZ = BYTW(&(W[TWVL * 58]), TY); + T12 = BYTW(&(W[TWVL * 26]), T11); + T5Z = VADD(T5X, T5Y); + T6a = VSUB(T5Y, T5X); + T2R = VSUB(T2L, T2Q); + T49 = VADD(T2Q, T2L); + T14 = BYTW(&(W[TWVL * 90]), T13); + T18 = BYTW(&(W[TWVL * 106]), T17); + T5q = VSUB(TX, TZ); + T10 = VADD(TX, TZ); + T1a = BYTW(&(W[TWVL * 42]), T19); + } + T6b = VFMA(LDK(KP707106781), T6a, T69); + T7v = VFNMS(LDK(KP707106781), T6a, T69); + T60 = VFMA(LDK(KP707106781), T5Z, T5W); + T7s = VFNMS(LDK(KP707106781), T5Z, T5W); + T4a = VADD(T48, T49); + T4I = 
VSUB(T48, T49); + T5v = VSUB(T14, T12); + T15 = VADD(T12, T14); + T1b = VADD(T18, T1a); + T5s = VSUB(T18, T1a); + } + T1c = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1e = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + } + } + } + { + V Th, T59, Tf, Tv, T5d, Tj, Tm, To; + { + V T5h, TQ, T5m, T5i, TO, TS, TJ, T4h, TD, TI; + { + V T4k, T16, TB, T1d, T1f, TE, TG, TA, Tz, TK, TM, TC; + Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T4k = VADD(T10, T15); + T16 = VSUB(T10, T15); + TB = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + T1d = BYTW(&(W[TWVL * 10]), T1c); + T1f = BYTW(&(W[TWVL * 74]), T1e); + TE = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 2]), Tz); + TK = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TM = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + TC = BYTW(&(W[TWVL * 66]), TB); + { + V T1g, T5r, TF, TH, TL, TN, TP; + TP = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + T1g = VADD(T1d, T1f); + T5r = VSUB(T1d, T1f); + TF = BYTW(&(W[TWVL * 34]), TE); + TH = BYTW(&(W[TWVL * 98]), TG); + TL = BYTW(&(W[TWVL * 18]), TK); + TN = BYTW(&(W[TWVL * 82]), TM); + T5h = VSUB(TA, TC); + TD = VADD(TA, TC); + TQ = BYTW(&(W[TWVL * 114]), TP); + { + V T5w, T5t, T4l, T1h, TR; + T5w = VSUB(T5s, T5r); + T5t = VADD(T5r, T5s); + T4l = VADD(T1g, T1b); + T1h = VSUB(T1b, T1g); + T5m = VSUB(TF, TH); + TI = VADD(TF, TH); + T5i = VSUB(TL, TN); + TO = VADD(TL, TN); + TR = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T5u = VFMA(LDK(KP707106781), T5t, T5q); + T7h = VFNMS(LDK(KP707106781), T5t, T5q); + T5x = VFMA(LDK(KP707106781), T5w, T5v); + T7g = VFNMS(LDK(KP707106781), T5w, T5v); + T1i = VFNMS(LDK(KP414213562), T1h, T16); + T3b = VFMA(LDK(KP414213562), T16, T1h); + T4m = VADD(T4k, T4l); + T4C = VSUB(T4k, T4l); + TS = BYTW(&(W[TWVL * 50]), TR); + } + } + } + TJ = VSUB(TD, TI); + T4h = VADD(TD, TI); + { + V Tb, Td, Tr, T5j, TT, Tt, Tg; + Tb = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tr = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T5j = VSUB(TQ, TS); + TT = VADD(TQ, TS); 
+ Tt = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + Tg = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + { + V Ti, Tc, Te, Ts; + Ti = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tc = BYTW(&(W[TWVL * 6]), Tb); + Te = BYTW(&(W[TWVL * 70]), Td); + Ts = BYTW(&(W[TWVL * 22]), Tr); + { + V T5k, T5n, TU, T4i, Tu; + T5k = VADD(T5i, T5j); + T5n = VSUB(T5i, T5j); + TU = VSUB(TO, TT); + T4i = VADD(TO, TT); + Tu = BYTW(&(W[TWVL * 86]), Tt); + Th = BYTW(&(W[TWVL * 38]), Tg); + T59 = VSUB(Tc, Te); + Tf = VADD(Tc, Te); + T7e = VFNMS(LDK(KP707106781), T5k, T5h); + T5l = VFMA(LDK(KP707106781), T5k, T5h); + T7d = VFNMS(LDK(KP707106781), T5n, T5m); + T5o = VFMA(LDK(KP707106781), T5n, T5m); + T3a = VFMA(LDK(KP414213562), TJ, TU); + TV = VFNMS(LDK(KP414213562), TU, TJ); + T4B = VSUB(T4h, T4i); + T4j = VADD(T4h, T4i); + Tv = VADD(Ts, Tu); + T5d = VSUB(Tu, Ts); + Tj = BYTW(&(W[TWVL * 102]), Ti); + } + } + Tm = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + To = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + } + } + { + V T5b, T6m, Tl, T1A, T5G, T1Q, T5K, T1C, T1D, T5e, T6n, Tw, T1H, T1J; + { + V T1w, T1y, T1M, T1O, Tq, T5c, T1B; + T1w = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1y = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1M = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1O = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1B = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + { + V Tk, T5a, Tn, Tp; + Tk = VADD(Th, Tj); + T5a = VSUB(Th, Tj); + Tn = BYTW(&(W[TWVL * 118]), Tm); + Tp = BYTW(&(W[TWVL * 54]), To); + { + V T1x, T1z, T1N, T1P; + T1x = BYTW(&(W[TWVL * 8]), T1w); + T1z = BYTW(&(W[TWVL * 72]), T1y); + T1N = BYTW(&(W[TWVL * 24]), T1M); + T1P = BYTW(&(W[TWVL * 88]), T1O); + T5b = VFNMS(LDK(KP414213562), T5a, T59); + T6m = VFMA(LDK(KP414213562), T59, T5a); + T3X = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + Tq = VADD(Tn, Tp); + T5c = VSUB(Tn, Tp); + T1A = VADD(T1x, T1z); + T5G = VSUB(T1x, T1z); + T1Q = VADD(T1N, T1P); + T5K = VSUB(T1N, T1P); + T1C = BYTW(&(W[TWVL * 40]), T1B); + } + } + T1D = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 
1)])); + T5e = VFNMS(LDK(KP414213562), T5d, T5c); + T6n = VFMA(LDK(KP414213562), T5c, T5d); + T3Y = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + T1H = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)])); + T1J = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + } + { + V T1I, T1K, T1F, T5H, T2k, T2l, T2z, T2B, T2j, T1E; + T2j = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1E = BYTW(&(W[TWVL * 104]), T1D); + T6o = VSUB(T6m, T6n); + T7b = VADD(T6m, T6n); + T5f = VADD(T5b, T5e); + T7C = VSUB(T5b, T5e); + Tx = VADD(Tl, Tw); + T38 = VSUB(Tl, Tw); + T1I = BYTW(&(W[TWVL * 120]), T1H); + T1K = BYTW(&(W[TWVL * 56]), T1J); + T1F = VADD(T1C, T1E); + T5H = VSUB(T1C, T1E); + T2k = BYTW(&(W[TWVL * 4]), T2j); + T2l = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2z = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2B = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + { + V T5I, T5R, T44, T1G, T2m, T2A, T2C, T5S, T5L, T1R, T45, T2o, T5J, T1L; + T2o = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T5J = VSUB(T1I, T1K); + T1L = VADD(T1I, T1K); + T5I = VFNMS(LDK(KP414213562), T5H, T5G); + T5R = VFMA(LDK(KP414213562), T5G, T5H); + T44 = VADD(T1A, T1F); + T1G = VSUB(T1A, T1F); + T2m = BYTW(&(W[TWVL * 68]), T2l); + T2A = BYTW(&(W[TWVL * 20]), T2z); + T2C = BYTW(&(W[TWVL * 84]), T2B); + T5S = VFNMS(LDK(KP414213562), T5J, T5K); + T5L = VFMA(LDK(KP414213562), T5K, T5J); + T1R = VSUB(T1L, T1Q); + T45 = VADD(T1L, T1Q); + T2p = BYTW(&(W[TWVL * 36]), T2o); + T61 = VSUB(T2k, T2m); + T2n = VADD(T2k, T2m); + T65 = VSUB(T2C, T2A); + T2D = VADD(T2A, T2C); + T7p = VSUB(T5I, T5L); + T5M = VADD(T5I, T5L); + T7m = VSUB(T5R, T5S); + T5T = VADD(T5R, T5S); + T4G = VSUB(T44, T45); + T46 = VADD(T44, T45); + T25 = VSUB(T1G, T1R); + T1S = VADD(T1G, T1R); + T2q = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + } + T2u = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2w = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + } + } + } + } + { + V T67, T7w, T6e, T7t, T3s, T3E, T39, T3D, T1k, T3k, T3t, T3c, T1T, T3v, T3w; + V T26, T2G, T3y, T3z, T2T; + { + 
V T4A, T4N, T47, T4v, T2r, T2v, T2x, T4s, T40, T3W, T3Z; + T4A = VSUB(T3U, T3V); + T3W = VADD(T3U, T3V); + T3Z = VADD(T3X, T3Y); + T4N = VSUB(T3X, T3Y); + T47 = VSUB(T43, T46); + T4v = VADD(T43, T46); + T2r = BYTW(&(W[TWVL * 100]), T2q); + T2v = BYTW(&(W[TWVL * 116]), T2u); + T2x = BYTW(&(W[TWVL * 52]), T2w); + T4s = VADD(T3W, T3Z); + T40 = VSUB(T3W, T3Z); + { + V T4O, T4n, T4Q, T4H, T4E, T4W, T4u, T4y, T4d, T4J, T2F, T2S; + { + V T6c, T63, T2t, T4b, T6d, T66, T2E, T4c; + { + V T4D, T62, T2s, T64, T2y, T4t; + T4O = VSUB(T4B, T4C); + T4D = VADD(T4B, T4C); + T62 = VSUB(T2r, T2p); + T2s = VADD(T2p, T2r); + T64 = VSUB(T2v, T2x); + T2y = VADD(T2v, T2x); + T4t = VADD(T4j, T4m); + T4n = VSUB(T4j, T4m); + T4Q = VFMA(LDK(KP414213562), T4F, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4F); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4W = VFNMS(LDK(KP707106781), T4D, T4A); + T6c = VFNMS(LDK(KP414213562), T61, T62); + T63 = VFMA(LDK(KP414213562), T62, T61); + T2t = VSUB(T2n, T2s); + T4b = VADD(T2n, T2s); + T6d = VFMA(LDK(KP414213562), T64, T65); + T66 = VFNMS(LDK(KP414213562), T65, T64); + T2E = VSUB(T2y, T2D); + T4c = VADD(T2y, T2D); + T4u = VSUB(T4s, T4t); + T4y = VADD(T4s, T4t); + } + T67 = VADD(T63, T66); + T7w = VSUB(T66, T63); + T6e = VADD(T6c, T6d); + T7t = VSUB(T6d, T6c); + T4d = VADD(T4b, T4c); + T4J = VSUB(T4c, T4b); + T2F = VADD(T2t, T2E); + T2S = VSUB(T2E, T2t); + } + { + V Ty, T1j, T4R, T4K; + Ty = VFMA(LDK(KP707106781), Tx, Ta); + T3s = VFNMS(LDK(KP707106781), Tx, Ta); + T3E = VSUB(TV, T1i); + T1j = VADD(TV, T1i); + T39 = VFMA(LDK(KP707106781), T38, T37); + T3D = VFNMS(LDK(KP707106781), T38, T37); + T4R = VFMA(LDK(KP414213562), T4I, T4J); + T4K = VFNMS(LDK(KP414213562), T4J, T4I); + { + V T4w, T4e, T4P, T4Z; + T4w = VADD(T4a, T4d); + T4e = VSUB(T4a, T4d); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T4Z = VFNMS(LDK(KP707106781), T4O, T4N); + T1k = VFMA(LDK(KP923879532), T1j, Ty); + T3k = VFNMS(LDK(KP923879532), T1j, Ty); + { + V T4L, T50, T4S, T4X; + T4L = 
VADD(T4H, T4K); + T50 = VSUB(T4H, T4K); + T4S = VSUB(T4Q, T4R); + T4X = VADD(T4Q, T4R); + { + V T4f, T4o, T4x, T4z; + T4f = VADD(T47, T4e); + T4o = VSUB(T47, T4e); + T4x = VSUB(T4v, T4w); + T4z = VADD(T4v, T4w); + { + V T53, T51, T4M, T4U; + T53 = VFNMS(LDK(KP923879532), T50, T4Z); + T51 = VFMA(LDK(KP923879532), T50, T4Z); + T4M = VFNMS(LDK(KP923879532), T4L, T4E); + T4U = VFMA(LDK(KP923879532), T4L, T4E); + { + V T52, T4Y, T4T, T4V; + T52 = VFMA(LDK(KP923879532), T4X, T4W); + T4Y = VFNMS(LDK(KP923879532), T4X, T4W); + T4T = VFNMS(LDK(KP923879532), T4S, T4P); + T4V = VFMA(LDK(KP923879532), T4S, T4P); + { + V T4p, T4r, T4g, T4q; + T4p = VFNMS(LDK(KP707106781), T4o, T4n); + T4r = VFMA(LDK(KP707106781), T4o, T4n); + T4g = VFNMS(LDK(KP707106781), T4f, T40); + T4q = VFMA(LDK(KP707106781), T4f, T40); + ST(&(x[0]), VADD(T4y, T4z), ms, &(x[0])); + ST(&(x[WS(rs, 32)]), VSUB(T4y, T4z), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFMAI(T4x, T4u), ms, &(x[0])); + ST(&(x[WS(rs, 48)]), VFNMSI(T4x, T4u), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VFNMSI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VFMAI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 60)]), VFNMSI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 36)]), VFMAI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 56)]), VFNMSI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 40)]), VFMAI(T4p, T4g), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T4p, T4g), ms, &(x[0])); + T3t = VADD(T3a, T3b); + T3c = VSUB(T3a, T3b); + } + } + } + } + } + } + T1T = VFMA(LDK(KP707106781), T1S, T1v); + T3v = VFNMS(LDK(KP707106781), T1S, T1v); + T3w = VFNMS(LDK(KP707106781), T25, T24); + T26 = VFMA(LDK(KP707106781), T25, T24); + T2G = VFMA(LDK(KP707106781), T2F, T2i); + T3y = VFNMS(LDK(KP707106781), T2F, T2i); + T3z = 
VFNMS(LDK(KP707106781), T2S, T2R); + T2T = VFMA(LDK(KP707106781), T2S, T2R); + } + } + } + { + V T3u, T3M, T3F, T3P, T3x, T3G, T3q, T3m, T3h, T3j, T3r, T3p, T2W, T3i; + { + V T3d, T3n, T27, T3e, T2U, T3f; + T3d = VFMA(LDK(KP923879532), T3c, T39); + T3n = VFNMS(LDK(KP923879532), T3c, T39); + T27 = VFNMS(LDK(KP198912367), T26, T1T); + T3e = VFMA(LDK(KP198912367), T1T, T26); + T2U = VFNMS(LDK(KP198912367), T2T, T2G); + T3f = VFMA(LDK(KP198912367), T2G, T2T); + T3u = VFMA(LDK(KP923879532), T3t, T3s); + T3M = VFNMS(LDK(KP923879532), T3t, T3s); + { + V T3g, T3l, T2V, T3o; + T3g = VSUB(T3e, T3f); + T3l = VADD(T3e, T3f); + T2V = VADD(T27, T2U); + T3o = VSUB(T27, T2U); + T3F = VFNMS(LDK(KP923879532), T3E, T3D); + T3P = VFMA(LDK(KP923879532), T3E, T3D); + T3x = VFMA(LDK(KP668178637), T3w, T3v); + T3G = VFNMS(LDK(KP668178637), T3v, T3w); + T3q = VFMA(LDK(KP980785280), T3l, T3k); + T3m = VFNMS(LDK(KP980785280), T3l, T3k); + T3h = VFNMS(LDK(KP980785280), T3g, T3d); + T3j = VFMA(LDK(KP980785280), T3g, T3d); + T3r = VFNMS(LDK(KP980785280), T3o, T3n); + T3p = VFMA(LDK(KP980785280), T3o, T3n); + T2W = VFNMS(LDK(KP980785280), T2V, T1k); + T3i = VFMA(LDK(KP980785280), T2V, T1k); + } + } + { + V T7n, T7Z, T8j, T89, T7k, T7O, T8g, T7Y, T7H, T7R, T80, T7q, T7u, T82, T83; + V T7x; + { + V T7c, T7W, T7D, T87, T7f, T7E, T3A, T3H, T7F, T7i; + T7c = VFNMS(LDK(KP923879532), T7b, T7a); + T7W = VFMA(LDK(KP923879532), T7b, T7a); + T7D = VFMA(LDK(KP923879532), T7C, T7B); + T87 = VFNMS(LDK(KP923879532), T7C, T7B); + T7f = VFNMS(LDK(KP668178637), T7e, T7d); + T7E = VFMA(LDK(KP668178637), T7d, T7e); + ST(&(x[WS(rs, 46)]), VFNMSI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 50)]), VFMAI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 62)]), VFNMSI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 34)]), VFMAI(T3h, T2W), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), 
VFNMSI(T3h, T2W), ms, &(x[0])); + T3A = VFMA(LDK(KP668178637), T3z, T3y); + T3H = VFNMS(LDK(KP668178637), T3y, T3z); + T7F = VFMA(LDK(KP668178637), T7g, T7h); + T7i = VFNMS(LDK(KP668178637), T7h, T7g); + T7n = VFNMS(LDK(KP923879532), T7m, T7l); + T7Z = VFMA(LDK(KP923879532), T7m, T7l); + { + V T3I, T3N, T3B, T3Q; + T3I = VSUB(T3G, T3H); + T3N = VADD(T3G, T3H); + T3B = VADD(T3x, T3A); + T3Q = VSUB(T3x, T3A); + { + V T7j, T88, T7G, T7X; + T7j = VADD(T7f, T7i); + T88 = VSUB(T7f, T7i); + T7G = VSUB(T7E, T7F); + T7X = VADD(T7E, T7F); + { + V T3S, T3O, T3J, T3L; + T3S = VFNMS(LDK(KP831469612), T3N, T3M); + T3O = VFMA(LDK(KP831469612), T3N, T3M); + T3J = VFNMS(LDK(KP831469612), T3I, T3F); + T3L = VFMA(LDK(KP831469612), T3I, T3F); + { + V T3T, T3R, T3C, T3K; + T3T = VFMA(LDK(KP831469612), T3Q, T3P); + T3R = VFNMS(LDK(KP831469612), T3Q, T3P); + T3C = VFNMS(LDK(KP831469612), T3B, T3u); + T3K = VFMA(LDK(KP831469612), T3B, T3u); + T8j = VFNMS(LDK(KP831469612), T88, T87); + T89 = VFMA(LDK(KP831469612), T88, T87); + T7k = VFNMS(LDK(KP831469612), T7j, T7c); + T7O = VFMA(LDK(KP831469612), T7j, T7c); + T8g = VFNMS(LDK(KP831469612), T7X, T7W); + T7Y = VFMA(LDK(KP831469612), T7X, T7W); + T7H = VFMA(LDK(KP831469612), T7G, T7D); + T7R = VFNMS(LDK(KP831469612), T7G, T7D); + ST(&(x[WS(rs, 42)]), VFMAI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 54)]), VFNMSI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 58)]), VFMAI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T3J, T3C), ms, &(x[0])); + ST(&(x[WS(rs, 38)]), VFNMSI(T3J, T3C), ms, &(x[0])); + T80 = VFNMS(LDK(KP923879532), T7p, T7o); + T7q = VFMA(LDK(KP923879532), T7p, T7o); + } + } + } + } + T7u = VFNMS(LDK(KP923879532), T7t, T7s); + T82 = VFMA(LDK(KP923879532), T7t, T7s); + T83 = VFNMS(LDK(KP923879532), T7w, T7v); + T7x = VFMA(LDK(KP923879532), T7w, T7v); + } + { + V T5g, T6I, 
T6p, T6T, T5p, T6q, T6r, T5y; + T5g = VFMA(LDK(KP923879532), T5f, T58); + T6I = VFNMS(LDK(KP923879532), T5f, T58); + { + V T7r, T7I, T7y, T7J; + T7r = VFNMS(LDK(KP534511135), T7q, T7n); + T7I = VFMA(LDK(KP534511135), T7n, T7q); + T7y = VFNMS(LDK(KP534511135), T7x, T7u); + T7J = VFMA(LDK(KP534511135), T7u, T7x); + { + V T81, T8a, T84, T8b; + T81 = VFMA(LDK(KP303346683), T80, T7Z); + T8a = VFNMS(LDK(KP303346683), T7Z, T80); + T84 = VFMA(LDK(KP303346683), T83, T82); + T8b = VFNMS(LDK(KP303346683), T82, T83); + T6p = VFMA(LDK(KP923879532), T6o, T6l); + T6T = VFNMS(LDK(KP923879532), T6o, T6l); + T5p = VFNMS(LDK(KP198912367), T5o, T5l); + T6q = VFMA(LDK(KP198912367), T5l, T5o); + { + V T7K, T7P, T7z, T7S; + T7K = VSUB(T7I, T7J); + T7P = VADD(T7I, T7J); + T7z = VADD(T7r, T7y); + T7S = VSUB(T7r, T7y); + { + V T8c, T8h, T85, T8k; + T8c = VSUB(T8a, T8b); + T8h = VADD(T8a, T8b); + T85 = VADD(T81, T84); + T8k = VSUB(T81, T84); + { + V T7Q, T7U, T7L, T7N; + T7Q = VFNMS(LDK(KP881921264), T7P, T7O); + T7U = VFMA(LDK(KP881921264), T7P, T7O); + T7L = VFNMS(LDK(KP881921264), T7K, T7H); + T7N = VFMA(LDK(KP881921264), T7K, T7H); + { + V T7T, T7V, T7A, T7M; + T7T = VFMA(LDK(KP881921264), T7S, T7R); + T7V = VFNMS(LDK(KP881921264), T7S, T7R); + T7A = VFNMS(LDK(KP881921264), T7z, T7k); + T7M = VFMA(LDK(KP881921264), T7z, T7k); + { + V T8i, T8m, T8d, T8f; + T8i = VFMA(LDK(KP956940335), T8h, T8g); + T8m = VFNMS(LDK(KP956940335), T8h, T8g); + T8d = VFNMS(LDK(KP956940335), T8c, T89); + T8f = VFMA(LDK(KP956940335), T8c, T89); + { + V T8l, T8n, T86, T8e; + T8l = VFNMS(LDK(KP956940335), T8k, T8j); + T8n = VFMA(LDK(KP956940335), T8k, T8j); + T86 = VFNMS(LDK(KP956940335), T85, T7Y); + T8e = VFMA(LDK(KP956940335), T85, T7Y); + ST(&(x[WS(rs, 53)]), VFMAI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 43)]), VFNMSI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VFMAI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), 
VFMAI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 59)]), VFNMSI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 37)]), VFMAI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFNMSI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 51)]), VFNMSI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 45)]), VFMAI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 61)]), VFMAI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFMAI(T8d, T86), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 35)]), VFNMSI(T8d, T86), ms, &(x[WS(rs, 1)])); + T6r = VFMA(LDK(KP198912367), T5u, T5x); + T5y = VFNMS(LDK(KP198912367), T5x, T5u); + } + } + } + } + } + } + } + } + { + V T5N, T5U, T68, T5z, T6U, T6f; + T5N = VFMA(LDK(KP923879532), T5M, T5F); + T6L = VFNMS(LDK(KP923879532), T5M, T5F); + T6M = VFNMS(LDK(KP923879532), T5T, T5Q); + T5U = VFMA(LDK(KP923879532), T5T, T5Q); + T68 = VFMA(LDK(KP923879532), T67, T60); + T6O = VFNMS(LDK(KP923879532), T67, T60); + T5z = VADD(T5p, T5y); + T6U = VSUB(T5p, T5y); + T6P = VFNMS(LDK(KP923879532), T6e, T6b); + T6f = VFMA(LDK(KP923879532), T6e, T6b); + { + V T5V, T6u, T6g, T6v, T6s, T6J; + T6s = VSUB(T6q, T6r); + T6J = VADD(T6q, T6r); + T5V = VFNMS(LDK(KP098491403), T5U, T5N); + T6u = VFMA(LDK(KP098491403), T5N, T5U); + T75 = VFMA(LDK(KP980785280), T6U, T6T); + T6V = VFNMS(LDK(KP980785280), T6U, T6T); + T5A = VFMA(LDK(KP980785280), T5z, T5g); + T6A = VFNMS(LDK(KP980785280), T5z, T5g); + T6g = VFNMS(LDK(KP098491403), T6f, T68); + T6v = VFMA(LDK(KP098491403), T68, T6f); + T72 = VFNMS(LDK(KP980785280), T6J, T6I); + T6K = VFMA(LDK(KP980785280), T6J, T6I); + T6t = VFMA(LDK(KP980785280), T6s, T6p); + T6D = VFNMS(LDK(KP980785280), T6s, T6p); + T6w = VSUB(T6u, T6v); + T6B = VADD(T6u, T6v); + T6h = VADD(T5V, T6g); + T6E = VSUB(T5V, T6g); + } + } + } + } + } + } + } + { + V 
T6W, T6N, T6G, T6C, T6z, T6x, T6H, T6F, T6y, T6i, T6X, T6Q; + T6W = VFNMS(LDK(KP820678790), T6L, T6M); + T6N = VFMA(LDK(KP820678790), T6M, T6L); + T6G = VFMA(LDK(KP995184726), T6B, T6A); + T6C = VFNMS(LDK(KP995184726), T6B, T6A); + T6z = VFMA(LDK(KP995184726), T6w, T6t); + T6x = VFNMS(LDK(KP995184726), T6w, T6t); + T6H = VFNMS(LDK(KP995184726), T6E, T6D); + T6F = VFMA(LDK(KP995184726), T6E, T6D); + T6y = VFMA(LDK(KP995184726), T6h, T5A); + T6i = VFNMS(LDK(KP995184726), T6h, T5A); + T6X = VFNMS(LDK(KP820678790), T6O, T6P); + T6Q = VFMA(LDK(KP820678790), T6P, T6O); + { + V T73, T6Y, T76, T6R; + ST(&(x[WS(rs, 49)]), VFMAI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VFNMSI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFMAI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 63)]), VFNMSI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 33)]), VFMAI(T6x, T6i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFNMSI(T6x, T6i), ms, &(x[WS(rs, 1)])); + T73 = VADD(T6W, T6X); + T6Y = VSUB(T6W, T6X); + T76 = VSUB(T6N, T6Q); + T6R = VADD(T6N, T6Q); + { + V T78, T74, T71, T6Z, T79, T77, T70, T6S; + T78 = VFNMS(LDK(KP773010453), T73, T72); + T74 = VFMA(LDK(KP773010453), T73, T72); + T71 = VFMA(LDK(KP773010453), T6Y, T6V); + T6Z = VFNMS(LDK(KP773010453), T6Y, T6V); + T79 = VFMA(LDK(KP773010453), T76, T75); + T77 = VFNMS(LDK(KP773010453), T76, T75); + T70 = VFMA(LDK(KP773010453), T6R, T6K); + T6S = VFNMS(LDK(KP773010453), T6R, T6K); + ST(&(x[WS(rs, 55)]), VFNMSI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VFMAI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VFNMSI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 57)]), VFMAI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFMAI(T6Z, T6S), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 39)]), VFNMSI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 64, XSIMD_STRING("t1bv_64"), twinstr, &GENUS, {261, 126, 258, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_64) (planner *p) { + X(kdft_dit_register) (p, t1bv_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t1bv_64 -include t1b.h -sign 1 */ + +/* + * This function contains 519 FP additions, 250 FP multiplications, + * (or, 467 additions, 198 multiplications, 52 fused multiply/add), + * 107 stack variables, 15 constants, and 128 memory accesses + */ +#include "t1b.h" + +static void t1bv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, 
+0.881921264348355029712756863660388349508442621); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V Tg, T4B, T6v, T7G, T3r, T4w, T5q, T7F, T5Y, T62, T28, T4d, T2g, T4a, T7g; + V T7Y, T6f, T6j, T2Z, T4k, T37, T4h, T7n, T81, T7w, T7x, T7y, T5M, T6q, T1k; + V T4s, T1r, T4t, T7t, T7u, T7v, T5F, T6p, TV, T4p, T12, T4q, T7A, T7B, TD; + V T4x, T3k, T4C, T5x, T6s, T1R, T4b, T7j, T7Z, T2j, T4e, T5V, T63, T2I, T4i; + V T7q, T82, T3a, T4l, T6c, T6k; + { + V T1, T3, T3p, T3n, Tb, Td, Te, T6, T8, T9, T2, T3o, T3m; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 62]), T2); + T3o = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T3p = BYTW(&(W[TWVL * 94]), T3o); + T3m = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3n = BYTW(&(W[TWVL * 30]), T3m); + { + V Ta, Tc, T5, T7; + Ta = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 110]), Ta); + Tc = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 46]), Tc); + Te = VSUB(Tb, Td); + T5 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T6 = BYTW(&(W[TWVL * 14]), T5); + T7 = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + T8 = 
BYTW(&(W[TWVL * 78]), T7); + T9 = VSUB(T6, T8); + } + { + V T4, Tf, T6t, T6u; + T4 = VSUB(T1, T3); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VSUB(T4, Tf); + T4B = VADD(T4, Tf); + T6t = VADD(T6, T8); + T6u = VADD(Tb, Td); + T6v = VSUB(T6t, T6u); + T7G = VADD(T6t, T6u); + } + { + V T3l, T3q, T5o, T5p; + T3l = VMUL(LDK(KP707106781), VSUB(T9, Te)); + T3q = VSUB(T3n, T3p); + T3r = VSUB(T3l, T3q); + T4w = VADD(T3q, T3l); + T5o = VADD(T1, T3); + T5p = VADD(T3n, T3p); + T5q = VSUB(T5o, T5p); + T7F = VADD(T5o, T5p); + } + } + { + V T24, T26, T61, T2b, T2d, T60, T1W, T5W, T21, T5X, T22, T27; + { + V T23, T25, T2a, T2c; + T23 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T24 = BYTW(&(W[TWVL * 32]), T23); + T25 = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + T26 = BYTW(&(W[TWVL * 96]), T25); + T61 = VADD(T24, T26); + T2a = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2b = BYTW(&(W[0]), T2a); + T2c = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + T2d = BYTW(&(W[TWVL * 64]), T2c); + T60 = VADD(T2b, T2d); + } + { + V T1T, T1V, T1S, T1U; + T1S = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1T = BYTW(&(W[TWVL * 16]), T1S); + T1U = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T1V = BYTW(&(W[TWVL * 80]), T1U); + T1W = VSUB(T1T, T1V); + T5W = VADD(T1T, T1V); + } + { + V T1Y, T20, T1X, T1Z; + T1X = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T1Y = BYTW(&(W[TWVL * 112]), T1X); + T1Z = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T20 = BYTW(&(W[TWVL * 48]), T1Z); + T21 = VSUB(T1Y, T20); + T5X = VADD(T1Y, T20); + } + T5Y = VSUB(T5W, T5X); + T62 = VSUB(T60, T61); + T22 = VMUL(LDK(KP707106781), VSUB(T1W, T21)); + T27 = VSUB(T24, T26); + T28 = VSUB(T22, T27); + T4d = VADD(T27, T22); + { + V T2e, T2f, T7e, T7f; + T2e = VSUB(T2b, T2d); + T2f = VMUL(LDK(KP707106781), VADD(T1W, T21)); + T2g = VSUB(T2e, T2f); + T4a = VADD(T2e, T2f); + T7e = VADD(T60, T61); + T7f = VADD(T5W, T5X); + T7g = VSUB(T7e, T7f); + T7Y = VADD(T7e, T7f); + } + } + { + V T2V, T2X, T6i, T32, T34, T6h, T2N, T6d, T2S, 
T6e, T2T, T2Y; + { + V T2U, T2W, T31, T33; + T2U = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2V = BYTW(&(W[TWVL * 28]), T2U); + T2W = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T2X = BYTW(&(W[TWVL * 92]), T2W); + T6i = VADD(T2V, T2X); + T31 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T32 = BYTW(&(W[TWVL * 124]), T31); + T33 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T34 = BYTW(&(W[TWVL * 60]), T33); + T6h = VADD(T32, T34); + } + { + V T2K, T2M, T2J, T2L; + T2J = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T2K = BYTW(&(W[TWVL * 12]), T2J); + T2L = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2M = BYTW(&(W[TWVL * 76]), T2L); + T2N = VSUB(T2K, T2M); + T6d = VADD(T2K, T2M); + } + { + V T2P, T2R, T2O, T2Q; + T2O = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2P = BYTW(&(W[TWVL * 108]), T2O); + T2Q = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T2R = BYTW(&(W[TWVL * 44]), T2Q); + T2S = VSUB(T2P, T2R); + T6e = VADD(T2P, T2R); + } + T6f = VSUB(T6d, T6e); + T6j = VSUB(T6h, T6i); + T2T = VMUL(LDK(KP707106781), VSUB(T2N, T2S)); + T2Y = VSUB(T2V, T2X); + T2Z = VSUB(T2T, T2Y); + T4k = VADD(T2Y, T2T); + { + V T35, T36, T7l, T7m; + T35 = VSUB(T32, T34); + T36 = VMUL(LDK(KP707106781), VADD(T2N, T2S)); + T37 = VSUB(T35, T36); + T4h = VADD(T35, T36); + T7l = VADD(T6h, T6i); + T7m = VADD(T6d, T6e); + T7n = VSUB(T7l, T7m); + T81 = VADD(T7l, T7m); + } + } + { + V T1g, T1i, T5K, T1m, T1o, T5J, T18, T5G, T1d, T5H, T5I, T5L; + { + V T1f, T1h, T1l, T1n; + T1f = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T1g = BYTW(&(W[TWVL * 26]), T1f); + T1h = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + T1i = BYTW(&(W[TWVL * 90]), T1h); + T5K = VADD(T1g, T1i); + T1l = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + T1m = BYTW(&(W[TWVL * 122]), T1l); + T1n = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T1o = BYTW(&(W[TWVL * 58]), T1n); + T5J = VADD(T1m, T1o); + } + { + V T15, T17, T14, T16; + T14 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T15 = BYTW(&(W[TWVL * 10]), T14); + T16 = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + T17 = 
BYTW(&(W[TWVL * 74]), T16); + T18 = VSUB(T15, T17); + T5G = VADD(T15, T17); + } + { + V T1a, T1c, T19, T1b; + T19 = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + T1a = BYTW(&(W[TWVL * 106]), T19); + T1b = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T1c = BYTW(&(W[TWVL * 42]), T1b); + T1d = VSUB(T1a, T1c); + T5H = VADD(T1a, T1c); + } + T7w = VADD(T5J, T5K); + T7x = VADD(T5G, T5H); + T7y = VSUB(T7w, T7x); + T5I = VSUB(T5G, T5H); + T5L = VSUB(T5J, T5K); + T5M = VFNMS(LDK(KP382683432), T5L, VMUL(LDK(KP923879532), T5I)); + T6q = VFMA(LDK(KP923879532), T5L, VMUL(LDK(KP382683432), T5I)); + { + V T1e, T1j, T1p, T1q; + T1e = VMUL(LDK(KP707106781), VSUB(T18, T1d)); + T1j = VSUB(T1g, T1i); + T1k = VSUB(T1e, T1j); + T4s = VADD(T1j, T1e); + T1p = VSUB(T1m, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T18, T1d)); + T1r = VSUB(T1p, T1q); + T4t = VADD(T1p, T1q); + } + } + { + V TR, TT, T5A, TX, TZ, T5z, TJ, T5C, TO, T5D, T5B, T5E; + { + V TQ, TS, TW, TY; + TQ = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TR = BYTW(&(W[TWVL * 34]), TQ); + TS = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + TT = BYTW(&(W[TWVL * 98]), TS); + T5A = VADD(TR, TT); + TW = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TX = BYTW(&(W[TWVL * 2]), TW); + TY = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + TZ = BYTW(&(W[TWVL * 66]), TY); + T5z = VADD(TX, TZ); + } + { + V TG, TI, TF, TH; + TF = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TG = BYTW(&(W[TWVL * 18]), TF); + TH = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + TI = BYTW(&(W[TWVL * 82]), TH); + TJ = VSUB(TG, TI); + T5C = VADD(TG, TI); + } + { + V TL, TN, TK, TM; + TK = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + TL = BYTW(&(W[TWVL * 114]), TK); + TM = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + TN = BYTW(&(W[TWVL * 50]), TM); + TO = VSUB(TL, TN); + T5D = VADD(TL, TN); + } + T7t = VADD(T5z, T5A); + T7u = VADD(T5C, T5D); + T7v = VSUB(T7t, T7u); + T5B = VSUB(T5z, T5A); + T5E = VSUB(T5C, T5D); + T5F = VFMA(LDK(KP382683432), T5B, VMUL(LDK(KP923879532), T5E)); + T6p = VFNMS(LDK(KP382683432), T5E, VMUL(LDK(KP923879532), T5B)); + { + V TP, TU, 
T10, T11; + TP = VMUL(LDK(KP707106781), VSUB(TJ, TO)); + TU = VSUB(TR, TT); + TV = VSUB(TP, TU); + T4p = VADD(TU, TP); + T10 = VSUB(TX, TZ); + T11 = VMUL(LDK(KP707106781), VADD(TJ, TO)); + T12 = VSUB(T10, T11); + T4q = VADD(T10, T11); + } + } + { + V Tl, T5r, TB, T5u, Tq, T5s, Tw, T5v, Tr, TC; + { + V Ti, Tk, Th, Tj; + Th = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 6]), Th); + Tj = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tk = BYTW(&(W[TWVL * 70]), Tj); + Tl = VSUB(Ti, Tk); + T5r = VADD(Ti, Tk); + } + { + V Ty, TA, Tx, Tz; + Tx = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 118]), Tx); + Tz = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 54]), Tz); + TB = VSUB(Ty, TA); + T5u = VADD(Ty, TA); + } + { + V Tn, Tp, Tm, To; + Tm = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tn = BYTW(&(W[TWVL * 38]), Tm); + To = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tp = BYTW(&(W[TWVL * 102]), To); + Tq = VSUB(Tn, Tp); + T5s = VADD(Tn, Tp); + } + { + V Tt, Tv, Ts, Tu; + Ts = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 22]), Ts); + Tu = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + Tv = BYTW(&(W[TWVL * 86]), Tu); + Tw = VSUB(Tt, Tv); + T5v = VADD(Tt, Tv); + } + T7A = VADD(T5r, T5s); + T7B = VADD(T5u, T5v); + Tr = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + TC = VFNMS(LDK(KP382683432), TB, VMUL(LDK(KP923879532), Tw)); + TD = VSUB(Tr, TC); + T4x = VADD(Tr, TC); + { + V T3i, T3j, T5t, T5w; + T3i = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + T3j = VFMA(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T3k = VSUB(T3i, T3j); + T4C = VADD(T3i, T3j); + T5t = VSUB(T5r, T5s); + T5w = VSUB(T5u, T5v); + T5x = VMUL(LDK(KP707106781), VADD(T5t, T5w)); + T6s = VMUL(LDK(KP707106781), VSUB(T5t, T5w)); + } + } + { + V T1z, T5P, T1P, T5T, T1E, T5Q, T1K, T5S; + { + V T1w, T1y, T1v, T1x; + T1v = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1w = BYTW(&(W[TWVL * 8]), T1v); + T1x = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1y = BYTW(&(W[TWVL * 
72]), T1x); + T1z = VSUB(T1w, T1y); + T5P = VADD(T1w, T1y); + } + { + V T1M, T1O, T1L, T1N; + T1L = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1M = BYTW(&(W[TWVL * 24]), T1L); + T1N = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1O = BYTW(&(W[TWVL * 88]), T1N); + T1P = VSUB(T1M, T1O); + T5T = VADD(T1M, T1O); + } + { + V T1B, T1D, T1A, T1C; + T1A = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1B = BYTW(&(W[TWVL * 40]), T1A); + T1C = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T1D = BYTW(&(W[TWVL * 104]), T1C); + T1E = VSUB(T1B, T1D); + T5Q = VADD(T1B, T1D); + } + { + V T1H, T1J, T1G, T1I; + T1G = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)])); + T1H = BYTW(&(W[TWVL * 120]), T1G); + T1I = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1J = BYTW(&(W[TWVL * 56]), T1I); + T1K = VSUB(T1H, T1J); + T5S = VADD(T1H, T1J); + } + { + V T1F, T1Q, T7h, T7i; + T1F = VFNMS(LDK(KP382683432), T1E, VMUL(LDK(KP923879532), T1z)); + T1Q = VFMA(LDK(KP923879532), T1K, VMUL(LDK(KP382683432), T1P)); + T1R = VSUB(T1F, T1Q); + T4b = VADD(T1F, T1Q); + T7h = VADD(T5P, T5Q); + T7i = VADD(T5S, T5T); + T7j = VSUB(T7h, T7i); + T7Z = VADD(T7h, T7i); + } + { + V T2h, T2i, T5R, T5U; + T2h = VFMA(LDK(KP382683432), T1z, VMUL(LDK(KP923879532), T1E)); + T2i = VFNMS(LDK(KP382683432), T1K, VMUL(LDK(KP923879532), T1P)); + T2j = VSUB(T2h, T2i); + T4e = VADD(T2h, T2i); + T5R = VSUB(T5P, T5Q); + T5U = VSUB(T5S, T5T); + T5V = VMUL(LDK(KP707106781), VSUB(T5R, T5U)); + T63 = VMUL(LDK(KP707106781), VADD(T5R, T5U)); + } + } + { + V T2q, T66, T2G, T6a, T2v, T67, T2B, T69; + { + V T2n, T2p, T2m, T2o; + T2m = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2n = BYTW(&(W[TWVL * 4]), T2m); + T2o = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2p = BYTW(&(W[TWVL * 68]), T2o); + T2q = VSUB(T2n, T2p); + T66 = VADD(T2n, T2p); + } + { + V T2D, T2F, T2C, T2E; + T2C = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2D = BYTW(&(W[TWVL * 20]), T2C); + T2E = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + T2F = BYTW(&(W[TWVL * 84]), T2E); 
+ T2G = VSUB(T2D, T2F); + T6a = VADD(T2D, T2F); + } + { + V T2s, T2u, T2r, T2t; + T2r = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T2s = BYTW(&(W[TWVL * 36]), T2r); + T2t = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + T2u = BYTW(&(W[TWVL * 100]), T2t); + T2v = VSUB(T2s, T2u); + T67 = VADD(T2s, T2u); + } + { + V T2y, T2A, T2x, T2z; + T2x = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2y = BYTW(&(W[TWVL * 116]), T2x); + T2z = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T2A = BYTW(&(W[TWVL * 52]), T2z); + T2B = VSUB(T2y, T2A); + T69 = VADD(T2y, T2A); + } + { + V T2w, T2H, T7o, T7p; + T2w = VFNMS(LDK(KP382683432), T2v, VMUL(LDK(KP923879532), T2q)); + T2H = VFMA(LDK(KP923879532), T2B, VMUL(LDK(KP382683432), T2G)); + T2I = VSUB(T2w, T2H); + T4i = VADD(T2w, T2H); + T7o = VADD(T66, T67); + T7p = VADD(T69, T6a); + T7q = VSUB(T7o, T7p); + T82 = VADD(T7o, T7p); + } + { + V T38, T39, T68, T6b; + T38 = VFMA(LDK(KP382683432), T2q, VMUL(LDK(KP923879532), T2v)); + T39 = VFNMS(LDK(KP382683432), T2B, VMUL(LDK(KP923879532), T2G)); + T3a = VSUB(T38, T39); + T4l = VADD(T38, T39); + T68 = VSUB(T66, T67); + T6b = VSUB(T69, T6a); + T6c = VMUL(LDK(KP707106781), VSUB(T68, T6b)); + T6k = VMUL(LDK(KP707106781), VADD(T68, T6b)); + } + } + { + V T7s, T7R, T7M, T7U, T7D, T7T, T7J, T7Q; + { + V T7k, T7r, T7K, T7L; + T7k = VFNMS(LDK(KP382683432), T7j, VMUL(LDK(KP923879532), T7g)); + T7r = VFMA(LDK(KP923879532), T7n, VMUL(LDK(KP382683432), T7q)); + T7s = VSUB(T7k, T7r); + T7R = VADD(T7k, T7r); + T7K = VFMA(LDK(KP382683432), T7g, VMUL(LDK(KP923879532), T7j)); + T7L = VFNMS(LDK(KP382683432), T7n, VMUL(LDK(KP923879532), T7q)); + T7M = VSUB(T7K, T7L); + T7U = VADD(T7K, T7L); + } + { + V T7z, T7C, T7H, T7I; + T7z = VMUL(LDK(KP707106781), VSUB(T7v, T7y)); + T7C = VSUB(T7A, T7B); + T7D = VSUB(T7z, T7C); + T7T = VADD(T7C, T7z); + T7H = VSUB(T7F, T7G); + T7I = VMUL(LDK(KP707106781), VADD(T7v, T7y)); + T7J = VSUB(T7H, T7I); + T7Q = VADD(T7H, T7I); + } + { + V T7E, T7N, T7W, T7X; + T7E = VBYI(VSUB(T7s, 
T7D)); + T7N = VSUB(T7J, T7M); + ST(&(x[WS(rs, 20)]), VADD(T7E, T7N), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VSUB(T7N, T7E), ms, &(x[0])); + T7W = VSUB(T7Q, T7R); + T7X = VBYI(VSUB(T7U, T7T)); + ST(&(x[WS(rs, 36)]), VSUB(T7W, T7X), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VADD(T7W, T7X), ms, &(x[0])); + } + { + V T7O, T7P, T7S, T7V; + T7O = VBYI(VADD(T7D, T7s)); + T7P = VADD(T7J, T7M); + ST(&(x[WS(rs, 12)]), VADD(T7O, T7P), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VSUB(T7P, T7O), ms, &(x[0])); + T7S = VADD(T7Q, T7R); + T7V = VBYI(VADD(T7T, T7U)); + ST(&(x[WS(rs, 60)]), VSUB(T7S, T7V), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T7S, T7V), ms, &(x[0])); + } + } + { + V T84, T8c, T8l, T8n, T87, T8h, T8b, T8g, T8i, T8m; + { + V T80, T83, T8j, T8k; + T80 = VSUB(T7Y, T7Z); + T83 = VSUB(T81, T82); + T84 = VMUL(LDK(KP707106781), VSUB(T80, T83)); + T8c = VMUL(LDK(KP707106781), VADD(T80, T83)); + T8j = VADD(T7Y, T7Z); + T8k = VADD(T81, T82); + T8l = VBYI(VSUB(T8j, T8k)); + T8n = VADD(T8j, T8k); + } + { + V T85, T86, T89, T8a; + T85 = VADD(T7t, T7u); + T86 = VADD(T7w, T7x); + T87 = VSUB(T85, T86); + T8h = VADD(T85, T86); + T89 = VADD(T7F, T7G); + T8a = VADD(T7A, T7B); + T8b = VSUB(T89, T8a); + T8g = VADD(T89, T8a); + } + T8i = VSUB(T8g, T8h); + ST(&(x[WS(rs, 48)]), VSUB(T8i, T8l), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T8i, T8l), ms, &(x[0])); + T8m = VADD(T8g, T8h); + ST(&(x[WS(rs, 32)]), VSUB(T8m, T8n), ms, &(x[0])); + ST(&(x[0]), VADD(T8m, T8n), ms, &(x[0])); + { + V T88, T8d, T8e, T8f; + T88 = VBYI(VSUB(T84, T87)); + T8d = VSUB(T8b, T8c); + ST(&(x[WS(rs, 24)]), VADD(T88, T8d), ms, &(x[0])); + ST(&(x[WS(rs, 40)]), VSUB(T8d, T88), ms, &(x[0])); + T8e = VBYI(VADD(T87, T84)); + T8f = VADD(T8b, T8c); + ST(&(x[WS(rs, 8)]), VADD(T8e, T8f), ms, &(x[0])); + ST(&(x[WS(rs, 56)]), VSUB(T8f, T8e), ms, &(x[0])); + } + } + { + V T5O, T6H, T6x, T6F, T6n, T6I, T6A, T6E; + { + V T5y, T5N, T6r, T6w; + T5y = VSUB(T5q, T5x); + T5N = VSUB(T5F, T5M); + T5O = VSUB(T5y, T5N); + T6H = VADD(T5y, T5N); + T6r 
= VSUB(T6p, T6q); + T6w = VSUB(T6s, T6v); + T6x = VSUB(T6r, T6w); + T6F = VADD(T6w, T6r); + { + V T65, T6y, T6m, T6z; + { + V T5Z, T64, T6g, T6l; + T5Z = VSUB(T5V, T5Y); + T64 = VSUB(T62, T63); + T65 = VFMA(LDK(KP831469612), T5Z, VMUL(LDK(KP555570233), T64)); + T6y = VFNMS(LDK(KP555570233), T5Z, VMUL(LDK(KP831469612), T64)); + T6g = VSUB(T6c, T6f); + T6l = VSUB(T6j, T6k); + T6m = VFNMS(LDK(KP555570233), T6l, VMUL(LDK(KP831469612), T6g)); + T6z = VFMA(LDK(KP555570233), T6g, VMUL(LDK(KP831469612), T6l)); + } + T6n = VSUB(T65, T6m); + T6I = VADD(T6y, T6z); + T6A = VSUB(T6y, T6z); + T6E = VADD(T65, T6m); + } + } + { + V T6o, T6B, T6K, T6L; + T6o = VADD(T5O, T6n); + T6B = VBYI(VADD(T6x, T6A)); + ST(&(x[WS(rs, 54)]), VSUB(T6o, T6B), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VADD(T6o, T6B), ms, &(x[0])); + T6K = VBYI(VADD(T6F, T6E)); + T6L = VADD(T6H, T6I); + ST(&(x[WS(rs, 6)]), VADD(T6K, T6L), ms, &(x[0])); + ST(&(x[WS(rs, 58)]), VSUB(T6L, T6K), ms, &(x[0])); + } + { + V T6C, T6D, T6G, T6J; + T6C = VSUB(T5O, T6n); + T6D = VBYI(VSUB(T6A, T6x)); + ST(&(x[WS(rs, 42)]), VSUB(T6C, T6D), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VADD(T6C, T6D), ms, &(x[0])); + T6G = VBYI(VSUB(T6E, T6F)); + T6J = VSUB(T6H, T6I); + ST(&(x[WS(rs, 26)]), VADD(T6G, T6J), ms, &(x[0])); + ST(&(x[WS(rs, 38)]), VSUB(T6J, T6G), ms, &(x[0])); + } + } + { + V T6O, T79, T6Z, T77, T6V, T7a, T72, T76; + { + V T6M, T6N, T6X, T6Y; + T6M = VADD(T5q, T5x); + T6N = VADD(T6p, T6q); + T6O = VSUB(T6M, T6N); + T79 = VADD(T6M, T6N); + T6X = VADD(T5F, T5M); + T6Y = VADD(T6v, T6s); + T6Z = VSUB(T6X, T6Y); + T77 = VADD(T6Y, T6X); + { + V T6R, T70, T6U, T71; + { + V T6P, T6Q, T6S, T6T; + T6P = VADD(T5Y, T5V); + T6Q = VADD(T62, T63); + T6R = VFMA(LDK(KP980785280), T6P, VMUL(LDK(KP195090322), T6Q)); + T70 = VFNMS(LDK(KP195090322), T6P, VMUL(LDK(KP980785280), T6Q)); + T6S = VADD(T6f, T6c); + T6T = VADD(T6j, T6k); + T6U = VFNMS(LDK(KP195090322), T6T, VMUL(LDK(KP980785280), T6S)); + T71 = VFMA(LDK(KP195090322), T6S, 
VMUL(LDK(KP980785280), T6T)); + } + T6V = VSUB(T6R, T6U); + T7a = VADD(T70, T71); + T72 = VSUB(T70, T71); + T76 = VADD(T6R, T6U); + } + } + { + V T6W, T73, T7c, T7d; + T6W = VADD(T6O, T6V); + T73 = VBYI(VADD(T6Z, T72)); + ST(&(x[WS(rs, 50)]), VSUB(T6W, T73), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T6W, T73), ms, &(x[0])); + T7c = VBYI(VADD(T77, T76)); + T7d = VADD(T79, T7a); + ST(&(x[WS(rs, 2)]), VADD(T7c, T7d), ms, &(x[0])); + ST(&(x[WS(rs, 62)]), VSUB(T7d, T7c), ms, &(x[0])); + } + { + V T74, T75, T78, T7b; + T74 = VSUB(T6O, T6V); + T75 = VBYI(VSUB(T72, T6Z)); + ST(&(x[WS(rs, 46)]), VSUB(T74, T75), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VADD(T74, T75), ms, &(x[0])); + T78 = VBYI(VSUB(T76, T77)); + T7b = VSUB(T79, T7a); + ST(&(x[WS(rs, 30)]), VADD(T78, T7b), ms, &(x[0])); + ST(&(x[WS(rs, 34)]), VSUB(T7b, T78), ms, &(x[0])); + } + } + { + V T4z, T5g, T4R, T59, T4H, T5j, T4O, T55, T4o, T4S, T4K, T4P, T52, T5k, T5c; + V T5h; + { + V T4y, T57, T4v, T58, T4r, T4u; + T4y = VADD(T4w, T4x); + T57 = VSUB(T4B, T4C); + T4r = VFMA(LDK(KP980785280), T4p, VMUL(LDK(KP195090322), T4q)); + T4u = VFNMS(LDK(KP195090322), T4t, VMUL(LDK(KP980785280), T4s)); + T4v = VADD(T4r, T4u); + T58 = VSUB(T4r, T4u); + T4z = VSUB(T4v, T4y); + T5g = VADD(T57, T58); + T4R = VADD(T4y, T4v); + T59 = VSUB(T57, T58); + } + { + V T4D, T54, T4G, T53, T4E, T4F; + T4D = VADD(T4B, T4C); + T54 = VSUB(T4x, T4w); + T4E = VFNMS(LDK(KP195090322), T4p, VMUL(LDK(KP980785280), T4q)); + T4F = VFMA(LDK(KP195090322), T4s, VMUL(LDK(KP980785280), T4t)); + T4G = VADD(T4E, T4F); + T53 = VSUB(T4E, T4F); + T4H = VSUB(T4D, T4G); + T5j = VADD(T54, T53); + T4O = VADD(T4D, T4G); + T55 = VSUB(T53, T54); + } + { + V T4g, T4I, T4n, T4J; + { + V T4c, T4f, T4j, T4m; + T4c = VADD(T4a, T4b); + T4f = VADD(T4d, T4e); + T4g = VFNMS(LDK(KP098017140), T4f, VMUL(LDK(KP995184726), T4c)); + T4I = VFMA(LDK(KP098017140), T4c, VMUL(LDK(KP995184726), T4f)); + T4j = VADD(T4h, T4i); + T4m = VADD(T4k, T4l); + T4n = VFMA(LDK(KP995184726), T4j, 
VMUL(LDK(KP098017140), T4m)); + T4J = VFNMS(LDK(KP098017140), T4j, VMUL(LDK(KP995184726), T4m)); + } + T4o = VSUB(T4g, T4n); + T4S = VADD(T4I, T4J); + T4K = VSUB(T4I, T4J); + T4P = VADD(T4g, T4n); + } + { + V T4Y, T5a, T51, T5b; + { + V T4W, T4X, T4Z, T50; + T4W = VSUB(T4a, T4b); + T4X = VSUB(T4e, T4d); + T4Y = VFNMS(LDK(KP634393284), T4X, VMUL(LDK(KP773010453), T4W)); + T5a = VFMA(LDK(KP634393284), T4W, VMUL(LDK(KP773010453), T4X)); + T4Z = VSUB(T4h, T4i); + T50 = VSUB(T4l, T4k); + T51 = VFMA(LDK(KP773010453), T4Z, VMUL(LDK(KP634393284), T50)); + T5b = VFNMS(LDK(KP634393284), T4Z, VMUL(LDK(KP773010453), T50)); + } + T52 = VSUB(T4Y, T51); + T5k = VADD(T5a, T5b); + T5c = VSUB(T5a, T5b); + T5h = VADD(T4Y, T51); + } + { + V T4A, T4L, T5i, T5l; + T4A = VBYI(VSUB(T4o, T4z)); + T4L = VSUB(T4H, T4K); + ST(&(x[WS(rs, 17)]), VADD(T4A, T4L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VSUB(T4L, T4A), ms, &(x[WS(rs, 1)])); + T5i = VADD(T5g, T5h); + T5l = VBYI(VADD(T5j, T5k)); + ST(&(x[WS(rs, 57)]), VSUB(T5i, T5l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T5i, T5l), ms, &(x[WS(rs, 1)])); + } + { + V T5m, T5n, T4M, T4N; + T5m = VSUB(T5g, T5h); + T5n = VBYI(VSUB(T5k, T5j)); + ST(&(x[WS(rs, 39)]), VSUB(T5m, T5n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VADD(T5m, T5n), ms, &(x[WS(rs, 1)])); + T4M = VBYI(VADD(T4z, T4o)); + T4N = VADD(T4H, T4K); + ST(&(x[WS(rs, 15)]), VADD(T4M, T4N), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 49)]), VSUB(T4N, T4M), ms, &(x[WS(rs, 1)])); + } + { + V T4Q, T4T, T56, T5d; + T4Q = VADD(T4O, T4P); + T4T = VBYI(VADD(T4R, T4S)); + ST(&(x[WS(rs, 63)]), VSUB(T4Q, T4T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T4Q, T4T), ms, &(x[WS(rs, 1)])); + T56 = VBYI(VSUB(T52, T55)); + T5d = VSUB(T59, T5c); + ST(&(x[WS(rs, 23)]), VADD(T56, T5d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VSUB(T5d, T56), ms, &(x[WS(rs, 1)])); + } + { + V T5e, T5f, T4U, T4V; + T5e = VBYI(VADD(T55, T52)); + T5f = VADD(T59, T5c); + ST(&(x[WS(rs, 9)]), VADD(T5e, T5f), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 55)]), VSUB(T5f, T5e), ms, &(x[WS(rs, 1)])); + T4U = VSUB(T4O, T4P); + T4V = VBYI(VSUB(T4S, T4R)); + ST(&(x[WS(rs, 33)]), VSUB(T4U, T4V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VADD(T4U, T4V), ms, &(x[WS(rs, 1)])); + } + } + { + V T1u, T43, T3D, T3V, T3t, T45, T3B, T3K, T3d, T3E, T3w, T3A, T3R, T46, T3Y; + V T42; + { + V TE, T3U, T1t, T3T, T13, T1s; + TE = VSUB(Tg, TD); + T3U = VADD(T3r, T3k); + T13 = VFMA(LDK(KP831469612), TV, VMUL(LDK(KP555570233), T12)); + T1s = VFNMS(LDK(KP555570233), T1r, VMUL(LDK(KP831469612), T1k)); + T1t = VSUB(T13, T1s); + T3T = VADD(T13, T1s); + T1u = VSUB(TE, T1t); + T43 = VADD(T3U, T3T); + T3D = VADD(TE, T1t); + T3V = VSUB(T3T, T3U); + } + { + V T3s, T3I, T3h, T3J, T3f, T3g; + T3s = VSUB(T3k, T3r); + T3I = VADD(Tg, TD); + T3f = VFNMS(LDK(KP555570233), TV, VMUL(LDK(KP831469612), T12)); + T3g = VFMA(LDK(KP555570233), T1k, VMUL(LDK(KP831469612), T1r)); + T3h = VSUB(T3f, T3g); + T3J = VADD(T3f, T3g); + T3t = VSUB(T3h, T3s); + T45 = VADD(T3I, T3J); + T3B = VADD(T3s, T3h); + T3K = VSUB(T3I, T3J); + } + { + V T2l, T3u, T3c, T3v; + { + V T29, T2k, T30, T3b; + T29 = VSUB(T1R, T28); + T2k = VSUB(T2g, T2j); + T2l = VFMA(LDK(KP881921264), T29, VMUL(LDK(KP471396736), T2k)); + T3u = VFNMS(LDK(KP471396736), T29, VMUL(LDK(KP881921264), T2k)); + T30 = VSUB(T2I, T2Z); + T3b = VSUB(T37, T3a); + T3c = VFNMS(LDK(KP471396736), T3b, VMUL(LDK(KP881921264), T30)); + T3v = VFMA(LDK(KP471396736), T30, VMUL(LDK(KP881921264), T3b)); + } + T3d = VSUB(T2l, T3c); + T3E = VADD(T3u, T3v); + T3w = VSUB(T3u, T3v); + T3A = VADD(T2l, T3c); + } + { + V T3N, T3W, T3Q, T3X; + { + V T3L, T3M, T3O, T3P; + T3L = VADD(T28, T1R); + T3M = VADD(T2g, T2j); + T3N = VFMA(LDK(KP956940335), T3L, VMUL(LDK(KP290284677), T3M)); + T3W = VFNMS(LDK(KP290284677), T3L, VMUL(LDK(KP956940335), T3M)); + T3O = VADD(T2Z, T2I); + T3P = VADD(T37, T3a); + T3Q = VFNMS(LDK(KP290284677), T3P, VMUL(LDK(KP956940335), T3O)); + T3X = VFMA(LDK(KP290284677), T3O, 
VMUL(LDK(KP956940335), T3P)); + } + T3R = VSUB(T3N, T3Q); + T46 = VADD(T3W, T3X); + T3Y = VSUB(T3W, T3X); + T42 = VADD(T3N, T3Q); + } + { + V T3e, T3x, T44, T47; + T3e = VADD(T1u, T3d); + T3x = VBYI(VADD(T3t, T3w)); + ST(&(x[WS(rs, 53)]), VSUB(T3e, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T3e, T3x), ms, &(x[WS(rs, 1)])); + T44 = VBYI(VSUB(T42, T43)); + T47 = VSUB(T45, T46); + ST(&(x[WS(rs, 29)]), VADD(T44, T47), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 35)]), VSUB(T47, T44), ms, &(x[WS(rs, 1)])); + } + { + V T48, T49, T3y, T3z; + T48 = VBYI(VADD(T43, T42)); + T49 = VADD(T45, T46); + ST(&(x[WS(rs, 3)]), VADD(T48, T49), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 61)]), VSUB(T49, T48), ms, &(x[WS(rs, 1)])); + T3y = VSUB(T1u, T3d); + T3z = VBYI(VSUB(T3w, T3t)); + ST(&(x[WS(rs, 43)]), VSUB(T3y, T3z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VADD(T3y, T3z), ms, &(x[WS(rs, 1)])); + } + { + V T3C, T3F, T3S, T3Z; + T3C = VBYI(VSUB(T3A, T3B)); + T3F = VSUB(T3D, T3E); + ST(&(x[WS(rs, 27)]), VADD(T3C, T3F), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 37)]), VSUB(T3F, T3C), ms, &(x[WS(rs, 1)])); + T3S = VADD(T3K, T3R); + T3Z = VBYI(VADD(T3V, T3Y)); + ST(&(x[WS(rs, 51)]), VSUB(T3S, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VADD(T3S, T3Z), ms, &(x[WS(rs, 1)])); + } + { + V T40, T41, T3G, T3H; + T40 = VSUB(T3K, T3R); + T41 = VBYI(VSUB(T3Y, T3V)); + ST(&(x[WS(rs, 45)]), VSUB(T40, T41), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VADD(T40, T41), ms, &(x[WS(rs, 1)])); + T3G = VBYI(VADD(T3B, T3A)); + T3H = VADD(T3D, T3E); + ST(&(x[WS(rs, 5)]), VADD(T3G, T3H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 59)]), VSUB(T3H, T3G), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 
20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 64, XSIMD_STRING("t1bv_64"), twinstr, &GENUS, {467, 198, 52, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_64) (planner *p) { + X(kdft_dit_register) (p, t1bv_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1bv_7 -include t1b.h -sign 1 */ + +/* + * This function contains 36 FP additions, 36 FP multiplications, + * (or, 15 additions, 15 multiplications, 21 fused multiply/add), + * 42 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1b.h" + +static void t1bv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V T1, T2, T4, Te, Tc, T9, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Te = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, Tf, Td, Ta, T8; + T3 = BYTW(&(W[0]), T2); + T5 = BYTW(&(W[TWVL * 10]), T4); + Tf = BYTW(&(W[TWVL * 6]), Te); + Td = BYTW(&(W[TWVL * 4]), Tc); + Ta = BYTW(&(W[TWVL * 8]), 
T9); + T8 = BYTW(&(W[TWVL * 2]), T7); + { + V T6, Tm, Tg, Tk, Tb, Tl; + T6 = VADD(T3, T5); + Tm = VSUB(T3, T5); + Tg = VADD(Td, Tf); + Tk = VSUB(Td, Tf); + Tb = VADD(T8, Ta); + Tl = VSUB(T8, Ta); + { + V Tp, Tx, Tu, Th, Ts, Tn, Tq, Ty; + Tp = VFNMS(LDK(KP356895867), T6, Tg); + Tx = VFMA(LDK(KP554958132), Tk, Tm); + ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0])); + Tu = VFNMS(LDK(KP356895867), Tb, T6); + Th = VFNMS(LDK(KP356895867), Tg, Tb); + Ts = VFMA(LDK(KP554958132), Tl, Tk); + Tn = VFNMS(LDK(KP554958132), Tm, Tl); + Tq = VFNMS(LDK(KP692021471), Tp, Tb); + Ty = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Tx, Tl)); + { + V Tv, Ti, Tt, To, Tr, Tw, Tj; + Tv = VFNMS(LDK(KP692021471), Tu, Tg); + Ti = VFNMS(LDK(KP692021471), Th, T6); + Tt = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Ts, Tm)); + To = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tn, Tk)); + Tr = VFNMS(LDK(KP900968867), Tq, T1); + Tw = VFNMS(LDK(KP900968867), Tv, T1); + Tj = VFNMS(LDK(KP900968867), Ti, T1); + ST(&(x[WS(rs, 5)]), VFNMSI(Tt, Tr), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFMAI(Tt, Tr), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Ty, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(To, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(To, Tj), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1bv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_7) (planner *p) { + X(kdft_dit_register) (p, t1bv_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1bv_7 -include t1b.h -sign 1 */ + +/* + * This function contains 36 FP additions, 30 FP multiplications, + * (or, 24 additions, 18 multiplications, 
12 fused multiply/add), + * 21 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1b.h" + +static void t1bv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V Th, Tf, Ti, T5, Tk, Ta, Tj, To, Tp; + Th = LD(&(x[0]), ms, &(x[0])); + { + V Tc, Te, Tb, Td; + Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = BYTW(&(W[TWVL * 2]), Tb); + Td = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Te = BYTW(&(W[TWVL * 8]), Td); + Tf = VSUB(Tc, Te); + Ti = VADD(Tc, Te); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 10]), T3); + T5 = VSUB(T2, T4); + Tk = VADD(T2, T4); + } + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 4]), T6); + T8 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T9 = BYTW(&(W[TWVL * 6]), T8); + Ta = VSUB(T7, T9); + Tj = VADD(T7, T9); + } + ST(&(x[0]), VADD(Th, VADD(Tk, VADD(Ti, Tj))), ms, &(x[0])); + To = VBYI(VFNMS(LDK(KP781831482), Ta, VFNMS(LDK(KP433883739), Tf, VMUL(LDK(KP974927912), T5)))); + Tp = VFMA(LDK(KP623489801), Tj, VFNMS(LDK(KP900968867), Ti, VFNMS(LDK(KP222520933), Tk, Th))); + ST(&(x[WS(rs, 2)]), VADD(To, Tp), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VSUB(Tp, To), ms, &(x[WS(rs, 1)])); + { + V Tg, Tl, Tm, Tn; + Tg = VBYI(VFMA(LDK(KP433883739), T5, VFNMS(LDK(KP781831482), Tf, 
VMUL(LDK(KP974927912), Ta)))); + Tl = VFMA(LDK(KP623489801), Ti, VFNMS(LDK(KP222520933), Tj, VFNMS(LDK(KP900968867), Tk, Th))); + ST(&(x[WS(rs, 3)]), VADD(Tg, Tl), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VSUB(Tl, Tg), ms, &(x[0])); + Tm = VBYI(VFMA(LDK(KP781831482), T5, VFMA(LDK(KP974927912), Tf, VMUL(LDK(KP433883739), Ta)))); + Tn = VFMA(LDK(KP623489801), Tk, VFNMS(LDK(KP900968867), Tj, VFNMS(LDK(KP222520933), Ti, Th))); + ST(&(x[WS(rs, 1)]), VADD(Tm, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VSUB(Tn, Tm), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1bv_7"), twinstr, &GENUS, {24, 18, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_7) (planner *p) { + X(kdft_dit_register) (p, t1bv_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1bv_8 -include t1b.h -sign 1 */ + +/* + * This function contains 33 FP additions, 24 FP multiplications, + * (or, 23 additions, 14 multiplications, 10 fused multiply/add), + * 36 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1b.h" + +static void t1bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, Th, Tj, T5, T7, Ta, Tc; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Ti, Tk, T6, T8, Tb, Td; + T3 = BYTW(&(W[TWVL * 6]), T2); + Ti = BYTW(&(W[TWVL * 2]), Th); + Tk = BYTW(&(W[TWVL * 10]), Tj); + T6 = BYTW(&(W[0]), T5); + T8 = BYTW(&(W[TWVL * 8]), T7); + Tb = BYTW(&(W[TWVL * 12]), Ta); + Td = BYTW(&(W[TWVL * 4]), Tc); + { + V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts; + Tq = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tr = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + Tt = VADD(T6, T8); + T9 = VSUB(T6, T8); + Tu = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tw 
= VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + { + V Tx, Tv, Tm, Tf; + Tx = VADD(Tt, Tu); + Tv = VSUB(Tt, Tu); + Tm = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V Tp, Tn, To, Tg; + ST(&(x[0]), VADD(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(Tv, Ts), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tv, Ts), ms, &(x[0])); + Tp = VFMA(LDK(KP707106781), Tm, Tl); + Tn = VFNMS(LDK(KP707106781), Tm, Tl); + To = VFMA(LDK(KP707106781), Tf, T4); + Tg = VFNMS(LDK(KP707106781), Tf, T4); + ST(&(x[WS(rs, 1)]), VFMAI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(Tn, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tn, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1bv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_8) (planner *p) { + X(kdft_dit_register) (p, t1bv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1bv_8 -include t1b.h -sign 1 */ + +/* + * This function contains 33 FP additions, 16 FP multiplications, + * (or, 33 additions, 16 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1b.h" + +static void t1bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V Tl, Tq, Tg, Tr, T5, Tt, Ta, Tu, Ti, Tk, Tj; + Ti = LD(&(x[0]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tk = 
BYTW(&(W[TWVL * 6]), Tj); + Tl = VSUB(Ti, Tk); + Tq = VADD(Ti, Tk); + { + V Td, Tf, Tc, Te; + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 2]), Tc); + Te = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tf = BYTW(&(W[TWVL * 10]), Te); + Tg = VSUB(Td, Tf); + Tr = VADD(Td, Tf); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 8]), T3); + T5 = VSUB(T2, T4); + Tt = VADD(T2, T4); + } + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 12]), T6); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + Ta = VSUB(T7, T9); + Tu = VADD(T7, T9); + } + { + V Ts, Tv, Tw, Tx; + Ts = VSUB(Tq, Tr); + Tv = VBYI(VSUB(Tt, Tu)); + ST(&(x[WS(rs, 6)]), VSUB(Ts, Tv), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Ts, Tv), ms, &(x[0])); + Tw = VADD(Tq, Tr); + Tx = VADD(Tt, Tu); + ST(&(x[WS(rs, 4)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[0]), VADD(Tw, Tx), ms, &(x[0])); + { + V Th, To, Tn, Tp, Tb, Tm; + Tb = VMUL(LDK(KP707106781), VSUB(T5, Ta)); + Th = VBYI(VSUB(Tb, Tg)); + To = VBYI(VADD(Tg, Tb)); + Tm = VMUL(LDK(KP707106781), VADD(T5, Ta)); + Tn = VSUB(Tl, Tm); + Tp = VADD(Tl, Tm); + ST(&(x[WS(rs, 3)]), VADD(Th, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VSUB(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tn, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1bv_8"), twinstr, &GENUS, {33, 16, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_8) (planner *p) { + X(kdft_dit_register) (p, t1bv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_9.c --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:33 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1bv_9 -include t1b.h -sign 1 */ + +/* + * This function contains 54 FP additions, 54 FP multiplications, + * (or, 20 additions, 20 multiplications, 34 fused multiply/add), + * 67 stack variables, 19 constants, and 18 memory accesses + */ +#include "t1b.h" + +static void t1bv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP907603734, +0.907603734547952313649323976213898122064543220); + DVK(KP666666666, +0.666666666666666666666666666666666666666666667); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP879385241, 
+0.879385241571816768108218554649462939872416269); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP826351822, +0.826351822333069651148283373230685203999624323); + DVK(KP347296355, +0.347296355333860697703433253538629592000751354); + DVK(KP898197570, +0.898197570222573798468955502359086394667167570); + DVK(KP673648177, +0.673648177666930348851716626769314796000375677); + DVK(KP420276625, +0.420276625461206169731530603237061658838781920); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP586256827, +0.586256827714544512072145703099641959914944179); + DVK(KP968908795, +0.968908795874236621082202410917456709164223497); + DVK(KP726681596, +0.726681596905677465811651808188092531873167623); + DVK(KP439692620, +0.439692620785908384054109277324731469936208134); + DVK(KP203604859, +0.203604859554852403062088995281827210665664861); + DVK(KP152703644, +0.152703644666139302296566746461370407999248646); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T3, T5, T9, Tn, Tb, Td, Th, Tj, Tx, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T2, T4, T8, Tm; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V Ta, Tc, Tg, Ti; + Ta = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 4]), T2); + T5 = BYTW(&(W[TWVL * 10]), T4); + T9 = BYTW(&(W[TWVL * 2]), T8); + Tn = BYTW(&(W[0]), Tm); + Tb = BYTW(&(W[TWVL * 8]), Ta); + Td = BYTW(&(W[TWVL * 14]), Tc); + Th = BYTW(&(W[TWVL * 6]), Tg); + Tj = BYTW(&(W[TWVL * 12]), Ti); + } + } + Tx = VSUB(T3, T5); + T6 = VADD(T3, 
T5); + { + V Tl, Te, Tk, To, T7, TN; + Tl = VSUB(Td, Tb); + Te = VADD(Tb, Td); + Tk = VSUB(Th, Tj); + To = VADD(Th, Tj); + T7 = VFNMS(LDK(KP500000000), T6, T1); + TN = VADD(T1, T6); + { + V Tf, TP, Tp, TO; + Tf = VFNMS(LDK(KP500000000), Te, T9); + TP = VADD(T9, Te); + Tp = VFNMS(LDK(KP500000000), To, Tn); + TO = VADD(Tn, To); + { + V Tz, TC, Tu, TD, TA, Tq, TQ, TS; + Tz = VFNMS(LDK(KP152703644), Tl, Tf); + TC = VFMA(LDK(KP203604859), Tf, Tl); + Tu = VFNMS(LDK(KP439692620), Tk, Tf); + TD = VFNMS(LDK(KP726681596), Tk, Tp); + TA = VFMA(LDK(KP968908795), Tp, Tk); + Tq = VFNMS(LDK(KP586256827), Tp, Tl); + TQ = VADD(TO, TP); + TS = VMUL(LDK(KP866025403), VSUB(TO, TP)); + { + V TI, TB, TH, TE, Tr, TR, Tw, Tv; + Tv = VFNMS(LDK(KP420276625), Tu, Tl); + TI = VFMA(LDK(KP673648177), TA, Tz); + TB = VFNMS(LDK(KP673648177), TA, Tz); + TH = VFNMS(LDK(KP898197570), TD, TC); + TE = VFMA(LDK(KP898197570), TD, TC); + Tr = VFNMS(LDK(KP347296355), Tq, Tk); + ST(&(x[0]), VADD(TQ, TN), ms, &(x[0])); + TR = VFNMS(LDK(KP500000000), TQ, TN); + Tw = VFNMS(LDK(KP826351822), Tv, Tp); + { + V TM, TL, TF, TJ, Ts, Ty, TG, TK, Tt; + TM = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), Tx, TI)); + TL = VFMA(LDK(KP852868531), TE, T7); + TF = VFNMS(LDK(KP500000000), TE, TB); + TJ = VFMA(LDK(KP666666666), TI, TH); + Ts = VFNMS(LDK(KP907603734), Tr, Tf); + ST(&(x[WS(rs, 6)]), VFNMSI(TS, TR), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(TS, TR), ms, &(x[WS(rs, 1)])); + Ty = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), Tx, Tw)); + ST(&(x[WS(rs, 8)]), VFNMSI(TM, TL), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(TM, TL), ms, &(x[WS(rs, 1)])); + TG = VFMA(LDK(KP852868531), TF, T7); + TK = VMUL(LDK(KP866025403), VFNMS(LDK(KP852868531), TJ, Tx)); + Tt = VFNMS(LDK(KP939692620), Ts, T7); + ST(&(x[WS(rs, 5)]), VFNMSI(TK, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(TK, TG), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(Ty, Tt), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFNMSI(Ty, Tt), ms, &(x[WS(rs, 1)])); + } + } + } 
+ } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1bv_9"), twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_9) (planner *p) { + X(kdft_dit_register) (p, t1bv_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1bv_9 -include t1b.h -sign 1 */ + +/* + * This function contains 54 FP additions, 42 FP multiplications, + * (or, 38 additions, 26 multiplications, 16 fused multiply/add), + * 38 stack variables, 14 constants, and 18 memory accesses + */ +#include "t1b.h" + +static void t1bv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP296198132, +0.296198132726023843175338011893050938967728390); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP173648177, +0.173648177666930348851716626769314796000375677); + DVK(KP556670399, +0.556670399226419366452912952047023132968291906); + DVK(KP766044443, +0.766044443118978035202392650555416673935832457); + DVK(KP642787609, +0.642787609686539326322643409907263432907559884); + DVK(KP663413948, +0.663413948168938396205421319635891297216863310); + DVK(KP150383733, +0.150383733180435296639271897612501926072238258); + DVK(KP342020143, +0.342020143325668733044099614682259580763083368); + DVK(KP813797681, +0.813797681349373692844693217248393223289101568); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), 
MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T6, Tu, Tg, Tf, TD, Tq, Tp, TE; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T3, T5, T2, T4; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 4]), T2); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = BYTW(&(W[TWVL * 10]), T4); + T6 = VADD(T3, T5); + Tu = VMUL(LDK(KP866025403), VSUB(T3, T5)); + } + { + V T9, Td, Tb, T8, Tc, Ta, Te; + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[0]), T8); + Tc = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 12]), Tc); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 6]), Ta); + Tg = VSUB(Tb, Td); + Te = VADD(Tb, Td); + Tf = VFNMS(LDK(KP500000000), Te, T9); + TD = VADD(T9, Te); + } + { + V Tj, Tn, Tl, Ti, Tm, Tk, To; + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = BYTW(&(W[TWVL * 2]), Ti); + Tm = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tn = BYTW(&(W[TWVL * 14]), Tm); + Tk = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tl = BYTW(&(W[TWVL * 8]), Tk); + Tq = VSUB(Tl, Tn); + To = VADD(Tl, Tn); + Tp = VFNMS(LDK(KP500000000), To, Tj); + TE = VADD(Tj, To); + } + { + V TF, TG, TH, TI; + TF = VBYI(VMUL(LDK(KP866025403), VSUB(TD, TE))); + TG = VADD(T1, T6); + TH = VADD(TD, TE); + TI = VFNMS(LDK(KP500000000), TH, TG); + ST(&(x[WS(rs, 3)]), VADD(TF, TI), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(TG, TH), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VSUB(TI, TF), ms, &(x[0])); + } + { + V TC, Tv, Tw, Tx, Th, Tr, Ts, T7, TB; + TC = VBYI(VSUB(VFMA(LDK(KP984807753), Tf, VFMA(LDK(KP813797681), Tq, VFNMS(LDK(KP150383733), Tg, VMUL(LDK(KP342020143), Tp)))), Tu)); + Tv = VFMA(LDK(KP663413948), Tg, VMUL(LDK(KP642787609), Tf)); + Tw = VFMA(LDK(KP150383733), Tq, VMUL(LDK(KP984807753), Tp)); + Tx = VADD(Tv, Tw); + Th = VFNMS(LDK(KP556670399), Tg, VMUL(LDK(KP766044443), Tf)); + Tr = VFNMS(LDK(KP852868531), Tq, VMUL(LDK(KP173648177), Tp)); + Ts = VADD(Th, Tr); + T7 = VFNMS(LDK(KP500000000), T6, T1); + TB = VFMA(LDK(KP852868531), Tg, VFMA(LDK(KP173648177), Tf, 
VFMA(LDK(KP296198132), Tq, VFNMS(LDK(KP939692620), Tp, T7)))); + ST(&(x[WS(rs, 7)]), VSUB(TB, TC), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(TB, TC), ms, &(x[0])); + { + V Tt, Ty, Tz, TA; + Tt = VADD(T7, Ts); + Ty = VBYI(VADD(Tu, Tx)); + ST(&(x[WS(rs, 8)]), VSUB(Tt, Ty), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(Tt, Ty), ms, &(x[WS(rs, 1)])); + Tz = VBYI(VADD(Tu, VFNMS(LDK(KP500000000), Tx, VMUL(LDK(KP866025403), VSUB(Th, Tr))))); + TA = VFMA(LDK(KP866025403), VSUB(Tw, Tv), VFNMS(LDK(KP500000000), Ts, T7)); + ST(&(x[WS(rs, 4)]), VADD(Tz, TA), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VSUB(TA, Tz), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1bv_9"), twinstr, &GENUS, {38, 26, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1bv_9) (planner *p) { + X(kdft_dit_register) (p, t1bv_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1fuv_10 -include t1fu.h */ + +/* + * This function contains 51 FP additions, 40 FP multiplications, + * (or, 33 additions, 22 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Td, TA, T4, Ta, Tk, TE, Tp, TF, TB, T9, T1, T2, Tb; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V Tg, Tn, Ti, Tl; + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + { + V T6, T8, T5, Tc; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Th, To, Tj, Tm, T7; + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + Th = BYTWJ(&(W[TWVL * 6]), Tg); + To = BYTWJ(&(W[0]), Tn); + Tj = BYTWJ(&(W[TWVL * 
16]), Ti); + Tm = BYTWJ(&(W[TWVL * 10]), Tl); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + T8 = BYTWJ(&(W[TWVL * 12]), T7); + TA = VADD(T1, T3); + T4 = VSUB(T1, T3); + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tk = VSUB(Th, Tj); + TE = VADD(Th, Tj); + Tp = VSUB(Tm, To); + TF = VADD(Tm, To); + } + TB = VADD(T6, T8); + T9 = VSUB(T6, T8); + } + } + Tb = BYTWJ(&(W[TWVL * 14]), Ta); + { + V TL, TG, Tw, Tq, TC, Te; + TL = VSUB(TE, TF); + TG = VADD(TE, TF); + Tw = VSUB(Tk, Tp); + Tq = VADD(Tk, Tp); + TC = VADD(Tb, Td); + Te = VSUB(Tb, Td); + { + V TM, TD, Tv, Tf; + TM = VSUB(TB, TC); + TD = VADD(TB, TC); + Tv = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V TP, TN, TH, TJ, Tz, Tx, Tr, Tt, TI, Ts; + TP = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TL, TM)); + TN = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TM, TL)); + TH = VADD(TD, TG); + TJ = VSUB(TD, TG); + Tz = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tv, Tw)); + Tx = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tv)); + Tr = VADD(Tf, Tq); + Tt = VSUB(Tf, Tq); + ST(&(x[0]), VADD(TA, TH), ms, &(x[0])); + TI = VFNMS(LDK(KP250000000), TH, TA); + ST(&(x[WS(rs, 5)]), VADD(T4, Tr), ms, &(x[WS(rs, 1)])); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + { + V TK, TO, Tu, Ty; + TK = VFNMS(LDK(KP559016994), TJ, TI); + TO = VFMA(LDK(KP559016994), TJ, TI); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + Ty = VFNMS(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 8)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + 
VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1fuv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_10) (planner *p) { + X(kdft_dit_register) (p, t1fuv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1fuv_10 -include t1fu.h */ + +/* + * This function contains 51 FP additions, 30 FP multiplications, + * (or, 45 additions, 24 multiplications, 6 fused multiply/add), + * 32 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Tr, TH, Tg, Tl, Tm, TA, TB, TJ, T5, Ta, Tb, TD, TE, TI, To; + V Tq, Tp; + To = LD(&(x[0]), ms, &(x[0])); + Tp = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tq = BYTWJ(&(W[TWVL * 8]), Tp); + Tr = VSUB(To, Tq); + TH = VADD(To, Tq); + { + V Td, Tk, Tf, Ti; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 6]), Tc); + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = BYTWJ(&(W[0]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTWJ(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 10]), Th); + } + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VADD(Tg, Tl); + TA = VADD(Td, Tf); + TB = VADD(Ti, Tk); + TJ = VADD(TA, TB); + } + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 
= LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTWJ(&(W[TWVL * 2]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTWJ(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 14]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VADD(T5, Ta); + TD = VADD(T2, T4); + TE = VADD(T7, T9); + TI = VADD(TD, TE); + } + { + V Tn, Ts, Tt, Tx, Tz, Tv, Tw, Ty, Tu; + Tn = VMUL(LDK(KP559016994), VSUB(Tb, Tm)); + Ts = VADD(Tb, Tm); + Tt = VFNMS(LDK(KP250000000), Ts, Tr); + Tv = VSUB(T5, Ta); + Tw = VSUB(Tg, Tl); + Tx = VBYI(VFMA(LDK(KP951056516), Tv, VMUL(LDK(KP587785252), Tw))); + Tz = VBYI(VFNMS(LDK(KP587785252), Tv, VMUL(LDK(KP951056516), Tw))); + ST(&(x[WS(rs, 5)]), VADD(Tr, Ts), ms, &(x[WS(rs, 1)])); + Ty = VSUB(Tt, Tn); + ST(&(x[WS(rs, 3)]), VSUB(Ty, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(Tz, Ty), ms, &(x[WS(rs, 1)])); + Tu = VADD(Tn, Tt); + ST(&(x[WS(rs, 1)]), VSUB(Tu, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(Tx, Tu), ms, &(x[WS(rs, 1)])); + } + { + V TM, TK, TL, TG, TO, TC, TF, TP, TN; + TM = VMUL(LDK(KP559016994), VSUB(TI, TJ)); + TK = VADD(TI, TJ); + TL = VFNMS(LDK(KP250000000), TK, TH); + TC = VSUB(TA, TB); + TF = VSUB(TD, TE); + TG = VBYI(VFNMS(LDK(KP587785252), TF, VMUL(LDK(KP951056516), TC))); + TO = VBYI(VFMA(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TC))); + ST(&(x[0]), VADD(TH, TK), ms, &(x[0])); + TP = VADD(TM, TL); + ST(&(x[WS(rs, 4)]), VADD(TO, TP), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VSUB(TP, TO), ms, &(x[0])); + TN = VSUB(TL, TM); + ST(&(x[WS(rs, 2)]), VADD(TG, TN), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TN, TG), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1fuv_10"), 
twinstr, &GENUS, {45, 24, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_10) (planner *p) { + X(kdft_dit_register) (p, t1fuv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:12 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1fuv_2 -include t1fu.h */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1fuv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_2) (planner *p) { + X(kdft_dit_register) (p, t1fuv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1fuv_2 -include t1fu.h */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_2(R *ri, R *ii, const R *W, 
stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1fuv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_2) (planner *p) { + X(kdft_dit_register) (p, t1fuv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:12 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1fuv_3 -include t1fu.h */ + +/* + * This function contains 8 FP additions, 8 FP multiplications, + * (or, 5 additions, 5 multiplications, 3 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T1, T2, T4; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, T8, T6, T7; + T3 = BYTWJ(&(W[0]), T2); + T5 = BYTWJ(&(W[TWVL * 2]), T4); + T8 = VMUL(LDK(KP866025403), VSUB(T5, T3)); + T6 = VADD(T3, T5); + T7 = VFNMS(LDK(KP500000000), T6, T1); + ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T8, T7), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(T8, T7), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1fuv_3"), twinstr, &GENUS, {5, 5, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_3) (planner *p) { + X(kdft_dit_register) (p, t1fuv_3, &desc); +} +#else /* 
HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1fuv_3 -include t1fu.h */ + +/* + * This function contains 8 FP additions, 6 FP multiplications, + * (or, 7 additions, 5 multiplications, 1 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T1, T3, T5, T6, T2, T4, T7, T8; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 2]), T4); + T6 = VADD(T3, T5); + ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); + T7 = VFNMS(LDK(KP500000000), T6, T1); + T8 = VBYI(VMUL(LDK(KP866025403), VSUB(T5, T3))); + ST(&(x[WS(rs, 2)]), VSUB(T7, T8), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T7, T8), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1fuv_3"), twinstr, &GENUS, {7, 5, 1, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_3) (planner *p) { + X(kdft_dit_register) (p, t1fuv_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:12 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1fuv_4 -include t1fu.h */ + +/* + * This function contains 11 FP additions, 8 FP multiplications, + * (or, 9 additions, 6 multiplications, 2 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T7, T2, T5, T8, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 4]), T7); + T3 = BYTWJ(&(W[TWVL * 2]), T2); + T6 = BYTWJ(&(W[0]), T5); + { + V Ta, T4, Tb, T9; + Ta = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tb = VADD(T6, T8); + T9 = VSUB(T6, T8); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + 
ST(&(x[WS(rs, 3)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1fuv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_4) (planner *p) { + X(kdft_dit_register) (p, t1fuv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1fuv_4 -include t1fu.h */ + +/* + * This function contains 11 FP additions, 6 FP multiplications, + * (or, 11 additions, 6 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T3, T6, T7, T2, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 4]), T7); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 2]), T2); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTWJ(&(W[0]), T5); + { + V T4, T9, Ta, Tb; + T4 = VSUB(T1, T3); + T9 = VBYI(VSUB(T6, T8)); + ST(&(x[WS(rs, 1)]), VSUB(T4, T9), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T4, T9), ms, &(x[WS(rs, 1)])); + Ta = VADD(T1, T3); + Tb = VADD(T6, T8); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1fuv_4"), twinstr, &GENUS, {11, 6, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_4) (planner *p) { + 
X(kdft_dit_register) (p, t1fuv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:12 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1fuv_5 -include t1fu.h */ + +/* + * This function contains 20 FP additions, 19 FP multiplications, + * (or, 11 additions, 10 multiplications, 9 fused multiply/add), + * 26 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T2, T9, T4, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, Ta, T5, T8; + T3 = BYTWJ(&(W[0]), T2); + Ta = BYTWJ(&(W[TWVL * 4]), T9); + T5 = BYTWJ(&(W[TWVL * 6]), T4); + T8 = BYTWJ(&(W[TWVL * 2]), T7); + { + V T6, Tg, Tb, Th; + T6 = VADD(T3, T5); + Tg = VSUB(T3, T5); + Tb = VADD(T8, Ta); + Th = VSUB(T8, Ta); + { + V Te, Tc, Tk, Ti, Td, Tj, Tf; + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Tk = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tg, Th)); + Ti = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Th, Tg)); + Td = VFNMS(LDK(KP250000000), Tc, T1); + ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); + Tj = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + ST(&(x[WS(rs, 2)]), VFMAI(Tk, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(Ti, Tf), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1fuv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_5) (planner *p) { + X(kdft_dit_register) (p, t1fuv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1fuv_5 -include t1fu.h */ + +/* + * This function contains 20 FP additions, 14 FP multiplications, + * (or, 17 additions, 11 
multiplications, 3 fused multiply/add), + * 20 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V Tc, Tg, Th, T5, Ta, Td; + Tc = LD(&(x[0]), ms, &(x[0])); + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T4 = BYTWJ(&(W[TWVL * 6]), T3); + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 2]), T6); + } + Tg = VSUB(T2, T4); + Th = VSUB(T7, T9); + T5 = VADD(T2, T4); + Ta = VADD(T7, T9); + Td = VADD(T5, Ta); + } + ST(&(x[0]), VADD(Tc, Td), ms, &(x[0])); + { + V Ti, Tj, Tf, Tk, Tb, Te; + Ti = VBYI(VFMA(LDK(KP951056516), Tg, VMUL(LDK(KP587785252), Th))); + Tj = VBYI(VFNMS(LDK(KP587785252), Tg, VMUL(LDK(KP951056516), Th))); + Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta)); + Te = VFNMS(LDK(KP250000000), Td, Tc); + Tf = VADD(Tb, Te); + Tk = VSUB(Te, Tb); + ST(&(x[WS(rs, 1)]), VSUB(Tf, Ti), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tj, Tk), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1fuv_5"), twinstr, &GENUS, {17, 11, 3, 0}, 0, 0, 0 }; + 
+void XSIMD(codelet_t1fuv_5) (planner *p) { + X(kdft_dit_register) (p, t1fuv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:12 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1fuv_6 -include t1fu.h */ + +/* + * This function contains 23 FP additions, 18 FP multiplications, + * (or, 17 additions, 12 multiplications, 6 fused multiply/add), + * 27 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + 
{ + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V T1, T2, Ta, Tc, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T3, Tb, Td, T6, T8; + T3 = BYTWJ(&(W[TWVL * 4]), T2); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Td = BYTWJ(&(W[0]), Tc); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + { + V Ti, T4, Tk, Te, Tj, T9; + Ti = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tk = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tj = VADD(T6, T8); + T9 = VSUB(T6, T8); + { + V Tl, Tn, Tf, Th, Tm, Tg; + Tl = VADD(Tj, Tk); + Tn = VMUL(LDK(KP866025403), VSUB(Tk, Tj)); + Tf = VADD(T9, Te); + Th = VMUL(LDK(KP866025403), VSUB(Te, T9)); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[WS(rs, 3)]), VADD(T4, Tf), ms, &(x[WS(rs, 1)])); + Tg = VFNMS(LDK(KP500000000), Tf, T4); + ST(&(x[WS(rs, 2)]), VFNMSI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(Th, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Th, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1fuv_6"), twinstr, &GENUS, {17, 12, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_6) (planner *p) { + X(kdft_dit_register) (p, t1fuv_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1fuv_6 -include t1fu.h */ + +/* + * This function contains 23 FP additions, 14 FP multiplications, + * (or, 21 additions, 12 
multiplications, 2 fused multiply/add), + * 19 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V T4, Ti, Te, Tk, T9, Tj, T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 4]), T2); + T4 = VSUB(T1, T3); + Ti = VADD(T1, T3); + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[0]), Tc); + Te = VSUB(Tb, Td); + Tk = VADD(Tb, Td); + } + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + T9 = VSUB(T6, T8); + Tj = VADD(T6, T8); + } + { + V Th, Tf, Tg, Tn, Tl, Tm; + Th = VBYI(VMUL(LDK(KP866025403), VSUB(Te, T9))); + Tf = VADD(T9, Te); + Tg = VFNMS(LDK(KP500000000), Tf, T4); + ST(&(x[WS(rs, 3)]), VADD(T4, Tf), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tg, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tg, Th), ms, &(x[WS(rs, 1)])); + Tn = VBYI(VMUL(LDK(KP866025403), VSUB(Tk, Tj))); + Tl = VADD(Tj, Tk); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(Tm, Tn), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Tm, Tn), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1fuv_6"), twinstr, &GENUS, {21, 12, 2, 0}, 0, 0, 0 
}; + +void XSIMD(codelet_t1fuv_6) (planner *p) { + X(kdft_dit_register) (p, t1fuv_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:12 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1fuv_7 -include t1fu.h */ + +/* + * This function contains 36 FP additions, 36 FP multiplications, + * (or, 15 additions, 15 multiplications, 21 fused multiply/add), + * 42 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, 
+0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V T1, T2, T4, Te, Tc, T9, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Te = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, Tf, Td, Ta, T8; + T3 = BYTWJ(&(W[0]), T2); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + Tf = BYTWJ(&(W[TWVL * 6]), Te); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Ta = BYTWJ(&(W[TWVL * 8]), T9); + T8 = BYTWJ(&(W[TWVL * 2]), T7); + { + V T6, Tk, Tg, Tl, Tb, Tm; + T6 = VADD(T3, T5); + Tk = VSUB(T5, T3); + Tg = VADD(Td, Tf); + Tl = VSUB(Tf, Td); + Tb = VADD(T8, Ta); + Tm = VSUB(Ta, T8); + { + V Th, Ts, Tp, Tu, Tn, Tx, Ti, Tt; + Th = VFNMS(LDK(KP356895867), T6, Tg); + Ts = VFMA(LDK(KP554958132), Tl, Tk); + ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0])); + Tp = VFNMS(LDK(KP356895867), Tb, T6); + Tu = VFNMS(LDK(KP356895867), Tg, Tb); + Tn = VFMA(LDK(KP554958132), Tm, Tl); + Tx = VFNMS(LDK(KP554958132), Tk, Tm); + Ti = VFNMS(LDK(KP692021471), Th, Tb); + Tt = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Ts, Tm)); + { + V Tq, Tv, To, Ty, Tj, Tr, Tw; + Tq = VFNMS(LDK(KP692021471), Tp, Tg); + Tv = VFNMS(LDK(KP692021471), Tu, T6); + To = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tn, Tk)); + Ty = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tx, Tl)); + Tj = VFNMS(LDK(KP900968867), Ti, T1); + Tr = VFNMS(LDK(KP900968867), Tq, T1); + Tw = 
VFNMS(LDK(KP900968867), Tv, T1); + ST(&(x[WS(rs, 2)]), VFMAI(To, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(To, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Tt, Tr), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tt, Tr), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(Ty, Tw), ms, &(x[0])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1fuv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_7) (planner *p) { + X(kdft_dit_register) (p, t1fuv_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1fuv_7 -include t1fu.h */ + +/* + * This function contains 36 FP additions, 30 FP multiplications, + * (or, 24 additions, 18 multiplications, 12 fused multiply/add), + * 21 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V T1, Tg, Tj, T6, Ti, Tb, Tk, Tp, To; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V Td, Tf, Tc, Te; + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Te = 
LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tf = BYTWJ(&(W[TWVL * 6]), Te); + Tg = VADD(Td, Tf); + Tj = VSUB(Tf, Td); + } + { + V T3, T5, T2, T4; + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + T6 = VADD(T3, T5); + Ti = VSUB(T5, T3); + } + { + V T8, Ta, T7, T9; + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 2]), T7); + T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = BYTWJ(&(W[TWVL * 8]), T9); + Tb = VADD(T8, Ta); + Tk = VSUB(Ta, T8); + } + ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0])); + Tp = VBYI(VFMA(LDK(KP433883739), Ti, VFNMS(LDK(KP781831482), Tk, VMUL(LDK(KP974927912), Tj)))); + To = VFMA(LDK(KP623489801), Tb, VFNMS(LDK(KP222520933), Tg, VFNMS(LDK(KP900968867), T6, T1))); + ST(&(x[WS(rs, 4)]), VSUB(To, Tp), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + { + V Tl, Th, Tn, Tm; + Tl = VBYI(VFNMS(LDK(KP781831482), Tj, VFNMS(LDK(KP433883739), Tk, VMUL(LDK(KP974927912), Ti)))); + Th = VFMA(LDK(KP623489801), Tg, VFNMS(LDK(KP900968867), Tb, VFNMS(LDK(KP222520933), T6, T1))); + ST(&(x[WS(rs, 5)]), VSUB(Th, Tl), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(Th, Tl), ms, &(x[0])); + Tn = VBYI(VFMA(LDK(KP781831482), Ti, VFMA(LDK(KP974927912), Tk, VMUL(LDK(KP433883739), Tj)))); + Tm = VFMA(LDK(KP623489801), T6, VFNMS(LDK(KP900968867), Tg, VFNMS(LDK(KP222520933), Tb, T1))); + ST(&(x[WS(rs, 6)]), VSUB(Tm, Tn), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(Tm, Tn), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1fuv_7"), twinstr, &GENUS, {24, 18, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_7) (planner *p) { + X(kdft_dit_register) (p, t1fuv_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:13 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1fuv_8 -include t1fu.h */ + +/* + * This function contains 33 FP additions, 24 FP multiplications, + * (or, 23 additions, 14 multiplications, 10 fused multiply/add), + * 36 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, Th, Tj, T5, T7, Ta, Tc; + T1 = 
LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Ti, Tk, T6, T8, Tb, Td; + T3 = BYTWJ(&(W[TWVL * 6]), T2); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + Tk = BYTWJ(&(W[TWVL * 10]), Tj); + T6 = BYTWJ(&(W[0]), T5); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + Tb = BYTWJ(&(W[TWVL * 12]), Ta); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + { + V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts; + Tq = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tr = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + Tt = VADD(T6, T8); + T9 = VSUB(T6, T8); + Tu = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tw = VSUB(Tq, Tr); + Ts = VADD(Tq, Tr); + { + V Tx, Tv, Tm, Tf; + Tx = VSUB(Tu, Tt); + Tv = VADD(Tt, Tu); + Tm = VSUB(Te, T9); + Tf = VADD(T9, Te); + { + V Tp, Tn, To, Tg; + ST(&(x[WS(rs, 2)]), VFMAI(Tx, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tx, Tw), ms, &(x[0])); + ST(&(x[0]), VADD(Ts, Tv), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Ts, Tv), ms, &(x[0])); + Tp = VFMA(LDK(KP707106781), Tm, Tl); + Tn = VFNMS(LDK(KP707106781), Tm, Tl); + To = VFNMS(LDK(KP707106781), Tf, T4); + Tg = VFMA(LDK(KP707106781), Tf, T4); + ST(&(x[WS(rs, 5)]), VFNMSI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tn, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tn, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1fuv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_8) (planner *p) { + X(kdft_dit_register) (p, t1fuv_8, &desc); +} +#else /* 
HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1fuv_8 -include t1fu.h */ + +/* + * This function contains 33 FP additions, 16 FP multiplications, + * (or, 33 additions, 16 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T4, Tq, Tm, Tr, T9, Tt, Te, Tu, T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 6]), T2); + T4 = VSUB(T1, T3); + Tq = VADD(T1, T3); + { + V Tj, Tl, Ti, Tk; + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = BYTWJ(&(W[TWVL * 2]), Ti); + Tk = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tl = BYTWJ(&(W[TWVL * 10]), Tk); + Tm = VSUB(Tj, Tl); + Tr = VADD(Tj, Tl); + } + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTWJ(&(W[0]), T5); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + T9 = VSUB(T6, T8); + Tt = VADD(T6, T8); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tb = BYTWJ(&(W[TWVL * 12]), Ta); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Te = VSUB(Tb, Td); + Tu = VADD(Tb, Td); + } + { + V Ts, Tv, Tw, Tx; + Ts = VADD(Tq, Tr); + Tv = VADD(Tt, Tu); + ST(&(x[WS(rs, 4)]), VSUB(Ts, Tv), ms, &(x[0])); + ST(&(x[0]), VADD(Ts, Tv), ms, &(x[0])); + Tw = VSUB(Tq, Tr); + Tx = VBYI(VSUB(Tu, Tt)); + ST(&(x[WS(rs, 6)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tw, Tx), ms, &(x[0])); + { + V Tg, To, Tn, Tp, Tf, Th; + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VADD(T4, Tf); + 
To = VSUB(T4, Tf); + Th = VMUL(LDK(KP707106781), VSUB(Te, T9)); + Tn = VBYI(VSUB(Th, Tm)); + Tp = VBYI(VADD(Tm, Th)); + ST(&(x[WS(rs, 7)]), VSUB(Tg, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tg, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(To, Tp), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1fuv_8"), twinstr, &GENUS, {33, 16, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_8) (planner *p) { + X(kdft_dit_register) (p, t1fuv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fuv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:13 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1fuv_9 -include t1fu.h */ + +/* + * This function contains 54 FP additions, 54 FP multiplications, + * (or, 20 additions, 20 multiplications, 34 fused multiply/add), + * 67 stack variables, 19 constants, and 18 memory accesses + */ +#include "t1fu.h" + +static void t1fuv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP826351822, +0.826351822333069651148283373230685203999624323); + DVK(KP879385241, +0.879385241571816768108218554649462939872416269); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP666666666, +0.666666666666666666666666666666666666666666667); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP907603734, +0.907603734547952313649323976213898122064543220); + DVK(KP420276625, +0.420276625461206169731530603237061658838781920); + DVK(KP673648177, +0.673648177666930348851716626769314796000375677); + DVK(KP898197570, +0.898197570222573798468955502359086394667167570); + DVK(KP347296355, +0.347296355333860697703433253538629592000751354); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP439692620, +0.439692620785908384054109277324731469936208134); + DVK(KP203604859, +0.203604859554852403062088995281827210665664861); + DVK(KP152703644, +0.152703644666139302296566746461370407999248646); + DVK(KP586256827, 
+0.586256827714544512072145703099641959914944179); + DVK(KP968908795, +0.968908795874236621082202410917456709164223497); + DVK(KP726681596, +0.726681596905677465811651808188092531873167623); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T3, T5, T9, Th, Tb, Td, Tj, Tl, TD, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T2, T4, T8, Tg; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tg = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V Ta, Tc, Ti, Tk; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tk = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 4]), T2); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + T9 = BYTWJ(&(W[0]), T8); + Th = BYTWJ(&(W[TWVL * 2]), Tg); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Td = BYTWJ(&(W[TWVL * 12]), Tc); + Tj = BYTWJ(&(W[TWVL * 8]), Ti); + Tl = BYTWJ(&(W[TWVL * 14]), Tk); + } + } + TD = VSUB(T5, T3); + T6 = VADD(T3, T5); + { + V Tt, Te, Tu, Tm, Tr, T7; + Tt = VSUB(Tb, Td); + Te = VADD(Tb, Td); + Tu = VSUB(Tl, Tj); + Tm = VADD(Tj, Tl); + Tr = VFNMS(LDK(KP500000000), T6, T1); + T7 = VADD(T1, T6); + { + V Tv, Tf, Ts, Tn; + Tv = VFNMS(LDK(KP500000000), Te, T9); + Tf = VADD(T9, Te); + Ts = VFNMS(LDK(KP500000000), Tm, Th); + Tn = VADD(Th, Tm); + { + V TG, TK, Tw, TJ, TF, TA, To, Tq; + TG = VFNMS(LDK(KP726681596), Tt, Tv); + TK = VFMA(LDK(KP968908795), Tv, Tt); + Tw = VFNMS(LDK(KP586256827), Tv, Tu); + TJ = VFNMS(LDK(KP152703644), Tu, Ts); + TF = VFMA(LDK(KP203604859), Ts, Tu); + TA = VFNMS(LDK(KP439692620), Tt, Ts); + To = VADD(Tf, Tn); + Tq = VMUL(LDK(KP866025403), VSUB(Tn, Tf)); + { + V TQ, TH, TL, TN, TB, Tp, Ty, TI, Tx; + Tx = VFNMS(LDK(KP347296355), Tw, Tt); + TQ = 
VFNMS(LDK(KP898197570), TG, TF); + TH = VFMA(LDK(KP898197570), TG, TF); + TL = VFMA(LDK(KP673648177), TK, TJ); + TN = VFNMS(LDK(KP673648177), TK, TJ); + TB = VFNMS(LDK(KP420276625), TA, Tu); + ST(&(x[0]), VADD(T7, To), ms, &(x[0])); + Tp = VFNMS(LDK(KP500000000), To, T7); + Ty = VFNMS(LDK(KP907603734), Tx, Ts); + TI = VFMA(LDK(KP852868531), TH, Tr); + { + V TO, TR, TM, TC, Tz, TP, TS, TE; + TO = VFNMS(LDK(KP500000000), TH, TN); + TR = VFMA(LDK(KP666666666), TL, TQ); + TM = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), TD, TL)); + TC = VFNMS(LDK(KP826351822), TB, Tv); + ST(&(x[WS(rs, 6)]), VFNMSI(Tq, Tp), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Tq, Tp), ms, &(x[WS(rs, 1)])); + Tz = VFNMS(LDK(KP939692620), Ty, Tr); + TP = VFMA(LDK(KP852868531), TO, Tr); + TS = VMUL(LDK(KP866025403), VFMA(LDK(KP852868531), TR, TD)); + ST(&(x[WS(rs, 8)]), VFMAI(TM, TI), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(TM, TI), ms, &(x[WS(rs, 1)])); + TE = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), TD, TC)); + ST(&(x[WS(rs, 4)]), VFMAI(TS, TP), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(TS, TP), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(TE, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(TE, Tz), ms, &(x[0])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1fuv_9"), twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_9) (planner *p) { + X(kdft_dit_register) (p, t1fuv_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1fuv_9 -include t1fu.h */ + +/* + * This function contains 54 FP additions, 42 FP multiplications, + * (or, 38 additions, 26 multiplications, 16 fused multiply/add), + * 38 stack variables, 14 constants, and 18 memory accesses + 
*/ +#include "t1fu.h" + +static void t1fuv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP296198132, +0.296198132726023843175338011893050938967728390); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP173648177, +0.173648177666930348851716626769314796000375677); + DVK(KP556670399, +0.556670399226419366452912952047023132968291906); + DVK(KP766044443, +0.766044443118978035202392650555416673935832457); + DVK(KP642787609, +0.642787609686539326322643409907263432907559884); + DVK(KP663413948, +0.663413948168938396205421319635891297216863310); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP150383733, +0.150383733180435296639271897612501926072238258); + DVK(KP342020143, +0.342020143325668733044099614682259580763083368); + DVK(KP813797681, +0.813797681349373692844693217248393223289101568); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T6, TA, Tt, Tf, Ts, Tw, Tn, Tv; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T3, T5, T2, T4; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 4]), T2); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + T6 = VADD(T3, T5); + TA = VMUL(LDK(KP866025403), VSUB(T5, T3)); + } + { + V T9, Td, Tb, T8, Tc, Ta, Te; + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[0]), T8); + Tc = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 12]), Tc); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Tt = VSUB(Td, Tb); + Te = VADD(Tb, Td); + Tf = VADD(T9, Te); + Ts = VFNMS(LDK(KP500000000), Te, T9); + } + { + V Th, Tl, Tj, Tg, Tk, Ti, 
Tm; + Tg = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 2]), Tg); + Tk = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tl = BYTWJ(&(W[TWVL * 14]), Tk); + Ti = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tj = BYTWJ(&(W[TWVL * 8]), Ti); + Tw = VSUB(Tl, Tj); + Tm = VADD(Tj, Tl); + Tn = VADD(Th, Tm); + Tv = VFNMS(LDK(KP500000000), Tm, Th); + } + { + V Tq, T7, To, Tp; + Tq = VBYI(VMUL(LDK(KP866025403), VSUB(Tn, Tf))); + T7 = VADD(T1, T6); + To = VADD(Tf, Tn); + Tp = VFNMS(LDK(KP500000000), To, T7); + ST(&(x[0]), VADD(T7, To), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(Tp, Tq), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VSUB(Tp, Tq), ms, &(x[0])); + } + { + V TI, TB, TC, TD, Tu, Tx, Ty, Tr, TH; + TI = VBYI(VSUB(VFNMS(LDK(KP342020143), Tv, VFNMS(LDK(KP150383733), Tt, VFNMS(LDK(KP984807753), Ts, VMUL(LDK(KP813797681), Tw)))), TA)); + TB = VFNMS(LDK(KP642787609), Ts, VMUL(LDK(KP663413948), Tt)); + TC = VFNMS(LDK(KP984807753), Tv, VMUL(LDK(KP150383733), Tw)); + TD = VADD(TB, TC); + Tu = VFMA(LDK(KP766044443), Ts, VMUL(LDK(KP556670399), Tt)); + Tx = VFMA(LDK(KP173648177), Tv, VMUL(LDK(KP852868531), Tw)); + Ty = VADD(Tu, Tx); + Tr = VFNMS(LDK(KP500000000), T6, T1); + TH = VFMA(LDK(KP173648177), Ts, VFNMS(LDK(KP296198132), Tw, VFNMS(LDK(KP939692620), Tv, VFNMS(LDK(KP852868531), Tt, Tr)))); + ST(&(x[WS(rs, 7)]), VSUB(TH, TI), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(TH, TI), ms, &(x[0])); + { + V Tz, TE, TF, TG; + Tz = VADD(Tr, Ty); + TE = VBYI(VADD(TA, TD)); + ST(&(x[WS(rs, 8)]), VSUB(Tz, TE), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(TE, Tz), ms, &(x[WS(rs, 1)])); + TF = VFMA(LDK(KP866025403), VSUB(TB, TC), VFNMS(LDK(KP500000000), Ty, Tr)); + TG = VBYI(VADD(TA, VFNMS(LDK(KP500000000), TD, VMUL(LDK(KP866025403), VSUB(Tx, Tu))))); + ST(&(x[WS(rs, 5)]), VSUB(TF, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(TF, TG), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), 
+ VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1fuv_9"), twinstr, &GENUS, {38, 26, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fuv_9) (planner *p) { + X(kdft_dit_register) (p, t1fuv_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:15 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1fv_10 -include t1f.h */ + +/* + * This function contains 51 FP additions, 40 FP multiplications, + * (or, 33 additions, 22 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1f.h" + +static void t1fv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Td, TA, T4, Ta, Tk, TE, Tp, TF, TB, T9, T1, T2, Tb; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V Tg, Tn, Ti, Tl; + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + { + V T6, T8, T5, Tc; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Th, To, Tj, Tm, T7; + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + Th = BYTWJ(&(W[TWVL * 6]), Tg); + To = BYTWJ(&(W[0]), Tn); + Tj = BYTWJ(&(W[TWVL * 16]), 
Ti); + Tm = BYTWJ(&(W[TWVL * 10]), Tl); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + T8 = BYTWJ(&(W[TWVL * 12]), T7); + TA = VADD(T1, T3); + T4 = VSUB(T1, T3); + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tk = VSUB(Th, Tj); + TE = VADD(Th, Tj); + Tp = VSUB(Tm, To); + TF = VADD(Tm, To); + } + TB = VADD(T6, T8); + T9 = VSUB(T6, T8); + } + } + Tb = BYTWJ(&(W[TWVL * 14]), Ta); + { + V TL, TG, Tw, Tq, TC, Te; + TL = VSUB(TE, TF); + TG = VADD(TE, TF); + Tw = VSUB(Tk, Tp); + Tq = VADD(Tk, Tp); + TC = VADD(Tb, Td); + Te = VSUB(Tb, Td); + { + V TM, TD, Tv, Tf; + TM = VSUB(TB, TC); + TD = VADD(TB, TC); + Tv = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V TP, TN, TH, TJ, Tz, Tx, Tr, Tt, TI, Ts; + TP = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TL, TM)); + TN = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TM, TL)); + TH = VADD(TD, TG); + TJ = VSUB(TD, TG); + Tz = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tv, Tw)); + Tx = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tv)); + Tr = VADD(Tf, Tq); + Tt = VSUB(Tf, Tq); + ST(&(x[0]), VADD(TA, TH), ms, &(x[0])); + TI = VFNMS(LDK(KP250000000), TH, TA); + ST(&(x[WS(rs, 5)]), VADD(T4, Tr), ms, &(x[WS(rs, 1)])); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + { + V TK, TO, Tu, Ty; + TK = VFNMS(LDK(KP559016994), TJ, TI); + TO = VFMA(LDK(KP559016994), TJ, TI); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + Ty = VFNMS(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 8)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), 
+ VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1fv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_10) (planner *p) { + X(kdft_dit_register) (p, t1fv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t1fv_10 -include t1f.h */ + +/* + * This function contains 51 FP additions, 30 FP multiplications, + * (or, 45 additions, 24 multiplications, 6 fused multiply/add), + * 32 stack variables, 4 constants, and 20 memory accesses + */ +#include "t1f.h" + +static void t1fv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Tr, TH, Tg, Tl, Tm, TA, TB, TJ, T5, Ta, Tb, TD, TE, TI, To; + V Tq, Tp; + To = LD(&(x[0]), ms, &(x[0])); + Tp = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tq = BYTWJ(&(W[TWVL * 8]), Tp); + Tr = VSUB(To, Tq); + TH = VADD(To, Tq); + { + V Td, Tk, Tf, Ti; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 6]), Tc); + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = BYTWJ(&(W[0]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTWJ(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 10]), Th); + } + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VADD(Tg, Tl); + TA = VADD(Td, Tf); + TB = VADD(Ti, Tk); + TJ = VADD(TA, TB); + } + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 
2)]), ms, &(x[0])); + T2 = BYTWJ(&(W[TWVL * 2]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTWJ(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 14]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VADD(T5, Ta); + TD = VADD(T2, T4); + TE = VADD(T7, T9); + TI = VADD(TD, TE); + } + { + V Tn, Ts, Tt, Tx, Tz, Tv, Tw, Ty, Tu; + Tn = VMUL(LDK(KP559016994), VSUB(Tb, Tm)); + Ts = VADD(Tb, Tm); + Tt = VFNMS(LDK(KP250000000), Ts, Tr); + Tv = VSUB(T5, Ta); + Tw = VSUB(Tg, Tl); + Tx = VBYI(VFMA(LDK(KP951056516), Tv, VMUL(LDK(KP587785252), Tw))); + Tz = VBYI(VFNMS(LDK(KP587785252), Tv, VMUL(LDK(KP951056516), Tw))); + ST(&(x[WS(rs, 5)]), VADD(Tr, Ts), ms, &(x[WS(rs, 1)])); + Ty = VSUB(Tt, Tn); + ST(&(x[WS(rs, 3)]), VSUB(Ty, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(Tz, Ty), ms, &(x[WS(rs, 1)])); + Tu = VADD(Tn, Tt); + ST(&(x[WS(rs, 1)]), VSUB(Tu, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(Tx, Tu), ms, &(x[WS(rs, 1)])); + } + { + V TM, TK, TL, TG, TO, TC, TF, TP, TN; + TM = VMUL(LDK(KP559016994), VSUB(TI, TJ)); + TK = VADD(TI, TJ); + TL = VFNMS(LDK(KP250000000), TK, TH); + TC = VSUB(TA, TB); + TF = VSUB(TD, TE); + TG = VBYI(VFNMS(LDK(KP587785252), TF, VMUL(LDK(KP951056516), TC))); + TO = VBYI(VFMA(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TC))); + ST(&(x[0]), VADD(TH, TK), ms, &(x[0])); + TP = VADD(TM, TL); + ST(&(x[WS(rs, 4)]), VADD(TO, TP), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VSUB(TP, TO), ms, &(x[0])); + TN = VSUB(TL, TM); + ST(&(x[WS(rs, 2)]), VADD(TG, TN), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TN, TG), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t1fv_10"), twinstr, &GENUS, 
{45, 24, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_10) (planner *p) { + X(kdft_dit_register) (p, t1fv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:15 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name t1fv_12 -include t1f.h */ + +/* + * This function contains 59 FP additions, 42 FP multiplications, + * (or, 41 additions, 24 multiplications, 18 fused multiply/add), + * 41 stack variables, 2 constants, and 24 memory accesses + */ +#include "t1f.h" + +static void t1fv_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 22)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(12, rs)) { + V Tq, Ti, T7, TQ, Tu, TA, TU, Tk, TR, Tf, TE, TM; + { + V T9, TC, Tj, TD, Te; + { + V T1, T4, T2, Tm, Tx, To; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tx = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + To = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T5, T3, Tn, Ty, Tp, Td, Tb, T8, Tc, Ta; + T8 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 14]), T4); + T3 = BYTWJ(&(W[TWVL * 6]), T2); + Tn = BYTWJ(&(W[0]), Tm); + Ty = BYTWJ(&(W[TWVL * 16]), Tx); + Tp = BYTWJ(&(W[TWVL * 8]), To); + T9 = BYTWJ(&(W[TWVL * 10]), T8); + Td = BYTWJ(&(W[TWVL * 2]), Tc); + Tb = BYTWJ(&(W[TWVL * 
18]), Ta); + { + V Th, T6, Tt, Tz; + Th = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TC = VSUB(T5, T3); + T6 = VADD(T3, T5); + Tt = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tz = VADD(Tn, Tp); + Tq = VSUB(Tn, Tp); + Tj = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TD = VSUB(Td, Tb); + Te = VADD(Tb, Td); + Ti = BYTWJ(&(W[TWVL * 20]), Th); + T7 = VFNMS(LDK(KP500000000), T6, T1); + TQ = VADD(T1, T6); + Tu = BYTWJ(&(W[TWVL * 4]), Tt); + TA = VFNMS(LDK(KP500000000), Tz, Ty); + TU = VADD(Ty, Tz); + } + } + } + Tk = BYTWJ(&(W[TWVL * 12]), Tj); + TR = VADD(T9, Te); + Tf = VFNMS(LDK(KP500000000), Te, T9); + TE = VSUB(TC, TD); + TM = VADD(TC, TD); + } + { + V Tv, Tl, TI, Tg, TW, TS; + Tv = VADD(Tk, Ti); + Tl = VSUB(Ti, Tk); + TI = VADD(T7, Tf); + Tg = VSUB(T7, Tf); + TW = VADD(TQ, TR); + TS = VSUB(TQ, TR); + { + V TT, Tw, TL, Tr; + TT = VADD(Tu, Tv); + Tw = VFNMS(LDK(KP500000000), Tv, Tu); + TL = VSUB(Tl, Tq); + Tr = VADD(Tl, Tq); + { + V TP, TN, TG, Ts, TO, TK, TH, TF; + { + V TX, TV, TJ, TB; + TX = VADD(TT, TU); + TV = VSUB(TT, TU); + TJ = VADD(Tw, TA); + TB = VSUB(Tw, TA); + TP = VMUL(LDK(KP866025403), VADD(TM, TL)); + TN = VMUL(LDK(KP866025403), VSUB(TL, TM)); + TG = VFNMS(LDK(KP866025403), Tr, Tg); + Ts = VFMA(LDK(KP866025403), Tr, Tg); + ST(&(x[WS(rs, 6)]), VSUB(TW, TX), ms, &(x[0])); + ST(&(x[0]), VADD(TW, TX), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(TV, TS), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(TV, TS), ms, &(x[WS(rs, 1)])); + TO = VADD(TI, TJ); + TK = VSUB(TI, TJ); + TH = VFMA(LDK(KP866025403), TE, TB); + TF = VFNMS(LDK(KP866025403), TE, TB); + } + ST(&(x[WS(rs, 4)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(TH, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(TH, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(TF, Ts), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), 
VFNMSI(TF, Ts), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 12, XSIMD_STRING("t1fv_12"), twinstr, &GENUS, {41, 24, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_12) (planner *p) { + X(kdft_dit_register) (p, t1fv_12, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name t1fv_12 -include t1f.h */ + +/* + * This function contains 59 FP additions, 30 FP multiplications, + * (or, 55 additions, 26 multiplications, 4 fused multiply/add), + * 28 stack variables, 2 constants, and 24 memory accesses + */ +#include "t1f.h" + +static void t1fv_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 22)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(12, rs)) { + V T1, TH, T6, TA, Tq, TE, Tv, TL, T9, TI, Te, TB, Ti, TD, Tn; + V TK; + { + V T5, T3, T4, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 14]), T4); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 6]), T2); + TH = VSUB(T5, T3); + T6 = VADD(T3, T5); + TA = VFNMS(LDK(KP500000000), T6, T1); + } + { + V Tu, Ts, Tp, Tt, Tr; + Tp = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tq = BYTWJ(&(W[TWVL * 16]), Tp); + Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tu = BYTWJ(&(W[TWVL * 8]), Tt); + Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ts = BYTWJ(&(W[0]), Tr); + TE = VSUB(Tu, Ts); + Tv = VADD(Ts, Tu); + TL = VFNMS(LDK(KP500000000), Tv, Tq); + } + { + V 
Td, Tb, T8, Tc, Ta; + T8 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T9 = BYTWJ(&(W[TWVL * 10]), T8); + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 2]), Tc); + Ta = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 18]), Ta); + TI = VSUB(Td, Tb); + Te = VADD(Tb, Td); + TB = VFNMS(LDK(KP500000000), Te, T9); + } + { + V Tm, Tk, Th, Tl, Tj; + Th = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ti = BYTWJ(&(W[TWVL * 4]), Th); + Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tm = BYTWJ(&(W[TWVL * 20]), Tl); + Tj = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tk = BYTWJ(&(W[TWVL * 12]), Tj); + TD = VSUB(Tm, Tk); + Tn = VADD(Tk, Tm); + TK = VFNMS(LDK(KP500000000), Tn, Ti); + } + { + V Tg, Ty, Tx, Tz; + { + V T7, Tf, To, Tw; + T7 = VADD(T1, T6); + Tf = VADD(T9, Te); + Tg = VSUB(T7, Tf); + Ty = VADD(T7, Tf); + To = VADD(Ti, Tn); + Tw = VADD(Tq, Tv); + Tx = VBYI(VSUB(To, Tw)); + Tz = VADD(To, Tw); + } + ST(&(x[WS(rs, 9)]), VSUB(Tg, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(Ty, Tz), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(Tg, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VSUB(Ty, Tz), ms, &(x[0])); + } + { + V TS, TW, TV, TX; + { + V TQ, TR, TT, TU; + TQ = VADD(TA, TB); + TR = VADD(TK, TL); + TS = VSUB(TQ, TR); + TW = VADD(TQ, TR); + TT = VADD(TD, TE); + TU = VADD(TH, TI); + TV = VBYI(VMUL(LDK(KP866025403), VSUB(TT, TU))); + TX = VBYI(VMUL(LDK(KP866025403), VADD(TU, TT))); + } + ST(&(x[WS(rs, 10)]), VSUB(TS, TV), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(TW, TX), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(TS, TV), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TW, TX), ms, &(x[0])); + } + { + V TG, TP, TN, TO; + { + V TC, TF, TJ, TM; + TC = VSUB(TA, TB); + TF = VMUL(LDK(KP866025403), VSUB(TD, TE)); + TG = VSUB(TC, TF); + TP = VADD(TC, TF); + TJ = VMUL(LDK(KP866025403), VSUB(TH, TI)); + TM = VSUB(TK, TL); + TN = VBYI(VADD(TJ, TM)); + TO = VBYI(VSUB(TJ, TM)); + } + ST(&(x[WS(rs, 5)]), VSUB(TG, TN), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(TP, TO), 
ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(TN, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TO, TP), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 12, XSIMD_STRING("t1fv_12"), twinstr, &GENUS, {55, 26, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_12) (planner *p) { + X(kdft_dit_register) (p, t1fv_12, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:15 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name t1fv_15 -include t1f.h */ + +/* + * This function contains 92 FP additions, 77 FP multiplications, + * (or, 50 additions, 35 multiplications, 42 fused multiply/add), + * 81 stack variables, 8 constants, and 30 memory accesses + */ +#include "t1f.h" + +static void t1fv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP910592997, +0.910592997310029334643087372129977886038870291); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(15, rs)) { + V Tq, Ty, Th, T1b, T10, Ts, TP, T7, Tu, TA, TC, Tj, Tk, TQ, Tf; + { + V T1, T4, T2, T9, Te; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T8, Tp, Tx, Tg; + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tx = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + 
Tg = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tb, Td, Tr, T6, Tt, Tz, TB, Ti; + { + V T5, T3, Ta, Tc; + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T5 = BYTWJ(&(W[TWVL * 18]), T4); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + Tq = BYTWJ(&(W[TWVL * 10]), Tp); + Ty = BYTWJ(&(W[TWVL * 16]), Tx); + Th = BYTWJ(&(W[TWVL * 22]), Tg); + Tb = BYTWJ(&(W[TWVL * 14]), Ta); + Td = BYTWJ(&(W[TWVL * 24]), Tc); + Tr = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1b = VSUB(T5, T3); + T6 = VADD(T3, T5); + Tt = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + } + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Te = VADD(Tb, Td); + T10 = VSUB(Td, Tb); + Ts = BYTWJ(&(W[TWVL * 20]), Tr); + TP = VFNMS(LDK(KP500000000), T6, T1); + T7 = VADD(T1, T6); + Tu = BYTWJ(&(W[0]), Tt); + TA = BYTWJ(&(W[TWVL * 26]), Tz); + TC = BYTWJ(&(W[TWVL * 6]), TB); + Tj = BYTWJ(&(W[TWVL * 2]), Ti); + Tk = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + } + } + TQ = VFNMS(LDK(KP500000000), Te, T9); + Tf = VADD(T9, Te); + } + { + V Tv, T13, TD, T14, Tl; + Tv = VADD(Ts, Tu); + T13 = VSUB(Tu, Ts); + TD = VADD(TA, TC); + T14 = VSUB(TC, TA); + Tl = BYTWJ(&(W[TWVL * 12]), Tk); + { + V TT, Tw, T1d, T15, TU, TE, T11, Tm; + TT = VFNMS(LDK(KP500000000), Tv, Tq); + Tw = VADD(Tq, Tv); + T1d = VADD(T13, T14); + T15 = VSUB(T13, T14); + TU = VFNMS(LDK(KP500000000), TD, Ty); + TE = VADD(Ty, TD); + T11 = VSUB(Tl, Tj); + Tm = VADD(Tj, Tl); + { + V T19, TV, TK, TF, T1c, T12, TR, Tn; + T19 = VSUB(TT, TU); + TV = VADD(TT, TU); + TK = VSUB(Tw, TE); + TF = VADD(Tw, TE); + T1c = VADD(T10, T11); + T12 = VSUB(T10, T11); + TR = VFNMS(LDK(KP500000000), Tm, Th); + Tn = VADD(Th, Tm); + { + V T1g, T1e, T1m, T16, T18, TS, TL, To, T1f, T1u; + T1g = VSUB(T1c, T1d); + T1e = VADD(T1c, T1d); + T1m = VFNMS(LDK(KP618033988), T12, T15); + T16 = VFMA(LDK(KP618033988), T15, T12); + T18 = VSUB(TQ, TR); + TS = 
VADD(TQ, TR); + TL = VSUB(Tf, Tn); + To = VADD(Tf, Tn); + T1f = VFNMS(LDK(KP250000000), T1e, T1b); + T1u = VMUL(LDK(KP866025403), VADD(T1b, T1e)); + { + V T1o, T1a, TY, TO, TM, TG, TI, T1p, T1h, T1t, TX, TW; + T1o = VFNMS(LDK(KP618033988), T18, T19); + T1a = VFMA(LDK(KP618033988), T19, T18); + TW = VADD(TS, TV); + TY = VSUB(TS, TV); + TO = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TK, TL)); + TM = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TL, TK)); + TG = VADD(To, TF); + TI = VSUB(To, TF); + T1p = VFNMS(LDK(KP559016994), T1g, T1f); + T1h = VFMA(LDK(KP559016994), T1g, T1f); + T1t = VADD(TP, TW); + TX = VFNMS(LDK(KP250000000), TW, TP); + { + V T1q, T1s, T1k, T1i, T1l, TZ, TJ, TN, TH; + ST(&(x[0]), VADD(T7, TG), ms, &(x[0])); + TH = VFNMS(LDK(KP250000000), TG, T7); + T1q = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T1p, T1o)); + T1s = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T1p, T1o)); + T1k = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), T1h, T1a)); + T1i = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), T1h, T1a)); + ST(&(x[WS(rs, 10)]), VFMAI(T1u, T1t), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(T1u, T1t), ms, &(x[WS(rs, 1)])); + T1l = VFNMS(LDK(KP559016994), TY, TX); + TZ = VFMA(LDK(KP559016994), TY, TX); + TJ = VFNMS(LDK(KP559016994), TI, TH); + TN = VFMA(LDK(KP559016994), TI, TH); + { + V T1n, T1r, T1j, T17; + T1n = VFMA(LDK(KP823639103), T1m, T1l); + T1r = VFNMS(LDK(KP823639103), T1m, T1l); + T1j = VFNMS(LDK(KP823639103), T16, TZ); + T17 = VFMA(LDK(KP823639103), T16, TZ); + ST(&(x[WS(rs, 12)]), VFMAI(TM, TJ), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(TM, TJ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(TO, TN), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFNMSI(TO, TN), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T1q, T1n), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1q, T1n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1s, T1r), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFNMSI(T1s, T1r), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), 
VFMAI(T1k, T1j), ms, &(x[0])); + ST(&(x[WS(rs, 11)]), VFNMSI(T1k, T1j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFMAI(T1i, T17), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(T1i, T17), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 15, XSIMD_STRING("t1fv_15"), twinstr, &GENUS, {50, 35, 42, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_15) (planner *p) { + X(kdft_dit_register) (p, t1fv_15, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 15 -name t1fv_15 -include t1f.h */ + +/* + * This function contains 92 FP additions, 53 FP multiplications, + * (or, 78 additions, 39 multiplications, 14 fused multiply/add), + * 52 stack variables, 10 constants, and 30 memory accesses + */ +#include "t1f.h" + +static void t1fv_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP216506350, +0.216506350946109661690930792688234045867850657); + DVK(KP484122918, +0.484122918275927110647408174972799951354115213); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP509036960, +0.509036960455127183450980863393907648510733164); + DVK(KP823639103, +0.823639103546331925877420039278190003029660514); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 28)); m < me; m = m + VL, x = x + (VL 
* ms), W = W + (TWVL * 28), MAKE_VOLATILE_STRIDE(15, rs)) { + V T1e, T7, TP, T12, T15, Tf, Tn, To, T1b, T1c, T1f, TQ, TR, TS, Tw; + V TE, TF, TT, TU, TV; + { + V T1, T5, T3, T4, T2, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T4 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 18]), T4); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + T1e = VSUB(T5, T3); + T6 = VADD(T3, T5); + T7 = VADD(T1, T6); + TP = VFNMS(LDK(KP500000000), T6, T1); + } + { + V T9, Tq, Ty, Th, Te, T13, Tv, T10, TD, T11, Tm, T14; + { + V T8, Tp, Tx, Tg; + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tq = BYTWJ(&(W[TWVL * 10]), Tp); + Tx = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Ty = BYTWJ(&(W[TWVL * 16]), Tx); + Tg = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 22]), Tg); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 14]), Ta); + Tc = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 24]), Tc); + Te = VADD(Tb, Td); + T13 = VSUB(Td, Tb); + } + { + V Ts, Tu, Tr, Tt; + Tr = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Ts = BYTWJ(&(W[TWVL * 20]), Tr); + Tt = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tu = BYTWJ(&(W[0]), Tt); + Tv = VADD(Ts, Tu); + T10 = VSUB(Tu, Ts); + } + { + V TA, TC, Tz, TB; + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 26]), Tz); + TB = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TC = BYTWJ(&(W[TWVL * 6]), TB); + TD = VADD(TA, TC); + T11 = VSUB(TC, TA); + } + { + V Tj, Tl, Ti, Tk; + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = BYTWJ(&(W[TWVL * 2]), Ti); + Tk = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tl = BYTWJ(&(W[TWVL * 12]), Tk); + Tm = VADD(Tj, Tl); + T14 = VSUB(Tl, Tj); + } + T12 = VSUB(T10, T11); + T15 = VSUB(T13, T14); + Tf = VADD(T9, Te); + Tn = VADD(Th, Tm); + To = VADD(Tf, Tn); + T1b = VADD(T13, T14); + T1c = VADD(T10, T11); + T1f = VADD(T1b, T1c); + TQ 
= VFNMS(LDK(KP500000000), Te, T9); + TR = VFNMS(LDK(KP500000000), Tm, Th); + TS = VADD(TQ, TR); + Tw = VADD(Tq, Tv); + TE = VADD(Ty, TD); + TF = VADD(Tw, TE); + TT = VFNMS(LDK(KP500000000), Tv, Tq); + TU = VFNMS(LDK(KP500000000), TD, Ty); + TV = VADD(TT, TU); + } + { + V TI, TG, TH, TM, TO, TK, TL, TN, TJ; + TI = VMUL(LDK(KP559016994), VSUB(To, TF)); + TG = VADD(To, TF); + TH = VFNMS(LDK(KP250000000), TG, T7); + TK = VSUB(Tw, TE); + TL = VSUB(Tf, Tn); + TM = VBYI(VFNMS(LDK(KP587785252), TL, VMUL(LDK(KP951056516), TK))); + TO = VBYI(VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TK))); + ST(&(x[0]), VADD(T7, TG), ms, &(x[0])); + TN = VADD(TI, TH); + ST(&(x[WS(rs, 6)]), VSUB(TN, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VADD(TO, TN), ms, &(x[WS(rs, 1)])); + TJ = VSUB(TH, TI); + ST(&(x[WS(rs, 3)]), VSUB(TJ, TM), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 12)]), VADD(TM, TJ), ms, &(x[0])); + } + { + V T16, T1m, T1u, T1h, T1o, T1a, T1p, TZ, T1t, T1l, T1d, T1g; + T16 = VFNMS(LDK(KP509036960), T15, VMUL(LDK(KP823639103), T12)); + T1m = VFMA(LDK(KP823639103), T15, VMUL(LDK(KP509036960), T12)); + T1u = VBYI(VMUL(LDK(KP866025403), VADD(T1e, T1f))); + T1d = VMUL(LDK(KP484122918), VSUB(T1b, T1c)); + T1g = VFNMS(LDK(KP216506350), T1f, VMUL(LDK(KP866025403), T1e)); + T1h = VSUB(T1d, T1g); + T1o = VADD(T1d, T1g); + { + V T18, T19, TY, TW, TX; + T18 = VSUB(TT, TU); + T19 = VSUB(TQ, TR); + T1a = VFNMS(LDK(KP587785252), T19, VMUL(LDK(KP951056516), T18)); + T1p = VFMA(LDK(KP951056516), T19, VMUL(LDK(KP587785252), T18)); + TY = VMUL(LDK(KP559016994), VSUB(TS, TV)); + TW = VADD(TS, TV); + TX = VFNMS(LDK(KP250000000), TW, TP); + TZ = VSUB(TX, TY); + T1t = VADD(TP, TW); + T1l = VADD(TY, TX); + } + { + V T17, T1i, T1r, T1s; + ST(&(x[WS(rs, 5)]), VSUB(T1t, T1u), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VADD(T1t, T1u), ms, &(x[0])); + T17 = VSUB(TZ, T16); + T1i = VBYI(VSUB(T1a, T1h)); + ST(&(x[WS(rs, 8)]), VSUB(T17, T1i), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VADD(T17, T1i), ms, &(x[WS(rs, 
1)])); + T1r = VSUB(T1l, T1m); + T1s = VBYI(VADD(T1p, T1o)); + ST(&(x[WS(rs, 11)]), VSUB(T1r, T1s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T1r, T1s), ms, &(x[0])); + { + V T1n, T1q, T1j, T1k; + T1n = VADD(T1l, T1m); + T1q = VBYI(VSUB(T1o, T1p)); + ST(&(x[WS(rs, 14)]), VSUB(T1n, T1q), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T1n, T1q), ms, &(x[WS(rs, 1)])); + T1j = VADD(TZ, T16); + T1k = VBYI(VADD(T1a, T1h)); + ST(&(x[WS(rs, 13)]), VSUB(T1j, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(T1j, T1k), ms, &(x[0])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 15, XSIMD_STRING("t1fv_15"), twinstr, &GENUS, {78, 39, 14, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_15) (planner *p) { + X(kdft_dit_register) (p, t1fv_15, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:15 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1fv_16 -include t1f.h */ + +/* + * This function contains 87 FP additions, 64 FP multiplications, + * (or, 53 additions, 30 multiplications, 34 fused multiply/add), + * 61 stack variables, 3 constants, and 32 memory accesses + */ +#include "t1f.h" + +static void t1fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TO, Ta, TJ, TP, T14, Tq, T1i, T10, T1b, T1l, T13, T1c, TR, Tl, T15; + V Tv; + { + V Tc, TW, T4, T19, T9, TD, TI, Tj, TZ, T1a, Te, Th, Tn, Tr, Tu; + V Tp; + { + V T1, T2, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tz, TG, TB, TE; + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TE = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V Ti, TY, TX, Td, Tg, Tm, Tt, To; + { + V T3, T6, T8, TA, TH, TC, TF, Tb; + Tb = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 14]), T2); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + T8 = 
BYTWJ(&(W[TWVL * 22]), T7); + TA = BYTWJ(&(W[TWVL * 26]), Tz); + TH = BYTWJ(&(W[TWVL * 18]), TG); + TC = BYTWJ(&(W[TWVL * 10]), TB); + TF = BYTWJ(&(W[TWVL * 2]), TE); + Tc = BYTWJ(&(W[0]), Tb); + TW = VSUB(T1, T3); + T4 = VADD(T1, T3); + T19 = VSUB(T6, T8); + T9 = VADD(T6, T8); + Ti = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TD = VADD(TA, TC); + TY = VSUB(TA, TC); + TI = VADD(TF, TH); + TX = VSUB(TF, TH); + } + Td = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tg = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tm = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tj = BYTWJ(&(W[TWVL * 24]), Ti); + Tt = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + To = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TZ = VADD(TX, TY); + T1a = VSUB(TY, TX); + Te = BYTWJ(&(W[TWVL * 16]), Td); + Th = BYTWJ(&(W[TWVL * 8]), Tg); + Tn = BYTWJ(&(W[TWVL * 28]), Tm); + Tr = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tu = BYTWJ(&(W[TWVL * 20]), Tt); + Tp = BYTWJ(&(W[TWVL * 12]), To); + } + } + } + { + V Tf, T11, Tk, T12, Ts; + TO = VADD(T4, T9); + Ta = VSUB(T4, T9); + TJ = VSUB(TD, TI); + TP = VADD(TI, TD); + Tf = VADD(Tc, Te); + T11 = VSUB(Tc, Te); + Tk = VADD(Th, Tj); + T12 = VSUB(Th, Tj); + Ts = BYTWJ(&(W[TWVL * 4]), Tr); + T14 = VSUB(Tn, Tp); + Tq = VADD(Tn, Tp); + T1i = VFNMS(LDK(KP707106781), TZ, TW); + T10 = VFMA(LDK(KP707106781), TZ, TW); + T1b = VFNMS(LDK(KP707106781), T1a, T19); + T1l = VFMA(LDK(KP707106781), T1a, T19); + T13 = VFNMS(LDK(KP414213562), T12, T11); + T1c = VFMA(LDK(KP414213562), T11, T12); + TR = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + T15 = VSUB(Tu, Ts); + Tv = VADD(Ts, Tu); + } + } + { + V T1d, T16, TS, Tw, TU, TQ; + T1d = VFMA(LDK(KP414213562), T14, T15); + T16 = VFNMS(LDK(KP414213562), T15, T14); + TS = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + TU = VSUB(TO, TP); + TQ = VADD(TO, TP); + { + V T1e, T1j, T17, T1m; + T1e = VSUB(T1c, T1d); + T1j = VADD(T1c, T1d); + T17 = VADD(T13, T16); + T1m = VSUB(T16, T13); + { + V TV, TT, TK, Tx; + TV = VSUB(TS, TR); + TT = VADD(TR, TS); + TK = 
VSUB(Tw, Tl); + Tx = VADD(Tl, Tw); + { + V T1h, T1f, T1o, T1k; + T1h = VFMA(LDK(KP923879532), T1e, T1b); + T1f = VFNMS(LDK(KP923879532), T1e, T1b); + T1o = VFMA(LDK(KP923879532), T1j, T1i); + T1k = VFNMS(LDK(KP923879532), T1j, T1i); + { + V T1g, T18, T1p, T1n; + T1g = VFMA(LDK(KP923879532), T17, T10); + T18 = VFNMS(LDK(KP923879532), T17, T10); + T1p = VFMA(LDK(KP923879532), T1m, T1l); + T1n = VFNMS(LDK(KP923879532), T1m, T1l); + ST(&(x[WS(rs, 12)]), VFNMSI(TV, TU), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TV, TU), ms, &(x[0])); + ST(&(x[0]), VADD(TQ, TT), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TQ, TT), ms, &(x[0])); + { + V TN, TL, TM, Ty; + TN = VFMA(LDK(KP707106781), TK, TJ); + TL = VFNMS(LDK(KP707106781), TK, TJ); + TM = VFMA(LDK(KP707106781), Tx, Ta); + Ty = VFNMS(LDK(KP707106781), Tx, Ta); + ST(&(x[WS(rs, 1)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFNMSI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t1fv_16"), twinstr, &GENUS, {53, 30, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_16) (planner *p) { + X(kdft_dit_register) 
(p, t1fv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1fv_16 -include t1f.h */ + +/* + * This function contains 87 FP additions, 42 FP multiplications, + * (or, 83 additions, 38 multiplications, 4 fused multiply/add), + * 36 stack variables, 3 constants, and 32 memory accesses + */ +#include "t1f.h" + +static void t1fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TJ, T10, TD, T11, T1b, T1c, Ty, TK, T16, T17, T18, Tb, TN, T13, T14; + V T15, Tm, TM, TG, TI, TH; + TG = LD(&(x[0]), ms, &(x[0])); + TH = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + TI = BYTWJ(&(W[TWVL * 14]), TH); + TJ = VSUB(TG, TI); + T10 = VADD(TG, TI); + { + V TA, TC, Tz, TB; + Tz = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 6]), Tz); + TB = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TC = BYTWJ(&(W[TWVL * 22]), TB); + TD = VSUB(TA, TC); + T11 = VADD(TA, TC); + } + { + V Tp, Tw, Tr, Tu, Ts, Tx; + { + V To, Tv, Tq, Tt; + To = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tp = BYTWJ(&(W[TWVL * 26]), To); + Tv = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tw = BYTWJ(&(W[TWVL * 18]), Tv); + Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tr = BYTWJ(&(W[TWVL * 10]), Tq); + Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tu = BYTWJ(&(W[TWVL * 2]), Tt); + } + T1b = VADD(Tp, Tr); + T1c = VADD(Tu, Tw); + Ts = VSUB(Tp, Tr); + Tx = VSUB(Tu, Tw); + Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx)); + TK = VMUL(LDK(KP707106781), VADD(Tx, Ts)); + } + { + V T2, T9, T4, T7, T5, Ta; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 15)]), ms, 
&(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[TWVL * 28]), T1); + T8 = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 20]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTWJ(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T7 = BYTWJ(&(W[TWVL * 4]), T6); + } + T16 = VADD(T2, T4); + T17 = VADD(T7, T9); + T18 = VSUB(T16, T17); + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VFNMS(LDK(KP923879532), Ta, VMUL(LDK(KP382683432), T5)); + TN = VFMA(LDK(KP923879532), T5, VMUL(LDK(KP382683432), Ta)); + } + { + V Td, Tk, Tf, Ti, Tg, Tl; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[0]), Tc); + Tj = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tk = BYTWJ(&(W[TWVL * 24]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTWJ(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ti = BYTWJ(&(W[TWVL * 8]), Th); + } + T13 = VADD(Td, Tf); + T14 = VADD(Ti, Tk); + T15 = VSUB(T13, T14); + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VFMA(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl)); + TM = VFNMS(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tg)); + } + { + V T1a, T1g, T1f, T1h; + { + V T12, T19, T1d, T1e; + T12 = VSUB(T10, T11); + T19 = VMUL(LDK(KP707106781), VADD(T15, T18)); + T1a = VADD(T12, T19); + T1g = VSUB(T12, T19); + T1d = VSUB(T1b, T1c); + T1e = VMUL(LDK(KP707106781), VSUB(T18, T15)); + T1f = VBYI(VADD(T1d, T1e)); + T1h = VBYI(VSUB(T1e, T1d)); + } + ST(&(x[WS(rs, 14)]), VSUB(T1a, T1f), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T1g, T1h), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1a, T1f), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(T1g, T1h), ms, &(x[0])); + } + { + V T1k, T1o, T1n, T1p; + { + V T1i, T1j, T1l, T1m; + T1i = VADD(T10, T11); + T1j = VADD(T1c, T1b); + T1k = VADD(T1i, T1j); + T1o = VSUB(T1i, T1j); + T1l = VADD(T13, T14); + T1m = VADD(T16, T17); + T1n = VADD(T1l, T1m); + T1p = VBYI(VSUB(T1m, T1l)); + } + ST(&(x[WS(rs, 8)]), VSUB(T1k, 
T1n), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1o, T1p), ms, &(x[0])); + ST(&(x[0]), VADD(T1k, T1n), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VSUB(T1o, T1p), ms, &(x[0])); + } + { + V TF, TQ, TP, TR; + { + V Tn, TE, TL, TO; + Tn = VSUB(Tb, Tm); + TE = VSUB(Ty, TD); + TF = VBYI(VSUB(Tn, TE)); + TQ = VBYI(VADD(TE, Tn)); + TL = VADD(TJ, TK); + TO = VADD(TM, TN); + TP = VSUB(TL, TO); + TR = VADD(TL, TO); + } + ST(&(x[WS(rs, 7)]), VADD(TF, TP), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VSUB(TR, TQ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(TP, TF), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TQ, TR), ms, &(x[WS(rs, 1)])); + } + { + V TU, TY, TX, TZ; + { + V TS, TT, TV, TW; + TS = VSUB(TJ, TK); + TT = VADD(Tm, Tb); + TU = VADD(TS, TT); + TY = VSUB(TS, TT); + TV = VADD(TD, Ty); + TW = VSUB(TN, TM); + TX = VBYI(VADD(TV, TW)); + TZ = VBYI(VSUB(TW, TV)); + } + ST(&(x[WS(rs, 13)]), VSUB(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(TY, TZ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(TY, TZ), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t1fv_16"), twinstr, &GENUS, {83, 38, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_16) (planner *p) { + X(kdft_dit_register) (p, t1fv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1fv_2 -include t1f.h */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1f.h" + +static void t1fv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1fv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_2) (planner *p) { + 
X(kdft_dit_register) (p, t1fv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1fv_2 -include t1f.h */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t1f.h" + +static void t1fv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1fv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_2) (planner *p) { + X(kdft_dit_register) (p, t1fv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,519 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:16 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t1fv_20 -include t1f.h */ + +/* + * This function contains 123 FP additions, 88 FP multiplications, + * (or, 77 additions, 42 multiplications, 46 fused multiply/add), + * 68 stack variables, 4 constants, and 40 memory accesses + */ +#include "t1f.h" + +static void t1fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, Tx, T1m, T1K, T1y, Tk, Tf, T16, T10, TT, T1O, T1w, T1L, T1p, T1M; + V T1s, TZ, TI, T1x, Tp; + { + V T1, Tv, T2, Tt; + T1 = LD(&(x[0]), ms, &(x[0])); + Tv = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T9, T1n, TN, T1v, TS, Te, T1q, T1u, TE, TG, Tm, T1o, TC, Tn, T1r; + V TH, To; + { + V TP, TR, Ta, Tc; + 
{ + V T5, T7, TJ, TL, T1k, T1l; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TJ = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TL = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V Tw, T3, Tu, T6, T8, TK, TM, TO, TQ; + TO = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + Tw = BYTWJ(&(W[TWVL * 28]), Tv); + T3 = BYTWJ(&(W[TWVL * 18]), T2); + Tu = BYTWJ(&(W[TWVL * 8]), Tt); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + T8 = BYTWJ(&(W[TWVL * 26]), T7); + TK = BYTWJ(&(W[TWVL * 24]), TJ); + TM = BYTWJ(&(W[TWVL * 4]), TL); + TP = BYTWJ(&(W[TWVL * 32]), TO); + TQ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = VSUB(T1, T3); + T1k = VADD(T1, T3); + Tx = VSUB(Tu, Tw); + T1l = VADD(Tu, Tw); + T9 = VSUB(T6, T8); + T1n = VADD(T6, T8); + TN = VSUB(TK, TM); + T1v = VADD(TK, TM); + TR = BYTWJ(&(W[TWVL * 12]), TQ); + } + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1m = VSUB(T1k, T1l); + T1K = VADD(T1k, T1l); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + } + { + V Tb, TA, Td, Th, Tj, Tz, Tg, Ti, Ty; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Ty = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TS = VSUB(TP, TR); + T1y = VADD(TP, TR); + Tb = BYTWJ(&(W[TWVL * 30]), Ta); + TA = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 10]), Tc); + Th = BYTWJ(&(W[TWVL * 14]), Tg); + Tj = BYTWJ(&(W[TWVL * 34]), Ti); + Tz = BYTWJ(&(W[TWVL * 16]), Ty); + { + V TD, TF, TB, Tl; + TD = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TF = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TB = BYTWJ(&(W[TWVL * 36]), TA); + Te = VSUB(Tb, Td); + T1q = VADD(Tb, Td); + Tk = VSUB(Th, Tj); + T1u = VADD(Th, Tj); + TE = BYTWJ(&(W[0]), TD); + TG = BYTWJ(&(W[TWVL * 20]), TF); + Tm = BYTWJ(&(W[TWVL * 22]), Tl); + T1o = VADD(Tz, TB); + TC = VSUB(Tz, TB); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + } + } + } + Tf = VADD(T9, Te); + T16 = VSUB(T9, Te); + T10 = VSUB(TS, TN); + TT = VADD(TN, TS); + T1r = 
VADD(TE, TG); + TH = VSUB(TE, TG); + T1O = VADD(T1u, T1v); + T1w = VSUB(T1u, T1v); + To = BYTWJ(&(W[TWVL * 2]), Tn); + T1L = VADD(T1n, T1o); + T1p = VSUB(T1n, T1o); + T1M = VADD(T1q, T1r); + T1s = VSUB(T1q, T1r); + TZ = VSUB(TH, TC); + TI = VADD(TC, TH); + T1x = VADD(Tm, To); + Tp = VSUB(Tm, To); + } + } + { + V T1V, T1N, T14, T1d, T11, T1G, T1t, T1z, T1P, Tq, T17, T13, TV, TU; + T1V = VSUB(T1L, T1M); + T1N = VADD(T1L, T1M); + T14 = VSUB(TT, TI); + TU = VADD(TI, TT); + T1d = VFNMS(LDK(KP618033988), TZ, T10); + T11 = VFMA(LDK(KP618033988), T10, TZ); + T1G = VSUB(T1p, T1s); + T1t = VADD(T1p, T1s); + T1z = VSUB(T1x, T1y); + T1P = VADD(T1x, T1y); + Tq = VADD(Tk, Tp); + T17 = VSUB(Tk, Tp); + T13 = VFNMS(LDK(KP250000000), TU, Tx); + TV = VADD(Tx, TU); + { + V T1J, T1H, T1D, T1Z, T1X, T1T, T1h, T1j, T1b, T19, T1C, T1S, T1c, TY, T1F; + V T1A; + T1F = VSUB(T1w, T1z); + T1A = VADD(T1w, T1z); + { + V T1W, T1Q, TX, Tr; + T1W = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + TX = VSUB(Tf, Tq); + Tr = VADD(Tf, Tq); + { + V T1g, T18, T1f, T15; + T1g = VFNMS(LDK(KP618033988), T16, T17); + T18 = VFMA(LDK(KP618033988), T17, T16); + T1f = VFMA(LDK(KP559016994), T14, T13); + T15 = VFNMS(LDK(KP559016994), T14, T13); + T1J = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1F, T1G)); + T1H = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1G, T1F)); + { + V T1B, T1R, TW, Ts; + T1B = VADD(T1t, T1A); + T1D = VSUB(T1t, T1A); + T1Z = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1V, T1W)); + T1X = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1W, T1V)); + T1R = VADD(T1N, T1Q); + T1T = VSUB(T1N, T1Q); + TW = VFNMS(LDK(KP250000000), Tr, T4); + Ts = VADD(T4, Tr); + T1h = VFNMS(LDK(KP951056516), T1g, T1f); + T1j = VFMA(LDK(KP951056516), T1g, T1f); + T1b = VFNMS(LDK(KP951056516), T18, T15); + T19 = VFMA(LDK(KP951056516), T18, T15); + ST(&(x[WS(rs, 10)]), VADD(T1m, T1B), ms, &(x[0])); + T1C = VFNMS(LDK(KP250000000), T1B, T1m); + ST(&(x[0]), VADD(T1K, T1R), ms, &(x[0])); + T1S = VFNMS(LDK(KP250000000), 
T1R, T1K); + T1c = VFNMS(LDK(KP559016994), TX, TW); + TY = VFMA(LDK(KP559016994), TX, TW); + ST(&(x[WS(rs, 15)]), VFMAI(TV, Ts), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(TV, Ts), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T1E, T1I, T1U, T1Y; + T1E = VFNMS(LDK(KP559016994), T1D, T1C); + T1I = VFMA(LDK(KP559016994), T1D, T1C); + T1U = VFMA(LDK(KP559016994), T1T, T1S); + T1Y = VFNMS(LDK(KP559016994), T1T, T1S); + { + V T1e, T1i, T1a, T12; + T1e = VFNMS(LDK(KP951056516), T1d, T1c); + T1i = VFMA(LDK(KP951056516), T1d, T1c); + T1a = VFNMS(LDK(KP951056516), T11, TY); + T12 = VFMA(LDK(KP951056516), T11, TY); + ST(&(x[WS(rs, 18)]), VFNMSI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFMAI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFNMSI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFMAI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFNMSI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(T1h, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T1h, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1j, T1i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1j, T1i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T1b, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1b, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T19, T12), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T19, T12), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t1fv_20"), twinstr, &GENUS, {77, 42, 46, 0}, 
0, 0, 0 }; + +void XSIMD(codelet_t1fv_20) (planner *p) { + X(kdft_dit_register) (p, t1fv_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t1fv_20 -include t1f.h */ + +/* + * This function contains 123 FP additions, 62 FP multiplications, + * (or, 111 additions, 50 multiplications, 12 fused multiply/add), + * 54 stack variables, 4 constants, and 40 memory accesses + */ +#include "t1f.h" + +static void t1fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, Tx, T1B, T1U, TZ, T16, T17, T10, Tf, Tq, Tr, T1N, T1O, T1S, T1t; + V T1w, T1C, TI, TT, TU, T1K, T1L, T1R, T1m, T1p, T1D, Ts, TV; + { + V T1, Tw, T3, Tu, Tv, T2, Tt, T1z, T1A; + T1 = LD(&(x[0]), ms, &(x[0])); + Tv = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tw = BYTWJ(&(W[TWVL * 28]), Tv); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 18]), T2); + Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tu = BYTWJ(&(W[TWVL * 8]), Tt); + T4 = VSUB(T1, T3); + Tx = VSUB(Tu, Tw); + T1z = VADD(T1, T3); + T1A = VADD(Tu, Tw); + T1B = VSUB(T1z, T1A); + T1U = VADD(T1z, T1A); + } + { + V T9, T1r, TN, T1l, TS, T1o, Te, T1u, Tk, T1k, TC, T1s, TH, T1v, Tp; + V T1n; + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 26]), T7); + T9 = VSUB(T6, T8); + T1r = VADD(T6, T8); + } + { + V TK, TM, TJ, TL; + TJ = LD(&(x[WS(rs, 13)]), ms, 
&(x[WS(rs, 1)])); + TK = BYTWJ(&(W[TWVL * 24]), TJ); + TL = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TM = BYTWJ(&(W[TWVL * 4]), TL); + TN = VSUB(TK, TM); + T1l = VADD(TK, TM); + } + { + V TP, TR, TO, TQ; + TO = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TP = BYTWJ(&(W[TWVL * 32]), TO); + TQ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TR = BYTWJ(&(W[TWVL * 12]), TQ); + TS = VSUB(TP, TR); + T1o = VADD(TP, TR); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 30]), Ta); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 10]), Tc); + Te = VSUB(Tb, Td); + T1u = VADD(Tb, Td); + } + { + V Th, Tj, Tg, Ti; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 14]), Tg); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tj = BYTWJ(&(W[TWVL * 34]), Ti); + Tk = VSUB(Th, Tj); + T1k = VADD(Th, Tj); + } + { + V Tz, TB, Ty, TA; + Ty = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tz = BYTWJ(&(W[TWVL * 16]), Ty); + TA = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TB = BYTWJ(&(W[TWVL * 36]), TA); + TC = VSUB(Tz, TB); + T1s = VADD(Tz, TB); + } + { + V TE, TG, TD, TF; + TD = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TE = BYTWJ(&(W[0]), TD); + TF = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TG = BYTWJ(&(W[TWVL * 20]), TF); + TH = VSUB(TE, TG); + T1v = VADD(TE, TG); + } + { + V Tm, To, Tl, Tn; + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tm = BYTWJ(&(W[TWVL * 22]), Tl); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + To = BYTWJ(&(W[TWVL * 2]), Tn); + Tp = VSUB(Tm, To); + T1n = VADD(Tm, To); + } + TZ = VSUB(TH, TC); + T16 = VSUB(T9, Te); + T17 = VSUB(Tk, Tp); + T10 = VSUB(TS, TN); + Tf = VADD(T9, Te); + Tq = VADD(Tk, Tp); + Tr = VADD(Tf, Tq); + T1N = VADD(T1k, T1l); + T1O = VADD(T1n, T1o); + T1S = VADD(T1N, T1O); + T1t = VSUB(T1r, T1s); + T1w = VSUB(T1u, T1v); + T1C = VADD(T1t, T1w); + TI = VADD(TC, TH); + TT = VADD(TN, TS); + TU = VADD(TI, TT); + T1K = VADD(T1r, T1s); + T1L = VADD(T1u, T1v); + T1R = VADD(T1K, T1L); + 
T1m = VSUB(T1k, T1l); + T1p = VSUB(T1n, T1o); + T1D = VADD(T1m, T1p); + } + Ts = VADD(T4, Tr); + TV = VBYI(VADD(Tx, TU)); + ST(&(x[WS(rs, 5)]), VSUB(Ts, TV), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VADD(Ts, TV), ms, &(x[WS(rs, 1)])); + { + V T1T, T1V, T1W, T1Q, T1Z, T1M, T1P, T1Y, T1X; + T1T = VMUL(LDK(KP559016994), VSUB(T1R, T1S)); + T1V = VADD(T1R, T1S); + T1W = VFNMS(LDK(KP250000000), T1V, T1U); + T1M = VSUB(T1K, T1L); + T1P = VSUB(T1N, T1O); + T1Q = VBYI(VFMA(LDK(KP951056516), T1M, VMUL(LDK(KP587785252), T1P))); + T1Z = VBYI(VFNMS(LDK(KP587785252), T1M, VMUL(LDK(KP951056516), T1P))); + ST(&(x[0]), VADD(T1U, T1V), ms, &(x[0])); + T1Y = VSUB(T1W, T1T); + ST(&(x[WS(rs, 8)]), VSUB(T1Y, T1Z), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T1Z, T1Y), ms, &(x[0])); + T1X = VADD(T1T, T1W); + ST(&(x[WS(rs, 4)]), VADD(T1Q, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T1X, T1Q), ms, &(x[0])); + } + { + V T1G, T1E, T1F, T1y, T1J, T1q, T1x, T1I, T1H; + T1G = VMUL(LDK(KP559016994), VSUB(T1C, T1D)); + T1E = VADD(T1C, T1D); + T1F = VFNMS(LDK(KP250000000), T1E, T1B); + T1q = VSUB(T1m, T1p); + T1x = VSUB(T1t, T1w); + T1y = VBYI(VFNMS(LDK(KP587785252), T1x, VMUL(LDK(KP951056516), T1q))); + T1J = VBYI(VFMA(LDK(KP951056516), T1x, VMUL(LDK(KP587785252), T1q))); + ST(&(x[WS(rs, 10)]), VADD(T1B, T1E), ms, &(x[0])); + T1I = VADD(T1G, T1F); + ST(&(x[WS(rs, 6)]), VSUB(T1I, T1J), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T1J, T1I), ms, &(x[0])); + T1H = VSUB(T1F, T1G); + ST(&(x[WS(rs, 2)]), VADD(T1y, T1H), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VSUB(T1H, T1y), ms, &(x[0])); + } + { + V T11, T18, T1g, T1d, T15, T1f, TY, T1c; + T11 = VFMA(LDK(KP951056516), TZ, VMUL(LDK(KP587785252), T10)); + T18 = VFMA(LDK(KP951056516), T16, VMUL(LDK(KP587785252), T17)); + T1g = VFNMS(LDK(KP587785252), T16, VMUL(LDK(KP951056516), T17)); + T1d = VFNMS(LDK(KP587785252), TZ, VMUL(LDK(KP951056516), T10)); + { + V T13, T14, TW, TX; + T13 = VFMS(LDK(KP250000000), TU, Tx); + T14 = VMUL(LDK(KP559016994), VSUB(TT, 
TI)); + T15 = VADD(T13, T14); + T1f = VSUB(T14, T13); + TW = VMUL(LDK(KP559016994), VSUB(Tf, Tq)); + TX = VFNMS(LDK(KP250000000), Tr, T4); + TY = VADD(TW, TX); + T1c = VSUB(TX, TW); + } + { + V T12, T19, T1i, T1j; + T12 = VADD(TY, T11); + T19 = VBYI(VSUB(T15, T18)); + ST(&(x[WS(rs, 19)]), VSUB(T12, T19), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T12, T19), ms, &(x[WS(rs, 1)])); + T1i = VADD(T1c, T1d); + T1j = VBYI(VADD(T1g, T1f)); + ST(&(x[WS(rs, 13)]), VSUB(T1i, T1j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T1i, T1j), ms, &(x[WS(rs, 1)])); + } + { + V T1a, T1b, T1e, T1h; + T1a = VSUB(TY, T11); + T1b = VBYI(VADD(T18, T15)); + ST(&(x[WS(rs, 11)]), VSUB(T1a, T1b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T1a, T1b), ms, &(x[WS(rs, 1)])); + T1e = VSUB(T1c, T1d); + T1h = VBYI(VSUB(T1f, T1g)); + ST(&(x[WS(rs, 17)]), VSUB(T1e, T1h), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T1e, T1h), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t1fv_20"), twinstr, &GENUS, {111, 50, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_20) (planner *p) { + X(kdft_dit_register) (p, t1fv_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,932 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 
as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:17 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1fv_25 -include t1f.h */ + +/* + * This function contains 248 FP additions, 241 FP multiplications, + * (or, 67 additions, 60 multiplications, 181 fused multiply/add), + * 208 stack variables, 67 constants, and 50 memory accesses + */ +#include "t1f.h" + +static void t1fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, 
+0.447417479732227551498980015410057305749330693); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP831864738, +0.831864738706457140726048799369896829771167132); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP522616830, 
+0.522616830205754336872861364785224694908468440); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP912575812, +0.912575812670962425556968549836277086778922727); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V T25, T1B, T2y, T1K, T2s, T23, T1S, T26, T20, T1X; + { + V T1O, T2X, Te, T3L, Td, T3Q, T3j, T3b, T2R, T2M, T2f, T27, T1y, T1H, T3M; + V TW, TR, TK, T2B, T3n, T3e, T2U, T2F, T2i, T2a, Tz, T1C, T3N, TQ, T11; + V T1b, T1c, T16; + { + V T1, T1g, T1i, T1p, T1k, T1m, Tb, T1N, T6, T1M; + { + V T7, T9, T2, T4, T1f, T1h, T1o; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + { + V T8, Ta, T3, T5, T1j; + T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 18]), T7); + Ta = BYTWJ(&(W[TWVL * 28]), T9); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + T5 = BYTWJ(&(W[TWVL * 38]), T4); + T1g = BYTWJ(&(W[TWVL * 4]), T1f); + T1i = BYTWJ(&(W[TWVL * 14]), T1h); + T1p = BYTWJ(&(W[TWVL * 34]), T1o); + T1k = BYTWJ(&(W[TWVL * 44]), T1j); + T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tb = VADD(T8, Ta); + T1N = VSUB(T8, Ta); + T6 = VADD(T3, T5); + T1M = VSUB(T3, T5); + } + } + { + V T1v, T1l, Th, Tj, T1w, T1q, Tq, Tk, Tn, Tg; + Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V Tc, Ti, T1n, Tp; + Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1v = VSUB(T1i, T1k); + T1l = VADD(T1i, T1k); + T1n = BYTWJ(&(W[TWVL * 24]), T1m); + Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1O = VFMA(LDK(KP618033988), T1N, T1M); + T2X = VFNMS(LDK(KP618033988), T1M, T1N); + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Th = BYTWJ(&(W[0]), Tg); + Tj = BYTWJ(&(W[TWVL * 10]), Ti); + T1w = VSUB(T1n, T1p); + T1q = VADD(T1n, T1p); + Tq = BYTWJ(&(W[TWVL * 30]), Tp); + Tk = LD(&(x[WS(rs, 
21)]), ms, &(x[WS(rs, 1)])); + T3L = VADD(T1, Tc); + Td = VFNMS(LDK(KP250000000), Tc, T1); + Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + } + { + V T1x, T2K, TM, TB, Tw, Tm, Tx, Tr, TI, T2L, T1u, TD, TF, TL; + TL = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + { + V T1t, Tl, To, TH, T1s, T1r, TA, TC; + TA = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1r = VADD(T1l, T1q); + T1t = VSUB(T1q, T1l); + T1x = VFMA(LDK(KP618033988), T1w, T1v); + T2K = VFNMS(LDK(KP618033988), T1v, T1w); + Tl = BYTWJ(&(W[TWVL * 40]), Tk); + To = BYTWJ(&(W[TWVL * 20]), Tn); + TM = BYTWJ(&(W[TWVL * 6]), TL); + TB = BYTWJ(&(W[TWVL * 46]), TA); + TH = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T1s = VFNMS(LDK(KP250000000), T1r, T1g); + T3Q = VADD(T1g, T1r); + TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tj, Tl); + Tm = VADD(Tj, Tl); + Tx = VSUB(Tq, To); + Tr = VADD(To, Tq); + TI = BYTWJ(&(W[TWVL * 26]), TH); + T2L = VFMA(LDK(KP559016994), T1t, T1s); + T1u = VFNMS(LDK(KP559016994), T1t, T1s); + TD = BYTWJ(&(W[TWVL * 16]), TC); + TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + } + { + V Tu, Ty, T2E, TE, TN, TG, Tt, TV, Ts; + TV = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ts = VADD(Tm, Tr); + Tu = VSUB(Tm, Tr); + Ty = VFNMS(LDK(KP618033988), Tx, Tw); + T2E = VFMA(LDK(KP618033988), Tw, Tx); + T3j = VFNMS(LDK(KP059835404), T2K, T2L); + T3b = VFMA(LDK(KP066152395), T2L, T2K); + T2R = VFNMS(LDK(KP786782374), T2K, T2L); + T2M = VFMA(LDK(KP869845200), T2L, T2K); + T2f = VFMA(LDK(KP132830569), T1u, T1x); + T27 = VFNMS(LDK(KP120146378), T1x, T1u); + T1y = VFNMS(LDK(KP893101515), T1x, T1u); + T1H = VFMA(LDK(KP987388751), T1u, T1x); + TE = VSUB(TB, TD); + TN = VADD(TD, TB); + TG = BYTWJ(&(W[TWVL * 36]), TF); + Tt = VFNMS(LDK(KP250000000), Ts, Th); + T3M = VADD(Th, Ts); + TW = BYTWJ(&(W[TWVL * 2]), TV); + { + V TJ, TO, Tv, T2D, TY, T15, T10, T13, TP; + { + V TX, T14, TZ, T12; + TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0])); 
+ T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TJ = VSUB(TG, TI); + TO = VADD(TI, TG); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + T2D = VFNMS(LDK(KP559016994), Tu, Tt); + TY = BYTWJ(&(W[TWVL * 12]), TX); + T15 = BYTWJ(&(W[TWVL * 32]), T14); + T10 = BYTWJ(&(W[TWVL * 42]), TZ); + T13 = BYTWJ(&(W[TWVL * 22]), T12); + } + TP = VADD(TN, TO); + TR = VSUB(TN, TO); + TK = VFMA(LDK(KP618033988), TJ, TE); + T2B = VFNMS(LDK(KP618033988), TE, TJ); + T3n = VFMA(LDK(KP578046249), T2D, T2E); + T3e = VFNMS(LDK(KP522847744), T2E, T2D); + T2U = VFNMS(LDK(KP987388751), T2D, T2E); + T2F = VFMA(LDK(KP893101515), T2E, T2D); + T2i = VFNMS(LDK(KP603558818), Ty, Tv); + T2a = VFMA(LDK(KP667278218), Tv, Ty); + Tz = VFNMS(LDK(KP244189809), Ty, Tv); + T1C = VFMA(LDK(KP269969613), Tv, Ty); + T3N = VADD(TM, TP); + TQ = VFMS(LDK(KP250000000), TP, TM); + T11 = VADD(TY, T10); + T1b = VSUB(TY, T10); + T1c = VSUB(T15, T13); + T16 = VADD(T13, T15); + } + } + } + } + } + { + V T2z, Tf, T3W, T3O, T1d, T2H, T3m, T2j, T2b, TT, T1D, T2G, T35, T2V, T2Z; + V T3A, T3g, T2I, T1a, T3R, T3X; + T2z = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + { + V TS, T2A, T17, T19; + TS = VFNMS(LDK(KP559016994), TR, TQ); + T2A = VFMA(LDK(KP559016994), TR, TQ); + T3W = VSUB(T3M, T3N); + T3O = VADD(T3M, T3N); + T1d = VFNMS(LDK(KP618033988), T1c, T1b); + T2H = VFMA(LDK(KP618033988), T1b, T1c); + T17 = VADD(T11, T16); + T19 = VSUB(T16, T11); + { + V T3f, T2T, T2C, T18, T3P; + T3m = VFMA(LDK(KP447533225), T2B, T2A); + T3f = VFNMS(LDK(KP494780565), T2A, T2B); + T2T = VFNMS(LDK(KP132830569), T2A, T2B); + T2C = VFMA(LDK(KP120146378), T2B, T2A); + T2j = VFNMS(LDK(KP786782374), TK, TS); + T2b = VFMA(LDK(KP869845200), TS, TK); + TT = VFNMS(LDK(KP667278218), TS, TK); + T1D = VFMA(LDK(KP603558818), TK, TS); + T18 = VFNMS(LDK(KP250000000), T17, TW); + T3P = VADD(TW, T17); + T2G = VFMA(LDK(KP734762448), T2F, T2C); + T35 = VFNMS(LDK(KP734762448), T2F, T2C); + T2V = VFNMS(LDK(KP734762448), T2U, T2T); + T2Z = 
VFMA(LDK(KP734762448), T2U, T2T); + T3A = VFMA(LDK(KP982009705), T3f, T3e); + T3g = VFNMS(LDK(KP982009705), T3f, T3e); + T2I = VFMA(LDK(KP559016994), T19, T18); + T1a = VFNMS(LDK(KP559016994), T19, T18); + T3R = VADD(T3P, T3Q); + T3X = VSUB(T3P, T3Q); + } + } + { + V T2n, T2t, T1V, T22, T2l, T2d, T1Q, T1I, T2w, T1A, T1F, T2q; + { + V T2k, T1G, T28, T2g, T3K, T3E, T3a, T34, T3x, T3H, T2c, TU, T1T, T1U, T1z; + V T3o, T3t; + T2n = VFNMS(LDK(KP912575812), T2j, T2i); + T2k = VFMA(LDK(KP912575812), T2j, T2i); + T3o = VFNMS(LDK(KP921078979), T3n, T3m); + T3t = VFMA(LDK(KP921078979), T3n, T3m); + { + V T3c, T2Q, T2J, T3k, T1e; + T3c = VFNMS(LDK(KP667278218), T2I, T2H); + T2Q = VFNMS(LDK(KP059835404), T2H, T2I); + T2J = VFMA(LDK(KP066152395), T2I, T2H); + T3k = VFMA(LDK(KP603558818), T2H, T2I); + T1G = VFMA(LDK(KP578046249), T1a, T1d); + T1e = VFNMS(LDK(KP522847744), T1d, T1a); + T28 = VFNMS(LDK(KP494780565), T1a, T1d); + T2g = VFMA(LDK(KP447533225), T1d, T1a); + { + V T3U, T3S, T40, T3Y; + T3U = VSUB(T3O, T3R); + T3S = VADD(T3O, T3R); + T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X)); + T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W)); + { + V T3s, T3l, T2N, T36; + T3s = VFNMS(LDK(KP845997307), T3k, T3j); + T3l = VFMA(LDK(KP845997307), T3k, T3j); + T2N = VFNMS(LDK(KP772036680), T2M, T2J); + T36 = VFMA(LDK(KP772036680), T2M, T2J); + { + V T30, T2S, T3d, T3z, T3T; + T30 = VFNMS(LDK(KP772036680), T2R, T2Q); + T2S = VFMA(LDK(KP772036680), T2R, T2Q); + T3d = VFNMS(LDK(KP845997307), T3c, T3b); + T3z = VFMA(LDK(KP845997307), T3c, T3b); + ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0])); + T3T = VFNMS(LDK(KP250000000), T3S, T3L); + { + V T3C, T3p, T2O, T37; + T3C = VFMA(LDK(KP906616052), T3o, T3l); + T3p = VFNMS(LDK(KP906616052), T3o, T3l); + T2O = VFMA(LDK(KP956723877), T2N, T2G); + T37 = VFMA(LDK(KP522616830), T2V, T36); + { + V T31, T2W, T3u, T3h; + T31 = VFNMS(LDK(KP522616830), T2G, T30); + T2W = VFMA(LDK(KP945422727), T2V, T2S); + T3u = 
VFNMS(LDK(KP923225144), T3g, T3d); + T3h = VFMA(LDK(KP923225144), T3g, T3d); + { + V T3I, T3B, T3V, T3Z; + T3I = VFNMS(LDK(KP669429328), T3z, T3A); + T3B = VFMA(LDK(KP570584518), T3A, T3z); + T3V = VFMA(LDK(KP559016994), T3U, T3T); + T3Z = VFNMS(LDK(KP559016994), T3U, T3T); + { + V T3y, T3q, T2P, T38; + T3y = VFMA(LDK(KP262346850), T3p, T2X); + T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p)); + T2P = VFMA(LDK(KP992114701), T2O, T2z); + T38 = VFNMS(LDK(KP690983005), T37, T2S); + { + V T32, T2Y, T3v, T3F; + T32 = VFMA(LDK(KP763932022), T31, T2N); + T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W)); + T3v = VFNMS(LDK(KP997675361), T3u, T3t); + T3F = VFNMS(LDK(KP904508497), T3u, T3s); + { + V T3i, T3r, T3J, T3D; + T3i = VFMA(LDK(KP949179823), T3h, T2z); + T3r = VFNMS(LDK(KP237294955), T3h, T2z); + T3J = VFNMS(LDK(KP669429328), T3C, T3I); + T3D = VFMA(LDK(KP618033988), T3C, T3B); + ST(&(x[WS(rs, 20)]), VFMAI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(T3Y, T3V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T40, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VFMAI(T40, T3Z), ms, &(x[0])); + { + V T39, T33, T3w, T3G; + T39 = VFMA(LDK(KP855719849), T38, T35); + T33 = VFNMS(LDK(KP855719849), T32, T2Z); + ST(&(x[WS(rs, 22)]), VFMAI(T2Y, T2P), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(T2Y, T2P), ms, &(x[WS(rs, 1)])); + T3w = VFMA(LDK(KP560319534), T3v, T3s); + T3G = VFNMS(LDK(KP681693190), T3F, T3t); + ST(&(x[WS(rs, 23)]), VFMAI(T3q, T3i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(T3q, T3i), ms, &(x[0])); + T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y)); + T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y)); + T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X)); + T34 = VFMA(LDK(KP897376177), T33, T2z); + T3x = VFNMS(LDK(KP949179823), T3w, T3r); + T3H = VFNMS(LDK(KP860541664), T3G, T3r); + T2t = VFNMS(LDK(KP912575812), T2b, T2a); + T2c = VFMA(LDK(KP912575812), T2b, T2a); + TU 
= VFMA(LDK(KP829049696), TT, Tz); + T1T = VFNMS(LDK(KP829049696), TT, Tz); + T1U = VFNMS(LDK(KP831864738), T1y, T1e); + T1z = VFMA(LDK(KP831864738), T1y, T1e); + } + } + } + } + } + } + } + } + } + } + } + { + V T2o, T2h, T29, T2u, T2v, T2p; + T2o = VFNMS(LDK(KP958953096), T2g, T2f); + T2h = VFMA(LDK(KP958953096), T2g, T2f); + ST(&(x[WS(rs, 17)]), VFMAI(T3a, T34), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFNMSI(T3a, T34), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFMAI(T3E, T3x), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFNMSI(T3E, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VFNMSI(T3K, T3H), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFMAI(T3K, T3H), ms, &(x[WS(rs, 1)])); + T1V = VFMA(LDK(KP559154169), T1U, T1T); + T22 = VFNMS(LDK(KP683113946), T1T, T1U); + T29 = VFNMS(LDK(KP867381224), T28, T27); + T2u = VFMA(LDK(KP867381224), T28, T27); + T2l = VFMA(LDK(KP894834959), T2k, T2h); + T2v = VFMA(LDK(KP447417479), T2k, T2u); + T2d = VFNMS(LDK(KP809385824), T2c, T29); + T2p = VFMA(LDK(KP447417479), T2c, T2o); + T1Q = VFMA(LDK(KP831864738), T1H, T1G); + T1I = VFNMS(LDK(KP831864738), T1H, T1G); + T2w = VFNMS(LDK(KP763932022), T2v, T2h); + T1A = VFMA(LDK(KP904730450), T1z, TU); + T1F = VFNMS(LDK(KP904730450), T1z, TU); + T2q = VFMA(LDK(KP690983005), T2p, T29); + } + } + { + V T2e, T1E, T1P, T2m; + T2e = VFNMS(LDK(KP992114701), T2d, Tf); + T1E = VFMA(LDK(KP916574801), T1D, T1C); + T1P = VFNMS(LDK(KP916574801), T1D, T1C); + T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O)); + { + V T1J, T2r, T1R, T1W, T1Z, T2x; + T2x = VFNMS(LDK(KP999544308), T2w, T2t); + T1J = VFNMS(LDK(KP904730450), T1I, T1F); + T25 = VFMA(LDK(KP968583161), T1A, Tf); + T1B = VFNMS(LDK(KP242145790), T1A, Tf); + T2r = VFNMS(LDK(KP999544308), T2q, T2n); + T1R = VFMA(LDK(KP904730450), T1Q, T1P); + T1W = VFNMS(LDK(KP904730450), T1Q, T1P); + T1Z = VADD(T1E, T1F); + ST(&(x[WS(rs, 21)]), VFNMSI(T2m, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(T2m, T2e), ms, &(x[0])); + T2y = 
VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O)); + T1K = VFNMS(LDK(KP618033988), T1J, T1E); + T2s = VFNMS(LDK(KP803003575), T2r, Tf); + T23 = VFMA(LDK(KP617882369), T1W, T22); + T1S = VFNMS(LDK(KP242145790), T1R, T1O); + T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O)); + T20 = VFNMS(LDK(KP683113946), T1Z, T1I); + T1X = VFMA(LDK(KP559016994), T1W, T1V); + } + } + } + } + } + { + V T1L, T24, T21, T1Y; + T1L = VFNMS(LDK(KP876091699), T1K, T1B); + ST(&(x[WS(rs, 9)]), VFMAI(T2y, T2s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VFNMSI(T2y, T2s), ms, &(x[0])); + T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S)); + ST(&(x[WS(rs, 24)]), VFMAI(T26, T25), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(T26, T25), ms, &(x[WS(rs, 1)])); + T21 = VFMA(LDK(KP792626838), T20, T1B); + T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S)); + ST(&(x[WS(rs, 11)]), VFNMSI(T24, T21), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFMAI(T24, T21), ms, &(x[0])); + ST(&(x[WS(rs, 19)]), VFMAI(T1Y, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFNMSI(T1Y, T1L), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t1fv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_25) (planner *p) { + X(kdft_dit_register) (p, t1fv_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1fv_25 -include t1f.h */ + +/* + * This function contains 248 FP additions, 188 FP multiplications, + * (or, 170 additions, 110 
multiplications, 78 fused multiply/add), + * 99 stack variables, 40 constants, and 50 memory accesses + */ +#include "t1f.h" + +static void t1fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, 
+1.071653589957993236542617535735279956127150691); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V Tc, Tb, Td, Te, T1C, T2t, T1E, T1x, T2m, T1u, T3c, T2n, Ty, T2i, Tv; + V T38, T2j, TS, T2f, TP, T39, T2g, T1d, T2p, T1a, T3b, T2q; + { + V T7, T9, Ta, T2, T4, T5, T1D; + Tc = LD(&(x[0]), ms, &(x[0])); + { + V T6, T8, T1, T3; + T6 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 18]), T6); + T8 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 28]), T8); + Ta = VADD(T7, T9); + T1 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[TWVL * 8]), T1); + T3 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T4 = BYTWJ(&(W[TWVL * 38]), T3); + T5 = VADD(T2, T4); + } + Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta)); + Td = VADD(T5, Ta); + Te = VFNMS(LDK(KP250000000), Td, Tc); + T1C = VSUB(T2, T4); + T1D = VSUB(T7, T9); + T2t = VMUL(LDK(KP951056516), T1D); + T1E 
= VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D)); + } + { + V T1r, T1l, T1n, T1o, T1g, T1i, T1j, T1q; + T1q = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1r = BYTWJ(&(W[TWVL * 4]), T1q); + { + V T1k, T1m, T1f, T1h; + T1k = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1l = BYTWJ(&(W[TWVL * 24]), T1k); + T1m = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1n = BYTWJ(&(W[TWVL * 34]), T1m); + T1o = VADD(T1l, T1n); + T1f = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1g = BYTWJ(&(W[TWVL * 14]), T1f); + T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1i = BYTWJ(&(W[TWVL * 44]), T1h); + T1j = VADD(T1g, T1i); + } + { + V T1v, T1w, T1p, T1s, T1t; + T1v = VSUB(T1g, T1i); + T1w = VSUB(T1l, T1n); + T1x = VFMA(LDK(KP475528258), T1v, VMUL(LDK(KP293892626), T1w)); + T2m = VFNMS(LDK(KP293892626), T1v, VMUL(LDK(KP475528258), T1w)); + T1p = VMUL(LDK(KP559016994), VSUB(T1j, T1o)); + T1s = VADD(T1j, T1o); + T1t = VFNMS(LDK(KP250000000), T1s, T1r); + T1u = VADD(T1p, T1t); + T3c = VADD(T1r, T1s); + T2n = VSUB(T1t, T1p); + } + } + { + V Ts, Tm, To, Tp, Th, Tj, Tk, Tr; + Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ts = BYTWJ(&(W[0]), Tr); + { + V Tl, Tn, Tg, Ti; + Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tm = BYTWJ(&(W[TWVL * 20]), Tl); + Tn = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + To = BYTWJ(&(W[TWVL * 30]), Tn); + Tp = VADD(Tm, To); + Tg = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 10]), Tg); + Ti = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + Tj = BYTWJ(&(W[TWVL * 40]), Ti); + Tk = VADD(Th, Tj); + } + { + V Tw, Tx, Tq, Tt, Tu; + Tw = VSUB(Th, Tj); + Tx = VSUB(Tm, To); + Ty = VFMA(LDK(KP475528258), Tw, VMUL(LDK(KP293892626), Tx)); + T2i = VFNMS(LDK(KP293892626), Tw, VMUL(LDK(KP475528258), Tx)); + Tq = VMUL(LDK(KP559016994), VSUB(Tk, Tp)); + Tt = VADD(Tk, Tp); + Tu = VFNMS(LDK(KP250000000), Tt, Ts); + Tv = VADD(Tq, Tu); + T38 = VADD(Ts, Tt); + T2j = VSUB(Tu, Tq); + } + } + { + V TM, TG, TI, TJ, TB, TD, TE, TL; + TL = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TM = 
BYTWJ(&(W[TWVL * 6]), TL); + { + V TF, TH, TA, TC; + TF = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = BYTWJ(&(W[TWVL * 26]), TF); + TH = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TI = BYTWJ(&(W[TWVL * 36]), TH); + TJ = VADD(TG, TI); + TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TB = BYTWJ(&(W[TWVL * 16]), TA); + TC = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + TD = BYTWJ(&(W[TWVL * 46]), TC); + TE = VADD(TB, TD); + } + { + V TQ, TR, TK, TN, TO; + TQ = VSUB(TB, TD); + TR = VSUB(TG, TI); + TS = VFMA(LDK(KP475528258), TQ, VMUL(LDK(KP293892626), TR)); + T2f = VFNMS(LDK(KP293892626), TQ, VMUL(LDK(KP475528258), TR)); + TK = VMUL(LDK(KP559016994), VSUB(TE, TJ)); + TN = VADD(TE, TJ); + TO = VFNMS(LDK(KP250000000), TN, TM); + TP = VADD(TK, TO); + T39 = VADD(TM, TN); + T2g = VSUB(TO, TK); + } + } + { + V T17, T11, T13, T14, TW, TY, TZ, T16; + T16 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T17 = BYTWJ(&(W[TWVL * 2]), T16); + { + V T10, T12, TV, TX; + T10 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T11 = BYTWJ(&(W[TWVL * 22]), T10); + T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T13 = BYTWJ(&(W[TWVL * 32]), T12); + T14 = VADD(T11, T13); + TV = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TW = BYTWJ(&(W[TWVL * 12]), TV); + TX = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TY = BYTWJ(&(W[TWVL * 42]), TX); + TZ = VADD(TW, TY); + } + { + V T1b, T1c, T15, T18, T19; + T1b = VSUB(TW, TY); + T1c = VSUB(T11, T13); + T1d = VFMA(LDK(KP475528258), T1b, VMUL(LDK(KP293892626), T1c)); + T2p = VFNMS(LDK(KP293892626), T1b, VMUL(LDK(KP475528258), T1c)); + T15 = VMUL(LDK(KP559016994), VSUB(TZ, T14)); + T18 = VADD(TZ, T14); + T19 = VFNMS(LDK(KP250000000), T18, T17); + T1a = VADD(T15, T19); + T3b = VADD(T17, T18); + T2q = VSUB(T19, T15); + } + } + { + V T3l, T3m, T3f, T3g, T3e, T3h, T3n, T3i; + { + V T3j, T3k, T3a, T3d; + T3j = VSUB(T38, T39); + T3k = VSUB(T3b, T3c); + T3l = VBYI(VFMA(LDK(KP951056516), T3j, VMUL(LDK(KP587785252), T3k))); + T3m = VBYI(VFNMS(LDK(KP587785252), T3j, VMUL(LDK(KP951056516), T3k))); + 
T3f = VADD(Tc, Td); + T3a = VADD(T38, T39); + T3d = VADD(T3b, T3c); + T3g = VADD(T3a, T3d); + T3e = VMUL(LDK(KP559016994), VSUB(T3a, T3d)); + T3h = VFNMS(LDK(KP250000000), T3g, T3f); + } + ST(&(x[0]), VADD(T3f, T3g), ms, &(x[0])); + T3n = VSUB(T3h, T3e); + ST(&(x[WS(rs, 10)]), VADD(T3m, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 15)]), VSUB(T3n, T3m), ms, &(x[WS(rs, 1)])); + T3i = VADD(T3e, T3h); + ST(&(x[WS(rs, 5)]), VSUB(T3i, T3l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 20)]), VADD(T3l, T3i), ms, &(x[0])); + } + { + V Tf, T1Z, T20, T21, T29, T2a, T2b, T26, T27, T28, T22, T23, T24, T1L, T1U; + V T1Q, T1S, T1A, T1V, T1N, T1O, T2d, T2e; + Tf = VADD(Tb, Te); + T1Z = VFMA(LDK(KP1_688655851), Ty, VMUL(LDK(KP535826794), Tv)); + T20 = VFMA(LDK(KP1_541026485), TS, VMUL(LDK(KP637423989), TP)); + T21 = VSUB(T1Z, T20); + T29 = VFMA(LDK(KP851558583), T1d, VMUL(LDK(KP904827052), T1a)); + T2a = VFMA(LDK(KP1_984229402), T1x, VMUL(LDK(KP125333233), T1u)); + T2b = VADD(T29, T2a); + T26 = VFNMS(LDK(KP844327925), Tv, VMUL(LDK(KP1_071653589), Ty)); + T27 = VFNMS(LDK(KP1_274847979), TS, VMUL(LDK(KP770513242), TP)); + T28 = VADD(T26, T27); + T22 = VFNMS(LDK(KP425779291), T1a, VMUL(LDK(KP1_809654104), T1d)); + T23 = VFNMS(LDK(KP992114701), T1u, VMUL(LDK(KP250666467), T1x)); + T24 = VADD(T22, T23); + { + V T1F, T1G, T1H, T1I, T1J, T1K; + T1F = VFMA(LDK(KP1_937166322), Ty, VMUL(LDK(KP248689887), Tv)); + T1G = VFMA(LDK(KP1_071653589), TS, VMUL(LDK(KP844327925), TP)); + T1H = VADD(T1F, T1G); + T1I = VFMA(LDK(KP1_752613360), T1d, VMUL(LDK(KP481753674), T1a)); + T1J = VFMA(LDK(KP1_457937254), T1x, VMUL(LDK(KP684547105), T1u)); + T1K = VADD(T1I, T1J); + T1L = VADD(T1H, T1K); + T1U = VSUB(T1J, T1I); + T1Q = VMUL(LDK(KP559016994), VSUB(T1K, T1H)); + T1S = VSUB(T1G, T1F); + } + { + V Tz, TT, TU, T1e, T1y, T1z; + Tz = VFNMS(LDK(KP497379774), Ty, VMUL(LDK(KP968583161), Tv)); + TT = VFNMS(LDK(KP1_688655851), TS, VMUL(LDK(KP535826794), TP)); + TU = VADD(Tz, TT); + T1e = VFNMS(LDK(KP963507348), T1d, 
VMUL(LDK(KP876306680), T1a)); + T1y = VFNMS(LDK(KP1_369094211), T1x, VMUL(LDK(KP728968627), T1u)); + T1z = VADD(T1e, T1y); + T1A = VADD(TU, T1z); + T1V = VMUL(LDK(KP559016994), VSUB(TU, T1z)); + T1N = VSUB(TT, Tz); + T1O = VSUB(T1e, T1y); + } + { + V T1B, T1M, T25, T2c; + T1B = VADD(Tf, T1A); + T1M = VBYI(VADD(T1E, T1L)); + ST(&(x[WS(rs, 1)]), VSUB(T1B, T1M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 24)]), VADD(T1B, T1M), ms, &(x[0])); + T25 = VADD(Tf, VADD(T21, T24)); + T2c = VBYI(VADD(T1E, VSUB(T28, T2b))); + ST(&(x[WS(rs, 21)]), VSUB(T25, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T25, T2c), ms, &(x[0])); + } + T2d = VBYI(VADD(T1E, VFMA(LDK(KP309016994), T28, VFMA(LDK(KP587785252), VSUB(T23, T22), VFNMS(LDK(KP951056516), VADD(T1Z, T20), VMUL(LDK(KP809016994), T2b)))))); + T2e = VFMA(LDK(KP309016994), T21, VFMA(LDK(KP951056516), VSUB(T26, T27), VFMA(LDK(KP587785252), VSUB(T2a, T29), VFNMS(LDK(KP809016994), T24, Tf)))); + ST(&(x[WS(rs, 9)]), VADD(T2d, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VSUB(T2e, T2d), ms, &(x[0])); + { + V T1R, T1X, T1W, T1Y, T1P, T1T; + T1P = VFMS(LDK(KP250000000), T1L, T1E); + T1R = VBYI(VADD(VFMA(LDK(KP587785252), T1N, VMUL(LDK(KP951056516), T1O)), VSUB(T1P, T1Q))); + T1X = VBYI(VADD(VFNMS(LDK(KP587785252), T1O, VMUL(LDK(KP951056516), T1N)), VADD(T1P, T1Q))); + T1T = VFNMS(LDK(KP250000000), T1A, Tf); + T1W = VFMA(LDK(KP587785252), T1S, VFNMS(LDK(KP951056516), T1U, VSUB(T1T, T1V))); + T1Y = VFMA(LDK(KP951056516), T1S, VADD(T1V, VFMA(LDK(KP587785252), T1U, T1T))); + ST(&(x[WS(rs, 11)]), VADD(T1R, T1W), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VSUB(T1Y, T1X), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VSUB(T1W, T1R), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T1X, T1Y), ms, &(x[0])); + } + } + { + V T2u, T2w, T2h, T2k, T2l, T2A, T2B, T2C, T2o, T2r, T2s, T2x, T2y, T2z, T2M; + V T2X, T2N, T2W, T2R, T31, T2U, T30, T2E, T2F; + T2u = VFNMS(LDK(KP587785252), T1C, T2t); + T2w = VSUB(Te, Tb); + T2h = VFNMS(LDK(KP125333233), 
T2g, VMUL(LDK(KP1_984229402), T2f)); + T2k = VFMA(LDK(KP1_457937254), T2i, VMUL(LDK(KP684547105), T2j)); + T2l = VSUB(T2h, T2k); + T2A = VFNMS(LDK(KP1_996053456), T2p, VMUL(LDK(KP062790519), T2q)); + T2B = VFMA(LDK(KP1_541026485), T2m, VMUL(LDK(KP637423989), T2n)); + T2C = VSUB(T2A, T2B); + T2o = VFNMS(LDK(KP770513242), T2n, VMUL(LDK(KP1_274847979), T2m)); + T2r = VFMA(LDK(KP125581039), T2p, VMUL(LDK(KP998026728), T2q)); + T2s = VSUB(T2o, T2r); + T2x = VFNMS(LDK(KP1_369094211), T2i, VMUL(LDK(KP728968627), T2j)); + T2y = VFMA(LDK(KP250666467), T2f, VMUL(LDK(KP992114701), T2g)); + T2z = VSUB(T2x, T2y); + { + V T2G, T2H, T2I, T2J, T2K, T2L; + T2G = VFNMS(LDK(KP481753674), T2j, VMUL(LDK(KP1_752613360), T2i)); + T2H = VFMA(LDK(KP851558583), T2f, VMUL(LDK(KP904827052), T2g)); + T2I = VSUB(T2G, T2H); + T2J = VFNMS(LDK(KP844327925), T2q, VMUL(LDK(KP1_071653589), T2p)); + T2K = VFNMS(LDK(KP998026728), T2n, VMUL(LDK(KP125581039), T2m)); + T2L = VADD(T2J, T2K); + T2M = VMUL(LDK(KP559016994), VSUB(T2I, T2L)); + T2X = VSUB(T2J, T2K); + T2N = VADD(T2I, T2L); + T2W = VADD(T2G, T2H); + } + { + V T2P, T2Q, T2Y, T2S, T2T, T2Z; + T2P = VFNMS(LDK(KP425779291), T2g, VMUL(LDK(KP1_809654104), T2f)); + T2Q = VFMA(LDK(KP963507348), T2i, VMUL(LDK(KP876306680), T2j)); + T2Y = VADD(T2Q, T2P); + T2S = VFMA(LDK(KP1_688655851), T2p, VMUL(LDK(KP535826794), T2q)); + T2T = VFMA(LDK(KP1_996053456), T2m, VMUL(LDK(KP062790519), T2n)); + T2Z = VADD(T2S, T2T); + T2R = VSUB(T2P, T2Q); + T31 = VADD(T2Y, T2Z); + T2U = VSUB(T2S, T2T); + T30 = VMUL(LDK(KP559016994), VSUB(T2Y, T2Z)); + } + { + V T36, T37, T2v, T2D; + T36 = VBYI(VADD(T2u, T2N)); + T37 = VADD(T2w, T31); + ST(&(x[WS(rs, 2)]), VADD(T36, T37), ms, &(x[0])); + ST(&(x[WS(rs, 23)]), VSUB(T37, T36), ms, &(x[WS(rs, 1)])); + T2v = VBYI(VSUB(VADD(T2l, T2s), T2u)); + T2D = VADD(T2w, VADD(T2z, T2C)); + ST(&(x[WS(rs, 3)]), VADD(T2v, T2D), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 22)]), VSUB(T2D, T2v), ms, &(x[0])); + } + T2E = VFMA(LDK(KP309016994), T2z, 
VFNMS(LDK(KP809016994), T2C, VFNMS(LDK(KP587785252), VADD(T2r, T2o), VFNMS(LDK(KP951056516), VADD(T2k, T2h), T2w)))); + T2F = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2A, T2B), VFNMS(LDK(KP809016994), T2s, VFNMS(LDK(KP951056516), VADD(T2x, T2y), VMUL(LDK(KP309016994), T2l)))), T2u)); + ST(&(x[WS(rs, 17)]), VSUB(T2E, T2F), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VADD(T2E, T2F), ms, &(x[0])); + { + V T2V, T34, T33, T35, T2O, T32; + T2O = VFNMS(LDK(KP250000000), T2N, T2u); + T2V = VBYI(VADD(T2M, VADD(T2O, VFNMS(LDK(KP587785252), T2U, VMUL(LDK(KP951056516), T2R))))); + T34 = VBYI(VADD(T2O, VSUB(VFMA(LDK(KP587785252), T2R, VMUL(LDK(KP951056516), T2U)), T2M))); + T32 = VFNMS(LDK(KP250000000), T31, T2w); + T33 = VFMA(LDK(KP951056516), T2W, VFMA(LDK(KP587785252), T2X, VADD(T30, T32))); + T35 = VFMA(LDK(KP587785252), T2W, VSUB(VFNMS(LDK(KP951056516), T2X, T32), T30)); + ST(&(x[WS(rs, 7)]), VADD(T2V, T33), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T35, T34), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VSUB(T33, T2V), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T34, T35), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t1fv_25"), twinstr, &GENUS, {170, 110, 78, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_25) (planner *p) { + X(kdft_dit_register) (p, t1fv_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,125 @@ +/* + * Copyright (c) 
2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1fv_3 -include t1f.h */ + +/* + * This function contains 8 FP additions, 8 FP multiplications, + * (or, 5 additions, 5 multiplications, 3 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1f.h" + +static void t1fv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T1, T2, T4; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, T8, T6, T7; + T3 = BYTWJ(&(W[0]), T2); + T5 = 
BYTWJ(&(W[TWVL * 2]), T4); + T8 = VMUL(LDK(KP866025403), VSUB(T5, T3)); + T6 = VADD(T3, T5); + T7 = VFNMS(LDK(KP500000000), T6, T1); + ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T8, T7), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(T8, T7), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, XSIMD_STRING("t1fv_3"), twinstr, &GENUS, {5, 5, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_3) (planner *p) { + X(kdft_dit_register) (p, t1fv_3, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1fv_3 -include t1f.h */ + +/* + * This function contains 8 FP additions, 6 FP multiplications, + * (or, 7 additions, 5 multiplications, 1 fused multiply/add), + * 12 stack variables, 2 constants, and 6 memory accesses + */ +#include "t1f.h" + +static void t1fv_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(3, rs)) { + V T1, T3, T5, T6, T2, T4, T7, T8; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 2]), T4); + T6 = VADD(T3, T5); + ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); + T7 = VFNMS(LDK(KP500000000), T6, T1); + T8 = VBYI(VMUL(LDK(KP866025403), VSUB(T5, T3))); + ST(&(x[WS(rs, 2)]), VSUB(T7, T8), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T7, T8), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 3, 
XSIMD_STRING("t1fv_3"), twinstr, &GENUS, {7, 5, 1, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_3) (planner *p) { + X(kdft_dit_register) (p, t1fv_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,863 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:15 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1fv_32 -include t1f.h */ + +/* + * This function contains 217 FP additions, 160 FP multiplications, + * (or, 119 additions, 62 multiplications, 98 fused multiply/add), + * 112 stack variables, 7 constants, and 64 memory accesses + */ +#include "t1f.h" + +static void t1fv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T26, T25, T1Z, T22, T1W, T2a, T2k, T2g; + { + V T4, T1z, T2o, T32, T2r, T3f, Tf, T1A, T34, T2L, T1D, TC, T33, T2O, T1C; + V Tr, T2C, T3a, T2F, T3b, T1r, T21, T1k, T20, TQ, TM, TS, TL, T2t, TJ; + V T10, T2u; + { + V Tt, T9, T2p, Te, T2q, TA, Tu, Tx; + { + V T1, T1x, T2, T1v; + T1 = LD(&(x[0]), ms, &(x[0])); + T1x = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1v = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + { + V T5, 
Tc, T7, Ta, T2m, T2n; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + { + V T1y, T3, T1w, T6, Td, T8, Tb, Ts, Tz; + Ts = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T1y = BYTWJ(&(W[TWVL * 46]), T1x); + T3 = BYTWJ(&(W[TWVL * 30]), T2); + T1w = BYTWJ(&(W[TWVL * 14]), T1v); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + Td = BYTWJ(&(W[TWVL * 22]), Tc); + T8 = BYTWJ(&(W[TWVL * 38]), T7); + Tb = BYTWJ(&(W[TWVL * 54]), Ta); + Tt = BYTWJ(&(W[TWVL * 58]), Ts); + Tz = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T4 = VSUB(T1, T3); + T2m = VADD(T1, T3); + T1z = VSUB(T1w, T1y); + T2n = VADD(T1w, T1y); + T9 = VSUB(T6, T8); + T2p = VADD(T6, T8); + Te = VSUB(Tb, Td); + T2q = VADD(Tb, Td); + TA = BYTWJ(&(W[TWVL * 10]), Tz); + } + Tu = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2o = VADD(T2m, T2n); + T32 = VSUB(T2m, T2n); + Tx = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + } + } + { + V Tv, To, Ty, Ti, Tj, Tm, Th; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2r = VADD(T2p, T2q); + T3f = VSUB(T2q, T2p); + Tf = VADD(T9, Te); + T1A = VSUB(Te, T9); + Tv = BYTWJ(&(W[TWVL * 26]), Tu); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Ty = BYTWJ(&(W[TWVL * 42]), Tx); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + { + V T1f, T1h, T1a, T1c, T18, T2A, T2B, T1p; + { + V T15, T17, T1o, T1m; + { + V Tw, T2J, Tp, T2K, TB, Tk, Tn, T1n, T14, T16; + T14 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T16 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tt, Tv); + T2J = VADD(Tt, Tv); + Tp = BYTWJ(&(W[TWVL * 50]), To); + T2K = VADD(TA, Ty); + TB = VSUB(Ty, TA); + Tk = BYTWJ(&(W[TWVL * 34]), Tj); + Tn = BYTWJ(&(W[TWVL * 18]), Tm); + T15 = BYTWJ(&(W[TWVL * 60]), T14); + T17 = BYTWJ(&(W[TWVL * 28]), T16); + T1n = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + { + V T2M, Tl, T2N, Tq, T1l; + T1l = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T34 = 
VSUB(T2J, T2K); + T2L = VADD(T2J, T2K); + T1D = VFMA(LDK(KP414213562), Tw, TB); + TC = VFNMS(LDK(KP414213562), TB, Tw); + T2M = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + T2N = VADD(Tn, Tp); + Tq = VSUB(Tn, Tp); + T1o = BYTWJ(&(W[TWVL * 12]), T1n); + T1m = BYTWJ(&(W[TWVL * 44]), T1l); + { + V T1e, T1g, T19, T1b; + T1e = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1g = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T19 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1b = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T33 = VSUB(T2M, T2N); + T2O = VADD(T2M, T2N); + T1C = VFMA(LDK(KP414213562), Tl, Tq); + Tr = VFNMS(LDK(KP414213562), Tq, Tl); + T1f = BYTWJ(&(W[TWVL * 52]), T1e); + T1h = BYTWJ(&(W[TWVL * 20]), T1g); + T1a = BYTWJ(&(W[TWVL * 4]), T19); + T1c = BYTWJ(&(W[TWVL * 36]), T1b); + } + } + } + T18 = VSUB(T15, T17); + T2A = VADD(T15, T17); + T2B = VADD(T1o, T1m); + T1p = VSUB(T1m, T1o); + } + { + V TG, TI, TZ, TX; + { + V T1i, T2E, T1d, T2D, TH, TY, TF; + TF = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T1i = VSUB(T1f, T1h); + T2E = VADD(T1f, T1h); + T1d = VSUB(T1a, T1c); + T2D = VADD(T1a, T1c); + TH = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TY = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T2C = VADD(T2A, T2B); + T3a = VSUB(T2A, T2B); + TG = BYTWJ(&(W[0]), TF); + { + V TW, T1j, T1q, TP, TR, TK; + TW = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T2F = VADD(T2D, T2E); + T3b = VSUB(T2E, T2D); + T1j = VADD(T1d, T1i); + T1q = VSUB(T1i, T1d); + TI = BYTWJ(&(W[TWVL * 32]), TH); + TZ = BYTWJ(&(W[TWVL * 48]), TY); + TP = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TX = BYTWJ(&(W[TWVL * 16]), TW); + TR = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TK = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1r = VFMA(LDK(KP707106781), T1q, T1p); + T21 = VFNMS(LDK(KP707106781), T1q, T1p); + T1k = VFMA(LDK(KP707106781), T1j, T18); + T20 = VFNMS(LDK(KP707106781), T1j, T18); + TQ = BYTWJ(&(W[TWVL * 56]), TP); + TM = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TS = BYTWJ(&(W[TWVL * 24]), 
TR); + TL = BYTWJ(&(W[TWVL * 8]), TK); + } + } + T2t = VADD(TG, TI); + TJ = VSUB(TG, TI); + T10 = VSUB(TX, TZ); + T2u = VADD(TX, TZ); + } + } + } + } + { + V T2s, TT, T2x, T2P, T2Y, T2G, T37, T2v, T2w, TO, T2W, T30, T2U, TN, T2V; + T2s = VSUB(T2o, T2r); + T2U = VADD(T2o, T2r); + TN = BYTWJ(&(W[TWVL * 40]), TM); + TT = VSUB(TQ, TS); + T2x = VADD(TQ, TS); + T2P = VSUB(T2L, T2O); + T2V = VADD(T2O, T2L); + T2Y = VADD(T2C, T2F); + T2G = VSUB(T2C, T2F); + T37 = VSUB(T2t, T2u); + T2v = VADD(T2t, T2u); + T2w = VADD(TL, TN); + TO = VSUB(TL, TN); + T2W = VADD(T2U, T2V); + T30 = VSUB(T2U, T2V); + { + V T3i, T3o, T36, T3r, T3h, T3j, T12, T1Y, TV, T1X, T3s, T3d, T2Q, T2H, T31; + V T2Z; + { + V T35, T3g, T38, T2y, T11, TU; + T35 = VADD(T33, T34); + T3g = VSUB(T34, T33); + T38 = VSUB(T2w, T2x); + T2y = VADD(T2w, T2x); + T11 = VSUB(TO, TT); + TU = VADD(TO, TT); + { + V T3c, T39, T2X, T2z; + T3c = VFNMS(LDK(KP414213562), T3b, T3a); + T3i = VFMA(LDK(KP414213562), T3a, T3b); + T3o = VFNMS(LDK(KP707106781), T35, T32); + T36 = VFMA(LDK(KP707106781), T35, T32); + T3r = VFNMS(LDK(KP707106781), T3g, T3f); + T3h = VFMA(LDK(KP707106781), T3g, T3f); + T39 = VFNMS(LDK(KP414213562), T38, T37); + T3j = VFMA(LDK(KP414213562), T37, T38); + T2X = VADD(T2v, T2y); + T2z = VSUB(T2v, T2y); + T12 = VFMA(LDK(KP707106781), T11, T10); + T1Y = VFNMS(LDK(KP707106781), T11, T10); + TV = VFMA(LDK(KP707106781), TU, TJ); + T1X = VFNMS(LDK(KP707106781), TU, TJ); + T3s = VSUB(T3c, T39); + T3d = VADD(T39, T3c); + T2Q = VSUB(T2G, T2z); + T2H = VADD(T2z, T2G); + T31 = VSUB(T2Y, T2X); + T2Z = VADD(T2X, T2Y); + } + } + { + V Tg, T1U, TD, T1G, T13, T1s, T1H, T1B, T1V, T1E, T3k, T3p, T2e, T2f; + Tg = VFMA(LDK(KP707106781), Tf, T4); + T1U = VFNMS(LDK(KP707106781), Tf, T4); + T3k = VSUB(T3i, T3j); + T3p = VADD(T3j, T3i); + { + V T3v, T3t, T3e, T3m; + T3v = VFNMS(LDK(KP923879532), T3s, T3r); + T3t = VFMA(LDK(KP923879532), T3s, T3r); + T3e = VFNMS(LDK(KP923879532), T3d, T36); + T3m = VFMA(LDK(KP923879532), T3d, T36); + { + 
V T2R, T2T, T2I, T2S; + T2R = VFNMS(LDK(KP707106781), T2Q, T2P); + T2T = VFMA(LDK(KP707106781), T2Q, T2P); + T2I = VFNMS(LDK(KP707106781), T2H, T2s); + T2S = VFMA(LDK(KP707106781), T2H, T2s); + ST(&(x[WS(rs, 24)]), VFNMSI(T31, T30), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T31, T30), ms, &(x[0])); + ST(&(x[0]), VADD(T2W, T2Z), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T2W, T2Z), ms, &(x[0])); + { + V T3u, T3q, T3l, T3n; + T3u = VFMA(LDK(KP923879532), T3p, T3o); + T3q = VFNMS(LDK(KP923879532), T3p, T3o); + T3l = VFNMS(LDK(KP923879532), T3k, T3h); + T3n = VFMA(LDK(KP923879532), T3k, T3h); + ST(&(x[WS(rs, 4)]), VFMAI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T2R, T2I), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T2R, T2I), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3t, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3t, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T3v, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3v, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3l, T3e), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3l, T3e), ms, &(x[0])); + T26 = VSUB(TC, Tr); + TD = VADD(Tr, TC); + } + } + } + T1G = VFMA(LDK(KP198912367), TV, T12); + T13 = VFNMS(LDK(KP198912367), T12, TV); + T1s = VFNMS(LDK(KP198912367), T1r, T1k); + T1H = VFMA(LDK(KP198912367), T1k, T1r); + T1B = VFNMS(LDK(KP707106781), T1A, T1z); + T25 = VFMA(LDK(KP707106781), T1A, T1z); + T1V = VADD(T1C, T1D); + T1E = VSUB(T1C, T1D); + { + V T1S, T1O, T1K, T1u, T1R, T1T, T1L, T1J; + { + V TE, T1M, T1I, T1N, T1t, T1Q, T1F, T1P, T28, T29; + TE = VFMA(LDK(KP923879532), TD, Tg); + T1M = VFNMS(LDK(KP923879532), TD, Tg); + T1I = VSUB(T1G, T1H); + T1N = VADD(T1G, T1H); + T1t = VADD(T13, T1s); + T1Q = VSUB(T1s, T13); + T1F = VFMA(LDK(KP923879532), T1E, T1B); + T1P = VFNMS(LDK(KP923879532), T1E, T1B); + T28 = VFNMS(LDK(KP668178637), 
T1X, T1Y); + T1Z = VFMA(LDK(KP668178637), T1Y, T1X); + T1S = VFMA(LDK(KP980785280), T1N, T1M); + T1O = VFNMS(LDK(KP980785280), T1N, T1M); + T22 = VFMA(LDK(KP668178637), T21, T20); + T29 = VFNMS(LDK(KP668178637), T20, T21); + T1K = VFMA(LDK(KP980785280), T1t, TE); + T1u = VFNMS(LDK(KP980785280), T1t, TE); + T1R = VFNMS(LDK(KP980785280), T1Q, T1P); + T1T = VFMA(LDK(KP980785280), T1Q, T1P); + T1L = VFMA(LDK(KP980785280), T1I, T1F); + T1J = VFNMS(LDK(KP980785280), T1I, T1F); + T2e = VFNMS(LDK(KP923879532), T1V, T1U); + T1W = VFMA(LDK(KP923879532), T1V, T1U); + T2a = VSUB(T28, T29); + T2f = VADD(T28, T29); + } + ST(&(x[WS(rs, 23)]), VFMAI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFNMSI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFMAI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T1J, T1u), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T1J, T1u), ms, &(x[WS(rs, 1)])); + } + T2k = VFNMS(LDK(KP831469612), T2f, T2e); + T2g = VFMA(LDK(KP831469612), T2f, T2e); + } + } + } + } + { + V T2i, T23, T2h, T27; + T2i = VSUB(T22, T1Z); + T23 = VADD(T1Z, T22); + T2h = VFNMS(LDK(KP923879532), T26, T25); + T27 = VFMA(LDK(KP923879532), T26, T25); + { + V T2c, T24, T2j, T2l, T2d, T2b; + T2c = VFMA(LDK(KP831469612), T23, T1W); + T24 = VFNMS(LDK(KP831469612), T23, T1W); + T2j = VFMA(LDK(KP831469612), T2i, T2h); + T2l = VFNMS(LDK(KP831469612), T2i, T2h); + T2d = VFMA(LDK(KP831469612), T2a, T27); + T2b = VFNMS(LDK(KP831469612), T2a, T27); + ST(&(x[WS(rs, 21)]), VFNMSI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFMAI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 
29)]), VFNMSI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T2b, T24), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T2b, T24), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t1fv_32"), twinstr, &GENUS, {119, 62, 98, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_32) (planner *p) { + X(kdft_dit_register) (p, t1fv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1fv_32 -include t1f.h */ + +/* + * This function contains 217 FP additions, 104 FP multiplications, + * (or, 201 additions, 88 multiplications, 16 fused multiply/add), + * 59 stack variables, 7 constants, and 64 memory accesses + */ +#include "t1f.h" + +static void t1fv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), 
MAKE_VOLATILE_STRIDE(32, rs)) { + V T4, T1A, T2o, T32, Tf, T1v, T2r, T3f, TC, T1C, T2L, T34, Tr, T1D, T2O; + V T33, T1k, T20, T2F, T3b, T1r, T21, T2C, T3a, TV, T1X, T2y, T38, T12, T1Y; + V T2v, T37; + { + V T1, T1z, T3, T1x, T1y, T2, T1w, T2m, T2n; + T1 = LD(&(x[0]), ms, &(x[0])); + T1y = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1z = BYTWJ(&(W[TWVL * 46]), T1y); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 30]), T2); + T1w = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1x = BYTWJ(&(W[TWVL * 14]), T1w); + T4 = VSUB(T1, T3); + T1A = VSUB(T1x, T1z); + T2m = VADD(T1, T3); + T2n = VADD(T1x, T1z); + T2o = VADD(T2m, T2n); + T32 = VSUB(T2m, T2n); + } + { + V T6, Td, T8, Tb; + { + V T5, Tc, T7, Ta; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 22]), Tc); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 38]), T7); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 54]), Ta); + } + { + V T9, Te, T2p, T2q; + T9 = VSUB(T6, T8); + Te = VSUB(Tb, Td); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + T1v = VMUL(LDK(KP707106781), VSUB(Te, T9)); + T2p = VADD(T6, T8); + T2q = VADD(Tb, Td); + T2r = VADD(T2p, T2q); + T3f = VSUB(T2q, T2p); + } + } + { + V Tt, TA, Tv, Ty; + { + V Ts, Tz, Tu, Tx; + Ts = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + Tt = BYTWJ(&(W[TWVL * 58]), Ts); + Tz = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 42]), Tz); + Tu = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tv = BYTWJ(&(W[TWVL * 26]), Tu); + Tx = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ty = BYTWJ(&(W[TWVL * 10]), Tx); + } + { + V Tw, TB, T2J, T2K; + Tw = VSUB(Tt, Tv); + TB = VSUB(Ty, TA); + TC = VFMA(LDK(KP923879532), Tw, VMUL(LDK(KP382683432), TB)); + T1C = VFNMS(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T2J = VADD(Tt, Tv); + T2K = VADD(Ty, TA); + T2L = VADD(T2J, T2K); + T34 = VSUB(T2J, T2K); + } + } + { + V Ti, Tp, Tk, Tn; + { + V Th, To, Tj, Tm; + Th = 
LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Tp = BYTWJ(&(W[TWVL * 50]), To); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tk = BYTWJ(&(W[TWVL * 34]), Tj); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tn = BYTWJ(&(W[TWVL * 18]), Tm); + } + { + V Tl, Tq, T2M, T2N; + Tl = VSUB(Ti, Tk); + Tq = VSUB(Tn, Tp); + Tr = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + T1D = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + T2M = VADD(Ti, Tk); + T2N = VADD(Tn, Tp); + T2O = VADD(T2M, T2N); + T33 = VSUB(T2M, T2N); + } + } + { + V T15, T17, T1p, T1n, T1f, T1h, T1i, T1a, T1c, T1d; + { + V T14, T16, T1o, T1m; + T14 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T15 = BYTWJ(&(W[TWVL * 60]), T14); + T16 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T17 = BYTWJ(&(W[TWVL * 28]), T16); + T1o = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1p = BYTWJ(&(W[TWVL * 44]), T1o); + T1m = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1n = BYTWJ(&(W[TWVL * 12]), T1m); + { + V T1e, T1g, T19, T1b; + T1e = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1f = BYTWJ(&(W[TWVL * 52]), T1e); + T1g = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1h = BYTWJ(&(W[TWVL * 20]), T1g); + T1i = VSUB(T1f, T1h); + T19 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1a = BYTWJ(&(W[TWVL * 4]), T19); + T1b = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T1c = BYTWJ(&(W[TWVL * 36]), T1b); + T1d = VSUB(T1a, T1c); + } + } + { + V T18, T1j, T2D, T2E; + T18 = VSUB(T15, T17); + T1j = VMUL(LDK(KP707106781), VADD(T1d, T1i)); + T1k = VADD(T18, T1j); + T20 = VSUB(T18, T1j); + T2D = VADD(T1a, T1c); + T2E = VADD(T1f, T1h); + T2F = VADD(T2D, T2E); + T3b = VSUB(T2E, T2D); + } + { + V T1l, T1q, T2A, T2B; + T1l = VMUL(LDK(KP707106781), VSUB(T1i, T1d)); + T1q = VSUB(T1n, T1p); + T1r = VSUB(T1l, T1q); + T21 = VADD(T1q, T1l); + T2A = VADD(T15, T17); + T2B = VADD(T1n, T1p); + T2C = VADD(T2A, T2B); + T3a = VSUB(T2A, T2B); + } + } + { + V TG, TI, T10, TY, TQ, 
TS, TT, TL, TN, TO; + { + V TF, TH, TZ, TX; + TF = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TG = BYTWJ(&(W[0]), TF); + TH = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TI = BYTWJ(&(W[TWVL * 32]), TH); + TZ = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T10 = BYTWJ(&(W[TWVL * 48]), TZ); + TX = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TY = BYTWJ(&(W[TWVL * 16]), TX); + { + V TP, TR, TK, TM; + TP = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TQ = BYTWJ(&(W[TWVL * 56]), TP); + TR = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TS = BYTWJ(&(W[TWVL * 24]), TR); + TT = VSUB(TQ, TS); + TK = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TL = BYTWJ(&(W[TWVL * 8]), TK); + TM = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TN = BYTWJ(&(W[TWVL * 40]), TM); + TO = VSUB(TL, TN); + } + } + { + V TJ, TU, T2w, T2x; + TJ = VSUB(TG, TI); + TU = VMUL(LDK(KP707106781), VADD(TO, TT)); + TV = VADD(TJ, TU); + T1X = VSUB(TJ, TU); + T2w = VADD(TL, TN); + T2x = VADD(TQ, TS); + T2y = VADD(T2w, T2x); + T38 = VSUB(T2x, T2w); + } + { + V TW, T11, T2t, T2u; + TW = VMUL(LDK(KP707106781), VSUB(TT, TO)); + T11 = VSUB(TY, T10); + T12 = VSUB(TW, T11); + T1Y = VADD(T11, TW); + T2t = VADD(TG, TI); + T2u = VADD(TY, T10); + T2v = VADD(T2t, T2u); + T37 = VSUB(T2t, T2u); + } + } + { + V T2W, T30, T2Z, T31; + { + V T2U, T2V, T2X, T2Y; + T2U = VADD(T2o, T2r); + T2V = VADD(T2O, T2L); + T2W = VADD(T2U, T2V); + T30 = VSUB(T2U, T2V); + T2X = VADD(T2v, T2y); + T2Y = VADD(T2C, T2F); + T2Z = VADD(T2X, T2Y); + T31 = VBYI(VSUB(T2Y, T2X)); + } + ST(&(x[WS(rs, 16)]), VSUB(T2W, T2Z), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T30, T31), ms, &(x[0])); + ST(&(x[0]), VADD(T2W, T2Z), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VSUB(T30, T31), ms, &(x[0])); + } + { + V T2s, T2P, T2H, T2Q, T2z, T2G; + T2s = VSUB(T2o, T2r); + T2P = VSUB(T2L, T2O); + T2z = VSUB(T2v, T2y); + T2G = VSUB(T2C, T2F); + T2H = VMUL(LDK(KP707106781), VADD(T2z, T2G)); + T2Q = VMUL(LDK(KP707106781), VSUB(T2G, T2z)); + { + V T2I, T2R, T2S, T2T; + T2I = VADD(T2s, 
T2H); + T2R = VBYI(VADD(T2P, T2Q)); + ST(&(x[WS(rs, 28)]), VSUB(T2I, T2R), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T2I, T2R), ms, &(x[0])); + T2S = VSUB(T2s, T2H); + T2T = VBYI(VSUB(T2Q, T2P)); + ST(&(x[WS(rs, 20)]), VSUB(T2S, T2T), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T2S, T2T), ms, &(x[0])); + } + } + { + V T36, T3r, T3h, T3p, T3d, T3o, T3k, T3s, T35, T3g; + T35 = VMUL(LDK(KP707106781), VADD(T33, T34)); + T36 = VADD(T32, T35); + T3r = VSUB(T32, T35); + T3g = VMUL(LDK(KP707106781), VSUB(T34, T33)); + T3h = VADD(T3f, T3g); + T3p = VSUB(T3g, T3f); + { + V T39, T3c, T3i, T3j; + T39 = VFMA(LDK(KP923879532), T37, VMUL(LDK(KP382683432), T38)); + T3c = VFNMS(LDK(KP382683432), T3b, VMUL(LDK(KP923879532), T3a)); + T3d = VADD(T39, T3c); + T3o = VSUB(T3c, T39); + T3i = VFNMS(LDK(KP382683432), T37, VMUL(LDK(KP923879532), T38)); + T3j = VFMA(LDK(KP382683432), T3a, VMUL(LDK(KP923879532), T3b)); + T3k = VADD(T3i, T3j); + T3s = VSUB(T3j, T3i); + } + { + V T3e, T3l, T3u, T3v; + T3e = VADD(T36, T3d); + T3l = VBYI(VADD(T3h, T3k)); + ST(&(x[WS(rs, 30)]), VSUB(T3e, T3l), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T3e, T3l), ms, &(x[0])); + T3u = VBYI(VADD(T3p, T3o)); + T3v = VADD(T3r, T3s); + ST(&(x[WS(rs, 6)]), VADD(T3u, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VSUB(T3v, T3u), ms, &(x[0])); + } + { + V T3m, T3n, T3q, T3t; + T3m = VSUB(T36, T3d); + T3n = VBYI(VSUB(T3k, T3h)); + ST(&(x[WS(rs, 18)]), VSUB(T3m, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T3m, T3n), ms, &(x[0])); + T3q = VBYI(VSUB(T3o, T3p)); + T3t = VSUB(T3r, T3s); + ST(&(x[WS(rs, 10)]), VADD(T3q, T3t), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VSUB(T3t, T3q), ms, &(x[0])); + } + } + { + V TE, T1P, T1I, T1Q, T1t, T1M, T1F, T1N; + { + V Tg, TD, T1G, T1H; + Tg = VADD(T4, Tf); + TD = VADD(Tr, TC); + TE = VADD(Tg, TD); + T1P = VSUB(Tg, TD); + T1G = VFNMS(LDK(KP195090322), TV, VMUL(LDK(KP980785280), T12)); + T1H = VFMA(LDK(KP195090322), T1k, VMUL(LDK(KP980785280), T1r)); + T1I = VADD(T1G, T1H); + T1Q = VSUB(T1H, T1G); + 
} + { + V T13, T1s, T1B, T1E; + T13 = VFMA(LDK(KP980785280), TV, VMUL(LDK(KP195090322), T12)); + T1s = VFNMS(LDK(KP195090322), T1r, VMUL(LDK(KP980785280), T1k)); + T1t = VADD(T13, T1s); + T1M = VSUB(T1s, T13); + T1B = VSUB(T1v, T1A); + T1E = VSUB(T1C, T1D); + T1F = VADD(T1B, T1E); + T1N = VSUB(T1E, T1B); + } + { + V T1u, T1J, T1S, T1T; + T1u = VADD(TE, T1t); + T1J = VBYI(VADD(T1F, T1I)); + ST(&(x[WS(rs, 31)]), VSUB(T1u, T1J), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1u, T1J), ms, &(x[WS(rs, 1)])); + T1S = VBYI(VADD(T1N, T1M)); + T1T = VADD(T1P, T1Q); + ST(&(x[WS(rs, 7)]), VADD(T1S, T1T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VSUB(T1T, T1S), ms, &(x[WS(rs, 1)])); + } + { + V T1K, T1L, T1O, T1R; + T1K = VSUB(TE, T1t); + T1L = VBYI(VSUB(T1I, T1F)); + ST(&(x[WS(rs, 17)]), VSUB(T1K, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VADD(T1K, T1L), ms, &(x[WS(rs, 1)])); + T1O = VBYI(VSUB(T1M, T1N)); + T1R = VSUB(T1P, T1Q); + ST(&(x[WS(rs, 9)]), VADD(T1O, T1R), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VSUB(T1R, T1O), ms, &(x[WS(rs, 1)])); + } + } + { + V T1W, T2h, T2a, T2i, T23, T2e, T27, T2f; + { + V T1U, T1V, T28, T29; + T1U = VSUB(T4, Tf); + T1V = VADD(T1D, T1C); + T1W = VADD(T1U, T1V); + T2h = VSUB(T1U, T1V); + T28 = VFNMS(LDK(KP555570233), T1X, VMUL(LDK(KP831469612), T1Y)); + T29 = VFMA(LDK(KP555570233), T20, VMUL(LDK(KP831469612), T21)); + T2a = VADD(T28, T29); + T2i = VSUB(T29, T28); + } + { + V T1Z, T22, T25, T26; + T1Z = VFMA(LDK(KP831469612), T1X, VMUL(LDK(KP555570233), T1Y)); + T22 = VFNMS(LDK(KP555570233), T21, VMUL(LDK(KP831469612), T20)); + T23 = VADD(T1Z, T22); + T2e = VSUB(T22, T1Z); + T25 = VADD(T1A, T1v); + T26 = VSUB(TC, Tr); + T27 = VADD(T25, T26); + T2f = VSUB(T26, T25); + } + { + V T24, T2b, T2k, T2l; + T24 = VADD(T1W, T23); + T2b = VBYI(VADD(T27, T2a)); + ST(&(x[WS(rs, 29)]), VSUB(T24, T2b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T24, T2b), ms, &(x[WS(rs, 1)])); + T2k = VBYI(VADD(T2f, T2e)); + T2l = VADD(T2h, T2i); + 
ST(&(x[WS(rs, 5)]), VADD(T2k, T2l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VSUB(T2l, T2k), ms, &(x[WS(rs, 1)])); + } + { + V T2c, T2d, T2g, T2j; + T2c = VSUB(T1W, T23); + T2d = VBYI(VSUB(T2a, T27)); + ST(&(x[WS(rs, 19)]), VSUB(T2c, T2d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VADD(T2c, T2d), ms, &(x[WS(rs, 1)])); + T2g = VBYI(VSUB(T2e, T2f)); + T2j = VSUB(T2h, T2i); + ST(&(x[WS(rs, 11)]), VADD(T2g, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VSUB(T2j, T2g), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t1fv_32"), twinstr, &GENUS, {201, 88, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_32) (planner *p) { + X(kdft_dit_register) (p, t1fv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1fv_4 -include t1f.h */ + +/* + * This function contains 11 FP additions, 8 FP multiplications, + * (or, 9 additions, 6 multiplications, 2 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1f.h" + +static void t1fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T7, T2, T5, T8, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 4]), T7); + T3 = BYTWJ(&(W[TWVL * 2]), T2); + T6 = BYTWJ(&(W[0]), T5); + { + V Ta, T4, Tb, T9; + Ta = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tb = VADD(T6, T8); + T9 = VSUB(T6, T8); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + 
VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1fv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_4) (planner *p) { + X(kdft_dit_register) (p, t1fv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1fv_4 -include t1f.h */ + +/* + * This function contains 11 FP additions, 6 FP multiplications, + * (or, 11 additions, 6 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t1f.h" + +static void t1fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T3, T6, T7, T2, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 4]), T7); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 2]), T2); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTWJ(&(W[0]), T5); + { + V T4, T9, Ta, Tb; + T4 = VSUB(T1, T3); + T9 = VBYI(VSUB(T6, T8)); + ST(&(x[WS(rs, 1)]), VSUB(T4, T9), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T4, T9), ms, &(x[WS(rs, 1)])); + Ta = VADD(T1, T3); + Tb = VADD(T6, T8); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1fv_4"), twinstr, &GENUS, {11, 6, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_4) (planner *p) { + X(kdft_dit_register) (p, t1fv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1fv_5 -include t1f.h */ + +/* + * This function contains 20 FP additions, 19 FP multiplications, + * (or, 11 additions, 10 multiplications, 9 fused multiply/add), + * 26 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1f.h" + +static void t1fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * 
ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T2, T9, T4, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, Ta, T5, T8; + T3 = BYTWJ(&(W[0]), T2); + Ta = BYTWJ(&(W[TWVL * 4]), T9); + T5 = BYTWJ(&(W[TWVL * 6]), T4); + T8 = BYTWJ(&(W[TWVL * 2]), T7); + { + V T6, Tg, Tb, Th; + T6 = VADD(T3, T5); + Tg = VSUB(T3, T5); + Tb = VADD(T8, Ta); + Th = VSUB(T8, Ta); + { + V Te, Tc, Tk, Ti, Td, Tj, Tf; + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Tk = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tg, Th)); + Ti = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Th, Tg)); + Td = VFNMS(LDK(KP250000000), Tc, T1); + ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); + Tj = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + ST(&(x[WS(rs, 2)]), VFMAI(Tk, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(Ti, Tf), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1fv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_5) (planner *p) { + X(kdft_dit_register) (p, t1fv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1fv_5 -include t1f.h */ + +/* + * This function contains 20 FP additions, 14 FP multiplications, + * (or, 17 additions, 11 multiplications, 3 fused multiply/add), + * 20 stack variables, 4 constants, and 10 memory accesses + */ +#include "t1f.h" + +static void t1fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, 
+0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V Tc, Tg, Th, T5, Ta, Td; + Tc = LD(&(x[0]), ms, &(x[0])); + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T4 = BYTWJ(&(W[TWVL * 6]), T3); + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 2]), T6); + } + Tg = VSUB(T2, T4); + Th = VSUB(T7, T9); + T5 = VADD(T2, T4); + Ta = VADD(T7, T9); + Td = VADD(T5, Ta); + } + ST(&(x[0]), VADD(Tc, Td), ms, &(x[0])); + { + V Ti, Tj, Tf, Tk, Tb, Te; + Ti = VBYI(VFMA(LDK(KP951056516), Tg, VMUL(LDK(KP587785252), Th))); + Tj = VBYI(VFNMS(LDK(KP587785252), Tg, VMUL(LDK(KP951056516), Th))); + Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta)); + Te = VFNMS(LDK(KP250000000), Td, Tc); + Tf = VADD(Tb, Te); + Tk = VSUB(Te, Tb); + ST(&(x[WS(rs, 1)]), VSUB(Tf, Ti), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tj, Tk), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t1fv_5"), twinstr, &GENUS, {17, 11, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_5) (planner *p) { + X(kdft_dit_register) (p, t1fv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1fv_6 -include t1f.h */ + +/* + * This function contains 23 FP additions, 18 FP multiplications, + * (or, 17 additions, 12 multiplications, 6 fused multiply/add), + * 27 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1f.h" + +static void t1fv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V T1, T2, Ta, Tc, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = 
LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T3, Tb, Td, T6, T8; + T3 = BYTWJ(&(W[TWVL * 4]), T2); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Td = BYTWJ(&(W[0]), Tc); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + { + V Ti, T4, Tk, Te, Tj, T9; + Ti = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tk = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tj = VADD(T6, T8); + T9 = VSUB(T6, T8); + { + V Tl, Tn, Tf, Th, Tm, Tg; + Tl = VADD(Tj, Tk); + Tn = VMUL(LDK(KP866025403), VSUB(Tk, Tj)); + Tf = VADD(T9, Te); + Th = VMUL(LDK(KP866025403), VSUB(Te, T9)); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[WS(rs, 3)]), VADD(T4, Tf), ms, &(x[WS(rs, 1)])); + Tg = VFNMS(LDK(KP500000000), Tf, T4); + ST(&(x[WS(rs, 2)]), VFNMSI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(Tn, Tm), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(Th, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Th, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1fv_6"), twinstr, &GENUS, {17, 12, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_6) (planner *p) { + X(kdft_dit_register) (p, t1fv_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1fv_6 -include t1f.h */ + +/* + * This function contains 23 FP additions, 14 FP multiplications, + * (or, 21 additions, 12 multiplications, 2 fused multiply/add), + * 19 stack variables, 2 constants, and 12 memory accesses + */ +#include "t1f.h" + +static void t1fv_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, 
+0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 10)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(6, rs)) { + V T4, Ti, Te, Tk, T9, Tj, T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 4]), T2); + T4 = VSUB(T1, T3); + Ti = VADD(T1, T3); + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[0]), Tc); + Te = VSUB(Tb, Td); + Tk = VADD(Tb, Td); + } + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + T9 = VSUB(T6, T8); + Tj = VADD(T6, T8); + } + { + V Th, Tf, Tg, Tn, Tl, Tm; + Th = VBYI(VMUL(LDK(KP866025403), VSUB(Te, T9))); + Tf = VADD(T9, Te); + Tg = VFNMS(LDK(KP500000000), Tf, T4); + ST(&(x[WS(rs, 3)]), VADD(T4, Tf), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tg, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tg, Th), ms, &(x[WS(rs, 1)])); + Tn = VBYI(VMUL(LDK(KP866025403), VSUB(Tk, Tj))); + Tl = VADD(Tj, Tk); + Tm = VFNMS(LDK(KP500000000), Tl, Ti); + ST(&(x[0]), VADD(Ti, Tl), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(Tm, Tn), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Tm, Tn), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 6, XSIMD_STRING("t1fv_6"), twinstr, &GENUS, {21, 12, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_6) (planner *p) { + X(kdft_dit_register) (p, t1fv_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_64.c --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1877 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:16 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t1fv_64 -include t1f.h */ + +/* + * This function contains 519 FP additions, 384 FP multiplications, + * (or, 261 additions, 126 multiplications, 258 fused multiply/add), + * 187 stack variables, 15 constants, and 128 memory accesses + */ +#include "t1f.h" + +static void t1fv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + 
DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V T6L, T6M, T6O, T6P, T75, T6V, T5A, T6A, T72, T6K, T6t, T6D, T6w, T6B, T6h; + V T6E; + { + V Ta, T3U, T3V, T37, T7a, T58, T7B, T6l, T1v, T24, T5Q, T7o, T5F, T7l, T43; + V T4F, T2i, T2R, T6b, T7v, T60, T7s, T4a, T4I, T5u, T7h, T5x, T7g, T1i, T3a; + V T4j, T4C, T7e, T5l, T7d, T5o, T3b, TV, T4B, T4m, T3X, T3Y, T6o, T7b, T5f; + V T7C, Tx, T38, T2p, T61, T2n, T65, T2D, T7p, T5M, T7m, T5T, T4G, T46, T25; + V T1S, T2q, T2u, T2w; + { + V T5q, T10, T5v, T15, T1b, T5s, T1c, T1e; + { + V T1V, T1p, T5B, T5O, T1u, T1X, T20, T21; + { + V T1, T2, T7, T5, T32, T34, T2X, T2Z; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T32 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T34 = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + T2X = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + T2Z = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + { + V T1m, T54, T6j, T36, T55, T31, T56, T1n, T1q, T1s, T4, T9; + { + V T3, T8, T6, T33, T35, T2Y, T30, T1l; + T1l = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 62]), T2); + T8 = BYTWJ(&(W[TWVL * 94]), 
T7); + T6 = BYTWJ(&(W[TWVL * 30]), T5); + T33 = BYTWJ(&(W[TWVL * 14]), T32); + T35 = BYTWJ(&(W[TWVL * 78]), T34); + T2Y = BYTWJ(&(W[TWVL * 110]), T2X); + T30 = BYTWJ(&(W[TWVL * 46]), T2Z); + T1m = BYTWJ(&(W[0]), T1l); + T54 = VSUB(T1, T3); + T4 = VADD(T1, T3); + T6j = VSUB(T6, T8); + T9 = VADD(T6, T8); + T36 = VADD(T33, T35); + T55 = VSUB(T33, T35); + T31 = VADD(T2Y, T30); + T56 = VSUB(T2Y, T30); + T1n = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + } + T1q = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1s = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + Ta = VSUB(T4, T9); + T3U = VADD(T4, T9); + { + V T57, T6k, T1o, T1r, T1t, T1W, T1U, T1Z; + T1U = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T3V = VADD(T36, T31); + T37 = VSUB(T31, T36); + T57 = VADD(T55, T56); + T6k = VSUB(T56, T55); + T1o = BYTWJ(&(W[TWVL * 64]), T1n); + T1r = BYTWJ(&(W[TWVL * 32]), T1q); + T1t = BYTWJ(&(W[TWVL * 96]), T1s); + T1V = BYTWJ(&(W[TWVL * 16]), T1U); + T1W = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T1Z = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T7a = VFNMS(LDK(KP707106781), T57, T54); + T58 = VFMA(LDK(KP707106781), T57, T54); + T7B = VFMA(LDK(KP707106781), T6k, T6j); + T6l = VFNMS(LDK(KP707106781), T6k, T6j); + T1p = VADD(T1m, T1o); + T5B = VSUB(T1m, T1o); + T5O = VSUB(T1r, T1t); + T1u = VADD(T1r, T1t); + T1X = BYTWJ(&(W[TWVL * 80]), T1W); + T20 = BYTWJ(&(W[TWVL * 112]), T1Z); + T21 = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T5W, T2N, T69, T2L, T5Y, T2P, T48, T2c, T2h; + { + V T41, T1Y, T5C, T22, T2d, T29, T2b, T2f, T28, T2a, T2H, T2J; + T28 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T2a = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1v = VSUB(T1p, T1u); + T41 = VADD(T1p, T1u); + T1Y = VADD(T1V, T1X); + T5C = VSUB(T1V, T1X); + T22 = BYTWJ(&(W[TWVL * 48]), T21); + T2d = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T29 = BYTWJ(&(W[TWVL * 124]), T28); + T2b = BYTWJ(&(W[TWVL * 60]), T2a); + T2f = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T2H = 
LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2J = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + { + V T23, T5D, T2e, T2g, T2I, T2K, T2M; + T2M = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T23 = VADD(T20, T22); + T5D = VSUB(T20, T22); + T2e = BYTWJ(&(W[TWVL * 28]), T2d); + T2c = VADD(T29, T2b); + T5W = VSUB(T29, T2b); + T2g = BYTWJ(&(W[TWVL * 92]), T2f); + T2I = BYTWJ(&(W[TWVL * 108]), T2H); + T2K = BYTWJ(&(W[TWVL * 44]), T2J); + T2N = BYTWJ(&(W[TWVL * 12]), T2M); + { + V T5E, T5P, T42, T2O; + T5E = VADD(T5C, T5D); + T5P = VSUB(T5C, T5D); + T24 = VSUB(T1Y, T23); + T42 = VADD(T1Y, T23); + T69 = VSUB(T2g, T2e); + T2h = VADD(T2e, T2g); + T2O = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2L = VADD(T2I, T2K); + T5Y = VSUB(T2I, T2K); + T5Q = VFMA(LDK(KP707106781), T5P, T5O); + T7o = VFNMS(LDK(KP707106781), T5P, T5O); + T5F = VFMA(LDK(KP707106781), T5E, T5B); + T7l = VFNMS(LDK(KP707106781), T5E, T5B); + T43 = VADD(T41, T42); + T4F = VSUB(T41, T42); + T2P = BYTWJ(&(W[TWVL * 76]), T2O); + } + } + } + T2i = VSUB(T2c, T2h); + T48 = VADD(T2c, T2h); + { + V TW, TY, T11, T2Q, T5X, T13; + TW = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T11 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2Q = VADD(T2N, T2P); + T5X = VSUB(T2N, T2P); + T13 = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + { + V T12, T5Z, T6a, T49, T14, T18, T1a; + { + V T17, T19, TX, TZ; + T17 = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + T19 = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TX = BYTWJ(&(W[TWVL * 122]), TW); + TZ = BYTWJ(&(W[TWVL * 58]), TY); + T12 = BYTWJ(&(W[TWVL * 26]), T11); + T5Z = VADD(T5X, T5Y); + T6a = VSUB(T5Y, T5X); + T2R = VSUB(T2L, T2Q); + T49 = VADD(T2Q, T2L); + T14 = BYTWJ(&(W[TWVL * 90]), T13); + T18 = BYTWJ(&(W[TWVL * 106]), T17); + T5q = VSUB(TX, TZ); + T10 = VADD(TX, TZ); + T1a = BYTWJ(&(W[TWVL * 42]), T19); + } + T6b = VFMA(LDK(KP707106781), T6a, T69); + T7v = VFNMS(LDK(KP707106781), T6a, T69); + T60 = VFMA(LDK(KP707106781), T5Z, T5W); + T7s = VFNMS(LDK(KP707106781), T5Z, T5W); + 
T4a = VADD(T48, T49); + T4I = VSUB(T48, T49); + T5v = VSUB(T14, T12); + T15 = VADD(T12, T14); + T1b = VADD(T18, T1a); + T5s = VSUB(T18, T1a); + } + T1c = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1e = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + } + } + } + { + V Th, T59, Tf, Tv, T5d, Tj, Tm, To; + { + V T5h, TQ, T5m, T5i, TO, TS, TJ, T4k, TD, TI; + { + V T4h, T16, TB, T1d, T1f, TE, TG, TA, Tz, TK, TM, TC; + Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T4h = VADD(T10, T15); + T16 = VSUB(T10, T15); + TB = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + T1d = BYTWJ(&(W[TWVL * 10]), T1c); + T1f = BYTWJ(&(W[TWVL * 74]), T1e); + TE = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 2]), Tz); + TK = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TM = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + TC = BYTWJ(&(W[TWVL * 66]), TB); + { + V T1g, T5r, TF, TH, TL, TN, TP; + TP = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + T1g = VADD(T1d, T1f); + T5r = VSUB(T1d, T1f); + TF = BYTWJ(&(W[TWVL * 34]), TE); + TH = BYTWJ(&(W[TWVL * 98]), TG); + TL = BYTWJ(&(W[TWVL * 18]), TK); + TN = BYTWJ(&(W[TWVL * 82]), TM); + T5h = VSUB(TA, TC); + TD = VADD(TA, TC); + TQ = BYTWJ(&(W[TWVL * 114]), TP); + { + V T5w, T5t, T4i, T1h, TR; + T5w = VSUB(T5s, T5r); + T5t = VADD(T5r, T5s); + T4i = VADD(T1g, T1b); + T1h = VSUB(T1b, T1g); + T5m = VSUB(TF, TH); + TI = VADD(TF, TH); + T5i = VSUB(TL, TN); + TO = VADD(TL, TN); + TR = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T5u = VFMA(LDK(KP707106781), T5t, T5q); + T7h = VFNMS(LDK(KP707106781), T5t, T5q); + T5x = VFMA(LDK(KP707106781), T5w, T5v); + T7g = VFNMS(LDK(KP707106781), T5w, T5v); + T1i = VFNMS(LDK(KP414213562), T1h, T16); + T3a = VFMA(LDK(KP414213562), T16, T1h); + T4j = VADD(T4h, T4i); + T4C = VSUB(T4h, T4i); + TS = BYTWJ(&(W[TWVL * 50]), TR); + } + } + } + TJ = VSUB(TD, TI); + T4k = VADD(TD, TI); + { + V Tb, Td, Tr, T5j, TT, Tt, Tg; + Tb = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tr = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + 
T5j = VSUB(TQ, TS); + TT = VADD(TQ, TS); + Tt = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + Tg = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + { + V Ti, Tc, Te, Ts; + Ti = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tc = BYTWJ(&(W[TWVL * 6]), Tb); + Te = BYTWJ(&(W[TWVL * 70]), Td); + Ts = BYTWJ(&(W[TWVL * 22]), Tr); + { + V T5k, T5n, TU, T4l, Tu; + T5k = VADD(T5i, T5j); + T5n = VSUB(T5i, T5j); + TU = VSUB(TO, TT); + T4l = VADD(TO, TT); + Tu = BYTWJ(&(W[TWVL * 86]), Tt); + Th = BYTWJ(&(W[TWVL * 38]), Tg); + T59 = VSUB(Tc, Te); + Tf = VADD(Tc, Te); + T7e = VFNMS(LDK(KP707106781), T5k, T5h); + T5l = VFMA(LDK(KP707106781), T5k, T5h); + T7d = VFNMS(LDK(KP707106781), T5n, T5m); + T5o = VFMA(LDK(KP707106781), T5n, T5m); + T3b = VFMA(LDK(KP414213562), TJ, TU); + TV = VFNMS(LDK(KP414213562), TU, TJ); + T4B = VSUB(T4k, T4l); + T4m = VADD(T4k, T4l); + Tv = VADD(Ts, Tu); + T5d = VSUB(Tu, Ts); + Tj = BYTWJ(&(W[TWVL * 102]), Ti); + } + } + Tm = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + To = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + } + } + { + V T5b, T6m, Tl, T1A, T5G, T1Q, T5K, T1C, T1D, T5e, T6n, Tw, T1H, T1J; + { + V T1w, T1y, T1M, T1O, Tq, T5c, T1B; + T1w = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1y = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1M = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1O = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1B = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + { + V Tk, T5a, Tn, Tp; + Tk = VADD(Th, Tj); + T5a = VSUB(Th, Tj); + Tn = BYTWJ(&(W[TWVL * 118]), Tm); + Tp = BYTWJ(&(W[TWVL * 54]), To); + { + V T1x, T1z, T1N, T1P; + T1x = BYTWJ(&(W[TWVL * 8]), T1w); + T1z = BYTWJ(&(W[TWVL * 72]), T1y); + T1N = BYTWJ(&(W[TWVL * 24]), T1M); + T1P = BYTWJ(&(W[TWVL * 88]), T1O); + T5b = VFNMS(LDK(KP414213562), T5a, T59); + T6m = VFMA(LDK(KP414213562), T59, T5a); + T3X = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + Tq = VADD(Tn, Tp); + T5c = VSUB(Tn, Tp); + T1A = VADD(T1x, T1z); + T5G = VSUB(T1x, T1z); + T1Q = VADD(T1N, T1P); + T5K = VSUB(T1N, T1P); + T1C = BYTWJ(&(W[TWVL * 40]), T1B); + 
} + } + T1D = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T5e = VFNMS(LDK(KP414213562), T5d, T5c); + T6n = VFMA(LDK(KP414213562), T5c, T5d); + T3Y = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + T1H = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)])); + T1J = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + } + { + V T1I, T1K, T1F, T5H, T2k, T2l, T2z, T2B, T2j, T1E; + T2j = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1E = BYTWJ(&(W[TWVL * 104]), T1D); + T6o = VSUB(T6m, T6n); + T7b = VADD(T6m, T6n); + T5f = VADD(T5b, T5e); + T7C = VSUB(T5e, T5b); + Tx = VADD(Tl, Tw); + T38 = VSUB(Tw, Tl); + T1I = BYTWJ(&(W[TWVL * 120]), T1H); + T1K = BYTWJ(&(W[TWVL * 56]), T1J); + T1F = VADD(T1C, T1E); + T5H = VSUB(T1C, T1E); + T2k = BYTWJ(&(W[TWVL * 4]), T2j); + T2l = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2z = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2B = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + { + V T5I, T5R, T44, T1G, T2m, T2A, T2C, T5S, T5L, T1R, T45, T2o, T5J, T1L; + T2o = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T5J = VSUB(T1I, T1K); + T1L = VADD(T1I, T1K); + T5I = VFNMS(LDK(KP414213562), T5H, T5G); + T5R = VFMA(LDK(KP414213562), T5G, T5H); + T44 = VADD(T1A, T1F); + T1G = VSUB(T1A, T1F); + T2m = BYTWJ(&(W[TWVL * 68]), T2l); + T2A = BYTWJ(&(W[TWVL * 20]), T2z); + T2C = BYTWJ(&(W[TWVL * 84]), T2B); + T5S = VFNMS(LDK(KP414213562), T5J, T5K); + T5L = VFMA(LDK(KP414213562), T5K, T5J); + T1R = VSUB(T1L, T1Q); + T45 = VADD(T1L, T1Q); + T2p = BYTWJ(&(W[TWVL * 36]), T2o); + T61 = VSUB(T2k, T2m); + T2n = VADD(T2k, T2m); + T65 = VSUB(T2C, T2A); + T2D = VADD(T2A, T2C); + T7p = VSUB(T5I, T5L); + T5M = VADD(T5I, T5L); + T7m = VSUB(T5R, T5S); + T5T = VADD(T5R, T5S); + T4G = VSUB(T44, T45); + T46 = VADD(T44, T45); + T25 = VSUB(T1G, T1R); + T1S = VADD(T1G, T1R); + T2q = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + } + T2u = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2w = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + } + } + } + } + { + V T67, T7w, T6e, T7t, T3s, T3E, T39, T3D, T1k, T3k, 
T3t, T3c, T1T, T3v, T3w; + V T26, T2G, T3y, T3z, T2T; + { + V T4A, T4N, T47, T4v, T2r, T2v, T2x, T4s, T40, T3W, T3Z; + T4A = VSUB(T3U, T3V); + T3W = VADD(T3U, T3V); + T3Z = VADD(T3X, T3Y); + T4N = VSUB(T3Y, T3X); + T47 = VSUB(T43, T46); + T4v = VADD(T43, T46); + T2r = BYTWJ(&(W[TWVL * 100]), T2q); + T2v = BYTWJ(&(W[TWVL * 116]), T2u); + T2x = BYTWJ(&(W[TWVL * 52]), T2w); + T4s = VADD(T3W, T3Z); + T40 = VSUB(T3W, T3Z); + { + V T4O, T4n, T4R, T4H, T4E, T4W, T4u, T4y, T4d, T4J, T2F, T2S; + { + V T6c, T63, T2t, T4b, T6d, T66, T2E, T4c; + { + V T4D, T62, T2s, T64, T2y, T4t; + T4O = VSUB(T4C, T4B); + T4D = VADD(T4B, T4C); + T62 = VSUB(T2r, T2p); + T2s = VADD(T2p, T2r); + T64 = VSUB(T2v, T2x); + T2y = VADD(T2v, T2x); + T4t = VADD(T4m, T4j); + T4n = VSUB(T4j, T4m); + T4R = VFMA(LDK(KP414213562), T4F, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4F); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4W = VFNMS(LDK(KP707106781), T4D, T4A); + T6c = VFNMS(LDK(KP414213562), T61, T62); + T63 = VFMA(LDK(KP414213562), T62, T61); + T2t = VSUB(T2n, T2s); + T4b = VADD(T2n, T2s); + T6d = VFMA(LDK(KP414213562), T64, T65); + T66 = VFNMS(LDK(KP414213562), T65, T64); + T2E = VSUB(T2y, T2D); + T4c = VADD(T2y, T2D); + T4u = VADD(T4s, T4t); + T4y = VSUB(T4s, T4t); + } + T67 = VADD(T63, T66); + T7w = VSUB(T66, T63); + T6e = VADD(T6c, T6d); + T7t = VSUB(T6d, T6c); + T4d = VADD(T4b, T4c); + T4J = VSUB(T4c, T4b); + T2F = VADD(T2t, T2E); + T2S = VSUB(T2E, T2t); + } + { + V Ty, T1j, T4Q, T4K; + Ty = VFMA(LDK(KP707106781), Tx, Ta); + T3s = VFNMS(LDK(KP707106781), Tx, Ta); + T3E = VSUB(T1i, TV); + T1j = VADD(TV, T1i); + T39 = VFMA(LDK(KP707106781), T38, T37); + T3D = VFNMS(LDK(KP707106781), T38, T37); + T4Q = VFMA(LDK(KP414213562), T4I, T4J); + T4K = VFNMS(LDK(KP414213562), T4J, T4I); + { + V T4w, T4e, T4P, T4Z; + T4w = VADD(T4a, T4d); + T4e = VSUB(T4a, T4d); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T4Z = VFNMS(LDK(KP707106781), T4O, T4N); + T1k = VFMA(LDK(KP923879532), T1j, Ty); + T3k = 
VFNMS(LDK(KP923879532), T1j, Ty); + { + V T4L, T50, T4S, T4X; + T4L = VADD(T4H, T4K); + T50 = VSUB(T4K, T4H); + T4S = VSUB(T4Q, T4R); + T4X = VADD(T4R, T4Q); + { + V T4f, T4o, T4x, T4z; + T4f = VADD(T47, T4e); + T4o = VSUB(T4e, T47); + T4x = VADD(T4v, T4w); + T4z = VSUB(T4w, T4v); + { + V T53, T51, T4M, T4U; + T53 = VFNMS(LDK(KP923879532), T50, T4Z); + T51 = VFMA(LDK(KP923879532), T50, T4Z); + T4M = VFNMS(LDK(KP923879532), T4L, T4E); + T4U = VFMA(LDK(KP923879532), T4L, T4E); + { + V T52, T4Y, T4T, T4V; + T52 = VFMA(LDK(KP923879532), T4X, T4W); + T4Y = VFNMS(LDK(KP923879532), T4X, T4W); + T4T = VFNMS(LDK(KP923879532), T4S, T4P); + T4V = VFMA(LDK(KP923879532), T4S, T4P); + { + V T4p, T4r, T4g, T4q; + T4p = VFNMS(LDK(KP707106781), T4o, T4n); + T4r = VFMA(LDK(KP707106781), T4o, T4n); + T4g = VFNMS(LDK(KP707106781), T4f, T40); + T4q = VFMA(LDK(KP707106781), T4f, T40); + ST(&(x[WS(rs, 16)]), VFMAI(T4z, T4y), ms, &(x[0])); + ST(&(x[WS(rs, 48)]), VFNMSI(T4z, T4y), ms, &(x[0])); + ST(&(x[0]), VADD(T4u, T4x), ms, &(x[0])); + ST(&(x[WS(rs, 32)]), VSUB(T4u, T4x), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VFNMSI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VFMAI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 60)]), VFNMSI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 36)]), VFMAI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 56)]), VFNMSI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 40)]), VFMAI(T4p, T4g), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T4p, T4g), ms, &(x[0])); + T3t = VADD(T3b, T3a); + T3c = VSUB(T3a, T3b); + } + } + } + } + } + } + T1T = VFMA(LDK(KP707106781), T1S, T1v); + T3v = VFNMS(LDK(KP707106781), T1S, T1v); + T3w = VFNMS(LDK(KP707106781), T25, T24); + T26 = VFMA(LDK(KP707106781), T25, T24); + T2G = 
VFMA(LDK(KP707106781), T2F, T2i); + T3y = VFNMS(LDK(KP707106781), T2F, T2i); + T3z = VFNMS(LDK(KP707106781), T2S, T2R); + T2T = VFMA(LDK(KP707106781), T2S, T2R); + } + } + } + { + V T3u, T3M, T3F, T3P, T3x, T3H, T3q, T3m, T3h, T3j, T3r, T3p, T2W, T3i; + { + V T3d, T3n, T27, T3f, T2U, T3e; + T3d = VFMA(LDK(KP923879532), T3c, T39); + T3n = VFNMS(LDK(KP923879532), T3c, T39); + T27 = VFNMS(LDK(KP198912367), T26, T1T); + T3f = VFMA(LDK(KP198912367), T1T, T26); + T2U = VFNMS(LDK(KP198912367), T2T, T2G); + T3e = VFMA(LDK(KP198912367), T2G, T2T); + T3u = VFMA(LDK(KP923879532), T3t, T3s); + T3M = VFNMS(LDK(KP923879532), T3t, T3s); + { + V T3g, T3l, T2V, T3o; + T3g = VSUB(T3e, T3f); + T3l = VADD(T3f, T3e); + T2V = VADD(T27, T2U); + T3o = VSUB(T2U, T27); + T3F = VFNMS(LDK(KP923879532), T3E, T3D); + T3P = VFMA(LDK(KP923879532), T3E, T3D); + T3x = VFMA(LDK(KP668178637), T3w, T3v); + T3H = VFNMS(LDK(KP668178637), T3v, T3w); + T3q = VFMA(LDK(KP980785280), T3l, T3k); + T3m = VFNMS(LDK(KP980785280), T3l, T3k); + T3h = VFNMS(LDK(KP980785280), T3g, T3d); + T3j = VFMA(LDK(KP980785280), T3g, T3d); + T3r = VFNMS(LDK(KP980785280), T3o, T3n); + T3p = VFMA(LDK(KP980785280), T3o, T3n); + T2W = VFNMS(LDK(KP980785280), T2V, T1k); + T3i = VFMA(LDK(KP980785280), T2V, T1k); + } + } + { + V T7n, T7Z, T8j, T89, T7k, T7O, T8g, T7Y, T7H, T7R, T80, T7q, T7u, T82, T83; + V T7x; + { + V T7c, T7W, T7D, T87, T7f, T7F, T3A, T3G, T7E, T7i; + T7c = VFNMS(LDK(KP923879532), T7b, T7a); + T7W = VFMA(LDK(KP923879532), T7b, T7a); + T7D = VFNMS(LDK(KP923879532), T7C, T7B); + T87 = VFMA(LDK(KP923879532), T7C, T7B); + T7f = VFNMS(LDK(KP668178637), T7e, T7d); + T7F = VFMA(LDK(KP668178637), T7d, T7e); + ST(&(x[WS(rs, 46)]), VFNMSI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 50)]), VFMAI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 62)]), VFNMSI(T3j, T3i), ms, 
&(x[0])); + ST(&(x[WS(rs, 34)]), VFMAI(T3h, T2W), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3h, T2W), ms, &(x[0])); + T3A = VFMA(LDK(KP668178637), T3z, T3y); + T3G = VFNMS(LDK(KP668178637), T3y, T3z); + T7E = VFMA(LDK(KP668178637), T7g, T7h); + T7i = VFNMS(LDK(KP668178637), T7h, T7g); + T7n = VFNMS(LDK(KP923879532), T7m, T7l); + T7Z = VFMA(LDK(KP923879532), T7m, T7l); + { + V T3I, T3N, T3B, T3Q; + T3I = VSUB(T3G, T3H); + T3N = VADD(T3H, T3G); + T3B = VADD(T3x, T3A); + T3Q = VSUB(T3A, T3x); + { + V T7j, T88, T7G, T7X; + T7j = VADD(T7f, T7i); + T88 = VSUB(T7f, T7i); + T7G = VSUB(T7E, T7F); + T7X = VADD(T7F, T7E); + { + V T3S, T3O, T3J, T3L; + T3S = VFNMS(LDK(KP831469612), T3N, T3M); + T3O = VFMA(LDK(KP831469612), T3N, T3M); + T3J = VFNMS(LDK(KP831469612), T3I, T3F); + T3L = VFMA(LDK(KP831469612), T3I, T3F); + { + V T3T, T3R, T3C, T3K; + T3T = VFMA(LDK(KP831469612), T3Q, T3P); + T3R = VFNMS(LDK(KP831469612), T3Q, T3P); + T3C = VFNMS(LDK(KP831469612), T3B, T3u); + T3K = VFMA(LDK(KP831469612), T3B, T3u); + T8j = VFNMS(LDK(KP831469612), T88, T87); + T89 = VFMA(LDK(KP831469612), T88, T87); + T7k = VFNMS(LDK(KP831469612), T7j, T7c); + T7O = VFMA(LDK(KP831469612), T7j, T7c); + T8g = VFNMS(LDK(KP831469612), T7X, T7W); + T7Y = VFMA(LDK(KP831469612), T7X, T7W); + T7H = VFNMS(LDK(KP831469612), T7G, T7D); + T7R = VFMA(LDK(KP831469612), T7G, T7D); + ST(&(x[WS(rs, 42)]), VFMAI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 54)]), VFNMSI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 58)]), VFMAI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T3J, T3C), ms, &(x[0])); + ST(&(x[WS(rs, 38)]), VFNMSI(T3J, T3C), ms, &(x[0])); + T80 = VFNMS(LDK(KP923879532), T7p, T7o); + T7q = VFMA(LDK(KP923879532), T7p, T7o); + } + } + } + } + T7u = VFNMS(LDK(KP923879532), T7t, T7s); + T82 = VFMA(LDK(KP923879532), T7t, T7s); + T83 = 
VFNMS(LDK(KP923879532), T7w, T7v); + T7x = VFMA(LDK(KP923879532), T7w, T7v); + } + { + V T5g, T6I, T6p, T6T, T5p, T6q, T6r, T5y; + T5g = VFMA(LDK(KP923879532), T5f, T58); + T6I = VFNMS(LDK(KP923879532), T5f, T58); + { + V T7r, T7I, T7y, T7J; + T7r = VFNMS(LDK(KP534511135), T7q, T7n); + T7I = VFMA(LDK(KP534511135), T7n, T7q); + T7y = VFNMS(LDK(KP534511135), T7x, T7u); + T7J = VFMA(LDK(KP534511135), T7u, T7x); + { + V T81, T8a, T84, T8b; + T81 = VFMA(LDK(KP303346683), T80, T7Z); + T8a = VFNMS(LDK(KP303346683), T7Z, T80); + T84 = VFMA(LDK(KP303346683), T83, T82); + T8b = VFNMS(LDK(KP303346683), T82, T83); + T6p = VFMA(LDK(KP923879532), T6o, T6l); + T6T = VFNMS(LDK(KP923879532), T6o, T6l); + T5p = VFNMS(LDK(KP198912367), T5o, T5l); + T6q = VFMA(LDK(KP198912367), T5l, T5o); + { + V T7K, T7P, T7z, T7S; + T7K = VSUB(T7I, T7J); + T7P = VADD(T7I, T7J); + T7z = VADD(T7r, T7y); + T7S = VSUB(T7y, T7r); + { + V T8c, T8h, T85, T8k; + T8c = VSUB(T8a, T8b); + T8h = VADD(T8a, T8b); + T85 = VADD(T81, T84); + T8k = VSUB(T84, T81); + { + V T7Q, T7U, T7L, T7N; + T7Q = VFNMS(LDK(KP881921264), T7P, T7O); + T7U = VFMA(LDK(KP881921264), T7P, T7O); + T7L = VFNMS(LDK(KP881921264), T7K, T7H); + T7N = VFMA(LDK(KP881921264), T7K, T7H); + { + V T7T, T7V, T7A, T7M; + T7T = VFNMS(LDK(KP881921264), T7S, T7R); + T7V = VFMA(LDK(KP881921264), T7S, T7R); + T7A = VFNMS(LDK(KP881921264), T7z, T7k); + T7M = VFMA(LDK(KP881921264), T7z, T7k); + { + V T8i, T8m, T8d, T8f; + T8i = VFMA(LDK(KP956940335), T8h, T8g); + T8m = VFNMS(LDK(KP956940335), T8h, T8g); + T8d = VFNMS(LDK(KP956940335), T8c, T89); + T8f = VFMA(LDK(KP956940335), T8c, T89); + { + V T8l, T8n, T86, T8e; + T8l = VFMA(LDK(KP956940335), T8k, T8j); + T8n = VFNMS(LDK(KP956940335), T8k, T8j); + T86 = VFNMS(LDK(KP956940335), T85, T7Y); + T8e = VFMA(LDK(KP956940335), T85, T7Y); + ST(&(x[WS(rs, 53)]), VFNMSI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 43)]), VFMAI(T7T, T7Q), ms, &(x[WS(rs, 
1)])); + ST(&(x[WS(rs, 21)]), VFNMSI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 59)]), VFMAI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFMAI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 37)]), VFNMSI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 51)]), VFMAI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 45)]), VFNMSI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 61)]), VFNMSI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 35)]), VFMAI(T8d, T86), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFNMSI(T8d, T86), ms, &(x[WS(rs, 1)])); + T6r = VFMA(LDK(KP198912367), T5u, T5x); + T5y = VFNMS(LDK(KP198912367), T5x, T5u); + } + } + } + } + } + } + } + } + { + V T5N, T5U, T68, T5z, T6U, T6f; + T5N = VFMA(LDK(KP923879532), T5M, T5F); + T6L = VFNMS(LDK(KP923879532), T5M, T5F); + T6M = VFNMS(LDK(KP923879532), T5T, T5Q); + T5U = VFMA(LDK(KP923879532), T5T, T5Q); + T68 = VFMA(LDK(KP923879532), T67, T60); + T6O = VFNMS(LDK(KP923879532), T67, T60); + T5z = VADD(T5p, T5y); + T6U = VSUB(T5y, T5p); + T6P = VFNMS(LDK(KP923879532), T6e, T6b); + T6f = VFMA(LDK(KP923879532), T6e, T6b); + { + V T5V, T6u, T6g, T6v, T6s, T6J; + T6s = VSUB(T6q, T6r); + T6J = VADD(T6q, T6r); + T5V = VFNMS(LDK(KP098491403), T5U, T5N); + T6u = VFMA(LDK(KP098491403), T5N, T5U); + T75 = VFNMS(LDK(KP980785280), T6U, T6T); + T6V = VFMA(LDK(KP980785280), T6U, T6T); + T5A = VFMA(LDK(KP980785280), T5z, T5g); + T6A = VFNMS(LDK(KP980785280), T5z, T5g); + T6g = VFNMS(LDK(KP098491403), T6f, T68); + T6v = VFMA(LDK(KP098491403), T68, T6f); + T72 = VFNMS(LDK(KP980785280), T6J, T6I); + T6K = VFMA(LDK(KP980785280), T6J, T6I); + T6t = VFMA(LDK(KP980785280), T6s, T6p); + T6D = VFNMS(LDK(KP980785280), T6s, T6p); + T6w = VSUB(T6u, T6v); + T6B = VADD(T6u, 
T6v); + T6h = VADD(T5V, T6g); + T6E = VSUB(T6g, T5V); + } + } + } + } + } + } + } + { + V T6W, T6N, T6G, T6C, T6z, T6x, T6H, T6F, T6y, T6i, T6X, T6Q; + T6W = VFNMS(LDK(KP820678790), T6L, T6M); + T6N = VFMA(LDK(KP820678790), T6M, T6L); + T6G = VFMA(LDK(KP995184726), T6B, T6A); + T6C = VFNMS(LDK(KP995184726), T6B, T6A); + T6z = VFMA(LDK(KP995184726), T6w, T6t); + T6x = VFNMS(LDK(KP995184726), T6w, T6t); + T6H = VFMA(LDK(KP995184726), T6E, T6D); + T6F = VFNMS(LDK(KP995184726), T6E, T6D); + T6y = VFMA(LDK(KP995184726), T6h, T5A); + T6i = VFNMS(LDK(KP995184726), T6h, T5A); + T6X = VFNMS(LDK(KP820678790), T6O, T6P); + T6Q = VFMA(LDK(KP820678790), T6P, T6O); + { + V T73, T6Y, T76, T6R; + ST(&(x[WS(rs, 49)]), VFNMSI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VFMAI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 63)]), VFMAI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFMAI(T6x, T6i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 33)]), VFNMSI(T6x, T6i), ms, &(x[WS(rs, 1)])); + T73 = VADD(T6W, T6X); + T6Y = VSUB(T6W, T6X); + T76 = VSUB(T6Q, T6N); + T6R = VADD(T6N, T6Q); + { + V T78, T74, T71, T6Z, T79, T77, T70, T6S; + T78 = VFNMS(LDK(KP773010453), T73, T72); + T74 = VFMA(LDK(KP773010453), T73, T72); + T71 = VFMA(LDK(KP773010453), T6Y, T6V); + T6Z = VFNMS(LDK(KP773010453), T6Y, T6V); + T79 = VFNMS(LDK(KP773010453), T76, T75); + T77 = VFMA(LDK(KP773010453), T76, T75); + T70 = VFMA(LDK(KP773010453), T6R, T6K); + T6S = VFNMS(LDK(KP773010453), T6R, T6K); + ST(&(x[WS(rs, 55)]), VFMAI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VFNMSI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VFMAI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 
57)]), VFNMSI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 39)]), VFMAI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFNMSI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} +/* Twiddle instruction table for t1fv_64: one VTW(0, k) entry for each k = 1..63, closed by a {TW_NEXT, VL, 0} entry. */ +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; +/* Codelet descriptor for t1fv_64: the value 64, the name string, the twinstr table, &GENUS, the tuple {261, 126, 258, 0} (presumably operation counts -- TODO confirm against the ct_desc declaration) and three zero fields. */ +static const ct_desc desc = { 64, XSIMD_STRING("t1fv_64"), twinstr, &GENUS, {261, 126, 258, 0}, 0, 0, 0 }; +/* Registration entry point: passes t1fv_64 and &desc to the planner p through X(kdft_dit_register). */ +void XSIMD(codelet_t1fv_64) (planner *p) { + X(kdft_dit_register) (p, t1fv_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t1fv_64 -include t1f.h */ + +/* + * This function contains 519 FP additions, 250 FP multiplications, + * (or, 467 additions, 198 multiplications, 52 fused multiply/add), + * 107 stack variables, 15 constants, and 128 memory accesses + */ +#include "t1f.h" + +static void t1fv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP773010453, 
+0.773010453362736960810906609758469800971041293); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V Tg, T4a, T6r, T7f, T3o, T4B, T5q, T7e, T5R, T62, T28, T4o, T2g, T4l, T7n; + V T7Z, T68, T6j, T2C, T4s, T3a, T4v, T7u, T82, T7E, T7F, T7V, T5F, T6u, T1k; + V T4e, T1r, T4d, T7B, T7C, T7W, T5M, T6v, TV, T4g, T12, T4h, T7h, T7i, TD; + V T4C, T3h, T4b, T5x, T6s, T1R, T4m, T7q, T80, T2j, T4p, T5Y, T63, T2Z, T4w; + V T7x, T83, T33, T4t, T6f, T6k; + { + V T1, T3, T3m, T3k, Tb, Td, Te, T6, T8, T9, T2, T3l, T3j; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 62]), T2); + T3l = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T3m = BYTWJ(&(W[TWVL * 94]), T3l); + T3j = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3k = BYTWJ(&(W[TWVL * 30]), T3j); + { + V Ta, Tc, T5, T7; + Ta = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 110]), Ta); + Tc = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 46]), Tc); + Te = VSUB(Tb, Td); + T5 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T6 = 
BYTWJ(&(W[TWVL * 14]), T5); + T7 = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 78]), T7); + T9 = VSUB(T6, T8); + } + { + V T4, Tf, T6p, T6q; + T4 = VSUB(T1, T3); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VADD(T4, Tf); + T4a = VSUB(T4, Tf); + T6p = VADD(Tb, Td); + T6q = VADD(T6, T8); + T6r = VSUB(T6p, T6q); + T7f = VADD(T6q, T6p); + } + { + V T3i, T3n, T5o, T5p; + T3i = VMUL(LDK(KP707106781), VSUB(Te, T9)); + T3n = VSUB(T3k, T3m); + T3o = VSUB(T3i, T3n); + T4B = VADD(T3n, T3i); + T5o = VADD(T1, T3); + T5p = VADD(T3k, T3m); + T5q = VSUB(T5o, T5p); + T7e = VADD(T5o, T5p); + } + } + { + V T24, T26, T5Q, T2b, T2d, T5P, T1W, T60, T21, T61, T22, T27; + { + V T23, T25, T2a, T2c; + T23 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T24 = BYTWJ(&(W[TWVL * 32]), T23); + T25 = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + T26 = BYTWJ(&(W[TWVL * 96]), T25); + T5Q = VADD(T24, T26); + T2a = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2b = BYTWJ(&(W[0]), T2a); + T2c = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + T2d = BYTWJ(&(W[TWVL * 64]), T2c); + T5P = VADD(T2b, T2d); + } + { + V T1T, T1V, T1S, T1U; + T1S = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T1T = BYTWJ(&(W[TWVL * 112]), T1S); + T1U = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T1V = BYTWJ(&(W[TWVL * 48]), T1U); + T1W = VSUB(T1T, T1V); + T60 = VADD(T1T, T1V); + } + { + V T1Y, T20, T1X, T1Z; + T1X = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1Y = BYTWJ(&(W[TWVL * 16]), T1X); + T1Z = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T20 = BYTWJ(&(W[TWVL * 80]), T1Z); + T21 = VSUB(T1Y, T20); + T61 = VADD(T1Y, T20); + } + T5R = VSUB(T5P, T5Q); + T62 = VSUB(T60, T61); + T22 = VMUL(LDK(KP707106781), VSUB(T1W, T21)); + T27 = VSUB(T24, T26); + T28 = VSUB(T22, T27); + T4o = VADD(T27, T22); + { + V T2e, T2f, T7l, T7m; + T2e = VSUB(T2b, T2d); + T2f = VMUL(LDK(KP707106781), VADD(T21, T1W)); + T2g = VADD(T2e, T2f); + T4l = VSUB(T2e, T2f); + T7l = VADD(T5P, T5Q); + T7m = VADD(T61, T60); + T7n = VADD(T7l, T7m); 
+ T7Z = VSUB(T7l, T7m); + } + } + { + V T2n, T2p, T66, T36, T38, T67, T2v, T6i, T2A, T6h, T2q, T2B; + { + V T2m, T2o, T35, T37; + T2m = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T2n = BYTWJ(&(W[TWVL * 124]), T2m); + T2o = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T2p = BYTWJ(&(W[TWVL * 60]), T2o); + T66 = VADD(T2n, T2p); + T35 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T36 = BYTWJ(&(W[TWVL * 28]), T35); + T37 = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T38 = BYTWJ(&(W[TWVL * 92]), T37); + T67 = VADD(T36, T38); + } + { + V T2s, T2u, T2r, T2t; + T2r = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T2s = BYTWJ(&(W[TWVL * 12]), T2r); + T2t = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2u = BYTWJ(&(W[TWVL * 76]), T2t); + T2v = VSUB(T2s, T2u); + T6i = VADD(T2s, T2u); + } + { + V T2x, T2z, T2w, T2y; + T2w = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2x = BYTWJ(&(W[TWVL * 108]), T2w); + T2y = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T2z = BYTWJ(&(W[TWVL * 44]), T2y); + T2A = VSUB(T2x, T2z); + T6h = VADD(T2x, T2z); + } + T68 = VSUB(T66, T67); + T6j = VSUB(T6h, T6i); + T2q = VSUB(T2n, T2p); + T2B = VMUL(LDK(KP707106781), VADD(T2v, T2A)); + T2C = VADD(T2q, T2B); + T4s = VSUB(T2q, T2B); + { + V T34, T39, T7s, T7t; + T34 = VMUL(LDK(KP707106781), VSUB(T2A, T2v)); + T39 = VSUB(T36, T38); + T3a = VSUB(T34, T39); + T4v = VADD(T39, T34); + T7s = VADD(T66, T67); + T7t = VADD(T6i, T6h); + T7u = VADD(T7s, T7t); + T82 = VSUB(T7s, T7t); + } + } + { + V T1g, T1i, T5A, T1m, T1o, T5z, T18, T5C, T1d, T5D, T5B, T5E; + { + V T1f, T1h, T1l, T1n; + T1f = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1g = BYTWJ(&(W[TWVL * 34]), T1f); + T1h = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + T1i = BYTWJ(&(W[TWVL * 98]), T1h); + T5A = VADD(T1g, T1i); + T1l = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T1m = BYTWJ(&(W[TWVL * 2]), T1l); + T1n = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + T1o = BYTWJ(&(W[TWVL * 66]), T1n); + T5z = VADD(T1m, T1o); + } + { + V T15, T17, T14, T16; + T14 = LD(&(x[WS(rs, 58)]), ms, 
&(x[0])); + T15 = BYTWJ(&(W[TWVL * 114]), T14); + T16 = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T17 = BYTWJ(&(W[TWVL * 50]), T16); + T18 = VSUB(T15, T17); + T5C = VADD(T15, T17); + } + { + V T1a, T1c, T19, T1b; + T19 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T1a = BYTWJ(&(W[TWVL * 18]), T19); + T1b = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + T1c = BYTWJ(&(W[TWVL * 82]), T1b); + T1d = VSUB(T1a, T1c); + T5D = VADD(T1a, T1c); + } + T7E = VADD(T5z, T5A); + T7F = VADD(T5D, T5C); + T7V = VSUB(T7E, T7F); + T5B = VSUB(T5z, T5A); + T5E = VSUB(T5C, T5D); + T5F = VFMA(LDK(KP923879532), T5B, VMUL(LDK(KP382683432), T5E)); + T6u = VFNMS(LDK(KP382683432), T5B, VMUL(LDK(KP923879532), T5E)); + { + V T1e, T1j, T1p, T1q; + T1e = VMUL(LDK(KP707106781), VSUB(T18, T1d)); + T1j = VSUB(T1g, T1i); + T1k = VSUB(T1e, T1j); + T4e = VADD(T1j, T1e); + T1p = VSUB(T1m, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T1d, T18)); + T1r = VADD(T1p, T1q); + T4d = VSUB(T1p, T1q); + } + } + { + V TG, TI, T5G, TY, T10, T5H, TO, T5K, TT, T5J, T5I, T5L; + { + V TF, TH, TX, TZ; + TF = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + TG = BYTWJ(&(W[TWVL * 122]), TF); + TH = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + TI = BYTWJ(&(W[TWVL * 58]), TH); + T5G = VADD(TG, TI); + TX = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TY = BYTWJ(&(W[TWVL * 26]), TX); + TZ = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + T10 = BYTWJ(&(W[TWVL * 90]), TZ); + T5H = VADD(TY, T10); + } + { + V TL, TN, TK, TM; + TK = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TL = BYTWJ(&(W[TWVL * 10]), TK); + TM = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + TN = BYTWJ(&(W[TWVL * 74]), TM); + TO = VSUB(TL, TN); + T5K = VADD(TL, TN); + } + { + V TQ, TS, TP, TR; + TP = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + TQ = BYTWJ(&(W[TWVL * 106]), TP); + TR = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TS = BYTWJ(&(W[TWVL * 42]), TR); + TT = VSUB(TQ, TS); + T5J = VADD(TQ, TS); + } + T7B = VADD(T5G, T5H); + T7C = VADD(T5K, T5J); + T7W = VSUB(T7B, T7C); + T5I = VSUB(T5G, T5H); + T5L = VSUB(T5J, T5K); + T5M = VFNMS(LDK(KP382683432), 
T5L, VMUL(LDK(KP923879532), T5I)); + T6v = VFMA(LDK(KP382683432), T5I, VMUL(LDK(KP923879532), T5L)); + { + V TJ, TU, TW, T11; + TJ = VSUB(TG, TI); + TU = VMUL(LDK(KP707106781), VADD(TO, TT)); + TV = VADD(TJ, TU); + T4g = VSUB(TJ, TU); + TW = VMUL(LDK(KP707106781), VSUB(TT, TO)); + T11 = VSUB(TY, T10); + T12 = VSUB(TW, T11); + T4h = VADD(T11, TW); + } + } + { + V Tl, T5r, TB, T5v, Tq, T5s, Tw, T5u, Tr, TC; + { + V Ti, Tk, Th, Tj; + Th = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 6]), Th); + Tj = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tk = BYTWJ(&(W[TWVL * 70]), Tj); + Tl = VSUB(Ti, Tk); + T5r = VADD(Ti, Tk); + } + { + V Ty, TA, Tx, Tz; + Tx = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Ty = BYTWJ(&(W[TWVL * 22]), Tx); + Tz = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 86]), Tz); + TB = VSUB(Ty, TA); + T5v = VADD(Ty, TA); + } + { + V Tn, Tp, Tm, To; + Tm = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tn = BYTWJ(&(W[TWVL * 38]), Tm); + To = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tp = BYTWJ(&(W[TWVL * 102]), To); + Tq = VSUB(Tn, Tp); + T5s = VADD(Tn, Tp); + } + { + V Tt, Tv, Ts, Tu; + Ts = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + Tt = BYTWJ(&(W[TWVL * 118]), Ts); + Tu = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tv = BYTWJ(&(W[TWVL * 54]), Tu); + Tw = VSUB(Tt, Tv); + T5u = VADD(Tt, Tv); + } + T7h = VADD(T5r, T5s); + T7i = VADD(T5u, T5v); + Tr = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + TC = VFMA(LDK(KP923879532), Tw, VMUL(LDK(KP382683432), TB)); + TD = VADD(Tr, TC); + T4C = VSUB(TC, Tr); + { + V T3f, T3g, T5t, T5w; + T3f = VFNMS(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T3g = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + T3h = VSUB(T3f, T3g); + T4b = VADD(T3g, T3f); + T5t = VSUB(T5r, T5s); + T5w = VSUB(T5u, T5v); + T5x = VMUL(LDK(KP707106781), VADD(T5t, T5w)); + T6s = VMUL(LDK(KP707106781), VSUB(T5w, T5t)); + } + } + { + V T1z, T5V, T1P, T5T, T1E, T5W, T1K, T5S; + { + V T1w, T1y, T1v, T1x; + T1v = LD(&(x[WS(rs, 61)]), ms, 
&(x[WS(rs, 1)])); + T1w = BYTWJ(&(W[TWVL * 120]), T1v); + T1x = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1y = BYTWJ(&(W[TWVL * 56]), T1x); + T1z = VSUB(T1w, T1y); + T5V = VADD(T1w, T1y); + } + { + V T1M, T1O, T1L, T1N; + T1L = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1M = BYTWJ(&(W[TWVL * 40]), T1L); + T1N = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T1O = BYTWJ(&(W[TWVL * 104]), T1N); + T1P = VSUB(T1M, T1O); + T5T = VADD(T1M, T1O); + } + { + V T1B, T1D, T1A, T1C; + T1A = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1B = BYTWJ(&(W[TWVL * 24]), T1A); + T1C = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1D = BYTWJ(&(W[TWVL * 88]), T1C); + T1E = VSUB(T1B, T1D); + T5W = VADD(T1B, T1D); + } + { + V T1H, T1J, T1G, T1I; + T1G = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1H = BYTWJ(&(W[TWVL * 8]), T1G); + T1I = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1J = BYTWJ(&(W[TWVL * 72]), T1I); + T1K = VSUB(T1H, T1J); + T5S = VADD(T1H, T1J); + } + { + V T1F, T1Q, T7o, T7p; + T1F = VFNMS(LDK(KP923879532), T1E, VMUL(LDK(KP382683432), T1z)); + T1Q = VFMA(LDK(KP382683432), T1K, VMUL(LDK(KP923879532), T1P)); + T1R = VSUB(T1F, T1Q); + T4m = VADD(T1Q, T1F); + T7o = VADD(T5S, T5T); + T7p = VADD(T5V, T5W); + T7q = VADD(T7o, T7p); + T80 = VSUB(T7p, T7o); + } + { + V T2h, T2i, T5U, T5X; + T2h = VFNMS(LDK(KP382683432), T1P, VMUL(LDK(KP923879532), T1K)); + T2i = VFMA(LDK(KP923879532), T1z, VMUL(LDK(KP382683432), T1E)); + T2j = VADD(T2h, T2i); + T4p = VSUB(T2i, T2h); + T5U = VSUB(T5S, T5T); + T5X = VSUB(T5V, T5W); + T5Y = VMUL(LDK(KP707106781), VADD(T5U, T5X)); + T63 = VMUL(LDK(KP707106781), VSUB(T5X, T5U)); + } + } + { + V T2H, T69, T2X, T6d, T2M, T6a, T2S, T6c; + { + V T2E, T2G, T2D, T2F; + T2D = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2E = BYTWJ(&(W[TWVL * 4]), T2D); + T2F = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2G = BYTWJ(&(W[TWVL * 68]), T2F); + T2H = VSUB(T2E, T2G); + T69 = VADD(T2E, T2G); + } + { + V T2U, T2W, T2T, T2V; + T2T = LD(&(x[WS(rs, 11)]), ms, 
&(x[WS(rs, 1)])); + T2U = BYTWJ(&(W[TWVL * 20]), T2T); + T2V = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + T2W = BYTWJ(&(W[TWVL * 84]), T2V); + T2X = VSUB(T2U, T2W); + T6d = VADD(T2U, T2W); + } + { + V T2J, T2L, T2I, T2K; + T2I = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T2J = BYTWJ(&(W[TWVL * 36]), T2I); + T2K = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + T2L = BYTWJ(&(W[TWVL * 100]), T2K); + T2M = VSUB(T2J, T2L); + T6a = VADD(T2J, T2L); + } + { + V T2P, T2R, T2O, T2Q; + T2O = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2P = BYTWJ(&(W[TWVL * 116]), T2O); + T2Q = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T2R = BYTWJ(&(W[TWVL * 52]), T2Q); + T2S = VSUB(T2P, T2R); + T6c = VADD(T2P, T2R); + } + { + V T2N, T2Y, T7v, T7w; + T2N = VFNMS(LDK(KP382683432), T2M, VMUL(LDK(KP923879532), T2H)); + T2Y = VFMA(LDK(KP923879532), T2S, VMUL(LDK(KP382683432), T2X)); + T2Z = VADD(T2N, T2Y); + T4w = VSUB(T2Y, T2N); + T7v = VADD(T69, T6a); + T7w = VADD(T6c, T6d); + T7x = VADD(T7v, T7w); + T83 = VSUB(T7w, T7v); + } + { + V T31, T32, T6b, T6e; + T31 = VFNMS(LDK(KP923879532), T2X, VMUL(LDK(KP382683432), T2S)); + T32 = VFMA(LDK(KP382683432), T2H, VMUL(LDK(KP923879532), T2M)); + T33 = VSUB(T31, T32); + T4t = VADD(T32, T31); + T6b = VSUB(T69, T6a); + T6e = VSUB(T6c, T6d); + T6f = VMUL(LDK(KP707106781), VADD(T6b, T6e)); + T6k = VMUL(LDK(KP707106781), VSUB(T6e, T6b)); + } + } + { + V T7k, T7M, T7R, T7T, T7z, T7I, T7H, T7N, T7O, T7S; + { + V T7g, T7j, T7P, T7Q; + T7g = VADD(T7e, T7f); + T7j = VADD(T7h, T7i); + T7k = VSUB(T7g, T7j); + T7M = VADD(T7g, T7j); + T7P = VADD(T7n, T7q); + T7Q = VADD(T7u, T7x); + T7R = VADD(T7P, T7Q); + T7T = VBYI(VSUB(T7Q, T7P)); + } + { + V T7r, T7y, T7D, T7G; + T7r = VSUB(T7n, T7q); + T7y = VSUB(T7u, T7x); + T7z = VMUL(LDK(KP707106781), VADD(T7r, T7y)); + T7I = VMUL(LDK(KP707106781), VSUB(T7y, T7r)); + T7D = VADD(T7B, T7C); + T7G = VADD(T7E, T7F); + T7H = VSUB(T7D, T7G); + T7N = VADD(T7G, T7D); + } + T7O = VADD(T7M, T7N); + ST(&(x[WS(rs, 32)]), VSUB(T7O, 
T7R), ms, &(x[0])); + ST(&(x[0]), VADD(T7O, T7R), ms, &(x[0])); + T7S = VSUB(T7M, T7N); + ST(&(x[WS(rs, 48)]), VSUB(T7S, T7T), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T7S, T7T), ms, &(x[0])); + { + V T7A, T7J, T7K, T7L; + T7A = VADD(T7k, T7z); + T7J = VBYI(VADD(T7H, T7I)); + ST(&(x[WS(rs, 56)]), VSUB(T7A, T7J), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T7A, T7J), ms, &(x[0])); + T7K = VSUB(T7k, T7z); + T7L = VBYI(VSUB(T7I, T7H)); + ST(&(x[WS(rs, 40)]), VSUB(T7K, T7L), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VADD(T7K, T7L), ms, &(x[0])); + } + } + { + V T7Y, T8j, T8c, T8k, T85, T8g, T89, T8h; + { + V T7U, T7X, T8a, T8b; + T7U = VSUB(T7e, T7f); + T7X = VMUL(LDK(KP707106781), VADD(T7V, T7W)); + T7Y = VADD(T7U, T7X); + T8j = VSUB(T7U, T7X); + T8a = VFNMS(LDK(KP382683432), T7Z, VMUL(LDK(KP923879532), T80)); + T8b = VFMA(LDK(KP382683432), T82, VMUL(LDK(KP923879532), T83)); + T8c = VADD(T8a, T8b); + T8k = VSUB(T8b, T8a); + } + { + V T81, T84, T87, T88; + T81 = VFMA(LDK(KP923879532), T7Z, VMUL(LDK(KP382683432), T80)); + T84 = VFNMS(LDK(KP382683432), T83, VMUL(LDK(KP923879532), T82)); + T85 = VADD(T81, T84); + T8g = VSUB(T84, T81); + T87 = VSUB(T7i, T7h); + T88 = VMUL(LDK(KP707106781), VSUB(T7W, T7V)); + T89 = VADD(T87, T88); + T8h = VSUB(T88, T87); + } + { + V T86, T8d, T8m, T8n; + T86 = VADD(T7Y, T85); + T8d = VBYI(VADD(T89, T8c)); + ST(&(x[WS(rs, 60)]), VSUB(T86, T8d), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T86, T8d), ms, &(x[0])); + T8m = VBYI(VADD(T8h, T8g)); + T8n = VADD(T8j, T8k); + ST(&(x[WS(rs, 12)]), VADD(T8m, T8n), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VSUB(T8n, T8m), ms, &(x[0])); + } + { + V T8e, T8f, T8i, T8l; + T8e = VSUB(T7Y, T85); + T8f = VBYI(VSUB(T8c, T89)); + ST(&(x[WS(rs, 36)]), VSUB(T8e, T8f), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VADD(T8e, T8f), ms, &(x[0])); + T8i = VBYI(VSUB(T8g, T8h)); + T8l = VSUB(T8j, T8k); + ST(&(x[WS(rs, 20)]), VADD(T8i, T8l), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VSUB(T8l, T8i), ms, &(x[0])); + } + } + { + V T5O, T6H, T6x, T6F, 
T6n, T6I, T6A, T6E; + { + V T5y, T5N, T6t, T6w; + T5y = VADD(T5q, T5x); + T5N = VADD(T5F, T5M); + T5O = VADD(T5y, T5N); + T6H = VSUB(T5y, T5N); + T6t = VADD(T6r, T6s); + T6w = VADD(T6u, T6v); + T6x = VADD(T6t, T6w); + T6F = VSUB(T6w, T6t); + { + V T65, T6y, T6m, T6z; + { + V T5Z, T64, T6g, T6l; + T5Z = VADD(T5R, T5Y); + T64 = VADD(T62, T63); + T65 = VFMA(LDK(KP980785280), T5Z, VMUL(LDK(KP195090322), T64)); + T6y = VFNMS(LDK(KP195090322), T5Z, VMUL(LDK(KP980785280), T64)); + T6g = VADD(T68, T6f); + T6l = VADD(T6j, T6k); + T6m = VFNMS(LDK(KP195090322), T6l, VMUL(LDK(KP980785280), T6g)); + T6z = VFMA(LDK(KP195090322), T6g, VMUL(LDK(KP980785280), T6l)); + } + T6n = VADD(T65, T6m); + T6I = VSUB(T6z, T6y); + T6A = VADD(T6y, T6z); + T6E = VSUB(T6m, T65); + } + } + { + V T6o, T6B, T6K, T6L; + T6o = VADD(T5O, T6n); + T6B = VBYI(VADD(T6x, T6A)); + ST(&(x[WS(rs, 62)]), VSUB(T6o, T6B), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T6o, T6B), ms, &(x[0])); + T6K = VBYI(VADD(T6F, T6E)); + T6L = VADD(T6H, T6I); + ST(&(x[WS(rs, 14)]), VADD(T6K, T6L), ms, &(x[0])); + ST(&(x[WS(rs, 50)]), VSUB(T6L, T6K), ms, &(x[0])); + } + { + V T6C, T6D, T6G, T6J; + T6C = VSUB(T5O, T6n); + T6D = VBYI(VSUB(T6A, T6x)); + ST(&(x[WS(rs, 34)]), VSUB(T6C, T6D), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VADD(T6C, T6D), ms, &(x[0])); + T6G = VBYI(VSUB(T6E, T6F)); + T6J = VSUB(T6H, T6I); + ST(&(x[WS(rs, 18)]), VADD(T6G, T6J), ms, &(x[0])); + ST(&(x[WS(rs, 46)]), VSUB(T6J, T6G), ms, &(x[0])); + } + } + { + V T6O, T79, T6Z, T77, T6V, T7a, T72, T76; + { + V T6M, T6N, T6X, T6Y; + T6M = VSUB(T5q, T5x); + T6N = VSUB(T6v, T6u); + T6O = VADD(T6M, T6N); + T79 = VSUB(T6M, T6N); + T6X = VSUB(T6s, T6r); + T6Y = VSUB(T5M, T5F); + T6Z = VADD(T6X, T6Y); + T77 = VSUB(T6Y, T6X); + { + V T6R, T70, T6U, T71; + { + V T6P, T6Q, T6S, T6T; + T6P = VSUB(T5R, T5Y); + T6Q = VSUB(T63, T62); + T6R = VFMA(LDK(KP831469612), T6P, VMUL(LDK(KP555570233), T6Q)); + T70 = VFNMS(LDK(KP555570233), T6P, VMUL(LDK(KP831469612), T6Q)); + T6S = VSUB(T68, 
T6f); + T6T = VSUB(T6k, T6j); + T6U = VFNMS(LDK(KP555570233), T6T, VMUL(LDK(KP831469612), T6S)); + T71 = VFMA(LDK(KP555570233), T6S, VMUL(LDK(KP831469612), T6T)); + } + T6V = VADD(T6R, T6U); + T7a = VSUB(T71, T70); + T72 = VADD(T70, T71); + T76 = VSUB(T6U, T6R); + } + } + { + V T6W, T73, T7c, T7d; + T6W = VADD(T6O, T6V); + T73 = VBYI(VADD(T6Z, T72)); + ST(&(x[WS(rs, 58)]), VSUB(T6W, T73), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T6W, T73), ms, &(x[0])); + T7c = VBYI(VADD(T77, T76)); + T7d = VADD(T79, T7a); + ST(&(x[WS(rs, 10)]), VADD(T7c, T7d), ms, &(x[0])); + ST(&(x[WS(rs, 54)]), VSUB(T7d, T7c), ms, &(x[0])); + } + { + V T74, T75, T78, T7b; + T74 = VSUB(T6O, T6V); + T75 = VBYI(VSUB(T72, T6Z)); + ST(&(x[WS(rs, 38)]), VSUB(T74, T75), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VADD(T74, T75), ms, &(x[0])); + T78 = VBYI(VSUB(T76, T77)); + T7b = VSUB(T79, T7a); + ST(&(x[WS(rs, 22)]), VADD(T78, T7b), ms, &(x[0])); + ST(&(x[WS(rs, 42)]), VSUB(T7b, T78), ms, &(x[0])); + } + } + { + V T4k, T5h, T4R, T59, T4H, T5j, T4P, T4Y, T4z, T4S, T4K, T4O, T55, T5k, T5c; + V T5g; + { + V T4c, T57, T4j, T58, T4f, T4i; + T4c = VADD(T4a, T4b); + T57 = VSUB(T4C, T4B); + T4f = VFMA(LDK(KP831469612), T4d, VMUL(LDK(KP555570233), T4e)); + T4i = VFNMS(LDK(KP555570233), T4h, VMUL(LDK(KP831469612), T4g)); + T4j = VADD(T4f, T4i); + T58 = VSUB(T4i, T4f); + T4k = VADD(T4c, T4j); + T5h = VSUB(T58, T57); + T4R = VSUB(T4c, T4j); + T59 = VADD(T57, T58); + } + { + V T4D, T4W, T4G, T4X, T4E, T4F; + T4D = VADD(T4B, T4C); + T4W = VSUB(T4a, T4b); + T4E = VFNMS(LDK(KP555570233), T4d, VMUL(LDK(KP831469612), T4e)); + T4F = VFMA(LDK(KP555570233), T4g, VMUL(LDK(KP831469612), T4h)); + T4G = VADD(T4E, T4F); + T4X = VSUB(T4F, T4E); + T4H = VADD(T4D, T4G); + T5j = VSUB(T4W, T4X); + T4P = VSUB(T4G, T4D); + T4Y = VADD(T4W, T4X); + } + { + V T4r, T4I, T4y, T4J; + { + V T4n, T4q, T4u, T4x; + T4n = VADD(T4l, T4m); + T4q = VADD(T4o, T4p); + T4r = VFMA(LDK(KP956940335), T4n, VMUL(LDK(KP290284677), T4q)); + T4I = 
VFNMS(LDK(KP290284677), T4n, VMUL(LDK(KP956940335), T4q)); + T4u = VADD(T4s, T4t); + T4x = VADD(T4v, T4w); + T4y = VFNMS(LDK(KP290284677), T4x, VMUL(LDK(KP956940335), T4u)); + T4J = VFMA(LDK(KP290284677), T4u, VMUL(LDK(KP956940335), T4x)); + } + T4z = VADD(T4r, T4y); + T4S = VSUB(T4J, T4I); + T4K = VADD(T4I, T4J); + T4O = VSUB(T4y, T4r); + } + { + V T51, T5a, T54, T5b; + { + V T4Z, T50, T52, T53; + T4Z = VSUB(T4l, T4m); + T50 = VSUB(T4p, T4o); + T51 = VFMA(LDK(KP881921264), T4Z, VMUL(LDK(KP471396736), T50)); + T5a = VFNMS(LDK(KP471396736), T4Z, VMUL(LDK(KP881921264), T50)); + T52 = VSUB(T4s, T4t); + T53 = VSUB(T4w, T4v); + T54 = VFNMS(LDK(KP471396736), T53, VMUL(LDK(KP881921264), T52)); + T5b = VFMA(LDK(KP471396736), T52, VMUL(LDK(KP881921264), T53)); + } + T55 = VADD(T51, T54); + T5k = VSUB(T5b, T5a); + T5c = VADD(T5a, T5b); + T5g = VSUB(T54, T51); + } + { + V T4A, T4L, T5i, T5l; + T4A = VADD(T4k, T4z); + T4L = VBYI(VADD(T4H, T4K)); + ST(&(x[WS(rs, 61)]), VSUB(T4A, T4L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T4A, T4L), ms, &(x[WS(rs, 1)])); + T5i = VBYI(VSUB(T5g, T5h)); + T5l = VSUB(T5j, T5k); + ST(&(x[WS(rs, 21)]), VADD(T5i, T5l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 43)]), VSUB(T5l, T5i), ms, &(x[WS(rs, 1)])); + } + { + V T5m, T5n, T4M, T4N; + T5m = VBYI(VADD(T5h, T5g)); + T5n = VADD(T5j, T5k); + ST(&(x[WS(rs, 11)]), VADD(T5m, T5n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 53)]), VSUB(T5n, T5m), ms, &(x[WS(rs, 1)])); + T4M = VSUB(T4k, T4z); + T4N = VBYI(VSUB(T4K, T4H)); + ST(&(x[WS(rs, 35)]), VSUB(T4M, T4N), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VADD(T4M, T4N), ms, &(x[WS(rs, 1)])); + } + { + V T4Q, T4T, T56, T5d; + T4Q = VBYI(VSUB(T4O, T4P)); + T4T = VSUB(T4R, T4S); + ST(&(x[WS(rs, 19)]), VADD(T4Q, T4T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 45)]), VSUB(T4T, T4Q), ms, &(x[WS(rs, 1)])); + T56 = VADD(T4Y, T55); + T5d = VBYI(VADD(T59, T5c)); + ST(&(x[WS(rs, 59)]), VSUB(T56, T5d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T56, T5d), ms, 
&(x[WS(rs, 1)])); + } + { + V T5e, T5f, T4U, T4V; + T5e = VSUB(T4Y, T55); + T5f = VBYI(VSUB(T5c, T59)); + ST(&(x[WS(rs, 37)]), VSUB(T5e, T5f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VADD(T5e, T5f), ms, &(x[WS(rs, 1)])); + T4U = VBYI(VADD(T4P, T4O)); + T4V = VADD(T4R, T4S); + ST(&(x[WS(rs, 13)]), VADD(T4U, T4V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 51)]), VSUB(T4V, T4U), ms, &(x[WS(rs, 1)])); + } + } + { + V T1u, T43, T3D, T3V, T3t, T45, T3B, T3K, T3d, T3E, T3w, T3A, T3R, T46, T3Y; + V T42; + { + V TE, T3T, T1t, T3U, T13, T1s; + TE = VSUB(Tg, TD); + T3T = VADD(T3o, T3h); + T13 = VFMA(LDK(KP195090322), TV, VMUL(LDK(KP980785280), T12)); + T1s = VFNMS(LDK(KP195090322), T1r, VMUL(LDK(KP980785280), T1k)); + T1t = VSUB(T13, T1s); + T3U = VADD(T1s, T13); + T1u = VADD(TE, T1t); + T43 = VSUB(T3U, T3T); + T3D = VSUB(TE, T1t); + T3V = VADD(T3T, T3U); + } + { + V T3p, T3I, T3s, T3J, T3q, T3r; + T3p = VSUB(T3h, T3o); + T3I = VADD(Tg, TD); + T3q = VFNMS(LDK(KP195090322), T12, VMUL(LDK(KP980785280), TV)); + T3r = VFMA(LDK(KP980785280), T1r, VMUL(LDK(KP195090322), T1k)); + T3s = VSUB(T3q, T3r); + T3J = VADD(T3r, T3q); + T3t = VADD(T3p, T3s); + T45 = VSUB(T3I, T3J); + T3B = VSUB(T3s, T3p); + T3K = VADD(T3I, T3J); + } + { + V T2l, T3u, T3c, T3v; + { + V T29, T2k, T30, T3b; + T29 = VSUB(T1R, T28); + T2k = VSUB(T2g, T2j); + T2l = VFMA(LDK(KP634393284), T29, VMUL(LDK(KP773010453), T2k)); + T3u = VFNMS(LDK(KP634393284), T2k, VMUL(LDK(KP773010453), T29)); + T30 = VSUB(T2C, T2Z); + T3b = VSUB(T33, T3a); + T3c = VFNMS(LDK(KP634393284), T3b, VMUL(LDK(KP773010453), T30)); + T3v = VFMA(LDK(KP773010453), T3b, VMUL(LDK(KP634393284), T30)); + } + T3d = VADD(T2l, T3c); + T3E = VSUB(T3v, T3u); + T3w = VADD(T3u, T3v); + T3A = VSUB(T3c, T2l); + } + { + V T3N, T3W, T3Q, T3X; + { + V T3L, T3M, T3O, T3P; + T3L = VADD(T28, T1R); + T3M = VADD(T2g, T2j); + T3N = VFMA(LDK(KP098017140), T3L, VMUL(LDK(KP995184726), T3M)); + T3W = VFNMS(LDK(KP098017140), T3M, VMUL(LDK(KP995184726), T3L)); + T3O = 
VADD(T2C, T2Z); + T3P = VADD(T3a, T33); + T3Q = VFNMS(LDK(KP098017140), T3P, VMUL(LDK(KP995184726), T3O)); + T3X = VFMA(LDK(KP995184726), T3P, VMUL(LDK(KP098017140), T3O)); + } + T3R = VADD(T3N, T3Q); + T46 = VSUB(T3X, T3W); + T3Y = VADD(T3W, T3X); + T42 = VSUB(T3Q, T3N); + } + { + V T3e, T3x, T44, T47; + T3e = VADD(T1u, T3d); + T3x = VBYI(VADD(T3t, T3w)); + ST(&(x[WS(rs, 57)]), VSUB(T3e, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T3e, T3x), ms, &(x[WS(rs, 1)])); + T44 = VBYI(VSUB(T42, T43)); + T47 = VSUB(T45, T46); + ST(&(x[WS(rs, 17)]), VADD(T44, T47), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VSUB(T47, T44), ms, &(x[WS(rs, 1)])); + } + { + V T48, T49, T3y, T3z; + T48 = VBYI(VADD(T43, T42)); + T49 = VADD(T45, T46); + ST(&(x[WS(rs, 15)]), VADD(T48, T49), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 49)]), VSUB(T49, T48), ms, &(x[WS(rs, 1)])); + T3y = VSUB(T1u, T3d); + T3z = VBYI(VSUB(T3w, T3t)); + ST(&(x[WS(rs, 39)]), VSUB(T3y, T3z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VADD(T3y, T3z), ms, &(x[WS(rs, 1)])); + } + { + V T3C, T3F, T3S, T3Z; + T3C = VBYI(VSUB(T3A, T3B)); + T3F = VSUB(T3D, T3E); + ST(&(x[WS(rs, 23)]), VADD(T3C, T3F), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VSUB(T3F, T3C), ms, &(x[WS(rs, 1)])); + T3S = VADD(T3K, T3R); + T3Z = VBYI(VADD(T3V, T3Y)); + ST(&(x[WS(rs, 63)]), VSUB(T3S, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T3S, T3Z), ms, &(x[WS(rs, 1)])); + } + { + V T40, T41, T3G, T3H; + T40 = VSUB(T3K, T3R); + T41 = VBYI(VSUB(T3Y, T3V)); + ST(&(x[WS(rs, 33)]), VSUB(T40, T41), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VADD(T40, T41), ms, &(x[WS(rs, 1)])); + T3G = VBYI(VADD(T3B, T3A)); + T3H = VADD(T3D, T3E); + ST(&(x[WS(rs, 9)]), VADD(T3G, T3H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 55)]), VSUB(T3H, T3G), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + 
VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 64, XSIMD_STRING("t1fv_64"), twinstr, &GENUS, {467, 198, 52, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_64) (planner *p) { + X(kdft_dit_register) (p, t1fv_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1fv_7 -include t1f.h */ + +/* + * This function contains 36 FP additions, 36 FP multiplications, + * (or, 15 additions, 15 multiplications, 21 fused multiply/add), + * 42 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1f.h" + +static void t1fv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP801937735, +0.801937735804838252472204639014890102331838324); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP692021471, +0.692021471630095869627814897002069140197260599); + DVK(KP554958132, +0.554958132087371191422194871006410481067288862); + DVK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V T1, T2, T4, Te, Tc, T9, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Te = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, T5, Tf, Td, Ta, T8; + T3 = BYTWJ(&(W[0]), T2); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + Tf = BYTWJ(&(W[TWVL * 6]), Te); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Ta = BYTWJ(&(W[TWVL * 8]), 
T9); + T8 = BYTWJ(&(W[TWVL * 2]), T7); + { + V T6, Tk, Tg, Tl, Tb, Tm; + T6 = VADD(T3, T5); + Tk = VSUB(T5, T3); + Tg = VADD(Td, Tf); + Tl = VSUB(Tf, Td); + Tb = VADD(T8, Ta); + Tm = VSUB(Ta, T8); + { + V Th, Ts, Tp, Tu, Tn, Tx, Ti, Tt; + Th = VFNMS(LDK(KP356895867), T6, Tg); + Ts = VFMA(LDK(KP554958132), Tl, Tk); + ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0])); + Tp = VFNMS(LDK(KP356895867), Tb, T6); + Tu = VFNMS(LDK(KP356895867), Tg, Tb); + Tn = VFMA(LDK(KP554958132), Tm, Tl); + Tx = VFNMS(LDK(KP554958132), Tk, Tm); + Ti = VFNMS(LDK(KP692021471), Th, Tb); + Tt = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Ts, Tm)); + { + V Tq, Tv, To, Ty, Tj, Tr, Tw; + Tq = VFNMS(LDK(KP692021471), Tp, Tg); + Tv = VFNMS(LDK(KP692021471), Tu, T6); + To = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tn, Tk)); + Ty = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tx, Tl)); + Tj = VFNMS(LDK(KP900968867), Ti, T1); + Tr = VFNMS(LDK(KP900968867), Tq, T1); + Tw = VFNMS(LDK(KP900968867), Tv, T1); + ST(&(x[WS(rs, 2)]), VFMAI(To, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(To, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Tt, Tr), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tt, Tr), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(Ty, Tw), ms, &(x[0])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1fv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_7) (planner *p) { + X(kdft_dit_register) (p, t1fv_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1fv_7 -include t1f.h */ + +/* + * This function contains 36 FP additions, 30 FP multiplications, + * (or, 24 additions, 18 multiplications, 12 fused 
multiply/add), + * 21 stack variables, 6 constants, and 14 memory accesses + */ +#include "t1f.h" + +static void t1fv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP900968867, +0.900968867902419126236102319507445051165919162); + DVK(KP222520933, +0.222520933956314404288902564496794759466355569); + DVK(KP623489801, +0.623489801858733530525004884004239810632274731); + DVK(KP781831482, +0.781831482468029808708444526674057750232334519); + DVK(KP974927912, +0.974927912181823607018131682993931217232785801); + DVK(KP433883739, +0.433883739117558120475768332848358754609990728); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(7, rs)) { + V T1, Tg, Tj, T6, Ti, Tb, Tk, Tp, To; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V Td, Tf, Tc, Te; + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Te = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tf = BYTWJ(&(W[TWVL * 6]), Te); + Tg = VADD(Td, Tf); + Tj = VSUB(Tf, Td); + } + { + V T3, T5, T2, T4; + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + T6 = VADD(T3, T5); + Ti = VSUB(T5, T3); + } + { + V T8, Ta, T7, T9; + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 2]), T7); + T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = BYTWJ(&(W[TWVL * 8]), T9); + Tb = VADD(T8, Ta); + Tk = VSUB(Ta, T8); + } + ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0])); + Tp = VBYI(VFMA(LDK(KP433883739), Ti, VFNMS(LDK(KP781831482), Tk, VMUL(LDK(KP974927912), Tj)))); + To = VFMA(LDK(KP623489801), Tb, VFNMS(LDK(KP222520933), Tg, VFNMS(LDK(KP900968867), T6, T1))); + ST(&(x[WS(rs, 4)]), VSUB(To, Tp), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + { + V Tl, Th, Tn, Tm; + Tl = VBYI(VFNMS(LDK(KP781831482), Tj, VFNMS(LDK(KP433883739), Tk, 
VMUL(LDK(KP974927912), Ti)))); + Th = VFMA(LDK(KP623489801), Tg, VFNMS(LDK(KP900968867), Tb, VFNMS(LDK(KP222520933), T6, T1))); + ST(&(x[WS(rs, 5)]), VSUB(Th, Tl), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(Th, Tl), ms, &(x[0])); + Tn = VBYI(VFMA(LDK(KP781831482), Ti, VFMA(LDK(KP974927912), Tk, VMUL(LDK(KP433883739), Tj)))); + Tm = VFMA(LDK(KP623489801), T6, VFNMS(LDK(KP900968867), Tg, VFNMS(LDK(KP222520933), Tb, T1))); + ST(&(x[WS(rs, 6)]), VSUB(Tm, Tn), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(Tm, Tn), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 7, XSIMD_STRING("t1fv_7"), twinstr, &GENUS, {24, 18, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_7) (planner *p) { + X(kdft_dit_register) (p, t1fv_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:14 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1fv_8 -include t1f.h */ + +/* + * This function contains 33 FP additions, 24 FP multiplications, + * (or, 23 additions, 14 multiplications, 10 fused multiply/add), + * 36 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1f.h" + +static void t1fv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, Th, Tj, T5, T7, Ta, Tc; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Ti, Tk, T6, T8, Tb, Td; + T3 = BYTWJ(&(W[TWVL * 6]), T2); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + Tk = BYTWJ(&(W[TWVL * 10]), Tj); + T6 = BYTWJ(&(W[0]), T5); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + Tb = BYTWJ(&(W[TWVL * 12]), Ta); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + { + V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts; + Tq = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tr = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + Tt = VADD(T6, T8); + T9 = VSUB(T6, T8); + Tu = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tw 
= VSUB(Tq, Tr); + Ts = VADD(Tq, Tr); + { + V Tx, Tv, Tm, Tf; + Tx = VSUB(Tu, Tt); + Tv = VADD(Tt, Tu); + Tm = VSUB(Te, T9); + Tf = VADD(T9, Te); + { + V Tp, Tn, To, Tg; + ST(&(x[WS(rs, 2)]), VFMAI(Tx, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tx, Tw), ms, &(x[0])); + ST(&(x[0]), VADD(Ts, Tv), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Ts, Tv), ms, &(x[0])); + Tp = VFMA(LDK(KP707106781), Tm, Tl); + Tn = VFNMS(LDK(KP707106781), Tm, Tl); + To = VFNMS(LDK(KP707106781), Tf, T4); + Tg = VFMA(LDK(KP707106781), Tf, T4); + ST(&(x[WS(rs, 5)]), VFNMSI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tn, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tn, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1fv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_8) (planner *p) { + X(kdft_dit_register) (p, t1fv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1fv_8 -include t1f.h */ + +/* + * This function contains 33 FP additions, 16 FP multiplications, + * (or, 33 additions, 16 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "t1f.h" + +static void t1fv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T4, Tq, Tm, Tr, T9, Tt, Te, Tu, T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = 
BYTWJ(&(W[TWVL * 6]), T2); + T4 = VSUB(T1, T3); + Tq = VADD(T1, T3); + { + V Tj, Tl, Ti, Tk; + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = BYTWJ(&(W[TWVL * 2]), Ti); + Tk = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tl = BYTWJ(&(W[TWVL * 10]), Tk); + Tm = VSUB(Tj, Tl); + Tr = VADD(Tj, Tl); + } + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTWJ(&(W[0]), T5); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + T9 = VSUB(T6, T8); + Tt = VADD(T6, T8); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tb = BYTWJ(&(W[TWVL * 12]), Ta); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Te = VSUB(Tb, Td); + Tu = VADD(Tb, Td); + } + { + V Ts, Tv, Tw, Tx; + Ts = VADD(Tq, Tr); + Tv = VADD(Tt, Tu); + ST(&(x[WS(rs, 4)]), VSUB(Ts, Tv), ms, &(x[0])); + ST(&(x[0]), VADD(Ts, Tv), ms, &(x[0])); + Tw = VSUB(Tq, Tr); + Tx = VBYI(VSUB(Tu, Tt)); + ST(&(x[WS(rs, 6)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tw, Tx), ms, &(x[0])); + { + V Tg, To, Tn, Tp, Tf, Th; + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VADD(T4, Tf); + To = VSUB(T4, Tf); + Th = VMUL(LDK(KP707106781), VSUB(Te, T9)); + Tn = VBYI(VSUB(Th, Tm)); + Tp = VBYI(VADD(Tm, Th)); + ST(&(x[WS(rs, 7)]), VSUB(Tg, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tg, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(To, Tp), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1fv_8"), twinstr, &GENUS, {33, 16, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_8) (planner *p) { + X(kdft_dit_register) (p, t1fv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_9.c --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:15 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1fv_9 -include t1f.h */ + +/* + * This function contains 54 FP additions, 54 FP multiplications, + * (or, 20 additions, 20 multiplications, 34 fused multiply/add), + * 67 stack variables, 19 constants, and 18 memory accesses + */ +#include "t1f.h" + +static void t1fv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP826351822, +0.826351822333069651148283373230685203999624323); + DVK(KP879385241, +0.879385241571816768108218554649462939872416269); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP666666666, 
+0.666666666666666666666666666666666666666666667); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP907603734, +0.907603734547952313649323976213898122064543220); + DVK(KP420276625, +0.420276625461206169731530603237061658838781920); + DVK(KP673648177, +0.673648177666930348851716626769314796000375677); + DVK(KP898197570, +0.898197570222573798468955502359086394667167570); + DVK(KP347296355, +0.347296355333860697703433253538629592000751354); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP439692620, +0.439692620785908384054109277324731469936208134); + DVK(KP203604859, +0.203604859554852403062088995281827210665664861); + DVK(KP152703644, +0.152703644666139302296566746461370407999248646); + DVK(KP586256827, +0.586256827714544512072145703099641959914944179); + DVK(KP968908795, +0.968908795874236621082202410917456709164223497); + DVK(KP726681596, +0.726681596905677465811651808188092531873167623); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16), MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T3, T5, T9, Th, Tb, Td, Tj, Tl, TD, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V T2, T4, T8, Tg; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tg = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V Ta, Tc, Ti, Tk; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tk = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 4]), T2); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + T9 = BYTWJ(&(W[0]), T8); + Th = BYTWJ(&(W[TWVL * 2]), Tg); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Td = BYTWJ(&(W[TWVL * 12]), Tc); + Tj = BYTWJ(&(W[TWVL * 8]), Ti); + Tl = BYTWJ(&(W[TWVL * 14]), Tk); + } + } + TD = VSUB(T5, T3); + T6 = 
VADD(T3, T5); + { + V Tt, Te, Tu, Tm, Tr, T7; + Tt = VSUB(Tb, Td); + Te = VADD(Tb, Td); + Tu = VSUB(Tl, Tj); + Tm = VADD(Tj, Tl); + Tr = VFNMS(LDK(KP500000000), T6, T1); + T7 = VADD(T1, T6); + { + V Tv, Tf, Ts, Tn; + Tv = VFNMS(LDK(KP500000000), Te, T9); + Tf = VADD(T9, Te); + Ts = VFNMS(LDK(KP500000000), Tm, Th); + Tn = VADD(Th, Tm); + { + V TG, TK, Tw, TJ, TF, TA, To, Tq; + TG = VFNMS(LDK(KP726681596), Tt, Tv); + TK = VFMA(LDK(KP968908795), Tv, Tt); + Tw = VFNMS(LDK(KP586256827), Tv, Tu); + TJ = VFNMS(LDK(KP152703644), Tu, Ts); + TF = VFMA(LDK(KP203604859), Ts, Tu); + TA = VFNMS(LDK(KP439692620), Tt, Ts); + To = VADD(Tf, Tn); + Tq = VMUL(LDK(KP866025403), VSUB(Tn, Tf)); + { + V TQ, TH, TL, TN, TB, Tp, Ty, TI, Tx; + Tx = VFNMS(LDK(KP347296355), Tw, Tt); + TQ = VFNMS(LDK(KP898197570), TG, TF); + TH = VFMA(LDK(KP898197570), TG, TF); + TL = VFMA(LDK(KP673648177), TK, TJ); + TN = VFNMS(LDK(KP673648177), TK, TJ); + TB = VFNMS(LDK(KP420276625), TA, Tu); + ST(&(x[0]), VADD(T7, To), ms, &(x[0])); + Tp = VFNMS(LDK(KP500000000), To, T7); + Ty = VFNMS(LDK(KP907603734), Tx, Ts); + TI = VFMA(LDK(KP852868531), TH, Tr); + { + V TO, TR, TM, TC, Tz, TP, TS, TE; + TO = VFNMS(LDK(KP500000000), TH, TN); + TR = VFMA(LDK(KP666666666), TL, TQ); + TM = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), TD, TL)); + TC = VFNMS(LDK(KP826351822), TB, Tv); + ST(&(x[WS(rs, 6)]), VFNMSI(Tq, Tp), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Tq, Tp), ms, &(x[WS(rs, 1)])); + Tz = VFNMS(LDK(KP939692620), Ty, Tr); + TP = VFMA(LDK(KP852868531), TO, Tr); + TS = VMUL(LDK(KP866025403), VFMA(LDK(KP852868531), TR, TD)); + ST(&(x[WS(rs, 8)]), VFMAI(TM, TI), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(TM, TI), ms, &(x[WS(rs, 1)])); + TE = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), TD, TC)); + ST(&(x[WS(rs, 4)]), VFMAI(TS, TP), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(TS, TP), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(TE, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(TE, Tz), ms, &(x[0])); + } 
+ } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1fv_9"), twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1fv_9) (planner *p) { /* HAVE_FMA build: passes t1fv_9 and its descriptor to the planner through kdft_dit_register. */ + X(kdft_dit_register) (p, t1fv_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1fv_9 -include t1f.h */ + +/* + * This function contains 54 FP additions, 42 FP multiplications, + * (or, 38 additions, 26 multiplications, 16 fused multiply/add), + * 38 stack variables, 14 constants, and 18 memory accesses + */ +#include "t1f.h" + +static void t1fv_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ /* Size-9 twiddle pass, variant compiled when HAVE_FMA is not defined. Works in place on the array at ri (x = ri below); ii is not referenced in this body. Each loop step advances m by VL, x by VL * ms and W by TWVL * 16. */ + DVK(KP939692620, +0.939692620785908384054109277324731469936208134); + DVK(KP296198132, +0.296198132726023843175338011893050938967728390); + DVK(KP852868531, +0.852868531952443209628250963940074071936020296); + DVK(KP173648177, +0.173648177666930348851716626769314796000375677); + DVK(KP556670399, +0.556670399226419366452912952047023132968291906); + DVK(KP766044443, +0.766044443118978035202392650555416673935832457); + DVK(KP642787609, +0.642787609686539326322643409907263432907559884); + DVK(KP663413948, +0.663413948168938396205421319635891297216863310); + DVK(KP984807753, +0.984807753012208059366743024589523013670643252); + DVK(KP150383733, +0.150383733180435296639271897612501926072238258); + DVK(KP342020143, +0.342020143325668733044099614682259580763083368); + DVK(KP813797681, +0.813797681349373692844693217248393223289101568); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 16)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 16),
MAKE_VOLATILE_STRIDE(9, rs)) { + V T1, T6, TA, Tt, Tf, Ts, Tw, Tn, Tv; + T1 = LD(&(x[0]), ms, &(x[0])); /* Input 0 is used as loaded. Every other input k = 1..8 below is read from x[WS(rs, k)] and passed through BYTWJ together with the twiddle at W[TWVL * 2 * (k - 1)]. */ + { + V T3, T5, T2, T4; + T2 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 4]), T2); + T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = BYTWJ(&(W[TWVL * 10]), T4); + T6 = VADD(T3, T5); + TA = VMUL(LDK(KP866025403), VSUB(T5, T3)); + } + { + V T9, Td, Tb, T8, Tc, Ta, Te; + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[0]), T8); + Tc = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 12]), Tc); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 6]), Ta); + Tt = VSUB(Td, Tb); + Te = VADD(Tb, Td); + Tf = VADD(T9, Te); + Ts = VFNMS(LDK(KP500000000), Te, T9); + } + { + V Th, Tl, Tj, Tg, Tk, Ti, Tm; + Tg = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 2]), Tg); + Tk = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tl = BYTWJ(&(W[TWVL * 14]), Tk); + Ti = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tj = BYTWJ(&(W[TWVL * 8]), Ti); + Tw = VSUB(Tl, Tj); + Tm = VADD(Tj, Tl); + Tn = VADD(Th, Tm); + Tv = VFNMS(LDK(KP500000000), Tm, Th); + } + { + V Tq, T7, To, Tp; /* Outputs 0, 3 and 6: x[0] receives VADD(T7, To); x[3] and x[6] receive Tp combined with Tq by VADD and VSUB. */ + Tq = VBYI(VMUL(LDK(KP866025403), VSUB(Tn, Tf))); + T7 = VADD(T1, T6); + To = VADD(Tf, Tn); + Tp = VFNMS(LDK(KP500000000), To, T7); + ST(&(x[0]), VADD(T7, To), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(Tp, Tq), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VSUB(Tp, Tq), ms, &(x[0])); + } + { + V TI, TB, TC, TD, Tu, Tx, Ty, Tr, TH; + TI = VBYI(VSUB(VFNMS(LDK(KP342020143), Tv, VFNMS(LDK(KP150383733), Tt, VFNMS(LDK(KP984807753), Ts, VMUL(LDK(KP813797681), Tw)))), TA)); + TB = VFNMS(LDK(KP642787609), Ts, VMUL(LDK(KP663413948), Tt)); + TC = VFNMS(LDK(KP984807753), Tv, VMUL(LDK(KP150383733), Tw)); + TD = VADD(TB, TC); + Tu = VFMA(LDK(KP766044443), Ts, VMUL(LDK(KP556670399), Tt)); + Tx = VFMA(LDK(KP173648177), Tv, VMUL(LDK(KP852868531), Tw)); + Ty = VADD(Tu, Tx); + Tr = VFNMS(LDK(KP500000000), T6, T1); + TH = VFMA(LDK(KP173648177), Ts, VFNMS(LDK(KP296198132),
Tw, VFNMS(LDK(KP939692620), Tv, VFNMS(LDK(KP852868531), Tt, Tr)))); + ST(&(x[WS(rs, 7)]), VSUB(TH, TI), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(TH, TI), ms, &(x[0])); + { + V Tz, TE, TF, TG; /* Remaining outputs 8, 1, 5 and 4 are written in this block. */ + Tz = VADD(Tr, Ty); + TE = VBYI(VADD(TA, TD)); + ST(&(x[WS(rs, 8)]), VSUB(Tz, TE), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(TE, Tz), ms, &(x[WS(rs, 1)])); + TF = VFMA(LDK(KP866025403), VSUB(TB, TC), VFNMS(LDK(KP500000000), Ty, Tr)); + TG = VBYI(VADD(TA, VFNMS(LDK(KP500000000), TD, VMUL(LDK(KP866025403), VSUB(Tx, Tu))))); + ST(&(x[WS(rs, 5)]), VSUB(TF, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(TF, TG), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { /* Eight VTW(0, k) entries, k = 1..8, closed by a TW_NEXT record. */ + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 9, XSIMD_STRING("t1fv_9"), twinstr, &GENUS, {38, 26, 16, 0}, 0, 0, 0 }; /* The counts {38, 26, 16, 0} repeat the add / mul / fused figures from the generated header comment of this variant. */ + +void XSIMD(codelet_t1fv_9) (planner *p) { /* Non-FMA build: passes t1fv_9 and its descriptor to the planner through kdft_dit_register. */ + X(kdft_dit_register) (p, t1fv_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:52 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1sv_16 -include ts.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 104 additions, 30 multiplications, 70 fused multiply/add), + * 113 stack variables, 3 constants, and 64 memory accesses + */ +#include "ts.h" + +static void t1sv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ /* Size-16 twiddle pass over the separate ri and ii arrays, variant compiled when HAVE_FMA is defined. Works in place: the ri/ii slots that are loaded are the ones stored to. Each loop step advances m by 2 * VL, ri and ii by (2 * VL) * ms, and W by (2 * VL) * 30. */ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 30); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 30), MAKE_VOLATILE_STRIDE(32, rs)) { + V T2S, T2O, T2B, T2j, T2A, T24, T3J, T3L, T2Q, T2I, T2R, T2L, T2C, T2y, T3D; + V T3F; + { + V T3o, T3z, T1I, T8, T35, T2o, T1s, T2r, T36, T2w, T1F, T2p, T1N, T3k, Tl; + V T3A, T2V, T1T, Tz, T1U, T30, T29, T11, T2c, TH, TK, TJ, T31, T2h, T1e; + V T2a, T1Z, TI, T1Y, TF; + { + V Ta, Td, Tg, Tj, T2t, T1y, Tf, T1J, Tb, Tc, T2v, T1E, Ti; + { + V T1, T3n, T3, T6, T5, T1h, T1k, T1n, T1q, T1m, T3l, T4, T1j, T1p, T2k; + V T1i, T2, T1g; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T3n = LD(&(ii[0]), ms, &(ii[0])); /* Input 0 (T1, T3n) is used without a twiddle. Every other input k is paired with W[TWVL * (2k - 2)] and W[TWVL * (2k - 1)], read through LDW; for example input 8 uses entries 14 and 15. */ + T3 = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + T6 = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 14])); + T5 = LDW(&(W[TWVL * 15])); + T1h = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T1k = LD(&(ii[WS(rs,
15)]), ms, &(ii[WS(rs, 1)])); + T1g = LDW(&(W[TWVL * 28])); + T1n = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T1q = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T1m = LDW(&(W[TWVL * 12])); + T3l = VMUL(T2, T6); + T4 = VMUL(T2, T3); + T1j = LDW(&(W[TWVL * 29])); + T1p = LDW(&(W[TWVL * 13])); + T2k = VMUL(T1g, T1k); + T1i = VMUL(T1g, T1h); + { + V T1u, T1x, T1A, T2s, T1v, T1D, T1z, T1w, T1C, T2u, T1B, T9; + { + V T2l, T1l, T1t, T2n, T1r; + { + V T2m, T1o, T3m, T7; + T1u = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T2m = VMUL(T1m, T1q); + T1o = VMUL(T1m, T1n); + T3m = VFNMS(T5, T3, T3l); + T7 = VFMA(T5, T6, T4); + T1x = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T2l = VFNMS(T1j, T1h, T2k); + T1l = VFMA(T1j, T1k, T1i); + T1t = LDW(&(W[TWVL * 4])); + T2n = VFNMS(T1p, T1n, T2m); + T1r = VFMA(T1p, T1q, T1o); + T3o = VADD(T3m, T3n); + T3z = VSUB(T3n, T3m); + T1I = VSUB(T1, T7); + T8 = VADD(T1, T7); + } + T1A = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T2s = VMUL(T1t, T1x); + T1v = VMUL(T1t, T1u); + T35 = VADD(T2l, T2n); + T2o = VSUB(T2l, T2n); + T1s = VADD(T1l, T1r); + T2r = VSUB(T1l, T1r); + T1D = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + T1z = LDW(&(W[TWVL * 20])); + } + T1w = LDW(&(W[TWVL * 5])); + T1C = LDW(&(W[TWVL * 21])); + Ta = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + Td = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T9 = LDW(&(W[TWVL * 6])); + Tg = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + Tj = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + T2u = VMUL(T1z, T1D); + T1B = VMUL(T1z, T1A); + T2t = VFNMS(T1w, T1u, T2s); + T1y = VFMA(T1w, T1x, T1v); + Tf = LDW(&(W[TWVL * 22])); + T1J = VMUL(T9, Td); + Tb = VMUL(T9, Ta); + Tc = LDW(&(W[TWVL * 7])); + T2v = VFNMS(T1C, T1A, T2u); + T1E = VFMA(T1C, T1D, T1B); + Ti = LDW(&(W[TWVL * 23])); + } + } + { + V TW, TZ, TY, T27, TX, T26, TU; + { + V To, Tr, Tu, Tx, Tq, Tw, T1P, Tp, T1R, Tv; + { + V T1K, Te, T1M, Tk, Tn, Tt, T1L, Th; + To = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T1L = VMUL(Tf, Tj); + Th = VMUL(Tf, Tg); + Tr =
LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T1K = VFNMS(Tc, Ta, T1J); + Te = VFMA(Tc, Td, Tb); + T36 = VADD(T2t, T2v); + T2w = VSUB(T2t, T2v); + T1F = VADD(T1y, T1E); + T2p = VSUB(T1y, T1E); + T1M = VFNMS(Ti, Tg, T1L); + Tk = VFMA(Ti, Tj, Th); + Tn = LDW(&(W[TWVL * 2])); + Tu = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + Tx = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + Tt = LDW(&(W[TWVL * 18])); + Tq = LDW(&(W[TWVL * 3])); + Tw = LDW(&(W[TWVL * 19])); + T1N = VSUB(T1K, T1M); + T3k = VADD(T1K, T1M); + Tl = VADD(Te, Tk); + T3A = VSUB(Te, Tk); + T1P = VMUL(Tn, Tr); + Tp = VMUL(Tn, To); + T1R = VMUL(Tt, Tx); + Tv = VMUL(Tt, Tu); + } + { + V TQ, TT, T1Q, Ts, T1S, Ty, TV, T25, TR, TP, TS; + TQ = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + TT = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + TP = LDW(&(W[0])); + TW = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T1Q = VFNMS(Tq, To, T1P); + Ts = VFMA(Tq, Tr, Tp); + T1S = VFNMS(Tw, Tu, T1R); + Ty = VFMA(Tw, Tx, Tv); + TZ = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + TV = LDW(&(W[TWVL * 16])); + T25 = VMUL(TP, TT); + TR = VMUL(TP, TQ); + TS = LDW(&(W[TWVL * 1])); + TY = LDW(&(W[TWVL * 17])); + T2V = VADD(T1Q, T1S); + T1T = VSUB(T1Q, T1S); + Tz = VADD(Ts, Ty); + T1U = VSUB(Ts, Ty); + T27 = VMUL(TV, TZ); + TX = VMUL(TV, TW); + T26 = VFNMS(TS, TQ, T25); + TU = VFMA(TS, TT, TR); + } + } + { + V T19, T1c, T1b, T2f, T1a, T2e, T17; + { + V T13, T16, T12, T28, T10, T18, T15, T2d, T14; + T13 = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T16 = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T12 = LDW(&(W[TWVL * 8])); + T19 = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T28 = VFNMS(TY, TW, T27); + T10 = VFMA(TY, TZ, TX); + T1c = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + T18 = LDW(&(W[TWVL * 24])); + T15 = LDW(&(W[TWVL * 9])); + T1b = LDW(&(W[TWVL * 25])); + T2d = VMUL(T12, T16); + T14 = VMUL(T12, T13); + T30 = VADD(T26, T28); + T29 = VSUB(T26, T28); + T11 = VADD(TU, T10); + T2c = VSUB(TU, T10); + T2f = VMUL(T18, T1c); + T1a = VMUL(T18, T19);
+ T2e = VFNMS(T15, T13, T2d); + T17 = VFMA(T15, T16, T14); + } + { + V TB, TE, TA, T2g, T1d, TG, TD, T1X, TC; + TB = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + TE = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + TA = LDW(&(W[TWVL * 26])); + TH = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + T2g = VFNMS(T1b, T19, T2f); + T1d = VFMA(T1b, T1c, T1a); + TK = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + TG = LDW(&(W[TWVL * 10])); + TD = LDW(&(W[TWVL * 27])); + TJ = LDW(&(W[TWVL * 11])); + T1X = VMUL(TA, TE); + TC = VMUL(TA, TB); + T31 = VADD(T2e, T2g); + T2h = VSUB(T2e, T2g); + T1e = VADD(T17, T1d); + T2a = VSUB(T17, T1d); + T1Z = VMUL(TG, TK); + TI = VMUL(TG, TH); + T1Y = VFNMS(TD, TB, T1X); + TF = VFMA(TD, TE, TC); + } + } + } + } + { + V T2U, Tm, T3p, T3u, T34, T1G, T1f, T2Z, T20, TL, T32, T3f, T3g, T37; + T2U = VSUB(T8, Tl); + Tm = VADD(T8, Tl); + T3p = VADD(T3k, T3o); + T3u = VSUB(T3o, T3k); + T34 = VSUB(T1s, T1F); + T1G = VADD(T1s, T1F); + T1f = VADD(T11, T1e); + T2Z = VSUB(T11, T1e); + T20 = VFNMS(TJ, TH, T1Z); + TL = VFMA(TJ, TK, TI); + T32 = VSUB(T30, T31); + T3f = VADD(T30, T31); + T3g = VADD(T35, T36); + T37 = VSUB(T35, T36); + { + V T3r, T1H, T21, T1W, T3i, T3h, T3j, T2X, TN, T3t, T2W, TM; + T3r = VSUB(T1G, T1f); + T1H = VADD(T1f, T1G); + T21 = VSUB(T1Y, T20); + T2W = VADD(T1Y, T20); + T1W = VSUB(TF, TL); + TM = VADD(TF, TL); + T3i = VADD(T3f, T3g); + T3h = VSUB(T3f, T3g); + T3j = VADD(T2V, T2W); + T2X = VSUB(T2V, T2W); + TN = VADD(Tz, TM); + T3t = VSUB(TM, Tz); + { + V T2E, T1O, T3B, T3H, T2x, T2q, T2K, T2J, T3C, T23, T3I, T2H; + { + V T2F, T1V, T22, T2G; + T2E = VADD(T1I, T1N); + T1O = VSUB(T1I, T1N); + { + V T3b, T33, T3c, T38; + T3b = VSUB(T32, T2Z); + T33 = VADD(T2Z, T32); + T3c = VADD(T34, T37); + T38 = VSUB(T34, T37); + { + V T3a, T2Y, T3s, T3q; + T3a = VSUB(T2U, T2X); + T2Y = VADD(T2U, T2X); + T3s = VSUB(T3p, T3j); + T3q = VADD(T3j, T3p); + { + V T3x, T3v, T3e, TO; + T3x = VSUB(T3u, T3t); + T3v = VADD(T3t, T3u); + T3e = VSUB(Tm, TN); + TO = VADD(Tm, TN); + { + V T3d, T3w, T3y,
T39; + T3d = VSUB(T3b, T3c); + T3w = VADD(T3b, T3c); + T3y = VSUB(T38, T33); + T39 = VADD(T33, T38); /* The sixteen stores that follow write every even-indexed slot of ri and ii; the odd-indexed slots are written in the last block of the loop body. */ + ST(&(ii[WS(rs, 4)]), VADD(T3r, T3s), ms, &(ii[0])); + ST(&(ii[WS(rs, 12)]), VSUB(T3s, T3r), ms, &(ii[0])); + ST(&(ii[0]), VADD(T3i, T3q), ms, &(ii[0])); + ST(&(ii[WS(rs, 8)]), VSUB(T3q, T3i), ms, &(ii[0])); + ST(&(ri[WS(rs, 4)]), VADD(T3e, T3h), ms, &(ri[0])); + ST(&(ri[WS(rs, 12)]), VSUB(T3e, T3h), ms, &(ri[0])); + ST(&(ri[0]), VADD(TO, T1H), ms, &(ri[0])); + ST(&(ri[WS(rs, 8)]), VSUB(TO, T1H), ms, &(ri[0])); + ST(&(ri[WS(rs, 6)]), VFMA(LDK(KP707106781), T3d, T3a), ms, &(ri[0])); + ST(&(ri[WS(rs, 14)]), VFNMS(LDK(KP707106781), T3d, T3a), ms, &(ri[0])); + ST(&(ii[WS(rs, 10)]), VFNMS(LDK(KP707106781), T3w, T3v), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VFMA(LDK(KP707106781), T3w, T3v), ms, &(ii[0])); + ST(&(ii[WS(rs, 14)]), VFNMS(LDK(KP707106781), T3y, T3x), ms, &(ii[0])); + ST(&(ii[WS(rs, 6)]), VFMA(LDK(KP707106781), T3y, T3x), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VFMA(LDK(KP707106781), T39, T2Y), ms, &(ri[0])); + ST(&(ri[WS(rs, 10)]), VFNMS(LDK(KP707106781), T39, T2Y), ms, &(ri[0])); + T3B = VSUB(T3z, T3A); + T3H = VADD(T3A, T3z); + } + } + } + } + T2F = VADD(T1U, T1T); + T1V = VSUB(T1T, T1U); + T22 = VADD(T1W, T21); + T2G = VSUB(T1W, T21); + { + V T2M, T2N, T2b, T2i; + T2x = VSUB(T2r, T2w); + T2M = VADD(T2r, T2w); + T2N = VSUB(T2o, T2p); + T2q = VADD(T2o, T2p); + T2K = VSUB(T29, T2a); + T2b = VADD(T29, T2a); + T2i = VSUB(T2c, T2h); + T2J = VADD(T2c, T2h); + T3C = VADD(T1V, T22); + T23 = VSUB(T1V, T22); + T2S = VFMA(LDK(KP414213562), T2M, T2N); + T2O = VFNMS(LDK(KP414213562), T2N, T2M); + T3I = VSUB(T2G, T2F); + T2H = VADD(T2F, T2G); + T2B = VFNMS(LDK(KP414213562), T2b, T2i); + T2j = VFMA(LDK(KP414213562), T2i, T2b); + } + } + T2A = VFNMS(LDK(KP707106781), T23, T1O); + T24 = VFMA(LDK(KP707106781), T23, T1O); + T3J = VFMA(LDK(KP707106781), T3I, T3H); + T3L = VFNMS(LDK(KP707106781), T3I, T3H); + T2Q = VFNMS(LDK(KP707106781), T2H, T2E); + T2I = VFMA(LDK(KP707106781),
T2H, T2E); + T2R = VFNMS(LDK(KP414213562), T2J, T2K); + T2L = VFMA(LDK(KP414213562), T2K, T2J); + T2C = VFMA(LDK(KP414213562), T2q, T2x); + T2y = VFNMS(LDK(KP414213562), T2x, T2q); + T3D = VFMA(LDK(KP707106781), T3C, T3B); + T3F = VFNMS(LDK(KP707106781), T3C, T3B); + } + } + } + } + { + V T3E, T2T, T2P, T3G; + T3E = VADD(T2R, T2S); + T2T = VSUB(T2R, T2S); + T2P = VADD(T2L, T2O); + T3G = VSUB(T2O, T2L); + { + V T3K, T2D, T2z, T3M; + T3K = VSUB(T2C, T2B); + T2D = VADD(T2B, T2C); + T2z = VSUB(T2j, T2y); + T3M = VADD(T2j, T2y); + ST(&(ri[WS(rs, 5)]), VFMA(LDK(KP923879532), T2T, T2Q), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VFNMS(LDK(KP923879532), T2T, T2Q), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 9)]), VFNMS(LDK(KP923879532), T3E, T3D), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP923879532), T3E, T3D), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 13)]), VFNMS(LDK(KP923879532), T3G, T3F), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VFMA(LDK(KP923879532), T3G, T3F), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP923879532), T2P, T2I), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 9)]), VFNMS(LDK(KP923879532), T2P, T2I), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VFMA(LDK(KP923879532), T2D, T2A), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VFNMS(LDK(KP923879532), T2D, T2A), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 11)]), VFNMS(LDK(KP923879532), T3K, T3J), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP923879532), T3K, T3J), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 15)]), VFMA(LDK(KP923879532), T3M, T3L), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VFNMS(LDK(KP923879532), T3M, T3L), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP923879532), T2z, T24), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 11)]), VFNMS(LDK(KP923879532), T2z, T24), ms, &(ri[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { /* Fifteen VTW(0, k) entries, k = 1..15, closed by a TW_NEXT record. */ + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), +
VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t1sv_16"), twinstr, &GENUS, {104, 30, 70, 0}, 0, 0, 0 }; /* The counts {104, 30, 70, 0} repeat the add / mul / fused figures from the generated header comment of the HAVE_FMA variant. */ + +void XSIMD(codelet_t1sv_16) (planner *p) { /* HAVE_FMA build: passes t1sv_16 and its descriptor to the planner through kdft_dit_register. */ + X(kdft_dit_register) (p, t1sv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1sv_16 -include ts.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 52 stack variables, 3 constants, and 64 memory accesses + */ +#include "ts.h" + +static void t1sv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ /* Size-16 twiddle pass over the separate ri and ii arrays, variant compiled when HAVE_FMA is not defined. Same signature, loop stepping and load/store slots as the HAVE_FMA variant; the arithmetic is grouped differently. */ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 30); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 30), MAKE_VOLATILE_STRIDE(32, rs)) { + V T7, T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H; + V T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2j, T24, T2k, TS, T13, T2w, T2x; + V T2y, T2z, T1O, T2g, T1T, T2h; + { + V T1, T2T, T6, T2S; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T2T = LD(&(ii[0]), ms, &(ii[0])); /* Input 0 (T1, T2T) is used without a twiddle. Each other input k is loaded in its own small block together with W[TWVL * (2k - 2)] and W[TWVL * (2k - 1)], and combined by one VFMA and one VFNMS. */ + { + V T3, T5, T2, T4; + T3 = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + T5 = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 14])); + T4 = LDW(&(W[TWVL * 15])); + T6 = VFMA(T2, T3, VMUL(T4, T5)); + T2S = VFNMS(T4, T3, VMUL(T2, T5)); + } + T7 = VADD(T1, T6); + T37 = VSUB(T2T, T2S); + T1t = VSUB(T1, T6); + T2U = VADD(T2S, T2T); + } + { + V Tc, T1u, Th, T1v; + { + V T9, Tb, T8, Ta; + T9 = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + Tb = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T8 = LDW(&(W[TWVL * 6])); + Ta = LDW(&(W[TWVL
* 7])); + Tc = VFMA(T8, T9, VMUL(Ta, Tb)); + T1u = VFNMS(Ta, T9, VMUL(T8, Tb)); + } + { + V Te, Tg, Td, Tf; + Te = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + Tg = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + Td = LDW(&(W[TWVL * 22])); + Tf = LDW(&(W[TWVL * 23])); + Th = VFMA(Td, Te, VMUL(Tf, Tg)); + T1v = VFNMS(Tf, Te, VMUL(Td, Tg)); + } + Ti = VADD(Tc, Th); + T38 = VSUB(Tc, Th); + T1w = VSUB(T1u, T1v); + T2R = VADD(T1u, T1v); + } + { + V To, T1y, Tt, T1z, T1A, T1B; + { + V Tl, Tn, Tk, Tm; + Tl = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + Tn = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + Tk = LDW(&(W[TWVL * 2])); + Tm = LDW(&(W[TWVL * 3])); + To = VFMA(Tk, Tl, VMUL(Tm, Tn)); + T1y = VFNMS(Tm, Tl, VMUL(Tk, Tn)); + } + { + V Tq, Ts, Tp, Tr; + Tq = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + Ts = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + Tp = LDW(&(W[TWVL * 18])); + Tr = LDW(&(W[TWVL * 19])); + Tt = VFMA(Tp, Tq, VMUL(Tr, Ts)); + T1z = VFNMS(Tr, Tq, VMUL(Tp, Ts)); + } + Tu = VADD(To, Tt); + T2s = VADD(T1y, T1z); + T1A = VSUB(T1y, T1z); + T1B = VSUB(To, Tt); + T1C = VSUB(T1A, T1B); + T2c = VADD(T1B, T1A); + } + { + V Tz, T1E, TE, T1F, T1D, T1G; + { + V Tw, Ty, Tv, Tx; + Tw = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + Ty = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + Tv = LDW(&(W[TWVL * 26])); + Tx = LDW(&(W[TWVL * 27])); + Tz = VFMA(Tv, Tw, VMUL(Tx, Ty)); + T1E = VFNMS(Tx, Tw, VMUL(Tv, Ty)); + } + { + V TB, TD, TA, TC; + TB = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + TD = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + TA = LDW(&(W[TWVL * 10])); + TC = LDW(&(W[TWVL * 11])); + TE = VFMA(TA, TB, VMUL(TC, TD)); + T1F = VFNMS(TC, TB, VMUL(TA, TD)); + } + TF = VADD(Tz, TE); + T2t = VADD(T1E, T1F); + T1D = VSUB(Tz, TE); + T1G = VSUB(T1E, T1F); + T1H = VADD(T1D, T1G); + T2d = VSUB(T1D, T1G); + } + { + V T19, T20, T1p, T1X, T1e, T21, T1k, T1W; + { + V T16, T18, T15, T17; + T16 = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T18 = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T15 = LDW(&(W[TWVL * 28])); + T17 = LDW(&(W[TWVL *
29])); + T19 = VFMA(T15, T16, VMUL(T17, T18)); + T20 = VFNMS(T17, T16, VMUL(T15, T18)); + } + { + V T1m, T1o, T1l, T1n; + T1m = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T1o = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + T1l = LDW(&(W[TWVL * 20])); + T1n = LDW(&(W[TWVL * 21])); + T1p = VFMA(T1l, T1m, VMUL(T1n, T1o)); + T1X = VFNMS(T1n, T1m, VMUL(T1l, T1o)); + } + { + V T1b, T1d, T1a, T1c; + T1b = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T1d = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T1a = LDW(&(W[TWVL * 12])); + T1c = LDW(&(W[TWVL * 13])); + T1e = VFMA(T1a, T1b, VMUL(T1c, T1d)); + T21 = VFNMS(T1c, T1b, VMUL(T1a, T1d)); + } + { + V T1h, T1j, T1g, T1i; + T1h = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T1j = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T1g = LDW(&(W[TWVL * 4])); + T1i = LDW(&(W[TWVL * 5])); + T1k = VFMA(T1g, T1h, VMUL(T1i, T1j)); + T1W = VFNMS(T1i, T1h, VMUL(T1g, T1j)); + } + T1f = VADD(T19, T1e); + T1q = VADD(T1k, T1p); + T2B = VSUB(T1f, T1q); + T2C = VADD(T20, T21); + T2D = VADD(T1W, T1X); + T2E = VSUB(T2C, T2D); + { + V T1V, T1Y, T22, T23; + T1V = VSUB(T19, T1e); + T1Y = VSUB(T1W, T1X); + T1Z = VSUB(T1V, T1Y); + T2j = VADD(T1V, T1Y); + T22 = VSUB(T20, T21); + T23 = VSUB(T1k, T1p); + T24 = VADD(T22, T23); + T2k = VSUB(T22, T23); + } + } + { + V TM, T1K, T12, T1R, TR, T1L, TX, T1Q; + { + V TJ, TL, TI, TK; + TJ = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + TL = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + TI = LDW(&(W[0])); + TK = LDW(&(W[TWVL * 1])); + TM = VFMA(TI, TJ, VMUL(TK, TL)); + T1K = VFNMS(TK, TJ, VMUL(TI, TL)); + } + { + V TZ, T11, TY, T10; + TZ = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T11 = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + TY = LDW(&(W[TWVL * 24])); + T10 = LDW(&(W[TWVL * 25])); + T12 = VFMA(TY, TZ, VMUL(T10, T11)); + T1R = VFNMS(T10, TZ, VMUL(TY, T11)); + } + { + V TO, TQ, TN, TP; + TO = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + TQ = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + TN =
LDW(&(W[TWVL * 16])); + TP = LDW(&(W[TWVL * 17])); + TR = VFMA(TN, TO, VMUL(TP, TQ)); + T1L = VFNMS(TP, TO, VMUL(TN, TQ)); + } + { + V TU, TW, TT, TV; + TU = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + TW = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + TT = LDW(&(W[TWVL * 8])); + TV = LDW(&(W[TWVL * 9])); + TX = VFMA(TT, TU, VMUL(TV, TW)); + T1Q = VFNMS(TV, TU, VMUL(TT, TW)); + } + TS = VADD(TM, TR); + T13 = VADD(TX, T12); + T2w = VSUB(TS, T13); + T2x = VADD(T1K, T1L); + T2y = VADD(T1Q, T1R); + T2z = VSUB(T2x, T2y); + { + V T1M, T1N, T1P, T1S; + T1M = VSUB(T1K, T1L); + T1N = VSUB(TX, T12); + T1O = VADD(T1M, T1N); + T2g = VSUB(T1M, T1N); + T1P = VSUB(TM, TR); + T1S = VSUB(T1Q, T1R); + T1T = VSUB(T1P, T1S); + T2h = VADD(T1P, T1S); + } + } + { + V T1J, T27, T3g, T3i, T26, T3h, T2a, T3d; /* This block writes outputs 11, 3, 15 and 7 of both ri and ii. */ + { + V T1x, T1I, T3e, T3f; + T1x = VSUB(T1t, T1w); + T1I = VMUL(LDK(KP707106781), VSUB(T1C, T1H)); + T1J = VADD(T1x, T1I); + T27 = VSUB(T1x, T1I); + T3e = VMUL(LDK(KP707106781), VSUB(T2d, T2c)); + T3f = VADD(T38, T37); + T3g = VADD(T3e, T3f); + T3i = VSUB(T3f, T3e); + } + { + V T1U, T25, T28, T29; + T1U = VFMA(LDK(KP923879532), T1O, VMUL(LDK(KP382683432), T1T)); + T25 = VFNMS(LDK(KP923879532), T24, VMUL(LDK(KP382683432), T1Z)); + T26 = VADD(T1U, T25); + T3h = VSUB(T25, T1U); + T28 = VFNMS(LDK(KP923879532), T1T, VMUL(LDK(KP382683432), T1O)); + T29 = VFMA(LDK(KP382683432), T24, VMUL(LDK(KP923879532), T1Z)); + T2a = VSUB(T28, T29); + T3d = VADD(T28, T29); + } + ST(&(ri[WS(rs, 11)]), VSUB(T1J, T26), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 11)]), VSUB(T3g, T3d), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VADD(T1J, T26), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VADD(T3d, T3g), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VSUB(T27, T2a), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 15)]), VSUB(T3i, T3h), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VADD(T27, T2a), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VADD(T3h, T3i), ms, &(ii[WS(rs, 1)])); + } + { + V T2v, T2H, T32, T34, T2G,
T33, T2K, T2Z; /* This block writes outputs 10, 2, 14 and 6 of both ri and ii. */ + { + V T2r, T2u, T30, T31; + T2r = VSUB(T7, Ti); + T2u = VSUB(T2s, T2t); + T2v = VADD(T2r, T2u); + T2H = VSUB(T2r, T2u); + T30 = VSUB(TF, Tu); + T31 = VSUB(T2U, T2R); + T32 = VADD(T30, T31); + T34 = VSUB(T31, T30); + } + { + V T2A, T2F, T2I, T2J; + T2A = VADD(T2w, T2z); + T2F = VSUB(T2B, T2E); + T2G = VMUL(LDK(KP707106781), VADD(T2A, T2F)); + T33 = VMUL(LDK(KP707106781), VSUB(T2F, T2A)); + T2I = VSUB(T2z, T2w); + T2J = VADD(T2B, T2E); + T2K = VMUL(LDK(KP707106781), VSUB(T2I, T2J)); + T2Z = VMUL(LDK(KP707106781), VADD(T2I, T2J)); + } + ST(&(ri[WS(rs, 10)]), VSUB(T2v, T2G), ms, &(ri[0])); + ST(&(ii[WS(rs, 10)]), VSUB(T32, T2Z), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VADD(T2v, T2G), ms, &(ri[0])); + ST(&(ii[WS(rs, 2)]), VADD(T2Z, T32), ms, &(ii[0])); + ST(&(ri[WS(rs, 14)]), VSUB(T2H, T2K), ms, &(ri[0])); + ST(&(ii[WS(rs, 14)]), VSUB(T34, T33), ms, &(ii[0])); + ST(&(ri[WS(rs, 6)]), VADD(T2H, T2K), ms, &(ri[0])); + ST(&(ii[WS(rs, 6)]), VADD(T33, T34), ms, &(ii[0])); + } + { + V T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35; /* This block writes outputs 9, 1, 13 and 5 of both ri and ii. */ + { + V T2b, T2e, T36, T39; + T2b = VADD(T1t, T1w); + T2e = VMUL(LDK(KP707106781), VADD(T2c, T2d)); + T2f = VADD(T2b, T2e); + T2n = VSUB(T2b, T2e); + T36 = VMUL(LDK(KP707106781), VADD(T1C, T1H)); + T39 = VSUB(T37, T38); + T3a = VADD(T36, T39); + T3c = VSUB(T39, T36); + } + { + V T2i, T2l, T2o, T2p; + T2i = VFMA(LDK(KP382683432), T2g, VMUL(LDK(KP923879532), T2h)); + T2l = VFNMS(LDK(KP382683432), T2k, VMUL(LDK(KP923879532), T2j)); + T2m = VADD(T2i, T2l); + T3b = VSUB(T2l, T2i); + T2o = VFNMS(LDK(KP382683432), T2h, VMUL(LDK(KP923879532), T2g)); + T2p = VFMA(LDK(KP923879532), T2k, VMUL(LDK(KP382683432), T2j)); + T2q = VSUB(T2o, T2p); + T35 = VADD(T2o, T2p); + } + ST(&(ri[WS(rs, 9)]), VSUB(T2f, T2m), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 9)]), VSUB(T3a, T35), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(T2f, T2m), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VADD(T35, T3a), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VSUB(T2n, T2q),
ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 13)]), VSUB(T3c, T3b), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VADD(T2n, T2q), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VADD(T3b, T3c), ms, &(ii[WS(rs, 1)])); + } + { + V TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P; /* This block writes outputs 8, 0, 12 and 4 of both ri and ii. */ + { + V Tj, TG, T2Q, T2V; + Tj = VADD(T7, Ti); + TG = VADD(Tu, TF); + TH = VADD(Tj, TG); + T2L = VSUB(Tj, TG); + T2Q = VADD(T2s, T2t); + T2V = VADD(T2R, T2U); + T2W = VADD(T2Q, T2V); + T2Y = VSUB(T2V, T2Q); + } + { + V T14, T1r, T2M, T2N; + T14 = VADD(TS, T13); + T1r = VADD(T1f, T1q); + T1s = VADD(T14, T1r); + T2X = VSUB(T1r, T14); + T2M = VADD(T2x, T2y); + T2N = VADD(T2C, T2D); + T2O = VSUB(T2M, T2N); + T2P = VADD(T2M, T2N); + } + ST(&(ri[WS(rs, 8)]), VSUB(TH, T1s), ms, &(ri[0])); + ST(&(ii[WS(rs, 8)]), VSUB(T2W, T2P), ms, &(ii[0])); + ST(&(ri[0]), VADD(TH, T1s), ms, &(ri[0])); + ST(&(ii[0]), VADD(T2P, T2W), ms, &(ii[0])); + ST(&(ri[WS(rs, 12)]), VSUB(T2L, T2O), ms, &(ri[0])); + ST(&(ii[WS(rs, 12)]), VSUB(T2Y, T2X), ms, &(ii[0])); + ST(&(ri[WS(rs, 4)]), VADD(T2L, T2O), ms, &(ri[0])); + ST(&(ii[WS(rs, 4)]), VADD(T2X, T2Y), ms, &(ii[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { /* Fifteen VTW(0, k) entries, k = 1..15, closed by a TW_NEXT record. */ + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t1sv_16"), twinstr, &GENUS, {136, 46, 38, 0}, 0, 0, 0 }; /* The counts {136, 46, 38, 0} repeat the add / mul / fused figures from the generated header comment of the non-FMA variant. */ + +void XSIMD(codelet_t1sv_16) (planner *p) { /* Non-FMA build: passes t1sv_16 and its descriptor to the planner through kdft_dit_register. */ + X(kdft_dit_register) (p, t1sv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program
is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1sv_2 -include ts.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "ts.h" + +static void t1sv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ /* Size-2 twiddle pass over the separate ri and ii arrays, variant compiled when HAVE_FMA is defined. Works in place on slots 0 and WS(rs, 1). Each loop step advances m by 2 * VL, ri and ii by (2 * VL) * ms, and W by (2 * VL) * 2. */ + { + INT m; + for (m = mb, W = W + (mb * 2); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, Ta, T3, T6, T2, T5; + T1 = LD(&(ri[0]), ms, &(ri[0])); + Ta = LD(&(ii[0]), ms, &(ii[0])); + T3 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T6 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 1])); /* T2 and T5 are the only twiddle values read; they are applied to input 1 (T3, T6) to form T7 and T9, while input 0 (T1, Ta) is used as loaded. NOTE(review): the exact arithmetic of VFMA and VFNMS is defined outside this file - confirm against the SIMD support headers. */ + { + V T8, T4, T9, T7; + T8 = VMUL(T2, T6); + T4 = VMUL(T2, T3); + T9 = VFNMS(T5, T3, T8); + T7 = VFMA(T5, T6, T4); + ST(&(ii[0]), VADD(T9, Ta), ms, &(ii[0])); + ST(&(ii[WS(rs, 1)]), VSUB(Ta, T9), ms,
&(ii[WS(rs, 1)])); + ST(&(ri[0]), VADD(T1, T7), ms, &(ri[0])); + ST(&(ri[WS(rs, 1)]), VSUB(T1, T7), ms, &(ri[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { /* A single VTW(0, 1) entry, closed by a TW_NEXT record. */ + VTW(0, 1), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1sv_2"), twinstr, &GENUS, {4, 2, 2, 0}, 0, 0, 0 }; /* The counts {4, 2, 2, 0} repeat the add / mul / fused figures from the generated header comment. */ + +void XSIMD(codelet_t1sv_2) (planner *p) { /* HAVE_FMA build: passes t1sv_2 and its descriptor to the planner through kdft_dit_register. */ + X(kdft_dit_register) (p, t1sv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1sv_2 -include ts.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory accesses + */ +#include "ts.h" + +static void t1sv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ /* Size-2 twiddle pass over the separate ri and ii arrays, variant compiled when HAVE_FMA is not defined. Same signature, loop stepping and load/store slots as the HAVE_FMA variant. */ + { + INT m; + for (m = mb, W = W + (mb * 2); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T6, T7; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T8 = LD(&(ii[0]), ms, &(ii[0])); + { + V T3, T5, T2, T4; /* Input 1 (T3, T5) is combined with the twiddle pair (T2, T4) into T6 and T7; the four stores after this block write input 0 combined with T6 / T7 by VADD and VSUB. */ + T3 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T5 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T2 = LDW(&(W[0])); + T4 = LDW(&(W[TWVL * 1])); + T6 = VFMA(T2, T3, VMUL(T4, T5)); + T7 = VFNMS(T4, T3, VMUL(T2, T5)); + } + ST(&(ri[WS(rs, 1)]), VSUB(T1, T6), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VSUB(T8, T7), ms, &(ii[WS(rs, 1)])); + ST(&(ri[0]), VADD(T1, T6), ms, &(ri[0])); + ST(&(ii[0]), VADD(T7, T8), ms, &(ii[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { /* A single VTW(0, 1) entry, closed by a TW_NEXT record. */ + VTW(0, 1), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t1sv_2"), twinstr, &GENUS, {4, 2, 2, 0}, 0, 0, 0 }; /* The counts {4, 2, 2, 0} repeat the add / mul / fused figures from the generated header comment. */ + +void XSIMD(codelet_t1sv_2) (planner *p) { /* Non-FMA build: passes t1sv_2 and its descriptor to the planner through kdft_dit_register. */ + X(kdft_dit_register) (p, t1sv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29
fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1784 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:53 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1sv_32 -include ts.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 236 additions, 62 multiplications, 198 fused multiply/add), + * 158 stack variables, 7 constants, and 128 memory accesses + */ +#include "ts.h" + +static void t1sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + 
DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 62); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 62), MAKE_VOLATILE_STRIDE(64, rs)) { + V T8Z, T90; + { + V T87, T8x, T3w, T8, T3B, T83, Tl, T8y, T6F, Tz, T3J, T5T, T6G, TM, T3Q; + V T5U, T46, T5Y, T7D, T6L, T5X, T3Z, T6M, T1f, T4l, T61, T7E, T6R, T60, T4e; + V T6O, T1G, T5r, T6c, T78, T7N, T54, T6f, T32, T7b, T4S, T65, T6X, T7I, T4v; + V T68, T29, T70, T4x, T2f, T5b, T5s, T7O, T7e, T5t, T5i, T79, T3t, T2h, T2k; + V T2j, T2o, T2r, T4H, T2y, T2n, T2q, T4y, T2i; + { + V T3U, TU, TW, TZ, TY, T13, T16, T12, T15, T3V, TX, T44, T1d; + { + V T1, T86, T3, T6, T5, Ta, Td, Tg, Tj, Tf, T84, T4, Tc, Ti, T3x; + V Tb, T2, T9; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T86 = LD(&(ii[0]), ms, &(ii[0])); + T3 = LD(&(ri[WS(rs, 16)]), ms, &(ri[0])); + T6 = LD(&(ii[WS(rs, 16)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 30])); + T5 = LDW(&(W[TWVL * 31])); + Ta = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + Td = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + T9 = LDW(&(W[TWVL * 14])); + Tg = LD(&(ri[WS(rs, 24)]), ms, &(ri[0])); + Tj = LD(&(ii[WS(rs, 24)]), ms, &(ii[0])); + Tf = LDW(&(W[TWVL * 46])); + T84 = VMUL(T2, T6); + T4 = VMUL(T2, T3); + Tc = LDW(&(W[TWVL * 15])); + Ti = LDW(&(W[TWVL * 47])); + T3x = VMUL(T9, Td); + Tb = VMUL(T9, Ta); + { + V Tu, Tx, T3F, Ts, Tt, Tw; + { + V To, Tr, Tq, T3E, Tp; + { + V T3y, Te, Tn, T3A, Tk; + { + V T3z, Th, T85, T7; + To = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + T3z = VMUL(Tf, Tj); + Th = VMUL(Tf, Tg); + T85 = VFNMS(T5, T3, T84); + T7 = VFMA(T5, T6, T4); + Tr = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T3y = VFNMS(Tc, Ta, T3x); + Te = VFMA(Tc, Td, Tb); + Tn = LDW(&(W[TWVL * 6])); + T3A = VFNMS(Ti, Tg, T3z); + Tk = VFMA(Ti, Tj, Th); + T87 = VADD(T85, T86); + T8x = VSUB(T86, 
T85); + T3w = VSUB(T1, T7); + T8 = VADD(T1, T7); + } + Tq = LDW(&(W[TWVL * 7])); + T3E = VMUL(Tn, Tr); + Tp = VMUL(Tn, To); + T3B = VSUB(T3y, T3A); + T83 = VADD(T3y, T3A); + Tl = VADD(Te, Tk); + T8y = VSUB(Te, Tk); + } + Tu = LD(&(ri[WS(rs, 20)]), ms, &(ri[0])); + Tx = LD(&(ii[WS(rs, 20)]), ms, &(ii[0])); + T3F = VFNMS(Tq, To, T3E); + Ts = VFMA(Tq, Tr, Tp); + Tt = LDW(&(W[TWVL * 38])); + Tw = LDW(&(W[TWVL * 39])); + } + { + V TB, TE, TD, TH, TK, T3G, Tv, TG, TJ, T3L, TC, TA; + TB = LD(&(ri[WS(rs, 28)]), ms, &(ri[0])); + TE = LD(&(ii[WS(rs, 28)]), ms, &(ii[0])); + TA = LDW(&(W[TWVL * 54])); + TD = LDW(&(W[TWVL * 55])); + TH = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + TK = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + T3G = VMUL(Tt, Tx); + Tv = VMUL(Tt, Tu); + TG = LDW(&(W[TWVL * 22])); + TJ = LDW(&(W[TWVL * 23])); + T3L = VMUL(TA, TE); + TC = VMUL(TA, TB); + { + V T19, T1c, T3P, T3K, T18, T1b, TV, T43, T1a; + { + V TQ, TT, T3M, TF, TS, T3I, T3D, T3O, TL, T3T, TR; + { + V T3H, Ty, T3N, TI, TP; + TQ = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + TT = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T3H = VFNMS(Tw, Tu, T3G); + Ty = VFMA(Tw, Tx, Tv); + T3N = VMUL(TG, TK); + TI = VMUL(TG, TH); + T3M = VFNMS(TD, TB, T3L); + TF = VFMA(TD, TE, TC); + TP = LDW(&(W[TWVL * 2])); + TS = LDW(&(W[TWVL * 3])); + T6F = VADD(T3F, T3H); + T3I = VSUB(T3F, T3H); + Tz = VADD(Ts, Ty); + T3D = VSUB(Ts, Ty); + T3O = VFNMS(TJ, TH, T3N); + TL = VFMA(TJ, TK, TI); + T3T = VMUL(TP, TT); + TR = VMUL(TP, TQ); + } + T19 = LD(&(ri[WS(rs, 26)]), ms, &(ri[0])); + T1c = LD(&(ii[WS(rs, 26)]), ms, &(ii[0])); + T3J = VADD(T3D, T3I); + T5T = VSUB(T3I, T3D); + T6G = VADD(T3M, T3O); + T3P = VSUB(T3M, T3O); + TM = VADD(TF, TL); + T3K = VSUB(TF, TL); + T3U = VFNMS(TS, TQ, T3T); + TU = VFMA(TS, TT, TR); + T18 = LDW(&(W[TWVL * 50])); + T1b = LDW(&(W[TWVL * 51])); + } + TW = LD(&(ri[WS(rs, 18)]), ms, &(ri[0])); + TZ = LD(&(ii[WS(rs, 18)]), ms, &(ii[0])); + T3Q = VSUB(T3K, T3P); + T5U = VADD(T3K, T3P); + TV = LDW(&(W[TWVL * 34])); + TY = 
LDW(&(W[TWVL * 35])); + T43 = VMUL(T18, T1c); + T1a = VMUL(T18, T19); + T13 = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + T16 = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + T12 = LDW(&(W[TWVL * 18])); + T15 = LDW(&(W[TWVL * 19])); + T3V = VMUL(TV, TZ); + TX = VMUL(TV, TW); + T44 = VFNMS(T1b, T19, T43); + T1d = VFMA(T1b, T1c, T1a); + } + } + } + } + { + V T4Z, T2H, T2J, T2M, T2L, T2Q, T2T, T2P, T2S, T5p, T30, T50, T2K; + { + V T49, T1l, T1n, T1q, T1p, T1u, T1x, T4j, T1E, T1t, T1w, T4a, T1o; + { + V T1A, T1D, T1C, T4i, T1B, T1m; + { + V T1h, T1k, T41, T14, T3W, T10, T1g, T1j; + T1h = LD(&(ri[WS(rs, 30)]), ms, &(ri[0])); + T1k = LD(&(ii[WS(rs, 30)]), ms, &(ii[0])); + T41 = VMUL(T12, T16); + T14 = VMUL(T12, T13); + T3W = VFNMS(TY, TW, T3V); + T10 = VFMA(TY, TZ, TX); + T1g = LDW(&(W[TWVL * 58])); + T1j = LDW(&(W[TWVL * 59])); + { + V T6J, T3X, T11, T40, T48, T1i, T6K, T45, T1e, T3Y, T1z, T42, T17; + T1A = LD(&(ri[WS(rs, 22)]), ms, &(ri[0])); + T1D = LD(&(ii[WS(rs, 22)]), ms, &(ii[0])); + T42 = VFNMS(T15, T13, T41); + T17 = VFMA(T15, T16, T14); + T6J = VADD(T3U, T3W); + T3X = VSUB(T3U, T3W); + T11 = VADD(TU, T10); + T40 = VSUB(TU, T10); + T48 = VMUL(T1g, T1k); + T1i = VMUL(T1g, T1h); + T6K = VADD(T42, T44); + T45 = VSUB(T42, T44); + T1e = VADD(T17, T1d); + T3Y = VSUB(T17, T1d); + T1z = LDW(&(W[TWVL * 42])); + T1C = LDW(&(W[TWVL * 43])); + T49 = VFNMS(T1j, T1h, T48); + T1l = VFMA(T1j, T1k, T1i); + T46 = VADD(T40, T45); + T5Y = VSUB(T40, T45); + T7D = VADD(T6J, T6K); + T6L = VSUB(T6J, T6K); + T5X = VADD(T3X, T3Y); + T3Z = VSUB(T3X, T3Y); + T6M = VSUB(T11, T1e); + T1f = VADD(T11, T1e); + T4i = VMUL(T1z, T1D); + T1B = VMUL(T1z, T1A); + } + } + T1n = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + T1q = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + T1m = LDW(&(W[TWVL * 26])); + T1p = LDW(&(W[TWVL * 27])); + T1u = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + T1x = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + T4j = VFNMS(T1C, T1A, T4i); + T1E = VFMA(T1C, T1D, T1B); + T1t = LDW(&(W[TWVL * 10])); + T1w = LDW(&(W[TWVL * 
11])); + T4a = VMUL(T1m, T1q); + T1o = VMUL(T1m, T1n); + } + { + V T2W, T2Z, T6P, T4c, T1s, T4f, T6Q, T4k, T1F, T4d, T2V, T2Y, T5o, T2X, T2I; + { + V T2D, T2G, T2C, T2F, T4g, T1v, T4b, T1r; + T2D = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)])); + T2G = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)])); + T2C = LDW(&(W[TWVL * 60])); + T2F = LDW(&(W[TWVL * 61])); + T4g = VMUL(T1t, T1x); + T1v = VMUL(T1t, T1u); + T4b = VFNMS(T1p, T1n, T4a); + T1r = VFMA(T1p, T1q, T1o); + T2W = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)])); + T2Z = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)])); + { + V T4Y, T2E, T4h, T1y; + T4Y = VMUL(T2C, T2G); + T2E = VMUL(T2C, T2D); + T4h = VFNMS(T1w, T1u, T4g); + T1y = VFMA(T1w, T1x, T1v); + T6P = VADD(T49, T4b); + T4c = VSUB(T49, T4b); + T1s = VADD(T1l, T1r); + T4f = VSUB(T1l, T1r); + T4Z = VFNMS(T2F, T2D, T4Y); + T2H = VFMA(T2F, T2G, T2E); + T6Q = VADD(T4h, T4j); + T4k = VSUB(T4h, T4j); + T1F = VADD(T1y, T1E); + T4d = VSUB(T1y, T1E); + T2V = LDW(&(W[TWVL * 44])); + } + T2Y = LDW(&(W[TWVL * 45])); + } + T2J = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T2M = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T4l = VADD(T4f, T4k); + T61 = VSUB(T4f, T4k); + T7E = VADD(T6P, T6Q); + T6R = VSUB(T6P, T6Q); + T60 = VADD(T4c, T4d); + T4e = VSUB(T4c, T4d); + T6O = VSUB(T1s, T1F); + T1G = VADD(T1s, T1F); + T5o = VMUL(T2V, T2Z); + T2X = VMUL(T2V, T2W); + T2I = LDW(&(W[TWVL * 28])); + T2L = LDW(&(W[TWVL * 29])); + T2Q = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T2T = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T2P = LDW(&(W[TWVL * 12])); + T2S = LDW(&(W[TWVL * 13])); + T5p = VFNMS(T2Y, T2W, T5o); + T30 = VFMA(T2Y, T2Z, T2X); + T50 = VMUL(T2I, T2M); + T2K = VMUL(T2I, T2J); + } + } + { + V T4q, T1O, T1Q, T1T, T1S, T1X, T20, T4Q, T27, T1W, T1Z, T4r, T1R; + { + V T23, T26, T25, T4P, T24, T1P; + { + V T1K, T1N, T5m, T2R, T1J, T1M, T51, T2N; + T1K = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T1N = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T5m = VMUL(T2P, T2T); + 
T2R = VMUL(T2P, T2Q); + T1J = LDW(&(W[0])); + T1M = LDW(&(W[TWVL * 1])); + T51 = VFNMS(T2L, T2J, T50); + T2N = VFMA(T2L, T2M, T2K); + { + V T76, T52, T2O, T5l, T77, T5q, T31, T53, T22; + T23 = LD(&(ri[WS(rs, 25)]), ms, &(ri[WS(rs, 1)])); + T26 = LD(&(ii[WS(rs, 25)]), ms, &(ii[WS(rs, 1)])); + { + V T5n, T2U, T4p, T1L; + T5n = VFNMS(T2S, T2Q, T5m); + T2U = VFMA(T2S, T2T, T2R); + T4p = VMUL(T1J, T1N); + T1L = VMUL(T1J, T1K); + T76 = VADD(T4Z, T51); + T52 = VSUB(T4Z, T51); + T2O = VADD(T2H, T2N); + T5l = VSUB(T2H, T2N); + T77 = VADD(T5n, T5p); + T5q = VSUB(T5n, T5p); + T31 = VADD(T2U, T30); + T53 = VSUB(T2U, T30); + T4q = VFNMS(T1M, T1K, T4p); + T1O = VFMA(T1M, T1N, T1L); + T22 = LDW(&(W[TWVL * 48])); + } + T25 = LDW(&(W[TWVL * 49])); + T5r = VADD(T5l, T5q); + T6c = VSUB(T5l, T5q); + T78 = VSUB(T76, T77); + T7N = VADD(T76, T77); + T54 = VSUB(T52, T53); + T6f = VADD(T52, T53); + T32 = VADD(T2O, T31); + T7b = VSUB(T2O, T31); + T4P = VMUL(T22, T26); + T24 = VMUL(T22, T23); + } + } + T1Q = LD(&(ri[WS(rs, 17)]), ms, &(ri[WS(rs, 1)])); + T1T = LD(&(ii[WS(rs, 17)]), ms, &(ii[WS(rs, 1)])); + T1P = LDW(&(W[TWVL * 32])); + T1S = LDW(&(W[TWVL * 33])); + T1X = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T20 = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + T4Q = VFNMS(T25, T23, T4P); + T27 = VFMA(T25, T26, T24); + T1W = LDW(&(W[TWVL * 16])); + T1Z = LDW(&(W[TWVL * 17])); + T4r = VMUL(T1P, T1T); + T1R = VMUL(T1P, T1Q); + } + { + V T56, T38, T3a, T3d, T3c, T3h, T3k, T3g, T3j, T5g, T3r, T57, T3b; + { + V T3n, T3q, T6V, T4t, T1V, T4M, T6W, T4R, T28, T4u, T3m, T3p, T5f, T3o, T39; + { + V T34, T37, T33, T36, T4N, T1Y, T4s, T1U; + T34 = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T37 = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T33 = LDW(&(W[TWVL * 4])); + T36 = LDW(&(W[TWVL * 5])); + T4N = VMUL(T1W, T20); + T1Y = VMUL(T1W, T1X); + T4s = VFNMS(T1S, T1Q, T4r); + T1U = VFMA(T1S, T1T, T1R); + T3n = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T3q = LD(&(ii[WS(rs, 11)]), ms, 
&(ii[WS(rs, 1)])); + { + V T55, T35, T4O, T21; + T55 = VMUL(T33, T37); + T35 = VMUL(T33, T34); + T4O = VFNMS(T1Z, T1X, T4N); + T21 = VFMA(T1Z, T20, T1Y); + T6V = VADD(T4q, T4s); + T4t = VSUB(T4q, T4s); + T1V = VADD(T1O, T1U); + T4M = VSUB(T1O, T1U); + T56 = VFNMS(T36, T34, T55); + T38 = VFMA(T36, T37, T35); + T6W = VADD(T4O, T4Q); + T4R = VSUB(T4O, T4Q); + T28 = VADD(T21, T27); + T4u = VSUB(T21, T27); + T3m = LDW(&(W[TWVL * 20])); + } + T3p = LDW(&(W[TWVL * 21])); + } + T3a = LD(&(ri[WS(rs, 19)]), ms, &(ri[WS(rs, 1)])); + T3d = LD(&(ii[WS(rs, 19)]), ms, &(ii[WS(rs, 1)])); + T4S = VADD(T4M, T4R); + T65 = VSUB(T4M, T4R); + T6X = VSUB(T6V, T6W); + T7I = VADD(T6V, T6W); + T4v = VSUB(T4t, T4u); + T68 = VADD(T4t, T4u); + T29 = VADD(T1V, T28); + T70 = VSUB(T1V, T28); + T5f = VMUL(T3m, T3q); + T3o = VMUL(T3m, T3n); + T39 = LDW(&(W[TWVL * 36])); + T3c = LDW(&(W[TWVL * 37])); + T3h = LD(&(ri[WS(rs, 27)]), ms, &(ri[WS(rs, 1)])); + T3k = LD(&(ii[WS(rs, 27)]), ms, &(ii[WS(rs, 1)])); + T3g = LDW(&(W[TWVL * 52])); + T3j = LDW(&(W[TWVL * 53])); + T5g = VFNMS(T3p, T3n, T5f); + T3r = VFMA(T3p, T3q, T3o); + T57 = VMUL(T39, T3d); + T3b = VMUL(T39, T3a); + } + { + V T2u, T2x, T2w, T4G, T2v, T2g; + { + V T2b, T2e, T5d, T3i, T2a, T2d, T58, T3e, T2t; + T2b = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T2e = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T5d = VMUL(T3g, T3k); + T3i = VMUL(T3g, T3h); + T2a = LDW(&(W[TWVL * 8])); + T2d = LDW(&(W[TWVL * 9])); + T58 = VFNMS(T3c, T3a, T57); + T3e = VFMA(T3c, T3d, T3b); + T2u = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T2x = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + { + V T5e, T3l, T4w, T2c; + T5e = VFNMS(T3j, T3h, T5d); + T3l = VFMA(T3j, T3k, T3i); + T4w = VMUL(T2a, T2e); + T2c = VMUL(T2a, T2b); + { + V T7c, T59, T3f, T5a; + T7c = VADD(T56, T58); + T59 = VSUB(T56, T58); + T3f = VADD(T38, T3e); + T5a = VSUB(T38, T3e); + { + V T7d, T5h, T3s, T5c; + T7d = VADD(T5e, T5g); + T5h = VSUB(T5e, T5g); + T3s = VADD(T3l, T3r); + T5c = VSUB(T3l, 
T3r); + T4x = VFNMS(T2d, T2b, T4w); + T2f = VFMA(T2d, T2e, T2c); + T5b = VSUB(T59, T5a); + T5s = VADD(T5a, T59); + T2t = LDW(&(W[TWVL * 24])); + T7O = VADD(T7c, T7d); + T7e = VSUB(T7c, T7d); + T5t = VSUB(T5c, T5h); + T5i = VADD(T5c, T5h); + T79 = VSUB(T3s, T3f); + T3t = VADD(T3f, T3s); + } + } + } + T2w = LDW(&(W[TWVL * 25])); + T4G = VMUL(T2t, T2x); + T2v = VMUL(T2t, T2u); + } + T2h = LD(&(ri[WS(rs, 21)]), ms, &(ri[WS(rs, 1)])); + T2k = LD(&(ii[WS(rs, 21)]), ms, &(ii[WS(rs, 1)])); + T2g = LDW(&(W[TWVL * 40])); + T2j = LDW(&(W[TWVL * 41])); + T2o = LD(&(ri[WS(rs, 29)]), ms, &(ri[WS(rs, 1)])); + T2r = LD(&(ii[WS(rs, 29)]), ms, &(ii[WS(rs, 1)])); + T4H = VFNMS(T2w, T2u, T4G); + T2y = VFMA(T2w, T2x, T2v); + T2n = LDW(&(W[TWVL * 56])); + T2q = LDW(&(W[TWVL * 57])); + T4y = VMUL(T2g, T2k); + T2i = VMUL(T2g, T2h); + } + } + } + } + } + { + V T4C, T4T, T4U, T4J, T7A, T7w, T7j, T75, T7i, T6U, T8p, T8n, T8v, T8t, T7q; + V T7y, T7t, T7z, T7g, T7k; + { + V T6E, T8j, T6H, T8k, T73, T6Y, T7S, T8i, T8h, T7V; + { + V T7P, T7Y, T7C, TO, T89, T8e, T3u, T7M, T8d, T1H, T7K, T7X, T2B, T7H; + { + V T71, T2m, T72, T4I, T2z, T4D, Tm, TN, T2A, T7J; + T6E = VSUB(T8, Tl); + Tm = VADD(T8, Tl); + TN = VADD(Tz, TM); + T8j = VSUB(TM, Tz); + T7P = VSUB(T7N, T7O); + T7Y = VADD(T7N, T7O); + { + V T82, T4E, T2p, T4z, T2l, T88; + T82 = VADD(T6F, T6G); + T6H = VSUB(T6F, T6G); + T4E = VMUL(T2n, T2r); + T2p = VMUL(T2n, T2o); + T4z = VFNMS(T2j, T2h, T4y); + T2l = VFMA(T2j, T2k, T2i); + T8k = VSUB(T87, T83); + T88 = VADD(T83, T87); + T7C = VSUB(Tm, TN); + TO = VADD(Tm, TN); + { + V T4F, T2s, T4A, T4B; + T4F = VFNMS(T2q, T2o, T4E); + T2s = VFMA(T2q, T2r, T2p); + T71 = VADD(T4x, T4z); + T4A = VSUB(T4x, T4z); + T2m = VADD(T2f, T2l); + T4B = VSUB(T2f, T2l); + T89 = VADD(T82, T88); + T8e = VSUB(T88, T82); + T72 = VADD(T4F, T4H); + T4I = VSUB(T4F, T4H); + T2z = VADD(T2s, T2y); + T4D = VSUB(T2s, T2y); + T4C = VSUB(T4A, T4B); + T4T = VADD(T4B, T4A); + } + } + T3u = VADD(T32, T3t); + T7M = VSUB(T32, T3t); + T7J = 
VADD(T71, T72); + T73 = VSUB(T71, T72); + T4U = VSUB(T4D, T4I); + T4J = VADD(T4D, T4I); + T6Y = VSUB(T2z, T2m); + T2A = VADD(T2m, T2z); + T8d = VSUB(T1G, T1f); + T1H = VADD(T1f, T1G); + T7K = VSUB(T7I, T7J); + T7X = VADD(T7I, T7J); + T2B = VADD(T29, T2A); + T7H = VSUB(T29, T2A); + } + { + V T1I, T80, T7Q, T7U, T7F, T7L, T7T, T3v, T8b, T8c, T8a, T7W, T81, T7Z; + T7W = VSUB(TO, T1H); + T1I = VADD(TO, T1H); + T7Z = VSUB(T7X, T7Y); + T80 = VADD(T7X, T7Y); + T7Q = VSUB(T7M, T7P); + T7U = VADD(T7M, T7P); + T7F = VSUB(T7D, T7E); + T81 = VADD(T7D, T7E); + T7L = VADD(T7H, T7K); + T7T = VSUB(T7K, T7H); + T3v = VADD(T2B, T3u); + T8b = VSUB(T3u, T2B); + ST(&(ri[WS(rs, 24)]), VSUB(T7W, T7Z), ms, &(ri[0])); + ST(&(ri[WS(rs, 8)]), VADD(T7W, T7Z), ms, &(ri[0])); + T8c = VSUB(T89, T81); + T8a = VADD(T81, T89); + { + V T8f, T8g, T7G, T7R; + T7S = VSUB(T7C, T7F); + T7G = VADD(T7C, T7F); + T7R = VADD(T7L, T7Q); + T8i = VSUB(T7Q, T7L); + T8h = VSUB(T8e, T8d); + T8f = VADD(T8d, T8e); + ST(&(ri[0]), VADD(T1I, T3v), ms, &(ri[0])); + ST(&(ri[WS(rs, 16)]), VSUB(T1I, T3v), ms, &(ri[0])); + T8g = VADD(T7T, T7U); + T7V = VSUB(T7T, T7U); + ST(&(ii[WS(rs, 16)]), VSUB(T8a, T80), ms, &(ii[0])); + ST(&(ii[0]), VADD(T80, T8a), ms, &(ii[0])); + ST(&(ii[WS(rs, 24)]), VSUB(T8c, T8b), ms, &(ii[0])); + ST(&(ii[WS(rs, 8)]), VADD(T8b, T8c), ms, &(ii[0])); + ST(&(ri[WS(rs, 4)]), VFMA(LDK(KP707106781), T7R, T7G), ms, &(ri[0])); + ST(&(ri[WS(rs, 20)]), VFNMS(LDK(KP707106781), T7R, T7G), ms, &(ri[0])); + ST(&(ii[WS(rs, 20)]), VFNMS(LDK(KP707106781), T8g, T8f), ms, &(ii[0])); + ST(&(ii[WS(rs, 4)]), VFMA(LDK(KP707106781), T8g, T8f), ms, &(ii[0])); + } + } + } + { + V T7f, T7a, T7m, T6I, T7s, T7r, T8r, T8l, T8m, T6T, T8s, T7p; + { + V T7n, T6N, T6S, T7o, T7u, T7v, T6Z, T74; + T7f = VSUB(T7b, T7e); + T7u = VADD(T7b, T7e); + T7v = VADD(T78, T79); + T7a = VSUB(T78, T79); + ST(&(ri[WS(rs, 12)]), VFMA(LDK(KP707106781), T7V, T7S), ms, &(ri[0])); + ST(&(ri[WS(rs, 28)]), VFNMS(LDK(KP707106781), T7V, T7S), ms, &(ri[0])); 
+ ST(&(ii[WS(rs, 28)]), VFNMS(LDK(KP707106781), T8i, T8h), ms, &(ii[0])); + ST(&(ii[WS(rs, 12)]), VFMA(LDK(KP707106781), T8i, T8h), ms, &(ii[0])); + T7m = VADD(T6E, T6H); + T6I = VSUB(T6E, T6H); + T7A = VFMA(LDK(KP414213562), T7u, T7v); + T7w = VFNMS(LDK(KP414213562), T7v, T7u); + T7n = VADD(T6M, T6L); + T6N = VSUB(T6L, T6M); + T6S = VADD(T6O, T6R); + T7o = VSUB(T6O, T6R); + T7s = VADD(T6X, T6Y); + T6Z = VSUB(T6X, T6Y); + T74 = VSUB(T70, T73); + T7r = VADD(T70, T73); + T8r = VSUB(T8k, T8j); + T8l = VADD(T8j, T8k); + T8m = VADD(T6N, T6S); + T6T = VSUB(T6N, T6S); + T7j = VFNMS(LDK(KP414213562), T6Z, T74); + T75 = VFMA(LDK(KP414213562), T74, T6Z); + T8s = VSUB(T7o, T7n); + T7p = VADD(T7n, T7o); + } + T7i = VFNMS(LDK(KP707106781), T6T, T6I); + T6U = VFMA(LDK(KP707106781), T6T, T6I); + T8p = VFNMS(LDK(KP707106781), T8m, T8l); + T8n = VFMA(LDK(KP707106781), T8m, T8l); + T8v = VFNMS(LDK(KP707106781), T8s, T8r); + T8t = VFMA(LDK(KP707106781), T8s, T8r); + T7q = VFMA(LDK(KP707106781), T7p, T7m); + T7y = VFNMS(LDK(KP707106781), T7p, T7m); + T7t = VFMA(LDK(KP414213562), T7s, T7r); + T7z = VFNMS(LDK(KP414213562), T7r, T7s); + T7g = VFNMS(LDK(KP414213562), T7f, T7a); + T7k = VFMA(LDK(KP414213562), T7a, T7f); + } + } + { + V T5S, T8O, T8N, T5V, T6d, T6g, T66, T4L, T5I, T69, T5y, T4o, T8J, T8L, T5M; + V T5Q, T5A, T5w, T5H, T4W, T5O, T5G, T8D, T8F; + { + V T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T5L, T5k, T5K, T5v, T4V; + { + V T5D, T47, T4m, T5E, T8z, T8A, T3C, T3R, T5j, T5u, T4K; + T5S = VSUB(T3w, T3B); + T3C = VADD(T3w, T3B); + T3R = VADD(T3J, T3Q); + T8O = VSUB(T3Q, T3J); + { + V T8o, T7B, T7x, T8q; + T8o = VADD(T7z, T7A); + T7B = VSUB(T7z, T7A); + T7x = VADD(T7t, T7w); + T8q = VSUB(T7w, T7t); + { + V T8u, T7l, T7h, T8w; + T8u = VSUB(T7k, T7j); + T7l = VADD(T7j, T7k); + T7h = VSUB(T75, T7g); + T8w = VADD(T75, T7g); + ST(&(ri[WS(rs, 10)]), VFMA(LDK(KP923879532), T7B, T7y), ms, &(ri[0])); + ST(&(ri[WS(rs, 26)]), VFNMS(LDK(KP923879532), T7B, T7y), ms, &(ri[0])); + ST(&(ii[WS(rs, 
18)]), VFNMS(LDK(KP923879532), T8o, T8n), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VFMA(LDK(KP923879532), T8o, T8n), ms, &(ii[0])); + ST(&(ii[WS(rs, 26)]), VFNMS(LDK(KP923879532), T8q, T8p), ms, &(ii[0])); + ST(&(ii[WS(rs, 10)]), VFMA(LDK(KP923879532), T8q, T8p), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VFMA(LDK(KP923879532), T7x, T7q), ms, &(ri[0])); + ST(&(ri[WS(rs, 18)]), VFNMS(LDK(KP923879532), T7x, T7q), ms, &(ri[0])); + ST(&(ri[WS(rs, 30)]), VFMA(LDK(KP923879532), T7l, T7i), ms, &(ri[0])); + ST(&(ri[WS(rs, 14)]), VFNMS(LDK(KP923879532), T7l, T7i), ms, &(ri[0])); + ST(&(ii[WS(rs, 22)]), VFNMS(LDK(KP923879532), T8u, T8t), ms, &(ii[0])); + ST(&(ii[WS(rs, 6)]), VFMA(LDK(KP923879532), T8u, T8t), ms, &(ii[0])); + ST(&(ii[WS(rs, 30)]), VFMA(LDK(KP923879532), T8w, T8v), ms, &(ii[0])); + ST(&(ii[WS(rs, 14)]), VFNMS(LDK(KP923879532), T8w, T8v), ms, &(ii[0])); + ST(&(ri[WS(rs, 6)]), VFMA(LDK(KP923879532), T7h, T6U), ms, &(ri[0])); + ST(&(ri[WS(rs, 22)]), VFNMS(LDK(KP923879532), T7h, T6U), ms, &(ri[0])); + T5C = VFMA(LDK(KP707106781), T3R, T3C); + T3S = VFNMS(LDK(KP707106781), T3R, T3C); + } + } + T5D = VFMA(LDK(KP414213562), T3Z, T46); + T47 = VFNMS(LDK(KP414213562), T46, T3Z); + T4m = VFMA(LDK(KP414213562), T4l, T4e); + T5E = VFNMS(LDK(KP414213562), T4e, T4l); + T8N = VADD(T8y, T8x); + T8z = VSUB(T8x, T8y); + T8A = VADD(T5T, T5U); + T5V = VSUB(T5T, T5U); + T6d = VSUB(T5i, T5b); + T5j = VADD(T5b, T5i); + T5u = VADD(T5s, T5t); + T6g = VSUB(T5s, T5t); + T66 = VSUB(T4J, T4C); + T4K = VADD(T4C, T4J); + T8C = VADD(T47, T4m); + T4n = VSUB(T47, T4m); + T8H = VFNMS(LDK(KP707106781), T8A, T8z); + T8B = VFMA(LDK(KP707106781), T8A, T8z); + T8I = VSUB(T5E, T5D); + T5F = VADD(T5D, T5E); + T5L = VFMA(LDK(KP707106781), T5j, T54); + T5k = VFNMS(LDK(KP707106781), T5j, T54); + T5K = VFMA(LDK(KP707106781), T5u, T5r); + T5v = VFNMS(LDK(KP707106781), T5u, T5r); + T4L = VFNMS(LDK(KP707106781), T4K, T4v); + T5I = VFMA(LDK(KP707106781), T4K, T4v); + T4V = VADD(T4T, T4U); + T69 = VSUB(T4T, T4U); + } 
+ T5y = VFNMS(LDK(KP923879532), T4n, T3S); + T4o = VFMA(LDK(KP923879532), T4n, T3S); + T8J = VFMA(LDK(KP923879532), T8I, T8H); + T8L = VFNMS(LDK(KP923879532), T8I, T8H); + T5M = VFNMS(LDK(KP198912367), T5L, T5K); + T5Q = VFMA(LDK(KP198912367), T5K, T5L); + T5A = VFMA(LDK(KP668178637), T5k, T5v); + T5w = VFNMS(LDK(KP668178637), T5v, T5k); + T5H = VFMA(LDK(KP707106781), T4V, T4S); + T4W = VFNMS(LDK(KP707106781), T4V, T4S); + T5O = VFNMS(LDK(KP923879532), T5F, T5C); + T5G = VFMA(LDK(KP923879532), T5F, T5C); + T8D = VFMA(LDK(KP923879532), T8C, T8B); + T8F = VFNMS(LDK(KP923879532), T8C, T8B); + } + { + V T6p, T6q, T6o, T5W, T8W, T63; + { + V T5J, T5P, T5z, T4X, T5Z, T62; + T5J = VFMA(LDK(KP198912367), T5I, T5H); + T5P = VFNMS(LDK(KP198912367), T5H, T5I); + T5z = VFNMS(LDK(KP668178637), T4L, T4W); + T4X = VFMA(LDK(KP668178637), T4W, T4L); + T6p = VFNMS(LDK(KP414213562), T5X, T5Y); + T5Z = VFMA(LDK(KP414213562), T5Y, T5X); + T62 = VFNMS(LDK(KP414213562), T61, T60); + T6q = VFMA(LDK(KP414213562), T60, T61); + { + V T8G, T5N, T5R, T8E; + T8G = VSUB(T5M, T5J); + T5N = VADD(T5J, T5M); + T5R = VSUB(T5P, T5Q); + T8E = VADD(T5P, T5Q); + { + V T5B, T8K, T8M, T5x; + T5B = VADD(T5z, T5A); + T8K = VSUB(T5A, T5z); + T8M = VADD(T4X, T5w); + T5x = VSUB(T4X, T5w); + T6o = VFNMS(LDK(KP707106781), T5V, T5S); + T5W = VFMA(LDK(KP707106781), T5V, T5S); + T8W = VADD(T5Z, T62); + T63 = VSUB(T5Z, T62); + ST(&(ii[WS(rs, 25)]), VFNMS(LDK(KP980785280), T8G, T8F), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 9)]), VFMA(LDK(KP980785280), T8G, T8F), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP980785280), T5N, T5G), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 17)]), VFNMS(LDK(KP980785280), T5N, T5G), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 9)]), VFMA(LDK(KP980785280), T5R, T5O), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 25)]), VFNMS(LDK(KP980785280), T5R, T5O), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 17)]), VFNMS(LDK(KP980785280), T8E, T8D), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), 
VFMA(LDK(KP980785280), T8E, T8D), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 29)]), VFMA(LDK(KP831469612), T5B, T5y), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VFNMS(LDK(KP831469612), T5B, T5y), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 21)]), VFNMS(LDK(KP831469612), T8K, T8J), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VFMA(LDK(KP831469612), T8K, T8J), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 29)]), VFMA(LDK(KP831469612), T8M, T8L), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 13)]), VFNMS(LDK(KP831469612), T8M, T8L), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VFMA(LDK(KP831469612), T5x, T4o), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 21)]), VFNMS(LDK(KP831469612), T5x, T4o), ms, &(ri[WS(rs, 1)])); + } + } + } + { + V T6k, T64, T8V, T6r, T8R, T8T, T6y, T6C, T6m, T6i, T6v, T6B, T6l, T6b, T6A; + V T6s, T8X; + { + V T6x, T6e, T6w, T6h, T6u, T67, T6t, T6a, T8P, T8Q; + T6k = VFNMS(LDK(KP923879532), T63, T5W); + T64 = VFMA(LDK(KP923879532), T63, T5W); + T8V = VFNMS(LDK(KP707106781), T8O, T8N); + T8P = VFMA(LDK(KP707106781), T8O, T8N); + T8Q = VSUB(T6q, T6p); + T6r = VADD(T6p, T6q); + T6x = VFMA(LDK(KP707106781), T6d, T6c); + T6e = VFNMS(LDK(KP707106781), T6d, T6c); + T6w = VFMA(LDK(KP707106781), T6g, T6f); + T6h = VFNMS(LDK(KP707106781), T6g, T6f); + T6u = VFMA(LDK(KP707106781), T66, T65); + T67 = VFNMS(LDK(KP707106781), T66, T65); + T6t = VFMA(LDK(KP707106781), T69, T68); + T6a = VFNMS(LDK(KP707106781), T69, T68); + T8R = VFMA(LDK(KP923879532), T8Q, T8P); + T8T = VFNMS(LDK(KP923879532), T8Q, T8P); + T6y = VFNMS(LDK(KP198912367), T6x, T6w); + T6C = VFMA(LDK(KP198912367), T6w, T6x); + T6m = VFMA(LDK(KP668178637), T6e, T6h); + T6i = VFNMS(LDK(KP668178637), T6h, T6e); + T6v = VFMA(LDK(KP198912367), T6u, T6t); + T6B = VFNMS(LDK(KP198912367), T6t, T6u); + T6l = VFNMS(LDK(KP668178637), T67, T6a); + T6b = VFMA(LDK(KP668178637), T6a, T67); + } + T6A = VFMA(LDK(KP923879532), T6r, T6o); + T6s = VFNMS(LDK(KP923879532), T6r, T6o); + T8X = VFNMS(LDK(KP923879532), T8W, T8V); + T8Z = 
VFMA(LDK(KP923879532), T8W, T8V); + { + V T6z, T6D, T8Y, T6n, T8S, T8U, T6j; + T6z = VSUB(T6v, T6y); + T90 = VADD(T6v, T6y); + T6D = VADD(T6B, T6C); + T8Y = VSUB(T6C, T6B); + T6n = VSUB(T6l, T6m); + T8S = VADD(T6l, T6m); + T8U = VSUB(T6i, T6b); + T6j = VADD(T6b, T6i); + ST(&(ri[WS(rs, 7)]), VFMA(LDK(KP980785280), T6z, T6s), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 23)]), VFNMS(LDK(KP980785280), T6z, T6s), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 23)]), VFNMS(LDK(KP980785280), T8Y, T8X), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VFMA(LDK(KP980785280), T8Y, T8X), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 11)]), VFMA(LDK(KP831469612), T6n, T6k), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 27)]), VFNMS(LDK(KP831469612), T6n, T6k), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 19)]), VFNMS(LDK(KP831469612), T8S, T8R), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP831469612), T8S, T8R), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 27)]), VFNMS(LDK(KP831469612), T8U, T8T), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 11)]), VFMA(LDK(KP831469612), T8U, T8T), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP831469612), T6j, T64), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 19)]), VFNMS(LDK(KP831469612), T6j, T64), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 31)]), VFMA(LDK(KP980785280), T6D, T6A), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VFNMS(LDK(KP980785280), T6D, T6A), ms, &(ri[WS(rs, 1)])); + } + } + } + } + } + } + ST(&(ii[WS(rs, 31)]), VFMA(LDK(KP980785280), T90, T8Z), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 15)]), VFNMS(LDK(KP980785280), T90, T8Z), ms, &(ii[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + 
VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t1sv_32"), twinstr, &GENUS, {236, 62, 198, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1sv_32) (planner *p) { + X(kdft_dit_register) (p, t1sv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1sv_32 -include ts.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 96 stack variables, 7 constants, and 128 memory accesses + */ +#include "ts.h" + +static void t1sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 62); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 62), MAKE_VOLATILE_STRIDE(64, rs)) { + V Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41; + V T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U; + V T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x; + V T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P; + V T4m, T5h, T4v, T5e; + { + V T1, T76, T6, T75, Tc, T32, Th, T33; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T76 = LD(&(ii[0]), ms, &(ii[0])); + { + V T3, T5, T2, T4; + T3 = LD(&(ri[WS(rs, 16)]), ms, &(ri[0])); + T5 = 
LD(&(ii[WS(rs, 16)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 30])); + T4 = LDW(&(W[TWVL * 31])); + T6 = VFMA(T2, T3, VMUL(T4, T5)); + T75 = VFNMS(T4, T3, VMUL(T2, T5)); + } + { + V T9, Tb, T8, Ta; + T9 = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + Tb = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + T8 = LDW(&(W[TWVL * 14])); + Ta = LDW(&(W[TWVL * 15])); + Tc = VFMA(T8, T9, VMUL(Ta, Tb)); + T32 = VFNMS(Ta, T9, VMUL(T8, Tb)); + } + { + V Te, Tg, Td, Tf; + Te = LD(&(ri[WS(rs, 24)]), ms, &(ri[0])); + Tg = LD(&(ii[WS(rs, 24)]), ms, &(ii[0])); + Td = LDW(&(W[TWVL * 46])); + Tf = LDW(&(W[TWVL * 47])); + Th = VFMA(Td, Te, VMUL(Tf, Tg)); + T33 = VFNMS(Tf, Te, VMUL(Td, Tg)); + } + { + V T7, Ti, T7A, T7B; + T7 = VADD(T1, T6); + Ti = VADD(Tc, Th); + Tj = VADD(T7, Ti); + T5F = VSUB(T7, Ti); + T7A = VSUB(T76, T75); + T7B = VSUB(Tc, Th); + T7C = VSUB(T7A, T7B); + T7Q = VADD(T7B, T7A); + } + { + V T31, T34, T74, T77; + T31 = VSUB(T1, T6); + T34 = VSUB(T32, T33); + T35 = VSUB(T31, T34); + T4T = VADD(T31, T34); + T74 = VADD(T32, T33); + T77 = VADD(T75, T76); + T78 = VADD(T74, T77); + T7m = VSUB(T77, T74); + } + } + { + V T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y; + { + V T1v, T1x, T1u, T1w; + T1v = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T1x = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T1u = LDW(&(W[0])); + T1w = LDW(&(W[TWVL * 1])); + T1y = VFMA(T1u, T1v, VMUL(T1w, T1x)); + T3G = VFNMS(T1w, T1v, VMUL(T1u, T1x)); + } + { + V T1L, T1N, T1K, T1M; + T1L = LD(&(ri[WS(rs, 25)]), ms, &(ri[WS(rs, 1)])); + T1N = LD(&(ii[WS(rs, 25)]), ms, &(ii[WS(rs, 1)])); + T1K = LDW(&(W[TWVL * 48])); + T1M = LDW(&(W[TWVL * 49])); + T1O = VFMA(T1K, T1L, VMUL(T1M, T1N)); + T3Z = VFNMS(T1M, T1L, VMUL(T1K, T1N)); + } + { + V T1A, T1C, T1z, T1B; + T1A = LD(&(ri[WS(rs, 17)]), ms, &(ri[WS(rs, 1)])); + T1C = LD(&(ii[WS(rs, 17)]), ms, &(ii[WS(rs, 1)])); + T1z = LDW(&(W[TWVL * 32])); + T1B = LDW(&(W[TWVL * 33])); + T1D = VFMA(T1z, T1A, VMUL(T1B, T1C)); + T3H = VFNMS(T1B, T1A, VMUL(T1z, T1C)); + } + { + V T1G, T1I, T1F, T1H; + 
T1G = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T1I = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + T1F = LDW(&(W[TWVL * 16])); + T1H = LDW(&(W[TWVL * 17])); + T1J = VFMA(T1F, T1G, VMUL(T1H, T1I)); + T3Y = VFNMS(T1H, T1G, VMUL(T1F, T1I)); + } + { + V T1E, T1P, T5W, T5X; + T1E = VADD(T1y, T1D); + T1P = VADD(T1J, T1O); + T1Q = VADD(T1E, T1P); + T61 = VSUB(T1E, T1P); + T5W = VADD(T3G, T3H); + T5X = VADD(T3Y, T3Z); + T5Y = VSUB(T5W, T5X); + T6J = VADD(T5W, T5X); + } + { + V T3I, T3J, T3X, T40; + T3I = VSUB(T3G, T3H); + T3J = VSUB(T1J, T1O); + T3K = VADD(T3I, T3J); + T59 = VSUB(T3I, T3J); + T3X = VSUB(T1y, T1D); + T40 = VSUB(T3Y, T3Z); + T41 = VSUB(T3X, T40); + T56 = VADD(T3X, T40); + } + } + { + V T2j, T4o, T2z, T49, T2o, T4p, T2u, T48; + { + V T2g, T2i, T2f, T2h; + T2g = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)])); + T2i = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)])); + T2f = LDW(&(W[TWVL * 60])); + T2h = LDW(&(W[TWVL * 61])); + T2j = VFMA(T2f, T2g, VMUL(T2h, T2i)); + T4o = VFNMS(T2h, T2g, VMUL(T2f, T2i)); + } + { + V T2w, T2y, T2v, T2x; + T2w = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)])); + T2y = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)])); + T2v = LDW(&(W[TWVL * 44])); + T2x = LDW(&(W[TWVL * 45])); + T2z = VFMA(T2v, T2w, VMUL(T2x, T2y)); + T49 = VFNMS(T2x, T2w, VMUL(T2v, T2y)); + } + { + V T2l, T2n, T2k, T2m; + T2l = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T2n = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T2k = LDW(&(W[TWVL * 28])); + T2m = LDW(&(W[TWVL * 29])); + T2o = VFMA(T2k, T2l, VMUL(T2m, T2n)); + T4p = VFNMS(T2m, T2l, VMUL(T2k, T2n)); + } + { + V T2r, T2t, T2q, T2s; + T2r = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T2t = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T2q = LDW(&(W[TWVL * 12])); + T2s = LDW(&(W[TWVL * 13])); + T2u = VFMA(T2q, T2r, VMUL(T2s, T2t)); + T48 = VFNMS(T2s, T2r, VMUL(T2q, T2t)); + } + { + V T2p, T2A, T6c, T6d; + T2p = VADD(T2j, T2o); + T2A = VADD(T2u, T2z); + T2B = VADD(T2p, T2A); + T67 = VSUB(T2p, T2A); + T6c = 
VADD(T4o, T4p); + T6d = VADD(T48, T49); + T6e = VSUB(T6c, T6d); + T6O = VADD(T6c, T6d); + } + { + V T47, T4a, T4q, T4r; + T47 = VSUB(T2j, T2o); + T4a = VSUB(T48, T49); + T4b = VSUB(T47, T4a); + T5d = VADD(T47, T4a); + T4q = VSUB(T4o, T4p); + T4r = VSUB(T2u, T2z); + T4s = VADD(T4q, T4r); + T5g = VSUB(T4q, T4r); + } + } + { + V To, T36, TE, T3d, Tt, T37, Tz, T3c; + { + V Tl, Tn, Tk, Tm; + Tl = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + Tn = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + Tk = LDW(&(W[TWVL * 6])); + Tm = LDW(&(W[TWVL * 7])); + To = VFMA(Tk, Tl, VMUL(Tm, Tn)); + T36 = VFNMS(Tm, Tl, VMUL(Tk, Tn)); + } + { + V TB, TD, TA, TC; + TB = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + TD = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + TA = LDW(&(W[TWVL * 22])); + TC = LDW(&(W[TWVL * 23])); + TE = VFMA(TA, TB, VMUL(TC, TD)); + T3d = VFNMS(TC, TB, VMUL(TA, TD)); + } + { + V Tq, Ts, Tp, Tr; + Tq = LD(&(ri[WS(rs, 20)]), ms, &(ri[0])); + Ts = LD(&(ii[WS(rs, 20)]), ms, &(ii[0])); + Tp = LDW(&(W[TWVL * 38])); + Tr = LDW(&(W[TWVL * 39])); + Tt = VFMA(Tp, Tq, VMUL(Tr, Ts)); + T37 = VFNMS(Tr, Tq, VMUL(Tp, Ts)); + } + { + V Tw, Ty, Tv, Tx; + Tw = LD(&(ri[WS(rs, 28)]), ms, &(ri[0])); + Ty = LD(&(ii[WS(rs, 28)]), ms, &(ii[0])); + Tv = LDW(&(W[TWVL * 54])); + Tx = LDW(&(W[TWVL * 55])); + Tz = VFMA(Tv, Tw, VMUL(Tx, Ty)); + T3c = VFNMS(Tx, Tw, VMUL(Tv, Ty)); + } + { + V Tu, TF, T5G, T5H; + Tu = VADD(To, Tt); + TF = VADD(Tz, TE); + TG = VADD(Tu, TF); + T7l = VSUB(TF, Tu); + T5G = VADD(T36, T37); + T5H = VADD(T3c, T3d); + T5I = VSUB(T5G, T5H); + T73 = VADD(T5G, T5H); + } + { + V T38, T39, T3b, T3e; + T38 = VSUB(T36, T37); + T39 = VSUB(To, Tt); + T3a = VSUB(T38, T39); + T4U = VADD(T39, T38); + T3b = VSUB(Tz, TE); + T3e = VSUB(T3c, T3d); + T3f = VADD(T3b, T3e); + T4V = VSUB(T3b, T3e); + } + } + { + V TM, T3i, T12, T3p, TR, T3j, TX, T3o; + { + V TJ, TL, TI, TK; + TJ = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + TL = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + TI = LDW(&(W[TWVL * 2])); + TK = LDW(&(W[TWVL * 3])); + TM = 
VFMA(TI, TJ, VMUL(TK, TL)); + T3i = VFNMS(TK, TJ, VMUL(TI, TL)); + } + { + V TZ, T11, TY, T10; + TZ = LD(&(ri[WS(rs, 26)]), ms, &(ri[0])); + T11 = LD(&(ii[WS(rs, 26)]), ms, &(ii[0])); + TY = LDW(&(W[TWVL * 50])); + T10 = LDW(&(W[TWVL * 51])); + T12 = VFMA(TY, TZ, VMUL(T10, T11)); + T3p = VFNMS(T10, TZ, VMUL(TY, T11)); + } + { + V TO, TQ, TN, TP; + TO = LD(&(ri[WS(rs, 18)]), ms, &(ri[0])); + TQ = LD(&(ii[WS(rs, 18)]), ms, &(ii[0])); + TN = LDW(&(W[TWVL * 34])); + TP = LDW(&(W[TWVL * 35])); + TR = VFMA(TN, TO, VMUL(TP, TQ)); + T3j = VFNMS(TP, TO, VMUL(TN, TQ)); + } + { + V TU, TW, TT, TV; + TU = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + TW = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + TT = LDW(&(W[TWVL * 18])); + TV = LDW(&(W[TWVL * 19])); + TX = VFMA(TT, TU, VMUL(TV, TW)); + T3o = VFNMS(TV, TU, VMUL(TT, TW)); + } + { + V TS, T13, T5K, T5L; + TS = VADD(TM, TR); + T13 = VADD(TX, T12); + T14 = VADD(TS, T13); + T5N = VSUB(TS, T13); + T5K = VADD(T3i, T3j); + T5L = VADD(T3o, T3p); + T5M = VSUB(T5K, T5L); + T6E = VADD(T5K, T5L); + } + { + V T3k, T3l, T3n, T3q; + T3k = VSUB(T3i, T3j); + T3l = VSUB(TX, T12); + T3m = VADD(T3k, T3l); + T4Y = VSUB(T3k, T3l); + T3n = VSUB(TM, TR); + T3q = VSUB(T3o, T3p); + T3r = VSUB(T3n, T3q); + T4Z = VADD(T3n, T3q); + } + } + { + V T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z; + { + V T16, T18, T15, T17; + T16 = LD(&(ri[WS(rs, 30)]), ms, &(ri[0])); + T18 = LD(&(ii[WS(rs, 30)]), ms, &(ii[0])); + T15 = LDW(&(W[TWVL * 58])); + T17 = LDW(&(W[TWVL * 59])); + T19 = VFMA(T15, T16, VMUL(T17, T18)); + T3t = VFNMS(T17, T16, VMUL(T15, T18)); + } + { + V T1m, T1o, T1l, T1n; + T1m = LD(&(ri[WS(rs, 22)]), ms, &(ri[0])); + T1o = LD(&(ii[WS(rs, 22)]), ms, &(ii[0])); + T1l = LDW(&(W[TWVL * 42])); + T1n = LDW(&(W[TWVL * 43])); + T1p = VFMA(T1l, T1m, VMUL(T1n, T1o)); + T3A = VFNMS(T1n, T1m, VMUL(T1l, T1o)); + } + { + V T1b, T1d, T1a, T1c; + T1b = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + T1d = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + T1a = LDW(&(W[TWVL * 26])); + T1c = 
LDW(&(W[TWVL * 27])); + T1e = VFMA(T1a, T1b, VMUL(T1c, T1d)); + T3u = VFNMS(T1c, T1b, VMUL(T1a, T1d)); + } + { + V T1h, T1j, T1g, T1i; + T1h = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + T1j = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + T1g = LDW(&(W[TWVL * 10])); + T1i = LDW(&(W[TWVL * 11])); + T1k = VFMA(T1g, T1h, VMUL(T1i, T1j)); + T3z = VFNMS(T1i, T1h, VMUL(T1g, T1j)); + } + { + V T1f, T1q, T5Q, T5R; + T1f = VADD(T19, T1e); + T1q = VADD(T1k, T1p); + T1r = VADD(T1f, T1q); + T5P = VSUB(T1f, T1q); + T5Q = VADD(T3t, T3u); + T5R = VADD(T3z, T3A); + T5S = VSUB(T5Q, T5R); + T6F = VADD(T5Q, T5R); + } + { + V T3v, T3w, T3y, T3B; + T3v = VSUB(T3t, T3u); + T3w = VSUB(T1k, T1p); + T3x = VADD(T3v, T3w); + T51 = VSUB(T3v, T3w); + T3y = VSUB(T19, T1e); + T3B = VSUB(T3z, T3A); + T3C = VSUB(T3y, T3B); + T52 = VADD(T3y, T3B); + } + } + { + V T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O; + { + V T1S, T1U, T1R, T1T; + T1S = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T1U = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T1R = LDW(&(W[TWVL * 8])); + T1T = LDW(&(W[TWVL * 9])); + T1V = VFMA(T1R, T1S, VMUL(T1T, T1U)); + T3R = VFNMS(T1T, T1S, VMUL(T1R, T1U)); + } + { + V T1X, T1Z, T1W, T1Y; + T1X = LD(&(ri[WS(rs, 21)]), ms, &(ri[WS(rs, 1)])); + T1Z = LD(&(ii[WS(rs, 21)]), ms, &(ii[WS(rs, 1)])); + T1W = LDW(&(W[TWVL * 40])); + T1Y = LDW(&(W[TWVL * 41])); + T20 = VFMA(T1W, T1X, VMUL(T1Y, T1Z)); + T3S = VFNMS(T1Y, T1X, VMUL(T1W, T1Z)); + } + T3Q = VSUB(T1V, T20); + T3T = VSUB(T3R, T3S); + { + V T23, T25, T22, T24; + T23 = LD(&(ri[WS(rs, 29)]), ms, &(ri[WS(rs, 1)])); + T25 = LD(&(ii[WS(rs, 29)]), ms, &(ii[WS(rs, 1)])); + T22 = LDW(&(W[TWVL * 56])); + T24 = LDW(&(W[TWVL * 57])); + T26 = VFMA(T22, T23, VMUL(T24, T25)); + T3M = VFNMS(T24, T23, VMUL(T22, T25)); + } + { + V T28, T2a, T27, T29; + T28 = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T2a = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + T27 = LDW(&(W[TWVL * 24])); + T29 = LDW(&(W[TWVL * 25])); + T2b = VFMA(T27, T28, VMUL(T29, 
T2a)); + T3N = VFNMS(T29, T28, VMUL(T27, T2a)); + } + T3L = VSUB(T26, T2b); + T3O = VSUB(T3M, T3N); + { + V T21, T2c, T62, T63; + T21 = VADD(T1V, T20); + T2c = VADD(T26, T2b); + T2d = VADD(T21, T2c); + T5Z = VSUB(T2c, T21); + T62 = VADD(T3R, T3S); + T63 = VADD(T3M, T3N); + T64 = VSUB(T62, T63); + T6K = VADD(T62, T63); + } + { + V T3P, T3U, T42, T43; + T3P = VSUB(T3L, T3O); + T3U = VADD(T3Q, T3T); + T3V = VMUL(LDK(KP707106781), VSUB(T3P, T3U)); + T57 = VMUL(LDK(KP707106781), VADD(T3U, T3P)); + T42 = VSUB(T3T, T3Q); + T43 = VADD(T3L, T3O); + T44 = VMUL(LDK(KP707106781), VSUB(T42, T43)); + T5a = VMUL(LDK(KP707106781), VADD(T42, T43)); + } + } + { + V T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k; + { + V T2D, T2F, T2C, T2E; + T2D = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T2F = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T2C = LDW(&(W[TWVL * 4])); + T2E = LDW(&(W[TWVL * 5])); + T2G = VFMA(T2C, T2D, VMUL(T2E, T2F)); + T4c = VFNMS(T2E, T2D, VMUL(T2C, T2F)); + } + { + V T2I, T2K, T2H, T2J; + T2I = LD(&(ri[WS(rs, 19)]), ms, &(ri[WS(rs, 1)])); + T2K = LD(&(ii[WS(rs, 19)]), ms, &(ii[WS(rs, 1)])); + T2H = LDW(&(W[TWVL * 36])); + T2J = LDW(&(W[TWVL * 37])); + T2L = VFMA(T2H, T2I, VMUL(T2J, T2K)); + T4d = VFNMS(T2J, T2I, VMUL(T2H, T2K)); + } + T4e = VSUB(T4c, T4d); + T4f = VSUB(T2G, T2L); + { + V T2O, T2Q, T2N, T2P; + T2O = LD(&(ri[WS(rs, 27)]), ms, &(ri[WS(rs, 1)])); + T2Q = LD(&(ii[WS(rs, 27)]), ms, &(ii[WS(rs, 1)])); + T2N = LDW(&(W[TWVL * 52])); + T2P = LDW(&(W[TWVL * 53])); + T2R = VFMA(T2N, T2O, VMUL(T2P, T2Q)); + T4i = VFNMS(T2P, T2O, VMUL(T2N, T2Q)); + } + { + V T2T, T2V, T2S, T2U; + T2T = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T2V = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + T2S = LDW(&(W[TWVL * 20])); + T2U = LDW(&(W[TWVL * 21])); + T2W = VFMA(T2S, T2T, VMUL(T2U, T2V)); + T4j = VFNMS(T2U, T2T, VMUL(T2S, T2V)); + } + T4h = VSUB(T2R, T2W); + T4k = VSUB(T4i, T4j); + { + V T2M, T2X, T68, T69; + T2M = VADD(T2G, T2L); + T2X = VADD(T2R, T2W); 
+ T2Y = VADD(T2M, T2X); + T6f = VSUB(T2X, T2M); + T68 = VADD(T4c, T4d); + T69 = VADD(T4i, T4j); + T6a = VSUB(T68, T69); + T6P = VADD(T68, T69); + } + { + V T4g, T4l, T4t, T4u; + T4g = VSUB(T4e, T4f); + T4l = VADD(T4h, T4k); + T4m = VMUL(LDK(KP707106781), VSUB(T4g, T4l)); + T5h = VMUL(LDK(KP707106781), VADD(T4g, T4l)); + T4t = VSUB(T4h, T4k); + T4u = VADD(T4f, T4e); + T4v = VMUL(LDK(KP707106781), VSUB(T4t, T4u)); + T5e = VMUL(LDK(KP707106781), VADD(T4u, T4t)); + } + } + { + V T1t, T6X, T7a, T7c, T30, T7b, T70, T71; + { + V TH, T1s, T72, T79; + TH = VADD(Tj, TG); + T1s = VADD(T14, T1r); + T1t = VADD(TH, T1s); + T6X = VSUB(TH, T1s); + T72 = VADD(T6E, T6F); + T79 = VADD(T73, T78); + T7a = VADD(T72, T79); + T7c = VSUB(T79, T72); + } + { + V T2e, T2Z, T6Y, T6Z; + T2e = VADD(T1Q, T2d); + T2Z = VADD(T2B, T2Y); + T30 = VADD(T2e, T2Z); + T7b = VSUB(T2Z, T2e); + T6Y = VADD(T6J, T6K); + T6Z = VADD(T6O, T6P); + T70 = VSUB(T6Y, T6Z); + T71 = VADD(T6Y, T6Z); + } + ST(&(ri[WS(rs, 16)]), VSUB(T1t, T30), ms, &(ri[0])); + ST(&(ii[WS(rs, 16)]), VSUB(T7a, T71), ms, &(ii[0])); + ST(&(ri[0]), VADD(T1t, T30), ms, &(ri[0])); + ST(&(ii[0]), VADD(T71, T7a), ms, &(ii[0])); + ST(&(ri[WS(rs, 24)]), VSUB(T6X, T70), ms, &(ri[0])); + ST(&(ii[WS(rs, 24)]), VSUB(T7c, T7b), ms, &(ii[0])); + ST(&(ri[WS(rs, 8)]), VADD(T6X, T70), ms, &(ri[0])); + ST(&(ii[WS(rs, 8)]), VADD(T7b, T7c), ms, &(ii[0])); + } + { + V T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V; + { + V T6D, T6G, T7e, T7f; + T6D = VSUB(Tj, TG); + T6G = VSUB(T6E, T6F); + T6H = VADD(T6D, T6G); + T6T = VSUB(T6D, T6G); + T7e = VSUB(T1r, T14); + T7f = VSUB(T78, T73); + T7g = VADD(T7e, T7f); + T7i = VSUB(T7f, T7e); + } + { + V T6I, T6L, T6N, T6Q; + T6I = VSUB(T1Q, T2d); + T6L = VSUB(T6J, T6K); + T6M = VADD(T6I, T6L); + T6U = VSUB(T6L, T6I); + T6N = VSUB(T2B, T2Y); + T6Q = VSUB(T6O, T6P); + T6R = VSUB(T6N, T6Q); + T6V = VADD(T6N, T6Q); + } + { + V T6S, T7d, T6W, T7h; + T6S = VMUL(LDK(KP707106781), VADD(T6M, T6R)); + ST(&(ri[WS(rs, 20)]), VSUB(T6H, T6S), ms, 
&(ri[0])); + ST(&(ri[WS(rs, 4)]), VADD(T6H, T6S), ms, &(ri[0])); + T7d = VMUL(LDK(KP707106781), VADD(T6U, T6V)); + ST(&(ii[WS(rs, 4)]), VADD(T7d, T7g), ms, &(ii[0])); + ST(&(ii[WS(rs, 20)]), VSUB(T7g, T7d), ms, &(ii[0])); + T6W = VMUL(LDK(KP707106781), VSUB(T6U, T6V)); + ST(&(ri[WS(rs, 28)]), VSUB(T6T, T6W), ms, &(ri[0])); + ST(&(ri[WS(rs, 12)]), VADD(T6T, T6W), ms, &(ri[0])); + T7h = VMUL(LDK(KP707106781), VSUB(T6R, T6M)); + ST(&(ii[WS(rs, 12)]), VADD(T7h, T7i), ms, &(ii[0])); + ST(&(ii[WS(rs, 28)]), VSUB(T7i, T7h), ms, &(ii[0])); + } + } + { + V T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h; + V T6l; + { + V T5O, T5T, T60, T65; + T5J = VSUB(T5F, T5I); + T7n = VADD(T7l, T7m); + T7t = VSUB(T7m, T7l); + T6n = VADD(T5F, T5I); + T5O = VSUB(T5M, T5N); + T5T = VADD(T5P, T5S); + T5U = VMUL(LDK(KP707106781), VSUB(T5O, T5T)); + T7k = VMUL(LDK(KP707106781), VADD(T5O, T5T)); + { + V T6v, T6w, T6o, T6p; + T6v = VADD(T67, T6a); + T6w = VADD(T6e, T6f); + T6x = VFNMS(LDK(KP382683432), T6w, VMUL(LDK(KP923879532), T6v)); + T6B = VFMA(LDK(KP923879532), T6w, VMUL(LDK(KP382683432), T6v)); + T6o = VADD(T5N, T5M); + T6p = VSUB(T5P, T5S); + T6q = VMUL(LDK(KP707106781), VADD(T6o, T6p)); + T7s = VMUL(LDK(KP707106781), VSUB(T6p, T6o)); + } + T60 = VSUB(T5Y, T5Z); + T65 = VSUB(T61, T64); + T66 = VFMA(LDK(KP923879532), T60, VMUL(LDK(KP382683432), T65)); + T6k = VFNMS(LDK(KP923879532), T65, VMUL(LDK(KP382683432), T60)); + { + V T6s, T6t, T6b, T6g; + T6s = VADD(T5Y, T5Z); + T6t = VADD(T61, T64); + T6u = VFMA(LDK(KP382683432), T6s, VMUL(LDK(KP923879532), T6t)); + T6A = VFNMS(LDK(KP382683432), T6t, VMUL(LDK(KP923879532), T6s)); + T6b = VSUB(T67, T6a); + T6g = VSUB(T6e, T6f); + T6h = VFNMS(LDK(KP923879532), T6g, VMUL(LDK(KP382683432), T6b)); + T6l = VFMA(LDK(KP382683432), T6g, VMUL(LDK(KP923879532), T6b)); + } + } + { + V T5V, T6i, T7r, T7u; + T5V = VADD(T5J, T5U); + T6i = VADD(T66, T6h); + ST(&(ri[WS(rs, 22)]), VSUB(T5V, T6i), ms, &(ri[0])); + ST(&(ri[WS(rs, 6)]), 
VADD(T5V, T6i), ms, &(ri[0])); + T7r = VADD(T6k, T6l); + T7u = VADD(T7s, T7t); + ST(&(ii[WS(rs, 6)]), VADD(T7r, T7u), ms, &(ii[0])); + ST(&(ii[WS(rs, 22)]), VSUB(T7u, T7r), ms, &(ii[0])); + } + { + V T6j, T6m, T7v, T7w; + T6j = VSUB(T5J, T5U); + T6m = VSUB(T6k, T6l); + ST(&(ri[WS(rs, 30)]), VSUB(T6j, T6m), ms, &(ri[0])); + ST(&(ri[WS(rs, 14)]), VADD(T6j, T6m), ms, &(ri[0])); + T7v = VSUB(T6h, T66); + T7w = VSUB(T7t, T7s); + ST(&(ii[WS(rs, 14)]), VADD(T7v, T7w), ms, &(ii[0])); + ST(&(ii[WS(rs, 30)]), VSUB(T7w, T7v), ms, &(ii[0])); + } + { + V T6r, T6y, T7j, T7o; + T6r = VADD(T6n, T6q); + T6y = VADD(T6u, T6x); + ST(&(ri[WS(rs, 18)]), VSUB(T6r, T6y), ms, &(ri[0])); + ST(&(ri[WS(rs, 2)]), VADD(T6r, T6y), ms, &(ri[0])); + T7j = VADD(T6A, T6B); + T7o = VADD(T7k, T7n); + ST(&(ii[WS(rs, 2)]), VADD(T7j, T7o), ms, &(ii[0])); + ST(&(ii[WS(rs, 18)]), VSUB(T7o, T7j), ms, &(ii[0])); + } + { + V T6z, T6C, T7p, T7q; + T6z = VSUB(T6n, T6q); + T6C = VSUB(T6A, T6B); + ST(&(ri[WS(rs, 26)]), VSUB(T6z, T6C), ms, &(ri[0])); + ST(&(ri[WS(rs, 10)]), VADD(T6z, T6C), ms, &(ri[0])); + T7p = VSUB(T6x, T6u); + T7q = VSUB(T7n, T7k); + ST(&(ii[WS(rs, 10)]), VADD(T7p, T7q), ms, &(ii[0])); + ST(&(ii[WS(rs, 26)]), VSUB(T7q, T7p), ms, &(ii[0])); + } + } + { + V T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x; + V T4B, T3g, T7P; + T3g = VMUL(LDK(KP707106781), VSUB(T3a, T3f)); + T3h = VSUB(T35, T3g); + T4D = VADD(T35, T3g); + T7P = VMUL(LDK(KP707106781), VSUB(T4V, T4U)); + T7R = VADD(T7P, T7Q); + T7X = VSUB(T7Q, T7P); + { + V T3s, T3D, T4L, T4M; + T3s = VFNMS(LDK(KP923879532), T3r, VMUL(LDK(KP382683432), T3m)); + T3D = VFMA(LDK(KP382683432), T3x, VMUL(LDK(KP923879532), T3C)); + T3E = VSUB(T3s, T3D); + T7O = VADD(T3s, T3D); + T4L = VADD(T4b, T4m); + T4M = VADD(T4s, T4v); + T4N = VFNMS(LDK(KP555570233), T4M, VMUL(LDK(KP831469612), T4L)); + T4R = VFMA(LDK(KP831469612), T4M, VMUL(LDK(KP555570233), T4L)); + } + { + V T3W, T45, T4E, T4F; + T3W = VSUB(T3K, T3V); + T45 = VSUB(T41, 
T44); + T46 = VFMA(LDK(KP980785280), T3W, VMUL(LDK(KP195090322), T45)); + T4A = VFNMS(LDK(KP980785280), T45, VMUL(LDK(KP195090322), T3W)); + T4E = VFMA(LDK(KP923879532), T3m, VMUL(LDK(KP382683432), T3r)); + T4F = VFNMS(LDK(KP923879532), T3x, VMUL(LDK(KP382683432), T3C)); + T4G = VADD(T4E, T4F); + T7W = VSUB(T4F, T4E); + } + { + V T4I, T4J, T4n, T4w; + T4I = VADD(T3K, T3V); + T4J = VADD(T41, T44); + T4K = VFMA(LDK(KP555570233), T4I, VMUL(LDK(KP831469612), T4J)); + T4Q = VFNMS(LDK(KP555570233), T4J, VMUL(LDK(KP831469612), T4I)); + T4n = VSUB(T4b, T4m); + T4w = VSUB(T4s, T4v); + T4x = VFNMS(LDK(KP980785280), T4w, VMUL(LDK(KP195090322), T4n)); + T4B = VFMA(LDK(KP195090322), T4w, VMUL(LDK(KP980785280), T4n)); + } + { + V T3F, T4y, T7V, T7Y; + T3F = VADD(T3h, T3E); + T4y = VADD(T46, T4x); + ST(&(ri[WS(rs, 23)]), VSUB(T3F, T4y), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VADD(T3F, T4y), ms, &(ri[WS(rs, 1)])); + T7V = VADD(T4A, T4B); + T7Y = VADD(T7W, T7X); + ST(&(ii[WS(rs, 7)]), VADD(T7V, T7Y), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 23)]), VSUB(T7Y, T7V), ms, &(ii[WS(rs, 1)])); + } + { + V T4z, T4C, T7Z, T80; + T4z = VSUB(T3h, T3E); + T4C = VSUB(T4A, T4B); + ST(&(ri[WS(rs, 31)]), VSUB(T4z, T4C), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VADD(T4z, T4C), ms, &(ri[WS(rs, 1)])); + T7Z = VSUB(T4x, T46); + T80 = VSUB(T7X, T7W); + ST(&(ii[WS(rs, 15)]), VADD(T7Z, T80), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 31)]), VSUB(T80, T7Z), ms, &(ii[WS(rs, 1)])); + } + { + V T4H, T4O, T7N, T7S; + T4H = VADD(T4D, T4G); + T4O = VADD(T4K, T4N); + ST(&(ri[WS(rs, 19)]), VSUB(T4H, T4O), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VADD(T4H, T4O), ms, &(ri[WS(rs, 1)])); + T7N = VADD(T4Q, T4R); + T7S = VADD(T7O, T7R); + ST(&(ii[WS(rs, 3)]), VADD(T7N, T7S), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 19)]), VSUB(T7S, T7N), ms, &(ii[WS(rs, 1)])); + } + { + V T4P, T4S, T7T, T7U; + T4P = VSUB(T4D, T4G); + T4S = VSUB(T4Q, T4R); + ST(&(ri[WS(rs, 27)]), VSUB(T4P, T4S), ms, &(ri[WS(rs, 1)])); + 
ST(&(ri[WS(rs, 11)]), VADD(T4P, T4S), ms, &(ri[WS(rs, 1)])); + T7T = VSUB(T4N, T4K); + T7U = VSUB(T7R, T7O); + ST(&(ii[WS(rs, 11)]), VADD(T7T, T7U), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 27)]), VSUB(T7U, T7T), ms, &(ii[WS(rs, 1)])); + } + } + { + V T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j; + V T5n, T4W, T7z; + T4W = VMUL(LDK(KP707106781), VADD(T4U, T4V)); + T4X = VSUB(T4T, T4W); + T5p = VADD(T4T, T4W); + T7z = VMUL(LDK(KP707106781), VADD(T3a, T3f)); + T7D = VADD(T7z, T7C); + T7J = VSUB(T7C, T7z); + { + V T50, T53, T5x, T5y; + T50 = VFNMS(LDK(KP382683432), T4Z, VMUL(LDK(KP923879532), T4Y)); + T53 = VFMA(LDK(KP923879532), T51, VMUL(LDK(KP382683432), T52)); + T54 = VSUB(T50, T53); + T7y = VADD(T50, T53); + T5x = VADD(T5d, T5e); + T5y = VADD(T5g, T5h); + T5z = VFNMS(LDK(KP195090322), T5y, VMUL(LDK(KP980785280), T5x)); + T5D = VFMA(LDK(KP195090322), T5x, VMUL(LDK(KP980785280), T5y)); + } + { + V T58, T5b, T5q, T5r; + T58 = VSUB(T56, T57); + T5b = VSUB(T59, T5a); + T5c = VFMA(LDK(KP555570233), T58, VMUL(LDK(KP831469612), T5b)); + T5m = VFNMS(LDK(KP831469612), T58, VMUL(LDK(KP555570233), T5b)); + T5q = VFMA(LDK(KP382683432), T4Y, VMUL(LDK(KP923879532), T4Z)); + T5r = VFNMS(LDK(KP382683432), T51, VMUL(LDK(KP923879532), T52)); + T5s = VADD(T5q, T5r); + T7I = VSUB(T5r, T5q); + } + { + V T5u, T5v, T5f, T5i; + T5u = VADD(T56, T57); + T5v = VADD(T59, T5a); + T5w = VFMA(LDK(KP980785280), T5u, VMUL(LDK(KP195090322), T5v)); + T5C = VFNMS(LDK(KP195090322), T5u, VMUL(LDK(KP980785280), T5v)); + T5f = VSUB(T5d, T5e); + T5i = VSUB(T5g, T5h); + T5j = VFNMS(LDK(KP831469612), T5i, VMUL(LDK(KP555570233), T5f)); + T5n = VFMA(LDK(KP831469612), T5f, VMUL(LDK(KP555570233), T5i)); + } + { + V T55, T5k, T7H, T7K; + T55 = VADD(T4X, T54); + T5k = VADD(T5c, T5j); + ST(&(ri[WS(rs, 21)]), VSUB(T55, T5k), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VADD(T55, T5k), ms, &(ri[WS(rs, 1)])); + T7H = VADD(T5m, T5n); + T7K = VADD(T7I, T7J); + ST(&(ii[WS(rs, 5)]), 
VADD(T7H, T7K), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 21)]), VSUB(T7K, T7H), ms, &(ii[WS(rs, 1)])); + } + { + V T5l, T5o, T7L, T7M; + T5l = VSUB(T4X, T54); + T5o = VSUB(T5m, T5n); + ST(&(ri[WS(rs, 29)]), VSUB(T5l, T5o), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VADD(T5l, T5o), ms, &(ri[WS(rs, 1)])); + T7L = VSUB(T5j, T5c); + T7M = VSUB(T7J, T7I); + ST(&(ii[WS(rs, 13)]), VADD(T7L, T7M), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 29)]), VSUB(T7M, T7L), ms, &(ii[WS(rs, 1)])); + } + { + V T5t, T5A, T7x, T7E; + T5t = VADD(T5p, T5s); + T5A = VADD(T5w, T5z); + ST(&(ri[WS(rs, 17)]), VSUB(T5t, T5A), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(T5t, T5A), ms, &(ri[WS(rs, 1)])); + T7x = VADD(T5C, T5D); + T7E = VADD(T7y, T7D); + ST(&(ii[WS(rs, 1)]), VADD(T7x, T7E), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 17)]), VSUB(T7E, T7x), ms, &(ii[WS(rs, 1)])); + } + { + V T5B, T5E, T7F, T7G; + T5B = VSUB(T5p, T5s); + T5E = VSUB(T5C, T5D); + ST(&(ri[WS(rs, 25)]), VSUB(T5B, T5E), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 9)]), VADD(T5B, T5E), ms, &(ri[WS(rs, 1)])); + T7F = VSUB(T5z, T5w); + T7G = VSUB(T7D, T7y); + ST(&(ii[WS(rs, 9)]), VADD(T7F, T7G), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 25)]), VSUB(T7G, T7F), ms, &(ii[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t1sv_32"), twinstr, &GENUS, {340, 114, 94, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1sv_32) (planner *p) { + X(kdft_dit_register) (p, t1sv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 
8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1sv_4 -include ts.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 35 stack variables, 0 constants, and 16 memory accesses + */ +#include "ts.h" + +static void t1sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, Tv, T3, T6, T5, Ta, Td, Tc, Tg, Tj, Tt, T4, Tf, Ti, Tn; + V Tb, T2, T9; + T1 = 
LD(&(ri[0]), ms, &(ri[0])); + Tv = LD(&(ii[0]), ms, &(ii[0])); + T3 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T6 = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 2])); + T5 = LDW(&(W[TWVL * 3])); + Ta = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Td = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T9 = LDW(&(W[0])); + Tc = LDW(&(W[TWVL * 1])); + Tg = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + Tj = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + Tt = VMUL(T2, T6); + T4 = VMUL(T2, T3); + Tf = LDW(&(W[TWVL * 4])); + Ti = LDW(&(W[TWVL * 5])); + Tn = VMUL(T9, Td); + Tb = VMUL(T9, Ta); + { + V Tu, T7, Tp, Th, To, Te; + Tu = VFNMS(T5, T3, Tt); + T7 = VFMA(T5, T6, T4); + Tp = VMUL(Tf, Tj); + Th = VMUL(Tf, Tg); + To = VFNMS(Tc, Ta, Tn); + Te = VFMA(Tc, Td, Tb); + { + V Tw, Tx, T8, Tm, Tq, Tk; + Tw = VADD(Tu, Tv); + Tx = VSUB(Tv, Tu); + T8 = VADD(T1, T7); + Tm = VSUB(T1, T7); + Tq = VFNMS(Ti, Tg, Tp); + Tk = VFMA(Ti, Tj, Th); + { + V Ts, Tr, Tl, Ty; + Ts = VADD(To, Tq); + Tr = VSUB(To, Tq); + Tl = VADD(Te, Tk); + Ty = VSUB(Te, Tk); + ST(&(ri[WS(rs, 1)]), VADD(Tm, Tr), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VSUB(Tm, Tr), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 2)]), VSUB(Tw, Ts), ms, &(ii[0])); + ST(&(ii[0]), VADD(Ts, Tw), ms, &(ii[0])); + ST(&(ii[WS(rs, 3)]), VADD(Ty, Tx), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VSUB(Tx, Ty), ms, &(ii[WS(rs, 1)])); + ST(&(ri[0]), VADD(T8, Tl), ms, &(ri[0])); + ST(&(ri[WS(rs, 2)]), VSUB(T8, Tl), ms, &(ri[0])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1sv_4"), twinstr, &GENUS, {16, 6, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1sv_4) (planner *p) { + X(kdft_dit_register) (p, t1sv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1sv_4 -include ts.h */ + +/* + * This 
function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "ts.h" + +static void t1sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, Tp, T6, To, Tc, Tk, Th, Tl; + T1 = LD(&(ri[0]), ms, &(ri[0])); + Tp = LD(&(ii[0]), ms, &(ii[0])); + { + V T3, T5, T2, T4; + T3 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T5 = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 2])); + T4 = LDW(&(W[TWVL * 3])); + T6 = VFMA(T2, T3, VMUL(T4, T5)); + To = VFNMS(T4, T3, VMUL(T2, T5)); + } + { + V T9, Tb, T8, Ta; + T9 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Tb = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T8 = LDW(&(W[0])); + Ta = LDW(&(W[TWVL * 1])); + Tc = VFMA(T8, T9, VMUL(Ta, Tb)); + Tk = VFNMS(Ta, T9, VMUL(T8, Tb)); + } + { + V Te, Tg, Td, Tf; + Te = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + Tg = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + Td = LDW(&(W[TWVL * 4])); + Tf = LDW(&(W[TWVL * 5])); + Th = VFMA(Td, Te, VMUL(Tf, Tg)); + Tl = VFNMS(Tf, Te, VMUL(Td, Tg)); + } + { + V T7, Ti, Tn, Tq; + T7 = VADD(T1, T6); + Ti = VADD(Tc, Th); + ST(&(ri[WS(rs, 2)]), VSUB(T7, Ti), ms, &(ri[0])); + ST(&(ri[0]), VADD(T7, Ti), ms, &(ri[0])); + Tn = VADD(Tk, Tl); + Tq = VADD(To, Tp); + ST(&(ii[0]), VADD(Tn, Tq), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VSUB(Tq, Tn), ms, &(ii[0])); + } + { + V Tj, Tm, Tr, Ts; + Tj = VSUB(T1, T6); + Tm = VSUB(Tk, Tl); + ST(&(ri[WS(rs, 3)]), VSUB(Tj, Tm), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(Tj, Tm), ms, &(ri[WS(rs, 1)])); + Tr = VSUB(Tp, To); + Ts = VSUB(Tc, Th); + ST(&(ii[WS(rs, 1)]), VSUB(Tr, Ts), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VADD(Ts, Tr), ms, &(ii[WS(rs, 1)])); + } + } + } + 
VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t1sv_4"), twinstr, &GENUS, {16, 6, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1sv_4) (planner *p) { + X(kdft_dit_register) (p, t1sv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t1sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:51 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1sv_8 -include ts.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 44 additions, 14 multiplications, 22 fused multiply/add), + * 59 stack variables, 1 constants, and 32 memory accesses + */ +#include "ts.h" + +static void t1sv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 14); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 14), MAKE_VOLATILE_STRIDE(16, rs)) { + V T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, Tu, T14, TF, Tx, T16; + V TL, Tt, TW, Tp, Tq, Tw; + { + V T3, T6, T2, T5; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T1m = LD(&(ii[0]), ms, &(ii[0])); + T3 = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + T6 = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 6])); + T5 = LDW(&(W[TWVL * 7])); + { + V Tg, Tj, Ti, Ta, Td, T1k, T4, T9, Tc, TR, Th, Tf; + Tg = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + Tj = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + Tf = LDW(&(W[TWVL * 10])); + Ti = LDW(&(W[TWVL * 11])); + Ta = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + Td = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T1k = VMUL(T2, T6); + T4 = VMUL(T2, T3); + T9 = LDW(&(W[TWVL * 2])); + Tc = LDW(&(W[TWVL * 3])); + TR = VMUL(Tf, Tj); + Th = VMUL(Tf, Tg); + { + V TB, TE, TH, TK, TG, TD, TJ, T13, TC, TA, TP, Tb, T15, TI, Tn; + TB = LD(&(ri[WS(rs, 7)]), ms, 
&(ri[WS(rs, 1)])); + TE = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T1l = VFNMS(T5, T3, T1k); + T7 = VFMA(T5, T6, T4); + TP = VMUL(T9, Td); + Tb = VMUL(T9, Ta); + TS = VFNMS(Ti, Tg, TR); + Tk = VFMA(Ti, Tj, Th); + TA = LDW(&(W[TWVL * 12])); + TH = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + TK = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + TG = LDW(&(W[TWVL * 4])); + TQ = VFNMS(Tc, Ta, TP); + Te = VFMA(Tc, Td, Tb); + TD = LDW(&(W[TWVL * 13])); + TJ = LDW(&(W[TWVL * 5])); + T13 = VMUL(TA, TE); + TC = VMUL(TA, TB); + To = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T15 = VMUL(TG, TK); + TI = VMUL(TG, TH); + Tr = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + Tn = LDW(&(W[0])); + Tu = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T14 = VFNMS(TD, TB, T13); + TF = VFMA(TD, TE, TC); + Tx = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T16 = VFNMS(TJ, TH, T15); + TL = VFMA(TJ, TK, TI); + Tt = LDW(&(W[TWVL * 8])); + TW = VMUL(Tn, Tr); + Tp = VMUL(Tn, To); + Tq = LDW(&(W[TWVL * 1])); + Tw = LDW(&(W[TWVL * 9])); + } + } + } + { + V T8, T1g, TM, T1j, TX, Ts, T1n, T1r, T1s, Tl, T1c, T18, TZ, Ty, T1a; + V TU; + { + V TO, T17, T12, TY, Tv, TT; + T8 = VADD(T1, T7); + TO = VSUB(T1, T7); + T17 = VSUB(T14, T16); + T1g = VADD(T14, T16); + TM = VADD(TF, TL); + T12 = VSUB(TF, TL); + TY = VMUL(Tt, Tx); + Tv = VMUL(Tt, Tu); + TT = VSUB(TQ, TS); + T1j = VADD(TQ, TS); + TX = VFNMS(Tq, To, TW); + Ts = VFMA(Tq, Tr, Tp); + T1n = VADD(T1l, T1m); + T1r = VSUB(T1m, T1l); + T1s = VSUB(Te, Tk); + Tl = VADD(Te, Tk); + T1c = VADD(T12, T17); + T18 = VSUB(T12, T17); + TZ = VFNMS(Tw, Tu, TY); + Ty = VFMA(Tw, Tx, Tv); + T1a = VSUB(TO, TT); + TU = VADD(TO, TT); + } + { + V T1v, T1t, Tm, T1e, T1o, T1q, TN, T1p, T1d, T1u, T19, T1w, T1i, T1h; + { + V T10, T1f, Tz, TV, T11, T1b; + T1v = VADD(T1s, T1r); + T1t = VSUB(T1r, T1s); + T10 = VSUB(TX, TZ); + T1f = VADD(TX, TZ); + Tz = VADD(Ts, Ty); + TV = VSUB(Ts, Ty); + T11 = VADD(TV, T10); + T1b = VSUB(T10, TV); + Tm = VADD(T8, Tl); + T1e = VSUB(T8, Tl); + 
T1o = VADD(T1j, T1n); + T1q = VSUB(T1n, T1j); + TN = VADD(Tz, TM); + T1p = VSUB(TM, Tz); + T1d = VSUB(T1b, T1c); + T1u = VADD(T1b, T1c); + T19 = VADD(T11, T18); + T1w = VSUB(T18, T11); + T1i = VADD(T1f, T1g); + T1h = VSUB(T1f, T1g); + } + ST(&(ii[WS(rs, 6)]), VSUB(T1q, T1p), ms, &(ii[0])); + ST(&(ri[0]), VADD(Tm, TN), ms, &(ri[0])); + ST(&(ri[WS(rs, 4)]), VSUB(Tm, TN), ms, &(ri[0])); + ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP707106781), T1u, T1t), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VFNMS(LDK(KP707106781), T1u, T1t), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP707106781), T1d, T1a), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VFNMS(LDK(KP707106781), T1d, T1a), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP707106781), T1w, T1v), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VFNMS(LDK(KP707106781), T1w, T1v), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP707106781), T19, TU), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VFNMS(LDK(KP707106781), T19, TU), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 6)]), VSUB(T1e, T1h), ms, &(ri[0])); + ST(&(ii[0]), VADD(T1i, T1o), ms, &(ii[0])); + ST(&(ii[WS(rs, 4)]), VSUB(T1o, T1i), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VADD(T1e, T1h), ms, &(ri[0])); + ST(&(ii[WS(rs, 2)]), VADD(T1p, T1q), ms, &(ii[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1sv_8"), twinstr, &GENUS, {44, 14, 22, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1sv_8) (planner *p) { + X(kdft_dit_register) (p, t1sv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1sv_8 -include ts.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 28 stack 
variables, 1 constants, and 32 memory accesses + */ +#include "ts.h" + +static void t1sv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 14); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 14), MAKE_VOLATILE_STRIDE(16, rs)) { + V T7, T1e, TH, T19, TF, T13, TR, TU, Ti, T1f, TK, T16, Tu, T12, TM; + V TP; + { + V T1, T18, T6, T17; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T18 = LD(&(ii[0]), ms, &(ii[0])); + { + V T3, T5, T2, T4; + T3 = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + T5 = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T2 = LDW(&(W[TWVL * 6])); + T4 = LDW(&(W[TWVL * 7])); + T6 = VFMA(T2, T3, VMUL(T4, T5)); + T17 = VFNMS(T4, T3, VMUL(T2, T5)); + } + T7 = VADD(T1, T6); + T1e = VSUB(T18, T17); + TH = VSUB(T1, T6); + T19 = VADD(T17, T18); + } + { + V Tz, TS, TE, TT; + { + V Tw, Ty, Tv, Tx; + Tw = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + Ty = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + Tv = LDW(&(W[TWVL * 12])); + Tx = LDW(&(W[TWVL * 13])); + Tz = VFMA(Tv, Tw, VMUL(Tx, Ty)); + TS = VFNMS(Tx, Tw, VMUL(Tv, Ty)); + } + { + V TB, TD, TA, TC; + TB = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + TD = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + TA = LDW(&(W[TWVL * 4])); + TC = LDW(&(W[TWVL * 5])); + TE = VFMA(TA, TB, VMUL(TC, TD)); + TT = VFNMS(TC, TB, VMUL(TA, TD)); + } + TF = VADD(Tz, TE); + T13 = VADD(TS, TT); + TR = VSUB(Tz, TE); + TU = VSUB(TS, TT); + } + { + V Tc, TI, Th, TJ; + { + V T9, Tb, T8, Ta; + T9 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + Tb = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T8 = LDW(&(W[TWVL * 2])); + Ta = LDW(&(W[TWVL * 3])); + Tc = VFMA(T8, T9, VMUL(Ta, Tb)); + TI = VFNMS(Ta, T9, VMUL(T8, Tb)); + } + { + V Te, Tg, Td, Tf; + Te = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + Tg = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + Td = LDW(&(W[TWVL * 10])); + Tf = LDW(&(W[TWVL * 11])); + Th = VFMA(Td, 
Te, VMUL(Tf, Tg)); + TJ = VFNMS(Tf, Te, VMUL(Td, Tg)); + } + Ti = VADD(Tc, Th); + T1f = VSUB(Tc, Th); + TK = VSUB(TI, TJ); + T16 = VADD(TI, TJ); + } + { + V To, TN, Tt, TO; + { + V Tl, Tn, Tk, Tm; + Tl = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Tn = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + Tk = LDW(&(W[0])); + Tm = LDW(&(W[TWVL * 1])); + To = VFMA(Tk, Tl, VMUL(Tm, Tn)); + TN = VFNMS(Tm, Tl, VMUL(Tk, Tn)); + } + { + V Tq, Ts, Tp, Tr; + Tq = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + Ts = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + Tp = LDW(&(W[TWVL * 8])); + Tr = LDW(&(W[TWVL * 9])); + Tt = VFMA(Tp, Tq, VMUL(Tr, Ts)); + TO = VFNMS(Tr, Tq, VMUL(Tp, Ts)); + } + Tu = VADD(To, Tt); + T12 = VADD(TN, TO); + TM = VSUB(To, Tt); + TP = VSUB(TN, TO); + } + { + V Tj, TG, T1b, T1c; + Tj = VADD(T7, Ti); + TG = VADD(Tu, TF); + ST(&(ri[WS(rs, 4)]), VSUB(Tj, TG), ms, &(ri[0])); + ST(&(ri[0]), VADD(Tj, TG), ms, &(ri[0])); + { + V T15, T1a, T11, T14; + T15 = VADD(T12, T13); + T1a = VADD(T16, T19); + ST(&(ii[0]), VADD(T15, T1a), ms, &(ii[0])); + ST(&(ii[WS(rs, 4)]), VSUB(T1a, T15), ms, &(ii[0])); + T11 = VSUB(T7, Ti); + T14 = VSUB(T12, T13); + ST(&(ri[WS(rs, 6)]), VSUB(T11, T14), ms, &(ri[0])); + ST(&(ri[WS(rs, 2)]), VADD(T11, T14), ms, &(ri[0])); + } + T1b = VSUB(TF, Tu); + T1c = VSUB(T19, T16); + ST(&(ii[WS(rs, 2)]), VADD(T1b, T1c), ms, &(ii[0])); + ST(&(ii[WS(rs, 6)]), VSUB(T1c, T1b), ms, &(ii[0])); + { + V TX, T1g, T10, T1d, TY, TZ; + TX = VSUB(TH, TK); + T1g = VSUB(T1e, T1f); + TY = VSUB(TP, TM); + TZ = VADD(TR, TU); + T10 = VMUL(LDK(KP707106781), VSUB(TY, TZ)); + T1d = VMUL(LDK(KP707106781), VADD(TY, TZ)); + ST(&(ri[WS(rs, 7)]), VSUB(TX, T10), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VSUB(T1g, T1d), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VADD(TX, T10), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VADD(T1d, T1g), ms, &(ii[WS(rs, 1)])); + } + { + V TL, T1i, TW, T1h, TQ, TV; + TL = VADD(TH, TK); + T1i = VADD(T1f, T1e); + TQ = VADD(TM, TP); + TV = 
VSUB(TR, TU); + TW = VMUL(LDK(KP707106781), VADD(TQ, TV)); + T1h = VMUL(LDK(KP707106781), VSUB(TV, TQ)); + ST(&(ri[WS(rs, 5)]), VSUB(TL, TW), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VSUB(T1i, T1h), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(TL, TW), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VADD(T1h, T1i), ms, &(ii[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t1sv_8"), twinstr, &GENUS, {52, 18, 14, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t1sv_8) (planner *p) { + X(kdft_dit_register) (p, t1sv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:45 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t2bv_10 -include t2b.h -sign 1 */ + +/* + * This function contains 51 FP additions, 40 FP multiplications, + * (or, 33 additions, 22 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t2b.h" + +static void t2bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Td, TA, T4, Ta, Tk, TE, Tp, TF, TB, T9, T1, T2, Tb; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V Tg, Tn, Ti, Tl; + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + { + V T6, T8, T5, Tc; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Th, To, Tj, Tm, T7; + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 8]), T2); + Th = BYTW(&(W[TWVL * 6]), Tg); + To = BYTW(&(W[0]), Tn); + Tj = BYTW(&(W[TWVL * 
16]), Ti); + Tm = BYTW(&(W[TWVL * 10]), Tl); + T6 = BYTW(&(W[TWVL * 2]), T5); + Td = BYTW(&(W[TWVL * 4]), Tc); + T8 = BYTW(&(W[TWVL * 12]), T7); + TA = VADD(T1, T3); + T4 = VSUB(T1, T3); + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tk = VSUB(Th, Tj); + TE = VADD(Th, Tj); + Tp = VSUB(Tm, To); + TF = VADD(Tm, To); + } + TB = VADD(T6, T8); + T9 = VSUB(T6, T8); + } + } + Tb = BYTW(&(W[TWVL * 14]), Ta); + { + V TL, TG, Tw, Tq, TC, Te; + TL = VSUB(TE, TF); + TG = VADD(TE, TF); + Tw = VSUB(Tk, Tp); + Tq = VADD(Tk, Tp); + TC = VADD(Tb, Td); + Te = VSUB(Tb, Td); + { + V TM, TD, Tv, Tf; + TM = VSUB(TB, TC); + TD = VADD(TB, TC); + Tv = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V TP, TN, TH, TJ, Tz, Tx, Tr, Tt, TI, Ts; + TP = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TL, TM)); + TN = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TM, TL)); + TH = VADD(TD, TG); + TJ = VSUB(TD, TG); + Tz = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tv, Tw)); + Tx = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tv)); + Tr = VADD(Tf, Tq); + Tt = VSUB(Tf, Tq); + ST(&(x[0]), VADD(TA, TH), ms, &(x[0])); + TI = VFNMS(LDK(KP250000000), TH, TA); + ST(&(x[WS(rs, 5)]), VADD(T4, Tr), ms, &(x[WS(rs, 1)])); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + { + V TK, TO, Tu, Ty; + TK = VFNMS(LDK(KP559016994), TJ, TI); + TO = VFMA(LDK(KP559016994), TJ, TI); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + Ty = VFNMS(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 8)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 
5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t2bv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_10) (planner *p) { + X(kdft_dit_register) (p, t2bv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t2bv_10 -include t2b.h -sign 1 */ + +/* + * This function contains 51 FP additions, 30 FP multiplications, + * (or, 45 additions, 24 multiplications, 6 fused multiply/add), + * 32 stack variables, 4 constants, and 20 memory accesses + */ +#include "t2b.h" + +static void t2bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Tu, TH, Tg, Tl, Tp, TD, TE, TJ, T5, Ta, To, TA, TB, TI, Tr; + V Tt, Ts; + Tr = LD(&(x[0]), ms, &(x[0])); + Ts = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tt = BYTW(&(W[TWVL * 8]), Ts); + Tu = VSUB(Tr, Tt); + TH = VADD(Tr, Tt); + { + V Td, Tk, Tf, Ti; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 6]), Tc); + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = BYTW(&(W[0]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTW(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 10]), Th); + } + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tp = VADD(Tg, Tl); + TD = VADD(Td, Tf); + TE = VADD(Ti, Tk); + TJ = VADD(TD, TE); + } + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = 
LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTW(&(W[TWVL * 2]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 14]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + To = VADD(T5, Ta); + TA = VADD(T2, T4); + TB = VADD(T7, T9); + TI = VADD(TA, TB); + } + { + V Tq, Tv, Tw, Tn, Tz, Tb, Tm, Ty, Tx; + Tq = VMUL(LDK(KP559016994), VSUB(To, Tp)); + Tv = VADD(To, Tp); + Tw = VFNMS(LDK(KP250000000), Tv, Tu); + Tb = VSUB(T5, Ta); + Tm = VSUB(Tg, Tl); + Tn = VBYI(VFMA(LDK(KP951056516), Tb, VMUL(LDK(KP587785252), Tm))); + Tz = VBYI(VFNMS(LDK(KP951056516), Tm, VMUL(LDK(KP587785252), Tb))); + ST(&(x[WS(rs, 5)]), VADD(Tu, Tv), ms, &(x[WS(rs, 1)])); + Ty = VSUB(Tw, Tq); + ST(&(x[WS(rs, 3)]), VSUB(Ty, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(Tz, Ty), ms, &(x[WS(rs, 1)])); + Tx = VADD(Tq, Tw); + ST(&(x[WS(rs, 1)]), VADD(Tn, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(Tx, Tn), ms, &(x[WS(rs, 1)])); + } + { + V TM, TK, TL, TG, TP, TC, TF, TO, TN; + TM = VMUL(LDK(KP559016994), VSUB(TI, TJ)); + TK = VADD(TI, TJ); + TL = VFNMS(LDK(KP250000000), TK, TH); + TC = VSUB(TA, TB); + TF = VSUB(TD, TE); + TG = VBYI(VFNMS(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TC))); + TP = VBYI(VFMA(LDK(KP951056516), TC, VMUL(LDK(KP587785252), TF))); + ST(&(x[0]), VADD(TH, TK), ms, &(x[0])); + TO = VADD(TM, TL); + ST(&(x[WS(rs, 4)]), VSUB(TO, TP), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(TP, TO), ms, &(x[0])); + TN = VSUB(TL, TM); + ST(&(x[WS(rs, 2)]), VADD(TG, TN), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TN, TG), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t2bv_10"), twinstr, 
&GENUS, {45, 24, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_10) (planner *p) { + X(kdft_dit_register) (p, t2bv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:40 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2bv_16 -include t2b.h -sign 1 */ + +/* + * This function contains 87 FP additions, 64 FP multiplications, + * (or, 53 additions, 30 multiplications, 34 fused multiply/add), + * 61 stack variables, 3 constants, and 32 memory accesses + */ +#include "t2b.h" + +static void t2bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TO, Ta, TJ, TP, T14, Tq, T1i, T10, T1b, T1l, T13, T1c, TR, Tl, T15; + V Tv; + { + V Tc, TW, T4, T19, T9, TD, TI, Tj, TZ, T1a, Te, Th, Tn, Tr, Tu; + V Tp; + { + V T1, T2, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tz, TG, TB, TE; + Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TE = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + { + V Ti, TX, TY, Td, Tg, Tm, Tt, To; + { + V T3, T6, T8, TA, TH, TC, TF, Tb; + Tb = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 14]), T2); + T6 = BYTW(&(W[TWVL * 6]), T5); + 
T8 = BYTW(&(W[TWVL * 22]), T7); + TA = BYTW(&(W[TWVL * 2]), Tz); + TH = BYTW(&(W[TWVL * 10]), TG); + TC = BYTW(&(W[TWVL * 18]), TB); + TF = BYTW(&(W[TWVL * 26]), TE); + Tc = BYTW(&(W[0]), Tb); + TW = VSUB(T1, T3); + T4 = VADD(T1, T3); + T19 = VSUB(T6, T8); + T9 = VADD(T6, T8); + Ti = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TD = VADD(TA, TC); + TX = VSUB(TA, TC); + TI = VADD(TF, TH); + TY = VSUB(TF, TH); + } + Td = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tg = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tm = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tj = BYTW(&(W[TWVL * 24]), Ti); + Tt = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + To = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TZ = VADD(TX, TY); + T1a = VSUB(TX, TY); + Te = BYTW(&(W[TWVL * 16]), Td); + Th = BYTW(&(W[TWVL * 8]), Tg); + Tn = BYTW(&(W[TWVL * 28]), Tm); + Tr = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tu = BYTW(&(W[TWVL * 20]), Tt); + Tp = BYTW(&(W[TWVL * 12]), To); + } + } + } + { + V Tf, T11, Tk, T12, Ts; + TO = VADD(T4, T9); + Ta = VSUB(T4, T9); + TJ = VSUB(TD, TI); + TP = VADD(TD, TI); + Tf = VADD(Tc, Te); + T11 = VSUB(Tc, Te); + Tk = VADD(Th, Tj); + T12 = VSUB(Th, Tj); + Ts = BYTW(&(W[TWVL * 4]), Tr); + T14 = VSUB(Tn, Tp); + Tq = VADD(Tn, Tp); + T1i = VFNMS(LDK(KP707106781), TZ, TW); + T10 = VFMA(LDK(KP707106781), TZ, TW); + T1b = VFMA(LDK(KP707106781), T1a, T19); + T1l = VFNMS(LDK(KP707106781), T1a, T19); + T13 = VFNMS(LDK(KP414213562), T12, T11); + T1c = VFMA(LDK(KP414213562), T11, T12); + TR = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + T15 = VSUB(Tu, Ts); + Tv = VADD(Ts, Tu); + } + } + { + V T1d, T16, TS, Tw, TU, TQ; + T1d = VFMA(LDK(KP414213562), T14, T15); + T16 = VFNMS(LDK(KP414213562), T15, T14); + TS = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + TU = VADD(TO, TP); + TQ = VSUB(TO, TP); + { + V T1e, T1j, T17, T1m; + T1e = VSUB(T1c, T1d); + T1j = VADD(T1c, T1d); + T17 = VADD(T13, T16); + T1m = VSUB(T13, T16); + { + V TV, TT, TK, Tx; + TV = VADD(TR, TS); + TT = VSUB(TR, TS); + TK = VSUB(Tl, 
Tw); + Tx = VADD(Tl, Tw); + { + V T1h, T1f, T1o, T1k; + T1h = VFMA(LDK(KP923879532), T1e, T1b); + T1f = VFNMS(LDK(KP923879532), T1e, T1b); + T1o = VFMA(LDK(KP923879532), T1j, T1i); + T1k = VFNMS(LDK(KP923879532), T1j, T1i); + { + V T1g, T18, T1p, T1n; + T1g = VFMA(LDK(KP923879532), T17, T10); + T18 = VFNMS(LDK(KP923879532), T17, T10); + T1p = VFNMS(LDK(KP923879532), T1m, T1l); + T1n = VFMA(LDK(KP923879532), T1m, T1l); + ST(&(x[WS(rs, 8)]), VSUB(TU, TV), ms, &(x[0])); + ST(&(x[0]), VADD(TU, TV), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TT, TQ), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(TT, TQ), ms, &(x[0])); + { + V TN, TL, TM, Ty; + TN = VFMA(LDK(KP707106781), TK, TJ); + TL = VFNMS(LDK(KP707106781), TK, TJ); + TM = VFMA(LDK(KP707106781), Tx, Ta); + Ty = VFNMS(LDK(KP707106781), Tx, Ta); + ST(&(x[WS(rs, 15)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t2bv_16"), twinstr, &GENUS, {53, 30, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_16) (planner *p) { + X(kdft_dit_register) (p, 
t2bv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2bv_16 -include t2b.h -sign 1 */ + +/* + * This function contains 87 FP additions, 42 FP multiplications, + * (or, 83 additions, 38 multiplications, 4 fused multiply/add), + * 36 stack variables, 3 constants, and 32 memory accesses + */ +#include "t2b.h" + +static void t2bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TJ, T1b, TD, T1c, T17, T18, Ty, TK, T10, T11, T12, Tb, TM, T13, T14; + V T15, Tm, TN, TG, TI, TH; + TG = LD(&(x[0]), ms, &(x[0])); + TH = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + TI = BYTW(&(W[TWVL * 14]), TH); + TJ = VSUB(TG, TI); + T1b = VADD(TG, TI); + { + V TA, TC, Tz, TB; + Tz = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 6]), Tz); + TB = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TC = BYTW(&(W[TWVL * 22]), TB); + TD = VSUB(TA, TC); + T1c = VADD(TA, TC); + } + { + V Tp, Tw, Tr, Tu, Ts, Tx; + { + V To, Tv, Tq, Tt; + To = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tp = BYTW(&(W[TWVL * 2]), To); + Tv = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tw = BYTW(&(W[TWVL * 10]), Tv); + Tq = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tr = BYTW(&(W[TWVL * 18]), Tq); + Tt = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tu = BYTW(&(W[TWVL * 26]), Tt); + } + T17 = VADD(Tp, Tr); + T18 = VADD(Tu, Tw); + Ts = VSUB(Tp, Tr); + Tx = VSUB(Tu, Tw); + Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx)); + TK = VMUL(LDK(KP707106781), VADD(Ts, Tx)); + } + { + V T2, T9, T4, T7, T5, Ta; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 
1)])); + T2 = BYTW(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 24]), T8); + T3 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 16]), T3); + T6 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 8]), T6); + } + T10 = VADD(T2, T4); + T11 = VADD(T7, T9); + T12 = VSUB(T10, T11); + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VFNMS(LDK(KP382683432), Ta, VMUL(LDK(KP923879532), T5)); + TM = VFMA(LDK(KP382683432), T5, VMUL(LDK(KP923879532), Ta)); + } + { + V Td, Tk, Tf, Ti, Tg, Tl; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 28]), Tc); + Tj = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tk = BYTW(&(W[TWVL * 20]), Tj); + Te = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tf = BYTW(&(W[TWVL * 12]), Te); + Th = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Ti = BYTW(&(W[TWVL * 4]), Th); + } + T13 = VADD(Td, Tf); + T14 = VADD(Ti, Tk); + T15 = VSUB(T13, T14); + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VFMA(LDK(KP923879532), Tg, VMUL(LDK(KP382683432), Tl)); + TN = VFNMS(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl)); + } + { + V T1a, T1g, T1f, T1h; + { + V T16, T19, T1d, T1e; + T16 = VMUL(LDK(KP707106781), VSUB(T12, T15)); + T19 = VSUB(T17, T18); + T1a = VBYI(VSUB(T16, T19)); + T1g = VBYI(VADD(T19, T16)); + T1d = VSUB(T1b, T1c); + T1e = VMUL(LDK(KP707106781), VADD(T12, T15)); + T1f = VSUB(T1d, T1e); + T1h = VADD(T1d, T1e); + } + ST(&(x[WS(rs, 6)]), VADD(T1a, T1f), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1h, T1g), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(T1f, T1a), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1g, T1h), ms, &(x[0])); + } + { + V T1k, T1o, T1n, T1p; + { + V T1i, T1j, T1l, T1m; + T1i = VADD(T1b, T1c); + T1j = VADD(T17, T18); + T1k = VSUB(T1i, T1j); + T1o = VADD(T1i, T1j); + T1l = VADD(T10, T11); + T1m = VADD(T13, T14); + T1n = VBYI(VSUB(T1l, T1m)); + T1p = VADD(T1l, T1m); + } + ST(&(x[WS(rs, 12)]), VSUB(T1k, T1n), ms, &(x[0])); + 
ST(&(x[0]), VADD(T1o, T1p), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1k, T1n), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(T1o, T1p), ms, &(x[0])); + } + { + V TF, TQ, TP, TR; + { + V Tn, TE, TL, TO; + Tn = VSUB(Tb, Tm); + TE = VSUB(Ty, TD); + TF = VBYI(VSUB(Tn, TE)); + TQ = VBYI(VADD(TE, Tn)); + TL = VSUB(TJ, TK); + TO = VSUB(TM, TN); + TP = VSUB(TL, TO); + TR = VADD(TL, TO); + } + ST(&(x[WS(rs, 5)]), VADD(TF, TP), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(TR, TQ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(TP, TF), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(TQ, TR), ms, &(x[WS(rs, 1)])); + } + { + V TU, TY, TX, TZ; + { + V TS, TT, TV, TW; + TS = VADD(TJ, TK); + TT = VADD(Tb, Tm); + TU = VADD(TS, TT); + TY = VSUB(TS, TT); + TV = VADD(TD, Ty); + TW = VADD(TM, TN); + TX = VBYI(VADD(TV, TW)); + TZ = VBYI(VSUB(TW, TV)); + } + ST(&(x[WS(rs, 15)]), VSUB(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(TY, TZ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(TY, TZ), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t2bv_16"), twinstr, &GENUS, {83, 38, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_16) (planner *p) { + X(kdft_dit_register) (p, t2bv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:39 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t2bv_2 -include t2b.h -sign 1 */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t2b.h" + +static void t2bv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), T2); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t2bv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_2) (planner *p) { + 
X(kdft_dit_register) (p, t2bv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t2bv_2 -include t2b.h -sign 1 */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t2b.h" + +static void t2bv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[0]), T2); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t2bv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_2) (planner *p) { + X(kdft_dit_register) (p, t2bv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,519 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:46 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t2bv_20 -include t2b.h -sign 1 */ + +/* + * This function contains 123 FP additions, 88 FP multiplications, + * (or, 77 additions, 42 multiplications, 46 fused multiply/add), + * 68 stack variables, 4 constants, and 40 memory accesses + */ +#include "t2b.h" + +static void t2bv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, TX, T1m, T1K, T1y, Tk, Tf, T14, TQ, TZ, T1O, T1w, T1L, T1p, T1M; + V T1s, TF, TY, T1x, Tp; + { + V T1, TV, T2, TT; + T1 = LD(&(x[0]), ms, &(x[0])); + TV = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TT = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T9, T1n, TK, T1v, TP, Te, T1q, T1u, TB, TD, Tm, T1o, Tz, Tn, T1r; + V TE, To; + { + V TM, TO, Ta, 
Tc; + { + V T5, T7, TG, TI, T1k, T1l; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TI = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V TW, T3, TU, T6, T8, TH, TJ, TL, TN; + TL = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TW = BYTW(&(W[TWVL * 28]), TV); + T3 = BYTW(&(W[TWVL * 18]), T2); + TU = BYTW(&(W[TWVL * 8]), TT); + T6 = BYTW(&(W[TWVL * 6]), T5); + T8 = BYTW(&(W[TWVL * 26]), T7); + TH = BYTW(&(W[TWVL * 24]), TG); + TJ = BYTW(&(W[TWVL * 4]), TI); + TM = BYTW(&(W[TWVL * 32]), TL); + TN = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = VSUB(T1, T3); + T1k = VADD(T1, T3); + TX = VSUB(TU, TW); + T1l = VADD(TU, TW); + T9 = VSUB(T6, T8); + T1n = VADD(T6, T8); + TK = VSUB(TH, TJ); + T1v = VADD(TH, TJ); + TO = BYTW(&(W[TWVL * 12]), TN); + } + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1m = VSUB(T1k, T1l); + T1K = VADD(T1k, T1l); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + } + { + V Tb, Tx, Td, Th, Tj, Tw, Tg, Ti, Tv; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tv = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TP = VSUB(TM, TO); + T1y = VADD(TM, TO); + Tb = BYTW(&(W[TWVL * 30]), Ta); + Tx = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Td = BYTW(&(W[TWVL * 10]), Tc); + Th = BYTW(&(W[TWVL * 14]), Tg); + Tj = BYTW(&(W[TWVL * 34]), Ti); + Tw = BYTW(&(W[TWVL * 16]), Tv); + { + V TA, TC, Ty, Tl; + TA = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TC = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 36]), Tx); + Te = VSUB(Tb, Td); + T1q = VADD(Tb, Td); + Tk = VSUB(Th, Tj); + T1u = VADD(Th, Tj); + TB = BYTW(&(W[0]), TA); + TD = BYTW(&(W[TWVL * 20]), TC); + Tm = BYTW(&(W[TWVL * 22]), Tl); + T1o = VADD(Tw, Ty); + Tz = VSUB(Tw, Ty); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + } + } + } + Tf = VADD(T9, Te); + T14 = VSUB(T9, Te); + TQ = VSUB(TK, TP); + TZ = VADD(TK, TP); + T1r = VADD(TB, TD); + 
TE = VSUB(TB, TD); + T1O = VADD(T1u, T1v); + T1w = VSUB(T1u, T1v); + To = BYTW(&(W[TWVL * 2]), Tn); + T1L = VADD(T1n, T1o); + T1p = VSUB(T1n, T1o); + T1M = VADD(T1q, T1r); + T1s = VSUB(T1q, T1r); + TF = VSUB(Tz, TE); + TY = VADD(Tz, TE); + T1x = VADD(Tm, To); + Tp = VSUB(Tm, To); + } + } + { + V T1V, T1N, T12, T1b, TR, T1G, T1t, T1z, T1P, Tq, T15, T11, T1j, T10; + T1V = VSUB(T1L, T1M); + T1N = VADD(T1L, T1M); + T12 = VSUB(TY, TZ); + T10 = VADD(TY, TZ); + T1b = VFNMS(LDK(KP618033988), TF, TQ); + TR = VFMA(LDK(KP618033988), TQ, TF); + T1G = VSUB(T1p, T1s); + T1t = VADD(T1p, T1s); + T1z = VSUB(T1x, T1y); + T1P = VADD(T1x, T1y); + Tq = VADD(Tk, Tp); + T15 = VSUB(Tk, Tp); + T11 = VFNMS(LDK(KP250000000), T10, TX); + T1j = VADD(TX, T10); + { + V T1J, T1H, T1D, T1Z, T1X, T1T, T1f, T1h, T19, T17, T1C, T1S, T1a, Tu, T1F; + V T1A; + T1F = VSUB(T1w, T1z); + T1A = VADD(T1w, T1z); + { + V T1W, T1Q, Tt, Tr; + T1W = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + Tt = VSUB(Tf, Tq); + Tr = VADD(Tf, Tq); + { + V T1e, T16, T1d, T13; + T1e = VFNMS(LDK(KP618033988), T14, T15); + T16 = VFMA(LDK(KP618033988), T15, T14); + T1d = VFNMS(LDK(KP559016994), T12, T11); + T13 = VFMA(LDK(KP559016994), T12, T11); + T1J = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1F, T1G)); + T1H = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1G, T1F)); + { + V T1B, T1R, Ts, T1i; + T1B = VADD(T1t, T1A); + T1D = VSUB(T1t, T1A); + T1Z = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1V, T1W)); + T1X = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1W, T1V)); + T1R = VADD(T1N, T1Q); + T1T = VSUB(T1N, T1Q); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + T1i = VADD(T4, Tr); + T1f = VFNMS(LDK(KP951056516), T1e, T1d); + T1h = VFMA(LDK(KP951056516), T1e, T1d); + T19 = VFNMS(LDK(KP951056516), T16, T13); + T17 = VFMA(LDK(KP951056516), T16, T13); + ST(&(x[WS(rs, 10)]), VADD(T1m, T1B), ms, &(x[0])); + T1C = VFNMS(LDK(KP250000000), T1B, T1m); + ST(&(x[0]), VADD(T1K, T1R), ms, &(x[0])); + T1S = VFNMS(LDK(KP250000000), T1R, T1K); + 
T1a = VFNMS(LDK(KP559016994), Tt, Ts); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 5)]), VFMAI(T1j, T1i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T1j, T1i), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T1E, T1I, T1U, T1Y; + T1E = VFNMS(LDK(KP559016994), T1D, T1C); + T1I = VFMA(LDK(KP559016994), T1D, T1C); + T1U = VFMA(LDK(KP559016994), T1T, T1S); + T1Y = VFNMS(LDK(KP559016994), T1T, T1S); + { + V T1c, T1g, T18, TS; + T1c = VFMA(LDK(KP951056516), T1b, T1a); + T1g = VFNMS(LDK(KP951056516), T1b, T1a); + T18 = VFMA(LDK(KP951056516), TR, Tu); + TS = VFNMS(LDK(KP951056516), TR, Tu); + ST(&(x[WS(rs, 18)]), VFMAI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFMAI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFMAI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 17)]), VFMAI(T1f, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T1f, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T19, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T19, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T17, TS), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T17, TS), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t2bv_20"), twinstr, &GENUS, {77, 42, 46, 0}, 0, 0, 0 }; + 
+void XSIMD(codelet_t2bv_20) (planner *p) { + X(kdft_dit_register) (p, t2bv_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t2bv_20 -include t2b.h -sign 1 */ + +/* + * This function contains 123 FP additions, 62 FP multiplications, + * (or, 111 additions, 50 multiplications, 12 fused multiply/add), + * 54 stack variables, 4 constants, and 40 memory accesses + */ +#include "t2b.h" + +static void t2bv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, T10, T1B, T1R, TF, T14, T15, TQ, Tf, Tq, Tr, T1N, T1O, T1P, T1t; + V T1w, T1D, TT, TU, T11, T1K, T1L, T1M, T1m, T1p, T1C, T1i, T1j; + { + V T1, TZ, T3, TX, TY, T2, TW, T1z, T1A; + T1 = LD(&(x[0]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + TZ = BYTW(&(W[TWVL * 28]), TY); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 18]), T2); + TW = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TX = BYTW(&(W[TWVL * 8]), TW); + T4 = VSUB(T1, T3); + T10 = VSUB(TX, TZ); + T1z = VADD(T1, T3); + T1A = VADD(TX, TZ); + T1B = VSUB(T1z, T1A); + T1R = VADD(T1z, T1A); + } + { + V T9, T1k, TK, T1s, TP, T1v, Te, T1n, Tk, T1r, Tz, T1l, TE, T1o, Tp; + V T1u; + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTW(&(W[TWVL * 6]), T5); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T8 = BYTW(&(W[TWVL * 26]), T7); + T9 = VSUB(T6, T8); + T1k = VADD(T6, T8); + } + { + V TH, TJ, TG, TI; + TG = LD(&(x[WS(rs, 13)]), ms, 
&(x[WS(rs, 1)])); + TH = BYTW(&(W[TWVL * 24]), TG); + TI = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TJ = BYTW(&(W[TWVL * 4]), TI); + TK = VSUB(TH, TJ); + T1s = VADD(TH, TJ); + } + { + V TM, TO, TL, TN; + TL = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TM = BYTW(&(W[TWVL * 32]), TL); + TN = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TO = BYTW(&(W[TWVL * 12]), TN); + TP = VSUB(TM, TO); + T1v = VADD(TM, TO); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 30]), Ta); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 10]), Tc); + Te = VSUB(Tb, Td); + T1n = VADD(Tb, Td); + } + { + V Th, Tj, Tg, Ti; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Th = BYTW(&(W[TWVL * 14]), Tg); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tj = BYTW(&(W[TWVL * 34]), Ti); + Tk = VSUB(Th, Tj); + T1r = VADD(Th, Tj); + } + { + V Tw, Ty, Tv, Tx; + Tv = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tw = BYTW(&(W[TWVL * 16]), Tv); + Tx = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Ty = BYTW(&(W[TWVL * 36]), Tx); + Tz = VSUB(Tw, Ty); + T1l = VADD(Tw, Ty); + } + { + V TB, TD, TA, TC; + TA = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TB = BYTW(&(W[0]), TA); + TC = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TD = BYTW(&(W[TWVL * 20]), TC); + TE = VSUB(TB, TD); + T1o = VADD(TB, TD); + } + { + V Tm, To, Tl, Tn; + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tm = BYTW(&(W[TWVL * 22]), Tl); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + To = BYTW(&(W[TWVL * 2]), Tn); + Tp = VSUB(Tm, To); + T1u = VADD(Tm, To); + } + TF = VSUB(Tz, TE); + T14 = VSUB(T9, Te); + T15 = VSUB(Tk, Tp); + TQ = VSUB(TK, TP); + Tf = VADD(T9, Te); + Tq = VADD(Tk, Tp); + Tr = VADD(Tf, Tq); + T1N = VADD(T1r, T1s); + T1O = VADD(T1u, T1v); + T1P = VADD(T1N, T1O); + T1t = VSUB(T1r, T1s); + T1w = VSUB(T1u, T1v); + T1D = VADD(T1t, T1w); + TT = VADD(Tz, TE); + TU = VADD(TK, TP); + T11 = VADD(TT, TU); + T1K = VADD(T1k, T1l); + T1L = VADD(T1n, T1o); + T1M = VADD(T1K, T1L); + T1m = VSUB(T1k, 
T1l); + T1p = VSUB(T1n, T1o); + T1C = VADD(T1m, T1p); + } + T1i = VADD(T4, Tr); + T1j = VBYI(VADD(T10, T11)); + ST(&(x[WS(rs, 15)]), VSUB(T1i, T1j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1i, T1j), ms, &(x[WS(rs, 1)])); + { + V T1Q, T1S, T1T, T1X, T1Z, T1V, T1W, T1Y, T1U; + T1Q = VMUL(LDK(KP559016994), VSUB(T1M, T1P)); + T1S = VADD(T1M, T1P); + T1T = VFNMS(LDK(KP250000000), T1S, T1R); + T1V = VSUB(T1K, T1L); + T1W = VSUB(T1N, T1O); + T1X = VBYI(VFMA(LDK(KP951056516), T1V, VMUL(LDK(KP587785252), T1W))); + T1Z = VBYI(VFNMS(LDK(KP951056516), T1W, VMUL(LDK(KP587785252), T1V))); + ST(&(x[0]), VADD(T1R, T1S), ms, &(x[0])); + T1Y = VSUB(T1T, T1Q); + ST(&(x[WS(rs, 8)]), VSUB(T1Y, T1Z), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T1Z, T1Y), ms, &(x[0])); + T1U = VADD(T1Q, T1T); + ST(&(x[WS(rs, 4)]), VSUB(T1U, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T1X, T1U), ms, &(x[0])); + } + { + V T1G, T1E, T1F, T1y, T1I, T1q, T1x, T1J, T1H; + T1G = VMUL(LDK(KP559016994), VSUB(T1C, T1D)); + T1E = VADD(T1C, T1D); + T1F = VFNMS(LDK(KP250000000), T1E, T1B); + T1q = VSUB(T1m, T1p); + T1x = VSUB(T1t, T1w); + T1y = VBYI(VFNMS(LDK(KP951056516), T1x, VMUL(LDK(KP587785252), T1q))); + T1I = VBYI(VFMA(LDK(KP951056516), T1q, VMUL(LDK(KP587785252), T1x))); + ST(&(x[WS(rs, 10)]), VADD(T1B, T1E), ms, &(x[0])); + T1J = VADD(T1G, T1F); + ST(&(x[WS(rs, 6)]), VADD(T1I, T1J), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1J, T1I), ms, &(x[0])); + T1H = VSUB(T1F, T1G); + ST(&(x[WS(rs, 2)]), VADD(T1y, T1H), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VSUB(T1H, T1y), ms, &(x[0])); + } + { + V TR, T16, T1d, T1b, T13, T1e, Tu, T1a; + TR = VFNMS(LDK(KP951056516), TQ, VMUL(LDK(KP587785252), TF)); + T16 = VFNMS(LDK(KP951056516), T15, VMUL(LDK(KP587785252), T14)); + T1d = VFMA(LDK(KP951056516), T14, VMUL(LDK(KP587785252), T15)); + T1b = VFMA(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TQ)); + { + V TV, T12, Ts, Tt; + TV = VMUL(LDK(KP559016994), VSUB(TT, TU)); + T12 = VFNMS(LDK(KP250000000), T11, T10); + T13 
= VSUB(TV, T12); + T1e = VADD(TV, T12); + Ts = VFNMS(LDK(KP250000000), Tr, T4); + Tt = VMUL(LDK(KP559016994), VSUB(Tf, Tq)); + Tu = VSUB(Ts, Tt); + T1a = VADD(Tt, Ts); + } + { + V TS, T17, T1g, T1h; + TS = VSUB(Tu, TR); + T17 = VBYI(VSUB(T13, T16)); + ST(&(x[WS(rs, 17)]), VSUB(TS, T17), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(TS, T17), ms, &(x[WS(rs, 1)])); + T1g = VADD(T1a, T1b); + T1h = VBYI(VSUB(T1e, T1d)); + ST(&(x[WS(rs, 11)]), VSUB(T1g, T1h), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T1g, T1h), ms, &(x[WS(rs, 1)])); + } + { + V T18, T19, T1c, T1f; + T18 = VADD(Tu, TR); + T19 = VBYI(VADD(T16, T13)); + ST(&(x[WS(rs, 13)]), VSUB(T18, T19), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T18, T19), ms, &(x[WS(rs, 1)])); + T1c = VSUB(T1a, T1b); + T1f = VBYI(VADD(T1d, T1e)); + ST(&(x[WS(rs, 19)]), VSUB(T1c, T1f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1c, T1f), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t2bv_20"), twinstr, &GENUS, {111, 50, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_20) (planner *p) { + X(kdft_dit_register) (p, t2bv_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,934 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * 
the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:46 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2bv_25 -include t2b.h -sign 1 */ + +/* + * This function contains 248 FP additions, 241 FP multiplications, + * (or, 67 additions, 60 multiplications, 181 fused multiply/add), + * 208 stack variables, 67 constants, and 50 memory accesses + */ +#include "t2b.h" + +static void t2bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, 
+0.447417479732227551498980015410057305749330693); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP831864738, +0.831864738706457140726048799369896829771167132); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP522616830, 
+0.522616830205754336872861364785224694908468440); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP912575812, +0.912575812670962425556968549836277086778922727); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V T25, T1B, T2y, T1K, T2s, T23, T1S, T26, T20, T1X; + { + V T1O, T2X, Te, T3L, Td, T3Q, T3j, T3b, T2R, T2M, T2f, T27, T1y, T1H, T3M; + V TW, TR, TK, T2B, T3n, T3e, T2U, T2F, T2i, T2a, Tz, T1C, T3N, TQ, T11; + V T1b, T1c, T16; + { + V T1, T1g, T1i, T1p, T1k, T1m, Tb, T1N, T6, T1M; + { + V T7, T9, T2, T4, T1f, T1h, T1o; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + { + V T8, Ta, T3, T5, T1j; + T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 18]), T7); + Ta = BYTW(&(W[TWVL * 28]), T9); + T3 = BYTW(&(W[TWVL * 8]), T2); + T5 = BYTW(&(W[TWVL * 38]), T4); + T1g = BYTW(&(W[TWVL * 4]), T1f); + T1i = BYTW(&(W[TWVL * 14]), T1h); + T1p = BYTW(&(W[TWVL * 34]), T1o); + T1k = BYTW(&(W[TWVL * 44]), T1j); + T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tb = VADD(T8, Ta); + T1N = VSUB(T8, Ta); + T6 = VADD(T3, T5); + T1M = VSUB(T3, T5); + } + } + { + V T1v, T1l, Th, Tj, T1w, T1q, Tq, Tk, Tn, Tg; + Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V Tc, Ti, T1n, Tp; + Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1v = VSUB(T1i, T1k); + T1l = VADD(T1i, T1k); + T1n = BYTW(&(W[TWVL * 24]), T1m); + Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1O = VFMA(LDK(KP618033988), T1N, T1M); + T2X = VFNMS(LDK(KP618033988), T1M, T1N); + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Th = BYTW(&(W[0]), Tg); + Tj = BYTW(&(W[TWVL * 10]), Ti); + T1w = VSUB(T1n, T1p); + T1q = VADD(T1n, T1p); + Tq = BYTW(&(W[TWVL * 30]), Tp); + Tk = LD(&(x[WS(rs, 21)]), ms, 
&(x[WS(rs, 1)])); + T3L = VADD(T1, Tc); + Td = VFNMS(LDK(KP250000000), Tc, T1); + Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + } + { + V T1x, T2K, TM, TB, Tw, Tm, Tx, Tr, TI, T2L, T1u, TD, TF, TL; + TL = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + { + V T1t, Tl, To, TH, T1s, T1r, TA, TC; + TA = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1r = VADD(T1l, T1q); + T1t = VSUB(T1q, T1l); + T1x = VFMA(LDK(KP618033988), T1w, T1v); + T2K = VFNMS(LDK(KP618033988), T1v, T1w); + Tl = BYTW(&(W[TWVL * 40]), Tk); + To = BYTW(&(W[TWVL * 20]), Tn); + TM = BYTW(&(W[TWVL * 6]), TL); + TB = BYTW(&(W[TWVL * 46]), TA); + TH = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T1s = VFNMS(LDK(KP250000000), T1r, T1g); + T3Q = VADD(T1g, T1r); + TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tj, Tl); + Tm = VADD(Tj, Tl); + Tx = VSUB(Tq, To); + Tr = VADD(To, Tq); + TI = BYTW(&(W[TWVL * 26]), TH); + T2L = VFMA(LDK(KP559016994), T1t, T1s); + T1u = VFNMS(LDK(KP559016994), T1t, T1s); + TD = BYTW(&(W[TWVL * 16]), TC); + TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + } + { + V Tu, Ty, T2E, TE, TN, TG, Tt, TV, Ts; + TV = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ts = VADD(Tm, Tr); + Tu = VSUB(Tm, Tr); + Ty = VFNMS(LDK(KP618033988), Tx, Tw); + T2E = VFMA(LDK(KP618033988), Tw, Tx); + T3j = VFNMS(LDK(KP059835404), T2K, T2L); + T3b = VFMA(LDK(KP066152395), T2L, T2K); + T2R = VFNMS(LDK(KP786782374), T2K, T2L); + T2M = VFMA(LDK(KP869845200), T2L, T2K); + T2f = VFMA(LDK(KP132830569), T1u, T1x); + T27 = VFNMS(LDK(KP120146378), T1x, T1u); + T1y = VFNMS(LDK(KP893101515), T1x, T1u); + T1H = VFMA(LDK(KP987388751), T1u, T1x); + TE = VSUB(TB, TD); + TN = VADD(TD, TB); + TG = BYTW(&(W[TWVL * 36]), TF); + Tt = VFNMS(LDK(KP250000000), Ts, Th); + T3M = VADD(Th, Ts); + TW = BYTW(&(W[TWVL * 2]), TV); + { + V TJ, TO, Tv, T2D, TY, T15, T10, T13, TP; + { + V TX, T14, TZ, T12; + TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T12 = 
LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TJ = VSUB(TG, TI); + TO = VADD(TI, TG); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + T2D = VFNMS(LDK(KP559016994), Tu, Tt); + TY = BYTW(&(W[TWVL * 12]), TX); + T15 = BYTW(&(W[TWVL * 32]), T14); + T10 = BYTW(&(W[TWVL * 42]), TZ); + T13 = BYTW(&(W[TWVL * 22]), T12); + } + TP = VADD(TN, TO); + TR = VSUB(TN, TO); + TK = VFMA(LDK(KP618033988), TJ, TE); + T2B = VFNMS(LDK(KP618033988), TE, TJ); + T3n = VFMA(LDK(KP578046249), T2D, T2E); + T3e = VFNMS(LDK(KP522847744), T2E, T2D); + T2U = VFNMS(LDK(KP987388751), T2D, T2E); + T2F = VFMA(LDK(KP893101515), T2E, T2D); + T2i = VFNMS(LDK(KP603558818), Ty, Tv); + T2a = VFMA(LDK(KP667278218), Tv, Ty); + Tz = VFNMS(LDK(KP244189809), Ty, Tv); + T1C = VFMA(LDK(KP269969613), Tv, Ty); + T3N = VADD(TM, TP); + TQ = VFMS(LDK(KP250000000), TP, TM); + T11 = VADD(TY, T10); + T1b = VSUB(TY, T10); + T1c = VSUB(T15, T13); + T16 = VADD(T13, T15); + } + } + } + } + } + { + V T2z, Tf, T3W, T3O, T1d, T2H, T3m, T2j, T2b, TT, T1D, T2G, T35, T2V, T2Z; + V T3A, T3g, T2I, T1a, T3R, T3X; + T2z = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + { + V TS, T2A, T17, T19; + TS = VFNMS(LDK(KP559016994), TR, TQ); + T2A = VFMA(LDK(KP559016994), TR, TQ); + T3W = VSUB(T3M, T3N); + T3O = VADD(T3M, T3N); + T1d = VFNMS(LDK(KP618033988), T1c, T1b); + T2H = VFMA(LDK(KP618033988), T1b, T1c); + T17 = VADD(T11, T16); + T19 = VSUB(T16, T11); + { + V T3f, T2T, T2C, T18, T3P; + T3m = VFMA(LDK(KP447533225), T2B, T2A); + T3f = VFNMS(LDK(KP494780565), T2A, T2B); + T2T = VFNMS(LDK(KP132830569), T2A, T2B); + T2C = VFMA(LDK(KP120146378), T2B, T2A); + T2j = VFNMS(LDK(KP786782374), TK, TS); + T2b = VFMA(LDK(KP869845200), TS, TK); + TT = VFNMS(LDK(KP667278218), TS, TK); + T1D = VFMA(LDK(KP603558818), TK, TS); + T18 = VFNMS(LDK(KP250000000), T17, TW); + T3P = VADD(TW, T17); + T2G = VFMA(LDK(KP734762448), T2F, T2C); + T35 = VFNMS(LDK(KP734762448), T2F, T2C); + T2V = VFNMS(LDK(KP734762448), T2U, T2T); + T2Z = 
VFMA(LDK(KP734762448), T2U, T2T); + T3A = VFMA(LDK(KP982009705), T3f, T3e); + T3g = VFNMS(LDK(KP982009705), T3f, T3e); + T2I = VFMA(LDK(KP559016994), T19, T18); + T1a = VFNMS(LDK(KP559016994), T19, T18); + T3R = VADD(T3P, T3Q); + T3X = VSUB(T3P, T3Q); + } + } + { + V T2n, T2t, T1V, T22, T2l, T2d, T1Q, T1I, T2w, T1A, T1F, T2q; + { + V T2k, T1G, T28, T2g, T3K, T3E, T3a, T34, T3x, T3H, T2c, TU, T1T, T1U, T1z; + V T3o, T3t; + T2n = VFNMS(LDK(KP912575812), T2j, T2i); + T2k = VFMA(LDK(KP912575812), T2j, T2i); + T3o = VFNMS(LDK(KP921078979), T3n, T3m); + T3t = VFMA(LDK(KP921078979), T3n, T3m); + { + V T3c, T2Q, T2J, T3k, T1e; + T3c = VFNMS(LDK(KP667278218), T2I, T2H); + T2Q = VFNMS(LDK(KP059835404), T2H, T2I); + T2J = VFMA(LDK(KP066152395), T2I, T2H); + T3k = VFMA(LDK(KP603558818), T2H, T2I); + T1G = VFMA(LDK(KP578046249), T1a, T1d); + T1e = VFNMS(LDK(KP522847744), T1d, T1a); + T28 = VFNMS(LDK(KP494780565), T1a, T1d); + T2g = VFMA(LDK(KP447533225), T1d, T1a); + { + V T3U, T3S, T40, T3Y; + T3U = VSUB(T3O, T3R); + T3S = VADD(T3O, T3R); + T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X)); + T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W)); + { + V T3s, T3l, T2N, T36; + T3s = VFNMS(LDK(KP845997307), T3k, T3j); + T3l = VFMA(LDK(KP845997307), T3k, T3j); + T2N = VFNMS(LDK(KP772036680), T2M, T2J); + T36 = VFMA(LDK(KP772036680), T2M, T2J); + { + V T30, T2S, T3d, T3z, T3T; + T30 = VFNMS(LDK(KP772036680), T2R, T2Q); + T2S = VFMA(LDK(KP772036680), T2R, T2Q); + T3d = VFNMS(LDK(KP845997307), T3c, T3b); + T3z = VFMA(LDK(KP845997307), T3c, T3b); + ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0])); + T3T = VFNMS(LDK(KP250000000), T3S, T3L); + { + V T3C, T3p, T2O, T37; + T3C = VFMA(LDK(KP906616052), T3o, T3l); + T3p = VFNMS(LDK(KP906616052), T3o, T3l); + T2O = VFMA(LDK(KP956723877), T2N, T2G); + T37 = VFMA(LDK(KP522616830), T2V, T36); + { + V T31, T2W, T3u, T3h; + T31 = VFNMS(LDK(KP522616830), T2G, T30); + T2W = VFMA(LDK(KP945422727), T2V, T2S); + T3u = 
VFNMS(LDK(KP923225144), T3g, T3d); + T3h = VFMA(LDK(KP923225144), T3g, T3d); + { + V T3I, T3B, T3V, T3Z; + T3I = VFNMS(LDK(KP669429328), T3z, T3A); + T3B = VFMA(LDK(KP570584518), T3A, T3z); + T3V = VFMA(LDK(KP559016994), T3U, T3T); + T3Z = VFNMS(LDK(KP559016994), T3U, T3T); + { + V T3y, T3q, T2P, T38; + T3y = VFMA(LDK(KP262346850), T3p, T2X); + T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p)); + T2P = VFMA(LDK(KP992114701), T2O, T2z); + T38 = VFNMS(LDK(KP690983005), T37, T2S); + { + V T32, T2Y, T3v, T3F; + T32 = VFMA(LDK(KP763932022), T31, T2N); + T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W)); + T3v = VFNMS(LDK(KP997675361), T3u, T3t); + T3F = VFNMS(LDK(KP904508497), T3u, T3s); + { + V T3i, T3r, T3J, T3D; + T3i = VFMA(LDK(KP949179823), T3h, T2z); + T3r = VFNMS(LDK(KP237294955), T3h, T2z); + T3J = VFNMS(LDK(KP669429328), T3C, T3I); + T3D = VFMA(LDK(KP618033988), T3C, T3B); + ST(&(x[WS(rs, 20)]), VFNMSI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFMAI(T3Y, T3V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T40, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VFNMSI(T40, T3Z), ms, &(x[0])); + { + V T39, T33, T3w, T3G; + T39 = VFMA(LDK(KP855719849), T38, T35); + T33 = VFNMS(LDK(KP855719849), T32, T2Z); + ST(&(x[WS(rs, 3)]), VFMAI(T2Y, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 22)]), VFNMSI(T2Y, T2P), ms, &(x[0])); + T3w = VFMA(LDK(KP560319534), T3v, T3s); + T3G = VFNMS(LDK(KP681693190), T3F, T3t); + ST(&(x[WS(rs, 2)]), VFMAI(T3q, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 23)]), VFNMSI(T3q, T3i), ms, &(x[WS(rs, 1)])); + T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y)); + T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y)); + T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X)); + T34 = VFMA(LDK(KP897376177), T33, T2z); + T3x = VFNMS(LDK(KP949179823), T3w, T3r); + T3H = VFNMS(LDK(KP860541664), T3G, T3r); + T2t = VFNMS(LDK(KP912575812), T2b, T2a); + T2c = VFMA(LDK(KP912575812), T2b, T2a); + TU 
= VFMA(LDK(KP829049696), TT, Tz); + T1T = VFNMS(LDK(KP829049696), TT, Tz); + T1U = VFNMS(LDK(KP831864738), T1y, T1e); + T1z = VFMA(LDK(KP831864738), T1y, T1e); + } + } + } + } + } + } + } + } + } + } + } + { + V T2o, T2h, T29, T2u, T2v, T2p; + T2o = VFNMS(LDK(KP958953096), T2g, T2f); + T2h = VFMA(LDK(KP958953096), T2g, T2f); + ST(&(x[WS(rs, 17)]), VFNMSI(T3a, T34), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFMAI(T3a, T34), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFMAI(T3E, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 12)]), VFNMSI(T3E, T3x), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFNMSI(T3K, T3H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VFMAI(T3K, T3H), ms, &(x[0])); + T1V = VFMA(LDK(KP559154169), T1U, T1T); + T22 = VFNMS(LDK(KP683113946), T1T, T1U); + T29 = VFNMS(LDK(KP867381224), T28, T27); + T2u = VFMA(LDK(KP867381224), T28, T27); + T2l = VFMA(LDK(KP894834959), T2k, T2h); + T2v = VFMA(LDK(KP447417479), T2k, T2u); + T2d = VFNMS(LDK(KP809385824), T2c, T29); + T2p = VFMA(LDK(KP447417479), T2c, T2o); + T1Q = VFMA(LDK(KP831864738), T1H, T1G); + T1I = VFNMS(LDK(KP831864738), T1H, T1G); + T2w = VFNMS(LDK(KP763932022), T2v, T2h); + T1A = VFMA(LDK(KP904730450), T1z, TU); + T1F = VFNMS(LDK(KP904730450), T1z, TU); + T2q = VFMA(LDK(KP690983005), T2p, T29); + } + } + { + V T2e, T1E, T1P, T2m; + T2e = VFNMS(LDK(KP992114701), T2d, Tf); + T1E = VFMA(LDK(KP916574801), T1D, T1C); + T1P = VFNMS(LDK(KP916574801), T1D, T1C); + T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O)); + { + V T1J, T2r, T1R, T1W, T1Z, T2x; + T2x = VFNMS(LDK(KP999544308), T2w, T2t); + T1J = VFNMS(LDK(KP904730450), T1I, T1F); + T25 = VFMA(LDK(KP968583161), T1A, Tf); + T1B = VFNMS(LDK(KP242145790), T1A, Tf); + T2r = VFNMS(LDK(KP999544308), T2q, T2n); + T1R = VFMA(LDK(KP904730450), T1Q, T1P); + T1W = VFNMS(LDK(KP904730450), T1Q, T1P); + T1Z = VADD(T1E, T1F); + ST(&(x[WS(rs, 21)]), VFMAI(T2m, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(T2m, T2e), ms, &(x[0])); + T2y = 
VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O)); + T1K = VFNMS(LDK(KP618033988), T1J, T1E); + T2s = VFNMS(LDK(KP803003575), T2r, Tf); + T23 = VFMA(LDK(KP617882369), T1W, T22); + T1S = VFNMS(LDK(KP242145790), T1R, T1O); + T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O)); + T20 = VFNMS(LDK(KP683113946), T1Z, T1I); + T1X = VFMA(LDK(KP559016994), T1W, T1V); + } + } + } + } + } + { + V T1L, T24, T21, T1Y; + T1L = VFNMS(LDK(KP876091699), T1K, T1B); + ST(&(x[WS(rs, 16)]), VFMAI(T2y, T2s), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(T2y, T2s), ms, &(x[WS(rs, 1)])); + T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S)); + ST(&(x[WS(rs, 24)]), VFNMSI(T26, T25), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T26, T25), ms, &(x[WS(rs, 1)])); + T21 = VFMA(LDK(KP792626838), T20, T1B); + T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S)); + ST(&(x[WS(rs, 11)]), VFMAI(T24, T21), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFNMSI(T24, T21), ms, &(x[0])); + ST(&(x[WS(rs, 19)]), VFNMSI(T1Y, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFMAI(T1Y, T1L), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t2bv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_25) (planner *p) { + X(kdft_dit_register) (p, t2bv_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2bv_25 -include t2b.h -sign 1 */ + +/* + * This function contains 248 FP additions, 188 FP multiplications, + * (or, 171 additions, 
111 multiplications, 77 fused multiply/add), + * 100 stack variables, 40 constants, and 50 memory accesses + */ +#include "t2b.h" + +static void t2bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + 
DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V T1A, T1z, T1R, T1S, T1B, T1C, T1Q, T2L, T1l, T2v, T1i, T3e, T2u, Tb, T2i; + V Tj, T3b, T2h, Tv, T2k, TD, T3a, T2l, T11, T2s, TY, T3d, T2r; + { + V T1v, T1x, T1y, T1q, T1s, T1t, T1P; + T1A = LD(&(x[0]), ms, &(x[0])); + { + V T1u, T1w, T1p, T1r; + T1u = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T1v = BYTW(&(W[TWVL * 18]), T1u); + T1w = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1x = BYTW(&(W[TWVL * 28]), T1w); + T1y = VADD(T1v, T1x); + T1p = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1q = BYTW(&(W[TWVL * 8]), T1p); + T1r = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1s = BYTW(&(W[TWVL * 38]), T1r); + T1t = VADD(T1q, T1s); + } + T1z = VMUL(LDK(KP559016994), VSUB(T1t, T1y)); + T1R = VSUB(T1v, T1x); + T1S = VMUL(LDK(KP587785252), T1R); + T1B = VADD(T1t, T1y); + T1C = 
VFNMS(LDK(KP250000000), T1B, T1A); + T1P = VSUB(T1q, T1s); + T1Q = VMUL(LDK(KP951056516), T1P); + T2L = VMUL(LDK(KP587785252), T1P); + } + { + V T1f, T19, T1b, T1c, T14, T16, T17, T1e; + T1e = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1f = BYTW(&(W[TWVL * 4]), T1e); + { + V T18, T1a, T13, T15; + T18 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T19 = BYTW(&(W[TWVL * 24]), T18); + T1a = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1b = BYTW(&(W[TWVL * 34]), T1a); + T1c = VADD(T19, T1b); + T13 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T14 = BYTW(&(W[TWVL * 14]), T13); + T15 = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T16 = BYTW(&(W[TWVL * 44]), T15); + T17 = VADD(T14, T16); + } + { + V T1j, T1k, T1d, T1g, T1h; + T1j = VSUB(T14, T16); + T1k = VSUB(T19, T1b); + T1l = VFMA(LDK(KP475528258), T1j, VMUL(LDK(KP293892626), T1k)); + T2v = VFNMS(LDK(KP475528258), T1k, VMUL(LDK(KP293892626), T1j)); + T1d = VMUL(LDK(KP559016994), VSUB(T17, T1c)); + T1g = VADD(T17, T1c); + T1h = VFNMS(LDK(KP250000000), T1g, T1f); + T1i = VADD(T1d, T1h); + T3e = VADD(T1f, T1g); + T2u = VSUB(T1h, T1d); + } + } + { + V Tg, T7, T9, Td, T2, T4, Tc, Tf; + Tf = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tg = BYTW(&(W[TWVL * 6]), Tf); + { + V T6, T8, T1, T3; + T6 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 26]), T6); + T8 = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 36]), T8); + Td = VADD(T7, T9); + T1 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[TWVL * 16]), T1); + T3 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 46]), T3); + Tc = VADD(T2, T4); + } + { + V T5, Ta, Te, Th, Ti; + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VFMA(LDK(KP475528258), T5, VMUL(LDK(KP293892626), Ta)); + T2i = VFNMS(LDK(KP475528258), Ta, VMUL(LDK(KP293892626), T5)); + Te = VMUL(LDK(KP559016994), VSUB(Tc, Td)); + Th = VADD(Tc, Td); + Ti = VFNMS(LDK(KP250000000), Th, Tg); + Tj = VADD(Te, Ti); + T3b = VADD(Tg, Th); + T2h = VSUB(Ti, Te); + } + } + { + V TA, Tr, Tt, Tx, Tm, 
To, Tw, Tz; + Tz = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TA = BYTW(&(W[0]), Tz); + { + V Tq, Ts, Tl, Tn; + Tq = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tr = BYTW(&(W[TWVL * 20]), Tq); + Ts = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 30]), Ts); + Tx = VADD(Tr, Tt); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tm = BYTW(&(W[TWVL * 10]), Tl); + Tn = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + To = BYTW(&(W[TWVL * 40]), Tn); + Tw = VADD(Tm, To); + } + { + V Tp, Tu, Ty, TB, TC; + Tp = VSUB(Tm, To); + Tu = VSUB(Tr, Tt); + Tv = VFMA(LDK(KP475528258), Tp, VMUL(LDK(KP293892626), Tu)); + T2k = VFNMS(LDK(KP475528258), Tu, VMUL(LDK(KP293892626), Tp)); + Ty = VMUL(LDK(KP559016994), VSUB(Tw, Tx)); + TB = VADD(Tw, Tx); + TC = VFNMS(LDK(KP250000000), TB, TA); + TD = VADD(Ty, TC); + T3a = VADD(TA, TB); + T2l = VSUB(TC, Ty); + } + } + { + V TV, TP, TR, TS, TK, TM, TN, TU; + TU = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TV = BYTW(&(W[TWVL * 2]), TU); + { + V TO, TQ, TJ, TL; + TO = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TP = BYTW(&(W[TWVL * 22]), TO); + TQ = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TR = BYTW(&(W[TWVL * 32]), TQ); + TS = VADD(TP, TR); + TJ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TK = BYTW(&(W[TWVL * 12]), TJ); + TL = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TM = BYTW(&(W[TWVL * 42]), TL); + TN = VADD(TK, TM); + } + { + V TZ, T10, TT, TW, TX; + TZ = VSUB(TK, TM); + T10 = VSUB(TP, TR); + T11 = VFMA(LDK(KP475528258), TZ, VMUL(LDK(KP293892626), T10)); + T2s = VFNMS(LDK(KP475528258), T10, VMUL(LDK(KP293892626), TZ)); + TT = VMUL(LDK(KP559016994), VSUB(TN, TS)); + TW = VADD(TN, TS); + TX = VFNMS(LDK(KP250000000), TW, TV); + TY = VADD(TT, TX); + T3d = VADD(TV, TW); + T2r = VSUB(TX, TT); + } + } + { + V T3g, T3o, T3k, T3l, T3j, T3m, T3p, T3n; + { + V T3c, T3f, T3h, T3i; + T3c = VSUB(T3a, T3b); + T3f = VSUB(T3d, T3e); + T3g = VBYI(VFMA(LDK(KP951056516), T3c, VMUL(LDK(KP587785252), T3f))); + T3o = VBYI(VFNMS(LDK(KP951056516), T3f, VMUL(LDK(KP587785252), 
T3c))); + T3k = VADD(T1A, T1B); + T3h = VADD(T3a, T3b); + T3i = VADD(T3d, T3e); + T3l = VADD(T3h, T3i); + T3j = VMUL(LDK(KP559016994), VSUB(T3h, T3i)); + T3m = VFNMS(LDK(KP250000000), T3l, T3k); + } + ST(&(x[0]), VADD(T3k, T3l), ms, &(x[0])); + T3p = VSUB(T3m, T3j); + ST(&(x[WS(rs, 10)]), VADD(T3o, T3p), ms, &(x[0])); + ST(&(x[WS(rs, 15)]), VSUB(T3p, T3o), ms, &(x[WS(rs, 1)])); + T3n = VADD(T3j, T3m); + ST(&(x[WS(rs, 5)]), VADD(T3g, T3n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 20)]), VSUB(T3n, T3g), ms, &(x[0])); + } + { + V T2z, T2M, T2U, T2V, T2W, T34, T35, T36, T2X, T2Y, T2Z, T31, T32, T33, T2n; + V T2N, T2E, T2K, T2y, T2H, T2A, T2G, T38, T39; + T2z = VSUB(T1C, T1z); + T2M = VFNMS(LDK(KP951056516), T1R, T2L); + T2U = VFMA(LDK(KP1_369094211), T2k, VMUL(LDK(KP728968627), T2l)); + T2V = VFNMS(LDK(KP992114701), T2h, VMUL(LDK(KP250666467), T2i)); + T2W = VADD(T2U, T2V); + T34 = VFNMS(LDK(KP125581039), T2s, VMUL(LDK(KP998026728), T2r)); + T35 = VFMA(LDK(KP1_274847979), T2v, VMUL(LDK(KP770513242), T2u)); + T36 = VADD(T34, T35); + T2X = VFMA(LDK(KP1_996053456), T2s, VMUL(LDK(KP062790519), T2r)); + T2Y = VFNMS(LDK(KP637423989), T2u, VMUL(LDK(KP1_541026485), T2v)); + T2Z = VADD(T2X, T2Y); + T31 = VFNMS(LDK(KP1_457937254), T2k, VMUL(LDK(KP684547105), T2l)); + T32 = VFMA(LDK(KP1_984229402), T2i, VMUL(LDK(KP125333233), T2h)); + T33 = VADD(T31, T32); + { + V T2j, T2m, T2I, T2C, T2D, T2J; + T2j = VFNMS(LDK(KP851558583), T2i, VMUL(LDK(KP904827052), T2h)); + T2m = VFMA(LDK(KP1_752613360), T2k, VMUL(LDK(KP481753674), T2l)); + T2I = VADD(T2m, T2j); + T2C = VFMA(LDK(KP1_071653589), T2s, VMUL(LDK(KP844327925), T2r)); + T2D = VFMA(LDK(KP125581039), T2v, VMUL(LDK(KP998026728), T2u)); + T2J = VADD(T2C, T2D); + T2n = VSUB(T2j, T2m); + T2N = VADD(T2I, T2J); + T2E = VSUB(T2C, T2D); + T2K = VMUL(LDK(KP559016994), VSUB(T2I, T2J)); + } + { + V T2o, T2p, T2q, T2t, T2w, T2x; + T2o = VFNMS(LDK(KP963507348), T2k, VMUL(LDK(KP876306680), T2l)); + T2p = VFMA(LDK(KP1_809654104), T2i, 
VMUL(LDK(KP425779291), T2h)); + T2q = VSUB(T2o, T2p); + T2t = VFNMS(LDK(KP1_688655851), T2s, VMUL(LDK(KP535826794), T2r)); + T2w = VFNMS(LDK(KP1_996053456), T2v, VMUL(LDK(KP062790519), T2u)); + T2x = VADD(T2t, T2w); + T2y = VMUL(LDK(KP559016994), VSUB(T2q, T2x)); + T2H = VSUB(T2t, T2w); + T2A = VADD(T2q, T2x); + T2G = VADD(T2o, T2p); + } + { + V T2S, T2T, T30, T37; + T2S = VADD(T2z, T2A); + T2T = VBYI(VADD(T2M, T2N)); + ST(&(x[WS(rs, 23)]), VSUB(T2S, T2T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(T2S, T2T), ms, &(x[0])); + T30 = VADD(T2z, VADD(T2W, T2Z)); + T37 = VBYI(VSUB(VADD(T33, T36), T2M)); + ST(&(x[WS(rs, 22)]), VSUB(T30, T37), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T30, T37), ms, &(x[WS(rs, 1)])); + } + T38 = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T2U, T2V), VFMA(LDK(KP309016994), T33, VFNMS(LDK(KP809016994), T36, VMUL(LDK(KP587785252), VSUB(T2X, T2Y))))), T2M)); + T39 = VFMA(LDK(KP309016994), T2W, VFMA(LDK(KP951056516), VSUB(T32, T31), VFMA(LDK(KP587785252), VSUB(T35, T34), VFNMS(LDK(KP809016994), T2Z, T2z)))); + ST(&(x[WS(rs, 8)]), VADD(T38, T39), ms, &(x[0])); + ST(&(x[WS(rs, 17)]), VSUB(T39, T38), ms, &(x[WS(rs, 1)])); + { + V T2F, T2Q, T2P, T2R, T2B, T2O; + T2B = VFNMS(LDK(KP250000000), T2A, T2z); + T2F = VFMA(LDK(KP951056516), T2n, VADD(T2y, VFNMS(LDK(KP587785252), T2E, T2B))); + T2Q = VFMA(LDK(KP587785252), T2n, VFMA(LDK(KP951056516), T2E, VSUB(T2B, T2y))); + T2O = VFNMS(LDK(KP250000000), T2N, T2M); + T2P = VBYI(VADD(VFMA(LDK(KP951056516), T2G, VMUL(LDK(KP587785252), T2H)), VADD(T2K, T2O))); + T2R = VBYI(VADD(VFNMS(LDK(KP951056516), T2H, VMUL(LDK(KP587785252), T2G)), VSUB(T2O, T2K))); + ST(&(x[WS(rs, 18)]), VSUB(T2F, T2P), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T2Q, T2R), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VADD(T2F, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T2Q, T2R), ms, &(x[WS(rs, 1)])); + } + } + { + V T1D, T1T, T21, T22, T23, T2b, T2c, T2d, T24, T25, T26, T28, T29, T2a, TF; + V T1U, T1I, T1O, T1o, T1L, T1E, T1K, T2f, 
T2g; + T1D = VADD(T1z, T1C); + T1T = VADD(T1Q, T1S); + T21 = VFMA(LDK(KP1_688655851), Tv, VMUL(LDK(KP535826794), TD)); + T22 = VFMA(LDK(KP1_541026485), Tb, VMUL(LDK(KP637423989), Tj)); + T23 = VSUB(T21, T22); + T2b = VFMA(LDK(KP851558583), T11, VMUL(LDK(KP904827052), TY)); + T2c = VFMA(LDK(KP1_984229402), T1l, VMUL(LDK(KP125333233), T1i)); + T2d = VADD(T2b, T2c); + T24 = VFNMS(LDK(KP425779291), TY, VMUL(LDK(KP1_809654104), T11)); + T25 = VFNMS(LDK(KP992114701), T1i, VMUL(LDK(KP250666467), T1l)); + T26 = VADD(T24, T25); + T28 = VFNMS(LDK(KP1_071653589), Tv, VMUL(LDK(KP844327925), TD)); + T29 = VFNMS(LDK(KP770513242), Tj, VMUL(LDK(KP1_274847979), Tb)); + T2a = VADD(T28, T29); + { + V Tk, TE, T1M, T1G, T1H, T1N; + Tk = VFMA(LDK(KP1_071653589), Tb, VMUL(LDK(KP844327925), Tj)); + TE = VFMA(LDK(KP1_937166322), Tv, VMUL(LDK(KP248689887), TD)); + T1M = VADD(TE, Tk); + T1G = VFMA(LDK(KP1_752613360), T11, VMUL(LDK(KP481753674), TY)); + T1H = VFMA(LDK(KP1_457937254), T1l, VMUL(LDK(KP684547105), T1i)); + T1N = VADD(T1G, T1H); + TF = VSUB(Tk, TE); + T1U = VADD(T1M, T1N); + T1I = VSUB(T1G, T1H); + T1O = VMUL(LDK(KP559016994), VSUB(T1M, T1N)); + } + { + V TG, TH, TI, T12, T1m, T1n; + TG = VFNMS(LDK(KP497379774), Tv, VMUL(LDK(KP968583161), TD)); + TH = VFNMS(LDK(KP1_688655851), Tb, VMUL(LDK(KP535826794), Tj)); + TI = VADD(TG, TH); + T12 = VFNMS(LDK(KP963507348), T11, VMUL(LDK(KP876306680), TY)); + T1m = VFNMS(LDK(KP1_369094211), T1l, VMUL(LDK(KP728968627), T1i)); + T1n = VADD(T12, T1m); + T1o = VMUL(LDK(KP559016994), VSUB(TI, T1n)); + T1L = VSUB(T12, T1m); + T1E = VADD(TI, T1n); + T1K = VSUB(TG, TH); + } + { + V T1Z, T20, T27, T2e; + T1Z = VADD(T1D, T1E); + T20 = VBYI(VADD(T1T, T1U)); + ST(&(x[WS(rs, 24)]), VSUB(T1Z, T20), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T1Z, T20), ms, &(x[WS(rs, 1)])); + T27 = VADD(T1D, VADD(T23, T26)); + T2e = VBYI(VSUB(VADD(T2a, T2d), T1T)); + ST(&(x[WS(rs, 21)]), VSUB(T27, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T27, T2e), ms, &(x[0])); 
+ } + T2f = VBYI(VSUB(VFMA(LDK(KP309016994), T2a, VFMA(LDK(KP951056516), VADD(T21, T22), VFNMS(LDK(KP809016994), T2d, VMUL(LDK(KP587785252), VSUB(T24, T25))))), T1T)); + T2g = VFMA(LDK(KP951056516), VSUB(T29, T28), VFMA(LDK(KP309016994), T23, VFMA(LDK(KP587785252), VSUB(T2c, T2b), VFNMS(LDK(KP809016994), T26, T1D)))); + ST(&(x[WS(rs, 9)]), VADD(T2f, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VSUB(T2g, T2f), ms, &(x[0])); + { + V T1J, T1X, T1W, T1Y, T1F, T1V; + T1F = VFNMS(LDK(KP250000000), T1E, T1D); + T1J = VFMA(LDK(KP951056516), TF, VADD(T1o, VFNMS(LDK(KP587785252), T1I, T1F))); + T1X = VFMA(LDK(KP587785252), TF, VFMA(LDK(KP951056516), T1I, VSUB(T1F, T1o))); + T1V = VFNMS(LDK(KP250000000), T1U, T1T); + T1W = VBYI(VADD(VFMA(LDK(KP951056516), T1K, VMUL(LDK(KP587785252), T1L)), VADD(T1O, T1V))); + T1Y = VBYI(VADD(VFNMS(LDK(KP951056516), T1L, VMUL(LDK(KP587785252), T1K)), VSUB(T1V, T1O))); + ST(&(x[WS(rs, 19)]), VSUB(T1J, T1W), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T1X, T1Y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VADD(T1J, T1W), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1X, T1Y), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t2bv_25"), twinstr, &GENUS, {171, 111, 77, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_25) (planner *p) { + X(kdft_dit_register) (p, t2bv_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 
+1,865 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:41 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t2bv_32 -include t2b.h -sign 1 */ + +/* + * This function contains 217 FP additions, 160 FP multiplications, + * (or, 119 additions, 62 multiplications, 98 fused multiply/add), + * 104 stack variables, 7 constants, and 64 memory accesses + */ +#include "t2b.h" + +static void t2bv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, 
+0.414213562373095048801688724209698078569671875); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T26, T25, T2a, T2i, T24, T2c, T2g, T2k, T2h, T27; + { + V T4, T1z, T2o, T32, T2r, T3f, Tf, T1A, T34, T2O, T1D, TC, T33, T2L, T1C; + V Tr, T2C, T3a, T2F, T3b, T1r, T21, T1k, T20, TQ, TM, TS, TL, T2t, TJ; + V T10, T2u; + { + V Tt, T9, T2p, Te, T2q, TA, Tu, Tx; + { + V T1, T1x, T2, T1v; + T1 = LD(&(x[0]), ms, &(x[0])); + T1x = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1v = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + { + V T5, Tc, T7, Ta, T2m, T2n; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + { + V T1y, T3, T1w, T6, Td, T8, Tb, Ts, Tz; + Ts = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T1y = BYTW(&(W[TWVL * 46]), T1x); + T3 = BYTW(&(W[TWVL * 30]), T2); + T1w = BYTW(&(W[TWVL * 14]), T1v); + T6 = BYTW(&(W[TWVL * 6]), T5); + Td = BYTW(&(W[TWVL * 22]), Tc); + T8 = BYTW(&(W[TWVL * 38]), T7); + Tb = BYTW(&(W[TWVL * 54]), Ta); + Tt = BYTW(&(W[TWVL * 58]), Ts); + Tz = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T4 = VSUB(T1, T3); + T2m = VADD(T1, T3); + T1z = VSUB(T1w, T1y); + T2n = VADD(T1w, T1y); + T9 = VSUB(T6, T8); + T2p = VADD(T6, T8); + Te = VSUB(Tb, Td); + T2q = VADD(Tb, Td); + TA = BYTW(&(W[TWVL * 10]), Tz); + } + Tu = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2o = VADD(T2m, T2n); + T32 = VSUB(T2m, T2n); + Tx = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + } + } + { + V Tv, To, Ty, Ti, Tj, Tm, Th; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2r = VADD(T2p, T2q); + T3f = VSUB(T2p, T2q); + Tf = VADD(T9, Te); + T1A = VSUB(T9, Te); + Tv = BYTW(&(W[TWVL * 26]), Tu); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 42]), Tx); + Ti = BYTW(&(W[TWVL * 2]), Th); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tm = 
LD(&(x[WS(rs, 10)]), ms, &(x[0])); + { + V T1f, T1h, T1a, T1c, T18, T2A, T2B, T1p; + { + V T15, T17, T1o, T1m; + { + V Tw, T2M, Tp, T2N, TB, Tk, Tn, T1n, T14, T16; + T14 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T16 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tt, Tv); + T2M = VADD(Tt, Tv); + Tp = BYTW(&(W[TWVL * 50]), To); + T2N = VADD(TA, Ty); + TB = VSUB(Ty, TA); + Tk = BYTW(&(W[TWVL * 34]), Tj); + Tn = BYTW(&(W[TWVL * 18]), Tm); + T15 = BYTW(&(W[TWVL * 60]), T14); + T17 = BYTW(&(W[TWVL * 28]), T16); + T1n = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + { + V T2J, Tl, T2K, Tq, T1l; + T1l = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T34 = VSUB(T2M, T2N); + T2O = VADD(T2M, T2N); + T1D = VFMA(LDK(KP414213562), Tw, TB); + TC = VFNMS(LDK(KP414213562), TB, Tw); + T2J = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + T2K = VADD(Tn, Tp); + Tq = VSUB(Tn, Tp); + T1o = BYTW(&(W[TWVL * 12]), T1n); + T1m = BYTW(&(W[TWVL * 44]), T1l); + { + V T1e, T1g, T19, T1b; + T1e = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1g = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T19 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1b = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T33 = VSUB(T2J, T2K); + T2L = VADD(T2J, T2K); + T1C = VFMA(LDK(KP414213562), Tl, Tq); + Tr = VFNMS(LDK(KP414213562), Tq, Tl); + T1f = BYTW(&(W[TWVL * 52]), T1e); + T1h = BYTW(&(W[TWVL * 20]), T1g); + T1a = BYTW(&(W[TWVL * 4]), T19); + T1c = BYTW(&(W[TWVL * 36]), T1b); + } + } + } + T18 = VSUB(T15, T17); + T2A = VADD(T15, T17); + T2B = VADD(T1o, T1m); + T1p = VSUB(T1m, T1o); + } + { + V TG, TI, TZ, TX; + { + V T1i, T2E, T1d, T2D, TH, TY, TF; + TF = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T1i = VSUB(T1f, T1h); + T2E = VADD(T1f, T1h); + T1d = VSUB(T1a, T1c); + T2D = VADD(T1a, T1c); + TH = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TY = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T2C = VADD(T2A, T2B); + T3a = VSUB(T2A, T2B); + TG = BYTW(&(W[0]), TF); + { + V TW, T1j, T1q, TP, TR, TK; + TW = LD(&(x[WS(rs, 
9)]), ms, &(x[WS(rs, 1)])); + T2F = VADD(T2D, T2E); + T3b = VSUB(T2E, T2D); + T1j = VADD(T1d, T1i); + T1q = VSUB(T1i, T1d); + TI = BYTW(&(W[TWVL * 32]), TH); + TZ = BYTW(&(W[TWVL * 48]), TY); + TP = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TX = BYTW(&(W[TWVL * 16]), TW); + TR = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TK = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1r = VFMA(LDK(KP707106781), T1q, T1p); + T21 = VFNMS(LDK(KP707106781), T1q, T1p); + T1k = VFMA(LDK(KP707106781), T1j, T18); + T20 = VFNMS(LDK(KP707106781), T1j, T18); + TQ = BYTW(&(W[TWVL * 56]), TP); + TM = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TS = BYTW(&(W[TWVL * 24]), TR); + TL = BYTW(&(W[TWVL * 8]), TK); + } + } + T2t = VADD(TG, TI); + TJ = VSUB(TG, TI); + T10 = VSUB(TX, TZ); + T2u = VADD(TX, TZ); + } + } + } + } + { + V T2s, TT, T2x, T2P, T2Y, T2G, T37, T2v, T2w, TO, T2W, T30, T2U, TN, T2V; + T2s = VSUB(T2o, T2r); + T2U = VADD(T2o, T2r); + TN = BYTW(&(W[TWVL * 40]), TM); + TT = VSUB(TQ, TS); + T2x = VADD(TQ, TS); + T2P = VSUB(T2L, T2O); + T2V = VADD(T2L, T2O); + T2Y = VADD(T2C, T2F); + T2G = VSUB(T2C, T2F); + T37 = VSUB(T2t, T2u); + T2v = VADD(T2t, T2u); + T2w = VADD(TL, TN); + TO = VSUB(TL, TN); + T2W = VSUB(T2U, T2V); + T30 = VADD(T2U, T2V); + { + V T1Y, T12, T1X, TV, T3n, T3t, T3m, T3q; + { + V T3o, T36, T3r, T3h, T3k, T3p, T3d, T3s, T2H, T2Q, T2Z, T31; + { + V T35, T3g, T38, T2y, T11, TU, T3c, T3j; + T35 = VADD(T33, T34); + T3g = VSUB(T33, T34); + T38 = VSUB(T2w, T2x); + T2y = VADD(T2w, T2x); + T11 = VSUB(TO, TT); + TU = VADD(TO, TT); + T3c = VFNMS(LDK(KP414213562), T3b, T3a); + T3j = VFMA(LDK(KP414213562), T3a, T3b); + T3o = VFNMS(LDK(KP707106781), T35, T32); + T36 = VFMA(LDK(KP707106781), T35, T32); + T3r = VFNMS(LDK(KP707106781), T3g, T3f); + T3h = VFMA(LDK(KP707106781), T3g, T3f); + { + V T3i, T39, T2z, T2X; + T3i = VFMA(LDK(KP414213562), T37, T38); + T39 = VFNMS(LDK(KP414213562), T38, T37); + T2z = VSUB(T2v, T2y); + T2X = VADD(T2v, T2y); + T1Y = VFNMS(LDK(KP707106781), 
T11, T10); + T12 = VFMA(LDK(KP707106781), T11, T10); + T1X = VFNMS(LDK(KP707106781), TU, TJ); + TV = VFMA(LDK(KP707106781), TU, TJ); + T3k = VSUB(T3i, T3j); + T3p = VADD(T3i, T3j); + T3d = VADD(T39, T3c); + T3s = VSUB(T39, T3c); + T2H = VADD(T2z, T2G); + T2Q = VSUB(T2z, T2G); + T2Z = VSUB(T2X, T2Y); + T31 = VADD(T2X, T2Y); + } + } + { + V T3v, T3u, T3l, T3e; + T3l = VFNMS(LDK(KP923879532), T3k, T3h); + T3n = VFMA(LDK(KP923879532), T3k, T3h); + T3t = VFMA(LDK(KP923879532), T3s, T3r); + T3v = VFNMS(LDK(KP923879532), T3s, T3r); + T3e = VFNMS(LDK(KP923879532), T3d, T36); + T3m = VFMA(LDK(KP923879532), T3d, T36); + { + V T2R, T2T, T2I, T2S; + T2R = VFNMS(LDK(KP707106781), T2Q, T2P); + T2T = VFMA(LDK(KP707106781), T2Q, T2P); + T2I = VFNMS(LDK(KP707106781), T2H, T2s); + T2S = VFMA(LDK(KP707106781), T2H, T2s); + ST(&(x[WS(rs, 16)]), VSUB(T30, T31), ms, &(x[0])); + ST(&(x[0]), VADD(T30, T31), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T2Z, T2W), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T2Z, T2W), ms, &(x[0])); + T3q = VFNMS(LDK(KP923879532), T3p, T3o); + T3u = VFMA(LDK(KP923879532), T3p, T3o); + ST(&(x[WS(rs, 18)]), VFMAI(T3l, T3e), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3l, T3e), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T2R, T2I), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T2R, T2I), ms, &(x[0])); + } + ST(&(x[WS(rs, 26)]), VFMAI(T3v, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3v, T3u), ms, &(x[0])); + } + } + { + V T1U, T13, T1s, TE, T1M, T1I, T1N, T1B, T1V, T1E; + { + V Tg, TD, T1G, T1H; + Tg = VFMA(LDK(KP707106781), Tf, T4); + T1U = VFNMS(LDK(KP707106781), Tf, T4); + T26 = VSUB(Tr, TC); + TD = VADD(Tr, TC); + T1G = VFMA(LDK(KP198912367), TV, T12); + T13 = VFNMS(LDK(KP198912367), T12, TV); + T1s = VFNMS(LDK(KP198912367), T1r, T1k); + T1H = VFMA(LDK(KP198912367), T1k, T1r); + ST(&(x[WS(rs, 2)]), VFMAI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), 
VFNMSI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3t, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3t, T3q), ms, &(x[0])); + TE = VFMA(LDK(KP923879532), TD, Tg); + T1M = VFNMS(LDK(KP923879532), TD, Tg); + T1I = VSUB(T1G, T1H); + T1N = VADD(T1G, T1H); + T1B = VFMA(LDK(KP707106781), T1A, T1z); + T25 = VFNMS(LDK(KP707106781), T1A, T1z); + T1V = VADD(T1C, T1D); + T1E = VSUB(T1C, T1D); + } + { + V T1W, T2e, T2f, T23; + { + V T28, T1Z, T1S, T1O, T1t, T1Q, T1F, T1P, T22, T29; + T28 = VFNMS(LDK(KP668178637), T1X, T1Y); + T1Z = VFMA(LDK(KP668178637), T1Y, T1X); + T1S = VFMA(LDK(KP980785280), T1N, T1M); + T1O = VFNMS(LDK(KP980785280), T1N, T1M); + T1t = VADD(T13, T1s); + T1Q = VSUB(T13, T1s); + T1F = VFMA(LDK(KP923879532), T1E, T1B); + T1P = VFNMS(LDK(KP923879532), T1E, T1B); + T1W = VFMA(LDK(KP923879532), T1V, T1U); + T2e = VFNMS(LDK(KP923879532), T1V, T1U); + T22 = VFMA(LDK(KP668178637), T21, T20); + T29 = VFNMS(LDK(KP668178637), T20, T21); + { + V T1K, T1u, T1R, T1T, T1L, T1J; + T1K = VFMA(LDK(KP980785280), T1t, TE); + T1u = VFNMS(LDK(KP980785280), T1t, TE); + T1R = VFMA(LDK(KP980785280), T1Q, T1P); + T1T = VFNMS(LDK(KP980785280), T1Q, T1P); + T1L = VFMA(LDK(KP980785280), T1I, T1F); + T1J = VFNMS(LDK(KP980785280), T1I, T1F); + T2f = VADD(T28, T29); + T2a = VSUB(T28, T29); + T23 = VADD(T1Z, T22); + T2i = VSUB(T1Z, T22); + ST(&(x[WS(rs, 23)]), VFNMSI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFMAI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFNMSI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFMAI(T1J, T1u), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T1J, T1u), ms, &(x[WS(rs, 1)])); + } + } + T24 = VFNMS(LDK(KP831469612), T23, T1W); + T2c = VFMA(LDK(KP831469612), T23, T1W); + T2g = VFMA(LDK(KP831469612), T2f, T2e); + T2k = 
VFNMS(LDK(KP831469612), T2f, T2e); + } + } + } + } + } + T2h = VFMA(LDK(KP923879532), T26, T25); + T27 = VFNMS(LDK(KP923879532), T26, T25); + { + V T2j, T2l, T2d, T2b; + T2j = VFNMS(LDK(KP831469612), T2i, T2h); + T2l = VFMA(LDK(KP831469612), T2i, T2h); + T2d = VFMA(LDK(KP831469612), T2a, T27); + T2b = VFNMS(LDK(KP831469612), T2a, T27); + ST(&(x[WS(rs, 21)]), VFMAI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFNMSI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFMAI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T2b, T24), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T2b, T24), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t2bv_32"), twinstr, &GENUS, {119, 62, 98, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_32) (planner *p) { + X(kdft_dit_register) (p, t2bv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t2bv_32 -include t2b.h -sign 1 */ + +/* + * This function contains 217 FP additions, 104 FP multiplications, + * (or, 201 additions, 88 multiplications, 16 fused multiply/add), + * 59 stack variables, 7 constants, and 64 memory accesses + */ +#include "t2b.h" + +static void t2bv_32(R *ri, R *ii, const R *W, 
stride rs, INT mb, INT me, INT ms) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T4, T1D, T2P, T3h, Tf, T1y, T2K, T3i, TC, T1w, T2G, T3e, Tr, T1v, T2D; + V T3d, T1k, T20, T2y, T3a, T1r, T21, T2v, T39, TV, T1X, T2r, T37, T12, T1Y; + V T2o, T36; + { + V T1, T1C, T3, T1A, T1B, T2, T1z, T2N, T2O; + T1 = LD(&(x[0]), ms, &(x[0])); + T1B = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1C = BYTW(&(W[TWVL * 46]), T1B); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 30]), T2); + T1z = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1A = BYTW(&(W[TWVL * 14]), T1z); + T4 = VSUB(T1, T3); + T1D = VSUB(T1A, T1C); + T2N = VADD(T1, T3); + T2O = VADD(T1A, T1C); + T2P = VSUB(T2N, T2O); + T3h = VADD(T2N, T2O); + } + { + V T6, Td, T8, Tb; + { + V T5, Tc, T7, Ta; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTW(&(W[TWVL * 6]), T5); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 22]), Tc); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T8 = BYTW(&(W[TWVL * 38]), T7); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 54]), Ta); + } + { + V T9, Te, T2I, T2J; + T9 = VSUB(T6, T8); + Te = VSUB(Tb, Td); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + T1y = VMUL(LDK(KP707106781), VSUB(T9, Te)); + T2I = VADD(T6, T8); + T2J = VADD(Tb, Td); + T2K = VSUB(T2I, T2J); + T3i = VADD(T2I, T2J); + } + } + { + V Tt, TA, Tv, Ty; + { + V Ts, Tz, Tu, Tx; + Ts = 
LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 10]), Ts); + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 26]), Tz); + Tu = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + Tv = BYTW(&(W[TWVL * 42]), Tu); + Tx = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 58]), Tx); + } + { + V Tw, TB, T2E, T2F; + Tw = VSUB(Tt, Tv); + TB = VSUB(Ty, TA); + TC = VFNMS(LDK(KP382683432), TB, VMUL(LDK(KP923879532), Tw)); + T1w = VFMA(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T2E = VADD(Ty, TA); + T2F = VADD(Tt, Tv); + T2G = VSUB(T2E, T2F); + T3e = VADD(T2E, T2F); + } + } + { + V Ti, Tp, Tk, Tn; + { + V Th, To, Tj, Tm; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 2]), Th); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Tp = BYTW(&(W[TWVL * 50]), To); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tk = BYTW(&(W[TWVL * 34]), Tj); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tn = BYTW(&(W[TWVL * 18]), Tm); + } + { + V Tl, Tq, T2B, T2C; + Tl = VSUB(Ti, Tk); + Tq = VSUB(Tn, Tp); + Tr = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + T1v = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + T2B = VADD(Ti, Tk); + T2C = VADD(Tn, Tp); + T2D = VSUB(T2B, T2C); + T3d = VADD(T2B, T2C); + } + } + { + V T1g, T1i, T1o, T1m, T1a, T1c, T1d, T15, T17, T18; + { + V T1f, T1h, T1n, T1l; + T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1g = BYTW(&(W[TWVL * 12]), T1f); + T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1i = BYTW(&(W[TWVL * 44]), T1h); + T1n = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1o = BYTW(&(W[TWVL * 28]), T1n); + T1l = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1m = BYTW(&(W[TWVL * 60]), T1l); + { + V T19, T1b, T14, T16; + T19 = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1a = BYTW(&(W[TWVL * 52]), T19); + T1b = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1c = BYTW(&(W[TWVL * 20]), T1b); + T1d = VSUB(T1a, T1c); + T14 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T15 = BYTW(&(W[TWVL * 4]), T14); + T16 = 
LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T17 = BYTW(&(W[TWVL * 36]), T16); + T18 = VSUB(T15, T17); + } + } + { + V T1e, T1j, T2w, T2x; + T1e = VMUL(LDK(KP707106781), VSUB(T18, T1d)); + T1j = VSUB(T1g, T1i); + T1k = VSUB(T1e, T1j); + T20 = VADD(T1j, T1e); + T2w = VADD(T15, T17); + T2x = VADD(T1a, T1c); + T2y = VSUB(T2w, T2x); + T3a = VADD(T2w, T2x); + } + { + V T1p, T1q, T2t, T2u; + T1p = VSUB(T1m, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T18, T1d)); + T1r = VSUB(T1p, T1q); + T21 = VADD(T1p, T1q); + T2t = VADD(T1m, T1o); + T2u = VADD(T1g, T1i); + T2v = VSUB(T2t, T2u); + T39 = VADD(T2t, T2u); + } + } + { + V TR, TT, TZ, TX, TL, TN, TO, TG, TI, TJ; + { + V TQ, TS, TY, TW; + TQ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TR = BYTW(&(W[TWVL * 16]), TQ); + TS = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + TT = BYTW(&(W[TWVL * 48]), TS); + TY = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TZ = BYTW(&(W[TWVL * 32]), TY); + TW = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TX = BYTW(&(W[0]), TW); + { + V TK, TM, TF, TH; + TK = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TL = BYTW(&(W[TWVL * 56]), TK); + TM = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TN = BYTW(&(W[TWVL * 24]), TM); + TO = VSUB(TL, TN); + TF = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TG = BYTW(&(W[TWVL * 8]), TF); + TH = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TI = BYTW(&(W[TWVL * 40]), TH); + TJ = VSUB(TG, TI); + } + } + { + V TP, TU, T2p, T2q; + TP = VMUL(LDK(KP707106781), VSUB(TJ, TO)); + TU = VSUB(TR, TT); + TV = VSUB(TP, TU); + T1X = VADD(TU, TP); + T2p = VADD(TG, TI); + T2q = VADD(TL, TN); + T2r = VSUB(T2p, T2q); + T37 = VADD(T2p, T2q); + } + { + V T10, T11, T2m, T2n; + T10 = VSUB(TX, TZ); + T11 = VMUL(LDK(KP707106781), VADD(TJ, TO)); + T12 = VSUB(T10, T11); + T1Y = VADD(T10, T11); + T2m = VADD(TX, TZ); + T2n = VADD(TR, TT); + T2o = VSUB(T2m, T2n); + T36 = VADD(T2m, T2n); + } + } + { + V T3q, T3u, T3t, T3v; + { + V T3o, T3p, T3r, T3s; + T3o = VADD(T3h, T3i); + T3p = VADD(T3d, T3e); + 
T3q = VSUB(T3o, T3p); + T3u = VADD(T3o, T3p); + T3r = VADD(T36, T37); + T3s = VADD(T39, T3a); + T3t = VBYI(VSUB(T3r, T3s)); + T3v = VADD(T3r, T3s); + } + ST(&(x[WS(rs, 24)]), VSUB(T3q, T3t), ms, &(x[0])); + ST(&(x[0]), VADD(T3u, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T3q, T3t), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T3u, T3v), ms, &(x[0])); + } + { + V T3f, T3j, T3c, T3k, T38, T3b; + T3f = VSUB(T3d, T3e); + T3j = VSUB(T3h, T3i); + T38 = VSUB(T36, T37); + T3b = VSUB(T39, T3a); + T3c = VMUL(LDK(KP707106781), VSUB(T38, T3b)); + T3k = VMUL(LDK(KP707106781), VADD(T38, T3b)); + { + V T3g, T3l, T3m, T3n; + T3g = VBYI(VSUB(T3c, T3f)); + T3l = VSUB(T3j, T3k); + ST(&(x[WS(rs, 12)]), VADD(T3g, T3l), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VSUB(T3l, T3g), ms, &(x[0])); + T3m = VBYI(VADD(T3f, T3c)); + T3n = VADD(T3j, T3k); + ST(&(x[WS(rs, 4)]), VADD(T3m, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VSUB(T3n, T3m), ms, &(x[0])); + } + } + { + V T2L, T31, T2R, T2Y, T2A, T2Z, T2U, T32, T2H, T2Q; + T2H = VMUL(LDK(KP707106781), VSUB(T2D, T2G)); + T2L = VSUB(T2H, T2K); + T31 = VADD(T2K, T2H); + T2Q = VMUL(LDK(KP707106781), VADD(T2D, T2G)); + T2R = VSUB(T2P, T2Q); + T2Y = VADD(T2P, T2Q); + { + V T2s, T2z, T2S, T2T; + T2s = VFNMS(LDK(KP382683432), T2r, VMUL(LDK(KP923879532), T2o)); + T2z = VFMA(LDK(KP923879532), T2v, VMUL(LDK(KP382683432), T2y)); + T2A = VSUB(T2s, T2z); + T2Z = VADD(T2s, T2z); + T2S = VFMA(LDK(KP382683432), T2o, VMUL(LDK(KP923879532), T2r)); + T2T = VFNMS(LDK(KP382683432), T2v, VMUL(LDK(KP923879532), T2y)); + T2U = VSUB(T2S, T2T); + T32 = VADD(T2S, T2T); + } + { + V T2M, T2V, T34, T35; + T2M = VBYI(VSUB(T2A, T2L)); + T2V = VSUB(T2R, T2U); + ST(&(x[WS(rs, 10)]), VADD(T2M, T2V), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VSUB(T2V, T2M), ms, &(x[0])); + T34 = VSUB(T2Y, T2Z); + T35 = VBYI(VSUB(T32, T31)); + ST(&(x[WS(rs, 18)]), VSUB(T34, T35), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T34, T35), ms, &(x[0])); + } + { + V T2W, T2X, T30, T33; + T2W = VBYI(VADD(T2L, T2A)); + 
T2X = VADD(T2R, T2U); + ST(&(x[WS(rs, 6)]), VADD(T2W, T2X), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VSUB(T2X, T2W), ms, &(x[0])); + T30 = VADD(T2Y, T2Z); + T33 = VBYI(VADD(T31, T32)); + ST(&(x[WS(rs, 30)]), VSUB(T30, T33), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T30, T33), ms, &(x[0])); + } + } + { + V TE, T1P, T1I, T1Q, T1t, T1M, T1F, T1N; + { + V Tg, TD, T1G, T1H; + Tg = VSUB(T4, Tf); + TD = VSUB(Tr, TC); + TE = VSUB(Tg, TD); + T1P = VADD(Tg, TD); + T1G = VFNMS(LDK(KP555570233), TV, VMUL(LDK(KP831469612), T12)); + T1H = VFMA(LDK(KP555570233), T1k, VMUL(LDK(KP831469612), T1r)); + T1I = VSUB(T1G, T1H); + T1Q = VADD(T1G, T1H); + } + { + V T13, T1s, T1x, T1E; + T13 = VFMA(LDK(KP831469612), TV, VMUL(LDK(KP555570233), T12)); + T1s = VFNMS(LDK(KP555570233), T1r, VMUL(LDK(KP831469612), T1k)); + T1t = VSUB(T13, T1s); + T1M = VADD(T13, T1s); + T1x = VSUB(T1v, T1w); + T1E = VSUB(T1y, T1D); + T1F = VSUB(T1x, T1E); + T1N = VADD(T1E, T1x); + } + { + V T1u, T1J, T1S, T1T; + T1u = VADD(TE, T1t); + T1J = VBYI(VADD(T1F, T1I)); + ST(&(x[WS(rs, 27)]), VSUB(T1u, T1J), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1u, T1J), ms, &(x[WS(rs, 1)])); + T1S = VBYI(VADD(T1N, T1M)); + T1T = VADD(T1P, T1Q); + ST(&(x[WS(rs, 3)]), VADD(T1S, T1T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VSUB(T1T, T1S), ms, &(x[WS(rs, 1)])); + } + { + V T1K, T1L, T1O, T1R; + T1K = VSUB(TE, T1t); + T1L = VBYI(VSUB(T1I, T1F)); + ST(&(x[WS(rs, 21)]), VSUB(T1K, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T1K, T1L), ms, &(x[WS(rs, 1)])); + T1O = VBYI(VSUB(T1M, T1N)); + T1R = VSUB(T1P, T1Q); + ST(&(x[WS(rs, 13)]), VADD(T1O, T1R), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VSUB(T1R, T1O), ms, &(x[WS(rs, 1)])); + } + } + { + V T1W, T2h, T2a, T2i, T23, T2e, T27, T2f; + { + V T1U, T1V, T28, T29; + T1U = VADD(T4, Tf); + T1V = VADD(T1v, T1w); + T1W = VSUB(T1U, T1V); + T2h = VADD(T1U, T1V); + T28 = VFNMS(LDK(KP195090322), T1X, VMUL(LDK(KP980785280), T1Y)); + T29 = VFMA(LDK(KP195090322), T20, 
VMUL(LDK(KP980785280), T21)); + T2a = VSUB(T28, T29); + T2i = VADD(T28, T29); + } + { + V T1Z, T22, T25, T26; + T1Z = VFMA(LDK(KP980785280), T1X, VMUL(LDK(KP195090322), T1Y)); + T22 = VFNMS(LDK(KP195090322), T21, VMUL(LDK(KP980785280), T20)); + T23 = VSUB(T1Z, T22); + T2e = VADD(T1Z, T22); + T25 = VADD(Tr, TC); + T26 = VADD(T1D, T1y); + T27 = VSUB(T25, T26); + T2f = VADD(T26, T25); + } + { + V T24, T2b, T2k, T2l; + T24 = VADD(T1W, T23); + T2b = VBYI(VADD(T27, T2a)); + ST(&(x[WS(rs, 25)]), VSUB(T24, T2b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T24, T2b), ms, &(x[WS(rs, 1)])); + T2k = VBYI(VADD(T2f, T2e)); + T2l = VADD(T2h, T2i); + ST(&(x[WS(rs, 1)]), VADD(T2k, T2l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VSUB(T2l, T2k), ms, &(x[WS(rs, 1)])); + } + { + V T2c, T2d, T2g, T2j; + T2c = VSUB(T1W, T23); + T2d = VBYI(VSUB(T2a, T27)); + ST(&(x[WS(rs, 23)]), VSUB(T2c, T2d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T2c, T2d), ms, &(x[WS(rs, 1)])); + T2g = VBYI(VSUB(T2e, T2f)); + T2j = VSUB(T2h, T2i); + ST(&(x[WS(rs, 15)]), VADD(T2g, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VSUB(T2j, T2g), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t2bv_32"), twinstr, &GENUS, {201, 88, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_32) (planner *p) { + X(kdft_dit_register) (p, t2bv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_4.c --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:39 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t2bv_4 -include t2b.h -sign 1 */ + +/* + * This function contains 11 FP additions, 8 FP multiplications, + * (or, 9 additions, 6 multiplications, 2 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t2b.h" + +static void t2bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T7, T2, T5, T8, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, 
&(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 4]), T7); + T3 = BYTW(&(W[TWVL * 2]), T2); + T6 = BYTW(&(W[0]), T5); + { + V Ta, T4, Tb, T9; + Ta = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tb = VADD(T6, T8); + T9 = VSUB(T6, T8); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t2bv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_4) (planner *p) { + X(kdft_dit_register) (p, t2bv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t2bv_4 -include t2b.h -sign 1 */ + +/* + * This function contains 11 FP additions, 6 FP multiplications, + * (or, 11 additions, 6 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t2b.h" + +static void t2bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T3, T6, T7, T2, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T8 = BYTW(&(W[TWVL * 4]), T7); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 2]), T2); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTW(&(W[0]), T5); + { + V T4, T9, Ta, Tb; + T4 = VSUB(T1, T3); + T9 = VBYI(VSUB(T6, T8)); + ST(&(x[WS(rs, 3)]), VSUB(T4, T9), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T4, T9), ms, &(x[WS(rs, 1)])); + Ta = VADD(T1, T3); + Tb = VADD(T6, T8); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[0]), VADD(Ta, Tb), 
ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t2bv_4"), twinstr, &GENUS, {11, 6, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_4) (planner *p) { + X(kdft_dit_register) (p, t2bv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:45 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t2bv_5 -include t2b.h -sign 1 */ + +/* + * This function contains 20 FP additions, 19 FP multiplications, + * (or, 11 additions, 10 multiplications, 9 fused multiply/add), + * 26 stack variables, 4 constants, and 10 memory accesses + */ +#include "t2b.h" + +static void t2bv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T2, T9, T4, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T3, Ta, T5, T8; + T3 = BYTW(&(W[0]), T2); + Ta = BYTW(&(W[TWVL * 4]), T9); + T5 = BYTW(&(W[TWVL * 6]), T4); + T8 = BYTW(&(W[TWVL * 2]), T7); + { + V T6, Tg, Tb, Th; + T6 = VADD(T3, T5); + Tg = VSUB(T3, T5); + Tb = VADD(T8, Ta); + Th = VSUB(T8, Ta); + { + V Te, Tc, Tk, Ti, Td, Tj, Tf; + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Tk = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tg, Th)); + Ti = 
VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Th, Tg)); + Td = VFNMS(LDK(KP250000000), Tc, T1); + ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); + Tj = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + ST(&(x[WS(rs, 2)]), VFNMSI(Tk, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Ti, Tf), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t2bv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_5) (planner *p) { + X(kdft_dit_register) (p, t2bv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t2bv_5 -include t2b.h -sign 1 */ + +/* + * This function contains 20 FP additions, 14 FP multiplications, + * (or, 17 additions, 11 multiplications, 3 fused multiply/add), + * 20 stack variables, 4 constants, and 10 memory accesses + */ +#include "t2b.h" + +static void t2bv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V Tf, T5, Ta, Tc, Td, Tg; + Tf = LD(&(x[0]), ms, &(x[0])); + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 
4)]), ms, &(x[0])); + T4 = BYTW(&(W[TWVL * 6]), T3); + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTW(&(W[TWVL * 2]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tc = VADD(T2, T4); + Td = VADD(T7, T9); + Tg = VADD(Tc, Td); + } + ST(&(x[0]), VADD(Tf, Tg), ms, &(x[0])); + { + V Tb, Tj, Ti, Tk, Te, Th; + Tb = VBYI(VFMA(LDK(KP951056516), T5, VMUL(LDK(KP587785252), Ta))); + Tj = VBYI(VFNMS(LDK(KP951056516), Ta, VMUL(LDK(KP587785252), T5))); + Te = VMUL(LDK(KP559016994), VSUB(Tc, Td)); + Th = VFNMS(LDK(KP250000000), Tg, Tf); + Ti = VADD(Te, Th); + Tk = VSUB(Th, Te); + ST(&(x[WS(rs, 1)]), VADD(Tb, Ti), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VSUB(Ti, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tj, Tk), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t2bv_5"), twinstr, &GENUS, {17, 11, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_5) (planner *p) { + X(kdft_dit_register) (p, t2bv_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1877 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:44 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t2bv_64 -include t2b.h -sign 1 */ + +/* + * This function contains 519 FP additions, 384 FP multiplications, + * (or, 261 additions, 126 multiplications, 258 fused multiply/add), + * 187 stack variables, 15 constants, and 128 memory accesses + */ +#include "t2b.h" + +static void t2bv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, 
+0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V T6L, T6M, T6O, T6P, T75, T6V, T5A, T6A, T72, T6K, T6t, T6D, T6w, T6B, T6h; + V T6E; + { + V Ta, T3U, T3V, T37, T7a, T58, T7B, T6l, T1v, T24, T5Q, T7o, T5F, T7l, T43; + V T4F, T2i, T2R, T6b, T7v, T60, T7s, T4a, T4I, T5u, T7h, T5x, T7g, T1i, T3b; + V T4m, T4C, T7e, T5l, T7d, T5o, T3a, TV, T4B, T4j, T3X, T3Y, T6o, T7b, T5f; + V T7C, Tx, T38, T2p, T61, T2n, T65, T2D, T7p, T5M, T7m, T5T, T4G, T46, T25; + V T1S, T2q, T2u, T2w; + { + V T5q, T10, T5v, T15, T1b, T5s, T1c, T1e; + { + V T1V, T1p, T5B, T5O, T1u, T1X, T20, T21; + { + V T1, T2, T7, T5, T32, T34, T2X, T2Z; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T32 = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + T34 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T2X = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T2Z = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + { + V T1m, T54, T6j, T36, T56, T31, T55, T1n, T1q, T1s, T4, T9; + { + V T3, T8, T6, T33, T35, T2Y, T30, T1l; + T1l = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTW(&(W[TWVL * 62]), T2); + T8 = BYTW(&(W[TWVL * 94]), T7); + T6 = BYTW(&(W[TWVL * 30]), T5); + T33 = BYTW(&(W[TWVL * 110]), T32); + T35 = BYTW(&(W[TWVL * 46]), T34); + T2Y = BYTW(&(W[TWVL * 14]), T2X); + T30 = BYTW(&(W[TWVL * 78]), T2Z); + T1m = BYTW(&(W[0]), T1l); + T54 = VSUB(T1, T3); + T4 = VADD(T1, T3); + T6j = VSUB(T6, T8); + T9 = VADD(T6, T8); + T36 = VADD(T33, T35); + T56 = VSUB(T33, T35); + T31 = VADD(T2Y, T30); + T55 = VSUB(T2Y, T30); + T1n = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + } + T1q = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1s = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + Ta = VSUB(T4, T9); + T3U = VADD(T4, T9); + { + V T57, T6k, T1o, T1r, T1t, T1W, T1U, T1Z; + T1U = 
LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T3V = VADD(T31, T36); + T37 = VSUB(T31, T36); + T57 = VADD(T55, T56); + T6k = VSUB(T55, T56); + T1o = BYTW(&(W[TWVL * 64]), T1n); + T1r = BYTW(&(W[TWVL * 32]), T1q); + T1t = BYTW(&(W[TWVL * 96]), T1s); + T1V = BYTW(&(W[TWVL * 16]), T1U); + T1W = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T1Z = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T7a = VFNMS(LDK(KP707106781), T57, T54); + T58 = VFMA(LDK(KP707106781), T57, T54); + T7B = VFNMS(LDK(KP707106781), T6k, T6j); + T6l = VFMA(LDK(KP707106781), T6k, T6j); + T1p = VADD(T1m, T1o); + T5B = VSUB(T1m, T1o); + T5O = VSUB(T1r, T1t); + T1u = VADD(T1r, T1t); + T1X = BYTW(&(W[TWVL * 80]), T1W); + T20 = BYTW(&(W[TWVL * 112]), T1Z); + T21 = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T5W, T2N, T69, T2L, T5Y, T2P, T48, T2c, T2h; + { + V T41, T1Y, T5C, T22, T2d, T29, T2b, T2f, T28, T2a, T2H, T2J; + T28 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T2a = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1v = VSUB(T1p, T1u); + T41 = VADD(T1p, T1u); + T1Y = VADD(T1V, T1X); + T5C = VSUB(T1V, T1X); + T22 = BYTW(&(W[TWVL * 48]), T21); + T2d = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T29 = BYTW(&(W[TWVL * 124]), T28); + T2b = BYTW(&(W[TWVL * 60]), T2a); + T2f = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T2H = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2J = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + { + V T23, T5D, T2e, T2g, T2I, T2K, T2M; + T2M = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T23 = VADD(T20, T22); + T5D = VSUB(T20, T22); + T2e = BYTW(&(W[TWVL * 28]), T2d); + T2c = VADD(T29, T2b); + T5W = VSUB(T29, T2b); + T2g = BYTW(&(W[TWVL * 92]), T2f); + T2I = BYTW(&(W[TWVL * 108]), T2H); + T2K = BYTW(&(W[TWVL * 44]), T2J); + T2N = BYTW(&(W[TWVL * 12]), T2M); + { + V T5E, T5P, T42, T2O; + T5E = VADD(T5C, T5D); + T5P = VSUB(T5C, T5D); + T24 = VSUB(T1Y, T23); + T42 = VADD(T1Y, T23); + T69 = VSUB(T2g, T2e); + T2h = VADD(T2e, T2g); + T2O = LD(&(x[WS(rs, 39)]), ms, 
&(x[WS(rs, 1)])); + T2L = VADD(T2I, T2K); + T5Y = VSUB(T2I, T2K); + T5Q = VFMA(LDK(KP707106781), T5P, T5O); + T7o = VFNMS(LDK(KP707106781), T5P, T5O); + T5F = VFMA(LDK(KP707106781), T5E, T5B); + T7l = VFNMS(LDK(KP707106781), T5E, T5B); + T43 = VADD(T41, T42); + T4F = VSUB(T41, T42); + T2P = BYTW(&(W[TWVL * 76]), T2O); + } + } + } + T2i = VSUB(T2c, T2h); + T48 = VADD(T2c, T2h); + { + V TW, TY, T11, T2Q, T5X, T13; + TW = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T11 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2Q = VADD(T2N, T2P); + T5X = VSUB(T2N, T2P); + T13 = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + { + V T12, T5Z, T6a, T49, T14, T18, T1a; + { + V T17, T19, TX, TZ; + T17 = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + T19 = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TX = BYTW(&(W[TWVL * 122]), TW); + TZ = BYTW(&(W[TWVL * 58]), TY); + T12 = BYTW(&(W[TWVL * 26]), T11); + T5Z = VADD(T5X, T5Y); + T6a = VSUB(T5Y, T5X); + T2R = VSUB(T2L, T2Q); + T49 = VADD(T2Q, T2L); + T14 = BYTW(&(W[TWVL * 90]), T13); + T18 = BYTW(&(W[TWVL * 106]), T17); + T5q = VSUB(TX, TZ); + T10 = VADD(TX, TZ); + T1a = BYTW(&(W[TWVL * 42]), T19); + } + T6b = VFMA(LDK(KP707106781), T6a, T69); + T7v = VFNMS(LDK(KP707106781), T6a, T69); + T60 = VFMA(LDK(KP707106781), T5Z, T5W); + T7s = VFNMS(LDK(KP707106781), T5Z, T5W); + T4a = VADD(T48, T49); + T4I = VSUB(T48, T49); + T5v = VSUB(T14, T12); + T15 = VADD(T12, T14); + T1b = VADD(T18, T1a); + T5s = VSUB(T18, T1a); + } + T1c = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1e = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + } + } + } + { + V Th, T59, Tf, Tv, T5d, Tj, Tm, To; + { + V T5h, TQ, T5m, T5i, TO, TS, TJ, T4h, TD, TI; + { + V T4k, T16, TB, T1d, T1f, TE, TG, TA, Tz, TK, TM, TC; + Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T4k = VADD(T10, T15); + T16 = VSUB(T10, T15); + TB = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + T1d = BYTW(&(W[TWVL * 10]), T1c); + T1f = BYTW(&(W[TWVL * 74]), T1e); + TE = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 50)]), ms, 
&(x[0])); + TA = BYTW(&(W[TWVL * 2]), Tz); + TK = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TM = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + TC = BYTW(&(W[TWVL * 66]), TB); + { + V T1g, T5r, TF, TH, TL, TN, TP; + TP = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + T1g = VADD(T1d, T1f); + T5r = VSUB(T1d, T1f); + TF = BYTW(&(W[TWVL * 34]), TE); + TH = BYTW(&(W[TWVL * 98]), TG); + TL = BYTW(&(W[TWVL * 18]), TK); + TN = BYTW(&(W[TWVL * 82]), TM); + T5h = VSUB(TA, TC); + TD = VADD(TA, TC); + TQ = BYTW(&(W[TWVL * 114]), TP); + { + V T5w, T5t, T4l, T1h, TR; + T5w = VSUB(T5s, T5r); + T5t = VADD(T5r, T5s); + T4l = VADD(T1g, T1b); + T1h = VSUB(T1b, T1g); + T5m = VSUB(TF, TH); + TI = VADD(TF, TH); + T5i = VSUB(TL, TN); + TO = VADD(TL, TN); + TR = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T5u = VFMA(LDK(KP707106781), T5t, T5q); + T7h = VFNMS(LDK(KP707106781), T5t, T5q); + T5x = VFMA(LDK(KP707106781), T5w, T5v); + T7g = VFNMS(LDK(KP707106781), T5w, T5v); + T1i = VFNMS(LDK(KP414213562), T1h, T16); + T3b = VFMA(LDK(KP414213562), T16, T1h); + T4m = VADD(T4k, T4l); + T4C = VSUB(T4k, T4l); + TS = BYTW(&(W[TWVL * 50]), TR); + } + } + } + TJ = VSUB(TD, TI); + T4h = VADD(TD, TI); + { + V Tb, Td, Tr, T5j, TT, Tt, Tg; + Tb = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tr = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T5j = VSUB(TQ, TS); + TT = VADD(TQ, TS); + Tt = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + Tg = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + { + V Ti, Tc, Te, Ts; + Ti = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tc = BYTW(&(W[TWVL * 6]), Tb); + Te = BYTW(&(W[TWVL * 70]), Td); + Ts = BYTW(&(W[TWVL * 22]), Tr); + { + V T5k, T5n, TU, T4i, Tu; + T5k = VADD(T5i, T5j); + T5n = VSUB(T5i, T5j); + TU = VSUB(TO, TT); + T4i = VADD(TO, TT); + Tu = BYTW(&(W[TWVL * 86]), Tt); + Th = BYTW(&(W[TWVL * 38]), Tg); + T59 = VSUB(Tc, Te); + Tf = VADD(Tc, Te); + T7e = VFNMS(LDK(KP707106781), T5k, T5h); + T5l = VFMA(LDK(KP707106781), T5k, T5h); + T7d = VFNMS(LDK(KP707106781), T5n, T5m); + T5o = VFMA(LDK(KP707106781), 
T5n, T5m); + T3a = VFMA(LDK(KP414213562), TJ, TU); + TV = VFNMS(LDK(KP414213562), TU, TJ); + T4B = VSUB(T4h, T4i); + T4j = VADD(T4h, T4i); + Tv = VADD(Ts, Tu); + T5d = VSUB(Tu, Ts); + Tj = BYTW(&(W[TWVL * 102]), Ti); + } + } + Tm = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + To = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + } + } + { + V T5b, T6m, Tl, T1A, T5G, T1Q, T5K, T1C, T1D, T5e, T6n, Tw, T1H, T1J; + { + V T1w, T1y, T1M, T1O, Tq, T5c, T1B; + T1w = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1y = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1M = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1O = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1B = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + { + V Tk, T5a, Tn, Tp; + Tk = VADD(Th, Tj); + T5a = VSUB(Th, Tj); + Tn = BYTW(&(W[TWVL * 118]), Tm); + Tp = BYTW(&(W[TWVL * 54]), To); + { + V T1x, T1z, T1N, T1P; + T1x = BYTW(&(W[TWVL * 8]), T1w); + T1z = BYTW(&(W[TWVL * 72]), T1y); + T1N = BYTW(&(W[TWVL * 24]), T1M); + T1P = BYTW(&(W[TWVL * 88]), T1O); + T5b = VFNMS(LDK(KP414213562), T5a, T59); + T6m = VFMA(LDK(KP414213562), T59, T5a); + T3X = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + Tq = VADD(Tn, Tp); + T5c = VSUB(Tn, Tp); + T1A = VADD(T1x, T1z); + T5G = VSUB(T1x, T1z); + T1Q = VADD(T1N, T1P); + T5K = VSUB(T1N, T1P); + T1C = BYTW(&(W[TWVL * 40]), T1B); + } + } + T1D = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T5e = VFNMS(LDK(KP414213562), T5d, T5c); + T6n = VFMA(LDK(KP414213562), T5c, T5d); + T3Y = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + T1H = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)])); + T1J = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + } + { + V T1I, T1K, T1F, T5H, T2k, T2l, T2z, T2B, T2j, T1E; + T2j = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1E = BYTW(&(W[TWVL * 104]), T1D); + T6o = VSUB(T6m, T6n); + T7b = VADD(T6m, T6n); + T5f = VADD(T5b, T5e); + T7C = VSUB(T5b, T5e); + Tx = VADD(Tl, Tw); + T38 = VSUB(Tl, Tw); + T1I = BYTW(&(W[TWVL * 120]), T1H); + T1K = BYTW(&(W[TWVL * 56]), T1J); + T1F = VADD(T1C, T1E); + T5H = VSUB(T1C, T1E); + 
T2k = BYTW(&(W[TWVL * 4]), T2j); + T2l = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2z = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2B = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + { + V T5I, T5R, T44, T1G, T2m, T2A, T2C, T5S, T5L, T1R, T45, T2o, T5J, T1L; + T2o = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T5J = VSUB(T1I, T1K); + T1L = VADD(T1I, T1K); + T5I = VFNMS(LDK(KP414213562), T5H, T5G); + T5R = VFMA(LDK(KP414213562), T5G, T5H); + T44 = VADD(T1A, T1F); + T1G = VSUB(T1A, T1F); + T2m = BYTW(&(W[TWVL * 68]), T2l); + T2A = BYTW(&(W[TWVL * 20]), T2z); + T2C = BYTW(&(W[TWVL * 84]), T2B); + T5S = VFNMS(LDK(KP414213562), T5J, T5K); + T5L = VFMA(LDK(KP414213562), T5K, T5J); + T1R = VSUB(T1L, T1Q); + T45 = VADD(T1L, T1Q); + T2p = BYTW(&(W[TWVL * 36]), T2o); + T61 = VSUB(T2k, T2m); + T2n = VADD(T2k, T2m); + T65 = VSUB(T2C, T2A); + T2D = VADD(T2A, T2C); + T7p = VSUB(T5I, T5L); + T5M = VADD(T5I, T5L); + T7m = VSUB(T5R, T5S); + T5T = VADD(T5R, T5S); + T4G = VSUB(T44, T45); + T46 = VADD(T44, T45); + T25 = VSUB(T1G, T1R); + T1S = VADD(T1G, T1R); + T2q = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + } + T2u = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2w = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + } + } + } + } + { + V T67, T7w, T6e, T7t, T3s, T3E, T39, T3D, T1k, T3k, T3t, T3c, T1T, T3v, T3w; + V T26, T2G, T3y, T3z, T2T; + { + V T4A, T4N, T47, T4v, T2r, T2v, T2x, T4s, T40, T3W, T3Z; + T4A = VSUB(T3U, T3V); + T3W = VADD(T3U, T3V); + T3Z = VADD(T3X, T3Y); + T4N = VSUB(T3X, T3Y); + T47 = VSUB(T43, T46); + T4v = VADD(T43, T46); + T2r = BYTW(&(W[TWVL * 100]), T2q); + T2v = BYTW(&(W[TWVL * 116]), T2u); + T2x = BYTW(&(W[TWVL * 52]), T2w); + T4s = VADD(T3W, T3Z); + T40 = VSUB(T3W, T3Z); + { + V T4O, T4n, T4Q, T4H, T4E, T4W, T4u, T4y, T4d, T4J, T2F, T2S; + { + V T6c, T63, T2t, T4b, T6d, T66, T2E, T4c; + { + V T4D, T62, T2s, T64, T2y, T4t; + T4O = VSUB(T4B, T4C); + T4D = VADD(T4B, T4C); + T62 = VSUB(T2r, T2p); + T2s = VADD(T2p, T2r); + T64 = VSUB(T2v, T2x); + T2y = 
VADD(T2v, T2x); + T4t = VADD(T4j, T4m); + T4n = VSUB(T4j, T4m); + T4Q = VFMA(LDK(KP414213562), T4F, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4F); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4W = VFNMS(LDK(KP707106781), T4D, T4A); + T6c = VFNMS(LDK(KP414213562), T61, T62); + T63 = VFMA(LDK(KP414213562), T62, T61); + T2t = VSUB(T2n, T2s); + T4b = VADD(T2n, T2s); + T6d = VFMA(LDK(KP414213562), T64, T65); + T66 = VFNMS(LDK(KP414213562), T65, T64); + T2E = VSUB(T2y, T2D); + T4c = VADD(T2y, T2D); + T4u = VSUB(T4s, T4t); + T4y = VADD(T4s, T4t); + } + T67 = VADD(T63, T66); + T7w = VSUB(T66, T63); + T6e = VADD(T6c, T6d); + T7t = VSUB(T6d, T6c); + T4d = VADD(T4b, T4c); + T4J = VSUB(T4c, T4b); + T2F = VADD(T2t, T2E); + T2S = VSUB(T2E, T2t); + } + { + V Ty, T1j, T4R, T4K; + Ty = VFMA(LDK(KP707106781), Tx, Ta); + T3s = VFNMS(LDK(KP707106781), Tx, Ta); + T3E = VSUB(TV, T1i); + T1j = VADD(TV, T1i); + T39 = VFMA(LDK(KP707106781), T38, T37); + T3D = VFNMS(LDK(KP707106781), T38, T37); + T4R = VFMA(LDK(KP414213562), T4I, T4J); + T4K = VFNMS(LDK(KP414213562), T4J, T4I); + { + V T4w, T4e, T4P, T4Z; + T4w = VADD(T4a, T4d); + T4e = VSUB(T4a, T4d); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T4Z = VFNMS(LDK(KP707106781), T4O, T4N); + T1k = VFMA(LDK(KP923879532), T1j, Ty); + T3k = VFNMS(LDK(KP923879532), T1j, Ty); + { + V T4L, T50, T4S, T4X; + T4L = VADD(T4H, T4K); + T50 = VSUB(T4H, T4K); + T4S = VSUB(T4Q, T4R); + T4X = VADD(T4Q, T4R); + { + V T4f, T4o, T4x, T4z; + T4f = VADD(T47, T4e); + T4o = VSUB(T47, T4e); + T4x = VSUB(T4v, T4w); + T4z = VADD(T4v, T4w); + { + V T53, T51, T4M, T4U; + T53 = VFNMS(LDK(KP923879532), T50, T4Z); + T51 = VFMA(LDK(KP923879532), T50, T4Z); + T4M = VFNMS(LDK(KP923879532), T4L, T4E); + T4U = VFMA(LDK(KP923879532), T4L, T4E); + { + V T52, T4Y, T4T, T4V; + T52 = VFMA(LDK(KP923879532), T4X, T4W); + T4Y = VFNMS(LDK(KP923879532), T4X, T4W); + T4T = VFNMS(LDK(KP923879532), T4S, T4P); + T4V = VFMA(LDK(KP923879532), T4S, T4P); + { + V T4p, T4r, T4g, T4q; + T4p = 
VFNMS(LDK(KP707106781), T4o, T4n); + T4r = VFMA(LDK(KP707106781), T4o, T4n); + T4g = VFNMS(LDK(KP707106781), T4f, T40); + T4q = VFMA(LDK(KP707106781), T4f, T40); + ST(&(x[0]), VADD(T4y, T4z), ms, &(x[0])); + ST(&(x[WS(rs, 32)]), VSUB(T4y, T4z), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFMAI(T4x, T4u), ms, &(x[0])); + ST(&(x[WS(rs, 48)]), VFNMSI(T4x, T4u), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VFNMSI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VFMAI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 60)]), VFNMSI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 36)]), VFMAI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 56)]), VFNMSI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 40)]), VFMAI(T4p, T4g), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T4p, T4g), ms, &(x[0])); + T3t = VADD(T3a, T3b); + T3c = VSUB(T3a, T3b); + } + } + } + } + } + } + T1T = VFMA(LDK(KP707106781), T1S, T1v); + T3v = VFNMS(LDK(KP707106781), T1S, T1v); + T3w = VFNMS(LDK(KP707106781), T25, T24); + T26 = VFMA(LDK(KP707106781), T25, T24); + T2G = VFMA(LDK(KP707106781), T2F, T2i); + T3y = VFNMS(LDK(KP707106781), T2F, T2i); + T3z = VFNMS(LDK(KP707106781), T2S, T2R); + T2T = VFMA(LDK(KP707106781), T2S, T2R); + } + } + } + { + V T3u, T3M, T3F, T3P, T3x, T3G, T3q, T3m, T3h, T3j, T3r, T3p, T2W, T3i; + { + V T3d, T3n, T27, T3e, T2U, T3f; + T3d = VFMA(LDK(KP923879532), T3c, T39); + T3n = VFNMS(LDK(KP923879532), T3c, T39); + T27 = VFNMS(LDK(KP198912367), T26, T1T); + T3e = VFMA(LDK(KP198912367), T1T, T26); + T2U = VFNMS(LDK(KP198912367), T2T, T2G); + T3f = VFMA(LDK(KP198912367), T2G, T2T); + T3u = VFMA(LDK(KP923879532), T3t, T3s); + T3M = VFNMS(LDK(KP923879532), T3t, T3s); + { + V T3g, T3l, T2V, T3o; + T3g = VSUB(T3e, T3f); + T3l = VADD(T3e, T3f); + T2V = VADD(T27, T2U); 
+ T3o = VSUB(T27, T2U); + T3F = VFNMS(LDK(KP923879532), T3E, T3D); + T3P = VFMA(LDK(KP923879532), T3E, T3D); + T3x = VFMA(LDK(KP668178637), T3w, T3v); + T3G = VFNMS(LDK(KP668178637), T3v, T3w); + T3q = VFMA(LDK(KP980785280), T3l, T3k); + T3m = VFNMS(LDK(KP980785280), T3l, T3k); + T3h = VFNMS(LDK(KP980785280), T3g, T3d); + T3j = VFMA(LDK(KP980785280), T3g, T3d); + T3r = VFNMS(LDK(KP980785280), T3o, T3n); + T3p = VFMA(LDK(KP980785280), T3o, T3n); + T2W = VFNMS(LDK(KP980785280), T2V, T1k); + T3i = VFMA(LDK(KP980785280), T2V, T1k); + } + } + { + V T7n, T7Z, T8j, T89, T7k, T7O, T8g, T7Y, T7H, T7R, T80, T7q, T7u, T82, T83; + V T7x; + { + V T7c, T7W, T7D, T87, T7f, T7E, T3A, T3H, T7F, T7i; + T7c = VFNMS(LDK(KP923879532), T7b, T7a); + T7W = VFMA(LDK(KP923879532), T7b, T7a); + T7D = VFMA(LDK(KP923879532), T7C, T7B); + T87 = VFNMS(LDK(KP923879532), T7C, T7B); + T7f = VFNMS(LDK(KP668178637), T7e, T7d); + T7E = VFMA(LDK(KP668178637), T7d, T7e); + ST(&(x[WS(rs, 46)]), VFNMSI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 50)]), VFMAI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 62)]), VFNMSI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 34)]), VFMAI(T3h, T2W), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3h, T2W), ms, &(x[0])); + T3A = VFMA(LDK(KP668178637), T3z, T3y); + T3H = VFNMS(LDK(KP668178637), T3y, T3z); + T7F = VFMA(LDK(KP668178637), T7g, T7h); + T7i = VFNMS(LDK(KP668178637), T7h, T7g); + T7n = VFNMS(LDK(KP923879532), T7m, T7l); + T7Z = VFMA(LDK(KP923879532), T7m, T7l); + { + V T3I, T3N, T3B, T3Q; + T3I = VSUB(T3G, T3H); + T3N = VADD(T3G, T3H); + T3B = VADD(T3x, T3A); + T3Q = VSUB(T3x, T3A); + { + V T7j, T88, T7G, T7X; + T7j = VADD(T7f, T7i); + T88 = VSUB(T7f, T7i); + T7G = VSUB(T7E, T7F); + T7X = VADD(T7E, T7F); + { + V T3S, T3O, T3J, T3L; + T3S = VFNMS(LDK(KP831469612), T3N, T3M); + T3O = VFMA(LDK(KP831469612), T3N, T3M); + 
T3J = VFNMS(LDK(KP831469612), T3I, T3F); + T3L = VFMA(LDK(KP831469612), T3I, T3F); + { + V T3T, T3R, T3C, T3K; + T3T = VFMA(LDK(KP831469612), T3Q, T3P); + T3R = VFNMS(LDK(KP831469612), T3Q, T3P); + T3C = VFNMS(LDK(KP831469612), T3B, T3u); + T3K = VFMA(LDK(KP831469612), T3B, T3u); + T8j = VFNMS(LDK(KP831469612), T88, T87); + T89 = VFMA(LDK(KP831469612), T88, T87); + T7k = VFNMS(LDK(KP831469612), T7j, T7c); + T7O = VFMA(LDK(KP831469612), T7j, T7c); + T8g = VFNMS(LDK(KP831469612), T7X, T7W); + T7Y = VFMA(LDK(KP831469612), T7X, T7W); + T7H = VFMA(LDK(KP831469612), T7G, T7D); + T7R = VFNMS(LDK(KP831469612), T7G, T7D); + ST(&(x[WS(rs, 42)]), VFMAI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 54)]), VFNMSI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 58)]), VFMAI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T3J, T3C), ms, &(x[0])); + ST(&(x[WS(rs, 38)]), VFNMSI(T3J, T3C), ms, &(x[0])); + T80 = VFNMS(LDK(KP923879532), T7p, T7o); + T7q = VFMA(LDK(KP923879532), T7p, T7o); + } + } + } + } + T7u = VFNMS(LDK(KP923879532), T7t, T7s); + T82 = VFMA(LDK(KP923879532), T7t, T7s); + T83 = VFNMS(LDK(KP923879532), T7w, T7v); + T7x = VFMA(LDK(KP923879532), T7w, T7v); + } + { + V T5g, T6I, T6p, T6T, T5p, T6q, T6r, T5y; + T5g = VFMA(LDK(KP923879532), T5f, T58); + T6I = VFNMS(LDK(KP923879532), T5f, T58); + { + V T7r, T7I, T7y, T7J; + T7r = VFNMS(LDK(KP534511135), T7q, T7n); + T7I = VFMA(LDK(KP534511135), T7n, T7q); + T7y = VFNMS(LDK(KP534511135), T7x, T7u); + T7J = VFMA(LDK(KP534511135), T7u, T7x); + { + V T81, T8a, T84, T8b; + T81 = VFMA(LDK(KP303346683), T80, T7Z); + T8a = VFNMS(LDK(KP303346683), T7Z, T80); + T84 = VFMA(LDK(KP303346683), T83, T82); + T8b = VFNMS(LDK(KP303346683), T82, T83); + T6p = VFMA(LDK(KP923879532), T6o, T6l); + T6T = VFNMS(LDK(KP923879532), T6o, T6l); + T5p = VFNMS(LDK(KP198912367), T5o, T5l); + T6q = 
VFMA(LDK(KP198912367), T5l, T5o); + { + V T7K, T7P, T7z, T7S; + T7K = VSUB(T7I, T7J); + T7P = VADD(T7I, T7J); + T7z = VADD(T7r, T7y); + T7S = VSUB(T7r, T7y); + { + V T8c, T8h, T85, T8k; + T8c = VSUB(T8a, T8b); + T8h = VADD(T8a, T8b); + T85 = VADD(T81, T84); + T8k = VSUB(T81, T84); + { + V T7Q, T7U, T7L, T7N; + T7Q = VFNMS(LDK(KP881921264), T7P, T7O); + T7U = VFMA(LDK(KP881921264), T7P, T7O); + T7L = VFNMS(LDK(KP881921264), T7K, T7H); + T7N = VFMA(LDK(KP881921264), T7K, T7H); + { + V T7T, T7V, T7A, T7M; + T7T = VFMA(LDK(KP881921264), T7S, T7R); + T7V = VFNMS(LDK(KP881921264), T7S, T7R); + T7A = VFNMS(LDK(KP881921264), T7z, T7k); + T7M = VFMA(LDK(KP881921264), T7z, T7k); + { + V T8i, T8m, T8d, T8f; + T8i = VFMA(LDK(KP956940335), T8h, T8g); + T8m = VFNMS(LDK(KP956940335), T8h, T8g); + T8d = VFNMS(LDK(KP956940335), T8c, T89); + T8f = VFMA(LDK(KP956940335), T8c, T89); + { + V T8l, T8n, T86, T8e; + T8l = VFNMS(LDK(KP956940335), T8k, T8j); + T8n = VFMA(LDK(KP956940335), T8k, T8j); + T86 = VFNMS(LDK(KP956940335), T85, T7Y); + T8e = VFMA(LDK(KP956940335), T85, T7Y); + ST(&(x[WS(rs, 53)]), VFMAI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 43)]), VFNMSI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VFMAI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 59)]), VFNMSI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 37)]), VFMAI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFNMSI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 51)]), VFNMSI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 45)]), VFMAI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 61)]), VFMAI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFMAI(T8d, T86), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 35)]), VFNMSI(T8d, T86), ms, &(x[WS(rs, 1)])); + T6r = VFMA(LDK(KP198912367), T5u, T5x); + T5y = VFNMS(LDK(KP198912367), T5x, T5u); + } + } + } + } + } + } + } + } + { + V T5N, T5U, T68, T5z, T6U, T6f; + T5N = VFMA(LDK(KP923879532), T5M, T5F); + T6L = VFNMS(LDK(KP923879532), T5M, T5F); + T6M = VFNMS(LDK(KP923879532), T5T, T5Q); + T5U = VFMA(LDK(KP923879532), T5T, T5Q); + T68 = VFMA(LDK(KP923879532), T67, T60); + T6O = VFNMS(LDK(KP923879532), T67, T60); + T5z = VADD(T5p, T5y); + T6U = VSUB(T5p, T5y); + T6P = VFNMS(LDK(KP923879532), T6e, T6b); + T6f = VFMA(LDK(KP923879532), T6e, T6b); + { + V T5V, T6u, T6g, T6v, T6s, T6J; + T6s = VSUB(T6q, T6r); + T6J = VADD(T6q, T6r); + T5V = VFNMS(LDK(KP098491403), T5U, T5N); + T6u = VFMA(LDK(KP098491403), T5N, T5U); + T75 = VFMA(LDK(KP980785280), T6U, T6T); + T6V = VFNMS(LDK(KP980785280), T6U, T6T); + T5A = VFMA(LDK(KP980785280), T5z, T5g); + T6A = VFNMS(LDK(KP980785280), T5z, T5g); + T6g = VFNMS(LDK(KP098491403), T6f, T68); + T6v = VFMA(LDK(KP098491403), T68, T6f); + T72 = VFNMS(LDK(KP980785280), T6J, T6I); + T6K = VFMA(LDK(KP980785280), T6J, T6I); + T6t = VFMA(LDK(KP980785280), T6s, T6p); + T6D = VFNMS(LDK(KP980785280), T6s, T6p); + T6w = VSUB(T6u, T6v); + T6B = VADD(T6u, T6v); + T6h = VADD(T5V, T6g); + T6E = VSUB(T5V, T6g); + } + } + } + } + } + } + } + { + V T6W, T6N, T6G, T6C, T6z, T6x, T6H, T6F, T6y, T6i, T6X, T6Q; + T6W = VFNMS(LDK(KP820678790), T6L, T6M); + T6N = VFMA(LDK(KP820678790), T6M, T6L); + T6G = VFMA(LDK(KP995184726), T6B, T6A); + T6C = VFNMS(LDK(KP995184726), T6B, T6A); + T6z = VFMA(LDK(KP995184726), T6w, T6t); + T6x = VFNMS(LDK(KP995184726), T6w, T6t); + T6H = VFNMS(LDK(KP995184726), T6E, T6D); + T6F = VFMA(LDK(KP995184726), T6E, T6D); + T6y = VFMA(LDK(KP995184726), T6h, T5A); + T6i = VFNMS(LDK(KP995184726), T6h, T5A); + T6X = VFNMS(LDK(KP820678790), T6O, T6P); + T6Q = VFMA(LDK(KP820678790), T6P, T6O); + { + V T73, T6Y, T76, T6R; + ST(&(x[WS(rs, 49)]), VFMAI(T6H, T6G), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VFNMSI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFMAI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 63)]), VFNMSI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 33)]), VFMAI(T6x, T6i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFNMSI(T6x, T6i), ms, &(x[WS(rs, 1)])); + T73 = VADD(T6W, T6X); + T6Y = VSUB(T6W, T6X); + T76 = VSUB(T6N, T6Q); + T6R = VADD(T6N, T6Q); + { + V T78, T74, T71, T6Z, T79, T77, T70, T6S; + T78 = VFNMS(LDK(KP773010453), T73, T72); + T74 = VFMA(LDK(KP773010453), T73, T72); + T71 = VFMA(LDK(KP773010453), T6Y, T6V); + T6Z = VFNMS(LDK(KP773010453), T6Y, T6V); + T79 = VFMA(LDK(KP773010453), T76, T75); + T77 = VFNMS(LDK(KP773010453), T76, T75); + T70 = VFMA(LDK(KP773010453), T6R, T6K); + T6S = VFNMS(LDK(KP773010453), T6R, T6K); + ST(&(x[WS(rs, 55)]), VFNMSI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VFMAI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VFNMSI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 57)]), VFMAI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFMAI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 39)]), VFNMSI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} +/* Twiddle instruction table referenced by desc below: one VTW(0, k) entry for each k = 1..63, closed by a {TW_NEXT, VL, 0} entry. */ +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + 
VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; +/* Descriptor registered with the codelet: 64 (presumably the radix), the name "t2bv_64", the twinstr table and &GENUS. NOTE(review): {261, 126, 258, 0} looks like the operation counts (adds, muls, fmas, other) - confirm against the ct_desc definition. */ +static const ct_desc desc = { 64, XSIMD_STRING("t2bv_64"), twinstr, &GENUS, {261, 126, 258, 0}, 0, 0, 0 }; +/* Registration entry point: passes planner p, the t2bv_64 kernel and its descriptor to kdft_dit_register. */ +void XSIMD(codelet_t2bv_64) (planner *p) { + X(kdft_dit_register) (p, t2bv_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t2bv_64 -include t2b.h -sign 1 */ + +/* + * This function contains 519 FP additions, 250 FP multiplications, + * (or, 467 additions, 198 multiplications, 52 fused multiply/add), + * 107 stack variables, 15 constants, and 128 memory accesses + */ +#include "t2b.h" + +static void t2bv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, 
+0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V Tg, T4B, T6v, T7G, T3r, T4w, T5q, T7F, T5Y, T62, T28, T4d, T2g, T4a, T7g; + V T7Y, T6f, T6j, T2Z, T4k, T37, T4h, T7n, T81, T7w, T7x, T7y, T5M, T6q, T1k; + V T4s, T1r, T4t, T7t, T7u, T7v, T5F, T6p, TV, T4p, T12, T4q, T7A, T7B, TD; + V T4x, T3k, T4C, T5x, T6s, T1R, T4b, T7j, T7Z, T2j, T4e, T5V, T63, T2I, T4i; + V T7q, T82, T3a, T4l, T6c, T6k; + { + V T1, T3, T3p, T3n, Tb, Td, Te, T6, T8, T9, T2, T3o, T3m; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T3 = BYTW(&(W[TWVL * 62]), T2); + T3o = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T3p = BYTW(&(W[TWVL * 94]), T3o); + T3m = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3n = BYTW(&(W[TWVL * 30]), T3m); + { + V Ta, Tc, T5, T7; + Ta = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + Tb = BYTW(&(W[TWVL * 110]), Ta); + Tc = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 46]), Tc); + Te = VSUB(Tb, Td); + T5 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T6 = BYTW(&(W[TWVL * 14]), T5); + T7 = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + T8 = BYTW(&(W[TWVL * 78]), T7); + T9 = VSUB(T6, T8); + } + { + V T4, Tf, T6t, T6u; + T4 = VSUB(T1, T3); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VSUB(T4, Tf); + T4B = VADD(T4, Tf); + T6t = VADD(T6, T8); + T6u = VADD(Tb, Td); + T6v = VSUB(T6t, T6u); + T7G = VADD(T6t, T6u); + } + { + V T3l, T3q, T5o, T5p; + T3l = VMUL(LDK(KP707106781), VSUB(T9, Te)); + T3q = VSUB(T3n, T3p); + T3r = VSUB(T3l, T3q); + T4w = VADD(T3q, T3l); + T5o = VADD(T1, T3); + T5p = VADD(T3n, T3p); + T5q = VSUB(T5o, T5p); + T7F = VADD(T5o, T5p); + } + } + { + V T24, T26, T61, T2b, T2d, T60, T1W, T5W, T21, T5X, T22, T27; + { + V T23, T25, T2a, T2c; + T23 
= LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T24 = BYTW(&(W[TWVL * 32]), T23); + T25 = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + T26 = BYTW(&(W[TWVL * 96]), T25); + T61 = VADD(T24, T26); + T2a = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2b = BYTW(&(W[0]), T2a); + T2c = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + T2d = BYTW(&(W[TWVL * 64]), T2c); + T60 = VADD(T2b, T2d); + } + { + V T1T, T1V, T1S, T1U; + T1S = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1T = BYTW(&(W[TWVL * 16]), T1S); + T1U = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T1V = BYTW(&(W[TWVL * 80]), T1U); + T1W = VSUB(T1T, T1V); + T5W = VADD(T1T, T1V); + } + { + V T1Y, T20, T1X, T1Z; + T1X = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T1Y = BYTW(&(W[TWVL * 112]), T1X); + T1Z = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T20 = BYTW(&(W[TWVL * 48]), T1Z); + T21 = VSUB(T1Y, T20); + T5X = VADD(T1Y, T20); + } + T5Y = VSUB(T5W, T5X); + T62 = VSUB(T60, T61); + T22 = VMUL(LDK(KP707106781), VSUB(T1W, T21)); + T27 = VSUB(T24, T26); + T28 = VSUB(T22, T27); + T4d = VADD(T27, T22); + { + V T2e, T2f, T7e, T7f; + T2e = VSUB(T2b, T2d); + T2f = VMUL(LDK(KP707106781), VADD(T1W, T21)); + T2g = VSUB(T2e, T2f); + T4a = VADD(T2e, T2f); + T7e = VADD(T60, T61); + T7f = VADD(T5W, T5X); + T7g = VSUB(T7e, T7f); + T7Y = VADD(T7e, T7f); + } + } + { + V T2V, T2X, T6i, T32, T34, T6h, T2N, T6d, T2S, T6e, T2T, T2Y; + { + V T2U, T2W, T31, T33; + T2U = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2V = BYTW(&(W[TWVL * 28]), T2U); + T2W = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T2X = BYTW(&(W[TWVL * 92]), T2W); + T6i = VADD(T2V, T2X); + T31 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T32 = BYTW(&(W[TWVL * 124]), T31); + T33 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T34 = BYTW(&(W[TWVL * 60]), T33); + T6h = VADD(T32, T34); + } + { + V T2K, T2M, T2J, T2L; + T2J = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T2K = BYTW(&(W[TWVL * 12]), T2J); + T2L = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2M = BYTW(&(W[TWVL * 
76]), T2L); + T2N = VSUB(T2K, T2M); + T6d = VADD(T2K, T2M); + } + { + V T2P, T2R, T2O, T2Q; + T2O = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2P = BYTW(&(W[TWVL * 108]), T2O); + T2Q = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T2R = BYTW(&(W[TWVL * 44]), T2Q); + T2S = VSUB(T2P, T2R); + T6e = VADD(T2P, T2R); + } + T6f = VSUB(T6d, T6e); + T6j = VSUB(T6h, T6i); + T2T = VMUL(LDK(KP707106781), VSUB(T2N, T2S)); + T2Y = VSUB(T2V, T2X); + T2Z = VSUB(T2T, T2Y); + T4k = VADD(T2Y, T2T); + { + V T35, T36, T7l, T7m; + T35 = VSUB(T32, T34); + T36 = VMUL(LDK(KP707106781), VADD(T2N, T2S)); + T37 = VSUB(T35, T36); + T4h = VADD(T35, T36); + T7l = VADD(T6h, T6i); + T7m = VADD(T6d, T6e); + T7n = VSUB(T7l, T7m); + T81 = VADD(T7l, T7m); + } + } + { + V T1g, T1i, T5K, T1m, T1o, T5J, T18, T5G, T1d, T5H, T5I, T5L; + { + V T1f, T1h, T1l, T1n; + T1f = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T1g = BYTW(&(W[TWVL * 26]), T1f); + T1h = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + T1i = BYTW(&(W[TWVL * 90]), T1h); + T5K = VADD(T1g, T1i); + T1l = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + T1m = BYTW(&(W[TWVL * 122]), T1l); + T1n = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T1o = BYTW(&(W[TWVL * 58]), T1n); + T5J = VADD(T1m, T1o); + } + { + V T15, T17, T14, T16; + T14 = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T15 = BYTW(&(W[TWVL * 10]), T14); + T16 = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + T17 = BYTW(&(W[TWVL * 74]), T16); + T18 = VSUB(T15, T17); + T5G = VADD(T15, T17); + } + { + V T1a, T1c, T19, T1b; + T19 = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + T1a = BYTW(&(W[TWVL * 106]), T19); + T1b = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T1c = BYTW(&(W[TWVL * 42]), T1b); + T1d = VSUB(T1a, T1c); + T5H = VADD(T1a, T1c); + } + T7w = VADD(T5J, T5K); + T7x = VADD(T5G, T5H); + T7y = VSUB(T7w, T7x); + T5I = VSUB(T5G, T5H); + T5L = VSUB(T5J, T5K); + T5M = VFNMS(LDK(KP382683432), T5L, VMUL(LDK(KP923879532), T5I)); + T6q = VFMA(LDK(KP923879532), T5L, VMUL(LDK(KP382683432), T5I)); + { + V T1e, T1j, T1p, T1q; + T1e = VMUL(LDK(KP707106781), 
VSUB(T18, T1d)); + T1j = VSUB(T1g, T1i); + T1k = VSUB(T1e, T1j); + T4s = VADD(T1j, T1e); + T1p = VSUB(T1m, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T18, T1d)); + T1r = VSUB(T1p, T1q); + T4t = VADD(T1p, T1q); + } + } + { + V TR, TT, T5A, TX, TZ, T5z, TJ, T5C, TO, T5D, T5B, T5E; + { + V TQ, TS, TW, TY; + TQ = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TR = BYTW(&(W[TWVL * 34]), TQ); + TS = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + TT = BYTW(&(W[TWVL * 98]), TS); + T5A = VADD(TR, TT); + TW = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TX = BYTW(&(W[TWVL * 2]), TW); + TY = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + TZ = BYTW(&(W[TWVL * 66]), TY); + T5z = VADD(TX, TZ); + } + { + V TG, TI, TF, TH; + TF = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TG = BYTW(&(W[TWVL * 18]), TF); + TH = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + TI = BYTW(&(W[TWVL * 82]), TH); + TJ = VSUB(TG, TI); + T5C = VADD(TG, TI); + } + { + V TL, TN, TK, TM; + TK = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + TL = BYTW(&(W[TWVL * 114]), TK); + TM = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + TN = BYTW(&(W[TWVL * 50]), TM); + TO = VSUB(TL, TN); + T5D = VADD(TL, TN); + } + T7t = VADD(T5z, T5A); + T7u = VADD(T5C, T5D); + T7v = VSUB(T7t, T7u); + T5B = VSUB(T5z, T5A); + T5E = VSUB(T5C, T5D); + T5F = VFMA(LDK(KP382683432), T5B, VMUL(LDK(KP923879532), T5E)); + T6p = VFNMS(LDK(KP382683432), T5E, VMUL(LDK(KP923879532), T5B)); + { + V TP, TU, T10, T11; + TP = VMUL(LDK(KP707106781), VSUB(TJ, TO)); + TU = VSUB(TR, TT); + TV = VSUB(TP, TU); + T4p = VADD(TU, TP); + T10 = VSUB(TX, TZ); + T11 = VMUL(LDK(KP707106781), VADD(TJ, TO)); + T12 = VSUB(T10, T11); + T4q = VADD(T10, T11); + } + } + { + V Tl, T5r, TB, T5u, Tq, T5s, Tw, T5v, Tr, TC; + { + V Ti, Tk, Th, Tj; + Th = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = BYTW(&(W[TWVL * 6]), Th); + Tj = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tk = BYTW(&(W[TWVL * 70]), Tj); + Tl = VSUB(Ti, Tk); + T5r = VADD(Ti, Tk); + } + { + V Ty, TA, Tx, Tz; + Tx = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + Ty = BYTW(&(W[TWVL * 118]), Tx); + Tz = 
LD(&(x[WS(rs, 28)]), ms, &(x[0])); + TA = BYTW(&(W[TWVL * 54]), Tz); + TB = VSUB(Ty, TA); + T5u = VADD(Ty, TA); + } + { + V Tn, Tp, Tm, To; + Tm = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tn = BYTW(&(W[TWVL * 38]), Tm); + To = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tp = BYTW(&(W[TWVL * 102]), To); + Tq = VSUB(Tn, Tp); + T5s = VADD(Tn, Tp); + } + { + V Tt, Tv, Ts, Tu; + Ts = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tt = BYTW(&(W[TWVL * 22]), Ts); + Tu = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + Tv = BYTW(&(W[TWVL * 86]), Tu); + Tw = VSUB(Tt, Tv); + T5v = VADD(Tt, Tv); + } + T7A = VADD(T5r, T5s); + T7B = VADD(T5u, T5v); + Tr = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + TC = VFNMS(LDK(KP382683432), TB, VMUL(LDK(KP923879532), Tw)); + TD = VSUB(Tr, TC); + T4x = VADD(Tr, TC); + { + V T3i, T3j, T5t, T5w; + T3i = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + T3j = VFMA(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T3k = VSUB(T3i, T3j); + T4C = VADD(T3i, T3j); + T5t = VSUB(T5r, T5s); + T5w = VSUB(T5u, T5v); + T5x = VMUL(LDK(KP707106781), VADD(T5t, T5w)); + T6s = VMUL(LDK(KP707106781), VSUB(T5t, T5w)); + } + } + { + V T1z, T5P, T1P, T5T, T1E, T5Q, T1K, T5S; + { + V T1w, T1y, T1v, T1x; + T1v = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1w = BYTW(&(W[TWVL * 8]), T1v); + T1x = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1y = BYTW(&(W[TWVL * 72]), T1x); + T1z = VSUB(T1w, T1y); + T5P = VADD(T1w, T1y); + } + { + V T1M, T1O, T1L, T1N; + T1L = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1M = BYTW(&(W[TWVL * 24]), T1L); + T1N = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1O = BYTW(&(W[TWVL * 88]), T1N); + T1P = VSUB(T1M, T1O); + T5T = VADD(T1M, T1O); + } + { + V T1B, T1D, T1A, T1C; + T1A = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1B = BYTW(&(W[TWVL * 40]), T1A); + T1C = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T1D = BYTW(&(W[TWVL * 104]), T1C); + T1E = VSUB(T1B, T1D); + T5Q = VADD(T1B, T1D); + } + { + V T1H, T1J, T1G, T1I; + T1G = LD(&(x[WS(rs, 
61)]), ms, &(x[WS(rs, 1)])); + T1H = BYTW(&(W[TWVL * 120]), T1G); + T1I = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1J = BYTW(&(W[TWVL * 56]), T1I); + T1K = VSUB(T1H, T1J); + T5S = VADD(T1H, T1J); + } + { + V T1F, T1Q, T7h, T7i; + T1F = VFNMS(LDK(KP382683432), T1E, VMUL(LDK(KP923879532), T1z)); + T1Q = VFMA(LDK(KP923879532), T1K, VMUL(LDK(KP382683432), T1P)); + T1R = VSUB(T1F, T1Q); + T4b = VADD(T1F, T1Q); + T7h = VADD(T5P, T5Q); + T7i = VADD(T5S, T5T); + T7j = VSUB(T7h, T7i); + T7Z = VADD(T7h, T7i); + } + { + V T2h, T2i, T5R, T5U; + T2h = VFMA(LDK(KP382683432), T1z, VMUL(LDK(KP923879532), T1E)); + T2i = VFNMS(LDK(KP382683432), T1K, VMUL(LDK(KP923879532), T1P)); + T2j = VSUB(T2h, T2i); + T4e = VADD(T2h, T2i); + T5R = VSUB(T5P, T5Q); + T5U = VSUB(T5S, T5T); + T5V = VMUL(LDK(KP707106781), VSUB(T5R, T5U)); + T63 = VMUL(LDK(KP707106781), VADD(T5R, T5U)); + } + } + { + V T2q, T66, T2G, T6a, T2v, T67, T2B, T69; + { + V T2n, T2p, T2m, T2o; + T2m = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2n = BYTW(&(W[TWVL * 4]), T2m); + T2o = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2p = BYTW(&(W[TWVL * 68]), T2o); + T2q = VSUB(T2n, T2p); + T66 = VADD(T2n, T2p); + } + { + V T2D, T2F, T2C, T2E; + T2C = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2D = BYTW(&(W[TWVL * 20]), T2C); + T2E = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + T2F = BYTW(&(W[TWVL * 84]), T2E); + T2G = VSUB(T2D, T2F); + T6a = VADD(T2D, T2F); + } + { + V T2s, T2u, T2r, T2t; + T2r = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T2s = BYTW(&(W[TWVL * 36]), T2r); + T2t = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + T2u = BYTW(&(W[TWVL * 100]), T2t); + T2v = VSUB(T2s, T2u); + T67 = VADD(T2s, T2u); + } + { + V T2y, T2A, T2x, T2z; + T2x = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2y = BYTW(&(W[TWVL * 116]), T2x); + T2z = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T2A = BYTW(&(W[TWVL * 52]), T2z); + T2B = VSUB(T2y, T2A); + T69 = VADD(T2y, T2A); + } + { + V T2w, T2H, T7o, T7p; + T2w = VFNMS(LDK(KP382683432), 
T2v, VMUL(LDK(KP923879532), T2q)); + T2H = VFMA(LDK(KP923879532), T2B, VMUL(LDK(KP382683432), T2G)); + T2I = VSUB(T2w, T2H); + T4i = VADD(T2w, T2H); + T7o = VADD(T66, T67); + T7p = VADD(T69, T6a); + T7q = VSUB(T7o, T7p); + T82 = VADD(T7o, T7p); + } + { + V T38, T39, T68, T6b; + T38 = VFMA(LDK(KP382683432), T2q, VMUL(LDK(KP923879532), T2v)); + T39 = VFNMS(LDK(KP382683432), T2B, VMUL(LDK(KP923879532), T2G)); + T3a = VSUB(T38, T39); + T4l = VADD(T38, T39); + T68 = VSUB(T66, T67); + T6b = VSUB(T69, T6a); + T6c = VMUL(LDK(KP707106781), VSUB(T68, T6b)); + T6k = VMUL(LDK(KP707106781), VADD(T68, T6b)); + } + } + { + V T7s, T7R, T7M, T7U, T7D, T7T, T7J, T7Q; + { + V T7k, T7r, T7K, T7L; + T7k = VFNMS(LDK(KP382683432), T7j, VMUL(LDK(KP923879532), T7g)); + T7r = VFMA(LDK(KP923879532), T7n, VMUL(LDK(KP382683432), T7q)); + T7s = VSUB(T7k, T7r); + T7R = VADD(T7k, T7r); + T7K = VFMA(LDK(KP382683432), T7g, VMUL(LDK(KP923879532), T7j)); + T7L = VFNMS(LDK(KP382683432), T7n, VMUL(LDK(KP923879532), T7q)); + T7M = VSUB(T7K, T7L); + T7U = VADD(T7K, T7L); + } + { + V T7z, T7C, T7H, T7I; + T7z = VMUL(LDK(KP707106781), VSUB(T7v, T7y)); + T7C = VSUB(T7A, T7B); + T7D = VSUB(T7z, T7C); + T7T = VADD(T7C, T7z); + T7H = VSUB(T7F, T7G); + T7I = VMUL(LDK(KP707106781), VADD(T7v, T7y)); + T7J = VSUB(T7H, T7I); + T7Q = VADD(T7H, T7I); + } + { + V T7E, T7N, T7W, T7X; + T7E = VBYI(VSUB(T7s, T7D)); + T7N = VSUB(T7J, T7M); + ST(&(x[WS(rs, 20)]), VADD(T7E, T7N), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VSUB(T7N, T7E), ms, &(x[0])); + T7W = VSUB(T7Q, T7R); + T7X = VBYI(VSUB(T7U, T7T)); + ST(&(x[WS(rs, 36)]), VSUB(T7W, T7X), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VADD(T7W, T7X), ms, &(x[0])); + } + { + V T7O, T7P, T7S, T7V; + T7O = VBYI(VADD(T7D, T7s)); + T7P = VADD(T7J, T7M); + ST(&(x[WS(rs, 12)]), VADD(T7O, T7P), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VSUB(T7P, T7O), ms, &(x[0])); + T7S = VADD(T7Q, T7R); + T7V = VBYI(VADD(T7T, T7U)); + ST(&(x[WS(rs, 60)]), VSUB(T7S, T7V), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), 
VADD(T7S, T7V), ms, &(x[0])); + } + } + { + V T84, T8c, T8l, T8n, T87, T8h, T8b, T8g, T8i, T8m; + { + V T80, T83, T8j, T8k; + T80 = VSUB(T7Y, T7Z); + T83 = VSUB(T81, T82); + T84 = VMUL(LDK(KP707106781), VSUB(T80, T83)); + T8c = VMUL(LDK(KP707106781), VADD(T80, T83)); + T8j = VADD(T7Y, T7Z); + T8k = VADD(T81, T82); + T8l = VBYI(VSUB(T8j, T8k)); + T8n = VADD(T8j, T8k); + } + { + V T85, T86, T89, T8a; + T85 = VADD(T7t, T7u); + T86 = VADD(T7w, T7x); + T87 = VSUB(T85, T86); + T8h = VADD(T85, T86); + T89 = VADD(T7F, T7G); + T8a = VADD(T7A, T7B); + T8b = VSUB(T89, T8a); + T8g = VADD(T89, T8a); + } + T8i = VSUB(T8g, T8h); + ST(&(x[WS(rs, 48)]), VSUB(T8i, T8l), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T8i, T8l), ms, &(x[0])); + T8m = VADD(T8g, T8h); + ST(&(x[WS(rs, 32)]), VSUB(T8m, T8n), ms, &(x[0])); + ST(&(x[0]), VADD(T8m, T8n), ms, &(x[0])); + { + V T88, T8d, T8e, T8f; + T88 = VBYI(VSUB(T84, T87)); + T8d = VSUB(T8b, T8c); + ST(&(x[WS(rs, 24)]), VADD(T88, T8d), ms, &(x[0])); + ST(&(x[WS(rs, 40)]), VSUB(T8d, T88), ms, &(x[0])); + T8e = VBYI(VADD(T87, T84)); + T8f = VADD(T8b, T8c); + ST(&(x[WS(rs, 8)]), VADD(T8e, T8f), ms, &(x[0])); + ST(&(x[WS(rs, 56)]), VSUB(T8f, T8e), ms, &(x[0])); + } + } + { + V T5O, T6H, T6x, T6F, T6n, T6I, T6A, T6E; + { + V T5y, T5N, T6r, T6w; + T5y = VSUB(T5q, T5x); + T5N = VSUB(T5F, T5M); + T5O = VSUB(T5y, T5N); + T6H = VADD(T5y, T5N); + T6r = VSUB(T6p, T6q); + T6w = VSUB(T6s, T6v); + T6x = VSUB(T6r, T6w); + T6F = VADD(T6w, T6r); + { + V T65, T6y, T6m, T6z; + { + V T5Z, T64, T6g, T6l; + T5Z = VSUB(T5V, T5Y); + T64 = VSUB(T62, T63); + T65 = VFMA(LDK(KP831469612), T5Z, VMUL(LDK(KP555570233), T64)); + T6y = VFNMS(LDK(KP555570233), T5Z, VMUL(LDK(KP831469612), T64)); + T6g = VSUB(T6c, T6f); + T6l = VSUB(T6j, T6k); + T6m = VFNMS(LDK(KP555570233), T6l, VMUL(LDK(KP831469612), T6g)); + T6z = VFMA(LDK(KP555570233), T6g, VMUL(LDK(KP831469612), T6l)); + } + T6n = VSUB(T65, T6m); + T6I = VADD(T6y, T6z); + T6A = VSUB(T6y, T6z); + T6E = VADD(T65, T6m); + } + } + 
{ + V T6o, T6B, T6K, T6L; + T6o = VADD(T5O, T6n); + T6B = VBYI(VADD(T6x, T6A)); + ST(&(x[WS(rs, 54)]), VSUB(T6o, T6B), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VADD(T6o, T6B), ms, &(x[0])); + T6K = VBYI(VADD(T6F, T6E)); + T6L = VADD(T6H, T6I); + ST(&(x[WS(rs, 6)]), VADD(T6K, T6L), ms, &(x[0])); + ST(&(x[WS(rs, 58)]), VSUB(T6L, T6K), ms, &(x[0])); + } + { + V T6C, T6D, T6G, T6J; + T6C = VSUB(T5O, T6n); + T6D = VBYI(VSUB(T6A, T6x)); + ST(&(x[WS(rs, 42)]), VSUB(T6C, T6D), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VADD(T6C, T6D), ms, &(x[0])); + T6G = VBYI(VSUB(T6E, T6F)); + T6J = VSUB(T6H, T6I); + ST(&(x[WS(rs, 26)]), VADD(T6G, T6J), ms, &(x[0])); + ST(&(x[WS(rs, 38)]), VSUB(T6J, T6G), ms, &(x[0])); + } + } + { + V T6O, T79, T6Z, T77, T6V, T7a, T72, T76; + { + V T6M, T6N, T6X, T6Y; + T6M = VADD(T5q, T5x); + T6N = VADD(T6p, T6q); + T6O = VSUB(T6M, T6N); + T79 = VADD(T6M, T6N); + T6X = VADD(T5F, T5M); + T6Y = VADD(T6v, T6s); + T6Z = VSUB(T6X, T6Y); + T77 = VADD(T6Y, T6X); + { + V T6R, T70, T6U, T71; + { + V T6P, T6Q, T6S, T6T; + T6P = VADD(T5Y, T5V); + T6Q = VADD(T62, T63); + T6R = VFMA(LDK(KP980785280), T6P, VMUL(LDK(KP195090322), T6Q)); + T70 = VFNMS(LDK(KP195090322), T6P, VMUL(LDK(KP980785280), T6Q)); + T6S = VADD(T6f, T6c); + T6T = VADD(T6j, T6k); + T6U = VFNMS(LDK(KP195090322), T6T, VMUL(LDK(KP980785280), T6S)); + T71 = VFMA(LDK(KP195090322), T6S, VMUL(LDK(KP980785280), T6T)); + } + T6V = VSUB(T6R, T6U); + T7a = VADD(T70, T71); + T72 = VSUB(T70, T71); + T76 = VADD(T6R, T6U); + } + } + { + V T6W, T73, T7c, T7d; + T6W = VADD(T6O, T6V); + T73 = VBYI(VADD(T6Z, T72)); + ST(&(x[WS(rs, 50)]), VSUB(T6W, T73), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T6W, T73), ms, &(x[0])); + T7c = VBYI(VADD(T77, T76)); + T7d = VADD(T79, T7a); + ST(&(x[WS(rs, 2)]), VADD(T7c, T7d), ms, &(x[0])); + ST(&(x[WS(rs, 62)]), VSUB(T7d, T7c), ms, &(x[0])); + } + { + V T74, T75, T78, T7b; + T74 = VSUB(T6O, T6V); + T75 = VBYI(VSUB(T72, T6Z)); + ST(&(x[WS(rs, 46)]), VSUB(T74, T75), ms, &(x[0])); + 
ST(&(x[WS(rs, 18)]), VADD(T74, T75), ms, &(x[0])); + T78 = VBYI(VSUB(T76, T77)); + T7b = VSUB(T79, T7a); + ST(&(x[WS(rs, 30)]), VADD(T78, T7b), ms, &(x[0])); + ST(&(x[WS(rs, 34)]), VSUB(T7b, T78), ms, &(x[0])); + } + } + { + V T4z, T5g, T4R, T59, T4H, T5j, T4O, T55, T4o, T4S, T4K, T4P, T52, T5k, T5c; + V T5h; + { + V T4y, T57, T4v, T58, T4r, T4u; + T4y = VADD(T4w, T4x); + T57 = VSUB(T4B, T4C); + T4r = VFMA(LDK(KP980785280), T4p, VMUL(LDK(KP195090322), T4q)); + T4u = VFNMS(LDK(KP195090322), T4t, VMUL(LDK(KP980785280), T4s)); + T4v = VADD(T4r, T4u); + T58 = VSUB(T4r, T4u); + T4z = VSUB(T4v, T4y); + T5g = VADD(T57, T58); + T4R = VADD(T4y, T4v); + T59 = VSUB(T57, T58); + } + { + V T4D, T54, T4G, T53, T4E, T4F; + T4D = VADD(T4B, T4C); + T54 = VSUB(T4x, T4w); + T4E = VFNMS(LDK(KP195090322), T4p, VMUL(LDK(KP980785280), T4q)); + T4F = VFMA(LDK(KP195090322), T4s, VMUL(LDK(KP980785280), T4t)); + T4G = VADD(T4E, T4F); + T53 = VSUB(T4E, T4F); + T4H = VSUB(T4D, T4G); + T5j = VADD(T54, T53); + T4O = VADD(T4D, T4G); + T55 = VSUB(T53, T54); + } + { + V T4g, T4I, T4n, T4J; + { + V T4c, T4f, T4j, T4m; + T4c = VADD(T4a, T4b); + T4f = VADD(T4d, T4e); + T4g = VFNMS(LDK(KP098017140), T4f, VMUL(LDK(KP995184726), T4c)); + T4I = VFMA(LDK(KP098017140), T4c, VMUL(LDK(KP995184726), T4f)); + T4j = VADD(T4h, T4i); + T4m = VADD(T4k, T4l); + T4n = VFMA(LDK(KP995184726), T4j, VMUL(LDK(KP098017140), T4m)); + T4J = VFNMS(LDK(KP098017140), T4j, VMUL(LDK(KP995184726), T4m)); + } + T4o = VSUB(T4g, T4n); + T4S = VADD(T4I, T4J); + T4K = VSUB(T4I, T4J); + T4P = VADD(T4g, T4n); + } + { + V T4Y, T5a, T51, T5b; + { + V T4W, T4X, T4Z, T50; + T4W = VSUB(T4a, T4b); + T4X = VSUB(T4e, T4d); + T4Y = VFNMS(LDK(KP634393284), T4X, VMUL(LDK(KP773010453), T4W)); + T5a = VFMA(LDK(KP634393284), T4W, VMUL(LDK(KP773010453), T4X)); + T4Z = VSUB(T4h, T4i); + T50 = VSUB(T4l, T4k); + T51 = VFMA(LDK(KP773010453), T4Z, VMUL(LDK(KP634393284), T50)); + T5b = VFNMS(LDK(KP634393284), T4Z, VMUL(LDK(KP773010453), T50)); + } + T52 = 
VSUB(T4Y, T51); + T5k = VADD(T5a, T5b); + T5c = VSUB(T5a, T5b); + T5h = VADD(T4Y, T51); + } + { + V T4A, T4L, T5i, T5l; + T4A = VBYI(VSUB(T4o, T4z)); + T4L = VSUB(T4H, T4K); + ST(&(x[WS(rs, 17)]), VADD(T4A, T4L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VSUB(T4L, T4A), ms, &(x[WS(rs, 1)])); + T5i = VADD(T5g, T5h); + T5l = VBYI(VADD(T5j, T5k)); + ST(&(x[WS(rs, 57)]), VSUB(T5i, T5l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T5i, T5l), ms, &(x[WS(rs, 1)])); + } + { + V T5m, T5n, T4M, T4N; + T5m = VSUB(T5g, T5h); + T5n = VBYI(VSUB(T5k, T5j)); + ST(&(x[WS(rs, 39)]), VSUB(T5m, T5n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VADD(T5m, T5n), ms, &(x[WS(rs, 1)])); + T4M = VBYI(VADD(T4z, T4o)); + T4N = VADD(T4H, T4K); + ST(&(x[WS(rs, 15)]), VADD(T4M, T4N), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 49)]), VSUB(T4N, T4M), ms, &(x[WS(rs, 1)])); + } + { + V T4Q, T4T, T56, T5d; + T4Q = VADD(T4O, T4P); + T4T = VBYI(VADD(T4R, T4S)); + ST(&(x[WS(rs, 63)]), VSUB(T4Q, T4T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T4Q, T4T), ms, &(x[WS(rs, 1)])); + T56 = VBYI(VSUB(T52, T55)); + T5d = VSUB(T59, T5c); + ST(&(x[WS(rs, 23)]), VADD(T56, T5d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VSUB(T5d, T56), ms, &(x[WS(rs, 1)])); + } + { + V T5e, T5f, T4U, T4V; + T5e = VBYI(VADD(T55, T52)); + T5f = VADD(T59, T5c); + ST(&(x[WS(rs, 9)]), VADD(T5e, T5f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 55)]), VSUB(T5f, T5e), ms, &(x[WS(rs, 1)])); + T4U = VSUB(T4O, T4P); + T4V = VBYI(VSUB(T4S, T4R)); + ST(&(x[WS(rs, 33)]), VSUB(T4U, T4V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VADD(T4U, T4V), ms, &(x[WS(rs, 1)])); + } + } + { + V T1u, T43, T3D, T3V, T3t, T45, T3B, T3K, T3d, T3E, T3w, T3A, T3R, T46, T3Y; + V T42; + { + V TE, T3U, T1t, T3T, T13, T1s; + TE = VSUB(Tg, TD); + T3U = VADD(T3r, T3k); + T13 = VFMA(LDK(KP831469612), TV, VMUL(LDK(KP555570233), T12)); + T1s = VFNMS(LDK(KP555570233), T1r, VMUL(LDK(KP831469612), T1k)); + T1t = VSUB(T13, T1s); + T3T = VADD(T13, T1s); + T1u = VSUB(TE, 
T1t); + T43 = VADD(T3U, T3T); + T3D = VADD(TE, T1t); + T3V = VSUB(T3T, T3U); + } + { + V T3s, T3I, T3h, T3J, T3f, T3g; + T3s = VSUB(T3k, T3r); + T3I = VADD(Tg, TD); + T3f = VFNMS(LDK(KP555570233), TV, VMUL(LDK(KP831469612), T12)); + T3g = VFMA(LDK(KP555570233), T1k, VMUL(LDK(KP831469612), T1r)); + T3h = VSUB(T3f, T3g); + T3J = VADD(T3f, T3g); + T3t = VSUB(T3h, T3s); + T45 = VADD(T3I, T3J); + T3B = VADD(T3s, T3h); + T3K = VSUB(T3I, T3J); + } + { + V T2l, T3u, T3c, T3v; + { + V T29, T2k, T30, T3b; + T29 = VSUB(T1R, T28); + T2k = VSUB(T2g, T2j); + T2l = VFMA(LDK(KP881921264), T29, VMUL(LDK(KP471396736), T2k)); + T3u = VFNMS(LDK(KP471396736), T29, VMUL(LDK(KP881921264), T2k)); + T30 = VSUB(T2I, T2Z); + T3b = VSUB(T37, T3a); + T3c = VFNMS(LDK(KP471396736), T3b, VMUL(LDK(KP881921264), T30)); + T3v = VFMA(LDK(KP471396736), T30, VMUL(LDK(KP881921264), T3b)); + } + T3d = VSUB(T2l, T3c); + T3E = VADD(T3u, T3v); + T3w = VSUB(T3u, T3v); + T3A = VADD(T2l, T3c); + } + { + V T3N, T3W, T3Q, T3X; + { + V T3L, T3M, T3O, T3P; + T3L = VADD(T28, T1R); + T3M = VADD(T2g, T2j); + T3N = VFMA(LDK(KP956940335), T3L, VMUL(LDK(KP290284677), T3M)); + T3W = VFNMS(LDK(KP290284677), T3L, VMUL(LDK(KP956940335), T3M)); + T3O = VADD(T2Z, T2I); + T3P = VADD(T37, T3a); + T3Q = VFNMS(LDK(KP290284677), T3P, VMUL(LDK(KP956940335), T3O)); + T3X = VFMA(LDK(KP290284677), T3O, VMUL(LDK(KP956940335), T3P)); + } + T3R = VSUB(T3N, T3Q); + T46 = VADD(T3W, T3X); + T3Y = VSUB(T3W, T3X); + T42 = VADD(T3N, T3Q); + } + { + V T3e, T3x, T44, T47; + T3e = VADD(T1u, T3d); + T3x = VBYI(VADD(T3t, T3w)); + ST(&(x[WS(rs, 53)]), VSUB(T3e, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T3e, T3x), ms, &(x[WS(rs, 1)])); + T44 = VBYI(VSUB(T42, T43)); + T47 = VSUB(T45, T46); + ST(&(x[WS(rs, 29)]), VADD(T44, T47), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 35)]), VSUB(T47, T44), ms, &(x[WS(rs, 1)])); + } + { + V T48, T49, T3y, T3z; + T48 = VBYI(VADD(T43, T42)); + T49 = VADD(T45, T46); + ST(&(x[WS(rs, 3)]), VADD(T48, T49), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 61)]), VSUB(T49, T48), ms, &(x[WS(rs, 1)])); + T3y = VSUB(T1u, T3d); + T3z = VBYI(VSUB(T3w, T3t)); + ST(&(x[WS(rs, 43)]), VSUB(T3y, T3z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VADD(T3y, T3z), ms, &(x[WS(rs, 1)])); + } + { + V T3C, T3F, T3S, T3Z; + T3C = VBYI(VSUB(T3A, T3B)); + T3F = VSUB(T3D, T3E); + ST(&(x[WS(rs, 27)]), VADD(T3C, T3F), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 37)]), VSUB(T3F, T3C), ms, &(x[WS(rs, 1)])); + T3S = VADD(T3K, T3R); + T3Z = VBYI(VADD(T3V, T3Y)); + ST(&(x[WS(rs, 51)]), VSUB(T3S, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VADD(T3S, T3Z), ms, &(x[WS(rs, 1)])); + } + { + V T40, T41, T3G, T3H; + T40 = VSUB(T3K, T3R); + T41 = VBYI(VSUB(T3Y, T3V)); + ST(&(x[WS(rs, 45)]), VSUB(T40, T41), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VADD(T40, T41), ms, &(x[WS(rs, 1)])); + T3G = VBYI(VADD(T3B, T3A)); + T3H = VADD(T3D, T3E); + ST(&(x[WS(rs, 5)]), VADD(T3G, T3H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 59)]), VSUB(T3H, T3G), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; + +static 
const ct_desc desc = { 64, XSIMD_STRING("t2bv_64"), twinstr, &GENUS, {467, 198, 52, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_64) (planner *p) { + X(kdft_dit_register) (p, t2bv_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:39 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t2bv_8 -include t2b.h -sign 1 */ + +/* + * This function contains 33 FP additions, 24 FP multiplications, + * (or, 23 additions, 14 multiplications, 10 fused multiply/add), + * 36 stack variables, 1 constants, and 16 memory accesses + */ +#include "t2b.h" + +static void t2bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, Th, Tj, T5, T7, Ta, Tc; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Ti, Tk, T6, T8, Tb, Td; + T3 = BYTW(&(W[TWVL * 6]), T2); + Ti = BYTW(&(W[TWVL * 2]), Th); + Tk = BYTW(&(W[TWVL * 10]), Tj); + T6 = BYTW(&(W[0]), T5); + T8 = BYTW(&(W[TWVL * 8]), T7); + Tb = BYTW(&(W[TWVL * 12]), Ta); + Td = BYTW(&(W[TWVL * 4]), Tc); + { + V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts; + Tq = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tr = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + Tt = VADD(T6, T8); + T9 = VSUB(T6, T8); + Tu = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tw 
= VADD(Tq, Tr); + Ts = VSUB(Tq, Tr); + { + V Tx, Tv, Tm, Tf; + Tx = VADD(Tt, Tu); + Tv = VSUB(Tt, Tu); + Tm = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V Tp, Tn, To, Tg; + ST(&(x[0]), VADD(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(Tv, Ts), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tv, Ts), ms, &(x[0])); + Tp = VFMA(LDK(KP707106781), Tm, Tl); + Tn = VFNMS(LDK(KP707106781), Tm, Tl); + To = VFMA(LDK(KP707106781), Tf, T4); + Tg = VFNMS(LDK(KP707106781), Tf, T4); + ST(&(x[WS(rs, 1)]), VFMAI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(Tn, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tn, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t2bv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_8) (planner *p) { + X(kdft_dit_register) (p, t2bv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t2bv_8 -include t2b.h -sign 1 */ + +/* + * This function contains 33 FP additions, 16 FP multiplications, + * (or, 33 additions, 16 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "t2b.h" + +static void t2bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V Tl, Tq, Tg, Tr, T5, Tt, Ta, Tu, Ti, Tk, Tj; + Ti = LD(&(x[0]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tk = 
BYTW(&(W[TWVL * 6]), Tj); + Tl = VSUB(Ti, Tk); + Tq = VADD(Ti, Tk); + { + V Td, Tf, Tc, Te; + Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Td = BYTW(&(W[TWVL * 2]), Tc); + Te = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tf = BYTW(&(W[TWVL * 10]), Te); + Tg = VSUB(Td, Tf); + Tr = VADD(Td, Tf); + } + { + V T2, T4, T1, T3; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTW(&(W[0]), T1); + T3 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = BYTW(&(W[TWVL * 8]), T3); + T5 = VSUB(T2, T4); + Tt = VADD(T2, T4); + } + { + V T7, T9, T6, T8; + T6 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T7 = BYTW(&(W[TWVL * 12]), T6); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTW(&(W[TWVL * 4]), T8); + Ta = VSUB(T7, T9); + Tu = VADD(T7, T9); + } + { + V Ts, Tv, Tw, Tx; + Ts = VSUB(Tq, Tr); + Tv = VBYI(VSUB(Tt, Tu)); + ST(&(x[WS(rs, 6)]), VSUB(Ts, Tv), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Ts, Tv), ms, &(x[0])); + Tw = VADD(Tq, Tr); + Tx = VADD(Tt, Tu); + ST(&(x[WS(rs, 4)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[0]), VADD(Tw, Tx), ms, &(x[0])); + { + V Th, To, Tn, Tp, Tb, Tm; + Tb = VMUL(LDK(KP707106781), VSUB(T5, Ta)); + Th = VBYI(VSUB(Tb, Tg)); + To = VBYI(VADD(Tg, Tb)); + Tm = VMUL(LDK(KP707106781), VADD(T5, Ta)); + Tn = VSUB(Tl, Tm); + Tp = VADD(Tl, Tm); + ST(&(x[WS(rs, 3)]), VADD(Th, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VSUB(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tn, Th), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t2bv_8"), twinstr, &GENUS, {33, 16, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2bv_8) (planner *p) { + X(kdft_dit_register) (p, t2bv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_10.c --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:24 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t2fv_10 -include t2f.h */ + +/* + * This function contains 51 FP additions, 40 FP multiplications, + * (or, 33 additions, 22 multiplications, 18 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "t2f.h" + +static void t2fv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * 
((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Td, TA, T4, Ta, Tk, TE, Tp, TF, TB, T9, T1, T2, Tb; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V Tg, Tn, Ti, Tl; + Tg = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + { + V T6, T8, T5, Tc; + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Th, To, Tj, Tm, T7; + T7 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + Th = BYTWJ(&(W[TWVL * 6]), Tg); + To = BYTWJ(&(W[0]), Tn); + Tj = BYTWJ(&(W[TWVL * 16]), Ti); + Tm = BYTWJ(&(W[TWVL * 10]), Tl); + T6 = BYTWJ(&(W[TWVL * 2]), T5); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + T8 = BYTWJ(&(W[TWVL * 12]), T7); + TA = VADD(T1, T3); + T4 = VSUB(T1, T3); + Ta = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tk = VSUB(Th, Tj); + TE = VADD(Th, Tj); + Tp = VSUB(Tm, To); + TF = VADD(Tm, To); + } + TB = VADD(T6, T8); + T9 = VSUB(T6, T8); + } + } + Tb = BYTWJ(&(W[TWVL * 14]), Ta); + { + V TL, TG, Tw, Tq, TC, Te; + TL = VSUB(TE, TF); + TG = VADD(TE, TF); + Tw = VSUB(Tk, Tp); + Tq = VADD(Tk, Tp); + TC = VADD(Tb, Td); + Te = VSUB(Tb, Td); + { + V TM, TD, Tv, Tf; + TM = VSUB(TB, TC); + TD = VADD(TB, TC); + Tv = VSUB(T9, Te); + Tf = VADD(T9, Te); + { + V TP, TN, TH, TJ, Tz, Tx, Tr, Tt, TI, Ts; + TP = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TL, TM)); + TN = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TM, TL)); + TH = VADD(TD, TG); + TJ = VSUB(TD, TG); + Tz = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tv, Tw)); + Tx = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tw, Tv)); + Tr = VADD(Tf, Tq); + Tt = VSUB(Tf, Tq); + ST(&(x[0]), VADD(TA, TH), ms, &(x[0])); + TI = VFNMS(LDK(KP250000000), TH, TA); + ST(&(x[WS(rs, 5)]), VADD(T4, Tr), ms, &(x[WS(rs, 1)])); + Ts = VFNMS(LDK(KP250000000), 
Tr, T4); + { + V TK, TO, Tu, Ty; + TK = VFNMS(LDK(KP559016994), TJ, TI); + TO = VFMA(LDK(KP559016994), TJ, TI); + Tu = VFMA(LDK(KP559016994), Tt, Ts); + Ty = VFNMS(LDK(KP559016994), Tt, Ts); + ST(&(x[WS(rs, 8)]), VFNMSI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TP, TO), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t2fv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_10) (planner *p) { + X(kdft_dit_register) (p, t2fv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t2fv_10 -include t2f.h */ + +/* + * This function contains 51 FP additions, 30 FP multiplications, + * (or, 45 additions, 24 multiplications, 6 fused multiply/add), + * 32 stack variables, 4 constants, and 20 memory accesses + */ +#include "t2f.h" + +static void t2fv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 18)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(10, rs)) { + V Tr, TH, Tg, Tl, 
Tm, TA, TB, TJ, T5, Ta, Tb, TD, TE, TI, To; + V Tq, Tp; + To = LD(&(x[0]), ms, &(x[0])); + Tp = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tq = BYTWJ(&(W[TWVL * 8]), Tp); + Tr = VSUB(To, Tq); + TH = VADD(To, Tq); + { + V Td, Tk, Tf, Ti; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 6]), Tc); + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = BYTWJ(&(W[0]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTWJ(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 10]), Th); + } + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VADD(Tg, Tl); + TA = VADD(Td, Tf); + TB = VADD(Ti, Tk); + TJ = VADD(TA, TB); + } + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2 = BYTWJ(&(W[TWVL * 2]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTWJ(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 14]), T6); + } + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VADD(T5, Ta); + TD = VADD(T2, T4); + TE = VADD(T7, T9); + TI = VADD(TD, TE); + } + { + V Tn, Ts, Tt, Tx, Tz, Tv, Tw, Ty, Tu; + Tn = VMUL(LDK(KP559016994), VSUB(Tb, Tm)); + Ts = VADD(Tb, Tm); + Tt = VFNMS(LDK(KP250000000), Ts, Tr); + Tv = VSUB(T5, Ta); + Tw = VSUB(Tg, Tl); + Tx = VBYI(VFMA(LDK(KP951056516), Tv, VMUL(LDK(KP587785252), Tw))); + Tz = VBYI(VFNMS(LDK(KP587785252), Tv, VMUL(LDK(KP951056516), Tw))); + ST(&(x[WS(rs, 5)]), VADD(Tr, Ts), ms, &(x[WS(rs, 1)])); + Ty = VSUB(Tt, Tn); + ST(&(x[WS(rs, 3)]), VSUB(Ty, Tz), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(Tz, Ty), ms, &(x[WS(rs, 1)])); + Tu = VADD(Tn, Tt); + ST(&(x[WS(rs, 1)]), VSUB(Tu, Tx), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(Tx, Tu), ms, &(x[WS(rs, 1)])); + } + { + V TM, TK, TL, TG, TO, TC, TF, TP, TN; + TM = VMUL(LDK(KP559016994), VSUB(TI, TJ)); + TK = VADD(TI, TJ); + TL = 
VFNMS(LDK(KP250000000), TK, TH); + TC = VSUB(TA, TB); + TF = VSUB(TD, TE); + TG = VBYI(VFNMS(LDK(KP587785252), TF, VMUL(LDK(KP951056516), TC))); + TO = VBYI(VFMA(LDK(KP951056516), TF, VMUL(LDK(KP587785252), TC))); + ST(&(x[0]), VADD(TH, TK), ms, &(x[0])); + TP = VADD(TM, TL); + ST(&(x[WS(rs, 4)]), VADD(TO, TP), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VSUB(TP, TO), ms, &(x[0])); + TN = VSUB(TL, TM); + ST(&(x[WS(rs, 2)]), VADD(TG, TN), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TN, TG), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t2fv_10"), twinstr, &GENUS, {45, 24, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_10) (planner *p) { + X(kdft_dit_register) (p, t2fv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:18 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2fv_16 -include t2f.h */ + +/* + * This function contains 87 FP additions, 64 FP multiplications, + * (or, 53 additions, 30 multiplications, 34 fused multiply/add), + * 61 stack variables, 3 constants, and 32 memory accesses + */ +#include "t2f.h" + +static void t2fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TO, Ta, TJ, TP, T14, Tq, T1i, T10, T1b, T1l, T13, T1c, TR, Tl, T15; + V Tv; + { + V Tc, TW, T4, T19, T9, TD, TI, Tj, TZ, T1a, Te, Th, Tn, Tr, Tu; + V Tp; + { + V T1, T2, T5, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tz, TG, TB, TE; + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TE = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V Ti, TY, TX, Td, Tg, Tm, Tt, To; + { + V T3, T6, T8, TA, TH, TC, TF, Tb; + Tb = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 14]), T2); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + T8 = 
BYTWJ(&(W[TWVL * 22]), T7); + TA = BYTWJ(&(W[TWVL * 26]), Tz); + TH = BYTWJ(&(W[TWVL * 18]), TG); + TC = BYTWJ(&(W[TWVL * 10]), TB); + TF = BYTWJ(&(W[TWVL * 2]), TE); + Tc = BYTWJ(&(W[0]), Tb); + TW = VSUB(T1, T3); + T4 = VADD(T1, T3); + T19 = VSUB(T6, T8); + T9 = VADD(T6, T8); + Ti = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TD = VADD(TA, TC); + TY = VSUB(TA, TC); + TI = VADD(TF, TH); + TX = VSUB(TF, TH); + } + Td = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tg = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tm = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tj = BYTWJ(&(W[TWVL * 24]), Ti); + Tt = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + To = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TZ = VADD(TX, TY); + T1a = VSUB(TY, TX); + Te = BYTWJ(&(W[TWVL * 16]), Td); + Th = BYTWJ(&(W[TWVL * 8]), Tg); + Tn = BYTWJ(&(W[TWVL * 28]), Tm); + Tr = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tu = BYTWJ(&(W[TWVL * 20]), Tt); + Tp = BYTWJ(&(W[TWVL * 12]), To); + } + } + } + { + V Tf, T11, Tk, T12, Ts; + TO = VADD(T4, T9); + Ta = VSUB(T4, T9); + TJ = VSUB(TD, TI); + TP = VADD(TI, TD); + Tf = VADD(Tc, Te); + T11 = VSUB(Tc, Te); + Tk = VADD(Th, Tj); + T12 = VSUB(Th, Tj); + Ts = BYTWJ(&(W[TWVL * 4]), Tr); + T14 = VSUB(Tn, Tp); + Tq = VADD(Tn, Tp); + T1i = VFNMS(LDK(KP707106781), TZ, TW); + T10 = VFMA(LDK(KP707106781), TZ, TW); + T1b = VFNMS(LDK(KP707106781), T1a, T19); + T1l = VFMA(LDK(KP707106781), T1a, T19); + T13 = VFNMS(LDK(KP414213562), T12, T11); + T1c = VFMA(LDK(KP414213562), T11, T12); + TR = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + T15 = VSUB(Tu, Ts); + Tv = VADD(Ts, Tu); + } + } + { + V T1d, T16, TS, Tw, TU, TQ; + T1d = VFMA(LDK(KP414213562), T14, T15); + T16 = VFNMS(LDK(KP414213562), T15, T14); + TS = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + TU = VSUB(TO, TP); + TQ = VADD(TO, TP); + { + V T1e, T1j, T17, T1m; + T1e = VSUB(T1c, T1d); + T1j = VADD(T1c, T1d); + T17 = VADD(T13, T16); + T1m = VSUB(T16, T13); + { + V TV, TT, TK, Tx; + TV = VSUB(TS, TR); + TT = VADD(TR, TS); + TK = 
VSUB(Tw, Tl); + Tx = VADD(Tl, Tw); + { + V T1h, T1f, T1o, T1k; + T1h = VFMA(LDK(KP923879532), T1e, T1b); + T1f = VFNMS(LDK(KP923879532), T1e, T1b); + T1o = VFMA(LDK(KP923879532), T1j, T1i); + T1k = VFNMS(LDK(KP923879532), T1j, T1i); + { + V T1g, T18, T1p, T1n; + T1g = VFMA(LDK(KP923879532), T17, T10); + T18 = VFNMS(LDK(KP923879532), T17, T10); + T1p = VFMA(LDK(KP923879532), T1m, T1l); + T1n = VFNMS(LDK(KP923879532), T1m, T1l); + ST(&(x[WS(rs, 12)]), VFNMSI(TV, TU), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TV, TU), ms, &(x[0])); + ST(&(x[0]), VADD(TQ, TT), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TQ, TT), ms, &(x[0])); + { + V TN, TL, TM, Ty; + TN = VFMA(LDK(KP707106781), TK, TJ); + TL = VFNMS(LDK(KP707106781), TK, TJ); + TM = VFMA(LDK(KP707106781), Tx, Ta); + Ty = VFNMS(LDK(KP707106781), Tx, Ta); + ST(&(x[WS(rs, 1)]), VFNMSI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T1h, T1g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1f, T18), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1p, T1o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T1n, T1k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFNMSI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t2fv_16"), twinstr, &GENUS, {53, 30, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_16) (planner *p) { + X(kdft_dit_register) 
(p, t2fv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t2fv_16 -include t2f.h */ + +/* + * This function contains 87 FP additions, 42 FP multiplications, + * (or, 83 additions, 38 multiplications, 4 fused multiply/add), + * 36 stack variables, 3 constants, and 32 memory accesses + */ +#include "t2f.h" + +static void t2fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 30)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(16, rs)) { + V TJ, T10, TD, T11, T1b, T1c, Ty, TK, T16, T17, T18, Tb, TN, T13, T14; + V T15, Tm, TM, TG, TI, TH; + TG = LD(&(x[0]), ms, &(x[0])); + TH = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + TI = BYTWJ(&(W[TWVL * 14]), TH); + TJ = VSUB(TG, TI); + T10 = VADD(TG, TI); + { + V TA, TC, Tz, TB; + Tz = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 6]), Tz); + TB = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TC = BYTWJ(&(W[TWVL * 22]), TB); + TD = VSUB(TA, TC); + T11 = VADD(TA, TC); + } + { + V Tp, Tw, Tr, Tu, Ts, Tx; + { + V To, Tv, Tq, Tt; + To = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tp = BYTWJ(&(W[TWVL * 26]), To); + Tv = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tw = BYTWJ(&(W[TWVL * 18]), Tv); + Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tr = BYTWJ(&(W[TWVL * 10]), Tq); + Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tu = BYTWJ(&(W[TWVL * 2]), Tt); + } + T1b = VADD(Tp, Tr); + T1c = VADD(Tu, Tw); + Ts = VSUB(Tp, Tr); + Tx = VSUB(Tu, Tw); + Ty = VMUL(LDK(KP707106781), VSUB(Ts, Tx)); + TK = VMUL(LDK(KP707106781), VADD(Tx, Ts)); + } + { + V T2, T9, T4, T7, T5, Ta; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 15)]), ms, 
&(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[TWVL * 28]), T1); + T8 = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 20]), T8); + T3 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = BYTWJ(&(W[TWVL * 12]), T3); + T6 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T7 = BYTWJ(&(W[TWVL * 4]), T6); + } + T16 = VADD(T2, T4); + T17 = VADD(T7, T9); + T18 = VSUB(T16, T17); + T5 = VSUB(T2, T4); + Ta = VSUB(T7, T9); + Tb = VFNMS(LDK(KP923879532), Ta, VMUL(LDK(KP382683432), T5)); + TN = VFMA(LDK(KP923879532), T5, VMUL(LDK(KP382683432), Ta)); + } + { + V Td, Tk, Tf, Ti, Tg, Tl; + { + V Tc, Tj, Te, Th; + Tc = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[0]), Tc); + Tj = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tk = BYTWJ(&(W[TWVL * 24]), Tj); + Te = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tf = BYTWJ(&(W[TWVL * 16]), Te); + Th = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ti = BYTWJ(&(W[TWVL * 8]), Th); + } + T13 = VADD(Td, Tf); + T14 = VADD(Ti, Tk); + T15 = VSUB(T13, T14); + Tg = VSUB(Td, Tf); + Tl = VSUB(Ti, Tk); + Tm = VFMA(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), Tl)); + TM = VFNMS(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tg)); + } + { + V T1a, T1g, T1f, T1h; + { + V T12, T19, T1d, T1e; + T12 = VSUB(T10, T11); + T19 = VMUL(LDK(KP707106781), VADD(T15, T18)); + T1a = VADD(T12, T19); + T1g = VSUB(T12, T19); + T1d = VSUB(T1b, T1c); + T1e = VMUL(LDK(KP707106781), VSUB(T18, T15)); + T1f = VBYI(VADD(T1d, T1e)); + T1h = VBYI(VSUB(T1e, T1d)); + } + ST(&(x[WS(rs, 14)]), VSUB(T1a, T1f), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T1g, T1h), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1a, T1f), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(T1g, T1h), ms, &(x[0])); + } + { + V T1k, T1o, T1n, T1p; + { + V T1i, T1j, T1l, T1m; + T1i = VADD(T10, T11); + T1j = VADD(T1c, T1b); + T1k = VADD(T1i, T1j); + T1o = VSUB(T1i, T1j); + T1l = VADD(T13, T14); + T1m = VADD(T16, T17); + T1n = VADD(T1l, T1m); + T1p = VBYI(VSUB(T1m, T1l)); + } + ST(&(x[WS(rs, 8)]), VSUB(T1k, 
T1n), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1o, T1p), ms, &(x[0])); + ST(&(x[0]), VADD(T1k, T1n), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VSUB(T1o, T1p), ms, &(x[0])); + } + { + V TF, TQ, TP, TR; + { + V Tn, TE, TL, TO; + Tn = VSUB(Tb, Tm); + TE = VSUB(Ty, TD); + TF = VBYI(VSUB(Tn, TE)); + TQ = VBYI(VADD(TE, Tn)); + TL = VADD(TJ, TK); + TO = VADD(TM, TN); + TP = VSUB(TL, TO); + TR = VADD(TL, TO); + } + ST(&(x[WS(rs, 7)]), VADD(TF, TP), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VSUB(TR, TQ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(TP, TF), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(TQ, TR), ms, &(x[WS(rs, 1)])); + } + { + V TU, TY, TX, TZ; + { + V TS, TT, TV, TW; + TS = VSUB(TJ, TK); + TT = VADD(Tm, Tb); + TU = VADD(TS, TT); + TY = VSUB(TS, TT); + TV = VADD(TD, Ty); + TW = VSUB(TN, TM); + TX = VBYI(VADD(TV, TW)); + TZ = VBYI(VSUB(TW, TV)); + } + ST(&(x[WS(rs, 13)]), VSUB(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(TY, TZ), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(TU, TX), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(TY, TZ), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t2fv_16"), twinstr, &GENUS, {83, 38, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_16) (planner *p) { + X(kdft_dit_register) (p, t2fv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:18 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t2fv_2 -include t2f.h */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t2f.h" + +static void t2fv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T2, T3; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t2fv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_2) (planner *p) { + 
X(kdft_dit_register) (p, t2fv_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t2fv_2 -include t2f.h */ + +/* + * This function contains 3 FP additions, 2 FP multiplications, + * (or, 3 additions, 2 multiplications, 0 fused multiply/add), + * 5 stack variables, 0 constants, and 4 memory accesses + */ +#include "t2f.h" + +static void t2fv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 2)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(2, rs)) { + V T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[0]), T2); + ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)])); + ST(&(x[0]), VADD(T1, T3), ms, &(x[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 2, XSIMD_STRING("t2fv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_2) (planner *p) { + X(kdft_dit_register) (p, t2fv_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,519 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:24 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t2fv_20 -include t2f.h */ + +/* + * This function contains 123 FP additions, 88 FP multiplications, + * (or, 77 additions, 42 multiplications, 46 fused multiply/add), + * 68 stack variables, 4 constants, and 40 memory accesses + */ +#include "t2f.h" + +static void t2fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, Tx, T1m, T1K, T1y, Tk, Tf, T16, T10, TT, T1O, T1w, T1L, T1p, T1M; + V T1s, TZ, TI, T1x, Tp; + { + V T1, Tv, T2, Tt; + T1 = LD(&(x[0]), ms, &(x[0])); + Tv = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T9, T1n, TN, T1v, TS, Te, T1q, T1u, TE, TG, Tm, T1o, TC, Tn, T1r; + V TH, To; + { + V TP, TR, Ta, Tc; + 
{ + V T5, T7, TJ, TL, T1k, T1l; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TJ = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TL = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V Tw, T3, Tu, T6, T8, TK, TM, TO, TQ; + TO = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + Tw = BYTWJ(&(W[TWVL * 28]), Tv); + T3 = BYTWJ(&(W[TWVL * 18]), T2); + Tu = BYTWJ(&(W[TWVL * 8]), Tt); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + T8 = BYTWJ(&(W[TWVL * 26]), T7); + TK = BYTWJ(&(W[TWVL * 24]), TJ); + TM = BYTWJ(&(W[TWVL * 4]), TL); + TP = BYTWJ(&(W[TWVL * 32]), TO); + TQ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T4 = VSUB(T1, T3); + T1k = VADD(T1, T3); + Tx = VSUB(Tu, Tw); + T1l = VADD(Tu, Tw); + T9 = VSUB(T6, T8); + T1n = VADD(T6, T8); + TN = VSUB(TK, TM); + T1v = VADD(TK, TM); + TR = BYTWJ(&(W[TWVL * 12]), TQ); + } + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1m = VSUB(T1k, T1l); + T1K = VADD(T1k, T1l); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + } + { + V Tb, TA, Td, Th, Tj, Tz, Tg, Ti, Ty; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Ty = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TS = VSUB(TP, TR); + T1y = VADD(TP, TR); + Tb = BYTWJ(&(W[TWVL * 30]), Ta); + TA = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 10]), Tc); + Th = BYTWJ(&(W[TWVL * 14]), Tg); + Tj = BYTWJ(&(W[TWVL * 34]), Ti); + Tz = BYTWJ(&(W[TWVL * 16]), Ty); + { + V TD, TF, TB, Tl; + TD = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TF = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TB = BYTWJ(&(W[TWVL * 36]), TA); + Te = VSUB(Tb, Td); + T1q = VADD(Tb, Td); + Tk = VSUB(Th, Tj); + T1u = VADD(Th, Tj); + TE = BYTWJ(&(W[0]), TD); + TG = BYTWJ(&(W[TWVL * 20]), TF); + Tm = BYTWJ(&(W[TWVL * 22]), Tl); + T1o = VADD(Tz, TB); + TC = VSUB(Tz, TB); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + } + } + } + Tf = VADD(T9, Te); + T16 = VSUB(T9, Te); + T10 = VSUB(TS, TN); + TT = VADD(TN, TS); + T1r = 
VADD(TE, TG); + TH = VSUB(TE, TG); + T1O = VADD(T1u, T1v); + T1w = VSUB(T1u, T1v); + To = BYTWJ(&(W[TWVL * 2]), Tn); + T1L = VADD(T1n, T1o); + T1p = VSUB(T1n, T1o); + T1M = VADD(T1q, T1r); + T1s = VSUB(T1q, T1r); + TZ = VSUB(TH, TC); + TI = VADD(TC, TH); + T1x = VADD(Tm, To); + Tp = VSUB(Tm, To); + } + } + { + V T1V, T1N, T14, T1d, T11, T1G, T1t, T1z, T1P, Tq, T17, T13, TV, TU; + T1V = VSUB(T1L, T1M); + T1N = VADD(T1L, T1M); + T14 = VSUB(TT, TI); + TU = VADD(TI, TT); + T1d = VFNMS(LDK(KP618033988), TZ, T10); + T11 = VFMA(LDK(KP618033988), T10, TZ); + T1G = VSUB(T1p, T1s); + T1t = VADD(T1p, T1s); + T1z = VSUB(T1x, T1y); + T1P = VADD(T1x, T1y); + Tq = VADD(Tk, Tp); + T17 = VSUB(Tk, Tp); + T13 = VFNMS(LDK(KP250000000), TU, Tx); + TV = VADD(Tx, TU); + { + V T1J, T1H, T1D, T1Z, T1X, T1T, T1h, T1j, T1b, T19, T1C, T1S, T1c, TY, T1F; + V T1A; + T1F = VSUB(T1w, T1z); + T1A = VADD(T1w, T1z); + { + V T1W, T1Q, TX, Tr; + T1W = VSUB(T1O, T1P); + T1Q = VADD(T1O, T1P); + TX = VSUB(Tf, Tq); + Tr = VADD(Tf, Tq); + { + V T1g, T18, T1f, T15; + T1g = VFNMS(LDK(KP618033988), T16, T17); + T18 = VFMA(LDK(KP618033988), T17, T16); + T1f = VFMA(LDK(KP559016994), T14, T13); + T15 = VFNMS(LDK(KP559016994), T14, T13); + T1J = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1F, T1G)); + T1H = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1G, T1F)); + { + V T1B, T1R, TW, Ts; + T1B = VADD(T1t, T1A); + T1D = VSUB(T1t, T1A); + T1Z = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1V, T1W)); + T1X = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1W, T1V)); + T1R = VADD(T1N, T1Q); + T1T = VSUB(T1N, T1Q); + TW = VFNMS(LDK(KP250000000), Tr, T4); + Ts = VADD(T4, Tr); + T1h = VFNMS(LDK(KP951056516), T1g, T1f); + T1j = VFMA(LDK(KP951056516), T1g, T1f); + T1b = VFNMS(LDK(KP951056516), T18, T15); + T19 = VFMA(LDK(KP951056516), T18, T15); + ST(&(x[WS(rs, 10)]), VADD(T1m, T1B), ms, &(x[0])); + T1C = VFNMS(LDK(KP250000000), T1B, T1m); + ST(&(x[0]), VADD(T1K, T1R), ms, &(x[0])); + T1S = VFNMS(LDK(KP250000000), 
T1R, T1K); + T1c = VFNMS(LDK(KP559016994), TX, TW); + TY = VFMA(LDK(KP559016994), TX, TW); + ST(&(x[WS(rs, 15)]), VFMAI(TV, Ts), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(TV, Ts), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T1E, T1I, T1U, T1Y; + T1E = VFNMS(LDK(KP559016994), T1D, T1C); + T1I = VFMA(LDK(KP559016994), T1D, T1C); + T1U = VFMA(LDK(KP559016994), T1T, T1S); + T1Y = VFNMS(LDK(KP559016994), T1T, T1S); + { + V T1e, T1i, T1a, T12; + T1e = VFNMS(LDK(KP951056516), T1d, T1c); + T1i = VFMA(LDK(KP951056516), T1d, T1c); + T1a = VFNMS(LDK(KP951056516), T11, TY); + T12 = VFMA(LDK(KP951056516), T11, TY); + ST(&(x[WS(rs, 18)]), VFNMSI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T1H, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFMAI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T1J, T1I), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFNMSI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T1X, T1U), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFMAI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFNMSI(T1Z, T1Y), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(T1h, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T1h, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1j, T1i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1j, T1i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T1b, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1b, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T19, T12), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T19, T12), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t2fv_20"), twinstr, &GENUS, {77, 42, 46, 0}, 
0, 0, 0 }; + +void XSIMD(codelet_t2fv_20) (planner *p) { + X(kdft_dit_register) (p, t2fv_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name t2fv_20 -include t2f.h */ + +/* + * This function contains 123 FP additions, 62 FP multiplications, + * (or, 111 additions, 50 multiplications, 12 fused multiply/add), + * 54 stack variables, 4 constants, and 40 memory accesses + */ +#include "t2f.h" + +static void t2fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 38)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(20, rs)) { + V T4, Tx, T1B, T1U, TZ, T16, T17, T10, Tf, Tq, Tr, T1N, T1O, T1S, T1t; + V T1w, T1C, TI, TT, TU, T1K, T1L, T1R, T1m, T1p, T1D, Ts, TV; + { + V T1, Tw, T3, Tu, Tv, T2, Tt, T1z, T1A; + T1 = LD(&(x[0]), ms, &(x[0])); + Tv = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tw = BYTWJ(&(W[TWVL * 28]), Tv); + T2 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 18]), T2); + Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tu = BYTWJ(&(W[TWVL * 8]), Tt); + T4 = VSUB(T1, T3); + Tx = VSUB(Tu, Tw); + T1z = VADD(T1, T3); + T1A = VADD(Tu, Tw); + T1B = VSUB(T1z, T1A); + T1U = VADD(T1z, T1A); + } + { + V T9, T1r, TN, T1l, TS, T1o, Te, T1u, Tk, T1k, TC, T1s, TH, T1v, Tp; + V T1n; + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + T7 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 26]), T7); + T9 = VSUB(T6, T8); + T1r = VADD(T6, T8); + } + { + V TK, TM, TJ, TL; + TJ = LD(&(x[WS(rs, 13)]), ms, 
&(x[WS(rs, 1)])); + TK = BYTWJ(&(W[TWVL * 24]), TJ); + TL = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TM = BYTWJ(&(W[TWVL * 4]), TL); + TN = VSUB(TK, TM); + T1l = VADD(TK, TM); + } + { + V TP, TR, TO, TQ; + TO = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TP = BYTWJ(&(W[TWVL * 32]), TO); + TQ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TR = BYTWJ(&(W[TWVL * 12]), TQ); + TS = VSUB(TP, TR); + T1o = VADD(TP, TR); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 30]), Ta); + Tc = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 10]), Tc); + Te = VSUB(Tb, Td); + T1u = VADD(Tb, Td); + } + { + V Th, Tj, Tg, Ti; + Tg = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 14]), Tg); + Ti = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tj = BYTWJ(&(W[TWVL * 34]), Ti); + Tk = VSUB(Th, Tj); + T1k = VADD(Th, Tj); + } + { + V Tz, TB, Ty, TA; + Ty = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tz = BYTWJ(&(W[TWVL * 16]), Ty); + TA = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TB = BYTWJ(&(W[TWVL * 36]), TA); + TC = VSUB(Tz, TB); + T1s = VADD(Tz, TB); + } + { + V TE, TG, TD, TF; + TD = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TE = BYTWJ(&(W[0]), TD); + TF = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TG = BYTWJ(&(W[TWVL * 20]), TF); + TH = VSUB(TE, TG); + T1v = VADD(TE, TG); + } + { + V Tm, To, Tl, Tn; + Tl = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tm = BYTWJ(&(W[TWVL * 22]), Tl); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + To = BYTWJ(&(W[TWVL * 2]), Tn); + Tp = VSUB(Tm, To); + T1n = VADD(Tm, To); + } + TZ = VSUB(TH, TC); + T16 = VSUB(T9, Te); + T17 = VSUB(Tk, Tp); + T10 = VSUB(TS, TN); + Tf = VADD(T9, Te); + Tq = VADD(Tk, Tp); + Tr = VADD(Tf, Tq); + T1N = VADD(T1k, T1l); + T1O = VADD(T1n, T1o); + T1S = VADD(T1N, T1O); + T1t = VSUB(T1r, T1s); + T1w = VSUB(T1u, T1v); + T1C = VADD(T1t, T1w); + TI = VADD(TC, TH); + TT = VADD(TN, TS); + TU = VADD(TI, TT); + T1K = VADD(T1r, T1s); + T1L = VADD(T1u, T1v); + T1R = VADD(T1K, T1L); + 
T1m = VSUB(T1k, T1l); + T1p = VSUB(T1n, T1o); + T1D = VADD(T1m, T1p); + } + Ts = VADD(T4, Tr); + TV = VBYI(VADD(Tx, TU)); + ST(&(x[WS(rs, 5)]), VSUB(Ts, TV), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VADD(Ts, TV), ms, &(x[WS(rs, 1)])); + { + V T1T, T1V, T1W, T1Q, T1Z, T1M, T1P, T1Y, T1X; + T1T = VMUL(LDK(KP559016994), VSUB(T1R, T1S)); + T1V = VADD(T1R, T1S); + T1W = VFNMS(LDK(KP250000000), T1V, T1U); + T1M = VSUB(T1K, T1L); + T1P = VSUB(T1N, T1O); + T1Q = VBYI(VFMA(LDK(KP951056516), T1M, VMUL(LDK(KP587785252), T1P))); + T1Z = VBYI(VFNMS(LDK(KP587785252), T1M, VMUL(LDK(KP951056516), T1P))); + ST(&(x[0]), VADD(T1U, T1V), ms, &(x[0])); + T1Y = VSUB(T1W, T1T); + ST(&(x[WS(rs, 8)]), VSUB(T1Y, T1Z), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T1Z, T1Y), ms, &(x[0])); + T1X = VADD(T1T, T1W); + ST(&(x[WS(rs, 4)]), VADD(T1Q, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T1X, T1Q), ms, &(x[0])); + } + { + V T1G, T1E, T1F, T1y, T1J, T1q, T1x, T1I, T1H; + T1G = VMUL(LDK(KP559016994), VSUB(T1C, T1D)); + T1E = VADD(T1C, T1D); + T1F = VFNMS(LDK(KP250000000), T1E, T1B); + T1q = VSUB(T1m, T1p); + T1x = VSUB(T1t, T1w); + T1y = VBYI(VFNMS(LDK(KP587785252), T1x, VMUL(LDK(KP951056516), T1q))); + T1J = VBYI(VFMA(LDK(KP951056516), T1x, VMUL(LDK(KP587785252), T1q))); + ST(&(x[WS(rs, 10)]), VADD(T1B, T1E), ms, &(x[0])); + T1I = VADD(T1G, T1F); + ST(&(x[WS(rs, 6)]), VSUB(T1I, T1J), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T1J, T1I), ms, &(x[0])); + T1H = VSUB(T1F, T1G); + ST(&(x[WS(rs, 2)]), VADD(T1y, T1H), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VSUB(T1H, T1y), ms, &(x[0])); + } + { + V T11, T18, T1g, T1d, T15, T1f, TY, T1c; + T11 = VFMA(LDK(KP951056516), TZ, VMUL(LDK(KP587785252), T10)); + T18 = VFMA(LDK(KP951056516), T16, VMUL(LDK(KP587785252), T17)); + T1g = VFNMS(LDK(KP587785252), T16, VMUL(LDK(KP951056516), T17)); + T1d = VFNMS(LDK(KP587785252), TZ, VMUL(LDK(KP951056516), T10)); + { + V T13, T14, TW, TX; + T13 = VFMS(LDK(KP250000000), TU, Tx); + T14 = VMUL(LDK(KP559016994), VSUB(TT, 
TI)); + T15 = VADD(T13, T14); + T1f = VSUB(T14, T13); + TW = VMUL(LDK(KP559016994), VSUB(Tf, Tq)); + TX = VFNMS(LDK(KP250000000), Tr, T4); + TY = VADD(TW, TX); + T1c = VSUB(TX, TW); + } + { + V T12, T19, T1i, T1j; + T12 = VADD(TY, T11); + T19 = VBYI(VSUB(T15, T18)); + ST(&(x[WS(rs, 19)]), VSUB(T12, T19), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T12, T19), ms, &(x[WS(rs, 1)])); + T1i = VADD(T1c, T1d); + T1j = VBYI(VADD(T1g, T1f)); + ST(&(x[WS(rs, 13)]), VSUB(T1i, T1j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T1i, T1j), ms, &(x[WS(rs, 1)])); + } + { + V T1a, T1b, T1e, T1h; + T1a = VSUB(TY, T11); + T1b = VBYI(VADD(T18, T15)); + ST(&(x[WS(rs, 11)]), VSUB(T1a, T1b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T1a, T1b), ms, &(x[WS(rs, 1)])); + T1e = VSUB(T1c, T1d); + T1h = VBYI(VSUB(T1f, T1g)); + ST(&(x[WS(rs, 17)]), VSUB(T1e, T1h), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T1e, T1h), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t2fv_20"), twinstr, &GENUS, {111, 50, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_20) (planner *p) { + X(kdft_dit_register) (p, t2fv_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,932 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 
as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:24 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include t2f.h */ + +/* + * This function contains 248 FP additions, 241 FP multiplications, + * (or, 67 additions, 60 multiplications, 181 fused multiply/add), + * 208 stack variables, 67 constants, and 50 memory accesses + */ +#include "t2f.h" + +static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, 
+0.447417479732227551498980015410057305749330693); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP831864738, +0.831864738706457140726048799369896829771167132); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP522616830, 
+0.522616830205754336872861364785224694908468440); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP912575812, +0.912575812670962425556968549836277086778922727); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V T25, T1B, T2y, T1K, T2s, T23, T1S, T26, T20, T1X; + { + V T1O, T2X, Te, T3L, Td, T3Q, T3j, T3b, T2R, T2M, T2f, T27, T1y, T1H, T3M; + V TW, TR, TK, T2B, T3n, T3e, T2U, T2F, T2i, T2a, Tz, T1C, T3N, TQ, T11; + V T1b, T1c, T16; + { + V T1, T1g, T1i, T1p, T1k, T1m, Tb, T1N, T6, T1M; + { + V T7, T9, T2, T4, T1f, T1h, T1o; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + { + V T8, Ta, T3, T5, T1j; + T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 18]), T7); + Ta = BYTWJ(&(W[TWVL * 28]), T9); + T3 = BYTWJ(&(W[TWVL * 8]), T2); + T5 = BYTWJ(&(W[TWVL * 38]), T4); + T1g = BYTWJ(&(W[TWVL * 4]), T1f); + T1i = BYTWJ(&(W[TWVL * 14]), T1h); + T1p = BYTWJ(&(W[TWVL * 34]), T1o); + T1k = BYTWJ(&(W[TWVL * 44]), T1j); + T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tb = VADD(T8, Ta); + T1N = VSUB(T8, Ta); + T6 = VADD(T3, T5); + T1M = VSUB(T3, T5); + } + } + { + V T1v, T1l, Th, Tj, T1w, T1q, Tq, Tk, Tn, Tg; + Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V Tc, Ti, T1n, Tp; + Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1v = VSUB(T1i, T1k); + T1l = VADD(T1i, T1k); + T1n = BYTWJ(&(W[TWVL * 24]), T1m); + Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1O = VFMA(LDK(KP618033988), T1N, T1M); + T2X = VFNMS(LDK(KP618033988), T1M, T1N); + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Th = BYTWJ(&(W[0]), Tg); + Tj = BYTWJ(&(W[TWVL * 10]), Ti); + T1w = VSUB(T1n, T1p); + T1q = VADD(T1n, T1p); + Tq = BYTWJ(&(W[TWVL * 30]), Tp); + Tk = LD(&(x[WS(rs, 
21)]), ms, &(x[WS(rs, 1)])); + T3L = VADD(T1, Tc); + Td = VFNMS(LDK(KP250000000), Tc, T1); + Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + } + { + V T1x, T2K, TM, TB, Tw, Tm, Tx, Tr, TI, T2L, T1u, TD, TF, TL; + TL = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + { + V T1t, Tl, To, TH, T1s, T1r, TA, TC; + TA = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1r = VADD(T1l, T1q); + T1t = VSUB(T1q, T1l); + T1x = VFMA(LDK(KP618033988), T1w, T1v); + T2K = VFNMS(LDK(KP618033988), T1v, T1w); + Tl = BYTWJ(&(W[TWVL * 40]), Tk); + To = BYTWJ(&(W[TWVL * 20]), Tn); + TM = BYTWJ(&(W[TWVL * 6]), TL); + TB = BYTWJ(&(W[TWVL * 46]), TA); + TH = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T1s = VFNMS(LDK(KP250000000), T1r, T1g); + T3Q = VADD(T1g, T1r); + TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tj, Tl); + Tm = VADD(Tj, Tl); + Tx = VSUB(Tq, To); + Tr = VADD(To, Tq); + TI = BYTWJ(&(W[TWVL * 26]), TH); + T2L = VFMA(LDK(KP559016994), T1t, T1s); + T1u = VFNMS(LDK(KP559016994), T1t, T1s); + TD = BYTWJ(&(W[TWVL * 16]), TC); + TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + } + { + V Tu, Ty, T2E, TE, TN, TG, Tt, TV, Ts; + TV = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ts = VADD(Tm, Tr); + Tu = VSUB(Tm, Tr); + Ty = VFNMS(LDK(KP618033988), Tx, Tw); + T2E = VFMA(LDK(KP618033988), Tw, Tx); + T3j = VFNMS(LDK(KP059835404), T2K, T2L); + T3b = VFMA(LDK(KP066152395), T2L, T2K); + T2R = VFNMS(LDK(KP786782374), T2K, T2L); + T2M = VFMA(LDK(KP869845200), T2L, T2K); + T2f = VFMA(LDK(KP132830569), T1u, T1x); + T27 = VFNMS(LDK(KP120146378), T1x, T1u); + T1y = VFNMS(LDK(KP893101515), T1x, T1u); + T1H = VFMA(LDK(KP987388751), T1u, T1x); + TE = VSUB(TB, TD); + TN = VADD(TD, TB); + TG = BYTWJ(&(W[TWVL * 36]), TF); + Tt = VFNMS(LDK(KP250000000), Ts, Th); + T3M = VADD(Th, Ts); + TW = BYTWJ(&(W[TWVL * 2]), TV); + { + V TJ, TO, Tv, T2D, TY, T15, T10, T13, TP; + { + V TX, T14, TZ, T12; + TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0])); 
+ T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TJ = VSUB(TG, TI); + TO = VADD(TI, TG); + Tv = VFMA(LDK(KP559016994), Tu, Tt); + T2D = VFNMS(LDK(KP559016994), Tu, Tt); + TY = BYTWJ(&(W[TWVL * 12]), TX); + T15 = BYTWJ(&(W[TWVL * 32]), T14); + T10 = BYTWJ(&(W[TWVL * 42]), TZ); + T13 = BYTWJ(&(W[TWVL * 22]), T12); + } + TP = VADD(TN, TO); + TR = VSUB(TN, TO); + TK = VFMA(LDK(KP618033988), TJ, TE); + T2B = VFNMS(LDK(KP618033988), TE, TJ); + T3n = VFMA(LDK(KP578046249), T2D, T2E); + T3e = VFNMS(LDK(KP522847744), T2E, T2D); + T2U = VFNMS(LDK(KP987388751), T2D, T2E); + T2F = VFMA(LDK(KP893101515), T2E, T2D); + T2i = VFNMS(LDK(KP603558818), Ty, Tv); + T2a = VFMA(LDK(KP667278218), Tv, Ty); + Tz = VFNMS(LDK(KP244189809), Ty, Tv); + T1C = VFMA(LDK(KP269969613), Tv, Ty); + T3N = VADD(TM, TP); + TQ = VFMS(LDK(KP250000000), TP, TM); + T11 = VADD(TY, T10); + T1b = VSUB(TY, T10); + T1c = VSUB(T15, T13); + T16 = VADD(T13, T15); + } + } + } + } + } + { + V T2z, Tf, T3W, T3O, T1d, T2H, T3m, T2j, T2b, TT, T1D, T2G, T35, T2V, T2Z; + V T3A, T3g, T2I, T1a, T3R, T3X; + T2z = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + { + V TS, T2A, T17, T19; + TS = VFNMS(LDK(KP559016994), TR, TQ); + T2A = VFMA(LDK(KP559016994), TR, TQ); + T3W = VSUB(T3M, T3N); + T3O = VADD(T3M, T3N); + T1d = VFNMS(LDK(KP618033988), T1c, T1b); + T2H = VFMA(LDK(KP618033988), T1b, T1c); + T17 = VADD(T11, T16); + T19 = VSUB(T16, T11); + { + V T3f, T2T, T2C, T18, T3P; + T3m = VFMA(LDK(KP447533225), T2B, T2A); + T3f = VFNMS(LDK(KP494780565), T2A, T2B); + T2T = VFNMS(LDK(KP132830569), T2A, T2B); + T2C = VFMA(LDK(KP120146378), T2B, T2A); + T2j = VFNMS(LDK(KP786782374), TK, TS); + T2b = VFMA(LDK(KP869845200), TS, TK); + TT = VFNMS(LDK(KP667278218), TS, TK); + T1D = VFMA(LDK(KP603558818), TK, TS); + T18 = VFNMS(LDK(KP250000000), T17, TW); + T3P = VADD(TW, T17); + T2G = VFMA(LDK(KP734762448), T2F, T2C); + T35 = VFNMS(LDK(KP734762448), T2F, T2C); + T2V = VFNMS(LDK(KP734762448), T2U, T2T); + T2Z = 
VFMA(LDK(KP734762448), T2U, T2T); + T3A = VFMA(LDK(KP982009705), T3f, T3e); + T3g = VFNMS(LDK(KP982009705), T3f, T3e); + T2I = VFMA(LDK(KP559016994), T19, T18); + T1a = VFNMS(LDK(KP559016994), T19, T18); + T3R = VADD(T3P, T3Q); + T3X = VSUB(T3P, T3Q); + } + } + { + V T2n, T2t, T1V, T22, T2l, T2d, T1Q, T1I, T2w, T1A, T1F, T2q; + { + V T2k, T1G, T28, T2g, T3K, T3E, T3a, T34, T3x, T3H, T2c, TU, T1T, T1U, T1z; + V T3o, T3t; + T2n = VFNMS(LDK(KP912575812), T2j, T2i); + T2k = VFMA(LDK(KP912575812), T2j, T2i); + T3o = VFNMS(LDK(KP921078979), T3n, T3m); + T3t = VFMA(LDK(KP921078979), T3n, T3m); + { + V T3c, T2Q, T2J, T3k, T1e; + T3c = VFNMS(LDK(KP667278218), T2I, T2H); + T2Q = VFNMS(LDK(KP059835404), T2H, T2I); + T2J = VFMA(LDK(KP066152395), T2I, T2H); + T3k = VFMA(LDK(KP603558818), T2H, T2I); + T1G = VFMA(LDK(KP578046249), T1a, T1d); + T1e = VFNMS(LDK(KP522847744), T1d, T1a); + T28 = VFNMS(LDK(KP494780565), T1a, T1d); + T2g = VFMA(LDK(KP447533225), T1d, T1a); + { + V T3U, T3S, T40, T3Y; + T3U = VSUB(T3O, T3R); + T3S = VADD(T3O, T3R); + T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X)); + T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W)); + { + V T3s, T3l, T2N, T36; + T3s = VFNMS(LDK(KP845997307), T3k, T3j); + T3l = VFMA(LDK(KP845997307), T3k, T3j); + T2N = VFNMS(LDK(KP772036680), T2M, T2J); + T36 = VFMA(LDK(KP772036680), T2M, T2J); + { + V T30, T2S, T3d, T3z, T3T; + T30 = VFNMS(LDK(KP772036680), T2R, T2Q); + T2S = VFMA(LDK(KP772036680), T2R, T2Q); + T3d = VFNMS(LDK(KP845997307), T3c, T3b); + T3z = VFMA(LDK(KP845997307), T3c, T3b); + ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0])); + T3T = VFNMS(LDK(KP250000000), T3S, T3L); + { + V T3C, T3p, T2O, T37; + T3C = VFMA(LDK(KP906616052), T3o, T3l); + T3p = VFNMS(LDK(KP906616052), T3o, T3l); + T2O = VFMA(LDK(KP956723877), T2N, T2G); + T37 = VFMA(LDK(KP522616830), T2V, T36); + { + V T31, T2W, T3u, T3h; + T31 = VFNMS(LDK(KP522616830), T2G, T30); + T2W = VFMA(LDK(KP945422727), T2V, T2S); + T3u = 
VFNMS(LDK(KP923225144), T3g, T3d); + T3h = VFMA(LDK(KP923225144), T3g, T3d); + { + V T3I, T3B, T3V, T3Z; + T3I = VFNMS(LDK(KP669429328), T3z, T3A); + T3B = VFMA(LDK(KP570584518), T3A, T3z); + T3V = VFMA(LDK(KP559016994), T3U, T3T); + T3Z = VFNMS(LDK(KP559016994), T3U, T3T); + { + V T3y, T3q, T2P, T38; + T3y = VFMA(LDK(KP262346850), T3p, T2X); + T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p)); + T2P = VFMA(LDK(KP992114701), T2O, T2z); + T38 = VFNMS(LDK(KP690983005), T37, T2S); + { + V T32, T2Y, T3v, T3F; + T32 = VFMA(LDK(KP763932022), T31, T2N); + T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W)); + T3v = VFNMS(LDK(KP997675361), T3u, T3t); + T3F = VFNMS(LDK(KP904508497), T3u, T3s); + { + V T3i, T3r, T3J, T3D; + T3i = VFMA(LDK(KP949179823), T3h, T2z); + T3r = VFNMS(LDK(KP237294955), T3h, T2z); + T3J = VFNMS(LDK(KP669429328), T3C, T3I); + T3D = VFMA(LDK(KP618033988), T3C, T3B); + ST(&(x[WS(rs, 20)]), VFMAI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(T3Y, T3V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T40, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VFMAI(T40, T3Z), ms, &(x[0])); + { + V T39, T33, T3w, T3G; + T39 = VFMA(LDK(KP855719849), T38, T35); + T33 = VFNMS(LDK(KP855719849), T32, T2Z); + ST(&(x[WS(rs, 22)]), VFMAI(T2Y, T2P), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(T2Y, T2P), ms, &(x[WS(rs, 1)])); + T3w = VFMA(LDK(KP560319534), T3v, T3s); + T3G = VFNMS(LDK(KP681693190), T3F, T3t); + ST(&(x[WS(rs, 23)]), VFMAI(T3q, T3i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(T3q, T3i), ms, &(x[0])); + T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y)); + T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y)); + T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X)); + T34 = VFMA(LDK(KP897376177), T33, T2z); + T3x = VFNMS(LDK(KP949179823), T3w, T3r); + T3H = VFNMS(LDK(KP860541664), T3G, T3r); + T2t = VFNMS(LDK(KP912575812), T2b, T2a); + T2c = VFMA(LDK(KP912575812), T2b, T2a); + TU 
= VFMA(LDK(KP829049696), TT, Tz); + T1T = VFNMS(LDK(KP829049696), TT, Tz); + T1U = VFNMS(LDK(KP831864738), T1y, T1e); + T1z = VFMA(LDK(KP831864738), T1y, T1e); + } + } + } + } + } + } + } + } + } + } + } + { + V T2o, T2h, T29, T2u, T2v, T2p; + T2o = VFNMS(LDK(KP958953096), T2g, T2f); + T2h = VFMA(LDK(KP958953096), T2g, T2f); + ST(&(x[WS(rs, 17)]), VFMAI(T3a, T34), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFNMSI(T3a, T34), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFMAI(T3E, T3x), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFNMSI(T3E, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VFNMSI(T3K, T3H), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFMAI(T3K, T3H), ms, &(x[WS(rs, 1)])); + T1V = VFMA(LDK(KP559154169), T1U, T1T); + T22 = VFNMS(LDK(KP683113946), T1T, T1U); + T29 = VFNMS(LDK(KP867381224), T28, T27); + T2u = VFMA(LDK(KP867381224), T28, T27); + T2l = VFMA(LDK(KP894834959), T2k, T2h); + T2v = VFMA(LDK(KP447417479), T2k, T2u); + T2d = VFNMS(LDK(KP809385824), T2c, T29); + T2p = VFMA(LDK(KP447417479), T2c, T2o); + T1Q = VFMA(LDK(KP831864738), T1H, T1G); + T1I = VFNMS(LDK(KP831864738), T1H, T1G); + T2w = VFNMS(LDK(KP763932022), T2v, T2h); + T1A = VFMA(LDK(KP904730450), T1z, TU); + T1F = VFNMS(LDK(KP904730450), T1z, TU); + T2q = VFMA(LDK(KP690983005), T2p, T29); + } + } + { + V T2e, T1E, T1P, T2m; + T2e = VFNMS(LDK(KP992114701), T2d, Tf); + T1E = VFMA(LDK(KP916574801), T1D, T1C); + T1P = VFNMS(LDK(KP916574801), T1D, T1C); + T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O)); + { + V T1J, T2r, T1R, T1W, T1Z, T2x; + T2x = VFNMS(LDK(KP999544308), T2w, T2t); + T1J = VFNMS(LDK(KP904730450), T1I, T1F); + T25 = VFMA(LDK(KP968583161), T1A, Tf); + T1B = VFNMS(LDK(KP242145790), T1A, Tf); + T2r = VFNMS(LDK(KP999544308), T2q, T2n); + T1R = VFMA(LDK(KP904730450), T1Q, T1P); + T1W = VFNMS(LDK(KP904730450), T1Q, T1P); + T1Z = VADD(T1E, T1F); + ST(&(x[WS(rs, 21)]), VFNMSI(T2m, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(T2m, T2e), ms, &(x[0])); + T2y = 
VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O)); + T1K = VFNMS(LDK(KP618033988), T1J, T1E); + T2s = VFNMS(LDK(KP803003575), T2r, Tf); + T23 = VFMA(LDK(KP617882369), T1W, T22); + T1S = VFNMS(LDK(KP242145790), T1R, T1O); + T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O)); + T20 = VFNMS(LDK(KP683113946), T1Z, T1I); + T1X = VFMA(LDK(KP559016994), T1W, T1V); + } + } + } + } + } + { + V T1L, T24, T21, T1Y; + T1L = VFNMS(LDK(KP876091699), T1K, T1B); + ST(&(x[WS(rs, 9)]), VFMAI(T2y, T2s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VFNMSI(T2y, T2s), ms, &(x[0])); + T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S)); + ST(&(x[WS(rs, 24)]), VFMAI(T26, T25), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(T26, T25), ms, &(x[WS(rs, 1)])); + T21 = VFMA(LDK(KP792626838), T20, T1B); + T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S)); + ST(&(x[WS(rs, 11)]), VFNMSI(T24, T21), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFMAI(T24, T21), ms, &(x[0])); + ST(&(x[WS(rs, 19)]), VFMAI(T1Y, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFNMSI(T1Y, T1L), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_25) (planner *p) { + X(kdft_dit_register) (p, t2fv_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include t2f.h */ + +/* + * This function contains 248 FP additions, 188 FP multiplications, + * (or, 170 additions, 110 
multiplications, 78 fused multiply/add), + * 99 stack variables, 40 constants, and 50 memory accesses + */ +#include "t2f.h" + +static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, 
+1.071653589957993236542617535735279956127150691); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) { + V Tc, Tb, Td, Te, T1C, T2t, T1E, T1x, T2m, T1u, T3c, T2n, Ty, T2i, Tv; + V T38, T2j, TS, T2f, TP, T39, T2g, T1d, T2p, T1a, T3b, T2q; + { + V T7, T9, Ta, T2, T4, T5, T1D; + Tc = LD(&(x[0]), ms, &(x[0])); + { + V T6, T8, T1, T3; + T6 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 18]), T6); + T8 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 28]), T8); + Ta = VADD(T7, T9); + T1 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[TWVL * 8]), T1); + T3 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T4 = BYTWJ(&(W[TWVL * 38]), T3); + T5 = VADD(T2, T4); + } + Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta)); + Td = VADD(T5, Ta); + Te = VFNMS(LDK(KP250000000), Td, Tc); + T1C = VSUB(T2, T4); + T1D = VSUB(T7, T9); + T2t = VMUL(LDK(KP951056516), T1D); + T1E 
= VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D)); + } + { + V T1r, T1l, T1n, T1o, T1g, T1i, T1j, T1q; + T1q = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1r = BYTWJ(&(W[TWVL * 4]), T1q); + { + V T1k, T1m, T1f, T1h; + T1k = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1l = BYTWJ(&(W[TWVL * 24]), T1k); + T1m = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1n = BYTWJ(&(W[TWVL * 34]), T1m); + T1o = VADD(T1l, T1n); + T1f = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1g = BYTWJ(&(W[TWVL * 14]), T1f); + T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1i = BYTWJ(&(W[TWVL * 44]), T1h); + T1j = VADD(T1g, T1i); + } + { + V T1v, T1w, T1p, T1s, T1t; + T1v = VSUB(T1g, T1i); + T1w = VSUB(T1l, T1n); + T1x = VFMA(LDK(KP475528258), T1v, VMUL(LDK(KP293892626), T1w)); + T2m = VFNMS(LDK(KP293892626), T1v, VMUL(LDK(KP475528258), T1w)); + T1p = VMUL(LDK(KP559016994), VSUB(T1j, T1o)); + T1s = VADD(T1j, T1o); + T1t = VFNMS(LDK(KP250000000), T1s, T1r); + T1u = VADD(T1p, T1t); + T3c = VADD(T1r, T1s); + T2n = VSUB(T1t, T1p); + } + } + { + V Ts, Tm, To, Tp, Th, Tj, Tk, Tr; + Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ts = BYTWJ(&(W[0]), Tr); + { + V Tl, Tn, Tg, Ti; + Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tm = BYTWJ(&(W[TWVL * 20]), Tl); + Tn = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + To = BYTWJ(&(W[TWVL * 30]), Tn); + Tp = VADD(Tm, To); + Tg = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Th = BYTWJ(&(W[TWVL * 10]), Tg); + Ti = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + Tj = BYTWJ(&(W[TWVL * 40]), Ti); + Tk = VADD(Th, Tj); + } + { + V Tw, Tx, Tq, Tt, Tu; + Tw = VSUB(Th, Tj); + Tx = VSUB(Tm, To); + Ty = VFMA(LDK(KP475528258), Tw, VMUL(LDK(KP293892626), Tx)); + T2i = VFNMS(LDK(KP293892626), Tw, VMUL(LDK(KP475528258), Tx)); + Tq = VMUL(LDK(KP559016994), VSUB(Tk, Tp)); + Tt = VADD(Tk, Tp); + Tu = VFNMS(LDK(KP250000000), Tt, Ts); + Tv = VADD(Tq, Tu); + T38 = VADD(Ts, Tt); + T2j = VSUB(Tu, Tq); + } + } + { + V TM, TG, TI, TJ, TB, TD, TE, TL; + TL = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TM = 
BYTWJ(&(W[TWVL * 6]), TL); + { + V TF, TH, TA, TC; + TF = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = BYTWJ(&(W[TWVL * 26]), TF); + TH = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TI = BYTWJ(&(W[TWVL * 36]), TH); + TJ = VADD(TG, TI); + TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TB = BYTWJ(&(W[TWVL * 16]), TA); + TC = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + TD = BYTWJ(&(W[TWVL * 46]), TC); + TE = VADD(TB, TD); + } + { + V TQ, TR, TK, TN, TO; + TQ = VSUB(TB, TD); + TR = VSUB(TG, TI); + TS = VFMA(LDK(KP475528258), TQ, VMUL(LDK(KP293892626), TR)); + T2f = VFNMS(LDK(KP293892626), TQ, VMUL(LDK(KP475528258), TR)); + TK = VMUL(LDK(KP559016994), VSUB(TE, TJ)); + TN = VADD(TE, TJ); + TO = VFNMS(LDK(KP250000000), TN, TM); + TP = VADD(TK, TO); + T39 = VADD(TM, TN); + T2g = VSUB(TO, TK); + } + } + { + V T17, T11, T13, T14, TW, TY, TZ, T16; + T16 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T17 = BYTWJ(&(W[TWVL * 2]), T16); + { + V T10, T12, TV, TX; + T10 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T11 = BYTWJ(&(W[TWVL * 22]), T10); + T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T13 = BYTWJ(&(W[TWVL * 32]), T12); + T14 = VADD(T11, T13); + TV = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + TW = BYTWJ(&(W[TWVL * 12]), TV); + TX = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TY = BYTWJ(&(W[TWVL * 42]), TX); + TZ = VADD(TW, TY); + } + { + V T1b, T1c, T15, T18, T19; + T1b = VSUB(TW, TY); + T1c = VSUB(T11, T13); + T1d = VFMA(LDK(KP475528258), T1b, VMUL(LDK(KP293892626), T1c)); + T2p = VFNMS(LDK(KP293892626), T1b, VMUL(LDK(KP475528258), T1c)); + T15 = VMUL(LDK(KP559016994), VSUB(TZ, T14)); + T18 = VADD(TZ, T14); + T19 = VFNMS(LDK(KP250000000), T18, T17); + T1a = VADD(T15, T19); + T3b = VADD(T17, T18); + T2q = VSUB(T19, T15); + } + } + { + V T3l, T3m, T3f, T3g, T3e, T3h, T3n, T3i; + { + V T3j, T3k, T3a, T3d; + T3j = VSUB(T38, T39); + T3k = VSUB(T3b, T3c); + T3l = VBYI(VFMA(LDK(KP951056516), T3j, VMUL(LDK(KP587785252), T3k))); + T3m = VBYI(VFNMS(LDK(KP587785252), T3j, VMUL(LDK(KP951056516), T3k))); + 
T3f = VADD(Tc, Td); + T3a = VADD(T38, T39); + T3d = VADD(T3b, T3c); + T3g = VADD(T3a, T3d); + T3e = VMUL(LDK(KP559016994), VSUB(T3a, T3d)); + T3h = VFNMS(LDK(KP250000000), T3g, T3f); + } + ST(&(x[0]), VADD(T3f, T3g), ms, &(x[0])); + T3n = VSUB(T3h, T3e); + ST(&(x[WS(rs, 10)]), VADD(T3m, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 15)]), VSUB(T3n, T3m), ms, &(x[WS(rs, 1)])); + T3i = VADD(T3e, T3h); + ST(&(x[WS(rs, 5)]), VSUB(T3i, T3l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 20)]), VADD(T3l, T3i), ms, &(x[0])); + } + { + V Tf, T1Z, T20, T21, T29, T2a, T2b, T26, T27, T28, T22, T23, T24, T1L, T1U; + V T1Q, T1S, T1A, T1V, T1N, T1O, T2d, T2e; + Tf = VADD(Tb, Te); + T1Z = VFMA(LDK(KP1_688655851), Ty, VMUL(LDK(KP535826794), Tv)); + T20 = VFMA(LDK(KP1_541026485), TS, VMUL(LDK(KP637423989), TP)); + T21 = VSUB(T1Z, T20); + T29 = VFMA(LDK(KP851558583), T1d, VMUL(LDK(KP904827052), T1a)); + T2a = VFMA(LDK(KP1_984229402), T1x, VMUL(LDK(KP125333233), T1u)); + T2b = VADD(T29, T2a); + T26 = VFNMS(LDK(KP844327925), Tv, VMUL(LDK(KP1_071653589), Ty)); + T27 = VFNMS(LDK(KP1_274847979), TS, VMUL(LDK(KP770513242), TP)); + T28 = VADD(T26, T27); + T22 = VFNMS(LDK(KP425779291), T1a, VMUL(LDK(KP1_809654104), T1d)); + T23 = VFNMS(LDK(KP992114701), T1u, VMUL(LDK(KP250666467), T1x)); + T24 = VADD(T22, T23); + { + V T1F, T1G, T1H, T1I, T1J, T1K; + T1F = VFMA(LDK(KP1_937166322), Ty, VMUL(LDK(KP248689887), Tv)); + T1G = VFMA(LDK(KP1_071653589), TS, VMUL(LDK(KP844327925), TP)); + T1H = VADD(T1F, T1G); + T1I = VFMA(LDK(KP1_752613360), T1d, VMUL(LDK(KP481753674), T1a)); + T1J = VFMA(LDK(KP1_457937254), T1x, VMUL(LDK(KP684547105), T1u)); + T1K = VADD(T1I, T1J); + T1L = VADD(T1H, T1K); + T1U = VSUB(T1J, T1I); + T1Q = VMUL(LDK(KP559016994), VSUB(T1K, T1H)); + T1S = VSUB(T1G, T1F); + } + { + V Tz, TT, TU, T1e, T1y, T1z; + Tz = VFNMS(LDK(KP497379774), Ty, VMUL(LDK(KP968583161), Tv)); + TT = VFNMS(LDK(KP1_688655851), TS, VMUL(LDK(KP535826794), TP)); + TU = VADD(Tz, TT); + T1e = VFNMS(LDK(KP963507348), T1d, 
VMUL(LDK(KP876306680), T1a)); + T1y = VFNMS(LDK(KP1_369094211), T1x, VMUL(LDK(KP728968627), T1u)); + T1z = VADD(T1e, T1y); + T1A = VADD(TU, T1z); + T1V = VMUL(LDK(KP559016994), VSUB(TU, T1z)); + T1N = VSUB(TT, Tz); + T1O = VSUB(T1e, T1y); + } + { + V T1B, T1M, T25, T2c; + T1B = VADD(Tf, T1A); + T1M = VBYI(VADD(T1E, T1L)); + ST(&(x[WS(rs, 1)]), VSUB(T1B, T1M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 24)]), VADD(T1B, T1M), ms, &(x[0])); + T25 = VADD(Tf, VADD(T21, T24)); + T2c = VBYI(VADD(T1E, VSUB(T28, T2b))); + ST(&(x[WS(rs, 21)]), VSUB(T25, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T25, T2c), ms, &(x[0])); + } + T2d = VBYI(VADD(T1E, VFMA(LDK(KP309016994), T28, VFMA(LDK(KP587785252), VSUB(T23, T22), VFNMS(LDK(KP951056516), VADD(T1Z, T20), VMUL(LDK(KP809016994), T2b)))))); + T2e = VFMA(LDK(KP309016994), T21, VFMA(LDK(KP951056516), VSUB(T26, T27), VFMA(LDK(KP587785252), VSUB(T2a, T29), VFNMS(LDK(KP809016994), T24, Tf)))); + ST(&(x[WS(rs, 9)]), VADD(T2d, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VSUB(T2e, T2d), ms, &(x[0])); + { + V T1R, T1X, T1W, T1Y, T1P, T1T; + T1P = VFMS(LDK(KP250000000), T1L, T1E); + T1R = VBYI(VADD(VFMA(LDK(KP587785252), T1N, VMUL(LDK(KP951056516), T1O)), VSUB(T1P, T1Q))); + T1X = VBYI(VADD(VFNMS(LDK(KP587785252), T1O, VMUL(LDK(KP951056516), T1N)), VADD(T1P, T1Q))); + T1T = VFNMS(LDK(KP250000000), T1A, Tf); + T1W = VFMA(LDK(KP587785252), T1S, VFNMS(LDK(KP951056516), T1U, VSUB(T1T, T1V))); + T1Y = VFMA(LDK(KP951056516), T1S, VADD(T1V, VFMA(LDK(KP587785252), T1U, T1T))); + ST(&(x[WS(rs, 11)]), VADD(T1R, T1W), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VSUB(T1Y, T1X), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VSUB(T1W, T1R), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T1X, T1Y), ms, &(x[0])); + } + } + { + V T2u, T2w, T2h, T2k, T2l, T2A, T2B, T2C, T2o, T2r, T2s, T2x, T2y, T2z, T2M; + V T2X, T2N, T2W, T2R, T31, T2U, T30, T2E, T2F; + T2u = VFNMS(LDK(KP587785252), T1C, T2t); + T2w = VSUB(Te, Tb); + T2h = VFNMS(LDK(KP125333233), 
T2g, VMUL(LDK(KP1_984229402), T2f)); + T2k = VFMA(LDK(KP1_457937254), T2i, VMUL(LDK(KP684547105), T2j)); + T2l = VSUB(T2h, T2k); + T2A = VFNMS(LDK(KP1_996053456), T2p, VMUL(LDK(KP062790519), T2q)); + T2B = VFMA(LDK(KP1_541026485), T2m, VMUL(LDK(KP637423989), T2n)); + T2C = VSUB(T2A, T2B); + T2o = VFNMS(LDK(KP770513242), T2n, VMUL(LDK(KP1_274847979), T2m)); + T2r = VFMA(LDK(KP125581039), T2p, VMUL(LDK(KP998026728), T2q)); + T2s = VSUB(T2o, T2r); + T2x = VFNMS(LDK(KP1_369094211), T2i, VMUL(LDK(KP728968627), T2j)); + T2y = VFMA(LDK(KP250666467), T2f, VMUL(LDK(KP992114701), T2g)); + T2z = VSUB(T2x, T2y); + { + V T2G, T2H, T2I, T2J, T2K, T2L; + T2G = VFNMS(LDK(KP481753674), T2j, VMUL(LDK(KP1_752613360), T2i)); + T2H = VFMA(LDK(KP851558583), T2f, VMUL(LDK(KP904827052), T2g)); + T2I = VSUB(T2G, T2H); + T2J = VFNMS(LDK(KP844327925), T2q, VMUL(LDK(KP1_071653589), T2p)); + T2K = VFNMS(LDK(KP998026728), T2n, VMUL(LDK(KP125581039), T2m)); + T2L = VADD(T2J, T2K); + T2M = VMUL(LDK(KP559016994), VSUB(T2I, T2L)); + T2X = VSUB(T2J, T2K); + T2N = VADD(T2I, T2L); + T2W = VADD(T2G, T2H); + } + { + V T2P, T2Q, T2Y, T2S, T2T, T2Z; + T2P = VFNMS(LDK(KP425779291), T2g, VMUL(LDK(KP1_809654104), T2f)); + T2Q = VFMA(LDK(KP963507348), T2i, VMUL(LDK(KP876306680), T2j)); + T2Y = VADD(T2Q, T2P); + T2S = VFMA(LDK(KP1_688655851), T2p, VMUL(LDK(KP535826794), T2q)); + T2T = VFMA(LDK(KP1_996053456), T2m, VMUL(LDK(KP062790519), T2n)); + T2Z = VADD(T2S, T2T); + T2R = VSUB(T2P, T2Q); + T31 = VADD(T2Y, T2Z); + T2U = VSUB(T2S, T2T); + T30 = VMUL(LDK(KP559016994), VSUB(T2Y, T2Z)); + } + { + V T36, T37, T2v, T2D; + T36 = VBYI(VADD(T2u, T2N)); + T37 = VADD(T2w, T31); + ST(&(x[WS(rs, 2)]), VADD(T36, T37), ms, &(x[0])); + ST(&(x[WS(rs, 23)]), VSUB(T37, T36), ms, &(x[WS(rs, 1)])); + T2v = VBYI(VSUB(VADD(T2l, T2s), T2u)); + T2D = VADD(T2w, VADD(T2z, T2C)); + ST(&(x[WS(rs, 3)]), VADD(T2v, T2D), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 22)]), VSUB(T2D, T2v), ms, &(x[0])); + } + T2E = VFMA(LDK(KP309016994), T2z, 
VFNMS(LDK(KP809016994), T2C, VFNMS(LDK(KP587785252), VADD(T2r, T2o), VFNMS(LDK(KP951056516), VADD(T2k, T2h), T2w)))); + T2F = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2A, T2B), VFNMS(LDK(KP809016994), T2s, VFNMS(LDK(KP951056516), VADD(T2x, T2y), VMUL(LDK(KP309016994), T2l)))), T2u)); + ST(&(x[WS(rs, 17)]), VSUB(T2E, T2F), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VADD(T2E, T2F), ms, &(x[0])); + { + V T2V, T34, T33, T35, T2O, T32; + T2O = VFNMS(LDK(KP250000000), T2N, T2u); + T2V = VBYI(VADD(T2M, VADD(T2O, VFNMS(LDK(KP587785252), T2U, VMUL(LDK(KP951056516), T2R))))); + T34 = VBYI(VADD(T2O, VSUB(VFMA(LDK(KP587785252), T2R, VMUL(LDK(KP951056516), T2U)), T2M))); + T32 = VFNMS(LDK(KP250000000), T31, T2w); + T33 = VFMA(LDK(KP951056516), T2W, VFMA(LDK(KP587785252), T2X, VADD(T30, T32))); + T35 = VFMA(LDK(KP587785252), T2W, VSUB(VFNMS(LDK(KP951056516), T2X, T32), T30)); + ST(&(x[WS(rs, 7)]), VADD(T2V, T33), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T35, T34), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VSUB(T33, T2V), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T34, T35), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {170, 110, 78, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_25) (planner *p) { + X(kdft_dit_register) (p, t2fv_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,863 @@ +/* + * Copyright (c) 
2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:20 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t2fv_32 -include t2f.h */ + +/* + * This function contains 217 FP additions, 160 FP multiplications, + * (or, 119 additions, 62 multiplications, 98 fused multiply/add), + * 112 stack variables, 7 constants, and 64 memory accesses + */ +#include "t2f.h" + +static void t2fv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; 
+ R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T26, T25, T1Z, T22, T1W, T2a, T2k, T2g; + { + V T4, T1z, T2o, T32, T2r, T3f, Tf, T1A, T34, T2L, T1D, TC, T33, T2O, T1C; + V Tr, T2C, T3a, T2F, T3b, T1r, T21, T1k, T20, TQ, TM, TS, TL, T2t, TJ; + V T10, T2u; + { + V Tt, T9, T2p, Te, T2q, TA, Tu, Tx; + { + V T1, T1x, T2, T1v; + T1 = LD(&(x[0]), ms, &(x[0])); + T1x = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1v = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + { + V T5, Tc, T7, Ta, T2m, T2n; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + { + V T1y, T3, T1w, T6, Td, T8, Tb, Ts, Tz; + Ts = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T1y = BYTWJ(&(W[TWVL * 46]), T1x); + T3 = BYTWJ(&(W[TWVL * 30]), T2); + T1w = BYTWJ(&(W[TWVL * 14]), T1v); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + Td = BYTWJ(&(W[TWVL * 22]), Tc); + T8 = BYTWJ(&(W[TWVL * 38]), T7); + Tb = BYTWJ(&(W[TWVL * 54]), Ta); + Tt = BYTWJ(&(W[TWVL * 58]), Ts); + Tz = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T4 = VSUB(T1, T3); + T2m = VADD(T1, T3); + T1z = VSUB(T1w, T1y); + T2n = VADD(T1w, T1y); + T9 = VSUB(T6, T8); + T2p = VADD(T6, T8); + Te = VSUB(Tb, Td); + T2q = VADD(Tb, Td); + TA = BYTWJ(&(W[TWVL * 10]), Tz); + } + Tu = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2o = VADD(T2m, T2n); + T32 = VSUB(T2m, T2n); + Tx = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + } + } + { + V Tv, To, Ty, Ti, Tj, Tm, Th; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T2r = VADD(T2p, T2q); + T3f = VSUB(T2q, T2p); + Tf = VADD(T9, Te); + T1A = VSUB(Te, T9); + Tv = BYTWJ(&(W[TWVL * 26]), Tu); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Ty = BYTWJ(&(W[TWVL * 42]), Tx); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + { + V T1f, T1h, T1a, T1c, T18, 
T2A, T2B, T1p; + { + V T15, T17, T1o, T1m; + { + V Tw, T2J, Tp, T2K, TB, Tk, Tn, T1n, T14, T16; + T14 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T16 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tw = VSUB(Tt, Tv); + T2J = VADD(Tt, Tv); + Tp = BYTWJ(&(W[TWVL * 50]), To); + T2K = VADD(TA, Ty); + TB = VSUB(Ty, TA); + Tk = BYTWJ(&(W[TWVL * 34]), Tj); + Tn = BYTWJ(&(W[TWVL * 18]), Tm); + T15 = BYTWJ(&(W[TWVL * 60]), T14); + T17 = BYTWJ(&(W[TWVL * 28]), T16); + T1n = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + { + V T2M, Tl, T2N, Tq, T1l; + T1l = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T34 = VSUB(T2J, T2K); + T2L = VADD(T2J, T2K); + T1D = VFMA(LDK(KP414213562), Tw, TB); + TC = VFNMS(LDK(KP414213562), TB, Tw); + T2M = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + T2N = VADD(Tn, Tp); + Tq = VSUB(Tn, Tp); + T1o = BYTWJ(&(W[TWVL * 12]), T1n); + T1m = BYTWJ(&(W[TWVL * 44]), T1l); + { + V T1e, T1g, T19, T1b; + T1e = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1g = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T19 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1b = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T33 = VSUB(T2M, T2N); + T2O = VADD(T2M, T2N); + T1C = VFMA(LDK(KP414213562), Tl, Tq); + Tr = VFNMS(LDK(KP414213562), Tq, Tl); + T1f = BYTWJ(&(W[TWVL * 52]), T1e); + T1h = BYTWJ(&(W[TWVL * 20]), T1g); + T1a = BYTWJ(&(W[TWVL * 4]), T19); + T1c = BYTWJ(&(W[TWVL * 36]), T1b); + } + } + } + T18 = VSUB(T15, T17); + T2A = VADD(T15, T17); + T2B = VADD(T1o, T1m); + T1p = VSUB(T1m, T1o); + } + { + V TG, TI, TZ, TX; + { + V T1i, T2E, T1d, T2D, TH, TY, TF; + TF = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T1i = VSUB(T1f, T1h); + T2E = VADD(T1f, T1h); + T1d = VSUB(T1a, T1c); + T2D = VADD(T1a, T1c); + TH = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TY = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T2C = VADD(T2A, T2B); + T3a = VSUB(T2A, T2B); + TG = BYTWJ(&(W[0]), TF); + { + V TW, T1j, T1q, TP, TR, TK; + TW = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T2F = VADD(T2D, T2E); + T3b = 
VSUB(T2E, T2D); + T1j = VADD(T1d, T1i); + T1q = VSUB(T1i, T1d); + TI = BYTWJ(&(W[TWVL * 32]), TH); + TZ = BYTWJ(&(W[TWVL * 48]), TY); + TP = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TX = BYTWJ(&(W[TWVL * 16]), TW); + TR = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TK = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1r = VFMA(LDK(KP707106781), T1q, T1p); + T21 = VFNMS(LDK(KP707106781), T1q, T1p); + T1k = VFMA(LDK(KP707106781), T1j, T18); + T20 = VFNMS(LDK(KP707106781), T1j, T18); + TQ = BYTWJ(&(W[TWVL * 56]), TP); + TM = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TS = BYTWJ(&(W[TWVL * 24]), TR); + TL = BYTWJ(&(W[TWVL * 8]), TK); + } + } + T2t = VADD(TG, TI); + TJ = VSUB(TG, TI); + T10 = VSUB(TX, TZ); + T2u = VADD(TX, TZ); + } + } + } + } + { + V T2s, TT, T2x, T2P, T2Y, T2G, T37, T2v, T2w, TO, T2W, T30, T2U, TN, T2V; + T2s = VSUB(T2o, T2r); + T2U = VADD(T2o, T2r); + TN = BYTWJ(&(W[TWVL * 40]), TM); + TT = VSUB(TQ, TS); + T2x = VADD(TQ, TS); + T2P = VSUB(T2L, T2O); + T2V = VADD(T2O, T2L); + T2Y = VADD(T2C, T2F); + T2G = VSUB(T2C, T2F); + T37 = VSUB(T2t, T2u); + T2v = VADD(T2t, T2u); + T2w = VADD(TL, TN); + TO = VSUB(TL, TN); + T2W = VADD(T2U, T2V); + T30 = VSUB(T2U, T2V); + { + V T3i, T3o, T36, T3r, T3h, T3j, T12, T1Y, TV, T1X, T3s, T3d, T2Q, T2H, T31; + V T2Z; + { + V T35, T3g, T38, T2y, T11, TU; + T35 = VADD(T33, T34); + T3g = VSUB(T34, T33); + T38 = VSUB(T2w, T2x); + T2y = VADD(T2w, T2x); + T11 = VSUB(TO, TT); + TU = VADD(TO, TT); + { + V T3c, T39, T2X, T2z; + T3c = VFNMS(LDK(KP414213562), T3b, T3a); + T3i = VFMA(LDK(KP414213562), T3a, T3b); + T3o = VFNMS(LDK(KP707106781), T35, T32); + T36 = VFMA(LDK(KP707106781), T35, T32); + T3r = VFNMS(LDK(KP707106781), T3g, T3f); + T3h = VFMA(LDK(KP707106781), T3g, T3f); + T39 = VFNMS(LDK(KP414213562), T38, T37); + T3j = VFMA(LDK(KP414213562), T37, T38); + T2X = VADD(T2v, T2y); + T2z = VSUB(T2v, T2y); + T12 = VFMA(LDK(KP707106781), T11, T10); + T1Y = VFNMS(LDK(KP707106781), T11, T10); + TV = VFMA(LDK(KP707106781), TU, 
TJ); + T1X = VFNMS(LDK(KP707106781), TU, TJ); + T3s = VSUB(T3c, T39); + T3d = VADD(T39, T3c); + T2Q = VSUB(T2G, T2z); + T2H = VADD(T2z, T2G); + T31 = VSUB(T2Y, T2X); + T2Z = VADD(T2X, T2Y); + } + } + { + V Tg, T1U, TD, T1G, T13, T1s, T1H, T1B, T1V, T1E, T3k, T3p, T2e, T2f; + Tg = VFMA(LDK(KP707106781), Tf, T4); + T1U = VFNMS(LDK(KP707106781), Tf, T4); + T3k = VSUB(T3i, T3j); + T3p = VADD(T3j, T3i); + { + V T3v, T3t, T3e, T3m; + T3v = VFNMS(LDK(KP923879532), T3s, T3r); + T3t = VFMA(LDK(KP923879532), T3s, T3r); + T3e = VFNMS(LDK(KP923879532), T3d, T36); + T3m = VFMA(LDK(KP923879532), T3d, T36); + { + V T2R, T2T, T2I, T2S; + T2R = VFNMS(LDK(KP707106781), T2Q, T2P); + T2T = VFMA(LDK(KP707106781), T2Q, T2P); + T2I = VFNMS(LDK(KP707106781), T2H, T2s); + T2S = VFMA(LDK(KP707106781), T2H, T2s); + ST(&(x[WS(rs, 24)]), VFNMSI(T31, T30), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T31, T30), ms, &(x[0])); + ST(&(x[0]), VADD(T2W, T2Z), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T2W, T2Z), ms, &(x[0])); + { + V T3u, T3q, T3l, T3n; + T3u = VFMA(LDK(KP923879532), T3p, T3o); + T3q = VFNMS(LDK(KP923879532), T3p, T3o); + T3l = VFNMS(LDK(KP923879532), T3k, T3h); + T3n = VFMA(LDK(KP923879532), T3k, T3h); + ST(&(x[WS(rs, 4)]), VFMAI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T2T, T2S), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T2R, T2I), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T2R, T2I), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3t, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3t, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T3v, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3v, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3n, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3l, T3e), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3l, T3e), ms, &(x[0])); + T26 = VSUB(TC, Tr); + TD = VADD(Tr, TC); + } + } + } + T1G = VFMA(LDK(KP198912367), TV, T12); + T13 = VFNMS(LDK(KP198912367), T12, TV); + T1s = 
VFNMS(LDK(KP198912367), T1r, T1k); + T1H = VFMA(LDK(KP198912367), T1k, T1r); + T1B = VFNMS(LDK(KP707106781), T1A, T1z); + T25 = VFMA(LDK(KP707106781), T1A, T1z); + T1V = VADD(T1C, T1D); + T1E = VSUB(T1C, T1D); + { + V T1S, T1O, T1K, T1u, T1R, T1T, T1L, T1J; + { + V TE, T1M, T1I, T1N, T1t, T1Q, T1F, T1P, T28, T29; + TE = VFMA(LDK(KP923879532), TD, Tg); + T1M = VFNMS(LDK(KP923879532), TD, Tg); + T1I = VSUB(T1G, T1H); + T1N = VADD(T1G, T1H); + T1t = VADD(T13, T1s); + T1Q = VSUB(T1s, T13); + T1F = VFMA(LDK(KP923879532), T1E, T1B); + T1P = VFNMS(LDK(KP923879532), T1E, T1B); + T28 = VFNMS(LDK(KP668178637), T1X, T1Y); + T1Z = VFMA(LDK(KP668178637), T1Y, T1X); + T1S = VFMA(LDK(KP980785280), T1N, T1M); + T1O = VFNMS(LDK(KP980785280), T1N, T1M); + T22 = VFMA(LDK(KP668178637), T21, T20); + T29 = VFNMS(LDK(KP668178637), T20, T21); + T1K = VFMA(LDK(KP980785280), T1t, TE); + T1u = VFNMS(LDK(KP980785280), T1t, TE); + T1R = VFNMS(LDK(KP980785280), T1Q, T1P); + T1T = VFMA(LDK(KP980785280), T1Q, T1P); + T1L = VFMA(LDK(KP980785280), T1I, T1F); + T1J = VFNMS(LDK(KP980785280), T1I, T1F); + T2e = VFNMS(LDK(KP923879532), T1V, T1U); + T1W = VFMA(LDK(KP923879532), T1V, T1U); + T2a = VSUB(T28, T29); + T2f = VADD(T28, T29); + } + ST(&(x[WS(rs, 23)]), VFMAI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1R, T1O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFNMSI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1T, T1S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFMAI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T1L, T1K), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T1J, T1u), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T1J, T1u), ms, &(x[WS(rs, 1)])); + } + T2k = VFNMS(LDK(KP831469612), T2f, T2e); + T2g = VFMA(LDK(KP831469612), T2f, T2e); + } + } + } + } + { + V T2i, T23, T2h, T27; + T2i = VSUB(T22, T1Z); + T23 = VADD(T1Z, T22); + T2h = VFNMS(LDK(KP923879532), T26, T25); + T27 = VFMA(LDK(KP923879532), T26, T25); + { + V T2c, 
T24, T2j, T2l, T2d, T2b; + T2c = VFMA(LDK(KP831469612), T23, T1W); + T24 = VFNMS(LDK(KP831469612), T23, T1W); + T2j = VFMA(LDK(KP831469612), T2i, T2h); + T2l = VFNMS(LDK(KP831469612), T2i, T2h); + T2d = VFMA(LDK(KP831469612), T2a, T27); + T2b = VFNMS(LDK(KP831469612), T2a, T27); + ST(&(x[WS(rs, 21)]), VFNMSI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T2j, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFMAI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T2l, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFNMSI(T2d, T2c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T2b, T24), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T2b, T24), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t2fv_32"), twinstr, &GENUS, {119, 62, 98, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_32) (planner *p) { + X(kdft_dit_register) (p, t2fv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t2fv_32 -include t2f.h */ + +/* + * This function contains 217 FP additions, 104 FP multiplications, + * (or, 201 additions, 88 multiplications, 16 fused multiply/add), + * 59 stack variables, 7 constants, and 64 memory accesses + */ +#include "t2f.h" + +static void t2fv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP555570233, 
+0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 62)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(32, rs)) { + V T4, T1A, T2o, T32, Tf, T1v, T2r, T3f, TC, T1C, T2L, T34, Tr, T1D, T2O; + V T33, T1k, T20, T2F, T3b, T1r, T21, T2C, T3a, TV, T1X, T2y, T38, T12, T1Y; + V T2v, T37; + { + V T1, T1z, T3, T1x, T1y, T2, T1w, T2m, T2n; + T1 = LD(&(x[0]), ms, &(x[0])); + T1y = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T1z = BYTWJ(&(W[TWVL * 46]), T1y); + T2 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 30]), T2); + T1w = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1x = BYTWJ(&(W[TWVL * 14]), T1w); + T4 = VSUB(T1, T3); + T1A = VSUB(T1x, T1z); + T2m = VADD(T1, T3); + T2n = VADD(T1x, T1z); + T2o = VADD(T2m, T2n); + T32 = VSUB(T2m, T2n); + } + { + V T6, Td, T8, Tb; + { + V T5, Tc, T7, Ta; + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 6]), T5); + Tc = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 22]), Tc); + T7 = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 38]), T7); + Ta = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 54]), Ta); + } + { + V T9, Te, T2p, T2q; + T9 = VSUB(T6, T8); + Te = VSUB(Tb, Td); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + T1v = VMUL(LDK(KP707106781), VSUB(Te, T9)); + T2p = VADD(T6, T8); + T2q = VADD(Tb, Td); + T2r = VADD(T2p, T2q); + T3f = VSUB(T2q, T2p); + } + } + { + V Tt, TA, Tv, Ty; + { + V Ts, Tz, Tu, Tx; + Ts = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + Tt = 
BYTWJ(&(W[TWVL * 58]), Ts); + Tz = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 42]), Tz); + Tu = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tv = BYTWJ(&(W[TWVL * 26]), Tu); + Tx = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ty = BYTWJ(&(W[TWVL * 10]), Tx); + } + { + V Tw, TB, T2J, T2K; + Tw = VSUB(Tt, Tv); + TB = VSUB(Ty, TA); + TC = VFMA(LDK(KP923879532), Tw, VMUL(LDK(KP382683432), TB)); + T1C = VFNMS(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T2J = VADD(Tt, Tv); + T2K = VADD(Ty, TA); + T2L = VADD(T2J, T2K); + T34 = VSUB(T2J, T2K); + } + } + { + V Ti, Tp, Tk, Tn; + { + V Th, To, Tj, Tm; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + To = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + Tp = BYTWJ(&(W[TWVL * 50]), To); + Tj = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tk = BYTWJ(&(W[TWVL * 34]), Tj); + Tm = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tn = BYTWJ(&(W[TWVL * 18]), Tm); + } + { + V Tl, Tq, T2M, T2N; + Tl = VSUB(Ti, Tk); + Tq = VSUB(Tn, Tp); + Tr = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + T1D = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + T2M = VADD(Ti, Tk); + T2N = VADD(Tn, Tp); + T2O = VADD(T2M, T2N); + T33 = VSUB(T2M, T2N); + } + } + { + V T15, T17, T1p, T1n, T1f, T1h, T1i, T1a, T1c, T1d; + { + V T14, T16, T1o, T1m; + T14 = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T15 = BYTWJ(&(W[TWVL * 60]), T14); + T16 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T17 = BYTWJ(&(W[TWVL * 28]), T16); + T1o = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1p = BYTWJ(&(W[TWVL * 44]), T1o); + T1m = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1n = BYTWJ(&(W[TWVL * 12]), T1m); + { + V T1e, T1g, T19, T1b; + T1e = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1f = BYTWJ(&(W[TWVL * 52]), T1e); + T1g = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1h = BYTWJ(&(W[TWVL * 20]), T1g); + T1i = VSUB(T1f, T1h); + T19 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1a = BYTWJ(&(W[TWVL * 4]), T19); + T1b = LD(&(x[WS(rs, 19)]), ms, 
&(x[WS(rs, 1)])); + T1c = BYTWJ(&(W[TWVL * 36]), T1b); + T1d = VSUB(T1a, T1c); + } + } + { + V T18, T1j, T2D, T2E; + T18 = VSUB(T15, T17); + T1j = VMUL(LDK(KP707106781), VADD(T1d, T1i)); + T1k = VADD(T18, T1j); + T20 = VSUB(T18, T1j); + T2D = VADD(T1a, T1c); + T2E = VADD(T1f, T1h); + T2F = VADD(T2D, T2E); + T3b = VSUB(T2E, T2D); + } + { + V T1l, T1q, T2A, T2B; + T1l = VMUL(LDK(KP707106781), VSUB(T1i, T1d)); + T1q = VSUB(T1n, T1p); + T1r = VSUB(T1l, T1q); + T21 = VADD(T1q, T1l); + T2A = VADD(T15, T17); + T2B = VADD(T1n, T1p); + T2C = VADD(T2A, T2B); + T3a = VSUB(T2A, T2B); + } + } + { + V TG, TI, T10, TY, TQ, TS, TT, TL, TN, TO; + { + V TF, TH, TZ, TX; + TF = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TG = BYTWJ(&(W[0]), TF); + TH = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TI = BYTWJ(&(W[TWVL * 32]), TH); + TZ = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T10 = BYTWJ(&(W[TWVL * 48]), TZ); + TX = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TY = BYTWJ(&(W[TWVL * 16]), TX); + { + V TP, TR, TK, TM; + TP = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + TQ = BYTWJ(&(W[TWVL * 56]), TP); + TR = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TS = BYTWJ(&(W[TWVL * 24]), TR); + TT = VSUB(TQ, TS); + TK = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TL = BYTWJ(&(W[TWVL * 8]), TK); + TM = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + TN = BYTWJ(&(W[TWVL * 40]), TM); + TO = VSUB(TL, TN); + } + } + { + V TJ, TU, T2w, T2x; + TJ = VSUB(TG, TI); + TU = VMUL(LDK(KP707106781), VADD(TO, TT)); + TV = VADD(TJ, TU); + T1X = VSUB(TJ, TU); + T2w = VADD(TL, TN); + T2x = VADD(TQ, TS); + T2y = VADD(T2w, T2x); + T38 = VSUB(T2x, T2w); + } + { + V TW, T11, T2t, T2u; + TW = VMUL(LDK(KP707106781), VSUB(TT, TO)); + T11 = VSUB(TY, T10); + T12 = VSUB(TW, T11); + T1Y = VADD(T11, TW); + T2t = VADD(TG, TI); + T2u = VADD(TY, T10); + T2v = VADD(T2t, T2u); + T37 = VSUB(T2t, T2u); + } + } + { + V T2W, T30, T2Z, T31; + { + V T2U, T2V, T2X, T2Y; + T2U = VADD(T2o, T2r); + T2V = VADD(T2O, T2L); + T2W = VADD(T2U, 
T2V); + T30 = VSUB(T2U, T2V); + T2X = VADD(T2v, T2y); + T2Y = VADD(T2C, T2F); + T2Z = VADD(T2X, T2Y); + T31 = VBYI(VSUB(T2Y, T2X)); + } + ST(&(x[WS(rs, 16)]), VSUB(T2W, T2Z), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T30, T31), ms, &(x[0])); + ST(&(x[0]), VADD(T2W, T2Z), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VSUB(T30, T31), ms, &(x[0])); + } + { + V T2s, T2P, T2H, T2Q, T2z, T2G; + T2s = VSUB(T2o, T2r); + T2P = VSUB(T2L, T2O); + T2z = VSUB(T2v, T2y); + T2G = VSUB(T2C, T2F); + T2H = VMUL(LDK(KP707106781), VADD(T2z, T2G)); + T2Q = VMUL(LDK(KP707106781), VSUB(T2G, T2z)); + { + V T2I, T2R, T2S, T2T; + T2I = VADD(T2s, T2H); + T2R = VBYI(VADD(T2P, T2Q)); + ST(&(x[WS(rs, 28)]), VSUB(T2I, T2R), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T2I, T2R), ms, &(x[0])); + T2S = VSUB(T2s, T2H); + T2T = VBYI(VSUB(T2Q, T2P)); + ST(&(x[WS(rs, 20)]), VSUB(T2S, T2T), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T2S, T2T), ms, &(x[0])); + } + } + { + V T36, T3r, T3h, T3p, T3d, T3o, T3k, T3s, T35, T3g; + T35 = VMUL(LDK(KP707106781), VADD(T33, T34)); + T36 = VADD(T32, T35); + T3r = VSUB(T32, T35); + T3g = VMUL(LDK(KP707106781), VSUB(T34, T33)); + T3h = VADD(T3f, T3g); + T3p = VSUB(T3g, T3f); + { + V T39, T3c, T3i, T3j; + T39 = VFMA(LDK(KP923879532), T37, VMUL(LDK(KP382683432), T38)); + T3c = VFNMS(LDK(KP382683432), T3b, VMUL(LDK(KP923879532), T3a)); + T3d = VADD(T39, T3c); + T3o = VSUB(T3c, T39); + T3i = VFNMS(LDK(KP382683432), T37, VMUL(LDK(KP923879532), T38)); + T3j = VFMA(LDK(KP382683432), T3a, VMUL(LDK(KP923879532), T3b)); + T3k = VADD(T3i, T3j); + T3s = VSUB(T3j, T3i); + } + { + V T3e, T3l, T3u, T3v; + T3e = VADD(T36, T3d); + T3l = VBYI(VADD(T3h, T3k)); + ST(&(x[WS(rs, 30)]), VSUB(T3e, T3l), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T3e, T3l), ms, &(x[0])); + T3u = VBYI(VADD(T3p, T3o)); + T3v = VADD(T3r, T3s); + ST(&(x[WS(rs, 6)]), VADD(T3u, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VSUB(T3v, T3u), ms, &(x[0])); + } + { + V T3m, T3n, T3q, T3t; + T3m = VSUB(T36, T3d); + T3n = VBYI(VSUB(T3k, 
T3h)); + ST(&(x[WS(rs, 18)]), VSUB(T3m, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T3m, T3n), ms, &(x[0])); + T3q = VBYI(VSUB(T3o, T3p)); + T3t = VSUB(T3r, T3s); + ST(&(x[WS(rs, 10)]), VADD(T3q, T3t), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VSUB(T3t, T3q), ms, &(x[0])); + } + } + { + V TE, T1P, T1I, T1Q, T1t, T1M, T1F, T1N; + { + V Tg, TD, T1G, T1H; + Tg = VADD(T4, Tf); + TD = VADD(Tr, TC); + TE = VADD(Tg, TD); + T1P = VSUB(Tg, TD); + T1G = VFNMS(LDK(KP195090322), TV, VMUL(LDK(KP980785280), T12)); + T1H = VFMA(LDK(KP195090322), T1k, VMUL(LDK(KP980785280), T1r)); + T1I = VADD(T1G, T1H); + T1Q = VSUB(T1H, T1G); + } + { + V T13, T1s, T1B, T1E; + T13 = VFMA(LDK(KP980785280), TV, VMUL(LDK(KP195090322), T12)); + T1s = VFNMS(LDK(KP195090322), T1r, VMUL(LDK(KP980785280), T1k)); + T1t = VADD(T13, T1s); + T1M = VSUB(T1s, T13); + T1B = VSUB(T1v, T1A); + T1E = VSUB(T1C, T1D); + T1F = VADD(T1B, T1E); + T1N = VSUB(T1E, T1B); + } + { + V T1u, T1J, T1S, T1T; + T1u = VADD(TE, T1t); + T1J = VBYI(VADD(T1F, T1I)); + ST(&(x[WS(rs, 31)]), VSUB(T1u, T1J), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1u, T1J), ms, &(x[WS(rs, 1)])); + T1S = VBYI(VADD(T1N, T1M)); + T1T = VADD(T1P, T1Q); + ST(&(x[WS(rs, 7)]), VADD(T1S, T1T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VSUB(T1T, T1S), ms, &(x[WS(rs, 1)])); + } + { + V T1K, T1L, T1O, T1R; + T1K = VSUB(TE, T1t); + T1L = VBYI(VSUB(T1I, T1F)); + ST(&(x[WS(rs, 17)]), VSUB(T1K, T1L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VADD(T1K, T1L), ms, &(x[WS(rs, 1)])); + T1O = VBYI(VSUB(T1M, T1N)); + T1R = VSUB(T1P, T1Q); + ST(&(x[WS(rs, 9)]), VADD(T1O, T1R), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VSUB(T1R, T1O), ms, &(x[WS(rs, 1)])); + } + } + { + V T1W, T2h, T2a, T2i, T23, T2e, T27, T2f; + { + V T1U, T1V, T28, T29; + T1U = VSUB(T4, Tf); + T1V = VADD(T1D, T1C); + T1W = VADD(T1U, T1V); + T2h = VSUB(T1U, T1V); + T28 = VFNMS(LDK(KP555570233), T1X, VMUL(LDK(KP831469612), T1Y)); + T29 = VFMA(LDK(KP555570233), T20, VMUL(LDK(KP831469612), 
T21)); + T2a = VADD(T28, T29); + T2i = VSUB(T29, T28); + } + { + V T1Z, T22, T25, T26; + T1Z = VFMA(LDK(KP831469612), T1X, VMUL(LDK(KP555570233), T1Y)); + T22 = VFNMS(LDK(KP555570233), T21, VMUL(LDK(KP831469612), T20)); + T23 = VADD(T1Z, T22); + T2e = VSUB(T22, T1Z); + T25 = VADD(T1A, T1v); + T26 = VSUB(TC, Tr); + T27 = VADD(T25, T26); + T2f = VSUB(T26, T25); + } + { + V T24, T2b, T2k, T2l; + T24 = VADD(T1W, T23); + T2b = VBYI(VADD(T27, T2a)); + ST(&(x[WS(rs, 29)]), VSUB(T24, T2b), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T24, T2b), ms, &(x[WS(rs, 1)])); + T2k = VBYI(VADD(T2f, T2e)); + T2l = VADD(T2h, T2i); + ST(&(x[WS(rs, 5)]), VADD(T2k, T2l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VSUB(T2l, T2k), ms, &(x[WS(rs, 1)])); + } + { + V T2c, T2d, T2g, T2j; + T2c = VSUB(T1W, T23); + T2d = VBYI(VSUB(T2a, T27)); + ST(&(x[WS(rs, 19)]), VSUB(T2c, T2d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VADD(T2c, T2d), ms, &(x[WS(rs, 1)])); + T2g = VBYI(VSUB(T2e, T2f)); + T2j = VSUB(T2h, T2i); + ST(&(x[WS(rs, 11)]), VADD(T2g, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VSUB(T2j, T2g), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t2fv_32"), twinstr, &GENUS, {201, 88, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_32) (planner *p) { + X(kdft_dit_register) (p, t2fv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:18 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t2fv_4 -include t2f.h */ + +/* + * This function contains 11 FP additions, 8 FP multiplications, + * (or, 9 additions, 6 multiplications, 2 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t2f.h" /* FMA variant (HAVE_FMA defined) of the size-4 kernel (-n 4 in the generator line above); the non-FMA variant follows the #else further down. */ + +static void t2fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) /* ii is never referenced in this body: every load and store goes through x, which is set to ri. */ +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { /* Each pass advances m by VL, x by VL * ms and W by TWVL * 6; W is offset by mb first so the loop can start at m = mb. */ + V T1, T7, T2, T5, T8, T3, T6; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); /* Four loads: x[0] and the rs-strided elements 1, 2 and 3. */ + T8 =
BYTWJ(&(W[TWVL * 4]), T7); + T3 = BYTWJ(&(W[TWVL * 2]), T2); + T6 = BYTWJ(&(W[0]), T5); /* Elements 1, 2, 3 each pass through BYTWJ with W[0], W[TWVL * 2], W[TWVL * 4]; element 0 (T1) is used as loaded. NOTE(review): BYTWJ is defined outside this file, so its exact twiddle convention should be confirmed there. */ + { + V Ta, T4, Tb, T9; + Ta = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tb = VADD(T6, T8); + T9 = VSUB(T6, T8); /* Sums and differences of the element pairs (0, 2) and (1, 3). */ + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)])); /* All four results are stored to the same x[...] slots that were loaded, so the kernel works in place. */ + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; /* Three VTW entries, matching the three BYTWJ reads in the kernel. NOTE(review): VTW and TW_NEXT are defined elsewhere; confirm their layout semantics there. */ + +static const ct_desc desc = { 4, XSIMD_STRING("t2fv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 }; /* 4 matches -n 4; {9, 6, 2, 0} repeats the add, mul and fma counts from the generated header comment. */ + +void XSIMD(codelet_t2fv_4) (planner *p) { + X(kdft_dit_register) (p, t2fv_4, &desc); /* Hands the kernel and its descriptor to the planner p. */ +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t2fv_4 -include t2f.h */ + +/* + * This function contains 11 FP additions, 6 FP multiplications, + * (or, 11 additions, 6 multiplications, 0 fused multiply/add), + * 13 stack variables, 0 constants, and 8 memory accesses + */ +#include "t2f.h" /* Non-FMA variant: same signature and loop header as the FMA variant, using VBYI with VADD and VSUB where that variant uses VFMAI and VFNMSI. */ + +static void t2fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) /* As in the FMA variant, ii is never referenced; x is set to ri. */ +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(4, rs)) { + V T1, T8, T3, T6, T7, T2, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 4]), T7); + T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 2]), T2); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTWJ(&(W[0]), T5); /* Here each load is followed directly by its BYTWJ; element 0 (T1) again has none. */ + { + V T4, T9, Ta, Tb; + T4 = VSUB(T1, T3); + T9 = VBYI(VSUB(T6, T8)); /* T9 feeds outputs 1 and 3 just below. */ + ST(&(x[WS(rs, 1)]), VSUB(T4, T9), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T4, T9), ms, &(x[WS(rs, 1)])); + Ta = VADD(T1, T3); + Tb = VADD(T6, T8); + ST(&(x[WS(rs, 2)]), VSUB(Ta, Tb), ms, &(x[0])); + ST(&(x[0]), VADD(Ta, Tb), ms, &(x[0])); /* Outputs 0 and 2 are the sum and difference of Ta and Tb, as in the FMA variant. */ + } + } + }
+ VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; /* Same three-entry table as in the FMA variant. */ + +static const ct_desc desc = { 4, XSIMD_STRING("t2fv_4"), twinstr, &GENUS, {11, 6, 0, 0}, 0, 0, 0 }; /* {11, 6, 0, 0} repeats the add, mul and fma counts from this variant's header comment. */ + +void XSIMD(codelet_t2fv_4) (planner *p) { + X(kdft_dit_register) (p, t2fv_4, &desc); /* Hands the kernel and its descriptor to the planner p. */ +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:23 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t2fv_5 -include t2f.h */ + +/* + * This function contains 20 FP additions, 19 FP multiplications, + * (or, 11 additions, 10 multiplications, 9 fused multiply/add), + * 26 stack variables, 4 constants, and 10 memory accesses + */ +#include "t2f.h" /* FMA variant (HAVE_FMA defined) of the size-5 kernel (-n 5 in the generator line above); the non-FMA variant follows the #else further down. */ + +static void t2fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) /* ii is never referenced in this body: every load and store goes through x, which is set to ri. */ +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* Numerically: KP559016994 is sqrt(5)/4, KP250000000 is 1/4, KP618033988 is (sqrt(5) - 1)/2 and KP951056516 is sin(2*pi/5). */ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { /* Each pass advances m by VL, x by VL * ms and W by TWVL * 8; W is offset by mb first so the loop can start at m = mb. */ + V T1, T2, T9, T4, T7; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T4 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); /* Five loads: x[0] and the rs-strided elements 1 to 4. */ + { + V T3, Ta, T5, T8; + T3 = BYTWJ(&(W[0]), T2); + Ta = BYTWJ(&(W[TWVL * 4]), T9); + T5 = BYTWJ(&(W[TWVL * 6]), T4); + T8 = BYTWJ(&(W[TWVL * 2]), T7); /* Elements 1 to 4 each pass through BYTWJ with W[0], W[TWVL * 2], W[TWVL * 4], W[TWVL * 6]; element 0 (T1) is used as loaded. NOTE(review): BYTWJ is defined outside this file, so its exact twiddle convention should be confirmed there. */ + { + V T6, Tg, Tb, Th; + T6 = VADD(T3, T5); + Tg = VSUB(T3, T5); + Tb = VADD(T8, Ta); + Th = VSUB(T8, Ta); /* Sums and differences of the element pairs (1, 4) and (2, 3). */ + { + V Te, Tc, Tk, Ti, Td, Tj, Tf; + Te = VSUB(T6, Tb); + Tc = VADD(T6, Tb); + Tk = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tg, Th)); + Ti = VMUL(LDK(KP951056516),
VFMA(LDK(KP618033988), Th, Tg)); + Td = VFNMS(LDK(KP250000000), Tc, T1); + ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); /* Output 0 is element 0 plus Tc, the sum of the four twiddled elements. */ + Tj = VFNMS(LDK(KP559016994), Te, Td); + Tf = VFMA(LDK(KP559016994), Te, Td); + ST(&(x[WS(rs, 2)]), VFMAI(Tk, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(Ti, Tf), ms, &(x[WS(rs, 1)])); /* Outputs 2 and 3 share Tk and Tj, outputs 4 and 1 share Ti and Tf; all stores go to the same x[...] slots that were loaded, so the kernel works in place. */ + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; /* Four VTW entries, matching the four BYTWJ reads in the kernel. NOTE(review): VTW and TW_NEXT are defined elsewhere; confirm their layout semantics there. */ + +static const ct_desc desc = { 5, XSIMD_STRING("t2fv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; /* 5 matches -n 5; {11, 10, 9, 0} repeats the add, mul and fma counts from the generated header comment. */ + +void XSIMD(codelet_t2fv_5) (planner *p) { + X(kdft_dit_register) (p, t2fv_5, &desc); /* Hands the kernel and its descriptor to the planner p. */ +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t2fv_5 -include t2f.h */ + +/* + * This function contains 20 FP additions, 14 FP multiplications, + * (or, 17 additions, 11 multiplications, 3 fused multiply/add), + * 20 stack variables, 4 constants, and 10 memory accesses + */ +#include "t2f.h" /* Non-FMA variant: same signature and loop header as the FMA variant, using VBYI with VADD and VSUB where that variant uses VFMAI and VFNMSI. */ + +static void t2fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) /* As in the FMA variant, ii is never referenced; x is set to ri. */ +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* Numerically: KP250000000 is 1/4, KP559016994 is sqrt(5)/4, KP587785252 is sin(pi/5) and KP951056516 is sin(2*pi/5). This variant uses KP587785252 where the FMA variant uses KP618033988. */ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(5, rs)) { + V Tc, Tg, Th, T5, Ta, Td; + Tc = LD(&(x[0]), ms, &(x[0])); /* Element 0 is loaded without a BYTWJ step. */ + { + V T2, T9, T4, T7; + { + V T1, T8, T3, T6; + T1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2 = BYTWJ(&(W[0]), T1); + T8 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = BYTWJ(&(W[TWVL * 4]), T8); + T3 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T4 =
BYTWJ(&(W[TWVL * 6]), T3); + T6 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T7 = BYTWJ(&(W[TWVL * 2]), T6); /* Elements 1 to 4 are each loaded and passed through BYTWJ with W[0], W[TWVL * 2], W[TWVL * 4], W[TWVL * 6]. */ + } + Tg = VSUB(T2, T4); + Th = VSUB(T7, T9); + T5 = VADD(T2, T4); + Ta = VADD(T7, T9); + Td = VADD(T5, Ta); /* Td is the sum of the four twiddled elements. */ + } + ST(&(x[0]), VADD(Tc, Td), ms, &(x[0])); + { + V Ti, Tj, Tf, Tk, Tb, Te; + Ti = VBYI(VFMA(LDK(KP951056516), Tg, VMUL(LDK(KP587785252), Th))); + Tj = VBYI(VFNMS(LDK(KP587785252), Tg, VMUL(LDK(KP951056516), Th))); + Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta)); + Te = VFNMS(LDK(KP250000000), Td, Tc); + Tf = VADD(Tb, Te); + Tk = VSUB(Te, Tb); + ST(&(x[WS(rs, 1)]), VSUB(Tf, Ti), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(Tk, Tj), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(Ti, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tj, Tk), ms, &(x[0])); /* Outputs 1 and 4 share Tf and Ti, outputs 3 and 2 share Tk and Tj; all stores go to the same x[...] slots that were loaded. */ + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + {TW_NEXT, VL, 0} +}; /* Same four-entry table as in the FMA variant. */ + +static const ct_desc desc = { 5, XSIMD_STRING("t2fv_5"), twinstr, &GENUS, {17, 11, 3, 0}, 0, 0, 0 }; /* {17, 11, 3, 0} repeats the add, mul and fma counts from this variant's header comment. */ + +void XSIMD(codelet_t2fv_5) (planner *p) { + X(kdft_dit_register) (p, t2fv_5, &desc); /* Hands the kernel and its descriptor to the planner p. */ +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1877 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:21 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t2fv_64 -include t2f.h */ + +/* + * This function contains 519 FP additions, 384 FP multiplications, + * (or, 261 additions, 126 multiplications, 258 fused multiply/add), + * 187 stack variables, 15 constants, and 128 memory accesses + */ +#include "t2f.h" + +static void t2fv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP820678790, +0.820678790828660330972281985331011598767386482); + DVK(KP098491403, +0.098491403357164253077197521291327432293052451); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP303346683, +0.303346683607342391675883946941299872384187453); + DVK(KP534511135, +0.534511135950791641089685961295362908582039528); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * 
((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V T6L, T6M, T6O, T6P, T75, T6V, T5A, T6A, T72, T6K, T6t, T6D, T6w, T6B, T6h; + V T6E; + { + V Ta, T3U, T3V, T37, T7a, T58, T7B, T6l, T1v, T24, T5Q, T7o, T5F, T7l, T43; + V T4F, T2i, T2R, T6b, T7v, T60, T7s, T4a, T4I, T5u, T7h, T5x, T7g, T1i, T3a; + V T4j, T4C, T7e, T5l, T7d, T5o, T3b, TV, T4B, T4m, T3X, T3Y, T6o, T7b, T5f; + V T7C, Tx, T38, T2p, T61, T2n, T65, T2D, T7p, T5M, T7m, T5T, T4G, T46, T25; + V T1S, T2q, T2u, T2w; + { + V T5q, T10, T5v, T15, T1b, T5s, T1c, T1e; + { + V T1V, T1p, T5B, T5O, T1u, T1X, T20, T21; + { + V T1, T2, T7, T5, T32, T34, T2X, T2Z; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T32 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T34 = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + T2X = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + T2Z = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + { + V T1m, T54, T6j, T36, T55, T31, T56, T1n, T1q, T1s, T4, T9; + { + V T3, T8, T6, T33, T35, T2Y, T30, T1l; + T1l = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = BYTWJ(&(W[TWVL * 62]), T2); + T8 = BYTWJ(&(W[TWVL * 94]), T7); + T6 = BYTWJ(&(W[TWVL * 30]), T5); + T33 = BYTWJ(&(W[TWVL * 14]), T32); + T35 = BYTWJ(&(W[TWVL * 78]), T34); + T2Y = BYTWJ(&(W[TWVL * 110]), T2X); + T30 = BYTWJ(&(W[TWVL * 46]), T2Z); + T1m = BYTWJ(&(W[0]), T1l); + T54 = VSUB(T1, T3); + T4 = VADD(T1, T3); + T6j = VSUB(T6, T8); + T9 = VADD(T6, T8); + T36 = VADD(T33, T35); + T55 = VSUB(T33, T35); + T31 = VADD(T2Y, T30); + T56 = VSUB(T2Y, T30); + T1n = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + } + T1q = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1s = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + Ta = VSUB(T4, T9); + T3U = VADD(T4, T9); + { + V T57, T6k, T1o, T1r, T1t, T1W, T1U, T1Z; + T1U = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T3V = VADD(T36, T31); + T37 = VSUB(T31, T36); + T57 = 
VADD(T55, T56); + T6k = VSUB(T56, T55); + T1o = BYTWJ(&(W[TWVL * 64]), T1n); + T1r = BYTWJ(&(W[TWVL * 32]), T1q); + T1t = BYTWJ(&(W[TWVL * 96]), T1s); + T1V = BYTWJ(&(W[TWVL * 16]), T1U); + T1W = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T1Z = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T7a = VFNMS(LDK(KP707106781), T57, T54); + T58 = VFMA(LDK(KP707106781), T57, T54); + T7B = VFMA(LDK(KP707106781), T6k, T6j); + T6l = VFNMS(LDK(KP707106781), T6k, T6j); + T1p = VADD(T1m, T1o); + T5B = VSUB(T1m, T1o); + T5O = VSUB(T1r, T1t); + T1u = VADD(T1r, T1t); + T1X = BYTWJ(&(W[TWVL * 80]), T1W); + T20 = BYTWJ(&(W[TWVL * 112]), T1Z); + T21 = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + } + } + } + { + V T5W, T2N, T69, T2L, T5Y, T2P, T48, T2c, T2h; + { + V T41, T1Y, T5C, T22, T2d, T29, T2b, T2f, T28, T2a, T2H, T2J; + T28 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T2a = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1v = VSUB(T1p, T1u); + T41 = VADD(T1p, T1u); + T1Y = VADD(T1V, T1X); + T5C = VSUB(T1V, T1X); + T22 = BYTWJ(&(W[TWVL * 48]), T21); + T2d = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T29 = BYTWJ(&(W[TWVL * 124]), T28); + T2b = BYTWJ(&(W[TWVL * 60]), T2a); + T2f = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T2H = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2J = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + { + V T23, T5D, T2e, T2g, T2I, T2K, T2M; + T2M = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T23 = VADD(T20, T22); + T5D = VSUB(T20, T22); + T2e = BYTWJ(&(W[TWVL * 28]), T2d); + T2c = VADD(T29, T2b); + T5W = VSUB(T29, T2b); + T2g = BYTWJ(&(W[TWVL * 92]), T2f); + T2I = BYTWJ(&(W[TWVL * 108]), T2H); + T2K = BYTWJ(&(W[TWVL * 44]), T2J); + T2N = BYTWJ(&(W[TWVL * 12]), T2M); + { + V T5E, T5P, T42, T2O; + T5E = VADD(T5C, T5D); + T5P = VSUB(T5C, T5D); + T24 = VSUB(T1Y, T23); + T42 = VADD(T1Y, T23); + T69 = VSUB(T2g, T2e); + T2h = VADD(T2e, T2g); + T2O = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2L = VADD(T2I, T2K); + T5Y = VSUB(T2I, T2K); + T5Q = 
VFMA(LDK(KP707106781), T5P, T5O); + T7o = VFNMS(LDK(KP707106781), T5P, T5O); + T5F = VFMA(LDK(KP707106781), T5E, T5B); + T7l = VFNMS(LDK(KP707106781), T5E, T5B); + T43 = VADD(T41, T42); + T4F = VSUB(T41, T42); + T2P = BYTWJ(&(W[TWVL * 76]), T2O); + } + } + } + T2i = VSUB(T2c, T2h); + T48 = VADD(T2c, T2h); + { + V TW, TY, T11, T2Q, T5X, T13; + TW = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + T11 = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T2Q = VADD(T2N, T2P); + T5X = VSUB(T2N, T2P); + T13 = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + { + V T12, T5Z, T6a, T49, T14, T18, T1a; + { + V T17, T19, TX, TZ; + T17 = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + T19 = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TX = BYTWJ(&(W[TWVL * 122]), TW); + TZ = BYTWJ(&(W[TWVL * 58]), TY); + T12 = BYTWJ(&(W[TWVL * 26]), T11); + T5Z = VADD(T5X, T5Y); + T6a = VSUB(T5Y, T5X); + T2R = VSUB(T2L, T2Q); + T49 = VADD(T2Q, T2L); + T14 = BYTWJ(&(W[TWVL * 90]), T13); + T18 = BYTWJ(&(W[TWVL * 106]), T17); + T5q = VSUB(TX, TZ); + T10 = VADD(TX, TZ); + T1a = BYTWJ(&(W[TWVL * 42]), T19); + } + T6b = VFMA(LDK(KP707106781), T6a, T69); + T7v = VFNMS(LDK(KP707106781), T6a, T69); + T60 = VFMA(LDK(KP707106781), T5Z, T5W); + T7s = VFNMS(LDK(KP707106781), T5Z, T5W); + T4a = VADD(T48, T49); + T4I = VSUB(T48, T49); + T5v = VSUB(T14, T12); + T15 = VADD(T12, T14); + T1b = VADD(T18, T1a); + T5s = VSUB(T18, T1a); + } + T1c = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1e = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + } + } + } + { + V Th, T59, Tf, Tv, T5d, Tj, Tm, To; + { + V T5h, TQ, T5m, T5i, TO, TS, TJ, T4k, TD, TI; + { + V T4h, T16, TB, T1d, T1f, TE, TG, TA, Tz, TK, TM, TC; + Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T4h = VADD(T10, T15); + T16 = VSUB(T10, T15); + TB = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + T1d = BYTWJ(&(W[TWVL * 10]), T1c); + T1f = BYTWJ(&(W[TWVL * 74]), T1e); + TE = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TG = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + TA = BYTWJ(&(W[TWVL * 2]), Tz); + TK = LD(&(x[WS(rs, 10)]), 
ms, &(x[0])); + TM = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + TC = BYTWJ(&(W[TWVL * 66]), TB); + { + V T1g, T5r, TF, TH, TL, TN, TP; + TP = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + T1g = VADD(T1d, T1f); + T5r = VSUB(T1d, T1f); + TF = BYTWJ(&(W[TWVL * 34]), TE); + TH = BYTWJ(&(W[TWVL * 98]), TG); + TL = BYTWJ(&(W[TWVL * 18]), TK); + TN = BYTWJ(&(W[TWVL * 82]), TM); + T5h = VSUB(TA, TC); + TD = VADD(TA, TC); + TQ = BYTWJ(&(W[TWVL * 114]), TP); + { + V T5w, T5t, T4i, T1h, TR; + T5w = VSUB(T5s, T5r); + T5t = VADD(T5r, T5s); + T4i = VADD(T1g, T1b); + T1h = VSUB(T1b, T1g); + T5m = VSUB(TF, TH); + TI = VADD(TF, TH); + T5i = VSUB(TL, TN); + TO = VADD(TL, TN); + TR = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T5u = VFMA(LDK(KP707106781), T5t, T5q); + T7h = VFNMS(LDK(KP707106781), T5t, T5q); + T5x = VFMA(LDK(KP707106781), T5w, T5v); + T7g = VFNMS(LDK(KP707106781), T5w, T5v); + T1i = VFNMS(LDK(KP414213562), T1h, T16); + T3a = VFMA(LDK(KP414213562), T16, T1h); + T4j = VADD(T4h, T4i); + T4C = VSUB(T4h, T4i); + TS = BYTWJ(&(W[TWVL * 50]), TR); + } + } + } + TJ = VSUB(TD, TI); + T4k = VADD(TD, TI); + { + V Tb, Td, Tr, T5j, TT, Tt, Tg; + Tb = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tr = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T5j = VSUB(TQ, TS); + TT = VADD(TQ, TS); + Tt = LD(&(x[WS(rs, 44)]), ms, &(x[0])); + Tg = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + { + V Ti, Tc, Te, Ts; + Ti = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tc = BYTWJ(&(W[TWVL * 6]), Tb); + Te = BYTWJ(&(W[TWVL * 70]), Td); + Ts = BYTWJ(&(W[TWVL * 22]), Tr); + { + V T5k, T5n, TU, T4l, Tu; + T5k = VADD(T5i, T5j); + T5n = VSUB(T5i, T5j); + TU = VSUB(TO, TT); + T4l = VADD(TO, TT); + Tu = BYTWJ(&(W[TWVL * 86]), Tt); + Th = BYTWJ(&(W[TWVL * 38]), Tg); + T59 = VSUB(Tc, Te); + Tf = VADD(Tc, Te); + T7e = VFNMS(LDK(KP707106781), T5k, T5h); + T5l = VFMA(LDK(KP707106781), T5k, T5h); + T7d = VFNMS(LDK(KP707106781), T5n, T5m); + T5o = VFMA(LDK(KP707106781), T5n, T5m); + T3b = VFMA(LDK(KP414213562), TJ, TU); + TV = 
VFNMS(LDK(KP414213562), TU, TJ); + T4B = VSUB(T4k, T4l); + T4m = VADD(T4k, T4l); + Tv = VADD(Ts, Tu); + T5d = VSUB(Tu, Ts); + Tj = BYTWJ(&(W[TWVL * 102]), Ti); + } + } + Tm = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + To = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + } + } + { + V T5b, T6m, Tl, T1A, T5G, T1Q, T5K, T1C, T1D, T5e, T6n, Tw, T1H, T1J; + { + V T1w, T1y, T1M, T1O, Tq, T5c, T1B; + T1w = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1y = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1M = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1O = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1B = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + { + V Tk, T5a, Tn, Tp; + Tk = VADD(Th, Tj); + T5a = VSUB(Th, Tj); + Tn = BYTWJ(&(W[TWVL * 118]), Tm); + Tp = BYTWJ(&(W[TWVL * 54]), To); + { + V T1x, T1z, T1N, T1P; + T1x = BYTWJ(&(W[TWVL * 8]), T1w); + T1z = BYTWJ(&(W[TWVL * 72]), T1y); + T1N = BYTWJ(&(W[TWVL * 24]), T1M); + T1P = BYTWJ(&(W[TWVL * 88]), T1O); + T5b = VFNMS(LDK(KP414213562), T5a, T59); + T6m = VFMA(LDK(KP414213562), T59, T5a); + T3X = VADD(Tf, Tk); + Tl = VSUB(Tf, Tk); + Tq = VADD(Tn, Tp); + T5c = VSUB(Tn, Tp); + T1A = VADD(T1x, T1z); + T5G = VSUB(T1x, T1z); + T1Q = VADD(T1N, T1P); + T5K = VSUB(T1N, T1P); + T1C = BYTWJ(&(W[TWVL * 40]), T1B); + } + } + T1D = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T5e = VFNMS(LDK(KP414213562), T5d, T5c); + T6n = VFMA(LDK(KP414213562), T5c, T5d); + T3Y = VADD(Tq, Tv); + Tw = VSUB(Tq, Tv); + T1H = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)])); + T1J = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + } + { + V T1I, T1K, T1F, T5H, T2k, T2l, T2z, T2B, T2j, T1E; + T2j = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1E = BYTWJ(&(W[TWVL * 104]), T1D); + T6o = VSUB(T6m, T6n); + T7b = VADD(T6m, T6n); + T5f = VADD(T5b, T5e); + T7C = VSUB(T5e, T5b); + Tx = VADD(Tl, Tw); + T38 = VSUB(Tw, Tl); + T1I = BYTWJ(&(W[TWVL * 120]), T1H); + T1K = BYTWJ(&(W[TWVL * 56]), T1J); + T1F = VADD(T1C, T1E); + T5H = VSUB(T1C, T1E); + T2k = BYTWJ(&(W[TWVL * 4]), T2j); + T2l = 
LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2z = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2B = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + { + V T5I, T5R, T44, T1G, T2m, T2A, T2C, T5S, T5L, T1R, T45, T2o, T5J, T1L; + T2o = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T5J = VSUB(T1I, T1K); + T1L = VADD(T1I, T1K); + T5I = VFNMS(LDK(KP414213562), T5H, T5G); + T5R = VFMA(LDK(KP414213562), T5G, T5H); + T44 = VADD(T1A, T1F); + T1G = VSUB(T1A, T1F); + T2m = BYTWJ(&(W[TWVL * 68]), T2l); + T2A = BYTWJ(&(W[TWVL * 20]), T2z); + T2C = BYTWJ(&(W[TWVL * 84]), T2B); + T5S = VFNMS(LDK(KP414213562), T5J, T5K); + T5L = VFMA(LDK(KP414213562), T5K, T5J); + T1R = VSUB(T1L, T1Q); + T45 = VADD(T1L, T1Q); + T2p = BYTWJ(&(W[TWVL * 36]), T2o); + T61 = VSUB(T2k, T2m); + T2n = VADD(T2k, T2m); + T65 = VSUB(T2C, T2A); + T2D = VADD(T2A, T2C); + T7p = VSUB(T5I, T5L); + T5M = VADD(T5I, T5L); + T7m = VSUB(T5R, T5S); + T5T = VADD(T5R, T5S); + T4G = VSUB(T44, T45); + T46 = VADD(T44, T45); + T25 = VSUB(T1G, T1R); + T1S = VADD(T1G, T1R); + T2q = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + } + T2u = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2w = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + } + } + } + } + { + V T67, T7w, T6e, T7t, T3s, T3E, T39, T3D, T1k, T3k, T3t, T3c, T1T, T3v, T3w; + V T26, T2G, T3y, T3z, T2T; + { + V T4A, T4N, T47, T4v, T2r, T2v, T2x, T4s, T40, T3W, T3Z; + T4A = VSUB(T3U, T3V); + T3W = VADD(T3U, T3V); + T3Z = VADD(T3X, T3Y); + T4N = VSUB(T3Y, T3X); + T47 = VSUB(T43, T46); + T4v = VADD(T43, T46); + T2r = BYTWJ(&(W[TWVL * 100]), T2q); + T2v = BYTWJ(&(W[TWVL * 116]), T2u); + T2x = BYTWJ(&(W[TWVL * 52]), T2w); + T4s = VADD(T3W, T3Z); + T40 = VSUB(T3W, T3Z); + { + V T4O, T4n, T4R, T4H, T4E, T4W, T4u, T4y, T4d, T4J, T2F, T2S; + { + V T6c, T63, T2t, T4b, T6d, T66, T2E, T4c; + { + V T4D, T62, T2s, T64, T2y, T4t; + T4O = VSUB(T4C, T4B); + T4D = VADD(T4B, T4C); + T62 = VSUB(T2r, T2p); + T2s = VADD(T2p, T2r); + T64 = VSUB(T2v, T2x); + T2y = VADD(T2v, T2x); + T4t = VADD(T4m, T4j); 
+ T4n = VSUB(T4j, T4m); + T4R = VFMA(LDK(KP414213562), T4F, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4F); + T4E = VFMA(LDK(KP707106781), T4D, T4A); + T4W = VFNMS(LDK(KP707106781), T4D, T4A); + T6c = VFNMS(LDK(KP414213562), T61, T62); + T63 = VFMA(LDK(KP414213562), T62, T61); + T2t = VSUB(T2n, T2s); + T4b = VADD(T2n, T2s); + T6d = VFMA(LDK(KP414213562), T64, T65); + T66 = VFNMS(LDK(KP414213562), T65, T64); + T2E = VSUB(T2y, T2D); + T4c = VADD(T2y, T2D); + T4u = VADD(T4s, T4t); + T4y = VSUB(T4s, T4t); + } + T67 = VADD(T63, T66); + T7w = VSUB(T66, T63); + T6e = VADD(T6c, T6d); + T7t = VSUB(T6d, T6c); + T4d = VADD(T4b, T4c); + T4J = VSUB(T4c, T4b); + T2F = VADD(T2t, T2E); + T2S = VSUB(T2E, T2t); + } + { + V Ty, T1j, T4Q, T4K; + Ty = VFMA(LDK(KP707106781), Tx, Ta); + T3s = VFNMS(LDK(KP707106781), Tx, Ta); + T3E = VSUB(T1i, TV); + T1j = VADD(TV, T1i); + T39 = VFMA(LDK(KP707106781), T38, T37); + T3D = VFNMS(LDK(KP707106781), T38, T37); + T4Q = VFMA(LDK(KP414213562), T4I, T4J); + T4K = VFNMS(LDK(KP414213562), T4J, T4I); + { + V T4w, T4e, T4P, T4Z; + T4w = VADD(T4a, T4d); + T4e = VSUB(T4a, T4d); + T4P = VFMA(LDK(KP707106781), T4O, T4N); + T4Z = VFNMS(LDK(KP707106781), T4O, T4N); + T1k = VFMA(LDK(KP923879532), T1j, Ty); + T3k = VFNMS(LDK(KP923879532), T1j, Ty); + { + V T4L, T50, T4S, T4X; + T4L = VADD(T4H, T4K); + T50 = VSUB(T4K, T4H); + T4S = VSUB(T4Q, T4R); + T4X = VADD(T4R, T4Q); + { + V T4f, T4o, T4x, T4z; + T4f = VADD(T47, T4e); + T4o = VSUB(T4e, T47); + T4x = VADD(T4v, T4w); + T4z = VSUB(T4w, T4v); + { + V T53, T51, T4M, T4U; + T53 = VFNMS(LDK(KP923879532), T50, T4Z); + T51 = VFMA(LDK(KP923879532), T50, T4Z); + T4M = VFNMS(LDK(KP923879532), T4L, T4E); + T4U = VFMA(LDK(KP923879532), T4L, T4E); + { + V T52, T4Y, T4T, T4V; + T52 = VFMA(LDK(KP923879532), T4X, T4W); + T4Y = VFNMS(LDK(KP923879532), T4X, T4W); + T4T = VFNMS(LDK(KP923879532), T4S, T4P); + T4V = VFMA(LDK(KP923879532), T4S, T4P); + { + V T4p, T4r, T4g, T4q; + T4p = VFNMS(LDK(KP707106781), T4o, T4n); + T4r = 
VFMA(LDK(KP707106781), T4o, T4n); + T4g = VFNMS(LDK(KP707106781), T4f, T40); + T4q = VFMA(LDK(KP707106781), T4f, T40); + ST(&(x[WS(rs, 16)]), VFMAI(T4z, T4y), ms, &(x[0])); + ST(&(x[WS(rs, 48)]), VFNMSI(T4z, T4y), ms, &(x[0])); + ST(&(x[0]), VADD(T4u, T4x), ms, &(x[0])); + ST(&(x[WS(rs, 32)]), VSUB(T4u, T4x), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VFNMSI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T51, T4Y), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VFMAI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T53, T52), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 60)]), VFNMSI(T4V, T4U), ms, &(x[0])); + ST(&(x[WS(rs, 36)]), VFMAI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T4T, T4M), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 56)]), VFNMSI(T4r, T4q), ms, &(x[0])); + ST(&(x[WS(rs, 40)]), VFMAI(T4p, T4g), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T4p, T4g), ms, &(x[0])); + T3t = VADD(T3b, T3a); + T3c = VSUB(T3a, T3b); + } + } + } + } + } + } + T1T = VFMA(LDK(KP707106781), T1S, T1v); + T3v = VFNMS(LDK(KP707106781), T1S, T1v); + T3w = VFNMS(LDK(KP707106781), T25, T24); + T26 = VFMA(LDK(KP707106781), T25, T24); + T2G = VFMA(LDK(KP707106781), T2F, T2i); + T3y = VFNMS(LDK(KP707106781), T2F, T2i); + T3z = VFNMS(LDK(KP707106781), T2S, T2R); + T2T = VFMA(LDK(KP707106781), T2S, T2R); + } + } + } + { + V T3u, T3M, T3F, T3P, T3x, T3H, T3q, T3m, T3h, T3j, T3r, T3p, T2W, T3i; + { + V T3d, T3n, T27, T3f, T2U, T3e; + T3d = VFMA(LDK(KP923879532), T3c, T39); + T3n = VFNMS(LDK(KP923879532), T3c, T39); + T27 = VFNMS(LDK(KP198912367), T26, T1T); + T3f = VFMA(LDK(KP198912367), T1T, T26); + T2U = VFNMS(LDK(KP198912367), T2T, T2G); + T3e = VFMA(LDK(KP198912367), T2G, T2T); + T3u = VFMA(LDK(KP923879532), T3t, T3s); + T3M = VFNMS(LDK(KP923879532), T3t, T3s); + { + V T3g, T3l, T2V, T3o; + T3g = VSUB(T3e, T3f); + T3l = VADD(T3f, T3e); + T2V = VADD(T27, T2U); + T3o = VSUB(T2U, T27); + T3F = 
VFNMS(LDK(KP923879532), T3E, T3D); + T3P = VFMA(LDK(KP923879532), T3E, T3D); + T3x = VFMA(LDK(KP668178637), T3w, T3v); + T3H = VFNMS(LDK(KP668178637), T3v, T3w); + T3q = VFMA(LDK(KP980785280), T3l, T3k); + T3m = VFNMS(LDK(KP980785280), T3l, T3k); + T3h = VFNMS(LDK(KP980785280), T3g, T3d); + T3j = VFMA(LDK(KP980785280), T3g, T3d); + T3r = VFNMS(LDK(KP980785280), T3o, T3n); + T3p = VFMA(LDK(KP980785280), T3o, T3n); + T2W = VFNMS(LDK(KP980785280), T2V, T1k); + T3i = VFMA(LDK(KP980785280), T2V, T1k); + } + } + { + V T7n, T7Z, T8j, T89, T7k, T7O, T8g, T7Y, T7H, T7R, T80, T7q, T7u, T82, T83; + V T7x; + { + V T7c, T7W, T7D, T87, T7f, T7F, T3A, T3G, T7E, T7i; + T7c = VFNMS(LDK(KP923879532), T7b, T7a); + T7W = VFMA(LDK(KP923879532), T7b, T7a); + T7D = VFNMS(LDK(KP923879532), T7C, T7B); + T87 = VFMA(LDK(KP923879532), T7C, T7B); + T7f = VFNMS(LDK(KP668178637), T7e, T7d); + T7F = VFMA(LDK(KP668178637), T7d, T7e); + ST(&(x[WS(rs, 46)]), VFNMSI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3p, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 50)]), VFMAI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3r, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 62)]), VFNMSI(T3j, T3i), ms, &(x[0])); + ST(&(x[WS(rs, 34)]), VFMAI(T3h, T2W), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3h, T2W), ms, &(x[0])); + T3A = VFMA(LDK(KP668178637), T3z, T3y); + T3G = VFNMS(LDK(KP668178637), T3y, T3z); + T7E = VFMA(LDK(KP668178637), T7g, T7h); + T7i = VFNMS(LDK(KP668178637), T7h, T7g); + T7n = VFNMS(LDK(KP923879532), T7m, T7l); + T7Z = VFMA(LDK(KP923879532), T7m, T7l); + { + V T3I, T3N, T3B, T3Q; + T3I = VSUB(T3G, T3H); + T3N = VADD(T3H, T3G); + T3B = VADD(T3x, T3A); + T3Q = VSUB(T3A, T3x); + { + V T7j, T88, T7G, T7X; + T7j = VADD(T7f, T7i); + T88 = VSUB(T7f, T7i); + T7G = VSUB(T7E, T7F); + T7X = VADD(T7F, T7E); + { + V T3S, T3O, T3J, T3L; + T3S = VFNMS(LDK(KP831469612), T3N, T3M); + T3O = VFMA(LDK(KP831469612), T3N, T3M); + T3J = VFNMS(LDK(KP831469612), 
T3I, T3F); + T3L = VFMA(LDK(KP831469612), T3I, T3F); + { + V T3T, T3R, T3C, T3K; + T3T = VFMA(LDK(KP831469612), T3Q, T3P); + T3R = VFNMS(LDK(KP831469612), T3Q, T3P); + T3C = VFNMS(LDK(KP831469612), T3B, T3u); + T3K = VFMA(LDK(KP831469612), T3B, T3u); + T8j = VFNMS(LDK(KP831469612), T88, T87); + T89 = VFMA(LDK(KP831469612), T88, T87); + T7k = VFNMS(LDK(KP831469612), T7j, T7c); + T7O = VFMA(LDK(KP831469612), T7j, T7c); + T8g = VFNMS(LDK(KP831469612), T7X, T7W); + T7Y = VFMA(LDK(KP831469612), T7X, T7W); + T7H = VFNMS(LDK(KP831469612), T7G, T7D); + T7R = VFMA(LDK(KP831469612), T7G, T7D); + ST(&(x[WS(rs, 42)]), VFMAI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3R, T3O), ms, &(x[0])); + ST(&(x[WS(rs, 54)]), VFNMSI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3T, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 58)]), VFMAI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T3L, T3K), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T3J, T3C), ms, &(x[0])); + ST(&(x[WS(rs, 38)]), VFNMSI(T3J, T3C), ms, &(x[0])); + T80 = VFNMS(LDK(KP923879532), T7p, T7o); + T7q = VFMA(LDK(KP923879532), T7p, T7o); + } + } + } + } + T7u = VFNMS(LDK(KP923879532), T7t, T7s); + T82 = VFMA(LDK(KP923879532), T7t, T7s); + T83 = VFNMS(LDK(KP923879532), T7w, T7v); + T7x = VFMA(LDK(KP923879532), T7w, T7v); + } + { + V T5g, T6I, T6p, T6T, T5p, T6q, T6r, T5y; + T5g = VFMA(LDK(KP923879532), T5f, T58); + T6I = VFNMS(LDK(KP923879532), T5f, T58); + { + V T7r, T7I, T7y, T7J; + T7r = VFNMS(LDK(KP534511135), T7q, T7n); + T7I = VFMA(LDK(KP534511135), T7n, T7q); + T7y = VFNMS(LDK(KP534511135), T7x, T7u); + T7J = VFMA(LDK(KP534511135), T7u, T7x); + { + V T81, T8a, T84, T8b; + T81 = VFMA(LDK(KP303346683), T80, T7Z); + T8a = VFNMS(LDK(KP303346683), T7Z, T80); + T84 = VFMA(LDK(KP303346683), T83, T82); + T8b = VFNMS(LDK(KP303346683), T82, T83); + T6p = VFMA(LDK(KP923879532), T6o, T6l); + T6T = VFNMS(LDK(KP923879532), T6o, T6l); + T5p = VFNMS(LDK(KP198912367), T5o, T5l); + T6q = VFMA(LDK(KP198912367), T5l, T5o); 
+ { + V T7K, T7P, T7z, T7S; + T7K = VSUB(T7I, T7J); + T7P = VADD(T7I, T7J); + T7z = VADD(T7r, T7y); + T7S = VSUB(T7y, T7r); + { + V T8c, T8h, T85, T8k; + T8c = VSUB(T8a, T8b); + T8h = VADD(T8a, T8b); + T85 = VADD(T81, T84); + T8k = VSUB(T84, T81); + { + V T7Q, T7U, T7L, T7N; + T7Q = VFNMS(LDK(KP881921264), T7P, T7O); + T7U = VFMA(LDK(KP881921264), T7P, T7O); + T7L = VFNMS(LDK(KP881921264), T7K, T7H); + T7N = VFMA(LDK(KP881921264), T7K, T7H); + { + V T7T, T7V, T7A, T7M; + T7T = VFNMS(LDK(KP881921264), T7S, T7R); + T7V = VFMA(LDK(KP881921264), T7S, T7R); + T7A = VFNMS(LDK(KP881921264), T7z, T7k); + T7M = VFMA(LDK(KP881921264), T7z, T7k); + { + V T8i, T8m, T8d, T8f; + T8i = VFMA(LDK(KP956940335), T8h, T8g); + T8m = VFNMS(LDK(KP956940335), T8h, T8g); + T8d = VFNMS(LDK(KP956940335), T8c, T89); + T8f = VFMA(LDK(KP956940335), T8c, T89); + { + V T8l, T8n, T86, T8e; + T8l = VFMA(LDK(KP956940335), T8k, T8j); + T8n = VFNMS(LDK(KP956940335), T8k, T8j); + T86 = VFNMS(LDK(KP956940335), T85, T7Y); + T8e = VFMA(LDK(KP956940335), T85, T7Y); + ST(&(x[WS(rs, 53)]), VFNMSI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T7V, T7U), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 43)]), VFMAI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VFNMSI(T7T, T7Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 59)]), VFMAI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T7N, T7M), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFMAI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 37)]), VFNMSI(T7L, T7A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 51)]), VFMAI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T8n, T8m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 45)]), VFNMSI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T8l, T8i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 61)]), VFNMSI(T8f, T8e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 35)]), VFMAI(T8d, T86), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), 
VFNMSI(T8d, T86), ms, &(x[WS(rs, 1)])); + T6r = VFMA(LDK(KP198912367), T5u, T5x); + T5y = VFNMS(LDK(KP198912367), T5x, T5u); + } + } + } + } + } + } + } + } + { + V T5N, T5U, T68, T5z, T6U, T6f; + T5N = VFMA(LDK(KP923879532), T5M, T5F); + T6L = VFNMS(LDK(KP923879532), T5M, T5F); + T6M = VFNMS(LDK(KP923879532), T5T, T5Q); + T5U = VFMA(LDK(KP923879532), T5T, T5Q); + T68 = VFMA(LDK(KP923879532), T67, T60); + T6O = VFNMS(LDK(KP923879532), T67, T60); + T5z = VADD(T5p, T5y); + T6U = VSUB(T5y, T5p); + T6P = VFNMS(LDK(KP923879532), T6e, T6b); + T6f = VFMA(LDK(KP923879532), T6e, T6b); + { + V T5V, T6u, T6g, T6v, T6s, T6J; + T6s = VSUB(T6q, T6r); + T6J = VADD(T6q, T6r); + T5V = VFNMS(LDK(KP098491403), T5U, T5N); + T6u = VFMA(LDK(KP098491403), T5N, T5U); + T75 = VFNMS(LDK(KP980785280), T6U, T6T); + T6V = VFMA(LDK(KP980785280), T6U, T6T); + T5A = VFMA(LDK(KP980785280), T5z, T5g); + T6A = VFNMS(LDK(KP980785280), T5z, T5g); + T6g = VFNMS(LDK(KP098491403), T6f, T68); + T6v = VFMA(LDK(KP098491403), T68, T6f); + T72 = VFNMS(LDK(KP980785280), T6J, T6I); + T6K = VFMA(LDK(KP980785280), T6J, T6I); + T6t = VFMA(LDK(KP980785280), T6s, T6p); + T6D = VFNMS(LDK(KP980785280), T6s, T6p); + T6w = VSUB(T6u, T6v); + T6B = VADD(T6u, T6v); + T6h = VADD(T5V, T6g); + T6E = VSUB(T6g, T5V); + } + } + } + } + } + } + } + { + V T6W, T6N, T6G, T6C, T6z, T6x, T6H, T6F, T6y, T6i, T6X, T6Q; + T6W = VFNMS(LDK(KP820678790), T6L, T6M); + T6N = VFMA(LDK(KP820678790), T6M, T6L); + T6G = VFMA(LDK(KP995184726), T6B, T6A); + T6C = VFNMS(LDK(KP995184726), T6B, T6A); + T6z = VFMA(LDK(KP995184726), T6w, T6t); + T6x = VFNMS(LDK(KP995184726), T6w, T6t); + T6H = VFMA(LDK(KP995184726), T6E, T6D); + T6F = VFNMS(LDK(KP995184726), T6E, T6D); + T6y = VFMA(LDK(KP995184726), T6h, T5A); + T6i = VFNMS(LDK(KP995184726), T6h, T5A); + T6X = VFNMS(LDK(KP820678790), T6O, T6P); + T6Q = VFMA(LDK(KP820678790), T6P, T6O); + { + V T73, T6Y, T76, T6R; + ST(&(x[WS(rs, 49)]), VFNMSI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), 
VFMAI(T6H, T6G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VFMAI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T6F, T6C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 63)]), VFMAI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T6z, T6y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFMAI(T6x, T6i), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 33)]), VFNMSI(T6x, T6i), ms, &(x[WS(rs, 1)])); + T73 = VADD(T6W, T6X); + T6Y = VSUB(T6W, T6X); + T76 = VSUB(T6Q, T6N); + T6R = VADD(T6N, T6Q); + { + V T78, T74, T71, T6Z, T79, T77, T70, T6S; + T78 = VFNMS(LDK(KP773010453), T73, T72); + T74 = VFMA(LDK(KP773010453), T73, T72); + T71 = VFMA(LDK(KP773010453), T6Y, T6V); + T6Z = VFNMS(LDK(KP773010453), T6Y, T6V); + T79 = VFNMS(LDK(KP773010453), T76, T75); + T77 = VFMA(LDK(KP773010453), T76, T75); + T70 = VFMA(LDK(KP773010453), T6R, T6K); + T6S = VFNMS(LDK(KP773010453), T6R, T6K); + ST(&(x[WS(rs, 55)]), VFMAI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T79, T78), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VFNMSI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VFMAI(T77, T74), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 57)]), VFNMSI(T71, T70), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 39)]), VFMAI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFNMSI(T6Z, T6S), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + 
VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 64, XSIMD_STRING("t2fv_64"), twinstr, &GENUS, {261, 126, 258, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_64) (planner *p) { + X(kdft_dit_register) (p, t2fv_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t2fv_64 -include t2f.h */ + +/* + * This function contains 519 FP additions, 250 FP multiplications, + * (or, 467 additions, 198 multiplications, 52 fused multiply/add), + * 107 stack variables, 15 constants, and 128 memory accesses + */ +#include "t2f.h" + +static void t2fv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP995184726, +0.995184726672196886244836953109479921575474869); + DVK(KP098017140, +0.098017140329560601994195563888641845861136673); + DVK(KP773010453, +0.773010453362736960810906609758469800971041293); + DVK(KP634393284, +0.634393284163645498215171613225493370675687095); + DVK(KP471396736, +0.471396736825997648556387625905254377657460319); + DVK(KP881921264, +0.881921264348355029712756863660388349508442621); + DVK(KP290284677, +0.290284677254462367636192375817395274691476278); + DVK(KP956940335, +0.956940335732208864935797886980269969482849206); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, 
+0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(64, rs)) { + V Tg, T4a, T6r, T7f, T3o, T4B, T5q, T7e, T5R, T62, T28, T4o, T2g, T4l, T7n; + V T7Z, T68, T6j, T2C, T4s, T3a, T4v, T7u, T82, T7E, T7F, T7V, T5F, T6u, T1k; + V T4e, T1r, T4d, T7B, T7C, T7W, T5M, T6v, TV, T4g, T12, T4h, T7h, T7i, TD; + V T4C, T3h, T4b, T5x, T6s, T1R, T4m, T7q, T80, T2j, T4p, T5Y, T63, T2Z, T4w; + V T7x, T83, T33, T4t, T6f, T6k; + { + V T1, T3, T3m, T3k, Tb, Td, Te, T6, T8, T9, T2, T3l, T3j; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0])); + T3 = BYTWJ(&(W[TWVL * 62]), T2); + T3l = LD(&(x[WS(rs, 48)]), ms, &(x[0])); + T3m = BYTWJ(&(W[TWVL * 94]), T3l); + T3j = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T3k = BYTWJ(&(W[TWVL * 30]), T3j); + { + V Ta, Tc, T5, T7; + Ta = LD(&(x[WS(rs, 56)]), ms, &(x[0])); + Tb = BYTWJ(&(W[TWVL * 110]), Ta); + Tc = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + Td = BYTWJ(&(W[TWVL * 46]), Tc); + Te = VSUB(Tb, Td); + T5 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T6 = BYTWJ(&(W[TWVL * 14]), T5); + T7 = LD(&(x[WS(rs, 40)]), ms, &(x[0])); + T8 = BYTWJ(&(W[TWVL * 78]), T7); + T9 = VSUB(T6, T8); + } + { + V T4, Tf, T6p, T6q; + T4 = VSUB(T1, T3); + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VADD(T4, Tf); + T4a = VSUB(T4, Tf); + T6p = VADD(Tb, Td); + T6q = VADD(T6, T8); + T6r = VSUB(T6p, T6q); + T7f = VADD(T6q, T6p); + } + { + V T3i, T3n, T5o, T5p; + T3i = VMUL(LDK(KP707106781), VSUB(Te, T9)); + T3n = VSUB(T3k, T3m); + T3o = VSUB(T3i, T3n); + T4B = VADD(T3n, T3i); + T5o = VADD(T1, T3); + T5p = VADD(T3k, T3m); + T5q = VSUB(T5o, T5p); + T7e = VADD(T5o, T5p); + } + } + { + V T24, T26, T5Q, T2b, T2d, T5P, T1W, T60, T21, T61, T22, T27; + { + V T23, T25, T2a, T2c; + T23 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T24 = 
BYTWJ(&(W[TWVL * 32]), T23); + T25 = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)])); + T26 = BYTWJ(&(W[TWVL * 96]), T25); + T5Q = VADD(T24, T26); + T2a = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T2b = BYTWJ(&(W[0]), T2a); + T2c = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)])); + T2d = BYTWJ(&(W[TWVL * 64]), T2c); + T5P = VADD(T2b, T2d); + } + { + V T1T, T1V, T1S, T1U; + T1S = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)])); + T1T = BYTWJ(&(W[TWVL * 112]), T1S); + T1U = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T1V = BYTWJ(&(W[TWVL * 48]), T1U); + T1W = VSUB(T1T, T1V); + T60 = VADD(T1T, T1V); + } + { + V T1Y, T20, T1X, T1Z; + T1X = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1Y = BYTWJ(&(W[TWVL * 16]), T1X); + T1Z = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)])); + T20 = BYTWJ(&(W[TWVL * 80]), T1Z); + T21 = VSUB(T1Y, T20); + T61 = VADD(T1Y, T20); + } + T5R = VSUB(T5P, T5Q); + T62 = VSUB(T60, T61); + T22 = VMUL(LDK(KP707106781), VSUB(T1W, T21)); + T27 = VSUB(T24, T26); + T28 = VSUB(T22, T27); + T4o = VADD(T27, T22); + { + V T2e, T2f, T7l, T7m; + T2e = VSUB(T2b, T2d); + T2f = VMUL(LDK(KP707106781), VADD(T21, T1W)); + T2g = VADD(T2e, T2f); + T4l = VSUB(T2e, T2f); + T7l = VADD(T5P, T5Q); + T7m = VADD(T61, T60); + T7n = VADD(T7l, T7m); + T7Z = VSUB(T7l, T7m); + } + } + { + V T2n, T2p, T66, T36, T38, T67, T2v, T6i, T2A, T6h, T2q, T2B; + { + V T2m, T2o, T35, T37; + T2m = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)])); + T2n = BYTWJ(&(W[TWVL * 124]), T2m); + T2o = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T2p = BYTWJ(&(W[TWVL * 60]), T2o); + T66 = VADD(T2n, T2p); + T35 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T36 = BYTWJ(&(W[TWVL * 28]), T35); + T37 = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)])); + T38 = BYTWJ(&(W[TWVL * 92]), T37); + T67 = VADD(T36, T38); + } + { + V T2s, T2u, T2r, T2t; + T2r = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T2s = BYTWJ(&(W[TWVL * 12]), T2r); + T2t = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)])); + T2u = BYTWJ(&(W[TWVL * 76]), T2t); + T2v = VSUB(T2s, T2u); + 
T6i = VADD(T2s, T2u); + } + { + V T2x, T2z, T2w, T2y; + T2w = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)])); + T2x = BYTWJ(&(W[TWVL * 108]), T2w); + T2y = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T2z = BYTWJ(&(W[TWVL * 44]), T2y); + T2A = VSUB(T2x, T2z); + T6h = VADD(T2x, T2z); + } + T68 = VSUB(T66, T67); + T6j = VSUB(T6h, T6i); + T2q = VSUB(T2n, T2p); + T2B = VMUL(LDK(KP707106781), VADD(T2v, T2A)); + T2C = VADD(T2q, T2B); + T4s = VSUB(T2q, T2B); + { + V T34, T39, T7s, T7t; + T34 = VMUL(LDK(KP707106781), VSUB(T2A, T2v)); + T39 = VSUB(T36, T38); + T3a = VSUB(T34, T39); + T4v = VADD(T39, T34); + T7s = VADD(T66, T67); + T7t = VADD(T6i, T6h); + T7u = VADD(T7s, T7t); + T82 = VSUB(T7s, T7t); + } + } + { + V T1g, T1i, T5A, T1m, T1o, T5z, T18, T5C, T1d, T5D, T5B, T5E; + { + V T1f, T1h, T1l, T1n; + T1f = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1g = BYTWJ(&(W[TWVL * 34]), T1f); + T1h = LD(&(x[WS(rs, 50)]), ms, &(x[0])); + T1i = BYTWJ(&(W[TWVL * 98]), T1h); + T5A = VADD(T1g, T1i); + T1l = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T1m = BYTWJ(&(W[TWVL * 2]), T1l); + T1n = LD(&(x[WS(rs, 34)]), ms, &(x[0])); + T1o = BYTWJ(&(W[TWVL * 66]), T1n); + T5z = VADD(T1m, T1o); + } + { + V T15, T17, T14, T16; + T14 = LD(&(x[WS(rs, 58)]), ms, &(x[0])); + T15 = BYTWJ(&(W[TWVL * 114]), T14); + T16 = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T17 = BYTWJ(&(W[TWVL * 50]), T16); + T18 = VSUB(T15, T17); + T5C = VADD(T15, T17); + } + { + V T1a, T1c, T19, T1b; + T19 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T1a = BYTWJ(&(W[TWVL * 18]), T19); + T1b = LD(&(x[WS(rs, 42)]), ms, &(x[0])); + T1c = BYTWJ(&(W[TWVL * 82]), T1b); + T1d = VSUB(T1a, T1c); + T5D = VADD(T1a, T1c); + } + T7E = VADD(T5z, T5A); + T7F = VADD(T5D, T5C); + T7V = VSUB(T7E, T7F); + T5B = VSUB(T5z, T5A); + T5E = VSUB(T5C, T5D); + T5F = VFMA(LDK(KP923879532), T5B, VMUL(LDK(KP382683432), T5E)); + T6u = VFNMS(LDK(KP382683432), T5B, VMUL(LDK(KP923879532), T5E)); + { + V T1e, T1j, T1p, T1q; + T1e = VMUL(LDK(KP707106781), VSUB(T18, T1d)); + T1j = VSUB(T1g, 
T1i); + T1k = VSUB(T1e, T1j); + T4e = VADD(T1j, T1e); + T1p = VSUB(T1m, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T1d, T18)); + T1r = VADD(T1p, T1q); + T4d = VSUB(T1p, T1q); + } + } + { + V TG, TI, T5G, TY, T10, T5H, TO, T5K, TT, T5J, T5I, T5L; + { + V TF, TH, TX, TZ; + TF = LD(&(x[WS(rs, 62)]), ms, &(x[0])); + TG = BYTWJ(&(W[TWVL * 122]), TF); + TH = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + TI = BYTWJ(&(W[TWVL * 58]), TH); + T5G = VADD(TG, TI); + TX = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TY = BYTWJ(&(W[TWVL * 26]), TX); + TZ = LD(&(x[WS(rs, 46)]), ms, &(x[0])); + T10 = BYTWJ(&(W[TWVL * 90]), TZ); + T5H = VADD(TY, T10); + } + { + V TL, TN, TK, TM; + TK = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TL = BYTWJ(&(W[TWVL * 10]), TK); + TM = LD(&(x[WS(rs, 38)]), ms, &(x[0])); + TN = BYTWJ(&(W[TWVL * 74]), TM); + TO = VSUB(TL, TN); + T5K = VADD(TL, TN); + } + { + V TQ, TS, TP, TR; + TP = LD(&(x[WS(rs, 54)]), ms, &(x[0])); + TQ = BYTWJ(&(W[TWVL * 106]), TP); + TR = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TS = BYTWJ(&(W[TWVL * 42]), TR); + TT = VSUB(TQ, TS); + T5J = VADD(TQ, TS); + } + T7B = VADD(T5G, T5H); + T7C = VADD(T5K, T5J); + T7W = VSUB(T7B, T7C); + T5I = VSUB(T5G, T5H); + T5L = VSUB(T5J, T5K); + T5M = VFNMS(LDK(KP382683432), T5L, VMUL(LDK(KP923879532), T5I)); + T6v = VFMA(LDK(KP382683432), T5I, VMUL(LDK(KP923879532), T5L)); + { + V TJ, TU, TW, T11; + TJ = VSUB(TG, TI); + TU = VMUL(LDK(KP707106781), VADD(TO, TT)); + TV = VADD(TJ, TU); + T4g = VSUB(TJ, TU); + TW = VMUL(LDK(KP707106781), VSUB(TT, TO)); + T11 = VSUB(TY, T10); + T12 = VSUB(TW, T11); + T4h = VADD(T11, TW); + } + } + { + V Tl, T5r, TB, T5v, Tq, T5s, Tw, T5u, Tr, TC; + { + V Ti, Tk, Th, Tj; + Th = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Ti = BYTWJ(&(W[TWVL * 6]), Th); + Tj = LD(&(x[WS(rs, 36)]), ms, &(x[0])); + Tk = BYTWJ(&(W[TWVL * 70]), Tj); + Tl = VSUB(Ti, Tk); + T5r = VADD(Ti, Tk); + } + { + V Ty, TA, Tx, Tz; + Tx = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Ty = BYTWJ(&(W[TWVL * 22]), Tx); + Tz = LD(&(x[WS(rs, 44)]), ms, 
&(x[0])); + TA = BYTWJ(&(W[TWVL * 86]), Tz); + TB = VSUB(Ty, TA); + T5v = VADD(Ty, TA); + } + { + V Tn, Tp, Tm, To; + Tm = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tn = BYTWJ(&(W[TWVL * 38]), Tm); + To = LD(&(x[WS(rs, 52)]), ms, &(x[0])); + Tp = BYTWJ(&(W[TWVL * 102]), To); + Tq = VSUB(Tn, Tp); + T5s = VADD(Tn, Tp); + } + { + V Tt, Tv, Ts, Tu; + Ts = LD(&(x[WS(rs, 60)]), ms, &(x[0])); + Tt = BYTWJ(&(W[TWVL * 118]), Ts); + Tu = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tv = BYTWJ(&(W[TWVL * 54]), Tu); + Tw = VSUB(Tt, Tv); + T5u = VADD(Tt, Tv); + } + T7h = VADD(T5r, T5s); + T7i = VADD(T5u, T5v); + Tr = VFNMS(LDK(KP382683432), Tq, VMUL(LDK(KP923879532), Tl)); + TC = VFMA(LDK(KP923879532), Tw, VMUL(LDK(KP382683432), TB)); + TD = VADD(Tr, TC); + T4C = VSUB(TC, Tr); + { + V T3f, T3g, T5t, T5w; + T3f = VFNMS(LDK(KP923879532), TB, VMUL(LDK(KP382683432), Tw)); + T3g = VFMA(LDK(KP382683432), Tl, VMUL(LDK(KP923879532), Tq)); + T3h = VSUB(T3f, T3g); + T4b = VADD(T3g, T3f); + T5t = VSUB(T5r, T5s); + T5w = VSUB(T5u, T5v); + T5x = VMUL(LDK(KP707106781), VADD(T5t, T5w)); + T6s = VMUL(LDK(KP707106781), VSUB(T5w, T5t)); + } + } + { + V T1z, T5V, T1P, T5T, T1E, T5W, T1K, T5S; + { + V T1w, T1y, T1v, T1x; + T1v = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)])); + T1w = BYTWJ(&(W[TWVL * 120]), T1v); + T1x = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1y = BYTWJ(&(W[TWVL * 56]), T1x); + T1z = VSUB(T1w, T1y); + T5V = VADD(T1w, T1y); + } + { + V T1M, T1O, T1L, T1N; + T1L = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1M = BYTWJ(&(W[TWVL * 40]), T1L); + T1N = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)])); + T1O = BYTWJ(&(W[TWVL * 104]), T1N); + T1P = VSUB(T1M, T1O); + T5T = VADD(T1M, T1O); + } + { + V T1B, T1D, T1A, T1C; + T1A = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1B = BYTWJ(&(W[TWVL * 24]), T1A); + T1C = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)])); + T1D = BYTWJ(&(W[TWVL * 88]), T1C); + T1E = VSUB(T1B, T1D); + T5W = VADD(T1B, T1D); + } + { + V T1H, T1J, T1G, T1I; + T1G = LD(&(x[WS(rs, 5)]), ms, 
&(x[WS(rs, 1)])); + T1H = BYTWJ(&(W[TWVL * 8]), T1G); + T1I = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)])); + T1J = BYTWJ(&(W[TWVL * 72]), T1I); + T1K = VSUB(T1H, T1J); + T5S = VADD(T1H, T1J); + } + { + V T1F, T1Q, T7o, T7p; + T1F = VFNMS(LDK(KP923879532), T1E, VMUL(LDK(KP382683432), T1z)); + T1Q = VFMA(LDK(KP382683432), T1K, VMUL(LDK(KP923879532), T1P)); + T1R = VSUB(T1F, T1Q); + T4m = VADD(T1Q, T1F); + T7o = VADD(T5S, T5T); + T7p = VADD(T5V, T5W); + T7q = VADD(T7o, T7p); + T80 = VSUB(T7p, T7o); + } + { + V T2h, T2i, T5U, T5X; + T2h = VFNMS(LDK(KP382683432), T1P, VMUL(LDK(KP923879532), T1K)); + T2i = VFMA(LDK(KP923879532), T1z, VMUL(LDK(KP382683432), T1E)); + T2j = VADD(T2h, T2i); + T4p = VSUB(T2i, T2h); + T5U = VSUB(T5S, T5T); + T5X = VSUB(T5V, T5W); + T5Y = VMUL(LDK(KP707106781), VADD(T5U, T5X)); + T63 = VMUL(LDK(KP707106781), VSUB(T5X, T5U)); + } + } + { + V T2H, T69, T2X, T6d, T2M, T6a, T2S, T6c; + { + V T2E, T2G, T2D, T2F; + T2D = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T2E = BYTWJ(&(W[TWVL * 4]), T2D); + T2F = LD(&(x[WS(rs, 35)]), ms, &(x[WS(rs, 1)])); + T2G = BYTWJ(&(W[TWVL * 68]), T2F); + T2H = VSUB(T2E, T2G); + T69 = VADD(T2E, T2G); + } + { + V T2U, T2W, T2T, T2V; + T2T = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T2U = BYTWJ(&(W[TWVL * 20]), T2T); + T2V = LD(&(x[WS(rs, 43)]), ms, &(x[WS(rs, 1)])); + T2W = BYTWJ(&(W[TWVL * 84]), T2V); + T2X = VSUB(T2U, T2W); + T6d = VADD(T2U, T2W); + } + { + V T2J, T2L, T2I, T2K; + T2I = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T2J = BYTWJ(&(W[TWVL * 36]), T2I); + T2K = LD(&(x[WS(rs, 51)]), ms, &(x[WS(rs, 1)])); + T2L = BYTWJ(&(W[TWVL * 100]), T2K); + T2M = VSUB(T2J, T2L); + T6a = VADD(T2J, T2L); + } + { + V T2P, T2R, T2O, T2Q; + T2O = LD(&(x[WS(rs, 59)]), ms, &(x[WS(rs, 1)])); + T2P = BYTWJ(&(W[TWVL * 116]), T2O); + T2Q = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T2R = BYTWJ(&(W[TWVL * 52]), T2Q); + T2S = VSUB(T2P, T2R); + T6c = VADD(T2P, T2R); + } + { + V T2N, T2Y, T7v, T7w; + T2N = VFNMS(LDK(KP382683432), 
T2M, VMUL(LDK(KP923879532), T2H)); + T2Y = VFMA(LDK(KP923879532), T2S, VMUL(LDK(KP382683432), T2X)); + T2Z = VADD(T2N, T2Y); + T4w = VSUB(T2Y, T2N); + T7v = VADD(T69, T6a); + T7w = VADD(T6c, T6d); + T7x = VADD(T7v, T7w); + T83 = VSUB(T7w, T7v); + } + { + V T31, T32, T6b, T6e; + T31 = VFNMS(LDK(KP923879532), T2X, VMUL(LDK(KP382683432), T2S)); + T32 = VFMA(LDK(KP382683432), T2H, VMUL(LDK(KP923879532), T2M)); + T33 = VSUB(T31, T32); + T4t = VADD(T32, T31); + T6b = VSUB(T69, T6a); + T6e = VSUB(T6c, T6d); + T6f = VMUL(LDK(KP707106781), VADD(T6b, T6e)); + T6k = VMUL(LDK(KP707106781), VSUB(T6e, T6b)); + } + } + { + V T7k, T7M, T7R, T7T, T7z, T7I, T7H, T7N, T7O, T7S; + { + V T7g, T7j, T7P, T7Q; + T7g = VADD(T7e, T7f); + T7j = VADD(T7h, T7i); + T7k = VSUB(T7g, T7j); + T7M = VADD(T7g, T7j); + T7P = VADD(T7n, T7q); + T7Q = VADD(T7u, T7x); + T7R = VADD(T7P, T7Q); + T7T = VBYI(VSUB(T7Q, T7P)); + } + { + V T7r, T7y, T7D, T7G; + T7r = VSUB(T7n, T7q); + T7y = VSUB(T7u, T7x); + T7z = VMUL(LDK(KP707106781), VADD(T7r, T7y)); + T7I = VMUL(LDK(KP707106781), VSUB(T7y, T7r)); + T7D = VADD(T7B, T7C); + T7G = VADD(T7E, T7F); + T7H = VSUB(T7D, T7G); + T7N = VADD(T7G, T7D); + } + T7O = VADD(T7M, T7N); + ST(&(x[WS(rs, 32)]), VSUB(T7O, T7R), ms, &(x[0])); + ST(&(x[0]), VADD(T7O, T7R), ms, &(x[0])); + T7S = VSUB(T7M, T7N); + ST(&(x[WS(rs, 48)]), VSUB(T7S, T7T), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T7S, T7T), ms, &(x[0])); + { + V T7A, T7J, T7K, T7L; + T7A = VADD(T7k, T7z); + T7J = VBYI(VADD(T7H, T7I)); + ST(&(x[WS(rs, 56)]), VSUB(T7A, T7J), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T7A, T7J), ms, &(x[0])); + T7K = VSUB(T7k, T7z); + T7L = VBYI(VSUB(T7I, T7H)); + ST(&(x[WS(rs, 40)]), VSUB(T7K, T7L), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VADD(T7K, T7L), ms, &(x[0])); + } + } + { + V T7Y, T8j, T8c, T8k, T85, T8g, T89, T8h; + { + V T7U, T7X, T8a, T8b; + T7U = VSUB(T7e, T7f); + T7X = VMUL(LDK(KP707106781), VADD(T7V, T7W)); + T7Y = VADD(T7U, T7X); + T8j = VSUB(T7U, T7X); + T8a = 
VFNMS(LDK(KP382683432), T7Z, VMUL(LDK(KP923879532), T80)); + T8b = VFMA(LDK(KP382683432), T82, VMUL(LDK(KP923879532), T83)); + T8c = VADD(T8a, T8b); + T8k = VSUB(T8b, T8a); + } + { + V T81, T84, T87, T88; + T81 = VFMA(LDK(KP923879532), T7Z, VMUL(LDK(KP382683432), T80)); + T84 = VFNMS(LDK(KP382683432), T83, VMUL(LDK(KP923879532), T82)); + T85 = VADD(T81, T84); + T8g = VSUB(T84, T81); + T87 = VSUB(T7i, T7h); + T88 = VMUL(LDK(KP707106781), VSUB(T7W, T7V)); + T89 = VADD(T87, T88); + T8h = VSUB(T88, T87); + } + { + V T86, T8d, T8m, T8n; + T86 = VADD(T7Y, T85); + T8d = VBYI(VADD(T89, T8c)); + ST(&(x[WS(rs, 60)]), VSUB(T86, T8d), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T86, T8d), ms, &(x[0])); + T8m = VBYI(VADD(T8h, T8g)); + T8n = VADD(T8j, T8k); + ST(&(x[WS(rs, 12)]), VADD(T8m, T8n), ms, &(x[0])); + ST(&(x[WS(rs, 52)]), VSUB(T8n, T8m), ms, &(x[0])); + } + { + V T8e, T8f, T8i, T8l; + T8e = VSUB(T7Y, T85); + T8f = VBYI(VSUB(T8c, T89)); + ST(&(x[WS(rs, 36)]), VSUB(T8e, T8f), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VADD(T8e, T8f), ms, &(x[0])); + T8i = VBYI(VSUB(T8g, T8h)); + T8l = VSUB(T8j, T8k); + ST(&(x[WS(rs, 20)]), VADD(T8i, T8l), ms, &(x[0])); + ST(&(x[WS(rs, 44)]), VSUB(T8l, T8i), ms, &(x[0])); + } + } + { + V T5O, T6H, T6x, T6F, T6n, T6I, T6A, T6E; + { + V T5y, T5N, T6t, T6w; + T5y = VADD(T5q, T5x); + T5N = VADD(T5F, T5M); + T5O = VADD(T5y, T5N); + T6H = VSUB(T5y, T5N); + T6t = VADD(T6r, T6s); + T6w = VADD(T6u, T6v); + T6x = VADD(T6t, T6w); + T6F = VSUB(T6w, T6t); + { + V T65, T6y, T6m, T6z; + { + V T5Z, T64, T6g, T6l; + T5Z = VADD(T5R, T5Y); + T64 = VADD(T62, T63); + T65 = VFMA(LDK(KP980785280), T5Z, VMUL(LDK(KP195090322), T64)); + T6y = VFNMS(LDK(KP195090322), T5Z, VMUL(LDK(KP980785280), T64)); + T6g = VADD(T68, T6f); + T6l = VADD(T6j, T6k); + T6m = VFNMS(LDK(KP195090322), T6l, VMUL(LDK(KP980785280), T6g)); + T6z = VFMA(LDK(KP195090322), T6g, VMUL(LDK(KP980785280), T6l)); + } + T6n = VADD(T65, T6m); + T6I = VSUB(T6z, T6y); + T6A = VADD(T6y, T6z); + T6E = VSUB(T6m, 
T65); + } + } + { + V T6o, T6B, T6K, T6L; + T6o = VADD(T5O, T6n); + T6B = VBYI(VADD(T6x, T6A)); + ST(&(x[WS(rs, 62)]), VSUB(T6o, T6B), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T6o, T6B), ms, &(x[0])); + T6K = VBYI(VADD(T6F, T6E)); + T6L = VADD(T6H, T6I); + ST(&(x[WS(rs, 14)]), VADD(T6K, T6L), ms, &(x[0])); + ST(&(x[WS(rs, 50)]), VSUB(T6L, T6K), ms, &(x[0])); + } + { + V T6C, T6D, T6G, T6J; + T6C = VSUB(T5O, T6n); + T6D = VBYI(VSUB(T6A, T6x)); + ST(&(x[WS(rs, 34)]), VSUB(T6C, T6D), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VADD(T6C, T6D), ms, &(x[0])); + T6G = VBYI(VSUB(T6E, T6F)); + T6J = VSUB(T6H, T6I); + ST(&(x[WS(rs, 18)]), VADD(T6G, T6J), ms, &(x[0])); + ST(&(x[WS(rs, 46)]), VSUB(T6J, T6G), ms, &(x[0])); + } + } + { + V T6O, T79, T6Z, T77, T6V, T7a, T72, T76; + { + V T6M, T6N, T6X, T6Y; + T6M = VSUB(T5q, T5x); + T6N = VSUB(T6v, T6u); + T6O = VADD(T6M, T6N); + T79 = VSUB(T6M, T6N); + T6X = VSUB(T6s, T6r); + T6Y = VSUB(T5M, T5F); + T6Z = VADD(T6X, T6Y); + T77 = VSUB(T6Y, T6X); + { + V T6R, T70, T6U, T71; + { + V T6P, T6Q, T6S, T6T; + T6P = VSUB(T5R, T5Y); + T6Q = VSUB(T63, T62); + T6R = VFMA(LDK(KP831469612), T6P, VMUL(LDK(KP555570233), T6Q)); + T70 = VFNMS(LDK(KP555570233), T6P, VMUL(LDK(KP831469612), T6Q)); + T6S = VSUB(T68, T6f); + T6T = VSUB(T6k, T6j); + T6U = VFNMS(LDK(KP555570233), T6T, VMUL(LDK(KP831469612), T6S)); + T71 = VFMA(LDK(KP555570233), T6S, VMUL(LDK(KP831469612), T6T)); + } + T6V = VADD(T6R, T6U); + T7a = VSUB(T71, T70); + T72 = VADD(T70, T71); + T76 = VSUB(T6U, T6R); + } + } + { + V T6W, T73, T7c, T7d; + T6W = VADD(T6O, T6V); + T73 = VBYI(VADD(T6Z, T72)); + ST(&(x[WS(rs, 58)]), VSUB(T6W, T73), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T6W, T73), ms, &(x[0])); + T7c = VBYI(VADD(T77, T76)); + T7d = VADD(T79, T7a); + ST(&(x[WS(rs, 10)]), VADD(T7c, T7d), ms, &(x[0])); + ST(&(x[WS(rs, 54)]), VSUB(T7d, T7c), ms, &(x[0])); + } + { + V T74, T75, T78, T7b; + T74 = VSUB(T6O, T6V); + T75 = VBYI(VSUB(T72, T6Z)); + ST(&(x[WS(rs, 38)]), VSUB(T74, T75), ms, 
&(x[0])); + ST(&(x[WS(rs, 26)]), VADD(T74, T75), ms, &(x[0])); + T78 = VBYI(VSUB(T76, T77)); + T7b = VSUB(T79, T7a); + ST(&(x[WS(rs, 22)]), VADD(T78, T7b), ms, &(x[0])); + ST(&(x[WS(rs, 42)]), VSUB(T7b, T78), ms, &(x[0])); + } + } + { + V T4k, T5h, T4R, T59, T4H, T5j, T4P, T4Y, T4z, T4S, T4K, T4O, T55, T5k, T5c; + V T5g; + { + V T4c, T57, T4j, T58, T4f, T4i; + T4c = VADD(T4a, T4b); + T57 = VSUB(T4C, T4B); + T4f = VFMA(LDK(KP831469612), T4d, VMUL(LDK(KP555570233), T4e)); + T4i = VFNMS(LDK(KP555570233), T4h, VMUL(LDK(KP831469612), T4g)); + T4j = VADD(T4f, T4i); + T58 = VSUB(T4i, T4f); + T4k = VADD(T4c, T4j); + T5h = VSUB(T58, T57); + T4R = VSUB(T4c, T4j); + T59 = VADD(T57, T58); + } + { + V T4D, T4W, T4G, T4X, T4E, T4F; + T4D = VADD(T4B, T4C); + T4W = VSUB(T4a, T4b); + T4E = VFNMS(LDK(KP555570233), T4d, VMUL(LDK(KP831469612), T4e)); + T4F = VFMA(LDK(KP555570233), T4g, VMUL(LDK(KP831469612), T4h)); + T4G = VADD(T4E, T4F); + T4X = VSUB(T4F, T4E); + T4H = VADD(T4D, T4G); + T5j = VSUB(T4W, T4X); + T4P = VSUB(T4G, T4D); + T4Y = VADD(T4W, T4X); + } + { + V T4r, T4I, T4y, T4J; + { + V T4n, T4q, T4u, T4x; + T4n = VADD(T4l, T4m); + T4q = VADD(T4o, T4p); + T4r = VFMA(LDK(KP956940335), T4n, VMUL(LDK(KP290284677), T4q)); + T4I = VFNMS(LDK(KP290284677), T4n, VMUL(LDK(KP956940335), T4q)); + T4u = VADD(T4s, T4t); + T4x = VADD(T4v, T4w); + T4y = VFNMS(LDK(KP290284677), T4x, VMUL(LDK(KP956940335), T4u)); + T4J = VFMA(LDK(KP290284677), T4u, VMUL(LDK(KP956940335), T4x)); + } + T4z = VADD(T4r, T4y); + T4S = VSUB(T4J, T4I); + T4K = VADD(T4I, T4J); + T4O = VSUB(T4y, T4r); + } + { + V T51, T5a, T54, T5b; + { + V T4Z, T50, T52, T53; + T4Z = VSUB(T4l, T4m); + T50 = VSUB(T4p, T4o); + T51 = VFMA(LDK(KP881921264), T4Z, VMUL(LDK(KP471396736), T50)); + T5a = VFNMS(LDK(KP471396736), T4Z, VMUL(LDK(KP881921264), T50)); + T52 = VSUB(T4s, T4t); + T53 = VSUB(T4w, T4v); + T54 = VFNMS(LDK(KP471396736), T53, VMUL(LDK(KP881921264), T52)); + T5b = VFMA(LDK(KP471396736), T52, VMUL(LDK(KP881921264), T53)); + 
} + T55 = VADD(T51, T54); + T5k = VSUB(T5b, T5a); + T5c = VADD(T5a, T5b); + T5g = VSUB(T54, T51); + } + { + V T4A, T4L, T5i, T5l; + T4A = VADD(T4k, T4z); + T4L = VBYI(VADD(T4H, T4K)); + ST(&(x[WS(rs, 61)]), VSUB(T4A, T4L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T4A, T4L), ms, &(x[WS(rs, 1)])); + T5i = VBYI(VSUB(T5g, T5h)); + T5l = VSUB(T5j, T5k); + ST(&(x[WS(rs, 21)]), VADD(T5i, T5l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 43)]), VSUB(T5l, T5i), ms, &(x[WS(rs, 1)])); + } + { + V T5m, T5n, T4M, T4N; + T5m = VBYI(VADD(T5h, T5g)); + T5n = VADD(T5j, T5k); + ST(&(x[WS(rs, 11)]), VADD(T5m, T5n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 53)]), VSUB(T5n, T5m), ms, &(x[WS(rs, 1)])); + T4M = VSUB(T4k, T4z); + T4N = VBYI(VSUB(T4K, T4H)); + ST(&(x[WS(rs, 35)]), VSUB(T4M, T4N), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VADD(T4M, T4N), ms, &(x[WS(rs, 1)])); + } + { + V T4Q, T4T, T56, T5d; + T4Q = VBYI(VSUB(T4O, T4P)); + T4T = VSUB(T4R, T4S); + ST(&(x[WS(rs, 19)]), VADD(T4Q, T4T), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 45)]), VSUB(T4T, T4Q), ms, &(x[WS(rs, 1)])); + T56 = VADD(T4Y, T55); + T5d = VBYI(VADD(T59, T5c)); + ST(&(x[WS(rs, 59)]), VSUB(T56, T5d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T56, T5d), ms, &(x[WS(rs, 1)])); + } + { + V T5e, T5f, T4U, T4V; + T5e = VSUB(T4Y, T55); + T5f = VBYI(VSUB(T5c, T59)); + ST(&(x[WS(rs, 37)]), VSUB(T5e, T5f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VADD(T5e, T5f), ms, &(x[WS(rs, 1)])); + T4U = VBYI(VADD(T4P, T4O)); + T4V = VADD(T4R, T4S); + ST(&(x[WS(rs, 13)]), VADD(T4U, T4V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 51)]), VSUB(T4V, T4U), ms, &(x[WS(rs, 1)])); + } + } + { + V T1u, T43, T3D, T3V, T3t, T45, T3B, T3K, T3d, T3E, T3w, T3A, T3R, T46, T3Y; + V T42; + { + V TE, T3T, T1t, T3U, T13, T1s; + TE = VSUB(Tg, TD); + T3T = VADD(T3o, T3h); + T13 = VFMA(LDK(KP195090322), TV, VMUL(LDK(KP980785280), T12)); + T1s = VFNMS(LDK(KP195090322), T1r, VMUL(LDK(KP980785280), T1k)); + T1t = VSUB(T13, T1s); + T3U = VADD(T1s, T13); + T1u 
= VADD(TE, T1t); + T43 = VSUB(T3U, T3T); + T3D = VSUB(TE, T1t); + T3V = VADD(T3T, T3U); + } + { + V T3p, T3I, T3s, T3J, T3q, T3r; + T3p = VSUB(T3h, T3o); + T3I = VADD(Tg, TD); + T3q = VFNMS(LDK(KP195090322), T12, VMUL(LDK(KP980785280), TV)); + T3r = VFMA(LDK(KP980785280), T1r, VMUL(LDK(KP195090322), T1k)); + T3s = VSUB(T3q, T3r); + T3J = VADD(T3r, T3q); + T3t = VADD(T3p, T3s); + T45 = VSUB(T3I, T3J); + T3B = VSUB(T3s, T3p); + T3K = VADD(T3I, T3J); + } + { + V T2l, T3u, T3c, T3v; + { + V T29, T2k, T30, T3b; + T29 = VSUB(T1R, T28); + T2k = VSUB(T2g, T2j); + T2l = VFMA(LDK(KP634393284), T29, VMUL(LDK(KP773010453), T2k)); + T3u = VFNMS(LDK(KP634393284), T2k, VMUL(LDK(KP773010453), T29)); + T30 = VSUB(T2C, T2Z); + T3b = VSUB(T33, T3a); + T3c = VFNMS(LDK(KP634393284), T3b, VMUL(LDK(KP773010453), T30)); + T3v = VFMA(LDK(KP773010453), T3b, VMUL(LDK(KP634393284), T30)); + } + T3d = VADD(T2l, T3c); + T3E = VSUB(T3v, T3u); + T3w = VADD(T3u, T3v); + T3A = VSUB(T3c, T2l); + } + { + V T3N, T3W, T3Q, T3X; + { + V T3L, T3M, T3O, T3P; + T3L = VADD(T28, T1R); + T3M = VADD(T2g, T2j); + T3N = VFMA(LDK(KP098017140), T3L, VMUL(LDK(KP995184726), T3M)); + T3W = VFNMS(LDK(KP098017140), T3M, VMUL(LDK(KP995184726), T3L)); + T3O = VADD(T2C, T2Z); + T3P = VADD(T3a, T33); + T3Q = VFNMS(LDK(KP098017140), T3P, VMUL(LDK(KP995184726), T3O)); + T3X = VFMA(LDK(KP995184726), T3P, VMUL(LDK(KP098017140), T3O)); + } + T3R = VADD(T3N, T3Q); + T46 = VSUB(T3X, T3W); + T3Y = VADD(T3W, T3X); + T42 = VSUB(T3Q, T3N); + } + { + V T3e, T3x, T44, T47; + T3e = VADD(T1u, T3d); + T3x = VBYI(VADD(T3t, T3w)); + ST(&(x[WS(rs, 57)]), VSUB(T3e, T3x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T3e, T3x), ms, &(x[WS(rs, 1)])); + T44 = VBYI(VSUB(T42, T43)); + T47 = VSUB(T45, T46); + ST(&(x[WS(rs, 17)]), VADD(T44, T47), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 47)]), VSUB(T47, T44), ms, &(x[WS(rs, 1)])); + } + { + V T48, T49, T3y, T3z; + T48 = VBYI(VADD(T43, T42)); + T49 = VADD(T45, T46); + ST(&(x[WS(rs, 15)]), VADD(T48, 
T49), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 49)]), VSUB(T49, T48), ms, &(x[WS(rs, 1)])); + T3y = VSUB(T1u, T3d); + T3z = VBYI(VSUB(T3w, T3t)); + ST(&(x[WS(rs, 39)]), VSUB(T3y, T3z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VADD(T3y, T3z), ms, &(x[WS(rs, 1)])); + } + { + V T3C, T3F, T3S, T3Z; + T3C = VBYI(VSUB(T3A, T3B)); + T3F = VSUB(T3D, T3E); + ST(&(x[WS(rs, 23)]), VADD(T3C, T3F), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 41)]), VSUB(T3F, T3C), ms, &(x[WS(rs, 1)])); + T3S = VADD(T3K, T3R); + T3Z = VBYI(VADD(T3V, T3Y)); + ST(&(x[WS(rs, 63)]), VSUB(T3S, T3Z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T3S, T3Z), ms, &(x[WS(rs, 1)])); + } + { + V T40, T41, T3G, T3H; + T40 = VSUB(T3K, T3R); + T41 = VBYI(VSUB(T3Y, T3V)); + ST(&(x[WS(rs, 33)]), VSUB(T40, T41), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VADD(T40, T41), ms, &(x[WS(rs, 1)])); + T3G = VBYI(VADD(T3B, T3A)); + T3H = VADD(T3D, T3E); + ST(&(x[WS(rs, 9)]), VADD(T3G, T3H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 55)]), VSUB(T3H, T3G), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + VTW(0, 8), + VTW(0, 9), + VTW(0, 10), + VTW(0, 11), + VTW(0, 12), + VTW(0, 13), + VTW(0, 14), + VTW(0, 15), + VTW(0, 16), + VTW(0, 17), + VTW(0, 18), + VTW(0, 19), + VTW(0, 20), + VTW(0, 21), + VTW(0, 22), + VTW(0, 23), + VTW(0, 24), + VTW(0, 25), + VTW(0, 26), + VTW(0, 27), + VTW(0, 28), + VTW(0, 29), + VTW(0, 30), + VTW(0, 31), + VTW(0, 32), + VTW(0, 33), + VTW(0, 34), + VTW(0, 35), + VTW(0, 36), + VTW(0, 37), + VTW(0, 38), + VTW(0, 39), + VTW(0, 40), + VTW(0, 41), + VTW(0, 42), + VTW(0, 43), + VTW(0, 44), + VTW(0, 45), + VTW(0, 46), + VTW(0, 47), + VTW(0, 48), + VTW(0, 49), + VTW(0, 50), + VTW(0, 51), + VTW(0, 52), + VTW(0, 53), + VTW(0, 54), + VTW(0, 55), + VTW(0, 56), + VTW(0, 57), + VTW(0, 58), + VTW(0, 59), + VTW(0, 60), + VTW(0, 61), + VTW(0, 62), + VTW(0, 63), + {TW_NEXT, VL, 0} +}; + 
+static const ct_desc desc = { 64, XSIMD_STRING("t2fv_64"), twinstr, &GENUS, {467, 198, 52, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_64) (planner *p) { + X(kdft_dit_register) (p, t2fv_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:18 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t2fv_8 -include t2f.h */ + +/* + * This function contains 33 FP additions, 24 FP multiplications, + * (or, 23 additions, 14 multiplications, 10 fused multiply/add), + * 36 stack variables, 1 constants, and 16 memory accesses + */ +#include "t2f.h" + +static void t2fv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, Th, Tj, T5, T7, Ta, Tc; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + { + V T3, Ti, Tk, T6, T8, Tb, Td; + T3 = BYTWJ(&(W[TWVL * 6]), T2); + Ti = BYTWJ(&(W[TWVL * 2]), Th); + Tk = BYTWJ(&(W[TWVL * 10]), Tj); + T6 = BYTWJ(&(W[0]), T5); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + Tb = BYTWJ(&(W[TWVL * 12]), Ta); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + { + V Tq, T4, Tr, Tl, Tt, T9, Tu, Te, Tw, Ts; + Tq = VADD(T1, T3); + T4 = VSUB(T1, T3); + Tr = VADD(Ti, Tk); + Tl = VSUB(Ti, Tk); + Tt = VADD(T6, T8); + T9 = VSUB(T6, T8); + Tu = VADD(Tb, Td); + Te = VSUB(Tb, Td); + Tw 
= VSUB(Tq, Tr); + Ts = VADD(Tq, Tr); + { + V Tx, Tv, Tm, Tf; + Tx = VSUB(Tu, Tt); + Tv = VADD(Tt, Tu); + Tm = VSUB(Te, T9); + Tf = VADD(T9, Te); + { + V Tp, Tn, To, Tg; + ST(&(x[WS(rs, 2)]), VFMAI(Tx, Tw), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(Tx, Tw), ms, &(x[0])); + ST(&(x[0]), VADD(Ts, Tv), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Ts, Tv), ms, &(x[0])); + Tp = VFMA(LDK(KP707106781), Tm, Tl); + Tn = VFNMS(LDK(KP707106781), Tm, Tl); + To = VFNMS(LDK(KP707106781), Tf, T4); + Tg = VFMA(LDK(KP707106781), Tf, T4); + ST(&(x[WS(rs, 5)]), VFNMSI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tp, To), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tn, Tg), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tn, Tg), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t2fv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_8) (planner *p) { + X(kdft_dit_register) (p, t2fv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t2fv_8 -include t2f.h */ + +/* + * This function contains 33 FP additions, 16 FP multiplications, + * (or, 33 additions, 16 multiplications, 0 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "t2f.h" + +static void t2fv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 14)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(8, rs)) { + V T4, Tq, Tm, Tr, T9, Tt, Te, Tu, T1, T3, T2; + T1 = LD(&(x[0]), ms, &(x[0])); + T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T3 = 
BYTWJ(&(W[TWVL * 6]), T2); + T4 = VSUB(T1, T3); + Tq = VADD(T1, T3); + { + V Tj, Tl, Ti, Tk; + Ti = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tj = BYTWJ(&(W[TWVL * 2]), Ti); + Tk = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tl = BYTWJ(&(W[TWVL * 10]), Tk); + Tm = VSUB(Tj, Tl); + Tr = VADD(Tj, Tl); + } + { + V T6, T8, T5, T7; + T5 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T6 = BYTWJ(&(W[0]), T5); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T8 = BYTWJ(&(W[TWVL * 8]), T7); + T9 = VSUB(T6, T8); + Tt = VADD(T6, T8); + } + { + V Tb, Td, Ta, Tc; + Ta = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tb = BYTWJ(&(W[TWVL * 12]), Ta); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = BYTWJ(&(W[TWVL * 4]), Tc); + Te = VSUB(Tb, Td); + Tu = VADD(Tb, Td); + } + { + V Ts, Tv, Tw, Tx; + Ts = VADD(Tq, Tr); + Tv = VADD(Tt, Tu); + ST(&(x[WS(rs, 4)]), VSUB(Ts, Tv), ms, &(x[0])); + ST(&(x[0]), VADD(Ts, Tv), ms, &(x[0])); + Tw = VSUB(Tq, Tr); + Tx = VBYI(VSUB(Tu, Tt)); + ST(&(x[WS(rs, 6)]), VSUB(Tw, Tx), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tw, Tx), ms, &(x[0])); + { + V Tg, To, Tn, Tp, Tf, Th; + Tf = VMUL(LDK(KP707106781), VADD(T9, Te)); + Tg = VADD(T4, Tf); + To = VSUB(T4, Tf); + Th = VMUL(LDK(KP707106781), VSUB(Te, T9)); + Tn = VBYI(VSUB(Th, Tm)); + Tp = VBYI(VADD(Tm, Th)); + ST(&(x[WS(rs, 7)]), VSUB(Tg, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(To, Tp), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tg, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(To, Tp), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 2), + VTW(0, 3), + VTW(0, 4), + VTW(0, 5), + VTW(0, 6), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t2fv_8"), twinstr, &GENUS, {33, 16, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2fv_8) (planner *p) { + X(kdft_dit_register) (p, t2fv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_16.c 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 16 -name t2sv_16 -include ts.h */ + +/* + * This function contains 196 FP additions, 134 FP multiplications, + * (or, 104 additions, 42 multiplications, 92 fused multiply/add), + * 120 stack variables, 3 constants, and 64 memory accesses + */ +#include "ts.h" + +static void t2sv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * 
ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 8), MAKE_VOLATILE_STRIDE(32, rs)) { + V T34, T30, T2N, T2v, T2M, T2g, T3V, T3X, T32, T2U, T33, T2X, T2O, T2K, T3P; + V T3R; + { + V T2, Tf, TM, TO, T3, T6, T5, Th; + T2 = LDW(&(W[0])); + Tf = LDW(&(W[TWVL * 2])); + TM = LDW(&(W[TWVL * 6])); + TO = LDW(&(W[TWVL * 7])); + T3 = LDW(&(W[TWVL * 4])); + T6 = LDW(&(W[TWVL * 5])); + T5 = LDW(&(W[TWVL * 1])); + Th = LDW(&(W[TWVL * 3])); + { + V TW, TZ, Te, T1U, T3A, T3L, T2D, T1G, T3h, T2A, T2B, T1R, T3i, T2I, Tx; + V T3M, T1Z, T3w, TL, T26, T25, T37, T1l, T2q, T1d, T2o, T2l, T3c, T1r, T2s; + V TX, T10, TV, T2a; + { + V Tz, TP, TT, Tq, TF, Tu, TI, Tm, TC, T1j, T1p, T1m, T1f, T1O, T1M; + V T1K, T2F, Tj, Tn, T1Q, T2G, Tk, T1V, Tr, Tv; + { + V T1, Ti, Tb, T3z, T8, Tc, T1u, T1D, T1L, T1z, T9, T3x, T1v, T1w, T1A; + V T1E; + { + V T7, T1i, T1e, T1C, T1y; + T1 = LD(&(ri[0]), ms, &(ri[0])); + { + V Tg, TN, TS, Tp; + Tg = VMUL(T2, Tf); + TN = VMUL(T2, TM); + TS = VMUL(T2, TO); + Tp = VMUL(Tf, T3); + { + V T4, Tt, Ta, Tl; + T4 = VMUL(T2, T3); + Tt = VMUL(Tf, T6); + Ta = VMUL(T2, T6); + Tl = VMUL(T2, Th); + Ti = VFNMS(T5, Th, Tg); + Tz = VFMA(T5, Th, Tg); + TP = VFMA(T5, TO, TN); + TT = VFNMS(T5, TM, TS); + TW = VFMA(Th, T6, Tp); + Tq = VFNMS(Th, T6, Tp); + TF = VFNMS(T5, T6, T4); + T7 = VFMA(T5, T6, T4); + Tu = VFMA(Th, T3, Tt); + TZ = VFNMS(Th, T3, Tt); + TI = VFMA(T5, T3, Ta); + Tb = VFNMS(T5, T3, Ta); + Tm = VFMA(T5, Tf, Tl); + TC = VFNMS(T5, Tf, Tl); + T1i = VMUL(Ti, T6); + T1e = VMUL(Ti, T3); + T1C = VMUL(Tz, T6); + T1y = VMUL(Tz, T3); + T3z = LD(&(ii[0]), ms, &(ii[0])); + } + } + T8 = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + Tc = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + T1u = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T1j = VFNMS(Tm, T3, T1i); + T1p = VFMA(Tm, T3, T1i); + T1m = VFNMS(Tm, T6, T1e); + T1f = VFMA(Tm, T6, T1e); + T1D = VFNMS(TC, T3, T1C); + T1O = VFMA(TC, T3, T1C); + T1L = VFNMS(TC, T6, T1y); + T1z = VFMA(TC, T6, T1y); + T9 = VMUL(T7, T8); + T3x = VMUL(T7, Tc); + 
T1v = VMUL(TM, T1u); + T1w = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T1A = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T1E = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + } + { + V T1x, T2x, T1F, T2z, T1N, T1P; + { + V T1H, T1J, T1I, T2E; + { + V Td, T3y, T2w, T1B, T2y; + T1H = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T1J = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + Td = VFMA(Tb, Tc, T9); + T3y = VFNMS(Tb, T8, T3x); + T1M = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T1x = VFMA(TO, T1w, T1v); + T2w = VMUL(TM, T1w); + T1B = VMUL(T1z, T1A); + T2y = VMUL(T1z, T1E); + T1I = VMUL(Tf, T1H); + T2E = VMUL(Tf, T1J); + Te = VADD(T1, Td); + T1U = VSUB(T1, Td); + T3A = VADD(T3y, T3z); + T3L = VSUB(T3z, T3y); + T2x = VFNMS(TO, T1u, T2w); + T1F = VFMA(T1D, T1E, T1B); + T2z = VFNMS(T1D, T1A, T2y); + T1N = VMUL(T1L, T1M); + T1P = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + } + T1K = VFMA(Th, T1J, T1I); + T2F = VFNMS(Th, T1H, T2E); + } + Tj = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + Tn = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T2D = VSUB(T1x, T1F); + T1G = VADD(T1x, T1F); + T3h = VADD(T2x, T2z); + T2A = VSUB(T2x, T2z); + T1Q = VFMA(T1O, T1P, T1N); + T2G = VMUL(T1L, T1P); + Tk = VMUL(Ti, Tj); + T1V = VMUL(Ti, Tn); + Tr = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + Tv = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + } + } + { + V TE, T22, T15, T17, TK, T16, T2h, T24, T19, T1b; + { + V To, T1W, TG, TJ, Tw, T1Y, TH, T23; + { + V TA, TD, TB, T21, T2H, Ts, T1X; + TA = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + TD = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T2B = VSUB(T1K, T1Q); + T1R = VADD(T1K, T1Q); + T2H = VFNMS(T1O, T1M, T2G); + To = VFMA(Tm, Tn, Tk); + T1W = VFNMS(Tm, Tj, T1V); + Ts = VMUL(Tq, Tr); + T1X = VMUL(Tq, Tv); + TB = VMUL(Tz, TA); + T21 = VMUL(Tz, TD); + TG = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + T3i = VADD(T2F, T2H); + T2I = VSUB(T2F, T2H); + TJ = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + Tw = VFMA(Tu, Tv, Ts); + T1Y = VFNMS(Tu, Tr, T1X); + TE = VFMA(TC, TD, TB); + T22 = 
VFNMS(TC, TA, T21); + TH = VMUL(TF, TG); + } + T15 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T17 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T23 = VMUL(TF, TJ); + Tx = VADD(To, Tw); + T3M = VSUB(To, Tw); + T1Z = VSUB(T1W, T1Y); + T3w = VADD(T1W, T1Y); + TK = VFMA(TI, TJ, TH); + T16 = VMUL(T2, T15); + T2h = VMUL(T2, T17); + T24 = VFNMS(TI, TG, T23); + T19 = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T1b = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + } + { + V T1g, T1k, T18, T2i, T1a, T2j, T1h, T2p, T1n, T1q; + T1g = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T1k = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + TL = VADD(TE, TK); + T26 = VSUB(TE, TK); + T18 = VFMA(T5, T17, T16); + T2i = VFNMS(T5, T15, T2h); + T25 = VSUB(T22, T24); + T37 = VADD(T22, T24); + T1a = VMUL(T3, T19); + T2j = VMUL(T3, T1b); + T1h = VMUL(T1f, T1g); + T2p = VMUL(T1f, T1k); + T1n = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T1q = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + { + V TQ, TU, TR, T29; + { + V T1c, T2k, T1o, T2r; + TQ = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + TU = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + T1c = VFMA(T6, T1b, T1a); + T2k = VFNMS(T6, T19, T2j); + T1l = VFMA(T1j, T1k, T1h); + T2q = VFNMS(T1j, T1g, T2p); + T1o = VMUL(T1m, T1n); + T2r = VMUL(T1m, T1q); + TR = VMUL(TP, TQ); + T29 = VMUL(TP, TU); + T1d = VADD(T18, T1c); + T2o = VSUB(T18, T1c); + T2l = VSUB(T2i, T2k); + T3c = VADD(T2i, T2k); + T1r = VFMA(T1p, T1q, T1o); + T2s = VFNMS(T1p, T1n, T2r); + TX = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + T10 = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + } + TV = VFMA(TT, TU, TR); + T2a = VFNMS(TT, TQ, T29); + } + } + } + } + { + V T36, Ty, T3B, T3G, T1s, T2m, T2t, T3d, TY, T2b, T3g, T1S, T3s, T3j; + T36 = VSUB(Te, Tx); + Ty = VADD(Te, Tx); + T3B = VADD(T3w, T3A); + T3G = VSUB(T3A, T3w); + T1s = VADD(T1l, T1r); + T2m = VSUB(T1l, T1r); + T2t = VSUB(T2q, T2s); + T3d = VADD(T2q, T2s); + TY = VMUL(TW, TX); + T2b = VMUL(TW, T10); + T3g = VSUB(T1G, T1R); + T1S = VADD(T1G, T1R); + 
T3s = VADD(T3h, T3i); + T3j = VSUB(T3h, T3i); + { + V T3D, T1T, T3u, T3t, T28, T12, T38, T2d, T3n, T3f; + { + V T1t, T3b, T3e, T3r, T11, T2c; + T1t = VADD(T1d, T1s); + T3b = VSUB(T1d, T1s); + T3e = VSUB(T3c, T3d); + T3r = VADD(T3c, T3d); + T11 = VFMA(TZ, T10, TY); + T2c = VFNMS(TZ, TX, T2b); + T3D = VSUB(T1S, T1t); + T1T = VADD(T1t, T1S); + T3u = VADD(T3r, T3s); + T3t = VSUB(T3r, T3s); + T28 = VSUB(TV, T11); + T12 = VADD(TV, T11); + T38 = VADD(T2a, T2c); + T2d = VSUB(T2a, T2c); + T3n = VSUB(T3e, T3b); + T3f = VADD(T3b, T3e); + } + { + V T2Q, T20, T3N, T3T, T2J, T2C, T2W, T2V, T3O, T2f, T3U, T2T; + { + V T2R, T27, T2e, T2S, T13, T3F; + T2Q = VADD(T1U, T1Z); + T20 = VSUB(T1U, T1Z); + T3N = VSUB(T3L, T3M); + T3T = VADD(T3M, T3L); + T13 = VADD(TL, T12); + T3F = VSUB(T12, TL); + { + V T3v, T39, T3o, T3k; + T3v = VADD(T37, T38); + T39 = VSUB(T37, T38); + T3o = VADD(T3g, T3j); + T3k = VSUB(T3g, T3j); + { + V T3H, T3J, T14, T3q; + T3H = VADD(T3F, T3G); + T3J = VSUB(T3G, T3F); + T14 = VADD(Ty, T13); + T3q = VSUB(Ty, T13); + { + V T3a, T3m, T3C, T3E; + T3a = VADD(T36, T39); + T3m = VSUB(T36, T39); + T3C = VADD(T3v, T3B); + T3E = VSUB(T3B, T3v); + { + V T3I, T3p, T3l, T3K; + T3I = VADD(T3n, T3o); + T3p = VSUB(T3n, T3o); + T3l = VADD(T3f, T3k); + T3K = VSUB(T3k, T3f); + ST(&(ri[WS(rs, 4)]), VADD(T3q, T3t), ms, &(ri[0])); + ST(&(ri[WS(rs, 12)]), VSUB(T3q, T3t), ms, &(ri[0])); + ST(&(ri[0]), VADD(T14, T1T), ms, &(ri[0])); + ST(&(ri[WS(rs, 8)]), VSUB(T14, T1T), ms, &(ri[0])); + ST(&(ii[WS(rs, 4)]), VADD(T3D, T3E), ms, &(ii[0])); + ST(&(ii[WS(rs, 12)]), VSUB(T3E, T3D), ms, &(ii[0])); + ST(&(ii[0]), VADD(T3u, T3C), ms, &(ii[0])); + ST(&(ii[WS(rs, 8)]), VSUB(T3C, T3u), ms, &(ii[0])); + ST(&(ri[WS(rs, 6)]), VFMA(LDK(KP707106781), T3p, T3m), ms, &(ri[0])); + ST(&(ri[WS(rs, 14)]), VFNMS(LDK(KP707106781), T3p, T3m), ms, &(ri[0])); + ST(&(ii[WS(rs, 10)]), VFNMS(LDK(KP707106781), T3I, T3H), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VFMA(LDK(KP707106781), T3I, T3H), ms, &(ii[0])); + 
ST(&(ii[WS(rs, 14)]), VFNMS(LDK(KP707106781), T3K, T3J), ms, &(ii[0])); + ST(&(ii[WS(rs, 6)]), VFMA(LDK(KP707106781), T3K, T3J), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VFMA(LDK(KP707106781), T3l, T3a), ms, &(ri[0])); + ST(&(ri[WS(rs, 10)]), VFNMS(LDK(KP707106781), T3l, T3a), ms, &(ri[0])); + T2R = VADD(T26, T25); + T27 = VSUB(T25, T26); + T2e = VADD(T28, T2d); + T2S = VSUB(T28, T2d); + } + } + } + } + { + V T2Y, T2Z, T2n, T2u; + T2J = VSUB(T2D, T2I); + T2Y = VADD(T2D, T2I); + T2Z = VSUB(T2A, T2B); + T2C = VADD(T2A, T2B); + T2W = VSUB(T2l, T2m); + T2n = VADD(T2l, T2m); + T2u = VSUB(T2o, T2t); + T2V = VADD(T2o, T2t); + T3O = VADD(T27, T2e); + T2f = VSUB(T27, T2e); + T34 = VFMA(LDK(KP414213562), T2Y, T2Z); + T30 = VFNMS(LDK(KP414213562), T2Z, T2Y); + T3U = VSUB(T2S, T2R); + T2T = VADD(T2R, T2S); + T2N = VFNMS(LDK(KP414213562), T2n, T2u); + T2v = VFMA(LDK(KP414213562), T2u, T2n); + } + } + T2M = VFNMS(LDK(KP707106781), T2f, T20); + T2g = VFMA(LDK(KP707106781), T2f, T20); + T3V = VFMA(LDK(KP707106781), T3U, T3T); + T3X = VFNMS(LDK(KP707106781), T3U, T3T); + T32 = VFNMS(LDK(KP707106781), T2T, T2Q); + T2U = VFMA(LDK(KP707106781), T2T, T2Q); + T33 = VFNMS(LDK(KP414213562), T2V, T2W); + T2X = VFMA(LDK(KP414213562), T2W, T2V); + T2O = VFMA(LDK(KP414213562), T2C, T2J); + T2K = VFNMS(LDK(KP414213562), T2J, T2C); + T3P = VFMA(LDK(KP707106781), T3O, T3N); + T3R = VFNMS(LDK(KP707106781), T3O, T3N); + } + } + } + } + } + { + V T3Q, T35, T31, T3S; + T3Q = VADD(T33, T34); + T35 = VSUB(T33, T34); + T31 = VADD(T2X, T30); + T3S = VSUB(T30, T2X); + { + V T3W, T2P, T2L, T3Y; + T3W = VSUB(T2O, T2N); + T2P = VADD(T2N, T2O); + T2L = VSUB(T2v, T2K); + T3Y = VADD(T2v, T2K); + ST(&(ri[WS(rs, 5)]), VFMA(LDK(KP923879532), T35, T32), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VFNMS(LDK(KP923879532), T35, T32), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 9)]), VFNMS(LDK(KP923879532), T3Q, T3P), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP923879532), T3Q, T3P), ms, &(ii[WS(rs, 1)])); + 
ST(&(ii[WS(rs, 13)]), VFNMS(LDK(KP923879532), T3S, T3R), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VFMA(LDK(KP923879532), T3S, T3R), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP923879532), T31, T2U), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 9)]), VFNMS(LDK(KP923879532), T31, T2U), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VFMA(LDK(KP923879532), T2P, T2M), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VFNMS(LDK(KP923879532), T2P, T2M), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 11)]), VFNMS(LDK(KP923879532), T3W, T3V), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP923879532), T3W, T3V), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 15)]), VFMA(LDK(KP923879532), T3Y, T3X), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VFNMS(LDK(KP923879532), T3Y, T3X), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP923879532), T2L, T2g), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 11)]), VFNMS(LDK(KP923879532), T2L, T2g), ms, &(ri[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 15), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t2sv_16"), twinstr, &GENUS, {104, 42, 92, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_16) (planner *p) { + X(kdft_dit_register) (p, t2sv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 16 -name t2sv_16 -include ts.h */ + +/* + * This function contains 196 FP additions, 108 FP multiplications, + * (or, 156 additions, 68 multiplications, 40 fused multiply/add), + * 82 stack variables, 3 constants, and 64 memory accesses + */ +#include "ts.h" + +static void t2sv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, 
+0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 8), MAKE_VOLATILE_STRIDE(32, rs)) { + V T2, T5, Tg, Ti, Tk, To, TE, TC, T6, T3, T8, TW, TJ, Tt, TU; + V Tc, Tx, TH, TN, TO, TP, TR, T1f, T1k, T1b, T1i, T1y, T1H, T1u, T1F; + { + V T7, Tv, Ta, Ts, T4, Tw, Tb, Tr; + { + V Th, Tn, Tj, Tm; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 1])); + Tg = LDW(&(W[TWVL * 2])); + Ti = LDW(&(W[TWVL * 3])); + Th = VMUL(T2, Tg); + Tn = VMUL(T5, Tg); + Tj = VMUL(T5, Ti); + Tm = VMUL(T2, Ti); + Tk = VSUB(Th, Tj); + To = VADD(Tm, Tn); + TE = VSUB(Tm, Tn); + TC = VADD(Th, Tj); + T6 = LDW(&(W[TWVL * 5])); + T7 = VMUL(T5, T6); + Tv = VMUL(Tg, T6); + Ta = VMUL(T2, T6); + Ts = VMUL(Ti, T6); + T3 = LDW(&(W[TWVL * 4])); + T4 = VMUL(T2, T3); + Tw = VMUL(Ti, T3); + Tb = VMUL(T5, T3); + Tr = VMUL(Tg, T3); + } + T8 = VADD(T4, T7); + TW = VSUB(Tv, Tw); + TJ = VADD(Ta, Tb); + Tt = VSUB(Tr, Ts); + TU = VADD(Tr, Ts); + Tc = VSUB(Ta, Tb); + Tx = VADD(Tv, Tw); + TH = VSUB(T4, T7); + TN = LDW(&(W[TWVL * 6])); + TO = LDW(&(W[TWVL * 7])); + TP = VFMA(T2, TN, VMUL(T5, TO)); + TR = VFNMS(T5, TN, VMUL(T2, TO)); + { + V T1d, T1e, T19, T1a; + T1d = VMUL(Tk, T6); + T1e = VMUL(To, T3); + T1f = VSUB(T1d, T1e); + T1k = VADD(T1d, T1e); + T19 = VMUL(Tk, T3); + T1a = VMUL(To, T6); + T1b = VADD(T19, T1a); + T1i = VSUB(T19, T1a); + } + { + V T1w, T1x, T1s, T1t; + T1w = VMUL(TC, T6); + T1x = VMUL(TE, T3); + T1y = VSUB(T1w, T1x); + T1H = VADD(T1w, T1x); + T1s = VMUL(TC, T3); + T1t = VMUL(TE, T6); + T1u = VADD(T1s, T1t); + T1F = VSUB(T1s, T1t); + } + } + { + V Tf, T3r, T1N, T3e, TA, T3s, T1Q, T3b, TM, T2M, T1W, T2w, TZ, T2N, T21; + V T2x, T1B, T1K, T2V, T2W, T2X, T2Y, T2j, T2D, T2o, T2E, T18, T1n, T2Q, T2R; + V T2S, T2T, T28, T2A, T2d, T2B; + { + V T1, T3d, Te, T3c, T9, Td; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T3d = LD(&(ii[0]), ms, &(ii[0])); + T9 = LD(&(ri[WS(rs, 8)]), ms, 
&(ri[0])); + Td = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + Te = VFMA(T8, T9, VMUL(Tc, Td)); + T3c = VFNMS(Tc, T9, VMUL(T8, Td)); + Tf = VADD(T1, Te); + T3r = VSUB(T3d, T3c); + T1N = VSUB(T1, Te); + T3e = VADD(T3c, T3d); + } + { + V Tq, T1O, Tz, T1P; + { + V Tl, Tp, Tu, Ty; + Tl = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + Tp = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + Tq = VFMA(Tk, Tl, VMUL(To, Tp)); + T1O = VFNMS(To, Tl, VMUL(Tk, Tp)); + Tu = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + Ty = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + Tz = VFMA(Tt, Tu, VMUL(Tx, Ty)); + T1P = VFNMS(Tx, Tu, VMUL(Tt, Ty)); + } + TA = VADD(Tq, Tz); + T3s = VSUB(Tq, Tz); + T1Q = VSUB(T1O, T1P); + T3b = VADD(T1O, T1P); + } + { + V TG, T1S, TL, T1T, T1U, T1V; + { + V TD, TF, TI, TK; + TD = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + TF = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + TG = VFMA(TC, TD, VMUL(TE, TF)); + T1S = VFNMS(TE, TD, VMUL(TC, TF)); + TI = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + TK = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + TL = VFMA(TH, TI, VMUL(TJ, TK)); + T1T = VFNMS(TJ, TI, VMUL(TH, TK)); + } + TM = VADD(TG, TL); + T2M = VADD(T1S, T1T); + T1U = VSUB(T1S, T1T); + T1V = VSUB(TG, TL); + T1W = VSUB(T1U, T1V); + T2w = VADD(T1V, T1U); + } + { + V TT, T1Y, TY, T1Z, T1X, T20; + { + V TQ, TS, TV, TX; + TQ = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + TS = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + TT = VFMA(TP, TQ, VMUL(TR, TS)); + T1Y = VFNMS(TR, TQ, VMUL(TP, TS)); + TV = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + TX = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + TY = VFMA(TU, TV, VMUL(TW, TX)); + T1Z = VFNMS(TW, TV, VMUL(TU, TX)); + } + TZ = VADD(TT, TY); + T2N = VADD(T1Y, T1Z); + T1X = VSUB(TT, TY); + T20 = VSUB(T1Y, T1Z); + T21 = VADD(T1X, T20); + T2x = VSUB(T1X, T20); + } + { + V T1r, T2k, T1J, T2h, T1A, T2l, T1E, T2g; + { + V T1p, T1q, T1G, T1I; + T1p = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T1q = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T1r = VFMA(TN, T1p, VMUL(TO, T1q)); + T2k = VFNMS(TO, T1p, VMUL(TN, T1q)); + 
T1G = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T1I = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + T1J = VFMA(T1F, T1G, VMUL(T1H, T1I)); + T2h = VFNMS(T1H, T1G, VMUL(T1F, T1I)); + } + { + V T1v, T1z, T1C, T1D; + T1v = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T1z = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T1A = VFMA(T1u, T1v, VMUL(T1y, T1z)); + T2l = VFNMS(T1y, T1v, VMUL(T1u, T1z)); + T1C = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T1D = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T1E = VFMA(Tg, T1C, VMUL(Ti, T1D)); + T2g = VFNMS(Ti, T1C, VMUL(Tg, T1D)); + } + T1B = VADD(T1r, T1A); + T1K = VADD(T1E, T1J); + T2V = VSUB(T1B, T1K); + T2W = VADD(T2k, T2l); + T2X = VADD(T2g, T2h); + T2Y = VSUB(T2W, T2X); + { + V T2f, T2i, T2m, T2n; + T2f = VSUB(T1r, T1A); + T2i = VSUB(T2g, T2h); + T2j = VSUB(T2f, T2i); + T2D = VADD(T2f, T2i); + T2m = VSUB(T2k, T2l); + T2n = VSUB(T1E, T1J); + T2o = VADD(T2m, T2n); + T2E = VSUB(T2m, T2n); + } + } + { + V T14, T24, T1m, T2b, T17, T25, T1h, T2a; + { + V T12, T13, T1j, T1l; + T12 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T13 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T14 = VFMA(T2, T12, VMUL(T5, T13)); + T24 = VFNMS(T5, T12, VMUL(T2, T13)); + T1j = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T1l = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + T1m = VFMA(T1i, T1j, VMUL(T1k, T1l)); + T2b = VFNMS(T1k, T1j, VMUL(T1i, T1l)); + } + { + V T15, T16, T1c, T1g; + T15 = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T16 = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + T17 = VFMA(T3, T15, VMUL(T6, T16)); + T25 = VFNMS(T6, T15, VMUL(T3, T16)); + T1c = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T1g = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T1h = VFMA(T1b, T1c, VMUL(T1f, T1g)); + T2a = VFNMS(T1f, T1c, VMUL(T1b, T1g)); + } + T18 = VADD(T14, T17); + T1n = VADD(T1h, T1m); + T2Q = VSUB(T18, T1n); + T2R = VADD(T24, T25); + T2S = VADD(T2a, T2b); + T2T = VSUB(T2R, T2S); + { + V T26, T27, T29, T2c; + T26 = VSUB(T24, 
T25); + T27 = VSUB(T1h, T1m); + T28 = VADD(T26, T27); + T2A = VSUB(T26, T27); + T29 = VSUB(T14, T17); + T2c = VSUB(T2a, T2b); + T2d = VSUB(T29, T2c); + T2B = VADD(T29, T2c); + } + } + { + V T23, T2r, T3A, T3C, T2q, T3B, T2u, T3x; + { + V T1R, T22, T3y, T3z; + T1R = VSUB(T1N, T1Q); + T22 = VMUL(LDK(KP707106781), VSUB(T1W, T21)); + T23 = VADD(T1R, T22); + T2r = VSUB(T1R, T22); + T3y = VMUL(LDK(KP707106781), VSUB(T2x, T2w)); + T3z = VADD(T3s, T3r); + T3A = VADD(T3y, T3z); + T3C = VSUB(T3z, T3y); + } + { + V T2e, T2p, T2s, T2t; + T2e = VFMA(LDK(KP923879532), T28, VMUL(LDK(KP382683432), T2d)); + T2p = VFNMS(LDK(KP923879532), T2o, VMUL(LDK(KP382683432), T2j)); + T2q = VADD(T2e, T2p); + T3B = VSUB(T2p, T2e); + T2s = VFNMS(LDK(KP923879532), T2d, VMUL(LDK(KP382683432), T28)); + T2t = VFMA(LDK(KP382683432), T2o, VMUL(LDK(KP923879532), T2j)); + T2u = VSUB(T2s, T2t); + T3x = VADD(T2s, T2t); + } + ST(&(ri[WS(rs, 11)]), VSUB(T23, T2q), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 11)]), VSUB(T3A, T3x), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VADD(T23, T2q), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VADD(T3x, T3A), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VSUB(T2r, T2u), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 15)]), VSUB(T3C, T3B), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VADD(T2r, T2u), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VADD(T3B, T3C), ms, &(ii[WS(rs, 1)])); + } + { + V T2P, T31, T3m, T3o, T30, T3n, T34, T3j; + { + V T2L, T2O, T3k, T3l; + T2L = VSUB(Tf, TA); + T2O = VSUB(T2M, T2N); + T2P = VADD(T2L, T2O); + T31 = VSUB(T2L, T2O); + T3k = VSUB(TZ, TM); + T3l = VSUB(T3e, T3b); + T3m = VADD(T3k, T3l); + T3o = VSUB(T3l, T3k); + } + { + V T2U, T2Z, T32, T33; + T2U = VADD(T2Q, T2T); + T2Z = VSUB(T2V, T2Y); + T30 = VMUL(LDK(KP707106781), VADD(T2U, T2Z)); + T3n = VMUL(LDK(KP707106781), VSUB(T2Z, T2U)); + T32 = VSUB(T2T, T2Q); + T33 = VADD(T2V, T2Y); + T34 = VMUL(LDK(KP707106781), VSUB(T32, T33)); + T3j = VMUL(LDK(KP707106781), VADD(T32, T33)); + } + 
ST(&(ri[WS(rs, 10)]), VSUB(T2P, T30), ms, &(ri[0])); + ST(&(ii[WS(rs, 10)]), VSUB(T3m, T3j), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VADD(T2P, T30), ms, &(ri[0])); + ST(&(ii[WS(rs, 2)]), VADD(T3j, T3m), ms, &(ii[0])); + ST(&(ri[WS(rs, 14)]), VSUB(T31, T34), ms, &(ri[0])); + ST(&(ii[WS(rs, 14)]), VSUB(T3o, T3n), ms, &(ii[0])); + ST(&(ri[WS(rs, 6)]), VADD(T31, T34), ms, &(ri[0])); + ST(&(ii[WS(rs, 6)]), VADD(T3n, T3o), ms, &(ii[0])); + } + { + V T2z, T2H, T3u, T3w, T2G, T3v, T2K, T3p; + { + V T2v, T2y, T3q, T3t; + T2v = VADD(T1N, T1Q); + T2y = VMUL(LDK(KP707106781), VADD(T2w, T2x)); + T2z = VADD(T2v, T2y); + T2H = VSUB(T2v, T2y); + T3q = VMUL(LDK(KP707106781), VADD(T1W, T21)); + T3t = VSUB(T3r, T3s); + T3u = VADD(T3q, T3t); + T3w = VSUB(T3t, T3q); + } + { + V T2C, T2F, T2I, T2J; + T2C = VFMA(LDK(KP382683432), T2A, VMUL(LDK(KP923879532), T2B)); + T2F = VFNMS(LDK(KP382683432), T2E, VMUL(LDK(KP923879532), T2D)); + T2G = VADD(T2C, T2F); + T3v = VSUB(T2F, T2C); + T2I = VFNMS(LDK(KP382683432), T2B, VMUL(LDK(KP923879532), T2A)); + T2J = VFMA(LDK(KP923879532), T2E, VMUL(LDK(KP382683432), T2D)); + T2K = VSUB(T2I, T2J); + T3p = VADD(T2I, T2J); + } + ST(&(ri[WS(rs, 9)]), VSUB(T2z, T2G), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 9)]), VSUB(T3u, T3p), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(T2z, T2G), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VADD(T3p, T3u), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VSUB(T2H, T2K), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 13)]), VSUB(T3w, T3v), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VADD(T2H, T2K), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VADD(T3v, T3w), ms, &(ii[WS(rs, 1)])); + } + { + V T11, T35, T3g, T3i, T1M, T3h, T38, T39; + { + V TB, T10, T3a, T3f; + TB = VADD(Tf, TA); + T10 = VADD(TM, TZ); + T11 = VADD(TB, T10); + T35 = VSUB(TB, T10); + T3a = VADD(T2M, T2N); + T3f = VADD(T3b, T3e); + T3g = VADD(T3a, T3f); + T3i = VSUB(T3f, T3a); + } + { + V T1o, T1L, T36, T37; + T1o = VADD(T18, T1n); + T1L = VADD(T1B, T1K); + T1M 
= VADD(T1o, T1L); + T3h = VSUB(T1L, T1o); + T36 = VADD(T2R, T2S); + T37 = VADD(T2W, T2X); + T38 = VSUB(T36, T37); + T39 = VADD(T36, T37); + } + ST(&(ri[WS(rs, 8)]), VSUB(T11, T1M), ms, &(ri[0])); + ST(&(ii[WS(rs, 8)]), VSUB(T3g, T39), ms, &(ii[0])); + ST(&(ri[0]), VADD(T11, T1M), ms, &(ri[0])); + ST(&(ii[0]), VADD(T39, T3g), ms, &(ii[0])); + ST(&(ri[WS(rs, 12)]), VSUB(T35, T38), ms, &(ri[0])); + ST(&(ii[WS(rs, 12)]), VSUB(T3i, T3h), ms, &(ii[0])); + ST(&(ri[WS(rs, 4)]), VADD(T35, T38), ms, &(ri[0])); + ST(&(ii[WS(rs, 4)]), VADD(T3h, T3i), ms, &(ii[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 15), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t2sv_16"), twinstr, &GENUS, {156, 68, 40, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_16) (planner *p) { + X(kdft_dit_register) (p, t2sv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1800 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 32 -name t2sv_32 -include ts.h */ + +/* + * This function contains 488 FP additions, 350 FP multiplications, + * (or, 236 additions, 98 multiplications, 252 fused multiply/add), + * 204 stack variables, 7 constants, and 128 memory accesses + */ +#include "ts.h" + +static void t2sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 8), MAKE_VOLATILE_STRIDE(64, rs)) { + V T6H, T74, T6U, T6E, T9r, T9t, T78, T7c, T6W, T6S, T73, T6K, T7a, T72, T9x; + V T9z; + { + V T2, T8, T3, T6, Te, Ti, T5, Tc; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T6 = LDW(&(W[TWVL * 3])); + Te = LDW(&(W[TWVL * 6])); + Ti = LDW(&(W[TWVL * 7])); + T5 = LDW(&(W[TWVL * 1])); + Tc = LDW(&(W[TWVL * 5])); + { + V T2X, T2T, T34, T31, Tq, T46, T97, T8H, TH, T98, T4b, 
T8D, TZ, T7f, T1g; + V T7g, T4j, T6t, T4q, T6u, T6x, T4z, T7m, T1J, T4G, T6y, T8d, T7l, T4O, T6A; + V T2k, T7o, T6B, T4V, T7r, T8e, T5E, T6P, T3G, T7L, T6M, T61, T8n, T7I, T55; + V T6I, T2N, T7A, T5s, T6F, T7x, T8i, T2R, T2U, T57, T3a, T5h, T62, T5L, T7J; + V T43, T63, T5S, T8o, T7O, T2V, T2Y, T32, T35; + { + V T1w, T23, T1K, T1F, T1s, T1N, T26, T1z, T2w, T2s, T3Q, T3M, T3r, T3n, T2b; + V T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J, T2F, Td, TP, T1Z, T1V; + V T2g, T2c, T1m, T4u, T1D, T1G, T1p, T1t, T1E, T4D, T1x, T1A, T1q, T4v; + { + V T1, Ts, T19, TJ, T7, TM, Tb, T11, T1C, T1o, TA, T15, TE, T1d, Tw; + V T8G, Tk, Tn, Tj, TW, TS, To, Tt, Tx, TB, TF, Tl; + { + V T1Y, T1S, T2f, T2a; + T1 = LD(&(ri[0]), ms, &(ri[0])); + { + V Tr, T18, T4, Ta; + Tr = VMUL(T2, T8); + T18 = VMUL(T3, T8); + T4 = VMUL(T2, T3); + Ta = VMUL(T2, T6); + { + V T10, T1n, Tz, T14; + T10 = VMUL(T2, Te); + T1n = VMUL(T8, Te); + Tz = VMUL(T3, Te); + T14 = VMUL(T2, Ti); + { + V T1r, TD, T1c, Tv; + T1r = VMUL(T8, Ti); + TD = VMUL(T3, Ti); + T1c = VMUL(T3, Tc); + Tv = VMUL(T2, Tc); + T1w = VFNMS(T5, Tc, Tr); + Ts = VFMA(T5, Tc, Tr); + T19 = VFNMS(T6, Tc, T18); + T23 = VFMA(T6, Tc, T18); + TJ = VFNMS(T5, T6, T4); + T7 = VFMA(T5, T6, T4); + TM = VFMA(T5, T3, Ta); + Tb = VFNMS(T5, T3, Ta); + T11 = VFNMS(T5, Ti, T10); + T1C = VFMA(T5, Ti, T10); + T1o = VFMA(Tc, Ti, T1n); + TA = VFMA(T6, Ti, Tz); + T1K = VFNMS(T6, Ti, Tz); + T1F = VFNMS(T5, Te, T14); + T15 = VFMA(T5, Te, T14); + T1s = VFNMS(Tc, Te, T1r); + T1N = VFMA(T6, Te, TD); + TE = VFNMS(T6, Te, TD); + T26 = VFNMS(T6, T8, T1c); + T1d = VFMA(T6, T8, T1c); + T1z = VFMA(T5, T8, Tv); + Tw = VFNMS(T5, T8, Tv); + { + V T2v, T2r, T3P, T3L; + T2v = VMUL(T1w, Ti); + T2r = VMUL(T1w, Te); + T3P = VMUL(Ts, Ti); + T3L = VMUL(Ts, Te); + { + V T3q, T3m, T2W, T2S; + T3q = VMUL(T19, Ti); + T3m = VMUL(T19, Te); + T2W = VMUL(T23, Ti); + T2S = VMUL(T23, Te); + { + V T1T, T3i, T3e, T1Q; + T1T = VMUL(TJ, Tc); + T3i = VMUL(TJ, Ti); + T3e = VMUL(TJ, Te); + T1Q = VMUL(TJ, 
T8); + { + V Tg, T2I, T2E, T9; + Tg = VMUL(T7, Tc); + T2I = VMUL(T7, Ti); + T2E = VMUL(T7, Te); + T9 = VMUL(T7, T8); + T2w = VFNMS(T1z, Te, T2v); + T2s = VFMA(T1z, Ti, T2r); + T3Q = VFNMS(Tw, Te, T3P); + T3M = VFMA(Tw, Ti, T3L); + T3r = VFNMS(T1d, Te, T3q); + T3n = VFMA(T1d, Ti, T3m); + T2X = VFNMS(T26, Te, T2W); + T2T = VFMA(T26, Ti, T2S); + T2b = VFNMS(TM, T8, T1T); + T1U = VFMA(TM, T8, T1T); + T3C = VFNMS(TM, Te, T3i); + T3j = VFMA(TM, Te, T3i); + T3z = VFMA(TM, Ti, T3e); + T3f = VFNMS(TM, Ti, T3e); + T1R = VFNMS(TM, Tc, T1Q); + T29 = VFMA(TM, Tc, T1Q); + TR = VFNMS(Tb, T8, Tg); + Th = VFMA(Tb, T8, Tg); + T34 = VFMA(Tb, Te, T2I); + T2J = VFNMS(Tb, Te, T2I); + T31 = VFNMS(Tb, Ti, T2E); + T2F = VFMA(Tb, Ti, T2E); + Td = VFNMS(Tb, Tc, T9); + TP = VFMA(Tb, Tc, T9); + T1Y = VMUL(T1R, Ti); + T1S = VMUL(T1R, Te); + T2f = VMUL(T29, Ti); + T2a = VMUL(T29, Te); + T8G = LD(&(ii[0]), ms, &(ii[0])); + } + } + } + } + } + } + } + Tk = LD(&(ri[WS(rs, 16)]), ms, &(ri[0])); + { + V Tm, Tf, TV, TQ; + Tm = VMUL(Td, Ti); + Tf = VMUL(Td, Te); + TV = VMUL(TP, Ti); + TQ = VMUL(TP, Te); + T1Z = VFNMS(T1U, Te, T1Y); + T1V = VFMA(T1U, Ti, T1S); + T2g = VFNMS(T2b, Te, T2f); + T2c = VFMA(T2b, Ti, T2a); + Tn = VFNMS(Th, Te, Tm); + Tj = VFMA(Th, Ti, Tf); + TW = VFNMS(TR, Te, TV); + TS = VFMA(TR, Ti, TQ); + } + To = LD(&(ii[WS(rs, 16)]), ms, &(ii[0])); + } + Tt = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + Tx = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + TB = LD(&(ri[WS(rs, 24)]), ms, &(ri[0])); + TF = LD(&(ii[WS(rs, 24)]), ms, &(ii[0])); + Tl = VMUL(Tj, Tk); + { + V TO, T4f, TT, TX; + { + V Ty, T48, TG, T4a; + { + V TK, TN, T8E, Tu, T47, TC, T49, Tp, TL, T4e, T8F; + TK = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + TN = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + T8E = VMUL(Tj, To); + Tu = VMUL(Ts, Tt); + T47 = VMUL(Ts, Tx); + TC = VMUL(TA, TB); + T49 = VMUL(TA, TF); + Tp = VFMA(Tn, To, Tl); + TL = VMUL(TJ, TK); + T4e = VMUL(TJ, TN); + T8F = VFNMS(Tn, Tk, T8E); + Ty = VFMA(Tw, Tx, Tu); + T48 = VFNMS(Tw, Tt, T47); + 
TG = VFMA(TE, TF, TC); + T4a = VFNMS(TE, TB, T49); + Tq = VADD(T1, Tp); + T46 = VSUB(T1, Tp); + TO = VFMA(TM, TN, TL); + T97 = VSUB(T8G, T8F); + T8H = VADD(T8F, T8G); + T4f = VFNMS(TM, TK, T4e); + } + TH = VADD(Ty, TG); + T98 = VSUB(Ty, TG); + T4b = VSUB(T48, T4a); + T8D = VADD(T48, T4a); + TT = LD(&(ri[WS(rs, 20)]), ms, &(ri[0])); + TX = LD(&(ii[WS(rs, 20)]), ms, &(ii[0])); + } + { + V T12, T16, T1a, T1e, T4k, T4p; + T12 = LD(&(ri[WS(rs, 28)]), ms, &(ri[0])); + T16 = LD(&(ii[WS(rs, 28)]), ms, &(ii[0])); + T1a = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + T1e = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + { + V TY, T4h, T17, T4m, T1f, T4o, T4d, T4i; + { + V T1j, T1l, TU, T4g, T13, T4l, T1b, T4n, T1k, T4t; + T1j = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T1l = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + TU = VMUL(TS, TT); + T4g = VMUL(TS, TX); + T13 = VMUL(T11, T12); + T4l = VMUL(T11, T16); + T1b = VMUL(T19, T1a); + T4n = VMUL(T19, T1e); + T1k = VMUL(T7, T1j); + T4t = VMUL(T7, T1l); + TY = VFMA(TW, TX, TU); + T4h = VFNMS(TW, TT, T4g); + T17 = VFMA(T15, T16, T13); + T4m = VFNMS(T15, T12, T4l); + T1f = VFMA(T1d, T1e, T1b); + T4o = VFNMS(T1d, T1a, T4n); + T1m = VFMA(Tb, T1l, T1k); + T4u = VFNMS(Tb, T1j, T4t); + } + TZ = VADD(TO, TY); + T4d = VSUB(TO, TY); + T7f = VADD(T4f, T4h); + T4i = VSUB(T4f, T4h); + T1g = VADD(T17, T1f); + T4k = VSUB(T17, T1f); + T7g = VADD(T4m, T4o); + T4p = VSUB(T4m, T4o); + T1D = LD(&(ri[WS(rs, 26)]), ms, &(ri[0])); + T1G = LD(&(ii[WS(rs, 26)]), ms, &(ii[0])); + T4j = VADD(T4d, T4i); + T6t = VSUB(T4i, T4d); + } + T1p = LD(&(ri[WS(rs, 18)]), ms, &(ri[0])); + T1t = LD(&(ii[WS(rs, 18)]), ms, &(ii[0])); + T4q = VSUB(T4k, T4p); + T6u = VADD(T4k, T4p); + T1E = VMUL(T1C, T1D); + T4D = VMUL(T1C, T1G); + T1x = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + T1A = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + T1q = VMUL(T1o, T1p); + T4v = VMUL(T1o, T1t); + } + } + } + { + V T3l, T5z, T3E, T5Z, T3v, T3x, T3w, T3t, T5B, T5W; + { + V T1P, T4J, T1W, T20, T2i, T4T, T1X, T4K, T24, T27; + { + V 
T2d, T2h, T1v, T4A, T7j, T4x, T2e, T4y, T1I, T4F, T7k, T4S; + { + V T1L, T1O, T1H, T4E, T1y, T4B, T1u, T4w, T1M, T4I, T1B, T4C; + T1L = LD(&(ri[WS(rs, 30)]), ms, &(ri[0])); + T1O = LD(&(ii[WS(rs, 30)]), ms, &(ii[0])); + T1H = VFMA(T1F, T1G, T1E); + T4E = VFNMS(T1F, T1D, T4D); + T1y = VMUL(T1w, T1x); + T4B = VMUL(T1w, T1A); + T1u = VFMA(T1s, T1t, T1q); + T4w = VFNMS(T1s, T1p, T4v); + T1M = VMUL(T1K, T1L); + T4I = VMUL(T1K, T1O); + T2d = LD(&(ri[WS(rs, 22)]), ms, &(ri[0])); + T2h = LD(&(ii[WS(rs, 22)]), ms, &(ii[0])); + T1B = VFMA(T1z, T1A, T1y); + T4C = VFNMS(T1z, T1x, T4B); + T1v = VADD(T1m, T1u); + T4A = VSUB(T1m, T1u); + T7j = VADD(T4u, T4w); + T4x = VSUB(T4u, T4w); + T1P = VFMA(T1N, T1O, T1M); + T4J = VFNMS(T1N, T1L, T4I); + T2e = VMUL(T2c, T2d); + T4y = VSUB(T1B, T1H); + T1I = VADD(T1B, T1H); + T4F = VSUB(T4C, T4E); + T7k = VADD(T4C, T4E); + T4S = VMUL(T2c, T2h); + } + T1W = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + T20 = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + T2i = VFMA(T2g, T2h, T2e); + T6x = VADD(T4x, T4y); + T4z = VSUB(T4x, T4y); + T7m = VSUB(T1v, T1I); + T1J = VADD(T1v, T1I); + T4G = VADD(T4A, T4F); + T6y = VSUB(T4A, T4F); + T8d = VADD(T7j, T7k); + T7l = VSUB(T7j, T7k); + T4T = VFNMS(T2g, T2d, T4S); + T1X = VMUL(T1V, T1W); + T4K = VMUL(T1V, T20); + T24 = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + T27 = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + } + { + V T22, T4P, T7p, T4M, T28, T4R, T3g, T3k; + T3g = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)])); + T3k = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)])); + { + V T3A, T3D, T21, T4L, T25, T4Q, T3h, T5y, T3B, T5Y; + T3A = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)])); + T3D = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)])); + T21 = VFMA(T1Z, T20, T1X); + T4L = VFNMS(T1Z, T1W, T4K); + T25 = VMUL(T23, T24); + T4Q = VMUL(T23, T27); + T3h = VMUL(T3f, T3g); + T5y = VMUL(T3f, T3k); + T3B = VMUL(T3z, T3A); + T5Y = VMUL(T3z, T3D); + T22 = VADD(T1P, T21); + T4P = VSUB(T1P, T21); + T7p = VADD(T4J, T4L); + T4M = VSUB(T4J, T4L); + T28 = VFMA(T26, 
T27, T25); + T4R = VFNMS(T26, T24, T4Q); + T3l = VFMA(T3j, T3k, T3h); + T5z = VFNMS(T3j, T3g, T5y); + T3E = VFMA(T3C, T3D, T3B); + T5Z = VFNMS(T3C, T3A, T5Y); + } + { + V T3o, T3s, T2j, T4N, T7q, T4U, T3p, T5A; + T3o = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T3s = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T2j = VADD(T28, T2i); + T4N = VSUB(T28, T2i); + T7q = VADD(T4R, T4T); + T4U = VSUB(T4R, T4T); + T3v = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T3x = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T3p = VMUL(T3n, T3o); + T5A = VMUL(T3n, T3s); + T4O = VSUB(T4M, T4N); + T6A = VADD(T4M, T4N); + T2k = VADD(T22, T2j); + T7o = VSUB(T22, T2j); + T6B = VSUB(T4P, T4U); + T4V = VADD(T4P, T4U); + T7r = VSUB(T7p, T7q); + T8e = VADD(T7p, T7q); + T3w = VMUL(TP, T3v); + T3t = VFMA(T3r, T3s, T3p); + T5B = VFNMS(T3r, T3o, T5A); + T5W = VMUL(TP, T3x); + } + } + } + { + V T2t, T2q, T50, T2L, T5q, T2u, T2x, T2A, T2C; + { + V T2n, T2p, T2G, T2K, T5V, T3u, T5C, T7G, T5X, T2o, T4Z, T2H, T5D, T3F, T5p; + V T3y, T60, T7H; + T2n = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T2p = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T2G = LD(&(ri[WS(rs, 25)]), ms, &(ri[WS(rs, 1)])); + T2K = LD(&(ii[WS(rs, 25)]), ms, &(ii[WS(rs, 1)])); + T3y = VFMA(TR, T3x, T3w); + T5V = VSUB(T3l, T3t); + T3u = VADD(T3l, T3t); + T5C = VSUB(T5z, T5B); + T7G = VADD(T5z, T5B); + T5X = VFNMS(TR, T3v, T5W); + T2o = VMUL(T2, T2n); + T4Z = VMUL(T2, T2p); + T2H = VMUL(T2F, T2G); + T5D = VSUB(T3y, T3E); + T3F = VADD(T3y, T3E); + T5p = VMUL(T2F, T2K); + T2t = LD(&(ri[WS(rs, 17)]), ms, &(ri[WS(rs, 1)])); + T60 = VSUB(T5X, T5Z); + T7H = VADD(T5X, T5Z); + T2q = VFMA(T5, T2p, T2o); + T50 = VFNMS(T5, T2n, T4Z); + T2L = VFMA(T2J, T2K, T2H); + T5E = VSUB(T5C, T5D); + T6P = VADD(T5C, T5D); + T3G = VADD(T3u, T3F); + T7L = VSUB(T3u, T3F); + T5q = VFNMS(T2J, T2G, T5p); + T6M = VSUB(T5V, T60); + T61 = VADD(T5V, T60); + T8n = VADD(T7G, T7H); + T7I = VSUB(T7G, T7H); + T2u = VMUL(T2s, T2t); + T2x = LD(&(ii[WS(rs, 17)]), 
ms, &(ii[WS(rs, 1)])); + T2A = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T2C = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + } + { + V T3N, T2z, T5m, T3K, T5G, T41, T5Q, T3O, T7v, T53, T2M, T54, T7w, T5r, T3R; + V T3U, T3W; + { + V T3H, T3J, T3Y, T40, T52, T2D, T5o; + T3H = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T3J = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T3Y = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T40 = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + T3N = LD(&(ri[WS(rs, 19)]), ms, &(ri[WS(rs, 1)])); + { + V T2y, T51, T2B, T5n; + T2y = VFMA(T2w, T2x, T2u); + T51 = VMUL(T2s, T2x); + T2B = VMUL(T8, T2A); + T5n = VMUL(T8, T2C); + { + V T3I, T5F, T3Z, T5P; + T3I = VMUL(T3, T3H); + T5F = VMUL(T3, T3J); + T3Z = VMUL(Td, T3Y); + T5P = VMUL(Td, T40); + T2z = VADD(T2q, T2y); + T5m = VSUB(T2q, T2y); + T52 = VFNMS(T2w, T2t, T51); + T2D = VFMA(Tc, T2C, T2B); + T5o = VFNMS(Tc, T2A, T5n); + T3K = VFMA(T6, T3J, T3I); + T5G = VFNMS(T6, T3H, T5F); + T41 = VFMA(Th, T40, T3Z); + T5Q = VFNMS(Th, T3Y, T5P); + T3O = VMUL(T3M, T3N); + } + } + T7v = VADD(T50, T52); + T53 = VSUB(T50, T52); + T2M = VADD(T2D, T2L); + T54 = VSUB(T2D, T2L); + T7w = VADD(T5o, T5q); + T5r = VSUB(T5o, T5q); + T3R = LD(&(ii[WS(rs, 19)]), ms, &(ii[WS(rs, 1)])); + T3U = LD(&(ri[WS(rs, 27)]), ms, &(ri[WS(rs, 1)])); + T3W = LD(&(ii[WS(rs, 27)]), ms, &(ii[WS(rs, 1)])); + } + { + V T2O, T37, T39, T3T, T5K, T5I, T3X, T5O, T56, T38, T5g, T7M, T5J; + { + V T3S, T5H, T3V, T5N, T2P, T2Q; + T2O = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T55 = VSUB(T53, T54); + T6I = VADD(T53, T54); + T2N = VADD(T2z, T2M); + T7A = VSUB(T2z, T2M); + T5s = VADD(T5m, T5r); + T6F = VSUB(T5m, T5r); + T7x = VSUB(T7v, T7w); + T8i = VADD(T7v, T7w); + T3S = VFMA(T3Q, T3R, T3O); + T5H = VMUL(T3M, T3R); + T3V = VMUL(Te, T3U); + T5N = VMUL(Te, T3W); + T2P = VMUL(T29, T2O); + T2Q = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T37 = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T39 = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 
1)])); + T3T = VADD(T3K, T3S); + T5K = VSUB(T3K, T3S); + T5I = VFNMS(T3Q, T3N, T5H); + T3X = VFMA(Ti, T3W, T3V); + T5O = VFNMS(Ti, T3U, T5N); + T2R = VFMA(T2b, T2Q, T2P); + T56 = VMUL(T29, T2Q); + T38 = VMUL(T1R, T37); + T5g = VMUL(T1R, T39); + } + T2U = LD(&(ri[WS(rs, 21)]), ms, &(ri[WS(rs, 1)])); + T7M = VADD(T5G, T5I); + T5J = VSUB(T5G, T5I); + { + V T42, T5M, T7N, T5R; + T42 = VADD(T3X, T41); + T5M = VSUB(T3X, T41); + T7N = VADD(T5O, T5Q); + T5R = VSUB(T5O, T5Q); + T57 = VFNMS(T2b, T2O, T56); + T3a = VFMA(T1U, T39, T38); + T5h = VFNMS(T1U, T37, T5g); + T62 = VADD(T5K, T5J); + T5L = VSUB(T5J, T5K); + T7J = VSUB(T42, T3T); + T43 = VADD(T3T, T42); + T63 = VSUB(T5M, T5R); + T5S = VADD(T5M, T5R); + T8o = VADD(T7M, T7N); + T7O = VSUB(T7M, T7N); + T2V = VMUL(T2T, T2U); + } + T2Y = LD(&(ii[WS(rs, 21)]), ms, &(ii[WS(rs, 1)])); + T32 = LD(&(ri[WS(rs, 29)]), ms, &(ri[WS(rs, 1)])); + T35 = LD(&(ii[WS(rs, 29)]), ms, &(ii[WS(rs, 1)])); + } + } + } + } + } + { + V T5t, T5c, T5u, T5j, T8Z, T90; + { + V T7e, T8T, T8y, T7h, T8U, T8c, T8J, T44, T8u, T8q, T7y, T7D, T8w, T2m, T3d; + V T8h, T8R, T8P, T8k, T8x, T8B, T8f; + { + V T1i, T8O, T8N, T2l, T3c, T8j; + { + V T8p, T5b, T30, T59, T36, T5f, TI, T1h, T8m, T5a, T7B; + TI = VADD(Tq, TH); + T7e = VSUB(Tq, TH); + T8T = VSUB(T1g, TZ); + T1h = VADD(TZ, T1g); + T8y = VADD(T8n, T8o); + T8p = VSUB(T8n, T8o); + { + V T8C, T8I, T2Z, T58, T33, T5e; + T7h = VSUB(T7f, T7g); + T8C = VADD(T7f, T7g); + T8I = VADD(T8D, T8H); + T8U = VSUB(T8H, T8D); + T2Z = VFMA(T2X, T2Y, T2V); + T58 = VMUL(T2T, T2Y); + T33 = VMUL(T31, T32); + T5e = VMUL(T31, T35); + T1i = VADD(TI, T1h); + T8c = VSUB(TI, T1h); + T8O = VSUB(T8I, T8C); + T8J = VADD(T8C, T8I); + T5b = VSUB(T2R, T2Z); + T30 = VADD(T2R, T2Z); + T59 = VFNMS(T2X, T2U, T58); + T36 = VFMA(T34, T35, T33); + T5f = VFNMS(T34, T32, T5e); + } + T44 = VADD(T3G, T43); + T8m = VSUB(T3G, T43); + T5a = VSUB(T57, T59); + T7B = VADD(T57, T59); + { + V T5d, T3b, T5i, T7C; + T5d = VSUB(T36, T3a); + T3b = VADD(T36, T3a); 
+ T5i = VSUB(T5f, T5h); + T7C = VADD(T5f, T5h); + T8N = VSUB(T2k, T1J); + T2l = VADD(T1J, T2k); + T8u = VADD(T8m, T8p); + T8q = VSUB(T8m, T8p); + T5t = VADD(T5b, T5a); + T5c = VSUB(T5a, T5b); + T7y = VSUB(T3b, T30); + T3c = VADD(T30, T3b); + T5u = VSUB(T5d, T5i); + T5j = VADD(T5d, T5i); + T8j = VADD(T7B, T7C); + T7D = VSUB(T7B, T7C); + } + } + T8w = VSUB(T1i, T2l); + T2m = VADD(T1i, T2l); + T3d = VADD(T2N, T3c); + T8h = VSUB(T2N, T3c); + T8R = VSUB(T8O, T8N); + T8P = VADD(T8N, T8O); + T8k = VSUB(T8i, T8j); + T8x = VADD(T8i, T8j); + T8B = VADD(T8d, T8e); + T8f = VSUB(T8d, T8e); + } + { + V T7P, T7K, T7X, T7Y, T82, T7z, T7W, T7i, T8a, T86, T91, T8V, T8W, T7t, T7E; + V T81; + { + V T84, T85, T7n, T7s, T8L, T45; + T8L = VSUB(T44, T3d); + T45 = VADD(T3d, T44); + { + V T8t, T8l, T8A, T8z; + T8t = VSUB(T8k, T8h); + T8l = VADD(T8h, T8k); + T8A = VADD(T8x, T8y); + T8z = VSUB(T8x, T8y); + { + V T8M, T8K, T8s, T8g; + T8M = VSUB(T8J, T8B); + T8K = VADD(T8B, T8J); + T8s = VSUB(T8c, T8f); + T8g = VADD(T8c, T8f); + ST(&(ri[0]), VADD(T2m, T45), ms, &(ri[0])); + ST(&(ri[WS(rs, 16)]), VSUB(T2m, T45), ms, &(ri[0])); + { + V T8v, T8Q, T8S, T8r; + T8v = VSUB(T8t, T8u); + T8Q = VADD(T8t, T8u); + T8S = VSUB(T8q, T8l); + T8r = VADD(T8l, T8q); + ST(&(ri[WS(rs, 8)]), VADD(T8w, T8z), ms, &(ri[0])); + ST(&(ri[WS(rs, 24)]), VSUB(T8w, T8z), ms, &(ri[0])); + ST(&(ii[WS(rs, 24)]), VSUB(T8M, T8L), ms, &(ii[0])); + ST(&(ii[WS(rs, 8)]), VADD(T8L, T8M), ms, &(ii[0])); + ST(&(ii[WS(rs, 16)]), VSUB(T8K, T8A), ms, &(ii[0])); + ST(&(ii[0]), VADD(T8A, T8K), ms, &(ii[0])); + ST(&(ri[WS(rs, 12)]), VFMA(LDK(KP707106781), T8v, T8s), ms, &(ri[0])); + ST(&(ri[WS(rs, 28)]), VFNMS(LDK(KP707106781), T8v, T8s), ms, &(ri[0])); + ST(&(ii[WS(rs, 20)]), VFNMS(LDK(KP707106781), T8Q, T8P), ms, &(ii[0])); + ST(&(ii[WS(rs, 4)]), VFMA(LDK(KP707106781), T8Q, T8P), ms, &(ii[0])); + ST(&(ii[WS(rs, 28)]), VFNMS(LDK(KP707106781), T8S, T8R), ms, &(ii[0])); + ST(&(ii[WS(rs, 12)]), VFMA(LDK(KP707106781), T8S, T8R), ms, &(ii[0])); + 
ST(&(ri[WS(rs, 4)]), VFMA(LDK(KP707106781), T8r, T8g), ms, &(ri[0])); + ST(&(ri[WS(rs, 20)]), VFNMS(LDK(KP707106781), T8r, T8g), ms, &(ri[0])); + } + } + } + T7P = VSUB(T7L, T7O); + T84 = VADD(T7L, T7O); + T85 = VADD(T7I, T7J); + T7K = VSUB(T7I, T7J); + T7X = VADD(T7m, T7l); + T7n = VSUB(T7l, T7m); + T7s = VADD(T7o, T7r); + T7Y = VSUB(T7o, T7r); + T82 = VADD(T7x, T7y); + T7z = VSUB(T7x, T7y); + T7W = VADD(T7e, T7h); + T7i = VSUB(T7e, T7h); + T8a = VFMA(LDK(KP414213562), T84, T85); + T86 = VFNMS(LDK(KP414213562), T85, T84); + T91 = VSUB(T8U, T8T); + T8V = VADD(T8T, T8U); + T8W = VADD(T7n, T7s); + T7t = VSUB(T7n, T7s); + T7E = VSUB(T7A, T7D); + T81 = VADD(T7A, T7D); + } + { + V T7S, T7u, T7T, T7F, T92, T7Z, T89, T83, T7U, T7Q; + T7S = VFNMS(LDK(KP707106781), T7t, T7i); + T7u = VFMA(LDK(KP707106781), T7t, T7i); + T7T = VFNMS(LDK(KP414213562), T7z, T7E); + T7F = VFMA(LDK(KP414213562), T7E, T7z); + T92 = VSUB(T7Y, T7X); + T7Z = VADD(T7X, T7Y); + T89 = VFNMS(LDK(KP414213562), T81, T82); + T83 = VFMA(LDK(KP414213562), T82, T81); + T7U = VFMA(LDK(KP414213562), T7K, T7P); + T7Q = VFNMS(LDK(KP414213562), T7P, T7K); + { + V T8X, T95, T93, T80, T88, T87, T7V, T94, T96, T7R, T8Y, T8b; + T8Z = VFNMS(LDK(KP707106781), T8W, T8V); + T8X = VFMA(LDK(KP707106781), T8W, T8V); + T95 = VFNMS(LDK(KP707106781), T92, T91); + T93 = VFMA(LDK(KP707106781), T92, T91); + T80 = VFMA(LDK(KP707106781), T7Z, T7W); + T88 = VFNMS(LDK(KP707106781), T7Z, T7W); + T90 = VSUB(T86, T83); + T87 = VADD(T83, T86); + T7V = VADD(T7T, T7U); + T94 = VSUB(T7U, T7T); + T96 = VADD(T7F, T7Q); + T7R = VSUB(T7F, T7Q); + T8Y = VADD(T89, T8a); + T8b = VSUB(T89, T8a); + ST(&(ri[WS(rs, 2)]), VFMA(LDK(KP923879532), T87, T80), ms, &(ri[0])); + ST(&(ri[WS(rs, 18)]), VFNMS(LDK(KP923879532), T87, T80), ms, &(ri[0])); + ST(&(ri[WS(rs, 30)]), VFMA(LDK(KP923879532), T7V, T7S), ms, &(ri[0])); + ST(&(ri[WS(rs, 14)]), VFNMS(LDK(KP923879532), T7V, T7S), ms, &(ri[0])); + ST(&(ii[WS(rs, 22)]), VFNMS(LDK(KP923879532), T94, T93), ms, 
&(ii[0])); + ST(&(ii[WS(rs, 6)]), VFMA(LDK(KP923879532), T94, T93), ms, &(ii[0])); + ST(&(ii[WS(rs, 30)]), VFMA(LDK(KP923879532), T96, T95), ms, &(ii[0])); + ST(&(ii[WS(rs, 14)]), VFNMS(LDK(KP923879532), T96, T95), ms, &(ii[0])); + ST(&(ri[WS(rs, 6)]), VFMA(LDK(KP923879532), T7R, T7u), ms, &(ri[0])); + ST(&(ri[WS(rs, 22)]), VFNMS(LDK(KP923879532), T7R, T7u), ms, &(ri[0])); + ST(&(ii[WS(rs, 18)]), VFNMS(LDK(KP923879532), T8Y, T8X), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VFMA(LDK(KP923879532), T8Y, T8X), ms, &(ii[0])); + ST(&(ri[WS(rs, 26)]), VFNMS(LDK(KP923879532), T8b, T88), ms, &(ri[0])); + ST(&(ri[WS(rs, 10)]), VFMA(LDK(KP923879532), T8b, T88), ms, &(ri[0])); + } + } + } + } + { + V T6s, T9o, T9n, T6v, T6N, T6Q, T6G, T6J, T68, T4Y, T9f, T9d, T9l, T9j, T6g; + V T6o, T6q, T6m, T66, T6a, T6p, T6j, T5x, T69; + { + V T6d, T6e, T6c, T4s, T9c, T4X, T9h, T9b, T5T, T64, T5k, T5v, T9i, T6f; + { + V T4c, T4r, T4H, T4W, T99, T9a; + T6s = VSUB(T46, T4b); + T4c = VADD(T46, T4b); + T4r = VADD(T4j, T4q); + T9o = VSUB(T4q, T4j); + T6d = VFMA(LDK(KP414213562), T4z, T4G); + T4H = VFNMS(LDK(KP414213562), T4G, T4z); + T4W = VFMA(LDK(KP414213562), T4V, T4O); + T6e = VFNMS(LDK(KP414213562), T4O, T4V); + T9n = VADD(T98, T97); + T99 = VSUB(T97, T98); + T9a = VADD(T6t, T6u); + T6v = VSUB(T6t, T6u); + ST(&(ii[WS(rs, 26)]), VFNMS(LDK(KP923879532), T90, T8Z), ms, &(ii[0])); + ST(&(ii[WS(rs, 10)]), VFMA(LDK(KP923879532), T90, T8Z), ms, &(ii[0])); + T6c = VFMA(LDK(KP707106781), T4r, T4c); + T4s = VFNMS(LDK(KP707106781), T4r, T4c); + T9c = VADD(T4H, T4W); + T4X = VSUB(T4H, T4W); + T9h = VFNMS(LDK(KP707106781), T9a, T99); + T9b = VFMA(LDK(KP707106781), T9a, T99); + T6N = VSUB(T5S, T5L); + T5T = VADD(T5L, T5S); + T64 = VADD(T62, T63); + T6Q = VSUB(T62, T63); + T6G = VSUB(T5j, T5c); + T5k = VADD(T5c, T5j); + T5v = VADD(T5t, T5u); + T6J = VSUB(T5t, T5u); + } + T68 = VFNMS(LDK(KP923879532), T4X, T4s); + T4Y = VFMA(LDK(KP923879532), T4X, T4s); + T9f = VFNMS(LDK(KP923879532), T9c, T9b); + T9d = 
VFMA(LDK(KP923879532), T9c, T9b); + T9i = VSUB(T6e, T6d); + T6f = VADD(T6d, T6e); + { + V T6l, T5U, T6k, T65; + T6l = VFMA(LDK(KP707106781), T5T, T5E); + T5U = VFNMS(LDK(KP707106781), T5T, T5E); + T6k = VFMA(LDK(KP707106781), T64, T61); + T65 = VFNMS(LDK(KP707106781), T64, T61); + { + V T6i, T5l, T6h, T5w; + T6i = VFMA(LDK(KP707106781), T5k, T55); + T5l = VFNMS(LDK(KP707106781), T5k, T55); + T6h = VFMA(LDK(KP707106781), T5v, T5s); + T5w = VFNMS(LDK(KP707106781), T5v, T5s); + T9l = VFNMS(LDK(KP923879532), T9i, T9h); + T9j = VFMA(LDK(KP923879532), T9i, T9h); + T6g = VFMA(LDK(KP923879532), T6f, T6c); + T6o = VFNMS(LDK(KP923879532), T6f, T6c); + T6q = VFMA(LDK(KP198912367), T6k, T6l); + T6m = VFNMS(LDK(KP198912367), T6l, T6k); + T66 = VFNMS(LDK(KP668178637), T65, T5U); + T6a = VFMA(LDK(KP668178637), T5U, T65); + T6p = VFNMS(LDK(KP198912367), T6h, T6i); + T6j = VFMA(LDK(KP198912367), T6i, T6h); + T5x = VFMA(LDK(KP668178637), T5w, T5l); + T69 = VFNMS(LDK(KP668178637), T5l, T5w); + } + } + } + { + V T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T77, T6O, T76, T6R; + { + V T6Z, T6z, T6C, T70; + { + V T6n, T9g, T9e, T6r; + T6n = VADD(T6j, T6m); + T9g = VSUB(T6m, T6j); + T9e = VADD(T6p, T6q); + T6r = VSUB(T6p, T6q); + { + V T9k, T6b, T67, T9m; + T9k = VSUB(T6a, T69); + T6b = VADD(T69, T6a); + T67 = VSUB(T5x, T66); + T9m = VADD(T5x, T66); + ST(&(ii[WS(rs, 25)]), VFNMS(LDK(KP980785280), T9g, T9f), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 9)]), VFMA(LDK(KP980785280), T9g, T9f), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP980785280), T6n, T6g), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 17)]), VFNMS(LDK(KP980785280), T6n, T6g), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 9)]), VFMA(LDK(KP980785280), T6r, T6o), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 25)]), VFNMS(LDK(KP980785280), T6r, T6o), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 17)]), VFNMS(LDK(KP980785280), T9e, T9d), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP980785280), T9e, T9d), ms, &(ii[WS(rs, 1)])); + 
ST(&(ri[WS(rs, 29)]), VFMA(LDK(KP831469612), T6b, T68), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VFNMS(LDK(KP831469612), T6b, T68), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 21)]), VFNMS(LDK(KP831469612), T9k, T9j), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VFMA(LDK(KP831469612), T9k, T9j), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 29)]), VFMA(LDK(KP831469612), T9m, T9l), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 13)]), VFNMS(LDK(KP831469612), T9m, T9l), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VFMA(LDK(KP831469612), T67, T4Y), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 21)]), VFNMS(LDK(KP831469612), T67, T4Y), ms, &(ri[WS(rs, 1)])); + T6Y = VFNMS(LDK(KP707106781), T6v, T6s); + T6w = VFMA(LDK(KP707106781), T6v, T6s); + } + } + T6Z = VFNMS(LDK(KP414213562), T6x, T6y); + T6z = VFMA(LDK(KP414213562), T6y, T6x); + T6C = VFNMS(LDK(KP414213562), T6B, T6A); + T70 = VFMA(LDK(KP414213562), T6A, T6B); + T9w = VADD(T6z, T6C); + T6D = VSUB(T6z, T6C); + T9v = VFNMS(LDK(KP707106781), T9o, T9n); + T9p = VFMA(LDK(KP707106781), T9o, T9n); + T9q = VSUB(T70, T6Z); + T71 = VADD(T6Z, T70); + T77 = VFMA(LDK(KP707106781), T6N, T6M); + T6O = VFNMS(LDK(KP707106781), T6N, T6M); + T76 = VFMA(LDK(KP707106781), T6Q, T6P); + T6R = VFNMS(LDK(KP707106781), T6Q, T6P); + T6H = VFNMS(LDK(KP707106781), T6G, T6F); + T74 = VFMA(LDK(KP707106781), T6G, T6F); + } + T6U = VFNMS(LDK(KP923879532), T6D, T6w); + T6E = VFMA(LDK(KP923879532), T6D, T6w); + T9r = VFMA(LDK(KP923879532), T9q, T9p); + T9t = VFNMS(LDK(KP923879532), T9q, T9p); + T78 = VFNMS(LDK(KP198912367), T77, T76); + T7c = VFMA(LDK(KP198912367), T76, T77); + T6W = VFMA(LDK(KP668178637), T6O, T6R); + T6S = VFNMS(LDK(KP668178637), T6R, T6O); + T73 = VFMA(LDK(KP707106781), T6J, T6I); + T6K = VFNMS(LDK(KP707106781), T6J, T6I); + T7a = VFMA(LDK(KP923879532), T71, T6Y); + T72 = VFNMS(LDK(KP923879532), T71, T6Y); + T9x = VFNMS(LDK(KP923879532), T9w, T9v); + T9z = VFMA(LDK(KP923879532), T9w, T9v); + } + } + } + } + } + { + V T7b, T75, T6L, T6V; + T7b 
= VFNMS(LDK(KP198912367), T73, T74); + T75 = VFMA(LDK(KP198912367), T74, T73); + T6L = VFMA(LDK(KP668178637), T6K, T6H); + T6V = VFNMS(LDK(KP668178637), T6H, T6K); + { + V T79, T9A, T9y, T7d; + T79 = VSUB(T75, T78); + T9A = VADD(T75, T78); + T9y = VSUB(T7c, T7b); + T7d = VADD(T7b, T7c); + { + V T9s, T6X, T6T, T9u; + T9s = VADD(T6V, T6W); + T6X = VSUB(T6V, T6W); + T6T = VADD(T6L, T6S); + T9u = VSUB(T6S, T6L); + ST(&(ii[WS(rs, 31)]), VFMA(LDK(KP980785280), T9A, T9z), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 15)]), VFNMS(LDK(KP980785280), T9A, T9z), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VFMA(LDK(KP980785280), T79, T72), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 23)]), VFNMS(LDK(KP980785280), T79, T72), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 31)]), VFMA(LDK(KP980785280), T7d, T7a), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VFNMS(LDK(KP980785280), T7d, T7a), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 23)]), VFNMS(LDK(KP980785280), T9y, T9x), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VFMA(LDK(KP980785280), T9y, T9x), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 11)]), VFMA(LDK(KP831469612), T6X, T6U), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 27)]), VFNMS(LDK(KP831469612), T6X, T6U), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 19)]), VFNMS(LDK(KP831469612), T9s, T9r), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP831469612), T9s, T9r), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 27)]), VFNMS(LDK(KP831469612), T9u, T9t), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 11)]), VFMA(LDK(KP831469612), T9u, T9t), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP831469612), T6T, T6E), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 19)]), VFNMS(LDK(KP831469612), T6T, T6E), ms, &(ri[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 27), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t2sv_32"), twinstr, &GENUS, {236, 98, 252, 0}, 0, 0, 0 }; + +void 
XSIMD(codelet_t2sv_32) (planner *p) { + X(kdft_dit_register) (p, t2sv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 32 -name t2sv_32 -include ts.h */ + +/* + * This function contains 488 FP additions, 280 FP multiplications, + * (or, 376 additions, 168 multiplications, 112 fused multiply/add), + * 158 stack variables, 7 constants, and 128 memory accesses + */ +#include "ts.h" + +static void t2sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 8); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 8), MAKE_VOLATILE_STRIDE(64, rs)) { + V T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y; + V T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d; + V Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C; + V T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25; + V T1S, T23; + { + V Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF; + V T10; + { + V T4, Tc, T7, Tb; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 1])); + T3 = LDW(&(W[TWVL * 2])); + T6 = LDW(&(W[TWVL * 3])); + T4 = VMUL(T2, T3); + Tc = VMUL(T5, T3); + T7 = VMUL(T5, T6); + Tb = VMUL(T2, T6); + T8 = VADD(T4, T7); + TM = VSUB(T4, T7); + TO = VADD(Tb, Tc); + Td = VSUB(Tb, Tc); + T9 = LDW(&(W[TWVL * 4])); + Ts = VMUL(T2, 
T9); + T1d = VMUL(T6, T9); + Tx = VMUL(T5, T9); + T18 = VMUL(T3, T9); + Te = LDW(&(W[TWVL * 5])); + Tt = VMUL(T5, Te); + T1c = VMUL(T3, Te); + Tw = VMUL(T2, Te); + T19 = VMUL(T6, Te); + Th = LDW(&(W[TWVL * 6])); + TB = VMUL(T3, Th); + T14 = VMUL(T5, Th); + TG = VMUL(T6, Th); + TZ = VMUL(T2, Th); + Tl = LDW(&(W[TWVL * 7])); + TC = VMUL(T6, Tl); + T13 = VMUL(T2, Tl); + TF = VMUL(T3, Tl); + T10 = VMUL(T5, Tl); + } + TD = VADD(TB, TC); + TH = VSUB(TF, TG); + T1y = VADD(TZ, T10); + T1H = VADD(TF, TG); + T15 = VADD(T13, T14); + T1A = VSUB(T13, T14); + T11 = VSUB(TZ, T10); + T1F = VSUB(TB, TC); + T1n = VFMA(T9, Th, VMUL(Te, Tl)); + T1p = VFNMS(Te, Th, VMUL(T9, Tl)); + { + V T2o, T2p, T2s, T2t; + T2o = VMUL(T8, Th); + T2p = VMUL(Td, Tl); + T2q = VADD(T2o, T2p); + T2I = VSUB(T2o, T2p); + T2s = VMUL(T8, Tl); + T2t = VMUL(Td, Th); + T2u = VSUB(T2s, T2t); + T2K = VADD(T2s, T2t); + } + { + V T2T, T2U, T2X, T2Y; + T2T = VMUL(TM, Th); + T2U = VMUL(TO, Tl); + T2V = VSUB(T2T, T2U); + T3b = VADD(T2T, T2U); + T2X = VMUL(TM, Tl); + T2Y = VMUL(TO, Th); + T2Z = VADD(T2X, T2Y); + T3d = VSUB(T2X, T2Y); + Tu = VADD(Ts, Tt); + Ty = VSUB(Tw, Tx); + T3l = VFMA(Tu, Th, VMUL(Ty, Tl)); + T3n = VFNMS(Ty, Th, VMUL(Tu, Tl)); + } + T1t = VSUB(Ts, Tt); + T1v = VADD(Tw, Tx); + T2f = VFMA(T1t, Th, VMUL(T1v, Tl)); + T2h = VFNMS(T1v, Th, VMUL(T1t, Tl)); + T1a = VSUB(T18, T19); + T1e = VADD(T1c, T1d); + T32 = VFMA(T1a, Th, VMUL(T1e, Tl)); + T34 = VFNMS(T1e, Th, VMUL(T1a, Tl)); + T1W = VADD(T18, T19); + T1Y = VSUB(T1c, T1d); + T2C = VFMA(T1W, Th, VMUL(T1Y, Tl)); + T2E = VFNMS(T1Y, Th, VMUL(T1W, Tl)); + { + V Ta, Tf, Ti, Tj; + Ta = VMUL(T8, T9); + Tf = VMUL(Td, Te); + Tg = VSUB(Ta, Tf); + TR = VADD(Ta, Tf); + Ti = VMUL(T8, Te); + Tj = VMUL(Td, T9); + Tk = VADD(Ti, Tj); + TS = VSUB(Ti, Tj); + } + Tm = VFMA(Tg, Th, VMUL(Tk, Tl)); + TV = VFNMS(TS, Th, VMUL(TR, Tl)); + To = VFNMS(Tk, Th, VMUL(Tg, Tl)); + TT = VFMA(TR, Th, VMUL(TS, Tl)); + { + V T1K, T1L, T1N, T1O; + T1K = VMUL(TM, T9); + T1L = VMUL(TO, Te); + 
T1M = VSUB(T1K, T1L); + T21 = VADD(T1K, T1L); + T1N = VMUL(TM, Te); + T1O = VMUL(TO, T9); + T1P = VADD(T1N, T1O); + T22 = VSUB(T1N, T1O); + } + T1Q = VFMA(T1M, Th, VMUL(T1P, Tl)); + T25 = VFNMS(T22, Th, VMUL(T21, Tl)); + T1S = VFNMS(T1P, Th, VMUL(T1M, Tl)); + T23 = VFMA(T21, Th, VMUL(T22, Tl)); + } + { + V TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B; + V T5G, T3h, T6H, T6O, T7o, T4L, T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u; + V T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47; + V T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p; + V T4W, T5R, T55, T5O; + { + V T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T7G = LD(&(ii[0]), ms, &(ii[0])); + Tn = LD(&(ri[WS(rs, 16)]), ms, &(ri[0])); + Tp = LD(&(ii[WS(rs, 16)]), ms, &(ii[0])); + Tq = VFMA(Tm, Tn, VMUL(To, Tp)); + T7F = VFNMS(To, Tn, VMUL(Tm, Tp)); + { + V Tv, Tz, TE, TI; + Tv = LD(&(ri[WS(rs, 8)]), ms, &(ri[0])); + Tz = LD(&(ii[WS(rs, 8)]), ms, &(ii[0])); + TA = VFMA(Tu, Tv, VMUL(Ty, Tz)); + T3C = VFNMS(Ty, Tv, VMUL(Tu, Tz)); + TE = LD(&(ri[WS(rs, 24)]), ms, &(ri[0])); + TI = LD(&(ii[WS(rs, 24)]), ms, &(ii[0])); + TJ = VFMA(TD, TE, VMUL(TH, TI)); + T3D = VFNMS(TH, TE, VMUL(TD, TI)); + } + { + V Tr, TK, T8a, T8b; + Tr = VADD(T1, Tq); + TK = VADD(TA, TJ); + TL = VADD(Tr, TK); + T6f = VSUB(Tr, TK); + T8a = VSUB(T7G, T7F); + T8b = VSUB(TA, TJ); + T8c = VSUB(T8a, T8b); + T8q = VADD(T8b, T8a); + } + { + V T3B, T3E, T7E, T7H; + T3B = VSUB(T1, Tq); + T3E = VSUB(T3C, T3D); + T3F = VSUB(T3B, T3E); + T5t = VADD(T3B, T3E); + T7E = VADD(T3C, T3D); + T7H = VADD(T7F, T7G); + T7I = VADD(T7E, T7H); + T7W = VSUB(T7H, T7E); + } + } + { + V T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y; + { + V T2c, T2d, T2r, T2v; + T2c = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + T2d = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + T2e = VFMA(T2, T2c, VMUL(T5, T2d)); + T4g = VFNMS(T5, T2c, VMUL(T2, T2d)); + T2r = LD(&(ri[WS(rs, 25)]), ms, 
&(ri[WS(rs, 1)])); + T2v = LD(&(ii[WS(rs, 25)]), ms, &(ii[WS(rs, 1)])); + T2w = VFMA(T2q, T2r, VMUL(T2u, T2v)); + T4z = VFNMS(T2u, T2r, VMUL(T2q, T2v)); + } + { + V T2g, T2i, T2l, T2m; + T2g = LD(&(ri[WS(rs, 17)]), ms, &(ri[WS(rs, 1)])); + T2i = LD(&(ii[WS(rs, 17)]), ms, &(ii[WS(rs, 1)])); + T2j = VFMA(T2f, T2g, VMUL(T2h, T2i)); + T4h = VFNMS(T2h, T2g, VMUL(T2f, T2i)); + T2l = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)])); + T2m = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)])); + T2n = VFMA(T9, T2l, VMUL(Te, T2m)); + T4y = VFNMS(Te, T2l, VMUL(T9, T2m)); + } + { + V T2k, T2x, T6w, T6x; + T2k = VADD(T2e, T2j); + T2x = VADD(T2n, T2w); + T2y = VADD(T2k, T2x); + T6B = VSUB(T2k, T2x); + T6w = VADD(T4g, T4h); + T6x = VADD(T4y, T4z); + T6y = VSUB(T6w, T6x); + T7j = VADD(T6w, T6x); + } + { + V T4i, T4j, T4x, T4A; + T4i = VSUB(T4g, T4h); + T4j = VSUB(T2n, T2w); + T4k = VADD(T4i, T4j); + T5J = VSUB(T4i, T4j); + T4x = VSUB(T2e, T2j); + T4A = VSUB(T4y, T4z); + T4B = VSUB(T4x, T4A); + T5G = VADD(T4x, T4A); + } + } + { + V T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I; + { + V T2W, T30, T3c, T3e; + T2W = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)])); + T30 = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)])); + T31 = VFMA(T2V, T2W, VMUL(T2Z, T30)); + T4Y = VFNMS(T2Z, T2W, VMUL(T2V, T30)); + T3c = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)])); + T3e = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)])); + T3f = VFMA(T3b, T3c, VMUL(T3d, T3e)); + T4J = VFNMS(T3d, T3c, VMUL(T3b, T3e)); + } + { + V T33, T35, T38, T39; + T33 = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)])); + T35 = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)])); + T36 = VFMA(T32, T33, VMUL(T34, T35)); + T4Z = VFNMS(T34, T33, VMUL(T32, T35)); + T38 = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + T39 = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + T3a = VFMA(TR, T38, VMUL(TS, T39)); + T4I = VFNMS(TS, T38, VMUL(TR, T39)); + } + { + V T37, T3g, T6M, T6N; + T37 = VADD(T31, T36); + T3g = VADD(T3a, T3f); + T3h = VADD(T37, T3g); + T6H = VSUB(T37, T3g); + T6M = 
VADD(T4Y, T4Z); + T6N = VADD(T4I, T4J); + T6O = VSUB(T6M, T6N); + T7o = VADD(T6M, T6N); + } + { + V T4H, T4K, T50, T51; + T4H = VSUB(T31, T36); + T4K = VSUB(T4I, T4J); + T4L = VSUB(T4H, T4K); + T5N = VADD(T4H, T4K); + T50 = VSUB(T4Y, T4Z); + T51 = VSUB(T3a, T3f); + T52 = VADD(T50, T51); + T5Q = VSUB(T50, T51); + } + } + { + V TQ, T3G, T1g, T3N, TX, T3H, T17, T3M; + { + V TN, TP, T1b, T1f; + TN = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + TP = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + TQ = VFMA(TM, TN, VMUL(TO, TP)); + T3G = VFNMS(TO, TN, VMUL(TM, TP)); + T1b = LD(&(ri[WS(rs, 12)]), ms, &(ri[0])); + T1f = LD(&(ii[WS(rs, 12)]), ms, &(ii[0])); + T1g = VFMA(T1a, T1b, VMUL(T1e, T1f)); + T3N = VFNMS(T1e, T1b, VMUL(T1a, T1f)); + } + { + V TU, TW, T12, T16; + TU = LD(&(ri[WS(rs, 20)]), ms, &(ri[0])); + TW = LD(&(ii[WS(rs, 20)]), ms, &(ii[0])); + TX = VFMA(TT, TU, VMUL(TV, TW)); + T3H = VFNMS(TV, TU, VMUL(TT, TW)); + T12 = LD(&(ri[WS(rs, 28)]), ms, &(ri[0])); + T16 = LD(&(ii[WS(rs, 28)]), ms, &(ii[0])); + T17 = VFMA(T11, T12, VMUL(T15, T16)); + T3M = VFNMS(T15, T12, VMUL(T11, T16)); + } + { + V TY, T1h, T6g, T6h; + TY = VADD(TQ, TX); + T1h = VADD(T17, T1g); + T1i = VADD(TY, T1h); + T7V = VSUB(T1h, TY); + T6g = VADD(T3G, T3H); + T6h = VADD(T3M, T3N); + T6i = VSUB(T6g, T6h); + T7D = VADD(T6g, T6h); + } + { + V T3I, T3J, T3L, T3O; + T3I = VSUB(T3G, T3H); + T3J = VSUB(TQ, TX); + T3K = VSUB(T3I, T3J); + T5u = VADD(T3J, T3I); + T3L = VSUB(T17, T1g); + T3O = VSUB(T3M, T3N); + T3P = VADD(T3L, T3O); + T5v = VSUB(T3L, T3O); + } + } + { + V T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y; + { + V T1k, T1l, T1z, T1B; + T1k = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T1l = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + T1m = VFMA(T8, T1k, VMUL(Td, T1l)); + T3S = VFNMS(Td, T1k, VMUL(T8, T1l)); + T1z = LD(&(ri[WS(rs, 26)]), ms, &(ri[0])); + T1B = LD(&(ii[WS(rs, 26)]), ms, &(ii[0])); + T1C = VFMA(T1y, T1z, VMUL(T1A, T1B)); + T3Z = VFNMS(T1A, T1z, VMUL(T1y, T1B)); + } + { + V T1o, T1q, T1u, T1w; + T1o = LD(&(ri[WS(rs, 
18)]), ms, &(ri[0])); + T1q = LD(&(ii[WS(rs, 18)]), ms, &(ii[0])); + T1r = VFMA(T1n, T1o, VMUL(T1p, T1q)); + T3T = VFNMS(T1p, T1o, VMUL(T1n, T1q)); + T1u = LD(&(ri[WS(rs, 10)]), ms, &(ri[0])); + T1w = LD(&(ii[WS(rs, 10)]), ms, &(ii[0])); + T1x = VFMA(T1t, T1u, VMUL(T1v, T1w)); + T3Y = VFNMS(T1v, T1u, VMUL(T1t, T1w)); + } + { + V T1s, T1D, T6k, T6l; + T1s = VADD(T1m, T1r); + T1D = VADD(T1x, T1C); + T1E = VADD(T1s, T1D); + T6n = VSUB(T1s, T1D); + T6k = VADD(T3S, T3T); + T6l = VADD(T3Y, T3Z); + T6m = VSUB(T6k, T6l); + T7e = VADD(T6k, T6l); + } + { + V T3U, T3V, T3X, T40; + T3U = VSUB(T3S, T3T); + T3V = VSUB(T1x, T1C); + T3W = VADD(T3U, T3V); + T5y = VSUB(T3U, T3V); + T3X = VSUB(T1m, T1r); + T40 = VSUB(T3Y, T3Z); + T41 = VSUB(T3X, T40); + T5z = VADD(T3X, T40); + } + } + { + V T1J, T43, T27, T4a, T1U, T44, T20, T49; + { + V T1G, T1I, T24, T26; + T1G = LD(&(ri[WS(rs, 30)]), ms, &(ri[0])); + T1I = LD(&(ii[WS(rs, 30)]), ms, &(ii[0])); + T1J = VFMA(T1F, T1G, VMUL(T1H, T1I)); + T43 = VFNMS(T1H, T1G, VMUL(T1F, T1I)); + T24 = LD(&(ri[WS(rs, 22)]), ms, &(ri[0])); + T26 = LD(&(ii[WS(rs, 22)]), ms, &(ii[0])); + T27 = VFMA(T23, T24, VMUL(T25, T26)); + T4a = VFNMS(T25, T24, VMUL(T23, T26)); + } + { + V T1R, T1T, T1X, T1Z; + T1R = LD(&(ri[WS(rs, 14)]), ms, &(ri[0])); + T1T = LD(&(ii[WS(rs, 14)]), ms, &(ii[0])); + T1U = VFMA(T1Q, T1R, VMUL(T1S, T1T)); + T44 = VFNMS(T1S, T1R, VMUL(T1Q, T1T)); + T1X = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + T1Z = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + T20 = VFMA(T1W, T1X, VMUL(T1Y, T1Z)); + T49 = VFNMS(T1Y, T1X, VMUL(T1W, T1Z)); + } + { + V T1V, T28, T6q, T6r; + T1V = VADD(T1J, T1U); + T28 = VADD(T20, T27); + T29 = VADD(T1V, T28); + T6p = VSUB(T1V, T28); + T6q = VADD(T43, T44); + T6r = VADD(T49, T4a); + T6s = VSUB(T6q, T6r); + T7f = VADD(T6q, T6r); + } + { + V T45, T46, T48, T4b; + T45 = VSUB(T43, T44); + T46 = VSUB(T20, T27); + T47 = VADD(T45, T46); + T5B = VSUB(T45, T46); + T48 = VSUB(T1J, T1U); + T4b = VSUB(T49, T4a); + T4c = VSUB(T48, T4b); + T5C = 
VADD(T48, T4b); + } + } + { + V T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o; + { + V T2z, T2A, T2D, T2F; + T2z = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + T2A = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + T2B = VFMA(T21, T2z, VMUL(T22, T2A)); + T4r = VFNMS(T22, T2z, VMUL(T21, T2A)); + T2D = LD(&(ri[WS(rs, 21)]), ms, &(ri[WS(rs, 1)])); + T2F = LD(&(ii[WS(rs, 21)]), ms, &(ii[WS(rs, 1)])); + T2G = VFMA(T2C, T2D, VMUL(T2E, T2F)); + T4s = VFNMS(T2E, T2D, VMUL(T2C, T2F)); + } + T4q = VSUB(T2B, T2G); + T4t = VSUB(T4r, T4s); + { + V T2J, T2L, T2N, T2O; + T2J = LD(&(ri[WS(rs, 29)]), ms, &(ri[WS(rs, 1)])); + T2L = LD(&(ii[WS(rs, 29)]), ms, &(ii[WS(rs, 1)])); + T2M = VFMA(T2I, T2J, VMUL(T2K, T2L)); + T4m = VFNMS(T2K, T2J, VMUL(T2I, T2L)); + T2N = LD(&(ri[WS(rs, 13)]), ms, &(ri[WS(rs, 1)])); + T2O = LD(&(ii[WS(rs, 13)]), ms, &(ii[WS(rs, 1)])); + T2P = VFMA(T1M, T2N, VMUL(T1P, T2O)); + T4n = VFNMS(T1P, T2N, VMUL(T1M, T2O)); + } + T4l = VSUB(T2M, T2P); + T4o = VSUB(T4m, T4n); + { + V T2H, T2Q, T6C, T6D; + T2H = VADD(T2B, T2G); + T2Q = VADD(T2M, T2P); + T2R = VADD(T2H, T2Q); + T6z = VSUB(T2Q, T2H); + T6C = VADD(T4r, T4s); + T6D = VADD(T4m, T4n); + T6E = VSUB(T6C, T6D); + T7k = VADD(T6C, T6D); + } + { + V T4p, T4u, T4C, T4D; + T4p = VSUB(T4l, T4o); + T4u = VADD(T4q, T4t); + T4v = VMUL(LDK(KP707106781), VSUB(T4p, T4u)); + T5H = VMUL(LDK(KP707106781), VADD(T4u, T4p)); + T4C = VSUB(T4t, T4q); + T4D = VADD(T4l, T4o); + T4E = VMUL(LDK(KP707106781), VSUB(T4C, T4D)); + T5K = VMUL(LDK(KP707106781), VADD(T4C, T4D)); + } + } + { + V T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U; + { + V T3i, T3j, T3m, T3o; + T3i = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + T3j = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + T3k = VFMA(T3, T3i, VMUL(T6, T3j)); + T4M = VFNMS(T6, T3i, VMUL(T3, T3j)); + T3m = LD(&(ri[WS(rs, 19)]), ms, &(ri[WS(rs, 1)])); + T3o = LD(&(ii[WS(rs, 19)]), ms, &(ii[WS(rs, 1)])); + T3p = VFMA(T3l, T3m, VMUL(T3n, T3o)); + T4N = VFNMS(T3n, T3m, 
VMUL(T3l, T3o)); + } + T4O = VSUB(T4M, T4N); + T4P = VSUB(T3k, T3p); + { + V T3r, T3s, T3u, T3v; + T3r = LD(&(ri[WS(rs, 27)]), ms, &(ri[WS(rs, 1)])); + T3s = LD(&(ii[WS(rs, 27)]), ms, &(ii[WS(rs, 1)])); + T3t = VFMA(Th, T3r, VMUL(Tl, T3s)); + T4S = VFNMS(Tl, T3r, VMUL(Th, T3s)); + T3u = LD(&(ri[WS(rs, 11)]), ms, &(ri[WS(rs, 1)])); + T3v = LD(&(ii[WS(rs, 11)]), ms, &(ii[WS(rs, 1)])); + T3w = VFMA(Tg, T3u, VMUL(Tk, T3v)); + T4T = VFNMS(Tk, T3u, VMUL(Tg, T3v)); + } + T4R = VSUB(T3t, T3w); + T4U = VSUB(T4S, T4T); + { + V T3q, T3x, T6I, T6J; + T3q = VADD(T3k, T3p); + T3x = VADD(T3t, T3w); + T3y = VADD(T3q, T3x); + T6P = VSUB(T3x, T3q); + T6I = VADD(T4M, T4N); + T6J = VADD(T4S, T4T); + T6K = VSUB(T6I, T6J); + T7p = VADD(T6I, T6J); + } + { + V T4Q, T4V, T53, T54; + T4Q = VSUB(T4O, T4P); + T4V = VADD(T4R, T4U); + T4W = VMUL(LDK(KP707106781), VSUB(T4Q, T4V)); + T5R = VMUL(LDK(KP707106781), VADD(T4Q, T4V)); + T53 = VSUB(T4R, T4U); + T54 = VADD(T4P, T4O); + T55 = VMUL(LDK(KP707106781), VSUB(T53, T54)); + T5O = VMUL(LDK(KP707106781), VADD(T54, T53)); + } + } + { + V T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B; + { + V T1j, T2a, T7C, T7J; + T1j = VADD(TL, T1i); + T2a = VADD(T1E, T29); + T2b = VADD(T1j, T2a); + T7x = VSUB(T1j, T2a); + T7C = VADD(T7e, T7f); + T7J = VADD(T7D, T7I); + T7K = VADD(T7C, T7J); + T7M = VSUB(T7J, T7C); + } + { + V T2S, T3z, T7y, T7z; + T2S = VADD(T2y, T2R); + T3z = VADD(T3h, T3y); + T3A = VADD(T2S, T3z); + T7L = VSUB(T3z, T2S); + T7y = VADD(T7j, T7k); + T7z = VADD(T7o, T7p); + T7A = VSUB(T7y, T7z); + T7B = VADD(T7y, T7z); + } + ST(&(ri[WS(rs, 16)]), VSUB(T2b, T3A), ms, &(ri[0])); + ST(&(ii[WS(rs, 16)]), VSUB(T7K, T7B), ms, &(ii[0])); + ST(&(ri[0]), VADD(T2b, T3A), ms, &(ri[0])); + ST(&(ii[0]), VADD(T7B, T7K), ms, &(ii[0])); + ST(&(ri[WS(rs, 24)]), VSUB(T7x, T7A), ms, &(ri[0])); + ST(&(ii[WS(rs, 24)]), VSUB(T7M, T7L), ms, &(ii[0])); + ST(&(ri[WS(rs, 8)]), VADD(T7x, T7A), ms, &(ri[0])); + ST(&(ii[WS(rs, 8)]), VADD(T7L, T7M), ms, &(ii[0])); + } + { + V T7h, T7t, 
T7Q, T7S, T7m, T7u, T7r, T7v; + { + V T7d, T7g, T7O, T7P; + T7d = VSUB(TL, T1i); + T7g = VSUB(T7e, T7f); + T7h = VADD(T7d, T7g); + T7t = VSUB(T7d, T7g); + T7O = VSUB(T29, T1E); + T7P = VSUB(T7I, T7D); + T7Q = VADD(T7O, T7P); + T7S = VSUB(T7P, T7O); + } + { + V T7i, T7l, T7n, T7q; + T7i = VSUB(T2y, T2R); + T7l = VSUB(T7j, T7k); + T7m = VADD(T7i, T7l); + T7u = VSUB(T7l, T7i); + T7n = VSUB(T3h, T3y); + T7q = VSUB(T7o, T7p); + T7r = VSUB(T7n, T7q); + T7v = VADD(T7n, T7q); + } + { + V T7s, T7N, T7w, T7R; + T7s = VMUL(LDK(KP707106781), VADD(T7m, T7r)); + ST(&(ri[WS(rs, 20)]), VSUB(T7h, T7s), ms, &(ri[0])); + ST(&(ri[WS(rs, 4)]), VADD(T7h, T7s), ms, &(ri[0])); + T7N = VMUL(LDK(KP707106781), VADD(T7u, T7v)); + ST(&(ii[WS(rs, 4)]), VADD(T7N, T7Q), ms, &(ii[0])); + ST(&(ii[WS(rs, 20)]), VSUB(T7Q, T7N), ms, &(ii[0])); + T7w = VMUL(LDK(KP707106781), VSUB(T7u, T7v)); + ST(&(ri[WS(rs, 28)]), VSUB(T7t, T7w), ms, &(ri[0])); + ST(&(ri[WS(rs, 12)]), VADD(T7t, T7w), ms, &(ri[0])); + T7R = VMUL(LDK(KP707106781), VSUB(T7r, T7m)); + ST(&(ii[WS(rs, 12)]), VADD(T7R, T7S), ms, &(ii[0])); + ST(&(ii[WS(rs, 28)]), VSUB(T7S, T7R), ms, &(ii[0])); + } + } + { + V T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R; + V T6V; + { + V T6o, T6t, T6A, T6F; + T6j = VSUB(T6f, T6i); + T7X = VADD(T7V, T7W); + T83 = VSUB(T7W, T7V); + T6X = VADD(T6f, T6i); + T6o = VSUB(T6m, T6n); + T6t = VADD(T6p, T6s); + T6u = VMUL(LDK(KP707106781), VSUB(T6o, T6t)); + T7U = VMUL(LDK(KP707106781), VADD(T6o, T6t)); + { + V T75, T76, T6Y, T6Z; + T75 = VADD(T6H, T6K); + T76 = VADD(T6O, T6P); + T77 = VFNMS(LDK(KP382683432), T76, VMUL(LDK(KP923879532), T75)); + T7b = VFMA(LDK(KP923879532), T76, VMUL(LDK(KP382683432), T75)); + T6Y = VADD(T6n, T6m); + T6Z = VSUB(T6p, T6s); + T70 = VMUL(LDK(KP707106781), VADD(T6Y, T6Z)); + T82 = VMUL(LDK(KP707106781), VSUB(T6Z, T6Y)); + } + T6A = VSUB(T6y, T6z); + T6F = VSUB(T6B, T6E); + T6G = VFMA(LDK(KP923879532), T6A, VMUL(LDK(KP382683432), T6F)); + T6U = 
VFNMS(LDK(KP923879532), T6F, VMUL(LDK(KP382683432), T6A)); + { + V T72, T73, T6L, T6Q; + T72 = VADD(T6y, T6z); + T73 = VADD(T6B, T6E); + T74 = VFMA(LDK(KP382683432), T72, VMUL(LDK(KP923879532), T73)); + T7a = VFNMS(LDK(KP382683432), T73, VMUL(LDK(KP923879532), T72)); + T6L = VSUB(T6H, T6K); + T6Q = VSUB(T6O, T6P); + T6R = VFNMS(LDK(KP923879532), T6Q, VMUL(LDK(KP382683432), T6L)); + T6V = VFMA(LDK(KP382683432), T6Q, VMUL(LDK(KP923879532), T6L)); + } + } + { + V T6v, T6S, T81, T84; + T6v = VADD(T6j, T6u); + T6S = VADD(T6G, T6R); + ST(&(ri[WS(rs, 22)]), VSUB(T6v, T6S), ms, &(ri[0])); + ST(&(ri[WS(rs, 6)]), VADD(T6v, T6S), ms, &(ri[0])); + T81 = VADD(T6U, T6V); + T84 = VADD(T82, T83); + ST(&(ii[WS(rs, 6)]), VADD(T81, T84), ms, &(ii[0])); + ST(&(ii[WS(rs, 22)]), VSUB(T84, T81), ms, &(ii[0])); + } + { + V T6T, T6W, T85, T86; + T6T = VSUB(T6j, T6u); + T6W = VSUB(T6U, T6V); + ST(&(ri[WS(rs, 30)]), VSUB(T6T, T6W), ms, &(ri[0])); + ST(&(ri[WS(rs, 14)]), VADD(T6T, T6W), ms, &(ri[0])); + T85 = VSUB(T6R, T6G); + T86 = VSUB(T83, T82); + ST(&(ii[WS(rs, 14)]), VADD(T85, T86), ms, &(ii[0])); + ST(&(ii[WS(rs, 30)]), VSUB(T86, T85), ms, &(ii[0])); + } + { + V T71, T78, T7T, T7Y; + T71 = VADD(T6X, T70); + T78 = VADD(T74, T77); + ST(&(ri[WS(rs, 18)]), VSUB(T71, T78), ms, &(ri[0])); + ST(&(ri[WS(rs, 2)]), VADD(T71, T78), ms, &(ri[0])); + T7T = VADD(T7a, T7b); + T7Y = VADD(T7U, T7X); + ST(&(ii[WS(rs, 2)]), VADD(T7T, T7Y), ms, &(ii[0])); + ST(&(ii[WS(rs, 18)]), VSUB(T7Y, T7T), ms, &(ii[0])); + } + { + V T79, T7c, T7Z, T80; + T79 = VSUB(T6X, T70); + T7c = VSUB(T7a, T7b); + ST(&(ri[WS(rs, 26)]), VSUB(T79, T7c), ms, &(ri[0])); + ST(&(ri[WS(rs, 10)]), VADD(T79, T7c), ms, &(ri[0])); + T7Z = VSUB(T77, T74); + T80 = VSUB(T7X, T7U); + ST(&(ii[WS(rs, 10)]), VADD(T7Z, T80), ms, &(ii[0])); + ST(&(ii[WS(rs, 26)]), VSUB(T80, T7Z), ms, &(ii[0])); + } + } + { + V T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57; + V T5b, T3Q, T8p; + T3Q = VMUL(LDK(KP707106781), VSUB(T3K, T3P)); 
+ T3R = VSUB(T3F, T3Q); + T5d = VADD(T3F, T3Q); + T8p = VMUL(LDK(KP707106781), VSUB(T5v, T5u)); + T8r = VADD(T8p, T8q); + T8x = VSUB(T8q, T8p); + { + V T42, T4d, T5l, T5m; + T42 = VFNMS(LDK(KP923879532), T41, VMUL(LDK(KP382683432), T3W)); + T4d = VFMA(LDK(KP382683432), T47, VMUL(LDK(KP923879532), T4c)); + T4e = VSUB(T42, T4d); + T8o = VADD(T42, T4d); + T5l = VADD(T4L, T4W); + T5m = VADD(T52, T55); + T5n = VFNMS(LDK(KP555570233), T5m, VMUL(LDK(KP831469612), T5l)); + T5r = VFMA(LDK(KP831469612), T5m, VMUL(LDK(KP555570233), T5l)); + } + { + V T4w, T4F, T5e, T5f; + T4w = VSUB(T4k, T4v); + T4F = VSUB(T4B, T4E); + T4G = VFMA(LDK(KP980785280), T4w, VMUL(LDK(KP195090322), T4F)); + T5a = VFNMS(LDK(KP980785280), T4F, VMUL(LDK(KP195090322), T4w)); + T5e = VFMA(LDK(KP923879532), T3W, VMUL(LDK(KP382683432), T41)); + T5f = VFNMS(LDK(KP923879532), T47, VMUL(LDK(KP382683432), T4c)); + T5g = VADD(T5e, T5f); + T8w = VSUB(T5f, T5e); + } + { + V T5i, T5j, T4X, T56; + T5i = VADD(T4k, T4v); + T5j = VADD(T4B, T4E); + T5k = VFMA(LDK(KP555570233), T5i, VMUL(LDK(KP831469612), T5j)); + T5q = VFNMS(LDK(KP555570233), T5j, VMUL(LDK(KP831469612), T5i)); + T4X = VSUB(T4L, T4W); + T56 = VSUB(T52, T55); + T57 = VFNMS(LDK(KP980785280), T56, VMUL(LDK(KP195090322), T4X)); + T5b = VFMA(LDK(KP195090322), T56, VMUL(LDK(KP980785280), T4X)); + } + { + V T4f, T58, T8v, T8y; + T4f = VADD(T3R, T4e); + T58 = VADD(T4G, T57); + ST(&(ri[WS(rs, 23)]), VSUB(T4f, T58), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VADD(T4f, T58), ms, &(ri[WS(rs, 1)])); + T8v = VADD(T5a, T5b); + T8y = VADD(T8w, T8x); + ST(&(ii[WS(rs, 7)]), VADD(T8v, T8y), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 23)]), VSUB(T8y, T8v), ms, &(ii[WS(rs, 1)])); + } + { + V T59, T5c, T8z, T8A; + T59 = VSUB(T3R, T4e); + T5c = VSUB(T5a, T5b); + ST(&(ri[WS(rs, 31)]), VSUB(T59, T5c), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 15)]), VADD(T59, T5c), ms, &(ri[WS(rs, 1)])); + T8z = VSUB(T57, T4G); + T8A = VSUB(T8x, T8w); + ST(&(ii[WS(rs, 15)]), VADD(T8z, T8A), ms, 
&(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 31)]), VSUB(T8A, T8z), ms, &(ii[WS(rs, 1)])); + } + { + V T5h, T5o, T8n, T8s; + T5h = VADD(T5d, T5g); + T5o = VADD(T5k, T5n); + ST(&(ri[WS(rs, 19)]), VSUB(T5h, T5o), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VADD(T5h, T5o), ms, &(ri[WS(rs, 1)])); + T8n = VADD(T5q, T5r); + T8s = VADD(T8o, T8r); + ST(&(ii[WS(rs, 3)]), VADD(T8n, T8s), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 19)]), VSUB(T8s, T8n), ms, &(ii[WS(rs, 1)])); + } + { + V T5p, T5s, T8t, T8u; + T5p = VSUB(T5d, T5g); + T5s = VSUB(T5q, T5r); + ST(&(ri[WS(rs, 27)]), VSUB(T5p, T5s), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 11)]), VADD(T5p, T5s), ms, &(ri[WS(rs, 1)])); + T8t = VSUB(T5n, T5k); + T8u = VSUB(T8r, T8o); + ST(&(ii[WS(rs, 11)]), VADD(T8t, T8u), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 27)]), VSUB(T8u, T8t), ms, &(ii[WS(rs, 1)])); + } + } + { + V T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T; + V T5X, T5w, T89; + T5w = VMUL(LDK(KP707106781), VADD(T5u, T5v)); + T5x = VSUB(T5t, T5w); + T5Z = VADD(T5t, T5w); + T89 = VMUL(LDK(KP707106781), VADD(T3K, T3P)); + T8d = VADD(T89, T8c); + T8j = VSUB(T8c, T89); + { + V T5A, T5D, T67, T68; + T5A = VFNMS(LDK(KP382683432), T5z, VMUL(LDK(KP923879532), T5y)); + T5D = VFMA(LDK(KP923879532), T5B, VMUL(LDK(KP382683432), T5C)); + T5E = VSUB(T5A, T5D); + T88 = VADD(T5A, T5D); + T67 = VADD(T5N, T5O); + T68 = VADD(T5Q, T5R); + T69 = VFNMS(LDK(KP195090322), T68, VMUL(LDK(KP980785280), T67)); + T6d = VFMA(LDK(KP195090322), T67, VMUL(LDK(KP980785280), T68)); + } + { + V T5I, T5L, T60, T61; + T5I = VSUB(T5G, T5H); + T5L = VSUB(T5J, T5K); + T5M = VFMA(LDK(KP555570233), T5I, VMUL(LDK(KP831469612), T5L)); + T5W = VFNMS(LDK(KP831469612), T5I, VMUL(LDK(KP555570233), T5L)); + T60 = VFMA(LDK(KP382683432), T5y, VMUL(LDK(KP923879532), T5z)); + T61 = VFNMS(LDK(KP382683432), T5B, VMUL(LDK(KP923879532), T5C)); + T62 = VADD(T60, T61); + T8i = VSUB(T61, T60); + } + { + V T64, T65, T5P, T5S; + T64 = VADD(T5G, T5H); + T65 = VADD(T5J, 
T5K); + T66 = VFMA(LDK(KP980785280), T64, VMUL(LDK(KP195090322), T65)); + T6c = VFNMS(LDK(KP195090322), T64, VMUL(LDK(KP980785280), T65)); + T5P = VSUB(T5N, T5O); + T5S = VSUB(T5Q, T5R); + T5T = VFNMS(LDK(KP831469612), T5S, VMUL(LDK(KP555570233), T5P)); + T5X = VFMA(LDK(KP831469612), T5P, VMUL(LDK(KP555570233), T5S)); + } + { + V T5F, T5U, T8h, T8k; + T5F = VADD(T5x, T5E); + T5U = VADD(T5M, T5T); + ST(&(ri[WS(rs, 21)]), VSUB(T5F, T5U), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VADD(T5F, T5U), ms, &(ri[WS(rs, 1)])); + T8h = VADD(T5W, T5X); + T8k = VADD(T8i, T8j); + ST(&(ii[WS(rs, 5)]), VADD(T8h, T8k), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 21)]), VSUB(T8k, T8h), ms, &(ii[WS(rs, 1)])); + } + { + V T5V, T5Y, T8l, T8m; + T5V = VSUB(T5x, T5E); + T5Y = VSUB(T5W, T5X); + ST(&(ri[WS(rs, 29)]), VSUB(T5V, T5Y), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 13)]), VADD(T5V, T5Y), ms, &(ri[WS(rs, 1)])); + T8l = VSUB(T5T, T5M); + T8m = VSUB(T8j, T8i); + ST(&(ii[WS(rs, 13)]), VADD(T8l, T8m), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 29)]), VSUB(T8m, T8l), ms, &(ii[WS(rs, 1)])); + } + { + V T63, T6a, T87, T8e; + T63 = VADD(T5Z, T62); + T6a = VADD(T66, T69); + ST(&(ri[WS(rs, 17)]), VSUB(T63, T6a), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(T63, T6a), ms, &(ri[WS(rs, 1)])); + T87 = VADD(T6c, T6d); + T8e = VADD(T88, T8d); + ST(&(ii[WS(rs, 1)]), VADD(T87, T8e), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 17)]), VSUB(T8e, T87), ms, &(ii[WS(rs, 1)])); + } + { + V T6b, T6e, T8f, T8g; + T6b = VSUB(T5Z, T62); + T6e = VSUB(T6c, T6d); + ST(&(ri[WS(rs, 25)]), VSUB(T6b, T6e), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 9)]), VADD(T6b, T6e), ms, &(ri[WS(rs, 1)])); + T8f = VSUB(T69, T66); + T8g = VSUB(T8d, T88); + ST(&(ii[WS(rs, 9)]), VADD(T8f, T8g), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 25)]), VSUB(T8g, T8f), ms, &(ii[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 27), + {TW_NEXT, (2 * VL), 0} +}; + +static 
const ct_desc desc = { 32, XSIMD_STRING("t2sv_32"), twinstr, &GENUS, {376, 168, 112, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_32) (planner *p) { + X(kdft_dit_register) (p, t2sv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 4 -name t2sv_4 -include ts.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 37 stack variables, 0 constants, and 16 memory accesses + */ +#include "ts.h" + +static void t2sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 4), MAKE_VOLATILE_STRIDE(8, rs)) { + V T2, T6, T3, T5, T1, Tx, T8, Tc, Tf, Ta, T4, Th, Tj, Tl; + T2 = LDW(&(W[0])); + T6 = LDW(&(W[TWVL * 3])); + T3 = LDW(&(W[TWVL * 2])); + T5 = LDW(&(W[TWVL * 1])); + T1 = LD(&(ri[0]), ms, &(ri[0])); + Tx = LD(&(ii[0]), ms, &(ii[0])); + T8 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + Tc = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + Tf = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Ta = VMUL(T2, T6); + T4 = VMUL(T2, T3); + Th = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + Tj = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + Tl = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + { + V Tg, Tb, T7, Tp, Tk, Tr, Ti; + Tg = VMUL(T2, Tf); + Tb = VFNMS(T5, T3, Ta); + T7 = VFMA(T5, T6, T4); + Tp = VMUL(T2, Th); + Tk = VMUL(T3, Tj); + Tr = VMUL(T3, Tl); + Ti = VFMA(T5, Th, Tg); + { + V Tv, T9, Tq, Tm, Ts, Tw, Td; + Tv = VMUL(T7, Tc); + T9 = VMUL(T7, T8); + Tq = VFNMS(T5, Tf, Tp); + Tm = VFMA(T6, Tl, Tk); + Ts = VFNMS(T6, 
Tj, Tr); + Tw = VFNMS(Tb, T8, Tv); + Td = VFMA(Tb, Tc, T9); + { + V Tn, TA, Tu, Tt; + Tn = VADD(Ti, Tm); + TA = VSUB(Ti, Tm); + Tu = VADD(Tq, Ts); + Tt = VSUB(Tq, Ts); + { + V Ty, Tz, Te, To; + Ty = VADD(Tw, Tx); + Tz = VSUB(Tx, Tw); + Te = VADD(T1, Td); + To = VSUB(T1, Td); + ST(&(ii[WS(rs, 3)]), VADD(TA, Tz), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VSUB(Tz, TA), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 2)]), VSUB(Ty, Tu), ms, &(ii[0])); + ST(&(ii[0]), VADD(Tu, Ty), ms, &(ii[0])); + ST(&(ri[WS(rs, 1)]), VADD(To, Tt), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VSUB(To, Tt), ms, &(ri[WS(rs, 1)])); + ST(&(ri[0]), VADD(Te, Tn), ms, &(ri[0])); + ST(&(ri[WS(rs, 2)]), VSUB(Te, Tn), ms, &(ri[0])); + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t2sv_4"), twinstr, &GENUS, {16, 8, 8, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_4) (planner *p) { + X(kdft_dit_register) (p, t2sv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 4 -name t2sv_4 -include ts.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "ts.h" + +static void t2sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + (mb * 4); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 4), MAKE_VOLATILE_STRIDE(8, rs)) { + V T2, T4, T3, T5, T6, T8; + T2 = LDW(&(W[0])); + T4 = LDW(&(W[TWVL * 1])); + T3 = LDW(&(W[TWVL * 2])); + T5 = LDW(&(W[TWVL * 3])); + T6 = VFMA(T2, T3, VMUL(T4, T5)); + T8 = VFNMS(T4, T3, VMUL(T2, T5)); + { + V T1, Tp, Ta, To, Te, Tk, Th, Tl, T7, T9; + T1 = LD(&(ri[0]), ms, 
&(ri[0])); + Tp = LD(&(ii[0]), ms, &(ii[0])); + T7 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T9 = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + Ta = VFMA(T6, T7, VMUL(T8, T9)); + To = VFNMS(T8, T7, VMUL(T6, T9)); + { + V Tc, Td, Tf, Tg; + Tc = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Td = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + Te = VFMA(T2, Tc, VMUL(T4, Td)); + Tk = VFNMS(T4, Tc, VMUL(T2, Td)); + Tf = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + Tg = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + Th = VFMA(T3, Tf, VMUL(T5, Tg)); + Tl = VFNMS(T5, Tf, VMUL(T3, Tg)); + } + { + V Tb, Ti, Tn, Tq; + Tb = VADD(T1, Ta); + Ti = VADD(Te, Th); + ST(&(ri[WS(rs, 2)]), VSUB(Tb, Ti), ms, &(ri[0])); + ST(&(ri[0]), VADD(Tb, Ti), ms, &(ri[0])); + Tn = VADD(Tk, Tl); + Tq = VADD(To, Tp); + ST(&(ii[0]), VADD(Tn, Tq), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VSUB(Tq, Tn), ms, &(ii[0])); + } + { + V Tj, Tm, Tr, Ts; + Tj = VSUB(T1, Ta); + Tm = VSUB(Tk, Tl); + ST(&(ri[WS(rs, 3)]), VSUB(Tj, Tm), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(Tj, Tm), ms, &(ri[WS(rs, 1)])); + Tr = VSUB(Tp, To); + Ts = VSUB(Te, Th); + ST(&(ii[WS(rs, 1)]), VSUB(Tr, Ts), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VADD(Ts, Tr), ms, &(ii[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t2sv_4"), twinstr, &GENUS, {16, 8, 8, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_4) (planner *p) { + X(kdft_dit_register) (p, t2sv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * 
it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:54 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 8 -name t2sv_8 -include ts.h */ + +/* + * This function contains 74 FP additions, 50 FP multiplications, + * (or, 44 additions, 20 multiplications, 30 fused multiply/add), + * 64 stack variables, 1 constants, and 32 memory accesses + */ +#include "ts.h" + +static void t2sv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(16, rs)) { + V T1m, T1l, T1k, T1u, T1n, T1o; + { + V T2, T3, Tl, Tn, T5, T6; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + Tl = LDW(&(W[TWVL * 4])); + Tn = LDW(&(W[TWVL * 5])); + T5 = LDW(&(W[TWVL * 1])); + T6 = LDW(&(W[TWVL * 3])); + { + V T1, T1s, TK, T1r, Td, Tk, TG, TC, TY, Tu, TW, TL, TM, TO, TQ; + V Tx, Tz, TD, TH; + { + V T8, T4, Tm, Tr, Tc, Ta; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T1s = LD(&(ii[0]), 
ms, &(ii[0])); + T8 = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + T4 = VMUL(T2, T3); + Tm = VMUL(T2, Tl); + Tr = VMUL(T2, Tn); + Tc = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + Ta = VMUL(T2, T6); + { + V Tp, Tt, Tg, T7, Tf, To, Ts, Ti, Tb, Tj; + Tp = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + Tt = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + Tg = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + T7 = VFNMS(T5, T6, T4); + Tf = VFMA(T5, T6, T4); + To = VFMA(T5, Tn, Tm); + Ts = VFNMS(T5, Tl, Tr); + Ti = VFNMS(T5, T3, Ta); + Tb = VFMA(T5, T3, Ta); + Tj = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + TK = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + { + V T1q, T9, Th, TF; + T1q = VMUL(T7, Tc); + T9 = VMUL(T7, T8); + Th = VMUL(Tf, Tg); + TF = VMUL(Tf, Tn); + { + V TB, TX, Tq, TV; + TB = VMUL(Tf, Tl); + TX = VMUL(To, Tt); + Tq = VMUL(To, Tp); + TV = VMUL(Tf, Tj); + T1r = VFNMS(Tb, T8, T1q); + Td = VFMA(Tb, Tc, T9); + Tk = VFMA(Ti, Tj, Th); + TG = VFNMS(Ti, Tl, TF); + TC = VFMA(Ti, Tn, TB); + TY = VFNMS(Ts, Tp, TX); + Tu = VFMA(Ts, Tt, Tq); + TW = VFNMS(Ti, Tg, TV); + TL = VMUL(Tl, TK); + } + } + TM = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + TO = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + TQ = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + Tx = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Tz = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + TD = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + TH = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + } + } + { + V Te, T1p, T1g, T10, TS, T18, T1d, T1t, T1x, T1y, Tv, TJ, T11, T16; + { + V TN, T1a, TR, T1c, TA, T13, TI, T15; + { + V TU, T19, TP, T1b, Ty, T12, TE, T14, TZ; + TU = VSUB(T1, Td); + Te = VADD(T1, Td); + TN = VFMA(Tn, TM, TL); + T19 = VMUL(Tl, TM); + TP = VMUL(T3, TO); + T1b = VMUL(T3, TQ); + Ty = VMUL(T2, Tx); + T12 = VMUL(T2, Tz); + TE = VMUL(TC, TD); + T14 = VMUL(TC, TH); + T1p = VADD(TW, TY); + TZ = VSUB(TW, TY); + T1a = VFNMS(Tn, TK, T19); + TR = VFMA(T6, TQ, TP); + T1c = VFNMS(T6, TO, T1b); + TA = VFMA(T5, Tz, Ty); + T13 = VFNMS(T5, Tx, T12); + TI = 
VFMA(TG, TH, TE); + T15 = VFNMS(TG, TD, T14); + T1g = VSUB(TU, TZ); + T10 = VADD(TU, TZ); + } + TS = VADD(TN, TR); + T18 = VSUB(TN, TR); + T1d = VSUB(T1a, T1c); + T1m = VADD(T1a, T1c); + T1t = VADD(T1r, T1s); + T1x = VSUB(T1s, T1r); + T1y = VSUB(Tk, Tu); + Tv = VADD(Tk, Tu); + TJ = VADD(TA, TI); + T11 = VSUB(TA, TI); + T16 = VSUB(T13, T15); + T1l = VADD(T13, T15); + } + { + V Tw, T1w, T1v, TT; + { + V T1i, T1e, T1B, T1z, T1h, T17; + T1i = VADD(T18, T1d); + T1e = VSUB(T18, T1d); + T1B = VADD(T1y, T1x); + T1z = VSUB(T1x, T1y); + T1h = VSUB(T16, T11); + T17 = VADD(T11, T16); + T1k = VSUB(Te, Tv); + Tw = VADD(Te, Tv); + { + V T1A, T1j, T1C, T1f; + T1A = VADD(T1h, T1i); + T1j = VSUB(T1h, T1i); + T1C = VSUB(T1e, T17); + T1f = VADD(T17, T1e); + T1w = VSUB(T1t, T1p); + T1u = VADD(T1p, T1t); + T1v = VSUB(TS, TJ); + TT = VADD(TJ, TS); + ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP707106781), T1A, T1z), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VFNMS(LDK(KP707106781), T1A, T1z), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP707106781), T1j, T1g), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 7)]), VFNMS(LDK(KP707106781), T1j, T1g), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP707106781), T1C, T1B), ms, &(ii[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VFNMS(LDK(KP707106781), T1C, T1B), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP707106781), T1f, T10), ms, &(ri[WS(rs, 1)])); + ST(&(ri[WS(rs, 5)]), VFNMS(LDK(KP707106781), T1f, T10), ms, &(ri[WS(rs, 1)])); + } + } + ST(&(ri[WS(rs, 4)]), VSUB(Tw, TT), ms, &(ri[0])); + ST(&(ri[0]), VADD(Tw, TT), ms, &(ri[0])); + ST(&(ii[WS(rs, 6)]), VSUB(T1w, T1v), ms, &(ii[0])); + ST(&(ii[WS(rs, 2)]), VADD(T1v, T1w), ms, &(ii[0])); + } + } + } + } + T1n = VSUB(T1l, T1m); + T1o = VADD(T1l, T1m); + ST(&(ii[0]), VADD(T1o, T1u), ms, &(ii[0])); + ST(&(ii[WS(rs, 4)]), VSUB(T1u, T1o), ms, &(ii[0])); + ST(&(ri[WS(rs, 2)]), VADD(T1k, T1n), ms, &(ri[0])); + ST(&(ri[WS(rs, 6)]), VSUB(T1k, T1n), ms, &(ri[0])); + } + } + VLEAVE(); +} + +static 
const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 7), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t2sv_8"), twinstr, &GENUS, {44, 20, 30, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_8) (planner *p) { + X(kdft_dit_register) (p, t2sv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 8 -name t2sv_8 -include ts.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 42 stack variables, 1 constants, and 32 memory accesses + */ +#include "ts.h" + +static void t2sv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + (mb * 6); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(16, rs)) { + V T2, T5, T3, T6, T8, Tc, Tg, Ti, Tl, Tm, Tn, Tz, Tp, Tx; + { + V T4, Tb, T7, Ta; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 1])); + T3 = LDW(&(W[TWVL * 2])); + T6 = LDW(&(W[TWVL * 3])); + T4 = VMUL(T2, T3); + Tb = VMUL(T5, T3); + T7 = VMUL(T5, T6); + Ta = VMUL(T2, T6); + T8 = VSUB(T4, T7); + Tc = VADD(Ta, Tb); + Tg = VADD(T4, T7); + Ti = VSUB(Ta, Tb); + Tl = LDW(&(W[TWVL * 4])); + Tm = LDW(&(W[TWVL * 5])); + Tn = VFMA(T2, Tl, VMUL(T5, Tm)); + Tz = VFNMS(Ti, Tl, VMUL(Tg, Tm)); + Tp = VFNMS(T5, Tl, VMUL(T2, Tm)); + Tx = VFMA(Tg, Tl, VMUL(Ti, Tm)); + } + { + V Tf, T1i, TL, T1d, TJ, T17, TV, TY, Ts, T1j, TO, T1a, TC, T16, TQ; + V TT; + { + V T1, T1c, Te, T1b, T9, Td; + T1 = LD(&(ri[0]), ms, &(ri[0])); + T1c = LD(&(ii[0]), ms, &(ii[0])); + T9 = LD(&(ri[WS(rs, 4)]), ms, &(ri[0])); + Td = LD(&(ii[WS(rs, 4)]), ms, &(ii[0])); + Te = VFMA(T8, T9, VMUL(Tc, Td)); + T1b = VFNMS(Tc, T9, VMUL(T8, Td)); + Tf = VADD(T1, Te); + T1i = VSUB(T1c, T1b); + 
TL = VSUB(T1, Te); + T1d = VADD(T1b, T1c); + } + { + V TF, TW, TI, TX; + { + V TD, TE, TG, TH; + TD = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)])); + TE = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)])); + TF = VFMA(Tl, TD, VMUL(Tm, TE)); + TW = VFNMS(Tm, TD, VMUL(Tl, TE)); + TG = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)])); + TH = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)])); + TI = VFMA(T3, TG, VMUL(T6, TH)); + TX = VFNMS(T6, TG, VMUL(T3, TH)); + } + TJ = VADD(TF, TI); + T17 = VADD(TW, TX); + TV = VSUB(TF, TI); + TY = VSUB(TW, TX); + } + { + V Tk, TM, Tr, TN; + { + V Th, Tj, To, Tq; + Th = LD(&(ri[WS(rs, 2)]), ms, &(ri[0])); + Tj = LD(&(ii[WS(rs, 2)]), ms, &(ii[0])); + Tk = VFMA(Tg, Th, VMUL(Ti, Tj)); + TM = VFNMS(Ti, Th, VMUL(Tg, Tj)); + To = LD(&(ri[WS(rs, 6)]), ms, &(ri[0])); + Tq = LD(&(ii[WS(rs, 6)]), ms, &(ii[0])); + Tr = VFMA(Tn, To, VMUL(Tp, Tq)); + TN = VFNMS(Tp, To, VMUL(Tn, Tq)); + } + Ts = VADD(Tk, Tr); + T1j = VSUB(Tk, Tr); + TO = VSUB(TM, TN); + T1a = VADD(TM, TN); + } + { + V Tw, TR, TB, TS; + { + V Tu, Tv, Ty, TA; + Tu = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)])); + Tv = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)])); + Tw = VFMA(T2, Tu, VMUL(T5, Tv)); + TR = VFNMS(T5, Tu, VMUL(T2, Tv)); + Ty = LD(&(ri[WS(rs, 5)]), ms, &(ri[WS(rs, 1)])); + TA = LD(&(ii[WS(rs, 5)]), ms, &(ii[WS(rs, 1)])); + TB = VFMA(Tx, Ty, VMUL(Tz, TA)); + TS = VFNMS(Tz, Ty, VMUL(Tx, TA)); + } + TC = VADD(Tw, TB); + T16 = VADD(TR, TS); + TQ = VSUB(Tw, TB); + TT = VSUB(TR, TS); + } + { + V Tt, TK, T1f, T1g; + Tt = VADD(Tf, Ts); + TK = VADD(TC, TJ); + ST(&(ri[WS(rs, 4)]), VSUB(Tt, TK), ms, &(ri[0])); + ST(&(ri[0]), VADD(Tt, TK), ms, &(ri[0])); + { + V T19, T1e, T15, T18; + T19 = VADD(T16, T17); + T1e = VADD(T1a, T1d); + ST(&(ii[0]), VADD(T19, T1e), ms, &(ii[0])); + ST(&(ii[WS(rs, 4)]), VSUB(T1e, T19), ms, &(ii[0])); + T15 = VSUB(Tf, Ts); + T18 = VSUB(T16, T17); + ST(&(ri[WS(rs, 6)]), VSUB(T15, T18), ms, &(ri[0])); + ST(&(ri[WS(rs, 2)]), VADD(T15, T18), ms, &(ri[0])); + } + T1f = VSUB(TJ, TC); + 
T1g = VSUB(T1d, T1a); + ST(&(ii[WS(rs, 2)]), VADD(T1f, T1g), ms, &(ii[0])); + ST(&(ii[WS(rs, 6)]), VSUB(T1g, T1f), ms, &(ii[0])); + { + V T11, T1k, T14, T1h, T12, T13; + T11 = VSUB(TL, TO); + T1k = VSUB(T1i, T1j); + T12 = VSUB(TT, TQ); + T13 = VADD(TV, TY); + T14 = VMUL(LDK(KP707106781), VSUB(T12, T13)); + T1h = VMUL(LDK(KP707106781), VADD(T12, T13)); + ST(&(ri[WS(rs, 7)]), VSUB(T11, T14), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 5)]), VSUB(T1k, T1h), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 3)]), VADD(T11, T14), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 1)]), VADD(T1h, T1k), ms, &(ii[WS(rs, 1)])); + } + { + V TP, T1m, T10, T1l, TU, TZ; + TP = VADD(TL, TO); + T1m = VADD(T1j, T1i); + TU = VADD(TQ, TT); + TZ = VSUB(TV, TY); + T10 = VMUL(LDK(KP707106781), VADD(TU, TZ)); + T1l = VMUL(LDK(KP707106781), VSUB(TZ, TU)); + ST(&(ri[WS(rs, 5)]), VSUB(TP, T10), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 7)]), VSUB(T1m, T1l), ms, &(ii[WS(rs, 1)])); + ST(&(ri[WS(rs, 1)]), VADD(TP, T10), ms, &(ri[WS(rs, 1)])); + ST(&(ii[WS(rs, 3)]), VADD(T1l, T1m), ms, &(ii[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 7), + {TW_NEXT, (2 * VL), 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t2sv_8"), twinstr, &GENUS, {56, 26, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t2sv_8) (planner *p) { + X(kdft_dit_register) (p, t2sv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * 
(at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3bv_10 -include t3b.h -sign 1 */ + +/* + * This function contains 57 FP additions, 52 FP multiplications, + * (or, 39 additions, 34 multiplications, 18 fused multiply/add), + * 57 stack variables, 4 constants, and 20 memory accesses + */ +#include "t3b.h" + +static void t3bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(10, rs)) { + V T1, T7, Th, Tx, Tr, Td, Tp, T6, Tv, Tc, Te, Ti, Tl, T2, T3; + V T5; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + T5 = LDW(&(W[TWVL * 4])); + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V To, Tw, Tq, Tu, Ta, T4, Tt, Tk, Tb; + To = LD(&(x[WS(rs, 4)]), ms, &(x[0])); 
+ Tw = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tq = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tu = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ta = VZMULJ(T2, T3); + T4 = VZMUL(T2, T3); + Th = VZMULJ(T2, T5); + Tt = VZMULJ(T3, T5); + Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tx = VZMUL(T2, Tw); + Tr = VZMUL(T5, Tq); + Tk = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = VZMULJ(Ta, T5); + Tp = VZMUL(T4, To); + T6 = VZMULJ(T4, T5); + Tv = VZMUL(Tt, Tu); + Tc = VZMUL(Ta, Tb); + Te = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tl = VZMUL(T3, Tk); + } + { + V TN, Ts, T8, Ty, TO, Tf, Tj; + TN = VADD(Tp, Tr); + Ts = VSUB(Tp, Tr); + T8 = VZMUL(T6, T7); + Ty = VSUB(Tv, Tx); + TO = VADD(Tv, Tx); + Tf = VZMUL(Td, Te); + Tj = VZMUL(Th, Ti); + { + V T9, TJ, TP, TU, Tz, TF, Tg, TK, Tm, TL; + T9 = VSUB(T1, T8); + TJ = VADD(T1, T8); + TP = VADD(TN, TO); + TU = VSUB(TN, TO); + Tz = VADD(Ts, Ty); + TF = VSUB(Ts, Ty); + Tg = VSUB(Tc, Tf); + TK = VADD(Tc, Tf); + Tm = VSUB(Tj, Tl); + TL = VADD(Tj, Tl); + { + V TM, TV, Tn, TE; + TM = VADD(TK, TL); + TV = VSUB(TK, TL); + Tn = VADD(Tg, Tm); + TE = VSUB(Tg, Tm); + { + V TW, TY, TS, TQ, TG, TI, TC, TA, TR, TB; + TW = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TV, TU)); + TY = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TU, TV)); + TS = VSUB(TM, TP); + TQ = VADD(TM, TP); + TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TF, TE)); + TI = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TE, TF)); + TC = VSUB(Tn, Tz); + TA = VADD(Tn, Tz); + ST(&(x[0]), VADD(TJ, TQ), ms, &(x[0])); + TR = VFNMS(LDK(KP250000000), TQ, TJ); + ST(&(x[WS(rs, 5)]), VADD(T9, TA), ms, &(x[WS(rs, 1)])); + TB = VFNMS(LDK(KP250000000), TA, T9); + { + V TX, TT, TH, TD; + TX = VFMA(LDK(KP559016994), TS, TR); + TT = VFNMS(LDK(KP559016994), TS, TR); + TH = VFNMS(LDK(KP559016994), TC, TB); + TD = VFMA(LDK(KP559016994), TC, TB); + ST(&(x[WS(rs, 8)]), VFMAI(TW, TT), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(TW, TT), ms, &(x[0])); + 
ST(&(x[WS(rs, 6)]), VFMAI(TY, TX), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(TY, TX), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(TG, TD), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(TG, TD), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(TI, TH), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(TI, TH), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t3bv_10"), twinstr, &GENUS, {39, 34, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_10) (planner *p) { + X(kdft_dit_register) (p, t3bv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3bv_10 -include t3b.h -sign 1 */ + +/* + * This function contains 57 FP additions, 42 FP multiplications, + * (or, 51 additions, 36 multiplications, 6 fused multiply/add), + * 41 stack variables, 4 constants, and 20 memory accesses + */ +#include "t3b.h" + +static void t3bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(10, rs)) { + V T1, T2, T3, Ti, T6, T7, TA, Tb, To; + T1 = LDW(&(W[0])); + T2 = LDW(&(W[TWVL * 2])); + T3 = VZMULJ(T1, T2); + Ti = VZMUL(T1, T2); + T6 = LDW(&(W[TWVL * 4])); + T7 = VZMULJ(T3, T6); + TA = VZMULJ(Ti, T6); + Tb = VZMULJ(T1, T6); + To = VZMULJ(T2, T6); + { + V TD, TQ, Tn, Tt, Tx, TM, TN, TS, Ta, Tg, Tw, TJ, TK, TR, Tz; + V TC, 
TB; + Tz = LD(&(x[0]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TC = VZMUL(TA, TB); + TD = VSUB(Tz, TC); + TQ = VADD(Tz, TC); + { + V Tk, Ts, Tm, Tq; + { + V Tj, Tr, Tl, Tp; + Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tk = VZMUL(Ti, Tj); + Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ts = VZMUL(T1, Tr); + Tl = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tm = VZMUL(T6, Tl); + Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tq = VZMUL(To, Tp); + } + Tn = VSUB(Tk, Tm); + Tt = VSUB(Tq, Ts); + Tx = VADD(Tn, Tt); + TM = VADD(Tk, Tm); + TN = VADD(Tq, Ts); + TS = VADD(TM, TN); + } + { + V T5, Tf, T9, Td; + { + V T4, Te, T8, Tc; + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = VZMUL(T3, T4); + Te = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tf = VZMUL(T2, Te); + T8 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T9 = VZMUL(T7, T8); + Tc = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Td = VZMUL(Tb, Tc); + } + Ta = VSUB(T5, T9); + Tg = VSUB(Td, Tf); + Tw = VADD(Ta, Tg); + TJ = VADD(T5, T9); + TK = VADD(Td, Tf); + TR = VADD(TJ, TK); + } + { + V Ty, TE, TF, Tv, TI, Th, Tu, TH, TG; + Ty = VMUL(LDK(KP559016994), VSUB(Tw, Tx)); + TE = VADD(Tw, Tx); + TF = VFNMS(LDK(KP250000000), TE, TD); + Th = VSUB(Ta, Tg); + Tu = VSUB(Tn, Tt); + Tv = VBYI(VFMA(LDK(KP951056516), Th, VMUL(LDK(KP587785252), Tu))); + TI = VBYI(VFNMS(LDK(KP951056516), Tu, VMUL(LDK(KP587785252), Th))); + ST(&(x[WS(rs, 5)]), VADD(TD, TE), ms, &(x[WS(rs, 1)])); + TH = VSUB(TF, Ty); + ST(&(x[WS(rs, 3)]), VSUB(TH, TI), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(TI, TH), ms, &(x[WS(rs, 1)])); + TG = VADD(Ty, TF); + ST(&(x[WS(rs, 1)]), VADD(Tv, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(TG, Tv), ms, &(x[WS(rs, 1)])); + } + { + V TV, TT, TU, TP, TY, TL, TO, TX, TW; + TV = VMUL(LDK(KP559016994), VSUB(TR, TS)); + TT = VADD(TR, TS); + TU = VFNMS(LDK(KP250000000), TT, TQ); + TL = VSUB(TJ, TK); + TO = VSUB(TM, TN); + TP = VBYI(VFNMS(LDK(KP951056516), TO, VMUL(LDK(KP587785252), TL))); + TY = 
VBYI(VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TO))); + ST(&(x[0]), VADD(TQ, TT), ms, &(x[0])); + TX = VADD(TV, TU); + ST(&(x[WS(rs, 4)]), VSUB(TX, TY), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(TY, TX), ms, &(x[0])); + TW = VSUB(TU, TV); + ST(&(x[WS(rs, 2)]), VADD(TP, TW), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TW, TP), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t3bv_10"), twinstr, &GENUS, {51, 36, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_10) (planner *p) { + X(kdft_dit_register) (p, t3bv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:47 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 16 -name t3bv_16 -include t3b.h -sign 1 */ + +/* + * This function contains 98 FP additions, 86 FP multiplications, + * (or, 64 additions, 52 multiplications, 34 fused multiply/add), + * 70 stack variables, 3 constants, and 32 memory accesses + */ +#include "t3b.h" + +static void t3bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(16, rs)) { + V T13, Tg, TY, T14, T1A, T1q, T1f, T1x, T1r, T1i, Tt, T16, TB, T1j, T1k; + V TH; + { + V T2, T8, Tu, T3; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + Tu = LDW(&(W[TWVL * 6])); + T3 = LDW(&(W[TWVL * 4])); + { + V Ty, T1o, Tf, T1b, T7, Tr, TQ, TX, T1g, Tl, To, Tw, TG, Tz, T1p; + V T1e, TC; + { + V T1, T5, Ta, Td; + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V TR, TN, TM, TE, Tb, Tp, Tm, Te, T6, TW, TO, TS; + { + V TL, Tx, T9, TU, Tc, T4, TV; + TL = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tx = VZMULJ(T2, T8); + T9 = VZMUL(T2, T8); + TR = VZMULJ(T2, 
Tu); + TU = VZMULJ(T8, T3); + Tc = VZMUL(T8, T3); + T4 = VZMULJ(T2, T3); + TN = VZMUL(T2, T3); + TV = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TM = VZMUL(Tx, TL); + Ty = VZMULJ(Tx, T3); + TE = VZMUL(Tx, T3); + Tb = VZMUL(T9, Ta); + Tp = VZMUL(T9, T3); + Tm = VZMULJ(T9, T3); + Te = VZMUL(Tc, Td); + T6 = VZMUL(T4, T5); + TW = VZMUL(TU, TV); + } + TO = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TS = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + { + V TP, TT, Ti, Tk, Tn, Th, Tq, Tj; + Th = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tq = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tj = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1o = VSUB(Tb, Te); + Tf = VADD(Tb, Te); + T1b = VSUB(T1, T6); + T7 = VADD(T1, T6); + TP = VZMUL(TN, TO); + TT = VZMUL(TR, TS); + Ti = VZMUL(T2, Th); + Tr = VZMUL(Tp, Tq); + Tk = VZMUL(T3, Tj); + Tn = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T1c, T1d, Tv, TF; + Tv = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + TF = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1c = VSUB(TM, TP); + TQ = VADD(TM, TP); + T1d = VSUB(TT, TW); + TX = VADD(TT, TW); + T1g = VSUB(Ti, Tk); + Tl = VADD(Ti, Tk); + To = VZMUL(Tm, Tn); + Tw = VZMUL(Tu, Tv); + TG = VZMUL(TE, TF); + Tz = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1p = VSUB(T1c, T1d); + T1e = VADD(T1c, T1d); + TC = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + } + } + } + } + { + V T1h, Ts, TA, TD; + T13 = VADD(T7, Tf); + Tg = VSUB(T7, Tf); + T1h = VSUB(To, Tr); + Ts = VADD(To, Tr); + TY = VSUB(TQ, TX); + T14 = VADD(TQ, TX); + TA = VZMUL(Ty, Tz); + T1A = VFNMS(LDK(KP707106781), T1p, T1o); + T1q = VFMA(LDK(KP707106781), T1p, T1o); + T1f = VFMA(LDK(KP707106781), T1e, T1b); + T1x = VFNMS(LDK(KP707106781), T1e, T1b); + TD = VZMUL(T8, TC); + T1r = VFMA(LDK(KP414213562), T1g, T1h); + T1i = VFNMS(LDK(KP414213562), T1h, T1g); + Tt = VSUB(Tl, Ts); + T16 = VADD(Tl, Ts); + TB = VADD(Tw, TA); + T1j = VSUB(Tw, TA); + T1k = VSUB(TG, TD); + TH = VADD(TD, TG); + } + } + } + { + V T15, T19, T1l, T1s, TI, T17; + T15 = VSUB(T13, T14); + 
T19 = VADD(T13, T14); + T1l = VFNMS(LDK(KP414213562), T1k, T1j); + T1s = VFMA(LDK(KP414213562), T1j, T1k); + TI = VSUB(TB, TH); + T17 = VADD(TB, TH); + { + V T1y, T1t, T1B, T1m; + T1y = VADD(T1r, T1s); + T1t = VSUB(T1r, T1s); + T1B = VSUB(T1i, T1l); + T1m = VADD(T1i, T1l); + { + V T18, T1a, TJ, TZ; + T18 = VSUB(T16, T17); + T1a = VADD(T16, T17); + TJ = VADD(Tt, TI); + TZ = VSUB(Tt, TI); + { + V T1u, T1w, T1z, T1D; + T1u = VFNMS(LDK(KP923879532), T1t, T1q); + T1w = VFMA(LDK(KP923879532), T1t, T1q); + T1z = VFNMS(LDK(KP923879532), T1y, T1x); + T1D = VFMA(LDK(KP923879532), T1y, T1x); + { + V T1n, T1v, T1C, T1E; + T1n = VFNMS(LDK(KP923879532), T1m, T1f); + T1v = VFMA(LDK(KP923879532), T1m, T1f); + T1C = VFMA(LDK(KP923879532), T1B, T1A); + T1E = VFNMS(LDK(KP923879532), T1B, T1A); + ST(&(x[WS(rs, 8)]), VSUB(T19, T1a), ms, &(x[0])); + ST(&(x[0]), VADD(T19, T1a), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T18, T15), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T18, T15), ms, &(x[0])); + { + V T10, T12, TK, T11; + T10 = VFNMS(LDK(KP707106781), TZ, TY); + T12 = VFMA(LDK(KP707106781), TZ, TY); + TK = VFNMS(LDK(KP707106781), TJ, Tg); + T11 = VFMA(LDK(KP707106781), TJ, Tg); + ST(&(x[WS(rs, 15)]), VFNMSI(T1w, T1v), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T1w, T1v), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T1u, T1n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1u, T1n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T1E, T1D), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T1E, T1D), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T1C, T1z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T1C, T1z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFMAI(T12, T11), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T12, T11), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T10, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T10, TK), ms, &(x[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), 
+ VTW(0, 9), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t3bv_16"), twinstr, &GENUS, {64, 52, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_16) (planner *p) { + X(kdft_dit_register) (p, t3bv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 16 -name t3bv_16 -include t3b.h -sign 1 */ + +/* + * This function contains 98 FP additions, 64 FP multiplications, + * (or, 94 additions, 60 multiplications, 4 fused multiply/add), + * 51 stack variables, 3 constants, and 32 memory accesses + */ +#include "t3b.h" + +static void t3bv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(16, rs)) { + V T1, T8, T9, Tl, Ti, TE, T4, Ta, TO, TV, Td, Tm, TA, TH, Ts; + T1 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + T9 = VZMUL(T1, T8); + Tl = VZMULJ(T1, T8); + Ti = LDW(&(W[TWVL * 6])); + TE = VZMULJ(T1, Ti); + T4 = LDW(&(W[TWVL * 4])); + Ta = VZMULJ(T9, T4); + TO = VZMUL(T8, T4); + TV = VZMULJ(T1, T4); + Td = VZMUL(T9, T4); + Tm = VZMULJ(Tl, T4); + TA = VZMUL(T1, T4); + TH = VZMULJ(T8, T4); + Ts = VZMUL(Tl, T4); + { + V TY, T1q, TR, T1r, T1m, T1n, TL, TZ, T1f, T1g, T1h, Th, T11, T1i, T1j; + V T1k, Tw, T12, TU, TX, TW; + TU = LD(&(x[0]), ms, &(x[0])); + TW = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + TX = VZMUL(TV, TW); + TY = VSUB(TU, TX); + T1q = VADD(TU, TX); + { + V TN, TQ, TM, TP; + TM = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TN = VZMUL(T9, TM); + TP = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TQ = VZMUL(TO, TP); + TR = VSUB(TN, 
TQ); + T1r = VADD(TN, TQ); + } + { + V Tz, TJ, TC, TG, TD, TK; + { + V Ty, TI, TB, TF; + Ty = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tz = VZMUL(Tl, Ty); + TI = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TJ = VZMUL(TH, TI); + TB = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TC = VZMUL(TA, TB); + TF = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TG = VZMUL(TE, TF); + } + T1m = VADD(Tz, TC); + T1n = VADD(TG, TJ); + TD = VSUB(Tz, TC); + TK = VSUB(TG, TJ); + TL = VMUL(LDK(KP707106781), VSUB(TD, TK)); + TZ = VMUL(LDK(KP707106781), VADD(TD, TK)); + } + { + V T3, Tf, T6, Tc, T7, Tg; + { + V T2, Te, T5, Tb; + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = VZMUL(T1, T2); + Te = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tf = VZMUL(Td, Te); + T5 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T6 = VZMUL(T4, T5); + Tb = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tc = VZMUL(Ta, Tb); + } + T1f = VADD(T3, T6); + T1g = VADD(Tc, Tf); + T1h = VSUB(T1f, T1g); + T7 = VSUB(T3, T6); + Tg = VSUB(Tc, Tf); + Th = VFNMS(LDK(KP382683432), Tg, VMUL(LDK(KP923879532), T7)); + T11 = VFMA(LDK(KP382683432), T7, VMUL(LDK(KP923879532), Tg)); + } + { + V Tk, Tu, To, Tr, Tp, Tv; + { + V Tj, Tt, Tn, Tq; + Tj = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tk = VZMUL(Ti, Tj); + Tt = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tu = VZMUL(Ts, Tt); + Tn = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + To = VZMUL(Tm, Tn); + Tq = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tr = VZMUL(T8, Tq); + } + T1i = VADD(Tk, To); + T1j = VADD(Tr, Tu); + T1k = VSUB(T1i, T1j); + Tp = VSUB(Tk, To); + Tv = VSUB(Tr, Tu); + Tw = VFMA(LDK(KP923879532), Tp, VMUL(LDK(KP382683432), Tv)); + T12 = VFNMS(LDK(KP382683432), Tp, VMUL(LDK(KP923879532), Tv)); + } + { + V T1p, T1v, T1u, T1w; + { + V T1l, T1o, T1s, T1t; + T1l = VMUL(LDK(KP707106781), VSUB(T1h, T1k)); + T1o = VSUB(T1m, T1n); + T1p = VBYI(VSUB(T1l, T1o)); + T1v = VBYI(VADD(T1o, T1l)); + T1s = VSUB(T1q, T1r); + T1t = VMUL(LDK(KP707106781), VADD(T1h, T1k)); + T1u = VSUB(T1s, T1t); + T1w = 
VADD(T1s, T1t); + } + ST(&(x[WS(rs, 6)]), VADD(T1p, T1u), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T1w, T1v), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(T1u, T1p), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1v, T1w), ms, &(x[0])); + } + { + V T1z, T1D, T1C, T1E; + { + V T1x, T1y, T1A, T1B; + T1x = VADD(T1q, T1r); + T1y = VADD(T1m, T1n); + T1z = VSUB(T1x, T1y); + T1D = VADD(T1x, T1y); + T1A = VADD(T1f, T1g); + T1B = VADD(T1i, T1j); + T1C = VBYI(VSUB(T1A, T1B)); + T1E = VADD(T1A, T1B); + } + ST(&(x[WS(rs, 12)]), VSUB(T1z, T1C), ms, &(x[0])); + ST(&(x[0]), VADD(T1D, T1E), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1z, T1C), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(T1D, T1E), ms, &(x[0])); + } + { + V TT, T15, T14, T16; + { + V Tx, TS, T10, T13; + Tx = VSUB(Th, Tw); + TS = VSUB(TL, TR); + TT = VBYI(VSUB(Tx, TS)); + T15 = VBYI(VADD(TS, Tx)); + T10 = VSUB(TY, TZ); + T13 = VSUB(T11, T12); + T14 = VSUB(T10, T13); + T16 = VADD(T10, T13); + } + ST(&(x[WS(rs, 5)]), VADD(TT, T14), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T16, T15), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(T14, TT), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T15, T16), ms, &(x[WS(rs, 1)])); + } + { + V T19, T1d, T1c, T1e; + { + V T17, T18, T1a, T1b; + T17 = VADD(TY, TZ); + T18 = VADD(Th, Tw); + T19 = VADD(T17, T18); + T1d = VSUB(T17, T18); + T1a = VADD(TR, TL); + T1b = VADD(T11, T12); + T1c = VBYI(VADD(T1a, T1b)); + T1e = VBYI(VSUB(T1b, T1a)); + } + ST(&(x[WS(rs, 15)]), VSUB(T19, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T1d, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T19, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(T1d, T1e), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t3bv_16"), twinstr, &GENUS, {94, 60, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_16) (planner *p) { + 
X(kdft_dit_register) (p, t3bv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:49 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 20 -name t3bv_20 -include t3b.h -sign 1 */ + +/* + * This function contains 138 FP additions, 118 FP multiplications, + * (or, 92 additions, 72 multiplications, 46 fused multiply/add), + * 90 stack variables, 4 constants, and 40 memory accesses + */ +#include "t3b.h" + +static void t3bv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, 
+0.951056516295153572116439333379382143405698634); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(20, rs)) { + V T19, T1u, T1p, T1x, T1m, T1w, T1t, TI; + { + V T2, T8, T3, Td; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + T3 = LDW(&(W[TWVL * 4])); + Td = LDW(&(W[TWVL * 6])); + { + V T7, T1g, T1F, T23, T1n, Tp, T18, T27, T1P, T1I, TU, T1L, T28, T1S, T1o; + V TE, T1l, T1j, T26, T2e; + { + V T1, T1e, T5, T1b; + T1 = LD(&(x[0]), ms, &(x[0])); + T1e = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T1b = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V TA, Tx, TQ, T1O, T10, Th, T1G, T1R, T17, T1J, To, Ts, TR, Tv, TK; + V TM, TP, Ty, TB; + { + V Tq, Tt, T13, T16, Tk, Tn; + { + V Tl, Ti, T11, T14, TV, Tc, T6, Tb, Tf, TW, TY, T1f; + { + V T1d, Ta, T9, T4; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TA = VZMULJ(T2, T8); + T9 = VZMUL(T2, T8); + Tx = VZMUL(T8, T3); + Tl = VZMULJ(T8, T3); + T4 = VZMUL(T2, T3); + Tq = VZMULJ(T2, T3); + Tt = VZMULJ(T2, Td); + Ti = VZMULJ(T8, Td); + T11 = VZMULJ(TA, Td); + T14 = VZMULJ(TA, T3); + TQ = VZMUL(TA, T3); + T1d = VZMULJ(T9, Td); + TV = VZMUL(T9, T3); + Tc = VZMULJ(T9, T3); + T6 = VZMUL(T4, T5); + Tb = VZMUL(T9, Ta); + Tf = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TW = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TY = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1f = VZMUL(T1d, T1e); + } + { + V T1D, TX, TZ, T15, T1E, Tg, T12, T1c, Te, Tj, Tm; + T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1c = VZMUL(Tc, T1b); + Te = VZMULJ(Tc, Td); + T7 = VSUB(T1, T6); + T1D = VADD(T1, T6); + TX = VZMUL(TV, TW); + TZ = VZMUL(T8, TY); + T15 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T13 = VZMUL(T11, T12); + T1g = VSUB(T1c, T1f); + T1E = VADD(T1c, T1f); + 
Tg = VZMUL(Te, Tf); + Tj = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1O = VADD(TX, TZ); + T10 = VSUB(TX, TZ); + T16 = VZMUL(T14, T15); + T1F = VSUB(T1D, T1E); + T23 = VADD(T1D, T1E); + Th = VSUB(Tb, Tg); + T1G = VADD(Tb, Tg); + Tk = VZMUL(Ti, Tj); + Tn = VZMUL(Tl, Tm); + } + } + { + V Tr, Tu, TJ, TL, TO; + Tr = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1R = VADD(T13, T16); + T17 = VSUB(T13, T16); + Tu = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TJ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TL = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TO = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T1J = VADD(Tk, Tn); + To = VSUB(Tk, Tn); + Ts = VZMUL(Tq, Tr); + TR = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tv = VZMUL(Tt, Tu); + TK = VZMUL(T3, TJ); + TM = VZMUL(Td, TL); + TP = VZMUL(T2, TO); + Ty = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + } + } + { + V T1N, Tw, T1H, TN, Tz, TC, T1i, TT, T1K, TS; + T1n = VSUB(Th, To); + Tp = VADD(Th, To); + TS = VZMUL(TQ, TR); + T1N = VADD(Ts, Tv); + Tw = VSUB(Ts, Tv); + T1H = VADD(TK, TM); + TN = VSUB(TK, TM); + Tz = VZMUL(Tx, Ty); + TC = VZMUL(TA, TB); + T18 = VSUB(T10, T17); + T1i = VADD(T10, T17); + TT = VSUB(TP, TS); + T1K = VADD(TP, TS); + T27 = VADD(T1N, T1O); + T1P = VSUB(T1N, T1O); + { + V TD, T1Q, T24, T1h, T25; + TD = VSUB(Tz, TC); + T1Q = VADD(Tz, TC); + T1I = VSUB(T1G, T1H); + T24 = VADD(T1G, T1H); + T1h = VADD(TN, TT); + TU = VSUB(TN, TT); + T25 = VADD(T1J, T1K); + T1L = VSUB(T1J, T1K); + T28 = VADD(T1Q, T1R); + T1S = VSUB(T1Q, T1R); + T1o = VSUB(Tw, TD); + TE = VADD(Tw, TD); + T1l = VSUB(T1h, T1i); + T1j = VADD(T1h, T1i); + T26 = VADD(T24, T25); + T2e = VSUB(T24, T25); + } + } + } + } + { + V T1M, T1Z, T1Y, T1T, T29, T2f, TH, TF, T1k, T1C; + T1M = VADD(T1I, T1L); + T1Z = VSUB(T1I, T1L); + T1Y = VSUB(T1P, T1S); + T1T = VADD(T1P, T1S); + T29 = VADD(T27, T28); + T2f = VSUB(T27, T28); + TH = VSUB(Tp, TE); + TF = VADD(Tp, TE); + T1k = VFNMS(LDK(KP250000000), T1j, 
T1g); + T1C = VADD(T1g, T1j); + { + V T1W, T2c, TG, T2i, T2g, T22, T20, T1V, T2b, T1U, T2a, T1B; + T19 = VFMA(LDK(KP618033988), T18, TU); + T1u = VFNMS(LDK(KP618033988), TU, T18); + T1W = VSUB(T1M, T1T); + T1U = VADD(T1M, T1T); + T2c = VSUB(T26, T29); + T2a = VADD(T26, T29); + TG = VFNMS(LDK(KP250000000), TF, T7); + T1B = VADD(T7, TF); + T2i = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T2e, T2f)); + T2g = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T2f, T2e)); + T22 = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1Y, T1Z)); + T20 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1Z, T1Y)); + ST(&(x[WS(rs, 10)]), VADD(T1F, T1U), ms, &(x[0])); + T1V = VFNMS(LDK(KP250000000), T1U, T1F); + ST(&(x[0]), VADD(T23, T2a), ms, &(x[0])); + T2b = VFNMS(LDK(KP250000000), T2a, T23); + ST(&(x[WS(rs, 5)]), VFMAI(T1C, T1B), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T1C, T1B), ms, &(x[WS(rs, 1)])); + T1p = VFMA(LDK(KP618033988), T1o, T1n); + T1x = VFNMS(LDK(KP618033988), T1n, T1o); + { + V T21, T1X, T2h, T2d; + T21 = VFMA(LDK(KP559016994), T1W, T1V); + T1X = VFNMS(LDK(KP559016994), T1W, T1V); + T2h = VFNMS(LDK(KP559016994), T2c, T2b); + T2d = VFMA(LDK(KP559016994), T2c, T2b); + ST(&(x[WS(rs, 18)]), VFMAI(T20, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFNMSI(T20, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T22, T21), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFMAI(T22, T21), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFMAI(T2g, T2d), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFNMSI(T2g, T2d), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T2i, T2h), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T2i, T2h), ms, &(x[0])); + T1m = VFMA(LDK(KP559016994), T1l, T1k); + T1w = VFNMS(LDK(KP559016994), T1l, T1k); + T1t = VFNMS(LDK(KP559016994), TH, TG); + TI = VFMA(LDK(KP559016994), TH, TG); + } + } + } + } + } + { + V T1A, T1y, T1q, T1s, T1a, T1r, T1z, T1v; + T1A = VFMA(LDK(KP951056516), T1x, T1w); + T1y = VFNMS(LDK(KP951056516), T1x, T1w); + T1q = VFMA(LDK(KP951056516), T1p, T1m); + T1s = 
VFNMS(LDK(KP951056516), T1p, T1m); + T1a = VFNMS(LDK(KP951056516), T19, TI); + T1r = VFMA(LDK(KP951056516), T19, TI); + T1z = VFNMS(LDK(KP951056516), T1u, T1t); + T1v = VFMA(LDK(KP951056516), T1u, T1t); + ST(&(x[WS(rs, 9)]), VFMAI(T1s, T1r), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T1s, T1r), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T1q, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T1q, T1a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFMAI(T1y, T1v), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T1y, T1v), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T1A, T1z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T1A, T1z), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t3bv_20"), twinstr, &GENUS, {92, 72, 46, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_20) (planner *p) { + X(kdft_dit_register) (p, t3bv_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 20 -name t3bv_20 -include t3b.h -sign 1 */ + +/* + * This function contains 138 FP additions, 92 FP multiplications, + * (or, 126 additions, 80 multiplications, 12 fused multiply/add), + * 73 stack variables, 4 constants, and 40 memory accesses + */ +#include "t3b.h" + +static void t3bv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), 
MAKE_VOLATILE_STRIDE(20, rs)) { + V T2, T8, T9, TA, T3, Tc, T4, TV, T14, Tl, Tq, Tx, TQ, Td, Te; + V T1g, Ti, Tt, T11; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + T9 = VZMUL(T2, T8); + TA = VZMULJ(T2, T8); + T3 = LDW(&(W[TWVL * 4])); + Tc = VZMULJ(T9, T3); + T4 = VZMUL(T2, T3); + TV = VZMUL(T9, T3); + T14 = VZMULJ(TA, T3); + Tl = VZMULJ(T8, T3); + Tq = VZMULJ(T2, T3); + Tx = VZMUL(T8, T3); + TQ = VZMUL(TA, T3); + Td = LDW(&(W[TWVL * 6])); + Te = VZMULJ(Tc, Td); + T1g = VZMULJ(T9, Td); + Ti = VZMULJ(T8, Td); + Tt = VZMULJ(T2, Td); + T11 = VZMULJ(TA, Td); + { + V T7, T1j, T1U, T2a, TU, T1n, T1o, T18, Tp, TE, TF, T26, T27, T28, T1M; + V T1P, T1W, T1b, T1c, T1k, T23, T24, T25, T1F, T1I, T1V, T1B, T1C; + { + V T1, T1i, T6, T1f, T1h, T5, T1e, T1S, T1T; + T1 = LD(&(x[0]), ms, &(x[0])); + T1h = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1i = VZMUL(T1g, T1h); + T5 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T6 = VZMUL(T4, T5); + T1e = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1f = VZMUL(Tc, T1e); + T7 = VSUB(T1, T6); + T1j = VSUB(T1f, T1i); + T1S = VADD(T1, T6); + T1T = VADD(T1f, T1i); + T1U = VSUB(T1S, T1T); + T2a = VADD(T1S, T1T); + } + { + V Th, T1D, T10, T1L, T17, T1O, To, T1G, Tw, T1K, TN, T1E, TT, T1H, TD; + V T1N; + { + V Tb, Tg, Ta, Tf; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = VZMUL(T9, Ta); + Tf = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tg = VZMUL(Te, Tf); + Th = VSUB(Tb, Tg); + T1D = VADD(Tb, Tg); + } + { + V TX, TZ, TW, TY; + TW = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + TX = VZMUL(TV, TW); + TY = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TZ = VZMUL(T8, TY); + T10 = VSUB(TX, TZ); + T1L = VADD(TX, TZ); + } + { + V T13, T16, T12, T15; + T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T13 = VZMUL(T11, T12); + T15 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T16 = VZMUL(T14, T15); + T17 = VSUB(T13, T16); + T1O = VADD(T13, T16); + } + { + V Tk, Tn, Tj, Tm; + Tj = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tk = VZMUL(Ti, Tj); + Tm = LD(&(x[WS(rs, 6)]), 
ms, &(x[0])); + Tn = VZMUL(Tl, Tm); + To = VSUB(Tk, Tn); + T1G = VADD(Tk, Tn); + } + { + V Ts, Tv, Tr, Tu; + Tr = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ts = VZMUL(Tq, Tr); + Tu = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tv = VZMUL(Tt, Tu); + Tw = VSUB(Ts, Tv); + T1K = VADD(Ts, Tv); + } + { + V TK, TM, TJ, TL; + TJ = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TK = VZMUL(T3, TJ); + TL = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TM = VZMUL(Td, TL); + TN = VSUB(TK, TM); + T1E = VADD(TK, TM); + } + { + V TP, TS, TO, TR; + TO = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TP = VZMUL(T2, TO); + TR = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TS = VZMUL(TQ, TR); + TT = VSUB(TP, TS); + T1H = VADD(TP, TS); + } + { + V Tz, TC, Ty, TB; + Ty = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tz = VZMUL(Tx, Ty); + TB = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TC = VZMUL(TA, TB); + TD = VSUB(Tz, TC); + T1N = VADD(Tz, TC); + } + TU = VSUB(TN, TT); + T1n = VSUB(Th, To); + T1o = VSUB(Tw, TD); + T18 = VSUB(T10, T17); + Tp = VADD(Th, To); + TE = VADD(Tw, TD); + TF = VADD(Tp, TE); + T26 = VADD(T1K, T1L); + T27 = VADD(T1N, T1O); + T28 = VADD(T26, T27); + T1M = VSUB(T1K, T1L); + T1P = VSUB(T1N, T1O); + T1W = VADD(T1M, T1P); + T1b = VADD(TN, TT); + T1c = VADD(T10, T17); + T1k = VADD(T1b, T1c); + T23 = VADD(T1D, T1E); + T24 = VADD(T1G, T1H); + T25 = VADD(T23, T24); + T1F = VSUB(T1D, T1E); + T1I = VSUB(T1G, T1H); + T1V = VADD(T1F, T1I); + } + T1B = VADD(T7, TF); + T1C = VBYI(VADD(T1j, T1k)); + ST(&(x[WS(rs, 15)]), VSUB(T1B, T1C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1B, T1C), ms, &(x[WS(rs, 1)])); + { + V T29, T2b, T2c, T2g, T2i, T2e, T2f, T2h, T2d; + T29 = VMUL(LDK(KP559016994), VSUB(T25, T28)); + T2b = VADD(T25, T28); + T2c = VFNMS(LDK(KP250000000), T2b, T2a); + T2e = VSUB(T23, T24); + T2f = VSUB(T26, T27); + T2g = VBYI(VFMA(LDK(KP951056516), T2e, VMUL(LDK(KP587785252), T2f))); + T2i = VBYI(VFNMS(LDK(KP951056516), T2f, VMUL(LDK(KP587785252), T2e))); + ST(&(x[0]), VADD(T2a, T2b), ms, &(x[0])); 
+ T2h = VSUB(T2c, T29); + ST(&(x[WS(rs, 8)]), VSUB(T2h, T2i), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T2i, T2h), ms, &(x[0])); + T2d = VADD(T29, T2c); + ST(&(x[WS(rs, 4)]), VSUB(T2d, T2g), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VADD(T2g, T2d), ms, &(x[0])); + } + { + V T1Z, T1X, T1Y, T1R, T21, T1J, T1Q, T22, T20; + T1Z = VMUL(LDK(KP559016994), VSUB(T1V, T1W)); + T1X = VADD(T1V, T1W); + T1Y = VFNMS(LDK(KP250000000), T1X, T1U); + T1J = VSUB(T1F, T1I); + T1Q = VSUB(T1M, T1P); + T1R = VBYI(VFNMS(LDK(KP951056516), T1Q, VMUL(LDK(KP587785252), T1J))); + T21 = VBYI(VFMA(LDK(KP951056516), T1J, VMUL(LDK(KP587785252), T1Q))); + ST(&(x[WS(rs, 10)]), VADD(T1U, T1X), ms, &(x[0])); + T22 = VADD(T1Z, T1Y); + ST(&(x[WS(rs, 6)]), VADD(T21, T22), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T22, T21), ms, &(x[0])); + T20 = VSUB(T1Y, T1Z); + ST(&(x[WS(rs, 2)]), VADD(T1R, T20), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VSUB(T20, T1R), ms, &(x[0])); + } + { + V T19, T1p, T1w, T1u, T1m, T1x, TI, T1t; + T19 = VFNMS(LDK(KP951056516), T18, VMUL(LDK(KP587785252), TU)); + T1p = VFNMS(LDK(KP951056516), T1o, VMUL(LDK(KP587785252), T1n)); + T1w = VFMA(LDK(KP951056516), T1n, VMUL(LDK(KP587785252), T1o)); + T1u = VFMA(LDK(KP951056516), TU, VMUL(LDK(KP587785252), T18)); + { + V T1d, T1l, TG, TH; + T1d = VMUL(LDK(KP559016994), VSUB(T1b, T1c)); + T1l = VFNMS(LDK(KP250000000), T1k, T1j); + T1m = VSUB(T1d, T1l); + T1x = VADD(T1d, T1l); + TG = VFNMS(LDK(KP250000000), TF, T7); + TH = VMUL(LDK(KP559016994), VSUB(Tp, TE)); + TI = VSUB(TG, TH); + T1t = VADD(TH, TG); + } + { + V T1a, T1q, T1z, T1A; + T1a = VSUB(TI, T19); + T1q = VBYI(VSUB(T1m, T1p)); + ST(&(x[WS(rs, 17)]), VSUB(T1a, T1q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T1a, T1q), ms, &(x[WS(rs, 1)])); + T1z = VADD(T1t, T1u); + T1A = VBYI(VSUB(T1x, T1w)); + ST(&(x[WS(rs, 11)]), VSUB(T1z, T1A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T1z, T1A), ms, &(x[WS(rs, 1)])); + } + { + V T1r, T1s, T1v, T1y; + T1r = VADD(TI, T19); + T1s = 
VBYI(VADD(T1p, T1m)); + ST(&(x[WS(rs, 13)]), VSUB(T1r, T1s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T1r, T1s), ms, &(x[WS(rs, 1)])); + T1v = VSUB(T1t, T1u); + T1y = VBYI(VADD(T1w, T1x)); + ST(&(x[WS(rs, 19)]), VSUB(T1v, T1y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1v, T1y), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t3bv_20"), twinstr, &GENUS, {126, 80, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_20) (planner *p) { + X(kdft_dit_register) (p, t3bv_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,950 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:50 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3bv_25 -include t3b.h -sign 1 */ + +/* + * This function contains 268 FP additions, 281 FP multiplications, + * (or, 87 additions, 100 multiplications, 181 fused multiply/add), + * 223 stack variables, 67 constants, and 50 memory accesses + */ +#include "t3b.h" + +static void t3bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, +0.447417479732227551498980015410057305749330693); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); 
+ DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP831864738, +0.831864738706457140726048799369896829771167132); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP522616830, +0.522616830205754336872861364785224694908468440); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP951056516, 
+0.951056516295153572116439333379382143405698634); + DVK(KP912575812, +0.912575812670962425556968549836277086778922727); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) { + V T2t, T1Z, T2W, T28, T2Q, T2r, T2g, T2u, T2o, T2l; + { + V T2, T5, T3, T9; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T9 = 
LDW(&(W[TWVL * 6])); + { + V T2c, T3l, Tn, T49, Tm, T4e, TN, T32, T1d, T3a, T3f, T3z, T3H, T25, T1W; + V T2v, T2D, T4a, T1g, T18, T2Z, T11, T31, TK, T1q, T1j, T1n, T4b, T17; + { + V T1, T1l, Tr, T4, Ty, T1E, Tu, TX, TD, T1h, Tz, T1e, T1I, T1o, TU; + V Tk, T2b, T1B, T1D, T1N, T1F, Td, T2a, T1J; + { + V T7, Tb, TC, Tg, T1L, Ta, T6, Tj, T1A; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V Tf, Ti, Te, Th; + Tf = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tb = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Te = VZMUL(T2, T5); + TC = VZMULJ(T2, T5); + T1l = VZMUL(T3, T5); + Tr = VZMULJ(T3, T5); + T4 = VZMUL(T2, T3); + Ty = VZMULJ(T2, T3); + T1E = VZMULJ(T2, T9); + Th = VZMULJ(T5, T9); + Tu = VZMULJ(T3, T9); + Tg = VZMUL(Te, Tf); + TX = VZMULJ(Te, T9); + TD = VZMULJ(TC, T9); + T1h = VZMULJ(Ty, T9); + Tz = VZMUL(Ty, T5); + T1e = VZMULJ(Ty, T5); + T1L = VZMULJ(Tr, T9); + Ta = VZMULJ(T4, T9); + T1I = VZMUL(T4, T5); + T6 = VZMULJ(T4, T5); + Tj = VZMUL(Th, Ti); + } + T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1o = VZMULJ(T1e, T9); + { + V Tc, T8, T1C, T1M; + T1C = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1M = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tc = VZMUL(Ta, Tb); + T8 = VZMUL(T6, T7); + TU = VZMULJ(T6, T9); + Tk = VADD(Tg, Tj); + T2b = VSUB(Tg, Tj); + T1B = VZMUL(T3, T1A); + T1D = VZMUL(TC, T1C); + T1N = VZMUL(T1L, T1M); + T1F = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + Td = VADD(T8, Tc); + T2a = VSUB(T8, Tc); + T1J = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + } + } + { + V Tq, Tt, TF, T1T, T1H, Tw, T1U, T1O, TA, Tp, Ts, TE; + Tp = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TE = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + { + V T1K, Tv, T1G, Tl; + Tv = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1G = VZMUL(T1E, T1F); + T2c = VFMA(LDK(KP618033988), T2b, T2a); + T3l = VFNMS(LDK(KP618033988), T2a, T2b); + Tn = VSUB(Td, Tk); + Tl = VADD(Td, Tk); + T1K = 
VZMUL(T1I, T1J); + Tq = VZMUL(T2, Tp); + Tt = VZMUL(Tr, Ts); + TF = VZMUL(TD, TE); + T1T = VSUB(T1D, T1G); + T1H = VADD(T1D, T1G); + T49 = VADD(T1, Tl); + Tm = VFNMS(LDK(KP250000000), Tl, T1); + Tw = VZMUL(Tu, Tv); + T1U = VSUB(T1K, T1N); + T1O = VADD(T1K, T1N); + TA = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + } + { + V Tx, TL, T1R, T38, T1V, T13, TQ, TZ, TS, T1Q, TV, TG, TM, T12, T1c; + V T16; + T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + { + V TP, TY, T1P, TB, TR; + TP = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TR = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tx = VADD(Tt, Tw); + TL = VSUB(Tt, Tw); + T1R = VSUB(T1O, T1H); + T1P = VADD(T1H, T1O); + T38 = VFNMS(LDK(KP618033988), T1T, T1U); + T1V = VFMA(LDK(KP618033988), T1U, T1T); + TB = VZMUL(Tz, TA); + T13 = VZMUL(T4, T12); + TQ = VZMUL(T9, TP); + TZ = VZMUL(TX, TY); + TS = VZMUL(T5, TR); + T4e = VADD(T1B, T1P); + T1Q = VFNMS(LDK(KP250000000), T1P, T1B); + TV = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TG = VADD(TB, TF); + TM = VSUB(TF, TB); + } + T1c = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T14, TT, TJ, T15, T10, TI, T1p, T1f, T1i, T1m; + T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T14 = VADD(TS, TQ); + TT = VSUB(TQ, TS); + { + V T39, T1S, TW, TH; + T39 = VFMA(LDK(KP559016994), T1R, T1Q); + T1S = VFNMS(LDK(KP559016994), T1R, T1Q); + TW = VZMUL(TU, TV); + TH = VADD(Tx, TG); + TJ = VSUB(Tx, TG); + TN = VFNMS(LDK(KP618033988), TM, TL); + T32 = VFMA(LDK(KP618033988), TL, TM); + T1d = VZMUL(Ty, T1c); + T3a = VFMA(LDK(KP869845200), T39, T38); + T3f = VFNMS(LDK(KP786782374), T38, T39); + T3z = VFMA(LDK(KP066152395), T39, T38); + T3H = VFNMS(LDK(KP059835404), T38, T39); + T25 = VFMA(LDK(KP987388751), T1S, T1V); + T1W = VFNMS(LDK(KP893101515), T1V, T1S); + T2v = VFNMS(LDK(KP120146378), T1V, T1S); + T2D = VFMA(LDK(KP132830569), T1S, T1V); + T15 = VADD(TZ, TW); + T10 = VSUB(TW, TZ); + TI = VFNMS(LDK(KP250000000), TH, Tq); + T4a = VADD(Tq, TH); + T1g = VZMUL(T1e, T1f); + } 
+ T1p = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1i = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T1m = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T18 = VSUB(T14, T15); + T16 = VADD(T14, T15); + T2Z = VFNMS(LDK(KP618033988), TT, T10); + T11 = VFMA(LDK(KP618033988), T10, TT); + T31 = VFNMS(LDK(KP559016994), TJ, TI); + TK = VFMA(LDK(KP559016994), TJ, TI); + T1q = VZMUL(T1o, T1p); + T1j = VZMUL(T1h, T1i); + T1n = VZMUL(T1l, T1m); + } + T4b = VADD(T13, T16); + T17 = VFNMS(LDK(KP250000000), T16, T13); /* VFNMS, not VFMS: same form as the sibling Tm, T1Q, TI and T1t terms; T17 feeds T2Y/T19 the way T1Q feeds T39/T1S */ + } + } + } + { + V T33, T3i, T3C, T3L, T20, TO, T2y, T2G, T1k, T1w, T1r, T1x, T2Y, T19, T4k; + V T4c; + T33 = VFMA(LDK(KP893101515), T32, T31); + T3i = VFNMS(LDK(KP987388751), T31, T32); + T3C = VFNMS(LDK(KP522847744), T32, T31); + T3L = VFMA(LDK(KP578046249), T31, T32); + T20 = VFMA(LDK(KP269969613), TK, TN); + TO = VFNMS(LDK(KP244189809), TN, TK); + T2y = VFMA(LDK(KP667278218), TK, TN); + T2G = VFNMS(LDK(KP603558818), TN, TK); + T1k = VADD(T1g, T1j); + T1w = VSUB(T1g, T1j); + T1r = VADD(T1n, T1q); + T1x = VSUB(T1q, T1n); + T2Y = VFMA(LDK(KP559016994), T18, T17); + T19 = VFNMS(LDK(KP559016994), T18, T17); + T4k = VSUB(T4a, T4b); + T4c = VADD(T4a, T4b); + { + V T2X, To, T35, T1y, T2H, T2z, T1a, T21, T3t, T34, T3n, T3j, T3E, T3Y, T3M; + V T3R, T1v, T36, T4l, T4f, T1u, T1s; + T2X = VFNMS(LDK(KP559016994), Tn, Tm); + To = VFMA(LDK(KP559016994), Tn, Tm); + T1u = VSUB(T1r, T1k); + T1s = VADD(T1k, T1r); + T35 = VFMA(LDK(KP618033988), T1w, T1x); + T1y = VFNMS(LDK(KP618033988), T1x, T1w); + { + V T3K, T30, T3h, T3D, T4d, T1t; + T3K = VFMA(LDK(KP447533225), T2Z, T2Y); + T30 = VFMA(LDK(KP120146378), T2Z, T2Y); + T3h = VFNMS(LDK(KP132830569), T2Y, T2Z); + T3D = VFNMS(LDK(KP494780565), T2Y, T2Z); + T2H = VFNMS(LDK(KP786782374), T11, T19); + T2z = VFMA(LDK(KP869845200), T19, T11); + T1a = VFNMS(LDK(KP667278218), T19, T11); + T21 = VFMA(LDK(KP603558818), T11, T19); + T4d = VADD(T1d, T1s); + T1t = VFNMS(LDK(KP250000000), T1s, T1d); + T3t = VFNMS(LDK(KP734762448), T33, T30); + T34 =
VFMA(LDK(KP734762448), T33, T30); + T3n = VFMA(LDK(KP734762448), T3i, T3h); + T3j = VFNMS(LDK(KP734762448), T3i, T3h); + T3E = VFNMS(LDK(KP982009705), T3D, T3C); + T3Y = VFMA(LDK(KP982009705), T3D, T3C); + T3M = VFNMS(LDK(KP921078979), T3L, T3K); + T3R = VFMA(LDK(KP921078979), T3L, T3K); + T1v = VFNMS(LDK(KP559016994), T1u, T1t); + T36 = VFMA(LDK(KP559016994), T1u, T1t); + T4l = VSUB(T4d, T4e); + T4f = VADD(T4d, T4e); + } + { + V T2L, T2R, T2j, T2q, T2J, T2B, T2e, T26, T2U, T1Y, T23, T2O; + { + V T2I, T24, T2w, T2E, T48, T42, T3y, T3s, T3V, T45, T2A, T1b, T2h, T2i, T1X; + T2L = VFNMS(LDK(KP912575812), T2H, T2G); + T2I = VFMA(LDK(KP912575812), T2H, T2G); + { + V T3A, T3e, T37, T3I, T1z; + T3A = VFNMS(LDK(KP667278218), T36, T35); + T3e = VFNMS(LDK(KP059835404), T35, T36); + T37 = VFMA(LDK(KP066152395), T36, T35); + T3I = VFMA(LDK(KP603558818), T35, T36); + T24 = VFMA(LDK(KP578046249), T1v, T1y); + T1z = VFNMS(LDK(KP522847744), T1y, T1v); + T2w = VFNMS(LDK(KP494780565), T1v, T1y); + T2E = VFMA(LDK(KP447533225), T1y, T1v); + { + V T4i, T4g, T4o, T4m; + T4i = VSUB(T4c, T4f); + T4g = VADD(T4c, T4f); + T4o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T4k, T4l)); + T4m = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T4l, T4k)); + { + V T3Q, T3J, T3b, T3u; + T3Q = VFNMS(LDK(KP845997307), T3I, T3H); + T3J = VFMA(LDK(KP845997307), T3I, T3H); + T3b = VFNMS(LDK(KP772036680), T3a, T37); + T3u = VFMA(LDK(KP772036680), T3a, T37); + { + V T3o, T3g, T3B, T3X, T4h; + T3o = VFNMS(LDK(KP772036680), T3f, T3e); + T3g = VFMA(LDK(KP772036680), T3f, T3e); + T3B = VFNMS(LDK(KP845997307), T3A, T3z); + T3X = VFMA(LDK(KP845997307), T3A, T3z); + ST(&(x[0]), VADD(T4g, T49), ms, &(x[0])); + T4h = VFNMS(LDK(KP250000000), T4g, T49); + { + V T40, T3N, T3c, T3v; + T40 = VFMA(LDK(KP906616052), T3M, T3J); + T3N = VFNMS(LDK(KP906616052), T3M, T3J); + T3c = VFMA(LDK(KP956723877), T3b, T34); + T3v = VFMA(LDK(KP522616830), T3j, T3u); + { + V T3p, T3k, T3S, T3F; + T3p = VFNMS(LDK(KP522616830), T34, T3o); 
+ T3k = VFMA(LDK(KP945422727), T3j, T3g); + T3S = VFNMS(LDK(KP923225144), T3E, T3B); + T3F = VFMA(LDK(KP923225144), T3E, T3B); + { + V T46, T3Z, T4j, T4n; + T46 = VFNMS(LDK(KP669429328), T3X, T3Y); + T3Z = VFMA(LDK(KP570584518), T3Y, T3X); + T4j = VFMA(LDK(KP559016994), T4i, T4h); + T4n = VFNMS(LDK(KP559016994), T4i, T4h); + { + V T3W, T3O, T3d, T3w; + T3W = VFMA(LDK(KP262346850), T3N, T3l); + T3O = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T3l, T3N)); + T3d = VFMA(LDK(KP992114701), T3c, T2X); + T3w = VFNMS(LDK(KP690983005), T3v, T3g); + { + V T3q, T3m, T3T, T43; + T3q = VFMA(LDK(KP763932022), T3p, T3b); + T3m = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T3l, T3k)); + T3T = VFNMS(LDK(KP997675361), T3S, T3R); + T43 = VFNMS(LDK(KP904508497), T3S, T3Q); + { + V T3G, T3P, T47, T41; + T3G = VFMA(LDK(KP949179823), T3F, T2X); + T3P = VFNMS(LDK(KP237294955), T3F, T2X); + T47 = VFNMS(LDK(KP669429328), T40, T46); + T41 = VFMA(LDK(KP618033988), T40, T3Z); + ST(&(x[WS(rs, 20)]), VFNMSI(T4m, T4j), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFMAI(T4m, T4j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T4o, T4n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VFNMSI(T4o, T4n), ms, &(x[0])); + { + V T3x, T3r, T3U, T44; + T3x = VFMA(LDK(KP855719849), T3w, T3t); + T3r = VFNMS(LDK(KP855719849), T3q, T3n); + ST(&(x[WS(rs, 3)]), VFMAI(T3m, T3d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3m, T3d), ms, &(x[0])); + T3U = VFMA(LDK(KP560319534), T3T, T3Q); + T44 = VFNMS(LDK(KP681693190), T43, T3R); + ST(&(x[WS(rs, 2)]), VFMAI(T3O, T3G), ms, &(x[0])); + ST(&(x[WS(rs, 23)]), VFNMSI(T3O, T3G), ms, &(x[WS(rs, 1)])); + T48 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T47, T3W)); + T42 = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T41, T3W)); + T3y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T3x, T3l)); + T3s = VFMA(LDK(KP897376177), T3r, T2X); + T3V = VFNMS(LDK(KP949179823), T3U, T3P); + T45 = VFNMS(LDK(KP860541664), T44, T3P); + T2R = VFNMS(LDK(KP912575812), T2z, 
T2y); + T2A = VFMA(LDK(KP912575812), T2z, T2y); + T1b = VFMA(LDK(KP829049696), T1a, TO); + T2h = VFNMS(LDK(KP829049696), T1a, TO); + T2i = VFNMS(LDK(KP831864738), T1W, T1z); + T1X = VFMA(LDK(KP831864738), T1W, T1z); + } + } + } + } + } + } + } + } + } + } + } + { + V T2M, T2F, T2x, T2S, T2T, T2N; + T2M = VFNMS(LDK(KP958953096), T2E, T2D); + T2F = VFMA(LDK(KP958953096), T2E, T2D); + ST(&(x[WS(rs, 17)]), VFNMSI(T3y, T3s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFMAI(T3y, T3s), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFMAI(T42, T3V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 12)]), VFNMSI(T42, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFNMSI(T48, T45), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VFMAI(T48, T45), ms, &(x[0])); + T2j = VFMA(LDK(KP559154169), T2i, T2h); + T2q = VFNMS(LDK(KP683113946), T2h, T2i); + T2x = VFNMS(LDK(KP867381224), T2w, T2v); + T2S = VFMA(LDK(KP867381224), T2w, T2v); + T2J = VFMA(LDK(KP894834959), T2I, T2F); + T2T = VFMA(LDK(KP447417479), T2I, T2S); + T2B = VFNMS(LDK(KP809385824), T2A, T2x); + T2N = VFMA(LDK(KP447417479), T2A, T2M); + T2e = VFMA(LDK(KP831864738), T25, T24); + T26 = VFNMS(LDK(KP831864738), T25, T24); + T2U = VFNMS(LDK(KP763932022), T2T, T2F); + T1Y = VFMA(LDK(KP904730450), T1X, T1b); + T23 = VFNMS(LDK(KP904730450), T1X, T1b); + T2O = VFMA(LDK(KP690983005), T2N, T2x); + } + } + { + V T2C, T22, T2d, T2K; + T2C = VFNMS(LDK(KP992114701), T2B, To); + T22 = VFMA(LDK(KP916574801), T21, T20); + T2d = VFNMS(LDK(KP916574801), T21, T20); + T2K = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2J, T2c)); + { + V T27, T2P, T2f, T2k, T2n, T2V; + T2V = VFNMS(LDK(KP999544308), T2U, T2R); + T27 = VFNMS(LDK(KP904730450), T26, T23); + T2t = VFMA(LDK(KP968583161), T1Y, To); + T1Z = VFNMS(LDK(KP242145790), T1Y, To); + T2P = VFNMS(LDK(KP999544308), T2O, T2L); + T2f = VFMA(LDK(KP904730450), T2e, T2d); + T2k = VFNMS(LDK(KP904730450), T2e, T2d); + T2n = VADD(T22, T23); + ST(&(x[WS(rs, 21)]), VFMAI(T2K, T2C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), 
VFNMSI(T2K, T2C), ms, &(x[0])); + T2W = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2V, T2c)); + T28 = VFNMS(LDK(KP618033988), T27, T22); + T2Q = VFNMS(LDK(KP803003575), T2P, To); + T2r = VFMA(LDK(KP617882369), T2k, T2q); + T2g = VFNMS(LDK(KP242145790), T2f, T2c); + T2u = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2f, T2c)); + T2o = VFNMS(LDK(KP683113946), T2n, T26); + T2l = VFMA(LDK(KP559016994), T2k, T2j); + } + } + } + } + } + } + } + { + V T29, T2s, T2p, T2m; + T29 = VFNMS(LDK(KP876091699), T28, T1Z); + ST(&(x[WS(rs, 16)]), VFMAI(T2W, T2Q), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFNMSI(T2W, T2Q), ms, &(x[WS(rs, 1)])); + T2s = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T2r, T2g)); + ST(&(x[WS(rs, 24)]), VFNMSI(T2u, T2t), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(T2u, T2t), ms, &(x[WS(rs, 1)])); + T2p = VFMA(LDK(KP792626838), T2o, T1Z); + T2m = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2l, T2g)); + ST(&(x[WS(rs, 11)]), VFMAI(T2s, T2p), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFNMSI(T2s, T2p), ms, &(x[0])); + ST(&(x[WS(rs, 19)]), VFNMSI(T2m, T29), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFMAI(T2m, T29), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t3bv_25"), twinstr, &GENUS, {87, 100, 181, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_25) (planner *p) { + X(kdft_dit_register) (p, t3bv_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3bv_25 -include t3b.h -sign 1 */ + +/* + * This function contains 268 FP additions, 228 FP multiplications, + * (or, 191 additions, 151 multiplications, 77 fused multiply/add), + * 124 stack variables, 40 constants, and 50 memory accesses + */ +#include "t3b.h" + +static void t3bv_25(R *ri, R *ii, 
const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP1_274847979, 
+1.274847979497379420353425623352032390869834596); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) { + V T1, Td, T8, T9, TF, Te, Tu, TB, TC, T1s, T15, Tf, TY, T4, Ta; + V Tx, T1T, Tg, T1N, T1v, T18, TG, T1o, T11; + T1 = LDW(&(W[TWVL * 4])); + Td = LDW(&(W[TWVL * 2])); + T8 = LDW(&(W[0])); + T9 = VZMUL(T8, T1); + TF = VZMULJ(T8, T1); + Te = VZMUL(T8, Td); + Tu = VZMULJ(Td, T1); + TB = VZMULJ(T8, Td); + TC = VZMUL(TB, T1); + T1s = VZMUL(Te, T1); + T15 = VZMUL(Td, T1); + Tf = VZMULJ(Te, T1); + TY = VZMULJ(TB, T1); + T4 = LDW(&(W[TWVL * 6])); + Ta = VZMULJ(T9, T4); + Tx = VZMULJ(Td, T4); + T1T = VZMULJ(T1, T4); + Tg = VZMULJ(Tf, T4); + T1N = VZMULJ(Te, T4); + T1v = VZMULJ(Tu, T4); + T18 = VZMULJ(TY, T4); + TG = VZMULJ(TF, T4); + T1o = VZMULJ(T8, T4); + T11 = VZMULJ(TB, T4); + { + V T1Y, T1X, T2f, T2g, T1Z, T20, T2e, T39, T1H, T2T, T1E, T3C, T2S, Tk, T2G; + V Ts, T3z, T2F, TK, T2I, TS, T3y, T2J, T1k, T2Q, T1h, T3B, T2P; + { + V T1S, T1V, T1W, T1M, T1P, T1Q, T2d; + T1Y = LD(&(x[0]), ms, &(x[0])); + { + V T1R, T1U, T1L, T1O; + 
T1R = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T1S = VZMUL(T9, T1R); + T1U = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1V = VZMUL(T1T, T1U); + T1W = VADD(T1S, T1V); + T1L = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T1M = VZMUL(Tf, T1L); + T1O = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + T1P = VZMUL(T1N, T1O); + T1Q = VADD(T1M, T1P); + } + T1X = VMUL(LDK(KP559016994), VSUB(T1Q, T1W)); + T2f = VSUB(T1S, T1V); + T2g = VMUL(LDK(KP587785252), T2f); + T1Z = VADD(T1Q, T1W); + T20 = VFNMS(LDK(KP250000000), T1Z, T1Y); + T2d = VSUB(T1M, T1P); + T2e = VMUL(LDK(KP951056516), T2d); + T39 = VMUL(LDK(KP587785252), T2d); + } + { + V T1B, T1u, T1x, T1y, T1n, T1q, T1r, T1A; + T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1B = VZMUL(Td, T1A); + { + V T1t, T1w, T1m, T1p; + T1t = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1u = VZMUL(T1s, T1t); + T1w = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1x = VZMUL(T1v, T1w); + T1y = VADD(T1u, T1x); + T1m = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1n = VZMUL(TF, T1m); + T1p = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1q = VZMUL(T1o, T1p); + T1r = VADD(T1n, T1q); + } + { + V T1F, T1G, T1z, T1C, T1D; + T1F = VSUB(T1n, T1q); + T1G = VSUB(T1u, T1x); + T1H = VFMA(LDK(KP475528258), T1F, VMUL(LDK(KP293892626), T1G)); + T2T = VFNMS(LDK(KP475528258), T1G, VMUL(LDK(KP293892626), T1F)); + T1z = VMUL(LDK(KP559016994), VSUB(T1r, T1y)); + T1C = VADD(T1r, T1y); + T1D = VFNMS(LDK(KP250000000), T1C, T1B); + T1E = VADD(T1z, T1D); + T3C = VADD(T1B, T1C); + T2S = VSUB(T1D, T1z); + } + } + { + V Tp, Tc, Ti, Tm, T3, T6, Tl, To; + To = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tp = VZMUL(Te, To); + { + V Tb, Th, T2, T5; + Tb = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tc = VZMUL(Ta, Tb); + Th = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + Ti = VZMUL(Tg, Th); + Tm = VADD(Tc, Ti); + T2 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T3 = VZMUL(T1, T2); + T5 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T6 = VZMUL(T4, T5); + Tl = VADD(T3, T6); + } + { + V T7, Tj, Tn, Tq, Tr; + T7 = VSUB(T3, 
T6); + Tj = VSUB(Tc, Ti); + Tk = VFMA(LDK(KP475528258), T7, VMUL(LDK(KP293892626), Tj)); + T2G = VFNMS(LDK(KP475528258), Tj, VMUL(LDK(KP293892626), T7)); + Tn = VMUL(LDK(KP559016994), VSUB(Tl, Tm)); + Tq = VADD(Tl, Tm); + Tr = VFNMS(LDK(KP250000000), Tq, Tp); + Ts = VADD(Tn, Tr); + T3z = VADD(Tp, Tq); + T2F = VSUB(Tr, Tn); + } + } + { + V TP, TE, TI, TM, Tw, Tz, TL, TO; + TO = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TP = VZMUL(T8, TO); + { + V TD, TH, Tv, Ty; + TD = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TE = VZMUL(TC, TD); + TH = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + TI = VZMUL(TG, TH); + TM = VADD(TE, TI); + Tv = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tw = VZMUL(Tu, Tv); + Ty = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + Tz = VZMUL(Tx, Ty); + TL = VADD(Tw, Tz); + } + { + V TA, TJ, TN, TQ, TR; + TA = VSUB(Tw, Tz); + TJ = VSUB(TE, TI); + TK = VFMA(LDK(KP475528258), TA, VMUL(LDK(KP293892626), TJ)); + T2I = VFNMS(LDK(KP475528258), TJ, VMUL(LDK(KP293892626), TA)); + TN = VMUL(LDK(KP559016994), VSUB(TL, TM)); + TQ = VADD(TL, TM); + TR = VFNMS(LDK(KP250000000), TQ, TP); + TS = VADD(TN, TR); + T3y = VADD(TP, TQ); + T2J = VSUB(TR, TN); + } + } + { + V T1e, T17, T1a, T1b, T10, T13, T14, T1d; + T1d = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T1e = VZMUL(TB, T1d); + { + V T16, T19, TZ, T12; + T16 = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T17 = VZMUL(T15, T16); + T19 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1a = VZMUL(T18, T19); + T1b = VADD(T17, T1a); + TZ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T10 = VZMUL(TY, TZ); + T12 = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T13 = VZMUL(T11, T12); + T14 = VADD(T10, T13); + } + { + V T1i, T1j, T1c, T1f, T1g; + T1i = VSUB(T10, T13); + T1j = VSUB(T17, T1a); + T1k = VFMA(LDK(KP475528258), T1i, VMUL(LDK(KP293892626), T1j)); + T2Q = VFNMS(LDK(KP475528258), T1j, VMUL(LDK(KP293892626), T1i)); + T1c = VMUL(LDK(KP559016994), VSUB(T14, T1b)); + T1f = VADD(T14, T1b); + T1g = VFNMS(LDK(KP250000000), T1f, T1e); + T1h = VADD(T1c, T1g); + T3B = 
VADD(T1e, T1f); + T2P = VSUB(T1g, T1c); + } + } + { + V T3E, T3M, T3I, T3J, T3H, T3K, T3N, T3L; + { + V T3A, T3D, T3F, T3G; + T3A = VSUB(T3y, T3z); + T3D = VSUB(T3B, T3C); + T3E = VBYI(VFMA(LDK(KP951056516), T3A, VMUL(LDK(KP587785252), T3D))); + T3M = VBYI(VFNMS(LDK(KP951056516), T3D, VMUL(LDK(KP587785252), T3A))); + T3I = VADD(T1Y, T1Z); + T3F = VADD(T3y, T3z); + T3G = VADD(T3B, T3C); + T3J = VADD(T3F, T3G); + T3H = VMUL(LDK(KP559016994), VSUB(T3F, T3G)); + T3K = VFNMS(LDK(KP250000000), T3J, T3I); + } + ST(&(x[0]), VADD(T3I, T3J), ms, &(x[0])); + T3N = VSUB(T3K, T3H); + ST(&(x[WS(rs, 10)]), VADD(T3M, T3N), ms, &(x[0])); + ST(&(x[WS(rs, 15)]), VSUB(T3N, T3M), ms, &(x[WS(rs, 1)])); + T3L = VADD(T3H, T3K); + ST(&(x[WS(rs, 5)]), VADD(T3E, T3L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 20)]), VSUB(T3L, T3E), ms, &(x[0])); + } + { + V T2X, T3a, T3i, T3j, T3k, T3s, T3t, T3u, T3l, T3m, T3n, T3p, T3q, T3r, T2L; + V T3b, T32, T38, T2W, T35, T2Y, T34, T3w, T3x; + T2X = VSUB(T20, T1X); + T3a = VFNMS(LDK(KP951056516), T2f, T39); + T3i = VFMA(LDK(KP1_369094211), T2I, VMUL(LDK(KP728968627), T2J)); + T3j = VFNMS(LDK(KP992114701), T2F, VMUL(LDK(KP250666467), T2G)); + T3k = VADD(T3i, T3j); + T3s = VFNMS(LDK(KP125581039), T2Q, VMUL(LDK(KP998026728), T2P)); + T3t = VFMA(LDK(KP1_274847979), T2T, VMUL(LDK(KP770513242), T2S)); + T3u = VADD(T3s, T3t); + T3l = VFMA(LDK(KP1_996053456), T2Q, VMUL(LDK(KP062790519), T2P)); + T3m = VFNMS(LDK(KP637423989), T2S, VMUL(LDK(KP1_541026485), T2T)); + T3n = VADD(T3l, T3m); + T3p = VFNMS(LDK(KP1_457937254), T2I, VMUL(LDK(KP684547105), T2J)); + T3q = VFMA(LDK(KP1_984229402), T2G, VMUL(LDK(KP125333233), T2F)); + T3r = VADD(T3p, T3q); + { + V T2H, T2K, T36, T30, T31, T37; + T2H = VFNMS(LDK(KP851558583), T2G, VMUL(LDK(KP904827052), T2F)); + T2K = VFMA(LDK(KP1_752613360), T2I, VMUL(LDK(KP481753674), T2J)); + T36 = VADD(T2K, T2H); + T30 = VFMA(LDK(KP1_071653589), T2Q, VMUL(LDK(KP844327925), T2P)); + T31 = VFMA(LDK(KP125581039), T2T, VMUL(LDK(KP998026728), 
T2S)); + T37 = VADD(T30, T31); + T2L = VSUB(T2H, T2K); + T3b = VADD(T36, T37); + T32 = VSUB(T30, T31); + T38 = VMUL(LDK(KP559016994), VSUB(T36, T37)); + } + { + V T2M, T2N, T2O, T2R, T2U, T2V; + T2M = VFNMS(LDK(KP963507348), T2I, VMUL(LDK(KP876306680), T2J)); + T2N = VFMA(LDK(KP1_809654104), T2G, VMUL(LDK(KP425779291), T2F)); + T2O = VSUB(T2M, T2N); + T2R = VFNMS(LDK(KP1_688655851), T2Q, VMUL(LDK(KP535826794), T2P)); + T2U = VFNMS(LDK(KP1_996053456), T2T, VMUL(LDK(KP062790519), T2S)); + T2V = VADD(T2R, T2U); + T2W = VMUL(LDK(KP559016994), VSUB(T2O, T2V)); + T35 = VSUB(T2R, T2U); + T2Y = VADD(T2O, T2V); + T34 = VADD(T2M, T2N); + } + { + V T3g, T3h, T3o, T3v; + T3g = VADD(T2X, T2Y); + T3h = VBYI(VADD(T3a, T3b)); + ST(&(x[WS(rs, 23)]), VSUB(T3g, T3h), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VADD(T3g, T3h), ms, &(x[0])); + T3o = VADD(T2X, VADD(T3k, T3n)); + T3v = VBYI(VSUB(VADD(T3r, T3u), T3a)); + ST(&(x[WS(rs, 22)]), VSUB(T3o, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VADD(T3o, T3v), ms, &(x[WS(rs, 1)])); + } + T3w = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T3i, T3j), VFMA(LDK(KP309016994), T3r, VFNMS(LDK(KP809016994), T3u, VMUL(LDK(KP587785252), VSUB(T3l, T3m))))), T3a)); + T3x = VFMA(LDK(KP309016994), T3k, VFMA(LDK(KP951056516), VSUB(T3q, T3p), VFMA(LDK(KP587785252), VSUB(T3t, T3s), VFNMS(LDK(KP809016994), T3n, T2X)))); + ST(&(x[WS(rs, 8)]), VADD(T3w, T3x), ms, &(x[0])); + ST(&(x[WS(rs, 17)]), VSUB(T3x, T3w), ms, &(x[WS(rs, 1)])); + { + V T33, T3e, T3d, T3f, T2Z, T3c; + T2Z = VFNMS(LDK(KP250000000), T2Y, T2X); + T33 = VFMA(LDK(KP951056516), T2L, VADD(T2W, VFNMS(LDK(KP587785252), T32, T2Z))); + T3e = VFMA(LDK(KP587785252), T2L, VFMA(LDK(KP951056516), T32, VSUB(T2Z, T2W))); + T3c = VFNMS(LDK(KP250000000), T3b, T3a); + T3d = VBYI(VADD(VFMA(LDK(KP951056516), T34, VMUL(LDK(KP587785252), T35)), VADD(T38, T3c))); + T3f = VBYI(VADD(VFNMS(LDK(KP951056516), T35, VMUL(LDK(KP587785252), T34)), VSUB(T3c, T38))); + ST(&(x[WS(rs, 18)]), VSUB(T33, T3d), ms, &(x[0])); + 
ST(&(x[WS(rs, 12)]), VADD(T3e, T3f), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VADD(T33, T3d), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T3e, T3f), ms, &(x[WS(rs, 1)])); + } + } + { + V T21, T2h, T2p, T2q, T2r, T2z, T2A, T2B, T2s, T2t, T2u, T2w, T2x, T2y, TU; + V T2i, T26, T2c, T1K, T29, T22, T28, T2D, T2E; + T21 = VADD(T1X, T20); + T2h = VADD(T2e, T2g); + T2p = VFMA(LDK(KP1_688655851), TK, VMUL(LDK(KP535826794), TS)); + T2q = VFMA(LDK(KP1_541026485), Tk, VMUL(LDK(KP637423989), Ts)); + T2r = VSUB(T2p, T2q); + T2z = VFMA(LDK(KP851558583), T1k, VMUL(LDK(KP904827052), T1h)); + T2A = VFMA(LDK(KP1_984229402), T1H, VMUL(LDK(KP125333233), T1E)); + T2B = VADD(T2z, T2A); + T2s = VFNMS(LDK(KP425779291), T1h, VMUL(LDK(KP1_809654104), T1k)); + T2t = VFNMS(LDK(KP992114701), T1E, VMUL(LDK(KP250666467), T1H)); + T2u = VADD(T2s, T2t); + T2w = VFNMS(LDK(KP1_071653589), TK, VMUL(LDK(KP844327925), TS)); + T2x = VFNMS(LDK(KP770513242), Ts, VMUL(LDK(KP1_274847979), Tk)); + T2y = VADD(T2w, T2x); + { + V Tt, TT, T2a, T24, T25, T2b; + Tt = VFMA(LDK(KP1_071653589), Tk, VMUL(LDK(KP844327925), Ts)); + TT = VFMA(LDK(KP1_937166322), TK, VMUL(LDK(KP248689887), TS)); + T2a = VADD(TT, Tt); + T24 = VFMA(LDK(KP1_752613360), T1k, VMUL(LDK(KP481753674), T1h)); + T25 = VFMA(LDK(KP1_457937254), T1H, VMUL(LDK(KP684547105), T1E)); + T2b = VADD(T24, T25); + TU = VSUB(Tt, TT); + T2i = VADD(T2a, T2b); + T26 = VSUB(T24, T25); + T2c = VMUL(LDK(KP559016994), VSUB(T2a, T2b)); + } + { + V TV, TW, TX, T1l, T1I, T1J; + TV = VFNMS(LDK(KP497379774), TK, VMUL(LDK(KP968583161), TS)); + TW = VFNMS(LDK(KP1_688655851), Tk, VMUL(LDK(KP535826794), Ts)); + TX = VADD(TV, TW); + T1l = VFNMS(LDK(KP963507348), T1k, VMUL(LDK(KP876306680), T1h)); + T1I = VFNMS(LDK(KP1_369094211), T1H, VMUL(LDK(KP728968627), T1E)); + T1J = VADD(T1l, T1I); + T1K = VMUL(LDK(KP559016994), VSUB(TX, T1J)); + T29 = VSUB(T1l, T1I); + T22 = VADD(TX, T1J); + T28 = VSUB(TV, TW); + } + { + V T2n, T2o, T2v, T2C; + T2n = VADD(T21, T22); + T2o = 
VBYI(VADD(T2h, T2i)); + ST(&(x[WS(rs, 24)]), VSUB(T2n, T2o), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VADD(T2n, T2o), ms, &(x[WS(rs, 1)])); + T2v = VADD(T21, VADD(T2r, T2u)); + T2C = VBYI(VSUB(VADD(T2y, T2B), T2h)); + ST(&(x[WS(rs, 21)]), VSUB(T2v, T2C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T2v, T2C), ms, &(x[0])); + } + T2D = VBYI(VSUB(VFMA(LDK(KP309016994), T2y, VFMA(LDK(KP951056516), VADD(T2p, T2q), VFNMS(LDK(KP809016994), T2B, VMUL(LDK(KP587785252), VSUB(T2s, T2t))))), T2h)); + T2E = VFMA(LDK(KP951056516), VSUB(T2x, T2w), VFMA(LDK(KP309016994), T2r, VFMA(LDK(KP587785252), VSUB(T2A, T2z), VFNMS(LDK(KP809016994), T2u, T21)))); + ST(&(x[WS(rs, 9)]), VADD(T2D, T2E), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VSUB(T2E, T2D), ms, &(x[0])); + { + V T27, T2l, T2k, T2m, T23, T2j; + T23 = VFNMS(LDK(KP250000000), T22, T21); + T27 = VFMA(LDK(KP951056516), TU, VADD(T1K, VFNMS(LDK(KP587785252), T26, T23))); + T2l = VFMA(LDK(KP587785252), TU, VFMA(LDK(KP951056516), T26, VSUB(T23, T1K))); + T2j = VFNMS(LDK(KP250000000), T2i, T2h); + T2k = VBYI(VADD(VFMA(LDK(KP951056516), T28, VMUL(LDK(KP587785252), T29)), VADD(T2c, T2j))); + T2m = VBYI(VADD(VFNMS(LDK(KP951056516), T29, VMUL(LDK(KP587785252), T28)), VSUB(T2j, T2c))); + ST(&(x[WS(rs, 19)]), VSUB(T27, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T2l, T2m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VADD(T27, T2k), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VSUB(T2l, T2m), ms, &(x[0])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t3bv_25"), twinstr, &GENUS, {191, 151, 77, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_25) (planner *p) { + X(kdft_dit_register) (p, t3bv_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,883 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 32 -name t3bv_32 -include t3b.h -sign 1 */ + +/* + * This function contains 244 FP additions, 214 FP multiplications, + * (or, 146 additions, 116 multiplications, 98 fused multiply/add), + * 120 stack variables, 7 constants, and 64 memory accesses + */ +#include "t3b.h" + +static void t3bv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, 
+0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(32, rs)) { + V T2B, T2A, T2F, T2N, T2H, T2z, T2P, T2L, T2C, T2M; + { + V T2, T5, T3, T7; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T7 = LDW(&(W[TWVL * 6])); + { + V T24, Tb, T3x, T2T, T3K, T2W, T25, Tr, T3z, T3j, T28, TX, T3y, T3g, T27; + V TG, T37, T3F, T3G, T3a, T2Y, T15, T1p, T2Z, T2w, T1V, T2v, T1N, T32, T1h; + V T17, T1a; + { + V T1, Tz, TT, T4, TC, Tv, T12, T1D, T1w, T18, T1t, T1O, TK, TP, T1c; + V T1m, Tf, T6, Te, TL, TQ, T2S, Tp, TU, Ti, Ta, TM, TR, Tm, TJ; + V T22, T9, T1Z; + T1 = LD(&(x[0]), ms, &(x[0])); + T22 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1Z = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + { + V Tn, TH, Tk, To, Th, Tg, T8, Tl, T20, T23, TI; + { + V Td, T1C, Tc, T21; + Td = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tz = VZMUL(T2, T5); + T1C = VZMULJ(T2, T5); + Tn = VZMUL(T3, T5); + TT = VZMULJ(T3, T5); + Tc = VZMUL(T2, T3); + T4 = VZMULJ(T2, T3); + TH = VZMUL(T3, T7); + T21 = VZMULJ(T3, T7); + Tk = VZMUL(T2, T7); + TC = VZMULJ(T2, T7); + Tv = VZMULJ(T5, T7); + T12 = VZMULJ(Tz, T7); + T20 = VZMUL(T1C, T1Z); + T1D = VZMULJ(T1C, T7); + T1w = VZMULJ(Tn, T7); + T18 = VZMULJ(TT, T7); + T1t = VZMUL(Tc, T7); + T1O = VZMULJ(Tc, T7); + TK = VZMUL(Tc, T5); + TP = VZMULJ(Tc, T5); + T1c = VZMUL(T4, T7); + T1m = VZMULJ(T4, T7); + Tf = VZMULJ(T4, T5); + T6 = VZMUL(T4, T5); + T23 = VZMUL(T21, T22); + Te = VZMUL(Tc, Td); + } + TL = VZMULJ(TK, T7); + TQ = VZMULJ(TP, T7); + To = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tg = VZMULJ(Tf, T7); + T8 = VZMULJ(T6, T7); + T2S = VADD(T20, T23); + T24 = VSUB(T20, T23); + Tl = 
LD(&(x[WS(rs, 28)]), ms, &(x[0])); + TI = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + Tp = VZMUL(Tn, To); + TU = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = VZMUL(Tg, Th); + Ta = VZMUL(T8, T9); + TM = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TR = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + Tm = VZMUL(Tk, Tl); + TJ = VZMUL(TH, TI); + } + { + V Tu, TE, Tw, TA; + { + V T3h, TO, T3i, TW; + { + V TV, T2U, Tj, T2R, TN, TS, T2V, Tq, Tt, TD; + Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TV = VZMUL(TT, TU); + T2U = VADD(Te, Ti); + Tj = VSUB(Te, Ti); + T2R = VADD(T1, Ta); + Tb = VSUB(T1, Ta); + TN = VZMUL(TL, TM); + TS = VZMUL(TQ, TR); + T2V = VADD(Tm, Tp); + Tq = VSUB(Tm, Tp); + Tu = VZMUL(T4, Tt); + TD = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T3x = VSUB(T2R, T2S); + T2T = VADD(T2R, T2S); + T3h = VADD(TJ, TN); + TO = VSUB(TJ, TN); + T3i = VADD(TV, TS); + TW = VSUB(TS, TV); + T3K = VSUB(T2U, T2V); + T2W = VADD(T2U, T2V); + T25 = VSUB(Tj, Tq); + Tr = VADD(Tj, Tq); + TE = VZMUL(TC, TD); + } + Tw = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T3z = VSUB(T3h, T3i); + T3j = VADD(T3h, T3i); + T28 = VFMA(LDK(KP414213562), TO, TW); + TX = VFNMS(LDK(KP414213562), TW, TO); + TA = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + } + { + V T35, T1z, T1T, T36, T39, T1L, T1B, T1F; + { + V T1v, T1y, Ty, T3e, T1S, T1Q, T1I, T3f, TF, T1K, T1A, T1E; + { + V T1u, T1x, Tx, T1R; + T1u = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1x = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tx = VZMUL(Tv, Tw); + T1R = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + { + V T1P, T1H, T1J, TB; + T1P = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1H = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1J = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TB = VZMUL(Tz, TA); + T1v = VZMUL(T1t, T1u); + T1y = VZMUL(T1w, T1x); + Ty = VSUB(Tu, Tx); + T3e = VADD(Tu, Tx); + T1S = VZMUL(Tf, T1R); + T1Q = VZMUL(T1O, T1P); + T1I = VZMUL(T7, T1H); + T3f = VADD(TB, TE); + TF = VSUB(TB, TE); + T1K = VZMUL(T6, T1J); + T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1E = 
LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + } + } + T35 = VADD(T1v, T1y); + T1z = VSUB(T1v, T1y); + T1T = VSUB(T1Q, T1S); + T36 = VADD(T1S, T1Q); + T3y = VSUB(T3e, T3f); + T3g = VADD(T3e, T3f); + T27 = VFMA(LDK(KP414213562), Ty, TF); + TG = VFNMS(LDK(KP414213562), TF, Ty); + T39 = VADD(T1I, T1K); + T1L = VSUB(T1I, T1K); + T1B = VZMUL(T3, T1A); + T1F = VZMUL(T1D, T1E); + } + { + V T11, T14, T1o, T1l, T1e, T1U, T1M, T1g, T16, T19; + { + V T10, T13, T1n, T1k; + T10 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T13 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1n = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T1k = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + { + V T1d, T1f, T1G, T38; + T1d = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1f = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1G = VSUB(T1B, T1F); + T38 = VADD(T1B, T1F); + T37 = VADD(T35, T36); + T3F = VSUB(T35, T36); + T11 = VZMUL(T2, T10); + T14 = VZMUL(T12, T13); + T1o = VZMUL(T1m, T1n); + T1l = VZMUL(T5, T1k); + T1e = VZMUL(T1c, T1d); + T3G = VSUB(T39, T38); + T3a = VADD(T38, T39); + T1U = VSUB(T1L, T1G); + T1M = VADD(T1G, T1L); + T1g = VZMUL(TK, T1f); + } + T16 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T19 = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + } + T2Y = VADD(T11, T14); + T15 = VSUB(T11, T14); + T1p = VSUB(T1l, T1o); + T2Z = VADD(T1l, T1o); + T2w = VFNMS(LDK(KP707106781), T1U, T1T); + T1V = VFMA(LDK(KP707106781), T1U, T1T); + T2v = VFNMS(LDK(KP707106781), T1M, T1z); + T1N = VFMA(LDK(KP707106781), T1M, T1z); + T32 = VADD(T1e, T1g); + T1h = VSUB(T1e, T1g); + T17 = VZMUL(TP, T16); + T1a = VZMUL(T18, T19); + } + } + } + } + { + V T2X, T3k, T3b, T3t, T1b, T31, T30, T3C, T3r, T3v, T3p, T3q; + T2X = VSUB(T2T, T2W); + T3p = VADD(T2T, T2W); + T3q = VADD(T3g, T3j); + T3k = VSUB(T3g, T3j); + T3b = VSUB(T37, T3a); + T3t = VADD(T37, T3a); + T1b = VSUB(T17, T1a); + T31 = VADD(T17, T1a); + T30 = VADD(T2Y, T2Z); + T3C = VSUB(T2Y, T2Z); + T3r = VSUB(T3p, T3q); + T3v = VADD(T3p, T3q); + { + V T1r, T2t, T1j, 
T2s, T3S, T3Y, T3R, T3V; + { + V T3B, T3T, T3M, T3W, T3U, T3P, T3X, T3I, T3l, T3c, T3w, T3u; + { + V T3L, T3A, T33, T3D, T1i, T1q, T3O, T3H; + T3L = VSUB(T3y, T3z); + T3A = VADD(T3y, T3z); + T33 = VADD(T31, T32); + T3D = VSUB(T31, T32); + T1i = VADD(T1b, T1h); + T1q = VSUB(T1b, T1h); + T3O = VFMA(LDK(KP414213562), T3F, T3G); + T3H = VFNMS(LDK(KP414213562), T3G, T3F); + T3B = VFMA(LDK(KP707106781), T3A, T3x); + T3T = VFNMS(LDK(KP707106781), T3A, T3x); + T3M = VFMA(LDK(KP707106781), T3L, T3K); + T3W = VFNMS(LDK(KP707106781), T3L, T3K); + { + V T3E, T3N, T3s, T34; + T3E = VFNMS(LDK(KP414213562), T3D, T3C); + T3N = VFMA(LDK(KP414213562), T3C, T3D); + T3s = VADD(T30, T33); + T34 = VSUB(T30, T33); + T1r = VFMA(LDK(KP707106781), T1q, T1p); + T2t = VFNMS(LDK(KP707106781), T1q, T1p); + T1j = VFMA(LDK(KP707106781), T1i, T15); + T2s = VFNMS(LDK(KP707106781), T1i, T15); + T3U = VADD(T3N, T3O); + T3P = VSUB(T3N, T3O); + T3X = VSUB(T3E, T3H); + T3I = VADD(T3E, T3H); + T3l = VSUB(T34, T3b); + T3c = VADD(T34, T3b); + T3w = VADD(T3s, T3t); + T3u = VSUB(T3s, T3t); + } + } + { + V T40, T3Z, T3Q, T3J; + T3S = VFMA(LDK(KP923879532), T3P, T3M); + T3Q = VFNMS(LDK(KP923879532), T3P, T3M); + T40 = VFNMS(LDK(KP923879532), T3X, T3W); + T3Y = VFMA(LDK(KP923879532), T3X, T3W); + T3R = VFMA(LDK(KP923879532), T3I, T3B); + T3J = VFNMS(LDK(KP923879532), T3I, T3B); + { + V T3o, T3m, T3n, T3d; + T3o = VFMA(LDK(KP707106781), T3l, T3k); + T3m = VFNMS(LDK(KP707106781), T3l, T3k); + T3n = VFMA(LDK(KP707106781), T3c, T2X); + T3d = VFNMS(LDK(KP707106781), T3c, T2X); + ST(&(x[WS(rs, 16)]), VSUB(T3v, T3w), ms, &(x[0])); + ST(&(x[0]), VADD(T3v, T3w), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T3u, T3r), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VFNMSI(T3u, T3r), ms, &(x[0])); + T3Z = VFMA(LDK(KP923879532), T3U, T3T); + T3V = VFNMS(LDK(KP923879532), T3U, T3T); + ST(&(x[WS(rs, 18)]), VFMAI(T3Q, T3J), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3Q, T3J), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T3o, T3n), ms, 
&(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T3o, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T3m, T3d), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T3m, T3d), ms, &(x[0])); + } + ST(&(x[WS(rs, 26)]), VFMAI(T40, T3Z), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T40, T3Z), ms, &(x[0])); + } + } + { + V T2p, T1s, T1W, T2h, TZ, T2i, T2d, T26, T29, T2q; + { + V Ts, TY, T2b, T2c; + T2p = VFNMS(LDK(KP707106781), Tr, Tb); + Ts = VFMA(LDK(KP707106781), Tr, Tb); + TY = VADD(TG, TX); + T2B = VSUB(TG, TX); + T1s = VFNMS(LDK(KP198912367), T1r, T1j); + T2b = VFMA(LDK(KP198912367), T1j, T1r); + T2c = VFMA(LDK(KP198912367), T1N, T1V); + T1W = VFNMS(LDK(KP198912367), T1V, T1N); + ST(&(x[WS(rs, 2)]), VFMAI(T3S, T3R), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3S, T3R), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3Y, T3V), ms, &(x[0])); + T2h = VFNMS(LDK(KP923879532), TY, Ts); + TZ = VFMA(LDK(KP923879532), TY, Ts); + T2i = VADD(T2b, T2c); + T2d = VSUB(T2b, T2c); + T2A = VFNMS(LDK(KP707106781), T25, T24); + T26 = VFMA(LDK(KP707106781), T25, T24); + T29 = VSUB(T27, T28); + T2q = VADD(T27, T28); + } + { + V T2J, T2r, T2K, T2y; + { + V T2u, T2D, T2j, T2n, T2l, T1X, T2k, T2a, T2E, T2x; + T2u = VFMA(LDK(KP668178637), T2t, T2s); + T2D = VFNMS(LDK(KP668178637), T2s, T2t); + T2j = VFNMS(LDK(KP980785280), T2i, T2h); + T2n = VFMA(LDK(KP980785280), T2i, T2h); + T2l = VSUB(T1s, T1W); + T1X = VADD(T1s, T1W); + T2k = VFNMS(LDK(KP923879532), T29, T26); + T2a = VFMA(LDK(KP923879532), T29, T26); + T2J = VFNMS(LDK(KP923879532), T2q, T2p); + T2r = VFMA(LDK(KP923879532), T2q, T2p); + T2E = VFNMS(LDK(KP668178637), T2v, T2w); + T2x = VFMA(LDK(KP668178637), T2w, T2v); + { + V T1Y, T2f, T2o, T2m, T2e, T2g; + T1Y = VFNMS(LDK(KP980785280), T1X, TZ); + T2f = VFMA(LDK(KP980785280), T1X, TZ); + T2o = VFNMS(LDK(KP980785280), T2l, T2k); + T2m = VFMA(LDK(KP980785280), T2l, T2k); + T2e = VFNMS(LDK(KP980785280), T2d, T2a); + T2g = VFMA(LDK(KP980785280), T2d, T2a); 
+ T2F = VSUB(T2D, T2E); + T2K = VADD(T2D, T2E); + T2N = VSUB(T2u, T2x); + T2y = VADD(T2u, T2x); + ST(&(x[WS(rs, 23)]), VFNMSI(T2m, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFMAI(T2m, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFMAI(T2o, T2n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(T2o, T2n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFMAI(T2g, T2f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFNMSI(T2g, T2f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFMAI(T2e, T1Y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T2e, T1Y), ms, &(x[WS(rs, 1)])); + } + } + T2H = VFMA(LDK(KP831469612), T2y, T2r); + T2z = VFNMS(LDK(KP831469612), T2y, T2r); + T2P = VFNMS(LDK(KP831469612), T2K, T2J); + T2L = VFMA(LDK(KP831469612), T2K, T2J); + } + } + } + } + } + } + T2C = VFNMS(LDK(KP923879532), T2B, T2A); + T2M = VFMA(LDK(KP923879532), T2B, T2A); + { + V T2Q, T2O, T2G, T2I; + T2Q = VFMA(LDK(KP831469612), T2N, T2M); + T2O = VFNMS(LDK(KP831469612), T2N, T2M); + T2G = VFNMS(LDK(KP831469612), T2F, T2C); + T2I = VFMA(LDK(KP831469612), T2F, T2C); + ST(&(x[WS(rs, 21)]), VFMAI(T2O, T2L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFNMSI(T2O, T2L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFNMSI(T2Q, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(T2Q, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFMAI(T2I, T2H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(T2I, T2H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFMAI(T2G, T2z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFNMSI(T2G, T2z), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 27), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t3bv_32"), twinstr, &GENUS, {146, 116, 98, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_32) (planner *p) { + X(kdft_dit_register) (p, t3bv_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native 
-simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 32 -name t3bv_32 -include t3b.h -sign 1 */ + +/* + * This function contains 244 FP additions, 158 FP multiplications, + * (or, 228 additions, 142 multiplications, 16 fused multiply/add), + * 90 stack variables, 7 constants, and 64 memory accesses + */ +#include "t3b.h" + +static void t3bv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(32, rs)) { + V T2, T5, T3, T4, Tc, T1v, TH, Tz, Tn, T6, TS, Tf, TK, T7, T8; + V Tv, T1I, T25, Tg, Tk, T1N, T1Q, TC, T16, T12, T1w, TL, TP, TT, T1m; + V T1f; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T4 = VZMULJ(T2, T3); + Tc = VZMUL(T2, T3); + T1v = VZMULJ(T2, T5); + TH = VZMULJ(T3, T5); + Tz = VZMUL(T2, T5); + Tn = VZMUL(T3, T5); + T6 = VZMUL(T4, T5); + TS = VZMUL(Tc, T5); + Tf = VZMULJ(T4, T5); + TK = VZMULJ(Tc, T5); + T7 = LDW(&(W[TWVL * 6])); + T8 = VZMULJ(T6, T7); + Tv = VZMULJ(T5, T7); + T1I = VZMULJ(Tc, T7); + T25 = VZMULJ(T3, T7); + Tg = VZMULJ(Tf, T7); + Tk = VZMUL(T2, T7); + T1N = VZMUL(Tc, T7); + T1Q = VZMULJ(Tn, T7); + TC = VZMULJ(T2, T7); + T16 = VZMUL(T4, T7); + T12 = VZMULJ(TH, T7); + T1w = VZMULJ(T1v, T7); + TL = VZMULJ(TK, T7); + TP = VZMUL(T3, T7); + TT = VZMULJ(TS, T7); + T1m = VZMULJ(Tz, T7); + T1f = VZMULJ(T4, T7); + { 
+ V Tb, T28, T3k, T3M, Tr, T22, T3f, T3N, TX, T20, T3b, T3J, TG, T1Z, T38; + V T3I, T1M, T2v, T33, T3F, T1V, T2w, T30, T3E, T1j, T2s, T2W, T3C, T1r, T2t; + V T2T, T3B; + { + V T1, T27, Ta, T24, T26, T9, T23, T3i, T3j; + T1 = LD(&(x[0]), ms, &(x[0])); + T26 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T27 = VZMUL(T25, T26); + T9 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Ta = VZMUL(T8, T9); + T23 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T24 = VZMUL(T1v, T23); + Tb = VSUB(T1, Ta); + T28 = VSUB(T24, T27); + T3i = VADD(T1, Ta); + T3j = VADD(T24, T27); + T3k = VSUB(T3i, T3j); + T3M = VADD(T3i, T3j); + } + { + V Te, Tp, Ti, Tm; + { + V Td, To, Th, Tl; + Td = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Te = VZMUL(Tc, Td); + To = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tp = VZMUL(Tn, To); + Th = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Ti = VZMUL(Tg, Th); + Tl = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tm = VZMUL(Tk, Tl); + } + { + V Tj, Tq, T3d, T3e; + Tj = VSUB(Te, Ti); + Tq = VSUB(Tm, Tp); + Tr = VMUL(LDK(KP707106781), VADD(Tj, Tq)); + T22 = VMUL(LDK(KP707106781), VSUB(Tj, Tq)); + T3d = VADD(Te, Ti); + T3e = VADD(Tm, Tp); + T3f = VSUB(T3d, T3e); + T3N = VADD(T3d, T3e); + } + } + { + V TJ, TV, TN, TR; + { + V TI, TU, TM, TQ; + TI = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TJ = VZMUL(TH, TI); + TU = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TV = VZMUL(TT, TU); + TM = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TN = VZMUL(TL, TM); + TQ = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + TR = VZMUL(TP, TQ); + } + { + V TO, TW, T39, T3a; + TO = VSUB(TJ, TN); + TW = VSUB(TR, TV); + TX = VFNMS(LDK(KP382683432), TW, VMUL(LDK(KP923879532), TO)); + T20 = VFMA(LDK(KP923879532), TW, VMUL(LDK(KP382683432), TO)); + T39 = VADD(TR, TV); + T3a = VADD(TJ, TN); + T3b = VSUB(T39, T3a); + T3J = VADD(T39, T3a); + } + } + { + V Tu, TE, Tx, TB; + { + V Tt, TD, Tw, TA; + Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tu = VZMUL(T4, Tt); + TD = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + TE = VZMUL(TC, TD); + Tw = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tx = VZMUL(Tv, 
Tw); + TA = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TB = VZMUL(Tz, TA); + } + { + V Ty, TF, T36, T37; + Ty = VSUB(Tu, Tx); + TF = VSUB(TB, TE); + TG = VFMA(LDK(KP382683432), Ty, VMUL(LDK(KP923879532), TF)); + T1Z = VFNMS(LDK(KP382683432), TF, VMUL(LDK(KP923879532), Ty)); + T36 = VADD(Tu, Tx); + T37 = VADD(TB, TE); + T38 = VSUB(T36, T37); + T3I = VADD(T36, T37); + } + } + { + V T1H, T1K, T1S, T1P, T1B, T1D, T1E, T1u, T1y, T1z; + { + V T1G, T1J, T1R, T1O; + T1G = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1H = VZMUL(Tf, T1G); + T1J = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1K = VZMUL(T1I, T1J); + T1R = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1S = VZMUL(T1Q, T1R); + T1O = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1P = VZMUL(T1N, T1O); + { + V T1A, T1C, T1t, T1x; + T1A = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1B = VZMUL(T7, T1A); + T1C = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1D = VZMUL(T6, T1C); + T1E = VSUB(T1B, T1D); + T1t = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1u = VZMUL(T3, T1t); + T1x = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T1y = VZMUL(T1w, T1x); + T1z = VSUB(T1u, T1y); + } + } + { + V T1F, T1L, T31, T32; + T1F = VMUL(LDK(KP707106781), VSUB(T1z, T1E)); + T1L = VSUB(T1H, T1K); + T1M = VSUB(T1F, T1L); + T2v = VADD(T1L, T1F); + T31 = VADD(T1u, T1y); + T32 = VADD(T1B, T1D); + T33 = VSUB(T31, T32); + T3F = VADD(T31, T32); + } + { + V T1T, T1U, T2Y, T2Z; + T1T = VSUB(T1P, T1S); + T1U = VMUL(LDK(KP707106781), VADD(T1z, T1E)); + T1V = VSUB(T1T, T1U); + T2w = VADD(T1T, T1U); + T2Y = VADD(T1P, T1S); + T2Z = VADD(T1H, T1K); + T30 = VSUB(T2Y, T2Z); + T3E = VADD(T2Y, T2Z); + } + } + { + V T1e, T1h, T1o, T1l, T18, T1a, T1b, T11, T14, T15; + { + V T1d, T1g, T1n, T1k; + T1d = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1e = VZMUL(T5, T1d); + T1g = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T1h = VZMUL(T1f, T1g); + T1n = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1o = VZMUL(T1m, T1n); + T1k = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 
1)])); + T1l = VZMUL(T2, T1k); + { + V T17, T19, T10, T13; + T17 = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T18 = VZMUL(T16, T17); + T19 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1a = VZMUL(TS, T19); + T1b = VSUB(T18, T1a); + T10 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T11 = VZMUL(TK, T10); + T13 = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T14 = VZMUL(T12, T13); + T15 = VSUB(T11, T14); + } + } + { + V T1c, T1i, T2U, T2V; + T1c = VMUL(LDK(KP707106781), VSUB(T15, T1b)); + T1i = VSUB(T1e, T1h); + T1j = VSUB(T1c, T1i); + T2s = VADD(T1i, T1c); + T2U = VADD(T11, T14); + T2V = VADD(T18, T1a); + T2W = VSUB(T2U, T2V); + T3C = VADD(T2U, T2V); + } + { + V T1p, T1q, T2R, T2S; + T1p = VSUB(T1l, T1o); + T1q = VMUL(LDK(KP707106781), VADD(T15, T1b)); + T1r = VSUB(T1p, T1q); + T2t = VADD(T1p, T1q); + T2R = VADD(T1l, T1o); + T2S = VADD(T1e, T1h); + T2T = VSUB(T2R, T2S); + T3B = VADD(T2R, T2S); + } + } + { + V T3V, T3Z, T3Y, T40; + { + V T3T, T3U, T3W, T3X; + T3T = VADD(T3M, T3N); + T3U = VADD(T3I, T3J); + T3V = VSUB(T3T, T3U); + T3Z = VADD(T3T, T3U); + T3W = VADD(T3B, T3C); + T3X = VADD(T3E, T3F); + T3Y = VBYI(VSUB(T3W, T3X)); + T40 = VADD(T3W, T3X); + } + ST(&(x[WS(rs, 24)]), VSUB(T3V, T3Y), ms, &(x[0])); + ST(&(x[0]), VADD(T3Z, T40), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T3V, T3Y), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T3Z, T40), ms, &(x[0])); + } + { + V T3K, T3O, T3H, T3P, T3D, T3G; + T3K = VSUB(T3I, T3J); + T3O = VSUB(T3M, T3N); + T3D = VSUB(T3B, T3C); + T3G = VSUB(T3E, T3F); + T3H = VMUL(LDK(KP707106781), VSUB(T3D, T3G)); + T3P = VMUL(LDK(KP707106781), VADD(T3D, T3G)); + { + V T3L, T3Q, T3R, T3S; + T3L = VBYI(VSUB(T3H, T3K)); + T3Q = VSUB(T3O, T3P); + ST(&(x[WS(rs, 12)]), VADD(T3L, T3Q), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VSUB(T3Q, T3L), ms, &(x[0])); + T3R = VBYI(VADD(T3K, T3H)); + T3S = VADD(T3O, T3P); + ST(&(x[WS(rs, 4)]), VADD(T3R, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VSUB(T3S, T3R), ms, &(x[0])); + } + } + { + V T3g, T3w, T3m, T3t, T35, 
T3u, T3p, T3x, T3c, T3l; + T3c = VMUL(LDK(KP707106781), VSUB(T38, T3b)); + T3g = VSUB(T3c, T3f); + T3w = VADD(T3f, T3c); + T3l = VMUL(LDK(KP707106781), VADD(T38, T3b)); + T3m = VSUB(T3k, T3l); + T3t = VADD(T3k, T3l); + { + V T2X, T34, T3n, T3o; + T2X = VFNMS(LDK(KP382683432), T2W, VMUL(LDK(KP923879532), T2T)); + T34 = VFMA(LDK(KP923879532), T30, VMUL(LDK(KP382683432), T33)); + T35 = VSUB(T2X, T34); + T3u = VADD(T2X, T34); + T3n = VFMA(LDK(KP382683432), T2T, VMUL(LDK(KP923879532), T2W)); + T3o = VFNMS(LDK(KP382683432), T30, VMUL(LDK(KP923879532), T33)); + T3p = VSUB(T3n, T3o); + T3x = VADD(T3n, T3o); + } + { + V T3h, T3q, T3z, T3A; + T3h = VBYI(VSUB(T35, T3g)); + T3q = VSUB(T3m, T3p); + ST(&(x[WS(rs, 10)]), VADD(T3h, T3q), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VSUB(T3q, T3h), ms, &(x[0])); + T3z = VSUB(T3t, T3u); + T3A = VBYI(VSUB(T3x, T3w)); + ST(&(x[WS(rs, 18)]), VSUB(T3z, T3A), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T3z, T3A), ms, &(x[0])); + } + { + V T3r, T3s, T3v, T3y; + T3r = VBYI(VADD(T3g, T35)); + T3s = VADD(T3m, T3p); + ST(&(x[WS(rs, 6)]), VADD(T3r, T3s), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VSUB(T3s, T3r), ms, &(x[0])); + T3v = VADD(T3t, T3u); + T3y = VBYI(VADD(T3w, T3x)); + ST(&(x[WS(rs, 30)]), VSUB(T3v, T3y), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T3v, T3y), ms, &(x[0])); + } + } + { + V TZ, T2k, T2d, T2l, T1X, T2h, T2a, T2i; + { + V Ts, TY, T2b, T2c; + Ts = VSUB(Tb, Tr); + TY = VSUB(TG, TX); + TZ = VSUB(Ts, TY); + T2k = VADD(Ts, TY); + T2b = VFNMS(LDK(KP555570233), T1j, VMUL(LDK(KP831469612), T1r)); + T2c = VFMA(LDK(KP555570233), T1M, VMUL(LDK(KP831469612), T1V)); + T2d = VSUB(T2b, T2c); + T2l = VADD(T2b, T2c); + } + { + V T1s, T1W, T21, T29; + T1s = VFMA(LDK(KP831469612), T1j, VMUL(LDK(KP555570233), T1r)); + T1W = VFNMS(LDK(KP555570233), T1V, VMUL(LDK(KP831469612), T1M)); + T1X = VSUB(T1s, T1W); + T2h = VADD(T1s, T1W); + T21 = VSUB(T1Z, T20); + T29 = VSUB(T22, T28); + T2a = VSUB(T21, T29); + T2i = VADD(T29, T21); + } + { + V T1Y, T2e, T2n, T2o; + 
T1Y = VADD(TZ, T1X); + T2e = VBYI(VADD(T2a, T2d)); + ST(&(x[WS(rs, 27)]), VSUB(T1Y, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1Y, T2e), ms, &(x[WS(rs, 1)])); + T2n = VBYI(VADD(T2i, T2h)); + T2o = VADD(T2k, T2l); + ST(&(x[WS(rs, 3)]), VADD(T2n, T2o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VSUB(T2o, T2n), ms, &(x[WS(rs, 1)])); + } + { + V T2f, T2g, T2j, T2m; + T2f = VSUB(TZ, T1X); + T2g = VBYI(VSUB(T2d, T2a)); + ST(&(x[WS(rs, 21)]), VSUB(T2f, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VADD(T2f, T2g), ms, &(x[WS(rs, 1)])); + T2j = VBYI(VSUB(T2h, T2i)); + T2m = VSUB(T2k, T2l); + ST(&(x[WS(rs, 13)]), VADD(T2j, T2m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VSUB(T2m, T2j), ms, &(x[WS(rs, 1)])); + } + } + { + V T2r, T2M, T2F, T2N, T2y, T2J, T2C, T2K; + { + V T2p, T2q, T2D, T2E; + T2p = VADD(Tb, Tr); + T2q = VADD(T1Z, T20); + T2r = VSUB(T2p, T2q); + T2M = VADD(T2p, T2q); + T2D = VFNMS(LDK(KP195090322), T2s, VMUL(LDK(KP980785280), T2t)); + T2E = VFMA(LDK(KP195090322), T2v, VMUL(LDK(KP980785280), T2w)); + T2F = VSUB(T2D, T2E); + T2N = VADD(T2D, T2E); + } + { + V T2u, T2x, T2A, T2B; + T2u = VFMA(LDK(KP980785280), T2s, VMUL(LDK(KP195090322), T2t)); + T2x = VFNMS(LDK(KP195090322), T2w, VMUL(LDK(KP980785280), T2v)); + T2y = VSUB(T2u, T2x); + T2J = VADD(T2u, T2x); + T2A = VADD(TG, TX); + T2B = VADD(T28, T22); + T2C = VSUB(T2A, T2B); + T2K = VADD(T2B, T2A); + } + { + V T2z, T2G, T2P, T2Q; + T2z = VADD(T2r, T2y); + T2G = VBYI(VADD(T2C, T2F)); + ST(&(x[WS(rs, 25)]), VSUB(T2z, T2G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T2z, T2G), ms, &(x[WS(rs, 1)])); + T2P = VBYI(VADD(T2K, T2J)); + T2Q = VADD(T2M, T2N); + ST(&(x[WS(rs, 1)]), VADD(T2P, T2Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VSUB(T2Q, T2P), ms, &(x[WS(rs, 1)])); + } + { + V T2H, T2I, T2L, T2O; + T2H = VSUB(T2r, T2y); + T2I = VBYI(VSUB(T2F, T2C)); + ST(&(x[WS(rs, 23)]), VSUB(T2H, T2I), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T2H, T2I), ms, &(x[WS(rs, 1)])); + T2L = 
VBYI(VSUB(T2J, T2K)); + T2O = VSUB(T2M, T2N); + ST(&(x[WS(rs, 15)]), VADD(T2L, T2O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VSUB(T2O, T2L), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 27), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t3bv_32"), twinstr, &GENUS, {228, 142, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_32) (planner *p) { + X(kdft_dit_register) (p, t3bv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:47 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 4 -name t3bv_4 -include t3b.h -sign 1 */ + +/* + * This function contains 12 FP additions, 10 FP multiplications, + * (or, 10 additions, 8 multiplications, 2 fused multiply/add), + * 16 stack variables, 0 constants, and 8 memory accesses + */ +#include "t3b.h" + +static void t3bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(4, rs)) { + V T2, T3, T1, Ta, T5, T8; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + T1 = LD(&(x[0]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V T4, Tb, T9, T6; + T4 = VZMULJ(T2, T3); + Tb = VZMUL(T3, Ta); + T9 = VZMUL(T2, T8); + T6 = VZMUL(T4, T5); + { + V Tc, Te, T7, Td; + Tc = VSUB(T9, Tb); + Te = VADD(T9, Tb); + T7 = VSUB(T1, T6); + Td = VADD(T1, T6); + ST(&(x[0]), VADD(Td, Te), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Td, Te), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Tc, T7), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tc, T7), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t3bv_4"), twinstr, &GENUS, {10, 8, 2, 0}, 
0, 0, 0 }; + +void XSIMD(codelet_t3bv_4) (planner *p) { + X(kdft_dit_register) (p, t3bv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 4 -name t3bv_4 -include t3b.h -sign 1 */ + +/* + * This function contains 12 FP additions, 8 FP multiplications, + * (or, 12 additions, 8 multiplications, 0 fused multiply/add), + * 16 stack variables, 0 constants, and 8 memory accesses + */ +#include "t3b.h" + +static void t3bv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(4, rs)) { + V T2, T3, T4; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + T4 = VZMULJ(T2, T3); + { + V T1, Tb, T6, T9, Ta, T5, T8; + T1 = LD(&(x[0]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tb = VZMUL(T3, Ta); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T6 = VZMUL(T4, T5); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = VZMUL(T2, T8); + { + V T7, Tc, Td, Te; + T7 = VSUB(T1, T6); + Tc = VBYI(VSUB(T9, Tb)); + ST(&(x[WS(rs, 3)]), VSUB(T7, Tc), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T7, Tc), ms, &(x[WS(rs, 1)])); + Td = VADD(T1, T6); + Te = VADD(T9, Tb); + ST(&(x[WS(rs, 2)]), VSUB(Td, Te), ms, &(x[0])); + ST(&(x[0]), VADD(Td, Te), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t3bv_4"), twinstr, &GENUS, {12, 8, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_4) (planner *p) { + X(kdft_dit_register) (p, t3bv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_5.c 
Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:48 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 5 -name t3bv_5 -include t3b.h -sign 1 */ + +/* + * This function contains 22 FP additions, 23 FP multiplications, + * (or, 13 additions, 14 multiplications, 9 fused multiply/add), + * 30 stack variables, 4 constants, and 10 memory accesses + */ +#include "t3b.h" + +static void t3bv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, 
x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(5, rs)) { + V T2, T5, T1, T3, Td, T7, Tb; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 2])); + T1 = LD(&(x[0]), ms, &(x[0])); + T3 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V Ta, T6, T4, Te, Tc, T8; + Ta = VZMULJ(T2, T5); + T6 = VZMUL(T2, T5); + T4 = VZMUL(T2, T3); + Te = VZMUL(T5, Td); + Tc = VZMUL(Ta, Tb); + T8 = VZMUL(T6, T7); + { + V Tf, Tl, T9, Tk; + Tf = VADD(Tc, Te); + Tl = VSUB(Tc, Te); + T9 = VADD(T4, T8); + Tk = VSUB(T4, T8); + { + V Ti, Tg, To, Tm, Th, Tn, Tj; + Ti = VSUB(T9, Tf); + Tg = VADD(T9, Tf); + To = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tk, Tl)); + Tm = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tl, Tk)); + Th = VFNMS(LDK(KP250000000), Tg, T1); + ST(&(x[0]), VADD(T1, Tg), ms, &(x[0])); + Tn = VFNMS(LDK(KP559016994), Ti, Th); + Tj = VFMA(LDK(KP559016994), Ti, Th); + ST(&(x[WS(rs, 2)]), VFNMSI(To, Tn), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(To, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFNMSI(Tm, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFMAI(Tm, Tj), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t3bv_5"), twinstr, &GENUS, {13, 14, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_5) (planner *p) { + X(kdft_dit_register) (p, t3bv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 5 -name t3bv_5 -include t3b.h -sign 1 */ + +/* + * This function contains 22 FP additions, 18 FP multiplications, + * (or, 19 additions, 15 multiplications, 3 fused multiply/add), + * 24 stack variables, 4 constants, and 10 memory accesses + */ +#include "t3b.h" + +static 
void t3bv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T4, T5, T9; + T1 = LDW(&(W[0])); + T4 = LDW(&(W[TWVL * 2])); + T5 = VZMUL(T1, T4); + T9 = VZMULJ(T1, T4); + { + V Tj, T8, Te, Tg, Th, Tk; + Tj = LD(&(x[0]), ms, &(x[0])); + { + V T3, Td, T7, Tb; + { + V T2, Tc, T6, Ta; + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = VZMUL(T1, T2); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = VZMUL(T4, Tc); + T6 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = VZMUL(T5, T6); + Ta = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tb = VZMUL(T9, Ta); + } + T8 = VSUB(T3, T7); + Te = VSUB(Tb, Td); + Tg = VADD(T3, T7); + Th = VADD(Tb, Td); + Tk = VADD(Tg, Th); + } + ST(&(x[0]), VADD(Tj, Tk), ms, &(x[0])); + { + V Tf, Tn, Tm, To, Ti, Tl; + Tf = VBYI(VFMA(LDK(KP951056516), T8, VMUL(LDK(KP587785252), Te))); + Tn = VBYI(VFNMS(LDK(KP951056516), Te, VMUL(LDK(KP587785252), T8))); + Ti = VMUL(LDK(KP559016994), VSUB(Tg, Th)); + Tl = VFNMS(LDK(KP250000000), Tk, Tj); + Tm = VADD(Ti, Tl); + To = VSUB(Tl, Ti); + ST(&(x[WS(rs, 1)]), VADD(Tf, Tm), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(To, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VSUB(Tm, Tf), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tn, To), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t3bv_5"), twinstr, &GENUS, {19, 15, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_5) (planner *p) { + X(kdft_dit_register) (p, t3bv_5, 
&desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:47 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3bv_8 -include t3b.h -sign 1 */ + +/* + * This function contains 37 FP additions, 32 FP multiplications, + * (or, 27 additions, 22 multiplications, 10 fused multiply/add), + * 43 stack variables, 1 constants, and 16 memory accesses + */ +#include "t3b.h" + +static void t3bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + 
VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs)) { + V T2, T3, Tb, T1, T5, Tn, Tq, T8, Td, T4, Ta, Tp, Tg, Ti, T9; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + Tb = LDW(&(W[TWVL * 4])); + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = VZMUL(T2, T3); + Ta = VZMULJ(T2, T3); + Tp = VZMULJ(T2, Tb); + Tg = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = VZMUL(T2, T8); + { + V T6, To, Tc, Tr, Th, Tj; + T6 = VZMUL(T4, T5); + To = VZMUL(Ta, Tn); + Tc = VZMULJ(Ta, Tb); + Tr = VZMUL(Tp, Tq); + Th = VZMUL(Tb, Tg); + Tj = VZMUL(T3, Ti); + { + V Tx, T7, Te, Ts, Ty, Tk, TB; + Tx = VADD(T1, T6); + T7 = VSUB(T1, T6); + Te = VZMUL(Tc, Td); + Ts = VSUB(To, Tr); + Ty = VADD(To, Tr); + Tk = VSUB(Th, Tj); + TB = VADD(Th, Tj); + { + V Tf, TA, Tz, TD; + Tf = VSUB(T9, Te); + TA = VADD(T9, Te); + Tz = VSUB(Tx, Ty); + TD = VADD(Tx, Ty); + { + V TC, TE, Tl, Tt; + TC = VSUB(TA, TB); + TE = VADD(TA, TB); + Tl = VADD(Tf, Tk); + Tt = VSUB(Tf, Tk); + { + V Tu, Tw, Tm, Tv; + ST(&(x[0]), VADD(TD, TE), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(TD, TE), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TC, Tz), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TC, Tz), ms, &(x[0])); + Tu = VFNMS(LDK(KP707106781), Tt, Ts); + Tw = VFMA(LDK(KP707106781), Tt, Ts); + Tm = VFNMS(LDK(KP707106781), Tl, T7); + Tv = VFMA(LDK(KP707106781), Tl, T7); + ST(&(x[WS(rs, 1)]), VFMAI(Tw, Tv), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFNMSI(Tw, Tv), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFMAI(Tu, Tm), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFNMSI(Tu, Tm), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const 
ct_desc desc = { 8, XSIMD_STRING("t3bv_8"), twinstr, &GENUS, {27, 22, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_8) (planner *p) { + X(kdft_dit_register) (p, t3bv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3bv_8 -include t3b.h -sign 1 */ + +/* + * This function contains 37 FP additions, 24 FP multiplications, + * (or, 37 additions, 24 multiplications, 0 fused multiply/add), + * 31 stack variables, 1 constants, and 16 memory accesses + */ +#include "t3b.h" + +static void t3bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ii; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T4, T5, Tp, T6, T7, Tj; + T1 = LDW(&(W[0])); + T4 = LDW(&(W[TWVL * 2])); + T5 = VZMULJ(T1, T4); + Tp = VZMUL(T1, T4); + T6 = LDW(&(W[TWVL * 4])); + T7 = VZMULJ(T5, T6); + Tj = VZMULJ(T1, T6); + { + V Ts, Tx, Tm, Ty, Ta, TA, Tf, TB, To, Tr, Tq; + To = LD(&(x[0]), ms, &(x[0])); + Tq = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tr = VZMUL(Tp, Tq); + Ts = VSUB(To, Tr); + Tx = VADD(To, Tr); + { + V Ti, Tl, Th, Tk; + Th = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Ti = VZMUL(T5, Th); + Tk = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tl = VZMUL(Tj, Tk); + Tm = VSUB(Ti, Tl); + Ty = VADD(Ti, Tl); + } + { + V T3, T9, T2, T8; + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = VZMUL(T1, T2); + T8 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T9 = VZMUL(T7, T8); + Ta = VSUB(T3, T9); + TA = VADD(T3, T9); + } + { + V Tc, Te, Tb, Td; + Tb = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Tc = VZMUL(T6, Tb); + Td = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Te = VZMUL(T4, Td); + Tf = VSUB(Tc, Te); + TB = VADD(Tc, Te); + } + { + V Tz, TC, TD, TE; + Tz = 
VSUB(Tx, Ty); + TC = VBYI(VSUB(TA, TB)); + ST(&(x[WS(rs, 6)]), VSUB(Tz, TC), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tz, TC), ms, &(x[0])); + TD = VADD(Tx, Ty); + TE = VADD(TA, TB); + ST(&(x[WS(rs, 4)]), VSUB(TD, TE), ms, &(x[0])); + ST(&(x[0]), VADD(TD, TE), ms, &(x[0])); + { + V Tn, Tv, Tu, Tw, Tg, Tt; + Tg = VMUL(LDK(KP707106781), VSUB(Ta, Tf)); + Tn = VBYI(VSUB(Tg, Tm)); + Tv = VBYI(VADD(Tm, Tg)); + Tt = VMUL(LDK(KP707106781), VADD(Ta, Tf)); + Tu = VSUB(Ts, Tt); + Tw = VADD(Ts, Tt); + ST(&(x[WS(rs, 3)]), VADD(Tn, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VSUB(Tw, Tv), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tu, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tv, Tw), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t3bv_8"), twinstr, &GENUS, {37, 24, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3bv_8) (planner *p) { + X(kdft_dit_register) (p, t3bv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:26 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3fv_10 -include t3f.h */ + +/* + * This function contains 57 FP additions, 52 FP multiplications, + * (or, 39 additions, 34 multiplications, 18 fused multiply/add), + * 57 stack variables, 4 constants, and 20 memory accesses + */ +#include "t3f.h" + +static void t3fv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(10, rs)) { + V T1, T7, Th, Tx, Tr, Td, Tp, T6, Tv, Tc, Te, Ti, Tl, T2, T3; + V T5; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + T5 = LDW(&(W[TWVL * 4])); + T1 = LD(&(x[0]), ms, &(x[0])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V To, Tw, Tq, Tu, Ta, T4, Tt, Tk, Tb; + To = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tw = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tq = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tu = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ta = VZMULJ(T2, T3); + T4 = VZMUL(T2, T3); + Th = VZMULJ(T2, T5); + Tt = VZMULJ(T3, T5); + Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tx = VZMULJ(T2, Tw); + 
Tr = VZMULJ(T5, Tq); + Tk = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = VZMULJ(Ta, T5); + Tp = VZMULJ(T4, To); + T6 = VZMULJ(T4, T5); + Tv = VZMULJ(Tt, Tu); + Tc = VZMULJ(Ta, Tb); + Te = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Tl = VZMULJ(T3, Tk); + } + { + V TN, Ts, T8, Ty, TO, Tf, Tj; + TN = VADD(Tp, Tr); + Ts = VSUB(Tp, Tr); + T8 = VZMULJ(T6, T7); + Ty = VSUB(Tv, Tx); + TO = VADD(Tv, Tx); + Tf = VZMULJ(Td, Te); + Tj = VZMULJ(Th, Ti); + { + V T9, TJ, TP, TU, Tz, TF, Tg, TK, Tm, TL; + T9 = VSUB(T1, T8); + TJ = VADD(T1, T8); + TP = VADD(TN, TO); + TU = VSUB(TN, TO); + Tz = VADD(Ts, Ty); + TF = VSUB(Ts, Ty); + Tg = VSUB(Tc, Tf); + TK = VADD(Tc, Tf); + Tm = VSUB(Tj, Tl); + TL = VADD(Tj, Tl); + { + V TM, TV, Tn, TE; + TM = VADD(TK, TL); + TV = VSUB(TK, TL); + Tn = VADD(Tg, Tm); + TE = VSUB(Tg, Tm); + { + V TW, TY, TS, TQ, TG, TI, TC, TA, TR, TB; + TW = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TV, TU)); + TY = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TU, TV)); + TS = VSUB(TM, TP); + TQ = VADD(TM, TP); + TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TF, TE)); + TI = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TE, TF)); + TC = VSUB(Tn, Tz); + TA = VADD(Tn, Tz); + ST(&(x[0]), VADD(TJ, TQ), ms, &(x[0])); + TR = VFNMS(LDK(KP250000000), TQ, TJ); + ST(&(x[WS(rs, 5)]), VADD(T9, TA), ms, &(x[WS(rs, 1)])); + TB = VFNMS(LDK(KP250000000), TA, T9); + { + V TX, TT, TH, TD; + TX = VFMA(LDK(KP559016994), TS, TR); + TT = VFNMS(LDK(KP559016994), TS, TR); + TH = VFNMS(LDK(KP559016994), TC, TB); + TD = VFMA(LDK(KP559016994), TC, TB); + ST(&(x[WS(rs, 8)]), VFNMSI(TW, TT), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(TW, TT), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TY, TX), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(TY, TX), ms, &(x[0])); + ST(&(x[WS(rs, 9)]), VFMAI(TG, TD), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(TG, TD), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(TI, TH), ms, &(x[WS(rs, 1)])); + 
ST(&(x[WS(rs, 3)]), VFNMSI(TI, TH), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t3fv_10"), twinstr, &GENUS, {39, 34, 18, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_10) (planner *p) { + X(kdft_dit_register) (p, t3fv_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3fv_10 -include t3f.h */ + +/* + * This function contains 57 FP additions, 42 FP multiplications, + * (or, 51 additions, 36 multiplications, 6 fused multiply/add), + * 41 stack variables, 4 constants, and 20 memory accesses + */ +#include "t3f.h" + +static void t3fv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(10, rs)) { + V T1, T2, T3, Ti, T6, T7, Tx, Tb, To; + T1 = LDW(&(W[0])); + T2 = LDW(&(W[TWVL * 2])); + T3 = VZMULJ(T1, T2); + Ti = VZMUL(T1, T2); + T6 = LDW(&(W[TWVL * 4])); + T7 = VZMULJ(T3, T6); + Tx = VZMULJ(Ti, T6); + Tb = VZMULJ(T1, T6); + To = VZMULJ(T2, T6); + { + V TA, TQ, Tn, Tt, Tu, TJ, TK, TS, Ta, Tg, Th, TM, TN, TR, Tw; + V Tz, Ty; + Tw = LD(&(x[0]), ms, &(x[0])); + Ty = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tz = VZMULJ(Tx, Ty); + TA = VSUB(Tw, Tz); + TQ = VADD(Tw, Tz); + { + V Tk, Ts, Tm, Tq; + { + V Tj, Tr, Tl, Tp; + Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tk = VZMULJ(Ti, Tj); + Tr = LD(&(x[WS(rs, 1)]), 
ms, &(x[WS(rs, 1)])); + Ts = VZMULJ(T1, Tr); + Tl = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tm = VZMULJ(T6, Tl); + Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tq = VZMULJ(To, Tp); + } + Tn = VSUB(Tk, Tm); + Tt = VSUB(Tq, Ts); + Tu = VADD(Tn, Tt); + TJ = VADD(Tk, Tm); + TK = VADD(Tq, Ts); + TS = VADD(TJ, TK); + } + { + V T5, Tf, T9, Td; + { + V T4, Te, T8, Tc; + T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T5 = VZMULJ(T3, T4); + Te = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tf = VZMULJ(T2, Te); + T8 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T9 = VZMULJ(T7, T8); + Tc = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Td = VZMULJ(Tb, Tc); + } + Ta = VSUB(T5, T9); + Tg = VSUB(Td, Tf); + Th = VADD(Ta, Tg); + TM = VADD(T5, T9); + TN = VADD(Td, Tf); + TR = VADD(TM, TN); + } + { + V Tv, TB, TC, TG, TI, TE, TF, TH, TD; + Tv = VMUL(LDK(KP559016994), VSUB(Th, Tu)); + TB = VADD(Th, Tu); + TC = VFNMS(LDK(KP250000000), TB, TA); + TE = VSUB(Ta, Tg); + TF = VSUB(Tn, Tt); + TG = VBYI(VFMA(LDK(KP951056516), TE, VMUL(LDK(KP587785252), TF))); + TI = VBYI(VFNMS(LDK(KP587785252), TE, VMUL(LDK(KP951056516), TF))); + ST(&(x[WS(rs, 5)]), VADD(TA, TB), ms, &(x[WS(rs, 1)])); + TH = VSUB(TC, Tv); + ST(&(x[WS(rs, 3)]), VSUB(TH, TI), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(TI, TH), ms, &(x[WS(rs, 1)])); + TD = VADD(Tv, TC); + ST(&(x[WS(rs, 1)]), VSUB(TD, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(TG, TD), ms, &(x[WS(rs, 1)])); + } + { + V TV, TT, TU, TP, TX, TL, TO, TY, TW; + TV = VMUL(LDK(KP559016994), VSUB(TR, TS)); + TT = VADD(TR, TS); + TU = VFNMS(LDK(KP250000000), TT, TQ); + TL = VSUB(TJ, TK); + TO = VSUB(TM, TN); + TP = VBYI(VFNMS(LDK(KP587785252), TO, VMUL(LDK(KP951056516), TL))); + TX = VBYI(VFMA(LDK(KP951056516), TO, VMUL(LDK(KP587785252), TL))); + ST(&(x[0]), VADD(TQ, TT), ms, &(x[0])); + TY = VADD(TV, TU); + ST(&(x[WS(rs, 4)]), VADD(TX, TY), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VSUB(TY, TX), ms, &(x[0])); + TW = VSUB(TU, TV); + ST(&(x[WS(rs, 2)]), VADD(TP, TW), ms, 
&(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(TW, TP), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 10, XSIMD_STRING("t3fv_10"), twinstr, &GENUS, {51, 36, 6, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_10) (planner *p) { + X(kdft_dit_register) (p, t3fv_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:25 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 16 -name t3fv_16 -include t3f.h */ + +/* + * This function contains 98 FP additions, 86 FP multiplications, + * (or, 64 additions, 52 multiplications, 34 fused multiply/add), + * 70 stack variables, 3 constants, and 32 memory accesses + */ +#include "t3f.h" + +static void t3fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(16, rs)) { + V T13, Tg, TY, T14, T1A, T1q, T1f, T1x, T1r, T1i, Tt, T16, TB, T1j, T1k; + V TH; + { + V T2, T8, Tu, T3; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + Tu = LDW(&(W[TWVL * 6])); + T3 = LDW(&(W[TWVL * 4])); + { + V Ty, T1o, Tf, T1b, T7, Tr, TR, TX, T1g, Tl, To, Tw, TG, Tz, T1p; + V T1e, TC; + { + V T1, T5, Ta, Td; + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Td = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + { + V Tx, TO, TE, Tb, Tm, Tp, TN, Te, T6, TW, TP, TS; + { + V TM, T9, TL, Tc, TU, T4, TV; + TM = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tx = VZMULJ(T2, T8); + T9 = VZMUL(T2, T8); + TL = VZMULJ(T2, Tu); + 
TO = VZMULJ(T8, T3); + Tc = VZMUL(T8, T3); + TU = VZMUL(T2, T3); + T4 = VZMULJ(T2, T3); + TV = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TE = VZMUL(Tx, T3); + Ty = VZMULJ(Tx, T3); + Tb = VZMULJ(T9, Ta); + Tm = VZMULJ(T9, T3); + Tp = VZMUL(T9, T3); + TN = VZMULJ(TL, TM); + Te = VZMULJ(Tc, Td); + T6 = VZMULJ(T4, T5); + TW = VZMULJ(TU, TV); + } + TP = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TS = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V TQ, TT, Ti, Tk, Tn, Th, Tq, Tj; + Th = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tq = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tj = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1o = VSUB(Tb, Te); + Tf = VADD(Tb, Te); + T1b = VSUB(T1, T6); + T7 = VADD(T1, T6); + TQ = VZMULJ(TO, TP); + TT = VZMULJ(Tx, TS); + Ti = VZMULJ(T2, Th); + Tr = VZMULJ(Tp, Tq); + Tk = VZMULJ(T3, Tj); + Tn = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V T1d, T1c, Tv, TF; + Tv = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + TF = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1d = VSUB(TN, TQ); + TR = VADD(TN, TQ); + T1c = VSUB(TT, TW); + TX = VADD(TT, TW); + T1g = VSUB(Ti, Tk); + Tl = VADD(Ti, Tk); + To = VZMULJ(Tm, Tn); + Tw = VZMULJ(Tu, Tv); + TG = VZMULJ(TE, TF); + Tz = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1p = VSUB(T1d, T1c); + T1e = VADD(T1c, T1d); + TC = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + } + } + } + } + { + V T1h, Ts, TA, TD; + T13 = VADD(T7, Tf); + Tg = VSUB(T7, Tf); + T1h = VSUB(To, Tr); + Ts = VADD(To, Tr); + TY = VSUB(TR, TX); + T14 = VADD(TX, TR); + TA = VZMULJ(Ty, Tz); + T1A = VFMA(LDK(KP707106781), T1p, T1o); + T1q = VFNMS(LDK(KP707106781), T1p, T1o); + T1f = VFMA(LDK(KP707106781), T1e, T1b); + T1x = VFNMS(LDK(KP707106781), T1e, T1b); + TD = VZMULJ(T8, TC); + T1r = VFMA(LDK(KP414213562), T1g, T1h); + T1i = VFNMS(LDK(KP414213562), T1h, T1g); + Tt = VSUB(Tl, Ts); + T16 = VADD(Tl, Ts); + TB = VADD(Tw, TA); + T1j = VSUB(Tw, TA); + T1k = VSUB(TG, TD); + TH = VADD(TD, TG); + } + } + } + { + V T15, T19, T1l, T1s, TI, T17; + T15 = VADD(T13, 
T14); + T19 = VSUB(T13, T14); + T1l = VFNMS(LDK(KP414213562), T1k, T1j); + T1s = VFMA(LDK(KP414213562), T1j, T1k); + TI = VSUB(TB, TH); + T17 = VADD(TB, TH); + { + V T1y, T1t, T1B, T1m; + T1y = VADD(T1r, T1s); + T1t = VSUB(T1r, T1s); + T1B = VSUB(T1l, T1i); + T1m = VADD(T1i, T1l); + { + V T18, T1a, TJ, TZ; + T18 = VADD(T16, T17); + T1a = VSUB(T17, T16); + TJ = VADD(Tt, TI); + TZ = VSUB(TI, Tt); + { + V T1u, T1w, T1z, T1D; + T1u = VFNMS(LDK(KP923879532), T1t, T1q); + T1w = VFMA(LDK(KP923879532), T1t, T1q); + T1z = VFNMS(LDK(KP923879532), T1y, T1x); + T1D = VFMA(LDK(KP923879532), T1y, T1x); + { + V T1n, T1v, T1C, T1E; + T1n = VFNMS(LDK(KP923879532), T1m, T1f); + T1v = VFMA(LDK(KP923879532), T1m, T1f); + T1C = VFNMS(LDK(KP923879532), T1B, T1A); + T1E = VFMA(LDK(KP923879532), T1B, T1A); + ST(&(x[WS(rs, 12)]), VFNMSI(T1a, T19), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T1a, T19), ms, &(x[0])); + ST(&(x[0]), VADD(T15, T18), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VSUB(T15, T18), ms, &(x[0])); + { + V T10, T12, TK, T11; + T10 = VFNMS(LDK(KP707106781), TZ, TY); + T12 = VFMA(LDK(KP707106781), TZ, TY); + TK = VFNMS(LDK(KP707106781), TJ, Tg); + T11 = VFMA(LDK(KP707106781), TJ, Tg); + ST(&(x[WS(rs, 1)]), VFNMSI(T1w, T1v), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T1w, T1v), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1u, T1n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1u, T1n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T1E, T1D), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1E, T1D), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T1C, T1z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T1C, T1z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFNMSI(T12, T11), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T12, T11), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T10, TK), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T10, TK), ms, &(x[0])); + } + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + 
VTW(0, 3), + VTW(0, 9), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t3fv_16"), twinstr, &GENUS, {64, 52, 34, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_16) (planner *p) { + X(kdft_dit_register) (p, t3fv_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 16 -name t3fv_16 -include t3f.h */ + +/* + * This function contains 98 FP additions, 64 FP multiplications, + * (or, 94 additions, 60 multiplications, 4 fused multiply/add), + * 51 stack variables, 3 constants, and 32 memory accesses + */ +#include "t3f.h" + +static void t3fv_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(16, rs)) { + V T4, T5, T6, To, T1, Ty, T7, T8, TO, TV, Te, Tp, TB, TH, Ts; + T4 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 2])); + T6 = VZMULJ(T4, T5); + To = VZMUL(T4, T5); + T1 = LDW(&(W[TWVL * 6])); + Ty = VZMULJ(T4, T1); + T7 = LDW(&(W[TWVL * 4])); + T8 = VZMULJ(T6, T7); + TO = VZMUL(T5, T7); + TV = VZMULJ(T4, T7); + Te = VZMUL(T6, T7); + Tp = VZMULJ(To, T7); + TB = VZMULJ(T5, T7); + TH = VZMUL(T4, T7); + Ts = VZMUL(To, T7); + { + V TY, T1f, TR, T1g, T1q, T1r, TL, TZ, T1l, T1m, T1n, Ti, T12, T1i, T1j; + V T1k, Tw, T11, TU, TX, TW; + TU = LD(&(x[0]), ms, &(x[0])); + TW = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + TX = VZMULJ(TV, TW); + TY = VSUB(TU, TX); + T1f = VADD(TU, TX); + { + V TN, TQ, TM, TP; + TM = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TN = VZMULJ(To, TM); + TP = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TQ = VZMULJ(TO, TP); + TR = 
VSUB(TN, TQ); + T1g = VADD(TN, TQ); + } + { + V TA, TJ, TD, TG, TE, TK; + { + V Tz, TI, TC, TF; + Tz = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TA = VZMULJ(Ty, Tz); + TI = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TJ = VZMULJ(TH, TI); + TC = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TD = VZMULJ(TB, TC); + TF = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TG = VZMULJ(T6, TF); + } + T1q = VADD(TA, TD); + T1r = VADD(TG, TJ); + TE = VSUB(TA, TD); + TK = VSUB(TG, TJ); + TL = VMUL(LDK(KP707106781), VSUB(TE, TK)); + TZ = VMUL(LDK(KP707106781), VADD(TK, TE)); + } + { + V T3, Tg, Ta, Td, Tb, Th; + { + V T2, Tf, T9, Tc; + T2 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T3 = VZMULJ(T1, T2); + Tf = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tg = VZMULJ(Te, Tf); + T9 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ta = VZMULJ(T8, T9); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = VZMULJ(T5, Tc); + } + T1l = VADD(T3, Ta); + T1m = VADD(Td, Tg); + T1n = VSUB(T1l, T1m); + Tb = VSUB(T3, Ta); + Th = VSUB(Td, Tg); + Ti = VFNMS(LDK(KP923879532), Th, VMUL(LDK(KP382683432), Tb)); + T12 = VFMA(LDK(KP923879532), Tb, VMUL(LDK(KP382683432), Th)); + } + { + V Tk, Tu, Tm, Tr, Tn, Tv; + { + V Tj, Tt, Tl, Tq; + Tj = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Tk = VZMULJ(T4, Tj); + Tt = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + Tu = VZMULJ(Ts, Tt); + Tl = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tm = VZMULJ(T7, Tl); + Tq = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tr = VZMULJ(Tp, Tq); + } + T1i = VADD(Tk, Tm); + T1j = VADD(Tr, Tu); + T1k = VSUB(T1i, T1j); + Tn = VSUB(Tk, Tm); + Tv = VSUB(Tr, Tu); + Tw = VFMA(LDK(KP382683432), Tn, VMUL(LDK(KP923879532), Tv)); + T11 = VFNMS(LDK(KP382683432), Tv, VMUL(LDK(KP923879532), Tn)); + } + { + V T1p, T1v, T1u, T1w; + { + V T1h, T1o, T1s, T1t; + T1h = VSUB(T1f, T1g); + T1o = VMUL(LDK(KP707106781), VADD(T1k, T1n)); + T1p = VADD(T1h, T1o); + T1v = VSUB(T1h, T1o); + T1s = VSUB(T1q, T1r); + T1t = VMUL(LDK(KP707106781), VSUB(T1n, T1k)); + T1u = VBYI(VADD(T1s, T1t)); 
+ T1w = VBYI(VSUB(T1t, T1s)); + } + ST(&(x[WS(rs, 14)]), VSUB(T1p, T1u), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T1v, T1w), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T1p, T1u), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VSUB(T1v, T1w), ms, &(x[0])); + } + { + V T1z, T1D, T1C, T1E; + { + V T1x, T1y, T1A, T1B; + T1x = VADD(T1f, T1g); + T1y = VADD(T1r, T1q); + T1z = VADD(T1x, T1y); + T1D = VSUB(T1x, T1y); + T1A = VADD(T1i, T1j); + T1B = VADD(T1l, T1m); + T1C = VADD(T1A, T1B); + T1E = VBYI(VSUB(T1B, T1A)); + } + ST(&(x[WS(rs, 8)]), VSUB(T1z, T1C), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T1D, T1E), ms, &(x[0])); + ST(&(x[0]), VADD(T1z, T1C), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VSUB(T1D, T1E), ms, &(x[0])); + } + { + V TT, T15, T14, T16; + { + V Tx, TS, T10, T13; + Tx = VSUB(Ti, Tw); + TS = VSUB(TL, TR); + TT = VBYI(VSUB(Tx, TS)); + T15 = VBYI(VADD(TS, Tx)); + T10 = VADD(TY, TZ); + T13 = VADD(T11, T12); + T14 = VSUB(T10, T13); + T16 = VADD(T10, T13); + } + ST(&(x[WS(rs, 7)]), VADD(TT, T14), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VSUB(T16, T15), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VSUB(T14, TT), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T15, T16), ms, &(x[WS(rs, 1)])); + } + { + V T19, T1d, T1c, T1e; + { + V T17, T18, T1a, T1b; + T17 = VSUB(TY, TZ); + T18 = VADD(Tw, Ti); + T19 = VADD(T17, T18); + T1d = VSUB(T17, T18); + T1a = VADD(TR, TL); + T1b = VSUB(T12, T11); + T1c = VBYI(VADD(T1a, T1b)); + T1e = VBYI(VSUB(T1b, T1a)); + } + ST(&(x[WS(rs, 13)]), VSUB(T19, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VADD(T1d, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T19, T1c), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VSUB(T1d, T1e), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 15), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 16, XSIMD_STRING("t3fv_16"), twinstr, &GENUS, {94, 60, 4, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_16) (planner *p) { 
+ X(kdft_dit_register) (p, t3fv_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:27 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 20 -name t3fv_20 -include t3f.h */ + +/* + * This function contains 138 FP additions, 118 FP multiplications, + * (or, 92 additions, 72 multiplications, 46 fused multiply/add), + * 90 stack variables, 4 constants, and 40 memory accesses + */ +#include "t3f.h" + +static void t3fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, 
+0.951056516295153572116439333379382143405698634); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(20, rs)) { + V T1k, T1w, T1r, T1z, T1o, T1y, T1v, T1h; + { + V T2, T8, T3, Td; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + T3 = LDW(&(W[TWVL * 4])); + Td = LDW(&(W[TWVL * 6])); + { + V T7, TM, T1F, T23, T1p, Tp, T1j, T27, T1P, T1I, T1i, T1L, T28, T1S, T1q; + V TE, T1n, T1d, T26, T2e; + { + V T1, TK, T5, TH; + T1 = LD(&(x[0]), ms, &(x[0])); + TK = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TH = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + { + V TA, Tx, TU, T1O, T14, Th, T1G, T1R, T1b, T1J, To, Ts, TV, Tv, TO; + V TQ, TT, Ty, TB; + { + V Tq, Tt, T17, T1a, Tk, Tn; + { + V Tl, Ti, T15, T18, TZ, Tc, T6, Tb, Tf, T10, T12, TL; + { + V TJ, Ta, T9, T4; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + TA = VZMULJ(T2, T8); + T9 = VZMUL(T2, T8); + Tx = VZMUL(T8, T3); + Tl = VZMULJ(T8, T3); + T4 = VZMUL(T2, T3); + Tq = VZMULJ(T2, T3); + Tt = VZMULJ(T2, Td); + Ti = VZMULJ(T8, Td); + T15 = VZMULJ(TA, Td); + T18 = VZMULJ(TA, T3); + TU = VZMUL(TA, T3); + TJ = VZMULJ(T9, Td); + TZ = VZMUL(T9, T3); + Tc = VZMULJ(T9, T3); + T6 = VZMULJ(T4, T5); + Tb = VZMULJ(T9, Ta); + Tf = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + T10 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T12 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + TL = VZMULJ(TJ, TK); + } + { + V T1D, T11, T13, T19, T1E, Tg, T16, TI, Te, Tj, Tm; + T16 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + TI = VZMULJ(Tc, TH); + Te = VZMULJ(Tc, Td); + T7 = VSUB(T1, T6); + T1D = VADD(T1, T6); + T11 = VZMULJ(TZ, T10); + T13 = VZMULJ(T8, T12); + T19 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T17 = VZMULJ(T15, T16); + TM = VSUB(TI, TL); + T1E = VADD(TI, TL); + 
Tg = VZMULJ(Te, Tf); + Tj = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tm = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T1O = VADD(T11, T13); + T14 = VSUB(T11, T13); + T1a = VZMULJ(T18, T19); + T1F = VSUB(T1D, T1E); + T23 = VADD(T1D, T1E); + Th = VSUB(Tb, Tg); + T1G = VADD(Tb, Tg); + Tk = VZMULJ(Ti, Tj); + Tn = VZMULJ(Tl, Tm); + } + } + { + V Tr, Tu, TN, TP, TS; + Tr = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1R = VADD(T17, T1a); + T1b = VSUB(T17, T1a); + Tu = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + TN = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TP = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TS = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T1J = VADD(Tk, Tn); + To = VSUB(Tk, Tn); + Ts = VZMULJ(Tq, Tr); + TV = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tv = VZMULJ(Tt, Tu); + TO = VZMULJ(T3, TN); + TQ = VZMULJ(Td, TP); + TT = VZMULJ(T2, TS); + Ty = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + TB = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + } + } + { + V T1N, Tw, T1H, TR, Tz, TC, T1c, TX, T1K, TW; + T1p = VSUB(Th, To); + Tp = VADD(Th, To); + TW = VZMULJ(TU, TV); + T1N = VADD(Ts, Tv); + Tw = VSUB(Ts, Tv); + T1H = VADD(TO, TQ); + TR = VSUB(TO, TQ); + Tz = VZMULJ(Tx, Ty); + TC = VZMULJ(TA, TB); + T1j = VSUB(T1b, T14); + T1c = VADD(T14, T1b); + TX = VSUB(TT, TW); + T1K = VADD(TT, TW); + T27 = VADD(T1N, T1O); + T1P = VSUB(T1N, T1O); + { + V TD, T1Q, T24, TY, T25; + TD = VSUB(Tz, TC); + T1Q = VADD(Tz, TC); + T1I = VSUB(T1G, T1H); + T24 = VADD(T1G, T1H); + TY = VADD(TR, TX); + T1i = VSUB(TX, TR); + T25 = VADD(T1J, T1K); + T1L = VSUB(T1J, T1K); + T28 = VADD(T1Q, T1R); + T1S = VSUB(T1Q, T1R); + T1q = VSUB(Tw, TD); + TE = VADD(Tw, TD); + T1n = VSUB(T1c, TY); + T1d = VADD(TY, T1c); + T26 = VADD(T24, T25); + T2e = VSUB(T24, T25); + } + } + } + } + { + V T1M, T1Z, T1Y, T1T, T29, T2f, T1g, TF, T1m, T1e; + T1M = VADD(T1I, T1L); + T1Z = VSUB(T1I, T1L); + T1Y = VSUB(T1P, T1S); + T1T = VADD(T1P, T1S); + T29 = VADD(T27, T28); + T2f = VSUB(T27, T28); + T1g = VSUB(Tp, TE); + TF = VADD(Tp, TE); + T1m = 
VFNMS(LDK(KP250000000), T1d, TM); + T1e = VADD(TM, T1d); + { + V T1W, T2c, T1f, T2i, T2g, T22, T20, T1V, T2b, T1U, T2a, TG; + T1k = VFMA(LDK(KP618033988), T1j, T1i); + T1w = VFNMS(LDK(KP618033988), T1i, T1j); + T1W = VSUB(T1M, T1T); + T1U = VADD(T1M, T1T); + T2c = VSUB(T26, T29); + T2a = VADD(T26, T29); + T1f = VFNMS(LDK(KP250000000), TF, T7); + TG = VADD(T7, TF); + T2i = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T2e, T2f)); + T2g = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T2f, T2e)); + T22 = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1Y, T1Z)); + T20 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1Z, T1Y)); + ST(&(x[WS(rs, 10)]), VADD(T1F, T1U), ms, &(x[0])); + T1V = VFNMS(LDK(KP250000000), T1U, T1F); + ST(&(x[0]), VADD(T23, T2a), ms, &(x[0])); + T2b = VFNMS(LDK(KP250000000), T2a, T23); + ST(&(x[WS(rs, 15)]), VFMAI(T1e, TG), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T1e, TG), ms, &(x[WS(rs, 1)])); + T1r = VFMA(LDK(KP618033988), T1q, T1p); + T1z = VFNMS(LDK(KP618033988), T1p, T1q); + { + V T21, T1X, T2h, T2d; + T21 = VFMA(LDK(KP559016994), T1W, T1V); + T1X = VFNMS(LDK(KP559016994), T1W, T1V); + T2h = VFNMS(LDK(KP559016994), T2c, T2b); + T2d = VFMA(LDK(KP559016994), T2c, T2b); + ST(&(x[WS(rs, 18)]), VFNMSI(T20, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T20, T1X), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFMAI(T22, T21), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T22, T21), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VFNMSI(T2g, T2d), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VFMAI(T2g, T2d), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFMAI(T2i, T2h), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFNMSI(T2i, T2h), ms, &(x[0])); + T1o = VFNMS(LDK(KP559016994), T1n, T1m); + T1y = VFMA(LDK(KP559016994), T1n, T1m); + T1v = VFNMS(LDK(KP559016994), T1g, T1f); + T1h = VFMA(LDK(KP559016994), T1g, T1f); + } + } + } + } + } + { + V T1C, T1A, T1s, T1u, T1l, T1t, T1B, T1x; + T1C = VFMA(LDK(KP951056516), T1z, T1y); + T1A = VFNMS(LDK(KP951056516), T1z, T1y); + T1s = 
VFMA(LDK(KP951056516), T1r, T1o); + T1u = VFNMS(LDK(KP951056516), T1r, T1o); + T1l = VFMA(LDK(KP951056516), T1k, T1h); + T1t = VFNMS(LDK(KP951056516), T1k, T1h); + T1B = VFMA(LDK(KP951056516), T1w, T1v); + T1x = VFNMS(LDK(KP951056516), T1w, T1v); + ST(&(x[WS(rs, 11)]), VFMAI(T1u, T1t), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T1u, T1t), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T1s, T1l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T1s, T1l), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T1A, T1x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T1A, T1x), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T1C, T1B), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T1C, T1B), ms, &(x[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t3fv_20"), twinstr, &GENUS, {92, 72, 46, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_20) (planner *p) { + X(kdft_dit_register) (p, t3fv_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 20 -name t3fv_20 -include t3f.h */ + +/* + * This function contains 138 FP additions, 92 FP multiplications, + * (or, 126 additions, 80 multiplications, 12 fused multiply/add), + * 73 stack variables, 4 constants, and 40 memory accesses + */ +#include "t3f.h" + +static void t3fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = 
x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(20, rs)) { + V T2, T8, T9, TA, T3, Tc, T4, TZ, T18, Tl, Tq, Tx, TU, Td, Te; + V T15, Ti, Tt, TJ; + T2 = LDW(&(W[0])); + T8 = LDW(&(W[TWVL * 2])); + T9 = VZMUL(T2, T8); + TA = VZMULJ(T2, T8); + T3 = LDW(&(W[TWVL * 4])); + Tc = VZMULJ(T9, T3); + T4 = VZMUL(T2, T3); + TZ = VZMUL(T9, T3); + T18 = VZMULJ(TA, T3); + Tl = VZMULJ(T8, T3); + Tq = VZMULJ(T2, T3); + Tx = VZMUL(T8, T3); + TU = VZMUL(TA, T3); + Td = LDW(&(W[TWVL * 6])); + Te = VZMULJ(Tc, Td); + T15 = VZMULJ(TA, Td); + Ti = VZMULJ(T8, Td); + Tt = VZMULJ(T2, Td); + TJ = VZMULJ(T9, Td); + { + V T7, TM, T1U, T2d, T1i, T1p, T1q, T1j, Tp, TE, TF, T26, T27, T2b, T1M; + V T1P, T1V, TY, T1c, T1d, T23, T24, T2a, T1F, T1I, T1W, TG, T1e; + { + V T1, TL, T6, TI, TK, T5, TH, T1S, T1T; + T1 = LD(&(x[0]), ms, &(x[0])); + TK = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + TL = VZMULJ(TJ, TK); + T5 = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + T6 = VZMULJ(T4, T5); + TH = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + TI = VZMULJ(Tc, TH); + T7 = VSUB(T1, T6); + TM = VSUB(TI, TL); + T1S = VADD(T1, T6); + T1T = VADD(TI, TL); + T1U = VSUB(T1S, T1T); + T2d = VADD(T1S, T1T); + } + { + V Th, T1K, T14, T1E, T1b, T1H, To, T1N, Tw, T1D, TR, T1L, TX, T1O, TD; + V T1G; + { + V Tb, Tg, Ta, Tf; + Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = VZMULJ(T9, Ta); + Tf = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + Tg = VZMULJ(Te, Tf); + Th = VSUB(Tb, Tg); + T1K = VADD(Tb, Tg); + } + { + V T11, T13, T10, T12; + T10 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T11 = VZMULJ(TZ, T10); + T12 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T13 = VZMULJ(T8, T12); + T14 = VSUB(T11, T13); + T1E = VADD(T11, T13); + } + { + V T17, T1a, T16, T19; + T16 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T17 = VZMULJ(T15, T16); + T19 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1a = VZMULJ(T18, T19); + T1b = VSUB(T17, T1a); + T1H = VADD(T17, T1a); + } + { + V Tk, Tn, Tj, Tm; + Tj = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Tk = 
VZMULJ(Ti, Tj); + Tm = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tn = VZMULJ(Tl, Tm); + To = VSUB(Tk, Tn); + T1N = VADD(Tk, Tn); + } + { + V Ts, Tv, Tr, Tu; + Tr = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + Ts = VZMULJ(Tq, Tr); + Tu = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tv = VZMULJ(Tt, Tu); + Tw = VSUB(Ts, Tv); + T1D = VADD(Ts, Tv); + } + { + V TO, TQ, TN, TP; + TN = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TO = VZMULJ(T3, TN); + TP = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TQ = VZMULJ(Td, TP); + TR = VSUB(TO, TQ); + T1L = VADD(TO, TQ); + } + { + V TT, TW, TS, TV; + TS = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TT = VZMULJ(T2, TS); + TV = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TW = VZMULJ(TU, TV); + TX = VSUB(TT, TW); + T1O = VADD(TT, TW); + } + { + V Tz, TC, Ty, TB; + Ty = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tz = VZMULJ(Tx, Ty); + TB = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TC = VZMULJ(TA, TB); + TD = VSUB(Tz, TC); + T1G = VADD(Tz, TC); + } + T1i = VSUB(TX, TR); + T1p = VSUB(Th, To); + T1q = VSUB(Tw, TD); + T1j = VSUB(T1b, T14); + Tp = VADD(Th, To); + TE = VADD(Tw, TD); + TF = VADD(Tp, TE); + T26 = VADD(T1D, T1E); + T27 = VADD(T1G, T1H); + T2b = VADD(T26, T27); + T1M = VSUB(T1K, T1L); + T1P = VSUB(T1N, T1O); + T1V = VADD(T1M, T1P); + TY = VADD(TR, TX); + T1c = VADD(T14, T1b); + T1d = VADD(TY, T1c); + T23 = VADD(T1K, T1L); + T24 = VADD(T1N, T1O); + T2a = VADD(T23, T24); + T1F = VSUB(T1D, T1E); + T1I = VSUB(T1G, T1H); + T1W = VADD(T1F, T1I); + } + TG = VADD(T7, TF); + T1e = VBYI(VADD(TM, T1d)); + ST(&(x[WS(rs, 5)]), VSUB(TG, T1e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VADD(TG, T1e), ms, &(x[WS(rs, 1)])); + { + V T2c, T2e, T2f, T29, T2i, T25, T28, T2h, T2g; + T2c = VMUL(LDK(KP559016994), VSUB(T2a, T2b)); + T2e = VADD(T2a, T2b); + T2f = VFNMS(LDK(KP250000000), T2e, T2d); + T25 = VSUB(T23, T24); + T28 = VSUB(T26, T27); + T29 = VBYI(VFMA(LDK(KP951056516), T25, VMUL(LDK(KP587785252), T28))); + T2i = VBYI(VFNMS(LDK(KP587785252), T25, VMUL(LDK(KP951056516), 
T28))); + ST(&(x[0]), VADD(T2d, T2e), ms, &(x[0])); + T2h = VSUB(T2f, T2c); + ST(&(x[WS(rs, 8)]), VSUB(T2h, T2i), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T2i, T2h), ms, &(x[0])); + T2g = VADD(T2c, T2f); + ST(&(x[WS(rs, 4)]), VADD(T29, T2g), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T2g, T29), ms, &(x[0])); + } + { + V T1Z, T1X, T1Y, T1R, T22, T1J, T1Q, T21, T20; + T1Z = VMUL(LDK(KP559016994), VSUB(T1V, T1W)); + T1X = VADD(T1V, T1W); + T1Y = VFNMS(LDK(KP250000000), T1X, T1U); + T1J = VSUB(T1F, T1I); + T1Q = VSUB(T1M, T1P); + T1R = VBYI(VFNMS(LDK(KP587785252), T1Q, VMUL(LDK(KP951056516), T1J))); + T22 = VBYI(VFMA(LDK(KP951056516), T1Q, VMUL(LDK(KP587785252), T1J))); + ST(&(x[WS(rs, 10)]), VADD(T1U, T1X), ms, &(x[0])); + T21 = VADD(T1Z, T1Y); + ST(&(x[WS(rs, 6)]), VSUB(T21, T22), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T22, T21), ms, &(x[0])); + T20 = VSUB(T1Y, T1Z); + ST(&(x[WS(rs, 2)]), VADD(T1R, T20), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VSUB(T20, T1R), ms, &(x[0])); + } + { + V T1k, T1r, T1z, T1w, T1o, T1y, T1h, T1v; + T1k = VFMA(LDK(KP951056516), T1i, VMUL(LDK(KP587785252), T1j)); + T1r = VFMA(LDK(KP951056516), T1p, VMUL(LDK(KP587785252), T1q)); + T1z = VFNMS(LDK(KP587785252), T1p, VMUL(LDK(KP951056516), T1q)); + T1w = VFNMS(LDK(KP587785252), T1i, VMUL(LDK(KP951056516), T1j)); + { + V T1m, T1n, T1f, T1g; + T1m = VFMS(LDK(KP250000000), T1d, TM); + T1n = VMUL(LDK(KP559016994), VSUB(T1c, TY)); + T1o = VADD(T1m, T1n); + T1y = VSUB(T1n, T1m); + T1f = VMUL(LDK(KP559016994), VSUB(Tp, TE)); + T1g = VFNMS(LDK(KP250000000), TF, T7); + T1h = VADD(T1f, T1g); + T1v = VSUB(T1g, T1f); + } + { + V T1l, T1s, T1B, T1C; + T1l = VADD(T1h, T1k); + T1s = VBYI(VSUB(T1o, T1r)); + ST(&(x[WS(rs, 19)]), VSUB(T1l, T1s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1l, T1s), ms, &(x[WS(rs, 1)])); + T1B = VADD(T1v, T1w); + T1C = VBYI(VADD(T1z, T1y)); + ST(&(x[WS(rs, 13)]), VSUB(T1B, T1C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VADD(T1B, T1C), ms, &(x[WS(rs, 1)])); + } + { + V 
T1t, T1u, T1x, T1A; + T1t = VSUB(T1h, T1k); + T1u = VBYI(VADD(T1r, T1o)); + ST(&(x[WS(rs, 11)]), VSUB(T1t, T1u), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VADD(T1t, T1u), ms, &(x[WS(rs, 1)])); + T1x = VSUB(T1v, T1w); + T1A = VBYI(VSUB(T1y, T1z)); + ST(&(x[WS(rs, 17)]), VSUB(T1x, T1A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T1x, T1A), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 19), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 20, XSIMD_STRING("t3fv_20"), twinstr, &GENUS, {126, 80, 12, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_20) (planner *p) { + X(kdft_dit_register) (p, t3fv_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,948 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:28 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3fv_25 -include t3f.h */ + +/* + * This function contains 268 FP additions, 281 FP multiplications, + * (or, 87 additions, 100 multiplications, 181 fused multiply/add), + * 223 stack variables, 67 constants, and 50 memory accesses + */ +#include "t3f.h" + +static void t3fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP792626838, +0.792626838241819413632131824093538848057784557); + DVK(KP876091699, +0.876091699473550838204498029706869638173524346); + DVK(KP617882369, +0.617882369114440893914546919006756321695042882); + DVK(KP803003575, +0.803003575438660414833440593570376004635464850); + DVK(KP242145790, +0.242145790282157779872542093866183953459003101); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP999544308, +0.999544308746292983948881682379742149196758193); + DVK(KP916574801, +0.916574801383451584742370439148878693530976769); + DVK(KP904730450, +0.904730450839922351881287709692877908104763647); + DVK(KP809385824, +0.809385824416008241660603814668679683846476688); + DVK(KP447417479, +0.447417479732227551498980015410057305749330693); + DVK(KP894834959, +0.894834959464455102997960030820114611498661386); + DVK(KP867381224, +0.867381224396525206773171885031575671309956167); + DVK(KP683113946, +0.683113946453479238701949862233725244439656928); + DVK(KP559154169, +0.559154169276087864842202529084232643714075927); + 
DVK(KP958953096, +0.958953096729998668045963838399037225970891871); + DVK(KP831864738, +0.831864738706457140726048799369896829771167132); + DVK(KP829049696, +0.829049696159252993975487806364305442437946767); + DVK(KP860541664, +0.860541664367944677098261680920518816412804187); + DVK(KP897376177, +0.897376177523557693138608077137219684419427330); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP681693190, +0.681693190061530575150324149145440022633095390); + DVK(KP560319534, +0.560319534973832390111614715371676131169633784); + DVK(KP855719849, +0.855719849902058969314654733608091555096772472); + DVK(KP237294955, +0.237294955877110315393888866460840817927895961); + DVK(KP949179823, +0.949179823508441261575555465843363271711583843); + DVK(KP904508497, +0.904508497187473712051146708591409529430077295); + DVK(KP997675361, +0.997675361079556513670859573984492383596555031); + DVK(KP763932022, +0.763932022500210303590826331268723764559381640); + DVK(KP690983005, +0.690983005625052575897706582817180941139845410); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP952936919, +0.952936919628306576880750665357914584765951388); + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP262346850, +0.262346850930607871785420028382979691334784273); + DVK(KP570584518, +0.570584518783621657366766175430996792655723863); + DVK(KP669429328, +0.669429328479476605641803240971985825917022098); + DVK(KP923225144, +0.923225144846402650453449441572664695995209956); + DVK(KP945422727, +0.945422727388575946270360266328811958657216298); + DVK(KP522616830, +0.522616830205754336872861364785224694908468440); + DVK(KP956723877, +0.956723877038460305821989399535483155872969262); + DVK(KP906616052, +0.906616052148196230441134447086066874408359177); + DVK(KP772036680, +0.772036680810363904029489473607579825330539880); + DVK(KP845997307, +0.845997307939530944175097360758058292389769300); + DVK(KP951056516, 
+0.951056516295153572116439333379382143405698634); + DVK(KP912575812, +0.912575812670962425556968549836277086778922727); + DVK(KP921078979, +0.921078979742360627699756128143719920817673854); + DVK(KP982009705, +0.982009705009746369461829878184175962711969869); + DVK(KP734762448, +0.734762448793050413546343770063151342619912334); + DVK(KP494780565, +0.494780565770515410344588413655324772219443730); + DVK(KP447533225, +0.447533225982656890041886979663652563063114397); + DVK(KP603558818, +0.603558818296015001454675132653458027918768137); + DVK(KP667278218, +0.667278218140296670899089292254759909713898805); + DVK(KP244189809, +0.244189809627953270309879511234821255780225091); + DVK(KP269969613, +0.269969613759572083574752974412347470060951301); + DVK(KP578046249, +0.578046249379945007321754579646815604023525655); + DVK(KP522847744, +0.522847744331509716623755382187077770911012542); + DVK(KP132830569, +0.132830569247582714407653942074819768844536507); + DVK(KP120146378, +0.120146378570687701782758537356596213647956445); + DVK(KP893101515, +0.893101515366181661711202267938416198338079437); + DVK(KP987388751, +0.987388751065621252324603216482382109400433949); + DVK(KP059835404, +0.059835404262124915169548397419498386427871950); + DVK(KP066152395, +0.066152395967733048213034281011006031460903353); + DVK(KP786782374, +0.786782374965295178365099601674911834788448471); + DVK(KP869845200, +0.869845200362138853122720822420327157933056305); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) { + V T2t, T1Z, T2W, T28, T2Q, T2r, T2g, T2u, T2o, T2l; + { + V T2, T5, T3, T9; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T9 = 
LDW(&(W[TWVL * 6])); + { + V T2c, T3l, Tn, T49, Tm, T4e, TN, T32, T1d, T3a, T3f, T3z, T3H, T25, T1W; + V T2v, T2D, T4a, T1g, T18, T2Z, T11, T31, TK, T1q, T1j, T1n, T4b, T17; + { + V T1, T1l, Tr, T4, Ty, T1E, Tu, TX, TD, T1h, Tz, T1e, T1I, T1o, TU; + V Tk, T2b, T1B, T1D, T1N, T1F, Td, T2a, T1J; + { + V T7, Tb, TC, Tg, T1L, Ta, T6, Tj, T1A; + T1 = LD(&(x[0]), ms, &(x[0])); + { + V Tf, Ti, Te, Th; + Tf = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Ti = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Tb = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Te = VZMUL(T2, T5); + TC = VZMULJ(T2, T5); + T1l = VZMUL(T3, T5); + Tr = VZMULJ(T3, T5); + T4 = VZMUL(T2, T3); + Ty = VZMULJ(T2, T3); + T1E = VZMULJ(T2, T9); + Th = VZMULJ(T5, T9); + Tu = VZMULJ(T3, T9); + Tg = VZMULJ(Te, Tf); + TX = VZMULJ(Te, T9); + TD = VZMULJ(TC, T9); + T1h = VZMULJ(Ty, T9); + Tz = VZMUL(Ty, T5); + T1e = VZMULJ(Ty, T5); + T1L = VZMULJ(Tr, T9); + Ta = VZMULJ(T4, T9); + T1I = VZMUL(T4, T5); + T6 = VZMULJ(T4, T5); + Tj = VZMULJ(Th, Ti); + } + T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1o = VZMULJ(T1e, T9); + { + V Tc, T8, T1C, T1M; + T1C = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1M = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tc = VZMULJ(Ta, Tb); + T8 = VZMULJ(T6, T7); + TU = VZMULJ(T6, T9); + Tk = VADD(Tg, Tj); + T2b = VSUB(Tg, Tj); + T1B = VZMULJ(T3, T1A); + T1D = VZMULJ(TC, T1C); + T1N = VZMULJ(T1L, T1M); + T1F = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + Td = VADD(T8, Tc); + T2a = VSUB(T8, Tc); + T1J = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + } + } + { + V Tq, Tt, TF, T1T, T1H, Tw, T1U, T1O, TA, Tp, Ts, TE; + Tp = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TE = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + { + V T1K, Tv, T1G, Tl; + Tv = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1G = VZMULJ(T1E, T1F); + T2c = VFMA(LDK(KP618033988), T2b, T2a); + T3l = VFNMS(LDK(KP618033988), T2a, T2b); + Tn = VSUB(Td, Tk); + Tl = VADD(Td, Tk); + T1K 
= VZMULJ(T1I, T1J); + Tq = VZMULJ(T2, Tp); + Tt = VZMULJ(Tr, Ts); + TF = VZMULJ(TD, TE); + T1T = VSUB(T1D, T1G); + T1H = VADD(T1D, T1G); + T49 = VADD(T1, Tl); + Tm = VFNMS(LDK(KP250000000), Tl, T1); + Tw = VZMULJ(Tu, Tv); + T1U = VSUB(T1K, T1N); + T1O = VADD(T1K, T1N); + TA = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + } + { + V Tx, TL, T1R, T38, T1V, T13, TQ, TZ, TS, T1Q, TV, TG, TM, T12, T1c; + V T16; + T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + { + V TP, TY, T1P, TB, TR; + TP = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + TY = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TR = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + Tx = VADD(Tt, Tw); + TL = VSUB(Tt, Tw); + T1R = VSUB(T1O, T1H); + T1P = VADD(T1H, T1O); + T38 = VFNMS(LDK(KP618033988), T1T, T1U); + T1V = VFMA(LDK(KP618033988), T1U, T1T); + TB = VZMULJ(Tz, TA); + T13 = VZMULJ(T4, T12); + TQ = VZMULJ(T9, TP); + TZ = VZMULJ(TX, TY); + TS = VZMULJ(T5, TR); + T4e = VADD(T1B, T1P); + T1Q = VFNMS(LDK(KP250000000), T1P, T1B); + TV = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TG = VADD(TB, TF); + TM = VSUB(TF, TB); + } + T1c = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V T14, TT, TJ, T15, T10, TI, T1p, T1f, T1i, T1m; + T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T14 = VADD(TS, TQ); + TT = VSUB(TQ, TS); + { + V T39, T1S, TW, TH; + T39 = VFMA(LDK(KP559016994), T1R, T1Q); + T1S = VFNMS(LDK(KP559016994), T1R, T1Q); + TW = VZMULJ(TU, TV); + TH = VADD(Tx, TG); + TJ = VSUB(Tx, TG); + TN = VFNMS(LDK(KP618033988), TM, TL); + T32 = VFMA(LDK(KP618033988), TL, TM); + T1d = VZMULJ(Ty, T1c); + T3a = VFMA(LDK(KP869845200), T39, T38); + T3f = VFNMS(LDK(KP786782374), T38, T39); + T3z = VFMA(LDK(KP066152395), T39, T38); + T3H = VFNMS(LDK(KP059835404), T38, T39); + T25 = VFMA(LDK(KP987388751), T1S, T1V); + T1W = VFNMS(LDK(KP893101515), T1V, T1S); + T2v = VFNMS(LDK(KP120146378), T1V, T1S); + T2D = VFMA(LDK(KP132830569), T1S, T1V); + T15 = VADD(TZ, TW); + T10 = VSUB(TW, TZ); + TI = VFNMS(LDK(KP250000000), TH, Tq); + T4a = VADD(Tq, TH); + T1g = 
VZMULJ(T1e, T1f); + } + T1p = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1i = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T1m = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T18 = VSUB(T14, T15); + T16 = VADD(T14, T15); + T2Z = VFNMS(LDK(KP618033988), TT, T10); + T11 = VFMA(LDK(KP618033988), T10, TT); + T31 = VFNMS(LDK(KP559016994), TJ, TI); + TK = VFMA(LDK(KP559016994), TJ, TI); + T1q = VZMULJ(T1o, T1p); + T1j = VZMULJ(T1h, T1i); + T1n = VZMULJ(T1l, T1m); + } + T4b = VADD(T13, T16); + T17 = VFMS(LDK(KP250000000), T16, T13); + } + } + } + { + V T33, T3i, T3C, T3L, T20, TO, T2y, T2G, T1k, T1w, T1r, T1x, T2Y, T19, T4k; + V T4c; + T33 = VFMA(LDK(KP893101515), T32, T31); + T3i = VFNMS(LDK(KP987388751), T31, T32); + T3C = VFNMS(LDK(KP522847744), T32, T31); + T3L = VFMA(LDK(KP578046249), T31, T32); + T20 = VFMA(LDK(KP269969613), TK, TN); + TO = VFNMS(LDK(KP244189809), TN, TK); + T2y = VFMA(LDK(KP667278218), TK, TN); + T2G = VFNMS(LDK(KP603558818), TN, TK); + T1k = VADD(T1g, T1j); + T1w = VSUB(T1g, T1j); + T1r = VADD(T1n, T1q); + T1x = VSUB(T1q, T1n); + T2Y = VFMA(LDK(KP559016994), T18, T17); + T19 = VFNMS(LDK(KP559016994), T18, T17); + T4k = VSUB(T4a, T4b); + T4c = VADD(T4a, T4b); + { + V T2X, To, T35, T1y, T2H, T2z, T1a, T21, T3t, T34, T3n, T3j, T3E, T3Y, T3M; + V T3R, T1v, T36, T4l, T4f, T1u, T1s; + T2X = VFNMS(LDK(KP559016994), Tn, Tm); + To = VFMA(LDK(KP559016994), Tn, Tm); + T1u = VSUB(T1r, T1k); + T1s = VADD(T1k, T1r); + T35 = VFMA(LDK(KP618033988), T1w, T1x); + T1y = VFNMS(LDK(KP618033988), T1x, T1w); + { + V T3K, T30, T3h, T3D, T4d, T1t; + T3K = VFMA(LDK(KP447533225), T2Z, T2Y); + T30 = VFMA(LDK(KP120146378), T2Z, T2Y); + T3h = VFNMS(LDK(KP132830569), T2Y, T2Z); + T3D = VFNMS(LDK(KP494780565), T2Y, T2Z); + T2H = VFNMS(LDK(KP786782374), T11, T19); + T2z = VFMA(LDK(KP869845200), T19, T11); + T1a = VFNMS(LDK(KP667278218), T19, T11); + T21 = VFMA(LDK(KP603558818), T11, T19); + T4d = VADD(T1d, T1s); + T1t = VFNMS(LDK(KP250000000), T1s, T1d); + T3t = VFNMS(LDK(KP734762448), T33, 
T30); + T34 = VFMA(LDK(KP734762448), T33, T30); + T3n = VFMA(LDK(KP734762448), T3i, T3h); + T3j = VFNMS(LDK(KP734762448), T3i, T3h); + T3E = VFNMS(LDK(KP982009705), T3D, T3C); + T3Y = VFMA(LDK(KP982009705), T3D, T3C); + T3M = VFNMS(LDK(KP921078979), T3L, T3K); + T3R = VFMA(LDK(KP921078979), T3L, T3K); + T1v = VFNMS(LDK(KP559016994), T1u, T1t); + T36 = VFMA(LDK(KP559016994), T1u, T1t); + T4l = VSUB(T4d, T4e); + T4f = VADD(T4d, T4e); + } + { + V T2L, T2R, T2j, T2q, T2J, T2B, T2e, T26, T2U, T1Y, T23, T2O; + { + V T2I, T24, T2w, T2E, T48, T42, T3y, T3s, T3V, T45, T2A, T1b, T2h, T2i, T1X; + T2L = VFNMS(LDK(KP912575812), T2H, T2G); + T2I = VFMA(LDK(KP912575812), T2H, T2G); + { + V T3A, T3e, T37, T3I, T1z; + T3A = VFNMS(LDK(KP667278218), T36, T35); + T3e = VFNMS(LDK(KP059835404), T35, T36); + T37 = VFMA(LDK(KP066152395), T36, T35); + T3I = VFMA(LDK(KP603558818), T35, T36); + T24 = VFMA(LDK(KP578046249), T1v, T1y); + T1z = VFNMS(LDK(KP522847744), T1y, T1v); + T2w = VFNMS(LDK(KP494780565), T1v, T1y); + T2E = VFMA(LDK(KP447533225), T1y, T1v); + { + V T4i, T4g, T4o, T4m; + T4i = VSUB(T4c, T4f); + T4g = VADD(T4c, T4f); + T4o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T4k, T4l)); + T4m = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T4l, T4k)); + { + V T3Q, T3J, T3b, T3u; + T3Q = VFNMS(LDK(KP845997307), T3I, T3H); + T3J = VFMA(LDK(KP845997307), T3I, T3H); + T3b = VFNMS(LDK(KP772036680), T3a, T37); + T3u = VFMA(LDK(KP772036680), T3a, T37); + { + V T3o, T3g, T3B, T3X, T4h; + T3o = VFNMS(LDK(KP772036680), T3f, T3e); + T3g = VFMA(LDK(KP772036680), T3f, T3e); + T3B = VFNMS(LDK(KP845997307), T3A, T3z); + T3X = VFMA(LDK(KP845997307), T3A, T3z); + ST(&(x[0]), VADD(T4g, T49), ms, &(x[0])); + T4h = VFNMS(LDK(KP250000000), T4g, T49); + { + V T40, T3N, T3c, T3v; + T40 = VFMA(LDK(KP906616052), T3M, T3J); + T3N = VFNMS(LDK(KP906616052), T3M, T3J); + T3c = VFMA(LDK(KP956723877), T3b, T34); + T3v = VFMA(LDK(KP522616830), T3j, T3u); + { + V T3p, T3k, T3S, T3F; + T3p = 
VFNMS(LDK(KP522616830), T34, T3o); + T3k = VFMA(LDK(KP945422727), T3j, T3g); + T3S = VFNMS(LDK(KP923225144), T3E, T3B); + T3F = VFMA(LDK(KP923225144), T3E, T3B); + { + V T46, T3Z, T4j, T4n; + T46 = VFNMS(LDK(KP669429328), T3X, T3Y); + T3Z = VFMA(LDK(KP570584518), T3Y, T3X); + T4j = VFMA(LDK(KP559016994), T4i, T4h); + T4n = VFNMS(LDK(KP559016994), T4i, T4h); + { + V T3W, T3O, T3d, T3w; + T3W = VFMA(LDK(KP262346850), T3N, T3l); + T3O = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T3l, T3N)); + T3d = VFMA(LDK(KP992114701), T3c, T2X); + T3w = VFNMS(LDK(KP690983005), T3v, T3g); + { + V T3q, T3m, T3T, T43; + T3q = VFMA(LDK(KP763932022), T3p, T3b); + T3m = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T3l, T3k)); + T3T = VFNMS(LDK(KP997675361), T3S, T3R); + T43 = VFNMS(LDK(KP904508497), T3S, T3Q); + { + V T3G, T3P, T47, T41; + T3G = VFMA(LDK(KP949179823), T3F, T2X); + T3P = VFNMS(LDK(KP237294955), T3F, T2X); + T47 = VFNMS(LDK(KP669429328), T40, T46); + T41 = VFMA(LDK(KP618033988), T40, T3Z); + ST(&(x[WS(rs, 20)]), VFMAI(T4m, T4j), ms, &(x[0])); + ST(&(x[WS(rs, 5)]), VFNMSI(T4m, T4j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFNMSI(T4o, T4n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 10)]), VFMAI(T4o, T4n), ms, &(x[0])); + { + V T3x, T3r, T3U, T44; + T3x = VFMA(LDK(KP855719849), T3w, T3t); + T3r = VFNMS(LDK(KP855719849), T3q, T3n); + ST(&(x[WS(rs, 22)]), VFMAI(T3m, T3d), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(T3m, T3d), ms, &(x[WS(rs, 1)])); + T3U = VFMA(LDK(KP560319534), T3T, T3Q); + T44 = VFNMS(LDK(KP681693190), T43, T3R); + ST(&(x[WS(rs, 23)]), VFMAI(T3O, T3G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 2)]), VFNMSI(T3O, T3G), ms, &(x[0])); + T48 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T47, T3W)); + T42 = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T41, T3W)); + T3y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T3x, T3l)); + T3s = VFMA(LDK(KP897376177), T3r, T2X); + T3V = VFNMS(LDK(KP949179823), T3U, T3P); + T45 = VFNMS(LDK(KP860541664), T44, T3P); + T2R 
= VFNMS(LDK(KP912575812), T2z, T2y); + T2A = VFMA(LDK(KP912575812), T2z, T2y); + T1b = VFMA(LDK(KP829049696), T1a, TO); + T2h = VFNMS(LDK(KP829049696), T1a, TO); + T2i = VFNMS(LDK(KP831864738), T1W, T1z); + T1X = VFMA(LDK(KP831864738), T1W, T1z); + } + } + } + } + } + } + } + } + } + } + } + { + V T2M, T2F, T2x, T2S, T2T, T2N; + T2M = VFNMS(LDK(KP958953096), T2E, T2D); + T2F = VFMA(LDK(KP958953096), T2E, T2D); + ST(&(x[WS(rs, 17)]), VFMAI(T3y, T3s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VFNMSI(T3y, T3s), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFMAI(T42, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 13)]), VFNMSI(T42, T3V), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VFNMSI(T48, T45), ms, &(x[0])); + ST(&(x[WS(rs, 7)]), VFMAI(T48, T45), ms, &(x[WS(rs, 1)])); + T2j = VFMA(LDK(KP559154169), T2i, T2h); + T2q = VFNMS(LDK(KP683113946), T2h, T2i); + T2x = VFNMS(LDK(KP867381224), T2w, T2v); + T2S = VFMA(LDK(KP867381224), T2w, T2v); + T2J = VFMA(LDK(KP894834959), T2I, T2F); + T2T = VFMA(LDK(KP447417479), T2I, T2S); + T2B = VFNMS(LDK(KP809385824), T2A, T2x); + T2N = VFMA(LDK(KP447417479), T2A, T2M); + T2e = VFMA(LDK(KP831864738), T25, T24); + T26 = VFNMS(LDK(KP831864738), T25, T24); + T2U = VFNMS(LDK(KP763932022), T2T, T2F); + T1Y = VFMA(LDK(KP904730450), T1X, T1b); + T23 = VFNMS(LDK(KP904730450), T1X, T1b); + T2O = VFMA(LDK(KP690983005), T2N, T2x); + } + } + { + V T2C, T22, T2d, T2K; + T2C = VFNMS(LDK(KP992114701), T2B, To); + T22 = VFMA(LDK(KP916574801), T21, T20); + T2d = VFNMS(LDK(KP916574801), T21, T20); + T2K = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2J, T2c)); + { + V T27, T2P, T2f, T2k, T2n, T2V; + T2V = VFNMS(LDK(KP999544308), T2U, T2R); + T27 = VFNMS(LDK(KP904730450), T26, T23); + T2t = VFMA(LDK(KP968583161), T1Y, To); + T1Z = VFNMS(LDK(KP242145790), T1Y, To); + T2P = VFNMS(LDK(KP999544308), T2O, T2L); + T2f = VFMA(LDK(KP904730450), T2e, T2d); + T2k = VFNMS(LDK(KP904730450), T2e, T2d); + T2n = VADD(T22, T23); + ST(&(x[WS(rs, 21)]), VFNMSI(T2K, T2C), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(T2K, T2C), ms, &(x[0])); + T2W = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2V, T2c)); + T28 = VFNMS(LDK(KP618033988), T27, T22); + T2Q = VFNMS(LDK(KP803003575), T2P, To); + T2r = VFMA(LDK(KP617882369), T2k, T2q); + T2g = VFNMS(LDK(KP242145790), T2f, T2c); + T2u = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2f, T2c)); + T2o = VFNMS(LDK(KP683113946), T2n, T26); + T2l = VFMA(LDK(KP559016994), T2k, T2j); + } + } + } + } + } + } + } + { + V T29, T2s, T2p, T2m; + T29 = VFNMS(LDK(KP876091699), T28, T1Z); + ST(&(x[WS(rs, 9)]), VFMAI(T2W, T2Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VFNMSI(T2W, T2Q), ms, &(x[0])); + T2s = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T2r, T2g)); + ST(&(x[WS(rs, 24)]), VFMAI(T2u, T2t), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(T2u, T2t), ms, &(x[WS(rs, 1)])); + T2p = VFMA(LDK(KP792626838), T2o, T1Z); + T2m = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2l, T2g)); + ST(&(x[WS(rs, 11)]), VFNMSI(T2s, T2p), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VFMAI(T2s, T2p), ms, &(x[0])); + ST(&(x[WS(rs, 19)]), VFMAI(T2m, T29), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 6)]), VFNMSI(T2m, T29), ms, &(x[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t3fv_25"), twinstr, &GENUS, {87, 100, 181, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_25) (planner *p) { + X(kdft_dit_register) (p, t3fv_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3fv_25 -include t3f.h */ + +/* + * This function contains 268 FP additions, 228 FP multiplications, + * (or, 190 additions, 150 multiplications, 78 fused multiply/add), + * 123 stack variables, 40 constants, and 50 memory accesses + */ +#include "t3f.h" + +static 
void t3fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP998026728, +0.998026728428271561952336806863450553336905220); + DVK(KP125581039, +0.125581039058626752152356449131262266244969664); + DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DVK(KP062790519, +0.062790519529313376076178224565631133122484832); + DVK(KP809016994, +0.809016994374947424102293417182819058860154590); + DVK(KP309016994, +0.309016994374947424102293417182819058860154590); + DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DVK(KP728968627, +0.728968627421411523146730319055259111372571664); + DVK(KP963507348, +0.963507348203430549974383005744259307057084020); + DVK(KP876306680, +0.876306680043863587308115903922062583399064238); + DVK(KP497379774, +0.497379774329709576484567492012895936835134813); + DVK(KP968583161, +0.968583161128631119490168375464735813836012403); + DVK(KP684547105, +0.684547105928688673732283357621209269889519233); + DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DVK(KP481753674, +0.481753674101715274987191502872129653528542010); + DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DVK(KP248689887, +0.248689887164854788242283746006447968417567406); + DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DVK(KP992114701, +0.992114701314477831049793042785778521453036709); + DVK(KP250666467, +0.250666467128608490746237519633017587885836494); + DVK(KP425779291, +0.425779291565072648862502445744251703979973042); + DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DVK(KP770513242, +0.770513242775789230803009636396177847271667672); + DVK(KP844327925, +0.844327925502015078548558063966681505381659241); + DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DVK(KP125333233, +0.125333233564304245373118759816508793942918247); + 
DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DVK(KP904827052, +0.904827052466019527713668647932697593970413911); + DVK(KP851558583, +0.851558583130145297725004891488503407959946084); + DVK(KP637423989, +0.637423989748689710176712811676016195434917298); + DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DVK(KP535826794, +0.535826794978996618271308767867639978063575346); + DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) { + V T1, T4, T2, T3, TA, Td, Tp, Tw, Tx, T1G, T1j, T5, T1c, T8, T9; + V Ts, T1J, Tg, T1C, T1m, TX, TB, T1f, TU; + T1 = LDW(&(W[0])); + T4 = LDW(&(W[TWVL * 4])); + T2 = LDW(&(W[TWVL * 2])); + T3 = VZMUL(T1, T2); + TA = VZMULJ(T1, T4); + Td = VZMUL(T1, T4); + Tp = VZMULJ(T2, T4); + Tw = VZMULJ(T1, T2); + Tx = VZMUL(Tw, T4); + T1G = VZMUL(T3, T4); + T1j = VZMUL(T2, T4); + T5 = VZMULJ(T3, T4); + T1c = VZMULJ(Tw, T4); + T8 = LDW(&(W[TWVL * 6])); + T9 = VZMULJ(T3, T8); + Ts = VZMULJ(T2, T8); + T1J = VZMULJ(Tp, T8); + Tg = VZMULJ(T4, T8); + T1C = VZMULJ(T1, T8); + T1m = VZMULJ(T1c, T8); + TX = VZMULJ(T5, T8); + TB = VZMULJ(TA, T8); + T1f = VZMULJ(Tw, T8); + TU = VZMULJ(Td, T8); + { + V Tl, Tk, Tm, Tn, T20, T2R, T22, T1V, T2K, T1S, T3A, T2L, TN, T2G, TK; + V T3w, T2H, T19, T2D, T16, T3x, T2E, T1y, T2N, T1v, T3z, T2O; + { + V Tf, Ti, Tj, T7, Tb, Tc, T21; + Tl = LD(&(x[0]), ms, &(x[0])); + { + V Te, Th, T6, Ta; 
+ Te = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + Tf = VZMULJ(Td, Te); + Th = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Ti = VZMULJ(Tg, Th); + Tj = VADD(Tf, Ti); + T6 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T7 = VZMULJ(T5, T6); + Ta = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tb = VZMULJ(T9, Ta); + Tc = VADD(T7, Tb); + } + Tk = VMUL(LDK(KP559016994), VSUB(Tc, Tj)); + Tm = VADD(Tc, Tj); + Tn = VFNMS(LDK(KP250000000), Tm, Tl); + T20 = VSUB(T7, Tb); + T21 = VSUB(Tf, Ti); + T2R = VMUL(LDK(KP951056516), T21); + T22 = VFMA(LDK(KP951056516), T20, VMUL(LDK(KP587785252), T21)); + } + { + V T1P, T1I, T1L, T1M, T1B, T1E, T1F, T1O; + T1O = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1P = VZMULJ(T2, T1O); + { + V T1H, T1K, T1A, T1D; + T1H = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1I = VZMULJ(T1G, T1H); + T1K = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T1L = VZMULJ(T1J, T1K); + T1M = VADD(T1I, T1L); + T1A = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T1B = VZMULJ(TA, T1A); + T1D = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1E = VZMULJ(T1C, T1D); + T1F = VADD(T1B, T1E); + } + { + V T1T, T1U, T1N, T1Q, T1R; + T1T = VSUB(T1B, T1E); + T1U = VSUB(T1I, T1L); + T1V = VFMA(LDK(KP475528258), T1T, VMUL(LDK(KP293892626), T1U)); + T2K = VFNMS(LDK(KP293892626), T1T, VMUL(LDK(KP475528258), T1U)); + T1N = VMUL(LDK(KP559016994), VSUB(T1F, T1M)); + T1Q = VADD(T1F, T1M); + T1R = VFNMS(LDK(KP250000000), T1Q, T1P); + T1S = VADD(T1N, T1R); + T3A = VADD(T1P, T1Q); + T2L = VSUB(T1R, T1N); + } + } + { + V TH, Tz, TD, TE, Tr, Tu, Tv, TG; + TG = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + TH = VZMULJ(T1, TG); + { + V Ty, TC, Tq, Tt; + Ty = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + Tz = VZMULJ(Tx, Ty); + TC = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + TD = VZMULJ(TB, TC); + TE = VADD(Tz, TD); + Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Tr = VZMULJ(Tp, Tq); + Tt = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + Tu = VZMULJ(Ts, Tt); + Tv = VADD(Tr, Tu); + } + { + V TL, TM, TF, TI, TJ; + TL = VSUB(Tr, Tu); + TM = 
VSUB(Tz, TD); + TN = VFMA(LDK(KP475528258), TL, VMUL(LDK(KP293892626), TM)); + T2G = VFNMS(LDK(KP293892626), TL, VMUL(LDK(KP475528258), TM)); + TF = VMUL(LDK(KP559016994), VSUB(Tv, TE)); + TI = VADD(Tv, TE); + TJ = VFNMS(LDK(KP250000000), TI, TH); + TK = VADD(TF, TJ); + T3w = VADD(TH, TI); + T2H = VSUB(TJ, TF); + } + } + { + V T13, TW, TZ, T10, TQ, TS, TT, T12; + T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T13 = VZMULJ(T3, T12); + { + V TV, TY, TP, TR; + TV = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TW = VZMULJ(TU, TV); + TY = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + TZ = VZMULJ(TX, TY); + T10 = VADD(TW, TZ); + TP = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + TQ = VZMULJ(T4, TP); + TR = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + TS = VZMULJ(T8, TR); + TT = VADD(TQ, TS); + } + { + V T17, T18, T11, T14, T15; + T17 = VSUB(TQ, TS); + T18 = VSUB(TW, TZ); + T19 = VFMA(LDK(KP475528258), T17, VMUL(LDK(KP293892626), T18)); + T2D = VFNMS(LDK(KP293892626), T17, VMUL(LDK(KP475528258), T18)); + T11 = VMUL(LDK(KP559016994), VSUB(TT, T10)); + T14 = VADD(TT, T10); + T15 = VFNMS(LDK(KP250000000), T14, T13); + T16 = VADD(T11, T15); + T3x = VADD(T13, T14); + T2E = VSUB(T15, T11); + } + } + { + V T1s, T1l, T1o, T1p, T1e, T1h, T1i, T1r; + T1r = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T1s = VZMULJ(Tw, T1r); + { + V T1k, T1n, T1d, T1g; + T1k = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + T1l = VZMULJ(T1j, T1k); + T1n = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1o = VZMULJ(T1m, T1n); + T1p = VADD(T1l, T1o); + T1d = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1e = VZMULJ(T1c, T1d); + T1g = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + T1h = VZMULJ(T1f, T1g); + T1i = VADD(T1e, T1h); + } + { + V T1w, T1x, T1q, T1t, T1u; + T1w = VSUB(T1e, T1h); + T1x = VSUB(T1l, T1o); + T1y = VFMA(LDK(KP475528258), T1w, VMUL(LDK(KP293892626), T1x)); + T2N = VFNMS(LDK(KP293892626), T1w, VMUL(LDK(KP475528258), T1x)); + T1q = VMUL(LDK(KP559016994), VSUB(T1i, T1p)); + T1t = VADD(T1i, T1p); + T1u = VFNMS(LDK(KP250000000), T1t, T1s); + 
T1v = VADD(T1q, T1u); + T3z = VADD(T1s, T1t); + T2O = VSUB(T1u, T1q); + } + } + { + V T3J, T3K, T3D, T3E, T3C, T3F, T3L, T3G; + { + V T3H, T3I, T3y, T3B; + T3H = VSUB(T3w, T3x); + T3I = VSUB(T3z, T3A); + T3J = VBYI(VFMA(LDK(KP951056516), T3H, VMUL(LDK(KP587785252), T3I))); + T3K = VBYI(VFNMS(LDK(KP587785252), T3H, VMUL(LDK(KP951056516), T3I))); + T3D = VADD(Tl, Tm); + T3y = VADD(T3w, T3x); + T3B = VADD(T3z, T3A); + T3E = VADD(T3y, T3B); + T3C = VMUL(LDK(KP559016994), VSUB(T3y, T3B)); + T3F = VFNMS(LDK(KP250000000), T3E, T3D); + } + ST(&(x[0]), VADD(T3D, T3E), ms, &(x[0])); + T3L = VSUB(T3F, T3C); + ST(&(x[WS(rs, 10)]), VADD(T3K, T3L), ms, &(x[0])); + ST(&(x[WS(rs, 15)]), VSUB(T3L, T3K), ms, &(x[WS(rs, 1)])); + T3G = VADD(T3C, T3F); + ST(&(x[WS(rs, 5)]), VSUB(T3G, T3J), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 20)]), VADD(T3J, T3G), ms, &(x[0])); + } + { + V To, T2n, T2o, T2p, T2x, T2y, T2z, T2u, T2v, T2w, T2q, T2r, T2s, T29, T2i; + V T2e, T2g, T1Y, T2j, T2b, T2c, T2B, T2C; + To = VADD(Tk, Tn); + T2n = VFMA(LDK(KP1_688655851), TN, VMUL(LDK(KP535826794), TK)); + T2o = VFMA(LDK(KP1_541026485), T19, VMUL(LDK(KP637423989), T16)); + T2p = VSUB(T2n, T2o); + T2x = VFMA(LDK(KP851558583), T1y, VMUL(LDK(KP904827052), T1v)); + T2y = VFMA(LDK(KP1_984229402), T1V, VMUL(LDK(KP125333233), T1S)); + T2z = VADD(T2x, T2y); + T2u = VFNMS(LDK(KP844327925), TK, VMUL(LDK(KP1_071653589), TN)); + T2v = VFNMS(LDK(KP1_274847979), T19, VMUL(LDK(KP770513242), T16)); + T2w = VADD(T2u, T2v); + T2q = VFNMS(LDK(KP425779291), T1v, VMUL(LDK(KP1_809654104), T1y)); + T2r = VFNMS(LDK(KP992114701), T1S, VMUL(LDK(KP250666467), T1V)); + T2s = VADD(T2q, T2r); + { + V T23, T24, T25, T26, T27, T28; + T23 = VFMA(LDK(KP1_937166322), TN, VMUL(LDK(KP248689887), TK)); + T24 = VFMA(LDK(KP1_071653589), T19, VMUL(LDK(KP844327925), T16)); + T25 = VADD(T23, T24); + T26 = VFMA(LDK(KP1_752613360), T1y, VMUL(LDK(KP481753674), T1v)); + T27 = VFMA(LDK(KP1_457937254), T1V, VMUL(LDK(KP684547105), T1S)); + T28 = VADD(T26, T27); + 
T29 = VADD(T25, T28); + T2i = VSUB(T27, T26); + T2e = VMUL(LDK(KP559016994), VSUB(T28, T25)); + T2g = VSUB(T24, T23); + } + { + V TO, T1a, T1b, T1z, T1W, T1X; + TO = VFNMS(LDK(KP497379774), TN, VMUL(LDK(KP968583161), TK)); + T1a = VFNMS(LDK(KP1_688655851), T19, VMUL(LDK(KP535826794), T16)); + T1b = VADD(TO, T1a); + T1z = VFNMS(LDK(KP963507348), T1y, VMUL(LDK(KP876306680), T1v)); + T1W = VFNMS(LDK(KP1_369094211), T1V, VMUL(LDK(KP728968627), T1S)); + T1X = VADD(T1z, T1W); + T1Y = VADD(T1b, T1X); + T2j = VMUL(LDK(KP559016994), VSUB(T1b, T1X)); + T2b = VSUB(T1a, TO); + T2c = VSUB(T1z, T1W); + } + { + V T1Z, T2a, T2t, T2A; + T1Z = VADD(To, T1Y); + T2a = VBYI(VADD(T22, T29)); + ST(&(x[WS(rs, 1)]), VSUB(T1Z, T2a), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 24)]), VADD(T1Z, T2a), ms, &(x[0])); + T2t = VADD(To, VADD(T2p, T2s)); + T2A = VBYI(VADD(T22, VSUB(T2w, T2z))); + ST(&(x[WS(rs, 21)]), VSUB(T2t, T2A), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(T2t, T2A), ms, &(x[0])); + } + T2B = VBYI(VADD(T22, VFMA(LDK(KP309016994), T2w, VFMA(LDK(KP587785252), VSUB(T2r, T2q), VFNMS(LDK(KP951056516), VADD(T2n, T2o), VMUL(LDK(KP809016994), T2z)))))); + T2C = VFMA(LDK(KP309016994), T2p, VFMA(LDK(KP951056516), VSUB(T2u, T2v), VFMA(LDK(KP587785252), VSUB(T2y, T2x), VFNMS(LDK(KP809016994), T2s, To)))); + ST(&(x[WS(rs, 9)]), VADD(T2B, T2C), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 16)]), VSUB(T2C, T2B), ms, &(x[0])); + { + V T2f, T2l, T2k, T2m, T2d, T2h; + T2d = VFMS(LDK(KP250000000), T29, T22); + T2f = VBYI(VADD(VFMA(LDK(KP587785252), T2b, VMUL(LDK(KP951056516), T2c)), VSUB(T2d, T2e))); + T2l = VBYI(VADD(VFNMS(LDK(KP587785252), T2c, VMUL(LDK(KP951056516), T2b)), VADD(T2d, T2e))); + T2h = VFNMS(LDK(KP250000000), T1Y, To); + T2k = VFMA(LDK(KP587785252), T2g, VFNMS(LDK(KP951056516), T2i, VSUB(T2h, T2j))); + T2m = VFMA(LDK(KP951056516), T2g, VADD(T2j, VFMA(LDK(KP587785252), T2i, T2h))); + ST(&(x[WS(rs, 11)]), VADD(T2f, T2k), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VSUB(T2m, T2l), ms, 
&(x[WS(rs, 1)])); + ST(&(x[WS(rs, 14)]), VSUB(T2k, T2f), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VADD(T2l, T2m), ms, &(x[0])); + } + } + { + V T2S, T2U, T2F, T2I, T2J, T2Y, T2Z, T30, T2M, T2P, T2Q, T2V, T2W, T2X, T3a; + V T3l, T3b, T3k, T3f, T3p, T3i, T3o, T32, T33; + T2S = VFNMS(LDK(KP587785252), T20, T2R); + T2U = VSUB(Tn, Tk); + T2F = VFNMS(LDK(KP125333233), T2E, VMUL(LDK(KP1_984229402), T2D)); + T2I = VFMA(LDK(KP1_457937254), T2G, VMUL(LDK(KP684547105), T2H)); + T2J = VSUB(T2F, T2I); + T2Y = VFNMS(LDK(KP1_996053456), T2N, VMUL(LDK(KP062790519), T2O)); + T2Z = VFMA(LDK(KP1_541026485), T2K, VMUL(LDK(KP637423989), T2L)); + T30 = VSUB(T2Y, T2Z); + T2M = VFNMS(LDK(KP770513242), T2L, VMUL(LDK(KP1_274847979), T2K)); + T2P = VFMA(LDK(KP125581039), T2N, VMUL(LDK(KP998026728), T2O)); + T2Q = VSUB(T2M, T2P); + T2V = VFNMS(LDK(KP1_369094211), T2G, VMUL(LDK(KP728968627), T2H)); + T2W = VFMA(LDK(KP250666467), T2D, VMUL(LDK(KP992114701), T2E)); + T2X = VSUB(T2V, T2W); + { + V T34, T35, T36, T37, T38, T39; + T34 = VFNMS(LDK(KP481753674), T2H, VMUL(LDK(KP1_752613360), T2G)); + T35 = VFMA(LDK(KP851558583), T2D, VMUL(LDK(KP904827052), T2E)); + T36 = VSUB(T34, T35); + T37 = VFNMS(LDK(KP844327925), T2O, VMUL(LDK(KP1_071653589), T2N)); + T38 = VFNMS(LDK(KP998026728), T2L, VMUL(LDK(KP125581039), T2K)); + T39 = VADD(T37, T38); + T3a = VMUL(LDK(KP559016994), VSUB(T36, T39)); + T3l = VSUB(T37, T38); + T3b = VADD(T36, T39); + T3k = VADD(T34, T35); + } + { + V T3d, T3e, T3m, T3g, T3h, T3n; + T3d = VFNMS(LDK(KP425779291), T2E, VMUL(LDK(KP1_809654104), T2D)); + T3e = VFMA(LDK(KP963507348), T2G, VMUL(LDK(KP876306680), T2H)); + T3m = VADD(T3e, T3d); + T3g = VFMA(LDK(KP1_688655851), T2N, VMUL(LDK(KP535826794), T2O)); + T3h = VFMA(LDK(KP1_996053456), T2K, VMUL(LDK(KP062790519), T2L)); + T3n = VADD(T3g, T3h); + T3f = VSUB(T3d, T3e); + T3p = VADD(T3m, T3n); + T3i = VSUB(T3g, T3h); + T3o = VMUL(LDK(KP559016994), VSUB(T3m, T3n)); + } + { + V T3u, T3v, T2T, T31; + T3u = VBYI(VADD(T2S, T3b)); + T3v = 
VADD(T2U, T3p); + ST(&(x[WS(rs, 2)]), VADD(T3u, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 23)]), VSUB(T3v, T3u), ms, &(x[WS(rs, 1)])); + T2T = VBYI(VSUB(VADD(T2J, T2Q), T2S)); + T31 = VADD(T2U, VADD(T2X, T30)); + ST(&(x[WS(rs, 3)]), VADD(T2T, T31), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 22)]), VSUB(T31, T2T), ms, &(x[0])); + } + T32 = VFMA(LDK(KP309016994), T2X, VFNMS(LDK(KP809016994), T30, VFNMS(LDK(KP587785252), VADD(T2P, T2M), VFNMS(LDK(KP951056516), VADD(T2I, T2F), T2U)))); + T33 = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2Y, T2Z), VFNMS(LDK(KP809016994), T2Q, VFNMS(LDK(KP951056516), VADD(T2V, T2W), VMUL(LDK(KP309016994), T2J)))), T2S)); + ST(&(x[WS(rs, 17)]), VSUB(T32, T33), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 8)]), VADD(T32, T33), ms, &(x[0])); + { + V T3j, T3s, T3r, T3t, T3c, T3q; + T3c = VFNMS(LDK(KP250000000), T3b, T2S); + T3j = VBYI(VADD(T3a, VADD(T3c, VFNMS(LDK(KP587785252), T3i, VMUL(LDK(KP951056516), T3f))))); + T3s = VBYI(VADD(T3c, VSUB(VFMA(LDK(KP587785252), T3f, VMUL(LDK(KP951056516), T3i)), T3a))); + T3q = VFNMS(LDK(KP250000000), T3p, T2U); + T3r = VFMA(LDK(KP951056516), T3k, VFMA(LDK(KP587785252), T3l, VADD(T3o, T3q))); + T3t = VFMA(LDK(KP587785252), T3k, VSUB(VFNMS(LDK(KP951056516), T3l, T3q), T3o)); + ST(&(x[WS(rs, 7)]), VADD(T3j, T3r), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VSUB(T3t, T3s), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 18)]), VSUB(T3r, T3j), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VADD(T3s, T3t), ms, &(x[0])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 24), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 25, XSIMD_STRING("t3fv_25"), twinstr, &GENUS, {190, 150, 78, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_25) (planner *p) { + X(kdft_dit_register) (p, t3fv_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:26 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 32 -name t3fv_32 -include t3f.h */ + +/* + * This function contains 244 FP additions, 214 FP multiplications, + * (or, 146 additions, 116 multiplications, 98 fused multiply/add), + * 118 stack variables, 7 constants, and 64 memory accesses + */ +#include "t3f.h" + +static void t3fv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, 
+0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(32, rs)) { + V T2B, T2A, T2u, T2x, T2r, T2F, T2L, T2P; + { + V T2, T5, T3, T7; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T7 = LDW(&(W[TWVL * 6])); + { + V T24, Tb, T3x, T2T, T3K, T2W, T25, Tr, T3z, T3g, T28, TX, T3y, T3j, T27; + V TG, T37, T3F, T3G, T3a, T2Y, T15, T1p, T2Z, T2w, T1V, T2v, T1N, T32, T1h; + V T17, T1a; + { + V T1, Tz, TT, T4, TC, Tv, T12, T1D, T1w, T18, T1t, T1O, TK, TP, T1c; + V T1m, Tf, T6, Te, TL, TQ, T2S, Tp, TU, Ti, Ta, TM, TR, Tm, TJ; + V T22, T9, T1Z; + T1 = LD(&(x[0]), ms, &(x[0])); + T22 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T9 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + T1Z = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + { + V Tn, TH, Tk, To, Th, Tg, T8, Tl, T20, T23, TI; + { + V Td, T1C, Tc, T21; + Td = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tz = VZMUL(T2, T5); + T1C = VZMULJ(T2, T5); + Tn = VZMUL(T3, T5); + TT = VZMULJ(T3, T5); + Tc = VZMUL(T2, T3); + T4 = VZMULJ(T2, T3); + TH = VZMUL(T3, T7); + T21 = VZMULJ(T3, T7); + Tk = VZMUL(T2, T7); + TC = VZMULJ(T2, T7); + Tv = VZMULJ(T5, T7); + T12 = VZMULJ(Tz, T7); + T20 = VZMULJ(T1C, T1Z); + T1D = VZMULJ(T1C, T7); + T1w = VZMULJ(Tn, T7); + T18 = VZMULJ(TT, T7); + T1t = VZMUL(Tc, T7); + T1O = VZMULJ(Tc, T7); + TK = VZMUL(Tc, T5); + TP = VZMULJ(Tc, T5); + T1c = VZMUL(T4, T7); + T1m = VZMULJ(T4, T7); + Tf = VZMULJ(T4, T5); + T6 = VZMUL(T4, T5); + T23 = VZMULJ(T21, T22); + Te = VZMULJ(Tc, Td); + } + TL = VZMULJ(TK, T7); + TQ = VZMULJ(TP, T7); + To = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Th = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Tg = VZMULJ(Tf, T7); + T8 = VZMULJ(T6, T7); + T2S = VADD(T20, T23); + T24 = VSUB(T20, T23); + Tl = 
LD(&(x[WS(rs, 28)]), ms, &(x[0])); + TI = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + Tp = VZMULJ(Tn, To); + TU = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ti = VZMULJ(Tg, Th); + Ta = VZMULJ(T8, T9); + TM = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TR = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + Tm = VZMULJ(Tk, Tl); + TJ = VZMULJ(TH, TI); + } + { + V Tu, TE, Tw, TA; + { + V T3e, TO, T3f, TW; + { + V TV, T2U, Tj, T2R, TN, TS, T2V, Tq, Tt, TD; + Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + TV = VZMULJ(TT, TU); + T2U = VADD(Te, Ti); + Tj = VSUB(Te, Ti); + T2R = VADD(T1, Ta); + Tb = VSUB(T1, Ta); + TN = VZMULJ(TL, TM); + TS = VZMULJ(TQ, TR); + T2V = VADD(Tm, Tp); + Tq = VSUB(Tm, Tp); + Tu = VZMULJ(T4, Tt); + TD = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + T3x = VSUB(T2R, T2S); + T2T = VADD(T2R, T2S); + T3e = VADD(TJ, TN); + TO = VSUB(TJ, TN); + T3f = VADD(TV, TS); + TW = VSUB(TS, TV); + T3K = VSUB(T2V, T2U); + T2W = VADD(T2U, T2V); + T25 = VSUB(Tq, Tj); + Tr = VADD(Tj, Tq); + TE = VZMULJ(TC, TD); + } + Tw = LD(&(x[WS(rs, 18)]), ms, &(x[0])); + T3z = VSUB(T3e, T3f); + T3g = VADD(T3e, T3f); + T28 = VFMA(LDK(KP414213562), TO, TW); + TX = VFNMS(LDK(KP414213562), TW, TO); + TA = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + } + { + V T35, T1z, T1T, T36, T39, T1L, T1B, T1F; + { + V T1v, T1y, Ty, T3h, T1S, T1Q, T1I, T3i, TF, T1K, T1A, T1E; + { + V T1u, T1x, Tx, T1R; + T1u = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1x = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + Tx = VZMULJ(Tv, Tw); + T1R = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + { + V T1P, T1H, T1J, TB; + T1P = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1H = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1J = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + TB = VZMULJ(Tz, TA); + T1v = VZMULJ(T1t, T1u); + T1y = VZMULJ(T1w, T1x); + Ty = VSUB(Tu, Tx); + T3h = VADD(Tu, Tx); + T1S = VZMULJ(Tf, T1R); + T1Q = VZMULJ(T1O, T1P); + T1I = VZMULJ(T7, T1H); + T3i = VADD(TB, TE); + TF = VSUB(TB, TE); + T1K = VZMULJ(T6, T1J); + T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 
1)])); + T1E = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + } + } + T35 = VADD(T1v, T1y); + T1z = VSUB(T1v, T1y); + T1T = VSUB(T1Q, T1S); + T36 = VADD(T1S, T1Q); + T3y = VSUB(T3h, T3i); + T3j = VADD(T3h, T3i); + T27 = VFMA(LDK(KP414213562), Ty, TF); + TG = VFNMS(LDK(KP414213562), TF, Ty); + T39 = VADD(T1I, T1K); + T1L = VSUB(T1I, T1K); + T1B = VZMULJ(T3, T1A); + T1F = VZMULJ(T1D, T1E); + } + { + V T11, T14, T1o, T1l, T1e, T1U, T1M, T1g, T16, T19; + { + V T10, T13, T1n, T1k; + T10 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T13 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T1n = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T1k = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + { + V T1d, T1f, T1G, T38; + T1d = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1f = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1G = VSUB(T1B, T1F); + T38 = VADD(T1B, T1F); + T37 = VADD(T35, T36); + T3F = VSUB(T35, T36); + T11 = VZMULJ(T2, T10); + T14 = VZMULJ(T12, T13); + T1o = VZMULJ(T1m, T1n); + T1l = VZMULJ(T5, T1k); + T1e = VZMULJ(T1c, T1d); + T3G = VSUB(T39, T38); + T3a = VADD(T38, T39); + T1U = VSUB(T1L, T1G); + T1M = VADD(T1G, T1L); + T1g = VZMULJ(TK, T1f); + } + T16 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T19 = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + } + T2Y = VADD(T11, T14); + T15 = VSUB(T11, T14); + T1p = VSUB(T1l, T1o); + T2Z = VADD(T1l, T1o); + T2w = VFNMS(LDK(KP707106781), T1U, T1T); + T1V = VFMA(LDK(KP707106781), T1U, T1T); + T2v = VFNMS(LDK(KP707106781), T1M, T1z); + T1N = VFMA(LDK(KP707106781), T1M, T1z); + T32 = VADD(T1e, T1g); + T1h = VSUB(T1e, T1g); + T17 = VZMULJ(TP, T16); + T1a = VZMULJ(T18, T19); + } + } + } + } + { + V T2X, T3k, T3b, T3t, T1b, T31, T30, T3C, T3r, T3v, T3p, T3q; + T2X = VSUB(T2T, T2W); + T3p = VADD(T2T, T2W); + T3q = VADD(T3j, T3g); + T3k = VSUB(T3g, T3j); + T3b = VSUB(T37, T3a); + T3t = VADD(T37, T3a); + T1b = VSUB(T17, T1a); + T31 = VADD(T17, T1a); + T30 = VADD(T2Y, T2Z); + T3C = VSUB(T2Y, T2Z); + T3r = VADD(T3p, T3q); + T3v = VSUB(T3p, 
T3q); + { + V T3N, T3B, T3T, T3M, T3W, T3O, T2t, T1r, T2s, T1j, T3I, T3X, T3c, T3l, T3u; + V T3w; + { + V T3L, T3A, T33, T3D, T1i, T1q; + T3L = VSUB(T3z, T3y); + T3A = VADD(T3y, T3z); + T33 = VADD(T31, T32); + T3D = VSUB(T31, T32); + T1i = VADD(T1b, T1h); + T1q = VSUB(T1b, T1h); + { + V T3H, T3E, T34, T3s; + T3N = VFMA(LDK(KP414213562), T3F, T3G); + T3H = VFNMS(LDK(KP414213562), T3G, T3F); + T3B = VFMA(LDK(KP707106781), T3A, T3x); + T3T = VFNMS(LDK(KP707106781), T3A, T3x); + T3M = VFMA(LDK(KP707106781), T3L, T3K); + T3W = VFNMS(LDK(KP707106781), T3L, T3K); + T3O = VFMA(LDK(KP414213562), T3C, T3D); + T3E = VFNMS(LDK(KP414213562), T3D, T3C); + T34 = VSUB(T30, T33); + T3s = VADD(T30, T33); + T2t = VFNMS(LDK(KP707106781), T1q, T1p); + T1r = VFMA(LDK(KP707106781), T1q, T1p); + T2s = VFNMS(LDK(KP707106781), T1i, T15); + T1j = VFMA(LDK(KP707106781), T1i, T15); + T3I = VADD(T3E, T3H); + T3X = VSUB(T3H, T3E); + T3c = VADD(T34, T3b); + T3l = VSUB(T3b, T34); + T3u = VADD(T3s, T3t); + T3w = VSUB(T3t, T3s); + } + } + { + V T2p, Ts, TY, T1s, T2b, T2c, T1W, T26, T29, T2q, T3U, T3P, T2J, T2K; + T2p = VFNMS(LDK(KP707106781), Tr, Tb); + Ts = VFMA(LDK(KP707106781), Tr, Tb); + T3U = VADD(T3O, T3N); + T3P = VSUB(T3N, T3O); + { + V T3Y, T40, T3R, T3J; + T3Y = VFMA(LDK(KP923879532), T3X, T3W); + T40 = VFNMS(LDK(KP923879532), T3X, T3W); + T3R = VFMA(LDK(KP923879532), T3I, T3B); + T3J = VFNMS(LDK(KP923879532), T3I, T3B); + { + V T3o, T3m, T3n, T3d; + T3o = VFMA(LDK(KP707106781), T3l, T3k); + T3m = VFNMS(LDK(KP707106781), T3l, T3k); + T3n = VFMA(LDK(KP707106781), T3c, T2X); + T3d = VFNMS(LDK(KP707106781), T3c, T2X); + ST(&(x[WS(rs, 24)]), VFNMSI(T3w, T3v), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VFMAI(T3w, T3v), ms, &(x[0])); + ST(&(x[0]), VADD(T3r, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 16)]), VSUB(T3r, T3u), ms, &(x[0])); + { + V T3V, T3Z, T3S, T3Q; + T3V = VFNMS(LDK(KP923879532), T3U, T3T); + T3Z = VFMA(LDK(KP923879532), T3U, T3T); + T3S = VFMA(LDK(KP923879532), T3P, T3M); + T3Q = 
VFNMS(LDK(KP923879532), T3P, T3M); + ST(&(x[WS(rs, 4)]), VFMAI(T3o, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 28)]), VFNMSI(T3o, T3n), ms, &(x[0])); + ST(&(x[WS(rs, 20)]), VFMAI(T3m, T3d), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), VFNMSI(T3m, T3d), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VFNMSI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 10)]), VFMAI(T3Y, T3V), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VFMAI(T40, T3Z), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(T40, T3Z), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VFMAI(T3S, T3R), ms, &(x[0])); + ST(&(x[WS(rs, 30)]), VFNMSI(T3S, T3R), ms, &(x[0])); + ST(&(x[WS(rs, 18)]), VFMAI(T3Q, T3J), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VFNMSI(T3Q, T3J), ms, &(x[0])); + TY = VADD(TG, TX); + T2B = VSUB(TX, TG); + } + } + } + T1s = VFNMS(LDK(KP198912367), T1r, T1j); + T2b = VFMA(LDK(KP198912367), T1j, T1r); + T2c = VFMA(LDK(KP198912367), T1N, T1V); + T1W = VFNMS(LDK(KP198912367), T1V, T1N); + T2A = VFMA(LDK(KP707106781), T25, T24); + T26 = VFNMS(LDK(KP707106781), T25, T24); + T29 = VSUB(T27, T28); + T2q = VADD(T27, T28); + { + V T2j, T2n, T1Y, T2f, T2o, T2m, T2e, T2g; + { + V T2h, TZ, T2i, T2d, T2l, T1X, T2k, T2a, T2D, T2E; + T2h = VFNMS(LDK(KP923879532), TY, Ts); + TZ = VFMA(LDK(KP923879532), TY, Ts); + T2i = VADD(T2b, T2c); + T2d = VSUB(T2b, T2c); + T2l = VSUB(T1W, T1s); + T1X = VADD(T1s, T1W); + T2k = VFNMS(LDK(KP923879532), T29, T26); + T2a = VFMA(LDK(KP923879532), T29, T26); + T2u = VFMA(LDK(KP668178637), T2t, T2s); + T2D = VFNMS(LDK(KP668178637), T2s, T2t); + T2j = VFNMS(LDK(KP980785280), T2i, T2h); + T2n = VFMA(LDK(KP980785280), T2i, T2h); + T2E = VFNMS(LDK(KP668178637), T2v, T2w); + T2x = VFMA(LDK(KP668178637), T2w, T2v); + T1Y = VFNMS(LDK(KP980785280), T1X, TZ); + T2f = VFMA(LDK(KP980785280), T1X, TZ); + T2o = VFMA(LDK(KP980785280), T2l, T2k); + T2m = VFNMS(LDK(KP980785280), T2l, T2k); + T2e = VFNMS(LDK(KP980785280), T2d, T2a); + T2g = VFMA(LDK(KP980785280), T2d, T2a); + T2r = VFMA(LDK(KP923879532), T2q, T2p); + T2J = VFNMS(LDK(KP923879532), T2q, 
T2p); + T2K = VADD(T2D, T2E); + T2F = VSUB(T2D, T2E); + } + ST(&(x[WS(rs, 23)]), VFMAI(T2m, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 9)]), VFNMSI(T2m, T2j), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VFNMSI(T2o, T2n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(T2o, T2n), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 31)]), VFMAI(T2g, T2f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(T2g, T2f), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VFMAI(T2e, T1Y), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 17)]), VFNMSI(T2e, T1Y), ms, &(x[WS(rs, 1)])); + } + T2L = VFMA(LDK(KP831469612), T2K, T2J); + T2P = VFNMS(LDK(KP831469612), T2K, T2J); + } + } + } + } + } + { + V T2y, T2N, T2C, T2M; + T2y = VADD(T2u, T2x); + T2N = VSUB(T2x, T2u); + T2C = VFMA(LDK(KP923879532), T2B, T2A); + T2M = VFNMS(LDK(KP923879532), T2B, T2A); + { + V T2z, T2H, T2Q, T2O, T2G, T2I; + T2z = VFNMS(LDK(KP831469612), T2y, T2r); + T2H = VFMA(LDK(KP831469612), T2y, T2r); + T2Q = VFNMS(LDK(KP831469612), T2N, T2M); + T2O = VFMA(LDK(KP831469612), T2N, T2M); + T2G = VFNMS(LDK(KP831469612), T2F, T2C); + T2I = VFMA(LDK(KP831469612), T2F, T2C); + ST(&(x[WS(rs, 21)]), VFNMSI(T2O, T2L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 11)]), VFMAI(T2O, T2L), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VFMAI(T2Q, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VFNMSI(T2Q, T2P), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(T2I, T2H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 29)]), VFNMSI(T2I, T2H), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 19)]), VFMAI(T2G, T2z), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 13)]), VFNMSI(T2G, T2z), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 27), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t3fv_32"), twinstr, &GENUS, {146, 116, 98, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_32) (planner *p) { + X(kdft_dit_register) (p, t3fv_32, &desc); +} +#else /* HAVE_FMA */ + +/* 
Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 32 -name t3fv_32 -include t3f.h */ + +/* + * This function contains 244 FP additions, 158 FP multiplications, + * (or, 228 additions, 142 multiplications, 16 fused multiply/add), + * 90 stack variables, 7 constants, and 64 memory accesses + */ +#include "t3f.h" + +static void t3fv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(32, rs)) { + V T2, T5, T3, T4, Tc, T1C, TP, Tz, Tn, T6, TS, Tf, TK, T7, T8; + V Tv, T1w, T22, Tg, Tk, T1D, T1R, TC, T18, T12, T1t, TH, TL, TT, T1n; + V T1c; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 4])); + T3 = LDW(&(W[TWVL * 2])); + T4 = VZMULJ(T2, T3); + Tc = VZMUL(T2, T3); + T1C = VZMULJ(T2, T5); + TP = VZMULJ(T3, T5); + Tz = VZMUL(T2, T5); + Tn = VZMUL(T3, T5); + T6 = VZMUL(T4, T5); + TS = VZMULJ(Tc, T5); + Tf = VZMULJ(T4, T5); + TK = VZMUL(Tc, T5); + T7 = LDW(&(W[TWVL * 6])); + T8 = VZMULJ(T6, T7); + Tv = VZMULJ(T5, T7); + T1w = VZMULJ(Tn, T7); + T22 = VZMULJ(T3, T7); + Tg = VZMULJ(Tf, T7); + Tk = VZMUL(T2, T7); + T1D = VZMULJ(T1C, T7); + T1R = VZMULJ(Tc, T7); + TC = VZMULJ(T2, T7); + T18 = VZMULJ(TP, T7); + T12 = VZMULJ(Tz, T7); + T1t = VZMUL(Tc, T7); + TH = VZMUL(T3, T7); + TL = VZMULJ(TK, T7); + TT = VZMULJ(TS, T7); + T1n = 
VZMULJ(T4, T7); + T1c = VZMUL(T4, T7); + { + V Tb, T25, T2T, T3x, Tr, T1Z, T2W, T3K, TX, T27, T3g, T3z, TG, T28, T3j; + V T3y, T1N, T2v, T3a, T3G, T1V, T2w, T37, T3F, T1j, T2s, T33, T3D, T1r, T2t; + V T30, T3C; + { + V T1, T24, Ta, T21, T23, T9, T20, T2R, T2S; + T1 = LD(&(x[0]), ms, &(x[0])); + T23 = LD(&(x[WS(rs, 24)]), ms, &(x[0])); + T24 = VZMULJ(T22, T23); + T9 = LD(&(x[WS(rs, 16)]), ms, &(x[0])); + Ta = VZMULJ(T8, T9); + T20 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); + T21 = VZMULJ(T1C, T20); + Tb = VSUB(T1, Ta); + T25 = VSUB(T21, T24); + T2R = VADD(T1, Ta); + T2S = VADD(T21, T24); + T2T = VADD(T2R, T2S); + T3x = VSUB(T2R, T2S); + } + { + V Te, Tp, Ti, Tm; + { + V Td, To, Th, Tl; + Td = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Te = VZMULJ(Tc, Td); + To = LD(&(x[WS(rs, 12)]), ms, &(x[0])); + Tp = VZMULJ(Tn, To); + Th = LD(&(x[WS(rs, 20)]), ms, &(x[0])); + Ti = VZMULJ(Tg, Th); + Tl = LD(&(x[WS(rs, 28)]), ms, &(x[0])); + Tm = VZMULJ(Tk, Tl); + } + { + V Tj, Tq, T2U, T2V; + Tj = VSUB(Te, Ti); + Tq = VSUB(Tm, Tp); + Tr = VMUL(LDK(KP707106781), VADD(Tj, Tq)); + T1Z = VMUL(LDK(KP707106781), VSUB(Tq, Tj)); + T2U = VADD(Te, Ti); + T2V = VADD(Tm, Tp); + T2W = VADD(T2U, T2V); + T3K = VSUB(T2V, T2U); + } + } + { + V TJ, TV, TN, TR; + { + V TI, TU, TM, TQ; + TI = LD(&(x[WS(rs, 30)]), ms, &(x[0])); + TJ = VZMULJ(TH, TI); + TU = LD(&(x[WS(rs, 22)]), ms, &(x[0])); + TV = VZMULJ(TT, TU); + TM = LD(&(x[WS(rs, 14)]), ms, &(x[0])); + TN = VZMULJ(TL, TM); + TQ = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + TR = VZMULJ(TP, TQ); + } + { + V TO, TW, T3e, T3f; + TO = VSUB(TJ, TN); + TW = VSUB(TR, TV); + TX = VFMA(LDK(KP923879532), TO, VMUL(LDK(KP382683432), TW)); + T27 = VFNMS(LDK(KP923879532), TW, VMUL(LDK(KP382683432), TO)); + T3e = VADD(TJ, TN); + T3f = VADD(TR, TV); + T3g = VADD(T3e, T3f); + T3z = VSUB(T3e, T3f); + } + } + { + V Tu, TE, Tx, TB; + { + V Tt, TD, Tw, TA; + Tt = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tu = VZMULJ(T4, Tt); + TD = LD(&(x[WS(rs, 26)]), ms, &(x[0])); + TE = VZMULJ(TC, TD); + Tw = 
LD(&(x[WS(rs, 18)]), ms, &(x[0])); + Tx = VZMULJ(Tv, Tw); + TA = LD(&(x[WS(rs, 10)]), ms, &(x[0])); + TB = VZMULJ(Tz, TA); + } + { + V Ty, TF, T3h, T3i; + Ty = VSUB(Tu, Tx); + TF = VSUB(TB, TE); + TG = VFNMS(LDK(KP382683432), TF, VMUL(LDK(KP923879532), Ty)); + T28 = VFMA(LDK(KP382683432), Ty, VMUL(LDK(KP923879532), TF)); + T3h = VADD(Tu, Tx); + T3i = VADD(TB, TE); + T3j = VADD(T3h, T3i); + T3y = VSUB(T3h, T3i); + } + } + { + V T1v, T1y, T1T, T1Q, T1I, T1K, T1L, T1B, T1F, T1G; + { + V T1u, T1x, T1S, T1P; + T1u = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)])); + T1v = VZMULJ(T1t, T1u); + T1x = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); + T1y = VZMULJ(T1w, T1x); + T1S = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); + T1T = VZMULJ(T1R, T1S); + T1P = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + T1Q = VZMULJ(Tf, T1P); + { + V T1H, T1J, T1A, T1E; + T1H = LD(&(x[WS(rs, 27)]), ms, &(x[WS(rs, 1)])); + T1I = VZMULJ(T7, T1H); + T1J = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)])); + T1K = VZMULJ(T6, T1J); + T1L = VSUB(T1I, T1K); + T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T1B = VZMULJ(T3, T1A); + T1E = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)])); + T1F = VZMULJ(T1D, T1E); + T1G = VSUB(T1B, T1F); + } + } + { + V T1z, T1M, T38, T39; + T1z = VSUB(T1v, T1y); + T1M = VMUL(LDK(KP707106781), VADD(T1G, T1L)); + T1N = VADD(T1z, T1M); + T2v = VSUB(T1z, T1M); + T38 = VADD(T1B, T1F); + T39 = VADD(T1I, T1K); + T3a = VADD(T38, T39); + T3G = VSUB(T39, T38); + } + { + V T1O, T1U, T35, T36; + T1O = VMUL(LDK(KP707106781), VSUB(T1L, T1G)); + T1U = VSUB(T1Q, T1T); + T1V = VSUB(T1O, T1U); + T2w = VADD(T1U, T1O); + T35 = VADD(T1v, T1y); + T36 = VADD(T1Q, T1T); + T37 = VADD(T35, T36); + T3F = VSUB(T35, T36); + } + } + { + V T11, T14, T1p, T1m, T1e, T1g, T1h, T17, T1a, T1b; + { + V T10, T13, T1o, T1l; + T10 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T11 = VZMULJ(T2, T10); + T13 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)])); + T14 = VZMULJ(T12, T13); + T1o = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)])); + T1p = 
VZMULJ(T1n, T1o); + T1l = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)])); + T1m = VZMULJ(T5, T1l); + { + V T1d, T1f, T16, T19; + T1d = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)])); + T1e = VZMULJ(T1c, T1d); + T1f = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); + T1g = VZMULJ(TK, T1f); + T1h = VSUB(T1e, T1g); + T16 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T17 = VZMULJ(TS, T16); + T19 = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)])); + T1a = VZMULJ(T18, T19); + T1b = VSUB(T17, T1a); + } + } + { + V T15, T1i, T31, T32; + T15 = VSUB(T11, T14); + T1i = VMUL(LDK(KP707106781), VADD(T1b, T1h)); + T1j = VADD(T15, T1i); + T2s = VSUB(T15, T1i); + T31 = VADD(T17, T1a); + T32 = VADD(T1e, T1g); + T33 = VADD(T31, T32); + T3D = VSUB(T32, T31); + } + { + V T1k, T1q, T2Y, T2Z; + T1k = VMUL(LDK(KP707106781), VSUB(T1h, T1b)); + T1q = VSUB(T1m, T1p); + T1r = VSUB(T1k, T1q); + T2t = VADD(T1q, T1k); + T2Y = VADD(T11, T14); + T2Z = VADD(T1m, T1p); + T30 = VADD(T2Y, T2Z); + T3C = VSUB(T2Y, T2Z); + } + } + { + V T3r, T3v, T3u, T3w; + { + V T3p, T3q, T3s, T3t; + T3p = VADD(T2T, T2W); + T3q = VADD(T3j, T3g); + T3r = VADD(T3p, T3q); + T3v = VSUB(T3p, T3q); + T3s = VADD(T30, T33); + T3t = VADD(T37, T3a); + T3u = VADD(T3s, T3t); + T3w = VBYI(VSUB(T3t, T3s)); + } + ST(&(x[WS(rs, 16)]), VSUB(T3r, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 8)]), VADD(T3v, T3w), ms, &(x[0])); + ST(&(x[0]), VADD(T3r, T3u), ms, &(x[0])); + ST(&(x[WS(rs, 24)]), VSUB(T3v, T3w), ms, &(x[0])); + } + { + V T2X, T3k, T3c, T3l, T34, T3b; + T2X = VSUB(T2T, T2W); + T3k = VSUB(T3g, T3j); + T34 = VSUB(T30, T33); + T3b = VSUB(T37, T3a); + T3c = VMUL(LDK(KP707106781), VADD(T34, T3b)); + T3l = VMUL(LDK(KP707106781), VSUB(T3b, T34)); + { + V T3d, T3m, T3n, T3o; + T3d = VADD(T2X, T3c); + T3m = VBYI(VADD(T3k, T3l)); + ST(&(x[WS(rs, 28)]), VSUB(T3d, T3m), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VADD(T3d, T3m), ms, &(x[0])); + T3n = VSUB(T2X, T3c); + T3o = VBYI(VSUB(T3l, T3k)); + ST(&(x[WS(rs, 20)]), VSUB(T3n, T3o), ms, &(x[0])); + ST(&(x[WS(rs, 12)]), 
VADD(T3n, T3o), ms, &(x[0])); + } + } + { + V T3B, T3W, T3M, T3U, T3I, T3T, T3P, T3X, T3A, T3L; + T3A = VMUL(LDK(KP707106781), VADD(T3y, T3z)); + T3B = VADD(T3x, T3A); + T3W = VSUB(T3x, T3A); + T3L = VMUL(LDK(KP707106781), VSUB(T3z, T3y)); + T3M = VADD(T3K, T3L); + T3U = VSUB(T3L, T3K); + { + V T3E, T3H, T3N, T3O; + T3E = VFMA(LDK(KP923879532), T3C, VMUL(LDK(KP382683432), T3D)); + T3H = VFNMS(LDK(KP382683432), T3G, VMUL(LDK(KP923879532), T3F)); + T3I = VADD(T3E, T3H); + T3T = VSUB(T3H, T3E); + T3N = VFNMS(LDK(KP382683432), T3C, VMUL(LDK(KP923879532), T3D)); + T3O = VFMA(LDK(KP382683432), T3F, VMUL(LDK(KP923879532), T3G)); + T3P = VADD(T3N, T3O); + T3X = VSUB(T3O, T3N); + } + { + V T3J, T3Q, T3Z, T40; + T3J = VADD(T3B, T3I); + T3Q = VBYI(VADD(T3M, T3P)); + ST(&(x[WS(rs, 30)]), VSUB(T3J, T3Q), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(T3J, T3Q), ms, &(x[0])); + T3Z = VBYI(VADD(T3U, T3T)); + T40 = VADD(T3W, T3X); + ST(&(x[WS(rs, 6)]), VADD(T3Z, T40), ms, &(x[0])); + ST(&(x[WS(rs, 26)]), VSUB(T40, T3Z), ms, &(x[0])); + } + { + V T3R, T3S, T3V, T3Y; + T3R = VSUB(T3B, T3I); + T3S = VBYI(VSUB(T3P, T3M)); + ST(&(x[WS(rs, 18)]), VSUB(T3R, T3S), ms, &(x[0])); + ST(&(x[WS(rs, 14)]), VADD(T3R, T3S), ms, &(x[0])); + T3V = VBYI(VSUB(T3T, T3U)); + T3Y = VSUB(T3W, T3X); + ST(&(x[WS(rs, 10)]), VADD(T3V, T3Y), ms, &(x[0])); + ST(&(x[WS(rs, 22)]), VSUB(T3Y, T3V), ms, &(x[0])); + } + } + { + V TZ, T2k, T2d, T2l, T1X, T2h, T2a, T2i; + { + V Ts, TY, T2b, T2c; + Ts = VADD(Tb, Tr); + TY = VADD(TG, TX); + TZ = VADD(Ts, TY); + T2k = VSUB(Ts, TY); + T2b = VFNMS(LDK(KP195090322), T1j, VMUL(LDK(KP980785280), T1r)); + T2c = VFMA(LDK(KP195090322), T1N, VMUL(LDK(KP980785280), T1V)); + T2d = VADD(T2b, T2c); + T2l = VSUB(T2c, T2b); + } + { + V T1s, T1W, T26, T29; + T1s = VFMA(LDK(KP980785280), T1j, VMUL(LDK(KP195090322), T1r)); + T1W = VFNMS(LDK(KP195090322), T1V, VMUL(LDK(KP980785280), T1N)); + T1X = VADD(T1s, T1W); + T2h = VSUB(T1W, T1s); + T26 = VSUB(T1Z, T25); + T29 = VSUB(T27, T28); + T2a = 
VADD(T26, T29); + T2i = VSUB(T29, T26); + } + { + V T1Y, T2e, T2n, T2o; + T1Y = VADD(TZ, T1X); + T2e = VBYI(VADD(T2a, T2d)); + ST(&(x[WS(rs, 31)]), VSUB(T1Y, T2e), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(T1Y, T2e), ms, &(x[WS(rs, 1)])); + T2n = VBYI(VADD(T2i, T2h)); + T2o = VADD(T2k, T2l); + ST(&(x[WS(rs, 7)]), VADD(T2n, T2o), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 25)]), VSUB(T2o, T2n), ms, &(x[WS(rs, 1)])); + } + { + V T2f, T2g, T2j, T2m; + T2f = VSUB(TZ, T1X); + T2g = VBYI(VSUB(T2d, T2a)); + ST(&(x[WS(rs, 17)]), VSUB(T2f, T2g), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 15)]), VADD(T2f, T2g), ms, &(x[WS(rs, 1)])); + T2j = VBYI(VSUB(T2h, T2i)); + T2m = VSUB(T2k, T2l); + ST(&(x[WS(rs, 9)]), VADD(T2j, T2m), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 23)]), VSUB(T2m, T2j), ms, &(x[WS(rs, 1)])); + } + } + { + V T2r, T2M, T2F, T2N, T2y, T2J, T2C, T2K; + { + V T2p, T2q, T2D, T2E; + T2p = VSUB(Tb, Tr); + T2q = VADD(T28, T27); + T2r = VADD(T2p, T2q); + T2M = VSUB(T2p, T2q); + T2D = VFNMS(LDK(KP555570233), T2s, VMUL(LDK(KP831469612), T2t)); + T2E = VFMA(LDK(KP555570233), T2v, VMUL(LDK(KP831469612), T2w)); + T2F = VADD(T2D, T2E); + T2N = VSUB(T2E, T2D); + } + { + V T2u, T2x, T2A, T2B; + T2u = VFMA(LDK(KP831469612), T2s, VMUL(LDK(KP555570233), T2t)); + T2x = VFNMS(LDK(KP555570233), T2w, VMUL(LDK(KP831469612), T2v)); + T2y = VADD(T2u, T2x); + T2J = VSUB(T2x, T2u); + T2A = VADD(T25, T1Z); + T2B = VSUB(TX, TG); + T2C = VADD(T2A, T2B); + T2K = VSUB(T2B, T2A); + } + { + V T2z, T2G, T2P, T2Q; + T2z = VADD(T2r, T2y); + T2G = VBYI(VADD(T2C, T2F)); + ST(&(x[WS(rs, 29)]), VSUB(T2z, T2G), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T2z, T2G), ms, &(x[WS(rs, 1)])); + T2P = VBYI(VADD(T2K, T2J)); + T2Q = VADD(T2M, T2N); + ST(&(x[WS(rs, 5)]), VADD(T2P, T2Q), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 27)]), VSUB(T2Q, T2P), ms, &(x[WS(rs, 1)])); + } + { + V T2H, T2I, T2L, T2O; + T2H = VSUB(T2r, T2y); + T2I = VBYI(VSUB(T2F, T2C)); + ST(&(x[WS(rs, 19)]), VSUB(T2H, T2I), ms, &(x[WS(rs, 1)])); + 
ST(&(x[WS(rs, 13)]), VADD(T2H, T2I), ms, &(x[WS(rs, 1)])); + T2L = VBYI(VSUB(T2J, T2K)); + T2O = VSUB(T2M, T2N); + ST(&(x[WS(rs, 11)]), VADD(T2L, T2O), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 21)]), VSUB(T2O, T2L), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 9), + VTW(0, 27), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 32, XSIMD_STRING("t3fv_32"), twinstr, &GENUS, {228, 142, 16, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_32) (planner *p) { + X(kdft_dit_register) (p, t3fv_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:25 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 4 -name t3fv_4 -include t3f.h */ + +/* + * This function contains 12 FP additions, 10 FP multiplications, + * (or, 10 additions, 8 multiplications, 2 fused multiply/add), + * 16 stack variables, 0 constants, and 8 memory accesses + */ +#include "t3f.h" + +static void t3fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(4, rs)) { + V T2, T3, T1, Ta, T5, T8; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + T1 = LD(&(x[0]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + { + V T4, Tb, T9, T6; + T4 = VZMULJ(T2, T3); + Tb = VZMULJ(T3, Ta); + T9 = VZMULJ(T2, T8); + T6 = VZMULJ(T4, T5); + { + V Tc, Te, T7, Td; + Tc = VSUB(T9, Tb); + Te = VADD(T9, Tb); + T7 = VSUB(T1, T6); + Td = VADD(T1, T6); + ST(&(x[0]), VADD(Td, Te), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VSUB(Td, Te), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFMAI(Tc, T7), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tc, T7), ms, &(x[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t3fv_4"), twinstr, &GENUS, {10, 8, 2, 0}, 0, 0, 
0 }; + +void XSIMD(codelet_t3fv_4) (planner *p) { + X(kdft_dit_register) (p, t3fv_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 4 -name t3fv_4 -include t3f.h */ + +/* + * This function contains 12 FP additions, 8 FP multiplications, + * (or, 12 additions, 8 multiplications, 0 fused multiply/add), + * 16 stack variables, 0 constants, and 8 memory accesses + */ +#include "t3f.h" + +static void t3fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(4, rs)) { + V T2, T3, T4; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + T4 = VZMULJ(T2, T3); + { + V T1, Tb, T6, T9, Ta, T5, T8; + T1 = LD(&(x[0]), ms, &(x[0])); + Ta = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tb = VZMULJ(T3, Ta); + T5 = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + T6 = VZMULJ(T4, T5); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = VZMULJ(T2, T8); + { + V T7, Tc, Td, Te; + T7 = VSUB(T1, T6); + Tc = VBYI(VSUB(T9, Tb)); + ST(&(x[WS(rs, 1)]), VSUB(T7, Tc), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(T7, Tc), ms, &(x[WS(rs, 1)])); + Td = VADD(T1, T6); + Te = VADD(T9, Tb); + ST(&(x[WS(rs, 2)]), VSUB(Td, Te), ms, &(x[0])); + ST(&(x[0]), VADD(Td, Te), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 4, XSIMD_STRING("t3fv_4"), twinstr, &GENUS, {12, 8, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_4) (planner *p) { + X(kdft_dit_register) (p, t3fv_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_5.c Tue Oct 06 
13:08:39 2015 +0100 @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:26 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 5 -name t3fv_5 -include t3f.h */ + +/* + * This function contains 22 FP additions, 23 FP multiplications, + * (or, 13 additions, 14 multiplications, 9 fused multiply/add), + * 30 stack variables, 4 constants, and 10 memory accesses + */ +#include "t3f.h" + +static void t3fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), 
W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(5, rs)) { + V T2, T5, T1, T3, Td, T7, Tb; + T2 = LDW(&(W[0])); + T5 = LDW(&(W[TWVL * 2])); + T1 = LD(&(x[0]), ms, &(x[0])); + T3 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T7 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + { + V Ta, T6, T4, Te, Tc, T8; + Ta = VZMULJ(T2, T5); + T6 = VZMUL(T2, T5); + T4 = VZMULJ(T2, T3); + Te = VZMULJ(T5, Td); + Tc = VZMULJ(Ta, Tb); + T8 = VZMULJ(T6, T7); + { + V Tf, Tl, T9, Tk; + Tf = VADD(Tc, Te); + Tl = VSUB(Tc, Te); + T9 = VADD(T4, T8); + Tk = VSUB(T4, T8); + { + V Ti, Tg, To, Tm, Th, Tn, Tj; + Ti = VSUB(T9, Tf); + Tg = VADD(T9, Tf); + To = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tk, Tl)); + Tm = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tl, Tk)); + Th = VFNMS(LDK(KP250000000), Tg, T1); + ST(&(x[0]), VADD(T1, Tg), ms, &(x[0])); + Tn = VFNMS(LDK(KP559016994), Ti, Th); + Tj = VFMA(LDK(KP559016994), Ti, Th); + ST(&(x[WS(rs, 2)]), VFMAI(To, Tn), ms, &(x[0])); + ST(&(x[WS(rs, 3)]), VFNMSI(To, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VFMAI(Tm, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tm, Tj), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t3fv_5"), twinstr, &GENUS, {13, 14, 9, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_5) (planner *p) { + X(kdft_dit_register) (p, t3fv_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 5 -name t3fv_5 -include t3f.h */ + +/* + * This function contains 22 FP additions, 18 FP multiplications, + * (or, 19 additions, 15 multiplications, 3 fused multiply/add), + * 24 stack variables, 4 constants, and 10 memory accesses + */ +#include "t3f.h" + +static void t3fv_5(R *ri, R 
*ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(5, rs)) { + V T1, T4, T5, T9; + T1 = LDW(&(W[0])); + T4 = LDW(&(W[TWVL * 2])); + T5 = VZMUL(T1, T4); + T9 = VZMULJ(T1, T4); + { + V Tg, Tk, Tl, T8, Te, Th; + Tg = LD(&(x[0]), ms, &(x[0])); + { + V T3, Td, T7, Tb; + { + V T2, Tc, T6, Ta; + T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T3 = VZMULJ(T1, T2); + Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Td = VZMULJ(T4, Tc); + T6 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T7 = VZMULJ(T5, T6); + Ta = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tb = VZMULJ(T9, Ta); + } + Tk = VSUB(T3, T7); + Tl = VSUB(Tb, Td); + T8 = VADD(T3, T7); + Te = VADD(Tb, Td); + Th = VADD(T8, Te); + } + ST(&(x[0]), VADD(Tg, Th), ms, &(x[0])); + { + V Tm, Tn, Tj, To, Tf, Ti; + Tm = VBYI(VFMA(LDK(KP951056516), Tk, VMUL(LDK(KP587785252), Tl))); + Tn = VBYI(VFNMS(LDK(KP587785252), Tk, VMUL(LDK(KP951056516), Tl))); + Tf = VMUL(LDK(KP559016994), VSUB(T8, Te)); + Ti = VFNMS(LDK(KP250000000), Th, Tg); + Tj = VADD(Tf, Ti); + To = VSUB(Ti, Tf); + ST(&(x[WS(rs, 1)]), VSUB(Tj, Tm), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VSUB(To, Tn), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 4)]), VADD(Tm, Tj), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(Tn, To), ms, &(x[0])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 5, XSIMD_STRING("t3fv_5"), twinstr, &GENUS, {19, 15, 3, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_5) (planner *p) { + X(kdft_dit_register) (p, t3fv_5, &desc); +} +#endif /* 
HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/common/t3fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:47:25 EST 2014 */ + +#include "codelet-dft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3fv_8 -include t3f.h */ + +/* + * This function contains 37 FP additions, 32 FP multiplications, + * (or, 27 additions, 22 multiplications, 10 fused multiply/add), + * 43 stack variables, 1 constants, and 16 memory accesses + */ +#include "t3f.h" + +static void t3fv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + 
(TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs)) { + V T2, T3, Tb, T1, T5, Tn, Tq, T8, Td, T4, Ta, Tp, Tg, Ti, T9; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + Tb = LDW(&(W[TWVL * 4])); + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + Td = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + T4 = VZMUL(T2, T3); + Ta = VZMULJ(T2, T3); + Tp = VZMULJ(T2, Tb); + Tg = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Ti = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + T9 = VZMULJ(T2, T8); + { + V T6, To, Tc, Tr, Th, Tj; + T6 = VZMULJ(T4, T5); + To = VZMULJ(Ta, Tn); + Tc = VZMULJ(Ta, Tb); + Tr = VZMULJ(Tp, Tq); + Th = VZMULJ(Tb, Tg); + Tj = VZMULJ(T3, Ti); + { + V Tx, T7, Te, Ts, Ty, Tk, TB; + Tx = VADD(T1, T6); + T7 = VSUB(T1, T6); + Te = VZMULJ(Tc, Td); + Ts = VSUB(To, Tr); + Ty = VADD(To, Tr); + Tk = VSUB(Th, Tj); + TB = VADD(Th, Tj); + { + V Tf, TA, Tz, TD; + Tf = VSUB(T9, Te); + TA = VADD(T9, Te); + Tz = VADD(Tx, Ty); + TD = VSUB(Tx, Ty); + { + V TC, TE, Tl, Tt; + TC = VADD(TA, TB); + TE = VSUB(TB, TA); + Tl = VADD(Tf, Tk); + Tt = VSUB(Tk, Tf); + { + V Tu, Tw, Tm, Tv; + ST(&(x[WS(rs, 2)]), VFMAI(TE, TD), ms, &(x[0])); + ST(&(x[WS(rs, 6)]), VFNMSI(TE, TD), ms, &(x[0])); + ST(&(x[0]), VADD(Tz, TC), ms, &(x[0])); + ST(&(x[WS(rs, 4)]), VSUB(Tz, TC), ms, &(x[0])); + Tu = VFNMS(LDK(KP707106781), Tt, Ts); + Tw = VFMA(LDK(KP707106781), Tt, Ts); + Tm = VFMA(LDK(KP707106781), Tl, T7); + Tv = VFNMS(LDK(KP707106781), Tl, T7); + ST(&(x[WS(rs, 5)]), VFNMSI(Tw, Tv), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VFMAI(Tw, Tv), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 7)]), VFMAI(Tu, Tm), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VFNMSI(Tu, Tm), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, 
XSIMD_STRING("t3fv_8"), twinstr, &GENUS, {27, 22, 10, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_8) (planner *p) { + X(kdft_dit_register) (p, t3fv_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3fv_8 -include t3f.h */ + +/* + * This function contains 37 FP additions, 24 FP multiplications, + * (or, 37 additions, 24 multiplications, 0 fused multiply/add), + * 31 stack variables, 1 constants, and 16 memory accesses + */ +#include "t3f.h" + +static void t3fv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + R *x; + x = ri; + for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs)) { + V T2, T3, Ta, T4, Tb, Tc, Tq; + T2 = LDW(&(W[0])); + T3 = LDW(&(W[TWVL * 2])); + Ta = VZMULJ(T2, T3); + T4 = VZMUL(T2, T3); + Tb = LDW(&(W[TWVL * 4])); + Tc = VZMULJ(Ta, Tb); + Tq = VZMULJ(T2, Tb); + { + V T7, Tx, Tt, Ty, Tf, TA, Tk, TB, T1, T6, T5; + T1 = LD(&(x[0]), ms, &(x[0])); + T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0])); + T6 = VZMULJ(T4, T5); + T7 = VSUB(T1, T6); + Tx = VADD(T1, T6); + { + V Tp, Ts, To, Tr; + To = LD(&(x[WS(rs, 2)]), ms, &(x[0])); + Tp = VZMULJ(Ta, To); + Tr = LD(&(x[WS(rs, 6)]), ms, &(x[0])); + Ts = VZMULJ(Tq, Tr); + Tt = VSUB(Tp, Ts); + Ty = VADD(Tp, Ts); + } + { + V T9, Te, T8, Td; + T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); + T9 = VZMULJ(T2, T8); + Td = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); + Te = VZMULJ(Tc, Td); + Tf = VSUB(T9, Te); + TA = VADD(T9, Te); + } + { + V Th, Tj, Tg, Ti; + Tg = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)])); + Th = VZMULJ(Tb, Tg); + Ti = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); + Tj = VZMULJ(T3, Ti); + Tk = VSUB(Th, Tj); + TB = VADD(Th, Tj); + } + { + V Tz, TC, TD, TE; + Tz = VADD(Tx, Ty); + TC = 
VADD(TA, TB); + ST(&(x[WS(rs, 4)]), VSUB(Tz, TC), ms, &(x[0])); + ST(&(x[0]), VADD(Tz, TC), ms, &(x[0])); + TD = VSUB(Tx, Ty); + TE = VBYI(VSUB(TB, TA)); + ST(&(x[WS(rs, 6)]), VSUB(TD, TE), ms, &(x[0])); + ST(&(x[WS(rs, 2)]), VADD(TD, TE), ms, &(x[0])); + { + V Tm, Tv, Tu, Tw, Tl, Tn; + Tl = VMUL(LDK(KP707106781), VADD(Tf, Tk)); + Tm = VADD(T7, Tl); + Tv = VSUB(T7, Tl); + Tn = VMUL(LDK(KP707106781), VSUB(Tk, Tf)); + Tu = VBYI(VSUB(Tn, Tt)); + Tw = VBYI(VADD(Tt, Tn)); + ST(&(x[WS(rs, 7)]), VSUB(Tm, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 3)]), VADD(Tv, Tw), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 1)]), VADD(Tm, Tu), ms, &(x[WS(rs, 1)])); + ST(&(x[WS(rs, 5)]), VSUB(Tv, Tw), ms, &(x[WS(rs, 1)])); + } + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(0, 1), + VTW(0, 3), + VTW(0, 7), + {TW_NEXT, VL, 0} +}; + +static const ct_desc desc = { 8, XSIMD_STRING("t3fv_8"), twinstr, &GENUS, {37, 24, 0, 0}, 0, 0, 0 }; + +void XSIMD(codelet_t3fv_8) (planner *p) { + X(kdft_dit_register) (p, t3fv_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/n1b.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/n1b.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define GENUS XSIMD(dft_n1bsimd_genus) +extern const kdft_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/n1f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/n1f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define GENUS XSIMD(dft_n1fsimd_genus) +extern const kdft_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/n2b.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/n2b.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA + +#define GENUS XSIMD(dft_n2bsimd_genus) +extern const kdft_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/n2f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/n2f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA + +#define GENUS XSIMD(dft_n2fsimd_genus) +extern const kdft_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/n2s.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/n2s.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA + +#define GENUS XSIMD(dft_n2ssimd_genus) +extern const kdft_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CFLAGS = $(NEON_CFLAGS) +SIMD_HEADER=simd-neon.h + +include $(top_srcdir)/dft/simd/codlist.mk +include $(top_srcdir)/dft/simd/simd.mk + +if HAVE_NEON + +BUILT_SOURCES = $(EXTRA_DIST) +noinst_LTLIBRARIES = libdft_neon_codelets.la +libdft_neon_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,967 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of DFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. 
It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/dft/simd/codlist.mk \ + $(top_srcdir)/dft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = dft/simd/neon +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_neon_codelets_la_LIBADD = +am__libdft_neon_codelets_la_SOURCES_DIST = n1fv_2.c n1fv_3.c n1fv_4.c \ + n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c n1fv_10.c \ + n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c n1fv_16.c \ + n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c n1bv_2.c \ + n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c n1bv_9.c \ + n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ + n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c \ + n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ + n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c 
n2bv_2.c \ + n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c n2bv_14.c \ + n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c n2sv_8.c \ + n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c t1fuv_4.c \ + t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c t1fuv_10.c \ + t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ + t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c \ + t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c t2fv_4.c t2fv_8.c \ + t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c t2fv_10.c t2fv_20.c \ + t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c \ + t3fv_10.c t3fv_20.c t3fv_25.c t1buv_2.c t1buv_3.c t1buv_4.c \ + t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c t1buv_9.c t1buv_10.c \ + t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ + t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c \ + t1bv_64.c t1bv_20.c t1bv_25.c t2bv_2.c t2bv_4.c t2bv_8.c \ + t2bv_16.c t2bv_32.c t2bv_64.c t2bv_5.c t2bv_10.c t2bv_20.c \ + t2bv_25.c t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c \ + t3bv_10.c t3bv_20.c t3bv_25.c t1sv_2.c t1sv_4.c t1sv_8.c \ + t1sv_16.c t1sv_32.c t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c \ + q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c \ + q1bv_8.c genus.c codlist.c +am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ + n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ + n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ + n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo +am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ + n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ + n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ + n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo +am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ + n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ + n2fv_20.lo +am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ + n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo n2bv_64.lo \ + 
n2bv_20.lo +am__objects_5 = n2sv_4.lo n2sv_8.lo n2sv_16.lo n2sv_32.lo n2sv_64.lo +am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ + t1fuv_7.lo t1fuv_8.lo t1fuv_9.lo t1fuv_10.lo +am__objects_7 = t1fv_2.lo t1fv_3.lo t1fv_4.lo t1fv_5.lo t1fv_6.lo \ + t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo +am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo +am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ + t3fv_10.lo t3fv_20.lo t3fv_25.lo +am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ + t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo +am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ + t1bv_7.lo t1bv_8.lo t1bv_9.lo t1bv_10.lo t1bv_12.lo t1bv_15.lo \ + t1bv_16.lo t1bv_32.lo t1bv_64.lo t1bv_20.lo t1bv_25.lo +am__objects_12 = t2bv_2.lo t2bv_4.lo t2bv_8.lo t2bv_16.lo t2bv_32.lo \ + t2bv_64.lo t2bv_5.lo t2bv_10.lo t2bv_20.lo t2bv_25.lo +am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ + t3bv_10.lo t3bv_20.lo t3bv_25.lo +am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo +am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo +am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) $(am__objects_9) \ + $(am__objects_10) $(am__objects_11) $(am__objects_12) \ + $(am__objects_13) $(am__objects_14) $(am__objects_15) \ + $(am__objects_16) $(am__objects_17) +am__objects_19 = $(am__objects_18) genus.lo codlist.lo +@HAVE_NEON_TRUE@am__objects_20 = $(am__objects_19) +@HAVE_NEON_TRUE@am_libdft_neon_codelets_la_OBJECTS = \ +@HAVE_NEON_TRUE@ $(am__objects_20) +libdft_neon_codelets_la_OBJECTS = \ + 
$(am_libdft_neon_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_NEON_TRUE@am_libdft_neon_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_neon_codelets_la_SOURCES) +DIST_SOURCES = $(am__libdft_neon_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. 
+am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ 
+OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = 
@sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(NEON_CFLAGS) +SIMD_HEADER = simd-neon.h + +########################################################################### +# n1fv_ is a hard-coded FFTW_FORWARD FFT of size , using SIMD +N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ +n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c + + +# as above, with restricted input vector stride +N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c + + +# as above, but FFTW_BACKWARD +N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ +n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ +n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c + +N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ +n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c + + +# split-complex codelets +N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c + +########################################################################### +# t1fv_ is a "twiddle" FFT of size , implementing a radix-r DIT step +# for an FFTW_FORWARD transform, using SIMD +T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ +t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ +t1fv_20.c t1fv_25.c + + +# same as t1fv_*, but with different twiddle storage scheme +T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c + +T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ +t3fv_20.c t3fv_25.c + +T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ +t1fuv_8.c t1fuv_9.c t1fuv_10.c + + +# as above, but FFTW_BACKWARD +T1B = 
t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ +t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \ +t1bv_20.c t1bv_25.c + + +# same as t1bv_*, but with different twiddle storage scheme +T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ +t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c + +T3B = t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \ +t3bv_20.c t3bv_25.c + +T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \ +t1buv_8.c t1buv_9.c t1buv_10.c + + +# split-complex codelets +T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c +T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c + +########################################################################### +# q1fv_ is twiddle FFTW_FORWARD FFTs of size (DIF step), +# where the output is transposed, using SIMD. This is used for +# in-place transposes in sizes that are divisible by ^2. These +# codelets have size ~ ^2, so you should probably not use +# bigger than 8 or so. +Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c + +# as above, but FFTW_BACKWARD +Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c + +########################################################################### +SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \ +$(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B) + +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/dft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_NEON_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_NEON_TRUE@noinst_LTLIBRARIES = libdft_neon_codelets.la +@HAVE_NEON_TRUE@libdft_neon_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + 
*$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/simd/neon/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/simd/neon/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft_neon_codelets.la: $(libdft_neon_codelets_la_OBJECTS) $(libdft_neon_codelets_la_DEPENDENCIES) $(EXTRA_libdft_neon_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_libdft_neon_codelets_la_rpath) $(libdft_neon_codelets_la_OBJECTS) $(libdft_neon_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1buv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1fuv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t2bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f 
*.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo 
"/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! 
*/"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2sv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/n2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/n2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/q1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/q1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1buv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1buv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fuv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fuv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1sv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t1sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t1sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/neon/t3fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/t3fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/q1b.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/q1b.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define VTW VTW1 +#define TWVL TWVL1 +#define BYTW BYTW1 +#define BYTWJ BYTWJ1 + +#define GENUS XSIMD(dft_q1bsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/q1f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/q1f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define VTW VTW1 +#define TWVL TWVL1 +#define BYTW BYTW1 +#define BYTWJ BYTWJ1 + +#define GENUS XSIMD(dft_q1fsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/simd.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/simd.mk Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,12 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/dft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! */"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CFLAGS = $(SSE2_CFLAGS) +SIMD_HEADER=simd-sse2.h + +include $(top_srcdir)/dft/simd/codlist.mk +include $(top_srcdir)/dft/simd/simd.mk + +if HAVE_SSE2 + +BUILT_SOURCES = $(EXTRA_DIST) +noinst_LTLIBRARIES = libdft_sse2_codelets.la +libdft_sse2_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,967 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of DFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/dft/simd/codlist.mk \ + $(top_srcdir)/dft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = dft/simd/sse2 +ACLOCAL_M4 = 
$(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libdft_sse2_codelets_la_LIBADD = +am__libdft_sse2_codelets_la_SOURCES_DIST = n1fv_2.c n1fv_3.c n1fv_4.c \ + n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c n1fv_9.c n1fv_10.c \ + n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c n1fv_16.c \ + n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c n1bv_2.c \ + n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c n1bv_9.c \ + n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ + n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c \ + n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ + n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c n2bv_2.c \ + n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c n2bv_14.c \ + n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c n2sv_4.c n2sv_8.c \ + n2sv_16.c n2sv_32.c n2sv_64.c t1fuv_2.c t1fuv_3.c t1fuv_4.c \ + t1fuv_5.c t1fuv_6.c t1fuv_7.c t1fuv_8.c t1fuv_9.c t1fuv_10.c \ + t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ + t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c \ + t1fv_64.c t1fv_20.c t1fv_25.c t2fv_2.c t2fv_4.c t2fv_8.c \ + t2fv_16.c t2fv_32.c t2fv_64.c t2fv_5.c t2fv_10.c t2fv_20.c \ + t2fv_25.c t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c \ + t3fv_10.c t3fv_20.c 
t3fv_25.c t1buv_2.c t1buv_3.c t1buv_4.c \ + t1buv_5.c t1buv_6.c t1buv_7.c t1buv_8.c t1buv_9.c t1buv_10.c \ + t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ + t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c \ + t1bv_64.c t1bv_20.c t1bv_25.c t2bv_2.c t2bv_4.c t2bv_8.c \ + t2bv_16.c t2bv_32.c t2bv_64.c t2bv_5.c t2bv_10.c t2bv_20.c \ + t2bv_25.c t3bv_4.c t3bv_8.c t3bv_16.c t3bv_32.c t3bv_5.c \ + t3bv_10.c t3bv_20.c t3bv_25.c t1sv_2.c t1sv_4.c t1sv_8.c \ + t1sv_16.c t1sv_32.c t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c \ + q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c q1bv_2.c q1bv_4.c q1bv_5.c \ + q1bv_8.c genus.c codlist.c +am__objects_1 = n1fv_2.lo n1fv_3.lo n1fv_4.lo n1fv_5.lo n1fv_6.lo \ + n1fv_7.lo n1fv_8.lo n1fv_9.lo n1fv_10.lo n1fv_11.lo n1fv_12.lo \ + n1fv_13.lo n1fv_14.lo n1fv_15.lo n1fv_16.lo n1fv_32.lo \ + n1fv_64.lo n1fv_128.lo n1fv_20.lo n1fv_25.lo +am__objects_2 = n1bv_2.lo n1bv_3.lo n1bv_4.lo n1bv_5.lo n1bv_6.lo \ + n1bv_7.lo n1bv_8.lo n1bv_9.lo n1bv_10.lo n1bv_11.lo n1bv_12.lo \ + n1bv_13.lo n1bv_14.lo n1bv_15.lo n1bv_16.lo n1bv_32.lo \ + n1bv_64.lo n1bv_128.lo n1bv_20.lo n1bv_25.lo +am__objects_3 = n2fv_2.lo n2fv_4.lo n2fv_6.lo n2fv_8.lo n2fv_10.lo \ + n2fv_12.lo n2fv_14.lo n2fv_16.lo n2fv_32.lo n2fv_64.lo \ + n2fv_20.lo +am__objects_4 = n2bv_2.lo n2bv_4.lo n2bv_6.lo n2bv_8.lo n2bv_10.lo \ + n2bv_12.lo n2bv_14.lo n2bv_16.lo n2bv_32.lo n2bv_64.lo \ + n2bv_20.lo +am__objects_5 = n2sv_4.lo n2sv_8.lo n2sv_16.lo n2sv_32.lo n2sv_64.lo +am__objects_6 = t1fuv_2.lo t1fuv_3.lo t1fuv_4.lo t1fuv_5.lo t1fuv_6.lo \ + t1fuv_7.lo t1fuv_8.lo t1fuv_9.lo t1fuv_10.lo +am__objects_7 = t1fv_2.lo t1fv_3.lo t1fv_4.lo t1fv_5.lo t1fv_6.lo \ + t1fv_7.lo t1fv_8.lo t1fv_9.lo t1fv_10.lo t1fv_12.lo t1fv_15.lo \ + t1fv_16.lo t1fv_32.lo t1fv_64.lo t1fv_20.lo t1fv_25.lo +am__objects_8 = t2fv_2.lo t2fv_4.lo t2fv_8.lo t2fv_16.lo t2fv_32.lo \ + t2fv_64.lo t2fv_5.lo t2fv_10.lo t2fv_20.lo t2fv_25.lo +am__objects_9 = t3fv_4.lo t3fv_8.lo t3fv_16.lo t3fv_32.lo t3fv_5.lo \ + 
t3fv_10.lo t3fv_20.lo t3fv_25.lo +am__objects_10 = t1buv_2.lo t1buv_3.lo t1buv_4.lo t1buv_5.lo \ + t1buv_6.lo t1buv_7.lo t1buv_8.lo t1buv_9.lo t1buv_10.lo +am__objects_11 = t1bv_2.lo t1bv_3.lo t1bv_4.lo t1bv_5.lo t1bv_6.lo \ + t1bv_7.lo t1bv_8.lo t1bv_9.lo t1bv_10.lo t1bv_12.lo t1bv_15.lo \ + t1bv_16.lo t1bv_32.lo t1bv_64.lo t1bv_20.lo t1bv_25.lo +am__objects_12 = t2bv_2.lo t2bv_4.lo t2bv_8.lo t2bv_16.lo t2bv_32.lo \ + t2bv_64.lo t2bv_5.lo t2bv_10.lo t2bv_20.lo t2bv_25.lo +am__objects_13 = t3bv_4.lo t3bv_8.lo t3bv_16.lo t3bv_32.lo t3bv_5.lo \ + t3bv_10.lo t3bv_20.lo t3bv_25.lo +am__objects_14 = t1sv_2.lo t1sv_4.lo t1sv_8.lo t1sv_16.lo t1sv_32.lo +am__objects_15 = t2sv_4.lo t2sv_8.lo t2sv_16.lo t2sv_32.lo +am__objects_16 = q1fv_2.lo q1fv_4.lo q1fv_5.lo q1fv_8.lo +am__objects_17 = q1bv_2.lo q1bv_4.lo q1bv_5.lo q1bv_8.lo +am__objects_18 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) $(am__objects_9) \ + $(am__objects_10) $(am__objects_11) $(am__objects_12) \ + $(am__objects_13) $(am__objects_14) $(am__objects_15) \ + $(am__objects_16) $(am__objects_17) +am__objects_19 = $(am__objects_18) genus.lo codlist.lo +@HAVE_SSE2_TRUE@am__objects_20 = $(am__objects_19) +@HAVE_SSE2_TRUE@am_libdft_sse2_codelets_la_OBJECTS = \ +@HAVE_SSE2_TRUE@ $(am__objects_20) +libdft_sse2_codelets_la_OBJECTS = \ + $(am_libdft_sse2_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_SSE2_TRUE@am_libdft_sse2_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) 
$(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdft_sse2_codelets_la_SOURCES) +DIST_SOURCES = $(am__libdft_sse2_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(SSE2_CFLAGS) +SIMD_HEADER = simd-sse2.h + +########################################################################### +# 
n1fv_<n> is a hard-coded FFTW_FORWARD FFT of size <n>, using SIMD +N1F = n1fv_2.c n1fv_3.c n1fv_4.c n1fv_5.c n1fv_6.c n1fv_7.c n1fv_8.c \ +n1fv_9.c n1fv_10.c n1fv_11.c n1fv_12.c n1fv_13.c n1fv_14.c n1fv_15.c \ +n1fv_16.c n1fv_32.c n1fv_64.c n1fv_128.c n1fv_20.c n1fv_25.c + + +# as above, with restricted input vector stride +N2F = n2fv_2.c n2fv_4.c n2fv_6.c n2fv_8.c n2fv_10.c n2fv_12.c \ +n2fv_14.c n2fv_16.c n2fv_32.c n2fv_64.c n2fv_20.c + + +# as above, but FFTW_BACKWARD +N1B = n1bv_2.c n1bv_3.c n1bv_4.c n1bv_5.c n1bv_6.c n1bv_7.c n1bv_8.c \ +n1bv_9.c n1bv_10.c n1bv_11.c n1bv_12.c n1bv_13.c n1bv_14.c n1bv_15.c \ +n1bv_16.c n1bv_32.c n1bv_64.c n1bv_128.c n1bv_20.c n1bv_25.c + +N2B = n2bv_2.c n2bv_4.c n2bv_6.c n2bv_8.c n2bv_10.c n2bv_12.c \ +n2bv_14.c n2bv_16.c n2bv_32.c n2bv_64.c n2bv_20.c + + +# split-complex codelets +N2S = n2sv_4.c n2sv_8.c n2sv_16.c n2sv_32.c n2sv_64.c + +########################################################################### +# t1fv_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT step +# for an FFTW_FORWARD transform, using SIMD +T1F = t1fv_2.c t1fv_3.c t1fv_4.c t1fv_5.c t1fv_6.c t1fv_7.c t1fv_8.c \ +t1fv_9.c t1fv_10.c t1fv_12.c t1fv_15.c t1fv_16.c t1fv_32.c t1fv_64.c \ +t1fv_20.c t1fv_25.c + + +# same as t1fv_*, but with different twiddle storage scheme +T2F = t2fv_2.c t2fv_4.c t2fv_8.c t2fv_16.c t2fv_32.c t2fv_64.c \ +t2fv_5.c t2fv_10.c t2fv_20.c t2fv_25.c + +T3F = t3fv_4.c t3fv_8.c t3fv_16.c t3fv_32.c t3fv_5.c t3fv_10.c \ +t3fv_20.c t3fv_25.c + +T1FU = t1fuv_2.c t1fuv_3.c t1fuv_4.c t1fuv_5.c t1fuv_6.c t1fuv_7.c \ +t1fuv_8.c t1fuv_9.c t1fuv_10.c + + +# as above, but FFTW_BACKWARD +T1B = t1bv_2.c t1bv_3.c t1bv_4.c t1bv_5.c t1bv_6.c t1bv_7.c t1bv_8.c \ +t1bv_9.c t1bv_10.c t1bv_12.c t1bv_15.c t1bv_16.c t1bv_32.c t1bv_64.c \ +t1bv_20.c t1bv_25.c + + +# same as t1bv_*, but with different twiddle storage scheme +T2B = t2bv_2.c t2bv_4.c t2bv_8.c t2bv_16.c t2bv_32.c t2bv_64.c \ +t2bv_5.c t2bv_10.c t2bv_20.c t2bv_25.c + +T3B = t3bv_4.c t3bv_8.c
t3bv_16.c t3bv_32.c t3bv_5.c t3bv_10.c \ +t3bv_20.c t3bv_25.c + +T1BU = t1buv_2.c t1buv_3.c t1buv_4.c t1buv_5.c t1buv_6.c t1buv_7.c \ +t1buv_8.c t1buv_9.c t1buv_10.c + + +# split-complex codelets +T1S = t1sv_2.c t1sv_4.c t1sv_8.c t1sv_16.c t1sv_32.c +T2S = t2sv_4.c t2sv_8.c t2sv_16.c t2sv_32.c + +########################################################################### +# q1fv_<r> is <r> twiddle FFTW_FORWARD FFTs of size <r> (DIF step), +# where the output is transposed, using SIMD. This is used for +# in-place transposes in sizes that are divisible by <r>^2. These +# codelets have size ~ <r>^2, so you should probably not use <r> +# bigger than 8 or so. +Q1F = q1fv_2.c q1fv_4.c q1fv_5.c q1fv_8.c + +# as above, but FFTW_BACKWARD +Q1B = q1bv_2.c q1bv_4.c q1bv_5.c q1bv_8.c + +########################################################################### +SIMD_CODELETS = $(N1F) $(N1B) $(N2F) $(N2B) $(N2S) $(T1FU) $(T1F) \ +$(T2F) $(T3F) $(T1BU) $(T1B) $(T2B) $(T3B) $(T1S) $(T2S) $(Q1F) $(Q1B) + +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/dft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_SSE2_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_SSE2_TRUE@noinst_LTLIBRARIES = libdft_sse2_codelets.la +@HAVE_SSE2_TRUE@libdft_sse2_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu dft/simd/sse2/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu dft/simd/sse2/Makefile +.PRECIOUS: Makefile +Makefile:
$(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/dft/simd/codlist.mk $(top_srcdir)/dft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdft_sse2_codelets.la: $(libdft_sse2_codelets_la_OBJECTS) $(libdft_sse2_codelets_la_DEPENDENCIES) $(EXTRA_libdft_sse2_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_libdft_sse2_codelets_la_rpath) $(libdft_sse2_codelets_la_OBJECTS) $(libdft_sse2_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_128.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/n2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/n2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/q1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1buv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1bv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1bv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fuv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t1fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1fv_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t1sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2bv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t2bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2fv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t2sv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3bv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/t3fv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t3fv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || 
unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! 
*/"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_11.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_128.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_13.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_14.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2sv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/n2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/n2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/q1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/q1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1buv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1buv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1bv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1bv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fuv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fuv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_15.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_3.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_7.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1fv_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1fv_9.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1sv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t1sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t1sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_64.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2sv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2sv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2sv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t2sv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t2sv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. 
DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3bv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3bv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_25.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_5.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/sse2/t3fv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/t3fv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t1b.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t1b.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTW1 +#define TWVL TWVL1 +#define BYTW BYTW1 +#define BYTWJ BYTWJ1 + +#define GENUS XSIMD(dft_t1bsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t1bu.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t1bu.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define VTW VTW1 +#define TWVL TWVL1 +#define BYTW BYTW1 +#define BYTWJ BYTWJ1 + +#define GENUS XSIMD(dft_t1busimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t1f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t1f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTW1 +#define TWVL TWVL1 +#define BYTW BYTW1 +#define BYTWJ BYTWJ1 + +#define GENUS XSIMD(dft_t1fsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t1fu.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t1fu.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define VTW VTW1 +#define TWVL TWVL1 +#define BYTW BYTW1 +#define BYTWJ BYTWJ1 + +#define GENUS XSIMD(dft_t1fusimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t2b.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t2b.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTW2 +#define TWVL TWVL2 +#define BYTW BYTW2 +#define BYTWJ BYTWJ2 + +#define GENUS XSIMD(dft_t2bsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t2f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t2f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTW2 +#define TWVL TWVL2 +#define BYTW BYTW2 +#define BYTWJ BYTWJ2 + +#define GENUS XSIMD(dft_t2fsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t3b.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t3b.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTW3 +#define TWVL TWVL3 +#define LDW(x) LDA(x, 0, 0) /* load twiddle factor */ + +/* same as t1b otherwise */ +#define GENUS XSIMD(dft_t1bsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/t3f.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/t3f.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTW3 +#define TWVL TWVL3 +#define LDW(x) LDA(x, 0, 0) /* load twiddle factor */ + +/* same as t1f otherwise */ +#define GENUS XSIMD(dft_t1fsimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/simd/ts.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/simd/ts.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#undef LD +#define LD LDA +#undef ST +#define ST STA + +#define VTW VTWS +#define TWVL TWVLS +#define LDW(x) LDA(x, 0, 0) /* load twiddle factor */ + +#define GENUS XSIMD(dft_tssimd_genus) +extern const ct_genus GENUS; + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/solve.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/solve.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" + +/* use the apply() operation for DFT problems */ +void X(dft_solve)(const plan *ego_, const problem *p_) +{ + const plan_dft *ego = (const plan_dft *) ego_; + const problem_dft *p = (const problem_dft *) p_; + ego->apply(ego_, + UNTAINT(p->ri), UNTAINT(p->ii), + UNTAINT(p->ro), UNTAINT(p->io)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/vrank-geq1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/vrank-geq1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + + +/* Plans for handling vector transform loops. These are *just* the + loops, and rely on child plans for the actual DFTs. + + They form a wrapper around solvers that don't have apply functions + for non-null vectors. + + vrank-geq1 plans also recursively handle the case of multi-dimensional + vectors, obviating the need for most solvers to deal with this. 
We + can also play games here, such as reordering the vector loops. + + Each vrank-geq1 plan reduces the vector rank by 1, picking out a + dimension determined by the vecloop_dim field of the solver. */ + +#include "dft.h" + +typedef struct { + solver super; + int vecloop_dim; + const int *buddies; + int nbuddies; +} S; + +typedef struct { + plan_dft super; + + plan *cld; + INT vl; + INT ivs, ovs; + const S *solver; +} P; + +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT i, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + dftapply cldapply = ((plan_dft *) ego->cld)->apply; + + for (i = 0; i < vl; ++i) { + cldapply(ego->cld, + ri + i * ivs, ii + i * ivs, ro + i * ovs, io + i * ovs); + } +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + p->print(p, "(dft-vrank>=1-x%D/%d%(%p%))", + ego->vl, s->vecloop_dim, ego->cld); +} + +static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) +{ + return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, + vecsz, oop, dp); +} + +static int applicable0(const solver *ego_, const problem *p_, int *dp) +{ + const S *ego = (const S *) ego_; + const problem_dft *p = (const problem_dft *) p_; + + return (1 + && FINITE_RNK(p->vecsz->rnk) + && p->vecsz->rnk > 0 + + /* do not bother looping over rank-0 problems, + since they are handled via rdft */ + && p->sz->rnk > 0 + + && pickdim(ego, p->vecsz, p->ri != p->ro, dp) + ); +} + +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *)ego_; + const problem_dft *p; + + if (!applicable0(ego_, p_, dp)) return 0; + + /* fftw2 behavior */ + if 
(NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) + return 0; + + p = (const problem_dft *) p_; + + if (NO_UGLYP(plnr)) { + /* Heuristic: if the transform is multi-dimensional, and the + vector stride is less than the transform size, then we + probably want to use a rank>=2 plan first in order to combine + this vector with the transform-dimension vectors. */ + { + iodim *d = p->vecsz->dims + *dp; + if (1 + && p->sz->rnk > 1 + && X(imin)(X(iabs)(d->is), X(iabs)(d->os)) + < X(tensor_max_index)(p->sz) + ) + return 0; + } + + if (NO_NONTHREADEDP(plnr)) return 0; /* prefer threaded version */ + } + + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_dft *p; + P *pln; + plan *cld; + int vdim; + iodim *d; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &vdim)) + return (plan *) 0; + p = (const problem_dft *) p_; + + d = p->vecsz->dims + vdim; + + A(d->n > 1); + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(tensor_copy)(p->sz), + X(tensor_copy_except)(p->vecsz, vdim), + TAINT(p->ri, d->is), TAINT(p->ii, d->is), + TAINT(p->ro, d->os), TAINT(p->io, d->os))); + if (!cld) return (plan *) 0; + + pln = MKPLAN_DFT(P, &padt, apply); + + pln->cld = cld; + pln->vl = d->n; + pln->ivs = d->is; + pln->ovs = d->os; + + pln->solver = ego; + X(ops_zero)(&pln->super.super.ops); + pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */ + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + + if (p->sz->rnk != 1 || (p->sz->dims[0].n > 64)) + pln->super.super.pcost = pln->vl * cld->pcost; + + return &(pln->super.super); +} + +static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->vecloop_dim = vecloop_dim; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return 
&(slv->super); +} + +void X(dft_vrank_geq1_register)(planner *p) +{ + int i; + + /* FIXME: Should we try other vecloop_dim values? */ + static const int buddies[] = { 1, -1 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/dft/zero.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/dft/zero.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" + +/* fill a complex array with zeros. 
*/ +static void recur(const iodim *dims, int rnk, R *ri, R *ii) +{ + if (rnk == RNK_MINFTY) + return; + else if (rnk == 0) + ri[0] = ii[0] = K(0.0); + else if (rnk > 0) { + INT i, n = dims[0].n; + INT is = dims[0].is; + + if (rnk == 1) { + /* this case is redundant but faster */ + for (i = 0; i < n; ++i) + ri[i * is] = ii[i * is] = K(0.0); + } else { + for (i = 0; i < n; ++i) + recur(dims + 1, rnk - 1, ri + i * is, ii + i * is); + } + } +} + + +void X(dft_zerotens)(tensor *sz, R *ri, R *ii) +{ + recur(sz->dims, sz->rnk, ri, ii); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +BFNNCONV_SRC = bfnnconv.pl m-ascii.pl m-html.pl m-info.pl m-lout.pl m-post.pl + +FAQ = fftw-faq.ascii fftw-faq.html +EXTRA_DIST = fftw-faq.bfnn $(FAQ) $(BFNNCONV_SRC) html.refs + +html.refs2: html.refs + cp -f ${srcdir}/html.refs html.refs2 + +$(FAQ): $(BFNNCONV_SRC) fftw-faq.bfnn html.refs2 + @echo converting... + perl -I${srcdir} ${srcdir}/bfnnconv.pl < ${srcdir}/fftw-faq.bfnn + @echo converting again... + perl -I${srcdir} ${srcdir}/bfnnconv.pl < ${srcdir}/fftw-faq.bfnn + rm -f fftw-faq.ascii + mv stdin.ascii fftw-faq.ascii + rm -rf fftw-faq.html + mv -f stdin.html fftw-faq.html + +faq: $(FAQ) + +clean-local: + rm -f *~ core a.out *.lout *.ps *.info *.ascii *.xrefdb *.post + rm -rf *.html html.refs2 diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,480 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc/FAQ +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = 
@AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = 
@THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +BFNNCONV_SRC = bfnnconv.pl m-ascii.pl m-html.pl m-info.pl m-lout.pl m-post.pl +FAQ = fftw-faq.ascii fftw-faq.html +EXTRA_DIST = fftw-faq.bfnn $(FAQ) $(BFNNCONV_SRC) html.refs +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd 
$(top_srcdir) && $(AUTOMAKE) --gnu doc/FAQ/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu doc/FAQ/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-local mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + clean-local cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am + + +html.refs2: html.refs + cp -f ${srcdir}/html.refs html.refs2 + +$(FAQ): $(BFNNCONV_SRC) fftw-faq.bfnn html.refs2 + @echo converting... + perl -I${srcdir} ${srcdir}/bfnnconv.pl < ${srcdir}/fftw-faq.bfnn + @echo converting again... 
+ perl -I${srcdir} ${srcdir}/bfnnconv.pl < ${srcdir}/fftw-faq.bfnn + rm -f fftw-faq.ascii + mv stdin.ascii fftw-faq.ascii + rm -rf fftw-faq.html + mv -f stdin.html fftw-faq.html + +faq: $(FAQ) + +clean-local: + rm -f *~ core a.out *.lout *.ps *.info *.ascii *.xrefdb *.post + rm -rf *.html html.refs2 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/bfnnconv.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/bfnnconv.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,298 @@ +#!/usr/bin/perl -- +# Copyright (C) 1993-1995 Ian Jackson. + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# It is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# (Note: I do not consider works produced using these BFNN processing +# tools to be derivative works of the tools, so they are NOT covered +# by the GPL. However, I would appreciate it if you credited me if +# appropriate in any documents you format using BFNN.) 
+ +@outputs=('ascii','info','html'); + +while ($ARGV[0] =~ m/^\-/) { + $_= shift(@ARGV); + if (m/^-only/) { + @outputs= (shift(@ARGV)); + } else { + warn "unknown option `$_' ignored"; + } +} + +$prefix= $ARGV[0]; +$prefix= 'stdin' unless length($prefix); +$prefix =~ s/\.bfnn$//; + +if (open(O,"$prefix.xrefdb")) { + @xrefdb= ; + close(O); +} else { + warn "no $prefix.xrefdb ($!)"; +} + +$section= -1; +for $thisxr (@xrefdb) { + $_= $thisxr; + chop; + if (m/^Q (\w+) ((\d+)\.(\d+)) (.*)$/) { + $qrefn{$1}= $2; + $qreft{$1}= $5; + $qn2ref{$3,$4}= $1; + $maxsection= $3; + $maxquestion[$3]= $4; + } elsif (m/^S (\d+) /) { + $maxsection= $1; + $sn2title{$1}=$'; + } +} + +open(U,">$prefix.xrefdb-new"); + +for $x (@outputs) { require("m-$x.pl"); } + +&call('init'); + +while (<>) { + chop; + next if m/^\\comment\b/; + if (!m/\S/) { + &call('endpara'); + next; + } + if (s/^\\section +//) { + $line= $_; + $section++; $question=0; + print U "S $section $line\n"; + $|=1; print "S$section",' 'x10,"\r"; $|=0; + &call('endpara'); + &call('startmajorheading',"$section", + "Section $section", + $section<$maxsection ? "Section ".($section+1) : '', + $section>1 ? 'Section '.($section-1) : 'Top'); + &text($line); + &call('endmajorheading'); + if ($section) { + &call('endpara'); + &call('startindex'); + for $thisxr (@xrefdb) { + $_= $thisxr; + chop; + if (m/^Q (\w+) (\d+)\.(\d+) (.*)$/) { + $ref= $1; $num1= $2; $num2= $3; $text= $4; + next unless $num1 == $section; + &call('startindexitem',$ref,"Q$num1.$num2","Question $num1.$num2"); + &text($text); + &call('endindexitem'); + } + } + &call('endindex'); + } + } elsif (s/^\\question \d{2}[a-z]{3}((:\w+)?) 
+//) { + $line= $_; + $question++; + $qrefstring= $1; + $qrefstring= "q_${section}_$question" unless $qrefstring =~ s/^://; + print U "Q $qrefstring $section.$question $line\n"; + $|=1; print "Q$section.$question",' 'x10,"\r"; $|=0; + &call('endpara'); + &call('startminorheading',$qrefstring, + "Question $section.$question", + $question < $maxquestion[$section] ? "Question $section.".($question+1) : + $section < $maxsection ? "Question ".($section+1).".1" : '', + $question > 1 ? "Question $section.".($question-1) : + $section > 1 ? "Question ".($section-1).'.'.($maxquestion[$section-1]) : + 'Top', + "Section $section"); + &text("Question $section.$question. $line"); + &call('endminorheading'); + } elsif (s/^\\only +//) { + @saveoutputs= @outputs; + @outputs=(); + for $x (split(/\s+/,$_)) { + push(@outputs,$x) if grep($x eq $_, @saveoutputs); + } + } elsif (s/^\\endonly$//) { + @outputs= @saveoutputs; + } elsif (s/^\\copyto +//) { + $fh= $'; + while(<>) { + last if m/^\\endcopy$/; + while (s/^([^\`]*)\`//) { + print $fh $1; + m/([^\\])\`/ || warn "`$_'"; + $_= $'; + $cmd= $`.$1; + $it= `$cmd`; chop $it; + print $fh $it; + } + print $fh $_; + } + } elsif (m/\\index$/) { + &call('startindex'); + for $thisxr (@xrefdb) { + $_= $thisxr; + chop; + if (m/^Q (\w+) (\d+\.\d+) (.*)$/) { + $ref= $1; $num= $2; $text= $3; + &call('startindexitem',$ref,"Q$num","Question $num"); + &text($text); + &call('endindexitem'); + } elsif (m/^S (\d+) (.*)$/) { + $num= $1; $text= $2; + next unless $num; + &call('startindexmainitem',"s_$num", + "Section $num.","Section $num"); + &text($text); + &call('endindexitem'); + } else { + warn $_; + } + } + &call('endindex'); + } elsif (m/^\\call-(\w+) +(\w+)\s*(.*)$/) { + $fn= $1.'_'.$2; + eval { &$fn($3); }; + warn $@ if length($@); + } elsif (m/^\\call +(\w+)\s*(.*)$/) { + eval { &call($1,$2); }; + warn $@ if length($@); + } elsif (s/^\\set +(\w+)\s*//) { + $svalue= $'; $svari= $1; + eval("\$user_$svari=\$svalue"); $@ && warn "setting $svalue 
failed: $@\n"; + } elsif (m/^\\verbatim$/) { + &call('startverbatim'); + while (<>) { + chop; + last if m/^\\endverbatim$/; + &call('verbatim',$_); + } + &call('endverbatim'); + } else { + s/\.$/\. /; + &text($_." "); + } +} + +print ' 'x25,"\r"; +&call('finish'); +rename("$prefix.xrefdb-new","$prefix.xrefdb") || warn "rename xrefdb: $!"; +exit 0; + + +sub text { + local($in,$rhs,$word,$refn,$reft,$fn,$style); + $in= "$holdover$_[0]"; + $holdover= ''; + while ($in =~ m/\\/) { +#print STDERR ">$`##$'\n"; + $rhs=$'; + &call('text',$`); + $_= $rhs; + if (m/^\w+ $/) { + $holdover= "\\$&"; + $in= ''; + } elsif (s/^fn\s+([^\s\\]*\w)//) { + $in= $_; + $word= $1; + &call('courier'); + &call('text',$word); + &call('endcourier'); + } elsif (s/^tab\s+(\d+)\s+//) { + $in= $_; &call('tab',$1); + } elsif (s/^nl\s+//) { + $in= $_; &call('newline'); + } elsif (s/^qref\s+(\w+)//) { + $refn= $qrefn{$1}; + $reft= $qreft{$1}; + if (!length($refn)) { + warn "unknown question `$1'"; + } + $in= "$`\\pageref:$1:$refn:$reft\\endpageref.$_"; + } elsif (s/^pageref:(\w+):([^:\n]+)://) { + $in= $_; + &call('pageref',$1,$2); + } elsif (s/^endpageref\.//) { + $in= $_; &call('endpageref'); + } elsif (s/^(\w+)\{//) { + $in= $_; $fn= $1; + eval { &call("$fn"); }; + if (length($@)) { warn $@; $fn= 'x'; } + push(@styles,$fn); + } elsif (s/^\}//) { + $in= $_; + $fn= pop(@styles); + if ($fn ne 'x') { &call("end$fn"); } + } elsif (s/^\\//) { + $in= $_; + &call('text',"\\"); + } elsif (s,^(\w+)\s+([-A-Za-z0-9.\@:/]*\w),,) { +#print STDERR "**$&**$_\n"; + $in= $_; + $style=$1; $word= $2; + &call($style); + &call('text',$word); + &call("end$style"); + } else { + warn "unknown control `\\$_'"; + $in= $_; + } + } + &call('text',$in); +} + + +sub call { + local ($fnbase, @callargs) = @_; + local ($coutput); + for $coutput (@outputs) { + if ($fnbase eq 'text' && eval("\@${coutput}_cmds")) { +#print STDERR "special handling text (@callargs) for $coutput\n"; + $evstrg= 
"\$${coutput}_args[\$#${coutput}_args].=\"\@callargs\""; + eval($evstrg); + length($@) && warn "call adding for $coutput (($evstrg)): $@"; + } else { + $fntc= $coutput.'_'.$fnbase; + &$fntc(@callargs); + } + } +} + + +sub recurse { + local (@outputs) = $coutput; + local ($holdover); + &text($_[0]); +} + + +sub arg { +#print STDERR "arg($_[0]) from $coutput\n"; + $cmd= $_[0]; + eval("push(\@${coutput}_cmds,\$cmd); push(\@${coutput}_args,'')"); + length($@) && warn "arg setting up for $coutput: $@"; +} + +sub endarg { +#print STDERR "endarg($_[0]) from $coutput\n"; + $evstrg= "\$${coutput}_cmd= \$cmd= pop(\@${coutput}_cmds); ". + "\$${coutput}_arg= \$arg= pop(\@${coutput}_args); "; + eval($evstrg); + length($@) && warn "endarg extracting for $coutput (($evstrg)): $@"; +#print STDERR ">call $coutput $cmd $arg< (($evstrg))\n"; + $evstrg= "&${coutput}_do_${cmd}(\$arg)"; + eval($evstrg); + length($@) && warn "endarg running ${coutput}_do_${cmd} (($evstrg)): $@"; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.ascii --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.ascii Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,842 @@ + FFTW FREQUENTLY ASKED QUESTIONS WITH ANSWERS + 04 Mar 2014 + Matteo Frigo + Steven G. Johnson + + +This is the list of Frequently Asked Questions about FFTW, a collection of +fast C routines for computing the Discrete Fourier Transform in one or +more dimensions. + +=============================================================================== + +Index + + Section 1. Introduction and General Information + Q1.1 What is FFTW? + Q1.2 How do I obtain FFTW? + Q1.3 Is FFTW free software? + Q1.4 What is this about non-free licenses? + Q1.5 In the West? I thought MIT was in the East? + + Section 2. Installing FFTW + Q2.1 Which systems does FFTW run on? + Q2.2 Does FFTW run on Windows? + Q2.3 My compiler has trouble with FFTW. + Q2.4 FFTW does not compile on Solaris, complaining about const. 
+ Q2.5 What's the difference between --enable-3dnow and --enable-k7? + Q2.6 What's the difference between the fma and the non-fma versions? + Q2.7 Which language is FFTW written in? + Q2.8 Can I call FFTW from Fortran? + Q2.9 Can I call FFTW from C++? + Q2.10 Why isn't FFTW written in Fortran/C++? + Q2.11 How do I compile FFTW to run in single precision? + Q2.12 --enable-k7 does not work on x86-64 + + Section 3. Using FFTW + Q3.1 Why not support the FFTW 2 interface in FFTW 3? + Q3.2 Why do FFTW 3 plans encapsulate the input/output arrays and not ju + Q3.3 FFTW seems really slow. + Q3.4 FFTW slows down after repeated calls. + Q3.5 An FFTW routine is crashing when I call it. + Q3.6 My Fortran program crashes when calling FFTW. + Q3.7 FFTW gives results different from my old FFT. + Q3.8 FFTW gives different results between runs + Q3.9 Can I save FFTW's plans? + Q3.10 Why does your inverse transform return a scaled result? + Q3.11 How can I make FFTW put the origin (zero frequency) at the center + Q3.12 How do I FFT an image/audio file in *foobar* format? + Q3.13 My program does not link (on Unix). + Q3.14 I included your header, but linking still fails. + Q3.15 My program crashes, complaining about stack space. + Q3.16 FFTW seems to have a memory leak. + Q3.17 The output of FFTW's transform is all zeros. + Q3.18 How do I call FFTW from the Microsoft language du jour? + Q3.19 Can I compute only a subset of the DFT outputs? + Q3.20 Can I use FFTW's routines for in-place and out-of-place matrix tra + + Section 4. Internals of FFTW + Q4.1 How does FFTW work? + Q4.2 Why is FFTW so fast? + + Section 5. Known bugs + Q5.1 FFTW 1.1 crashes in rfftwnd on Linux. + Q5.2 The MPI transforms in FFTW 1.2 give incorrect results/leak memory. + Q5.3 The test programs in FFTW 1.2.1 fail when I change FFTW to use sin + Q5.4 The test program in FFTW 1.2.1 fails for n > 46340. 
+ Q5.5 The threaded code fails on Linux Redhat 5.0 + Q5.6 FFTW 2.0's rfftwnd fails for rank > 1 transforms with a final dime + Q5.7 FFTW 2.0's complex transforms give the wrong results with prime fa + Q5.8 FFTW 2.1.1's MPI test programs crash with MPICH. + Q5.9 FFTW 2.1.2's multi-threaded transforms don't work on AIX. + Q5.10 FFTW 2.1.2's complex transforms give incorrect results for large p + Q5.11 FFTW 2.1.3's multi-threaded transforms don't give any speedup on S + Q5.12 FFTW 2.1.3 crashes on AIX. + +=============================================================================== + +Section 1. Introduction and General Information + + Q1.1 What is FFTW? + Q1.2 How do I obtain FFTW? + Q1.3 Is FFTW free software? + Q1.4 What is this about non-free licenses? + Q1.5 In the West? I thought MIT was in the East? + +------------------------------------------------------------------------------- + +Question 1.1. What is FFTW? + +FFTW is a free collection of fast C routines for computing the Discrete +Fourier Transform in one or more dimensions. It includes complex, real, +symmetric, and parallel transforms, and can handle arbitrary array sizes +efficiently. FFTW is typically faster than other publicly-available FFT +implementations, and is even competitive with vendor-tuned libraries. +(See our web page for extensive benchmarks.) To achieve this performance, +FFTW uses novel code-generation and runtime self-optimization techniques +(along with many other tricks). + +------------------------------------------------------------------------------- + +Question 1.2. How do I obtain FFTW? + +FFTW can be found at the FFTW web page. You can also retrieve it from +ftp.fftw.org in /pub/fftw. + +------------------------------------------------------------------------------- + +Question 1.3. Is FFTW free software?
+ +Starting with version 1.3, FFTW is Free Software in the technical sense +defined by the Free Software Foundation (see Categories of Free and +Non-Free Software), and is distributed under the terms of the GNU General +Public License. Previous versions of FFTW were distributed without fee +for noncommercial use, but were not technically ``free.'' + +Non-free licenses for FFTW are also available that permit different terms +of use than the GPL. + +------------------------------------------------------------------------------- + +Question 1.4. What is this about non-free licenses? + +The non-free licenses are for companies that wish to use FFTW in their +products but are unwilling to release their software under the GPL (which +would require them to release source code and allow free redistribution). +Such users can purchase an unlimited-use license from MIT. Contact us for +more details. + +We could instead have released FFTW under the LGPL, or even disallowed +non-Free usage. Suffice it to say, however, that MIT owns the copyright +to FFTW and they only let us GPL it because we convinced them that it +would neither affect their licensing revenue nor irritate existing +licensees. + +------------------------------------------------------------------------------- + +Question 1.5. In the West? I thought MIT was in the East? + +Not to an Italian. You could say that we're a Spaghetti Western (with +apologies to Sergio Leone). + +=============================================================================== + +Section 2. Installing FFTW + + Q2.1 Which systems does FFTW run on? + Q2.2 Does FFTW run on Windows? + Q2.3 My compiler has trouble with FFTW. + Q2.4 FFTW does not compile on Solaris, complaining about const. + Q2.5 What's the difference between --enable-3dnow and --enable-k7? + Q2.6 What's the difference between the fma and the non-fma versions? + Q2.7 Which language is FFTW written in? + Q2.8 Can I call FFTW from Fortran? + Q2.9 Can I call FFTW from C++? 
+ Q2.10 Why isn't FFTW written in Fortran/C++? + Q2.11 How do I compile FFTW to run in single precision? + Q2.12 --enable-k7 does not work on x86-64 + +------------------------------------------------------------------------------- + +Question 2.1. Which systems does FFTW run on? + +FFTW is written in ANSI C, and should work on any system with a decent C +compiler. (See also Q2.2 `Does FFTW run on Windows?', Q2.3 `My compiler +has trouble with FFTW.'.) FFTW can also take advantage of certain +hardware-specific features, such as cycle counters and SIMD instructions, +but this is optional. + +------------------------------------------------------------------------------- + +Question 2.2. Does FFTW run on Windows? + +Yes, many people have reported successfully using FFTW on Windows with +various compilers. FFTW was not developed on Windows, but the source code +is essentially straight ANSI C. See also the FFTW Windows installation +notes, Q2.3 `My compiler has trouble with FFTW.', and Q3.18 `How do I call +FFTW from the Microsoft language du jour?'. + +------------------------------------------------------------------------------- + +Question 2.3. My compiler has trouble with FFTW. + +Complain fiercely to the vendor of the compiler. + +We have successfully used gcc 3.2.x on x86 and PPC, a recent Compaq C +compiler for Alpha, version 6 of IBM's xlc compiler for AIX, Intel's icc +versions 5-7, and Sun WorkShop cc version 6. + +FFTW is likely to push compilers to their limits, however, and several +compiler bugs have been exposed by FFTW. A partial list follows. + +gcc 2.95.x for Solaris/SPARC produces incorrect code for the test program +(workaround: recompile the libbench2 directory with -O2). + +NetBSD/macppc 1.6 comes with a gcc version that also miscompiles the test +program. (Please report a workaround if you know one.) + +gcc 3.2.3 for ARM reportedly crashes during compilation. This bug is +reportedly fixed in later versions of gcc. 
+ +Versions 8.0 and 8.1 of Intel's icc falsely claim to be gcc, so you should +specify CC="icc -no-gcc"; this is automatic in FFTW 3.1. icc-8.0.066 +reportedly produces incorrect code for FFTW 2.1.5, but is fixed in version +8.1. icc-7.1 compiler build 20030402Z appears to produce incorrect +dependencies, causing the compilation to fail. icc-7.1 build 20030307Z +appears to work fine. (Use icc -V to check which build you have.) As of +2003/04/18, build 20030402Z appears not to be available any longer on +Intel's website, whereas the older build 20030307Z is available. + +ranlib of GNU binutils 2.9.1 on Irix has been observed to corrupt the FFTW +libraries, causing a link failure when FFTW is compiled. Since ranlib is +completely superfluous on Irix, we suggest deleting it from your system +and replacing it with a symbolic link to /bin/echo. + +If support for SIMD instructions is enabled in FFTW, further compiler +problems may appear: + +gcc 3.4.[0123] for x86 produces incorrect SSE2 code for FFTW when -O2 (the +best choice for FFTW) is used, causing FFTW to crash (make check crashes). +This bug is fixed in gcc 3.4.4. On x86_64 (amd64/em64t), gcc 3.4.4 +reportedly still has a similar problem, but this is fixed as of gcc 3.4.6. + +gcc-3.2 for x86 produces incorrect SIMD code if -O3 is used. The same +compiler produces incorrect SIMD code if no optimization is used, too. +When using gcc-3.2, it is a good idea not to change the default CFLAGS +selected by the configure script. + +Some 3.0.x and 3.1.x versions of gcc on x86 may crash. gcc so-called 2.96 +shipping with RedHat 7.3 crashes when compiling SIMD code. In both cases, +please upgrade to gcc-3.2 or later. + +Intel's icc 6.0 misaligns SSE constants, but FFTW has a workaround. icc +8.x fails to compile FFTW 3.0.x because it falsely claims to be gcc; we +believe this to be a bug in icc, but FFTW 3.1 has a workaround. + +Visual C++ 2003 reportedly produces incorrect code for SSE/SSE2 when +compiling FFTW.
This bug was reportedly fixed in VC++ 2005; +alternatively, you could switch to the Intel compiler. VC++ 6.0 also +reportedly produces incorrect code for the file reodft11e-r2hc-odd.c +unless optimizations are disabled for that file. + +gcc 2.95 on MacOS X miscompiles AltiVec code (fixed in later versions). +gcc 3.2.x miscompiles AltiVec permutations, but FFTW has a workaround. +gcc 4.0.1 on MacOS for Intel crashes when compiling FFTW; a workaround is +to compile one file without optimization: cd kernel; make CFLAGS=" " +trig.lo. + +gcc 4.1.1 reportedly crashes when compiling FFTW for MIPS; the workaround +is to compile the file it crashes on (t2_64.c) with a lower optimization +level. + +gcc versions 4.1.2 to 4.2.0 for x86 reportedly miscompile FFTW 3.1's test +program, causing make check to crash (gcc bug #26528). The bug was +reportedly fixed in gcc version 4.2.1 and later. A workaround is to +compile libbench2/verify-lib.c without optimization. + +------------------------------------------------------------------------------- + +Question 2.4. FFTW does not compile on Solaris, complaining about const. + +We know that at least on Solaris 2.5.x with Sun's compilers 4.2 you might +get error messages from make such as + +"./fftw.h", line 88: warning: const is a keyword in ANSI C + +This is the case when the configure script reports that const does not +work: + +checking for working const... (cached) no + +You should be aware that Solaris comes with two compilers, namely, +/opt/SUNWspro/SC4.2/bin/cc and /usr/ucb/cc. The latter compiler is +non-ANSI. Indeed, it is a perverse shell script that calls the real +compiler in non-ANSI mode. In order to compile FFTW, change your path so +that the right cc is used. + +To know whether your compiler is the right one, type cc -V. If the +compiler prints ``ucbcc'', as in + +ucbcc: WorkShop Compilers 4.2 30 Oct 1996 C 4.2 + +then the compiler is wrong. 
The right message is something like + +cc: WorkShop Compilers 4.2 30 Oct 1996 C 4.2 + +------------------------------------------------------------------------------- + +Question 2.5. What's the difference between --enable-3dnow and --enable-k7? + +--enable-k7 enables 3DNow! instructions on K7 processors (AMD Athlon and +its variants). K7 support is provided by assembly routines generated by a +special purpose compiler. As of fftw-3.2, --enable-k7 is no longer +supported. + +--enable-3dnow enables generic 3DNow! support using gcc builtin functions. +This works on earlier AMD processors, but it is not as fast as our special +assembly routines. As of fftw-3.1, --enable-3dnow is no longer supported. + +------------------------------------------------------------------------------- + +Question 2.6. What's the difference between the fma and the non-fma versions? + +The fma version tries to exploit the fused multiply-add instructions +implemented in many processors such as PowerPC, ia-64, and MIPS. The two +FFTW packages are otherwise identical. In FFTW 3.1, the fma and non-fma +versions were merged together into a single package, and the configure +script attempts to automatically guess which version to use. + +The FFTW 3.1 configure script enables fma by default on PowerPC, Itanium, +and PA-RISC, and disables it otherwise. You can force one or the other by +using the --enable-fma or --disable-fma flag for configure. + +Definitely use fma if you have a PowerPC-based system with gcc (or IBM +xlc). This includes all GNU/Linux systems for PowerPC and the older +PowerPC-based MacOS systems. Also use it on PA-RISC and Itanium with the +HP/UX compiler. + +Definitely do not use the fma version if you have an ia-32 processor +(Intel, AMD, MacOS on Intel, etcetera). + +For other architectures/compilers, the situation is not so clear. For +example, ia-64 has the fma instruction, but gcc-3.2 appears not to exploit +it correctly. 
Other compilers may do the right thing, but we have not +tried them. Please send us your feedback so that we can update this FAQ +entry. + +------------------------------------------------------------------------------- + +Question 2.7. Which language is FFTW written in? + +FFTW is written in ANSI C. Most of the code, however, was automatically +generated by a program called genfft, written in the Objective Caml +dialect of ML. You do not need to know ML or to have an Objective Caml +compiler in order to use FFTW. + +genfft is provided with the FFTW sources, which means that you can play +with the code generator if you want. In this case, you need a working +Objective Caml system. Objective Caml is available from the Caml web +page. + +------------------------------------------------------------------------------- + +Question 2.8. Can I call FFTW from Fortran? + +Yes, FFTW (versions 1.3 and higher) contains a Fortran-callable interface, +documented in the FFTW manual. + +By default, FFTW configures its Fortran interface to work with the first +compiler it finds, e.g. g77. To configure for a different, incompatible +Fortran compiler foobar, use ./configure F77=foobar when installing FFTW. +(In the case of g77, however, FFTW 3.x also includes an extra set of +Fortran-callable routines with one less underscore at the end of +identifiers, which should cover most other Fortran compilers on Linux at +least.) + +------------------------------------------------------------------------------- + +Question 2.9. Can I call FFTW from C++? + +Most definitely. FFTW should compile and/or link under any C++ compiler. +Moreover, it is likely that the C++ <complex> template class is +bit-compatible with FFTW's complex-number format (see the FFTW manual for +more details). + +------------------------------------------------------------------------------- + +Question 2.10. Why isn't FFTW written in Fortran/C++?
+ +Because we don't like those languages, and neither approaches the +portability of C. + +------------------------------------------------------------------------------- + +Question 2.11. How do I compile FFTW to run in single precision? + +On a Unix system: configure --enable-float. On a non-Unix system: edit +config.h to #define the symbol FFTW_SINGLE (for FFTW 3.x). In both cases, +you must then recompile FFTW. In FFTW 3, all FFTW identifiers will then +begin with fftwf_ instead of fftw_. + +------------------------------------------------------------------------------- + +Question 2.12. --enable-k7 does not work on x86-64 + +Support for --enable-k7 was discontinued in fftw-3.2. + +The fftw-3.1 release supports --enable-k7. This option only works on +32-bit x86 machines that implement 3DNow!, including the AMD Athlon and +the AMD Opteron in 32-bit mode. --enable-k7 does not work on AMD Opteron +in 64-bit mode. Use --enable-sse for x86-64 machines. + +FFTW supports 3DNow! by means of assembly code generated by a +special-purpose compiler. It is hard to produce assembly code that works +in both 32-bit and 64-bit mode. + +=============================================================================== + +Section 3. Using FFTW + + Q3.1 Why not support the FFTW 2 interface in FFTW 3? + Q3.2 Why do FFTW 3 plans encapsulate the input/output arrays and not ju + Q3.3 FFTW seems really slow. + Q3.4 FFTW slows down after repeated calls. + Q3.5 An FFTW routine is crashing when I call it. + Q3.6 My Fortran program crashes when calling FFTW. + Q3.7 FFTW gives results different from my old FFT. + Q3.8 FFTW gives different results between runs + Q3.9 Can I save FFTW's plans? + Q3.10 Why does your inverse transform return a scaled result? + Q3.11 How can I make FFTW put the origin (zero frequency) at the center + Q3.12 How do I FFT an image/audio file in *foobar* format? + Q3.13 My program does not link (on Unix). + Q3.14 I included your header, but linking still fails. 
+ Q3.15 My program crashes, complaining about stack space. + Q3.16 FFTW seems to have a memory leak. + Q3.17 The output of FFTW's transform is all zeros. + Q3.18 How do I call FFTW from the Microsoft language du jour? + Q3.19 Can I compute only a subset of the DFT outputs? + Q3.20 Can I use FFTW's routines for in-place and out-of-place matrix tra + +------------------------------------------------------------------------------- + +Question 3.1. Why not support the FFTW 2 interface in FFTW 3? + +FFTW 3 has semantics incompatible with earlier versions: its plans can +only be used for a given stride, multiplicity, and other characteristics +of the input and output arrays; these stronger semantics are necessary for +performance reasons. Thus, it is impossible to efficiently emulate the +older interface (whose plans can be used for any transform of the same +size). We believe that it should be possible to upgrade most programs +without any difficulty, however. + +------------------------------------------------------------------------------- + +Question 3.2. Why do FFTW 3 plans encapsulate the input/output arrays and not just the algorithm? + +There are several reasons: + +* It was important for performance reasons that the plan be specific to + array characteristics like the stride (and alignment, for SIMD), and + requiring that the user maintain these invariants is error prone. +* In most high-performance applications, as far as we can tell, you are + usually transforming the same array over and over, so FFTW's semantics + should not be a burden. +* If you need to transform another array of the same size, creating a new + plan once the first exists is a cheap operation. +* If you need to transform many arrays of the same size at once, you + should really use the plan_many routines in FFTW's "advanced" interface. 
+* If the abovementioned array characteristics are the same, you are + willing to pay close attention to the documentation, and you really need + to, we provide a "new-array execution" interface to apply a plan to a + new array. + +------------------------------------------------------------------------------- + +Question 3.3. FFTW seems really slow. + +You are probably recreating the plan before every transform, rather than +creating it once and reusing it for all transforms of the same size. FFTW +is designed to be used in the following way: + +* First, you create a plan. This will take several seconds. +* Then, you reuse the plan many times to perform FFTs. These are fast. + +If you don't need to compute many transforms and the time for the planner +is significant, you have two options. First, you can use the +FFTW_ESTIMATE option in the planner, which uses heuristics instead of +runtime measurements and produces a good plan in a short time. Second, +you can use the wisdom feature to precompute the plan; see Q3.9 `Can I +save FFTW's plans?' + +------------------------------------------------------------------------------- + +Question 3.4. FFTW slows down after repeated calls. + +Probably, NaNs or similar are creeping into your data, and the slowdown is +due to the resulting floating-point exceptions. For example, be aware +that repeatedly FFTing the same array is a diverging process (because FFTW +computes the unnormalized transform). + +------------------------------------------------------------------------------- + +Question 3.5. An FFTW routine is crashing when I call it. + +Did the FFTW test programs pass (make check, or cd tests; make bigcheck if +you want to be paranoid)? If so, you almost certainly have a bug in your +own code. For example, you could be passing invalid arguments (such as +wrongly-sized arrays) to FFTW, or you could simply have memory corruption +elsewhere in your program that causes random crashes later on. 
Please +don't complain to us unless you can come up with a minimal self-contained +program (preferably under 30 lines) that illustrates the problem. + +------------------------------------------------------------------------------- + +Question 3.6. My Fortran program crashes when calling FFTW. + +As described in the manual, on 64-bit machines you must store the plans in +variables large enough to hold a pointer, for example integer*8. We +recommend using integer*8 on 32-bit machines as well, to simplify porting. + +------------------------------------------------------------------------------- + +Question 3.7. FFTW gives results different from my old FFT. + +People follow many different conventions for the DFT, and you should be +sure to know the ones that we use (described in the FFTW manual). In +particular, you should be aware that the FFTW_FORWARD/FFTW_BACKWARD +directions correspond to signs of -1/+1 in the exponent of the DFT +definition. (*Numerical Recipes* uses the opposite convention.) + +You should also know that we compute an unnormalized transform. In +contrast, Matlab is an example of a program that computes a normalized +transform. See Q3.10 `Why does your inverse transform return a scaled +result?'. + +Finally, note that floating-point arithmetic is not exact, so different +FFT algorithms will give slightly different results (on the order of the +numerical accuracy; typically a fractional difference of 1e-15 or so in +double precision). + +------------------------------------------------------------------------------- + +Question 3.8. FFTW gives different results between runs + +If you use FFTW_MEASURE or FFTW_PATIENT mode, then the algorithm FFTW +employs is not deterministic: it depends on runtime performance +measurements. This will cause the results to vary slightly from run to +run. However, the differences should be slight, on the order of the +floating-point precision, and therefore should have no practical impact on +most applications. 
+ +If you use saved plans (wisdom) or FFTW_ESTIMATE mode, however, then the +algorithm is deterministic and the results should be identical between +runs. + +------------------------------------------------------------------------------- + +Question 3.9. Can I save FFTW's plans? + +Yes. Starting with version 1.2, FFTW provides the wisdom mechanism for +saving plans; see the FFTW manual. + +------------------------------------------------------------------------------- + +Question 3.10. Why does your inverse transform return a scaled result? + +Computing the forward transform followed by the backward transform (or +vice versa) yields the original array scaled by the size of the array. +(For multi-dimensional transforms, the size of the array is the product of +the dimensions.) We could, instead, have chosen a normalization that +would have returned the unscaled array. Or, to accommodate the many +conventions in this matter, the transform routines could have accepted a +"scale factor" parameter. We did not do this, however, for two reasons. +First, we didn't want to sacrifice performance in the common case where +the scale factor is 1. Second, in real applications the FFT is followed or +preceded by some computation on the data, into which the scale factor can +typically be absorbed at little or no cost. + +------------------------------------------------------------------------------- + +Question 3.11. How can I make FFTW put the origin (zero frequency) at the center of its output? + +For human viewing of a spectrum, it is often convenient to put the origin +in frequency space at the center of the output array, rather than in the +zero-th element (the default in FFTW). If all of the dimensions of your +array are even, you can accomplish this by simply multiplying each element +of the input array by (-1)^(i + j + ...), where i, j, etcetera are the +indices of the element. (This trick is a general property of the DFT, and +is not specific to FFTW.) 
+ +------------------------------------------------------------------------------- + +Question 3.12. How do I FFT an image/audio file in *foobar* format? + +FFTW performs an FFT on an array of floating-point values. You can +certainly use it to compute the transform of an image or audio stream, but +you are responsible for figuring out your data format and converting it to +the form FFTW requires. + +------------------------------------------------------------------------------- + +Question 3.13. My program does not link (on Unix). + +The libraries must be listed in the correct order (-lfftw3 -lm for FFTW +3.x) and *after* your program sources/objects. (The general rule is that +if *A* uses *B*, then *A* must be listed before *B* in the link command.). + +------------------------------------------------------------------------------- + +Question 3.14. I included your header, but linking still fails. + +You're a C++ programmer, aren't you? You have to compile the FFTW library +and link it into your program, not just #include <fftw3.h>. (Yes, this is +really a FAQ.) + +------------------------------------------------------------------------------- + +Question 3.15. My program crashes, complaining about stack space. + +You cannot declare large arrays with automatic storage (e.g. via +fftw_complex array[N]); you should use fftw_malloc (or equivalent) to +allocate the arrays you want to transform if they are larger than a few +hundred elements. + +------------------------------------------------------------------------------- + +Question 3.16. FFTW seems to have a memory leak. + +After you create a plan, FFTW caches the information required to quickly +recreate the plan. (See Q3.9 `Can I save FFTW's plans?') It also +maintains a small amount of other persistent memory. You can deallocate +all of FFTW's internally allocated memory, if you wish, by calling +fftw_cleanup(), as documented in the manual. 
+ +------------------------------------------------------------------------------- + +Question 3.17. The output of FFTW's transform is all zeros. + +You should initialize your input array *after* creating the plan, unless +you use FFTW_ESTIMATE: planning with FFTW_MEASURE or FFTW_PATIENT +overwrites the input/output arrays, as described in the manual. + +------------------------------------------------------------------------------- + +Question 3.18. How do I call FFTW from the Microsoft language du jour? + +Please *do not* ask us Windows-specific questions. We do not use Windows. +We know nothing about Visual Basic, Visual C++, or .NET. Please find the +appropriate Usenet discussion group and ask your question there. See also +Q2.2 `Does FFTW run on Windows?'. + +------------------------------------------------------------------------------- + +Question 3.19. Can I compute only a subset of the DFT outputs? + +In general, no, an FFT intrinsically computes all outputs from all inputs. +In principle, there is something called a *pruned FFT* that can do what +you want, but to compute K outputs out of N the complexity is in general +O(N log K) instead of O(N log N), thus saving only a small additive factor +in the log. (The same argument holds if you instead have only K nonzero +inputs.) + +There are some specific cases in which you can get the O(N log K) +performance benefits easily, however, by combining a few ordinary FFTs. +In particular, the case where you want the first K outputs, where K +divides N, can be handled by performing N/K transforms of size K and then +summing the outputs multiplied by appropriate phase factors. For more +details, see pruned FFTs with FFTW. + +There are also some algorithms that compute pruned transforms +*approximately*, but they are beyond the scope of this FAQ. + +------------------------------------------------------------------------------- + +Question 3.20. 
Can I use FFTW's routines for in-place and out-of-place matrix transposition? + +You can use the FFTW guru interface to create a rank-0 transform of vector +rank 2 where the vector strides are transposed. (A rank-0 transform is +equivalent to a 1D transform of size 1, which just copies the input into +the output.) Specifying the same location for the input and output makes +the transpose in-place. + +For double-valued data stored in row-major format, plan creation looks +like this: + +fftw_plan plan_transpose(int rows, int cols, double *in, double *out) +{ + const unsigned flags = FFTW_ESTIMATE; /* other flags are possible */ + fftw_iodim howmany_dims[2]; + + howmany_dims[0].n = rows; + howmany_dims[0].is = cols; + howmany_dims[0].os = 1; + + howmany_dims[1].n = cols; + howmany_dims[1].is = 1; + howmany_dims[1].os = rows; + + return fftw_plan_guru_r2r(/*rank=*/ 0, /*dims=*/ NULL, + /*howmany_rank=*/ 2, howmany_dims, + in, out, /*kind=*/ NULL, flags); +} +(This entry was written by Rhys Ulerich.) + +=============================================================================== + +Section 4. Internals of FFTW + + Q4.1 How does FFTW work? + Q4.2 Why is FFTW so fast? + +------------------------------------------------------------------------------- + +Question 4.1. How does FFTW work? + +The innovation (if it can be so called) in FFTW consists in having a +variety of composable *solvers*, representing different FFT algorithms and +implementation strategies, whose combination into a particular *plan* for +a given size can be determined at runtime according to the characteristics +of your machine/compiler. This peculiar software architecture allows FFTW +to adapt itself to almost any machine. + +For more details (albeit somewhat outdated), see the paper "FFTW: An +Adaptive Software Architecture for the FFT", by M. Frigo and S. G. +Johnson, *Proc. ICASSP* 3, 1381 (1998), also available at the FFTW web +page. 
+ +------------------------------------------------------------------------------- + +Question 4.2. Why is FFTW so fast? + +This is a complex question, and there is no simple answer. In fact, the +authors do not fully know the answer, either. In addition to many small +performance hacks throughout FFTW, there are three general reasons for +FFTW's speed. + +* FFTW uses a variety of FFT algorithms and implementation styles that + can be arbitrarily composed to adapt itself to a machine. See Q4.1 `How + does FFTW work?'. +* FFTW uses a code generator to produce highly-optimized routines for + computing small transforms. +* FFTW uses explicit divide-and-conquer to take advantage of the memory + hierarchy. + +For more details (albeit somewhat outdated), see the paper "FFTW: An +Adaptive Software Architecture for the FFT", by M. Frigo and S. G. +Johnson, *Proc. ICASSP* 3, 1381 (1998), available along with other +references at the FFTW web page. + +=============================================================================== + +Section 5. Known bugs + + Q5.1 FFTW 1.1 crashes in rfftwnd on Linux. + Q5.2 The MPI transforms in FFTW 1.2 give incorrect results/leak memory. + Q5.3 The test programs in FFTW 1.2.1 fail when I change FFTW to use sin + Q5.4 The test program in FFTW 1.2.1 fails for n > 46340. + Q5.5 The threaded code fails on Linux Redhat 5.0 + Q5.6 FFTW 2.0's rfftwnd fails for rank > 1 transforms with a final dime + Q5.7 FFTW 2.0's complex transforms give the wrong results with prime fa + Q5.8 FFTW 2.1.1's MPI test programs crash with MPICH. + Q5.9 FFTW 2.1.2's multi-threaded transforms don't work on AIX. + Q5.10 FFTW 2.1.2's complex transforms give incorrect results for large p + Q5.11 FFTW 2.1.3's multi-threaded transforms don't give any speedup on S + Q5.12 FFTW 2.1.3 crashes on AIX. + +------------------------------------------------------------------------------- + +Question 5.1. FFTW 1.1 crashes in rfftwnd on Linux. + +This bug was fixed in FFTW 1.2. 
There was a bug in rfftwnd causing an +incorrect amount of memory to be allocated. The bug showed up in Linux +with libc-5.3.12 (and nowhere else that we know of). + +------------------------------------------------------------------------------- + +Question 5.2. The MPI transforms in FFTW 1.2 give incorrect results/leak memory. + +These bugs were corrected in FFTW 1.2.1. The MPI transforms (really, just +the transpose routines) in FFTW 1.2 had bugs that could cause errors in +some situations. + +------------------------------------------------------------------------------- + +Question 5.3. The test programs in FFTW 1.2.1 fail when I change FFTW to use single precision. + +This bug was fixed in FFTW 1.3. (Older versions of FFTW did work in +single precision, but the test programs didn't--the error tolerances in +the tests were set for double precision.) + +------------------------------------------------------------------------------- + +Question 5.4. The test program in FFTW 1.2.1 fails for n > 46340. + +This bug was fixed in FFTW 1.3. FFTW 1.2.1 produced the right answer, but +the test program was wrong. For large n, n*n in the naive transform that +we used for comparison overflows 32 bit integer precision, breaking the +test. + +------------------------------------------------------------------------------- + +Question 5.5. The threaded code fails on Linux Redhat 5.0 + +We had problems with glibc-2.0.5. The code should work with glibc-2.0.7. + +------------------------------------------------------------------------------- + +Question 5.6. FFTW 2.0's rfftwnd fails for rank > 1 transforms with a final dimension >= 65536. + +This bug was fixed in FFTW 2.0.1. (There was a 32-bit integer overflow +due to a poorly-parenthesized expression.) + +------------------------------------------------------------------------------- + +Question 5.7. FFTW 2.0's complex transforms give the wrong results with prime factors 17 to 97. 
+ +There was a bug in the complex transforms that could cause incorrect +results under (hopefully rare) circumstances for lengths with +intermediate-size prime factors (17-97). This bug was fixed in FFTW +2.1.1. + +------------------------------------------------------------------------------- + +Question 5.8. FFTW 2.1.1's MPI test programs crash with MPICH. + +This bug was fixed in FFTW 2.1.2. The 2.1/2.1.1 MPI test programs crashed +when using the MPICH implementation of MPI with the ch_p4 device (TCP/IP); +the transforms themselves worked fine. + +------------------------------------------------------------------------------- + +Question 5.9. FFTW 2.1.2's multi-threaded transforms don't work on AIX. + +This bug was fixed in FFTW 2.1.3. The multi-threaded transforms in +previous versions didn't work with AIX's pthreads implementation, which +idiosyncratically creates threads in detached (non-joinable) mode by +default. + +------------------------------------------------------------------------------- + +Question 5.10. FFTW 2.1.2's complex transforms give incorrect results for large prime sizes. + +This bug was fixed in FFTW 2.1.3. FFTW's complex-transform algorithm for +prime sizes (in versions 2.0 to 2.1.2) had an integer overflow problem +that caused incorrect results for many primes greater than 32768 (on +32-bit machines). (Sizes without large prime factors are not affected.) + +------------------------------------------------------------------------------- + +Question 5.11. FFTW 2.1.3's multi-threaded transforms don't give any speedup on Solaris. + +This bug was fixed in FFTW 2.1.4. (By default, Solaris creates threads +that do not parallelize over multiple processors, so one has to request +the proper behavior specifically.) + +------------------------------------------------------------------------------- + +Question 5.12. FFTW 2.1.3 crashes on AIX. 
+ +The FFTW 2.1.3 configure script picked incorrect compiler flags for the +xlc compiler on newer IBM processors. This is fixed in FFTW 2.1.4. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.bfnn --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.bfnn Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,708 @@ +\comment This is the source for the FFTW FAQ list, in +\comment the Bizarre Format With No Name. It is turned into Lout +\comment input, HTML, plain ASCII and an Info document by a Perl script. +\comment +\comment The format and scripts come from the Linux FAQ, by +\comment Ian Jackson. +\set brieftitle FFTW FAQ +\set author Matteo Frigo and Steven G. Johnson / fftw@fftw.org +\set authormail fftw@fftw.org +\set title FFTW Frequently Asked Questions with Answers +\set copyholder Matteo Frigo and Massachusetts Institute of Technology +\call-html startup html.refs2 +\copyto ASCII + FFTW FREQUENTLY ASKED QUESTIONS WITH ANSWERS + `date '+%d %h %Y'` + Matteo Frigo + Steven G. Johnson + + +\endcopy +\copyto INFO +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* FFTW FAQ: (fftw-faq). FFTW Frequently Asked Questions with Answers. +END-INFO-DIR-ENTRY + + +File: $prefix.info, Node: Top, Next: Question 1.1, Up: (dir) + + FFTW FREQUENTLY ASKED QUESTIONS WITH ANSWERS + `date '+%d %h %Y'` + Matteo Frigo + Steven G. Johnson + + +\endcopy + +This is the list of Frequently Asked Questions about FFTW, a +collection of fast C routines for computing the Discrete Fourier +Transform in one or more dimensions. + +\section Index + +\index + +\comment ###################################################################### + +\section Introduction and General Information + +\question 26aug:whatisfftw What is FFTW? + +FFTW is a free collection of fast C routines for computing the +Discrete Fourier Transform in one or more dimensions. 
It includes +complex, real, symmetric, and parallel transforms, and can handle +arbitrary array sizes efficiently. FFTW is typically faster than +other publicly available FFT implementations, and is even +competitive with vendor-tuned libraries. (See our web page for +extensive benchmarks.) To achieve this performance, FFTW uses novel +code-generation and runtime self-optimization techniques (along with +many other tricks). + +\question 26aug:whereisfftw How do I obtain FFTW? + +FFTW can be found at \docref{the FFTW web page\}. You can also +retrieve it from \ftpon ftp.fftw.org in \ftpin /pub/fftw. + +\question 26aug:isfftwfree Is FFTW free software? + +Starting with version 1.3, FFTW is Free Software in the technical +sense defined by the Free Software Foundation (see \docref{Categories +of Free and Non-Free Software\}), and is distributed under the terms +of the GNU General Public License. Previous versions of FFTW were +distributed without fee for noncommercial use, but were not +technically ``free.'' + +Non-free licenses for FFTW are also available that permit different +terms of use than the GPL. + +\question 10apr:nonfree What is this about non-free licenses? + +The non-free licenses are for companies that wish to use FFTW in their +products but are unwilling to release their software under the GPL +(which would require them to release source code and allow free +redistribution). Such users can purchase an unlimited-use license +from MIT. Contact us for more details. + +We could instead have released FFTW under the LGPL, or even disallowed +non-Free usage. Suffice it to say, however, that MIT owns the +copyright to FFTW and they only let us GPL it because we convinced +them that it would neither affect their licensing revenue nor irritate +existing licensees. + +\question 24oct:west In the West? I thought MIT was in the East? + +Not to an Italian. You could say that we're a Spaghetti Western +(with apologies to Sergio Leone). 
+ +\comment ###################################################################### + +\section Installing FFTW + +\question 26aug:systems Which systems does FFTW run on? + +FFTW is written in ANSI C, and should work on any system with a decent +C compiler. (See also \qref runOnWindows, \qref compilerCrashes.) +FFTW can also take advantage of certain hardware-specific features, +such as cycle counters and SIMD instructions, but this is optional. + +\question 26aug:runOnWindows Does FFTW run on Windows? + +Yes, many people have reported successfully using FFTW on Windows with +various compilers. FFTW was not developed on Windows, but the source +code is essentially straight ANSI C. See also the \docref{FFTW +Windows installation notes\}, \qref compilerCrashes, and \qref +vbetalia. + +\question 26aug:compilerCrashes My compiler has trouble with FFTW. + +Complain fiercely to the vendor of the compiler. + +We have successfully used \courier{gcc\} 3.2.x on x86 and PPC, a +recent Compaq C compiler for Alpha, version 6 of IBM's \courier{xlc\} +compiler for AIX, Intel's \courier{icc\} versions 5-7, and Sun +WorkShop \courier{cc\} version 6. + +FFTW is likely to push compilers to their limits, however, and several +compiler bugs have been exposed by FFTW. A partial list follows. + +\courier{gcc\} 2.95.x for Solaris/SPARC produces incorrect code for +the test program (workaround: recompile the \courier{libbench2\} +directory with \courier{-O2\}). + +NetBSD/macppc 1.6 comes with a \courier{gcc\} version that also +miscompiles the test program. (Please report a workaround if you know +one.) + +\courier{gcc\} 3.2.3 for ARM reportedly crashes during compilation. +This bug is reportedly fixed in later versions of \courier{gcc\}. + +Versions 8.0 and 8.1 of Intel's \courier{icc\} falsely claim to be +\courier{gcc\}, so you should specify \courier{CC="icc -no-gcc"\}; +this is automatic in FFTW 3.1. 
\courier{icc-8.0.066\} reportedly +produces incorrect code for FFTW 2.1.5, but is fixed in version 8.1. +\courier{icc-7.1\} compiler build 20030402Z appears to produce +incorrect dependencies, causing the compilation to fail. +\courier{icc-7.1\} build 20030307Z appears to work fine. (Use +\courier{icc -V\} to check which build you have.) As of 2003/04/18, +build 20030402Z appears not to be available any longer on Intel's +website, whereas the older build 20030307Z is available. + +\courier{ranlib\} of GNU \courier{binutils\} 2.9.1 on Irix has been +observed to corrupt the FFTW libraries, causing a link failure when +FFTW is compiled. Since \courier{ranlib\} is completely superfluous +on Irix, we suggest deleting it from your system and replacing it with +a symbolic link to \courier{/bin/echo\}. + +If support for SIMD instructions is enabled in FFTW, further compiler +problems may appear: + +\courier{gcc\} 3.4.[0123] for x86 produces incorrect SSE2 code for +FFTW when \courier{-O2\} (the best choice for FFTW) is used, causing +FFTW to crash (\courier{make check\} crashes). This bug is fixed in +\courier{gcc\} 3.4.4. On x86_64 (amd64/em64t), \courier{gcc\} 3.4.4 +reportedly still has a similar problem, but this is fixed as of +\courier{gcc\} 3.4.6. + +\courier{gcc-3.2\} for x86 produces incorrect SIMD code if +\courier{-O3\} is used. The same compiler produces incorrect SIMD +code if no optimization is used, too. When using \courier{gcc-3.2\}, +it is a good idea not to change the default \courier{CFLAGS\} selected +by the \courier{configure\} script. + +Some 3.0.x and 3.1.x versions of \courier{gcc\} on \courier{x86\} may +crash. \courier{gcc\} so-called 2.96 shipping with RedHat 7.3 crashes +when compiling SIMD code. In both cases, please upgrade to +\courier{gcc-3.2\} or later. + +Intel's \courier{icc\} 6.0 misaligns SSE constants, but FFTW has a +workaround. 
\courier{icc\} 8.x fails to compile FFTW 3.0.x because it +falsely claims to be \courier{gcc\}; we believe this to be a bug in +\courier{icc\}, but FFTW 3.1 has a workaround. + +Visual C++ 2003 reportedly produces incorrect code for SSE/SSE2 when +compiling FFTW. This bug was reportedly fixed in VC++ 2005; +alternatively, you could switch to the Intel compiler. VC++ 6.0 also +reportedly produces incorrect code for the file +\courier{reodft11e-r2hc-odd.c\} unless optimizations are disabled for +that file. + +\courier{gcc\} 2.95 on MacOS X miscompiles AltiVec code (fixed in +later versions). \courier{gcc\} 3.2.x miscompiles AltiVec +permutations, but FFTW has a workaround. \courier{gcc\} 4.0.1 on +MacOS for Intel crashes when compiling FFTW; a workaround is to +compile one file without optimization: \courier{cd kernel; make +CFLAGS=" " trig.lo\}. + +\courier{gcc\} 4.1.1 reportedly crashes when compiling FFTW for MIPS; +the workaround is to compile the file it crashes on +(\courier{t2_64.c\}) with a lower optimization level. + +\courier{gcc\} versions 4.1.2 to 4.2.0 for x86 reportedly miscompile +FFTW 3.1's test program, causing \courier{make check\} to crash +(\courier{gcc\} bug #26528). The bug was reportedly fixed in +\courier{gcc\} version 4.2.1 and later. A workaround is to compile +\courier{libbench2/verify-lib.c\} without optimization. + +\question 26aug:solarisSucks FFTW does not compile on Solaris, complaining about \courier{const\}. + +We know that at least on Solaris 2.5.x with Sun's compilers 4.2 you +might get error messages from \courier{make\} such as + +\courier{"./fftw.h", line 88: warning: const is a keyword in ANSI C\} + +This is the case when the \courier{configure\} script reports that +\courier{const\} does not work: + +\courier{checking for working const... (cached) no\} + +You should be aware that Solaris comes with two compilers, namely, +\courier{/opt/SUNWspro/SC4.2/bin/cc\} and \courier{/usr/ucb/cc\}. The +latter compiler is non-ANSI. 
Indeed, it is a perverse shell script +that calls the real compiler in non-ANSI mode. In order +to compile FFTW, change your path so that the right \courier{cc\} +is used. + +To know whether your compiler is the right one, type +\courier{cc -V\}. If the compiler prints ``\courier{ucbcc\}'', +as in + +\courier{ucbcc: WorkShop Compilers 4.2 30 Oct 1996 C 4.2\} + +then the compiler is wrong. The right message is something like + +\courier{cc: WorkShop Compilers 4.2 30 Oct 1996 C 4.2\} + +\question 19mar:3dnow What's the difference between \courier{--enable-3dnow\} and \courier{--enable-k7\}? + +\courier{--enable-k7\} enables 3DNow! instructions on K7 processors +(AMD Athlon and its variants). K7 support is provided by assembly +routines generated by a special purpose compiler. +As of fftw-3.2, --enable-k7 is no longer supported. + +\courier{--enable-3dnow\} enables generic 3DNow! support using +\courier{gcc\} builtin functions. This works on earlier AMD +processors, but it is not as fast as our special assembly routines. +As of fftw-3.1, --enable-3dnow is no longer supported. + +\question 18apr:fma What's the difference between the fma and the non-fma versions? + +The fma version tries to exploit the fused multiply-add instructions +implemented in many processors such as PowerPC, ia-64, and MIPS. The +two FFTW packages are otherwise identical. In FFTW 3.1, the fma and +non-fma versions were merged together into a single package, and the +\courier{configure\} script attempts to automatically guess which +version to use. + +The FFTW 3.1 \courier{configure\} script enables fma by default on +PowerPC, Itanium, and PA-RISC, and disables it otherwise. You can +force one or the other by using the \courier{--enable-fma\} or +\courier{--disable-fma\} flag for \courier{configure\}. + +Definitely use fma if you have a PowerPC-based system with +\courier{gcc\} (or IBM \courier{xlc\}). This includes all GNU/Linux +systems for PowerPC and the older PowerPC-based MacOS systems. 
Also +use it on PA-RISC and Itanium with the HP/UX compiler. + +Definitely do not use the fma version if you have an ia-32 processor +(Intel, AMD, MacOS on Intel, etcetera). + +For other architectures/compilers, the situation is not so clear. For +example, ia-64 has the fma instruction, but \courier{gcc-3.2\} appears +not to exploit it correctly. Other compilers may do the right thing, +but we have not tried them. Please send us your feedback so that we +can update this FAQ entry. + +\question 26aug:languages Which language is FFTW written in? + +FFTW is written in ANSI C. Most of the code, however, was +automatically generated by a program called \courier{genfft\}, written +in the Objective Caml dialect of ML. You do not need to know ML or to +have an Objective Caml compiler in order to use FFTW. + +\courier{genfft\} is provided with the FFTW sources, which means that +you can play with the code generator if you want. In this case, you +need a working Objective Caml system. Objective Caml is available +from \docref{the Caml web page\}. + +\question 26aug:fortran Can I call FFTW from Fortran? + +Yes, FFTW (versions 1.3 and higher) contains a Fortran-callable +interface, documented in the FFTW manual. + +By default, FFTW configures its Fortran interface to work with the +first compiler it finds, e.g. \courier{g77\}. To configure for a +different, incompatible Fortran compiler \courier{foobar\}, use +\courier{./configure F77=foobar\} when installing FFTW. (In the case +of \courier{g77\}, however, FFTW 3.x also includes an extra set of +Fortran-callable routines with one less underscore at the end of +identifiers, which should cover most other Fortran compilers on Linux +at least.) + +\question 26aug:cplusplus Can I call FFTW from C++? + +Most definitely. FFTW should compile and/or link under any C++ +compiler. Moreover, it is likely that the C++ \courier{<complex>\} +template class is bit-compatible with FFTW's complex-number format +(see the FFTW manual for more details). 
+ +\question 26aug:whynotfortran Why isn't FFTW written in Fortran/C++? + +Because we don't like those languages, and neither approaches the +portability of C. + +\question 29mar:singleprec How do I compile FFTW to run in single precision? + +On a Unix system: \courier{configure --enable-float\}. On a non-Unix +system: edit \courier{config.h\} to \courier{#define\} the symbol +\courier{FFTW_SINGLE\} (for FFTW 3.x). In both cases, you must then +recompile FFTW. In FFTW 3, all FFTW identifiers will then begin with +\courier{fftwf_\} instead of \courier{fftw_\}. + +\question 28mar:64bitk7 --enable-k7 does not work on x86-64 + +Support for --enable-k7 was discontinued in fftw-3.2. + +The fftw-3.1 release supports --enable-k7. This option only works on +32-bit x86 machines that implement 3DNow!, including the AMD Athlon +and the AMD Opteron in 32-bit mode. --enable-k7 does not work on AMD +Opteron in 64-bit mode. Use --enable-sse for x86-64 machines. + +FFTW supports 3DNow! by means of assembly code generated by a +special-purpose compiler. It is hard to produce assembly code that +works in both 32-bit and 64-bit mode. + +\comment ###################################################################### + +\section Using FFTW + +\question 15mar:fftw2to3 Why not support the FFTW 2 interface in FFTW 3? + +FFTW 3 has semantics incompatible with earlier versions: its plans can +only be used for a given stride, multiplicity, and other +characteristics of the input and output arrays; these stronger +semantics are necessary for performance reasons. Thus, it is +impossible to efficiently emulate the older interface (whose plans can +be used for any transform of the same size). We believe that it +should be possible to upgrade most programs without any difficulty, +however. + +\question 20mar:planperarray Why do FFTW 3 plans encapsulate the input/output arrays and not just the algorithm? 
+ +There are several reasons: + +\call startlist +\call item +It was important for performance reasons that the plan be specific to +array characteristics like the stride (and alignment, for SIMD), and +requiring that the user maintain these invariants is error prone. +\call item +In most high-performance applications, as far as we can tell, you are +usually transforming the same array over and over, so FFTW's semantics +should not be a burden. +\call item +If you need to transform another array of the same size, creating a +new plan once the first exists is a cheap operation. +\call item +If you need to transform many arrays of the same size at once, you +should really use the \courier{plan_many\} routines in FFTW's "advanced" +interface. +\call item +If the abovementioned array characteristics are the same, you are +willing to pay close attention to the documentation, and you really +need to, we provide a "new-array execution" interface to apply a plan +to a new array. +\call endlist + +\question 25may:slow FFTW seems really slow. + +You are probably recreating the plan before every transform, rather +than creating it once and reusing it for all transforms of the same +size. FFTW is designed to be used in the following way: + +\call startlist +\call item +First, you create a plan. This will take several seconds. +\call item +Then, you reuse the plan many times to perform FFTs. These are fast. +\call endlist + +If you don't need to compute many transforms and the time for the +planner is significant, you have two options. First, you can use the +\courier{FFTW_ESTIMATE\} option in the planner, which uses heuristics +instead of runtime measurements and produces a good plan in a short +time. Second, you can use the wisdom feature to precompute the plan; +see \qref savePlans + +\question 22oct:slows FFTW slows down after repeated calls. + +Probably, NaNs or similar are creeping into your data, and the +slowdown is due to the resulting floating-point exceptions. 
For +example, be aware that repeatedly FFTing the same array is a diverging +process (because FFTW computes the unnormalized transform). + +\question 22oct:segfault An FFTW routine is crashing when I call it. + +Did the FFTW test programs pass (\courier{make check\}, or \courier{cd +tests; make bigcheck\} if you want to be paranoid)? If so, you almost +certainly have a bug in your own code. For example, you could be +passing invalid arguments (such as wrongly-sized arrays) to FFTW, or +you could simply have memory corruption elsewhere in your program that +causes random crashes later on. Please don't complain to us unless +you can come up with a minimal self-contained program (preferably +under 30 lines) that illustrates the problem. + +\question 22oct:fortran64 My Fortran program crashes when calling FFTW. + +As described in the manual, on 64-bit machines you must store the +plans in variables large enough to hold a pointer, for example +\courier{integer*8\}. We recommend using \courier{integer*8\} on +32-bit machines as well, to simplify porting. + +\question 24mar:conventions FFTW gives results different from my old FFT. + +People follow many different conventions for the DFT, and you should +be sure to know the ones that we use (described in the FFTW manual). +In particular, you should be aware that the +\courier{FFTW_FORWARD\}/\courier{FFTW_BACKWARD\} directions correspond +to signs of -1/+1 in the exponent of the DFT definition. +(\italic{Numerical Recipes\} uses the opposite convention.) + +You should also know that we compute an unnormalized transform. In +contrast, Matlab is an example of program that computes a normalized +transform. See \qref whyscaled. + +Finally, note that floating-point arithmetic is not exact, so +different FFT algorithms will give slightly different results (on the +order of the numerical accuracy; typically a fractional difference of +1e-15 or so in double precision). 
+ +\question 31aug:nondeterministic FFTW gives different results between runs + +If you use \courier{FFTW_MEASURE\} or \courier{FFTW_PATIENT\} mode, +then the algorithm FFTW employs is not deterministic: it depends on +runtime performance measurements. This will cause the results to vary +slightly from run to run. However, the differences should be slight, +on the order of the floating-point precision, and therefore should +have no practical impact on most applications. + +If you use saved plans (wisdom) or \courier{FFTW_ESTIMATE\} mode, +however, then the algorithm is deterministic and the results should be +identical between runs. + +\question 26aug:savePlans Can I save FFTW's plans? + +Yes. Starting with version 1.2, FFTW provides the \courier{wisdom\} +mechanism for saving plans; see the FFTW manual. + +\question 14sep:whyscaled Why does your inverse transform return a scaled result? + +Computing the forward transform followed by the backward transform (or +vice versa) yields the original array scaled by the size of the array. +(For multi-dimensional transforms, the size of the array is the +product of the dimensions.) We could, instead, have chosen a +normalization that would have returned the unscaled array. Or, to +accomodate the many conventions in this matter, the transform routines +could have accepted a "scale factor" parameter. We did not do this, +however, for two reasons. First, we didn't want to sacrifice +performance in the common case where the scale factor is 1. Second, in +real applications the FFT is followed or preceded by some computation +on the data, into which the scale factor can typically be absorbed at +little or no cost. + +\question 02dec:centerorigin How can I make FFTW put the origin (zero frequency) at the center of its output? + +For human viewing of a spectrum, it is often convenient to put the +origin in frequency space at the center of the output array, rather +than in the zero-th element (the default in FFTW). 
If all of the +dimensions of your array are even, you can accomplish this by simply +multiplying each element of the input array by (-1)^(i + j + ...), +where i, j, etcetera are the indices of the element. (This trick is a +general property of the DFT, and is not specific to FFTW.) + +\question 08may:imageaudio How do I FFT an image/audio file in \italic{foobar\} format? + +FFTW performs an FFT on an array of floating-point values. You can +certainly use it to compute the transform of an image or audio stream, +but you are responsible for figuring out your data format and +converting it to the form FFTW requires. + +\question 09apr:linkfails My program does not link (on Unix). + +The libraries must be listed in the correct order (\courier{-lfftw3 +-lm\} for FFTW 3.x) and \italic{after\} your program sources/objects. +(The general rule is that if \italic{A\} uses \italic{B\}, then +\italic{A\} must be listed before \italic{B\} in the link command.). + +\question 15mar:linkheader I included your header, but linking still fails. + +You're a C++ programmer, aren't you? You have to compile the FFTW +library and link it into your program, not just \courier{#include +\}. (Yes, this is really a FAQ.) + +\question 22oct:nostack My program crashes, complaining about stack space. + +You cannot declare large arrays with automatic storage (e.g. via +\courier{fftw_complex array[N]\}); you should use +\courier{fftw_malloc\} (or equivalent) to allocate the arrays you want +to transform if they are larger than a few hundred elements. + +\question 13may:leaks FFTW seems to have a memory leak. + +After you create a plan, FFTW caches the information required to +quickly recreate the plan. (See \qref savePlans) It also maintains a +small amount of other persistent memory. You can deallocate all of +FFTW's internally allocated memory, if you wish, by calling +\courier{fftw_cleanup()\}, as documented in the manual. + +\question 16may:allzero The output of FFTW's transform is all zeros. 
+ +You should initialize your input array \italic{after\} creating the +plan, unless you use \courier{FFTW_ESTIMATE\}: planning with +\courier{FFTW_MEASURE\} or \courier{FFTW_PATIENT\} overwrites the +input/output arrays, as described in the manual. + +\question 05sep:vbetalia How do I call FFTW from the Microsoft language du jour? + +Please \italic{do not\} ask us Windows-specific questions. We do not +use Windows. We know nothing about Visual Basic, Visual C++, or .NET. +Please find the appropriate Usenet discussion group and ask your +question there. See also \qref runOnWindows. + +\question 15oct:pruned Can I compute only a subset of the DFT outputs? + +In general, no, an FFT intrinsically computes all outputs from all +inputs. In principle, there is something called a \italic{pruned +FFT\} that can do what you want, but to compute K outputs out of N the +complexity is in general O(N log K) instead of O(N log N), thus saving +only a small additive factor in the log. (The same argument holds if +you instead have only K nonzero inputs.) + +There are some specific cases in which you can get the O(N log K) +performance benefits easily, however, by combining a few ordinary +FFTs. In particular, the case where you want the first K outputs, +where K divides N, can be handled by performing N/K transforms of size +K and then summing the outputs multiplied by appropriate phase +factors. For more details, see \docref{pruned FFTs with FFTW\}. + +There are also some algorithms that compute pruned transforms +\italic{approximately\}, but they are beyond the scope of this FAQ. + +\question 21jan:transpose Can I use FFTW's routines for in-place and out-of-place matrix transposition? + +You can use the FFTW guru interface to create a rank-0 transform of +vector rank 2 where the vector strides are transposed. (A rank-0 +transform is equivalent to a 1D transform of size 1, which just +copies the input into the output.)
Specifying the same location for +the input and output makes the transpose in-place. + +For double-valued data stored in row-major format, plan creation looks like +this: + +\verbatim +fftw_plan plan_transpose(int rows, int cols, double *in, double *out) +{ + const unsigned flags = FFTW_ESTIMATE; /* other flags are possible */ + fftw_iodim howmany_dims[2]; + + howmany_dims[0].n = rows; + howmany_dims[0].is = cols; + howmany_dims[0].os = 1; + + howmany_dims[1].n = cols; + howmany_dims[1].is = 1; + howmany_dims[1].os = rows; + + return fftw_plan_guru_r2r(/*rank=*/ 0, /*dims=*/ NULL, + /*howmany_rank=*/ 2, howmany_dims, + in, out, /*kind=*/ NULL, flags); +} +\endverbatim + +(This entry was written by Rhys Ulerich.) + +\comment ###################################################################### + +\section Internals of FFTW + +\question 26aug:howworks How does FFTW work? + +The innovation (if it can be so called) in FFTW consists in having a +variety of composable \italic{solvers\}, representing different FFT +algorithms and implementation strategies, whose combination into a +particular \italic{plan\} for a given size can be determined at +runtime according to the characteristics of your machine/compiler. +This peculiar software architecture allows FFTW to adapt itself to +almost any machine. + +For more details (albeit somewhat outdated), see the paper "FFTW: An +Adaptive Software Architecture for the FFT", by M. Frigo and +S. G. Johnson, \italic{Proc. ICASSP\} 3, 1381 (1998), also +available at \docref{the FFTW web page\}. + +\question 26aug:whyfast Why is FFTW so fast? + +This is a complex question, and there is no simple answer. In fact, +the authors do not fully know the answer, either. In addition to many +small performance hacks throughout FFTW, there are three general +reasons for FFTW's speed. + +\call startlist +\call item + FFTW uses a variety of FFT algorithms and implementation styles +that can be arbitrarily composed to adapt itself to +a machine. 
See \qref howworks. +\call item + FFTW uses a code generator to produce highly-optimized +routines for computing small transforms. +\call item + FFTW uses explicit divide-and-conquer to take advantage +of the memory hierarchy. +\call endlist + +For more details (albeit somewhat outdated), see the paper "FFTW: An +Adaptive Software Architecture for the FFT", by M. Frigo and +S. G. Johnson, \italic{Proc. ICASSP\} 3, 1381 (1998), +available along with other references at \docref{the FFTW web page\}. + +\comment ###################################################################### + +\section Known bugs + +\question 27aug:rfftwndbug FFTW 1.1 crashes in rfftwnd on Linux. + +This bug was fixed in FFTW 1.2. There was a bug in \courier{rfftwnd\} +causing an incorrect amount of memory to be allocated. The bug showed +up in Linux with libc-5.3.12 (and nowhere else that we know of). + +\question 15oct:fftwmpibug The MPI transforms in FFTW 1.2 give incorrect results/leak memory. + +These bugs were corrected in FFTW 1.2.1. The MPI transforms (really, +just the transpose routines) in FFTW 1.2 had bugs that could cause +errors in some situations. + +\question 05nov:testsingbug The test programs in FFTW 1.2.1 fail when I change FFTW to use single precision. + +This bug was fixed in FFTW 1.3. (Older versions of FFTW did +work in single precision, but the test programs didn't--the error +tolerances in the tests were set for double precision.) + +\question 24mar:teststoobig The test program in FFTW 1.2.1 fails for n > 46340. + +This bug was fixed in FFTW 1.3. FFTW 1.2.1 produced the right answer, +but the test program was wrong. For large n, n*n in the naive +transform that we used for comparison overflows 32 bit integer +precision, breaking the test. + +\question 24aug:linuxthreads The threaded code fails on Linux Redhat 5.0 + +We had problems with glibc-2.0.5. The code should work with +glibc-2.0.7. 
+ +\question 26sep:bigrfftwnd FFTW 2.0's rfftwnd fails for rank > 1 transforms with a final dimension >= 65536. + +This bug was fixed in FFTW 2.0.1. (There was a 32-bit integer overflow due +to a poorly-parenthesized expression.) + +\question 26mar:primebug FFTW 2.0's complex transforms give the wrong results with prime factors 17 to 97. + +There was a bug in the complex transforms that could cause incorrect +results under (hopefully rare) circumstances for lengths with +intermediate-size prime factors (17-97). This bug was fixed in FFTW +2.1.1. + +\question 05apr:mpichbug FFTW 2.1.1's MPI test programs crash with MPICH. + +This bug was fixed in FFTW 2.1.2. The 2.1/2.1.1 MPI test programs crashed +when using the MPICH implementation of MPI with the \courier{ch_p4\} +device (TCP/IP); the transforms themselves worked fine. + +\question 25may:aixthreadbug FFTW 2.1.2's multi-threaded transforms don't work on AIX. + +This bug was fixed in FFTW 2.1.3. The multi-threaded transforms in +previous versions didn't work with AIX's \courier{pthreads\} +implementation, which idiosyncratically creates threads in detached +(non-joinable) mode by default. + +\question 27sep:bigprimebug FFTW 2.1.2's complex transforms give incorrect results for large prime sizes. + +This bug was fixed in FFTW 2.1.3. FFTW's complex-transform algorithm +for prime sizes (in versions 2.0 to 2.1.2) had an integer overflow +problem that caused incorrect results for many primes greater than +32768 (on 32-bit machines). (Sizes without large prime factors are +not affected.) + +\question 25may:solaristhreadbug FFTW 2.1.3's multi-threaded transforms don't give any speedup on Solaris. + +This bug was fixed in FFTW 2.1.4. (By default, Solaris creates +threads that do not parallelize over multiple processors, so one has +to request the proper behavior specifically.) + +\question 03may:aixflags FFTW 2.1.3 crashes on AIX. 
+ +The FFTW 2.1.3 \courier{configure\} script picked incorrect compiler +flags for the \courier{xlc\} compiler on newer IBM processors. This +is fixed in FFTW 2.1.4. + +\comment Here it ends! + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/index.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,110 @@ + + + +FFTW Frequently Asked Questions with Answers + + + + + + + + +

+FFTW Frequently Asked Questions with Answers +

+This is the list of Frequently Asked Questions about FFTW, a +collection of fast C routines for computing the Discrete Fourier +Transform in one or more dimensions. +

+Index +

+ +
+
+Matteo Frigo and Steven G. Johnson / fftw@fftw.org +- 04 March 2014 +

+Extracted from FFTW Frequently Asked Questions with Answers, +Copyright © 2014 Matteo Frigo and Massachusetts Institute of Technology. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section1.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section1.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,85 @@ + + + +FFTW FAQ - Section 1 + + + + + +

+FFTW FAQ - Section 1
+Introduction and General Information +

+ +
+ +

+Question 1.1. What is FFTW? +

+ +FFTW is a free collection of fast C routines for computing the +Discrete Fourier Transform in one or more dimensions. It includes +complex, real, symmetric, and parallel transforms, and can handle +arbitrary array sizes efficiently. FFTW is typically faster than +other publicly available FFT implementations, and is even +competitive with vendor-tuned libraries. (See our web page for +extensive benchmarks.) To achieve this performance, FFTW uses novel +code-generation and runtime self-optimization techniques (along with +many other tricks). +

+Question 1.2. How do I obtain FFTW? +

+ +FFTW can be found at the FFTW web page. You can also retrieve it from ftp.fftw.org in /pub/fftw. +

+Question 1.3. Is FFTW free software? +

+ +Starting with version 1.3, FFTW is Free Software in the technical +sense defined by the Free Software Foundation (see +Categories of Free and Non-Free Software), and is distributed under the terms of the GNU General Public License. Previous versions of FFTW were +distributed without fee for noncommercial use, but were not +technically ``free.'' +

+Non-free licenses for FFTW are also available that permit different +terms of use than the GPL. +

+Question 1.4. What is this about non-free +licenses? +

+ +The non-free licenses are for companies that wish to use FFTW in their +products but are unwilling to release their software under the GPL +(which would require them to release source code and allow free +redistribution). Such users can purchase an unlimited-use license +from MIT. Contact us for more details. + +

+We could instead have released FFTW under the LGPL, or even disallowed +non-Free usage. Suffice it to say, however, that MIT owns the +copyright to FFTW and they only let us GPL it because we convinced +them that it would neither affect their licensing revenue nor irritate +existing licensees. +

+Question 1.5. In the West? I thought MIT was in the +East? +

+ +Not to an Italian. You could say that we're a Spaghetti Western +(with apologies to Sergio Leone).
+Next: Installing FFTW.
+Return to contents.

+

+Matteo Frigo and Steven G. Johnson / fftw@fftw.org +- 04 March 2014 +

+Extracted from FFTW Frequently Asked Questions with Answers, +Copyright © 2014 Matteo Frigo and Massachusetts Institute of Technology. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section2.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section2.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,285 @@ + + + +FFTW FAQ - Section 2 + + + + + +

+FFTW FAQ - Section 2
+Installing FFTW +

+ +
+ +

+Question 2.1. Which systems does FFTW run +on? +

+ +FFTW is written in ANSI C, and should work on any system with a decent +C compiler. (See also Q2.2 `Does FFTW run on Windows?', Q2.3 `My compiler has trouble with FFTW.'.) FFTW can also take advantage of certain hardware-specific features, +such as cycle counters and SIMD instructions, but this is optional. + +

+Question 2.2. Does FFTW run on Windows? +

+ +Yes, many people have reported successfully using FFTW on Windows with +various compilers. FFTW was not developed on Windows, but the source +code is essentially straight ANSI C. See also the +FFTW Windows installation notes, Q2.3 `My compiler has trouble with FFTW.', and Q3.18 `How do I call FFTW from the Microsoft language du +jour?'. +

+Question 2.3. My compiler has trouble with +FFTW. +

+ +Complain fiercely to the vendor of the compiler. + +

+We have successfully used gcc 3.2.x on x86 and PPC, a recent Compaq C compiler for Alpha, version 6 of IBM's +xlc compiler for AIX, Intel's icc versions 5-7, and Sun WorkShop cc version 6. +

+FFTW is likely to push compilers to their limits, however, and several +compiler bugs have been exposed by FFTW. A partial list follows. + +

+gcc 2.95.x for Solaris/SPARC produces incorrect code for +the test program (workaround: recompile the +libbench2 directory with -O2). +

+NetBSD/macppc 1.6 comes with a gcc version that also miscompiles the test program. (Please report a workaround if you know +one.) +

+gcc 3.2.3 for ARM reportedly crashes during compilation. +This bug is reportedly fixed in later versions of +gcc. +

+Versions 8.0 and 8.1 of Intel's icc falsely claim to be gcc, so you should specify CC="icc -no-gcc"; this is automatic in FFTW 3.1. icc-8.0.066 reportedly produces incorrect code for FFTW 2.1.5, but is fixed in version 8.1. +icc-7.1 compiler build 20030402Z appears to produce +incorrect dependencies, causing the compilation to fail. +icc-7.1 build 20030307Z appears to work fine. (Use +icc -V to check which build you have.) As of 2003/04/18, +build 20030402Z appears not to be available any longer on Intel's +website, whereas the older build 20030307Z is available. + 

+ranlib of GNU binutils 2.9.1 on Irix has been observed to corrupt the FFTW libraries, causing a link failure when +FFTW is compiled. Since ranlib is completely superfluous on Irix, we suggest deleting it from your system and replacing it with +a symbolic link to /bin/echo. +

+If support for SIMD instructions is enabled in FFTW, further compiler +problems may appear: +

+gcc 3.4.[0123] for x86 produces incorrect SSE2 code for +FFTW when -O2 (the best choice for FFTW) is used, causing +FFTW to crash (make check crashes). This bug is fixed in gcc 3.4.4. On x86_64 (amd64/em64t), gcc 3.4.4 reportedly still has a similar problem, but this is fixed as of +gcc 3.4.6. +

+gcc-3.2 for x86 produces incorrect SIMD code if +-O3 is used. The same compiler produces incorrect SIMD +code if no optimization is used, too. When using +gcc-3.2, it is a good idea not to change the default +CFLAGS selected by the configure script. +

+Some 3.0.x and 3.1.x versions of gcc on x86 may crash. gcc so-called 2.96 shipping with RedHat 7.3 crashes +when compiling SIMD code. In both cases, please upgrade to +gcc-3.2 or later. +

+Intel's icc 6.0 misaligns SSE constants, but FFTW has a +workaround. icc 8.x fails to compile FFTW 3.0.x because it +falsely claims to be gcc; we believe this to be a bug in icc, but FFTW 3.1 has a workaround. +

+Visual C++ 2003 reportedly produces incorrect code for SSE/SSE2 when +compiling FFTW. This bug was reportedly fixed in VC++ 2005; +alternatively, you could switch to the Intel compiler. VC++ 6.0 also +reportedly produces incorrect code for the file +reodft11e-r2hc-odd.c unless optimizations are disabled for that file. +

+gcc 2.95 on MacOS X miscompiles AltiVec code (fixed in +later versions). gcc 3.2.x miscompiles AltiVec permutations, but FFTW has a workaround. +gcc 4.0.1 on MacOS for Intel crashes when compiling FFTW; a workaround is to +compile one file without optimization: cd kernel; make CFLAGS=" " trig.lo. +

+gcc 4.1.1 reportedly crashes when compiling FFTW for MIPS; +the workaround is to compile the file it crashes on +(t2_64.c) with a lower optimization level. +

+gcc versions 4.1.2 to 4.2.0 for x86 reportedly miscompile +FFTW 3.1's test program, causing make check to crash (gcc bug #26528). The bug was reportedly fixed in +gcc version 4.2.1 and later. A workaround is to compile +libbench2/verify-lib.c without optimization. +

+Question 2.4. FFTW does not compile on Solaris, complaining about +const. +

+ +We know that at least on Solaris 2.5.x with Sun's compilers 4.2 you +might get error messages from make such as +

+"./fftw.h", line 88: warning: const is a keyword in ANSI +C +

+This is the case when the configure script reports that const does not work: +

+checking for working const... (cached) no +

+You should be aware that Solaris comes with two compilers, namely, +/opt/SUNWspro/SC4.2/bin/cc and /usr/ucb/cc. The latter compiler is non-ANSI. Indeed, it is a perverse shell script +that calls the real compiler in non-ANSI mode. In order +to compile FFTW, change your path so that the right +cc is used. +

+To know whether your compiler is the right one, type +cc -V. If the compiler prints ``ucbcc'', as in +

+ucbcc: WorkShop Compilers 4.2 30 Oct 1996 C +4.2 +

+then the compiler is wrong. The right message is something like + +

+cc: WorkShop Compilers 4.2 30 Oct 1996 C +4.2 +

+Question 2.5. What's the difference between +--enable-3dnow and --enable-k7? +

+ +--enable-k7 enables 3DNow! instructions on K7 processors +(AMD Athlon and its variants). K7 support is provided by assembly +routines generated by a special purpose compiler. +As of fftw-3.2, --enable-k7 is no longer supported. + +

+--enable-3dnow enables generic 3DNow! support using gcc builtin functions. This works on earlier AMD +processors, but it is not as fast as our special assembly routines. +As of fftw-3.1, --enable-3dnow is no longer supported. + +

+Question 2.6. What's the difference between the fma and the non-fma +versions? +

+ +The fma version tries to exploit the fused multiply-add instructions +implemented in many processors such as PowerPC, ia-64, and MIPS. The +two FFTW packages are otherwise identical. In FFTW 3.1, the fma and +non-fma versions were merged together into a single package, and the +configure script attempts to automatically guess which +version to use. +

+The FFTW 3.1 configure script enables fma by default on PowerPC, Itanium, and PA-RISC, and disables it otherwise. You can +force one or the other by using the --enable-fma or --disable-fma flag for configure. +

+Definitely use fma if you have a PowerPC-based system with +gcc (or IBM xlc). This includes all GNU/Linux systems for PowerPC and the older PowerPC-based MacOS systems. Also +use it on PA-RISC and Itanium with the HP/UX compiler. + +

+Definitely do not use the fma version if you have an ia-32 processor +(Intel, AMD, MacOS on Intel, etcetera). + +

+For other architectures/compilers, the situation is not so clear. For +example, ia-64 has the fma instruction, but +gcc-3.2 appears not to exploit it correctly. Other compilers may do the right thing, +but we have not tried them. Please send us your feedback so that we +can update this FAQ entry. +

+Question 2.7. Which language is FFTW written +in? +

+ +FFTW is written in ANSI C. Most of the code, however, was +automatically generated by a program called +genfft, written in the Objective Caml dialect of ML. You do not need to know ML or to +have an Objective Caml compiler in order to use FFTW. + +

+genfft is provided with the FFTW sources, which means that +you can play with the code generator if you want. In this case, you +need a working Objective Caml system. Objective Caml is available +from the Caml web page. +

+Question 2.8. Can I call FFTW from Fortran? +

+ +Yes, FFTW (versions 1.3 and higher) contains a Fortran-callable +interface, documented in the FFTW manual. + +

+By default, FFTW configures its Fortran interface to work with the +first compiler it finds, e.g. g77. To configure for a different, incompatible Fortran compiler +foobar, use ./configure F77=foobar when installing FFTW. (In the case of g77, however, FFTW 3.x also includes an extra set of +Fortran-callable routines with one less underscore at the end of +identifiers, which should cover most other Fortran compilers on Linux +at least.) +

+Question 2.9. Can I call FFTW from C++? +

+ +Most definitely. FFTW should compile and/or link under any C++ +compiler. Moreover, it is likely that the C++ +<complex> template class is bit-compatible with FFTW's complex-number format +(see the FFTW manual for more details). + +

+Question 2.10. Why isn't FFTW written in +Fortran/C++? +

+ +Because we don't like those languages, and neither approaches the +portability of C. +

+Question 2.11. How do I compile FFTW to run in single +precision? +

+ +On a Unix system: configure --enable-float. On a non-Unix system: edit config.h to #define the symbol FFTW_SINGLE (for FFTW 3.x). In both cases, you must then +recompile FFTW. In FFTW 3, all FFTW identifiers will then begin with +fftwf_ instead of fftw_. +

+Question 2.12. --enable-k7 does not work on +x86-64 +

+ +Support for --enable-k7 was discontinued in fftw-3.2. + +

+The fftw-3.1 release supports --enable-k7. This option only works on +32-bit x86 machines that implement 3DNow!, including the AMD Athlon +and the AMD Opteron in 32-bit mode. --enable-k7 does not work on AMD +Opteron in 64-bit mode. Use --enable-sse for x86-64 machines. + +

+FFTW supports 3DNow! by means of assembly code generated by a +special-purpose compiler. It is hard to produce assembly code that +works in both 32-bit and 64-bit mode.


+Next: Using FFTW.
+Back: Introduction and General Information.
+Return to contents.

+

+Matteo Frigo and Steven G. Johnson / fftw@fftw.org +- 04 March 2014 +

+Extracted from FFTW Frequently Asked Questions with Answers, +Copyright © 2014 Matteo Frigo and Massachusetts Institute of Technology. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section3.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section3.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,334 @@ + + + +FFTW FAQ - Section 3 + + + + + +

+FFTW FAQ - Section 3
+Using FFTW +

+ +
+ +

+Question 3.1. Why not support the FFTW 2 interface in FFTW +3? +

+ +FFTW 3 has semantics incompatible with earlier versions: its plans can +only be used for a given stride, multiplicity, and other +characteristics of the input and output arrays; these stronger +semantics are necessary for performance reasons. Thus, it is +impossible to efficiently emulate the older interface (whose plans can +be used for any transform of the same size). We believe that it +should be possible to upgrade most programs without any difficulty, +however. +

+Question 3.2. Why do FFTW 3 plans encapsulate the input/output arrays +and not just the algorithm? +

+ +There are several reasons: +
    +
  • It was important for performance reasons that the plan be specific to +array characteristics like the stride (and alignment, for SIMD), and +requiring that the user maintain these invariants is error prone. + +
  • In most high-performance applications, as far as we can tell, you are +usually transforming the same array over and over, so FFTW's semantics +should not be a burden. +
  • If you need to transform another array of the same size, creating a +new plan once the first exists is a cheap operation. + +
  • If you need to transform many arrays of the same size at once, you +should really use the plan_many routines in FFTW's "advanced" +interface. +
  • If the abovementioned array characteristics are the same, you are +willing to pay close attention to the documentation, and you really +need to, we provide a "new-array execution" interface to +apply a plan to a new array. +
+ +

+Question 3.3. FFTW seems really slow. +

+ +You are probably recreating the plan before every transform, rather +than creating it once and reusing it for all transforms of the same +size. FFTW is designed to be used in the following way: + +
    +
  • First, you create a plan. This will take several seconds. + +
  • Then, you reuse the plan many times to perform FFTs. These are fast. + +
+If you don't need to compute many transforms and the time for the +planner is significant, you have two options. First, you can use the +FFTW_ESTIMATE option in the planner, which uses heuristics +instead of runtime measurements and produces a good plan in a short +time. Second, you can use the wisdom feature to precompute the plan; +see Q3.9 `Can I save FFTW's plans?' +

+Question 3.4. FFTW slows down after repeated +calls. +

+ +Probably, NaNs or similar are creeping into your data, and the +slowdown is due to the resulting floating-point exceptions. For +example, be aware that repeatedly FFTing the same array is a diverging +process (because FFTW computes the unnormalized transform). + +

+Question 3.5. An FFTW routine is crashing when I call +it. +

+ +Did the FFTW test programs pass (make check, or cd tests; make bigcheck if you want to be paranoid)? If so, you almost +certainly have a bug in your own code. For example, you could be +passing invalid arguments (such as wrongly-sized arrays) to FFTW, or +you could simply have memory corruption elsewhere in your program that +causes random crashes later on. Please don't complain to us unless +you can come up with a minimal self-contained program (preferably +under 30 lines) that illustrates the problem. + +

+Question 3.6. My Fortran program crashes when calling +FFTW. +

+ +As described in the manual, on 64-bit machines you must store the +plans in variables large enough to hold a pointer, for example +integer*8. We recommend using integer*8 on 32-bit machines as well, to simplify porting. + +

+Question 3.7. FFTW gives results different from my old +FFT. +

+ +People follow many different conventions for the DFT, and you should +be sure to know the ones that we use (described in the FFTW manual). +In particular, you should be aware that the +FFTW_FORWARD/FFTW_BACKWARD directions correspond to signs of -1/+1 in the exponent of the DFT definition. +(Numerical Recipes uses the opposite convention.) +

+You should also know that we compute an unnormalized transform. In +contrast, Matlab is an example of a program that computes a normalized +transform. See Q3.10 `Why does your inverse transform return a scaled +result?'. +

+Finally, note that floating-point arithmetic is not exact, so +different FFT algorithms will give slightly different results (on the +order of the numerical accuracy; typically a fractional difference of +1e-15 or so in double precision). +

+Question 3.8. FFTW gives different results between +runs +

+ +If you use FFTW_MEASURE or FFTW_PATIENT mode, then the algorithm FFTW employs is not deterministic: it depends on +runtime performance measurements. This will cause the results to vary +slightly from run to run. However, the differences should be slight, +on the order of the floating-point precision, and therefore should +have no practical impact on most applications. + +

+If you use saved plans (wisdom) or FFTW_ESTIMATE mode, however, then the algorithm is deterministic and the results should be +identical between runs. +

+Question 3.9. Can I save FFTW's plans? +

+ +Yes. Starting with version 1.2, FFTW provides the +wisdom mechanism for saving plans; see the FFTW manual. + +

+Question 3.10. Why does your inverse transform return a scaled +result? +

+ +Computing the forward transform followed by the backward transform (or +vice versa) yields the original array scaled by the size of the array. + (For multi-dimensional transforms, the size of the array is the +product of the dimensions.) We could, instead, have chosen a +normalization that would have returned the unscaled array. Or, to +accommodate the many conventions in this matter, the transform routines +could have accepted a "scale factor" parameter. We did not +do this, however, for two reasons. First, we didn't want to sacrifice +performance in the common case where the scale factor is 1. Second, in +real applications the FFT is followed or preceded by some computation +on the data, into which the scale factor can typically be absorbed at +little or no cost. +

+Question 3.11. How can I make FFTW put the origin (zero frequency) at +the center of its output? +

+ +For human viewing of a spectrum, it is often convenient to put the +origin in frequency space at the center of the output array, rather +than in the zero-th element (the default in FFTW). If all of the +dimensions of your array are even, you can accomplish this by simply +multiplying each element of the input array by (-1)^(i + j + ...), +where i, j, etcetera are the indices of the element. (This trick is a +general property of the DFT, and is not specific to FFTW.) + +

+Question 3.12. How do I FFT an image/audio file in +foobar format? +

+ +FFTW performs an FFT on an array of floating-point values. You can +certainly use it to compute the transform of an image or audio stream, +but you are responsible for figuring out your data format and +converting it to the form FFTW requires. + +

+Question 3.13. My program does not link (on +Unix). +

+ +The libraries must be listed in the correct order +(-lfftw3 -lm for FFTW 3.x) and after your program sources/objects. (The general rule is that if A uses B, then A must be listed before B in the link command.) +

+Question 3.14. I included your header, but linking still +fails. +

+ +You're a C++ programmer, aren't you? You have to compile the FFTW +library and link it into your program, not just +#include <fftw3.h>. (Yes, this is really a FAQ.) +

+Question 3.15. My program crashes, complaining about stack +space. +

+ +You cannot declare large arrays with automatic storage (e.g. via +fftw_complex array[N]); you should use fftw_malloc (or equivalent) to allocate the arrays you want +to transform if they are larger than a few hundred elements. + +

+Question 3.16. FFTW seems to have a memory +leak. +

+ +After you create a plan, FFTW caches the information required to +quickly recreate the plan. (See Q3.9 `Can I save FFTW's plans?') It also maintains a small amount of other persistent memory. You can deallocate all of +FFTW's internally allocated memory, if you wish, by calling +fftw_cleanup(), as documented in the manual. +

+Question 3.17. The output of FFTW's transform is all +zeros. +

+ +You should initialize your input array after creating the plan, unless you use FFTW_ESTIMATE: planning with FFTW_MEASURE or FFTW_PATIENT overwrites the input/output arrays, as described in the manual. + +

+Question 3.18. How do I call FFTW from the Microsoft language du +jour? +

+ +Please do not ask us Windows-specific questions. We do not +use Windows. We know nothing about Visual Basic, Visual C++, or .NET. + Please find the appropriate Usenet discussion group and ask your +question there. See also Q2.2 `Does FFTW run on Windows?'. +

+Question 3.19. Can I compute only a subset of the DFT +outputs? +

+ +In general, no, an FFT intrinsically computes all outputs from all +inputs. In principle, there is something called a +pruned FFT that can do what you want, but to compute K outputs out of N the +complexity is in general O(N log K) instead of O(N log N), thus saving +only a small additive factor in the log. (The same argument holds if +you instead have only K nonzero inputs.) + +

+There are some specific cases in which you can get the O(N log K) +performance benefits easily, however, by combining a few ordinary +FFTs. In particular, the case where you want the first K outputs, +where K divides N, can be handled by performing N/K transforms of size +K and then summing the outputs multiplied by appropriate phase +factors. For more details, see pruned FFTs with FFTW. +

+There are also some algorithms that compute pruned transforms +approximately, but they are beyond the scope of this FAQ. + +

+Question 3.20. Can I use FFTW's routines for in-place and +out-of-place matrix transposition? +

+ +You can use the FFTW guru interface to create a rank-0 transform of +vector rank 2 where the vector strides are transposed. (A rank-0 +transform is equivalent to a 1D transform of size 1, which just +copies the input into the output.) Specifying the same location for +the input and output makes the transpose in-place. + +

+For double-valued data stored in row-major format, plan creation looks +like this:

+fftw_plan plan_transpose(int rows, int cols, double *in, double *out)
+{
+    const unsigned flags = FFTW_ESTIMATE; /* other flags are possible */
+    fftw_iodim howmany_dims[2];
+
+    howmany_dims[0].n  = rows;
+    howmany_dims[0].is = cols;
+    howmany_dims[0].os = 1;
+
+    howmany_dims[1].n  = cols;
+    howmany_dims[1].is = 1;
+    howmany_dims[1].os = rows;
+
+    return fftw_plan_guru_r2r(/*rank=*/ 0, /*dims=*/ NULL,
+                              /*howmany_rank=*/ 2, howmany_dims,
+                              in, out, /*kind=*/ NULL, flags);
+}
+
+(This entry was written by Rhys Ulerich.) +
+Next: Internals of FFTW.
+Back: Installing FFTW.
+Return to contents.

+

+Matteo Frigo and Steven G. Johnson / fftw@fftw.org +- 04 March 2014 +

+Extracted from FFTW Frequently Asked Questions with Answers, +Copyright © 2014 Matteo Frigo and Massachusetts Institute of Technology. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section4.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section4.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,64 @@ + + + +FFTW FAQ - Section 4 + + + + + +

+FFTW FAQ - Section 4
+Internals of FFTW +

+ +
+ +

+Question 4.1. How does FFTW work? +

+ +The innovation (if it can be so called) in FFTW consists in having a +variety of composable solvers, representing different FFT algorithms and implementation strategies, whose combination into a +particular plan for a given size can be determined at runtime according to the characteristics of your machine/compiler. +This peculiar software architecture allows FFTW to adapt itself to +almost any machine. +

+For more details (albeit somewhat outdated), see the paper "FFTW: +An Adaptive Software Architecture for the FFT", by M. Frigo and +S. G. Johnson, Proc. ICASSP 3, 1381 (1998), also available at the FFTW web page. +

+Question 4.2. Why is FFTW so fast? +

+ +This is a complex question, and there is no simple answer. In fact, +the authors do not fully know the answer, either. In addition to many +small performance hacks throughout FFTW, there are three general +reasons for FFTW's speed. +
    +
  • FFTW uses a variety of FFT algorithms and implementation styles +that can be arbitrarily composed to adapt itself to +a machine. See Q4.1 `How does FFTW work?'. +
  • FFTW uses a code generator to produce highly-optimized +routines for computing small transforms. + +
  • FFTW uses explicit divide-and-conquer to take advantage +of the memory hierarchy. +
+For more details (albeit somewhat outdated), see the paper "FFTW: +An Adaptive Software Architecture for the FFT", by M. Frigo and +S. G. Johnson, Proc. ICASSP 3, 1381 (1998), available along with other references at +the FFTW web page.
+Next: Known bugs.
+Back: Using FFTW.
+Return to contents.

+

+Matteo Frigo and Steven G. Johnson / fftw@fftw.org +- 04 March 2014 +

+Extracted from FFTW Frequently Asked Questions with Answers, +Copyright © 2014 Matteo Frigo and Massachusetts Institute of Technology. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section5.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/fftw-faq.html/section5.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,148 @@ + + + +FFTW FAQ - Section 5 + + + + + +

+FFTW FAQ - Section 5
+Known bugs +

+ +
+ +

+Question 5.1. FFTW 1.1 crashes in rfftwnd on +Linux. +

+ +This bug was fixed in FFTW 1.2. There was a bug in +rfftwnd causing an incorrect amount of memory to be allocated. The bug showed +up in Linux with libc-5.3.12 (and nowhere else that we know of). + +

+Question 5.2. The MPI transforms in FFTW 1.2 give incorrect +results/leak memory. +

+ +These bugs were corrected in FFTW 1.2.1. The MPI transforms (really, +just the transpose routines) in FFTW 1.2 had bugs that could cause +errors in some situations. +

+Question 5.3. The test programs in FFTW 1.2.1 fail when I change FFTW +to use single precision. +

+ +This bug was fixed in FFTW 1.3. (Older versions of FFTW did +work in single precision, but the test programs didn't--the error +tolerances in the tests were set for double precision.) + +

+Question 5.4. The test program in FFTW 1.2.1 fails for n > +46340. +

+ +This bug was fixed in FFTW 1.3. FFTW 1.2.1 produced the right answer, +but the test program was wrong. For large n, n*n in the naive +transform that we used for comparison overflows 32 bit integer +precision, breaking the test. +

+Question 5.5. The threaded code fails on Linux Redhat +5.0 +

+ +We had problems with glibc-2.0.5. The code should work with +glibc-2.0.7. +

+Question 5.6. FFTW 2.0's rfftwnd fails for rank > 1 transforms +with a final dimension >= 65536. +

+ +This bug was fixed in FFTW 2.0.1. (There was a 32-bit integer +overflow due to a poorly-parenthesized expression.) +

+Question 5.7. FFTW 2.0's complex transforms give the wrong results +with prime factors 17 to 97. +

+ +There was a bug in the complex transforms that could cause incorrect +results under (hopefully rare) circumstances for lengths with +intermediate-size prime factors (17-97). This bug was fixed in FFTW +2.1.1. +

+Question 5.8. FFTW 2.1.1's MPI test programs crash with +MPICH. +

+ +This bug was fixed in FFTW 2.1.2. The 2.1/2.1.1 MPI test programs +crashed when using the MPICH implementation of MPI with the +ch_p4 device (TCP/IP); the transforms themselves worked fine. + +

+Question 5.9. FFTW 2.1.2's multi-threaded transforms don't work on +AIX. +

+ +This bug was fixed in FFTW 2.1.3. The multi-threaded transforms in +previous versions didn't work with AIX's +pthreads implementation, which idiosyncratically creates threads in detached +(non-joinable) mode by default. +

+Question 5.10. FFTW 2.1.2's complex transforms give incorrect results +for large prime sizes. +

+ +This bug was fixed in FFTW 2.1.3. FFTW's complex-transform algorithm +for prime sizes (in versions 2.0 to 2.1.2) had an integer overflow +problem that caused incorrect results for many primes greater than +32768 (on 32-bit machines). (Sizes without large prime factors are +not affected.) +

+Question 5.11. FFTW 2.1.3's multi-threaded transforms don't give any +speedup on Solaris. +

+ +This bug was fixed in FFTW 2.1.4. (By default, Solaris creates +threads that do not parallelize over multiple processors, so one has +to request the proper behavior specifically.) + +

+Question 5.12. FFTW 2.1.3 crashes on AIX. +

+ +The FFTW 2.1.3 configure script picked incorrect compiler flags for the xlc compiler on newer IBM processors. This +is fixed in FFTW 2.1.4.
+Back: Internals of FFTW.
+Return to contents.

+

+Matteo Frigo and Steven G. Johnson / fftw@fftw.org +- 04 March 2014 +

+Extracted from FFTW Frequently Asked Questions with Answers, +Copyright © 2014 Matteo Frigo and Massachusetts Institute of Technology. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/html.refs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/html.refs Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,7 @@ +\ References for the FFTW FAQ +\ +the FFTW web page \ http://www.fftw.org +FFTW Windows installation notes \ http://www.fftw.org/install/windows.html +Categories of Free and Non-Free Software \ http://www.gnu.org/philosophy/categories.html +the Caml web page \ http://caml.inria.fr +pruned FFTs with FFTW \ http://www.fftw.org/pruned.html diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/m-ascii.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/m-ascii.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,189 @@ +## ASCII output +# Copyright (C) 1993-1995 Ian Jackson. + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# It is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# (Note: I do not consider works produced using these BFNN processing +# tools to be derivative works of the tools, so they are NOT covered +# by the GPL. However, I would appreciate it if you credited me if +# appropriate in any documents you format using BFNN.) 
+ +sub ascii_init { + open(ASCII,">$prefix.ascii"); +} + +sub ascii_startmajorheading { + print ASCII '='x79,"\n\n"; + $ascii_status= 'h'; + &ascii_text($_[0] ? "Section $_[0]. " : ''); +} + +sub ascii_startminorheading { + print ASCII '-'x79,"\n\n"; + $ascii_status= 'h'; +} + +sub ascii_italic { &ascii_text('*'); } +sub ascii_enditalic { $ascii_para .= '*'; } + +sub ascii_email { &ascii_text('<'); } sub ascii_endemail { &ascii_text('>'); } + +sub ascii_ftpon { } sub ascii_endftpon { } +sub ascii_ftpin { } sub ascii_endftpin { } +sub ascii_docref { } sub ascii_enddocref { } +sub ascii_courier { } sub ascii_endcourier { } +sub ascii_newsgroup { } sub ascii_endnewsgroup { } +sub ascii_ftpsilent { $ascii_ignore++; } +sub ascii_endftpsilent { $ascii_ignore--; } + +sub ascii_text { + return if $ascii_ignore; + if ($ascii_status eq '') { + $ascii_status= 'p'; + } + $ascii_para .= $_[0]; +} + +sub ascii_tab { + local ($n) = $_[0]-length($ascii_para); + $ascii_para .= ' 'x$n if $n>0; +} + +sub ascii_newline { + return unless $ascii_status eq 'p'; + &ascii_writepara; +} + +sub ascii_writepara { + local ($thisline, $thisword, $rest); + for (;;) { + last unless $ascii_para =~ m/\S/; + $thisline= $ascii_indentstring; + for (;;) { + last unless $ascii_para =~ m/^(\s*\S+)/; + unless (length($1) + length($thisline) < 75 || + length($thisline) == length($ascii_indentstring)) { + last; + } + $thisline .= $1; + $ascii_para= $'; + } + $ascii_para =~ s/^\s*//; + print ASCII $thisline,"\n"; + $ascii_indentstring= $ascii_nextindent; + last unless length($ascii_para); + } + $ascii_status= ''; $ascii_para= ''; +} + +sub ascii_endpara { + return unless $ascii_status eq 'p'; + &ascii_writepara; + print ASCII "\n"; +} + +sub ascii_endheading { + $ascii_para =~ s/\s*$//; + print ASCII "$ascii_para\n\n"; + $ascii_status= ''; + $ascii_para= ''; +} + +sub ascii_endmajorheading { &ascii_endheading(@_); } +sub ascii_endminorheading { &ascii_endheading(@_); } + +sub ascii_startverbatim { + 
$ascii_vstatus= $ascii_status; + &ascii_writepara; +} + +sub ascii_verbatim { + print ASCII $_[0],"\n"; +} + +sub ascii_endverbatim { + $ascii_status= $ascii_vstatus; +} + +sub ascii_finish { + close(ASCII); +} + +sub ascii_startindex { $ascii_status= ''; } +sub ascii_endindex { $ascii_status= 'p'; } + +sub ascii_endindexitem { + printf ASCII " %-11s %-.66s\n",$ascii_left,$ascii_para; + $ascii_status= 'p'; + $ascii_para= ''; +} + +sub ascii_startindexitem { + $ascii_left= $_[1]; +} + +sub ascii_startindexmainitem { + $ascii_left= $_[1]; + print ASCII "\n" if $ascii_status eq 'p'; +} + +sub ascii_startindent { + $ascii_istatus= $ascii_status; + &ascii_writepara; + $ascii_indentstring= " $ascii_indentstring"; + $ascii_nextindent= " $ascii_nextindent"; +} + +sub ascii_endindent { + $ascii_indentstring =~ s/^ //; + $ascii_nextindent =~ s/^ //; + $ascii_status= $ascii_istatus; +} + +sub ascii_startpackedlist { $ascii_plc=0; } +sub ascii_endpackedlist { &ascii_newline if !$ascii_plc; } +sub ascii_packeditem { + &ascii_newline if !$ascii_plc; + &ascii_tab($ascii_plc*40+5); + $ascii_plc= !$ascii_plc; +} + +sub ascii_startlist { + &ascii_endpara; + $ascii_indentstring= " $ascii_indentstring"; + $ascii_nextindent= " $ascii_nextindent"; +} + +sub ascii_endlist { + &ascii_endpara; + $ascii_indentstring =~ s/^ //; + $ascii_nextindent =~ s/^ //; +} + +sub ascii_item { + &ascii_newline; + $ascii_indentstring =~ s/ $/* /; +} + +sub ascii_pageref { + &ascii_text("Q$_[1] \`"); +} + +sub ascii_endpageref { + &ascii_text("'"); +} + +1; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/m-html.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/m-html.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,337 @@ +## HTML output +# Copyright (C) 1993-1995 Ian Jackson. 
+ +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# It is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# (Note: I do not consider works produced using these BFNN processing +# tools to be derivative works of the tools, so they are NOT covered +# by the GPL. However, I would appreciate it if you credited me if +# appropriate in any documents you format using BFNN.) + +%saniarray= ('<','lt', '>','gt', '&','amp', '"','quot'); + +sub html_init { + $html_prefix = './'.$prefix; + $html_prefix =~ s:^\.//:/:; + system('rm','-r',"$html_prefix.html"); + system('mkdir',"$html_prefix.html"); + open(HTML,">$html_prefix.html/index.html"); + print HTML "\n"; + print HTML "\n"; + $html_needpara= -1; + $html_end=''; + chop($html_date=`date '+%d %B %Y'`); + chop($html_year=`date '+%Y'`); +} + +sub html_startup { + print HTML < +$user_title + + + + + + + + +

+$user_title +

+END + &html_readrefs($_[0]); + if (length($user_copyrightref)) { + local ($refn) = $qrefn{$user_copyrightref}; + if (!length($refn)) { + warn "unknown question (copyright) `$user_copyrightref'"; + } + $refn =~ m/(\d+)\.(\d+)/; + local ($s,$n) = ($1,$2); + $html_copyrighthref= ($s == $html_sectionn)?'':"section$s.html"; + $html_copyrighthref.= "#$qn2ref{$s,$n}"; + } +} + +sub html_close { + print HTML $html_end,"
\n$user_author\n"; + print HTML "- $html_date\n

\n"; + print HTML "Extracted from $user_title,\n"; + print HTML "" if length($html_copyrighthref); + print HTML "Copyright © $html_year $user_copyholder."; + print HTML "" if length($html_copyrighthref); + print HTML "\n\n"; + close(HTML); +} + +sub html_startmajorheading { + local ($ref, $this,$next,$back) = @_; + local ($nextt,$backt); + $this =~ s/^Section /section/; $html_sectionn= $ref; + $next =~ s/^Section /section/ && ($nextt= $sn2title{$'}); + $back =~ s/^Section /section/ ? ($backt= $sn2title{$'}) : ($back=''); + if ($html_sectionn) { + &html_close; + open(HTML,">$html_prefix.html/$this.html"); + print HTML "\n"; + print HTML "\n"; + $html_end= "
\n"; + $html_end.= "Next: $nextt.
\n" + if $next; + $html_end.= "Back: $backt.
\n" + if $back; + $html_end.= ""; + $html_end.= "Return to contents.

\n"; + print HTML < +$user_brieftitle - Section $html_sectionn + + + + +END + print HTML "" if $next; + print HTML "" if $back; + print HTML < +

+$user_brieftitle - Section $html_sectionn
+END + $html_needpara= -1; + } + else { + print HTML "\n

\n"; + $html_needpara=-1; + } +} + +sub html_endmajorheading { + print HTML "\n

\n\n"; + $html_needpara=-1; +} + +sub html_startminorheading { + local ($ref, $this) = @_; + $html_needpara=0; + $this =~ m/^Question (\d+)\.(\d+)/; + local ($s,$n) = ($1,$2); + print HTML "\n

\n"; +} + +sub html_endminorheading { + print HTML "\n

\n\n"; + $html_needpara=-1; +} + +sub html_newsgroup { &arg('newsgroup'); } +sub html_endnewsgroup { &endarg('newsgroup'); } +sub html_do_newsgroup { + print HTML "$_[0]"; +} + +sub html_email { &arg('email'); } +sub html_endemail { &endarg('email'); } +sub html_do_email { + print HTML "$_[0]"; +} + +sub html_courier { print HTML "" ; } +sub html_endcourier { print HTML ""; } +sub html_italic { print HTML "" ; } +sub html_enditalic { print HTML "" ; } + +sub html_docref { &arg('docref'); } +sub html_enddocref { &endarg('docref'); } +sub html_do_docref { + if (!defined($html_refval{$_[0]})) { + warn "undefined HTML reference $_[0]"; + $html_refval{$n}='UNDEFINED'; + } + print HTML ""; + &recurse($_[0]); + print HTML ""; +} + +sub html_readrefs { + local ($p); + open(HTMLREFS,"<$_[0]") || (warn("failed to open HTML refs $_[0]: $!"),return); + while() { + next if m/^\\\s/; + s/\s*\n$//; + if (s/^\\prefix\s*//) { + $p= $'; next; + } elsif (s/^\s*(\S.*\S)\s*\\\s*//) { + $_=$1; $v=$'; + s/\\\\/\\/g; + $html_refval{$_}= $p.$v; + } else { + warn("ununderstood line in HTML refs >$_<"); + } + } + close(HTMLREFS); +} + +sub html_ftpsilent { &arg('ftpsilent'); } +sub html_endftpsilent { &endarg('ftpsilent'); } +sub html_do_ftpsilent { + if ($_[0] =~ m/:/) { + $html_ftpsite= $`; + $html_ftpdir= $'.'/'; + } else { + $html_ftpsite= $_[0]; + $html_ftpdir= ''; + } +} + +sub html_ftpon { &arg('ftpon'); } +sub html_endftpon { &endarg('ftpon'); } +sub html_do_ftpon { +#print STDERR "ftpon($_[0])\n"; + $html_ftpsite= $_[0]; $html_ftpdir= ''; + print HTML ""; + &recurse($_[0]); + print HTML ""; +} + +sub html_ftpin { &arg('ftpin'); } +sub html_endftpin { &endarg('ftpin'); } +sub html_do_ftpin { +#print STDERR "ftpin($_[0])\n"; + print HTML ""; + &recurse($_[0]); + print HTML ""; +} + +sub html_text { + print HTML "\n

\n" if $html_needpara > 0; + $html_needpara=0; + $html_stuff= &html_sanitise($_[0]); + while ($html_stuff =~ s/^(.{40,70}) //) { + print HTML "$1\n"; + } + print HTML $html_stuff; +} + +sub html_tab { + $htmltabignore++ || warn "html tab ignored"; +} + +sub html_newline { print HTML "
\n" ; } +sub html_startverbatim { print HTML "

\n"   ;                       }
+sub html_verbatim      { print HTML &html_sanitise($_[0]),"\n";         }
+sub html_endverbatim   { print HTML "
\n" ; $html_needpara= -1; } + +sub html_endpara { + $html_needpara || $html_needpara++; +} + +sub html_finish { + &html_close; +} + +sub html_startindex { + print HTML "
    \n"; +} + +sub html_endindex { + print HTML "

\n"; +} + +sub html_startindexitem { + local ($ref,$qval) = @_; + $qval =~ m/Q(\d+)\.(\d+)/; + local ($s,$n) = ($1,$2); + print HTML "
  • Q$s.$n. "; + $html_indexunhead=''; +} + +sub html_startindexmainitem { + local ($ref,$s) = @_; + $s =~ m/\d+/; $s= $&; + print HTML "

    " if ($s > 1); + print HTML "
  • Section $s. "; + $html_indexunhead=''; +} + +sub html_endindexitem { + print HTML "$html_indexunhead\n"; +} + +sub html_startlist { + print HTML "\n"; + $html_itemend="
      "; +} + +sub html_endlist { + print HTML "$html_itemend\n
    \n"; + $html_needpara=-1 +} + +sub html_item { + print HTML "$html_itemend\n
  • "; + $html_itemend=""; + $html_needpara=-1; +} + +sub html_startpackedlist { + print HTML "\n"; + $html_itemend=""; +} + +sub html_endpackedlist { + print HTML "$html_itemend\n\n"; + $html_needpara=-1; +} + +sub html_packeditem { + print HTML "$html_itemend\n
  • "; + $html_itemend=""; + $html_needpara=-1; +} + +sub html_startindent { print HTML "
    \n"; } +sub html_endindent { print HTML "
    \n"; } + +sub html_pageref { + local ($ref,$sq) = @_; + $sq =~ m/(\d+)\.(\d+)/; + local ($s,$n) = ($1,$2); + print HTML "Q$sq \`"; +} + +sub html_endpageref { + print HTML "'"; +} + +sub html_sanitise { + local ($in) = @_; + local ($out); + while ($in =~ m/[<>&"]/) { + $out.= $`. '&'. $saniarray{$&}. ';'; + $in=$'; + } + $out.= $in; + $out; +} + +1; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/m-info.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/m-info.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,226 @@ +## Info output +# Copyright (C) 1993-1995 Ian Jackson. + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# It is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# (Note: I do not consider works produced using these BFNN processing +# tools to be derivative works of the tools, so they are NOT covered +# by the GPL. However, I would appreciate it if you credited me if +# appropriate in any documents you format using BFNN.) 
+ +sub info_init { + open(INFO,">$prefix.info"); + print INFO <'); } + +sub info_ftpon { } sub info_endftpon { } +sub info_ftpin { } sub info_endftpin { } +sub info_docref { } sub info_enddocref { } +sub info_courier { } sub info_endcourier { } +sub info_newsgroup { } sub info_endnewsgroup { } +sub info_ftpsilent { $info_ignore++; } +sub info_endftpsilent { $info_ignore--; } + +sub info_text { + return if $info_ignore; + if ($info_status eq '') { + $info_status= 'p'; + } + $info_para .= $_[0]; +} + +sub info_tab { + local ($n) = $_[0]-length($info_para); + $info_para .= ' 'x$n if $n>0; +} + +sub info_newline { + return unless $info_status eq 'p'; + print INFO &info_writepara; +} + +sub info_writepara { + local ($thisline, $thisword, $rest, $output); + for (;;) { + last unless $info_para =~ m/\S/; + $thisline= $info_indentstring; + for (;;) { + last unless $info_para =~ m/^(\s*\S+)/; + unless (length($1) + length($thisline) < 75 || + length($thisline) == length($info_indentstring)) { + last; + } + $thisline .= $1; + $info_para= $'; + } + $info_para =~ s/^\s*//; + $output.= $thisline."\n"; + $info_indentstring= $info_nextindent; + last unless length($info_para); + } + $info_status= ''; $info_para= ''; + return $output; +} + +sub info_endpara { + return unless $info_status eq 'p'; + print INFO &info_writepara; + print INFO "\n"; +} + +sub info_endheading { + $info_para =~ s/\s*$//; + print INFO "$info_para\n\n"; + $info_status= ''; + $info_para= ''; +} + +sub info_endmajorheading { &info_endheading(@_); } +sub info_endminorheading { &info_endheading(@_); } + +sub info_startverbatim { + print INFO &info_writepara; +} + +sub info_verbatim { + print INFO $_[0],"\n"; +} + +sub info_endverbatim { + $info_status= $info_vstatus; +} + +sub info_finish { + close(INFO); +} + +sub info_startindex { + &info_endpara; + $info_moredetail= ''; + $info_status= ''; +} + +sub info_endindex { + print INFO "$info_moredetail\n" if length($info_moredetail); +} + +sub info_endindexitem { + 
$info_indentstring= sprintf("* %-17s ",$info_label.'::'); + $info_nextindent= ' 'x20; + local ($txt); + $txt= &info_writepara; + if ($info_main) { + print INFO $label.$txt; + $txt =~ s/^.{20}//; + $info_moredetail.= $txt; + } else { + $info_moredetail.= $label.$txt; + } + $info_indentstring= $info_nextindent= ''; + $info_status='p'; +} + +sub info_startindexitem { + print INFO "* Menu:\n" if $info_status eq ''; + $info_status= ''; + $info_label= $_[2]; + $info_main= 0; +} + +sub info_startindexmainitem { + print INFO "* Menu:\n" if $info_status eq ''; + $info_label= $_[2]; + $info_main= 1; + $info_moredetail .= "\n$_[2], "; + $info_status= ''; +} + +sub info_startindent { + $info_istatus= $info_status; + print INFO &info_writepara; + $info_indentstring= " $info_indentstring"; + $info_nextindent= " $info_nextindent"; +} + +sub info_endindent { + $info_indentstring =~ s/^ //; + $info_nextindent =~ s/^ //; + $info_status= $info_istatus; +} + +sub info_startpackedlist { $info_plc=0; } +sub info_endpackedlist { &info_newline if !$info_plc; } +sub info_packeditem { + &info_newline if !$info_plc; + &info_tab($info_plc*40+5); + $info_plc= !$info_plc; +} + +sub info_startlist { + $info_istatus= $info_status; + print INFO &info_writepara; + $info_indentstring= " $info_indentstring"; + $info_nextindent= " $info_nextindent"; +} + +sub info_endlist { + $info_indentstring =~ s/^ //; + $info_nextindent =~ s/^ //; + $info_status= $info_lstatus; +} + +sub info_item { + &info_newline; + $info_indentstring =~ s/ $/* /; +} + +sub info_pageref { + &info_text("*Note Question $_[1]:: \`"); +} + +sub info_endpageref { + &info_text("'"); +} + +1; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/m-lout.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/m-lout.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,242 @@ +## Lout output +# Copyright (C) 1993-1995 Ian Jackson. 
+ +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# It is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# (Note: I do not consider works produced using these BFNN processing +# tools to be derivative works of the tools, so they are NOT covered +# by the GPL. However, I would appreciate it if you credited me if +# appropriate in any documents you format using BFNN.) + +sub lout_init { + open(LOUT,">$prefix.lout"); + chop($dprint= `date '+%d %B %Y'`); + $dprint =~ s/^0//; +} + +sub lout_startup { + local ($lbs) = &lout_sanitise($user_brieftitle); + print LOUT <0)*40+5); + $lout_plc= !$lout_plc; +} + +sub lout_startlist { + &lout_endpara; + print LOUT "\@RawIndentedList style {\@Bullet} indent {0.5i} gap {1.1vx}\n"; + $lout_styles .= 'l'; + $lout_status= ''; +} + +sub lout_endlist { + &lout_endpara; + print LOUT "\@EndList\n\n"; + $lout_styles =~ s/.$//; +} + +sub lout_item { + &lout_endpara; + print LOUT "\@ListItem{"; + $lout_styles.= 'I'; +} + +sub lout_startindex { + print LOUT "//0.0fe\n"; +} + +sub lout_endindex { + $lout_status='p'; +} + +sub lout_startindexmainitem { + $lout_marker= $_[0]; + $lout_status= ''; + print LOUT "//0.3vx Bold \@Font \@HAdjust { \@HContract { { $_[1] } |3cx {"; + $lout_iiendheight= '1.00'; + $lout_styles .= 'X'; +} + +sub lout_startindexitem { + $lout_marker= $_[0]; + print LOUT "\@HAdjust { \@HContract { { $_[1] } |3cx {"; + 
$lout_iiendheight= '0.95'; + $lout_styles .= 'X'; +} + +sub lout_endindexitem { + print LOUT "} } |0c \@PageOf { $lout_marker } } //${lout_iiendheight}vx\n"; + $lout_styles =~ s/.$//; +} + +sub lout_email { &lout_courier; &lout_text('<'); } +sub lout_endemail { &lout_text('>'); &lout_endcourier; } + +sub lout_ftpon { &lout_courier; } sub lout_endftpon { &lout_endcourier; } +sub lout_ftpin { &lout_courier; } sub lout_endftpin { &lout_endcourier; } +sub lout_docref { } sub lout_enddocref { } +sub lout_ftpsilent { $lout_ignore++; } +sub lout_endftpsilent { $lout_ignore--; } + +sub lout_newsgroup { &lout_courier; } +sub lout_endnewsgroup { &lout_endcourier; } + +sub lout_text { + return if $lout_ignore; + $lout_status= 'p'; + $_= &lout_sanitise($_[0]); + s/ $/\n/ unless $lout_styles =~ m/[fhX]/; + print LOUT $_; +} + +sub lout_tab { + local ($size) = $_[0]*0.5; + print LOUT " |${size}ft "; +} + +sub lout_newline { + print LOUT " //1.0vx\n"; +} + +sub lout_sanitise { + local ($in) = @_; + local ($out); + $in= ' '.$in.' '; + $out=''; + while ($in =~ m/(\s)(\S*[\@\/|\\\"\^\&\{\}\#]\S*)(\s)/) { + $out .= $`.$1; + $in = $3.$'; + $_= $2; + s/[\\\"]/\\$&/g; + $out .= '"'.$_.'"'; + } + $out .= $in; + $out =~ s/^ //; $out =~ s/ $//; + $out; +} + +sub lout_endpara { + return if $lout_status eq ''; + if ($lout_styles eq '') { + print LOUT "\@LP\n\n"; + } elsif ($lout_styles =~ s/I$//) { + print LOUT "}\n"; + } + $lout_status= ''; +} + +sub lout_startverbatim { + print LOUT "//0.4f\n\@RawIndentedDisplay lines \@Break". 
+ " { {0.7 1.0} \@Scale {Courier Bold} \@Font {\n"; +} + +sub lout_verbatim { + $_= $_[0]; + s/^\s*//; + print LOUT &lout_sanitise($_),"\n"; +} + +sub lout_endverbatim { print LOUT "}\n}\n//0.4f\n"; } + +1; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/FAQ/m-post.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/FAQ/m-post.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,189 @@ +## POST output +# Copyright (C) 1993-1995 Ian Jackson. + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# It is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# (Note: I do not consider works produced using these BFNN processing +# tools to be derivative works of the tools, so they are NOT covered +# by the GPL. However, I would appreciate it if you credited me if +# appropriate in any documents you format using BFNN.) + +sub post_init { + open(POST,">$prefix.post"); +} + +sub post_startmajorheading { + print POST '='x79,"\n\n"; + $post_status= 'h'; + &post_text($_[0] ? "Section $_[0]. 
" : ''); +} + +sub post_startminorheading { + print POST '-'x77,"\n\n"; + $post_status= 'h'; +} + +sub post_italic { &post_text('*'); } +sub post_enditalic { $post_para .= '*'; } + +sub post_email { &post_text('<'); } sub post_endemail { &post_text('>'); } + +sub post_ftpon { } sub post_endftpon { } +sub post_ftpin { } sub post_endftpin { } +sub post_docref { } sub post_enddocref { } +sub post_courier { } sub post_endcourier { } +sub post_newsgroup { } sub post_endnewsgroup { } +sub post_ftpsilent { $post_ignore++; } +sub post_endftpsilent { $post_ignore--; } + +sub post_text { + return if $post_ignore; + if ($post_status eq '') { + $post_status= 'p'; + } + $post_para .= $_[0]; +} + +sub post_tab { + local ($n) = $_[0]-length($post_para); + $post_para .= ' 'x$n if $n>0; +} + +sub post_newline { + return unless $post_status eq 'p'; + &post_writepara; +} + +sub post_writepara { + local ($thisline, $thisword, $rest); + for (;;) { + last unless $post_para =~ m/\S/; + $thisline= $post_indentstring; + for (;;) { + last unless $post_para =~ m/^(\s*\S+)/; + unless (length($1) + length($thisline) < 75 || + length($thisline) == length($post_indentstring)) { + last; + } + $thisline .= $1; + $post_para= $'; + } + $post_para =~ s/^\s*//; + print POST $thisline,"\n"; + $post_indentstring= $post_nextindent; + last unless length($post_para); + } + $post_status= ''; $post_para= ''; +} + +sub post_endpara { + return unless $post_status eq 'p'; + &post_writepara; + print POST "\n"; +} + +sub post_endheading { + $post_para =~ s/\s*$//; + print POST "$post_para\n\n"; + $post_status= ''; + $post_para= ''; +} + +sub post_endmajorheading { &post_endheading(@_); } +sub post_endminorheading { &post_endheading(@_); } + +sub post_startverbatim { + $post_vstatus= $post_status; + &post_writepara; +} + +sub post_verbatim { + print POST $_[0],"\n"; +} + +sub post_endverbatim { + $post_status= $post_vstatus; +} + +sub post_finish { + close(POST); +} + +sub post_startindex { $post_status= ''; } 
+sub post_endindex { $post_status= 'p'; } + +sub post_endindexitem { + printf POST " %-11s %-.66s\n",$post_left,$post_para; + $post_status= 'p'; + $post_para= ''; +} + +sub post_startindexitem { + $post_left= $_[1]; +} + +sub post_startindexmainitem { + $post_left= $_[1]; + print POST "\n" if $post_status eq 'p'; +} + +sub post_startindent { + $post_istatus= $post_status; + &post_writepara; + $post_indentstring= " $post_indentstring"; + $post_nextindent= " $post_nextindent"; +} + +sub post_endindent { + $post_indentstring =~ s/^ //; + $post_nextindent =~ s/^ //; + $post_status= $post_istatus; +} + +sub post_startpackedlist { $post_plc=0; } +sub post_endpackedlist { &post_newline if !$post_plc; } +sub post_packeditem { + &post_newline if !$post_plc; + &post_tab($post_plc*40+5); + $post_plc= !$post_plc; +} + +sub post_startlist { + &post_endpara; + $post_indentstring= " $post_indentstring"; + $post_nextindent= " $post_nextindent"; +} + +sub post_endlist { + &post_endpara; + $post_indentstring =~ s/^ //; + $post_nextindent =~ s/^ //; +} + +sub post_item { + &post_newline; + $post_indentstring =~ s/ $/* /; +} + +sub post_pageref { + &post_text("Q$_[1] \`"); +} + +sub post_endpageref { + &post_text("'"); +} + +1; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,37 @@ +SUBDIRS = FAQ + +info_TEXINFOS = fftw3.texi +fftw3_TEXINFOS = acknowledgements.texi cindex.texi fftw3.texi findex.texi install.texi intro.texi legacy-fortran.texi license.texi modern-fortran.texi mpi.texi other.texi reference.texi threads.texi tutorial.texi upgrading.texi version.texi rfftwnd.pdf rfftwnd.eps + +DVIPS = dvips -Pwww + +EQN_IMAGES = equation-dft.png equation-dht.png equation-idft.png \ +equation-redft00.png equation-redft01.png equation-redft10.png \ +equation-redft11.png equation-rodft00.png equation-rodft01.png \ +equation-rodft10.png 
equation-rodft11.png + +EXTRA_DIST = f77_wisdom.f fftw3.pdf html rfftwnd.fig rfftwnd.eps \ +rfftwnd.pdf rfftwnd-for-html.png $(EQN_IMAGES) + +html: $(fftw3_TEXINFOS) $(EQN_IMAGES) rfftwnd-for-html.png + $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + --html --number-sections -o html fftw3.texi + for i in $(EQN_IMAGES); do cp -f ${srcdir}/$$i html; done + cp -f ${srcdir}/rfftwnd-for-html.png html + +maintainer-clean-local: + rm -rf html + +if MAINTAINER_MODE +# generate the figure for the manual and distribute the binaries, so that +# people don't need to have fig2dev installed. +rfftwnd.eps: rfftwnd.fig + fig2dev -L eps -m .7 ${srcdir}/rfftwnd.fig rfftwnd.eps + +rfftwnd-for-html.png: rfftwnd.fig + fig2dev -L png -m 1 ${srcdir}/rfftwnd.fig rfftwnd-for-html.png + +rfftwnd.pdf: rfftwnd.fig + fig2dev -L pdf -m .7 ${srcdir}/rfftwnd.fig rfftwnd.pdf + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1004 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(fftw3_TEXINFOS) mdate-sh $(srcdir)/version.texi \ + $(srcdir)/stamp-vti texinfo.tex +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +AM_V_DVIPS = $(am__v_DVIPS_@AM_V@) +am__v_DVIPS_ = $(am__v_DVIPS_@AM_DEFAULT_V@) +am__v_DVIPS_0 = @echo " DVIPS " $@; +am__v_DVIPS_1 = +AM_V_MAKEINFO = $(am__v_MAKEINFO_@AM_V@) +am__v_MAKEINFO_ = $(am__v_MAKEINFO_@AM_DEFAULT_V@) +am__v_MAKEINFO_0 = @echo " MAKEINFO" $@; +am__v_MAKEINFO_1 = +AM_V_INFOHTML = $(am__v_INFOHTML_@AM_V@) +am__v_INFOHTML_ = $(am__v_INFOHTML_@AM_DEFAULT_V@) +am__v_INFOHTML_0 = @echo " INFOHTML" $@; +am__v_INFOHTML_1 = +AM_V_TEXI2DVI = $(am__v_TEXI2DVI_@AM_V@) +am__v_TEXI2DVI_ = $(am__v_TEXI2DVI_@AM_DEFAULT_V@) +am__v_TEXI2DVI_0 = @echo " TEXI2DVI" $@; +am__v_TEXI2DVI_1 = +AM_V_TEXI2PDF = $(am__v_TEXI2PDF_@AM_V@) +am__v_TEXI2PDF_ = $(am__v_TEXI2PDF_@AM_DEFAULT_V@) +am__v_TEXI2PDF_0 = @echo " TEXI2PDF" $@; +am__v_TEXI2PDF_1 = +AM_V_texinfo = $(am__v_texinfo_@AM_V@) +am__v_texinfo_ = $(am__v_texinfo_@AM_DEFAULT_V@) +am__v_texinfo_0 = -q +am__v_texinfo_1 = +AM_V_texidevnull = 
$(am__v_texidevnull_@AM_V@) +am__v_texidevnull_ = $(am__v_texidevnull_@AM_DEFAULT_V@) +am__v_texidevnull_0 = > /dev/null +am__v_texidevnull_1 = +INFO_DEPS = $(srcdir)/fftw3.info +am__TEXINFO_TEX_DIR = $(srcdir) +DVIS = fftw3.dvi +PDFS = fftw3.pdf +PSS = fftw3.ps +HTMLS = fftw3.html +TEXINFOS = fftw3.texi +TEXI2DVI = texi2dvi +TEXI2PDF = $(TEXI2DVI) --pdf --batch +MAKEINFOHTML = $(MAKEINFO) --html +AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__installdirs = "$(DESTDIR)$(infodir)" +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed 
'$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ 
+host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = FAQ +info_TEXINFOS = fftw3.texi +fftw3_TEXINFOS = acknowledgements.texi cindex.texi fftw3.texi findex.texi install.texi intro.texi legacy-fortran.texi license.texi modern-fortran.texi mpi.texi other.texi reference.texi threads.texi tutorial.texi upgrading.texi version.texi rfftwnd.pdf rfftwnd.eps +DVIPS = dvips -Pwww +EQN_IMAGES = equation-dft.png equation-dht.png equation-idft.png \ +equation-redft00.png equation-redft01.png equation-redft10.png \ +equation-redft11.png equation-rodft00.png equation-rodft01.png \ +equation-rodft10.png equation-rodft11.png + +EXTRA_DIST = f77_wisdom.f fftw3.pdf html rfftwnd.fig rfftwnd.eps \ +rfftwnd.pdf rfftwnd-for-html.png $(EQN_IMAGES) + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .dvi .html .info .pdf .ps .texi +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu doc/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +.texi.info: + $(AM_V_MAKEINFO)restore=: && backupdir="$(am__leading_dot)am$$$$" && \ + am__cwd=`pwd` && $(am__cd) $(srcdir) && \ + rm -rf $$backupdir && mkdir $$backupdir && \ + if ($(MAKEINFO) --version) >/dev/null 2>&1; then \ + for f in $@ $@-[0-9] $@-[0-9][0-9] $(@:.info=).i[0-9] $(@:.info=).i[0-9][0-9]; do \ + if test -f $$f; then mv $$f $$backupdir; restore=mv; else :; fi; \ + done; \ + else :; fi && \ + cd "$$am__cwd"; \ + if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $@ $<; \ + then \ + rc=0; \ + $(am__cd) $(srcdir); \ + else \ + rc=$$?; \ + $(am__cd) $(srcdir) && \ + $$restore $$backupdir/* `echo "./$@" | sed 's|[^/]*$$||'`; \ + fi; \ + rm -rf $$backupdir; exit $$rc + +.texi.dvi: + $(AM_V_TEXI2DVI)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2DVI) $(AM_V_texinfo) --build-dir=$(@:.dvi=.t2d) -o $@ $(AM_V_texidevnull) \ + $< + +.texi.pdf: + $(AM_V_TEXI2PDF)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2PDF) $(AM_V_texinfo) 
--build-dir=$(@:.pdf=.t2p) -o $@ $(AM_V_texidevnull) \ + $< + +.texi.html: + $(AM_V_MAKEINFO)rm -rf $(@:.html=.htp) + $(AM_V_at)if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $(@:.html=.htp) $<; \ + then \ + rm -rf $@ && mv $(@:.html=.htp) $@; \ + else \ + rm -rf $(@:.html=.htp); exit 1; \ + fi +$(srcdir)/fftw3.info: fftw3.texi $(srcdir)/version.texi $(fftw3_TEXINFOS) +fftw3.dvi: fftw3.texi $(srcdir)/version.texi $(fftw3_TEXINFOS) +fftw3.pdf: fftw3.texi $(srcdir)/version.texi $(fftw3_TEXINFOS) +fftw3.html: fftw3.texi $(srcdir)/version.texi $(fftw3_TEXINFOS) +$(srcdir)/version.texi: @MAINTAINER_MODE_TRUE@ $(srcdir)/stamp-vti +$(srcdir)/stamp-vti: fftw3.texi $(top_srcdir)/configure + @(dir=.; test -f ./fftw3.texi || dir=$(srcdir); \ + set `$(SHELL) $(srcdir)/mdate-sh $$dir/fftw3.texi`; \ + echo "@set UPDATED $$1 $$2 $$3"; \ + echo "@set UPDATED-MONTH $$2 $$3"; \ + echo "@set EDITION $(VERSION)"; \ + echo "@set VERSION $(VERSION)") > vti.tmp + @cmp -s vti.tmp $(srcdir)/version.texi \ + || (echo "Updating $(srcdir)/version.texi"; \ + cp vti.tmp $(srcdir)/version.texi) + -@rm -f vti.tmp + @cp $(srcdir)/version.texi $@ + +mostlyclean-vti: + -rm -f vti.tmp + +maintainer-clean-vti: +@MAINTAINER_MODE_TRUE@ -rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi +.dvi.ps: + $(AM_V_DVIPS)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + $(DVIPS) $(AM_V_texinfo) -o $@ $< + +uninstall-dvi-am: + @$(NORMAL_UNINSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(dvidir)/$$f'"; \ + rm -f "$(DESTDIR)$(dvidir)/$$f"; \ + done + +uninstall-html-am: + @$(NORMAL_UNINSTALL) + @list='$(HTMLS)'; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -rf '$(DESTDIR)$(htmldir)/$$f'"; \ + rm -rf "$(DESTDIR)$(htmldir)/$$f"; \ + done + +uninstall-info-am: + @$(PRE_UNINSTALL) + @if test -d '$(DESTDIR)$(infodir)' && 
$(am__can_run_installinfo); then \ + list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' --remove '$(DESTDIR)$(infodir)/$$relfile'"; \ + if install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$$relfile"; \ + then :; else test ! -f "$(DESTDIR)$(infodir)/$$relfile" || exit 1; fi; \ + done; \ + else :; fi + @$(NORMAL_UNINSTALL) + @list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + relfile_i=`echo "$$relfile" | sed 's|\.info$$||;s|$$|.i|'`; \ + (if test -d "$(DESTDIR)$(infodir)" && cd "$(DESTDIR)$(infodir)"; then \ + echo " cd '$(DESTDIR)$(infodir)' && rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]"; \ + rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]; \ + else :; fi); \ + done + +uninstall-pdf-am: + @$(NORMAL_UNINSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(pdfdir)/$$f'"; \ + rm -f "$(DESTDIR)$(pdfdir)/$$f"; \ + done + +uninstall-ps-am: + @$(NORMAL_UNINSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(psdir)/$$f'"; \ + rm -f "$(DESTDIR)$(psdir)/$$f"; \ + done + +dist-info: $(INFO_DEPS) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; \ + for base in $$list; do \ + case $$base in \ + $(srcdir)/*) base=`echo "$$base" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$base; then d=.; else d=$(srcdir); fi; \ + base_i=`echo "$$base" | sed 's|\.info$$||;s|$$|.i|'`; \ + for file in $$d/$$base $$d/$$base-[0-9] $$d/$$base-[0-9][0-9] $$d/$$base_i[0-9] $$d/$$base_i[0-9][0-9]; do \ + if test -f $$file; then \ + relfile=`expr "$$file" : "$$d/\(.*\)"`; \ + test -f "$(distdir)/$$relfile" || \ + cp -p $$file "$(distdir)/$$relfile"; 
\ + else :; fi; \ + done; \ + done + +mostlyclean-aminfo: + -rm -rf fftw3.t2d fftw3.t2p + +clean-aminfo: + -test -z "fftw3.dvi fftw3.pdf fftw3.ps fftw3.html" \ + || rm -rf fftw3.dvi fftw3.pdf fftw3.ps fftw3.html + +maintainer-clean-aminfo: + @list='$(INFO_DEPS)'; for i in $$list; do \ + i_i=`echo "$$i" | sed 's|\.info$$||;s|$$|.i|'`; \ + echo " rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]"; \ + rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]; \ + done + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + 
empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo 
"/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" distdir="$(distdir)" \ + dist-info +check-am: all-am +check: check-recursive +all-am: Makefile $(INFO_DEPS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(infodir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) 
$(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-aminfo clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: $(DVIS) + +html-am: $(HTMLS) + +info: info-recursive + +info-am: $(INFO_DEPS) + +install-data-am: install-info-am + +install-dvi: install-dvi-recursive + +install-dvi-am: $(DVIS) + @$(NORMAL_INSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(dvidir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(dvidir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(dvidir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(dvidir)" || exit $$?; \ + done +install-exec-am: + +install-html: install-html-recursive + +install-html-am: $(HTMLS) + @$(NORMAL_INSTALL) + @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p" || 
test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \ + $(am__strip_dir) \ + d2=$$d$$p; \ + if test -d "$$d2"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \ + echo " $(INSTALL_DATA) '$$d2'/* '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d2"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \ + else \ + list2="$$list2 $$d2"; \ + fi; \ + done; \ + test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ + done; } +install-info: install-info-recursive + +install-info-am: $(INFO_DEPS) + @$(NORMAL_INSTALL) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(infodir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(infodir)" || exit 1; \ + fi; \ + for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + file_i=`echo "$$file" | sed 's|\.info$$||;s|$$|.i|'`; \ + for ifile in $$d/$$file $$d/$$file-[0-9] $$d/$$file-[0-9][0-9] \ + $$d/$$file_i[0-9] $$d/$$file_i[0-9][0-9] ; do \ + if test -f $$ifile; then \ + echo "$$ifile"; \ + else : ; fi; \ + done; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done + @$(POST_INSTALL) + @if $(am__can_run_installinfo); then \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' '$(DESTDIR)$(infodir)/$$relfile'";\ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$$relfile" || :;\ + done; \ + else : ; fi +install-man: + +install-pdf: 
install-pdf-recursive + +install-pdf-am: $(PDFS) + @$(NORMAL_INSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pdfdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pdfdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pdfdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pdfdir)" || exit $$?; done +install-ps: install-ps-recursive + +install-ps-am: $(PSS) + @$(NORMAL_INSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(psdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(psdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(psdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(psdir)" || exit $$?; done +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-local \ + maintainer-clean-vti + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-aminfo mostlyclean-generic \ + mostlyclean-libtool mostlyclean-vti + +pdf: pdf-recursive + +pdf-am: $(PDFS) + +ps: ps-recursive + +ps-am: $(PSS) + +uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-pdf-am uninstall-ps-am + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-aminfo clean-generic clean-libtool \ + cscopelist-am ctags ctags-am dist-info distclean \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data 
install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-local \ + maintainer-clean-vti mostlyclean mostlyclean-aminfo \ + mostlyclean-generic mostlyclean-libtool mostlyclean-vti pdf \ + pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-pdf-am uninstall-ps-am + + +html: $(fftw3_TEXINFOS) $(EQN_IMAGES) rfftwnd-for-html.png + $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + --html --number-sections -o html fftw3.texi + for i in $(EQN_IMAGES); do cp -f ${srcdir}/$$i html; done + cp -f ${srcdir}/rfftwnd-for-html.png html + +maintainer-clean-local: + rm -rf html + +# generate the figure for the manual and distribute the binaries, so that +# people don't need to have fig2dev installed. +@MAINTAINER_MODE_TRUE@rfftwnd.eps: rfftwnd.fig +@MAINTAINER_MODE_TRUE@ fig2dev -L eps -m .7 ${srcdir}/rfftwnd.fig rfftwnd.eps + +@MAINTAINER_MODE_TRUE@rfftwnd-for-html.png: rfftwnd.fig +@MAINTAINER_MODE_TRUE@ fig2dev -L png -m 1 ${srcdir}/rfftwnd.fig rfftwnd-for-html.png + +@MAINTAINER_MODE_TRUE@rfftwnd.pdf: rfftwnd.fig +@MAINTAINER_MODE_TRUE@ fig2dev -L pdf -m .7 ${srcdir}/rfftwnd.fig rfftwnd.pdf + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/acknowledgements.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/acknowledgements.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,89 @@ +@node Acknowledgments, License and Copyright, Installation and Customization, Top +@chapter Acknowledgments + +Matteo Frigo was supported in part by the Special Research Program SFB +F011 ``AURORA'' of the Austrian Science Fund FWF and by MIT Lincoln +Laboratory. For previous versions of FFTW, he was supported in part by the +Defense Advanced Research Projects Agency (DARPA), under Grants +N00014-94-1-0985 and F30602-97-1-0270, and by a Digital Equipment +Corporation Fellowship. + +Steven G. Johnson was supported in part by a Dept.@ of Defense NDSEG +Fellowship, an MIT Karl Taylor Compton Fellowship, and by the Materials +Research Science and Engineering Center program of the National Science +Foundation under award DMR-9400334. + +Code for the Cell Broadband Engine was graciously donated to the FFTW +project by the IBM Austin Research Lab and included in fftw-3.2. (This +code was removed in fftw-3.3.) + +Code for the MIPS paired-single SIMD support was graciously donated to +the FFTW project by CodeSourcery, Inc. + +We are grateful to Sun Microsystems Inc.@ for its donation of a +cluster of 9 8-processor Ultra HPC 5000 SMPs (24 Gflops peak). These +machines served as the primary platform for the development of early +versions of FFTW. + +We thank Intel Corporation for donating a four-processor Pentium Pro +machine. We thank the GNU/Linux community for giving us a decent OS to +run on that machine. + +We are thankful to the AMD corporation for donating an AMD Athlon XP 1700+ +computer to the FFTW project. + +We thank the Compaq/HP testdrive program and VA Software Corporation +(SourceForge.net) for providing remote access to machines that were used +to test FFTW. 
+ +The @code{genfft} suite of code generators was written using Objective +Caml, a dialect of ML. Objective Caml is a small and elegant language +developed by Xavier Leroy. The implementation is available from +@uref{http://caml.inria.fr/, @code{http://caml.inria.fr/}}. In previous +releases of FFTW, @code{genfft} was written in Caml Light, by the same +authors. An even earlier implementation of @code{genfft} was written in +Scheme, but Caml is definitely better for this kind of application. +@cindex Caml +@cindex LISP + + +FFTW uses many tools from the GNU project, including @code{automake}, +@code{texinfo}, and @code{libtool}. + +Prof.@ Charles E.@ Leiserson of MIT provided continuous support and +encouragement. This program would not exist without him. Charles also +proposed the name ``codelets'' for the basic FFT blocks. +@cindex codelet + + +Prof.@ John D.@ Joannopoulos of MIT demonstrated continuing tolerance of +Steven's ``extra-curricular'' computer-science activities, as well as +remarkable creativity in working them into his grant proposals. +Steven's physics degree would not exist without him. + +Franz Franchetti wrote SIMD extensions to FFTW 2, which eventually +led to the SIMD support in FFTW 3. + +Stefan Kral wrote most of the K7 code generator distributed with FFTW +3.0.x and 3.1.x. + +Andrew Sterian contributed the Windows timing code in FFTW 2. + +Didier Miras reported a bug in the test procedure used in FFTW 1.2. We +now use a completely different test algorithm by Funda Ergun that does +not require a separate FFT program to compare against. + +Wolfgang Reimer contributed the Pentium cycle counter and a few fixes +that help portability. + +Ming-Chang Liu uncovered a well-hidden bug in the complex transforms of +FFTW 2.0 and supplied a patch to correct it. + +The FFTW FAQ was written in @code{bfnn} (Bizarre Format With No Name) +and formatted using the tools developed by Ian Jackson for the Linux +FAQ. 
+ +@emph{We are especially thankful to all of our users for their +continuing support, feedback, and interest during our development of +FFTW.} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/cindex.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/cindex.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +@node Concept Index, Library Index, License and Copyright, Top +@chapter Concept Index +@printindex cp diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-dft.png Binary file fft/fftw/fftw-3.3.4/doc/equation-dft.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-dht.png Binary file fft/fftw/fftw-3.3.4/doc/equation-dht.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-idft.png Binary file fft/fftw/fftw-3.3.4/doc/equation-idft.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-redft00.png Binary file fft/fftw/fftw-3.3.4/doc/equation-redft00.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-redft01.png Binary file fft/fftw/fftw-3.3.4/doc/equation-redft01.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-redft10.png Binary file fft/fftw/fftw-3.3.4/doc/equation-redft10.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-redft11.png Binary file fft/fftw/fftw-3.3.4/doc/equation-redft11.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-rodft00.png Binary file fft/fftw/fftw-3.3.4/doc/equation-rodft00.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-rodft01.png Binary file fft/fftw/fftw-3.3.4/doc/equation-rodft01.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/equation-rodft10.png Binary file fft/fftw/fftw-3.3.4/doc/equation-rodft10.png has changed diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/doc/equation-rodft11.png Binary file fft/fftw/fftw-3.3.4/doc/equation-rodft11.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/f77_wisdom.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/f77_wisdom.f Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,79 @@ +c Copyright (c) 2003, 2007-14 Matteo Frigo +c Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology +c +c This program is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this program; if not, write to the Free Software +c Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +c +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c +c This is an example implementation of Fortran wisdom export/import +c to/from a Fortran unit (file), exploiting the generic +c dfftw_export_wisdom/dfftw_import_wisdom functions. +c +c We cannot compile this file into the FFTW library itself, lest all +c FFTW-calling programs be required to link to the Fortran I/O +c libraries. +c +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + +c Strictly speaking, the '$' format specifier, which allows us to +c write a character without a trailing newline, is not standard F77. +c However, it seems to be a nearly universal extension. 
+ subroutine write_char(c, iunit) + character c + integer iunit + write(iunit,321) c + 321 format(a,$) + end + + subroutine export_wisdom_to_file(iunit) + integer iunit + external write_char + call dfftw_export_wisdom(write_char, iunit) + end + +c Fortran 77 does not have any portable way to read an arbitrary +c file one character at a time. The best alternative seems to be to +c read a whole line into a buffer, since for fftw-exported wisdom we +c can bound the line length. (If the file contains longer lines, +c then the lines will be truncated and the wisdom import should +c simply fail.) Ugh. + subroutine read_char(ic, iunit) + integer ic + integer iunit + character*256 buf + save buf + integer ibuf + data ibuf/257/ + save ibuf + if (ibuf .lt. 257) then + ic = ichar(buf(ibuf:ibuf)) + ibuf = ibuf + 1 + return + endif + read(iunit,123,end=666) buf + ic = ichar(buf(1:1)) + ibuf = 2 + return + 666 ic = -1 + ibuf = 257 + 123 format(a256) + end + + subroutine import_wisdom_from_file(isuccess, iunit) + integer isuccess + integer iunit + external read_char + call dfftw_import_wisdom(isuccess, read_char, iunit) + end diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/fftw3.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/fftw3.info Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,165 @@ +This is fftw3.info, produced by makeinfo version 4.13 from fftw3.texi. + +This manual is for FFTW (version 3.3.4, 20 September 2013). + + Copyright (C) 2003 Matteo Frigo. + + Copyright (C) 2003 Massachusetts Institute of Technology. + + Permission is granted to make and distribute verbatim copies of + this manual provided the copyright notice and this permission + notice are preserved on all copies. + + Permission is granted to copy and distribute modified versions of + this manual under the conditions for verbatim copying, provided + that the entire resulting derived work is distributed under the + terms of a permission notice identical to this one. 
+ + Permission is granted to copy and distribute translations of this + manual into another language, under the above conditions for + modified versions, except that this permission notice may be + stated in a translation approved by the Free Software Foundation. + +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* fftw3: (fftw3). FFTW User's Manual. +END-INFO-DIR-ENTRY + + +Indirect: +fftw3.info-1: 1060 +fftw3.info-2: 298053 + +Tag Table: +(Indirect) +Node: Top1060 +Node: Introduction1733 +Node: Tutorial8069 +Ref: Tutorial-Footnote-19313 +Node: Complex One-Dimensional DFTs9407 +Node: Complex Multi-Dimensional DFTs15163 +Ref: Complex Multi-Dimensional DFTs-Footnote-118595 +Node: One-Dimensional DFTs of Real Data18730 +Node: Multi-Dimensional DFTs of Real Data23175 +Node: More DFTs of Real Data27105 +Node: The Halfcomplex-format DFT30607 +Node: Real even/odd DFTs (cosine/sine transforms)33216 +Ref: Real even/odd DFTs (cosine/sine transforms)-Footnote-138826 +Ref: Real even/odd DFTs (cosine/sine transforms)-Footnote-239015 +Node: The Discrete Hartley Transform39948 +Ref: The Discrete Hartley Transform-Footnote-142133 +Node: Other Important Topics42382 +Node: SIMD alignment and fftw_malloc42675 +Node: Multi-dimensional Array Format44935 +Node: Row-major Format45556 +Node: Column-major Format47249 +Node: Fixed-size Arrays in C48333 +Node: Dynamic Arrays in C49769 +Node: Dynamic Arrays in C-The Wrong Way51407 +Node: Words of Wisdom-Saving Plans53155 +Node: Caveats in Using Wisdom55830 +Node: FFTW Reference57918 +Node: Data Types and Files58406 +Node: Complex numbers58838 +Node: Precision60579 +Node: Memory Allocation62141 +Node: Using Plans63712 +Node: Basic Interface67752 +Ref: Basic Interface-Footnote-168496 +Node: Complex DFTs68560 +Node: Planner Flags72527 +Node: Real-data DFTs77982 +Node: Real-data DFT Array Format82978 +Node: Real-to-Real Transforms85233 +Node: Real-to-Real Transform Kinds89203 +Node: Advanced Interface91671 +Node: Advanced Complex DFTs92411 
+Node: Advanced Real-data DFTs96670 +Node: Advanced Real-to-real Transforms98997 +Node: Guru Interface100103 +Node: Interleaved and split arrays101026 +Node: Guru vector and transform sizes102069 +Node: Guru Complex DFTs104634 +Node: Guru Real-data DFTs107470 +Node: Guru Real-to-real Transforms110393 +Node: 64-bit Guru Interface111712 +Node: New-array Execute Functions114035 +Node: Wisdom118534 +Node: Wisdom Export118893 +Node: Wisdom Import120867 +Node: Forgetting Wisdom122889 +Node: Wisdom Utilities123261 +Node: What FFTW Really Computes124628 +Node: The 1d Discrete Fourier Transform (DFT)125453 +Node: The 1d Real-data DFT126812 +Node: 1d Real-even DFTs (DCTs)128466 +Node: 1d Real-odd DFTs (DSTs)131675 +Node: 1d Discrete Hartley Transforms (DHTs)134617 +Node: Multi-dimensional Transforms135293 +Node: Multi-threaded FFTW137896 +Node: Installation and Supported Hardware/Software139365 +Node: Usage of Multi-threaded FFTW141190 +Node: How Many Threads to Use?144498 +Node: Thread safety145522 +Node: Distributed-memory FFTW with MPI147690 +Node: FFTW MPI Installation150269 +Node: Linking and Initializing MPI FFTW152061 +Node: 2d MPI example153291 +Node: MPI Data Distribution157527 +Node: Basic and advanced distribution interfaces160405 +Node: Load balancing164840 +Node: Transposed distributions166526 +Node: One-dimensional distributions170298 +Node: Multi-dimensional MPI DFTs of Real Data172867 +Node: Other Multi-dimensional Real-data MPI Transforms177515 +Node: FFTW MPI Transposes179688 +Node: Basic distributed-transpose interface180528 +Node: Advanced distributed-transpose interface182712 +Node: An improved replacement for MPI_Alltoall184000 +Node: FFTW MPI Wisdom185976 +Ref: FFTW MPI Wisdom-Footnote-1188719 +Node: Avoiding MPI Deadlocks189632 +Node: FFTW MPI Performance Tips190661 +Node: Combining MPI and Threads192130 +Node: FFTW MPI Reference195601 +Node: MPI Files and Data Types196180 +Node: MPI Initialization197176 +Node: Using MPI Plans198275 +Node: MPI Data 
Distribution Functions200101 +Node: MPI Plan Creation205557 +Node: MPI Wisdom Communication216234 +Node: FFTW MPI Fortran Interface217160 +Ref: FFTW MPI Fortran Interface-Footnote-1223189 +Node: Calling FFTW from Modern Fortran223596 +Node: Overview of Fortran interface224947 +Node: Extended and quadruple precision in Fortran228399 +Node: Reversing array dimensions229780 +Node: FFTW Fortran type reference233315 +Node: Plan execution in Fortran237802 +Node: Allocating aligned memory in Fortran240698 +Node: Accessing the wisdom API from Fortran244062 +Node: Wisdom File Export/Import from Fortran244839 +Node: Wisdom String Export/Import from Fortran246501 +Node: Wisdom Generic Export/Import from Fortran248489 +Node: Defining an FFTW module250719 +Node: Calling FFTW from Legacy Fortran251788 +Node: Fortran-interface routines253345 +Ref: Fortran-interface routines-Footnote-1257003 +Ref: Fortran-interface routines-Footnote-2257206 +Node: FFTW Constants in Fortran257339 +Node: FFTW Execution in Fortran258494 +Node: Fortran Examples261250 +Node: Wisdom of Fortran?264669 +Node: Upgrading from FFTW version 2266349 +Ref: Upgrading from FFTW version 2-Footnote-1275972 +Node: Installation and Customization276155 +Node: Installation on Unix277799 +Node: Installation on non-Unix systems286462 +Node: Cycle Counters288677 +Node: Generating your own code290429 +Node: Acknowledgments292464 +Node: License and Copyright296184 +Node: Concept Index298053 +Node: Library Index334695 + +End Tag Table diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/fftw3.info-1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/fftw3.info-1 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,6294 @@ +This is fftw3.info, produced by makeinfo version 4.13 from fftw3.texi. + +This manual is for FFTW (version 3.3.4, 20 September 2013). + + Copyright (C) 2003 Matteo Frigo. + + Copyright (C) 2003 Massachusetts Institute of Technology. 
+ + Permission is granted to make and distribute verbatim copies of + this manual provided the copyright notice and this permission + notice are preserved on all copies. + + Permission is granted to copy and distribute modified versions of + this manual under the conditions for verbatim copying, provided + that the entire resulting derived work is distributed under the + terms of a permission notice identical to this one. + + Permission is granted to copy and distribute translations of this + manual into another language, under the above conditions for + modified versions, except that this permission notice may be + stated in a translation approved by the Free Software Foundation. + +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* fftw3: (fftw3). FFTW User's Manual. +END-INFO-DIR-ENTRY + + +File: fftw3.info, Node: Top, Next: Introduction, Prev: (dir), Up: (dir) + +FFTW User Manual +**************** + +Welcome to FFTW, the Fastest Fourier Transform in the West. FFTW is a +collection of fast C routines to compute the discrete Fourier transform. +This manual documents FFTW version 3.3.4. + +* Menu: + +* Introduction:: +* Tutorial:: +* Other Important Topics:: +* FFTW Reference:: +* Multi-threaded FFTW:: +* Distributed-memory FFTW with MPI:: +* Calling FFTW from Modern Fortran:: +* Calling FFTW from Legacy Fortran:: +* Upgrading from FFTW version 2:: +* Installation and Customization:: +* Acknowledgments:: +* License and Copyright:: +* Concept Index:: +* Library Index:: + + +File: fftw3.info, Node: Introduction, Next: Tutorial, Prev: Top, Up: Top + +1 Introduction +************** + +This manual documents version 3.3.4 of FFTW, the _Fastest Fourier +Transform in the West_. FFTW is a comprehensive collection of fast C +routines for computing the discrete Fourier transform (DFT) and various +special cases thereof. 
+ * FFTW computes the DFT of complex data, real data, even- or + odd-symmetric real data (these symmetric transforms are usually + known as the discrete cosine or sine transform, respectively), and + the discrete Hartley transform (DHT) of real data. + + * The input data can have arbitrary length. FFTW employs O(n + log n) algorithms for all lengths, including prime numbers. + + * FFTW supports arbitrary multi-dimensional data. + + * FFTW supports the SSE, SSE2, AVX, Altivec, and MIPS PS instruction + sets. + + * FFTW includes parallel (multi-threaded) transforms for + shared-memory systems. + + * Starting with version 3.3, FFTW includes distributed-memory + parallel transforms using MPI. + + We assume herein that you are familiar with the properties and uses +of the DFT that are relevant to your application. Otherwise, see e.g. +`The Fast Fourier Transform and Its Applications' by E. O. Brigham +(Prentice-Hall, Englewood Cliffs, NJ, 1988). Our web page +(http://www.fftw.org) also has links to FFT-related information online. + + In order to use FFTW effectively, you need to learn one basic concept +of FFTW's internal structure: FFTW does not use a fixed algorithm for +computing the transform, but instead it adapts the DFT algorithm to +details of the underlying hardware in order to maximize performance. +Hence, the computation of the transform is split into two phases. +First, FFTW's "planner" "learns" the fastest way to compute the +transform on your machine. The planner produces a data structure +called a "plan" that contains this information. Subsequently, the plan +is "executed" to transform the array of input data as dictated by the +plan. The plan can be reused as many times as needed. In typical +high-performance applications, many transforms of the same size are +computed and, consequently, a relatively expensive initialization of +this sort is acceptable. 
On the other hand, if you need a single +transform of a given size, the one-time cost of the planner becomes +significant. For this case, FFTW provides fast planners based on +heuristics or on previously computed plans. + + FFTW supports transforms of data with arbitrary length, rank, +multiplicity, and a general memory layout. In simple cases, however, +this generality may be unnecessary and confusing. Consequently, we +organized the interface to FFTW into three levels of increasing +generality. + * The "basic interface" computes a single transform of + contiguous data. + + * The "advanced interface" computes transforms of multiple or + strided arrays. + + * The "guru interface" supports the most general data layouts, + multiplicities, and strides. + We expect that most users will be best served by the basic interface, +whereas the guru interface requires careful attention to the +documentation to avoid problems. + + Besides the automatic performance adaptation performed by the +planner, it is also possible for advanced users to customize FFTW +manually. For example, if code space is a concern, we provide a tool +that links only the subset of FFTW needed by your application. +Conversely, you may need to extend FFTW because the standard +distribution is not sufficient for your needs. For example, the +standard FFTW distribution works most efficiently for arrays whose size +can be factored into small primes (2, 3, 5, and 7), and otherwise it +uses a slower general-purpose routine. If you need efficient +transforms of other sizes, you can use FFTW's code generator, which +produces fast C programs ("codelets") for any particular array size you +may care about. For example, if you need transforms of size 513 = 19 x +3^3, you can customize FFTW to support the factor 19 efficiently. + + For more information regarding FFTW, see the paper, "The Design and +Implementation of FFTW3," by M. Frigo and S. G. Johnson, which was an +invited paper in `Proc. IEEE' 93 (2), p. 
216 (2005). The code +generator is described in the paper "A fast Fourier transform compiler", by +M. Frigo, in the `Proceedings of the 1999 ACM SIGPLAN Conference on +Programming Language Design and Implementation (PLDI), Atlanta, +Georgia, May 1999'. These papers, along with the latest version of +FFTW, the FAQ, benchmarks, and other links, are available at the FFTW +home page (http://www.fftw.org). + + The current version of FFTW incorporates many good ideas from the +past thirty years of FFT literature. In one way or another, FFTW uses +the Cooley-Tukey algorithm, the prime factor algorithm, Rader's +algorithm for prime sizes, and a split-radix algorithm (with a +"conjugate-pair" variation pointed out to us by Dan Bernstein). FFTW's +code generator also produces new algorithms that we do not completely +understand. The reader is referred to the cited papers for the +appropriate references. + + The rest of this manual is organized as follows. We first discuss +the sequential (single-processor) implementation. We start by +describing the basic interface/features of FFTW in *note Tutorial::. +Next, *note Other Important Topics:: discusses data alignment (*note +SIMD alignment and fftw_malloc::), the storage scheme of +multi-dimensional arrays (*note Multi-dimensional Array Format::), and +FFTW's mechanism for storing plans on disk (*note Words of +Wisdom-Saving Plans::). Next, *note FFTW Reference:: provides +comprehensive documentation of all FFTW's features. Parallel +transforms are discussed in their own chapters: *note Multi-threaded +FFTW:: and *note Distributed-memory FFTW with MPI::. Fortran +programmers can also use FFTW, as described in *note Calling FFTW from +Legacy Fortran:: and *note Calling FFTW from Modern Fortran::. *note +Installation and Customization:: explains how to install FFTW in your +computer system and how to adapt FFTW to your needs. License and +copyright information is given in *note License and Copyright::. 
+Finally, we thank all the people who helped us in *note +Acknowledgments::. + + +File: fftw3.info, Node: Tutorial, Next: Other Important Topics, Prev: Introduction, Up: Top + +2 Tutorial +********** + +* Menu: + +* Complex One-Dimensional DFTs:: +* Complex Multi-Dimensional DFTs:: +* One-Dimensional DFTs of Real Data:: +* Multi-Dimensional DFTs of Real Data:: +* More DFTs of Real Data:: + + This chapter describes the basic usage of FFTW, i.e., how to compute the +Fourier transform of a single array. This chapter tells the truth, but +not the _whole_ truth. Specifically, FFTW implements additional +routines and flags that are not documented here, although in many cases +we try to indicate where added capabilities exist. For more complete +information, see *note FFTW Reference::. (Note that you need to +compile and install FFTW before you can use it in a program. For the +details of the installation, see *note Installation and +Customization::.) + + We recommend that you read this tutorial in order.(1) At the least, +read the first section (*note Complex One-Dimensional DFTs::) before +reading any of the others, even if your main interest lies in one of +the other transform types. + + Users of FFTW version 2 and earlier may also want to read *note +Upgrading from FFTW version 2::. + + ---------- Footnotes ---------- + + (1) You can read the tutorial in bit-reversed order after computing +your first transform. + + +File: fftw3.info, Node: Complex One-Dimensional DFTs, Next: Complex Multi-Dimensional DFTs, Prev: Tutorial, Up: Tutorial + +2.1 Complex One-Dimensional DFTs +================================ + + Plan: To bother about the best method of accomplishing an + accidental result. [Ambrose Bierce, `The Enlarged Devil's + Dictionary'.] + + The basic usage of FFTW to compute a one-dimensional DFT of size `N' +is simple, and it typically looks something like this code: + + #include <fftw3.h> + ... + { + fftw_complex *in, *out; + fftw_plan p; + ...
+ in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); + out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); + p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); + ... + fftw_execute(p); /* repeat as needed */ + ... + fftw_destroy_plan(p); + fftw_free(in); fftw_free(out); + } + + You must link this code with the `fftw3' library. On Unix systems, +link with `-lfftw3 -lm'. + + The example code first allocates the input and output arrays. You +can allocate them in any way that you like, but we recommend using +`fftw_malloc', which behaves like `malloc' except that it properly +aligns the array when SIMD instructions (such as SSE and Altivec) are +available (*note SIMD alignment and fftw_malloc::). [Alternatively, we +provide a convenient wrapper function `fftw_alloc_complex(N)' which has +the same effect.] + + The data is an array of type `fftw_complex', which is by default a +`double[2]' composed of the real (`in[i][0]') and imaginary +(`in[i][1]') parts of a complex number. + + The next step is to create a "plan", which is an object that +contains all the data that FFTW needs to compute the FFT. This +function creates the plan: + + fftw_plan fftw_plan_dft_1d(int n, fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + + The first argument, `n', is the size of the transform you are trying +to compute. The size `n' can be any positive integer, but sizes that +are products of small factors are transformed most efficiently +(although prime sizes still use an O(n log n) algorithm). + + The next two arguments are pointers to the input and output arrays of +the transform. These pointers can be equal, indicating an "in-place" +transform. + + The fourth argument, `sign', can be either `FFTW_FORWARD' (`-1') or +`FFTW_BACKWARD' (`+1'), and indicates the direction of the transform +you are interested in; technically, it is the sign of the exponent in +the transform. 
+ + The `flags' argument is usually either `FFTW_MEASURE' or `FFTW_ESTIMATE'. +`FFTW_MEASURE' instructs FFTW to run and measure the execution time of +several FFTs in order to find the best way to compute the transform of +size `n'. This process takes some time (usually a few seconds), +depending on your machine and on the size of the transform. +`FFTW_ESTIMATE', on the contrary, does not run any computation and just +builds a reasonable plan that is probably sub-optimal. In short, if +your program performs many transforms of the same size and +initialization time is not important, use `FFTW_MEASURE'; otherwise use +the estimate. + + _You must create the plan before initializing the input_, because +`FFTW_MEASURE' overwrites the `in'/`out' arrays. (Technically, +`FFTW_ESTIMATE' does not touch your arrays, but you should always +create plans first just to be sure.) + + Once the plan has been created, you can use it as many times as you +like for transforms on the specified `in'/`out' arrays, computing the +actual transforms via `fftw_execute(plan)': + void fftw_execute(const fftw_plan plan); + + The DFT results are stored in-order in the array `out', with the +zero-frequency (DC) component in `out[0]'. If `in != out', the +transform is "out-of-place" and the input array `in' is not modified. +Otherwise, the input array is overwritten with the transform. + + If you want to transform a _different_ array of the same size, you +can create a new plan with `fftw_plan_dft_1d' and FFTW automatically +reuses the information from the previous plan, if possible. +Alternatively, with the "guru" interface you can apply a given plan to +a different array, if you are careful. *Note FFTW Reference::. + + When you are done with the plan, you deallocate it by calling +`fftw_destroy_plan(plan)': + void fftw_destroy_plan(fftw_plan plan); + If you allocate an array with `fftw_malloc()' you must deallocate it +with `fftw_free()'. Do not use `free()' or, heaven forbid, `delete'. 
+ + FFTW computes an _unnormalized_ DFT. Thus, computing a forward +followed by a backward transform (or vice versa) results in the original +array scaled by `n'. For the definition of the DFT, see *note What +FFTW Really Computes::. + + If you have a C compiler, such as `gcc', that supports the C99 +standard, and you `#include <complex.h>' _before_ `<fftw3.h>', then +`fftw_complex' is the native double-precision complex type and you can +manipulate it with ordinary arithmetic. Otherwise, FFTW defines its +own complex type, which is bit-compatible with the C99 complex type. +*Note Complex numbers::. (The C++ `<complex>' template class may also +be usable via a typecast.) + + To use single or long-double precision versions of FFTW, replace the +`fftw_' prefix by `fftwf_' or `fftwl_' and link with `-lfftw3f' or +`-lfftw3l', but use the _same_ `<fftw3.h>' header file. + + Many more flags exist besides `FFTW_MEASURE' and `FFTW_ESTIMATE'. +For example, use `FFTW_PATIENT' if you're willing to wait even longer +for a possibly even faster plan (*note FFTW Reference::). You can also +save plans for future use, as described by *note Words of Wisdom-Saving +Plans::. + + +File: fftw3.info, Node: Complex Multi-Dimensional DFTs, Next: One-Dimensional DFTs of Real Data, Prev: Complex One-Dimensional DFTs, Up: Tutorial + +2.2 Complex Multi-Dimensional DFTs +================================== + +Multi-dimensional transforms work much the same way as one-dimensional +transforms: you allocate arrays of `fftw_complex' (preferably using +`fftw_malloc'), create an `fftw_plan', execute it as many times as you +want with `fftw_execute(plan)', and clean up with +`fftw_destroy_plan(plan)' (and `fftw_free'). + + FFTW provides two routines for creating plans for 2d and 3d +transforms, and one routine for creating plans of arbitrary +dimensionality.
The 2d and 3d routines have the following signature: + fftw_plan fftw_plan_dft_2d(int n0, int n1, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + + These routines create plans for `n0' by `n1' two-dimensional (2d) +transforms and `n0' by `n1' by `n2' 3d transforms, respectively. All +of these transforms operate on contiguous arrays in the C-standard +"row-major" order, so that the last dimension has the fastest-varying +index in the array. This layout is described further in *note +Multi-dimensional Array Format::. + + FFTW can also compute transforms of higher dimensionality. In order +to avoid confusion between the various meanings of the word +"dimension", we use the term _rank_ to denote the number of independent +indices in an array.(1) For example, we say that a 2d transform has +rank 2, a 3d transform has rank 3, and so on. You can plan transforms +of arbitrary rank by means of the following function: + + fftw_plan fftw_plan_dft(int rank, const int *n, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + + Here, `n' is a pointer to an array `n[rank]' denoting an `n[0]' by +`n[1]' by ... by `n[rank-1]' transform. Thus, for example, the call + fftw_plan_dft_2d(n0, n1, in, out, sign, flags); + is equivalent to the following code fragment: + int n[2]; + n[0] = n0; + n[1] = n1; + fftw_plan_dft(2, n, in, out, sign, flags); + `fftw_plan_dft' is not restricted to 2d and 3d transforms, however; +it can plan transforms of arbitrary rank. + + You may have noticed that all the planner routines described so far +have overlapping functionality. For example, you can plan a 1d or 2d +transform by using `fftw_plan_dft' with a `rank' of `1' or `2', or even +by calling `fftw_plan_dft_3d' with `n0' and/or `n1' equal to `1' (with +no loss in efficiency).
This pattern continues, and FFTW's planning +routines in general form a "partial order," sequences of interfaces +with strictly increasing generality but correspondingly greater +complexity. + + `fftw_plan_dft' is the most general complex-DFT routine that we +describe in this tutorial, but there are also the advanced and guru +interfaces, which allow one to efficiently combine multiple/strided +transforms into a single FFTW plan, transform a subset of a larger +multi-dimensional array, and/or to handle more general complex-number +formats. For more information, see *note FFTW Reference::. + + ---------- Footnotes ---------- + + (1) The term "rank" is commonly used in the APL, FORTRAN, and Common +Lisp traditions, although it is not so common in the C world. + + +File: fftw3.info, Node: One-Dimensional DFTs of Real Data, Next: Multi-Dimensional DFTs of Real Data, Prev: Complex Multi-Dimensional DFTs, Up: Tutorial + +2.3 One-Dimensional DFTs of Real Data +===================================== + +In many practical applications, the input data `in[i]' are purely real +numbers, in which case the DFT output satisfies the "Hermitian" redundancy: +`out[i]' is the conjugate of `out[n-i]'. It is possible to take +advantage of these circumstances in order to achieve roughly a factor +of two improvement in both speed and memory usage. + + In exchange for these speed and space advantages, the user sacrifices +some of the simplicity of FFTW's complex transforms. First of all, the +input and output arrays are of _different sizes and types_: the input +is `n' real numbers, while the output is `n/2+1' complex numbers (the +non-redundant outputs); this also requires slight "padding" of the +input array for in-place transforms. Second, the inverse transform +(complex to real) has the side-effect of _overwriting its input array_, +by default. Neither of these inconveniences should pose a serious +problem for users, but it is important to be aware of them. 
+ + The routines to perform real-data transforms are almost the same as +those for complex transforms: you allocate arrays of `double' and/or +`fftw_complex' (preferably using `fftw_malloc' or +`fftw_alloc_complex'), create an `fftw_plan', execute it as many times +as you want with `fftw_execute(plan)', and clean up with +`fftw_destroy_plan(plan)' (and `fftw_free'). The only differences are +that the input (or output) is of type `double' and there are new +routines to create the plan. In one dimension: + + fftw_plan fftw_plan_dft_r2c_1d(int n, double *in, fftw_complex *out, + unsigned flags); + fftw_plan fftw_plan_dft_c2r_1d(int n, fftw_complex *in, double *out, + unsigned flags); + + for the real input to complex-Hermitian output ("r2c") and +complex-Hermitian input to real output ("c2r") transforms. Unlike the +complex DFT planner, there is no `sign' argument. Instead, r2c DFTs +are always `FFTW_FORWARD' and c2r DFTs are always `FFTW_BACKWARD'. (For +single/long-double precision `fftwf' and `fftwl', `double' should be +replaced by `float' and `long double', respectively.) + + Here, `n' is the "logical" size of the DFT, not necessarily the +physical size of the array. In particular, the real (`double') array +has `n' elements, while the complex (`fftw_complex') array has `n/2+1' +elements (where the division is rounded down). For an in-place +transform, `in' and `out' are aliased to the same array, which must be +big enough to hold both; so, the real array would actually have +`2*(n/2+1)' elements, where the elements beyond the first `n' are +unused padding. (Note that this is very different from the concept of +"zero-padding" a transform to a larger length, which changes the +logical size of the DFT by actually adding new input data.) The kth +element of the complex array is exactly the same as the kth element of +the corresponding complex DFT. 
All positive `n' are supported; +products of small factors are most efficient, but an O(n log n) +algorithm is used even for prime sizes. + + As noted above, the c2r transform destroys its input array even for +out-of-place transforms. This can be prevented, if necessary, by +including `FFTW_PRESERVE_INPUT' in the `flags', with unfortunately some +sacrifice in performance. This flag is also not currently supported +for multi-dimensional real DFTs (next section). + + Readers familiar with DFTs of real data will recall that the 0th (the +"DC") and `n/2'-th (the "Nyquist" frequency, when `n' is even) elements +of the complex output are purely real. Some implementations therefore +store the Nyquist element where the DC imaginary part would go, in +order to make the input and output arrays the same size. Such packing, +however, does not generalize well to multi-dimensional transforms, and +the space savings are minuscule in any case; FFTW does not support it. + + An alternative interface for one-dimensional r2c and c2r DFTs can be +found in the `r2r' interface (*note The Halfcomplex-format DFT::), with +"halfcomplex"-format output that _is_ the same size (and type) as the +input array. That interface, although it is not very useful for +multi-dimensional transforms, may sometimes yield better performance.
+ + +File: fftw3.info, Node: Multi-Dimensional DFTs of Real Data, Next: More DFTs of Real Data, Prev: One-Dimensional DFTs of Real Data, Up: Tutorial + +2.4 Multi-Dimensional DFTs of Real Data +======================================= + +Multi-dimensional DFTs of real data use the following planner routines: + + fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1, + double *in, fftw_complex *out, + unsigned flags); + fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2, + double *in, fftw_complex *out, + unsigned flags); + fftw_plan fftw_plan_dft_r2c(int rank, const int *n, + double *in, fftw_complex *out, + unsigned flags); + + as well as the corresponding `c2r' routines with the input/output +types swapped. These routines work similarly to their complex +analogues, except for the fact that here the complex output array is cut +roughly in half and the real array requires padding for in-place +transforms (as in 1d, above). + + As before, `n' is the logical size of the array, and the +consequences of this on the format of the complex arrays deserve +careful attention. Suppose that the real data has dimensions n[0] x +n[1] x n[2] x ... x n[d-1] (in row-major order). Then, after an r2c +transform, the output is an n[0] x n[1] x n[2] x ... x (n[d-1]/2 + 1) +array of `fftw_complex' values in row-major order, corresponding to +slightly over half of the output of the corresponding complex DFT. +(The division is rounded down.) The ordering of the data is otherwise +exactly the same as in the complex-DFT case. + + For out-of-place transforms, this is the end of the story: the real +data is stored as a row-major array of size n[0] x n[1] x n[2] x ... x +n[d-1] and the complex data is stored as a row-major array of size +n[0] x n[1] x n[2] x ... x (n[d-1]/2 + 1). + + For in-place transforms, however, extra padding of the real-data +array is necessary because the complex array is larger than the real +array, and the two arrays share the same memory locations.
Thus, for +in-place transforms, the final dimension of the real-data array must be +padded with extra values to accommodate the size of the complex +data--two values if the last dimension is even and one if it is odd. That +is, the last dimension of the real data must physically contain 2 * +(n[d-1]/2+1) `double' values (exactly enough to hold the complex data). +This physical array size does not, however, change the _logical_ array +size--only n[d-1] values are actually stored in the last dimension, and +n[d-1] is the last dimension passed to the plan-creation routine. + + For example, consider the transform of a two-dimensional real array +of size `n0' by `n1'. The output of the r2c transform is a +two-dimensional complex array of size `n0' by `n1/2+1', where the `y' +dimension has been cut nearly in half because of redundancies in the +output. Because `fftw_complex' is twice the size of `double', the +output array is slightly bigger than the input array. Thus, if we want +to compute the transform in place, we must _pad_ the input array so +that it is of size `n0' by `2*(n1/2+1)'. If `n1' is even, then there +are two padding elements at the end of each row (which need not be +initialized, as they are only used for output). + + These transforms are unnormalized, so an r2c followed by a c2r +transform (or vice versa) will result in the original data scaled by +the number of real data elements--that is, the product of the (logical) +dimensions of the real data. + + (Because the last dimension is treated specially, if it is equal to +`1' the transform is _not_ equivalent to a lower-dimensional r2c/c2r +transform. In that case, the last complex dimension also has size `1' +(`=1/2+1'), and no advantage is gained over the complex transforms.) 
+ + +File: fftw3.info, Node: More DFTs of Real Data, Prev: Multi-Dimensional DFTs of Real Data, Up: Tutorial + +2.5 More DFTs of Real Data +========================== + +* Menu: + +* The Halfcomplex-format DFT:: +* Real even/odd DFTs (cosine/sine transforms):: +* The Discrete Hartley Transform:: + + FFTW supports several other transform types via a unified "r2r" +(real-to-real) interface, so called because it takes a real (`double') +array and outputs a real array of the same size. These r2r transforms +currently fall into three categories: DFTs of real input and +complex-Hermitian output in halfcomplex format, DFTs of real input with +even/odd symmetry (a.k.a. discrete cosine/sine transforms, DCTs/DSTs), +and discrete Hartley transforms (DHTs), all described in more detail by +the following sections. + + The r2r transforms follow the by now familiar interface of creating +an `fftw_plan', executing it with `fftw_execute(plan)', and destroying +it with `fftw_destroy_plan(plan)'. Furthermore, all r2r transforms +share the same planner interface: + + fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out, + fftw_r2r_kind kind, unsigned flags); + fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, + unsigned flags); + fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2, + double *in, double *out, + fftw_r2r_kind kind0, + fftw_r2r_kind kind1, + fftw_r2r_kind kind2, + unsigned flags); + fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out, + const fftw_r2r_kind *kind, unsigned flags); + + Just as for the complex DFT, these plan 1d/2d/3d/multi-dimensional +transforms for contiguous arrays in row-major order, transforming (real) +input to output of the same size, where `n' specifies the _physical_ +dimensions of the arrays. 
All positive `n' are supported (with the +exception of `n=1' for the `FFTW_REDFT00' kind, noted in the real-even +subsection below); products of small factors are most efficient +(factorizing `n-1' and `n+1' for `FFTW_REDFT00' and `FFTW_RODFT00' +kinds, described below), but an O(n log n) algorithm is used even for +prime sizes. + + Each dimension has a "kind" parameter, of type `fftw_r2r_kind', +specifying the kind of r2r transform to be used for that dimension. (In +the case of `fftw_plan_r2r', this is an array `kind[rank]' where +`kind[i]' is the transform kind for the dimension `n[i]'.) The kind +can be one of a set of predefined constants, defined in the following +subsections. + + In other words, FFTW computes the separable product of the specified +r2r transforms over each dimension, which can be used e.g. for partial +differential equations with mixed boundary conditions. (For some r2r +kinds, notably the halfcomplex DFT and the DHT, such a separable +product is somewhat problematic in more than one dimension, however, as +is described below.) + + In the current version of FFTW, all r2r transforms except for the +halfcomplex type are computed via pre- or post-processing of +halfcomplex transforms, and they are therefore not as fast as they +could be. Since most other general DCT/DST codes employ a similar +algorithm, however, FFTW's implementation should provide at least +competitive performance. + + +File: fftw3.info, Node: The Halfcomplex-format DFT, Next: Real even/odd DFTs (cosine/sine transforms), Prev: More DFTs of Real Data, Up: More DFTs of Real Data + +2.5.1 The Halfcomplex-format DFT +-------------------------------- + +An r2r kind of `FFTW_R2HC' ("r2hc") corresponds to an r2c DFT (*note +One-Dimensional DFTs of Real Data::) but with "halfcomplex" format +output, and may sometimes be faster and/or more convenient than the +latter. The inverse "hc2r" transform is of kind `FFTW_HC2R'. 
The halfcomplex format +consists of the non-redundant half of the complex output for a 1d +real-input DFT of size `n', stored as a sequence of `n' real numbers +(`double') in the format: + + r0, r1, r2, ..., r(n/2), i((n+1)/2-1), ..., i2, i1 + + Here, rk is the real part of the kth output, and ik is the imaginary +part. (Division by 2 is rounded down.) For a halfcomplex array +`hc[n]', the kth component thus has its real part in `hc[k]' and its +imaginary part in `hc[n-k]', with the exception of `k' `==' `0' or +`n/2' (the latter only if `n' is even)--in these two cases, the +imaginary part is zero due to symmetries of the real-input DFT, and is +not stored. Thus, the r2hc transform of `n' real values is a +halfcomplex array of length `n', and vice versa for hc2r. + + Aside from the differing format, the output of +`FFTW_R2HC'/`FFTW_HC2R' is otherwise exactly the same as for the +corresponding 1d r2c/c2r transform (i.e. `FFTW_FORWARD'/`FFTW_BACKWARD' +transforms, respectively). Recall that these transforms are +unnormalized, so r2hc followed by hc2r will result in the original data +multiplied by `n'. Furthermore, like the c2r transform, an +out-of-place hc2r transform will _destroy its input_ array. + + Although these halfcomplex transforms can be used with the +multi-dimensional r2r interface, the interpretation of such a separable +product of transforms along each dimension is problematic. For example, +consider a two-dimensional `n0' by `n1', r2hc by r2hc transform planned +by `fftw_plan_r2r_2d(n0, n1, in, out, FFTW_R2HC, FFTW_R2HC, +FFTW_MEASURE)'. Conceptually, FFTW first transforms the rows (of size +`n1') to produce halfcomplex rows, and then transforms the columns (of +size `n0'). Half of these column transforms, however, are of imaginary +parts, and should therefore be multiplied by i and combined with the +r2hc transforms of the real columns to produce the 2d DFT amplitudes; +FFTW's r2r transform does _not_ perform this combination for you.
+Thus, if a multi-dimensional real-input/output DFT is required, we +recommend using the ordinary r2c/c2r interface (*note Multi-Dimensional +DFTs of Real Data::). + + +File: fftw3.info, Node: Real even/odd DFTs (cosine/sine transforms), Next: The Discrete Hartley Transform, Prev: The Halfcomplex-format DFT, Up: More DFTs of Real Data + +2.5.2 Real even/odd DFTs (cosine/sine transforms) +------------------------------------------------- + +The Fourier transform of a real-even function f(-x) = f(x) is +real-even, and i times the Fourier transform of a real-odd function +f(-x) = -f(x) is real-odd. Similar results hold for a discrete Fourier +transform, and thus for these symmetries the need for complex +inputs/outputs is entirely eliminated. Moreover, one gains a factor of +two in speed/space from the fact that the data are real, and an +additional factor of two from the even/odd symmetry: only the +non-redundant (first) half of the array need be stored. The result is +the real-even DFT ("REDFT") and the real-odd DFT ("RODFT"), also known +as the discrete cosine and sine transforms ("DCT" and "DST"), +respectively. + + (In this section, we describe the 1d transforms; multi-dimensional +transforms are just a separable product of these transforms operating +along each dimension.) + + Because of the discrete sampling, one has an additional choice: is +the data even/odd around a sampling point, or around the point halfway +between two samples? The latter corresponds to _shifting_ the samples +by _half_ an interval, and gives rise to several transform variants +denoted by REDFTab and RODFTab: a and b are 0 or 1, and indicate +whether the input (a) and/or output (b) are shifted by half a sample (1 +means it is shifted). 
These are also known as types I-IV of the DCT +and DST, and all four types are supported by FFTW's r2r interface.(1) + + The r2r kinds for the various REDFT and RODFT types supported by +FFTW, along with the boundary conditions at both ends of the _input_ +array (`n' real numbers `in[j=0..n-1]'), are: + + * `FFTW_REDFT00' (DCT-I): even around j=0 and even around j=n-1. + + * `FFTW_REDFT10' (DCT-II, "the" DCT): even around j=-0.5 and even + around j=n-0.5. + + * `FFTW_REDFT01' (DCT-III, "the" IDCT): even around j=0 and odd + around j=n. + + * `FFTW_REDFT11' (DCT-IV): even around j=-0.5 and odd around j=n-0.5. + + * `FFTW_RODFT00' (DST-I): odd around j=-1 and odd around j=n. + + * `FFTW_RODFT10' (DST-II): odd around j=-0.5 and odd around j=n-0.5. + + * `FFTW_RODFT01' (DST-III): odd around j=-1 and even around j=n-1. + + * `FFTW_RODFT11' (DST-IV): odd around j=-0.5 and even around j=n-0.5. + + + Note that these symmetries apply to the "logical" array being +transformed; *there are no constraints on your physical input data*. +So, for example, if you specify a size-5 REDFT00 (DCT-I) of the data +abcde, it corresponds to the DFT of the logical even array abcdedcb of +size 8. A size-4 REDFT10 (DCT-II) of the data abcd corresponds to the +size-8 logical DFT of the even array abcddcba, shifted by half a sample. + + All of these transforms are invertible. The inverse of R*DFT00 is +R*DFT00; of R*DFT10 is R*DFT01 and vice versa (these are often called +simply "the" DCT and IDCT, respectively); and of R*DFT11 is R*DFT11. +However, the transforms computed by FFTW are unnormalized, exactly like +the corresponding real and complex DFTs, so computing a transform +followed by its inverse yields the original array scaled by N, where N +is the _logical_ DFT size. For REDFT00, N=2(n-1); for RODFT00, +N=2(n+1); otherwise, N=2n. + + Note that the boundary conditions of the transform output array are +given by the input boundary conditions of the inverse transform. 
Thus, +the above transforms are all inequivalent in terms of input/output +boundary conditions, even neglecting the 0.5 shift difference. + + FFTW is most efficient when N is a product of small factors; note +that this _differs_ from the factorization of the physical size `n' for +REDFT00 and RODFT00! There is another oddity: `n=1' REDFT00 transforms +correspond to N=0, and so are _not defined_ (the planner will return +`NULL'). Otherwise, any positive `n' is supported. + + For the precise mathematical definitions of these transforms as used +by FFTW, see *note What FFTW Really Computes::. (For people accustomed +to the DCT/DST, FFTW's definitions have a coefficient of 2 in front of +the cos/sin functions so that they correspond precisely to an even/odd +DFT of size N. Some authors also include additional multiplicative +factors of sqrt(2) for selected inputs and outputs; this makes the +transform orthogonal, but sacrifices the direct equivalence to a +symmetric DFT.) + +Which type do you need? +....................... + +Since the required flavor of even/odd DFT depends upon your problem, +you are the best judge of this choice, but we can make a few comments +on relative efficiency to help you in your selection. In particular, +R*DFT01 and R*DFT10 tend to be slightly faster than R*DFT11 (especially +for odd sizes), while the R*DFT00 transforms are sometimes +significantly slower (especially for even sizes).(2) + + Thus, if only the boundary conditions on the transform inputs are +specified, we generally recommend R*DFT10 over R*DFT00 and R*DFT01 over +R*DFT11 (unless the half-sample shift or the self-inverse property is +significant for your problem). + + If performance is important to you and you are using only small sizes +(say n<200), e.g. for multi-dimensional transforms, then you might +consider generating hard-coded transforms of those sizes and types that +you are interested in (*note Generating your own code::). 
+ + We are interested in hearing what types of symmetric transforms you +find most useful. + + ---------- Footnotes ---------- + + (1) There are also type V-VIII transforms, which correspond to a +logical DFT of _odd_ size N, independent of whether the physical size +`n' is odd, but we do not support these variants. + + (2) R*DFT00 is sometimes slower in FFTW because we discovered that +the standard algorithm for computing this by a pre/post-processed real +DFT--the algorithm used in FFTPACK, Numerical Recipes, and other +sources for decades now--has serious numerical problems: it already +loses several decimal places of accuracy for 16k sizes. There seem to +be only two alternatives in the literature that do not suffer +similarly: a recursive decomposition into smaller DCTs, which would +require a large set of codelets for efficiency and generality, or +sacrificing a factor of 2 in speed to use a real DFT of twice the size. +We currently employ the latter technique for general n, as well as a +limited form of the former method: a split-radix decomposition when n +is odd (N a multiple of 4). For N containing many factors of 2, the +split-radix method seems to recover most of the speed of the standard +algorithm without the accuracy tradeoff. + + +File: fftw3.info, Node: The Discrete Hartley Transform, Prev: Real even/odd DFTs (cosine/sine transforms), Up: More DFTs of Real Data + +2.5.3 The Discrete Hartley Transform +------------------------------------ + +If you are planning to use the DHT because you've heard that it is +"faster" than the DFT (FFT), *stop here*. The DHT is not faster than +the DFT. That story is an old but enduring misconception that was +debunked in 1987. + + The discrete Hartley transform (DHT) is an invertible linear +transform closely related to the DFT. In the DFT, one multiplies each +input by cos - i * sin (a complex exponential), whereas in the DHT each +input is multiplied by simply cos + sin. 
Thus, the DHT transforms `n' +real numbers to `n' real numbers, and has the convenient property of +being its own inverse. In FFTW, a DHT (of any positive `n') can be +specified by an r2r kind of `FFTW_DHT'. + + Like the DFT, in FFTW the DHT is unnormalized, so computing a DHT of +size `n' followed by another DHT of the same size will result in the +original array multiplied by `n'. + + The DHT was originally proposed as a more efficient alternative to +the DFT for real data, but it was subsequently shown that a specialized +DFT (such as FFTW's r2hc or r2c transforms) could be just as fast. In +FFTW, the DHT is actually computed by post-processing an r2hc +transform, so there is ordinarily no reason to prefer it from a +performance perspective.(1) However, we have heard rumors that the DHT +might be the most appropriate transform in its own right for certain +applications, and we would be very interested to hear from anyone who +finds it useful. + + If `FFTW_DHT' is specified for multiple dimensions of a +multi-dimensional transform, FFTW computes the separable product of 1d +DHTs along each dimension. Unfortunately, this is not quite the same +thing as a true multi-dimensional DHT; you can compute the latter, if +necessary, with at most `rank-1' post-processing passes [see e.g. H. +Hao and R. N. Bracewell, Proc. IEEE 75, 264-266 (1987)]. + + For the precise mathematical definition of the DHT as used by FFTW, +see *note What FFTW Really Computes::. + + ---------- Footnotes ---------- + + (1) We provide the DHT mainly as a byproduct of some internal +algorithms. FFTW computes a real input/output DFT of _prime_ size by +re-expressing it as a DHT plus post/pre-processing and then using +Rader's prime-DFT algorithm adapted to the DHT. 
+ + +File: fftw3.info, Node: Other Important Topics, Next: FFTW Reference, Prev: Tutorial, Up: Top + +3 Other Important Topics +************************ + +* Menu: + +* SIMD alignment and fftw_malloc:: +* Multi-dimensional Array Format:: +* Words of Wisdom-Saving Plans:: +* Caveats in Using Wisdom:: + + +File: fftw3.info, Node: SIMD alignment and fftw_malloc, Next: Multi-dimensional Array Format, Prev: Other Important Topics, Up: Other Important Topics + +3.1 SIMD alignment and fftw_malloc +================================== + +SIMD, which stands for "Single Instruction Multiple Data," is a set of +special operations supported by some processors to perform a single +operation on several numbers (usually 2 or 4) simultaneously. SIMD +floating-point instructions are available on several popular CPUs: +SSE/SSE2/AVX on recent x86/x86-64 processors, AltiVec (single precision) +on some PowerPCs (Apple G4 and higher), NEON on some ARM models, and +MIPS Paired Single (currently only in FFTW 3.2.x). FFTW can be +compiled to support the SIMD instructions on any of these systems. + + A program linking to an FFTW library compiled with SIMD support can +obtain a nonnegligible speedup for most complex and r2c/c2r transforms. +In order to obtain this speedup, however, the arrays of complex (or +real) data passed to FFTW must be specially aligned in memory +(typically 16-byte aligned), and often this alignment is more stringent +than that provided by the usual `malloc' (etc.) allocation routines. + + In order to guarantee proper alignment for SIMD, therefore, in case +your program is ever linked against a SIMD-using FFTW, we recommend +allocating your transform data with `fftw_malloc' and de-allocating it +with `fftw_free'. These have exactly the same interface and behavior as +`malloc'/`free', except that for a SIMD FFTW they ensure that the +returned pointer has the necessary alignment (by calling `memalign' or +its equivalent on your OS). 
+ + You are not _required_ to use `fftw_malloc'. You can allocate your +data in any way that you like, from `malloc' to `new' (in C++) to a +fixed-size array declaration. If the array happens not to be properly +aligned, FFTW will not use the SIMD extensions. + + Since `fftw_malloc' only ever needs to be used for real and complex +arrays, we provide two convenient wrapper routines `fftw_alloc_real(N)' +and `fftw_alloc_complex(N)' that are equivalent to +`(double*)fftw_malloc(sizeof(double) * N)' and +`(fftw_complex*)fftw_malloc(sizeof(fftw_complex) * N)', respectively +(or their equivalents in other precisions). + + +File: fftw3.info, Node: Multi-dimensional Array Format, Next: Words of Wisdom-Saving Plans, Prev: SIMD alignment and fftw_malloc, Up: Other Important Topics + +3.2 Multi-dimensional Array Format +================================== + +This section describes the format in which multi-dimensional arrays are +stored in FFTW. We felt that a detailed discussion of this topic was +necessary. Since several different formats are common, this topic is +often a source of confusion. + +* Menu: + +* Row-major Format:: +* Column-major Format:: +* Fixed-size Arrays in C:: +* Dynamic Arrays in C:: +* Dynamic Arrays in C-The Wrong Way:: + + +File: fftw3.info, Node: Row-major Format, Next: Column-major Format, Prev: Multi-dimensional Array Format, Up: Multi-dimensional Array Format + +3.2.1 Row-major Format +---------------------- + +The multi-dimensional arrays passed to `fftw_plan_dft' etcetera are +expected to be stored as a single contiguous block in "row-major" order +(sometimes called "C order"). Basically, this means that as you step +through adjacent memory locations, the first dimension's index varies +most slowly and the last dimension's index varies most quickly. + + To be more explicit, let us consider an array of rank d whose +dimensions are n[0] x n[1] x n[2] x ... x n[d-1] . 
Now, we specify a +location in the array by a sequence of d (zero-based) indices, one for +each dimension: (i[0], i[1], ..., i[d-1]). If the array is stored in +row-major order, then this element is located at the position i[d-1] + +n[d-1] * (i[d-2] + n[d-2] * (... + n[1] * i[0])). + + Note that, for the ordinary complex DFT, each element of the array +must be of type `fftw_complex'; i.e. a (real, imaginary) pair of +(double-precision) numbers. + + In the advanced FFTW interface, the physical dimensions n from which +the indices are computed can be different from (larger than) the +logical dimensions of the transform to be computed, in order to +transform a subset of a larger array. Note also that, in the advanced +interface, the expression above is multiplied by a "stride" to get the +actual array index--this is useful in situations where each element of +the multi-dimensional array is actually a data structure (or another +array), and you just want to transform a single field. In the basic +interface, however, the stride is 1. + + +File: fftw3.info, Node: Column-major Format, Next: Fixed-size Arrays in C, Prev: Row-major Format, Up: Multi-dimensional Array Format + +3.2.2 Column-major Format +------------------------- + +Readers from the Fortran world are used to arrays stored in +"column-major" order (sometimes called "Fortran order"). This is +essentially the exact opposite of row-major order in that, here, the +_first_ dimension's index varies most quickly. + + If you have an array stored in column-major order and wish to +transform it using FFTW, it is quite easy to do. When creating the +plan, simply pass the dimensions of the array to the planner in +_reverse order_. For example, if your array is a rank three `N x M x +L' matrix in column-major order, you should pass the dimensions of the +array as if it were an `L x M x N' matrix (which it is, from the +perspective of FFTW). 
This is done for you _automatically_ by the FFTW +legacy-Fortran interface (*note Calling FFTW from Legacy Fortran::), +but you must do it manually with the modern Fortran interface (*note +Reversing array dimensions::). + + +File: fftw3.info, Node: Fixed-size Arrays in C, Next: Dynamic Arrays in C, Prev: Column-major Format, Up: Multi-dimensional Array Format + +3.2.3 Fixed-size Arrays in C +---------------------------- + +A multi-dimensional array whose size is declared at compile time in C +is _already_ in row-major order. You don't have to do anything special +to transform it. For example: + + { + fftw_complex data[N0][N1][N2]; + fftw_plan plan; + ... + plan = fftw_plan_dft_3d(N0, N1, N2, &data[0][0][0], &data[0][0][0], + FFTW_FORWARD, FFTW_ESTIMATE); + ... + } + + This will plan a 3d in-place transform of size `N0 x N1 x N2'. +Notice how we took the address of the zero-th element to pass to the +planner (we could also have used a typecast). + + However, we tend to _discourage_ users from declaring their arrays +in this way, for two reasons. First, this allocates the array on the +stack ("automatic" storage), which has a very limited size on most +operating systems (declaring an array with more than a few thousand +elements will often cause a crash). (You can get around this +limitation on many systems by declaring the array as `static' and/or +global, but that has its own drawbacks.) Second, it may not optimally +align the array for use with a SIMD FFTW (*note SIMD alignment and +fftw_malloc::). Instead, we recommend using `fftw_malloc', as +described below. + + +File: fftw3.info, Node: Dynamic Arrays in C, Next: Dynamic Arrays in C-The Wrong Way, Prev: Fixed-size Arrays in C, Up: Multi-dimensional Array Format + +3.2.4 Dynamic Arrays in C +------------------------- + +We recommend allocating most arrays dynamically, with `fftw_malloc'. 
+This isn't too hard to do, although it is not as straightforward for +multi-dimensional arrays as it is for one-dimensional arrays. + + Creating the array is simple: using a dynamic-allocation routine like +`fftw_malloc', allocate an array big enough to store N `fftw_complex' +values (for a complex DFT), where N is the product of the sizes of the +array dimensions (i.e. the total number of complex values in the +array). For example, here is code to allocate a 5 x 12 x 27 rank-3 +array: + + fftw_complex *an_array; + an_array = (fftw_complex*) fftw_malloc(5*12*27 * sizeof(fftw_complex)); + + Accessing the array elements, however, is more tricky--you can't +simply use multiple applications of the `[]' operator like you could +for fixed-size arrays. Instead, you have to explicitly compute the +offset into the array using the formula given earlier for row-major +arrays. For example, to reference the (i,j,k)-th element of the array +allocated above, you would use the expression `an_array[k + 27 * (j + +12 * i)]'. + + This pain can be alleviated somewhat by defining appropriate macros, +or, in C++, creating a class and overloading the `()' operator. The +recent C99 standard provides a way to reinterpret the dynamic array as +a "variable-length" multi-dimensional array amenable to `[]', but this +feature is not yet widely supported by compilers. + + +File: fftw3.info, Node: Dynamic Arrays in C-The Wrong Way, Prev: Dynamic Arrays in C, Up: Multi-dimensional Array Format + +3.2.5 Dynamic Arrays in C--The Wrong Way +---------------------------------------- + +A different method for allocating multi-dimensional arrays in C is +often suggested that is incompatible with FFTW: _using it will cause +FFTW to die a painful death_. We discuss the technique here, however, +because it is so commonly known and used. This method is to create +arrays of pointers of arrays of pointers of ...etcetera. 
For example, +the analogue in this method to the example above is: + + int i,j; + fftw_complex ***a_bad_array; /* another way to make a 5x12x27 array */ + + a_bad_array = (fftw_complex ***) malloc(5 * sizeof(fftw_complex **)); + for (i = 0; i < 5; ++i) { + a_bad_array[i] = + (fftw_complex **) malloc(12 * sizeof(fftw_complex *)); + for (j = 0; j < 12; ++j) + a_bad_array[i][j] = + (fftw_complex *) malloc(27 * sizeof(fftw_complex)); + } + + As you can see, this sort of array is inconvenient to allocate (and +deallocate). On the other hand, it has the advantage that the +(i,j,k)-th element can be referenced simply by `a_bad_array[i][j][k]'. + + If you like this technique and want to maximize convenience in +accessing the array, but still want to pass the array to FFTW, you can +use a hybrid method. Allocate the array as one contiguous block, but +also declare an array of arrays of pointers that point to appropriate +places in the block. That sort of trick is beyond the scope of this +documentation; for more information on multi-dimensional arrays in C, +see the `comp.lang.c' FAQ (http://c-faq.com/aryptr/dynmuldimary.html). + + +File: fftw3.info, Node: Words of Wisdom-Saving Plans, Next: Caveats in Using Wisdom, Prev: Multi-dimensional Array Format, Up: Other Important Topics + +3.3 Words of Wisdom--Saving Plans +================================= + +FFTW implements a method for saving plans to disk and restoring them. +In fact, what FFTW does is more general than just saving and loading +plans. The mechanism is called "wisdom". Here, we describe this +feature at a high level. *Note FFTW Reference::, for a less casual but +more complete discussion of how to use wisdom in FFTW. + + Plans created with the `FFTW_MEASURE', `FFTW_PATIENT', or +`FFTW_EXHAUSTIVE' options produce near-optimal FFT performance, but may +require a long time to compute because FFTW must measure the runtime of +many possible plans and select the best one. 
This setup is designed +for the situations where so many transforms of the same size must be +computed that the start-up time is irrelevant. For short +initialization times, but slower transforms, we have provided +`FFTW_ESTIMATE'. The `wisdom' mechanism is a way to get the best of +both worlds: you compute a good plan once, save it to disk, and later +reload it as many times as necessary. The wisdom mechanism can +actually save and reload many plans at once, not just one. + + Whenever you create a plan, the FFTW planner accumulates wisdom, +which is information sufficient to reconstruct the plan. After +planning, you can save this information to disk by means of the +function: + int fftw_export_wisdom_to_filename(const char *filename); + (This function returns non-zero on success.) + + The next time you run the program, you can restore the wisdom with +`fftw_import_wisdom_from_filename' (which also returns non-zero on +success), and then recreate the plan using the same flags as before. + int fftw_import_wisdom_from_filename(const char *filename); + + Wisdom is automatically used for any size to which it is applicable, +as long as the planner flags are not more "patient" than those with +which the wisdom was created. For example, wisdom created with +`FFTW_MEASURE' can be used if you later plan with `FFTW_ESTIMATE' or +`FFTW_MEASURE', but not with `FFTW_PATIENT'. + + The `wisdom' is cumulative, and is stored in a global, private data +structure managed internally by FFTW. The storage space required is +minimal, proportional to the logarithm of the sizes the wisdom was +generated from. If memory usage is a concern, however, the wisdom can +be forgotten and its associated memory freed by calling: + void fftw_forget_wisdom(void); + + Wisdom can be exported to a file, a string, or any other medium. +For details, see *note Wisdom::. 
+ + +File: fftw3.info, Node: Caveats in Using Wisdom, Prev: Words of Wisdom-Saving Plans, Up: Other Important Topics + +3.4 Caveats in Using Wisdom +=========================== + + For in much wisdom is much grief, and he that increaseth knowledge + increaseth sorrow. [Ecclesiastes 1:18] + + There are pitfalls to using wisdom, in that it can negate FFTW's +ability to adapt to changing hardware and other conditions. For +example, it would be perfectly possible to export wisdom from a program +running on one processor and import it into a program running on +another processor. Doing so, however, would mean that the second +program would use plans optimized for the first processor, instead of +the one it is running on. + + It should be safe to reuse wisdom as long as the hardware and program +binaries remain unchanged. (Actually, the optimal plan may change even +between runs of the same binary on identical hardware, due to +differences in the virtual memory environment, etcetera. Users +seriously interested in performance should worry about this problem, +too.) It is likely that, if the same wisdom is used for two different +program binaries, even running on the same machine, the plans may be +sub-optimal because of differing code alignments. It is therefore wise +to recreate wisdom every time an application is recompiled. The more +the underlying hardware and software changes between the creation of +wisdom and its use, the greater grows the risk of sub-optimal plans. + + Nevertheless, if the choice is between using `FFTW_ESTIMATE' or +using possibly-suboptimal wisdom (created on the same machine, but for a +different binary), the wisdom is likely to be better. For this reason, +we provide a function to import wisdom from a standard system-wide +location (`/etc/fftw/wisdom' on Unix): + + int fftw_import_system_wisdom(void); + + FFTW also provides a standalone program, `fftw-wisdom' (described by +its own `man' page on Unix) with which users can create wisdom, e.g. 
+for a canonical set of sizes to store in the system wisdom file. *Note +Wisdom Utilities::. + + +File: fftw3.info, Node: FFTW Reference, Next: Multi-threaded FFTW, Prev: Other Important Topics, Up: Top + +4 FFTW Reference +**************** + +This chapter provides a complete reference for all sequential (i.e., +one-processor) FFTW functions. Parallel transforms are described in +later chapters. + +* Menu: + +* Data Types and Files:: +* Using Plans:: +* Basic Interface:: +* Advanced Interface:: +* Guru Interface:: +* New-array Execute Functions:: +* Wisdom:: +* What FFTW Really Computes:: + + +File: fftw3.info, Node: Data Types and Files, Next: Using Plans, Prev: FFTW Reference, Up: FFTW Reference + +4.1 Data Types and Files +======================== + +All programs using FFTW should include its header file: + + #include <fftw3.h> + + You must also link to the FFTW library. On Unix, this means adding +`-lfftw3 -lm' at the _end_ of the link command. + +* Menu: + +* Complex numbers:: +* Precision:: +* Memory Allocation:: + + +File: fftw3.info, Node: Complex numbers, Next: Precision, Prev: Data Types and Files, Up: Data Types and Files + +4.1.1 Complex numbers +--------------------- + +The default FFTW interface uses `double' precision for all +floating-point numbers, and defines a `fftw_complex' type to hold +complex numbers as: + + typedef double fftw_complex[2]; + + Here, the `[0]' element holds the real part and the `[1]' element +holds the imaginary part. + + Alternatively, if you have a C compiler (such as `gcc') that +supports the C99 revision of the ANSI C standard, you can use C's new +native complex type (which is binary-compatible with the typedef above). +In particular, if you `#include <complex.h>' _before_ `<fftw3.h>', then +`fftw_complex' is defined to be the native complex type and you can +manipulate it with ordinary arithmetic (e.g.
`x = y * (3+4*I)', where +`x' and `y' are `fftw_complex' and `I' is the standard symbol for the +imaginary unit). + + C++ has its own `complex<T>' template class, defined in the standard +`<complex>' header file. Reportedly, the C++ standards committee has +recently agreed to mandate that the storage format used for this type +be binary-compatible with the C99 type, i.e. an array `T[2]' with +consecutive real `[0]' and imaginary `[1]' parts. (See report +`http://www.open-std.org/jtc1/sc22/WG21/docs/papers/2002/n1388.pdf +WG21/N1388'.) Although not part of the official standard as of this +writing, the proposal stated that: "This solution has been tested with +all current major implementations of the standard library and shown to +be working." To the extent that this is true, if you have a variable +`complex<double> *x', you can pass it directly to FFTW via +`reinterpret_cast<fftw_complex*>(x)'. + + +File: fftw3.info, Node: Precision, Next: Memory Allocation, Prev: Complex numbers, Up: Data Types and Files + +4.1.2 Precision +--------------- + +You can install single and long-double precision versions of FFTW, +which replace `double' with `float' and `long double', respectively +(*note Installation and Customization::). To use these interfaces, you: + + * Link to the single/long-double libraries; on Unix, `-lfftw3f' or + `-lfftw3l' instead of (or in addition to) `-lfftw3'. (You can + link to the different-precision libraries simultaneously.) + + * Include the _same_ `<fftw3.h>' header file. + + * Replace all lowercase instances of `fftw_' with `fftwf_' or + `fftwl_' for single or long-double precision, respectively. + (`fftw_complex' becomes `fftwf_complex', `fftw_execute' becomes + `fftwf_execute', etcetera.) + + * Uppercase names, i.e. names beginning with `FFTW_', remain the + same. + + * Replace `double' with `float' or `long double' for subroutine + parameters.
+ + + Depending upon your compiler and/or hardware, `long double' may not +be any more precise than `double' (or may not be supported at all, +although it is standard in C99). + + We also support using the nonstandard `__float128' +quadruple-precision type provided by recent versions of `gcc' on 32- +and 64-bit x86 hardware (*note Installation and Customization::). To +use this type, link with `-lfftw3q -lquadmath -lm' (the `libquadmath' +library provided by `gcc' is needed for quadruple-precision +trigonometric functions) and use `fftwq_' identifiers. + + +File: fftw3.info, Node: Memory Allocation, Prev: Precision, Up: Data Types and Files + +4.1.3 Memory Allocation +----------------------- + + void *fftw_malloc(size_t n); + void fftw_free(void *p); + + These are functions that behave identically to `malloc' and `free', +except that they guarantee that the returned pointer obeys any special +alignment restrictions imposed by any algorithm in FFTW (e.g. for SIMD +acceleration). *Note SIMD alignment and fftw_malloc::. + + Data allocated by `fftw_malloc' _must_ be deallocated by `fftw_free' +and not by the ordinary `free'. + + These routines simply call through to your operating system's +`malloc' or, if necessary, its aligned equivalent (e.g. `memalign'), so +you normally need not worry about any significant time or space +overhead. You are _not required_ to use them to allocate your data, +but we strongly recommend it. + + Note: in C++, just as with ordinary `malloc', you must typecast the +output of `fftw_malloc' to whatever pointer type you are allocating. 
+ + We also provide the following two convenience functions to allocate +real and complex arrays with `n' elements, which are equivalent to +`(double *) fftw_malloc(sizeof(double) * n)' and `(fftw_complex *) +fftw_malloc(sizeof(fftw_complex) * n)', respectively: + + double *fftw_alloc_real(size_t n); + fftw_complex *fftw_alloc_complex(size_t n); + + The equivalent functions in other precisions allocate arrays of `n' +elements in that precision. e.g. `fftwf_alloc_real(n)' is equivalent +to `(float *) fftwf_malloc(sizeof(float) * n)'. + + +File: fftw3.info, Node: Using Plans, Next: Basic Interface, Prev: Data Types and Files, Up: FFTW Reference + +4.2 Using Plans +=============== + +Plans for all transform types in FFTW are stored as type `fftw_plan' +(an opaque pointer type), and are created by one of the various +planning routines described in the following sections. An `fftw_plan' +contains all information necessary to compute the transform, including +the pointers to the input and output arrays. + + void fftw_execute(const fftw_plan plan); + + This executes the `plan', to compute the corresponding transform on +the arrays for which it was planned (which must still exist). The plan +is not modified, and `fftw_execute' can be called as many times as +desired. + + To apply a given plan to a different array, you can use the +new-array execute interface. *Note New-array Execute Functions::. + + `fftw_execute' (and equivalents) is the only function in FFTW +guaranteed to be thread-safe; see *note Thread safety::. + + This function: + void fftw_destroy_plan(fftw_plan plan); + deallocates the `plan' and all its associated data. + + FFTW's planner saves some other persistent data, such as the +accumulated wisdom and a list of algorithms available in the current +configuration. 
If you want to deallocate all of that and reset FFTW to +the pristine state it was in when you started your program, you can +call: + + void fftw_cleanup(void); + + After calling `fftw_cleanup', all existing plans become undefined, +and you should not attempt to execute them nor to destroy them. You can +however create and execute/destroy new plans, in which case FFTW starts +accumulating wisdom information again. + + `fftw_cleanup' does not deallocate your plans, however. To prevent +memory leaks, you must still call `fftw_destroy_plan' before executing +`fftw_cleanup'. + + Occasionally, it may be useful to know FFTW's internal "cost" metric +that it uses to compare plans to one another; this cost is proportional +to an execution time of the plan, in undocumented units, if the plan +was created with the `FFTW_MEASURE' or other timing-based options, or +alternatively is a heuristic cost function for `FFTW_ESTIMATE' plans. +(The cost values of measured and estimated plans are not comparable, +being in different units. Also, costs from different FFTW versions or +the same version compiled differently may not be in the same units. +Plans created from wisdom have a cost of 0 since no timing measurement +is performed for them. Finally, certain problems for which only one +top-level algorithm was possible may have required no measurements of +the cost of the whole plan, in which case `fftw_cost' will also return +0.) The cost metric for a given plan is returned by: + + double fftw_cost(const fftw_plan plan); + + The following two routines are provided purely for academic purposes +(that is, for entertainment). + + void fftw_flops(const fftw_plan plan, + double *add, double *mul, double *fma); + + Given a `plan', set `add', `mul', and `fma' to an exact count of the +number of floating-point additions, multiplications, and fused +multiply-add operations involved in the plan's execution.
The total +number of floating-point operations (flops) is `add + mul + 2*fma', or +`add + mul + fma' if the hardware supports fused multiply-add +instructions (although the number of FMA operations is only approximate +because of compiler voodoo). (The number of operations should be an +integer, but we use `double' to avoid overflowing `int' for large +transforms; the arguments are of type `double' even for single and +long-double precision versions of FFTW.) + + void fftw_fprint_plan(const fftw_plan plan, FILE *output_file); + void fftw_print_plan(const fftw_plan plan); + char *fftw_sprint_plan(const fftw_plan plan); + + This outputs a "nerd-readable" representation of the `plan' to the +given file, to `stdout', or to a newly allocated NUL-terminated string +(which the caller is responsible for deallocating with `free'), +respectively. + + +File: fftw3.info, Node: Basic Interface, Next: Advanced Interface, Prev: Using Plans, Up: FFTW Reference + +4.3 Basic Interface +=================== + +Recall that the FFTW API is divided into three parts(1): the "basic +interface" computes a single transform of contiguous data, the "advanced +interface" computes transforms of multiple or strided arrays, and the +"guru interface" supports the most general data layouts, +multiplicities, and strides. This section describes the basic +interface, which we expect to satisfy the needs of most users. + +* Menu: + +* Complex DFTs:: +* Planner Flags:: +* Real-data DFTs:: +* Real-data DFT Array Format:: +* Real-to-Real Transforms:: +* Real-to-Real Transform Kinds:: + + ---------- Footnotes ---------- + + (1) Gallia est omnis divisa in partes tres (Julius Caesar).
+ + +File: fftw3.info, Node: Complex DFTs, Next: Planner Flags, Prev: Basic Interface, Up: Basic Interface + +4.3.1 Complex DFTs +------------------ + + fftw_plan fftw_plan_dft_1d(int n0, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + fftw_plan fftw_plan_dft_2d(int n0, int n1, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + fftw_plan fftw_plan_dft(int rank, const int *n, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + + Plan a complex input/output discrete Fourier transform (DFT) in zero +or more dimensions, returning an `fftw_plan' (*note Using Plans::). + + Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + + The planner returns `NULL' if the plan cannot be created. In the +standard FFTW distribution, the basic interface is guaranteed to return +a non-`NULL' plan. A plan may be `NULL', however, if you are using a +customized FFTW configuration supporting a restricted set of transforms. + +Arguments +......... + + * `rank' is the rank of the transform (it should be the size of the + array `*n'), and can be any non-negative integer. (*Note Complex + Multi-Dimensional DFTs::, for the definition of "rank".) The + `_1d', `_2d', and `_3d' planners correspond to a `rank' of `1', + `2', and `3', respectively. The rank may be zero, which is + equivalent to a rank-1 transform of size 1, i.e. a copy of one + number from input to output. + + * `n0', `n1', `n2', or `n[0..rank-1]' (as appropriate for each + routine) specify the size of the transform dimensions. They can + be any positive integer. 
+ + - Multi-dimensional arrays are stored in row-major order with + dimensions: `n0' x `n1'; or `n0' x `n1' x `n2'; or `n[0]' x + `n[1]' x ... x `n[rank-1]'. *Note Multi-dimensional Array + Format::. + + - FFTW is best at handling sizes of the form 2^a 3^b 5^c 7^d + 11^e 13^f, where e+f is either 0 or 1, and the other exponents + are arbitrary. Other sizes are computed by means of a slow, + general-purpose algorithm (which nevertheless retains O(n log + n) performance even for prime sizes). It is possible to + customize FFTW for different array sizes; see *note + Installation and Customization::. Transforms whose sizes are + powers of 2 are especially fast. + + * `in' and `out' point to the input and output arrays of the + transform, which may be the same (yielding an in-place transform). These + arrays are overwritten during planning, unless `FFTW_ESTIMATE' is + used in the flags. (The arrays need not be initialized, but they + must be allocated.) + + If `in == out', the transform is "in-place" and the input array is + overwritten. If `in != out', the two arrays must not overlap (but + FFTW does not check for this condition). + + * `sign' is the sign of the exponent in the formula that defines the + Fourier transform. It can be -1 (= `FFTW_FORWARD') or +1 (= + `FFTW_BACKWARD'). + + * `flags' is a bitwise OR (`|') of zero or more planner flags, as + defined in *note Planner Flags::. + + + FFTW computes an unnormalized transform: computing a forward +followed by a backward transform (or vice versa) will result in the +original data multiplied by the size of the transform (the product of +the dimensions). For more information, see *note What FFTW Really +Computes::. 
+ + +File: fftw3.info, Node: Planner Flags, Next: Real-data DFTs, Prev: Complex DFTs, Up: Basic Interface + +4.3.2 Planner Flags +------------------- + +All of the planner routines in FFTW accept an integer `flags' argument, +which is a bitwise OR (`|') of zero or more of the flag constants +defined below. These flags control the rigor (and time) of the +planning process, and can also impose (or lift) restrictions on the +type of transform algorithm that is employed. + + _Important:_ the planner overwrites the input array during planning +unless a saved plan (*note Wisdom::) is available for that problem, so +you should initialize your input data after creating the plan. The +only exceptions to this are the `FFTW_ESTIMATE' and `FFTW_WISDOM_ONLY' +flags, as mentioned below. + + In all cases, if wisdom is available for the given problem that +was created with equal-or-greater planning rigor, then the more +rigorous wisdom is used. For example, in `FFTW_ESTIMATE' mode any +available wisdom is used, whereas in `FFTW_PATIENT' mode only wisdom +created in patient or exhaustive mode can be used. *Note Words of +Wisdom-Saving Plans::. + +Planning-rigor flags +.................... + + * `FFTW_ESTIMATE' specifies that, instead of actual measurements of + different algorithms, a simple heuristic is used to pick a + (probably sub-optimal) plan quickly. With this flag, the + input/output arrays are not overwritten during planning. + + * `FFTW_MEASURE' tells FFTW to find an optimized plan by actually + _computing_ several FFTs and measuring their execution time. + Depending on your machine, this can take some time (often a few + seconds). `FFTW_MEASURE' is the default planning option. + + * `FFTW_PATIENT' is like `FFTW_MEASURE', but considers a wider range + of algorithms and often produces a "more optimal" plan (especially + for large transforms), but at the expense of several times longer + planning time (especially for large transforms). 
+ + * `FFTW_EXHAUSTIVE' is like `FFTW_PATIENT', but considers an even + wider range of algorithms, including many that we think are + unlikely to be fast, to produce the most optimal plan but with a + substantially increased planning time. + + * `FFTW_WISDOM_ONLY' is a special planning mode in which the plan is + only created if wisdom is available for the given problem, and + otherwise a `NULL' plan is returned. This can be combined with + other flags, e.g. `FFTW_WISDOM_ONLY | FFTW_PATIENT' creates a plan + only if wisdom is available that was created in `FFTW_PATIENT' or + `FFTW_EXHAUSTIVE' mode. The `FFTW_WISDOM_ONLY' flag is intended + for users who need to detect whether wisdom is available; for + example, if wisdom is not available one may wish to allocate new + arrays for planning so that user data is not overwritten. + + +Algorithm-restriction flags +........................... + + * `FFTW_DESTROY_INPUT' specifies that an out-of-place transform is + allowed to _overwrite its input_ array with arbitrary data; this + can sometimes allow more efficient algorithms to be employed. + + * `FFTW_PRESERVE_INPUT' specifies that an out-of-place transform must + _not change its input_ array. This is ordinarily the _default_, + except for c2r and hc2r (i.e. complex-to-real) transforms for + which `FFTW_DESTROY_INPUT' is the default. In the latter cases, + passing `FFTW_PRESERVE_INPUT' will attempt to use algorithms that + do not destroy the input, at the expense of worse performance; for + multi-dimensional c2r transforms, however, no input-preserving + algorithms are implemented and the planner will return `NULL' if + one is requested. + + * `FFTW_UNALIGNED' specifies that the algorithm may not impose any + unusual alignment requirements on the input/output arrays (i.e. no + SIMD may be used). This flag is normally _not necessary_, since + the planner automatically detects misaligned arrays. 
The only use + for this flag is if you want to use the new-array execute + interface to execute a given plan on a different array that may + not be aligned like the original. (Using `fftw_malloc' makes this + flag unnecessary even then. You can also use `fftw_alignment_of' + to detect whether two arrays are equivalently aligned.) + + +Limiting planning time +...................... + + extern void fftw_set_timelimit(double seconds); + + This function instructs FFTW to spend at most `seconds' seconds +(approximately) in the planner. If `seconds == FFTW_NO_TIMELIMIT' (the +default value, which is negative), then planning time is unbounded. +Otherwise, FFTW plans with a progressively wider range of algorithms +until the given time limit is reached or the given range of +algorithms is explored, returning the best available plan. + + For example, specifying `FFTW_PATIENT' first plans in +`FFTW_ESTIMATE' mode, then in `FFTW_MEASURE' mode, then finally (time +permitting) in `FFTW_PATIENT'. If `FFTW_EXHAUSTIVE' is specified +instead, the planner will further progress to `FFTW_EXHAUSTIVE' mode. + + Note that the `seconds' argument specifies only a rough limit; in +practice, the planner may use somewhat more time if the time limit is +reached when the planner is in the middle of an operation that cannot +be interrupted. At the very least, the planner will complete planning +in `FFTW_ESTIMATE' mode (which is thus equivalent to a time limit of 0). 
+ + +File: fftw3.info, Node: Real-data DFTs, Next: Real-data DFT Array Format, Prev: Planner Flags, Up: Basic Interface + +4.3.3 Real-data DFTs +-------------------- + + fftw_plan fftw_plan_dft_r2c_1d(int n0, + double *in, fftw_complex *out, + unsigned flags); + fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1, + double *in, fftw_complex *out, + unsigned flags); + fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2, + double *in, fftw_complex *out, + unsigned flags); + fftw_plan fftw_plan_dft_r2c(int rank, const int *n, + double *in, fftw_complex *out, + unsigned flags); + + Plan a real-input/complex-output discrete Fourier transform (DFT) in +zero or more dimensions, returning an `fftw_plan' (*note Using Plans::). + + Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + + The planner returns `NULL' if the plan cannot be created. A +non-`NULL' plan is always returned by the basic interface unless you +are using a customized FFTW configuration supporting a restricted set +of transforms, or if you use the `FFTW_PRESERVE_INPUT' flag with a +multi-dimensional out-of-place c2r transform (see below). + +Arguments +......... + + * `rank' is the rank of the transform (it should be the size of the + array `*n'), and can be any non-negative integer. (*Note Complex + Multi-Dimensional DFTs::, for the definition of "rank".) The + `_1d', `_2d', and `_3d' planners correspond to a `rank' of `1', + `2', and `3', respectively. The rank may be zero, which is + equivalent to a rank-1 transform of size 1, i.e. a copy of one + real number (with zero imaginary part) from input to output. + + * `n0', `n1', `n2', or `n[0..rank-1]', (as appropriate for each + routine) specify the size of the transform dimensions. They can + be any positive integer. 
This is different in general from the + _physical_ array dimensions, which are described in *note + Real-data DFT Array Format::. + + - FFTW is best at handling sizes of the form 2^a 3^b 5^c 7^d + 11^e 13^f, where e+f is either 0 or 1, and the other exponents + are arbitrary. Other sizes are computed by means of a slow, + general-purpose algorithm (which nevertheless retains O(n log + n) performance even for prime sizes). (It is possible to + customize FFTW for different array sizes; see *note + Installation and Customization::.) Transforms whose sizes + are powers of 2 are especially fast, and it is generally + beneficial for the _last_ dimension of an r2c/c2r transform + to be _even_. + + * `in' and `out' point to the input and output arrays of the + transform, which may be the same (yielding an in-place transform). These + arrays are overwritten during planning, unless `FFTW_ESTIMATE' is + used in the flags. (The arrays need not be initialized, but they + must be allocated.) For an in-place transform, it is important to + remember that the real array will require padding, described in + *note Real-data DFT Array Format::. + + * `flags' is a bitwise OR (`|') of zero or more planner flags, as + defined in *note Planner Flags::. + + + The inverse transforms, taking complex input (storing the +non-redundant half of a logically Hermitian array) to real output, are +given by: + + fftw_plan fftw_plan_dft_c2r_1d(int n0, + fftw_complex *in, double *out, + unsigned flags); + fftw_plan fftw_plan_dft_c2r_2d(int n0, int n1, + fftw_complex *in, double *out, + unsigned flags); + fftw_plan fftw_plan_dft_c2r_3d(int n0, int n1, int n2, + fftw_complex *in, double *out, + unsigned flags); + fftw_plan fftw_plan_dft_c2r(int rank, const int *n, + fftw_complex *in, double *out, + unsigned flags); + + The arguments are the same as for the r2c transforms, except that the +input and output data formats are reversed. 
+ + FFTW computes an unnormalized transform: computing an r2c followed +by a c2r transform (or vice versa) will result in the original data +multiplied by the size of the transform (the product of the logical +dimensions). An r2c transform produces the same output as a +`FFTW_FORWARD' complex DFT of the same input, and a c2r transform is +correspondingly equivalent to `FFTW_BACKWARD'. For more information, +see *note What FFTW Really Computes::. + + +File: fftw3.info, Node: Real-data DFT Array Format, Next: Real-to-Real Transforms, Prev: Real-data DFTs, Up: Basic Interface + +4.3.4 Real-data DFT Array Format +-------------------------------- + +The output of a DFT of real data (r2c) contains symmetries that, in +principle, make half of the outputs redundant (*note What FFTW Really +Computes::). (Similarly for the input of an inverse c2r transform.) In +practice, it is not possible to entirely realize these savings in an +efficient and understandable format that generalizes to +multi-dimensional transforms. Instead, the output of the r2c +transforms is _slightly_ over half of the output of the corresponding +complex transform. We do not "pack" the data in any way, but store it +as an ordinary array of `fftw_complex' values. In fact, this data is +simply a subsection of what would be the array in the corresponding +complex transform. + + Specifically, for a real transform of d (= `rank') dimensions n[0] x +n[1] x n[2] x ... x n[d-1] , the complex data is an n[0] x n[1] x n[2] +x ... x (n[d-1]/2 + 1) array of `fftw_complex' values in row-major +order (with the division rounded down). That is, we only store the +_lower_ half (non-negative frequencies), plus one element, of the last +dimension of the data from the ordinary complex transform. (We could +have instead taken half of any other dimension, but implementation +turns out to be simpler if the last, contiguous, dimension is used.) 
+ + For an out-of-place transform, the real data is simply an array with +physical dimensions n[0] x n[1] x n[2] x ... x n[d-1] in row-major +order. + + For an in-place transform, some complications arise since the +complex data is slightly larger than the real data. In this case, the +final dimension of the real data must be _padded_ with extra values to +accommodate the size of the complex data--two extra if the last +dimension is even and one if it is odd. That is, the last dimension of +the real data must physically contain 2 * (n[d-1]/2+1) `double' values +(exactly enough to hold the complex data). This physical array size +does not, however, change the _logical_ array size--only n[d-1] values +are actually stored in the last dimension, and n[d-1] is the last +dimension passed to the planner. + + +File: fftw3.info, Node: Real-to-Real Transforms, Next: Real-to-Real Transform Kinds, Prev: Real-data DFT Array Format, Up: Basic Interface + +4.3.5 Real-to-Real Transforms +----------------------------- + + fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out, + fftw_r2r_kind kind, unsigned flags); + fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, + unsigned flags); + fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2, + double *in, double *out, + fftw_r2r_kind kind0, + fftw_r2r_kind kind1, + fftw_r2r_kind kind2, + unsigned flags); + fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out, + const fftw_r2r_kind *kind, unsigned flags); + + Plan a real input/output (r2r) transform of various kinds in zero or +more dimensions, returning an `fftw_plan' (*note Using Plans::). + + Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + + The planner returns `NULL' if the plan cannot be created. 
A +non-`NULL' plan is always returned by the basic interface unless you +are using a customized FFTW configuration supporting a restricted set +of transforms, or for size-1 `FFTW_REDFT00' kinds (which are not +defined). + +Arguments +......... + + * `rank' is the dimensionality of the transform (it should be the + size of the arrays `*n' and `*kind'), and can be any non-negative + integer. The `_1d', `_2d', and `_3d' planners correspond to a + `rank' of `1', `2', and `3', respectively. A `rank' of zero is + equivalent to a copy of one number from input to output. + + * `n', or `n0'/`n1'/`n2', or `n[rank]', respectively, gives the + (physical) size of the transform dimensions. They can be any + positive integer. + + - Multi-dimensional arrays are stored in row-major order with + dimensions: `n0' x `n1'; or `n0' x `n1' x `n2'; or `n[0]' x + `n[1]' x ... x `n[rank-1]'. *Note Multi-dimensional Array + Format::. + + - FFTW is generally best at handling sizes of the form 2^a 3^b + 5^c 7^d 11^e 13^f, where e+f is either 0 or 1, and the other + exponents are arbitrary. Other sizes are computed by means + of a slow, general-purpose algorithm (which nevertheless + retains O(n log n) performance even for prime sizes). (It + is possible to customize FFTW for different array sizes; see + *note Installation and Customization::.) Transforms whose + sizes are powers of 2 are especially fast. + + - For a `REDFT00' or `RODFT00' transform kind in a dimension of + size n, it is n-1 or n+1, respectively, that should be + factorizable in the above form. + + * `in' and `out' point to the input and output arrays of the + transform, which may be the same (yielding an in-place transform). These + arrays are overwritten during planning, unless `FFTW_ESTIMATE' is + used in the flags. (The arrays need not be initialized, but they + must be allocated.) + + * `kind', or `kind0'/`kind1'/`kind2', or `kind[rank]', is the kind + of r2r transform used for the corresponding dimension. 
The valid + kind constants are described in *note Real-to-Real Transform + Kinds::. In a multi-dimensional transform, what is computed is + the separable product formed by taking each transform kind along + the corresponding dimension, one dimension after another. + + * `flags' is a bitwise OR (`|') of zero or more planner flags, as + defined in *note Planner Flags::. + + + +File: fftw3.info, Node: Real-to-Real Transform Kinds, Prev: Real-to-Real Transforms, Up: Basic Interface + +4.3.6 Real-to-Real Transform Kinds +---------------------------------- + +FFTW currently supports 11 different r2r transform kinds, specified by +one of the constants below. For the precise definitions of these +transforms, see *note What FFTW Really Computes::. For a more +colloquial introduction to these transform kinds, see *note More DFTs +of Real Data::. + + For a dimension of size `n', there is a corresponding "logical" +dimension `N' that determines the normalization (and the optimal +factorization); the formula for `N' is given for each kind below. +Also, with each transform kind is listed its corresponding inverse +transform. FFTW computes unnormalized transforms: a transform followed +by its inverse will result in the original data multiplied by `N' (or +the product of the `N''s for each dimension, in multi-dimensions). + + * `FFTW_R2HC' computes a real-input DFT with output in "halfcomplex" + format, i.e. real and imaginary parts for a transform of size `n' + stored as: r0, r1, r2, ..., r(n/2), i((n+1)/2-1), ..., i2, i1 (Logical + `N=n', inverse is `FFTW_HC2R'.) + + * `FFTW_HC2R' computes the reverse of `FFTW_R2HC', above. (Logical + `N=n', inverse is `FFTW_R2HC'.) + + * `FFTW_DHT' computes a discrete Hartley transform. (Logical `N=n', + inverse is `FFTW_DHT'.) + + * `FFTW_REDFT00' computes an REDFT00 transform, i.e. a DCT-I. + (Logical `N=2*(n-1)', inverse is `FFTW_REDFT00'.) + + * `FFTW_REDFT10' computes an REDFT10 transform, i.e. a DCT-II + (sometimes called "the" DCT). 
(Logical `N=2*n', inverse is + `FFTW_REDFT01'.) + + * `FFTW_REDFT01' computes an REDFT01 transform, i.e. a DCT-III + (sometimes called "the" IDCT, being the inverse of DCT-II). + (Logical `N=2*n', inverse is `FFTW_REDFT10'.) + + * `FFTW_REDFT11' computes an REDFT11 transform, i.e. a DCT-IV. + (Logical `N=2*n', inverse is `FFTW_REDFT11'.) + + * `FFTW_RODFT00' computes an RODFT00 transform, i.e. a DST-I. + (Logical `N=2*(n+1)', inverse is `FFTW_RODFT00'.) + + * `FFTW_RODFT10' computes an RODFT10 transform, i.e. a DST-II. + (Logical `N=2*n', inverse is `FFTW_RODFT01'.) + + * `FFTW_RODFT01' computes an RODFT01 transform, i.e. a DST-III. + (Logical `N=2*n', inverse is `FFTW_RODFT10'.) + + * `FFTW_RODFT11' computes an RODFT11 transform, i.e. a DST-IV. + (Logical `N=2*n', inverse is `FFTW_RODFT11'.) + + + +File: fftw3.info, Node: Advanced Interface, Next: Guru Interface, Prev: Basic Interface, Up: FFTW Reference + +4.4 Advanced Interface +====================== + +FFTW's "advanced" interface supplements the basic interface with four +new planner routines, providing a new level of flexibility: you can plan +a transform of multiple arrays simultaneously, operate on non-contiguous +(strided) data, and transform a subset of a larger multi-dimensional +array. Other than these additional features, the planner operates in +the same fashion as in the basic interface, and the resulting +`fftw_plan' is used in the same way (*note Using Plans::). 
+ +* Menu: + +* Advanced Complex DFTs:: +* Advanced Real-data DFTs:: +* Advanced Real-to-real Transforms:: + + +File: fftw3.info, Node: Advanced Complex DFTs, Next: Advanced Real-data DFTs, Prev: Advanced Interface, Up: Advanced Interface + +4.4.1 Advanced Complex DFTs +--------------------------- + + fftw_plan fftw_plan_many_dft(int rank, const int *n, int howmany, + fftw_complex *in, const int *inembed, + int istride, int idist, + fftw_complex *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags); + + This routine plans multiple multidimensional complex DFTs, and it +extends the `fftw_plan_dft' routine (*note Complex DFTs::) to compute +`howmany' transforms, each having rank `rank' and size `n'. In +addition, the transform data need not be contiguous, but it may be laid +out in memory with an arbitrary stride. To account for these +possibilities, `fftw_plan_many_dft' adds the new parameters `howmany', +{`i',`o'}`nembed', {`i',`o'}`stride', and {`i',`o'}`dist'. The FFTW +basic interface (*note Complex DFTs::) provides routines specialized +for ranks 1, 2, and 3, but the advanced interface handles only the +general-rank case. + + `howmany' is the number of transforms to compute. The resulting +plan computes `howmany' transforms, where the input of the `k'-th +transform is at location `in+k*idist' (in C pointer arithmetic), and +its output is at location `out+k*odist'. Plans obtained in this way +can often be faster than calling FFTW multiple times for the individual +transforms. The basic `fftw_plan_dft' interface corresponds to +`howmany=1' (in which case the `dist' parameters are ignored). + + Each of the `howmany' transforms has rank `rank' and size `n', as in +the basic interface. In addition, the advanced interface allows the +input and output arrays of each transform to be row-major subarrays of +larger rank-`rank' arrays, described by `inembed' and `onembed' +parameters, respectively. 
{`i',`o'}`nembed' must be arrays of length +`rank', and `n' should be elementwise less than or equal to +{`i',`o'}`nembed'. Passing `NULL' for an `nembed' parameter is +equivalent to passing `n' (i.e. same physical and logical dimensions, +as in the basic interface.) + + The `stride' parameters indicate that the `j'-th element of the +input or output arrays is located at `j*istride' or `j*ostride', +respectively. (For a multi-dimensional array, `j' is the ordinary +row-major index.) When combined with the `k'-th transform in a +`howmany' loop, from above, this means that the (`j',`k')-th element is +at `j*stride+k*dist'. (The basic `fftw_plan_dft' interface corresponds +to a stride of 1.) + + For in-place transforms, the input and output `stride' and `dist' +parameters should be the same; otherwise, the planner may return `NULL'. + + Arrays `n', `inembed', and `onembed' are not used after this +function returns. You can safely free or reuse them. + + *Examples*: One transform of one 5 by 6 array contiguous in memory: + int rank = 2; + int n[] = {5, 6}; + int howmany = 1; + int idist = odist = 0; /* unused because howmany = 1 */ + int istride = ostride = 1; /* array is contiguous in memory */ + int *inembed = n, *onembed = n; + + Transform of three 5 by 6 arrays, each contiguous in memory, stored +in memory one after another: + int rank = 2; + int n[] = {5, 6}; + int howmany = 3; + int idist = odist = n[0]*n[1]; /* = 30, the distance in memory + between the first element + of the first array and the + first element of the second array */ + int istride = ostride = 1; /* array is contiguous in memory */ + int *inembed = n, *onembed = n; + + Transform each column of a 2d array with 10 rows and 3 columns: + int rank = 1; /* not 2: we are computing 1d transforms */ + int n[] = {10}; /* 1d transforms of length 10 */ + int howmany = 3; + int idist = odist = 1; + int istride = ostride = 3; /* distance between two elements in + the same column */ + int *inembed = n, *onembed 
= n; + + +File: fftw3.info, Node: Advanced Real-data DFTs, Next: Advanced Real-to-real Transforms, Prev: Advanced Complex DFTs, Up: Advanced Interface + +4.4.2 Advanced Real-data DFTs +----------------------------- + + fftw_plan fftw_plan_many_dft_r2c(int rank, const int *n, int howmany, + double *in, const int *inembed, + int istride, int idist, + fftw_complex *out, const int *onembed, + int ostride, int odist, + unsigned flags); + fftw_plan fftw_plan_many_dft_c2r(int rank, const int *n, int howmany, + fftw_complex *in, const int *inembed, + int istride, int idist, + double *out, const int *onembed, + int ostride, int odist, + unsigned flags); + + Like `fftw_plan_many_dft', these two functions add `howmany', +`nembed', `stride', and `dist' parameters to the `fftw_plan_dft_r2c' +and `fftw_plan_dft_c2r' functions, but otherwise behave the same as the +basic interface. + + The interpretation of `howmany', `stride', and `dist' are the same +as for `fftw_plan_many_dft', above. Note that the `stride' and `dist' +for the real array are in units of `double', and for the complex array +are in units of `fftw_complex'. + + If an `nembed' parameter is `NULL', it is interpreted as what it +would be in the basic interface, as described in *note Real-data DFT +Array Format::. That is, for the complex array the size is assumed to +be the same as `n', but with the last dimension cut roughly in half. +For the real array, the size is assumed to be `n' if the transform is +out-of-place, or `n' with the last dimension "padded" if the transform +is in-place. + + If an `nembed' parameter is non-`NULL', it is interpreted as the +physical size of the corresponding array, in row-major order, just as +for `fftw_plan_many_dft'. In this case, each dimension of `nembed' +should be `>=' what it would be in the basic interface (e.g. the halved +or padded `n'). + + Arrays `n', `inembed', and `onembed' are not used after this +function returns. You can safely free or reuse them. 
+ + +File: fftw3.info, Node: Advanced Real-to-real Transforms, Prev: Advanced Real-data DFTs, Up: Advanced Interface + +4.4.3 Advanced Real-to-real Transforms +-------------------------------------- + + fftw_plan fftw_plan_many_r2r(int rank, const int *n, int howmany, + double *in, const int *inembed, + int istride, int idist, + double *out, const int *onembed, + int ostride, int odist, + const fftw_r2r_kind *kind, unsigned flags); + + Like `fftw_plan_many_dft', this function adds `howmany', `nembed', +`stride', and `dist' parameters to the `fftw_plan_r2r' function, but +otherwise behaves the same as the basic interface. The interpretation +of those additional parameters is the same as for +`fftw_plan_many_dft'. (Of course, the `stride' and `dist' parameters +are now in units of `double', not `fftw_complex'.) + + Arrays `n', `inembed', `onembed', and `kind' are not used after this +function returns. You can safely free or reuse them. + + +File: fftw3.info, Node: Guru Interface, Next: New-array Execute Functions, Prev: Advanced Interface, Up: FFTW Reference + +4.5 Guru Interface +================== + +The "guru" interface to FFTW is intended to expose as much as possible +of the flexibility in the underlying FFTW architecture. It allows one +to compute multi-dimensional "vectors" (loops) of multi-dimensional +transforms, where each vector/transform dimension has an independent +size and stride. One can also use more general complex-number formats, +e.g. separate real and imaginary arrays. + + For those users who require the flexibility of the guru interface, +it is important that they pay special attention to the documentation +lest they shoot themselves in the foot. 
+ +* Menu: + +* Interleaved and split arrays:: +* Guru vector and transform sizes:: +* Guru Complex DFTs:: +* Guru Real-data DFTs:: +* Guru Real-to-real Transforms:: +* 64-bit Guru Interface:: + + +File: fftw3.info, Node: Interleaved and split arrays, Next: Guru vector and transform sizes, Prev: Guru Interface, Up: Guru Interface + +4.5.1 Interleaved and split arrays +---------------------------------- + +The guru interface supports two representations of complex numbers, +which we call the interleaved and the split format. + + The "interleaved" format is the same one used by the basic and +advanced interfaces, and it is documented in *note Complex numbers::. +In the interleaved format, you provide pointers to the real part of a +complex number, and the imaginary part is understood to be stored in the +next memory location. + + The "split" format allows separate pointers to the real and +imaginary parts of a complex array. + + Technically, the interleaved format is redundant, because you can +always express an interleaved array in terms of a split array with +appropriate pointers and strides. On the other hand, the interleaved +format is simpler to use, and it is common in practice. Hence, FFTW +supports it as a special case. + + +File: fftw3.info, Node: Guru vector and transform sizes, Next: Guru Complex DFTs, Prev: Interleaved and split arrays, Up: Guru Interface + +4.5.2 Guru vector and transform sizes +------------------------------------- + +The guru interface introduces one basic new data structure, +`fftw_iodim', that is used to specify sizes and strides for +multi-dimensional transforms and vectors: + + typedef struct { + int n; + int is; + int os; + } fftw_iodim; + + Here, `n' is the size of the dimension, and `is' and `os' are the +strides of that dimension for the input and output arrays. (The stride +is the separation of consecutive elements along this dimension.) 
+ + The meaning of the stride parameter depends on the type of the array +that the stride refers to. _If the array is interleaved complex, +strides are expressed in units of complex numbers (`fftw_complex'). If +the array is split complex or real, strides are expressed in units of +real numbers (`double')._ This convention is consistent with the usual +pointer arithmetic in the C language. An interleaved array is denoted +by a pointer `p' to `fftw_complex', so that `p+1' points to the next +complex number. Split arrays are denoted by pointers to `double', in +which case pointer arithmetic operates in units of `sizeof(double)'. + + The guru planner interfaces all take a (`rank', `dims[rank]') pair +describing the transform size, and a (`howmany_rank', +`howmany_dims[howmany_rank]') pair describing the "vector" size (a +multi-dimensional loop of transforms to perform), where `dims' and +`howmany_dims' are arrays of `fftw_iodim'. + + For example, the `howmany' parameter in the advanced complex-DFT +interface corresponds to `howmany_rank' = 1, `howmany_dims[0].n' = +`howmany', `howmany_dims[0].is' = `idist', and `howmany_dims[0].os' = +`odist'. (To compute a single transform, you can just use +`howmany_rank' = 0.) + + A row-major multidimensional array with dimensions `n[rank]' (*note +Row-major Format::) corresponds to `dims[i].n' = `n[i]' and the +recurrence `dims[i].is' = `n[i+1] * dims[i+1].is' (similarly for `os'). +The stride of the last (`i=rank-1') dimension is the overall stride of +the array. e.g. to be equivalent to the advanced complex-DFT +interface, you would have `dims[rank-1].is' = `istride' and +`dims[rank-1].os' = `ostride'. + + In general, we only guarantee FFTW to return a non-`NULL' plan if +the vector and transform dimensions correspond to a set of distinct +indices, and for in-place transforms the input/output strides should be +the same. 
+ + +File: fftw3.info, Node: Guru Complex DFTs, Next: Guru Real-data DFTs, Prev: Guru vector and transform sizes, Up: Guru Interface + +4.5.3 Guru Complex DFTs +----------------------- + + fftw_plan fftw_plan_guru_dft( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + + fftw_plan fftw_plan_guru_split_dft( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *ri, double *ii, double *ro, double *io, + unsigned flags); + + These two functions plan a complex-data, multi-dimensional DFT for +the interleaved and split format, respectively. Transform dimensions +are given by (`rank', `dims') over a multi-dimensional vector (loop) of +dimensions (`howmany_rank', `howmany_dims'). `dims' and `howmany_dims' +should point to `fftw_iodim' arrays of length `rank' and +`howmany_rank', respectively. + + `flags' is a bitwise OR (`|') of zero or more planner flags, as +defined in *note Planner Flags::. + + In the `fftw_plan_guru_dft' function, the pointers `in' and `out' +point to the interleaved input and output arrays, respectively. The +sign can be either -1 (= `FFTW_FORWARD') or +1 (= `FFTW_BACKWARD'). If +the pointers are equal, the transform is in-place. + + In the `fftw_plan_guru_split_dft' function, `ri' and `ii' point to +the real and imaginary input arrays, and `ro' and `io' point to the +real and imaginary output arrays. The input and output pointers may be +the same, indicating an in-place transform. For example, for +`fftw_complex' pointers `in' and `out', the corresponding parameters +are: + + ri = (double *) in; + ii = (double *) in + 1; + ro = (double *) out; + io = (double *) out + 1; + + Because `fftw_plan_guru_split_dft' accepts split arrays, strides are +expressed in units of `double'. 
For a contiguous `fftw_complex' array, +the overall stride of the transform should be 2, the distance between +consecutive real parts or between consecutive imaginary parts; see +*note Guru vector and transform sizes::. Note that the dimension +strides are applied equally to the real and imaginary parts; real and +imaginary arrays with different strides are not supported. + + There is no `sign' parameter in `fftw_plan_guru_split_dft'. This +function always plans for an `FFTW_FORWARD' transform. To plan for an +`FFTW_BACKWARD' transform, you can exploit the identity that the +backwards DFT is equal to the forwards DFT with the real and imaginary +parts swapped. For example, in the case of the `fftw_complex' arrays +above, the `FFTW_BACKWARD' transform is computed by the parameters: + + ri = (double *) in + 1; + ii = (double *) in; + ro = (double *) out + 1; + io = (double *) out; + + +File: fftw3.info, Node: Guru Real-data DFTs, Next: Guru Real-to-real Transforms, Prev: Guru Complex DFTs, Up: Guru Interface + +4.5.4 Guru Real-data DFTs +------------------------- + + fftw_plan fftw_plan_guru_dft_r2c( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *in, fftw_complex *out, + unsigned flags); + + fftw_plan fftw_plan_guru_split_dft_r2c( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *in, double *ro, double *io, + unsigned flags); + + fftw_plan fftw_plan_guru_dft_c2r( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + fftw_complex *in, double *out, + unsigned flags); + + fftw_plan fftw_plan_guru_split_dft_c2r( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *ri, double *ii, double *out, + unsigned flags); + + Plan a real-input (r2c) or real-output (c2r), multi-dimensional DFT +with transform dimensions given by (`rank', `dims') over a +multi-dimensional vector (loop) of dimensions 
(`howmany_rank', +`howmany_dims'). `dims' and `howmany_dims' should point to +`fftw_iodim' arrays of length `rank' and `howmany_rank', respectively. +As for the basic and advanced interfaces, an r2c transform is +`FFTW_FORWARD' and a c2r transform is `FFTW_BACKWARD'. + + The _last_ dimension of `dims' is interpreted specially: that +dimension of the real array has size `dims[rank-1].n', but that +dimension of the complex array has size `dims[rank-1].n/2+1' (division +rounded down). The strides, on the other hand, are taken to be exactly +as specified. It is up to the user to specify the strides +appropriately for the peculiar dimensions of the data, and we do not +guarantee that the planner will succeed (return non-`NULL') for any +dimensions other than those described in *note Real-data DFT Array +Format:: and generalized in *note Advanced Real-data DFTs::. (That is, +for an in-place transform, each individual dimension should be able to +operate in place.) + + `in' and `out' point to the input and output arrays for r2c and c2r +transforms, respectively. For split arrays, `ri' and `ii' point to the +real and imaginary input arrays for a c2r transform, and `ro' and `io' +point to the real and imaginary output arrays for an r2c transform. +`in' and `ro' or `ri' and `out' may be the same, indicating an in-place +transform. (In-place transforms where `in' and `io' or `ii' and `out' +are the same are not currently supported.) + + `flags' is a bitwise OR (`|') of zero or more planner flags, as +defined in *note Planner Flags::. + + In-place transforms of rank greater than 1 are currently only +supported for interleaved arrays. For split arrays, the planner will +return `NULL'. 
+ + +File: fftw3.info, Node: Guru Real-to-real Transforms, Next: 64-bit Guru Interface, Prev: Guru Real-data DFTs, Up: Guru Interface + +4.5.5 Guru Real-to-real Transforms +---------------------------------- + + fftw_plan fftw_plan_guru_r2r(int rank, const fftw_iodim *dims, + int howmany_rank, + const fftw_iodim *howmany_dims, + double *in, double *out, + const fftw_r2r_kind *kind, + unsigned flags); + + Plan a real-to-real (r2r) multi-dimensional `FFTW_FORWARD' transform +with transform dimensions given by (`rank', `dims') over a +multi-dimensional vector (loop) of dimensions (`howmany_rank', +`howmany_dims'). `dims' and `howmany_dims' should point to +`fftw_iodim' arrays of length `rank' and `howmany_rank', respectively. + + The transform kind of each dimension is given by the `kind' +parameter, which should point to an array of length `rank'. Valid +`fftw_r2r_kind' constants are given in *note Real-to-Real Transform +Kinds::. + + `in' and `out' point to the real input and output arrays; they may +be the same, indicating an in-place transform. + + `flags' is a bitwise OR (`|') of zero or more planner flags, as +defined in *note Planner Flags::. + + +File: fftw3.info, Node: 64-bit Guru Interface, Prev: Guru Real-to-real Transforms, Up: Guru Interface + +4.5.6 64-bit Guru Interface +--------------------------- + +When compiled in 64-bit mode on a 64-bit architecture (where addresses +are 64 bits wide), FFTW uses 64-bit quantities internally for all +transform sizes, strides, and so on--you don't have to do anything +special to exploit this. However, in the ordinary FFTW interfaces, you +specify the transform size by an `int' quantity, which is normally only +32 bits wide. This means that, even though FFTW is using 64-bit sizes +internally, you cannot specify a single transform dimension larger than +2^31-1 numbers. 
+ + We expect that few users will require transforms larger than this, +but, for those who do, we provide a 64-bit version of the guru +interface in which all sizes are specified as integers of type +`ptrdiff_t' instead of `int'. (`ptrdiff_t' is a signed integer type +defined by the C standard to be wide enough to represent address +differences, and thus must be at least 64 bits wide on a 64-bit +machine.) We stress that there is _no performance advantage_ to using +this interface--the same internal FFTW code is employed regardless--and +it is only necessary if you want to specify very large transform sizes. + + In particular, the 64-bit guru interface is a set of planner routines +that are exactly the same as the guru planner routines, except that +they are named with `guru64' instead of `guru' and they take arguments +of type `fftw_iodim64' instead of `fftw_iodim'. For example, instead +of `fftw_plan_guru_dft', we have `fftw_plan_guru64_dft'. + + fftw_plan fftw_plan_guru64_dft( + int rank, const fftw_iodim64 *dims, + int howmany_rank, const fftw_iodim64 *howmany_dims, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + + The `fftw_iodim64' type is similar to `fftw_iodim', with the same +interpretation, except that it uses type `ptrdiff_t' instead of type +`int'. + + typedef struct { + ptrdiff_t n; + ptrdiff_t is; + ptrdiff_t os; + } fftw_iodim64; + + Every other `fftw_plan_guru' function also has a `fftw_plan_guru64' +equivalent, but we do not repeat their documentation here since they +are identical to the 32-bit versions except as noted above. + + +File: fftw3.info, Node: New-array Execute Functions, Next: Wisdom, Prev: Guru Interface, Up: FFTW Reference + +4.6 New-array Execute Functions +=============================== + +Normally, one executes a plan for the arrays with which the plan was +created, by calling `fftw_execute(plan)' as described in *note Using +Plans::. 
However, it is possible for sophisticated users to apply a +given plan to a _different_ array using the "new-array execute" +functions detailed below, provided that the following conditions are +met: + + * The array size, strides, etcetera are the same (since those are + set by the plan). + + * The input and output arrays are the same (in-place) or different + (out-of-place) if the plan was originally created to be in-place or + out-of-place, respectively. + + * For split arrays, the separations between the real and imaginary + parts, `ii-ri' and `io-ro', are the same as they were for the + input and output arrays when the plan was created. (This + condition is automatically satisfied for interleaved arrays.) + + * The "alignment" of the new input/output arrays is the same as that + of the input/output arrays when the plan was created, unless the + plan was created with the `FFTW_UNALIGNED' flag. Here, the + alignment is a platform-dependent quantity (for example, it is the + address modulo 16 if SSE SIMD instructions are used, but the + address modulo 4 for non-SIMD single-precision FFTW on the same + machine). In general, only arrays allocated with `fftw_malloc' + are guaranteed to be equally aligned (*note SIMD alignment and + fftw_malloc::). + + + The alignment issue is especially critical, because if you don't use +`fftw_malloc' then you may have little control over the alignment of +arrays in memory. For example, neither the C++ `new' function nor the +Fortran `allocate' statement provide strong enough guarantees about +data alignment. If you don't use `fftw_malloc', therefore, you +probably have to use `FFTW_UNALIGNED' (which disables most SIMD +support). If possible, it is probably better for you to simply create +multiple plans (creating a new plan is quick once one exists for a +given size), or better yet re-use the same array for your transforms. 
+ + For rare circumstances in which you cannot control the alignment of +allocated memory, but wish to determine whether a given array is aligned +like the original array for which a plan was created, you can use the +`fftw_alignment_of' function: + int fftw_alignment_of(double *p); + Two arrays have equivalent alignment (for the purposes of applying a +plan) if and only if `fftw_alignment_of' returns the same value for the +corresponding pointers to their data (typecast to `double*' if +necessary). + + If you are tempted to use the new-array execute interface because you +want to transform a known bunch of arrays of the same size, you should +probably go use the advanced interface instead (*note Advanced +Interface::). + + The new-array execute functions are: + + void fftw_execute_dft( + const fftw_plan p, + fftw_complex *in, fftw_complex *out); + + void fftw_execute_split_dft( + const fftw_plan p, + double *ri, double *ii, double *ro, double *io); + + void fftw_execute_dft_r2c( + const fftw_plan p, + double *in, fftw_complex *out); + + void fftw_execute_split_dft_r2c( + const fftw_plan p, + double *in, double *ro, double *io); + + void fftw_execute_dft_c2r( + const fftw_plan p, + fftw_complex *in, double *out); + + void fftw_execute_split_dft_c2r( + const fftw_plan p, + double *ri, double *ii, double *out); + + void fftw_execute_r2r( + const fftw_plan p, + double *in, double *out); + + These execute the `plan' to compute the corresponding transform on +the input/output arrays specified by the subsequent arguments. The +input/output array arguments have the same meanings as the ones passed +to the guru planner routines in the preceding sections. The `plan' is +not modified, and these routines can be called as many times as +desired, or intermixed with calls to the ordinary `fftw_execute'. + + The `plan' _must_ have been created for the transform type +corresponding to the execute function, e.g. it must be a complex-DFT +plan for `fftw_execute_dft'.
Any of the planner routines for that +transform type, from the basic to the guru interface, could have been +used to create the plan, however. + + +File: fftw3.info, Node: Wisdom, Next: What FFTW Really Computes, Prev: New-array Execute Functions, Up: FFTW Reference + +4.7 Wisdom +========== + +This section documents the FFTW mechanism for saving and restoring +plans from disk. This mechanism is called "wisdom". + +* Menu: + +* Wisdom Export:: +* Wisdom Import:: +* Forgetting Wisdom:: +* Wisdom Utilities:: + + +File: fftw3.info, Node: Wisdom Export, Next: Wisdom Import, Prev: Wisdom, Up: Wisdom + +4.7.1 Wisdom Export +------------------- + + int fftw_export_wisdom_to_filename(const char *filename); + void fftw_export_wisdom_to_file(FILE *output_file); + char *fftw_export_wisdom_to_string(void); + void fftw_export_wisdom(void (*write_char)(char c, void *), void *data); + + These functions allow you to export all currently accumulated wisdom +in a form from which it can be later imported and restored, even during +a separate run of the program. (*Note Words of Wisdom-Saving Plans::.) +The current store of wisdom is not affected by calling any of these +routines. + + `fftw_export_wisdom' exports the wisdom to any output medium, as +specified by the callback function `write_char'. `write_char' is a +`putc'-like function that writes the character `c' to some output; its +second parameter is the `data' pointer passed to `fftw_export_wisdom'. +For convenience, the following three "wrapper" routines are provided: + + `fftw_export_wisdom_to_filename' writes wisdom to a file named +`filename' (which is created or overwritten), returning `1' on success +and `0' on failure. A lower-level function, which requires you to open +and close the file yourself (e.g. if you want to write wisdom to a +portion of a larger file) is `fftw_export_wisdom_to_file'. 
This writes +the wisdom to the current position in `output_file', which should be +open with write permission; upon exit, the file remains open and is +positioned at the end of the wisdom data. + + `fftw_export_wisdom_to_string' returns a pointer to a +`NULL'-terminated string holding the wisdom data. This string is +dynamically allocated, and it is the responsibility of the caller to +deallocate it with `free' when it is no longer needed. + + All of these routines export the wisdom in the same format, which we +will not document here except to say that it is LISP-like ASCII text +that is insensitive to white space. + + +File: fftw3.info, Node: Wisdom Import, Next: Forgetting Wisdom, Prev: Wisdom Export, Up: Wisdom + +4.7.2 Wisdom Import +------------------- + + int fftw_import_system_wisdom(void); + int fftw_import_wisdom_from_filename(const char *filename); + int fftw_import_wisdom_from_string(const char *input_string); + int fftw_import_wisdom(int (*read_char)(void *), void *data); + + These functions import wisdom into a program from data stored by the +`fftw_export_wisdom' functions above. (*Note Words of Wisdom-Saving +Plans::.) The imported wisdom replaces any wisdom already accumulated +by the running program. + + `fftw_import_wisdom' imports wisdom from any input medium, as +specified by the callback function `read_char'. `read_char' is a +`getc'-like function that returns the next character in the input; its +parameter is the `data' pointer passed to `fftw_import_wisdom'. If the +end of the input data is reached (which should never happen for valid +data), `read_char' should return `EOF' (as defined in `<stdio.h>'). +For convenience, the following three "wrapper" routines are provided: + + `fftw_import_wisdom_from_filename' reads wisdom from a file named +`filename'. A lower-level function, which requires you to open and +close the file yourself (e.g. if you want to read wisdom from a portion +of a larger file) is `fftw_import_wisdom_from_file'.
This reads wisdom +from the current position in `input_file' (which should be open with +read permission); upon exit, the file remains open, but the position of +the read pointer is unspecified. + + `fftw_import_wisdom_from_string' reads wisdom from the +`NULL'-terminated string `input_string'. + + `fftw_import_system_wisdom' reads wisdom from an +implementation-defined standard file (`/etc/fftw/wisdom' on Unix and +GNU systems). + + The return value of these import routines is `1' if the wisdom was +read successfully and `0' otherwise. Note that, in all of these +functions, any data in the input stream past the end of the wisdom data +is simply ignored. + + +File: fftw3.info, Node: Forgetting Wisdom, Next: Wisdom Utilities, Prev: Wisdom Import, Up: Wisdom + +4.7.3 Forgetting Wisdom +----------------------- + + void fftw_forget_wisdom(void); + + Calling `fftw_forget_wisdom' causes all accumulated `wisdom' to be +discarded and its associated memory to be freed. (New `wisdom' can +still be gathered subsequently, however.) + + +File: fftw3.info, Node: Wisdom Utilities, Prev: Forgetting Wisdom, Up: Wisdom + +4.7.4 Wisdom Utilities +---------------------- + +FFTW includes two standalone utility programs that deal with wisdom. We +merely summarize them here, since they come with their own `man' pages +for Unix and GNU systems (with HTML versions on our web site). + + The first program is `fftw-wisdom' (or `fftwf-wisdom' in single +precision, etcetera), which can be used to create a wisdom file +containing plans for any of the transform sizes and types supported by +FFTW. It is preferable to create wisdom directly from your executable +(*note Caveats in Using Wisdom::), but this program is useful for +creating global wisdom files for `fftw_import_system_wisdom'. + + The second program is `fftw-wisdom-to-conf', which takes a wisdom +file as input and produces a "configuration routine" as output. 
The +latter is a C subroutine that you can compile and link into your +program, replacing a routine of the same name in the FFTW library, that +determines which parts of FFTW are callable by your program. +`fftw-wisdom-to-conf' produces a configuration routine that links to +only those parts of FFTW needed by the saved plans in the wisdom, +greatly reducing the size of statically linked executables (which should +only attempt to create plans corresponding to those in the wisdom, +however). + + +File: fftw3.info, Node: What FFTW Really Computes, Prev: Wisdom, Up: FFTW Reference + +4.8 What FFTW Really Computes +============================= + +In this section, we provide precise mathematical definitions for the +transforms that FFTW computes. These transform definitions are fairly +standard, but some authors follow slightly different conventions for the +normalization of the transform (the constant factor in front) and the +sign of the complex exponent. We begin by presenting the +one-dimensional (1d) transform definitions, and then give the +straightforward extension to multi-dimensional transforms. + +* Menu: + +* The 1d Discrete Fourier Transform (DFT):: +* The 1d Real-data DFT:: +* 1d Real-even DFTs (DCTs):: +* 1d Real-odd DFTs (DSTs):: +* 1d Discrete Hartley Transforms (DHTs):: +* Multi-dimensional Transforms:: + + +File: fftw3.info, Node: The 1d Discrete Fourier Transform (DFT), Next: The 1d Real-data DFT, Prev: What FFTW Really Computes, Up: What FFTW Really Computes + +4.8.1 The 1d Discrete Fourier Transform (DFT) +--------------------------------------------- + +The forward (`FFTW_FORWARD') discrete Fourier transform (DFT) of a 1d +complex array X of size n computes an array Y, where: Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(-2 pi j k sqrt(-1)/n) . + The backward (`FFTW_BACKWARD') DFT computes: Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(2 pi j k sqrt(-1)/n) . 
+ FFTW computes an unnormalized transform, in that there is no +coefficient in front of the summation in the DFT. In other words, +applying the forward and then the backward transform will multiply the +input by n. + + From above, an `FFTW_FORWARD' transform corresponds to a sign of -1 +in the exponent of the DFT. Note also that we use the standard +"in-order" output ordering--the k-th output corresponds to the +frequency k/n (or k/T, where T is your total sampling period). For +those who like to think in terms of positive and negative frequencies, +this means that the positive frequencies are stored in the first half +of the output and the negative frequencies are stored in backwards +order in the second half of the output. (The frequency -k/n is the +same as the frequency (n-k)/n.) + + +File: fftw3.info, Node: The 1d Real-data DFT, Next: 1d Real-even DFTs (DCTs), Prev: The 1d Discrete Fourier Transform (DFT), Up: What FFTW Really Computes + +4.8.2 The 1d Real-data DFT +-------------------------- + +The real-input (r2c) DFT in FFTW computes the _forward_ transform Y of +the size `n' real array X, exactly as defined above, i.e. Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(-2 pi j k sqrt(-1)/n) . + This output array Y can easily be shown to possess the "Hermitian" +symmetry Y[k] = Y[n-k]*, where we take Y to be periodic so that Y[n] = +Y[0]. + + As a result of this symmetry, half of the output Y is redundant +(being the complex conjugate of the other half), and so the 1d r2c +transforms only output elements 0...n/2 of Y (n/2+1 complex numbers), +where the division by 2 is rounded down. + + Moreover, the Hermitian symmetry implies that Y[0] and, if n is +even, the Y[n/2] element, are purely real. So, for the `R2HC' r2r +transform, the (zero) imaginary parts of these elements are not stored +in the halfcomplex output format.
+ + The c2r and `HC2R' r2r transforms compute the backward DFT of the +_complex_ array X with Hermitian symmetry, stored in the r2c/`R2HC' +output formats, respectively, where the backward transform is defined +exactly as for the complex case: Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(2 pi j k sqrt(-1)/n) . + The outputs `Y' of this transform can easily be seen to be purely +real, and are stored as an array of real numbers. + + Like FFTW's complex DFT, these transforms are unnormalized. In other +words, applying the real-to-complex (forward) and then the +complex-to-real (backward) transform will multiply the input by n. + + +File: fftw3.info, Node: 1d Real-even DFTs (DCTs), Next: 1d Real-odd DFTs (DSTs), Prev: The 1d Real-data DFT, Up: What FFTW Really Computes + +4.8.3 1d Real-even DFTs (DCTs) +------------------------------ + +The Real-even symmetry DFTs in FFTW are exactly equivalent to the +unnormalized forward (and backward) DFTs as defined above, where the +input array X of length N is purely real and is also "even" symmetry. +In this case, the output array is likewise real and even symmetry. + + For the case of `REDFT00', this even symmetry means that X[j] = +X[N-j], where we take X to be periodic so that X[N] = X[0]. Because of +this redundancy, only the first n real numbers are actually stored, +where N = 2(n-1). + + The proper definition of even symmetry for `REDFT10', `REDFT01', and +`REDFT11' transforms is somewhat more intricate because of the shifts +by 1/2 of the input and/or output, although the corresponding boundary +conditions are given in *note Real even/odd DFTs (cosine/sine +transforms)::. Because of the even symmetry, however, the sine terms +in the DFT all cancel and the remaining cosine terms are written +explicitly below. This formulation often leads people to call such a +transform a "discrete cosine transform" (DCT), although it is really +just a special case of the DFT.
+ + In each of the definitions below, we transform a real array X of +length n to a real array Y of length n: + +REDFT00 (DCT-I) +............... + +An `REDFT00' transform (type-I DCT) in FFTW is defined by: Y[k] = X[0] ++ (-1)^k X[n-1] + 2 (sum for j = 1 to n-2 of X[j] cos(pi jk /(n-1))). +Note that this transform is not defined for n=1. For n=2, the +summation term above is dropped as you might expect. + +REDFT10 (DCT-II) +................ + +An `REDFT10' transform (type-II DCT, sometimes called "the" DCT) in +FFTW is defined by: Y[k] = 2 (sum for j = 0 to n-1 of X[j] cos(pi +(j+1/2) k / n)). + +REDFT01 (DCT-III) +................. + +An `REDFT01' transform (type-III DCT) in FFTW is defined by: Y[k] = +X[0] + 2 (sum for j = 1 to n-1 of X[j] cos(pi j (k+1/2) / n)). In the +case of n=1, this reduces to Y[0] = X[0]. Up to a scale factor (see +below), this is the inverse of `REDFT10' ("the" DCT), and so the +`REDFT01' (DCT-III) is sometimes called the "IDCT". + +REDFT11 (DCT-IV) +................ + +An `REDFT11' transform (type-IV DCT) in FFTW is defined by: Y[k] = 2 +(sum for j = 0 to n-1 of X[j] cos(pi (j+1/2) (k+1/2) / n)). + +Inverses and Normalization +.......................... + +These definitions correspond directly to the unnormalized DFTs used +elsewhere in FFTW (hence the factors of 2 in front of the summations). +The unnormalized inverse of `REDFT00' is `REDFT00', of `REDFT10' is +`REDFT01' and vice versa, and of `REDFT11' is `REDFT11'. Each +unnormalized inverse results in the original array multiplied by N, +where N is the _logical_ DFT size. For `REDFT00', N=2(n-1) (note that +n=1 is not defined); otherwise, N=2n. + + In defining the discrete cosine transform, some authors also include +additional factors of sqrt(2) (or its inverse) multiplying selected +inputs and/or outputs. This is a mostly cosmetic change that makes the +transform orthogonal, but sacrifices the direct equivalence to a +symmetric DFT. 
+ + +File: fftw3.info, Node: 1d Real-odd DFTs (DSTs), Next: 1d Discrete Hartley Transforms (DHTs), Prev: 1d Real-even DFTs (DCTs), Up: What FFTW Really Computes + +4.8.4 1d Real-odd DFTs (DSTs) +----------------------------- + +The Real-odd symmetry DFTs in FFTW are exactly equivalent to the +unnormalized forward (and backward) DFTs as defined above, where the +input array X of length N is purely real and is also "odd" symmetry. In +this case, the output is odd symmetry and purely imaginary. + + For the case of `RODFT00', this odd symmetry means that X[j] = +-X[N-j], where we take X to be periodic so that X[N] = X[0]. Because +of this redundancy, only the first n real numbers starting at j=1 are +actually stored (the j=0 element is zero), where N = 2(n+1). + + The proper definition of odd symmetry for `RODFT10', `RODFT01', and +`RODFT11' transforms is somewhat more intricate because of the shifts +by 1/2 of the input and/or output, although the corresponding boundary +conditions are given in *note Real even/odd DFTs (cosine/sine +transforms)::. Because of the odd symmetry, however, the cosine terms +in the DFT all cancel and the remaining sine terms are written +explicitly below. This formulation often leads people to call such a +transform a "discrete sine transform" (DST), although it is really just +a special case of the DFT. + + In each of the definitions below, we transform a real array X of +length n to a real array Y of length n: + +RODFT00 (DST-I) +............... + +An `RODFT00' transform (type-I DST) in FFTW is defined by: Y[k] = 2 +(sum for j = 0 to n-1 of X[j] sin(pi (j+1)(k+1) / (n+1))). + +RODFT10 (DST-II) +................ + +An `RODFT10' transform (type-II DST) in FFTW is defined by: Y[k] = 2 +(sum for j = 0 to n-1 of X[j] sin(pi (j+1/2) (k+1) / n)). + +RODFT01 (DST-III) +................. + +An `RODFT01' transform (type-III DST) in FFTW is defined by: Y[k] = +(-1)^k X[n-1] + 2 (sum for j = 0 to n-2 of X[j] sin(pi (j+1) (k+1/2) / +n)). 
In the case of n=1, this reduces to Y[0] = X[0]. + +RODFT11 (DST-IV) +................ + +An `RODFT11' transform (type-IV DST) in FFTW is defined by: Y[k] = 2 +(sum for j = 0 to n-1 of X[j] sin(pi (j+1/2) (k+1/2) / n)). + +Inverses and Normalization +.......................... + +These definitions correspond directly to the unnormalized DFTs used +elsewhere in FFTW (hence the factors of 2 in front of the summations). +The unnormalized inverse of `RODFT00' is `RODFT00', of `RODFT10' is +`RODFT01' and vice versa, and of `RODFT11' is `RODFT11'. Each +unnormalized inverse results in the original array multiplied by N, +where N is the _logical_ DFT size. For `RODFT00', N=2(n+1); otherwise, +N=2n. + + In defining the discrete sine transform, some authors also include +additional factors of sqrt(2) (or its inverse) multiplying selected +inputs and/or outputs. This is a mostly cosmetic change that makes the +transform orthogonal, but sacrifices the direct equivalence to an +antisymmetric DFT. + + +File: fftw3.info, Node: 1d Discrete Hartley Transforms (DHTs), Next: Multi-dimensional Transforms, Prev: 1d Real-odd DFTs (DSTs), Up: What FFTW Really Computes + +4.8.5 1d Discrete Hartley Transforms (DHTs) +------------------------------------------- + +The discrete Hartley transform (DHT) of a 1d real array X of size n +computes a real array Y of the same size, where: Y[k] = sum for j = 0 to (n - 1) of X[j] * [cos(2 pi j k / n) + sin(2 pi j k / n)]. + FFTW computes an unnormalized transform, in that there is no +coefficient in front of the summation in the DHT. In other words, +applying the transform twice (the DHT is its own inverse) will multiply +the input by n. 
+ + +File: fftw3.info, Node: Multi-dimensional Transforms, Prev: 1d Discrete Hartley Transforms (DHTs), Up: What FFTW Really Computes + +4.8.6 Multi-dimensional Transforms +---------------------------------- + +The multi-dimensional transforms of FFTW, in general, compute simply the +separable product of the given 1d transform along each dimension of the +array. Since each of these transforms is unnormalized, computing the +forward followed by the backward/inverse multi-dimensional transform +will result in the original array scaled by the product of the +normalization factors for each dimension (e.g. the product of the +dimension sizes, for a multi-dimensional DFT). + + The definition of FFTW's multi-dimensional DFT of real data (r2c) +deserves special attention. In this case, we logically compute the full +multi-dimensional DFT of the input data; since the input data are purely +real, the output data have the Hermitian symmetry and therefore only one +non-redundant half need be stored. More specifically, for an n[0] x +n[1] x n[2] x ... x n[d-1] multi-dimensional real-input DFT, the full +(logical) complex output array Y[k[0], k[1], ..., k[d-1]] has the +symmetry: Y[k[0], k[1], ..., k[d-1]] = Y[n[0] - k[0], n[1] - k[1], ..., +n[d-1] - k[d-1]]* (where each dimension is periodic). Because of this +symmetry, we only store the k[d-1] = 0...n[d-1]/2 elements of the +_last_ dimension (division by 2 is rounded down). (We could instead +have cut any other dimension in half, but the last dimension proved +computationally convenient.) This results in the peculiar array format +described in more detail by *note Real-data DFT Array Format::. + + The multi-dimensional c2r transform is simply the unnormalized +inverse of the r2c transform. i.e. it is the same as FFTW's complex +backward multi-dimensional DFT, operating on a Hermitian input array in +the peculiar format mentioned above and outputting a real array (since +the DFT output is purely real). 
+ + We should remind the user that the separable product of 1d transforms +along each dimension, as computed by FFTW, is not always the same thing +as the usual multi-dimensional transform. A multi-dimensional `R2HC' +(or `HC2R') transform is not identical to the multi-dimensional DFT, +requiring some post-processing to combine the requisite real and +imaginary parts, as was described in *note The Halfcomplex-format +DFT::. Likewise, FFTW's multidimensional `FFTW_DHT' r2r transform is +not the same thing as the logical multi-dimensional discrete Hartley +transform defined in the literature, as discussed in *note The Discrete +Hartley Transform::. + + +File: fftw3.info, Node: Multi-threaded FFTW, Next: Distributed-memory FFTW with MPI, Prev: FFTW Reference, Up: Top + +5 Multi-threaded FFTW +********************* + +In this chapter we document the parallel FFTW routines for +shared-memory parallel hardware. These routines, which support +parallel one- and multi-dimensional transforms of both real and complex +data, are the easiest way to take advantage of multiple processors with +FFTW. They work just like the corresponding uniprocessor transform +routines, except that you have an extra initialization routine to call, +and there is a routine to set the number of threads to employ. Any +program that uses the uniprocessor FFTW can therefore be trivially +modified to use the multi-threaded FFTW. + + A shared-memory machine is one in which all CPUs can directly access +the same main memory, and such machines are now common due to the +ubiquity of multi-core CPUs. FFTW's multi-threading support allows you +to utilize these additional CPUs transparently from a single program. +However, this does not necessarily translate into performance +gains--when multiple threads/CPUs are employed, there is an overhead +required for synchronization that may outweigh the computational +parallelism. Therefore, you can only benefit from threads if your +problem is sufficiently large.
+ +* Menu: + +* Installation and Supported Hardware/Software:: +* Usage of Multi-threaded FFTW:: +* How Many Threads to Use?:: +* Thread safety:: + + +File: fftw3.info, Node: Installation and Supported Hardware/Software, Next: Usage of Multi-threaded FFTW, Prev: Multi-threaded FFTW, Up: Multi-threaded FFTW + +5.1 Installation and Supported Hardware/Software +================================================ + +All of the FFTW threads code is located in the `threads' subdirectory +of the FFTW package. On Unix systems, the FFTW threads libraries and +header files can be automatically configured, compiled, and installed +along with the uniprocessor FFTW libraries simply by including +`--enable-threads' in the flags to the `configure' script (*note +Installation on Unix::), or `--enable-openmp' to use OpenMP +(http://www.openmp.org) threads. + + The threads routines require your operating system to have some sort +of shared-memory threads support. Specifically, the FFTW threads +package works with POSIX threads (available on most Unix variants, from +GNU/Linux to MacOS X) and Win32 threads. OpenMP threads, which are +supported in many common compilers (e.g. gcc) are also supported, and +may give better performance on some systems. (OpenMP threads are also +useful if you are employing OpenMP in your own code, in order to +minimize conflicts between threading models.) If you have a +shared-memory machine that uses a different threads API, it should be a +simple matter of programming to include support for it; see the file +`threads/threads.c' for more detail. + + You can compile FFTW with _both_ `--enable-threads' and +`--enable-openmp' at the same time, since they install libraries with +different names (`fftw3_threads' and `fftw3_omp', as described below). +However, your programs may only link to _one_ of these two libraries at +a time. + + Ideally, of course, you should also have multiple processors in +order to get any benefit from the threaded transforms. 
+ + +File: fftw3.info, Node: Usage of Multi-threaded FFTW, Next: How Many Threads to Use?, Prev: Installation and Supported Hardware/Software, Up: Multi-threaded FFTW + +5.2 Usage of Multi-threaded FFTW +================================ + +Here, it is assumed that the reader is already familiar with the usage +of the uniprocessor FFTW routines, described elsewhere in this manual. +We only describe what one has to change in order to use the +multi-threaded routines. + + First, programs using the parallel complex transforms should be +linked with `-lfftw3_threads -lfftw3 -lm' on Unix, or `-lfftw3_omp +-lfftw3 -lm' if you compiled with OpenMP. You will also need to link +with whatever library is responsible for threads on your system (e.g. +`-lpthread' on GNU/Linux) or include whatever compiler flag enables +OpenMP (e.g. `-fopenmp' with gcc). + + Second, before calling _any_ FFTW routines, you should call the +function: + + int fftw_init_threads(void); + + This function, which need only be called once, performs any one-time +initialization required to use threads on your system. It returns zero +if there was some error (which should not happen under normal +circumstances) and a non-zero value otherwise. + + Third, before creating a plan that you want to parallelize, you +should call: + + void fftw_plan_with_nthreads(int nthreads); + + The `nthreads' argument indicates the number of threads you want +FFTW to use (or actually, the maximum number). All plans subsequently +created with any planner routine will use that many threads. You can +call `fftw_plan_with_nthreads', create some plans, call +`fftw_plan_with_nthreads' again with a different argument, and create +some more plans for a new number of threads. Plans already created +before a call to `fftw_plan_with_nthreads' are unaffected. If you pass +an `nthreads' argument of `1' (the default), threads are disabled for +subsequent plans. 
+ + With OpenMP, to configure FFTW to use all of the currently running +OpenMP threads (set by `omp_set_num_threads(nthreads)' or by the +`OMP_NUM_THREADS' environment variable), you can do: +`fftw_plan_with_nthreads(omp_get_max_threads())'. (The `omp_' OpenMP +functions are declared via `#include <omp.h>'.) + + Given a plan, you then execute it as usual with +`fftw_execute(plan)', and the execution will use the number of threads +specified when the plan was created. When done, you destroy it as +usual with `fftw_destroy_plan'. As described in *note Thread safety::, +plan _execution_ is thread-safe, but plan creation and destruction are +_not_: you should create/destroy plans only from a single thread, but +can safely execute multiple plans in parallel. + + There is one additional routine: if you want to get rid of all memory +and other resources allocated internally by FFTW, you can call: + + void fftw_cleanup_threads(void); + + which is much like the `fftw_cleanup()' function except that it also +gets rid of threads-related data. You must _not_ execute any +previously created plans after calling this function. + + We should also mention one other restriction: if you save wisdom +from a program using the multi-threaded FFTW, that wisdom _cannot be +used_ by a program using only the single-threaded FFTW (i.e. not calling +`fftw_init_threads'). *Note Words of Wisdom-Saving Plans::. + + +File: fftw3.info, Node: How Many Threads to Use?, Next: Thread safety, Prev: Usage of Multi-threaded FFTW, Up: Multi-threaded FFTW + +5.3 How Many Threads to Use? +============================ + +There is a fair amount of overhead involved in synchronizing threads, +so the optimal number of threads to use depends upon the size of the +transform as well as on the number of processors you have. + + As a general rule, you don't want to use more threads than you have +processors. (Using more threads will work, but there will be extra +overhead with no benefit.)
In fact, if the problem size is too small, +you may want to use fewer threads than you have processors. + + You will have to experiment with your system to see what level of +parallelization is best for your problem size. Typically, the problem +will have to involve at least a few thousand data points before threads +become beneficial. If you plan with `FFTW_PATIENT', it will +automatically disable threads for sizes that don't benefit from +parallelization. + + +File: fftw3.info, Node: Thread safety, Prev: How Many Threads to Use?, Up: Multi-threaded FFTW + +5.4 Thread safety +================= + +Users writing multi-threaded programs (including OpenMP) must concern +themselves with the "thread safety" of the libraries they use--that is, +whether it is safe to call routines in parallel from multiple threads. +FFTW can be used in such an environment, but some care must be taken +because the planner routines share data (e.g. wisdom and trigonometric +tables) between calls and plans. + + The upshot is that the only thread-safe (re-entrant) routine in FFTW +is `fftw_execute' (and the new-array variants thereof). All other +routines (e.g. the planner) should only be called from one thread at a +time. So, for example, you can wrap a semaphore lock around any calls +to the planner; even more simply, you can just create all of your plans +from one thread. We do not think this should be an important +restriction (FFTW is designed for the situation where the only +performance-sensitive code is the actual execution of the transform), +and the benefits of shared data between plans are great. + + Note also that, since the plan is not modified by `fftw_execute', it +is safe to execute the _same plan_ in parallel by multiple threads. +However, since a given plan operates by default on a fixed array, you +need to use one of the new-array execute functions (*note New-array +Execute Functions::) so that different threads compute the transform of +different data. 
+ + (Users should note that these comments only apply to programs using +shared-memory threads or OpenMP. Parallelism using MPI or forked +processes involves a separate address-space and global variables for +each process, and is not susceptible to problems of this sort.) + + If you have configured FFTW with the `--enable-debug' or +`--enable-debug-malloc' flags (*note Installation on Unix::), then +`fftw_execute' is not thread-safe. These flags are not documented +because they are intended only for developing and debugging FFTW, but +if you must use `--enable-debug' then you should also specifically pass +`--disable-debug-malloc' for `fftw_execute' to be thread-safe. + + +File: fftw3.info, Node: Distributed-memory FFTW with MPI, Next: Calling FFTW from Modern Fortran, Prev: Multi-threaded FFTW, Up: Top + +6 Distributed-memory FFTW with MPI +********************************** + +In this chapter we document the parallel FFTW routines for parallel +systems supporting the MPI message-passing interface. Unlike the +shared-memory threads described in the previous chapter, MPI allows you +to use _distributed-memory_ parallelism, where each CPU has its own +separate memory, and which can scale up to clusters of many thousands +of processors. This capability comes at a price, however: each process +only stores a _portion_ of the data to be transformed, which means that +the data structures and programming-interface are quite different from +the serial or threads versions of FFTW. + + Distributed-memory parallelism is especially useful when you are +transforming arrays so large that they do not fit into the memory of a +single processor. The storage per-process required by FFTW's MPI +routines is proportional to the total array size divided by the number +of processes.
Conversely, distributed-memory parallelism can easily +pose an unacceptably high communications overhead for small problems; +the threshold problem size for which parallelism becomes advantageous +will depend on the precise problem you are interested in, your +hardware, and your MPI implementation. + + A note on terminology: in MPI, you divide the data among a set of +"processes" which each run in their own memory address space. +Generally, each process runs on a different physical processor, but +this is not required. A set of processes in MPI is described by an +opaque data structure called a "communicator," the most common of which +is the predefined communicator `MPI_COMM_WORLD' which refers to _all_ +processes. For more information on these and other concepts common to +all MPI programs, we refer the reader to the documentation at the MPI +home page (http://www.mcs.anl.gov/research/projects/mpi/). + + We assume in this chapter that the reader is familiar with the usage +of the serial (uniprocessor) FFTW, and focus only on the concepts new +to the MPI interface. + +* Menu: + +* FFTW MPI Installation:: +* Linking and Initializing MPI FFTW:: +* 2d MPI example:: +* MPI Data Distribution:: +* Multi-dimensional MPI DFTs of Real Data:: +* Other Multi-dimensional Real-data MPI Transforms:: +* FFTW MPI Transposes:: +* FFTW MPI Wisdom:: +* Avoiding MPI Deadlocks:: +* FFTW MPI Performance Tips:: +* Combining MPI and Threads:: +* FFTW MPI Reference:: +* FFTW MPI Fortran Interface:: + + +File: fftw3.info, Node: FFTW MPI Installation, Next: Linking and Initializing MPI FFTW, Prev: Distributed-memory FFTW with MPI, Up: Distributed-memory FFTW with MPI + +6.1 FFTW MPI Installation +========================= + +All of the FFTW MPI code is located in the `mpi' subdirectory of the +FFTW package. 
On Unix systems, the FFTW MPI libraries and header files +are automatically configured, compiled, and installed along with the +uniprocessor FFTW libraries simply by including `--enable-mpi' in the +flags to the `configure' script (*note Installation on Unix::). + + Any implementation of the MPI standard, version 1 or later, should +work with FFTW. The `configure' script will attempt to automatically +detect how to compile and link code using your MPI implementation. In +some cases, especially if you have multiple different MPI +implementations installed or have an unusual MPI software package, you +may need to provide this information explicitly. + + Most commonly, one compiles MPI code by invoking a special compiler +command, typically `mpicc' for C code. The `configure' script knows +the most common names for this command, but you can specify the MPI +compilation command explicitly by setting the `MPICC' variable, as in +`./configure MPICC=mpicc ...'. + + If, instead of a special compiler command, you need to link a certain +library, you can specify the link command via the `MPILIBS' variable, +as in `./configure MPILIBS=-lmpi ...'. Note that if your MPI library +is installed in a non-standard location (one the compiler does not know +about by default), you may also have to specify the location of the +library and header files via `LDFLAGS' and `CPPFLAGS' variables, +respectively, as in `./configure LDFLAGS=-L/path/to/mpi/libs +CPPFLAGS=-I/path/to/mpi/include ...'. + + +File: fftw3.info, Node: Linking and Initializing MPI FFTW, Next: 2d MPI example, Prev: FFTW MPI Installation, Up: Distributed-memory FFTW with MPI + +6.2 Linking and Initializing MPI FFTW +===================================== + +Programs using the MPI FFTW routines should be linked with `-lfftw3_mpi +-lfftw3 -lm' on Unix in double precision, `-lfftw3f_mpi -lfftw3f -lm' +in single precision, and so on (*note Precision::). 
You will also need +to link with whatever library is responsible for MPI on your system; in +most MPI implementations, there is a special compiler alias named +`mpicc' to compile and link MPI code. + + Before calling any FFTW routines except possibly `fftw_init_threads' +(*note Combining MPI and Threads::), but after calling `MPI_Init', you +should call the function: + + void fftw_mpi_init(void); + + If, at the end of your program, you want to get rid of all memory and +other resources allocated internally by FFTW, for both the serial and +MPI routines, you can call: + + void fftw_mpi_cleanup(void); + + which is much like the `fftw_cleanup()' function except that it also +gets rid of FFTW's MPI-related data. You must _not_ execute any +previously created plans after calling this function. + + +File: fftw3.info, Node: 2d MPI example, Next: MPI Data Distribution, Prev: Linking and Initializing MPI FFTW, Up: Distributed-memory FFTW with MPI + +6.3 2d MPI example +================== + +Before we document the FFTW MPI interface in detail, we begin with a +simple example outlining how one would perform a two-dimensional `N0' +by `N1' complex DFT. 
+ + #include <fftw3-mpi.h> + + int main(int argc, char **argv) + { + const ptrdiff_t N0 = ..., N1 = ...; + fftw_plan plan; + fftw_complex *data; + ptrdiff_t alloc_local, local_n0, local_0_start, i, j; + + MPI_Init(&argc, &argv); + fftw_mpi_init(); + + /* get local data size and allocate */ + alloc_local = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD, + &local_n0, &local_0_start); + data = fftw_alloc_complex(alloc_local); + + /* create plan for in-place forward DFT */ + plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, MPI_COMM_WORLD, + FFTW_FORWARD, FFTW_ESTIMATE); + + /* initialize data to some function my_function(x,y) */ + for (i = 0; i < local_n0; ++i) for (j = 0; j < N1; ++j) + data[i*N1 + j] = my_function(local_0_start + i, j); + + /* compute transforms, in-place, as many times as desired */ + fftw_execute(plan); + + fftw_destroy_plan(plan); + + MPI_Finalize(); + } + + As can be seen above, the MPI interface follows the same basic style +of allocate/plan/execute/destroy as the serial FFTW routines. All of +the MPI-specific routines are prefixed with `fftw_mpi_' instead of +`fftw_'. There are a few important differences, however: + + First, we must call `fftw_mpi_init()' after calling `MPI_Init' +(required in all MPI programs) and before calling any other `fftw_mpi_' +routine. + + Second, when we create the plan with `fftw_mpi_plan_dft_2d', +analogous to `fftw_plan_dft_2d', we pass an additional argument: the +communicator, indicating which processes will participate in the +transform (here `MPI_COMM_WORLD', indicating all processes). Whenever +you create, execute, or destroy a plan for an MPI transform, you must +call the corresponding FFTW routine on _all_ processes in the +communicator for that transform. (That is, these are _collective_ +calls.) Note that the plan for the MPI transform uses the standard +`fftw_execute' and `fftw_destroy_plan' routines (on the other hand, there +are MPI-specific new-array execute functions documented below).
+ + Third, all of the FFTW MPI routines take `ptrdiff_t' arguments +instead of `int' as for the serial FFTW. `ptrdiff_t' is a standard C +integer type which is (at least) 32 bits wide on a 32-bit machine and +64 bits wide on a 64-bit machine. This is to make it easy to specify +very large parallel transforms on a 64-bit machine. (You can specify +64-bit transform sizes in the serial FFTW, too, but only by using the +`guru64' planner interface. *Note 64-bit Guru Interface::.) + + Fourth, and most importantly, you don't allocate the entire +two-dimensional array on each process. Instead, you call +`fftw_mpi_local_size_2d' to find out what _portion_ of the array +resides on each processor, and how much space to allocate. Here, the +portion of the array on each process is a `local_n0' by `N1' slice of +the total array, starting at index `local_0_start'. The total number +of `fftw_complex' numbers to allocate is given by the `alloc_local' +return value, which _may_ be greater than `local_n0 * N1' (in case some +intermediate calculations require additional storage). The data +distribution in FFTW's MPI interface is described in more detail by the +next section. + + Given the portion of the array that resides on the local process, it +is straightforward to initialize the data (here to a function +`my_function') and otherwise manipulate it. Of course, at the end of +the program you may want to output the data somehow, but synchronizing +this output is up to you and is beyond the scope of this manual. (One +good way to output a large multi-dimensional distributed array in MPI +to a portable binary file is to use the free HDF5 library; see the HDF +home page (http://www.hdfgroup.org/).)
+ + +File: fftw3.info, Node: MPI Data Distribution, Next: Multi-dimensional MPI DFTs of Real Data, Prev: 2d MPI example, Up: Distributed-memory FFTW with MPI + +6.4 MPI Data Distribution +========================= + +The most important concept to understand in using FFTW's MPI interface +is the data distribution. With a serial or multithreaded FFT, all of +the inputs and outputs are stored as a single contiguous chunk of +memory. With a distributed-memory FFT, the inputs and outputs are +broken into disjoint blocks, one per process. + + In particular, FFTW uses a _1d block distribution_ of the data, +distributed along the _first dimension_. For example, if you want to +perform a 100 x 200 complex DFT, distributed over 4 processes, each +process will get a 25 x 200 slice of the data. That is, process 0 +will get rows 0 through 24, process 1 will get rows 25 through 49, +process 2 will get rows 50 through 74, and process 3 will get rows 75 +through 99. If you take the same array but distribute it over 3 +processes, then it is not evenly divisible so the different processes +will have unequal chunks. FFTW's default choice in this case is to +assign 34 rows to processes 0 and 1, and 32 rows to process 2. + + FFTW provides several `fftw_mpi_local_size' routines that you can +call to find out what portion of an array is stored on the current +process. In most cases, you should use the default block sizes picked +by FFTW, but it is also possible to specify your own block size. For +example, with a 100 x 200 array on three processes, you can tell FFTW +to use a block size of 40, which would assign 40 rows to processes 0 +and 1, and 20 rows to process 2. FFTW's default is to divide the data +equally among the processes if possible, and as best it can otherwise. +The rows are always assigned in "rank order," i.e. process 0 gets the +first block of rows, then process 1, and so on. 
(You can change this +by using `MPI_Comm_split' to create a new communicator with re-ordered +processes.) However, you should always call the `fftw_mpi_local_size' +routines, if possible, rather than trying to predict FFTW's +distribution choices. + + In particular, it is critical that you allocate the storage size that +is returned by `fftw_mpi_local_size', which is _not_ necessarily the +size of the local slice of the array. The reason is that intermediate +steps of FFTW's algorithms involve transposing the array and +redistributing the data, so at these intermediate steps FFTW may +require more local storage space (albeit always proportional to the +total size divided by the number of processes). The +`fftw_mpi_local_size' functions know how much storage is required for +these intermediate steps and tell you the correct amount to allocate. + +* Menu: + +* Basic and advanced distribution interfaces:: +* Load balancing:: +* Transposed distributions:: +* One-dimensional distributions:: + + +File: fftw3.info, Node: Basic and advanced distribution interfaces, Next: Load balancing, Prev: MPI Data Distribution, Up: MPI Data Distribution + +6.4.1 Basic and advanced distribution interfaces +------------------------------------------------ + +As with the planner interface, the `fftw_mpi_local_size' distribution +interface is broken into basic and advanced (`_many') interfaces, where +the latter allows you to specify the block size manually and also to +request block sizes when computing multiple transforms simultaneously. +These functions are documented more exhaustively by the FFTW MPI +Reference, but we summarize the basic ideas here using a couple of +two-dimensional examples. 
+ + For the 100 x 200 complex-DFT example, above, we would find the +distribution by calling the following function in the basic interface: + + ptrdiff_t fftw_mpi_local_size_2d(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); + + Given the total size of the data to be transformed (here, `n0 = 100' +and `n1 = 200') and an MPI communicator (`comm'), this function +provides three numbers. + + First, it describes the shape of the local data: the current process +should store a `local_n0' by `n1' slice of the overall dataset, in +row-major order (`n1' dimension contiguous), starting at index +`local_0_start'. That is, if the total dataset is viewed as a `n0' by +`n1' matrix, the current process should store the rows `local_0_start' +to `local_0_start+local_n0-1'. Obviously, if you are running with only +a single MPI process, that process will store the entire array: +`local_0_start' will be zero and `local_n0' will be `n0'. *Note +Row-major Format::. + + Second, the return value is the total number of data elements (e.g., +complex numbers for a complex DFT) that should be allocated for the +input and output arrays on the current process (ideally with +`fftw_malloc' or an `fftw_alloc' function, to ensure optimal +alignment). It might seem that this should always be equal to +`local_n0 * n1', but this is _not_ the case. FFTW's distributed FFT +algorithms require data redistributions at intermediate stages of the +transform, and in some circumstances this may require slightly larger +local storage. This is discussed in more detail below, under *note +Load balancing::. 
+ + The advanced-interface `local_size' function for multidimensional +transforms returns the same three things (`local_n0', `local_0_start', +and the total number of elements to allocate), but takes more inputs: + + ptrdiff_t fftw_mpi_local_size_many(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t block0, + MPI_Comm comm, + ptrdiff_t *local_n0, + ptrdiff_t *local_0_start); + + The two-dimensional case above corresponds to `rnk = 2' and an array +`n' of length 2 with `n[0] = n0' and `n[1] = n1'. This routine is for +any `rnk > 1'; one-dimensional transforms have their own interface +because they work slightly differently, as discussed below. + + First, the advanced interface allows you to perform multiple +transforms at once, of interleaved data, as specified by the `howmany' +parameter. (`howmany' is 1 for a single transform.) + + Second, here you can specify your desired block size in the `n0' +dimension, `block0'. To use FFTW's default block size, pass +`FFTW_MPI_DEFAULT_BLOCK' (0) for `block0'. Otherwise, on `P' +processes, FFTW will return `local_n0' equal to `block0' on the first +`P / block0' processes (rounded down), return `local_n0' equal to `n0 - +block0 * (P / block0)' on the next process, and `local_n0' equal to +zero on any remaining processes. In general, we recommend using the +default block size (which corresponds to `n0 / P', rounded up). + + For example, suppose you have `P = 4' processes and `n0 = 21'. The +default will be a block size of `6', which will give `local_n0 = 6' on +the first three processes and `local_n0 = 3' on the last process. +Instead, however, you could specify `block0 = 5' if you wanted, which +would give `local_n0 = 5' on processes 0 to 2, `local_n0 = 6' on +process 3. (This choice, while it may look superficially more +"balanced," has the same critical path as FFTW's default but requires +more communications.)
+ + +File: fftw3.info, Node: Load balancing, Next: Transposed distributions, Prev: Basic and advanced distribution interfaces, Up: MPI Data Distribution + +6.4.2 Load balancing +-------------------- + +Ideally, when you parallelize a transform over some P processes, each +process should end up with work that takes equal time. Otherwise, all +of the processes end up waiting on whichever process is slowest. This +goal is known as "load balancing." In this section, we describe the +circumstances under which FFTW is able to load-balance well, and in +particular how you should choose your transform size in order to load +balance. + + Load balancing is especially difficult when you are parallelizing +over heterogeneous machines; for example, if one of your processors is an +old 486 and another is a Pentium IV, obviously you should give the +Pentium more work to do than the 486 since the latter is much slower. +FFTW does not deal with this problem, however--it assumes that your +processes run on hardware of comparable speed, and that the goal is +therefore to divide the problem as equally as possible. + + For a multi-dimensional complex DFT, FFTW can divide the problem +equally among the processes if: (i) the _first_ dimension `n0' is +divisible by P; and (ii), the _product_ of the subsequent dimensions is +divisible by P. (For the advanced interface, where you can specify +multiple simultaneous transforms via some "vector" length `howmany', a +factor of `howmany' is included in the product of the subsequent +dimensions.) + + For a one-dimensional complex DFT, the length `N' of the data should +be divisible by P _squared_ to be able to divide the problem equally +among the processes.
+ + +File: fftw3.info, Node: Transposed distributions, Next: One-dimensional distributions, Prev: Load balancing, Up: MPI Data Distribution + +6.4.3 Transposed distributions +------------------------------ + +Internally, FFTW's MPI transform algorithms work by first computing +transforms of the data local to each process, then by globally +_transposing_ the data in some fashion to redistribute the data among +the processes, transforming the new data local to each process, and +transposing back. For example, a two-dimensional `n0' by `n1' array, +distributed across the `n0' dimension, is transformed by: (i) +transforming the `n1' dimension, which is local to each process; (ii) +transposing to an `n1' by `n0' array, distributed across the `n1' +dimension; (iii) transforming the `n0' dimension, which is now local to +each process; (iv) transposing back. + + However, in many applications it is acceptable to compute a +multidimensional DFT whose results are produced in transposed order +(e.g., `n1' by `n0' in two dimensions). This provides a significant +performance advantage, because it means that the final transposition +step can be omitted. FFTW supports this optimization, which you +specify by passing the flag `FFTW_MPI_TRANSPOSED_OUT' to the planner +routines. To compute the inverse transform of transposed output, you +specify `FFTW_MPI_TRANSPOSED_IN' to tell it that the input is +transposed. In this section, we explain how to interpret the output +format of such a transform. + + Suppose you are transforming multi-dimensional data with (at +least two) dimensions n[0] x n[1] x n[2] x ... x n[d-1] . As always, +it is distributed along the first dimension n[0] . Now, if we compute +its DFT with the `FFTW_MPI_TRANSPOSED_OUT' flag, the resulting output +data are stored with the first _two_ dimensions transposed: n[1] x n[0] +x n[2] x ... x n[d-1] , distributed along the n[1] dimension. +Conversely, if we take the n[1] x n[0] x n[2] x ...
x n[d-1] data and +transform it with the `FFTW_MPI_TRANSPOSED_IN' flag, then the format +goes back to the original n[0] x n[1] x n[2] x ... x n[d-1] array. + + There are two ways to find the portion of the transposed array that +resides on the current process. First, you can simply call the +appropriate `local_size' function, passing n[1] x n[0] x n[2] x ... x +n[d-1] (the transposed dimensions). This would mean calling the +`local_size' function twice, once for the transposed and once for the +non-transposed dimensions. Alternatively, you can call one of the +`local_size_transposed' functions, which returns both the +non-transposed and transposed data distribution from a single call. +For example, for a 3d transform with transposed output (or input), you +might call: + + ptrdiff_t fftw_mpi_local_size_3d_transposed( + ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); + + Here, `local_n0' and `local_0_start' give the size and starting +index of the `n0' dimension for the _non_-transposed data, as in the +previous sections. For _transposed_ data (e.g. the output for +`FFTW_MPI_TRANSPOSED_OUT'), `local_n1' and `local_1_start' give the +size and starting index of the `n1' dimension, which is the first +dimension of the transposed data (`n1' by `n0' by `n2'). + + (Note that `FFTW_MPI_TRANSPOSED_IN' is completely equivalent to +performing `FFTW_MPI_TRANSPOSED_OUT' and passing the first two +dimensions to the planner in reverse order, or vice versa. If you pass +_both_ the `FFTW_MPI_TRANSPOSED_IN' and `FFTW_MPI_TRANSPOSED_OUT' +flags, it is equivalent to swapping the first two dimensions passed to +the planner and passing _neither_ flag.) 
+ + +File: fftw3.info, Node: One-dimensional distributions, Prev: Transposed distributions, Up: MPI Data Distribution + +6.4.4 One-dimensional distributions +----------------------------------- + +For one-dimensional distributed DFTs using FFTW, matters are slightly +more complicated because the data distribution is more closely tied to +how the algorithm works. In particular, you can no longer pass an +arbitrary block size and must accept FFTW's default; also, the block +sizes may be different for input and output. Also, the data +distribution depends on the flags and transform direction, in order for +forward and backward transforms to work correctly. + + ptrdiff_t fftw_mpi_local_size_1d(ptrdiff_t n0, MPI_Comm comm, + int sign, unsigned flags, + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, + ptrdiff_t *local_no, ptrdiff_t *local_o_start); + + This function computes the data distribution for a 1d transform of +size `n0' with the given transform `sign' and `flags'. Both input and +output data use block distributions. The input on the current process +will consist of `local_ni' numbers starting at index `local_i_start'; +e.g. if only a single process is used, then `local_ni' will be `n0' and +`local_i_start' will be `0'. Similarly for the output, with `local_no' +numbers starting at index `local_o_start'. The return value of +`fftw_mpi_local_size_1d' will be the total number of elements to +allocate on the current process (which might be slightly larger than +the local size due to intermediate steps in the algorithm). + + As mentioned above (*note Load balancing::), the data will be divided +equally among the processes if `n0' is divisible by the _square_ of the +number of processes. In this case, `local_ni' will equal `local_no'. +Otherwise, they may be different. + + For some applications, such as convolutions, the order of the output +data is irrelevant. 
In this case, performance can be improved by +specifying that the output data be stored in an FFTW-defined +"scrambled" format. (In particular, this is the analogue of transposed +output in the multidimensional case: scrambled output saves a +communications step.) If you pass `FFTW_MPI_SCRAMBLED_OUT' in the +flags, then the output is stored in this (undocumented) scrambled +order. Conversely, to perform the inverse transform of data in +scrambled order, pass the `FFTW_MPI_SCRAMBLED_IN' flag. + + In MPI FFTW, only composite sizes `n0' can be parallelized; we have +not yet implemented a parallel algorithm for large prime sizes. + + +File: fftw3.info, Node: Multi-dimensional MPI DFTs of Real Data, Next: Other Multi-dimensional Real-data MPI Transforms, Prev: MPI Data Distribution, Up: Distributed-memory FFTW with MPI + +6.5 Multi-dimensional MPI DFTs of Real Data +=========================================== + +FFTW's MPI interface also supports multi-dimensional DFTs of real data, +similar to the serial r2c and c2r interfaces. (Parallel +one-dimensional real-data DFTs are not currently supported; you must +use a complex transform and set the imaginary parts of the inputs to +zero.) + + The key points to understand for r2c and c2r MPI transforms (compared +to the MPI complex DFTs or the serial r2c/c2r transforms), are: + + * Just as for serial transforms, r2c/c2r DFTs transform n[0] x n[1] + x n[2] x ... x n[d-1] real data to/from n[0] x n[1] x n[2] x ... + x (n[d-1]/2 + 1) complex data: the last dimension of the complex + data is cut in half (rounded down), plus one. As for the serial + transforms, the sizes you pass to the `plan_dft_r2c' and + `plan_dft_c2r' are the n[0] x n[1] x n[2] x ... x n[d-1] + dimensions of the real data. + + * Although the real data is _conceptually_ n[0] x n[1] x n[2] x ... + x n[d-1] , it is _physically_ stored as an n[0] x n[1] x n[2] x + ... 
x [2 (n[d-1]/2 + 1)] array, where the last dimension has been + _padded_ to make it the same size as the complex output. This is + much like the in-place serial r2c/c2r interface (*note + Multi-Dimensional DFTs of Real Data::), except that in MPI the + padding is required even for out-of-place data. The extra padding + numbers are ignored by FFTW (they are _not_ like zero-padding the + transform to a larger size); they are only used to determine the + data layout. + + * The data distribution in MPI for _both_ the real and complex data + is determined by the shape of the _complex_ data. That is, you + call the appropriate `local_size' function for the n[0] x n[1] x + n[2] x ... x (n[d-1]/2 + 1) + + complex data, and then use the _same_ distribution for the real + data except that the last complex dimension is replaced by a + (padded) real dimension of twice the length. + + + For example suppose we are performing an out-of-place r2c transform +of L x M x N real data [padded to L x M x 2(N/2+1) ], resulting in L x +M x N/2+1 complex data.
Similar to the example in *note 2d MPI +example::, we might do something like: + + #include <fftw3-mpi.h> + + int main(int argc, char **argv) + { + const ptrdiff_t L = ..., M = ..., N = ...; + fftw_plan plan; + double *rin; + fftw_complex *cout; + ptrdiff_t alloc_local, local_n0, local_0_start, i, j, k; + + MPI_Init(&argc, &argv); + fftw_mpi_init(); + + /* get local data size and allocate */ + alloc_local = fftw_mpi_local_size_3d(L, M, N/2+1, MPI_COMM_WORLD, + &local_n0, &local_0_start); + rin = fftw_alloc_real(2 * alloc_local); + cout = fftw_alloc_complex(alloc_local); + + /* create plan for out-of-place r2c DFT */ + plan = fftw_mpi_plan_dft_r2c_3d(L, M, N, rin, cout, MPI_COMM_WORLD, + FFTW_MEASURE); + + /* initialize rin to some function my_func(x,y,z) */ + for (i = 0; i < local_n0; ++i) + for (j = 0; j < M; ++j) + for (k = 0; k < N; ++k) + rin[(i*M + j) * (2*(N/2+1)) + k] = my_func(local_0_start+i, j, k); + + /* compute transforms as many times as desired */ + fftw_execute(plan); + + fftw_destroy_plan(plan); + + MPI_Finalize(); + } + + Note that we allocated `rin' using `fftw_alloc_real' with an +argument of `2 * alloc_local': since `alloc_local' is the number of +_complex_ values to allocate, the number of _real_ values is twice as +many. The `rin' array is then local_n0 x M x 2(N/2+1) in row-major +order, so its `(i,j,k)' element is at the index `(i*M + j) * +(2*(N/2+1)) + k' (*note Multi-dimensional Array Format::). + + As for the complex transforms, improved performance can be obtained +by specifying that the output is the transpose of the input or vice +versa (*note Transposed distributions::). In our L x M x N r2c +example, including `FFTW_MPI_TRANSPOSED_OUT' in the flags means that the +input would be a padded L x M x 2(N/2+1) real array distributed over +the `L' dimension, while the output would be a M x L x N/2+1 complex +array distributed over the `M' dimension.
To perform the inverse c2r +transform with the same data distributions, you would use the +`FFTW_MPI_TRANSPOSED_IN' flag. + + +File: fftw3.info, Node: Other Multi-dimensional Real-data MPI Transforms, Next: FFTW MPI Transposes, Prev: Multi-dimensional MPI DFTs of Real Data, Up: Distributed-memory FFTW with MPI + +6.6 Other multi-dimensional Real-Data MPI Transforms +==================================================== + +FFTW's MPI interface also supports multi-dimensional `r2r' transforms +of all kinds supported by the serial interface (e.g. discrete cosine +and sine transforms, discrete Hartley transforms, etc.). Only +multi-dimensional `r2r' transforms, not one-dimensional transforms, are +currently parallelized. + + These are used much like the multidimensional complex DFTs discussed +above, except that the data is real rather than complex, and one needs +to pass an r2r transform kind (`fftw_r2r_kind') for each dimension as +in the serial FFTW (*note More DFTs of Real Data::). + + For example, one might perform a two-dimensional L x M transform that is an +REDFT10 (DCT-II) in the first dimension and an RODFT10 (DST-II) in the +second dimension with code like: + + const ptrdiff_t L = ..., M = ...; + fftw_plan plan; + double *data; + ptrdiff_t alloc_local, local_n0, local_0_start, i, j; + + /* get local data size and allocate */ + alloc_local = fftw_mpi_local_size_2d(L, M, MPI_COMM_WORLD, + &local_n0, &local_0_start); + data = fftw_alloc_real(alloc_local); + + /* create plan for in-place REDFT10 x RODFT10 */ + plan = fftw_mpi_plan_r2r_2d(L, M, data, data, MPI_COMM_WORLD, + FFTW_REDFT10, FFTW_RODFT10, FFTW_MEASURE); + + /* initialize data to some function my_function(x,y) */ + for (i = 0; i < local_n0; ++i) for (j = 0; j < M; ++j) + data[i*M + j] = my_function(local_0_start + i, j); + + /* compute transforms, in-place, as many times as desired */ + fftw_execute(plan); + + fftw_destroy_plan(plan); + + Notice that we use the same `local_size' functions as we did for +complex 
data, only now we interpret the sizes in terms of real rather +than complex values, and correspondingly use `fftw_alloc_real'. + + +File: fftw3.info, Node: FFTW MPI Transposes, Next: FFTW MPI Wisdom, Prev: Other Multi-dimensional Real-data MPI Transforms, Up: Distributed-memory FFTW with MPI + +6.7 FFTW MPI Transposes +======================= + +FFTW's MPI Fourier transforms rely on one or more _global +transposition_ steps for their communications. For example, the +multidimensional transforms work by transforming along some dimensions, +then transposing to make the first dimension local and transforming +that, then transposing back. Because global transposition of a +block-distributed matrix has many other potential uses besides FFTs, +FFTW's transpose routines can be called directly, as documented in this +section. + +* Menu: + +* Basic distributed-transpose interface:: +* Advanced distributed-transpose interface:: +* An improved replacement for MPI_Alltoall:: + + +File: fftw3.info, Node: Basic distributed-transpose interface, Next: Advanced distributed-transpose interface, Prev: FFTW MPI Transposes, Up: FFTW MPI Transposes + +6.7.1 Basic distributed-transpose interface +------------------------------------------- + +In particular, suppose that we have an `n0' by `n1' array in row-major +order, block-distributed across the `n0' dimension. To transpose this +into an `n1' by `n0' array block-distributed across the `n1' dimension, +we would create a plan by calling the following function: + + fftw_plan fftw_mpi_plan_transpose(ptrdiff_t n0, ptrdiff_t n1, + double *in, double *out, + MPI_Comm comm, unsigned flags); + + The input and output arrays (`in' and `out') can be the same. The +transpose is actually executed by calling `fftw_execute' on the plan, +as usual. + + The `flags' are the usual FFTW planner flags, but support two +additional flags: `FFTW_MPI_TRANSPOSED_OUT' and/or +`FFTW_MPI_TRANSPOSED_IN'. 
What these flags indicate, for transpose +plans, is that the output and/or input, respectively, are _locally_ +transposed. That is, on each process input data is normally stored as +a `local_n0' by `n1' array in row-major order, but for an +`FFTW_MPI_TRANSPOSED_IN' plan the input data is stored as `n1' by +`local_n0' in row-major order. Similarly, `FFTW_MPI_TRANSPOSED_OUT' +means that the output is `n0' by `local_n1' instead of `local_n1' by +`n0'. + + To determine the local size of the array on each process before and +after the transpose, as well as the amount of storage that must be +allocated, one should call `fftw_mpi_local_size_2d_transposed', just as +for a 2d DFT as described in the previous section: + + ptrdiff_t fftw_mpi_local_size_2d_transposed + (ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); + + Again, the return value is the local storage to allocate, which in +this case is the number of _real_ (`double') values rather than complex +numbers as in the previous examples. + + +File: fftw3.info, Node: Advanced distributed-transpose interface, Next: An improved replacement for MPI_Alltoall, Prev: Basic distributed-transpose interface, Up: FFTW MPI Transposes + +6.7.2 Advanced distributed-transpose interface +---------------------------------------------- + +The above routines are for a transpose of a matrix of numbers (of type +`double'), using FFTW's default block sizes. More generally, one can +perform transposes of _tuples_ of numbers, with user-specified block +sizes for the input and output: + + fftw_plan fftw_mpi_plan_many_transpose + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, + double *in, double *out, MPI_Comm comm, unsigned flags); + + In this case, one is transposing an `n0' by `n1' matrix of +`howmany'-tuples (e.g. `howmany = 2' for complex numbers). 
The input +is distributed along the `n0' dimension with block size `block0', and +the `n1' by `n0' output is distributed along the `n1' dimension with +block size `block1'. If `FFTW_MPI_DEFAULT_BLOCK' (0) is passed for a +block size then FFTW uses its default block size. To get the local +size of the data on each process, you should then call +`fftw_mpi_local_size_many_transposed'. + + +File: fftw3.info, Node: An improved replacement for MPI_Alltoall, Prev: Advanced distributed-transpose interface, Up: FFTW MPI Transposes + +6.7.3 An improved replacement for MPI_Alltoall +---------------------------------------------- + +We close this section by noting that FFTW's MPI transpose routines can +be thought of as a generalization for the `MPI_Alltoall' function +(albeit only for floating-point types), and in some circumstances can +function as an improved replacement. + + `MPI_Alltoall' is defined by the MPI standard as: + + int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcnt, MPI_Datatype recvtype, + MPI_Comm comm); + + In particular, for `double*' arrays `in' and `out', consider the +call: + + MPI_Alltoall(in, howmany, MPI_DOUBLE, out, howmany, MPI_DOUBLE, comm); + + This is completely equivalent to: + + MPI_Comm_size(comm, &P); + plan = fftw_mpi_plan_many_transpose(P, P, howmany, 1, 1, in, out, comm, FFTW_ESTIMATE); + fftw_execute(plan); + fftw_destroy_plan(plan); + + That is, computing a P x P transpose on `P' processes, with a block +size of 1, is just a standard all-to-all communication. + + However, using the FFTW routine instead of `MPI_Alltoall' may have +certain advantages. First of all, FFTW's routine can operate in-place +(`in == out') whereas `MPI_Alltoall' can only operate out-of-place. + + Second, even for out-of-place plans, FFTW's routine may be faster, +especially if you need to perform the all-to-all communication many +times and can afford to use `FFTW_MEASURE' or `FFTW_PATIENT'. 
It +should certainly be no slower, not including the time to create the +plan, since one of the possible algorithms that FFTW uses for an +out-of-place transpose _is_ simply to call `MPI_Alltoall'. However, +FFTW also considers several other possible algorithms that, depending +on your MPI implementation and your hardware, may be faster. + + +File: fftw3.info, Node: FFTW MPI Wisdom, Next: Avoiding MPI Deadlocks, Prev: FFTW MPI Transposes, Up: Distributed-memory FFTW with MPI + +6.8 FFTW MPI Wisdom +=================== + +FFTW's "wisdom" facility (*note Words of Wisdom-Saving Plans::) can be +used to save MPI plans as well as to save uniprocessor plans. However, +for MPI there are several unavoidable complications. + + First, the MPI standard does not guarantee that every process can +perform file I/O (at least, not using C stdio routines)--in general, we +may only assume that process 0 is capable of I/O.(1) So, if we want to +export the wisdom from a single process to a file, we must first export +the wisdom to a string, then send it to process 0, then write it to a +file. + + Second, in principle we may want to have separate wisdom for every +process, since in general the processes may run on different hardware +even for a single MPI program. However, in practice FFTW's MPI code is +designed for the case of homogeneous hardware (*note Load balancing::), +and in this case it is convenient to use the same wisdom for every +process. Thus, we need a mechanism to synchronize the wisdom. + + To address both of these problems, FFTW provides the following two +functions: + + void fftw_mpi_broadcast_wisdom(MPI_Comm comm); + void fftw_mpi_gather_wisdom(MPI_Comm comm); + + Given a communicator `comm', `fftw_mpi_broadcast_wisdom' will +broadcast the wisdom from process 0 to all other processes. +Conversely, `fftw_mpi_gather_wisdom' will collect wisdom from all +processes onto process 0. 
(If the plans created for the same problem +by different processes are not the same, `fftw_mpi_gather_wisdom' will +arbitrarily choose one of the plans.) Both of these functions may +result in suboptimal plans for different processes if the processes are +running on non-identical hardware. Both of these functions are +_collective_ calls, which means that they must be executed by all +processes in the communicator. + + So, for example, a typical code snippet to import wisdom from a file +and use it on all processes would be: + + { + int rank; + + fftw_mpi_init(); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) fftw_import_wisdom_from_filename("mywisdom"); + fftw_mpi_broadcast_wisdom(MPI_COMM_WORLD); + } + + (Note that we must call `fftw_mpi_init' before importing any wisdom +that might contain MPI plans.) Similarly, a typical code snippet to +export wisdom from all processes to a file is: + + { + int rank; + + fftw_mpi_gather_wisdom(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) fftw_export_wisdom_to_filename("mywisdom"); + } + + ---------- Footnotes ---------- + + (1) In fact, even this assumption is not technically guaranteed by +the standard, although it seems to be universal in actual MPI +implementations and is widely assumed by MPI-using software. +Technically, you need to query the `MPI_IO' attribute of +`MPI_COMM_WORLD' with `MPI_Attr_get'. If this attribute is +`MPI_PROC_NULL', no I/O is possible. If it is `MPI_ANY_SOURCE', any +process can perform I/O. Otherwise, it is the rank of a process that +can perform I/O ... but since it is not guaranteed to yield the _same_ +rank on all processes, you have to do an `MPI_Allreduce' of some kind +if you want all processes to agree about which is going to do I/O. And +even then, the standard only guarantees that this process can perform +output, but not input. See e.g. `Parallel Programming with MPI' by P. +S. Pacheco, section 8.1.3. 
Needless to say, in our experience +virtually no MPI programmers worry about this. + + +File: fftw3.info, Node: Avoiding MPI Deadlocks, Next: FFTW MPI Performance Tips, Prev: FFTW MPI Wisdom, Up: Distributed-memory FFTW with MPI + +6.9 Avoiding MPI Deadlocks +========================== + +An MPI program can _deadlock_ if one process is waiting for a message +from another process that never gets sent. To avoid deadlocks when +using FFTW's MPI routines, it is important to know which functions are +_collective_: that is, which functions must _always_ be called in the +_same order_ from _every_ process in a given communicator. (For +example, `MPI_Barrier' is the canonical example of a collective +function in the MPI standard.) + + The functions in FFTW that are _always_ collective are: every +function beginning with `fftw_mpi_plan', as well as +`fftw_mpi_broadcast_wisdom' and `fftw_mpi_gather_wisdom'. Also, the +following functions from the ordinary FFTW interface are collective +when they are applied to a plan created by an `fftw_mpi_plan' function: +`fftw_execute', `fftw_destroy_plan', and `fftw_flops'. + + +File: fftw3.info, Node: FFTW MPI Performance Tips, Next: Combining MPI and Threads, Prev: Avoiding MPI Deadlocks, Up: Distributed-memory FFTW with MPI + +6.10 FFTW MPI Performance Tips +============================== + +In this section, we collect a few tips on getting the best performance +out of FFTW's MPI transforms. + + First, because of the 1d block distribution, FFTW's parallelization +is currently limited by the size of the first dimension. +(Multidimensional block distributions may be supported by a future +version.) More generally, you should ideally arrange the dimensions so +that FFTW can divide them equally among the processes. *Note Load +balancing::. + + Second, if it is not too inconvenient, you should consider working +with transposed output for multidimensional plans, as this saves a +considerable amount of communications. 
*Note Transposed +distributions::. + + Third, the fastest choices are generally either an in-place transform +or an out-of-place transform with the `FFTW_DESTROY_INPUT' flag (which +allows the input array to be used as scratch space). In-place is +especially beneficial if the amount of data per process is large. + + Fourth, if you have multiple arrays to transform at once, rather than +calling FFTW's MPI transforms several times it usually seems to be +faster to interleave the data and use the advanced interface. (This +groups the communications together instead of requiring separate +messages for each transform.) + + +File: fftw3.info, Node: Combining MPI and Threads, Next: FFTW MPI Reference, Prev: FFTW MPI Performance Tips, Up: Distributed-memory FFTW with MPI + +6.11 Combining MPI and Threads +============================== + +In certain cases, it may be advantageous to combine MPI +(distributed-memory) and threads (shared-memory) parallelization. FFTW +supports this, with certain caveats. For example, if you have a +cluster of 4-processor shared-memory nodes, you may want to use threads +within the nodes and MPI between the nodes, instead of MPI for all +parallelization. + + In particular, it is possible to seamlessly combine the MPI FFTW +routines with the multi-threaded FFTW routines (*note Multi-threaded +FFTW::). However, some care must be taken in the initialization code, +which should look something like this: + + int threads_ok; + + int main(int argc, char **argv) + { + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); + threads_ok = provided >= MPI_THREAD_FUNNELED; + + if (threads_ok) threads_ok = fftw_init_threads(); + fftw_mpi_init(); + + ... + if (threads_ok) fftw_plan_with_nthreads(...); + ... 
+ + MPI_Finalize(); + } + + First, note that instead of calling `MPI_Init', you should call +`MPI_Init_thread', which is the initialization routine defined by the +MPI-2 standard to indicate to MPI that your program will be +multithreaded. We pass `MPI_THREAD_FUNNELED', which indicates that we +will only call MPI routines from the main thread. (FFTW will launch +additional threads internally, but the extra threads will not call MPI +code.) (You may also pass `MPI_THREAD_SERIALIZED' or +`MPI_THREAD_MULTIPLE', which requests additional multithreading support +from the MPI implementation, but this is not required by FFTW.) The +`provided' parameter returns what level of threads support is actually +supported by your MPI implementation; this _must_ be at least +`MPI_THREAD_FUNNELED' if you want to call the FFTW threads routines, so +we define a global variable `threads_ok' to record this. You should +only call `fftw_init_threads' or `fftw_plan_with_nthreads' if +`threads_ok' is true. For more information on thread safety in MPI, +see the MPI and Threads +(http://www.mpi-forum.org/docs/mpi-20-html/node162.htm) section of the +MPI-2 standard. + + Second, we must call `fftw_init_threads' _before_ `fftw_mpi_init'. +This is critical for technical reasons having to do with how FFTW +initializes its list of algorithms. + + Then, if you call `fftw_plan_with_nthreads(N)', _every_ MPI process +will launch (up to) `N' threads to parallelize its transforms. + + For example, in the hypothetical cluster of 4-processor nodes, you +might wish to launch only a single MPI process per node, and then call +`fftw_plan_with_nthreads(4)' on each process to use all processors in +the nodes. + + This may or may not be faster than simply using as many MPI processes +as you have processors, however. On the one hand, using threads within +a node eliminates the need for explicit message passing within the +node. 
On the other hand, FFTW's transpose routines are not +multi-threaded, and this means that the communications that do take +place will not benefit from parallelization within the node. Moreover, +many MPI implementations already have optimizations to exploit shared +memory when it is available, so adding the multithreaded FFTW on top of +this may be superfluous. + + +File: fftw3.info, Node: FFTW MPI Reference, Next: FFTW MPI Fortran Interface, Prev: Combining MPI and Threads, Up: Distributed-memory FFTW with MPI + +6.12 FFTW MPI Reference +======================= + +This chapter provides a complete reference to all FFTW MPI functions, +datatypes, and constants. See also *note FFTW Reference:: for +information on functions and types in common with the serial interface. + +* Menu: + +* MPI Files and Data Types:: +* MPI Initialization:: +* Using MPI Plans:: +* MPI Data Distribution Functions:: +* MPI Plan Creation:: +* MPI Wisdom Communication:: + + +File: fftw3.info, Node: MPI Files and Data Types, Next: MPI Initialization, Prev: FFTW MPI Reference, Up: FFTW MPI Reference + +6.12.1 MPI Files and Data Types +------------------------------- + +All programs using FFTW's MPI support should include its header file: + + #include <fftw3-mpi.h> + + Note that this header file includes the serial-FFTW `fftw3.h' header +file, and also the `mpi.h' header file for MPI, so you need not include +those files separately. + + You must also link to _both_ the FFTW MPI library and to the serial +FFTW library. On Unix, this means adding `-lfftw3_mpi -lfftw3 -lm' at +the end of the link command. + + Different precisions are handled as in the serial interface: *Note +Precision::. That is, `fftw_' functions become `fftwf_' (in single +precision) etcetera, and the libraries become `-lfftw3f_mpi -lfftw3f +-lm' etcetera on Unix. Long-double precision is supported in MPI, but +quad precision (`fftwq_') is not due to the lack of MPI support for +this type. 
+ + +File: fftw3.info, Node: MPI Initialization, Next: Using MPI Plans, Prev: MPI Files and Data Types, Up: FFTW MPI Reference + +6.12.2 MPI Initialization +------------------------- + +Before calling any other FFTW MPI (`fftw_mpi_') function, and before +importing any wisdom for MPI problems, you must call: + + void fftw_mpi_init(void); + + If FFTW threads support is used, however, `fftw_mpi_init' should be +called _after_ `fftw_init_threads' (*note Combining MPI and Threads::). +Calling `fftw_mpi_init' additional times (before `fftw_mpi_cleanup') +has no effect. + + If you want to deallocate all persistent data and reset FFTW to the +pristine state it was in when you started your program, you can call: + + void fftw_mpi_cleanup(void); + + (This calls `fftw_cleanup', so you need not call the serial cleanup +routine too, although it is safe to do so.) After calling +`fftw_mpi_cleanup', all existing plans become undefined, and you should +not attempt to execute or destroy them. You must call `fftw_mpi_init' +again after `fftw_mpi_cleanup' if you want to resume using the MPI FFTW +routines. + + +File: fftw3.info, Node: Using MPI Plans, Next: MPI Data Distribution Functions, Prev: MPI Initialization, Up: FFTW MPI Reference + +6.12.3 Using MPI Plans +---------------------- + +Once an MPI plan is created, you can execute and destroy it using +`fftw_execute', `fftw_destroy_plan', and the other functions in the +serial interface that operate on generic plans (*note Using Plans::). + + The `fftw_execute' and `fftw_destroy_plan' functions, applied to MPI +plans, are _collective_ calls: they must be called for all processes in +the communicator that was used to create the plan. + + You must _not_ use the serial new-array plan-execution functions +`fftw_execute_dft' and so on (*note New-array Execute Functions::) with +MPI plans. 
Such functions are specialized to the problem type, and +there are specific new-array execute functions for MPI plans: + + void fftw_mpi_execute_dft(fftw_plan p, fftw_complex *in, fftw_complex *out); + void fftw_mpi_execute_dft_r2c(fftw_plan p, double *in, fftw_complex *out); + void fftw_mpi_execute_dft_c2r(fftw_plan p, fftw_complex *in, double *out); + void fftw_mpi_execute_r2r(fftw_plan p, double *in, double *out); + + These functions have the same restrictions as those of the serial +new-array execute functions. They are _always_ safe to apply to the +_same_ `in' and `out' arrays that were used to create the plan. They +can only be applied to new arrays if those arrays have the same types, +dimensions, in-placeness, and alignment as the original arrays, where +the best way to ensure the same alignment is to use FFTW's +`fftw_malloc' and related allocation functions for all arrays (*note +Memory Allocation::). Note that distributed transposes (*note FFTW MPI +Transposes::) use `fftw_mpi_execute_r2r', since they count as rank-zero +r2r plans from FFTW's perspective. + + +File: fftw3.info, Node: MPI Data Distribution Functions, Next: MPI Plan Creation, Prev: Using MPI Plans, Up: FFTW MPI Reference + +6.12.4 MPI Data Distribution Functions +-------------------------------------- + +As described above (*note MPI Data Distribution::), in order to +allocate your arrays, _before_ creating a plan, you must first call one +of the following routines to determine the required allocation size and +the portion of the array locally stored on a given process. The +`MPI_Comm' communicator passed here must be equivalent to the +communicator used below for plan creation. 
+ + The basic interface for multidimensional transforms consists of the +functions: + + ptrdiff_t fftw_mpi_local_size_2d(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); + ptrdiff_t fftw_mpi_local_size_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); + ptrdiff_t fftw_mpi_local_size(int rnk, const ptrdiff_t *n, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); + + ptrdiff_t fftw_mpi_local_size_2d_transposed(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); + ptrdiff_t fftw_mpi_local_size_3d_transposed(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); + ptrdiff_t fftw_mpi_local_size_transposed(int rnk, const ptrdiff_t *n, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); + + These functions return the number of elements to allocate (complex +numbers for DFT/r2c/c2r plans, real numbers for r2r plans), whereas the +`local_n0' and `local_0_start' return the portion (`local_0_start' to +`local_0_start + local_n0 - 1') of the first dimension of an n[0] x +n[1] x n[2] x ... x n[d-1] array that is stored on the local process. +*Note Basic and advanced distribution interfaces::. For +`FFTW_MPI_TRANSPOSED_OUT' plans, the `_transposed' variants are useful +in order to also return the local portion of the first dimension in the +n[1] x n[0] x n[2] x ... x n[d-1] transposed output. *Note Transposed +distributions::. 
The advanced interface for multidimensional +transforms is: + + ptrdiff_t fftw_mpi_local_size_many(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t block0, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); + ptrdiff_t fftw_mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); + + These differ from the basic interface in only two ways. First, they +allow you to specify block sizes `block0' and `block1' (the latter for +the transposed output); you can pass `FFTW_MPI_DEFAULT_BLOCK' to use +FFTW's default block size as in the basic interface. Second, you can +pass a `howmany' parameter, corresponding to the advanced planning +interface below: this is for transforms of contiguous `howmany'-tuples +of numbers (`howmany = 1' in the basic interface). + + The corresponding basic and advanced routines for one-dimensional +transforms (currently only complex DFTs) are: + + ptrdiff_t fftw_mpi_local_size_1d( + ptrdiff_t n0, MPI_Comm comm, int sign, unsigned flags, + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, + ptrdiff_t *local_no, ptrdiff_t *local_o_start); + ptrdiff_t fftw_mpi_local_size_many_1d( + ptrdiff_t n0, ptrdiff_t howmany, + MPI_Comm comm, int sign, unsigned flags, + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, + ptrdiff_t *local_no, ptrdiff_t *local_o_start); + + As above, the return value is the number of elements to allocate +(complex numbers, for complex DFTs). The `local_ni' and +`local_i_start' arguments return the portion (`local_i_start' to +`local_i_start + local_ni - 1') of the 1d array that is stored on this +process for the transform _input_, and `local_no' and `local_o_start' +are the corresponding quantities for the output. The `sign' +(`FFTW_FORWARD' or `FFTW_BACKWARD') and `flags' must match the +arguments passed when creating a plan. 
Although the inputs and outputs +have different data distributions in general, it is guaranteed that the +_output_ data distribution of an `FFTW_FORWARD' plan will match the +_input_ data distribution of an `FFTW_BACKWARD' plan and vice versa; +similarly for the `FFTW_MPI_SCRAMBLED_OUT' and `FFTW_MPI_SCRAMBLED_IN' +flags. *Note One-dimensional distributions::. + + +File: fftw3.info, Node: MPI Plan Creation, Next: MPI Wisdom Communication, Prev: MPI Data Distribution Functions, Up: FFTW MPI Reference + +6.12.5 MPI Plan Creation +------------------------ + +Complex-data MPI DFTs +..................... + +Plans for complex-data DFTs (*note 2d MPI example::) are created by: + + fftw_plan fftw_mpi_plan_dft_1d(ptrdiff_t n0, fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); + fftw_plan fftw_mpi_plan_dft_2d(ptrdiff_t n0, ptrdiff_t n1, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); + fftw_plan fftw_mpi_plan_dft_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); + fftw_plan fftw_mpi_plan_dft(int rnk, const ptrdiff_t *n, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); + fftw_plan fftw_mpi_plan_many_dft(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, ptrdiff_t block, ptrdiff_t tblock, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); + + These are similar to their serial counterparts (*note Complex DFTs::) +in specifying the dimensions, sign, and flags of the transform. The +`comm' argument gives an MPI communicator that specifies the set of +processes to participate in the transform; plan creation is a +collective function that must be called for all processes in the +communicator. The `in' and `out' pointers refer only to a portion of +the overall transform data (*note MPI Data Distribution::) as specified +by the `local_size' functions in the previous section. 
Unless `flags' +contains `FFTW_ESTIMATE', these arrays are overwritten during plan +creation as for the serial interface. For multi-dimensional +transforms, any dimensions `> 1' are supported; for one-dimensional +transforms, only composite (non-prime) `n0' are currently supported +(unlike the serial FFTW). Requesting an unsupported transform size +will yield a `NULL' plan. (As in the serial interface, highly +composite sizes generally yield the best performance.) + + The advanced-interface `fftw_mpi_plan_many_dft' additionally allows +you to specify the block sizes for the first dimension (`block') of the +n[0] x n[1] x n[2] x ... x n[d-1] input data and the first dimension +(`tblock') of the n[1] x n[0] x n[2] x ... x n[d-1] transposed data +(at intermediate steps of the transform, and for the output if +`FFTW_MPI_TRANSPOSED_OUT' is specified in `flags'). These must be the same +block sizes as were passed to the corresponding `local_size' function; +you can pass `FFTW_MPI_DEFAULT_BLOCK' to use FFTW's default block size +as in the basic interface. Also, the `howmany' parameter specifies +that the transform is of contiguous `howmany'-tuples rather than +individual complex numbers; this corresponds to the same parameter in +the serial advanced interface (*note Advanced Complex DFTs::) with +`stride = howmany' and `dist = 1'. + +MPI flags +......... + +The `flags' can be any of those for the serial FFTW (*note Planner +Flags::), and in addition may include one or more of the following +MPI-specific flags, which improve performance at the cost of changing +the output or input data formats. + + * `FFTW_MPI_SCRAMBLED_OUT', `FFTW_MPI_SCRAMBLED_IN': valid for 1d + transforms only, these flags indicate that the output/input of the + transform are in an undocumented "scrambled" order. A forward + `FFTW_MPI_SCRAMBLED_OUT' transform can be inverted by a backward + `FFTW_MPI_SCRAMBLED_IN' (times the usual 1/N normalization). + *Note One-dimensional distributions::. 
+ + * `FFTW_MPI_TRANSPOSED_OUT', `FFTW_MPI_TRANSPOSED_IN': valid for + multidimensional (`rnk > 1') transforms only, these flags specify + that the output or input of an n[0] x n[1] x n[2] x ... x n[d-1] + transform is transposed to n[1] x n[0] x n[2] x ... x n[d-1] . + *Note Transposed distributions::. + + +Real-data MPI DFTs +.................. + +Plans for real-input/output (r2c/c2r) DFTs (*note Multi-dimensional MPI +DFTs of Real Data::) are created by: + + fftw_plan fftw_mpi_plan_dft_r2c_2d(ptrdiff_t n0, ptrdiff_t n1, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_r2c_2d(ptrdiff_t n0, ptrdiff_t n1, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_r2c_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_r2c(int rnk, const ptrdiff_t *n, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_c2r_2d(ptrdiff_t n0, ptrdiff_t n1, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_c2r_2d(ptrdiff_t n0, ptrdiff_t n1, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_c2r_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_dft_c2r(int rnk, const ptrdiff_t *n, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); + + Similar to the serial interface (*note Real-data DFTs::), these +transform logically n[0] x n[1] x n[2] x ... x n[d-1] real data +to/from n[0] x n[1] x n[2] x ... x (n[d-1]/2 + 1) complex data, +representing the non-redundant half of the conjugate-symmetry output of +a real-input DFT (*note Multi-dimensional Transforms::). However, the +real array must be stored within a padded n[0] x n[1] x n[2] x ... 
x [2 +(n[d-1]/2 + 1)] + + array (much like the in-place serial r2c transforms, but here for +out-of-place transforms as well). Currently, only multi-dimensional +(`rnk > 1') r2c/c2r transforms are supported (requesting a plan for +`rnk = 1' will yield `NULL'). As explained above (*note +Multi-dimensional MPI DFTs of Real Data::), the data distribution of +both the real and complex arrays is given by the `local_size' function +called for the dimensions of the _complex_ array. Similar to the other +planning functions, the input and output arrays are overwritten when +the plan is created except in `FFTW_ESTIMATE' mode. + + As for the complex DFTs above, there is an advanced interface that +allows you to manually specify block sizes and to transform contiguous +`howmany'-tuples of real/complex numbers: + + fftw_plan fftw_mpi_plan_many_dft_r2c + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_many_dft_c2r + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); + +MPI r2r transforms +..................
+ +There are corresponding plan-creation routines for r2r transforms +(*note More DFTs of Real Data::), currently supporting multidimensional +(`rnk > 1') transforms only (`rnk = 1' will yield a `NULL' plan): + + fftw_plan fftw_mpi_plan_r2r_2d(ptrdiff_t n0, ptrdiff_t n1, + double *in, double *out, + MPI_Comm comm, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, + unsigned flags); + fftw_plan fftw_mpi_plan_r2r_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + double *in, double *out, + MPI_Comm comm, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, fftw_r2r_kind kind2, + unsigned flags); + fftw_plan fftw_mpi_plan_r2r(int rnk, const ptrdiff_t *n, + double *in, double *out, + MPI_Comm comm, const fftw_r2r_kind *kind, + unsigned flags); + fftw_plan fftw_mpi_plan_many_r2r(int rnk, const ptrdiff_t *n, + ptrdiff_t iblock, ptrdiff_t oblock, + double *in, double *out, + MPI_Comm comm, const fftw_r2r_kind *kind, + unsigned flags); + + The parameters are much the same as for the complex DFTs above, +except that the arrays are of real numbers (and hence the outputs of the +`local_size' data-distribution functions should be interpreted as +counts of real rather than complex numbers). Also, the `kind' +parameters specify the r2r kinds along each dimension as for the serial +interface (*note Real-to-Real Transform Kinds::). *Note Other +Multi-dimensional Real-data MPI Transforms::. + +MPI transposition +................. 
+ +FFTW also provides routines to plan a transpose of a distributed `n0' +by `n1' array of real numbers, or an array of `howmany'-tuples of real +numbers with specified block sizes (*note FFTW MPI Transposes::): + + fftw_plan fftw_mpi_plan_transpose(ptrdiff_t n0, ptrdiff_t n1, + double *in, double *out, + MPI_Comm comm, unsigned flags); + fftw_plan fftw_mpi_plan_many_transpose + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, + double *in, double *out, MPI_Comm comm, unsigned flags); + + These plans are used with the `fftw_mpi_execute_r2r' new-array +execute function (*note Using MPI Plans::), since they count as (rank +zero) r2r plans from FFTW's perspective. + + +File: fftw3.info, Node: MPI Wisdom Communication, Prev: MPI Plan Creation, Up: FFTW MPI Reference + +6.12.6 MPI Wisdom Communication +------------------------------- + +To facilitate synchronizing wisdom among the different MPI processes, +we provide two functions: + + void fftw_mpi_gather_wisdom(MPI_Comm comm); + void fftw_mpi_broadcast_wisdom(MPI_Comm comm); + + The `fftw_mpi_gather_wisdom' function gathers all wisdom in the +given communicator `comm' to the process of rank 0 in the communicator: +that process obtains the union of all wisdom on all the processes. As +a side effect, some other processes will gain additional wisdom from +other processes, but only process 0 will gain the complete union. + + The `fftw_mpi_broadcast_wisdom' does the reverse: it exports wisdom +from process 0 in `comm' to all other processes in the communicator, +replacing any wisdom they currently have. + + *Note FFTW MPI Wisdom::. + + +File: fftw3.info, Node: FFTW MPI Fortran Interface, Prev: FFTW MPI Reference, Up: Distributed-memory FFTW with MPI + +6.13 FFTW MPI Fortran Interface +=============================== + +The FFTW MPI interface is callable from modern Fortran compilers +supporting the Fortran 2003 `iso_c_binding' standard for calling C +functions. 
As described in *note Calling FFTW from Modern Fortran::, +this means that you can directly call FFTW's C interface from Fortran +with only minor changes in syntax. There are, however, a few things +specific to the MPI interface to keep in mind: + + * Instead of including `fftw3.f03' as in *note Overview of Fortran + interface::, you should `include 'fftw3-mpi.f03'' (after `use, + intrinsic :: iso_c_binding' as before). The `fftw3-mpi.f03' file + includes `fftw3.f03', so you should _not_ `include' them both + yourself. (You will also want to include the MPI header file, + usually via `include 'mpif.h'' or similar, although this is + not needed by `fftw3-mpi.f03' per se.) (To use the `fftwl_' `long + double' extended-precision routines in supporting compilers, you + should include `fftw3l-mpi.f03' in _addition_ to `fftw3-mpi.f03'. + *Note Extended and quadruple precision in Fortran::.) + + * Because of the different storage conventions between C and Fortran, + you reverse the order of your array dimensions when passing them to + FFTW (*note Reversing array dimensions::). This is merely a + difference in notation and incurs no performance overhead. + However, it means that, whereas in C the _first_ dimension is + distributed, in Fortran the _last_ dimension of your array is + distributed. + + * In Fortran, communicators are stored as `integer' types; there is + no `MPI_Comm' type, nor is there any way to access a C `MPI_Comm'. + Fortunately, this is taken care of for you by the FFTW Fortran + interface: whenever the C interface expects an `MPI_Comm' type, + you should pass the Fortran communicator as an `integer'.(1) + + * Because you need to call the `local_size' function to find out how + much space to allocate, and this may be _larger_ than the local + portion of the array (*note MPI Data Distribution::), you should + _always_ allocate your arrays dynamically using FFTW's allocation + routines as described in *note Allocating aligned memory in + Fortran::.
(Coincidentally, this also provides the best + performance by guaranteeing proper data alignment.) + + * Because all sizes in the MPI FFTW interface are declared as + `ptrdiff_t' in C, you should use `integer(C_INTPTR_T)' in Fortran + (*note FFTW Fortran type reference::). + + * In Fortran, because of the language semantics, we generally + recommend using the new-array execute functions for all plans, + even in the common case where you are executing the plan on the + same arrays for which the plan was created (*note Plan execution + in Fortran::). However, note that in the MPI interface these + functions are changed: `fftw_execute_dft' becomes + `fftw_mpi_execute_dft', etcetera. *Note Using MPI Plans::. + + + For example, here is a Fortran code snippet to perform a distributed +L x M complex DFT in-place. (This assumes you have already +initialized MPI with `MPI_init' and have also performed `call +fftw_mpi_init'.) + + use, intrinsic :: iso_c_binding + include 'fftw3-mpi.f03' + integer(C_INTPTR_T), parameter :: L = ... + integer(C_INTPTR_T), parameter :: M = ... + type(C_PTR) :: plan, cdata + complex(C_DOUBLE_COMPLEX), pointer :: data(:,:) + integer(C_INTPTR_T) :: i, j, alloc_local, local_M, local_j_offset + + ! get local data size and allocate (note dimension reversal) + alloc_local = fftw_mpi_local_size_2d(M, L, MPI_COMM_WORLD, & + local_M, local_j_offset) + cdata = fftw_alloc_complex(alloc_local) + call c_f_pointer(cdata, data, [L,local_M]) + + ! create MPI plan for in-place forward DFT (note dimension reversal) + plan = fftw_mpi_plan_dft_2d(M, L, data, data, MPI_COMM_WORLD, & + FFTW_FORWARD, FFTW_MEASURE) + + ! initialize data to some function my_function(i,j) + do j = 1, local_M + do i = 1, L + data(i, j) = my_function(i, j + local_j_offset) + end do + end do + + !
compute transform (as many times as desired) + call fftw_mpi_execute_dft(plan, data, data) + + call fftw_destroy_plan(plan) + call fftw_free(cdata) + + Note that we called `fftw_mpi_local_size_2d' and +`fftw_mpi_plan_dft_2d' with the dimensions in reversed order, since an L +x M Fortran array is viewed by FFTW in C as an M x L array. This +means that the array was distributed over the `M' dimension, the local +portion of which is an L x local_M array in Fortran. (You must _not_ +use an `allocate' statement to allocate an L x local_M array, however; +you must allocate `alloc_local' complex numbers, which may be greater +than `L * local_M', in order to reserve space for intermediate steps of +the transform.) Finally, we mention that because C's array indices are +zero-based, the `local_j_offset' argument can conveniently be +interpreted as an offset in the 1-based `j' index (rather than as a +starting index as in C). + + If instead you had used the `ior(FFTW_MEASURE, +FFTW_MPI_TRANSPOSED_OUT)' flag, the output of the transform would be a +transposed M x local_L array, associated with the _same_ `cdata' +allocation (since the transform is in-place), and which you could +declare with: + + complex(C_DOUBLE_COMPLEX), pointer :: tdata(:,:) + ... + call c_f_pointer(cdata, tdata, [M,local_L]) + + where `local_L' would have been obtained by changing the +`fftw_mpi_local_size_2d' call to: + + alloc_local = fftw_mpi_local_size_2d_transposed(M, L, MPI_COMM_WORLD, & + local_M, local_j_offset, local_L, local_i_offset) + + ---------- Footnotes ---------- + + (1) Technically, this is because you aren't actually calling the C +functions directly. You are calling wrapper functions that translate +the communicator with `MPI_Comm_f2c' before calling the ordinary C +interface. This is all done transparently, however, since the +`fftw3-mpi.f03' interface file renames the wrappers so that they are +called in Fortran with the same names as the C interface functions.
+ + +File: fftw3.info, Node: Calling FFTW from Modern Fortran, Next: Calling FFTW from Legacy Fortran, Prev: Distributed-memory FFTW with MPI, Up: Top + +7 Calling FFTW from Modern Fortran +********************************** + +Fortran 2003 standardized ways for Fortran code to call C libraries, +and this allows us to support a direct translation of the FFTW C API +into Fortran. Compared to the legacy Fortran 77 interface (*note +Calling FFTW from Legacy Fortran::), this direct interface offers many +advantages, especially compile-time type-checking and aligned memory +allocation. As of this writing, support for these C interoperability +features seems widespread, having been implemented in nearly all major +Fortran compilers (e.g. GNU, Intel, IBM, Oracle/Solaris, Portland +Group, NAG). + + This chapter documents that interface. For the most part, since this +interface allows Fortran to call the C interface directly, the usage is +identical to C translated to Fortran syntax. However, there are a few +subtle points such as memory allocation, wisdom, and data types that +deserve closer attention. + +* Menu: + +* Overview of Fortran interface:: +* Reversing array dimensions:: +* FFTW Fortran type reference:: +* Plan execution in Fortran:: +* Allocating aligned memory in Fortran:: +* Accessing the wisdom API from Fortran:: +* Defining an FFTW module:: + + +File: fftw3.info, Node: Overview of Fortran interface, Next: Reversing array dimensions, Prev: Calling FFTW from Modern Fortran, Up: Calling FFTW from Modern Fortran + +7.1 Overview of Fortran interface +================================= + +FFTW provides a file `fftw3.f03' that defines Fortran 2003 interfaces +for all of its C routines, except for the MPI routines described +elsewhere, which can be found in the same directory as `fftw3.h' (the C +header file). 
In any Fortran subroutine where you want to use FFTW +functions, you should begin with: + + use, intrinsic :: iso_c_binding + include 'fftw3.f03' + + This includes the interface definitions and the standard +`iso_c_binding' module (which defines the equivalents of C types). You +can also put the FFTW functions into a module if you prefer (*note +Defining an FFTW module::). + + At this point, you can now call anything in the FFTW C interface +directly, almost exactly as in C other than minor changes in syntax. +For example: + + type(C_PTR) :: plan + complex(C_DOUBLE_COMPLEX), dimension(1024,1000) :: in, out + plan = fftw_plan_dft_2d(1000,1024, in,out, FFTW_FORWARD,FFTW_ESTIMATE) + ... + call fftw_execute_dft(plan, in, out) + ... + call fftw_destroy_plan(plan) + + A few important things to keep in mind are: + + * FFTW plans are `type(C_PTR)'. Other C types are mapped in the + obvious way via the `iso_c_binding' standard: `int' turns into + `integer(C_INT)', `fftw_complex' turns into + `complex(C_DOUBLE_COMPLEX)', `double' turns into `real(C_DOUBLE)', + and so on. *Note FFTW Fortran type reference::. + + * Functions in C become functions in Fortran if they have a return + value, and subroutines in Fortran otherwise. + + * The ordering of the Fortran array dimensions must be _reversed_ + when they are passed to the FFTW plan creation, thanks to + differences in array indexing conventions (*note Multi-dimensional + Array Format::). This is _unlike_ the legacy Fortran interface + (*note Fortran-interface routines::), which reversed the dimensions + for you. *Note Reversing array dimensions::. + + * Using ordinary Fortran array declarations like this works, but may + yield suboptimal performance because the data may not be + aligned to exploit SIMD instructions on modern processors (*note + SIMD alignment and fftw_malloc::). Better performance will often + be obtained by allocating with `fftw_alloc'. *Note Allocating + aligned memory in Fortran::.
+ + * Similar to the legacy Fortran interface (*note FFTW Execution in + Fortran::), we currently recommend _not_ using `fftw_execute' but + rather using the more specialized functions like + `fftw_execute_dft' (*note New-array Execute Functions::). + However, you should execute the plan on the `same arrays' as the + ones for which you created the plan, unless you are especially + careful. *Note Plan execution in Fortran::. To prevent you from + using `fftw_execute' by mistake, the `fftw3.f03' file does not + provide an `fftw_execute' interface declaration. + + * Multiple planner flags are combined with `ior' (equivalent to `|' + in C). e.g. `FFTW_MEASURE | FFTW_DESTROY_INPUT' becomes + `ior(FFTW_MEASURE, FFTW_DESTROY_INPUT)'. (You can also use `+' as + long as you don't try to include a given flag more than once.) + + +* Menu: + +* Extended and quadruple precision in Fortran:: + + +File: fftw3.info, Node: Extended and quadruple precision in Fortran, Prev: Overview of Fortran interface, Up: Overview of Fortran interface + +7.1.1 Extended and quadruple precision in Fortran +------------------------------------------------- + +If FFTW is compiled in `long double' (extended) precision (*note +Installation and Customization::), you may be able to call the +resulting `fftwl_' routines (*note Precision::) from Fortran if your +compiler supports the `C_LONG_DOUBLE_COMPLEX' type code. 
+ + Because some Fortran compilers do not support +`C_LONG_DOUBLE_COMPLEX', the `fftwl_' declarations are segregated into +a separate interface file `fftw3l.f03', which you should include _in +addition_ to `fftw3.f03' (which declares precision-independent `FFTW_' +constants): + + use, intrinsic :: iso_c_binding + include 'fftw3.f03' + include 'fftw3l.f03' + + We also support using the nonstandard `__float128' +quadruple-precision type provided by recent versions of `gcc' on 32- +and 64-bit x86 hardware (*note Installation and Customization::), using +the corresponding `real(16)' and `complex(16)' types supported by +`gfortran'. The quadruple-precision `fftwq_' functions (*note +Precision::) are declared in a `fftw3q.f03' interface file, which +should be included in addition to `fftw3.f03', as above. You should +also link with `-lfftw3q -lquadmath -lm' as in C. + + +File: fftw3.info, Node: Reversing array dimensions, Next: FFTW Fortran type reference, Prev: Overview of Fortran interface, Up: Calling FFTW from Modern Fortran + +7.2 Reversing array dimensions +============================== + +A minor annoyance in calling FFTW from Fortran is that FFTW's array +dimensions are defined in the C convention (row-major order), while +Fortran's array dimensions are the opposite convention (column-major +order). *Note Multi-dimensional Array Format::. This is just a +bookkeeping difference, with no effect on performance. The only +consequence of this is that, whenever you create an FFTW plan for a +multi-dimensional transform, you must always _reverse the ordering of +the dimensions_. + + For example, consider the three-dimensional (L x M x N ) arrays: + + complex(C_DOUBLE_COMPLEX), dimension(L,M,N) :: in, out + + To plan a DFT for these arrays using `fftw_plan_dft_3d', you could +do: + + plan = fftw_plan_dft_3d(N,M,L, in,out, FFTW_FORWARD,FFTW_ESTIMATE) + + That is, from FFTW's perspective this is a N x M x L array.
_No +data transposition need occur_, as this is _only notation_. Similarly, +to use the more generic routine `fftw_plan_dft' with the same arrays, +you could do: + + integer(C_INT), dimension(3) :: n = [N,M,L] + plan = fftw_plan_dft(3, n, in,out, FFTW_FORWARD,FFTW_ESTIMATE) + + Note, by the way, that this is different from the legacy Fortran +interface (*note Fortran-interface routines::), which automatically +reverses the order of the array dimension for you. Here, you are +calling the C interface directly, so there is no "translation" layer. + + An important thing to keep in mind is the implication of this for +multidimensional real-to-complex transforms (*note Multi-Dimensional +DFTs of Real Data::). In C, a multidimensional real-to-complex DFT +chops the last dimension roughly in half (N x M x L real input goes to +N x M x L/2+1 complex output). In Fortran, because the array +dimension notation is reversed, the _first_ dimension of the complex +data is chopped roughly in half. For example consider the `r2c' +transform of L x M x N real input in Fortran: + + type(C_PTR) :: plan + real(C_DOUBLE), dimension(L,M,N) :: in + complex(C_DOUBLE_COMPLEX), dimension(L/2+1,M,N) :: out + plan = fftw_plan_dft_r2c_3d(N,M,L, in,out, FFTW_ESTIMATE) + ... + call fftw_execute_dft_r2c(plan, in, out) + + Alternatively, for an in-place r2c transform, as described in the C +documentation we must _pad_ the _first_ dimension of the real input +with an extra two entries (which are ignored by FFTW) so as to leave +enough space for the complex output. The input is _allocated_ as a +2[L/2+1] x M x N array, even though only L x M x N of it is actually +used. In this example, we will allocate the array as a pointer type, +using `fftw_alloc' to ensure aligned memory for maximum performance +(*note Allocating aligned memory in Fortran::); this also makes it easy +to reference the same memory as both a real array and a complex array.
+ + real(C_DOUBLE), pointer :: in(:,:,:) + complex(C_DOUBLE_COMPLEX), pointer :: out(:,:,:) + type(C_PTR) :: plan, data + data = fftw_alloc_complex(int((L/2+1) * M * N, C_SIZE_T)) + call c_f_pointer(data, in, [2*(L/2+1),M,N]) + call c_f_pointer(data, out, [L/2+1,M,N]) + plan = fftw_plan_dft_r2c_3d(N,M,L, in,out, FFTW_ESTIMATE) + ... + call fftw_execute_dft_r2c(plan, in, out) + ... + call fftw_destroy_plan(plan) + call fftw_free(data) + + +File: fftw3.info, Node: FFTW Fortran type reference, Next: Plan execution in Fortran, Prev: Reversing array dimensions, Up: Calling FFTW from Modern Fortran + +7.3 FFTW Fortran type reference +=============================== + +The following are the most important type correspondences between the C +interface and Fortran: + + * Plans (`fftw_plan' and variants) are `type(C_PTR)' (i.e. an opaque + pointer). + + * The C floating-point types `double', `float', and `long double' + correspond to `real(C_DOUBLE)', `real(C_FLOAT)', and + `real(C_LONG_DOUBLE)', respectively. The C complex types + `fftw_complex', `fftwf_complex', and `fftwl_complex' correspond in + Fortran to `complex(C_DOUBLE_COMPLEX)', + `complex(C_FLOAT_COMPLEX)', and `complex(C_LONG_DOUBLE_COMPLEX)', + respectively. Just as in C (*note Precision::), the FFTW + subroutines and types are prefixed with `fftw_', `fftwf_', and + `fftwl_' for the different precisions, and link to different + libraries (`-lfftw3', `-lfftw3f', and `-lfftw3l' on Unix), but use + the _same_ include file `fftw3.f03' and the _same_ constants (all + of which begin with `FFTW_'). The exception is `long double' + precision, for which you should _also_ include `fftw3l.f03' (*note + Extended and quadruple precision in Fortran::). + + * The C integer types `int' and `unsigned' (used for planner flags) + become `integer(C_INT)'. The C integer type `ptrdiff_t' (e.g. in + the *note 64-bit Guru Interface::) becomes `integer(C_INTPTR_T)', + and `size_t' (in `fftw_malloc' etc.) becomes `integer(C_SIZE_T)'. 
+ + * The `fftw_r2r_kind' type (*note Real-to-Real Transform Kinds::) + becomes `integer(C_FFTW_R2R_KIND)'. The various constant values + of the C enumerated type (`FFTW_R2HC' etc.) become simply integer + constants of the same names in Fortran. + + * Numeric array pointer arguments (e.g. `double *') become + `dimension(*), intent(out)' arrays of the same type, or + `dimension(*), intent(in)' if they are pointers to constant data + (e.g. `const int *'). There are a few exceptions where numeric + pointers refer to scalar outputs (e.g. for `fftw_flops'), in which + case they are `intent(out)' scalar arguments in Fortran too. For + the new-array execute functions (*note New-array Execute + Functions::), the input arrays are declared `dimension(*), + intent(inout)', since they can be modified in the case of in-place + or `FFTW_DESTROY_INPUT' transforms. + + * Pointer _return_ values (e.g `double *') become `type(C_PTR)'. + (If they are pointers to arrays, as for `fftw_alloc_real', you can + convert them back to Fortran array pointers with the standard + intrinsic function `c_f_pointer'.) + + * The `fftw_iodim' type in the guru interface (*note Guru vector and + transform sizes::) becomes `type(fftw_iodim)' in Fortran, a + derived data type (the Fortran analogue of C's `struct') with + three `integer(C_INT)' components: `n', `is', and `os', with the + same meanings as in C. The `fftw_iodim64' type in the 64-bit guru + interface (*note 64-bit Guru Interface::) is the same, except that + its components are of type `integer(C_INTPTR_T)'. + + * Using the wisdom import/export functions from Fortran is a bit + tricky, and is discussed in *note Accessing the wisdom API from + Fortran::. In brief, the `FILE *' arguments map to `type(C_PTR)', + `const char *' to `character(C_CHAR), dimension(*), intent(in)' + (null-terminated!), and the generic read-char/write-char functions + map to `type(C_FUNPTR)'. 
+ + + You may be wondering if you need to search-and-replace +`real(kind(0.0d0))' (or whatever your favorite Fortran spelling of +"double precision" is) with `real(C_DOUBLE)' everywhere in your +program, and similarly for `complex' and `integer' types. The answer +is no; you can still use your existing types. As long as these types +match their C counterparts, things should work without a hitch. The +worst that can happen, e.g. in the (unlikely) event of a system where +`real(kind(0.0d0))' is different from `real(C_DOUBLE)', is that the +compiler will give you a type-mismatch error. That is, if you don't +use the `iso_c_binding' kinds you need to accept at least the +theoretical possibility of having to change your code in response to +compiler errors on some future machine, but you don't need to worry +about silently compiling incorrect code that yields runtime errors. + + +File: fftw3.info, Node: Plan execution in Fortran, Next: Allocating aligned memory in Fortran, Prev: FFTW Fortran type reference, Up: Calling FFTW from Modern Fortran + +7.4 Plan execution in Fortran +============================= + +In C, in order to use a plan, one normally calls `fftw_execute', which +executes the plan to perform the transform on the input/output arrays +passed when the plan was created (*note Using Plans::). The +corresponding subroutine call in modern Fortran is: + call fftw_execute(plan) + + However, we have had reports that this causes problems with some +recent optimizing Fortran compilers. The problem is, because the +input/output arrays are not passed as explicit arguments to +`fftw_execute', the semantics of Fortran (unlike C) allow the compiler +to assume that the input/output arrays are not changed by +`fftw_execute'. As a consequence, certain compilers end up +repositioning the call to `fftw_execute', assuming incorrectly that it +does nothing to the arrays. 
+ + There are various workarounds to this, but the safest and simplest +thing is to not use `fftw_execute' in Fortran. Instead, use the +functions described in *note New-array Execute Functions::, which take +the input/output arrays as explicit arguments. For example, if the +plan is for a complex-data DFT and was created for the arrays `in' and +`out', you would do: + call fftw_execute_dft(plan, in, out) + + There are a few things to be careful of, however: + + * You must use the correct type of execute function, matching the way + the plan was created. Complex DFT plans should use + `fftw_execute_dft', real-input (r2c) DFT plans should use + `fftw_execute_dft_r2c', and real-output (c2r) DFT plans should use + `fftw_execute_dft_c2r'. The various r2r plans should use + `fftw_execute_r2r'. Fortunately, if you use the wrong one you + will get a compile-time type-mismatch error (unlike legacy + Fortran). + + * You should normally pass the same input/output arrays that were + used when creating the plan. This is always safe. + + * _If_ you pass _different_ input/output arrays compared to those + used when creating the plan, you must abide by all the + restrictions of the new-array execute functions (*note New-array + Execute Functions::). The most tricky of these is the requirement + that the new arrays have the same alignment as the original + arrays; the best (and possibly only) way to guarantee this is to + use the `fftw_alloc' functions to allocate your arrays (*note + Allocating aligned memory in Fortran::). Alternatively, you can + use the `FFTW_UNALIGNED' flag when creating the plan, in which + case the plan does not depend on the alignment, but this may + sacrifice substantial performance on architectures (like x86) with + SIMD instructions (*note SIMD alignment and fftw_malloc::).
+ + + +File: fftw3.info, Node: Allocating aligned memory in Fortran, Next: Accessing the wisdom API from Fortran, Prev: Plan execution in Fortran, Up: Calling FFTW from Modern Fortran + +7.5 Allocating aligned memory in Fortran +======================================== + +In order to obtain maximum performance in FFTW, you should store your +data in arrays that have been specially aligned in memory (*note SIMD +alignment and fftw_malloc::). Enforcing alignment also permits you to +safely use the new-array execute functions (*note New-array Execute +Functions::) to apply a given plan to more than one pair of in/out +arrays. Unfortunately, standard Fortran arrays do _not_ provide any +alignment guarantees. The _only_ way to allocate aligned memory in +standard Fortran is to allocate it with an external C function, like +the `fftw_alloc_real' and `fftw_alloc_complex' functions. Fortunately, +Fortran 2003 provides a simple way to associate such allocated memory +with a standard Fortran array pointer that you can then use normally. + + We therefore recommend allocating all your input/output arrays using +the following technique: + + 1. Declare a `pointer', `arr', to your array of the desired type and + dimensions. For example, `real(C_DOUBLE), pointer :: a(:,:)' for + a 2d real array, or `complex(C_DOUBLE_COMPLEX), pointer :: + a(:,:,:)' for a 3d complex array. + + 2. The number of elements to allocate must be an `integer(C_SIZE_T)'. + You can either declare a variable of this type, e.g. + `integer(C_SIZE_T) :: sz', to store the number of elements to + allocate, or you can use the `int(..., C_SIZE_T)' intrinsic + function. e.g. set `sz = L * M * N' or use `int(L * M * N, + C_SIZE_T)' for an L x M x N array. + + 3. Declare a `type(C_PTR) :: p' to hold the return value from FFTW's + allocation routine. Set `p = fftw_alloc_real(sz)' for a real + array, or `p = fftw_alloc_complex(sz)' for a complex array. + + 4. 
Associate your pointer `arr' with the allocated memory `p' using + the standard `c_f_pointer' subroutine: `call c_f_pointer(p, arr, + [...dimensions...])', where `[...dimensions...]' is an array of + the dimensions of the array (in the usual Fortran order). e.g. + `call c_f_pointer(p, arr, [L,M,N])' for an L x M x N array. + (Alternatively, you can omit the dimensions argument if you + specified the shape explicitly when declaring `arr'.) You can now + use `arr' as a usual multidimensional array. + + 5. When you are done using the array, deallocate the memory by `call + fftw_free(p)' on `p'. + + + For example, here is how we would allocate an L x M 2d real array: + + real(C_DOUBLE), pointer :: arr(:,:) + type(C_PTR) :: p + p = fftw_alloc_real(int(L * M, C_SIZE_T)) + call c_f_pointer(p, arr, [L,M]) + _...use arr and arr(i,j) as usual..._ + call fftw_free(p) + + and here is an L x M x N 3d complex array: + + complex(C_DOUBLE_COMPLEX), pointer :: arr(:,:,:) + type(C_PTR) :: p + p = fftw_alloc_complex(int(L * M * N, C_SIZE_T)) + call c_f_pointer(p, arr, [L,M,N]) + _...use arr and arr(i,j,k) as usual..._ + call fftw_free(p) + + See *note Reversing array dimensions:: for an example allocating a +single array and associating both real and complex array pointers with +it, for in-place real-to-complex transforms. + + +File: fftw3.info, Node: Accessing the wisdom API from Fortran, Next: Defining an FFTW module, Prev: Allocating aligned memory in Fortran, Up: Calling FFTW from Modern Fortran + +7.6 Accessing the wisdom API from Fortran +========================================= + +As explained in *note Words of Wisdom-Saving Plans::, FFTW provides a +"wisdom" API for saving plans to disk so that they can be recreated +quickly. The C API for exporting (*note Wisdom Export::) and importing +(*note Wisdom Import::) wisdom is somewhat tricky to use from Fortran, +however, because of differences in file I/O and string types between C +and Fortran.
+ +* Menu: + +* Wisdom File Export/Import from Fortran:: +* Wisdom String Export/Import from Fortran:: +* Wisdom Generic Export/Import from Fortran:: + + +File: fftw3.info, Node: Wisdom File Export/Import from Fortran, Next: Wisdom String Export/Import from Fortran, Prev: Accessing the wisdom API from Fortran, Up: Accessing the wisdom API from Fortran + +7.6.1 Wisdom File Export/Import from Fortran +-------------------------------------------- + +The easiest way to export and import wisdom is to do so using +`fftw_export_wisdom_to_filename' and `fftw_import_wisdom_from_filename'. The +only trick is that these require you to pass a C string, which is an +array of type `CHARACTER(C_CHAR)' that is terminated by `C_NULL_CHAR'. +You can call them like this: + + integer(C_INT) :: ret + ret = fftw_export_wisdom_to_filename(C_CHAR_'my_wisdom.dat' // C_NULL_CHAR) + if (ret .eq. 0) stop 'error exporting wisdom to file' + ret = fftw_import_wisdom_from_filename(C_CHAR_'my_wisdom.dat' // C_NULL_CHAR) + if (ret .eq. 0) stop 'error importing wisdom from file' + + Note that prepending `C_CHAR_' is needed to specify that the literal +string is of kind `C_CHAR', and we null-terminate the string by +appending `// C_NULL_CHAR'. These functions return an `integer(C_INT)' +(`ret') which is `0' if an error occurred during export/import and +nonzero otherwise. + + It is also possible to use the lower-level routines +`fftw_export_wisdom_to_file' and `fftw_import_wisdom_from_file', which +accept parameters of the C type `FILE*', expressed in Fortran as +`type(C_PTR)'. However, you are then responsible for creating the +`FILE*' yourself. You can do this by using `iso_c_binding' to define +Fortran interfaces for the C library functions `fopen' and `fclose', +which is a bit strange in Fortran but workable.
+ + +File: fftw3.info, Node: Wisdom String Export/Import from Fortran, Next: Wisdom Generic Export/Import from Fortran, Prev: Wisdom File Export/Import from Fortran, Up: Accessing the wisdom API from Fortran + +7.6.2 Wisdom String Export/Import from Fortran +---------------------------------------------- + +Dealing with FFTW's C string export/import is a bit more painful. In +particular, the `fftw_export_wisdom_to_string' function requires you to +deal with a dynamically allocated C string. To get its length, you +must define an interface to the C `strlen' function, and to deallocate +it you must define an interface to C `free': + + use, intrinsic :: iso_c_binding + interface + integer(C_INT) function strlen(s) bind(C, name='strlen') + import + type(C_PTR), value :: s + end function strlen + subroutine free(p) bind(C, name='free') + import + type(C_PTR), value :: p + end subroutine free + end interface + + Given these definitions, you can then export wisdom to a Fortran +character array: + + character(C_CHAR), pointer :: s(:) + integer(C_SIZE_T) :: slen + type(C_PTR) :: p + p = fftw_export_wisdom_to_string() + if (.not. c_associated(p)) stop 'error exporting wisdom' + slen = strlen(p) + call c_f_pointer(p, s, [slen+1]) + ... + call free(p) + + Note that `slen' is the length of the C string, but the length of +the array is `slen+1' because it includes the terminating null +character. (You can omit the `+1' if you don't want Fortran to know +about the null character.) The standard `c_associated' function checks +whether `p' is a null pointer, which is returned by +`fftw_export_wisdom_to_string' if there was an error. + + To import wisdom from a string, use `fftw_import_wisdom_from_string' +as usual; note that the argument of this function must be a +`character(C_CHAR)' that is terminated by the `C_NULL_CHAR' character, +like the `s' array above. 
+ + +File: fftw3.info, Node: Wisdom Generic Export/Import from Fortran, Prev: Wisdom String Export/Import from Fortran, Up: Accessing the wisdom API from Fortran + +7.6.3 Wisdom Generic Export/Import from Fortran +----------------------------------------------- + +The most generic wisdom export/import functions allow you to provide an +arbitrary callback function to read/write one character at a time in +any way you want. However, your callback function must be written in a +special way, using the `bind(C)' attribute to be passed to a C +interface. + + In particular, to call the generic wisdom export function +`fftw_export_wisdom', you would write a callback subroutine of the form: + + subroutine my_write_char(c, p) bind(C) + use, intrinsic :: iso_c_binding + character(C_CHAR), value :: c + type(C_PTR), value :: p + _...write c..._ + end subroutine my_write_char + + Given such a subroutine (along with the corresponding interface +definition), you could then export wisdom using: + + call fftw_export_wisdom(c_funloc(my_write_char), p) + + The standard `c_funloc' intrinsic converts a Fortran `bind(C)' +subroutine into a C function pointer. The parameter `p' is a +`type(C_PTR)' to any arbitrary data that you want to pass to +`my_write_char' (or `C_NULL_PTR' if none). (Note that you can get a C +pointer to Fortran data using the intrinsic `c_loc', and convert it +back to a Fortran pointer in `my_write_char' using `c_f_pointer'.) + + Similarly, to use the generic `fftw_import_wisdom', you would define +a callback function of the form: + + integer(C_INT) function my_read_char(p) bind(C) + use, intrinsic :: iso_c_binding + type(C_PTR), value :: p + character :: c + _...read a character c..._ + my_read_char = ichar(c, C_INT) + end function my_read_char + + .... + + integer(C_INT) :: ret + ret = fftw_import_wisdom(c_funloc(my_read_char), p) + if (ret .eq. 0) stop 'error importing wisdom' + + Your function can return `-1' if the end of the input is reached. 
+Again, `p' is an arbitrary `type(C_PTR)' that is passed through to your +function. `fftw_import_wisdom' returns `0' if an error occurred and +nonzero otherwise. + + +File: fftw3.info, Node: Defining an FFTW module, Prev: Accessing the wisdom API from Fortran, Up: Calling FFTW from Modern Fortran + +7.7 Defining an FFTW module +=========================== + +Rather than using the `include' statement to include the `fftw3.f03' +interface file in any subroutine where you want to use FFTW, you might +prefer to define an FFTW Fortran module. FFTW does not install itself +as a module, primarily because `fftw3.f03' can be shared between +different Fortran compilers while modules (in general) cannot. +However, it is trivial to define your own FFTW module if you want. +Just create a file containing: + + module FFTW3 + use, intrinsic :: iso_c_binding + include 'fftw3.f03' + end module + + Compile this file into a module as usual for your compiler (e.g. with +`gfortran -c' you will get a file `fftw3.mod'). Now, instead of +`include 'fftw3.f03'', whenever you want to use FFTW routines you can +just do: + + use FFTW3 + + as usual for Fortran modules. (You still need to link to the FFTW +library, of course.) + + +File: fftw3.info, Node: Calling FFTW from Legacy Fortran, Next: Upgrading from FFTW version 2, Prev: Calling FFTW from Modern Fortran, Up: Top + +8 Calling FFTW from Legacy Fortran +********************************** + +This chapter describes the interface to FFTW callable by Fortran code +in older compilers not supporting the Fortran 2003 C interoperability +features (*note Calling FFTW from Modern Fortran::). This interface +has the major disadvantage that it is not type-checked, so if you +mistake the argument types or ordering then your program will not have +any compiler errors, and will likely crash at runtime. So, greater +care is needed.
Also, technically interfacing older Fortran versions +to C is nonstandard, but in practice we have found that the techniques +used in this chapter have worked with all known Fortran compilers for +many years. + + The legacy Fortran interface differs from the C interface only in the +prefix (`dfftw_' instead of `fftw_' in double precision) and a few +other minor details. This Fortran interface is included in the FFTW +libraries by default, unless a Fortran compiler isn't found on your +system or `--disable-fortran' is included in the `configure' flags. We +assume here that the reader is already familiar with the usage of FFTW +in C, as described elsewhere in this manual. + + The MPI parallel interface to FFTW is _not_ currently available to +legacy Fortran. + +* Menu: + +* Fortran-interface routines:: +* FFTW Constants in Fortran:: +* FFTW Execution in Fortran:: +* Fortran Examples:: +* Wisdom of Fortran?:: + + +File: fftw3.info, Node: Fortran-interface routines, Next: FFTW Constants in Fortran, Prev: Calling FFTW from Legacy Fortran, Up: Calling FFTW from Legacy Fortran + +8.1 Fortran-interface routines +============================== + +Nearly all of the FFTW functions have Fortran-callable equivalents. +The name of the legacy Fortran routine is the same as that of the +corresponding C routine, but with the `fftw_' prefix replaced by +`dfftw_'.(1) The single and long-double precision versions use +`sfftw_' and `lfftw_', respectively, instead of `fftwf_' and `fftwl_'; +quadruple precision (`real*16') is available on some systems as +`fftwq_' (*note Precision::). (Note that `long double' on x86 hardware +is usually at most 80-bit extended precision, _not_ quadruple +precision.) + + For the most part, all of the arguments to the functions are the +same, with the following exceptions: + + * `plan' variables (what would be of type `fftw_plan' in C), must be + declared as a type that is at least as big as a pointer (address) + on your machine. 
We recommend using `integer*8' everywhere, since + this should always be big enough. + + * Any function that returns a value (e.g. `fftw_plan_dft') is + converted into a _subroutine_. The return value is converted into + an additional _first_ parameter of this subroutine.(2) + + * The Fortran routines expect multi-dimensional arrays to be in + _column-major_ order, which is the ordinary format of Fortran + arrays (*note Multi-dimensional Array Format::). They do this + transparently and costlessly simply by reversing the order of the + dimensions passed to FFTW, but this has one important consequence + for multi-dimensional real-complex transforms, discussed below. + + * Wisdom import and export is somewhat more tricky because one cannot + easily pass files or strings between C and Fortran; see *note + Wisdom of Fortran?::. + + * Legacy Fortran cannot use the `fftw_malloc' dynamic-allocation + routine. If you want to exploit the SIMD FFTW (*note SIMD + alignment and fftw_malloc::), you'll need to figure out some other + way to ensure that your arrays are at least 16-byte aligned. + + * Since Fortran 77 does not have data structures, the `fftw_iodim' + structure from the guru interface (*note Guru vector and transform + sizes::) must be split into separate arguments. In particular, any + `fftw_iodim' array arguments in the C guru interface become three + integer array arguments (`n', `is', and `os') in the Fortran guru + interface, all of whose lengths should be equal to the + corresponding `rank' argument. + + * The guru planner interface in Fortran does _not_ do any automatic + translation between column-major and row-major; you are responsible + for setting the strides etcetera to correspond to your Fortran + arrays. 
However, as a slight bug that we are preserving for + backwards compatibility, the `plan_guru_r2r' in Fortran _does_ + reverse the order of its `kind' array parameter, so the `kind' + array of that routine should be in the reverse of the order of the + iodim arrays (see above). + + + In general, you should take care to use Fortran data types that +correspond to (i.e. are the same size as) the C types used by FFTW. In +practice, this correspondence is usually straightforward (i.e. +`integer' corresponds to `int', `real' corresponds to `float', +etcetera). The native Fortran double/single-precision complex type +should be compatible with `fftw_complex'/`fftwf_complex'. Such simple +correspondences are assumed in the examples below. + + ---------- Footnotes ---------- + + (1) Technically, Fortran 77 identifiers are not allowed to have more +than 6 characters, nor may they contain underscores. Any compiler that +enforces this limitation doesn't deserve to link to FFTW. + + (2) The reason for this is that some Fortran implementations seem to +have trouble with C function return values, and vice versa. + + +File: fftw3.info, Node: FFTW Constants in Fortran, Next: FFTW Execution in Fortran, Prev: Fortran-interface routines, Up: Calling FFTW from Legacy Fortran + +8.2 FFTW Constants in Fortran +============================= + +When creating plans in FFTW, a number of constants are used to specify +options, such as `FFTW_MEASURE' or `FFTW_ESTIMATE'. The same constants +must be used with the wrapper routines, but of course the C header +files where the constants are defined can't be incorporated directly +into Fortran code. + + Instead, we have placed Fortran equivalents of the FFTW constant +definitions in the file `fftw3.f', which can be found in the same +directory as `fftw3.h'. If your Fortran compiler supports a +preprocessor of some sort, you should be able to `include' or +`#include' this file; otherwise, you can paste it directly into your +code. 
+ + In C, you combine different flags (like `FFTW_PRESERVE_INPUT' and +`FFTW_MEASURE') using the ``|'' operator; in Fortran you should just +use ``+''. (Take care not to add in the same flag more than once, +though. Alternatively, you can use the `ior' intrinsic function +standardized in Fortran 95.) + + +File: fftw3.info, Node: FFTW Execution in Fortran, Next: Fortran Examples, Prev: FFTW Constants in Fortran, Up: Calling FFTW from Legacy Fortran + +8.3 FFTW Execution in Fortran +============================= + +In C, in order to use a plan, one normally calls `fftw_execute', which +executes the plan to perform the transform on the input/output arrays +passed when the plan was created (*note Using Plans::). The +corresponding subroutine call in legacy Fortran is: + call dfftw_execute(plan) + + However, we have had reports that this causes problems with some +recent optimizing Fortran compilers. The problem is, because the +input/output arrays are not passed as explicit arguments to +`dfftw_execute', the semantics of Fortran (unlike C) allow the compiler +to assume that the input/output arrays are not changed by +`dfftw_execute'. As a consequence, certain compilers end up optimizing +out or repositioning the call to `dfftw_execute', assuming incorrectly +that it does nothing. + + There are various workarounds to this, but the safest and simplest +thing is to not use `dfftw_execute' in Fortran. Instead, use the +functions described in *note New-array Execute Functions::, which take +the input/output arrays as explicit arguments. For example, if the +plan is for a complex-data DFT and was created for the arrays `in' and +`out', you would do: + call dfftw_execute_dft(plan, in, out) + + There are a few things to be careful of, however: + + * You must use the correct type of execute function, matching the way + the plan was created. 
Complex DFT plans should use + `dfftw_execute_dft', real-input (r2c) DFT plans should use + `dfftw_execute_dft_r2c', and real-output (c2r) DFT plans should + use `dfftw_execute_dft_c2r'. The various r2r plans should use + `dfftw_execute_r2r'. + + * You should normally pass the same input/output arrays that were + used when creating the plan. This is always safe. + + * _If_ you pass _different_ input/output arrays compared to those + used when creating the plan, you must abide by all the + restrictions of the new-array execute functions (*note New-array + Execute Functions::). The most difficult of these, in Fortran, is + the requirement that the new arrays have the same alignment as the + original arrays, because there seems to be no way in legacy + Fortran to obtain guaranteed-aligned arrays (analogous to + `fftw_malloc' in C). You can, of course, use the `FFTW_UNALIGNED' + flag when creating the plan, in which case the plan does not + depend on the alignment, but this may sacrifice substantial + performance on architectures (like x86) with SIMD instructions + (*note SIMD alignment and fftw_malloc::).
+ + + +File: fftw3.info, Node: Fortran Examples, Next: Wisdom of Fortran?, Prev: FFTW Execution in Fortran, Up: Calling FFTW from Legacy Fortran + +8.4 Fortran Examples +==================== + +In C, you might have something like the following to transform a +one-dimensional complex array: + + fftw_complex in[N], out[N]; + fftw_plan plan; + + plan = fftw_plan_dft_1d(N,in,out,FFTW_FORWARD,FFTW_ESTIMATE); + fftw_execute(plan); + fftw_destroy_plan(plan); + + In Fortran, you would use the following to accomplish the same thing: + + double complex in, out + dimension in(N), out(N) + integer*8 plan + + call dfftw_plan_dft_1d(plan,N,in,out,FFTW_FORWARD,FFTW_ESTIMATE) + call dfftw_execute_dft(plan, in, out) + call dfftw_destroy_plan(plan) + + Notice how all routines are called as Fortran subroutines, and the +plan is returned via the first argument to `dfftw_plan_dft_1d'. Notice +also that we changed `fftw_execute' to `dfftw_execute_dft' (*note FFTW +Execution in Fortran::). To do the same thing, but using 8 threads in +parallel (*note Multi-threaded FFTW::), you would simply prefix these +calls with: + + integer iret + call dfftw_init_threads(iret) + call dfftw_plan_with_nthreads(8) + + (You might want to check the value of `iret': if it is zero, it +indicates an unlikely error during thread initialization.) + + To transform a three-dimensional array in-place with C, you might do: + + fftw_complex arr[L][M][N]; + fftw_plan plan; + + plan = fftw_plan_dft_3d(L,M,N, arr,arr, + FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute(plan); + fftw_destroy_plan(plan); + + In Fortran, you would use this instead: + + double complex arr + dimension arr(L,M,N) + integer*8 plan + + call dfftw_plan_dft_3d(plan, L,M,N, arr,arr, + & FFTW_FORWARD, FFTW_ESTIMATE) + call dfftw_execute_dft(plan, arr, arr) + call dfftw_destroy_plan(plan) + + Note that we pass the array dimensions in the "natural" order in +both C and Fortran. 
+ + To transform a one-dimensional real array in Fortran, you might do: + + double precision in + dimension in(N) + double complex out + dimension out(N/2 + 1) + integer*8 plan + + call dfftw_plan_dft_r2c_1d(plan,N,in,out,FFTW_ESTIMATE) + call dfftw_execute_dft_r2c(plan, in, out) + call dfftw_destroy_plan(plan) + + To transform a two-dimensional real array, out of place, you might +use the following: + + double precision in + dimension in(M,N) + double complex out + dimension out(M/2 + 1, N) + integer*8 plan + + call dfftw_plan_dft_r2c_2d(plan,M,N,in,out,FFTW_ESTIMATE) + call dfftw_execute_dft_r2c(plan, in, out) + call dfftw_destroy_plan(plan) + + *Important:* Notice that it is the _first_ dimension of the complex +output array that is cut in half in Fortran, rather than the last +dimension as in C. This is a consequence of the interface routines +reversing the order of the array dimensions passed to FFTW so that the +Fortran program can use its ordinary column-major order. + + +File: fftw3.info, Node: Wisdom of Fortran?, Prev: Fortran Examples, Up: Calling FFTW from Legacy Fortran + +8.5 Wisdom of Fortran? +====================== + +In this section, we discuss how one can import/export FFTW wisdom +(saved plans) to/from a Fortran program; we assume that the reader is +already familiar with wisdom, as described in *note Words of +Wisdom-Saving Plans::. + + The basic problem is that it is difficult to (portably) pass files and +strings between Fortran and C, so we cannot provide a direct Fortran +equivalent to the `fftw_export_wisdom_to_file', etcetera, functions. +Fortran interfaces _are_ provided for the functions that do not take +file/string arguments, however: `dfftw_import_system_wisdom', +`dfftw_import_wisdom', `dfftw_export_wisdom', and `dfftw_forget_wisdom'.
+ + So, for example, to import the system-wide wisdom, you would do: + + integer isuccess + call dfftw_import_system_wisdom(isuccess) + + As usual, the C return value is turned into a first parameter; +`isuccess' is non-zero on success and zero on failure (e.g. if there is +no system wisdom installed). + + If you want to import/export wisdom from/to an arbitrary file or +elsewhere, you can employ the generic `dfftw_import_wisdom' and +`dfftw_export_wisdom' functions, for which you must supply a subroutine +to read/write one character at a time. The FFTW package contains an +example file `doc/f77_wisdom.f' demonstrating how to implement +`import_wisdom_from_file' and `export_wisdom_to_file' subroutines in +this way. (These routines cannot be compiled into the FFTW library +itself, lest all FFTW-using programs be required to link with the +Fortran I/O library.) + + +File: fftw3.info, Node: Upgrading from FFTW version 2, Next: Installation and Customization, Prev: Calling FFTW from Legacy Fortran, Up: Top + +9 Upgrading from FFTW version 2 +******************************* + +In this chapter, we outline the process for updating codes designed for +the older FFTW 2 interface to work with FFTW 3. The interface for FFTW +3 is not backwards-compatible with the interface for FFTW 2 and earlier +versions; codes written to use those versions will fail to link with +FFTW 3. Nor is it possible to write "compatibility wrappers" to bridge +the gap (at least not efficiently), because FFTW 3 has different +semantics from previous versions. However, upgrading should be a +straightforward process because the data formats are identical and the +overall style of planning/execution is essentially the same. + + Unlike FFTW 2, there are no separate header files for real and +complex transforms (or even for different precisions) in FFTW 3; all +interfaces are defined in the `<fftw3.h>' header file.
+ +Numeric Types +============= + +The main difference in data types is that `fftw_complex' in FFTW 2 was +defined as a `struct' with macros `c_re' and `c_im' for accessing the +real/imaginary parts. (This is binary-compatible with FFTW 3 on any +machine except perhaps for some older Crays in single precision.) The +equivalent macros for FFTW 3 are: + + #define c_re(c) ((c)[0]) + #define c_im(c) ((c)[1]) + + This does not work if you are using the C99 complex type, however, +unless you insert a `double*' typecast into the above macros (*note +Complex numbers::). + + Also, FFTW 2 had an `fftw_real' typedef that was an alias for +`double' (in double precision). In FFTW 3 you should just use `double' +(or whatever precision you are employing). + +Plans +===== + +The major difference between FFTW 2 and FFTW 3 is in the +planning/execution division of labor. In FFTW 2, plans were found for a +given transform size and type, and then could be applied to _any_ +arrays and for _any_ multiplicity/stride parameters. In FFTW 3, you +specify the particular arrays, stride parameters, etcetera when +creating the plan, and the plan is then executed for _those_ arrays +(unless the guru interface is used) and _those_ parameters _only_. +(FFTW 2 had "specific planner" routines that planned for a particular +array and stride, but the plan could still be used for other arrays and +strides.) That is, much of the information that was formerly specified +at execution time is now specified at planning time. + + Like FFTW 2's specific planner routines, the FFTW 3 planner +overwrites the input/output arrays unless you use `FFTW_ESTIMATE'. + + FFTW 2 had separate data types `fftw_plan', `fftwnd_plan', +`rfftw_plan', and `rfftwnd_plan' for complex and real one- and +multi-dimensional transforms, and each type had its own `destroy' +function. In FFTW 3, all plans are of type `fftw_plan' and all are +destroyed by `fftw_destroy_plan(plan)'. 
+ + Where you formerly used `fftw_create_plan' and `fftw_one' to plan +and compute a single 1d transform, you would now use `fftw_plan_dft_1d' +to plan the transform. If you used the generic `fftw' function to +execute the transform with multiplicity (`howmany') and stride +parameters, you would now use the advanced interface +`fftw_plan_many_dft' to specify those parameters. The plans are now +executed with `fftw_execute(plan)', which takes all of its parameters +(including the input/output arrays) from the plan. + + In-place transforms no longer interpret their output argument as +scratch space, nor is there an `FFTW_IN_PLACE' flag. You simply pass +the same pointer for both the input and output arguments. (Previously, +the output `ostride' and `odist' parameters were ignored for in-place +transforms; now, if they are specified via the advanced interface, they +are significant even in the in-place case, although they should +normally equal the corresponding input parameters.) + + The `FFTW_ESTIMATE' and `FFTW_MEASURE' flags have the same meaning +as before, although the planning time will differ. You may also +consider using `FFTW_PATIENT', which is like `FFTW_MEASURE' except that +it takes more time in order to consider a wider variety of algorithms. + + For multi-dimensional complex DFTs, instead of `fftwnd_create_plan' +(or `fftw2d_create_plan' or `fftw3d_create_plan'), followed by +`fftwnd_one', you would use `fftw_plan_dft' (or `fftw_plan_dft_2d' or +`fftw_plan_dft_3d'), followed by `fftw_execute'. If you used `fftwnd' +to specify strides etcetera, you would instead specify these via +`fftw_plan_many_dft'. + + The analogues to `rfftw_create_plan' and `rfftw_one' with +`FFTW_REAL_TO_COMPLEX' or `FFTW_COMPLEX_TO_REAL' directions are +`fftw_plan_r2r_1d' with kind `FFTW_R2HC' or `FFTW_HC2R', followed by +`fftw_execute'. The stride etcetera arguments of `rfftw' are now in +`fftw_plan_many_r2r'.
+ + Instead of `rfftwnd_create_plan' (or `rfftw2d_create_plan' or +`rfftw3d_create_plan') followed by `rfftwnd_one_real_to_complex' or +`rfftwnd_one_complex_to_real', you now use `fftw_plan_dft_r2c' (or +`fftw_plan_dft_r2c_2d' or `fftw_plan_dft_r2c_3d') or +`fftw_plan_dft_c2r' (or `fftw_plan_dft_c2r_2d' or +`fftw_plan_dft_c2r_3d'), respectively, followed by `fftw_execute'. As +usual, the strides etcetera of `rfftwnd_real_to_complex' or +`rfftwnd_complex_to_real' are now specified in the advanced planner +routines, `fftw_plan_many_dft_r2c' or `fftw_plan_many_dft_c2r'. + +Wisdom +====== + +In FFTW 2, you had to supply the `FFTW_USE_WISDOM' flag in order to use +wisdom; in FFTW 3, wisdom is always used. (You could simulate the FFTW +2 wisdom-less behavior by calling `fftw_forget_wisdom' after every +planner call.) + + The FFTW 3 wisdom import/export routines are almost the same as +before (although the storage format is entirely different). There is +one significant difference, however. In FFTW 2, the import routines +would never read past the end of the wisdom, so you could store extra +data beyond the wisdom in the same file, for example. In FFTW 3, the +file-import routine may read up to a few hundred bytes past the end of +the wisdom, so you cannot store other data just beyond it.(1) + + Wisdom has been enhanced by additional humility in FFTW 3: whereas +FFTW 2 would re-use wisdom for a given transform size regardless of the +stride etc., in FFTW 3 wisdom is only used with the strides etc. for +which it was created. Unfortunately, this means FFTW 3 has to create +new plans from scratch more often than FFTW 2 (in FFTW 2, planning e.g. +one transform of size 1024 also created wisdom for all smaller powers +of 2, but this no longer occurs). + + FFTW 3 also has the new routine `fftw_import_system_wisdom' to +import wisdom from a standard system-wide location.
+ +Memory allocation +================= + +In FFTW 3, we recommend allocating your arrays with `fftw_malloc' and +deallocating them with `fftw_free'; this is not required, but allows +optimal performance when SIMD acceleration is used. (Those two +functions actually existed in FFTW 2, and worked the same way, but were +not documented.) + + In FFTW 2, there were `fftw_malloc_hook' and `fftw_free_hook' +functions that allowed the user to replace FFTW's memory-allocation +routines (e.g. to implement different error-handling, since by default +FFTW prints an error message and calls `exit' to abort the program if +`malloc' returns `NULL'). These hooks are not supported in FFTW 3; +those few users who require this functionality can just directly modify +the memory-allocation routines in FFTW (they are defined in +`kernel/alloc.c'). + +Fortran interface +================= + +In FFTW 2, the subroutine names were obtained by replacing `fftw_' with +`fftw_f77'; in FFTW 3, you replace `fftw_' with `dfftw_' (or `sfftw_' +or `lfftw_', depending upon the precision). + + In FFTW 3, we have begun recommending that you always declare the +type used to store plans as `integer*8'. (Too many people didn't notice +our instruction to switch from `integer' to `integer*8' for 64-bit +machines.) + + In FFTW 3, we provide a `fftw3.f' "header file" to include in your +code (and which is officially installed on Unix systems). (In FFTW 2, +we supplied a `fftw_f77.i' file, but it was not installed.) + + Otherwise, the C-Fortran interface relationship is much the same as +it was before (e.g. return values become initial parameters, and +multi-dimensional arrays are in column-major order). Unlike FFTW 2, we +do provide some support for wisdom import/export in Fortran (*note +Wisdom of Fortran?::). + +Threads +======= + +Like FFTW 2, only the execution routines are thread-safe. All planner +routines, etcetera, should be called by only a single thread at a time +(*note Thread safety::). 
_Unlike_ FFTW 2, there is no special +`FFTW_THREADSAFE' flag for the planner to allow a given plan to be +usable by multiple threads in parallel; this is now the case by default. + + The multi-threaded version of FFTW 2 required you to pass the number +of threads each time you execute the transform. The number of threads +is now stored in the plan, and is specified before the planner is +called by `fftw_plan_with_nthreads'. The threads initialization +routine used to be called `fftw_threads_init' and would return zero on +success; the new routine is called `fftw_init_threads' and returns zero +on failure. *Note Multi-threaded FFTW::. + + There is no separate threads header file in FFTW 3; all the function +prototypes are in `<fftw3.h>'. However, you still have to link to a +separate library (`-lfftw3_threads -lfftw3 -lm' on Unix), as well as to +the threading library (e.g. POSIX threads on Unix). + + ---------- Footnotes ---------- + + (1) We do our own buffering because GNU libc I/O routines are +horribly slow for single-character I/O, apparently for thread-safety +reasons (whether you are using threads or not). + + +File: fftw3.info, Node: Installation and Customization, Next: Acknowledgments, Prev: Upgrading from FFTW version 2, Up: Top + +10 Installation and Customization +********************************* + +This chapter describes the installation and customization of FFTW, the +latest version of which may be downloaded from the FFTW home page +(http://www.fftw.org). + + In principle, FFTW should work on any system with an ANSI C compiler +(`gcc' is fine). However, planner time is drastically reduced if FFTW +can exploit a hardware cycle counter; FFTW comes with cycle-counter +support for all modern general-purpose CPUs, but you may need to add a +couple of lines of code if your compiler is not yet supported (*note +Cycle Counters::). (On Unix, there will be a warning at the end of the +`configure' output if no cycle counter is found.)
+ + Installation of FFTW is simplest if you have a Unix or a GNU system, +such as GNU/Linux, and we describe this case in the first section below, +including the use of special configuration options to e.g. install +different precisions or exploit optimizations for particular +architectures (e.g. SIMD). Compilation on non-Unix systems is a more +manual process, but we outline the procedure in the second section. It +is also likely that pre-compiled binaries will be available for popular +systems. + + Finally, we describe how you can customize FFTW for particular needs +by generating _codelets_ for fast transforms of sizes not supported +efficiently by the standard FFTW distribution. + +* Menu: + +* Installation on Unix:: +* Installation on non-Unix systems:: +* Cycle Counters:: +* Generating your own code:: + + +File: fftw3.info, Node: Installation on Unix, Next: Installation on non-Unix systems, Prev: Installation and Customization, Up: Installation and Customization + +10.1 Installation on Unix +========================= + +FFTW comes with a `configure' program in the GNU style. Installation +can be as simple as: + + ./configure + make + make install + + This will build the uniprocessor complex and real transform libraries +along with the test programs. (We recommend that you use GNU `make' if +it is available; on some systems it is called `gmake'.) The "`make +install'" command installs the fftw and rfftw libraries in standard +places, and typically requires root privileges (unless you specify a +different install directory with the `--prefix' flag to `configure'). +You can also type "`make check'" to put the FFTW test programs through +their paces. If you have problems during configuration or compilation, +you may want to run "`make distclean'" before trying again; this +ensures that you don't have any stale files left over from previous +compilation attempts. 
+ + The `configure' script chooses the `gcc' compiler by default, if it +is available; you can select some other compiler with: + ./configure CC="<the name of your C compiler>" + + The `configure' script knows good `CFLAGS' (C compiler flags) for a +few systems. If your system is not known, the `configure' script will +print out a warning. In this case, you should re-configure FFTW with +the command + ./configure CFLAGS="<write your CFLAGS here>" + and then compile as usual. If you do find an optimal set of +`CFLAGS' for your system, please let us know what they are (along with +the output of `config.guess') so that we can include them in future +releases. + + `configure' supports all the standard flags defined by the GNU +Coding Standards; see the `INSTALL' file in FFTW or the GNU web page +(http://www.gnu.org/prep/standards/html_node/index.html). Note +especially `--help' to list all flags and `--enable-shared' to create +shared, rather than static, libraries. `configure' also accepts a few +FFTW-specific flags, particularly: + + * `--enable-float': Produces a single-precision version of FFTW + (`float') instead of the default double-precision (`double'). + *Note Precision::. + + * `--enable-long-double': Produces a long-double precision version of + FFTW (`long double') instead of the default double-precision + (`double'). The `configure' script will halt with an error + message if `long double' is the same size as `double' on your + machine/compiler. *Note Precision::. + + * `--enable-quad-precision': Produces a quadruple-precision version + of FFTW using the nonstandard `__float128' type provided by `gcc' + 4.6 or later on x86, x86-64, and Itanium architectures, instead of + the default double-precision (`double'). The `configure' script + will halt with an error message if the compiler is not `gcc' + version 4.6 or later or if `gcc''s `libquadmath' library is not + installed. *Note Precision::.
+ + * `--enable-threads': Enables compilation and installation of the + FFTW threads library (*note Multi-threaded FFTW::), which provides + a simple interface to parallel transforms for SMP systems. By + default, the threads routines are not compiled. + + * `--enable-openmp': Like `--enable-threads', but using OpenMP + compiler directives in order to induce parallelism rather than + spawning its own threads directly, and installing an `fftw3_omp' + library rather than an `fftw3_threads' library (*note + Multi-threaded FFTW::). You can use both `--enable-openmp' and + `--enable-threads' since they compile/install libraries with + different names. By default, the OpenMP routines are not compiled. + + * `--with-combined-threads': By default, if `--enable-threads' is + used, the threads support is compiled into a separate library that + must be linked in addition to the main FFTW library. This is so + that users of the serial library do not need to link the system + threads libraries. If `--with-combined-threads' is specified, + however, then no separate threads library is created, and threads + are included in the main FFTW library. This is mainly useful + under Windows, where no system threads library is required and + inter-library dependencies are problematic. + + * `--enable-mpi': Enables compilation and installation of the FFTW + MPI library (*note Distributed-memory FFTW with MPI::), which + provides parallel transforms for distributed-memory systems with + MPI. (By default, the MPI routines are not compiled.) *Note FFTW + MPI Installation::. + + * `--disable-fortran': Disables inclusion of legacy-Fortran wrapper + routines (*note Calling FFTW from Legacy Fortran::) in the standard + FFTW libraries. These wrapper routines increase the library size + by only a negligible amount, so they are included by default as + long as the `configure' script finds a Fortran compiler on your + system. 
(To specify a particular Fortran compiler foo, pass + `F77='foo to `configure'.) + + * `--with-g77-wrappers': By default, when Fortran wrappers are + included, the wrappers employ the linking conventions of the + Fortran compiler detected by the `configure' script. If this + compiler is GNU `g77', however, then _two_ versions of the + wrappers are included: one with `g77''s idiosyncratic convention + of appending two underscores to identifiers, and one with the more + common convention of appending only a single underscore. This + way, the same FFTW library will work with both `g77' and other + Fortran compilers, such as GNU `gfortran'. However, the converse + is not true: if you configure with a different compiler, then the + `g77'-compatible wrappers are not included. By specifying + `--with-g77-wrappers', the `g77'-compatible wrappers are included + in addition to wrappers for whatever Fortran compiler `configure' + finds. + + * `--with-slow-timer': Disables the use of hardware cycle counters, + and falls back on `gettimeofday' or `clock'. This greatly worsens + performance, and should generally not be used (unless you don't + have a cycle counter but still really want an optimized plan + regardless of the time). *Note Cycle Counters::. + + * `--enable-sse', `--enable-sse2', `--enable-avx', + `--enable-altivec', `--enable-neon': Enable the compilation of + SIMD code for SSE (Pentium III+), SSE2 (Pentium IV+), AVX (Sandy + Bridge, Interlagos), AltiVec (PowerPC G4+), NEON (some ARM + processors). SSE, AltiVec, and NEON only work with + `--enable-float' (above). SSE2 works in both single and double + precision (and is simply SSE in single precision). The resulting + code will _still work_ on earlier CPUs lacking the SIMD extensions + (SIMD is automatically disabled, although the FFTW library is + still larger). 
+ - These options require a compiler supporting SIMD extensions, + and compiler support is always a bit flaky: see the FFTW FAQ + for a list of compiler versions that have problems compiling + FFTW. + + - With AltiVec and `gcc', you may have to use the + `-mabi=altivec' option when compiling any code that links to + FFTW, in order to properly align the stack; otherwise, FFTW + could crash when it tries to use an AltiVec feature. (This + is not necessary on MacOS X.) + + - With SSE/SSE2 and `gcc', you should use a version of gcc that + properly aligns the stack when compiling any code that links + to FFTW. By default, `gcc' 2.95 and later versions align the + stack as needed, but you should not compile FFTW with the + `-Os' option or the `-mpreferred-stack-boundary' option with + an argument less than 4. + + - Because of the large variety of ARM processors and ABIs, FFTW + does not attempt to guess the correct `gcc' flags for + generating NEON code. In general, you will have to provide + them on the command line. This command line is known to have + worked at least once: + ./configure --with-slow-timer --host=arm-linux-gnueabi \ + --enable-single --enable-neon \ + "CC=arm-linux-gnueabi-gcc -march=armv7-a -mfloat-abi=softfp" + + + To force `configure' to use a particular C compiler foo (instead of +the default, usually `gcc'), pass `CC='foo to the `configure' script; +you may also need to set the flags via the variable `CFLAGS' as +described above. + + +File: fftw3.info, Node: Installation on non-Unix systems, Next: Cycle Counters, Prev: Installation on Unix, Up: Installation and Customization + +10.2 Installation on non-Unix systems +===================================== + +It should be relatively straightforward to compile FFTW even on non-Unix +systems lacking the niceties of a `configure' script. 
Basically, you +need to edit the `config.h' header (copy it from `config.h.in') to +`#define' the various options and compiler characteristics, and then +compile all the `.c' files in the relevant directories. + + The `config.h' header contains about 100 options to set, each one +initially an `#undef', each documented with a comment, and most of them +fairly obvious. For most of the options, you should simply `#define' +them to `1' if they are applicable, although a few options require a +particular value (e.g. `SIZEOF_LONG_LONG' should be defined to the size +of the `long long' type, in bytes, or zero if it is not supported). We +will likely post some sample `config.h' files for various operating +systems and compilers for you to use (at least as a starting point). +Please let us know if you have to hand-create a configuration file +(and/or a pre-compiled binary) that you want to share. + + To create the FFTW library, you will then need to compile all of the +`.c' files in the `kernel', `dft', `dft/scalar', `dft/scalar/codelets', +`rdft', `rdft/scalar', `rdft/scalar/r2cf', `rdft/scalar/r2cb', +`rdft/scalar/r2r', `reodft', and `api' directories. If you are +compiling with SIMD support (e.g. you defined `HAVE_SSE2' in +`config.h'), then you also need to compile the `.c' files in the +`simd-support', `{dft,rdft}/simd', `{dft,rdft}/simd/*' directories. + + Once these files are all compiled, link them into a library, or a +shared library, or directly into your program. + + To compile the FFTW test program, additionally compile the code in +the `libbench2/' directory, and link it into a library. Then compile +the code in the `tests/' directory and link it to the `libbench2' and +FFTW libraries. 
To compile the `fftw-wisdom' (command-line) tool +(*note Wisdom Utilities::), compile `tools/fftw-wisdom.c' and link it +to the `libbench2' and FFTW libraries + + +File: fftw3.info, Node: Cycle Counters, Next: Generating your own code, Prev: Installation on non-Unix systems, Up: Installation and Customization + +10.3 Cycle Counters +=================== + +FFTW's planner actually executes and times different possible FFT +algorithms in order to pick the fastest plan for a given n. In order +to do this in as short a time as possible, however, the timer must have +a very high resolution, and to accomplish this we employ the hardware +"cycle counters" that are available on most CPUs. Currently, FFTW +supports the cycle counters on x86, PowerPC/POWER, Alpha, UltraSPARC +(SPARC v9), IA64, PA-RISC, and MIPS processors. + + Access to the cycle counters, unfortunately, is a compiler and/or +operating-system dependent task, often requiring inline assembly +language, and it may be that your compiler is not supported. If you are +_not_ supported, FFTW will by default fall back on its estimator +(effectively using `FFTW_ESTIMATE' for all plans). + + You can add support by editing the file `kernel/cycle.h'; normally, +this will involve adapting one of the examples already present in order +to use the inline-assembler syntax for your C compiler, and will only +require a couple of lines of code. Anyone adding support for a new +system to `cycle.h' is encouraged to email us at . + + If a cycle counter is not available on your system (e.g. some +embedded processor), and you don't want to use estimated plans, as a +last resort you can use the `--with-slow-timer' option to `configure' +(on Unix) or `#define WITH_SLOW_TIMER' in `config.h' (elsewhere). This +will use the much lower-resolution `gettimeofday' function, or even +`clock' if the former is unavailable, and planning will be extremely +slow. 
+ + +File: fftw3.info, Node: Generating your own code, Prev: Cycle Counters, Up: Installation and Customization + +10.4 Generating your own code +============================= + +The directory `genfft' contains the programs that were used to generate +FFTW's "codelets," which are hard-coded transforms of small sizes. We +do not expect casual users to employ the generator, which is a rather +sophisticated program that generates directed acyclic graphs of FFT +algorithms and performs algebraic simplifications on them. It was +written in Objective Caml, a dialect of ML, which is available at +`http://caml.inria.fr/ocaml/index.en.html'. + + If you have Objective Caml installed (along with recent versions of +GNU `autoconf', `automake', and `libtool'), then you can change the set +of codelets that are generated or play with the generation options. +The set of generated codelets is specified by the +`{dft,rdft}/{codelets,simd}/*/Makefile.am' files. For example, you can +add efficient REDFT codelets of small sizes by modifying +`rdft/codelets/r2r/Makefile.am'. After you modify any `Makefile.am' +files, you can type `sh bootstrap.sh' in the top-level directory +followed by `make' to re-generate the files. + + We do not provide more details about the code-generation process, +since we do not expect that most users will need to generate their own +code. However, feel free to contact us at if you are +interested in the subject. + + You might find it interesting to learn Caml and/or some modern +programming techniques that we used in the generator (including monadic +programming), especially if you heard the rumor that Java and +object-oriented programming are the latest advancement in the field. +The internal operation of the codelet generator is described in the +paper, "A Fast Fourier Transform Compiler," by M. 
Frigo, which is +available from the FFTW home page (http://www.fftw.org) and also +appeared in the `Proceedings of the 1999 ACM SIGPLAN Conference on +Programming Language Design and Implementation (PLDI)'. + + +File: fftw3.info, Node: Acknowledgments, Next: License and Copyright, Prev: Installation and Customization, Up: Top + +11 Acknowledgments +****************** + +Matteo Frigo was supported in part by the Special Research Program SFB +F011 "AURORA" of the Austrian Science Fund FWF and by MIT Lincoln +Laboratory. For previous versions of FFTW, he was supported in part by +the Defense Advanced Research Projects Agency (DARPA), under Grants +N00014-94-1-0985 and F30602-97-1-0270, and by a Digital Equipment +Corporation Fellowship. + + Steven G. Johnson was supported in part by a Dept. of Defense NDSEG +Fellowship, an MIT Karl Taylor Compton Fellowship, and by the Materials +Research Science and Engineering Center program of the National Science +Foundation under award DMR-9400334. + + Code for the Cell Broadband Engine was graciously donated to the FFTW +project by the IBM Austin Research Lab and included in fftw-3.2. (This +code was removed in fftw-3.3.) + + Code for the MIPS paired-single SIMD support was graciously donated +to the FFTW project by CodeSourcery, Inc. + + We are grateful to Sun Microsystems Inc. for its donation of a +cluster of 9 8-processor Ultra HPC 5000 SMPs (24 Gflops peak). These +machines served as the primary platform for the development of early +versions of FFTW. + + We thank Intel Corporation for donating a four-processor Pentium Pro +machine. We thank the GNU/Linux community for giving us a decent OS to +run on that machine. + + We are thankful to the AMD corporation for donating an AMD Athlon XP +1700+ computer to the FFTW project. + + We thank the Compaq/HP testdrive program and VA Software Corporation +(SourceForge.net) for providing remote access to machines that were used +to test FFTW. 
+ + The `genfft' suite of code generators was written using Objective +Caml, a dialect of ML. Objective Caml is a small and elegant language +developed by Xavier Leroy. The implementation is available from +`http://caml.inria.fr/' (http://caml.inria.fr/). In previous releases +of FFTW, `genfft' was written in Caml Light, by the same authors. An +even earlier implementation of `genfft' was written in Scheme, but Caml +is definitely better for this kind of application. + + FFTW uses many tools from the GNU project, including `automake', +`texinfo', and `libtool'. + + Prof. Charles E. Leiserson of MIT provided continuous support and +encouragement. This program would not exist without him. Charles also +proposed the name "codelets" for the basic FFT blocks. + + Prof. John D. Joannopoulos of MIT demonstrated continuing tolerance +of Steven's "extra-curricular" computer-science activities, as well as +remarkable creativity in working them into his grant proposals. +Steven's physics degree would not exist without him. + + Franz Franchetti wrote SIMD extensions to FFTW 2, which eventually +led to the SIMD support in FFTW 3. + + Stefan Kral wrote most of the K7 code generator distributed with FFTW +3.0.x and 3.1.x. + + Andrew Sterian contributed the Windows timing code in FFTW 2. + + Didier Miras reported a bug in the test procedure used in FFTW 1.2. +We now use a completely different test algorithm by Funda Ergun that +does not require a separate FFT program to compare against. + + Wolfgang Reimer contributed the Pentium cycle counter and a few fixes +that help portability. + + Ming-Chang Liu uncovered a well-hidden bug in the complex transforms +of FFTW 2.0 and supplied a patch to correct it. + + The FFTW FAQ was written in `bfnn' (Bizarre Format With No Name) and +formatted using the tools developed by Ian Jackson for the Linux FAQ. 
+ + _We are especially thankful to all of our users for their continuing +support, feedback, and interest during our development of FFTW._ + + +File: fftw3.info, Node: License and Copyright, Next: Concept Index, Prev: Acknowledgments, Up: Top + +12 License and Copyright +************************ + +FFTW is Copyright (C) 2003, 2007-11 Matteo Frigo, Copyright (C) 2003, +2007-11 Massachusetts Institute of Technology. + + FFTW is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + + You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA You +can also find the GPL on the GNU web site +(http://www.gnu.org/licenses/gpl-2.0.html). + + In addition, we kindly ask you to acknowledge FFTW and its authors in +any program or publication in which you use FFTW. (You are not +_required_ to do so; it is up to your common sense to decide whether +you want to comply with this request or not.) For general +publications, we suggest referencing: Matteo Frigo and Steven G. +Johnson, "The design and implementation of FFTW3," Proc. IEEE 93 (2), +216-231 (2005). + + Non-free versions of FFTW are available under terms different from +those of the General Public License. (e.g. they do not require you to +accompany any object code using FFTW with the corresponding source +code.) For these alternative terms you must purchase a license from +MIT's Technology Licensing Office. 
Users interested in such a license +should contact us () for more information. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/fftw3.info-2 Binary file fft/fftw/fftw-3.3.4/doc/fftw3.info-2 has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/fftw3.pdf Binary file fft/fftw/fftw-3.3.4/doc/fftw3.pdf has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/fftw3.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/fftw3.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,278 @@ +\input texinfo @c -*-texinfo-*- +@c Update by C-x C-e on: (texinfo-multiple-files-update "fftw3.texi" nil t) +@setfilename fftw3.info +@include version.texi +@settitle FFTW @value{VERSION} +@setchapternewpage odd +@c define constant index (ct) +@defcodeindex ct +@syncodeindex ct fn +@syncodeindex vr fn +@syncodeindex pg fn +@syncodeindex tp fn +@c define foreign function index (ff) +@defcodeindex ff +@syncodeindex ff cp +@c define foreign constant index (fc) +@defcodeindex fc +@syncodeindex fc cp +@c define foreign program index (fp) +@defcodeindex fp +@syncodeindex fp cp +@comment %**end of header + +@iftex +@paragraphindent 0 +@parskip=@medskipamount +@end iftex + +@c +@c The following macros are coded in a weird way: + +@c @macro FOO +@c @noindent +@c +@c @refill +@c @end macro + +@c The @noindent/@refill stuff is not necessary in texinfo up to version +@c 4, but it is a hack necessary to make texinfo-5 work. + +@c Texinfo has been stable for the first 15 years of FFTW's history. +@c Then some genius, with too much time in his hands and on a mission to +@c deliver the world from the evil of the C language, decided to rewrite +@c makeinfo in Perl, the old C version of makeinfo being, as I said, +@c evil. The official excuse for the rewrite was that now I can have my +@c manual in XML format, as if XML were a feature. 
+ +@c The result of this stroke of genius is that texinfo-5 has different +@c rules for macro expansion than texinfo-4 does, specifically regarding +@c whether or not spaces after a macro are ignored. Texinfo-4 had weird +@c rules, but at least they were constant and internally more or less +@c consistent. Texinfo-5 has different rules, and even worse the rules +@c in texinfo-5 are inconsistent between the TeX and HTML output +@c processors. This situation makes it almost impossible for us to +@c produce a manual that works with both texinfo 4 and 5 in all modes +@c (TeX, info, and html). The @noindent/@refill hack is my best shot at +@c patching this situation. + +@c "@noindent" has two effects: First, it makes texinfo-5 believe that +@c the next "@ifinfo" is on a new line, otherwise texinfo-5 complains +@c that it is not (even though it obviously is). Second, "@noindent" is +@c a macro that eats extra space, and we want this effect because somehow +@c macro expansion in texinfo-5 inserts extra spaces that were not there +@c in texinfo-4. + +@c "@refill" stops texinfo-5 from interpreting the rest of the line after +@c a macro invocation as an argument to "@end tex". For example, in +@c "FFTW uses @Onlogn algorithms", somehow texinfo-5 thinks that +@c "algorithms" is an argument to "@end tex". "@noindent" would have the +@c same effect (as would any other macro invocation, I think), but, +@c unlike "@noindent", "@refill" does not eat spaces and does not scan +@c the rest of the input file for macro arguments. However, "@refill" is +@c deemed "obsolete" in the texinfo-5 source code, so expect this to +@c break at some point. + +@c This situation is wholly unsatisfactory, and the GNU project is +@c obviously out of control. If this nonsense persists, we will abandon +@c texinfo and produce a latex-only version of the manual. 
+ + +@macro Onlogn +@noindent +@ifinfo +O(n log n) +@end ifinfo +@html +O(n log n) +@end html +@tex +$O(n \\log n)$ +@end tex +@refill +@end macro + +@macro ndims +@noindent +@ifinfo +n[0] x n[1] x n[2] x ... x n[d-1] +@end ifinfo +@html +n0 × n1 × n2 × … × nd-1 +@end html +@tex +$n_0 \\times n_1 \\times n_2 \\times \\cdots \\times n_{d-1}$ +@end tex +@refill +@end macro + +@macro ndimshalf +@noindent +@ifinfo +n[0] x n[1] x n[2] x ... x (n[d-1]/2 + 1) +@end ifinfo +@html +n0 × n1 × n2 × … × (nd-1/2 + 1) +@end html +@tex +$n_0 \\times n_1 \\times n_2 \\times \\cdots \\times (n_{d-1}/2 + 1)$ +@end tex +@refill +@end macro + +@macro ndimspad +@noindent +@ifinfo +n[0] x n[1] x n[2] x ... x [2 (n[d-1]/2 + 1)] +@end ifinfo +@html +n0 × n1 × n2 × … × [2 (nd-1/2 + 1)] +@end html +@tex +$n_0 \\times n_1 \\times n_2 \\times \\cdots \\times [2(n_{d-1}/2 + 1)]$ +@end tex +@refill +@end macro + +@macro twodims{d1, d2} +@noindent +@ifinfo +\d1\ x \d2\ +@end ifinfo +@html +\d1\ × \d2\ +@end html +@tex +$\d1\ \\times \d2\$ +@end tex +@refill +@end macro + +@macro threedims{d1, d2, d3} +@noindent +@ifinfo +\d1\ x \d2\ x \d3\ +@end ifinfo +@html +\d1\ × \d2\ × \d3\ +@end html +@tex +$\d1\ \\times \d2\ \\times \d3\$ +@end tex +@refill +@end macro + +@macro dimk{k} +@noindent +@ifinfo +n[\k\] +@end ifinfo +@html +n\k\ +@end html +@tex +$n_\k\$ +@end tex +@refill +@end macro + + +@macro ndimstrans +@noindent +@ifinfo +n[1] x n[0] x n[2] x ... x n[d-1] +@end ifinfo +@html +n1 × n0 × n2 ×…× nd-1 +@end html +@tex +$n_1 \\times n_0 \\times n_2 \\times \\cdots \\times n_{d-1}$ +@end tex +@refill +@end macro + +@copying +This manual is for FFTW +(version @value{VERSION}, @value{UPDATED}). + +Copyright @copyright{} 2003 Matteo Frigo. + +Copyright @copyright{} 2003 Massachusetts Institute of Technology. + +@quotation +Permission is granted to make and distribute verbatim copies of this +manual provided the copyright notice and this permission notice are +preserved on all copies. 
+ +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the +entire resulting derived work is distributed under the terms of a +permission notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation +approved by the Free Software Foundation. +@end quotation +@end copying + +@dircategory Development +@direntry +* fftw3: (fftw3). FFTW User's Manual. +@end direntry + +@titlepage +@title FFTW +@subtitle for version @value{VERSION}, @value{UPDATED} +@author Matteo Frigo +@author Steven G. Johnson +@page +@vskip 0pt plus 1filll +@insertcopying +@end titlepage + +@contents + +@ifnottex +@node Top, Introduction, (dir), (dir) +@top FFTW User Manual +Welcome to FFTW, the Fastest Fourier Transform in the West. FFTW is a +collection of fast C routines to compute the discrete Fourier transform. +This manual documents FFTW version @value{VERSION}. 
+@end ifnottex + +@menu +* Introduction:: +* Tutorial:: +* Other Important Topics:: +* FFTW Reference:: +* Multi-threaded FFTW:: +* Distributed-memory FFTW with MPI:: +* Calling FFTW from Modern Fortran:: +* Calling FFTW from Legacy Fortran:: +* Upgrading from FFTW version 2:: +* Installation and Customization:: +* Acknowledgments:: +* License and Copyright:: +* Concept Index:: +* Library Index:: +@end menu + +@c ************************************************************ +@include intro.texi +@include tutorial.texi +@include other.texi +@include reference.texi +@include threads.texi +@include mpi.texi +@include modern-fortran.texi +@include legacy-fortran.texi +@include upgrading.texi +@include install.texi +@include acknowledgements.texi +@include license.texi +@include cindex.texi +@include findex.texi +@c ************************************************************ + +@bye diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/findex.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/findex.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +@node Library Index, , Concept Index, Top +@chapter Library Index +@printindex fn diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/1d-Discrete-Hartley-Transforms-_0028DHTs_0029.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/1d-Discrete-Hartley-Transforms-_0028DHTs_0029.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,71 @@ + + +1d Discrete Hartley Transforms (DHTs) - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    4.8.5 1d Discrete Hartley Transforms (DHTs)

    + +

    The discrete Hartley transform (DHT) of a 1d real array X of size +n computes a real array Y of the same size, where: +

    $Y_k = \sum_{j=0}^{n-1} X_j \left[ \cos(2\pi j k / n) + \sin(2\pi j k / n) \right]$.
    + +

    FFTW computes an unnormalized transform, in that there is no coefficient +in front of the summation in the DHT. In other words, applying the +transform twice (the DHT is its own inverse) will multiply the input by +n. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/1d-Real_002deven-DFTs-_0028DCTs_0029.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/1d-Real_002deven-DFTs-_0028DCTs_0029.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,127 @@ + + +1d Real-even DFTs (DCTs) - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.8.3 1d Real-even DFTs (DCTs)

    + +

    The Real-even symmetry DFTs in FFTW are exactly equivalent to the unnormalized +forward (and backward) DFTs as defined above, where the input array +X of length N is purely real and is also even symmetry. In +this case, the output array is likewise real and even symmetry. + + +

    For the case of REDFT00, this even symmetry means that +$X_j = X_{N-j}$, where we take X to be periodic so that +$X_N = X_0$. Because of this redundancy, only the first n real numbers are +actually stored, where N = 2(n-1). + +

    The proper definition of even symmetry for REDFT10, +REDFT01, and REDFT11 transforms is somewhat more intricate +because of the shifts by 1/2 of the input and/or output, although +the corresponding boundary conditions are given in Real even/odd DFTs (cosine/sine transforms). Because of the even symmetry, however, +the sine terms in the DFT all cancel and the remaining cosine terms are +written explicitly below. This formulation often leads people to call +such a transform a discrete cosine transform (DCT), although it is +really just a special case of the DFT. + + +

    In each of the definitions below, we transform a real array X of +length n to a real array Y of length n: + +

    REDFT00 (DCT-I)
    + +

    An REDFT00 transform (type-I DCT) in FFTW is defined by: +

    $Y_k = X_0 + (-1)^k X_{n-1} + 2 \sum_{j=1}^{n-2} X_j \cos\left[ \pi j k / (n-1) \right]$.
    Note that this transform is not defined for n=1. For n=2, +the summation term above is dropped as you might expect. + +
    REDFT10 (DCT-II)
    + +

    An REDFT10 transform (type-II DCT, sometimes called “the” DCT) in FFTW is defined by: +

    $Y_k = 2 \sum_{j=0}^{n-1} X_j \cos\left[ \pi (j + 1/2) k / n \right]$.
    + +
    REDFT01 (DCT-III)
    + +

    An REDFT01 transform (type-III DCT) in FFTW is defined by: +

    $Y_k = X_0 + 2 \sum_{j=1}^{n-1} X_j \cos\left[ \pi j (k + 1/2) / n \right]$.
    In the case of n=1, this reduces to +$Y_0 = X_0$. Up to a scale factor (see below), this is the inverse of REDFT10 (“the” DCT), and so the REDFT01 (DCT-III) is sometimes called the “IDCT”. + + +
    REDFT11 (DCT-IV)
    + +

    An REDFT11 transform (type-IV DCT) in FFTW is defined by: +

    $Y_k = 2 \sum_{j=0}^{n-1} X_j \cos\left[ \pi (j + 1/2) (k + 1/2) / n \right]$.
    + +
    Inverses and Normalization
    + +

    These definitions correspond directly to the unnormalized DFTs used +elsewhere in FFTW (hence the factors of 2 in front of the +summations). The unnormalized inverse of REDFT00 is +REDFT00, of REDFT10 is REDFT01 and vice versa, and +of REDFT11 is REDFT11. Each unnormalized inverse results +in the original array multiplied by N, where N is the +logical DFT size. For REDFT00, N=2(n-1) (note that +n=1 is not defined); otherwise, N=2n. + + +

    In defining the discrete cosine transform, some authors also include +additional factors of +√2(or its inverse) multiplying selected inputs and/or outputs. This is a +mostly cosmetic change that makes the transform orthogonal, but +sacrifices the direct equivalence to a symmetric DFT. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/1d-Real_002dodd-DFTs-_0028DSTs_0029.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/1d-Real_002dodd-DFTs-_0028DSTs_0029.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,126 @@ + + +1d Real-odd DFTs (DSTs) - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.8.4 1d Real-odd DFTs (DSTs)

    + +

    The Real-odd symmetry DFTs in FFTW are exactly equivalent to the unnormalized +forward (and backward) DFTs as defined above, where the input array +X of length N is purely real and is also odd symmetry. In +this case, the output is odd symmetry and purely imaginary. + + +

    For the case of RODFT00, this odd symmetry means that +$X_j = -X_{N-j}$, where we take X to be periodic so that +$X_N = X_0$. Because of this redundancy, only the first n real numbers +starting at j=1 are actually stored (the j=0 element is +zero), where N = 2(n+1). + +

    The proper definition of odd symmetry for RODFT10, +RODFT01, and RODFT11 transforms is somewhat more intricate +because of the shifts by 1/2 of the input and/or output, although +the corresponding boundary conditions are given in Real even/odd DFTs (cosine/sine transforms). Because of the odd symmetry, however, +the cosine terms in the DFT all cancel and the remaining sine terms are +written explicitly below. This formulation often leads people to call +such a transform a discrete sine transform (DST), although it is +really just a special case of the DFT. + + +

    In each of the definitions below, we transform a real array X of +length n to a real array Y of length n: + +

    RODFT00 (DST-I)
    + +

    An RODFT00 transform (type-I DST) in FFTW is defined by: +

    $Y_k = 2 \sum_{j=0}^{n-1} X_j \sin\left[ \pi (j + 1) (k + 1) / (n + 1) \right]$.
    + +
    RODFT10 (DST-II)
    + +

    An RODFT10 transform (type-II DST) in FFTW is defined by: +

    $Y_k = 2 \sum_{j=0}^{n-1} X_j \sin\left[ \pi (j + 1/2) (k + 1) / n \right]$.
    + +
    RODFT01 (DST-III)
    + +

    An RODFT01 transform (type-III DST) in FFTW is defined by: +

    $Y_k = (-1)^k X_{n-1} + 2 \sum_{j=0}^{n-2} X_j \sin\left[ \pi (j + 1) (k + 1/2) / n \right]$.
    In the case of n=1, this reduces to +$Y_0 = X_0$. + +
    RODFT11 (DST-IV)
    + +

    An RODFT11 transform (type-IV DST) in FFTW is defined by: +

    $Y_k = 2 \sum_{j=0}^{n-1} X_j \sin\left[ \pi (j + 1/2) (k + 1/2) / n \right]$.
    + +
    Inverses and Normalization
    + +

    These definitions correspond directly to the unnormalized DFTs used +elsewhere in FFTW (hence the factors of 2 in front of the +summations). The unnormalized inverse of RODFT00 is +RODFT00, of RODFT10 is RODFT01 and vice versa, and +of RODFT11 is RODFT11. Each unnormalized inverse results +in the original array multiplied by N, where N is the +logical DFT size. For RODFT00, N=2(n+1); +otherwise, N=2n. + + +

    In defining the discrete sine transform, some authors also include +additional factors of +√2(or its inverse) multiplying selected inputs and/or outputs. This is a +mostly cosmetic change that makes the transform orthogonal, but +sacrifices the direct equivalence to an antisymmetric DFT. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/2d-MPI-example.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/2d-MPI-example.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,154 @@ + + +2d MPI example - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.3 2d MPI example

    + +

    Before we document the FFTW MPI interface in detail, we begin with a +simple example outlining how one would perform a two-dimensional +N0 by N1 complex DFT. + +

         #include <fftw3-mpi.h>
    +     
    +     int main(int argc, char **argv)
    +     {
    +         const ptrdiff_t N0 = ..., N1 = ...;
    +         fftw_plan plan;
    +         fftw_complex *data;
    +         ptrdiff_t alloc_local, local_n0, local_0_start, i, j;
    +     
    +         MPI_Init(&argc, &argv);
    +         fftw_mpi_init();
    +     
    +         /* get local data size and allocate */
    +         alloc_local = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD,
    +                                              &local_n0, &local_0_start);
    +         data = fftw_alloc_complex(alloc_local);
    +     
    +         /* create plan for in-place forward DFT */
    +         plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, MPI_COMM_WORLD,
    +                                     FFTW_FORWARD, FFTW_ESTIMATE);
    +     
    +         /* initialize data to some function my_function(x,y) */
    +         for (i = 0; i < local_n0; ++i) for (j = 0; j < N1; ++j)
    +            data[i*N1 + j] = my_function(local_0_start + i, j);
    +     
    +         /* compute transforms, in-place, as many times as desired */
    +         fftw_execute(plan);
    +     
    +         fftw_destroy_plan(plan);
    +     
    +         MPI_Finalize();
    +     }
    +
    +

    As can be seen above, the MPI interface follows the same basic style +of allocate/plan/execute/destroy as the serial FFTW routines. All of +the MPI-specific routines are prefixed with ‘fftw_mpi_’ instead +of ‘fftw_’. There are a few important differences, however: + +

    First, we must call fftw_mpi_init() after calling +MPI_Init (required in all MPI programs) and before calling any +other ‘fftw_mpi_’ routine. + + +

    Second, when we create the plan with fftw_mpi_plan_dft_2d, +analogous to fftw_plan_dft_2d, we pass an additional argument: +the communicator, indicating which processes will participate in the +transform (here MPI_COMM_WORLD, indicating all processes). +Whenever you create, execute, or destroy a plan for an MPI transform, +you must call the corresponding FFTW routine on all processes +in the communicator for that transform. (That is, these are +collective calls.) Note that the plan for the MPI transform +uses the standard fftw_execute and fftw_destroy_plan routines +(on the other hand, there are MPI-specific new-array execute functions +documented below). + + +

    Third, all of the FFTW MPI routines take ptrdiff_t arguments +instead of int as for the serial FFTW. ptrdiff_t is a +standard C integer type which is (at least) 32 bits wide on a 32-bit +machine and 64 bits wide on a 64-bit machine. This is to make it easy +to specify very large parallel transforms on a 64-bit machine. (You +can specify 64-bit transform sizes in the serial FFTW, too, but only +by using the ‘guru64’ planner interface. See 64-bit Guru Interface.) + + +

    Fourth, and most importantly, you don't allocate the entire +two-dimensional array on each process. Instead, you call +fftw_mpi_local_size_2d to find out what portion of the +array resides on each processor, and how much space to allocate. +Here, the portion of the array on each process is a local_n0 by +N1 slice of the total array, starting at index +local_0_start. The total number of fftw_complex numbers +to allocate is given by the alloc_local return value, which +may be greater than local_n0 * N1 (in case some +intermediate calculations require additional storage). The data +distribution in FFTW's MPI interface is described in more detail by +the next section. + + +

    Given the portion of the array that resides on the local process, it +is straightforward to initialize the data (here to a function +my_function) and otherwise manipulate it. Of course, at the end +of the program you may want to output the data somehow, but +synchronizing this output is up to you and is beyond the scope of this +manual. (One good way to output a large multi-dimensional distributed +array in MPI to a portable binary file is to use the free HDF5 +library; see the HDF home page.) + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/64_002dbit-Guru-Interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/64_002dbit-Guru-Interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,113 @@ + + +64-bit Guru Interface - FFTW 3.3.4 + + + + + + + + + + + + +

    + + +

    +Previous: Guru Real-to-real Transforms, +Up: Guru Interface +


    +
    + +

    4.5.6 64-bit Guru Interface

    + +

    +When compiled in 64-bit mode on a 64-bit architecture (where addresses +are 64 bits wide), FFTW uses 64-bit quantities internally for all +transform sizes, strides, and so on—you don't have to do anything +special to exploit this. However, in the ordinary FFTW interfaces, +you specify the transform size by an int quantity, which is +normally only 32 bits wide. This means that, even though FFTW is +using 64-bit sizes internally, you cannot specify a single transform +dimension larger than +$2^{31}-1$ numbers. + +

    We expect that few users will require transforms larger than this, but, +for those who do, we provide a 64-bit version of the guru interface in +which all sizes are specified as integers of type ptrdiff_t +instead of int. (ptrdiff_t is a signed integer type +defined by the C standard to be wide enough to represent address +differences, and thus must be at least 64 bits wide on a 64-bit +machine.) We stress that there is no performance advantage to +using this interface—the same internal FFTW code is employed +regardless—and it is only necessary if you want to specify very +large transform sizes. + + +

    In particular, the 64-bit guru interface is a set of planner routines +that are exactly the same as the guru planner routines, except that +they are named with ‘guru64’ instead of ‘guru’ and they take +arguments of type fftw_iodim64 instead of fftw_iodim. +For example, instead of fftw_plan_guru_dft, we have +fftw_plan_guru64_dft. + +

         fftw_plan fftw_plan_guru64_dft(
    +          int rank, const fftw_iodim64 *dims,
    +          int howmany_rank, const fftw_iodim64 *howmany_dims,
    +          fftw_complex *in, fftw_complex *out,
    +          int sign, unsigned flags);
    +
    +

    +The fftw_iodim64 type is similar to fftw_iodim, with the +same interpretation, except that it uses type ptrdiff_t instead +of type int. + +

         typedef struct {
    +          ptrdiff_t n;
    +          ptrdiff_t is;
    +          ptrdiff_t os;
    +     } fftw_iodim64;
    +
    +

    +Every other ‘fftw_plan_guru’ function also has a +‘fftw_plan_guru64’ equivalent, but we do not repeat their +documentation here since they are identical to the 32-bit versions +except as noted above. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Accessing-the-wisdom-API-from-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Accessing-the-wisdom-API-from-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,75 @@ + + +Accessing the wisdom API from Fortran - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    7.6 Accessing the wisdom API from Fortran

    + +

    +As explained in Words of Wisdom-Saving Plans, FFTW provides a +“wisdom” API for saving plans to disk so that they can be recreated +quickly. The C API for exporting (see Wisdom Export) and +importing (see Wisdom Import) wisdom is somewhat tricky to use +from Fortran, however, because of differences in file I/O and string +types between C and Fortran. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Acknowledgments.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Acknowledgments.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,142 @@ + + +Acknowledgments - FFTW 3.3.4 + + + + + + + + + + + + +
    + +

    +Next: , +Previous: Installation and Customization, +Up: Top +


    +
    + +

    11 Acknowledgments

    + +

    Matteo Frigo was supported in part by the Special Research Program SFB +F011 “AURORA” of the Austrian Science Fund FWF and by MIT Lincoln +Laboratory. For previous versions of FFTW, he was supported in part by the +Defense Advanced Research Projects Agency (DARPA), under Grants +N00014-94-1-0985 and F30602-97-1-0270, and by a Digital Equipment +Corporation Fellowship. + +

    Steven G. Johnson was supported in part by a Dept. of Defense NDSEG +Fellowship, an MIT Karl Taylor Compton Fellowship, and by the Materials +Research Science and Engineering Center program of the National Science +Foundation under award DMR-9400334. + +

    Code for the Cell Broadband Engine was graciously donated to the FFTW +project by the IBM Austin Research Lab and included in fftw-3.2. (This +code was removed in fftw-3.3.) + +

    Code for the MIPS paired-single SIMD support was graciously donated to +the FFTW project by CodeSourcery, Inc. + +

    We are grateful to Sun Microsystems Inc. for its donation of a +cluster of 9 8-processor Ultra HPC 5000 SMPs (24 Gflops peak). These +machines served as the primary platform for the development of early +versions of FFTW. + +

    We thank Intel Corporation for donating a four-processor Pentium Pro +machine. We thank the GNU/Linux community for giving us a decent OS to +run on that machine. + +

    We are thankful to the AMD corporation for donating an AMD Athlon XP 1700+ +computer to the FFTW project. + +

    We thank the Compaq/HP testdrive program and VA Software Corporation +(SourceForge.net) for providing remote access to machines that were used +to test FFTW. + +

    The genfft suite of code generators was written using Objective +Caml, a dialect of ML. Objective Caml is a small and elegant language +developed by Xavier Leroy. The implementation is available from +http://caml.inria.fr/. In previous +releases of FFTW, genfft was written in Caml Light, by the same +authors. An even earlier implementation of genfft was written in +Scheme, but Caml is definitely better for this kind of application. + + +

    FFTW uses many tools from the GNU project, including automake, +texinfo, and libtool. + +

    Prof. Charles E. Leiserson of MIT provided continuous support and +encouragement. This program would not exist without him. Charles also +proposed the name “codelets” for the basic FFT blocks. + + +

    Prof. John D. Joannopoulos of MIT demonstrated continuing tolerance of +Steven's “extra-curricular” computer-science activities, as well as +remarkable creativity in working them into his grant proposals. +Steven's physics degree would not exist without him. + +

    Franz Franchetti wrote SIMD extensions to FFTW 2, which eventually +led to the SIMD support in FFTW 3. + +

    Stefan Kral wrote most of the K7 code generator distributed with FFTW +3.0.x and 3.1.x. + +

    Andrew Sterian contributed the Windows timing code in FFTW 2. + +

    Didier Miras reported a bug in the test procedure used in FFTW 1.2. We +now use a completely different test algorithm by Funda Ergun that does +not require a separate FFT program to compare against. + +

    Wolfgang Reimer contributed the Pentium cycle counter and a few fixes +that help portability. + +

    Ming-Chang Liu uncovered a well-hidden bug in the complex transforms of +FFTW 2.0 and supplied a patch to correct it. + +

    The FFTW FAQ was written in bfnn (Bizarre Format With No Name) +and formatted using the tools developed by Ian Jackson for the Linux +FAQ. + +

    We are especially thankful to all of our users for their +continuing support, feedback, and interest during our development of +FFTW. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Advanced-Complex-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Advanced-Complex-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,149 @@ + + +Advanced Complex DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.4.1 Advanced Complex DFTs

    + +
         fftw_plan fftw_plan_many_dft(int rank, const int *n, int howmany,
    +                                  fftw_complex *in, const int *inembed,
    +                                  int istride, int idist,
    +                                  fftw_complex *out, const int *onembed,
    +                                  int ostride, int odist,
    +                                  int sign, unsigned flags);
    +
    +

    +This routine plans multiple multidimensional complex DFTs, and it +extends the fftw_plan_dft routine (see Complex DFTs) to +compute howmany transforms, each having rank rank and size +n. In addition, the transform data need not be contiguous, but +it may be laid out in memory with an arbitrary stride. To account for +these possibilities, fftw_plan_many_dft adds the new parameters +howmany, {i,o}nembed, +{i,o}stride, and +{i,o}dist. The FFTW basic interface +(see Complex DFTs) provides routines specialized for ranks 1, 2, +and 3, but the advanced interface handles only the general-rank +case. + +

    howmany is the number of transforms to compute. The resulting +plan computes howmany transforms, where the input of the +k-th transform is at location in+k*idist (in C pointer +arithmetic), and its output is at location out+k*odist. Plans +obtained in this way can often be faster than calling FFTW multiple +times for the individual transforms. The basic fftw_plan_dft +interface corresponds to howmany=1 (in which case the dist +parameters are ignored). + + +

    Each of the howmany transforms has rank rank and size +n, as in the basic interface. In addition, the advanced +interface allows the input and output arrays of each transform to be +row-major subarrays of larger rank-rank arrays, described by +inembed and onembed parameters, respectively. +{i,o}nembed must be arrays of length rank, +and n should be elementwise less than or equal to +{i,o}nembed. Passing NULL for an +nembed parameter is equivalent to passing n (i.e. same +physical and logical dimensions, as in the basic interface.) + +

    The stride parameters indicate that the j-th element of +the input or output arrays is located at j*istride or +j*ostride, respectively. (For a multi-dimensional array, +j is the ordinary row-major index.) When combined with the +k-th transform in a howmany loop, from above, this means +that the (j,k)-th element is at j*stride+k*dist. +(The basic fftw_plan_dft interface corresponds to a stride of 1.) + + +

    For in-place transforms, the input and output stride and +dist parameters should be the same; otherwise, the planner may +return NULL. + +

    Arrays n, inembed, and onembed are not used after +this function returns. You can safely free or reuse them. + +

    Examples: +One transform of one 5 by 6 array contiguous in memory: +

            int rank = 2;
    +        int n[] = {5, 6};
    +        int howmany = 1;
    +        int idist = odist = 0; /* unused because howmany = 1 */
    +        int istride = ostride = 1; /* array is contiguous in memory */
    +        int *inembed = n, *onembed = n;
    +
    +

    Transform of three 5 by 6 arrays, each contiguous in memory, +stored in memory one after another: +

            int rank = 2;
    +        int n[] = {5, 6};
    +        int howmany = 3;
    +        int idist = odist = n[0]*n[1]; /* = 30, the distance in memory
    +                                          between the first element
    +                                          of the first array and the
    +                                          first element of the second array */
    +        int istride = ostride = 1; /* array is contiguous in memory */
    +        int *inembed = n, *onembed = n;
    +
    +

    Transform each column of a 2d array with 10 rows and 3 columns: +

            int rank = 1; /* not 2: we are computing 1d transforms */
    +        int n[] = {10}; /* 1d transforms of length 10 */
    +        int howmany = 3;
    +        int idist = odist = 1;
    +        int istride = ostride = 3; /* distance between two elements in
    +                                      the same column */
    +        int *inembed = n, *onembed = n;
    +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Advanced-Interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Advanced-Interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,76 @@ + + +Advanced Interface - FFTW 3.3.4 + + + + + + + + + + + + + +
    + +

    +Next: , +Previous: Basic Interface, +Up: FFTW Reference +


    +
    + +

    4.4 Advanced Interface

    + +

    +FFTW's “advanced” interface supplements the basic interface with four +new planner routines, providing a new level of flexibility: you can plan +a transform of multiple arrays simultaneously, operate on non-contiguous +(strided) data, and transform a subset of a larger multi-dimensional +array. Other than these additional features, the planner operates in +the same fashion as in the basic interface, and the resulting +fftw_plan is used in the same way (see Using Plans). + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Advanced-Real_002ddata-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Advanced-Real_002ddata-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,103 @@ + + +Advanced Real-data DFTs - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    4.4.2 Advanced Real-data DFTs

    + +
         fftw_plan fftw_plan_many_dft_r2c(int rank, const int *n, int howmany,
    +                                      double *in, const int *inembed,
    +                                      int istride, int idist,
    +                                      fftw_complex *out, const int *onembed,
    +                                      int ostride, int odist,
    +                                      unsigned flags);
    +     fftw_plan fftw_plan_many_dft_c2r(int rank, const int *n, int howmany,
    +                                      fftw_complex *in, const int *inembed,
    +                                      int istride, int idist,
    +                                      double *out, const int *onembed,
    +                                      int ostride, int odist,
    +                                      unsigned flags);
    +
    +

    +Like fftw_plan_many_dft, these two functions add howmany, +nembed, stride, and dist parameters to the +fftw_plan_dft_r2c and fftw_plan_dft_c2r functions, but +otherwise behave the same as the basic interface. + +

    The interpretation of howmany, stride, and dist is +the same as for fftw_plan_many_dft, above. Note that the +stride and dist for the real array are in units of +double, and for the complex array are in units of +fftw_complex. + +

    If an nembed parameter is NULL, it is interpreted as what +it would be in the basic interface, as described in Real-data DFT Array Format. That is, for the complex array the size is assumed to be +the same as n, but with the last dimension cut roughly in half. +For the real array, the size is assumed to be n if the transform +is out-of-place, or n with the last dimension “padded” if the +transform is in-place. + +

    If an nembed parameter is non-NULL, it is interpreted as +the physical size of the corresponding array, in row-major order, just +as for fftw_plan_many_dft. In this case, each dimension of +nembed should be >= what it would be in the basic +interface (e.g. the halved or padded n). + +

    Arrays n, inembed, and onembed are not used after +this function returns. You can safely free or reuse them. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Advanced-Real_002dto_002dreal-Transforms.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Advanced-Real_002dto_002dreal-Transforms.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,79 @@ + + +Advanced Real-to-real Transforms - FFTW 3.3.4 + + + + + + + + + + + + +

    + + +

    +Previous: Advanced Real-data DFTs, +Up: Advanced Interface +


    +
    + +

    4.4.3 Advanced Real-to-real Transforms

    + +
         fftw_plan fftw_plan_many_r2r(int rank, const int *n, int howmany,
    +                                  double *in, const int *inembed,
    +                                  int istride, int idist,
    +                                  double *out, const int *onembed,
    +                                  int ostride, int odist,
    +                                  const fftw_r2r_kind *kind, unsigned flags);
    +
    +

    +Like fftw_plan_many_dft, this function adds howmany, +nembed, stride, and dist parameters to the +fftw_plan_r2r function, but otherwise behaves the same as the +basic interface. The interpretation of those additional parameters is +the same as for fftw_plan_many_dft. (Of course, the +stride and dist parameters are now in units of +double, not fftw_complex.) + +

    Arrays n, inembed, onembed, and kind are not +used after this function returns. You can safely free or reuse them. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Advanced-distributed_002dtranspose-interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Advanced-distributed_002dtranspose-interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,82 @@ + + +Advanced distributed-transpose interface - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.7.2 Advanced distributed-transpose interface

    + +

    The above routines are for a transpose of a matrix of numbers (of type +double), using FFTW's default block sizes. More generally, one +can perform transposes of tuples of numbers, with +user-specified block sizes for the input and output: + +

         fftw_plan fftw_mpi_plan_many_transpose
    +                     (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany,
    +                      ptrdiff_t block0, ptrdiff_t block1,
    +                      double *in, double *out, MPI_Comm comm, unsigned flags);
    +
    +

    +In this case, one is transposing an n0 by n1 matrix of +howmany-tuples (e.g. howmany = 2 for complex numbers). +The input is distributed along the n0 dimension with block size +block0, and the n1 by n0 output is distributed +along the n1 dimension with block size block1. If +FFTW_MPI_DEFAULT_BLOCK (0) is passed for a block size then FFTW +uses its default block size. To get the local size of the data on +each process, you should then call fftw_mpi_local_size_many_transposed. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Allocating-aligned-memory-in-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Allocating-aligned-memory-in-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,129 @@ + + +Allocating aligned memory in Fortran - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    7.5 Allocating aligned memory in Fortran

    + +

    In order to obtain maximum performance in FFTW, you should store your +data in arrays that have been specially aligned in memory (see SIMD alignment and fftw_malloc). Enforcing alignment also permits you to +safely use the new-array execute functions (see New-array Execute Functions) to apply a given plan to more than one pair of in/out +arrays. Unfortunately, standard Fortran arrays do not provide +any alignment guarantees. The only way to allocate aligned +memory in standard Fortran is to allocate it with an external C +function, like the fftw_alloc_real and +fftw_alloc_complex functions. Fortunately, Fortran 2003 provides +a simple way to associate such allocated memory with a standard Fortran +array pointer that you can then use normally. + +

    We therefore recommend allocating all your input/output arrays using +the following technique: + +

      + +
    1. Declare a pointer, arr, to your array of the desired type +and dimensions. For example, real(C_DOUBLE), pointer :: a(:,:) +for a 2d real array, or complex(C_DOUBLE_COMPLEX), pointer :: +a(:,:,:) for a 3d complex array. + +
    2. The number of elements to allocate must be an +integer(C_SIZE_T). You can either declare a variable of this +type, e.g. integer(C_SIZE_T) :: sz, to store the number of +elements to allocate, or you can use the int(..., C_SIZE_T) +intrinsic function, e.g. set sz = L * M * N or use +int(L * M * N, C_SIZE_T) for an L × M × N array. + +
    3. Declare a type(C_PTR) :: p to hold the return value from +FFTW's allocation routine. Set p = fftw_alloc_real(sz) for a real array, or p = fftw_alloc_complex(sz) for a complex array. + +
    4. Associate your pointer arr with the allocated memory p +using the standard c_f_pointer subroutine: call +c_f_pointer(p, arr, [...dimensions...]), where +[...dimensions...] is an array of the dimensions of the array +(in the usual Fortran order), e.g. call c_f_pointer(p, arr, +[L,M,N]) for an L × M × N array. (Alternatively, you can +omit the dimensions argument if you specified the shape explicitly +when declaring arr.) You can now use arr as a usual +multidimensional array. + +
    5. When you are done using the array, deallocate the memory by call +fftw_free(p) on p. + +
    + +

    For example, here is how we would allocate an L × M 2d real array: + +

           real(C_DOUBLE), pointer :: arr(:,:)
    +       type(C_PTR) :: p
    +       p = fftw_alloc_real(int(L * M, C_SIZE_T))
    +       call c_f_pointer(p, arr, [L,M])
    +       ...use arr and arr(i,j) as usual...
    +       call fftw_free(p)
    +
    +

    and here is an L × M × N 3d complex array: + +

           complex(C_DOUBLE_COMPLEX), pointer :: arr(:,:,:)
    +       type(C_PTR) :: p
    +       p = fftw_alloc_complex(int(L * M * N, C_SIZE_T))
    +       call c_f_pointer(p, arr, [L,M,N])
    +       ...use arr and arr(i,j,k) as usual...
    +       call fftw_free(p)
    +
    +

    See Reversing array dimensions for an example allocating a +single array and associating both real and complex array pointers with +it, for in-place real-to-complex transforms. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/An-improved-replacement-for-MPI_005fAlltoall.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/An-improved-replacement-for-MPI_005fAlltoall.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ + + +An improved replacement for MPI_Alltoall - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    6.7.3 An improved replacement for MPI_Alltoall

    + +

    We close this section by noting that FFTW's MPI transpose routines can +be thought of as a generalization of the MPI_Alltoall function +(albeit only for floating-point types), and in some circumstances can +function as an improved replacement. + + +

    MPI_Alltoall is defined by the MPI standard as: + +

         int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
    +                      void *recvbuf, int recvcnt, MPI_Datatype recvtype,
    +                      MPI_Comm comm);
    +
    +

    In particular, for double* arrays in and out, +consider the call: + +

         MPI_Alltoall(in, howmany, MPI_DOUBLE, out, howmany, MPI_DOUBLE, comm);
    +
    +

    This is completely equivalent to: + +

         MPI_Comm_size(comm, &P);
    +     plan = fftw_mpi_plan_many_transpose(P, P, howmany, 1, 1, in, out, comm, FFTW_ESTIMATE);
    +     fftw_execute(plan);
    +     fftw_destroy_plan(plan);
    +
    +

    That is, computing a P × P transpose on P processes, +with a block size of 1, is just a standard all-to-all communication. + +

    However, using the FFTW routine instead of MPI_Alltoall may +have certain advantages. First of all, FFTW's routine can operate +in-place (in == out) whereas MPI_Alltoall can only +operate out-of-place. + + +

    Second, even for out-of-place plans, FFTW's routine may be faster, +especially if you need to perform the all-to-all communication many +times and can afford to use FFTW_MEASURE or +FFTW_PATIENT. It should certainly be no slower, not including +the time to create the plan, since one of the possible algorithms that +FFTW uses for an out-of-place transpose is simply to call +MPI_Alltoall. However, FFTW also considers several other +possible algorithms that, depending on your MPI implementation and +your hardware, may be faster. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Avoiding-MPI-Deadlocks.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Avoiding-MPI-Deadlocks.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,80 @@ + + +Avoiding MPI Deadlocks - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.9 Avoiding MPI Deadlocks

    + +

    +An MPI program can deadlock if one process is waiting for a +message from another process that never gets sent. To avoid deadlocks +when using FFTW's MPI routines, it is important to know which +functions are collective: that is, which functions must +always be called in the same order from every +process in a given communicator. (For example, MPI_Barrier is +the canonical example of a collective function in the MPI standard.) + + +

    The functions in FFTW that are always collective are: every +function beginning with ‘fftw_mpi_plan’, as well as +fftw_mpi_broadcast_wisdom and fftw_mpi_gather_wisdom. +Also, the following functions from the ordinary FFTW interface are +collective when they are applied to a plan created by an +‘fftw_mpi_plan’ function: fftw_execute, +fftw_destroy_plan, and fftw_flops. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Basic-Interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Basic-Interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,85 @@ + + +Basic Interface - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Using Plans, +Up: FFTW Reference +


    +
    + +

    4.3 Basic Interface

    + +

    +Recall that the FFTW API is divided into three parts[1]: the basic interface +computes a single transform of contiguous data, the advanced +interface computes transforms of multiple or strided arrays, and the +guru interface supports the most general data layouts, +multiplicities, and strides. This section describes the basic +interface, which we expect to satisfy the needs of most users. + +

    + + +
    +
    +

    Footnotes

    [1] Gallia est +omnis divisa in partes tres (Julius Caesar).

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Basic-and-advanced-distribution-interfaces.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Basic-and-advanced-distribution-interfaces.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,148 @@ + + +Basic and advanced distribution interfaces - FFTW 3.3.4 + + + + + + + + + + + + + +
    + +

    +Next: , +Previous: MPI Data Distribution, +Up: MPI Data Distribution +


    +
    + +

    6.4.1 Basic and advanced distribution interfaces

    + +

    As with the planner interface, the ‘fftw_mpi_local_size’ +distribution interface is broken into basic and advanced +(‘_many’) interfaces, where the latter allows you to specify the +block size manually and also to request block sizes when computing +multiple transforms simultaneously. These functions are documented +more exhaustively by the FFTW MPI Reference, but we summarize the +basic ideas here using a couple of two-dimensional examples. + +

    For the 100 × 200 complex-DFT example, above, we would find +the distribution by calling the following function in the basic +interface: + +

         ptrdiff_t fftw_mpi_local_size_2d(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm,
    +                                      ptrdiff_t *local_n0, ptrdiff_t *local_0_start);
    +
    +

    +Given the total size of the data to be transformed (here, n0 = +100 and n1 = 200) and an MPI communicator (comm), this +function provides three numbers. + +

    First, it describes the shape of the local data: the current process +should store a local_n0 by n1 slice of the overall +dataset, in row-major order (n1 dimension contiguous), starting +at index local_0_start. That is, if the total dataset is +viewed as a n0 by n1 matrix, the current process should +store the rows local_0_start to +local_0_start+local_n0-1. Obviously, if you are running with +only a single MPI process, that process will store the entire array: +local_0_start will be zero and local_n0 will be +n0. See Row-major Format. + + +

    Second, the return value is the total number of data elements (e.g., +complex numbers for a complex DFT) that should be allocated for the +input and output arrays on the current process (ideally with +fftw_malloc or an ‘fftw_alloc’ function, to ensure optimal +alignment). It might seem that this should always be equal to +local_n0 * n1, but this is not the case. FFTW's +distributed FFT algorithms require data redistributions at +intermediate stages of the transform, and in some circumstances this +may require slightly larger local storage. This is discussed in more +detail below, under Load balancing. + + +

    The advanced-interface ‘local_size’ function for multidimensional +transforms returns the same three things (local_n0, +local_0_start, and the total number of elements to allocate), +but takes more inputs: + +

         ptrdiff_t fftw_mpi_local_size_many(int rnk, const ptrdiff_t *n,
    +                                        ptrdiff_t howmany,
    +                                        ptrdiff_t block0,
    +                                        MPI_Comm comm,
    +                                        ptrdiff_t *local_n0,
    +                                        ptrdiff_t *local_0_start);
    +
    +

    +The two-dimensional case above corresponds to rnk = 2 and an +array n of length 2 with n[0] = n0 and n[1] = n1. +This routine is for any rnk > 1; one-dimensional transforms +have their own interface because they work slightly differently, as +discussed below. + +

    First, the advanced interface allows you to perform multiple +transforms at once, of interleaved data, as specified by the +howmany parameter. (howmany is 1 for a single +transform.) + +

    Second, here you can specify your desired block size in the n0 +dimension, block0. To use FFTW's default block size, pass +FFTW_MPI_DEFAULT_BLOCK (0) for block0. Otherwise, on +P processes, FFTW will return local_n0 equal to +block0 on the first n0 / block0 processes (rounded down), +return local_n0 equal to n0 - block0 * (n0 / block0) on +the next process, and local_n0 equal to zero on any remaining +processes. In general, we recommend using the default block size +(which corresponds to n0 / P, rounded up). + + +

    For example, suppose you have P = 4 processes and n0 = +21. The default will be a block size of 6, which will give +local_n0 = 6 on the first three processes and local_n0 = +3 on the last process. Instead, however, you could specify +block0 = 5 if you wanted, which would give local_n0 = 5 +on processes 0 to 2, local_n0 = 6 on process 3. (This choice, +while it may look superficially more “balanced,” has the same +critical path as FFTW's default but requires more communications.) + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Basic-distributed_002dtranspose-interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Basic-distributed_002dtranspose-interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ + + +Basic distributed-transpose interface - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.7.1 Basic distributed-transpose interface

    + +

    In particular, suppose that we have an n0 by n1 array in +row-major order, block-distributed across the n0 dimension. To +transpose this into an n1 by n0 array block-distributed +across the n1 dimension, we would create a plan by calling the +following function: + +

         fftw_plan fftw_mpi_plan_transpose(ptrdiff_t n0, ptrdiff_t n1,
    +                                       double *in, double *out,
    +                                       MPI_Comm comm, unsigned flags);
    +
    +

    +The input and output arrays (in and out) can be the +same. The transpose is actually executed by calling +fftw_execute on the plan, as usual. + + +

    The flags are the usual FFTW planner flags, but support +two additional flags: FFTW_MPI_TRANSPOSED_OUT and/or +FFTW_MPI_TRANSPOSED_IN. What these flags indicate, for +transpose plans, is that the output and/or input, respectively, are +locally transposed. That is, on each process input data is +normally stored as a local_n0 by n1 array in row-major +order, but for an FFTW_MPI_TRANSPOSED_IN plan the input data is +stored as n1 by local_n0 in row-major order. Similarly, +FFTW_MPI_TRANSPOSED_OUT means that the output is n0 by +local_n1 instead of local_n1 by n0. + + +

    To determine the local size of the array on each process before and +after the transpose, as well as the amount of storage that must be +allocated, one should call fftw_mpi_local_size_2d_transposed, +just as for a 2d DFT as described in the previous section: + +

         ptrdiff_t fftw_mpi_local_size_2d_transposed
    +                     (ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm,
    +                      ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
    +                      ptrdiff_t *local_n1, ptrdiff_t *local_1_start);
    +
    +

    +Again, the return value is the local storage to allocate, which in +this case is the number of real (double) values rather +than complex numbers as in the previous examples. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Calling-FFTW-from-Legacy-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Calling-FFTW-from-Legacy-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,92 @@ + + +Calling FFTW from Legacy Fortran - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    8 Calling FFTW from Legacy Fortran

    + +

    +This chapter describes the interface to FFTW callable by Fortran code +in older compilers not supporting the Fortran 2003 C interoperability +features (see Calling FFTW from Modern Fortran). This interface +has the major disadvantage that it is not type-checked, so if you +mistake the argument types or ordering then your program will not have +any compiler errors, and will likely crash at runtime. So, greater +care is needed. Also, technically interfacing older Fortran versions +to C is nonstandard, but in practice we have found that the techniques +used in this chapter have worked with all known Fortran compilers for +many years. + +

    The legacy Fortran interface differs from the C interface only in the +prefix (‘dfftw_’ instead of ‘fftw_’ in double precision) and +a few other minor details. This Fortran interface is included in the +FFTW libraries by default, unless a Fortran compiler isn't found on +your system or --disable-fortran is included in the +configure flags. We assume here that the reader is already +familiar with the usage of FFTW in C, as described elsewhere in this +manual. + +

    The MPI parallel interface to FFTW is not currently available +to legacy Fortran. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Calling-FFTW-from-Modern-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Calling-FFTW-from-Modern-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,87 @@ + + +Calling FFTW from Modern Fortran - FFTW 3.3.4 + + + + + + + + + + + + + + +

    7 Calling FFTW from Modern Fortran

    + +

    +Fortran 2003 standardized ways for Fortran code to call C libraries, +and this allows us to support a direct translation of the FFTW C API +into Fortran. Compared to the legacy Fortran 77 interface +(see Calling FFTW from Legacy Fortran), this direct interface +offers many advantages, especially compile-time type-checking and +aligned memory allocation. As of this writing, support for these C +interoperability features seems widespread, having been implemented in +nearly all major Fortran compilers (e.g. GNU, Intel, IBM, +Oracle/Solaris, Portland Group, NAG). + +This chapter documents that interface. For the most part, since this +interface allows Fortran to call the C interface directly, the usage +is identical to C translated to Fortran syntax. However, there are a +few subtle points such as memory allocation, wisdom, and data types +that deserve closer attention. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Caveats-in-Using-Wisdom.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Caveats-in-Using-Wisdom.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,101 @@ + + +Caveats in Using Wisdom - FFTW 3.3.4 + + + + + + + + + + + + + + +

    3.4 Caveats in Using Wisdom

    + +

    +

    +For in much wisdom is much grief, and he that increaseth knowledge +increaseth sorrow. +[Ecclesiastes 1:18] +
    + +

    There are pitfalls to using wisdom, in that it can negate FFTW's +ability to adapt to changing hardware and other conditions. For +example, it would be perfectly possible to export wisdom from a +program running on one processor and import it into a program running +on another processor. Doing so, however, would mean that the second +program would use plans optimized for the first processor, instead of +the one it is running on. + +

    It should be safe to reuse wisdom as long as the hardware and program +binaries remain unchanged. (Actually, the optimal plan may change even +between runs of the same binary on identical hardware, due to +differences in the virtual memory environment, etcetera. Users +seriously interested in performance should worry about this problem, +too.) It is likely that, if the same wisdom is used for two +different program binaries, even running on the same machine, the +plans may be sub-optimal because of differing code alignments. It is +therefore wise to recreate wisdom every time an application is +recompiled. The more the underlying hardware and software changes +between the creation of wisdom and its use, the greater grows +the risk of sub-optimal plans. + +

    Nevertheless, if the choice is between using FFTW_ESTIMATE or +using possibly-suboptimal wisdom (created on the same machine, but for a +different binary), the wisdom is likely to be better. For this reason, +we provide a function to import wisdom from a standard system-wide +location (/etc/fftw/wisdom on Unix): + +

         int fftw_import_system_wisdom(void);
    +
    +

    +FFTW also provides a standalone program, fftw-wisdom (described +by its own man page on Unix) with which users can create wisdom, +e.g. for a canonical set of sizes to store in the system wisdom file. +See Wisdom Utilities. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Column_002dmajor-Format.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Column_002dmajor-Format.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,80 @@ + + +Column-major Format - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    3.2.2 Column-major Format

    + +

    +Readers from the Fortran world are used to arrays stored in +column-major order (sometimes called “Fortran order”). This is +essentially the exact opposite of row-major order in that, here, the +first dimension's index varies most quickly. + +

    If you have an array stored in column-major order and wish to +transform it using FFTW, it is quite easy to do. When creating the +plan, simply pass the dimensions of the array to the planner in +reverse order. For example, if your array is a rank three +N x M x L matrix in column-major order, you should pass the +dimensions of the array as if it were an L x M x N matrix +(which it is, from the perspective of FFTW). This is done for you +automatically by the FFTW legacy-Fortran interface +(see Calling FFTW from Legacy Fortran), but you must do it +manually with the modern Fortran interface (see Reversing array dimensions). + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Combining-MPI-and-Threads.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Combining-MPI-and-Threads.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,135 @@ + + +Combining MPI and Threads - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.11 Combining MPI and Threads

    + +

    +In certain cases, it may be advantageous to combine MPI +(distributed-memory) and threads (shared-memory) parallelization. +FFTW supports this, with certain caveats. For example, if you have a +cluster of 4-processor shared-memory nodes, you may want to use +threads within the nodes and MPI between the nodes, instead of MPI for +all parallelization. + +

    In particular, it is possible to seamlessly combine the MPI FFTW +routines with the multi-threaded FFTW routines (see Multi-threaded FFTW). However, some care must be taken in the initialization code, +which should look something like this: + +

         int threads_ok;
    +     
    +     int main(int argc, char **argv)
    +     {
    +         int provided;
    +         MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    +         threads_ok = provided >= MPI_THREAD_FUNNELED;
    +     
    +         if (threads_ok) threads_ok = fftw_init_threads();
    +         fftw_mpi_init();
    +     
    +         ...
    +         if (threads_ok) fftw_plan_with_nthreads(...);
    +         ...
    +     
    +         MPI_Finalize();
    +     }
    +
    +

    +First, note that instead of calling MPI_Init, you should call +MPI_Init_thread, which is the initialization routine defined +by the MPI-2 standard to indicate to MPI that your program will be +multithreaded. We pass MPI_THREAD_FUNNELED, which indicates +that we will only call MPI routines from the main thread. (FFTW will +launch additional threads internally, but the extra threads will not +call MPI code.) (You may also pass MPI_THREAD_SERIALIZED or +MPI_THREAD_MULTIPLE, which requests additional multithreading +support from the MPI implementation, but this is not required by +FFTW.) The provided parameter returns what level of threads +support is actually supported by your MPI implementation; this +must be at least MPI_THREAD_FUNNELED if you want to call +the FFTW threads routines, so we define a global variable +threads_ok to record this. You should only call +fftw_init_threads or fftw_plan_with_nthreads if +threads_ok is true. For more information on thread safety in +MPI, see the +MPI and Threads section of the MPI-2 standard. + + +

    Second, we must call fftw_init_threads before +fftw_mpi_init. This is critical for technical reasons having +to do with how FFTW initializes its list of algorithms. + +

    Then, if you call fftw_plan_with_nthreads(N), every MPI +process will launch (up to) N threads to parallelize its transforms. + +

    For example, in the hypothetical cluster of 4-processor nodes, you +might wish to launch only a single MPI process per node, and then call +fftw_plan_with_nthreads(4) on each process to use all +processors in the nodes. + +

    This may or may not be faster than simply using as many MPI processes +as you have processors, however. On the one hand, using threads +within a node eliminates the need for explicit message passing within +the node. On the other hand, FFTW's transpose routines are not +multi-threaded, and this means that the communications that do take +place will not benefit from parallelization within the node. +Moreover, many MPI implementations already have optimizations to +exploit shared memory when it is available, so adding the +multithreaded FFTW on top of this may be superfluous. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Complex-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Complex-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,141 @@ + + +Complex DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Basic Interface, +Up: Basic Interface +


    +
    + +

    4.3.1 Complex DFTs

    + +
         fftw_plan fftw_plan_dft_1d(int n0,
    +                                fftw_complex *in, fftw_complex *out,
    +                                int sign, unsigned flags);
    +     fftw_plan fftw_plan_dft_2d(int n0, int n1,
    +                                fftw_complex *in, fftw_complex *out,
    +                                int sign, unsigned flags);
    +     fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2,
    +                                fftw_complex *in, fftw_complex *out,
    +                                int sign, unsigned flags);
    +     fftw_plan fftw_plan_dft(int rank, const int *n,
    +                             fftw_complex *in, fftw_complex *out,
    +                             int sign, unsigned flags);
    +
    +

    +Plan a complex input/output discrete Fourier transform (DFT) in zero or +more dimensions, returning an fftw_plan (see Using Plans). + +

    Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + +

    The planner returns NULL if the plan cannot be created. In the +standard FFTW distribution, the basic interface is guaranteed to return +a non-NULL plan. A plan may be NULL, however, if you are +using a customized FFTW configuration supporting a restricted set of +transforms. + +

    Arguments
    + +
      +
    • rank is the rank of the transform (it should be the size of the +array *n), and can be any non-negative integer. (See Complex Multi-Dimensional DFTs, for the definition of “rank”.) The +‘_1d’, ‘_2d’, and ‘_3d’ planners correspond to a +rank of 1, 2, and 3, respectively. The rank +may be zero, which is equivalent to a rank-1 transform of size 1, i.e. a +copy of one number from input to output. + +
    • n0, n1, n2, or n[0..rank-1] (as appropriate +for each routine) specify the size of the transform dimensions. They +can be any positive integer. + +
        +
      • Multi-dimensional arrays are stored in row-major order with dimensions: +n0 x n1; or n0 x n1 x n2; or +n[0] x n[1] x ... x n[rank-1]. +See Multi-dimensional Array Format. +
      • FFTW is best at handling sizes of the form +2^a 3^b 5^c 7^d + 11^e 13^f, where e+f is either 0 or 1, and the other exponents +are arbitrary. Other sizes are computed by means of a slow, +general-purpose algorithm (which nevertheless retains O(n log n) performance even for prime sizes). It is possible to customize FFTW +for different array sizes; see Installation and Customization. +Transforms whose sizes are powers of 2 are especially fast. +
      + +
    • in and out point to the input and output arrays of the +transform, which may be the same (yielding an in-place transform). +These arrays are overwritten during planning, unless +FFTW_ESTIMATE is used in the flags. (The arrays need not be +initialized, but they must be allocated.) + +

      If in == out, the transform is in-place and the input +array is overwritten. If in != out, the two arrays must +not overlap (but FFTW does not check for this condition). + +

    • sign is the sign of the exponent in the formula that defines the +Fourier transform. It can be -1 (= FFTW_FORWARD) or ++1 (= FFTW_BACKWARD). + +
    • flags is a bitwise OR (‘|’) of zero or more planner flags, +as defined in Planner Flags. + +
    + +

    FFTW computes an unnormalized transform: computing a forward followed by +a backward transform (or vice versa) will result in the original data +multiplied by the size of the transform (the product of the dimensions). +For more information, see What FFTW Really Computes. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Complex-Multi_002dDimensional-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Complex-Multi_002dDimensional-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,138 @@ + + +Complex Multi-Dimensional DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    2.2 Complex Multi-Dimensional DFTs

    + +

    Multi-dimensional transforms work much the same way as one-dimensional +transforms: you allocate arrays of fftw_complex (preferably +using fftw_malloc), create an fftw_plan, execute it as +many times as you want with fftw_execute(plan), and clean up +with fftw_destroy_plan(plan) (and fftw_free). + +

    FFTW provides two routines for creating plans for 2d and 3d transforms, +and one routine for creating plans of arbitrary dimensionality. +The 2d and 3d routines have the following signature: +

         fftw_plan fftw_plan_dft_2d(int n0, int n1,
    +                                fftw_complex *in, fftw_complex *out,
    +                                int sign, unsigned flags);
    +     fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2,
    +                                fftw_complex *in, fftw_complex *out,
    +                                int sign, unsigned flags);
    +
    +

    +These routines create plans for n0 by n1 two-dimensional +(2d) transforms and n0 by n1 by n2 3d transforms, +respectively. All of these transforms operate on contiguous arrays in +the C-standard row-major order, so that the last dimension has the +fastest-varying index in the array. This layout is described further in +Multi-dimensional Array Format. + +

    FFTW can also compute transforms of higher dimensionality. In order to +avoid confusion between the various meanings of the word +“dimension”, we use the term rank +to denote the number of independent indices in an array.[1] For +example, we say that a 2d transform has rank 2, a 3d transform has +rank 3, and so on. You can plan transforms of arbitrary rank by +means of the following function: + +

         fftw_plan fftw_plan_dft(int rank, const int *n,
    +                             fftw_complex *in, fftw_complex *out,
    +                             int sign, unsigned flags);
    +
    +

    +Here, n is a pointer to an array n[rank] denoting an +n[0] by n[1] by ... by n[rank-1] transform. +Thus, for example, the call +

         fftw_plan_dft_2d(n0, n1, in, out, sign, flags);
    +
    +

    is equivalent to the following code fragment: +

         int n[2];
    +     n[0] = n0;
    +     n[1] = n1;
    +     fftw_plan_dft(2, n, in, out, sign, flags);
    +
    +

    fftw_plan_dft is not restricted to 2d and 3d transforms, +however, but it can plan transforms of arbitrary rank. + +

    You may have noticed that all the planner routines described so far +have overlapping functionality. For example, you can plan a 1d or 2d +transform by using fftw_plan_dft with a rank of 1 +or 2, or even by calling fftw_plan_dft_3d with n0 +and/or n1 equal to 1 (with no loss in efficiency). This +pattern continues, and FFTW's planning routines in general form a +“partial order,” sequences of +interfaces with strictly increasing generality but correspondingly +greater complexity. + +

    fftw_plan_dft is the most general complex-DFT routine that we +describe in this tutorial, but there are also the advanced and guru interfaces, +which allow one to efficiently combine multiple/strided transforms +into a single FFTW plan, transform a subset of a larger +multi-dimensional array, and/or to handle more general complex-number +formats. For more information, see FFTW Reference. + + +

    +
    +

    Footnotes

    [1] The +term “rank” is commonly used in the APL, FORTRAN, and Common Lisp +traditions, although it is not so common in the C world.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Complex-One_002dDimensional-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Complex-One_002dDimensional-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,192 @@ + + +Complex One-Dimensional DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +
    + + +

    +Next: , +Previous: Tutorial, +Up: Tutorial +


    +
    + +

    2.1 Complex One-Dimensional DFTs

    + +
    +Plan: To bother about the best method of accomplishing an accidental result. +[Ambrose Bierce, The Enlarged Devil's Dictionary.] +
    + +

    The basic usage of FFTW to compute a one-dimensional DFT of size +N is simple, and it typically looks something like this code: + +

         #include <fftw3.h>
    +     ...
    +     {
    +         fftw_complex *in, *out;
    +         fftw_plan p;
    +         ...
    +         in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
    +         out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
    +         p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
    +         ...
    +         fftw_execute(p); /* repeat as needed */
    +         ...
    +         fftw_destroy_plan(p);
    +         fftw_free(in); fftw_free(out);
    +     }
    +
    +

    You must link this code with the fftw3 library. On Unix systems, +link with -lfftw3 -lm. + +

    The example code first allocates the input and output arrays. You can +allocate them in any way that you like, but we recommend using +fftw_malloc, which behaves like +malloc except that it properly aligns the array when SIMD +instructions (such as SSE and Altivec) are available (see SIMD alignment and fftw_malloc). [Alternatively, we provide a convenient wrapper function fftw_alloc_complex(N) which has the same effect.] + + +

    The data is an array of type fftw_complex, which is by default a +double[2] composed of the real (in[i][0]) and imaginary +(in[i][1]) parts of a complex number. + +The next step is to create a plan, which is an object +that contains all the data that FFTW needs to compute the FFT. +This function creates the plan: + +

         fftw_plan fftw_plan_dft_1d(int n, fftw_complex *in, fftw_complex *out,
    +                                int sign, unsigned flags);
    +
    +

    +The first argument, n, is the size of the transform you are +trying to compute. The size n can be any positive integer, but +sizes that are products of small factors are transformed most +efficiently (although prime sizes still use an O(n log n) algorithm). + +

    The next two arguments are pointers to the input and output arrays of +the transform. These pointers can be equal, indicating an +in-place transform. + + +

    The fourth argument, sign, can be either FFTW_FORWARD +(-1) or FFTW_BACKWARD (+1), +and indicates the direction of the transform you are interested in; +technically, it is the sign of the exponent in the transform. + +

    The flags argument is usually either FFTW_MEASURE or +FFTW_ESTIMATE. FFTW_MEASURE instructs FFTW to run +and measure the execution time of several FFTs in order to find the +best way to compute the transform of size n. This process takes +some time (usually a few seconds), depending on your machine and on +the size of the transform. FFTW_ESTIMATE, on the contrary, +does not run any computation and just builds a +reasonable plan that is probably sub-optimal. In short, if your +program performs many transforms of the same size and initialization +time is not important, use FFTW_MEASURE; otherwise use the +estimate. + +

    You must create the plan before initializing the input, because +FFTW_MEASURE overwrites the in/out arrays. +(Technically, FFTW_ESTIMATE does not touch your arrays, but you +should always create plans first just to be sure.) + +

    Once the plan has been created, you can use it as many times as you +like for transforms on the specified in/out arrays, +computing the actual transforms via fftw_execute(plan): +

         void fftw_execute(const fftw_plan plan);
    +
    +

    +The DFT results are stored in-order in the array out, with the +zero-frequency (DC) component in out[0]. +If in != out, the transform is out-of-place and the input +array in is not modified. Otherwise, the input array is +overwritten with the transform. + +

    If you want to transform a different array of the same size, you +can create a new plan with fftw_plan_dft_1d and FFTW +automatically reuses the information from the previous plan, if +possible. Alternatively, with the “guru” interface you can apply a +given plan to a different array, if you are careful. +See FFTW Reference. + +

    When you are done with the plan, you deallocate it by calling +fftw_destroy_plan(plan): +

         void fftw_destroy_plan(fftw_plan plan);
    +
    +

    If you allocate an array with fftw_malloc() you must deallocate +it with fftw_free(). Do not use free() or, heaven +forbid, delete. + +FFTW computes an unnormalized DFT. Thus, computing a forward +followed by a backward transform (or vice versa) results in the original +array scaled by n. For the definition of the DFT, see What FFTW Really Computes. + + +

    If you have a C compiler, such as gcc, that supports the +C99 standard, and you #include <complex.h> before +<fftw3.h>, then fftw_complex is the native +double-precision complex type and you can manipulate it with ordinary +arithmetic. Otherwise, FFTW defines its own complex type, which is +bit-compatible with the C99 complex type. See Complex numbers. +(The C++ <complex> template class may also be usable via a +typecast.) + +To use single or long-double precision versions of FFTW, replace the +fftw_ prefix by fftwf_ or fftwl_ and link with +-lfftw3f or -lfftw3l, but use the same +<fftw3.h> header file. + + +

    Many more flags exist besides FFTW_MEASURE and +FFTW_ESTIMATE. For example, use FFTW_PATIENT if you're +willing to wait even longer for a possibly even faster plan (see FFTW Reference). +You can also save plans for future use, as described by Words of Wisdom-Saving Plans. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Complex-numbers.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Complex-numbers.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,96 @@ + + +Complex numbers - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Data Types and Files, +Up: Data Types and Files +


    +
    + +

    4.1.1 Complex numbers

    + +

    The default FFTW interface uses double precision for all +floating-point numbers, and defines a fftw_complex type to hold +complex numbers as: + +

         typedef double fftw_complex[2];
    +
    +

    +Here, the [0] element holds the real part and the [1] +element holds the imaginary part. + +

    Alternatively, if you have a C compiler (such as gcc) that +supports the C99 revision of the ANSI C standard, you can use C's new +native complex type (which is binary-compatible with the typedef above). +In particular, if you #include <complex.h> before +<fftw3.h>, then fftw_complex is defined to be the native +complex type and you can manipulate it with ordinary arithmetic +(e.g. x = y * (3+4*I), where x and y are +fftw_complex and I is the standard symbol for the +imaginary unit). + + +

    C++ has its own complex<T> template class, defined in the +standard <complex> header file. Reportedly, the C++ standards +committee has recently agreed to mandate that the storage format used +for this type be binary-compatible with the C99 type, i.e. an array +T[2] with consecutive real [0] and imaginary [1] +parts. (See report +http://www.open-std.org/jtc1/sc22/WG21/docs/papers/2002/n1388.pdf WG21/N1388.) Although not part of the official standard as of this +writing, the proposal stated that: “This solution has been tested with +all current major implementations of the standard library and shown to +be working.” To the extent that this is true, if you have a variable +complex<double> *x, you can pass it directly to FFTW via +reinterpret_cast<fftw_complex*>(x). + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Concept-Index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Concept-Index.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,357 @@ + + +Concept Index - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Next: , +Previous: License and Copyright, +Up: Top +


    +
    + +

    13 Concept Index

    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Cycle-Counters.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Cycle-Counters.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ + + +Cycle Counters - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    10.3 Cycle Counters

    + +

    +FFTW's planner actually executes and times different possible FFT +algorithms in order to pick the fastest plan for a given n. In +order to do this in as short a time as possible, however, the timer must +have a very high resolution, and to accomplish this we employ the +hardware cycle counters that are available on most CPUs. +Currently, FFTW supports the cycle counters on x86, PowerPC/POWER, Alpha, +UltraSPARC (SPARC v9), IA64, PA-RISC, and MIPS processors. + +

    Access to the cycle counters, unfortunately, is a compiler and/or +operating-system dependent task, often requiring inline assembly +language, and it may be that your compiler is not supported. If you are +not supported, FFTW will by default fall back on its estimator +(effectively using FFTW_ESTIMATE for all plans). + +You can add support by editing the file kernel/cycle.h; normally, +this will involve adapting one of the examples already present in order +to use the inline-assembler syntax for your C compiler, and will only +require a couple of lines of code. Anyone adding support for a new +system to cycle.h is encouraged to email us at fftw@fftw.org. + +

    If a cycle counter is not available on your system (e.g. some embedded +processor), and you don't want to use estimated plans, as a last resort +you can use the --with-slow-timer option to configure (on +Unix) or #define WITH_SLOW_TIMER in config.h (elsewhere). +This will use the much lower-resolution gettimeofday function, or even +clock if the former is unavailable, and planning will be +extremely slow. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Data-Types-and-Files.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Data-Types-and-Files.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,74 @@ + + +Data Types and Files - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: FFTW Reference, +Up: FFTW Reference +


    +
    + +

    4.1 Data Types and Files

    + +

    All programs using FFTW should include its header file: + +

         #include <fftw3.h>
    +
    +

    You must also link to the FFTW library. On Unix, this +means adding -lfftw3 -lm at the end of the link command. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Defining-an-FFTW-module.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Defining-an-FFTW-module.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,81 @@ + + +Defining an FFTW module - FFTW 3.3.4 + + + + + + + + + + + + + + +

    7.7 Defining an FFTW module

    + +

    Rather than using the include statement to include the +fftw3.f03 interface file in any subroutine where you want to +use FFTW, you might prefer to define an FFTW Fortran module. FFTW +does not install itself as a module, primarily because +fftw3.f03 can be shared between different Fortran compilers while +modules (in general) cannot. However, it is trivial to define your +own FFTW module if you want. Just create a file containing: + +

           module FFTW3
    +         use, intrinsic :: iso_c_binding
    +         include 'fftw3.f03'
    +       end module
    +
    +

    Compile this file into a module as usual for your compiler (e.g. with +gfortran -c you will get a file fftw3.mod). Now, +instead of include 'fftw3.f03', whenever you want to use FFTW +routines you can just do: + +

           use FFTW3
    +
    +

    as usual for Fortran modules. (You still need to link to the FFTW +library, of course.) + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Distributed_002dmemory-FFTW-with-MPI.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Distributed_002dmemory-FFTW-with-MPI.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,115 @@ + + +Distributed-memory FFTW with MPI - FFTW 3.3.4 + + + + + + + + + + + + +

    + + +

    +Next: , +Previous: Multi-threaded FFTW, +Up: Top +


    +
    + +

    6 Distributed-memory FFTW with MPI

    + +

    +In this chapter we document the parallel FFTW routines for parallel +systems supporting the MPI message-passing interface. Unlike the +shared-memory threads described in the previous chapter, MPI allows +you to use distributed-memory parallelism, where each CPU has +its own separate memory, and which can scale up to clusters of many +thousands of processors. This capability comes at a price, however: +each process only stores a portion of the data to be +transformed, which means that the data structures and +programming-interface are quite different from the serial or threads +versions of FFTW. + + +

    Distributed-memory parallelism is especially useful when you are +transforming arrays so large that they do not fit into the memory of a +single processor. The storage per-process required by FFTW's MPI +routines is proportional to the total array size divided by the number +of processes. Conversely, distributed-memory parallelism can easily +pose an unacceptably high communications overhead for small problems; +the threshold problem size for which parallelism becomes advantageous +will depend on the precise problem you are interested in, your +hardware, and your MPI implementation. + +

    A note on terminology: in MPI, you divide the data among a set of +“processes” which each run in their own memory address space. +Generally, each process runs on a different physical processor, but +this is not required. A set of processes in MPI is described by an +opaque data structure called a “communicator,” the most common of +which is the predefined communicator MPI_COMM_WORLD which +refers to all processes. For more information on these and +other concepts common to all MPI programs, we refer the reader to the +documentation at the MPI home page. + + +

    We assume in this chapter that the reader is familiar with the usage +of the serial (uniprocessor) FFTW, and focus only on the concepts new +to the MPI interface. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Dynamic-Arrays-in-C.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Dynamic-Arrays-in-C.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,91 @@ + + +Dynamic Arrays in C - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    3.2.4 Dynamic Arrays in C

    + +

    We recommend allocating most arrays dynamically, with +fftw_malloc. This isn't too hard to do, although it is not as +straightforward for multi-dimensional arrays as it is for +one-dimensional arrays. + +

    Creating the array is simple: using a dynamic-allocation routine like +fftw_malloc, allocate an array big enough to store N +fftw_complex values (for a complex DFT), where N is the product +of the sizes of the array dimensions (i.e. the total number of complex +values in the array). For example, here is code to allocate a +5 × 12 × 27 rank-3 array: + +

         fftw_complex *an_array;
    +     an_array = (fftw_complex*) fftw_malloc(5*12*27 * sizeof(fftw_complex));
    +
    +

    Accessing the array elements, however, is more tricky—you can't +simply use multiple applications of the ‘[]’ operator like you +could for fixed-size arrays. Instead, you have to explicitly compute +the offset into the array using the formula given earlier for +row-major arrays. For example, to reference the (i,j,k)-th +element of the array allocated above, you would use the expression +an_array[k + 27 * (j + 12 * i)]. + +

    This pain can be alleviated somewhat by defining appropriate macros, +or, in C++, creating a class and overloading the ‘()’ operator. +The recent C99 standard provides a way to reinterpret the dynamic +array as a “variable-length” multi-dimensional array amenable to +‘[]’, but this feature is not yet widely supported by compilers. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Dynamic-Arrays-in-C_002dThe-Wrong-Way.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Dynamic-Arrays-in-C_002dThe-Wrong-Way.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,93 @@ + + +Dynamic Arrays in C-The Wrong Way - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    3.2.5 Dynamic Arrays in C—The Wrong Way

    + +

    A different method for allocating multi-dimensional arrays in C is +often suggested that is incompatible with FFTW: using it will +cause FFTW to die a painful death. We discuss the technique here, +however, because it is so commonly known and used. This method is to +create arrays of pointers of arrays of pointers of ...etcetera. +For example, the analogue in this method to the example above is: + +

         int i,j;
    +     fftw_complex ***a_bad_array;  /* another way to make a 5x12x27 array */
    +     
    +     a_bad_array = (fftw_complex ***) malloc(5 * sizeof(fftw_complex **));
    +     for (i = 0; i < 5; ++i) {
    +          a_bad_array[i] =
    +             (fftw_complex **) malloc(12 * sizeof(fftw_complex *));
    +          for (j = 0; j < 12; ++j)
    +               a_bad_array[i][j] =
    +                     (fftw_complex *) malloc(27 * sizeof(fftw_complex));
    +     }
    +
    +

    As you can see, this sort of array is inconvenient to allocate (and +deallocate). On the other hand, it has the advantage that the +(i,j,k)-th element can be referenced simply by +a_bad_array[i][j][k]. + +

    If you like this technique and want to maximize convenience in accessing +the array, but still want to pass the array to FFTW, you can use a +hybrid method. Allocate the array as one contiguous block, but also +declare an array of arrays of pointers that point to appropriate places +in the block. That sort of trick is beyond the scope of this +documentation; for more information on multi-dimensional arrays in C, +see the comp.lang.c +FAQ. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Extended-and-quadruple-precision-in-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Extended-and-quadruple-precision-in-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,86 @@ + + +Extended and quadruple precision in Fortran - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    7.1.1 Extended and quadruple precision in Fortran

    + +

    +If FFTW is compiled in long double (extended) precision +(see Installation and Customization), you may be able to call the +resulting fftwl_ routines (see Precision) from Fortran if +your compiler supports the C_LONG_DOUBLE_COMPLEX type code. + +

    Because some Fortran compilers do not support +C_LONG_DOUBLE_COMPLEX, the fftwl_ declarations are +segregated into a separate interface file fftw3l.f03, which you +should include in addition to fftw3.f03 (which declares +precision-independent ‘FFTW_’ constants): + +

    +

           use, intrinsic :: iso_c_binding
    +       include 'fftw3.f03'
    +       include 'fftw3l.f03'
    +
    +

    We also support using the nonstandard __float128 +quadruple-precision type provided by recent versions of gcc on +32- and 64-bit x86 hardware (see Installation and Customization), +using the corresponding real(16) and complex(16) types +supported by gfortran. The quadruple-precision ‘fftwq_’ +functions (see Precision) are declared in a fftw3q.f03 +interface file, which should be included in addition to +fftw3l.f03, as above. You should also link with +-lfftw3q -lquadmath -lm as in C. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-Constants-in-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-Constants-in-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,80 @@ + + +FFTW Constants in Fortran - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    8.2 FFTW Constants in Fortran

    + +

    When creating plans in FFTW, a number of constants are used to specify +options, such as FFTW_MEASURE or FFTW_ESTIMATE. The +same constants must be used with the wrapper routines, but of course the +C header files where the constants are defined can't be incorporated +directly into Fortran code. + +

    Instead, we have placed Fortran equivalents of the FFTW constant +definitions in the file fftw3.f, which can be found in the same +directory as fftw3.h. If your Fortran compiler supports a +preprocessor of some sort, you should be able to include or +#include this file; otherwise, you can paste it directly into +your code. + +

    In C, you combine different flags (like FFTW_PRESERVE_INPUT and +FFTW_MEASURE) using the ‘|’ operator; in Fortran +you should just use ‘+’. (Take care not to add in the +same flag more than once, though. Alternatively, you can use the +ior intrinsic function standardized in Fortran 95.) + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-Execution-in-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-Execution-in-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,112 @@ + + +FFTW Execution in Fortran - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    8.3 FFTW Execution in Fortran

    + +

    In C, in order to use a plan, one normally calls fftw_execute, +which executes the plan to perform the transform on the input/output +arrays passed when the plan was created (see Using Plans). The +corresponding subroutine call in legacy Fortran is: +

                 call dfftw_execute(plan)
    +
    +

    +However, we have had reports that this causes problems with some +recent optimizing Fortran compilers. The problem is, because the +input/output arrays are not passed as explicit arguments to +dfftw_execute, the semantics of Fortran (unlike C) allow the +compiler to assume that the input/output arrays are not changed by +dfftw_execute. As a consequence, certain compilers end up +optimizing out or repositioning the call to dfftw_execute, +assuming incorrectly that it does nothing. + +

    There are various workarounds to this, but the safest and simplest +thing is to not use dfftw_execute in Fortran. Instead, use the +functions described in New-array Execute Functions, which take +the input/output arrays as explicit arguments. For example, if the +plan is for a complex-data DFT and was created for the arrays +in and out, you would do: +

                 call dfftw_execute_dft(plan, in, out)
    +
    +

    +There are a few things to be careful of, however: + +

      +
    • You must use the correct type of execute function, matching the way +the plan was created. Complex DFT plans should use +dfftw_execute_dft, real-input (r2c) DFT plans should use +dfftw_execute_dft_r2c, and real-output (c2r) DFT plans should +use dfftw_execute_dft_c2r. The various r2r plans should use +dfftw_execute_r2r. + +
    • You should normally pass the same input/output arrays that were used when +creating the plan. This is always safe. + +
    • If you pass different input/output arrays compared to +those used when creating the plan, you must abide by all the +restrictions of the new-array execute functions (see New-array Execute Functions). The most difficult of these, in Fortran, is the +requirement that the new arrays have the same alignment as the +original arrays, because there seems to be no way in legacy Fortran to obtain +guaranteed-aligned arrays (analogous to fftw_malloc in C). You +can, of course, use the FFTW_UNALIGNED flag when creating the +plan, in which case the plan does not depend on the alignment, but +this may sacrifice substantial performance on architectures (like x86) +with SIMD instructions (see SIMD alignment and fftw_malloc). + +
    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-Fortran-type-reference.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-Fortran-type-reference.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,129 @@ + + +FFTW Fortran type reference - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    7.3 FFTW Fortran type reference

    + +

    The following are the most important type correspondences between the +C interface and Fortran: + +

      +
    • Plans (fftw_plan and variants) are type(C_PTR) (i.e. an +opaque pointer). + +
    • The C floating-point types double, float, and long +double correspond to real(C_DOUBLE), real(C_FLOAT), and +real(C_LONG_DOUBLE), respectively. The C complex types +fftw_complex, fftwf_complex, and fftwl_complex +correspond in Fortran to complex(C_DOUBLE_COMPLEX), +complex(C_FLOAT_COMPLEX), and +complex(C_LONG_DOUBLE_COMPLEX), respectively. +Just as in C +(see Precision), the FFTW subroutines and types are prefixed with +‘fftw_’, fftwf_, and fftwl_ for the different precisions, and link to different libraries (-lfftw3, -lfftw3f, and -lfftw3l on Unix), but use the same include file fftw3.f03 and the same constants (all of which begin with ‘FFTW_’). The exception is long double precision, for which you should also include fftw3l.f03 (see Extended and quadruple precision in Fortran). + +
    • The C integer types int and unsigned (used for planner +flags) become integer(C_INT). The C integer type ptrdiff_t (e.g. in the 64-bit Guru Interface) becomes integer(C_INTPTR_T), and size_t (in fftw_malloc etc.) becomes integer(C_SIZE_T). + +
    • The fftw_r2r_kind type (see Real-to-Real Transform Kinds) +becomes integer(C_FFTW_R2R_KIND). The various constant values +of the C enumerated type (FFTW_R2HC etc.) become simply integer +constants of the same names in Fortran. + +
    • Numeric array pointer arguments (e.g. double *) +become dimension(*), intent(out) arrays of the same type, or +dimension(*), intent(in) if they are pointers to constant data +(e.g. const int *). There are a few exceptions where numeric +pointers refer to scalar outputs (e.g. for fftw_flops), in which +case they are intent(out) scalar arguments in Fortran too. +For the new-array execute functions (see New-array Execute Functions), +the input arrays are declared dimension(*), intent(inout), since +they can be modified in the case of in-place or FFTW_DESTROY_INPUT +transforms. + +
    • Pointer return values (e.g. double *) become +type(C_PTR). (If they are pointers to arrays, as for +fftw_alloc_real, you can convert them back to Fortran array +pointers with the standard intrinsic function c_f_pointer.) + +
    • The fftw_iodim type in the guru interface (see Guru vector and transform sizes) becomes type(fftw_iodim) in Fortran, a +derived data type (the Fortran analogue of C's struct) with +three integer(C_INT) components: n, is, and +os, with the same meanings as in C. The fftw_iodim64 type in the 64-bit guru interface (see 64-bit Guru Interface) is the same, except that its components are of type integer(C_INTPTR_T). + +
    • Using the wisdom import/export functions from Fortran is a bit tricky, +and is discussed in Accessing the wisdom API from Fortran. In +brief, the FILE * arguments map to type(C_PTR), const char * to character(C_CHAR), dimension(*), intent(in) (null-terminated!), and the generic read-char/write-char functions map to type(C_FUNPTR). + +
    + +

    You may be wondering if you need to search-and-replace +real(kind(0.0d0)) (or whatever your favorite Fortran spelling +of “double precision” is) with real(C_DOUBLE) everywhere in +your program, and similarly for complex and integer +types. The answer is no; you can still use your existing types. As +long as these types match their C counterparts, things should work +without a hitch. The worst that can happen, e.g. in the (unlikely) +event of a system where real(kind(0.0d0)) is different from +real(C_DOUBLE), is that the compiler will give you a +type-mismatch error. That is, if you don't use the +iso_c_binding kinds you need to accept at least the theoretical +possibility of having to change your code in response to compiler +errors on some future machine, but you don't need to worry about +silently compiling incorrect code that yields runtime errors. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Fortran-Interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Fortran-Interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,189 @@ + + +FFTW MPI Fortran Interface - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    6.13 FFTW MPI Fortran Interface

    + +

    +The FFTW MPI interface is callable from modern Fortran compilers +supporting the Fortran 2003 iso_c_binding standard for calling +C functions. As described in Calling FFTW from Modern Fortran, +this means that you can directly call FFTW's C interface from Fortran +with only minor changes in syntax. There are, however, a few things +specific to the MPI interface to keep in mind: + +

      +
    • Instead of including fftw3.f03 as in Overview of Fortran interface, you should include 'fftw3-mpi.f03' (after +use, intrinsic :: iso_c_binding as before). The +fftw3-mpi.f03 file includes fftw3.f03, so you should +not include them both yourself. (You will also want to +include the MPI header file, usually via include 'mpif.h' or +similar, although this is not needed by fftw3-mpi.f03 +per se.) (To use the ‘fftwl_’ long double extended-precision routines in supporting compilers, you should include fftw3l-mpi.f03 in addition to fftw3-mpi.f03. See Extended and quadruple precision in Fortran.) + +
    • Because of the different storage conventions between C and Fortran, +you reverse the order of your array dimensions when passing them to +FFTW (see Reversing array dimensions). This is merely a +difference in notation and incurs no performance overhead. However, +it means that, whereas in C the first dimension is distributed, +in Fortran the last dimension of your array is distributed. + +
    • In Fortran, communicators are stored as integer types; there is +no MPI_Comm type, nor is there any way to access a C +MPI_Comm. Fortunately, this is taken care of for you by the +FFTW Fortran interface: whenever the C interface expects an +MPI_Comm type, you should pass the Fortran communicator as an +integer.[1] + +
    • Because you need to call the ‘local_size’ function to find out +how much space to allocate, and this may be larger than the +local portion of the array (see MPI Data Distribution), you should +always allocate your arrays dynamically using FFTW's allocation +routines as described in Allocating aligned memory in Fortran. +(Coincidentally, this also provides the best performance by +guaranteeing proper data alignment.) + +
    • Because all sizes in the MPI FFTW interface are declared as +ptrdiff_t in C, you should use integer(C_INTPTR_T) in +Fortran (see FFTW Fortran type reference). + +
    • In Fortran, because of the language semantics, we generally recommend +using the new-array execute functions for all plans, even in the +common case where you are executing the plan on the same arrays for +which the plan was created (see Plan execution in Fortran). +However, note that in the MPI interface these functions are changed: +fftw_execute_dft becomes fftw_mpi_execute_dft, +etcetera. See Using MPI Plans. + +
    + +

    For example, here is a Fortran code snippet to perform a distributed +L × M complex DFT in-place. (This assumes you have already +initialized MPI with MPI_init and have also performed +call fftw_mpi_init.) + +

           use, intrinsic :: iso_c_binding
    +       include 'fftw3-mpi.f03'
    +       integer(C_INTPTR_T), parameter :: L = ...
    +       integer(C_INTPTR_T), parameter :: M = ...
    +       type(C_PTR) :: plan, cdata
    +       complex(C_DOUBLE_COMPLEX), pointer :: data(:,:)
    +       integer(C_INTPTR_T) :: i, j, alloc_local, local_M, local_j_offset
    +     
    +     !   get local data size and allocate (note dimension reversal)
    +       alloc_local = fftw_mpi_local_size_2d(M, L, MPI_COMM_WORLD, &
    +                                            local_M, local_j_offset)
    +       cdata = fftw_alloc_complex(alloc_local)
    +       call c_f_pointer(cdata, data, [L,local_M])
    +     
    +     !   create MPI plan for in-place forward DFT (note dimension reversal)
    +       plan = fftw_mpi_plan_dft_2d(M, L, data, data, MPI_COMM_WORLD, &
    +                                   FFTW_FORWARD, FFTW_MEASURE)
    +     
    +     ! initialize data to some function my_function(i,j)
    +       do j = 1, local_M
    +         do i = 1, L
    +           data(i, j) = my_function(i, j + local_j_offset)
    +         end do
    +       end do
    +     
    +     ! compute transform (as many times as desired)
    +       call fftw_mpi_execute_dft(plan, data, data)
    +     
    +       call fftw_destroy_plan(plan)
    +       call fftw_free(cdata)
    +
    +

    Note that we called fftw_mpi_local_size_2d and +fftw_mpi_plan_dft_2d with the dimensions in reversed order, +since an L × M Fortran array is viewed by FFTW in C as an +M × L array. This means that the array was distributed over +the M dimension, the local portion of which is a +L × local_M array in Fortran. (You must not use an +allocate statement to allocate an L × local_M array, +however; you must allocate alloc_local complex numbers, which +may be greater than L * local_M, in order to reserve space for +intermediate steps of the transform.) Finally, we mention that +because C's array indices are zero-based, the local_j_offset +argument can conveniently be interpreted as an offset in the 1-based +j index (rather than as a starting index as in C). + +

    If instead you had used the ior(FFTW_MEASURE, +FFTW_MPI_TRANSPOSED_OUT) flag, the output of the transform would be a +transposed M × local_L array, associated with the same +cdata allocation (since the transform is in-place), and which +you could declare with: + +

           complex(C_DOUBLE_COMPLEX), pointer :: tdata(:,:)
    +       ...
    +       call c_f_pointer(cdata, tdata, [M,local_L])
    +
    +

    where local_L would have been obtained by changing the +fftw_mpi_local_size_2d call to: + +

           alloc_local = fftw_mpi_local_size_2d_transposed(M, L, MPI_COMM_WORLD, &
    +                                local_M, local_j_offset, local_L, local_i_offset)
    +
    +
    +
    +

    Footnotes

    [1] Technically, this is because you aren't +actually calling the C functions directly. You are calling wrapper +functions that translate the communicator with MPI_Comm_f2c +before calling the ordinary C interface. This is all done +transparently, however, since the fftw3-mpi.f03 interface file +renames the wrappers so that they are called in Fortran with the same +names as the C interface functions.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Installation.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Installation.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,92 @@ + + +FFTW MPI Installation - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    6.1 FFTW MPI Installation

    + +

    All of the FFTW MPI code is located in the mpi subdirectory of +the FFTW package. On Unix systems, the FFTW MPI libraries and header +files are automatically configured, compiled, and installed along with +the uniprocessor FFTW libraries simply by including +--enable-mpi in the flags to the configure script +(see Installation on Unix). + + +

    Any implementation of the MPI standard, version 1 or later, should +work with FFTW. The configure script will attempt to +automatically detect how to compile and link code using your MPI +implementation. In some cases, especially if you have multiple +different MPI implementations installed or have an unusual MPI +software package, you may need to provide this information explicitly. + +

    Most commonly, one compiles MPI code by invoking a special compiler +command, typically mpicc for C code. The configure +script knows the most common names for this command, but you can +specify the MPI compilation command explicitly by setting the +MPICC variable, as in ‘./configure MPICC=mpicc ...’. + + +

    If, instead of a special compiler command, you need to link a certain +library, you can specify the link command via the MPILIBS +variable, as in ‘./configure MPILIBS=-lmpi ...’. Note that if +your MPI library is installed in a non-standard location (one the +compiler does not know about by default), you may also have to specify +the location of the library and header files via LDFLAGS and +CPPFLAGS variables, respectively, as in ‘./configure +LDFLAGS=-L/path/to/mpi/libs CPPFLAGS=-I/path/to/mpi/include ...’. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Performance-Tips.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Performance-Tips.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,88 @@ + + +FFTW MPI Performance Tips - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.10 FFTW MPI Performance Tips

    + +

    In this section, we collect a few tips on getting the best performance +out of FFTW's MPI transforms. + +

    First, because of the 1d block distribution, FFTW's parallelization is +currently limited by the size of the first dimension. +(Multidimensional block distributions may be supported by a future +version.) More generally, you should ideally arrange the dimensions so +that FFTW can divide them equally among the processes. See Load balancing. + + +

    Second, if it is not too inconvenient, you should consider working +with transposed output for multidimensional plans, as this saves a +considerable amount of communications. See Transposed distributions. + + +

    Third, the fastest choices are generally either an in-place transform +or an out-of-place transform with the FFTW_DESTROY_INPUT flag +(which allows the input array to be used as scratch space). In-place +is especially beneficial if the amount of data per process is large. + + +

    Fourth, if you have multiple arrays to transform at once, rather than +calling FFTW's MPI transforms several times it usually seems to be +faster to interleave the data and use the advanced interface. (This +groups the communications together instead of requiring separate +messages for each transform.) + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Reference.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Reference.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,73 @@ + + +FFTW MPI Reference - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.12 FFTW MPI Reference

    + +

    This chapter provides a complete reference to all FFTW MPI functions, +datatypes, and constants. See also FFTW Reference for information +on functions and types in common with the serial interface. + +

    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Transposes.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Transposes.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,76 @@ + + +FFTW MPI Transposes - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    6.7 FFTW MPI Transposes

    + +

    +FFTW's MPI Fourier transforms rely on one or more global +transposition steps for their communications. For example, the +multidimensional transforms work by transforming along some +dimensions, then transposing to make the first dimension local and +transforming that, then transposing back. Because global +transposition of a block-distributed matrix has many other potential +uses besides FFTs, FFTW's transpose routines can be called directly, +as documented in this section. + +

    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Wisdom.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-MPI-Wisdom.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,142 @@ + + +FFTW MPI Wisdom - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    6.8 FFTW MPI Wisdom

    + +

    +FFTW's “wisdom” facility (see Words of Wisdom-Saving Plans) can +be used to save MPI plans as well as to save uniprocessor plans. +However, for MPI there are several unavoidable complications. + +

    First, the MPI standard does not guarantee that every process can +perform file I/O (at least, not using C stdio routines)—in general, +we may only assume that process 0 is capable of I/O.[1] So, if we +want to export the wisdom from a single process to a file, we must +first export the wisdom to a string, then send it to process 0, then +write it to a file. + +

    Second, in principle we may want to have separate wisdom for every +process, since in general the processes may run on different hardware +even for a single MPI program. However, in practice FFTW's MPI code +is designed for the case of homogeneous hardware (see Load balancing), and in this case it is convenient to use the same wisdom +for every process. Thus, we need a mechanism to synchronize the wisdom. + +

    To address both of these problems, FFTW provides the following two +functions: + +

         void fftw_mpi_broadcast_wisdom(MPI_Comm comm);
    +     void fftw_mpi_gather_wisdom(MPI_Comm comm);
    +
    +

    +Given a communicator comm, fftw_mpi_broadcast_wisdom +will broadcast the wisdom from process 0 to all other processes. +Conversely, fftw_mpi_gather_wisdom will collect wisdom from all +processes onto process 0. (If the plans created for the same problem +by different processes are not the same, fftw_mpi_gather_wisdom +will arbitrarily choose one of the plans.) Both of these functions +may result in suboptimal plans for different processes if the +processes are running on non-identical hardware. Both of these +functions are collective calls, which means that they must be +executed by all processes in the communicator. + + +

    So, for example, a typical code snippet to import wisdom from a file +and use it on all processes would be: + +

         {
    +         int rank;
    +     
    +         fftw_mpi_init();
    +         MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    +         if (rank == 0) fftw_import_wisdom_from_filename("mywisdom");
    +         fftw_mpi_broadcast_wisdom(MPI_COMM_WORLD);
    +     }
    +
    +

    (Note that we must call fftw_mpi_init before importing any +wisdom that might contain MPI plans.) Similarly, a typical code +snippet to export wisdom from all processes to a file is: + +

         {
    +         int rank;
    +     
    +         fftw_mpi_gather_wisdom(MPI_COMM_WORLD);
    +         MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    +         if (rank == 0) fftw_export_wisdom_to_filename("mywisdom");
    +     }
    +
    + +
    +
    +

    Footnotes

    [1] In fact, +even this assumption is not technically guaranteed by the standard, +although it seems to be universal in actual MPI implementations and is +widely assumed by MPI-using software. Technically, you need to query +the MPI_IO attribute of MPI_COMM_WORLD with +MPI_Attr_get. If this attribute is MPI_PROC_NULL, no +I/O is possible. If it is MPI_ANY_SOURCE, any process can +perform I/O. Otherwise, it is the rank of a process that can perform +I/O ... but since it is not guaranteed to yield the same rank +on all processes, you have to do an MPI_Allreduce of some kind +if you want all processes to agree about which is going to do I/O. +And even then, the standard only guarantees that this process can +perform output, but not input. See e.g. Parallel Programming +with MPI by P. S. Pacheco, section 8.1.3. Needless to say, in our +experience virtually no MPI programmers worry about this.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/FFTW-Reference.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/FFTW-Reference.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,75 @@ + + +FFTW Reference - FFTW 3.3.4 + + + + + + + + + + + + +
    + +

    +Next: Multi-threaded FFTW, +Previous: Other Important Topics, +Up: Top +


    +
    + +

    4 FFTW Reference

    + +

    This chapter provides a complete reference for all sequential (i.e., +one-processor) FFTW functions. Parallel transforms are described in +later chapters. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Fixed_002dsize-Arrays-in-C.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Fixed_002dsize-Arrays-in-C.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,91 @@ + + +Fixed-size Arrays in C - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    3.2.3 Fixed-size Arrays in C

    + +

    +A multi-dimensional array whose size is declared at compile time in C +is already in row-major order. You don't have to do anything +special to transform it. For example: + +

         {
    +          fftw_complex data[N0][N1][N2];
    +          fftw_plan plan;
    +          ...
    +          plan = fftw_plan_dft_3d(N0, N1, N2, &data[0][0][0], &data[0][0][0],
    +                                  FFTW_FORWARD, FFTW_ESTIMATE);
    +          ...
    +     }
    +
    +

    This will plan a 3d in-place transform of size N0 x N1 x N2. +Notice how we took the address of the zero-th element to pass to the +planner (we could also have used a typecast). + +

    However, we tend to discourage users from declaring their +arrays in this way, for two reasons. First, this allocates the array +on the stack (“automatic” storage), which has a very limited size on +most operating systems (declaring an array with more than a few +thousand elements will often cause a crash). (You can get around this +limitation on many systems by declaring the array as +static and/or global, but that has its own drawbacks.) +Second, it may not optimally align the array for use with a SIMD +FFTW (see SIMD alignment and fftw_malloc). Instead, we recommend +using fftw_malloc, as described below. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Forgetting-Wisdom.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Forgetting-Wisdom.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,68 @@ + + +Forgetting Wisdom - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Wisdom Import, +Up: Wisdom +


    +
    + +

    4.7.3 Forgetting Wisdom

    + +
         void fftw_forget_wisdom(void);
    +
    +

    +Calling fftw_forget_wisdom causes all accumulated wisdom +to be discarded and its associated memory to be freed. (New +wisdom can still be gathered subsequently, however.) + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Fortran-Examples.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Fortran-Examples.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,155 @@ + + +Fortran Examples - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    8.4 Fortran Examples

    + +

    In C, you might have something like the following to transform a +one-dimensional complex array: + +

                 fftw_complex in[N], out[N];
    +             fftw_plan plan;
    +     
    +             plan = fftw_plan_dft_1d(N,in,out,FFTW_FORWARD,FFTW_ESTIMATE);
    +             fftw_execute(plan);
    +             fftw_destroy_plan(plan);
    +
    +

    In Fortran, you would use the following to accomplish the same thing: + +

                 double complex in, out
    +             dimension in(N), out(N)
    +             integer*8 plan
    +     
    +             call dfftw_plan_dft_1d(plan,N,in,out,FFTW_FORWARD,FFTW_ESTIMATE)
    +             call dfftw_execute_dft(plan, in, out)
    +             call dfftw_destroy_plan(plan)
    +
    +

    +Notice how all routines are called as Fortran subroutines, and the +plan is returned via the first argument to dfftw_plan_dft_1d. +Notice also that we changed fftw_execute to +dfftw_execute_dft (see FFTW Execution in Fortran). To do +the same thing, but using 8 threads in parallel (see Multi-threaded FFTW), you would simply prefix these calls with: + +

                 integer iret
    +             call dfftw_init_threads(iret)
    +             call dfftw_plan_with_nthreads(8)
    +
    +

    +(You might want to check the value of iret: if it is zero, it +indicates an unlikely error during thread initialization.) + +

    To transform a three-dimensional array in-place with C, you might do: + +

                 fftw_complex arr[L][M][N];
    +             fftw_plan plan;
    +     
    +             plan = fftw_plan_dft_3d(L,M,N, arr,arr,
    +                                     FFTW_FORWARD, FFTW_ESTIMATE);
    +             fftw_execute(plan);
    +             fftw_destroy_plan(plan);
    +
    +

    In Fortran, you would use this instead: + +

                 double complex arr
    +             dimension arr(L,M,N)
    +             integer*8 plan
    +     
    +             call dfftw_plan_dft_3d(plan, L,M,N, arr,arr,
    +            &                       FFTW_FORWARD, FFTW_ESTIMATE)
    +             call dfftw_execute_dft(plan, arr, arr)
    +             call dfftw_destroy_plan(plan)
    +
    +

    +Note that we pass the array dimensions in the “natural” order in both C +and Fortran. + +

    To transform a one-dimensional real array in Fortran, you might do: + +

                 double precision in
    +             dimension in(N)
    +             double complex out
    +             dimension out(N/2 + 1)
    +             integer*8 plan
    +     
    +             call dfftw_plan_dft_r2c_1d(plan,N,in,out,FFTW_ESTIMATE)
    +             call dfftw_execute_dft_r2c(plan, in, out)
    +             call dfftw_destroy_plan(plan)
    +
    +

    +To transform a two-dimensional real array, out of place, you might use +the following: + +

                 double precision in
    +             dimension in(M,N)
    +             double complex out
    +             dimension out(M/2 + 1, N)
    +             integer*8 plan
    +     
    +             call dfftw_plan_dft_r2c_2d(plan,M,N,in,out,FFTW_ESTIMATE)
    +             call dfftw_execute_dft_r2c(plan, in, out)
    +             call dfftw_destroy_plan(plan)
    +
    +

    +Important: Notice that it is the first dimension of the +complex output array that is cut in half in Fortran, rather than the +last dimension as in C. This is a consequence of the interface routines +reversing the order of the array dimensions passed to FFTW so that the +Fortran program can use its ordinary column-major order. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Fortran_002dinterface-routines.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Fortran_002dinterface-routines.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,141 @@ + + +Fortran-interface routines - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    8.1 Fortran-interface routines

    + +

    Nearly all of the FFTW functions have Fortran-callable equivalents. +The name of the legacy Fortran routine is the same as that of the +corresponding C routine, but with the ‘fftw_’ prefix replaced by +‘dfftw_’.[1] The single and long-double precision +versions use ‘sfftw_’ and ‘lfftw_’, respectively, instead of +‘fftwf_’ and ‘fftwl_’; quadruple precision (real*16) +is available on some systems as ‘fftwq_’ (see Precision). +(Note that long double on x86 hardware is usually at most +80-bit extended precision, not quadruple precision.) + +

    For the most part, all of the arguments to the functions are the same, +with the following exceptions: + +

      +
    • Plan variables (what would be of type fftw_plan in C) +must be declared as a type that is at least as big as a pointer +(address) on your machine. We recommend using integer*8 everywhere, +since this should always be big enough. + +
    • Any function that returns a value (e.g. fftw_plan_dft) is +converted into a subroutine. The return value is converted into +an additional first parameter of this subroutine.[2] + +
    • The Fortran routines expect multi-dimensional arrays to be in +column-major order, which is the ordinary format of Fortran +arrays (see Multi-dimensional Array Format). They do this +transparently and costlessly simply by reversing the order of the +dimensions passed to FFTW, but this has one important consequence for +multi-dimensional real-complex transforms, discussed below. + +
    • Wisdom import and export is somewhat more tricky because one cannot +easily pass files or strings between C and Fortran; see Wisdom of Fortran?. + +
    • Legacy Fortran cannot use the fftw_malloc dynamic-allocation routine. +If you want to exploit the SIMD FFTW (see SIMD alignment and fftw_malloc), you'll +need to figure out some other way to ensure that your arrays are at +least 16-byte aligned. + +
    • Since Fortran 77 does not have data structures, the fftw_iodim +structure from the guru interface (see Guru vector and transform sizes) must be split into separate arguments. In particular, any +fftw_iodim array arguments in the C guru interface become three +integer array arguments (n, is, and os) in the +Fortran guru interface, all of whose lengths should be equal to the +corresponding rank argument. + +
    • The guru planner interface in Fortran does not do any automatic +translation between column-major and row-major; you are responsible +for setting the strides etcetera to correspond to your Fortran arrays. +However, as a slight bug that we are preserving for backwards +compatibility, the ‘plan_guru_r2r’ in Fortran does reverse the +order of its kind array parameter, so the kind array +of that routine should be in the reverse of the order of the iodim +arrays (see above). + +
    + +

    In general, you should take care to use Fortran data types that +correspond to (i.e. are the same size as) the C types used by FFTW. +In practice, this correspondence is usually straightforward +(i.e. integer corresponds to int, real +corresponds to float, etcetera). The native Fortran +double/single-precision complex type should be compatible with +fftw_complex/fftwf_complex. Such simple correspondences +are assumed in the examples below. + + + +

    +
    +

    Footnotes

    [1] Technically, Fortran 77 identifiers are not +allowed to have more than 6 characters, nor may they contain +underscores. Any compiler that enforces this limitation doesn't +deserve to link to FFTW.

    + +

    [2] The +reason for this is that some Fortran implementations seem to have +trouble with C function return values, and vice versa.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Generating-your-own-code.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Generating-your-own-code.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,95 @@ + + +Generating your own code - FFTW 3.3.4 + + + + + + + + + + + + + + +

    10.4 Generating your own code

    + +

    +The directory genfft contains the programs that were used to +generate FFTW's “codelets,” which are hard-coded transforms of small +sizes. +We do not expect casual users to employ the generator, which is a rather +sophisticated program that generates directed acyclic graphs of FFT +algorithms and performs algebraic simplifications on them. It was +written in Objective Caml, a dialect of ML, which is available at +http://caml.inria.fr/ocaml/index.en.html. + + +

    If you have Objective Caml installed (along with recent versions of +GNU autoconf, automake, and libtool), then you +can change the set of codelets that are generated or play with the +generation options. The set of generated codelets is specified by the +{dft,rdft}/{codelets,simd}/*/Makefile.am files. For example, you can add +efficient REDFT codelets of small sizes by modifying +rdft/codelets/r2r/Makefile.am. +After you modify any Makefile.am files, you can type sh +bootstrap.sh in the top-level directory followed by make to +re-generate the files. + +

    We do not provide more details about the code-generation process, since +we do not expect that most users will need to generate their own code. +However, feel free to contact us at fftw@fftw.org if +you are interested in the subject. + +

    You might find it interesting to learn Caml and/or some modern +programming techniques that we used in the generator (including monadic +programming), especially if you heard the rumor that Java and +object-oriented programming are the latest advancement in the field. +The internal operation of the codelet generator is described in the +paper, “A Fast Fourier Transform Compiler,” by M. Frigo, which is +available from the FFTW home page and also +appeared in the Proceedings of the 1999 ACM SIGPLAN Conference on +Programming Language Design and Implementation (PLDI). + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Guru-Complex-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Guru-Complex-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,124 @@ + + +Guru Complex DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.5.3 Guru Complex DFTs

    + +
         fftw_plan fftw_plan_guru_dft(
    +          int rank, const fftw_iodim *dims,
    +          int howmany_rank, const fftw_iodim *howmany_dims,
    +          fftw_complex *in, fftw_complex *out,
    +          int sign, unsigned flags);
    +     
    +     fftw_plan fftw_plan_guru_split_dft(
    +          int rank, const fftw_iodim *dims,
    +          int howmany_rank, const fftw_iodim *howmany_dims,
    +          double *ri, double *ii, double *ro, double *io,
    +          unsigned flags);
    +
    +

    +These two functions plan a complex-data, multi-dimensional DFT +for the interleaved and split format, respectively. +Transform dimensions are given by (rank, dims) over a +multi-dimensional vector (loop) of dimensions (howmany_rank, +howmany_dims). dims and howmany_dims should point +to fftw_iodim arrays of length rank and +howmany_rank, respectively. + +

    flags is a bitwise OR (‘|’) of zero or more planner flags, +as defined in Planner Flags. + +

    In the fftw_plan_guru_dft function, the pointers in and +out point to the interleaved input and output arrays, +respectively. The sign can be either -1 (= +FFTW_FORWARD) or +1 (= FFTW_BACKWARD). If the +pointers are equal, the transform is in-place. + +

    In the fftw_plan_guru_split_dft function, +ri and ii point to the real and imaginary input arrays, +and ro and io point to the real and imaginary output +arrays. The input and output pointers may be the same, indicating an +in-place transform. For example, for fftw_complex pointers +in and out, the corresponding parameters are: + +

         ri = (double *) in;
    +     ii = (double *) in + 1;
    +     ro = (double *) out;
    +     io = (double *) out + 1;
    +
    +

    Because fftw_plan_guru_split_dft accepts split arrays, strides +are expressed in units of double. For a contiguous +fftw_complex array, the overall stride of the transform should +be 2, the distance between consecutive real parts or between +consecutive imaginary parts; see Guru vector and transform sizes. Note that the dimension strides are applied equally to the +real and imaginary parts; real and imaginary arrays with different +strides are not supported. + +

    There is no sign parameter in fftw_plan_guru_split_dft. +This function always plans for an FFTW_FORWARD transform. To +plan for an FFTW_BACKWARD transform, you can exploit the +identity that the backwards DFT is equal to the forwards DFT with the +real and imaginary parts swapped. For example, in the case of the +fftw_complex arrays above, the FFTW_BACKWARD transform +is computed by the parameters: + +

         ri = (double *) in + 1;
    +     ii = (double *) in;
    +     ro = (double *) out + 1;
    +     io = (double *) out;
    +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Guru-Interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Guru-Interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,83 @@ + + +Guru Interface - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    4.5 Guru Interface

    + +

    +The “guru” interface to FFTW is intended to expose as much as possible +of the flexibility in the underlying FFTW architecture. It allows one +to compute multi-dimensional “vectors” (loops) of multi-dimensional +transforms, where each vector/transform dimension has an independent +size and stride. +One can also use more general complex-number formats, e.g. separate real +and imaginary arrays. + +

    For those users who require the flexibility of the guru interface, it is +important that they pay special attention to the documentation lest they +shoot themselves in the foot. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Guru-Real_002ddata-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Guru-Real_002ddata-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,125 @@ + + +Guru Real-data DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +
    + + +

    +Next: Guru Real-to-real Transforms, +Previous: Guru Complex DFTs, +Up: Guru Interface +


    +
    + +

    4.5.4 Guru Real-data DFTs

    + +
         fftw_plan fftw_plan_guru_dft_r2c(
    +          int rank, const fftw_iodim *dims,
    +          int howmany_rank, const fftw_iodim *howmany_dims,
    +          double *in, fftw_complex *out,
    +          unsigned flags);
    +     
    +     fftw_plan fftw_plan_guru_split_dft_r2c(
    +          int rank, const fftw_iodim *dims,
    +          int howmany_rank, const fftw_iodim *howmany_dims,
    +          double *in, double *ro, double *io,
    +          unsigned flags);
    +     
    +     fftw_plan fftw_plan_guru_dft_c2r(
    +          int rank, const fftw_iodim *dims,
    +          int howmany_rank, const fftw_iodim *howmany_dims,
    +          fftw_complex *in, double *out,
    +          unsigned flags);
    +     
    +     fftw_plan fftw_plan_guru_split_dft_c2r(
    +          int rank, const fftw_iodim *dims,
    +          int howmany_rank, const fftw_iodim *howmany_dims,
    +          double *ri, double *ii, double *out,
    +          unsigned flags);
    +
    +

    +Plan a real-input (r2c) or real-output (c2r), multi-dimensional DFT with +transform dimensions given by (rank, dims) over a +multi-dimensional vector (loop) of dimensions (howmany_rank, +howmany_dims). dims and howmany_dims should point +to fftw_iodim arrays of length rank and +howmany_rank, respectively. As for the basic and advanced +interfaces, an r2c transform is FFTW_FORWARD and a c2r transform +is FFTW_BACKWARD. + +

    The last dimension of dims is interpreted specially: +that dimension of the real array has size dims[rank-1].n, but +that dimension of the complex array has size dims[rank-1].n/2+1 +(division rounded down). The strides, on the other hand, are taken to +be exactly as specified. It is up to the user to specify the strides +appropriately for the peculiar dimensions of the data, and we do not +guarantee that the planner will succeed (return non-NULL) for +any dimensions other than those described in Real-data DFT Array Format and generalized in Advanced Real-data DFTs. (That is, +for an in-place transform, each individual dimension should be able to +operate in place.) + + +

    in and out point to the input and output arrays for r2c +and c2r transforms, respectively. For split arrays, ri and +ii point to the real and imaginary input arrays for a c2r +transform, and ro and io point to the real and imaginary +output arrays for an r2c transform. in and ro or +ri and out may be the same, indicating an in-place +transform. (In-place transforms where in and io or +ii and out are the same are not currently supported.) + +

    flags is a bitwise OR (‘|’) of zero or more planner flags, +as defined in Planner Flags. + +

    In-place transforms of rank greater than 1 are currently only +supported for interleaved arrays. For split arrays, the planner will +return NULL. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Guru-Real_002dto_002dreal-Transforms.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Guru-Real_002dto_002dreal-Transforms.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,87 @@ + + +Guru Real-to-real Transforms - FFTW 3.3.4 + + + + + + + + + + + + + +

    + + +

    +Next: , +Previous: Guru Real-data DFTs, +Up: Guru Interface +


    +
    + +

    4.5.5 Guru Real-to-real Transforms

    + +
         fftw_plan fftw_plan_guru_r2r(int rank, const fftw_iodim *dims,
    +                                  int howmany_rank,
    +                                  const fftw_iodim *howmany_dims,
    +                                  double *in, double *out,
    +                                  const fftw_r2r_kind *kind,
    +                                  unsigned flags);
    +
    +

    +Plan a real-to-real (r2r) multi-dimensional FFTW_FORWARD +transform with transform dimensions given by (rank, dims) +over a multi-dimensional vector (loop) of dimensions +(howmany_rank, howmany_dims). dims and +howmany_dims should point to fftw_iodim arrays of length +rank and howmany_rank, respectively. + +

    The transform kind of each dimension is given by the kind +parameter, which should point to an array of length rank. Valid +fftw_r2r_kind constants are given in Real-to-Real Transform Kinds. + +

    in and out point to the real input and output arrays; they +may be the same, indicating an in-place transform. + +

    flags is a bitwise OR (‘|’) of zero or more planner flags, +as defined in Planner Flags. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Guru-vector-and-transform-sizes.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Guru-vector-and-transform-sizes.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,116 @@ + + +Guru vector and transform sizes - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.5.2 Guru vector and transform sizes

    + +

    The guru interface introduces one basic new data structure, +fftw_iodim, that is used to specify sizes and strides for +multi-dimensional transforms and vectors: + +

         typedef struct {
    +          int n;
    +          int is;
    +          int os;
    +     } fftw_iodim;
    +
    +

    +Here, n is the size of the dimension, and is and os +are the strides of that dimension for the input and output arrays. (The +stride is the separation of consecutive elements along this dimension.) + +

    The meaning of the stride parameter depends on the type of the array +that the stride refers to. If the array is interleaved complex, +strides are expressed in units of complex numbers +(fftw_complex). If the array is split complex or real, strides +are expressed in units of real numbers (double). This +convention is consistent with the usual pointer arithmetic in the C +language. An interleaved array is denoted by a pointer p to +fftw_complex, so that p+1 points to the next complex +number. Split arrays are denoted by pointers to double, in +which case pointer arithmetic operates in units of +sizeof(double). + + +

    The guru planner interfaces all take a (rank, dims[rank]) +pair describing the transform size, and a (howmany_rank, +howmany_dims[howmany_rank]) pair describing the “vector” size (a +multi-dimensional loop of transforms to perform), where dims and +howmany_dims are arrays of fftw_iodim. + +

    For example, the howmany parameter in the advanced complex-DFT +interface corresponds to howmany_rank = 1, +howmany_dims[0].n = howmany, howmany_dims[0].is = +idist, and howmany_dims[0].os = odist. +(To compute a single transform, you can just use howmany_rank = 0.) + +

    A row-major multidimensional array with dimensions n[rank] +(see Row-major Format) corresponds to dims[i].n = +n[i] and the recurrence dims[i].is = n[i+1] * +dims[i+1].is (similarly for os). The stride of the last +(i=rank-1) dimension is the overall stride of the array. +For example, to be equivalent to the advanced complex-DFT interface, you would +have dims[rank-1].is = istride and +dims[rank-1].os = ostride. + + +

    In general, we only guarantee FFTW to return a non-NULL plan if +the vector and transform dimensions correspond to a set of distinct +indices, and for in-place transforms the input/output strides should +be the same. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/How-Many-Threads-to-Use_003f.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/How-Many-Threads-to-Use_003f.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,79 @@ + + +How Many Threads to Use? - FFTW 3.3.4 + + + + + + + + + + + + + +

    + + +

    +Next: , +Previous: Usage of Multi-threaded FFTW, +Up: Multi-threaded FFTW +


    +
    + +

    5.3 How Many Threads to Use?

    + +

    There is a fair amount of overhead involved in synchronizing threads, +so the optimal number of threads to use depends upon the size of the +transform as well as on the number of processors you have. + +

    As a general rule, you don't want to use more threads than you have +processors. (Using more threads will work, but there will be extra +overhead with no benefit.) In fact, if the problem size is too small, +you may want to use fewer threads than you have processors. + +

    You will have to experiment with your system to see what level of +parallelization is best for your problem size. Typically, the problem +will have to involve at least a few thousand data points before threads +become beneficial. If you plan with FFTW_PATIENT, it will +automatically disable threads for sizes that don't benefit from +parallelization. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Installation-and-Customization.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Installation-and-Customization.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,95 @@ + + +Installation and Customization - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Upgrading from FFTW version 2, +Up: Top +


    +
    + +

    10 Installation and Customization

    + +

    +This chapter describes the installation and customization of FFTW, the +latest version of which may be downloaded from +the FFTW home page. + +

    In principle, FFTW should work on any system with an ANSI C compiler +(gcc is fine). However, planner time is drastically reduced if +FFTW can exploit a hardware cycle counter; FFTW comes with cycle-counter +support for all modern general-purpose CPUs, but you may need to add a +couple of lines of code if your compiler is not yet supported +(see Cycle Counters). (On Unix, there will be a warning at the end +of the configure output if no cycle counter is found.) + + +

    Installation of FFTW is simplest if you have a Unix or a GNU system, +such as GNU/Linux, and we describe this case in the first section below, +including the use of special configuration options to e.g. install +different precisions or exploit optimizations for particular +architectures (e.g. SIMD). Compilation on non-Unix systems is a more +manual process, but we outline the procedure in the second section. It +is also likely that pre-compiled binaries will be available for popular +systems. + +

    Finally, we describe how you can customize FFTW for particular needs by +generating codelets for fast transforms of sizes not supported +efficiently by the standard FFTW distribution. + + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Installation-and-Supported-Hardware_002fSoftware.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Installation-and-Supported-Hardware_002fSoftware.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,92 @@ + + +Installation and Supported Hardware/Software - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    5.1 Installation and Supported Hardware/Software

    + +

    All of the FFTW threads code is located in the threads +subdirectory of the FFTW package. On Unix systems, the FFTW threads +libraries and header files can be automatically configured, compiled, +and installed along with the uniprocessor FFTW libraries simply by +including --enable-threads in the flags to the configure +script (see Installation on Unix), or --enable-openmp to use +OpenMP threads. + + +

    The threads routines require your operating system to have some sort +of shared-memory threads support. Specifically, the FFTW threads +package works with POSIX threads (available on most Unix variants, +from GNU/Linux to MacOS X) and Win32 threads. OpenMP threads, which +are supported in many common compilers (e.g. gcc) are also supported, +and may give better performance on some systems. (OpenMP threads are +also useful if you are employing OpenMP in your own code, in order to +minimize conflicts between threading models.) If you have a +shared-memory machine that uses a different threads API, it should be +a simple matter of programming to include support for it; see the file +threads/threads.c for more detail. + +

    You can compile FFTW with both --enable-threads and +--enable-openmp at the same time, since they install libraries +with different names (‘fftw3_threads’ and ‘fftw3_omp’, as +described below). However, your programs may only link to one +of these two libraries at a time. + +

    Ideally, of course, you should also have multiple processors in order to +get any benefit from the threaded transforms. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Installation-on-Unix.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Installation-on-Unix.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,218 @@ + + +Installation on Unix - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    10.1 Installation on Unix

    + +

    FFTW comes with a configure program in the GNU style. +Installation can be as simple as: + +

         ./configure
    +     make
    +     make install
    +
    +

    This will build the uniprocessor complex and real transform libraries +along with the test programs. (We recommend that you use GNU +make if it is available; on some systems it is called +gmake.) The “make install” command installs the fftw +and rfftw libraries in standard places, and typically requires root +privileges (unless you specify a different install directory with the +--prefix flag to configure). You can also type +“make check” to put the FFTW test programs through their paces. +If you have problems during configuration or compilation, you may want +to run “make distclean” before trying again; this ensures that +you don't have any stale files left over from previous compilation +attempts. + +

    The configure script chooses the gcc compiler by default, +if it is available; you can select some other compiler with: +

         ./configure CC="<the name of your C compiler>"
    +
    +

    The configure script knows good CFLAGS (C compiler flags) +for a few systems. If your system is not known, the configure +script will print out a warning. In this case, you should re-configure +FFTW with the command +

         ./configure CFLAGS="<write your CFLAGS here>"
    +
    +

    and then compile as usual. If you do find an optimal set of +CFLAGS for your system, please let us know what they are (along +with the output of config.guess) so that we can include them in +future releases. + +

    configure supports all the standard flags defined by the GNU +Coding Standards; see the INSTALL file in FFTW or +the GNU web page. +Note especially --help to list all flags and +--enable-shared to create shared, rather than static, libraries. +configure also accepts a few FFTW-specific flags, particularly: + +

      +
    • --enable-float: Produces a single-precision version of FFTW +(float) instead of the default double-precision (double). +See Precision. + +
    • --enable-long-double: Produces a long-double precision version of +FFTW (long double) instead of the default double-precision +(double). The configure script will halt with an error +message if long double is the same size as double on your +machine/compiler. See Precision. + +
    • --enable-quad-precision: Produces a quadruple-precision version +of FFTW using the nonstandard __float128 type provided by +gcc 4.6 or later on x86, x86-64, and Itanium architectures, +instead of the default double-precision (double). The +configure script will halt with an error message if the +compiler is not gcc version 4.6 or later or if gcc's +libquadmath library is not installed. See Precision. + +
    • --enable-threads: Enables compilation and installation of the +FFTW threads library (see Multi-threaded FFTW), which provides a +simple interface to parallel transforms for SMP systems. By default, +the threads routines are not compiled. + +
    • --enable-openmp: Like --enable-threads, but using OpenMP +compiler directives in order to induce parallelism rather than +spawning its own threads directly, and installing an ‘fftw3_omp’ library +rather than an ‘fftw3_threads’ library (see Multi-threaded FFTW). You can use both --enable-openmp and --enable-threads +since they compile/install libraries with different names. By default, +the OpenMP routines are not compiled. + +
    • --with-combined-threads: By default, if --enable-threads +is used, the threads support is compiled into a separate library that +must be linked in addition to the main FFTW library. This is so that +users of the serial library do not need to link the system threads +libraries. If --with-combined-threads is specified, however, +then no separate threads library is created, and threads are included +in the main FFTW library. This is mainly useful under Windows, where +no system threads library is required and inter-library dependencies +are problematic. + +
    • --enable-mpi: Enables compilation and installation of the FFTW +MPI library (see Distributed-memory FFTW with MPI), which provides +parallel transforms for distributed-memory systems with MPI. (By +default, the MPI routines are not compiled.) See FFTW MPI Installation. + +
    • --disable-fortran: Disables inclusion of legacy-Fortran +wrapper routines (see Calling FFTW from Legacy Fortran) in the standard +FFTW libraries. These wrapper routines increase the library size by +only a negligible amount, so they are included by default as long as +the configure script finds a Fortran compiler on your system. +(To specify a particular Fortran compiler foo, pass +F77=foo to configure.) + +
    • --with-g77-wrappers: By default, when Fortran wrappers are +included, the wrappers employ the linking conventions of the Fortran +compiler detected by the configure script. If this compiler is +GNU g77, however, then two versions of the wrappers are +included: one with g77's idiosyncratic convention of appending +two underscores to identifiers, and one with the more common +convention of appending only a single underscore. This way, the same +FFTW library will work with both g77 and other Fortran +compilers, such as GNU gfortran. However, the converse is not +true: if you configure with a different compiler, then the +g77-compatible wrappers are not included. By specifying +--with-g77-wrappers, the g77-compatible wrappers are +included in addition to wrappers for whatever Fortran compiler +configure finds. + +
    • --with-slow-timer: Disables the use of hardware cycle counters, +and falls back on gettimeofday or clock. This greatly +worsens performance, and should generally not be used (unless you don't +have a cycle counter but still really want an optimized plan regardless +of the time). See Cycle Counters. + +
    • --enable-sse, --enable-sse2, --enable-avx, +--enable-altivec, --enable-neon: Enable the compilation of +SIMD code for SSE (Pentium III+), SSE2 (Pentium IV+), AVX (Sandy Bridge, +Interlagos), AltiVec (PowerPC G4+), NEON (some ARM processors). SSE, +AltiVec, and NEON only work with --enable-float (above). SSE2 +works in both single and double precision (and is simply SSE in single +precision). The resulting code will still work on earlier CPUs +lacking the SIMD extensions (SIMD is automatically disabled, although +the FFTW library is still larger). +
        +
      • These options require a compiler supporting SIMD extensions, and +compiler support is always a bit flaky: see the FFTW FAQ for a list of +compiler versions that have problems compiling FFTW. +
      • With AltiVec and gcc, you may have to use the +-mabi=altivec option when compiling any code that links to FFTW, +in order to properly align the stack; otherwise, FFTW could crash when +it tries to use an AltiVec feature. (This is not necessary on MacOS X.) +
      • With SSE/SSE2 and gcc, you should use a version of gcc that +properly aligns the stack when compiling any code that links to FFTW. +By default, gcc 2.95 and later versions align the stack as +needed, but you should not compile FFTW with the -Os option or the +-mpreferred-stack-boundary option with an argument less than 4. +
      • Because of the large variety of ARM processors and ABIs, FFTW +does not attempt to guess the correct gcc flags for generating +NEON code. In general, you will have to provide them on the command line. +This command line is known to have worked at least once: +
                       ./configure --with-slow-timer --host=arm-linux-gnueabi \
        +                 --enable-single --enable-neon \
        +                 "CC=arm-linux-gnueabi-gcc -march=armv7-a -mfloat-abi=softfp"
        +
        +
      + +
    + +

    To force configure to use a particular C compiler foo +(instead of the default, usually gcc), pass CC=foo to the +configure script; you may also need to set the flags via the variable +CFLAGS as described above. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Installation-on-non_002dUnix-systems.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Installation-on-non_002dUnix-systems.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,102 @@ + + +Installation on non-Unix systems - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    10.2 Installation on non-Unix systems

    + +

    It should be relatively straightforward to compile FFTW even on non-Unix +systems lacking the niceties of a configure script. Basically, +you need to edit the config.h header (copy it from +config.h.in) to #define the various options and compiler +characteristics, and then compile all the ‘.c’ files in the +relevant directories. + +

    The config.h header contains about 100 options to set, each one +initially an #undef, each documented with a comment, and most of +them fairly obvious. For most of the options, you should simply +#define them to 1 if they are applicable, although a few +options require a particular value (e.g. SIZEOF_LONG_LONG should +be defined to the size of the long long type, in bytes, or zero +if it is not supported). We will likely post some sample +config.h files for various operating systems and compilers for +you to use (at least as a starting point). Please let us know if you +have to hand-create a configuration file (and/or a pre-compiled binary) +that you want to share. + +

    To create the FFTW library, you will then need to compile all of the +‘.c’ files in the kernel, dft, dft/scalar, +dft/scalar/codelets, rdft, rdft/scalar, +rdft/scalar/r2cf, rdft/scalar/r2cb, +rdft/scalar/r2r, reodft, and api directories. +If you are compiling with SIMD support (e.g. you defined +HAVE_SSE2 in config.h), then you also need to compile +the .c files in the simd-support, +{dft,rdft}/simd, {dft,rdft}/simd/* directories. + +

    Once these files are all compiled, link them into a library, or a shared +library, or directly into your program. + +

    To compile the FFTW test program, additionally compile the code in the +libbench2/ directory, and link it into a library. Then compile +the code in the tests/ directory and link it to the +libbench2 and FFTW libraries. To compile the fftw-wisdom +(command-line) tool (see Wisdom Utilities), compile +tools/fftw-wisdom.c and link it to the libbench2 and FFTW +libraries. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Interleaved-and-split-arrays.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Interleaved-and-split-arrays.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,81 @@ + + +Interleaved and split arrays - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.5.1 Interleaved and split arrays

    + +

    The guru interface supports two representations of complex numbers, +which we call the interleaved and the split format. + +

    The interleaved format is the same one used by the basic and +advanced interfaces, and it is documented in Complex numbers. +In the interleaved format, you provide pointers to the real part of a +complex number, and the imaginary part is understood to be stored in the +next memory location. + + +

    The split format allows separate pointers to the real and +imaginary parts of a complex array. + + +

    Technically, the interleaved format is redundant, because you can +always express an interleaved array in terms of a split array with +appropriate pointers and strides. On the other hand, the interleaved +format is simpler to use, and it is common in practice. Hence, FFTW +supports it as a special case. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Introduction.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Introduction.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,197 @@ + + +Introduction - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Top, +Up: Top +


    +
    + +

    1 Introduction

    + +

    This manual documents version 3.3.4 of FFTW, the +Fastest Fourier Transform in the West. FFTW is a comprehensive +collection of fast C routines for computing the discrete Fourier +transform (DFT) and various special cases thereof. + +

      +
    • FFTW computes the DFT of complex data, real data, even- + or odd-symmetric real data (these symmetric transforms are usually + known as the discrete cosine or sine transform, respectively), and the + discrete Hartley transform (DHT) of real data. + +
    • The input data can have arbitrary length. + FFTW employs O(n log n) algorithms for all lengths, including + prime numbers. + +
    • FFTW supports arbitrary multi-dimensional data. + +
    • FFTW supports the SSE, SSE2, AVX, Altivec, and MIPS PS instruction + sets. + +
    • FFTW includes parallel (multi-threaded) transforms + for shared-memory systems. +
    • Starting with version 3.3, FFTW includes distributed-memory parallel + transforms using MPI. +
    + +

    We assume herein that you are familiar with the properties and uses of +the DFT that are relevant to your application. Otherwise, see +e.g. The Fast Fourier Transform and Its Applications by E. O. Brigham +(Prentice-Hall, Englewood Cliffs, NJ, 1988). +Our web page also has links to FFT-related +information online. + + + + + + + + + + + + + + + + + + +

    In order to use FFTW effectively, you need to learn one basic concept +of FFTW's internal structure: FFTW does not use a fixed algorithm for +computing the transform, but instead it adapts the DFT algorithm to +details of the underlying hardware in order to maximize performance. +Hence, the computation of the transform is split into two phases. +First, FFTW's planner “learns” the fastest way to compute the +transform on your machine. The planner +produces a data structure called a plan that contains this +information. Subsequently, the plan is executed +to transform the array of input data as dictated by the plan. The +plan can be reused as many times as needed. In typical +high-performance applications, many transforms of the same size are +computed and, consequently, a relatively expensive initialization of +this sort is acceptable. On the other hand, if you need a single +transform of a given size, the one-time cost of the planner becomes +significant. For this case, FFTW provides fast planners based on +heuristics or on previously computed plans. + +

    FFTW supports transforms of data with arbitrary length, rank, +multiplicity, and a general memory layout. In simple cases, however, +this generality may be unnecessary and confusing. Consequently, we +organized the interface to FFTW into three levels of increasing +generality. +

      +
    • The basic interface computes a single + transform of contiguous data. +
    • The advanced interface computes transforms + of multiple or strided arrays. +
    • The guru interface supports the most general data + layouts, multiplicities, and strides. +
    + We expect that most users will be best served by the basic interface, +whereas the guru interface requires careful attention to the +documentation to avoid problems. + + +

    Besides the automatic performance adaptation performed by the planner, +it is also possible for advanced users to customize FFTW manually. For +example, if code space is a concern, we provide a tool that links only +the subset of FFTW needed by your application. Conversely, you may need +to extend FFTW because the standard distribution is not sufficient for +your needs. For example, the standard FFTW distribution works most +efficiently for arrays whose size can be factored into small primes +(2, 3, 5, and 7), and otherwise it uses a +slower general-purpose routine. If you need efficient transforms of +other sizes, you can use FFTW's code generator, which produces fast C +programs (“codelets”) for any particular array size you may care +about. +For example, if you need transforms of size +513 = 19*3^3, you can customize FFTW to support the factor 19 efficiently. + +

    For more information regarding FFTW, see the paper, “The Design and +Implementation of FFTW3,” by M. Frigo and S. G. Johnson, which was an +invited paper in Proc. IEEE 93 (2), p. 216 (2005). The +code generator is described in the paper “A fast Fourier transform +compiler”, +by M. Frigo, in the Proceedings of the 1999 ACM SIGPLAN Conference +on Programming Language Design and Implementation (PLDI), Atlanta, +Georgia, May 1999. These papers, along with the latest version of +FFTW, the FAQ, benchmarks, and other links, are available at +the FFTW home page. + +

    The current version of FFTW incorporates many good ideas from the past +thirty years of FFT literature. In one way or another, FFTW uses the +Cooley-Tukey algorithm, the prime factor algorithm, Rader's algorithm +for prime sizes, and a split-radix algorithm (with a +“conjugate-pair” variation pointed out to us by Dan Bernstein). +FFTW's code generator also produces new algorithms that we do not +completely understand. +The reader is referred to the cited papers for the appropriate +references. + +

    The rest of this manual is organized as follows. We first discuss the +sequential (single-processor) implementation. We start by describing +the basic interface/features of FFTW in Tutorial. +Next, Other Important Topics discusses data alignment +(see SIMD alignment and fftw_malloc), +the storage scheme of multi-dimensional arrays +(see Multi-dimensional Array Format), and FFTW's mechanism for +storing plans on disk (see Words of Wisdom-Saving Plans). Next, +FFTW Reference provides comprehensive documentation of all +FFTW's features. Parallel transforms are discussed in their own +chapters: Multi-threaded FFTW and Distributed-memory FFTW with MPI. Fortran programmers can also use FFTW, as described in +Calling FFTW from Legacy Fortran and Calling FFTW from Modern Fortran. Installation and Customization explains how to +install FFTW in your computer system and how to adapt FFTW to your +needs. License and copyright information is given in License and Copyright. Finally, we thank all the people who helped us in +Acknowledgments. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Library-Index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Library-Index.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,385 @@ + + +Library Index - FFTW 3.3.4 + + + + + + + + + + + +

    + +

    +Previous: Concept Index, +Up: Top +


    +
    + +

    14 Library Index

    + + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/License-and-Copyright.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/License-and-Copyright.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,92 @@ + + +License and Copyright - FFTW 3.3.4 + + + + + + + + + + + + +
    + +

    +Next: , +Previous: Acknowledgments, +Up: Top +


    +
    + +

    12 License and Copyright

    + +

    FFTW is Copyright © 2003, 2007-11 Matteo Frigo, Copyright +© 2003, 2007-11 Massachusetts Institute of Technology. + +

    FFTW is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +

    This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +

    You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can also +find the GPL on the GNU web site. + +

    In addition, we kindly ask you to acknowledge FFTW and its authors in +any program or publication in which you use FFTW. (You are not +required to do so; it is up to your common sense to decide +whether you want to comply with this request or not.) For general +publications, we suggest referencing: Matteo Frigo and Steven +G. Johnson, “The design and implementation of FFTW3,” +Proc. IEEE 93 (2), 216–231 (2005). + +

    Non-free versions of FFTW are available under terms different from those +of the General Public License. (e.g. they do not require you to +accompany any object code using FFTW with the corresponding source +code.) For these alternative terms you must purchase a license from MIT's +Technology Licensing Office. Users interested in such a license should +contact us (fftw@fftw.org) for more information. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Linking-and-Initializing-MPI-FFTW.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Linking-and-Initializing-MPI-FFTW.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,88 @@ + + +Linking and Initializing MPI FFTW - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.2 Linking and Initializing MPI FFTW

    + +

    Programs using the MPI FFTW routines should be linked with +-lfftw3_mpi -lfftw3 -lm on Unix in double precision, +-lfftw3f_mpi -lfftw3f -lm in single precision, and so on +(see Precision). You will also need to link with whatever library +is responsible for MPI on your system; in most MPI implementations, +there is a special compiler alias named mpicc to compile and +link MPI code. + + +

    Before calling any FFTW routines except possibly +fftw_init_threads (see Combining MPI and Threads), but after calling +MPI_Init, you should call the function: + +

         void fftw_mpi_init(void);
    +
    +

    +If, at the end of your program, you want to get rid of all memory and +other resources allocated internally by FFTW, for both the serial and +MPI routines, you can call: + +

         void fftw_mpi_cleanup(void);
    +
    +

    +which is much like the fftw_cleanup() function except that it +also gets rid of FFTW's MPI-related data. You must not execute +any previously created plans after calling this function. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Load-balancing.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Load-balancing.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,89 @@ + + +Load balancing - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.4.2 Load balancing

    + +

    +Ideally, when you parallelize a transform over some P +processes, each process should end up with work that takes equal time. +Otherwise, all of the processes end up waiting on whichever process is +slowest. This goal is known as “load balancing.” In this section, +we describe the circumstances under which FFTW is able to load-balance +well, and in particular how you should choose your transform size in +order to load balance. + +

    Load balancing is especially difficult when you are parallelizing over +heterogeneous machines; for example, if one of your processors is an +old 486 and another is a Pentium IV, obviously you should give the +Pentium more work to do than the 486 since the latter is much slower. +FFTW does not deal with this problem, however—it assumes that your +processes run on hardware of comparable speed, and that the goal is +therefore to divide the problem as equally as possible. + +

    For a multi-dimensional complex DFT, FFTW can divide the problem +equally among the processes if: (i) the first dimension +n0 is divisible by P; and (ii), the product of +the subsequent dimensions is divisible by P. (For the advanced +interface, where you can specify multiple simultaneous transforms via +some “vector” length howmany, a factor of howmany is +included in the product of the subsequent dimensions.) + +

    For a one-dimensional complex DFT, the length N of the data +should be divisible by P squared to be able to divide +the problem equally among the processes. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/MPI-Data-Distribution-Functions.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/MPI-Data-Distribution-Functions.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,150 @@ + + +MPI Data Distribution Functions - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Using MPI Plans, +Up: FFTW MPI Reference +


    +
    + +

    6.12.4 MPI Data Distribution Functions

    + +

    As described above (see MPI Data Distribution), in order to +allocate your arrays, before creating a plan, you must first +call one of the following routines to determine the required +allocation size and the portion of the array locally stored on a given +process. The MPI_Comm communicator passed here must be +equivalent to the communicator used below for plan creation. + +

    The basic interface for multidimensional transforms consists of the +functions: + +

    +

         ptrdiff_t fftw_mpi_local_size_2d(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm,
    +                                      ptrdiff_t *local_n0, ptrdiff_t *local_0_start);
    +     ptrdiff_t fftw_mpi_local_size_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2,
    +                                      MPI_Comm comm,
    +                                      ptrdiff_t *local_n0, ptrdiff_t *local_0_start);
    +     ptrdiff_t fftw_mpi_local_size(int rnk, const ptrdiff_t *n, MPI_Comm comm,
    +                                   ptrdiff_t *local_n0, ptrdiff_t *local_0_start);
    +     
    +     ptrdiff_t fftw_mpi_local_size_2d_transposed(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm,
    +                                                 ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
    +                                                 ptrdiff_t *local_n1, ptrdiff_t *local_1_start);
    +     ptrdiff_t fftw_mpi_local_size_3d_transposed(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2,
    +                                                 MPI_Comm comm,
    +                                                 ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
    +                                                 ptrdiff_t *local_n1, ptrdiff_t *local_1_start);
    +     ptrdiff_t fftw_mpi_local_size_transposed(int rnk, const ptrdiff_t *n, MPI_Comm comm,
    +                                              ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
    +                                              ptrdiff_t *local_n1, ptrdiff_t *local_1_start);
    +
    +

    These functions return the number of elements to allocate (complex +numbers for DFT/r2c/c2r plans, real numbers for r2r plans), whereas +the local_n0 and local_0_start return the portion +(local_0_start to local_0_start + local_n0 - 1) of the +first dimension of an $n_0 \times n_1 \times n_2 \times \cdots \times n_{d-1}$ array that is stored on the local +process. See Basic and advanced distribution interfaces. For +FFTW_MPI_TRANSPOSED_OUT plans, the ‘_transposed’ variants +are useful in order to also return the local portion of the first +dimension in the $n_1 \times n_0 \times n_2 \times \cdots \times n_{d-1}$ transposed output. +See Transposed distributions. +The advanced interface for multidimensional transforms is: + +

    +

         ptrdiff_t fftw_mpi_local_size_many(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
    +                                        ptrdiff_t block0, MPI_Comm comm,
    +                                        ptrdiff_t *local_n0, ptrdiff_t *local_0_start);
    +     ptrdiff_t fftw_mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
    +                                                   ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm,
    +                                                   ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
    +                                                   ptrdiff_t *local_n1, ptrdiff_t *local_1_start);
    +
    +

    These differ from the basic interface in only two ways. First, they +allow you to specify block sizes block0 and block1 (the +latter for the transposed output); you can pass +FFTW_MPI_DEFAULT_BLOCK to use FFTW's default block size as in +the basic interface. Second, you can pass a howmany parameter, +corresponding to the advanced planning interface below: this is for +transforms of contiguous howmany-tuples of numbers +(howmany = 1 in the basic interface). + +

    The corresponding basic and advanced routines for one-dimensional +transforms (currently only complex DFTs) are: + +

    +

         ptrdiff_t fftw_mpi_local_size_1d(
    +                  ptrdiff_t n0, MPI_Comm comm, int sign, unsigned flags,
    +                  ptrdiff_t *local_ni, ptrdiff_t *local_i_start,
    +                  ptrdiff_t *local_no, ptrdiff_t *local_o_start);
    +     ptrdiff_t fftw_mpi_local_size_many_1d(
    +                  ptrdiff_t n0, ptrdiff_t howmany,
    +                  MPI_Comm comm, int sign, unsigned flags,
    +                  ptrdiff_t *local_ni, ptrdiff_t *local_i_start,
    +                  ptrdiff_t *local_no, ptrdiff_t *local_o_start);
    +
    +

    As above, the return value is the number of elements to allocate +(complex numbers, for complex DFTs). The local_ni and +local_i_start arguments return the portion +(local_i_start to local_i_start + local_ni - 1) of the +1d array that is stored on this process for the transform +input, and local_no and local_o_start are the +corresponding quantities for the output. The sign +(FFTW_FORWARD or FFTW_BACKWARD) and flags must +match the arguments passed when creating a plan. Although the inputs +and outputs have different data distributions in general, it is +guaranteed that the output data distribution of an +FFTW_FORWARD plan will match the input data distribution +of an FFTW_BACKWARD plan and vice versa; similarly for the +FFTW_MPI_SCRAMBLED_OUT and FFTW_MPI_SCRAMBLED_IN flags. +See One-dimensional distributions. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/MPI-Data-Distribution.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/MPI-Data-Distribution.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,113 @@ + + +MPI Data Distribution - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.4 MPI Data Distribution

    + +

    +The most important concept to understand in using FFTW's MPI interface +is the data distribution. With a serial or multithreaded FFT, all of +the inputs and outputs are stored as a single contiguous chunk of +memory. With a distributed-memory FFT, the inputs and outputs are +broken into disjoint blocks, one per process. + +

    In particular, FFTW uses a 1d block distribution of the data, +distributed along the first dimension. For example, if you +want to perform a 100 × 200 complex DFT, distributed over 4 +processes, each process will get a 25 × 200 slice of the data. +That is, process 0 will get rows 0 through 24, process 1 will get rows +25 through 49, process 2 will get rows 50 through 74, and process 3 +will get rows 75 through 99. If you take the same array but +distribute it over 3 processes, then it is not evenly divisible so the +different processes will have unequal chunks. FFTW's default choice +in this case is to assign 34 rows to processes 0 and 1, and 32 rows to +process 2. + + +

    FFTW provides several ‘fftw_mpi_local_size’ routines that you can +call to find out what portion of an array is stored on the current +process. In most cases, you should use the default block sizes picked +by FFTW, but it is also possible to specify your own block size. For +example, with a 100 × 200 array on three processes, you can +tell FFTW to use a block size of 40, which would assign 40 rows to +processes 0 and 1, and 20 rows to process 2. FFTW's default is to +divide the data equally among the processes if possible, and as best +it can otherwise. The rows are always assigned in “rank order,” +i.e. process 0 gets the first block of rows, then process 1, and so +on. (You can change this by using MPI_Comm_split to create a +new communicator with re-ordered processes.) However, you should +always call the ‘fftw_mpi_local_size’ routines, if possible, +rather than trying to predict FFTW's distribution choices. + +

    In particular, it is critical that you allocate the storage size that +is returned by ‘fftw_mpi_local_size’, which is not +necessarily the size of the local slice of the array. The reason is +that intermediate steps of FFTW's algorithms involve transposing the +array and redistributing the data, so at these intermediate steps FFTW +may require more local storage space (albeit always proportional to +the total size divided by the number of processes). The +‘fftw_mpi_local_size’ functions know how much storage is required +for these intermediate steps and tell you the correct amount to +allocate. + +

    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/MPI-Files-and-Data-Types.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/MPI-Files-and-Data-Types.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,79 @@ + + +MPI Files and Data Types - FFTW 3.3.4 + + + + + + + + + + + + + +
    + +

    +Next: MPI Initialization, +Previous: FFTW MPI Reference, +Up: FFTW MPI Reference +


    +
    + +

    6.12.1 MPI Files and Data Types

    + +

    All programs using FFTW's MPI support should include its header file: + +

         #include <fftw3-mpi.h>
    +
    +

    Note that this header file includes the serial-FFTW fftw3.h +header file, and also the mpi.h header file for MPI, so you +need not include those files separately. + +

    You must also link to both the FFTW MPI library and to the +serial FFTW library. On Unix, this means adding -lfftw3_mpi +-lfftw3 -lm at the end of the link command. + +

    Different precisions are handled as in the serial interface: +See Precision. That is, ‘fftw_’ functions become +fftwf_ (in single precision) etcetera, and the libraries become +-lfftw3f_mpi -lfftw3f -lm etcetera on Unix. Long-double +precision is supported in MPI, but quad precision (‘fftwq_’) is +not due to the lack of MPI support for this type. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/MPI-Initialization.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/MPI-Initialization.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,83 @@ + + +MPI Initialization - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: MPI Files and Data Types, +Up: FFTW MPI Reference +


    +
    + +

    6.12.2 MPI Initialization

    + +

    Before calling any other FFTW MPI (‘fftw_mpi_’) function, and +before importing any wisdom for MPI problems, you must call: + +

    +

         void fftw_mpi_init(void);
    +
    +

    If FFTW threads support is used, however, fftw_mpi_init should +be called after fftw_init_threads (see Combining MPI and Threads). Calling fftw_mpi_init additional times (before +fftw_mpi_cleanup) has no effect. + +

    If you want to deallocate all persistent data and reset FFTW to the +pristine state it was in when you started your program, you can call: + +

    +

         void fftw_mpi_cleanup(void);
    +
    +

    (This calls fftw_cleanup, so you need not call the serial +cleanup routine too, although it is safe to do so.) After calling +fftw_mpi_cleanup, all existing plans become undefined, and you +should not attempt to execute or destroy them. You must call +fftw_mpi_init again after fftw_mpi_cleanup if you want +to resume using the MPI FFTW routines. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/MPI-Plan-Creation.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/MPI-Plan-Creation.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,253 @@ + + +MPI Plan Creation - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.12.5 MPI Plan Creation

    + +
    Complex-data MPI DFTs
    + +

    Plans for complex-data DFTs (see 2d MPI example) are created by: + +

    +

         fftw_plan fftw_mpi_plan_dft_1d(ptrdiff_t n0, fftw_complex *in, fftw_complex *out,
    +                                    MPI_Comm comm, int sign, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_2d(ptrdiff_t n0, ptrdiff_t n1,
    +                                    fftw_complex *in, fftw_complex *out,
    +                                    MPI_Comm comm, int sign, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2,
    +                                    fftw_complex *in, fftw_complex *out,
    +                                    MPI_Comm comm, int sign, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft(int rnk, const ptrdiff_t *n,
    +                                 fftw_complex *in, fftw_complex *out,
    +                                 MPI_Comm comm, int sign, unsigned flags);
    +     fftw_plan fftw_mpi_plan_many_dft(int rnk, const ptrdiff_t *n,
    +                                      ptrdiff_t howmany, ptrdiff_t block, ptrdiff_t tblock,
    +                                      fftw_complex *in, fftw_complex *out,
    +                                      MPI_Comm comm, int sign, unsigned flags);
    +
    +

    These are similar to their serial counterparts (see Complex DFTs) +in specifying the dimensions, sign, and flags of the transform. The +comm argument gives an MPI communicator that specifies the set +of processes to participate in the transform; plan creation is a +collective function that must be called for all processes in the +communicator. The in and out pointers refer only to a +portion of the overall transform data (see MPI Data Distribution) +as specified by the ‘local_size’ functions in the previous +section. Unless flags contains FFTW_ESTIMATE, these +arrays are overwritten during plan creation as for the serial +interface. For multi-dimensional transforms, any dimensions > +1 are supported; for one-dimensional transforms, only composite +(non-prime) n0 are currently supported (unlike the serial +FFTW). Requesting an unsupported transform size will yield a +NULL plan. (As in the serial interface, highly composite sizes +generally yield the best performance.) + +

    The advanced-interface fftw_mpi_plan_many_dft additionally +allows you to specify the block sizes for the first dimension +(block) of the n0 × n1 × n2 × … × nd-1 input data and the first dimension +(tblock) of the n1 × n0 × n2 ×…× nd-1 transposed data (at intermediate +steps of the transform, and for the output if +FFTW_MPI_TRANSPOSED_OUT is specified in flags). These must +be the same block sizes as were passed to the corresponding +‘local_size’ function; you can pass FFTW_MPI_DEFAULT_BLOCK +to use FFTW's default block size as in the basic interface. Also, the +howmany parameter specifies that the transform is of contiguous +howmany-tuples rather than individual complex numbers; this +corresponds to the same parameter in the serial advanced interface +(see Advanced Complex DFTs) with stride = howmany and +dist = 1. + +

    MPI flags
    + +

    The flags can be any of those for the serial FFTW +(see Planner Flags), and in addition may include one or more of +the following MPI-specific flags, which improve performance at the +cost of changing the output or input data formats. + +

      +
    • FFTW_MPI_SCRAMBLED_OUT, FFTW_MPI_SCRAMBLED_IN: valid for +1d transforms only, these flags indicate that the output/input of the +transform are in an undocumented “scrambled” order. A forward +FFTW_MPI_SCRAMBLED_OUT transform can be inverted by a backward +FFTW_MPI_SCRAMBLED_IN (times the usual 1/N normalization). +See One-dimensional distributions. + +
    • FFTW_MPI_TRANSPOSED_OUT, FFTW_MPI_TRANSPOSED_IN: valid +for multidimensional (rnk > 1) transforms only, these flags +specify that the output or input of an n0 × n1 × n2 × … × nd-1 transform is +transposed to n1 × n0 × n2 ×…× nd-1. See Transposed distributions. + +
    + +
    Real-data MPI DFTs
    + +

    Plans for real-input/output (r2c/c2r) DFTs (see Multi-dimensional MPI DFTs of Real Data) are created by: + +

    +

         fftw_plan fftw_mpi_plan_dft_r2c_2d(ptrdiff_t n0, ptrdiff_t n1,
    +                                        double *in, fftw_complex *out,
    +                                        MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_r2c_2d(ptrdiff_t n0, ptrdiff_t n1,
    +                                        double *in, fftw_complex *out,
    +                                        MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_r2c_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2,
    +                                        double *in, fftw_complex *out,
    +                                        MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_r2c(int rnk, const ptrdiff_t *n,
    +                                     double *in, fftw_complex *out,
    +                                     MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_c2r_2d(ptrdiff_t n0, ptrdiff_t n1,
    +                                        fftw_complex *in, double *out,
    +                                        MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_c2r_2d(ptrdiff_t n0, ptrdiff_t n1,
    +                                        fftw_complex *in, double *out,
    +                                        MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_c2r_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2,
    +                                        fftw_complex *in, double *out,
    +                                        MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_dft_c2r(int rnk, const ptrdiff_t *n,
    +                                     fftw_complex *in, double *out,
    +                                     MPI_Comm comm, unsigned flags);
    +
    +

    Similar to the serial interface (see Real-data DFTs), these +transform logically n0 × n1 × n2 × … × nd-1 real data to/from n0 × n1 × n2 × … × (nd-1/2 + 1) complex +data, representing the non-redundant half of the conjugate-symmetry +output of a real-input DFT (see Multi-dimensional Transforms). +However, the real array must be stored within a padded n0 × n1 × n2 × … × [2 (nd-1/2 + 1)] + +

    array (much like the in-place serial r2c transforms, but here for +out-of-place transforms as well). Currently, only multi-dimensional +(rnk > 1) r2c/c2r transforms are supported (requesting a plan +for rnk = 1 will yield NULL). As explained above +(see Multi-dimensional MPI DFTs of Real Data), the data +distribution of both the real and complex arrays is given by the +‘local_size’ function called for the dimensions of the +complex array. Similar to the other planning functions, the +input and output arrays are overwritten when the plan is created +except in FFTW_ESTIMATE mode. + +

    As for the complex DFTs above, there is an advanced interface that +allows you to manually specify block sizes and to transform contiguous +howmany-tuples of real/complex numbers: + +

    +

         fftw_plan fftw_mpi_plan_many_dft_r2c
    +                   (int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
    +                    ptrdiff_t iblock, ptrdiff_t oblock,
    +                    double *in, fftw_complex *out,
    +                    MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_many_dft_c2r
    +                   (int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
    +                    ptrdiff_t iblock, ptrdiff_t oblock,
    +                    fftw_complex *in, double *out,
    +                    MPI_Comm comm, unsigned flags);
    +
    +
    MPI r2r transforms
    + +

    There are corresponding plan-creation routines for r2r +transforms (see More DFTs of Real Data), currently supporting +multidimensional (rnk > 1) transforms only (rnk = 1 will +yield a NULL plan): + +

         fftw_plan fftw_mpi_plan_r2r_2d(ptrdiff_t n0, ptrdiff_t n1,
    +                                    double *in, double *out,
    +                                    MPI_Comm comm,
    +                                    fftw_r2r_kind kind0, fftw_r2r_kind kind1,
    +                                    unsigned flags);
    +     fftw_plan fftw_mpi_plan_r2r_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2,
    +                                    double *in, double *out,
    +                                    MPI_Comm comm,
    +                                    fftw_r2r_kind kind0, fftw_r2r_kind kind1, fftw_r2r_kind kind2,
    +                                    unsigned flags);
    +     fftw_plan fftw_mpi_plan_r2r(int rnk, const ptrdiff_t *n,
    +                                 double *in, double *out,
    +                                 MPI_Comm comm, const fftw_r2r_kind *kind,
    +                                 unsigned flags);
    +     fftw_plan fftw_mpi_plan_many_r2r(int rnk, const ptrdiff_t *n,
    +                                      ptrdiff_t iblock, ptrdiff_t oblock,
    +                                      double *in, double *out,
    +                                      MPI_Comm comm, const fftw_r2r_kind *kind,
    +                                      unsigned flags);
    +
    +

    The parameters are much the same as for the complex DFTs above, except +that the arrays are of real numbers (and hence the outputs of the +‘local_size’ data-distribution functions should be interpreted as +counts of real rather than complex numbers). Also, the kind +parameters specify the r2r kinds along each dimension as for the +serial interface (see Real-to-Real Transform Kinds). See Other Multi-dimensional Real-data MPI Transforms. + +

    MPI transposition
    + +

    +FFTW also provides routines to plan a transpose of a distributed +n0 by n1 array of real numbers, or an array of +howmany-tuples of real numbers with specified block sizes +(see FFTW MPI Transposes): + +

    +

         fftw_plan fftw_mpi_plan_transpose(ptrdiff_t n0, ptrdiff_t n1,
    +                                       double *in, double *out,
    +                                       MPI_Comm comm, unsigned flags);
    +     fftw_plan fftw_mpi_plan_many_transpose
    +                     (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany,
    +                      ptrdiff_t block0, ptrdiff_t block1,
    +                      double *in, double *out, MPI_Comm comm, unsigned flags);
    +
    +

    These plans are used with the fftw_mpi_execute_r2r new-array +execute function (see Using MPI Plans), since they count as (rank +zero) r2r plans from FFTW's perspective. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/MPI-Wisdom-Communication.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/MPI-Wisdom-Communication.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,79 @@ + + +MPI Wisdom Communication - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Previous: MPI Plan Creation, +Up: FFTW MPI Reference +


    +
    + +

    6.12.6 MPI Wisdom Communication

    + +

    To facilitate synchronizing wisdom among the different MPI processes, +we provide two functions: + +

    +

         void fftw_mpi_gather_wisdom(MPI_Comm comm);
    +     void fftw_mpi_broadcast_wisdom(MPI_Comm comm);
    +
    +

    The fftw_mpi_gather_wisdom function gathers all wisdom in the +given communicator comm to the process of rank 0 in the +communicator: that process obtains the union of all wisdom on all the +processes. As a side effect, some other processes will gain +additional wisdom from other processes, but only process 0 will gain +the complete union. + +

    The fftw_mpi_broadcast_wisdom function does the reverse: it exports +wisdom from process 0 in comm to all other processes in the +communicator, replacing any wisdom they currently have. + +

    See FFTW MPI Wisdom. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Memory-Allocation.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Memory-Allocation.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ + + +Memory Allocation - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Previous: Precision, +Up: Data Types and Files +


    +
    + +

    4.1.3 Memory Allocation

    + +
         void *fftw_malloc(size_t n);
    +     void fftw_free(void *p);
    +
    +

    +These are functions that behave identically to malloc and +free, except that they guarantee that the returned pointer obeys +any special alignment restrictions imposed by any algorithm in FFTW +(e.g. for SIMD acceleration). See SIMD alignment and fftw_malloc. + + +

    Data allocated by fftw_malloc must be deallocated by +fftw_free and not by the ordinary free. + +

    These routines simply call through to your operating system's +malloc or, if necessary, its aligned equivalent +(e.g. memalign), so you normally need not worry about any +significant time or space overhead. You are not required to use +them to allocate your data, but we strongly recommend it. + +

    Note: in C++, just as with ordinary malloc, you must typecast +the output of fftw_malloc to whatever pointer type you are +allocating. + + +

    We also provide the following two convenience functions to allocate +real and complex arrays with n elements, which are equivalent +to (double *) fftw_malloc(sizeof(double) * n) and +(fftw_complex *) fftw_malloc(sizeof(fftw_complex) * n), +respectively: + +

         double *fftw_alloc_real(size_t n);
    +     fftw_complex *fftw_alloc_complex(size_t n);
    +
    +

    +The equivalent functions in other precisions allocate arrays of n +elements in that precision. e.g. fftwf_alloc_real(n) is +equivalent to (float *) fftwf_malloc(sizeof(float) * n). + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/More-DFTs-of-Real-Data.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/More-DFTs-of-Real-Data.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,127 @@ + + +More DFTs of Real Data - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Previous: Multi-Dimensional DFTs of Real Data, +Up: Tutorial +


    +
    + +

    2.5 More DFTs of Real Data

    + + + +

    FFTW supports several other transform types via a unified r2r +(real-to-real) interface, +so called because it takes a real (double) array and outputs a +real array of the same size. These r2r transforms currently fall into +three categories: DFTs of real input and complex-Hermitian output in +halfcomplex format, DFTs of real input with even/odd symmetry +(a.k.a. discrete cosine/sine transforms, DCTs/DSTs), and discrete +Hartley transforms (DHTs), all described in more detail by the +following sections. + +

    The r2r transforms follow the by now familiar interface of creating an +fftw_plan, executing it with fftw_execute(plan), and +destroying it with fftw_destroy_plan(plan). Furthermore, all +r2r transforms share the same planner interface: + +

         fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out,
    +                                fftw_r2r_kind kind, unsigned flags);
    +     fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out,
    +                                fftw_r2r_kind kind0, fftw_r2r_kind kind1,
    +                                unsigned flags);
    +     fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2,
    +                                double *in, double *out,
    +                                fftw_r2r_kind kind0,
    +                                fftw_r2r_kind kind1,
    +                                fftw_r2r_kind kind2,
    +                                unsigned flags);
    +     fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out,
    +                             const fftw_r2r_kind *kind, unsigned flags);
    +
    +

    +Just as for the complex DFT, these plan 1d/2d/3d/multi-dimensional +transforms for contiguous arrays in row-major order, transforming (real) +input to output of the same size, where n specifies the +physical dimensions of the arrays. All positive n are +supported (with the exception of n=1 for the FFTW_REDFT00 +kind, noted in the real-even subsection below); products of small +factors are most efficient (factorizing n-1 and n+1 for +FFTW_REDFT00 and FFTW_RODFT00 kinds, described below), but +an O(n log n) algorithm is used even for prime sizes. + +

    Each dimension has a kind parameter, of type +fftw_r2r_kind, specifying the kind of r2r transform to be used +for that dimension. +(In the case of fftw_plan_r2r, this is an array kind[rank] +where kind[i] is the transform kind for the dimension +n[i].) The kind can be one of a set of predefined constants, +defined in the following subsections. + +

    In other words, FFTW computes the separable product of the specified +r2r transforms over each dimension, which can be used e.g. for partial +differential equations with mixed boundary conditions. (For some r2r +kinds, notably the halfcomplex DFT and the DHT, such a separable +product is somewhat problematic in more than one dimension, however, +as is described below.) + +

    In the current version of FFTW, all r2r transforms except for the +halfcomplex type are computed via pre- or post-processing of +halfcomplex transforms, and they are therefore not as fast as they +could be. Since most other general DCT/DST codes employ a similar +algorithm, however, FFTW's implementation should provide at least +competitive performance. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Multi_002dDimensional-DFTs-of-Real-Data.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Multi_002dDimensional-DFTs-of-Real-Data.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,137 @@ + + +Multi-Dimensional DFTs of Real Data - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    2.4 Multi-Dimensional DFTs of Real Data

    + +

    Multi-dimensional DFTs of real data use the following planner routines: + +

         fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1,
    +                                    double *in, fftw_complex *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2,
    +                                    double *in, fftw_complex *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_r2c(int rank, const int *n,
    +                                 double *in, fftw_complex *out,
    +                                 unsigned flags);
    +
    +

    +as well as the corresponding c2r routines with the input/output +types swapped. These routines work similarly to their complex +analogues, except for the fact that here the complex output array is cut +roughly in half and the real array requires padding for in-place +transforms (as in 1d, above). + +

    As before, n is the logical size of the array, and the +consequences of this on the format of the complex arrays deserve +careful attention. +Suppose that the real data has dimensions n0 × n1 × n2 × … × nd-1 (in row-major order). +Then, after an r2c transform, the output is an n0 × n1 × n2 × … × (nd-1/2 + 1) array of +fftw_complex values in row-major order, corresponding to slightly +over half of the output of the corresponding complex DFT. (The division +is rounded down.) The ordering of the data is otherwise exactly the +same as in the complex-DFT case. + +

    For out-of-place transforms, this is the end of the story: the real +data is stored as a row-major array of size n0 × n1 × n2 × … × nd-1 and the complex +data is stored as a row-major array of size n0 × n1 × n2 × … × (nd-1/2 + 1). + +

    For in-place transforms, however, extra padding of the real-data array +is necessary because the complex array is larger than the real array, +and the two arrays share the same memory locations. Thus, for +in-place transforms, the final dimension of the real-data array must +be padded with extra values to accommodate the size of the complex +data—two values if the last dimension is even and one if it is odd. +That is, the last dimension of the real data must physically contain +2 * (nd-1/2+1) double values (exactly enough to hold the complex data). +This physical array size does not, however, change the logical +array size—only +nd-1 values are actually stored in the last dimension, and +nd-1 is the last dimension passed to the plan-creation routine. + +

    For example, consider the transform of a two-dimensional real array of +size n0 by n1. The output of the r2c transform is a +two-dimensional complex array of size n0 by n1/2+1, where +the y dimension has been cut nearly in half because of +redundancies in the output. Because fftw_complex is twice the +size of double, the output array is slightly bigger than the +input array. Thus, if we want to compute the transform in place, we +must pad the input array so that it is of size n0 by +2*(n1/2+1). If n1 is even, then there are two padding +elements at the end of each row (which need not be initialized, as they +are only used for output). + +

    The following illustration depicts the input and output arrays just +described, for both the out-of-place and in-place transforms (with the +arrows indicating consecutive memory locations): +rfftwnd-for-html.png + +

    These transforms are unnormalized, so an r2c followed by a c2r +transform (or vice versa) will result in the original data scaled by +the number of real data elements—that is, the product of the +(logical) dimensions of the real data. + + +

    (Because the last dimension is treated specially, if it is equal to +1 the transform is not equivalent to a lower-dimensional +r2c/c2r transform. In that case, the last complex dimension also has +size 1 (=1/2+1), and no advantage is gained over the +complex transforms.) + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Multi_002ddimensional-Array-Format.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Multi_002ddimensional-Array-Format.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,75 @@ + + +Multi-dimensional Array Format - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    3.2 Multi-dimensional Array Format

    + +

    This section describes the format in which multi-dimensional arrays +are stored in FFTW. We felt that a detailed discussion of this topic +was necessary. Since several different formats are common, this topic +is often a source of confusion. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Multi_002ddimensional-MPI-DFTs-of-Real-Data.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Multi_002ddimensional-MPI-DFTs-of-Real-Data.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,158 @@ + + +Multi-dimensional MPI DFTs of Real Data - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    6.5 Multi-dimensional MPI DFTs of Real Data

    + +

    FFTW's MPI interface also supports multi-dimensional DFTs of real +data, similar to the serial r2c and c2r interfaces. (Parallel +one-dimensional real-data DFTs are not currently supported; you must +use a complex transform and set the imaginary parts of the inputs to +zero.) + +

    The key points to understand for r2c and c2r MPI transforms (compared +to the MPI complex DFTs or the serial r2c/c2r transforms), are: + +

      +
    • Just as for serial transforms, r2c/c2r DFTs transform n0 × n1 × n2 × … × nd-1 real +data to/from n0 × n1 × n2 × … × (nd-1/2 + 1) complex data: the last dimension of the +complex data is cut in half (rounded down), plus one. As for the +serial transforms, the sizes you pass to the ‘plan_dft_r2c’ and +‘plan_dft_c2r’ are the n0 × n1 × n2 × … × nd-1 dimensions of the real data. + +
    • Although the real data is conceptually n0 × n1 × n2 × … × nd-1, it is +physically stored as an n0 × n1 × n2 × … × [2 (nd-1/2 + 1)] array, where the last +dimension has been padded to make it the same size as the +complex output. This is much like the in-place serial r2c/c2r +interface (see Multi-Dimensional DFTs of Real Data), except that +in MPI the padding is required even for out-of-place data. The extra +padding numbers are ignored by FFTW (they are not like +zero-padding the transform to a larger size); they are only used to +determine the data layout. + +
    • The data distribution in MPI for both the real and complex data +is determined by the shape of the complex data. That is, you +call the appropriate ‘local size’ function for the n0 × n1 × n2 × … × (nd-1/2 + 1) + +

      complex data, and then use the same distribution for the real +data except that the last complex dimension is replaced by a (padded) +real dimension of twice the length. + +

    + +

    For example, suppose we are performing an out-of-place r2c transform of +L × M × N real data [padded to L × M × 2(N/2+1)], +resulting in L × M × (N/2+1) complex data. Similar to the +example in 2d MPI example, we might do something like: + +

         #include <fftw3-mpi.h>
    +     
    +     int main(int argc, char **argv)
    +     {
    +         const ptrdiff_t L = ..., M = ..., N = ...;
    +         fftw_plan plan;
    +         double *rin;
    +         fftw_complex *cout;
    +         ptrdiff_t alloc_local, local_n0, local_0_start, i, j, k;
    +     
    +         MPI_Init(&argc, &argv);
    +         fftw_mpi_init();
    +     
    +         /* get local data size and allocate */
    +         alloc_local = fftw_mpi_local_size_3d(L, M, N/2+1, MPI_COMM_WORLD,
    +                                              &local_n0, &local_0_start);
    +         rin = fftw_alloc_real(2 * alloc_local);
    +         cout = fftw_alloc_complex(alloc_local);
    +     
    +         /* create plan for out-of-place r2c DFT */
    +         plan = fftw_mpi_plan_dft_r2c_3d(L, M, N, rin, cout, MPI_COMM_WORLD,
    +                                         FFTW_MEASURE);
    +     
    +         /* initialize rin to some function my_func(x,y,z) */
    +         for (i = 0; i < local_n0; ++i)
    +            for (j = 0; j < M; ++j)
    +              for (k = 0; k < N; ++k)
    +            rin[(i*M + j) * (2*(N/2+1)) + k] = my_func(local_0_start+i, j, k);
    +     
    +         /* compute transforms as many times as desired */
    +         fftw_execute(plan);
    +     
    +         fftw_destroy_plan(plan);
    +     
    +         MPI_Finalize();
    +     }
    +
    +

    Note that we allocated rin using fftw_alloc_real with an +argument of 2 * alloc_local: since alloc_local is the +number of complex values to allocate, the number of real +values is twice as many. The rin array is then +local_n0 × M × 2(N/2+1) in row-major order, so its +(i,j,k) element is at the index (i*M + j) * (2*(N/2+1)) + +k (see Multi-dimensional Array Format). + +

    As for the complex transforms, improved performance can be obtained by +specifying that the output is the transpose of the input or vice versa +(see Transposed distributions). In our L × M × N r2c +example, including FFTW_TRANSPOSED_OUT in the flags means that +the input would be a padded L × M × 2(N/2+1) real array +distributed over the L dimension, while the output would be a +M × L × N/2+1 complex array distributed over the M +dimension. To perform the inverse c2r transform with the same data +distributions, you would use the FFTW_TRANSPOSED_IN flag. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Multi_002ddimensional-Transforms.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Multi_002ddimensional-Transforms.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,100 @@ + + +Multi-dimensional Transforms - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    4.8.6 Multi-dimensional Transforms

    + +

    The multi-dimensional transforms of FFTW, in general, compute simply the +separable product of the given 1d transform along each dimension of the +array. Since each of these transforms is unnormalized, computing the +forward followed by the backward/inverse multi-dimensional transform +will result in the original array scaled by the product of the +normalization factors for each dimension (e.g. the product of the +dimension sizes, for a multi-dimensional DFT). + +

    The definition of FFTW's multi-dimensional DFT of real data (r2c) +deserves special attention. In this case, we logically compute the full +multi-dimensional DFT of the input data; since the input data are purely +real, the output data have the Hermitian symmetry and therefore only one +non-redundant half need be stored. More specifically, for an $n_0 \times n_1 \times n_2 \times \cdots \times n_{d-1}$ multi-dimensional real-input DFT, the full (logical) complex output array +$Y[k_0, k_1, \ldots, k_{d-1}]$ has the symmetry: +$Y[k_0, k_1, \ldots, k_{d-1}] = Y[n_0 - k_0, n_1 - k_1, \ldots, n_{d-1} - k_{d-1}]^*$ (where each dimension is periodic). Because of this symmetry, we only +store the +$n_{d-1}/2+1$ elements $k_{d-1} = 0, \ldots, n_{d-1}/2$ of the last dimension (division by 2 is rounded +down). (We could instead have cut any other dimension in half, but the +last dimension proved computationally convenient.) This results in the +peculiar array format described in more detail by Real-data DFT Array Format. + +

    The multi-dimensional c2r transform is simply the unnormalized inverse +of the r2c transform. i.e. it is the same as FFTW's complex backward +multi-dimensional DFT, operating on a Hermitian input array in the +peculiar format mentioned above and outputting a real array (since the +DFT output is purely real). + +

    We should remind the user that the separable product of 1d transforms +along each dimension, as computed by FFTW, is not always the same thing +as the usual multi-dimensional transform. A multi-dimensional +R2HC (or HC2R) transform is not identical to the +multi-dimensional DFT, requiring some post-processing to combine the +requisite real and imaginary parts, as was described in The Halfcomplex-format DFT. Likewise, FFTW's multidimensional +FFTW_DHT r2r transform is not the same thing as the logical +multi-dimensional discrete Hartley transform defined in the literature, +as discussed in The Discrete Hartley Transform. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Multi_002dthreaded-FFTW.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Multi_002dthreaded-FFTW.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ + + +Multi-threaded FFTW - FFTW 3.3.4 + + + + + + + + + + + + +

    + + +

    +Next: , +Previous: FFTW Reference, +Up: Top +


    +
    + +

    5 Multi-threaded FFTW

    + +

    In this chapter we document the parallel FFTW routines for +shared-memory parallel hardware. These routines, which support +parallel one- and multi-dimensional transforms of both real and +complex data, are the easiest way to take advantage of multiple +processors with FFTW. They work just like the corresponding +uniprocessor transform routines, except that you have an extra +initialization routine to call, and there is a routine to set the +number of threads to employ. Any program that uses the uniprocessor +FFTW can therefore be trivially modified to use the multi-threaded +FFTW. + +

    A shared-memory machine is one in which all CPUs can directly access +the same main memory, and such machines are now common due to the +ubiquity of multi-core CPUs. FFTW's multi-threading support allows +you to utilize these additional CPUs transparently from a single +program. However, this does not necessarily translate into +performance gains—when multiple threads/CPUs are employed, there is +an overhead required for synchronization that may outweigh the +computational parallelism. Therefore, you can only benefit from +threads if your problem is sufficiently large. + + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/New_002darray-Execute-Functions.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/New_002darray-Execute-Functions.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,163 @@ + + +New-array Execute Functions - FFTW 3.3.4 + + + + + + + + + + + + + +
    + + +

    +Next: , +Previous: Guru Interface, +Up: FFTW Reference +


    +
    + +

    4.6 New-array Execute Functions

    + +

    +Normally, one executes a plan for the arrays with which the plan was +created, by calling fftw_execute(plan) as described in Using Plans. +However, it is possible for sophisticated users to apply a given plan +to a different array using the “new-array execute” functions +detailed below, provided that the following conditions are met: + +

      +
    • The array size, strides, etcetera are the same (since those are set by +the plan). + +
    • The input and output arrays are the same (in-place) or different +(out-of-place) if the plan was originally created to be in-place or +out-of-place, respectively. + +
    • For split arrays, the separations between the real and imaginary +parts, ii-ri and io-ro, are the same as they were for +the input and output arrays when the plan was created. (This +condition is automatically satisfied for interleaved arrays.) + +
    • The alignment of the new input/output arrays is the same as that +of the input/output arrays when the plan was created, unless the plan +was created with the FFTW_UNALIGNED flag. +Here, the alignment is a platform-dependent quantity (for example, it is +the address modulo 16 if SSE SIMD instructions are used, but the address +modulo 4 for non-SIMD single-precision FFTW on the same machine). In +general, only arrays allocated with fftw_malloc are guaranteed to +be equally aligned (see SIMD alignment and fftw_malloc). + +
    + +

    The alignment issue is especially critical, because if you don't use +fftw_malloc then you may have little control over the alignment +of arrays in memory. For example, neither the C++ new function +nor the Fortran allocate statement provide strong enough +guarantees about data alignment. If you don't use fftw_malloc, +therefore, you probably have to use FFTW_UNALIGNED (which +disables most SIMD support). If possible, it is probably better for +you to simply create multiple plans (creating a new plan is quick once +one exists for a given size), or better yet re-use the same array for +your transforms. + +

    For rare circumstances in which you cannot control the alignment of +allocated memory, but wish to determine whether a given array is +aligned like the original array for which a plan was created, you can +use the fftw_alignment_of function: +

         int fftw_alignment_of(double *p);
    +
    +

    Two arrays have equivalent alignment (for the purposes of applying a +plan) if and only if fftw_alignment_of returns the same value +for the corresponding pointers to their data (typecast to double* +if necessary). + +

    If you are tempted to use the new-array execute interface because you +want to transform a known bunch of arrays of the same size, you should +probably go use the advanced interface instead (see Advanced Interface). + +

    The new-array execute functions are: + +

         void fftw_execute_dft(
    +          const fftw_plan p,
    +          fftw_complex *in, fftw_complex *out);
    +     
    +     void fftw_execute_split_dft(
    +          const fftw_plan p,
    +          double *ri, double *ii, double *ro, double *io);
    +     
    +     void fftw_execute_dft_r2c(
    +          const fftw_plan p,
    +          double *in, fftw_complex *out);
    +     
    +     void fftw_execute_split_dft_r2c(
    +          const fftw_plan p,
    +          double *in, double *ro, double *io);
    +     
    +     void fftw_execute_dft_c2r(
    +          const fftw_plan p,
    +          fftw_complex *in, double *out);
    +     
    +     void fftw_execute_split_dft_c2r(
    +          const fftw_plan p,
    +          double *ri, double *ii, double *out);
    +     
    +     void fftw_execute_r2r(
    +          const fftw_plan p,
    +          double *in, double *out);
    +
    +

    +These execute the plan to compute the corresponding transform on +the input/output arrays specified by the subsequent arguments. The +input/output array arguments have the same meanings as the ones passed +to the guru planner routines in the preceding sections. The plan +is not modified, and these routines can be called as many times as +desired, or intermixed with calls to the ordinary fftw_execute. + +

    The plan must have been created for the transform type +corresponding to the execute function, e.g. it must be a complex-DFT +plan for fftw_execute_dft. Any of the planner routines for that +transform type, from the basic to the guru interface, could have been +used to create the plan, however. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/One_002dDimensional-DFTs-of-Real-Data.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/One_002dDimensional-DFTs-of-Real-Data.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,142 @@ + + +One-Dimensional DFTs of Real Data - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    2.3 One-Dimensional DFTs of Real Data

    + +

    In many practical applications, the input data in[i] are purely +real numbers, in which case the DFT output satisfies the “Hermitian” +redundancy: out[i] is the conjugate of out[n-i]. It is +possible to take advantage of these circumstances in order to achieve +roughly a factor of two improvement in both speed and memory usage. + +

    In exchange for these speed and space advantages, the user sacrifices +some of the simplicity of FFTW's complex transforms. First of all, the +input and output arrays are of different sizes and types: the +input is n real numbers, while the output is n/2+1 +complex numbers (the non-redundant outputs); this also requires slight +“padding” of the input array for +in-place transforms. Second, the inverse transform (complex to real) +has the side-effect of overwriting its input array, by default. +Neither of these inconveniences should pose a serious problem for +users, but it is important to be aware of them. + +

    The routines to perform real-data transforms are almost the same as +those for complex transforms: you allocate arrays of double +and/or fftw_complex (preferably using fftw_malloc or +fftw_alloc_complex), create an fftw_plan, execute it as +many times as you want with fftw_execute(plan), and clean up +with fftw_destroy_plan(plan) (and fftw_free). The only +differences are that the input (or output) is of type double +and there are new routines to create the plan. In one dimension: + +

         fftw_plan fftw_plan_dft_r2c_1d(int n, double *in, fftw_complex *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_c2r_1d(int n, fftw_complex *in, double *out,
    +                                    unsigned flags);
    +
    +

    +for the real input to complex-Hermitian output (r2c) and +complex-Hermitian input to real output (c2r) transforms. +Unlike the complex DFT planner, there is no sign argument. +Instead, r2c DFTs are always FFTW_FORWARD and c2r DFTs are +always FFTW_BACKWARD. +(For single/long-double precision +fftwf and fftwl, double should be replaced by +float and long double, respectively.) + + +

    Here, n is the “logical” size of the DFT, not necessarily the +physical size of the array. In particular, the real (double) +array has n elements, while the complex (fftw_complex) +array has n/2+1 elements (where the division is rounded down). +For an in-place transform, +in and out are aliased to the same array, which must be +big enough to hold both; so, the real array would actually have +2*(n/2+1) elements, where the elements beyond the first +n are unused padding. (Note that this is very different from +the concept of “zero-padding” a transform to a larger length, which +changes the logical size of the DFT by actually adding new input +data.) The kth element of the complex array is exactly the +same as the kth element of the corresponding complex DFT. All +positive n are supported; products of small factors are most +efficient, but an O(n log n) algorithm is used even for prime sizes. + +

    As noted above, the c2r transform destroys its input array even for +out-of-place transforms. This can be prevented, if necessary, by +including FFTW_PRESERVE_INPUT in the flags, with +unfortunately some sacrifice in performance. +This flag is also not currently supported for multi-dimensional real +DFTs (next section). + +

    Readers familiar with DFTs of real data will recall that the 0th (the +“DC”) and n/2-th (the “Nyquist” frequency, when n is +even) elements of the complex output are purely real. Some +implementations therefore store the Nyquist element where the DC +imaginary part would go, in order to make the input and output arrays +the same size. Such packing, however, does not generalize well to +multi-dimensional transforms, and the space savings are minuscule in +any case; FFTW does not support it. + +

    An alternative interface for one-dimensional r2c and c2r DFTs can be +found in the ‘r2r’ interface (see The Halfcomplex-format DFT), with “halfcomplex”-format output that is the same size +(and type) as the input array. +That interface, although it is not very useful for multi-dimensional +transforms, may sometimes yield better performance. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/One_002ddimensional-distributions.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/One_002ddimensional-distributions.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,106 @@ + + +One-dimensional distributions - FFTW 3.3.4 + + + + + + + + + + + + +

    + + +

    +Previous: Transposed distributions, +Up: MPI Data Distribution +


    +
    + +

    6.4.4 One-dimensional distributions

    + +

    For one-dimensional distributed DFTs using FFTW, matters are slightly +more complicated because the data distribution is more closely tied to +how the algorithm works. In particular, you can no longer pass an +arbitrary block size and must accept FFTW's default; also, the block +sizes may be different for input and output. Also, the data +distribution depends on the flags and transform direction, in order +for forward and backward transforms to work correctly. + +

         ptrdiff_t fftw_mpi_local_size_1d(ptrdiff_t n0, MPI_Comm comm,
    +                     int sign, unsigned flags,
    +                     ptrdiff_t *local_ni, ptrdiff_t *local_i_start,
    +                     ptrdiff_t *local_no, ptrdiff_t *local_o_start);
    +
    +

    +This function computes the data distribution for a 1d transform of +size n0 with the given transform sign and flags. +Both input and output data use block distributions. The input on the +current process will consist of local_ni numbers starting at +index local_i_start; e.g. if only a single process is used, +then local_ni will be n0 and local_i_start will +be 0. Similarly for the output, with local_no numbers +starting at index local_o_start. The return value of +fftw_mpi_local_size_1d will be the total number of elements to +allocate on the current process (which might be slightly larger than +the local size due to intermediate steps in the algorithm). + +

    As mentioned above (see Load balancing), the data will be divided +equally among the processes if n0 is divisible by the +square of the number of processes. In this case, +local_ni will equal local_no. Otherwise, they may be +different. + +

    For some applications, such as convolutions, the order of the output +data is irrelevant. In this case, performance can be improved by +specifying that the output data be stored in an FFTW-defined +“scrambled” format. (In particular, this is the analogue of +transposed output in the multidimensional case: scrambled output saves +a communications step.) If you pass FFTW_MPI_SCRAMBLED_OUT in +the flags, then the output is stored in this (undocumented) scrambled +order. Conversely, to perform the inverse transform of data in +scrambled order, pass the FFTW_MPI_SCRAMBLED_IN flag. + + +

    In MPI FFTW, only composite sizes n0 can be parallelized; we +have not yet implemented a parallel algorithm for large prime sizes. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Other-Important-Topics.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Other-Important-Topics.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,67 @@ + + +Other Important Topics - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Tutorial, +Up: Top +


    +
    + +

    3 Other Important Topics

    + + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Other-Multi_002ddimensional-Real_002ddata-MPI-Transforms.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Other-Multi_002ddimensional-Real_002ddata-MPI-Transforms.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ + + +Other Multi-dimensional Real-data MPI Transforms - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    6.6 Other multi-dimensional Real-Data MPI Transforms

    + +

    FFTW's MPI interface also supports multi-dimensional ‘r2r’ +transforms of all kinds supported by the serial interface +(e.g. discrete cosine and sine transforms, discrete Hartley +transforms, etc.). Only multi-dimensional ‘r2r’ transforms, not +one-dimensional transforms, are currently parallelized. + +

    These are used much like the multidimensional complex DFTs discussed +above, except that the data is real rather than complex, and one needs +to pass an r2r transform kind (fftw_r2r_kind) for each +dimension as in the serial FFTW (see More DFTs of Real Data). + +

    For example, one might perform a two-dimensional L × M transform that is +an REDFT10 (DCT-II) in the first dimension and an RODFT10 (DST-II) in +the second dimension with code like: + +

             const ptrdiff_t L = ..., M = ...;
    +         fftw_plan plan;
    +         double *data;
    +         ptrdiff_t alloc_local, local_n0, local_0_start, i, j;
    +     
    +         /* get local data size and allocate */
    +         alloc_local = fftw_mpi_local_size_2d(L, M, MPI_COMM_WORLD,
    +                                              &local_n0, &local_0_start);
    +         data = fftw_alloc_real(alloc_local);
    +     
    +         /* create plan for in-place REDFT10 x RODFT10 */
    +         plan = fftw_mpi_plan_r2r_2d(L, M, data, data, MPI_COMM_WORLD,
    +                                     FFTW_REDFT10, FFTW_RODFT10, FFTW_MEASURE);
    +     
    +         /* initialize data to some function my_function(x,y) */
    +         for (i = 0; i < local_n0; ++i) for (j = 0; j < M; ++j)
    +            data[i*M + j] = my_function(local_0_start + i, j);
    +     
    +         /* compute transforms, in-place, as many times as desired */
    +         fftw_execute(plan);
    +     
    +         fftw_destroy_plan(plan);
    +
    +

    Notice that we use the same ‘local_size’ functions as we did for +complex data, only now we interpret the sizes in terms of real rather +than complex values, and correspondingly use fftw_alloc_real. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Overview-of-Fortran-interface.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Overview-of-Fortran-interface.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,128 @@ + + +Overview of Fortran interface - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    7.1 Overview of Fortran interface

    + +

    FFTW provides a file fftw3.f03 that defines Fortran 2003 +interfaces for all of its C routines, except for the MPI routines +described elsewhere, which can be found in the same directory as +fftw3.h (the C header file). In any Fortran subroutine where +you want to use FFTW functions, you should begin with: + +

    +

           use, intrinsic :: iso_c_binding
    +       include 'fftw3.f03'
    +
    +

    This includes the interface definitions and the standard +iso_c_binding module (which defines the equivalents of C +types). You can also put the FFTW functions into a module if you +prefer (see Defining an FFTW module). + +

    At this point, you can now call anything in the FFTW C interface +directly, almost exactly as in C other than minor changes in syntax. +For example: + +

    +

           type(C_PTR) :: plan
    +       complex(C_DOUBLE_COMPLEX), dimension(1024,1000) :: in, out
    +       plan = fftw_plan_dft_2d(1000,1024, in,out, FFTW_FORWARD,FFTW_ESTIMATE)
    +       ...
    +       call fftw_execute_dft(plan, in, out)
    +       ...
    +       call fftw_destroy_plan(plan)
    +
    +

    A few important things to keep in mind are: + +

      +
    • FFTW plans are type(C_PTR). Other C types are mapped in the +obvious way via the iso_c_binding standard: int turns +into integer(C_INT), fftw_complex turns into +complex(C_DOUBLE_COMPLEX), double turns into +real(C_DOUBLE), and so on. See FFTW Fortran type reference. + +
    • Functions in C become functions in Fortran if they have a return value, +and subroutines in Fortran otherwise. + +
    • The ordering of the Fortran array dimensions must be reversed +when they are passed to the FFTW plan creation, thanks to differences +in array indexing conventions (see Multi-dimensional Array Format). This is unlike the legacy Fortran interface +(see Fortran-interface routines), which reversed the dimensions +for you. See Reversing array dimensions. + +
    • Using ordinary Fortran array declarations like this works, but may +yield suboptimal performance because the data may not be aligned +to exploit SIMD instructions on modern processors (see SIMD alignment and fftw_malloc). Better performance will often be obtained +by allocating with ‘fftw_alloc’. See Allocating aligned memory in Fortran. + +
    • Similar to the legacy Fortran interface (see FFTW Execution in Fortran), we currently recommend not using fftw_execute +but rather using the more specialized functions like +fftw_execute_dft (see New-array Execute Functions). +However, you should execute the plan on the same arrays as the +ones for which you created the plan, unless you are especially +careful. See Plan execution in Fortran. To prevent +you from using fftw_execute by mistake, the fftw3.f03 +file does not provide an fftw_execute interface declaration. + +
    • Multiple planner flags are combined with ior (equivalent to ‘|’ in C). e.g. FFTW_MEASURE | FFTW_DESTROY_INPUT becomes ior(FFTW_MEASURE, FFTW_DESTROY_INPUT). (You can also use ‘+’ as long as you don't try to include a given flag more than once.) + +
    + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Plan-execution-in-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Plan-execution-in-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,113 @@ + + +Plan execution in Fortran - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    7.4 Plan execution in Fortran

    + +

    In C, in order to use a plan, one normally calls fftw_execute, +which executes the plan to perform the transform on the input/output +arrays passed when the plan was created (see Using Plans). The +corresponding subroutine call in modern Fortran is: +

          call fftw_execute(plan)
    +
    +

    +However, we have had reports that this causes problems with some +recent optimizing Fortran compilers. The problem is, because the +input/output arrays are not passed as explicit arguments to +fftw_execute, the semantics of Fortran (unlike C) allow the +compiler to assume that the input/output arrays are not changed by +fftw_execute. As a consequence, certain compilers end up +repositioning the call to fftw_execute, assuming incorrectly +that it does nothing to the arrays. + +

    There are various workarounds to this, but the safest and simplest +thing is to not use fftw_execute in Fortran. Instead, use the +functions described in New-array Execute Functions, which take +the input/output arrays as explicit arguments. For example, if the +plan is for a complex-data DFT and was created for the arrays +in and out, you would do: +

          call fftw_execute_dft(plan, in, out)
    +
    +

    +There are a few things to be careful of, however: + +

      +
    • You must use the correct type of execute function, matching the way +the plan was created. Complex DFT plans should use +fftw_execute_dft, real-input (r2c) DFT plans should use +fftw_execute_dft_r2c, and real-output (c2r) DFT plans should +use fftw_execute_dft_c2r. The various r2r plans should use +fftw_execute_r2r. Fortunately, if you use the wrong one you +will get a compile-time type-mismatch error (unlike legacy Fortran). + +
    • You should normally pass the same input/output arrays that were used when +creating the plan. This is always safe. + +
    • If you pass different input/output arrays compared to +those used when creating the plan, you must abide by all the +restrictions of the new-array execute functions (see New-array Execute Functions). The most tricky of these is the +requirement that the new arrays have the same alignment as the +original arrays; the best (and possibly only) way to guarantee this +is to use the ‘fftw_alloc’ functions to allocate your arrays (see Allocating aligned memory in Fortran). Alternatively, you can +use the FFTW_UNALIGNED flag when creating the +plan, in which case the plan does not depend on the alignment, but +this may sacrifice substantial performance on architectures (like x86) +with SIMD instructions (see SIMD alignment and fftw_malloc). + +
    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Planner-Flags.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Planner-Flags.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,171 @@ + + +Planner Flags - FFTW 3.3.4 + + + + + + + + + + + + + +
    + +

    +Next: , +Previous: Complex DFTs, +Up: Basic Interface +


    +
    + +

    4.3.2 Planner Flags

    + +

    All of the planner routines in FFTW accept an integer flags +argument, which is a bitwise OR (‘|’) of zero or more of the flag +constants defined below. These flags control the rigor (and time) of +the planning process, and can also impose (or lift) restrictions on the +type of transform algorithm that is employed. + +

    Important: the planner overwrites the input array during +planning unless a saved plan (see Wisdom) is available for that +problem, so you should initialize your input data after creating the +plan. The only exceptions to this are the FFTW_ESTIMATE and +FFTW_WISDOM_ONLY flags, as mentioned below. + +

    In all cases, if wisdom is available for the given problem that was +created with equal-or-greater planning rigor, then the more rigorous +wisdom is used. For example, in FFTW_ESTIMATE mode any available +wisdom is used, whereas in FFTW_PATIENT mode only wisdom created +in patient or exhaustive mode can be used. See Words of Wisdom-Saving Plans. + +

    Planning-rigor flags
    + +
      +
    • FFTW_ESTIMATE specifies that, instead of actual measurements of +different algorithms, a simple heuristic is used to pick a (probably +sub-optimal) plan quickly. With this flag, the input/output arrays are +not overwritten during planning. + +
    • FFTW_MEASURE tells FFTW to find an optimized plan by actually +computing several FFTs and measuring their execution time. +Depending on your machine, this can take some time (often a few +seconds). FFTW_MEASURE is the default planning option. + +
    • FFTW_PATIENT is like FFTW_MEASURE, but considers a wider +range of algorithms and often produces a “more optimal” plan +(especially for large transforms), but at the expense of several times +longer planning time (especially for large transforms). + +
    • FFTW_EXHAUSTIVE is like FFTW_PATIENT, but considers an +even wider range of algorithms, including many that we think are +unlikely to be fast, to produce the most optimal plan but with a +substantially increased planning time. + +
    • FFTW_WISDOM_ONLY is a special planning mode in which the plan +is only created if wisdom is available for the given problem, and +otherwise a NULL plan is returned. This can be combined with +other flags, e.g. ‘FFTW_WISDOM_ONLY | FFTW_PATIENT’ creates a +plan only if wisdom is available that was created in +FFTW_PATIENT or FFTW_EXHAUSTIVE mode. The +FFTW_WISDOM_ONLY flag is intended for users who need to detect +whether wisdom is available; for example, if wisdom is not available +one may wish to allocate new arrays for planning so that user data is +not overwritten. + +
    + +
    Algorithm-restriction flags
    + +
      +
    • FFTW_DESTROY_INPUT specifies that an out-of-place transform is +allowed to overwrite its input array with arbitrary data; this +can sometimes allow more efficient algorithms to be employed. + +
    • FFTW_PRESERVE_INPUT specifies that an out-of-place transform must +not change its input array. This is ordinarily the +default, except for c2r and hc2r (i.e. complex-to-real) +transforms for which FFTW_DESTROY_INPUT is the default. In the +latter cases, passing FFTW_PRESERVE_INPUT will attempt to use +algorithms that do not destroy the input, at the expense of worse +performance; for multi-dimensional c2r transforms, however, no +input-preserving algorithms are implemented and the planner will return +NULL if one is requested. + +
    • FFTW_UNALIGNED specifies that the algorithm may not impose any +unusual alignment requirements on the input/output arrays (i.e. no +SIMD may be used). This flag is normally not necessary, since +the planner automatically detects misaligned arrays. The only use for +this flag is if you want to use the new-array execute interface to +execute a given plan on a different array that may not be aligned like +the original. (Using fftw_malloc makes this flag unnecessary +even then. You can also use fftw_alignment_of to detect +whether two arrays are equivalently aligned.) + +
    + +
    Limiting planning time
    + +
         extern void fftw_set_timelimit(double seconds);
    +
    +

    +This function instructs FFTW to spend at most seconds seconds +(approximately) in the planner. If seconds == +FFTW_NO_TIMELIMIT (the default value, which is negative), then +planning time is unbounded. Otherwise, FFTW plans with a +progressively wider range of algorithms until the given time limit +is reached or the given range of algorithms is explored, returning the +best available plan. + + +

    For example, specifying FFTW_PATIENT first plans in +FFTW_ESTIMATE mode, then in FFTW_MEASURE mode, then +finally (time permitting) in FFTW_PATIENT. If +FFTW_EXHAUSTIVE is specified instead, the planner will further +progress to FFTW_EXHAUSTIVE mode. + +

    Note that the seconds argument specifies only a rough limit; in +practice, the planner may use somewhat more time if the time limit is +reached when the planner is in the middle of an operation that cannot +be interrupted. At the very least, the planner will complete planning +in FFTW_ESTIMATE mode (which is thus equivalent to a time limit +of 0). + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Precision.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Precision.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,100 @@ + + +Precision - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Complex numbers, +Up: Data Types and Files +


    +
    + +

    4.1.2 Precision

    + +

    +You can install single and long-double precision versions of FFTW, +which replace double with float and long double, +respectively (see Installation and Customization). To use these +interfaces, you: + +

      +
    • Link to the single/long-double libraries; on Unix, -lfftw3f or +-lfftw3l instead of (or in addition to) -lfftw3. (You +can link to the different-precision libraries simultaneously.) + +
    • Include the same <fftw3.h> header file. + +
    • Replace all lowercase instances of ‘fftw_’ with ‘fftwf_’ or +‘fftwl_’ for single or long-double precision, respectively. +(fftw_complex becomes fftwf_complex, fftw_execute +becomes fftwf_execute, etcetera.) + +
    • Uppercase names, i.e. names beginning with ‘FFTW_’, remain the +same. + +
    • Replace double with float or long double for +subroutine parameters. + +
    + +

    Depending upon your compiler and/or hardware, long double may not +be any more precise than double (or may not be supported at all, +although it is standard in C99). + + +

    We also support using the nonstandard __float128 +quadruple-precision type provided by recent versions of gcc on +32- and 64-bit x86 hardware (see Installation and Customization). +To use this type, link with -lfftw3q -lquadmath -lm (the +libquadmath library provided by gcc is needed for +quadruple-precision trigonometric functions) and use ‘fftwq_’ +identifiers. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Real-even_002fodd-DFTs-_0028cosine_002fsine-transforms_0029.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Real-even_002fodd-DFTs-_0028cosine_002fsine-transforms_0029.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,202 @@ + + +Real even/odd DFTs (cosine/sine transforms) - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    2.5.2 Real even/odd DFTs (cosine/sine transforms)

    + +

    The Fourier transform of a real-even function f(-x) = f(x) is +real-even, and i times the Fourier transform of a real-odd +function f(-x) = -f(x) is real-odd. Similar results hold for a +discrete Fourier transform, and thus for these symmetries the need for +complex inputs/outputs is entirely eliminated. Moreover, one gains a +factor of two in speed/space from the fact that the data are real, and +an additional factor of two from the even/odd symmetry: only the +non-redundant (first) half of the array need be stored. The result is +the real-even DFT (REDFT) and the real-odd DFT (RODFT), also +known as the discrete cosine and sine transforms (DCT and +DST), respectively. + + +

    (In this section, we describe the 1d transforms; multi-dimensional +transforms are just a separable product of these transforms operating +along each dimension.) + +

    Because of the discrete sampling, one has an additional choice: is the +data even/odd around a sampling point, or around the point halfway +between two samples? The latter corresponds to shifting the +samples by half an interval, and gives rise to several transform +variants denoted by REDFTab and RODFTab: a and +b are 0 or 1, and indicate whether the input +(a) and/or output (b) are shifted by half a sample +(1 means it is shifted). These are also known as types I-IV of +the DCT and DST, and all four types are supported by FFTW's r2r +interface.[1] + +

    The r2r kinds for the various REDFT and RODFT types supported by FFTW, +along with the boundary conditions at both ends of the input +array (n real numbers in[j=0..n-1]), are: + +

      +
    • FFTW_REDFT00 (DCT-I): even around j=0 and even around j=n-1. + +
    • FFTW_REDFT10 (DCT-II, “the” DCT): even around j=-0.5 and even around j=n-0.5. + +
    • FFTW_REDFT01 (DCT-III, “the” IDCT): even around j=0 and odd around j=n. + +
    • FFTW_REDFT11 (DCT-IV): even around j=-0.5 and odd around j=n-0.5. + +
    • FFTW_RODFT00 (DST-I): odd around j=-1 and odd around j=n. + +
    • FFTW_RODFT10 (DST-II): odd around j=-0.5 and odd around j=n-0.5. + +
    • FFTW_RODFT01 (DST-III): odd around j=-1 and even around j=n-1. + +
    • FFTW_RODFT11 (DST-IV): odd around j=-0.5 and even around j=n-0.5. + +
    + +

    Note that these symmetries apply to the “logical” array being +transformed; there are no constraints on your physical input +data. So, for example, if you specify a size-5 REDFT00 (DCT-I) of the +data abcde, it corresponds to the DFT of the logical even array +abcdedcb of size 8. A size-4 REDFT10 (DCT-II) of the data +abcd corresponds to the size-8 logical DFT of the even array +abcddcba, shifted by half a sample. + +

    All of these transforms are invertible. The inverse of R*DFT00 is +R*DFT00; of R*DFT10 is R*DFT01 and vice versa (these are often called +simply “the” DCT and IDCT, respectively); and of R*DFT11 is R*DFT11. +However, the transforms computed by FFTW are unnormalized, exactly +like the corresponding real and complex DFTs, so computing a transform +followed by its inverse yields the original array scaled by N, +where N is the logical DFT size. For REDFT00, +N=2(n-1); for RODFT00, N=2(n+1); otherwise, N=2n. + + +

    Note that the boundary conditions of the transform output array are +given by the input boundary conditions of the inverse transform. +Thus, the above transforms are all inequivalent in terms of +input/output boundary conditions, even neglecting the 0.5 shift +difference. + +

    FFTW is most efficient when N is a product of small factors; note +that this differs from the factorization of the physical size +n for REDFT00 and RODFT00! There is another oddity: n=1 +REDFT00 transforms correspond to N=0, and so are not +defined (the planner will return NULL). Otherwise, any positive +n is supported. + +

    For the precise mathematical definitions of these transforms as used by +FFTW, see What FFTW Really Computes. (For people accustomed to +the DCT/DST, FFTW's definitions have a coefficient of 2 in front +of the cos/sin functions so that they correspond precisely to an +even/odd DFT of size N. Some authors also include additional +multiplicative factors of +√2 for selected inputs and outputs; this makes +the transform orthogonal, but sacrifices the direct equivalence to a +symmetric DFT.) + +

    Which type do you need?
    + +

    Since the required flavor of even/odd DFT depends upon your problem, +you are the best judge of this choice, but we can make a few comments +on relative efficiency to help you in your selection. In particular, +R*DFT01 and R*DFT10 tend to be slightly faster than R*DFT11 +(especially for odd sizes), while the R*DFT00 transforms are sometimes +significantly slower (especially for even sizes).[2] + +

    Thus, if only the boundary conditions on the transform inputs are +specified, we generally recommend R*DFT10 over R*DFT00 and R*DFT01 over +R*DFT11 (unless the half-sample shift or the self-inverse property is +significant for your problem). + +

    If performance is important to you and you are using only small sizes +(say n<200), e.g. for multi-dimensional transforms, then you +might consider generating hard-coded transforms of those sizes and types +that you are interested in (see Generating your own code). + +

    We are interested in hearing what types of symmetric transforms you find +most useful. + + +

    +
    +

    Footnotes

    [1] There are also type V-VIII transforms, which +correspond to a logical DFT of odd size N, independent of +whether the physical size n is odd, but we do not support these +variants.

    + +

    [2] R*DFT00 is +sometimes slower in FFTW because we discovered that the standard +algorithm for computing this by a pre/post-processed real DFT—the +algorithm used in FFTPACK, Numerical Recipes, and other sources for +decades now—has serious numerical problems: it already loses several +decimal places of accuracy for 16k sizes. There seem to be only two +alternatives in the literature that do not suffer similarly: a +recursive decomposition into smaller DCTs, which would require a large +set of codelets for efficiency and generality, or sacrificing a factor of +2 +in speed to use a real DFT of twice the size. We currently +employ the latter technique for general n, as well as a limited +form of the former method: a split-radix decomposition when n +is odd (N a multiple of 4). For N containing many +factors of 2, the split-radix method seems to recover most of the +speed of the standard algorithm without the accuracy tradeoff.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Real_002ddata-DFT-Array-Format.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Real_002ddata-DFT-Array-Format.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ + + +Real-data DFT Array Format - FFTW 3.3.4 + + + + + + + + + + + + + +
    + + +

    +Next: , +Previous: Real-data DFTs, +Up: Basic Interface +


    +
    + +

    4.3.4 Real-data DFT Array Format

    + +

    +The output of a DFT of real data (r2c) contains symmetries that, in +principle, make half of the outputs redundant (see What FFTW Really Computes). (Similarly for the input of an inverse c2r transform.) In +practice, it is not possible to entirely realize these savings in an +efficient and understandable format that generalizes to +multi-dimensional transforms. Instead, the output of the r2c +transforms is slightly over half of the output of the +corresponding complex transform. We do not “pack” the data in any +way, but store it as an ordinary array of fftw_complex values. +In fact, this data is simply a subsection of what would be the array in +the corresponding complex transform. + +

    Specifically, for a real transform of d (= rank) +dimensions $n_0 \times n_1 \times n_2 \times \cdots \times n_{d-1}$, the complex data is an $n_0 \times n_1 \times n_2 \times \cdots \times (n_{d-1}/2 + 1)$ array of +fftw_complex values in row-major order (with the division rounded +down). That is, we only store the lower half (non-negative +frequencies), plus one element, of the last dimension of the data from +the ordinary complex transform. (We could have instead taken half of +any other dimension, but implementation turns out to be simpler if the +last, contiguous, dimension is used.) + +

    For an out-of-place transform, the real data is simply an array with +physical dimensions n0 × n1 × n2 × … × nd-1 in row-major order. + +

    For an in-place transform, some complications arise since the complex data +is slightly larger than the real data. In this case, the final +dimension of the real data must be padded with extra values to +accommodate the size of the complex data—two extra if the last +dimension is even and one if it is odd. That is, the last dimension of +the real data must physically contain +$2(n_{d-1}/2+1)$ double values (exactly enough to hold the complex data). This +physical array size does not, however, change the logical array +size—only +$n_{d-1}$ values are actually stored in the last dimension, and +$n_{d-1}$ is the last dimension passed to the planner. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Real_002ddata-DFTs.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Real_002ddata-DFTs.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,158 @@ + + +Real-data DFTs - FFTW 3.3.4 + + + + + + + + + + + + + +

    + + +

    +Next: , +Previous: Planner Flags, +Up: Basic Interface +


    +
    + +

    4.3.3 Real-data DFTs

    + +
         fftw_plan fftw_plan_dft_r2c_1d(int n0,
    +                                    double *in, fftw_complex *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1,
    +                                    double *in, fftw_complex *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2,
    +                                    double *in, fftw_complex *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_r2c(int rank, const int *n,
    +                                 double *in, fftw_complex *out,
    +                                 unsigned flags);
    +
    +

    +Plan a real-input/complex-output discrete Fourier transform (DFT) in +zero or more dimensions, returning an fftw_plan (see Using Plans). + +

    Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + +

    The planner returns NULL if the plan cannot be created. A +non-NULL plan is always returned by the basic interface unless +you are using a customized FFTW configuration supporting a restricted +set of transforms, or if you use the FFTW_PRESERVE_INPUT flag +with a multi-dimensional out-of-place c2r transform (see below). + +

    Arguments
    + +
      +
    • rank is the rank of the transform (it should be the size of the +array *n), and can be any non-negative integer. (See Complex Multi-Dimensional DFTs, for the definition of “rank”.) The +‘_1d’, ‘_2d’, and ‘_3d’ planners correspond to a +rank of 1, 2, and 3, respectively. The rank +may be zero, which is equivalent to a rank-1 transform of size 1, i.e. a +copy of one real number (with zero imaginary part) from input to output. + +
    • n0, n1, n2, or n[0..rank-1], (as appropriate +for each routine) specify the size of the transform dimensions. They +can be any positive integer. This is different in general from the +physical array dimensions, which are described in Real-data DFT Array Format. + +
        +
      • FFTW is best at handling sizes of the form +$2^a 3^b 5^c 7^d 11^e 13^f$, where $e+f$ is either 0 or 1, and the other exponents +are arbitrary. Other sizes are computed by means of a slow, +general-purpose algorithm (which nevertheless retains O(n log n) performance even for prime sizes). (It is possible to customize FFTW +for different array sizes; see Installation and Customization.) +Transforms whose sizes are powers of 2 are especially fast, and +it is generally beneficial for the last dimension of an r2c/c2r +transform to be even. +
      + +
    • in and out point to the input and output arrays of the +transform, which may be the same (yielding an in-place transform). +These arrays are overwritten during planning, unless +FFTW_ESTIMATE is used in the flags. (The arrays need not be +initialized, but they must be allocated.) For an in-place transform, it +is important to remember that the real array will require padding, +described in Real-data DFT Array Format. + +
    • flags is a bitwise OR (‘|’) of zero or more planner flags, +as defined in Planner Flags. + +
    + +

    The inverse transforms, taking complex input (storing the non-redundant +half of a logically Hermitian array) to real output, are given by: + +

         fftw_plan fftw_plan_dft_c2r_1d(int n0,
    +                                    fftw_complex *in, double *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_c2r_2d(int n0, int n1,
    +                                    fftw_complex *in, double *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_c2r_3d(int n0, int n1, int n2,
    +                                    fftw_complex *in, double *out,
    +                                    unsigned flags);
    +     fftw_plan fftw_plan_dft_c2r(int rank, const int *n,
    +                                 fftw_complex *in, double *out,
    +                                 unsigned flags);
    +
    +

    +The arguments are the same as for the r2c transforms, except that the +input and output data formats are reversed. + +

    FFTW computes an unnormalized transform: computing an r2c followed by a +c2r transform (or vice versa) will result in the original data +multiplied by the size of the transform (the product of the logical +dimensions). +An r2c transform produces the same output as a FFTW_FORWARD +complex DFT of the same input, and a c2r transform is correspondingly +equivalent to FFTW_BACKWARD. For more information, see What FFTW Really Computes. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Real_002dto_002dReal-Transform-Kinds.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Real_002dto_002dReal-Transform-Kinds.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,115 @@ + + +Real-to-Real Transform Kinds - FFTW 3.3.4 + + + + + + + + + + + + +

    + + +

    +Previous: Real-to-Real Transforms, +Up: Basic Interface +


    +
    + +

    4.3.6 Real-to-Real Transform Kinds

    + +

    +FFTW currently supports 11 different r2r transform kinds, specified by +one of the constants below. For the precise definitions of these +transforms, see What FFTW Really Computes. For a more colloquial +introduction to these transform kinds, see More DFTs of Real Data. + +

    For a dimension of size n, there is a corresponding “logical” +dimension N that determines the normalization (and the optimal +factorization); the formula for N is given for each kind below. +Also, with each transform kind is listed its corresponding inverse +transform. FFTW computes unnormalized transforms: a transform followed +by its inverse will result in the original data multiplied by N +(or the product of the N's for each dimension, in +multi-dimensions). + +

      +
    • FFTW_R2HC computes a real-input DFT with output in +“halfcomplex” format, i.e. real and imaginary parts for a transform of +size n stored as: +

      +r0, r1, r2, ..., rn/2, i(n+1)/2-1, ..., i2, i1 +

      (Logical N=n, inverse is FFTW_HC2R.) + +
    • FFTW_HC2R computes the reverse of FFTW_R2HC, above. +(Logical N=n, inverse is FFTW_R2HC.) + +
    • FFTW_DHT computes a discrete Hartley transform. +(Logical N=n, inverse is FFTW_DHT.) + +
    • FFTW_REDFT00 computes an REDFT00 transform, i.e. a DCT-I. +(Logical N=2*(n-1), inverse is FFTW_REDFT00.) + +
    • FFTW_REDFT10 computes an REDFT10 transform, i.e. a DCT-II (sometimes called “the” DCT). +(Logical N=2*n, inverse is FFTW_REDFT01.) + +
    • FFTW_REDFT01 computes an REDFT01 transform, i.e. a DCT-III (sometimes called “the” IDCT, being the inverse of DCT-II). +(Logical N=2*n, inverse is FFTW_REDFT10.) + +
    • FFTW_REDFT11 computes an REDFT11 transform, i.e. a DCT-IV. +(Logical N=2*n, inverse is FFTW_REDFT11.) + +
    • FFTW_RODFT00 computes an RODFT00 transform, i.e. a DST-I. +(Logical N=2*(n+1), inverse is FFTW_RODFT00.) + +
    • FFTW_RODFT10 computes an RODFT10 transform, i.e. a DST-II. +(Logical N=2*n, inverse is FFTW_RODFT01.) + +
    • FFTW_RODFT01 computes an RODFT01 transform, i.e. a DST-III. +(Logical N=2*n, inverse is FFTW_RODFT10.) + +
    • FFTW_RODFT11 computes an RODFT11 transform, i.e. a DST-IV. +(Logical N=2*n, inverse is FFTW_RODFT11.) + +
    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Real_002dto_002dReal-Transforms.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Real_002dto_002dReal-Transforms.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,143 @@ + + +Real-to-Real Transforms - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    4.3.5 Real-to-Real Transforms

    + +

    +

         fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out,
    +                                fftw_r2r_kind kind, unsigned flags);
    +     fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out,
    +                                fftw_r2r_kind kind0, fftw_r2r_kind kind1,
    +                                unsigned flags);
    +     fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2,
    +                                double *in, double *out,
    +                                fftw_r2r_kind kind0,
    +                                fftw_r2r_kind kind1,
    +                                fftw_r2r_kind kind2,
    +                                unsigned flags);
    +     fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out,
    +                             const fftw_r2r_kind *kind, unsigned flags);
    +
    +

    +Plan a real input/output (r2r) transform of various kinds in zero or +more dimensions, returning an fftw_plan (see Using Plans). + +

    Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + +

    The planner returns NULL if the plan cannot be created. A +non-NULL plan is always returned by the basic interface unless +you are using a customized FFTW configuration supporting a restricted +set of transforms, or for size-1 FFTW_REDFT00 kinds (which are +not defined). + + +

    Arguments
    + +
      +
    • rank is the dimensionality of the transform (it should be the +size of the arrays *n and *kind), and can be any +non-negative integer. The ‘_1d’, ‘_2d’, and ‘_3d’ +planners correspond to a rank of 1, 2, and +3, respectively. A rank of zero is equivalent to a copy +of one number from input to output. + +
    • n, or n0/n1/n2, or n[rank], +respectively, gives the (physical) size of the transform dimensions. +They can be any positive integer. + +
        +
      • Multi-dimensional arrays are stored in row-major order with dimensions: +n0 x n1; or n0 x n1 x n2; or +n[0] x n[1] x ... x n[rank-1]. +See Multi-dimensional Array Format. +
      • FFTW is generally best at handling sizes of the form +$2^a 3^b 5^c 7^d 11^e 13^f$, where $e+f$ is either 0 or 1, and the other exponents +are arbitrary. Other sizes are computed by means of a slow, +general-purpose algorithm (which nevertheless retains O(n log n) performance even for prime sizes). (It is possible to customize FFTW +for different array sizes; see Installation and Customization.) +Transforms whose sizes are powers of 2 are especially fast. +
      • For a REDFT00 or RODFT00 transform kind in a dimension of +size n, it is n-1 or n+1, respectively, that +should be factorizable in the above form. +
      + +
    • in and out point to the input and output arrays of the +transform, which may be the same (yielding an in-place transform). +These arrays are overwritten during planning, unless +FFTW_ESTIMATE is used in the flags. (The arrays need not be +initialized, but they must be allocated.) + +
    • kind, or kind0/kind1/kind2, or +kind[rank], is the kind of r2r transform used for the +corresponding dimension. The valid kind constants are described in +Real-to-Real Transform Kinds. In a multi-dimensional transform, +what is computed is the separable product formed by taking each +transform kind along the corresponding dimension, one dimension after +another. + +
    • flags is a bitwise OR (‘|’) of zero or more planner flags, +as defined in Planner Flags. + +
    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Reversing-array-dimensions.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Reversing-array-dimensions.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,132 @@ + + +Reversing array dimensions - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    7.2 Reversing array dimensions

    + +

    A minor annoyance in calling FFTW from Fortran is that FFTW's array +dimensions are defined in the C convention (row-major order), while +Fortran's array dimensions are the opposite convention (column-major +order). See Multi-dimensional Array Format. This is just a +bookkeeping difference, with no effect on performance. The only +consequence of this is that, whenever you create an FFTW plan for a +multi-dimensional transform, you must always reverse the +ordering of the dimensions. + +

    For example, consider the three-dimensional (L × M × N) arrays: + +

           complex(C_DOUBLE_COMPLEX), dimension(L,M,N) :: in, out
    +
    +

    To plan a DFT for these arrays using fftw_plan_dft_3d, you could do: + +

    +

           plan = fftw_plan_dft_3d(N,M,L, in,out, FFTW_FORWARD,FFTW_ESTIMATE)
    +
    +

    That is, from FFTW's perspective this is a N × M × L array. +No data transposition need occur, as this is only +notation. Similarly, to use the more generic routine +fftw_plan_dft with the same arrays, you could do: + +

           integer(C_INT), dimension(3) :: n = [N,M,L]
    +       plan = fftw_plan_dft(3, n, in,out, FFTW_FORWARD,FFTW_ESTIMATE)
    +
    +

    Note, by the way, that this is different from the legacy Fortran +interface (see Fortran-interface routines), which automatically +reverses the order of the array dimension for you. Here, you are +calling the C interface directly, so there is no “translation” layer. + +

    An important thing to keep in mind is the implication of this for +multidimensional real-to-complex transforms (see Multi-Dimensional DFTs of Real Data). In C, a multidimensional real-to-complex DFT +chops the last dimension roughly in half (N × M × L real input +goes to N × M × L/2+1 complex output). In Fortran, because +the array dimension notation is reversed, the first dimension of +the complex data is chopped roughly in half. For example consider the +‘r2c’ transform of L × M × N real input in Fortran: + +

    +

           type(C_PTR) :: plan
    +       real(C_DOUBLE), dimension(L,M,N) :: in
    +       complex(C_DOUBLE_COMPLEX), dimension(L/2+1,M,N) :: out
    +       plan = fftw_plan_dft_r2c_3d(N,M,L, in,out, FFTW_ESTIMATE)
    +       ...
    +       call fftw_execute_dft_r2c(plan, in, out)
    +
    +

    Alternatively, for an in-place r2c transform, as described in the C +documentation we must pad the first dimension of the +real input with an extra two entries (which are ignored by FFTW) so as +to leave enough space for the complex output. The input is +allocated as a 2[L/2+1] × M × N array, even though only +L × M × N of it is actually used. In this example, we will +allocate the array as a pointer type, using ‘fftw_alloc’ to +ensure aligned memory for maximum performance (see Allocating aligned memory in Fortran); this also makes it easy to reference the +same memory as both a real array and a complex array. + +

    +

           real(C_DOUBLE), pointer :: in(:,:,:)
    +       complex(C_DOUBLE_COMPLEX), pointer :: out(:,:,:)
    +       type(C_PTR) :: plan, data
    +       data = fftw_alloc_complex(int((L/2+1) * M * N, C_SIZE_T))
    +       call c_f_pointer(data, in, [2*(L/2+1),M,N])
    +       call c_f_pointer(data, out, [L/2+1,M,N])
    +       plan = fftw_plan_dft_r2c_3d(N,M,L, in,out, FFTW_ESTIMATE)
    +       ...
    +       call fftw_execute_dft_r2c(plan, in, out)
    +       ...
    +       call fftw_destroy_plan(plan)
    +       call fftw_free(data)
    +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Row_002dmajor-Format.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Row_002dmajor-Format.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,93 @@ + + +Row-major Format - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    3.2.1 Row-major Format

    + +

    +The multi-dimensional arrays passed to fftw_plan_dft etcetera +are expected to be stored as a single contiguous block in +row-major order (sometimes called “C order”). Basically, this +means that as you step through adjacent memory locations, the first +dimension's index varies most slowly and the last dimension's index +varies most quickly. + +

    To be more explicit, let us consider an array of rank d whose +dimensions are $n_0 \times n_1 \times n_2 \times \cdots \times n_{d-1}$. Now, we specify a location in the array by a +sequence of d (zero-based) indices, one for each dimension: +$(i_0, i_1, i_2, \ldots, i_{d-1})$. If the array is stored in row-major +order, then this element is located at the position +$i_{d-1} + n_{d-1} (i_{d-2} + n_{d-2} (\cdots + n_1 i_0))$. + +

    Note that, for the ordinary complex DFT, each element of the array +must be of type fftw_complex; i.e. a (real, imaginary) pair of +(double-precision) numbers. + +

    In the advanced FFTW interface, the physical dimensions n from +which the indices are computed can be different from (larger than) +the logical dimensions of the transform to be computed, in order to +transform a subset of a larger array. +Note also that, in the advanced interface, the expression above is +multiplied by a stride to get the actual array index—this is +useful in situations where each element of the multi-dimensional array +is actually a data structure (or another array), and you just want to +transform a single field. In the basic interface, however, the stride +is 1. + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/SIMD-alignment-and-fftw_005fmalloc.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/SIMD-alignment-and-fftw_005fmalloc.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,102 @@ + + +SIMD alignment and fftw_malloc - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    3.1 SIMD alignment and fftw_malloc

    + +

    SIMD, which stands for “Single Instruction Multiple Data,” is a set of +special operations supported by some processors to perform a single +operation on several numbers (usually 2 or 4) simultaneously. SIMD +floating-point instructions are available on several popular CPUs: +SSE/SSE2/AVX on recent x86/x86-64 processors, AltiVec (single precision) +on some PowerPCs (Apple G4 and higher), NEON on some ARM models, and MIPS Paired Single +(currently only in FFTW 3.2.x). FFTW can be compiled to support the +SIMD instructions on any of these systems. + + +

    A program linking to an FFTW library compiled with SIMD support can +obtain a nonnegligible speedup for most complex and r2c/c2r +transforms. In order to obtain this speedup, however, the arrays of +complex (or real) data passed to FFTW must be specially aligned in +memory (typically 16-byte aligned), and often this alignment is more +stringent than that provided by the usual malloc (etc.) +allocation routines. + +

    In order to guarantee proper alignment for SIMD, therefore, in case +your program is ever linked against a SIMD-using FFTW, we recommend +allocating your transform data with fftw_malloc and +de-allocating it with fftw_free. +These have exactly the same interface and behavior as +malloc/free, except that for a SIMD FFTW they ensure +that the returned pointer has the necessary alignment (by calling +memalign or its equivalent on your OS). + +

    You are not required to use fftw_malloc. You can +allocate your data in any way that you like, from malloc to +new (in C++) to a fixed-size array declaration. If the array +happens not to be properly aligned, FFTW will not use the SIMD +extensions. + +Since fftw_malloc only ever needs to be used for real and +complex arrays, we provide two convenient wrapper routines +fftw_alloc_real(N) and fftw_alloc_complex(N) that are +equivalent to (double*)fftw_malloc(sizeof(double) * N) and +(fftw_complex*)fftw_malloc(sizeof(fftw_complex) * N), +respectively (or their equivalents in other precisions). + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/The-1d-Discrete-Fourier-Transform-_0028DFT_0029.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/The-1d-Discrete-Fourier-Transform-_0028DFT_0029.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,84 @@ + + +The 1d Discrete Fourier Transform (DFT) - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.8.1 The 1d Discrete Fourier Transform (DFT)

    + +

    The forward (FFTW_FORWARD) discrete Fourier transform (DFT) of a +1d complex array X of size n computes an array Y, +where: +

    $$Y_k = \sum_{j=0}^{n-1} X_j \, e^{-2\pi j k \sqrt{-1}/n}.$$
    The backward (FFTW_BACKWARD) DFT computes: +
    $$Y_k = \sum_{j=0}^{n-1} X_j \, e^{2\pi j k \sqrt{-1}/n}.$$
    + +

    FFTW computes an unnormalized transform, in that there is no coefficient +in front of the summation in the DFT. In other words, applying the +forward and then the backward transform will multiply the input by +n. + +

    From above, an FFTW_FORWARD transform corresponds to a sign of +-1 in the exponent of the DFT. Note also that we use the +standard “in-order” output ordering—the k-th output +corresponds to the frequency k/n (or k/T, where T +is your total sampling period). For those who like to think in terms of +positive and negative frequencies, this means that the positive +frequencies are stored in the first half of the output and the negative +frequencies are stored in backwards order in the second half of the +output. (The frequency -k/n is the same as the frequency +(n-k)/n.) + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/The-1d-Real_002ddata-DFT.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/The-1d-Real_002ddata-DFT.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,94 @@ + + +The 1d Real-data DFT - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.8.2 The 1d Real-data DFT

    + +

    The real-input (r2c) DFT in FFTW computes the forward transform +Y of the size n real array X, exactly as defined +above, i.e. +

    $$Y_k = \sum_{j=0}^{n-1} X_j \, e^{-2\pi j k \sqrt{-1}/n}.$$
    This output array Y can easily be shown to possess the +“Hermitian” symmetry +$Y_k = Y_{n-k}^*$, where we take Y to be periodic so that +$Y_n = Y_0$. + +

    As a result of this symmetry, half of the output Y is redundant +(being the complex conjugate of the other half), and so the 1d r2c +transforms only output elements 0...n/2 of Y +(n/2+1 complex numbers), where the division by 2 is +rounded down. + +

    Moreover, the Hermitian symmetry implies that +$Y_0$ and, if n is even, the +$Y_{n/2}$ element, are purely real. So, for the R2HC r2r transform, the +(zero) imaginary parts of these elements are not stored in the halfcomplex output format. + + +

    The c2r and HC2R r2r transforms compute the backward DFT of the +complex array X with Hermitian symmetry, stored in the +r2c/R2HC output formats, respectively, where the backward +transform is defined exactly as for the complex case: +

    $$Y_k = \sum_{j=0}^{n-1} X_j \, e^{2\pi j k \sqrt{-1}/n}.$$
    The outputs Y of this transform can easily be seen to be purely +real, and are stored as an array of real numbers. + +

    Like FFTW's complex DFT, these transforms are unnormalized. In other +words, applying the real-to-complex (forward) and then the +complex-to-real (backward) transform will multiply the input by +n. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/The-Discrete-Hartley-Transform.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/The-Discrete-Hartley-Transform.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ + + +The Discrete Hartley Transform - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    2.5.3 The Discrete Hartley Transform

    + +

    If you are planning to use the DHT because you've heard that it is +“faster” than the DFT (FFT), stop here. The DHT is not +faster than the DFT. That story is an old but enduring misconception +that was debunked in 1987. + +

    The discrete Hartley transform (DHT) is an invertible linear transform +closely related to the DFT. In the DFT, one multiplies each input by +cos - i * sin (a complex exponential), whereas in the DHT each +input is multiplied by simply cos + sin. Thus, the DHT +transforms n real numbers to n real numbers, and has the +convenient property of being its own inverse. In FFTW, a DHT (of any +positive n) can be specified by an r2r kind of FFTW_DHT. + +Like the DFT, in FFTW the DHT is unnormalized, so computing a DHT of +size n followed by another DHT of the same size will result in +the original array multiplied by n. + +The DHT was originally proposed as a more efficient alternative to the +DFT for real data, but it was subsequently shown that a specialized DFT +(such as FFTW's r2hc or r2c transforms) could be just as fast. In FFTW, +the DHT is actually computed by post-processing an r2hc transform, so +there is ordinarily no reason to prefer it from a performance +perspective.1 +However, we have heard rumors that the DHT might be the most appropriate +transform in its own right for certain applications, and we would be +very interested to hear from anyone who finds it useful. + +

    If FFTW_DHT is specified for multiple dimensions of a +multi-dimensional transform, FFTW computes the separable product of 1d +DHTs along each dimension. Unfortunately, this is not quite the same +thing as a true multi-dimensional DHT; you can compute the latter, if +necessary, with at most rank-1 post-processing passes +[see e.g. H. Hao and R. N. Bracewell, Proc. IEEE 75, 264–266 (1987)]. + +

    For the precise mathematical definition of the DHT as used by FFTW, see +What FFTW Really Computes. + +

    +
    +

    Footnotes

    [1] We provide the DHT mainly as a byproduct of some +internal algorithms. FFTW computes a real input/output DFT of +prime size by re-expressing it as a DHT plus post/pre-processing +and then using Rader's prime-DFT algorithm adapted to the DHT.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/The-Halfcomplex_002dformat-DFT.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/The-Halfcomplex_002dformat-DFT.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,111 @@ + + +The Halfcomplex-format DFT - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    2.5.1 The Halfcomplex-format DFT

    + +

    An r2r kind of FFTW_R2HC (r2hc) corresponds to an r2c DFT +(see One-Dimensional DFTs of Real Data) but with “halfcomplex” +format output, and may sometimes be faster and/or more convenient than +the latter. +The inverse hc2r transform is of kind FFTW_HC2R. +This consists of the non-redundant half of the complex output for a 1d +real-input DFT of size n, stored as a sequence of n real +numbers (double) in the format: + +

    +$r_0, r_1, r_2, \ldots, r_{n/2}, i_{(n+1)/2-1}, \ldots, i_2, i_1$ +

    + +

    Here, +$r_k$ is the real part of the kth output, and +$i_k$ is the imaginary part. (Division by 2 is rounded down.) For a +halfcomplex array hc[n], the kth component thus has its +real part in hc[k] and its imaginary part in hc[n-k], with +the exception of k == 0 or n/2 (the latter +only if n is even)—in these two cases, the imaginary part is +zero due to symmetries of the real-input DFT, and is not stored. +Thus, the r2hc transform of n real values is a halfcomplex array of +length n, and vice versa for hc2r. + + +

    Aside from the differing format, the output of +FFTW_R2HC/FFTW_HC2R is otherwise exactly the same as for +the corresponding 1d r2c/c2r transform +(i.e. FFTW_FORWARD/FFTW_BACKWARD transforms, respectively). +Recall that these transforms are unnormalized, so r2hc followed by hc2r +will result in the original data multiplied by n. Furthermore, +like the c2r transform, an out-of-place hc2r transform will +destroy its input array. + +

    Although these halfcomplex transforms can be used with the +multi-dimensional r2r interface, the interpretation of such a separable +product of transforms along each dimension is problematic. For example, +consider a two-dimensional n0 by n1, r2hc by r2hc +transform planned by fftw_plan_r2r_2d(n0, n1, in, out, FFTW_R2HC, +FFTW_R2HC, FFTW_MEASURE). Conceptually, FFTW first transforms the rows +(of size n1) to produce halfcomplex rows, and then transforms the +columns (of size n0). Half of these column transforms, however, +are of imaginary parts, and should therefore be multiplied by i +and combined with the r2hc transforms of the real columns to produce the +2d DFT amplitudes; FFTW's r2r transform does not perform this +combination for you. Thus, if a multi-dimensional real-input/output DFT +is required, we recommend using the ordinary r2c/c2r +interface (see Multi-Dimensional DFTs of Real Data). + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Thread-safety.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Thread-safety.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,93 @@ + + +Thread safety - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Previous: How Many Threads to Use?, +Up: Multi-threaded FFTW +


    +
    + +

    5.4 Thread safety

    + +

    Users writing multi-threaded programs (including OpenMP) must concern +themselves with the thread safety of the libraries they +use—that is, whether it is safe to call routines in parallel from +multiple threads. FFTW can be used in such an environment, but some +care must be taken because the planner routines share data +(e.g. wisdom and trigonometric tables) between calls and plans. + +

    The upshot is that the only thread-safe (re-entrant) routine in FFTW is +fftw_execute (and the new-array variants thereof). All other routines +(e.g. the planner) should only be called from one thread at a time. So, +for example, you can wrap a semaphore lock around any calls to the +planner; even more simply, you can just create all of your plans from +one thread. We do not think this should be an important restriction +(FFTW is designed for the situation where the only performance-sensitive +code is the actual execution of the transform), and the benefits of +shared data between plans are great. + +

    Note also that, since the plan is not modified by fftw_execute, +it is safe to execute the same plan in parallel by multiple +threads. However, since a given plan operates by default on a fixed +array, you need to use one of the new-array execute functions (see New-array Execute Functions) so that different threads compute the transform of different data. + +

    (Users should note that these comments only apply to programs using +shared-memory threads or OpenMP. Parallelism using MPI or forked processes +involves a separate address-space and global variables for each process, +and is not susceptible to problems of this sort.) + +

    If you configured FFTW with the --enable-debug or +--enable-debug-malloc flags (see Installation on Unix), +then fftw_execute is not thread-safe. These flags are not +documented because they are intended only for developing +and debugging FFTW, but if you must use --enable-debug then you +should also specifically pass --disable-debug-malloc for +fftw_execute to be thread-safe. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Transposed-distributions.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Transposed-distributions.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,129 @@ + + +Transposed distributions - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.4.3 Transposed distributions

    + +

    Internally, FFTW's MPI transform algorithms work by first computing +transforms of the data local to each process, then by globally +transposing the data in some fashion to redistribute the data +among the processes, transforming the new data local to each process, +and transposing back. For example, a two-dimensional n0 by +n1 array, distributed across the n0 dimension, is +transformed by: (i) transforming the n1 dimension, which is +local to each process; (ii) transposing to an n1 by n0 +array, distributed across the n1 dimension; (iii) transforming +the n0 dimension, which is now local to each process; (iv) +transposing back. + + +

    However, in many applications it is acceptable to compute a +multidimensional DFT whose results are produced in transposed order +(e.g., n1 by n0 in two dimensions). This provides a +significant performance advantage, because it means that the final +transposition step can be omitted. FFTW supports this optimization, +which you specify by passing the flag FFTW_MPI_TRANSPOSED_OUT +to the planner routines. To compute the inverse transform of +transposed output, you specify FFTW_MPI_TRANSPOSED_IN to tell +it that the input is transposed. In this section, we explain how to +interpret the output format of such a transform. + + +

    Suppose you are transforming multi-dimensional data with (at +least two) dimensions n0 × n1 × n2 × … × nd-1. As always, it is distributed along +the first dimension n0. Now, if we compute its DFT with the +FFTW_MPI_TRANSPOSED_OUT flag, the resulting output data are stored +with the first two dimensions transposed: n1 × n0 × n2 ×…× nd-1, +distributed along the n1 dimension. Conversely, if we take the +n1 × n0 × n2 ×…× nd-1 data and transform it with the +FFTW_MPI_TRANSPOSED_IN flag, then the format goes back to the +original n0 × n1 × n2 × … × nd-1 array. + +

    There are two ways to find the portion of the transposed array that +resides on the current process. First, you can simply call the +appropriate ‘local_size’ function, passing n1 × n0 × n2 ×…× nd-1 (the +transposed dimensions). This would mean calling the ‘local_size’ +function twice, once for the transposed and once for the +non-transposed dimensions. Alternatively, you can call one of the +‘local_size_transposed’ functions, which returns both the +non-transposed and transposed data distribution from a single call. +For example, for a 3d transform with transposed output (or input), you +might call: + +

         ptrdiff_t fftw_mpi_local_size_3d_transposed(
    +                     ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm,
    +                     ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
    +                     ptrdiff_t *local_n1, ptrdiff_t *local_1_start);
    +
    +

    +Here, local_n0 and local_0_start give the size and +starting index of the n0 dimension for the +non-transposed data, as in the previous sections. For +transposed data (e.g. the output for +FFTW_MPI_TRANSPOSED_OUT), local_n1 and +local_1_start give the size and starting index of the n1 +dimension, which is the first dimension of the transposed data +(n1 by n0 by n2). + +

    (Note that FFTW_MPI_TRANSPOSED_IN is completely equivalent to +performing FFTW_MPI_TRANSPOSED_OUT and passing the first two +dimensions to the planner in reverse order, or vice versa. If you +pass both the FFTW_MPI_TRANSPOSED_IN and +FFTW_MPI_TRANSPOSED_OUT flags, it is equivalent to swapping the +first two dimensions passed to the planner and passing neither +flag.) + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Tutorial.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Tutorial.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ + + +Tutorial - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Introduction, +Up: Top +


    +
    + +

    2 Tutorial

    + + + +

    This chapter describes the basic usage of FFTW, i.e., how to compute +the Fourier transform of a single array. This chapter tells the +truth, but not the whole truth. Specifically, FFTW implements +additional routines and flags that are not documented here, although +in many cases we try to indicate where added capabilities exist. For +more complete information, see FFTW Reference. (Note that you +need to compile and install FFTW before you can use it in a program. +For the details of the installation, see Installation and Customization.) + +

    We recommend that you read this tutorial in order.1 At the least, read the first section (see Complex One-Dimensional DFTs) before reading any of the others, even if your +main interest lies in one of the other transform types. + +

    Users of FFTW version 2 and earlier may also want to read Upgrading from FFTW version 2. + + +

    +
    +

    Footnotes

    [1] You can +read the tutorial in bit-reversed order after computing your first +transform.

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Upgrading-from-FFTW-version-2.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Upgrading-from-FFTW-version-2.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,258 @@ + + +Upgrading from FFTW version 2 - FFTW 3.3.4 + + + + + + + + + + + + + + +

    9 Upgrading from FFTW version 2

    + +

    In this chapter, we outline the process for updating codes designed for +the older FFTW 2 interface to work with FFTW 3. The interface for FFTW +3 is not backwards-compatible with the interface for FFTW 2 and earlier +versions; codes written to use those versions will fail to link with +FFTW 3. Nor is it possible to write “compatibility wrappers” to +bridge the gap (at least not efficiently), because FFTW 3 has different +semantics from previous versions. However, upgrading should be a +straightforward process because the data formats are identical and the +overall style of planning/execution is essentially the same. + +

    Unlike FFTW 2, there are no separate header files for real and complex +transforms (or even for different precisions) in FFTW 3; all interfaces +are defined in the <fftw3.h> header file. + +

    Numeric Types

    + +

    The main difference in data types is that fftw_complex in FFTW 2 +was defined as a struct with macros c_re and c_im +for accessing the real/imaginary parts. (This is binary-compatible with +FFTW 3 on any machine except perhaps for some older Crays in single +precision.) The equivalent macros for FFTW 3 are: + +

         #define c_re(c) ((c)[0])
    +     #define c_im(c) ((c)[1])
    +
    +

    This does not work if you are using the C99 complex type, however, +unless you insert a double* typecast into the above macros +(see Complex numbers). + +

    Also, FFTW 2 had an fftw_real typedef that was an alias for +double (in double precision). In FFTW 3 you should just use +double (or whatever precision you are employing). + +

    Plans

    + +

    The major difference between FFTW 2 and FFTW 3 is in the +planning/execution division of labor. In FFTW 2, plans were found for a +given transform size and type, and then could be applied to any +arrays and for any multiplicity/stride parameters. In FFTW 3, +you specify the particular arrays, stride parameters, etcetera when +creating the plan, and the plan is then executed for those arrays +(unless the guru interface is used) and those parameters +only. (FFTW 2 had “specific planner” routines that planned for +a particular array and stride, but the plan could still be used for +other arrays and strides.) That is, much of the information that was +formerly specified at execution time is now specified at planning time. + +

    Like FFTW 2's specific planner routines, the FFTW 3 planner overwrites +the input/output arrays unless you use FFTW_ESTIMATE. + +

    FFTW 2 had separate data types fftw_plan, fftwnd_plan, +rfftw_plan, and rfftwnd_plan for complex and real one- and +multi-dimensional transforms, and each type had its own ‘destroy’ +function. In FFTW 3, all plans are of type fftw_plan and all are +destroyed by fftw_destroy_plan(plan). + +

    Where you formerly used fftw_create_plan and fftw_one to +plan and compute a single 1d transform, you would now use +fftw_plan_dft_1d to plan the transform. If you used the generic +fftw function to execute the transform with multiplicity +(howmany) and stride parameters, you would now use the advanced +interface fftw_plan_many_dft to specify those parameters. The +plans are now executed with fftw_execute(plan), which takes all +of its parameters (including the input/output arrays) from the plan. + +

    In-place transforms no longer interpret their output argument as scratch +space, nor is there an FFTW_IN_PLACE flag. You simply pass the +same pointer for both the input and output arguments. (Previously, the +output ostride and odist parameters were ignored for +in-place transforms; now, if they are specified via the advanced +interface, they are significant even in the in-place case, although they +should normally equal the corresponding input parameters.) + +

    The FFTW_ESTIMATE and FFTW_MEASURE flags have the same +meaning as before, although the planning time will differ. You may also +consider using FFTW_PATIENT, which is like FFTW_MEASURE +except that it takes more time in order to consider a wider variety of +algorithms. + +

    For multi-dimensional complex DFTs, instead of fftwnd_create_plan +(or fftw2d_create_plan or fftw3d_create_plan), followed by +fftwnd_one, you would use fftw_plan_dft (or +fftw_plan_dft_2d or fftw_plan_dft_3d), followed by +fftw_execute. If you used fftwnd to specify strides +etcetera, you would instead specify these via fftw_plan_many_dft. + +

    The analogues to rfftw_create_plan and rfftw_one with +FFTW_REAL_TO_COMPLEX or FFTW_COMPLEX_TO_REAL directions +are fftw_plan_r2r_1d with kind FFTW_R2HC or +FFTW_HC2R, followed by fftw_execute. The stride etcetera +arguments of rfftw are now in fftw_plan_many_r2r. + +

    Instead of rfftwnd_create_plan (or rfftw2d_create_plan or +rfftw3d_create_plan) followed by +rfftwnd_one_real_to_complex or +rfftwnd_one_complex_to_real, you now use fftw_plan_dft_r2c +(or fftw_plan_dft_r2c_2d or fftw_plan_dft_r2c_3d) or +fftw_plan_dft_c2r (or fftw_plan_dft_c2r_2d or +fftw_plan_dft_c2r_3d), respectively, followed by +fftw_execute. As usual, the strides etcetera of +rfftwnd_real_to_complex or rfftwnd_complex_to_real are now +specified in the advanced planner routines, +fftw_plan_many_dft_r2c or fftw_plan_many_dft_c2r. + +

    Wisdom

    + +

    In FFTW 2, you had to supply the FFTW_USE_WISDOM flag in order to +use wisdom; in FFTW 3, wisdom is always used. (You could simulate the +FFTW 2 wisdom-less behavior by calling fftw_forget_wisdom after +every planner call.) + +

    The FFTW 3 wisdom import/export routines are almost the same as before +(although the storage format is entirely different). There is one +significant difference, however. In FFTW 2, the import routines would +never read past the end of the wisdom, so you could store extra data +beyond the wisdom in the same file, for example. In FFTW 3, the +file-import routine may read up to a few hundred bytes past the end of +the wisdom, so you cannot store other data just beyond it.1 + +

    Wisdom has been enhanced by additional humility in FFTW 3: whereas FFTW +2 would re-use wisdom for a given transform size regardless of the +stride etc., in FFTW 3 wisdom is only used with the strides etc. for +which it was created. Unfortunately, this means FFTW 3 has to create +new plans from scratch more often than FFTW 2 (in FFTW 2, planning +e.g. one transform of size 1024 also created wisdom for all smaller +powers of 2, but this no longer occurs). + +

    FFTW 3 also has the new routine fftw_import_system_wisdom to +import wisdom from a standard system-wide location. + +

    Memory allocation

    + +

    In FFTW 3, we recommend allocating your arrays with fftw_malloc +and deallocating them with fftw_free; this is not required, but +allows optimal performance when SIMD acceleration is used. (Those two +functions actually existed in FFTW 2, and worked the same way, but were +not documented.) + +

    In FFTW 2, there were fftw_malloc_hook and fftw_free_hook +functions that allowed the user to replace FFTW's memory-allocation +routines (e.g. to implement different error-handling, since by default +FFTW prints an error message and calls exit to abort the program +if malloc returns NULL). These hooks are not supported in +FFTW 3; those few users who require this functionality can just +directly modify the memory-allocation routines in FFTW (they are defined +in kernel/alloc.c). + +

    Fortran interface

    + +

    In FFTW 2, the subroutine names were obtained by replacing ‘fftw_’ +with ‘fftw_f77’; in FFTW 3, you replace ‘fftw_’ with +‘dfftw_’ (or ‘sfftw_’ or ‘lfftw_’, depending upon the +precision). + +

    In FFTW 3, we have begun recommending that you always declare the type +used to store plans as integer*8. (Too many people didn't notice +our instruction to switch from integer to integer*8 for +64-bit machines.) + +

    In FFTW 3, we provide a fftw3.f “header file” to include in +your code (and which is officially installed on Unix systems). (In FFTW +2, we supplied a fftw_f77.i file, but it was not installed.) + +

    Otherwise, the C-Fortran interface relationship is much the same as it +was before (e.g. return values become initial parameters, and +multi-dimensional arrays are in column-major order). Unlike FFTW 2, we +do provide some support for wisdom import/export in Fortran +(see Wisdom of Fortran?). + +

    Threads

    + +

    Like FFTW 2, only the execution routines are thread-safe. All planner +routines, etcetera, should be called by only a single thread at a time +(see Thread safety). Unlike FFTW 2, there is no special +FFTW_THREADSAFE flag for the planner to allow a given plan to be +usable by multiple threads in parallel; this is now the case by default. + +

    The multi-threaded version of FFTW 2 required you to pass the number of +threads each time you execute the transform. The number of threads is +now stored in the plan, and is specified before the planner is called by +fftw_plan_with_nthreads. The threads initialization routine used +to be called fftw_threads_init and would return zero on success; +the new routine is called fftw_init_threads and returns zero on +failure. See Multi-threaded FFTW. + +

    There is no separate threads header file in FFTW 3; all the function +prototypes are in <fftw3.h>. However, you still have to link to +a separate library (-lfftw3_threads -lfftw3 -lm on Unix), as well as +to the threading library (e.g. POSIX threads on Unix). + +

    +
    +

    Footnotes

    [1] We +do our own buffering because GNU libc I/O routines are horribly slow for +single-character I/O, apparently for thread-safety reasons (whether you +are using threads or not).

    + +
    + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Usage-of-Multi_002dthreaded-FFTW.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Usage-of-Multi_002dthreaded-FFTW.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,132 @@ + + +Usage of Multi-threaded FFTW - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    5.2 Usage of Multi-threaded FFTW

    + +

    Here, it is assumed that the reader is already familiar with the usage +of the uniprocessor FFTW routines, described elsewhere in this manual. +We only describe what one has to change in order to use the +multi-threaded routines. + +

    First, programs using the parallel complex transforms should be linked +with -lfftw3_threads -lfftw3 -lm on Unix, or -lfftw3_omp +-lfftw3 -lm if you compiled with OpenMP. You will also need to link +with whatever library is responsible for threads on your system +(e.g. -lpthread on GNU/Linux) or include whatever compiler flag +enables OpenMP (e.g. -fopenmp with gcc). + + +

    Second, before calling any FFTW routines, you should call the +function: + +

         int fftw_init_threads(void);
    +
    +

    +This function, which need only be called once, performs any one-time +initialization required to use threads on your system. It returns zero +if there was some error (which should not happen under normal +circumstances) and a non-zero value otherwise. + +

    Third, before creating a plan that you want to parallelize, you should +call: + +

         void fftw_plan_with_nthreads(int nthreads);
    +
    +

    +The nthreads argument indicates the number of threads you want +FFTW to use (or actually, the maximum number). All plans subsequently +created with any planner routine will use that many threads. You can +call fftw_plan_with_nthreads, create some plans, call +fftw_plan_with_nthreads again with a different argument, and +create some more plans for a new number of threads. Plans already created +before a call to fftw_plan_with_nthreads are unaffected. If you +pass an nthreads argument of 1 (the default), threads are +disabled for subsequent plans. + +

    With OpenMP, to configure FFTW to use all of the currently running +OpenMP threads (set by omp_set_num_threads(nthreads) or by the +OMP_NUM_THREADS environment variable), you can do: +fftw_plan_with_nthreads(omp_get_max_threads()). (The ‘omp_’ +OpenMP functions are declared via #include <omp.h>.) + +

    Given a plan, you then execute it as usual with +fftw_execute(plan), and the execution will use the number of +threads specified when the plan was created. When done, you destroy +it as usual with fftw_destroy_plan. As described in +Thread safety, plan execution is thread-safe, but plan +creation and destruction are not: you should create/destroy +plans only from a single thread, but can safely execute multiple plans +in parallel. + +

    There is one additional routine: if you want to get rid of all memory +and other resources allocated internally by FFTW, you can call: + +

         void fftw_cleanup_threads(void);
    +
    +

    +which is much like the fftw_cleanup() function except that it +also gets rid of threads-related data. You must not execute any +previously created plans after calling this function. + +

    We should also mention one other restriction: if you save wisdom from a +program using the multi-threaded FFTW, that wisdom cannot be used +by a program using only the single-threaded FFTW (i.e. not calling +fftw_init_threads). See Words of Wisdom-Saving Plans. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Using-MPI-Plans.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Using-MPI-Plans.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ + + +Using MPI Plans - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    6.12.3 Using MPI Plans

    + +

    Once an MPI plan is created, you can execute and destroy it using +fftw_execute, fftw_destroy_plan, and the other functions +in the serial interface that operate on generic plans (see Using Plans). + +

    The fftw_execute and fftw_destroy_plan functions, applied to +MPI plans, are collective calls: they must be called for all processes +in the communicator that was used to create the plan. + +

    You must not use the serial new-array plan-execution functions +fftw_execute_dft and so on (see New-array Execute Functions) with MPI plans. Such functions are specialized to the +problem type, and there are specific new-array execute functions for MPI plans: + +

    +

         void fftw_mpi_execute_dft(fftw_plan p, fftw_complex *in, fftw_complex *out);
    +     void fftw_mpi_execute_dft_r2c(fftw_plan p, double *in, fftw_complex *out);
    +     void fftw_mpi_execute_dft_c2r(fftw_plan p, fftw_complex *in, double *out);
    +     void fftw_mpi_execute_r2r(fftw_plan p, double *in, double *out);
    +
    +

    These functions have the same restrictions as those of the serial +new-array execute functions. They are always safe to apply to +the same in and out arrays that were used to +create the plan. They can only be applied to new arrays if those +arrays have the same types, dimensions, in-placeness, and alignment as +the original arrays, where the best way to ensure the same alignment +is to use FFTW's fftw_malloc and related allocation functions +for all arrays (see Memory Allocation). Note that distributed +transposes (see FFTW MPI Transposes) use +fftw_mpi_execute_r2r, since they count as rank-zero r2r plans +from FFTW's perspective. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Using-Plans.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Using-Plans.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,150 @@ + + +Using Plans - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Data Types and Files, +Up: FFTW Reference +


    +
    + +

    4.2 Using Plans

    + +

    Plans for all transform types in FFTW are stored as type +fftw_plan (an opaque pointer type), and are created by one of the +various planning routines described in the following sections. +An fftw_plan contains all information necessary to compute the +transform, including the pointers to the input and output arrays. + +

         void fftw_execute(const fftw_plan plan);
    +
    +

    +This executes the plan, to compute the corresponding transform on +the arrays for which it was planned (which must still exist). The plan +is not modified, and fftw_execute can be called as many times as +desired. + +

    To apply a given plan to a different array, you can use the new-array execute +interface. See New-array Execute Functions. + +

    fftw_execute (and equivalents) is the only function in FFTW +guaranteed to be thread-safe; see Thread safety. + +

    This function: +

         void fftw_destroy_plan(fftw_plan plan);
    +
    +

    deallocates the plan and all its associated data. + +

    FFTW's planner saves some other persistent data, such as the +accumulated wisdom and a list of algorithms available in the current +configuration. If you want to deallocate all of that and reset FFTW +to the pristine state it was in when you started your program, you can +call: + +

         void fftw_cleanup(void);
    +
    +

    +After calling fftw_cleanup, all existing plans become undefined, +and you should not attempt to execute them nor to destroy them. You can +however create and execute/destroy new plans, in which case FFTW starts +accumulating wisdom information again. + +

    fftw_cleanup does not deallocate your plans, however. To prevent +memory leaks, you must still call fftw_destroy_plan before +executing fftw_cleanup. + +

    Occasionally, it may be useful to know FFTW's internal “cost” metric +that it uses to compare plans to one another; this cost is +proportional to an execution time of the plan, in undocumented units, +if the plan was created with the FFTW_MEASURE or other +timing-based options, or alternatively is a heuristic cost function +for FFTW_ESTIMATE plans. (The cost values of measured and +estimated plans are not comparable, being in different units. Also, +costs from different FFTW versions or the same version compiled +differently may not be in the same units. Plans created from wisdom +have a cost of 0 since no timing measurement is performed for them. +Finally, certain problems for which only one top-level algorithm was +possible may have required no measurements of the cost of the whole +plan, in which case fftw_cost will also return 0.) The cost +metric for a given plan is returned by: + +

         double fftw_cost(const fftw_plan plan);
    +
    +

    +The following two routines are provided purely for academic purposes +(that is, for entertainment). + +

         void fftw_flops(const fftw_plan plan,
    +                     double *add, double *mul, double *fma);
    +
    +

    +Given a plan, set add, mul, and fma to an +exact count of the number of floating-point additions, multiplications, +and fused multiply-add operations involved in the plan's execution. The +total number of floating-point operations (flops) is add + mul + +2*fma, or add + mul + fma if the hardware supports fused +multiply-add instructions (although the number of FMA operations is only +approximate because of compiler voodoo). (The number of operations +should be an integer, but we use double to avoid overflowing +int for large transforms; the arguments are of type double +even for single and long-double precision versions of FFTW.) + +

         void fftw_fprint_plan(const fftw_plan plan, FILE *output_file);
    +     void fftw_print_plan(const fftw_plan plan);
    +     char *fftw_sprint_plan(const fftw_plan plan);
    +
    +

    +This outputs a “nerd-readable” representation of the plan to +the given file, to stdout, or to a newly allocated +NUL-terminated string (which the caller is responsible for deallocating +with free), respectively. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/What-FFTW-Really-Computes.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/What-FFTW-Really-Computes.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,76 @@ + + +What FFTW Really Computes - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Previous: Wisdom, +Up: FFTW Reference +


    +
    + +

    4.8 What FFTW Really Computes

    + +

    In this section, we provide precise mathematical definitions for the +transforms that FFTW computes. These transform definitions are fairly +standard, but some authors follow slightly different conventions for the +normalization of the transform (the constant factor in front) and the +sign of the complex exponent. We begin by presenting the +one-dimensional (1d) transform definitions, and then give the +straightforward extension to multi-dimensional transforms. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-Export.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-Export.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ + + +Wisdom Export - FFTW 3.3.4 + + + + + + + + + + + + + +
    + +

    +Next: , +Previous: Wisdom, +Up: Wisdom +


    +
    + +

    4.7.1 Wisdom Export

    + +
         int fftw_export_wisdom_to_filename(const char *filename);
    +     void fftw_export_wisdom_to_file(FILE *output_file);
    +     char *fftw_export_wisdom_to_string(void);
    +     void fftw_export_wisdom(void (*write_char)(char c, void *), void *data);
    +
    +

    +These functions allow you to export all currently accumulated wisdom +in a form from which it can be later imported and restored, even +during a separate run of the program. (See Words of Wisdom-Saving Plans.) The current store of wisdom is not affected by calling any +of these routines. + +

    fftw_export_wisdom exports the wisdom to any output +medium, as specified by the callback function +write_char. write_char is a putc-like function that +writes the character c to some output; its second parameter is +the data pointer passed to fftw_export_wisdom. For +convenience, the following three “wrapper” routines are provided: + +

    fftw_export_wisdom_to_filename writes wisdom to a file named +filename (which is created or overwritten), returning 1 +on success and 0 on failure. A lower-level function, which +requires you to open and close the file yourself (e.g. if you want to +write wisdom to a portion of a larger file) is +fftw_export_wisdom_to_file. This writes the wisdom to the +current position in output_file, which should be open with +write permission; upon exit, the file remains open and is positioned +at the end of the wisdom data. + +

    fftw_export_wisdom_to_string returns a pointer to a +NULL-terminated string holding the wisdom data. This string is +dynamically allocated, and it is the responsibility of the caller to +deallocate it with free when it is no longer needed. + +

    All of these routines export the wisdom in the same format, which we +will not document here except to say that it is LISP-like ASCII text +that is insensitive to white space. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-File-Export_002fImport-from-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-File-Export_002fImport-from-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ + + +Wisdom File Export/Import from Fortran - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    7.6.1 Wisdom File Export/Import from Fortran

    + +

    The easiest way to export and import wisdom is to do so using +fftw_export_wisdom_to_filename and +fftw_import_wisdom_from_filename. The only trick is that these +require you to pass a C string, which is an array of type +CHARACTER(C_CHAR) that is terminated by C_NULL_CHAR. +You can call them like this: + +

           integer(C_INT) :: ret
    +       ret = fftw_export_wisdom_to_filename(C_CHAR_'my_wisdom.dat' // C_NULL_CHAR)
    +       if (ret .eq. 0) stop 'error exporting wisdom to file'
    +       ret = fftw_import_wisdom_from_filename(C_CHAR_'my_wisdom.dat' // C_NULL_CHAR)
    +       if (ret .eq. 0) stop 'error importing wisdom from file'
    +
    +

    Note that prepending ‘C_CHAR_’ is needed to specify that the +literal string is of kind C_CHAR, and we null-terminate the +string by appending ‘// C_NULL_CHAR’. These functions return an +integer(C_INT) (ret) which is 0 if an error +occurred during export/import and nonzero otherwise. + +

    It is also possible to use the lower-level routines +fftw_export_wisdom_to_file and +fftw_import_wisdom_from_file, which accept parameters of the C +type FILE*, expressed in Fortran as type(C_PTR). +However, you are then responsible for creating the FILE* +yourself. You can do this by using iso_c_binding to define +Fortran interfaces for the C library functions fopen and +fclose, which is a bit strange in Fortran but workable. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-Generic-Export_002fImport-from-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-Generic-Export_002fImport-from-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,112 @@ + + +Wisdom Generic Export/Import from Fortran - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    7.6.3 Wisdom Generic Export/Import from Fortran

    + +

    The most generic wisdom export/import functions allow you to provide +an arbitrary callback function to read/write one character at a time +in any way you want. However, your callback function must be written +in a special way, using the bind(C) attribute to be passed to a +C interface. + +

    In particular, to call the generic wisdom export function +fftw_export_wisdom, you would write a callback subroutine of the form: + +

           subroutine my_write_char(c, p) bind(C)
    +         use, intrinsic :: iso_c_binding
    +         character(C_CHAR), value :: c
    +         type(C_PTR), value :: p
    +         ...write c...
    +       end subroutine my_write_char
    +
    +

    Given such a subroutine (along with the corresponding interface definition), you could then export wisdom using: + +

    +

           call fftw_export_wisdom(c_funloc(my_write_char), p)
    +
    +

    The standard c_funloc intrinsic converts a Fortran +bind(C) subroutine into a C function pointer. The parameter +p is a type(C_PTR) to any arbitrary data that you want +to pass to my_write_char (or C_NULL_PTR if none). (Note +that you can get a C pointer to Fortran data using the intrinsic +c_loc, and convert it back to a Fortran pointer in +my_write_char using c_f_pointer.) + +

    Similarly, to use the generic fftw_import_wisdom, you would +define a callback function of the form: + +

    +

           integer(C_INT) function my_read_char(p) bind(C)
    +         use, intrinsic :: iso_c_binding
    +         type(C_PTR), value :: p
    +         character :: c
    +         ...read a character c...
    +         my_read_char = ichar(c, C_INT)
    +       end function my_read_char
    +     
    +       ....
    +     
    +       integer(C_INT) :: ret
    +       ret = fftw_import_wisdom(c_funloc(my_read_char), p)
    +       if (ret .eq. 0) stop 'error importing wisdom'
    +
    +

    Your function can return -1 if the end of the input is reached. +Again, p is an arbitrary type(C_PTR) that is passed +through to your function. fftw_import_wisdom returns 0 +if an error occurred and nonzero otherwise. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-Import.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-Import.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,101 @@ + + +Wisdom Import - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    +Next: , +Previous: Wisdom Export, +Up: Wisdom +


    +
    + +

    4.7.2 Wisdom Import

    + +
         int fftw_import_system_wisdom(void);
    +     int fftw_import_wisdom_from_filename(const char *filename);
    +     int fftw_import_wisdom_from_string(const char *input_string);
    +     int fftw_import_wisdom(int (*read_char)(void *), void *data);
    +
    +

    +These functions import wisdom into a program from data stored by the +fftw_export_wisdom functions above. (See Words of Wisdom-Saving Plans.) The imported wisdom replaces any wisdom +already accumulated by the running program. + +

    fftw_import_wisdom imports wisdom from any input medium, as +specified by the callback function read_char. read_char is +a getc-like function that returns the next character in the +input; its parameter is the data pointer passed to +fftw_import_wisdom. If the end of the input data is reached +(which should never happen for valid data), read_char should +return EOF (as defined in <stdio.h>). For convenience, +the following three “wrapper” routines are provided: + +

    fftw_import_wisdom_from_filename reads wisdom from a file named +filename. A lower-level function, which requires you to open +and close the file yourself (e.g. if you want to read wisdom from a +portion of a larger file) is fftw_import_wisdom_from_file. This +reads wisdom from the current position in input_file (which +should be open with read permission); upon exit, the file remains +open, but the position of the read pointer is unspecified. + +

    fftw_import_wisdom_from_string reads wisdom from the +NULL-terminated string input_string. + +

    fftw_import_system_wisdom reads wisdom from an +implementation-defined standard file (/etc/fftw/wisdom on Unix +and GNU systems). + + +

    The return value of these import routines is 1 if the wisdom was +read successfully and 0 otherwise. Note that, in all of these +functions, any data in the input stream past the end of the wisdom data +is simply ignored. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-String-Export_002fImport-from-Fortran.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-String-Export_002fImport-from-Fortran.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,106 @@ + + +Wisdom String Export/Import from Fortran - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    7.6.2 Wisdom String Export/Import from Fortran

    + +

    Dealing with FFTW's C string export/import is a bit more painful. In +particular, the fftw_export_wisdom_to_string function requires +you to deal with a dynamically allocated C string. To get its length, +you must define an interface to the C strlen function, and to +deallocate it you must define an interface to C free: + +

           use, intrinsic :: iso_c_binding
    +       interface
    +         integer(C_INT) function strlen(s) bind(C, name='strlen')
    +           import
    +           type(C_PTR), value :: s
    +         end function strlen
    +         subroutine free(p) bind(C, name='free')
    +           import
    +           type(C_PTR), value :: p
    +         end subroutine free
    +       end interface
    +
    +

    Given these definitions, you can then export wisdom to a Fortran +character array: + +

           character(C_CHAR), pointer :: s(:)
    +       integer(C_SIZE_T) :: slen
    +       type(C_PTR) :: p
    +       p = fftw_export_wisdom_to_string()
    +       if (.not. c_associated(p)) stop 'error exporting wisdom'
    +       slen = strlen(p)
    +       call c_f_pointer(p, s, [slen+1])
    +       ...
    +       call free(p)
    +
    +

    +Note that slen is the length of the C string, but the length of +the array is slen+1 because it includes the terminating null +character. (You can omit the ‘+1’ if you don't want Fortran to +know about the null character.) The standard c_associated function +checks whether p is a null pointer, which is returned by +fftw_export_wisdom_to_string if there was an error. + +

    To import wisdom from a string, use +fftw_import_wisdom_from_string as usual; note that the argument +of this function must be a character(C_CHAR) that is terminated +by the C_NULL_CHAR character, like the s array above. + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-Utilities.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-Utilities.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,83 @@ + + +Wisdom Utilities - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    +Previous: Forgetting Wisdom, +Up: Wisdom +


    +
    + +

    4.7.4 Wisdom Utilities

    + +

    FFTW includes two standalone utility programs that deal with wisdom. We +merely summarize them here, since they come with their own man +pages for Unix and GNU systems (with HTML versions on our web site). + +

    The first program is fftw-wisdom (or fftwf-wisdom in +single precision, etcetera), which can be used to create a wisdom file +containing plans for any of the transform sizes and types supported by +FFTW. It is preferable to create wisdom directly from your executable +(see Caveats in Using Wisdom), but this program is useful for +creating global wisdom files for fftw_import_system_wisdom. + + +

    The second program is fftw-wisdom-to-conf, which takes a wisdom +file as input and produces a configuration routine as output. The +latter is a C subroutine that you can compile and link into your +program, replacing a routine of the same name in the FFTW library, that +determines which parts of FFTW are callable by your program. +fftw-wisdom-to-conf produces a configuration routine that links +to only those parts of FFTW needed by the saved plans in the wisdom, +greatly reducing the size of statically linked executables (which should +only attempt to create plans corresponding to those in the wisdom, +however). + + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom-of-Fortran_003f.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom-of-Fortran_003f.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,91 @@ + + +Wisdom of Fortran? - FFTW 3.3.4 + + + + + + + + + + + + +

    + +

    8.5 Wisdom of Fortran?

    + +

    In this section, we discuss how one can import/export FFTW wisdom +(saved plans) to/from a Fortran program; we assume that the reader is +already familiar with wisdom, as described in Words of Wisdom-Saving Plans. + +

    The basic problem is that it is difficult to (portably) pass files and +strings between Fortran and C, so we cannot provide a direct Fortran +equivalent to the fftw_export_wisdom_to_file, etcetera, +functions. Fortran interfaces are provided for the functions +that do not take file/string arguments, however: +dfftw_import_system_wisdom, dfftw_import_wisdom, +dfftw_export_wisdom, and dfftw_forget_wisdom. + + +

    So, for example, to import the system-wide wisdom, you would do: + +

                 integer isuccess
    +             call dfftw_import_system_wisdom(isuccess)
    +
    +

    As usual, the C return value is turned into a first parameter; +isuccess is non-zero on success and zero on failure (e.g. if +there is no system wisdom installed). + +

    If you want to import/export wisdom from/to an arbitrary file or +elsewhere, you can employ the generic dfftw_import_wisdom and +dfftw_export_wisdom functions, for which you must supply a +subroutine to read/write one character at a time. The FFTW package +contains an example file doc/f77_wisdom.f demonstrating how to +implement import_wisdom_from_file and +export_wisdom_to_file subroutines in this way. (These routines +cannot be compiled into the FFTW library itself, lest all FFTW-using +programs be required to link with the Fortran I/O library.) + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Wisdom.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Wisdom.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,72 @@ + + +Wisdom - FFTW 3.3.4 + + + + + + + + + + + + + +

    + +

    4.7 Wisdom

    + +

    +This section documents the FFTW mechanism for saving and restoring +plans from disk. This mechanism is called wisdom. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/Words-of-Wisdom_002dSaving-Plans.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/html/Words-of-Wisdom_002dSaving-Plans.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,113 @@ + + +Words of Wisdom-Saving Plans - FFTW 3.3.4 + + + + + + + + + + + + + + + +

    3.3 Words of Wisdom—Saving Plans

    + +

    +FFTW implements a method for saving plans to disk and restoring them. +In fact, what FFTW does is more general than just saving and loading +plans. The mechanism is called wisdom. Here, we describe +this feature at a high level. See FFTW Reference, for a less casual +but more complete discussion of how to use wisdom in FFTW. + +

    Plans created with the FFTW_MEASURE, FFTW_PATIENT, or +FFTW_EXHAUSTIVE options produce near-optimal FFT performance, +but may require a long time to compute because FFTW must measure the +runtime of many possible plans and select the best one. This setup is +designed for the situations where so many transforms of the same size +must be computed that the start-up time is irrelevant. For short +initialization times, but slower transforms, we have provided +FFTW_ESTIMATE. The wisdom mechanism is a way to get the +best of both worlds: you compute a good plan once, save it to +disk, and later reload it as many times as necessary. The wisdom +mechanism can actually save and reload many plans at once, not just +one. + + +

    Whenever you create a plan, the FFTW planner accumulates wisdom, which +is information sufficient to reconstruct the plan. After planning, +you can save this information to disk by means of the function: +

         int fftw_export_wisdom_to_filename(const char *filename);
    +
    +

    (This function returns non-zero on success.) + +

    The next time you run the program, you can restore the wisdom with +fftw_import_wisdom_from_filename (which also returns non-zero on success), +and then recreate the plan using the same flags as before. +

         int fftw_import_wisdom_from_filename(const char *filename);
    +
    +

    +Wisdom is automatically used for any size to which it is applicable, as +long as the planner flags are not more “patient” than those with which +the wisdom was created. For example, wisdom created with +FFTW_MEASURE can be used if you later plan with +FFTW_ESTIMATE or FFTW_MEASURE, but not with +FFTW_PATIENT. + +

    The wisdom is cumulative, and is stored in a global, private +data structure managed internally by FFTW. The storage space required +is minimal, proportional to the logarithm of the sizes the wisdom was +generated from. If memory usage is a concern, however, the wisdom can +be forgotten and its associated memory freed by calling: +

         void fftw_forget_wisdom(void);
    +
    +

    +Wisdom can be exported to a file, a string, or any other medium. +For details, see Wisdom. + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-dft.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-dft.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-dht.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-dht.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-idft.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-idft.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-redft00.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-redft00.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-redft01.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-redft01.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-redft10.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-redft10.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-redft11.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-redft11.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-rodft00.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-rodft00.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-rodft01.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-rodft01.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-rodft10.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-rodft10.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/equation-rodft11.png Binary file fft/fftw/fftw-3.3.4/doc/html/equation-rodft11.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/doc/html/index.html Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,248 @@ + + +FFTW 3.3.4 + + + + + + + + + + +

    FFTW 3.3.4

    +
    +

    Table of Contents

    + +
    + + + +
    + +

    +Next: , +Previous: (dir), +Up: (dir) +


    +
    + +

    FFTW User Manual

    + +

    Welcome to FFTW, the Fastest Fourier Transform in the West. FFTW is a +collection of fast C routines to compute the discrete Fourier transform. +This manual documents FFTW version 3.3.4. + +

    + + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/html/rfftwnd-for-html.png Binary file fft/fftw/fftw-3.3.4/doc/html/rfftwnd-for-html.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/install.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/install.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,361 @@ +@node Installation and Customization, Acknowledgments, Upgrading from FFTW version 2, Top +@chapter Installation and Customization +@cindex installation + +This chapter describes the installation and customization of FFTW, the +latest version of which may be downloaded from +@uref{http://www.fftw.org, the FFTW home page}. + +In principle, FFTW should work on any system with an ANSI C compiler +(@code{gcc} is fine). However, planner time is drastically reduced if +FFTW can exploit a hardware cycle counter; FFTW comes with cycle-counter +support for all modern general-purpose CPUs, but you may need to add a +couple of lines of code if your compiler is not yet supported +(@pxref{Cycle Counters}). (On Unix, there will be a warning at the end +of the @code{configure} output if no cycle counter is found.) +@cindex cycle counter +@cindex compiler +@cindex portability + + +Installation of FFTW is simplest if you have a Unix or a GNU system, +such as GNU/Linux, and we describe this case in the first section below, +including the use of special configuration options to e.g. install +different precisions or exploit optimizations for particular +architectures (e.g. SIMD). Compilation on non-Unix systems is a more +manual process, but we outline the procedure in the second section. It +is also likely that pre-compiled binaries will be available for popular +systems. + +Finally, we describe how you can customize FFTW for particular needs by +generating @emph{codelets} for fast transforms of sizes not supported +efficiently by the standard FFTW distribution. 
+@cindex codelet + +@menu +* Installation on Unix:: +* Installation on non-Unix systems:: +* Cycle Counters:: +* Generating your own code:: +@end menu + +@c ------------------------------------------------------------ + +@node Installation on Unix, Installation on non-Unix systems, Installation and Customization, Installation and Customization +@section Installation on Unix + +FFTW comes with a @code{configure} program in the GNU style. +Installation can be as simple as: +@fpindex configure + +@example +./configure +make +make install +@end example + +This will build the uniprocessor complex and real transform libraries +along with the test programs. (We recommend that you use GNU +@code{make} if it is available; on some systems it is called +@code{gmake}.) The ``@code{make install}'' command installs the fftw +and rfftw libraries in standard places, and typically requires root +privileges (unless you specify a different install directory with the +@code{--prefix} flag to @code{configure}). You can also type +``@code{make check}'' to put the FFTW test programs through their paces. +If you have problems during configuration or compilation, you may want +to run ``@code{make distclean}'' before trying again; this ensures that +you don't have any stale files left over from previous compilation +attempts. + +The @code{configure} script chooses the @code{gcc} compiler by default, +if it is available; you can select some other compiler with: +@example +./configure CC="@r{@i{<the name of your C compiler>}}" +@end example + +The @code{configure} script knows good @code{CFLAGS} (C compiler flags) +@cindex compiler flags +for a few systems. If your system is not known, the @code{configure} +script will print out a warning. In this case, you should re-configure +FFTW with the command +@example +./configure CFLAGS="@r{@i{<write your CFLAGS here>}}" +@end example +and then compile as usual. 
If you do find an optimal set of +@code{CFLAGS} for your system, please let us know what they are (along +with the output of @code{config.guess}) so that we can include them in +future releases. + +@code{configure} supports all the standard flags defined by the GNU +Coding Standards; see the @code{INSTALL} file in FFTW or +@uref{http://www.gnu.org/prep/standards/html_node/index.html, the GNU web page}. +Note especially @code{--help} to list all flags and +@code{--enable-shared} to create shared, rather than static, libraries. +@code{configure} also accepts a few FFTW-specific flags, particularly: + +@itemize @bullet + +@item +@cindex precision +@code{--enable-float}: Produces a single-precision version of FFTW +(@code{float}) instead of the default double-precision (@code{double}). +@xref{Precision}. + +@item +@cindex precision +@code{--enable-long-double}: Produces a long-double precision version of +FFTW (@code{long double}) instead of the default double-precision +(@code{double}). The @code{configure} script will halt with an error +message if @code{long double} is the same size as @code{double} on your +machine/compiler. @xref{Precision}. + +@item +@cindex precision +@code{--enable-quad-precision}: Produces a quadruple-precision version +of FFTW using the nonstandard @code{__float128} type provided by +@code{gcc} 4.6 or later on x86, x86-64, and Itanium architectures, +instead of the default double-precision (@code{double}). The +@code{configure} script will halt with an error message if the +compiler is not @code{gcc} version 4.6 or later or if @code{gcc}'s +@code{libquadmath} library is not installed. @xref{Precision}. + +@item +@cindex threads +@code{--enable-threads}: Enables compilation and installation of the +FFTW threads library (@pxref{Multi-threaded FFTW}), which provides a +simple interface to parallel transforms for SMP systems. By default, +the threads routines are not compiled. 
+ +@item +@code{--enable-openmp}: Like @code{--enable-threads}, but using OpenMP +compiler directives in order to induce parallelism rather than +spawning its own threads directly, and installing an @samp{fftw3_omp} library +rather than an @samp{fftw3_threads} library (@pxref{Multi-threaded +FFTW}). You can use both @code{--enable-openmp} and @code{--enable-threads} +since they compile/install libraries with different names. By default, +the OpenMP routines are not compiled. + +@item +@code{--with-combined-threads}: By default, if @code{--enable-threads} +is used, the threads support is compiled into a separate library that +must be linked in addition to the main FFTW library. This is so that +users of the serial library do not need to link the system threads +libraries. If @code{--with-combined-threads} is specified, however, +then no separate threads library is created, and threads are included +in the main FFTW library. This is mainly useful under Windows, where +no system threads library is required and inter-library dependencies +are problematic. + +@item +@cindex MPI +@code{--enable-mpi}: Enables compilation and installation of the FFTW +MPI library (@pxref{Distributed-memory FFTW with MPI}), which provides +parallel transforms for distributed-memory systems with MPI. (By +default, the MPI routines are not compiled.) @xref{FFTW MPI +Installation}. + +@item +@cindex Fortran-callable wrappers +@code{--disable-fortran}: Disables inclusion of legacy-Fortran +wrapper routines (@pxref{Calling FFTW from Legacy Fortran}) in the standard +FFTW libraries. These wrapper routines increase the library size by +only a negligible amount, so they are included by default as long as +the @code{configure} script finds a Fortran compiler on your system. +(To specify a particular Fortran compiler @i{foo}, pass +@code{F77=}@i{foo} to @code{configure}.) 
+ +@item +@code{--with-g77-wrappers}: By default, when Fortran wrappers are +included, the wrappers employ the linking conventions of the Fortran +compiler detected by the @code{configure} script. If this compiler is +GNU @code{g77}, however, then @emph{two} versions of the wrappers are +included: one with @code{g77}'s idiosyncratic convention of appending +two underscores to identifiers, and one with the more common +convention of appending only a single underscore. This way, the same +FFTW library will work with both @code{g77} and other Fortran +compilers, such as GNU @code{gfortran}. However, the converse is not +true: if you configure with a different compiler, then the +@code{g77}-compatible wrappers are not included. By specifying +@code{--with-g77-wrappers}, the @code{g77}-compatible wrappers are +included in addition to wrappers for whatever Fortran compiler +@code{configure} finds. +@fpindex g77 + +@item +@code{--with-slow-timer}: Disables the use of hardware cycle counters, +and falls back on @code{gettimeofday} or @code{clock}. This greatly +worsens performance, and should generally not be used (unless you don't +have a cycle counter but still really want an optimized plan regardless +of the time). @xref{Cycle Counters}. + +@item +@code{--enable-sse}, @code{--enable-sse2}, @code{--enable-avx}, +@code{--enable-altivec}, @code{--enable-neon}: Enable the compilation of +SIMD code for SSE (Pentium III+), SSE2 (Pentium IV+), AVX (Sandy Bridge, +Interlagos), AltiVec (PowerPC G4+), NEON (some ARM processors). SSE, +AltiVec, and NEON only work with @code{--enable-float} (above). SSE2 +works in both single and double precision (and is simply SSE in single +precision). The resulting code will @emph{still work} on earlier CPUs +lacking the SIMD extensions (SIMD is automatically disabled, although +the FFTW library is still larger). 
+@itemize @minus +@item +These options require a compiler supporting SIMD extensions, and +compiler support is always a bit flaky: see the FFTW FAQ for a list of +compiler versions that have problems compiling FFTW. +@item +With AltiVec and @code{gcc}, you may have to use the +@code{-mabi=altivec} option when compiling any code that links to FFTW, +in order to properly align the stack; otherwise, FFTW could crash when +it tries to use an AltiVec feature. (This is not necessary on MacOS X.) +@item +With SSE/SSE2 and @code{gcc}, you should use a version of gcc that +properly aligns the stack when compiling any code that links to FFTW. +By default, @code{gcc} 2.95 and later versions align the stack as +needed, but you should not compile FFTW with the @code{-Os} option or the +@code{-mpreferred-stack-boundary} option with an argument less than 4. +@item +Because of the large variety of ARM processors and ABIs, FFTW +does not attempt to guess the correct @code{gcc} flags for generating +NEON code. In general, you will have to provide them on the command line. +This command line is known to have worked at least once: +@example +./configure --with-slow-timer --host=arm-linux-gnueabi \ + --enable-single --enable-neon \ + "CC=arm-linux-gnueabi-gcc -march=armv7-a -mfloat-abi=softfp" +@end example +@end itemize + +@end itemize + +@cindex compiler +To force @code{configure} to use a particular C compiler @i{foo} +(instead of the default, usually @code{gcc}), pass @code{CC=}@i{foo} to the +@code{configure} script; you may also need to set the flags via the variable +@code{CFLAGS} as described above. +@cindex compiler flags + +@c ------------------------------------------------------------ +@node Installation on non-Unix systems, Cycle Counters, Installation on Unix, Installation and Customization +@section Installation on non-Unix systems + +It should be relatively straightforward to compile FFTW even on non-Unix +systems lacking the niceties of a @code{configure} script. 
Basically, +you need to edit the @code{config.h} header (copy it from +@code{config.h.in}) to @code{#define} the various options and compiler +characteristics, and then compile all the @samp{.c} files in the +relevant directories. + +The @code{config.h} header contains about 100 options to set, each one +initially an @code{#undef}, each documented with a comment, and most of +them fairly obvious. For most of the options, you should simply +@code{#define} them to @code{1} if they are applicable, although a few +options require a particular value (e.g. @code{SIZEOF_LONG_LONG} should +be defined to the size of the @code{long long} type, in bytes, or zero +if it is not supported). We will likely post some sample +@code{config.h} files for various operating systems and compilers for +you to use (at least as a starting point). Please let us know if you +have to hand-create a configuration file (and/or a pre-compiled binary) +that you want to share. + +To create the FFTW library, you will then need to compile all of the +@samp{.c} files in the @code{kernel}, @code{dft}, @code{dft/scalar}, +@code{dft/scalar/codelets}, @code{rdft}, @code{rdft/scalar}, +@code{rdft/scalar/r2cf}, @code{rdft/scalar/r2cb}, +@code{rdft/scalar/r2r}, @code{reodft}, and @code{api} directories. +If you are compiling with SIMD support (e.g. you defined +@code{HAVE_SSE2} in @code{config.h}), then you also need to compile +the @code{.c} files in the @code{simd-support}, +@code{@{dft,rdft@}/simd}, @code{@{dft,rdft@}/simd/*} directories. + +Once these files are all compiled, link them into a library, or a shared +library, or directly into your program. + +To compile the FFTW test program, additionally compile the code in the +@code{libbench2/} directory, and link it into a library. Then compile +the code in the @code{tests/} directory and link it to the +@code{libbench2} and FFTW libraries. 
To compile the @code{fftw-wisdom} +(command-line) tool (@pxref{Wisdom Utilities}), compile +@code{tools/fftw-wisdom.c} and link it to the @code{libbench2} and FFTW +libraries. + +@c ------------------------------------------------------------ +@node Cycle Counters, Generating your own code, Installation on non-Unix systems, Installation and Customization +@section Cycle Counters +@cindex cycle counter + +FFTW's planner actually executes and times different possible FFT +algorithms in order to pick the fastest plan for a given @math{n}. In +order to do this in as short a time as possible, however, the timer must +have a very high resolution, and to accomplish this we employ the +hardware @dfn{cycle counters} that are available on most CPUs. +Currently, FFTW supports the cycle counters on x86, PowerPC/POWER, Alpha, +UltraSPARC (SPARC v9), IA64, PA-RISC, and MIPS processors. + +@cindex compiler +Access to the cycle counters, unfortunately, is a compiler and/or +operating-system dependent task, often requiring inline assembly +language, and it may be that your compiler is not supported. If you are +@emph{not} supported, FFTW will by default fall back on its estimator +(effectively using @code{FFTW_ESTIMATE} for all plans). +@ctindex FFTW_ESTIMATE + +You can add support by editing the file @code{kernel/cycle.h}; normally, +this will involve adapting one of the examples already present in order +to use the inline-assembler syntax for your C compiler, and will only +require a couple of lines of code. Anyone adding support for a new +system to @code{cycle.h} is encouraged to email us at @email{fftw@@fftw.org}. + +If a cycle counter is not available on your system (e.g. some embedded +processor), and you don't want to use estimated plans, as a last resort +you can use the @code{--with-slow-timer} option to @code{configure} (on +Unix) or @code{#define WITH_SLOW_TIMER} in @code{config.h} (elsewhere). 
+This will use the much lower-resolution @code{gettimeofday} function, or even +@code{clock} if the former is unavailable, and planning will be +extremely slow. + +@c ------------------------------------------------------------ +@node Generating your own code, , Cycle Counters, Installation and Customization +@section Generating your own code +@cindex code generator + +The directory @code{genfft} contains the programs that were used to +generate FFTW's ``codelets,'' which are hard-coded transforms of small +sizes. +@cindex codelet +We do not expect casual users to employ the generator, which is a rather +sophisticated program that generates directed acyclic graphs of FFT +algorithms and performs algebraic simplifications on them. It was +written in Objective Caml, a dialect of ML, which is available at +@uref{http://caml.inria.fr/ocaml/index.en.html}. +@cindex Caml + + +If you have Objective Caml installed (along with recent versions of +GNU @code{autoconf}, @code{automake}, and @code{libtool}), then you +can change the set of codelets that are generated or play with the +generation options. The set of generated codelets is specified by the +@code{@{dft,rdft@}/@{codelets,simd@}/*/Makefile.am} files. For example, you can add +efficient REDFT codelets of small sizes by modifying +@code{rdft/codelets/r2r/Makefile.am}. +@cindex REDFT +After you modify any @code{Makefile.am} files, you can type @code{sh +bootstrap.sh} in the top-level directory followed by @code{make} to +re-generate the files. + +We do not provide more details about the code-generation process, since +we do not expect that most users will need to generate their own code. +However, feel free to contact us at @email{fftw@@fftw.org} if +you are interested in the subject. 
+ +@cindex monadic programming +You might find it interesting to learn Caml and/or some modern +programming techniques that we used in the generator (including monadic +programming), especially if you heard the rumor that Java and +object-oriented programming are the latest advancement in the field. +The internal operation of the codelet generator is described in the +paper, ``A Fast Fourier Transform Compiler,'' by M. Frigo, which is +available from the @uref{http://www.fftw.org,FFTW home page} and also +appeared in the @cite{Proceedings of the 1999 ACM SIGPLAN Conference on +Programming Language Design and Implementation (PLDI)}. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/intro.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/intro.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,165 @@ +@node Introduction, Tutorial, Top, Top +@chapter Introduction +This manual documents version @value{VERSION} of FFTW, the +@emph{Fastest Fourier Transform in the West}. FFTW is a comprehensive +collection of fast C routines for computing the discrete Fourier +transform (DFT) and various special cases thereof. +@cindex discrete Fourier transform +@cindex DFT +@itemize @bullet +@item FFTW computes the DFT of complex data, real data, even- + or odd-symmetric real data (these symmetric transforms are usually + known as the discrete cosine or sine transform, respectively), and the + discrete Hartley transform (DHT) of real data. + +@item The input data can have arbitrary length. + FFTW employs @Onlogn{} algorithms for all lengths, including + prime numbers. + +@item FFTW supports arbitrary multi-dimensional data. + +@item FFTW supports the SSE, SSE2, AVX, Altivec, and MIPS PS instruction + sets. + +@item FFTW includes parallel (multi-threaded) transforms + for shared-memory systems. +@item Starting with version 3.3, FFTW includes distributed-memory parallel + transforms using MPI. 
+@end itemize + +We assume herein that you are familiar with the properties and uses of +the DFT that are relevant to your application. Otherwise, see +e.g. @cite{The Fast Fourier Transform and Its Applications} by E. O. Brigham +(Prentice-Hall, Englewood Cliffs, NJ, 1988). +@uref{http://www.fftw.org, Our web page} also has links to FFT-related +information online. +@cindex FFTW + +@c TODO: revise. We don't need to brag any longer +@c +@c FFTW is usually faster (and sometimes much faster) than all other +@c freely-available Fourier transform programs found on the Net. It is +@c competitive with (and often faster than) the FFT codes in Sun's +@c Performance Library, IBM's ESSL library, HP's CXML library, and +@c Intel's MKL library, which are targeted at specific machines. +@c Moreover, FFTW's performance is @emph{portable}. Indeed, FFTW is +@c unique in that it automatically adapts itself to your machine, your +@c cache, the size of your memory, your number of registers, and all the +@c other factors that normally make it impossible to optimize a program +@c for more than one machine. An extensive comparison of FFTW's +@c performance with that of other Fourier transform codes has been made, +@c and the results are available on the Web at +@c @uref{http://fftw.org/benchfft, the benchFFT home page}. +@c @cindex benchmark +@c @fpindex benchfft + +In order to use FFTW effectively, you need to learn one basic concept +of FFTW's internal structure: FFTW does not use a fixed algorithm for +computing the transform, but instead it adapts the DFT algorithm to +details of the underlying hardware in order to maximize performance. +Hence, the computation of the transform is split into two phases. +First, FFTW's @dfn{planner} ``learns'' the fastest way to compute the +transform on your machine. The planner +@cindex planner +produces a data structure called a @dfn{plan} that contains this +@cindex plan +information. 
Subsequently, the plan is @dfn{executed} +@cindex execute +to transform the array of input data as dictated by the plan. The +plan can be reused as many times as needed. In typical +high-performance applications, many transforms of the same size are +computed and, consequently, a relatively expensive initialization of +this sort is acceptable. On the other hand, if you need a single +transform of a given size, the one-time cost of the planner becomes +significant. For this case, FFTW provides fast planners based on +heuristics or on previously computed plans. + +FFTW supports transforms of data with arbitrary length, rank, +multiplicity, and a general memory layout. In simple cases, however, +this generality may be unnecessary and confusing. Consequently, we +organized the interface to FFTW into three levels of increasing +generality. +@itemize @bullet +@item The @dfn{basic interface} computes a single + transform of contiguous data. +@item The @dfn{advanced interface} computes transforms + of multiple or strided arrays. +@item The @dfn{guru interface} supports the most general data + layouts, multiplicities, and strides. +@end itemize +We expect that most users will be best served by the basic interface, +whereas the guru interface requires careful attention to the +documentation to avoid problems. +@cindex basic interface +@cindex advanced interface +@cindex guru interface + + +Besides the automatic performance adaptation performed by the planner, +it is also possible for advanced users to customize FFTW manually. For +example, if code space is a concern, we provide a tool that links only +the subset of FFTW needed by your application. Conversely, you may need +to extend FFTW because the standard distribution is not sufficient for +your needs. 
For example, the standard FFTW distribution works most +efficiently for arrays whose size can be factored into small primes +(@math{2}, @math{3}, @math{5}, and @math{7}), and otherwise it uses a +slower general-purpose routine. If you need efficient transforms of +other sizes, you can use FFTW's code generator, which produces fast C +programs (``codelets'') for any particular array size you may care +about. +@cindex code generator +@cindex codelet +For example, if you need transforms of size +@ifinfo +@math{513 = 19 x 3^3}, +@end ifinfo +@tex +$513 = 19 \cdot 3^3$, +@end tex +@html +513 = 19*3<sup>3</sup>, +@end html +you can customize FFTW to support the factor @math{19} efficiently. + +For more information regarding FFTW, see the paper, ``The Design and +Implementation of FFTW3,'' by M. Frigo and S. G. Johnson, which was an +invited paper in @cite{Proc. IEEE} @b{93} (2), p. 216 (2005). The +code generator is described in the paper ``A fast Fourier transform +compiler'', +@cindex compiler +by M. Frigo, in the @cite{Proceedings of the 1999 ACM SIGPLAN Conference +on Programming Language Design and Implementation (PLDI), Atlanta, +Georgia, May 1999}. These papers, along with the latest version of +FFTW, the FAQ, benchmarks, and other links, are available at +@uref{http://www.fftw.org, the FFTW home page}. + +The current version of FFTW incorporates many good ideas from the past +thirty years of FFT literature. In one way or another, FFTW uses the +Cooley-Tukey algorithm, the prime factor algorithm, Rader's algorithm +for prime sizes, and a split-radix algorithm (with a +``conjugate-pair'' variation pointed out to us by Dan Bernstein). +FFTW's code generator also produces new algorithms that we do not +completely understand. +@cindex algorithm +The reader is referred to the cited papers for the appropriate +references. + +The rest of this manual is organized as follows. We first discuss the +sequential (single-processor) implementation. 
We start by describing +the basic interface/features of FFTW in @ref{Tutorial}. +Next, @ref{Other Important Topics} discusses data alignment +(@pxref{SIMD alignment and fftw_malloc}), +the storage scheme of multi-dimensional arrays +(@pxref{Multi-dimensional Array Format}), and FFTW's mechanism for +storing plans on disk (@pxref{Words of Wisdom-Saving Plans}). Next, +@ref{FFTW Reference} provides comprehensive documentation of all +FFTW's features. Parallel transforms are discussed in their own +chapters: @ref{Multi-threaded FFTW} and @ref{Distributed-memory FFTW +with MPI}. Fortran programmers can also use FFTW, as described in +@ref{Calling FFTW from Legacy Fortran} and @ref{Calling FFTW from +Modern Fortran}. @ref{Installation and Customization} explains how to +install FFTW in your computer system and how to adapt FFTW to your +needs. License and copyright information is given in @ref{License and +Copyright}. Finally, we thank all the people who helped us in +@ref{Acknowledgments}. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/legacy-fortran.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/legacy-fortran.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,374 @@ +@node Calling FFTW from Legacy Fortran, Upgrading from FFTW version 2, Calling FFTW from Modern Fortran, Top +@chapter Calling FFTW from Legacy Fortran +@cindex Fortran interface + +This chapter describes the interface to FFTW callable by Fortran code +in older compilers not supporting the Fortran 2003 C interoperability +features (@pxref{Calling FFTW from Modern Fortran}). This interface +has the major disadvantage that it is not type-checked, so if you +mistake the argument types or ordering then your program will not have +any compiler errors, and will likely crash at runtime. So, greater +care is needed. 
Also, technically interfacing older Fortran versions +to C is nonstandard, but in practice we have found that the techniques +used in this chapter have worked with all known Fortran compilers for +many years. + +The legacy Fortran interface differs from the C interface only in the +prefix (@samp{dfftw_} instead of @samp{fftw_} in double precision) and +a few other minor details. This Fortran interface is included in the +FFTW libraries by default, unless a Fortran compiler isn't found on +your system or @code{--disable-fortran} is included in the +@code{configure} flags. We assume here that the reader is already +familiar with the usage of FFTW in C, as described elsewhere in this +manual. + +The MPI parallel interface to FFTW is @emph{not} currently available +to legacy Fortran. + +@menu +* Fortran-interface routines:: +* FFTW Constants in Fortran:: +* FFTW Execution in Fortran:: +* Fortran Examples:: +* Wisdom of Fortran?:: +@end menu + +@c ------------------------------------------------------- +@node Fortran-interface routines, FFTW Constants in Fortran, Calling FFTW from Legacy Fortran, Calling FFTW from Legacy Fortran +@section Fortran-interface routines + +Nearly all of the FFTW functions have Fortran-callable equivalents. +The name of the legacy Fortran routine is the same as that of the +corresponding C routine, but with the @samp{fftw_} prefix replaced by +@samp{dfftw_}.@footnote{Technically, Fortran 77 identifiers are not +allowed to have more than 6 characters, nor may they contain +underscores. Any compiler that enforces this limitation doesn't +deserve to link to FFTW.} The single and long-double precision +versions use @samp{sfftw_} and @samp{lfftw_}, respectively, instead of +@samp{fftwf_} and @samp{fftwl_}; quadruple precision (@code{real*16}) +is available on some systems as @samp{fftwq_} (@pxref{Precision}). +(Note that @code{long double} on x86 hardware is usually at most +80-bit extended precision, @emph{not} quadruple precision.) 
+ +For the most part, all of the arguments to the functions are the same, +with the following exceptions: + +@itemize @bullet + +@item +@code{plan} variables (what would be of type @code{fftw_plan} in C), +must be declared as a type that is at least as big as a pointer +(address) on your machine. We recommend using @code{integer*8} everywhere, +since this should always be big enough. +@cindex portability + +@item +Any function that returns a value (e.g. @code{fftw_plan_dft}) is +converted into a @emph{subroutine}. The return value is converted into +an additional @emph{first} parameter of this subroutine.@footnote{The +reason for this is that some Fortran implementations seem to have +trouble with C function return values, and vice versa.} + +@item +@cindex column-major +The Fortran routines expect multi-dimensional arrays to be in +@emph{column-major} order, which is the ordinary format of Fortran +arrays (@pxref{Multi-dimensional Array Format}). They do this +transparently and costlessly simply by reversing the order of the +dimensions passed to FFTW, but this has one important consequence for +multi-dimensional real-complex transforms, discussed below. + +@item +Wisdom import and export is somewhat more tricky because one cannot +easily pass files or strings between C and Fortran; see @ref{Wisdom of +Fortran?}. + +@item +Legacy Fortran cannot use the @code{fftw_malloc} dynamic-allocation routine. +If you want to exploit the SIMD FFTW (@pxref{SIMD alignment and fftw_malloc}), you'll +need to figure out some other way to ensure that your arrays are at +least 16-byte aligned. + +@item +@tindex fftw_iodim +@cindex guru interface +Since Fortran 77 does not have data structures, the @code{fftw_iodim} +structure from the guru interface (@pxref{Guru vector and transform +sizes}) must be split into separate arguments. 
In particular, any +@code{fftw_iodim} array arguments in the C guru interface become three +integer array arguments (@code{n}, @code{is}, and @code{os}) in the +Fortran guru interface, all of whose lengths should be equal to the +corresponding @code{rank} argument. + +@item +The guru planner interface in Fortran does @emph{not} do any automatic +translation between column-major and row-major; you are responsible +for setting the strides etcetera to correspond to your Fortran arrays. +However, as a slight bug that we are preserving for backwards +compatibility, the @samp{plan_guru_r2r} in Fortran @emph{does} reverse the +order of its @code{kind} array parameter, so the @code{kind} array +of that routine should be in the reverse of the order of the iodim +arrays (see above). + +@end itemize + +In general, you should take care to use Fortran data types that +correspond to (i.e. are the same size as) the C types used by FFTW. +In practice, this correspondence is usually straightforward +(i.e. @code{integer} corresponds to @code{int}, @code{real} +corresponds to @code{float}, etcetera). The native Fortran +double/single-precision complex type should be compatible with +@code{fftw_complex}/@code{fftwf_complex}. Such simple correspondences +are assumed in the examples below. +@cindex portability + +@c ------------------------------------------------------- +@node FFTW Constants in Fortran, FFTW Execution in Fortran, Fortran-interface routines, Calling FFTW from Legacy Fortran +@section FFTW Constants in Fortran + +When creating plans in FFTW, a number of constants are used to specify +options, such as @code{FFTW_MEASURE} or @code{FFTW_ESTIMATE}. The +same constants must be used with the wrapper routines, but of course the +C header files where the constants are defined can't be incorporated +directly into Fortran code. 
+ +Instead, we have placed Fortran equivalents of the FFTW constant +definitions in the file @code{fftw3.f}, which can be found in the same +directory as @code{fftw3.h}. If your Fortran compiler supports a +preprocessor of some sort, you should be able to @code{include} or +@code{#include} this file; otherwise, you can paste it directly into +your code. + +@cindex flags +In C, you combine different flags (like @code{FFTW_PRESERVE_INPUT} and +@code{FFTW_MEASURE}) using the @samp{@code{|}} operator; in Fortran +you should just use @samp{@code{+}}. (Take care not to add in the +same flag more than once, though. Alternatively, you can use the +@code{ior} intrinsic function standardized in Fortran 95.) + +@c ------------------------------------------------------- +@node FFTW Execution in Fortran, Fortran Examples, FFTW Constants in Fortran, Calling FFTW from Legacy Fortran +@section FFTW Execution in Fortran + +In C, in order to use a plan, one normally calls @code{fftw_execute}, +which executes the plan to perform the transform on the input/output +arrays passed when the plan was created (@pxref{Using Plans}). The +corresponding subroutine call in legacy Fortran is: +@example + call dfftw_execute(plan) +@end example +@findex dfftw_execute + +However, we have had reports that this causes problems with some +recent optimizing Fortran compilers. The problem is, because the +input/output arrays are not passed as explicit arguments to +@code{dfftw_execute}, the semantics of Fortran (unlike C) allow the +compiler to assume that the input/output arrays are not changed by +@code{dfftw_execute}. As a consequence, certain compilers end up +optimizing out or repositioning the call to @code{dfftw_execute}, +assuming incorrectly that it does nothing. + +There are various workarounds to this, but the safest and simplest +thing is to not use @code{dfftw_execute} in Fortran. 
Instead, use the +functions described in @ref{New-array Execute Functions}, which take +the input/output arrays as explicit arguments. For example, if the +plan is for a complex-data DFT and was created for the arrays +@code{in} and @code{out}, you would do: +@example + call dfftw_execute_dft(plan, in, out) +@end example +@findex dfftw_execute_dft + +There are a few things to be careful of, however: + +@itemize @bullet + +@item +You must use the correct type of execute function, matching the way +the plan was created. Complex DFT plans should use +@code{dfftw_execute_dft}, real-input (r2c) DFT plans should use +@code{dfftw_execute_dft_r2c}, and real-output (c2r) DFT plans should +use @code{dfftw_execute_dft_c2r}. The various r2r plans should use +@code{dfftw_execute_r2r}. + +@item +You should normally pass the same input/output arrays that were used when +creating the plan. This is always safe. + +@item +@emph{If} you pass @emph{different} input/output arrays compared to +those used when creating the plan, you must abide by all the +restrictions of the new-array execute functions (@pxref{New-array +Execute Functions}). The most difficult of these, in Fortran, is the +requirement that the new arrays have the same alignment as the +original arrays, because there seems to be no way in legacy Fortran to obtain +guaranteed-aligned arrays (analogous to @code{fftw_malloc} in C). You +can, of course, use the @code{FFTW_UNALIGNED} flag when creating the +plan, in which case the plan does not depend on the alignment, but +this may sacrifice substantial performance on architectures (like x86) +with SIMD instructions (@pxref{SIMD alignment and fftw_malloc}). 
+@ctindex FFTW_UNALIGNED + +@end itemize + +@c ------------------------------------------------------- +@node Fortran Examples, Wisdom of Fortran?, FFTW Execution in Fortran, Calling FFTW from Legacy Fortran +@section Fortran Examples + +In C, you might have something like the following to transform a +one-dimensional complex array: + +@example + fftw_complex in[N], out[N]; + fftw_plan plan; + + plan = fftw_plan_dft_1d(N,in,out,FFTW_FORWARD,FFTW_ESTIMATE); + fftw_execute(plan); + fftw_destroy_plan(plan); +@end example + +In Fortran, you would use the following to accomplish the same thing: + +@example + double complex in, out + dimension in(N), out(N) + integer*8 plan + + call dfftw_plan_dft_1d(plan,N,in,out,FFTW_FORWARD,FFTW_ESTIMATE) + call dfftw_execute_dft(plan, in, out) + call dfftw_destroy_plan(plan) +@end example +@findex dfftw_plan_dft_1d +@findex dfftw_execute_dft +@findex dfftw_destroy_plan + +Notice how all routines are called as Fortran subroutines, and the +plan is returned via the first argument to @code{dfftw_plan_dft_1d}. +Notice also that we changed @code{fftw_execute} to +@code{dfftw_execute_dft} (@pxref{FFTW Execution in Fortran}). To do +the same thing, but using 8 threads in parallel (@pxref{Multi-threaded +FFTW}), you would simply prefix these calls with: + +@example + integer iret + call dfftw_init_threads(iret) + call dfftw_plan_with_nthreads(8) +@end example +@findex dfftw_init_threads +@findex dfftw_plan_with_nthreads + +(You might want to check the value of @code{iret}: if it is zero, it +indicates an unlikely error during thread initialization.) 
+ +To transform a three-dimensional array in-place with C, you might do: + +@example + fftw_complex arr[L][M][N]; + fftw_plan plan; + + plan = fftw_plan_dft_3d(L,M,N, arr,arr, + FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute(plan); + fftw_destroy_plan(plan); +@end example + +In Fortran, you would use this instead: + +@example + double complex arr + dimension arr(L,M,N) + integer*8 plan + + call dfftw_plan_dft_3d(plan, L,M,N, arr,arr, + & FFTW_FORWARD, FFTW_ESTIMATE) + call dfftw_execute_dft(plan, arr, arr) + call dfftw_destroy_plan(plan) +@end example +@findex dfftw_plan_dft_3d + +Note that we pass the array dimensions in the ``natural'' order in both C +and Fortran. + +To transform a one-dimensional real array in Fortran, you might do: + +@example + double precision in + dimension in(N) + double complex out + dimension out(N/2 + 1) + integer*8 plan + + call dfftw_plan_dft_r2c_1d(plan,N,in,out,FFTW_ESTIMATE) + call dfftw_execute_dft_r2c(plan, in, out) + call dfftw_destroy_plan(plan) +@end example +@findex dfftw_plan_dft_r2c_1d +@findex dfftw_execute_dft_r2c + +To transform a two-dimensional real array, out of place, you might use +the following: + +@example + double precision in + dimension in(M,N) + double complex out + dimension out(M/2 + 1, N) + integer*8 plan + + call dfftw_plan_dft_r2c_2d(plan,M,N,in,out,FFTW_ESTIMATE) + call dfftw_execute_dft_r2c(plan, in, out) + call dfftw_destroy_plan(plan) +@end example +@findex dfftw_plan_dft_r2c_2d + +@strong{Important:} Notice that it is the @emph{first} dimension of the +complex output array that is cut in half in Fortran, rather than the +last dimension as in C. This is a consequence of the interface routines +reversing the order of the array dimensions passed to FFTW so that the +Fortran program can use its ordinary column-major order. 
+@cindex column-major +@cindex r2c/c2r multi-dimensional array format + +@c ------------------------------------------------------- +@node Wisdom of Fortran?, , Fortran Examples, Calling FFTW from Legacy Fortran +@section Wisdom of Fortran? + +In this section, we discuss how one can import/export FFTW wisdom +(saved plans) to/from a Fortran program; we assume that the reader is +already familiar with wisdom, as described in @ref{Words of +Wisdom-Saving Plans}. + +@cindex portability +The basic problem is that it is difficult to (portably) pass files and +strings between Fortran and C, so we cannot provide a direct Fortran +equivalent to the @code{fftw_export_wisdom_to_file}, etcetera, +functions. Fortran interfaces @emph{are} provided for the functions +that do not take file/string arguments, however: +@code{dfftw_import_system_wisdom}, @code{dfftw_import_wisdom}, +@code{dfftw_export_wisdom}, and @code{dfftw_forget_wisdom}. +@findex dfftw_import_system_wisdom +@findex dfftw_import_wisdom +@findex dfftw_export_wisdom +@findex dfftw_forget_wisdom + + +So, for example, to import the system-wide wisdom, you would do: + +@example + integer isuccess + call dfftw_import_system_wisdom(isuccess) +@end example + +As usual, the C return value is turned into a first parameter; +@code{isuccess} is non-zero on success and zero on failure (e.g. if +there is no system wisdom installed). + +If you want to import/export wisdom from/to an arbitrary file or +elsewhere, you can employ the generic @code{dfftw_import_wisdom} and +@code{dfftw_export_wisdom} functions, for which you must supply a +subroutine to read/write one character at a time. The FFTW package +contains an example file @code{doc/f77_wisdom.f} demonstrating how to +implement @code{import_wisdom_from_file} and +@code{export_wisdom_to_file} subroutines in this way. (These routines +cannot be compiled into the FFTW library itself, lest all FFTW-using +programs be required to link with the Fortran I/O library.) 
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/license.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/license.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,38 @@ +@node License and Copyright, Concept Index, Acknowledgments, Top +@chapter License and Copyright + +FFTW is Copyright @copyright{} 2003, 2007-11 Matteo Frigo, Copyright +@copyright{} 2003, 2007-11 Massachusetts Institute of Technology. + +FFTW is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA You can also +find the @uref{http://www.gnu.org/licenses/gpl-2.0.html, GPL on the GNU +web site}. + +In addition, we kindly ask you to acknowledge FFTW and its authors in +any program or publication in which you use FFTW. (You are not +@emph{required} to do so; it is up to your common sense to decide +whether you want to comply with this request or not.) For general +publications, we suggest referencing: Matteo Frigo and Steven +G. Johnson, ``The design and implementation of FFTW3,'' +@i{Proc. IEEE} @b{93} (2), 216--231 (2005). + +Non-free versions of FFTW are available under terms different from those +of the General Public License. (e.g. they do not require you to +accompany any object code using FFTW with the corresponding source +code.) For these alternative terms you must purchase a license from MIT's +Technology Licensing Office. 
Users interested in such a license should +contact us (@email{fftw@@fftw.org}) for more information. + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/mdate-sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/mdate-sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,224 @@ +#!/bin/sh +# Get modification time of a file or directory and pretty-print it. + +scriptversion=2010-08-21.06; # UTC + +# Copyright (C) 1995-2013 Free Software Foundation, Inc. +# written by Ulrich Drepper , June 1995 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +fi + +case $1 in + '') + echo "$0: No file. Try '$0 --help' for more information." 
1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: mdate-sh [--help] [--version] FILE + +Pretty-print the modification day of FILE, in the format: +1 January 1970 + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "mdate-sh $scriptversion" + exit $? + ;; +esac + +error () +{ + echo "$0: $1" >&2 + exit 1 +} + + +# Prevent date giving response in another language. +LANG=C +export LANG +LC_ALL=C +export LC_ALL +LC_TIME=C +export LC_TIME + +# GNU ls changes its time format in response to the TIME_STYLE +# variable. Since we cannot assume 'unset' works, revert this +# variable to its documented default. +if test "${TIME_STYLE+set}" = set; then + TIME_STYLE=posix-long-iso + export TIME_STYLE +fi + +save_arg1=$1 + +# Find out how to get the extended ls output of a file or directory. +if ls -L /dev/null 1>/dev/null 2>&1; then + ls_command='ls -L -l -d' +else + ls_command='ls -l -d' +fi +# Avoid user/group names that might have spaces, when possible. +if ls -n /dev/null 1>/dev/null 2>&1; then + ls_command="$ls_command -n" +fi + +# A 'ls -l' line looks as follows on OS/2. +# drwxrwx--- 0 Aug 11 2001 foo +# This differs from Unix, which adds ownership information. +# drwxrwx--- 2 root root 4096 Aug 11 2001 foo +# +# To find the date, we split the line on spaces and iterate on words +# until we find a month. This cannot work with files whose owner is a +# user named "Jan", or "Feb", etc. However, it's unlikely that '/' +# will be owned by a user whose name is a month. So we first look at +# the extended ls output of the root directory to decide how many +# words should be skipped to get the date. + +# On HPUX /bin/sh, "set" interprets "-rw-r--r--" as options, so the "x" below. +set x`$ls_command /` + +# Find which argument is the month. +month= +command= +until test $month +do + test $# -gt 0 || error "failed parsing '$ls_command /' output" + shift + # Add another shift to the command. 
+ command="$command shift;" + case $1 in + Jan) month=January; nummonth=1;; + Feb) month=February; nummonth=2;; + Mar) month=March; nummonth=3;; + Apr) month=April; nummonth=4;; + May) month=May; nummonth=5;; + Jun) month=June; nummonth=6;; + Jul) month=July; nummonth=7;; + Aug) month=August; nummonth=8;; + Sep) month=September; nummonth=9;; + Oct) month=October; nummonth=10;; + Nov) month=November; nummonth=11;; + Dec) month=December; nummonth=12;; + esac +done + +test -n "$month" || error "failed parsing '$ls_command /' output" + +# Get the extended ls output of the file or directory. +set dummy x`eval "$ls_command \"\\\$save_arg1\""` + +# Remove all preceding arguments +eval $command + +# Because of the dummy argument above, month is in $2. +# +# On a POSIX system, we should have +# +# $# = 5 +# $1 = file size +# $2 = month +# $3 = day +# $4 = year or time +# $5 = filename +# +# On Darwin 7.7.0 and 7.6.0, we have +# +# $# = 4 +# $1 = day +# $2 = month +# $3 = year or time +# $4 = filename + +# Get the month. +case $2 in + Jan) month=January; nummonth=1;; + Feb) month=February; nummonth=2;; + Mar) month=March; nummonth=3;; + Apr) month=April; nummonth=4;; + May) month=May; nummonth=5;; + Jun) month=June; nummonth=6;; + Jul) month=July; nummonth=7;; + Aug) month=August; nummonth=8;; + Sep) month=September; nummonth=9;; + Oct) month=October; nummonth=10;; + Nov) month=November; nummonth=11;; + Dec) month=December; nummonth=12;; +esac + +case $3 in + ???*) day=$1;; + *) day=$3; shift;; +esac + +# Here we have to deal with the problem that the ls output gives either +# the time of day or the year. 
+case $3 in + *:*) set `date`; eval year=\$$# + case $2 in + Jan) nummonthtod=1;; + Feb) nummonthtod=2;; + Mar) nummonthtod=3;; + Apr) nummonthtod=4;; + May) nummonthtod=5;; + Jun) nummonthtod=6;; + Jul) nummonthtod=7;; + Aug) nummonthtod=8;; + Sep) nummonthtod=9;; + Oct) nummonthtod=10;; + Nov) nummonthtod=11;; + Dec) nummonthtod=12;; + esac + # For the first six month of the year the time notation can also + # be used for files modified in the last year. + if (expr $nummonth \> $nummonthtod) > /dev/null; + then + year=`expr $year - 1` + fi;; + *) year=$3;; +esac + +# The result. +echo $day $month $year + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/modern-fortran.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/modern-fortran.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,725 @@ +@node Calling FFTW from Modern Fortran, Calling FFTW from Legacy Fortran, Distributed-memory FFTW with MPI, Top +@chapter Calling FFTW from Modern Fortran +@cindex Fortran interface + +Fortran 2003 standardized ways for Fortran code to call C libraries, +and this allows us to support a direct translation of the FFTW C API +into Fortran. Compared to the legacy Fortran 77 interface +(@pxref{Calling FFTW from Legacy Fortran}), this direct interface +offers many advantages, especially compile-time type-checking and +aligned memory allocation. As of this writing, support for these C +interoperability features seems widespread, having been implemented in +nearly all major Fortran compilers (e.g. GNU, Intel, IBM, +Oracle/Solaris, Portland Group, NAG). +@cindex portability + +This chapter documents that interface. 
For the most part, since this +interface allows Fortran to call the C interface directly, the usage +is identical to C translated to Fortran syntax. However, there are a +few subtle points such as memory allocation, wisdom, and data types +that deserve closer attention. + +@menu +* Overview of Fortran interface:: +* Reversing array dimensions:: +* FFTW Fortran type reference:: +* Plan execution in Fortran:: +* Allocating aligned memory in Fortran:: +* Accessing the wisdom API from Fortran:: +* Defining an FFTW module:: +@end menu + +@c ------------------------------------------------------- +@node Overview of Fortran interface, Reversing array dimensions, Calling FFTW from Modern Fortran, Calling FFTW from Modern Fortran +@section Overview of Fortran interface + +FFTW provides a file @code{fftw3.f03} that defines Fortran 2003 +interfaces for all of its C routines, except for the MPI routines +described elsewhere, which can be found in the same directory as +@code{fftw3.h} (the C header file). In any Fortran subroutine where +you want to use FFTW functions, you should begin with: + +@cindex iso_c_binding +@example + use, intrinsic :: iso_c_binding + include 'fftw3.f03' +@end example + +This includes the interface definitions and the standard +@code{iso_c_binding} module (which defines the equivalents of C +types). You can also put the FFTW functions into a module if you +prefer (@pxref{Defining an FFTW module}). + +At this point, you can now call anything in the FFTW C interface +directly, almost exactly as in C other than minor changes in syntax. +For example: + +@findex fftw_plan_dft_2d +@findex fftw_execute_dft +@findex fftw_destroy_plan +@example + type(C_PTR) :: plan + complex(C_DOUBLE_COMPLEX), dimension(1024,1000) :: in, out + plan = fftw_plan_dft_2d(1000,1024, in,out, FFTW_FORWARD,FFTW_ESTIMATE) + ... + call fftw_execute_dft(plan, in, out) + ... 
+ call fftw_destroy_plan(plan) +@end example + +A few important things to keep in mind are: + +@itemize @bullet + +@item +@tindex fftw_complex +@ctindex C_PTR +@ctindex C_INT +@ctindex C_DOUBLE +@ctindex C_DOUBLE_COMPLEX +FFTW plans are @code{type(C_PTR)}. Other C types are mapped in the +obvious way via the @code{iso_c_binding} standard: @code{int} turns +into @code{integer(C_INT)}, @code{fftw_complex} turns into +@code{complex(C_DOUBLE_COMPLEX)}, @code{double} turns into +@code{real(C_DOUBLE)}, and so on. @xref{FFTW Fortran type reference}. + +@item +Functions in C become functions in Fortran if they have a return value, +and subroutines in Fortran otherwise. + +@item +The ordering of the Fortran array dimensions must be @emph{reversed} +when they are passed to the FFTW plan creation, thanks to differences +in array indexing conventions (@pxref{Multi-dimensional Array +Format}). This is @emph{unlike} the legacy Fortran interface +(@pxref{Fortran-interface routines}), which reversed the dimensions +for you. @xref{Reversing array dimensions}. + +@item +@cindex alignment +@cindex SIMD +Using ordinary Fortran array declarations like this works, but may +yield suboptimal performance because the data may not be aligned +to exploit SIMD instructions on modern processors (@pxref{SIMD +alignment and fftw_malloc}). Better performance will often be obtained +by allocating with @samp{fftw_alloc}. @xref{Allocating aligned memory +in Fortran}. + +@item +@findex fftw_execute +Similar to the legacy Fortran interface (@pxref{FFTW Execution in +Fortran}), we currently recommend @emph{not} using @code{fftw_execute} +but rather using the more specialized functions like +@code{fftw_execute_dft} (@pxref{New-array Execute Functions}). +However, you should execute the plan on the @emph{same arrays} as the +ones for which you created the plan, unless you are especially +careful. @xref{Plan execution in Fortran}. 
To prevent +you from using @code{fftw_execute} by mistake, the @code{fftw3.f03} +file does not provide an @code{fftw_execute} interface declaration. + +@item +@cindex flags +Multiple planner flags are combined with @code{ior} (equivalent to @samp{|} in C). e.g. @code{FFTW_MEASURE | FFTW_DESTROY_INPUT} becomes @code{ior(FFTW_MEASURE, FFTW_DESTROY_INPUT)}. (You can also use @samp{+} as long as you don't try to include a given flag more than once.) + +@end itemize + +@menu +* Extended and quadruple precision in Fortran:: +@end menu + +@node Extended and quadruple precision in Fortran, , Overview of Fortran interface, Overview of Fortran interface +@subsection Extended and quadruple precision in Fortran +@cindex precision + +If FFTW is compiled in @code{long double} (extended) precision +(@pxref{Installation and Customization}), you may be able to call the +resulting @code{fftwl_} routines (@pxref{Precision}) from Fortran if +your compiler supports the @code{C_LONG_DOUBLE_COMPLEX} type code. + +Because some Fortran compilers do not support +@code{C_LONG_DOUBLE_COMPLEX}, the @code{fftwl_} declarations are +segregated into a separate interface file @code{fftw3l.f03}, which you +should include @emph{in addition} to @code{fftw3.f03} (which declares +precision-independent @samp{FFTW_} constants): + +@cindex iso_c_binding +@example + use, intrinsic :: iso_c_binding + include 'fftw3.f03' + include 'fftw3l.f03' +@end example + +We also support using the nonstandard @code{__float128} +quadruple-precision type provided by recent versions of @code{gcc} on +32- and 64-bit x86 hardware (@pxref{Installation and Customization}), +using the corresponding @code{real(16)} and @code{complex(16)} types +supported by @code{gfortran}. The quadruple-precision @samp{fftwq_} +functions (@pxref{Precision}) are declared in a @code{fftw3q.f03} +interface file, which should be included in addition to +@code{fftw3l.f03}, as above. You should also link with +@code{-lfftw3q -lquadmath -lm} as in C. 
+ +@c ------------------------------------------------------- +@node Reversing array dimensions, FFTW Fortran type reference, Overview of Fortran interface, Calling FFTW from Modern Fortran +@section Reversing array dimensions + +@cindex row-major +@cindex column-major +A minor annoyance in calling FFTW from Fortran is that FFTW's array +dimensions are defined in the C convention (row-major order), while +Fortran's array dimensions are the opposite convention (column-major +order). @xref{Multi-dimensional Array Format}. This is just a +bookkeeping difference, with no effect on performance. The only +consequence of this is that, whenever you create an FFTW plan for a +multi-dimensional transform, you must always @emph{reverse the +ordering of the dimensions}. + +For example, consider the three-dimensional (@threedims{L,M,N}) arrays: + +@example + complex(C_DOUBLE_COMPLEX), dimension(L,M,N) :: in, out +@end example + +To plan a DFT for these arrays using @code{fftw_plan_dft_3d}, you could do: + +@findex fftw_plan_dft_3d +@example + plan = fftw_plan_dft_3d(N,M,L, in,out, FFTW_FORWARD,FFTW_ESTIMATE) +@end example + +That is, from FFTW's perspective this is a @threedims{N,M,L} array. +@emph{No data transposition need occur}, as this is @emph{only +notation}. Similarly, to use the more generic routine +@code{fftw_plan_dft} with the same arrays, you could do: + +@example + integer(C_INT), dimension(3) :: n = [N,M,L] + plan = fftw_plan_dft(3, n, in,out, FFTW_FORWARD,FFTW_ESTIMATE) +@end example + +Note, by the way, that this is different from the legacy Fortran +interface (@pxref{Fortran-interface routines}), which automatically +reverses the order of the array dimensions for you. Here, you are +calling the C interface directly, so there is no ``translation'' layer. 
+ +@cindex r2c/c2r multi-dimensional array format +An important thing to keep in mind is the implication of this for +multidimensional real-to-complex transforms (@pxref{Multi-Dimensional +DFTs of Real Data}). In C, a multidimensional real-to-complex DFT +chops the last dimension roughly in half (@threedims{N,M,L} real input +goes to @threedims{N,M,L/2+1} complex output). In Fortran, because +the array dimension notation is reversed, the @emph{first} dimension of +the complex data is chopped roughly in half. For example consider the +@samp{r2c} transform of @threedims{L,M,N} real input in Fortran: + +@findex fftw_plan_dft_r2c_3d +@findex fftw_execute_dft_r2c +@example + type(C_PTR) :: plan + real(C_DOUBLE), dimension(L,M,N) :: in + complex(C_DOUBLE_COMPLEX), dimension(L/2+1,M,N) :: out + plan = fftw_plan_dft_r2c_3d(N,M,L, in,out, FFTW_ESTIMATE) + ... + call fftw_execute_dft_r2c(plan, in, out) +@end example + +@cindex in-place +@cindex padding +Alternatively, for an in-place r2c transform, as described in the C +documentation we must @emph{pad} the @emph{first} dimension of the +real input with an extra two entries (which are ignored by FFTW) so as +to leave enough space for the complex output. The input is +@emph{allocated} as a @threedims{2[L/2+1],M,N} array, even though only +@threedims{L,M,N} of it is actually used. In this example, we will +allocate the array as a pointer type, using @samp{fftw_alloc} to +ensure aligned memory for maximum performance (@pxref{Allocating +aligned memory in Fortran}); this also makes it easy to reference the +same memory as both a real array and a complex array. 
+ +@findex fftw_alloc_complex +@findex c_f_pointer +@example + real(C_DOUBLE), pointer :: in(:,:,:) + complex(C_DOUBLE_COMPLEX), pointer :: out(:,:,:) + type(C_PTR) :: plan, data + data = fftw_alloc_complex(int((L/2+1) * M * N, C_SIZE_T)) + call c_f_pointer(data, in, [2*(L/2+1),M,N]) + call c_f_pointer(data, out, [L/2+1,M,N]) + plan = fftw_plan_dft_r2c_3d(N,M,L, in,out, FFTW_ESTIMATE) + ... + call fftw_execute_dft_r2c(plan, in, out) + ... + call fftw_destroy_plan(plan) + call fftw_free(data) +@end example + +@c ------------------------------------------------------- +@node FFTW Fortran type reference, Plan execution in Fortran, Reversing array dimensions, Calling FFTW from Modern Fortran +@section FFTW Fortran type reference + +The following are the most important type correspondences between the +C interface and Fortran: + +@itemize @bullet + +@item +@tindex fftw_plan +Plans (@code{fftw_plan} and variants) are @code{type(C_PTR)} (i.e. an +opaque pointer). + +@item +@tindex fftw_complex +@cindex precision +@ctindex C_DOUBLE +@ctindex C_FLOAT +@ctindex C_LONG_DOUBLE +@ctindex C_DOUBLE_COMPLEX +@ctindex C_FLOAT_COMPLEX +@ctindex C_LONG_DOUBLE_COMPLEX +The C floating-point types @code{double}, @code{float}, and @code{long +double} correspond to @code{real(C_DOUBLE)}, @code{real(C_FLOAT)}, and +@code{real(C_LONG_DOUBLE)}, respectively. The C complex types +@code{fftw_complex}, @code{fftwf_complex}, and @code{fftwl_complex} +correspond in Fortran to @code{complex(C_DOUBLE_COMPLEX)}, +@code{complex(C_FLOAT_COMPLEX)}, and +@code{complex(C_LONG_DOUBLE_COMPLEX)}, respectively. +Just as in C +(@pxref{Precision}), the FFTW subroutines and types are prefixed with +@samp{fftw_}, @code{fftwf_}, and @code{fftwl_} for the different precisions, and link to different libraries (@code{-lfftw3}, @code{-lfftw3f}, and @code{-lfftw3l} on Unix), but use the @emph{same} include file @code{fftw3.f03} and the @emph{same} constants (all of which begin with @samp{FFTW_}). 
The exception is @code{long double} precision, for which you should @emph{also} include @code{fftw3l.f03} (@pxref{Extended and quadruple precision in Fortran}). + +@item +@tindex ptrdiff_t +@ctindex C_INT +@ctindex C_INTPTR_T +@ctindex C_SIZE_T +@findex fftw_malloc +The C integer types @code{int} and @code{unsigned} (used for planner +flags) become @code{integer(C_INT)}. The C integer type @code{ptrdiff_t} (e.g. in the @ref{64-bit Guru Interface}) becomes @code{integer(C_INTPTR_T)}, and @code{size_t} (in @code{fftw_malloc} etc.) becomes @code{integer(C_SIZE_T)}. + +@item +@tindex fftw_r2r_kind +@ctindex C_FFTW_R2R_KIND +The @code{fftw_r2r_kind} type (@pxref{Real-to-Real Transform Kinds}) +becomes @code{integer(C_FFTW_R2R_KIND)}. The various constant values +of the C enumerated type (@code{FFTW_R2HC} etc.) become simply integer +constants of the same names in Fortran. + +@item +@ctindex FFTW_DESTROY_INPUT +@cindex in-place +@findex fftw_flops +Numeric array pointer arguments (e.g. @code{double *}) +become @code{dimension(*), intent(out)} arrays of the same type, or +@code{dimension(*), intent(in)} if they are pointers to constant data +(e.g. @code{const int *}). There are a few exceptions where numeric +pointers refer to scalar outputs (e.g. for @code{fftw_flops}), in which +case they are @code{intent(out)} scalar arguments in Fortran too. +For the new-array execute functions (@pxref{New-array Execute Functions}), +the input arrays are declared @code{dimension(*), intent(inout)}, since +they can be modified in the case of in-place or @code{FFTW_DESTROY_INPUT} +transforms. + +@item +@findex fftw_alloc_real +@findex c_f_pointer +Pointer @emph{return} values (e.g. @code{double *}) become +@code{type(C_PTR)}. (If they are pointers to arrays, as for +@code{fftw_alloc_real}, you can convert them back to Fortran array +pointers with the standard intrinsic subroutine @code{c_f_pointer}.) 
+ +@item +@cindex guru interface +@tindex fftw_iodim +@tindex fftw_iodim64 +@cindex 64-bit architecture +The @code{fftw_iodim} type in the guru interface (@pxref{Guru vector +and transform sizes}) becomes @code{type(fftw_iodim)} in Fortran, a +derived data type (the Fortran analogue of C's @code{struct}) with +three @code{integer(C_INT)} components: @code{n}, @code{is}, and +@code{os}, with the same meanings as in C. The @code{fftw_iodim64} type in the 64-bit guru interface (@pxref{64-bit Guru Interface}) is the same, except that its components are of type @code{integer(C_INTPTR_T)}. + +@item +@ctindex C_FUNPTR +Using the wisdom import/export functions from Fortran is a bit tricky, +and is discussed in @ref{Accessing the wisdom API from Fortran}. In +brief, the @code{FILE *} arguments map to @code{type(C_PTR)}, @code{const char *} to @code{character(C_CHAR), dimension(*), intent(in)} (null-terminated!), and the generic read-char/write-char functions map to @code{type(C_FUNPTR)}. + +@end itemize + +@cindex portability +You may be wondering if you need to search-and-replace +@code{real(kind(0.0d0))} (or whatever your favorite Fortran spelling +of ``double precision'' is) with @code{real(C_DOUBLE)} everywhere in +your program, and similarly for @code{complex} and @code{integer} +types. The answer is no; you can still use your existing types. As +long as these types match their C counterparts, things should work +without a hitch. The worst that can happen, e.g. in the (unlikely) +event of a system where @code{real(kind(0.0d0))} is different from +@code{real(C_DOUBLE)}, is that the compiler will give you a +type-mismatch error. That is, if you don't use the +@code{iso_c_binding} kinds you need to accept at least the theoretical +possibility of having to change your code in response to compiler +errors on some future machine, but you don't need to worry about +silently compiling incorrect code that yields runtime errors. 
+ +@c ------------------------------------------------------- +@node Plan execution in Fortran, Allocating aligned memory in Fortran, FFTW Fortran type reference, Calling FFTW from Modern Fortran +@section Plan execution in Fortran + +In C, in order to use a plan, one normally calls @code{fftw_execute}, +which executes the plan to perform the transform on the input/output +arrays passed when the plan was created (@pxref{Using Plans}). The +corresponding subroutine call in modern Fortran is: +@example + call fftw_execute(plan) +@end example +@findex fftw_execute + +However, we have had reports that this causes problems with some +recent optimizing Fortran compilers. The problem is, because the +input/output arrays are not passed as explicit arguments to +@code{fftw_execute}, the semantics of Fortran (unlike C) allow the +compiler to assume that the input/output arrays are not changed by +@code{fftw_execute}. As a consequence, certain compilers end up +repositioning the call to @code{fftw_execute}, assuming incorrectly +that it does nothing to the arrays. + +There are various workarounds to this, but the safest and simplest +thing is to not use @code{fftw_execute} in Fortran. Instead, use the +functions described in @ref{New-array Execute Functions}, which take +the input/output arrays as explicit arguments. For example, if the +plan is for a complex-data DFT and was created for the arrays +@code{in} and @code{out}, you would do: +@example + call fftw_execute_dft(plan, in, out) +@end example +@findex fftw_execute_dft + +There are a few things to be careful of, however: + +@itemize @bullet + +@item +@findex fftw_execute_dft_r2c +@findex fftw_execute_dft_c2r +@findex fftw_execute_r2r +You must use the correct type of execute function, matching the way +the plan was created. 
Complex DFT plans should use +@code{fftw_execute_dft}, real-input (r2c) DFT plans should use +@code{fftw_execute_dft_r2c}, and real-output (c2r) DFT plans should +use @code{fftw_execute_dft_c2r}. The various r2r plans should use +@code{fftw_execute_r2r}. Fortunately, if you use the wrong one you +will get a compile-time type-mismatch error (unlike legacy Fortran). + +@item +You should normally pass the same input/output arrays that were used when +creating the plan. This is always safe. + +@item +@emph{If} you pass @emph{different} input/output arrays compared to +those used when creating the plan, you must abide by all the +restrictions of the new-array execute functions (@pxref{New-array +Execute Functions}). The most tricky of these is the +requirement that the new arrays have the same alignment as the +original arrays; the best (and possibly only) way to guarantee this +is to use the @samp{fftw_alloc} functions to allocate your arrays (@pxref{Allocating aligned memory in Fortran}). Alternatively, you can +use the @code{FFTW_UNALIGNED} flag when creating the +plan, in which case the plan does not depend on the alignment, but +this may sacrifice substantial performance on architectures (like x86) +with SIMD instructions (@pxref{SIMD alignment and fftw_malloc}). +@ctindex FFTW_UNALIGNED + +@end itemize + +@c ------------------------------------------------------- +@node Allocating aligned memory in Fortran, Accessing the wisdom API from Fortran, Plan execution in Fortran, Calling FFTW from Modern Fortran +@section Allocating aligned memory in Fortran + +@cindex alignment +@findex fftw_alloc_real +@findex fftw_alloc_complex +In order to obtain maximum performance in FFTW, you should store your +data in arrays that have been specially aligned in memory (@pxref{SIMD +alignment and fftw_malloc}). 
Enforcing alignment also permits you to +safely use the new-array execute functions (@pxref{New-array Execute +Functions}) to apply a given plan to more than one pair of in/out +arrays. Unfortunately, standard Fortran arrays do @emph{not} provide +any alignment guarantees. The @emph{only} way to allocate aligned +memory in standard Fortran is to allocate it with an external C +function, like the @code{fftw_alloc_real} and +@code{fftw_alloc_complex} functions. Fortunately, Fortran 2003 provides +a simple way to associate such allocated memory with a standard Fortran +array pointer that you can then use normally. + +We therefore recommend allocating all your input/output arrays using +the following technique: + +@enumerate + +@item +Declare a @code{pointer}, @code{arr}, to your array of the desired type +and dimensions. For example, @code{real(C_DOUBLE), pointer :: arr(:,:)} +for a 2d real array, or @code{complex(C_DOUBLE_COMPLEX), pointer :: +arr(:,:,:)} for a 3d complex array. + +@item +The number of elements to allocate must be an +@code{integer(C_SIZE_T)}. You can either declare a variable of this +type, e.g. @code{integer(C_SIZE_T) :: sz}, to store the number of +elements to allocate, or you can use the @code{int(..., C_SIZE_T)} +intrinsic function, e.g. set @code{sz = L * M * N} or use +@code{int(L * M * N, C_SIZE_T)} for an @threedims{L,M,N} array. + +@item +Declare a @code{type(C_PTR) :: p} to hold the return value from +FFTW's allocation routine. Set @code{p = fftw_alloc_real(sz)} for a real array, or @code{p = fftw_alloc_complex(sz)} for a complex array. + +@item +@findex c_f_pointer +Associate your pointer @code{arr} with the allocated memory @code{p} +using the standard @code{c_f_pointer} subroutine: @code{call +c_f_pointer(p, arr, [...dimensions...])}, where +@code{[...dimensions...]} is an array of the dimensions of the array +(in the usual Fortran order), e.g. @code{call c_f_pointer(p, arr, +[L,M,N])} for an @threedims{L,M,N} array. 
(Alternatively, you can +omit the dimensions argument if you specified the shape explicitly +when declaring @code{arr}.) You can now use @code{arr} as a usual +multidimensional array. + +@item +When you are done using the array, deallocate the memory by @code{call +fftw_free(p)} on @code{p}. + +@end enumerate + +For example, here is how we would allocate an @twodims{L,M} 2d real array: + +@example + real(C_DOUBLE), pointer :: arr(:,:) + type(C_PTR) :: p + p = fftw_alloc_real(int(L * M, C_SIZE_T)) + call c_f_pointer(p, arr, [L,M]) + @emph{...use arr and arr(i,j) as usual...} + call fftw_free(p) +@end example + +and here is an @threedims{L,M,N} 3d complex array: + +@example + complex(C_DOUBLE_COMPLEX), pointer :: arr(:,:,:) + type(C_PTR) :: p + p = fftw_alloc_complex(int(L * M * N, C_SIZE_T)) + call c_f_pointer(p, arr, [L,M,N]) + @emph{...use arr and arr(i,j,k) as usual...} + call fftw_free(p) +@end example + +See @ref{Reversing array dimensions} for an example allocating a +single array and associating both real and complex array pointers with +it, for in-place real-to-complex transforms. + +@c ------------------------------------------------------- +@node Accessing the wisdom API from Fortran, Defining an FFTW module, Allocating aligned memory in Fortran, Calling FFTW from Modern Fortran +@section Accessing the wisdom API from Fortran +@cindex wisdom +@cindex saving plans to disk + +As explained in @ref{Words of Wisdom-Saving Plans}, FFTW provides a +``wisdom'' API for saving plans to disk so that they can be recreated +quickly. The C API for exporting (@pxref{Wisdom Export}) and +importing (@pxref{Wisdom Import}) wisdom is somewhat tricky to use +from Fortran, however, because of differences in file I/O and string +types between C and Fortran. 
+ +@menu +* Wisdom File Export/Import from Fortran:: +* Wisdom String Export/Import from Fortran:: +* Wisdom Generic Export/Import from Fortran:: +@end menu + +@c =========> +@node Wisdom File Export/Import from Fortran, Wisdom String Export/Import from Fortran, Accessing the wisdom API from Fortran, Accessing the wisdom API from Fortran +@subsection Wisdom File Export/Import from Fortran + +@findex fftw_import_wisdom_from_filename +@findex fftw_export_wisdom_to_filename +The easiest way to export and import wisdom is to do so using +@code{fftw_export_wisdom_to_filename} and +@code{fftw_import_wisdom_from_filename}. The only trick is that these +require you to pass a C string, which is an array of type +@code{CHARACTER(C_CHAR)} that is terminated by @code{C_NULL_CHAR}. +You can call them like this: + +@example + integer(C_INT) :: ret + ret = fftw_export_wisdom_to_filename(C_CHAR_'my_wisdom.dat' // C_NULL_CHAR) + if (ret .eq. 0) stop 'error exporting wisdom to file' + ret = fftw_import_wisdom_from_filename(C_CHAR_'my_wisdom.dat' // C_NULL_CHAR) + if (ret .eq. 0) stop 'error importing wisdom from file' +@end example + +Note that prepending @samp{C_CHAR_} is needed to specify that the +literal string is of kind @code{C_CHAR}, and we null-terminate the +string by appending @samp{// C_NULL_CHAR}. These functions return an +@code{integer(C_INT)} (@code{ret}) which is @code{0} if an error +occurred during export/import and nonzero otherwise. + +It is also possible to use the lower-level routines +@code{fftw_export_wisdom_to_file} and +@code{fftw_import_wisdom_from_file}, which accept parameters of the C +type @code{FILE*}, expressed in Fortran as @code{type(C_PTR)}. +However, you are then responsible for creating the @code{FILE*} +yourself. You can do this by using @code{iso_c_binding} to define +Fortran interfaces for the C library functions @code{fopen} and +@code{fclose}, which is a bit strange in Fortran but workable. 
+ +@c =========> +@node Wisdom String Export/Import from Fortran, Wisdom Generic Export/Import from Fortran, Wisdom File Export/Import from Fortran, Accessing the wisdom API from Fortran +@subsection Wisdom String Export/Import from Fortran + +@findex fftw_export_wisdom_to_string +Dealing with FFTW's C string export/import is a bit more painful. In +particular, the @code{fftw_export_wisdom_to_string} function requires +you to deal with a dynamically allocated C string. To get its length, +you must define an interface to the C @code{strlen} function, and to +deallocate it you must define an interface to C @code{free}: + +@example + use, intrinsic :: iso_c_binding + interface + integer(C_INT) function strlen(s) bind(C, name='strlen') + import + type(C_PTR), value :: s + end function strlen + subroutine free(p) bind(C, name='free') + import + type(C_PTR), value :: p + end subroutine free + end interface +@end example + +Given these definitions, you can then export wisdom to a Fortran +character array: + +@example + character(C_CHAR), pointer :: s(:) + integer(C_SIZE_T) :: slen + type(C_PTR) :: p + p = fftw_export_wisdom_to_string() + if (.not. c_associated(p)) stop 'error exporting wisdom' + slen = strlen(p) + call c_f_pointer(p, s, [slen+1]) + ... + call free(p) +@end example +@findex c_associated +@findex c_f_pointer + +Note that @code{slen} is the length of the C string, but the length of +the array is @code{slen+1} because it includes the terminating null +character. (You can omit the @samp{+1} if you don't want Fortran to +know about the null character.) The standard @code{c_associated} function +checks whether @code{p} is a null pointer, which is returned by +@code{fftw_export_wisdom_to_string} if there was an error. 
+ +@findex fftw_import_wisdom_from_string +To import wisdom from a string, use +@code{fftw_import_wisdom_from_string} as usual; note that the argument +of this function must be a @code{character(C_CHAR)} that is terminated +by the @code{C_NULL_CHAR} character, like the @code{s} array above. + +@c =========> +@node Wisdom Generic Export/Import from Fortran, , Wisdom String Export/Import from Fortran, Accessing the wisdom API from Fortran +@subsection Wisdom Generic Export/Import from Fortran + +The most generic wisdom export/import functions allow you to provide +an arbitrary callback function to read/write one character at a time +in any way you want. However, your callback function must be written +in a special way, using the @code{bind(C)} attribute to be passed to a +C interface. + +@findex fftw_export_wisdom +In particular, to call the generic wisdom export function +@code{fftw_export_wisdom}, you would write a callback subroutine of the form: + +@example + subroutine my_write_char(c, p) bind(C) + use, intrinsic :: iso_c_binding + character(C_CHAR), value :: c + type(C_PTR), value :: p + @emph{...write c...} + end subroutine my_write_char +@end example + +Given such a subroutine (along with the corresponding interface definition), you could then export wisdom using: + +@findex c_funloc +@example + call fftw_export_wisdom(c_funloc(my_write_char), p) +@end example + +@findex c_loc +@findex c_f_pointer +The standard @code{c_funloc} intrinsic converts a Fortran +@code{bind(C)} subroutine into a C function pointer. The parameter +@code{p} is a @code{type(C_PTR)} to any arbitrary data that you want +to pass to @code{my_write_char} (or @code{C_NULL_PTR} if none). (Note +that you can get a C pointer to Fortran data using the intrinsic +@code{c_loc}, and convert it back to a Fortran pointer in +@code{my_write_char} using @code{c_f_pointer}.) 
+ +Similarly, to use the generic @code{fftw_import_wisdom}, you would +define a callback function of the form: + +@findex fftw_import_wisdom +@example + integer(C_INT) function my_read_char(p) bind(C) + use, intrinsic :: iso_c_binding + type(C_PTR), value :: p + character :: c + @emph{...read a character c...} + my_read_char = ichar(c, C_INT) + end function my_read_char + + .... + + integer(C_INT) :: ret + ret = fftw_import_wisdom(c_funloc(my_read_char), p) + if (ret .eq. 0) stop 'error importing wisdom' +@end example + +Your function can return @code{-1} if the end of the input is reached. +Again, @code{p} is an arbitrary @code{type(C_PTR)} that is passed +through to your function. @code{fftw_import_wisdom} returns @code{0} +if an error occurred and nonzero otherwise. + +@c ------------------------------------------------------- +@node Defining an FFTW module, , Accessing the wisdom API from Fortran, Calling FFTW from Modern Fortran +@section Defining an FFTW module + +Rather than using the @code{include} statement to include the +@code{fftw3.f03} interface file in any subroutine where you want to +use FFTW, you might prefer to define an FFTW Fortran module. FFTW +does not install itself as a module, primarily because +@code{fftw3.f03} can be shared between different Fortran compilers while +modules (in general) cannot. However, it is trivial to define your +own FFTW module if you want. Just create a file containing: + +@example + module FFTW3 + use, intrinsic :: iso_c_binding + include 'fftw3.f03' + end module +@end example + +Compile this file into a module as usual for your compiler (e.g. with +@code{gfortran -c} you will get a file @code{fftw3.mod}). Now, +instead of @code{include 'fftw3.f03'}, whenever you want to use FFTW +routines you can just do: + +@example + use FFTW3 +@end example + +as usual for Fortran modules. (You still need to link to the FFTW +library, of course.) 
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/mpi.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/mpi.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1768 @@ +@node Distributed-memory FFTW with MPI, Calling FFTW from Modern Fortran, Multi-threaded FFTW, Top +@chapter Distributed-memory FFTW with MPI +@cindex MPI + +@cindex parallel transform +In this chapter we document the parallel FFTW routines for parallel +systems supporting the MPI message-passing interface. Unlike the +shared-memory threads described in the previous chapter, MPI allows +you to use @emph{distributed-memory} parallelism, where each CPU has +its own separate memory, and which can scale up to clusters of many +thousands of processors. This capability comes at a price, however: +each process only stores a @emph{portion} of the data to be +transformed, which means that the data structures and +programming-interface are quite different from the serial or threads +versions of FFTW. +@cindex data distribution + + +Distributed-memory parallelism is especially useful when you are +transforming arrays so large that they do not fit into the memory of a +single processor. The storage per-process required by FFTW's MPI +routines is proportional to the total array size divided by the number +of processes. Conversely, distributed-memory parallelism can easily +pose an unacceptably high communications overhead for small problems; +the threshold problem size for which parallelism becomes advantageous +will depend on the precise problem you are interested in, your +hardware, and your MPI implementation. + +A note on terminology: in MPI, you divide the data among a set of +``processes'' which each run in their own memory address space. +Generally, each process runs on a different physical processor, but +this is not required. 
A set of processes in MPI is described by an +opaque data structure called a ``communicator,'' the most common of +which is the predefined communicator @code{MPI_COMM_WORLD} which +refers to @emph{all} processes. For more information on these and +other concepts common to all MPI programs, we refer the reader to the +documentation at @uref{http://www.mcs.anl.gov/research/projects/mpi/, the MPI home +page}. +@cindex MPI communicator +@ctindex MPI_COMM_WORLD + + +We assume in this chapter that the reader is familiar with the usage +of the serial (uniprocessor) FFTW, and focus only on the concepts new +to the MPI interface. + +@menu +* FFTW MPI Installation:: +* Linking and Initializing MPI FFTW:: +* 2d MPI example:: +* MPI Data Distribution:: +* Multi-dimensional MPI DFTs of Real Data:: +* Other Multi-dimensional Real-data MPI Transforms:: +* FFTW MPI Transposes:: +* FFTW MPI Wisdom:: +* Avoiding MPI Deadlocks:: +* FFTW MPI Performance Tips:: +* Combining MPI and Threads:: +* FFTW MPI Reference:: +* FFTW MPI Fortran Interface:: +@end menu + +@c ------------------------------------------------------------ +@node FFTW MPI Installation, Linking and Initializing MPI FFTW, Distributed-memory FFTW with MPI, Distributed-memory FFTW with MPI +@section FFTW MPI Installation + +All of the FFTW MPI code is located in the @code{mpi} subdirectory of +the FFTW package. On Unix systems, the FFTW MPI libraries and header +files are automatically configured, compiled, and installed along with +the uniprocessor FFTW libraries simply by including +@code{--enable-mpi} in the flags to the @code{configure} script +(@pxref{Installation on Unix}). +@fpindex configure + + +Any implementation of the MPI standard, version 1 or later, should +work with FFTW. The @code{configure} script will attempt to +automatically detect how to compile and link code using your MPI +implementation. 
In some cases, especially if you have multiple +different MPI implementations installed or have an unusual MPI +software package, you may need to provide this information explicitly. + +Most commonly, one compiles MPI code by invoking a special compiler +command, typically @code{mpicc} for C code. The @code{configure} +script knows the most common names for this command, but you can +specify the MPI compilation command explicitly by setting the +@code{MPICC} variable, as in @samp{./configure MPICC=mpicc ...}. +@fpindex mpicc + + +If, instead of a special compiler command, you need to link a certain +library, you can specify the link command via the @code{MPILIBS} +variable, as in @samp{./configure MPILIBS=-lmpi ...}. Note that if +your MPI library is installed in a non-standard location (one the +compiler does not know about by default), you may also have to specify +the location of the library and header files via @code{LDFLAGS} and +@code{CPPFLAGS} variables, respectively, as in @samp{./configure +LDFLAGS=-L/path/to/mpi/libs CPPFLAGS=-I/path/to/mpi/include ...}. + +@c ------------------------------------------------------------ +@node Linking and Initializing MPI FFTW, 2d MPI example, FFTW MPI Installation, Distributed-memory FFTW with MPI +@section Linking and Initializing MPI FFTW + +Programs using the MPI FFTW routines should be linked with +@code{-lfftw3_mpi -lfftw3 -lm} on Unix in double precision, +@code{-lfftw3f_mpi -lfftw3f -lm} in single precision, and so on +(@pxref{Precision}). You will also need to link with whatever library +is responsible for MPI on your system; in most MPI implementations, +there is a special compiler alias named @code{mpicc} to compile and +link MPI code. 
+@fpindex mpicc +@cindex linking on Unix +@cindex precision + + +@findex fftw_init_threads +Before calling any FFTW routines except possibly +@code{fftw_init_threads} (@pxref{Combining MPI and Threads}), but after calling +@code{MPI_Init}, you should call the function: + +@example +void fftw_mpi_init(void); +@end example +@findex fftw_mpi_init + +If, at the end of your program, you want to get rid of all memory and +other resources allocated internally by FFTW, for both the serial and +MPI routines, you can call: + +@example +void fftw_mpi_cleanup(void); +@end example +@findex fftw_mpi_cleanup + +which is much like the @code{fftw_cleanup()} function except that it +also gets rid of FFTW's MPI-related data. You must @emph{not} execute +any previously created plans after calling this function. + +@c ------------------------------------------------------------ +@node 2d MPI example, MPI Data Distribution, Linking and Initializing MPI FFTW, Distributed-memory FFTW with MPI +@section 2d MPI example + +Before we document the FFTW MPI interface in detail, we begin with a +simple example outlining how one would perform a two-dimensional +@code{N0} by @code{N1} complex DFT. 
+ +@example +#include <fftw3-mpi.h> + +int main(int argc, char **argv) +@{ + const ptrdiff_t N0 = ..., N1 = ...; + fftw_plan plan; + fftw_complex *data; + ptrdiff_t alloc_local, local_n0, local_0_start, i, j; + + MPI_Init(&argc, &argv); + fftw_mpi_init(); + + /* @r{get local data size and allocate} */ + alloc_local = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD, + &local_n0, &local_0_start); + data = fftw_alloc_complex(alloc_local); + + /* @r{create plan for in-place forward DFT} */ + plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, MPI_COMM_WORLD, + FFTW_FORWARD, FFTW_ESTIMATE); + + /* @r{initialize data to some function} my_function(x,y) */ + for (i = 0; i < local_n0; ++i) for (j = 0; j < N1; ++j) + data[i*N1 + j] = my_function(local_0_start + i, j); + + /* @r{compute transforms, in-place, as many times as desired} */ + fftw_execute(plan); + + fftw_destroy_plan(plan); + + MPI_Finalize(); +@} +@end example + +As can be seen above, the MPI interface follows the same basic style +of allocate/plan/execute/destroy as the serial FFTW routines. All of +the MPI-specific routines are prefixed with @samp{fftw_mpi_} instead +of @samp{fftw_}. There are a few important differences, however: + +First, we must call @code{fftw_mpi_init()} after calling +@code{MPI_Init} (required in all MPI programs) and before calling any +other @samp{fftw_mpi_} routine. +@findex MPI_Init +@findex fftw_mpi_init + + +Second, when we create the plan with @code{fftw_mpi_plan_dft_2d}, +analogous to @code{fftw_plan_dft_2d}, we pass an additional argument: +the communicator, indicating which processes will participate in the +transform (here @code{MPI_COMM_WORLD}, indicating all processes). +Whenever you create, execute, or destroy a plan for an MPI transform, +you must call the corresponding FFTW routine on @emph{all} processes +in the communicator for that transform. (That is, these are +@emph{collective} calls.) 
Note that the plan for the MPI transform +uses the standard @code{fftw_execute} and @code{fftw_destroy_plan} routines +(on the other hand, there are MPI-specific new-array execute functions +documented below). +@cindex collective function +@findex fftw_mpi_plan_dft_2d +@ctindex MPI_COMM_WORLD + + +Third, all of the FFTW MPI routines take @code{ptrdiff_t} arguments +instead of @code{int} as for the serial FFTW. @code{ptrdiff_t} is a +standard C integer type which is (at least) 32 bits wide on a 32-bit +machine and 64 bits wide on a 64-bit machine. This is to make it easy +to specify very large parallel transforms on a 64-bit machine. (You +can specify 64-bit transform sizes in the serial FFTW, too, but only +by using the @samp{guru64} planner interface. @xref{64-bit Guru +Interface}.) +@tindex ptrdiff_t +@cindex 64-bit architecture + + +Fourth, and most importantly, you don't allocate the entire +two-dimensional array on each process. Instead, you call +@code{fftw_mpi_local_size_2d} to find out what @emph{portion} of the +array resides on each processor, and how much space to allocate. +Here, the portion of the array on each process is a @code{local_n0} by +@code{N1} slice of the total array, starting at index +@code{local_0_start}. The total number of @code{fftw_complex} numbers +to allocate is given by the @code{alloc_local} return value, which +@emph{may} be greater than @code{local_n0 * N1} (in case some +intermediate calculations require additional storage). The data +distribution in FFTW's MPI interface is described in more detail by +the next section. +@findex fftw_mpi_local_size_2d +@cindex data distribution + + +Given the portion of the array that resides on the local process, it +is straightforward to initialize the data (here to a function +@code{my_function}) and otherwise manipulate it. Of course, at the end +of the program you may want to output the data somehow, but +synchronizing this output is up to you and is beyond the scope of this +manual. 
(One good way to output a large multi-dimensional distributed +array in MPI to a portable binary file is to use the free HDF5 +library; see the @uref{http://www.hdfgroup.org/, HDF home page}.) +@cindex HDF5 +@cindex MPI I/O + +@c ------------------------------------------------------------ +@node MPI Data Distribution, Multi-dimensional MPI DFTs of Real Data, 2d MPI example, Distributed-memory FFTW with MPI +@section MPI Data Distribution +@cindex data distribution + +The most important concept to understand in using FFTW's MPI interface +is the data distribution. With a serial or multithreaded FFT, all of +the inputs and outputs are stored as a single contiguous chunk of +memory. With a distributed-memory FFT, the inputs and outputs are +broken into disjoint blocks, one per process. + +In particular, FFTW uses a @emph{1d block distribution} of the data, +distributed along the @emph{first dimension}. For example, if you +want to perform a @twodims{100,200} complex DFT, distributed over 4 +processes, each process will get a @twodims{25,200} slice of the data. +That is, process 0 will get rows 0 through 24, process 1 will get rows +25 through 49, process 2 will get rows 50 through 74, and process 3 +will get rows 75 through 99. If you take the same array but +distribute it over 3 processes, then it is not evenly divisible so the +different processes will have unequal chunks. FFTW's default choice +in this case is to assign 34 rows to processes 0 and 1, and 32 rows to +process 2. +@cindex block distribution + + +FFTW provides several @samp{fftw_mpi_local_size} routines that you can +call to find out what portion of an array is stored on the current +process. In most cases, you should use the default block sizes picked +by FFTW, but it is also possible to specify your own block size. For +example, with a @twodims{100,200} array on three processes, you can +tell FFTW to use a block size of 40, which would assign 40 rows to +processes 0 and 1, and 20 rows to process 2. 
FFTW's default is to +divide the data equally among the processes if possible, and as best +it can otherwise. The rows are always assigned in ``rank order,'' +i.e. process 0 gets the first block of rows, then process 1, and so +on. (You can change this by using @code{MPI_Comm_split} to create a +new communicator with re-ordered processes.) However, you should +always call the @samp{fftw_mpi_local_size} routines, if possible, +rather than trying to predict FFTW's distribution choices. + +In particular, it is critical that you allocate the storage size that +is returned by @samp{fftw_mpi_local_size}, which is @emph{not} +necessarily the size of the local slice of the array. The reason is +that intermediate steps of FFTW's algorithms involve transposing the +array and redistributing the data, so at these intermediate steps FFTW +may require more local storage space (albeit always proportional to +the total size divided by the number of processes). The +@samp{fftw_mpi_local_size} functions know how much storage is required +for these intermediate steps and tell you the correct amount to +allocate. + +@menu +* Basic and advanced distribution interfaces:: +* Load balancing:: +* Transposed distributions:: +* One-dimensional distributions:: +@end menu + +@node Basic and advanced distribution interfaces, Load balancing, MPI Data Distribution, MPI Data Distribution +@subsection Basic and advanced distribution interfaces + +As with the planner interface, the @samp{fftw_mpi_local_size} +distribution interface is broken into basic and advanced +(@samp{_many}) interfaces, where the latter allows you to specify the +block size manually and also to request block sizes when computing +multiple transforms simultaneously. These functions are documented +more exhaustively by the FFTW MPI Reference, but we summarize the +basic ideas here using a couple of two-dimensional examples. 
+ +For the @twodims{100,200} complex-DFT example, above, we would find +the distribution by calling the following function in the basic +interface: + +@example +ptrdiff_t fftw_mpi_local_size_2d(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); +@end example +@findex fftw_mpi_local_size_2d + +Given the total size of the data to be transformed (here, @code{n0 = +100} and @code{n1 = 200}) and an MPI communicator (@code{comm}), this +function provides three numbers. + +First, it describes the shape of the local data: the current process +should store a @code{local_n0} by @code{n1} slice of the overall +dataset, in row-major order (@code{n1} dimension contiguous), starting +at index @code{local_0_start}. That is, if the total dataset is +viewed as a @code{n0} by @code{n1} matrix, the current process should +store the rows @code{local_0_start} to +@code{local_0_start+local_n0-1}. Obviously, if you are running with +only a single MPI process, that process will store the entire array: +@code{local_0_start} will be zero and @code{local_n0} will be +@code{n0}. @xref{Row-major Format}. +@cindex row-major + + +Second, the return value is the total number of data elements (e.g., +complex numbers for a complex DFT) that should be allocated for the +input and output arrays on the current process (ideally with +@code{fftw_malloc} or an @samp{fftw_alloc} function, to ensure optimal +alignment). It might seem that this should always be equal to +@code{local_n0 * n1}, but this is @emph{not} the case. FFTW's +distributed FFT algorithms require data redistributions at +intermediate stages of the transform, and in some circumstances this +may require slightly larger local storage. This is discussed in more +detail below, under @ref{Load balancing}. 
+@findex fftw_malloc +@findex fftw_alloc_complex + + +@cindex advanced interface +The advanced-interface @samp{local_size} function for multidimensional +transforms returns the same three things (@code{local_n0}, +@code{local_0_start}, and the total number of elements to allocate), +but takes more inputs: + +@example +ptrdiff_t fftw_mpi_local_size_many(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t block0, + MPI_Comm comm, + ptrdiff_t *local_n0, + ptrdiff_t *local_0_start); +@end example +@findex fftw_mpi_local_size_many + +The two-dimensional case above corresponds to @code{rnk = 2} and an +array @code{n} of length 2 with @code{n[0] = n0} and @code{n[1] = n1}. +This routine is for any @code{rnk > 1}; one-dimensional transforms +have their own interface because they work slightly differently, as +discussed below. + +First, the advanced interface allows you to perform multiple +transforms at once, of interleaved data, as specified by the +@code{howmany} parameter. (@code{howmany} is 1 for a single +transform.) + +Second, here you can specify your desired block size in the @code{n0} +dimension, @code{block0}. To use FFTW's default block size, pass +@code{FFTW_MPI_DEFAULT_BLOCK} (0) for @code{block0}. Otherwise, on +@code{P} processes, FFTW will return @code{local_n0} equal to +@code{block0} on the first @code{n0 / block0} processes (rounded down), +return @code{local_n0} equal to @code{n0 - block0 * (n0 / block0)} on +the next process, and @code{local_n0} equal to zero on any remaining +processes. In general, we recommend using the default block size +(which corresponds to @code{n0 / P}, rounded up). +@ctindex FFTW_MPI_DEFAULT_BLOCK +@cindex block distribution + + +For example, suppose you have @code{P = 4} processes and @code{n0 = +21}. The default will be a block size of @code{6}, which will give +@code{local_n0 = 6} on the first three processes and @code{local_n0 = +3} on the last process. 
Instead, however, you could specify +@code{block0 = 5} if you wanted, which would give @code{local_n0 = 5} +on processes 0 to 2, @code{local_n0 = 6} on process 3. (This choice, +while it may look superficially more ``balanced,'' has the same +critical path as FFTW's default but requires more communications.) + +@node Load balancing, Transposed distributions, Basic and advanced distribution interfaces, MPI Data Distribution +@subsection Load balancing +@cindex load balancing + +Ideally, when you parallelize a transform over some @math{P} +processes, each process should end up with work that takes equal time. +Otherwise, all of the processes end up waiting on whichever process is +slowest. This goal is known as ``load balancing.'' In this section, +we describe the circumstances under which FFTW is able to load-balance +well, and in particular how you should choose your transform size in +order to load balance. + +Load balancing is especially difficult when you are parallelizing over +heterogeneous machines; for example, if one of your processors is an +old 486 and another is a Pentium IV, obviously you should give the +Pentium more work to do than the 486 since the latter is much slower. +FFTW does not deal with this problem, however---it assumes that your +processes run on hardware of comparable speed, and that the goal is +therefore to divide the problem as equally as possible. + +For a multi-dimensional complex DFT, FFTW can divide the problem +equally among the processes if: (i) the @emph{first} dimension +@code{n0} is divisible by @math{P}; and (ii), the @emph{product} of +the subsequent dimensions is divisible by @math{P}. (For the advanced +interface, where you can specify multiple simultaneous transforms via +some ``vector'' length @code{howmany}, a factor of @code{howmany} is +included in the product of the subsequent dimensions.) 
+ +For a one-dimensional complex DFT, the length @code{N} of the data +should be divisible by @math{P} @emph{squared} to be able to divide +the problem equally among the processes. + +@node Transposed distributions, One-dimensional distributions, Load balancing, MPI Data Distribution +@subsection Transposed distributions + +Internally, FFTW's MPI transform algorithms work by first computing +transforms of the data local to each process, then by globally +@emph{transposing} the data in some fashion to redistribute the data +among the processes, transforming the new data local to each process, +and transposing back. For example, a two-dimensional @code{n0} by +@code{n1} array, distributed across the @code{n0} dimension, is +transformed by: (i) transforming the @code{n1} dimension, which is +local to each process; (ii) transposing to an @code{n1} by @code{n0} +array, distributed across the @code{n1} dimension; (iii) transforming +the @code{n0} dimension, which is now local to each process; (iv) +transposing back. +@cindex transpose + + +However, in many applications it is acceptable to compute a +multidimensional DFT whose results are produced in transposed order +(e.g., @code{n1} by @code{n0} in two dimensions). This provides a +significant performance advantage, because it means that the final +transposition step can be omitted. FFTW supports this optimization, +which you specify by passing the flag @code{FFTW_MPI_TRANSPOSED_OUT} +to the planner routines. To compute the inverse transform of +transposed output, you specify @code{FFTW_MPI_TRANSPOSED_IN} to tell +it that the input is transposed. In this section, we explain how to +interpret the output format of such a transform. +@ctindex FFTW_MPI_TRANSPOSED_OUT +@ctindex FFTW_MPI_TRANSPOSED_IN + + +Suppose you are transforming multi-dimensional data with (at +least two) dimensions @ndims{}. As always, it is distributed along +the first dimension @dimk{0}. 
Now, if we compute its DFT with the +@code{FFTW_MPI_TRANSPOSED_OUT} flag, the resulting output data are stored +with the first @emph{two} dimensions transposed: @ndimstrans{}, +distributed along the @dimk{1} dimension. Conversely, if we take the +@ndimstrans{} data and transform it with the +@code{FFTW_MPI_TRANSPOSED_IN} flag, then the format goes back to the +original @ndims{} array. + +There are two ways to find the portion of the transposed array that +resides on the current process. First, you can simply call the +appropriate @samp{local_size} function, passing @ndimstrans{} (the +transposed dimensions). This would mean calling the @samp{local_size} +function twice, once for the transposed and once for the +non-transposed dimensions. Alternatively, you can call one of the +@samp{local_size_transposed} functions, which returns both the +non-transposed and transposed data distribution from a single call. +For example, for a 3d transform with transposed output (or input), you +might call: + +@example +ptrdiff_t fftw_mpi_local_size_3d_transposed( + ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); +@end example +@findex fftw_mpi_local_size_3d_transposed + +Here, @code{local_n0} and @code{local_0_start} give the size and +starting index of the @code{n0} dimension for the +@emph{non}-transposed data, as in the previous sections. For +@emph{transposed} data (e.g. the output for +@code{FFTW_MPI_TRANSPOSED_OUT}), @code{local_n1} and +@code{local_1_start} give the size and starting index of the @code{n1} +dimension, which is the first dimension of the transposed data +(@code{n1} by @code{n0} by @code{n2}). + +(Note that @code{FFTW_MPI_TRANSPOSED_IN} is completely equivalent to +performing @code{FFTW_MPI_TRANSPOSED_OUT} and passing the first two +dimensions to the planner in reverse order, or vice versa. 
If you +pass @emph{both} the @code{FFTW_MPI_TRANSPOSED_IN} and +@code{FFTW_MPI_TRANSPOSED_OUT} flags, it is equivalent to swapping the +first two dimensions passed to the planner and passing @emph{neither} +flag.) + +@node One-dimensional distributions, , Transposed distributions, MPI Data Distribution +@subsection One-dimensional distributions + +For one-dimensional distributed DFTs using FFTW, matters are slightly +more complicated because the data distribution is more closely tied to +how the algorithm works. In particular, you can no longer pass an +arbitrary block size and must accept FFTW's default; also, the block +sizes may be different for input and output. Also, the data +distribution depends on the flags and transform direction, in order +for forward and backward transforms to work correctly. + +@example +ptrdiff_t fftw_mpi_local_size_1d(ptrdiff_t n0, MPI_Comm comm, + int sign, unsigned flags, + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, + ptrdiff_t *local_no, ptrdiff_t *local_o_start); +@end example +@findex fftw_mpi_local_size_1d + +This function computes the data distribution for a 1d transform of +size @code{n0} with the given transform @code{sign} and @code{flags}. +Both input and output data use block distributions. The input on the +current process will consist of @code{local_ni} numbers starting at +index @code{local_i_start}; e.g. if only a single process is used, +then @code{local_ni} will be @code{n0} and @code{local_i_start} will +be @code{0}. Similarly for the output, with @code{local_no} numbers +starting at index @code{local_o_start}. The return value of +@code{fftw_mpi_local_size_1d} will be the total number of elements to +allocate on the current process (which might be slightly larger than +the local size due to intermediate steps in the algorithm). + +As mentioned above (@pxref{Load balancing}), the data will be divided +equally among the processes if @code{n0} is divisible by the +@emph{square} of the number of processes. 
In this case, +@code{local_ni} will equal @code{local_no}. Otherwise, they may be +different. + +For some applications, such as convolutions, the order of the output +data is irrelevant. In this case, performance can be improved by +specifying that the output data be stored in an FFTW-defined +``scrambled'' format. (In particular, this is the analogue of +transposed output in the multidimensional case: scrambled output saves +a communications step.) If you pass @code{FFTW_MPI_SCRAMBLED_OUT} in +the flags, then the output is stored in this (undocumented) scrambled +order. Conversely, to perform the inverse transform of data in +scrambled order, pass the @code{FFTW_MPI_SCRAMBLED_IN} flag. +@ctindex FFTW_MPI_SCRAMBLED_OUT +@ctindex FFTW_MPI_SCRAMBLED_IN + + +In MPI FFTW, only composite sizes @code{n0} can be parallelized; we +have not yet implemented a parallel algorithm for large prime sizes. + +@c ------------------------------------------------------------ +@node Multi-dimensional MPI DFTs of Real Data, Other Multi-dimensional Real-data MPI Transforms, MPI Data Distribution, Distributed-memory FFTW with MPI +@section Multi-dimensional MPI DFTs of Real Data + +FFTW's MPI interface also supports multi-dimensional DFTs of real +data, similar to the serial r2c and c2r interfaces. (Parallel +one-dimensional real-data DFTs are not currently supported; you must +use a complex transform and set the imaginary parts of the inputs to +zero.) + +The key points to understand for r2c and c2r MPI transforms (compared +to the MPI complex DFTs or the serial r2c/c2r transforms), are: + +@itemize @bullet + +@item +Just as for serial transforms, r2c/c2r DFTs transform @ndims{} real +data to/from @ndimshalf{} complex data: the last dimension of the +complex data is cut in half (rounded down), plus one. As for the +serial transforms, the sizes you pass to the @samp{plan_dft_r2c} and +@samp{plan_dft_c2r} are the @ndims{} dimensions of the real data. 
+ +@item +@cindex padding +Although the real data is @emph{conceptually} @ndims{}, it is +@emph{physically} stored as an @ndimspad{} array, where the last +dimension has been @emph{padded} to make it the same size as the +complex output. This is much like the in-place serial r2c/c2r +interface (@pxref{Multi-Dimensional DFTs of Real Data}), except that +in MPI the padding is required even for out-of-place data. The extra +padding numbers are ignored by FFTW (they are @emph{not} like +zero-padding the transform to a larger size); they are only used to +determine the data layout. + +@item +@cindex data distribution +The data distribution in MPI for @emph{both} the real and complex data +is determined by the shape of the @emph{complex} data. That is, you +call the appropriate @samp{local size} function for the @ndimshalf{} +complex data, and then use the @emph{same} distribution for the real +data except that the last complex dimension is replaced by a (padded) +real dimension of twice the length. + +@end itemize + +For example suppose we are performing an out-of-place r2c transform of +@threedims{L,M,N} real data [padded to @threedims{L,M,2(N/2+1)}], +resulting in @threedims{L,M,N/2+1} complex data. 
Similar to the +example in @ref{2d MPI example}, we might do something like: + +@example +#include <fftw3-mpi.h> + +int main(int argc, char **argv) +@{ + const ptrdiff_t L = ..., M = ..., N = ...; + fftw_plan plan; + double *rin; + fftw_complex *cout; + ptrdiff_t alloc_local, local_n0, local_0_start, i, j, k; + + MPI_Init(&argc, &argv); + fftw_mpi_init(); + + /* @r{get local data size and allocate} */ + alloc_local = fftw_mpi_local_size_3d(L, M, N/2+1, MPI_COMM_WORLD, + &local_n0, &local_0_start); + rin = fftw_alloc_real(2 * alloc_local); + cout = fftw_alloc_complex(alloc_local); + + /* @r{create plan for out-of-place r2c DFT} */ + plan = fftw_mpi_plan_dft_r2c_3d(L, M, N, rin, cout, MPI_COMM_WORLD, + FFTW_MEASURE); + + /* @r{initialize rin to some function} my_func(x,y,z) */ + for (i = 0; i < local_n0; ++i) + for (j = 0; j < M; ++j) + for (k = 0; k < N; ++k) + rin[(i*M + j) * (2*(N/2+1)) + k] = my_func(local_0_start+i, j, k); + + /* @r{compute transforms as many times as desired} */ + fftw_execute(plan); + + fftw_destroy_plan(plan); + + MPI_Finalize(); +@} +@end example + +@findex fftw_alloc_real +@cindex row-major +Note that we allocated @code{rin} using @code{fftw_alloc_real} with an +argument of @code{2 * alloc_local}: since @code{alloc_local} is the +number of @emph{complex} values to allocate, the number of @emph{real} +values is twice as many. The @code{rin} array is then +@threedims{local_n0,M,2(N/2+1)} in row-major order, so its +@code{(i,j,k)} element is at the index @code{(i*M + j) * (2*(N/2+1)) + +k} (@pxref{Multi-dimensional Array Format }). + +@cindex transpose +@ctindex FFTW_MPI_TRANSPOSED_OUT +@ctindex FFTW_MPI_TRANSPOSED_IN +As for the complex transforms, improved performance can be obtained by +specifying that the output is the transpose of the input or vice versa +(@pxref{Transposed distributions}).
In our @threedims{L,M,N} r2c +example, including @code{FFTW_MPI_TRANSPOSED_OUT} in the flags means that +the input would be a padded @threedims{L,M,2(N/2+1)} real array +distributed over the @code{L} dimension, while the output would be a +@threedims{M,L,N/2+1} complex array distributed over the @code{M} +dimension. To perform the inverse c2r transform with the same data +distributions, you would use the @code{FFTW_MPI_TRANSPOSED_IN} flag. + +@c ------------------------------------------------------------ +@node Other Multi-dimensional Real-data MPI Transforms, FFTW MPI Transposes, Multi-dimensional MPI DFTs of Real Data, Distributed-memory FFTW with MPI +@section Other multi-dimensional Real-Data MPI Transforms + +@cindex r2r +FFTW's MPI interface also supports multi-dimensional @samp{r2r} +transforms of all kinds supported by the serial interface +(e.g. discrete cosine and sine transforms, discrete Hartley +transforms, etc.). Only multi-dimensional @samp{r2r} transforms, not +one-dimensional transforms, are currently parallelized. + +@tindex fftw_r2r_kind +These are used much like the multidimensional complex DFTs discussed +above, except that the data is real rather than complex, and one needs +to pass an r2r transform kind (@code{fftw_r2r_kind}) for each +dimension as in the serial FFTW (@pxref{More DFTs of Real Data}).
+ +For example, one might perform a two-dimensional @twodims{L,M} transform that is +an REDFT10 (DCT-II) in the first dimension and an RODFT10 (DST-II) in +the second dimension with code like: + +@example + const ptrdiff_t L = ..., M = ...; + fftw_plan plan; + double *data; + ptrdiff_t alloc_local, local_n0, local_0_start, i, j; + + /* @r{get local data size and allocate} */ + alloc_local = fftw_mpi_local_size_2d(L, M, MPI_COMM_WORLD, + &local_n0, &local_0_start); + data = fftw_alloc_real(alloc_local); + + /* @r{create plan for in-place REDFT10 x RODFT10} */ + plan = fftw_mpi_plan_r2r_2d(L, M, data, data, MPI_COMM_WORLD, + FFTW_REDFT10, FFTW_RODFT10, FFTW_MEASURE); + + /* @r{initialize data to some function} my_function(x,y) */ + for (i = 0; i < local_n0; ++i) for (j = 0; j < M; ++j) + data[i*M + j] = my_function(local_0_start + i, j); + + /* @r{compute transforms, in-place, as many times as desired} */ + fftw_execute(plan); + + fftw_destroy_plan(plan); +@end example + +@findex fftw_alloc_real +Notice that we use the same @samp{local_size} functions as we did for +complex data, only now we interpret the sizes in terms of real rather +than complex values, and correspondingly use @code{fftw_alloc_real}. + +@c ------------------------------------------------------------ +@node FFTW MPI Transposes, FFTW MPI Wisdom, Other Multi-dimensional Real-data MPI Transforms, Distributed-memory FFTW with MPI +@section FFTW MPI Transposes +@cindex transpose + +FFTW's MPI Fourier transforms rely on one or more @emph{global +transposition} steps for their communications. For example, the +multidimensional transforms work by transforming along some +dimensions, then transposing to make the first dimension local and +transforming that, then transposing back. Because global +transposition of a block-distributed matrix has many other potential +uses besides FFTs, FFTW's transpose routines can be called directly, +as documented in this section.
+ +@menu +* Basic distributed-transpose interface:: +* Advanced distributed-transpose interface:: +* An improved replacement for MPI_Alltoall:: +@end menu + +@node Basic distributed-transpose interface, Advanced distributed-transpose interface, FFTW MPI Transposes, FFTW MPI Transposes +@subsection Basic distributed-transpose interface + +In particular, suppose that we have an @code{n0} by @code{n1} array in +row-major order, block-distributed across the @code{n0} dimension. To +transpose this into an @code{n1} by @code{n0} array block-distributed +across the @code{n1} dimension, we would create a plan by calling the +following function: + +@example +fftw_plan fftw_mpi_plan_transpose(ptrdiff_t n0, ptrdiff_t n1, + double *in, double *out, + MPI_Comm comm, unsigned flags); +@end example +@findex fftw_mpi_plan_transpose + +The input and output arrays (@code{in} and @code{out}) can be the +same. The transpose is actually executed by calling +@code{fftw_execute} on the plan, as usual. +@findex fftw_execute + + +The @code{flags} are the usual FFTW planner flags, but support +two additional flags: @code{FFTW_MPI_TRANSPOSED_OUT} and/or +@code{FFTW_MPI_TRANSPOSED_IN}. What these flags indicate, for +transpose plans, is that the output and/or input, respectively, are +@emph{locally} transposed. That is, on each process input data is +normally stored as a @code{local_n0} by @code{n1} array in row-major +order, but for an @code{FFTW_MPI_TRANSPOSED_IN} plan the input data is +stored as @code{n1} by @code{local_n0} in row-major order. Similarly, +@code{FFTW_MPI_TRANSPOSED_OUT} means that the output is @code{n0} by +@code{local_n1} instead of @code{local_n1} by @code{n0}. 
+@ctindex FFTW_MPI_TRANSPOSED_OUT +@ctindex FFTW_MPI_TRANSPOSED_IN + + +To determine the local size of the array on each process before and +after the transpose, as well as the amount of storage that must be +allocated, one should call @code{fftw_mpi_local_size_2d_transposed}, +just as for a 2d DFT as described in the previous section: +@cindex data distribution + +@example +ptrdiff_t fftw_mpi_local_size_2d_transposed + (ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); +@end example +@findex fftw_mpi_local_size_2d_transposed + +Again, the return value is the local storage to allocate, which in +this case is the number of @emph{real} (@code{double}) values rather +than complex numbers as in the previous examples. + +@node Advanced distributed-transpose interface, An improved replacement for MPI_Alltoall, Basic distributed-transpose interface, FFTW MPI Transposes +@subsection Advanced distributed-transpose interface + +The above routines are for a transpose of a matrix of numbers (of type +@code{double}), using FFTW's default block sizes. More generally, one +can perform transposes of @emph{tuples} of numbers, with +user-specified block sizes for the input and output: + +@example +fftw_plan fftw_mpi_plan_many_transpose + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, + double *in, double *out, MPI_Comm comm, unsigned flags); +@end example +@findex fftw_mpi_plan_many_transpose + +In this case, one is transposing an @code{n0} by @code{n1} matrix of +@code{howmany}-tuples (e.g. @code{howmany = 2} for complex numbers). +The input is distributed along the @code{n0} dimension with block size +@code{block0}, and the @code{n1} by @code{n0} output is distributed +along the @code{n1} dimension with block size @code{block1}. If +@code{FFTW_MPI_DEFAULT_BLOCK} (0) is passed for a block size then FFTW +uses its default block size. 
To get the local size of the data on +each process, you should then call @code{fftw_mpi_local_size_many_transposed}. +@ctindex FFTW_MPI_DEFAULT_BLOCK +@findex fftw_mpi_local_size_many_transposed + +@node An improved replacement for MPI_Alltoall, , Advanced distributed-transpose interface, FFTW MPI Transposes +@subsection An improved replacement for MPI_Alltoall + +We close this section by noting that FFTW's MPI transpose routines can +be thought of as a generalization of the @code{MPI_Alltoall} function +(albeit only for floating-point types), and in some circumstances can +function as an improved replacement. +@findex MPI_Alltoall + + +@code{MPI_Alltoall} is defined by the MPI standard as: + +@example +int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcnt, MPI_Datatype recvtype, + MPI_Comm comm); +@end example + +In particular, for @code{double*} arrays @code{in} and @code{out}, +consider the call: + +@example +MPI_Alltoall(in, howmany, MPI_DOUBLE, out, howmany, MPI_DOUBLE, comm); +@end example + +This is completely equivalent to: + +@example +MPI_Comm_size(comm, &P); +plan = fftw_mpi_plan_many_transpose(P, P, howmany, 1, 1, in, out, comm, FFTW_ESTIMATE); +fftw_execute(plan); +fftw_destroy_plan(plan); +@end example + +That is, computing a @twodims{P,P} transpose on @code{P} processes, +with a block size of 1, is just a standard all-to-all communication. + +However, using the FFTW routine instead of @code{MPI_Alltoall} may +have certain advantages. First of all, FFTW's routine can operate +in-place (@code{in == out}) whereas @code{MPI_Alltoall} can only +operate out-of-place. +@cindex in-place + + +Second, even for out-of-place plans, FFTW's routine may be faster, +especially if you need to perform the all-to-all communication many +times and can afford to use @code{FFTW_MEASURE} or +@code{FFTW_PATIENT}.
It should certainly be no slower, not including +the time to create the plan, since one of the possible algorithms that +FFTW uses for an out-of-place transpose @emph{is} simply to call +@code{MPI_Alltoall}. However, FFTW also considers several other +possible algorithms that, depending on your MPI implementation and +your hardware, may be faster. +@ctindex FFTW_MEASURE +@ctindex FFTW_PATIENT + +@c ------------------------------------------------------------ +@node FFTW MPI Wisdom, Avoiding MPI Deadlocks, FFTW MPI Transposes, Distributed-memory FFTW with MPI +@section FFTW MPI Wisdom +@cindex wisdom +@cindex saving plans to disk + +FFTW's ``wisdom'' facility (@pxref{Words of Wisdom-Saving Plans}) can +be used to save MPI plans as well as to save uniprocessor plans. +However, for MPI there are several unavoidable complications. + +@cindex MPI I/O +First, the MPI standard does not guarantee that every process can +perform file I/O (at least, not using C stdio routines)---in general, +we may only assume that process 0 is capable of I/O.@footnote{In fact, +even this assumption is not technically guaranteed by the standard, +although it seems to be universal in actual MPI implementations and is +widely assumed by MPI-using software. Technically, you need to query +the @code{MPI_IO} attribute of @code{MPI_COMM_WORLD} with +@code{MPI_Attr_get}. If this attribute is @code{MPI_PROC_NULL}, no +I/O is possible. If it is @code{MPI_ANY_SOURCE}, any process can +perform I/O. Otherwise, it is the rank of a process that can perform +I/O ... but since it is not guaranteed to yield the @emph{same} rank +on all processes, you have to do an @code{MPI_Allreduce} of some kind +if you want all processes to agree about which is going to do I/O. +And even then, the standard only guarantees that this process can +perform output, but not input. See e.g. @cite{Parallel Programming +with MPI} by P. S. Pacheco, section 8.1.3. 
Needless to say, in our +experience virtually no MPI programmers worry about this.} So, if we +want to export the wisdom from a single process to a file, we must +first export the wisdom to a string, then send it to process 0, then +write it to a file. + +Second, in principle we may want to have separate wisdom for every +process, since in general the processes may run on different hardware +even for a single MPI program. However, in practice FFTW's MPI code +is designed for the case of homogeneous hardware (@pxref{Load +balancing}), and in this case it is convenient to use the same wisdom +for every process. Thus, we need a mechanism to synchronize the wisdom. + +To address both of these problems, FFTW provides the following two +functions: + +@example +void fftw_mpi_broadcast_wisdom(MPI_Comm comm); +void fftw_mpi_gather_wisdom(MPI_Comm comm); +@end example +@findex fftw_mpi_gather_wisdom +@findex fftw_mpi_broadcast_wisdom + +Given a communicator @code{comm}, @code{fftw_mpi_broadcast_wisdom} +will broadcast the wisdom from process 0 to all other processes. +Conversely, @code{fftw_mpi_gather_wisdom} will collect wisdom from all +processes onto process 0. (If the plans created for the same problem +by different processes are not the same, @code{fftw_mpi_gather_wisdom} +will arbitrarily choose one of the plans.) Both of these functions +may result in suboptimal plans for different processes if the +processes are running on non-identical hardware. Both of these +functions are @emph{collective} calls, which means that they must be +executed by all processes in the communicator. 
+@cindex collective function + + +So, for example, a typical code snippet to import wisdom from a file +and use it on all processes would be: + +@example +@{ + int rank; + + fftw_mpi_init(); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) fftw_import_wisdom_from_filename("mywisdom"); + fftw_mpi_broadcast_wisdom(MPI_COMM_WORLD); +@} +@end example + +(Note that we must call @code{fftw_mpi_init} before importing any +wisdom that might contain MPI plans.) Similarly, a typical code +snippet to export wisdom from all processes to a file is: +@findex fftw_mpi_init + +@example +@{ + int rank; + + fftw_mpi_gather_wisdom(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) fftw_export_wisdom_to_filename("mywisdom"); +@} +@end example + +@c ------------------------------------------------------------ +@node Avoiding MPI Deadlocks, FFTW MPI Performance Tips, FFTW MPI Wisdom, Distributed-memory FFTW with MPI +@section Avoiding MPI Deadlocks +@cindex deadlock + +An MPI program can @emph{deadlock} if one process is waiting for a +message from another process that never gets sent. To avoid deadlocks +when using FFTW's MPI routines, it is important to know which +functions are @emph{collective}: that is, which functions must +@emph{always} be called in the @emph{same order} from @emph{every} +process in a given communicator. (For example, @code{MPI_Barrier} is +the canonical example of a collective function in the MPI standard.) +@cindex collective function +@findex MPI_Barrier + + +The functions in FFTW that are @emph{always} collective are: every +function beginning with @samp{fftw_mpi_plan}, as well as +@code{fftw_mpi_broadcast_wisdom} and @code{fftw_mpi_gather_wisdom}. +Also, the following functions from the ordinary FFTW interface are +collective when they are applied to a plan created by an +@samp{fftw_mpi_plan} function: @code{fftw_execute}, +@code{fftw_destroy_plan}, and @code{fftw_flops}. 
+@findex fftw_execute +@findex fftw_destroy_plan +@findex fftw_flops + +@c ------------------------------------------------------------ +@node FFTW MPI Performance Tips, Combining MPI and Threads, Avoiding MPI Deadlocks, Distributed-memory FFTW with MPI +@section FFTW MPI Performance Tips + +In this section, we collect a few tips on getting the best performance +out of FFTW's MPI transforms. + +First, because of the 1d block distribution, FFTW's parallelization is +currently limited by the size of the first dimension. +(Multidimensional block distributions may be supported by a future +version.) More generally, you should ideally arrange the dimensions so +that FFTW can divide them equally among the processes. @xref{Load +balancing}. +@cindex block distribution +@cindex load balancing + + +Second, if it is not too inconvenient, you should consider working +with transposed output for multidimensional plans, as this saves a +considerable amount of communications. @xref{Transposed distributions}. +@cindex transpose + + +Third, the fastest choices are generally either an in-place transform +or an out-of-place transform with the @code{FFTW_DESTROY_INPUT} flag +(which allows the input array to be used as scratch space). In-place +is especially beneficial if the amount of data per process is large. +@ctindex FFTW_DESTROY_INPUT + + +Fourth, if you have multiple arrays to transform at once, rather than +calling FFTW's MPI transforms several times it usually seems to be +faster to interleave the data and use the advanced interface. (This +groups the communications together instead of requiring separate +messages for each transform.) 
+ +@c ------------------------------------------------------------ +@node Combining MPI and Threads, FFTW MPI Reference, FFTW MPI Performance Tips, Distributed-memory FFTW with MPI +@section Combining MPI and Threads +@cindex threads + +In certain cases, it may be advantageous to combine MPI +(distributed-memory) and threads (shared-memory) parallelization. +FFTW supports this, with certain caveats. For example, if you have a +cluster of 4-processor shared-memory nodes, you may want to use +threads within the nodes and MPI between the nodes, instead of MPI for +all parallelization. + +In particular, it is possible to seamlessly combine the MPI FFTW +routines with the multi-threaded FFTW routines (@pxref{Multi-threaded +FFTW}). However, some care must be taken in the initialization code, +which should look something like this: + +@example +int threads_ok; + +int main(int argc, char **argv) +@{ + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); + threads_ok = provided >= MPI_THREAD_FUNNELED; + + if (threads_ok) threads_ok = fftw_init_threads(); + fftw_mpi_init(); + + ... + if (threads_ok) fftw_plan_with_nthreads(...); + ... + + MPI_Finalize(); +@} +@end example +@findex fftw_mpi_init +@findex fftw_init_threads +@findex fftw_plan_with_nthreads + +First, note that instead of calling @code{MPI_Init}, you should call +@code{MPI_Init_thread}, which is the initialization routine defined +by the MPI-2 standard to indicate to MPI that your program will be +multithreaded. We pass @code{MPI_THREAD_FUNNELED}, which indicates +that we will only call MPI routines from the main thread. (FFTW will +launch additional threads internally, but the extra threads will not +call MPI code.) (You may also pass @code{MPI_THREAD_SERIALIZED} or +@code{MPI_THREAD_MULTIPLE}, which requests additional multithreading +support from the MPI implementation, but this is not required by +FFTW.)
The @code{provided} parameter returns what level of threads +support is actually supported by your MPI implementation; this +@emph{must} be at least @code{MPI_THREAD_FUNNELED} if you want to call +the FFTW threads routines, so we define a global variable +@code{threads_ok} to record this. You should only call +@code{fftw_init_threads} or @code{fftw_plan_with_nthreads} if +@code{threads_ok} is true. For more information on thread safety in +MPI, see the +@uref{http://www.mpi-forum.org/docs/mpi-20-html/node162.htm, MPI and +Threads} section of the MPI-2 standard. +@cindex thread safety + + +Second, we must call @code{fftw_init_threads} @emph{before} +@code{fftw_mpi_init}. This is critical for technical reasons having +to do with how FFTW initializes its list of algorithms. + +Then, if you call @code{fftw_plan_with_nthreads(N)}, @emph{every} MPI +process will launch (up to) @code{N} threads to parallelize its transforms. + +For example, in the hypothetical cluster of 4-processor nodes, you +might wish to launch only a single MPI process per node, and then call +@code{fftw_plan_with_nthreads(4)} on each process to use all +processors in the nodes. + +This may or may not be faster than simply using as many MPI processes +as you have processors, however. On the one hand, using threads +within a node eliminates the need for explicit message passing within +the node. On the other hand, FFTW's transpose routines are not +multi-threaded, and this means that the communications that do take +place will not benefit from parallelization within the node. +Moreover, many MPI implementations already have optimizations to +exploit shared memory when it is available, so adding the +multithreaded FFTW on top of this may be superfluous. 
+@cindex transpose + +@c ------------------------------------------------------------ +@node FFTW MPI Reference, FFTW MPI Fortran Interface, Combining MPI and Threads, Distributed-memory FFTW with MPI +@section FFTW MPI Reference + +This chapter provides a complete reference to all FFTW MPI functions, +datatypes, and constants. See also @ref{FFTW Reference} for information +on functions and types in common with the serial interface. + +@menu +* MPI Files and Data Types:: +* MPI Initialization:: +* Using MPI Plans:: +* MPI Data Distribution Functions:: +* MPI Plan Creation:: +* MPI Wisdom Communication:: +@end menu + +@node MPI Files and Data Types, MPI Initialization, FFTW MPI Reference, FFTW MPI Reference +@subsection MPI Files and Data Types + +All programs using FFTW's MPI support should include its header file: + +@example +#include <fftw3-mpi.h> +@end example + +Note that this header file includes the serial-FFTW @code{fftw3.h} +header file, and also the @code{mpi.h} header file for MPI, so you +need not include those files separately. + +You must also link to @emph{both} the FFTW MPI library and to the +serial FFTW library. On Unix, this means adding @code{-lfftw3_mpi +-lfftw3 -lm} at the end of the link command. + +@cindex precision +Different precisions are handled as in the serial interface: +@xref{Precision}. That is, @samp{fftw_} functions become +@code{fftwf_} (in single precision) etcetera, and the libraries become +@code{-lfftw3f_mpi -lfftw3f -lm} etcetera on Unix. Long-double +precision is supported in MPI, but quad precision (@samp{fftwq_}) is +not due to the lack of MPI support for this type.
+ +@node MPI Initialization, Using MPI Plans, MPI Files and Data Types, FFTW MPI Reference +@subsection MPI Initialization + +Before calling any other FFTW MPI (@samp{fftw_mpi_}) function, and +before importing any wisdom for MPI problems, you must call: + +@findex fftw_mpi_init +@example +void fftw_mpi_init(void); +@end example + +@findex fftw_init_threads +If FFTW threads support is used, however, @code{fftw_mpi_init} should +be called @emph{after} @code{fftw_init_threads} (@pxref{Combining MPI +and Threads}). Calling @code{fftw_mpi_init} additional times (before +@code{fftw_mpi_cleanup}) has no effect. + + +If you want to deallocate all persistent data and reset FFTW to the +pristine state it was in when you started your program, you can call: + +@findex fftw_mpi_cleanup +@example +void fftw_mpi_cleanup(void); +@end example + +@findex fftw_cleanup +(This calls @code{fftw_cleanup}, so you need not call the serial +cleanup routine too, although it is safe to do so.) After calling +@code{fftw_mpi_cleanup}, all existing plans become undefined, and you +should not attempt to execute or destroy them. You must call +@code{fftw_mpi_init} again after @code{fftw_mpi_cleanup} if you want +to resume using the MPI FFTW routines. + +@node Using MPI Plans, MPI Data Distribution Functions, MPI Initialization, FFTW MPI Reference +@subsection Using MPI Plans + +Once an MPI plan is created, you can execute and destroy it using +@code{fftw_execute}, @code{fftw_destroy_plan}, and the other functions +in the serial interface that operate on generic plans (@pxref{Using +Plans}). + +@cindex collective function +@cindex MPI communicator +The @code{fftw_execute} and @code{fftw_destroy_plan} functions, applied to +MPI plans, are @emph{collective} calls: they must be called for all processes +in the communicator that was used to create the plan. 
+ +@cindex new-array execution +You must @emph{not} use the serial new-array plan-execution functions +@code{fftw_execute_dft} and so on (@pxref{New-array Execute +Functions}) with MPI plans. Such functions are specialized to the +problem type, and there are specific new-array execute functions for MPI plans: + +@findex fftw_mpi_execute_dft +@findex fftw_mpi_execute_dft_r2c +@findex fftw_mpi_execute_dft_c2r +@findex fftw_mpi_execute_r2r +@example +void fftw_mpi_execute_dft(fftw_plan p, fftw_complex *in, fftw_complex *out); +void fftw_mpi_execute_dft_r2c(fftw_plan p, double *in, fftw_complex *out); +void fftw_mpi_execute_dft_c2r(fftw_plan p, fftw_complex *in, double *out); +void fftw_mpi_execute_r2r(fftw_plan p, double *in, double *out); +@end example + +@cindex alignment +@findex fftw_malloc +These functions have the same restrictions as those of the serial +new-array execute functions. They are @emph{always} safe to apply to +the @emph{same} @code{in} and @code{out} arrays that were used to +create the plan. They can only be applied to new arrays if those +arrays have the same types, dimensions, in-placeness, and alignment as +the original arrays, where the best way to ensure the same alignment +is to use FFTW's @code{fftw_malloc} and related allocation functions +for all arrays (@pxref{Memory Allocation}). Note that distributed +transposes (@pxref{FFTW MPI Transposes}) use +@code{fftw_mpi_execute_r2r}, since they count as rank-zero r2r plans +from FFTW's perspective. + +@node MPI Data Distribution Functions, MPI Plan Creation, Using MPI Plans, FFTW MPI Reference +@subsection MPI Data Distribution Functions + +@cindex data distribution +As described above (@pxref{MPI Data Distribution}), in order to +allocate your arrays, @emph{before} creating a plan, you must first +call one of the following routines to determine the required +allocation size and the portion of the array locally stored on a given +process.
The @code{MPI_Comm} communicator passed here must be +equivalent to the communicator used below for plan creation. + +The basic interface for multidimensional transforms consists of the +functions: + +@findex fftw_mpi_local_size_2d +@findex fftw_mpi_local_size_3d +@findex fftw_mpi_local_size +@findex fftw_mpi_local_size_2d_transposed +@findex fftw_mpi_local_size_3d_transposed +@findex fftw_mpi_local_size_transposed +@example +ptrdiff_t fftw_mpi_local_size_2d(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); +ptrdiff_t fftw_mpi_local_size_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); +ptrdiff_t fftw_mpi_local_size(int rnk, const ptrdiff_t *n, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); + +ptrdiff_t fftw_mpi_local_size_2d_transposed(ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); +ptrdiff_t fftw_mpi_local_size_3d_transposed(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); +ptrdiff_t fftw_mpi_local_size_transposed(int rnk, const ptrdiff_t *n, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); +@end example + +These functions return the number of elements to allocate (complex +numbers for DFT/r2c/c2r plans, real numbers for r2r plans), whereas +the @code{local_n0} and @code{local_0_start} return the portion +(@code{local_0_start} to @code{local_0_start + local_n0 - 1}) of the +first dimension of an @ndims{} array that is stored on the local +process. @xref{Basic and advanced distribution interfaces}. 
For +@code{FFTW_MPI_TRANSPOSED_OUT} plans, the @samp{_transposed} variants +are useful in order to also return the local portion of the first +dimension in the @ndimstrans{} transposed output. +@xref{Transposed distributions}. +The advanced interface for multidimensional transforms is: + +@cindex advanced interface +@findex fftw_mpi_local_size_many +@findex fftw_mpi_local_size_many_transposed +@example +ptrdiff_t fftw_mpi_local_size_many(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t block0, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); +ptrdiff_t fftw_mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); +@end example + +These differ from the basic interface in only two ways. First, they +allow you to specify block sizes @code{block0} and @code{block1} (the +latter for the transposed output); you can pass +@code{FFTW_MPI_DEFAULT_BLOCK} to use FFTW's default block size as in +the basic interface. Second, you can pass a @code{howmany} parameter, +corresponding to the advanced planning interface below: this is for +transforms of contiguous @code{howmany}-tuples of numbers +(@code{howmany = 1} in the basic interface). 
+ +The corresponding basic and advanced routines for one-dimensional +transforms (currently only complex DFTs) are: + +@findex fftw_mpi_local_size_1d +@findex fftw_mpi_local_size_many_1d +@example +ptrdiff_t fftw_mpi_local_size_1d( + ptrdiff_t n0, MPI_Comm comm, int sign, unsigned flags, + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, + ptrdiff_t *local_no, ptrdiff_t *local_o_start); +ptrdiff_t fftw_mpi_local_size_many_1d( + ptrdiff_t n0, ptrdiff_t howmany, + MPI_Comm comm, int sign, unsigned flags, + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, + ptrdiff_t *local_no, ptrdiff_t *local_o_start); +@end example + +@ctindex FFTW_MPI_SCRAMBLED_OUT +@ctindex FFTW_MPI_SCRAMBLED_IN +As above, the return value is the number of elements to allocate +(complex numbers, for complex DFTs). The @code{local_ni} and +@code{local_i_start} arguments return the portion +(@code{local_i_start} to @code{local_i_start + local_ni - 1}) of the +1d array that is stored on this process for the transform +@emph{input}, and @code{local_no} and @code{local_o_start} are the +corresponding quantities for the @emph{output}. The @code{sign} +(@code{FFTW_FORWARD} or @code{FFTW_BACKWARD}) and @code{flags} must +match the arguments passed when creating a plan. Although the inputs +and outputs have different data distributions in general, it is +guaranteed that the @emph{output} data distribution of an +@code{FFTW_FORWARD} plan will match the @emph{input} data distribution +of an @code{FFTW_BACKWARD} plan and vice versa; similarly for the +@code{FFTW_MPI_SCRAMBLED_OUT} and @code{FFTW_MPI_SCRAMBLED_IN} flags. +@xref{One-dimensional distributions}.
+ +@node MPI Plan Creation, MPI Wisdom Communication, MPI Data Distribution Functions, FFTW MPI Reference +@subsection MPI Plan Creation + +@subsubheading Complex-data MPI DFTs + +Plans for complex-data DFTs (@pxref{2d MPI example}) are created by: + +@findex fftw_mpi_plan_dft_1d +@findex fftw_mpi_plan_dft_2d +@findex fftw_mpi_plan_dft_3d +@findex fftw_mpi_plan_dft +@findex fftw_mpi_plan_many_dft +@example +fftw_plan fftw_mpi_plan_dft_1d(ptrdiff_t n0, fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); +fftw_plan fftw_mpi_plan_dft_2d(ptrdiff_t n0, ptrdiff_t n1, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); +fftw_plan fftw_mpi_plan_dft_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); +fftw_plan fftw_mpi_plan_dft(int rnk, const ptrdiff_t *n, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); +fftw_plan fftw_mpi_plan_many_dft(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, ptrdiff_t block, ptrdiff_t tblock, + fftw_complex *in, fftw_complex *out, + MPI_Comm comm, int sign, unsigned flags); +@end example + +@cindex MPI communicator +@cindex collective function +These are similar to their serial counterparts (@pxref{Complex DFTs}) +in specifying the dimensions, sign, and flags of the transform. The +@code{comm} argument gives an MPI communicator that specifies the set +of processes to participate in the transform; plan creation is a +collective function that must be called for all processes in the +communicator. The @code{in} and @code{out} pointers refer only to a +portion of the overall transform data (@pxref{MPI Data Distribution}) +as specified by the @samp{local_size} functions in the previous +section. Unless @code{flags} contains @code{FFTW_ESTIMATE}, these +arrays are overwritten during plan creation as for the serial +interface. 
For multi-dimensional transforms, any dimensions @code{> +1} are supported; for one-dimensional transforms, only composite +(non-prime) @code{n0} are currently supported (unlike the serial +FFTW). Requesting an unsupported transform size will yield a +@code{NULL} plan. (As in the serial interface, highly composite sizes +generally yield the best performance.) + +@cindex advanced interface +@ctindex FFTW_MPI_DEFAULT_BLOCK +@cindex stride +The advanced-interface @code{fftw_mpi_plan_many_dft} additionally +allows you to specify the block sizes for the first dimension +(@code{block}) of the @ndims{} input data and the first dimension +(@code{tblock}) of the @ndimstrans{} transposed data (at intermediate +steps of the transform, and for the output if +@code{FFTW_MPI_TRANSPOSED_OUT} is specified in @code{flags}). These must +be the same block sizes as were passed to the corresponding +@samp{local_size} function; you can pass @code{FFTW_MPI_DEFAULT_BLOCK} +to use FFTW's default block size as in the basic interface. Also, the +@code{howmany} parameter specifies that the transform is of contiguous +@code{howmany}-tuples rather than individual complex numbers; this +corresponds to the same parameter in the serial advanced interface +(@pxref{Advanced Complex DFTs}) with @code{stride = howmany} and +@code{dist = 1}. + +@subsubheading MPI flags + +The @code{flags} can be any of those for the serial FFTW +(@pxref{Planner Flags}), and in addition may include one or more of +the following MPI-specific flags, which improve performance at the +cost of changing the output or input data formats. + +@itemize @bullet + +@item +@ctindex FFTW_MPI_SCRAMBLED_OUT +@ctindex FFTW_MPI_SCRAMBLED_IN +@code{FFTW_MPI_SCRAMBLED_OUT}, @code{FFTW_MPI_SCRAMBLED_IN}: valid for +1d transforms only, these flags indicate that the output/input of the +transform are in an undocumented ``scrambled'' order. 
A forward +@code{FFTW_MPI_SCRAMBLED_OUT} transform can be inverted by a backward +@code{FFTW_MPI_SCRAMBLED_IN} (times the usual 1/@i{N} normalization). +@xref{One-dimensional distributions}. + +@item +@ctindex FFTW_MPI_TRANSPOSED_OUT +@ctindex FFTW_MPI_TRANSPOSED_IN +@code{FFTW_MPI_TRANSPOSED_OUT}, @code{FFTW_MPI_TRANSPOSED_IN}: valid +for multidimensional (@code{rnk > 1}) transforms only, these flags +specify that the output or input of an @ndims{} transform is +transposed to @ndimstrans{}. @xref{Transposed distributions}. + +@end itemize + +@subsubheading Real-data MPI DFTs + +@cindex r2c +Plans for real-input/output (r2c/c2r) DFTs (@pxref{Multi-dimensional +MPI DFTs of Real Data}) are created by: + +@findex fftw_mpi_plan_dft_r2c_2d +@findex fftw_mpi_plan_dft_r2c_2d +@findex fftw_mpi_plan_dft_r2c_3d +@findex fftw_mpi_plan_dft_r2c +@findex fftw_mpi_plan_dft_c2r_2d +@findex fftw_mpi_plan_dft_c2r_2d +@findex fftw_mpi_plan_dft_c2r_3d +@findex fftw_mpi_plan_dft_c2r +@example +fftw_plan fftw_mpi_plan_dft_r2c_2d(ptrdiff_t n0, ptrdiff_t n1, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_r2c_2d(ptrdiff_t n0, ptrdiff_t n1, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_r2c_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_r2c(int rnk, const ptrdiff_t *n, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_c2r_2d(ptrdiff_t n0, ptrdiff_t n1, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_c2r_2d(ptrdiff_t n0, ptrdiff_t n1, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_c2r_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_dft_c2r(int rnk, const ptrdiff_t *n, + 
fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); +@end example + +Similar to the serial interface (@pxref{Real-data DFTs}), these +transform logically @ndims{} real data to/from @ndimshalf{} complex +data, representing the non-redundant half of the conjugate-symmetry +output of a real-input DFT (@pxref{Multi-dimensional Transforms}). +However, the real array must be stored within a padded @ndimspad{} +array (much like the in-place serial r2c transforms, but here for +out-of-place transforms as well). Currently, only multi-dimensional +(@code{rnk > 1}) r2c/c2r transforms are supported (requesting a plan +for @code{rnk = 1} will yield @code{NULL}). As explained above +(@pxref{Multi-dimensional MPI DFTs of Real Data}), the data +distribution of both the real and complex arrays is given by the +@samp{local_size} function called for the dimensions of the +@emph{complex} array. Similar to the other planning functions, the +input and output arrays are overwritten when the plan is created +except in @code{FFTW_ESTIMATE} mode. 
+ +As for the complex DFTs above, there is an advanced interface that +allows you to manually specify block sizes and to transform contiguous +@code{howmany}-tuples of real/complex numbers: + +@findex fftw_mpi_plan_many_dft_r2c +@findex fftw_mpi_plan_many_dft_c2r +@example +fftw_plan fftw_mpi_plan_many_dft_r2c + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + double *in, fftw_complex *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_many_dft_c2r + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + fftw_complex *in, double *out, + MPI_Comm comm, unsigned flags); +@end example + +@subsubheading MPI r2r transforms + +@cindex r2r +There are corresponding plan-creation routines for r2r +transforms (@pxref{More DFTs of Real Data}), currently supporting +multidimensional (@code{rnk > 1}) transforms only (@code{rnk = 1} will +yield a @code{NULL} plan): + +@example +fftw_plan fftw_mpi_plan_r2r_2d(ptrdiff_t n0, ptrdiff_t n1, + double *in, double *out, + MPI_Comm comm, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, + unsigned flags); +fftw_plan fftw_mpi_plan_r2r_3d(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, + double *in, double *out, + MPI_Comm comm, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, fftw_r2r_kind kind2, + unsigned flags); +fftw_plan fftw_mpi_plan_r2r(int rnk, const ptrdiff_t *n, + double *in, double *out, + MPI_Comm comm, const fftw_r2r_kind *kind, + unsigned flags); +fftw_plan fftw_mpi_plan_many_r2r(int rnk, const ptrdiff_t *n, + ptrdiff_t iblock, ptrdiff_t oblock, + double *in, double *out, + MPI_Comm comm, const fftw_r2r_kind *kind, + unsigned flags); +@end example + +The parameters are much the same as for the complex DFTs above, except +that the arrays are of real numbers (and hence the outputs of the +@samp{local_size} data-distribution functions should be interpreted as +counts of real rather than complex numbers). 
Also, the @code{kind} +parameters specify the r2r kinds along each dimension as for the +serial interface (@pxref{Real-to-Real Transform Kinds}). @xref{Other +Multi-dimensional Real-data MPI Transforms}. + +@subsubheading MPI transposition +@cindex transpose + +FFTW also provides routines to plan a transpose of a distributed +@code{n0} by @code{n1} array of real numbers, or an array of +@code{howmany}-tuples of real numbers with specified block sizes +(@pxref{FFTW MPI Transposes}): + +@findex fftw_mpi_plan_transpose +@findex fftw_mpi_plan_many_transpose +@example +fftw_plan fftw_mpi_plan_transpose(ptrdiff_t n0, ptrdiff_t n1, + double *in, double *out, + MPI_Comm comm, unsigned flags); +fftw_plan fftw_mpi_plan_many_transpose + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany, + ptrdiff_t block0, ptrdiff_t block1, + double *in, double *out, MPI_Comm comm, unsigned flags); +@end example + +@cindex new-array execution +@findex fftw_mpi_execute_r2r +These plans are used with the @code{fftw_mpi_execute_r2r} new-array +execute function (@pxref{Using MPI Plans }), since they count as (rank +zero) r2r plans from FFTW's perspective. + +@node MPI Wisdom Communication, , MPI Plan Creation, FFTW MPI Reference +@subsection MPI Wisdom Communication + +To facilitate synchronizing wisdom among the different MPI processes, +we provide two functions: + +@findex fftw_mpi_gather_wisdom +@findex fftw_mpi_broadcast_wisdom +@example +void fftw_mpi_gather_wisdom(MPI_Comm comm); +void fftw_mpi_broadcast_wisdom(MPI_Comm comm); +@end example + +The @code{fftw_mpi_gather_wisdom} function gathers all wisdom in the +given communicator @code{comm} to the process of rank 0 in the +communicator: that process obtains the union of all wisdom on all the +processes. As a side effect, some other processes will gain +additional wisdom from other processes, but only process 0 will gain +the complete union. 
+ +The @code{fftw_mpi_broadcast_wisdom} does the reverse: it exports +wisdom from process 0 in @code{comm} to all other processes in the +communicator, replacing any wisdom they currently have. + +@xref{FFTW MPI Wisdom}. + +@c ------------------------------------------------------------ +@node FFTW MPI Fortran Interface, , FFTW MPI Reference, Distributed-memory FFTW with MPI +@section FFTW MPI Fortran Interface +@cindex Fortran interface + +@cindex iso_c_binding +The FFTW MPI interface is callable from modern Fortran compilers +supporting the Fortran 2003 @code{iso_c_binding} standard for calling +C functions. As described in @ref{Calling FFTW from Modern Fortran}, +this means that you can directly call FFTW's C interface from Fortran +with only minor changes in syntax. There are, however, a few things +specific to the MPI interface to keep in mind: + +@itemize @bullet + +@item +Instead of including @code{fftw3.f03} as in @ref{Overview of Fortran +interface }, you should @code{include 'fftw3-mpi.f03'} (after +@code{use, intrinsic :: iso_c_binding} as before). The +@code{fftw3-mpi.f03} file includes @code{fftw3.f03}, so you should +@emph{not} @code{include} them both yourself. (You will also want to +include the MPI header file, usually via @code{include 'mpif.h'} or +similar, although this is not needed by @code{fftw3-mpi.f03} +@i{per se}.) (To use the @samp{fftwl_} @code{long double} extended-precision routines in supporting compilers, you should include @code{fftw3l-mpi.f03} in @emph{addition} to @code{fftw3-mpi.f03}. @xref{Extended and quadruple precision in Fortran}.) + +@item +Because of the different storage conventions between C and Fortran, +you reverse the order of your array dimensions when passing them to +FFTW (@pxref{Reversing array dimensions}). This is merely a +difference in notation and incurs no performance overhead. 
However, +it means that, whereas in C the @emph{first} dimension is distributed, +in Fortran the @emph{last} dimension of your array is distributed. + +@item +@cindex MPI communicator +In Fortran, communicators are stored as @code{integer} types; there is +no @code{MPI_Comm} type, nor is there any way to access a C +@code{MPI_Comm}. Fortunately, this is taken care of for you by the +FFTW Fortran interface: whenever the C interface expects an +@code{MPI_Comm} type, you should pass the Fortran communicator as an +@code{integer}.@footnote{Technically, this is because you aren't +actually calling the C functions directly. You are calling wrapper +functions that translate the communicator with @code{MPI_Comm_f2c} +before calling the ordinary C interface. This is all done +transparently, however, since the @code{fftw3-mpi.f03} interface file +renames the wrappers so that they are called in Fortran with the same +names as the C interface functions.} + +@item +Because you need to call the @samp{local_size} function to find out +how much space to allocate, and this may be @emph{larger} than the +local portion of the array (@pxref{MPI Data Distribution}), you should +@emph{always} allocate your arrays dynamically using FFTW's allocation +routines as described in @ref{Allocating aligned memory in Fortran}. +(Coincidentally, this also provides the best performance by +guaranteeing proper data alignment.) + +@item +Because all sizes in the MPI FFTW interface are declared as +@code{ptrdiff_t} in C, you should use @code{integer(C_INTPTR_T)} in +Fortran (@pxref{FFTW Fortran type reference}). + +@item +@findex fftw_execute_dft +@findex fftw_mpi_execute_dft +@cindex new-array execution +In Fortran, because of the language semantics, we generally recommend +using the new-array execute functions for all plans, even in the +common case where you are executing the plan on the same arrays for +which the plan was created (@pxref{Plan execution in Fortran}). 
+However, note that in the MPI interface these functions are changed: +@code{fftw_execute_dft} becomes @code{fftw_mpi_execute_dft}, +etcetera. @xref{Using MPI Plans}. + +@end itemize + +For example, here is a Fortran code snippet to perform a distributed +@twodims{L,M} complex DFT in-place. (This assumes you have already +initialized MPI with @code{MPI_init} and have also performed +@code{call fftw_mpi_init}.) + +@example + use, intrinsic :: iso_c_binding + include 'fftw3-mpi.f03' + integer(C_INTPTR_T), parameter :: L = ... + integer(C_INTPTR_T), parameter :: M = ... + type(C_PTR) :: plan, cdata + complex(C_DOUBLE_COMPLEX), pointer :: data(:,:) + integer(C_INTPTR_T) :: i, j, alloc_local, local_M, local_j_offset + +! @r{get local data size and allocate (note dimension reversal)} + alloc_local = fftw_mpi_local_size_2d(M, L, MPI_COMM_WORLD, & + local_M, local_j_offset) + cdata = fftw_alloc_complex(alloc_local) + call c_f_pointer(cdata, data, [L,local_M]) + +! @r{create MPI plan for in-place forward DFT (note dimension reversal)} + plan = fftw_mpi_plan_dft_2d(M, L, data, data, MPI_COMM_WORLD, & + FFTW_FORWARD, FFTW_MEASURE) + +! @r{initialize data to some function} my_function(i,j) + do j = 1, local_M + do i = 1, L + data(i, j) = my_function(i, j + local_j_offset) + end do + end do + +! @r{compute transform (as many times as desired)} + call fftw_mpi_execute_dft(plan, data, data) + + call fftw_destroy_plan(plan) + call fftw_free(cdata) +@end example + +Note that we called @code{fftw_mpi_local_size_2d} and +@code{fftw_mpi_plan_dft_2d} with the dimensions in reversed order, +since a @twodims{L,M} Fortran array is viewed by FFTW in C as a +@twodims{M, L} array. This means that the array was distributed over +the @code{M} dimension, the local portion of which is a +@twodims{L,local_M} array in Fortran. 
(You must @emph{not} use an +@code{allocate} statement to allocate an @twodims{L,local_M} array, +however; you must allocate @code{alloc_local} complex numbers, which +may be greater than @code{L * local_M}, in order to reserve space for +intermediate steps of the transform.) Finally, we mention that +because C's array indices are zero-based, the @code{local_j_offset} +argument can conveniently be interpreted as an offset in the 1-based +@code{j} index (rather than as a starting index as in C). + +If instead you had used the @code{ior(FFTW_MEASURE, +FFTW_MPI_TRANSPOSED_OUT)} flag, the output of the transform would be a +transposed @twodims{M,local_L} array, associated with the @emph{same} +@code{cdata} allocation (since the transform is in-place), and which +you could declare with: + +@example + complex(C_DOUBLE_COMPLEX), pointer :: tdata(:,:) + ... + call c_f_pointer(cdata, tdata, [M,local_L]) +@end example + +where @code{local_L} would have been obtained by changing the +@code{fftw_mpi_local_size_2d} call to: + +@example + alloc_local = fftw_mpi_local_size_2d_transposed(M, L, MPI_COMM_WORLD, & + local_M, local_j_offset, local_L, local_i_offset) +@end example diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/other.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/other.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,398 @@ +@node Other Important Topics, FFTW Reference, Tutorial, Top +@chapter Other Important Topics +@menu +* SIMD alignment and fftw_malloc:: +* Multi-dimensional Array Format:: +* Words of Wisdom-Saving Plans:: +* Caveats in Using Wisdom:: +@end menu + +@c ------------------------------------------------------------ +@node SIMD alignment and fftw_malloc, Multi-dimensional Array Format, Other Important Topics, Other Important Topics +@section SIMD alignment and fftw_malloc + +SIMD, which stands for ``Single Instruction Multiple Data,'' is a set of +special operations supported by some processors to perform a 
single +operation on several numbers (usually 2 or 4) simultaneously. SIMD +floating-point instructions are available on several popular CPUs: +SSE/SSE2/AVX on recent x86/x86-64 processors, AltiVec (single precision) +on some PowerPCs (Apple G4 and higher), NEON on some ARM models, and MIPS Paired Single +(currently only in FFTW 3.2.x). FFTW can be compiled to support the +SIMD instructions on any of these systems. +@cindex SIMD +@cindex SSE +@cindex SSE2 +@cindex AVX +@cindex AltiVec +@cindex MIPS PS +@cindex precision + + +A program linking to an FFTW library compiled with SIMD support can +obtain a nonnegligible speedup for most complex and r2c/c2r +transforms. In order to obtain this speedup, however, the arrays of +complex (or real) data passed to FFTW must be specially aligned in +memory (typically 16-byte aligned), and often this alignment is more +stringent than that provided by the usual @code{malloc} (etc.) +allocation routines. + +@cindex portability +In order to guarantee proper alignment for SIMD, therefore, in case +your program is ever linked against a SIMD-using FFTW, we recommend +allocating your transform data with @code{fftw_malloc} and +de-allocating it with @code{fftw_free}. +@findex fftw_malloc +@findex fftw_free +These have exactly the same interface and behavior as +@code{malloc}/@code{free}, except that for a SIMD FFTW they ensure +that the returned pointer has the necessary alignment (by calling +@code{memalign} or its equivalent on your OS). + +You are not @emph{required} to use @code{fftw_malloc}. You can +allocate your data in any way that you like, from @code{malloc} to +@code{new} (in C++) to a fixed-size array declaration. If the array +happens not to be properly aligned, FFTW will not use the SIMD +extensions. 
+@cindex C++ + +@findex fftw_alloc_real +@findex fftw_alloc_complex +Since @code{fftw_malloc} only ever needs to be used for real and +complex arrays, we provide two convenient wrapper routines +@code{fftw_alloc_real(N)} and @code{fftw_alloc_complex(N)} that are +equivalent to @code{(double*)fftw_malloc(sizeof(double) * N)} and +@code{(fftw_complex*)fftw_malloc(sizeof(fftw_complex) * N)}, +respectively (or their equivalents in other precisions). + +@c ------------------------------------------------------------ +@node Multi-dimensional Array Format, Words of Wisdom-Saving Plans, SIMD alignment and fftw_malloc, Other Important Topics +@section Multi-dimensional Array Format + +This section describes the format in which multi-dimensional arrays +are stored in FFTW. We felt that a detailed discussion of this topic +was necessary. Since several different formats are common, this topic +is often a source of confusion. + +@menu +* Row-major Format:: +* Column-major Format:: +* Fixed-size Arrays in C:: +* Dynamic Arrays in C:: +* Dynamic Arrays in C-The Wrong Way:: +@end menu + +@c =========> +@node Row-major Format, Column-major Format, Multi-dimensional Array Format, Multi-dimensional Array Format +@subsection Row-major Format +@cindex row-major + +The multi-dimensional arrays passed to @code{fftw_plan_dft} etcetera +are expected to be stored as a single contiguous block in +@dfn{row-major} order (sometimes called ``C order''). Basically, this +means that as you step through adjacent memory locations, the first +dimension's index varies most slowly and the last dimension's index +varies most quickly. + +To be more explicit, let us consider an array of rank @math{d} whose +dimensions are @ndims{}. Now, we specify a location in the array by a +sequence of @math{d} (zero-based) indices, one for each dimension: +@tex +$(i_0, i_1, i_2, \ldots, i_{d-1})$. +@end tex +@ifinfo +(i[0], i[1], ..., i[d-1]). +@end ifinfo +@html +(i<sub>0</sub>, i<sub>1</sub>, i<sub>2</sub>,..., i<sub>d-1</sub>). 
+@end html +If the array is stored in row-major +order, then this element is located at the position +@tex +$i_{d-1} + n_{d-1} (i_{d-2} + n_{d-2} (\ldots + n_1 i_0))$. +@end tex +@ifinfo +i[d-1] + n[d-1] * (i[d-2] + n[d-2] * (... + n[1] * i[0])). +@end ifinfo +@html +i<sub>d-1</sub> + n<sub>d-1</sub> * (i<sub>d-2</sub> + n<sub>d-2</sub> * (... + n<sub>1</sub> * i<sub>0</sub>)). +@end html + +Note that, for the ordinary complex DFT, each element of the array +must be of type @code{fftw_complex}; i.e. a (real, imaginary) pair of +(double-precision) numbers. + +In the advanced FFTW interface, the physical dimensions @math{n} from +which the indices are computed can be different from (larger than) +the logical dimensions of the transform to be computed, in order to +transform a subset of a larger array. +@cindex advanced interface +Note also that, in the advanced interface, the expression above is +multiplied by a @dfn{stride} to get the actual array index---this is +useful in situations where each element of the multi-dimensional array +is actually a data structure (or another array), and you just want to +transform a single field. In the basic interface, however, the stride +is 1. +@cindex stride + +@c =========> +@node Column-major Format, Fixed-size Arrays in C, Row-major Format, Multi-dimensional Array Format +@subsection Column-major Format +@cindex column-major + +Readers from the Fortran world are used to arrays stored in +@dfn{column-major} order (sometimes called ``Fortran order''). This is +essentially the exact opposite of row-major order in that, here, the +@emph{first} dimension's index varies most quickly. + +If you have an array stored in column-major order and wish to +transform it using FFTW, it is quite easy to do. When creating the +plan, simply pass the dimensions of the array to the planner in +@emph{reverse order}. 
For example, if your array is a rank three +@code{N x M x L} matrix in column-major order, you should pass the +dimensions of the array as if it were an @code{L x M x N} matrix +(which it is, from the perspective of FFTW). This is done for you +@emph{automatically} by the FFTW legacy-Fortran interface +(@pxref{Calling FFTW from Legacy Fortran}), but you must do it +manually with the modern Fortran interface (@pxref{Reversing array +dimensions}). +@cindex Fortran interface + +@c =========> +@node Fixed-size Arrays in C, Dynamic Arrays in C, Column-major Format, Multi-dimensional Array Format +@subsection Fixed-size Arrays in C +@cindex C multi-dimensional arrays + +A multi-dimensional array whose size is declared at compile time in C +is @emph{already} in row-major order. You don't have to do anything +special to transform it. For example: + +@example +@{ + fftw_complex data[N0][N1][N2]; + fftw_plan plan; + ... + plan = fftw_plan_dft_3d(N0, N1, N2, &data[0][0][0], &data[0][0][0], + FFTW_FORWARD, FFTW_ESTIMATE); + ... +@} +@end example + +This will plan a 3d in-place transform of size @code{N0 x N1 x N2}. +Notice how we took the address of the zero-th element to pass to the +planner (we could also have used a typecast). + +However, we tend to @emph{discourage} users from declaring their +arrays in this way, for two reasons. First, this allocates the array +on the stack (``automatic'' storage), which has a very limited size on +most operating systems (declaring an array with more than a few +thousand elements will often cause a crash). (You can get around this +limitation on many systems by declaring the array as +@code{static} and/or global, but that has its own drawbacks.) +Second, it may not optimally align the array for use with a SIMD +FFTW (@pxref{SIMD alignment and fftw_malloc}). Instead, we recommend +using @code{fftw_malloc}, as described below. 
+ +@c =========> +@node Dynamic Arrays in C, Dynamic Arrays in C-The Wrong Way, Fixed-size Arrays in C, Multi-dimensional Array Format +@subsection Dynamic Arrays in C + +We recommend allocating most arrays dynamically, with +@code{fftw_malloc}. This isn't too hard to do, although it is not as +straightforward for multi-dimensional arrays as it is for +one-dimensional arrays. + +Creating the array is simple: using a dynamic-allocation routine like +@code{fftw_malloc}, allocate an array big enough to store N +@code{fftw_complex} values (for a complex DFT), where N is the product +of the sizes of the array dimensions (i.e. the total number of complex +values in the array). For example, here is code to allocate a +@threedims{5,12,27} rank-3 array: +@findex fftw_malloc + +@example +fftw_complex *an_array; +an_array = (fftw_complex*) fftw_malloc(5*12*27 * sizeof(fftw_complex)); +@end example + +Accessing the array elements, however, is more tricky---you can't +simply use multiple applications of the @samp{[]} operator like you +could for fixed-size arrays. Instead, you have to explicitly compute +the offset into the array using the formula given earlier for +row-major arrays. For example, to reference the @math{(i,j,k)}-th +element of the array allocated above, you would use the expression +@code{an_array[k + 27 * (j + 12 * i)]}. + +This pain can be alleviated somewhat by defining appropriate macros, +or, in C++, creating a class and overloading the @samp{()} operator. +The recent C99 standard provides a way to reinterpret the dynamic +array as a ``variable-length'' multi-dimensional array amenable to +@samp{[]}, but this feature is not yet widely supported by compilers. 
+@cindex C99 +@cindex C++ + +@c =========> +@node Dynamic Arrays in C-The Wrong Way, , Dynamic Arrays in C, Multi-dimensional Array Format +@subsection Dynamic Arrays in C---The Wrong Way + +A different method for allocating multi-dimensional arrays in C is +often suggested that is incompatible with FFTW: @emph{using it will +cause FFTW to die a painful death}. We discuss the technique here, +however, because it is so commonly known and used. This method is to +create arrays of pointers of arrays of pointers of @dots{}etcetera. +For example, the analogue in this method to the example above is: + +@example +int i,j; +fftw_complex ***a_bad_array; /* @r{another way to make a 5x12x27 array} */ + +a_bad_array = (fftw_complex ***) malloc(5 * sizeof(fftw_complex **)); +for (i = 0; i < 5; ++i) @{ + a_bad_array[i] = + (fftw_complex **) malloc(12 * sizeof(fftw_complex *)); + for (j = 0; j < 12; ++j) + a_bad_array[i][j] = + (fftw_complex *) malloc(27 * sizeof(fftw_complex)); +@} +@end example + +As you can see, this sort of array is inconvenient to allocate (and +deallocate). On the other hand, it has the advantage that the +@math{(i,j,k)}-th element can be referenced simply by +@code{a_bad_array[i][j][k]}. + +If you like this technique and want to maximize convenience in accessing +the array, but still want to pass the array to FFTW, you can use a +hybrid method. Allocate the array as one contiguous block, but also +declare an array of arrays of pointers that point to appropriate places +in the block. That sort of trick is beyond the scope of this +documentation; for more information on multi-dimensional arrays in C, +see the @code{comp.lang.c} +@uref{http://c-faq.com/aryptr/dynmuldimary.html, FAQ}. 
+ +@c ------------------------------------------------------------ +@node Words of Wisdom-Saving Plans, Caveats in Using Wisdom, Multi-dimensional Array Format, Other Important Topics +@section Words of Wisdom---Saving Plans +@cindex wisdom +@cindex saving plans to disk + +FFTW implements a method for saving plans to disk and restoring them. +In fact, what FFTW does is more general than just saving and loading +plans. The mechanism is called @dfn{wisdom}. Here, we describe +this feature at a high level. @xref{FFTW Reference}, for a less casual +but more complete discussion of how to use wisdom in FFTW. + +Plans created with the @code{FFTW_MEASURE}, @code{FFTW_PATIENT}, or +@code{FFTW_EXHAUSTIVE} options produce near-optimal FFT performance, +but may require a long time to compute because FFTW must measure the +runtime of many possible plans and select the best one. This setup is +designed for the situations where so many transforms of the same size +must be computed that the start-up time is irrelevant. For short +initialization times, but slower transforms, we have provided +@code{FFTW_ESTIMATE}. The @code{wisdom} mechanism is a way to get the +best of both worlds: you compute a good plan once, save it to +disk, and later reload it as many times as necessary. The wisdom +mechanism can actually save and reload many plans at once, not just +one. +@ctindex FFTW_MEASURE +@ctindex FFTW_PATIENT +@ctindex FFTW_EXHAUSTIVE +@ctindex FFTW_ESTIMATE + + +Whenever you create a plan, the FFTW planner accumulates wisdom, which +is information sufficient to reconstruct the plan. After planning, +you can save this information to disk by means of the function: +@example +int fftw_export_wisdom_to_filename(const char *filename); +@end example +@findex fftw_export_wisdom_to_filename +(This function returns non-zero on success.) 
+ +The next time you run the program, you can restore the wisdom with +@code{fftw_import_wisdom_from_filename} (which also returns non-zero on success), +and then recreate the plan using the same flags as before. +@example +int fftw_import_wisdom_from_filename(const char *filename); +@end example +@findex fftw_import_wisdom_from_filename + +Wisdom is automatically used for any size to which it is applicable, as +long as the planner flags are not more ``patient'' than those with which +the wisdom was created. For example, wisdom created with +@code{FFTW_MEASURE} can be used if you later plan with +@code{FFTW_ESTIMATE} or @code{FFTW_MEASURE}, but not with +@code{FFTW_PATIENT}. + +The @code{wisdom} is cumulative, and is stored in a global, private +data structure managed internally by FFTW. The storage space required +is minimal, proportional to the logarithm of the sizes the wisdom was +generated from. If memory usage is a concern, however, the wisdom can +be forgotten and its associated memory freed by calling: +@example +void fftw_forget_wisdom(void); +@end example +@findex fftw_forget_wisdom + +Wisdom can be exported to a file, a string, or any other medium. +For details, see @ref{Wisdom}. + +@node Caveats in Using Wisdom, , Words of Wisdom-Saving Plans, Other Important Topics +@section Caveats in Using Wisdom +@cindex wisdom, problems with + +@quotation +@html +<i> +@end html +For in much wisdom is much grief, and he that increaseth knowledge +increaseth sorrow. +@html +</i> +@end html +[Ecclesiastes 1:18] +@cindex Ecclesiastes +@end quotation +@iftex +@medskip +@end iftex + +@cindex portability +There are pitfalls to using wisdom, in that it can negate FFTW's +ability to adapt to changing hardware and other conditions. For +example, it would be perfectly possible to export wisdom from a +program running on one processor and import it into a program running +on another processor. 
Doing so, however, would mean that the second +program would use plans optimized for the first processor, instead of +the one it is running on. + +It should be safe to reuse wisdom as long as the hardware and program +binaries remain unchanged. (Actually, the optimal plan may change even +between runs of the same binary on identical hardware, due to +differences in the virtual memory environment, etcetera. Users +seriously interested in performance should worry about this problem, +too.) It is likely that, if the same wisdom is used for two +different program binaries, even running on the same machine, the +plans may be sub-optimal because of differing code alignments. It is +therefore wise to recreate wisdom every time an application is +recompiled. The more the underlying hardware and software changes +between the creation of wisdom and its use, the greater grows +the risk of sub-optimal plans. + +Nevertheless, if the choice is between using @code{FFTW_ESTIMATE} or +using possibly-suboptimal wisdom (created on the same machine, but for a +different binary), the wisdom is likely to be better. For this reason, +we provide a function to import wisdom from a standard system-wide +location (@code{/etc/fftw/wisdom} on Unix): +@cindex wisdom, system-wide + +@example +int fftw_import_system_wisdom(void); +@end example +@findex fftw_import_system_wisdom + +FFTW also provides a standalone program, @code{fftw-wisdom} (described +by its own @code{man} page on Unix) with which users can create wisdom, +e.g. for a canonical set of sizes to store in the system wisdom file. +@xref{Wisdom Utilities}. 
+@cindex fftw-wisdom utility + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/reference.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/reference.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2454 @@ +@node FFTW Reference, Multi-threaded FFTW, Other Important Topics, Top +@chapter FFTW Reference + +This chapter provides a complete reference for all sequential (i.e., +one-processor) FFTW functions. Parallel transforms are described in +later chapters. + +@menu +* Data Types and Files:: +* Using Plans:: +* Basic Interface:: +* Advanced Interface:: +* Guru Interface:: +* New-array Execute Functions:: +* Wisdom:: +* What FFTW Really Computes:: +@end menu + +@c ------------------------------------------------------------ +@node Data Types and Files, Using Plans, FFTW Reference, FFTW Reference +@section Data Types and Files + +All programs using FFTW should include its header file: + +@example +#include <fftw3.h> +@end example + +You must also link to the FFTW library. On Unix, this +means adding @code{-lfftw3 -lm} at the @emph{end} of the link command. + +@menu +* Complex numbers:: +* Precision:: +* Memory Allocation:: +@end menu + +@c =========> +@node Complex numbers, Precision, Data Types and Files, Data Types and Files +@subsection Complex numbers + +The default FFTW interface uses @code{double} precision for all +floating-point numbers, and defines a @code{fftw_complex} type to hold +complex numbers as: + +@example +typedef double fftw_complex[2]; +@end example +@tindex fftw_complex + +Here, the @code{[0]} element holds the real part and the @code{[1]} +element holds the imaginary part. + +Alternatively, if you have a C compiler (such as @code{gcc}) that +supports the C99 revision of the ANSI C standard, you can use C's new +native complex type (which is binary-compatible with the typedef above). 
+In particular, if you @code{#include <complex.h>} @emph{before} +@code{<fftw3.h>}, then @code{fftw_complex} is defined to be the native +complex type and you can manipulate it with ordinary arithmetic +(e.g. @code{x = y * (3+4*I)}, where @code{x} and @code{y} are +@code{fftw_complex} and @code{I} is the standard symbol for the +imaginary unit). +@cindex C99 + + +C++ has its own @code{complex<T>} template class, defined in the +standard @code{<complex>} header file. Reportedly, the C++ standards +committee has recently agreed to mandate that the storage format used +for this type be binary-compatible with the C99 type, i.e. an array +@code{T[2]} with consecutive real @code{[0]} and imaginary @code{[1]} +parts. (See report +@uref{http://www.open-std.org/jtc1/sc22/WG21/docs/papers/2002/n1388.pdf +WG21/N1388}.) Although not part of the official standard as of this +writing, the proposal stated that: ``This solution has been tested with +all current major implementations of the standard library and shown to +be working.'' To the extent that this is true, if you have a variable +@code{complex<double> *x}, you can pass it directly to FFTW via +@code{reinterpret_cast<fftw_complex*>(x)}. +@cindex C++ +@cindex portability + +@c =========> +@node Precision, Memory Allocation, Complex numbers, Data Types and Files +@subsection Precision +@cindex precision + +You can install single and long-double precision versions of FFTW, +which replace @code{double} with @code{float} and @code{long double}, +respectively (@pxref{Installation and Customization}). To use these +interfaces, you: + +@itemize @bullet + +@item +Link to the single/long-double libraries; on Unix, @code{-lfftw3f} or +@code{-lfftw3l} instead of (or in addition to) @code{-lfftw3}. (You +can link to the different-precision libraries simultaneously.) + +@item +Include the @emph{same} @code{<fftw3.h>} header file. + +@item +Replace all lowercase instances of @samp{fftw_} with @samp{fftwf_} or +@samp{fftwl_} for single or long-double precision, respectively. 
+(@code{fftw_complex} becomes @code{fftwf_complex}, @code{fftw_execute} +becomes @code{fftwf_execute}, etcetera.) + +@item +Uppercase names, i.e. names beginning with @samp{FFTW_}, remain the +same. + +@item +Replace @code{double} with @code{float} or @code{long double} for +subroutine parameters. + +@end itemize + +Depending upon your compiler and/or hardware, @code{long double} may not +be any more precise than @code{double} (or may not be supported at all, +although it is standard in C99). +@cindex C99 + + +We also support using the nonstandard @code{__float128} +quadruple-precision type provided by recent versions of @code{gcc} on +32- and 64-bit x86 hardware (@pxref{Installation and Customization}). +To use this type, link with @code{-lfftw3q -lquadmath -lm} (the +@code{libquadmath} library provided by @code{gcc} is needed for +quadruple-precision trigonometric functions) and use @samp{fftwq_} +identifiers. + +@c =========> +@node Memory Allocation, , Precision, Data Types and Files +@subsection Memory Allocation + +@example +void *fftw_malloc(size_t n); +void fftw_free(void *p); +@end example +@findex fftw_malloc +@findex fftw_free + +These are functions that behave identically to @code{malloc} and +@code{free}, except that they guarantee that the returned pointer obeys +any special alignment restrictions imposed by any algorithm in FFTW +(e.g. for SIMD acceleration). @xref{SIMD alignment and fftw_malloc}. +@cindex alignment + + +Data allocated by @code{fftw_malloc} @emph{must} be deallocated by +@code{fftw_free} and not by the ordinary @code{free}. + +These routines simply call through to your operating system's +@code{malloc} or, if necessary, its aligned equivalent +(e.g. @code{memalign}), so you normally need not worry about any +significant time or space overhead. You are @emph{not required} to use +them to allocate your data, but we strongly recommend it. 
+ +Note: in C++, just as with ordinary @code{malloc}, you must typecast +the output of @code{fftw_malloc} to whatever pointer type you are +allocating. +@cindex C++ + + +We also provide the following two convenience functions to allocate +real and complex arrays with @code{n} elements, which are equivalent +to @code{(double *) fftw_malloc(sizeof(double) * n)} and +@code{(fftw_complex *) fftw_malloc(sizeof(fftw_complex) * n)}, +respectively: + +@example +double *fftw_alloc_real(size_t n); +fftw_complex *fftw_alloc_complex(size_t n); +@end example +@findex fftw_alloc_real +@findex fftw_alloc_complex + +The equivalent functions in other precisions allocate arrays of @code{n} +elements in that precision. e.g. @code{fftwf_alloc_real(n)} is +equivalent to @code{(float *) fftwf_malloc(sizeof(float) * n)}. +@cindex precision + +@c ------------------------------------------------------------ +@node Using Plans, Basic Interface, Data Types and Files, FFTW Reference +@section Using Plans + +Plans for all transform types in FFTW are stored as type +@code{fftw_plan} (an opaque pointer type), and are created by one of the +various planning routines described in the following sections. +@tindex fftw_plan +An @code{fftw_plan} contains all information necessary to compute the +transform, including the pointers to the input and output arrays. + +@example +void fftw_execute(const fftw_plan plan); +@end example +@findex fftw_execute + +This executes the @code{plan}, to compute the corresponding transform on +the arrays for which it was planned (which must still exist). The plan +is not modified, and @code{fftw_execute} can be called as many times as +desired. + +To apply a given plan to a different array, you can use the new-array execute +interface. @xref{New-array Execute Functions}. + +@code{fftw_execute} (and equivalents) is the only function in FFTW +guaranteed to be thread-safe; see @ref{Thread safety}. 
+ +This function: +@example +void fftw_destroy_plan(fftw_plan plan); +@end example +@findex fftw_destroy_plan +deallocates the @code{plan} and all its associated data. + +FFTW's planner saves some other persistent data, such as the +accumulated wisdom and a list of algorithms available in the current +configuration. If you want to deallocate all of that and reset FFTW +to the pristine state it was in when you started your program, you can +call: + +@example +void fftw_cleanup(void); +@end example +@findex fftw_cleanup + +After calling @code{fftw_cleanup}, all existing plans become undefined, +and you should not attempt to execute them nor to destroy them. You can +however create and execute/destroy new plans, in which case FFTW starts +accumulating wisdom information again. + +@code{fftw_cleanup} does not deallocate your plans, however. To prevent +memory leaks, you must still call @code{fftw_destroy_plan} before +executing @code{fftw_cleanup}. + +Occasionally, it may be useful to know FFTW's internal ``cost'' metric +that it uses to compare plans to one another; this cost is +proportional to an execution time of the plan, in undocumented units, +if the plan was created with the @code{FFTW_MEASURE} or other +timing-based options, or alternatively is a heuristic cost function +for @code{FFTW_ESTIMATE} plans. (The cost values of measured and +estimated plans are not comparable, being in different units. Also, +costs from different FFTW versions or the same version compiled +differently may not be in the same units. Plans created from wisdom +have a cost of 0 since no timing measurement is performed for them. +Finally, certain problems for which only one top-level algorithm was +possible may have required no measurements of the cost of the whole +plan, in which case @code{fftw_cost} will also return 0.) 
The cost +metric for a given plan is returned by: + +@example +double fftw_cost(const fftw_plan plan); +@end example +@findex fftw_cost + +The following two routines are provided purely for academic purposes +(that is, for entertainment). + +@example +void fftw_flops(const fftw_plan plan, + double *add, double *mul, double *fma); +@end example +@findex fftw_flops + +Given a @code{plan}, set @code{add}, @code{mul}, and @code{fma} to an +exact count of the number of floating-point additions, multiplications, +and fused multiply-add operations involved in the plan's execution. The +total number of floating-point operations (flops) is @code{add + mul + +2*fma}, or @code{add + mul + fma} if the hardware supports fused +multiply-add instructions (although the number of FMA operations is only +approximate because of compiler voodoo). (The number of operations +should be an integer, but we use @code{double} to avoid overflowing +@code{int} for large transforms; the arguments are of type @code{double} +even for single and long-double precision versions of FFTW.) + +@example +void fftw_fprint_plan(const fftw_plan plan, FILE *output_file); +void fftw_print_plan(const fftw_plan plan); +char *fftw_sprint_plan(const fftw_plan plan); +@end example +@findex fftw_fprint_plan +@findex fftw_print_plan + +This outputs a ``nerd-readable'' representation of the @code{plan} to +the given file, to @code{stdout}, or to a newly allocated +NUL-terminated string (which the caller is responsible for deallocating +with @code{free}), respectively. 
+ +@c ------------------------------------------------------------ +@node Basic Interface, Advanced Interface, Using Plans, FFTW Reference +@section Basic Interface +@cindex basic interface + +Recall that the FFTW API is divided into three parts@footnote{@i{Gallia est +omnis divisa in partes tres} (Julius Caesar).}: the @dfn{basic interface} +computes a single transform of contiguous data, the @dfn{advanced +interface} computes transforms of multiple or strided arrays, and the +@dfn{guru interface} supports the most general data layouts, +multiplicities, and strides. This section describes the basic +interface, which we expect to satisfy the needs of most users. + +@menu +* Complex DFTs:: +* Planner Flags:: +* Real-data DFTs:: +* Real-data DFT Array Format:: +* Real-to-Real Transforms:: +* Real-to-Real Transform Kinds:: +@end menu + +@c =========> +@node Complex DFTs, Planner Flags, Basic Interface, Basic Interface +@subsection Complex DFTs + +@example +fftw_plan fftw_plan_dft_1d(int n0, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +fftw_plan fftw_plan_dft_2d(int n0, int n1, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +fftw_plan fftw_plan_dft(int rank, const int *n, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +@end example +@findex fftw_plan_dft_1d +@findex fftw_plan_dft_2d +@findex fftw_plan_dft_3d +@findex fftw_plan_dft + +Plan a complex input/output discrete Fourier transform (DFT) in zero or +more dimensions, returning an @code{fftw_plan} (@pxref{Using Plans}). + +Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + +The planner returns @code{NULL} if the plan cannot be created. 
In the +standard FFTW distribution, the basic interface is guaranteed to return +a non-@code{NULL} plan. A plan may be @code{NULL}, however, if you are +using a customized FFTW configuration supporting a restricted set of +transforms. + +@subsubheading Arguments +@itemize @bullet + +@item +@code{rank} is the rank of the transform (it should be the size of the +array @code{*n}), and can be any non-negative integer. (@xref{Complex +Multi-Dimensional DFTs}, for the definition of ``rank''.) The +@samp{_1d}, @samp{_2d}, and @samp{_3d} planners correspond to a +@code{rank} of @code{1}, @code{2}, and @code{3}, respectively. The rank +may be zero, which is equivalent to a rank-1 transform of size 1, i.e. a +copy of one number from input to output. + +@item +@code{n0}, @code{n1}, @code{n2}, or @code{n[0..rank-1]} (as appropriate +for each routine) specify the size of the transform dimensions. They +can be any positive integer. + +@itemize @minus +@item +@cindex row-major +Multi-dimensional arrays are stored in row-major order with dimensions: +@code{n0} x @code{n1}; or @code{n0} x @code{n1} x @code{n2}; or +@code{n[0]} x @code{n[1]} x ... x @code{n[rank-1]}. +@xref{Multi-dimensional Array Format}. +@item +FFTW is best at handling sizes of the form +@ifinfo +@math{2^a 3^b 5^c 7^d 11^e 13^f}, +@end ifinfo +@tex +$2^a 3^b 5^c 7^d 11^e 13^f$, +@end tex +@html +2<sup>a</sup> 3<sup>b</sup> 5<sup>c</sup> 7<sup>d</sup> + 11<sup>e</sup> 13<sup>f</sup>, +@end html +where @math{e+f} is either @math{0} or @math{1}, and the other exponents +are arbitrary. Other sizes are computed by means of a slow, +general-purpose algorithm (which nevertheless retains @Onlogn{} performance even for prime sizes). It is possible to customize FFTW +for different array sizes; see @ref{Installation and Customization}. +Transforms whose sizes are powers of @math{2} are especially fast. +@end itemize + +@item +@code{in} and @code{out} point to the input and output arrays of the +transform, which may be the same (yielding an in-place transform). 
+@cindex in-place +These arrays are overwritten during planning, unless +@code{FFTW_ESTIMATE} is used in the flags. (The arrays need not be +initialized, but they must be allocated.) + +If @code{in == out}, the transform is @dfn{in-place} and the input +array is overwritten. If @code{in != out}, the two arrays must +not overlap (but FFTW does not check for this condition). + +@item +@ctindex FFTW_FORWARD +@ctindex FFTW_BACKWARD +@code{sign} is the sign of the exponent in the formula that defines the +Fourier transform. It can be @math{-1} (= @code{FFTW_FORWARD}) or +@math{+1} (= @code{FFTW_BACKWARD}). + +@item +@cindex flags +@code{flags} is a bitwise OR (@samp{|}) of zero or more planner flags, +as defined in @ref{Planner Flags}. + +@end itemize + +FFTW computes an unnormalized transform: computing a forward followed by +a backward transform (or vice versa) will result in the original data +multiplied by the size of the transform (the product of the dimensions). +@cindex normalization +For more information, see @ref{What FFTW Really Computes}. + +@c =========> +@node Planner Flags, Real-data DFTs, Complex DFTs, Basic Interface +@subsection Planner Flags + +All of the planner routines in FFTW accept an integer @code{flags} +argument, which is a bitwise OR (@samp{|}) of zero or more of the flag +constants defined below. These flags control the rigor (and time) of +the planning process, and can also impose (or lift) restrictions on the +type of transform algorithm that is employed. + +@emph{Important:} the planner overwrites the input array during +planning unless a saved plan (@pxref{Wisdom}) is available for that +problem, so you should initialize your input data after creating the +plan. The only exceptions to this are the @code{FFTW_ESTIMATE} and +@code{FFTW_WISDOM_ONLY} flags, as mentioned below. + +In all cases, if wisdom is available for the given problem that was +created with equal-or-greater planning rigor, then the more rigorous +wisdom is used. 
For example, in @code{FFTW_ESTIMATE} mode any available +wisdom is used, whereas in @code{FFTW_PATIENT} mode only wisdom created +in patient or exhaustive mode can be used. @xref{Words of Wisdom-Saving +Plans}. + +@subsubheading Planning-rigor flags +@itemize @bullet + +@item +@ctindex FFTW_ESTIMATE +@code{FFTW_ESTIMATE} specifies that, instead of actual measurements of +different algorithms, a simple heuristic is used to pick a (probably +sub-optimal) plan quickly. With this flag, the input/output arrays are +not overwritten during planning. + +@item +@ctindex FFTW_MEASURE +@code{FFTW_MEASURE} tells FFTW to find an optimized plan by actually +@emph{computing} several FFTs and measuring their execution time. +Depending on your machine, this can take some time (often a few +seconds). @code{FFTW_MEASURE} is the default planning option. + +@item +@ctindex FFTW_PATIENT +@code{FFTW_PATIENT} is like @code{FFTW_MEASURE}, but considers a wider +range of algorithms and often produces a ``more optimal'' plan +(especially for large transforms), but at the expense of several times +longer planning time (especially for large transforms). + +@item +@ctindex FFTW_EXHAUSTIVE +@code{FFTW_EXHAUSTIVE} is like @code{FFTW_PATIENT}, but considers an +even wider range of algorithms, including many that we think are +unlikely to be fast, to produce the most optimal plan but with a +substantially increased planning time. + +@item +@ctindex FFTW_WISDOM_ONLY +@code{FFTW_WISDOM_ONLY} is a special planning mode in which the plan +is only created if wisdom is available for the given problem, and +otherwise a @code{NULL} plan is returned. This can be combined with +other flags, e.g. @samp{FFTW_WISDOM_ONLY | FFTW_PATIENT} creates a +plan only if wisdom is available that was created in +@code{FFTW_PATIENT} or @code{FFTW_EXHAUSTIVE} mode. 
The +@code{FFTW_WISDOM_ONLY} flag is intended for users who need to detect +whether wisdom is available; for example, if wisdom is not available +one may wish to allocate new arrays for planning so that user data is +not overwritten. + +@end itemize + +@subsubheading Algorithm-restriction flags +@itemize @bullet + +@item +@ctindex FFTW_DESTROY_INPUT +@code{FFTW_DESTROY_INPUT} specifies that an out-of-place transform is +allowed to @emph{overwrite its input} array with arbitrary data; this +can sometimes allow more efficient algorithms to be employed. +@cindex out-of-place + +@item +@ctindex FFTW_PRESERVE_INPUT +@code{FFTW_PRESERVE_INPUT} specifies that an out-of-place transform must +@emph{not change its input} array. This is ordinarily the +@emph{default}, except for c2r and hc2r (i.e. complex-to-real) +transforms for which @code{FFTW_DESTROY_INPUT} is the default. In the +latter cases, passing @code{FFTW_PRESERVE_INPUT} will attempt to use +algorithms that do not destroy the input, at the expense of worse +performance; for multi-dimensional c2r transforms, however, no +input-preserving algorithms are implemented and the planner will return +@code{NULL} if one is requested. +@cindex c2r +@cindex hc2r + +@item +@ctindex FFTW_UNALIGNED +@cindex alignment +@findex fftw_malloc +@findex fftw_alignment_of +@code{FFTW_UNALIGNED} specifies that the algorithm may not impose any +unusual alignment requirements on the input/output arrays (i.e. no +SIMD may be used). This flag is normally @emph{not necessary}, since +the planner automatically detects misaligned arrays. The only use for +this flag is if you want to use the new-array execute interface to +execute a given plan on a different array that may not be aligned like +the original. (Using @code{fftw_malloc} makes this flag unnecessary +even then. You can also use @code{fftw_alignment_of} to detect +whether two arrays are equivalently aligned.) 
+ +@end itemize + +@subsubheading Limiting planning time + +@example +extern void fftw_set_timelimit(double seconds); +@end example +@findex fftw_set_timelimit + +This function instructs FFTW to spend at most @code{seconds} seconds +(approximately) in the planner. If @code{seconds == +FFTW_NO_TIMELIMIT} (the default value, which is negative), then +planning time is unbounded. Otherwise, FFTW plans with a +progressively wider range of algorithms until the given time limit +is reached or the given range of algorithms is explored, returning the +best available plan. +@ctindex FFTW_NO_TIMELIMIT + + +For example, specifying @code{FFTW_PATIENT} first plans in +@code{FFTW_ESTIMATE} mode, then in @code{FFTW_MEASURE} mode, then +finally (time permitting) in @code{FFTW_PATIENT}. If +@code{FFTW_EXHAUSTIVE} is specified instead, the planner will further +progress to @code{FFTW_EXHAUSTIVE} mode. + +Note that the @code{seconds} argument specifies only a rough limit; in +practice, the planner may use somewhat more time if the time limit is +reached when the planner is in the middle of an operation that cannot +be interrupted. At the very least, the planner will complete planning +in @code{FFTW_ESTIMATE} mode (which is thus equivalent to a time limit +of 0). 
+ + +@c =========> +@node Real-data DFTs, Real-data DFT Array Format, Planner Flags, Basic Interface +@subsection Real-data DFTs + +@example +fftw_plan fftw_plan_dft_r2c_1d(int n0, + double *in, fftw_complex *out, + unsigned flags); +fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1, + double *in, fftw_complex *out, + unsigned flags); +fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2, + double *in, fftw_complex *out, + unsigned flags); +fftw_plan fftw_plan_dft_r2c(int rank, const int *n, + double *in, fftw_complex *out, + unsigned flags); +@end example +@findex fftw_plan_dft_r2c_1d +@findex fftw_plan_dft_r2c_2d +@findex fftw_plan_dft_r2c_3d +@findex fftw_plan_dft_r2c +@cindex r2c + +Plan a real-input/complex-output discrete Fourier transform (DFT) in +zero or more dimensions, returning an @code{fftw_plan} (@pxref{Using +Plans}). + +Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + +The planner returns @code{NULL} if the plan cannot be created. A +non-@code{NULL} plan is always returned by the basic interface unless +you are using a customized FFTW configuration supporting a restricted +set of transforms, or if you use the @code{FFTW_PRESERVE_INPUT} flag +with a multi-dimensional out-of-place c2r transform (see below). + +@subsubheading Arguments +@itemize @bullet + +@item +@code{rank} is the rank of the transform (it should be the size of the +array @code{*n}), and can be any non-negative integer. (@xref{Complex +Multi-Dimensional DFTs}, for the definition of ``rank''.) The +@samp{_1d}, @samp{_2d}, and @samp{_3d} planners correspond to a +@code{rank} of @code{1}, @code{2}, and @code{3}, respectively. The rank +may be zero, which is equivalent to a rank-1 transform of size 1, i.e. a +copy of one real number (with zero imaginary part) from input to output. 
+ +@item +@code{n0}, @code{n1}, @code{n2}, or @code{n[0..rank-1]}, (as appropriate +for each routine) specify the size of the transform dimensions. They +can be any positive integer. This is different in general from the +@emph{physical} array dimensions, which are described in @ref{Real-data +DFT Array Format}. + +@itemize @minus +@item +FFTW is best at handling sizes of the form +@ifinfo +@math{2^a 3^b 5^c 7^d 11^e 13^f}, +@end ifinfo +@tex +$2^a 3^b 5^c 7^d 11^e 13^f$, +@end tex +@html +2<sup>a</sup> 3<sup>b</sup> 5<sup>c</sup> 7<sup>d</sup> + 11<sup>e</sup> 13<sup>f</sup>, +@end html +where @math{e+f} is either @math{0} or @math{1}, and the other exponents +are arbitrary. Other sizes are computed by means of a slow, +general-purpose algorithm (which nevertheless retains @Onlogn{} performance even for prime sizes). (It is possible to customize FFTW +for different array sizes; see @ref{Installation and Customization}.) +Transforms whose sizes are powers of @math{2} are especially fast, and +it is generally beneficial for the @emph{last} dimension of an r2c/c2r +transform to be @emph{even}. +@end itemize + +@item +@code{in} and @code{out} point to the input and output arrays of the +transform, which may be the same (yielding an in-place transform). +@cindex in-place +These arrays are overwritten during planning, unless +@code{FFTW_ESTIMATE} is used in the flags. (The arrays need not be +initialized, but they must be allocated.) For an in-place transform, it +is important to remember that the real array will require padding, +described in @ref{Real-data DFT Array Format}. +@cindex padding + +@item +@cindex flags +@code{flags} is a bitwise OR (@samp{|}) of zero or more planner flags, +as defined in @ref{Planner Flags}. 
+ +@end itemize + +The inverse transforms, taking complex input (storing the non-redundant +half of a logically Hermitian array) to real output, are given by: + +@example +fftw_plan fftw_plan_dft_c2r_1d(int n0, + fftw_complex *in, double *out, + unsigned flags); +fftw_plan fftw_plan_dft_c2r_2d(int n0, int n1, + fftw_complex *in, double *out, + unsigned flags); +fftw_plan fftw_plan_dft_c2r_3d(int n0, int n1, int n2, + fftw_complex *in, double *out, + unsigned flags); +fftw_plan fftw_plan_dft_c2r(int rank, const int *n, + fftw_complex *in, double *out, + unsigned flags); +@end example +@findex fftw_plan_dft_c2r_1d +@findex fftw_plan_dft_c2r_2d +@findex fftw_plan_dft_c2r_3d +@findex fftw_plan_dft_c2r +@cindex c2r + +The arguments are the same as for the r2c transforms, except that the +input and output data formats are reversed. + +FFTW computes an unnormalized transform: computing an r2c followed by a +c2r transform (or vice versa) will result in the original data +multiplied by the size of the transform (the product of the logical +dimensions). +@cindex normalization +An r2c transform produces the same output as a @code{FFTW_FORWARD} +complex DFT of the same input, and a c2r transform is correspondingly +equivalent to @code{FFTW_BACKWARD}. For more information, see @ref{What +FFTW Really Computes}. + +@c =========> +@node Real-data DFT Array Format, Real-to-Real Transforms, Real-data DFTs, Basic Interface +@subsection Real-data DFT Array Format +@cindex r2c/c2r multi-dimensional array format + +The output of a DFT of real data (r2c) contains symmetries that, in +principle, make half of the outputs redundant (@pxref{What FFTW Really +Computes}). (Similarly for the input of an inverse c2r transform.) In +practice, it is not possible to entirely realize these savings in an +efficient and understandable format that generalizes to +multi-dimensional transforms. 
Instead, the output of the r2c +transforms is @emph{slightly} over half of the output of the +corresponding complex transform. We do not ``pack'' the data in any +way, but store it as an ordinary array of @code{fftw_complex} values. +In fact, this data is simply a subsection of what would be the array in +the corresponding complex transform. + +Specifically, for a real transform of @math{d} (= @code{rank}) +dimensions @ndims{}, the complex data is an @ndimshalf array of +@code{fftw_complex} values in row-major order (with the division rounded +down). That is, we only store the @emph{lower} half (non-negative +frequencies), plus one element, of the last dimension of the data from +the ordinary complex transform. (We could have instead taken half of +any other dimension, but implementation turns out to be simpler if the +last, contiguous, dimension is used.) + +@cindex out-of-place +For an out-of-place transform, the real data is simply an array with +physical dimensions @ndims in row-major order. + +@cindex in-place +@cindex padding +For an in-place transform, some complications arise since the complex data +is slightly larger than the real data. In this case, the final +dimension of the real data must be @emph{padded} with extra values to +accommodate the size of the complex data---two extra if the last +dimension is even and one if it is odd. That is, the last dimension of +the real data must physically contain +@tex +$2 (n_{d-1}/2+1)$ +@end tex +@ifinfo +2 * (n[d-1]/2+1) +@end ifinfo +@html +2 * (n<sub>d-1</sub>/2+1) +@end html +@code{double} values (exactly enough to hold the complex data). This +physical array size does not, however, change the @emph{logical} array +size---only +@tex +$n_{d-1}$ +@end tex +@ifinfo +n[d-1] +@end ifinfo +@html +n<sub>d-1</sub> +@end html +values are actually stored in the last dimension, and +@tex +$n_{d-1}$ +@end tex +@ifinfo +n[d-1] +@end ifinfo +@html +n<sub>d-1</sub> +@end html +is the last dimension passed to the planner. 
+ +@c =========> +@node Real-to-Real Transforms, Real-to-Real Transform Kinds, Real-data DFT Array Format, Basic Interface +@subsection Real-to-Real Transforms +@cindex r2r + +@example +fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out, + fftw_r2r_kind kind, unsigned flags); +fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, + unsigned flags); +fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2, + double *in, double *out, + fftw_r2r_kind kind0, + fftw_r2r_kind kind1, + fftw_r2r_kind kind2, + unsigned flags); +fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out, + const fftw_r2r_kind *kind, unsigned flags); +@end example +@findex fftw_plan_r2r_1d +@findex fftw_plan_r2r_2d +@findex fftw_plan_r2r_3d +@findex fftw_plan_r2r + +Plan a real input/output (r2r) transform of various kinds in zero or +more dimensions, returning an @code{fftw_plan} (@pxref{Using Plans}). + +Once you have created a plan for a certain transform type and +parameters, then creating another plan of the same type and parameters, +but for different arrays, is fast and shares constant data with the +first plan (if it still exists). + +The planner returns @code{NULL} if the plan cannot be created. A +non-@code{NULL} plan is always returned by the basic interface unless +you are using a customized FFTW configuration supporting a restricted +set of transforms, or for size-1 @code{FFTW_REDFT00} kinds (which are +not defined). +@ctindex FFTW_REDFT00 + +@subsubheading Arguments +@itemize @bullet + +@item +@code{rank} is the dimensionality of the transform (it should be the +size of the arrays @code{*n} and @code{*kind}), and can be any +non-negative integer. The @samp{_1d}, @samp{_2d}, and @samp{_3d} +planners correspond to a @code{rank} of @code{1}, @code{2}, and +@code{3}, respectively. A @code{rank} of zero is equivalent to a copy +of one number from input to output. 
+ +@item +@code{n}, or @code{n0}/@code{n1}/@code{n2}, or @code{n[rank]}, +respectively, gives the (physical) size of the transform dimensions. +They can be any positive integer. + +@itemize @minus +@item +@cindex row-major +Multi-dimensional arrays are stored in row-major order with dimensions: +@code{n0} x @code{n1}; or @code{n0} x @code{n1} x @code{n2}; or +@code{n[0]} x @code{n[1]} x ... x @code{n[rank-1]}. +@xref{Multi-dimensional Array Format}. +@item +FFTW is generally best at handling sizes of the form +@ifinfo +@math{2^a 3^b 5^c 7^d 11^e 13^f}, +@end ifinfo +@tex +$2^a 3^b 5^c 7^d 11^e 13^f$, +@end tex +@html +2<sup>a</sup> 3<sup>b</sup> 5<sup>c</sup> 7<sup>d</sup> + 11<sup>e</sup> 13<sup>f</sup>, +@end html +where @math{e+f} is either @math{0} or @math{1}, and the other exponents +are arbitrary. Other sizes are computed by means of a slow, +general-purpose algorithm (which nevertheless retains @Onlogn{} performance even for prime sizes). (It is possible to customize FFTW +for different array sizes; see @ref{Installation and Customization}.) +Transforms whose sizes are powers of @math{2} are especially fast. +@item +For a @code{REDFT00} or @code{RODFT00} transform kind in a dimension of +size @math{n}, it is @math{n-1} or @math{n+1}, respectively, that +should be factorizable in the above form. +@end itemize + +@item +@code{in} and @code{out} point to the input and output arrays of the +transform, which may be the same (yielding an in-place transform). +@cindex in-place +These arrays are overwritten during planning, unless +@code{FFTW_ESTIMATE} is used in the flags. (The arrays need not be +initialized, but they must be allocated.) + +@item +@code{kind}, or @code{kind0}/@code{kind1}/@code{kind2}, or +@code{kind[rank]}, is the kind of r2r transform used for the +corresponding dimension. The valid kind constants are described in +@ref{Real-to-Real Transform Kinds}.
In a multi-dimensional transform, +what is computed is the separable product formed by taking each +transform kind along the corresponding dimension, one dimension after +another. + +@item +@cindex flags +@code{flags} is a bitwise OR (@samp{|}) of zero or more planner flags, +as defined in @ref{Planner Flags}. + +@end itemize + +@c =========> +@node Real-to-Real Transform Kinds, , Real-to-Real Transforms, Basic Interface +@subsection Real-to-Real Transform Kinds +@cindex kind (r2r) + +FFTW currently supports 11 different r2r transform kinds, specified by +one of the constants below. For the precise definitions of these +transforms, see @ref{What FFTW Really Computes}. For a more colloquial +introduction to these transform kinds, see @ref{More DFTs of Real Data}. + +For a dimension of size @code{n}, there is a corresponding ``logical'' +dimension @code{N} that determines the normalization (and the optimal +factorization); the formula for @code{N} is given for each kind below. +Also, with each transform kind is listed its corresponding inverse +transform. FFTW computes unnormalized transforms: a transform followed +by its inverse will result in the original data multiplied by @code{N} +(or the product of the @code{N}'s for each dimension, in +multi-dimensions). +@cindex normalization + +@itemize @bullet + +@item +@ctindex FFTW_R2HC +@code{FFTW_R2HC} computes a real-input DFT with output in +``halfcomplex'' format, i.e. real and imaginary parts for a transform of +size @code{n} stored as: +@tex +$$ +r_0, r_1, r_2, \ldots, r_{n/2}, i_{(n+1)/2-1}, \ldots, i_2, i_1 +$$ +@end tex +@ifinfo +r0, r1, r2, ..., r(n/2), i((n+1)/2-1), ..., i2, i1 +@end ifinfo +@html +

    +r<sub>0</sub>, r<sub>1</sub>, r<sub>2</sub>, ..., r<sub>n/2</sub>, i<sub>(n+1)/2-1</sub>, ..., i<sub>2</sub>, i<sub>1</sub> +

    +@end html +(Logical @code{N=n}, inverse is @code{FFTW_HC2R}.) + +@item +@ctindex FFTW_HC2R +@code{FFTW_HC2R} computes the reverse of @code{FFTW_R2HC}, above. +(Logical @code{N=n}, inverse is @code{FFTW_R2HC}.) + +@item +@ctindex FFTW_DHT +@code{FFTW_DHT} computes a discrete Hartley transform. +(Logical @code{N=n}, inverse is @code{FFTW_DHT}.) +@cindex discrete Hartley transform + +@item +@ctindex FFTW_REDFT00 +@code{FFTW_REDFT00} computes an REDFT00 transform, i.e. a DCT-I. +(Logical @code{N=2*(n-1)}, inverse is @code{FFTW_REDFT00}.) +@cindex discrete cosine transform +@cindex DCT + +@item +@ctindex FFTW_REDFT10 +@code{FFTW_REDFT10} computes an REDFT10 transform, i.e. a DCT-II (sometimes called ``the'' DCT). +(Logical @code{N=2*n}, inverse is @code{FFTW_REDFT01}.) + +@item +@ctindex FFTW_REDFT01 +@code{FFTW_REDFT01} computes an REDFT01 transform, i.e. a DCT-III (sometimes called ``the'' IDCT, being the inverse of DCT-II). +(Logical @code{N=2*n}, inverse is @code{FFTW_REDFT10}.) +@cindex IDCT + +@item +@ctindex FFTW_REDFT11 +@code{FFTW_REDFT11} computes an REDFT11 transform, i.e. a DCT-IV. +(Logical @code{N=2*n}, inverse is @code{FFTW_REDFT11}.) + +@item +@ctindex FFTW_RODFT00 +@code{FFTW_RODFT00} computes an RODFT00 transform, i.e. a DST-I. +(Logical @code{N=2*(n+1)}, inverse is @code{FFTW_RODFT00}.) +@cindex discrete sine transform +@cindex DST + +@item +@ctindex FFTW_RODFT10 +@code{FFTW_RODFT10} computes an RODFT10 transform, i.e. a DST-II. +(Logical @code{N=2*n}, inverse is @code{FFTW_RODFT01}.) + +@item +@ctindex FFTW_RODFT01 +@code{FFTW_RODFT01} computes an RODFT01 transform, i.e. a DST-III. +(Logical @code{N=2*n}, inverse is @code{FFTW_RODFT10}.) + +@item +@ctindex FFTW_RODFT11 +@code{FFTW_RODFT11} computes an RODFT11 transform, i.e. a DST-IV. +(Logical @code{N=2*n}, inverse is @code{FFTW_RODFT11}.)
+ +@end itemize + +@c ------------------------------------------------------------ +@node Advanced Interface, Guru Interface, Basic Interface, FFTW Reference +@section Advanced Interface +@cindex advanced interface + +FFTW's ``advanced'' interface supplements the basic interface with four +new planner routines, providing a new level of flexibility: you can plan +a transform of multiple arrays simultaneously, operate on non-contiguous +(strided) data, and transform a subset of a larger multi-dimensional +array. Other than these additional features, the planner operates in +the same fashion as in the basic interface, and the resulting +@code{fftw_plan} is used in the same way (@pxref{Using Plans}). + +@menu +* Advanced Complex DFTs:: +* Advanced Real-data DFTs:: +* Advanced Real-to-real Transforms:: +@end menu + +@c =========> +@node Advanced Complex DFTs, Advanced Real-data DFTs, Advanced Interface, Advanced Interface +@subsection Advanced Complex DFTs + +@example +fftw_plan fftw_plan_many_dft(int rank, const int *n, int howmany, + fftw_complex *in, const int *inembed, + int istride, int idist, + fftw_complex *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags); +@end example +@findex fftw_plan_many_dft + +This routine plans multiple multidimensional complex DFTs, and it +extends the @code{fftw_plan_dft} routine (@pxref{Complex DFTs}) to +compute @code{howmany} transforms, each having rank @code{rank} and size +@code{n}. In addition, the transform data need not be contiguous, but +it may be laid out in memory with an arbitrary stride. To account for +these possibilities, @code{fftw_plan_many_dft} adds the new parameters +@code{howmany}, @{@code{i},@code{o}@}@code{nembed}, +@{@code{i},@code{o}@}@code{stride}, and +@{@code{i},@code{o}@}@code{dist}. The FFTW basic interface +(@pxref{Complex DFTs}) provides routines specialized for ranks 1, 2, +and@tie{}3, but the advanced interface handles only the general-rank +case. 
+ +@code{howmany} is the number of transforms to compute. The resulting +plan computes @code{howmany} transforms, where the input of the +@code{k}-th transform is at location @code{in+k*idist} (in C pointer +arithmetic), and its output is at location @code{out+k*odist}. Plans +obtained in this way can often be faster than calling FFTW multiple +times for the individual transforms. The basic @code{fftw_plan_dft} +interface corresponds to @code{howmany=1} (in which case the @code{dist} +parameters are ignored). +@cindex howmany parameter +@cindex dist + + +Each of the @code{howmany} transforms has rank @code{rank} and size +@code{n}, as in the basic interface. In addition, the advanced +interface allows the input and output arrays of each transform to be +row-major subarrays of larger rank-@code{rank} arrays, described by +@code{inembed} and @code{onembed} parameters, respectively. +@{@code{i},@code{o}@}@code{nembed} must be arrays of length @code{rank}, +and @code{n} should be elementwise less than or equal to +@{@code{i},@code{o}@}@code{nembed}. Passing @code{NULL} for an +@code{nembed} parameter is equivalent to passing @code{n} (i.e. same +physical and logical dimensions, as in the basic interface.) + +The @code{stride} parameters indicate that the @code{j}-th element of +the input or output arrays is located at @code{j*istride} or +@code{j*ostride}, respectively. (For a multi-dimensional array, +@code{j} is the ordinary row-major index.) When combined with the +@code{k}-th transform in a @code{howmany} loop, from above, this means +that the (@code{j},@code{k})-th element is at @code{j*stride+k*dist}. +(The basic @code{fftw_plan_dft} interface corresponds to a stride of 1.) +@cindex stride + + +For in-place transforms, the input and output @code{stride} and +@code{dist} parameters should be the same; otherwise, the planner may +return @code{NULL}. + +Arrays @code{n}, @code{inembed}, and @code{onembed} are not used after +this function returns. 
You can safely free or reuse them. + +@strong{Examples}: +One transform of one 5 by 6 array contiguous in memory: +@example + int rank = 2; + int n[] = @{5, 6@}; + int howmany = 1; + int idist = odist = 0; /* unused because howmany = 1 */ + int istride = ostride = 1; /* array is contiguous in memory */ + int *inembed = n, *onembed = n; +@end example + +Transform of three 5 by 6 arrays, each contiguous in memory, +stored in memory one after another: +@example + int rank = 2; + int n[] = @{5, 6@}; + int howmany = 3; + int idist = odist = n[0]*n[1]; /* = 30, the distance in memory + between the first element + of the first array and the + first element of the second array */ + int istride = ostride = 1; /* array is contiguous in memory */ + int *inembed = n, *onembed = n; +@end example + +Transform each column of a 2d array with 10 rows and 3 columns: +@example + int rank = 1; /* not 2: we are computing 1d transforms */ + int n[] = @{10@}; /* 1d transforms of length 10 */ + int howmany = 3; + int idist = odist = 1; + int istride = ostride = 3; /* distance between two elements in + the same column */ + int *inembed = n, *onembed = n; +@end example + +@c =========> +@node Advanced Real-data DFTs, Advanced Real-to-real Transforms, Advanced Complex DFTs, Advanced Interface +@subsection Advanced Real-data DFTs + +@example +fftw_plan fftw_plan_many_dft_r2c(int rank, const int *n, int howmany, + double *in, const int *inembed, + int istride, int idist, + fftw_complex *out, const int *onembed, + int ostride, int odist, + unsigned flags); +fftw_plan fftw_plan_many_dft_c2r(int rank, const int *n, int howmany, + fftw_complex *in, const int *inembed, + int istride, int idist, + double *out, const int *onembed, + int ostride, int odist, + unsigned flags); +@end example +@findex fftw_plan_many_dft_r2c +@findex fftw_plan_many_dft_c2r + +Like @code{fftw_plan_many_dft}, these two functions add @code{howmany}, +@code{nembed}, @code{stride}, and @code{dist} parameters to the 
+@code{fftw_plan_dft_r2c} and @code{fftw_plan_dft_c2r} functions, but +otherwise behave the same as the basic interface. + +The interpretation of @code{howmany}, @code{stride}, and @code{dist} is +the same as for @code{fftw_plan_many_dft}, above. Note that the +@code{stride} and @code{dist} for the real array are in units of +@code{double}, and for the complex array are in units of +@code{fftw_complex}. + +If an @code{nembed} parameter is @code{NULL}, it is interpreted as what +it would be in the basic interface, as described in @ref{Real-data DFT +Array Format}. That is, for the complex array the size is assumed to be +the same as @code{n}, but with the last dimension cut roughly in half. +For the real array, the size is assumed to be @code{n} if the transform +is out-of-place, or @code{n} with the last dimension ``padded'' if the +transform is in-place. + +If an @code{nembed} parameter is non-@code{NULL}, it is interpreted as +the physical size of the corresponding array, in row-major order, just +as for @code{fftw_plan_many_dft}. In this case, each dimension of +@code{nembed} should be @code{>=} what it would be in the basic +interface (e.g. the halved or padded @code{n}). + +Arrays @code{n}, @code{inembed}, and @code{onembed} are not used after +this function returns. You can safely free or reuse them. + +@c =========> +@node Advanced Real-to-real Transforms, , Advanced Real-data DFTs, Advanced Interface +@subsection Advanced Real-to-real Transforms + +@example +fftw_plan fftw_plan_many_r2r(int rank, const int *n, int howmany, + double *in, const int *inembed, + int istride, int idist, + double *out, const int *onembed, + int ostride, int odist, + const fftw_r2r_kind *kind, unsigned flags); +@end example +@findex fftw_plan_many_r2r + +Like @code{fftw_plan_many_dft}, this function adds @code{howmany}, +@code{nembed}, @code{stride}, and @code{dist} parameters to the +@code{fftw_plan_r2r} function, but otherwise behaves the same as the +basic interface.
The interpretation of those additional parameters is +the same as for @code{fftw_plan_many_dft}. (Of course, the +@code{stride} and @code{dist} parameters are now in units of +@code{double}, not @code{fftw_complex}.) + +Arrays @code{n}, @code{inembed}, @code{onembed}, and @code{kind} are not +used after this function returns. You can safely free or reuse them. + +@c ------------------------------------------------------------ +@node Guru Interface, New-array Execute Functions, Advanced Interface, FFTW Reference +@section Guru Interface +@cindex guru interface + +The ``guru'' interface to FFTW is intended to expose as much as possible +of the flexibility in the underlying FFTW architecture. It allows one +to compute multi-dimensional ``vectors'' (loops) of multi-dimensional +transforms, where each vector/transform dimension has an independent +size and stride. +@cindex vector +One can also use more general complex-number formats, e.g. separate real +and imaginary arrays. + +For those users who require the flexibility of the guru interface, it is +important that they pay special attention to the documentation lest they +shoot themselves in the foot. + +@menu +* Interleaved and split arrays:: +* Guru vector and transform sizes:: +* Guru Complex DFTs:: +* Guru Real-data DFTs:: +* Guru Real-to-real Transforms:: +* 64-bit Guru Interface:: +@end menu + +@c =========> +@node Interleaved and split arrays, Guru vector and transform sizes, Guru Interface, Guru Interface +@subsection Interleaved and split arrays + +The guru interface supports two representations of complex numbers, +which we call the interleaved and the split format. + +The @dfn{interleaved} format is the same one used by the basic and +advanced interfaces, and it is documented in @ref{Complex numbers}. +In the interleaved format, you provide pointers to the real part of a +complex number, and the imaginary part is understood to be stored in the +next memory location.
+@cindex interleaved format + + +The @dfn{split} format allows separate pointers to the real and +imaginary parts of a complex array. +@cindex split format + + +Technically, the interleaved format is redundant, because you can +always express an interleaved array in terms of a split array with +appropriate pointers and strides. On the other hand, the interleaved +format is simpler to use, and it is common in practice. Hence, FFTW +supports it as a special case. + +@c =========> +@node Guru vector and transform sizes, Guru Complex DFTs, Interleaved and split arrays, Guru Interface +@subsection Guru vector and transform sizes + +The guru interface introduces one basic new data structure, +@code{fftw_iodim}, that is used to specify sizes and strides for +multi-dimensional transforms and vectors: + +@example +typedef struct @{ + int n; + int is; + int os; +@} fftw_iodim; +@end example +@tindex fftw_iodim + +Here, @code{n} is the size of the dimension, and @code{is} and @code{os} +are the strides of that dimension for the input and output arrays. (The +stride is the separation of consecutive elements along this dimension.) + +The meaning of the stride parameter depends on the type of the array +that the stride refers to. @emph{If the array is interleaved complex, +strides are expressed in units of complex numbers +(@code{fftw_complex}). If the array is split complex or real, strides +are expressed in units of real numbers (@code{double}).} This +convention is consistent with the usual pointer arithmetic in the C +language. An interleaved array is denoted by a pointer @code{p} to +@code{fftw_complex}, so that @code{p+1} points to the next complex +number. Split arrays are denoted by pointers to @code{double}, in +which case pointer arithmetic operates in units of +@code{sizeof(double)}. 
+@cindex stride + + +The guru planner interfaces all take a (@code{rank}, @code{dims[rank]}) +pair describing the transform size, and a (@code{howmany_rank}, +@code{howmany_dims[howmany_rank]}) pair describing the ``vector'' size (a +multi-dimensional loop of transforms to perform), where @code{dims} and +@code{howmany_dims} are arrays of @code{fftw_iodim}. + +For example, the @code{howmany} parameter in the advanced complex-DFT +interface corresponds to @code{howmany_rank} = 1, +@code{howmany_dims[0].n} = @code{howmany}, @code{howmany_dims[0].is} = +@code{idist}, and @code{howmany_dims[0].os} = @code{odist}. +@cindex howmany loop +@cindex dist +(To compute a single transform, you can just use @code{howmany_rank} = 0.) + + +A row-major multidimensional array with dimensions @code{n[rank]} +(@pxref{Row-major Format}) corresponds to @code{dims[i].n} = +@code{n[i]} and the recurrence @code{dims[i].is} = @code{n[i+1] * +dims[i+1].is} (similarly for @code{os}). The stride of the last +(@code{i=rank-1}) dimension is the overall stride of the array. +e.g. to be equivalent to the advanced complex-DFT interface, you would +have @code{dims[rank-1].is} = @code{istride} and +@code{dims[rank-1].os} = @code{ostride}. +@cindex row-major + + +In general, we only guarantee FFTW to return a non-@code{NULL} plan if +the vector and transform dimensions correspond to a set of distinct +indices, and for in-place transforms the input/output strides should +be the same. 
+ +@c =========> +@node Guru Complex DFTs, Guru Real-data DFTs, Guru vector and transform sizes, Guru Interface +@subsection Guru Complex DFTs + +@example +fftw_plan fftw_plan_guru_dft( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); + +fftw_plan fftw_plan_guru_split_dft( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *ri, double *ii, double *ro, double *io, + unsigned flags); +@end example +@findex fftw_plan_guru_dft +@findex fftw_plan_guru_split_dft + +These two functions plan a complex-data, multi-dimensional DFT +for the interleaved and split format, respectively. +Transform dimensions are given by (@code{rank}, @code{dims}) over a +multi-dimensional vector (loop) of dimensions (@code{howmany_rank}, +@code{howmany_dims}). @code{dims} and @code{howmany_dims} should point +to @code{fftw_iodim} arrays of length @code{rank} and +@code{howmany_rank}, respectively. + +@cindex flags +@code{flags} is a bitwise OR (@samp{|}) of zero or more planner flags, +as defined in @ref{Planner Flags}. + +In the @code{fftw_plan_guru_dft} function, the pointers @code{in} and +@code{out} point to the interleaved input and output arrays, +respectively. The sign can be either @math{-1} (= +@code{FFTW_FORWARD}) or @math{+1} (= @code{FFTW_BACKWARD}). If the +pointers are equal, the transform is in-place. + +In the @code{fftw_plan_guru_split_dft} function, +@code{ri} and @code{ii} point to the real and imaginary input arrays, +and @code{ro} and @code{io} point to the real and imaginary output +arrays. The input and output pointers may be the same, indicating an +in-place transform. 
For example, for @code{fftw_complex} pointers +@code{in} and @code{out}, the corresponding parameters are: + +@example +ri = (double *) in; +ii = (double *) in + 1; +ro = (double *) out; +io = (double *) out + 1; +@end example + +Because @code{fftw_plan_guru_split_dft} accepts split arrays, strides +are expressed in units of @code{double}. For a contiguous +@code{fftw_complex} array, the overall stride of the transform should +be 2, the distance between consecutive real parts or between +consecutive imaginary parts; see @ref{Guru vector and transform +sizes}. Note that the dimension strides are applied equally to the +real and imaginary parts; real and imaginary arrays with different +strides are not supported. + +There is no @code{sign} parameter in @code{fftw_plan_guru_split_dft}. +This function always plans for an @code{FFTW_FORWARD} transform. To +plan for an @code{FFTW_BACKWARD} transform, you can exploit the +identity that the backwards DFT is equal to the forwards DFT with the +real and imaginary parts swapped. 
For example, in the case of the +@code{fftw_complex} arrays above, the @code{FFTW_BACKWARD} transform +is computed by the parameters: + +@example +ri = (double *) in + 1; +ii = (double *) in; +ro = (double *) out + 1; +io = (double *) out; +@end example + +@c =========> +@node Guru Real-data DFTs, Guru Real-to-real Transforms, Guru Complex DFTs, Guru Interface +@subsection Guru Real-data DFTs + +@example +fftw_plan fftw_plan_guru_dft_r2c( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *in, fftw_complex *out, + unsigned flags); + +fftw_plan fftw_plan_guru_split_dft_r2c( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *in, double *ro, double *io, + unsigned flags); + +fftw_plan fftw_plan_guru_dft_c2r( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + fftw_complex *in, double *out, + unsigned flags); + +fftw_plan fftw_plan_guru_split_dft_c2r( + int rank, const fftw_iodim *dims, + int howmany_rank, const fftw_iodim *howmany_dims, + double *ri, double *ii, double *out, + unsigned flags); +@end example +@findex fftw_plan_guru_dft_r2c +@findex fftw_plan_guru_split_dft_r2c +@findex fftw_plan_guru_dft_c2r +@findex fftw_plan_guru_split_dft_c2r + +Plan a real-input (r2c) or real-output (c2r), multi-dimensional DFT with +transform dimensions given by (@code{rank}, @code{dims}) over a +multi-dimensional vector (loop) of dimensions (@code{howmany_rank}, +@code{howmany_dims}). @code{dims} and @code{howmany_dims} should point +to @code{fftw_iodim} arrays of length @code{rank} and +@code{howmany_rank}, respectively. As for the basic and advanced +interfaces, an r2c transform is @code{FFTW_FORWARD} and a c2r transform +is @code{FFTW_BACKWARD}. 
+ +The @emph{last} dimension of @code{dims} is interpreted specially: +that dimension of the real array has size @code{dims[rank-1].n}, but +that dimension of the complex array has size @code{dims[rank-1].n/2+1} +(division rounded down). The strides, on the other hand, are taken to +be exactly as specified. It is up to the user to specify the strides +appropriately for the peculiar dimensions of the data, and we do not +guarantee that the planner will succeed (return non-@code{NULL}) for +any dimensions other than those described in @ref{Real-data DFT Array +Format} and generalized in @ref{Advanced Real-data DFTs}. (That is, +for an in-place transform, each individual dimension should be able to +operate in place.) +@cindex in-place + + +@code{in} and @code{out} point to the input and output arrays for r2c +and c2r transforms, respectively. For split arrays, @code{ri} and +@code{ii} point to the real and imaginary input arrays for a c2r +transform, and @code{ro} and @code{io} point to the real and imaginary +output arrays for an r2c transform. @code{in} and @code{ro} or +@code{ri} and @code{out} may be the same, indicating an in-place +transform. (In-place transforms where @code{in} and @code{io} or +@code{ii} and @code{out} are the same are not currently supported.) + +@cindex flags +@code{flags} is a bitwise OR (@samp{|}) of zero or more planner flags, +as defined in @ref{Planner Flags}. + +In-place transforms of rank greater than 1 are currently only +supported for interleaved arrays. For split arrays, the planner will +return @code{NULL}. 
+@cindex in-place + +@c =========> +@node Guru Real-to-real Transforms, 64-bit Guru Interface, Guru Real-data DFTs, Guru Interface +@subsection Guru Real-to-real Transforms + +@example +fftw_plan fftw_plan_guru_r2r(int rank, const fftw_iodim *dims, + int howmany_rank, + const fftw_iodim *howmany_dims, + double *in, double *out, + const fftw_r2r_kind *kind, + unsigned flags); +@end example +@findex fftw_plan_guru_r2r + +Plan a real-to-real (r2r) multi-dimensional @code{FFTW_FORWARD} +transform with transform dimensions given by (@code{rank}, @code{dims}) +over a multi-dimensional vector (loop) of dimensions +(@code{howmany_rank}, @code{howmany_dims}). @code{dims} and +@code{howmany_dims} should point to @code{fftw_iodim} arrays of length +@code{rank} and @code{howmany_rank}, respectively. + +The transform kind of each dimension is given by the @code{kind} +parameter, which should point to an array of length @code{rank}. Valid +@code{fftw_r2r_kind} constants are given in @ref{Real-to-Real Transform +Kinds}. + +@code{in} and @code{out} point to the real input and output arrays; they +may be the same, indicating an in-place transform. + +@cindex flags +@code{flags} is a bitwise OR (@samp{|}) of zero or more planner flags, +as defined in @ref{Planner Flags}. + +@c =========> +@node 64-bit Guru Interface, , Guru Real-to-real Transforms, Guru Interface +@subsection 64-bit Guru Interface +@cindex 64-bit architecture + +When compiled in 64-bit mode on a 64-bit architecture (where addresses +are 64 bits wide), FFTW uses 64-bit quantities internally for all +transform sizes, strides, and so on---you don't have to do anything +special to exploit this. However, in the ordinary FFTW interfaces, +you specify the transform size by an @code{int} quantity, which is +normally only 32 bits wide. 
This means that, even though FFTW is +using 64-bit sizes internally, you cannot specify a single transform +dimension larger than +@ifinfo +2^31-1 +@end ifinfo +@html +2<sup>31</sup>&minus;1 +@end html +@tex +$2^{31}-1$ +@end tex +numbers. + +We expect that few users will require transforms larger than this, but, +for those who do, we provide a 64-bit version of the guru interface in +which all sizes are specified as integers of type @code{ptrdiff_t} +instead of @code{int}. (@code{ptrdiff_t} is a signed integer type +defined by the C standard to be wide enough to represent address +differences, and thus must be at least 64 bits wide on a 64-bit +machine.) We stress that there is @emph{no performance advantage} to +using this interface---the same internal FFTW code is employed +regardless---and it is only necessary if you want to specify very +large transform sizes. +@tindex ptrdiff_t + + +In particular, the 64-bit guru interface is a set of planner routines +that are exactly the same as the guru planner routines, except that +they are named with @samp{guru64} instead of @samp{guru} and they take +arguments of type @code{fftw_iodim64} instead of @code{fftw_iodim}. +For example, instead of @code{fftw_plan_guru_dft}, we have +@code{fftw_plan_guru64_dft}. + +@example +fftw_plan fftw_plan_guru64_dft( + int rank, const fftw_iodim64 *dims, + int howmany_rank, const fftw_iodim64 *howmany_dims, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +@end example +@findex fftw_plan_guru64_dft + +The @code{fftw_iodim64} type is similar to @code{fftw_iodim}, with the +same interpretation, except that it uses type @code{ptrdiff_t} instead +of type @code{int}.
+ +@example +typedef struct @{ + ptrdiff_t n; + ptrdiff_t is; + ptrdiff_t os; +@} fftw_iodim64; +@end example +@tindex fftw_iodim64 + +Every other @samp{fftw_plan_guru} function also has a +@samp{fftw_plan_guru64} equivalent, but we do not repeat their +documentation here since they are identical to the 32-bit versions +except as noted above. + +@c ----------------------------------------------------------- +@node New-array Execute Functions, Wisdom, Guru Interface, FFTW Reference +@section New-array Execute Functions +@cindex execute +@cindex new-array execution + +Normally, one executes a plan for the arrays with which the plan was +created, by calling @code{fftw_execute(plan)} as described in @ref{Using +Plans}. +@findex fftw_execute +However, it is possible for sophisticated users to apply a given plan +to a @emph{different} array using the ``new-array execute'' functions +detailed below, provided that the following conditions are met: + +@itemize @bullet + +@item +The array size, strides, etcetera are the same (since those are set by +the plan). + +@item +The input and output arrays are the same (in-place) or different +(out-of-place) if the plan was originally created to be in-place or +out-of-place, respectively. + +@item +For split arrays, the separations between the real and imaginary +parts, @code{ii-ri} and @code{io-ro}, are the same as they were for +the input and output arrays when the plan was created. (This +condition is automatically satisfied for interleaved arrays.) + +@item +The @dfn{alignment} of the new input/output arrays is the same as that +of the input/output arrays when the plan was created, unless the plan +was created with the @code{FFTW_UNALIGNED} flag. +@ctindex FFTW_UNALIGNED +Here, the alignment is a platform-dependent quantity (for example, it is +the address modulo 16 if SSE SIMD instructions are used, but the address +modulo 4 for non-SIMD single-precision FFTW on the same machine). 
In +general, only arrays allocated with @code{fftw_malloc} are guaranteed to +be equally aligned (@pxref{SIMD alignment and fftw_malloc}). + +@end itemize + +@cindex alignment +The alignment issue is especially critical, because if you don't use +@code{fftw_malloc} then you may have little control over the alignment +of arrays in memory. For example, neither the C++ @code{new} function +nor the Fortran @code{allocate} statement provides strong enough +guarantees about data alignment. If you don't use @code{fftw_malloc}, +therefore, you probably have to use @code{FFTW_UNALIGNED} (which +disables most SIMD support). If possible, it is probably better for +you to simply create multiple plans (creating a new plan is quick once +one exists for a given size), or better yet re-use the same array for +your transforms. + +@findex fftw_alignment_of +For rare circumstances in which you cannot control the alignment of +allocated memory, but wish to determine whether a given array is +aligned like the original array for which a plan was created, you can +use the @code{fftw_alignment_of} function: +@example +int fftw_alignment_of(double *p); +@end example +Two arrays have equivalent alignment (for the purposes of applying a +plan) if and only if @code{fftw_alignment_of} returns the same value +for the corresponding pointers to their data (typecast to @code{double*} +if necessary). + +If you are tempted to use the new-array execute interface because you +want to transform a known bunch of arrays of the same size, you should +probably go use the advanced interface instead (@pxref{Advanced +Interface}).
+ +The new-array execute functions are: + +@example +void fftw_execute_dft( + const fftw_plan p, + fftw_complex *in, fftw_complex *out); + +void fftw_execute_split_dft( + const fftw_plan p, + double *ri, double *ii, double *ro, double *io); + +void fftw_execute_dft_r2c( + const fftw_plan p, + double *in, fftw_complex *out); + +void fftw_execute_split_dft_r2c( + const fftw_plan p, + double *in, double *ro, double *io); + +void fftw_execute_dft_c2r( + const fftw_plan p, + fftw_complex *in, double *out); + +void fftw_execute_split_dft_c2r( + const fftw_plan p, + double *ri, double *ii, double *out); + +void fftw_execute_r2r( + const fftw_plan p, + double *in, double *out); +@end example +@findex fftw_execute_dft +@findex fftw_execute_split_dft +@findex fftw_execute_dft_r2c +@findex fftw_execute_split_dft_r2c +@findex fftw_execute_dft_c2r +@findex fftw_execute_split_dft_c2r +@findex fftw_execute_r2r + +These execute the @code{plan} to compute the corresponding transform on +the input/output arrays specified by the subsequent arguments. The +input/output array arguments have the same meanings as the ones passed +to the guru planner routines in the preceding sections. The @code{plan} +is not modified, and these routines can be called as many times as +desired, or intermixed with calls to the ordinary @code{fftw_execute}. + +The @code{plan} @emph{must} have been created for the transform type +corresponding to the execute function, e.g. it must be a complex-DFT +plan for @code{fftw_execute_dft}. Any of the planner routines for that +transform type, from the basic to the guru interface, could have been +used to create the plan, however. + +@c ------------------------------------------------------------ +@node Wisdom, What FFTW Really Computes, New-array Execute Functions, FFTW Reference +@section Wisdom +@cindex wisdom +@cindex saving plans to disk + +This section documents the FFTW mechanism for saving and restoring +plans from disk. This mechanism is called @dfn{wisdom}. 
+ +@menu +* Wisdom Export:: +* Wisdom Import:: +* Forgetting Wisdom:: +* Wisdom Utilities:: +@end menu + +@c =========> +@node Wisdom Export, Wisdom Import, Wisdom, Wisdom +@subsection Wisdom Export + +@example +int fftw_export_wisdom_to_filename(const char *filename); +void fftw_export_wisdom_to_file(FILE *output_file); +char *fftw_export_wisdom_to_string(void); +void fftw_export_wisdom(void (*write_char)(char c, void *), void *data); +@end example +@findex fftw_export_wisdom +@findex fftw_export_wisdom_to_filename +@findex fftw_export_wisdom_to_file +@findex fftw_export_wisdom_to_string + +These functions allow you to export all currently accumulated wisdom +in a form from which it can be later imported and restored, even +during a separate run of the program. (@xref{Words of Wisdom-Saving +Plans}.) The current store of wisdom is not affected by calling any +of these routines. + +@code{fftw_export_wisdom} exports the wisdom to any output +medium, as specified by the callback function +@code{write_char}. @code{write_char} is a @code{putc}-like function that +writes the character @code{c} to some output; its second parameter is +the @code{data} pointer passed to @code{fftw_export_wisdom}. For +convenience, the following three ``wrapper'' routines are provided: + +@code{fftw_export_wisdom_to_filename} writes wisdom to a file named +@code{filename} (which is created or overwritten), returning @code{1} +on success and @code{0} on failure. A lower-level function, which +requires you to open and close the file yourself (e.g. if you want to +write wisdom to a portion of a larger file) is +@code{fftw_export_wisdom_to_file}. This writes the wisdom to the +current position in @code{output_file}, which should be open with +write permission; upon exit, the file remains open and is positioned +at the end of the wisdom data. + +@code{fftw_export_wisdom_to_string} returns a pointer to a +@code{NULL}-terminated string holding the wisdom data. 
This string is +dynamically allocated, and it is the responsibility of the caller to +deallocate it with @code{free} when it is no longer needed. + +All of these routines export the wisdom in the same format, which we +will not document here except to say that it is LISP-like ASCII text +that is insensitive to white space. + +@c =========> +@node Wisdom Import, Forgetting Wisdom, Wisdom Export, Wisdom +@subsection Wisdom Import + +@example +int fftw_import_system_wisdom(void); +int fftw_import_wisdom_from_filename(const char *filename); +int fftw_import_wisdom_from_string(const char *input_string); +int fftw_import_wisdom(int (*read_char)(void *), void *data); +@end example +@findex fftw_import_wisdom +@findex fftw_import_system_wisdom +@findex fftw_import_wisdom_from_filename +@findex fftw_import_wisdom_from_file +@findex fftw_import_wisdom_from_string + +These functions import wisdom into a program from data stored by the +@code{fftw_export_wisdom} functions above. (@xref{Words of +Wisdom-Saving Plans}.) The imported wisdom replaces any wisdom +already accumulated by the running program. + +@code{fftw_import_wisdom} imports wisdom from any input medium, as +specified by the callback function @code{read_char}. @code{read_char} is +a @code{getc}-like function that returns the next character in the +input; its parameter is the @code{data} pointer passed to +@code{fftw_import_wisdom}. If the end of the input data is reached +(which should never happen for valid data), @code{read_char} should +return @code{EOF} (as defined in @code{<stdio.h>}). For convenience, +the following three ``wrapper'' routines are provided: + +@code{fftw_import_wisdom_from_filename} reads wisdom from a file named +@code{filename}. A lower-level function, which requires you to open +and close the file yourself (e.g. if you want to read wisdom from a +portion of a larger file) is @code{fftw_import_wisdom_from_file}. 
This +reads wisdom from the current position in @code{input_file} (which +should be open with read permission); upon exit, the file remains +open, but the position of the read pointer is unspecified. + +@code{fftw_import_wisdom_from_string} reads wisdom from the +@code{NULL}-terminated string @code{input_string}. + +@code{fftw_import_system_wisdom} reads wisdom from an +implementation-defined standard file (@code{/etc/fftw/wisdom} on Unix +and GNU systems). +@cindex wisdom, system-wide + + +The return value of these import routines is @code{1} if the wisdom was +read successfully and @code{0} otherwise. Note that, in all of these +functions, any data in the input stream past the end of the wisdom data +is simply ignored. + +@c =========> +@node Forgetting Wisdom, Wisdom Utilities, Wisdom Import, Wisdom +@subsection Forgetting Wisdom + +@example +void fftw_forget_wisdom(void); +@end example +@findex fftw_forget_wisdom + +Calling @code{fftw_forget_wisdom} causes all accumulated @code{wisdom} +to be discarded and its associated memory to be freed. (New +@code{wisdom} can still be gathered subsequently, however.) + +@c =========> +@node Wisdom Utilities, , Forgetting Wisdom, Wisdom +@subsection Wisdom Utilities + +FFTW includes two standalone utility programs that deal with wisdom. We +merely summarize them here, since they come with their own @code{man} +pages for Unix and GNU systems (with HTML versions on our web site). + +The first program is @code{fftw-wisdom} (or @code{fftwf-wisdom} in +single precision, etcetera), which can be used to create a wisdom file +containing plans for any of the transform sizes and types supported by +FFTW. It is preferable to create wisdom directly from your executable +(@pxref{Caveats in Using Wisdom}), but this program is useful for +creating global wisdom files for @code{fftw_import_system_wisdom}. 
+@cindex fftw-wisdom utility + + +The second program is @code{fftw-wisdom-to-conf}, which takes a wisdom +file as input and produces a @dfn{configuration routine} as output. The +latter is a C subroutine that you can compile and link into your +program, replacing a routine of the same name in the FFTW library, that +determines which parts of FFTW are callable by your program. +@code{fftw-wisdom-to-conf} produces a configuration routine that links +to only those parts of FFTW needed by the saved plans in the wisdom, +greatly reducing the size of statically linked executables (which should +only attempt to create plans corresponding to those in the wisdom, +however). +@cindex fftw-wisdom-to-conf utility +@cindex configuration routines + +@c ------------------------------------------------------------ +@node What FFTW Really Computes, , Wisdom, FFTW Reference +@section What FFTW Really Computes + +In this section, we provide precise mathematical definitions for the +transforms that FFTW computes. These transform definitions are fairly +standard, but some authors follow slightly different conventions for the +normalization of the transform (the constant factor in front) and the +sign of the complex exponent. We begin by presenting the +one-dimensional (1d) transform definitions, and then give the +straightforward extension to multi-dimensional transforms. 
+ +@menu +* The 1d Discrete Fourier Transform (DFT):: +* The 1d Real-data DFT:: +* 1d Real-even DFTs (DCTs):: +* 1d Real-odd DFTs (DSTs):: +* 1d Discrete Hartley Transforms (DHTs):: +* Multi-dimensional Transforms:: +@end menu + +@c =========> +@node The 1d Discrete Fourier Transform (DFT), The 1d Real-data DFT, What FFTW Really Computes, What FFTW Really Computes +@subsection The 1d Discrete Fourier Transform (DFT) + +@cindex discrete Fourier transform +@cindex DFT +The forward (@code{FFTW_FORWARD}) discrete Fourier transform (DFT) of a +1d complex array @math{X} of size @math{n} computes an array @math{Y}, +where: +@tex +$$ +Y_k = \sum_{j = 0}^{n - 1} X_j e^{-2\pi j k \sqrt{-1}/n} \ . +$$ +@end tex +@ifinfo +@center Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(-2 pi j k sqrt(-1)/n) . +@end ifinfo +@html +
    .
    +@end html +The backward (@code{FFTW_BACKWARD}) DFT computes: +@tex +$$ +Y_k = \sum_{j = 0}^{n - 1} X_j e^{2\pi j k \sqrt{-1}/n} \ . +$$ +@end tex +@ifinfo +@center Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(2 pi j k sqrt(-1)/n) . +@end ifinfo +@html +
    .
    +@end html + +@cindex normalization +FFTW computes an unnormalized transform, in that there is no coefficient +in front of the summation in the DFT. In other words, applying the +forward and then the backward transform will multiply the input by +@math{n}. + +@cindex frequency +From above, an @code{FFTW_FORWARD} transform corresponds to a sign of +@math{-1} in the exponent of the DFT. Note also that we use the +standard ``in-order'' output ordering---the @math{k}-th output +corresponds to the frequency @math{k/n} (or @math{k/T}, where @math{T} +is your total sampling period). For those who like to think in terms of +positive and negative frequencies, this means that the positive +frequencies are stored in the first half of the output and the negative +frequencies are stored in backwards order in the second half of the +output. (The frequency @math{-k/n} is the same as the frequency +@math{(n-k)/n}.) + +@c =========> +@node The 1d Real-data DFT, 1d Real-even DFTs (DCTs), The 1d Discrete Fourier Transform (DFT), What FFTW Really Computes +@subsection The 1d Real-data DFT + +The real-input (r2c) DFT in FFTW computes the @emph{forward} transform +@math{Y} of the size @code{n} real array @math{X}, exactly as defined +above, i.e. +@tex +$$ +Y_k = \sum_{j = 0}^{n - 1} X_j e^{-2\pi j k \sqrt{-1}/n} \ . +$$ +@end tex +@ifinfo +@center Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(-2 pi j k sqrt(-1)/n) . +@end ifinfo +@html +
    .
    +@end html +This output array @math{Y} can easily be shown to possess the +``Hermitian'' symmetry +@cindex Hermitian +@tex +$Y_k = Y_{n-k}^*$, +@end tex +@ifinfo +Y[k] = Y[n-k]*, +@end ifinfo +@html +Yk = Yn-k*, +@end html +where we take @math{Y} to be periodic so that +@tex +$Y_n = Y_0$. +@end tex +@ifinfo +Y[n] = Y[0]. +@end ifinfo +@html +Yn = Y0. +@end html + +As a result of this symmetry, half of the output @math{Y} is redundant +(being the complex conjugate of the other half), and so the 1d r2c +transforms only output elements @math{0}@dots{}@math{n/2} of @math{Y} +(@math{n/2+1} complex numbers), where the division by @math{2} is +rounded down. + +Moreover, the Hermitian symmetry implies that +@tex +$Y_0$ +@end tex +@ifinfo +Y[0] +@end ifinfo +@html +Y0 +@end html +and, if @math{n} is even, the +@tex +$Y_{n/2}$ +@end tex +@ifinfo +Y[n/2] +@end ifinfo +@html +Yn/2 +@end html +element, are purely real. So, for the @code{R2HC} r2r transform, the +halfcomplex format does not store the imaginary parts of these elements. +@cindex r2r +@ctindex R2HC +@cindex halfcomplex format + + +The c2r and @code{HC2R} r2r transforms compute the backward DFT of the +@emph{complex} array @math{X} with Hermitian symmetry, stored in the +r2c/@code{R2HC} output formats, respectively, where the backward +transform is defined exactly as for the complex case: +@tex +$$ +Y_k = \sum_{j = 0}^{n - 1} X_j e^{2\pi j k \sqrt{-1}/n} \ . +$$ +@end tex +@ifinfo +@center Y[k] = sum for j = 0 to (n - 1) of X[j] * exp(2 pi j k sqrt(-1)/n) . +@end ifinfo +@html +
    .
    +@end html +The outputs @code{Y} of this transform can easily be seen to be purely +real, and are stored as an array of real numbers. + +@cindex normalization +Like FFTW's complex DFT, these transforms are unnormalized. In other +words, applying the real-to-complex (forward) and then the +complex-to-real (backward) transform will multiply the input by +@math{n}. + +@c =========> +@node 1d Real-even DFTs (DCTs), 1d Real-odd DFTs (DSTs), The 1d Real-data DFT, What FFTW Really Computes +@subsection 1d Real-even DFTs (DCTs) + +The Real-even symmetry DFTs in FFTW are exactly equivalent to the unnormalized +forward (and backward) DFTs as defined above, where the input array +@math{X} of length @math{N} is purely real and is also @dfn{even} symmetry. In +this case, the output array is likewise real and even symmetry. +@cindex real-even DFT +@cindex REDFT + + +@ctindex REDFT00 +For the case of @code{REDFT00}, this even symmetry means that +@tex +$X_j = X_{N-j}$, +@end tex +@ifinfo +X[j] = X[N-j], +@end ifinfo +@html +Xj = XN-j, +@end html +where we take @math{X} to be periodic so that +@tex +$X_N = X_0$. +@end tex +@ifinfo +X[N] = X[0]. +@end ifinfo +@html +XN = X0. +@end html +Because of this redundancy, only the first @math{n} real numbers are +actually stored, where @math{N = 2(n-1)}. + +The proper definition of even symmetry for @code{REDFT10}, +@code{REDFT01}, and @code{REDFT11} transforms is somewhat more intricate +because of the shifts by @math{1/2} of the input and/or output, although +the corresponding boundary conditions are given in @ref{Real even/odd +DFTs (cosine/sine transforms)}. Because of the even symmetry, however, +the sine terms in the DFT all cancel and the remaining cosine terms are +written explicitly below. This formulation often leads people to call +such a transform a @dfn{discrete cosine transform} (DCT), although it is +really just a special case of the DFT. 
+@cindex discrete cosine transform +@cindex DCT + + +In each of the definitions below, we transform a real array @math{X} of +length @math{n} to a real array @math{Y} of length @math{n}: + +@subsubheading REDFT00 (DCT-I) +@ctindex REDFT00 +An @code{REDFT00} transform (type-I DCT) in FFTW is defined by: +@tex +$$ +Y_k = X_0 + (-1)^k X_{n-1} + + 2 \sum_{j=1}^{n-2} X_j \cos [ \pi j k / (n-1)]. +$$ +@end tex +@ifinfo +Y[k] = X[0] + (-1)^k X[n-1] + 2 (sum for j = 1 to n-2 of X[j] cos(pi jk /(n-1))). +@end ifinfo +@html +
    .
    +@end html +Note that this transform is not defined for @math{n=1}. For @math{n=2}, +the summation term above is dropped as you might expect. + +@subsubheading REDFT10 (DCT-II) +@ctindex REDFT10 +An @code{REDFT10} transform (type-II DCT, sometimes called ``the'' DCT) in FFTW is defined by: +@tex +$$ +Y_k = 2 \sum_{j=0}^{n-1} X_j \cos [\pi (j+1/2) k / n]. +$$ +@end tex +@ifinfo +Y[k] = 2 (sum for j = 0 to n-1 of X[j] cos(pi (j+1/2) k / n)). +@end ifinfo +@html +
    .
    +@end html + +@subsubheading REDFT01 (DCT-III) +@ctindex REDFT01 +An @code{REDFT01} transform (type-III DCT) in FFTW is defined by: +@tex +$$ +Y_k = X_0 + 2 \sum_{j=1}^{n-1} X_j \cos [\pi j (k+1/2) / n]. +$$ +@end tex +@ifinfo +Y[k] = X[0] + 2 (sum for j = 1 to n-1 of X[j] cos(pi j (k+1/2) / n)). +@end ifinfo +@html +
    .
    +@end html +In the case of @math{n=1}, this reduces to +@tex +$Y_0 = X_0$. +@end tex +@ifinfo +Y[0] = X[0]. +@end ifinfo +@html +Y0 = X0. +@end html +Up to a scale factor (see below), this is the inverse of @code{REDFT10} (``the'' DCT), and so the @code{REDFT01} (DCT-III) is sometimes called the ``IDCT''. +@cindex IDCT + +@subsubheading REDFT11 (DCT-IV) +@ctindex REDFT11 +An @code{REDFT11} transform (type-IV DCT) in FFTW is defined by: +@tex +$$ +Y_k = 2 \sum_{j=0}^{n-1} X_j \cos [\pi (j+1/2) (k+1/2) / n]. +$$ +@end tex +@ifinfo +Y[k] = 2 (sum for j = 0 to n-1 of X[j] cos(pi (j+1/2) (k+1/2) / n)). +@end ifinfo +@html +
    .
    +@end html + +@subsubheading Inverses and Normalization + +These definitions correspond directly to the unnormalized DFTs used +elsewhere in FFTW (hence the factors of @math{2} in front of the +summations). The unnormalized inverse of @code{REDFT00} is +@code{REDFT00}, of @code{REDFT10} is @code{REDFT01} and vice versa, and +of @code{REDFT11} is @code{REDFT11}. Each unnormalized inverse results +in the original array multiplied by @math{N}, where @math{N} is the +@emph{logical} DFT size. For @code{REDFT00}, @math{N=2(n-1)} (note that +@math{n=1} is not defined); otherwise, @math{N=2n}. +@cindex normalization + + +In defining the discrete cosine transform, some authors also include +additional factors of +@ifinfo +sqrt(2) +@end ifinfo +@html +√2 +@end html +@tex +$\sqrt{2}$ +@end tex +(or its inverse) multiplying selected inputs and/or outputs. This is a +mostly cosmetic change that makes the transform orthogonal, but +sacrifices the direct equivalence to a symmetric DFT. + +@c =========> +@node 1d Real-odd DFTs (DSTs), 1d Discrete Hartley Transforms (DHTs), 1d Real-even DFTs (DCTs), What FFTW Really Computes +@subsection 1d Real-odd DFTs (DSTs) + +The Real-odd symmetry DFTs in FFTW are exactly equivalent to the unnormalized +forward (and backward) DFTs as defined above, where the input array +@math{X} of length @math{N} is purely real and is also @dfn{odd} symmetry. In +this case, the output is odd symmetry and purely imaginary. +@cindex real-odd DFT +@cindex RODFT + + +@ctindex RODFT00 +For the case of @code{RODFT00}, this odd symmetry means that +@tex +$X_j = -X_{N-j}$, +@end tex +@ifinfo +X[j] = -X[N-j], +@end ifinfo +@html +Xj = -XN-j, +@end html +where we take @math{X} to be periodic so that +@tex +$X_N = X_0$. +@end tex +@ifinfo +X[N] = X[0]. +@end ifinfo +@html +XN = X0. +@end html +Because of this redundancy, only the first @math{n} real numbers +starting at @math{j=1} are actually stored (the @math{j=0} element is +zero), where @math{N = 2(n+1)}. 
+ +The proper definition of odd symmetry for @code{RODFT10}, +@code{RODFT01}, and @code{RODFT11} transforms is somewhat more intricate +because of the shifts by @math{1/2} of the input and/or output, although +the corresponding boundary conditions are given in @ref{Real even/odd +DFTs (cosine/sine transforms)}. Because of the odd symmetry, however, +the cosine terms in the DFT all cancel and the remaining sine terms are +written explicitly below. This formulation often leads people to call +such a transform a @dfn{discrete sine transform} (DST), although it is +really just a special case of the DFT. +@cindex discrete sine transform +@cindex DST + + +In each of the definitions below, we transform a real array @math{X} of +length @math{n} to a real array @math{Y} of length @math{n}: + +@subsubheading RODFT00 (DST-I) +@ctindex RODFT00 +An @code{RODFT00} transform (type-I DST) in FFTW is defined by: +@tex +$$ +Y_k = 2 \sum_{j=0}^{n-1} X_j \sin [ \pi (j+1) (k+1) / (n+1)]. +$$ +@end tex +@ifinfo +Y[k] = 2 (sum for j = 0 to n-1 of X[j] sin(pi (j+1)(k+1) / (n+1))). +@end ifinfo +@html +
    .
    +@end html + +@subsubheading RODFT10 (DST-II) +@ctindex RODFT10 +An @code{RODFT10} transform (type-II DST) in FFTW is defined by: +@tex +$$ +Y_k = 2 \sum_{j=0}^{n-1} X_j \sin [\pi (j+1/2) (k+1) / n]. +$$ +@end tex +@ifinfo +Y[k] = 2 (sum for j = 0 to n-1 of X[j] sin(pi (j+1/2) (k+1) / n)). +@end ifinfo +@html +
    .
    +@end html + +@subsubheading RODFT01 (DST-III) +@ctindex RODFT01 +An @code{RODFT01} transform (type-III DST) in FFTW is defined by: +@tex +$$ +Y_k = (-1)^k X_{n-1} + 2 \sum_{j=0}^{n-2} X_j \sin [\pi (j+1) (k+1/2) / n]. +$$ +@end tex +@ifinfo +Y[k] = (-1)^k X[n-1] + 2 (sum for j = 0 to n-2 of X[j] sin(pi (j+1) (k+1/2) / n)). +@end ifinfo +@html +
    .
    +@end html +In the case of @math{n=1}, this reduces to +@tex +$Y_0 = X_0$. +@end tex +@ifinfo +Y[0] = X[0]. +@end ifinfo +@html +Y0 = X0. +@end html + +@subsubheading RODFT11 (DST-IV) +@ctindex RODFT11 +An @code{RODFT11} transform (type-IV DST) in FFTW is defined by: +@tex +$$ +Y_k = 2 \sum_{j=0}^{n-1} X_j \sin [\pi (j+1/2) (k+1/2) / n]. +$$ +@end tex +@ifinfo +Y[k] = 2 (sum for j = 0 to n-1 of X[j] sin(pi (j+1/2) (k+1/2) / n)). +@end ifinfo +@html +
    .
    +@end html + +@subsubheading Inverses and Normalization + +These definitions correspond directly to the unnormalized DFTs used +elsewhere in FFTW (hence the factors of @math{2} in front of the +summations). The unnormalized inverse of @code{RODFT00} is +@code{RODFT00}, of @code{RODFT10} is @code{RODFT01} and vice versa, and +of @code{RODFT11} is @code{RODFT11}. Each unnormalized inverse results +in the original array multiplied by @math{N}, where @math{N} is the +@emph{logical} DFT size. For @code{RODFT00}, @math{N=2(n+1)}; +otherwise, @math{N=2n}. +@cindex normalization + + +In defining the discrete sine transform, some authors also include +additional factors of +@ifinfo +sqrt(2) +@end ifinfo +@html +√2 +@end html +@tex +$\sqrt{2}$ +@end tex +(or its inverse) multiplying selected inputs and/or outputs. This is a +mostly cosmetic change that makes the transform orthogonal, but +sacrifices the direct equivalence to an antisymmetric DFT. + +@c =========> +@node 1d Discrete Hartley Transforms (DHTs), Multi-dimensional Transforms, 1d Real-odd DFTs (DSTs), What FFTW Really Computes +@subsection 1d Discrete Hartley Transforms (DHTs) + +@cindex discrete Hartley transform +@cindex DHT +The discrete Hartley transform (DHT) of a 1d real array @math{X} of size +@math{n} computes a real array @math{Y} of the same size, where: +@tex +$$ +Y_k = \sum_{j = 0}^{n - 1} X_j [ \cos(2\pi j k / n) + \sin(2\pi j k / n)]. +$$ +@end tex +@ifinfo +@center Y[k] = sum for j = 0 to (n - 1) of X[j] * [cos(2 pi j k / n) + sin(2 pi j k / n)]. +@end ifinfo +@html +
    .
    +@end html + +@cindex normalization +FFTW computes an unnormalized transform, in that there is no coefficient +in front of the summation in the DHT. In other words, applying the +transform twice (the DHT is its own inverse) will multiply the input by +@math{n}. + +@c =========> +@node Multi-dimensional Transforms, , 1d Discrete Hartley Transforms (DHTs), What FFTW Really Computes +@subsection Multi-dimensional Transforms + +The multi-dimensional transforms of FFTW, in general, compute simply the +separable product of the given 1d transform along each dimension of the +array. Since each of these transforms is unnormalized, computing the +forward followed by the backward/inverse multi-dimensional transform +will result in the original array scaled by the product of the +normalization factors for each dimension (e.g. the product of the +dimension sizes, for a multi-dimensional DFT). + +@tex +As an explicit example, consider the following exact mathematical +definition of our multi-dimensional DFT. Let $X$ be a $d$-dimensional +complex array whose elements are $X[j_1, j_2, \ldots, j_d]$, where $0 +\leq j_s < n_s$ for all~$s \in \{ 1, 2, \ldots, d \}$. Let also +$\omega_s = e^{2\pi \sqrt{-1}/n_s}$, for all~$s \in \{ 1, 2, \ldots, d +\}$. + +The forward transform computes a complex array~$Y$, whose +structure is the same as that of~$X$, defined by + +$$ +Y[k_1, k_2, \ldots, k_d] = + \sum_{j_1 = 0}^{n_1 - 1} + \sum_{j_2 = 0}^{n_2 - 1} + \cdots + \sum_{j_d = 0}^{n_d - 1} + X[j_1, j_2, \ldots, j_d] + \omega_1^{-j_1 k_1} + \omega_2^{-j_2 k_2} + \cdots + \omega_d^{-j_d k_d} \ . +$$ + +The backward transform computes +$$ +Y[k_1, k_2, \ldots, k_d] = + \sum_{j_1 = 0}^{n_1 - 1} + \sum_{j_2 = 0}^{n_2 - 1} + \cdots + \sum_{j_d = 0}^{n_d - 1} + X[j_1, j_2, \ldots, j_d] + \omega_1^{j_1 k_1} + \omega_2^{j_2 k_2} + \cdots + \omega_d^{j_d k_d} \ . +$$ + +Computing the forward transform followed by the backward transform +will multiply the array by $\prod_{s=1}^{d} n_s$. 
+@end tex + +@cindex r2c +The definition of FFTW's multi-dimensional DFT of real data (r2c) +deserves special attention. In this case, we logically compute the full +multi-dimensional DFT of the input data; since the input data are purely +real, the output data have the Hermitian symmetry and therefore only one +non-redundant half need be stored. More specifically, for an @ndims multi-dimensional real-input DFT, the full (logical) complex output array +@tex +$Y[k_0, k_1, \ldots, k_{d-1}]$ +@end tex +@html +Y[k0, k1, ..., +kd-1] +@end html +@ifinfo +Y[k[0], k[1], ..., k[d-1]] +@end ifinfo +has the symmetry: +@tex +$$ +Y[k_0, k_1, \ldots, k_{d-1}] = Y[n_0 - k_0, n_1 - k_1, \ldots, n_{d-1} - k_{d-1}]^* +$$ +@end tex +@html +Y[k0, k1, ..., +kd-1] = Y[n0 - +k0, n1 - k1, ..., +nd-1 - kd-1]* +@end html +@ifinfo +Y[k[0], k[1], ..., k[d-1]] = Y[n[0] - k[0], n[1] - k[1], ..., n[d-1] - k[d-1]]* +@end ifinfo +(where each dimension is periodic). Because of this symmetry, we only +store the +@tex +$k_{d-1} = 0 \cdots n_{d-1}/2$ +@end tex +@html +kd-1 = 0...nd-1/2 +@end html +@ifinfo +k[d-1] = 0...n[d-1]/2 +@end ifinfo +elements of the @emph{last} dimension (division by @math{2} is rounded +down). (We could instead have cut any other dimension in half, but the +last dimension proved computationally convenient.) This results in the +peculiar array format described in more detail by @ref{Real-data DFT +Array Format}. + +The multi-dimensional c2r transform is simply the unnormalized inverse +of the r2c transform, i.e. it is the same as FFTW's complex backward +multi-dimensional DFT, operating on a Hermitian input array in the +peculiar format mentioned above and outputting a real array (since the +DFT output is purely real). + +We should remind the user that the separable product of 1d transforms +along each dimension, as computed by FFTW, is not always the same thing +as the usual multi-dimensional transform. 
A multi-dimensional +@code{R2HC} (or @code{HC2R}) transform is not identical to the +multi-dimensional DFT, requiring some post-processing to combine the +requisite real and imaginary parts, as was described in @ref{The +Halfcomplex-format DFT}. Likewise, FFTW's multidimensional +@code{FFTW_DHT} r2r transform is not the same thing as the logical +multi-dimensional discrete Hartley transform defined in the literature, +as discussed in @ref{The Discrete Hartley Transform}. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/rfftwnd-for-html.png Binary file fft/fftw/fftw-3.3.4/doc/rfftwnd-for-html.png has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/rfftwnd.eps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/rfftwnd.eps Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2258 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Title: ./rfftwnd.fig +%%Creator: fig2dev Version 3.2 Patchlevel 5d +%%CreationDate: Thu Jan 16 10:25:46 2014 +%%BoundingBox: 0 0 270 405 +%Magnification: 0.7000 +%%EndComments +%%BeginProlog +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 
{0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def +/col32 {0.475 0.490 0.475 srgb} bind def +/col33 {0.937 0.922 0.937 srgb} bind def +/col34 {0.906 0.188 0.125 srgb} bind def +/col35 {0.969 0.557 0.525 srgb} bind def +/col36 {0.412 0.588 0.780 srgb} bind def +/col37 {0.525 0.667 0.843 srgb} bind def +/col38 {0.875 0.859 0.000 srgb} bind def + +end + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def +/reencdict 12 dict def /ReEncode { reencdict begin +/newcodesandnames exch def /newfontname exch def /basefontname exch def +/basefontdict basefontname findfont def /newfont basefontdict maxlength dict def +basefontdict { exch dup /FID ne { dup /Encoding eq +{ exch dup length array copy newfont 3 1 roll put } +{ exch newfont 3 1 
roll put } ifelse } { pop pop } ifelse } forall +newfont /FontName newfontname put newcodesandnames aload pop +128 1 255 { newfont /Encoding get exch /.notdef put } for +newcodesandnames length 2 idiv { newfont /Encoding get 3 1 roll put } repeat +newfontname newfont definefont pop end } def +/isovec [ +8#055 /minus 8#200 /grave 8#201 /acute 8#202 /circumflex 8#203 /tilde +8#204 /macron 8#205 /breve 8#206 /dotaccent 8#207 /dieresis +8#210 /ring 8#211 /cedilla 8#212 /hungarumlaut 8#213 /ogonek 8#214 /caron +8#220 /dotlessi 8#230 /oe 8#231 /OE +8#240 /space 8#241 /exclamdown 8#242 /cent 8#243 /sterling +8#244 /currency 8#245 /yen 8#246 /brokenbar 8#247 /section 8#250 /dieresis +8#251 /copyright 8#252 /ordfeminine 8#253 /guillemotleft 8#254 /logicalnot +8#255 /hyphen 8#256 /registered 8#257 /macron 8#260 /degree 8#261 /plusminus +8#262 /twosuperior 8#263 /threesuperior 8#264 /acute 8#265 /mu 8#266 /paragraph +8#267 /periodcentered 8#270 /cedilla 8#271 /onesuperior 8#272 /ordmasculine +8#273 /guillemotright 8#274 /onequarter 8#275 /onehalf +8#276 /threequarters 8#277 /questiondown 8#300 /Agrave 8#301 /Aacute +8#302 /Acircumflex 8#303 /Atilde 8#304 /Adieresis 8#305 /Aring +8#306 /AE 8#307 /Ccedilla 8#310 /Egrave 8#311 /Eacute +8#312 /Ecircumflex 8#313 /Edieresis 8#314 /Igrave 8#315 /Iacute +8#316 /Icircumflex 8#317 /Idieresis 8#320 /Eth 8#321 /Ntilde 8#322 /Ograve +8#323 /Oacute 8#324 /Ocircumflex 8#325 /Otilde 8#326 /Odieresis 8#327 /multiply +8#330 /Oslash 8#331 /Ugrave 8#332 /Uacute 8#333 /Ucircumflex +8#334 /Udieresis 8#335 /Yacute 8#336 /Thorn 8#337 /germandbls 8#340 /agrave +8#341 /aacute 8#342 /acircumflex 8#343 /atilde 8#344 /adieresis 8#345 /aring +8#346 /ae 8#347 /ccedilla 8#350 /egrave 8#351 /eacute +8#352 /ecircumflex 8#353 /edieresis 8#354 /igrave 8#355 /iacute +8#356 /icircumflex 8#357 /idieresis 8#360 /eth 8#361 /ntilde 8#362 /ograve +8#363 /oacute 8#364 /ocircumflex 8#365 /otilde 8#366 /odieresis 8#367 /divide +8#370 /oslash 8#371 /ugrave 8#372 /uacute 
8#373 /ucircumflex +8#374 /udieresis 8#375 /yacute 8#376 /thorn 8#377 /ydieresis] def +/Helvetica /Helvetica-iso isovec ReEncode +/Helvetica-Bold /Helvetica-Bold-iso isovec ReEncode +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def + +/pageheader { +save +newpath 0 405 moveto 0 0 lineto 270 0 lineto 270 405 lineto closepath clip newpath +-2.5 407.2 translate +1 -1 scale +$F2psBegin +10 setmiterlimit +0 slj 0 slc + 0.04200 0.04200 sc +} bind def +/pagefooter { +$F2psEnd +restore +} bind def +%%EndProlog +pageheader +% +% Fig objects follow +% +% +% here starts figure with depth 998 +% Polyline +0 slj +0 slc +0.000 slw +n 1221 7280 m 6435 7280 l 6435 9676 l 1221 9676 l + 1221 7280 l cp gs col7 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 7280 m 6435 7280 l 6435 9676 l 1221 9676 l + 1221 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 7280 m 1620 7280 l 1620 7656 l 1221 7656 l + 1221 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 7280 m 1620 7280 l 1620 7656 l 1221 7656 l + 1221 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 7280 m 2019 7280 l 2019 7656 l 1620 7656 l + 1620 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 7280 m 2019 7280 l 2019 7656 l 1620 7656 l + 1620 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 7280 m 2418 7280 l 2418 7656 l 2019 7656 l + 2019 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 7280 m 2418 7280 l 2418 7656 l 2019 7656 l + 2019 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 7280 m 2817 7280 l 2817 7656 l 2418 7656 l + 2418 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 7280 m 2817 7280 l 2817 7656 l 2418 7656 l + 2418 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 7280 m 4438 7280 l 4438 7656 l 4038 7656 l + 4038 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 7280 m 4438 7280 l 4438 7656 l 4038 7656 l + 4038 7280 l cp gs col32 s gr +% Polyline +0.000 
slw +n 4438 7280 m 4837 7280 l 4837 7656 l 4438 7656 l + 4438 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 7280 m 4837 7280 l 4837 7656 l 4438 7656 l + 4438 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 7280 m 5236 7280 l 5236 7656 l 4837 7656 l + 4837 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 7280 m 5236 7280 l 5236 7656 l 4837 7656 l + 4837 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 7280 m 5635 7280 l 5635 7656 l 5236 7656 l + 5236 7280 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 7280 m 5635 7280 l 5635 7656 l 5236 7656 l + 5236 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 7656 m 1620 7656 l 1620 8032 l 1221 8032 l + 1221 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 7656 m 1620 7656 l 1620 8032 l 1221 8032 l + 1221 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 7656 m 2019 7656 l 2019 8032 l 1620 8032 l + 1620 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 7656 m 2019 7656 l 2019 8032 l 1620 8032 l + 1620 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 7656 m 2418 7656 l 2418 8032 l 2019 8032 l + 2019 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 7656 m 2418 7656 l 2418 8032 l 2019 8032 l + 2019 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 7656 m 2817 7656 l 2817 8032 l 2418 8032 l + 2418 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 7656 m 2817 7656 l 2817 8032 l 2418 8032 l + 2418 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 7656 m 4438 7656 l 4438 8032 l 4038 8032 l + 4038 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 7656 m 4438 7656 l 4438 8032 l 4038 8032 l + 4038 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 7656 m 4837 7656 l 4837 8032 l 4438 8032 l + 4438 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 7656 m 4837 7656 l 4837 8032 l 4438 8032 l + 4438 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 7656 m 
5236 7656 l 5236 8032 l 4837 8032 l + 4837 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 7656 m 5236 7656 l 5236 8032 l 4837 8032 l + 4837 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 7656 m 5635 7656 l 5635 8032 l 5236 8032 l + 5236 7656 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 7656 m 5635 7656 l 5635 8032 l 5236 8032 l + 5236 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 8924 m 1620 8924 l 1620 9300 l 1221 9300 l + 1221 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 8924 m 1620 8924 l 1620 9300 l 1221 9300 l + 1221 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 8924 m 2019 8924 l 2019 9300 l 1620 9300 l + 1620 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 8924 m 2019 8924 l 2019 9300 l 1620 9300 l + 1620 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 8924 m 2418 8924 l 2418 9300 l 2019 9300 l + 2019 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 8924 m 2418 8924 l 2418 9300 l 2019 9300 l + 2019 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 8924 m 2817 8924 l 2817 9300 l 2418 9300 l + 2418 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 8924 m 2817 8924 l 2817 9300 l 2418 9300 l + 2418 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 8924 m 4438 8924 l 4438 9300 l 4038 9300 l + 4038 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 8924 m 4438 8924 l 4438 9300 l 4038 9300 l + 4038 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 8924 m 4837 8924 l 4837 9300 l 4438 9300 l + 4438 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 8924 m 4837 8924 l 4837 9300 l 4438 9300 l + 4438 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 8924 m 5236 8924 l 5236 9300 l 4837 9300 l + 4837 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 8924 m 5236 8924 l 5236 9300 l 4837 9300 l + 4837 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 8924 m 5635 8924 l 5635 
9300 l 5236 9300 l + 5236 8924 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 8924 m 5635 8924 l 5635 9300 l 5236 9300 l + 5236 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 9300 m 1620 9300 l 1620 9676 l 1221 9676 l + 1221 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 9300 m 1620 9300 l 1620 9676 l 1221 9676 l + 1221 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 9300 m 2019 9300 l 2019 9676 l 1620 9676 l + 1620 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 9300 m 2019 9300 l 2019 9676 l 1620 9676 l + 1620 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 9300 m 2418 9300 l 2418 9676 l 2019 9676 l + 2019 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 9300 m 2418 9300 l 2418 9676 l 2019 9676 l + 2019 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 9300 m 2817 9300 l 2817 9676 l 2418 9676 l + 2418 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 9300 m 2817 9300 l 2817 9676 l 2418 9676 l + 2418 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 9300 m 4438 9300 l 4438 9676 l 4038 9676 l + 4038 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 9300 m 4438 9300 l 4438 9676 l 4038 9676 l + 4038 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 9300 m 4837 9300 l 4837 9676 l 4438 9676 l + 4438 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 9300 m 4837 9300 l 4837 9676 l 4438 9676 l + 4438 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 9300 m 5236 9300 l 5236 9676 l 4837 9676 l + 4837 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 9300 m 5236 9300 l 5236 9676 l 4837 9676 l + 4837 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 9300 m 5635 9300 l 5635 9676 l 5236 9676 l + 5236 9300 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 9300 m 5635 9300 l 5635 9676 l 5236 9676 l + 5236 9300 l cp gs col32 s gr +/Helvetica-iso ff 225.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr 
+/Helvetica-iso ff 225.00 scf sf +2064 7726 m +gs 1 -1 sc (...) col0 sh gr +% Polyline +n 2819 7658 m + 2869 7658 l gs col32 s gr +% Polyline +n 2952 7658 m + 3002 7658 l gs col32 s gr +% Polyline +n 3085 7658 m + 3135 7658 l gs col32 s gr +% Polyline +n 3219 7658 m + 3252 7658 l gs col32 s gr +% Polyline +n 2819 8033 m + 2869 8033 l gs col32 s gr +% Polyline +n 2952 8033 m + 3002 8033 l gs col32 s gr +% Polyline +n 3085 8033 m + 3135 8033 l gs col32 s gr +% Polyline +n 3219 8033 m + 3252 8033 l gs col32 s gr +% Polyline +n 2819 8025 m + 2819 8075 l gs col32 s gr +% Polyline +n 2819 8158 m + 2819 8208 l gs col32 s gr +% Polyline +n 2819 8291 m + 2819 8341 l gs col32 s gr +% Polyline +n 2419 8025 m + 2419 8075 l gs col32 s gr +% Polyline +n 2419 8158 m + 2419 8208 l gs col32 s gr +% Polyline +n 2419 8291 m + 2419 8341 l gs col32 s gr +% Polyline +n 2019 8025 m + 2019 8075 l gs col32 s gr +% Polyline +n 2019 8158 m + 2019 8208 l gs col32 s gr +% Polyline +n 2019 8291 m + 2019 8341 l gs col32 s gr +% Polyline +n 1619 8025 m + 1619 8075 l gs col32 s gr +% Polyline +n 1619 8158 m + 1619 8208 l gs col32 s gr +% Polyline +n 1619 8291 m + 1619 8341 l gs col32 s gr +% Polyline +n 4036 7658 m + 3986 7658 l gs col32 s gr +% Polyline +n 3902 7658 m + 3852 7658 l gs col32 s gr +% Polyline +n 3769 7658 m + 3719 7658 l gs col32 s gr +% Polyline +n 3636 7658 m + 3602 7658 l gs col32 s gr +% Polyline +n 4036 8033 m + 3986 8033 l gs col32 s gr +% Polyline +n 3902 8033 m + 3852 8033 l gs col32 s gr +% Polyline +n 3769 8033 m + 3719 8033 l gs col32 s gr +% Polyline +n 3636 8033 m + 3602 8033 l gs col32 s gr +% Polyline +n 4035 8025 m + 4035 8075 l gs col32 s gr +% Polyline +n 4035 8158 m + 4035 8208 l gs col32 s gr +% Polyline +n 4035 8291 m + 4035 8341 l gs col32 s gr +% Polyline +n 4435 8025 m + 4435 8075 l gs col32 s gr +% Polyline +n 4435 8158 m + 4435 8208 l gs col32 s gr +% Polyline +n 4435 8291 m + 4435 8341 l gs col32 s gr +% Polyline +n 4835 8025 m + 4835 8075 l gs col32 s gr 
+% Polyline +n 4835 8158 m + 4835 8208 l gs col32 s gr +% Polyline +n 4835 8291 m + 4835 8341 l gs col32 s gr +% Polyline +n 5235 8025 m + 5235 8075 l gs col32 s gr +% Polyline +n 5235 8158 m + 5235 8208 l gs col32 s gr +% Polyline +n 5235 8291 m + 5235 8341 l gs col32 s gr +% Polyline +n 4036 9300 m + 3986 9300 l gs col32 s gr +% Polyline +n 3902 9300 m + 3852 9300 l gs col32 s gr +% Polyline +n 3769 9300 m + 3719 9300 l gs col32 s gr +% Polyline +n 3636 9300 m + 3602 9300 l gs col32 s gr +% Polyline +n 4036 8925 m + 3986 8925 l gs col32 s gr +% Polyline +n 3902 8925 m + 3852 8925 l gs col32 s gr +% Polyline +n 3769 8925 m + 3719 8925 l gs col32 s gr +% Polyline +n 3636 8925 m + 3602 8925 l gs col32 s gr +% Polyline +n 4035 8933 m + 4035 8883 l gs col32 s gr +% Polyline +n 4035 8800 m + 4035 8750 l gs col32 s gr +% Polyline +n 4035 8666 m + 4035 8616 l gs col32 s gr +% Polyline +n 4435 8933 m + 4435 8883 l gs col32 s gr +% Polyline +n 4435 8800 m + 4435 8750 l gs col32 s gr +% Polyline +n 4435 8666 m + 4435 8616 l gs col32 s gr +% Polyline +n 4835 8933 m + 4835 8883 l gs col32 s gr +% Polyline +n 4835 8800 m + 4835 8750 l gs col32 s gr +% Polyline +n 4835 8666 m + 4835 8616 l gs col32 s gr +% Polyline +n 5235 8933 m + 5235 8883 l gs col32 s gr +% Polyline +n 5235 8800 m + 5235 8750 l gs col32 s gr +% Polyline +n 5235 8666 m + 5235 8616 l gs col32 s gr +% Polyline +n 2819 9300 m + 2869 9300 l gs col32 s gr +% Polyline +n 2952 9300 m + 3002 9300 l gs col32 s gr +% Polyline +n 3085 9300 m + 3135 9300 l gs col32 s gr +% Polyline +n 3219 9300 m + 3252 9300 l gs col32 s gr +% Polyline +n 2819 8925 m + 2869 8925 l gs col32 s gr +% Polyline +n 2952 8925 m + 3002 8925 l gs col32 s gr +% Polyline +n 3085 8925 m + 3135 8925 l gs col32 s gr +% Polyline +n 3219 8925 m + 3252 8925 l gs col32 s gr +% Polyline +n 2819 8933 m + 2819 8883 l gs col32 s gr +% Polyline +n 2819 8800 m + 2819 8750 l gs col32 s gr +% Polyline +n 2819 8666 m + 2819 8616 l gs col32 s gr +% Polyline +n 2419 
8933 m + 2419 8883 l gs col32 s gr +% Polyline +n 2419 8800 m + 2419 8750 l gs col32 s gr +% Polyline +n 2419 8666 m + 2419 8616 l gs col32 s gr +% Polyline +n 2019 8933 m + 2019 8883 l gs col32 s gr +% Polyline +n 2019 8800 m + 2019 8750 l gs col32 s gr +% Polyline +n 2019 8666 m + 2019 8616 l gs col32 s gr +% Polyline +n 1619 8933 m + 1619 8883 l gs col32 s gr +% Polyline +n 1619 8800 m + 1619 8750 l gs col32 s gr +% Polyline +n 1619 8666 m + 1619 8616 l gs col32 s gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +2338 7001 m +gs 1 -1 sc (ny ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +2605 7001 m +gs 1 -1 sc (+ 2-ny%2) col34 sh gr +/Helvetica-iso ff 195.00 scf sf +3500 7001 m +gs 1 -1 sc ( = 2*\(ny/2+1\)) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +681 8451 m +gs 1 -1 sc (nx) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1364 7179 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +6097 7179 m +gs 1 -1 sc (ny+1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1064 7479 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +864 9479 m +gs 1 -1 sc (nx-1) col0 sh gr +% Polyline +0.000 slw +n 5636 7280 m 6035 7280 l 6035 7656 l 5636 7656 l + 5636 7280 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 5636 7280 m 6035 7280 l 6035 7656 l 5636 7656 l + 5636 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 5636 7656 m 6035 7656 l 6035 8032 l 5636 8032 l + 5636 7656 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 5636 7656 m 6035 7656 l 6035 8032 l 5636 8032 l + 5636 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 5636 8924 m 6035 8924 l 6035 
9300 l 5636 9300 l + 5636 8924 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 5636 8924 m 6035 8924 l 6035 9300 l 5636 9300 l + 5636 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 5636 9300 m 6035 9300 l 6035 9676 l 5636 9676 l + 5636 9300 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 5636 9300 m 6035 9300 l 6035 9676 l 5636 9676 l + 5636 9300 l cp gs col32 s gr +% Polyline +0.000 slw +n 6036 7280 m 6435 7280 l 6435 7656 l 6036 7656 l + 6036 7280 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 6036 7280 m 6435 7280 l 6435 7656 l 6036 7656 l + 6036 7280 l cp gs col32 s gr +% Polyline +0.000 slw +n 6036 7656 m 6435 7656 l 6435 8032 l 6036 8032 l + 6036 7656 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 6036 7656 m 6435 7656 l 6435 8032 l 6036 8032 l + 6036 7656 l cp gs col32 s gr +% Polyline +0.000 slw +n 6036 8924 m 6435 8924 l 6435 9300 l 6036 9300 l + 6036 8924 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 6036 8924 m 6435 8924 l 6435 9300 l 6036 9300 l + 6036 8924 l cp gs col32 s gr +% Polyline +0.000 slw +n 6036 9300 m 6435 9300 l 6435 9676 l 6036 9676 l + 6036 9300 l cp gs col35 1.00 shd ef gr +% Polyline +7.500 slw +n 6036 9300 m 6435 9300 l 6435 9676 l 6036 9676 l + 6036 9300 l cp gs col32 s gr +% Polyline +n 5635 7283 m + 5635 9683 l gs col0 s gr +% Polyline +n 1420 7515 m + 6312 7515 l gs col0 s gr +% Polyline +0.000 slw +n 6348 7518 m 6117 7462 l + 6117 7515 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 7518 m 6117 7462 l + 6117 7515 l gs col0 s gr +% Polyline +0.000 slw +n 6348 7512 m 6117 7568 l + 6117 7515 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 7512 m 6117 7568 l + 6117 7515 l gs col0 s gr +% Polyline +n 1420 7891 m + 5863 7891 l gs col0 s gr +% Polyline +0.000 slw +n 5895 7894 m 5685 7838 l + 5685 7891 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5895 7894 m 5685 7838 l + 5685 7891 l gs col0 s gr +% Polyline +0.000 slw +n 5895 7888 m 5685 7944 l + 5685 7891 l gs 0.00 setgray ef 
gr +% Polyline +7.500 slw +n 5895 7888 m 5685 7944 l + 5685 7891 l gs col0 s gr +% Polyline +n 1420 9112 m + 5863 9112 l gs col0 s gr +% Polyline +0.000 slw +n 5895 9115 m 5685 9059 l + 5685 9112 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5895 9115 m 5685 9059 l + 5685 9112 l gs col0 s gr +% Polyline +0.000 slw +n 5895 9109 m 5685 9165 l + 5685 9112 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5895 9109 m 5685 9165 l + 5685 9112 l gs col0 s gr +% Polyline +n 1420 9488 m + 5863 9488 l gs col0 s gr +% Polyline +0.000 slw +n 5895 9491 m 5685 9435 l + 5685 9488 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5895 9491 m 5685 9435 l + 5685 9488 l gs col0 s gr +% Polyline +0.000 slw +n 5895 9485 m 5685 9541 l + 5685 9488 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5895 9485 m 5685 9541 l + 5685 9488 l gs col0 s gr +/Helvetica-iso ff 375.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 375.00 scf sf +6250 9461 m +gs 1 -1 sc 90.0 rot (\(padding\)) col0 sh gr +/Helvetica-Bold-iso ff 240.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-Bold-iso ff 240.00 scf sf +428 9283 m +gs 1 -1 sc 90.0 rot (input, in-place) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1264 7429 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1681 7429 m +gs 1 -1 sc (1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +2081 7429 m +gs 1 -1 sc (2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +2481 7429 m +gs 1 -1 sc (3) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4081 7429 m +gs 1 -1 sc (ny-4) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4897 
7429 m +gs 1 -1 sc (ny-2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +5297 7429 m +gs 1 -1 sc (ny-1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4497 7429 m +gs 1 -1 sc (ny-3) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1264 7795 m +gs 1 -1 sc (ny+2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1664 7795 m +gs 1 -1 sc (ny+3) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +5681 7429 m +gs 1 -1 sc (ny) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +6081 7429 m +gs 1 -1 sc (ny+1) col0 sh gr +% Polyline +0.000 slw +n 5226 5196 m 5623 5196 l 5623 5572 l 5226 5572 l + 5226 5196 l cp gs col36 1.00 shd ef gr +% Polyline +n 4827 5196 m 5226 5196 l 5226 5572 l 4827 5572 l + 4827 5196 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4835 5194 m 5631 5194 l 5631 5569 l 4835 5569 l + 4835 5194 l cp gs col38 s gr +% Polyline +0.000 slw +n 4434 5196 m 4832 5196 l 4832 5572 l 4434 5572 l + 4434 5196 l cp gs col36 1.00 shd ef gr +% Polyline +n 4035 5196 m 4434 5196 l 4434 5572 l 4035 5572 l + 4035 5196 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4044 5194 m 4840 5194 l 4840 5569 l 4044 5569 l + 4044 5194 l cp gs col38 s gr +% Polyline +0.000 slw +n 6026 5196 m 6440 5196 l 6440 5572 l 6026 5572 l + 6026 5196 l cp gs col36 1.00 shd ef gr +% Polyline +n 5627 5196 m 6026 5196 l 6026 5572 l 5627 5572 l + 5627 5196 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 5635 5194 m 6440 5194 l 6440 5569 l 5635 5569 l + 5635 5194 l cp gs col38 s gr +% Polyline +0.000 slw +n 5226 5571 m 5623 5571 l 5623 5947 l 5226 5947 l + 5226 5571 l cp gs col36 1.00 shd ef gr +% 
Polyline +n 4827 5571 m 5226 5571 l 5226 5947 l 4827 5947 l + 4827 5571 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4835 5569 m 5631 5569 l 5631 5944 l 4835 5944 l + 4835 5569 l cp gs col38 s gr +% Polyline +0.000 slw +n 4434 5571 m 4832 5571 l 4832 5947 l 4434 5947 l + 4434 5571 l cp gs col36 1.00 shd ef gr +% Polyline +n 4035 5571 m 4434 5571 l 4434 5947 l 4035 5947 l + 4035 5571 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4044 5569 m 4840 5569 l 4840 5944 l 4044 5944 l + 4044 5569 l cp gs col38 s gr +% Polyline +0.000 slw +n 6026 5571 m 6440 5571 l 6440 5947 l 6026 5947 l + 6026 5571 l cp gs col36 1.00 shd ef gr +% Polyline +n 5627 5571 m 6026 5571 l 6026 5947 l 5627 5947 l + 5627 5571 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 5635 5569 m 6440 5569 l 6440 5944 l 5635 5944 l + 5635 5569 l cp gs col38 s gr +% Polyline +0.000 slw +n 2409 5571 m 2807 5571 l 2807 5947 l 2409 5947 l + 2409 5571 l cp gs col36 1.00 shd ef gr +% Polyline +n 2010 5571 m 2409 5571 l 2409 5947 l 2010 5947 l + 2010 5571 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 5561 m 2815 5561 l 2815 5936 l 2019 5936 l + 2019 5561 l cp gs col38 s gr +% Polyline +0.000 slw +n 1618 5571 m 2015 5571 l 2015 5947 l 1618 5947 l + 1618 5571 l cp gs col36 1.00 shd ef gr +% Polyline +n 1219 5571 m 1618 5571 l 1618 5947 l 1219 5947 l + 1219 5571 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 1227 5561 m 2023 5561 l 2023 5939 l 1227 5939 l + 1227 5561 l cp gs col38 s gr +% Polyline +0.000 slw +n 2409 5196 m 2807 5196 l 2807 5572 l 2409 5572 l + 2409 5196 l cp gs col36 1.00 shd ef gr +% Polyline +n 2010 5196 m 2409 5196 l 2409 5572 l 2010 5572 l + 2010 5196 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 5186 m 2815 5186 l 2815 5561 l 2019 5561 l + 2019 5186 l cp gs col38 s gr +% Polyline +0.000 slw +n 1618 5196 m 2015 5196 l 2015 5572 l 1618 5572 l + 1618 5196 l cp gs col36 1.00 shd ef gr +% Polyline +n 1219 5196 m 1618 5196 l 1618 5572 l 1219 
5572 l + 1219 5196 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 1227 5186 m 2023 5186 l 2023 5561 l 1227 5561 l + 1227 5186 l cp gs col38 s gr +% Polyline +0.000 slw +n 5226 3546 m 5623 3546 l 5623 3922 l 5226 3922 l + 5226 3546 l cp gs col36 1.00 shd ef gr +% Polyline +n 4827 3546 m 5226 3546 l 5226 3922 l 4827 3922 l + 4827 3546 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4835 3544 m 5631 3544 l 5631 3919 l 4835 3919 l + 4835 3544 l cp gs col32 s gr +% Polyline +0.000 slw +n 4434 3546 m 4832 3546 l 4832 3922 l 4434 3922 l + 4434 3546 l cp gs col36 1.00 shd ef gr +% Polyline +n 4035 3546 m 4434 3546 l 4434 3922 l 4035 3922 l + 4035 3546 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4044 3544 m 4840 3544 l 4840 3919 l 4044 3919 l + 4044 3544 l cp gs col38 s gr +% Polyline +0.000 slw +n 5990 3546 m 6432 3546 l 6432 3955 l 5990 3955 l + 5990 3546 l cp gs col36 1.00 shd ef gr +% Polyline +n 5627 3546 m 6026 3546 l 6026 3922 l 5627 3922 l + 5627 3546 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 5635 3544 m 6440 3544 l 6440 3919 l 5635 3919 l + 5635 3544 l cp gs col38 s gr +% Polyline +0.000 slw +n 5226 3921 m 5623 3921 l 5623 4297 l 5226 4297 l + 5226 3921 l cp gs col36 1.00 shd ef gr +% Polyline +n 4827 3921 m 5226 3921 l 5226 4297 l 4827 4297 l + 4827 3921 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4835 3919 m 5631 3919 l 5631 4294 l 4835 4294 l + 4835 3919 l cp gs col38 s gr +% Polyline +0.000 slw +n 4434 3921 m 4832 3921 l 4832 4297 l 4434 4297 l + 4434 3921 l cp gs col36 1.00 shd ef gr +% Polyline +n 4035 3921 m 4434 3921 l 4434 4297 l 4035 4297 l + 4035 3921 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 4044 3919 m 4840 3919 l 4840 4294 l 4044 4294 l + 4044 3919 l cp gs col38 s gr +% Polyline +0.000 slw +n 6026 3921 m 6432 3921 l 6432 4297 l 6026 4297 l + 6026 3921 l cp gs col36 1.00 shd ef gr +% Polyline +n 5627 3921 m 6026 3921 l 6026 4297 l 5627 4297 l + 5627 3921 l cp gs col37 1.00 shd ef gr +% 
Polyline +7.500 slw +n 5635 3919 m 6440 3919 l 6440 4294 l 5635 4294 l + 5635 3919 l cp gs col38 s gr +% Polyline +0.000 slw +n 2409 3921 m 2807 3921 l 2807 4297 l 2409 4297 l + 2409 3921 l cp gs col36 1.00 shd ef gr +% Polyline +n 2010 3921 m 2409 3921 l 2409 4297 l 2010 4297 l + 2010 3921 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 3919 m 2815 3919 l 2815 4294 l 2019 4294 l + 2019 3919 l cp gs col38 s gr +% Polyline +0.000 slw +n 1618 3921 m 2015 3921 l 2015 4297 l 1618 4297 l + 1618 3921 l cp gs col36 1.00 shd ef gr +% Polyline +n 1219 3921 m 1618 3921 l 1618 4297 l 1219 4297 l + 1219 3921 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 1227 3919 m 2023 3919 l 2023 4294 l 1227 4294 l + 1227 3919 l cp gs col38 s gr +% Polyline +0.000 slw +n 2409 3546 m 2815 3546 l 2815 3922 l 2409 3922 l + 2409 3546 l cp gs col36 1.00 shd ef gr +% Polyline +n 2010 3546 m 2409 3546 l 2409 3922 l 2010 3922 l + 2010 3546 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 3544 m 2815 3544 l 2815 3919 l 2019 3919 l + 2019 3544 l cp gs col38 s gr +% Polyline +0.000 slw +n 1618 3546 m 2015 3546 l 2015 3922 l 1618 3922 l + 1618 3546 l cp gs col36 1.00 shd ef gr +% Polyline +n 1219 3546 m 1618 3546 l 1618 3922 l 1219 3922 l + 1219 3546 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 1227 3544 m 2023 3544 l 2023 3919 l 1227 3919 l + 1227 3544 l cp gs col38 s gr +% Polyline +n 1221 3546 m 6440 3546 l 6440 5941 l 1221 5941 l + 1221 3546 l cp gs col32 s gr +/Helvetica-iso ff 225.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 225.00 scf sf +2064 3993 m +gs 1 -1 sc (...) 
col0 sh gr +% Polyline +n 2819 3915 m + 2869 3915 l gs col32 s gr +% Polyline +n 2952 3915 m + 3002 3915 l gs col32 s gr +% Polyline +n 3085 3915 m + 3135 3915 l gs col32 s gr +% Polyline +n 3219 3915 m + 3252 3915 l gs col32 s gr +% Polyline +n 2819 4290 m + 2869 4290 l gs col32 s gr +% Polyline +n 2952 4290 m + 3002 4290 l gs col32 s gr +% Polyline +n 3085 4290 m + 3135 4290 l gs col32 s gr +% Polyline +n 3219 4290 m + 3252 4290 l gs col32 s gr +% Polyline +n 2819 4282 m + 2819 4332 l gs col32 s gr +% Polyline +n 2819 4415 m + 2819 4465 l gs col32 s gr +% Polyline +n 2819 4548 m + 2819 4598 l gs col32 s gr +% Polyline +n 2019 4282 m + 2019 4332 l gs col32 s gr +% Polyline +n 2019 4415 m + 2019 4465 l gs col32 s gr +% Polyline +n 2019 4548 m + 2019 4598 l gs col32 s gr +% Polyline +n 4036 3915 m + 3986 3915 l gs col32 s gr +% Polyline +n 3902 3915 m + 3852 3915 l gs col32 s gr +% Polyline +n 3769 3915 m + 3719 3915 l gs col32 s gr +% Polyline +n 3636 3915 m + 3602 3915 l gs col32 s gr +% Polyline +n 4036 4290 m + 3986 4290 l gs col32 s gr +% Polyline +n 3902 4290 m + 3852 4290 l gs col32 s gr +% Polyline +n 3769 4290 m + 3719 4290 l gs col32 s gr +% Polyline +n 3636 4290 m + 3602 4290 l gs col32 s gr +% Polyline +n 4035 4282 m + 4035 4332 l gs col32 s gr +% Polyline +n 4035 4415 m + 4035 4465 l gs col32 s gr +% Polyline +n 4035 4548 m + 4035 4598 l gs col32 s gr +% Polyline +n 4835 4282 m + 4835 4332 l gs col32 s gr +% Polyline +n 4835 4415 m + 4835 4465 l gs col32 s gr +% Polyline +n 4835 4548 m + 4835 4598 l gs col32 s gr +% Polyline +n 4036 5565 m + 3986 5565 l gs col32 s gr +% Polyline +n 3902 5565 m + 3852 5565 l gs col32 s gr +% Polyline +n 3769 5565 m + 3719 5565 l gs col32 s gr +% Polyline +n 3636 5565 m + 3602 5565 l gs col32 s gr +% Polyline +n 4036 5190 m + 3986 5190 l gs col32 s gr +% Polyline +n 3902 5190 m + 3852 5190 l gs col32 s gr +% Polyline +n 3769 5190 m + 3719 5190 l gs col32 s gr +% Polyline +n 3636 5190 m + 3602 5190 l gs col32 s gr +% 
Polyline +n 4035 5198 m + 4035 5148 l gs col32 s gr +% Polyline +n 4035 5065 m + 4035 5015 l gs col32 s gr +% Polyline +n 4035 4932 m + 4035 4882 l gs col32 s gr +% Polyline +n 4835 5198 m + 4835 5148 l gs col32 s gr +% Polyline +n 4835 5065 m + 4835 5015 l gs col32 s gr +% Polyline +n 4835 4932 m + 4835 4882 l gs col32 s gr +% Polyline +n 2819 5565 m + 2869 5565 l gs col32 s gr +% Polyline +n 2952 5565 m + 3002 5565 l gs col32 s gr +% Polyline +n 3085 5565 m + 3135 5565 l gs col32 s gr +% Polyline +n 3219 5565 m + 3252 5565 l gs col32 s gr +% Polyline +n 2819 5190 m + 2869 5190 l gs col32 s gr +% Polyline +n 2952 5190 m + 3002 5190 l gs col32 s gr +% Polyline +n 3085 5190 m + 3135 5190 l gs col32 s gr +% Polyline +n 3219 5190 m + 3252 5190 l gs col32 s gr +% Polyline +n 2819 5198 m + 2819 5148 l gs col32 s gr +% Polyline +n 2819 5065 m + 2819 5015 l gs col32 s gr +% Polyline +n 2819 4932 m + 2819 4882 l gs col32 s gr +% Polyline +n 2019 5198 m + 2019 5148 l gs col32 s gr +% Polyline +n 2019 5065 m + 2019 5015 l gs col32 s gr +% Polyline +n 2019 4932 m + 2019 4882 l gs col32 s gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +3181 3267 m +gs 1 -1 sc (ny/2+1) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +681 4717 m +gs 1 -1 sc (nx) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1564 3445 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +5831 3445 m +gs 1 -1 sc (ny/2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1064 3745 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +864 5745 m +gs 1 -1 sc (nx-1) col0 sh gr +% Polyline +n 5635 4282 m + 5635 4332 l 
gs col32 s gr +% Polyline +n 5635 4415 m + 5635 4465 l gs col32 s gr +% Polyline +n 5635 4548 m + 5635 4598 l gs col32 s gr +% Polyline +n 5635 5198 m + 5635 5148 l gs col32 s gr +% Polyline +n 5635 5065 m + 5635 5015 l gs col32 s gr +% Polyline +n 5635 4932 m + 5635 4882 l gs col32 s gr +% Polyline +n 1420 3781 m + 6312 3781 l gs col0 s gr +% Polyline +0.000 slw +n 6348 3784 m 6117 3728 l + 6117 3781 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 3784 m 6117 3728 l + 6117 3781 l gs col0 s gr +% Polyline +0.000 slw +n 6348 3778 m 6117 3834 l + 6117 3781 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 3778 m 6117 3834 l + 6117 3781 l gs col0 s gr +% Polyline +n 1420 4169 m + 6312 4169 l gs col0 s gr +% Polyline +0.000 slw +n 6348 4172 m 6117 4116 l + 6117 4169 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 4172 m 6117 4116 l + 6117 4169 l gs col0 s gr +% Polyline +0.000 slw +n 6348 4166 m 6117 4222 l + 6117 4169 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 4166 m 6117 4222 l + 6117 4169 l gs col0 s gr +% Polyline +n 1420 5390 m + 6312 5390 l gs col0 s gr +% Polyline +0.000 slw +n 6348 5393 m 6117 5337 l + 6117 5390 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 5393 m 6117 5337 l + 6117 5390 l gs col0 s gr +% Polyline +0.000 slw +n 6348 5387 m 6117 5443 l + 6117 5390 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 5387 m 6117 5443 l + 6117 5390 l gs col0 s gr +% Polyline +n 1420 5766 m + 6312 5766 l gs col0 s gr +% Polyline +0.000 slw +n 6348 5769 m 6117 5713 l + 6117 5766 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 5769 m 6117 5713 l + 6117 5766 l gs col0 s gr +% Polyline +0.000 slw +n 6348 5763 m 6117 5819 l + 6117 5766 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 6348 5763 m 6117 5819 l + 6117 5766 l gs col0 s gr +% Polyline +0.000 slw +n 1469 6215 m 1868 6215 l 1868 6591 l 1469 6591 l + 1469 6215 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1469 6215 m 1868 6215 l 1868 6591 l 1469 6591 l + 
1469 6215 l cp gs col32 s gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +1981 6463 m +gs 1 -1 sc (= double) col0 sh gr +% Polyline +0.000 slw +n 4026 6217 m 4432 6217 l 4432 6593 l 4026 6593 l + 4026 6217 l cp gs col36 1.00 shd ef gr +% Polyline +n 3627 6217 m 4026 6217 l 4026 6593 l 3627 6593 l + 3627 6217 l cp gs col37 1.00 shd ef gr +% Polyline +7.500 slw +n 3635 6215 m 4440 6215 l 4440 6590 l 3635 6590 l + 3635 6215 l cp gs col38 s gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +4547 6463 m +gs 1 -1 sc (= fftw_complex) col0 sh gr +/Helvetica-Bold-iso ff 240.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-Bold-iso ff 240.00 scf sf +428 5128 m +gs 1 -1 sc 90.0 rot (output) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1264 3679 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +2081 3679 m +gs 1 -1 sc (1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4097 3679 m +gs 1 -1 sc (ny/2-2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4914 3679 m +gs 1 -1 sc (ny/2-1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1264 4062 m +gs 1 -1 sc (ny/2+1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +5697 3679 m +gs 1 -1 sc (ny/2) col0 sh gr +% Polyline +0.000 slw +n 1221 495 m 5635 495 l 5635 2890 l 1221 2890 l + 1221 495 l cp gs col7 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 495 m 5635 495 l 5635 2890 l 1221 2890 l + 1221 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 495 m 1620 495 l 1620 871 l 1221 871 l + 1221 495 l cp gs col33 
1.00 shd ef gr +% Polyline +7.500 slw +n 1221 495 m 1620 495 l 1620 871 l 1221 871 l + 1221 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 495 m 2019 495 l 2019 871 l 1620 871 l + 1620 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 495 m 2019 495 l 2019 871 l 1620 871 l + 1620 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 495 m 2418 495 l 2418 871 l 2019 871 l + 2019 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 495 m 2418 495 l 2418 871 l 2019 871 l + 2019 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 495 m 2817 495 l 2817 871 l 2418 871 l + 2418 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 495 m 2817 495 l 2817 871 l 2418 871 l + 2418 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 495 m 4438 495 l 4438 871 l 4038 871 l + 4038 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 495 m 4438 495 l 4438 871 l 4038 871 l + 4038 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 495 m 4837 495 l 4837 871 l 4438 871 l + 4438 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 495 m 4837 495 l 4837 871 l 4438 871 l + 4438 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 495 m 5236 495 l 5236 871 l 4837 871 l + 4837 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 495 m 5236 495 l 5236 871 l 4837 871 l + 4837 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 495 m 5635 495 l 5635 871 l 5236 871 l + 5236 495 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 495 m 5635 495 l 5635 871 l 5236 871 l + 5236 495 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 871 m 1620 871 l 1620 1247 l 1221 1247 l + 1221 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 871 m 1620 871 l 1620 1247 l 1221 1247 l + 1221 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 871 m 2019 871 l 2019 1247 l 1620 1247 l + 1620 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 871 m 2019 871 l 2019 1247 l 1620 1247 l + 1620 871 l cp 
gs col32 s gr +% Polyline +0.000 slw +n 2019 871 m 2418 871 l 2418 1247 l 2019 1247 l + 2019 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 871 m 2418 871 l 2418 1247 l 2019 1247 l + 2019 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 871 m 2817 871 l 2817 1247 l 2418 1247 l + 2418 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 871 m 2817 871 l 2817 1247 l 2418 1247 l + 2418 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 871 m 4438 871 l 4438 1247 l 4038 1247 l + 4038 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 871 m 4438 871 l 4438 1247 l 4038 1247 l + 4038 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 871 m 4837 871 l 4837 1247 l 4438 1247 l + 4438 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 871 m 4837 871 l 4837 1247 l 4438 1247 l + 4438 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 871 m 5236 871 l 5236 1247 l 4837 1247 l + 4837 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 871 m 5236 871 l 5236 1247 l 4837 1247 l + 4837 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 871 m 5635 871 l 5635 1247 l 5236 1247 l + 5236 871 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 871 m 5635 871 l 5635 1247 l 5236 1247 l + 5236 871 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 2139 m 1620 2139 l 1620 2515 l 1221 2515 l + 1221 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 2139 m 1620 2139 l 1620 2515 l 1221 2515 l + 1221 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 2139 m 2019 2139 l 2019 2515 l 1620 2515 l + 1620 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 2139 m 2019 2139 l 2019 2515 l 1620 2515 l + 1620 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 2139 m 2418 2139 l 2418 2515 l 2019 2515 l + 2019 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 2139 m 2418 2139 l 2418 2515 l 2019 2515 l + 2019 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 2139 m 
2817 2139 l 2817 2515 l 2418 2515 l + 2418 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 2139 m 2817 2139 l 2817 2515 l 2418 2515 l + 2418 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 2139 m 4438 2139 l 4438 2515 l 4038 2515 l + 4038 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 2139 m 4438 2139 l 4438 2515 l 4038 2515 l + 4038 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 2139 m 4837 2139 l 4837 2515 l 4438 2515 l + 4438 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 2139 m 4837 2139 l 4837 2515 l 4438 2515 l + 4438 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 2139 m 5236 2139 l 5236 2515 l 4837 2515 l + 4837 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 2139 m 5236 2139 l 5236 2515 l 4837 2515 l + 4837 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 2139 m 5635 2139 l 5635 2515 l 5236 2515 l + 5236 2139 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 2139 m 5635 2139 l 5635 2515 l 5236 2515 l + 5236 2139 l cp gs col32 s gr +% Polyline +0.000 slw +n 1221 2515 m 1620 2515 l 1620 2890 l 1221 2890 l + 1221 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1221 2515 m 1620 2515 l 1620 2890 l 1221 2890 l + 1221 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 1620 2515 m 2019 2515 l 2019 2890 l 1620 2890 l + 1620 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 1620 2515 m 2019 2515 l 2019 2890 l 1620 2890 l + 1620 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 2019 2515 m 2418 2515 l 2418 2890 l 2019 2890 l + 2019 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2019 2515 m 2418 2515 l 2418 2890 l 2019 2890 l + 2019 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 2418 2515 m 2817 2515 l 2817 2890 l 2418 2890 l + 2418 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 2418 2515 m 2817 2515 l 2817 2890 l 2418 2890 l + 2418 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 4038 2515 m 4438 2515 l 4438 
2890 l 4038 2890 l + 4038 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4038 2515 m 4438 2515 l 4438 2890 l 4038 2890 l + 4038 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 4438 2515 m 4837 2515 l 4837 2890 l 4438 2890 l + 4438 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4438 2515 m 4837 2515 l 4837 2890 l 4438 2890 l + 4438 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 4837 2515 m 5236 2515 l 5236 2890 l 4837 2890 l + 4837 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 4837 2515 m 5236 2515 l 5236 2890 l 4837 2890 l + 4837 2515 l cp gs col32 s gr +% Polyline +0.000 slw +n 5236 2515 m 5635 2515 l 5635 2890 l 5236 2890 l + 5236 2515 l cp gs col33 1.00 shd ef gr +% Polyline +7.500 slw +n 5236 2515 m 5635 2515 l 5635 2890 l 5236 2890 l + 5236 2515 l cp gs col32 s gr +% Polyline +n 1420 730 m + 5459 730 l gs col0 s gr +% Polyline +0.000 slw +n 5488 733 m 5298 677 l + 5298 730 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 733 m 5298 677 l + 5298 730 l gs col0 s gr +% Polyline +0.000 slw +n 5488 727 m 5298 783 l + 5298 730 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 727 m 5298 783 l + 5298 730 l gs col0 s gr +/Helvetica-iso ff 225.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 225.00 scf sf +2064 943 m +gs 1 -1 sc (...) 
col0 sh gr +% Polyline +n 2819 873 m + 2869 873 l gs col32 s gr +% Polyline +n 2952 873 m + 3002 873 l gs col32 s gr +% Polyline +n 3085 873 m + 3135 873 l gs col32 s gr +% Polyline +n 3219 873 m + 3252 873 l gs col32 s gr +% Polyline +n 2819 1248 m + 2869 1248 l gs col32 s gr +% Polyline +n 2952 1248 m + 3002 1248 l gs col32 s gr +% Polyline +n 3085 1248 m + 3135 1248 l gs col32 s gr +% Polyline +n 3219 1248 m + 3252 1248 l gs col32 s gr +% Polyline +n 2819 1240 m + 2819 1290 l gs col32 s gr +% Polyline +n 2819 1373 m + 2819 1423 l gs col32 s gr +% Polyline +n 2819 1506 m + 2819 1556 l gs col32 s gr +% Polyline +n 2419 1240 m + 2419 1290 l gs col32 s gr +% Polyline +n 2419 1373 m + 2419 1423 l gs col32 s gr +% Polyline +n 2419 1506 m + 2419 1556 l gs col32 s gr +% Polyline +n 2019 1240 m + 2019 1290 l gs col32 s gr +% Polyline +n 2019 1373 m + 2019 1423 l gs col32 s gr +% Polyline +n 2019 1506 m + 2019 1556 l gs col32 s gr +% Polyline +n 1619 1240 m + 1619 1290 l gs col32 s gr +% Polyline +n 1619 1373 m + 1619 1423 l gs col32 s gr +% Polyline +n 1619 1506 m + 1619 1556 l gs col32 s gr +% Polyline +n 4036 873 m + 3986 873 l gs col32 s gr +% Polyline +n 3902 873 m + 3852 873 l gs col32 s gr +% Polyline +n 3769 873 m + 3719 873 l gs col32 s gr +% Polyline +n 3636 873 m + 3602 873 l gs col32 s gr +% Polyline +n 4036 1248 m + 3986 1248 l gs col32 s gr +% Polyline +n 3902 1248 m + 3852 1248 l gs col32 s gr +% Polyline +n 3769 1248 m + 3719 1248 l gs col32 s gr +% Polyline +n 3636 1248 m + 3602 1248 l gs col32 s gr +% Polyline +n 4035 1240 m + 4035 1290 l gs col32 s gr +% Polyline +n 4035 1373 m + 4035 1423 l gs col32 s gr +% Polyline +n 4035 1506 m + 4035 1556 l gs col32 s gr +% Polyline +n 4435 1240 m + 4435 1290 l gs col32 s gr +% Polyline +n 4435 1373 m + 4435 1423 l gs col32 s gr +% Polyline +n 4435 1506 m + 4435 1556 l gs col32 s gr +% Polyline +n 4835 1240 m + 4835 1290 l gs col32 s gr +% Polyline +n 4835 1373 m + 4835 1423 l gs col32 s gr +% Polyline +n 4835 1506 
m + 4835 1556 l gs col32 s gr +% Polyline +n 5235 1240 m + 5235 1290 l gs col32 s gr +% Polyline +n 5235 1373 m + 5235 1423 l gs col32 s gr +% Polyline +n 5235 1506 m + 5235 1556 l gs col32 s gr +% Polyline +n 4036 2515 m + 3986 2515 l gs col32 s gr +% Polyline +n 3902 2515 m + 3852 2515 l gs col32 s gr +% Polyline +n 3769 2515 m + 3719 2515 l gs col32 s gr +% Polyline +n 3636 2515 m + 3602 2515 l gs col32 s gr +% Polyline +n 4036 2140 m + 3986 2140 l gs col32 s gr +% Polyline +n 3902 2140 m + 3852 2140 l gs col32 s gr +% Polyline +n 3769 2140 m + 3719 2140 l gs col32 s gr +% Polyline +n 3636 2140 m + 3602 2140 l gs col32 s gr +% Polyline +n 4035 2148 m + 4035 2098 l gs col32 s gr +% Polyline +n 4035 2015 m + 4035 1965 l gs col32 s gr +% Polyline +n 4035 1881 m + 4035 1831 l gs col32 s gr +% Polyline +n 4435 2148 m + 4435 2098 l gs col32 s gr +% Polyline +n 4435 2015 m + 4435 1965 l gs col32 s gr +% Polyline +n 4435 1881 m + 4435 1831 l gs col32 s gr +% Polyline +n 4835 2148 m + 4835 2098 l gs col32 s gr +% Polyline +n 4835 2015 m + 4835 1965 l gs col32 s gr +% Polyline +n 4835 1881 m + 4835 1831 l gs col32 s gr +% Polyline +n 5235 2148 m + 5235 2098 l gs col32 s gr +% Polyline +n 5235 2015 m + 5235 1965 l gs col32 s gr +% Polyline +n 5235 1881 m + 5235 1831 l gs col32 s gr +% Polyline +n 2819 2515 m + 2869 2515 l gs col32 s gr +% Polyline +n 2952 2515 m + 3002 2515 l gs col32 s gr +% Polyline +n 3085 2515 m + 3135 2515 l gs col32 s gr +% Polyline +n 3219 2515 m + 3252 2515 l gs col32 s gr +% Polyline +n 2819 2140 m + 2869 2140 l gs col32 s gr +% Polyline +n 2952 2140 m + 3002 2140 l gs col32 s gr +% Polyline +n 3085 2140 m + 3135 2140 l gs col32 s gr +% Polyline +n 3219 2140 m + 3252 2140 l gs col32 s gr +% Polyline +n 2819 2148 m + 2819 2098 l gs col32 s gr +% Polyline +n 2819 2015 m + 2819 1965 l gs col32 s gr +% Polyline +n 2819 1881 m + 2819 1831 l gs col32 s gr +% Polyline +n 2419 2148 m + 2419 2098 l gs col32 s gr +% Polyline +n 2419 2015 m + 2419 1965 l gs 
col32 s gr +% Polyline +n 2419 1881 m + 2419 1831 l gs col32 s gr +% Polyline +n 2019 2148 m + 2019 2098 l gs col32 s gr +% Polyline +n 2019 2015 m + 2019 1965 l gs col32 s gr +% Polyline +n 2019 1881 m + 2019 1831 l gs col32 s gr +% Polyline +n 1619 2148 m + 1619 2098 l gs col32 s gr +% Polyline +n 1619 2015 m + 1619 1965 l gs col32 s gr +% Polyline +n 1619 1881 m + 1619 1831 l gs col32 s gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +3381 217 m +gs 1 -1 sc (ny) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 195.00 scf sf +681 1667 m +gs 1 -1 sc (nx) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1364 395 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +5281 395 m +gs 1 -1 sc (ny-1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1064 695 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +864 2695 m +gs 1 -1 sc (nx-1) col0 sh gr +% Polyline +n 1420 1106 m + 5459 1106 l gs col0 s gr +% Polyline +0.000 slw +n 5488 1109 m 5298 1053 l + 5298 1106 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 1109 m 5298 1053 l + 5298 1106 l gs col0 s gr +% Polyline +0.000 slw +n 5488 1103 m 5298 1159 l + 5298 1106 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 1103 m 5298 1159 l + 5298 1106 l gs col0 s gr +% Polyline +n 1420 2327 m + 5459 2327 l gs col0 s gr +% Polyline +0.000 slw +n 5488 2330 m 5298 2274 l + 5298 2327 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 2330 m 5298 2274 l + 5298 2327 l gs col0 s gr +% Polyline +0.000 slw +n 5488 2324 m 5298 2380 l + 5298 2327 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 2324 m 5298 2380 l + 5298 2327 l gs 
col0 s gr +% Polyline +n 1420 2703 m + 5459 2703 l gs col0 s gr +% Polyline +0.000 slw +n 5488 2706 m 5298 2650 l + 5298 2703 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 2706 m 5298 2650 l + 5298 2703 l gs col0 s gr +% Polyline +0.000 slw +n 5488 2700 m 5298 2755 l + 5298 2703 l gs 0.00 setgray ef gr +% Polyline +7.500 slw +n 5488 2700 m 5298 2755 l + 5298 2703 l gs col0 s gr +/Helvetica-Bold-iso ff 240.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-Bold-iso ff 240.00 scf sf +428 2734 m +gs 1 -1 sc 90.0 rot (input, out-of-place) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1264 629 m +gs 1 -1 sc (0) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1681 629 m +gs 1 -1 sc (1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +2081 629 m +gs 1 -1 sc (2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +2481 629 m +gs 1 -1 sc (3) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4081 629 m +gs 1 -1 sc (ny-4) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4897 629 m +gs 1 -1 sc (ny-2) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +5297 629 m +gs 1 -1 sc (ny-1) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +4497 629 m +gs 1 -1 sc (ny-3) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +74 89 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1264 1012 m +gs 1 -1 sc (ny) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1425 4800 m +gs 1 -1 sc ( ) col0 sh gr +/Helvetica-iso ff 150.00 scf sf +1664 1012 m +gs 1 -1 sc (ny+1) col0 sh gr 
+% Polyline +n 273 3662 m + 273 3039 l gs col0 s gr +% Polyline +n 382 3920 m + 156 3662 l gs col0 s gr +% Polyline +n 273 3662 m + 148 3662 l gs col0 s gr +% Polyline +n 487 3662 m + 487 3039 l gs col0 s gr +% Polyline +n 378 3920 m + 604 3662 l gs col0 s gr +% Polyline +n 487 3662 m + 612 3662 l gs col0 s gr +% Polyline +n 273 6130 m + 273 6753 l gs col0 s gr +% Polyline +n 382 5872 m + 156 6130 l gs col0 s gr +% Polyline +n 273 6130 m + 148 6130 l gs col0 s gr +% Polyline +n 487 6129 m + 487 6753 l gs col0 s gr +% Polyline +n 378 5872 m + 604 6129 l gs col0 s gr +% Polyline +n 487 6129 m + 612 6129 l gs col0 s gr +% here ends figure; +pagefooter +showpage +%%Trailer +%EOF diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/rfftwnd.fig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/rfftwnd.fig Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1148 @@ +#FIG 3.2 +Portrait +Flush left +Inches +Letter +100.00 +Single +-2 +1200 2 +0 32 #797d79 +0 33 #efebef +0 34 #e73020 +0 35 #f78e86 +0 36 #6996c7 +0 37 #86aad7 +0 38 #dfdb00 +6 75 75 6450 9750 +2 1 0 0 7 7 998 0 20 4.000 0 0 0 0 0 5 + 1221 7280 6435 7280 6435 9676 1221 9676 1221 7280 +2 1 0 1 32 32 997 0 -1 4.000 0 0 0 0 0 5 + 1221 7280 6435 7280 6435 9676 1221 9676 1221 7280 +2 1 0 0 33 33 996 0 20 4.000 0 0 0 0 0 5 + 1221 7280 1620 7280 1620 7656 1221 7656 1221 7280 +2 1 0 1 32 32 995 0 -1 4.000 0 0 0 0 0 5 + 1221 7280 1620 7280 1620 7656 1221 7656 1221 7280 +2 1 0 0 33 33 994 0 20 4.000 0 0 0 0 0 5 + 1620 7280 2019 7280 2019 7656 1620 7656 1620 7280 +2 1 0 1 32 32 993 0 -1 4.000 0 0 0 0 0 5 + 1620 7280 2019 7280 2019 7656 1620 7656 1620 7280 +2 1 0 0 33 33 992 0 20 4.000 0 0 0 0 0 5 + 2019 7280 2418 7280 2418 7656 2019 7656 2019 7280 +2 1 0 1 32 32 991 0 -1 4.000 0 0 0 0 0 5 + 2019 7280 2418 7280 2418 7656 2019 7656 2019 7280 +2 1 0 0 33 33 990 0 20 4.000 0 0 0 0 0 5 + 2418 7280 2817 7280 2817 7656 2418 7656 2418 7280 +2 1 0 1 32 32 989 0 -1 4.000 0 0 0 0 0 5 + 2418 7280 2817 7280 2817 
7656 2418 7656 2418 7280 +2 1 0 0 33 33 988 0 20 4.000 0 0 0 0 0 5 + 4038 7280 4438 7280 4438 7656 4038 7656 4038 7280 +2 1 0 1 32 32 987 0 -1 4.000 0 0 0 0 0 5 + 4038 7280 4438 7280 4438 7656 4038 7656 4038 7280 +2 1 0 0 33 33 986 0 20 4.000 0 0 0 0 0 5 + 4438 7280 4837 7280 4837 7656 4438 7656 4438 7280 +2 1 0 1 32 32 985 0 -1 4.000 0 0 0 0 0 5 + 4438 7280 4837 7280 4837 7656 4438 7656 4438 7280 +2 1 0 0 33 33 984 0 20 4.000 0 0 0 0 0 5 + 4837 7280 5236 7280 5236 7656 4837 7656 4837 7280 +2 1 0 1 32 32 983 0 -1 4.000 0 0 0 0 0 5 + 4837 7280 5236 7280 5236 7656 4837 7656 4837 7280 +2 1 0 0 33 33 982 0 20 4.000 0 0 0 0 0 5 + 5236 7280 5635 7280 5635 7656 5236 7656 5236 7280 +2 1 0 1 32 32 981 0 -1 4.000 0 0 0 0 0 5 + 5236 7280 5635 7280 5635 7656 5236 7656 5236 7280 +2 1 0 0 33 33 980 0 20 4.000 0 0 0 0 0 5 + 1221 7656 1620 7656 1620 8032 1221 8032 1221 7656 +2 1 0 1 32 32 979 0 -1 4.000 0 0 0 0 0 5 + 1221 7656 1620 7656 1620 8032 1221 8032 1221 7656 +2 1 0 0 33 33 978 0 20 4.000 0 0 0 0 0 5 + 1620 7656 2019 7656 2019 8032 1620 8032 1620 7656 +2 1 0 1 32 32 977 0 -1 4.000 0 0 0 0 0 5 + 1620 7656 2019 7656 2019 8032 1620 8032 1620 7656 +2 1 0 0 33 33 976 0 20 4.000 0 0 0 0 0 5 + 2019 7656 2418 7656 2418 8032 2019 8032 2019 7656 +2 1 0 1 32 32 975 0 -1 4.000 0 0 0 0 0 5 + 2019 7656 2418 7656 2418 8032 2019 8032 2019 7656 +2 1 0 0 33 33 974 0 20 4.000 0 0 0 0 0 5 + 2418 7656 2817 7656 2817 8032 2418 8032 2418 7656 +2 1 0 1 32 32 973 0 -1 4.000 0 0 0 0 0 5 + 2418 7656 2817 7656 2817 8032 2418 8032 2418 7656 +2 1 0 0 33 33 972 0 20 4.000 0 0 0 0 0 5 + 4038 7656 4438 7656 4438 8032 4038 8032 4038 7656 +2 1 0 1 32 32 971 0 -1 4.000 0 0 0 0 0 5 + 4038 7656 4438 7656 4438 8032 4038 8032 4038 7656 +2 1 0 0 33 33 970 0 20 4.000 0 0 0 0 0 5 + 4438 7656 4837 7656 4837 8032 4438 8032 4438 7656 +2 1 0 1 32 32 969 0 -1 4.000 0 0 0 0 0 5 + 4438 7656 4837 7656 4837 8032 4438 8032 4438 7656 +2 1 0 0 33 33 968 0 20 4.000 0 0 0 0 0 5 + 4837 7656 5236 7656 5236 8032 4837 8032 4837 7656 
+2 1 0 1 32 32 967 0 -1 4.000 0 0 0 0 0 5 + 4837 7656 5236 7656 5236 8032 4837 8032 4837 7656 +2 1 0 0 33 33 966 0 20 4.000 0 0 0 0 0 5 + 5236 7656 5635 7656 5635 8032 5236 8032 5236 7656 +2 1 0 1 32 32 965 0 -1 4.000 0 0 0 0 0 5 + 5236 7656 5635 7656 5635 8032 5236 8032 5236 7656 +2 1 0 0 33 33 964 0 20 4.000 0 0 0 0 0 5 + 1221 8924 1620 8924 1620 9300 1221 9300 1221 8924 +2 1 0 1 32 32 963 0 -1 4.000 0 0 0 0 0 5 + 1221 8924 1620 8924 1620 9300 1221 9300 1221 8924 +2 1 0 0 33 33 962 0 20 4.000 0 0 0 0 0 5 + 1620 8924 2019 8924 2019 9300 1620 9300 1620 8924 +2 1 0 1 32 32 961 0 -1 4.000 0 0 0 0 0 5 + 1620 8924 2019 8924 2019 9300 1620 9300 1620 8924 +2 1 0 0 33 33 960 0 20 4.000 0 0 0 0 0 5 + 2019 8924 2418 8924 2418 9300 2019 9300 2019 8924 +2 1 0 1 32 32 959 0 -1 4.000 0 0 0 0 0 5 + 2019 8924 2418 8924 2418 9300 2019 9300 2019 8924 +2 1 0 0 33 33 958 0 20 4.000 0 0 0 0 0 5 + 2418 8924 2817 8924 2817 9300 2418 9300 2418 8924 +2 1 0 1 32 32 957 0 -1 4.000 0 0 0 0 0 5 + 2418 8924 2817 8924 2817 9300 2418 9300 2418 8924 +2 1 0 0 33 33 956 0 20 4.000 0 0 0 0 0 5 + 4038 8924 4438 8924 4438 9300 4038 9300 4038 8924 +2 1 0 1 32 32 955 0 -1 4.000 0 0 0 0 0 5 + 4038 8924 4438 8924 4438 9300 4038 9300 4038 8924 +2 1 0 0 33 33 954 0 20 4.000 0 0 0 0 0 5 + 4438 8924 4837 8924 4837 9300 4438 9300 4438 8924 +2 1 0 1 32 32 953 0 -1 4.000 0 0 0 0 0 5 + 4438 8924 4837 8924 4837 9300 4438 9300 4438 8924 +2 1 0 0 33 33 952 0 20 4.000 0 0 0 0 0 5 + 4837 8924 5236 8924 5236 9300 4837 9300 4837 8924 +2 1 0 1 32 32 951 0 -1 4.000 0 0 0 0 0 5 + 4837 8924 5236 8924 5236 9300 4837 9300 4837 8924 +2 1 0 0 33 33 950 0 20 4.000 0 0 0 0 0 5 + 5236 8924 5635 8924 5635 9300 5236 9300 5236 8924 +2 1 0 1 32 32 949 0 -1 4.000 0 0 0 0 0 5 + 5236 8924 5635 8924 5635 9300 5236 9300 5236 8924 +2 1 0 0 33 33 948 0 20 4.000 0 0 0 0 0 5 + 1221 9300 1620 9300 1620 9676 1221 9676 1221 9300 +2 1 0 1 32 32 947 0 -1 4.000 0 0 0 0 0 5 + 1221 9300 1620 9300 1620 9676 1221 9676 1221 9300 +2 1 0 0 33 33 946 0 20 
4.000 0 0 0 0 0 5 + 1620 9300 2019 9300 2019 9676 1620 9676 1620 9300 +2 1 0 1 32 32 945 0 -1 4.000 0 0 0 0 0 5 + 1620 9300 2019 9300 2019 9676 1620 9676 1620 9300 +2 1 0 0 33 33 944 0 20 4.000 0 0 0 0 0 5 + 2019 9300 2418 9300 2418 9676 2019 9676 2019 9300 +2 1 0 1 32 32 943 0 -1 4.000 0 0 0 0 0 5 + 2019 9300 2418 9300 2418 9676 2019 9676 2019 9300 +2 1 0 0 33 33 942 0 20 4.000 0 0 0 0 0 5 + 2418 9300 2817 9300 2817 9676 2418 9676 2418 9300 +2 1 0 1 32 32 941 0 -1 4.000 0 0 0 0 0 5 + 2418 9300 2817 9300 2817 9676 2418 9676 2418 9300 +2 1 0 0 33 33 940 0 20 4.000 0 0 0 0 0 5 + 4038 9300 4438 9300 4438 9676 4038 9676 4038 9300 +2 1 0 1 32 32 939 0 -1 4.000 0 0 0 0 0 5 + 4038 9300 4438 9300 4438 9676 4038 9676 4038 9300 +2 1 0 0 33 33 938 0 20 4.000 0 0 0 0 0 5 + 4438 9300 4837 9300 4837 9676 4438 9676 4438 9300 +2 1 0 1 32 32 937 0 -1 4.000 0 0 0 0 0 5 + 4438 9300 4837 9300 4837 9676 4438 9676 4438 9300 +2 1 0 0 33 33 936 0 20 4.000 0 0 0 0 0 5 + 4837 9300 5236 9300 5236 9676 4837 9676 4837 9300 +2 1 0 1 32 32 935 0 -1 4.000 0 0 0 0 0 5 + 4837 9300 5236 9300 5236 9676 4837 9676 4837 9300 +2 1 0 0 33 33 934 0 20 4.000 0 0 0 0 0 5 + 5236 9300 5635 9300 5635 9676 5236 9676 5236 9300 +2 1 0 1 32 32 933 0 -1 4.000 0 0 0 0 0 5 + 5236 9300 5635 9300 5635 9676 5236 9676 5236 9300 +2 1 0 1 32 32 930 0 -1 4.000 0 0 0 0 0 2 + 2819 7658 2869 7658 +2 1 0 1 32 32 929 0 -1 4.000 0 0 0 0 0 2 + 2952 7658 3002 7658 +2 1 0 1 32 32 928 0 -1 4.000 0 0 0 0 0 2 + 3085 7658 3135 7658 +2 1 0 1 32 32 927 0 -1 4.000 0 0 0 0 0 2 + 3219 7658 3252 7658 +2 1 0 1 32 32 926 0 -1 4.000 0 0 0 0 0 2 + 2819 8033 2869 8033 +2 1 0 1 32 32 925 0 -1 4.000 0 0 0 0 0 2 + 2952 8033 3002 8033 +2 1 0 1 32 32 924 0 -1 4.000 0 0 0 0 0 2 + 3085 8033 3135 8033 +2 1 0 1 32 32 923 0 -1 4.000 0 0 0 0 0 2 + 3219 8033 3252 8033 +2 1 0 1 32 32 922 0 -1 4.000 0 0 0 0 0 2 + 2819 8025 2819 8075 +2 1 0 1 32 32 921 0 -1 4.000 0 0 0 0 0 2 + 2819 8158 2819 8208 +2 1 0 1 32 32 920 0 -1 4.000 0 0 0 0 0 2 + 2819 8291 2819 8341 +2 
1 0 1 32 32 919 0 -1 4.000 0 0 0 0 0 2 + 2419 8025 2419 8075 +2 1 0 1 32 32 918 0 -1 4.000 0 0 0 0 0 2 + 2419 8158 2419 8208 +2 1 0 1 32 32 917 0 -1 4.000 0 0 0 0 0 2 + 2419 8291 2419 8341 +2 1 0 1 32 32 916 0 -1 4.000 0 0 0 0 0 2 + 2019 8025 2019 8075 +2 1 0 1 32 32 915 0 -1 4.000 0 0 0 0 0 2 + 2019 8158 2019 8208 +2 1 0 1 32 32 914 0 -1 4.000 0 0 0 0 0 2 + 2019 8291 2019 8341 +2 1 0 1 32 32 913 0 -1 4.000 0 0 0 0 0 2 + 1619 8025 1619 8075 +2 1 0 1 32 32 912 0 -1 4.000 0 0 0 0 0 2 + 1619 8158 1619 8208 +2 1 0 1 32 32 911 0 -1 4.000 0 0 0 0 0 2 + 1619 8291 1619 8341 +2 1 0 1 32 32 910 0 -1 4.000 0 0 0 0 0 2 + 4036 7658 3986 7658 +2 1 0 1 32 32 909 0 -1 4.000 0 0 0 0 0 2 + 3902 7658 3852 7658 +2 1 0 1 32 32 908 0 -1 4.000 0 0 0 0 0 2 + 3769 7658 3719 7658 +2 1 0 1 32 32 907 0 -1 4.000 0 0 0 0 0 2 + 3636 7658 3602 7658 +2 1 0 1 32 32 906 0 -1 4.000 0 0 0 0 0 2 + 4036 8033 3986 8033 +2 1 0 1 32 32 905 0 -1 4.000 0 0 0 0 0 2 + 3902 8033 3852 8033 +2 1 0 1 32 32 904 0 -1 4.000 0 0 0 0 0 2 + 3769 8033 3719 8033 +2 1 0 1 32 32 903 0 -1 4.000 0 0 0 0 0 2 + 3636 8033 3602 8033 +2 1 0 1 32 32 902 0 -1 4.000 0 0 0 0 0 2 + 4035 8025 4035 8075 +2 1 0 1 32 32 901 0 -1 4.000 0 0 0 0 0 2 + 4035 8158 4035 8208 +2 1 0 1 32 32 900 0 -1 4.000 0 0 0 0 0 2 + 4035 8291 4035 8341 +2 1 0 1 32 32 899 0 -1 4.000 0 0 0 0 0 2 + 4435 8025 4435 8075 +2 1 0 1 32 32 898 0 -1 4.000 0 0 0 0 0 2 + 4435 8158 4435 8208 +2 1 0 1 32 32 897 0 -1 4.000 0 0 0 0 0 2 + 4435 8291 4435 8341 +2 1 0 1 32 32 896 0 -1 4.000 0 0 0 0 0 2 + 4835 8025 4835 8075 +2 1 0 1 32 32 895 0 -1 4.000 0 0 0 0 0 2 + 4835 8158 4835 8208 +2 1 0 1 32 32 894 0 -1 4.000 0 0 0 0 0 2 + 4835 8291 4835 8341 +2 1 0 1 32 32 893 0 -1 4.000 0 0 0 0 0 2 + 5235 8025 5235 8075 +2 1 0 1 32 32 892 0 -1 4.000 0 0 0 0 0 2 + 5235 8158 5235 8208 +2 1 0 1 32 32 891 0 -1 4.000 0 0 0 0 0 2 + 5235 8291 5235 8341 +2 1 0 1 32 32 890 0 -1 4.000 0 0 0 0 0 2 + 4036 9300 3986 9300 +2 1 0 1 32 32 889 0 -1 4.000 0 0 0 0 0 2 + 3902 9300 3852 9300 +2 1 0 1 32 32 888 
0 -1 4.000 0 0 0 0 0 2 + 3769 9300 3719 9300 +2 1 0 1 32 32 887 0 -1 4.000 0 0 0 0 0 2 + 3636 9300 3602 9300 +2 1 0 1 32 32 886 0 -1 4.000 0 0 0 0 0 2 + 4036 8925 3986 8925 +2 1 0 1 32 32 885 0 -1 4.000 0 0 0 0 0 2 + 3902 8925 3852 8925 +2 1 0 1 32 32 884 0 -1 4.000 0 0 0 0 0 2 + 3769 8925 3719 8925 +2 1 0 1 32 32 883 0 -1 4.000 0 0 0 0 0 2 + 3636 8925 3602 8925 +2 1 0 1 32 32 882 0 -1 4.000 0 0 0 0 0 2 + 4035 8933 4035 8883 +2 1 0 1 32 32 881 0 -1 4.000 0 0 0 0 0 2 + 4035 8800 4035 8750 +2 1 0 1 32 32 880 0 -1 4.000 0 0 0 0 0 2 + 4035 8666 4035 8616 +2 1 0 1 32 32 879 0 -1 4.000 0 0 0 0 0 2 + 4435 8933 4435 8883 +2 1 0 1 32 32 878 0 -1 4.000 0 0 0 0 0 2 + 4435 8800 4435 8750 +2 1 0 1 32 32 877 0 -1 4.000 0 0 0 0 0 2 + 4435 8666 4435 8616 +2 1 0 1 32 32 876 0 -1 4.000 0 0 0 0 0 2 + 4835 8933 4835 8883 +2 1 0 1 32 32 875 0 -1 4.000 0 0 0 0 0 2 + 4835 8800 4835 8750 +2 1 0 1 32 32 874 0 -1 4.000 0 0 0 0 0 2 + 4835 8666 4835 8616 +2 1 0 1 32 32 873 0 -1 4.000 0 0 0 0 0 2 + 5235 8933 5235 8883 +2 1 0 1 32 32 872 0 -1 4.000 0 0 0 0 0 2 + 5235 8800 5235 8750 +2 1 0 1 32 32 871 0 -1 4.000 0 0 0 0 0 2 + 5235 8666 5235 8616 +2 1 0 1 32 32 870 0 -1 4.000 0 0 0 0 0 2 + 2819 9300 2869 9300 +2 1 0 1 32 32 869 0 -1 4.000 0 0 0 0 0 2 + 2952 9300 3002 9300 +2 1 0 1 32 32 868 0 -1 4.000 0 0 0 0 0 2 + 3085 9300 3135 9300 +2 1 0 1 32 32 867 0 -1 4.000 0 0 0 0 0 2 + 3219 9300 3252 9300 +2 1 0 1 32 32 866 0 -1 4.000 0 0 0 0 0 2 + 2819 8925 2869 8925 +2 1 0 1 32 32 865 0 -1 4.000 0 0 0 0 0 2 + 2952 8925 3002 8925 +2 1 0 1 32 32 864 0 -1 4.000 0 0 0 0 0 2 + 3085 8925 3135 8925 +2 1 0 1 32 32 863 0 -1 4.000 0 0 0 0 0 2 + 3219 8925 3252 8925 +2 1 0 1 32 32 862 0 -1 4.000 0 0 0 0 0 2 + 2819 8933 2819 8883 +2 1 0 1 32 32 861 0 -1 4.000 0 0 0 0 0 2 + 2819 8800 2819 8750 +2 1 0 1 32 32 860 0 -1 4.000 0 0 0 0 0 2 + 2819 8666 2819 8616 +2 1 0 1 32 32 859 0 -1 4.000 0 0 0 0 0 2 + 2419 8933 2419 8883 +2 1 0 1 32 32 858 0 -1 4.000 0 0 0 0 0 2 + 2419 8800 2419 8750 +2 1 0 1 32 32 857 0 -1 4.000 0 0 
0 0 0 2 + 2419 8666 2419 8616 +2 1 0 1 32 32 856 0 -1 4.000 0 0 0 0 0 2 + 2019 8933 2019 8883 +2 1 0 1 32 32 855 0 -1 4.000 0 0 0 0 0 2 + 2019 8800 2019 8750 +2 1 0 1 32 32 854 0 -1 4.000 0 0 0 0 0 2 + 2019 8666 2019 8616 +2 1 0 1 32 32 853 0 -1 4.000 0 0 0 0 0 2 + 1619 8933 1619 8883 +2 1 0 1 32 32 852 0 -1 4.000 0 0 0 0 0 2 + 1619 8800 1619 8750 +2 1 0 1 32 32 851 0 -1 4.000 0 0 0 0 0 2 + 1619 8666 1619 8616 +2 1 0 0 35 35 836 0 20 4.000 0 0 0 0 0 5 + 5636 7280 6035 7280 6035 7656 5636 7656 5636 7280 +2 1 0 1 32 32 835 0 -1 4.000 0 0 0 0 0 5 + 5636 7280 6035 7280 6035 7656 5636 7656 5636 7280 +2 1 0 0 35 35 834 0 20 4.000 0 0 0 0 0 5 + 5636 7656 6035 7656 6035 8032 5636 8032 5636 7656 +2 1 0 1 32 32 833 0 -1 4.000 0 0 0 0 0 5 + 5636 7656 6035 7656 6035 8032 5636 8032 5636 7656 +2 1 0 0 35 35 832 0 20 4.000 0 0 0 0 0 5 + 5636 8924 6035 8924 6035 9300 5636 9300 5636 8924 +2 1 0 1 32 32 831 0 -1 4.000 0 0 0 0 0 5 + 5636 8924 6035 8924 6035 9300 5636 9300 5636 8924 +2 1 0 0 35 35 830 0 20 4.000 0 0 0 0 0 5 + 5636 9300 6035 9300 6035 9676 5636 9676 5636 9300 +2 1 0 1 32 32 829 0 -1 4.000 0 0 0 0 0 5 + 5636 9300 6035 9300 6035 9676 5636 9676 5636 9300 +2 1 0 0 35 35 828 0 20 4.000 0 0 0 0 0 5 + 6036 7280 6435 7280 6435 7656 6036 7656 6036 7280 +2 1 0 1 32 32 827 0 -1 4.000 0 0 0 0 0 5 + 6036 7280 6435 7280 6435 7656 6036 7656 6036 7280 +2 1 0 0 35 35 826 0 20 4.000 0 0 0 0 0 5 + 6036 7656 6435 7656 6435 8032 6036 8032 6036 7656 +2 1 0 1 32 32 825 0 -1 4.000 0 0 0 0 0 5 + 6036 7656 6435 7656 6435 8032 6036 8032 6036 7656 +2 1 0 0 35 35 824 0 20 4.000 0 0 0 0 0 5 + 6036 8924 6435 8924 6435 9300 6036 9300 6036 8924 +2 1 0 1 32 32 823 0 -1 4.000 0 0 0 0 0 5 + 6036 8924 6435 8924 6435 9300 6036 9300 6036 8924 +2 1 0 0 35 35 822 0 20 4.000 0 0 0 0 0 5 + 6036 9300 6435 9300 6435 9676 6036 9676 6036 9300 +2 1 0 1 32 32 821 0 -1 4.000 0 0 0 0 0 5 + 6036 9300 6435 9300 6435 9676 6036 9676 6036 9300 +2 1 0 1 0 0 820 0 -1 4.000 0 0 0 0 0 2 + 5635 7283 5635 9683 +2 1 0 1 0 0 819 0 
-1 4.000 0 0 0 0 0 2 + 1420 7515 6312 7515 +2 1 0 0 0 0 818 0 20 4.000 0 0 0 0 0 3 + 6348 7518 6117 7462 6117 7515 +2 1 0 1 0 0 817 0 -1 4.000 0 0 0 0 0 3 + 6348 7518 6117 7462 6117 7515 +2 1 0 0 0 0 816 0 20 4.000 0 0 0 0 0 3 + 6348 7512 6117 7568 6117 7515 +2 1 0 1 0 0 815 0 -1 4.000 0 0 0 0 0 3 + 6348 7512 6117 7568 6117 7515 +2 1 0 1 0 0 814 0 -1 4.000 0 0 0 0 0 2 + 1420 7891 5863 7891 +2 1 0 0 0 0 813 0 20 4.000 0 0 0 0 0 3 + 5895 7894 5685 7838 5685 7891 +2 1 0 1 0 0 812 0 -1 4.000 0 0 0 0 0 3 + 5895 7894 5685 7838 5685 7891 +2 1 0 0 0 0 811 0 20 4.000 0 0 0 0 0 3 + 5895 7888 5685 7944 5685 7891 +2 1 0 1 0 0 810 0 -1 4.000 0 0 0 0 0 3 + 5895 7888 5685 7944 5685 7891 +2 1 0 1 0 0 809 0 -1 4.000 0 0 0 0 0 2 + 1420 9112 5863 9112 +2 1 0 0 0 0 808 0 20 4.000 0 0 0 0 0 3 + 5895 9115 5685 9059 5685 9112 +2 1 0 1 0 0 807 0 -1 4.000 0 0 0 0 0 3 + 5895 9115 5685 9059 5685 9112 +2 1 0 0 0 0 806 0 20 4.000 0 0 0 0 0 3 + 5895 9109 5685 9165 5685 9112 +2 1 0 1 0 0 805 0 -1 4.000 0 0 0 0 0 3 + 5895 9109 5685 9165 5685 9112 +2 1 0 1 0 0 804 0 -1 4.000 0 0 0 0 0 2 + 1420 9488 5863 9488 +2 1 0 0 0 0 803 0 20 4.000 0 0 0 0 0 3 + 5895 9491 5685 9435 5685 9488 +2 1 0 1 0 0 802 0 -1 4.000 0 0 0 0 0 3 + 5895 9491 5685 9435 5685 9488 +2 1 0 0 0 0 801 0 20 4.000 0 0 0 0 0 3 + 5895 9485 5685 9541 5685 9488 +2 1 0 1 0 0 800 0 -1 4.000 0 0 0 0 0 3 + 5895 9485 5685 9541 5685 9488 +2 1 0 0 36 36 771 0 20 4.000 0 0 0 0 0 5 + 5226 5196 5623 5196 5623 5572 5226 5572 5226 5196 +2 1 0 0 37 37 770 0 20 4.000 0 0 0 0 0 5 + 4827 5196 5226 5196 5226 5572 4827 5572 4827 5196 +2 1 0 1 38 38 769 0 -1 4.000 0 0 0 0 0 5 + 4835 5194 5631 5194 5631 5569 4835 5569 4835 5194 +2 1 0 0 36 36 768 0 20 4.000 0 0 0 0 0 5 + 4434 5196 4832 5196 4832 5572 4434 5572 4434 5196 +2 1 0 0 37 37 767 0 20 4.000 0 0 0 0 0 5 + 4035 5196 4434 5196 4434 5572 4035 5572 4035 5196 +2 1 0 1 38 38 766 0 -1 4.000 0 0 0 0 0 5 + 4044 5194 4840 5194 4840 5569 4044 5569 4044 5194 +2 1 0 0 36 36 765 0 20 4.000 0 0 0 0 0 5 + 6026 5196 
6440 5196 6440 5572 6026 5572 6026 5196 +2 1 0 0 37 37 764 0 20 4.000 0 0 0 0 0 5 + 5627 5196 6026 5196 6026 5572 5627 5572 5627 5196 +2 1 0 1 38 38 763 0 -1 4.000 0 0 0 0 0 5 + 5635 5194 6440 5194 6440 5569 5635 5569 5635 5194 +2 1 0 0 36 36 762 0 20 4.000 0 0 0 0 0 5 + 5226 5571 5623 5571 5623 5947 5226 5947 5226 5571 +2 1 0 0 37 37 761 0 20 4.000 0 0 0 0 0 5 + 4827 5571 5226 5571 5226 5947 4827 5947 4827 5571 +2 1 0 1 38 38 760 0 -1 4.000 0 0 0 0 0 5 + 4835 5569 5631 5569 5631 5944 4835 5944 4835 5569 +2 1 0 0 36 36 759 0 20 4.000 0 0 0 0 0 5 + 4434 5571 4832 5571 4832 5947 4434 5947 4434 5571 +2 1 0 0 37 37 758 0 20 4.000 0 0 0 0 0 5 + 4035 5571 4434 5571 4434 5947 4035 5947 4035 5571 +2 1 0 1 38 38 757 0 -1 4.000 0 0 0 0 0 5 + 4044 5569 4840 5569 4840 5944 4044 5944 4044 5569 +2 1 0 0 36 36 756 0 20 4.000 0 0 0 0 0 5 + 6026 5571 6440 5571 6440 5947 6026 5947 6026 5571 +2 1 0 0 37 37 755 0 20 4.000 0 0 0 0 0 5 + 5627 5571 6026 5571 6026 5947 5627 5947 5627 5571 +2 1 0 1 38 38 754 0 -1 4.000 0 0 0 0 0 5 + 5635 5569 6440 5569 6440 5944 5635 5944 5635 5569 +2 1 0 0 36 36 753 0 20 4.000 0 0 0 0 0 5 + 2409 5571 2807 5571 2807 5947 2409 5947 2409 5571 +2 1 0 0 37 37 752 0 20 4.000 0 0 0 0 0 5 + 2010 5571 2409 5571 2409 5947 2010 5947 2010 5571 +2 1 0 1 38 38 751 0 -1 4.000 0 0 0 0 0 5 + 2019 5561 2815 5561 2815 5936 2019 5936 2019 5561 +2 1 0 0 36 36 750 0 20 4.000 0 0 0 0 0 5 + 1618 5571 2015 5571 2015 5947 1618 5947 1618 5571 +2 1 0 0 37 37 749 0 20 4.000 0 0 0 0 0 5 + 1219 5571 1618 5571 1618 5947 1219 5947 1219 5571 +2 1 0 1 38 38 748 0 -1 4.000 0 0 0 0 0 5 + 1227 5561 2023 5561 2023 5939 1227 5939 1227 5561 +2 1 0 0 36 36 747 0 20 4.000 0 0 0 0 0 5 + 2409 5196 2807 5196 2807 5572 2409 5572 2409 5196 +2 1 0 0 37 37 746 0 20 4.000 0 0 0 0 0 5 + 2010 5196 2409 5196 2409 5572 2010 5572 2010 5196 +2 1 0 1 38 38 745 0 -1 4.000 0 0 0 0 0 5 + 2019 5186 2815 5186 2815 5561 2019 5561 2019 5186 +2 1 0 0 36 36 744 0 20 4.000 0 0 0 0 0 5 + 1618 5196 2015 5196 2015 5572 1618 
5572 1618 5196 +2 1 0 0 37 37 743 0 20 4.000 0 0 0 0 0 5 + 1219 5196 1618 5196 1618 5572 1219 5572 1219 5196 +2 1 0 1 38 38 742 0 -1 4.000 0 0 0 0 0 5 + 1227 5186 2023 5186 2023 5561 1227 5561 1227 5186 +2 1 0 0 36 36 741 0 20 4.000 0 0 0 0 0 5 + 5226 3546 5623 3546 5623 3922 5226 3922 5226 3546 +2 1 0 0 37 37 740 0 20 4.000 0 0 0 0 0 5 + 4827 3546 5226 3546 5226 3922 4827 3922 4827 3546 +2 1 0 1 32 32 739 0 -1 4.000 0 0 0 0 0 5 + 4835 3544 5631 3544 5631 3919 4835 3919 4835 3544 +2 1 0 0 36 36 738 0 20 4.000 0 0 0 0 0 5 + 4434 3546 4832 3546 4832 3922 4434 3922 4434 3546 +2 1 0 0 37 37 737 0 20 4.000 0 0 0 0 0 5 + 4035 3546 4434 3546 4434 3922 4035 3922 4035 3546 +2 1 0 1 38 38 736 0 -1 4.000 0 0 0 0 0 5 + 4044 3544 4840 3544 4840 3919 4044 3919 4044 3544 +2 1 0 0 36 36 735 0 20 4.000 0 0 0 0 0 5 + 5990 3546 6432 3546 6432 3955 5990 3955 5990 3546 +2 1 0 0 37 37 734 0 20 4.000 0 0 0 0 0 5 + 5627 3546 6026 3546 6026 3922 5627 3922 5627 3546 +2 1 0 1 38 38 733 0 -1 4.000 0 0 0 0 0 5 + 5635 3544 6440 3544 6440 3919 5635 3919 5635 3544 +2 1 0 0 36 36 732 0 20 4.000 0 0 0 0 0 5 + 5226 3921 5623 3921 5623 4297 5226 4297 5226 3921 +2 1 0 0 37 37 731 0 20 4.000 0 0 0 0 0 5 + 4827 3921 5226 3921 5226 4297 4827 4297 4827 3921 +2 1 0 1 38 38 730 0 -1 4.000 0 0 0 0 0 5 + 4835 3919 5631 3919 5631 4294 4835 4294 4835 3919 +2 1 0 0 36 36 729 0 20 4.000 0 0 0 0 0 5 + 4434 3921 4832 3921 4832 4297 4434 4297 4434 3921 +2 1 0 0 37 37 728 0 20 4.000 0 0 0 0 0 5 + 4035 3921 4434 3921 4434 4297 4035 4297 4035 3921 +2 1 0 1 38 38 727 0 -1 4.000 0 0 0 0 0 5 + 4044 3919 4840 3919 4840 4294 4044 4294 4044 3919 +2 1 0 0 36 36 726 0 20 4.000 0 0 0 0 0 5 + 6026 3921 6432 3921 6432 4297 6026 4297 6026 3921 +2 1 0 0 37 37 725 0 20 4.000 0 0 0 0 0 5 + 5627 3921 6026 3921 6026 4297 5627 4297 5627 3921 +2 1 0 1 38 38 724 0 -1 4.000 0 0 0 0 0 5 + 5635 3919 6440 3919 6440 4294 5635 4294 5635 3919 +2 1 0 0 36 36 723 0 20 4.000 0 0 0 0 0 5 + 2409 3921 2807 3921 2807 4297 2409 4297 2409 3921 +2 1 0 0 
37 37 722 0 20 4.000 0 0 0 0 0 5 + 2010 3921 2409 3921 2409 4297 2010 4297 2010 3921 +2 1 0 1 38 38 721 0 -1 4.000 0 0 0 0 0 5 + 2019 3919 2815 3919 2815 4294 2019 4294 2019 3919 +2 1 0 0 36 36 720 0 20 4.000 0 0 0 0 0 5 + 1618 3921 2015 3921 2015 4297 1618 4297 1618 3921 +2 1 0 0 37 37 719 0 20 4.000 0 0 0 0 0 5 + 1219 3921 1618 3921 1618 4297 1219 4297 1219 3921 +2 1 0 1 38 38 718 0 -1 4.000 0 0 0 0 0 5 + 1227 3919 2023 3919 2023 4294 1227 4294 1227 3919 +2 1 0 0 36 36 717 0 20 4.000 0 0 0 0 0 5 + 2409 3546 2815 3546 2815 3922 2409 3922 2409 3546 +2 1 0 0 37 37 716 0 20 4.000 0 0 0 0 0 5 + 2010 3546 2409 3546 2409 3922 2010 3922 2010 3546 +2 1 0 1 38 38 715 0 -1 4.000 0 0 0 0 0 5 + 2019 3544 2815 3544 2815 3919 2019 3919 2019 3544 +2 1 0 0 36 36 714 0 20 4.000 0 0 0 0 0 5 + 1618 3546 2015 3546 2015 3922 1618 3922 1618 3546 +2 1 0 0 37 37 713 0 20 4.000 0 0 0 0 0 5 + 1219 3546 1618 3546 1618 3922 1219 3922 1219 3546 +2 1 0 1 38 38 712 0 -1 4.000 0 0 0 0 0 5 + 1227 3544 2023 3544 2023 3919 1227 3919 1227 3544 +2 1 0 1 32 32 711 0 -1 4.000 0 0 0 0 0 5 + 1221 3546 6440 3546 6440 5941 1221 5941 1221 3546 +2 1 0 1 32 32 708 0 -1 4.000 0 0 0 0 0 2 + 2819 3915 2869 3915 +2 1 0 1 32 32 707 0 -1 4.000 0 0 0 0 0 2 + 2952 3915 3002 3915 +2 1 0 1 32 32 706 0 -1 4.000 0 0 0 0 0 2 + 3085 3915 3135 3915 +2 1 0 1 32 32 705 0 -1 4.000 0 0 0 0 0 2 + 3219 3915 3252 3915 +2 1 0 1 32 32 704 0 -1 4.000 0 0 0 0 0 2 + 2819 4290 2869 4290 +2 1 0 1 32 32 703 0 -1 4.000 0 0 0 0 0 2 + 2952 4290 3002 4290 +2 1 0 1 32 32 702 0 -1 4.000 0 0 0 0 0 2 + 3085 4290 3135 4290 +2 1 0 1 32 32 701 0 -1 4.000 0 0 0 0 0 2 + 3219 4290 3252 4290 +2 1 0 1 32 32 700 0 -1 4.000 0 0 0 0 0 2 + 2819 4282 2819 4332 +2 1 0 1 32 32 699 0 -1 4.000 0 0 0 0 0 2 + 2819 4415 2819 4465 +2 1 0 1 32 32 698 0 -1 4.000 0 0 0 0 0 2 + 2819 4548 2819 4598 +2 1 0 1 32 32 697 0 -1 4.000 0 0 0 0 0 2 + 2019 4282 2019 4332 +2 1 0 1 32 32 696 0 -1 4.000 0 0 0 0 0 2 + 2019 4415 2019 4465 +2 1 0 1 32 32 695 0 -1 4.000 0 0 0 0 0 2 + 2019 
4548 2019 4598 +2 1 0 1 32 32 694 0 -1 4.000 0 0 0 0 0 2 + 4036 3915 3986 3915 +2 1 0 1 32 32 693 0 -1 4.000 0 0 0 0 0 2 + 3902 3915 3852 3915 +2 1 0 1 32 32 692 0 -1 4.000 0 0 0 0 0 2 + 3769 3915 3719 3915 +2 1 0 1 32 32 691 0 -1 4.000 0 0 0 0 0 2 + 3636 3915 3602 3915 +2 1 0 1 32 32 690 0 -1 4.000 0 0 0 0 0 2 + 4036 4290 3986 4290 +2 1 0 1 32 32 689 0 -1 4.000 0 0 0 0 0 2 + 3902 4290 3852 4290 +2 1 0 1 32 32 688 0 -1 4.000 0 0 0 0 0 2 + 3769 4290 3719 4290 +2 1 0 1 32 32 687 0 -1 4.000 0 0 0 0 0 2 + 3636 4290 3602 4290 +2 1 0 1 32 32 686 0 -1 4.000 0 0 0 0 0 2 + 4035 4282 4035 4332 +2 1 0 1 32 32 685 0 -1 4.000 0 0 0 0 0 2 + 4035 4415 4035 4465 +2 1 0 1 32 32 684 0 -1 4.000 0 0 0 0 0 2 + 4035 4548 4035 4598 +2 1 0 1 32 32 683 0 -1 4.000 0 0 0 0 0 2 + 4835 4282 4835 4332 +2 1 0 1 32 32 682 0 -1 4.000 0 0 0 0 0 2 + 4835 4415 4835 4465 +2 1 0 1 32 32 681 0 -1 4.000 0 0 0 0 0 2 + 4835 4548 4835 4598 +2 1 0 1 32 32 680 0 -1 4.000 0 0 0 0 0 2 + 4036 5565 3986 5565 +2 1 0 1 32 32 679 0 -1 4.000 0 0 0 0 0 2 + 3902 5565 3852 5565 +2 1 0 1 32 32 678 0 -1 4.000 0 0 0 0 0 2 + 3769 5565 3719 5565 +2 1 0 1 32 32 677 0 -1 4.000 0 0 0 0 0 2 + 3636 5565 3602 5565 +2 1 0 1 32 32 676 0 -1 4.000 0 0 0 0 0 2 + 4036 5190 3986 5190 +2 1 0 1 32 32 675 0 -1 4.000 0 0 0 0 0 2 + 3902 5190 3852 5190 +2 1 0 1 32 32 674 0 -1 4.000 0 0 0 0 0 2 + 3769 5190 3719 5190 +2 1 0 1 32 32 673 0 -1 4.000 0 0 0 0 0 2 + 3636 5190 3602 5190 +2 1 0 1 32 32 672 0 -1 4.000 0 0 0 0 0 2 + 4035 5198 4035 5148 +2 1 0 1 32 32 671 0 -1 4.000 0 0 0 0 0 2 + 4035 5065 4035 5015 +2 1 0 1 32 32 670 0 -1 4.000 0 0 0 0 0 2 + 4035 4932 4035 4882 +2 1 0 1 32 32 669 0 -1 4.000 0 0 0 0 0 2 + 4835 5198 4835 5148 +2 1 0 1 32 32 668 0 -1 4.000 0 0 0 0 0 2 + 4835 5065 4835 5015 +2 1 0 1 32 32 667 0 -1 4.000 0 0 0 0 0 2 + 4835 4932 4835 4882 +2 1 0 1 32 32 666 0 -1 4.000 0 0 0 0 0 2 + 2819 5565 2869 5565 +2 1 0 1 32 32 665 0 -1 4.000 0 0 0 0 0 2 + 2952 5565 3002 5565 +2 1 0 1 32 32 664 0 -1 4.000 0 0 0 0 0 2 + 3085 5565 3135 5565 
+2 1 0 1 32 32 663 0 -1 4.000 0 0 0 0 0 2 + 3219 5565 3252 5565 +2 1 0 1 32 32 662 0 -1 4.000 0 0 0 0 0 2 + 2819 5190 2869 5190 +2 1 0 1 32 32 661 0 -1 4.000 0 0 0 0 0 2 + 2952 5190 3002 5190 +2 1 0 1 32 32 660 0 -1 4.000 0 0 0 0 0 2 + 3085 5190 3135 5190 +2 1 0 1 32 32 659 0 -1 4.000 0 0 0 0 0 2 + 3219 5190 3252 5190 +2 1 0 1 32 32 658 0 -1 4.000 0 0 0 0 0 2 + 2819 5198 2819 5148 +2 1 0 1 32 32 657 0 -1 4.000 0 0 0 0 0 2 + 2819 5065 2819 5015 +2 1 0 1 32 32 656 0 -1 4.000 0 0 0 0 0 2 + 2819 4932 2819 4882 +2 1 0 1 32 32 655 0 -1 4.000 0 0 0 0 0 2 + 2019 5198 2019 5148 +2 1 0 1 32 32 654 0 -1 4.000 0 0 0 0 0 2 + 2019 5065 2019 5015 +2 1 0 1 32 32 653 0 -1 4.000 0 0 0 0 0 2 + 2019 4932 2019 4882 +2 1 0 1 32 32 640 0 -1 4.000 0 0 0 0 0 2 + 5635 4282 5635 4332 +2 1 0 1 32 32 639 0 -1 4.000 0 0 0 0 0 2 + 5635 4415 5635 4465 +2 1 0 1 32 32 638 0 -1 4.000 0 0 0 0 0 2 + 5635 4548 5635 4598 +2 1 0 1 32 32 637 0 -1 4.000 0 0 0 0 0 2 + 5635 5198 5635 5148 +2 1 0 1 32 32 636 0 -1 4.000 0 0 0 0 0 2 + 5635 5065 5635 5015 +2 1 0 1 32 32 635 0 -1 4.000 0 0 0 0 0 2 + 5635 4932 5635 4882 +2 1 0 1 0 0 634 0 -1 4.000 0 0 0 0 0 2 + 1420 3781 6312 3781 +2 1 0 0 0 0 633 0 20 4.000 0 0 0 0 0 3 + 6348 3784 6117 3728 6117 3781 +2 1 0 1 0 0 632 0 -1 4.000 0 0 0 0 0 3 + 6348 3784 6117 3728 6117 3781 +2 1 0 0 0 0 631 0 20 4.000 0 0 0 0 0 3 + 6348 3778 6117 3834 6117 3781 +2 1 0 1 0 0 630 0 -1 4.000 0 0 0 0 0 3 + 6348 3778 6117 3834 6117 3781 +2 1 0 1 0 0 629 0 -1 4.000 0 0 0 0 0 2 + 1420 4169 6312 4169 +2 1 0 0 0 0 628 0 20 4.000 0 0 0 0 0 3 + 6348 4172 6117 4116 6117 4169 +2 1 0 1 0 0 627 0 -1 4.000 0 0 0 0 0 3 + 6348 4172 6117 4116 6117 4169 +2 1 0 0 0 0 626 0 20 4.000 0 0 0 0 0 3 + 6348 4166 6117 4222 6117 4169 +2 1 0 1 0 0 625 0 -1 4.000 0 0 0 0 0 3 + 6348 4166 6117 4222 6117 4169 +2 1 0 1 0 0 624 0 -1 4.000 0 0 0 0 0 2 + 1420 5390 6312 5390 +2 1 0 0 0 0 623 0 20 4.000 0 0 0 0 0 3 + 6348 5393 6117 5337 6117 5390 +2 1 0 1 0 0 622 0 -1 4.000 0 0 0 0 0 3 + 6348 5393 6117 5337 6117 5390 +2 1 
0 0 0 0 621 0 20 4.000 0 0 0 0 0 3 + 6348 5387 6117 5443 6117 5390 +2 1 0 1 0 0 620 0 -1 4.000 0 0 0 0 0 3 + 6348 5387 6117 5443 6117 5390 +2 1 0 1 0 0 619 0 -1 4.000 0 0 0 0 0 2 + 1420 5766 6312 5766 +2 1 0 0 0 0 618 0 20 4.000 0 0 0 0 0 3 + 6348 5769 6117 5713 6117 5766 +2 1 0 1 0 0 617 0 -1 4.000 0 0 0 0 0 3 + 6348 5769 6117 5713 6117 5766 +2 1 0 0 0 0 616 0 20 4.000 0 0 0 0 0 3 + 6348 5763 6117 5819 6117 5766 +2 1 0 1 0 0 615 0 -1 4.000 0 0 0 0 0 3 + 6348 5763 6117 5819 6117 5766 +2 1 0 0 33 33 614 0 20 4.000 0 0 0 0 0 5 + 1469 6215 1868 6215 1868 6591 1469 6591 1469 6215 +2 1 0 1 32 32 613 0 -1 4.000 0 0 0 0 0 5 + 1469 6215 1868 6215 1868 6591 1469 6591 1469 6215 +2 1 0 0 36 36 610 0 20 4.000 0 0 0 0 0 5 + 4026 6217 4432 6217 4432 6593 4026 6593 4026 6217 +2 1 0 0 37 37 609 0 20 4.000 0 0 0 0 0 5 + 3627 6217 4026 6217 4026 6593 3627 6593 3627 6217 +2 1 0 1 38 38 608 0 -1 4.000 0 0 0 0 0 5 + 3635 6215 4440 6215 4440 6590 3635 6590 3635 6215 +2 1 0 0 7 7 591 0 20 4.000 0 0 0 0 0 5 + 1221 495 5635 495 5635 2890 1221 2890 1221 495 +2 1 0 1 32 32 590 0 -1 4.000 0 0 0 0 0 5 + 1221 495 5635 495 5635 2890 1221 2890 1221 495 +2 1 0 0 33 33 589 0 20 4.000 0 0 0 0 0 5 + 1221 495 1620 495 1620 871 1221 871 1221 495 +2 1 0 1 32 32 588 0 -1 4.000 0 0 0 0 0 5 + 1221 495 1620 495 1620 871 1221 871 1221 495 +2 1 0 0 33 33 587 0 20 4.000 0 0 0 0 0 5 + 1620 495 2019 495 2019 871 1620 871 1620 495 +2 1 0 1 32 32 586 0 -1 4.000 0 0 0 0 0 5 + 1620 495 2019 495 2019 871 1620 871 1620 495 +2 1 0 0 33 33 585 0 20 4.000 0 0 0 0 0 5 + 2019 495 2418 495 2418 871 2019 871 2019 495 +2 1 0 1 32 32 584 0 -1 4.000 0 0 0 0 0 5 + 2019 495 2418 495 2418 871 2019 871 2019 495 +2 1 0 0 33 33 583 0 20 4.000 0 0 0 0 0 5 + 2418 495 2817 495 2817 871 2418 871 2418 495 +2 1 0 1 32 32 582 0 -1 4.000 0 0 0 0 0 5 + 2418 495 2817 495 2817 871 2418 871 2418 495 +2 1 0 0 33 33 581 0 20 4.000 0 0 0 0 0 5 + 4038 495 4438 495 4438 871 4038 871 4038 495 +2 1 0 1 32 32 580 0 -1 4.000 0 0 0 0 0 5 + 4038 495 4438 
495 4438 871 4038 871 4038 495 +2 1 0 0 33 33 579 0 20 4.000 0 0 0 0 0 5 + 4438 495 4837 495 4837 871 4438 871 4438 495 +2 1 0 1 32 32 578 0 -1 4.000 0 0 0 0 0 5 + 4438 495 4837 495 4837 871 4438 871 4438 495 +2 1 0 0 33 33 577 0 20 4.000 0 0 0 0 0 5 + 4837 495 5236 495 5236 871 4837 871 4837 495 +2 1 0 1 32 32 576 0 -1 4.000 0 0 0 0 0 5 + 4837 495 5236 495 5236 871 4837 871 4837 495 +2 1 0 0 33 33 575 0 20 4.000 0 0 0 0 0 5 + 5236 495 5635 495 5635 871 5236 871 5236 495 +2 1 0 1 32 32 574 0 -1 4.000 0 0 0 0 0 5 + 5236 495 5635 495 5635 871 5236 871 5236 495 +2 1 0 0 33 33 573 0 20 4.000 0 0 0 0 0 5 + 1221 871 1620 871 1620 1247 1221 1247 1221 871 +2 1 0 1 32 32 572 0 -1 4.000 0 0 0 0 0 5 + 1221 871 1620 871 1620 1247 1221 1247 1221 871 +2 1 0 0 33 33 571 0 20 4.000 0 0 0 0 0 5 + 1620 871 2019 871 2019 1247 1620 1247 1620 871 +2 1 0 1 32 32 570 0 -1 4.000 0 0 0 0 0 5 + 1620 871 2019 871 2019 1247 1620 1247 1620 871 +2 1 0 0 33 33 569 0 20 4.000 0 0 0 0 0 5 + 2019 871 2418 871 2418 1247 2019 1247 2019 871 +2 1 0 1 32 32 568 0 -1 4.000 0 0 0 0 0 5 + 2019 871 2418 871 2418 1247 2019 1247 2019 871 +2 1 0 0 33 33 567 0 20 4.000 0 0 0 0 0 5 + 2418 871 2817 871 2817 1247 2418 1247 2418 871 +2 1 0 1 32 32 566 0 -1 4.000 0 0 0 0 0 5 + 2418 871 2817 871 2817 1247 2418 1247 2418 871 +2 1 0 0 33 33 565 0 20 4.000 0 0 0 0 0 5 + 4038 871 4438 871 4438 1247 4038 1247 4038 871 +2 1 0 1 32 32 564 0 -1 4.000 0 0 0 0 0 5 + 4038 871 4438 871 4438 1247 4038 1247 4038 871 +2 1 0 0 33 33 563 0 20 4.000 0 0 0 0 0 5 + 4438 871 4837 871 4837 1247 4438 1247 4438 871 +2 1 0 1 32 32 562 0 -1 4.000 0 0 0 0 0 5 + 4438 871 4837 871 4837 1247 4438 1247 4438 871 +2 1 0 0 33 33 561 0 20 4.000 0 0 0 0 0 5 + 4837 871 5236 871 5236 1247 4837 1247 4837 871 +2 1 0 1 32 32 560 0 -1 4.000 0 0 0 0 0 5 + 4837 871 5236 871 5236 1247 4837 1247 4837 871 +2 1 0 0 33 33 559 0 20 4.000 0 0 0 0 0 5 + 5236 871 5635 871 5635 1247 5236 1247 5236 871 +2 1 0 1 32 32 558 0 -1 4.000 0 0 0 0 0 5 + 5236 871 5635 871 5635 
1247 5236 1247 5236 871 +2 1 0 0 33 33 557 0 20 4.000 0 0 0 0 0 5 + 1221 2139 1620 2139 1620 2515 1221 2515 1221 2139 +2 1 0 1 32 32 556 0 -1 4.000 0 0 0 0 0 5 + 1221 2139 1620 2139 1620 2515 1221 2515 1221 2139 +2 1 0 0 33 33 555 0 20 4.000 0 0 0 0 0 5 + 1620 2139 2019 2139 2019 2515 1620 2515 1620 2139 +2 1 0 1 32 32 554 0 -1 4.000 0 0 0 0 0 5 + 1620 2139 2019 2139 2019 2515 1620 2515 1620 2139 +2 1 0 0 33 33 553 0 20 4.000 0 0 0 0 0 5 + 2019 2139 2418 2139 2418 2515 2019 2515 2019 2139 +2 1 0 1 32 32 552 0 -1 4.000 0 0 0 0 0 5 + 2019 2139 2418 2139 2418 2515 2019 2515 2019 2139 +2 1 0 0 33 33 551 0 20 4.000 0 0 0 0 0 5 + 2418 2139 2817 2139 2817 2515 2418 2515 2418 2139 +2 1 0 1 32 32 550 0 -1 4.000 0 0 0 0 0 5 + 2418 2139 2817 2139 2817 2515 2418 2515 2418 2139 +2 1 0 0 33 33 549 0 20 4.000 0 0 0 0 0 5 + 4038 2139 4438 2139 4438 2515 4038 2515 4038 2139 +2 1 0 1 32 32 548 0 -1 4.000 0 0 0 0 0 5 + 4038 2139 4438 2139 4438 2515 4038 2515 4038 2139 +2 1 0 0 33 33 547 0 20 4.000 0 0 0 0 0 5 + 4438 2139 4837 2139 4837 2515 4438 2515 4438 2139 +2 1 0 1 32 32 546 0 -1 4.000 0 0 0 0 0 5 + 4438 2139 4837 2139 4837 2515 4438 2515 4438 2139 +2 1 0 0 33 33 545 0 20 4.000 0 0 0 0 0 5 + 4837 2139 5236 2139 5236 2515 4837 2515 4837 2139 +2 1 0 1 32 32 544 0 -1 4.000 0 0 0 0 0 5 + 4837 2139 5236 2139 5236 2515 4837 2515 4837 2139 +2 1 0 0 33 33 543 0 20 4.000 0 0 0 0 0 5 + 5236 2139 5635 2139 5635 2515 5236 2515 5236 2139 +2 1 0 1 32 32 542 0 -1 4.000 0 0 0 0 0 5 + 5236 2139 5635 2139 5635 2515 5236 2515 5236 2139 +2 1 0 0 33 33 541 0 20 4.000 0 0 0 0 0 5 + 1221 2515 1620 2515 1620 2890 1221 2890 1221 2515 +2 1 0 1 32 32 540 0 -1 4.000 0 0 0 0 0 5 + 1221 2515 1620 2515 1620 2890 1221 2890 1221 2515 +2 1 0 0 33 33 539 0 20 4.000 0 0 0 0 0 5 + 1620 2515 2019 2515 2019 2890 1620 2890 1620 2515 +2 1 0 1 32 32 538 0 -1 4.000 0 0 0 0 0 5 + 1620 2515 2019 2515 2019 2890 1620 2890 1620 2515 +2 1 0 0 33 33 537 0 20 4.000 0 0 0 0 0 5 + 2019 2515 2418 2515 2418 2890 2019 2890 2019 2515 
+2 1 0 1 32 32 536 0 -1 4.000 0 0 0 0 0 5 + 2019 2515 2418 2515 2418 2890 2019 2890 2019 2515 +2 1 0 0 33 33 535 0 20 4.000 0 0 0 0 0 5 + 2418 2515 2817 2515 2817 2890 2418 2890 2418 2515 +2 1 0 1 32 32 534 0 -1 4.000 0 0 0 0 0 5 + 2418 2515 2817 2515 2817 2890 2418 2890 2418 2515 +2 1 0 0 33 33 533 0 20 4.000 0 0 0 0 0 5 + 4038 2515 4438 2515 4438 2890 4038 2890 4038 2515 +2 1 0 1 32 32 532 0 -1 4.000 0 0 0 0 0 5 + 4038 2515 4438 2515 4438 2890 4038 2890 4038 2515 +2 1 0 0 33 33 531 0 20 4.000 0 0 0 0 0 5 + 4438 2515 4837 2515 4837 2890 4438 2890 4438 2515 +2 1 0 1 32 32 530 0 -1 4.000 0 0 0 0 0 5 + 4438 2515 4837 2515 4837 2890 4438 2890 4438 2515 +2 1 0 0 33 33 529 0 20 4.000 0 0 0 0 0 5 + 4837 2515 5236 2515 5236 2890 4837 2890 4837 2515 +2 1 0 1 32 32 528 0 -1 4.000 0 0 0 0 0 5 + 4837 2515 5236 2515 5236 2890 4837 2890 4837 2515 +2 1 0 0 33 33 527 0 20 4.000 0 0 0 0 0 5 + 5236 2515 5635 2515 5635 2890 5236 2890 5236 2515 +2 1 0 1 32 32 526 0 -1 4.000 0 0 0 0 0 5 + 5236 2515 5635 2515 5635 2890 5236 2890 5236 2515 +2 1 0 1 0 0 525 0 -1 4.000 0 0 0 0 0 2 + 1420 730 5459 730 +2 1 0 0 0 0 524 0 20 4.000 0 0 0 0 0 3 + 5488 733 5298 677 5298 730 +2 1 0 1 0 0 523 0 -1 4.000 0 0 0 0 0 3 + 5488 733 5298 677 5298 730 +2 1 0 0 0 0 522 0 20 4.000 0 0 0 0 0 3 + 5488 727 5298 783 5298 730 +2 1 0 1 0 0 521 0 -1 4.000 0 0 0 0 0 3 + 5488 727 5298 783 5298 730 +2 1 0 1 32 32 518 0 -1 4.000 0 0 0 0 0 2 + 2819 873 2869 873 +2 1 0 1 32 32 517 0 -1 4.000 0 0 0 0 0 2 + 2952 873 3002 873 +2 1 0 1 32 32 516 0 -1 4.000 0 0 0 0 0 2 + 3085 873 3135 873 +2 1 0 1 32 32 515 0 -1 4.000 0 0 0 0 0 2 + 3219 873 3252 873 +2 1 0 1 32 32 514 0 -1 4.000 0 0 0 0 0 2 + 2819 1248 2869 1248 +2 1 0 1 32 32 513 0 -1 4.000 0 0 0 0 0 2 + 2952 1248 3002 1248 +2 1 0 1 32 32 512 0 -1 4.000 0 0 0 0 0 2 + 3085 1248 3135 1248 +2 1 0 1 32 32 511 0 -1 4.000 0 0 0 0 0 2 + 3219 1248 3252 1248 +2 1 0 1 32 32 510 0 -1 4.000 0 0 0 0 0 2 + 2819 1240 2819 1290 +2 1 0 1 32 32 509 0 -1 4.000 0 0 0 0 0 2 + 2819 1373 2819 
1423 +2 1 0 1 32 32 508 0 -1 4.000 0 0 0 0 0 2 + 2819 1506 2819 1556 +2 1 0 1 32 32 507 0 -1 4.000 0 0 0 0 0 2 + 2419 1240 2419 1290 +2 1 0 1 32 32 506 0 -1 4.000 0 0 0 0 0 2 + 2419 1373 2419 1423 +2 1 0 1 32 32 505 0 -1 4.000 0 0 0 0 0 2 + 2419 1506 2419 1556 +2 1 0 1 32 32 504 0 -1 4.000 0 0 0 0 0 2 + 2019 1240 2019 1290 +2 1 0 1 32 32 503 0 -1 4.000 0 0 0 0 0 2 + 2019 1373 2019 1423 +2 1 0 1 32 32 502 0 -1 4.000 0 0 0 0 0 2 + 2019 1506 2019 1556 +2 1 0 1 32 32 501 0 -1 4.000 0 0 0 0 0 2 + 1619 1240 1619 1290 +2 1 0 1 32 32 500 0 -1 4.000 0 0 0 0 0 2 + 1619 1373 1619 1423 +2 1 0 1 32 32 499 0 -1 4.000 0 0 0 0 0 2 + 1619 1506 1619 1556 +2 1 0 1 32 32 498 0 -1 4.000 0 0 0 0 0 2 + 4036 873 3986 873 +2 1 0 1 32 32 497 0 -1 4.000 0 0 0 0 0 2 + 3902 873 3852 873 +2 1 0 1 32 32 496 0 -1 4.000 0 0 0 0 0 2 + 3769 873 3719 873 +2 1 0 1 32 32 495 0 -1 4.000 0 0 0 0 0 2 + 3636 873 3602 873 +2 1 0 1 32 32 494 0 -1 4.000 0 0 0 0 0 2 + 4036 1248 3986 1248 +2 1 0 1 32 32 493 0 -1 4.000 0 0 0 0 0 2 + 3902 1248 3852 1248 +2 1 0 1 32 32 492 0 -1 4.000 0 0 0 0 0 2 + 3769 1248 3719 1248 +2 1 0 1 32 32 491 0 -1 4.000 0 0 0 0 0 2 + 3636 1248 3602 1248 +2 1 0 1 32 32 490 0 -1 4.000 0 0 0 0 0 2 + 4035 1240 4035 1290 +2 1 0 1 32 32 489 0 -1 4.000 0 0 0 0 0 2 + 4035 1373 4035 1423 +2 1 0 1 32 32 488 0 -1 4.000 0 0 0 0 0 2 + 4035 1506 4035 1556 +2 1 0 1 32 32 487 0 -1 4.000 0 0 0 0 0 2 + 4435 1240 4435 1290 +2 1 0 1 32 32 486 0 -1 4.000 0 0 0 0 0 2 + 4435 1373 4435 1423 +2 1 0 1 32 32 485 0 -1 4.000 0 0 0 0 0 2 + 4435 1506 4435 1556 +2 1 0 1 32 32 484 0 -1 4.000 0 0 0 0 0 2 + 4835 1240 4835 1290 +2 1 0 1 32 32 483 0 -1 4.000 0 0 0 0 0 2 + 4835 1373 4835 1423 +2 1 0 1 32 32 482 0 -1 4.000 0 0 0 0 0 2 + 4835 1506 4835 1556 +2 1 0 1 32 32 481 0 -1 4.000 0 0 0 0 0 2 + 5235 1240 5235 1290 +2 1 0 1 32 32 480 0 -1 4.000 0 0 0 0 0 2 + 5235 1373 5235 1423 +2 1 0 1 32 32 479 0 -1 4.000 0 0 0 0 0 2 + 5235 1506 5235 1556 +2 1 0 1 32 32 478 0 -1 4.000 0 0 0 0 0 2 + 4036 2515 3986 2515 +2 1 0 1 32 32 477 
0 -1 4.000 0 0 0 0 0 2 + 3902 2515 3852 2515 +2 1 0 1 32 32 476 0 -1 4.000 0 0 0 0 0 2 + 3769 2515 3719 2515 +2 1 0 1 32 32 475 0 -1 4.000 0 0 0 0 0 2 + 3636 2515 3602 2515 +2 1 0 1 32 32 474 0 -1 4.000 0 0 0 0 0 2 + 4036 2140 3986 2140 +2 1 0 1 32 32 473 0 -1 4.000 0 0 0 0 0 2 + 3902 2140 3852 2140 +2 1 0 1 32 32 472 0 -1 4.000 0 0 0 0 0 2 + 3769 2140 3719 2140 +2 1 0 1 32 32 471 0 -1 4.000 0 0 0 0 0 2 + 3636 2140 3602 2140 +2 1 0 1 32 32 470 0 -1 4.000 0 0 0 0 0 2 + 4035 2148 4035 2098 +2 1 0 1 32 32 469 0 -1 4.000 0 0 0 0 0 2 + 4035 2015 4035 1965 +2 1 0 1 32 32 468 0 -1 4.000 0 0 0 0 0 2 + 4035 1881 4035 1831 +2 1 0 1 32 32 467 0 -1 4.000 0 0 0 0 0 2 + 4435 2148 4435 2098 +2 1 0 1 32 32 466 0 -1 4.000 0 0 0 0 0 2 + 4435 2015 4435 1965 +2 1 0 1 32 32 465 0 -1 4.000 0 0 0 0 0 2 + 4435 1881 4435 1831 +2 1 0 1 32 32 464 0 -1 4.000 0 0 0 0 0 2 + 4835 2148 4835 2098 +2 1 0 1 32 32 463 0 -1 4.000 0 0 0 0 0 2 + 4835 2015 4835 1965 +2 1 0 1 32 32 462 0 -1 4.000 0 0 0 0 0 2 + 4835 1881 4835 1831 +2 1 0 1 32 32 461 0 -1 4.000 0 0 0 0 0 2 + 5235 2148 5235 2098 +2 1 0 1 32 32 460 0 -1 4.000 0 0 0 0 0 2 + 5235 2015 5235 1965 +2 1 0 1 32 32 459 0 -1 4.000 0 0 0 0 0 2 + 5235 1881 5235 1831 +2 1 0 1 32 32 458 0 -1 4.000 0 0 0 0 0 2 + 2819 2515 2869 2515 +2 1 0 1 32 32 457 0 -1 4.000 0 0 0 0 0 2 + 2952 2515 3002 2515 +2 1 0 1 32 32 456 0 -1 4.000 0 0 0 0 0 2 + 3085 2515 3135 2515 +2 1 0 1 32 32 455 0 -1 4.000 0 0 0 0 0 2 + 3219 2515 3252 2515 +2 1 0 1 32 32 454 0 -1 4.000 0 0 0 0 0 2 + 2819 2140 2869 2140 +2 1 0 1 32 32 453 0 -1 4.000 0 0 0 0 0 2 + 2952 2140 3002 2140 +2 1 0 1 32 32 452 0 -1 4.000 0 0 0 0 0 2 + 3085 2140 3135 2140 +2 1 0 1 32 32 451 0 -1 4.000 0 0 0 0 0 2 + 3219 2140 3252 2140 +2 1 0 1 32 32 450 0 -1 4.000 0 0 0 0 0 2 + 2819 2148 2819 2098 +2 1 0 1 32 32 449 0 -1 4.000 0 0 0 0 0 2 + 2819 2015 2819 1965 +2 1 0 1 32 32 448 0 -1 4.000 0 0 0 0 0 2 + 2819 1881 2819 1831 +2 1 0 1 32 32 447 0 -1 4.000 0 0 0 0 0 2 + 2419 2148 2419 2098 +2 1 0 1 32 32 446 0 -1 4.000 0 0 
0 0 0 2 + 2419 2015 2419 1965 +2 1 0 1 32 32 445 0 -1 4.000 0 0 0 0 0 2 + 2419 1881 2419 1831 +2 1 0 1 32 32 444 0 -1 4.000 0 0 0 0 0 2 + 2019 2148 2019 2098 +2 1 0 1 32 32 443 0 -1 4.000 0 0 0 0 0 2 + 2019 2015 2019 1965 +2 1 0 1 32 32 442 0 -1 4.000 0 0 0 0 0 2 + 2019 1881 2019 1831 +2 1 0 1 32 32 441 0 -1 4.000 0 0 0 0 0 2 + 1619 2148 1619 2098 +2 1 0 1 32 32 440 0 -1 4.000 0 0 0 0 0 2 + 1619 2015 1619 1965 +2 1 0 1 32 32 439 0 -1 4.000 0 0 0 0 0 2 + 1619 1881 1619 1831 +2 1 0 1 0 0 426 0 -1 4.000 0 0 0 0 0 2 + 1420 1106 5459 1106 +2 1 0 0 0 0 425 0 20 4.000 0 0 0 0 0 3 + 5488 1109 5298 1053 5298 1106 +2 1 0 1 0 0 424 0 -1 4.000 0 0 0 0 0 3 + 5488 1109 5298 1053 5298 1106 +2 1 0 0 0 0 423 0 20 4.000 0 0 0 0 0 3 + 5488 1103 5298 1159 5298 1106 +2 1 0 1 0 0 422 0 -1 4.000 0 0 0 0 0 3 + 5488 1103 5298 1159 5298 1106 +2 1 0 1 0 0 421 0 -1 4.000 0 0 0 0 0 2 + 1420 2327 5459 2327 +2 1 0 0 0 0 420 0 20 4.000 0 0 0 0 0 3 + 5488 2330 5298 2274 5298 2327 +2 1 0 1 0 0 419 0 -1 4.000 0 0 0 0 0 3 + 5488 2330 5298 2274 5298 2327 +2 1 0 0 0 0 418 0 20 4.000 0 0 0 0 0 3 + 5488 2324 5298 2380 5298 2327 +2 1 0 1 0 0 417 0 -1 4.000 0 0 0 0 0 3 + 5488 2324 5298 2380 5298 2327 +2 1 0 1 0 0 416 0 -1 4.000 0 0 0 0 0 2 + 1420 2703 5459 2703 +2 1 0 0 0 0 415 0 20 4.000 0 0 0 0 0 3 + 5488 2706 5298 2650 5298 2703 +2 1 0 1 0 0 414 0 -1 4.000 0 0 0 0 0 3 + 5488 2706 5298 2650 5298 2703 +2 1 0 0 0 0 413 0 20 4.000 0 0 0 0 0 3 + 5488 2700 5298 2755 5298 2703 +2 1 0 1 0 0 412 0 -1 4.000 0 0 0 0 0 3 + 5488 2700 5298 2755 5298 2703 +2 1 0 1 0 0 389 0 -1 4.000 0 0 0 0 0 2 + 273 3662 273 3039 +2 1 0 1 0 0 388 0 -1 4.000 0 0 0 0 0 2 + 382 3920 156 3662 +2 1 0 1 0 0 387 0 -1 4.000 0 0 0 0 0 2 + 273 3662 148 3662 +2 1 0 1 0 0 386 0 -1 4.000 0 0 0 0 0 2 + 487 3662 487 3039 +2 1 0 1 0 0 385 0 -1 4.000 0 0 0 0 0 2 + 378 3920 604 3662 +2 1 0 1 0 0 384 0 -1 4.000 0 0 0 0 0 2 + 487 3662 612 3662 +2 1 0 1 0 0 383 0 -1 4.000 0 0 0 0 0 2 + 273 6130 273 6753 +2 1 0 1 0 0 382 0 -1 4.000 0 0 0 0 0 2 + 382 5872 
156 6130 +2 1 0 1 0 0 381 0 -1 4.000 0 0 0 0 0 2 + 273 6130 148 6130 +2 1 0 1 0 0 380 0 -1 4.000 0 0 0 0 0 2 + 487 6129 487 6753 +2 1 0 1 0 0 379 0 -1 4.000 0 0 0 0 0 2 + 378 5872 604 6129 +2 1 0 1 0 0 378 0 -1 4.000 0 0 0 0 0 2 + 487 6129 612 6129 +4 0 0 931 -1 16 15 0.0000 4 30 135 2064 7726 ...\001 +4 0 0 849 -1 16 13 0.0000 4 150 270 2338 7001 ny \001 +4 0 34 848 -1 16 13 0.0000 4 180 870 2605 7001 + 2-ny%2\001 +4 0 0 847 -1 16 13 0.0000 4 180 1110 3500 7001 = 2*(ny/2+1)\001 +4 0 0 845 -1 16 13 0.0000 4 105 195 681 8451 nx\001 +4 0 0 843 -1 16 10 0.0000 4 120 90 1364 7179 0\001 +4 0 0 841 -1 16 10 0.0000 4 150 345 6097 7179 ny+1\001 +4 0 0 839 -1 16 10 0.0000 4 120 90 1064 7479 0\001 +4 0 0 837 -1 16 10 0.0000 4 120 375 864 9479 nx-1\001 +4 0 0 798 -1 16 25 1.5708 4 360 1575 6250 9461 (padding)\001 +4 0 0 796 -1 18 16 1.5708 4 240 1695 428 9283 input, in-place\001 +4 0 0 794 -1 16 10 0.0000 4 120 90 1264 7429 0\001 +4 0 0 792 -1 16 10 0.0000 4 120 90 1681 7429 1\001 +4 0 0 790 -1 16 10 0.0000 4 120 90 2081 7429 2\001 +4 0 0 788 -1 16 10 0.0000 4 120 90 2481 7429 3\001 +4 0 0 786 -1 16 10 0.0000 4 150 360 4081 7429 ny-4\001 +4 0 0 784 -1 16 10 0.0000 4 150 360 4897 7429 ny-2\001 +4 0 0 782 -1 16 10 0.0000 4 150 360 5297 7429 ny-1\001 +4 0 0 780 -1 16 10 0.0000 4 150 360 4497 7429 ny-3\001 +4 0 0 778 -1 16 10 0.0000 4 150 345 1264 7795 ny+2\001 +4 0 0 776 -1 16 10 0.0000 4 150 345 1664 7795 ny+3\001 +4 0 0 774 -1 16 10 0.0000 4 120 165 5681 7429 ny\001 +4 0 0 772 -1 16 10 0.0000 4 150 345 6081 7429 ny+1\001 +4 0 0 709 -1 16 15 0.0000 4 30 135 2064 3993 ...\001 +4 0 0 651 -1 16 13 0.0000 4 180 585 3181 3267 ny/2+1\001 +4 0 0 649 -1 16 13 0.0000 4 105 195 681 4717 nx\001 +4 0 0 647 -1 16 10 0.0000 4 120 90 1564 3445 0\001 +4 0 0 645 -1 16 10 0.0000 4 150 300 5831 3445 ny/2\001 +4 0 0 643 -1 16 10 0.0000 4 120 90 1064 3745 0\001 +4 0 0 641 -1 16 10 0.0000 4 120 375 864 5745 nx-1\001 +4 0 0 611 -1 16 13 0.0000 4 165 855 1981 6463 = double\001 +4 0 0 606 -1 16 13 
0.0000 4 180 1230 4547 6463 = fftw_complex\001 +4 0 0 604 -1 18 16 1.5708 4 225 780 428 5128 output\001 +4 0 0 602 -1 16 10 0.0000 4 120 90 1264 3679 0\001 +4 0 0 600 -1 16 10 0.0000 4 120 90 2081 3679 1\001 +4 0 0 598 -1 16 10 0.0000 4 150 495 4097 3679 ny/2-2\001 +4 0 0 596 -1 16 10 0.0000 4 150 495 4914 3679 ny/2-1\001 +4 0 0 594 -1 16 10 0.0000 4 150 480 1264 4062 ny/2+1\001 +4 0 0 592 -1 16 10 0.0000 4 150 300 5697 3679 ny/2\001 +4 0 0 519 -1 16 15 0.0000 4 30 135 2064 943 ...\001 +4 0 0 437 -1 16 13 0.0000 4 150 210 3381 217 ny\001 +4 0 0 435 -1 16 13 0.0000 4 105 195 681 1667 nx\001 +4 0 0 433 -1 16 10 0.0000 4 120 90 1364 395 0\001 +4 0 0 431 -1 16 10 0.0000 4 150 360 5281 395 ny-1\001 +4 0 0 429 -1 16 10 0.0000 4 120 90 1064 695 0\001 +4 0 0 427 -1 16 10 0.0000 4 120 375 864 2695 nx-1\001 +4 0 0 410 -1 18 16 1.5708 4 240 2235 428 2734 input, out-of-place\001 +4 0 0 408 -1 16 10 0.0000 4 120 90 1264 629 0\001 +4 0 0 406 -1 16 10 0.0000 4 120 90 1681 629 1\001 +4 0 0 404 -1 16 10 0.0000 4 120 90 2081 629 2\001 +4 0 0 402 -1 16 10 0.0000 4 120 90 2481 629 3\001 +4 0 0 400 -1 16 10 0.0000 4 150 360 4081 629 ny-4\001 +4 0 0 398 -1 16 10 0.0000 4 150 360 4897 629 ny-2\001 +4 0 0 396 -1 16 10 0.0000 4 150 360 5297 629 ny-1\001 +4 0 0 394 -1 16 10 0.0000 4 150 360 4497 629 ny-3\001 +4 0 0 392 -1 16 10 0.0000 4 120 165 1264 1012 ny\001 +4 0 0 390 -1 16 10 0.0000 4 150 345 1664 1012 ny+1\001 +-6 +4 0 0 932 -1 16 15 0.0000 4 15 60 74 89 \001 +4 0 0 850 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 846 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 844 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 842 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 840 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 838 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 799 -1 16 25 0.0000 4 15 90 74 89 \001 +4 0 0 797 -1 18 16 0.0000 4 15 60 74 89 \001 +4 0 0 795 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 793 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 791 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 789 -1 16 10 0.0000 4 
15 45 74 89 \001 +4 0 0 787 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 785 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 783 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 781 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 779 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 777 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 775 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 773 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 710 -1 16 15 0.0000 4 15 60 74 89 \001 +4 0 0 652 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 650 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 648 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 646 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 644 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 642 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 612 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 607 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 605 -1 18 16 0.0000 4 15 60 74 89 \001 +4 0 0 603 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 601 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 599 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 597 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 595 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 593 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 520 -1 16 15 0.0000 4 15 60 74 89 \001 +4 0 0 438 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 436 -1 16 13 0.0000 4 15 60 74 89 \001 +4 0 0 434 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 432 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 430 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 428 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 411 -1 18 16 0.0000 4 15 60 74 89 \001 +4 0 0 409 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 407 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 405 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 403 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 401 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 399 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 397 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 395 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 393 -1 16 10 0.0000 4 15 45 74 89 \001 +4 0 0 391 -1 16 10 0.0000 4 15 45 1425 4800 \001 diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/doc/rfftwnd.pdf Binary file fft/fftw/fftw-3.3.4/doc/rfftwnd.pdf has changed diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/stamp-vti --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/stamp-vti Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,4 @@ +@set UPDATED 20 September 2013 +@set UPDATED-MONTH September 2013 +@set EDITION 3.3.4 +@set VERSION 3.3.4 diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/texinfo.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/texinfo.tex Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,10079 @@ +% texinfo.tex -- TeX macros to handle Texinfo files. +% +% Load plain if necessary, i.e., if running under initex. +\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi +% +\def\texinfoversion{2013-02-01.11} +% +% Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995, +% 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +% 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. +% +% This texinfo.tex file is free software: you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This texinfo.tex file is distributed in the hope that it will be +% useful, but WITHOUT ANY WARRANTY; without even the implied warranty +% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . +% +% As a special exception, when this file is read by TeX when processing +% a Texinfo source document, you may use the result without +% restriction. This Exception is an additional permission under section 7 +% of the GNU General Public License, version 3 ("GPLv3"). 
+% +% Please try the latest version of texinfo.tex before submitting bug +% reports; you can get the latest version from: +% http://ftp.gnu.org/gnu/texinfo/ (the Texinfo release area), or +% http://ftpmirror.gnu.org/texinfo/ (same, via a mirror), or +% http://www.gnu.org/software/texinfo/ (the Texinfo home page) +% The texinfo.tex in any given distribution could well be out +% of date, so if that's what you're using, please check. +% +% Send bug reports to bug-texinfo@gnu.org. Please include including a +% complete document in each bug report with which we can reproduce the +% problem. Patches are, of course, greatly appreciated. +% +% To process a Texinfo manual with TeX, it's most reliable to use the +% texi2dvi shell script that comes with the distribution. For a simple +% manual foo.texi, however, you can get away with this: +% tex foo.texi +% texindex foo.?? +% tex foo.texi +% tex foo.texi +% dvips foo.dvi -o # or whatever; this makes foo.ps. +% The extra TeX runs get the cross-reference information correct. +% Sometimes one run after texindex suffices, and sometimes you need more +% than two; texi2dvi does it as many times as necessary. +% +% It is possible to adapt texinfo.tex for other languages, to some +% extent. You can get the existing language-specific files from the +% full Texinfo distribution. +% +% The GNU Texinfo home page is http://www.gnu.org/software/texinfo. + + +\message{Loading texinfo [version \texinfoversion]:} + +% If in a .fmt file, print the version number +% and turn on active characters that we couldn't do earlier because +% they might have appeared in the input file name. +\everyjob{\message{[Texinfo version \texinfoversion]}% + \catcode`+=\active \catcode`\_=\active} + +\chardef\other=12 + +% We never want plain's \outer definition of \+ in Texinfo. +% For @tex, we can use \tabalign. +\let\+ = \relax + +% Save some plain tex macros whose names we will redefine. 
+\let\ptexb=\b +\let\ptexbullet=\bullet +\let\ptexc=\c +\let\ptexcomma=\, +\let\ptexdot=\. +\let\ptexdots=\dots +\let\ptexend=\end +\let\ptexequiv=\equiv +\let\ptexexclam=\! +\let\ptexfootnote=\footnote +\let\ptexgtr=> +\let\ptexhat=^ +\let\ptexi=\i +\let\ptexindent=\indent +\let\ptexinsert=\insert +\let\ptexlbrace=\{ +\let\ptexless=< +\let\ptexnewwrite\newwrite +\let\ptexnoindent=\noindent +\let\ptexplus=+ +\let\ptexraggedright=\raggedright +\let\ptexrbrace=\} +\let\ptexslash=\/ +\let\ptexstar=\* +\let\ptext=\t +\let\ptextop=\top +{\catcode`\'=\active \global\let\ptexquoteright'}% active in plain's math mode + +% If this character appears in an error message or help string, it +% starts a new line in the output. +\newlinechar = `^^J + +% Use TeX 3.0's \inputlineno to get the line number, for better error +% messages, but if we're using an old version of TeX, don't do anything. +% +\ifx\inputlineno\thisisundefined + \let\linenumber = \empty % Pre-3.0. +\else + \def\linenumber{l.\the\inputlineno:\space} +\fi + +% Set up fixed words for English if not already set. 
+\ifx\putwordAppendix\undefined \gdef\putwordAppendix{Appendix}\fi +\ifx\putwordChapter\undefined \gdef\putwordChapter{Chapter}\fi +\ifx\putworderror\undefined \gdef\putworderror{error}\fi +\ifx\putwordfile\undefined \gdef\putwordfile{file}\fi +\ifx\putwordin\undefined \gdef\putwordin{in}\fi +\ifx\putwordIndexIsEmpty\undefined \gdef\putwordIndexIsEmpty{(Index is empty)}\fi +\ifx\putwordIndexNonexistent\undefined \gdef\putwordIndexNonexistent{(Index is nonexistent)}\fi +\ifx\putwordInfo\undefined \gdef\putwordInfo{Info}\fi +\ifx\putwordInstanceVariableof\undefined \gdef\putwordInstanceVariableof{Instance Variable of}\fi +\ifx\putwordMethodon\undefined \gdef\putwordMethodon{Method on}\fi +\ifx\putwordNoTitle\undefined \gdef\putwordNoTitle{No Title}\fi +\ifx\putwordof\undefined \gdef\putwordof{of}\fi +\ifx\putwordon\undefined \gdef\putwordon{on}\fi +\ifx\putwordpage\undefined \gdef\putwordpage{page}\fi +\ifx\putwordsection\undefined \gdef\putwordsection{section}\fi +\ifx\putwordSection\undefined \gdef\putwordSection{Section}\fi +\ifx\putwordsee\undefined \gdef\putwordsee{see}\fi +\ifx\putwordSee\undefined \gdef\putwordSee{See}\fi +\ifx\putwordShortTOC\undefined \gdef\putwordShortTOC{Short Contents}\fi +\ifx\putwordTOC\undefined \gdef\putwordTOC{Table of Contents}\fi +% +\ifx\putwordMJan\undefined \gdef\putwordMJan{January}\fi +\ifx\putwordMFeb\undefined \gdef\putwordMFeb{February}\fi +\ifx\putwordMMar\undefined \gdef\putwordMMar{March}\fi +\ifx\putwordMApr\undefined \gdef\putwordMApr{April}\fi +\ifx\putwordMMay\undefined \gdef\putwordMMay{May}\fi +\ifx\putwordMJun\undefined \gdef\putwordMJun{June}\fi +\ifx\putwordMJul\undefined \gdef\putwordMJul{July}\fi +\ifx\putwordMAug\undefined \gdef\putwordMAug{August}\fi +\ifx\putwordMSep\undefined \gdef\putwordMSep{September}\fi +\ifx\putwordMOct\undefined \gdef\putwordMOct{October}\fi +\ifx\putwordMNov\undefined \gdef\putwordMNov{November}\fi +\ifx\putwordMDec\undefined \gdef\putwordMDec{December}\fi +% 
+\ifx\putwordDefmac\undefined \gdef\putwordDefmac{Macro}\fi +\ifx\putwordDefspec\undefined \gdef\putwordDefspec{Special Form}\fi +\ifx\putwordDefvar\undefined \gdef\putwordDefvar{Variable}\fi +\ifx\putwordDefopt\undefined \gdef\putwordDefopt{User Option}\fi +\ifx\putwordDeffunc\undefined \gdef\putwordDeffunc{Function}\fi + +% Since the category of space is not known, we have to be careful. +\chardef\spacecat = 10 +\def\spaceisspace{\catcode`\ =\spacecat} + +% sometimes characters are active, so we need control sequences. +\chardef\ampChar = `\& +\chardef\colonChar = `\: +\chardef\commaChar = `\, +\chardef\dashChar = `\- +\chardef\dotChar = `\. +\chardef\exclamChar= `\! +\chardef\hashChar = `\# +\chardef\lquoteChar= `\` +\chardef\questChar = `\? +\chardef\rquoteChar= `\' +\chardef\semiChar = `\; +\chardef\slashChar = `\/ +\chardef\underChar = `\_ + +% Ignore a token. +% +\def\gobble#1{} + +% The following is used inside several \edef's. +\def\makecsname#1{\expandafter\noexpand\csname#1\endcsname} + +% Hyphenation fixes. +\hyphenation{ + Flor-i-da Ghost-script Ghost-view Mac-OS Post-Script + ap-pen-dix bit-map bit-maps + data-base data-bases eshell fall-ing half-way long-est man-u-script + man-u-scripts mini-buf-fer mini-buf-fers over-view par-a-digm + par-a-digms rath-er rec-tan-gu-lar ro-bot-ics se-vere-ly set-up spa-ces + spell-ing spell-ings + stand-alone strong-est time-stamp time-stamps which-ever white-space + wide-spread wrap-around +} + +% Margin to add to right of even pages, to left of odd pages. +\newdimen\bindingoffset +\newdimen\normaloffset +\newdimen\pagewidth \newdimen\pageheight + +% For a final copy, take out the rectangles +% that mark overfull boxes (in case you have decided +% that the text looks ok even though it passes the margin). +% +\def\finalout{\overfullrule=0pt } + +% Sometimes it is convenient to have everything in the transcript file +% and nothing on the terminal. 
We don't just call \tracingall here, +% since that produces some useless output on the terminal. We also make +% some effort to order the tracing commands to reduce output in the log +% file; cf. trace.sty in LaTeX. +% +\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% +\def\loggingall{% + \tracingstats2 + \tracingpages1 + \tracinglostchars2 % 2 gives us more in etex + \tracingparagraphs1 + \tracingoutput1 + \tracingmacros2 + \tracingrestores1 + \showboxbreadth\maxdimen \showboxdepth\maxdimen + \ifx\eTeXversion\thisisundefined\else % etex gives us more logging + \tracingscantokens1 + \tracingifs1 + \tracinggroups1 + \tracingnesting2 + \tracingassigns1 + \fi + \tracingcommands3 % 3 gives us more in etex + \errorcontextlines16 +}% + +% @errormsg{MSG}. Do the index-like expansions on MSG, but if things +% aren't perfect, it's not the end of the world, being an error message, +% after all. +% +\def\errormsg{\begingroup \indexnofonts \doerrormsg} +\def\doerrormsg#1{\errmessage{#1}} + +% add check for \lastpenalty to plain's definitions. If the last thing +% we did was a \nobreak, we don't want to insert more space. +% +\def\smallbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\smallskipamount + \removelastskip\penalty-50\smallskip\fi\fi} +\def\medbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\medskipamount + \removelastskip\penalty-100\medskip\fi\fi} +\def\bigbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\bigskipamount + \removelastskip\penalty-200\bigskip\fi\fi} + +% Do @cropmarks to get crop marks. +% +\newif\ifcropmarks +\let\cropmarks = \cropmarkstrue +% +% Dimensions to add cropmarks at corners. +% Added by P. A. MacKay, 12 Nov. 1986 +% +\newdimen\outerhsize \newdimen\outervsize % set by the paper size routines +\newdimen\cornerlong \cornerlong=1pc +\newdimen\cornerthick \cornerthick=.3pt +\newdimen\topandbottommargin \topandbottommargin=.75in + +% Output a mark which sets \thischapter, \thissection and \thiscolor. 
+% We dump everything together because we only have one kind of mark. +% This works because we only use \botmark / \topmark, not \firstmark. +% +% A mark contains a subexpression of the \ifcase ... \fi construct. +% \get*marks macros below extract the needed part using \ifcase. +% +% Another complication is to let the user choose whether \thischapter +% (\thissection) refers to the chapter (section) in effect at the top +% of a page, or that at the bottom of a page. The solution is +% described on page 260 of The TeXbook. It involves outputting two +% marks for the sectioning macros, one before the section break, and +% one after. I won't pretend I can describe this better than DEK... +\def\domark{% + \toks0=\expandafter{\lastchapterdefs}% + \toks2=\expandafter{\lastsectiondefs}% + \toks4=\expandafter{\prevchapterdefs}% + \toks6=\expandafter{\prevsectiondefs}% + \toks8=\expandafter{\lastcolordefs}% + \mark{% + \the\toks0 \the\toks2 + \noexpand\or \the\toks4 \the\toks6 + \noexpand\else \the\toks8 + }% +} +% \topmark doesn't work for the very first chapter (after the title +% page or the contents), so we use \firstmark there -- this gets us +% the mark with the chapter defs, unless the user sneaks in, e.g., +% @setcolor (or @url, or @link, etc.) between @contents and the very +% first @chapter. +\def\gettopheadingmarks{% + \ifcase0\topmark\fi + \ifx\thischapter\empty \ifcase0\firstmark\fi \fi +} +\def\getbottomheadingmarks{\ifcase1\botmark\fi} +\def\getcolormarks{\ifcase2\topmark\fi} + +% Avoid "undefined control sequence" errors. +\def\lastchapterdefs{} +\def\lastsectiondefs{} +\def\prevchapterdefs{} +\def\prevsectiondefs{} +\def\lastcolordefs{} + +% Main output routine. +\chardef\PAGE = 255 +\output = {\onepageout{\pagecontents\PAGE}} + +\newbox\headlinebox +\newbox\footlinebox + +% \onepageout takes a vbox as an argument. Note that \pagecontents +% does insertions, but you have to call it yourself. 
+\def\onepageout#1{% + \ifcropmarks \hoffset=0pt \else \hoffset=\normaloffset \fi + % + \ifodd\pageno \advance\hoffset by \bindingoffset + \else \advance\hoffset by -\bindingoffset\fi + % + % Do this outside of the \shipout so @code etc. will be expanded in + % the headline as they should be, not taken literally (outputting ''code). + \ifodd\pageno \getoddheadingmarks \else \getevenheadingmarks \fi + \setbox\headlinebox = \vbox{\let\hsize=\pagewidth \makeheadline}% + \ifodd\pageno \getoddfootingmarks \else \getevenfootingmarks \fi + \setbox\footlinebox = \vbox{\let\hsize=\pagewidth \makefootline}% + % + {% + % Have to do this stuff outside the \shipout because we want it to + % take effect in \write's, yet the group defined by the \vbox ends + % before the \shipout runs. + % + \indexdummies % don't expand commands in the output. + \normalturnoffactive % \ in index entries must not stay \, e.g., if + % the page break happens to be in the middle of an example. + % We don't want .vr (or whatever) entries like this: + % \entry{{\tt \indexbackslash }acronym}{32}{\code {\acronym}} + % "\acronym" won't work when it's read back in; + % it needs to be + % {\code {{\tt \backslashcurfont }acronym} + \shipout\vbox{% + % Do this early so pdf references go to the beginning of the page. + \ifpdfmakepagedest \pdfdest name{\the\pageno} xyz\fi + % + \ifcropmarks \vbox to \outervsize\bgroup + \hsize = \outerhsize + \vskip-\topandbottommargin + \vtop to0pt{% + \line{\ewtop\hfil\ewtop}% + \nointerlineskip + \line{% + \vbox{\moveleft\cornerthick\nstop}% + \hfill + \vbox{\moveright\cornerthick\nstop}% + }% + \vss}% + \vskip\topandbottommargin + \line\bgroup + \hfil % center the page within the outer (page) hsize. + \ifodd\pageno\hskip\bindingoffset\fi + \vbox\bgroup + \fi + % + \unvbox\headlinebox + \pagebody{#1}% + \ifdim\ht\footlinebox > 0pt + % Only leave this space if the footline is nonempty. + % (We lessened \vsize for it in \oddfootingyyy.) 
+ % The \baselineskip=24pt in plain's \makefootline has no effect. + \vskip 24pt + \unvbox\footlinebox + \fi + % + \ifcropmarks + \egroup % end of \vbox\bgroup + \hfil\egroup % end of (centering) \line\bgroup + \vskip\topandbottommargin plus1fill minus1fill + \boxmaxdepth = \cornerthick + \vbox to0pt{\vss + \line{% + \vbox{\moveleft\cornerthick\nsbot}% + \hfill + \vbox{\moveright\cornerthick\nsbot}% + }% + \nointerlineskip + \line{\ewbot\hfil\ewbot}% + }% + \egroup % \vbox from first cropmarks clause + \fi + }% end of \shipout\vbox + }% end of group with \indexdummies + \advancepageno + \ifnum\outputpenalty>-20000 \else\dosupereject\fi +} + +\newinsert\margin \dimen\margin=\maxdimen + +\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}} +{\catcode`\@ =11 +\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi +% marginal hacks, juha@viisa.uucp (Juha Takala) +\ifvoid\margin\else % marginal info is present + \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi +\dimen@=\dp#1\relax \unvbox#1\relax +\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi +\ifr@ggedbottom \kern-\dimen@ \vfil \fi} +} + +% Here are the rules for the cropmarks. Note that they are +% offset so that the space between them is truly \outerhsize or \outervsize +% (P. A. MacKay, 12 November, 1986) +% +\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong} +\def\nstop{\vbox + {\hrule height\cornerthick depth\cornerlong width\cornerthick}} +\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong} +\def\nsbot{\vbox + {\hrule height\cornerlong depth\cornerthick width\cornerthick}} + +% Parse an argument, then pass it to #1. The argument is the rest of +% the input line (except we remove a trailing comment). #1 should be a +% macro which expects an ordinary undelimited TeX argument. 
+% +\def\parsearg{\parseargusing{}} +\def\parseargusing#1#2{% + \def\argtorun{#2}% + \begingroup + \obeylines + \spaceisspace + #1% + \parseargline\empty% Insert the \empty token, see \finishparsearg below. +} + +{\obeylines % + \gdef\parseargline#1^^M{% + \endgroup % End of the group started in \parsearg. + \argremovecomment #1\comment\ArgTerm% + }% +} + +% First remove any @comment, then any @c comment. +\def\argremovecomment#1\comment#2\ArgTerm{\argremovec #1\c\ArgTerm} +\def\argremovec#1\c#2\ArgTerm{\argcheckspaces#1\^^M\ArgTerm} + +% Each occurrence of `\^^M' or `<space>\^^M' is replaced by a single space. +% +% \argremovec might leave us with trailing space, e.g., +% @end itemize @c foo +% This space token undergoes the same procedure and is eventually removed +% by \finishparsearg. +% +\def\argcheckspaces#1\^^M{\argcheckspacesX#1\^^M \^^M} +\def\argcheckspacesX#1 \^^M{\argcheckspacesY#1\^^M} +\def\argcheckspacesY#1\^^M#2\^^M#3\ArgTerm{% + \def\temp{#3}% + \ifx\temp\empty + % Do not use \next, perhaps the caller of \parsearg uses it; reuse \temp: + \let\temp\finishparsearg + \else + \let\temp\argcheckspaces + \fi + % Put the space token in: + \temp#1 #3\ArgTerm +} + +% If a _delimited_ argument is enclosed in braces, they get stripped; so +% to get _exactly_ the rest of the line, we had to prevent such a situation. +% We prepended an \empty token at the very beginning and we expand it now, +% just before passing the control to \argtorun. +% (Similarly, we have to think about #3 of \argcheckspacesY above: it is +% either the null string, or it ends with \^^M---thus there is no danger +% that a pair of braces would be stripped.) +% +% But first, we have to remove the trailing space token. +% +\def\finishparsearg#1 \ArgTerm{\expandafter\argtorun\expandafter{#1}} + +% \parseargdef\foo{...} +% is roughly equivalent to +% \def\foo{\parsearg\Xfoo} +% \def\Xfoo#1{...} +% +% Actually, I use \csname\string\foo\endcsname, i.e., \\foo, as it is my +% favourite TeX trick.
--kasal, 16nov03 + +\def\parseargdef#1{% + \expandafter \doparseargdef \csname\string#1\endcsname #1% +} +\def\doparseargdef#1#2{% + \def#2{\parsearg#1}% + \def#1##1% +} + +% Several utility definitions with active space: +{ + \obeyspaces + \gdef\obeyedspace{ } + + % Make each space character in the input produce a normal interword + % space in the output. Don't allow a line break at this space, as this + % is used only in environments like @example, where each line of input + % should produce a line of output anyway. + % + \gdef\sepspaces{\obeyspaces\let =\tie} + + % If an index command is used in an @example environment, any spaces + % therein should become regular spaces in the raw index file, not the + % expansion of \tie (\leavevmode \penalty \@M \ ). + \gdef\unsepspaces{\let =\space} +} + + +\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next} + +% Define the framework for environments in texinfo.tex. It's used like this: +% +% \envdef\foo{...} +% \def\Efoo{...} +% +% It's the responsibility of \envdef to insert \begingroup before the +% actual body; @end closes the group after calling \Efoo. \envdef also +% defines \thisenv, so the current environment is known; @end checks +% whether the environment name matches. The \checkenv macro can also be +% used to check whether the current environment is the one expected. +% +% Non-false conditionals (@iftex, @ifset) don't fit into this, so they +% are not treated as environments; they don't open a group. (The +% implementation of @end takes care not to call \endgroup in this +% special case.) + + +% At run-time, environments start with this: +\def\startenvironment#1{\begingroup\def\thisenv{#1}} +% initialize +\let\thisenv\empty + +% ... 
but they get defined via ``\envdef\foo{...}'': +\long\def\envdef#1#2{\def#1{\startenvironment#1#2}} +\def\envparseargdef#1#2{\parseargdef#1{\startenvironment#1#2}} + +% Check whether we're in the right environment: +\def\checkenv#1{% + \def\temp{#1}% + \ifx\thisenv\temp + \else + \badenverr + \fi +} + +% Environment mismatch, #1 expected: +\def\badenverr{% + \errhelp = \EMsimple + \errmessage{This command can appear only \inenvironment\temp, + not \inenvironment\thisenv}% +} +\def\inenvironment#1{% + \ifx#1\empty + outside of any environment% + \else + in environment \expandafter\string#1% + \fi +} + +% @end foo executes the definition of \Efoo. +% But first, it executes a specialized version of \checkenv +% +\parseargdef\end{% + \if 1\csname iscond.#1\endcsname + \else + % The general wording of \badenverr may not be ideal. + \expandafter\checkenv\csname#1\endcsname + \csname E#1\endcsname + \endgroup + \fi +} + +\newhelp\EMsimple{Press RETURN to continue.} + + +% Be sure we're in horizontal mode when doing a tie, since we make space +% equivalent to this in @example-like environments. Otherwise, a space +% at the beginning of a line will start with \penalty -- and +% since \penalty is valid in vertical mode, we'd end up putting the +% penalty on the vertical list instead of in the new paragraph. +{\catcode`@ = 11 + % Avoid using \@M directly, because that causes trouble + % if the definition is written into an index file. + \global\let\tiepenalty = \@M + \gdef\tie{\leavevmode\penalty\tiepenalty\ } +} + +% @: forces normal size whitespace following. +\def\:{\spacefactor=1000 } + +% @* forces a line break. +\def\*{\unskip\hfil\break\hbox{}\ignorespaces} + +% @/ allows a line break. +\let\/=\allowbreak + +% @. is an end-of-sentence period. +\def\.{.\spacefactor=\endofsentencespacefactor\space} + +% @! is an end-of-sentence bang. +\def\!{!\spacefactor=\endofsentencespacefactor\space} + +% @? is an end-of-sentence query. 
+\def\?{?\spacefactor=\endofsentencespacefactor\space} + +% @frenchspacing on|off says whether to put extra space after punctuation. +% +\def\onword{on} +\def\offword{off} +% +\parseargdef\frenchspacing{% + \def\temp{#1}% + \ifx\temp\onword \plainfrenchspacing + \else\ifx\temp\offword \plainnonfrenchspacing + \else + \errhelp = \EMsimple + \errmessage{Unknown @frenchspacing option `\temp', must be on|off}% + \fi\fi +} + +% @w prevents a word break. Without the \leavevmode, @w at the +% beginning of a paragraph, when TeX is still in vertical mode, would +% produce a whole line of output instead of starting the paragraph. +\def\w#1{\leavevmode\hbox{#1}} + +% @group ... @end group forces ... to be all on one page, by enclosing +% it in a TeX vbox. We use \vtop instead of \vbox to construct the box +% to keep its height that of a normal line. According to the rules for +% \topskip (p.114 of the TeXbook), the glue inserted is +% max (\topskip - \ht (first item), 0). If that height is large, +% therefore, no glue is inserted, and the space between the headline and +% the text is small, which looks bad. +% +% Another complication is that the group might be very large. This can +% cause the glue on the previous page to be unduly stretched, because it +% does not have much material. In this case, it's better to add an +% explicit \vfill so that the extra space is at the bottom. The +% threshold for doing this is if the group is more than \vfilllimit +% percent of a page (\vfilllimit can be changed inside of @tex). +% +\newbox\groupbox +\def\vfilllimit{0.7} +% +\envdef\group{% + \ifnum\catcode`\^^M=\active \else + \errhelp = \groupinvalidhelp + \errmessage{@group invalid in context where filling is enabled}% + \fi + \startsavinginserts + % + \setbox\groupbox = \vtop\bgroup + % Do @comment since we are called inside an environment such as + % @example, where each end-of-line in the input causes an + % end-of-line in the output. 
We don't want the end-of-line after + % the `@group' to put extra space in the output. Since @group + % should appear on a line by itself (according to the Texinfo + % manual), we don't worry about eating any user text. + \comment +} +% +% The \vtop produces a box with normal height and large depth; thus, TeX puts +% \baselineskip glue before it, and (when the next line of text is done) +% \lineskip glue after it. Thus, space below is not quite equal to space +% above. But it's pretty close. +\def\Egroup{% + % To get correct interline space between the last line of the group + % and the first line afterwards, we have to propagate \prevdepth. + \endgraf % Not \par, as it may have been set to \lisppar. + \global\dimen1 = \prevdepth + \egroup % End the \vtop. + % \dimen0 is the vertical size of the group's box. + \dimen0 = \ht\groupbox \advance\dimen0 by \dp\groupbox + % \dimen2 is how much space is left on the page (more or less). + \dimen2 = \pageheight \advance\dimen2 by -\pagetotal + % if the group doesn't fit on the current page, and it's a big big + % group, force a page break. + \ifdim \dimen0 > \dimen2 + \ifdim \pagetotal < \vfilllimit\pageheight + \page + \fi + \fi + \box\groupbox + \prevdepth = \dimen1 + \checkinserts +} +% +% TeX puts in an \escapechar (i.e., `@') at the beginning of the help +% message, so this ends up printing `@group can only ...'. +% +\newhelp\groupinvalidhelp{% +group can only be used in environments such as @example,^^J% +where each line of input produces a line of output.} + +% @need space-in-mils +% forces a page break if there is not space-in-mils remaining. + +\newdimen\mil \mil=0.001in + +\parseargdef\need{% + % Ensure vertical mode, so we don't make a big box in the middle of a + % paragraph. + \par + % + % If the @need value is less than one line space, it's useless. 
+ \dimen0 = #1\mil + \dimen2 = \ht\strutbox + \advance\dimen2 by \dp\strutbox + \ifdim\dimen0 > \dimen2 + % + % Do a \strut just to make the height of this box be normal, so the + % normal leading is inserted relative to the preceding line. + % And a page break here is fine. + \vtop to #1\mil{\strut\vfil}% + % + % TeX does not even consider page breaks if a penalty added to the + % main vertical list is 10000 or more. But in order to see if the + % empty box we just added fits on the page, we must make it consider + % page breaks. On the other hand, we don't want to actually break the + % page after the empty box. So we use a penalty of 9999. + % + % There is an extremely small chance that TeX will actually break the + % page at this \penalty, if there are no other feasible breakpoints in + % sight. (If the user is using lots of big @group commands, which + % almost-but-not-quite fill up a page, TeX will have a hard time doing + % good page breaking, for example.) However, I could not construct an + % example where a page broke at this \penalty; if it happens in a real + % document, then we can reconsider our strategy. + \penalty9999 + % + % Back up by the size of the box, whether we did a page break or not. + \kern -#1\mil + % + % Do not allow a page break right after this kern. + \nobreak + \fi +} + +% @br forces paragraph break (and is undocumented). + +\let\br = \par + +% @page forces the start of a new page. +% +\def\page{\par\vfill\supereject} + +% @exdent text.... +% outputs text on separate line in roman font, starting at standard page margin + +% This records the amount of indent in the innermost environment. +% That's how much \exdent should take out. +\newskip\exdentamount + +% This defn is used inside fill environments such as @defun. +\parseargdef\exdent{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break} + +% This defn is used inside nofill environments such as @example. 
+\parseargdef\nofillexdent{{\advance \leftskip by -\exdentamount + \leftline{\hskip\leftskip{\rm#1}}}} + +% @inmargin{WHICH}{TEXT} puts TEXT in the WHICH margin next to the current +% paragraph. For more general purposes, use the \margin insertion +% class. WHICH is `l' or `r'. Not documented, written for gawk manual. +% +\newskip\inmarginspacing \inmarginspacing=1cm +\def\strutdepth{\dp\strutbox} +% +\def\doinmargin#1#2{\strut\vadjust{% + \nobreak + \kern-\strutdepth + \vtop to \strutdepth{% + \baselineskip=\strutdepth + \vss + % if you have multiple lines of stuff to put here, you'll need to + % make the vbox yourself of the appropriate size. + \ifx#1l% + \llap{\ignorespaces #2\hskip\inmarginspacing}% + \else + \rlap{\hskip\hsize \hskip\inmarginspacing \ignorespaces #2}% + \fi + \null + }% +}} +\def\inleftmargin{\doinmargin l} +\def\inrightmargin{\doinmargin r} +% +% @inmargin{TEXT [, RIGHT-TEXT]} +% (if RIGHT-TEXT is given, use TEXT for left page, RIGHT-TEXT for right; +% else use TEXT for both). +% +\def\inmargin#1{\parseinmargin #1,,\finish} +\def\parseinmargin#1,#2,#3\finish{% not perfect, but better than nothing. + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt + \def\lefttext{#1}% have both texts + \def\righttext{#2}% + \else + \def\lefttext{#1}% have only one text + \def\righttext{#1}% + \fi + % + \ifodd\pageno + \def\temp{\inrightmargin\righttext}% odd page -> outside is right margin + \else + \def\temp{\inleftmargin\lefttext}% + \fi + \temp +} + +% @| inserts a changebar to the left of the current line. It should +% surround any changed text. This approach does *not* work if the +% change spans more than two lines of output. To handle that, we would +% have to adopt a much more difficult approach (putting marks into the main +% vertical list for the beginning and end of each change). This command +% is not documented, not supported, and doesn't work. +% +\def\|{% + % \vadjust can only be used in horizontal mode.
+ \leavevmode + % + % Append this vertical mode material after the current line in the output. + \vadjust{% + % We want to insert a rule with the height and depth of the current + % leading; that is exactly what \strutbox is supposed to record. + \vskip-\baselineskip + % + % \vadjust-items are inserted at the left edge of the type. So + % the \llap here moves out into the left-hand margin. + \llap{% + % + % For a thicker or thinner bar, change the `1pt'. + \vrule height\baselineskip width1pt + % + % This is the space between the bar and the text. + \hskip 12pt + }% + }% +} + +% @include FILE -- \input text of FILE. +% +\def\include{\parseargusing\filenamecatcodes\includezzz} +\def\includezzz#1{% + \pushthisfilestack + \def\thisfile{#1}% + {% + \makevalueexpandable % we want to expand any @value in FILE. + \turnoffactive % and allow special characters in the expansion + \indexnofonts % Allow `@@' and other weird things in file names. + \wlog{texinfo.tex: doing @include of #1^^J}% + \edef\temp{\noexpand\input #1 }% + % + % This trickery is to read FILE outside of a group, in case it makes + % definitions, etc. + \expandafter + }\temp + \popthisfilestack +} +\def\filenamecatcodes{% + \catcode`\\=\other + \catcode`~=\other + \catcode`^=\other + \catcode`_=\other + \catcode`|=\other + \catcode`<=\other + \catcode`>=\other + \catcode`+=\other + \catcode`-=\other + \catcode`\`=\other + \catcode`\'=\other +} + +\def\pushthisfilestack{% + \expandafter\pushthisfilestackX\popthisfilestack\StackTerm +} +\def\pushthisfilestackX{% + \expandafter\pushthisfilestackY\thisfile\StackTerm +} +\def\pushthisfilestackY #1\StackTerm #2\StackTerm {% + \gdef\popthisfilestack{\gdef\thisfile{#1}\gdef\popthisfilestack{#2}}% +} + +\def\popthisfilestack{\errthisfilestackempty} +\def\errthisfilestackempty{\errmessage{Internal error: + the stack of filenames is empty.}} +% +\def\thisfile{} + +% @center line +% outputs that line, centered. 
+% +\parseargdef\center{% + \ifhmode + \let\centersub\centerH + \else + \let\centersub\centerV + \fi + \centersub{\hfil \ignorespaces#1\unskip \hfil}% + \let\centersub\relax % don't let the definition persist, just in case +} +\def\centerH#1{{% + \hfil\break + \advance\hsize by -\leftskip + \advance\hsize by -\rightskip + \line{#1}% + \break +}} +% +\newcount\centerpenalty +\def\centerV#1{% + % The idea here is the same as in \startdefun, \cartouche, etc.: if + % @center is the first thing after a section heading, we need to wipe + % out the negative parskip inserted by \sectionheading, but still + % prevent a page break here. + \centerpenalty = \lastpenalty + \ifnum\centerpenalty>10000 \vskip\parskip \fi + \ifnum\centerpenalty>9999 \penalty\centerpenalty \fi + \line{\kern\leftskip #1\kern\rightskip}% +} + +% @sp n outputs n lines of vertical space +% +\parseargdef\sp{\vskip #1\baselineskip} + +% @comment ...line which is ignored... +% @c is the same as @comment +% @ignore ... @end ignore is another way to write a comment +% +\def\comment{\begingroup \catcode`\^^M=\other% +\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other% +\commentxxx} +{\catcode`\^^M=\other \gdef\commentxxx#1^^M{\endgroup}} +% +\let\c=\comment + +% @paragraphindent NCHARS +% We'll use ems for NCHARS, close enough. +% NCHARS can also be the word `asis' or `none'. +% We cannot feasibly implement @paragraphindent asis, though. +% +\def\asisword{asis} % no translation, these are keywords +\def\noneword{none} +% +\parseargdef\paragraphindent{% + \def\temp{#1}% + \ifx\temp\asisword + \else + \ifx\temp\noneword + \defaultparindent = 0pt + \else + \defaultparindent = #1em + \fi + \fi + \parindent = \defaultparindent +} + +% @exampleindent NCHARS +% We'll use ems for NCHARS like @paragraphindent. +% It seems @exampleindent asis isn't necessary, but +% I preserve it to make it similar to @paragraphindent. 
+\parseargdef\exampleindent{% + \def\temp{#1}% + \ifx\temp\asisword + \else + \ifx\temp\noneword + \lispnarrowing = 0pt + \else + \lispnarrowing = #1em + \fi + \fi +} + +% @firstparagraphindent WORD +% If WORD is `none', then suppress indentation of the first paragraph +% after a section heading. If WORD is `insert', then do indent at such +% paragraphs. +% +% The paragraph indentation is suppressed or not by calling +% \suppressfirstparagraphindent, which the sectioning commands do. +% We switch the definition of this back and forth according to WORD. +% By default, we suppress indentation. +% +\def\suppressfirstparagraphindent{\dosuppressfirstparagraphindent} +\def\insertword{insert} +% +\parseargdef\firstparagraphindent{% + \def\temp{#1}% + \ifx\temp\noneword + \let\suppressfirstparagraphindent = \dosuppressfirstparagraphindent + \else\ifx\temp\insertword + \let\suppressfirstparagraphindent = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @firstparagraphindent option `\temp'}% + \fi\fi +} + +% Here is how we actually suppress indentation. Redefine \everypar to +% \kern backwards by \parindent, and then reset itself to empty. +% +% We also make \indent itself not actually do anything until the next +% paragraph. +% +\gdef\dosuppressfirstparagraphindent{% + \gdef\indent{% + \restorefirstparagraphindent + \indent + }% + \gdef\noindent{% + \restorefirstparagraphindent + \noindent + }% + \global\everypar = {% + \kern -\parindent + \restorefirstparagraphindent + }% +} + +\gdef\restorefirstparagraphindent{% + \global \let \indent = \ptexindent + \global \let \noindent = \ptexnoindent + \global \everypar = {}% +} + + +% @refill is a no-op. +\let\refill=\relax + +% If working on a large document in chapters, it is convenient to +% be able to disable indexing, cross-referencing, and contents, for test runs. +% This is done with @novalidate (before @setfilename). +% +\newif\iflinks \linkstrue % by default we want the aux files. 
+\let\novalidate = \linksfalse + +% @setfilename is done at the beginning of every texinfo file. +% So open here the files we need to have open while reading the input. +% This makes it possible to make a .fmt file for texinfo. +\def\setfilename{% + \fixbackslash % Turn off hack to swallow `\input texinfo'. + \iflinks + \tryauxfile + % Open the new aux file. TeX will close it automatically at exit. + \immediate\openout\auxfile=\jobname.aux + \fi % \openindices needs to do some work in any case. + \openindices + \let\setfilename=\comment % Ignore extra @setfilename cmds. + % + % If texinfo.cnf is present on the system, read it. + % Useful for site-wide @afourpaper, etc. + \openin 1 texinfo.cnf + \ifeof 1 \else \input texinfo.cnf \fi + \closein 1 + % + \comment % Ignore the actual filename. +} + +% Called from \setfilename. +% +\def\openindices{% + \newindex{cp}% + \newcodeindex{fn}% + \newcodeindex{vr}% + \newcodeindex{tp}% + \newcodeindex{ky}% + \newcodeindex{pg}% +} + +% @bye. +\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend} + + +\message{pdf,} +% adobe `portable' document format +\newcount\tempnum +\newcount\lnkcount +\newtoks\filename +\newcount\filenamelength +\newcount\pgn +\newtoks\toksA +\newtoks\toksB +\newtoks\toksC +\newtoks\toksD +\newbox\boxA +\newcount\countA +\newif\ifpdf +\newif\ifpdfmakepagedest + +% when pdftex is run in dvi mode, \pdfoutput is defined (so \pdfoutput=1 +% can be set). So we test for \relax and 0 as well as being undefined. +\ifx\pdfoutput\thisisundefined +\else + \ifx\pdfoutput\relax + \else + \ifcase\pdfoutput + \else + \pdftrue + \fi + \fi +\fi + +% PDF uses PostScript string constants for the names of xref targets, +% for display in the outlines, and in other places. Thus, we have to +% double any backslashes. Otherwise, a name like "\node" will be +% interpreted as a newline (\n), followed by o, d, e. Not good. +% +% See http://www.ntg.nl/pipermail/ntg-pdftex/2004-July/000654.html and +% related messages. 
The final outcome is that it is up to the TeX user +% to double the backslashes and otherwise make the string valid, so +% that's what we do. pdftex 1.30.0 (ca.2005) introduced a primitive to +% do this reliably, so we use it. + +% #1 is a control sequence in which to do the replacements, +% which we \xdef. +\def\txiescapepdf#1{% + \ifx\pdfescapestring\thisisundefined + % No primitive available; should we give a warning or log? + % Many times it won't matter. + \else + % The expandable \pdfescapestring primitive escapes parentheses, + % backslashes, and other special chars. + \xdef#1{\pdfescapestring{#1}}% + \fi +} + +\newhelp\nopdfimagehelp{Texinfo supports .png, .jpg, .jpeg, and .pdf images +with PDF output, and none of those formats could be found. (.eps cannot +be supported due to the design of the PDF format; use regular TeX (DVI +output) for that.)} + +\ifpdf + % + % Color manipulation macros based on pdfcolor.tex, + % except using rgb instead of cmyk; the latter is said to render as a + % very dark gray on-screen and a very dark halftone in print, instead + % of actual black. + \def\rgbDarkRed{0.50 0.09 0.12} + \def\rgbBlack{0 0 0} + % + % rg sets the color for filling (usual text, etc.); + % RG sets the color for stroking (thin rules, e.g., normal _'s). + \def\pdfsetcolor#1{\pdfliteral{#1 rg #1 RG}} + % + % Set color, and create a mark which defines \thiscolor accordingly, + % so that \makeheadline knows which color to restore. + \def\setcolor#1{% + \xdef\lastcolordefs{\gdef\noexpand\thiscolor{#1}}% + \domark + \pdfsetcolor{#1}% + } + % + \def\maincolor{\rgbBlack} + \pdfsetcolor{\maincolor} + \edef\thiscolor{\maincolor} + \def\lastcolordefs{} + % + \def\makefootline{% + \baselineskip24pt + \line{\pdfsetcolor{\maincolor}\the\footline}% + } + % + \def\makeheadline{% + \vbox to 0pt{% + \vskip-22.5pt + \line{% + \vbox to8.5pt{}% + % Extract \thiscolor definition from the marks. + \getcolormarks + % Typeset the headline with \maincolor, then restore the color.
+ \pdfsetcolor{\maincolor}\the\headline\pdfsetcolor{\thiscolor}% + }% + \vss + }% + \nointerlineskip + } + % + % + \pdfcatalog{/PageMode /UseOutlines} + % + % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto). + \def\dopdfimage#1#2#3{% + \def\pdfimagewidth{#2}\setbox0 = \hbox{\ignorespaces #2}% + \def\pdfimageheight{#3}\setbox2 = \hbox{\ignorespaces #3}% + % + % pdftex (and the PDF format) support .pdf, .png, .jpg (among + % others). Let's try in that order, PDF first since if + % someone has a scalable image, presumably better to use that than a + % bitmap. + \let\pdfimgext=\empty + \begingroup + \openin 1 #1.pdf \ifeof 1 + \openin 1 #1.PDF \ifeof 1 + \openin 1 #1.png \ifeof 1 + \openin 1 #1.jpg \ifeof 1 + \openin 1 #1.jpeg \ifeof 1 + \openin 1 #1.JPG \ifeof 1 + \errhelp = \nopdfimagehelp + \errmessage{Could not find image file #1 for pdf}% + \else \gdef\pdfimgext{JPG}% + \fi + \else \gdef\pdfimgext{jpeg}% + \fi + \else \gdef\pdfimgext{jpg}% + \fi + \else \gdef\pdfimgext{png}% + \fi + \else \gdef\pdfimgext{PDF}% + \fi + \else \gdef\pdfimgext{pdf}% + \fi + \closein 1 + \endgroup + % + % without \immediate, ancient pdftex seg faults when the same image is + % included twice. (Version 3.14159-pre-1.0-unofficial-20010704.) + \ifnum\pdftexversion < 14 + \immediate\pdfimage + \else + \immediate\pdfximage + \fi + \ifdim \wd0 >0pt width \pdfimagewidth \fi + \ifdim \wd2 >0pt height \pdfimageheight \fi + \ifnum\pdftexversion<13 + #1.\pdfimgext + \else + {#1.\pdfimgext}% + \fi + \ifnum\pdftexversion < 14 \else + \pdfrefximage \pdflastximage + \fi} + % + \def\pdfmkdest#1{{% + % We have to set dummies so commands such as @code, and characters + % such as \, aren't expanded when present in a section title. + \indexnofonts + \turnoffactive + \makevalueexpandable + \def\pdfdestname{#1}% + \txiescapepdf\pdfdestname + \safewhatsit{\pdfdest name{\pdfdestname} xyz}% + }} + % + % used to mark target names; must be expandable. 
+ \def\pdfmkpgn#1{#1} + % + % by default, use a color that is dark enough to print on paper as + % nearly black, but still distinguishable for online viewing. + \def\urlcolor{\rgbDarkRed} + \def\linkcolor{\rgbDarkRed} + \def\endlink{\setcolor{\maincolor}\pdfendlink} + % + % Adding outlines to PDF; macros for calculating structure of outlines + % come from Petr Olsak + \def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0% + \else \csname#1\endcsname \fi} + \def\advancenumber#1{\tempnum=\expnumber{#1}\relax + \advance\tempnum by 1 + \expandafter\xdef\csname#1\endcsname{\the\tempnum}} + % + % #1 is the section text, which is what will be displayed in the + % outline by the pdf viewer. #2 is the pdf expression for the number + % of subentries (or empty, for subsubsections). #3 is the node text, + % which might be empty if this toc entry had no corresponding node. + % #4 is the page number + % + \def\dopdfoutline#1#2#3#4{% + % Generate a link to the node text if that exists; else, use the + % page number. We could generate a destination for the section + % text in the case where a section has no node, but it doesn't + % seem worth the trouble, since most documents are normally structured. + \edef\pdfoutlinedest{#3}% + \ifx\pdfoutlinedest\empty + \def\pdfoutlinedest{#4}% + \else + \txiescapepdf\pdfoutlinedest + \fi + % + % Also escape PDF chars in the display string. + \edef\pdfoutlinetext{#1}% + \txiescapepdf\pdfoutlinetext + % + \pdfoutline goto name{\pdfmkpgn{\pdfoutlinedest}}#2{\pdfoutlinetext}% + } + % + \def\pdfmakeoutlines{% + \begingroup + % Read toc silently, to get counts of subentries for \pdfoutline. 
+ \def\partentry##1##2##3##4{}% ignore parts in the outlines + \def\numchapentry##1##2##3##4{% + \def\thischapnum{##2}% + \def\thissecnum{0}% + \def\thissubsecnum{0}% + }% + \def\numsecentry##1##2##3##4{% + \advancenumber{chap\thischapnum}% + \def\thissecnum{##2}% + \def\thissubsecnum{0}% + }% + \def\numsubsecentry##1##2##3##4{% + \advancenumber{sec\thissecnum}% + \def\thissubsecnum{##2}% + }% + \def\numsubsubsecentry##1##2##3##4{% + \advancenumber{subsec\thissubsecnum}% + }% + \def\thischapnum{0}% + \def\thissecnum{0}% + \def\thissubsecnum{0}% + % + % use \def rather than \let here because we redefine \chapentry et + % al. a second time, below. + \def\appentry{\numchapentry}% + \def\appsecentry{\numsecentry}% + \def\appsubsecentry{\numsubsecentry}% + \def\appsubsubsecentry{\numsubsubsecentry}% + \def\unnchapentry{\numchapentry}% + \def\unnsecentry{\numsecentry}% + \def\unnsubsecentry{\numsubsecentry}% + \def\unnsubsubsecentry{\numsubsubsecentry}% + \readdatafile{toc}% + % + % Read toc second time, this time actually producing the outlines. + % The `-' means take the \expnumber as the absolute number of + % subentries, which we calculated on our first read of the .toc above. + % + % We use the node names as the destinations. + \def\numchapentry##1##2##3##4{% + \dopdfoutline{##1}{count-\expnumber{chap##2}}{##3}{##4}}% + \def\numsecentry##1##2##3##4{% + \dopdfoutline{##1}{count-\expnumber{sec##2}}{##3}{##4}}% + \def\numsubsecentry##1##2##3##4{% + \dopdfoutline{##1}{count-\expnumber{subsec##2}}{##3}{##4}}% + \def\numsubsubsecentry##1##2##3##4{% count is always zero + \dopdfoutline{##1}{}{##3}{##4}}% + % + % PDF outlines are displayed using system fonts, instead of + % document fonts. Therefore we cannot use special characters, + % since the encoding is unknown. For example, the eogonek from + % Latin 2 (0xea) gets translated to a | character. Info from + % Staszek Wawrykiewicz, 19 Jan 2004 04:09:24 +0100. 
+ % + % TODO this right, we have to translate 8-bit characters to + % their "best" equivalent, based on the @documentencoding. Too + % much work for too little return. Just use the ASCII equivalents + % we use for the index sort strings. + % + \indexnofonts + \setupdatafile + % We can have normal brace characters in the PDF outlines, unlike + % Texinfo index files. So set that up. + \def\{{\lbracecharliteral}% + \def\}{\rbracecharliteral}% + \catcode`\\=\active \otherbackslash + \input \tocreadfilename + \endgroup + } + {\catcode`[=1 \catcode`]=2 + \catcode`{=\other \catcode`}=\other + \gdef\lbracecharliteral[{]% + \gdef\rbracecharliteral[}]% + ] + % + \def\skipspaces#1{\def\PP{#1}\def\D{|}% + \ifx\PP\D\let\nextsp\relax + \else\let\nextsp\skipspaces + \addtokens{\filename}{\PP}% + \advance\filenamelength by 1 + \fi + \nextsp} + \def\getfilename#1{% + \filenamelength=0 + % If we don't expand the argument now, \skipspaces will get + % snagged on things like "@value{foo}". + \edef\temp{#1}% + \expandafter\skipspaces\temp|\relax + } + \ifnum\pdftexversion < 14 + \let \startlink \pdfannotlink + \else + \let \startlink \pdfstartlink + \fi + % make a live url in pdf output. + \def\pdfurl#1{% + \begingroup + % it seems we really need yet another set of dummies; have not + % tried to figure out what each command should do in the context + % of @url. for now, just make @/ a no-op, that's the only one + % people have actually reported a problem with. + % + \normalturnoffactive + \def\@{@}% + \let\/=\empty + \makevalueexpandable + % do we want to go so far as to use \indexnofonts instead of just + % special-casing \var here? 
+ \def\var##1{##1}% + % + \leavevmode\setcolor{\urlcolor}% + \startlink attr{/Border [0 0 0]}% + user{/Subtype /Link /A << /S /URI /URI (#1) >>}% + \endgroup} + \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}} + \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks} + \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks} + \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}} + \def\maketoks{% + \expandafter\poptoks\the\toksA|ENDTOKS|\relax + \ifx\first0\adn0 + \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3 + \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6 + \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9 + \else + \ifnum0=\countA\else\makelink\fi + \ifx\first.\let\next=\done\else + \let\next=\maketoks + \addtokens{\toksB}{\the\toksD} + \ifx\first,\addtokens{\toksB}{\space}\fi + \fi + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi + \next} + \def\makelink{\addtokens{\toksB}% + {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0} + \def\pdflink#1{% + \startlink attr{/Border [0 0 0]} goto name{\pdfmkpgn{#1}} + \setcolor{\linkcolor}#1\endlink} + \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st} +\else + % non-pdf mode + \let\pdfmkdest = \gobble + \let\pdfurl = \gobble + \let\endlink = \relax + \let\setcolor = \gobble + \let\pdfsetcolor = \gobble + \let\pdfmakeoutlines = \relax +\fi % \ifx\pdfoutput + + +\message{fonts,} + +% Change the current font style to #1, remembering it in \curfontstyle. +% For now, we do not accumulate font styles: @b{@i{foo}} prints foo in +% italics, not bold italics. +% +\def\setfontstyle#1{% + \def\curfontstyle{#1}% not as a control sequence, because we are \edef'd. + \csname ten#1\endcsname % change the current font +} + +% Select #1 fonts with the current style. 
+%
+% That is, call \#1fonts and then re-issue the style command named by
+% \curfontstyle.
+\def\selectfonts#1{\csname #1fonts\endcsname \csname\curfontstyle\endcsname}
+
+% Style commands: each sets the math family and then the text font
+% through \setfontstyle.
+\def\rm{\fam=0 \setfontstyle{rm}}
+\def\it{\fam=\itfam \setfontstyle{it}}
+\def\sl{\fam=\slfam \setfontstyle{sl}}
+\def\bf{\fam=\bffam \setfontstyle{bf}}\def\bfstylename{bf}
+\def\tt{\fam=\ttfam \setfontstyle{tt}}
+
+% Unfortunately, we have to override this for titles and the like, since
+% in those cases "rm" is bold.  Sigh.
+\def\rmisbold{\rm\def\curfontstyle{bf}}
+
+% Texinfo sort of supports the sans serif font style, which plain TeX does not.
+% So we set up a \sf.
+\newfam\sffam
+\def\sf{\fam=\sffam \setfontstyle{sf}}
+\let\li = \sf % Sometimes we call it \li, not \sf.
+
+% We don't need math for this font style.
+\def\ttsl{\setfontstyle{ttsl}}
+
+
+% Set the baselineskip to #1, and the lineskip and strut size
+% correspondingly.  There is no deep meaning behind these magic numbers
+% used as factors; they just match (closely enough) what Knuth defined.
+%
+\def\lineskipfactor{.08333}
+\def\strutheightpercent{.70833}
+\def\strutdepthpercent {.29167}
+%
+% can get a sort of poor man's double spacing by redefining this.
+\def\baselinefactor{1}
+%
+\newdimen\textleading
+% #1 is the desired leading (a dimension); it is scaled by
+% \baselinefactor, and the strut box is rebuilt from the result.
+\def\setleading#1{%
+  \dimen0 = #1\relax
+  \normalbaselineskip = \baselinefactor\dimen0
+  \normallineskip = \lineskipfactor\normalbaselineskip
+  \normalbaselines
+  \setbox\strutbox =\hbox{%
+    \vrule width0pt height\strutheightpercent\baselineskip
+                    depth \strutdepthpercent \baselineskip
+  }%
+}
+
+% PDF CMaps.  See also LaTeX's t1.cmap.
+%
+% do nothing with this by default.
+\expandafter\let\csname cmapOT1\endcsname\gobble
+\expandafter\let\csname cmapOT1IT\endcsname\gobble
+\expandafter\let\csname cmapOT1TT\endcsname\gobble
+
+% if we are producing pdf, and we have \pdffontattr, then define cmaps.
+% (\pdffontattr was introduced many years ago, but people still run
+% older pdftex's; it's easy to conditionalize, so we do.)
+\ifpdf \ifx\pdffontattr\thisisundefined \else
+  \begingroup
+    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
+    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (TeX-OT1-0)
+%%Title: (TeX-OT1-0 TeX OT1 0)
+%%Version: 1.000
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (TeX)
+/Ordering (OT1)
+/Supplement 0
+>> def
+/CMapName /TeX-OT1-0 def
+/CMapType 2 def
+1 begincodespacerange
+<00> <7F>
+endcodespacerange
+8 beginbfrange
+<00> <01> <0393>
+<09> <0A> <03A8>
+<23> <26> <0023>
+<28> <3B> <0028>
+<3F> <5B> <003F>
+<5D> <5E> <005D>
+<61> <7A> <0061>
+<7B> <7C> <2013>
+endbfrange
+40 beginbfchar
+<02> <0398>
+<03> <039B>
+<04> <039E>
+<05> <03A0>
+<06> <03A3>
+<07> <03D2>
+<08> <03A6>
+<0B> <00660066>
+<0C> <00660069>
+<0D> <0066006C>
+<0E> <006600660069>
+<0F> <00660066006C>
+<10> <0131>
+<11> <0237>
+<12> <0060>
+<13> <00B4>
+<14> <02C7>
+<15> <02D8>
+<16> <00AF>
+<17> <02DA>
+<18> <00B8>
+<19> <00DF>
+<1A> <00E6>
+<1B> <0153>
+<1C> <00F8>
+<1D> <00C6>
+<1E> <0152>
+<1F> <00D8>
+<21> <0021>
+<22> <201D>
+<27> <2019>
+<3C> <00A1>
+<3D> <003D>
+<3E> <00BF>
+<5C> <201C>
+<5F> <02D9>
+<60> <2018>
+<7D> <02DD>
+<7E> <007E>
+<7F> <00A8>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
+    }\endgroup
+  % \cmapOT1 <font>: attach the stream object just written as the font's
+  % /ToUnicode entry.  Because of \edef, the object number
+  % (\the\pdflastobj) is frozen here, at definition time.
+  \expandafter\edef\csname cmapOT1\endcsname#1{%
+    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
+  }%
+%
+% \cmapOT1IT
+  \begingroup
+    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
+    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (TeX-OT1IT-0)
+%%Title: (TeX-OT1IT-0 TeX OT1IT 0)
+%%Version: 1.000
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (TeX)
+/Ordering (OT1IT)
+/Supplement 0
+>> def
+/CMapName /TeX-OT1IT-0 def
+/CMapType 2 def
+1 begincodespacerange
+<00> <7F>
+endcodespacerange
+8 beginbfrange
+<00> <01> <0393>
+<09> <0A> <03A8>
+<25> <26> <0025>
+<28> <3B> <0028>
+<3F> <5B> <003F>
+<5D> <5E> <005D>
+<61> <7A> <0061>
+<7B> <7C> <2013>
+endbfrange
+42 beginbfchar
+<02> <0398>
+<03> <039B>
+<04> <039E>
+<05> <03A0>
+<06> <03A3>
+<07> <03D2>
+<08> <03A6>
+<0B> <00660066>
+<0C> <00660069>
+<0D> <0066006C>
+<0E> <006600660069>
+<0F> <00660066006C>
+<10> <0131>
+<11> <0237>
+<12> <0060>
+<13> <00B4>
+<14> <02C7>
+<15> <02D8>
+<16> <00AF>
+<17> <02DA>
+<18> <00B8>
+<19> <00DF>
+<1A> <00E6>
+<1B> <0153>
+<1C> <00F8>
+<1D> <00C6>
+<1E> <0152>
+<1F> <00D8>
+<21> <0021>
+<22> <201D>
+<23> <0023>
+<24> <00A3>
+<27> <2019>
+<3C> <00A1>
+<3D> <003D>
+<3E> <00BF>
+<5C> <201C>
+<5F> <02D9>
+<60> <2018>
+<7D> <02DD>
+<7E> <007E>
+<7F> <00A8>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
+    }\endgroup
+  % Same mechanism as \cmapOT1, for the OT1IT stream just written.
+  \expandafter\edef\csname cmapOT1IT\endcsname#1{%
+    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
+  }%
+%
+% \cmapOT1TT
+  \begingroup
+    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
+    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (TeX-OT1TT-0)
+%%Title: (TeX-OT1TT-0 TeX OT1TT 0)
+%%Version: 1.000
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (TeX)
+/Ordering (OT1TT)
+/Supplement 0
+>> def
+/CMapName /TeX-OT1TT-0 def
+/CMapType 2 def
+1 begincodespacerange
+<00> <7F>
+endcodespacerange
+5 beginbfrange
+<00> <01> <0393>
+<09> <0A> <03A8>
+<21> <26> <0021>
+<28> <5F> <0028>
+<61> <7E> <0061>
+endbfrange
+32 beginbfchar
+<02> <0398>
+<03> <039B>
+<04> <039E>
+<05> <03A0>
+<06> <03A3>
+<07> <03D2>
+<08> <03A6>
+<0B> <2191>
+<0C> <2193>
+<0D> <0027>
+<0E> <00A1>
+<0F> <00BF>
+<10> <0131>
+<11> <0237>
+<12> <0060>
+<13> <00B4>
+<14> <02C7>
+<15> <02D8>
+<16> <00AF>
+<17> <02DA>
+<18> <00B8>
+<19> <00DF>
+<1A> <00E6>
+<1B> <0153>
+<1C> <00F8>
+<1D> <00C6>
+<1E> <0152>
+<1F> <00D8>
+<20> <2423>
+<27> <2019>
+<60> <2018>
+<7F> <00A8>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
+    }\endgroup
+  % Same mechanism as \cmapOT1, for the OT1TT stream just written.
+  \expandafter\edef\csname cmapOT1TT\endcsname#1{%
+    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
+  }%
+\fi\fi
+
+
+% Set the font macro #1 to the font named \fontprefix#2.
+% #3 is the font's design size, #4 is a scale factor, #5 is the CMap
+% encoding (only OT1, OT1IT and OT1TT are allowed, or empty to omit).
+% Example:
+% #1 = \textrm
+% #2 = \rmshape
+% #3 = 10
+% #4 = \mainmagstep
+% #5 = OT1
+%
+\def\setfont#1#2#3#4#5{%
+  \font#1=\fontprefix#2#3 scaled #4
+  \csname cmap#5\endcsname#1%
+}
+% This is what gets called when #5 of \setfont is empty.
+\let\cmap\gobble
+%
+% (end of cmaps)
+
+% Use cm as the default font prefix.
+% To specify the font prefix, you must define \fontprefix
+% before you read in texinfo.tex.
+\ifx\fontprefix\thisisundefined
+\def\fontprefix{cm}
+\fi
+% Support font families that don't use the same naming scheme as CM.
+% Each \XXshape holds the font-name suffix that \setfont appends to
+% \fontprefix; the \XXbshape variants are used where the normal face
+% is bold (titles, chapters, sections).
+\def\rmshape{r}
+\def\rmbshape{bx}               % where the normal face is bold
+\def\bfshape{b}
+\def\bxshape{bx}
+\def\ttshape{tt}
+\def\ttbshape{tt}
+\def\ttslshape{sltt}
+\def\itshape{ti}
+\def\itbshape{bxti}
+\def\slshape{sl}
+\def\slbshape{bxsl}
+\def\sfshape{ss}
+\def\sfbshape{ss}
+\def\scshape{csc}
+\def\scbshape{csc}
+
+% Definitions for a main text size of 11pt.  (The default in Texinfo.)
+%
+\def\definetextfontsizexi{%
+% Text fonts (10.95pt, magstephalf).
+\def\textnominalsize{11pt}
+\edef\mainmagstep{\magstephalf}
+\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
+\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
+\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
+\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
+\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
+\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
+\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
+\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
+\def\textecsize{1095}
+
+% A few fonts for @defun names and args.
+\setfont\defbf\bfshape{10}{\magstep1}{OT1}
+\setfont\deftt\ttshape{10}{\magstep1}{OT1TT}
+\setfont\defttsl\ttslshape{10}{\magstep1}{OT1TT}
+\def\df{\let\tentt=\deftt \let\tenbf = \defbf \let\tenttsl=\defttsl \bf}
+
+% Fonts for indices, footnotes, small examples (9pt).
+\def\smallnominalsize{9pt}
+\setfont\smallrm\rmshape{9}{1000}{OT1}
+\setfont\smalltt\ttshape{9}{1000}{OT1TT}
+\setfont\smallbf\bfshape{10}{900}{OT1}
+\setfont\smallit\itshape{9}{1000}{OT1IT}
+\setfont\smallsl\slshape{9}{1000}{OT1}
+\setfont\smallsf\sfshape{9}{1000}{OT1}
+\setfont\smallsc\scshape{10}{900}{OT1}
+\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
+\font\smalli=cmmi9
+\font\smallsy=cmsy9
+\def\smallecsize{0900}
+
+% Fonts for small examples (8pt).
+\def\smallernominalsize{8pt}
+\setfont\smallerrm\rmshape{8}{1000}{OT1}
+\setfont\smallertt\ttshape{8}{1000}{OT1TT}
+\setfont\smallerbf\bfshape{10}{800}{OT1}
+\setfont\smallerit\itshape{8}{1000}{OT1IT}
+\setfont\smallersl\slshape{8}{1000}{OT1}
+\setfont\smallersf\sfshape{8}{1000}{OT1}
+\setfont\smallersc\scshape{10}{800}{OT1}
+\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
+\font\smalleri=cmmi8
+\font\smallersy=cmsy8
+\def\smallerecsize{0800}
+
+% Fonts for title page (20.74pt):
+\def\titlenominalsize{20pt}
+\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
+\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
+\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
+\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
+\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
+\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
+\let\titlebf=\titlerm
+\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
+\font\titlei=cmmi12 scaled \magstep3
+\font\titlesy=cmsy10 scaled \magstep4
+\def\titleecsize{2074}
+
+% Chapter (and unnumbered) fonts (17.28pt).
+\def\chapnominalsize{17pt}
+\setfont\chaprm\rmbshape{12}{\magstep2}{OT1}
+\setfont\chapit\itbshape{10}{\magstep3}{OT1IT}
+\setfont\chapsl\slbshape{10}{\magstep3}{OT1}
+\setfont\chaptt\ttbshape{12}{\magstep2}{OT1TT}
+\setfont\chapttsl\ttslshape{10}{\magstep3}{OT1TT}
+\setfont\chapsf\sfbshape{17}{1000}{OT1}
+\let\chapbf=\chaprm
+\setfont\chapsc\scbshape{10}{\magstep3}{OT1}
+\font\chapi=cmmi12 scaled \magstep2
+\font\chapsy=cmsy10 scaled \magstep3
+\def\chapecsize{1728}
+
+% Section fonts (14.4pt).
+\def\secnominalsize{14pt}
+\setfont\secrm\rmbshape{12}{\magstep1}{OT1}
+\setfont\secit\itbshape{10}{\magstep2}{OT1IT}
+\setfont\secsl\slbshape{10}{\magstep2}{OT1}
+\setfont\sectt\ttbshape{12}{\magstep1}{OT1TT}
+\setfont\secttsl\ttslshape{10}{\magstep2}{OT1TT}
+\setfont\secsf\sfbshape{12}{\magstep1}{OT1}
+\let\secbf\secrm
+\setfont\secsc\scbshape{10}{\magstep2}{OT1}
+\font\seci=cmmi12 scaled \magstep1
+\font\secsy=cmsy10 scaled \magstep2
+\def\sececsize{1440}
+
+% Subsection fonts (13.15pt).
+\def\ssecnominalsize{13pt}
+\setfont\ssecrm\rmbshape{12}{\magstephalf}{OT1}
+\setfont\ssecit\itbshape{10}{1315}{OT1IT}
+\setfont\ssecsl\slbshape{10}{1315}{OT1}
+\setfont\ssectt\ttbshape{12}{\magstephalf}{OT1TT}
+\setfont\ssecttsl\ttslshape{10}{1315}{OT1TT}
+\setfont\ssecsf\sfbshape{12}{\magstephalf}{OT1}
+\let\ssecbf\ssecrm
+\setfont\ssecsc\scbshape{10}{1315}{OT1}
+\font\sseci=cmmi12 scaled \magstephalf
+\font\ssecsy=cmsy10 scaled 1315
+\def\ssececsize{1200}
+
+% Reduced fonts for @acro in text (10pt).
+\def\reducednominalsize{10pt}
+\setfont\reducedrm\rmshape{10}{1000}{OT1}
+\setfont\reducedtt\ttshape{10}{1000}{OT1TT}
+\setfont\reducedbf\bfshape{10}{1000}{OT1}
+\setfont\reducedit\itshape{10}{1000}{OT1IT}
+\setfont\reducedsl\slshape{10}{1000}{OT1}
+\setfont\reducedsf\sfshape{10}{1000}{OT1}
+\setfont\reducedsc\scshape{10}{1000}{OT1}
+\setfont\reducedttsl\ttslshape{10}{1000}{OT1TT}
+\font\reducedi=cmmi10
+\font\reducedsy=cmsy10
+\def\reducedecsize{1000}
+
+\textleading = 13.2pt % line spacing for 11pt CM
+\textfonts            % reset the current fonts
+\rm
+} % end of 11pt text font size definitions, \definetextfontsizexi
+
+
+% Definitions to make the main text be 10pt Computer Modern, with
+% section, chapter, etc., sizes following suit.  This is for the GNU
+% Press printing of the Emacs 22 manual.  Maybe other manuals in the
+% future.  Used with @smallbook, which sets the leading to 12pt.
+%
+\def\definetextfontsizex{%
+% Text fonts (10pt).
+\def\textnominalsize{10pt}
+\edef\mainmagstep{1000}
+\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
+\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
+\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
+\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
+\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
+\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
+\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
+\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
+\def\textecsize{1000}
+
+% A few fonts for @defun names and args.
+\setfont\defbf\bfshape{10}{\magstephalf}{OT1}
+\setfont\deftt\ttshape{10}{\magstephalf}{OT1TT}
+\setfont\defttsl\ttslshape{10}{\magstephalf}{OT1TT}
+\def\df{\let\tentt=\deftt \let\tenbf = \defbf \let\tenttsl=\defttsl \bf}
+
+% Fonts for indices, footnotes, small examples (9pt).
+\def\smallnominalsize{9pt}
+\setfont\smallrm\rmshape{9}{1000}{OT1}
+\setfont\smalltt\ttshape{9}{1000}{OT1TT}
+\setfont\smallbf\bfshape{10}{900}{OT1}
+\setfont\smallit\itshape{9}{1000}{OT1IT}
+\setfont\smallsl\slshape{9}{1000}{OT1}
+\setfont\smallsf\sfshape{9}{1000}{OT1}
+\setfont\smallsc\scshape{10}{900}{OT1}
+\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
+\font\smalli=cmmi9
+\font\smallsy=cmsy9
+\def\smallecsize{0900}
+
+% Fonts for small examples (8pt).
+\def\smallernominalsize{8pt}
+\setfont\smallerrm\rmshape{8}{1000}{OT1}
+\setfont\smallertt\ttshape{8}{1000}{OT1TT}
+\setfont\smallerbf\bfshape{10}{800}{OT1}
+\setfont\smallerit\itshape{8}{1000}{OT1IT}
+\setfont\smallersl\slshape{8}{1000}{OT1}
+\setfont\smallersf\sfshape{8}{1000}{OT1}
+\setfont\smallersc\scshape{10}{800}{OT1}
+\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
+\font\smalleri=cmmi8
+\font\smallersy=cmsy8
+\def\smallerecsize{0800}
+
+% Fonts for title page (20.74pt):
+\def\titlenominalsize{20pt}
+\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
+\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
+\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
+\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
+\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
+\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
+\let\titlebf=\titlerm
+\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
+\font\titlei=cmmi12 scaled \magstep3
+\font\titlesy=cmsy10 scaled \magstep4
+\def\titleecsize{2074}
+
+% Chapter fonts (14.4pt).
+\def\chapnominalsize{14pt}
+\setfont\chaprm\rmbshape{12}{\magstep1}{OT1}
+\setfont\chapit\itbshape{10}{\magstep2}{OT1IT}
+\setfont\chapsl\slbshape{10}{\magstep2}{OT1}
+\setfont\chaptt\ttbshape{12}{\magstep1}{OT1TT}
+\setfont\chapttsl\ttslshape{10}{\magstep2}{OT1TT}
+\setfont\chapsf\sfbshape{12}{\magstep1}{OT1}
+\let\chapbf\chaprm
+\setfont\chapsc\scbshape{10}{\magstep2}{OT1}
+\font\chapi=cmmi12 scaled \magstep1
+\font\chapsy=cmsy10 scaled \magstep2
+\def\chapecsize{1440}
+
+% Section fonts (12pt).
+\def\secnominalsize{12pt}
+\setfont\secrm\rmbshape{12}{1000}{OT1}
+\setfont\secit\itbshape{10}{\magstep1}{OT1IT}
+\setfont\secsl\slbshape{10}{\magstep1}{OT1}
+\setfont\sectt\ttbshape{12}{1000}{OT1TT}
+\setfont\secttsl\ttslshape{10}{\magstep1}{OT1TT}
+\setfont\secsf\sfbshape{12}{1000}{OT1}
+\let\secbf\secrm
+\setfont\secsc\scbshape{10}{\magstep1}{OT1}
+\font\seci=cmmi12
+\font\secsy=cmsy10 scaled \magstep1
+\def\sececsize{1200}
+
+% Subsection fonts (10pt).
+\def\ssecnominalsize{10pt}
+\setfont\ssecrm\rmbshape{10}{1000}{OT1}
+\setfont\ssecit\itbshape{10}{1000}{OT1IT}
+\setfont\ssecsl\slbshape{10}{1000}{OT1}
+\setfont\ssectt\ttbshape{10}{1000}{OT1TT}
+\setfont\ssecttsl\ttslshape{10}{1000}{OT1TT}
+\setfont\ssecsf\sfbshape{10}{1000}{OT1}
+\let\ssecbf\ssecrm
+\setfont\ssecsc\scbshape{10}{1000}{OT1}
+\font\sseci=cmmi10
+\font\ssecsy=cmsy10
+\def\ssececsize{1000}
+
+% Reduced fonts for @acro in text (9pt).
+\def\reducednominalsize{9pt}
+\setfont\reducedrm\rmshape{9}{1000}{OT1}
+\setfont\reducedtt\ttshape{9}{1000}{OT1TT}
+\setfont\reducedbf\bfshape{10}{900}{OT1}
+\setfont\reducedit\itshape{9}{1000}{OT1IT}
+\setfont\reducedsl\slshape{9}{1000}{OT1}
+\setfont\reducedsf\sfshape{9}{1000}{OT1}
+\setfont\reducedsc\scshape{10}{900}{OT1}
+\setfont\reducedttsl\ttslshape{10}{900}{OT1TT}
+\font\reducedi=cmmi9
+\font\reducedsy=cmsy9
+\def\reducedecsize{0900}
+
+\divide\parskip by 2  % reduce space between paragraphs
+\textleading = 12pt   % line spacing for 10pt CM
+\textfonts            % reset the current fonts
+\rm
+} % end of 10pt text font size definitions, \definetextfontsizex
+
+
+% We provide the user-level command
+%   @fonttextsize 10
+% (or 11) to redefine the text font size.  pt is assumed.
+%
+\def\xiword{11}
+\def\xword{10}
+\def\xwordpt{10pt}
+%
+% The argument is compared (with \ifx) against \xword and \xiword; any
+% other value raises an error and leaves the fonts unchanged.
+\parseargdef\fonttextsize{%
+  \def\textsizearg{#1}%
+  %\wlog{doing @fonttextsize \textsizearg}%
+  %
+  % Set \globaldefs so that documents can use this inside @tex, since
+  % makeinfo 4.8 does not support it, but we need it nonetheless.
+  %
+ \begingroup \globaldefs=1
+  \ifx\textsizearg\xword \definetextfontsizex
+  \else \ifx\textsizearg\xiword \definetextfontsizexi
+  \else
+    \errhelp=\EMsimple
+    \errmessage{@fonttextsize only supports `10' or `11', not `\textsizearg'}
+  \fi\fi
+ \endgroup
+}
+
+
+% In order for the font changes to affect most math symbols and letters,
+% we have to define the \textfont of the standard families.
Since
+% texinfo doesn't allow for producing subscripts and superscripts except
+% in the main text, we don't bother to reset \scriptfont and
+% \scriptscriptfont (which would also require loading a lot more fonts).
+%
+\def\resetmathfonts{%
+  \textfont0=\tenrm \textfont1=\teni \textfont2=\tensy
+  \textfont\itfam=\tenit \textfont\slfam=\tensl \textfont\bffam=\tenbf
+  \textfont\ttfam=\tentt \textfont\sffam=\tensf
+}
+
+% The font-changing commands redefine the meanings of \tenSTYLE, instead
+% of just \STYLE.  We do this because \STYLE needs to also set the
+% current \fam for math mode.  Our \STYLE (e.g., \rm) commands hardwire
+% \tenSTYLE to set the current font.
+%
+% Each font-changing command also sets the names \lsize (one size lower)
+% and \lllsize (three sizes lower).  These relative commands are used in
+% the LaTeX logo and acronyms.
+%
+% Every switcher below follows the same recipe: re-point the \tenSTYLE
+% fonts and \smallcaps at the fonts of that size, record the size name
+% in \curfontsize, set \lsize/\lllsize, then call \resetmathfonts and
+% \setleading with the leading for that size.
+%
+% This all needs generalizing, badly.
+%
+\def\textfonts{%
+  \let\tenrm=\textrm \let\tenit=\textit \let\tensl=\textsl
+  \let\tenbf=\textbf \let\tentt=\texttt \let\smallcaps=\textsc
+  \let\tensf=\textsf \let\teni=\texti \let\tensy=\textsy
+  \let\tenttsl=\textttsl
+  \def\curfontsize{text}%
+  \def\lsize{reduced}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{\textleading}}
+\def\titlefonts{%
+  \let\tenrm=\titlerm \let\tenit=\titleit \let\tensl=\titlesl
+  \let\tenbf=\titlebf \let\tentt=\titlett \let\smallcaps=\titlesc
+  \let\tensf=\titlesf \let\teni=\titlei \let\tensy=\titlesy
+  \let\tenttsl=\titlettsl
+  \def\curfontsize{title}%
+  \def\lsize{chap}\def\lllsize{subsec}%
+  \resetmathfonts \setleading{27pt}}
+% Typeset #1 in the title fonts, with "rm" treated as bold.
+\def\titlefont#1{{\titlefonts\rmisbold #1}}
+\def\chapfonts{%
+  \let\tenrm=\chaprm \let\tenit=\chapit \let\tensl=\chapsl
+  \let\tenbf=\chapbf \let\tentt=\chaptt \let\smallcaps=\chapsc
+  \let\tensf=\chapsf \let\teni=\chapi \let\tensy=\chapsy
+  \let\tenttsl=\chapttsl
+  \def\curfontsize{chap}%
+  \def\lsize{sec}\def\lllsize{text}%
+  \resetmathfonts \setleading{19pt}}
+\def\secfonts{%
+  \let\tenrm=\secrm \let\tenit=\secit \let\tensl=\secsl
+  \let\tenbf=\secbf \let\tentt=\sectt \let\smallcaps=\secsc
+  \let\tensf=\secsf \let\teni=\seci \let\tensy=\secsy
+  \let\tenttsl=\secttsl
+  \def\curfontsize{sec}%
+  \def\lsize{subsec}\def\lllsize{reduced}%
+  \resetmathfonts \setleading{16pt}}
+\def\subsecfonts{%
+  \let\tenrm=\ssecrm \let\tenit=\ssecit \let\tensl=\ssecsl
+  \let\tenbf=\ssecbf \let\tentt=\ssectt \let\smallcaps=\ssecsc
+  \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy
+  \let\tenttsl=\ssecttsl
+  \def\curfontsize{ssec}%
+  \def\lsize{text}\def\lllsize{small}%
+  \resetmathfonts \setleading{15pt}}
+\let\subsubsecfonts = \subsecfonts
+\def\reducedfonts{%
+  \let\tenrm=\reducedrm \let\tenit=\reducedit \let\tensl=\reducedsl
+  % Re-point \smallcaps like every other size switcher does.  The
+  % original set only \reducedcaps here, so small caps kept the size of
+  % the enclosing text; \reducedcaps is still defined for compatibility.
+  \let\tenbf=\reducedbf \let\tentt=\reducedtt \let\smallcaps=\reducedsc
+  \let\reducedcaps=\reducedsc
+  \let\tensf=\reducedsf \let\teni=\reducedi \let\tensy=\reducedsy
+  \let\tenttsl=\reducedttsl
+  \def\curfontsize{reduced}%
+  \def\lsize{small}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{10.5pt}}
+\def\smallfonts{%
+  \let\tenrm=\smallrm \let\tenit=\smallit \let\tensl=\smallsl
+  \let\tenbf=\smallbf \let\tentt=\smalltt \let\smallcaps=\smallsc
+  \let\tensf=\smallsf \let\teni=\smalli \let\tensy=\smallsy
+  \let\tenttsl=\smallttsl
+  \def\curfontsize{small}%
+  \def\lsize{smaller}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{10.5pt}}
+\def\smallerfonts{%
+  \let\tenrm=\smallerrm \let\tenit=\smallerit \let\tensl=\smallersl
+  \let\tenbf=\smallerbf \let\tentt=\smallertt \let\smallcaps=\smallersc
+  \let\tensf=\smallersf \let\teni=\smalleri \let\tensy=\smallersy
+  \let\tenttsl=\smallerttsl
+  \def\curfontsize{smaller}%
+  \def\lsize{smaller}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{9.5pt}}
+
+% Fonts for short table of contents.
+\setfont\shortcontrm\rmshape{12}{1000}{OT1}
+\setfont\shortcontbf\bfshape{10}{\magstep1}{OT1}  % no cmb12
+\setfont\shortcontsl\slshape{12}{1000}{OT1}
+\setfont\shortconttt\ttshape{12}{1000}{OT1TT}
+
+% Define these just so they can be easily changed for other fonts.
+\def\angleleft{$\langle$}
+\def\angleright{$\rangle$}
+
+% Set the fonts to use with the @small... environments.
+\let\smallexamplefonts = \smallfonts
+
+% About \smallexamplefonts.  If we use \smallfonts (9pt), @smallexample
+% can fit this many characters:
+%   8.5x11=86   smallbook=72  a4=90  a5=69
+% If we use \scriptfonts (8pt), then we can fit this many characters:
+%   8.5x11=90+  smallbook=80  a4=90+  a5=77
+% For me, subjectively, the few extra characters that fit aren't worth
+% the additional smallness of 8pt.  So I'm making the default 9pt.
+%
+% By the way, for comparison, here's what fits with @example (10pt):
+%   8.5x11=71  smallbook=60  a4=75  a5=58
+% --karl, 24jan03.
+
+% Set up the default fonts, so we can use them for creating boxes.
+%
+\definetextfontsizexi
+
+
+\message{markup,}
+
+% Check if we are currently using a typewriter font.  Since all the
+% Computer Modern typewriter fonts have zero interword stretch (and
+% shrink), and it is reasonable to expect all typewriter fonts to have
+% this property, we can check that font parameter.
+%
+% Note this expands to an open \ifdim test (on \fontdimen3, the
+% interword stretch), so the caller supplies the matching \else/\fi.
+\def\ifmonospace{\ifdim\fontdimen3\font=0pt }
+
+% Markup style infrastructure.  \defmarkupstylesetup\INITMACRO will
+% define and register \INITMACRO to be called on markup style changes.
+% \INITMACRO can check \currentmarkupstyle for the innermost
+% style and the set of \ifmarkupSTYLE switches for all styles
+% currently in effect.
+\newif\ifmarkupvar
+\newif\ifmarkupsamp
+\newif\ifmarkupkey
+%\newif\ifmarkupfile % @file == @samp.
+%\newif\ifmarkupoption % @option == @samp.
+\newif\ifmarkupcode
+\newif\ifmarkupkbd
+%\newif\ifmarkupenv % @env == @code.
+%\newif\ifmarkupcommand % @command == @code.
+\newif\ifmarkuptex % @tex (and part of @math, for now).
+\newif\ifmarkupexample +\newif\ifmarkupverb +\newif\ifmarkupverbatim + +\let\currentmarkupstyle\empty + +\def\setupmarkupstyle#1{% + \csname markup#1true\endcsname + \def\currentmarkupstyle{#1}% + \markupstylesetup +} + +\let\markupstylesetup\empty + +\def\defmarkupstylesetup#1{% + \expandafter\def\expandafter\markupstylesetup + \expandafter{\markupstylesetup #1}% + \def#1% +} + +% Markup style setup for left and right quotes. +\defmarkupstylesetup\markupsetuplq{% + \expandafter\let\expandafter \temp + \csname markupsetuplq\currentmarkupstyle\endcsname + \ifx\temp\relax \markupsetuplqdefault \else \temp \fi +} + +\defmarkupstylesetup\markupsetuprq{% + \expandafter\let\expandafter \temp + \csname markupsetuprq\currentmarkupstyle\endcsname + \ifx\temp\relax \markupsetuprqdefault \else \temp \fi +} + +{ +\catcode`\'=\active +\catcode`\`=\active + +\gdef\markupsetuplqdefault{\let`\lq} +\gdef\markupsetuprqdefault{\let'\rq} + +\gdef\markupsetcodequoteleft{\let`\codequoteleft} +\gdef\markupsetcodequoteright{\let'\codequoteright} +} + +\let\markupsetuplqcode \markupsetcodequoteleft +\let\markupsetuprqcode \markupsetcodequoteright +% +\let\markupsetuplqexample \markupsetcodequoteleft +\let\markupsetuprqexample \markupsetcodequoteright +% +\let\markupsetuplqkbd \markupsetcodequoteleft +\let\markupsetuprqkbd \markupsetcodequoteright +% +\let\markupsetuplqsamp \markupsetcodequoteleft +\let\markupsetuprqsamp \markupsetcodequoteright +% +\let\markupsetuplqverb \markupsetcodequoteleft +\let\markupsetuprqverb \markupsetcodequoteright +% +\let\markupsetuplqverbatim \markupsetcodequoteleft +\let\markupsetuprqverbatim \markupsetcodequoteright + +% Allow an option to not use regular directed right quote/apostrophe +% (char 0x27), but instead the undirected quote from cmtt (char 0x0d). +% The undirected quote is ugly, so don't make it the default, but it +% works for pasting with more pdf viewers (at least evince), the +% lilypond developers report. 
xpdf does work with the regular 0x27. +% +\def\codequoteright{% + \expandafter\ifx\csname SETtxicodequoteundirected\endcsname\relax + \expandafter\ifx\csname SETcodequoteundirected\endcsname\relax + '% + \else \char'15 \fi + \else \char'15 \fi +} +% +% and a similar option for the left quote char vs. a grave accent. +% Modern fonts display ASCII 0x60 as a grave accent, so some people like +% the code environments to do likewise. +% +\def\codequoteleft{% + \expandafter\ifx\csname SETtxicodequotebacktick\endcsname\relax + \expandafter\ifx\csname SETcodequotebacktick\endcsname\relax + % [Knuth] pp. 380,381,391 + % \relax disables Spanish ligatures ?` and !` of \tt font. + \relax`% + \else \char'22 \fi + \else \char'22 \fi +} + +% Commands to set the quote options. +% +\parseargdef\codequoteundirected{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxicodequoteundirected\endcsname + = t% + \else\ifx\temp\offword + \expandafter\let\csname SETtxicodequoteundirected\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @codequoteundirected value `\temp', must be on|off}% + \fi\fi +} +% +\parseargdef\codequotebacktick{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxicodequotebacktick\endcsname + = t% + \else\ifx\temp\offword + \expandafter\let\csname SETtxicodequotebacktick\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @codequotebacktick value `\temp', must be on|off}% + \fi\fi +} + +% [Knuth] pp. 380,381,391, disable Spanish ligatures ?` and !` of \tt font. +\def\noligaturesquoteleft{\relax\lq} + +% Count depth in font-changes, for error checks +\newcount\fontdepth \fontdepth=0 + +% Font commands. + +% #1 is the font command (\sl or \it), #2 is the text to slant. +% If we are in a monospaced environment, however, 1) always use \ttsl, +% and 2) do not add an italic correction. 
+\def\dosmartslant#1#2{% + \ifusingtt + {{\ttsl #2}\let\next=\relax}% + {\def\next{{#1#2}\futurelet\next\smartitaliccorrection}}% + \next +} +\def\smartslanted{\dosmartslant\sl} +\def\smartitalic{\dosmartslant\it} + +% Output an italic correction unless \next (presumed to be the following +% character) is such as not to need one. +\def\smartitaliccorrection{% + \ifx\next,% + \else\ifx\next-% + \else\ifx\next.% + \else\ptexslash + \fi\fi\fi + \aftersmartic +} + +% Unconditional use \ttsl, and no ic. @var is set to this for defuns. +\def\ttslanted#1{{\ttsl #1}} + +% @cite is like \smartslanted except unconditionally use \sl. We never want +% ttsl for book titles, do we? +\def\cite#1{{\sl #1}\futurelet\next\smartitaliccorrection} + +\def\aftersmartic{} +\def\var#1{% + \let\saveaftersmartic = \aftersmartic + \def\aftersmartic{\null\let\aftersmartic=\saveaftersmartic}% + \smartslanted{#1}% +} + +\let\i=\smartitalic +\let\slanted=\smartslanted +\let\dfn=\smartslanted +\let\emph=\smartitalic + +% Explicit font changes: @r, @sc, undocumented @ii. +\def\r#1{{\rm #1}} % roman font +\def\sc#1{{\smallcaps#1}} % smallcaps font +\def\ii#1{{\it #1}} % italic font + +% @b, explicit bold. Also @strong. +\def\b#1{{\bf #1}} +\let\strong=\b + +% @sansserif, explicit sans. +\def\sansserif#1{{\sf #1}} + +% We can't just use \exhyphenpenalty, because that only has effect at +% the end of a paragraph. Restore normal hyphenation at the end of the +% group within which \nohyphenation is presumably called. +% +\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation} +\def\restorehyphenation{\hyphenchar\font = `- } + +% Set sfcode to normal for the chars that usually have another value. +% Can't use plain's \frenchspacing because it uses the `\x notation, and +% sometimes \x has an active definition that messes things up. 
+% +\catcode`@=11 + \def\plainfrenchspacing{% + \sfcode\dotChar =\@m \sfcode\questChar=\@m \sfcode\exclamChar=\@m + \sfcode\colonChar=\@m \sfcode\semiChar =\@m \sfcode\commaChar =\@m + \def\endofsentencespacefactor{1000}% for @. and friends + } + \def\plainnonfrenchspacing{% + \sfcode`\.3000\sfcode`\?3000\sfcode`\!3000 + \sfcode`\:2000\sfcode`\;1500\sfcode`\,1250 + \def\endofsentencespacefactor{3000}% for @. and friends + } +\catcode`@=\other +\def\endofsentencespacefactor{3000}% default + +% @t, explicit typewriter. +\def\t#1{% + {\tt \rawbackslash \plainfrenchspacing #1}% + \null +} + +% @samp. +\def\samp#1{{\setupmarkupstyle{samp}\lq\tclose{#1}\rq\null}} + +% @indicateurl is \samp, that is, with quotes. +\let\indicateurl=\samp + +% @code (and similar) prints in typewriter, but with spaces the same +% size as normal in the surrounding text, without hyphenation, etc. +% This is a subroutine for that. +\def\tclose#1{% + {% + % Change normal interword space to be same as for the current font. + \spaceskip = \fontdimen2\font + % + % Switch to typewriter. + \tt + % + % But `\ ' produces the large typewriter interword space. + \def\ {{\spaceskip = 0pt{} }}% + % + % Turn off hyphenation. + \nohyphenation + % + \rawbackslash + \plainfrenchspacing + #1% + }% + \null % reset spacefactor to 1000 +} + +% We *must* turn on hyphenation at `-' and `_' in @code. +% Otherwise, it is too hard to avoid overfull hboxes +% in the Emacs manual, the Library manual, etc. +% +% Unfortunately, TeX uses one parameter (\hyphenchar) to control +% both hyphenation at - and hyphenation within words. +% We must therefore turn them both off (\tclose does that) +% and arrange explicitly to hyphenate at a dash. +% -- rms. +{ + \catcode`\-=\active \catcode`\_=\active + \catcode`\'=\active \catcode`\`=\active + \global\let'=\rq \global\let`=\lq % default definitions + % + \global\def\code{\begingroup + \setupmarkupstyle{code}% + % The following should really be moved into \setupmarkupstyle handlers. 
+ \catcode\dashChar=\active \catcode\underChar=\active + \ifallowcodebreaks + \let-\codedash + \let_\codeunder + \else + \let-\normaldash + \let_\realunder + \fi + \codex + } +} + +\def\codex #1{\tclose{#1}\endgroup} + +\def\normaldash{-} +\def\codedash{-\discretionary{}{}{}} +\def\codeunder{% + % this is all so @math{@code{var_name}+1} can work. In math mode, _ + % is "active" (mathcode"8000) and \normalunderscore (or \char95, etc.) + % will therefore expand the active definition of _, which is us + % (inside @code that is), therefore an endless loop. + \ifusingtt{\ifmmode + \mathchar"075F % class 0=ordinary, family 7=ttfam, pos 0x5F=_. + \else\normalunderscore \fi + \discretionary{}{}{}}% + {\_}% +} + +% An additional complication: the above will allow breaks after, e.g., +% each of the four underscores in __typeof__. This is bad. +% @allowcodebreaks provides a document-level way to turn breaking at - +% and _ on and off. +% +\newif\ifallowcodebreaks \allowcodebreakstrue + +\def\keywordtrue{true} +\def\keywordfalse{false} + +\parseargdef\allowcodebreaks{% + \def\txiarg{#1}% + \ifx\txiarg\keywordtrue + \allowcodebreakstrue + \else\ifx\txiarg\keywordfalse + \allowcodebreaksfalse + \else + \errhelp = \EMsimple + \errmessage{Unknown @allowcodebreaks option `\txiarg', must be true|false}% + \fi\fi +} + +% For @command, @env, @file, @option quotes seem unnecessary, +% so use \code rather than \samp. +\let\command=\code +\let\env=\code +\let\file=\code +\let\option=\code + +% @uref (abbreviation for `urlref') takes an optional (comma-separated) +% second argument specifying the text to display and an optional third +% arg as text to display instead of (rather than in addition to) the url +% itself. First (mandatory) arg is the url. +% (This \urefnobreak definition isn't used now, leaving it for a while +% for comparison.) 
+\def\urefnobreak#1{\dourefnobreak #1,,,\finish} +\def\dourefnobreak#1,#2,#3,#4\finish{\begingroup + \unsepspaces + \pdfurl{#1}% + \setbox0 = \hbox{\ignorespaces #3}% + \ifdim\wd0 > 0pt + \unhbox0 % third arg given, show only that + \else + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt + \ifpdf + \unhbox0 % PDF: 2nd arg given, show only it + \else + \unhbox0\ (\code{#1})% DVI: 2nd arg given, show both it and url + \fi + \else + \code{#1}% only url given, so show it + \fi + \fi + \endlink +\endgroup} + +% This \urefbreak definition is the active one. +\def\urefbreak{\begingroup \urefcatcodes \dourefbreak} +\let\uref=\urefbreak +\def\dourefbreak#1{\urefbreakfinish #1,,,\finish} +\def\urefbreakfinish#1,#2,#3,#4\finish{% doesn't work in @example + \unsepspaces + \pdfurl{#1}% + \setbox0 = \hbox{\ignorespaces #3}% + \ifdim\wd0 > 0pt + \unhbox0 % third arg given, show only that + \else + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt + \ifpdf + \unhbox0 % PDF: 2nd arg given, show only it + \else + \unhbox0\ (\urefcode{#1})% DVI: 2nd arg given, show both it and url + \fi + \else + \urefcode{#1}% only url given, so show it + \fi + \fi + \endlink +\endgroup} + +% Allow line breaks only around a few specific characters. +\def\urefcatcodes{% + \catcode\ampChar=\active \catcode\dotChar=\active + \catcode\hashChar=\active \catcode\questChar=\active + \catcode\slashChar=\active +} +{ + \urefcatcodes + % + \global\def\urefcode{\begingroup + \setupmarkupstyle{code}% + \urefcatcodes + \let&\urefcodeamp + \let.\urefcodedot + \let#\urefcodehash + \let?\urefcodequest + \let/\urefcodeslash + \codex + } + % + % By default, they are just regular characters. + \global\def&{\normalamp} + \global\def.{\normaldot} + \global\def#{\normalhash} + \global\def?{\normalquest} + \global\def/{\normalslash} +} + +% we put a little stretch before and after the breakable chars, to help +% line breaking of long url's.
The unequal skips make it look better in +% cmtt at least, especially for dots. +\def\urefprestretch{\urefprebreak \hskip0pt plus.13em } +\def\urefpoststretch{\urefpostbreak \hskip0pt plus.1em } +% +\def\urefcodeamp{\urefprestretch \&\urefpoststretch} +\def\urefcodedot{\urefprestretch .\urefpoststretch} +\def\urefcodehash{\urefprestretch \#\urefpoststretch} +\def\urefcodequest{\urefprestretch ?\urefpoststretch} +\def\urefcodeslash{\futurelet\next\urefcodeslashfinish} +{ + \catcode`\/=\active + \global\def\urefcodeslashfinish{% + \urefprestretch \slashChar + % Allow line break only after the final / in a sequence of + % slashes, to avoid line break between the slashes in http://. + \ifx\next/\else \urefpoststretch \fi + } +} + +% One more complication: by default we'll break after the special +% characters, but some people like to break before the special chars, so +% allow that. Also allow no breaking at all, for manual control. +% +\parseargdef\urefbreakstyle{% + \def\txiarg{#1}% + \ifx\txiarg\wordnone + \def\urefprebreak{\nobreak}\def\urefpostbreak{\nobreak} + \else\ifx\txiarg\wordbefore + \def\urefprebreak{\allowbreak}\def\urefpostbreak{\nobreak} + \else\ifx\txiarg\wordafter + \def\urefprebreak{\nobreak}\def\urefpostbreak{\allowbreak} + \else + \errhelp = \EMsimple + \errmessage{Unknown @urefbreakstyle setting `\txiarg'}% + \fi\fi\fi +} +\def\wordafter{after} +\def\wordbefore{before} +\def\wordnone{none} + +\urefbreakstyle after + +% @url synonym for @uref, since that's how everyone uses it. +% +\let\url=\uref + +% rms does not like angle brackets --karl, 17may97. +% So now @email is just like @uref, unless we are pdf.
+% +%\def\email#1{\angleleft{\tt #1}\angleright} +\ifpdf + \def\email#1{\doemail#1,,\finish} + \def\doemail#1,#2,#3\finish{\begingroup + \unsepspaces + \pdfurl{mailto:#1}% + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi + \endlink + \endgroup} +\else + \let\email=\uref +\fi + +% @kbdinputstyle -- arg is `distinct' (@kbd uses slanted tty font always), +% `example' (@kbd uses ttsl only inside of @example and friends), +% or `code' (@kbd uses normal tty font always). +\parseargdef\kbdinputstyle{% + \def\txiarg{#1}% + \ifx\txiarg\worddistinct + \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\ttsl}% + \else\ifx\txiarg\wordexample + \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\tt}% + \else\ifx\txiarg\wordcode + \gdef\kbdexamplefont{\tt}\gdef\kbdfont{\tt}% + \else + \errhelp = \EMsimple + \errmessage{Unknown @kbdinputstyle setting `\txiarg'}% + \fi\fi\fi +} +\def\worddistinct{distinct} +\def\wordexample{example} +\def\wordcode{code} + +% Default is `distinct'. +\kbdinputstyle distinct + +% @kbd is like @code, except that if the argument is just one @key command, +% then @kbd has no effect. +\def\kbd#1{{\def\look{#1}\expandafter\kbdsub\look??\par}} + +\def\xkey{\key} +\def\kbdsub#1#2#3\par{% + \def\one{#1}\def\three{#3}\def\threex{??}% + \ifx\one\xkey\ifx\threex\three \key{#2}% + \else{\tclose{\kbdfont\setupmarkupstyle{kbd}\look}}\fi + \else{\tclose{\kbdfont\setupmarkupstyle{kbd}\look}}\fi +} + +% definition of @key that produces a lozenge. Doesn't adjust to text size. +%\setfont\keyrm\rmshape{8}{1000}{OT1} +%\font\keysy=cmsy9 +%\def\key#1{{\keyrm\textfont2=\keysy \leavevmode\hbox{% +% \raise0.4pt\hbox{\angleleft}\kern-.08em\vtop{% +% \vbox{\hrule\kern-0.4pt +% \hbox{\raise0.4pt\hbox{\vphantom{\angleleft}}#1}}% +% \kern-0.4pt\hrule}% +% \kern-.06em\raise0.4pt\hbox{\angleright}}}} + +% definition of @key with no lozenge. If the current font is already +% monospace, don't change it; that way, we respect @kbdinputstyle. 
But +% if it isn't monospace, then use \tt. +% +\def\key#1{{\setupmarkupstyle{key}% + \nohyphenation + \ifmonospace\else\tt\fi + #1}\null} + +% @clicksequence{File @click{} Open ...} +\def\clicksequence#1{\begingroup #1\endgroup} + +% @clickstyle @arrow (by default) +\parseargdef\clickstyle{\def\click{#1}} +\def\click{\arrow} + +% Typeset a dimension, e.g., `in' or `pt'. The only reason for the +% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt. +% +\def\dmn#1{\thinspace #1} + +% @l was never documented to mean ``switch to the Lisp font'', +% and it is not used as such in any manual I can find. We need it for +% Polish suppressed-l. --karl, 22sep96. +%\def\l#1{{\li #1}\null} + +% @acronym for "FBI", "NATO", and the like. +% We print this one point size smaller, since it's intended for +% all-uppercase. +% +\def\acronym#1{\doacronym #1,,\finish} +\def\doacronym#1,#2,#3\finish{% + {\selectfonts\lsize #1}% + \def\temp{#2}% + \ifx\temp\empty \else + \space ({\unsepspaces \ignorespaces \temp \unskip})% + \fi + \null % reset \spacefactor=1000 +} + +% @abbr for "Comput. J." and the like. +% No font change, but don't do end-of-sentence spacing. +% +\def\abbr#1{\doabbr #1,,\finish} +\def\doabbr#1,#2,#3\finish{% + {\plainfrenchspacing #1}% + \def\temp{#2}% + \ifx\temp\empty \else + \space ({\unsepspaces \ignorespaces \temp \unskip})% + \fi + \null % reset \spacefactor=1000 +} + +% @asis just yields its argument. Used with @table, for example. +% +\def\asis#1{#1} + +% @math outputs its argument in math mode. +% +% One complication: _ usually means subscripts, but it could also mean +% an actual _ character, as in @math{@var{some_variable} + 1}. So make +% _ active, and distinguish by seeing if the current family is \slfam, +% which is what @var uses. +{ + \catcode`\_ = \active + \gdef\mathunderscore{% + \catcode`\_=\active + \def_{\ifnum\fam=\slfam \_\else\sb\fi}% + } +} +% Another complication: we want \\ (and @\) to output a math (or tt) \. 
+% FYI, plain.tex uses \\ as a temporary control sequence (for no +% particular reason), but this is not advertised and we don't care. +% +% The \mathchar is class=0=ordinary, family=7=ttfam, position=5C=\. +\def\mathbackslash{\ifnum\fam=\ttfam \mathchar"075C \else\backslash \fi} +% +\def\math{% + \tex + \mathunderscore + \let\\ = \mathbackslash + \mathactive + % make the texinfo accent commands work in math mode + \let\"=\ddot + \let\'=\acute + \let\==\bar + \let\^=\hat + \let\`=\grave + \let\u=\breve + \let\v=\check + \let\~=\tilde + \let\dotaccent=\dot + $\finishmath +} +\def\finishmath#1{#1$\endgroup} % Close the group opened by \tex. + +% Some active characters (such as <) are spaced differently in math. +% We have to reset their definitions in case the @math was an argument +% to a command which sets the catcodes (such as @item or @section). +% +{ + \catcode`^ = \active + \catcode`< = \active + \catcode`> = \active + \catcode`+ = \active + \catcode`' = \active + \gdef\mathactive{% + \let^ = \ptexhat + \let< = \ptexless + \let> = \ptexgtr + \let+ = \ptexplus + \let' = \ptexquoteright + } +} + +% ctrl is no longer a Texinfo command, but leave this definition for fun. +\def\ctrl #1{{\tt \rawbackslash \hat}#1} + +% @inlinefmt{FMTNAME,PROCESSED-TEXT} and @inlineraw{FMTNAME,RAW-TEXT}. +% Ignore unless FMTNAME == tex; then it is like @iftex and @tex, +% except specified as a normal braced arg, so no newlines to worry about. +% +\def\outfmtnametex{tex} +% +\long\def\inlinefmt#1{\doinlinefmt #1,\finish} +\long\def\doinlinefmt#1,#2,\finish{% + \def\inlinefmtname{#1}% + \ifx\inlinefmtname\outfmtnametex \ignorespaces #2\fi +} +% For raw, must switch into @tex before parsing the argument, to avoid +% setting catcodes prematurely. Doing it this way means that, for +% example, @inlineraw{html, foo{bar} gets a parse error instead of being +% ignored. 
But this isn't important because if people want a literal +% *right* brace they would have to use a command anyway, so they may as +% well use a command to get a left brace too. We could re-use the +% delimiter character idea from \verb, but it seems like overkill. +% +\long\def\inlineraw{\tex \doinlineraw} +\long\def\doinlineraw#1{\doinlinerawtwo #1,\finish} +\def\doinlinerawtwo#1,#2,\finish{% + \def\inlinerawname{#1}% + \ifx\inlinerawname\outfmtnametex \ignorespaces #2\fi + \endgroup % close group opened by \tex. +} + + +\message{glyphs,} +% and logos. + +% @@ prints an @, as does @atchar{}. +\def\@{\char64 } +\let\atchar=\@ + +% @{ @} @lbracechar{} @rbracechar{} all generate brace characters. +% Unless we're in typewriter, use \ecfont because the CM text fonts do +% not have braces, and we don't want to switch into math. +\def\mylbrace{{\ifmonospace\else\ecfont\fi \char123}} +\def\myrbrace{{\ifmonospace\else\ecfont\fi \char125}} +\let\{=\mylbrace \let\lbracechar=\{ +\let\}=\myrbrace \let\rbracechar=\} +\begingroup + % Definitions to produce \{ and \} commands for indices, + % and @{ and @} for the aux/toc files. + \catcode`\{ = \other \catcode`\} = \other + \catcode`\[ = 1 \catcode`\] = 2 + \catcode`\! = 0 \catcode`\\ = \other + !gdef!lbracecmd[\{]% + !gdef!rbracecmd[\}]% + !gdef!lbraceatcmd[@{]% + !gdef!rbraceatcmd[@}]% +!endgroup + +% @comma{} to avoid , parsing problems. +\let\comma = , + +% Accents: @, @dotaccent @ringaccent @ubaraccent @udotaccent +% Others are defined by plain TeX: @` @' @" @^ @~ @= @u @v @H. +\let\, = \ptexc +\let\dotaccent = \ptexdot +\def\ringaccent#1{{\accent23 #1}} +\let\tieaccent = \ptext +\let\ubaraccent = \ptexb +\let\udotaccent = \d + +% Other special characters: @questiondown @exclamdown @ordf @ordm +% Plain TeX defines: @AA @AE @O @OE @L (plus lowercase versions) @ss. 
+\def\questiondown{?`} +\def\exclamdown{!`} +\def\ordf{\leavevmode\raise1ex\hbox{\selectfonts\lllsize \underbar{a}}} +\def\ordm{\leavevmode\raise1ex\hbox{\selectfonts\lllsize \underbar{o}}} + +% Dotless i and dotless j, used for accents. +\def\imacro{i} +\def\jmacro{j} +\def\dotless#1{% + \def\temp{#1}% + \ifx\temp\imacro \ifmmode\imath \else\ptexi \fi + \else\ifx\temp\jmacro \ifmmode\jmath \else\j \fi + \else \errmessage{@dotless can be used only with i or j}% + \fi\fi +} + +% The \TeX{} logo, as in plain, but resetting the spacing so that a +% period following counts as ending a sentence. (Idea found in latex.) +% +\edef\TeX{\TeX \spacefactor=1000 } + +% @LaTeX{} logo. Not quite the same results as the definition in +% latex.ltx, since we use a different font for the raised A; it's most +% convenient for us to use an explicitly smaller font, rather than using +% the \scriptstyle font (since we don't reset \scriptstyle and +% \scriptscriptstyle). +% +\def\LaTeX{% + L\kern-.36em + {\setbox0=\hbox{T}% + \vbox to \ht0{\hbox{% + \ifx\textnominalsize\xwordpt + % for 10pt running text, \lllsize (8pt) is too small for the A in LaTeX. + % Revert to plain's \scriptsize, which is 7pt. + \count255=\the\fam $\fam\count255 \scriptstyle A$% + \else + % For 11pt, we can use our lllsize. + \selectfonts\lllsize A% + \fi + }% + \vss + }}% + \kern-.15em + \TeX +} + +% Some math mode symbols. +\def\bullet{$\ptexbullet$} +\def\geq{\ifmmode \ge\else $\ge$\fi} +\def\leq{\ifmmode \le\else $\le$\fi} +\def\minus{\ifmmode -\else $-$\fi} + +% @dots{} outputs an ellipsis using the current font. +% We do .5em per period so that it has the same spacing in the cm +% typewriter fonts as three actual period characters; on the other hand, +% in other typewriter fonts three periods are wider than 1.5em. So do +% whichever is larger. 
+% +\def\dots{% + \leavevmode + \setbox0=\hbox{...}% get width of three periods + \ifdim\wd0 > 1.5em + \dimen0 = \wd0 + \else + \dimen0 = 1.5em + \fi + \hbox to \dimen0{% + \hskip 0pt plus.25fil + .\hskip 0pt plus1fil + .\hskip 0pt plus1fil + .\hskip 0pt plus.5fil + }% +} + +% @enddots{} is an end-of-sentence ellipsis. +% +\def\enddots{% + \dots + \spacefactor=\endofsentencespacefactor +} + +% @point{}, @result{}, @expansion{}, @print{}, @equiv{}. +% +% Since these characters are used in examples, they should be an even number of +% \tt widths. Each \tt character is 1en, so two makes it 1em. +% +\def\point{$\star$} +\def\arrow{\leavevmode\raise.05ex\hbox to 1em{\hfil$\rightarrow$\hfil}} +\def\result{\leavevmode\raise.05ex\hbox to 1em{\hfil$\Rightarrow$\hfil}} +\def\expansion{\leavevmode\hbox to 1em{\hfil$\mapsto$\hfil}} +\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}} +\def\equiv{\leavevmode\hbox to 1em{\hfil$\ptexequiv$\hfil}} + +% The @error{} command. +% Adapted from the TeXbook's \boxit. +% +\newbox\errorbox +% +{\tentt \global\dimen0 = 3em}% Width of the box. +\dimen2 = .55pt % Thickness of rules +% The text. (`r' is open on the right, `e' somewhat less so on the left.) +\setbox0 = \hbox{\kern-.75pt \reducedsf \putworderror\kern-1.5pt} +% +\setbox\errorbox=\hbox to \dimen0{\hfil + \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right. + \advance\hsize by -2\dimen2 % Rules. + \vbox{% + \hrule height\dimen2 + \hbox{\vrule width\dimen2 \kern3pt % Space to left of text. + \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below. + \kern3pt\vrule width\dimen2}% Space to right. + \hrule height\dimen2} + \hfil} +% +\def\error{\leavevmode\lower.7ex\copy\errorbox} + +% @pounds{} is a sterling sign, which Knuth put in the CM italic font. +% +\def\pounds{{\it\$}} + +% @euro{} comes from a separate font, depending on the current style. 
+% We use the free feym* fonts from the eurosym package by Henrik +% Theiling, which support regular, slanted, bold and bold slanted (and +% "outlined" (blackboard bold, sort of) versions, which we don't need). +% It is available from http://www.ctan.org/tex-archive/fonts/eurosym. +% +% Although only regular is the truly official Euro symbol, we ignore +% that. The Euro is designed to be slightly taller than the regular +% font height. +% +% feymr - regular +% feymo - slanted +% feybr - bold +% feybo - bold slanted +% +% There is no good (free) typewriter version, to my knowledge. +% A feymr10 euro is ~7.3pt wide, while a normal cmtt10 char is ~5.25pt wide. +% Hmm. +% +% Also doesn't work in math. Do we need to do math with euro symbols? +% Hope not. +% +% +\def\euro{{\eurofont e}} +\def\eurofont{% + % We set the font at each command, rather than predefining it in + % \textfonts and the other font-switching commands, so that + % installations which never need the symbol don't have to have the + % font installed. + % + % There is only one designed size (nominal 10pt), so we always scale + % that to the current nominal size. + % + % By the way, simply using "at 1em" works for cmr10 and the like, but + % does not work for cmbx10 and other extended/shrunken fonts. + % + \def\eurosize{\csname\curfontsize nominalsize\endcsname}% + % + \ifx\curfontstyle\bfstylename + % bold: + \font\thiseurofont = \ifusingit{feybo10}{feybr10} at \eurosize + \else + % regular: + \font\thiseurofont = \ifusingit{feymo10}{feymr10} at \eurosize + \fi + \thiseurofont +} + +% Glyphs from the EC fonts. We don't use \let for the aliases, because +% sometimes we redefine the original macro, and the alias should reflect +% the redefinition. +% +% Use LaTeX names for the Icelandic letters.
+\def\DH{{\ecfont \char"D0}} % Eth +\def\dh{{\ecfont \char"F0}} % eth +\def\TH{{\ecfont \char"DE}} % Thorn +\def\th{{\ecfont \char"FE}} % thorn +% +\def\guillemetleft{{\ecfont \char"13}} +\def\guillemotleft{\guillemetleft} +\def\guillemetright{{\ecfont \char"14}} +\def\guillemotright{\guillemetright} +\def\guilsinglleft{{\ecfont \char"0E}} +\def\guilsinglright{{\ecfont \char"0F}} +\def\quotedblbase{{\ecfont \char"12}} +\def\quotesinglbase{{\ecfont \char"0D}} +% +% This positioning is not perfect (see the ogonek LaTeX package), but +% we have the precomposed glyphs for the most common cases. We put the +% tests to use those glyphs in the single \ogonek macro so we have fewer +% dummy definitions to worry about for index entries, etc. +% +% ogonek is also used with other letters in Lithuanian (IOU), but using +% the precomposed glyphs for those is not so easy since they aren't in +% the same EC font. +\def\ogonek#1{{% + \def\temp{#1}% + \ifx\temp\macrocharA\Aogonek + \else\ifx\temp\macrochara\aogonek + \else\ifx\temp\macrocharE\Eogonek + \else\ifx\temp\macrochare\eogonek + \else + \ecfont \setbox0=\hbox{#1}% + \ifdim\ht0=1ex\accent"0C #1% + \else\ooalign{\unhbox0\crcr\hidewidth\char"0C \hidewidth}% + \fi + \fi\fi\fi\fi + }% +} +\def\Aogonek{{\ecfont \char"81}}\def\macrocharA{A} +\def\aogonek{{\ecfont \char"A1}}\def\macrochara{a} +\def\Eogonek{{\ecfont \char"86}}\def\macrocharE{E} +\def\eogonek{{\ecfont \char"A6}}\def\macrochare{e} +% +% Use the ec* fonts (cm-super in outline format) for non-CM glyphs. +\def\ecfont{% + % We can't distinguish serif/sans and italic/slanted, but this + % is used for crude hacks anyway (like adding French and German + % quotes to documents typeset with CM, where we lose kerning), so + % hopefully nobody will notice/care. 
+ \edef\ecsize{\csname\curfontsize ecsize\endcsname}% + \edef\nominalsize{\csname\curfontsize nominalsize\endcsname}% + \ifmonospace + % typewriter: + \font\thisecfont = ectt\ecsize \space at \nominalsize + \else + \ifx\curfontstyle\bfstylename + % bold: + \font\thisecfont = ecb\ifusingit{i}{x}\ecsize \space at \nominalsize + \else + % regular: + \font\thisecfont = ec\ifusingit{ti}{rm}\ecsize \space at \nominalsize + \fi + \fi + \thisecfont +} + +% @registeredsymbol - R in a circle. The font for the R should really +% be smaller yet, but lllsize is the best we can do for now. +% Adapted from the plain.tex definition of \copyright. +% +\def\registeredsymbol{% + $^{{\ooalign{\hfil\raise.07ex\hbox{\selectfonts\lllsize R}% + \hfil\crcr\Orb}}% + }$% +} + +% @textdegree - the normal degrees sign. +% +\def\textdegree{$^\circ$} + +% Laurent Siebenmann reports \Orb undefined with: +% Textures 1.7.7 (preloaded format=plain 93.10.14) (68K) 16 APR 2004 02:38 +% so we'll define it if necessary. +% +\ifx\Orb\thisisundefined +\def\Orb{\mathhexbox20D} +\fi + +% Quotes. +\chardef\quotedblleft="5C +\chardef\quotedblright=`\" +\chardef\quoteleft=`\` +\chardef\quoteright=`\' + + +\message{page headings,} + +\newskip\titlepagetopglue \titlepagetopglue = 1.5in +\newskip\titlepagebottomglue \titlepagebottomglue = 2pc + +% First the title page. Must do @settitle before @titlepage. +\newif\ifseenauthor +\newif\iffinishedtitlepage + +% Do an implicit @contents or @shortcontents after @end titlepage if the +% user says @setcontentsaftertitlepage or @setshortcontentsaftertitlepage. 
+% +\newif\ifsetcontentsaftertitlepage + \let\setcontentsaftertitlepage = \setcontentsaftertitlepagetrue +\newif\ifsetshortcontentsaftertitlepage + \let\setshortcontentsaftertitlepage = \setshortcontentsaftertitlepagetrue + +\parseargdef\shorttitlepage{% + \begingroup \hbox{}\vskip 1.5in \chaprm \centerline{#1}% + \endgroup\page\hbox{}\page} + +\envdef\titlepage{% + % Open one extra group, as we want to close it in the middle of \Etitlepage. + \begingroup + \parindent=0pt \textfonts + % Leave some space at the very top of the page. + \vglue\titlepagetopglue + % No rule at page bottom unless we print one at the top with @title. + \finishedtitlepagetrue + % + % Most title ``pages'' are actually two pages long, with space + % at the top of the second. We don't want the ragged left on the second. + \let\oldpage = \page + \def\page{% + \iffinishedtitlepage\else + \finishtitlepage + \fi + \let\page = \oldpage + \page + \null + }% +} + +\def\Etitlepage{% + \iffinishedtitlepage\else + \finishtitlepage + \fi + % It is important to do the page break before ending the group, + % because the headline and footline are only empty inside the group. + % If we use the new definition of \page, we always get a blank page + % after the title page, which we certainly don't want. + \oldpage + \endgroup + % + % Need this before the \...aftertitlepage checks so that if they are + % in effect the toc pages will come out with page numbers. + \HEADINGSon + % + % If they want short, they certainly want long too. 
+ \ifsetshortcontentsaftertitlepage + \shortcontents + \contents + \global\let\shortcontents = \relax + \global\let\contents = \relax + \fi + % + \ifsetcontentsaftertitlepage + \contents + \global\let\contents = \relax + \global\let\shortcontents = \relax + \fi +} + +\def\finishtitlepage{% + \vskip4pt \hrule height 2pt width \hsize + \vskip\titlepagebottomglue + \finishedtitlepagetrue +} + +% Settings used for typesetting titles: no hyphenation, no indentation, +% don't worry much about spacing, ragged right. This should be used +% inside a \vbox, and fonts need to be set appropriately first. Because +% it is always used for titles, nothing else, we call \rmisbold. \par +% should be specified before the end of the \vbox, since a vbox is a group. +% +\def\raggedtitlesettings{% + \rmisbold + \hyphenpenalty=10000 + \parindent=0pt + \tolerance=5000 + \ptexraggedright +} + +% Macros to be used within @titlepage: + +\let\subtitlerm=\tenrm +\def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines} + +\parseargdef\title{% + \checkenv\titlepage + \vbox{\titlefonts \raggedtitlesettings #1\par}% + % print a rule at the page bottom also. + \finishedtitlepagefalse + \vskip4pt \hrule height 4pt width \hsize \vskip4pt +} + +\parseargdef\subtitle{% + \checkenv\titlepage + {\subtitlefont \rightline{#1}}% +} + +% @author should come last, but may come many times. +% It can also be used inside @quotation. +% +\parseargdef\author{% + \def\temp{\quotation}% + \ifx\thisenv\temp + \def\quotationauthor{#1}% printed in \Equotation. + \else + \checkenv\titlepage + \ifseenauthor\else \vskip 0pt plus 1filll \seenauthortrue \fi + {\secfonts\rmisbold \leftline{#1}}% + \fi +} + + +% Set up page headings and footings. 
+ +\let\thispage=\folio + +\newtoks\evenheadline % headline on even pages +\newtoks\oddheadline % headline on odd pages +\newtoks\evenfootline % footline on even pages +\newtoks\oddfootline % footline on odd pages + +% Now make TeX use those variables +\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline + \else \the\evenheadline \fi}} +\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline + \else \the\evenfootline \fi}\HEADINGShook} +\let\HEADINGShook=\relax + +% Commands to set those variables. +% For example, this is what @headings on does +% @evenheading @thistitle|@thispage|@thischapter +% @oddheading @thischapter|@thispage|@thistitle +% @evenfooting @thisfile|| +% @oddfooting ||@thisfile + + +\def\evenheading{\parsearg\evenheadingxxx} +\def\evenheadingxxx #1{\evenheadingyyy #1\|\|\|\|\finish} +\def\evenheadingyyy #1\|#2\|#3\|#4\finish{% +\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\def\oddheading{\parsearg\oddheadingxxx} +\def\oddheadingxxx #1{\oddheadingyyy #1\|\|\|\|\finish} +\def\oddheadingyyy #1\|#2\|#3\|#4\finish{% +\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\parseargdef\everyheading{\oddheadingxxx{#1}\evenheadingxxx{#1}}% + +\def\evenfooting{\parsearg\evenfootingxxx} +\def\evenfootingxxx #1{\evenfootingyyy #1\|\|\|\|\finish} +\def\evenfootingyyy #1\|#2\|#3\|#4\finish{% +\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\def\oddfooting{\parsearg\oddfootingxxx} +\def\oddfootingxxx #1{\oddfootingyyy #1\|\|\|\|\finish} +\def\oddfootingyyy #1\|#2\|#3\|#4\finish{% + \global\oddfootline = {\rlap{\centerline{#2}}\line{#1\hfil#3}}% + % + % Leave some space for the footline. Hopefully ok to assume + % @evenfooting will not be used by itself. 
+ \global\advance\pageheight by -12pt + \global\advance\vsize by -12pt +} + +\parseargdef\everyfooting{\oddfootingxxx{#1}\evenfootingxxx{#1}} + +% @evenheadingmarks top \thischapter <- chapter at the top of a page +% @evenheadingmarks bottom \thischapter <- chapter at the bottom of a page +% +% The same set of arguments for: +% +% @oddheadingmarks +% @evenfootingmarks +% @oddfootingmarks +% @everyheadingmarks +% @everyfootingmarks + +\def\evenheadingmarks{\headingmarks{even}{heading}} +\def\oddheadingmarks{\headingmarks{odd}{heading}} +\def\evenfootingmarks{\headingmarks{even}{footing}} +\def\oddfootingmarks{\headingmarks{odd}{footing}} +\def\everyheadingmarks#1 {\headingmarks{even}{heading}{#1} + \headingmarks{odd}{heading}{#1} } +\def\everyfootingmarks#1 {\headingmarks{even}{footing}{#1} + \headingmarks{odd}{footing}{#1} } +% #1 = even/odd, #2 = heading/footing, #3 = top/bottom. +\def\headingmarks#1#2#3 {% + \expandafter\let\expandafter\temp \csname get#3headingmarks\endcsname + \global\expandafter\let\csname get#1#2marks\endcsname \temp +} + +\everyheadingmarks bottom +\everyfootingmarks bottom + +% @headings double turns headings on for double-sided printing. +% @headings single turns headings on for single-sided printing. +% @headings off turns them off. +% @headings on same as @headings double, retained for compatibility. +% @headings after turns on double-sided headings after this page. +% @headings doubleafter turns on double-sided headings after this page. +% @headings singleafter turns on single-sided headings after this page. +% By default, they are off at the start of a document, +% and turned `on' after @end titlepage. 
+ +\def\headings #1 {\csname HEADINGS#1\endcsname} + +\def\headingsoff{% non-global headings elimination + \evenheadline={\hfil}\evenfootline={\hfil}% + \oddheadline={\hfil}\oddfootline={\hfil}% +} + +\def\HEADINGSoff{{\globaldefs=1 \headingsoff}} % global setting +\HEADINGSoff % it's the default + +% When we turn headings on, set the page number to 1. +% For double-sided printing, put current file name in lower left corner, +% chapter name on inside top of right hand pages, document +% title on inside top of left hand pages, and page numbers on outside top +% edge of all pages. +\def\HEADINGSdouble{% +\global\pageno=1 +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\folio\hfil\thistitle}} +\global\oddheadline={\line{\thischapter\hfil\folio}} +\global\let\contentsalignmacro = \chapoddpage +} +\let\contentsalignmacro = \chappager + +% For single-sided printing, chapter title goes across top left of page, +% page number on top right. +\def\HEADINGSsingle{% +\global\pageno=1 +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\thischapter\hfil\folio}} +\global\oddheadline={\line{\thischapter\hfil\folio}} +\global\let\contentsalignmacro = \chappager +} +\def\HEADINGSon{\HEADINGSdouble} + +\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} +\let\HEADINGSdoubleafter=\HEADINGSafter +\def\HEADINGSdoublex{% +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\folio\hfil\thistitle}} +\global\oddheadline={\line{\thischapter\hfil\folio}} +\global\let\contentsalignmacro = \chapoddpage +} + +\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} +\def\HEADINGSsinglex{% +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\thischapter\hfil\folio}} +\global\oddheadline={\line{\thischapter\hfil\folio}} +\global\let\contentsalignmacro = \chappager +} + +% Subroutines used in generating headings +% This produces Day Month Year 
style of output. +% Only define if not already defined, in case a txi-??.tex file has set +% up a different format (e.g., txi-cs.tex does this). +\ifx\today\thisisundefined +\def\today{% + \number\day\space + \ifcase\month + \or\putwordMJan\or\putwordMFeb\or\putwordMMar\or\putwordMApr + \or\putwordMMay\or\putwordMJun\or\putwordMJul\or\putwordMAug + \or\putwordMSep\or\putwordMOct\or\putwordMNov\or\putwordMDec + \fi + \space\number\year} +\fi + +% @settitle line... specifies the title of the document, for headings. +% It generates no output of its own. +\def\thistitle{\putwordNoTitle} +\def\settitle{\parsearg{\gdef\thistitle}} + + +\message{tables,} +% Tables -- @table, @ftable, @vtable, @item(x). + +% default indentation of table text +\newdimen\tableindent \tableindent=.8in +% default indentation of @itemize and @enumerate text +\newdimen\itemindent \itemindent=.3in +% margin between end of table item and start of table text. +\newdimen\itemmargin \itemmargin=.1in + +% used internally for \itemindent minus \itemmargin +\newdimen\itemmax + +% Note @table, @ftable, and @vtable define @item, @itemx, etc., with +% these defs. +% They also define \itemindex +% to index the item name in whatever manner is desired (perhaps none). + +\newif\ifitemxneedsnegativevskip + +\def\itemxpar{\par\ifitemxneedsnegativevskip\nobreak\vskip-\parskip\nobreak\fi} + +\def\internalBitem{\smallbreak \parsearg\itemzzz} +\def\internalBitemx{\itemxpar \parsearg\itemzzz} + +\def\itemzzz #1{\begingroup % + \advance\hsize by -\rightskip + \advance\hsize by -\tableindent + \setbox0=\hbox{\itemindicate{#1}}% + \itemindex{#1}% + \nobreak % This prevents a break before @itemx. + % + % If the item text does not fit in the space we have, put it on a line + % by itself, and do not allow a page break either before or after that + % line. 
We do not start a paragraph here because then if the next + % command is, e.g., @kindex, the whatsit would get put into the + % horizontal list on a line by itself, resulting in extra blank space. + \ifdim \wd0>\itemmax + % + % Make this a paragraph so we get the \parskip glue and wrapping, + % but leave it ragged-right. + \begingroup + \advance\leftskip by-\tableindent + \advance\hsize by\tableindent + \advance\rightskip by0pt plus1fil\relax + \leavevmode\unhbox0\par + \endgroup + % + % We're going to be starting a paragraph, but we don't want the + % \parskip glue -- logically it's part of the @item we just started. + \nobreak \vskip-\parskip + % + % Stop a page break at the \parskip glue coming up. However, if + % what follows is an environment such as @example, there will be no + % \parskip glue; then the negative vskip we just inserted would + % cause the example and the item to crash together. So we use this + % bizarre value of 10001 as a signal to \aboveenvbreak to insert + % \parskip glue after all. Section titles are handled this way also. + % + \penalty 10001 + \endgroup + \itemxneedsnegativevskipfalse + \else + % The item text fits into the space. Start a paragraph, so that the + % following text (if any) will end up on the same line. + \noindent + % Do this with kerns and \unhbox so that if there is a footnote in + % the item text, it can migrate to the main vertical list and + % eventually be printed. + \nobreak\kern-\tableindent + \dimen0 = \itemmax \advance\dimen0 by \itemmargin \advance\dimen0 by -\wd0 + \unhbox0 + \nobreak\kern\dimen0 + \endgroup + \itemxneedsnegativevskiptrue + \fi +} + +\def\item{\errmessage{@item while not in a list environment}} +\def\itemx{\errmessage{@itemx while not in a list environment}} + +% @table, @ftable, @vtable. 
+\envdef\table{% + \let\itemindex\gobble + \tablecheck{table}% +} +\envdef\ftable{% + \def\itemindex ##1{\doind {fn}{\code{##1}}}% + \tablecheck{ftable}% +} +\envdef\vtable{% + \def\itemindex ##1{\doind {vr}{\code{##1}}}% + \tablecheck{vtable}% +} +\def\tablecheck#1{% + \ifnum \the\catcode`\^^M=\active + \endgroup + \errmessage{This command won't work in this context; perhaps the problem is + that we are \inenvironment\thisenv}% + \def\next{\doignore{#1}}% + \else + \let\next\tablex + \fi + \next +} +\def\tablex#1{% + \def\itemindicate{#1}% + \parsearg\tabley +} +\def\tabley#1{% + {% + \makevalueexpandable + \edef\temp{\noexpand\tablez #1\space\space\space}% + \expandafter + }\temp \endtablez +} +\def\tablez #1 #2 #3 #4\endtablez{% + \aboveenvbreak + \ifnum 0#1>0 \advance \leftskip by #1\mil \fi + \ifnum 0#2>0 \tableindent=#2\mil \fi + \ifnum 0#3>0 \advance \rightskip by #3\mil \fi + \itemmax=\tableindent + \advance \itemmax by -\itemmargin + \advance \leftskip by \tableindent + \exdentamount=\tableindent + \parindent = 0pt + \parskip = \smallskipamount + \ifdim \parskip=0pt \parskip=2pt \fi + \let\item = \internalBitem + \let\itemx = \internalBitemx +} +\def\Etable{\endgraf\afterenvbreak} +\let\Eftable\Etable +\let\Evtable\Etable +\let\Eitemize\Etable +\let\Eenumerate\Etable + +% This is the counter used by @enumerate, which is really @itemize + +\newcount \itemno + +\envdef\itemize{\parsearg\doitemize} + +\def\doitemize#1{% + \aboveenvbreak + \itemmax=\itemindent + \advance\itemmax by -\itemmargin + \advance\leftskip by \itemindent + \exdentamount=\itemindent + \parindent=0pt + \parskip=\smallskipamount + \ifdim\parskip=0pt \parskip=2pt \fi + % + % Try typesetting the item mark so that if the document erroneously says + % something like @itemize @samp (intending @table), there's an error + % right away at the @itemize. It's not the best error message in the + % world, but it's better than leaving it to the @item.
This means if + % the user wants an empty mark, they have to say @w{} not just @w. + \def\itemcontents{#1}% + \setbox0 = \hbox{\itemcontents}% + % + % @itemize with no arg is equivalent to @itemize @bullet. + \ifx\itemcontents\empty\def\itemcontents{\bullet}\fi + % + \let\item=\itemizeitem +} + +% Definition of @item while inside @itemize and @enumerate. +% +\def\itemizeitem{% + \advance\itemno by 1 % for enumerations + {\let\par=\endgraf \smallbreak}% reasonable place to break + {% + % If the document has an @itemize directly after a section title, a + % \nobreak will be last on the list, and \sectionheading will have + % done a \vskip-\parskip. In that case, we don't want to zero + % parskip, or the item text will crash with the heading. On the + % other hand, when there is normal text preceding the item (as there + % usually is), we do want to zero parskip, or there would be too much + % space. In that case, we won't have a \nobreak before. At least + % that's the theory. + \ifnum\lastpenalty<10000 \parskip=0in \fi + \noindent + \hbox to 0pt{\hss \itemcontents \kern\itemmargin}% + % + \vadjust{\penalty 1200}}% not good to break after first line of item. + \flushcr +} + +% \splitoff TOKENS\endmark defines \first to be the first token in +% TOKENS, and \rest to be the remainder. +% +\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% + +% Allow an optional argument of an uppercase letter, lowercase letter, +% or number, to specify the first label in the enumerated list. No +% argument is the same as `1'. +% +\envparseargdef\enumerate{\enumeratey #1 \endenumeratey} +\def\enumeratey #1 #2\endenumeratey{% + % If we were given no argument, pretend we were given `1'. + \def\thearg{#1}% + \ifx\thearg\empty \def\thearg{1}\fi + % + % Detect if the argument is a single token. If so, it might be a + % letter. Otherwise, the only valid thing it can be is a number. + % (We will always have one token, because of the test we just made. 
+ % This is a good thing, since \splitoff doesn't work given nothing at + % all -- the first parameter is undelimited.) + \expandafter\splitoff\thearg\endmark + \ifx\rest\empty + % Only one token in the argument. It could still be anything. + % A ``lowercase letter'' is one whose \lccode is nonzero. + % An ``uppercase letter'' is one whose \lccode is both nonzero, and + % not equal to itself. + % Otherwise, we assume it's a number. + % + % We need the \relax at the end of the \ifnum lines to stop TeX from + % continuing to look for a number. + % + \ifnum\lccode\expandafter`\thearg=0\relax + \numericenumerate % a number (we hope) + \else + % It's a letter. + \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax + \lowercaseenumerate % lowercase letter + \else + \uppercaseenumerate % uppercase letter + \fi + \fi + \else + % Multiple tokens in the argument. We hope it's a number. + \numericenumerate + \fi +} + +% An @enumerate whose labels are integers. The starting integer is +% given in \thearg. +% +\def\numericenumerate{% + \itemno = \thearg + \startenumeration{\the\itemno}% +} + +% The starting (lowercase) letter is in \thearg. +\def\lowercaseenumerate{% + \itemno = \expandafter`\thearg + \startenumeration{% + % Be sure we're not beyond the end of the alphabet. + \ifnum\itemno=0 + \errmessage{No more lowercase letters in @enumerate; get a bigger + alphabet}% + \fi + \char\lccode\itemno + }% +} + +% The starting (uppercase) letter is in \thearg. +\def\uppercaseenumerate{% + \itemno = \expandafter`\thearg + \startenumeration{% + % Be sure we're not beyond the end of the alphabet. + \ifnum\itemno=0 + \errmessage{No more uppercase letters in @enumerate; get a bigger + alphabet} + \fi + \char\uccode\itemno + }% +} + +% Call \doitemize, adding a period to the first argument and supplying the +% common last two arguments. Also subtract one from the initial value in +% \itemno, since @item increments \itemno.
+% +\def\startenumeration#1{% + \advance\itemno by -1 + \doitemize{#1.}\flushcr +} + +% @alphaenumerate and @capsenumerate are abbreviations for giving an arg +% to @enumerate. +% +\def\alphaenumerate{\enumerate{a}} +\def\capsenumerate{\enumerate{A}} +\def\Ealphaenumerate{\Eenumerate} +\def\Ecapsenumerate{\Eenumerate} + + +% @multitable macros +% Amy Hendrickson, 8/18/94, 3/6/96 +% +% @multitable ... @end multitable will make as many columns as desired. +% Contents of each column will wrap at width given in preamble. Width +% can be specified either with sample text given in a template line, +% or in percent of \hsize, the current width of text on page. + +% Table can continue over pages but will only break between lines. + +% To make preamble: +% +% Either define widths of columns in terms of percent of \hsize: +% @multitable @columnfractions .25 .3 .45 +% @item ... +% +% Numbers following @columnfractions are the percent of the total +% current hsize to be used for each column. You may use as many +% columns as desired. + + +% Or use a template: +% @multitable {Column 1 template} {Column 2 template} {Column 3 template} +% @item ... +% using the widest term desired in each column. + +% Each new table line starts with @item, each subsequent new column +% starts with @tab. Empty columns may be produced by supplying @tab's +% with nothing between them for as many times as empty columns are needed, +% ie, @tab@tab@tab will produce two empty columns. + +% @item, @tab do not need to be on their own lines, but it will not hurt +% if they are. + +% Sample multitable: + +% @multitable {Column 1 template} {Column 2 template} {Column 3 template} +% @item first col stuff @tab second col stuff @tab third col +% @item +% first col stuff +% @tab +% second col stuff +% @tab +% third col +% @item first col stuff @tab second col stuff +% @tab Many paragraphs of text may be used in any column. +% +% They will wrap at the width determined by the template. 
+% @item@tab@tab This will be in third column. +% @end multitable + +% Default dimensions may be reset by user. +% @multitableparskip is vertical space between paragraphs in table. +% @multitableparindent is paragraph indent in table. +% @multitablecolspace is horizontal space to be left between columns. +% @multitablelinespace is space to leave between table items, baseline +% to baseline. +% 0pt means it depends on current normal line spacing. +% +\newskip\multitableparskip +\newskip\multitableparindent +\newdimen\multitablecolspace +\newskip\multitablelinespace +\multitableparskip=0pt +\multitableparindent=6pt +\multitablecolspace=12pt +\multitablelinespace=0pt + +% Macros used to set up halign preamble: +% +\let\endsetuptable\relax +\def\xendsetuptable{\endsetuptable} +\let\columnfractions\relax +\def\xcolumnfractions{\columnfractions} +\newif\ifsetpercent + +% #1 is the @columnfraction, usually a decimal number like .5, but might +% be just 1. We just use it, whatever it is. +% +\def\pickupwholefraction#1 {% + \global\advance\colcount by 1 + \expandafter\xdef\csname col\the\colcount\endcsname{#1\hsize}% + \setuptable +} + +\newcount\colcount +\def\setuptable#1{% + \def\firstarg{#1}% + \ifx\firstarg\xendsetuptable + \let\go = \relax + \else + \ifx\firstarg\xcolumnfractions + \global\setpercenttrue + \else + \ifsetpercent + \let\go\pickupwholefraction + \else + \global\advance\colcount by 1 + \setbox0=\hbox{#1\unskip\space}% Add a normal word space as a + % separator; typically that is always in the input, anyway. + \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% + \fi + \fi + \ifx\go\pickupwholefraction + % Put the argument back for the \pickupwholefraction call, so + % we'll always have a period there to be parsed. + \def\go{\pickupwholefraction#1}% + \else + \let\go = \setuptable + \fi% + \fi + \go +} + +% multitable-only commands. +% +% @headitem starts a heading row, which we typeset in bold.
+% Assignments have to be global since we are inside the implicit group +% of an alignment entry. \everycr resets \everytab so we don't have to +% undo it ourselves. +\def\headitemfont{\b}% for people to use in the template row; not changeable +\def\headitem{% + \checkenv\multitable + \crcr + \global\everytab={\bf}% can't use \headitemfont since the parsing differs + \the\everytab % for the first item +}% +% +% A \tab used to include \hskip1sp. But then the space in a template +% line is not enough. That is bad. So let's go back to just `&' until +% we again encounter the problem the 1sp was intended to solve. +% --karl, nathan@acm.org, 20apr99. +\def\tab{\checkenv\multitable &\the\everytab}% + +% @multitable ... @end multitable definitions: +% +\newtoks\everytab % insert after every tab. +% +\envdef\multitable{% + \vskip\parskip + \startsavinginserts + % + % @item within a multitable starts a normal row. + % We use \def instead of \let so that if one of the multitable entries + % contains an @itemize, we don't choke on the \item (seen as \crcr aka + % \endtemplate) expanding \doitemize. + \def\item{\crcr}% + % + \tolerance=9500 + \hbadness=9500 + \setmultitablespacing + \parskip=\multitableparskip + \parindent=\multitableparindent + \overfullrule=0pt + \global\colcount=0 + % + \everycr = {% + \noalign{% + \global\everytab={}% + \global\colcount=0 % Reset the column counter. + % Check for saved footnotes, etc. + \checkinserts + % Keeps underfull box messages off when table breaks over pages. + %\filbreak + % Maybe so, but it also creates really weird page breaks when the + % table breaks over pages. Wouldn't \vfil be better? Wait until the + % problem manifests itself, so it can be fixed for real --karl. 
+ }% + }% + % + \parsearg\domultitable +} +\def\domultitable#1{% + % To parse everything between @multitable and @item: + \setuptable#1 \endsetuptable + % + % This preamble sets up a generic column definition, which will + % be used as many times as user calls for columns. + % \vtop will set a single line and will also let text wrap and + % continue for many paragraphs if desired. + \halign\bgroup &% + \global\advance\colcount by 1 + \multistrut + \vtop{% + % Use the current \colcount to find the correct column width: + \hsize=\expandafter\csname col\the\colcount\endcsname + % + % In order to keep entries from bumping into each other + % we will add a \leftskip of \multitablecolspace to all columns after + % the first one. + % + % If a template has been used, we will add \multitablecolspace + % to the width of each template entry. + % + % If the user has set preamble in terms of percent of \hsize we will + % use that dimension as the width of the column, and the \leftskip + % will keep entries from bumping into each other. Table will start at + % left margin and final column will justify at right margin. + % + % Make sure we don't inherit \rightskip from the outer environment. + \rightskip=0pt + \ifnum\colcount=1 + % The first column will be indented with the surrounding text. + \advance\hsize by\leftskip + \else + \ifsetpercent \else + % If user has not set preamble in terms of percent of \hsize + % we will advance \hsize by \multitablecolspace. + \advance\hsize by \multitablecolspace + \fi + % In either case we will make \leftskip=\multitablecolspace: + \leftskip=\multitablecolspace + \fi + % Ignoring space at the beginning and end avoids an occasional spurious + % blank line, when TeX decides to break the line at the space before the + % box from the multistrut, so the strut ends up on a line by itself. + % For example: + % @multitable @columnfractions .11 .89 + % @item @code{#} + % @tab Legal holiday which is valid in major parts of the whole country. 
+ % Is automatically provided with highlighting sequences respectively + % marking characters. + \noindent\ignorespaces##\unskip\multistrut + }\cr +} +\def\Emultitable{% + \crcr + \egroup % end the \halign + \global\setpercentfalse +} + +\def\setmultitablespacing{% + \def\multistrut{\strut}% just use the standard line spacing + % + % Compute \multitablelinespace (if not defined by user) for use in + % \multitableparskip calculation. We used to define \multistrut based on + % this, but (ironically) that caused the spacing to be off. + % See bug-texinfo report from Werner Lemberg, 31 Oct 2004 12:52:20 +0100. +\ifdim\multitablelinespace=0pt +\setbox0=\vbox{X}\global\multitablelinespace=\the\baselineskip +\global\advance\multitablelinespace by-\ht0 +\fi +% Test to see if parskip is larger than space between lines of +% table. If not, do nothing. +% If so, set to same dimension as multitablelinespace. +\ifdim\multitableparskip>\multitablelinespace +\global\multitableparskip=\multitablelinespace +\global\advance\multitableparskip-7pt % to keep parskip somewhat smaller + % than skip between lines in the table. +\fi% +\ifdim\multitableparskip=0pt +\global\multitableparskip=\multitablelinespace +\global\advance\multitableparskip-7pt % to keep parskip somewhat smaller + % than skip between lines in the table. +\fi} + + +\message{conditionals,} + +% @iftex, @ifnotdocbook, @ifnothtml, @ifnotinfo, @ifnotplaintext, +% @ifnotxml always succeed. They currently do nothing; we don't +% attempt to check whether the conditionals are properly nested. But we +% have to remember that they are conditionals, so that @end doesn't +% attempt to close an environment group. +% +\def\makecond#1{% + \expandafter\let\csname #1\endcsname = \relax + \expandafter\let\csname iscond.#1\endcsname = 1 +} +\makecond{iftex} +\makecond{ifnotdocbook} +\makecond{ifnothtml} +\makecond{ifnotinfo} +\makecond{ifnotplaintext} +\makecond{ifnotxml} + +% Ignore @ignore, @ifhtml, @ifinfo, and the like.
+% +\def\direntry{\doignore{direntry}} +\def\documentdescription{\doignore{documentdescription}} +\def\docbook{\doignore{docbook}} +\def\html{\doignore{html}} +\def\ifdocbook{\doignore{ifdocbook}} +\def\ifhtml{\doignore{ifhtml}} +\def\ifinfo{\doignore{ifinfo}} +\def\ifnottex{\doignore{ifnottex}} +\def\ifplaintext{\doignore{ifplaintext}} +\def\ifxml{\doignore{ifxml}} +\def\ignore{\doignore{ignore}} +\def\menu{\doignore{menu}} +\def\xml{\doignore{xml}} + +% Ignore text until a line `@end #1', keeping track of nested conditionals. +% +% A count to remember the depth of nesting. +\newcount\doignorecount + +\def\doignore#1{\begingroup + % Scan in ``verbatim'' mode: + \obeylines + \catcode`\@ = \other + \catcode`\{ = \other + \catcode`\} = \other + % + % Make sure that spaces turn into tokens that match what \doignoretext wants. + \spaceisspace + % + % Count number of #1's that we've seen. + \doignorecount = 0 + % + % Swallow text until we reach the matching `@end #1'. + \dodoignore{#1}% +} + +{ \catcode`_=11 % We want to use \_STOP_ which cannot appear in texinfo source. + \obeylines % + % + \gdef\dodoignore#1{% + % #1 contains the command name as a string, e.g., `ifinfo'. + % + % Define a command to find the next `@end #1'. + \long\def\doignoretext##1^^M@end #1{% + \doignoretextyyy##1^^M@#1\_STOP_}% + % + % And this command to find another #1 command, at the beginning of a + % line. (Otherwise, we would consider a line `@c @ifset', for + % example, to count as an @ifset for nesting.) + \long\def\doignoretextyyy##1^^M@#1##2\_STOP_{\doignoreyyy{##2}\_STOP_}% + % + % And now expand that command. + \doignoretext ^^M% + }% +} + +\def\doignoreyyy#1{% + \def\temp{#1}% + \ifx\temp\empty % Nothing found. + \let\next\doignoretextzzz + \else % Found a nested condition, ... + \advance\doignorecount by 1 + \let\next\doignoretextyyy % ..., look for another. + % If we're here, #1 ends with ^^M\ifinfo (for example). + \fi + \next #1% the token \_STOP_ is present just after this macro. 
+} + +% We have to swallow the remaining "\_STOP_". +% +\def\doignoretextzzz#1{% + \ifnum\doignorecount = 0 % We have just found the outermost @end. + \let\next\enddoignore + \else % Still inside a nested condition. + \advance\doignorecount by -1 + \let\next\doignoretext % Look for the next @end. + \fi + \next +} + +% Finish off ignored text. +{ \obeylines% + % Ignore anything after the last `@end #1'; this matters in verbatim + % environments, where otherwise the newline after an ignored conditional + % would result in a blank line in the output. + \gdef\enddoignore#1^^M{\endgroup\ignorespaces}% +} + + +% @set VAR sets the variable VAR to an empty value. +% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. +% +% Since we want to separate VAR from REST-OF-LINE (which might be +% empty), we can't just use \parsearg; we have to insert a space of our +% own to delimit the rest of the line, and then take it out again if we +% didn't need it. +% We rely on the fact that \parsearg sets \catcode`\ =10. +% +\parseargdef\set{\setyyy#1 \endsetyyy} +\def\setyyy#1 #2\endsetyyy{% + {% + \makevalueexpandable + \def\temp{#2}% + \edef\next{\gdef\makecsname{SET#1}}% + \ifx\temp\empty + \next{}% + \else + \setzzz#2\endsetzzz + \fi + }% +} +% Remove the trailing space \setxxx inserted. +\def\setzzz#1 \endsetzzz{\next{#1}} + +% @clear VAR clears (i.e., unsets) the variable VAR. +% +\parseargdef\clear{% + {% + \makevalueexpandable + \global\expandafter\let\csname SET#1\endcsname=\relax + }% +} + +% @value{foo} gets the text saved in variable foo. +\def\value{\begingroup\makevalueexpandable\valuexxx} +\def\valuexxx#1{\expandablevalue{#1}\endgroup} +{ + \catcode`\- = \active \catcode`\_ = \active + % + \gdef\makevalueexpandable{% + \let\value = \expandablevalue + % We don't want these characters active, ... + \catcode`\-=\other \catcode`\_=\other + % ..., but we might end up with active ones in the argument if + % we're called from @code, as @code{@value{foo-bar_}}, though. 
+ % So \let them to their normal equivalents. + \let-\normaldash \let_\normalunderscore + } +} + +% We have this subroutine so that we can handle at least some @value's +% properly in indexes (we call \makevalueexpandable in \indexdummies). +% The command has to be fully expandable (if the variable is set), since +% the result winds up in the index file. This means that if the +% variable's value contains other Texinfo commands, it's almost certain +% it will fail (although perhaps we could fix that with sufficient work +% to do a one-level expansion on the result, instead of complete). +% +\def\expandablevalue#1{% + \expandafter\ifx\csname SET#1\endcsname\relax + {[No value for ``#1'']}% + \message{Variable `#1', used in @value, is not set.}% + \else + \csname SET#1\endcsname + \fi +} + +% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined +% with @set. +% +% To get special treatment of `@end ifset,' call \makecond and then redefine. +% +\makecond{ifset} +\def\ifset{\parsearg{\doifset{\let\next=\ifsetfail}}} +\def\doifset#1#2{% + {% + \makevalueexpandable + \let\next=\empty + \expandafter\ifx\csname SET#2\endcsname\relax + #1% If not set, redefine \next. + \fi + \expandafter + }\next +} +\def\ifsetfail{\doignore{ifset}} + +% @ifclear VAR ... @end executes the `...' iff VAR has never been +% defined with @set, or has been undefined with @clear. +% +% The `\else' inside the `\doifset' parameter is a trick to reuse the +% above code: if the variable is not set, do nothing, if it is set, +% then redefine \next to \ifclearfail. +% +\makecond{ifclear} +\def\ifclear{\parsearg{\doifset{\else \let\next=\ifclearfail}}} +\def\ifclearfail{\doignore{ifclear}} + +% @ifcommanddefined CMD ... @end executes the `...' if CMD (written +% without the @) is in fact defined. We can only feasibly check at the +% TeX level, so something like `mathcode' is going to be considered +% defined even though it is not a Texinfo command.
+% +\makecond{ifcommanddefined} +\def\ifcommanddefined{\parsearg{\doifcmddefined{\let\next=\ifcmddefinedfail}}} +% +\def\doifcmddefined#1#2{{% + \makevalueexpandable + \let\next=\empty + \expandafter\ifx\csname #2\endcsname\relax + #1% If not defined, \let\next as above. + \fi + \expandafter + }\next +} +\def\ifcmddefinedfail{\doignore{ifcommanddefined}} + +% @ifcommandnotdefined CMD ... handled similarly to @ifclear above. +\makecond{ifcommandnotdefined} +\def\ifcommandnotdefined{% + \parsearg{\doifcmddefined{\else \let\next=\ifcmdnotdefinedfail}}} +\def\ifcmdnotdefinedfail{\doignore{ifcommandnotdefined}} + +% Set the `txicommandconditionals' variable, so documents have a way to +% test if the @ifcommand...defined conditionals are available. +\set txicommandconditionals + +% @dircategory CATEGORY -- specify a category of the dir file +% which this file should belong to. Ignore this in TeX. +\let\dircategory=\comment + +% @definfoenclose. +\let\definfoenclose=\comment + + +\message{indexing,} +% Index generation facilities + +% Define \newwrite to be identical to plain tex's \newwrite +% except not \outer, so it can be used within macros and \if's. +\edef\newwrite{\makecsname{ptexnewwrite}} + +% \newindex {foo} defines an index named foo. +% It automatically defines \fooindex such that +% \fooindex ...rest of line... puts an entry in the index foo. +% It also defines \fooindfile to be the number of the output channel for +% the file that accumulates this index. The file's extension is foo. +% The name of an index should be no more than 2 characters long +% for the sake of vms.
+% +\def\newindex#1{% + \iflinks + \expandafter\newwrite \csname#1indfile\endcsname + \openout \csname#1indfile\endcsname \jobname.#1 % Open the file + \fi + \expandafter\xdef\csname#1index\endcsname{% % Define @#1index + \noexpand\doindex{#1}} +} + +% @defindex foo == \newindex{foo} +% +\def\defindex{\parsearg\newindex} + +% Define @defcodeindex, like @defindex except put all entries in @code. +% +\def\defcodeindex{\parsearg\newcodeindex} +% +\def\newcodeindex#1{% + \iflinks + \expandafter\newwrite \csname#1indfile\endcsname + \openout \csname#1indfile\endcsname \jobname.#1 + \fi + \expandafter\xdef\csname#1index\endcsname{% + \noexpand\docodeindex{#1}}% +} + + +% @synindex foo bar makes index foo feed into index bar. +% Do this instead of @defindex foo if you don't want it as a separate index. +% +% @syncodeindex foo bar similar, but put all entries made for index foo +% inside @code. +% +\def\synindex#1 #2 {\dosynindex\doindex{#1}{#2}} +\def\syncodeindex#1 #2 {\dosynindex\docodeindex{#1}{#2}} + +% #1 is \doindex or \docodeindex, #2 the index getting redefined (foo), +% #3 the target index (bar). +\def\dosynindex#1#2#3{% + % Only do \closeout if we haven't already done it, else we'll end up + % closing the target index. + \expandafter \ifx\csname donesynindex#2\endcsname \relax + % The \closeout helps reduce unnecessary open files; the limit on the + % Acorn RISC OS is a mere 16 files. + \expandafter\closeout\csname#2indfile\endcsname + \expandafter\let\csname donesynindex#2\endcsname = 1 + \fi + % redefine \fooindfile: + \expandafter\let\expandafter\temp\expandafter=\csname#3indfile\endcsname + \expandafter\let\csname#2indfile\endcsname=\temp + % redefine \fooindex: + \expandafter\xdef\csname#2index\endcsname{\noexpand#1{#3}}% +} + +% Define \doindex, the driver for all \fooindex macros. +% Argument #1 is generated by the calling \fooindex macro, +% and it is "foo", the name of the index. + +% \doindex just uses \parsearg; it calls \doind for the actual work. 
+% This is because \doind is more useful to call from other macros. + +% There is also \dosubind {index}{topic}{subtopic} +% which makes an entry in a two-level index such as the operation index. + +\def\doindex#1{\edef\indexname{#1}\parsearg\singleindexer} +\def\singleindexer #1{\doind{\indexname}{#1}} + +% like the previous two, but they put @code around the argument. +\def\docodeindex#1{\edef\indexname{#1}\parsearg\singlecodeindexer} +\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}} + +% Take care of Texinfo commands that can appear in an index entry. +% Since there are some commands we want to expand, and others we don't, +% we have to laboriously prevent expansion for those that we don't. +% +\def\indexdummies{% + \escapechar = `\\ % use backslash in output files. + \def\@{@}% change to @@ when we switch to @ as escape char in index files. + \def\ {\realbackslash\space }% + % + % Need these unexpandable (because we define \tt as a dummy) + % definitions when @{ or @} appear in index entry text. Also, more + % complicated, when \tex is in effect and \{ is a \delimiter again. + % We can't use \lbracecmd and \rbracecmd because texindex assumes + % braces and backslashes are used only as delimiters. Perhaps we + % should define @lbrace and @rbrace commands a la @comma. + \def\{{{\tt\char123}}% + \def\}{{\tt\char125}}% + % + % I don't entirely understand this, but when an index entry is + % generated from a macro call, the \endinput which \scanmacro inserts + % causes processing to be prematurely terminated. This is, + % apparently, because \indexsorttmp is fully expanded, and \endinput + % is an expandable command. The redefinition below makes \endinput + % disappear altogether for that purpose -- although logging shows that + % processing continues to some further point. On the other hand, it + % seems \endinput does not hurt in the printed index arg, since that + % is still getting written without apparent harm. 
+ % + % Sample source (mac-idx3.tex, reported by Graham Percival to + % help-texinfo, 22may06): + % @macro funindex {WORD} + % @findex xyz + % @end macro + % ... + % @funindex commtest + % + % The above is not enough to reproduce the bug, but it gives the flavor. + % + % Sample whatsit resulting: + % .@write3{\entry{xyz}{@folio }{@code {xyz@endinput }}} + % + % So: + \let\endinput = \empty + % + % Do the redefinitions. + \commondummies +} + +% For the aux and toc files, @ is the escape character. So we want to +% redefine everything using @ as the escape character (instead of +% \realbackslash, still used for index files). When everything uses @, +% this will be simpler. +% +\def\atdummies{% + \def\@{@@}% + \def\ {@ }% + \let\{ = \lbraceatcmd + \let\} = \rbraceatcmd + % + % Do the redefinitions. + \commondummies + \otherbackslash +} + +% Called from \indexdummies and \atdummies. +% +\def\commondummies{% + % + % \definedummyword defines \#1 as \string\#1\space, thus effectively + % preventing its expansion. This is used only for control words, + % not control letters, because the \space would be incorrect for + % control characters, but is needed to separate the control word + % from whatever follows. + % + % For control letters, we have \definedummyletter, which omits the + % space. + % + % These can be used both for control words that take an argument and + % those that do not. If it is followed by {arg} in the input, then + % that will dutifully get written to the index (or wherever). + % + \def\definedummyword ##1{\def##1{\string##1\space}}% + \def\definedummyletter##1{\def##1{\string##1}}% + \let\definedummyaccent\definedummyletter + % + \commondummiesnofonts + % + \definedummyletter\_% + \definedummyletter\-% + % + % Non-English letters. 
+ \definedummyword\AA + \definedummyword\AE + \definedummyword\DH + \definedummyword\L + \definedummyword\O + \definedummyword\OE + \definedummyword\TH + \definedummyword\aa + \definedummyword\ae + \definedummyword\dh + \definedummyword\exclamdown + \definedummyword\l + \definedummyword\o + \definedummyword\oe + \definedummyword\ordf + \definedummyword\ordm + \definedummyword\questiondown + \definedummyword\ss + \definedummyword\th + % + % Although these internal commands shouldn't show up, sometimes they do. + \definedummyword\bf + \definedummyword\gtr + \definedummyword\hat + \definedummyword\less + \definedummyword\sf + \definedummyword\sl + \definedummyword\tclose + \definedummyword\tt + % + \definedummyword\LaTeX + \definedummyword\TeX + % + % Assorted special characters. + \definedummyword\arrow + \definedummyword\bullet + \definedummyword\comma + \definedummyword\copyright + \definedummyword\registeredsymbol + \definedummyword\dots + \definedummyword\enddots + \definedummyword\entrybreak + \definedummyword\equiv + \definedummyword\error + \definedummyword\euro + \definedummyword\expansion + \definedummyword\geq + \definedummyword\guillemetleft + \definedummyword\guillemetright + \definedummyword\guilsinglleft + \definedummyword\guilsinglright + \definedummyword\lbracechar + \definedummyword\leq + \definedummyword\minus + \definedummyword\ogonek + \definedummyword\pounds + \definedummyword\point + \definedummyword\print + \definedummyword\quotedblbase + \definedummyword\quotedblleft + \definedummyword\quotedblright + \definedummyword\quoteleft + \definedummyword\quoteright + \definedummyword\quotesinglbase + \definedummyword\rbracechar + \definedummyword\result + \definedummyword\textdegree + % + % We want to disable all macros so that they are not expanded by \write. + \macrolist + % + \normalturnoffactive + % + % Handle some cases of @value -- where it does not contain any + % (non-fully-expandable) commands. 
+ \makevalueexpandable +} + +% \commondummiesnofonts: common to \commondummies and \indexnofonts. +% +\def\commondummiesnofonts{% + % Control letters and accents. + \definedummyletter\!% + \definedummyaccent\"% + \definedummyaccent\'% + \definedummyletter\*% + \definedummyaccent\,% + \definedummyletter\.% + \definedummyletter\/% + \definedummyletter\:% + \definedummyaccent\=% + \definedummyletter\?% + \definedummyaccent\^% + \definedummyaccent\`% + \definedummyaccent\~% + \definedummyword\u + \definedummyword\v + \definedummyword\H + \definedummyword\dotaccent + \definedummyword\ogonek + \definedummyword\ringaccent + \definedummyword\tieaccent + \definedummyword\ubaraccent + \definedummyword\udotaccent + \definedummyword\dotless + % + % Texinfo font commands. + \definedummyword\b + \definedummyword\i + \definedummyword\r + \definedummyword\sansserif + \definedummyword\sc + \definedummyword\slanted + \definedummyword\t + % + % Commands that take arguments. + \definedummyword\abbr + \definedummyword\acronym + \definedummyword\anchor + \definedummyword\cite + \definedummyword\code + \definedummyword\command + \definedummyword\dfn + \definedummyword\dmn + \definedummyword\email + \definedummyword\emph + \definedummyword\env + \definedummyword\file + \definedummyword\image + \definedummyword\indicateurl + \definedummyword\inforef + \definedummyword\kbd + \definedummyword\key + \definedummyword\math + \definedummyword\option + \definedummyword\pxref + \definedummyword\ref + \definedummyword\samp + \definedummyword\strong + \definedummyword\tie + \definedummyword\uref + \definedummyword\url + \definedummyword\var + \definedummyword\verb + \definedummyword\w + \definedummyword\xref +} + +% \indexnofonts is used when outputting the strings to sort the index +% by, and when constructing control sequence names. It eliminates all +% control sequences and just writes whatever the best ASCII sort string +% would be for a given command (usually its argument). 
+% +\def\indexnofonts{% + % Accent commands should become @asis. + \def\definedummyaccent##1{\let##1\asis}% + % We can just ignore other control letters. + \def\definedummyletter##1{\let##1\empty}% + % All control words become @asis by default; overrides below. + \let\definedummyword\definedummyaccent + % + \commondummiesnofonts + % + % Don't no-op \tt, since it isn't a user-level command + % and is used in the definitions of the active chars like <, >, |, etc. + % Likewise with the other plain tex font commands. + %\let\tt=\asis + % + \def\ { }% + \def\@{@}% + \def\_{\normalunderscore}% + \def\-{}% @- shouldn't affect sorting + % + % Unfortunately, texindex is not prepared to handle braces in the + % content at all. So for index sorting, we map @{ and @} to strings + % starting with |, since that ASCII character is between ASCII { and }. + \def\{{|a}% + \def\lbracechar{|a}% + % + \def\}{|b}% + \def\rbracechar{|b}% + % + % Non-English letters. + \def\AA{AA}% + \def\AE{AE}% + \def\DH{DZZ}% + \def\L{L}% + \def\OE{OE}% + \def\O{O}% + \def\TH{ZZZ}% + \def\aa{aa}% + \def\ae{ae}% + \def\dh{dzz}% + \def\exclamdown{!}% + \def\l{l}% + \def\oe{oe}% + \def\ordf{a}% + \def\ordm{o}% + \def\o{o}% + \def\questiondown{?}% + \def\ss{ss}% + \def\th{zzz}% + % + \def\LaTeX{LaTeX}% + \def\TeX{TeX}% + % + % Assorted special characters. + % (The following {} will end up in the sort string, but that's ok.) 
+ \def\arrow{->}% + \def\bullet{bullet}% + \def\comma{,}% + \def\copyright{copyright}% + \def\dots{...}% + \def\enddots{...}% + \def\equiv{==}% + \def\error{error}% + \def\euro{euro}% + \def\expansion{==>}% + \def\geq{>=}% + \def\guillemetleft{<<}% + \def\guillemetright{>>}% + \def\guilsinglleft{<}% + \def\guilsinglright{>}% + \def\leq{<=}% + \def\minus{-}% + \def\point{.}% + \def\pounds{pounds}% + \def\print{-|}% + \def\quotedblbase{"}% + \def\quotedblleft{"}% + \def\quotedblright{"}% + \def\quoteleft{`}% + \def\quoteright{'}% + \def\quotesinglbase{,}% + \def\registeredsymbol{R}% + \def\result{=>}% + \def\textdegree{o}% + % + \expandafter\ifx\csname SETtxiindexlquoteignore\endcsname\relax + \else \indexlquoteignore \fi + % + % We need to get rid of all macros, leaving only the arguments (if present). + % Of course this is not nearly correct, but it is the best we can do for now. + % makeinfo does not expand macros in the argument to @deffn, which ends up + % writing an index entry, and texindex isn't prepared for an index sort entry + % that starts with \. + % + % Since macro invocations are followed by braces, we can just redefine them + % to take a single TeX argument. The case of a macro invocation that + % goes to end-of-line is not handled. + % + \macrolist +} + +% Undocumented (for FSFS 2nd ed.): @set txiindexlquoteignore makes us +% ignore left quotes in the sort term. +{\catcode`\`=\active + \gdef\indexlquoteignore{\let`=\empty}} + +\let\indexbackslash=0 %overridden during \printindex. +\let\SETmarginindex=\relax % put index entries in margin (undocumented)? + +% Most index entries go through here, but \dosubind is the general case. +% #1 is the index name, #2 is the entry text. +\def\doind#1#2{\dosubind{#1}{#2}{}} + +% Workhorse for all \fooindexes. +% #1 is name of index, #2 is stuff to put there, #3 is subentry -- +% empty if called from \doind, as we usually are (the main exception +% is with most defuns, which call us directly). 
+% +\def\dosubind#1#2#3{% + \iflinks + {% + % Store the main index entry text (including the third arg). + \toks0 = {#2}% + % If third arg is present, precede it with a space. + \def\thirdarg{#3}% + \ifx\thirdarg\empty \else + \toks0 = \expandafter{\the\toks0 \space #3}% + \fi + % + \edef\writeto{\csname#1indfile\endcsname}% + % + \safewhatsit\dosubindwrite + }% + \fi +} + +% Write the entry in \toks0 to the index file: +% +\def\dosubindwrite{% + % Put the index entry in the margin if desired. + \ifx\SETmarginindex\relax\else + \insert\margin{\hbox{\vrule height8pt depth3pt width0pt \the\toks0}}% + \fi + % + % Remember, we are within a group. + \indexdummies % Must do this here, since \bf, etc expand at this stage + \def\backslashcurfont{\indexbackslash}% \indexbackslash isn't defined now + % so it will be output as is; and it will print as backslash. + % + % Process the index entry with all font commands turned off, to + % get the string to sort by. + {\indexnofonts + \edef\temp{\the\toks0}% need full expansion + \xdef\indexsorttmp{\temp}% + }% + % + % Set up the complete index entry, with both the sort key and + % the original text, including any font commands. We write + % three arguments to \entry to the .?? file (four in the + % subentry case), texindex reduces to two when writing the .??s + % sorted result. + \edef\temp{% + \write\writeto{% + \string\entry{\indexsorttmp}{\noexpand\folio}{\the\toks0}}% + }% + \temp +} + +% Take care of unwanted page breaks/skips around a whatsit: +% +% If a skip is the last thing on the list now, preserve it +% by backing up by \lastskip, doing the \write, then inserting +% the skip again. Otherwise, the whatsit generated by the +% \write or \pdfdest will make \lastskip zero. The result is that +% sequences like this: +% @end defun +% @tindex whatever +% @defun ... +% will have extra space inserted, because the \medbreak in the +% start of the @defun won't see the skip inserted by the @end of +% the previous defun. 
+% +% But don't do any of this if we're not in vertical mode. We +% don't want to do a \vskip and prematurely end a paragraph. +% +% Avoid page breaks due to these extra skips, too. +% +% But wait, there is a catch there: +% We'll have to check whether \lastskip is zero skip. \ifdim is not +% sufficient for this purpose, as it ignores stretch and shrink parts +% of the skip. The only way seems to be to check the textual +% representation of the skip. +% +% The following is almost like \def\zeroskipmacro{0.0pt} except that +% the ``p'' and ``t'' characters have catcode \other, not 11 (letter). +% +\edef\zeroskipmacro{\expandafter\the\csname z@skip\endcsname} +% +\newskip\whatsitskip +\newcount\whatsitpenalty +% +% ..., ready, GO: +% +\def\safewhatsit#1{\ifhmode + #1% + \else + % \lastskip and \lastpenalty cannot both be nonzero simultaneously. + \whatsitskip = \lastskip + \edef\lastskipmacro{\the\lastskip}% + \whatsitpenalty = \lastpenalty + % + % If \lastskip is nonzero, that means the last item was a + % skip. And since a skip is discardable, that means this + % -\whatsitskip glue we're inserting is preceded by a + % non-discardable item, therefore it is not a potential + % breakpoint, therefore no \nobreak needed. + \ifx\lastskipmacro\zeroskipmacro + \else + \vskip-\whatsitskip + \fi + % + #1% + % + \ifx\lastskipmacro\zeroskipmacro + % If \lastskip was zero, perhaps the last item was a penalty, and + % perhaps it was >=10000, e.g., a \nobreak. In that case, we want + % to re-insert the same penalty (values >10000 are used for various + % signals); since we just inserted a non-discardable item, any + % following glue (such as a \parskip) would be a breakpoint. For example: + % @deffn deffn-whatever + % @vindex index-whatever + % Description. + % would allow a break between the index-whatever whatsit + % and the "Description." paragraph. 
+ \ifnum\whatsitpenalty>9999 \penalty\whatsitpenalty \fi + \else + % On the other hand, if we had a nonzero \lastskip, + % this make-up glue would be preceded by a non-discardable item + % (the whatsit from the \write), so we must insert a \nobreak. + \nobreak\vskip\whatsitskip + \fi +\fi} + +% The index entry written in the file actually looks like +% \entry {sortstring}{page}{topic} +% or +% \entry {sortstring}{page}{topic}{subtopic} +% The texindex program reads in these files and writes files +% containing these kinds of lines: +% \initial {c} +% before the first topic whose initial is c +% \entry {topic}{pagelist} +% for a topic that is used without subtopics +% \primary {topic} +% for the beginning of a topic that is used with subtopics +% \secondary {subtopic}{pagelist} +% for each subtopic. + +% Define the user-accessible indexing commands +% @findex, @vindex, @kindex, @cindex. + +\def\findex {\fnindex} +\def\kindex {\kyindex} +\def\cindex {\cpindex} +\def\vindex {\vrindex} +\def\tindex {\tpindex} +\def\pindex {\pgindex} + +\def\cindexsub {\begingroup\obeylines\cindexsub} +{\obeylines % +\gdef\cindexsub "#1" #2^^M{\endgroup % +\dosubind{cp}{#2}{#1}}} + +% Define the macros used in formatting output of the sorted index material. + +% @printindex causes a particular index (the ??s file) to get printed. +% It does not print any chapter heading (usually an @unnumbered). +% +\parseargdef\printindex{\begingroup + \dobreak \chapheadingskip{10000}% + % + \smallfonts \rm + \tolerance = 9500 + \plainfrenchspacing + \everypar = {}% don't want the \kern\-parindent from indentation suppression. + % + % See if the index file exists and is nonempty. + % Change catcode of @ here so that if the index file contains + % \initial {@} + % as its first line, TeX doesn't complain about mismatched braces + % (because it thinks @} is a control sequence). 
+ \catcode`\@ = 11 + \openin 1 \jobname.#1s + \ifeof 1 + % \enddoublecolumns gets confused if there is no text in the index, + % and it loses the chapter title and the aux file entries for the + % index. The easiest way to prevent this problem is to make sure + % there is some text. + \putwordIndexNonexistent + \else + % + % If the index file exists but is empty, then \openin leaves \ifeof + % false. We have to make TeX try to read something from the file, so + % it can discover if there is anything in it. + \read 1 to \temp + \ifeof 1 + \putwordIndexIsEmpty + \else + % Index files are almost Texinfo source, but we use \ as the escape + % character. It would be better to use @, but that's too big a change + % to make right now. + \def\indexbackslash{\backslashcurfont}% + \catcode`\\ = 0 + \escapechar = `\\ + \begindoublecolumns + \input \jobname.#1s + \enddoublecolumns + \fi + \fi + \closein 1 +\endgroup} + +% These macros are used by the sorted index file itself. +% Change them to control the appearance of the index. + +\def\initial#1{{% + % Some minor font changes for the special characters. + \let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt + % + % Remove any glue we may have, we'll be inserting our own. + \removelastskip + % + % We like breaks before the index initials, so insert a bonus. + \nobreak + \vskip 0pt plus 3\baselineskip + \penalty 0 + \vskip 0pt plus -3\baselineskip + % + % Typeset the initial. Making this add up to a whole number of + % baselineskips increases the chance of the dots lining up from column + % to column. It still won't often be perfect, because of the stretch + % we need before each entry, but it's better. + % + % No shrink because it confuses \balancecolumns. + \vskip 1.67\baselineskip plus .5\baselineskip + \leftline{\secbf #1}% + % Do our best not to break after the initial. 
+ \nobreak + \vskip .33\baselineskip plus .1\baselineskip +}} + +% \entry typesets a paragraph consisting of the text (#1), dot leaders, and +% then page number (#2) flushed to the right margin. It is used for index +% and table of contents entries. The paragraph is indented by \leftskip. +% +% A straightforward implementation would start like this: +% \def\entry#1#2{... +% But this freezes the catcodes in the argument, and can cause problems to +% @code, which sets - active. This problem was fixed by a kludge--- +% ``-'' was active throughout whole index, but this isn't really right. +% The right solution is to prevent \entry from swallowing the whole text. +% --kasal, 21nov03 +\def\entry{% + \begingroup + % + % Start a new paragraph if necessary, so our assignments below can't + % affect previous text. + \par + % + % Do not fill out the last line with white space. + \parfillskip = 0in + % + % No extra space above this paragraph. + \parskip = 0in + % + % Do not prefer a separate line ending with a hyphen to fewer lines. + \finalhyphendemerits = 0 + % + % \hangindent is only relevant when the entry text and page number + % don't both fit on one line. In that case, bob suggests starting the + % dots pretty far over on the line. Unfortunately, a large + % indentation looks wrong when the entry text itself is broken across + % lines. So we use a small indentation and put up with long leaders. + % + % \hangafter is reset to 1 (which is the value we want) at the start + % of each paragraph, so we need not do anything with that. + \hangindent = 2em + % + % When the entry text needs to be broken, just fill out the first line + % with blank space. + \rightskip = 0pt plus1fil + % + % A bit of stretch before each entry for the benefit of balancing + % columns. + \vskip 0pt plus1pt + % + % When reading the text of entry, convert explicit line breaks + % from @* into spaces. The user might give these in long section + % titles, for instance. 
+ \def\*{\unskip\space\ignorespaces}% + \def\entrybreak{\hfil\break}% + % + % Swallow the left brace of the text (first parameter): + \afterassignment\doentry + \let\temp = +} +\def\entrybreak{\unskip\space\ignorespaces}% +\def\doentry{% + \bgroup % Instead of the swallowed brace. + \noindent + \aftergroup\finishentry + % And now comes the text of the entry. +} +\def\finishentry#1{% + % #1 is the page number. + % + % The following is kludged to not output a line of dots in the index if + % there are no page numbers. The next person who breaks this will be + % cursed by a Unix daemon. + \setbox\boxA = \hbox{#1}% + \ifdim\wd\boxA = 0pt + \ % + \else + % + % If we must, put the page number on a line of its own, and fill out + % this line with blank space. (The \hfil is overwhelmed with the + % fill leaders glue in \indexdotfill if the page number does fit.) + \hfil\penalty50 + \null\nobreak\indexdotfill % Have leaders before the page number. + % + % The `\ ' here is removed by the implicit \unskip that TeX does as + % part of (the primitive) \par. Without it, a spurious underfull + % \hbox ensues. + \ifpdf + \pdfgettoks#1.% + \ \the\toksA + \else + \ #1% + \fi + \fi + \par + \endgroup +} + +% Like plain.tex's \dotfill, except uses up at least 1 em. +\def\indexdotfill{\cleaders + \hbox{$\mathsurround=0pt \mkern1.5mu.\mkern1.5mu$}\hskip 1em plus 1fill} + +\def\primary #1{\line{#1\hfil}} + +\newskip\secondaryindent \secondaryindent=0.5cm +\def\secondary#1#2{{% + \parfillskip=0in + \parskip=0in + \hangindent=1in + \hangafter=1 + \noindent\hskip\secondaryindent\hbox{#1}\indexdotfill + \ifpdf + \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. + \else + #2 + \fi + \par +}} + +% Define two-column mode, which we use to typeset indexes. +% Adapted from the TeXbook, page 416, which is to say, +% the manmac.tex format used to print the TeXbook itself. 
+\catcode`\@=11 + +\newbox\partialpage +\newdimen\doublecolumnhsize + +\def\begindoublecolumns{\begingroup % ended by \enddoublecolumns + % Grab any single-column material above us. + \output = {% + % + % Here is a possibility not foreseen in manmac: if we accumulate a + % whole lot of material, we might end up calling this \output + % routine twice in a row (see the doublecol-lose test, which is + % essentially a couple of indexes with @setchapternewpage off). In + % that case we just ship out what is in \partialpage with the normal + % output routine. Generally, \partialpage will be empty when this + % runs and this will be a no-op. See the indexspread.tex test case. + \ifvoid\partialpage \else + \onepageout{\pagecontents\partialpage}% + \fi + % + \global\setbox\partialpage = \vbox{% + % Unvbox the main output page. + \unvbox\PAGE + \kern-\topskip \kern\baselineskip + }% + }% + \eject % run that output routine to set \partialpage + % + % Use the double-column output routine for subsequent pages. + \output = {\doublecolumnout}% + % + % Change the page size parameters. We could do this once outside this + % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 + % format, but then we repeat the same computation. Repeating a couple + % of assignments once per index is clearly meaningless for the + % execution time, so we may as well do it in one place. + % + % First we halve the line length, less a little for the gutter between + % the columns. We compute the gutter based on the line length, so it + % changes automatically with the paper format. The magic constant + % below is chosen so that the gutter has the same value (well, +-<1pt) + % as it did when we hard-coded it. + % + % We put the result in a separate register, \doublecolumnhsize, so we + % can restore it in \pagesofar, after \hsize itself has (potentially) + % been clobbered.
+ % + \doublecolumnhsize = \hsize + \advance\doublecolumnhsize by -.04154\hsize + \divide\doublecolumnhsize by 2 + \hsize = \doublecolumnhsize + % + % Double the \vsize as well. (We don't need a separate register here, + % since nobody clobbers \vsize.) + \vsize = 2\vsize +} + +% The double-column output routine for all double-column pages except +% the last. +% +\def\doublecolumnout{% + \splittopskip=\topskip \splitmaxdepth=\maxdepth + % Get the available space for the double columns -- the normal + % (undoubled) page height minus any material left over from the + % previous page. + \dimen@ = \vsize + \divide\dimen@ by 2 + \advance\dimen@ by -\ht\partialpage + % + % box0 will be the left-hand column, box2 the right. + \setbox0=\vsplit255 to\dimen@ \setbox2=\vsplit255 to\dimen@ + \onepageout\pagesofar + \unvbox255 + \penalty\outputpenalty +} +% +% Re-output the contents of the output page -- any previous material, +% followed by the two boxes we just split, in box0 and box2. +\def\pagesofar{% + \unvbox\partialpage + % + \hsize = \doublecolumnhsize + \wd0=\hsize \wd2=\hsize + \hbox to\pagewidth{\box0\hfil\box2}% +} +% +% All done with double columns. +\def\enddoublecolumns{% + % The following penalty ensures that the page builder is exercised + % _before_ we change the output routine. This is necessary in the + % following situation: + % + % The last section of the index consists only of a single entry. + % Before this section, \pagetotal is less than \pagegoal, so no + % break occurs before the last section starts. However, the last + % section, consisting of \initial and the single \entry, does not + % fit on the page and has to be broken off. 
Without the following + % penalty the page builder will not be exercised until \eject + % below, and by that time we'll already have changed the output + % routine to the \balancecolumns version, so the next-to-last + % double-column page will be processed with \balancecolumns, which + % is wrong: The two columns will go to the main vertical list, with + % the broken-off section in the recent contributions. As soon as + % the output routine finishes, TeX starts reconsidering the page + % break. The two columns and the broken-off section both fit on the + % page, because the two columns now take up only half of the page + % goal. When TeX sees \eject from below which follows the final + % section, it invokes the new output routine that we've set after + % \balancecolumns below; \onepageout will try to fit the two columns + % and the final section into the vbox of \pageheight (see + % \pagebody), causing an overfull box. + % + % Note that glue won't work here, because glue does not exercise the + % page builder, unlike penalties (see The TeXbook, pp. 280-281). + \penalty0 + % + \output = {% + % Split the last of the double-column material. Leave it on the + % current page, no automatic page break. + \balancecolumns + % + % If we end up splitting too much material for the current page, + % though, there will be another page break right after this \output + % invocation ends. Having called \balancecolumns once, we do not + % want to call it again. Therefore, reset \output to its normal + % definition right away. (We hope \balancecolumns will never be + % called on to balance too much material, but if it is, this makes + % the output somewhat more palatable.) + \global\output = {\onepageout{\pagecontents\PAGE}}% + }% + \eject + \endgroup % started in \begindoublecolumns + % + % \pagegoal was set to the doubled \vsize above, since we restarted + % the current page. 
We're now back to normal single-column + % typesetting, so reset \pagegoal to the normal \vsize (after the + % \endgroup where \vsize got restored). + \pagegoal = \vsize +} +% +% Called at the end of the double column material. +\def\balancecolumns{% + \setbox0 = \vbox{\unvbox255}% like \box255 but more efficient, see p.120. + \dimen@ = \ht0 + \advance\dimen@ by \topskip + \advance\dimen@ by-\baselineskip + \divide\dimen@ by 2 % target to split to + %debug\message{final 2-column material height=\the\ht0, target=\the\dimen@.}% + \splittopskip = \topskip + % Loop until we get a decent breakpoint. + {% + \vbadness = 10000 + \loop + \global\setbox3 = \copy0 + \global\setbox1 = \vsplit3 to \dimen@ + \ifdim\ht3>\dimen@ + \global\advance\dimen@ by 1pt + \repeat + }% + %debug\message{split to \the\dimen@, column heights: \the\ht1, \the\ht3.}% + \setbox0=\vbox to\dimen@{\unvbox1}% + \setbox2=\vbox to\dimen@{\unvbox3}% + % + \pagesofar +} +\catcode`\@ = \other + + +\message{sectioning,} +% Chapters, sections, etc. + +% Let's start with @part. +\outer\parseargdef\part{\partzzz{#1}} +\def\partzzz#1{% + \chapoddpage + \null + \vskip.3\vsize % move it down on the page a bit + \begingroup + \noindent \titlefonts\rmisbold #1\par % the text + \let\lastnode=\empty % no node to associate with + \writetocentry{part}{#1}{}% but put it in the toc + \headingsoff % no headline or footline on the part page + \chapoddpage + \endgroup +} + +% \unnumberedno is an oxymoron. But we count the unnumbered +% sections so that we can refer to them unambiguously in the pdf +% outlines by their "section number". We avoid collisions with chapter +% numbers by starting them at 10000. (If a document ever has 10000 +% chapters, we're in trouble anyway, I'm sure.) +\newcount\unnumberedno \unnumberedno = 10000 +\newcount\chapno +\newcount\secno \secno=0 +\newcount\subsecno \subsecno=0 +\newcount\subsubsecno \subsubsecno=0 + +% This counter is funny since it counts through charcodes of letters A, B, ... 
+\newcount\appendixno \appendixno = `\@ +% +% \def\appendixletter{\char\the\appendixno} +% We do the following ugly conditional instead of the above simple +% construct for the sake of pdftex, which needs the actual +% letter in the expansion, not just typeset. +% +\def\appendixletter{% + \ifnum\appendixno=`A A% + \else\ifnum\appendixno=`B B% + \else\ifnum\appendixno=`C C% + \else\ifnum\appendixno=`D D% + \else\ifnum\appendixno=`E E% + \else\ifnum\appendixno=`F F% + \else\ifnum\appendixno=`G G% + \else\ifnum\appendixno=`H H% + \else\ifnum\appendixno=`I I% + \else\ifnum\appendixno=`J J% + \else\ifnum\appendixno=`K K% + \else\ifnum\appendixno=`L L% + \else\ifnum\appendixno=`M M% + \else\ifnum\appendixno=`N N% + \else\ifnum\appendixno=`O O% + \else\ifnum\appendixno=`P P% + \else\ifnum\appendixno=`Q Q% + \else\ifnum\appendixno=`R R% + \else\ifnum\appendixno=`S S% + \else\ifnum\appendixno=`T T% + \else\ifnum\appendixno=`U U% + \else\ifnum\appendixno=`V V% + \else\ifnum\appendixno=`W W% + \else\ifnum\appendixno=`X X% + \else\ifnum\appendixno=`Y Y% + \else\ifnum\appendixno=`Z Z% + % The \the is necessary, despite appearances, because \appendixletter is + % expanded while writing the .toc file. \char\appendixno is not + % expandable, thus it is written literally, thus all appendixes come out + % with the same letter (or @) in the toc without it. + \else\char\the\appendixno + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi} + +% Each @chapter defines these (using marks) as the number+name, number +% and name of the chapter. Page headings and footings can use +% these. @section does likewise. +\def\thischapter{} +\def\thischapternum{} +\def\thischaptername{} +\def\thissection{} +\def\thissectionnum{} +\def\thissectionname{} + +\newcount\absseclevel % used to calculate proper heading level +\newcount\secbase\secbase=0 % @raisesections/@lowersections modify this count + +% @raisesections: treat @section as chapter, @subsection as section, etc. 
+\def\raisesections{\global\advance\secbase by -1}
+\let\up=\raisesections % original BFox name
+
+% @lowersections: treat @chapter as section, @section as subsection, etc.
+\def\lowersections{\global\advance\secbase by 1}
+\let\down=\lowersections % original BFox name
+
+% we only have subsub.
+\chardef\maxseclevel = 3
+%
+% A numbered section within an unnumbered changes to unnumbered too.
+% To achieve this, remember the "biggest" unnum. sec. we are currently in:
+\chardef\unnlevel = \maxseclevel
+%
+% Trace whether the current chapter is an appendix or not:
+% \chapheadtype is "N" or "A", unnumbered chapters are ignored.
+\def\chapheadtype{N}
+
+% Choose a heading macro
+% #1 is heading type
+% #2 is heading level
+% #3 is text for heading
+\def\genhead#1#2#3{%
+ % Compute the abs. sec. level:
+ \absseclevel=#2
+ \advance\absseclevel by \secbase
+ % Make sure \absseclevel doesn't fall outside the range:
+ \ifnum \absseclevel < 0
+ \absseclevel = 0
+ \else
+ \ifnum \absseclevel > 3
+ \absseclevel = 3
+ \fi
+ \fi
+ % The heading type:
+ \def\headtype{#1}%
+ \if \headtype U%
+ \ifnum \absseclevel < \unnlevel
+ \chardef\unnlevel = \absseclevel
+ \fi
+ \else
+ % Check for appendix sections:
+ \ifnum \absseclevel = 0
+ \edef\chapheadtype{\headtype}%
+ \else
+ \if \headtype A\if \chapheadtype N%
+ \errmessage{@appendix... within a non-appendix chapter}%
+ \fi\fi
+ \fi
+ % Check for numbered within unnumbered:
+ \ifnum \absseclevel > \unnlevel
+ \def\headtype{U}%
+ \else
+ \chardef\unnlevel = 3
+ \fi
+ \fi
+ % Now print the heading:
+ \if \headtype U%
+ \ifcase\absseclevel
+ \unnumberedzzz{#3}%
+ \or \unnumberedseczzz{#3}%
+ \or \unnumberedsubseczzz{#3}%
+ \or \unnumberedsubsubseczzz{#3}%
+ \fi
+ \else
+ \if \headtype A%
+ \ifcase\absseclevel
+ \appendixzzz{#3}%
+ \or \appendixsectionzzz{#3}%
+ \or \appendixsubseczzz{#3}%
+ \or \appendixsubsubseczzz{#3}%
+ \fi
+ \else
+ \ifcase\absseclevel
+ \chapterzzz{#3}%
+ \or \seczzz{#3}%
+ \or \numberedsubseczzz{#3}%
+ \or \numberedsubsubseczzz{#3}%
+ \fi
+ \fi
+ \fi
+ \suppressfirstparagraphindent
+}
+
+% an interface:
+\def\numhead{\genhead N}
+\def\apphead{\genhead A}
+\def\unnmhead{\genhead U}
+
+% @chapter, @appendix, @unnumbered. Increment top-level counter, reset
+% all lower-level sectioning counters to zero.
+%
+% Also set \chaplevelprefix, which we prepend to @float sequence numbers
+% (e.g., figures), q.v. By default (before any chapter), that is empty.
+\let\chaplevelprefix = \empty
+%
+\outer\parseargdef\chapter{\numhead0{#1}} % normally numhead0 calls chapterzzz
+\def\chapterzzz#1{%
+ % section resetting is \global in case the chapter is in a group, such
+ % as an @include file.
+ \global\secno=0 \global\subsecno=0 \global\subsubsecno=0
+ \global\advance\chapno by 1
+ %
+ % Used for \float.
+ \gdef\chaplevelprefix{\the\chapno.}%
+ \resetallfloatnos
+ %
+ % \putwordChapter can contain complex things in translations.
+ \toks0=\expandafter{\putwordChapter}%
+ \message{\the\toks0 \space \the\chapno}%
+ %
+ % Write the actual heading.
+ \chapmacro{#1}{Ynumbered}{\the\chapno}%
+ %
+ % So @section and the like are numbered underneath this chapter.
+ \global\let\section = \numberedsec
+ \global\let\subsection = \numberedsubsec
+ \global\let\subsubsection = \numberedsubsubsec
+}
+
+\outer\parseargdef\appendix{\apphead0{#1}} % normally calls appendixzzz
+%
+\def\appendixzzz#1{%
+ \global\secno=0 \global\subsecno=0 \global\subsubsecno=0
+ \global\advance\appendixno by 1
+ \gdef\chaplevelprefix{\appendixletter.}%
+ \resetallfloatnos
+ %
+ % \putwordAppendix can contain complex things in translations.
+ \toks0=\expandafter{\putwordAppendix}%
+ \message{\the\toks0 \space \appendixletter}%
+ %
+ \chapmacro{#1}{Yappendix}{\appendixletter}%
+ %
+ \global\let\section = \appendixsec
+ \global\let\subsection = \appendixsubsec
+ \global\let\subsubsection = \appendixsubsubsec
+}
+
+% normally unnmhead0 calls unnumberedzzz:
+\outer\parseargdef\unnumbered{\unnmhead0{#1}}
+\def\unnumberedzzz#1{%
+ \global\secno=0 \global\subsecno=0 \global\subsubsecno=0
+ \global\advance\unnumberedno by 1
+ %
+ % Since an unnumbered has no number, no prefix for figures.
+ \global\let\chaplevelprefix = \empty
+ \resetallfloatnos
+ %
+ % This used to be simply \message{#1}, but TeX fully expands the
+ % argument to \message. Therefore, if #1 contained @-commands, TeX
+ % expanded them. For example, in `@unnumbered The @cite{Book}', TeX
+ % expanded @cite (which turns out to cause errors because \cite is meant
+ % to be executed, not expanded).
+ %
+ % Anyway, we don't want the fully-expanded definition of @cite to appear
+ % as a result of the \message, we just want `@cite' itself. We use
+ % \the\toks0 to achieve this: TeX expands \the\toks0 only once,
+ % simply yielding the contents of \toks0. (We also do this for
+ % the toc entries.)
+ \toks0 = {#1}%
+ \message{(\the\toks0)}%
+ %
+ \chapmacro{#1}{Ynothing}{\the\unnumberedno}%
+ %
+ \global\let\section = \unnumberedsec
+ \global\let\subsection = \unnumberedsubsec
+ \global\let\subsubsection = \unnumberedsubsubsec
+}
+
+% @centerchap is like @unnumbered, but the heading is centered.
+\outer\parseargdef\centerchap{%
+ % Well, we could do the following in a group, but that would break
+ % an assumption that \chapmacro is called at the outermost level.
+ % Thus we are safer this way: --kasal, 24feb04
+ \let\centerparametersmaybe = \centerparameters
+ \unnmhead0{#1}%
+ \let\centerparametersmaybe = \relax
+}
+
+% @top is like @unnumbered.
+\let\top\unnumbered
+
+% Sections.
+%
+\outer\parseargdef\numberedsec{\numhead1{#1}} % normally calls seczzz
+\def\seczzz#1{%
+ \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1
+ \sectionheading{#1}{sec}{Ynumbered}{\the\chapno.\the\secno}%
+}
+
+% normally calls appendixsectionzzz:
+\outer\parseargdef\appendixsection{\apphead1{#1}}
+\def\appendixsectionzzz#1{%
+ \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1
+ \sectionheading{#1}{sec}{Yappendix}{\appendixletter.\the\secno}%
+}
+\let\appendixsec\appendixsection
+
+% normally calls unnumberedseczzz:
+\outer\parseargdef\unnumberedsec{\unnmhead1{#1}}
+\def\unnumberedseczzz#1{%
+ \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1
+ \sectionheading{#1}{sec}{Ynothing}{\the\unnumberedno.\the\secno}%
+}
+
+% Subsections.
+%
+% normally calls numberedsubseczzz:
+\outer\parseargdef\numberedsubsec{\numhead2{#1}}
+\def\numberedsubseczzz#1{%
+ \global\subsubsecno=0 \global\advance\subsecno by 1
+ \sectionheading{#1}{subsec}{Ynumbered}{\the\chapno.\the\secno.\the\subsecno}%
+}
+
+% normally calls appendixsubseczzz:
+\outer\parseargdef\appendixsubsec{\apphead2{#1}}
+\def\appendixsubseczzz#1{%
+ \global\subsubsecno=0 \global\advance\subsecno by 1
+ \sectionheading{#1}{subsec}{Yappendix}%
+ {\appendixletter.\the\secno.\the\subsecno}%
+}
+
+% normally calls unnumberedsubseczzz:
+\outer\parseargdef\unnumberedsubsec{\unnmhead2{#1}}
+\def\unnumberedsubseczzz#1{%
+ \global\subsubsecno=0 \global\advance\subsecno by 1
+ \sectionheading{#1}{subsec}{Ynothing}%
+ {\the\unnumberedno.\the\secno.\the\subsecno}%
+}
+
+% Subsubsections.
+%
+% normally numberedsubsubseczzz:
+\outer\parseargdef\numberedsubsubsec{\numhead3{#1}}
+\def\numberedsubsubseczzz#1{%
+ \global\advance\subsubsecno by 1
+ \sectionheading{#1}{subsubsec}{Ynumbered}%
+ {\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno}%
+}
+
+% normally appendixsubsubseczzz:
+\outer\parseargdef\appendixsubsubsec{\apphead3{#1}}
+\def\appendixsubsubseczzz#1{%
+ \global\advance\subsubsecno by 1
+ \sectionheading{#1}{subsubsec}{Yappendix}%
+ {\appendixletter.\the\secno.\the\subsecno.\the\subsubsecno}%
+}
+
+% normally unnumberedsubsubseczzz:
+\outer\parseargdef\unnumberedsubsubsec{\unnmhead3{#1}}
+\def\unnumberedsubsubseczzz#1{%
+ \global\advance\subsubsecno by 1
+ \sectionheading{#1}{subsubsec}{Ynothing}%
+ {\the\unnumberedno.\the\secno.\the\subsecno.\the\subsubsecno}%
+}
+
+% These macros control what the section commands do, according
+% to what kind of chapter we are in (ordinary, appendix, or unnumbered).
+% Define them by default for a numbered chapter.
+\let\section = \numberedsec
+\let\subsection = \numberedsubsec
+\let\subsubsection = \numberedsubsubsec
+
+% Define @majorheading, @heading and @subheading
+
+\def\majorheading{%
+ {\advance\chapheadingskip by 10pt \chapbreak }%
+ \parsearg\chapheadingzzz
+}
+
+\def\chapheading{\chapbreak \parsearg\chapheadingzzz}
+\def\chapheadingzzz#1{%
+ \vbox{\chapfonts \raggedtitlesettings #1\par}%
+ \nobreak\bigskip \nobreak
+ \suppressfirstparagraphindent
+}
+
+% @heading, @subheading, @subsubheading.
+\parseargdef\heading{\sectionheading{#1}{sec}{Yomitfromtoc}{}
+ \suppressfirstparagraphindent}
+\parseargdef\subheading{\sectionheading{#1}{subsec}{Yomitfromtoc}{}
+ \suppressfirstparagraphindent}
+\parseargdef\subsubheading{\sectionheading{#1}{subsubsec}{Yomitfromtoc}{}
+ \suppressfirstparagraphindent}
+
+% These macros generate a chapter, section, etc. heading only
+% (including whitespace, linebreaking, etc. around it),
+% given all the information in convenient, parsed form.
+
+% Args are the skip and penalty (usually negative)
+\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi}
+
+% Parameter controlling skip before chapter headings (if needed)
+\newskip\chapheadingskip
+
+% Define plain chapter starts, and page on/off switching for it.
+\def\chapbreak{\dobreak \chapheadingskip {-4000}}
+\def\chappager{\par\vfill\supereject}
+% Because \domark is called before \chapoddpage, the filler page will
+% get the headings for the next chapter, which is wrong. But we don't
+% care -- we just disable all headings on the filler page.
+\def\chapoddpage{%
+ \chappager
+ \ifodd\pageno \else
+ \begingroup
+ \headingsoff
+ \null
+ \chappager
+ \endgroup
+ \fi
+}
+
+\def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname}
+
+\def\CHAPPAGoff{%
+\global\let\contentsalignmacro = \chappager
+\global\let\pchapsepmacro=\chapbreak
+\global\let\pagealignmacro=\chappager}
+
+\def\CHAPPAGon{%
+\global\let\contentsalignmacro = \chappager
+\global\let\pchapsepmacro=\chappager
+\global\let\pagealignmacro=\chappager
+\global\def\HEADINGSon{\HEADINGSsingle}}
+
+\def\CHAPPAGodd{%
+\global\let\contentsalignmacro = \chapoddpage
+\global\let\pchapsepmacro=\chapoddpage
+\global\let\pagealignmacro=\chapoddpage
+\global\def\HEADINGSon{\HEADINGSdouble}}
+
+\CHAPPAGon
+
+% Chapter opening.
+%
+% #1 is the text, #2 is the section type (Ynumbered, Ynothing,
+% Yappendix, Yomitfromtoc), #3 the chapter number.
+%
+% To test against our argument.
+\def\Ynothingkeyword{Ynothing}
+\def\Yomitfromtockeyword{Yomitfromtoc}
+\def\Yappendixkeyword{Yappendix}
+%
+\def\chapmacro#1#2#3{%
+ % Insert the first mark before the heading break (see notes for \domark).
+ \let\prevchapterdefs=\lastchapterdefs
+ \let\prevsectiondefs=\lastsectiondefs
+ \gdef\lastsectiondefs{\gdef\thissectionname{}\gdef\thissectionnum{}%
+ \gdef\thissection{}}%
+ %
+ \def\temptype{#2}%
+ \ifx\temptype\Ynothingkeyword
+ \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}%
+ \gdef\thischapter{\thischaptername}}%
+ \else\ifx\temptype\Yomitfromtockeyword
+ \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}%
+ \gdef\thischapter{}}%
+ \else\ifx\temptype\Yappendixkeyword
+ \toks0={#1}%
+ \xdef\lastchapterdefs{%
+ \gdef\noexpand\thischaptername{\the\toks0}%
+ \gdef\noexpand\thischapternum{\appendixletter}%
+ % \noexpand\putwordAppendix avoids expanding indigestible
+ % commands in some of the translations.
+ \gdef\noexpand\thischapter{\noexpand\putwordAppendix{}
+ \noexpand\thischapternum:
+ \noexpand\thischaptername}%
+ }%
+ \else
+ \toks0={#1}%
+ \xdef\lastchapterdefs{%
+ \gdef\noexpand\thischaptername{\the\toks0}%
+ \gdef\noexpand\thischapternum{\the\chapno}%
+ % \noexpand\putwordChapter avoids expanding indigestible
+ % commands in some of the translations.
+ \gdef\noexpand\thischapter{\noexpand\putwordChapter{}
+ \noexpand\thischapternum:
+ \noexpand\thischaptername}%
+ }%
+ \fi\fi\fi
+ %
+ % Output the mark. Pass it through \safewhatsit, to take care of
+ % the preceding space.
+ \safewhatsit\domark
+ %
+ % Insert the chapter heading break.
+ \pchapsepmacro
+ %
+ % Now the second mark, after the heading break. No break points
+ % between here and the heading.
+ \let\prevchapterdefs=\lastchapterdefs
+ \let\prevsectiondefs=\lastsectiondefs
+ \domark
+ %
+ {%
+ \chapfonts \rmisbold
+ %
+ % Have to define \lastsection before calling \donoderef, because the
+ % xref code eventually uses it. On the other hand, it has to be called
+ % after \pchapsepmacro, or the headline will change too soon.
+ \gdef\lastsection{#1}%
+ %
+ % Only insert the separating space if we have a chapter/appendix
+ % number, and don't print the unnumbered ``number''.
+ \ifx\temptype\Ynothingkeyword
+ \setbox0 = \hbox{}%
+ \def\toctype{unnchap}%
+ \else\ifx\temptype\Yomitfromtockeyword
+ \setbox0 = \hbox{}% contents like unnumbered, but no toc entry
+ \def\toctype{omit}%
+ \else\ifx\temptype\Yappendixkeyword
+ \setbox0 = \hbox{\putwordAppendix{} #3\enspace}%
+ \def\toctype{app}%
+ \else
+ \setbox0 = \hbox{#3\enspace}%
+ \def\toctype{numchap}%
+ \fi\fi\fi
+ %
+ % Write the toc entry for this chapter. Must come before the
+ % \donoderef, because we include the current node name in the toc
+ % entry, and \donoderef resets it to empty.
+ \writetocentry{\toctype}{#1}{#3}%
+ %
+ % For pdftex, we have to write out the node definition (aka, make
+ % the pdfdest) after any page break, but before the actual text has
+ % been typeset. If the destination for the pdf outline is after the
+ % text, then jumping from the outline may wind up with the text not
+ % being visible, for instance under high magnification.
+ \donoderef{#2}%
+ %
+ % Typeset the actual heading.
+ \nobreak % Avoid page breaks at the interline glue.
+ \vbox{\raggedtitlesettings \hangindent=\wd0 \centerparametersmaybe
+ \unhbox0 #1\par}%
+ }%
+ \nobreak\bigskip % no page break after a chapter title
+ \nobreak
+}
+
+% @centerchap -- centered and unnumbered.
+\let\centerparametersmaybe = \relax
+\def\centerparameters{%
+ \advance\rightskip by 3\rightskip
+ \leftskip = \rightskip
+ \parfillskip = 0pt
+}
+
+
+% I don't think this chapter style is supported any more, so I'm not
+% updating it with the new noderef stuff. We'll see. --karl, 11aug03.
+%
+\def\setchapterstyle #1 {\csname CHAPF#1\endcsname}
+%
+\def\unnchfopen #1{%
+ \chapoddpage
+ \vbox{\chapfonts \raggedtitlesettings #1\par}%
+ \nobreak\bigskip\nobreak
+}
+\def\chfopen #1#2{\chapoddpage {\chapfonts
+\vbox to 3in{\vfil \hbox to\hsize{\hfil #2} \hbox to\hsize{\hfil #1} \vfil}}%
+\par\penalty 5000 %
+}
+\def\centerchfopen #1{%
+ \chapoddpage
+ \vbox{\chapfonts \raggedtitlesettings \hfill #1\hfill}%
+ \nobreak\bigskip \nobreak
+}
+\def\CHAPFopen{%
+ \global\let\chapmacro=\chfopen
+ \global\let\centerchapmacro=\centerchfopen}
+
+
+% Section titles. These macros combine the section number parts and
+% call the generic \sectionheading to do the printing.
+%
+\newskip\secheadingskip
+\def\secheadingbreak{\dobreak \secheadingskip{-1000}}
+
+% Subsection titles.
+\newskip\subsecheadingskip
+\def\subsecheadingbreak{\dobreak \subsecheadingskip{-500}}
+
+% Subsubsection titles.
+\def\subsubsecheadingskip{\subsecheadingskip}
+\def\subsubsecheadingbreak{\subsecheadingbreak}
+
+
+% Print any size, any type, section title.
+%
+% #1 is the text, #2 is the section level (sec/subsec/subsubsec), #3 is
+% the section type for xrefs (Ynumbered, Ynothing, Yappendix), #4 is the
+% section number.
+%
+\def\seckeyword{sec}
+%
+\def\sectionheading#1#2#3#4{%
+ {%
+ \checkenv{}% should not be in an environment.
+ %
+ % Switch to the right set of fonts.
+ \csname #2fonts\endcsname \rmisbold
+ %
+ \def\sectionlevel{#2}%
+ \def\temptype{#3}%
+ %
+ % Insert first mark before the heading break (see notes for \domark).
+ \let\prevsectiondefs=\lastsectiondefs
+ \ifx\temptype\Ynothingkeyword
+ \ifx\sectionlevel\seckeyword
+ \gdef\lastsectiondefs{\gdef\thissectionname{#1}\gdef\thissectionnum{}%
+ \gdef\thissection{\thissectionname}}%
+ \fi
+ \else\ifx\temptype\Yomitfromtockeyword
+ % Don't redefine \thissection.
+ \else\ifx\temptype\Yappendixkeyword
+ \ifx\sectionlevel\seckeyword
+ \toks0={#1}%
+ \xdef\lastsectiondefs{%
+ \gdef\noexpand\thissectionname{\the\toks0}%
+ \gdef\noexpand\thissectionnum{#4}%
+ % \noexpand\putwordSection avoids expanding indigestible
+ % commands in some of the translations.
+ \gdef\noexpand\thissection{\noexpand\putwordSection{}
+ \noexpand\thissectionnum:
+ \noexpand\thissectionname}%
+ }%
+ \fi
+ \else
+ \ifx\sectionlevel\seckeyword
+ \toks0={#1}%
+ \xdef\lastsectiondefs{%
+ \gdef\noexpand\thissectionname{\the\toks0}%
+ \gdef\noexpand\thissectionnum{#4}%
+ % \noexpand\putwordSection avoids expanding indigestible
+ % commands in some of the translations.
+ \gdef\noexpand\thissection{\noexpand\putwordSection{}
+ \noexpand\thissectionnum:
+ \noexpand\thissectionname}%
+ }%
+ \fi
+ \fi\fi\fi
+ %
+ % Go into vertical mode. Usually we'll already be there, but we
+ % don't want the following whatsit to end up in a preceding paragraph
+ % if the document didn't happen to have a blank line.
+ \par
+ %
+ % Output the mark. Pass it through \safewhatsit, to take care of
+ % the preceding space.
+ \safewhatsit\domark
+ %
+ % Insert space above the heading.
+ \csname #2headingbreak\endcsname
+ %
+ % Now the second mark, after the heading break. No break points
+ % between here and the heading.
+ \let\prevsectiondefs=\lastsectiondefs
+ \domark
+ %
+ % Only insert the space after the number if we have a section number.
+ \ifx\temptype\Ynothingkeyword
+ \setbox0 = \hbox{}%
+ \def\toctype{unn}%
+ \gdef\lastsection{#1}%
+ \else\ifx\temptype\Yomitfromtockeyword
+ % for @headings -- no section number, don't include in toc,
+ % and don't redefine \lastsection.
+ \setbox0 = \hbox{}%
+ \def\toctype{omit}%
+ \let\sectionlevel=\empty
+ \else\ifx\temptype\Yappendixkeyword
+ \setbox0 = \hbox{#4\enspace}%
+ \def\toctype{app}%
+ \gdef\lastsection{#1}%
+ \else
+ \setbox0 = \hbox{#4\enspace}%
+ \def\toctype{num}%
+ \gdef\lastsection{#1}%
+ \fi\fi\fi
+ %
+ % Write the toc entry (before \donoderef). See comments in \chapmacro.
+ \writetocentry{\toctype\sectionlevel}{#1}{#4}%
+ %
+ % Write the node reference (= pdf destination for pdftex).
+ % Again, see comments in \chapmacro.
+ \donoderef{#3}%
+ %
+ % Interline glue will be inserted when the vbox is completed.
+ % That glue will be a valid breakpoint for the page, since it'll be
+ % preceded by a whatsit (usually from the \donoderef, or from the
+ % \writetocentry if there was no node). We don't want to allow that
+ % break, since then the whatsits could end up on page n while the
+ % section is on page n+1, thus toc/etc. are wrong. Debian bug 276000.
+ \nobreak
+ %
+ % Output the actual section heading.
+ \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \ptexraggedright
+ \hangindent=\wd0 % zero if no section number
+ \unhbox0 #1}%
+ }%
+ % Add extra space after the heading -- half of whatever came above it.
+ % Don't allow stretch, though.
+ \kern .5 \csname #2headingskip\endcsname
+ %
+ % Do not let the kern be a potential breakpoint, as it would be if it
+ % was followed by glue.
+ \nobreak
+ %
+ % We'll almost certainly start a paragraph next, so don't let that
+ % glue accumulate. (Not a breakpoint because it's preceded by a
+ % discardable item.) However, when a paragraph is not started next
+ % (\startdefun, \cartouche, \center, etc.), this needs to be wiped out
+ % or the negative glue will cause weirdly wrong output, typically
+ % obscuring the section heading with something else.
+ \vskip-\parskip
+ %
+ % This is so the last item on the main vertical list is a known
+ % \penalty > 10000, so \startdefun, etc., can recognize the situation
+ % and do the needful.
+ \penalty 10001
+}
+
+
+\message{toc,}
+% Table of contents.
+\newwrite\tocfile
+
+% Write an entry to the toc file, opening it if necessary.
+% Called from @chapter, etc.
+%
+% Example usage: \writetocentry{sec}{Section Name}{\the\chapno.\the\secno}
+% We append the current node name (if any) and page number as additional
+% arguments for the \{chap,sec,...}entry macros which will eventually
+% read this. The node name is used in the pdf outlines as the
+% destination to jump to.
+%
+% We open the .toc file for writing here instead of at @setfilename (or
+% any other fixed time) so that @contents can be anywhere in the document.
+% But if #1 is `omit', then we don't do anything. This is used for the
+% table of contents chapter openings themselves.
+%
+\newif\iftocfileopened
+\def\omitkeyword{omit}%
+%
+\def\writetocentry#1#2#3{%
+ \edef\writetoctype{#1}%
+ \ifx\writetoctype\omitkeyword \else
+ \iftocfileopened\else
+ \immediate\openout\tocfile = \jobname.toc
+ \global\tocfileopenedtrue
+ \fi
+ %
+ \iflinks
+ {\atdummies
+ \edef\temp{%
+ \write\tocfile{@#1entry{#2}{#3}{\lastnode}{\noexpand\folio}}}%
+ \temp
+ }%
+ \fi
+ \fi
+ %
+ % Tell \shipout to create a pdf destination on each page, if we're
+ % writing pdf. These are used in the table of contents. We can't
+ % just write one on every page because the title pages are numbered
+ % 1 and 2 (the page numbers aren't printed), and so are the first
+ % two pages of the document. Thus, we'd have two destinations named
+ % `1', and two named `2'.
+ \ifpdf \global\pdfmakepagedesttrue \fi
+}
+
+
+% These characters do not print properly in the Computer Modern roman
+% fonts, so we must take special care. This is more or less redundant
+% with the Texinfo input format setup at the end of this file.
+%
+\def\activecatcodes{%
+ \catcode`\"=\active
+ \catcode`\$=\active
+ \catcode`\<=\active
+ \catcode`\>=\active
+ \catcode`\\=\active
+ \catcode`\^=\active
+ \catcode`\_=\active
+ \catcode`\|=\active
+ \catcode`\~=\active
+}
+
+
+% Read the toc file, which is essentially Texinfo input.
+\def\readtocfile{%
+ \setupdatafile
+ \activecatcodes
+ \input \tocreadfilename
+}
+
+\newskip\contentsrightmargin \contentsrightmargin=1in
+\newcount\savepageno
+\newcount\lastnegativepageno \lastnegativepageno = -1
+
+% Prepare to read what we've written to \tocfile.
+%
+\def\startcontents#1{%
+ % If @setchapternewpage on, and @headings double, the contents should
+ % start on an odd page, unlike chapters. Thus, we maintain
+ % \contentsalignmacro in parallel with \pagealignmacro.
+ % From: Torbjorn Granlund
+ \contentsalignmacro
+ \immediate\closeout\tocfile
+ %
+ % Don't need to put `Contents' or `Short Contents' in the headline.
+ % It is abundantly clear what they are.
+ \chapmacro{#1}{Yomitfromtoc}{}%
+ %
+ \savepageno = \pageno
+ \begingroup % Set up to handle contents files properly.
+ \raggedbottom % Worry more about breakpoints than the bottom.
+ \advance\hsize by -\contentsrightmargin % Don't use the full line length.
+ %
+ % Roman numerals for page numbers.
+ \ifnum \pageno>0 \global\pageno = \lastnegativepageno \fi
+}
+
+% redefined for the two-volume lispref. We always output on
+% \jobname.toc even if this is redefined.
+%
+\def\tocreadfilename{\jobname.toc}
+
+% Normal (long) toc.
+%
+\def\contents{%
+ \startcontents{\putwordTOC}%
+ \openin 1 \tocreadfilename\space
+ \ifeof 1 \else
+ \readtocfile
+ \fi
+ \vfill \eject
+ \contentsalignmacro % in case @setchapternewpage odd is in effect
+ \ifeof 1 \else
+ \pdfmakeoutlines
+ \fi
+ \closein 1
+ \endgroup
+ \lastnegativepageno = \pageno
+ \global\pageno = \savepageno
+}
+
+% And just the chapters.
+\def\summarycontents{%
+ \startcontents{\putwordShortTOC}%
+ %
+ \let\partentry = \shortpartentry
+ \let\numchapentry = \shortchapentry
+ \let\appentry = \shortchapentry
+ \let\unnchapentry = \shortunnchapentry
+ % We want a true roman here for the page numbers.
+ \secfonts
+ \let\rm=\shortcontrm \let\bf=\shortcontbf
+ \let\sl=\shortcontsl \let\tt=\shortconttt
+ \rm
+ \hyphenpenalty = 10000
+ \advance\baselineskip by 1pt % Open it up a little.
+ \def\numsecentry##1##2##3##4{}
+ \let\appsecentry = \numsecentry
+ \let\unnsecentry = \numsecentry
+ \let\numsubsecentry = \numsecentry
+ \let\appsubsecentry = \numsecentry
+ \let\unnsubsecentry = \numsecentry
+ \let\numsubsubsecentry = \numsecentry
+ \let\appsubsubsecentry = \numsecentry
+ \let\unnsubsubsecentry = \numsecentry
+ \openin 1 \tocreadfilename\space
+ \ifeof 1 \else
+ \readtocfile
+ \fi
+ \closein 1
+ \vfill \eject
+ \contentsalignmacro % in case @setchapternewpage odd is in effect
+ \endgroup
+ \lastnegativepageno = \pageno
+ \global\pageno = \savepageno
+}
+\let\shortcontents = \summarycontents
+
+% Typeset the label for a chapter or appendix for the short contents.
+% The arg is, e.g., `A' for an appendix, or `3' for a chapter.
+%
+\def\shortchaplabel#1{%
+ % This space should be enough, since a single number is .5em, and the
+ % widest letter (M) is 1em, at least in the Computer Modern fonts.
+ % But use \hss just in case.
+ % (This space doesn't include the extra space that gets added after
+ % the label; that gets put in by \shortchapentry.)
+ %
+ % We'd like to right-justify chapter numbers, but that looks strange
+ % with appendix letters. And right-justifying numbers and
+ % left-justifying letters looks strange when there are fewer than 10
+ % chapters. Have to read the whole toc once to know how many chapters
+ % there are before deciding ...
+ \hbox to 1em{#1\hss}%
+}
+
+% These macros generate individual entries in the table of contents.
+% The first argument is the chapter or section name.
+% The last argument is the page number.
+% The arguments in between are the chapter number, section number, ...
+
+% Parts, in the main contents. Replace the part number, which doesn't
+% exist, with an empty box. Let's hope all the numbers have the same width.
+% Also ignore the page number, which is conventionally not printed.
+\def\numeralbox{\setbox0=\hbox{8}\hbox to \wd0{\hfil}}
+\def\partentry#1#2#3#4{\dochapentry{\numeralbox\labelspace#1}{}}
+%
+% Parts, in the short toc.
+\def\shortpartentry#1#2#3#4{%
+ \penalty-300
+ \vskip.5\baselineskip plus.15\baselineskip minus.1\baselineskip
+ \shortchapentry{{\bf #1}}{\numeralbox}{}{}%
+}
+
+% Chapters, in the main contents.
+\def\numchapentry#1#2#3#4{\dochapentry{#2\labelspace#1}{#4}}
+%
+% Chapters, in the short toc.
+% See comments in \dochapentry re vbox and related settings.
+\def\shortchapentry#1#2#3#4{%
+ \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno\bgroup#4\egroup}%
+}
+
+% Appendices, in the main contents.
+% Need the word Appendix, and a fixed-size box.
+%
+\def\appendixbox#1{%
+ % We use M since it's probably the widest letter.
+ \setbox0 = \hbox{\putwordAppendix{} M}%
+ \hbox to \wd0{\putwordAppendix{} #1\hss}}
+%
+\def\appentry#1#2#3#4{\dochapentry{\appendixbox{#2}\labelspace#1}{#4}}
+
+% Unnumbered chapters.
+\def\unnchapentry#1#2#3#4{\dochapentry{#1}{#4}}
+\def\shortunnchapentry#1#2#3#4{\tocentry{#1}{\doshortpageno\bgroup#4\egroup}}
+
+% Sections.
+\def\numsecentry#1#2#3#4{\dosecentry{#2\labelspace#1}{#4}}
+\let\appsecentry=\numsecentry
+\def\unnsecentry#1#2#3#4{\dosecentry{#1}{#4}}
+
+% Subsections.
+\def\numsubsecentry#1#2#3#4{\dosubsecentry{#2\labelspace#1}{#4}}
+\let\appsubsecentry=\numsubsecentry
+\def\unnsubsecentry#1#2#3#4{\dosubsecentry{#1}{#4}}
+
+% And subsubsections.
+\def\numsubsubsecentry#1#2#3#4{\dosubsubsecentry{#2\labelspace#1}{#4}}
+\let\appsubsubsecentry=\numsubsubsecentry
+\def\unnsubsubsecentry#1#2#3#4{\dosubsubsecentry{#1}{#4}}
+
+% This parameter controls the indentation of the various levels.
+% Same as \defaultparindent.
+\newdimen\tocindent \tocindent = 15pt
+
+% Now for the actual typesetting. In all these, #1 is the text and #2 is the
+% page number.
+%
+% If the toc has to be broken over pages, we want it to be at chapters
+% if at all possible; hence the \penalty.
+\def\dochapentry#1#2{%
+ \penalty-300 \vskip1\baselineskip plus.33\baselineskip minus.25\baselineskip
+ \begingroup
+ \chapentryfonts
+ \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+ \endgroup
+ \nobreak\vskip .25\baselineskip plus.1\baselineskip
+}
+
+\def\dosecentry#1#2{\begingroup
+ \secentryfonts \leftskip=\tocindent
+ \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+\endgroup}
+
+\def\dosubsecentry#1#2{\begingroup
+ \subsecentryfonts \leftskip=2\tocindent
+ \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+\endgroup}
+
+\def\dosubsubsecentry#1#2{\begingroup
+ \subsubsecentryfonts \leftskip=3\tocindent
+ \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+\endgroup}
+
+% We use the same \entry macro as for the index entries.
+\let\tocentry = \entry
+
+% Space between chapter (or whatever) number and the title.
+\def\labelspace{\hskip1em \relax}
+
+\def\dopageno#1{{\rm #1}}
+\def\doshortpageno#1{{\rm #1}}
+
+\def\chapentryfonts{\secfonts \rm}
+\def\secentryfonts{\textfonts}
+\def\subsecentryfonts{\textfonts}
+\def\subsubsecentryfonts{\textfonts}
+
+
+\message{environments,}
+% @foo ... @end foo.
+
+% @tex ... @end tex escapes into raw TeX temporarily.
+% One exception: @ is still an escape character, so that @end tex works.
+% But \@ or @@ will get a plain @ character.
+
+\envdef\tex{%
+ \setupmarkupstyle{tex}%
+ \catcode `\\=0 \catcode `\{=1 \catcode `\}=2
+ \catcode `\$=3 \catcode `\&=4 \catcode `\#=6
+ \catcode `\^=7 \catcode `\_=8 \catcode `\~=\active \let~=\tie
+ \catcode `\%=14
+ \catcode `\+=\other
+ \catcode `\"=\other
+ \catcode `\|=\other
+ \catcode `\<=\other
+ \catcode `\>=\other
+ \catcode`\`=\other
+ \catcode`\'=\other
+ \escapechar=`\\
+ %
+ % ' is active in math mode (mathcode"8000). So reset it, and all our
+ % other math active characters (just in case), to plain's definitions.
+ \mathactive
+ %
+ \let\b=\ptexb
+ \let\bullet=\ptexbullet
+ \let\c=\ptexc
+ \let\,=\ptexcomma
+ \let\.=\ptexdot
+ \let\dots=\ptexdots
+ \let\equiv=\ptexequiv
+ \let\!=\ptexexclam
+ \let\i=\ptexi
+ \let\indent=\ptexindent
+ \let\noindent=\ptexnoindent
+ \let\{=\ptexlbrace
+ \let\+=\tabalign
+ \let\}=\ptexrbrace
+ \let\/=\ptexslash
+ \let\*=\ptexstar
+ \let\t=\ptext
+ \expandafter \let\csname top\endcsname=\ptextop % outer
+ \let\frenchspacing=\plainfrenchspacing
+ %
+ \def\endldots{\mathinner{\ldots\ldots\ldots\ldots}}%
+ \def\enddots{\relax\ifmmode\endldots\else$\mathsurround=0pt \endldots\,$\fi}%
+ \def\@{@}%
+}
+% There is no need to define \Etex.
+
+% Define @lisp ... @end lisp.
+% @lisp environment forms a group so it can rebind things,
+% including the definition of @end lisp (which normally is erroneous).
+
+% Amount to narrow the margins by for @lisp.
+\newskip\lispnarrowing \lispnarrowing=0.4in
+
+% This is the definition that ^^M gets inside @lisp, @example, and other
+% such environments. \null is better than a space, since it doesn't
+% have any width.
+\def\lisppar{\null\endgraf}
+
+% This space is always present above and below environments.
+\newskip\envskipamount \envskipamount = 0pt
+
+% Make spacing above and below environment symmetrical. We use \parskip here
+% to help in doing that, since in @example-like environments \parskip
+% is reset to zero; thus the \afterenvbreak inserts no space -- but the
+% start of the next paragraph will insert \parskip.
+%
+\def\aboveenvbreak{{%
+ % =10000 instead of <10000 because of a special case in \itemzzz and
+ % \sectionheading, q.v.
+ \ifnum \lastpenalty=10000 \else
+ \advance\envskipamount by \parskip
+ \endgraf
+ \ifdim\lastskip<\envskipamount
+ \removelastskip
+ % it's not a good place to break if the last penalty was \nobreak
+ % or better ...
+ \ifnum\lastpenalty<10000 \penalty-50 \fi
+ \vskip\envskipamount
+ \fi
+ \fi
+}}
+
+\let\afterenvbreak = \aboveenvbreak
+
+% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins; it will
+% also clear it, so that its embedded environments do the narrowing again.
+\let\nonarrowing=\relax
+
+% @cartouche ... @end cartouche: draw rectangle w/rounded corners around
+% environment contents.
+\font\circle=lcircle10
+\newdimen\circthick
+\newdimen\cartouter\newdimen\cartinner
+\newskip\normbskip\newskip\normpskip\newskip\normlskip
+\circthick=\fontdimen8\circle
+%
+\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth
+\def\ctr{{\hskip 6pt\circle\char'010}}
+\def\cbl{{\circle\char'012\hskip -6pt}}
+\def\cbr{{\hskip 6pt\circle\char'011}}
+\def\carttop{\hbox to \cartouter{\hskip\lskip
+ \ctl\leaders\hrule height\circthick\hfil\ctr
+ \hskip\rskip}}
+\def\cartbot{\hbox to \cartouter{\hskip\lskip
+ \cbl\leaders\hrule height\circthick\hfil\cbr
+ \hskip\rskip}}
+%
+\newskip\lskip\newskip\rskip
+
+\envdef\cartouche{%
+ \ifhmode\par\fi % can't be in the midst of a paragraph.
+ \startsavinginserts
+ \lskip=\leftskip \rskip=\rightskip
+ \leftskip=0pt\rightskip=0pt % we want these *outside*.
+ \cartinner=\hsize \advance\cartinner by-\lskip
+ \advance\cartinner by-\rskip
+ \cartouter=\hsize
+ \advance\cartouter by 18.4pt % allow for 3pt kerns on either
+ % side, and for 6pt waste from
+ % each corner char, and rule thickness
+ \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip
+ % Flag to tell @lisp, etc., not to narrow margin.
+ \let\nonarrowing = t%
+ %
+ % If this cartouche directly follows a sectioning command, we need the
+ % \parskip glue (backspaced over by default) or the cartouche can
+ % collide with the section heading.
+ \ifnum\lastpenalty>10000 \vskip\parskip \penalty\lastpenalty \fi
+ %
+ \vbox\bgroup
+ \baselineskip=0pt\parskip=0pt\lineskip=0pt
+ \carttop
+ \hbox\bgroup
+ \hskip\lskip
+ \vrule\kern3pt
+ \vbox\bgroup
+ \kern3pt
+ \hsize=\cartinner
+ \baselineskip=\normbskip
+ \lineskip=\normlskip
+ \parskip=\normpskip
+ \vskip -\parskip
+ \comment % For explanation, see the end of \def\group.
+}
+\def\Ecartouche{%
+ \ifhmode\par\fi
+ \kern3pt
+ \egroup
+ \kern3pt\vrule
+ \hskip\rskip
+ \egroup
+ \cartbot
+ \egroup
+ \checkinserts
+}
+
+
+% This macro is called at the beginning of all the @example variants,
+% inside a group.
+\newdimen\nonfillparindent
+\def\nonfillstart{%
+ \aboveenvbreak
+ \hfuzz = 12pt % Don't be fussy
+ \sepspaces % Make spaces be word-separators rather than space tokens.
+ \let\par = \lisppar % don't ignore blank lines
+ \obeylines % each line of input is a line of output
+ \parskip = 0pt
+ % Turn off paragraph indentation but redefine \indent to emulate
+ % the normal \indent.
+ \nonfillparindent=\parindent
+ \parindent = 0pt
+ \let\indent\nonfillindent
+ %
+ \emergencystretch = 0pt % don't try to avoid overfull boxes
+ \ifx\nonarrowing\relax
+ \advance \leftskip by \lispnarrowing
+ \exdentamount=\lispnarrowing
+ \else
+ \let\nonarrowing = \relax
+ \fi
+ \let\exdent=\nofillexdent
+}
+
+\begingroup
+\obeyspaces
+% We want to swallow spaces (but not other tokens) after the fake
+% @indent in our nonfill-environments, where spaces are normally
+% active and set to @tie, resulting in them not being ignored after
+% @indent.
+\gdef\nonfillindent{\futurelet\temp\nonfillindentcheck}%
+\gdef\nonfillindentcheck{%
+\ifx\temp %
+\expandafter\nonfillindentgobble%
+\else%
+\leavevmode\nonfillindentbox%
+\fi%
+}%
+\endgroup
+\def\nonfillindentgobble#1{\nonfillindent}
+\def\nonfillindentbox{\hbox to \nonfillparindent{\hss}}
+
+% If you want all examples etc. small: @set dispenvsize small.
+% If you want even small examples the full size: @set dispenvsize nosmall.
+% This affects the following displayed environments:
+% @example, @display, @format, @lisp
+%
+\def\smallword{small}
+\def\nosmallword{nosmall}
+\let\SETdispenvsize\relax
+\def\setnormaldispenv{%
+ \ifx\SETdispenvsize\smallword
+ % end paragraph for sake of leading, in case document has no blank
+ % line. This is redundant with what happens in \aboveenvbreak, but
+ % we need to do it before changing the fonts, and it's inconvenient
+ % to change the fonts afterward.
+ \ifnum \lastpenalty=10000 \else \endgraf \fi
+ \smallexamplefonts \rm
+ \fi
+}
+\def\setsmalldispenv{%
+ \ifx\SETdispenvsize\nosmallword
+ \else
+ \ifnum \lastpenalty=10000 \else \endgraf \fi
+ \smallexamplefonts \rm
+ \fi
+}
+
+% We often define two environments, @foo and @smallfoo.
+% Let's do it in one command. #1 is the env name, #2 the definition.
+\def\makedispenvdef#1#2{% + \expandafter\envdef\csname#1\endcsname {\setnormaldispenv #2}% + \expandafter\envdef\csname small#1\endcsname {\setsmalldispenv #2}% + \expandafter\let\csname E#1\endcsname \afterenvbreak + \expandafter\let\csname Esmall#1\endcsname \afterenvbreak +} + +% Define two environment synonyms (#1 and #2) for an environment. +\def\maketwodispenvdef#1#2#3{% + \makedispenvdef{#1}{#3}% + \makedispenvdef{#2}{#3}% +} +% +% @lisp: indented, narrowed, typewriter font; +% @example: same as @lisp. +% +% @smallexample and @smalllisp: use smaller fonts. +% Originally contributed by Pavel@xerox. +% +\maketwodispenvdef{lisp}{example}{% + \nonfillstart + \tt\setupmarkupstyle{example}% + \let\kbdfont = \kbdexamplefont % Allow @kbd to do something special. + \gobble % eat return +} +% @display/@smalldisplay: same as @lisp except keep current font. +% +\makedispenvdef{display}{% + \nonfillstart + \gobble +} + +% @format/@smallformat: same as @display except don't narrow margins. +% +\makedispenvdef{format}{% + \let\nonarrowing = t% + \nonfillstart + \gobble +} + +% @flushleft: same as @format, but doesn't obey \SETdispenvsize. +\envdef\flushleft{% + \let\nonarrowing = t% + \nonfillstart + \gobble +} +\let\Eflushleft = \afterenvbreak + +% @flushright. +% +\envdef\flushright{% + \let\nonarrowing = t% + \nonfillstart + \advance\leftskip by 0pt plus 1fill\relax + \gobble +} +\let\Eflushright = \afterenvbreak + + +% @raggedright does more-or-less normal line breaking but no right +% justification. From plain.tex. +\envdef\raggedright{% + \rightskip0pt plus2em \spaceskip.3333em \xspaceskip.5em\relax +} +\let\Eraggedright\par + +\envdef\raggedleft{% + \parindent=0pt \leftskip0pt plus2em + \spaceskip.3333em \xspaceskip.5em \parfillskip=0pt + \hbadness=10000 % Last line will usually be underfull, so turn off + % badness reporting. 
+} +\let\Eraggedleft\par + +\envdef\raggedcenter{% + \parindent=0pt \rightskip0pt plus1em \leftskip0pt plus1em + \spaceskip.3333em \xspaceskip.5em \parfillskip=0pt + \hbadness=10000 % Last line will usually be underfull, so turn off + % badness reporting. +} +\let\Eraggedcenter\par + + +% @quotation does normal linebreaking (hence we can't use \nonfillstart) +% and narrows the margins. We keep \parskip nonzero in general, since +% we're doing normal filling. So, when using \aboveenvbreak and +% \afterenvbreak, temporarily make \parskip 0. +% +\makedispenvdef{quotation}{\quotationstart} +% +\def\quotationstart{% + \indentedblockstart % same as \indentedblock, but increase right margin too. + \ifx\nonarrowing\relax + \advance\rightskip by \lispnarrowing + \fi + \parsearg\quotationlabel +} + +% We have retained a nonzero parskip for the environment, since we're +% doing normal filling. +% +\def\Equotation{% + \par + \ifx\quotationauthor\thisisundefined\else + % indent a bit. + \leftline{\kern 2\leftskip \sl ---\quotationauthor}% + \fi + {\parskip=0pt \afterenvbreak}% +} +\def\Esmallquotation{\Equotation} + +% If we're given an argument, typeset it in bold with a colon after. +\def\quotationlabel#1{% + \def\temp{#1}% + \ifx\temp\empty \else + {\bf #1: }% + \fi +} + +% @indentedblock is like @quotation, but indents only on the left and +% has no optional argument. +% +\makedispenvdef{indentedblock}{\indentedblockstart} +% +\def\indentedblockstart{% + {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip + \parindent=0pt + % + % @cartouche defines \nonarrowing to inhibit narrowing at next level down. + \ifx\nonarrowing\relax + \advance\leftskip by \lispnarrowing + \exdentamount = \lispnarrowing + \else + \let\nonarrowing = \relax + \fi +} + +% Keep a nonzero parskip for the environment, since we're doing normal filling. 
+% +\def\Eindentedblock{% + \par + {\parskip=0pt \afterenvbreak}% +} +\def\Esmallindentedblock{\Eindentedblock} + + +% LaTeX-like @verbatim...@end verbatim and @verb{...} +% If we want to allow any as delimiter, +% we need the curly braces so that makeinfo sees the @verb command, eg: +% `@verbx...x' would look like the '@verbx' command. --janneke@gnu.org +% +% [Knuth]: Donald Ervin Knuth, 1996. The TeXbook. +% +% [Knuth] p.344; only we need to do the other characters Texinfo sets +% active too. Otherwise, they get lost as the first character on a +% verbatim line. +\def\dospecials{% + \do\ \do\\\do\{\do\}\do\$\do\&% + \do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~% + \do\<\do\>\do\|\do\@\do+\do\"% + % Don't do the quotes -- if we do, @set txicodequoteundirected and + % @set txicodequotebacktick will not have effect on @verb and + % @verbatim, and ?` and !` ligatures won't get disabled. + %\do\`\do\'% +} +% +% [Knuth] p. 380 +\def\uncatcodespecials{% + \def\do##1{\catcode`##1=\other}\dospecials} +% +% Setup for the @verb command. +% +% Eight spaces for a tab +\begingroup + \catcode`\^^I=\active + \gdef\tabeightspaces{\catcode`\^^I=\active\def^^I{\ \ \ \ \ \ \ \ }} +\endgroup +% +\def\setupverb{% + \tt % easiest (and conventionally used) font for verbatim + \def\par{\leavevmode\endgraf}% + \setupmarkupstyle{verb}% + \tabeightspaces + % Respect line breaks, + % print special symbols as themselves, and + % make each space count + % must do in this order: + \obeylines \uncatcodespecials \sepspaces +} + +% Setup for the @verbatim environment +% +% Real tab expansion. +\newdimen\tabw \setbox0=\hbox{\tt\space} \tabw=8\wd0 % tab amount +% +% We typeset each line of the verbatim in an \hbox, so we can handle +% tabs. The \global is in case the verbatim line starts with an accent, +% or some other command that starts with a begin-group. Otherwise, the +% entire \verbbox would disappear at the corresponding end-group, before +% it is typeset. 
Meanwhile, we can't have nested verbatim commands +% (can we?), so the \global won't be overwriting itself. +\newbox\verbbox +\def\starttabbox{\global\setbox\verbbox=\hbox\bgroup} +% +\begingroup + \catcode`\^^I=\active + \gdef\tabexpand{% + \catcode`\^^I=\active + \def^^I{\leavevmode\egroup + \dimen\verbbox=\wd\verbbox % the width so far, or since the previous tab + \divide\dimen\verbbox by\tabw + \multiply\dimen\verbbox by\tabw % compute previous multiple of \tabw + \advance\dimen\verbbox by\tabw % advance to next multiple of \tabw + \wd\verbbox=\dimen\verbbox \box\verbbox \starttabbox + }% + } +\endgroup + +% start the verbatim environment. +\def\setupverbatim{% + \let\nonarrowing = t% + \nonfillstart + \tt % easiest (and conventionally used) font for verbatim + % The \leavevmode here is for blank lines. Otherwise, we would + % never \starttabbox and the \egroup would end verbatim mode. + \def\par{\leavevmode\egroup\box\verbbox\endgraf}% + \tabexpand + \setupmarkupstyle{verbatim}% + % Respect line breaks, + % print special symbols as themselves, and + % make each space count. + % Must do in this order: + \obeylines \uncatcodespecials \sepspaces + \everypar{\starttabbox}% +} + +% Do the @verb magic: verbatim text is quoted by unique +% delimiter characters. Before first delimiter expect an +% opening brace, after last delimiter expect closing brace: +% +% \def\doverb'{'#1'}'{#1} +% +% [Knuth] p. 382; only eat outer {} +\begingroup + \catcode`[=1\catcode`]=2\catcode`\{=\other\catcode`\}=\other + \gdef\doverb{#1[\def\next##1#1}[##1\endgroup]\next] +\endgroup +% +\def\verb{\begingroup\setupverb\doverb} +% +% +% Do the @verbatim magic: define the macro \doverbatim so that +% the (first) argument ends when '@end verbatim' is reached, ie: +% +% \def\doverbatim#1@end verbatim{#1} +% +% For Texinfo it's a lot easier than for LaTeX, +% because texinfo's \verbatim doesn't stop at '\end{verbatim}': +% we need not redefine '\', '{' and '}'.
+% +% Inspired by LaTeX's verbatim command set [latex.ltx] +% +\begingroup + \catcode`\ =\active + \obeylines % + % ignore everything up to the first ^^M, that's the newline at the end + % of the @verbatim input line itself. Otherwise we get an extra blank + % line in the output. + \xdef\doverbatim#1^^M#2@end verbatim{#2\noexpand\end\gobble verbatim}% + % We really want {...\end verbatim} in the body of the macro, but + % without the active space; thus we have to use \xdef and \gobble. +\endgroup +% +\envdef\verbatim{% + \setupverbatim\doverbatim +} +\let\Everbatim = \afterenvbreak + + +% @verbatiminclude FILE - insert text of file in verbatim environment. +% +\def\verbatiminclude{\parseargusing\filenamecatcodes\doverbatiminclude} +% +\def\doverbatiminclude#1{% + {% + \makevalueexpandable + \setupverbatim + \indexnofonts % Allow `@@' and other weird things in file names. + \wlog{texinfo.tex: doing @verbatiminclude of #1^^J}% + \input #1 + \afterenvbreak + }% +} + +% @copying ... @end copying. +% Save the text away for @insertcopying later. +% +% We save the uninterpreted tokens, rather than creating a box. +% Saving the text in a box would be much easier, but then all the +% typesetting commands (@smallbook, font changes, etc.) have to be done +% beforehand -- and a) we want @copying to be done first in the source +% file; b) letting users define the frontmatter in as flexible order as +% possible is very desirable. +% +\def\copying{\checkenv{}\begingroup\scanargctxt\docopying} +\def\docopying#1@end copying{\endgroup\def\copyingtext{#1}} +% +\def\insertcopying{% + \begingroup + \parindent = 0pt % paragraph indentation looks wrong on title page + \scanexp\copyingtext + \endgroup +} + + +\message{defuns,} +% @defun etc. 
+ +\newskip\defbodyindent \defbodyindent=.4in +\newskip\defargsindent \defargsindent=50pt +\newskip\deflastargmargin \deflastargmargin=18pt +\newcount\defunpenalty + +% Start the processing of @deffn: +\def\startdefun{% + \ifnum\lastpenalty<10000 + \medbreak + \defunpenalty=10003 % Will keep this @deffn together with the + % following @def command, see below. + \else + % If there are two @def commands in a row, we'll have a \nobreak, + % which is there to keep the function description together with its + % header. But if there's nothing but headers, we need to allow a + % break somewhere. Check specifically for penalty 10002, inserted + % by \printdefunline, instead of 10000, since the sectioning + % commands also insert a nobreak penalty, and we don't want to allow + % a break between a section heading and a defun. + % + % As a further refinement, we avoid "club" headers by signalling + % with penalty of 10003 after the very first @deffn in the + % sequence (see above), and penalty of 10002 after any following + % @def command. + \ifnum\lastpenalty=10002 \penalty2000 \else \defunpenalty=10002 \fi + % + % Similarly, after a section heading, do not allow a break. + % But do insert the glue. + \medskip % preceded by discardable penalty, so not a breakpoint + \fi + % + \parindent=0in + \advance\leftskip by \defbodyindent + \exdentamount=\defbodyindent +} + +\def\dodefunx#1{% + % First, check whether we are in the right environment: + \checkenv#1% + % + % As above, allow line break if we have multiple x headers in a row. + % It's not a great place, though. 
+ \ifnum\lastpenalty=10002 \penalty3000 \else \defunpenalty=10002 \fi + % + % And now, it's time to reuse the body of the original defun: + \expandafter\gobbledefun#1% +} +\def\gobbledefun#1\startdefun{} + +% \printdefunline \deffnheader{text} +% +\def\printdefunline#1#2{% + \begingroup + % call \deffnheader: + #1#2 \endheader + % common ending: + \interlinepenalty = 10000 + \advance\rightskip by 0pt plus 1fil\relax + \endgraf + \nobreak\vskip -\parskip + \penalty\defunpenalty % signal to \startdefun and \dodefunx + % Some of the @defun-type tags do not enable magic parentheses, + % rendering the following check redundant. But we don't optimize. + \checkparencounts + \endgroup +} + +\def\Edefun{\endgraf\medbreak} + +% \makedefun{deffn} creates \deffn, \deffnx and \Edeffn; +% the only thing remaining is to define \deffnheader. +% +\def\makedefun#1{% + \expandafter\let\csname E#1\endcsname = \Edefun + \edef\temp{\noexpand\domakedefun + \makecsname{#1}\makecsname{#1x}\makecsname{#1header}}% + \temp +} + +% \domakedefun \deffn \deffnx \deffnheader +% +% Define \deffn and \deffnx, without parameters. +% \deffnheader has to be defined explicitly. +% +\def\domakedefun#1#2#3{% + \envdef#1{% + \startdefun + \doingtypefnfalse % distinguish typed functions from all else + \parseargusing\activeparens{\printdefunline#3}% + }% + \def#2{\dodefunx#1}% + \def#3% +} + +\newif\ifdoingtypefn % doing typed function? +\newif\ifrettypeownline % typeset return type on its own line? + +% @deftypefnnewline on|off says whether the return type of typed functions +% are printed on their own line. This affects @deftypefn, @deftypefun, +% @deftypeop, and @deftypemethod. 
+% +% `on' lets \SETtxideftypefnnl to \empty, `off' lets it to \relax; any +% other argument is reported as an error. +\parseargdef\deftypefnnewline{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxideftypefnnl\endcsname + = \empty + \else\ifx\temp\offword + \expandafter\let\csname SETtxideftypefnnl\endcsname + = \relax + \else + \errhelp = \EMsimple + % Name the command the user actually typed, not the internal flag. + \errmessage{Unknown @deftypefnnewline value `\temp', + must be on|off}% + \fi\fi +} + +% Untyped functions: + +% @deffn category name args +\makedefun{deffn}{\deffngeneral{}} + +% @defop category class name args +\makedefun{defop}#1 {\defopon{#1\ \putwordon}} + +% \defopon {category on}class name args +\def\defopon#1#2 {\deffngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} } + +% \deffngeneral {subind}category name args +% +\def\deffngeneral#1#2 #3 #4\endheader{% + % Remember that \dosubind{fn}{foo}{} is equivalent to \doind{fn}{foo}. + \dosubind{fn}{\code{#3}}{#1}% + \defname{#2}{}{#3}\magicamp\defunargs{#4\unskip}% +} + +% Typed functions: + +% @deftypefn category type name args +\makedefun{deftypefn}{\deftypefngeneral{}} + +% @deftypeop category class type name args +\makedefun{deftypeop}#1 {\deftypeopon{#1\ \putwordon}} + +% \deftypeopon {category on}class type name args +\def\deftypeopon#1#2 {\deftypefngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} } + +% \deftypefngeneral {subind}category type name args +% +\def\deftypefngeneral#1#2 #3 #4 #5\endheader{% + \dosubind{fn}{\code{#4}}{#1}% + \doingtypefntrue + \defname{#2}{#3}{#4}\defunargs{#5\unskip}% +} + +% Typed variables: + +% @deftypevr category type var args +\makedefun{deftypevr}{\deftypecvgeneral{}} + +% @deftypecv category class type var args +\makedefun{deftypecv}#1 {\deftypecvof{#1\ \putwordof}} + +% \deftypecvof {category of}class type var args +\def\deftypecvof#1#2 {\deftypecvgeneral{\putwordof\ \code{#2}}{#1\ \code{#2}} } + +% \deftypecvgeneral {subind}category type var args +% +\def\deftypecvgeneral#1#2 #3 #4 #5\endheader{% + \dosubind{vr}{\code{#4}}{#1}% + \defname{#2}{#3}{#4}\defunargs{#5\unskip}% +} + +% Untyped variables: + +% @defvr category
var args +\makedefun{defvr}#1 {\deftypevrheader{#1} {} } + +% @defcv category class var args +\makedefun{defcv}#1 {\defcvof{#1\ \putwordof}} + +% \defcvof {category of}class var args +\def\defcvof#1#2 {\deftypecvof{#1}#2 {} } + +% Types: + +% @deftp category name args +\makedefun{deftp}#1 #2 #3\endheader{% + \doind{tp}{\code{#2}}% + \defname{#1}{}{#2}\defunargs{#3\unskip}% +} + +% Remaining @defun-like shortcuts: +\makedefun{defun}{\deffnheader{\putwordDeffunc} } +\makedefun{defmac}{\deffnheader{\putwordDefmac} } +\makedefun{defspec}{\deffnheader{\putwordDefspec} } +\makedefun{deftypefun}{\deftypefnheader{\putwordDeffunc} } +\makedefun{defvar}{\defvrheader{\putwordDefvar} } +\makedefun{defopt}{\defvrheader{\putwordDefopt} } +\makedefun{deftypevar}{\deftypevrheader{\putwordDefvar} } +\makedefun{defmethod}{\defopon\putwordMethodon} +\makedefun{deftypemethod}{\deftypeopon\putwordMethodon} +\makedefun{defivar}{\defcvof\putwordInstanceVariableof} +\makedefun{deftypeivar}{\deftypecvof\putwordInstanceVariableof} + +% \defname, which formats the name of the @def (not the args). +% #1 is the category, such as "Function". +% #2 is the return type, if any. +% #3 is the function name. +% +% We are followed by (but not passed) the arguments, if any. +% +\def\defname#1#2#3{% + \par + % Get the values of \leftskip and \rightskip as they were outside the @def... + \advance\leftskip by -\defbodyindent + % + % Determine if we are typesetting the return type of a typed function + % on a line by itself. + \rettypeownlinefalse + \ifdoingtypefn % doing a typed function specifically? + % then check user option for putting return type on its own line: + \expandafter\ifx\csname SETtxideftypefnnl\endcsname\relax \else + \rettypeownlinetrue + \fi + \fi + % + % How we'll format the category name. Putting it in brackets helps + % distinguish it from the body text that may end up on the next line + % just below it. 
+ \def\temp{#1}% + \setbox0=\hbox{\kern\deflastargmargin \ifx\temp\empty\else [\rm\temp]\fi} + % + % Figure out line sizes for the paragraph shape. We'll always have at + % least two. + \tempnum = 2 + % + % The first line needs space for \box0; but if \rightskip is nonzero, + % we need only space for the part of \box0 which exceeds it: + \dimen0=\hsize \advance\dimen0 by -\wd0 \advance\dimen0 by \rightskip + % + % If doing a return type on its own line, we'll have another line. + \ifrettypeownline + \advance\tempnum by 1 + \def\maybeshapeline{0in \hsize}% + \else + \def\maybeshapeline{}% + \fi + % + % The continuations: + \dimen2=\hsize \advance\dimen2 by -\defargsindent + % + % The final paragraph shape: + \parshape \tempnum 0in \dimen0 \maybeshapeline \defargsindent \dimen2 + % + % Put the category name at the right margin. + \noindent + \hbox to 0pt{% + \hfil\box0 \kern-\hsize + % \hsize has to be shortened this way: + \kern\leftskip + % Intentionally do not respect \rightskip, since we need the space. + }% + % + % Allow all lines to be underfull without complaint: + \tolerance=10000 \hbadness=10000 + \exdentamount=\defbodyindent + {% + % defun fonts. We use typewriter by default (used to be bold) because: + % . we're printing identifiers, they should be in tt in principle. + % . in languages with many accents, such as Czech or French, it's + % common to leave accents off identifiers. The result looks ok in + % tt, but exceedingly strange in rm. + % . we don't want -- and --- to be treated as ligatures. + % . this still does not fix the ?` and !` ligatures, but so far no + % one has made identifiers using them :). 
+ \df \tt + \def\temp{#2}% text of the return type + \ifx\temp\empty\else + \tclose{\temp}% typeset the return type + \ifrettypeownline + % put return type on its own line; prohibit line break following: + \hfil\vadjust{\nobreak}\break + \else + \space % type on same line, so just followed by a space + \fi + \fi % no return type + #3% output function name + }% + {\rm\enskip}% hskip 0.5 em of \tenrm + % + \boldbrax + % arguments will be output next, if any. +} + +% Print arguments in slanted roman (not ttsl), inconsistently with using +% tt for the name. This is because literal text is sometimes needed in +% the argument list (groff manual), and ttsl and tt are not very +% distinguishable. Prevent hyphenation at `-' chars. +% +\def\defunargs#1{% + % use sl by default (not ttsl), + % tt for the names. + \df \sl \hyphenchar\font=0 + % + % On the other hand, if an argument has two dashes (for instance), we + % want a way to get ttsl. We used to recommend @var for that, so + % leave the code in, but it's strange for @var to lead to typewriter. + % Nowadays we recommend @code, since the difference between a ttsl hyphen + % and a tt hyphen is pretty tiny. @code also disables ?` !`. + \def\var##1{{\setupmarkupstyle{var}\ttslanted{##1}}}% + #1% + \sl\hyphenchar\font=45 +} + +% We want ()&[] to print specially on the defun line. +% +\def\activeparens{% + \catcode`\(=\active \catcode`\)=\active + \catcode`\[=\active \catcode`\]=\active + \catcode`\&=\active +} + +% Make control sequences which act like normal parenthesis chars. +\let\lparen = ( \let\rparen = ) + +% Be sure that we always have a definition for `(', etc. For example, +% if the fn name has parens in it, \boldbrax will not be in effect yet, +% so TeX would otherwise complain about undefined control sequence. 
+{ + \activeparens + \global\let(=\lparen \global\let)=\rparen + \global\let[=\lbrack \global\let]=\rbrack + \global\let& = \& + + \gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb} + \gdef\magicamp{\let&=\amprm} +} + +\newcount\parencount + +% If we encounter &foo, then turn on ()-hacking afterwards +\newif\ifampseen +\def\amprm#1 {\ampseentrue{\bf\ }} + +\def\parenfont{% + \ifampseen + % At the first level, print parens in roman, + % otherwise use the default font. + \ifnum \parencount=1 \rm \fi + \else + % The \sf parens (in \boldbrax) actually are a little bolder than + % the contained text. This is especially needed for [ and ] . + \sf + \fi +} +\def\infirstlevel#1{% + \ifampseen + \ifnum\parencount=1 + #1% + \fi + \fi +} +\def\bfafterword#1 {#1 \bf} + +\def\opnr{% + \global\advance\parencount by 1 + {\parenfont(}% + \infirstlevel \bfafterword +} +\def\clnr{% + {\parenfont)}% + \infirstlevel \sl + \global\advance\parencount by -1 +} + +\newcount\brackcount +\def\lbrb{% + \global\advance\brackcount by 1 + {\bf[}% +} +\def\rbrb{% + {\bf]}% + \global\advance\brackcount by -1 +} + +\def\checkparencounts{% + \ifnum\parencount=0 \else \badparencount \fi + \ifnum\brackcount=0 \else \badbrackcount \fi +} +% these should not use \errmessage; the glibc manual, at least, actually +% has such constructs (when documenting function pointers). +\def\badparencount{% + \message{Warning: unbalanced parentheses in @def...}% + \global\parencount=0 +} +\def\badbrackcount{% + \message{Warning: unbalanced square brackets in @def...}% + \global\brackcount=0 +} + + +\message{macros,} +% @macro. + +% To do this right we need a feature of e-TeX, \scantokens, +% which we arrange to emulate with a temporary file in ordinary TeX. 
+\ifx\eTeXversion\thisisundefined + % Emulation: write the tokens to \jobname.tmp and \input that file. + \newwrite\macscribble + \def\scantokens#1{% + \toks0={#1}% + \immediate\openout\macscribble=\jobname.tmp + \immediate\write\macscribble{\the\toks0}% + \immediate\closeout\macscribble + \input \jobname.tmp + } +\fi + +\def\scanmacro#1{\begingroup + \newlinechar`\^^M + \let\xeatspaces\eatspaces + % + % Undo catcode changes of \startcontents and \doprintindex + % When called from @insertcopying or (short)caption, we need active + % backslash to get it printed correctly. Previously, we had + % \catcode`\\=\other instead. We'll see whether a problem appears + % with macro expansion. --kasal, 19aug04 + \catcode`\@=0 \catcode`\\=\active \escapechar=`\@ + % + % ... and for \example: + \spaceisspace + % + % The \empty here causes a following catcode 5 newline to be eaten as + % part of reading whitespace after a control sequence. It does not + % eat a catcode 13 newline. There's no good way to handle the two + % cases (untried: maybe e-TeX's \everyeof could help, though plain TeX + % would then have different behavior). See the Macro Details node in + % the manual for the workaround we recommend for macros and + % line-oriented commands. + % + \scantokens{#1\empty}% +\endgroup} + +% \scanexp expands #1 with \edef and passes the result to \scanmacro. +\def\scanexp#1{% + \edef\temp{\noexpand\scanmacro{#1}}% + \temp +} + +\newcount\paramno % Count of parameters +\newtoks\macname % Macro name +\newif\ifrecursive % Is it recursive? + +% List of all defined macros in the form +% \definedummyword\macro1\definedummyword\macro2... +% Currently it also contains all @aliases; the list can be split +% if there is a need. +\def\macrolist{} + +% Add the macro to \macrolist +\def\addtomacrolist#1{\expandafter \addtomacrolistxxx \csname#1\endcsname} +\def\addtomacrolistxxx#1{% + \toks0 = \expandafter{\macrolist\definedummyword#1}% + \xdef\macrolist{\the\toks0}% +} + +% Utility routines.
+% This does \let #1 = #2, with \csnames; that is, +% \let \csname#1\endcsname = \csname#2\endcsname +% (except of course we have to play expansion games). +% +\def\cslet#1#2{% + \expandafter\let + \csname#1\expandafter\endcsname + \csname#2\endcsname +} + +% Trim leading and trailing spaces off a string. +% Concepts from aro-bend problem 15 (see CTAN). +{\catcode`\@=11 +\gdef\eatspaces #1{\expandafter\trim@\expandafter{#1 }} +\gdef\trim@ #1{\trim@@ @#1 @ #1 @ @@} +\gdef\trim@@ #1@ #2@ #3@@{\trim@@@\empty #2 @} +\def\unbrace#1{#1} +\unbrace{\gdef\trim@@@ #1 } #2@{#1} +} + +% Trim a single trailing ^^M off a string. +{\catcode`\^^M=\other \catcode`\Q=3% +\gdef\eatcr #1{\eatcra #1Q^^MQ}% +\gdef\eatcra#1^^MQ{\eatcrb#1Q}% +\gdef\eatcrb#1Q#2Q{#1}% +} + +% Macro bodies are absorbed as an argument in a context where +% all characters are catcode 10, 11 or 12, except \ which is active +% (as in normal texinfo). It is necessary to change the definition of \ +% to recognize macro arguments; this is the job of \mbodybackslash. +% +% Non-ASCII encodings make 8-bit characters active, so un-activate +% them to avoid their expansion. Must do this non-globally, to +% confine the change to the current group. +% +% It's necessary to have hard CRs when the macro is executed. This is +% done by making ^^M (\endlinechar) catcode 12 when reading the macro +% body, and then making it the \newlinechar in \scanmacro. +% +\def\scanctxt{% used as subroutine + \catcode`\"=\other + \catcode`\+=\other + \catcode`\<=\other + \catcode`\>=\other + \catcode`\@=\other + \catcode`\^=\other + \catcode`\_=\other + \catcode`\|=\other + \catcode`\~=\other + \ifx\declaredencoding\ascii \else \setnonasciicharscatcodenonglobal\other \fi +} + +\def\scanargctxt{% used for copying and captions, not macros. 
+ \scanctxt + \catcode`\\=\other + \catcode`\^^M=\other +} + +\def\macrobodyctxt{% used for @macro definitions + \scanctxt + \catcode`\{=\other + \catcode`\}=\other + \catcode`\^^M=\other + \usembodybackslash +} + +\def\macroargctxt{% used when scanning invocations + \scanctxt + \catcode`\\=0 +} +% why catcode 0 for \ in the above? To recognize \\ \{ \} as "escapes" +% for the single characters \ { }. Thus, we end up with the "commands" +% that would be written @\ @{ @} in a Texinfo document. +% +% We already have @{ and @}. For @\, we define it here, and only for +% this purpose, to produce a typewriter backslash (so, the @\ that we +% define for @math can't be used with @macro calls): +% +\def\\{\normalbackslash}% +% +% We would like to do this for \, too, since that is what makeinfo does. +% But it is not possible, because Texinfo already has a command @, for a +% cedilla accent. Documents must use @comma{} instead. +% +% \anythingelse will almost certainly be an error of some kind. + + +% \mbodybackslash is the definition of \ in @macro bodies. +% It maps \foo\ => \csname macarg.foo\endcsname => #N +% where N is the macro parameter number. +% We define \csname macarg.\endcsname to be \realbackslash, so +% \\ in macro replacement text gets you a backslash. 
+% +{\catcode`@=0 @catcode`@\=@active + @gdef@usembodybackslash{@let\=@mbodybackslash} + @gdef@mbodybackslash#1\{@csname macarg.#1@endcsname} +} +\expandafter\def\csname macarg.\endcsname{\realbackslash} + +\def\margbackslash#1{\char`\#1 } + +\def\macro{\recursivefalse\parsearg\macroxxx} +\def\rmacro{\recursivetrue\parsearg\macroxxx} + +% \macroxxx does the work of @macro and @rmacro. #1 is the rest of the +% command line: the macro name, optionally followed by {param list}. +\def\macroxxx#1{% + \getargs{#1}% now \macname is the macname and \argl the arglist + \ifx\argl\empty % no arguments + \paramno=0\relax + \else + \expandafter\parsemargdef \argl;% + % \paramno is a count register, so this must be a numeric \ifnum test; + % a plain \if would compare the tokens \paramno and > and never be true. + \ifnum\paramno>256\relax + \ifx\eTeXversion\thisisundefined + \errhelp = \EMsimple + \errmessage{You need eTeX to compile a file with macros with more than 256 arguments} + \fi + \fi + \fi + \if1\csname ismacro.\the\macname\endcsname + \message{Warning: redefining \the\macname}% + \else + \expandafter\ifx\csname \the\macname\endcsname \relax + \else \errmessage{Macro name \the\macname\space already defined}\fi + \global\cslet{macsave.\the\macname}{\the\macname}% + \global\expandafter\let\csname ismacro.\the\macname\endcsname=1% + \addtomacrolist{\the\macname}% + \fi + \begingroup \macrobodyctxt + \ifrecursive \expandafter\parsermacbody + \else \expandafter\parsemacbody + \fi} + +\parseargdef\unmacro{% + \if1\csname ismacro.#1\endcsname + \global\cslet{#1}{macsave.#1}% + \global\expandafter\let \csname ismacro.#1\endcsname=0% + % Remove the macro name from \macrolist: + \begingroup + \expandafter\let\csname#1\endcsname \relax + \let\definedummyword\unmacrodo + \xdef\macrolist{\macrolist}% + \endgroup + \else + \errmessage{Macro #1 not defined}% + \fi +} + +% \unmacrodo stands in for \definedummyword while \unmacro re-expands +% \macrolist. The idea is to omit any +% macro definitions that have been changed to \relax.
+% +\def\unmacrodo#1{% + \ifx #1\relax + % remove this + \else + \noexpand\definedummyword \noexpand#1% + \fi +} + +% This makes use of the obscure feature that if the last token of a +% <parameter list> is #, then the preceding argument is delimited by +% an opening brace, and that opening brace is not consumed. +% \getmacname stores the first space-delimited word in \macname; +% \getmacargs stores the braced parameter list in \argl. +\def\getargs#1{\getargsxxx#1{}} +\def\getargsxxx#1#{\getmacname #1 \relax\getmacargs} +\def\getmacname#1 #2\relax{\macname={#1}} +\def\getmacargs#1{\def\argl{#1}} + +% For macro processing make @ a letter so that we can make Texinfo private macro names. +\edef\texiatcatcode{\the\catcode`\@} +\catcode `@=11\relax + +% Parse the optional {params} list. Set up \paramno and \paramlist +% so \defmacro knows what to do. Define \macarg.BLAH for each BLAH +% in the params list to some hook where the argument is to be expanded. If +% there are fewer than 10 arguments that hook is to be replaced by ##N where N +% is the position in that list, that is to say the macro arguments are to be +% defined `a la TeX in the macro body. +% +% That gets used by \mbodybackslash (above). +% +% We need to get `macro parameter char #' into several definitions. +% The technique used is stolen from LaTeX: let \hash be something +% unexpandable, insert that wherever you need a #, and then redefine +% it to # just before using the token list produced. +% +% The same technique is used to protect \eatspaces till just before +% the macro is used. +% +% If there are 10 or more arguments, a different technique is used, where the +% hook remains in the body, and when the macro is to be expanded the body is +% processed again to replace the arguments. +% +% In that case, the hook is \the\toks N-1, and we simply set \toks N-1 to the +% argument N value and then \edef the body (nothing else will expand because of +% the catcode regime under which the body was input).
+% +% If you compile with TeX (not eTeX), and you have macros with 10 or more +% arguments, you need that no macro has more than 256 arguments, otherwise an +% error is produced. +\def\parsemargdef#1;{% + \paramno=0\def\paramlist{}% + \let\hash\relax + \let\xeatspaces\relax + \parsemargdefxxx#1,;,% + % If there are 10 or more arguments, we parse the argument list again + % to set new definitions for the \macarg.BLAH macros corresponding to + % each BLAH argument. The list had to be parsed once already anyway + % in order to count the arguments, and as macros with at most 9 arguments + % are by far more frequent than macros with 10 or more arguments, defining + % the \macarg.BLAH macros twice does not cost too much processing power. + \ifnum\paramno<10\relax\else + \paramno0\relax + \parsemmanyargdef@@#1,;,% 10 or more arguments + \fi +} +\def\parsemargdefxxx#1,{% + \if#1;\let\next=\relax + \else \let\next=\parsemargdefxxx + \advance\paramno by 1 + \expandafter\edef\csname macarg.\eatspaces{#1}\endcsname + {\xeatspaces{\hash\the\paramno}}% + \edef\paramlist{\paramlist\hash\the\paramno,}% + \fi\next} + +\def\parsemmanyargdef@@#1,{% + \if#1;\let\next=\relax + \else + \let\next=\parsemmanyargdef@@ + \edef\tempb{\eatspaces{#1}}% + \expandafter\def\expandafter\tempa + \expandafter{\csname macarg.\tempb\endcsname}% + % Note that we need some extra \noexpand\noexpand, this is because we + % don't want \the to be expanded in the \parsermacbody as it uses an + % \xdef . + \expandafter\edef\tempa + {\noexpand\noexpand\noexpand\the\toks\the\paramno}% + \advance\paramno by 1\relax + \fi\next} + +% These two commands read recursive and nonrecursive macro bodies. +% (They're different since rec and nonrec macros end differently.)
+% + +\catcode `\@\texiatcatcode +\long\def\parsemacbody#1@end macro% +{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}% +\long\def\parsermacbody#1@end rmacro% +{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}% +\catcode `\@=11\relax + +\let\endargs@\relax +\let\nil@\relax +\def\nilm@{\nil@}% +\long\def\nillm@{\nil@}% + +% This macro is expanded during the Texinfo macro expansion, not during its +% definition. It gets all the arguments values and assigns them to macros +% macarg.ARGNAME +% +% #1 is the macro name +% #2 is the list of argument names +% #3 is the list of argument values +\def\getargvals@#1#2#3{% + \def\macargdeflist@{}% + \def\saveparamlist@{#2}% Need to keep a copy for parameter expansion. + \def\paramlist{#2,\nil@}% + \def\macroname{#1}% + \begingroup + \macroargctxt + \def\argvaluelist{#3,\nil@}% + \def\@tempa{#3}% + \ifx\@tempa\empty + \setemptyargvalues@ + \else + \getargvals@@ + \fi +} + +% +\def\getargvals@@{% + \ifx\paramlist\nilm@ + % Some sanity check needed here that \argvaluelist is also empty. + \ifx\argvaluelist\nillm@ + \else + \errhelp = \EMsimple + \errmessage{Too many arguments in macro `\macroname'!}% + \fi + \let\next\macargexpandinbody@ + \else + \ifx\argvaluelist\nillm@ + % No more arguments values passed to macro. Set remaining named-arg + % macros to empty. + \let\next\setemptyargvalues@ + \else + % pop current arg name into \@tempb + \def\@tempa##1{\pop@{\@tempb}{\paramlist}##1\endargs@}% + \expandafter\@tempa\expandafter{\paramlist}% + % pop current argument value into \@tempc + \def\@tempa##1{\longpop@{\@tempc}{\argvaluelist}##1\endargs@}% + \expandafter\@tempa\expandafter{\argvaluelist}% + % Here \@tempb is the current arg name and \@tempc is the current arg value. 
+ % First place the new argument macro definition into \@tempd + \expandafter\macname\expandafter{\@tempc}% + \expandafter\let\csname macarg.\@tempb\endcsname\relax + \expandafter\def\expandafter\@tempe\expandafter{% + \csname macarg.\@tempb\endcsname}% + \edef\@tempd{\long\def\@tempe{\the\macname}}% + \push@\@tempd\macargdeflist@ + \let\next\getargvals@@ + \fi + \fi + \next +} + +\def\push@#1#2{% + \expandafter\expandafter\expandafter\def + \expandafter\expandafter\expandafter#2% + \expandafter\expandafter\expandafter{% + \expandafter#1#2}% +} + +% Replace arguments by their values in the macro body, and place the result +% in macro \@tempa +\def\macvalstoargs@{% + % To do this we use the property that token registers that are \the'ed + % within an \edef expand only once. So we are going to place all argument + % values into respective token registers. + % + % First we save the token context, and initialize argument numbering. + \begingroup + \paramno0\relax + % Then, for each argument number #N, we place the corresponding argument + % value into a new token list register \toks#N + \expandafter\putargsintokens@\saveparamlist@,;,% + % Then, we expand the body so that argument are replaced by their + % values. The trick for values not to be expanded themselves is that they + % are within tokens and that tokens expand only once in an \edef . + \edef\@tempc{\csname mac.\macroname .body\endcsname}% + % Now we restore the token stack pointer to free the token list registers + % which we have used, but we make sure that expanded body is saved after + % group. + \expandafter + \endgroup + \expandafter\def\expandafter\@tempa\expandafter{\@tempc}% + } + +\def\macargexpandinbody@{% + %% Define the named-macro outside of this group and then close this group. + \expandafter + \endgroup + \macargdeflist@ + % First the replace in body the macro arguments by their values, the result + % is in \@tempa . 
+ \macvalstoargs@ + % Then we point at the \norecurse or \gobble (for recursive) macro value + % with \@tempb . + \expandafter\let\expandafter\@tempb\csname mac.\macroname .recurse\endcsname + % Depending on whether it is recursive or not, we need some trailing + % \egroup . + \ifx\@tempb\gobble + \let\@tempc\relax + \else + \let\@tempc\egroup + \fi + % And now we do the real job: + \edef\@tempd{\noexpand\@tempb{\macroname}\noexpand\scanmacro{\@tempa}\@tempc}% + \@tempd +} + +\def\putargsintokens@#1,{% + \if#1;\let\next\relax + \else + \let\next\putargsintokens@ + % First we allocate the new token list register, and give it a temporary + % alias \@tempb . + \toksdef\@tempb\the\paramno + % Then we place the argument value into that token list register. + \expandafter\let\expandafter\@tempa\csname macarg.#1\endcsname + \expandafter\@tempb\expandafter{\@tempa}% + \advance\paramno by 1\relax + \fi + \next +} + +% Save the token stack pointer into macro #1 +\def\texisavetoksstackpoint#1{\edef#1{\the\@cclvi}} +% Restore the token stack pointer from number in macro #1 +\def\texirestoretoksstackpoint#1{\expandafter\mathchardef\expandafter\@cclvi#1\relax} +% newtoks that can be used non \outer . +\def\texinonouternewtoks{\alloc@ 5\toks \toksdef \@cclvi} + +% Trailing missing arguments are set to empty +\def\setemptyargvalues@{% + \ifx\paramlist\nilm@ + \let\next\macargexpandinbody@ + \else + \expandafter\setemptyargvaluesparser@\paramlist\endargs@ + \let\next\setemptyargvalues@ + \fi + \next +} + +\def\setemptyargvaluesparser@#1,#2\endargs@{% + \expandafter\def\expandafter\@tempa\expandafter{% + \expandafter\def\csname macarg.#1\endcsname{}}% + \push@\@tempa\macargdeflist@ + \def\paramlist{#2}% +} + +% #1 is the element target macro +% #2 is the list macro +% #3,#4\endargs@ is the list value +\def\pop@#1#2#3,#4\endargs@{% + \def#1{#3}% + \def#2{#4}% +} +\long\def\longpop@#1#2#3,#4\endargs@{% + \long\def#1{#3}% + \long\def#2{#4}% +} + +% This defines a Texinfo @macro.
There are eight cases: recursive and +% nonrecursive macros of zero, one, up to nine, and many arguments. +% Much magic with \expandafter here. +% \xdef is used so that macro definitions will survive the file +% they're defined in; @include reads the file inside a group. +% +\def\defmacro{% + \let\hash=##% convert placeholders to macro parameter chars + \ifrecursive + \ifcase\paramno + % 0 + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\scanmacro{\temp}}% + \or % 1 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \noexpand\braceorline + \expandafter\noexpand\csname\the\macname xxx\endcsname}% + \expandafter\xdef\csname\the\macname xxx\endcsname##1{% + \egroup\noexpand\scanmacro{\temp}}% + \else + \ifnum\paramno<10\relax % at most 9 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \noexpand\csname\the\macname xx\endcsname}% + \expandafter\xdef\csname\the\macname xx\endcsname##1{% + \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}% + \expandafter\expandafter + \expandafter\xdef + \expandafter\expandafter + \csname\the\macname xxx\endcsname + \paramlist{\egroup\noexpand\scanmacro{\temp}}% + \else % 10 or more + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\getargvals@{\the\macname}{\argl}% + }% + \global\expandafter\let\csname mac.\the\macname .body\endcsname\temp + \global\expandafter\let\csname mac.\the\macname .recurse\endcsname\gobble + \fi + \fi + \else + \ifcase\paramno + % 0 + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\norecurse{\the\macname}% + \noexpand\scanmacro{\temp}\egroup}% + \or % 1 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \noexpand\braceorline + \expandafter\noexpand\csname\the\macname xxx\endcsname}% + \expandafter\xdef\csname\the\macname xxx\endcsname##1{% + \egroup + \noexpand\norecurse{\the\macname}% + \noexpand\scanmacro{\temp}\egroup}% + \else % at most 9 + 
\ifnum\paramno<10\relax + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \expandafter\noexpand\csname\the\macname xx\endcsname}% + \expandafter\xdef\csname\the\macname xx\endcsname##1{% + \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}% + \expandafter\expandafter + \expandafter\xdef + \expandafter\expandafter + \csname\the\macname xxx\endcsname + \paramlist{% + \egroup + \noexpand\norecurse{\the\macname}% + \noexpand\scanmacro{\temp}\egroup}% + \else % 10 or more: + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\getargvals@{\the\macname}{\argl}% + }% + \global\expandafter\let\csname mac.\the\macname .body\endcsname\temp + \global\expandafter\let\csname mac.\the\macname .recurse\endcsname\norecurse + \fi + \fi + \fi} + +\catcode `\@\texiatcatcode\relax + +\def\norecurse#1{\bgroup\cslet{#1}{macsave.#1}} + +% \braceorline decides whether the next nonwhitespace character is a +% {. If so it reads up to the closing }, if not, it reads the whole +% line. Whatever was read is then fed to the next control sequence +% as an argument (by \parsebrace or \parsearg). +% +\def\braceorline#1{\let\macnamexxx=#1\futurelet\nchar\braceorlinexxx} +\def\braceorlinexxx{% + \ifx\nchar\bgroup\else + \expandafter\parsearg + \fi \macnamexxx} + + +% @alias. +% We need some trickery to remove the optional spaces around the equal +% sign. Make them active and then expand them all to nothing. +% +\def\alias{\parseargusing\obeyspaces\aliasxxx} +\def\aliasxxx #1{\aliasyyy#1\relax} +\def\aliasyyy #1=#2\relax{% + {% + \expandafter\let\obeyedspace=\empty + \addtomacrolist{#1}% + \xdef\next{\global\let\makecsname{#1}=\makecsname{#2}}% + }% + \next +} + + +\message{cross references,} + +\newwrite\auxfile +\newif\ifhavexrefs % True if xref values are known. +\newif\ifwarnedxrefs % True if we warned once that they aren't known. + +% @inforef is relatively simple. 
+\def\inforef #1{\inforefzzz #1,,,,**} +\def\inforefzzz #1,#2,#3,#4**{% + \putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, + node \samp{\ignorespaces#1{}}} + +% @node's only job in TeX is to define \lastnode, which is used in +% cross-references. The @node line might or might not have commas, and +% might or might not have spaces before the first comma, like: +% @node foo , bar , ... +% We don't want such trailing spaces in the node name. +% +\parseargdef\node{\checkenv{}\donode #1 ,\finishnodeparse} +% +% also remove a trailing comma, in case of something like this: +% @node Help-Cross, , , Cross-refs +\def\donode#1 ,#2\finishnodeparse{\dodonode #1,\finishnodeparse} +\def\dodonode#1,#2\finishnodeparse{\gdef\lastnode{#1}} + +\let\nwnode=\node +\let\lastnode=\empty + +% Write a cross-reference definition for the current node. #1 is the +% type (Ynumbered, Yappendix, Ynothing). +% +\def\donoderef#1{% + \ifx\lastnode\empty\else + \setref{\lastnode}{#1}% + \global\let\lastnode=\empty + \fi +} + +% @anchor{NAME} -- define xref target at arbitrary point. +% +\newcount\savesfregister +% +\def\savesf{\relax \ifhmode \savesfregister=\spacefactor \fi} +\def\restoresf{\relax \ifhmode \spacefactor=\savesfregister \fi} +\def\anchor#1{\savesf \setref{#1}{Ynothing}\restoresf \ignorespaces} + +% \setref{NAME}{SNT} defines a cross-reference point NAME (a node or an +% anchor), which consists of three parts: +% 1) NAME-title - the current sectioning name taken from \lastsection, +% or the anchor name. +% 2) NAME-snt - section number and type, passed as the SNT arg, or +% empty for anchors. +% 3) NAME-pg - the page number. +% +% This is called from \donoderef, \anchor, and \dofloat. In the case of +% floats, there is an additional part, which is not written here: +% 4) NAME-lof - the text as it should appear in a @listoffloats. 
+% +\def\setref#1#2{% + \pdfmkdest{#1}% + \iflinks + {% + \atdummies % preserve commands, but don't expand them + \edef\writexrdef##1##2{% + \write\auxfile{@xrdef{#1-% #1 of \setref, expanded by the \edef + ##1}{##2}}% these are parameters of \writexrdef + }% + \toks0 = \expandafter{\lastsection}% + \immediate \writexrdef{title}{\the\toks0 }% + \immediate \writexrdef{snt}{\csname #2\endcsname}% \Ynumbered etc. + \safewhatsit{\writexrdef{pg}{\folio}}% will be written later, at \shipout + }% + \fi +} + +% @xrefautosectiontitle on|off says whether @section(ing) names are used +% automatically in xrefs, if the third arg is not explicitly specified. +% This was provided as a "secret" @set xref-automatic-section-title +% variable, now it's official. +% +\parseargdef\xrefautomaticsectiontitle{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETxref-automatic-section-title\endcsname + = \empty + \else\ifx\temp\offword + \expandafter\let\csname SETxref-automatic-section-title\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @xrefautomaticsectiontitle value `\temp', + must be on|off}% + \fi\fi +} + +% +% @xref, @pxref, and @ref generate cross-references. For \xrefX, #1 is +% the node name, #2 the name of the Info cross-reference, #3 the printed +% node name, #4 the name of the Info file, #5 the name of the printed +% manual. All but the node name can be omitted. +% +\def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]} +\def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]} +\def\ref#1{\xrefX[#1,,,,,,,]} +% +\newbox\toprefbox +\newbox\printedrefnamebox +\newbox\infofilenamebox +\newbox\printedmanualbox +% +\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup + \unsepspaces + % + % Get args without leading/trailing spaces. 
+ \def\printedrefname{\ignorespaces #3}% + \setbox\printedrefnamebox = \hbox{\printedrefname\unskip}% + % + \def\infofilename{\ignorespaces #4}% + \setbox\infofilenamebox = \hbox{\infofilename\unskip}% + % + \def\printedmanual{\ignorespaces #5}% + \setbox\printedmanualbox = \hbox{\printedmanual\unskip}% + % + % If the printed reference name (arg #3) was not explicitly given in + % the @xref, figure out what we want to use. + \ifdim \wd\printedrefnamebox = 0pt + % No printed node name was explicitly given. + \expandafter\ifx\csname SETxref-automatic-section-title\endcsname \relax + % Not auto section-title: use node name inside the square brackets. + \def\printedrefname{\ignorespaces #1}% + \else + % Auto section-title: use chapter/section title inside + % the square brackets if we have it. + \ifdim \wd\printedmanualbox > 0pt + % It is in another manual, so we don't have it; use node name. + \def\printedrefname{\ignorespaces #1}% + \else + \ifhavexrefs + % We (should) know the real title if we have the xref values. + \def\printedrefname{\refx{#1-title}{}}% + \else + % Otherwise just copy the Info node name. + \def\printedrefname{\ignorespaces #1}% + \fi% + \fi + \fi + \fi + % + % Make link in pdf output. + \ifpdf + {\indexnofonts + \turnoffactive + \makevalueexpandable + % This expands tokens, so do it after making catcode changes, so _ + % etc. don't get their TeX definitions. This ignores all spaces in + % #4, including (wrongly) those in the middle of the filename. + \getfilename{#4}% + % + % This (wrongly) does not take account of leading or trailing + % spaces in #1, which should be ignored. 
+ \edef\pdfxrefdest{#1}% + \ifx\pdfxrefdest\empty + \def\pdfxrefdest{Top}% no empty targets + \else + \txiescapepdf\pdfxrefdest % escape PDF special chars + \fi + % + \leavevmode + \startlink attr{/Border [0 0 0]}% + \ifnum\filenamelength>0 + goto file{\the\filename.pdf} name{\pdfxrefdest}% + \else + goto name{\pdfmkpgn{\pdfxrefdest}}% + \fi + }% + \setcolor{\linkcolor}% + \fi + % + % Float references are printed completely differently: "Figure 1.2" + % instead of "[somenode], p.3". We distinguish them by the + % LABEL-title being set to a magic string. + {% + % Have to otherify everything special to allow the \csname to + % include an _ in the xref name, etc. + \indexnofonts + \turnoffactive + \expandafter\global\expandafter\let\expandafter\Xthisreftitle + \csname XR#1-title\endcsname + }% + \iffloat\Xthisreftitle + % If the user specified the print name (third arg) to the ref, + % print it instead of our usual "Figure 1.2". + \ifdim\wd\printedrefnamebox = 0pt + \refx{#1-snt}{}% + \else + \printedrefname + \fi + % + % If the user also gave the printed manual name (fifth arg), append + % "in MANUALNAME". + \ifdim \wd\printedmanualbox > 0pt + \space \putwordin{} \cite{\printedmanual}% + \fi + \else + % node/anchor (non-float) references. + % + % If we use \unhbox to print the node names, TeX does not insert + % empty discretionaries after hyphens, which means that it will not + % find a line break at a hyphen in a node name. Since some manuals + % are best written with fairly long node names, containing hyphens, + % this is a loss. Therefore, we give the text of the node name + % again, so it is as if TeX is seeing it for the first time. + % + \ifdim \wd\printedmanualbox > 0pt + % Cross-manual reference with a printed manual name. + % + \crossmanualxref{\cite{\printedmanual\unskip}}% + % + \else\ifdim \wd\infofilenamebox > 0pt + % Cross-manual reference with only an info filename (arg 4), no + % printed manual name (arg 5).
This is essentially the same as + % the case above; we output the filename, since we have nothing else. + % + \crossmanualxref{\code{\infofilename\unskip}}% + % + \else + % Reference within this manual. + % + % _ (for example) has to be the character _ for the purposes of the + % control sequence corresponding to the node, but it has to expand + % into the usual \leavevmode...\vrule stuff for purposes of + % printing. So we \turnoffactive for the \refx-snt, back on for the + % printing, back off for the \refx-pg. + {\turnoffactive + % Only output a following space if the -snt ref is nonempty; for + % @unnumbered and @anchor, it won't be. + \setbox2 = \hbox{\ignorespaces \refx{#1-snt}{}}% + \ifdim \wd2 > 0pt \refx{#1-snt}\space\fi + }% + % output the `[mynode]' via the macro below so it can be overridden. + \xrefprintnodename\printedrefname + % + % But we always want a comma and a space: + ,\space + % + % output the `page 3'. + \turnoffactive \putwordpage\tie\refx{#1-pg}{}% + \fi\fi + \fi + \endlink +\endgroup} + +% Output a cross-manual xref to #1. Used just above (twice). +% +% Only include the text "Section ``foo'' in" if the foo is neither +% missing nor Top. Thus, @xref{,,,foo,The Foo Manual} outputs simply +% "see The Foo Manual", the idea being to refer to the whole manual. +% +% But, this being TeX, we can't easily compare our node name against the +% string "Top" while ignoring the possible spaces before and after in +% the input. By adding the arbitrary 7sp below, we make it much less +% likely that a real node name would have the same width as "Top" (e.g., +% in a monospaced font). Hopefully it will never happen in practice. +% +% For the same basic reason, we retypeset the "Top" at every +% reference, since the current font is indeterminate. +% +\def\crossmanualxref#1{% + \setbox\toprefbox = \hbox{Top\kern7sp}% + \setbox2 = \hbox{\ignorespaces \printedrefname \unskip \kern7sp}% + \ifdim \wd2 > 7sp % nonempty?
+ \ifdim \wd2 = \wd\toprefbox \else % same as Top? + \putwordSection{} ``\printedrefname'' \putwordin{}\space + \fi + \fi + #1% +} + +% This macro is called from \xrefX for the `[nodename]' part of xref +% output. It's a separate macro only so it can be changed more easily, +% since square brackets don't work well in some documents. Particularly +% one that Bob is working on :). +% +\def\xrefprintnodename#1{[#1]} + +% Things referred to by \setref. +% +\def\Ynothing{} +\def\Yomitfromtoc{} +\def\Ynumbered{% + \ifnum\secno=0 + \putwordChapter@tie \the\chapno + \else \ifnum\subsecno=0 + \putwordSection@tie \the\chapno.\the\secno + \else \ifnum\subsubsecno=0 + \putwordSection@tie \the\chapno.\the\secno.\the\subsecno + \else + \putwordSection@tie \the\chapno.\the\secno.\the\subsecno.\the\subsubsecno + \fi\fi\fi +} +\def\Yappendix{% + \ifnum\secno=0 + \putwordAppendix@tie @char\the\appendixno{}% + \else \ifnum\subsecno=0 + \putwordSection@tie @char\the\appendixno.\the\secno + \else \ifnum\subsubsecno=0 + \putwordSection@tie @char\the\appendixno.\the\secno.\the\subsecno + \else + \putwordSection@tie + @char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno + \fi\fi\fi +} + +% Define \refx{NAME}{SUFFIX} to reference a cross-reference string named NAME. +% If its value is nonempty, SUFFIX is output afterward. +% +\def\refx#1#2{% + {% + \indexnofonts + \otherbackslash + \expandafter\global\expandafter\let\expandafter\thisrefX + \csname XR#1\endcsname + }% + \ifx\thisrefX\relax + % If not defined, say something at least. + \angleleft un\-de\-fined\angleright + \iflinks + \ifhavexrefs + {\toks0 = {#1}% avoid expansion of possibly-complex value + \message{\linenumber Undefined cross reference `\the\toks0'.}}% + \else + \ifwarnedxrefs\else + \global\warnedxrefstrue + \message{Cross reference values unknown; you must run TeX again.}% + \fi + \fi + \fi + \else + % It's defined, so just use it. + \thisrefX + \fi + #2% Output the suffix in any case. 
+} + +% This is the macro invoked by entries in the aux file. Usually it's +% just a \def (we prepend XR to the control sequence name to avoid +% collisions). But if this is a float type, we have more work to do. +% +\def\xrdef#1#2{% + {% The node name might contain 8-bit characters, which in our current + % implementation are changed to commands like @'e. Don't let these + % mess up the control sequence name. + \indexnofonts + \turnoffactive + \xdef\safexrefname{#1}% + }% + % + \expandafter\gdef\csname XR\safexrefname\endcsname{#2}% remember this xref + % + % Was that xref control sequence that we just defined for a float? + \expandafter\iffloat\csname XR\safexrefname\endcsname + % it was a float, and we have the (safe) float type in \iffloattype. + \expandafter\let\expandafter\floatlist + \csname floatlist\iffloattype\endcsname + % + % Is this the first time we've seen this float type? + \expandafter\ifx\floatlist\relax + \toks0 = {\do}% yes, so just \do + \else + % had it before, so preserve previous elements in list. + \toks0 = \expandafter{\floatlist\do}% + \fi + % + % Remember this xref in the control sequence \floatlistFLOATTYPE, + % for later use in \listoffloats. + \expandafter\xdef\csname floatlist\iffloattype\endcsname{\the\toks0 + {\safexrefname}}% + \fi +} + +% Read the last existing aux file, if any. No error if none exists. 
+% +\def\tryauxfile{% + \openin 1 \jobname.aux + \ifeof 1 \else + \readdatafile{aux}% + \global\havexrefstrue + \fi + \closein 1 +} + +\def\setupdatafile{% + \catcode`\^^@=\other + \catcode`\^^A=\other + \catcode`\^^B=\other + \catcode`\^^C=\other + \catcode`\^^D=\other + \catcode`\^^E=\other + \catcode`\^^F=\other + \catcode`\^^G=\other + \catcode`\^^H=\other + \catcode`\^^K=\other + \catcode`\^^L=\other + \catcode`\^^N=\other + \catcode`\^^P=\other + \catcode`\^^Q=\other + \catcode`\^^R=\other + \catcode`\^^S=\other + \catcode`\^^T=\other + \catcode`\^^U=\other + \catcode`\^^V=\other + \catcode`\^^W=\other + \catcode`\^^X=\other + \catcode`\^^Z=\other + \catcode`\^^[=\other + \catcode`\^^\=\other + \catcode`\^^]=\other + \catcode`\^^^=\other + \catcode`\^^_=\other + % It was suggested to set the catcode of ^ to 7, which would allow ^^e4 etc. + % in xref tags, i.e., node names. But since ^^e4 notation isn't + % supported in the main text, it doesn't seem desirable. Furthermore, + % that is not enough: for node names that actually contain a ^ + % character, we would end up writing a line like this: 'xrdef {'hat + % b-title}{'hat b} and \xrdef does a \csname...\endcsname on the first + % argument, and \hat is not an expandable control sequence. It could + % all be worked out, but why? Either we support ^^ or we don't. + % + % The other change necessary for this was to define \auxhat: + % \def\auxhat{\def^{'hat }}% extra space so ok if followed by letter + % and then to call \auxhat in \setq. + % + \catcode`\^=\other + % + % Special characters. Should be turned off anyway, but... 
+ \catcode`\~=\other + \catcode`\[=\other + \catcode`\]=\other + \catcode`\"=\other + \catcode`\_=\other + \catcode`\|=\other + \catcode`\<=\other + \catcode`\>=\other + \catcode`\$=\other + \catcode`\#=\other + \catcode`\&=\other + \catcode`\%=\other + \catcode`+=\other % avoid \+ for paranoia even though we've turned it off + % + % This is to support \ in node names and titles, since the \ + % characters end up in a \csname. It's easier than + % leaving it active and making its active definition an actual \ + % character. What I don't understand is why it works in the *value* + % of the xrdef. Seems like it should be a catcode12 \, and that + % should not typeset properly. But it works, so I'm moving on for + % now. --karl, 15jan04. + \catcode`\\=\other + % + % Make the characters 128-255 be printing characters. + {% + \count1=128 + \def\loop{% + \catcode\count1=\other + \advance\count1 by 1 + \ifnum \count1<256 \loop \fi + }% + }% + % + % @ is our escape character in .aux files, and we need braces. + \catcode`\{=1 + \catcode`\}=2 + \catcode`\@=0 +} + +\def\readdatafile#1{% +\begingroup + \setupdatafile + \input\jobname.#1 +\endgroup} + + +\message{insertions,} +% including footnotes. + +\newcount \footnoteno + +% The trailing space in the following definition for supereject is +% vital for proper filling; pages come out unaligned when you do a +% pagealignmacro call if that space before the closing brace is +% removed. (Generally, numeric constants should always be followed by a +% space to prevent strange expansion errors.) +\def\supereject{\par\penalty -20000\footnoteno =0 } + +% @footnotestyle is meaningful for Info output only. +\let\footnotestyle=\comment + +{\catcode `\@=11 +% +% Auto-number footnotes. Otherwise like plain. 
+\gdef\footnote{% + \let\indent=\ptexindent + \let\noindent=\ptexnoindent + \global\advance\footnoteno by \@ne + \edef\thisfootno{$^{\the\footnoteno}$}% + % + % In case the footnote comes at the end of a sentence, preserve the + % extra spacing after we do the footnote number. + \let\@sf\empty + \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\ptexslash\fi + % + % Remove inadvertent blank space before typesetting the footnote number. + \unskip + \thisfootno\@sf + \dofootnote +}% + +% Don't bother with the trickery in plain.tex to not require the +% footnote text as a parameter. Our footnotes don't need to be so general. +% +% Oh yes, they do; otherwise, @ifset (and anything else that uses +% \parseargline) fails inside footnotes because the tokens are fixed when +% the footnote is read. --karl, 16nov96. +% +\gdef\dofootnote{% + \insert\footins\bgroup + % We want to typeset this text as a normal paragraph, even if the + % footnote reference occurs in (for example) a display environment. + % So reset some parameters. + \hsize=\pagewidth + \interlinepenalty\interfootnotelinepenalty + \splittopskip\ht\strutbox % top baseline for broken footnotes + \splitmaxdepth\dp\strutbox + \floatingpenalty\@MM + \leftskip\z@skip + \rightskip\z@skip + \spaceskip\z@skip + \xspaceskip\z@skip + \parindent\defaultparindent + % + \smallfonts \rm + % + % Because we use hanging indentation in footnotes, a @noindent appears + % to exdent this text, so make it be a no-op. makeinfo does not use + % hanging indentation so @noindent can still be needed within footnote + % text after an @example or the like (not that this is good style). + \let\noindent = \relax + % + % Hang the footnote text off the number. Use \everypar in case the + % footnote extends for more than one paragraph. + \everypar = {\hang}% + \textindent{\thisfootno}% + % + % Don't crash into the line above the footnote text. 
Since this + % expands into a box, it must come within the paragraph, lest it + % provide a place where TeX can split the footnote. + \footstrut + % + % Invoke rest of plain TeX footnote routine. + \futurelet\next\fo@t +} +}%end \catcode `\@=11 + +% In case a @footnote appears in a vbox, save the footnote text and create +% the real \insert just after the vbox finished. Otherwise, the insertion +% would be lost. +% Similarly, if a @footnote appears inside an alignment, save the footnote +% text to a box and make the \insert when a row of the table is finished. +% And the same can be done for other insert classes. --kasal, 16nov03. + +% Replace the \insert primitive by a cheating macro. +% Deeper inside, just make sure that the saved insertions are not spilled +% out prematurely. +% +\def\startsavinginserts{% + \ifx \insert\ptexinsert + \let\insert\saveinsert + \else + \let\checkinserts\relax + \fi +} + +% This \insert replacement works for both \insert\footins{foo} and +% \insert\footins\bgroup foo\egroup, but it doesn't work for \insert27{foo}. 
+% +\def\saveinsert#1{% + \edef\next{\noexpand\savetobox \makeSAVEname#1}% + \afterassignment\next + % swallow the left brace + \let\temp = +} +\def\makeSAVEname#1{\makecsname{SAVE\expandafter\gobble\string#1}} +\def\savetobox#1{\global\setbox#1 = \vbox\bgroup \unvbox#1} + +\def\checksaveins#1{\ifvoid#1\else \placesaveins#1\fi} + +\def\placesaveins#1{% + \ptexinsert \csname\expandafter\gobblesave\string#1\endcsname + {\box#1}% +} + +% eat @SAVE -- beware, all of them have catcode \other: +{ + \def\dospecials{\do S\do A\do V\do E} \uncatcodespecials % ;-) + \gdef\gobblesave @SAVE{} +} + +% initialization: +\def\newsaveins #1{% + \edef\next{\noexpand\newsaveinsX \makeSAVEname#1}% + \next +} +\def\newsaveinsX #1{% + \csname newbox\endcsname #1% + \expandafter\def\expandafter\checkinserts\expandafter{\checkinserts + \checksaveins #1}% +} + +% initialize: +\let\checkinserts\empty +\newsaveins\footins +\newsaveins\margin + + +% @image. We use the macros from epsf.tex to support this. +% If epsf.tex is not installed and @image is used, we complain. +% +% Check for and read epsf.tex up front. If we read it only at @image +% time, we might be inside a group, and then its definitions would get +% undone and the next image would fail. +\openin 1 = epsf.tex +\ifeof 1 \else + % Do not bother showing banner with epsf.tex v2.7k (available in + % doc/epsf.tex and on ctan). + \def\epsfannounce{\toks0 = }% + \input epsf.tex +\fi +\closein 1 +% +% We will only complain once about lack of epsf.tex. +\newif\ifwarnednoepsf +\newhelp\noepsfhelp{epsf.tex must be installed for images to + work. 
It is also included in the Texinfo distribution, or you can get + it from ftp://tug.org/tex/epsf.tex.} +% +\def\image#1{% + \ifx\epsfbox\thisisundefined + \ifwarnednoepsf \else + \errhelp = \noepsfhelp + \errmessage{epsf.tex not found, images will be ignored}% + \global\warnednoepsftrue + \fi + \else + \imagexxx #1,,,,,\finish + \fi +} +% +% Arguments to @image: +% #1 is (mandatory) image filename; we tack on .eps extension. +% #2 is (optional) width, #3 is (optional) height. +% #4 is (ignored optional) html alt text. +% #5 is (ignored optional) extension. +% #6 is just the usual extra ignored arg for parsing stuff. +\newif\ifimagevmode +\def\imagexxx#1,#2,#3,#4,#5,#6\finish{\begingroup + \catcode`\^^M = 5 % in case we're inside an example + \normalturnoffactive % allow _ et al. in names + % If the image is by itself, center it. + \ifvmode + \imagevmodetrue + \else \ifx\centersub\centerV + % for @center @image, we need a vbox so we can have our vertical space + \imagevmodetrue + \vbox\bgroup % vbox has better behavior than vtop here + \fi\fi + % + \ifimagevmode + \nobreak\medskip + % Usually we'll have text after the image which will insert + % \parskip glue, so insert it here too to equalize the space + % above and below. + \nobreak\vskip\parskip + \nobreak + \fi + % + % Leave vertical mode so that indentation from an enclosing + % environment such as @quotation is respected. + % However, if we're at the top level, we don't want the + % normal paragraph indentation. + % On the other hand, if we are in the case of @center @image, we don't + % want to start a paragraph, which will create a hsize-width box and + % eradicate the centering. + \ifx\centersub\centerV\else \noindent \fi + % + % Output the image. + \ifpdf + \dopdfimage{#1}{#2}{#3}% + \else + % \epsfbox itself resets \epsf?size at each figure.
+ \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi + \setbox0 = \hbox{\ignorespaces #3}\ifdim\wd0 > 0pt \epsfysize=#3\relax \fi + \epsfbox{#1.eps}% + \fi + % + \ifimagevmode + \medskip % space after a standalone image + \fi + \ifx\centersub\centerV \egroup \fi +\endgroup} + + +% @float FLOATTYPE,LABEL,LOC ... @end float for displayed figures, tables, +% etc. We don't actually implement floating yet, we always include the +% float "here". But it seemed the best name for the future. +% +\envparseargdef\float{\eatcommaspace\eatcommaspace\dofloat#1, , ,\finish} + +% There may be a space before second and/or third parameter; delete it. +\def\eatcommaspace#1, {#1,} + +% #1 is the optional FLOATTYPE, the text label for this float, typically +% "Figure", "Table", "Example", etc. Can't contain commas. If omitted, +% this float will not be numbered and cannot be referred to. +% +% #2 is the optional xref label. Also must be present for the float to +% be referable. +% +% #3 is the optional positioning argument; for now, it is ignored. It +% will somehow specify the positions allowed to float to (here, top, bottom). +% +% We keep a separate counter for each FLOATTYPE, which we reset at each +% chapter-level command. +\let\resetallfloatnos=\empty +% +\def\dofloat#1,#2,#3,#4\finish{% + \let\thiscaption=\empty + \let\thisshortcaption=\empty + % + % don't lose footnotes inside @float. + % + % BEWARE: when the floats start floating, we have to issue a warning whenever an + % insert appears inside a float which could possibly float. --kasal, 26may04 + % + \startsavinginserts + % + % We can't be used inside a paragraph. + \par + % + \vtop\bgroup + \def\floattype{#1}% + \def\floatlabel{#2}% + \def\floatloc{#3}% we do nothing with this yet. + % + \ifx\floattype\empty + \let\safefloattype=\empty + \else + {% + % the floattype might have accents or other special characters, + % but we need to use it in a control sequence name. 
+ \indexnofonts + \turnoffactive + \xdef\safefloattype{\floattype}% + }% + \fi + % + % If label is given but no type, we handle that as the empty type. + \ifx\floatlabel\empty \else + % We want each FLOATTYPE to be numbered separately (Figure 1, + % Table 1, Figure 2, ...). (And if no label, no number.) + % + \expandafter\getfloatno\csname\safefloattype floatno\endcsname + \global\advance\floatno by 1 + % + {% + % This magic value for \lastsection is output by \setref as the + % XREFLABEL-title value. \xrefX uses it to distinguish float + % labels (which have a completely different output format) from + % node and anchor labels. And \xrdef uses it to construct the + % lists of floats. + % + \edef\lastsection{\floatmagic=\safefloattype}% + \setref{\floatlabel}{Yfloat}% + }% + \fi + % + % start with \parskip glue, I guess. + \vskip\parskip + % + % Don't suppress indentation if a float happens to start a section. + \restorefirstparagraphindent +} + +% we have these possibilities: +% @float Foo,lbl & @caption{Cap}: Foo 1.1: Cap +% @float Foo,lbl & no caption: Foo 1.1 +% @float Foo & @caption{Cap}: Foo: Cap +% @float Foo & no caption: Foo +% @float ,lbl & @caption{Cap}: 1.1: Cap +% @float ,lbl & no caption: 1.1 +% @float & @caption{Cap}: Cap +% @float & no caption: +% +\def\Efloat{% + \let\floatident = \empty + % + % In all cases, if we have a float type, it comes first. + \ifx\floattype\empty \else \def\floatident{\floattype}\fi + % + % If we have an xref label, the number comes next. + \ifx\floatlabel\empty \else + \ifx\floattype\empty \else % if also had float type, need tie first. + \appendtomacro\floatident{\tie}% + \fi + % the number. + \appendtomacro\floatident{\chaplevelprefix\the\floatno}% + \fi + % + % Start the printed caption with what we've constructed in + % \floatident, but keep it separate; we need \floatident again. 
+ \let\captionline = \floatident + % + \ifx\thiscaption\empty \else + \ifx\floatident\empty \else + \appendtomacro\captionline{: }% had ident, so need a colon between + \fi + % + % caption text. + \appendtomacro\captionline{\scanexp\thiscaption}% + \fi + % + % If we have anything to print, print it, with space before. + % Eventually this needs to become an \insert. + \ifx\captionline\empty \else + \vskip.5\parskip + \captionline + % + % Space below caption. + \vskip\parskip + \fi + % + % If have an xref label, write the list of floats info. Do this + % after the caption, to avoid chance of it being a breakpoint. + \ifx\floatlabel\empty \else + % Write the text that goes in the lof to the aux file as + % \floatlabel-lof. Besides \floatident, we include the short + % caption if specified, else the full caption if specified, else nothing. + {% + \atdummies + % + % since we read the caption text in the macro world, where ^^M + % is turned into a normal character, we have to scan it back, so + % we don't write the literal three characters "^^M" into the aux file. + \scanexp{% + \xdef\noexpand\gtemp{% + \ifx\thisshortcaption\empty + \thiscaption + \else + \thisshortcaption + \fi + }% + }% + \immediate\write\auxfile{@xrdef{\floatlabel-lof}{\floatident + \ifx\gtemp\empty \else : \gtemp \fi}}% + }% + \fi + \egroup % end of \vtop + % + % place the captured inserts + % + % BEWARE: when the floats start floating, we have to issue warning + % whenever an insert appears inside a float which could possibly + % float. --kasal, 26may04 + % + \checkinserts +} + +% Append the tokens #2 to the definition of macro #1, not expanding either. 
+% +\def\appendtomacro#1#2{% + \expandafter\def\expandafter#1\expandafter{#1#2}% +} + +% @caption, @shortcaption +% +\def\caption{\docaption\thiscaption} +\def\shortcaption{\docaption\thisshortcaption} +\def\docaption{\checkenv\float \bgroup\scanargctxt\defcaption} +\def\defcaption#1#2{\egroup \def#1{#2}} + +% The parameter is the control sequence identifying the counter we are +% going to use. Create it if it doesn't exist and assign it to \floatno. +\def\getfloatno#1{% + \ifx#1\relax + % Haven't seen this figure type before. + \csname newcount\endcsname #1% + % + % Remember to reset this floatno at the next chap. + \expandafter\gdef\expandafter\resetallfloatnos + \expandafter{\resetallfloatnos #1=0 }% + \fi + \let\floatno#1% +} + +% \setref calls this to get the XREFLABEL-snt value. We want an @xref +% to the FLOATLABEL to expand to "Figure 3.1". We call \setref when we +% first read the @float command. +% +\def\Yfloat{\floattype@tie \chaplevelprefix\the\floatno}% + +% Magic string used for the XREFLABEL-title value, so \xrefX can +% distinguish floats from other xref types. +\def\floatmagic{!!float!!} + +% #1 is the control sequence we are passed; we expand into a conditional +% which is true if #1 represents a float ref. That is, the magic +% \lastsection value which we \setref above. +% +\def\iffloat#1{\expandafter\doiffloat#1==\finish} +% +% #1 is (maybe) the \floatmagic string. If so, #2 will be the +% (safe) float type for this float. We set \iffloattype to #2. +% +\def\doiffloat#1=#2=#3\finish{% + \def\temp{#1}% + \def\iffloattype{#2}% + \ifx\temp\floatmagic +} + +% @listoffloats FLOATTYPE - print a list of floats like a table of contents. +% +\parseargdef\listoffloats{% + \def\floattype{#1}% floattype + {% + % the floattype might have accents or other special characters, + % but we need to use it in a control sequence name. 
+ \indexnofonts + \turnoffactive + \xdef\safefloattype{\floattype}% + }% + % + % \xrdef saves the floats as a \do-list in \floatlistSAFEFLOATTYPE. + \expandafter\ifx\csname floatlist\safefloattype\endcsname \relax + \ifhavexrefs + % if the user said @listoffloats foo but never @float foo. + \message{\linenumber No `\safefloattype' floats to list.}% + \fi + \else + \begingroup + \leftskip=\tocindent % indent these entries like a toc + \let\do=\listoffloatsdo + \csname floatlist\safefloattype\endcsname + \endgroup + \fi +} + +% This is called on each entry in a list of floats. We're passed the +% xref label, in the form LABEL-title, which is how we save it in the +% aux file. We strip off the -title and look up \XRLABEL-lof, which +% has the text we're supposed to typeset here. +% +% Figures without xref labels will not be included in the list (since +% they won't appear in the aux file). +% +\def\listoffloatsdo#1{\listoffloatsdoentry#1\finish} +\def\listoffloatsdoentry#1-title\finish{{% + % Can't fully expand XR#1-lof because it can contain anything. Just + % pass the control sequence. On the other hand, XR#1-pg is just the + % page number, and we want to fully expand that so we can get a link + % in pdf output. + \toksA = \expandafter{\csname XR#1-lof\endcsname}% + % + % use the same \entry macro we use to generate the TOC and index. + \edef\writeentry{\noexpand\entry{\the\toksA}{\csname XR#1-pg\endcsname}}% + \writeentry +}} + + +\message{localization,} + +% For single-language documents, @documentlanguage is usually given very +% early, just after @documentencoding. Single argument is the language +% (de) or locale (de_DE) abbreviation. +% +{ + \catcode`\_ = \active + \globaldefs=1 +\parseargdef\documentlanguage{\begingroup + \let_=\normalunderscore % normal _ character for filenames + \tex % read txi-??.tex file in plain TeX. + % Read the file by the name they passed if it exists. 
+ \openin 1 txi-#1.tex + \ifeof 1 + \documentlanguagetrywithoutunderscore{#1_\finish}% + \else + \globaldefs = 1 % everything in the txi-LL files needs to persist + \input txi-#1.tex + \fi + \closein 1 + \endgroup % end raw TeX +\endgroup} +% +% If they passed de_DE, and txi-de_DE.tex doesn't exist, +% try txi-de.tex. +% +\gdef\documentlanguagetrywithoutunderscore#1_#2\finish{% + \openin 1 txi-#1.tex + \ifeof 1 + \errhelp = \nolanghelp + \errmessage{Cannot read language file txi-#1.tex}% + \else + \globaldefs = 1 % everything in the txi-LL files needs to persist + \input txi-#1.tex + \fi + \closein 1 +} +}% end of special _ catcode +% +\newhelp\nolanghelp{The given language definition file cannot be found or +is empty. Maybe you need to install it? Putting it in the current +directory should work if nowhere else does.} + +% This macro is called from txi-??.tex files; the first argument is the +% \language name to set (without the "\lang@" prefix), the second and +% third args are \{left,right}hyphenmin. +% +% The language names to pass are determined when the format is built. +% See the etex.log file created at that time, e.g., +% /usr/local/texlive/2008/texmf-var/web2c/pdftex/etex.log. +% +% With TeX Live 2008, etex now includes hyphenation patterns for all +% available languages. This means we can support hyphenation in +% Texinfo, at least to some extent. (This still doesn't solve the +% accented characters problem.) +% +\catcode`@=11 +\def\txisetlanguage#1#2#3{% + % do not set the language if the name is undefined in the current TeX. + \expandafter\ifx\csname lang@#1\endcsname \relax + \message{no patterns for #1}% + \else + \global\language = \csname lang@#1\endcsname + \fi + % but there is no harm in adjusting the hyphenmin values regardless. + \global\lefthyphenmin = #2\relax + \global\righthyphenmin = #3\relax +} + +% Helpers for encodings. +% Set the catcode of characters 128 through 255 to the specified number. 
+% +\def\setnonasciicharscatcode#1{% + \count255=128 + \loop\ifnum\count255<256 + \global\catcode\count255=#1\relax + \advance\count255 by 1 + \repeat +} + +\def\setnonasciicharscatcodenonglobal#1{% + \count255=128 + \loop\ifnum\count255<256 + \catcode\count255=#1\relax + \advance\count255 by 1 + \repeat +} + +% @documentencoding sets the definition of non-ASCII characters +% according to the specified encoding. +% +\parseargdef\documentencoding{% + % Encoding being declared for the document. + \def\declaredencoding{\csname #1.enc\endcsname}% + % + % Supported encodings: names converted to tokens in order to be able + % to compare them with \ifx. + \def\ascii{\csname US-ASCII.enc\endcsname}% + \def\latnine{\csname ISO-8859-15.enc\endcsname}% + \def\latone{\csname ISO-8859-1.enc\endcsname}% + \def\lattwo{\csname ISO-8859-2.enc\endcsname}% + \def\utfeight{\csname UTF-8.enc\endcsname}% + % + \ifx \declaredencoding \ascii + \asciichardefs + % + \else \ifx \declaredencoding \lattwo + \setnonasciicharscatcode\active + \lattwochardefs + % + \else \ifx \declaredencoding \latone + \setnonasciicharscatcode\active + \latonechardefs + % + \else \ifx \declaredencoding \latnine + \setnonasciicharscatcode\active + \latninechardefs + % + \else \ifx \declaredencoding \utfeight + \setnonasciicharscatcode\active + \utfeightchardefs + % + \else + \message{Unknown document encoding #1, ignoring.}% + % + \fi % utfeight + \fi % latnine + \fi % latone + \fi % lattwo + \fi % ascii +} + +% A message to be logged when using a character that isn't available in +% the default font encoding (OT1). +% +\def\missingcharmsg#1{\message{Character missing in OT1 encoding: #1.}} + +% Take account of \c (plain) vs. \, (Texinfo) difference. +\def\cedilla#1{\ifx\c\ptexc\c{#1}\else\,{#1}\fi} + +% First, make active non-ASCII characters in order for them to be +% correctly categorized when TeX reads the replacement text of +% macros containing the character definitions. 
+\setnonasciicharscatcode\active +% +% Latin1 (ISO-8859-1) character definitions. +\def\latonechardefs{% + \gdef^^a0{\tie} + \gdef^^a1{\exclamdown} + \gdef^^a2{\missingcharmsg{CENT SIGN}} + \gdef^^a3{{\pounds}} + \gdef^^a4{\missingcharmsg{CURRENCY SIGN}} + \gdef^^a5{\missingcharmsg{YEN SIGN}} + \gdef^^a6{\missingcharmsg{BROKEN BAR}} + \gdef^^a7{\S} + \gdef^^a8{\"{}} + \gdef^^a9{\copyright} + \gdef^^aa{\ordf} + \gdef^^ab{\guillemetleft} + \gdef^^ac{$\lnot$} + \gdef^^ad{\-} + \gdef^^ae{\registeredsymbol} + \gdef^^af{\={}} + % + \gdef^^b0{\textdegree} + \gdef^^b1{$\pm$} + \gdef^^b2{$^2$} + \gdef^^b3{$^3$} + \gdef^^b4{\'{}} + \gdef^^b5{$\mu$} + \gdef^^b6{\P} + % + \gdef^^b7{$^.$} + \gdef^^b8{\cedilla\ } + \gdef^^b9{$^1$} + \gdef^^ba{\ordm} + % + \gdef^^bb{\guillemetright} + \gdef^^bc{$1\over4$} + \gdef^^bd{$1\over2$} + \gdef^^be{$3\over4$} + \gdef^^bf{\questiondown} + % + \gdef^^c0{\`A} + \gdef^^c1{\'A} + \gdef^^c2{\^A} + \gdef^^c3{\~A} + \gdef^^c4{\"A} + \gdef^^c5{\ringaccent A} + \gdef^^c6{\AE} + \gdef^^c7{\cedilla C} + \gdef^^c8{\`E} + \gdef^^c9{\'E} + \gdef^^ca{\^E} + \gdef^^cb{\"E} + \gdef^^cc{\`I} + \gdef^^cd{\'I} + \gdef^^ce{\^I} + \gdef^^cf{\"I} + % + \gdef^^d0{\DH} + \gdef^^d1{\~N} + \gdef^^d2{\`O} + \gdef^^d3{\'O} + \gdef^^d4{\^O} + \gdef^^d5{\~O} + \gdef^^d6{\"O} + \gdef^^d7{$\times$} + \gdef^^d8{\O} + \gdef^^d9{\`U} + \gdef^^da{\'U} + \gdef^^db{\^U} + \gdef^^dc{\"U} + \gdef^^dd{\'Y} + \gdef^^de{\TH} + \gdef^^df{\ss} + % + \gdef^^e0{\`a} + \gdef^^e1{\'a} + \gdef^^e2{\^a} + \gdef^^e3{\~a} + \gdef^^e4{\"a} + \gdef^^e5{\ringaccent a} + \gdef^^e6{\ae} + \gdef^^e7{\cedilla c} + \gdef^^e8{\`e} + \gdef^^e9{\'e} + \gdef^^ea{\^e} + \gdef^^eb{\"e} + \gdef^^ec{\`{\dotless i}} + \gdef^^ed{\'{\dotless i}} + \gdef^^ee{\^{\dotless i}} + \gdef^^ef{\"{\dotless i}} + % + \gdef^^f0{\dh} + \gdef^^f1{\~n} + \gdef^^f2{\`o} + \gdef^^f3{\'o} + \gdef^^f4{\^o} + \gdef^^f5{\~o} + \gdef^^f6{\"o} + \gdef^^f7{$\div$} + \gdef^^f8{\o} + \gdef^^f9{\`u} + \gdef^^fa{\'u} + \gdef^^fb{\^u} + 
\gdef^^fc{\"u} + \gdef^^fd{\'y} + \gdef^^fe{\th} + \gdef^^ff{\"y} +} + +% Latin9 (ISO-8859-15) encoding character definitions. +\def\latninechardefs{% + % Encoding is almost identical to Latin1. + \latonechardefs + % + \gdef^^a4{\euro} + \gdef^^a6{\v S} + \gdef^^a8{\v s} + \gdef^^b4{\v Z} + \gdef^^b8{\v z} + \gdef^^bc{\OE} + \gdef^^bd{\oe} + \gdef^^be{\"Y} +} + +% Latin2 (ISO-8859-2) character definitions. +\def\lattwochardefs{% + \gdef^^a0{\tie} + \gdef^^a1{\ogonek{A}} + \gdef^^a2{\u{}} + \gdef^^a3{\L} + \gdef^^a4{\missingcharmsg{CURRENCY SIGN}} + \gdef^^a5{\v L} + \gdef^^a6{\'S} + \gdef^^a7{\S} + \gdef^^a8{\"{}} + \gdef^^a9{\v S} + \gdef^^aa{\cedilla S} + \gdef^^ab{\v T} + \gdef^^ac{\'Z} + \gdef^^ad{\-} + \gdef^^ae{\v Z} + \gdef^^af{\dotaccent Z} + % + \gdef^^b0{\textdegree} + \gdef^^b1{\ogonek{a}} + \gdef^^b2{\ogonek{ }} + \gdef^^b3{\l} + \gdef^^b4{\'{}} + \gdef^^b5{\v l} + \gdef^^b6{\'s} + \gdef^^b7{\v{}} + \gdef^^b8{\cedilla\ } + \gdef^^b9{\v s} + \gdef^^ba{\cedilla s} + \gdef^^bb{\v t} + \gdef^^bc{\'z} + \gdef^^bd{\H{}} + \gdef^^be{\v z} + \gdef^^bf{\dotaccent z} + % + \gdef^^c0{\'R} + \gdef^^c1{\'A} + \gdef^^c2{\^A} + \gdef^^c3{\u A} + \gdef^^c4{\"A} + \gdef^^c5{\'L} + \gdef^^c6{\'C} + \gdef^^c7{\cedilla C} + \gdef^^c8{\v C} + \gdef^^c9{\'E} + \gdef^^ca{\ogonek{E}} + \gdef^^cb{\"E} + \gdef^^cc{\v E} + \gdef^^cd{\'I} + \gdef^^ce{\^I} + \gdef^^cf{\v D} + % + \gdef^^d0{\DH} + \gdef^^d1{\'N} + \gdef^^d2{\v N} + \gdef^^d3{\'O} + \gdef^^d4{\^O} + \gdef^^d5{\H O} + \gdef^^d6{\"O} + \gdef^^d7{$\times$} + \gdef^^d8{\v R} + \gdef^^d9{\ringaccent U} + \gdef^^da{\'U} + \gdef^^db{\H U} + \gdef^^dc{\"U} + \gdef^^dd{\'Y} + \gdef^^de{\cedilla T} + \gdef^^df{\ss} + % + \gdef^^e0{\'r} + \gdef^^e1{\'a} + \gdef^^e2{\^a} + \gdef^^e3{\u a} + \gdef^^e4{\"a} + \gdef^^e5{\'l} + \gdef^^e6{\'c} + \gdef^^e7{\cedilla c} + \gdef^^e8{\v c} + \gdef^^e9{\'e} + \gdef^^ea{\ogonek{e}} + \gdef^^eb{\"e} + \gdef^^ec{\v e} + \gdef^^ed{\'{\dotless{i}}} + \gdef^^ee{\^{\dotless{i}}} + \gdef^^ef{\v d} 
+ % + \gdef^^f0{\dh} + \gdef^^f1{\'n} + \gdef^^f2{\v n} + \gdef^^f3{\'o} + \gdef^^f4{\^o} + \gdef^^f5{\H o} + \gdef^^f6{\"o} + \gdef^^f7{$\div$} + \gdef^^f8{\v r} + \gdef^^f9{\ringaccent u} + \gdef^^fa{\'u} + \gdef^^fb{\H u} + \gdef^^fc{\"u} + \gdef^^fd{\'y} + \gdef^^fe{\cedilla t} + \gdef^^ff{\dotaccent{}} +} + +% UTF-8 character definitions. +% +% This code to support UTF-8 is based on LaTeX's utf8.def, with some +% changes for Texinfo conventions. It is included here under the GPL by +% permission from Frank Mittelbach and the LaTeX team. +% +\newcount\countUTFx +\newcount\countUTFy +\newcount\countUTFz + +\gdef\UTFviiiTwoOctets#1#2{\expandafter + \UTFviiiDefined\csname u8:#1\string #2\endcsname} +% +\gdef\UTFviiiThreeOctets#1#2#3{\expandafter + \UTFviiiDefined\csname u8:#1\string #2\string #3\endcsname} +% +\gdef\UTFviiiFourOctets#1#2#3#4{\expandafter + \UTFviiiDefined\csname u8:#1\string #2\string #3\string #4\endcsname} + +\gdef\UTFviiiDefined#1{% + \ifx #1\relax + \message{\linenumber Unicode char \string #1 not defined for Texinfo}% + \else + \expandafter #1% + \fi +} + +\begingroup + \catcode`\~13 + \catcode`\"12 + + \def\UTFviiiLoop{% + \global\catcode\countUTFx\active + \uccode`\~\countUTFx + \uppercase\expandafter{\UTFviiiTmp}% + \advance\countUTFx by 1 + \ifnum\countUTFx < \countUTFy + \expandafter\UTFviiiLoop + \fi} + + \countUTFx = "C2 + \countUTFy = "E0 + \def\UTFviiiTmp{% + \xdef~{\noexpand\UTFviiiTwoOctets\string~}} + \UTFviiiLoop + + \countUTFx = "E0 + \countUTFy = "F0 + \def\UTFviiiTmp{% + \xdef~{\noexpand\UTFviiiThreeOctets\string~}} + \UTFviiiLoop + + \countUTFx = "F0 + \countUTFy = "F4 + \def\UTFviiiTmp{% + \xdef~{\noexpand\UTFviiiFourOctets\string~}} + \UTFviiiLoop +\endgroup + +\begingroup + \catcode`\"=12 + \catcode`\<=12 + \catcode`\.=12 + \catcode`\,=12 + \catcode`\;=12 + \catcode`\!=12 + \catcode`\~=13 + + \gdef\DeclareUnicodeCharacter#1#2{% + \countUTFz = "#1\relax + %\wlog{\space\space defining Unicode char U+#1 (decimal 
\the\countUTFz)}% + \begingroup + \parseXMLCharref + \def\UTFviiiTwoOctets##1##2{% + \csname u8:##1\string ##2\endcsname}% + \def\UTFviiiThreeOctets##1##2##3{% + \csname u8:##1\string ##2\string ##3\endcsname}% + \def\UTFviiiFourOctets##1##2##3##4{% + \csname u8:##1\string ##2\string ##3\string ##4\endcsname}% + \expandafter\expandafter\expandafter\expandafter + \expandafter\expandafter\expandafter + \gdef\UTFviiiTmp{#2}% + \endgroup} + + \gdef\parseXMLCharref{% + \ifnum\countUTFz < "A0\relax + \errhelp = \EMsimple + \errmessage{Cannot define Unicode char value < 00A0}% + \else\ifnum\countUTFz < "800\relax + \parseUTFviiiA,% + \parseUTFviiiB C\UTFviiiTwoOctets.,% + \else\ifnum\countUTFz < "10000\relax + \parseUTFviiiA;% + \parseUTFviiiA,% + \parseUTFviiiB E\UTFviiiThreeOctets.{,;}% + \else + \parseUTFviiiA;% + \parseUTFviiiA,% + \parseUTFviiiA!% + \parseUTFviiiB F\UTFviiiFourOctets.{!,;}% + \fi\fi\fi + } + + \gdef\parseUTFviiiA#1{% + \countUTFx = \countUTFz + \divide\countUTFz by 64 + \countUTFy = \countUTFz + \multiply\countUTFz by 64 + \advance\countUTFx by -\countUTFz + \advance\countUTFx by 128 + \uccode `#1\countUTFx + \countUTFz = \countUTFy} + + \gdef\parseUTFviiiB#1#2#3#4{% + \advance\countUTFz by "#10\relax + \uccode `#3\countUTFz + \uppercase{\gdef\UTFviiiTmp{#2#3#4}}} +\endgroup + +\def\utfeightchardefs{% + \DeclareUnicodeCharacter{00A0}{\tie} + \DeclareUnicodeCharacter{00A1}{\exclamdown} + \DeclareUnicodeCharacter{00A3}{\pounds} + \DeclareUnicodeCharacter{00A8}{\"{ }} + \DeclareUnicodeCharacter{00A9}{\copyright} + \DeclareUnicodeCharacter{00AA}{\ordf} + \DeclareUnicodeCharacter{00AB}{\guillemetleft} + \DeclareUnicodeCharacter{00AD}{\-} + \DeclareUnicodeCharacter{00AE}{\registeredsymbol} + \DeclareUnicodeCharacter{00AF}{\={ }} + + \DeclareUnicodeCharacter{00B0}{\ringaccent{ }} + \DeclareUnicodeCharacter{00B4}{\'{ }} + \DeclareUnicodeCharacter{00B8}{\cedilla{ }} + \DeclareUnicodeCharacter{00BA}{\ordm} + \DeclareUnicodeCharacter{00BB}{\guillemetright} + 
\DeclareUnicodeCharacter{00BF}{\questiondown} + + \DeclareUnicodeCharacter{00C0}{\`A} + \DeclareUnicodeCharacter{00C1}{\'A} + \DeclareUnicodeCharacter{00C2}{\^A} + \DeclareUnicodeCharacter{00C3}{\~A} + \DeclareUnicodeCharacter{00C4}{\"A} + \DeclareUnicodeCharacter{00C5}{\AA} + \DeclareUnicodeCharacter{00C6}{\AE} + \DeclareUnicodeCharacter{00C7}{\cedilla{C}} + \DeclareUnicodeCharacter{00C8}{\`E} + \DeclareUnicodeCharacter{00C9}{\'E} + \DeclareUnicodeCharacter{00CA}{\^E} + \DeclareUnicodeCharacter{00CB}{\"E} + \DeclareUnicodeCharacter{00CC}{\`I} + \DeclareUnicodeCharacter{00CD}{\'I} + \DeclareUnicodeCharacter{00CE}{\^I} + \DeclareUnicodeCharacter{00CF}{\"I} + + \DeclareUnicodeCharacter{00D0}{\DH} + \DeclareUnicodeCharacter{00D1}{\~N} + \DeclareUnicodeCharacter{00D2}{\`O} + \DeclareUnicodeCharacter{00D3}{\'O} + \DeclareUnicodeCharacter{00D4}{\^O} + \DeclareUnicodeCharacter{00D5}{\~O} + \DeclareUnicodeCharacter{00D6}{\"O} + \DeclareUnicodeCharacter{00D8}{\O} + \DeclareUnicodeCharacter{00D9}{\`U} + \DeclareUnicodeCharacter{00DA}{\'U} + \DeclareUnicodeCharacter{00DB}{\^U} + \DeclareUnicodeCharacter{00DC}{\"U} + \DeclareUnicodeCharacter{00DD}{\'Y} + \DeclareUnicodeCharacter{00DE}{\TH} + \DeclareUnicodeCharacter{00DF}{\ss} + + \DeclareUnicodeCharacter{00E0}{\`a} + \DeclareUnicodeCharacter{00E1}{\'a} + \DeclareUnicodeCharacter{00E2}{\^a} + \DeclareUnicodeCharacter{00E3}{\~a} + \DeclareUnicodeCharacter{00E4}{\"a} + \DeclareUnicodeCharacter{00E5}{\aa} + \DeclareUnicodeCharacter{00E6}{\ae} + \DeclareUnicodeCharacter{00E7}{\cedilla{c}} + \DeclareUnicodeCharacter{00E8}{\`e} + \DeclareUnicodeCharacter{00E9}{\'e} + \DeclareUnicodeCharacter{00EA}{\^e} + \DeclareUnicodeCharacter{00EB}{\"e} + \DeclareUnicodeCharacter{00EC}{\`{\dotless{i}}} + \DeclareUnicodeCharacter{00ED}{\'{\dotless{i}}} + \DeclareUnicodeCharacter{00EE}{\^{\dotless{i}}} + \DeclareUnicodeCharacter{00EF}{\"{\dotless{i}}} + + \DeclareUnicodeCharacter{00F0}{\dh} + \DeclareUnicodeCharacter{00F1}{\~n} + 
\DeclareUnicodeCharacter{00F2}{\`o} + \DeclareUnicodeCharacter{00F3}{\'o} + \DeclareUnicodeCharacter{00F4}{\^o} + \DeclareUnicodeCharacter{00F5}{\~o} + \DeclareUnicodeCharacter{00F6}{\"o} + \DeclareUnicodeCharacter{00F8}{\o} + \DeclareUnicodeCharacter{00F9}{\`u} + \DeclareUnicodeCharacter{00FA}{\'u} + \DeclareUnicodeCharacter{00FB}{\^u} + \DeclareUnicodeCharacter{00FC}{\"u} + \DeclareUnicodeCharacter{00FD}{\'y} + \DeclareUnicodeCharacter{00FE}{\th} + \DeclareUnicodeCharacter{00FF}{\"y} + + \DeclareUnicodeCharacter{0100}{\=A} + \DeclareUnicodeCharacter{0101}{\=a} + \DeclareUnicodeCharacter{0102}{\u{A}} + \DeclareUnicodeCharacter{0103}{\u{a}} + \DeclareUnicodeCharacter{0104}{\ogonek{A}} + \DeclareUnicodeCharacter{0105}{\ogonek{a}} + \DeclareUnicodeCharacter{0106}{\'C} + \DeclareUnicodeCharacter{0107}{\'c} + \DeclareUnicodeCharacter{0108}{\^C} + \DeclareUnicodeCharacter{0109}{\^c} + \DeclareUnicodeCharacter{0118}{\ogonek{E}} + \DeclareUnicodeCharacter{0119}{\ogonek{e}} + \DeclareUnicodeCharacter{010A}{\dotaccent{C}} + \DeclareUnicodeCharacter{010B}{\dotaccent{c}} + \DeclareUnicodeCharacter{010C}{\v{C}} + \DeclareUnicodeCharacter{010D}{\v{c}} + \DeclareUnicodeCharacter{010E}{\v{D}} + + \DeclareUnicodeCharacter{0112}{\=E} + \DeclareUnicodeCharacter{0113}{\=e} + \DeclareUnicodeCharacter{0114}{\u{E}} + \DeclareUnicodeCharacter{0115}{\u{e}} + \DeclareUnicodeCharacter{0116}{\dotaccent{E}} + \DeclareUnicodeCharacter{0117}{\dotaccent{e}} + \DeclareUnicodeCharacter{011A}{\v{E}} + \DeclareUnicodeCharacter{011B}{\v{e}} + \DeclareUnicodeCharacter{011C}{\^G} + \DeclareUnicodeCharacter{011D}{\^g} + \DeclareUnicodeCharacter{011E}{\u{G}} + \DeclareUnicodeCharacter{011F}{\u{g}} + + \DeclareUnicodeCharacter{0120}{\dotaccent{G}} + \DeclareUnicodeCharacter{0121}{\dotaccent{g}} + \DeclareUnicodeCharacter{0124}{\^H} + \DeclareUnicodeCharacter{0125}{\^h} + \DeclareUnicodeCharacter{0128}{\~I} + \DeclareUnicodeCharacter{0129}{\~{\dotless{i}}} + \DeclareUnicodeCharacter{012A}{\=I} + 
\DeclareUnicodeCharacter{012B}{\={\dotless{i}}} + \DeclareUnicodeCharacter{012C}{\u{I}} + \DeclareUnicodeCharacter{012D}{\u{\dotless{i}}} + + \DeclareUnicodeCharacter{0130}{\dotaccent{I}} + \DeclareUnicodeCharacter{0131}{\dotless{i}} + \DeclareUnicodeCharacter{0132}{IJ} + \DeclareUnicodeCharacter{0133}{ij} + \DeclareUnicodeCharacter{0134}{\^J} + \DeclareUnicodeCharacter{0135}{\^{\dotless{j}}} + \DeclareUnicodeCharacter{0139}{\'L} + \DeclareUnicodeCharacter{013A}{\'l} + + \DeclareUnicodeCharacter{0141}{\L} + \DeclareUnicodeCharacter{0142}{\l} + \DeclareUnicodeCharacter{0143}{\'N} + \DeclareUnicodeCharacter{0144}{\'n} + \DeclareUnicodeCharacter{0147}{\v{N}} + \DeclareUnicodeCharacter{0148}{\v{n}} + \DeclareUnicodeCharacter{014C}{\=O} + \DeclareUnicodeCharacter{014D}{\=o} + \DeclareUnicodeCharacter{014E}{\u{O}} + \DeclareUnicodeCharacter{014F}{\u{o}} + + \DeclareUnicodeCharacter{0150}{\H{O}} + \DeclareUnicodeCharacter{0151}{\H{o}} + \DeclareUnicodeCharacter{0152}{\OE} + \DeclareUnicodeCharacter{0153}{\oe} + \DeclareUnicodeCharacter{0154}{\'R} + \DeclareUnicodeCharacter{0155}{\'r} + \DeclareUnicodeCharacter{0158}{\v{R}} + \DeclareUnicodeCharacter{0159}{\v{r}} + \DeclareUnicodeCharacter{015A}{\'S} + \DeclareUnicodeCharacter{015B}{\'s} + \DeclareUnicodeCharacter{015C}{\^S} + \DeclareUnicodeCharacter{015D}{\^s} + \DeclareUnicodeCharacter{015E}{\cedilla{S}} + \DeclareUnicodeCharacter{015F}{\cedilla{s}} + + \DeclareUnicodeCharacter{0160}{\v{S}} + \DeclareUnicodeCharacter{0161}{\v{s}} + \DeclareUnicodeCharacter{0162}{\cedilla{T}} + \DeclareUnicodeCharacter{0163}{\cedilla{t}} + \DeclareUnicodeCharacter{0164}{\v{T}} + + \DeclareUnicodeCharacter{0168}{\~U} + \DeclareUnicodeCharacter{0169}{\~u} + \DeclareUnicodeCharacter{016A}{\=U} + \DeclareUnicodeCharacter{016B}{\=u} + \DeclareUnicodeCharacter{016C}{\u{U}} + \DeclareUnicodeCharacter{016D}{\u{u}} + \DeclareUnicodeCharacter{016E}{\ringaccent{U}} + \DeclareUnicodeCharacter{016F}{\ringaccent{u}} + + 
\DeclareUnicodeCharacter{0170}{\H{U}} + \DeclareUnicodeCharacter{0171}{\H{u}} + \DeclareUnicodeCharacter{0174}{\^W} + \DeclareUnicodeCharacter{0175}{\^w} + \DeclareUnicodeCharacter{0176}{\^Y} + \DeclareUnicodeCharacter{0177}{\^y} + \DeclareUnicodeCharacter{0178}{\"Y} + \DeclareUnicodeCharacter{0179}{\'Z} + \DeclareUnicodeCharacter{017A}{\'z} + \DeclareUnicodeCharacter{017B}{\dotaccent{Z}} + \DeclareUnicodeCharacter{017C}{\dotaccent{z}} + \DeclareUnicodeCharacter{017D}{\v{Z}} + \DeclareUnicodeCharacter{017E}{\v{z}} + + \DeclareUnicodeCharacter{01C4}{D\v{Z}} + \DeclareUnicodeCharacter{01C5}{D\v{z}} + \DeclareUnicodeCharacter{01C6}{d\v{z}} + \DeclareUnicodeCharacter{01C7}{LJ} + \DeclareUnicodeCharacter{01C8}{Lj} + \DeclareUnicodeCharacter{01C9}{lj} + \DeclareUnicodeCharacter{01CA}{NJ} + \DeclareUnicodeCharacter{01CB}{Nj} + \DeclareUnicodeCharacter{01CC}{nj} + \DeclareUnicodeCharacter{01CD}{\v{A}} + \DeclareUnicodeCharacter{01CE}{\v{a}} + \DeclareUnicodeCharacter{01CF}{\v{I}} + + \DeclareUnicodeCharacter{01D0}{\v{\dotless{i}}} + \DeclareUnicodeCharacter{01D1}{\v{O}} + \DeclareUnicodeCharacter{01D2}{\v{o}} + \DeclareUnicodeCharacter{01D3}{\v{U}} + \DeclareUnicodeCharacter{01D4}{\v{u}} + + \DeclareUnicodeCharacter{01E2}{\={\AE}} + \DeclareUnicodeCharacter{01E3}{\={\ae}} + \DeclareUnicodeCharacter{01E6}{\v{G}} + \DeclareUnicodeCharacter{01E7}{\v{g}} + \DeclareUnicodeCharacter{01E8}{\v{K}} + \DeclareUnicodeCharacter{01E9}{\v{k}} + + \DeclareUnicodeCharacter{01F0}{\v{\dotless{j}}} + \DeclareUnicodeCharacter{01F1}{DZ} + \DeclareUnicodeCharacter{01F2}{Dz} + \DeclareUnicodeCharacter{01F3}{dz} + \DeclareUnicodeCharacter{01F4}{\'G} + \DeclareUnicodeCharacter{01F5}{\'g} + \DeclareUnicodeCharacter{01F8}{\`N} + \DeclareUnicodeCharacter{01F9}{\`n} + \DeclareUnicodeCharacter{01FC}{\'{\AE}} + \DeclareUnicodeCharacter{01FD}{\'{\ae}} + \DeclareUnicodeCharacter{01FE}{\'{\O}} + \DeclareUnicodeCharacter{01FF}{\'{\o}} + + \DeclareUnicodeCharacter{021E}{\v{H}} + 
\DeclareUnicodeCharacter{021F}{\v{h}} + + \DeclareUnicodeCharacter{0226}{\dotaccent{A}} + \DeclareUnicodeCharacter{0227}{\dotaccent{a}} + \DeclareUnicodeCharacter{0228}{\cedilla{E}} + \DeclareUnicodeCharacter{0229}{\cedilla{e}} + \DeclareUnicodeCharacter{022E}{\dotaccent{O}} + \DeclareUnicodeCharacter{022F}{\dotaccent{o}} + + \DeclareUnicodeCharacter{0232}{\=Y} + \DeclareUnicodeCharacter{0233}{\=y} + \DeclareUnicodeCharacter{0237}{\dotless{j}} + + \DeclareUnicodeCharacter{02DB}{\ogonek{ }} + + \DeclareUnicodeCharacter{1E02}{\dotaccent{B}} + \DeclareUnicodeCharacter{1E03}{\dotaccent{b}} + \DeclareUnicodeCharacter{1E04}{\udotaccent{B}} + \DeclareUnicodeCharacter{1E05}{\udotaccent{b}} + \DeclareUnicodeCharacter{1E06}{\ubaraccent{B}} + \DeclareUnicodeCharacter{1E07}{\ubaraccent{b}} + \DeclareUnicodeCharacter{1E0A}{\dotaccent{D}} + \DeclareUnicodeCharacter{1E0B}{\dotaccent{d}} + \DeclareUnicodeCharacter{1E0C}{\udotaccent{D}} + \DeclareUnicodeCharacter{1E0D}{\udotaccent{d}} + \DeclareUnicodeCharacter{1E0E}{\ubaraccent{D}} + \DeclareUnicodeCharacter{1E0F}{\ubaraccent{d}} + + \DeclareUnicodeCharacter{1E1E}{\dotaccent{F}} + \DeclareUnicodeCharacter{1E1F}{\dotaccent{f}} + + \DeclareUnicodeCharacter{1E20}{\=G} + \DeclareUnicodeCharacter{1E21}{\=g} + \DeclareUnicodeCharacter{1E22}{\dotaccent{H}} + \DeclareUnicodeCharacter{1E23}{\dotaccent{h}} + \DeclareUnicodeCharacter{1E24}{\udotaccent{H}} + \DeclareUnicodeCharacter{1E25}{\udotaccent{h}} + \DeclareUnicodeCharacter{1E26}{\"H} + \DeclareUnicodeCharacter{1E27}{\"h} + + \DeclareUnicodeCharacter{1E30}{\'K} + \DeclareUnicodeCharacter{1E31}{\'k} + \DeclareUnicodeCharacter{1E32}{\udotaccent{K}} + \DeclareUnicodeCharacter{1E33}{\udotaccent{k}} + \DeclareUnicodeCharacter{1E34}{\ubaraccent{K}} + \DeclareUnicodeCharacter{1E35}{\ubaraccent{k}} + \DeclareUnicodeCharacter{1E36}{\udotaccent{L}} + \DeclareUnicodeCharacter{1E37}{\udotaccent{l}} + \DeclareUnicodeCharacter{1E3A}{\ubaraccent{L}} + \DeclareUnicodeCharacter{1E3B}{\ubaraccent{l}} + 
\DeclareUnicodeCharacter{1E3E}{\'M} + \DeclareUnicodeCharacter{1E3F}{\'m} + + \DeclareUnicodeCharacter{1E40}{\dotaccent{M}} + \DeclareUnicodeCharacter{1E41}{\dotaccent{m}} + \DeclareUnicodeCharacter{1E42}{\udotaccent{M}} + \DeclareUnicodeCharacter{1E43}{\udotaccent{m}} + \DeclareUnicodeCharacter{1E44}{\dotaccent{N}} + \DeclareUnicodeCharacter{1E45}{\dotaccent{n}} + \DeclareUnicodeCharacter{1E46}{\udotaccent{N}} + \DeclareUnicodeCharacter{1E47}{\udotaccent{n}} + \DeclareUnicodeCharacter{1E48}{\ubaraccent{N}} + \DeclareUnicodeCharacter{1E49}{\ubaraccent{n}} + + \DeclareUnicodeCharacter{1E54}{\'P} + \DeclareUnicodeCharacter{1E55}{\'p} + \DeclareUnicodeCharacter{1E56}{\dotaccent{P}} + \DeclareUnicodeCharacter{1E57}{\dotaccent{p}} + \DeclareUnicodeCharacter{1E58}{\dotaccent{R}} + \DeclareUnicodeCharacter{1E59}{\dotaccent{r}} + \DeclareUnicodeCharacter{1E5A}{\udotaccent{R}} + \DeclareUnicodeCharacter{1E5B}{\udotaccent{r}} + \DeclareUnicodeCharacter{1E5E}{\ubaraccent{R}} + \DeclareUnicodeCharacter{1E5F}{\ubaraccent{r}} + + \DeclareUnicodeCharacter{1E60}{\dotaccent{S}} + \DeclareUnicodeCharacter{1E61}{\dotaccent{s}} + \DeclareUnicodeCharacter{1E62}{\udotaccent{S}} + \DeclareUnicodeCharacter{1E63}{\udotaccent{s}} + \DeclareUnicodeCharacter{1E6A}{\dotaccent{T}} + \DeclareUnicodeCharacter{1E6B}{\dotaccent{t}} + \DeclareUnicodeCharacter{1E6C}{\udotaccent{T}} + \DeclareUnicodeCharacter{1E6D}{\udotaccent{t}} + \DeclareUnicodeCharacter{1E6E}{\ubaraccent{T}} + \DeclareUnicodeCharacter{1E6F}{\ubaraccent{t}} + + \DeclareUnicodeCharacter{1E7C}{\~V} + \DeclareUnicodeCharacter{1E7D}{\~v} + \DeclareUnicodeCharacter{1E7E}{\udotaccent{V}} + \DeclareUnicodeCharacter{1E7F}{\udotaccent{v}} + + \DeclareUnicodeCharacter{1E80}{\`W} + \DeclareUnicodeCharacter{1E81}{\`w} + \DeclareUnicodeCharacter{1E82}{\'W} + \DeclareUnicodeCharacter{1E83}{\'w} + \DeclareUnicodeCharacter{1E84}{\"W} + \DeclareUnicodeCharacter{1E85}{\"w} + \DeclareUnicodeCharacter{1E86}{\dotaccent{W}} + 
\DeclareUnicodeCharacter{1E87}{\dotaccent{w}} + \DeclareUnicodeCharacter{1E88}{\udotaccent{W}} + \DeclareUnicodeCharacter{1E89}{\udotaccent{w}} + \DeclareUnicodeCharacter{1E8A}{\dotaccent{X}} + \DeclareUnicodeCharacter{1E8B}{\dotaccent{x}} + \DeclareUnicodeCharacter{1E8C}{\"X} + \DeclareUnicodeCharacter{1E8D}{\"x} + \DeclareUnicodeCharacter{1E8E}{\dotaccent{Y}} + \DeclareUnicodeCharacter{1E8F}{\dotaccent{y}} + + \DeclareUnicodeCharacter{1E90}{\^Z} + \DeclareUnicodeCharacter{1E91}{\^z} + \DeclareUnicodeCharacter{1E92}{\udotaccent{Z}} + \DeclareUnicodeCharacter{1E93}{\udotaccent{z}} + \DeclareUnicodeCharacter{1E94}{\ubaraccent{Z}} + \DeclareUnicodeCharacter{1E95}{\ubaraccent{z}} + \DeclareUnicodeCharacter{1E96}{\ubaraccent{h}} + \DeclareUnicodeCharacter{1E97}{\"t} + \DeclareUnicodeCharacter{1E98}{\ringaccent{w}} + \DeclareUnicodeCharacter{1E99}{\ringaccent{y}} + + \DeclareUnicodeCharacter{1EA0}{\udotaccent{A}} + \DeclareUnicodeCharacter{1EA1}{\udotaccent{a}} + + \DeclareUnicodeCharacter{1EB8}{\udotaccent{E}} + \DeclareUnicodeCharacter{1EB9}{\udotaccent{e}} + \DeclareUnicodeCharacter{1EBC}{\~E} + \DeclareUnicodeCharacter{1EBD}{\~e} + + \DeclareUnicodeCharacter{1ECA}{\udotaccent{I}} + \DeclareUnicodeCharacter{1ECB}{\udotaccent{i}} + \DeclareUnicodeCharacter{1ECC}{\udotaccent{O}} + \DeclareUnicodeCharacter{1ECD}{\udotaccent{o}} + + \DeclareUnicodeCharacter{1EE4}{\udotaccent{U}} + \DeclareUnicodeCharacter{1EE5}{\udotaccent{u}} + + \DeclareUnicodeCharacter{1EF2}{\`Y} + \DeclareUnicodeCharacter{1EF3}{\`y} + \DeclareUnicodeCharacter{1EF4}{\udotaccent{Y}} + + \DeclareUnicodeCharacter{1EF8}{\~Y} + \DeclareUnicodeCharacter{1EF9}{\~y} + + \DeclareUnicodeCharacter{2013}{--} + \DeclareUnicodeCharacter{2014}{---} + \DeclareUnicodeCharacter{2018}{\quoteleft} + \DeclareUnicodeCharacter{2019}{\quoteright} + \DeclareUnicodeCharacter{201A}{\quotesinglbase} + \DeclareUnicodeCharacter{201C}{\quotedblleft} + \DeclareUnicodeCharacter{201D}{\quotedblright} + 
\DeclareUnicodeCharacter{201E}{\quotedblbase} + \DeclareUnicodeCharacter{2022}{\bullet} + \DeclareUnicodeCharacter{2026}{\dots} + \DeclareUnicodeCharacter{2039}{\guilsinglleft} + \DeclareUnicodeCharacter{203A}{\guilsinglright} + \DeclareUnicodeCharacter{20AC}{\euro} + + \DeclareUnicodeCharacter{2192}{\expansion} + \DeclareUnicodeCharacter{21D2}{\result} + + \DeclareUnicodeCharacter{2212}{\minus} + \DeclareUnicodeCharacter{2217}{\point} + \DeclareUnicodeCharacter{2261}{\equiv} +}% end of \utfeightchardefs + + +% US-ASCII character definitions. +\def\asciichardefs{% nothing need be done + \relax +} + +% Make non-ASCII characters printable again for compatibility with +% existing Texinfo documents that may use them, even without declaring a +% document encoding. +% +\setnonasciicharscatcode \other + + +\message{formatting,} + +\newdimen\defaultparindent \defaultparindent = 15pt + +\chapheadingskip = 15pt plus 4pt minus 2pt +\secheadingskip = 12pt plus 3pt minus 2pt +\subsecheadingskip = 9pt plus 2pt minus 2pt + +% Prevent underfull vbox error messages. +\vbadness = 10000 + +% Don't be very finicky about underfull hboxes, either. +\hbadness = 6666 + +% Following George Bush, get rid of widows and orphans. +\widowpenalty=10000 +\clubpenalty=10000 + +% Use TeX 3.0's \emergencystretch to help line breaking, but if we're +% using an old version of TeX, don't do anything. We want the amount of +% stretch added to depend on the line length, hence the dependence on +% \hsize. We call this whenever the paper size is set. +% +\def\setemergencystretch{% + \ifx\emergencystretch\thisisundefined + % Allow us to assign to \emergencystretch anyway. + \def\emergencystretch{\dimen0}% + \else + \emergencystretch = .15\hsize + \fi +} + +% Parameters in order: 1) textheight; 2) textwidth; +% 3) voffset; 4) hoffset; 5) binding offset; 6) topskip; +% 7) physical page height; 8) physical page width. +% +% We also call \setleading{\textleading}, so the caller should define +% \textleading. 
The caller should also set \parskip. +% +\def\internalpagesizes#1#2#3#4#5#6#7#8{% + \voffset = #3\relax + \topskip = #6\relax + \splittopskip = \topskip + % + \vsize = #1\relax + \advance\vsize by \topskip + \outervsize = \vsize + \advance\outervsize by 2\topandbottommargin + \pageheight = \vsize + % + \hsize = #2\relax + \outerhsize = \hsize + \advance\outerhsize by 0.5in + \pagewidth = \hsize + % + \normaloffset = #4\relax + \bindingoffset = #5\relax + % + \ifpdf + \pdfpageheight #7\relax + \pdfpagewidth #8\relax + % if we don't reset these, they will remain at "1 true in" of + % whatever layout pdftex was dumped with. + \pdfhorigin = 1 true in + \pdfvorigin = 1 true in + \fi + % + \setleading{\textleading} + % + \parindent = \defaultparindent + \setemergencystretch +} + +% @letterpaper (the default). +\def\letterpaper{{\globaldefs = 1 + \parskip = 3pt plus 2pt minus 1pt + \textleading = 13.2pt + % + % If page is nothing but text, make it come out even. + \internalpagesizes{607.2pt}{6in}% that's 46 lines + {\voffset}{.25in}% + {\bindingoffset}{36pt}% + {11in}{8.5in}% +}} + +% Use @smallbook to reset parameters for 7x9.25 trim size. +\def\smallbook{{\globaldefs = 1 + \parskip = 2pt plus 1pt + \textleading = 12pt + % + \internalpagesizes{7.5in}{5in}% + {-.2in}{0in}% + {\bindingoffset}{16pt}% + {9.25in}{7in}% + % + \lispnarrowing = 0.3in + \tolerance = 700 + \hfuzz = 1pt + \contentsrightmargin = 0pt + \defbodyindent = .5cm +}} + +% Use @smallerbook to reset parameters for 6x9 trim size. +% (Just testing, parameters still in flux.) +\def\smallerbook{{\globaldefs = 1 + \parskip = 1.5pt plus 1pt + \textleading = 12pt + % + \internalpagesizes{7.4in}{4.8in}% + {-.2in}{-.4in}% + {0pt}{14pt}% + {9in}{6in}% + % + \lispnarrowing = 0.25in + \tolerance = 700 + \hfuzz = 1pt + \contentsrightmargin = 0pt + \defbodyindent = .4cm +}} + +% Use @afourpaper to print on European A4 paper. 
+\def\afourpaper{{\globaldefs = 1 + \parskip = 3pt plus 2pt minus 1pt + \textleading = 13.2pt + % + % Double-side printing via postscript on Laserjet 4050 + % prints double-sided nicely when \bindingoffset=10mm and \hoffset=-6mm. + % To change the settings for a different printer or situation, adjust + % \normaloffset until the front-side and back-side texts align. Then + % do the same for \bindingoffset. You can set these for testing in + % your texinfo source file like this: + % @tex + % \global\normaloffset = -6mm + % \global\bindingoffset = 10mm + % @end tex + \internalpagesizes{673.2pt}{160mm}% that's 51 lines + {\voffset}{\hoffset}% + {\bindingoffset}{44pt}% + {297mm}{210mm}% + % + \tolerance = 700 + \hfuzz = 1pt + \contentsrightmargin = 0pt + \defbodyindent = 5mm +}} + +% Use @afivepaper to print on European A5 paper. +% From romildo@urano.iceb.ufop.br, 2 July 2000. +% He also recommends making @example and @lisp be small. +\def\afivepaper{{\globaldefs = 1 + \parskip = 2pt plus 1pt minus 0.1pt + \textleading = 12.5pt + % + \internalpagesizes{160mm}{120mm}% + {\voffset}{\hoffset}% + {\bindingoffset}{8pt}% + {210mm}{148mm}% + % + \lispnarrowing = 0.2in + \tolerance = 800 + \hfuzz = 1.2pt + \contentsrightmargin = 0pt + \defbodyindent = 2mm + \tableindent = 12mm +}} + +% A specific text layout, 24x15cm overall, intended for A4 paper. +\def\afourlatex{{\globaldefs = 1 + \afourpaper + \internalpagesizes{237mm}{150mm}% + {\voffset}{4.6mm}% + {\bindingoffset}{7mm}% + {297mm}{210mm}% + % + % Must explicitly reset to 0 because we call \afourpaper. + \globaldefs = 0 +}} + +% Use @afourwide to print on A4 paper in landscape format. +\def\afourwide{{\globaldefs = 1 + \afourpaper + \internalpagesizes{241mm}{165mm}% + {\voffset}{-2.95mm}% + {\bindingoffset}{7mm}% + {297mm}{210mm}% + \globaldefs = 0 +}} + +% @pagesizes TEXTHEIGHT[,TEXTWIDTH] +% Perhaps we should allow setting the margins, \topskip, \parskip, +% and/or leading, also. 
Or perhaps we should compute them somehow. +% +\parseargdef\pagesizes{\pagesizesyyy #1,,\finish} +\def\pagesizesyyy#1,#2,#3\finish{{% + \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \hsize=#2\relax \fi + \globaldefs = 1 + % + \parskip = 3pt plus 2pt minus 1pt + \setleading{\textleading}% + % + \dimen0 = #1\relax + \advance\dimen0 by \voffset + % + \dimen2 = \hsize + \advance\dimen2 by \normaloffset + % + \internalpagesizes{#1}{\hsize}% + {\voffset}{\normaloffset}% + {\bindingoffset}{44pt}% + {\dimen0}{\dimen2}% +}} + +% Set default to letter. +% +\letterpaper + + +\message{and turning on texinfo input format.} + +\def^^L{\par} % remove \outer, so ^L can appear in an @comment + +% DEL is a comment character, in case @c does not suffice. +\catcode`\^^? = 14 + +% Define macros to output various characters with catcode for normal text. +\catcode`\"=\other \def\normaldoublequote{"} +\catcode`\$=\other \def\normaldollar{$}%$ font-lock fix +\catcode`\+=\other \def\normalplus{+} +\catcode`\<=\other \def\normalless{<} +\catcode`\>=\other \def\normalgreater{>} +\catcode`\^=\other \def\normalcaret{^} +\catcode`\_=\other \def\normalunderscore{_} +\catcode`\|=\other \def\normalverticalbar{|} +\catcode`\~=\other \def\normaltilde{~} + +% This macro is used to make a character print one way in \tt +% (where it can probably be output as-is), and another way in other fonts, +% where something hairier probably needs to be done. +% +% #1 is what to print if we are indeed using \tt; #2 is what to print +% otherwise. Since all the Computer Modern typewriter fonts have zero +% interword stretch (and shrink), and it is reasonable to expect all +% typewriter fonts to have this, we can check that font parameter. +% +\def\ifusingtt#1#2{\ifdim \fontdimen3\font=0pt #1\else #2\fi} + +% Same as above, but check for italic font. Actually this also catches +% non-italic slanted fonts since it is impossible to distinguish them from +% italic fonts. 
But since this is only used by $ and it uses \sl anyway +% this is not a problem. +\def\ifusingit#1#2{\ifdim \fontdimen1\font>0pt #1\else #2\fi} + +% Turn off all special characters except @ +% (and those which the user can use as if they were ordinary). +% Most of these we simply print from the \tt font, but for some, we can +% use math or other variants that look better in normal text. + +\catcode`\"=\active +\def\activedoublequote{{\tt\char34}} +\let"=\activedoublequote +\catcode`\~=\active +\def~{{\tt\char126}} +\chardef\hat=`\^ +\catcode`\^=\active +\def^{{\tt \hat}} + +\catcode`\_=\active +\def_{\ifusingtt\normalunderscore\_} +\let\realunder=_ +% Subroutine for the previous macro. +\def\_{\leavevmode \kern.07em \vbox{\hrule width.3em height.1ex}\kern .07em } + +\catcode`\|=\active +\def|{{\tt\char124}} +\chardef \less=`\< +\catcode`\<=\active +\def<{{\tt \less}} +\chardef \gtr=`\> +\catcode`\>=\active +\def>{{\tt \gtr}} +\catcode`\+=\active +\def+{{\tt \char 43}} +\catcode`\$=\active +\def${\ifusingit{{\sl\$}}\normaldollar}%$ font-lock fix + +% If a .fmt file is being used, characters that might appear in a file +% name cannot be active until we have parsed the command line. +% So turn them off again, and have \everyjob (or @setfilename) turn them on. +% \otherifyactive is called near the end of this file. +\def\otherifyactive{\catcode`+=\other \catcode`\_=\other} + +% Used sometimes to turn off (effectively) the active characters even after +% parsing them. +\def\turnoffactive{% + \normalturnoffactive + \otherbackslash +} + +\catcode`\@=0 + +% \backslashcurfont outputs one backslash character in current font, +% as in \char`\\. +\global\chardef\backslashcurfont=`\\ +\global\let\rawbackslashxx=\backslashcurfont % let existing .??s files work + +% \realbackslash is an actual character `\' with catcode other, and +% \doublebackslash is two of them (for the pdf outlines). 
+{\catcode`\\=\other @gdef@realbackslash{\} @gdef@doublebackslash{\\}} + +% In texinfo, backslash is an active character; it prints the backslash +% in fixed width font. +\catcode`\\=\active % @ for escape char from now on. + +% The story here is that in math mode, the \char of \backslashcurfont +% ends up printing the roman \ from the math symbol font (because \char +% in math mode uses the \mathcode, and plain.tex sets +% \mathcode`\\="026E). It seems better for @backslashchar{} to always +% print a typewriter backslash, hence we use an explicit \mathchar, +% which is the decimal equivalent of "715c (class 7, e.g., use \fam; +% ignored family value; char position "5C). We can't use " for the +% usual hex value because it has already been made active. +@def@normalbackslash{{@tt @ifmmode @mathchar29020 @else @backslashcurfont @fi}} +@let@backslashchar = @normalbackslash % @backslashchar{} is for user documents. + +% On startup, @fixbackslash assigns: +% @let \ = @normalbackslash +% \rawbackslash defines an active \ to do \backslashcurfont. +% \otherbackslash defines an active \ to be a literal `\' character with +% catcode other. We switch back and forth between these. +@gdef@rawbackslash{@let\=@backslashcurfont} +@gdef@otherbackslash{@let\=@realbackslash} + +% Same as @turnoffactive except outputs \ as {\tt\char`\\} instead of +% the literal character `\'. Also revert - to its normal character, in +% case the active - from code has slipped in. +% +{@catcode`- = @active + @gdef@normalturnoffactive{% + @let-=@normaldash + @let"=@normaldoublequote + @let$=@normaldollar %$ font-lock fix + @let+=@normalplus + @let<=@normalless + @let>=@normalgreater + @let\=@normalbackslash + @let^=@normalcaret + @let_=@normalunderscore + @let|=@normalverticalbar + @let~=@normaltilde + @markupsetuplqdefault + @markupsetuprqdefault + @unsepspaces + } +} + +% Make _ and + \other characters, temporarily. +% This is canceled by @fixbackslash. 
+@otherifyactive + +% If a .fmt file is being used, we don't want the `\input texinfo' to show up. +% That is what \eatinput is for; after that, the `\' should revert to printing +% a backslash. +% +@gdef@eatinput input texinfo{@fixbackslash} +@global@let\ = @eatinput + +% On the other hand, perhaps the file did not have a `\input texinfo'. Then +% the first `\' in the file would cause an error. This macro tries to fix +% that, assuming it is called before the first `\' could plausibly occur. +% Also turn back on active characters that might appear in the input +% file name, in case not using a pre-dumped format. +% +@gdef@fixbackslash{% + @ifx\@eatinput @let\ = @normalbackslash @fi + @catcode`+=@active + @catcode`@_=@active +} + +% Say @foo, not \foo, in error messages. +@escapechar = `@@ + +% These (along with & and #) are made active for url-breaking, so need +% active definitions as the normal characters. +@def@normaldot{.} +@def@normalquest{?} +@def@normalslash{/} + +% These look ok in all fonts, so just make them not special. +% @hashchar{} gets its own user-level command, because of #line. +@catcode`@& = @other @def@normalamp{&} +@catcode`@# = @other @def@normalhash{#} +@catcode`@% = @other @def@normalpercent{%} + +@let @hashchar = @normalhash + +@c Finally, make ` and ' active, so that txicodequoteundirected and +@c txicodequotebacktick work right in, e.g., @w{@code{`foo'}}. If we +@c don't make ` and ' active, @code will not get them as active chars. +@c Do this last of all since we use ` in the previous @catcode assignments. 
+@catcode`@'=@active +@catcode`@`=@active +@markupsetuplqdefault +@markupsetuprqdefault + +@c Local variables: +@c eval: (add-hook 'write-file-hooks 'time-stamp) +@c page-delimiter: "^\\\\message" +@c time-stamp-start: "def\\\\texinfoversion{" +@c time-stamp-format: "%:y-%02m-%02d.%02H" +@c time-stamp-end: "}" +@c End: + +@c vim:sw=2: + +@ignore + arch-tag: e1b36e32-c96e-4135-a41a-0b2efa2ea115 +@end ignore diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/threads.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/threads.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,219 @@ +@node Multi-threaded FFTW, Distributed-memory FFTW with MPI, FFTW Reference, Top +@chapter Multi-threaded FFTW + +@cindex parallel transform +In this chapter we document the parallel FFTW routines for +shared-memory parallel hardware. These routines, which support +parallel one- and multi-dimensional transforms of both real and +complex data, are the easiest way to take advantage of multiple +processors with FFTW. They work just like the corresponding +uniprocessor transform routines, except that you have an extra +initialization routine to call, and there is a routine to set the +number of threads to employ. Any program that uses the uniprocessor +FFTW can therefore be trivially modified to use the multi-threaded +FFTW. + +A shared-memory machine is one in which all CPUs can directly access +the same main memory, and such machines are now common due to the +ubiquity of multi-core CPUs. FFTW's multi-threading support allows +you to utilize these additional CPUs transparently from a single +program. However, this does not necessarily translate into +performance gains---when multiple threads/CPUs are employed, there is +an overhead required for synchronization that may outweigh the +computational parallelism. Therefore, you can only benefit from +threads if your problem is sufficiently large.
+@cindex shared-memory +@cindex threads + +@menu +* Installation and Supported Hardware/Software:: +* Usage of Multi-threaded FFTW:: +* How Many Threads to Use?:: +* Thread safety:: +@end menu + +@c ------------------------------------------------------------ +@node Installation and Supported Hardware/Software, Usage of Multi-threaded FFTW, Multi-threaded FFTW, Multi-threaded FFTW +@section Installation and Supported Hardware/Software + +All of the FFTW threads code is located in the @code{threads} +subdirectory of the FFTW package. On Unix systems, the FFTW threads +libraries and header files can be automatically configured, compiled, +and installed along with the uniprocessor FFTW libraries simply by +including @code{--enable-threads} in the flags to the @code{configure} +script (@pxref{Installation on Unix}), or @code{--enable-openmp} to use +@uref{http://www.openmp.org,OpenMP} threads. +@fpindex configure + + +@cindex portability +@cindex OpenMP +The threads routines require your operating system to have some sort +of shared-memory threads support. Specifically, the FFTW threads +package works with POSIX threads (available on most Unix variants, +from GNU/Linux to MacOS X) and Win32 threads. OpenMP threads, which +are supported in many common compilers (e.g. gcc) are also supported, +and may give better performance on some systems. (OpenMP threads are +also useful if you are employing OpenMP in your own code, in order to +minimize conflicts between threading models.) If you have a +shared-memory machine that uses a different threads API, it should be +a simple matter of programming to include support for it; see the file +@code{threads/threads.c} for more detail. + +You can compile FFTW with @emph{both} @code{--enable-threads} and +@code{--enable-openmp} at the same time, since they install libraries +with different names (@samp{fftw3_threads} and @samp{fftw3_omp}, as +described below). 
However, your programs may only link to @emph{one} +of these two libraries at a time. + +Ideally, of course, you should also have multiple processors in order to +get any benefit from the threaded transforms. + +@c ------------------------------------------------------------ +@node Usage of Multi-threaded FFTW, How Many Threads to Use?, Installation and Supported Hardware/Software, Multi-threaded FFTW +@section Usage of Multi-threaded FFTW + +Here, it is assumed that the reader is already familiar with the usage +of the uniprocessor FFTW routines, described elsewhere in this manual. +We only describe what one has to change in order to use the +multi-threaded routines. + +@cindex OpenMP +First, programs using the parallel complex transforms should be linked +with @code{-lfftw3_threads -lfftw3 -lm} on Unix, or @code{-lfftw3_omp +-lfftw3 -lm} if you compiled with OpenMP. You will also need to link +with whatever library is responsible for threads on your system +(e.g. @code{-lpthread} on GNU/Linux) or include whatever compiler flag +enables OpenMP (e.g. @code{-fopenmp} with gcc). +@cindex linking on Unix + + +Second, before calling @emph{any} FFTW routines, you should call the +function: + +@example +int fftw_init_threads(void); +@end example +@findex fftw_init_threads + +This function, which need only be called once, performs any one-time +initialization required to use threads on your system. It returns zero +if there was some error (which should not happen under normal +circumstances) and a non-zero value otherwise. + +Third, before creating a plan that you want to parallelize, you should +call: + +@example +void fftw_plan_with_nthreads(int nthreads); +@end example +@findex fftw_plan_with_nthreads + +The @code{nthreads} argument indicates the number of threads you want +FFTW to use (or actually, the maximum number). All plans subsequently +created with any planner routine will use that many threads. 
You can +call @code{fftw_plan_with_nthreads}, create some plans, call +@code{fftw_plan_with_nthreads} again with a different argument, and +create some more plans for a new number of threads. Plans already created +before a call to @code{fftw_plan_with_nthreads} are unaffected. If you +pass an @code{nthreads} argument of @code{1} (the default), threads are +disabled for subsequent plans. + +@cindex OpenMP +With OpenMP, to configure FFTW to use all of the currently running +OpenMP threads (set by @code{omp_set_num_threads(nthreads)} or by the +@code{OMP_NUM_THREADS} environment variable), you can do: +@code{fftw_plan_with_nthreads(omp_get_max_threads())}. (The @samp{omp_} +OpenMP functions are declared via @code{#include <omp.h>}.) + +@cindex thread safety +Given a plan, you then execute it as usual with +@code{fftw_execute(plan)}, and the execution will use the number of +threads specified when the plan was created. When done, you destroy +it as usual with @code{fftw_destroy_plan}. As described in +@ref{Thread safety}, plan @emph{execution} is thread-safe, but plan +creation and destruction are @emph{not}: you should create/destroy +plans only from a single thread, but can safely execute multiple plans +in parallel. + +There is one additional routine: if you want to get rid of all memory +and other resources allocated internally by FFTW, you can call: + +@example +void fftw_cleanup_threads(void); +@end example +@findex fftw_cleanup_threads + +which is much like the @code{fftw_cleanup()} function except that it +also gets rid of threads-related data. You must @emph{not} execute any +previously created plans after calling this function. + +We should also mention one other restriction: if you save wisdom from a +program using the multi-threaded FFTW, that wisdom @emph{cannot be used} +by a program using only the single-threaded FFTW (i.e. not calling +@code{fftw_init_threads}). @xref{Words of Wisdom-Saving Plans}.
+ +@c ------------------------------------------------------------ +@node How Many Threads to Use?, Thread safety, Usage of Multi-threaded FFTW, Multi-threaded FFTW +@section How Many Threads to Use? + +@cindex number of threads +There is a fair amount of overhead involved in synchronizing threads, +so the optimal number of threads to use depends upon the size of the +transform as well as on the number of processors you have. + +As a general rule, you don't want to use more threads than you have +processors. (Using more threads will work, but there will be extra +overhead with no benefit.) In fact, if the problem size is too small, +you may want to use fewer threads than you have processors. + +You will have to experiment with your system to see what level of +parallelization is best for your problem size. Typically, the problem +will have to involve at least a few thousand data points before threads +become beneficial. If you plan with @code{FFTW_PATIENT}, it will +automatically disable threads for sizes that don't benefit from +parallelization. +@ctindex FFTW_PATIENT + +@c ------------------------------------------------------------ +@node Thread safety, , How Many Threads to Use?, Multi-threaded FFTW +@section Thread safety + +@cindex threads +@cindex OpenMP +@cindex thread safety +Users writing multi-threaded programs (including OpenMP) must concern +themselves with the @dfn{thread safety} of the libraries they +use---that is, whether it is safe to call routines in parallel from +multiple threads. FFTW can be used in such an environment, but some +care must be taken because the planner routines share data +(e.g. wisdom and trigonometric tables) between calls and plans. + +The upshot is that the only thread-safe (re-entrant) routine in FFTW is +@code{fftw_execute} (and the new-array variants thereof). All other routines +(e.g. the planner) should only be called from one thread at a time. 
So, +for example, you can wrap a semaphore lock around any calls to the +planner; even more simply, you can just create all of your plans from +one thread. We do not think this should be an important restriction +(FFTW is designed for the situation where the only performance-sensitive +code is the actual execution of the transform), and the benefits of +shared data between plans are great. + +Note also that, since the plan is not modified by @code{fftw_execute}, +it is safe to execute the @emph{same plan} in parallel by multiple +threads. However, since a given plan operates by default on a fixed +array, you need to use one of the new-array execute functions (@pxref{New-array Execute Functions}) so that different threads compute the transform of different data. + +(Users should note that these comments only apply to programs using +shared-memory threads or OpenMP. Parallelism using MPI or forked processes +involves a separate address-space and global variables for each process, +and is not susceptible to problems of this sort.) + +If you configured FFTW with the @code{--enable-debug} or +@code{--enable-debug-malloc} flags (@pxref{Installation on Unix}), +then @code{fftw_execute} is not thread-safe. These flags are not +documented because they are intended only for developing +and debugging FFTW, but if you must use @code{--enable-debug} then you +should also specifically pass @code{--disable-debug-malloc} for +@code{fftw_execute} to be thread-safe.
+ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/tutorial.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/tutorial.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,905 @@ +@node Tutorial, Other Important Topics, Introduction, Top +@chapter Tutorial +@menu +* Complex One-Dimensional DFTs:: +* Complex Multi-Dimensional DFTs:: +* One-Dimensional DFTs of Real Data:: +* Multi-Dimensional DFTs of Real Data:: +* More DFTs of Real Data:: +@end menu + +This chapter describes the basic usage of FFTW, i.e., how to compute +@cindex basic interface +the Fourier transform of a single array. This chapter tells the +truth, but not the @emph{whole} truth. Specifically, FFTW implements +additional routines and flags that are not documented here, although +in many cases we try to indicate where added capabilities exist. For +more complete information, see @ref{FFTW Reference}. (Note that you +need to compile and install FFTW before you can use it in a program. +For the details of the installation, see @ref{Installation and +Customization}.) + +We recommend that you read this tutorial in order.@footnote{You can +read the tutorial in bit-reversed order after computing your first +transform.} At the least, read the first section (@pxref{Complex +One-Dimensional DFTs}) before reading any of the others, even if your +main interest lies in one of the other transform types. + +Users of FFTW version 2 and earlier may also want to read @ref{Upgrading +from FFTW version 2}. + +@c ------------------------------------------------------------ +@node Complex One-Dimensional DFTs, Complex Multi-Dimensional DFTs, Tutorial, Tutorial +@section Complex One-Dimensional DFTs + +@quotation +Plan: To bother about the best method of accomplishing an accidental result. +[Ambrose Bierce, @cite{The Enlarged Devil's Dictionary}.] 
+@cindex Devil +@end quotation + +@iftex +@medskip +@end iftex + +The basic usage of FFTW to compute a one-dimensional DFT of size +@code{N} is simple, and it typically looks something like this code: + +@example +#include <fftw3.h> +... +@{ + fftw_complex *in, *out; + fftw_plan p; + ... + in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); + out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); + p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); + ... + fftw_execute(p); /* @r{repeat as needed} */ + ... + fftw_destroy_plan(p); + fftw_free(in); fftw_free(out); +@} +@end example + +You must link this code with the @code{fftw3} library. On Unix systems, +link with @code{-lfftw3 -lm}. + +The example code first allocates the input and output arrays. You can +allocate them in any way that you like, but we recommend using +@code{fftw_malloc}, which behaves like +@findex fftw_malloc +@code{malloc} except that it properly aligns the array when SIMD +instructions (such as SSE and Altivec) are available (@pxref{SIMD +alignment and fftw_malloc}). [Alternatively, we provide a convenient wrapper function @code{fftw_alloc_complex(N)} which has the same effect.] +@findex fftw_alloc_complex +@cindex SIMD + + +The data is an array of type @code{fftw_complex}, which is by default a +@code{double[2]} composed of the real (@code{in[i][0]}) and imaginary +(@code{in[i][1]}) parts of a complex number. +@tindex fftw_complex + +The next step is to create a @dfn{plan}, which is an object +@cindex plan +that contains all the data that FFTW needs to compute the FFT. +This function creates the plan: + +@example +fftw_plan fftw_plan_dft_1d(int n, fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +@end example +@findex fftw_plan_dft_1d +@tindex fftw_plan + +The first argument, @code{n}, is the size of the transform you are +trying to compute.
The size @code{n} can be any positive integer, but +sizes that are products of small factors are transformed most +efficiently (although prime sizes still use an @Onlogn{} algorithm). + +The next two arguments are pointers to the input and output arrays of +the transform. These pointers can be equal, indicating an +@dfn{in-place} transform. +@cindex in-place + + +The fourth argument, @code{sign}, can be either @code{FFTW_FORWARD} +(@code{-1}) or @code{FFTW_BACKWARD} (@code{+1}), +@ctindex FFTW_FORWARD +@ctindex FFTW_BACKWARD +and indicates the direction of the transform you are interested in; +technically, it is the sign of the exponent in the transform. + +The @code{flags} argument is usually either @code{FFTW_MEASURE} or +@cindex flags +@code{FFTW_ESTIMATE}. @code{FFTW_MEASURE} instructs FFTW to run +@ctindex FFTW_MEASURE +and measure the execution time of several FFTs in order to find the +best way to compute the transform of size @code{n}. This process takes +some time (usually a few seconds), depending on your machine and on +the size of the transform. @code{FFTW_ESTIMATE}, on the contrary, +does not run any computation and just builds a +@ctindex FFTW_ESTIMATE +reasonable plan that is probably sub-optimal. In short, if your +program performs many transforms of the same size and initialization +time is not important, use @code{FFTW_MEASURE}; otherwise use the +estimate. + +@emph{You must create the plan before initializing the input}, because +@code{FFTW_MEASURE} overwrites the @code{in}/@code{out} arrays. +(Technically, @code{FFTW_ESTIMATE} does not touch your arrays, but you +should always create plans first just to be sure.) 
+ +Once the plan has been created, you can use it as many times as you +like for transforms on the specified @code{in}/@code{out} arrays, +computing the actual transforms via @code{fftw_execute(plan)}: +@example +void fftw_execute(const fftw_plan plan); +@end example +@findex fftw_execute + +The DFT results are stored in-order in the array @code{out}, with the +zero-frequency (DC) component in @code{out[0]}. +@cindex frequency +If @code{in != out}, the transform is @dfn{out-of-place} and the input +array @code{in} is not modified. Otherwise, the input array is +overwritten with the transform. + +@cindex execute +If you want to transform a @emph{different} array of the same size, you +can create a new plan with @code{fftw_plan_dft_1d} and FFTW +automatically reuses the information from the previous plan, if +possible. Alternatively, with the ``guru'' interface you can apply a +given plan to a different array, if you are careful. +@xref{FFTW Reference}. + +When you are done with the plan, you deallocate it by calling +@code{fftw_destroy_plan(plan)}: +@example +void fftw_destroy_plan(fftw_plan plan); +@end example +@findex fftw_destroy_plan +If you allocate an array with @code{fftw_malloc()} you must deallocate +it with @code{fftw_free()}. Do not use @code{free()} or, heaven +forbid, @code{delete}. +@findex fftw_free + +FFTW computes an @emph{unnormalized} DFT. Thus, computing a forward +followed by a backward transform (or vice versa) results in the original +array scaled by @code{n}. For the definition of the DFT, see @ref{What +FFTW Really Computes}. +@cindex DFT +@cindex normalization + + +If you have a C compiler, such as @code{gcc}, that supports the +C99 standard, and you @code{#include <complex.h>} @emph{before} +@code{<fftw3.h>}, then @code{fftw_complex} is the native +double-precision complex type and you can manipulate it with ordinary +arithmetic. Otherwise, FFTW defines its own complex type, which is +bit-compatible with the C99 complex type. @xref{Complex numbers}.
+(The C++ @code{<complex>} template class may also be usable via a +typecast.) +@cindex C++ + +To use single or long-double precision versions of FFTW, replace the +@code{fftw_} prefix by @code{fftwf_} or @code{fftwl_} and link with +@code{-lfftw3f} or @code{-lfftw3l}, but use the @emph{same} +@code{<fftw3.h>} header file. +@cindex precision + + +Many more flags exist besides @code{FFTW_MEASURE} and +@code{FFTW_ESTIMATE}. For example, use @code{FFTW_PATIENT} if you're +willing to wait even longer for a possibly even faster plan (@pxref{FFTW +Reference}). +@ctindex FFTW_PATIENT +You can also save plans for future use, as described by @ref{Words of +Wisdom-Saving Plans}. + +@c ------------------------------------------------------------ +@node Complex Multi-Dimensional DFTs, One-Dimensional DFTs of Real Data, Complex One-Dimensional DFTs, Tutorial +@section Complex Multi-Dimensional DFTs + +Multi-dimensional transforms work much the same way as one-dimensional +transforms: you allocate arrays of @code{fftw_complex} (preferably +using @code{fftw_malloc}), create an @code{fftw_plan}, execute it as +many times as you want with @code{fftw_execute(plan)}, and clean up +with @code{fftw_destroy_plan(plan)} (and @code{fftw_free}). + +FFTW provides two routines for creating plans for 2d and 3d transforms, +and one routine for creating plans of arbitrary dimensionality. +The 2d and 3d routines have the following signature: +@example +fftw_plan fftw_plan_dft_2d(int n0, int n1, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +@end example +@findex fftw_plan_dft_2d +@findex fftw_plan_dft_3d + +These routines create plans for @code{n0} by @code{n1} two-dimensional +(2d) transforms and @code{n0} by @code{n1} by @code{n2} 3d transforms, +respectively. 
All of these transforms operate on contiguous arrays in +the C-standard @dfn{row-major} order, so that the last dimension has the +fastest-varying index in the array. This layout is described further in +@ref{Multi-dimensional Array Format}. + +FFTW can also compute transforms of higher dimensionality. In order to +avoid confusion between the various meanings of the word +``dimension'', we use the term @emph{rank} +@cindex rank +to denote the number of independent indices in an array.@footnote{The +term ``rank'' is commonly used in the APL, FORTRAN, and Common Lisp +traditions, although it is not so common in the C@tie{}world.} For +example, we say that a 2d transform has rank@tie{}2, a 3d transform has +rank@tie{}3, and so on. You can plan transforms of arbitrary rank by +means of the following function: + +@example +fftw_plan fftw_plan_dft(int rank, const int *n, + fftw_complex *in, fftw_complex *out, + int sign, unsigned flags); +@end example +@findex fftw_plan_dft + +Here, @code{n} is a pointer to an array @code{n[rank]} denoting an +@code{n[0]} by @code{n[1]} by @dots{} by @code{n[rank-1]} transform. +Thus, for example, the call +@example +fftw_plan_dft_2d(n0, n1, in, out, sign, flags); +@end example +is equivalent to the following code fragment: +@example +int n[2]; +n[0] = n0; +n[1] = n1; +fftw_plan_dft(2, n, in, out, sign, flags); +@end example +@code{fftw_plan_dft} is not restricted to 2d and 3d transforms, +however, but it can plan transforms of arbitrary rank. + +You may have noticed that all the planner routines described so far +have overlapping functionality. For example, you can plan a 1d or 2d +transform by using @code{fftw_plan_dft} with a @code{rank} of @code{1} +or @code{2}, or even by calling @code{fftw_plan_dft_3d} with @code{n0} +and/or @code{n1} equal to @code{1} (with no loss in efficiency). 
This +pattern continues, and FFTW's planning routines in general form a +``partial order,'' sequences of +@cindex partial order +interfaces with strictly increasing generality but correspondingly +greater complexity. + +@code{fftw_plan_dft} is the most general complex-DFT routine that we +describe in this tutorial, but there are also the advanced and guru interfaces, +@cindex advanced interface +@cindex guru interface +which allow one to efficiently combine multiple/strided transforms +into a single FFTW plan, transform a subset of a larger +multi-dimensional array, and/or to handle more general complex-number +formats. For more information, see @ref{FFTW Reference}. + +@c ------------------------------------------------------------ +@node One-Dimensional DFTs of Real Data, Multi-Dimensional DFTs of Real Data, Complex Multi-Dimensional DFTs, Tutorial +@section One-Dimensional DFTs of Real Data + +In many practical applications, the input data @code{in[i]} are purely +real numbers, in which case the DFT output satisfies the ``Hermitian'' +@cindex Hermitian +redundancy: @code{out[i]} is the conjugate of @code{out[n-i]}. It is +possible to take advantage of these circumstances in order to achieve +roughly a factor of two improvement in both speed and memory usage. + +In exchange for these speed and space advantages, the user sacrifices +some of the simplicity of FFTW's complex transforms. First of all, the +input and output arrays are of @emph{different sizes and types}: the +input is @code{n} real numbers, while the output is @code{n/2+1} +complex numbers (the non-redundant outputs); this also requires slight +``padding'' of the input array for +@cindex padding +in-place transforms. Second, the inverse transform (complex to real) +has the side-effect of @emph{overwriting its input array}, by default. +Neither of these inconveniences should pose a serious problem for +users, but it is important to be aware of them. 
+ +The routines to perform real-data transforms are almost the same as +those for complex transforms: you allocate arrays of @code{double} +and/or @code{fftw_complex} (preferably using @code{fftw_malloc} or +@code{fftw_alloc_complex}), create an @code{fftw_plan}, execute it as +many times as you want with @code{fftw_execute(plan)}, and clean up +with @code{fftw_destroy_plan(plan)} (and @code{fftw_free}). The only +differences are that the input (or output) is of type @code{double} +and there are new routines to create the plan. In one dimension: + +@example +fftw_plan fftw_plan_dft_r2c_1d(int n, double *in, fftw_complex *out, + unsigned flags); +fftw_plan fftw_plan_dft_c2r_1d(int n, fftw_complex *in, double *out, + unsigned flags); +@end example +@findex fftw_plan_dft_r2c_1d +@findex fftw_plan_dft_c2r_1d + +for the real input to complex-Hermitian output (@dfn{r2c}) and +complex-Hermitian input to real output (@dfn{c2r}) transforms. +@cindex r2c +@cindex c2r +Unlike the complex DFT planner, there is no @code{sign} argument. +Instead, r2c DFTs are always @code{FFTW_FORWARD} and c2r DFTs are +always @code{FFTW_BACKWARD}. +@ctindex FFTW_FORWARD +@ctindex FFTW_BACKWARD +(For single/long-double precision +@code{fftwf} and @code{fftwl}, @code{double} should be replaced by +@code{float} and @code{long double}, respectively.) +@cindex precision + + +Here, @code{n} is the ``logical'' size of the DFT, not necessarily the +physical size of the array. In particular, the real (@code{double}) +array has @code{n} elements, while the complex (@code{fftw_complex}) +array has @code{n/2+1} elements (where the division is rounded down). +For an in-place transform, +@cindex in-place +@code{in} and @code{out} are aliased to the same array, which must be +big enough to hold both; so, the real array would actually have +@code{2*(n/2+1)} elements, where the elements beyond the first +@code{n} are unused padding. 
(Note that this is very different from +the concept of ``zero-padding'' a transform to a larger length, which +changes the logical size of the DFT by actually adding new input +data.) The @math{k}th element of the complex array is exactly the +same as the @math{k}th element of the corresponding complex DFT. All +positive @code{n} are supported; products of small factors are most +efficient, but an @Onlogn algorithm is used even for prime sizes. + +As noted above, the c2r transform destroys its input array even for +out-of-place transforms. This can be prevented, if necessary, by +including @code{FFTW_PRESERVE_INPUT} in the @code{flags}, with +unfortunately some sacrifice in performance. +@cindex flags +@ctindex FFTW_PRESERVE_INPUT +This flag is also not currently supported for multi-dimensional real +DFTs (next section). + +Readers familiar with DFTs of real data will recall that the 0th (the +``DC'') and @code{n/2}-th (the ``Nyquist'' frequency, when @code{n} is +even) elements of the complex output are purely real. Some +implementations therefore store the Nyquist element where the DC +imaginary part would go, in order to make the input and output arrays +the same size. Such packing, however, does not generalize well to +multi-dimensional transforms, and the space savings are miniscule in +any case; FFTW does not support it. + +An alternative interface for one-dimensional r2c and c2r DFTs can be +found in the @samp{r2r} interface (@pxref{The Halfcomplex-format +DFT}), with ``halfcomplex''-format output that @emph{is} the same size +(and type) as the input array. +@cindex halfcomplex format +That interface, although it is not very useful for multi-dimensional +transforms, may sometimes yield better performance. 
+ +@c ------------------------------------------------------------ +@node Multi-Dimensional DFTs of Real Data, More DFTs of Real Data, One-Dimensional DFTs of Real Data, Tutorial +@section Multi-Dimensional DFTs of Real Data + +Multi-dimensional DFTs of real data use the following planner routines: + +@example +fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1, + double *in, fftw_complex *out, + unsigned flags); +fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2, + double *in, fftw_complex *out, + unsigned flags); +fftw_plan fftw_plan_dft_r2c(int rank, const int *n, + double *in, fftw_complex *out, + unsigned flags); +@end example +@findex fftw_plan_dft_r2c_2d +@findex fftw_plan_dft_r2c_3d +@findex fftw_plan_dft_r2c + +as well as the corresponding @code{c2r} routines with the input/output +types swapped. These routines work similarly to their complex +analogues, except for the fact that here the complex output array is cut +roughly in half and the real array requires padding for in-place +transforms (as in 1d, above). + +As before, @code{n} is the logical size of the array, and the +consequences of this on the format of the complex arrays deserve +careful attention. +@cindex r2c/c2r multi-dimensional array format +Suppose that the real data has dimensions @ndims (in row-major order). +Then, after an r2c transform, the output is an @ndimshalf array of +@code{fftw_complex} values in row-major order, corresponding to slightly +over half of the output of the corresponding complex DFT. (The division +is rounded down.) The ordering of the data is otherwise exactly the +same as in the complex-DFT case. + +For out-of-place transforms, this is the end of the story: the real +data is stored as a row-major array of size @ndims and the complex +data is stored as a row-major array of size @ndimshalf{}. 
+ +For in-place transforms, however, extra padding of the real-data array +is necessary because the complex array is larger than the real array, +and the two arrays share the same memory locations. Thus, for +in-place transforms, the final dimension of the real-data array must +be padded with extra values to accommodate the size of the complex +data---two values if the last dimension is even and one if it is odd. +@cindex padding +That is, the last dimension of the real data must physically contain +@tex +$2 (n_{d-1}/2+1)$ +@end tex +@ifinfo +2 * (n[d-1]/2+1) +@end ifinfo +@html +2 * (n<sub>d-1</sub>/2+1) +@end html +@code{double} values (exactly enough to hold the complex data). +This physical array size does not, however, change the @emph{logical} +array size---only +@tex +$n_{d-1}$ +@end tex +@ifinfo +n[d-1] +@end ifinfo +@html +n<sub>d-1</sub> +@end html +values are actually stored in the last dimension, and +@tex +$n_{d-1}$ +@end tex +@ifinfo +n[d-1] +@end ifinfo +@html +n<sub>d-1</sub> +@end html +is the last dimension passed to the plan-creation routine. + +For example, consider the transform of a two-dimensional real array of +size @code{n0} by @code{n1}. The output of the r2c transform is a +two-dimensional complex array of size @code{n0} by @code{n1/2+1}, where +the @code{y} dimension has been cut nearly in half because of +redundancies in the output. Because @code{fftw_complex} is twice the +size of @code{double}, the output array is slightly bigger than the +input array. Thus, if we want to compute the transform in place, we +must @emph{pad} the input array so that it is of size @code{n0} by +@code{2*(n1/2+1)}. If @code{n1} is even, then there are two padding +elements at the end of each row (which need not be initialized, as they +are only used for output). 
+ +@ifhtml +The following illustration depicts the input and output arrays just +described, for both the out-of-place and in-place transforms (with the +arrows indicating consecutive memory locations): +@image{rfftwnd-for-html} +@end ifhtml +@ifnotinfo +@ifnothtml +@float Figure,fig:rfftwnd +@center @image{rfftwnd} +@caption{Illustration of the data layout for a 2d @code{nx} by @code{ny} +real-to-complex transform.} +@end float +@ref{fig:rfftwnd} depicts the input and output arrays just +described, for both the out-of-place and in-place transforms (with the +arrows indicating consecutive memory locations): +@end ifnothtml +@end ifnotinfo + +These transforms are unnormalized, so an r2c followed by a c2r +transform (or vice versa) will result in the original data scaled by +the number of real data elements---that is, the product of the +(logical) dimensions of the real data. +@cindex normalization + + +(Because the last dimension is treated specially, if it is equal to +@code{1} the transform is @emph{not} equivalent to a lower-dimensional +r2c/c2r transform. In that case, the last complex dimension also has +size @code{1} (@code{=1/2+1}), and no advantage is gained over the +complex transforms.) + +@c ------------------------------------------------------------ +@node More DFTs of Real Data, , Multi-Dimensional DFTs of Real Data, Tutorial +@section More DFTs of Real Data +@menu +* The Halfcomplex-format DFT:: +* Real even/odd DFTs (cosine/sine transforms):: +* The Discrete Hartley Transform:: +@end menu + +FFTW supports several other transform types via a unified @dfn{r2r} +(real-to-real) interface, +@cindex r2r +so called because it takes a real (@code{double}) array and outputs a +real array of the same size. These r2r transforms currently fall into +three categories: DFTs of real input and complex-Hermitian output in +halfcomplex format, DFTs of real input with even/odd symmetry +(a.k.a. 
discrete cosine/sine transforms, DCTs/DSTs), and discrete +Hartley transforms (DHTs), all described in more detail by the +following sections. + +The r2r transforms follow the by now familiar interface of creating an +@code{fftw_plan}, executing it with @code{fftw_execute(plan)}, and +destroying it with @code{fftw_destroy_plan(plan)}. Furthermore, all +r2r transforms share the same planner interface: + +@example +fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out, + fftw_r2r_kind kind, unsigned flags); +fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out, + fftw_r2r_kind kind0, fftw_r2r_kind kind1, + unsigned flags); +fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2, + double *in, double *out, + fftw_r2r_kind kind0, + fftw_r2r_kind kind1, + fftw_r2r_kind kind2, + unsigned flags); +fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out, + const fftw_r2r_kind *kind, unsigned flags); +@end example +@findex fftw_plan_r2r_1d +@findex fftw_plan_r2r_2d +@findex fftw_plan_r2r_3d +@findex fftw_plan_r2r + +Just as for the complex DFT, these plan 1d/2d/3d/multi-dimensional +transforms for contiguous arrays in row-major order, transforming (real) +input to output of the same size, where @code{n} specifies the +@emph{physical} dimensions of the arrays. All positive @code{n} are +supported (with the exception of @code{n=1} for the @code{FFTW_REDFT00} +kind, noted in the real-even subsection below); products of small +factors are most efficient (factorizing @code{n-1} and @code{n+1} for +@code{FFTW_REDFT00} and @code{FFTW_RODFT00} kinds, described below), but +an @Onlogn algorithm is used even for prime sizes. + +Each dimension has a @dfn{kind} parameter, of type +@code{fftw_r2r_kind}, specifying the kind of r2r transform to be used +for that dimension. 
+@cindex kind (r2r) +@tindex fftw_r2r_kind +(In the case of @code{fftw_plan_r2r}, this is an array @code{kind[rank]} +where @code{kind[i]} is the transform kind for the dimension +@code{n[i]}.) The kind can be one of a set of predefined constants, +defined in the following subsections. + +In other words, FFTW computes the separable product of the specified +r2r transforms over each dimension, which can be used e.g. for partial +differential equations with mixed boundary conditions. (For some r2r +kinds, notably the halfcomplex DFT and the DHT, such a separable +product is somewhat problematic in more than one dimension, however, +as is described below.) + +In the current version of FFTW, all r2r transforms except for the +halfcomplex type are computed via pre- or post-processing of +halfcomplex transforms, and they are therefore not as fast as they +could be. Since most other general DCT/DST codes employ a similar +algorithm, however, FFTW's implementation should provide at least +competitive performance. + +@c =========> +@node The Halfcomplex-format DFT, Real even/odd DFTs (cosine/sine transforms), More DFTs of Real Data, More DFTs of Real Data +@subsection The Halfcomplex-format DFT + +An r2r kind of @code{FFTW_R2HC} (@dfn{r2hc}) corresponds to an r2c DFT +@ctindex FFTW_R2HC +@cindex r2c +@cindex r2hc +(@pxref{One-Dimensional DFTs of Real Data}) but with ``halfcomplex'' +format output, and may sometimes be faster and/or more convenient than +the latter. +@cindex halfcomplex format +The inverse @dfn{hc2r} transform is of kind @code{FFTW_HC2R}. +@ctindex FFTW_HC2R +@cindex hc2r +This consists of the non-redundant half of the complex output for a 1d +real-input DFT of size @code{n}, stored as a sequence of @code{n} real +numbers (@code{double}) in the format: + +@tex +$$ +r_0, r_1, r_2, \ldots, r_{n/2}, i_{(n+1)/2-1}, \ldots, i_2, i_1 +$$ +@end tex +@ifinfo +r0, r1, r2, r(n/2), i((n+1)/2-1), ..., i2, i1 +@end ifinfo +@html +

    +r<sub>0</sub>, r<sub>1</sub>, r<sub>2</sub>, ..., r<sub>n/2</sub>, i<sub>(n+1)/2-1</sub>, ..., i<sub>2</sub>, i<sub>1</sub> +

    +@end html + +Here, +@ifinfo +rk +@end ifinfo +@tex +$r_k$ +@end tex +@html +r<sub>k</sub> +@end html +is the real part of the @math{k}th output, and +@ifinfo +ik +@end ifinfo +@tex +$i_k$ +@end tex +@html +i<sub>k</sub> +@end html +is the imaginary part. (Division by 2 is rounded down.) For a +halfcomplex array @code{hc[n]}, the @math{k}th component thus has its +real part in @code{hc[k]} and its imaginary part in @code{hc[n-k]}, with +the exception of @code{k} @code{==} @code{0} or @code{n/2} (the latter +only if @code{n} is even)---in these two cases, the imaginary part is +zero due to symmetries of the real-input DFT, and is not stored. +Thus, the r2hc transform of @code{n} real values is a halfcomplex array of +length @code{n}, and vice versa for hc2r. +@cindex normalization + + +Aside from the differing format, the output of +@code{FFTW_R2HC}/@code{FFTW_HC2R} is otherwise exactly the same as for +the corresponding 1d r2c/c2r transform +(i.e. @code{FFTW_FORWARD}/@code{FFTW_BACKWARD} transforms, respectively). +Recall that these transforms are unnormalized, so r2hc followed by hc2r +will result in the original data multiplied by @code{n}. Furthermore, +like the c2r transform, an out-of-place hc2r transform will +@emph{destroy its input} array. + +Although these halfcomplex transforms can be used with the +multi-dimensional r2r interface, the interpretation of such a separable +product of transforms along each dimension is problematic. For example, +consider a two-dimensional @code{n0} by @code{n1}, r2hc by r2hc +transform planned by @code{fftw_plan_r2r_2d(n0, n1, in, out, FFTW_R2HC, +FFTW_R2HC, FFTW_MEASURE)}. Conceptually, FFTW first transforms the rows +(of size @code{n1}) to produce halfcomplex rows, and then transforms the +columns (of size @code{n0}). 
Half of these column transforms, however, +are of imaginary parts, and should therefore be multiplied by @math{i} +and combined with the r2hc transforms of the real columns to produce the +2d DFT amplitudes; FFTW's r2r transform does @emph{not} perform this +combination for you. Thus, if a multi-dimensional real-input/output DFT +is required, we recommend using the ordinary r2c/c2r +interface (@pxref{Multi-Dimensional DFTs of Real Data}). + +@c =========> +@node Real even/odd DFTs (cosine/sine transforms), The Discrete Hartley Transform, The Halfcomplex-format DFT, More DFTs of Real Data +@subsection Real even/odd DFTs (cosine/sine transforms) + +The Fourier transform of a real-even function @math{f(-x) = f(x)} is +real-even, and @math{i} times the Fourier transform of a real-odd +function @math{f(-x) = -f(x)} is real-odd. Similar results hold for a +discrete Fourier transform, and thus for these symmetries the need for +complex inputs/outputs is entirely eliminated. Moreover, one gains a +factor of two in speed/space from the fact that the data are real, and +an additional factor of two from the even/odd symmetry: only the +non-redundant (first) half of the array need be stored. The result is +the real-even DFT (@dfn{REDFT}) and the real-odd DFT (@dfn{RODFT}), also +known as the discrete cosine and sine transforms (@dfn{DCT} and +@dfn{DST}), respectively. +@cindex real-even DFT +@cindex REDFT +@cindex real-odd DFT +@cindex RODFT +@cindex discrete cosine transform +@cindex DCT +@cindex discrete sine transform +@cindex DST + + +(In this section, we describe the 1d transforms; multi-dimensional +transforms are just a separable product of these transforms operating +along each dimension.) + +Because of the discrete sampling, one has an additional choice: is the +data even/odd around a sampling point, or around the point halfway +between two samples? 
The latter corresponds to @emph{shifting} the +samples by @emph{half} an interval, and gives rise to several transform +variants denoted by REDFT@math{ab} and RODFT@math{ab}: @math{a} and +@math{b} are @math{0} or @math{1}, and indicate whether the input +(@math{a}) and/or output (@math{b}) are shifted by half a sample +(@math{1} means it is shifted). These are also known as types I-IV of +the DCT and DST, and all four types are supported by FFTW's r2r +interface.@footnote{There are also type V-VIII transforms, which +correspond to a logical DFT of @emph{odd} size @math{N}, independent of +whether the physical size @code{n} is odd, but we do not support these +variants.} + +The r2r kinds for the various REDFT and RODFT types supported by FFTW, +along with the boundary conditions at both ends of the @emph{input} +array (@code{n} real numbers @code{in[j=0..n-1]}), are: + +@itemize @bullet + +@item +@code{FFTW_REDFT00} (DCT-I): even around @math{j=0} and even around @math{j=n-1}. +@ctindex FFTW_REDFT00 + +@item +@code{FFTW_REDFT10} (DCT-II, ``the'' DCT): even around @math{j=-0.5} and even around @math{j=n-0.5}. +@ctindex FFTW_REDFT10 + +@item +@code{FFTW_REDFT01} (DCT-III, ``the'' IDCT): even around @math{j=0} and odd around @math{j=n}. +@ctindex FFTW_REDFT01 +@cindex IDCT + +@item +@code{FFTW_REDFT11} (DCT-IV): even around @math{j=-0.5} and odd around @math{j=n-0.5}. +@ctindex FFTW_REDFT11 + +@item +@code{FFTW_RODFT00} (DST-I): odd around @math{j=-1} and odd around @math{j=n}. +@ctindex FFTW_RODFT00 + +@item +@code{FFTW_RODFT10} (DST-II): odd around @math{j=-0.5} and odd around @math{j=n-0.5}. +@ctindex FFTW_RODFT10 + +@item +@code{FFTW_RODFT01} (DST-III): odd around @math{j=-1} and even around @math{j=n-1}. +@ctindex FFTW_RODFT01 + +@item +@code{FFTW_RODFT11} (DST-IV): odd around @math{j=-0.5} and even around @math{j=n-0.5}. 
+@ctindex FFTW_RODFT11 + +@end itemize + +Note that these symmetries apply to the ``logical'' array being +transformed; @strong{there are no constraints on your physical input +data}. So, for example, if you specify a size-5 REDFT00 (DCT-I) of the +data @math{abcde}, it corresponds to the DFT of the logical even array +@math{abcdedcb} of size 8. A size-4 REDFT10 (DCT-II) of the data +@math{abcd} corresponds to the size-8 logical DFT of the even array +@math{abcddcba}, shifted by half a sample. + +All of these transforms are invertible. The inverse of R*DFT00 is +R*DFT00; of R*DFT10 is R*DFT01 and vice versa (these are often called +simply ``the'' DCT and IDCT, respectively); and of R*DFT11 is R*DFT11. +However, the transforms computed by FFTW are unnormalized, exactly +like the corresponding real and complex DFTs, so computing a transform +followed by its inverse yields the original array scaled by @math{N}, +where @math{N} is the @emph{logical} DFT size. For REDFT00, +@math{N=2(n-1)}; for RODFT00, @math{N=2(n+1)}; otherwise, @math{N=2n}. +@cindex normalization +@cindex IDCT + + +Note that the boundary conditions of the transform output array are +given by the input boundary conditions of the inverse transform. +Thus, the above transforms are all inequivalent in terms of +input/output boundary conditions, even neglecting the 0.5 shift +difference. + +FFTW is most efficient when @math{N} is a product of small factors; note +that this @emph{differs} from the factorization of the physical size +@code{n} for REDFT00 and RODFT00! There is another oddity: @code{n=1} +REDFT00 transforms correspond to @math{N=0}, and so are @emph{not +defined} (the planner will return @code{NULL}). Otherwise, any positive +@code{n} is supported. + +For the precise mathematical definitions of these transforms as used by +FFTW, see @ref{What FFTW Really Computes}. 
(For people accustomed to +the DCT/DST, FFTW's definitions have a coefficient of @math{2} in front +of the cos/sin functions so that they correspond precisely to an +even/odd DFT of size @math{N}. Some authors also include additional +multiplicative factors of +@ifinfo +sqrt(2) +@end ifinfo +@html +√2 +@end html +@tex +$\sqrt{2}$ +@end tex +for selected inputs and outputs; this makes +the transform orthogonal, but sacrifices the direct equivalence to a +symmetric DFT.) + +@subsubheading Which type do you need? + +Since the required flavor of even/odd DFT depends upon your problem, +you are the best judge of this choice, but we can make a few comments +on relative efficiency to help you in your selection. In particular, +R*DFT01 and R*DFT10 tend to be slightly faster than R*DFT11 +(especially for odd sizes), while the R*DFT00 transforms are sometimes +significantly slower (especially for even sizes).@footnote{R*DFT00 is +sometimes slower in FFTW because we discovered that the standard +algorithm for computing this by a pre/post-processed real DFT---the +algorithm used in FFTPACK, Numerical Recipes, and other sources for +decades now---has serious numerical problems: it already loses several +decimal places of accuracy for 16k sizes. There seem to be only two +alternatives in the literature that do not suffer similarly: a +recursive decomposition into smaller DCTs, which would require a large +set of codelets for efficiency and generality, or sacrificing a factor of +@tex +$\sim 2$ +@end tex +@ifnottex +2 +@end ifnottex +in speed to use a real DFT of twice the size. We currently +employ the latter technique for general @math{n}, as well as a limited +form of the former method: a split-radix decomposition when @math{n} +is odd (@math{N} a multiple of 4). 
For @math{N} containing many +factors of 2, the split-radix method seems to recover most of the +speed of the standard algorithm without the accuracy tradeoff.} + +Thus, if only the boundary conditions on the transform inputs are +specified, we generally recommend R*DFT10 over R*DFT00 and R*DFT01 over +R*DFT11 (unless the half-sample shift or the self-inverse property is +significant for your problem). + +If performance is important to you and you are using only small sizes +(say @math{n<200}), e.g. for multi-dimensional transforms, then you +might consider generating hard-coded transforms of those sizes and types +that you are interested in (@pxref{Generating your own code}). + +We are interested in hearing what types of symmetric transforms you find +most useful. + +@c =========> +@node The Discrete Hartley Transform, , Real even/odd DFTs (cosine/sine transforms), More DFTs of Real Data +@subsection The Discrete Hartley Transform + +If you are planning to use the DHT because you've heard that it is +``faster'' than the DFT (FFT), @strong{stop here}. The DHT is not +faster than the DFT. That story is an old but enduring misconception +that was debunked in 1987. + +The discrete Hartley transform (DHT) is an invertible linear transform +closely related to the DFT. In the DFT, one multiplies each input by +@math{cos - i * sin} (a complex exponential), whereas in the DHT each +input is multiplied by simply @math{cos + sin}. Thus, the DHT +transforms @code{n} real numbers to @code{n} real numbers, and has the +convenient property of being its own inverse. In FFTW, a DHT (of any +positive @code{n}) can be specified by an r2r kind of @code{FFTW_DHT}. +@ctindex FFTW_DHT +@cindex discrete Hartley transform +@cindex DHT + +Like the DFT, in FFTW the DHT is unnormalized, so computing a DHT of +size @code{n} followed by another DHT of the same size will result in +the original array multiplied by @code{n}. 
+@cindex normalization + +The DHT was originally proposed as a more efficient alternative to the +DFT for real data, but it was subsequently shown that a specialized DFT +(such as FFTW's r2hc or r2c transforms) could be just as fast. In FFTW, +the DHT is actually computed by post-processing an r2hc transform, so +there is ordinarily no reason to prefer it from a performance +perspective.@footnote{We provide the DHT mainly as a byproduct of some +internal algorithms. FFTW computes a real input/output DFT of +@emph{prime} size by re-expressing it as a DHT plus post/pre-processing +and then using Rader's prime-DFT algorithm adapted to the DHT.} +However, we have heard rumors that the DHT might be the most appropriate +transform in its own right for certain applications, and we would be +very interested to hear from anyone who finds it useful. + +If @code{FFTW_DHT} is specified for multiple dimensions of a +multi-dimensional transform, FFTW computes the separable product of 1d +DHTs along each dimension. Unfortunately, this is not quite the same +thing as a true multi-dimensional DHT; you can compute the latter, if +necessary, with at most @code{rank-1} post-processing passes +[see e.g. H. Hao and R. N. Bracewell, @i{Proc. IEEE} @b{75}, 264--266 (1987)]. + +For the precise mathematical definition of the DHT as used by FFTW, see +@ref{What FFTW Really Computes}. + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/upgrading.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/upgrading.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,198 @@ +@node Upgrading from FFTW version 2, Installation and Customization, Calling FFTW from Legacy Fortran, Top +@chapter Upgrading from FFTW version 2 + +In this chapter, we outline the process for updating codes designed for +the older FFTW 2 interface to work with FFTW 3. 
The interface for FFTW +3 is not backwards-compatible with the interface for FFTW 2 and earlier +versions; codes written to use those versions will fail to link with +FFTW 3. Nor is it possible to write ``compatibility wrappers'' to +bridge the gap (at least not efficiently), because FFTW 3 has different +semantics from previous versions. However, upgrading should be a +straightforward process because the data formats are identical and the +overall style of planning/execution is essentially the same. + +Unlike FFTW 2, there are no separate header files for real and complex +transforms (or even for different precisions) in FFTW 3; all interfaces +are defined in the @code{<fftw3.h>} header file. + +@heading Numeric Types + +The main difference in data types is that @code{fftw_complex} in FFTW 2 +was defined as a @code{struct} with macros @code{c_re} and @code{c_im} +for accessing the real/imaginary parts. (This is binary-compatible with +FFTW 3 on any machine except perhaps for some older Crays in single +precision.) The equivalent macros for FFTW 3 are: + +@example +#define c_re(c) ((c)[0]) +#define c_im(c) ((c)[1]) +@end example + +This does not work if you are using the C99 complex type, however, +unless you insert a @code{double*} typecast into the above macros +(@pxref{Complex numbers}). + +Also, FFTW 2 had an @code{fftw_real} typedef that was an alias for +@code{double} (in double precision). In FFTW 3 you should just use +@code{double} (or whatever precision you are employing). + +@heading Plans + +The major difference between FFTW 2 and FFTW 3 is in the +planning/execution division of labor. In FFTW 2, plans were found for a +given transform size and type, and then could be applied to @emph{any} +arrays and for @emph{any} multiplicity/stride parameters. 
In FFTW 3, +you specify the particular arrays, stride parameters, etcetera when +creating the plan, and the plan is then executed for @emph{those} arrays +(unless the guru interface is used) and @emph{those} parameters +@emph{only}. (FFTW 2 had ``specific planner'' routines that planned for +a particular array and stride, but the plan could still be used for +other arrays and strides.) That is, much of the information that was +formerly specified at execution time is now specified at planning time. + +Like FFTW 2's specific planner routines, the FFTW 3 planner overwrites +the input/output arrays unless you use @code{FFTW_ESTIMATE}. + +FFTW 2 had separate data types @code{fftw_plan}, @code{fftwnd_plan}, +@code{rfftw_plan}, and @code{rfftwnd_plan} for complex and real one- and +multi-dimensional transforms, and each type had its own @samp{destroy} +function. In FFTW 3, all plans are of type @code{fftw_plan} and all are +destroyed by @code{fftw_destroy_plan(plan)}. + +Where you formerly used @code{fftw_create_plan} and @code{fftw_one} to +plan and compute a single 1d transform, you would now use +@code{fftw_plan_dft_1d} to plan the transform. If you used the generic +@code{fftw} function to execute the transform with multiplicity +(@code{howmany}) and stride parameters, you would now use the advanced +interface @code{fftw_plan_many_dft} to specify those parameters. The +plans are now executed with @code{fftw_execute(plan)}, which takes all +of its parameters (including the input/output arrays) from the plan. + +In-place transforms no longer interpret their output argument as scratch +space, nor is there an @code{FFTW_IN_PLACE} flag. You simply pass the +same pointer for both the input and output arguments. 
(Previously, the +output @code{ostride} and @code{odist} parameters were ignored for +in-place transforms; now, if they are specified via the advanced +interface, they are significant even in the in-place case, although they +should normally equal the corresponding input parameters.) + +The @code{FFTW_ESTIMATE} and @code{FFTW_MEASURE} flags have the same +meaning as before, although the planning time will differ. You may also +consider using @code{FFTW_PATIENT}, which is like @code{FFTW_MEASURE} +except that it takes more time in order to consider a wider variety of +algorithms. + +For multi-dimensional complex DFTs, instead of @code{fftwnd_create_plan} +(or @code{fftw2d_create_plan} or @code{fftw3d_create_plan}), followed by +@code{fftwnd_one}, you would use @code{fftw_plan_dft} (or +@code{fftw_plan_dft_2d} or @code{fftw_plan_dft_3d}), followed by +@code{fftw_execute}. If you used @code{fftwnd} to specify strides +etcetera, you would instead specify these via @code{fftw_plan_many_dft}. + +The analogues to @code{rfftw_create_plan} and @code{rfftw_one} with +@code{FFTW_REAL_TO_COMPLEX} or @code{FFTW_COMPLEX_TO_REAL} directions +are @code{fftw_plan_r2r_1d} with kind @code{FFTW_R2HC} or +@code{FFTW_HC2R}, followed by @code{fftw_execute}. The stride etcetera +arguments of @code{rfftw} are now in @code{fftw_plan_many_r2r}. + +Instead of @code{rfftwnd_create_plan} (or @code{rfftw2d_create_plan} or +@code{rfftw3d_create_plan}) followed by +@code{rfftwnd_one_real_to_complex} or +@code{rfftwnd_one_complex_to_real}, you now use @code{fftw_plan_dft_r2c} +(or @code{fftw_plan_dft_r2c_2d} or @code{fftw_plan_dft_r2c_3d}) or +@code{fftw_plan_dft_c2r} (or @code{fftw_plan_dft_c2r_2d} or +@code{fftw_plan_dft_c2r_3d}), respectively, followed by +@code{fftw_execute}. As usual, the strides etcetera of +@code{rfftwnd_real_to_complex} or @code{rfftwnd_complex_to_real} are now +specified in the advanced planner routines, +@code{fftw_plan_many_dft_r2c} or @code{fftw_plan_many_dft_c2r}.
+ +@heading Wisdom + +In FFTW 2, you had to supply the @code{FFTW_USE_WISDOM} flag in order to +use wisdom; in FFTW 3, wisdom is always used. (You could simulate the +FFTW 2 wisdom-less behavior by calling @code{fftw_forget_wisdom} after +every planner call.) + +The FFTW 3 wisdom import/export routines are almost the same as before +(although the storage format is entirely different). There is one +significant difference, however. In FFTW 2, the import routines would +never read past the end of the wisdom, so you could store extra data +beyond the wisdom in the same file, for example. In FFTW 3, the +file-import routine may read up to a few hundred bytes past the end of +the wisdom, so you cannot store other data just beyond it.@footnote{We +do our own buffering because GNU libc I/O routines are horribly slow for +single-character I/O, apparently for thread-safety reasons (whether you +are using threads or not).} + +Wisdom has been enhanced by additional humility in FFTW 3: whereas FFTW +2 would re-use wisdom for a given transform size regardless of the +stride etc., in FFTW 3 wisdom is only used with the strides etc. for +which it was created. Unfortunately, this means FFTW 3 has to create +new plans from scratch more often than FFTW 2 (in FFTW 2, planning +e.g. one transform of size 1024 also created wisdom for all smaller +powers of 2, but this no longer occurs). + +FFTW 3 also has the new routine @code{fftw_import_system_wisdom} to +import wisdom from a standard system-wide location. + +@heading Memory allocation + +In FFTW 3, we recommend allocating your arrays with @code{fftw_malloc} +and deallocating them with @code{fftw_free}; this is not required, but +allows optimal performance when SIMD acceleration is used. (Those two +functions actually existed in FFTW 2, and worked the same way, but were +not documented.) 
+ +In FFTW 2, there were @code{fftw_malloc_hook} and @code{fftw_free_hook} +functions that allowed the user to replace FFTW's memory-allocation +routines (e.g. to implement different error-handling, since by default +FFTW prints an error message and calls @code{exit} to abort the program +if @code{malloc} returns @code{NULL}). These hooks are not supported in +FFTW 3; those few users who require this functionality can just +directly modify the memory-allocation routines in FFTW (they are defined +in @code{kernel/alloc.c}). + +@heading Fortran interface + +In FFTW 2, the subroutine names were obtained by replacing @samp{fftw_} +with @samp{fftw_f77}; in FFTW 3, you replace @samp{fftw_} with +@samp{dfftw_} (or @samp{sfftw_} or @samp{lfftw_}, depending upon the +precision). + +In FFTW 3, we have begun recommending that you always declare the type +used to store plans as @code{integer*8}. (Too many people didn't notice +our instruction to switch from @code{integer} to @code{integer*8} for +64-bit machines.) + +In FFTW 3, we provide a @code{fftw3.f} ``header file'' to include in +your code (and which is officially installed on Unix systems). (In FFTW +2, we supplied a @code{fftw_f77.i} file, but it was not installed.) + +Otherwise, the C-Fortran interface relationship is much the same as it +was before (e.g. return values become initial parameters, and +multi-dimensional arrays are in column-major order). Unlike FFTW 2, we +do provide some support for wisdom import/export in Fortran +(@pxref{Wisdom of Fortran?}). + +@heading Threads + +Like FFTW 2, only the execution routines are thread-safe. All planner +routines, etcetera, should be called by only a single thread at a time +(@pxref{Thread safety}). @emph{Unlike} FFTW 2, there is no special +@code{FFTW_THREADSAFE} flag for the planner to allow a given plan to be +usable by multiple threads in parallel; this is now the case by default. 
+ +The multi-threaded version of FFTW 2 required you to pass the number of +threads each time you execute the transform. The number of threads is +now stored in the plan, and is specified before the planner is called by +@code{fftw_plan_with_nthreads}. The threads initialization routine used +to be called @code{fftw_threads_init} and would return zero on success; +the new routine is called @code{fftw_init_threads} and returns zero on +failure. @xref{Multi-threaded FFTW}. + +There is no separate threads header file in FFTW 3; all the function +prototypes are in @code{<fftw3.h>}. However, you still have to link to +a separate library (@code{-lfftw3_threads -lfftw3 -lm} on Unix), as well as +to the threading library (e.g. POSIX threads on Unix). + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/doc/version.texi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/doc/version.texi Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,4 @@ +@set UPDATED 20 September 2013 +@set UPDATED-MONTH September 2013 +@set EDITION 3.3.4 +@set VERSION 3.3.4 diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/fftw.pc.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/fftw.pc.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: FFTW +Description: fast Fourier transform library +Version: @VERSION@ +Libs: -L${libdir} -lfftw3@PREC_SUFFIX@ @LIBQUADMATH@ +Libs.private: -lm +Cflags: -I${includedir} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,25 @@ +# this makefile requires GNU make.
+ +EXTRA_DIST = algsimp.ml annotate.ml assoctable.ml c.ml complex.ml \ +conv.ml dag.ml expr.ml fft.ml gen_hc2c.ml gen_hc2cdft.ml \ +gen_hc2cdft_c.ml gen_hc2hc.ml gen_r2cb.ml gen_mdct.ml gen_notw.ml \ +gen_notw_c.ml gen_r2cf.ml gen_r2r.ml gen_twiddle.ml gen_twiddle_c.ml \ +gen_twidsq.ml gen_twidsq_c.ml genutil.ml littlesimp.ml magic.ml \ +monads.ml number.ml oracle.ml schedule.ml simd.ml simdmagic.ml \ +to_alist.ml trig.ml twiddle.ml unique.ml util.ml variable.ml \ +algsimp.mli annotate.mli assoctable.mli c.mli complex.mli conv.mli \ +dag.mli expr.mli fft.mli littlesimp.mli number.mli oracle.mli \ +schedule.mli simd.mli to_alist.mli trig.mli twiddle.mli unique.mli \ +util.mli variable.mli + +GENFFT_NATIVE=gen_notw.native gen_notw_c.native gen_twiddle.native \ +gen_twiddle_c.native gen_twidsq.native gen_twidsq_c.native \ +gen_r2r.native gen_r2cf.native gen_r2cb.native gen_hc2c.native \ +gen_hc2cdft.native gen_hc2cdft_c.native gen_hc2hc.native \ +gen_mdct.native + +all-local:: + $(OCAMLBUILD) -classic-display -libs unix,nums $(GENFFT_NATIVE) + +maintainer-clean-local:: + $(OCAMLBUILD) -classic-display -clean diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,486 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# this makefile requires GNU make. 
+VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = genfft +DIST_COMMON = $(srcdir)/Makefile.in 
$(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = 
@DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = 
@bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +EXTRA_DIST = algsimp.ml annotate.ml assoctable.ml c.ml complex.ml \ +conv.ml dag.ml expr.ml fft.ml gen_hc2c.ml gen_hc2cdft.ml \ +gen_hc2cdft_c.ml gen_hc2hc.ml gen_r2cb.ml gen_mdct.ml gen_notw.ml \ +gen_notw_c.ml gen_r2cf.ml gen_r2r.ml gen_twiddle.ml gen_twiddle_c.ml \ +gen_twidsq.ml gen_twidsq_c.ml genutil.ml littlesimp.ml magic.ml \ +monads.ml number.ml oracle.ml schedule.ml simd.ml simdmagic.ml \ +to_alist.ml trig.ml twiddle.ml unique.ml util.ml variable.ml \ +algsimp.mli annotate.mli assoctable.mli c.mli complex.mli conv.mli \ +dag.mli expr.mli fft.mli littlesimp.mli number.mli oracle.mli \ +schedule.mli simd.mli to_alist.mli trig.mli twiddle.mli unique.mli \ +util.mli variable.mli + +GENFFT_NATIVE = gen_notw.native gen_notw_c.native gen_twiddle.native \ +gen_twiddle_c.native gen_twidsq.native gen_twidsq_c.native \ +gen_r2r.native gen_r2cf.native gen_r2cb.native gen_hc2c.native \ +gen_hc2cdft.native gen_hc2cdft_c.native gen_hc2hc.native \ +gen_mdct.native + +all: all-am + 
+.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu genfft/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu genfft/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if 
test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile all-local +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am all-local check check-am clean clean-generic \ + clean-libtool cscopelist-am ctags-am distclean \ + distclean-generic distclean-libtool distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic maintainer-clean-local mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am + + +all-local:: + $(OCAMLBUILD) -classic-display -libs unix,nums $(GENFFT_NATIVE) + +maintainer-clean-local:: + $(OCAMLBUILD) -classic-display -clean + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/algsimp.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/algsimp.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,580 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + + +open Util +open Expr + +let node_insert x = Assoctable.insert Expr.hash x +let node_lookup x = Assoctable.lookup Expr.hash (==) x + +(************************************************************* + * Algebraic simplifier/elimination of common subexpressions + *************************************************************) +module AlgSimp : sig + val algsimp : expr list -> expr list +end = struct + + open Monads.StateMonad + open Monads.MemoMonad + open Assoctable + + let fetchSimp = + fetchState >>= fun (s, _) -> returnM s + let storeSimp s = + fetchState >>= (fun (_, c) -> storeState (s, c)) + let lookupSimpM key = + fetchSimp >>= fun table -> + returnM (node_lookup key table) + let insertSimpM key value = + fetchSimp >>= fun table -> + storeSimp (node_insert key value table) + + let subset a b = + List.for_all (fun x -> 
List.exists (fun y -> x == y) b) a + + let structurallyEqualCSE a b = + match (a, b) with + | (Num a, Num b) -> Number.equal a b + | (NaN a, NaN b) -> a == b + | (Load a, Load b) -> Variable.same a b + | (Times (a, a'), Times (b, b')) -> + ((a == b) && (a' == b')) or + ((a == b') && (a' == b)) + | (CTimes (a, a'), CTimes (b, b')) -> + ((a == b) && (a' == b')) or + ((a == b') && (a' == b)) + | (CTimesJ (a, a'), CTimesJ (b, b')) -> ((a == b) && (a' == b')) + | (Plus a, Plus b) -> subset a b && subset b a + | (Uminus a, Uminus b) -> (a == b) + | _ -> false + + let hashCSE x = + if (!Magic.randomized_cse) then + Oracle.hash x + else + Expr.hash x + + let equalCSE a b = + if (!Magic.randomized_cse) then + (structurallyEqualCSE a b || Oracle.likely_equal a b) + else + structurallyEqualCSE a b + + let fetchCSE = + fetchState >>= fun (_, c) -> returnM c + let storeCSE c = + fetchState >>= (fun (s, _) -> storeState (s, c)) + let lookupCSEM key = + fetchCSE >>= fun table -> + returnM (Assoctable.lookup hashCSE equalCSE key table) + let insertCSEM key value = + fetchCSE >>= fun table -> + storeCSE (Assoctable.insert hashCSE key value table) + + (* memoize both x and Uminus x (unless x is already negated) *) + let identityM x = + let memo x = memoizing lookupCSEM insertCSEM returnM x in + match x with + Uminus _ -> memo x + | _ -> memo x >>= fun x' -> memo (Uminus x') >> returnM x' + + let makeNode = identityM + + (* simplifiers for various kinds of nodes *) + let rec snumM = function + n when Number.is_zero n -> + makeNode (Num (Number.zero)) + | n when Number.negative n -> + makeNode (Num (Number.negate n)) >>= suminusM + | n -> makeNode (Num n) + + and suminusM = function + Uminus x -> makeNode x + | Num a when (Number.is_zero a) -> snumM Number.zero + | a -> makeNode (Uminus a) + + and stimesM = function + | (Uminus a, b) -> stimesM (a, b) >>= suminusM + | (a, Uminus b) -> stimesM (a, b) >>= suminusM + | (NaN I, CTimes (a, b)) -> stimesM (NaN I, b) >>= + fun ib -> sctimesM 
(a, ib) + | (NaN I, CTimesJ (a, b)) -> stimesM (NaN I, b) >>= + fun ib -> sctimesjM (a, ib) + | (Num a, Num b) -> snumM (Number.mul a b) + | (Num a, Times (Num b, c)) -> + snumM (Number.mul a b) >>= fun x -> stimesM (x, c) + | (Num a, b) when Number.is_zero a -> snumM Number.zero + | (Num a, b) when Number.is_one a -> makeNode b + | (Num a, b) when Number.is_mone a -> suminusM b + | (a, b) when is_known_constant b && not (is_known_constant a) -> + stimesM (b, a) + | (a, b) -> makeNode (Times (a, b)) + + and sctimesM = function + | (Uminus a, b) -> sctimesM (a, b) >>= suminusM + | (a, Uminus b) -> sctimesM (a, b) >>= suminusM + | (a, b) -> makeNode (CTimes (a, b)) + + and sctimesjM = function + | (Uminus a, b) -> sctimesjM (a, b) >>= suminusM + | (a, Uminus b) -> sctimesjM (a, b) >>= suminusM + | (a, b) -> makeNode (CTimesJ (a, b)) + + and reduce_sumM x = match x with + [] -> returnM [] + | [Num a] -> + if (Number.is_zero a) then + returnM [] + else returnM x + | [Uminus (Num a)] -> + if (Number.is_zero a) then + returnM [] + else returnM x + | (Num a) :: (Num b) :: s -> + snumM (Number.add a b) >>= fun x -> + reduce_sumM (x :: s) + | (Num a) :: (Uminus (Num b)) :: s -> + snumM (Number.sub a b) >>= fun x -> + reduce_sumM (x :: s) + | (Uminus (Num a)) :: (Num b) :: s -> + snumM (Number.sub b a) >>= fun x -> + reduce_sumM (x :: s) + | (Uminus (Num a)) :: (Uminus (Num b)) :: s -> + snumM (Number.add a b) >>= + suminusM >>= fun x -> + reduce_sumM (x :: s) + | ((Num _) as a) :: b :: s -> reduce_sumM (b :: a :: s) + | ((Uminus (Num _)) as a) :: b :: s -> reduce_sumM (b :: a :: s) + | a :: s -> + reduce_sumM s >>= fun s' -> returnM (a :: s') + + and collectible1 = function + | NaN _ -> false + | Uminus x -> collectible1 x + | _ -> true + and collectible (a, b) = collectible1 a + + (* collect common factors: ax + bx -> (a+b)x *) + and collectM which x = + let rec findCoeffM which = function + | Times (a, b) when collectible (which (a, b)) -> returnM (which (a, b)) + | 
Uminus x -> + findCoeffM which x >>= fun (coeff, b) -> + suminusM coeff >>= fun mcoeff -> + returnM (mcoeff, b) + | x -> snumM Number.one >>= fun one -> returnM (one, x) + and separateM xpr = function + [] -> returnM ([], []) + | a :: b -> + separateM xpr b >>= fun (w, wo) -> + (* try first factor *) + findCoeffM (fun (a, b) -> (a, b)) a >>= fun (c, x) -> + if (xpr == x) && collectible (c, x) then returnM (c :: w, wo) + else + (* try second factor *) + findCoeffM (fun (a, b) -> (b, a)) a >>= fun (c, x) -> + if (xpr == x) && collectible (c, x) then returnM (c :: w, wo) + else returnM (w, a :: wo) + in match x with + [] -> returnM x + | [a] -> returnM x + | a :: b -> + findCoeffM which a >>= fun (_, xpr) -> + separateM xpr x >>= fun (w, wo) -> + collectM which wo >>= fun wo' -> + splusM w >>= fun w' -> + stimesM (w', xpr) >>= fun t' -> + returnM (t':: wo') + + and mangleSumM x = returnM x + >>= reduce_sumM + >>= collectM (fun (a, b) -> (a, b)) + >>= collectM (fun (a, b) -> (b, a)) + >>= reduce_sumM + >>= deepCollectM !Magic.deep_collect_depth + >>= reduce_sumM + + and reorder_uminus = function (* push all Uminuses to the end *) + [] -> [] + | ((Uminus _) as a' :: b) -> (reorder_uminus b) @ [a'] + | (a :: b) -> a :: (reorder_uminus b) + + and canonicalizeM = function + [] -> snumM Number.zero + | [a] -> makeNode a (* one term *) + | a -> generateFusedMultAddM (reorder_uminus a) + + and generateFusedMultAddM = + let rec is_multiplication = function + | Times (Num a, b) -> true + | Uminus (Times (Num a, b)) -> true + | _ -> false + and separate = function + [] -> ([], [], Number.zero) + | (Times (Num a, b)) as this :: c -> + let (x, y, max) = separate c in + let newmax = if (Number.greater a max) then a else max in + (this :: x, y, newmax) + | (Uminus (Times (Num a, b))) as this :: c -> + let (x, y, max) = separate c in + let newmax = if (Number.greater a max) then a else max in + (this :: x, y, newmax) + | this :: c -> + let (x, y, max) = separate c in + (x, this :: y, 
max) + in fun l -> + if !Magic.enable_fma && count is_multiplication l >= 2 then + let (w, wo, max) = separate l in + snumM (Number.div Number.one max) >>= fun invmax' -> + snumM max >>= fun max' -> + mapM (fun x -> stimesM (invmax', x)) w >>= splusM >>= fun pw' -> + stimesM (max', pw') >>= fun mw' -> + splusM (wo @ [mw']) + else + makeNode (Plus l) + + + and negative = function + Uminus _ -> true + | _ -> false + + (* + * simplify patterns of the form + * + * ((c_1 * a + ...) + ...) + (c_2 * a + ...) + * + * The pattern includes arbitrary coefficients and minus signs. + * A common case of this pattern is the butterfly + * (a + b) + (a - b) + * (a + b) - (a - b) + *) + (* this whole procedure needs much more thought *) + and deepCollectM maxdepth l = + let rec findTerms depth x = match x with + | Uminus x -> findTerms depth x + | Times (Num _, b) -> (findTerms (depth - 1) b) + | Plus l when depth > 0 -> + x :: List.flatten (List.map (findTerms (depth - 1)) l) + | x -> [x] + and duplicates = function + [] -> [] + | a :: b -> if List.memq a b then a :: duplicates b + else duplicates b + + in let rec splitDuplicates depth d x = + if (List.memq x d) then + snumM (Number.zero) >>= fun zero -> + returnM (zero, x) + else match x with + | Times (a, b) -> + splitDuplicates (depth - 1) d a >>= fun (a', xa) -> + splitDuplicates (depth - 1) d b >>= fun (b', xb) -> + stimesM (a', b') >>= fun ab -> + stimesM (a, xb) >>= fun xb' -> + stimesM (xa, b) >>= fun xa' -> + stimesM (xa, xb) >>= fun xab -> + splusM [xa'; xb'; xab] >>= fun x -> + returnM (ab, x) + | Uminus a -> + splitDuplicates depth d a >>= fun (x, y) -> + suminusM x >>= fun ux -> + suminusM y >>= fun uy -> + returnM (ux, uy) + | Plus l when depth > 0 -> + mapM (splitDuplicates (depth - 1) d) l >>= fun ld -> + let (l', d') = List.split ld in + splusM l' >>= fun p -> + splusM d' >>= fun d'' -> + returnM (p, d'') + | x -> + snumM (Number.zero) >>= fun zero' -> + returnM (x, zero') + + in let l' = List.flatten (List.map 
(findTerms maxdepth) l) + in match duplicates l' with + | [] -> returnM l + | d -> + mapM (splitDuplicates maxdepth d) l >>= fun ld -> + let (l', d') = List.split ld in + splusM l' >>= fun l'' -> + let rec flattenPlusM = function + | Plus l -> returnM l + | Uminus x -> + flattenPlusM x >>= mapM suminusM + | x -> returnM [x] + in + mapM flattenPlusM d' >>= fun d'' -> + splusM (List.flatten d'') >>= fun d''' -> + mangleSumM [l''; d'''] + + and splusM l = + let fma_heuristics x = + if !Magic.enable_fma then + match x with + | [Uminus (Times _); Times _] -> Some false + | [Times _; Uminus (Times _)] -> Some false + | [Uminus (_); Times _] -> Some true + | [Times _; Uminus (Plus _)] -> Some true + | [_; Uminus (Times _)] -> Some false + | [Uminus (Times _); _] -> Some false + | _ -> None + else + None + in + mangleSumM l >>= fun l' -> + (* no terms are negative. Don't do anything *) + if not (List.exists negative l') then + canonicalizeM l' + (* all terms are negative. Negate them all and collect the minus sign *) + else if List.for_all negative l' then + mapM suminusM l' >>= splusM >>= suminusM + else match fma_heuristics l' with + | Some true -> mapM suminusM l' >>= splusM >>= suminusM + | Some false -> canonicalizeM l' + | None -> + (* Ask the Oracle for the canonical form *) + if (not !Magic.randomized_cse) && + Oracle.should_flip_sign (Plus l') then + mapM suminusM l' >>= splusM >>= suminusM + else + canonicalizeM l' + + (* monadic style algebraic simplifier for the dag *) + let rec algsimpM x = + memoizing lookupSimpM insertSimpM + (function + | Num a -> snumM a + | NaN _ as x -> makeNode x + | Plus a -> + mapM algsimpM a >>= splusM + | Times (a, b) -> + (algsimpM a >>= fun a' -> + algsimpM b >>= fun b' -> + stimesM (a', b')) + | CTimes (a, b) -> + (algsimpM a >>= fun a' -> + algsimpM b >>= fun b' -> + sctimesM (a', b')) + | CTimesJ (a, b) -> + (algsimpM a >>= fun a' -> + algsimpM b >>= fun b' -> + sctimesjM (a', b')) + | Uminus a -> + algsimpM a >>= suminusM + | 
Store (v, a) -> + algsimpM a >>= fun a' -> + makeNode (Store (v, a')) + | Load _ as x -> makeNode x) + x + + let initialTable = (empty, empty) + let simp_roots = mapM algsimpM + let algsimp = runM initialTable simp_roots +end + +(************************************************************* + * Network transposition algorithm + *************************************************************) +module Transpose = struct + open Monads.StateMonad + open Monads.MemoMonad + open Littlesimp + + let fetchDuals = fetchState + let storeDuals = storeState + + let lookupDualsM key = + fetchDuals >>= fun table -> + returnM (node_lookup key table) + + let insertDualsM key value = + fetchDuals >>= fun table -> + storeDuals (node_insert key value table) + + let rec visit visited vtable parent_table = function + [] -> (visited, parent_table) + | node :: rest -> + match node_lookup node vtable with + | Some _ -> visit visited vtable parent_table rest + | None -> + let children = match node with + | Store (v, n) -> [n] + | Plus l -> l + | Times (a, b) -> [a; b] + | CTimes (a, b) -> [a; b] + | CTimesJ (a, b) -> [a; b] + | Uminus x -> [x] + | _ -> [] + in let rec loop t = function + [] -> t + | a :: rest -> + (match node_lookup a t with + None -> loop (node_insert a [node] t) rest + | Some c -> loop (node_insert a (node :: c) t) rest) + in + (visit + (node :: visited) + (node_insert node () vtable) + (loop parent_table children) + (children @ rest)) + + let make_transposer parent_table = + let rec termM node candidate_parent = + match candidate_parent with + | Store (_, n) when n == node -> + dualM candidate_parent >>= fun x' -> returnM [x'] + | Plus (l) when List.memq node l -> + dualM candidate_parent >>= fun x' -> returnM [x'] + | Times (a, b) when b == node -> + dualM candidate_parent >>= fun x' -> + returnM [makeTimes (a, x')] + | CTimes (a, b) when b == node -> + dualM candidate_parent >>= fun x' -> + returnM [CTimes (a, x')] + | CTimesJ (a, b) when b == node -> + dualM 
candidate_parent >>= fun x' -> + returnM [CTimesJ (a, x')] + | Uminus n when n == node -> + dualM candidate_parent >>= fun x' -> + returnM [makeUminus x'] + | _ -> returnM [] + + and dualExpressionM this_node = + mapM (termM this_node) + (match node_lookup this_node parent_table with + | Some a -> a + | None -> failwith "bug in dualExpressionM" + ) >>= fun l -> + returnM (makePlus (List.flatten l)) + + and dualM this_node = + memoizing lookupDualsM insertDualsM + (function + | Load v as x -> + if (Variable.is_constant v) then + returnM (Load v) + else + (dualExpressionM x >>= fun d -> + returnM (Store (v, d))) + | Store (v, x) -> returnM (Load v) + | x -> dualExpressionM x) + this_node + + in dualM + + let is_store = function + | Store _ -> true + | _ -> false + + let transpose dag = + let _ = Util.info "begin transpose" in + let (all_nodes, parent_table) = + visit [] Assoctable.empty Assoctable.empty dag in + let transposerM = make_transposer parent_table in + let mapTransposerM = mapM transposerM in + let duals = runM Assoctable.empty mapTransposerM all_nodes in + let roots = List.filter is_store duals in + let _ = Util.info "end transpose" in + roots +end + + +(************************************************************* + * Various dag statistics + *************************************************************) +module Stats : sig + type complexity + val complexity : Expr.expr list -> complexity + val same_complexity : complexity -> complexity -> bool + val leq_complexity : complexity -> complexity -> bool + val to_string : complexity -> string +end = struct + type complexity = int * int * int * int * int * int + let rec visit visited vtable = function + [] -> visited + | node :: rest -> + match node_lookup node vtable with + Some _ -> visit visited vtable rest + | None -> + let children = match node with + Store (v, n) -> [n] + | Plus l -> l + | Times (a, b) -> [a; b] + | Uminus x -> [x] + | _ -> [] + in visit (node :: visited) + (node_insert node () vtable) + 
(children @ rest) + + let complexity dag = + let rec loop (load, store, plus, times, uminus, num) = function + [] -> (load, store, plus, times, uminus, num) + | node :: rest -> + loop + (match node with + | Load _ -> (load + 1, store, plus, times, uminus, num) + | Store _ -> (load, store + 1, plus, times, uminus, num) + | Plus x -> (load, store, plus + (List.length x - 1), times, uminus, num) + | Times _ -> (load, store, plus, times + 1, uminus, num) + | Uminus _ -> (load, store, plus, times, uminus + 1, num) + | Num _ -> (load, store, plus, times, uminus, num + 1) + | CTimes _ -> (load, store, plus, times, uminus, num) + | CTimesJ _ -> (load, store, plus, times, uminus, num) + | NaN _ -> (load, store, plus, times, uminus, num)) + rest + in let (l, s, p, t, u, n) = + loop (0, 0, 0, 0, 0, 0) (visit [] Assoctable.empty dag) + in (l, s, p, t, u, n) + + let weight (l, s, p, t, u, n) = + l + s + 10 * p + 20 * t + u + n + + let same_complexity a b = weight a = weight b + let leq_complexity a b = weight a <= weight b + + let to_string (l, s, p, t, u, n) = + Printf.sprintf "ld=%d st=%d add=%d mul=%d uminus=%d num=%d\n" + l s p t u n + +end + +(* simplify the dag *) +let algsimp v = + let rec simplification_loop v = + let () = Util.info "simplification step" in + let complexity = Stats.complexity v in + let () = Util.info ("complexity = " ^ (Stats.to_string complexity)) in + let v = (AlgSimp.algsimp @@ Transpose.transpose @@ + AlgSimp.algsimp @@ Transpose.transpose) v in + let complexity' = Stats.complexity v in + let () = Util.info ("complexity = " ^ (Stats.to_string complexity')) in + if (Stats.leq_complexity complexity' complexity) then + let () = Util.info "end algsimp" in + v + else + simplification_loop v + + in + let () = Util.info "begin algsimp" in + let v = AlgSimp.algsimp v in + if !Magic.network_transposition then simplification_loop v else v + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/algsimp.mli --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/fft/fftw/fftw-3.3.4/genfft/algsimp.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,22 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val algsimp : Expr.expr list -> Expr.expr list diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/annotate.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/annotate.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,361 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* Here, we take a schedule (produced by schedule.ml) ordering a + sequence of instructions, and produce an annotated schedule. The + annotated schedule has the same ordering as the original schedule, + but is additionally partitioned into nested blocks of temporary + variables. The partitioning is computed via a heuristic algorithm. + + The blocking allows the C code that we generate to consist of + nested blocks that help communicate variable lifetimes to the + compiler. *) + +open Schedule +open Expr +open Variable + +type annotated_schedule = + Annotate of variable list * variable list * variable list * int * aschedule +and aschedule = + ADone + | AInstr of assignment + | ASeq of (annotated_schedule * annotated_schedule) + +let addelem a set = if not (List.memq a set) then a :: set else set +let union l = + let f x = addelem x (* let is source of polymorphism *) + in List.fold_right f l + +(* set difference a - b *) +let diff a b = List.filter (fun x -> not (List.memq x b)) a + +let rec minimize f = function + [] -> failwith "minimize" + | [n] -> n + | n :: rest -> + let x = minimize f rest in + if (f x) >= (f n) then n else x + +(* find all variables used inside a scheduling unit *) +let rec find_block_vars = function + Done -> [] + | (Instr (Assign (v, x))) -> v :: (find_vars x) + | Par a -> List.flatten (List.map find_block_vars a) + | Seq (a, b) -> (find_block_vars a) @ (find_block_vars b) + +let uniq l = + List.fold_right (fun a b -> if List.memq a b then b else a :: b) l [] + +let has_related x = List.exists (Variable.same_class x) + +let rec overlap a b = Util.count (fun y -> has_related y b) a + +(* reorder a list of schedules so as to maximize overlap of variables *) +let reorder l = + let rec loop = function + [] -> [] + | 
(a, va) :: b -> + let c = + List.map + (fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in + let c' = + Sort.list + (fun (_, (a, la)) (_, (b, lb)) -> + la < lb or a > b) + c in + let b' = List.map (fun (a, _) -> a) c' in + a :: (loop b') in + let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in + (* start with smallest block --- does this matter ? *) + match l' with + [] -> [] + | _ -> + let m = minimize (fun (_, x) -> (List.length x)) l' in + let l'' = Util.remove m l' in + loop (m :: l'') + +(* remove Par blocks *) +let rec linearize = function + | Seq (a, Done) -> linearize a + | Seq (Done, a) -> linearize a + | Seq (a, b) -> Seq (linearize a, linearize b) + + (* try to balance nested Par blocks *) + | Par [a] -> linearize a + | Par l -> + let n2 = (List.length l) / 2 in + let rec loop n a b = + if n = 0 then + (List.rev b, a) + else + match a with + [] -> failwith "loop" + | x :: y -> loop (n - 1) y (x :: b) + in let (a, b) = loop n2 (reorder l) [] + in linearize (Seq (Par a, Par b)) + + | x -> x + +let subset a b = + List.for_all (fun x -> List.exists (fun y -> x == y) b) a + +let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) = + is_temporary av && + is_temporary bv && + (let va = Expr.find_vars ax and vb = Expr.find_vars bx in + subset va vb && subset vb va) + +let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) = + is_locative av && + is_locative bv && + Variable.same_class av bv + +let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) = + match (ax, bx) with + | (Load a), (Load b) when + Variable.is_locative a && Variable.is_locative b + -> Variable.same_class a b + | _ -> false + +(* extract instructions from schedule *) +let rec sched_to_ilist = function + | Done -> [] + | Instr a -> [a] + | Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b) + | _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *) + +let rec find_friends friendp insn friends foes = function + | [] -> (friends, foes) + | a :: b -> + 
if (a == insn) || (friendp a insn) then + find_friends friendp insn (a :: friends) foes b + else + find_friends friendp insn friends (a :: foes) b + +(* schedule all instructions in the equivalence class determined + by friendp at the point where the last one + is executed *) +let rec delay_friends friendp sched = + let rec recur insns = function + | Done -> (Done, insns) + | Instr a -> + let (friends, foes) = find_friends friendp a [] [] insns in + (Schedule.sequentially friends), foes + | Seq (a, b) -> + let (b', insnsb) = recur insns b in + let (a', insnsa) = recur insnsb a in + (Seq (a', b')), insnsa + | _ -> failwith "delay_friends" + in match recur (sched_to_ilist sched) sched with + | (s, []) -> s (* assert that all insns have been used *) + | _ -> failwith "delay_friends" + +(* schedule all instructions in the equivalence class determined + by friendp at the point where the first one + is executed *) +let rec anticipate_friends friendp sched = + let rec recur insns = function + | Done -> (Done, insns) + | Instr a -> + let (friends, foes) = find_friends friendp a [] [] insns in + (Schedule.sequentially friends), foes + | Seq (a, b) -> + let (a', insnsa) = recur insns a in + let (b', insnsb) = recur insnsa b in + (Seq (a', b')), insnsb + | _ -> failwith "anticipate_friends" + in match recur (sched_to_ilist sched) sched with + | (s, []) -> s (* assert that all insns have been used *) + | _ -> failwith "anticipate_friends" + +let collect_buddy_stores buddy_list sched = + let rec recur sched delayed_stores = match sched with + | Done -> (sched, delayed_stores) + | Instr (Assign (v, x)) -> + begin + try + let buddies = List.find (List.memq v) buddy_list in + let tmp = Variable.make_temporary () in + let i = Seq(Instr (Assign (tmp, x)), + Instr (Assign (v, Times (NaN MULTI_A, Load tmp)))) + and delayed_stores = (v, Load tmp) :: delayed_stores in + try + (Seq (i, + Instr (Assign + (List.hd buddies, + Times (NaN MULTI_B, + Plus (List.map + (fun buddy -> + List.assq 
buddy + delayed_stores) + buddies))) ))) + , delayed_stores + with Not_found -> (i, delayed_stores) + with Not_found -> (sched, delayed_stores) + end + | Seq (a, b) -> + let (newa, delayed_stores) = recur a delayed_stores in + let (newb, delayed_stores) = recur b delayed_stores in + (Seq (newa, newb), delayed_stores) + | _ -> failwith "collect_buddy_stores" + in let (sched, _) = recur sched [] in + sched + +let schedule_for_pipeline sched = + let update_readytimes t (Assign (v, _)) ready_times = + (v, (t + !Magic.pipeline_latency)) :: ready_times + and readyp t ready_times (Assign (_, x)) = + List.for_all + (fun var -> + try + (List.assq var ready_times) <= t + with Not_found -> false) + (List.filter Variable.is_temporary (Expr.find_vars x)) + in + let rec recur sched t ready_times delayed_instructions = + let (ready, not_ready) = + List.partition (readyp t ready_times) delayed_instructions + in match ready with + | a :: b -> + let (sched, t, ready_times, delayed_instructions) = + recur sched (t+1) (update_readytimes t a ready_times) + (b @ not_ready) + in + (Seq (Instr a, sched)), t, ready_times, delayed_instructions + | _ -> (match sched with + | Done -> (sched, t, ready_times, delayed_instructions) + | Instr a -> + if (readyp t ready_times a) then + (sched, (t+1), (update_readytimes t a ready_times), + delayed_instructions) + else + (Done, t, ready_times, (a :: delayed_instructions)) + | Seq (a, b) -> + let (a, t, ready_times, delayed_instructions) = + recur a t ready_times delayed_instructions + in + let (b, t, ready_times, delayed_instructions) = + recur b t ready_times delayed_instructions + in (Seq (a, b)), t, ready_times, delayed_instructions + | _ -> failwith "schedule_for_pipeline") + in let rec recur_until_done sched t ready_times delayed_instructions = + let (sched, t, ready_times, delayed_instructions) = + recur sched t ready_times delayed_instructions + in match delayed_instructions with + | [] -> sched + | _ -> + (Seq (sched, + (recur_until_done Done 
(t+1) ready_times + delayed_instructions))) + in recur_until_done sched 0 [] [] + +let rec rewrite_declarations force_declarations + (Annotate (_, _, declared, _, what)) = + let m = !Magic.number_of_variables in + + let declare_it declared = + if (force_declarations or List.length declared >= m) then + ([], declared) + else + (declared, []) + + in match what with + ADone -> Annotate ([], [], [], 0, what) + | AInstr i -> + let (u, d) = declare_it declared + in Annotate ([], u, d, 0, what) + | ASeq (a, b) -> + let ma = rewrite_declarations false a + and mb = rewrite_declarations false b + in let Annotate (_, ua, _, _, _) = ma + and Annotate (_, ub, _, _, _) = mb + in let (u, d) = declare_it (declared @ ua @ ub) + in Annotate ([], u, d, 0, ASeq (ma, mb)) + +let annotate list_of_buddy_stores schedule = + let rec analyze live_at_end = function + Done -> Annotate (live_at_end, [], [], 0, ADone) + | Instr i -> (match i with + Assign (v, x) -> + let vars = (find_vars x) in + Annotate (Util.remove v (union live_at_end vars), [v], [], + 0, AInstr i)) + | Seq (a, b) -> + let ab = analyze live_at_end b in + let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in + let aa = analyze live_at_begin_b a in + let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in + let defined = List.filter is_temporary (defined_a @ defined_b) in + let declarable = diff defined live_at_end in + let undeclarable = diff defined declarable + and maxdepth = max depth_a depth_b in + Annotate (live_at_begin_a, undeclarable, declarable, + List.length declarable + maxdepth, + ASeq (aa, ab)) + | _ -> failwith "really_analyze" + + in + let () = Util.info "begin annotate" in + let x = linearize schedule in + + let x = + if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then + schedule_for_pipeline x + else + x + in + + let x = + if !Magic.reorder_insns then + linearize(anticipate_friends use_same_vars x) + else + x + in + + (* delay stores to the real and imaginary parts of 
the same number *) + let x = + if !Magic.reorder_stores then + linearize(delay_friends store_to_same_class x) + else + x + in + + (* move loads of the real and imaginary parts of the same number *) + let x = + if !Magic.reorder_loads then + linearize(anticipate_friends loads_from_same_class x) + else + x + in + + let x = collect_buddy_stores list_of_buddy_stores x in + let x = analyze [] x in + let res = rewrite_declarations true x in + let () = Util.info "end annotate" in + res + +let rec dump print (Annotate (_, _, _, _, code)) = + dump_code print code +and dump_code print = function + | ADone -> () + | AInstr x -> print ((assignment_to_string x) ^ "\n") + | ASeq (a, b) -> dump print a; dump print b diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/annotate.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/annotate.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,36 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Variable +open Expr + +type annotated_schedule = + Annotate of variable list * variable list * variable list * + int * aschedule +and aschedule = + ADone + | AInstr of assignment + | ASeq of (annotated_schedule * annotated_schedule) + +val annotate : + variable list list -> Schedule.schedule -> annotated_schedule + +val dump : (string -> unit) -> annotated_schedule -> unit diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/assoctable.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/assoctable.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,65 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(************************************************************* + * Functional associative table + *************************************************************) + +(* + * this module implements a functional associative table. 
+ * The table is parametrized by an equality predicate and + * a hash function, with the restriction that (equal a b) ==> + * hash a == hash b. + * The table is purely functional and implemented using a binary + * search tree (not balanced for now) + *) + +type ('a, 'b) elem = + Leaf + | Node of int * ('a, 'b) elem * ('a, 'b) elem * ('a * 'b) list + +let empty = Leaf + +let lookup hash equal key table = + let h = hash key in + let rec look = function + Leaf -> None + | Node (hash_key, left, right, this_list) -> + if (hash_key < h) then look left + else if (hash_key > h) then look right + else let rec loop = function + [] -> None + | (a, b) :: rest -> if (equal key a) then Some b else loop rest + in loop this_list + in look table + +let insert hash key value table = + let h = hash key in + let rec ins = function + Leaf -> Node (h, Leaf, Leaf, [(key, value)]) + | Node (hash_key, left, right, this_list) -> + if (hash_key < h) then + Node (hash_key, ins left, right, this_list) + else if (hash_key > h) then + Node (hash_key, left, ins right, this_list) + else + Node (hash_key, left, right, (key, value) :: this_list) + in ins table diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/assoctable.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/assoctable.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type ('a, 'b) elem = + | Leaf + | Node of int * ('a, 'b) elem * ('a, 'b) elem * ('a * 'b) list +val empty : ('a, 'b) elem +val lookup : + ('a -> int) -> ('a -> 'b -> bool) -> 'a -> ('b, 'c) elem -> 'c option +val insert : + ('a -> int) -> 'a -> 'c -> ('a, 'c) elem -> ('a, 'c) elem diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/c.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/c.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,461 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* + * This module contains the definition of a C-like abstract + * syntax tree, and functions to convert ML values into C + * programs + *) + +open Expr +open Annotate +open List + +let realtype = "R" +let realtypep = realtype ^ " *" +let extended_realtype = "E" +let constrealtype = "const " ^ realtype +let constrealtypep = constrealtype ^ " *" + +let stridetype = "stride" + +(*********************************** + * C program structure + ***********************************) +type c_decl = + | Decl of string * string + | Tdecl of string (* arbitrary text declaration *) + +and c_ast = + | Asch of annotated_schedule + | Simd_leavefun + | Return of c_ast + | For of c_ast * c_ast * c_ast * c_ast + | If of c_ast * c_ast + | Block of (c_decl list) * (c_ast list) + | Binop of string * c_ast * c_ast + | Expr_assign of c_ast * c_ast + | Stmt_assign of c_ast * c_ast + | Comma of c_ast * c_ast + | Integer of int + | CVar of string + | CCall of string * c_ast + | CPlus of c_ast list + | ITimes of c_ast * c_ast + | CUminus of c_ast +and c_fcn = Fcn of string * string * (c_decl list) * c_ast + + +let ctimes = function + | (Integer 1), a -> a + | a, (Integer 1) -> a + | a, b -> ITimes (a, b) + +(* + * C AST unparser + *) +let foldr_string_concat l = fold_right (^) l "" + +let rec unparse_expr_c = + let yes x = x and no x = "" in + + let rec unparse_plus maybe = + let maybep = maybe " + " in + function + | [] -> "" + | (Uminus (Times (a, b))) :: (Uminus c) :: d -> + maybep ^ (op "FNMA" a b c) ^ (unparse_plus yes d) + | (Uminus c) :: (Uminus (Times (a, b))) :: d -> + maybep ^ (op "FNMA" a b c) ^ (unparse_plus yes d) + | (Uminus (Times (a, b))) :: c :: d -> + maybep ^ (op "FNMS" a b c) ^ (unparse_plus yes d) + | c :: (Uminus (Times (a, b))) :: d -> + 
maybep ^ (op "FNMS" a b c) ^ (unparse_plus yes d) + | (Times (a, b)) :: (Uminus c) :: d -> + maybep ^ (op "FMS" a b c) ^ (unparse_plus yes d) + | (Uminus c) :: (Times (a, b)) :: d -> + maybep ^ (op "FMS" a b c) ^ (unparse_plus yes d) + | (Times (a, b)) :: c :: d -> + maybep ^ (op "FMA" a b c) ^ (unparse_plus yes d) + | c :: (Times (a, b)) :: d -> + maybep ^ (op "FMA" a b c) ^ (unparse_plus yes d) + | (Uminus a :: b) -> + " - " ^ (parenthesize a) ^ (unparse_plus yes b) + | (a :: b) -> + maybep ^ (parenthesize a) ^ (unparse_plus yes b) + and parenthesize x = match x with + | (Load _) -> unparse_expr_c x + | (Num _) -> unparse_expr_c x + | _ -> "(" ^ (unparse_expr_c x) ^ ")" + and op nam a b c = + nam ^ "(" ^ (unparse_expr_c a) ^ ", " ^ (unparse_expr_c b) ^ ", " ^ + (unparse_expr_c c) ^ ")" + + in function + | Load v -> Variable.unparse v + | Num n -> Number.to_konst n + | Plus [] -> "0.0 /* bug */" + | Plus [a] -> " /* bug */ " ^ (unparse_expr_c a) + | Plus a -> (unparse_plus no a) + | Times (a, b) -> (parenthesize a) ^ " * " ^ (parenthesize b) + | Uminus (Plus [a; Uminus b]) -> unparse_plus no [b; Uminus a] + | Uminus a -> "- " ^ (parenthesize a) + | _ -> failwith "unparse_expr_c" + +and unparse_expr_generic = + let rec u x = unparse_expr_generic x + and unary op a = Printf.sprintf "%s(%s)" op (u a) + and binary op a b = Printf.sprintf "%s(%s, %s)" op (u a) (u b) + and ternary op a b c = Printf.sprintf "%s(%s, %s, %s)" op (u a) (u b) (u c) + and quaternary op a b c d = + Printf.sprintf "%s(%s, %s, %s, %s)" op (u a) (u b) (u c) (u d) + and unparse_plus = function + | [(Uminus (Times (a, b))); Times (c, d)] -> quaternary "FNMMS" a b c d + | [Times (c, d); (Uminus (Times (a, b)))] -> quaternary "FNMMS" a b c d + | [Times (c, d); (Times (a, b))] -> quaternary "FMMA" a b c d + | [(Uminus (Times (a, b))); c] -> ternary "FNMS" a b c + | [c; (Uminus (Times (a, b)))] -> ternary "FNMS" a b c + | [(Uminus c); (Times (a, b))] -> ternary "FMS" a b c + | [(Times (a, b)); (Uminus 
c)] -> ternary "FMS" a b c + | [c; (Times (a, b))] -> ternary "FMA" a b c + | [(Times (a, b)); c] -> ternary "FMA" a b c + | [a; Uminus b] -> binary "SUB" a b + | [a; b] -> binary "ADD" a b + | a :: b :: c -> binary "ADD" a (Plus (b :: c)) + | _ -> failwith "unparse_plus" + in function + | Load v -> Variable.unparse v + | Num n -> Number.to_konst n + | Plus a -> unparse_plus a + | Times (a, b) -> binary "MUL" a b + | Uminus a -> unary "NEG" a + | _ -> failwith "unparse_expr" + +and unparse_expr x = + if !Magic.generic_arith then + unparse_expr_generic x + else + unparse_expr_c x + +and unparse_assignment (Assign (v, x)) = + (Variable.unparse v) ^ " = " ^ (unparse_expr x) ^ ";\n" + +and unparse_annotated force_bracket = + let rec unparse_code = function + ADone -> "" + | AInstr i -> unparse_assignment i + | ASeq (a, b) -> + (unparse_annotated false a) ^ (unparse_annotated false b) + and declare_variables l = + let rec uvar = function + [] -> failwith "uvar" + | [v] -> (Variable.unparse v) ^ ";\n" + | a :: b -> (Variable.unparse a) ^ ", " ^ (uvar b) + in let rec vvar l = + let s = if !Magic.compact then 15 else 1 in + if (List.length l <= s) then + match l with + [] -> "" + | _ -> extended_realtype ^ " " ^ (uvar l) + else + (vvar (Util.take s l)) ^ (vvar (Util.drop s l)) + in vvar (List.filter Variable.is_temporary l) + in function + Annotate (_, _, decl, _, code) -> + if (not force_bracket) && (Util.null decl) then + unparse_code code + else "{\n" ^ + (declare_variables decl) ^ + (unparse_code code) ^ + "}\n" + +and unparse_decl = function + | Decl (a, b) -> a ^ " " ^ b ^ ";\n" + | Tdecl x -> x + +and unparse_ast = + let rec unparse_plus = function + | [] -> "" + | (CUminus a :: b) -> " - " ^ (parenthesize a) ^ (unparse_plus b) + | (a :: b) -> " + " ^ (parenthesize a) ^ (unparse_plus b) + and parenthesize x = match x with + | (CVar _) -> unparse_ast x + | (CCall _) -> unparse_ast x + | (Integer _) -> unparse_ast x + | _ -> "(" ^ (unparse_ast x) ^ ")" + + in + 
function + | Asch a -> (unparse_annotated true a) + | Simd_leavefun -> "" (* used only in SIMD code *) + | Return x -> "return " ^ unparse_ast x ^ ";" + | For (a, b, c, d) -> + "for (" ^ + unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c + ^ ")" ^ unparse_ast d + | If (a, d) -> + "if (" ^ + unparse_ast a + ^ ")" ^ unparse_ast d + | Block (d, s) -> + if (s == []) then "" + else + "{\n" ^ + foldr_string_concat (map unparse_decl d) ^ + foldr_string_concat (map unparse_ast s) ^ + "}\n" + | Binop (op, a, b) -> (unparse_ast a) ^ op ^ (unparse_ast b) + | Expr_assign (a, b) -> (unparse_ast a) ^ " = " ^ (unparse_ast b) + | Stmt_assign (a, b) -> (unparse_ast a) ^ " = " ^ (unparse_ast b) ^ ";\n" + | Comma (a, b) -> (unparse_ast a) ^ ", " ^ (unparse_ast b) + | Integer i -> string_of_int i + | CVar s -> s + | CCall (s, x) -> s ^ "(" ^ (unparse_ast x) ^ ")" + | CPlus [] -> "0 /* bug */" + | CPlus [a] -> " /* bug */ " ^ (unparse_ast a) + | CPlus (a::b) -> (parenthesize a) ^ (unparse_plus b) + | ITimes (a, b) -> (parenthesize a) ^ " * " ^ (parenthesize b) + | CUminus a -> "- " ^ (parenthesize a) + +and unparse_function = function + Fcn (typ, name, args, body) -> + let rec unparse_args = function + [Decl (a, b)] -> a ^ " " ^ b + | (Decl (a, b)) :: s -> a ^ " " ^ b ^ ", " + ^ unparse_args s + | [] -> "" + | _ -> failwith "unparse_function" + in + (typ ^ " " ^ name ^ "(" ^ unparse_args args ^ ")\n" ^ + unparse_ast body) + + +(************************************************************* + * traverse a function and return a list of all expressions, + * in the execution order + **************************************************************) +let rec fcn_to_expr_list = fun (Fcn (_, _, _, body)) -> ast_to_expr_list body +and acode_to_expr_list = function + AInstr (Assign (_, x)) -> [x] + | ASeq (a, b) -> + (asched_to_expr_list a) @ (asched_to_expr_list b) + | _ -> [] +and asched_to_expr_list (Annotate (_, _, _, _, code)) = + acode_to_expr_list code +and ast_to_expr_list = 
function + Asch a -> asched_to_expr_list a + | Block (_, a) -> flatten (map ast_to_expr_list a) + | For (_, _, _, body) -> ast_to_expr_list body + | If (_, body) -> ast_to_expr_list body + | _ -> [] + +(*********************** + * Extracting Constants + ***********************) + +(* gather every numeric constant found in the expressions of F and + emit one DK(...) text declaration per constant kept by unique_constants *) + +let extract_constants f = + let constlist = flatten (map expr_to_constants (ast_to_expr_list f)) + in map + (fun n -> + Tdecl + ("DK(" ^ (Number.to_konst n) ^ ", " ^ (Number.to_string n) ^ + ");\n")) + (unique_constants constlist) + +(****************************** + Extracting operation counts + ******************************) + +let count_stack_vars = + let rec count_acode = function + | ASeq (a, b) -> max (count_asched a) (count_asched b) + | _ -> 0 + and count_asched (Annotate (_, _, decl, _, code)) = + (length decl) + (count_acode code) + and count_ast = function + | Asch a -> count_asched a + | Block (d, a) -> (length d) + (Util.max_list (map count_ast a)) + | For (_, _, _, body) -> count_ast body + | If (_, body) -> count_ast body + | _ -> 0 + in function (Fcn (_, _, _, body)) -> count_ast body + +let count_memory_acc f = + let rec count_var v = + if (Variable.is_locative v) then 1 else 0 + and count_acode = function + | AInstr (Assign (v, _)) -> count_var v + | ASeq (a, b) -> (count_asched a) + (count_asched b) + | _ -> 0 + and count_asched = function + Annotate (_, _, _, _, code) -> count_acode code + and count_ast = function + | Asch a -> count_asched a + | Block (_, a) -> (Util.sum_list (map count_ast a)) + | Comma (a, b) -> (count_ast a) + (count_ast b) + | For (_, _, _, body) -> count_ast body + | If (_, body) -> count_ast body + | _ -> 0 + and count_acc_expr_func acc = function + | Load v -> acc + (count_var v) + | Plus a -> fold_left count_acc_expr_func acc a + | Times (a, b) -> fold_left count_acc_expr_func acc [a; b] + | Uminus a -> 
count_acc_expr_func acc a + | _ -> acc + in let (Fcn (_, _, _, body)) = f (* only the body is inspected here *) + in (count_ast body) + + fold_left count_acc_expr_func 0 (fcn_to_expr_list f) + +let good_for_fma = To_alist.good_for_fma + +let build_fma = function + | [a; Times (b, c)] when good_for_fma (b, c) -> Some (a, b, c) + | [Times (b, c); a] when good_for_fma (b, c) -> Some (a, b, c) + | [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> Some (a, b, c) + | [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> Some (a, b, c) + | _ -> None + +let rec count_flops_expr_func (adds, mults, fmas) = function + | Plus [] -> (adds, mults, fmas) + | Plus ([_; _] as a) -> + begin + match build_fma a with + | None -> + fold_left count_flops_expr_func + (adds + (length a) - 1, mults, fmas) a + | Some (a, b, c) -> + fold_left count_flops_expr_func (adds, mults, fmas+1) [a; b; c] + end + | Plus (a :: b) -> + count_flops_expr_func (adds, mults, fmas) (Plus [a; Plus b]) + | Times (NaN MULTI_A,_) -> (adds, mults, fmas) + | Times (NaN MULTI_B,_) -> (adds, mults, fmas) + | Times (NaN I,b) -> count_flops_expr_func (adds, mults, fmas) b + | Times (NaN CONJ,b) -> count_flops_expr_func (adds, mults, fmas) b + | Times (a,b) -> fold_left count_flops_expr_func (adds, mults+1, fmas) [a; b] + | CTimes (a,b) -> + fold_left count_flops_expr_func (adds+1, mults+2, fmas) [a; b] + | CTimesJ (a,b) -> + fold_left count_flops_expr_func (adds+1, mults+2, fmas) [a; b] + | Uminus a -> count_flops_expr_func (adds, mults, fmas) a + | _ -> (adds, mults, fmas) + +let count_flops f = + fold_left count_flops_expr_func (0, 0, 0) (fcn_to_expr_list f) + +let count_constants f = + length (unique_constants (flatten (map expr_to_constants (fcn_to_expr_list f)))) + +let arith_complexity f = + let (a, m, fmas) = count_flops f + and v = count_stack_vars f + and c = count_constants f + and mem = count_memory_acc f + in (a, m, fmas, v, c, mem) + +(* print the operation costs *) +let print_cost f = + let Fcn (_, _, _, _) = f + and (a, m, 
fmas, v, c, mem) = arith_complexity f + in + "/*\n"^ + " * This function contains " ^ + (string_of_int (a + fmas)) ^ " FP additions, " ^ + (string_of_int (m + fmas)) ^ " FP multiplications,\n" ^ + " * (or, " ^ + (string_of_int a) ^ " additions, " ^ + (string_of_int m) ^ " multiplications, " ^ + (string_of_int fmas) ^ " fused multiply/add),\n" ^ + " * " ^ (string_of_int v) ^ " stack variables, " ^ + (string_of_int c) ^ " constants, and " ^ + (string_of_int mem) ^ " memory accesses\n" ^ + " */\n" + +(***************************************** + * functions that create C arrays + *****************************************) +type stride = + | SVar of string + | SConst of string + | SInteger of int + | SNeg of stride + +type sstride = + | Simple of int + | Constant of (string * int) + | Composite of (string * int) + | Negative of sstride + +let rec simplify_stride stride i = + match (stride, i) with + (_, 0) -> Simple 0 + | (SInteger n, i) -> Simple (n * i) + | (SConst s, i) -> Constant (s, i) + | (SVar s, i) -> Composite (s, i) + | (SNeg x, i) -> + match (simplify_stride x i) with + | Negative y -> y + | y -> Negative y + +let rec cstride_to_string = function + | Simple i -> string_of_int i + | Constant (s, i) -> + if !Magic.lisp_syntax then + "(* " ^ s ^ " " ^ (string_of_int i) ^ ")" + else + s ^ " * " ^ (string_of_int i) + | Composite (s, i) -> + if !Magic.lisp_syntax then + "(* " ^ s ^ " " ^ (string_of_int i) ^ ")" + else + "WS(" ^ s ^ ", " ^ (string_of_int i) ^ ")" + | Negative x -> "-" ^ cstride_to_string x + +let aref name index = + if !Magic.lisp_syntax then + Printf.sprintf "(aref %s %s)" name index + else + Printf.sprintf "%s[%s]" name index + +let array_subscript name stride k = + aref name (cstride_to_string (simplify_stride stride k)) + +let varray_subscript name vstride stride v i = + let vindex = simplify_stride vstride v + and iindex = simplify_stride stride i + in + let index = + match (vindex, iindex) with + (Simple vi, Simple ii) -> string_of_int (vi + 
ii) + | (Simple 0, x) -> cstride_to_string x + | (x, Simple 0) -> cstride_to_string x + | _ -> (cstride_to_string vindex) ^ " + " ^ (cstride_to_string iindex) + in aref name index + +let real_of s = "c_re(" ^ s ^ ")" +let imag_of s = "c_im(" ^ s ^ ")" + +let flops_of f = + let (add, mul, fma) = count_flops f in + Printf.sprintf "{ %d, %d, %d, 0 }" add mul fma diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/c.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/c.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,74 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type stride = + | SVar of string + | SConst of string + | SInteger of int + | SNeg of stride +val array_subscript : string -> stride -> int -> string +val varray_subscript : string -> stride -> stride -> int -> int -> string + +val real_of : string -> string +val imag_of : string -> string + +val realtype : string +val realtypep : string +val constrealtype : string +val constrealtypep : string +val stridetype : string + +type c_decl = + | Decl of string * string + | Tdecl of string (* arbitrary text declaration *) + +and c_ast = + | Asch of Annotate.annotated_schedule + | Simd_leavefun + | Return of c_ast + | For of c_ast * c_ast * c_ast * c_ast + | If of c_ast * c_ast + | Block of (c_decl list) * (c_ast list) + | Binop of string * c_ast * c_ast + | Expr_assign of c_ast * c_ast + | Stmt_assign of c_ast * c_ast + | Comma of c_ast * c_ast + | Integer of int + | CVar of string + | CCall of string * c_ast + | CPlus of c_ast list + | ITimes of c_ast * c_ast + | CUminus of c_ast +and c_fcn = | Fcn of string * string * c_decl list * c_ast + +val unparse_expr : Expr.expr -> string +val unparse_assignment : Expr.assignment -> string +val unparse_annotated : bool -> Annotate.annotated_schedule -> string +val unparse_decl : c_decl -> string +val unparse_ast : c_ast -> string +val unparse_function : c_fcn -> string + +val flops_of : c_fcn -> string +val print_cost : c_fcn -> string + +val ast_to_expr_list : c_ast -> Expr.expr list +val extract_constants : c_ast -> c_decl list +val ctimes : (c_ast * c_ast) -> c_ast diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/complex.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/complex.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,147 @@ +(* + * Copyright (c) 
1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* abstraction layer for complex operations *) +open Littlesimp +open Expr + +(* type of complex expressions *) +type expr = CE of Expr.expr * Expr.expr + +let two = CE (makeNum Number.two, makeNum Number.zero) +let one = CE (makeNum Number.one, makeNum Number.zero) +let i = CE (makeNum Number.zero, makeNum Number.one) +let zero = CE (makeNum Number.zero, makeNum Number.zero) +let make (r, i) = CE (r, i) + +let uminus (CE (a, b)) = CE (makeUminus a, makeUminus b) + +let inverse_int n = CE (makeNum (Number.div Number.one (Number.of_int n)), + makeNum Number.zero) + +let inverse_int_sqrt n = + CE (makeNum (Number.div Number.one (Number.sqrt (Number.of_int n))), + makeNum Number.zero) +let int_sqrt n = + CE (makeNum (Number.sqrt (Number.of_int n)), + makeNum Number.zero) + +let nan x = CE (NaN x, makeNum Number.zero) + +let half = inverse_int 2 + +let times3x3 (CE (a, b)) (CE (c, d)) = + CE (makePlus [makeTimes (c, makePlus [a; makeUminus (b)]); + makeTimes (b, makePlus [c; makeUminus (d)])], + makePlus [makeTimes (a, makePlus [c; d]); + makeUminus(makeTimes (c, makePlus [a; 
makeUminus (b)]))]) + +let times (CE (a, b)) (CE (c, d)) = + if not !Magic.threemult then + CE (makePlus [makeTimes (a, c); makeUminus (makeTimes (b, d))], + makePlus [makeTimes (a, d); makeTimes (b, c)]) + else if is_constant c && is_constant d then + times3x3 (CE (a, b)) (CE (c, d)) + else (* hope a and b are constant expressions *) + times3x3 (CE (c, d)) (CE (a, b)) + +let ctimes (CE (a, _)) (CE (c, _)) = + CE (CTimes (a, c), makeNum Number.zero) + +let ctimesj (CE (a, _)) (CE (c, _)) = + CE (CTimesJ (a, c), makeNum Number.zero) + +(* complex exponential (of root of unity); returns exp(2*pi*i/n * m) *) +let exp n i = + let (c, s) = Number.cexp n i + in CE (makeNum c, makeNum s) + +(* various trig functions evaluated at (2*pi*i/n * m) *) +let sec n m = + let (c, s) = Number.cexp n m + in CE (makeNum (Number.div Number.one c), makeNum Number.zero) +let csc n m = + let (c, s) = Number.cexp n m + in CE (makeNum (Number.div Number.one s), makeNum Number.zero) +let tan n m = + let (c, s) = Number.cexp n m + in CE (makeNum (Number.div s c), makeNum Number.zero) +let cot n m = + let (c, s) = Number.cexp n m + in CE (makeNum (Number.div c s), makeNum Number.zero) + +(* complex sum *) +let plus a = + let rec unzip_complex = function + [] -> ([], []) + | ((CE (a, b)) :: s) -> + let (r,i) = unzip_complex s + in + (a::r), (b::i) in + let (c, d) = unzip_complex a in + CE (makePlus c, makePlus d) + +(* extract real/imaginary *) +let real (CE (a, b)) = CE (a, makeNum Number.zero) +let imag (CE (a, b)) = CE (b, makeNum Number.zero) +let iimag (CE (a, b)) = CE (makeNum Number.zero, b) +let conj (CE (a, b)) = CE (a, makeUminus b) + + +(* abstraction of sum_{i=0}^{n-1} *) +let sigma a b f = plus (List.map f (Util.interval a b)) + +(* store and assignment operations *) +let store_real v (CE (a, b)) = Expr.Store (v, a) +let store_imag v (CE (a, b)) = Expr.Store (v, b) +let store (vr, vi) x = (store_real vr x, store_imag vi x) + +let assign_real v (CE (a, b)) = Expr.Assign (v, a) +let 
assign_imag v (CE (a, b)) = Expr.Assign (v, b) +let assign (vr, vi) x = (assign_real vr x, assign_imag vi x) + + +(************************ + shortcuts + ************************) +let (@*) = times +let (@+) a b = plus [a; b] +let (@-) a b = plus [a; uminus b] + +(* type of complex signals *) +type signal = int -> expr + +(* make a finite signal infinite *) +let infinite n signal i = if ((0 <= i) && (i < n)) then signal i else zero + +let hermitian n a = + Util.array n (fun i -> + if (i = 0) then real (a 0) + else if (i < n - i) then (a i) + else if (i > n - i) then conj (a (n - i)) + else real (a i)) + +let antihermitian n a = + Util.array n (fun i -> + if (i = 0) then iimag (a 0) + else if (i < n - i) then (a i) + else if (i > n - i) then uminus (conj (a (n - i))) + else iimag (a i)) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/complex.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/complex.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,68 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type expr +val make : (Expr.expr * Expr.expr) -> expr +val two : expr +val one : expr +val i : expr +val zero : expr +val half : expr +val inverse_int : int -> expr +val inverse_int_sqrt : int -> expr +val int_sqrt : int -> expr +val times : expr -> expr -> expr +val ctimes : expr -> expr -> expr +val ctimesj : expr -> expr -> expr +val uminus : expr -> expr +val exp : int -> int -> expr +val sec : int -> int -> expr +val csc : int -> int -> expr +val tan : int -> int -> expr +val cot : int -> int -> expr +val plus : expr list -> expr +val real : expr -> expr +val imag : expr -> expr +val conj : expr -> expr +val nan : Expr.transcendent -> expr +val sigma : int -> int -> (int -> expr) -> expr + +val (@*) : expr -> expr -> expr +val (@+) : expr -> expr -> expr +val (@-) : expr -> expr -> expr + +(* a signal is a map from integers to expressions *) +type signal = int -> expr +val infinite : int -> signal -> signal + +val store_real : Variable.variable -> expr -> Expr.expr +val store_imag : Variable.variable -> expr -> Expr.expr +val store : + Variable.variable * Variable.variable -> expr -> Expr.expr * Expr.expr + +val assign_real : Variable.variable -> expr -> Expr.assignment +val assign_imag : Variable.variable -> expr -> Expr.assignment +val assign : + Variable.variable * Variable.variable -> + expr -> Expr.assignment * Expr.assignment + +val hermitian : int -> (int -> expr) -> int -> expr +val antihermitian : int -> (int -> expr) -> int -> expr diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/conv.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/conv.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,130 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 
2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * +*) + +open Complex +open Util + +let polyphase m a ph i = a (m * i + ph) + +let rec divmod n i = + if (i < 0) then + let (a, b) = divmod n (i + n) + in (a - 1, b) + else (i / n, i mod n) + +let unpolyphase m a i = let (x, y) = divmod m i in a y x + +let lift2 f a b i = f (a i) (b i) + +(* convolution of signals A and B *) +let rec conv na a nb b = + let rec naive na a nb b i = + sigma 0 na (fun j -> (a j) @* (b (i - j))) + + and recur na a nb b = + if (na <= 1 || nb <= 1) then + naive na a nb b + else + let p = polyphase 2 in + let ee = conv (na - na / 2) (p a 0) (nb - nb / 2) (p b 0) + and eo = conv (na - na / 2) (p a 0) (nb / 2) (p b 1) + and oe = conv (na / 2) (p a 1) (nb - nb / 2) (p b 0) + and oo = conv (na / 2) (p a 1) (nb / 2) (p b 1) in + unpolyphase 2 (function + 0 -> fun i -> (ee i) @+ (oo (i - 1)) + | 1 -> fun i -> (eo i) @+ (oe i) + | _ -> failwith "recur") + + + (* Karatsuba variant 1: (a+bx)(c+dx) = (ac+bdxx)+((a+b)(c+d)-ac-bd)x *) + and karatsuba1 na a nb b = + let p = polyphase 2 in + let ae = p a 0 and nae = na - na / 2 + and ao = p a 1 and nao = na / 2 + and be = p b 0 and nbe = nb - nb / 2 + and bo = p b 1 and nbo = nb / 2 in + 
let ae = infinite nae ae and ao = infinite nao ao + and be = infinite nbe be and bo = infinite nbo bo in + let aeo = lift2 (@+) ae ao and naeo = nae + and beo = lift2 (@+) be bo and nbeo = nbe in + let ee = conv nae ae nbe be + and oo = conv nao ao nbo bo + and eoeo = conv naeo aeo nbeo beo in + + let q = function + 0 -> fun i -> (ee i) @+ (oo (i - 1)) + | 1 -> fun i -> (eoeo i) @- ((ee i) @+ (oo i)) + | _ -> failwith "karatsuba1" in + unpolyphase 2 q + + (* Karatsuba variant 2: + (a+bx)(c+dx) = ((a+b)c-b(c-dxx))+x((a+b)c-a(c-d)) *) + and karatsuba2 na a nb b = + let p = polyphase 2 in + let ae = p a 0 and nae = na - na / 2 + and ao = p a 1 and nao = na / 2 + and be = p b 0 and nbe = nb - nb / 2 + and bo = p b 1 and nbo = nb / 2 in + let ae = infinite nae ae and ao = infinite nao ao + and be = infinite nbe be and bo = infinite nbo bo in + + let c1 = conv nae (lift2 (@+) ae ao) nbe be + and c2 = conv nao ao (nbo + 1) (fun i -> be i @- bo (i - 1)) + and c3 = conv nae ae nbe (lift2 (@-) be bo) in + + let q = function + 0 -> lift2 (@-) c1 c2 + | 1 -> lift2 (@-) c1 c3 + | _ -> failwith "karatsuba2" in + unpolyphase 2 q + + and karatsuba na a nb b = + let m = na + nb - 1 in + if (m < !Magic.karatsuba_min) then + recur na a nb b + else + match !Magic.karatsuba_variant with + 1 -> karatsuba1 na a nb b + | 2 -> karatsuba2 na a nb b + | _ -> failwith "unknown karatsuba variant" + + and via_circular na a nb b = + let m = na + nb - 1 in + if (m < !Magic.circular_min) then + karatsuba na a nb b + else + let rec find_min n = if n >= m then n else find_min (2 * n) in + circular (find_min 1) a b + + in + let a = infinite na a and b = infinite nb b in + let res = array (na + nb - 1) (via_circular na a nb b) in + infinite (na + nb - 1) res + +and circular n a b = + let via_dft n a b = + let fa = Fft.dft (-1) n a + and fb = Fft.dft (-1) n b + and scale = inverse_int n in + let fab i = ((fa i) @* (fb i)) @* scale in + Fft.dft 1 n fab + + in via_dft n a b diff -r 8db794ca3e0b -r 
26056e866c29 fft/fftw/fftw-3.3.4/genfft/conv.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/conv.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,22 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val conv : int -> Complex.signal -> int -> Complex.signal -> Complex.signal diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/dag.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/dag.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,109 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util + +(* Here, we have functions to transform a sequence of assignments + (variable = expression) into a DAG (a directed, acyclic graph). + The nodes of the DAG are the assignments, and the edges indicate + dependencies. (The DAG is analyzed in the scheduler to find an + efficient ordering of the assignments.) + + This file also contains utilities to manipulate the DAG in various + ways. *) + +(******************************************** + * Dag structure + ********************************************) +type color = RED | BLUE | BLACK | YELLOW + +type dagnode = + { assigned: Variable.variable; + mutable expression: Expr.expr; + input_variables: Variable.variable list; + mutable successors: dagnode list; + mutable predecessors: dagnode list; + mutable label: int; + mutable color: color} + +type dag = Dag of (dagnode list) + +(* true if node uses v *) +let node_uses v node = + List.exists (Variable.same v) node.input_variables + +(* true if assignment of v clobbers any input of node *) +let node_clobbers node v = + List.exists (Variable.same_location v) node.input_variables + +(* true if nodeb depends on nodea: nodeb reads the variable nodea assigns, or nodeb's assignment clobbers an input of nodea *) +let depends_on nodea nodeb = + node_uses nodea.assigned nodeb || + node_clobbers nodea nodeb.assigned + +(* transform an assignment list into a dag *) +let makedag alist = + let dag = List.map + (fun assignment -> + let (v, x) = assignment in + { assigned = v; + expression = x; + input_variables = Expr.find_vars x; + successors = []; + predecessors = []; + label = 0; + color 
= BLACK }) + alist + in begin + for_list dag (fun i -> + for_list dag (fun j -> + if depends_on i j then begin + i.successors <- j :: i.successors; + j.predecessors <- i :: j.predecessors; + end)); + Dag dag; + end + +let map f (Dag dag) = Dag (List.map f dag) +let for_all (Dag dag) f = + (* run f on every node for its side effects only; List.iter does not + build the throwaway result list that List.map would *) + List.iter f dag +let to_list (Dag dag) = dag + +let find_node f (Dag dag) = Util.find_elem f dag + +(* breadth-first search *) +let rec bfs (Dag dag) node init_label = + let _ = node.label <- init_label in + let rec loop = function + [] -> () + | node :: rest -> + let neighbors = node.predecessors @ node.successors in + let m = min_list (List.map (fun node -> node.label) neighbors) in + if (node.label > m + 1) then begin + node.label <- m + 1; + loop (rest @ neighbors); + end else + loop rest + in let neighbors = node.predecessors @ node.successors in + loop neighbors + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/dag.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/dag.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,43 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util + +type color = | RED | BLUE | BLACK | YELLOW + +type dagnode = + { assigned: Variable.variable; + mutable expression: Expr.expr; + input_variables: Variable.variable list; + mutable successors: dagnode list; + mutable predecessors: dagnode list; + mutable label: int; + mutable color: color} + +type dag + +val makedag : (Variable.variable * Expr.expr) list -> dag + +val map : (dagnode -> dagnode) -> dag -> dag +val for_all : dag -> (dagnode -> unit) -> unit +val to_list : dag -> (dagnode list) +val bfs : dag -> dagnode -> int -> unit +val find_node : (dagnode -> bool) -> dag -> dagnode option diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/expr.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/expr.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,152 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* Here, we define the data type encapsulating a symbolic arithmetic + expression, and provide some routines for manipulating it. *) + +(* I will regret this hack : *) +(* NEWS: I did *) +type transcendent = I | MULTI_A | MULTI_B | CONJ + +type expr = + | Num of Number.number + | NaN of transcendent + | Plus of expr list + | Times of expr * expr + | CTimes of expr * expr + | CTimesJ of expr * expr (* CTimesJ (a, b) = conj(a) * b *) + | Uminus of expr + | Load of Variable.variable + | Store of Variable.variable * expr + +type assignment = Assign of Variable.variable * expr + +(* various hash functions *) +let hash_float x = + let (mantissa, exponent) = frexp x + in truncate (float_of_int(exponent) *. 1234.567 +. mantissa *. 10000.0) + +let sum_list l = List.fold_right (+) l 0 + +let transcendent_to_float = function + | I -> 2.718281828459045235360287471 (* any transcendent number will do *) + | MULTI_A -> 0.6931471805599453094172321214 + | MULTI_B -> -0.3665129205816643270124391582 + | CONJ -> 0.6019072301972345747375400015 + +let rec hash = function + | Num x -> hash_float (Number.to_float x) + | NaN x -> hash_float (transcendent_to_float x) + | Load v -> 1 + 1237 * Variable.hash v + | Store (v, x) -> 2 * Variable.hash v - 2345 * hash x + | Plus l -> 5 + 23451 * sum_list (List.map Hashtbl.hash l) + | Times (a, b) -> 41 + 31415 * (Hashtbl.hash a + Hashtbl.hash b) + | CTimes (a, b) -> 49 + 3245 * (Hashtbl.hash a + Hashtbl.hash b) + | CTimesJ (a, b) -> 31 + 3471 * (Hashtbl.hash a + Hashtbl.hash b) + | Uminus x -> 42 + 12345 * (hash x) + +(* find all variables *) +let rec find_vars x = + match x with + | Load y -> [y] + | Plus l -> List.flatten (List.map find_vars l) + | Times (a, b) -> (find_vars a) @ (find_vars b) + | CTimes (a, b) -> 
(find_vars a) @ (find_vars b) + | CTimesJ (a, b) -> (find_vars a) @ (find_vars b) + | Uminus a -> find_vars a + | _ -> [] + + +(* TRUE if expression is a constant *) +let is_constant = function + | Num _ -> true + | NaN _ -> true + | Load v -> Variable.is_constant v + | _ -> false + +let is_known_constant = function + | Num _ -> true + | NaN _ -> true + | _ -> false + +(* expr to string, used for debugging *) +let rec foldr_string_concat l = + match l with + [] -> "" + | [a] -> a + | a :: b -> a ^ " " ^ (foldr_string_concat b) + +let string_of_transcendent = function + | I -> "I" + | MULTI_A -> "MULTI_A" + | MULTI_B -> "MULTI_B" + | CONJ -> "CONJ" + +let rec to_string = function + | Load v -> Variable.unparse v + | Num n -> string_of_float (Number.to_float n) + | NaN n -> string_of_transcendent n + | Plus x -> "(+ " ^ (foldr_string_concat (List.map to_string x)) ^ ")" + | Times (a, b) -> "(* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")" + | CTimes (a, b) -> "(c* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")" + | CTimesJ (a, b) -> "(cj* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")" + | Uminus a -> "(- " ^ (to_string a) ^ ")" + | Store (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^ + (to_string a) ^ ")" + +let rec to_string_a d x = + if (d = 0) then "..." 
else match x with + | Load v -> Variable.unparse v + | Num n -> Number.to_konst n + | NaN n -> string_of_transcendent n + | Plus x -> "(+ " ^ (foldr_string_concat (List.map (to_string_a (d - 1)) x)) ^ ")" + | Times (a, b) -> "(* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")" + | CTimes (a, b) -> "(c* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")" + | CTimesJ (a, b) -> "(cj* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")" + | Uminus a -> "(- " ^ (to_string_a (d-1) a) ^ ")" + | Store (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^ + (to_string_a (d-1) a) ^ ")" + +let to_string = to_string_a 10 + +let assignment_to_string = function + | Assign (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^ (to_string a) ^ ")" + +let dump print = List.iter (fun x -> print ((assignment_to_string x) ^ "\n")) + +(* find all constants in a given expression *) +let rec expr_to_constants = function + | Num n -> [n] + | Plus a -> List.flatten (List.map expr_to_constants a) + | Times (a, b) -> (expr_to_constants a) @ (expr_to_constants b) + | CTimes (a, b) -> (expr_to_constants a) @ (expr_to_constants b) + | CTimesJ (a, b) -> (expr_to_constants a) @ (expr_to_constants b) + | Uminus a -> expr_to_constants a + | _ -> [] + + +let add_float_key_value list_so_far k = + if List.exists (fun k2 -> Number.equal k k2) list_so_far then + list_so_far + else + k :: list_so_far + +let unique_constants = List.fold_left add_float_key_value [] diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/expr.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/expr.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,51 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type transcendent = I | MULTI_A | MULTI_B | CONJ + +type expr = + | Num of Number.number + | NaN of transcendent + | Plus of expr list + | Times of expr * expr + | CTimes of expr * expr + | CTimesJ of expr * expr + | Uminus of expr + | Load of Variable.variable + | Store of Variable.variable * expr + +type assignment = Assign of Variable.variable * expr + +val hash_float : float -> int +val hash : expr -> int +val to_string : expr -> string +val assignment_to_string : assignment -> string +val transcendent_to_float : transcendent -> float +val string_of_transcendent : transcendent -> string + +val find_vars : expr -> Variable.variable list +val is_constant : expr -> bool +val is_known_constant : expr -> bool + +val dump : (string -> unit) -> assignment list -> unit + +val expr_to_constants : expr -> Number.number list +val unique_constants : Number.number list -> Number.number list diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/fft.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/fft.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,307 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + + +(* This is the part of the generator that actually computes the FFT + in symbolic form *) + +open Complex +open Util + +(* choose a suitable factor of n *) +let choose_factor n = + (* first choice: i such that gcd(i, n / i) = 1, i as big as possible *) + let choose1 n = + let rec loop i f = + if (i * i > n) then f + else if ((n mod i) == 0 && gcd i (n / i) == 1) then loop (i + 1) i + else loop (i + 1) f + in loop 1 1 + + (* second choice: the biggest factor i of n, where i < sqrt(n), if any *) + and choose2 n = + let rec loop i f = + if (i * i > n) then f + else if ((n mod i) == 0) then loop (i + 1) i + else loop (i + 1) f + in loop 1 1 + + in let i = choose1 n in + if (i > 1) then i + else choose2 n + +let is_power_of_two n = (n > 0) && ((n - 1) land n == 0) + +let rec dft_prime sign n input = + let sum filter i = + sigma 0 n (fun j -> + let coeff = filter (exp n (sign * i * j)) + in coeff @* (input j)) in + let computation_even = array n (sum identity) + and computation_odd = + let sumr = array n (sum real) + and sumi = array n (sum ((times Complex.i) @@ imag)) in + array n (fun i -> + if (i = 0) then + (* expose some common subexpressions *) + input 0 @+ + sigma 1 ((n + 1) / 2) (fun j -> input j @+ input (n - j)) + else + let i' = min i (n - i) in + if (i < n - i) then + sumr i' @+ sumi i' + else + sumr i' @- sumi i') in + 
if (n >= !Magic.rader_min) then + dft_rader sign n input + else if (n == 2) then + computation_even + else + computation_odd + + +and dft_rader sign p input = + let half = + let one_half = inverse_int 2 in + times one_half + + and make_product n a b = + let scale_factor = inverse_int n in + array n (fun i -> a i @* (scale_factor @* b i)) in + + (* generates a convolution using ffts. (all arguments are the + same as to gen_convolution, below) *) + let gen_convolution_by_fft n a b addtoall = + let fft_a = dft 1 n a + and fft_b = dft 1 n b in + + let fft_ab = make_product n fft_a fft_b + and dc_term i = if (i == 0) then addtoall else zero in + + let fft_ab1 = array n (fun i -> fft_ab i @+ dc_term i) + and sum = fft_a 0 in + let conv = dft (-1) n fft_ab1 in + (sum, conv) + + (* alternate routine for convolution. Seems to work better for + small sizes. I have no idea why. *) + and gen_convolution_by_fft_alt n a b addtoall = + let ap = array n (fun i -> half (a i @+ a ((n - i) mod n))) + and am = array n (fun i -> half (a i @- a ((n - i) mod n))) + and bp = array n (fun i -> half (b i @+ b ((n - i) mod n))) + and bm = array n (fun i -> half (b i @- b ((n - i) mod n))) + in + + let fft_ap = dft 1 n ap + and fft_am = dft 1 n am + and fft_bp = dft 1 n bp + and fft_bm = dft 1 n bm in + + let fft_abpp = make_product n fft_ap fft_bp + and fft_abpm = make_product n fft_ap fft_bm + and fft_abmp = make_product n fft_am fft_bp + and fft_abmm = make_product n fft_am fft_bm + and sum = fft_ap 0 @+ fft_am 0 + and dc_term i = if (i == 0) then addtoall else zero in + + let fft_ab1 = array n (fun i -> (fft_abpp i @+ fft_abmm i) @+ dc_term i) + and fft_ab2 = array n (fun i -> fft_abpm i @+ fft_abmp i) in + let conv1 = dft (-1) n fft_ab1 + and conv2 = dft (-1) n fft_ab2 in + let conv = array n (fun i -> + conv1 i @+ conv2 i) in + (sum, conv) + + (* generator of assignment list assigning conv to the convolution of + a and b, all of which are of length n. 
addtoall is added to + all of the elements of the result. Returns (sum, convolution) pair + where sum is the sum of the elements of a. *) + + in let gen_convolution = + if (p <= !Magic.alternate_convolution) then + gen_convolution_by_fft_alt + else + gen_convolution_by_fft + + (* fft generator for prime n = p using Rader's algorithm for + turning the fft into a convolution, which then can be + performed in a variety of ways *) + in + let g = find_generator p in + let ginv = pow_mod g (p - 2) p in + let input_perm = array p (fun i -> input (pow_mod g i p)) + and omega_perm = array p (fun i -> exp p (sign * (pow_mod ginv i p))) + and output_perm = array p (fun i -> pow_mod ginv i p) + in let (sum, conv) = + (gen_convolution (p - 1) input_perm omega_perm (input 0)) + in array p (fun i -> + if (i = 0) then + input 0 @+ sum + else + let i' = suchthat 0 (fun i' -> i = output_perm i') + in conv i') + +(* our modified version of the conjugate-pair split-radix algorithm, + which reduces the number of multiplications by rescaling the + sub-transforms (power-of-two n's only) *) +and newsplit sign n input = + let rec s n k = (* recursive scale factor *) + if n <= 4 then + one + else + let k4 = (abs k) mod (n / 4) in + let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in + (s (n / 4) k4') @* (real (exp n k4')) + + and sinv n k = (* 1 / s(n,k) *) + if n <= 4 then + one + else + let k4 = (abs k) mod (n / 4) in + let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in + (sinv (n / 4) k4') @* (sec n k4') + + in let sdiv2 n k = (s n k) @* (sinv (2*n) k) (* s(n,k) / s(2*n,k) *) + and sdiv4 n k = (* s(n,k) / s(4*n,k) *) + let k4 = (abs k) mod n in + sec (4*n) (if k4 <= (n / 2) then k4 else (n - k4)) + + in let t n k = (exp n k) @* (sdiv4 (n/4) k) + + and dft1 input = input + and dft2 input = array 2 (fun k -> (input 0) @+ ((input 1) @* exp 2 k)) + + in let rec newsplit0 sign n input = + if (n == 1) then dft1 input + else if (n == 2) then dft2 input + else let u = newsplit0 sign (n / 2) 
(fun i -> input (i*2)) + and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1)) + and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) + and twid = array n (fun k -> s (n/4) k @* exp n (sign * k)) in + let w = array n (fun k -> twid k @* z (k mod (n / 4))) + and w' = array n (fun k -> conj (twid k) @* z' (k mod (n / 4))) in + let ww = array n (fun k -> w k @+ w' k) in + array n (fun k -> u (k mod (n / 2)) @+ ww k) + + and newsplitS sign n input = + if (n == 1) then dft1 input + else if (n == 2) then dft2 input + else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2)) + and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1)) + and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in + let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4))) + and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in + let ww = array n (fun k -> w k @+ w' k) in + array n (fun k -> u (k mod (n / 2)) @+ ww k) + + and newsplitS2 sign n input = + if (n == 1) then dft1 input + else if (n == 2) then dft2 input + else let u = newsplitS4 sign (n / 2) (fun i -> input (i*2)) + and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1)) + and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in + let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4))) + and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in + let ww = array n (fun k -> (w k @+ w' k) @* (sdiv2 n k)) in + array n (fun k -> u (k mod (n / 2)) @+ ww k) + + and newsplitS4 sign n input = + if (n == 1) then dft1 input + else if (n == 2) then + let f = dft2 input + in array 2 (fun k -> (f k) @* (sinv 8 k)) + else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2)) + and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1)) + and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in + let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4))) + and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n 
/ 4))) in + let ww = array n (fun k -> w k @+ w' k) in + array n (fun k -> (u (k mod (n / 2)) @+ ww k) @* (sdiv4 n k)) + + in newsplit0 sign n input + +and dft sign n input = + let rec cooley_tukey sign n1 n2 input = + let tmp1 = + array n2 (fun i2 -> + dft sign n1 (fun i1 -> input (i1 * n2 + i2))) in + let tmp2 = + array n1 (fun i1 -> + array n2 (fun i2 -> + exp n (sign * i1 * i2) @* tmp1 i2 i1)) in + let tmp3 = array n1 (fun i1 -> dft sign n2 (tmp2 i1)) in + (fun i -> tmp3 (i mod n1) (i / n1)) + + (* + * This is "exponent -1" split-radix by Dan Bernstein. + *) + and split_radix_dit sign n input = + let f0 = dft sign (n / 2) (fun i -> input (i * 2)) + and f10 = dft sign (n / 4) (fun i -> input (i * 4 + 1)) + and f11 = dft sign (n / 4) (fun i -> input ((n + i * 4 - 1) mod n)) in + let g10 = array n (fun k -> + exp n (sign * k) @* f10 (k mod (n / 4))) + and g11 = array n (fun k -> + exp n (- sign * k) @* f11 (k mod (n / 4))) in + let g1 = array n (fun k -> g10 k @+ g11 k) in + array n (fun k -> f0 (k mod (n / 2)) @+ g1 k) + + and split_radix_dif sign n input = + let n2 = n / 2 and n4 = n / 4 in + let x0 = array n2 (fun i -> input i @+ input (i + n2)) + and x10 = array n4 (fun i -> input i @- input (i + n2)) + and x11 = array n4 (fun i -> + input (i + n4) @- input (i + n2 + n4)) in + let x1 k i = + exp n (k * i * sign) @* (x10 i @+ exp 4 (k * sign) @* x11 i) in + let f0 = dft sign n2 x0 + and f1 = array 4 (fun k -> dft sign n4 (x1 k)) in + array n (fun k -> + if k mod 2 = 0 then f0 (k / 2) + else let k' = k mod 4 in f1 k' ((k - k') / 4)) + + and prime_factor sign n1 n2 input = + let tmp1 = array n2 (fun i2 -> + dft sign n1 (fun i1 -> input ((i1 * n2 + i2 * n1) mod n))) + in let tmp2 = array n1 (fun i1 -> + dft sign n2 (fun k2 -> tmp1 k2 i1)) + in fun i -> tmp2 (i mod n1) (i mod n2) + + in let algorithm sign n = + let r = choose_factor n in + if List.mem n !Magic.rader_list then + (* special cases *) + dft_rader sign n + else if (r == 1) then (* n is prime *) + 
dft_prime sign n + else if (gcd r (n / r)) == 1 then + prime_factor sign r (n / r) + else if (n mod 4 = 0 && n > 4) then + if !Magic.newsplit && is_power_of_two n then + newsplit sign n + else if !Magic.dif_split_radix then + split_radix_dif sign n + else + split_radix_dit sign n + else + cooley_tukey sign r (n / r) + in + array n (algorithm sign n input) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/fft.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/fft.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,22 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val dft : int -> int -> Complex.signal -> Complex.signal diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_hc2c.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_hc2c.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,186 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +type ditdif = DIT | DIF +let ditdif = ref DIT +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let urs = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given R-stride"; +] + +let byi = Complex.times Complex.i +let byui = Complex.times (Complex.uminus Complex.i) + +let sym n f i = if (i < n - i) then f i else Complex.conj (f i) + +let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2) + +let generate n = + let rs = "rs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" + + (* the array names are from the point of view of the complex array + (output in R2C, input in C2R) *) + and arp = "Rp" (* real, positive *) + and aip = "Ip" (* imag, positive *) + and arm = "Rm" (* real, negative *) + and aim = "Im" (* imag, negative *) + + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in + let nt = num_twiddles n in + + let byw = bytwiddle n sign (twiddle_array nt twarray) in + + let vrs = either_stride (!urs) (C.SVar rs) in + + (* assume a single location. 
No point in doing alias analysis *) + let the_location = (Unique.make (), Unique.make ()) in + let locations _ = the_location in + + let locr = (locative_array_c n + (C.array_subscript arp vrs) + (C.array_subscript arm vrs) + locations "BUG") + and loci = (locative_array_c n + (C.array_subscript aip vrs) + (C.array_subscript aim vrs) + locations "BUG") + and locp = (locative_array_c n + (C.array_subscript arp vrs) + (C.array_subscript aip vrs) + locations "BUG") + and locm = (locative_array_c n + (C.array_subscript arm vrs) + (C.array_subscript aim vrs) + locations "BUG") + in + let locri i = if i mod 2 == 0 then locr (i/2) else loci ((i-1)/2) + and locpm i = if i < n - i then locp i else locm (n-1-i) + in + + let asch = + match !ditdif with + | DIT -> + let output = Fft.dft sign n (byw (load_array_c n locri)) in + let odag = store_array_c n locpm (sym n output) in + standard_optimizer odag + + | DIF -> + let output = byw (Fft.dft sign n (sym n (load_array_c n locpm))) in + let odag = store_array_c n locri output in + standard_optimizer odag + in + + let vms = CVar "ms" + and varp = CVar arp + and vaip = CVar aip + and varm = CVar arm + and vaim = CVar aim + and vm = CVar m and vmb = CVar mb and vme = CVar me + in + let body = Block ( + [Decl ("INT", m)], + [For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (CPlus [vmb; CUminus (Integer 1)], + Integer nt)])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]); + Expr_assign (varp, CPlus [varp; byvl vms]); + Expr_assign (vaip, CPlus [vaip; byvl vms]); + Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]); + Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + byvl (Integer nt)]); + make_volatile_stride (4*n) (CVar rs) + ], + Asch asch)]) + in + + let tree = + Fcn ("static void", name, + [Decl (C.realtypep, arp); + Decl (C.realtypep, aip); + Decl (C.realtypep, arm); + 
Decl (C.realtypep, aim); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "VL" (twdesc n)) + and desc = + Printf.sprintf + "static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n" + n name (flops_of tree) + and register = "X(khc2c_register)" + + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_RDFT);\n}" register name) + in + + (unparse tree) ^ "\n" ^ init + + +let main () = + begin + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_hc2cdft.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_hc2cdft.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,208 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +type ditdif = DIT | DIF +let ditdif = ref DIT +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let urs = ref Stride_variable +let ums = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given R-stride"; + + "-with-ms", + Arg.String(fun x -> ums := arg_to_stride x), + " specialize for given ms" +] + +let byi = Complex.times Complex.i +let byui = Complex.times (Complex.uminus Complex.i) + +let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2) + +let generate n = + let rs = "rs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" + + (* the array names are from the point of view of the complex array + (output in R2C, input in C2R) *) + and arp = "Rp" (* real, positive *) + and aip = "Ip" (* imag, positive *) + and arm = "Rm" (* real, negative *) + and aim = "Im" (* imag, negative *) + + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in + let nt = num_twiddles n in + + let byw = bytwiddle n sign (twiddle_array nt twarray) in + + let vrs = either_stride (!urs) (C.SVar rs) in + + (* assume a single location. 
No point in doing alias analysis *) + let the_location = (Unique.make (), Unique.make ()) in + let locations _ = the_location in + + let rlocp = (locative_array_c n + (C.array_subscript arp vrs) + (C.array_subscript aip vrs) + locations "BUG") + and rlocm = (locative_array_c n + (C.array_subscript arm vrs) + (C.array_subscript aim vrs) + locations "BUG") + and clocp = (locative_array_c n + (C.array_subscript arp vrs) + (C.array_subscript aip vrs) + locations "BUG") + and clocm = (locative_array_c n + (C.array_subscript arm vrs) + (C.array_subscript aim vrs) + locations "BUG") + in + let rloc i = if i mod 2 == 0 then rlocp (i/2) else rlocm ((i-1)/2) + and cloc i = if i < n - i then clocp i else clocm (n-1-i) + and sym n f i = if (i < n - i) then f i else Complex.conj (f i) + and sym1 f i = + if i mod 2 == 0 then + Complex.plus [f i; Complex.conj (f (i+1))] + else + Complex.times (Complex.uminus Complex.i) + (Complex.plus [f (i-1); Complex.uminus (Complex.conj (f i))]) + and sym1i f i = + if i mod 2 == 0 then + Complex.plus [f i; Complex.times Complex.i (f (i+1))] + else + Complex.conj + (Complex.plus [f (i-1); + Complex.times (Complex.uminus Complex.i) (f i)]) + in + + let asch = + match !ditdif with + | DIT -> + let output = + (Complex.times Complex.half) @@ + (Fft.dft sign n (byw (sym1 (load_array_c n rloc)))) in + let odag = store_array_c n cloc (sym n output) in + standard_optimizer odag + + | DIF -> + let output = + byw (Fft.dft sign n (sym n (load_array_c n cloc))) + in + let odag = store_array_c n rloc (sym1i output) in + standard_optimizer odag + in + + let vms = CVar "ms" + and varp = CVar arp + and vaip = CVar aip + and varm = CVar arm + and vaim = CVar aim + and vm = CVar m and vmb = CVar mb and vme = CVar me + in + let body = Block ( + [Decl ("INT", m)], + [For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (CPlus [vmb; CUminus (Integer 1)], + Integer nt)])], + Binop (" < ", vm, vme), + list_to_comma 
+ [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]); + Expr_assign (varp, CPlus [varp; byvl vms]); + Expr_assign (vaip, CPlus [vaip; byvl vms]); + Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]); + Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + byvl (Integer nt)]); + make_volatile_stride (4*n) (CVar rs) + ], + Asch asch)] + ) + in + + let tree = + Fcn ("static void", name, + [Decl (C.realtypep, arp); + Decl (C.realtypep, aip); + Decl (C.realtypep, arm); + Decl (C.realtypep, aim); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "VL" (twdesc n)) + and desc = + Printf.sprintf + "static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n" + n name (flops_of tree) + and register = "X(khc2c_register)" + + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_DFT);\n}" register name) + in + + (unparse tree) ^ "\n" ^ init + + +let main () = + begin + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_hc2cdft_c.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_hc2cdft_c.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,221 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +type ditdif = DIT | DIF +let ditdif = ref DIT +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let urs = ref Stride_variable +let ums = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given R-stride"; + + "-with-ms", + Arg.String(fun x -> ums := arg_to_stride x), + " specialize for given ms" +] + +let byi = Complex.times Complex.i +let byui = Complex.times (Complex.uminus Complex.i) + +let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2) + +let generate n = + let rs = "rs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" + + (* the array names are from the point of view of the complex array + (output in R2C, input in C2R) *) + and arp = "Rp" (* real, positive *) + and aip = "Ip" (* imag, positive *) + and arm = "Rm" (* real, negative *) + and aim = "Im" (* imag, negative *) + + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) + and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x)) + and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 true in + let nt = num_twiddles n in + + let byw = bytwiddle n sign (twiddle_array nt 
twarray) in + + let vrs = either_stride (!urs) (C.SVar rs) in + let sms = stride_to_string "ms" !ums in + let msms = "-" ^ sms in + + (* assume a single location. No point in doing alias analysis *) + let the_location = (Unique.make (), Unique.make ()) in + let locations _ = the_location in + + let rlocp = (locative_array_c n + (C.array_subscript arp vrs) + (C.array_subscript aip vrs) + locations sms) + and rlocm = (locative_array_c n + (C.array_subscript arm vrs) + (C.array_subscript aim vrs) + locations msms) + and clocp = (locative_array_c n + (C.array_subscript arp vrs) + (C.array_subscript aip vrs) + locations sms) + and clocm = (locative_array_c n + (C.array_subscript arm vrs) + (C.array_subscript aim vrs) + locations msms) + in + let rloc i = if i mod 2 == 0 then rlocp (i/2) else rlocm ((i-1)/2) + and cloc i = if i < n - i then clocp i else clocm (n-1-i) + and sym n f i = + if (i < n - i) then + f i + else + Complex.times (Complex.nan Expr.CONJ) (f i) + and sym1 f i = + if i mod 2 == 0 then + Complex.plus [f i; + Complex.times (Complex.nan Expr.CONJ) (f (i+1))] + else + Complex.times (Complex.nan Expr.I) + (Complex.plus [Complex.uminus (f (i-1)); + Complex.times (Complex.nan Expr.CONJ) (f i)]) + and sym1i f i = + if i mod 2 == 0 then + Complex.plus [f i; + Complex.times (Complex.nan Expr.I) (f (i+1))] + else + Complex.times (Complex.nan Expr.CONJ) + (Complex.plus [f (i-1); + Complex.uminus + (Complex.times (Complex.nan Expr.I) (f i))]) + in + + let asch = + match !ditdif with + | DIT -> + let output = + (Complex.times Complex.half) @@ + (Trig.dft_via_rdft sign n (byw (sym1 (load_array_r n rloc)))) in + let odag = store_array_r n cloc (sym n output) in + standard_optimizer odag + + | DIF -> + let output = + byw (Trig.dft_via_rdft sign n (sym n (load_array_r n cloc))) + in + let odag = store_array_r n rloc (sym1i output) in + standard_optimizer odag + in + + let vms = CVar sms + and varp = CVar arp + and vaip = CVar aip + and varm = CVar arm + and vaim = CVar 
aim + and vm = CVar m and vmb = CVar mb and vme = CVar me + in + let body = Block ( + [Decl ("INT", m)], + [For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (CPlus [vmb; CUminus (Integer 1)], + bytwvl_vl (Integer nt))])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]); + Expr_assign (varp, CPlus [varp; byvl vms]); + Expr_assign (vaip, CPlus [vaip; byvl vms]); + Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]); + Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + bytwvl (Integer nt)]); + make_volatile_stride (4*n) (CVar rs) + ], + Asch asch)] + ) + in + + let tree = + Fcn ("static void", name, + [Decl (C.realtypep, arp); + Decl (C.realtypep, aip); + Decl (C.realtypep, arm); + Decl (C.realtypep, aim); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "VL" (twdesc n)) + and desc = + Printf.sprintf + "static const hc2c_desc desc = {%d, %s, twinstr, &GENUS, %s};\n\n" + n (stringify name) (flops_of tree) + and register = "X(khc2c_register)" + + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_DFT);\n}" register name) + in + + (unparse tree) ^ "\n" ^ init + + +let main () = + begin + Simdmagic.simd_mode := true; + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_hc2hc.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_hc2hc.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,170 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo 
Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +type ditdif = DIT | DIF +let ditdif = ref DIT +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let urs = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given R-stride"; +] + +let rioarray = "cr" +and iioarray = "ci" + +let genone sign n transform load store vrs = + let locations = unique_array_c n in + let input = + locative_array_c n + (C.array_subscript rioarray vrs) + (C.array_subscript iioarray vrs) + locations "BUG" in + let output = transform sign n (load n input) in + let ioloc = + locative_array_c n + (C.array_subscript rioarray vrs) + (C.array_subscript iioarray vrs) + locations "BUG" in + let odag = store n ioloc output in + let annot = standard_optimizer odag + in annot + +let byi = Complex.times Complex.i +let byui = Complex.times (Complex.uminus Complex.i) + +let sym1 n f i = + Complex.plus [Complex.real (f i); byi (Complex.imag (f (n - 1 - i)))] + +let sym2 n f i = if (i 
< n - i) then f i else byi (f i) +let sym2i n f i = if (i < n - i) then f i else byui (f i) + +let generate n = + let rs = "rs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in + let nt = num_twiddles n in + + let byw = bytwiddle n sign (twiddle_array nt twarray) in + + let vrs = either_stride (!urs) (C.SVar rs) in + + let asch = + match !ditdif with + | DIT -> + genone sign n + (fun sign n input -> + ((sym1 n) @@ (sym2 n)) (Fft.dft sign n (byw input))) + load_array_c store_array_c vrs + | DIF -> + genone sign n + (fun sign n input -> + byw (Fft.dft sign n (((sym2i n) @@ (sym1 n)) input))) + load_array_c store_array_c vrs + in + + let vms = CVar "ms" + and vrioarray = CVar rioarray + and viioarray = CVar iioarray + and vm = CVar m and vmb = CVar mb and vme = CVar me + in + let body = Block ( + [Decl ("INT", m)], + [For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (CPlus [vmb; CUminus (Integer 1)], + Integer nt)])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]); + Expr_assign (vrioarray, CPlus [vrioarray; byvl vms]); + Expr_assign (viioarray, + CPlus [viioarray; CUminus (byvl vms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + byvl (Integer nt)]); + make_volatile_stride (2*n) (CVar rs) + ], + Asch asch)]) + in + + let tree = + Fcn ("static void", name, + [Decl (C.realtypep, rioarray); + Decl (C.realtypep, iioarray); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "VL" (twdesc n)) + and desc = + Printf.sprintf + "static const hc2hc_desc 
desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n" + n name (flops_of tree) + and register = "X(khc2hc_register)" + + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc);\n}" register name) + in + + (unparse tree) ^ "\n" ^ init + + +let main () = + begin + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_mdct.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_mdct.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,257 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* generation of trigonometric transforms *) + +open Util +open Genutil +open C + + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " + +let uistride = ref Stride_variable +let uostride = ref Stride_variable +let uivstride = ref Stride_variable +let uovstride = ref Stride_variable +let normalization = ref 1 + +type mode = + | MDCT + | MDCT_MP3 + | MDCT_VORBIS + | MDCT_WINDOW + | MDCT_WINDOW_SYM + | IMDCT + | IMDCT_MP3 + | IMDCT_VORBIS + | IMDCT_WINDOW + | IMDCT_WINDOW_SYM + | NONE + +let mode = ref NONE + +let speclist = [ + "-with-istride", + Arg.String(fun x -> uistride := arg_to_stride x), + " specialize for given input stride"; + + "-with-ostride", + Arg.String(fun x -> uostride := arg_to_stride x), + " specialize for given output stride"; + + "-with-ivstride", + Arg.String(fun x -> uivstride := arg_to_stride x), + " specialize for given input vector stride"; + + "-with-ovstride", + Arg.String(fun x -> uovstride := arg_to_stride x), + " specialize for given output vector stride"; + + "-normalization", + Arg.String(fun x -> normalization := int_of_string x), + " normalization integer to divide by"; + + "-mdct", + Arg.Unit(fun () -> mode := MDCT), + " generate an MDCT codelet"; + + "-mdct-mp3", + Arg.Unit(fun () -> mode := MDCT_MP3), + " generate an MDCT codelet with MP3 windowing"; + + "-mdct-window", + Arg.Unit(fun () -> mode := MDCT_WINDOW), + " generate an MDCT codelet with window array"; + + "-mdct-window-sym", + Arg.Unit(fun () -> mode := MDCT_WINDOW_SYM), + " generate an MDCT codelet with symmetric window array"; + + "-imdct", + Arg.Unit(fun () -> mode := IMDCT), + " generate an IMDCT codelet"; + + "-imdct-mp3", + Arg.Unit(fun () -> mode := IMDCT_MP3), + " generate an IMDCT codelet with MP3 windowing"; + + "-imdct-window", + 
Arg.Unit(fun () -> mode := IMDCT_WINDOW), + " generate an IMDCT codelet with window array"; + + "-imdct-window-sym", + Arg.Unit(fun () -> mode := IMDCT_WINDOW_SYM), + " generate an IMDCT codelet with symmetric window array"; +] + +let unity_window n i = Complex.one + +(* MP3 window(k) = sin(pi/(2n) * (k + 1/2)) *) +let mp3_window n k = + Complex.imag (Complex.exp (8 * n) (2*k + 1)) + +(* Vorbis window(k) = sin(pi/2 * (mp3_window(k))^2) + ... this is transcendental, though, so we can't do it with our + current Complex.exp function *) + +let window_array n w = + array n (fun i -> + let stride = C.SInteger 1 + and klass = Unique.make () in + let refr = C.array_subscript w stride i in + let kr = Variable.make_constant klass refr in + load_r (kr, kr)) + +let load_window w n i = w i +let load_window_sym w n i = w (if (i < n) then i else (2*n - 1 - i)) + +(* fixme: use same locations for input and output so that it works in-place? *) + +(* Note: only correct for even n! *) +let load_array_mdct window n rarr iarr locations = + let twon = 2 * n in + let arr = load_array_c twon + (locative_array_c twon rarr iarr locations "BUG") in + let arrw = fun i -> Complex.times (window n i) (arr i) in + array n + ((Complex.times Complex.half) @@ + (fun i -> + if (i < n/2) then + Complex.uminus (Complex.plus [arrw (i + n + n/2); + arrw (n + n/2 - 1 - i)]) + else + Complex.plus [arrw (i - n/2); + Complex.uminus (arrw (n + n/2 - 1 - i))])) + +let store_array_mdct window n rarr iarr locations arr = + store_array_r n (locative_array_c n rarr iarr locations "BUG") arr + +let load_array_imdct window n rarr iarr locations = + load_array_c n (locative_array_c n rarr iarr locations "BUG") + +let store_array_imdct window n rarr iarr locations arr = + let n2 = n/2 in + let threen2 = 3*n2 in + let arr2 = fun i -> + if (i < n2) then + arr (i + n2) + else if (i < threen2) then + Complex.uminus (arr (threen2 - 1 - i)) + else + Complex.uminus (arr (i - threen2)) + in + let arr2w = fun i -> 
Complex.times (window n i) (arr2 i) in + let twon = 2 * n in + store_array_r twon (locative_array_c twon rarr iarr locations "BUG") arr2w + +let window_param = function + MDCT_WINDOW -> true + | MDCT_WINDOW_SYM -> true + | IMDCT_WINDOW -> true + | IMDCT_WINDOW_SYM -> true + | _ -> false + +let generate n mode = + let iarray = "I" + and oarray = "O" + and istride = "istride" + and ostride = "ostride" + and window = "W" + and name = !Magic.codelet_name in + + let vistride = either_stride (!uistride) (C.SVar istride) + and vostride = either_stride (!uostride) (C.SVar ostride) + in + + let sivs = stride_to_string "ovs" !uovstride in + let sovs = stride_to_string "ivs" !uivstride in + + let (transform, load_input, store_output) = match mode with + | MDCT -> Trig.dctIV, load_array_mdct unity_window, + store_array_mdct unity_window + | MDCT_MP3 -> Trig.dctIV, load_array_mdct mp3_window, + store_array_mdct unity_window + | MDCT_WINDOW -> Trig.dctIV, load_array_mdct + (load_window (window_array (2 * n) window)), + store_array_mdct unity_window + | MDCT_WINDOW_SYM -> Trig.dctIV, load_array_mdct + (load_window_sym (window_array n window)), + store_array_mdct unity_window + | IMDCT -> Trig.dctIV, load_array_imdct unity_window, + store_array_imdct unity_window + | IMDCT_MP3 -> Trig.dctIV, load_array_imdct unity_window, + store_array_imdct mp3_window + | IMDCT_WINDOW -> Trig.dctIV, load_array_imdct unity_window, + store_array_imdct (load_window (window_array (2 * n) window)) + | IMDCT_WINDOW_SYM -> Trig.dctIV, load_array_imdct unity_window, + store_array_imdct (load_window_sym (window_array n window)) + | _ -> failwith "must specify transform kind" + in + + let locations = unique_array_c (2*n) in + let input = + load_input n + (C.array_subscript iarray vistride) + (C.array_subscript "BUG" vistride) + locations + in + let output = (Complex.times (Complex.inverse_int !normalization)) + @@ (transform n input) in + let odag = + store_output n + (C.array_subscript oarray vostride) + 
(C.array_subscript "BUG" vostride) + locations + output + in + let annot = standard_optimizer odag in + + let tree = + Fcn ("void", name, + ([Decl (C.constrealtypep, iarray); + Decl (C.realtypep, oarray)] + @ (if stride_fixed !uistride then [] + else [Decl (C.stridetype, istride)]) + @ (if stride_fixed !uostride then [] + else [Decl (C.stridetype, ostride)]) + @ (choose_simd [] + (if stride_fixed !uivstride then [] else + [Decl ("int", sivs)])) + @ (choose_simd [] + (if stride_fixed !uovstride then [] else + [Decl ("int", sovs)])) + @ (if (not (window_param mode)) then [] + else [Decl (C.constrealtypep, window)]) + ), + finalize_fcn (Asch annot)) + + in + (unparse tree) ^ "\n" + + +let main () = + begin + parse speclist usage; + print_string (generate (check_size ()) !mode); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_notw.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_notw.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,168 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " + +let uistride = ref Stride_variable +let uostride = ref Stride_variable +let uivstride = ref Stride_variable +let uovstride = ref Stride_variable + +let speclist = [ + "-with-istride", + Arg.String(fun x -> uistride := arg_to_stride x), + " specialize for given input stride"; + + "-with-ostride", + Arg.String(fun x -> uostride := arg_to_stride x), + " specialize for given output stride"; + + "-with-ivstride", + Arg.String(fun x -> uivstride := arg_to_stride x), + " specialize for given input vector stride"; + + "-with-ovstride", + Arg.String(fun x -> uovstride := arg_to_stride x), + " specialize for given output vector stride" +] + +let nonstandard_optimizer list_of_buddy_stores dag = + let sched = standard_scheduler dag in + let annot = Annotate.annotate list_of_buddy_stores sched in + let _ = dump_asched annot in + annot + +let generate n = + let riarray = "ri" + and iiarray = "ii" + and roarray = "ro" + and ioarray = "io" + and istride = "is" + and ostride = "os" + and i = "i" + and v = "v" + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "(2 * VL)", x)) in + let ename = expand_name name in + + let vistride = either_stride (!uistride) (C.SVar istride) + and vostride = either_stride (!uostride) (C.SVar ostride) + in + + let sovs = stride_to_string "ovs" !uovstride in + let sivs = stride_to_string "ivs" !uivstride in + + let locations = unique_array_c n in + let input = + locative_array_c n + (C.array_subscript riarray vistride) + (C.array_subscript iiarray vistride) + locations sivs in + let output = Fft.dft sign n (load_array_c n input) in + let oloc = + locative_array_c n + 
(C.array_subscript roarray vostride) + (C.array_subscript ioarray vostride) + locations sovs in + let list_of_buddy_stores = + let k = !Simdmagic.store_multiple in + if (k > 1) then + if (n mod k == 0) then + List.append + (List.map + (fun i -> List.map (fun j -> (fst (oloc (k * i + j)))) (iota k)) + (iota (n / k))) + (List.map + (fun i -> List.map (fun j -> (snd (oloc (k * i + j)))) (iota k)) + (iota (n / k))) + else failwith "invalid n for -store-multiple" + else [] + in + + let odag = store_array_c n oloc output in + let annot = nonstandard_optimizer list_of_buddy_stores odag in + + let body = Block ( + [Decl ("INT", i)], + [For (Expr_assign (CVar i, CVar v), + Binop (" > ", CVar i, Integer 0), + list_to_comma + [Expr_assign (CVar i, CPlus [CVar i; CUminus (byvl (Integer 1))]); + Expr_assign (CVar riarray, CPlus [CVar riarray; + byvl (CVar sivs)]); + Expr_assign (CVar iiarray, CPlus [CVar iiarray; + byvl (CVar sivs)]); + Expr_assign (CVar roarray, CPlus [CVar roarray; + byvl (CVar sovs)]); + Expr_assign (CVar ioarray, CPlus [CVar ioarray; + byvl (CVar sovs)]); + make_volatile_stride (4*n) (CVar istride); + make_volatile_stride (4*n) (CVar ostride) + ], + Asch annot) + ]) + in + + let tree = + Fcn ((if !Magic.standalone then "void" else "static void"), ename, + ([Decl (C.constrealtypep, riarray); + Decl (C.constrealtypep, iiarray); + Decl (C.realtypep, roarray); + Decl (C.realtypep, ioarray); + Decl (C.stridetype, istride); + Decl (C.stridetype, ostride); + Decl ("INT", v); + Decl ("INT", "ivs"); + Decl ("INT", "ovs")]), + finalize_fcn body) + + in let desc = + Printf.sprintf + "static const kdft_desc desc = { %d, %s, %s, &GENUS, %s, %s, %s, %s };\n" + n (stringify name) (flops_of tree) + (stride_to_solverparm !uistride) (stride_to_solverparm !uostride) + (choose_simd "0" (stride_to_solverparm !uivstride)) + (choose_simd "0" (stride_to_solverparm !uovstride)) + + and init = + (declare_register_fcn name) ^ + "{" ^ + " X(kdft_register)(p, " ^ ename ^ ", &desc);\n" 
^ + "}\n" + + in ((unparse tree) ^ "\n" ^ + (if !Magic.standalone then "" else desc ^ init)) + +let main () = + begin + parse speclist usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_notw_c.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_notw_c.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,165 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " + +let uistride = ref Stride_variable +let uostride = ref Stride_variable +let uivstride = ref Stride_variable +let uovstride = ref Stride_variable + +let speclist = [ + "-with-istride", + Arg.String(fun x -> uistride := arg_to_stride x), + " specialize for given input stride"; + + "-with-ostride", + Arg.String(fun x -> uostride := arg_to_stride x), + " specialize for given output stride"; + + "-with-ivstride", + Arg.String(fun x -> uivstride := arg_to_stride x), + " specialize for given input vector stride"; + + "-with-ovstride", + Arg.String(fun x -> uovstride := arg_to_stride x), + " specialize for given output vector stride" +] + +let nonstandard_optimizer list_of_buddy_stores dag = + let sched = standard_scheduler dag in + let annot = Annotate.annotate list_of_buddy_stores sched in + let _ = dump_asched annot in + annot + +let generate n = + let riarray = "xi" + and roarray = "xo" + and istride = "is" + and ostride = "os" + and i = "i" + and v = "v" + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) in + let ename = expand_name name in + + let vistride = either_stride (!uistride) (C.SVar istride) + and vostride = either_stride (!uostride) (C.SVar ostride) + in + + let sivs = stride_to_string "ivs" !uivstride in + let sovs = stride_to_string "ovs" !uovstride in + + let fft = Trig.dft_via_rdft in + + let locations = unique_array_c n in + let input = + locative_array_c n + (C.array_subscript riarray vistride) + (C.array_subscript "BUG" vistride) + locations sivs in + let output = fft sign n (load_array_r n input) in + let oloc = + locative_array_c n + (C.array_subscript roarray vostride) 
+ (C.array_subscript "BUG" vostride) + locations sovs in + let list_of_buddy_stores = + let k = !Simdmagic.store_multiple in + if (k > 1) then + if (n mod k == 0) then + List.map + (fun i -> List.map (fun j -> (fst (oloc (k * i + j)))) (iota k)) + (iota (n / k)) + else failwith "invalid n for -store-multiple" + else [] + in + let odag = store_array_r n oloc output in + let annot = nonstandard_optimizer list_of_buddy_stores odag in + + let body = Block ( + [Decl ("INT", i); + Decl (C.constrealtypep, riarray); + Decl (C.realtypep, roarray)], + [Stmt_assign (CVar riarray, CVar (if (sign < 0) then "ri" else "ii")); + Stmt_assign (CVar roarray, CVar (if (sign < 0) then "ro" else "io")); + For (Expr_assign (CVar i, CVar v), + Binop (" > ", CVar i, Integer 0), + list_to_comma + [Expr_assign (CVar i, CPlus [CVar i; CUminus (byvl (Integer 1))]); + Expr_assign (CVar riarray, CPlus [CVar riarray; + byvl (CVar sivs)]); + Expr_assign (CVar roarray, CPlus [CVar roarray; + byvl (CVar sovs)]); + make_volatile_stride (2*n) (CVar istride); + make_volatile_stride (2*n) (CVar ostride) + ], + Asch annot); + ]) + in + + let tree = + Fcn ((if !Magic.standalone then "void" else "static void"), ename, + ([Decl (C.constrealtypep, "ri"); + Decl (C.constrealtypep, "ii"); + Decl (C.realtypep, "ro"); + Decl (C.realtypep, "io"); + Decl (C.stridetype, istride); + Decl (C.stridetype, ostride); + Decl ("INT", v); + Decl ("INT", "ivs"); + Decl ("INT", "ovs")]), + finalize_fcn body) + + in + let desc = + Printf.sprintf + "static const kdft_desc desc = { %d, %s, %s, &GENUS, %s, %s, %s, %s };\n" + n (stringify name) (flops_of tree) + (stride_to_solverparm !uistride) (stride_to_solverparm !uostride) + (choose_simd "0" (stride_to_solverparm !uivstride)) + (choose_simd "0" (stride_to_solverparm !uovstride)) + + and init = + (declare_register_fcn name) ^ + "{" ^ + " X(kdft_register)(p, " ^ ename ^ ", &desc);\n" ^ + "}\n" + + in ((unparse tree) ^ "\n" ^ + (if !Magic.standalone then "" else desc ^ init)) + 
+let main () = + begin + Simdmagic.simd_mode := true; + parse speclist usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_r2cb.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_r2cb.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,167 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " + +let urs = ref Stride_variable +let ucsr = ref Stride_variable +let ucsi = ref Stride_variable +let uivs = ref Stride_variable +let uovs = ref Stride_variable +let dftIII_flag = ref false + +let speclist = [ + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given real-array stride"; + + "-with-csr", + Arg.String(fun x -> ucsr := arg_to_stride x), + " specialize for given complex-array real stride"; + + "-with-csi", + Arg.String(fun x -> ucsi := arg_to_stride x), + " specialize for given complex-array imaginary stride"; + + "-with-ivs", + Arg.String(fun x -> uivs := arg_to_stride x), + " specialize for given input vector stride"; + + "-with-ovs", + Arg.String(fun x -> uovs := arg_to_stride x), + " specialize for given output vector stride"; + + "-dft-III", + Arg.Unit(fun () -> dftIII_flag := true), + " produce shifted dftIII-style codelets" +] + +let hcdftIII sign n input = + let input' i = + if (i mod 2 == 0) then + Complex.zero + else + let i' = (i - 1) / 2 in + if (2 * i' < n - 1) then (input i') + else if (2 * i' == n - 1) then + Complex.real (input i') + else + Complex.conj (input (n - 1 - i')) + in Fft.dft sign (2 * n) input' + +let generate n = + let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci" + and rs = "rs" and csr = "csr" and csi = "csi" + and i = "i" and v = "v" + and transform = if !dftIII_flag then hcdftIII else Trig.hdft + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name in + + let vrs = either_stride (!urs) (C.SVar rs) + and vcsr = either_stride (!ucsr) (C.SVar csr) + and vcsi = either_stride (!ucsi) (C.SVar csi) + in + + let sovs = stride_to_string "ovs" !uovs in + let sivs = 
stride_to_string "ivs" !uivs in + + let locations = unique_array_c n in + let input = + locative_array_c n + (C.array_subscript acr vcsr) + (C.array_subscript aci vcsi) + locations sivs in + let output = transform sign n (load_array_hc n input) in + let oloce = + locative_array_c n + (C.array_subscript ar0 vrs) + (C.array_subscript "BUG" vrs) + locations sovs + and oloco = + locative_array_c n + (C.array_subscript ar1 vrs) + (C.array_subscript "BUG" vrs) + locations sovs in + let oloc i = if i mod 2 == 0 then oloce (i/2) else oloco ((i-1)/2) in + let odag = store_array_r n oloc output in + let annot = standard_optimizer odag in + + let body = Block ( + [Decl ("INT", i)], + [For (Expr_assign (CVar i, CVar v), + Binop (" > ", CVar i, Integer 0), + list_to_comma + [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]); + Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sovs]); + Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sovs]); + Expr_assign (CVar acr, CPlus [CVar acr; CVar sivs]); + Expr_assign (CVar aci, CPlus [CVar aci; CVar sivs]); + make_volatile_stride (4*n) (CVar rs); + make_volatile_stride (4*n) (CVar csr); + make_volatile_stride (4*n) (CVar csi) + ], + Asch annot) + ]) + in + + let tree = + Fcn ((if !Magic.standalone then "void" else "static void"), name, + ([Decl (C.realtypep, ar0); + Decl (C.realtypep, ar1); + Decl (C.realtypep, acr); + Decl (C.realtypep, aci); + Decl (C.stridetype, rs); + Decl (C.stridetype, csr); + Decl (C.stridetype, csi); + Decl ("INT", v); + Decl ("INT", "ivs"); + Decl ("INT", "ovs")]), + finalize_fcn body) + + in let desc = + Printf.sprintf + "static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n" + n name (flops_of tree) + + and init = + (declare_register_fcn name) ^ + "{" ^ + " X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^ + "}\n" + + in + (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init) + + +let main () = + begin + parse speclist usage; + print_string (generate (check_size ())); + end + +let _ = 
main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_r2cf.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_r2cf.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,164 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " + +let urs = ref Stride_variable +let ucsr = ref Stride_variable +let ucsi = ref Stride_variable +let uivs = ref Stride_variable +let uovs = ref Stride_variable +let dftII_flag = ref false + +let speclist = [ + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given real-array stride"; + + "-with-csr", + Arg.String(fun x -> ucsr := arg_to_stride x), + " specialize for given complex-array real stride"; + + "-with-csi", + Arg.String(fun x -> ucsi := arg_to_stride x), + " specialize for given complex-array imaginary stride"; + + "-with-ivs", + Arg.String(fun x -> uivs := arg_to_stride x), + " specialize for given input vector stride"; + + "-with-ovs", + Arg.String(fun x -> uovs := arg_to_stride x), + " specialize for given output 
vector stride"; + + "-dft-II", + Arg.Unit(fun () -> dftII_flag := true), + " produce shifted dftII-style codelets" +] + +let rdftII sign n input = + let input' i = if i < n then input i else Complex.zero in + let f = Fft.dft sign (2 * n) input' in + let g i = f (2 * i + 1) + in fun i -> + if (i < n - i) then g i + else if (2 * i + 1 == n) then Complex.real (g i) + else Complex.zero + +let generate n = + let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci" + and rs = "rs" and csr = "csr" and csi = "csi" + and i = "i" and v = "v" + and transform = if !dftII_flag then rdftII else Trig.rdft + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name in + + let vrs = either_stride (!urs) (C.SVar rs) + and vcsr = either_stride (!ucsr) (C.SVar csr) + and vcsi = either_stride (!ucsi) (C.SVar csi) + in + + let sovs = stride_to_string "ovs" !uovs in + let sivs = stride_to_string "ivs" !uivs in + + let locations = unique_array_c n in + let inpute = + locative_array_c n + (C.array_subscript ar0 vrs) + (C.array_subscript "BUG" vrs) + locations sivs + and inputo = + locative_array_c n + (C.array_subscript ar1 vrs) + (C.array_subscript "BUG" vrs) + locations sivs + in + let input i = if i mod 2 == 0 then inpute (i/2) else inputo ((i-1)/2) in + let output = transform sign n (load_array_r n input) in + let oloc = + locative_array_c n + (C.array_subscript acr vcsr) + (C.array_subscript aci vcsi) + locations sovs in + let odag = store_array_hc n oloc output in + let annot = standard_optimizer odag in + + let body = Block ( + [Decl ("INT", i)], + [For (Expr_assign (CVar i, CVar v), + Binop (" > ", CVar i, Integer 0), + list_to_comma + [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]); + Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sivs]); + Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sivs]); + Expr_assign (CVar acr, CPlus [CVar acr; CVar sovs]); + Expr_assign (CVar aci, CPlus [CVar aci; CVar sovs]); + make_volatile_stride (4*n) (CVar rs); + make_volatile_stride 
(4*n) (CVar csr); + make_volatile_stride (4*n) (CVar csi) + ], + Asch annot) + ]) + in + + let tree = + Fcn ((if !Magic.standalone then "void" else "static void"), name, + ([Decl (C.realtypep, ar0); + Decl (C.realtypep, ar1); + Decl (C.realtypep, acr); + Decl (C.realtypep, aci); + Decl (C.stridetype, rs); + Decl (C.stridetype, csr); + Decl (C.stridetype, csi); + Decl ("INT", v); + Decl ("INT", "ivs"); + Decl ("INT", "ovs")]), + finalize_fcn body) + + in let desc = + Printf.sprintf + "static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n" + n name (flops_of tree) + + and init = + (declare_register_fcn name) ^ + "{" ^ + " X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^ + "}\n" + + in + (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init) + + +let main () = + begin + parse speclist usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_r2r.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_r2r.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,257 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* generation of trigonometric transforms *) + +open Util +open Genutil +open C + + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n " + +let uistride = ref Stride_variable +let uostride = ref Stride_variable +let uivstride = ref Stride_variable +let uovstride = ref Stride_variable + +type mode = + | RDFT + | HDFT + | DHT + | REDFT00 + | REDFT10 + | REDFT01 + | REDFT11 + | RODFT00 + | RODFT10 + | RODFT01 + | RODFT11 + | NONE + +let mode = ref NONE +let normsqr = ref 1 +let unitary = ref false +let noloop = ref false + +let speclist = [ + "-with-istride", + Arg.String(fun x -> uistride := arg_to_stride x), + " specialize for given input stride"; + + "-with-ostride", + Arg.String(fun x -> uostride := arg_to_stride x), + " specialize for given output stride"; + + "-with-ivstride", + Arg.String(fun x -> uivstride := arg_to_stride x), + " specialize for given input vector stride"; + + "-with-ovstride", + Arg.String(fun x -> uovstride := arg_to_stride x), + " specialize for given output vector stride"; + + "-rdft", + Arg.Unit(fun () -> mode := RDFT), + " generate a real DFT codelet"; + + "-hdft", + Arg.Unit(fun () -> mode := HDFT), + " generate a Hermitian DFT codelet"; + + "-dht", + Arg.Unit(fun () -> mode := DHT), + " generate a DHT codelet"; + + "-redft00", + Arg.Unit(fun () -> mode := REDFT00), + " generate a DCT-I codelet"; + + "-redft10", + Arg.Unit(fun () -> mode := REDFT10), + " generate a DCT-II codelet"; + + "-redft01", + Arg.Unit(fun () -> mode := REDFT01), + " generate a DCT-III codelet"; + + "-redft11", + Arg.Unit(fun () -> mode := REDFT11), + " generate a DCT-IV codelet"; + + "-rodft00", + Arg.Unit(fun () -> mode := RODFT00), + " generate a DST-I codelet"; + + "-rodft10", + Arg.Unit(fun () -> mode := RODFT10), + " generate a DST-II 
codelet"; + + "-rodft01", + Arg.Unit(fun () -> mode := RODFT01), + " generate a DST-III codelet"; + + "-rodft11", + Arg.Unit(fun () -> mode := RODFT11), + " generate a DST-IV codelet"; + + "-normalization", + Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix), + " normalization integer to divide by"; + + "-normsqr", + Arg.String(fun x -> normsqr := int_of_string x), + " integer square of normalization to divide by"; + + "-unitary", + Arg.Unit(fun () -> unitary := true), + " unitary normalization (up overall scale factor)"; + + "-noloop", + Arg.Unit(fun () -> noloop := true), + " no vector loop"; +] + +let sqrt_half = Complex.inverse_int_sqrt 2 +let sqrt_two = Complex.int_sqrt 2 + +let rescale sc s1 s2 input i = + if ((i == s1 || i == s2) && !unitary) then + Complex.times (input i) sc + else + input i + +let generate n mode = + let iarray = "I" + and oarray = "O" + and istride = "is" + and ostride = "os" + and i = "i" + and v = "v" + in + + let sign = !Genutil.sign + and name = !Magic.codelet_name in + + let vistride = either_stride (!uistride) (C.SVar istride) + and vostride = either_stride (!uostride) (C.SVar ostride) + in + + let sovs = stride_to_string "ovs" !uovstride in + let sivs = stride_to_string "ivs" !uivstride in + + let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with + | RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1 + | HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *) + | DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1 + | REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1 + | REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1 + | REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1 + | REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1 + | RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1 + | RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1 + | RODFT01 -> Trig.dstIII, 
load_array_r, store_array_r, n-1,-1,-1,-1 + | RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1 + | _ -> failwith "must specify transform kind" + in + + let locations = unique_array_c n in + let input = locative_array_c n + (C.array_subscript iarray vistride) + (C.array_subscript "BUG" vistride) + locations sivs in + let output = rescale sqrt_half so1 so2 + ((Complex.times (Complex.inverse_int_sqrt !normsqr)) + @@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in + let oloc = + locative_array_c n + (C.array_subscript oarray vostride) + (C.array_subscript "BUG" vostride) + locations sovs in + let odag = store_output n oloc output in + let annot = standard_optimizer odag in + + let body = if !noloop then Block([], [Asch annot]) else Block ( + [Decl ("INT", i)], + [For (Expr_assign (CVar i, CVar v), + Binop (" > ", CVar i, Integer 0), + list_to_comma + [Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]); + Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]); + Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]); + make_volatile_stride (2*n) (CVar istride); + make_volatile_stride (2*n) (CVar ostride) + ], + Asch annot) + ]) + in + + let tree = + Fcn ((if !Magic.standalone then "void" else "static void"), name, + ([Decl (C.constrealtypep, iarray); + Decl (C.realtypep, oarray)] + @ (if stride_fixed !uistride then [] + else [Decl (C.stridetype, istride)]) + @ (if stride_fixed !uostride then [] + else [Decl (C.stridetype, ostride)]) + @ (if !noloop then [] else + [Decl ("INT", v)] + @ (if stride_fixed !uivstride then [] + else [Decl ("INT", "ivs")]) + @ (if stride_fixed !uovstride then [] + else [Decl ("INT", "ovs")]))), + finalize_fcn body) + + in let desc = + Printf.sprintf + "static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n" + n name (flops_of tree) + (match mode with + | RDFT -> "RDFT00" + | HDFT -> "HDFT00" + | DHT -> "DHT" + | REDFT00 -> "REDFT00" + | REDFT10 -> "REDFT10" + | REDFT01 -> "REDFT01" + | 
REDFT11 -> "REDFT11" + | RODFT00 -> "RODFT00" + | RODFT10 -> "RODFT10" + | RODFT01 -> "RODFT01" + | RODFT11 -> "RODFT11" + | _ -> failwith "must specify a transform kind") + + and init = + (declare_register_fcn name) ^ + "{" ^ + " X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^ + "}\n" + + in + (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init) + + +let main () = + begin + parse speclist usage; + print_string (generate (check_size ()) !mode); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_twiddle.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_twiddle.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,161 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +type ditdif = DIT | DIF +let ditdif = ref DIT +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let urs = ref Stride_variable +let ums = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given i/o stride"; + + "-with-ms", + Arg.String(fun x -> ums := arg_to_stride x), + " specialize for given ms" +] + +let generate n = + let rioarray = "ri" + and iioarray = "ii" + and rs = "rs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "(2 * VL)", x)) in + let ename = expand_name name in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 false in + let nt = num_twiddles n in + + let byw = bytwiddle n sign (twiddle_array nt twarray) in + + let vrs = either_stride (!urs) (C.SVar rs) in + let sms = stride_to_string "ms" !ums in + + let locations = unique_array_c n in + let iloc = + locative_array_c n + (C.array_subscript rioarray vrs) + (C.array_subscript iioarray vrs) + locations sms + and oloc = + locative_array_c n + (C.array_subscript rioarray vrs) + (C.array_subscript iioarray vrs) + locations sms + in + let liloc = load_array_c n iloc in + let output = + match !ditdif with + | DIT -> array n (Fft.dft sign n (byw liloc)) + | DIF -> array n (byw (Fft.dft sign n liloc)) + in + let odag = store_array_c n oloc output in + let annot = standard_optimizer odag in + + let vm = CVar m and vmb = CVar mb and vme = CVar me in + + let 
body = Block ( + [Decl ("INT", m)], + [For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (vmb, Integer nt)])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]); + Expr_assign (CVar rioarray, CPlus [CVar rioarray; + byvl (CVar sms)]); + Expr_assign (CVar iioarray, CPlus [CVar iioarray; + byvl (CVar sms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + byvl (Integer nt)]); + make_volatile_stride (2*n) (CVar rs) + ], + Asch annot)]) + in + + let tree = + Fcn (((if !Magic.standalone then "" else "static ") ^ "void"), + ename, + [Decl (C.realtypep, rioarray); + Decl (C.realtypep, iioarray); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "(2 * VL)" (twdesc n)) + and desc = + Printf.sprintf + "static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n" + n (stringify name) (flops_of tree) + (stride_to_solverparm !urs) "0" + (stride_to_solverparm !ums) + and register = + match !ditdif with + | DIT -> "X(kdft_dit_register)" + | DIF -> "X(kdft_dif_register)" + + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename) + in + + (unparse tree) ^ "\n" ^ + (if !Magic.standalone then "" else init) + + +let main () = + begin + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_twiddle_c.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_twiddle_c.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,165 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 
2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + + +type ditdif = DIT | DIF +let ditdif = ref DIT +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let urs = ref Stride_variable +let ums = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given i/o stride"; + + "-with-ms", + Arg.String(fun x -> ums := arg_to_stride x), + " specialize for given ms" +] + +let generate n = + let rioarray = "x" + and rs = "rs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) + and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x)) + and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in + let ename = expand_name name in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 true in + let nt = num_twiddles n in + + let byw = bytwiddle n sign (twiddle_array nt twarray) in + + let vrs = either_stride (!urs) (C.SVar rs) 
in + let sms = stride_to_string "ms" !ums in + + let locations = unique_array_c n in + let iloc = + locative_array_c n + (C.array_subscript rioarray vrs) + (C.array_subscript "BUG" vrs) + locations sms + and oloc = + locative_array_c n + (C.array_subscript rioarray vrs) + (C.array_subscript "BUG" vrs) + locations sms + in + let liloc = load_array_r n iloc in + let fft = Trig.dft_via_rdft in + let output = + match !ditdif with + | DIT -> array n (fft sign n (byw liloc)) + | DIF -> array n (byw (fft sign n liloc)) + in + let odag = store_array_r n oloc output in + let annot = standard_optimizer odag in + + let vm = CVar m and vmb = CVar mb and vme = CVar me in + + let body = Block ( + [Decl ("INT", m); + Decl (C.realtypep, rioarray)], + [Stmt_assign (CVar rioarray, + CVar (if (sign < 0) then "ri" else "ii")); + For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (vmb, + bytwvl_vl (Integer nt))])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus [vm; byvl (Integer 1)]); + Expr_assign (CVar rioarray, CPlus [CVar rioarray; + byvl (CVar sms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + bytwvl (Integer nt)]); + make_volatile_stride n (CVar rs) + ], + Asch annot)]) + in + + let tree = + Fcn (((if !Magic.standalone then "" else "static ") ^ "void"), + ename, + [Decl (C.realtypep, "ri"); + Decl (C.realtypep, "ii"); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "VL" (twdesc n)) + and desc = + Printf.sprintf + "static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n" + n (stringify name) (flops_of tree) + (stride_to_solverparm !urs) "0" + (stride_to_solverparm !ums) + and register = + match !ditdif with + | DIT -> "X(kdft_dit_register)" + | DIF -> "X(kdft_dif_register)" + + in 
+ let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename) + in + + (unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else init) + + +let main () = + begin + Simdmagic.simd_mode := true; + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_twidsq.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_twidsq.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + +type ditdif = DIT | DIF +let ditdif = ref DIT + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let reload_twiddle = ref false + +let urs = ref Stride_variable +let uvs = ref Stride_variable +let ums = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-reload-twiddle", + Arg.Unit(fun () -> reload_twiddle := true), + " do not collect common twiddle factors"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given input stride"; + + "-with-vs", + Arg.String(fun x -> uvs := arg_to_stride x), + " specialize for given vector stride"; + + "-with-ms", + Arg.String(fun x -> ums := arg_to_stride x), + " specialize for given ms" +] + +let generate n = + let rioarray = "rio" + and iioarray = "iio" + and rs = "rs" and vs = "vs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" in + + let sign = !Genutil.sign + and name = !Magic.codelet_name in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 false in + let nt = num_twiddles n in + + let svs = either_stride (!uvs) (C.SVar vs) + and srs = either_stride (!urs) (C.SVar rs) in + + let byw = + if !reload_twiddle then + array n (fun v -> bytwiddle n sign (twiddle_array nt twarray)) + else + let a = bytwiddle n sign (twiddle_array nt twarray) + in fun v -> a + in + + let locations = unique_v_array_c n n in + + let ioi = + locative_v_array_c n n + (C.varray_subscript rioarray svs srs) + (C.varray_subscript iioarray svs srs) + locations "BUG" + and ioo = + locative_v_array_c n n + (C.varray_subscript rioarray svs srs) + 
(C.varray_subscript iioarray svs srs) + locations "BUG" + in + + let lioi = load_v_array_c n n ioi in + let output = + match !ditdif with + | DIT -> array n (fun v -> Fft.dft sign n (byw v (lioi v))) + | DIF -> array n (fun v -> byw v (Fft.dft sign n (lioi v))) + in + + let odag = store_v_array_c n n ioo (transpose output) in + let annot = standard_optimizer odag in + + let vm = CVar m and vmb = CVar mb and vme = CVar me in + + let body = Block ( + [Decl ("INT", m)], + [For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (vmb, Integer nt)])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus [vm; Integer 1]); + Expr_assign (CVar rioarray, CPlus [CVar rioarray; CVar ms]); + Expr_assign (CVar iioarray, CPlus [CVar iioarray; CVar ms]); + Expr_assign (CVar twarray, CPlus [CVar twarray; Integer nt]); + make_volatile_stride (2*n) (CVar rs); + make_volatile_stride (2*0) (CVar vs) + ], + Asch annot)]) in + + let tree = + Fcn (("static void"), name, + [Decl (C.realtypep, rioarray); + Decl (C.realtypep, iioarray); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl (C.stridetype, vs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (Twiddle.twinstr_to_c_string (twdesc n)) + + and desc = + Printf.sprintf + "static const ct_desc desc = {%d, \"%s\", twinstr, &GENUS, %s, %s, %s, %s};\n\n" + n name (flops_of tree) + (stride_to_solverparm !urs) (stride_to_solverparm !uvs) + (stride_to_solverparm !ums) + + and register = + match !ditdif with + | DIT -> "X(kdft_ditsq_register)" + | DIF -> "X(kdft_difsq_register)" + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc);\n}" register name) + in + + (unparse tree) ^ "\n" ^ init + + +let main () = + begin + parse (speclist @ Twiddle.speclist) usage; + print_string 
(generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/gen_twidsq_c.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/gen_twidsq_c.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,187 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Util +open Genutil +open C + +type ditdif = DIT | DIF +let ditdif = ref DIT + +let usage = "Usage: " ^ Sys.argv.(0) ^ " -n [ -dit | -dif ]" + +let reload_twiddle = ref false + +let urs = ref Stride_variable +let uvs = ref Stride_variable +let ums = ref Stride_variable + +let speclist = [ + "-dit", + Arg.Unit(fun () -> ditdif := DIT), + " generate a DIT codelet"; + + "-dif", + Arg.Unit(fun () -> ditdif := DIF), + " generate a DIF codelet"; + + "-reload-twiddle", + Arg.Unit(fun () -> reload_twiddle := true), + " do not collect common twiddle factors"; + + "-with-rs", + Arg.String(fun x -> urs := arg_to_stride x), + " specialize for given input stride"; + + "-with-vs", + Arg.String(fun x -> uvs := arg_to_stride x), + " specialize for given vector stride"; + + "-with-ms", + 
Arg.String(fun x -> ums := arg_to_stride x), + " specialize for given ms" +] + +let generate n = + let rioarray = "x" + and rs = "rs" and vs = "vs" + and twarray = "W" + and m = "m" and mb = "mb" and me = "me" and ms = "ms" in + + let sign = !Genutil.sign + and name = !Magic.codelet_name + and byvl x = choose_simd x (ctimes (CVar "VL", x)) + and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x)) + and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in + let ename = expand_name name in + + let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 true in + let nt = num_twiddles n in + + let svs = either_stride (!uvs) (C.SVar vs) + and srs = either_stride (!urs) (C.SVar rs) in + let sms = stride_to_string "ms" !ums in + + let byw = + if !reload_twiddle then + array n (fun v -> bytwiddle n sign (twiddle_array nt twarray)) + else + let a = bytwiddle n sign (twiddle_array nt twarray) + in fun v -> a + in + + let locations = unique_v_array_c n n in + + let ioi = + locative_v_array_c n n + (C.varray_subscript rioarray svs srs) + (C.varray_subscript "BUG" svs srs) + locations sms + and ioo = + locative_v_array_c n n + (C.varray_subscript rioarray svs srs) + (C.varray_subscript "BUG" svs srs) + locations sms + in + + let lioi = load_v_array_c n n ioi in + let fft = Trig.dft_via_rdft in + let output = + match !ditdif with + | DIT -> array n (fun v -> fft sign n (byw v (lioi v))) + | DIF -> array n (fun v -> byw v (fft sign n (lioi v))) + in + + let odag = store_v_array_c n n ioo (transpose output) in + let annot = standard_optimizer odag in + + let vm = CVar m and vmb = CVar mb and vme = CVar me in + + let body = Block ( + [Decl ("INT", m); + Decl (C.realtypep, rioarray)], + [Stmt_assign (CVar rioarray, + CVar (if (sign < 0) then "ri" else "ii")); + For (list_to_comma + [Expr_assign (vm, vmb); + Expr_assign (CVar twarray, + CPlus [CVar twarray; + ctimes (vmb, + bytwvl_vl (Integer nt))])], + Binop (" < ", vm, vme), + list_to_comma + [Expr_assign (vm, CPlus 
[vm; byvl (Integer 1)]); + Expr_assign (CVar rioarray, CPlus [CVar rioarray; + byvl (CVar sms)]); + Expr_assign (CVar twarray, CPlus [CVar twarray; + bytwvl (Integer nt)]); + make_volatile_stride (2*n) (CVar rs); + make_volatile_stride (2*n) (CVar vs) + ], + Asch annot)]) in + + let tree = + Fcn (("static void"), ename, + [Decl (C.realtypep, "ri"); + Decl (C.realtypep, "ii"); + Decl (C.constrealtypep, twarray); + Decl (C.stridetype, rs); + Decl (C.stridetype, vs); + Decl ("INT", mb); + Decl ("INT", me); + Decl ("INT", ms)], + finalize_fcn body) + in + let twinstr = + Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n" + (twinstr_to_string "VL" (twdesc n)) + + and desc = + Printf.sprintf + "static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n" + n (stringify name) (flops_of tree) + (stride_to_solverparm !urs) + (stride_to_solverparm !uvs) + (stride_to_solverparm !ums) + + and register = + match !ditdif with + | DIT -> "X(kdft_ditsq_register)" + | DIF -> "X(kdft_difsq_register)" + in + let init = + "\n" ^ + twinstr ^ + desc ^ + (declare_register_fcn name) ^ + (Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename) + in + + (unparse tree) ^ "\n" ^ init + + +let main () = + begin + parse (speclist @ Twiddle.speclist) usage; + print_string (generate (check_size ())); + end + +let _ = main() diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/genutil.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/genutil.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,328 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* utilities common to all generators *) +open Util + +let choose_simd a b = if !Simdmagic.simd_mode then b else a + +let unique_array n = array n (fun _ -> Unique.make ()) +let unique_array_c n = + array n (fun _ -> + (Unique.make (), Unique.make ())) + +let unique_v_array_c veclen n = + array veclen (fun _ -> + unique_array_c n) + +let locative_array_c n rarr iarr loc vs = + array n (fun i -> + let klass = Unique.make () in + let (rloc, iloc) = loc i in + (Variable.make_locative rloc klass rarr i vs, + Variable.make_locative iloc klass iarr i vs)) + +let locative_v_array_c veclen n rarr iarr loc vs = + array veclen (fun v -> + array n (fun i -> + let klass = Unique.make () in + let (rloc, iloc) = loc v i in + (Variable.make_locative rloc klass (rarr v) i vs, + Variable.make_locative iloc klass (iarr v) i vs))) + +let temporary_array n = + array n (fun i -> Variable.make_temporary ()) + +let temporary_array_c n = + let tmpr = temporary_array n + and tmpi = temporary_array n + in + array n (fun i -> (tmpr i, tmpi i)) + +let temporary_v_array_c veclen n = + array veclen (fun v -> temporary_array_c n) + +let temporary_array_c n = + let tmpr = temporary_array n + and tmpi = temporary_array n + in + array n (fun i -> (tmpr i, tmpi i)) + +let load_c (vr, vi) = Complex.make (Expr.Load vr, Expr.Load vi) +let load_r (vr, vi) = Complex.make (Expr.Load vr, Expr.Num (Number.zero)) + +let twiddle_array nt w = + array (nt/2) (fun i -> + let stride = choose_simd (C.SInteger 1) (C.SConst "TWVL") + and 
klass = Unique.make () in + let (refr, refi) = (C.array_subscript w stride (2 * i), + C.array_subscript w stride (2 * i + 1)) + in + let (kr, ki) = (Variable.make_constant klass refr, + Variable.make_constant klass refi) + in + load_c (kr, ki)) + + +let load_array_c n var = array n (fun i -> load_c (var i)) +let load_array_r n var = array n (fun i -> load_r (var i)) +let load_array_hc n var = + array n (fun i -> + if (i < n - i) then + load_c (var i) + else if (i > n - i) then + Complex.times Complex.i (load_c (var (n - i))) + else + load_r (var i)) + +let load_v_array_c veclen n var = + array veclen (fun v -> load_array_c n (var v)) + +let store_c (vr, vi) x = [Complex.store_real vr x; Complex.store_imag vi x] +let store_r (vr, vi) x = Complex.store_real vr x +let store_i (vr, vi) x = Complex.store_imag vi x + +let assign_array_c n dst src = + List.flatten + (rmap (iota n) + (fun i -> + let (ar, ai) = Complex.assign (dst i) (src i) + in [ar; ai])) +let assign_v_array_c veclen n dst src = + List.flatten + (rmap (iota veclen) + (fun v -> + assign_array_c n (dst v) (src v))) + +let vassign_v_array_c veclen n dst src = + List.flatten + (rmap (iota n) (fun i -> + List.flatten + (rmap (iota veclen) + (fun v -> + let (ar, ai) = Complex.assign (dst v i) (src v i) + in [ar; ai])))) + +let store_array_r n dst src = + rmap (iota n) + (fun i -> store_r (dst i) (src i)) + +let store_array_c n dst src = + List.flatten + (rmap (iota n) + (fun i -> store_c (dst i) (src i))) + +let store_array_hc n dst src = + List.flatten + (rmap (iota n) + (fun i -> + if (i < n - i) then + store_c (dst i) (src i) + else if (i > n - i) then + [] + else + [store_r (dst i) (Complex.real (src i))])) + + +let store_v_array_c veclen n dst src = + List.flatten + (rmap (iota veclen) + (fun v -> + store_array_c n (dst v) (src v))) + + +let elementwise f n a = array n (fun i -> f (a i)) +let conj_array_c = elementwise Complex.conj +let real_array_c = elementwise Complex.real +let imag_array_c = 
elementwise Complex.imag + +let elementwise_v f veclen n a = + array veclen (fun v -> + array n (fun i -> f (a v i))) +let conj_v_array_c = elementwise_v Complex.conj +let real_v_array_c = elementwise_v Complex.real +let imag_v_array_c = elementwise_v Complex.imag + + +let transpose f i j = f j i +let symmetrize f i j = if i <= j then f i j else f j i + +(* utilities for command-line parsing *) +let standard_arg_parse_fail _ = failwith "too many arguments" + +let dump_dag alist = + let fnam = !Magic.dag_dump_file in + if (String.length fnam > 0) then + let ochan = open_out fnam in + begin + To_alist.dump (output_string ochan) alist; + close_out ochan; + end + +let dump_alist alist = + let fnam = !Magic.alist_dump_file in + if (String.length fnam > 0) then + let ochan = open_out fnam in + begin + Expr.dump (output_string ochan) alist; + close_out ochan; + end + +let dump_asched asched = + let fnam = !Magic.asched_dump_file in + if (String.length fnam > 0) then + let ochan = open_out fnam in + begin + Annotate.dump (output_string ochan) asched; + close_out ochan; + end + +(* utilities for optimization *) +let standard_scheduler dag = + let optim = Algsimp.algsimp dag in + let alist = To_alist.to_assignments optim in + let _ = dump_alist alist in + let _ = dump_dag alist in + if !Magic.precompute_twiddles then + Schedule.isolate_precomputations_and_schedule alist + else + Schedule.schedule alist + +let standard_optimizer dag = + let sched = standard_scheduler dag in + let annot = Annotate.annotate [] sched in + let _ = dump_asched annot in + annot + +let size = ref None +let sign = ref (-1) + +let speclist = [ + "-n", Arg.Int(fun i -> size := Some i), " generate a codelet of size "; + "-sign", + Arg.Int(fun i -> + if (i > 0) then + sign := 1 + else + sign := (-1)), + " sign of transform"; +] + +let check_size () = + match !size with + | Some i -> i + | None -> failwith "must specify -n" + +let expand_name name = if name = "" then "noname" else name + +let 
declare_register_fcn name = + if name = "" then + "void NAME(planner *p)\n" + else + "void " ^ (choose_simd "X" "XSIMD") ^ + "(codelet_" ^ name ^ ")(planner *p)\n" + +let stringify name = + if name = "" then "STRINGIZE(NAME)" else + choose_simd ("\"" ^ name ^ "\"") + ("XSIMD_STRING(\"" ^ name ^ "\")") + +let parse user_speclist usage = + Arg.parse + (user_speclist @ speclist @ Magic.speclist @ Simdmagic.speclist) + standard_arg_parse_fail + usage + +let rec list_to_c = function + [] -> "" + | [a] -> (string_of_int a) + | a :: b -> (string_of_int a) ^ ", " ^ (list_to_c b) + +let rec list_to_comma = function + | [a; b] -> C.Comma (a, b) + | a :: b -> C.Comma (a, list_to_comma b) + | _ -> failwith "list_to_comma" + + +type stride = Stride_variable | Fixed_int of int | Fixed_string of string + +let either_stride a b = + match a with + Fixed_int x -> C.SInteger x + | Fixed_string x -> C.SConst x + | _ -> b + +let stride_fixed = function + Stride_variable -> false + | _ -> true + +let arg_to_stride s = + try + Fixed_int (int_of_string s) + with Failure "int_of_string" -> + Fixed_string s + +let stride_to_solverparm = function + Stride_variable -> "0" + | Fixed_int x -> string_of_int x + | Fixed_string x -> x + +let stride_to_string s = function + Stride_variable -> s + | Fixed_int x -> string_of_int x + | Fixed_string x -> x + +(* output the command line *) +let cmdline () = + List.fold_right (fun a b -> a ^ " " ^ b) (Array.to_list Sys.argv) "" + +let unparse tree = + "/* Generated by: " ^ (cmdline ()) ^ "*/\n\n" ^ + (C.print_cost tree) ^ + (if String.length !Magic.inklude > 0 + then + (Printf.sprintf "#include \"%s\"\n\n" !Magic.inklude) + else "") ^ + (if !Simdmagic.simd_mode then + Simd.unparse_function tree + else + C.unparse_function tree) + +let finalize_fcn ast = + let mergedecls = function + C.Block (d1, [C.Block (d2, s)]) -> C.Block (d1 @ d2, s) + | x -> x + and extract_constants = + if !Simdmagic.simd_mode then + Simd.extract_constants + else + 
C.extract_constants + + in mergedecls (C.Block (extract_constants ast, [ast; C.Simd_leavefun])) + +let twinstr_to_string vl x = + if !Simdmagic.simd_mode then + Twiddle.twinstr_to_simd_string vl x + else + Twiddle.twinstr_to_c_string x + +let make_volatile_stride n x = + C.CCall ("MAKE_VOLATILE_STRIDE", C.Comma((C.Integer n), x)) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/littlesimp.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/littlesimp.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,71 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* + * The LittleSimplifier module implements a subset of the simplifications + * of the AlgSimp module. These simplifications can be executed + * quickly here, while they would take a long time using the heavy + * machinery of AlgSimp. + * + * For example, 0 * x is simplified to 0 tout court by the LittleSimplifier. + * On the other hand, AlgSimp would first simplify x, generating lots + * of common subexpressions, storing them in a table etc, just to + * discard all the work later. 
Similarly, the LittleSimplifier + * reduces the constant FFT in Rader's algorithm to a constant sequence. + *) + +open Expr + +let rec makeNum = function + | n -> Num n + +and makeUminus = function + | Uminus a -> a + | Num a -> makeNum (Number.negate a) + | a -> Uminus a + +and makeTimes = function + | (Num a, Num b) -> makeNum (Number.mul a b) + | (Num a, Times (Num b, c)) -> makeTimes (makeNum (Number.mul a b), c) + | (Num a, b) when Number.is_zero a -> makeNum (Number.zero) + | (Num a, b) when Number.is_one a -> b + | (Num a, b) when Number.is_mone a -> makeUminus b + | (Num a, Uminus b) -> Times (makeUminus (Num a), b) + | (a, (Num b as b')) -> makeTimes (b', a) + | (a, b) -> Times (a, b) + +and makePlus l = + let rec reduceSum x = match x with + [] -> [] + | [Num a] -> if Number.is_zero a then [] else x + | (Num a) :: (Num b) :: c -> + reduceSum ((makeNum (Number.add a b)) :: c) + | ((Num _) as a') :: b :: c -> b :: reduceSum (a' :: c) + | a :: s -> a :: reduceSum s + + in match reduceSum l with + [] -> makeNum (Number.zero) + | [a] -> a + | [a; b] when a == b -> makeTimes (Num Number.two, a) + | [Times (Num a, b); Times (Num c, d)] when b == d -> + makeTimes (makePlus [Num a; Num c], b) + | a -> Plus a + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/littlesimp.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/littlesimp.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,25 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val makeNum : Number.number -> Expr.expr +val makeUminus : Expr.expr -> Expr.expr +val makeTimes : Expr.expr * Expr.expr -> Expr.expr +val makePlus : Expr.expr list -> Expr.expr diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/magic.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/magic.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,161 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* magic parameters *) +let verbose = ref false +let vneg = ref false +let karatsuba_min = ref 15 +let karatsuba_variant = ref 2 +let circular_min = ref 64 +let rader_min = ref 13 +let rader_list = ref [5] +let alternate_convolution = ref 17 +let threemult = ref false +let inline_single = ref true +let inline_loads = ref false +let inline_loads_constants = ref false +let inline_constants = ref true +let trivial_stores = ref false +let locations_are_special = ref false +let strength_reduce_mul = ref false +let number_of_variables = ref 4 +let codelet_name = ref "unnamed" +let randomized_cse = ref true +let dif_split_radix = ref false +let enable_fma = ref false +let deep_collect_depth = ref 1 +let schedule_type = ref 0 +let compact = ref false +let dag_dump_file = ref "" +let alist_dump_file = ref "" +let asched_dump_file = ref "" +let lisp_syntax = ref false +let network_transposition = ref true +let inklude = ref "" +let generic_arith = ref false +let reorder_insns = ref false +let reorder_loads = ref false +let reorder_stores = ref false +let precompute_twiddles = ref false +let newsplit = ref false +let standalone = ref false +let pipeline_latency = ref 0 +let schedule_for_pipeline = ref false +let generate_bytw = ref true + +(* command-line parser for magic parameters *) +let undocumented = " Undocumented voodoo parameter" + +let set_bool var = Arg.Unit (fun () -> var := true) +let unset_bool var = Arg.Unit (fun () -> var := false) +let set_int var = Arg.Int(fun i -> var := i) +let set_string var = Arg.String(fun s -> var := s) + +let speclist = [ + "-name", set_string codelet_name, " set codelet name"; + "-standalone", set_bool standalone, " standalone codelet (no desc)"; + "-include", set_string inklude, undocumented; + + 
"-verbose", set_bool verbose, " Enable verbose logging messages to stderr"; + + "-rader-min", set_int rader_min, + " : Use Rader's algorithm for prime sizes >= "; + + "-threemult", set_bool threemult, + " Use 3-multiply complex multiplications"; + + "-karatsuba-min", set_int karatsuba_min, undocumented; + "-karatsuba-variant", set_int karatsuba_variant, undocumented; + "-circular-min", set_int circular_min, undocumented; + + "-compact", set_bool compact, + " Mangle variable names to reduce size of source code"; + "-no-compact", unset_bool compact, + " Disable -compact"; + + "-dump-dag", set_string dag_dump_file, undocumented; + "-dump-alist", set_string alist_dump_file, undocumented; + "-dump-asched", set_string asched_dump_file, undocumented; + "-lisp-syntax", set_bool lisp_syntax, undocumented; + + "-alternate-convolution", set_int alternate_convolution, undocumented; + "-deep-collect-depth", set_int deep_collect_depth, undocumented; + "-schedule-type", set_int schedule_type, undocumented; + "-pipeline-latency", set_int pipeline_latency, undocumented; + "-schedule-for-pipeline", set_bool schedule_for_pipeline, undocumented; + + "-dif-split-radix", set_bool dif_split_radix, undocumented; + "-dit-split-radix", unset_bool dif_split_radix, undocumented; + + "-generic-arith", set_bool generic_arith, undocumented; + "-no-generic-arith", unset_bool generic_arith, undocumented; + + "-precompute-twiddles", set_bool precompute_twiddles, undocumented; + "-no-precompute-twiddles", unset_bool precompute_twiddles, undocumented; + + "-inline-single", set_bool inline_single, undocumented; + "-no-inline-single", unset_bool inline_single, undocumented; + + "-inline-loads", set_bool inline_loads, undocumented; + "-no-inline-loads", unset_bool inline_loads, undocumented; + + "-inline-loads-constants", set_bool inline_loads_constants, undocumented; + "-no-inline-loads-constants", + unset_bool inline_loads_constants, undocumented; + + "-inline-constants", set_bool inline_constants, 
undocumented; + "-no-inline-constants", unset_bool inline_constants, undocumented; + + "-trivial-stores", set_bool trivial_stores, undocumented; + "-no-trivial-stores", unset_bool trivial_stores, undocumented; + + "-locations-are-special", set_bool locations_are_special, undocumented; + "-no-locations-are-special", unset_bool locations_are_special, undocumented; + + "-randomized-cse", set_bool randomized_cse, undocumented; + "-no-randomized-cse", unset_bool randomized_cse, undocumented; + + "-network-transposition", set_bool network_transposition, undocumented; + "-no-network-transposition", unset_bool network_transposition, undocumented; + + "-reorder-insns", set_bool reorder_insns, undocumented; + "-no-reorder-insns", unset_bool reorder_insns, undocumented; + "-reorder-loads", set_bool reorder_loads, undocumented; + "-no-reorder-loads", unset_bool reorder_loads, undocumented; + "-reorder-stores", set_bool reorder_stores, undocumented; + "-no-reorder-stores", unset_bool reorder_stores, undocumented; + + "-newsplit", set_bool newsplit, undocumented; + + "-vneg", set_bool vneg, undocumented; + "-fma", set_bool enable_fma, undocumented; + "-no-fma", unset_bool enable_fma, undocumented; + + "-variables", set_int number_of_variables, undocumented; + + "-strength-reduce-mul", set_bool strength_reduce_mul, undocumented; + "-no-strength-reduce-mul", unset_bool strength_reduce_mul, undocumented; + + "-generate-bytw", set_bool generate_bytw, undocumented; + "-no-generate-bytw", unset_bool generate_bytw, undocumented; +] + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/monads.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/monads.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,75 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it 
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(************************************************************* + * Monads + *************************************************************) + +(* + * Phil Wadler has many well written papers about monads. See + * http://cm.bell-labs.com/cm/cs/who/wadler/ + *) +(* vanilla state monad *) +module StateMonad = struct + let returnM x = fun s -> (x, s) + + let (>>=) = fun m k -> + fun s -> + let (a', s') = m s + in let (a'', s'') = k a' s' + in (a'', s'') + + let (>>) = fun m k -> + m >>= fun _ -> k + + let rec mapM f = function + [] -> returnM [] + | a :: b -> + f a >>= fun a' -> + mapM f b >>= fun b' -> + returnM (a' :: b') + + let runM m x initial_state = + let (a, _) = m x initial_state + in a + + let fetchState = + fun s -> s, s + + let storeState newState = + fun _ -> (), newState +end + +(* monad with built-in memoizing capabilities *) +module MemoMonad = + struct + open StateMonad + + let memoizing lookupM insertM f k = + lookupM k >>= fun vMaybe -> + match vMaybe with + Some value -> returnM value + | None -> + f k >>= fun value -> + insertM k value >> returnM value + + let runM initial_state m x = StateMonad.runM m x initial_state +end diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/number.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/number.ml 
Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,164 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* The generator keeps track of numeric constants in symbolic + expressions using the abstract number type, defined in this file. + + Our implementation of the number type uses arbitrary-precision + arithmetic from the built-in Num package in order to maintain an + accurate representation of constants. This allows us to output + constants with many decimal places in the generated C code, + ensuring that we will take advantage of the full precision + available on current and future machines. + + Note that we have to write our own routine to compute roots of + unity, since the Num package only supplies simple arithmetic. The + arbitrary-precision operations in Num look like the normal + operations except that they have an appended slash (e.g. +/ -/ */ + // etcetera). 
*) + +open Num + +type number = N of num + +let makeNum n = N n + +(* decimal digits of precision to maintain internally, and to print out: *) +let precision = 50 +let print_precision = 45 + +let inveps = (Int 10) **/ (Int precision) +let epsilon = (Int 1) // inveps + +let pinveps = (Int 10) **/ (Int print_precision) +let pepsilon = (Int 1) // pinveps + +let round x = epsilon */ (round_num (x */ inveps)) + +let of_int n = N (Int n) +let zero = of_int 0 +let one = of_int 1 +let two = of_int 2 +let mone = of_int (-1) + +(* comparison predicate for real numbers *) +let equal (N x) (N y) = (* use both relative and absolute error *) + let absdiff = abs_num (x -/ y) in + absdiff <=/ pepsilon or + absdiff <=/ pepsilon */ (abs_num x +/ abs_num y) + +let is_zero = equal zero +let is_one = equal one +let is_mone = equal mone +let is_two = equal two + + +(* Note that, in the following computations, it is important to round + to precision epsilon after each operation. Otherwise, since the + Num package uses exact rational arithmetic, the number of digits + quickly blows up. *) +let mul (N a) (N b) = makeNum (round (a */ b)) +let div (N a) (N b) = makeNum (round (a // b)) +let add (N a) (N b) = makeNum (round (a +/ b)) +let sub (N a) (N b) = makeNum (round (a -/ b)) + +let negative (N a) = (a = 1.0) then (f' -. (float (truncate f'))) else f' + in let q = string_of_int (truncate(f2 *. 
1.0E9)) + in let r = "0000000000" ^ q + in let l = String.length r + in let prefix = if (f < 0.0) then "KN" else "KP" in + if (f' >= 1.0) then + (prefix ^ (string_of_int (truncate f')) ^ "_" ^ + (String.sub r (l - 9) 9)) + else + (prefix ^ (String.sub r (l - 9) 9)) + +let to_string (N n) = approx_num_fix print_precision n + +let to_float (N n) = float_of_num n + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/number.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/number.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,49 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type number + +val equal : number -> number -> bool +val of_int : int -> number +val zero : number +val one : number +val two : number +val mone : number +val is_zero : number -> bool +val is_one : number -> bool +val is_mone : number -> bool +val is_two : number -> bool +val mul : number -> number -> number +val div : number -> number -> number +val add : number -> number -> number +val sub : number -> number -> number +val negative : number -> bool +val greater : number -> number -> bool +val negate : number -> number +val sqrt : number -> number + +(* cexp n i = (cos (2 * pi * i / n), sin (2 * pi * i / n)) *) +val cexp : int -> int -> (number * number) + +val to_konst : number -> string +val to_string : number -> string +val to_float : number -> float + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/oracle.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/oracle.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* + * the oracle decrees whether the sign of an expression should + * be changed. + * + * Say the expression (A - B) appears somewhere. Elsewhere in the + * expression dag the expression (B - A) may appear. + * The oracle determines which of the two forms is canonical. + * + * Algorithm: evaluate the expression at a random input, and + * keep the expression with the positive sign. + *) + +let make_memoizer hash equal = + let table = ref Assoctable.empty + in + (fun f k -> + match Assoctable.lookup hash equal k !table with + Some value -> value + | None -> + let value = f k in + begin + table := Assoctable.insert hash k value !table; + value + end) + +let almost_equal x y = + let epsilon = 1.0E-8 in + (abs_float (x -. y) < epsilon) || + (abs_float (x -. y) < epsilon *. (abs_float x +. abs_float y)) + +let absid = make_memoizer + (fun x -> Expr.hash_float (abs_float x)) + (fun a b -> almost_equal a b || almost_equal (-. a) b) + (fun x -> x) + +let make_random_oracle () = make_memoizer + Variable.hash + Variable.same + (fun _ -> (float (Random.bits())) /. 1073741824.0) + +let the_random_oracle = make_random_oracle () + +let sum_list l = List.fold_right (+.) l 0.0 + +let eval_aux random_oracle = + let memoizing = make_memoizer Expr.hash (==) in + let rec eval x = + memoizing + (function + | Expr.Num x -> Number.to_float x + | Expr.NaN x -> Expr.transcendent_to_float x + | Expr.Load v -> random_oracle v + | Expr.Store (v, x) -> eval x + | Expr.Plus l -> sum_list (List.map eval l) + | Expr.Times (a, b) -> (eval a) *. (eval b) + | Expr.CTimes (a, b) -> + 1.098612288668109691395245236 +. + 1.609437912434100374600759333 *. (eval a) *. (eval b) + | Expr.CTimesJ (a, b) -> + 0.9102392266268373936142401657 +. + 0.6213349345596118107071993881 *. 
(eval a) *. (eval b) + | Expr.Uminus x -> -. (eval x)) + x + in eval + +let eval = eval_aux the_random_oracle + +let should_flip_sign node = + let v = eval node in + let v' = absid v in + not (almost_equal v v') + +(* + * determine with high probability if two expressions are equal. + * + * The test is randomized: if the two expressions have the + * same value for NTESTS random inputs, then they are proclaimed + * equal. (Note that two distinct linear functions L1(x0, x1, ..., xn) + * and L2(x0, x1, ..., xn) have the same value with probability + * 0 for random x's, and thus this test is way more paranoid than + * necessary.) + *) +let likely_equal a b = + let tolerance = 1.0e-8 + and ntests = 20 + in + let rec loop n = + if n = 0 then + true + else + let r = make_random_oracle () in + let va = eval_aux r a + and vb = eval_aux r b + in + if (abs_float (va -. vb)) > + tolerance *. (abs_float va +. abs_float vb +. 0.0001) + then + false + else + loop (n - 1) + in + match (a, b) with + + (* + * Because of the way eval is constructed, we have + * eval (Store (v, x)) == eval x + * However, we never consider the two expressions equal + *) + | (Expr.Store _, _) -> false + | (_, Expr.Store _) -> false + + (* + * Expressions of the form ``Uminus (Store _)'' + * are artifacts of algsimp + *) + | ((Expr.Uminus (Expr.Store _)), _) -> false + | (_, Expr.Uminus (Expr.Store _)) -> false + + | _ -> loop ntests + +let hash x = + let f = eval x in + truncate (f *. 
65536.0) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/oracle.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/oracle.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val should_flip_sign : Expr.expr -> bool +val likely_equal : Expr.expr -> Expr.expr -> bool +val hash : Expr.expr -> int diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/schedule.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/schedule.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,236 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* This file contains the instruction scheduler, which finds an + efficient ordering for a given list of instructions. + + The scheduler analyzes the DAG (directed acyclic graph) formed by + the instruction dependencies, and recursively partitions it. The + resulting schedule data structure expresses a "good" ordering + and structure for the computation. + + The scheduler makes use of utilities in Dag and other packages to + manipulate the Dag and the instruction list. *) + +open Dag +(************************************************* + * Dag scheduler + *************************************************) +let to_assignment node = (Expr.Assign (node.assigned, node.expression)) +let makedag l = Dag.makedag + (List.map (function Expr.Assign (v, x) -> (v, x)) l) + +let return x = x +let has_color c n = (n.color = c) +let set_color c n = (n.color <- c) +let has_either_color c1 c2 n = (n.color = c1 || n.color = c2) + +let infinity = 100000 + +let cc dag inputs = + begin + Dag.for_all dag (fun node -> + node.label <- infinity); + + (match inputs with + a :: _ -> bfs dag a 0 + | _ -> failwith "connected"); + + return + ((List.map to_assignment (List.filter (fun n -> n.label < infinity) + (Dag.to_list dag))), + (List.map to_assignment (List.filter (fun n -> n.label == infinity) + (Dag.to_list dag)))) + end + +let rec connected_components alist = + let dag = makedag alist in + let inputs = + List.filter (fun node -> Util.null node.predecessors) + (Dag.to_list dag) in + match cc dag inputs with + (a, 
[]) -> [a] + | (a, b) -> a :: connected_components b + +let single_load node = + match (node.input_variables, node.predecessors) with + ([x], []) -> + Variable.is_constant x || + (!Magic.locations_are_special && Variable.is_locative x) + | _ -> false + +let loads_locative node = + match (node.input_variables, node.predecessors) with + | ([x], []) -> Variable.is_locative x + | _ -> false + +let partition alist = + let dag = makedag alist in + let dag' = Dag.to_list dag in + let inputs = + List.filter (fun node -> Util.null node.predecessors) dag' + and outputs = + List.filter (fun node -> Util.null node.successors) dag' + and special_inputs = List.filter single_load dag' in + begin + + let c = match !Magic.schedule_type with + | 1 -> RED; (* all nodes in the input partition *) + | -1 -> BLUE; (* all nodes in the output partition *) + | _ -> BLACK; (* node color determined by bisection algorithm *) + in Dag.for_all dag (fun node -> node.color <- c); + + Util.for_list inputs (set_color RED); + + (* + The special inputs are those input nodes that load a single + location or twiddle factor. Special inputs can end up either + in the blue or in the red part. These inputs are special + because they inherit a color from their neighbors: If a red + node needs a special input, the special input becomes red, but + if all successors of a special input are blue, the special + input becomes blue. Outputs are always blue, whether they be + special or not. + + Because of the processing of special inputs, however, the final + partition might end up being composed only of blue nodes (which + is incorrect). In this case we manually reset all inputs + (whether special or not) to be red. 
+ *) + + Util.for_list special_inputs (set_color YELLOW); + + Util.for_list outputs (set_color BLUE); + + let rec loopi donep = + match (List.filter + (fun node -> (has_color BLACK node) && + List.for_all (has_either_color RED YELLOW) node.predecessors) + dag') with + [] -> if (donep) then () else loopo true + | i -> + begin + Util.for_list i (fun node -> + begin + set_color RED node; + Util.for_list node.predecessors (set_color RED); + end); + loopo false; + end + + and loopo donep = + match (List.filter + (fun node -> (has_either_color BLACK YELLOW node) && + List.for_all (has_color BLUE) node.successors) + dag') with + [] -> if (donep) then () else loopi true + | o -> + begin + Util.for_list o (set_color BLUE); + loopi false; + end + + in loopi false; + + (* fix the partition if it is incorrect *) + if not (List.exists (has_color RED) dag') then + Util.for_list inputs (set_color RED); + + return + ((List.map to_assignment (List.filter (has_color RED) dag')), + (List.map to_assignment (List.filter (has_color BLUE) dag'))) + end + +type schedule = + Done + | Instr of Expr.assignment + | Seq of (schedule * schedule) + | Par of schedule list + + + +(* produce a sequential schedule determined by the user *) +let rec sequentially = function + [] -> Done + | a :: b -> Seq (Instr a, sequentially b) + +let schedule = + let rec schedule_alist = function + | [] -> Done + | [a] -> Instr a + | alist -> match connected_components alist with + | ([a]) -> schedule_connected a + | l -> Par (List.map schedule_alist l) + + and schedule_connected alist = + match partition alist with + | (a, b) -> Seq (schedule_alist a, schedule_alist b) + + in fun x -> + let () = Util.info "begin schedule" in + let res = schedule_alist x in + let () = Util.info "end schedule" in + res + + +(* partition a dag into two parts: + + 1) the set of loads from locatives and their successors, + 2) all other nodes + + This step separates the ``body'' of the dag, which computes the + actual fft, from the 
``precomputations'' part, which computes e.g. + twiddle factors. +*) +let partition_precomputations alist = + let dag = makedag alist in + let dag' = Dag.to_list dag in + let loads = List.filter loads_locative dag' in + begin + + Dag.for_all dag (set_color BLUE); + Util.for_list loads (set_color RED); + + let rec loop () = + match (List.filter + (fun node -> (has_color RED node) && + List.exists (has_color BLUE) node.successors) + dag') with + [] -> () + | i -> + begin + Util.for_list i + (fun node -> + Util.for_list node.successors (set_color RED)); + loop () + end + + in loop (); + + return + ((List.map to_assignment (List.filter (has_color BLUE) dag')), + (List.map to_assignment (List.filter (has_color RED) dag'))) + end + +let isolate_precomputations_and_schedule alist = + let (a, b) = partition_precomputations alist in + Seq (schedule a, schedule b) + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/schedule.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/schedule.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type schedule = (* tree describing an execution order for a list of assignments *) + | Done (* empty schedule *) + | Instr of Expr.assignment (* a single assignment *) + | Seq of (schedule * schedule) (* first component, then second component *) + | Par of schedule list (* a list of sub-schedules *) + +val schedule : Expr.assignment list -> schedule (* build a Seq/Par tree for the assignment list *) +val sequentially : Expr.assignment list -> schedule (* keep the given order: nested Seq (Instr a, ...) ending in Done *) +val isolate_precomputations_and_schedule : Expr.assignment list -> schedule (* Seq of two independently scheduled halves of the list *) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/simd.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/simd.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,226 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +open Expr +open List +open Printf +open Variable +open Annotate +open Simdmagic +open C + +let realtype = "V" (* C type name used by declare_variables for every temporary *) +let realtypep = realtype ^ " *" +let constrealtype = "const " ^ realtype +let constrealtypep = constrealtype ^ " *" +let alignment_mod = 2 (* modulus argument passed to Variable.unparse_for_alignment by the LD and ST unparsers *) + +(* + * SIMD C AST unparser + *) +let foldr_string_concat l = fold_right (^) l "" (* concatenate a list of strings in order *) + +let rec unparse_by_twiddle nam tw src = (* emit a call of macro nam on the address of variable tw and the unparsed src *) + sprintf "%s(&(%s),%s)" nam (Variable.unparse tw) (unparse_expr src) + +and unparse_store dst = function (* emit the store into locative dst: STM<n> for a product tagged NaN MULTI_A, STN<n> for a NaN MULTI_B product of a sum, plain ST otherwise; n is read from Simdmagic.store_multiple *) + | Times (NaN MULTI_A, x) -> + sprintf "STM%d(&(%s),%s,%s,&(%s));\n" + !Simdmagic.store_multiple + (Variable.unparse dst) (unparse_expr x) + (Variable.vstride_of_locative dst) + (Variable.unparse_for_alignment alignment_mod dst) + | Times (NaN MULTI_B, Plus stuff) -> + sprintf "STN%d(&(%s)%s,%s);\n" + !Simdmagic.store_multiple + (Variable.unparse dst) + (List.fold_right (fun x a -> "," ^ (unparse_expr x) ^ a) stuff "") + (Variable.vstride_of_locative dst) + | src_expr -> + sprintf "ST(&(%s),%s,%s,&(%s));\n" + (Variable.unparse dst) (unparse_expr src_expr) + (Variable.vstride_of_locative dst) + (Variable.unparse_for_alignment alignment_mod dst) + +and unparse_expr = (* expression to C text; the local unparse_plus folds a sum into nested two- and three-operand macro calls, and its cases are tried top to bottom *) + let rec unparse_plus = function + | [a] -> unparse_expr a + + | (Uminus (Times (NaN I, b))) :: c :: d -> op2 "VFNMSI" [b] (c :: d) + | c :: (Uminus (Times (NaN I, b))) :: d -> op2 "VFNMSI" [b] (c :: d) + | (Uminus (Times (NaN CONJ, b))) :: c :: d -> op2 "VFNMSCONJ" [b] (c :: d) + | c :: (Uminus (Times (NaN CONJ, b))) :: d -> op2 "VFNMSCONJ" [b] (c :: d) + | (Times (NaN I, b)) :: c :: d -> op2 "VFMAI" [b] (c :: d) + | c :: (Times (NaN I, b)) :: d -> op2 "VFMAI" [b] (c :: d) + | (Times (NaN CONJ, b)) :: (Uminus c) :: d -> op2 "VFMSCONJ" [b] (c :: d) + | (Uminus c) :: (Times (NaN CONJ, b)) :: d -> op2
"VFMSCONJ" [b] (c :: d) + | (Times (NaN CONJ, b)) :: c :: d -> op2 "VFMACONJ" [b] (c :: d) + | c :: (Times (NaN CONJ, b)) :: d -> op2 "VFMACONJ" [b] (c :: d) + | (Times (NaN _, b)) :: (Uminus c) :: d -> failwith "VFMS NaN" + | (Uminus c) :: (Times (NaN _, b)) :: d -> failwith "VFMS NaN" + + | (Uminus (Times (a, b))) :: c :: d -> op3 "VFNMS" a b (c :: d) + | c :: (Uminus (Times (a, b))) :: d -> op3 "VFNMS" a b (c :: d) + | (Times (a, b)) :: (Uminus c) :: d -> op3 "VFMS" a b (c :: negate d) + | (Uminus c) :: (Times (a, b)) :: d -> op3 "VFMS" a b (c :: negate d) + | (Times (a, b)) :: c :: d -> op3 "VFMA" a b (c :: d) + | c :: (Times (a, b)) :: d -> op3 "VFMA" a b (c :: d) + + | (Uminus a :: b) -> op2 "VSUB" b [a] + | (b :: Uminus a :: c) -> op2 "VSUB" (b :: c) [a] + | (a :: b) -> op2 "VADD" [a] b + | [] -> failwith "unparse_plus" + and op3 nam a b c = (* nam(a, b, sum of c) *) + nam ^ "(" ^ (unparse_expr a) ^ ", " ^ (unparse_expr b) ^ ", " ^ + (unparse_plus c) ^ ")" + and op2 nam a b = (* nam(sum of a, sum of b) *) + nam ^ "(" ^ (unparse_plus a) ^ ", " ^ (unparse_plus b) ^ ")" + and op1 nam a = + nam ^ "(" ^ (unparse_expr a) ^ ")" + and negate = function (* flip the sign of every term: drop an outer Uminus or add one *) + | [] -> [] + | (Uminus x) :: y -> x :: negate y + | x :: y -> (Uminus x) :: negate y + + in function + | CTimes(Load tw, src) + when Variable.is_constant tw && !Magic.generate_bytw -> + unparse_by_twiddle "BYTW" tw src + | CTimesJ(Load tw, src) + when Variable.is_constant tw && !Magic.generate_bytw -> + unparse_by_twiddle "BYTWJ" tw src + | Load v when is_locative(v) -> + sprintf "LD(&(%s), %s, &(%s))" (Variable.unparse v) + (Variable.vstride_of_locative v) + (Variable.unparse_for_alignment alignment_mod v) + | Load v when is_constant(v) -> sprintf "LDW(&(%s))" (Variable.unparse v) + | Load v -> Variable.unparse v + | Num n -> sprintf "LDK(%s)" (Number.to_konst n) + | NaN n -> failwith "NaN in unparse_expr" + | Plus [] -> "0.0 /* bug */" + | Plus [a] -> " /* bug */ " ^ (unparse_expr a) + | Plus a -> unparse_plus a + | Times(NaN I,b) -> op1 "VBYI" b + | Times(NaN CONJ,b)
-> op1 "VCONJ" b + | Times(a,b) -> + sprintf "VMUL(%s, %s)" (unparse_expr a) (unparse_expr b) + | CTimes(a,Times(NaN I, b)) -> + sprintf "VZMULI(%s, %s)" (unparse_expr a) (unparse_expr b) + | CTimes(a,b) -> + sprintf "VZMUL(%s, %s)" (unparse_expr a) (unparse_expr b) + | CTimesJ(a,Times(NaN I, b)) -> + sprintf "VZMULIJ(%s, %s)" (unparse_expr a) (unparse_expr b) + | CTimesJ(a,b) -> + sprintf "VZMULJ(%s, %s)" (unparse_expr a) (unparse_expr b) + | Uminus a when !Magic.vneg -> op1 "VNEG" a + | Uminus a -> failwith "SIMD Uminus" + | _ -> failwith "unparse_expr" + +and unparse_decl x = C.unparse_decl x + +and unparse_ast ast = + let rec unparse_assignment = function (* an assignment to a locative becomes a store macro; any other assignment becomes a plain C assignment statement *) + | Assign (v, x) when Variable.is_locative v -> + unparse_store v x + | Assign (v, x) -> + (Variable.unparse v) ^ " = " ^ (unparse_expr x) ^ ";\n" + + and unparse_annotated force_bracket = + let rec unparse_code = function + | ADone -> "" + | AInstr i -> unparse_assignment i + | ASeq (a, b) -> + (unparse_annotated false a) ^ (unparse_annotated false b) + and declare_variables l = (* declare the temporaries of l as realtype variables, at most s names per declaration, where s is 15 when Magic.compact is set and 1 otherwise *) + let rec uvar = function + [] -> failwith "uvar" + | [v] -> (Variable.unparse v) ^ ";\n" + | a :: b -> (Variable.unparse a) ^ ", " ^ (uvar b) + in let rec vvar l = + let s = if !Magic.compact then 15 else 1 in + if (List.length l <= s) then + match l with + [] -> "" + | _ -> realtype ^ " " ^ (uvar l) + else + (vvar (Util.take s l)) ^ (vvar (Util.drop s l)) + in vvar (List.filter Variable.is_temporary l) + in function + Annotate (_, _, decl, _, code) -> + if (not force_bracket) && (Util.null decl) then + unparse_code code + else "{\n" ^ + (declare_variables decl) ^ + (unparse_code code) ^ + "}\n" + +(* ---- *) + and unparse_plus = function + | [] -> "" + | (CUminus a :: b) -> " - " ^ (parenthesize a) ^ (unparse_plus b) + | (a :: b) -> " + " ^ (parenthesize a) ^ (unparse_plus b) + and parenthesize x = match x with + | (CVar _) -> unparse_ast x + | (CCall _) -> unparse_ast x + | (Integer _) -> unparse_ast x + | _ -> "(" ^ (unparse_ast
x) ^ ")" + + in match ast with + | Asch a -> (unparse_annotated true a) + | Return x -> "return " ^ unparse_ast x ^ ";" + | Simd_leavefun -> "VLEAVE();" + | For (a, b, c, d) -> + "for (" ^ + unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c + ^ ")" ^ unparse_ast d + | If (a, d) -> + "if (" ^ + unparse_ast a + ^ ")" ^ unparse_ast d + | Block (d, s) -> + if (s == []) then "" + else + "{\n" ^ + foldr_string_concat (map unparse_decl d) ^ + foldr_string_concat (map unparse_ast s) ^ + "}\n" + | x -> C.unparse_ast x + +and unparse_function = function (* emit the header line built from typ, name and the comma-separated Decl arguments, followed by the unparsed body *) + Fcn (typ, name, args, body) -> + let rec unparse_args = function + [Decl (a, b)] -> a ^ " " ^ b + | (Decl (a, b)) :: s -> a ^ " " ^ b ^ ", " + ^ unparse_args s + | [] -> "" + | _ -> failwith "unparse_function" + in + (typ ^ " " ^ name ^ "(" ^ unparse_args args ^ ")\n" ^ + unparse_ast body) + +let extract_constants f = (* one DVK(konst, value) declaration per unique constant collected from the expressions of f *) + let constlist = flatten (map expr_to_constants (C.ast_to_expr_list f)) + in map + (fun n -> + Tdecl + ("DVK(" ^ (Number.to_konst n) ^ ", " ^ (Number.to_string n) ^ + ");\n")) + (unique_constants constlist) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/simd.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/simd.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val unparse_function : C.c_fcn -> string (* C text of a whole function: header line followed by the unparsed body *) +val extract_constants : C.c_ast -> C.c_decl list (* one DVK declaration per unique constant found in the ast *) +val realtype : string (* C type name of a SIMD temporary *) +val realtypep : string (* realtype followed by " *" *) +val constrealtype : string (* "const " followed by realtype *) +val constrealtypep : string (* constrealtype followed by " *" *) + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/simdmagic.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/simdmagic.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* SIMD magic parameters *) +let simd_mode = ref false (* off by default; bound to the -simd flag below *) +let store_multiple = ref 1 (* bound to -store-multiple below; Simd.unparse_store formats it into the STM<n> / STN<n> macro names *) + +open Magic + +let speclist = [ (* option table tying each flag to its ref; NOTE(review): set_bool, set_int and undocumented presumably come from Magic, opened just above -- confirm *) + "-simd", set_bool simd_mode, undocumented; + "-store-multiple", set_int store_multiple, undocumented; +] diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/to_alist.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/to_alist.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,288 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(************************************************************* + * Conversion of the dag to an assignment list + *************************************************************) +(* + * This function is messy. The main problem is that we want to + * inline dag nodes conditionally, depending on how many times they + * are used.
The Right Thing to do would be to modify the + * state monad to propagate some of the state backwards, so that + * we know whether a given node will be used again in the future. + * This modification is trivial in a lazy language, but it is + * messy in a strict language like ML. + * + * In this implementation, we just do the obvious thing, i.e., visit + * the dag twice, the first to count the node usages, and the second to + * produce the output. + *) + +open Monads.StateMonad +open Monads.MemoMonad +open Expr + +let fresh = Variable.make_temporary +let node_insert x = Assoctable.insert Expr.hash x +let node_lookup x = Assoctable.lookup Expr.hash (==) x (* lookup keyed by Expr.hash, with physical equality as the key comparison *) +let empty = Assoctable.empty + +let fetchAl = (* the monad state is a triple: the assignment list built so far, the usage-count table filled by counting, and the memo table used by expr_of_nodeM; the accessors below read or replace one component each *) + fetchState >>= (fun (al, _, _) -> returnM al) + +let storeAl al = + fetchState >>= (fun (_, visited, visited') -> + storeState (al, visited, visited')) + +let fetchVisited = fetchState >>= (fun (_, v, _) -> returnM v) + +let storeVisited visited = + fetchState >>= (fun (al, _, visited') -> + storeState (al, visited, visited')) + +let fetchVisited' = fetchState >>= (fun (_, _, v') -> returnM v') +let storeVisited' visited' = + fetchState >>= (fun (al, visited, _) -> + storeState (al, visited, visited')) +let lookupVisitedM' key = + fetchVisited' >>= fun table -> + returnM (node_lookup key table) +let insertVisitedM' key value = + fetchVisited' >>= fun table -> + storeVisited' (node_insert key value table) + +let counting f x = (* first-pass helper: a node seen for the first time runs f and gets count 1; a node seen again only has its count incremented, except that f is re-applied to the operand of a Uminus *) + fetchVisited >>= (fun v -> + match node_lookup x v with + Some count -> + let incr_cnt = + fetchVisited >>= (fun v' -> + storeVisited (node_insert x (count + 1) v')) + in + begin + match x with + (* Uminus is always inlined.
Visit child *) + Uminus y -> f y >> incr_cnt + | _ -> incr_cnt + end + | None -> + f x >> fetchVisited >>= (fun v' -> + storeVisited (node_insert x 1 v'))) + +let with_varM v x = (* prepend the pair (v, x) to the assignment list and yield Load v *) + fetchAl >>= (fun al -> storeAl ((v, x) :: al)) >> returnM (Load v) + +let inlineM = returnM + +let with_tempM x = match x with +| Load v when Variable.is_temporary v -> inlineM x (* avoid trivial moves *) +| _ -> with_varM (fresh ()) x + +(* declare a temporary only if node is used more than once *) +let with_temp_maybeM node x = + fetchVisited >>= (fun v -> + match node_lookup node v with + Some count -> + if (count = 1 && !Magic.inline_single) then + inlineM x + else + with_tempM x + | None -> + failwith "with_temp_maybeM") +type fma = + NO_FMA + | FMA of expr * expr * expr (* FMA (a, b, c) => a + b * c *) + | FMS of expr * expr * expr (* FMS (a, b, c) => -a + b * c *) + | FNMS of expr * expr * expr (* FNMS (a, b, c) => a - b * c *) + +let good_for_fma (a, b) = (* both factors must pass: a factor fails when it is a NaN other than I or CONJ, or a product with a NaN on either side *) + let good = function + | NaN I -> true + | NaN CONJ -> true + | NaN _ -> false + | Times(NaN _, _) -> false + | Times(_, NaN _) -> false + | _ -> true + in good a && good b + +let build_fma l = (* classify a two-term sum containing one product as FNMS, FMS or FMA; NO_FMA when Magic.enable_fma is off or no case matches *) + if (not !Magic.enable_fma) then NO_FMA + else match l with + | [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> FNMS (a, b, c) + | [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> FNMS (a, b, c) + | [Uminus a; Times (b, c)] when good_for_fma (b, c) -> FMS (a, b, c) + | [Times (b, c); Uminus a] when good_for_fma (b, c) -> FMS (a, b, c) + | [a; Times (b, c)] when good_for_fma (b, c) -> FMA (a, b, c) + | [Times (b, c); a] when good_for_fma (b, c) -> FMA (a, b, c) + | _ -> NO_FMA + +let children_fma l = match build_fma l with (* the three operands of the fused form, or None *) +| FMA (a, b, c) -> Some (a, b, c) +| FMS (a, b, c) -> Some (a, b, c) +| FNMS (a, b, c) -> Some (a, b, c) +| NO_FMA -> None + + +let rec visitM x = (* first pass over the dag: record how often each node is used *) + counting (function + | Load v -> returnM () + | Num a -> returnM () + | NaN a -> returnM () + | Store (v, x) -> visitM x + | Plus a -> (match children_fma a
with + None -> mapM visitM a >> returnM () + | Some (a, b, c) -> + (* visit fma's arguments twice to make sure they are not inlined *) + visitM a >> visitM a >> + visitM b >> visitM b >> + visitM c >> visitM c) + | Times (a, b) -> visitM a >> visitM b + | CTimes (a, b) -> visitM a >> visitM b + | CTimesJ (a, b) -> visitM a >> visitM b + | Uminus a -> visitM a) + x + +let visit_rootsM = mapM visitM + + +let rec expr_of_nodeM x = (* second pass: rebuild each node as an expression, memoized through the second table, either inlining it or binding it to a variable according to the Magic flags and the usage counts *) + memoizing lookupVisitedM' insertVisitedM' + (function x -> match x with + | Load v -> + if (Variable.is_temporary v) then + inlineM (Load v) + else if (Variable.is_locative v && !Magic.inline_loads) then + inlineM (Load v) + else if (Variable.is_constant v && !Magic.inline_loads_constants) then + inlineM (Load v) + else + with_tempM (Load v) + | Num a -> + if !Magic.inline_constants then + inlineM (Num a) + else + with_temp_maybeM x (Num a) + | NaN a -> inlineM (NaN a) + | Store (v, x) -> + expr_of_nodeM x >>= + (if !Magic.trivial_stores then with_tempM else inlineM) >>= + with_varM v + + | Plus a -> + begin + match build_fma a with + FMA (a, b, c) -> + expr_of_nodeM a >>= fun a' -> + expr_of_nodeM b >>= fun b' -> + expr_of_nodeM c >>= fun c' -> + with_temp_maybeM x (Plus [a'; Times (b', c')]) + | FMS (a, b, c) -> + expr_of_nodeM a >>= fun a' -> + expr_of_nodeM b >>= fun b' -> + expr_of_nodeM c >>= fun c' -> + with_temp_maybeM x + (Plus [Times (b', c'); Uminus a']) + | FNMS (a, b, c) -> + expr_of_nodeM a >>= fun a' -> + expr_of_nodeM b >>= fun b' -> + expr_of_nodeM c >>= fun c' -> + with_temp_maybeM x + (Plus [a'; Uminus (Times (b', c'))]) + | NO_FMA -> + mapM expr_of_nodeM a >>= fun a' -> + with_temp_maybeM x (Plus a') + end + | CTimes (Load _ as a, b) when !Magic.generate_bytw -> + expr_of_nodeM b >>= fun b' -> + with_tempM (CTimes (a, b')) + | CTimes (a, b) -> + expr_of_nodeM a >>= fun a' -> + expr_of_nodeM b >>= fun b' -> + with_tempM (CTimes (a', b')) + | CTimesJ (Load _ as a, b) when !Magic.generate_bytw -> + expr_of_nodeM b >>= fun
b' -> + with_tempM (CTimesJ (a, b')) + | CTimesJ (a, b) -> + expr_of_nodeM a >>= fun a' -> + expr_of_nodeM b >>= fun b' -> + with_tempM (CTimesJ (a', b')) + | Times (a, b) -> + expr_of_nodeM a >>= fun a' -> + expr_of_nodeM b >>= fun b' -> + begin + match a' with + Num a'' when !Magic.strength_reduce_mul && Number.is_two a'' -> (* a product with the constant two is emitted as a sum of the other factor with itself *) + (inlineM b' >>= fun b'' -> + with_temp_maybeM x (Plus [b''; b''])) + | _ -> with_temp_maybeM x (Times (a', b')) + end + | Uminus a -> + expr_of_nodeM a >>= fun a' -> + inlineM (Uminus a')) + x + +let expr_of_rootsM = mapM expr_of_nodeM + +let peek_alistM roots = (* run the counting pass, then the rebuilding pass, and return the accumulated assignment list *) + visit_rootsM roots >> expr_of_rootsM roots >> fetchAl + +let wrap_assign (a, b) = Expr.Assign (a, b) + +let to_assignments dag = (* with_varM prepends, so the accumulated list is reversed before being wrapped into Expr.Assign values *) + let () = Util.info "begin to_alist" in + let al = List.rev (runM ([], empty, empty) peek_alistM dag) in + let res = List.map wrap_assign al in + let () = Util.info "end to_alist" in + res + + +(* dump alist in `dot' format *) +let dump print alist = + let vs v = "\"" ^ (Variable.unparse v) ^ "\"" in + begin + print "digraph G {\n"; + print "\tsize=\"6,6\";\n"; + + (* all input nodes have the same rank *) + print "{ rank = same;\n"; + List.iter (fun (Expr.Assign (v, x)) -> + List.iter (fun y -> + if (Variable.is_locative y) then print("\t" ^ (vs y) ^ ";\n")) + (Expr.find_vars x)) + alist; + print "}\n"; + + (* all output nodes have the same rank *) + print "{ rank = same;\n"; + List.iter (fun (Expr.Assign (v, x)) -> + if (Variable.is_locative v) then print("\t" ^ (vs v) ^ ";\n")) + alist; + print "}\n"; + + (* edges *) + List.iter (fun (Expr.Assign (v, x)) -> + List.iter (fun y -> print("\t" ^ (vs y) ^ " -> " ^ (vs v) ^ ";\n")) + (Expr.find_vars x)) + alist; + + print "}\n"; + end + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/to_alist.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/to_alist.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of
Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val to_assignments : Expr.expr list -> Expr.assignment list (* convert the dag roots into an ordered assignment list *) +val dump : (string -> unit) -> Expr.assignment list -> unit (* write the assignment list as a dot digraph through the given print function *) +val good_for_fma : Expr.expr * Expr.expr -> bool (* whether both factors of a product may be fused into a multiply-add *) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/trig.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/trig.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,152 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* trigonometric transforms *) +open Util + +(* DFT of real input *) +let rdft sign n input = + Fft.dft sign n (Complex.real @@ input) + +(* DFT of hermitian input *) +let hdft sign n input = + Fft.dft sign n (Complex.hermitian n input) + +(* DFT real transform of vectors of two real numbers, + multiplication by (NaN I), and summation *) +let dft_via_rdft sign n input = + let f = rdft sign n input + in fun i -> + Complex.plus + [Complex.real (f i); + Complex.times (Complex.nan Expr.I) (Complex.imag (f i))] + +(* Discrete Hartley Transform *) +let dht sign n input = + let f = Fft.dft sign n (Complex.real @@ input) in + (fun i -> + Complex.plus [Complex.real (f i); Complex.imag (f i)]) + +let trigI n input = (* size-2n DFT, sign 1, of Complex.hermitian applied to input *) + let twon = 2 * n in + let input' = Complex.hermitian twon input + in + Fft.dft 1 twon input' + +let interleave_zero input = fun i -> (* zero at even i, input ((i - 1) / 2) at odd i *) + if (i mod 2) == 0 + then Complex.zero + else + input ((i - 1) / 2) + +let trigII n input = (* size-4n DFT, sign 1, of Complex.hermitian applied to the zero-interleaved input *) + let fourn = 4 * n in + let input' = Complex.hermitian fourn (interleave_zero input) + in + Fft.dft 1 fourn input' + +let trigIII n input = (* odd-indexed outputs of a size-4n DFT, sign 1 *) + let fourn = 4 * n in + let twon = 2 * n in + let input' = Complex.hermitian fourn + (fun i -> + if (i == 0) then + Complex.real (input 0) + else if (i == twon) then + Complex.uminus (Complex.real (input 0)) + else + Complex.antihermitian twon input i) + in + let dft = Fft.dft 1 fourn input' + in fun k -> dft (2 * k + 1) + +let zero_extend n input = fun i -> (* input on 0 <= i < n, zero elsewhere *) + if (i >= 0 && i < n) + then input i + else Complex.zero + +let trigIV n input = (* odd-indexed outputs of a size-8n DFT, sign 1 *) + let fourn = 4 * n + and eightn = 8 * n in + let input' = Complex.hermitian eightn + (zero_extend fourn (Complex.antihermitian fourn + (interleave_zero input))) + in + let dft = Fft.dft 1 eightn input' + in fun k -> dft (2 * k + 1) + +let
make_dct scale nshift trig = (* DCT builder: run trig at size n - nshift on the zero-extended, scaled, real-part input. NOTE(review): presumably @@ is the composition operator from Util, opened at the top of this file -- confirm *) + fun n input -> + trig (n - nshift) (Complex.real @@ (Complex.times scale) @@ + (zero_extend n input)) +(* + * DCT-I: y[k] = sum x[j] cos(pi * j * k / n) + *) +let dctI = make_dct Complex.one 1 trigI + +(* + * DCT-II: y[k] = sum x[j] cos(pi * (j + 1/2) * k / n) + *) +let dctII = make_dct Complex.one 0 trigII + +(* + * DCT-III: y[k] = sum x[j] cos(pi * j * (k + 1/2) / n) + *) +let dctIII = make_dct Complex.half 0 trigIII + +(* + * DCT-IV y[k] = sum x[j] cos(pi * (j + 1/2) * (k + 1/2) / n) + *) +let dctIV = make_dct Complex.half 0 trigIV + +let shift s input = fun i -> input (i - s) (* signal whose value at i is input (i - s) *) + +(* DST-x input := TRIG-x (input / i) *) +let make_dst scale nshift kshift jshift trig = (* DST builder: kshift shifts the zero-extended input, trig runs at size n + nshift, and the result is shifted by the negated jshift before its real part is taken *) + fun n input -> + Complex.real @@ + (shift (- jshift) + (trig (n + nshift) (Complex.uminus @@ + (Complex.times Complex.i) @@ + (Complex.times scale) @@ + Complex.real @@ + (shift kshift (zero_extend n input))))) + +(* + * DST-I: y[k] = sum x[j] sin(pi * j * k / n) + *) +let dstI = make_dst Complex.one 1 1 1 trigI + +(* + * DST-II: y[k] = sum x[j] sin(pi * (j + 1/2) * k / n) + *) +let dstII = make_dst Complex.one 0 0 1 trigII + +(* + * DST-III: y[k] = sum x[j] sin(pi * j * (k + 1/2) / n) + *) +let dstIII = make_dst Complex.half 0 1 0 trigIII + +(* + * DST-IV y[k] = sum x[j] sin(pi * (j + 1/2) * (k + 1/2) / n) + *) +let dstIV = make_dst Complex.half 0 0 0 trigIV + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/trig.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/trig.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,35 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + *
(at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val rdft : int -> int -> Complex.signal -> Complex.signal (* sign, size, input: DFT of real input *) +val hdft : int -> int -> Complex.signal -> Complex.signal (* sign, size, input: DFT of hermitian input *) +val dft_via_rdft : int -> int -> Complex.signal -> Complex.signal (* sign, size, input: real part of the rdft plus (NaN I) times its imaginary part *) +val dht : int -> int -> Complex.signal -> Complex.signal (* sign, size, input: discrete Hartley transform *) + +val dctI : int -> Complex.signal -> Complex.signal (* size, input; the same argument order holds for every dct and dst below *) +val dctII : int -> Complex.signal -> Complex.signal +val dctIII : int -> Complex.signal -> Complex.signal +val dctIV : int -> Complex.signal -> Complex.signal + +val dstI : int -> Complex.signal -> Complex.signal +val dstII : int -> Complex.signal -> Complex.signal +val dstIII : int -> Complex.signal -> Complex.signal +val dstIV : int -> Complex.signal -> Complex.signal diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/twiddle.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/twiddle.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,188 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* policies for loading/computing twiddle factors *) +open Complex +open Util + +type twop = TW_FULL | TW_CEXP | TW_NEXT + +let optostring = function + | TW_CEXP -> "TW_CEXP" + | TW_NEXT -> "TW_NEXT" + | TW_FULL -> "TW_FULL" + +type twinstr = (twop * int * int) + +let rec unroll_twfull l = match l with +| [] -> [] +| (TW_FULL, v, n) :: b -> + (forall [] cons 1 n (fun i -> (TW_CEXP, v, i))) + @ unroll_twfull b +| a :: b -> a :: unroll_twfull b + +let twinstr_to_c_string l = + let one (op, a, b) = Printf.sprintf "{ %s, %d, %d }" (optostring op) a b + in let rec loop first = function + | [] -> "" + | a :: b -> (if first then "\n" else ",\n") ^ (one a) ^ (loop false b) + in "{" ^ (loop true l) ^ "}" + +let twinstr_to_simd_string vl l = + let one sep = function + | (TW_NEXT, 1, 0) -> sep ^ "{TW_NEXT, " ^ vl ^ ", 0}" + | (TW_NEXT, _, _) -> failwith "twinstr_to_simd_string" + | (TW_CEXP, v, b) -> sep ^ (Printf.sprintf "VTW(%d,%d)" v b) + | _ -> failwith "twinstr_to_simd_string" + in let rec loop first = function + | [] -> "" + | a :: b -> (one (if first then "\n" else ",\n") a) ^ (loop false b) + in "{" ^ (loop true (unroll_twfull l)) ^ "}" + +let rec pow m n = + if (n = 0) then 1 + else m * pow m (n - 1) + +let rec is_pow m n = + n = 1 || ((n mod m) = 0 && is_pow m (n / m)) + +let rec log m n = if n = 1 then 0 else 1 + log m (n / m) + +let rec largest_power_smaller_than m i = + if (is_pow m i) then i + else largest_power_smaller_than m (i - 1) + +let rec smallest_power_larger_than m i = + if 
(is_pow m i) then i + else smallest_power_larger_than m (i + 1) + +let rec_array n f = + let g = ref (fun i -> Complex.zero) in + let a = Array.init n (fun i -> lazy (!g i)) in + let h i = f (fun i -> Lazy.force a.(i)) i in + begin + g := h; + h + end + + +let ctimes use_complex_arith a b = + if use_complex_arith then + Complex.ctimes a b + else + Complex.times a b + +let ctimesj use_complex_arith a b = + if use_complex_arith then + Complex.ctimesj a b + else + Complex.times (Complex.conj a) b + +let make_bytwiddle sign use_complex_arith g f i = + if i = 0 then + f i + else if sign = 1 then + ctimes use_complex_arith (g i) (f i) + else + ctimesj use_complex_arith (g i) (f i) + +(* various policies for computing/loading twiddle factors *) + +let twiddle_policy_load_all v use_complex_arith = + let bytwiddle n sign w f = + make_bytwiddle sign use_complex_arith (fun i -> w (i - 1)) f + and twidlen n = 2 * (n - 1) + and twdesc r = [(TW_FULL, v, r);(TW_NEXT, 1, 0)] + in bytwiddle, twidlen, twdesc + +(* + * if i is a power of two, then load w (log i) + * else let x = largest power of 2 less than i in + * let y = i - x in + * compute w^{x+y} = w^x * w^y + *) +let twiddle_policy_log2 v use_complex_arith = + let bytwiddle n sign w f = + let g = rec_array n (fun self i -> + if i = 0 then Complex.one + else if is_pow 2 i then w (log 2 i) + else let x = largest_power_smaller_than 2 i in + let y = i - x in + ctimes use_complex_arith (self x) (self y)) + in make_bytwiddle sign use_complex_arith g f + and twidlen n = 2 * (log 2 (largest_power_smaller_than 2 (2 * n - 1))) + and twdesc n = + (List.flatten + (List.map + (fun i -> + if i > 0 && is_pow 2 i then + [TW_CEXP, v, i] + else + []) + (iota n))) + @ [(TW_NEXT, 1, 0)] + in bytwiddle, twidlen, twdesc + +let twiddle_policy_log3 v use_complex_arith = + let rec terms_needed i pi s n = + if (s >= n - 1) then i + else terms_needed (i + 1) (3 * pi) (s + pi) n + in + let rec bytwiddle n sign w f = + let nterms = terms_needed 0 1 0 n in 
+ let maxterm = pow 3 (nterms - 1) in + let g = rec_array (3 * n) (fun self i -> + if i = 0 then Complex.one + else if is_pow 3 i then w (log 3 i) + else if i = (n - 1) && maxterm >= n then + w (nterms - 1) + else let x = smallest_power_larger_than 3 i in + if (i + i >= x) then + let x = min x (n - 1) in + ctimesj use_complex_arith (self (x - i)) (self x) + else let x = largest_power_smaller_than 3 i in + ctimes use_complex_arith (self (i - x)) (self x)) + in make_bytwiddle sign use_complex_arith g f + and twidlen n = 2 * (terms_needed 0 1 0 n) + and twdesc n = + (List.map + (fun i -> + let x = min (pow 3 i) (n - 1) in + TW_CEXP, v, x) + (iota ((twidlen n) / 2))) + @ [(TW_NEXT, 1, 0)] + in bytwiddle, twidlen, twdesc + +let current_twiddle_policy = ref twiddle_policy_load_all + +let twiddle_policy use_complex_arith = + !current_twiddle_policy use_complex_arith + +let set_policy x = Arg.Unit (fun () -> current_twiddle_policy := x) +let set_policy_int x = Arg.Int (fun i -> current_twiddle_policy := x i) + +let undocumented = " Undocumented twiddle policy" + +let speclist = [ + "-twiddle-load-all", set_policy twiddle_policy_load_all, undocumented; + "-twiddle-log2", set_policy twiddle_policy_log2, undocumented; + "-twiddle-log3", set_policy twiddle_policy_log3, undocumented; +] diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/twiddle.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/twiddle.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val speclist : (string * Arg.spec * string) list + +type twinstr + +val twiddle_policy : + int -> bool -> + (int -> int -> (int -> Complex.expr) -> (int -> Complex.expr) -> + int -> Complex.expr) *(int -> int) * (int -> twinstr list) + +val twinstr_to_c_string : twinstr list -> string +val twinstr_to_simd_string : string -> twinstr list -> string diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/unique.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/unique.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,38 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* repository of unique tokens *) + +type unique = Unique of unit + +(* this depends on the compiler not being too smart *) +let make () = + let make_aux x = Unique x in + make_aux () + +(* note that the obvious definition + + let make () = Unique () + + fails *) + +let same (a : unique) (b : unique) = + (a == b) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/unique.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/unique.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type unique +val make : unit -> unique +val same : unique -> unique -> bool diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/util.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/util.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +(* various utility functions *) +open List +open Unix + +(***************************************** + * Integer operations + *****************************************) +(* find the inverse of n modulo m; the search is unbounded, so n must be invertible modulo m *) +let invmod n m = + let rec loop i = + if ((i * n) mod m == 1) then i + else loop (i + 1) + in + loop 1 + +(* Euclid's algorithm *) +let rec gcd n m = + if (n > m) + then gcd m n + else + let r = m mod n + in + if (r == 0) then n + else gcd r n + +(* reduce the fraction m/n to lowest terms, modulo factors of n/n *) +let lowest_terms n m = + if (m mod n == 0) then + (1,0) + else + let nn = (abs n) in let mm = m * (n / nn) + in let mpos = + if (mm > 0) then (mm mod nn) + else (mm + (1 + (abs mm) / nn) * nn) mod nn + and d = gcd nn (abs mm) + in (nn / d, mpos / d) + +(* find a generator for the multiplicative group mod p + (where p must be prime for a generator to exist!!)
*) + +exception No_Generator + +let find_generator p = + let rec period x prod = + if (prod == 1) then 1 + else 1 + (period x (prod * x mod p)) + in let rec findgen x = + if (x == 0) then raise No_Generator + else if ((period x x) == (p - 1)) then x + else findgen ((x + 1) mod p) + in findgen 1 + +(* raise x to a power n modulo p (requires n >= 0) (in principle, + negative powers would be fine, provided that x and p are relatively + prime...we don't need this functionality, though) *) + +exception Negative_Power + +let rec pow_mod x n p = + if (n == 0) then 1 + else if (n < 0) then raise Negative_Power + else if (n mod 2 == 0) then pow_mod (x * x mod p) (n / 2) p + else x * (pow_mod x (n - 1) p) mod p + +(****************************************** + * auxiliary functions + ******************************************) +let rec forall id combiner a b f = + if (a >= b) then id + else combiner (f a) (forall id combiner (a + 1) b f) + +let sum_list l = fold_right (+) l 0 +let max_list l = fold_right (max) l (-999999) +let min_list l = fold_right (min) l 999999 +let count pred = fold_left + (fun a elem -> if (pred elem) then 1 + a else a) 0 +let remove elem = List.filter (fun e -> (e != elem)) +let cons a b = a :: b +let null = function + [] -> true + | _ -> false +let for_list l f = List.iter f l +let rmap l f = List.map f l + +(* functional composition *) +let (@@) f g x = f (g x) + +let forall_flat a b = forall [] (@) a b + +let identity x = x + +let rec minimize f = function + [] -> None + | elem :: rest -> + match minimize f rest with + None -> Some elem + | Some x -> if (f x) >= (f elem) then Some elem else Some x + + +let rec find_elem condition = function + [] -> None + | elem :: rest -> + if condition elem then + Some elem + else + find_elem condition rest + + +(* find x, x >= a, such that (pred x) is true *) +let rec suchthat a pred = + if (pred a) then a else suchthat (a + 1) pred + +(* print an information message *) +let info string = + if !Magic.verbose then
begin + let now = Unix.times () + and pid = Unix.getpid () in + prerr_string ((string_of_int pid) ^ ": " ^ + "at t = " ^ (string_of_float now.tms_utime) ^ " : "); + prerr_string (string ^ "\n"); + flush Pervasives.stderr; + end + +(* iota n produces the list [0; 1; ...; n - 1] *) +let iota n = forall [] cons 0 n identity + +(* interval a b produces the list [a; a + 1; ...; b - 1] *) +let interval a b = List.map ((+) a) (iota (b - a)) + +(* + * freeze a function, i.e., compute it only once on demand, and + * cache it into an array. + *) +let array n f = + let a = Array.init n (fun i -> lazy (f i)) + in fun i -> Lazy.force a.(i) + + +let rec take n l = + match (n, l) with + (0, _) -> [] + | (n, (a :: b)) -> a :: (take (n - 1) b) + | _ -> failwith "take" + +let rec drop n l = + match (n, l) with + (0, _) -> l + | (n, (_ :: b)) -> drop (n - 1) b + | _ -> failwith "drop" + + +let either a b = + match a with + Some x -> x + | _ -> b diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/util.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/util.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,49 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +val invmod : int -> int -> int +val gcd : int -> int -> int +val lowest_terms : int -> int -> int * int +val find_generator : int -> int +val pow_mod : int -> int -> int -> int +val forall : 'a -> ('b -> 'a -> 'a) -> int -> int -> (int -> 'b) -> 'a +val sum_list : int list -> int +val max_list : int list -> int +val min_list : int list -> int +val count : ('a -> bool) -> 'a list -> int +val remove : 'a -> 'a list -> 'a list +val for_list : 'a list -> ('a -> unit) -> unit +val rmap : 'a list -> ('a -> 'b) -> 'b list +val cons : 'a -> 'a list -> 'a list +val null : 'a list -> bool +val (@@) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b +val forall_flat : int -> int -> (int -> 'a list) -> 'a list +val identity : 'a -> 'a +val minimize : ('a -> 'b) -> 'a list -> 'a option +val find_elem : ('a -> bool) -> 'a list -> 'a option +val suchthat : int -> (int -> bool) -> int +val info : string -> unit +val iota : int -> int list +val interval : int -> int -> int list +val array : int -> (int -> 'a) -> int -> 'a +val take : int -> 'a list -> 'a list +val drop : int -> 'a list -> 'a list +val either : 'a option -> 'a -> 'a diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/variable.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/variable.ml Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,108 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any 
later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type variable = + (* temporary variables generated automatically *) + | Temporary of int + (* memory locations, e.g., array elements *) + | Locative of (Unique.unique * Unique.unique * + (int -> string) * int * string) + (* constant values, e.g., twiddle factors *) + | Constant of (Unique.unique * string) + +let hash v = Hashtbl.hash v + +let same a b = (a == b) + +let is_constant = function + | Constant _ -> true + | _ -> false + +let is_temporary = function + | Temporary _ -> true + | _ -> false + +let is_locative = function + | Locative _ -> true + | _ -> false + +let same_location a b = + match (a, b) with + | (Locative (location_a, _, _, _, _), Locative (location_b, _, _, _, _)) -> + Unique.same location_a location_b + | _ -> false + +let same_class a b = + match (a, b) with + | (Locative (_, class_a, _, _, _), Locative (_, class_b, _, _, _)) -> + Unique.same class_a class_b + | (Constant (class_a, _), Constant (class_b, _)) -> + Unique.same class_a class_b + | _ -> false + +let make_temporary = + let tmp_count = ref 0 + in fun () -> begin + tmp_count := !tmp_count + 1; + Temporary !tmp_count + end + +let make_constant class_token name = + Constant (class_token, name) + +let make_locative location_token class_token name i vs = + Locative (location_token, class_token, name, i, vs) + +let vstride_of_locative = function + | Locative (_, _, _, _, vs) -> vs + | _ -> failwith "vstride_of_locative" + +(* special naming conventions for variables *) +let rec base62_of_int k = + 
let x = k mod 62 + and y = k / 62 in + let c = + if x < 10 then + Char.chr (x + Char.code '0') + else if x < 36 then + Char.chr (x + Char.code 'a' - 10) + else + Char.chr (x + Char.code 'A' - 36) + in + let s = String.make 1 c in + let r = if y == 0 then "" else base62_of_int y in + r ^ s + +let varname_of_int k = + if !Magic.compact then + base62_of_int k + else + string_of_int k + +let unparse = function + | Temporary k -> "T" ^ (varname_of_int k) + | Constant (_, name) -> name + | Locative (_, _, name, i, _) -> name i + +let unparse_for_alignment m = function + | Locative (_, _, name, i, _) -> name (i mod m) + | _ -> failwith "unparse_for_alignment" + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/genfft/variable.mli --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/genfft/variable.mli Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,38 @@ +(* + * Copyright (c) 1997-1999 Massachusetts Institute of Technology + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + *) + +type variable + +val hash : variable -> int +val same : variable -> variable -> bool +val is_constant : variable -> bool +val is_temporary : variable -> bool +val is_locative : variable -> bool +val same_location : variable -> variable -> bool +val same_class : variable -> variable -> bool +val make_temporary : unit -> variable +val make_constant : Unique.unique -> string -> variable +val make_locative : + Unique.unique -> Unique.unique -> (int -> string) -> + int -> string -> variable +val unparse : variable -> string +val unparse_for_alignment : int -> variable -> string +val vstride_of_locative : variable -> string diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/install-sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/install-sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,527 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2011-11-20.07; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +nl=' +' +IFS=" "" $nl" + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit=${DOITPROG-} +if test -z "$doit"; then + doit_exec=exec +else + doit_exec=$doit +fi + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_glob='?' +initialize_posix_glob=' + test "$posix_glob" != "?" || { + if (set -f) 2>/dev/null; then + posix_glob= + else + posix_glob=: + fi + } +' + +posix_mkdir= + +# Desired mode of installed file. 
+mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +no_target_directory= + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *' '* | *' +'* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) dst_arg=$2 + # Protect names problematic for 'test' and other utilities. 
+ case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) no_target_directory=true;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? 
+ else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + # Prefer dirname, but fall back on a substitute if dirname fails. + dstdir=` + (dirname "$dst") 2>/dev/null || + expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$dst" : 'X\(//\)[^/]' \| \ + X"$dst" : 'X\(//\)$' \| \ + X"$dst" : 'X\(/\)' \| . 2>/dev/null || + echo X"$dst" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q' + ` + + test -d "$dstdir" + dstdir_status=$? + fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. 
+ if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. 
+ + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + eval "$initialize_posix_glob" + + oIFS=$IFS + IFS=/ + $posix_glob set -f + set fnord $dstdir + shift + $posix_glob set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command.
+ # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + + eval "$initialize_posix_glob" && + $posix_glob set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + $posix_glob set +f && + + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CPPFLAGS = -I$(top_srcdir)/simd +noinst_LTLIBRARIES = libkernel.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = ifftw.h cycle.h + +libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \ +cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c extract-reim.c hash.c iabs.c \ +kalloc.c md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c \ +primes.c print.c problem.c rader.c scan.c solver.c solvtab.c stride.c \ +tensor.c tensor1.c tensor2.c tensor3.c tensor4.c tensor5.c tensor7.c \ +tensor8.c tensor9.c tile2d.c timer.c transpose.c trig.c twiddle.c \ +cycle.h ifftw.h diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,667 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = kernel +DIST_COMMON = 
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libkernel_la_LIBADD = +am_libkernel_la_OBJECTS = align.lo alloc.lo assert.lo awake.lo \ + buffered.lo cpy1d.lo cpy2d-pair.lo cpy2d.lo ct.lo debug.lo \ + extract-reim.lo hash.lo iabs.lo kalloc.lo md5-1.lo md5.lo \ + minmax.lo ops.lo pickdim.lo plan.lo planner.lo primes.lo \ + print.lo problem.lo rader.lo scan.lo solver.lo solvtab.lo \ + stride.lo tensor.lo tensor1.lo tensor2.lo tensor3.lo \ + tensor4.lo tensor5.lo tensor7.lo tensor8.lo tensor9.lo \ + tile2d.lo timer.lo transpose.lo trig.lo twiddle.lo +libkernel_la_OBJECTS = $(am_libkernel_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv 
= mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libkernel_la_SOURCES) +DIST_SOURCES = $(libkernel_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/simd +noinst_LTLIBRARIES = libkernel.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# 
pkginclude_HEADERS = ifftw.h cycle.h +libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \ +cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c extract-reim.c hash.c iabs.c \ +kalloc.c md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c \ +primes.c print.c problem.c rader.c scan.c solver.c solvtab.c stride.c \ +tensor.c tensor1.c tensor2.c tensor3.c tensor4.c tensor5.c tensor7.c \ +tensor8.c tensor9.c tile2d.c timer.c transpose.c trig.c twiddle.c \ +cycle.h ifftw.h + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu kernel/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu kernel/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libkernel.la: $(libkernel_la_OBJECTS) $(libkernel_la_DEPENDENCIES) $(EXTRA_libkernel_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libkernel_la_OBJECTS) $(libkernel_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/align.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/assert.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/awake.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffered.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy1d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy2d-pair.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy2d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/debug.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/extract-reim.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iabs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kalloc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5-1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minmax.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ops.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pickdim.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/planner.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/primes.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rader.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scan.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solver.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solvtab.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stride.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor8.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tile2d.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trig.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twiddle.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) 
$(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/align.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/align.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#if HAVE_SIMD +# define ALGN 16 +#else + /* disable the alignment machinery, because it will break, + e.g., if sizeof(R) == 12 (as in long-double/x86) */ +# define ALGN 0 +#endif + +/* NONPORTABLE: reports the misalignment of p as the remainder of its address (cast to uintptr_t) modulo ALGN, so a result of 0 means p sits on an ALGN-byte boundary. When ALGN == 0 the computation is compiled out, p is only marked UNUSED, and 0 is always returned. */ +int X(alignment_of)(R *p) +{ +#if ALGN == 0 + UNUSED(p); + return 0; +#else + return (int)(((uintptr_t) p) % ALGN); +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/alloc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/alloc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option)
any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +/********************************************************** + * DEBUGGING CODE + **********************************************************/ +#if defined(FFTW_DEBUG_MALLOC) + +#include <stdio.h> /* printf, used by malloc_print_minfo */ + +/* + debugging malloc/free. + + 1) Initialize every malloced and freed area to random values, just + to make sure we are not using uninitialized pointers. + + 2) check for blocks freed twice. + + 3) Check for writes past the ends of allocated blocks + + 4) destroy contents of freed blocks in order to detect incorrect reuse. + + 5) keep track of who allocates what and report memory leaks + + This code is a quick and dirty hack. May be nonportable. + Use at your own risk.
+ +*/ + +#define MAGIC ((size_t)0xABadCafe) /* marker stored in header word 1 and verified by ifree */ +#define PAD_FACTOR 2 /* each block is allocated PAD_FACTOR times larger than requested */ +#define SZ_HEADER (4 * sizeof(size_t)) /* bytes reserved in front of the pointer handed to the caller */ +#define HASHSZ 1031 /* number of buckets in minfo[] */ + +static unsigned int hashaddr(void *p) +{ /* bucket index in minfo[] for address p */ + return ((unsigned long)p) % HASHSZ; +} + +struct mstat { /* per-tag counters: current/peak byte totals and current/peak block counts */ + int siz; + int maxsiz; + int cnt; + int maxcnt; +}; + +static struct mstat mstat[MALLOC_WHAT_LAST]; + +struct minfo { /* record of one live allocation, chained per hash bucket through next */ + const char *file; + int line; + size_t n; + void *p; + struct minfo *next; +}; + +static struct minfo *minfo[HASHSZ] = {0}; + +#if defined(HAVE_THREADS) || defined(HAVE_OPENMP) +int X(in_thread) = 0; +#endif + +void *X(malloc_debug)(size_t n, enum malloc_tag what, + const char *file, int line) +{ /* Allocates n bytes (at least 1) tagged with what. The underlying block holds an SZ_HEADER-byte header (n, MAGIC, what) followed by PAD_FACTOR * n junk-filled bytes; the returned pointer is the address just past the header. Unless IN_THREAD is set, the per-tag and EVERYTHING statistics are updated and a minfo record carrying file/line is linked in. */ + char *p; + size_t i; + struct minfo *info; + struct mstat *stat = mstat + what; + struct mstat *estat = mstat + EVERYTHING; + + if (n == 0) + n = 1; + + if (!IN_THREAD) { + stat->siz += n; + if (stat->siz > stat->maxsiz) + stat->maxsiz = stat->siz; + estat->siz += n; + if (estat->siz > estat->maxsiz) + estat->maxsiz = estat->siz; + } + + p = (char *) X(kernel_malloc)(PAD_FACTOR * n + SZ_HEADER); + A(p); + + /* store the sz in a known position */ + ((size_t *) p)[0] = n; + ((size_t *) p)[1] = MAGIC; + ((size_t *) p)[2] = what; + + /* fill with junk */ + for (i = 0; i < PAD_FACTOR * n; i++) + p[i + SZ_HEADER] = (char) (i ^ 0xEF); + + if (!IN_THREAD) { + ++stat->cnt; + ++estat->cnt; + + if (stat->cnt > stat->maxcnt) + stat->maxcnt = stat->cnt; + if (estat->cnt > estat->maxcnt) + estat->maxcnt = estat->cnt; + } + + /* skip the info we stored previously */ + p = p + SZ_HEADER; + + if (!IN_THREAD) { + unsigned int h = hashaddr(p); + /* record allocation in allocation list */ + info = (struct minfo *) malloc(sizeof(struct minfo)); A(info); /* same check as A(p) above: do not write through a failed allocation */ + info->n = n; + info->file = file; + info->line = line; + info->p = p; + info->next = minfo[h]; + minfo[h] = info; + } + + return (void *) p; +} + +void X(ifree)(void *p) +{ /* Debug free of a pointer returned by malloc_debug: steps back to the header, verifies MAGIC, checks that the padding beyond the first n bytes still holds the junk pattern, overwrites the whole payload, updates the statistics, releases the block with kernel_free, and unlinks the matching minfo record. */ + char *q; + + A(p); + + q = ((char *) p) - SZ_HEADER; + A(q); + + { + size_t n = ((size_t *) q)[0]; + size_t magic = ((size_t *)
q)[1]; + int what = ((size_t *) q)[2]; + size_t i; + struct mstat *stat = mstat + what; + struct mstat *estat = mstat + EVERYTHING; + + /* set to zero to detect duplicate free's */ + ((size_t *) q)[0] = 0; + + A(magic == MAGIC); + ((size_t *) q)[1] = ~MAGIC; + + if (!IN_THREAD) { + stat->siz -= n; + A(stat->siz >= 0); + estat->siz -= n; + A(estat->siz >= 0); + } + + /* check for writing past end of array: */ + for (i = n; i < PAD_FACTOR * n; ++i) + if (q[i + SZ_HEADER] != (char) (i ^ 0xEF)) { + A(0 /* array bounds overwritten */ ); + } + for (i = 0; i < PAD_FACTOR * n; ++i) + q[i + SZ_HEADER] = (char) (i ^ 0xAD); + + if (!IN_THREAD) { + --stat->cnt; + --estat->cnt; + + A(stat->cnt >= 0); + A((stat->cnt == 0 && stat->siz == 0) || + (stat->cnt > 0 && stat->siz > 0)); + A(estat->cnt >= 0); + A((estat->cnt == 0 && estat->siz == 0) || + (estat->cnt > 0 && estat->siz > 0)); + } + + X(kernel_free)(q); + } + + if (!IN_THREAD) { + /* delete minfo entry */ + unsigned int h = hashaddr(p); + struct minfo **i; + + for (i = minfo + h; *i; i = &((*i)->next)) { + if ((*i)->p == p) { + struct minfo *i0 = (*i)->next; + free(*i); + *i = i0; + return; + } + } + + A(0 /* no entry in minfo list */ ); + } +} + +void X(malloc_print_minfo)(int verbose) +{ /* When verbose > 2, prints the per-tag statistics table. Then lists every allocation still recorded in minfo[] and calls abort() if at least one is found. */ + struct minfo *info; + int what; + unsigned int h; + int leak = 0; + + if (verbose > 2) { + static const char *names[MALLOC_WHAT_LAST] = { + "EVERYTHING", + "PLANS", "SOLVERS", "PROBLEMS", "BUFFERS", + "HASHT", "TENSORS", "PLANNERS", "SLVDSC", "TWIDDLES", + "STRIDES", "OTHER" + }; + + printf("%12s %8s %8s %10s %10s\n", + "what", "cnt", "maxcnt", "siz", "maxsiz"); + + for (what = 0; what < MALLOC_WHAT_LAST; ++what) { + struct mstat *stat = mstat + what; + printf("%12s %8d %8d %10d %10d\n", + names[what], stat->cnt, stat->maxcnt, + stat->siz, stat->maxsiz); + } + } + + for (h = 0; h < HASHSZ; ++h) + if (minfo[h]) { + printf("\nUnfreed allocations:\n"); + break; + } + + for (h = 0; h < HASHSZ; ++h) + for (info = minfo[h]; info; info =
info->next) { + leak = 1; + printf("%s:%d: %zu bytes at %p\n", /* %zu: info->n is a size_t, which is unsigned */ + info->file, info->line, info->n, info->p); + } + + if (leak) + abort(); +} + +#else +/********************************************************** + * NON DEBUGGING CODE + **********************************************************/ +/* production version, no hacks */ + +void *X(malloc_plain)(size_t n) +{ /* Returns n bytes (at least 1) obtained from kernel_malloc. The result is passed through CK(p) (presumably a fatal check on NULL; confirm in ifftw.h) and, when MIN_ALIGNMENT is defined, asserted to be a multiple of it. */ + void *p; + if (n == 0) + n = 1; + p = X(kernel_malloc)(n); + CK(p); + +#ifdef MIN_ALIGNMENT + A((((uintptr_t)p) % MIN_ALIGNMENT) == 0); +#endif + + return p; +} + +void X(ifree)(void *p) +{ /* production free: forwards p straight to kernel_free */ + X(kernel_free)(p); +} + +#endif + +void X(ifree0)(void *p) +{ + /* common pattern: release p only when it is non-null */ + if (p) X(ifree)(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/assert.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/assert.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" +#include <stdio.h> +#include <stdlib.h> +/* Report a failed assertion and terminate the process. Flushes stdout first so pending output is not lost behind the message, prints "fftw: FILE:LINE: assertion failed: S" to stderr, then calls abort() when HAVE_ABORT is defined and exit(EXIT_FAILURE) otherwise. s, line and file are printed as given. Needs <stdio.h> for fflush/fprintf and <stdlib.h> for abort/exit. */ +void X(assertion_failed)(const char *s, int line, const char *file) +{ + fflush(stdout); + fprintf(stderr, "fftw: %s:%d: assertion failed: %s\n", file, line, s); +#ifdef HAVE_ABORT + abort(); +#else + exit(EXIT_FAILURE); +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/awake.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/awake.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" +/* No-op awake hook: takes a plan and a wakefulness value, marks both as unused and changes nothing. Presumably meant for plans that have no state to set up or tear down -- TODO confirm against the callers. */ +void X(null_awake)(plan *ego, enum wakefulness wakefulness) +{ + UNUSED(ego); + UNUSED(wakefulness); + /* do nothing */ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/buffered.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/buffered.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* routines shared by the various buffered solvers */ + +#include "ifftw.h" +/* buffer-count cap that X(nbuf) substitutes when its maxnbuf argument is 0 */ +#define DEFAULT_MAXNBUF ((INT)256) + +/* approx. 
512KB of buffers for complex data */ +#define MAXBUFSZ (256 * 1024 / (INT)(sizeof(R))) +/* Choose the number of buffers for size-n transforms over a vector of length vl. maxnbuf == 0 selects DEFAULT_MAXNBUF. The starting value is min(maxnbuf, vl, max(1, MAXBUFSZ / n)); the loop then returns the largest i in [max(1, nbuf / 4), nbuf] that divides vl, and the starting value is returned when no such i exists. n is used as a divisor, so it must be nonzero. */ +INT X(nbuf)(INT n, INT vl, INT maxnbuf) +{ + INT i, nbuf, lb; + + if (!maxnbuf) + maxnbuf = DEFAULT_MAXNBUF; + + nbuf = X(imin)(maxnbuf, + X(imin)(vl, X(imax)((INT)1, MAXBUFSZ / n))); + + /* + * Look for a buffer number (not too small) that divides the + * vector length, in order that we only need one child plan: + */ + lb = X(imax)(1, nbuf / 4); + for (i = nbuf; i >= lb; --i) + if (vl % i == 0) + return i; + + /* whatever... */ + return nbuf; +} + +#define SKEW 6 /* need to be even for SIMD */ +#define SKEWMOD 8 +/* Returns n unchanged for a single vector (vl == 1); otherwise n padded up to the next value congruent to SKEW modulo SKEWMOD. Relies on X(modulo), defined elsewhere, yielding a non-negative remainder for a negative first argument -- TODO confirm. */ +INT X(bufdist)(INT n, INT vl) +{ + if (vl == 1) + return n; + else + /* return smallest X such that X >= N and X == SKEW (mod SKEWMOD) */ + return n + X(modulo)(SKEW - n, SKEWMOD); +} +/* Nonzero when n exceeds MAXBUFSZ. */ +int X(toobig)(INT n) +{ + return n > MAXBUFSZ; +} + +/* TRUE if there exists i < which such that maxnbuf[i] and + maxnbuf[which] yield the same value, in which case we canonicalize + on the minimum value */ +int X(nbuf_redundant)(INT n, INT vl, int which, + const INT *maxnbuf, int nmaxnbuf) +{ + int i; + (void)nmaxnbuf; /* UNUSED */ + for (i = 0; i < which; ++i) + if (X(nbuf)(n, vl, maxnbuf[i]) == X(nbuf)(n, vl, maxnbuf[which])) + return 1; + return 0; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/cpy1d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/cpy1d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* out of place 1D copy routine */ +#include "ifftw.h" +/* Copy n0 groups of vl adjacent R values from I to O; consecutive groups are is0 elements apart in I and os0 elements apart in O. I and O must be different pointers (asserted). vl of 1, 2 and 4 have unrolled loops; every other vl uses the generic double loop in the default case. */ +void X(cpy1d)(R *I, R *O, INT n0, INT is0, INT os0, INT vl) +{ + INT i0, v; + + A(I != O); + switch (vl) { + case 1: + if ((n0 & 1) || is0 != 1 || os0 != 1) { + for (; n0 > 0; --n0, I += is0, O += os0) + *O = *I; + break; + } + /* even n0 with unit strides: re-express as n0/2 adjacent pairs and let case 2 copy them */ n0 /= 2; is0 = 2; os0 = 2; + /* fall through */ + case 2: + if ((n0 & 1) || is0 != 2 || os0 != 2) { + for (; n0 > 0; --n0, I += is0, O += os0) { + R x0 = I[0]; + R x1 = I[1]; + O[0] = x0; + O[1] = x1; + } + break; + } + /* even n0 with stride-2 pairs: re-express as n0/2 adjacent quads and let case 4 copy them */ n0 /= 2; is0 = 4; os0 = 4; + /* fall through */ + case 4: + for (; n0 > 0; --n0, I += is0, O += os0) { + R x0 = I[0]; + R x1 = I[1]; + R x2 = I[2]; + R x3 = I[3]; + O[0] = x0; + O[1] = x1; + O[2] = x2; + O[3] = x3; + } + break; + default: + for (i0 = 0; i0 < n0; ++i0) + for (v = 0; v < vl; ++v) { + R x0 = I[i0 * is0 + v]; + O[i0 * os0 + v] = x0; + } + break; + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/cpy2d-pair.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/cpy2d-pair.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* out of place copy routines for pairs of isomorphic 2D arrays */ +#include "ifftw.h" +/* Copy two n0-by-n1 arrays in one pass: element (i0, i1) is read from I0 and I1 at offset i0 * is0 + i1 * is1 and written to O0 and O1 at offset i0 * os0 + i1 * os1. Both inputs share one pair of strides, as do both outputs. The i0 loop is innermost. */ +void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1) +{ + INT i0, i1; + + for (i1 = 0; i1 < n1; ++i1) + for (i0 = 0; i0 < n0; ++i0) { + R x0 = I0[i0 * is0 + i1 * is1]; + R x1 = I1[i0 * is0 + i1 * is1]; + O0[i0 * os0 + i1 * os1] = x0; + O1[i0 * os0 + i1 * os1] = x1; + } +} + +/* like cpy2d_pair, but read input contiguously if possible */ +void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1) +{ + if (IABS(is0) < IABS(is1)) /* inner loop is for n0 */ + X(cpy2d_pair) (I0, I1, O0, O1, n0, is0, os0, n1, is1, os1); + else /* swap the two dimensions: inner loop is for n1 */ + X(cpy2d_pair) (I0, I1, O0, O1, n1, is1, os1, n0, is0, os0); +} + +/* like cpy2d_pair, but write output contiguously if possible */ +void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1) +{ + if (IABS(os0) < IABS(os1)) /* inner loop is for n0 */ + X(cpy2d_pair) (I0, I1, O0, O1, n0, is0, os0, n1, is1, os1); + else /* swap the two dimensions: inner loop is for n1 */ + X(cpy2d_pair) (I0, I1, O0, O1, n1, is1, os1, n0, is0, os0); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/cpy2d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/cpy2d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you 
can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* out of place 2D copy routines */ +#include "ifftw.h" + +#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) +# ifdef HAVE_XMMINTRIN_H +# include <xmmintrin.h> +# define WIDE_TYPE __m128 +# endif +#endif + +#ifndef WIDE_TYPE +/* fall back to double, which means that WIDE_TYPE will be unused */ +# define WIDE_TYPE double +#endif +/* Copy an n0-by-n1 array of vl-element vectors from I to O. Element (i0, i1) starts at offset i0 * is0 + i1 * is1 in I and at i0 * os0 + i1 * os1 in O; the vl values of one element are adjacent. vl == 1 and vl == 2 have dedicated loops (for vl == 2 each pair may be moved as a single wider object when the size, alignment and even-stride tests below all hold); any other vl uses the generic triple loop. */ +void X(cpy2d)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl) +{ + INT i0, i1, v; + + switch (vl) { + case 1: + for (i1 = 0; i1 < n1; ++i1) + for (i0 = 0; i0 < n0; ++i0) { + R x0 = I[i0 * is0 + i1 * is1]; + O[i0 * os0 + i1 * os1] = x0; + } + break; + case 2: + if (1 + && (2 * sizeof(R) == sizeof(WIDE_TYPE)) + && (sizeof(WIDE_TYPE) > sizeof(double)) + && (((size_t)I) % sizeof(WIDE_TYPE) == 0) + && (((size_t)O) % sizeof(WIDE_TYPE) == 0) + && ((is0 & 1) == 0) + && ((is1 & 1) == 0) + && ((os0 & 1) == 0) + && ((os1 & 1) == 0)) { + /* copy R[2] as WIDE_TYPE if WIDE_TYPE is large + enough to hold R[2], and if the input is + properly aligned. This is a win when R==double + and WIDE_TYPE is 128 bits. 
*/ + for (i1 = 0; i1 < n1; ++i1) + for (i0 = 0; i0 < n0; ++i0) { + *(WIDE_TYPE *)&O[i0 * os0 + i1 * os1] = + *(WIDE_TYPE *)&I[i0 * is0 + i1 * is1]; + } + } else if (1 + && (2 * sizeof(R) == sizeof(double)) + && (((size_t)I) % sizeof(double) == 0) + && (((size_t)O) % sizeof(double) == 0) + && ((is0 & 1) == 0) + && ((is1 & 1) == 0) + && ((os0 & 1) == 0) + && ((os1 & 1) == 0)) { + /* copy R[2] as double if double is large enough to + hold R[2], and if the input is properly aligned. + This case applies when R==float */ + for (i1 = 0; i1 < n1; ++i1) + for (i0 = 0; i0 < n0; ++i0) { + *(double *)&O[i0 * os0 + i1 * os1] = + *(double *)&I[i0 * is0 + i1 * is1]; + } + } else { + for (i1 = 0; i1 < n1; ++i1) + for (i0 = 0; i0 < n0; ++i0) { + R x0 = I[i0 * is0 + i1 * is1]; + R x1 = I[i0 * is0 + i1 * is1 + 1]; + O[i0 * os0 + i1 * os1] = x0; + O[i0 * os0 + i1 * os1 + 1] = x1; + } + } + break; + default: + for (i1 = 0; i1 < n1; ++i1) + for (i0 = 0; i0 < n0; ++i0) + for (v = 0; v < vl; ++v) { + R x0 = I[i0 * is0 + i1 * is1 + v]; + O[i0 * os0 + i1 * os1 + v] = x0; + } + break; + } +} + +/* like cpy2d, but read input contiguously if possible */ +void X(cpy2d_ci)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl) +{ + if (IABS(is0) < IABS(is1)) /* inner loop is for n0 */ + X(cpy2d) (I, O, n0, is0, os0, n1, is1, os1, vl); + else /* swap the two dimensions: inner loop is for n1 */ + X(cpy2d) (I, O, n1, is1, os1, n0, is0, os0, vl); +} + +/* like cpy2d, but write output contiguously if possible */ +void X(cpy2d_co)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl) +{ + if (IABS(os0) < IABS(os1)) /* inner loop is for n0 */ + X(cpy2d) (I, O, n0, is0, os0, n1, is1, os1, vl); + else /* swap the two dimensions: inner loop is for n1 */ + X(cpy2d) (I, O, n1, is1, os1, n0, is0, os0, vl); +} + + +/* tiled copy routines */ +struct cpy2d_closure { /* arguments of one 2D copy, handed to the tile callbacks below through their void *args parameter */ + R *I, *O; + INT is0, os0, is1, os1, vl; + R *buf; +}; +/* Tile callback: copies the sub-rectangle [n0l, n0u) x [n1l, n1u) straight from I to O with X(cpy2d). */ +static void dotile(INT n0l, INT n0u, INT n1l, INT n1u, void *args) +{ + struct cpy2d_closure *k = (struct cpy2d_closure *)args; + X(cpy2d)(k->I + 
n0l * k->is0 + n1l * k->is1, + k->O + n0l * k->os0 + n1l * k->os1, + n0u - n0l, k->is0, k->os0, + n1u - n1l, k->is1, k->os1, + k->vl); +} +/* Tile callback: copies the sub-rectangle [n0l, n0u) x [n1l, n1u) in two steps, first from I into k->buf (packed with strides vl and vl * (n0u - n0l)), then from k->buf to O. */ +static void dotile_buf(INT n0l, INT n0u, INT n1l, INT n1u, void *args) +{ + struct cpy2d_closure *k = (struct cpy2d_closure *)args; + + /* copy from I to buf */ + X(cpy2d_ci)(k->I + n0l * k->is0 + n1l * k->is1, + k->buf, + n0u - n0l, k->is0, k->vl, + n1u - n1l, k->is1, k->vl * (n0u - n0l), + k->vl); + + /* copy from buf to O */ + X(cpy2d_co)(k->buf, + k->O + n0l * k->os0 + n1l * k->os1, + n0u - n0l, k->vl, k->os0, + n1u - n1l, k->vl * (n0u - n0l), k->os1, + k->vl); +} + +/* Same arguments as X(cpy2d). Packs them into a closure and passes dotile to X(tile2d) (defined elsewhere; presumably it calls dotile once per tile of side tilesz -- TODO confirm). The tile size is requested from X(compute_tilesz) for two arrays. */ +void X(cpy2d_tiled)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, INT vl) +{ + INT tilesz = X(compute_tilesz)(vl, + 1 /* input array */ + + 1 /* output array */); + struct cpy2d_closure k; + k.I = I; + k.O = O; + k.is0 = is0; + k.os0 = os0; + k.is1 = is1; + k.os1 = os1; + k.vl = vl; + k.buf = 0; /* unused */ + X(tile2d)(0, n0, 0, n1, tilesz, dotile, &k); +} +/* Like X(cpy2d_tiled), but every tile goes through an on-stack staging buffer of CACHESIZE / (2 * sizeof(R)) elements via dotile_buf. An assertion checks that a full tile of tilesz * tilesz * vl elements fits in that buffer. */ +void X(cpy2d_tiledbuf)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, INT vl) +{ + R buf[CACHESIZE / (2 * sizeof(R))]; + /* input and buffer in cache, or + output and buffer in cache */ + INT tilesz = X(compute_tilesz)(vl, 2); + struct cpy2d_closure k; + k.I = I; + k.O = O; + k.is0 = is0; + k.os0 = os0; + k.is1 = is1; + k.os1 = os1; + k.vl = vl; + k.buf = buf; + A(tilesz * tilesz * vl * sizeof(R) <= sizeof(buf)); + X(tile2d)(0, n0, 0, n1, tilesz, dotile_buf, &k); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/ct.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/ct.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; 
either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* common routines for Cooley-Tukey algorithms */ + +#include "ifftw.h" +/* true when n is a positive power of two, i.e. n > 0 and n has exactly one bit set; n is evaluated more than once */ +#define POW2P(n) (((n) > 0) && (((n) & ((n) - 1)) == 0)) + +/* TRUE if radix-r is ugly for size n: either n <= min_n, or n is a power of two and v * (n / r) <= 4. r is used as a divisor and must be nonzero. */ +int X(ct_uglyp)(INT min_n, INT v, INT n, INT r) +{ + return (n <= min_n) || (POW2P(n) && (v * (n / r)) <= 4); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/cycle.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/cycle.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + + +/* machine-dependent cycle counters code. Needs to be inlined. */ + +/***************************************************************************/ +/* To use the cycle counters in your code, simply #include "cycle.h" (this + file), and then use the functions/macros: + + ticks getticks(void); + + ticks is an opaque typedef defined below, representing the current time. + You extract the elapsed time between two calls to getticks() via: + + double elapsed(ticks t1, ticks t0); + + which returns a double-precision variable in arbitrary units. You + are not expected to convert this into human units like seconds; it + is intended only for *comparisons* of time intervals. + + (In order to use some of the OS-dependent timer routines like + Solaris' gethrtime, you need to paste the autoconf snippet below + into your configure.ac file and #include "config.h" before cycle.h, + or define the relevant macros manually if you are not using autoconf.) +*/ + +/***************************************************************************/ +/* This file uses macros like HAVE_GETHRTIME that are assumed to be + defined according to whether the corresponding function/type/header + is available on your system. 
The necessary macros are most + conveniently defined if you are using GNU autoconf, via the tests: + + dnl --------------------------------------------------------------------- + + AC_C_INLINE + AC_HEADER_TIME + AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) + + AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H +#include <sys/time.h> +#endif]) + + AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time]) + + dnl Cray UNICOS _rtc() (real-time clock) intrinsic + AC_MSG_CHECKING([for _rtc intrinsic]) + rtc_ok=yes + AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H +#include <intrinsics.h> +#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) + AC_MSG_RESULT($rtc_ok) + + dnl --------------------------------------------------------------------- +*/ + +/***************************************************************************/ +/* pull in the time headers: both when TIME_WITH_SYS_TIME says they may be combined, otherwise <sys/time.h> if present and <time.h> as the fallback */ +#if TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# if HAVE_SYS_TIME_H +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif +/* Defines elapsed(t1, t0) for arithmetic tick types as the difference of the two values after conversion to double. INL is the spelling of the inline keyword to use for the current compiler. */ +#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \ +{ \ + return (double)t1 - (double)t0; \ +} + +/*----------------------------------------------------------------*/ +/* Solaris */ +#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER) +typedef hrtime_t ticks; + +#define getticks gethrtime + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* AIX v. 
4+ routines to read the real-time clock or time-base register */ +#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER) +typedef timebasestruct_t ticks; +/* snapshot the clock with read_real_time(); conversion is deferred to elapsed() */ +static __inline ticks getticks(void) +{ + ticks t; + read_real_time(&t, TIMEBASE_SZ); + return t; +} + +static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */ +{ /* t1 and t0 are by-value copies, so converting them in place does not touch the caller's values */ + time_base_to_time(&t1, TIMEBASE_SZ); + time_base_to_time(&t0, TIMEBASE_SZ); + return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 + + ((double)t1.tb_low - (double)t0.tb_low)); +} + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PowerPC ``cycle'' counter using the time base register. + */ +#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +/* returns the 64-bit time base assembled from its upper and lower 32-bit halves */ +static __inline__ ticks getticks(void) +{ + unsigned int tbl, tbu0, tbu1; + + do { /* the upper half is read before and after the lower half; retry if it changed, so a carry between the reads cannot yield a torn value */ + __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); + __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); + __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); + } while (tbu0 != tbu1); + + return (((unsigned long long)tbu0) << 32) | tbl; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime, + from Carbon, requires no additional libraries to be linked). 
*/ +#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER) +#include <mach/mach_time.h> +typedef uint64_t ticks; +#define getticks mach_absolute_time +INLINE_ELAPSED(__inline__) +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * Pentium cycle counter + */ +#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +/* reads the time-stamp counter with rdtsc; the "=A" constraint delivers the 64-bit result from the EDX:EAX register pair */ +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__("rdtsc": "=A" (ret)); + /* no input, nothing else clobbered */ + return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ +#endif + +/* Visual C++ -- thanks to Morten Nissov for his help with this */ +#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER) +#include <windows.h> +typedef LARGE_INTEGER ticks; +#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */ +/* executes RDTSC (emitted as raw opcode bytes) and stores EDX and EAX into the high and low halves of the LARGE_INTEGER */ +static __inline ticks getticks(void) +{ + ticks retval; + + __asm { + RDTSC + mov retval.HighPart, edx + mov retval.LowPart, eax + } + return retval; +} +/* LARGE_INTEGER is not an arithmetic type, so the difference is taken on its 64-bit QuadPart member instead of using INLINE_ELAPSED */ +static __inline double elapsed(ticks t1, ticks t0) +{ + return (double)t1.QuadPart - (double)t0.QuadPart; +} + +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ +#endif + +/*----------------------------------------------------------------*/ +/* + * X86-64 cycle counter + */ +#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +/* rdtsc leaves the low 32 bits in EAX (a) and the high 32 bits in EDX (d); they are recombined into one 64-bit value */ +static __inline__ ticks getticks(void) +{ + unsigned a, d; + asm volatile("rdtsc" : "=a" (a), "=d" (d)); + return ((ticks)a) | (((ticks)d) << 32); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 +#endif + +/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori. 
+ NOTE: this code will fail to link unless you use the -Masmkeyword compiler + option (grrr). */ +#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +static ticks getticks(void) /* NOTE(review): there is no return statement; the asm sequence combines EDX:EAX into RAX, which this code appears to rely on as the return register -- confirm before touching */ +{ + asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; "); +} +INLINE_ELAPSED(__inline__) +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 +#endif + +/* Visual C++, courtesy of Dirk Michaelis */ +#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER) +/* <intrin.h> declares the __rdtsc intrinsic that getticks is mapped to below */ +#include <intrin.h> +#pragma intrinsic(__rdtsc) +typedef unsigned __int64 ticks; +#define getticks __rdtsc +INLINE_ELAPSED(__inline) + +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 +#endif + +/*----------------------------------------------------------------*/ +/* + * IA64 cycle counter + */ + +/* intel's icc/ecc compiler */ +#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; +#include <ia64intrin.h> +/* reads the register selected by _IA64_REG_AR_ITC through the __getReg intrinsic */ +static __inline__ ticks getticks(void) +{ + return __getReg(_IA64_REG_AR_ITC); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* gcc */ +#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; +/* same counter as the icc variant, read with an explicit mov from ar.itc */ +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); + return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* HP/UX IA64 compiler, courtesy Teresa L. 
Johnson: */ +#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER) +#include <machine/sys/inline.h> +typedef unsigned long ticks; +/* reads the register selected by _AREG_ITC through the _Asm_mov_from_ar compiler intrinsic */ +static inline ticks getticks(void) +{ + ticks ret; + + ret = _Asm_mov_from_ar (_AREG_ITC); + return ret; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/* Microsoft Visual C++ */ +#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER) +typedef unsigned __int64 ticks; + +# ifdef __cplusplus +extern "C" +# endif +ticks __getReg(int whichReg); +#pragma intrinsic(__getReg) + +static __inline ticks getticks(void) +{ + volatile ticks temp; + temp = __getReg(3116); /* NOTE(review): 3116 is presumably the numeric index of the AR.ITC register that the icc variant names _IA64_REG_AR_ITC -- confirm */ + return temp; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PA-RISC cycle counter (the two macro spellings are grouped in parentheses so that the HAVE_TICK_COUNTER guard applies to both; without them && binds tighter than || and __hppa__ alone would enable this section even after another counter was selected) + */ +#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +# ifdef __GNUC__ +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__("mfctl 16, %0": "=r" (ret)); + /* no input, nothing else clobbered */ + return ret; +} +# else +# include <machine/inline.h> +static inline unsigned long getticks(void) +{ + register ticks ret; + _MFCTL(16, ret); + return ret; +} +# endif + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* S390, courtesy of James Treacy */ +#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +/* stck stores the clock value into the memory that the address operand points at, here the local variable cycles */ +static __inline__ ticks getticks(void) +{ + ticks cycles; + __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc"); + return cycles; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER) +/* + * The 32-bit cycle counter on alpha overflows pretty quickly, + * unfortunately. 
A 1GHz machine overflows in 4 seconds. + */ +typedef unsigned int ticks; +/* reads the counter with rpcc and keeps only its low 32 bits */ +static __inline__ ticks getticks(void) +{ + unsigned long cc; + __asm__ __volatile__ ("rpcc %0" : "=r"(cc)); + return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; +/* reads the %tick register (the doubled %% is the escape for a literal % inside an asm template that has operands) */ +static __inline__ ticks getticks(void) +{ + ticks ret; + __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); + return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER) +# include <c_asm.h> +typedef unsigned int ticks; +/* DEC C spelling of the alpha counter read above: asm() comes from <c_asm.h>, and the result is again masked to 32 bits */ +static __inline ticks getticks(void) +{ + unsigned long cc; + cc = asm("rpcc %v0"); + return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +/* SGI/Irix */ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) +typedef struct timespec ticks; +/* returns the timespec filled in by clock_gettime(CLOCK_SGI_CYCLE); the call's return code is not checked */ +static inline ticks getticks(void) +{ + struct timespec t; + clock_gettime(CLOCK_SGI_CYCLE, &t); + return t; +} +/* difference of two timespecs in nanoseconds: seconds scaled by 1e9 plus the nanosecond fields */ +static inline double elapsed(ticks t1, ticks t0) +{ + return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 + + ((double)t1.tv_nsec - (double)t0.tv_nsec); +} +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* Cray UNICOS _rtc() intrinsic function */ +#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER) +#ifdef HAVE_INTRINSICS_H +# include <intrinsics.h> +#endif + +typedef long long ticks; + +#define getticks _rtc + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + 
+/*----------------------------------------------------------------*/ +/* MIPS ZBus */ +#if HAVE_MIPS_ZBUS_TIMER +#if defined(__mips__) && !defined(HAVE_TICK_COUNTER) +#include +#include +#include + +typedef uint64_t ticks; + +static inline ticks getticks(void) +{ + static uint64_t* addr = 0; + + if (addr == 0) + { + uint32_t rq_addr = 0x10030000; + int fd; + int pgsize; + + pgsize = getpagesize(); + fd = open ("/dev/mem", O_RDONLY | O_SYNC, 0); + if (fd < 0) { + perror("open"); + return NULL; + } + addr = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr); + close(fd); + if (addr == (uint64_t *)-1) { + perror("mmap"); + return NULL; + } + } + + return *addr; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif +#endif /* HAVE_MIPS_ZBUS_TIMER */ + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/debug.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/debug.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +#ifdef FFTW_DEBUG +#include <stdio.h> +/* a printer whose output goes to a stdio stream; super is the first member, so a printer pointer to it can be cast back to P_file */ +typedef struct { + printer super; + FILE *f; +} P_file; +/* put-character callback: writes c to the stream stored in the enclosing P_file */ +static void putchr_file(printer *p_, char c) +{ + P_file *p = (P_file *) p_; + fputc(c, p->f); +} +/* builds a P_file through X(mkprinter) with putchr_file as its put-character callback and records f as the target stream */ +static printer *mkprinter_file(FILE *f) +{ + P_file *p = (P_file *) X(mkprinter)(sizeof(P_file), putchr_file, 0); + p->f = f; + return &p->super; +} +/* Formatted debug output to stderr: creates a temporary file printer, formats the arguments through its vprint hook and destroys the printer again. Compiled only when FFTW_DEBUG is defined. */ +void X(debug)(const char *format, ...) +{ + va_list ap; + printer *p = mkprinter_file(stderr); + va_start(ap, format); + p->vprint(p, format, ap); + va_end(ap); + X(printer_destroy)(p); +} +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/extract-reim.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/extract-reim.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +/* decompose complex pointer into real and imaginary parts. 
+ Flip real and imaginary if the sign does not match + FFTW's idea of what the sign should be */ +/* c points at an interleaved complex array. On return *r and *i are c + 0 and c + 1 when sign == FFT_SIGN, and c + 1 and c + 0 otherwise. */ +void X(extract_reim)(int sign, R *c, R **r, R **i) +{ + if (sign == FFT_SIGN) { + *r = c + 0; + *i = c + 1; + } else { + *r = c + 1; + *i = c + 0; + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/hash.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/hash.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +unsigned X(hash)(const char *s) +{ + unsigned h = 0xDEADBEEFu; + do { + h = h * 17 + (int)*s; + } while (*s++); + return h; +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/iabs.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/iabs.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +INT X(iabs)(INT a) +{ + return a < 0 ? 
(0 - a) : a; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/ifftw.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/ifftw.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1160 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* FFTW internal header file */ +#ifndef __IFFTW_H__ +#define __IFFTW_H__ + +#include "config.h" + +#include /* size_t */ +#include /* va_list */ +#include /* ptrdiff_t */ + +#if HAVE_SYS_TYPES_H +# include +#endif + +#if HAVE_STDINT_H +# include /* uintptr_t, maybe */ +#endif + +#if HAVE_INTTYPES_H +# include /* uintptr_t, maybe */ +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/* Windows annoyances -- since tests/hook.c uses some internal + FFTW functions, we need to given them the dllexport attribute + under Windows when compiling as a DLL (see api/fftw3.h). 
*/ +#if defined(FFTW_EXTERN) +# define IFFTW_EXTERN FFTW_EXTERN +#elif (defined(FFTW_DLL) || defined(DLL_EXPORT)) \ + && (defined(_WIN32) || defined(__WIN32__)) +# define IFFTW_EXTERN extern __declspec(dllexport) +#else +# define IFFTW_EXTERN extern +#endif + +/* determine precision and name-mangling scheme */ +#define CONCAT(prefix, name) prefix ## name +#if defined(FFTW_SINGLE) + typedef float R; +# define X(name) CONCAT(fftwf_, name) +#elif defined(FFTW_LDOUBLE) + typedef long double R; +# define X(name) CONCAT(fftwl_, name) +# define TRIGREAL_IS_LONG_DOUBLE +#elif defined(FFTW_QUAD) + typedef __float128 R; +# define X(name) CONCAT(fftwq_, name) +# define TRIGREAL_IS_QUAD +#else + typedef double R; +# define X(name) CONCAT(fftw_, name) +#endif + +/* + integral type large enough to contain a stride (what ``int'' should + have been in the first place. +*/ +typedef ptrdiff_t INT; + +/* dummy use of unused parameters to silence compiler warnings */ +#define UNUSED(x) (void)x + +#define NELEM(array) ((int) (sizeof(array) / sizeof((array)[0]))) + +#define FFT_SIGN (-1) /* sign convention for forward transforms */ +extern void X(extract_reim)(int sign, R *c, R **r, R **i); + +#define REGISTER_SOLVER(p, s) X(solver_register)(p, s) + +#define STRINGIZEx(x) #x +#define STRINGIZE(x) STRINGIZEx(x) +#define CIMPLIES(ante, post) (!(ante) || (post)) + +/* define HAVE_SIMD if any simd extensions are supported */ +#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || \ + defined(HAVE_MIPS_PS) || defined(HAVE_AVX) +#define HAVE_SIMD 1 +#else +#define HAVE_SIMD 0 +#endif + +extern int X(have_simd_sse2)(void); +extern int X(have_simd_avx)(void); +extern int X(have_simd_altivec)(void); +extern int X(have_simd_neon)(void); + +/* forward declarations */ +typedef struct problem_s problem; +typedef struct plan_s plan; +typedef struct solver_s solver; +typedef struct planner_s planner; +typedef struct printer_s printer; +typedef struct scanner_s scanner; + 
+/*-----------------------------------------------------------------------*/ +/* alloca: */ +#if HAVE_SIMD +# ifdef HAVE_AVX +# define MIN_ALIGNMENT 32 /* best alignment for AVX, conservative for + * everything else */ +# else + /* Note that we cannot use 32-byte alignment for all SIMD. For + example, MacOS X malloc is 16-byte aligned, but there was no + posix_memalign in MacOS X until version 10.6. */ +# define MIN_ALIGNMENT 16 +# endif +#endif + +#if defined(HAVE_ALLOCA) && defined(FFTW_ENABLE_ALLOCA) + /* use alloca if available */ + +#ifndef alloca +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# if HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca(size_t); +# endif +# endif +# endif +# endif +#endif +#endif + +# ifdef MIN_ALIGNMENT +# define STACK_MALLOC(T, p, n) \ + { \ + p = (T)alloca((n) + MIN_ALIGNMENT); \ + p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) & \ + (~(uintptr_t)(MIN_ALIGNMENT - 1))); \ + } +# define STACK_FREE(n) +# else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */ +# define STACK_MALLOC(T, p, n) p = (T)alloca(n) +# define STACK_FREE(n) +# endif + +#else /* ! HAVE_ALLOCA */ + /* use malloc instead of alloca */ +# define STACK_MALLOC(T, p, n) p = (T)MALLOC(n, OTHER) +# define STACK_FREE(n) X(ifree)(n) +#endif /* ! HAVE_ALLOCA */ + +/* allocation of buffers. If these grow too large use malloc(), else + use STACK_MALLOC (hopefully reducing to alloca()). 
*/ + +/* 64KiB ought to be enough for anybody */ +#define MAX_STACK_ALLOC ((size_t)64 * 1024) + +#define BUF_ALLOC(T, p, n) \ +{ \ + if (n < MAX_STACK_ALLOC) { \ + STACK_MALLOC(T, p, n); \ + } else { \ + p = (T)MALLOC(n, BUFFERS); \ + } \ +} + +#define BUF_FREE(p, n) \ +{ \ + if (n < MAX_STACK_ALLOC) { \ + STACK_FREE(p); \ + } else { \ + X(ifree)(p); \ + } \ +} + +/*-----------------------------------------------------------------------*/ +/* define uintptr_t if it is not already defined */ + +#ifndef HAVE_UINTPTR_T +# if SIZEOF_VOID_P == 0 +# error sizeof void* is unknown! +# elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P + typedef unsigned int uintptr_t; +# elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P + typedef unsigned long uintptr_t; +# elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P + typedef unsigned long long uintptr_t; +# else +# error no unsigned integer type matches void* sizeof! +# endif +#endif + +/*-----------------------------------------------------------------------*/ +/* We can do an optimization for copying pairs of (aligned) floats + when in single precision if 2*float = double. 
*/ + +#define FFTW_2R_IS_DOUBLE (defined(FFTW_SINGLE) \ + && SIZEOF_FLOAT != 0 \ + && SIZEOF_DOUBLE == 2*SIZEOF_FLOAT) + +#define DOUBLE_ALIGNED(p) ((((uintptr_t)(p)) % sizeof(double)) == 0) + +/*-----------------------------------------------------------------------*/ +/* assert.c: */ +IFFTW_EXTERN void X(assertion_failed)(const char *s, + int line, const char *file); + +/* always check */ +#define CK(ex) \ + (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) + +#ifdef FFTW_DEBUG +/* check only if debug enabled */ +#define A(ex) \ + (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) +#else +#define A(ex) /* nothing */ +#endif + +extern void X(debug)(const char *format, ...); +#define D X(debug) + +/*-----------------------------------------------------------------------*/ +/* kalloc.c: */ +extern void *X(kernel_malloc)(size_t n); +extern void X(kernel_free)(void *p); + +/*-----------------------------------------------------------------------*/ +/* alloc.c: */ + +/* objects allocated by malloc, for statistical purposes */ +enum malloc_tag { + EVERYTHING, + PLANS, + SOLVERS, + PROBLEMS, + BUFFERS, + HASHT, + TENSORS, + PLANNERS, + SLVDESCS, + TWIDDLES, + STRIDES, + OTHER, + MALLOC_WHAT_LAST /* must be last */ +}; + +IFFTW_EXTERN void X(ifree)(void *ptr); +extern void X(ifree0)(void *ptr); + +#ifdef FFTW_DEBUG_MALLOC + +IFFTW_EXTERN void *X(malloc_debug)(size_t n, enum malloc_tag what, + const char *file, int line); +#define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__) +IFFTW_EXTERN void X(malloc_print_minfo)(int vrbose); + +#else /* ! 
FFTW_DEBUG_MALLOC */ + +IFFTW_EXTERN void *X(malloc_plain)(size_t sz); +#define MALLOC(n, what) X(malloc_plain)(n) + +#endif + +#if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && (defined(HAVE_THREADS) || defined(HAVE_OPENMP)) +extern int X(in_thread); +# define IN_THREAD X(in_thread) +# define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1 +# define THREAD_OFF X(in_thread) = in_thread_save; } +#else +# define IN_THREAD 0 +# define THREAD_ON +# define THREAD_OFF +#endif + +/*-----------------------------------------------------------------------*/ +/* low-resolution clock */ + +#ifdef FAKE_CRUDE_TIME + typedef int crude_time; +#else +# if TIME_WITH_SYS_TIME +# include +# include +# else +# if HAVE_SYS_TIME_H +# include +# else +# include +# endif +# endif + +# ifdef HAVE_BSDGETTIMEOFDAY +# ifndef HAVE_GETTIMEOFDAY +# define gettimeofday BSDgettimeofday +# define HAVE_GETTIMEOFDAY 1 +# endif +# endif + +# if defined(HAVE_GETTIMEOFDAY) + typedef struct timeval crude_time; +# else + typedef clock_t crude_time; +# endif +#endif /* else FAKE_CRUDE_TIME */ + +crude_time X(get_crude_time)(void); +double X(elapsed_since)(const planner *plnr, const problem *p, + crude_time t0); /* time in seconds since t0 */ + +/*-----------------------------------------------------------------------*/ +/* ops.c: */ +/* + * ops counter. The total number of additions is add + fma + * and the total number of multiplications is mul + fma. 
+ * Total flops = add + mul + 2 * fma + */ +typedef struct { + double add; + double mul; + double fma; + double other; +} opcnt; + +void X(ops_zero)(opcnt *dst); +void X(ops_other)(INT o, opcnt *dst); +void X(ops_cpy)(const opcnt *src, opcnt *dst); + +void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst); +void X(ops_add2)(const opcnt *a, opcnt *dst); + +/* dst = m * a + b */ +void X(ops_madd)(INT m, const opcnt *a, const opcnt *b, opcnt *dst); + +/* dst += m * a */ +void X(ops_madd2)(INT m, const opcnt *a, opcnt *dst); + + +/*-----------------------------------------------------------------------*/ +/* minmax.c: */ +INT X(imax)(INT a, INT b); +INT X(imin)(INT a, INT b); + +/*-----------------------------------------------------------------------*/ +/* iabs.c: */ +INT X(iabs)(INT a); + +/* inline version */ +#define IABS(x) (((x) < 0) ? (0 - (x)) : (x)) + +/*-----------------------------------------------------------------------*/ +/* md5.c */ + +#if SIZEOF_UNSIGNED_INT >= 4 +typedef unsigned int md5uint; +#else +typedef unsigned long md5uint; /* at least 32 bits as per C standard */ +#endif + +typedef md5uint md5sig[4]; + +typedef struct { + md5sig s; /* state and signature */ + + /* fields not meant to be used outside md5.c: */ + unsigned char c[64]; /* stuff not yet processed */ + unsigned l; /* total length. 
Should be 64 bits long, but this is + good enough for us */ +} md5; + +void X(md5begin)(md5 *p); +void X(md5putb)(md5 *p, const void *d_, size_t len); +void X(md5puts)(md5 *p, const char *s); +void X(md5putc)(md5 *p, unsigned char c); +void X(md5int)(md5 *p, int i); +void X(md5INT)(md5 *p, INT i); +void X(md5unsigned)(md5 *p, unsigned i); +void X(md5end)(md5 *p); + +/*-----------------------------------------------------------------------*/ +/* tensor.c: */ +#define STRUCT_HACK_KR +#undef STRUCT_HACK_C99 + +typedef struct { + INT n; + INT is; /* input stride */ + INT os; /* output stride */ +} iodim; + +typedef struct { + int rnk; +#if defined(STRUCT_HACK_KR) + iodim dims[1]; +#elif defined(STRUCT_HACK_C99) + iodim dims[]; +#else + iodim *dims; +#endif +} tensor; + +/* + Definition of rank -infinity. + This definition has the property that if you want rank 0 or 1, + you can simply test for rank <= 1. This is a common case. + + A tensor of rank -infinity has size 0. +*/ +#define RNK_MINFTY ((int)(((unsigned) -1) >> 1)) +#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY) + +typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind; + +tensor *X(mktensor)(int rnk); +tensor *X(mktensor_0d)(void); +tensor *X(mktensor_1d)(INT n, INT is, INT os); +tensor *X(mktensor_2d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1); +tensor *X(mktensor_3d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT n2, INT is2, INT os2); +tensor *X(mktensor_4d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT n2, INT is2, INT os2, + INT n3, INT is3, INT os3); +tensor *X(mktensor_5d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT n2, INT is2, INT os2, + INT n3, INT is3, INT os3, + INT n4, INT is4, INT os4); +INT X(tensor_sz)(const tensor *sz); +void X(tensor_md5)(md5 *p, const tensor *t); +INT X(tensor_max_index)(const tensor *sz); +INT X(tensor_min_istride)(const tensor *sz); +INT X(tensor_min_ostride)(const tensor *sz); +INT X(tensor_min_stride)(const tensor *sz); 
+int X(tensor_inplace_strides)(const tensor *sz); +int X(tensor_inplace_strides2)(const tensor *a, const tensor *b); +int X(tensor_strides_decrease)(const tensor *sz, const tensor *vecsz, + inplace_kind k); +tensor *X(tensor_copy)(const tensor *sz); +int X(tensor_kosherp)(const tensor *x); + +tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k); +tensor *X(tensor_copy_except)(const tensor *sz, int except_dim); +tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk); +tensor *X(tensor_compress)(const tensor *sz); +tensor *X(tensor_compress_contiguous)(const tensor *sz); +tensor *X(tensor_append)(const tensor *a, const tensor *b); +void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b); +int X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os); +void X(tensor_destroy)(tensor *sz); +void X(tensor_destroy2)(tensor *a, tensor *b); +void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d); +void X(tensor_print)(const tensor *sz, printer *p); +int X(dimcmp)(const iodim *a, const iodim *b); +int X(tensor_equal)(const tensor *a, const tensor *b); +int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz); + +/*-----------------------------------------------------------------------*/ +/* problem.c: */ +enum { + /* a problem that cannot be solved */ + PROBLEM_UNSOLVABLE, + + PROBLEM_DFT, + PROBLEM_RDFT, + PROBLEM_RDFT2, + + /* for mpi/ subdirectory */ + PROBLEM_MPI_DFT, + PROBLEM_MPI_RDFT, + PROBLEM_MPI_RDFT2, + PROBLEM_MPI_TRANSPOSE, + + PROBLEM_LAST +}; + +typedef struct { + int problem_kind; + void (*hash) (const problem *ego, md5 *p); + void (*zero) (const problem *ego); + void (*print) (const problem *ego, printer *p); + void (*destroy) (problem *ego); +} problem_adt; + +struct problem_s { + const problem_adt *adt; +}; + +problem *X(mkproblem)(size_t sz, const problem_adt *adt); +void X(problem_destroy)(problem *ego); +problem *X(mkproblem_unsolvable)(void); + 
+/*-----------------------------------------------------------------------*/ +/* print.c */ +struct printer_s { + void (*print)(printer *p, const char *format, ...); + void (*vprint)(printer *p, const char *format, va_list ap); + void (*putchr)(printer *p, char c); + void (*cleanup)(printer *p); + int indent; + int indent_incr; +}; + +printer *X(mkprinter)(size_t size, + void (*putchr)(printer *p, char c), + void (*cleanup)(printer *p)); +IFFTW_EXTERN void X(printer_destroy)(printer *p); + +/*-----------------------------------------------------------------------*/ +/* scan.c */ +struct scanner_s { + int (*scan)(scanner *sc, const char *format, ...); + int (*vscan)(scanner *sc, const char *format, va_list ap); + int (*getchr)(scanner *sc); + int ungotc; +}; + +scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)); +void X(scanner_destroy)(scanner *sc); + +/*-----------------------------------------------------------------------*/ +/* plan.c: */ + +enum wakefulness { + SLEEPY, + AWAKE_ZERO, + AWAKE_SQRTN_TABLE, + AWAKE_SINCOS +}; + +typedef struct { + void (*solve)(const plan *ego, const problem *p); + void (*awake)(plan *ego, enum wakefulness wakefulness); + void (*print)(const plan *ego, printer *p); + void (*destroy)(plan *ego); +} plan_adt; + +struct plan_s { + const plan_adt *adt; + opcnt ops; + double pcost; + enum wakefulness wakefulness; /* used for debugging only */ + int could_prune_now_p; +}; + +plan *X(mkplan)(size_t size, const plan_adt *adt); +void X(plan_destroy_internal)(plan *ego); +IFFTW_EXTERN void X(plan_awake)(plan *ego, enum wakefulness wakefulness); +void X(plan_null_destroy)(plan *ego); + +/*-----------------------------------------------------------------------*/ +/* solver.c: */ +typedef struct { + int problem_kind; + plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr); + void (*destroy)(solver *ego); +} solver_adt; + +struct solver_s { + const solver_adt *adt; + int refcnt; +}; + +solver *X(mksolver)(size_t size, 
const solver_adt *adt); +void X(solver_use)(solver *ego); +void X(solver_destroy)(solver *ego); +void X(solver_register)(planner *plnr, solver *s); + +/* shorthand */ +#define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt) + +/*-----------------------------------------------------------------------*/ +/* planner.c */ + +typedef struct slvdesc_s { + solver *slv; + const char *reg_nam; + unsigned nam_hash; + int reg_id; + int next_for_same_problem_kind; +} slvdesc; + +typedef struct solution_s solution; /* opaque */ + +/* interpretation of L and U: + + - if it returns a plan, the planner guarantees that all applicable + plans at least as impatient as U have been tried, and that each + plan in the solution is at least as impatient as L. + + - if it returns 0, the planner guarantees to have tried all solvers + at least as impatient as L, and that none of them was applicable. + + The structure is packed to fit into 64 bits. +*/ + +typedef struct { + unsigned l:20; + unsigned hash_info:3; +# define BITS_FOR_TIMELIMIT 9 + unsigned timelimit_impatience:BITS_FOR_TIMELIMIT; + unsigned u:20; + + /* abstraction break: we store the solver here to pad the + structure to 64 bits. Otherwise, the struct is padded to 64 + bits anyway, and another word is allocated for slvndx. 
*/ +# define BITS_FOR_SLVNDX 12 + unsigned slvndx:BITS_FOR_SLVNDX; +} flags_t; + +/* impatience flags */ +enum { + BELIEVE_PCOST = 0x0001, + ESTIMATE = 0x0002, + NO_DFT_R2HC = 0x0004, + NO_SLOW = 0x0008, + NO_VRECURSE = 0x0010, + NO_INDIRECT_OP = 0x0020, + NO_LARGE_GENERIC = 0x0040, + NO_RANK_SPLITS = 0x0080, + NO_VRANK_SPLITS = 0x0100, + NO_NONTHREADED = 0x0200, + NO_BUFFERING = 0x0400, + NO_FIXED_RADIX_LARGE_N = 0x0800, + NO_DESTROY_INPUT = 0x1000, + NO_SIMD = 0x2000, + CONSERVE_MEMORY = 0x4000, + NO_DHT_R2HC = 0x8000, + NO_UGLY = 0x10000, + ALLOW_PRUNING = 0x20000 +}; + +/* hashtable information */ +enum { + BLESSING = 0x1, /* save this entry */ + H_VALID = 0x2, /* valid hastable entry */ + H_LIVE = 0x4 /* entry is nonempty, implies H_VALID */ +}; + +#define PLNR_L(plnr) ((plnr)->flags.l) +#define PLNR_U(plnr) ((plnr)->flags.u) +#define PLNR_TIMELIMIT_IMPATIENCE(plnr) ((plnr)->flags.timelimit_impatience) + +#define ESTIMATEP(plnr) (PLNR_U(plnr) & ESTIMATE) +#define BELIEVE_PCOSTP(plnr) (PLNR_U(plnr) & BELIEVE_PCOST) +#define ALLOW_PRUNINGP(plnr) (PLNR_U(plnr) & ALLOW_PRUNING) + +#define NO_INDIRECT_OP_P(plnr) (PLNR_L(plnr) & NO_INDIRECT_OP) +#define NO_LARGE_GENERICP(plnr) (PLNR_L(plnr) & NO_LARGE_GENERIC) +#define NO_RANK_SPLITSP(plnr) (PLNR_L(plnr) & NO_RANK_SPLITS) +#define NO_VRANK_SPLITSP(plnr) (PLNR_L(plnr) & NO_VRANK_SPLITS) +#define NO_VRECURSEP(plnr) (PLNR_L(plnr) & NO_VRECURSE) +#define NO_DFT_R2HCP(plnr) (PLNR_L(plnr) & NO_DFT_R2HC) +#define NO_SLOWP(plnr) (PLNR_L(plnr) & NO_SLOW) +#define NO_UGLYP(plnr) (PLNR_L(plnr) & NO_UGLY) +#define NO_FIXED_RADIX_LARGE_NP(plnr) \ + (PLNR_L(plnr) & NO_FIXED_RADIX_LARGE_N) +#define NO_NONTHREADEDP(plnr) \ + ((PLNR_L(plnr) & NO_NONTHREADED) && (plnr)->nthr > 1) + +#define NO_DESTROY_INPUTP(plnr) (PLNR_L(plnr) & NO_DESTROY_INPUT) +#define NO_SIMDP(plnr) (PLNR_L(plnr) & NO_SIMD) +#define CONSERVE_MEMORYP(plnr) (PLNR_L(plnr) & CONSERVE_MEMORY) +#define NO_DHT_R2HCP(plnr) (PLNR_L(plnr) & NO_DHT_R2HC) +#define 
NO_BUFFERINGP(plnr) (PLNR_L(plnr) & NO_BUFFERING) + +typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia; + +typedef enum { + /* WISDOM_NORMAL: planner may or may not use wisdom */ + WISDOM_NORMAL, + + /* WISDOM_ONLY: planner must use wisdom and must avoid searching */ + WISDOM_ONLY, + + /* WISDOM_IS_BOGUS: planner must return 0 as quickly as possible */ + WISDOM_IS_BOGUS, + + /* WISDOM_IGNORE_INFEASIBLE: planner ignores infeasible wisdom */ + WISDOM_IGNORE_INFEASIBLE, + + /* WISDOM_IGNORE_ALL: planner ignores all */ + WISDOM_IGNORE_ALL +} wisdom_state_t; + +typedef struct { + void (*register_solver)(planner *ego, solver *s); + plan *(*mkplan)(planner *ego, const problem *p); + void (*forget)(planner *ego, amnesia a); + void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved + word in C++. */ + int (*imprt)(planner *ego, scanner *sc); +} planner_adt; + +/* hash table of solutions */ +typedef struct { + solution *solutions; + unsigned hashsiz, nelem; + + /* statistics */ + int lookup, succ_lookup, lookup_iter; + int insert, insert_iter, insert_unknown; + int nrehash; +} hashtab; + +typedef enum { COST_SUM, COST_MAX } cost_kind; + +struct planner_s { + const planner_adt *adt; + void (*hook)(struct planner_s *plnr, plan *pln, + const problem *p, int optimalp); + double (*cost_hook)(const problem *p, double t, cost_kind k); + int (*wisdom_ok_hook)(const problem *p, flags_t flags); + void (*nowisdom_hook)(const problem *p); + wisdom_state_t (*bogosity_hook)(wisdom_state_t state, const problem *p); + + /* solver descriptors */ + slvdesc *slvdescs; + unsigned nslvdesc, slvdescsiz; + const char *cur_reg_nam; + int cur_reg_id; + int slvdescs_for_problem_kind[PROBLEM_LAST]; + + wisdom_state_t wisdom_state; + + hashtab htab_blessed; + hashtab htab_unblessed; + + int nthr; + flags_t flags; + + crude_time start_time; + double timelimit; /* elapsed_since(start_time) at which to bail out */ + int timed_out; /* whether most recent search timed out */ + int 
need_timeout_check; + + /* various statistics */ + int nplan; /* number of plans evaluated */ + double pcost, epcost; /* total pcost of measured/estimated plans */ + int nprob; /* number of problems evaluated */ +}; + +planner *X(mkplanner)(void); +void X(planner_destroy)(planner *ego); + +/* + Iterate over all solvers. Read: + + @article{ baker93iterators, + author = "Henry G. Baker, Jr.", + title = "Iterators: Signs of Weakness in Object-Oriented Languages", + journal = "{ACM} {OOPS} Messenger", + volume = "4", + number = "3", + pages = "18--25" + } +*/ +#define FORALL_SOLVERS(ego, s, p, what) \ +{ \ + unsigned _cnt; \ + for (_cnt = 0; _cnt < ego->nslvdesc; ++_cnt) { \ + slvdesc *p = ego->slvdescs + _cnt; \ + solver *s = p->slv; \ + what; \ + } \ +} + +#define FORALL_SOLVERS_OF_KIND(kind, ego, s, p, what) \ +{ \ + int _cnt = ego->slvdescs_for_problem_kind[kind]; \ + while (_cnt >= 0) { \ + slvdesc *p = ego->slvdescs + _cnt; \ + solver *s = p->slv; \ + what; \ + _cnt = p->next_for_same_problem_kind; \ + } \ +} + + +/* make plan, destroy problem */ +plan *X(mkplan_d)(planner *ego, problem *p); +plan *X(mkplan_f_d)(planner *ego, problem *p, + unsigned l_set, unsigned u_set, unsigned u_reset); + +/*-----------------------------------------------------------------------*/ +/* stride.c: */ + +/* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. 
*/ +#if (defined(__i386__) || defined(__x86_64__) || _M_IX86 >= 500) && !defined(FFTW_LDOUBLE) +#define PRECOMPUTE_ARRAY_INDICES +#endif + +extern const INT X(an_INT_guaranteed_to_be_zero); + +#ifdef PRECOMPUTE_ARRAY_INDICES +typedef INT *stride; +#define WS(stride, i) (stride[i]) +extern stride X(mkstride)(INT n, INT s); +void X(stride_destroy)(stride p); +/* hackery to prevent the compiler from copying the strides array + onto the stack */ +#define MAKE_VOLATILE_STRIDE(nptr, x) (x) = (x) + X(an_INT_guaranteed_to_be_zero) +#else + +typedef INT stride; +#define WS(stride, i) (stride * i) +#define fftwf_mkstride(n, stride) stride +#define fftw_mkstride(n, stride) stride +#define fftwl_mkstride(n, stride) stride +#define fftwf_stride_destroy(p) ((void) p) +#define fftw_stride_destroy(p) ((void) p) +#define fftwl_stride_destroy(p) ((void) p) + +/* hackery to prevent the compiler from ``optimizing'' induction + variables in codelet loops. The problem is that for each K and for + each expression of the form P[I + STRIDE * K] in a loop, most + compilers will try to lift an induction variable PK := &P[I + STRIDE * K]. + For large values of K this behavior overflows the + register set, which is likely worse than doing the index computation + in the first place. + + If we guess that there are more than + ESTIMATED_AVAILABLE_INDEX_REGISTERS such pointers, we deliberately confuse + the compiler by setting STRIDE ^= ZERO, where ZERO is a value guaranteed to + be 0, but the compiler does not know this. + + 16 registers ought to be enough for anybody, or so the amd64 and ARM ISA's + seem to imply. +*/ +#define ESTIMATED_AVAILABLE_INDEX_REGISTERS 16 +#define MAKE_VOLATILE_STRIDE(nptr, x) \ + (nptr <= ESTIMATED_AVAILABLE_INDEX_REGISTERS ? 
\ + 0 : \ + ((x) = (x) ^ X(an_INT_guaranteed_to_be_zero))) +#endif /* PRECOMPUTE_ARRAY_INDICES */ + +/*-----------------------------------------------------------------------*/ +/* solvtab.c */ + +struct solvtab_s { void (*reg)(planner *); const char *reg_nam; }; +typedef struct solvtab_s solvtab[]; +void X(solvtab_exec)(const solvtab tbl, planner *p); +#define SOLVTAB(s) { s, STRINGIZE(s) } +#define SOLVTAB_END { 0, 0 } + +/*-----------------------------------------------------------------------*/ +/* pickdim.c */ +int X(pickdim)(int which_dim, const int *buddies, int nbuddies, + const tensor *sz, int oop, int *dp); + +/*-----------------------------------------------------------------------*/ +/* twiddle.c */ +/* little language to express twiddle factors computation */ +enum { TW_COS = 0, TW_SIN = 1, TW_CEXP = 2, TW_NEXT = 3, + TW_FULL = 4, TW_HALF = 5 }; + +typedef struct { + unsigned char op; + signed char v; + short i; +} tw_instr; + +typedef struct twid_s { + R *W; /* array of twiddle factors */ + INT n, r, m; /* transform order, radix, # twiddle rows */ + int refcnt; + const tw_instr *instr; + struct twid_s *cdr; + enum wakefulness wakefulness; +} twid; + +INT X(twiddle_length)(INT r, const tw_instr *p); +void X(twiddle_awake)(enum wakefulness wakefulness, + twid **pp, const tw_instr *instr, INT n, INT r, INT m); + +/*-----------------------------------------------------------------------*/ +/* trig.c */ +#if defined(TRIGREAL_IS_LONG_DOUBLE) + typedef long double trigreal; +#elif defined(TRIGREAL_IS_QUAD) + typedef __float128 trigreal; +#else + typedef double trigreal; +#endif + +typedef struct triggen_s triggen; + +struct triggen_s { + void (*cexp)(triggen *t, INT m, R *result); + void (*cexpl)(triggen *t, INT m, trigreal *result); + void (*rotate)(triggen *p, INT m, R xr, R xi, R *res); + + INT twshft; + INT twradix; + INT twmsk; + trigreal *W0, *W1; + INT n; +}; + +triggen *X(mktriggen)(enum wakefulness wakefulness, INT n); +void 
X(triggen_destroy)(triggen *p); + +/*-----------------------------------------------------------------------*/ +/* primes.c: */ + +#define MULMOD(x, y, p) \ + (((x) <= 92681 - (y)) ? ((x) * (y)) % (p) : X(safe_mulmod)(x, y, p)) + +INT X(safe_mulmod)(INT x, INT y, INT p); +INT X(power_mod)(INT n, INT m, INT p); +INT X(find_generator)(INT p); +INT X(first_divisor)(INT n); +int X(is_prime)(INT n); +INT X(next_prime)(INT n); +int X(factors_into)(INT n, const INT *primes); +int X(factors_into_small_primes)(INT n); +INT X(choose_radix)(INT r, INT n); +INT X(isqrt)(INT n); +INT X(modulo)(INT a, INT n); + +#define GENERIC_MIN_BAD 173 /* min prime for which generic becomes bad */ + +/* thresholds below which certain solvers are considered SLOW. These are guesses + believed to be conservative */ +#define GENERIC_MAX_SLOW 16 +#define RADER_MAX_SLOW 32 +#define BLUESTEIN_MAX_SLOW 24 + +/*-----------------------------------------------------------------------*/ +/* rader.c: */ +typedef struct rader_tls rader_tl; + +void X(rader_tl_insert)(INT k1, INT k2, INT k3, R *W, rader_tl **tl); +R *X(rader_tl_find)(INT k1, INT k2, INT k3, rader_tl *t); +void X(rader_tl_delete)(R *W, rader_tl **tl); + +/*-----------------------------------------------------------------------*/ +/* copy/transposition routines */ + +/* lower bound to the cache size, for tiled routines */ +#define CACHESIZE 8192 + +INT X(compute_tilesz)(INT vl, int how_many_tiles_in_cache); + +void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz, + void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args), + void *args); +void X(cpy1d)(R *I, R *O, INT n0, INT is0, INT os0, INT vl); +void X(cpy2d)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl); +void X(cpy2d_ci)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl); +void X(cpy2d_co)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl); +void X(cpy2d_tiled)(R *I, R *O, + INT n0, INT is0, 
INT os0, + INT n1, INT is1, INT os1, + INT vl); +void X(cpy2d_tiledbuf)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl); +void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1); +void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1); +void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1); + +void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl); +void X(transpose_tiled)(R *I, INT n, INT s0, INT s1, INT vl); +void X(transpose_tiledbuf)(R *I, INT n, INT s0, INT s1, INT vl); + +typedef void (*transpose_func)(R *I, INT n, INT s0, INT s1, INT vl); +typedef void (*cpy2d_func)(R *I, R *O, + INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT vl); + +/*-----------------------------------------------------------------------*/ +/* misc stuff */ +void X(null_awake)(plan *ego, enum wakefulness wakefulness); +double X(iestimate_cost)(const planner *, const plan *, const problem *); + +#ifdef FFTW_RANDOM_ESTIMATOR +extern unsigned X(random_estimate_seed); +#endif + +double X(measure_execution_time)(const planner *plnr, + plan *pln, const problem *p); +IFFTW_EXTERN int X(alignment_of)(R *p); +unsigned X(hash)(const char *s); +INT X(nbuf)(INT n, INT vl, INT maxnbuf); +int X(nbuf_redundant)(INT n, INT vl, int which, + const INT *maxnbuf, int nmaxnbuf); +INT X(bufdist)(INT n, INT vl); +int X(toobig)(INT n); +int X(ct_uglyp)(INT min_n, INT v, INT n, INT r); + +#if HAVE_SIMD +R *X(taint)(R *p, INT s); +R *X(join_taint)(R *p1, R *p2); +#define TAINT(p, s) X(taint)(p, s) +#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3)) +#define TAINTOF(p) (((uintptr_t)(p)) & 3) +#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2) +#else +#define TAINT(p, s) (p) +#define UNTAINT(p) (p) +#define TAINTOF(p) 0 +#define JOIN_TAINT(p1, p2) p1 +#endif + +#ifdef FFTW_DEBUG_ALIGNMENT +# define 
ASSERT_ALIGNED_DOUBLE { \ + double __foo; \ + CK(!(((uintptr_t) &__foo) & 0x7)); \ +} +#else +# define ASSERT_ALIGNED_DOUBLE +#endif /* FFTW_DEBUG_ALIGNMENT */ + + + +/*-----------------------------------------------------------------------*/ +/* macros used in codelets to reduce source code size */ + +typedef R E; /* internal precision of codelets. */ + +#if defined(FFTW_LDOUBLE) +# define K(x) ((E) x##L) +#elif defined(FFTW_QUAD) +# define K(x) ((E) x##Q) +#else +# define K(x) ((E) x) +#endif +#define DK(name, value) const E name = K(value) + +/* FMA macros */ + +#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__) || defined(_POWER)) +/* The obvious expression a * b + c does not work. If both x = a * b + + c and y = a * b - c appear in the source, gcc computes t = a * b, + x = t + c, y = t - c, thus destroying the fma. + + This peculiar coding seems to do the right thing on all of + gcc-2.95, gcc-3.1, gcc-3.2, and gcc-3.3. It does the right thing + on gcc-3.4 -fno-web (because the ``web'' pass splits the variable + `x' for the single-assignment form). + + However, gcc-4.0 is a formidable adversary which succeeds in + pessimizing two fma's into one multiplication and two additions. + It does it very early in the game---before the optimization passes + even start. The only real workaround seems to use fake inline asm + such as + + asm ("# confuse gcc %0" : "=f"(a) : "0"(a)); + return a * b + c; + + in each of the FMA, FMS, FNMA, and FNMS functions. However, this + does not solve the problem either, because two equal asm statements + count as a common subexpression! One must use *different* fake asm + statements: + + in FMA: + asm ("# confuse gcc for fma %0" : "=f"(a) : "0"(a)); + + in FMS: + asm ("# confuse gcc for fms %0" : "=f"(a) : "0"(a)); + + etc. + + After these changes, gcc recalcitrantly generates the fma that was + in the source to begin with. However, the extra asm() cruft + confuses other passes of gcc, notably the instruction scheduler. 
+ (Of course, one could also generate the fma directly via inline + asm, but this confuses the scheduler even more.) + + Steven and I have submitted more than one bug report to the gcc + mailing list over the past few years, to no effect. Thus, I give + up. gcc-4.0 can go to hell. I'll wait at least until gcc-4.3 is + out before touching this crap again. +*/ +static __inline__ E FMA(E a, E b, E c) +{ + E x = a * b; + x = x + c; + return x; +} + +static __inline__ E FMS(E a, E b, E c) +{ + E x = a * b; + x = x - c; + return x; +} + +static __inline__ E FNMA(E a, E b, E c) +{ + E x = a * b; + x = - (x + c); + return x; +} + +static __inline__ E FNMS(E a, E b, E c) +{ + E x = a * b; + x = - (x - c); + return x; +} +#else +#define FMA(a, b, c) (((a) * (b)) + (c)) +#define FMS(a, b, c) (((a) * (b)) - (c)) +#define FNMA(a, b, c) (- (((a) * (b)) + (c))) +#define FNMS(a, b, c) ((c) - ((a) * (b))) +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __IFFTW_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/kalloc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/kalloc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#if defined(HAVE_MALLOC_H) +# include +#endif + +/* ``kernel'' malloc(), with proper memory alignment */ + +#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN +extern void *memalign(size_t, size_t); +#endif + +#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN +extern int posix_memalign(void **, size_t, size_t); +#endif + +#if defined(macintosh) /* MacOS 9 */ +# include +#endif + +#define real_free free /* memalign and malloc use ordinary free */ + +#define IS_POWER_OF_TWO(n) (((n) > 0) && (((n) & ((n) - 1)) == 0)) +#if defined(WITH_OUR_MALLOC) && (MIN_ALIGNMENT >= 8) && IS_POWER_OF_TWO(MIN_ALIGNMENT) +/* Our own MIN_ALIGNMENT-aligned malloc/free. Assumes sizeof(void*) is a + power of two <= 8 and that malloc is at least sizeof(void*)-aligned. + + The main reason for this routine is that, as of this writing, + Windows does not include any aligned allocation routines in its + system libraries, and instead provides an implementation with a + Visual C++ "Processor Pack" that you have to statically link into + your program. We do not want to require users to have VC++ + (e.g. gcc/MinGW should be fine). Our code should be at least as good + as the MS _aligned_malloc, in any case, according to second-hand + reports of the algorithm it employs (also based on plain malloc). 
*/ +static void *our_malloc(size_t n) +{ + void *p0, *p; + if (!(p0 = malloc(n + MIN_ALIGNMENT))) return (void *) 0; + p = (void *) (((uintptr_t) p0 + MIN_ALIGNMENT) & (~((uintptr_t) (MIN_ALIGNMENT - 1)))); + *((void **) p - 1) = p0; + return p; +} +static void our_free(void *p) +{ + if (p) free(*((void **) p - 1)); +} +#endif + +void *X(kernel_malloc)(size_t n) +{ + void *p; + +#if defined(MIN_ALIGNMENT) + +# if defined(WITH_OUR_MALLOC) + p = our_malloc(n); +# undef real_free +# define real_free our_free + +# elif defined(__FreeBSD__) && (MIN_ALIGNMENT <= 16) + /* FreeBSD does not have memalign, but its malloc is 16-byte aligned. */ + p = malloc(n); + +# elif (defined(__MACOSX__) || defined(__APPLE__)) && (MIN_ALIGNMENT <= 16) + /* MacOS X malloc is already 16-byte aligned */ + p = malloc(n); + +# elif defined(HAVE_MEMALIGN) + p = memalign(MIN_ALIGNMENT, n); + +# elif defined(HAVE_POSIX_MEMALIGN) + /* note: posix_memalign is broken in glibc 2.2.5: it constrains + the size, not the alignment, to be (power of two) * sizeof(void*). + The bug seems to have been fixed as of glibc 2.3.1. 
*/ + if (posix_memalign(&p, MIN_ALIGNMENT, n)) + p = (void*) 0; + +# elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) + /* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */ + p = (void *) _mm_malloc(n, MIN_ALIGNMENT); +# undef real_free +# define real_free _mm_free + +# elif defined(_MSC_VER) + /* MS Visual C++ 6.0 with a "Processor Pack" supports SIMD + and _aligned_malloc/free (uses malloc.h) */ + p = (void *) _aligned_malloc(n, MIN_ALIGNMENT); +# undef real_free +# define real_free _aligned_free + +# elif defined(macintosh) /* MacOS 9 */ + p = (void *) MPAllocateAligned(n, +# if MIN_ALIGNMENT == 8 + kMPAllocate8ByteAligned, +# elif MIN_ALIGNMENT == 16 + kMPAllocate16ByteAligned, +# elif MIN_ALIGNMENT == 32 + kMPAllocate32ByteAligned, +# else +# error "Unknown alignment for MPAllocateAligned" +# endif + 0); +# undef real_free +# define real_free MPFree + +# else + /* Add your machine here and send a patch to fftw@fftw.org + or (e.g. for Windows) configure --with-our-malloc */ +# error "Don't know how to malloc() aligned memory ... try configuring --with-our-malloc" +# endif + +#else /* !defined(MIN_ALIGNMENT) */ + p = malloc(n); +#endif + + return p; +} + +void X(kernel_free)(void *p) +{ + real_free(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/md5-1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/md5-1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + + +void X(md5putb)(md5 *p, const void *d_, size_t len) +{ + size_t i; + const unsigned char *d = (const unsigned char *)d_; + for (i = 0; i < len; ++i) + X(md5putc)(p, d[i]); +} + +void X(md5puts)(md5 *p, const char *s) +{ + /* also hash final '\0' */ + do { + X(md5putc)(p, *s); + } while(*s++); +} + +void X(md5int)(md5 *p, int i) +{ + X(md5putb)(p, &i, sizeof(i)); +} + +void X(md5INT)(md5 *p, INT i) +{ + X(md5putb)(p, &i, sizeof(i)); +} + +void X(md5unsigned)(md5 *p, unsigned i) +{ + X(md5putb)(p, &i, sizeof(i)); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/md5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/md5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* + independent implementation of Ron Rivest's MD5 message-digest + algorithm, based on rfc 1321. + + Optimized for small code size, not speed. Works as long as + sizeof(md5uint) >= 4. +*/ + +#include "ifftw.h" + +/* sintab[i] = 4294967296.0 * abs(sin((double)(i + 1))) */ +static const md5uint sintab[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 +}; + +/* see rfc 1321 section 3.4 */ +static const struct roundtab { + char k; + char s; +} roundtab[64] = { + { 0, 7}, { 1, 12}, { 2, 17}, { 3, 22}, + { 4, 7}, { 5, 12}, { 6, 17}, { 7, 22}, + { 8, 7}, { 9, 12}, { 10, 17}, { 11, 22}, + { 12, 7}, { 13, 12}, { 14, 17}, { 15, 22}, + { 1, 5}, { 6, 9}, { 11, 14}, { 0, 20}, + { 5, 5}, { 10, 9}, { 15, 14}, { 4, 20}, + { 9, 5}, { 14, 9}, { 3, 14}, { 8, 20}, + { 13, 5}, { 2, 9}, { 7, 14}, { 12, 20}, + { 5, 4}, { 8, 11}, { 11, 16}, { 14, 23}, + { 1, 4}, { 4, 11}, { 7, 16}, { 10, 23}, + { 13, 4}, { 0, 11}, { 3, 16}, { 6, 23}, + { 9, 4}, { 12, 11}, { 15, 16}, { 2, 23}, + { 0, 6}, { 7, 10}, { 14, 15}, { 5, 21}, 
+ { 12, 6}, { 3, 10}, { 10, 15}, { 1, 21}, + { 8, 6}, { 15, 10}, { 6, 15}, { 13, 21}, + { 4, 6}, { 11, 10}, { 2, 15}, { 9, 21} +}; + +#define rol(a, s) ((a << (int)(s)) | (a >> (32 - (int)(s)))) + +static void doblock(md5sig state, const unsigned char *data) +{ + md5uint a, b, c, d, t, x[16]; + const md5uint msk = (md5uint)0xffffffffUL; + int i; + + /* encode input bytes into md5uint */ + for (i = 0; i < 16; ++i) { + const unsigned char *p = data + 4 * i; + x[i] = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); + } + + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + for (i = 0; i < 64; ++i) { + const struct roundtab *p = roundtab + i; + switch (i >> 4) { + case 0: a += (b & c) | (~b & d); break; + case 1: a += (b & d) | (c & ~d); break; + case 2: a += b ^ c ^ d; break; + case 3: a += c ^ (b | ~d); break; + } + a += sintab[i]; + a += x[(int)(p->k)]; + a &= msk; + t = b + rol(a, p->s); + a = d; d = c; c = b; b = t; + } + state[0] = (state[0] + a) & msk; + state[1] = (state[1] + b) & msk; + state[2] = (state[2] + c) & msk; + state[3] = (state[3] + d) & msk; +} + + +void X(md5begin)(md5 *p) +{ + p->s[0] = 0x67452301; + p->s[1] = 0xefcdab89; + p->s[2] = 0x98badcfe; + p->s[3] = 0x10325476; + p->l = 0; +} + +void X(md5putc)(md5 *p, unsigned char c) +{ + p->c[p->l % 64] = c; + if (((++p->l) % 64) == 0) doblock(p->s, p->c); +} + +void X(md5end)(md5 *p) +{ + unsigned l, i; + + l = 8 * p->l; /* length before padding, in bits */ + + /* rfc 1321 section 3.1: padding */ + X(md5putc)(p, 0x80); + while ((p->l % 64) != 56) X(md5putc)(p, 0x00); + + /* rfc 1321 section 3.2: length (little endian) */ + for (i = 0; i < 8; ++i) { + X(md5putc)(p, l & 0xFF); + l = l >> 8; + } + + /* Now p->l % 64 == 0 and signature is in p->s */ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/minmax.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/minmax.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 
Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +INT X(imax)(INT a, INT b) +{ + return (a > b) ? a : b; +} + +INT X(imin)(INT a, INT b) +{ + return (a < b) ? a : b; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/ops.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/ops.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +void X(ops_zero)(opcnt *dst) +{ + dst->add = dst->mul = dst->fma = dst->other = 0; +} + +void X(ops_cpy)(const opcnt *src, opcnt *dst) +{ + *dst = *src; +} + +void X(ops_other)(INT o, opcnt *dst) +{ + X(ops_zero)(dst); + dst->other = o; +} + +void X(ops_madd)(INT m, const opcnt *a, const opcnt *b, opcnt *dst) +{ + dst->add = m * a->add + b->add; + dst->mul = m * a->mul + b->mul; + dst->fma = m * a->fma + b->fma; + dst->other = m * a->other + b->other; +} + +void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst) +{ + X(ops_madd)(1, a, b, dst); +} + +void X(ops_add2)(const opcnt *a, opcnt *dst) +{ + X(ops_add)(a, dst, dst); +} + +void X(ops_madd2)(INT m, const opcnt *a, opcnt *dst) +{ + X(ops_madd)(m, a, dst, dst); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/pickdim.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/pickdim.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + + +/* Given a solver which_dim, a vector sz, and whether or not the + transform is out-of-place, return the actual dimension index that + it corresponds to. The basic idea here is that we return the + which_dim'th valid dimension, starting from the end if + which_dim < 0. */ +static int really_pickdim(int which_dim, const tensor *sz, int oop, int *dp) +{ + int i; + int count_ok = 0; + if (which_dim > 0) { + for (i = 0; i < sz->rnk; ++i) { + if (oop || sz->dims[i].is == sz->dims[i].os) + if (++count_ok == which_dim) { + *dp = i; + return 1; + } + } + } + else if (which_dim < 0) { + for (i = sz->rnk - 1; i >= 0; --i) { + if (oop || sz->dims[i].is == sz->dims[i].os) + if (++count_ok == -which_dim) { + *dp = i; + return 1; + } + } + } + else { /* zero: pick the middle, if valid */ + i = (sz->rnk - 1) / 2; + if (i >= 0 && (oop || sz->dims[i].is == sz->dims[i].os)) { + *dp = i; + return 1; + } + } + return 0; +} + +/* Like really_pickdim, but only returns 1 if no previous "buddy" + which_dim in the buddies list would give the same dim. */ +int X(pickdim)(int which_dim, const int *buddies, int nbuddies, + const tensor *sz, int oop, int *dp) +{ + int i, d1; + + if (!really_pickdim(which_dim, sz, oop, dp)) + return 0; + + /* check whether some buddy solver would produce the same dim. + If so, consider this solver unapplicable and let the buddy + take care of it. The smallest-indexed buddy is applicable. 
*/ + for (i = 0; i < nbuddies; ++i) { + if (buddies[i] == which_dim) + break; /* found self */ + if (really_pickdim(buddies[i], sz, oop, &d1) && *dp == d1) + return 0; /* found equivalent buddy */ + } + return 1; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/plan.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/plan.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +/* "Plan: To bother about the best method of accomplishing an + accidental result." (Ambrose Bierce, The Enlarged Devil's + Dictionary). 
*/ + +plan *X(mkplan)(size_t size, const plan_adt *adt) +{ + plan *p = (plan *)MALLOC(size, PLANS); + + A(adt->destroy); + p->adt = adt; + X(ops_zero)(&p->ops); + p->pcost = 0.0; + p->wakefulness = SLEEPY; + p->could_prune_now_p = 0; + + return p; +} + +/* + * destroy a plan + */ +void X(plan_destroy_internal)(plan *ego) +{ + if (ego) { + A(ego->wakefulness == SLEEPY); + ego->adt->destroy(ego); + X(ifree)(ego); + } +} + +/* dummy destroy routine for plans with no local state */ +void X(plan_null_destroy)(plan *ego) +{ + UNUSED(ego); + /* nothing */ +} + +void X(plan_awake)(plan *ego, enum wakefulness wakefulness) +{ + if (ego) { + A(((wakefulness == SLEEPY) ^ (ego->wakefulness == SLEEPY))); + + ego->adt->awake(ego, wakefulness); + ego->wakefulness = wakefulness; + } +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/planner.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/planner.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2000 Matteo Frigo + * Copyright (c) 2000 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" +#include + +/* GNU Coding Standards, Sec. 
5.2: "Please write the comments in a GNU + program in English, because English is the one language that nearly + all programmers in all countries can read." + + ingemisco tanquam reus + culpa rubet vultus meus + supplicanti parce [rms] +*/ + +#define VALIDP(solution) ((solution)->flags.hash_info & H_VALID) +#define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE) +#define SLVNDX(solution) ((solution)->flags.slvndx) +#define BLISS(flags) (((flags).hash_info) & BLESSING) +#define INFEASIBLE_SLVNDX ((1U<timelimit_impatience == 0); + return (LEQ(a->u, b->u) && LEQ(b->l, a->l)); + } else { + return (LEQ(a->l, b->l) + && a->timelimit_impatience <= b->timelimit_impatience); + } +} + +static unsigned addmod(unsigned a, unsigned b, unsigned p) +{ + /* gcc-2.95/sparc produces incorrect code for the fast version below. */ +#if defined(__sparc__) && defined(__GNUC__) + /* slow version */ + return (a + b) % p; +#else + /* faster version */ + unsigned c = a + b; + return c >= p ? c - p : c; +#endif +} + +/* + slvdesc management: +*/ +static void sgrow(planner *ego) +{ + unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4; + slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS); + slvdesc *otab = ego->slvdescs; + unsigned i; + + ego->slvdescs = ntab; + ego->slvdescsiz = nsiz; + for (i = 0; i < osiz; ++i) + ntab[i] = otab[i]; + X(ifree0)(otab); +} + +static void register_solver(planner *ego, solver *s) +{ + slvdesc *n; + int kind; + + if (s) { /* add s to solver list */ + X(solver_use)(s); + + A(ego->nslvdesc < INFEASIBLE_SLVNDX); + if (ego->nslvdesc >= ego->slvdescsiz) + sgrow(ego); + + n = ego->slvdescs + ego->nslvdesc; + + n->slv = s; + n->reg_nam = ego->cur_reg_nam; + n->reg_id = ego->cur_reg_id++; + + A(strlen(n->reg_nam) < MAXNAM); + n->nam_hash = X(hash)(n->reg_nam); + + kind = s->adt->problem_kind; + n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind]; + ego->slvdescs_for_problem_kind[kind] = ego->nslvdesc; + + ego->nslvdesc++; 
+ } +} + +static unsigned slookup(planner *ego, char *nam, int id) +{ + unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */ + FORALL_SOLVERS(ego, s, sp, { + UNUSED(s); + if (sp->reg_id == id && sp->nam_hash == h + && !strcmp(sp->reg_nam, nam)) + return sp - ego->slvdescs; + }); + return INFEASIBLE_SLVNDX; +} + +/* Compute a MD5 hash of the configuration of the planner. + We store it into the wisdom file to make absolutely sure that + we are reading wisdom that is applicable */ +static void signature_of_configuration(md5 *m, planner *ego) +{ + X(md5begin)(m); + X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ + FORALL_SOLVERS(ego, s, sp, { + UNUSED(s); + X(md5int)(m, sp->reg_id); + X(md5puts)(m, sp->reg_nam); + }); + X(md5end)(m); +} + +/* + md5-related stuff: +*/ + +/* first hash function */ +static unsigned h1(const hashtab *ht, const md5sig s) +{ + unsigned h = s[0] % ht->hashsiz; + A(h == (s[0] % ht->hashsiz)); + return h; +} + +/* second hash function (for double hashing) */ +static unsigned h2(const hashtab *ht, const md5sig s) +{ + unsigned h = 1U + s[1] % (ht->hashsiz - 1); + A(h == (1U + s[1] % (ht->hashsiz - 1))); + return h; +} + +static void md5hash(md5 *m, const problem *p, const planner *plnr) +{ + X(md5begin)(m); + X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ + X(md5int)(m, plnr->nthr); + p->adt->hash(p, m); + X(md5end)(m); +} + +static int md5eq(const md5sig a, const md5sig b) +{ + return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; +} + +static void sigcpy(const md5sig a, md5sig b) +{ + b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; +} + +/* + memoization routines : +*/ + +/* + liber scriptus proferetur + in quo totum continetur + unde mundus iudicetur +*/ +struct solution_s { + md5sig s; + flags_t flags; +}; + +static solution *htab_lookup(hashtab *ht, const md5sig s, + const flags_t *flagsp) +{ + unsigned g, h = h1(ht, s), d = h2(ht, s); + solution *best 
= 0; + + ++ht->lookup; + + /* search all entries that match; select the one with + the lowest flags.u */ + /* This loop may potentially traverse the whole table, since at + least one element is guaranteed to be !LIVEP, but all elements + may be VALIDP. Hence, we stop after at the first invalid + element or after traversing the whole table. */ + g = h; + do { + solution *l = ht->solutions + g; + ++ht->lookup_iter; + if (VALIDP(l)) { + if (LIVEP(l) + && md5eq(s, l->s) + && subsumes(&l->flags, SLVNDX(l), flagsp) ) { + if (!best || LEQ(l->flags.u, best->flags.u)) + best = l; + } + } else + break; + + g = addmod(g, d, ht->hashsiz); + } while (g != h); + + if (best) + ++ht->succ_lookup; + return best; +} + +static solution *hlookup(planner *ego, const md5sig s, + const flags_t *flagsp) +{ + solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp); + if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp); + return sol; +} + +static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp, + unsigned slvndx, solution *slot) +{ + ++ht->insert; + ++ht->nelem; + A(!LIVEP(slot)); + slot->flags.u = flagsp->u; + slot->flags.l = flagsp->l; + slot->flags.timelimit_impatience = flagsp->timelimit_impatience; + slot->flags.hash_info |= H_VALID | H_LIVE; + SLVNDX(slot) = slvndx; + + /* keep this check enabled in case we add so many solvers + that the bitfield overflows */ + CK(SLVNDX(slot) == slvndx); + sigcpy(s, slot->s); +} + +static void kill_slot(hashtab *ht, solution *slot) +{ + A(LIVEP(slot)); /* ==> */ A(VALIDP(slot)); + + --ht->nelem; + slot->flags.hash_info = H_VALID; +} + +static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp, + unsigned slvndx) +{ + solution *l; + unsigned g, h = h1(ht, s), d = h2(ht, s); + + ++ht->insert_unknown; + + /* search for nonfull slot */ + for (g = h; ; g = addmod(g, d, ht->hashsiz)) { + ++ht->insert_iter; + l = ht->solutions + g; + if (!LIVEP(l)) break; + A((g + d) % ht->hashsiz != h); + } + + fill_slot(ht, s, 
flagsp, slvndx, l); +} + +static void rehash(hashtab *ht, unsigned nsiz) +{ + unsigned osiz = ht->hashsiz, h; + solution *osol = ht->solutions, *nsol; + + nsiz = (unsigned)X(next_prime)((INT)nsiz); + nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT); + ++ht->nrehash; + + /* init new table */ + for (h = 0; h < nsiz; ++h) + nsol[h].flags.hash_info = 0; + + /* install new table */ + ht->hashsiz = nsiz; + ht->solutions = nsol; + ht->nelem = 0; + + /* copy table */ + for (h = 0; h < osiz; ++h) { + solution *l = osol + h; + if (LIVEP(l)) + hinsert0(ht, l->s, &l->flags, SLVNDX(l)); + } + + X(ifree0)(osol); +} + +static unsigned minsz(unsigned nelem) +{ + return 1U + nelem + nelem / 8U; +} + +static unsigned nextsz(unsigned nelem) +{ + return minsz(minsz(nelem)); +} + +static void hgrow(hashtab *ht) +{ + unsigned nelem = ht->nelem; + if (minsz(nelem) >= ht->hashsiz) + rehash(ht, nextsz(nelem)); +} + +#if 0 +/* shrink the hash table, never used */ +static void hshrink(hashtab *ht) +{ + unsigned nelem = ht->nelem; + /* always rehash after deletions */ + rehash(ht, nextsz(nelem)); +} +#endif + +static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp, + unsigned slvndx) +{ + unsigned g, h = h1(ht, s), d = h2(ht, s); + solution *first = 0; + + /* Remove all entries that are subsumed by the new one. */ + /* This loop may potentially traverse the whole table, since at + least one element is guaranteed to be !LIVEP, but all elements + may be VALIDP. Hence, we stop after at the first invalid + element or after traversing the whole table. */ + g = h; + do { + solution *l = ht->solutions + g; + ++ht->insert_iter; + if (VALIDP(l)) { + if (LIVEP(l) && md5eq(s, l->s)) { + if (subsumes(flagsp, slvndx, &l->flags)) { + if (!first) first = l; + kill_slot(ht, l); + } else { + /* It is an error to insert an element that + is subsumed by an existing entry. 
*/ + A(!subsumes(&l->flags, SLVNDX(l), flagsp)); + } + } + } else + break; + + g = addmod(g, d, ht->hashsiz); + } while (g != h); + + if (first) { + /* overwrite FIRST */ + fill_slot(ht, s, flagsp, slvndx, first); + } else { + /* create a new entry */ + hgrow(ht); + hinsert0(ht, s, flagsp, slvndx); + } +} + +static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp, + unsigned slvndx) +{ + htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed, + s, flagsp, slvndx ); +} + + +static void invoke_hook(planner *ego, plan *pln, const problem *p, + int optimalp) +{ + if (ego->hook) + ego->hook(ego, pln, p, optimalp); +} + +#ifdef FFTW_RANDOM_ESTIMATOR +/* a "random" estimate, used for debugging to generate "random" + plans, albeit from a deterministic seed. */ + +unsigned X(random_estimate_seed) = 0; + +static double random_estimate(const planner *ego, const plan *pln, + const problem *p) +{ + md5 m; + X(md5begin)(&m); + X(md5unsigned)(&m, X(random_estimate_seed)); + X(md5int)(&m, ego->nthr); + p->adt->hash(p, &m); + X(md5putb)(&m, &pln->ops, sizeof(pln->ops)); + X(md5putb)(&m, &pln->adt, sizeof(pln->adt)); + X(md5end)(&m); + return ego->cost_hook ? 
ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0]; +} + +#endif + +double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p) +{ + double cost = + + pln->ops.add + + pln->ops.mul + +#if HAVE_FMA + + pln->ops.fma +#else + + 2 * pln->ops.fma +#endif + + + pln->ops.other; + if (ego->cost_hook) + cost = ego->cost_hook(p, cost, COST_MAX); + return cost; +} + +static void evaluate_plan(planner *ego, plan *pln, const problem *p) +{ + if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) { + ego->nplan++; + + if (ESTIMATEP(ego)) { + estimate: + /* heuristic */ +#ifdef FFTW_RANDOM_ESTIMATOR + pln->pcost = random_estimate(ego, pln, p); + ego->epcost += X(iestimate_cost)(ego, pln, p); +#else + pln->pcost = X(iestimate_cost)(ego, pln, p); + ego->epcost += pln->pcost; +#endif + } else { + double t = X(measure_execution_time)(ego, pln, p); + + if (t < 0) { /* unavailable cycle counter */ + /* Real programmers can write FORTRAN in any language */ + goto estimate; + } + + pln->pcost = t; + ego->pcost += t; + ego->need_timeout_check = 1; + } + } + + invoke_hook(ego, pln, p, 0); +} + +/* maintain dynamic scoping of flags, nthr: */ +static plan *invoke_solver(planner *ego, const problem *p, solver *s, + const flags_t *nflags) +{ + flags_t flags = ego->flags; + int nthr = ego->nthr; + plan *pln; + ego->flags = *nflags; + PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; + A(p->adt->problem_kind == s->adt->problem_kind); + pln = s->adt->mkplan(s, p, ego); + ego->nthr = nthr; + ego->flags = flags; + return pln; +} + +/* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */ +static int timeout_p(planner *ego, const problem *p) +{ + /* do not timeout when estimating. First, the estimator is the + planner of last resort. 
Second, calling X(elapsed_since)() is + slower than estimating */ + if (!ESTIMATEP(ego)) { + /* do not assume that X(elapsed_since)() is monotonic */ + if (ego->timed_out) { + A(ego->need_timeout_check); + return 1; + } + + if (ego->timelimit >= 0 && + X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) { + ego->timed_out = 1; + ego->need_timeout_check = 1; + return 1; + } + } + + A(!ego->timed_out); + ego->need_timeout_check = 0; + return 0; +} + +static plan *search0(planner *ego, const problem *p, unsigned *slvndx, + const flags_t *flagsp) +{ + plan *best = 0; + int best_not_yet_timed = 1; + + /* Do not start a search if the planner timed out. This check is + necessary, lest the relaxation mechanism kick in */ + if (timeout_p(ego, p)) + return 0; + + FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, { + plan *pln; + + pln = invoke_solver(ego, p, s, flagsp); + + if (ego->need_timeout_check) + if (timeout_p(ego, p)) { + X(plan_destroy_internal)(pln); + X(plan_destroy_internal)(best); + return 0; + } + + if (pln) { + /* read COULD_PRUNE_NOW_P because PLN may be destroyed + before we use COULD_PRUNE_NOW_P */ + int could_prune_now_p = pln->could_prune_now_p; + + if (best) { + if (best_not_yet_timed) { + evaluate_plan(ego, best, p); + best_not_yet_timed = 0; + } + evaluate_plan(ego, pln, p); + if (pln->pcost < best->pcost) { + X(plan_destroy_internal)(best); + best = pln; + *slvndx = sp - ego->slvdescs; + } else { + X(plan_destroy_internal)(pln); + } + } else { + best = pln; + *slvndx = sp - ego->slvdescs; + } + + if (ALLOW_PRUNINGP(ego) && could_prune_now_p) + break; + } + }); + + return best; +} + +static plan *search(planner *ego, const problem *p, unsigned *slvndx, + flags_t *flagsp) +{ + plan *pln = 0; + unsigned i; + + /* relax impatience in this order: */ + static const unsigned relax_tab[] = { + 0, /* relax nothing */ + NO_VRECURSE, + NO_FIXED_RADIX_LARGE_N, + NO_SLOW, + NO_UGLY + }; + + unsigned l_orig = flagsp->l; + unsigned x = flagsp->u; + 
+ /* guaranteed to be different from X */ + unsigned last_x = ~x; + + for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) { + if (LEQ(l_orig, x & ~relax_tab[i])) + x = x & ~relax_tab[i]; + + if (x != last_x) { + last_x = x; + flagsp->l = x; + pln = search0(ego, p, slvndx, flagsp); + if (pln) break; + } + } + + if (!pln) { + /* search [L_ORIG, U] */ + if (l_orig != last_x) { + last_x = l_orig; + flagsp->l = l_orig; + pln = search0(ego, p, slvndx, flagsp); + } + } + + return pln; +} + +#define CHECK_FOR_BOGOSITY \ + if ((ego->bogosity_hook ? \ + (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \ + : ego->wisdom_state) == WISDOM_IS_BOGUS) \ + goto wisdom_is_bogus; + +static plan *mkplan(planner *ego, const problem *p) +{ + plan *pln; + md5 m; + unsigned slvndx; + flags_t flags_of_solution; + solution *sol; + solver *s; + + ASSERT_ALIGNED_DOUBLE; + A(LEQ(PLNR_L(ego), PLNR_U(ego))); + + if (ESTIMATEP(ego)) + PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */ + + +#ifdef FFTW_DEBUG + check(&ego->htab_blessed); + check(&ego->htab_unblessed); +#endif + + pln = 0; + + CHECK_FOR_BOGOSITY; + + ego->timed_out = 0; + + ++ego->nprob; + md5hash(&m, p, ego); + + flags_of_solution = ego->flags; + + if (ego->wisdom_state != WISDOM_IGNORE_ALL) { + if ((sol = hlookup(ego, m.s, &flags_of_solution))) { + /* wisdom is acceptable */ + wisdom_state_t owisdom_state = ego->wisdom_state; + + /* this hook is mainly for MPI, to make sure that + wisdom is in sync across all processes for MPI problems */ + if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags)) + goto do_search; /* ignore not-ok wisdom */ + + slvndx = SLVNDX(sol); + + if (slvndx == INFEASIBLE_SLVNDX) { + if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE) + goto do_search; + else + return 0; /* known to be infeasible */ + } + + flags_of_solution = sol->flags; + + /* inherit blessing either from wisdom + or from the planner */ + flags_of_solution.hash_info |= BLISS(ego->flags); + + 
ego->wisdom_state = WISDOM_ONLY; + + s = ego->slvdescs[slvndx].slv; + if (p->adt->problem_kind != s->adt->problem_kind) + goto wisdom_is_bogus; + + pln = invoke_solver(ego, p, s, &flags_of_solution); + + CHECK_FOR_BOGOSITY; /* catch error in child solvers */ + + sol = 0; /* Paranoia: SOL may be dangling after + invoke_solver(); make sure we don't accidentally + reuse it. */ + + if (!pln) + goto wisdom_is_bogus; + + ego->wisdom_state = owisdom_state; + + goto skip_search; + } + else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */ + ego->nowisdom_hook(p); /* is in sync across all processes */ + } + + do_search: + /* cannot search in WISDOM_ONLY mode */ + if (ego->wisdom_state == WISDOM_ONLY) + goto wisdom_is_bogus; + + flags_of_solution = ego->flags; + pln = search(ego, p, &slvndx, &flags_of_solution); + CHECK_FOR_BOGOSITY; /* catch error in child solvers */ + + if (ego->timed_out) { + A(!pln); + if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) { + /* record (below) that this plan has failed because of + timeout */ + flags_of_solution.hash_info |= BLESSING; + } else { + /* this is not the top-level problem or timeout is not + active: record no wisdom. 
*/ + return 0; + } + } else { + /* canonicalize to infinite timeout */ + flags_of_solution.timelimit_impatience = 0; + } + + skip_search: + if (ego->wisdom_state == WISDOM_NORMAL || + ego->wisdom_state == WISDOM_ONLY) { + if (pln) { + hinsert(ego, m.s, &flags_of_solution, slvndx); + invoke_hook(ego, pln, p, 1); + } else { + hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX); + } + } + + return pln; + + wisdom_is_bogus: + X(plan_destroy_internal)(pln); + ego->wisdom_state = WISDOM_IS_BOGUS; + return 0; +} + +static void htab_destroy(hashtab *ht) +{ + X(ifree)(ht->solutions); + ht->solutions = 0; + ht->nelem = 0U; +} + +static void mkhashtab(hashtab *ht) +{ + ht->nrehash = 0; + ht->succ_lookup = ht->lookup = ht->lookup_iter = 0; + ht->insert = ht->insert_iter = ht->insert_unknown = 0; + + ht->solutions = 0; + ht->hashsiz = ht->nelem = 0U; + hgrow(ht); /* so that hashsiz > 0 */ +} + +/* destroy hash table entries. If FORGET_EVERYTHING, destroy the whole + table. If FORGET_ACCURSED, then destroy entries that are not blessed. */ +static void forget(planner *ego, amnesia a) +{ + switch (a) { + case FORGET_EVERYTHING: + htab_destroy(&ego->htab_blessed); + mkhashtab(&ego->htab_blessed); + /* fall through */ + case FORGET_ACCURSED: + htab_destroy(&ego->htab_unblessed); + mkhashtab(&ego->htab_unblessed); + break; + default: + break; + } +} + +/* FIXME: what sort of version information should we write? 
*/ +#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom)) +static const char stimeout[] = "TIMEOUT"; + +/* export the blessed wisdom table through printer P ("let not such great labor be in vain") */ +static void exprt(planner *ego, printer *p) +{ + unsigned h; + hashtab *ht = &ego->htab_blessed; + md5 m; + + signature_of_configuration(&m, ego); + + p->print(p, + "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", + m.s[0], m.s[1], m.s[2], m.s[3]); + + for (h = 0; h < ht->hashsiz; ++h) { + solution *l = ht->solutions + h; + if (LIVEP(l)) { + const char *reg_nam; + int reg_id; + + if (SLVNDX(l) == INFEASIBLE_SLVNDX) { + reg_nam = stimeout; + reg_id = 0; + } else { + slvdesc *sp = ego->slvdescs + SLVNDX(l); + reg_nam = sp->reg_nam; + reg_id = sp->reg_id; + } + + /* one record per live entry ("thou who freely savest those to be saved, + save me, fount of mercy") */ + p->print(p, " (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n", + reg_nam, reg_id, + l->flags.l, l->flags.u, l->flags.timelimit_impatience, + l->s[0], l->s[1], l->s[2], l->s[3]); + } + } + p->print(p, ")\n"); +} + +/* import wisdom from scanner SC into the blessed table; returns 1 on success, 0 on failure, + restoring the previous table if a record is bad ("death and nature shall be astounded when creation rises again") */ +static int imprt(planner *ego, scanner *sc) +{ + char buf[MAXNAM + 1]; + md5uint sig[4]; + unsigned l, u, timelimit_impatience; + flags_t flags; + int reg_id; + unsigned slvndx; + hashtab *ht = &ego->htab_blessed; + hashtab old; + md5 m; + + if (!sc->scan(sc, + "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", + sig + 0, sig + 1, sig + 2, sig + 3)) + return 0; /* don't need to restore hashtable */ + + signature_of_configuration(&m, ego); + if (m.s[0] != sig[0] || m.s[1] != sig[1] || + m.s[2] != sig[2] || m.s[3] != sig[3]) { + /* invalid configuration */ + return 0; + } + + /* make a backup copy of the hash table (cache the hash) */ + { + unsigned h, hsiz = ht->hashsiz; + old = *ht; + old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT); + for (h = 0; h < hsiz; ++h) + old.solutions[h] = ht->solutions[h]; + } + + while (1) { + if (sc->scan(sc, ")")) + break; + + /* parse one record ("when from the ashes shall arise") */ + if (!sc->scan(sc, "(%*s %d #x%x #x%x 
#x%x #x%M #x%M #x%M #x%M)", + MAXNAM, buf, &reg_id, &l, &u, &timelimit_impatience, + sig + 0, sig + 1, sig + 2, sig + 3)) + goto bad; + + if (!strcmp(buf, stimeout) && reg_id == 0) { + slvndx = INFEASIBLE_SLVNDX; + } else { + if (timelimit_impatience != 0) + goto bad; + + slvndx = slookup(ego, buf, reg_id); + if (slvndx == INFEASIBLE_SLVNDX) + goto bad; + } + + /* rebuild the flags, marked as blessed ("grant me a place among the sheep") */ + flags.l = l; + flags.u = u; + flags.timelimit_impatience = timelimit_impatience; + flags.hash_info = BLESSING; + + CK(flags.l == l); + CK(flags.u == u); + CK(flags.timelimit_impatience == timelimit_impatience); + + if (!hlookup(ego, sig, &flags)) + hinsert(ego, sig, &flags, slvndx); + } + + X(ifree0)(old.solutions); + return 1; + + bad: + /* ``The wisdom of FFTW must be above suspicion.'' */ + X(ifree0)(ht->solutions); + *ht = old; + return 0; +} + +/* + * create a planner + */ +planner *X(mkplanner)(void) +{ + int i; + + static const planner_adt padt = { + register_solver, mkplan, forget, exprt, imprt + }; + + planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS); + + p->adt = &padt; + p->nplan = p->nprob = 0; + p->pcost = p->epcost = 0.0; + p->hook = 0; + p->cost_hook = 0; + p->wisdom_ok_hook = 0; + p->nowisdom_hook = 0; + p->bogosity_hook = 0; + p->cur_reg_nam = 0; + p->wisdom_state = WISDOM_NORMAL; + + p->slvdescs = 0; + p->nslvdesc = p->slvdescsiz = 0; + + p->flags.l = 0; + p->flags.u = 0; + p->flags.timelimit_impatience = 0; + p->flags.hash_info = 0; + p->nthr = 1; + p->need_timeout_check = 1; + p->timelimit = -1; + + mkhashtab(&p->htab_blessed); + mkhashtab(&p->htab_unblessed); + + for (i = 0; i < PROBLEM_LAST; ++i) + p->slvdescs_for_problem_kind[i] = -1; + + return p; +} + +void X(planner_destroy)(planner *ego) +{ + /* destroy hash table */ + htab_destroy(&ego->htab_blessed); + htab_destroy(&ego->htab_unblessed); + + /* destroy solvdesc table */ + FORALL_SOLVERS(ego, s, sp, { + UNUSED(sp); + X(solver_destroy)(s); + }); + + X(ifree0)(ego->slvdescs); + X(ifree)(ego); 
/* "grant them rest" */ +} + +plan *X(mkplan_d)(planner *ego, problem *p) +{ + plan *pln = ego->adt->mkplan(ego, p); + X(problem_destroy)(p); + return pln; +} + +/* like X(mkplan_d), but sets/resets flags as well; the planner's previous flags are restored before returning */ +plan *X(mkplan_f_d)(planner *ego, problem *p, + unsigned l_set, unsigned u_set, unsigned u_reset) +{ + flags_t oflags = ego->flags; + plan *pln; + + PLNR_U(ego) &= ~u_reset; + PLNR_L(ego) &= ~u_reset; + PLNR_L(ego) |= l_set; + PLNR_U(ego) |= u_set | l_set; + pln = X(mkplan_d)(ego, p); + ego->flags = oflags; + return pln; +} + +/* + * Debugging code: + */ +#ifdef FFTW_DEBUG +static void check(hashtab *ht) +{ + unsigned live = 0; + unsigned i; + + A(ht->nelem < ht->hashsiz); + + for (i = 0; i < ht->hashsiz; ++i) { + solution *l = ht->solutions + i; + if (LIVEP(l)) + ++live; + } + + A(ht->nelem == live); + + for (i = 0; i < ht->hashsiz; ++i) { + solution *l1 = ht->solutions + i; + int foundit = 0; + if (LIVEP(l1)) { + unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s); + + g = h; + do { + solution *l = ht->solutions + g; + if (VALIDP(l)) { + if (l1 == l) + foundit = 1; + else if (LIVEP(l) && md5eq(l1->s, l->s)) { + A(!subsumes(&l->flags, SLVNDX(l), &l1->flags)); + A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags)); + } + } else + break; + g = addmod(g, d, ht->hashsiz); + } while (g != h); + + A(foundit); + } + } +} +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/primes.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/primes.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +/***************************************************************************/ + +/* Rader's algorithm requires lots of modular arithmetic, and if we + aren't careful we can have errors due to integer overflows. */ + +/* Compute (x * y) mod p, but watch out for integer overflows; we must + have 0 <= {x, y} < p. + + If overflow is common, this routine is somewhat slower than + e.g. using 'long long' arithmetic. However, it has the advantage + of working when INT is 64 bits, and is also faster when overflow is + rare. FFTW calls this via the MULMOD macro, which further + optimizes for the case of small integers. +*/ + +#define ADD_MOD(x, y, p) (((x) >= (p) - (y)) ? ((x) + ((y) - (p))) : ((x) + (y))) /* (x + y) mod p without forming x + y when it would reach p; the whole expansion is parenthesized so the conditional cannot bind to neighbouring operators */ + +INT X(safe_mulmod)(INT x, INT y, INT p) +{ + INT r; + + if (y > x) + return X(safe_mulmod)(y, x, p); + + A(0 <= y && x < p); + + r = 0; + while (y) { /* binary multiplication: add x for each set bit of y, doubling x mod p each round */ + r = ADD_MOD(r, x*(y&1), p); y >>= 1; + x = ADD_MOD(x, x, p); + } + + return r; +} + +/***************************************************************************/ + +/* Compute n^m mod p, where m >= 0 and p > 0. If we really cared, we + could make this tail-recursive. */ + +INT X(power_mod)(INT n, INT m, INT p) +{ + A(p > 0); + if (m == 0) + return 1; + else if (m % 2 == 0) { + INT x = X(power_mod)(n, m / 2, p); + return MULMOD(x, x, p); + } + else + return MULMOD(n, X(power_mod)(n, m - 1, p), p); +} + +/* the following two routines were contributed by Greg Dionne. 
*/ +static INT get_prime_factors(INT n, INT *primef) +{ + INT i; + INT size = 0; + + A(n % 2 == 0); /* this routine is designed only for even n; it stores each distinct prime factor of n once in primef and returns how many were stored. NOTE(review): n == 0 would never leave the halving loop below */ + primef[size++] = (INT)2; + do + n >>= 1; + while ((n & 1) == 0); + + if (n == 1) + return size; + + for (i = 3; i * i <= n; i += 2) + if (!(n % i)) { + primef[size++] = i; + do + n /= i; + while (!(n % i)); + } + if (n == 1) + return size; + primef[size++] = n; + return size; +} +/* returns 1 for p == 2; otherwise the smallest n >= 2 such that n^((p-1)/q) mod p != 1 for every prime factor q of p - 1 */ +INT X(find_generator)(INT p) +{ + INT n, i, size; + INT primef[16]; /* smallest number with 16 distinct prime factors = 32589158477190044730 > 2^64, so 16 slots are enough */ + INT pm1 = p - 1; + + if (p == 2) + return 1; + + size = get_prime_factors(pm1, primef); + n = 2; + for (i = 0; i < size; i++) + if (X(power_mod)(n, pm1 / primef[i], p) == 1) { + i = -1; + n++; + } + return n; +} + +/* Return first prime divisor of n; n itself is returned when n <= 1 or n is prime (It would be at best slightly faster to + search a static table of primes; there are 6542 primes < 2^16.) */ +INT X(first_divisor)(INT n) +{ + INT i; + if (n <= 1) + return n; + if (n % 2 == 0) + return 2; + for (i = 3; i*i <= n; i += 2) + if (n % i == 0) + return i; + return n; +} + +int X(is_prime)(INT n) +{ + return(n > 1 && X(first_divisor)(n) == n); +} +/* smallest prime >= n */ +INT X(next_prime)(INT n) +{ + while (!X(is_prime)(n)) ++n; + return n; +} +/* TRUE if n is a product of the primes in the zero-terminated list PRIMES. NOTE(review): n == 0 would never leave the division loop */ +int X(factors_into)(INT n, const INT *primes) +{ + for (; *primes != 0; ++primes) + while ((n % *primes) == 0) + n /= *primes; + return (n == 1); +} + +/* integer square root. Return floor(sqrt(N)) */ +INT X(isqrt)(INT n) +{ + INT guess, iguess; + + A(n >= 0); + if (n == 0) return 0; + + guess = n; iguess = 1; + + do { + guess = (guess + iguess) / 2; + iguess = n / guess; + } while (guess > iguess); + + return guess; +} +/* X(isqrt)(n) if n is a perfect square, 0 otherwise */ +static INT isqrt_maybe(INT n) +{ + INT guess = X(isqrt)(n); + return guess * guess == n ? guess : 0; +} + +#define divides(a, b) (((b) % (a)) == 0) +INT X(choose_radix)(INT r, INT n) +{ + if (r > 0) { + if (divides(r, n)) return r; + return 0; + } else if (r == 0) { + return X(first_divisor)(n); + } else { + /* r is negative. 
If n = (-r) * q^2, take q as the radix; in every other case the result is 0 */ + r = 0 - r; + return (n > r && divides(r, n)) ? isqrt_maybe(n / r) : 0; + } +} + +/* return A mod N, works for all A including A < 0 */ +INT X(modulo)(INT a, INT n) +{ + A(n > 0); + if (a >= 0) + return a % n; + else + return (n - 1) - ((-(a + (INT)1)) % n); +} + +/* TRUE if N factors into small primes (2, 3 and 5 only) */ +int X(factors_into_small_primes)(INT n) +{ + static const INT primes[] = { 2, 3, 5, 0 }; + return X(factors_into)(n, primes); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/print.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/print.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" +#include <stddef.h> +#include <stdarg.h> +#include <stdio.h> + +#define BSZ 64 + +static void myputs(printer *p, const char *s) +{ + char c; + while ((c = *s++)) + p->putchr(p, c); +} + +static void newline(printer *p) +{ + int i; + + p->putchr(p, '\n'); + for (i = 0; i < p->indent; ++i) + p->putchr(p, ' '); +} + +static const char *digits = "0123456789abcdef"; +/* print I in decimal, preceded by '-' when negative */ +static void putint(printer *p, INT i) +{ + char buf[BSZ]; + char *f = buf; + + if (i < 0) { + p->putchr(p, '-'); + i = -i; + } + + do { + *f++ = digits[i % 10]; + i /= 10; + } while (i); + + do { + p->putchr(p, *--f); + } while (f != buf); +} +/* print I in BASE, left-padded with '0' up to WIDTH digits */ +static void putulong(printer *p, unsigned long i, int base, int width) +{ + char buf[BSZ]; + char *f = buf; + + do { + *f++ = digits[i % base]; + i /= base; + } while (i); + + while (width > f - buf) { + p->putchr(p, '0'); + --width; + } + + do { + p->putchr(p, *--f); + } while (f != buf); +} +/* printf-like formatter: characters are copied to p->putchr except for the %-directives handled below */ +static void vprint(printer *p, const char *format, va_list ap) +{ + const char *s = format; + char c; + INT ival; + + while ((c = *s++)) { + switch (c) { + case '%': + switch ((c = *s++)) { + case 'M': { + /* md5 value */ + md5uint x = va_arg(ap, md5uint); + putulong(p, (unsigned long)(0xffffffffUL & x), + 16, 8); + break; + } + case 'c': { + int x = va_arg(ap, int); + p->putchr(p, x); + break; + } + case 's': { + char *x = va_arg(ap, char *); + if (x) + myputs(p, x); + else + goto putnull; + break; + } + case 'd': { + int x = va_arg(ap, int); + ival = (INT)x; + goto putival; + } + case 'D': { + ival = va_arg(ap, INT); + goto putival; + } + case 'v': { + /* print optional vector length */ + ival = va_arg(ap, INT); + if (ival > 1) { + myputs(p, "-x"); + goto putival; + } + break; + } + case 'o': { + /* integer option. 
Usage: %oNAME= */ + ival = va_arg(ap, INT); + if (ival) + p->putchr(p, '/'); + while ((c = *s++) != '=') + if (ival) + p->putchr(p, c); + if (ival) { + p->putchr(p, '='); + goto putival; + } + break; + } + case 'u': { + unsigned x = va_arg(ap, unsigned); + putulong(p, (unsigned long)x, 10, 0); + break; + } + case 'x': { + unsigned x = va_arg(ap, unsigned); + putulong(p, (unsigned long)x, 16, 0); + break; + } + case '(': { + /* newline, augment indent level */ + p->indent += p->indent_incr; + newline(p); + break; + } + case ')': { + /* decrement indent level */ + p->indent -= p->indent_incr; + break; + } + case 'p': { /* note difference from C's %p */ + /* print plan */ + plan *x = va_arg(ap, plan *); + if (x) + x->adt->print(x, p); + else + goto putnull; + break; + } + case 'P': { + /* print problem */ + problem *x = va_arg(ap, problem *); + if (x) + x->adt->print(x, p); + else + goto putnull; + break; + } + case 'T': { + /* print tensor */ + tensor *x = va_arg(ap, tensor *); + if (x) + X(tensor_print)(x, p); + else + goto putnull; + break; + } + default: + A(0 /* unknown format */); + break; + + putnull: + myputs(p, "(null)"); + break; + + putival: + putint(p, ival); + break; + } + break; + default: + p->putchr(p, c); + break; + } + } +} + +static void print(printer *p, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vprint(p, format, ap); + va_end(ap); +} +/* allocate a printer of SIZE bytes and install the callbacks; indentation starts at 0 with a step of 2 */ +printer *X(mkprinter)(size_t size, + void (*putchr)(printer *p, char c), + void (*cleanup)(printer *p)) +{ + printer *s = (printer *)MALLOC(size, OTHER); + s->print = print; + s->vprint = vprint; + s->putchr = putchr; + s->cleanup = cleanup; + s->indent = 0; + s->indent_incr = 2; + return s; +} + +void X(printer_destroy)(printer *p) +{ + if (p->cleanup) + p->cleanup(p); + X(ifree)(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +/* constructor: allocate SZ bytes via MALLOC (tagged PROBLEMS) and record ADT; no other field is set here */ +problem *X(mkproblem)(size_t sz, const problem_adt *adt) +{ + problem *p = (problem *)MALLOC(sz, PROBLEMS); + + p->adt = adt; + return p; +} + +/* destructor: dispatches to the problem's own adt->destroy; a null EGO is ignored */ +void X(problem_destroy)(problem *ego) +{ + if (ego) + ego->adt->destroy(ego); +} + +/* management of unsolvable problems (the destroy and zero hooks below do nothing) */ +static void unsolvable_destroy(problem *ego) +{ + UNUSED(ego); +} + +static void unsolvable_hash(const problem *p, md5 *m) +{ + UNUSED(p); + X(md5puts)(m, "unsolvable"); +} + +static void unsolvable_print(const problem *ego, printer *p) +{ + UNUSED(ego); + p->print(p, "(unsolvable)"); +} + +static void unsolvable_zero(const problem *ego) +{ + UNUSED(ego); +} + +static const problem_adt padt = +{ + PROBLEM_UNSOLVABLE, + unsolvable_hash, + unsolvable_zero, + unsolvable_print, + unsolvable_destroy +}; + +/* there is no point in malloc'ing this one: X(mkproblem_unsolvable) always returns this single static object */ +static problem the_unsolvable_problem = { &padt }; + +problem *X(mkproblem_unsolvable)(void) +{ + return &the_unsolvable_problem; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/rader.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/rader.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +/* + common routines for Rader solvers +*/ + + +/* shared twiddle and omega lists, keyed by two/three integers. Each node is reference-counted and linked to the next through CDR. */ +struct rader_tls { + INT k1, k2, k3; + R *W; + int refcnt; + rader_tl *cdr; +}; +/* push a new node for W, keyed by (k1, k2, k3), on the front of *TL with a reference count of 1 */ +void X(rader_tl_insert)(INT k1, INT k2, INT k3, R *W, rader_tl **tl) +{ + rader_tl *t = (rader_tl *) MALLOC(sizeof(rader_tl), TWIDDLES); + t->k1 = k1; t->k2 = k2; t->k3 = k3; t->W = W; + t->refcnt = 1; t->cdr = *tl; *tl = t; +} +/* return the W stored under (k1, k2, k3) and take one more reference on it, or 0 if there is no such node */ +R *X(rader_tl_find)(INT k1, INT k2, INT k3, rader_tl *t) +{ + while (t && (t->k1 != k1 || t->k2 != k2 || t->k3 != k3)) + t = t->cdr; + if (t) { + ++t->refcnt; + return t->W; + } else + return 0; +} +/* drop one reference on the node holding W; when the count reaches zero the node is unlinked and both W and the node are freed. A null W is ignored. */ +void X(rader_tl_delete)(R *W, rader_tl **tl) +{ + if (W) { + rader_tl **tp, *t; + + for (tp = tl; (t = *tp) && t->W != W; tp = &t->cdr) + ; + + if (t && --t->refcnt <= 0) { + *tp = t->cdr; + X(ifree)(t->W); + X(ifree)(t); + } + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/scan.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/scan.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" +#include <string.h> +#include <stddef.h> +#include <stdarg.h> +#include <stdio.h> + +#ifdef USE_CTYPE +#include <ctype.h> +#else +/* Screw ctype. On linux, the is* functions call a routine that gets + the ctype map in the current locale. Because this operation is + expensive, the map is cached on a per-thread basis. I am not + willing to link this crap with FFTW. Not over my dead body. + + Sic transit gloria mundi. +*/ +#undef isspace +#define isspace(x) ((x) >= 0 && (x) <= ' ') +#undef isdigit +#define isdigit(x) ((x) >= '0' && (x) <= '9') +#undef isupper +#define isupper(x) ((x) >= 'A' && (x) <= 'Z') +#undef islower +#define islower(x) ((x) >= 'a' && (x) <= 'z') +#endif +/* next character: the pushed-back one if there is one, otherwise a fresh one from sc->getchr */ +static int mygetc(scanner *sc) +{ + if (sc->ungotc != EOF) { + int c = sc->ungotc; + sc->ungotc = EOF; + return c; + } + return(sc->getchr(sc)); +} + +#define GETCHR(sc) mygetc(sc) + +static void myungetc(scanner *sc, int c) +{ + sc->ungotc = c; +} + +#define UNGETCHR(sc, c) myungetc(sc, c) + +static void eat_blanks(scanner *sc) +{ + int ch; + while (ch = GETCHR(sc), isspace(ch)) + ; + UNGETCHR(sc, ch); +} +/* copy characters into S until EOF, whitespace, a parenthesis, or MAXLEN characters, then NUL-terminate; the character that ended the copy is pushed back */ +static void mygets(scanner *sc, char *s, size_t maxlen) +{ + char *s0 = s; + int ch; + + A(maxlen > 0); + while ((ch = GETCHR(sc)) != EOF && !isspace(ch) + && ch != ')' && ch != '(' && s < s0 + maxlen) + *s++ = ch; + *s = 0; + UNGETCHR(sc, ch); +} +/* parse an optionally signed integer in BASE; *RET is 1 if at least one digit was consumed, else 0. Letters count as digits 10.. whatever BASE is. */ +static long getlong(scanner *sc, int base, int *ret) +{ + int sign = 1, ch, count; + long x = 0; + + ch = GETCHR(sc); + if (ch == '-' || ch == '+') { + sign = ch == '-' ? 
-1 : 1; + ch = GETCHR(sc); + } + for (count = 0; ; ++count) { + if (isdigit(ch)) + ch -= '0'; + else if (isupper(ch)) + ch -= 'A' - 10; + else if (islower(ch)) + ch -= 'a' - 10; + else + break; + x = x * base + ch; + ch = GETCHR(sc); + } + x *= sign; + UNGETCHR(sc, ch); + *ret = count > 0; + return x; +} + +/* vscan is mostly scanf-like, with our additional format specifiers, + but with a few twists. It returns simply 0 or 1 indicating whether + the match was successful. '(' and ')' in the format string match + those characters preceded by any whitespace. Finally, if a + character match fails, it will ungetchr() the last character back + onto the stream. */ +static int vscan(scanner *sc, const char *format, va_list ap) +{ + const char *s = format; + char c; + int ch = 0; + size_t fmt_len; + + while ((c = *s++)) { + fmt_len = 0; + switch (c) { + case '%': + getformat: + switch ((c = *s++)) { + case 's': { + char *x = va_arg(ap, char *); + mygets(sc, x, fmt_len); + break; + } + case 'd': { + int *x = va_arg(ap, int *); + *x = (int) getlong(sc, 10, &ch); + if (!ch) return 0; + break; + } + case 'x': { + int *x = va_arg(ap, int *); + *x = (int) getlong(sc, 16, &ch); + if (!ch) return 0; + break; + } + case 'M': { + md5uint *x = va_arg(ap, md5uint *); + *x = (md5uint) + (0xffffffffUL & getlong(sc, 16, &ch)); + if (!ch) return 0; + break; + } + case '*': { + int len = va_arg(ap, int); if (len <= 0) return 0; fmt_len = (size_t) len; /* test the int before converting: a negative length stored in size_t would pass a <= 0 check */ + goto getformat; + } + default: + A(0 /* unknown format */); + break; + } + break; + default: + if (isspace(c) || c == '(' || c == ')') + eat_blanks(sc); + if (!isspace(c) && (ch = GETCHR(sc)) != c) { + UNGETCHR(sc, ch); + return 0; + } + break; + } + } + return 1; +} + +static int scan(scanner *sc, const char *format, ...) 
+{ + int ret; + va_list ap; + va_start(ap, format); + ret = vscan(sc, format, ap); + va_end(ap); + return ret; +} +/* allocate a scanner of SIZE bytes reading through GETCHR, with no character pushed back */ +scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)) +{ + scanner *s = (scanner *)MALLOC(size, OTHER); + s->scan = scan; + s->vscan = vscan; + s->getchr = getchr; + s->ungotc = EOF; + return s; +} + +void X(scanner_destroy)(scanner *sc) +{ + X(ifree)(sc); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/solver.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/solver.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" +/* allocate a solver of SIZE bytes bound to ADT; its reference count starts at 0 */ +solver *X(mksolver)(size_t size, const solver_adt *adt) +{ + solver *s = (solver *)MALLOC(size, SOLVERS); + + s->adt = adt; + s->refcnt = 0; + return s; +} +/* take one reference on EGO */ +void X(solver_use)(solver *ego) +{ + ++ego->refcnt; +} +/* drop one reference; when the count reaches exactly 0, run the optional adt->destroy hook and free EGO */ +void X(solver_destroy)(solver *ego) +{ + if ((--ego->refcnt) == 0) { + if (ego->adt->destroy) + ego->adt->destroy(ego); + X(ifree)(ego); + } +} +/* hand S to the planner's own register_solver method */ +void X(solver_register)(planner *plnr, solver *s) +{ + plnr->adt->register_solver(plnr, s); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/solvtab.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/solvtab.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +void X(solvtab_exec)(const solvtab tbl, planner *p) +{ + for (; tbl->reg_nam; ++tbl) { + p->cur_reg_nam = tbl->reg_nam; + p->cur_reg_id = 0; + tbl->reg(p); + } + p->cur_reg_nam = 0; +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/stride.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/stride.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +const INT X(an_INT_guaranteed_to_be_zero) = 0; + +#ifdef PRECOMPUTE_ARRAY_INDICES +stride X(mkstride)(INT n, INT s) +{ + int i; + INT *p = (INT *) MALLOC(n * sizeof(INT), STRIDES); + + for (i = 0; i < n; ++i) + p[i] = s * i; + + return p; +} + +void X(stride_destroy)(stride p) +{ + X(ifree0)(p); +} + +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +tensor *X(mktensor)(int rnk) +{ + tensor *x; + + A(rnk >= 0); + +#if defined(STRUCT_HACK_KR) + if (FINITE_RNK(rnk) && rnk > 1) + x = (tensor *)MALLOC(sizeof(tensor) + (rnk - 1) * sizeof(iodim), + TENSORS); + else + x = (tensor *)MALLOC(sizeof(tensor), TENSORS); +#elif defined(STRUCT_HACK_C99) + if (FINITE_RNK(rnk)) + x = (tensor *)MALLOC(sizeof(tensor) + rnk * sizeof(iodim), + TENSORS); + else + x = (tensor *)MALLOC(sizeof(tensor), TENSORS); +#else + x = (tensor *)MALLOC(sizeof(tensor), TENSORS); + if (FINITE_RNK(rnk) && rnk > 0) + x->dims = (iodim *)MALLOC(sizeof(iodim) * rnk, TENSORS); + else + x->dims = 0; +#endif + + x->rnk = rnk; + return x; +} + +void X(tensor_destroy)(tensor *sz) +{ +#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR) + X(ifree0)(sz->dims); +#endif + X(ifree)(sz); +} + +INT X(tensor_sz)(const tensor *sz) +{ + int i; + INT n = 1; + + if (!FINITE_RNK(sz->rnk)) + return 0; + + for (i = 0; i < sz->rnk; ++i) + n *= sz->dims[i].n; + return n; +} + +void X(tensor_md5)(md5 *p, const tensor *t) +{ + int i; + X(md5int)(p, t->rnk); + if (FINITE_RNK(t->rnk)) { + for (i = 0; i < t->rnk; ++i) { + const iodim *q = t->dims + i; + X(md5INT)(p, q->n); + X(md5INT)(p, q->is); + X(md5INT)(p, q->os); + } + } +} + +/* treat a (rank <= 1)-tensor as a rank-1 tensor, extracting + appropriate n, is, and os components */ +int X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os) +{ + A(t->rnk <= 1); + if (t->rnk == 1) { + const iodim *vd = t->dims; + *n = vd[0].n; + *is = vd[0].is; + *os = vd[0].os; + } else { + *n = 1; + *is = *os = 0; + } + return 1; +} + +void X(tensor_print)(const tensor *x, printer *p) +{ + if (FINITE_RNK(x->rnk)) { + int i; + int first = 1; + p->print(p, "("); + for (i = 0; i < 
x->rnk; ++i) { + const iodim *d = x->dims + i; + p->print(p, "%s(%D %D %D)", + first ? "" : " ", + d->n, d->is, d->os); + first = 0; + } + p->print(p, ")"); + } else { + p->print(p, "rank-minfty"); + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +tensor *X(mktensor_0d)(void) +{ + return X(mktensor(0)); +} + +tensor *X(mktensor_1d)(INT n, INT is, INT os) +{ + tensor *x = X(mktensor)(1); + x->dims[0].n = n; + x->dims[0].is = is; + x->dims[0].os = os; + return x; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +tensor *X(mktensor_2d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1) +{ + tensor *x = X(mktensor)(2); + x->dims[0].n = n0; + x->dims[0].is = is0; + x->dims[0].os = os0; + x->dims[1].n = n1; + x->dims[1].is = is1; + x->dims[1].os = os1; + return x; +} + + +tensor *X(mktensor_3d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT n2, INT is2, INT os2) +{ + tensor *x = X(mktensor)(3); + x->dims[0].n = n0; + x->dims[0].is = is0; + x->dims[0].os = os0; + x->dims[1].n = n1; + x->dims[1].is = is1; + x->dims[1].os = os1; + x->dims[2].n = n2; + x->dims[2].is = is2; + x->dims[2].os = os2; + return x; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +/* Currently, mktensor_4d and mktensor_5d are only used in the MPI + routines, where very complicated transpositions are required. + Therefore we split them into a separate source file. */ + +tensor *X(mktensor_4d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT n2, INT is2, INT os2, + INT n3, INT is3, INT os3) +{ + tensor *x = X(mktensor)(4); + x->dims[0].n = n0; + x->dims[0].is = is0; + x->dims[0].os = os0; + x->dims[1].n = n1; + x->dims[1].is = is1; + x->dims[1].os = os1; + x->dims[2].n = n2; + x->dims[2].is = is2; + x->dims[2].os = os2; + x->dims[3].n = n3; + x->dims[3].is = is3; + x->dims[3].os = os3; + return x; +} + +tensor *X(mktensor_5d)(INT n0, INT is0, INT os0, + INT n1, INT is1, INT os1, + INT n2, INT is2, INT os2, + INT n3, INT is3, INT os3, + INT n4, INT is4, INT os4) +{ + tensor *x = X(mktensor)(5); + x->dims[0].n = n0; + x->dims[0].is = is0; + x->dims[0].os = os0; + x->dims[1].n = n1; + x->dims[1].is = is1; + x->dims[1].os = os1; + x->dims[2].n = n2; + x->dims[2].is = is2; + x->dims[2].os = os2; + x->dims[3].n = n3; + x->dims[3].is = is3; + x->dims[3].os = os3; + x->dims[4].n = n4; + x->dims[4].is = is4; + x->dims[4].os = os4; + return x; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * 
(at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +INT X(tensor_max_index)(const tensor *sz) +{ + int i; + INT ni = 0, no = 0; + + A(FINITE_RNK(sz->rnk)); + for (i = 0; i < sz->rnk; ++i) { + const iodim *p = sz->dims + i; + ni += (p->n - 1) * X(iabs)(p->is); + no += (p->n - 1) * X(iabs)(p->os); + } + return X(imax)(ni, no); +} + +#define tensor_min_xstride(sz, xs) { \ + A(FINITE_RNK(sz->rnk)); \ + if (sz->rnk == 0) return 0; \ + else { \ + int i; \ + INT s = X(iabs)(sz->dims[0].xs); \ + for (i = 1; i < sz->rnk; ++i) \ + s = X(imin)(s, X(iabs)(sz->dims[i].xs)); \ + return s; \ + } \ +} + +INT X(tensor_min_istride)(const tensor *sz) tensor_min_xstride(sz, is) +INT X(tensor_min_ostride)(const tensor *sz) tensor_min_xstride(sz, os) + +INT X(tensor_min_stride)(const tensor *sz) +{ + return X(imin)(X(tensor_min_istride)(sz), X(tensor_min_ostride)(sz)); +} + +int X(tensor_inplace_strides)(const tensor *sz) +{ + int i; + A(FINITE_RNK(sz->rnk)); + for (i = 0; i < sz->rnk; ++i) { + const iodim *p = sz->dims + i; + if (p->is != p->os) + return 0; + } + return 1; +} + +int X(tensor_inplace_strides2)(const tensor *a, const tensor *b) +{ + return X(tensor_inplace_strides(a)) && X(tensor_inplace_strides(b)); +} + +/* return true (1) iff *any* strides of sz decrease when we + tensor_inplace_copy(sz, k). 
*/ +static int tensor_strides_decrease(const tensor *sz, inplace_kind k) +{ + if (FINITE_RNK(sz->rnk)) { + int i; + for (i = 0; i < sz->rnk; ++i) + if ((sz->dims[i].os - sz->dims[i].is) + * (k == INPLACE_OS ? (INT)1 : (INT)-1) < 0) + return 1; + } + return 0; +} + +/* Return true (1) iff *any* strides of sz decrease when we + tensor_inplace_copy(k) *or* if *all* strides of sz are unchanged + but *any* strides of vecsz decrease. This is used in indirect.c + to determine whether to use INPLACE_IS or INPLACE_OS. + + Note: X(tensor_strides_decrease)(sz, vecsz, INPLACE_IS) + || X(tensor_strides_decrease)(sz, vecsz, INPLACE_OS) + || X(tensor_inplace_strides2)(p->sz, p->vecsz) + must always be true. */ +int X(tensor_strides_decrease)(const tensor *sz, const tensor *vecsz, + inplace_kind k) +{ + return(tensor_strides_decrease(sz, k) + || (X(tensor_inplace_strides)(sz) + && tensor_strides_decrease(vecsz, k))); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +static void dimcpy(iodim *dst, const iodim *src, int rnk) +{ + int i; + if (FINITE_RNK(rnk)) + for (i = 0; i < rnk; ++i) + dst[i] = src[i]; +} + +tensor *X(tensor_copy)(const tensor *sz) +{ + tensor *x = X(mktensor)(sz->rnk); + dimcpy(x->dims, sz->dims, sz->rnk); + return x; +} + +/* like X(tensor_copy), but makes strides in-place by + setting os = is if k == INPLACE_IS or is = os if k == INPLACE_OS. */ +tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k) +{ + tensor *x = X(tensor_copy)(sz); + if (FINITE_RNK(x->rnk)) { + int i; + if (k == INPLACE_OS) + for (i = 0; i < x->rnk; ++i) + x->dims[i].is = x->dims[i].os; + else + for (i = 0; i < x->rnk; ++i) + x->dims[i].os = x->dims[i].is; + } + return x; +} + +/* Like X(tensor_copy), but copy all of the dimensions *except* + except_dim. */ +tensor *X(tensor_copy_except)(const tensor *sz, int except_dim) +{ + tensor *x; + + A(FINITE_RNK(sz->rnk) && sz->rnk >= 1 && except_dim < sz->rnk); + x = X(mktensor)(sz->rnk - 1); + dimcpy(x->dims, sz->dims, except_dim); + dimcpy(x->dims + except_dim, sz->dims + except_dim + 1, + x->rnk - except_dim); + return x; +} + +/* Like X(tensor_copy), but copy only rnk dimensions starting + with start_dim. 
*/ +tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk) +{ + tensor *x; + + A(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk); + x = X(mktensor)(rnk); + dimcpy(x->dims, sz->dims + start_dim, rnk); + return x; +} + +tensor *X(tensor_append)(const tensor *a, const tensor *b) +{ + if (!FINITE_RNK(a->rnk) || !FINITE_RNK(b->rnk)) { + return X(mktensor)(RNK_MINFTY); + } else { + tensor *x = X(mktensor)(a->rnk + b->rnk); + dimcpy(x->dims, a->dims, a->rnk); + dimcpy(x->dims + a->rnk, b->dims, b->rnk); + return x; + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +static int signof(INT x) +{ + if (x < 0) return -1; + if (x == 0) return 0; + /* if (x > 0) */ return 1; +} + +/* total order among iodim's */ +int X(dimcmp)(const iodim *a, const iodim *b) +{ + INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is); + INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os); + INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo); + + /* in descending order of min{istride, ostride} */ + if (sam != sbm) + return signof(sbm - sam); + + /* in case of a tie, in descending order of istride */ + if (sbi != sai) + return signof(sbi - sai); + + /* in case of a tie, in descending order of ostride */ + if (sbo != sao) + return signof(sbo - sao); + + /* in case of a tie, in ascending order of n */ + return signof(a->n - b->n); +} + +static void canonicalize(tensor *x) +{ + if (x->rnk > 1) { + qsort(x->dims, (size_t)x->rnk, sizeof(iodim), + (int (*)(const void *, const void *))X(dimcmp)); + } +} + +static int compare_by_istride(const iodim *a, const iodim *b) +{ + INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is); + + /* in descending order of istride */ + return signof(sbi - sai); +} + +static tensor *really_compress(const tensor *sz) +{ + int i, rnk; + tensor *x; + + A(FINITE_RNK(sz->rnk)); + for (i = rnk = 0; i < sz->rnk; ++i) { + A(sz->dims[i].n > 0); + if (sz->dims[i].n != 1) + ++rnk; + } + + x = X(mktensor)(rnk); + for (i = rnk = 0; i < sz->rnk; ++i) { + if (sz->dims[i].n != 1) + x->dims[rnk++] = sz->dims[i]; + } + return x; +} + +/* Like tensor_copy, but eliminate n == 1 dimensions, which + never affect any transform or transform vector. + + Also, we sort the tensor into a canonical order of decreasing + strides (see X(dimcmp) for an exact definition). 
In general, + processing a loop/array in order of decreasing stride will improve + locality. Both forward and backwards traversal of the tensor are + considered e.g. by vrank-geq1, so sorting in increasing + vs. decreasing order is not really important. */ +tensor *X(tensor_compress)(const tensor *sz) +{ + tensor *x = really_compress(sz); + canonicalize(x); + return x; +} + +/* Return whether the strides of a and b are such that they form an + effective contiguous 1d array. Assumes that a.is >= b.is. */ +static int strides_contig(iodim *a, iodim *b) +{ + return (a->is == b->is * b->n && a->os == b->os * b->n); +} + +/* Like tensor_compress, but also compress into one dimension any + group of dimensions that form a contiguous block of indices with + some stride. (This can safely be done for transform vector sizes.) */ +tensor *X(tensor_compress_contiguous)(const tensor *sz) +{ + int i, rnk; + tensor *sz2, *x; + + if (X(tensor_sz)(sz) == 0) + return X(mktensor)(RNK_MINFTY); + + sz2 = really_compress(sz); + A(FINITE_RNK(sz2->rnk)); + + if (sz2->rnk <= 1) { /* nothing to compress. */ + if (0) { + /* this call is redundant, because "sz->rnk <= 1" implies + that the tensor is already canonical, but I am writing + it explicitly because "logically" we need to canonicalize + the tensor before returning. 
*/ + canonicalize(sz2); + } + return sz2; + } + + /* sort in descending order of |istride|, so that compressible + dimensions appear contigously */ + qsort(sz2->dims, (size_t)sz2->rnk, sizeof(iodim), + (int (*)(const void *, const void *))compare_by_istride); + + /* compute what the rank will be after compression */ + for (i = rnk = 1; i < sz2->rnk; ++i) + if (!strides_contig(sz2->dims + i - 1, sz2->dims + i)) + ++rnk; + + /* merge adjacent dimensions whenever possible */ + x = X(mktensor)(rnk); + x->dims[0] = sz2->dims[0]; + for (i = rnk = 1; i < sz2->rnk; ++i) { + if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) { + x->dims[rnk - 1].n *= sz2->dims[i].n; + x->dims[rnk - 1].is = sz2->dims[i].is; + x->dims[rnk - 1].os = sz2->dims[i].os; + } else { + A(rnk < x->rnk); + x->dims[rnk++] = sz2->dims[i]; + } + } + + X(tensor_destroy)(sz2); + + /* reduce to canonical form */ + canonicalize(x); + return x; +} + +/* The inverse of X(tensor_append): splits the sz tensor into + tensor a followed by tensor b, where a's rank is arnk. 
*/ +void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b) +{ + A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk)); + + *a = X(tensor_copy_sub)(sz, 0, arnk); + *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk); +} + +/* TRUE if the two tensors are equal */ +int X(tensor_equal)(const tensor *a, const tensor *b) +{ + if (a->rnk != b->rnk) + return 0; + + if (FINITE_RNK(a->rnk)) { + int i; + for (i = 0; i < a->rnk; ++i) + if (0 + || a->dims[i].n != b->dims[i].n + || a->dims[i].is != b->dims[i].is + || a->dims[i].os != b->dims[i].os + ) + return 0; + } + + return 1; +} + +/* TRUE if the sets of input and output locations described by + (append sz vecsz) are the same */ +int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz) +{ + tensor *t = X(tensor_append)(sz, vecsz); + tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS); + tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS); + tensor *tic = X(tensor_compress_contiguous)(ti); + tensor *toc = X(tensor_compress_contiguous)(to); + + int retval = X(tensor_equal)(tic, toc); + + X(tensor_destroy)(t); + X(tensor_destroy4)(ti, to, tic, toc); + + return retval; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +void X(tensor_destroy2)(tensor *a, tensor *b) +{ + X(tensor_destroy)(a); + X(tensor_destroy)(b); +} + +void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d) +{ + X(tensor_destroy2)(a, b); + X(tensor_destroy2)(c, d); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tensor9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tensor9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +int X(tensor_kosherp)(const tensor *x) +{ + int i; + + if (x->rnk < 0) return 0; + + if (FINITE_RNK(x->rnk)) { + for (i = 0; i < x->rnk; ++i) + if (x->dims[i].n < 0) + return 0; + } + return 1; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/tile2d.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/tile2d.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* out of place 2D copy routines */ +#include "ifftw.h" + +void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz, + void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args), + void *args) +{ + INT d0, d1; + + A(tilesz > 0); /* infinite loops otherwise */ + + tail: + d0 = n0u - n0l; + d1 = n1u - n1l; + + if (d0 >= d1 && d0 > tilesz) { + INT n0m = (n0u + n0l) / 2; + X(tile2d)(n0l, n0m, n1l, n1u, tilesz, f, args); + n0l = n0m; goto tail; + } else if (/* d1 >= d0 && */ d1 > tilesz) { + INT n1m = (n1u + n1l) / 2; + X(tile2d)(n0l, n0u, n1l, n1m, tilesz, f, args); + n1l = n1m; goto tail; + } else { + f(n0l, n0u, n1l, n1u, args); + } +} + +INT X(compute_tilesz)(INT vl, int how_many_tiles_in_cache) +{ + return X(isqrt)(CACHESIZE / + (((INT)sizeof(R)) * vl * (INT)how_many_tiles_in_cache)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/timer.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/timer.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifndef WITH_SLOW_TIMER +# include "cycle.h" +#endif + +#ifndef FFTW_TIME_LIMIT +#define FFTW_TIME_LIMIT 2.0 /* don't run for more than two seconds */ +#endif + +/* the following code is disabled for now, because it seems to + require that we #include <windows.h> in ifftw.h to + typedef LARGE_INTEGER crude_time, and this pulls in the whole + Windows universe and leads to namespace conflicts (unless + we did some hack like assuming sizeof(LARGE_INTEGER) == sizeof(long long). + gettimeofday is provided by MinGW, which we use to cross-compile + FFTW for Windows, and this seems to work well enough */ +#if 0 && (defined(__WIN32__) || defined(_WIN32) || defined(_WIN64)) +crude_time X(get_crude_time)(void) +{ + crude_time tv; + QueryPerformanceCounter(&tv); + return tv; +} + +static double elapsed_since(crude_time t0) +{ + crude_time t1, freq; + QueryPerformanceCounter(&t1); + QueryPerformanceFrequency(&freq); + return (((double) (t1.QuadPart - t0.QuadPart))) / + ((double) freq.QuadPart); +} + +# define TIME_MIN_SEC 1.0e-2 + +#elif defined(HAVE_GETTIMEOFDAY) +crude_time X(get_crude_time)(void) +{ + crude_time tv; + gettimeofday(&tv, 0); + return tv; +} + +#define elapsed_sec(t1,t0) ((double)(t1.tv_sec - t0.tv_sec) + \ + (double)(t1.tv_usec - t0.tv_usec) * 1.0E-6) + +static double elapsed_since(crude_time t0) +{ + crude_time t1; + gettimeofday(&t1, 0); + return elapsed_sec(t1, t0); +} + +# define TIME_MIN_SEC 1.0e-3 + +#else /* !HAVE_GETTIMEOFDAY */ + +/* Note that the only system where we are likely to need to fall back + on the clock() function is Windows, for which CLOCKS_PER_SEC is 1000 + and thus the clock wraps once every 50 days. 
This should hopefully + be longer than the time required to create any single plan! */ +crude_time X(get_crude_time)(void) { return clock(); } + +#define elapsed_sec(t1,t0) ((double) ((t1) - (t0)) / CLOCKS_PER_SEC) + +static double elapsed_since(crude_time t0) +{ + return elapsed_sec(clock(), t0); +} + +# define TIME_MIN_SEC 2.0e-1 /* from fftw2 */ + +#endif /* !HAVE_GETTIMEOFDAY */ + +double X(elapsed_since)(const planner *plnr, const problem *p, crude_time t0) +{ + double t = elapsed_since(t0); + if (plnr->cost_hook) + t = plnr->cost_hook(p, t, COST_MAX); + return t; +} + +#ifdef WITH_SLOW_TIMER +/* excruciatingly slow; only use this if there is no choice! */ +typedef crude_time ticks; +# define getticks X(get_crude_time) +# define elapsed(t1,t0) elapsed_sec(t1,t0) +# define TIME_MIN TIME_MIN_SEC +# define TIME_REPEAT 4 /* from fftw2 */ +# define HAVE_TICK_COUNTER +#endif + +#ifdef HAVE_TICK_COUNTER + +# ifndef TIME_MIN +# define TIME_MIN 100.0 +# endif + +# ifndef TIME_REPEAT +# define TIME_REPEAT 8 +# endif + + static double measure(plan *pln, const problem *p, int iter) + { + ticks t0, t1; + int i; + + t0 = getticks(); + for (i = 0; i < iter; ++i) + pln->adt->solve(pln, p); + t1 = getticks(); + return elapsed(t1, t0); + } + + + double X(measure_execution_time)(const planner *plnr, + plan *pln, const problem *p) + { + int iter; + int repeat; + + X(plan_awake)(pln, AWAKE_ZERO); + p->adt->zero(p); + + start_over: + for (iter = 1; iter; iter *= 2) { + double tmin = 0; + int first = 1; + crude_time begin = X(get_crude_time)(); + + /* repeat the measurement TIME_REPEAT times */ + for (repeat = 0; repeat < TIME_REPEAT; ++repeat) { + double t = measure(pln, p, iter); + + if (plnr->cost_hook) + t = plnr->cost_hook(p, t, COST_MAX); + if (t < 0) + goto start_over; + + if (first || t < tmin) + tmin = t; + first = 0; + + /* do not run for too long */ + if (X(elapsed_since)(plnr, p, begin) > FFTW_TIME_LIMIT) + break; + } + + if (tmin >= TIME_MIN) { + X(plan_awake)(pln, 
SLEEPY); + return tmin / (double) iter; + } + } + goto start_over; /* may happen if timer is screwed up */ + } + +#else /* no cycle counter */ + + double X(measure_execution_time)(const planner *plnr, + plan *pln, const problem *p) + { + UNUSED(plnr); + UNUSED(p); + UNUSED(pln); + return -1.0; + } + +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/transpose.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/transpose.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +/* in place square transposition, iterative */ +void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl) +{ + INT i0, i1, v; + + switch (vl) { + case 1: + for (i1 = 1; i1 < n; ++i1) { + for (i0 = 0; i0 < i1; ++i0) { + R x0 = I[i1 * s0 + i0 * s1]; + R y0 = I[i1 * s1 + i0 * s0]; + I[i1 * s1 + i0 * s0] = x0; + I[i1 * s0 + i0 * s1] = y0; + } + } + break; + case 2: + for (i1 = 1; i1 < n; ++i1) { + for (i0 = 0; i0 < i1; ++i0) { + R x0 = I[i1 * s0 + i0 * s1]; + R x1 = I[i1 * s0 + i0 * s1 + 1]; + R y0 = I[i1 * s1 + i0 * s0]; + R y1 = I[i1 * s1 + i0 * s0 + 1]; + I[i1 * s1 + i0 * s0] = x0; + I[i1 * s1 + i0 * s0 + 1] = x1; + I[i1 * s0 + i0 * s1] = y0; + I[i1 * s0 + i0 * s1 + 1] = y1; + } + } + break; + default: + for (i1 = 1; i1 < n; ++i1) { + for (i0 = 0; i0 < i1; ++i0) { + for (v = 0; v < vl; ++v) { + R x0 = I[i1 * s0 + i0 * s1 + v]; + R y0 = I[i1 * s1 + i0 * s0 + v]; + I[i1 * s1 + i0 * s0 + v] = x0; + I[i1 * s0 + i0 * s1 + v] = y0; + } + } + } + break; + } +} + +struct transpose_closure { + R *I; + INT s0, s1, vl, tilesz; + R *buf0, *buf1; +}; + +static void dotile(INT n0l, INT n0u, INT n1l, INT n1u, void *args) +{ + struct transpose_closure *k = (struct transpose_closure *)args; + R *I = k->I; + INT s0 = k->s0, s1 = k->s1, vl = k->vl; + INT i0, i1, v; + + switch (vl) { + case 1: + for (i1 = n1l; i1 < n1u; ++i1) { + for (i0 = n0l; i0 < n0u; ++i0) { + R x0 = I[i1 * s0 + i0 * s1]; + R y0 = I[i1 * s1 + i0 * s0]; + I[i1 * s1 + i0 * s0] = x0; + I[i1 * s0 + i0 * s1] = y0; + } + } + break; + case 2: + for (i1 = n1l; i1 < n1u; ++i1) { + for (i0 = n0l; i0 < n0u; ++i0) { + R x0 = I[i1 * s0 + i0 * s1]; + R x1 = I[i1 * s0 + i0 * s1 + 1]; + R y0 = I[i1 * s1 + i0 * s0]; + R y1 = I[i1 * s1 + i0 * s0 + 1]; + I[i1 * s1 + i0 * s0] = x0; + I[i1 
* s1 + i0 * s0 + 1] = x1; + I[i1 * s0 + i0 * s1] = y0; + I[i1 * s0 + i0 * s1 + 1] = y1; + } + } + break; + default: + for (i1 = n1l; i1 < n1u; ++i1) { + for (i0 = n0l; i0 < n0u; ++i0) { + for (v = 0; v < vl; ++v) { + R x0 = I[i1 * s0 + i0 * s1 + v]; + R y0 = I[i1 * s1 + i0 * s0 + v]; + I[i1 * s1 + i0 * s0 + v] = x0; + I[i1 * s0 + i0 * s1 + v] = y0; + } + } + } + } +} + +static void dotile_buf(INT n0l, INT n0u, INT n1l, INT n1u, void *args) +{ + struct transpose_closure *k = (struct transpose_closure *)args; + X(cpy2d_ci)(k->I + n0l * k->s0 + n1l * k->s1, + k->buf0, + n0u - n0l, k->s0, k->vl, + n1u - n1l, k->s1, k->vl * (n0u - n0l), + k->vl); + X(cpy2d_ci)(k->I + n0l * k->s1 + n1l * k->s0, + k->buf1, + n0u - n0l, k->s1, k->vl, + n1u - n1l, k->s0, k->vl * (n0u - n0l), + k->vl); + X(cpy2d_co)(k->buf1, + k->I + n0l * k->s0 + n1l * k->s1, + n0u - n0l, k->vl, k->s0, + n1u - n1l, k->vl * (n0u - n0l), k->s1, + k->vl); + X(cpy2d_co)(k->buf0, + k->I + n0l * k->s1 + n1l * k->s0, + n0u - n0l, k->vl, k->s1, + n1u - n1l, k->vl * (n0u - n0l), k->s0, + k->vl); +} + +static void transpose_rec(R *I, INT n, + void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, + void *args), + struct transpose_closure *k) +{ + tail: + if (n > 1) { + INT n2 = n / 2; + k->I = I; + X(tile2d)(0, n2, n2, n, k->tilesz, f, k); + transpose_rec(I, n2, f, k); + I += n2 * (k->s0 + k->s1); n -= n2; goto tail; + } +} + +void X(transpose_tiled)(R *I, INT n, INT s0, INT s1, INT vl) +{ + struct transpose_closure k; + k.s0 = s0; + k.s1 = s1; + k.vl = vl; + /* two blocks must be in cache, to be swapped */ + k.tilesz = X(compute_tilesz)(vl, 2); + k.buf0 = k.buf1 = 0; /* unused */ + transpose_rec(I, n, dotile, &k); +} + +void X(transpose_tiledbuf)(R *I, INT n, INT s0, INT s1, INT vl) +{ + struct transpose_closure k; + /* Assume that the the rows of I conflict into the same cache + lines, and therefore we don't need to reserve cache space for + the input. 
If the rows don't conflict, there is no reason + to use tiledbuf at all.*/ + R buf0[CACHESIZE / (2 * sizeof(R))]; + R buf1[CACHESIZE / (2 * sizeof(R))]; + k.s0 = s0; + k.s1 = s1; + k.vl = vl; + k.tilesz = X(compute_tilesz)(vl, 2); + k.buf0 = buf0; + k.buf1 = buf1; + A(k.tilesz * k.tilesz * vl * sizeof(R) <= sizeof(buf0)); + A(k.tilesz * k.tilesz * vl * sizeof(R) <= sizeof(buf1)); + transpose_rec(I, n, dotile_buf, &k); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/trig.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/trig.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* trigonometric functions */ +#include "ifftw.h" +#include + +#if defined(TRIGREAL_IS_LONG_DOUBLE) +# define COS cosl +# define SIN sinl +# define KTRIG(x) (x##L) +# if defined(HAVE_DECL_SINL) && !HAVE_DECL_SINL + extern long double sinl(long double x); +# endif +# if defined(HAVE_DECL_COSL) && !HAVE_DECL_COSL + extern long double cosl(long double x); +# endif +#elif defined(TRIGREAL_IS_QUAD) +# define COS cosq +# define SIN sinq +# define KTRIG(x) (x##Q) + extern __float128 sinq(__float128 x); + extern __float128 cosq(__float128 x); +#else +# define COS cos +# define SIN sin +# define KTRIG(x) (x) +#endif + +static const trigreal K2PI = + KTRIG(6.2831853071795864769252867665590057683943388); +#define by2pi(m, n) ((K2PI * (m)) / (n)) + +/* + * Improve accuracy by reducing x to range [0..1/8] + * before multiplication by 2 * PI. 
+ */ + +static void real_cexp(INT m, INT n, trigreal *out) +{ + trigreal theta, c, s, t; + unsigned octant = 0; + INT quarter_n = n; + + n += n; n += n; + m += m; m += m; + + if (m < 0) m += n; + if (m > n - m) { m = n - m; octant |= 4; } + if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; } + if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; } + + theta = by2pi(m, n); + c = COS(theta); s = SIN(theta); + + if (octant & 1) { t = c; c = s; s = t; } + if (octant & 2) { t = c; c = -s; s = t; } + if (octant & 4) { s = -s; } + + out[0] = c; + out[1] = s; +} + +static INT choose_twshft(INT n) +{ + INT log2r = 0; + while (n > 0) { + ++log2r; + n /= 4; + } + return log2r; +} + +static void cexpl_sqrtn_table(triggen *p, INT m, trigreal *res) +{ + m += p->n * (m < 0); + + { + INT m0 = m & p->twmsk; + INT m1 = m >> p->twshft; + trigreal wr0 = p->W0[2 * m0]; + trigreal wi0 = p->W0[2 * m0 + 1]; + trigreal wr1 = p->W1[2 * m1]; + trigreal wi1 = p->W1[2 * m1 + 1]; + + res[0] = wr1 * wr0 - wi1 * wi0; + res[1] = wi1 * wr0 + wr1 * wi0; + } +} + +/* multiply (xr, xi) by exp(FFT_SIGN * 2*pi*i*m/n) */ +static void rotate_sqrtn_table(triggen *p, INT m, R xr, R xi, R *res) +{ + m += p->n * (m < 0); + + { + INT m0 = m & p->twmsk; + INT m1 = m >> p->twshft; + trigreal wr0 = p->W0[2 * m0]; + trigreal wi0 = p->W0[2 * m0 + 1]; + trigreal wr1 = p->W1[2 * m1]; + trigreal wi1 = p->W1[2 * m1 + 1]; + trigreal wr = wr1 * wr0 - wi1 * wi0; + trigreal wi = wi1 * wr0 + wr1 * wi0; + +#if FFT_SIGN == -1 + res[0] = xr * wr + xi * wi; + res[1] = xi * wr - xr * wi; +#else + res[0] = xr * wr - xi * wi; + res[1] = xi * wr + xr * wi; +#endif + } +} + +static void cexpl_sincos(triggen *p, INT m, trigreal *res) +{ + real_cexp(m, p->n, res); +} + +static void cexp_zero(triggen *p, INT m, R *res) +{ + UNUSED(p); UNUSED(m); + res[0] = 0; + res[1] = 0; +} + +static void cexpl_zero(triggen *p, INT m, trigreal *res) +{ + UNUSED(p); UNUSED(m); + res[0] = 0; + res[1] = 0; +} + +static void 
cexp_generic(triggen *p, INT m, R *res) +{ + trigreal resl[2]; + p->cexpl(p, m, resl); + res[0] = (R)resl[0]; + res[1] = (R)resl[1]; +} + +static void rotate_generic(triggen *p, INT m, R xr, R xi, R *res) +{ + trigreal w[2]; + p->cexpl(p, m, w); + res[0] = xr * w[0] - xi * (FFT_SIGN * w[1]); + res[1] = xi * w[0] + xr * (FFT_SIGN * w[1]); +} + +triggen *X(mktriggen)(enum wakefulness wakefulness, INT n) +{ + INT i, n0, n1; + triggen *p = (triggen *)MALLOC(sizeof(*p), TWIDDLES); + + p->n = n; + p->W0 = p->W1 = 0; + p->cexp = 0; + p->rotate = 0; + + switch (wakefulness) { + case SLEEPY: + A(0 /* can't happen */); + break; + + case AWAKE_SQRTN_TABLE: { + INT twshft = choose_twshft(n); + + p->twshft = twshft; + p->twradix = ((INT)1) << twshft; + p->twmsk = p->twradix - 1; + + n0 = p->twradix; + n1 = (n + n0 - 1) / n0; + + p->W0 = (trigreal *)MALLOC(n0 * 2 * sizeof(trigreal), TWIDDLES); + p->W1 = (trigreal *)MALLOC(n1 * 2 * sizeof(trigreal), TWIDDLES); + + for (i = 0; i < n0; ++i) + real_cexp(i, n, p->W0 + 2 * i); + + for (i = 0; i < n1; ++i) + real_cexp(i * p->twradix, n, p->W1 + 2 * i); + + p->cexpl = cexpl_sqrtn_table; + p->rotate = rotate_sqrtn_table; + break; + } + + case AWAKE_SINCOS: + p->cexpl = cexpl_sincos; + break; + + case AWAKE_ZERO: + p->cexp = cexp_zero; + p->cexpl = cexpl_zero; + break; + } + + if (!p->cexp) { + if (sizeof(trigreal) == sizeof(R)) + p->cexp = (void (*)(triggen *, INT, R *))p->cexpl; + else + p->cexp = cexp_generic; + } + if (!p->rotate) + p->rotate = rotate_generic; + return p; +} + +void X(triggen_destroy)(triggen *p) +{ + X(ifree0)(p->W0); + X(ifree0)(p->W1); + X(ifree)(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/kernel/twiddle.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/kernel/twiddle.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Twiddle manipulation */ + +#include "ifftw.h" +#include + +#define HASHSZ 109 + +/* hash table of known twiddle factors */ +static twid *twlist[HASHSZ]; + +static INT hash(INT n, INT r) +{ + INT h = n * 17 + r; + + if (h < 0) h = -h; + + return (h % HASHSZ); +} + +static int equal_instr(const tw_instr *p, const tw_instr *q) +{ + if (p == q) + return 1; + + for (;; ++p, ++q) { + if (p->op != q->op) + return 0; + + switch (p->op) { + case TW_NEXT: + return (p->v == q->v); /* p->i is ignored */ + + case TW_FULL: + case TW_HALF: + if (p->v != q->v) return 0; /* p->i is ignored */ + break; + + default: + if (p->v != q->v || p->i != q->i) return 0; + break; + } + } + A(0 /* can't happen */); +} + +static int ok_twid(const twid *t, + enum wakefulness wakefulness, + const tw_instr *q, INT n, INT r, INT m) +{ + return (wakefulness == t->wakefulness && + n == t->n && + r == t->r && + m <= t->m && + equal_instr(t->instr, q)); +} + +static twid *lookup(enum wakefulness wakefulness, + const tw_instr *q, INT n, INT r, INT m) +{ + twid *p; + + for (p = twlist[hash(n,r)]; + p && !ok_twid(p, wakefulness, q, n, r, m); + p = p->cdr) + ; + return p; +} + +static INT twlen0(INT r, const tw_instr *p, INT *vl) +{ + INT ntwiddle = 0; + + /* compute length of bytecode program */ + A(r > 0); + 
for ( ; p->op != TW_NEXT; ++p) { + switch (p->op) { + case TW_FULL: + ntwiddle += (r - 1) * 2; + break; + case TW_HALF: + ntwiddle += (r - 1); + break; + case TW_CEXP: + ntwiddle += 2; + break; + case TW_COS: + case TW_SIN: + ntwiddle += 1; + break; + } + } + + *vl = (INT)p->v; + return ntwiddle; +} + +INT X(twiddle_length)(INT r, const tw_instr *p) +{ + INT vl; + return twlen0(r, p, &vl); +} + +static R *compute(enum wakefulness wakefulness, + const tw_instr *instr, INT n, INT r, INT m) +{ + INT ntwiddle, j, vl; + R *W, *W0; + const tw_instr *p; + triggen *t = X(mktriggen)(wakefulness, n); + + p = instr; + ntwiddle = twlen0(r, p, &vl); + + A(m % vl == 0); + + W0 = W = (R *)MALLOC((ntwiddle * (m / vl)) * sizeof(R), TWIDDLES); + + for (j = 0; j < m; j += vl) { + for (p = instr; p->op != TW_NEXT; ++p) { + switch (p->op) { + case TW_FULL: { + INT i; + for (i = 1; i < r; ++i) { + A((j + (INT)p->v) * i < n); + A((j + (INT)p->v) * i > -n); + t->cexp(t, (j + (INT)p->v) * i, W); + W += 2; + } + break; + } + + case TW_HALF: { + INT i; + A((r % 2) == 1); + for (i = 1; i + i < r; ++i) { + t->cexp(t, MULMOD(i, (j + (INT)p->v), n), W); + W += 2; + } + break; + } + + case TW_COS: { + R d[2]; + + A((j + (INT)p->v) * p->i < n); + A((j + (INT)p->v) * p->i > -n); + t->cexp(t, (j + (INT)p->v) * (INT)p->i, d); + *W++ = d[0]; + break; + } + + case TW_SIN: { + R d[2]; + + A((j + (INT)p->v) * p->i < n); + A((j + (INT)p->v) * p->i > -n); + t->cexp(t, (j + (INT)p->v) * (INT)p->i, d); + *W++ = d[1]; + break; + } + + case TW_CEXP: + A((j + (INT)p->v) * p->i < n); + A((j + (INT)p->v) * p->i > -n); + t->cexp(t, (j + (INT)p->v) * (INT)p->i, W); + W += 2; + break; + } + } + } + + X(triggen_destroy)(t); + return W0; +} + +static void mktwiddle(enum wakefulness wakefulness, + twid **pp, const tw_instr *instr, INT n, INT r, INT m) +{ + twid *p; + INT h; + + if ((p = lookup(wakefulness, instr, n, r, m))) { + ++p->refcnt; + } else { + p = (twid *) MALLOC(sizeof(twid), TWIDDLES); + p->n = n; + p->r = 
r; + p->m = m; + p->instr = instr; + p->refcnt = 1; + p->wakefulness = wakefulness; + p->W = compute(wakefulness, instr, n, r, m); + + /* cons! onto twlist */ + h = hash(n, r); + p->cdr = twlist[h]; + twlist[h] = p; + } + + *pp = p; +} + +static void twiddle_destroy(twid **pp) +{ + twid *p = *pp; + twid **q; + + if ((--p->refcnt) == 0) { + /* remove p from twiddle list */ + for (q = &twlist[hash(p->n, p->r)]; *q; q = &((*q)->cdr)) { + if (*q == p) { + *q = p->cdr; + X(ifree)(p->W); + X(ifree)(p); + *pp = 0; + return; + } + } + A(0 /* can't happen */ ); + } +} + + +void X(twiddle_awake)(enum wakefulness wakefulness, twid **pp, + const tw_instr *instr, INT n, INT r, INT m) +{ + switch (wakefulness) { + case SLEEPY: + twiddle_destroy(pp); + break; + default: + mktwiddle(wakefulness, pp, instr, n, r, m); + break; + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,17 @@ +noinst_LIBRARIES=libbench2.a + +libbench2_a_SOURCES=after-ccopy-from.c after-ccopy-to.c \ +after-hccopy-from.c after-hccopy-to.c after-rcopy-from.c \ +after-rcopy-to.c allocate.c aset.c bench-cost-postprocess.c \ +bench-exit.c bench-main.c can-do.c caset.c dotens2.c info.c main.c \ +mflops.c mp.c ovtpvt.c pow2.c problem.c report.c speed.c tensor.c \ +timer.c useropt.c util.c verify-dft.c verify-lib.c verify-r2r.c \ +verify-rdft2.c verify.c zero.c bench-user.h bench.h verify.h \ +my-getopt.c my-getopt.h + +benchmark: all + @echo "nothing to benchmark" + +accuracy: all + @echo "nothing to benchmark" + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,666 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = 
$(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = libbench2 +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LIBRARIES = $(noinst_LIBRARIES) +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +libbench2_a_AR = $(AR) $(ARFLAGS) +libbench2_a_LIBADD = +am_libbench2_a_OBJECTS = after-ccopy-from.$(OBJEXT) \ + after-ccopy-to.$(OBJEXT) after-hccopy-from.$(OBJEXT) \ + after-hccopy-to.$(OBJEXT) after-rcopy-from.$(OBJEXT) \ + after-rcopy-to.$(OBJEXT) allocate.$(OBJEXT) aset.$(OBJEXT) \ + bench-cost-postprocess.$(OBJEXT) bench-exit.$(OBJEXT) \ + bench-main.$(OBJEXT) can-do.$(OBJEXT) caset.$(OBJEXT) \ + dotens2.$(OBJEXT) info.$(OBJEXT) main.$(OBJEXT) \ + mflops.$(OBJEXT) 
mp.$(OBJEXT) ovtpvt.$(OBJEXT) pow2.$(OBJEXT) \ + problem.$(OBJEXT) report.$(OBJEXT) speed.$(OBJEXT) \ + tensor.$(OBJEXT) timer.$(OBJEXT) useropt.$(OBJEXT) \ + util.$(OBJEXT) verify-dft.$(OBJEXT) verify-lib.$(OBJEXT) \ + verify-r2r.$(OBJEXT) verify-rdft2.$(OBJEXT) verify.$(OBJEXT) \ + zero.$(OBJEXT) my-getopt.$(OBJEXT) +libbench2_a_OBJECTS = $(am_libbench2_a_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libbench2_a_SOURCES) +DIST_SOURCES = $(libbench2_a_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the 
standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ 
+NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix 
= @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +noinst_LIBRARIES = libbench2.a +libbench2_a_SOURCES = after-ccopy-from.c after-ccopy-to.c \ +after-hccopy-from.c after-hccopy-to.c after-rcopy-from.c \ +after-rcopy-to.c allocate.c aset.c bench-cost-postprocess.c \ +bench-exit.c bench-main.c can-do.c caset.c dotens2.c info.c main.c \ +mflops.c mp.c ovtpvt.c pow2.c problem.c report.c speed.c tensor.c \ +timer.c useropt.c util.c verify-dft.c verify-lib.c verify-r2r.c \ +verify-rdft2.c verify.c zero.c bench-user.h bench.h verify.h \ +my-getopt.c my-getopt.h + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu libbench2/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu libbench2/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +libbench2.a: $(libbench2_a_OBJECTS) $(libbench2_a_DEPENDENCIES) $(EXTRA_libbench2_a_DEPENDENCIES) + $(AM_V_at)-rm -f libbench2.a + $(AM_V_AR)$(libbench2_a_AR) libbench2.a $(libbench2_a_OBJECTS) $(libbench2_a_LIBADD) + $(AM_V_at)$(RANLIB) libbench2.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-ccopy-from.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-ccopy-to.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-hccopy-from.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-hccopy-to.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-rcopy-from.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-rcopy-to.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocate.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aset.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-cost-postprocess.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-exit.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/bench-main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/can-do.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/caset.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dotens2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mflops.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mp.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/my-getopt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ovtpvt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/report.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/speed.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/useropt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-dft.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-lib.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-r2r.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-rdft2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zero.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" 
+cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +benchmark: all + @echo "nothing to benchmark" + +accuracy: all + @echo "nothing to benchmark" + +# Tell versions [3.59,3.63) of GNU make to not export all variables. 
+# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/after-ccopy-from.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/after-ccopy-from.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,10 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void after_problem_ccopy_from(bench_problem *p, bench_real *ri, bench_real *ii) +{ + UNUSED(p); + UNUSED(ri); + UNUSED(ii); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/after-ccopy-to.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/after-ccopy-to.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,10 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void after_problem_ccopy_to(bench_problem *p, bench_real *ro, bench_real *io) +{ + UNUSED(p); + UNUSED(ro); + UNUSED(io); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/after-hccopy-from.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/after-hccopy-from.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,10 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii) +{ + UNUSED(p); + UNUSED(ri); + UNUSED(ii); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/after-hccopy-to.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/after-hccopy-to.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,10 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void after_problem_hccopy_to(bench_problem *p, bench_real *ro, bench_real *io) +{ + UNUSED(p); + UNUSED(ro); + UNUSED(io); +} diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/libbench2/after-rcopy-from.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/after-rcopy-from.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,9 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void after_problem_rcopy_from(bench_problem *p, bench_real *ri) +{ + UNUSED(p); + UNUSED(ri); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/after-rcopy-to.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/after-rcopy-to.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,9 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void after_problem_rcopy_to(bench_problem *p, bench_real *ro) +{ + UNUSED(p); + UNUSED(ro); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/allocate.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/allocate.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,110 @@ +/* not worth copyrighting */ + + +#include "bench.h" + +static void bounds(bench_problem *p, int *ilb, int *iub, int *olb, int *oub) +{ + bench_tensor *t = tensor_append(p->sz, p->vecsz); + tensor_ibounds(t, ilb, iub); + tensor_obounds(t, olb, oub); + tensor_destroy(t); +} + +/* + * Allocate I/O arrays for a problem. + * + * This is the default routine that can be overridden by the user in + * complicated cases. 
+ */ +void problem_alloc(bench_problem *p) +{ + int ilb, iub, olb, oub; + int isz, osz; + + bounds(p, &ilb, &iub, &olb, &oub); + isz = iub - ilb; + osz = oub - olb; + + if (p->kind == PROBLEM_COMPLEX) { + bench_complex *in, *out; + + p->iphyssz = isz; + p->inphys = in = (bench_complex *) bench_malloc(isz * sizeof(bench_complex)); + p->in = in - ilb; + + if (p->in_place) { + p->out = p->in; + p->outphys = p->inphys; + p->ophyssz = p->iphyssz; + } else { + p->ophyssz = osz; + p->outphys = out = (bench_complex *) bench_malloc(osz * sizeof(bench_complex)); + p->out = out - olb; + } + } else if (p->kind == PROBLEM_R2R) { + bench_real *in, *out; + + p->iphyssz = isz; + p->inphys = in = (bench_real *) bench_malloc(isz * sizeof(bench_real)); + p->in = in - ilb; + + if (p->in_place) { + p->out = p->in; + p->outphys = p->inphys; + p->ophyssz = p->iphyssz; + } else { + p->ophyssz = osz; + p->outphys = out = (bench_real *) bench_malloc(osz * sizeof(bench_real)); + p->out = out - olb; + } + } else if (p->kind == PROBLEM_REAL && p->sign < 0) { /* R2HC */ + bench_real *in; + bench_complex *out; + + isz = isz > osz*2 ? isz : osz*2; + p->iphyssz = isz; + p->inphys = in = (bench_real *) bench_malloc(p->iphyssz * sizeof(bench_real)); + p->in = in - ilb; + + if (p->in_place) { + p->out = p->in; + p->outphys = p->inphys; + p->ophyssz = p->iphyssz / 2; + } else { + p->ophyssz = osz; + p->outphys = out = (bench_complex *) bench_malloc(osz * sizeof(bench_complex)); + p->out = out - olb; + } + } else if (p->kind == PROBLEM_REAL && p->sign > 0) { /* HC2R */ + bench_real *out; + bench_complex *in; + + osz = osz > isz*2 ? 
osz : isz*2; + p->ophyssz = osz; + p->outphys = out = (bench_real *) bench_malloc(p->ophyssz * sizeof(bench_real)); + p->out = out - olb; + + if (p->in_place) { + p->in = p->out; + p->inphys = p->outphys; + p->iphyssz = p->ophyssz / 2; + } else { + p->iphyssz = isz; + p->inphys = in = (bench_complex *) bench_malloc(isz * sizeof(bench_complex)); + p->in = in - ilb; + } + } else { + BENCH_ASSERT(0); /* TODO */ + } +} + +void problem_free(bench_problem *p) +{ + if (p->outphys && p->outphys != p->inphys) + bench_free(p->outphys); + if (p->inphys) + bench_free(p->inphys); + tensor_destroy(p->sz); + tensor_destroy(p->vecsz); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/aset.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/aset.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,10 @@ +/* not worth copyrighting */ + +#include "bench.h" + +void aset(bench_real *A, int n, bench_real x) +{ + int i; + for (i = 0; i < n; ++i) + A[i] = x; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/bench-cost-postprocess.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/bench-cost-postprocess.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,8 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +double bench_cost_postprocess(double cost) +{ + return cost; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/bench-exit.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/bench-exit.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,8 @@ +/* not worth copyrighting */ +#include "bench.h" + +/* default routine, can be overridden by user */ +void bench_exit(int status) +{ + exit(status); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/bench-main.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/bench-main.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,195 
@@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" +#include "my-getopt.h" +#include +#include + +int verbose; + +static const struct my_option options[] = +{ + {"accuracy", REQARG, 'a'}, + {"accuracy-rounds", REQARG, 405}, + {"impulse-accuracy-rounds", REQARG, 406}, + {"can-do", REQARG, 'd'}, + {"help", NOARG, 'h'}, + {"info", REQARG, 'i'}, + {"info-all", NOARG, 'I'}, + {"print-precision", NOARG, 402}, + {"print-time-min", NOARG, 400}, + {"random-seed", REQARG, 404}, + {"report-benchmark", NOARG, 320}, + {"report-mflops", NOARG, 300}, + {"report-time", NOARG, 310}, + {"report-verbose", NOARG, 330}, + {"speed", REQARG, 's'}, + {"setup-speed", REQARG, 'S'}, + {"time-min", REQARG, 't'}, + {"time-repeat", REQARG, 'r'}, + {"user-option", REQARG, 'o'}, + {"verbose", OPTARG, 'v'}, + {"verify", REQARG, 'y'}, + {"verify-rounds", REQARG, 401}, + {"verify-tolerance", REQARG, 403}, + {0, NOARG, 0} +}; + +int bench_main(int argc, char *argv[]) +{ + double tmin = 0.0; + double tol; + int repeat = 0; + int rounds = 10; + int iarounds = 0; + int arounds = 1; /* this is too low for precise results */ + int c; + + report = report_verbose; /* default */ + verbose = 0; + + tol = 
SINGLE_PRECISION ? 1.0e-3 : (QUAD_PRECISION ? 1e-29 : 1.0e-10); + + main_init(&argc, &argv); + + bench_srand(1); + + while ((c = my_getopt (argc, argv, options)) != -1) { + switch (c) { + case 't' : + tmin = strtod(my_optarg, 0); + break; + case 'r': + repeat = atoi(my_optarg); + break; + case 's': + timer_init(tmin, repeat); + speed(my_optarg, 0); + break; + case 'S': + timer_init(tmin, repeat); + speed(my_optarg, 1); + break; + case 'd': + report_can_do(my_optarg); + break; + case 'o': + useropt(my_optarg); + break; + case 'v': + if (verbose >= 0) { /* verbose < 0 disables output */ + if (my_optarg) + verbose = atoi(my_optarg); + else + ++verbose; + } + break; + case 'y': + verify(my_optarg, rounds, tol); + break; + case 'a': + accuracy(my_optarg, arounds, iarounds); + break; + case 'i': + report_info(my_optarg); + break; + case 'I': + report_info_all(); + break; + case 'h': + if (verbose >= 0) my_usage(argv[0], options); + break; + + case 300: /* --report-mflops */ + report = report_mflops; + break; + + case 310: /* --report-time */ + report = report_time; + break; + + case 320: /* --report-benchmark */ + report = report_benchmark; + break; + + case 330: /* --report-verbose */ + report = report_verbose; + break; + + case 400: /* --print-time-min */ + timer_init(tmin, repeat); + ovtpvt("%g\n", time_min); + break; + + case 401: /* --verify-rounds */ + rounds = atoi(my_optarg); + break; + + case 402: /* --print-precision */ + if (SINGLE_PRECISION) + ovtpvt("single\n"); + else if (QUAD_PRECISION) + ovtpvt("quad\n"); + else if (LDOUBLE_PRECISION) + ovtpvt("long-double\n"); + else if (DOUBLE_PRECISION) + ovtpvt("double\n"); + else + ovtpvt("unknown %d\n", sizeof(bench_real)); + break; + + case 403: /* --verify-tolerance */ + tol = strtod(my_optarg, 0); + break; + + case 404: /* --random-seed */ + bench_srand(atoi(my_optarg)); + break; + + case 405: /* --accuracy-rounds */ + arounds = atoi(my_optarg); + break; + + case 406: /* --impulse-accuracy-rounds */ + iarounds = 
atoi(my_optarg); + break; + + case '?': + /* my_getopt() already printed an error message. */ + cleanup(); + return 1; + + default: + abort (); + } + } + + /* assume that any remaining arguments are problems to be + benchmarked */ + while (my_optind < argc) { + timer_init(tmin, repeat); + speed(argv[my_optind++], 0); + } + + cleanup(); + return 0; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/bench-user.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/bench-user.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __BENCH_USER_H__ +#define __BENCH_USER_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* benchmark program definitions for user code */ +#include "config.h" + +#if HAVE_STDDEF_H +#include +#endif + +#if HAVE_STDLIB_H +#include +#endif + +#if defined(BENCHFFT_SINGLE) +typedef float bench_real; +#elif defined(BENCHFFT_LDOUBLE) +typedef long double bench_real; +#elif defined(BENCHFFT_QUAD) +typedef __float128 bench_real; +#else +typedef double bench_real; +#endif + +typedef bench_real bench_complex[2]; + +#define c_re(c) ((c)[0]) +#define c_im(c) ((c)[1]) + +#undef DOUBLE_PRECISION +#define DOUBLE_PRECISION (sizeof(bench_real) == sizeof(double)) +#undef SINGLE_PRECISION +#define SINGLE_PRECISION (!DOUBLE_PRECISION && sizeof(bench_real) == sizeof(float)) +#undef LDOUBLE_PRECISION +#define LDOUBLE_PRECISION (!DOUBLE_PRECISION && sizeof(bench_real) == sizeof(long double)) + +#undef QUAD_PRECISION +#ifdef BENCHFFT_QUAD +#define QUAD_PRECISION (!LDOUBLE_PRECISION && sizeof(bench_real) == sizeof(__float128)) +#else +#define QUAD_PRECISION 0 +#endif + +typedef enum { PROBLEM_COMPLEX, PROBLEM_REAL, PROBLEM_R2R } problem_kind_t; + +typedef enum { + R2R_R2HC, R2R_HC2R, R2R_DHT, + R2R_REDFT00, R2R_REDFT01, R2R_REDFT10, R2R_REDFT11, + R2R_RODFT00, R2R_RODFT01, R2R_RODFT10, R2R_RODFT11 +} r2r_kind_t; + +typedef struct { + int n; + int is; /* input stride */ + int os; /* output stride */ +} bench_iodim; + +typedef struct { + int rnk; + bench_iodim *dims; +} bench_tensor; + +bench_tensor *mktensor(int rnk); +void tensor_destroy(bench_tensor *sz); +int tensor_sz(const bench_tensor *sz); +bench_tensor *tensor_compress(const bench_tensor *sz); +int tensor_unitstridep(bench_tensor *t); +int tensor_rowmajorp(bench_tensor *t); +int 
tensor_real_rowmajorp(bench_tensor *t, int sign, int in_place); +bench_tensor *tensor_append(const bench_tensor *a, const bench_tensor *b); +bench_tensor *tensor_copy(const bench_tensor *sz); +bench_tensor *tensor_copy_sub(const bench_tensor *sz, int start_dim, int rnk); +bench_tensor *tensor_copy_swapio(const bench_tensor *sz); +void tensor_ibounds(bench_tensor *t, int *lbp, int *ubp); +void tensor_obounds(bench_tensor *t, int *lbp, int *ubp); + +/* + Definition of rank -infinity. + This definition has the property that if you want rank 0 or 1, + you can simply test for rank <= 1. This is a common case. + + A tensor of rank -infinity has size 0. +*/ +#define RNK_MINFTY ((int)(((unsigned) -1) >> 1)) +#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY) + +typedef struct { + problem_kind_t kind; + r2r_kind_t *k; + bench_tensor *sz; + bench_tensor *vecsz; + int sign; + int in_place; + int destroy_input; + int split; + void *in, *out; + void *inphys, *outphys; + int iphyssz, ophyssz; + char *pstring; + void *userinfo; /* user can store whatever */ + int scrambled_in, scrambled_out; /* hack for MPI */ + + /* internal hack so that we can use verifier in FFTW test program */ + void *ini, *outi; /* if nonzero, point to imag. 
parts for dft */ + + /* another internal hack to avoid passing around too many parameters */ + double setup_time; +} bench_problem; + +extern int verbose; + +extern int no_speed_allocation; + +extern int always_pad_real; + +#define LIBBENCH_TIMER 0 +#define USER_TIMER 1 +#define BENCH_NTIMERS 2 +extern void timer_start(int which_timer); +extern double timer_stop(int which_timer); + +extern int can_do(bench_problem *p); +extern void setup(bench_problem *p); +extern void doit(int iter, bench_problem *p); +extern void done(bench_problem *p); +extern void main_init(int *argc, char ***argv); +extern void cleanup(void); +extern void verify(const char *param, int rounds, double tol); +extern void useropt(const char *arg); + +extern void verify_problem(bench_problem *p, int rounds, double tol); + +extern void problem_alloc(bench_problem *p); +extern void problem_free(bench_problem *p); +extern void problem_zero(bench_problem *p); +extern void problem_destroy(bench_problem *p); + +extern int power_of_two(int n); +extern int log_2(int n); + + +#define CASSIGN(out, in) (c_re(out) = c_re(in), c_im(out) = c_im(in)) + +bench_tensor *verify_pack(const bench_tensor *sz, int s); + +typedef struct { + double l; + double i; + double s; +} errors; + +void verify_dft(bench_problem *p, int rounds, double tol, errors *e); +void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e); +void verify_r2r(bench_problem *p, int rounds, double tol, errors *e); + +/**************************************************************/ +/* routines to override */ + +extern void after_problem_ccopy_from(bench_problem *p, bench_real *ri, bench_real *ii); +extern void after_problem_ccopy_to(bench_problem *p, bench_real *ro, bench_real *io); +extern void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii); +extern void after_problem_hccopy_to(bench_problem *p, bench_real *ro, bench_real *io); +extern void after_problem_rcopy_from(bench_problem *p, bench_real *ri); +extern 
void after_problem_rcopy_to(bench_problem *p, bench_real *ro); +extern void bench_exit(int status); +extern double bench_cost_postprocess(double cost); + +/************************************************************** + * malloc + **************************************************************/ +extern void *bench_malloc(size_t size); +extern void bench_free(void *ptr); +extern void bench_free0(void *ptr); + +/************************************************************** + * alloca + **************************************************************/ +#ifdef HAVE_ALLOCA_H +#include +#endif + +/************************************************************** + * assert + **************************************************************/ +extern void bench_assertion_failed(const char *s, int line, const char *file); +#define BENCH_ASSERT(ex) \ + (void)((ex) || (bench_assertion_failed(#ex, __LINE__, __FILE__), 0)) + +#define UNUSED(x) (void)x + +/*************************************** + * Documentation strings + ***************************************/ +struct bench_doc { + const char *key; + const char *val; + const char *(*f)(void); +}; + +extern struct bench_doc bench_doc[]; + +#ifdef CC +#define CC_DOC BENCH_DOC("cc", CC) +#elif defined(BENCH_CC) +#define CC_DOC BENCH_DOC("cc", BENCH_CC) +#else +#define CC_DOC /* none */ +#endif + +#ifdef CXX +#define CXX_DOC BENCH_DOC("cxx", CXX) +#elif defined(BENCH_CXX) +#define CXX_DOC BENCH_DOC("cxx", BENCH_CXX) +#else +#define CXX_DOC /* none */ +#endif + +#ifdef F77 +#define F77_DOC BENCH_DOC("f77", F77) +#elif defined(BENCH_F77) +#define F77_DOC BENCH_DOC("f77", BENCH_F77) +#else +#define F77_DOC /* none */ +#endif + +#ifdef F90 +#define F90_DOC BENCH_DOC("f90", F90) +#elif defined(BENCH_F90) +#define F90_DOC BENCH_DOC("f90", BENCH_F90) +#else +#define F90_DOC /* none */ +#endif + +#define BEGIN_BENCH_DOC \ +struct bench_doc bench_doc[] = { \ + CC_DOC \ + CXX_DOC \ + F77_DOC \ + F90_DOC + +#define BENCH_DOC(key, val) { key, val, 
0 }, +#define BENCH_DOCF(key, f) { key, 0, f }, + +#define END_BENCH_DOC \ + {0, 0, 0}}; + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __BENCH_USER_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/bench.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/bench.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* benchmark program definitions */ +#include "bench-user.h" + +extern double time_min; +extern int time_repeat; + +extern void timer_init(double tmin, int repeat); + +/* report functions */ +extern void (*report)(const bench_problem *p, double *t, int st); + +void report_mflops(const bench_problem *p, double *t, int st); +void report_time(const bench_problem *p, double *t, int st); +void report_benchmark(const bench_problem *p, double *t, int st); +void report_verbose(const bench_problem *p, double *t, int st); + +void report_can_do(const char *param); +void report_info(const char *param); +void report_info_all(void); + +extern int aligned_main(int argc, char *argv[]); +extern int bench_main(int argc, char *argv[]); + +extern void speed(const char *param, int setup_only); +extern void accuracy(const char *param, int rounds, int impulse_rounds); + +extern double mflops(const bench_problem *p, double t); + +extern double bench_drand(void); +extern void bench_srand(int seed); + +extern bench_problem *problem_parse(const char *desc); + +extern void ovtpvt(const char *format, ...); +extern void ovtpvt_err(const char *format, ...); + +extern void fftaccuracy(int n, bench_complex *a, bench_complex *ffta, + int sign, double err[6]); +extern void fftaccuracy_done(void); + +extern void caset(bench_complex *A, int n, bench_complex x); +extern void aset(bench_real *A, int n, bench_real x); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/can-do.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/can-do.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" +#include + +void report_can_do(const char *param) +{ + bench_problem *p; + p = problem_parse(param); + ovtpvt("#%c\n", can_do(p) ? 't' : 'f'); + problem_destroy(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/caset.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/caset.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,12 @@ +/* not worth copyrighting */ + +#include "bench.h" + +void caset(bench_complex *A, int n, bench_complex x) +{ + int i; + for (i = 0; i < n; ++i) { + c_re(A[i]) = c_re(x); + c_im(A[i]) = c_im(x); + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/dotens2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/dotens2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "verify.h" + +static void recur(int rnk, const bench_iodim *dims0, const bench_iodim *dims1, + dotens2_closure *k, + int indx0, int ondx0, int indx1, int ondx1) +{ + if (rnk == 0) + k->apply(k, indx0, ondx0, indx1, ondx1); + else { + int i, n = dims0[0].n; + int is0 = dims0[0].is; + int os0 = dims0[0].os; + int is1 = dims1[0].is; + int os1 = dims1[0].os; + + BENCH_ASSERT(n == dims1[0].n); + + for (i = 0; i < n; ++i) { + recur(rnk - 1, dims0 + 1, dims1 + 1, k, + indx0, ondx0, indx1, ondx1); + indx0 += is0; ondx0 += os0; + indx1 += is1; ondx1 += os1; + } + } +} + +void bench_dotens2(const bench_tensor *sz0, const bench_tensor *sz1, dotens2_closure *k) +{ + BENCH_ASSERT(sz0->rnk == sz1->rnk); + if (sz0->rnk == RNK_MINFTY) + return; + recur(sz0->rnk, sz0->dims, sz1->dims, k, 0, 0, 0, 0); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/info.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/info.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" +#include +#include + +void report_info(const char *param) +{ + struct bench_doc *p; + + for (p = bench_doc; p->key; ++p) { + if (!strcmp(param, p->key)) { + if (!p->val) + p->val = p->f(); + + ovtpvt("%s\n", p->val); + } + } +} + +void report_info_all(void) +{ + struct bench_doc *p; + + /* + * TODO: escape quotes? The format is not unambigously + * parseable if the info string contains double quotes. + */ + for (p = bench_doc; p->key; ++p) { + if (!p->val) + p->val = p->f(); + ovtpvt("(%s \"%s\")\n", p->key, p->val); + } + ovtpvt("(benchmark-precision \"%s\")\n", + SINGLE_PRECISION ? "single" : + (LDOUBLE_PRECISION ? "long-double" : + (QUAD_PRECISION ? "quad" : "double"))); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/main.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/main.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" + +/* On some systems, we are required to define a dummy main-like + routine (called "MAIN__" or something similar in order to link a C + main() with the Fortran libraries). This is detected by autoconf; + see the autoconf 2.52 or later manual. */ +#ifdef F77_DUMMY_MAIN +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } +#endif + +/* in a separate file so that the user can override it */ +int main(int argc, char *argv[]) +{ + return bench_main(argc, argv); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/mflops.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/mflops.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* not worth copyrighting */ + +#include "bench.h" +#include + +double mflops(const bench_problem *p, double t) +{ + int size = tensor_sz(p->sz); + int vsize = tensor_sz(p->vecsz); + + if (size <= 1) /* a copy: just return reals copied / time */ + switch (p->kind) { + case PROBLEM_COMPLEX: + return (2.0 * size * vsize / (t * 1.0e6)); + case PROBLEM_REAL: + case PROBLEM_R2R: + return (1.0 * size * vsize / (t * 1.0e6)); + } + + switch (p->kind) { + case PROBLEM_COMPLEX: + return (5.0 * size * vsize * log((double)size) / + (log(2.0) * t * 1.0e6)); + case PROBLEM_REAL: + case PROBLEM_R2R: + return (2.5 * vsize * size * log((double) size) / + (log(2.0) * t * 1.0e6)); + } + BENCH_ASSERT(0 /* can't happen */); + return 0.0; +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/mp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/mp.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,641 @@ +#include "config.h" +#include "bench.h" 
+#include + +#define DG unsigned short +#define ACC unsigned long +#define REAL bench_real +#define BITS_IN_REAL 53 /* mantissa */ + +#define SHFT 16 +#define RADIX 65536L +#define IRADIX (1.0 / RADIX) +#define LO(x) ((x) & (RADIX - 1)) +#define HI(x) ((x) >> SHFT) +#define HI_SIGNED(x) \ + ((((x) + (ACC)(RADIX >> 1) * RADIX) >> SHFT) - (RADIX >> 1)) +#define ZEROEXP (-32768) + +#define LEN 10 + +typedef struct { + short sign; + short expt; + DG d[LEN]; +} N[1]; + +#define EXA a->expt +#define EXB b->expt +#define EXC c->expt + +#define AD a->d +#define BD b->d + +#define SGNA a->sign +#define SGNB b->sign + +static const N zero = {{ 1, ZEROEXP, {0} }}; + +static void cpy(const N a, N b) +{ + *b = *a; +} + +static void fromreal(REAL x, N a) +{ + int i, e; + + cpy(zero, a); + if (x == 0.0) return; + + if (x >= 0) { SGNA = 1; } + else { SGNA = -1; x = -x; } + + e = 0; + while (x >= 1.0) { x *= IRADIX; ++e; } + while (x < IRADIX) { x *= RADIX; --e; } + EXA = e; + + for (i = LEN - 1; i >= 0 && x != 0.0; --i) { + REAL y; + + x *= RADIX; + y = (REAL) ((int) x); + AD[i] = (DG)y; + x -= y; + } +} + +static void fromshort(int x, N a) +{ + cpy(zero, a); + + if (x < 0) { x = -x; SGNA = -1; } + else { SGNA = 1; } + EXA = 1; + AD[LEN - 1] = x; +} + +static void pack(DG *d, int e, int s, int l, N a) +{ + int i, j; + + for (i = l - 1; i >= 0; --i, --e) + if (d[i] != 0) + break; + + if (i < 0) { + /* number is zero */ + cpy(zero, a); + } else { + EXA = e; + SGNA = s; + + if (i >= LEN - 1) { + for (j = LEN - 1; j >= 0; --i, --j) + AD[j] = d[i]; + } else { + for (j = LEN - 1; i >= 0; --i, --j) + AD[j] = d[i]; + for ( ; j >= 0; --j) + AD[j] = 0; + } + } +} + + +/* compare absolute values */ +static int abscmp(const N a, const N b) +{ + int i; + if (EXA > EXB) return 1; + if (EXA < EXB) return -1; + for (i = LEN - 1; i >= 0; --i) { + if (AD[i] > BD[i]) + return 1; + if (AD[i] < BD[i]) + return -1; + } + return 0; +} + +static int eq(const N a, const N b) +{ + return (SGNA == SGNB) && 
(abscmp(a, b) == 0); +} + +/* add magnitudes, for |a| >= |b| */ +static void addmag0(int s, const N a, const N b, N c) +{ + int ia, ib; + ACC r = 0; + DG d[LEN + 1]; + + for (ia = 0, ib = EXA - EXB; ib < LEN; ++ia, ++ib) { + r += (ACC)AD[ia] + (ACC)BD[ib]; + d[ia] = LO(r); + r = HI(r); + } + for (; ia < LEN; ++ia) { + r += (ACC)AD[ia]; + d[ia] = LO(r); + r = HI(r); + } + d[ia] = LO(r); + pack(d, EXA + 1, s * SGNA, LEN + 1, c); +} + +static void addmag(int s, const N a, const N b, N c) +{ + if (abscmp(a, b) > 0) addmag0(1, a, b, c); else addmag0(s, b, a, c); +} + +/* subtract magnitudes, for |a| >= |b| */ +static void submag0(int s, const N a, const N b, N c) +{ + int ia, ib; + ACC r = 0; + DG d[LEN]; + + for (ia = 0, ib = EXA - EXB; ib < LEN; ++ia, ++ib) { + r += (ACC)AD[ia] - (ACC)BD[ib]; + d[ia] = LO(r); + r = HI_SIGNED(r); + } + for (; ia < LEN; ++ia) { + r += (ACC)AD[ia]; + d[ia] = LO(r); + r = HI_SIGNED(r); + } + + pack(d, EXA, s * SGNA, LEN, c); +} + +static void submag(int s, const N a, const N b, N c) +{ + if (abscmp(a, b) > 0) submag0(1, a, b, c); else submag0(s, b, a, c); +} + +/* c = a + b */ +static void add(const N a, const N b, N c) +{ + if (SGNA == SGNB) addmag(1, a, b, c); else submag(1, a, b, c); +} + +static void sub(const N a, const N b, N c) +{ + if (SGNA == SGNB) submag(-1, a, b, c); else addmag(-1, a, b, c); +} + +static void mul(const N a, const N b, N c) +{ + DG d[2 * LEN]; + int i, j, k; + ACC r; + + for (i = 0; i < LEN; ++i) + d[2 * i] = d[2 * i + 1] = 0; + + for (i = 0; i < LEN; ++i) { + ACC ai = AD[i]; + if (ai) { + r = 0; + for (j = 0, k = i; j < LEN; ++j, ++k) { + r += ai * (ACC)BD[j] + (ACC)d[k]; + d[k] = LO(r); + r = HI(r); + } + d[k] = LO(r); + } + } + + pack(d, EXA + EXB, SGNA * SGNB, 2 * LEN, c); +} + +static REAL toreal(const N a) +{ + REAL h, l, f; + int i, bits; + ACC r; + DG sticky; + + if (EXA != ZEROEXP) { + f = IRADIX; + i = LEN; + + bits = 0; + h = (r = AD[--i]) * f; f *= IRADIX; + for (bits = 0; r > 0; ++bits) + r >>= 1; 
+ + /* first digit */ + while (bits + SHFT <= BITS_IN_REAL) { + h += AD[--i] * f; f *= IRADIX; bits += SHFT; + } + + /* guard digit (leave one bit for sticky bit, hence `<' instead + of `<=') */ + bits = 0; l = 0.0; + while (bits + SHFT < BITS_IN_REAL) { + l += AD[--i] * f; f *= IRADIX; bits += SHFT; + } + + /* sticky bit */ + sticky = 0; + while (i > 0) + sticky |= AD[--i]; + + if (sticky) + l += (RADIX / 2) * f; + + h += l; + + for (i = 0; i < EXA; ++i) h *= (REAL)RADIX; + for (i = 0; i > EXA; --i) h *= IRADIX; + if (SGNA == -1) h = -h; + return h; + } else { + return 0.0; + } +} + +static void neg(N a) +{ + SGNA = -SGNA; +} + +static void inv(const N a, N x) +{ + N w, z, one, two; + + fromreal(1.0 / toreal(a), x); /* initial guess */ + fromshort(1, one); + fromshort(2, two); + + for (;;) { + /* Newton */ + mul(a, x, w); + sub(two, w, z); + if (eq(one, z)) break; + mul(x, z, x); + } +} + + +/* 2 pi */ +static const N n2pi = {{ + 1, 1, + {18450, 59017, 1760, 5212, 9779, 4518, 2886, 54545, 18558, 6} +}}; + +/* 1 / 31! */ +static const N i31fac = {{ + 1, -7, + {28087, 45433, 51357, 24545, 14291, 3954, 57879, 8109, 38716, 41382} +}}; + + +/* 1 / 32! 
*/ +static const N i32fac = {{ + 1, -7, + {52078, 60811, 3652, 39679, 37310, 47227, 28432, 57597, 13497, 1293} +}}; + +static void msin(const N a, N b) +{ + N a2, g, k; + int i; + + cpy(i31fac, g); + cpy(g, b); + mul(a, a, a2); + + /* Taylor */ + for (i = 31; i > 1; i -= 2) { + fromshort(i * (i - 1), k); + mul(k, g, g); + mul(a2, b, k); + sub(g, k, b); + } + mul(a, b, b); +} + +static void mcos(const N a, N b) +{ + N a2, g, k; + int i; + + cpy(i32fac, g); + cpy(g, b); + mul(a, a, a2); + + /* Taylor */ + for (i = 32; i > 0; i -= 2) { + fromshort(i * (i - 1), k); + mul(k, g, g); + mul(a2, b, k); + sub(g, k, b); + } +} + +static void by2pi(REAL m, REAL n, N a) +{ + N b; + + fromreal(n, b); + inv(b, a); + fromreal(m, b); + mul(a, b, a); + mul(n2pi, a, a); +} + +static void sin2pi(REAL m, REAL n, N a); +static void cos2pi(REAL m, REAL n, N a) +{ + N b; + if (m < 0) cos2pi(-m, n, a); + else if (m > n * 0.5) cos2pi(n - m, n, a); + else if (m > n * 0.25) {sin2pi(m - n * 0.25, n, a); neg(a);} + else if (m > n * 0.125) sin2pi(n * 0.25 - m, n, a); + else { by2pi(m, n, b); mcos(b, a); } +} + +static void sin2pi(REAL m, REAL n, N a) +{ + N b; + if (m < 0) {sin2pi(-m, n, a); neg(a);} + else if (m > n * 0.5) {sin2pi(n - m, n, a); neg(a);} + else if (m > n * 0.25) {cos2pi(m - n * 0.25, n, a);} + else if (m > n * 0.125) {cos2pi(n * 0.25 - m, n, a);} + else {by2pi(m, n, b); msin(b, a);} +} + +/*----------------------------------------------------------------------*/ +/* FFT stuff */ + +/* (r0 + i i0)(r1 + i i1) */ +static void cmul(N r0, N i0, N r1, N i1, N r2, N i2) +{ + N s, t, q; + mul(r0, r1, s); + mul(i0, i1, t); + sub(s, t, q); + mul(r0, i1, s); + mul(i0, r1, t); + add(s, t, i2); + cpy(q, r2); +} + +/* (r0 - i i0)(r1 + i i1) */ +static void cmulj(N r0, N i0, N r1, N i1, N r2, N i2) +{ + N s, t, q; + mul(r0, r1, s); + mul(i0, i1, t); + add(s, t, q); + mul(r0, i1, s); + mul(i0, r1, t); + sub(s, t, i2); + cpy(q, r2); +} + +static void mcexp(int m, int n, N r, N i) +{ + static int 
cached_n = -1; + static N w[64][2]; + int k, j; + if (n != cached_n) { + for (j = 1, k = 0; j < n; j += j, ++k) { + cos2pi(j, n, w[k][0]); + sin2pi(j, n, w[k][1]); + } + cached_n = n; + } + + fromshort(1, r); + fromshort(0, i); + if (m > 0) { + for (k = 0; m; ++k, m >>= 1) + if (m & 1) + cmul(w[k][0], w[k][1], r, i, r, i); + } else { + m = -m; + for (k = 0; m; ++k, m >>= 1) + if (m & 1) + cmulj(w[k][0], w[k][1], r, i, r, i); + } +} + +static void bitrev(int n, N *a) +{ + int i, j, m; + for (i = j = 0; i < n - 1; ++i) { + if (i < j) { + N t; + cpy(a[2*i], t); cpy(a[2*j], a[2*i]); cpy(t, a[2*j]); + cpy(a[2*i+1], t); cpy(a[2*j+1], a[2*i+1]); cpy(t, a[2*j+1]); + } + + /* bit reversed counter */ + m = n; do { m >>= 1; j ^= m; } while (!(j & m)); + } +} + +static void fft0(int n, N *a, int sign) +{ + int i, j, k; + + bitrev(n, a); + for (i = 1; i < n; i = 2 * i) { + for (j = 0; j < i; ++j) { + N wr, wi; + mcexp(sign * (int)j, 2 * i, wr, wi); + for (k = j; k < n; k += 2 * i) { + N *a0 = a + 2 * k; + N *a1 = a0 + 2 * i; + N r0, i0, r1, i1, t0, t1, xr, xi; + cpy(a0[0], r0); cpy(a0[1], i0); + cpy(a1[0], r1); cpy(a1[1], i1); + mul(r1, wr, t0); mul(i1, wi, t1); sub(t0, t1, xr); + mul(r1, wi, t0); mul(i1, wr, t1); add(t0, t1, xi); + add(r0, xr, a0[0]); add(i0, xi, a0[1]); + sub(r0, xr, a1[0]); sub(i0, xi, a1[1]); + } + } + } +} + +/* a[2*k]+i*a[2*k+1] = exp(2*pi*i*k^2/(2*n)) */ +static void bluestein_sequence(int n, N *a) +{ + int k, ksq, n2 = 2 * n; + + ksq = 1; /* (-1)^2 */ + for (k = 0; k < n; ++k) { + /* careful with overflow */ + ksq = ksq + 2*k - 1; while (ksq > n2) ksq -= n2; + mcexp(ksq, n2, a[2*k], a[2*k+1]); + } +} + +static int pow2_atleast(int x) +{ + int h; + for (h = 1; h < x; h = 2 * h) + ; + return h; +} + +static N *cached_bluestein_w = 0; +static N *cached_bluestein_y = 0; +static int cached_bluestein_n = -1; + +static void bluestein(int n, N *a) +{ + int nb = pow2_atleast(2 * n); + N *b = (N *)bench_malloc(2 * nb * sizeof(N)); + N *w = cached_bluestein_w; + N 
*y = cached_bluestein_y; + N nbinv; + int i; + + fromreal(1.0 / nb, nbinv); /* exact because nb = 2^k */ + + if (cached_bluestein_n != n) { + if (w) bench_free(w); + if (y) bench_free(y); + w = (N *)bench_malloc(2 * n * sizeof(N)); + y = (N *)bench_malloc(2 * nb * sizeof(N)); + cached_bluestein_n = n; + cached_bluestein_w = w; + cached_bluestein_y = y; + + bluestein_sequence(n, w); + for (i = 0; i < 2*nb; ++i) cpy(zero, y[i]); + + for (i = 0; i < n; ++i) { + cpy(w[2*i], y[2*i]); + cpy(w[2*i+1], y[2*i+1]); + } + for (i = 1; i < n; ++i) { + cpy(w[2*i], y[2*(nb-i)]); + cpy(w[2*i+1], y[2*(nb-i)+1]); + } + + fft0(nb, y, -1); + } + + for (i = 0; i < 2*nb; ++i) cpy(zero, b[i]); + + for (i = 0; i < n; ++i) + cmulj(w[2*i], w[2*i+1], a[2*i], a[2*i+1], b[2*i], b[2*i+1]); + + /* scaled convolution b * y */ + fft0(nb, b, -1); + + for (i = 0; i < nb; ++i) + cmul(b[2*i], b[2*i+1], y[2*i], y[2*i+1], b[2*i], b[2*i+1]); + fft0(nb, b, 1); + + for (i = 0; i < n; ++i) { + cmulj(w[2*i], w[2*i+1], b[2*i], b[2*i+1], a[2*i], a[2*i+1]); + mul(nbinv, a[2*i], a[2*i]); + mul(nbinv, a[2*i+1], a[2*i+1]); + } + + bench_free(b); +} + +static void swapri(int n, N *a) +{ + int i; + for (i = 0; i < n; ++i) { + N t; + cpy(a[2 * i], t); + cpy(a[2 * i + 1], a[2 * i]); + cpy(t, a[2 * i + 1]); + } +} + +static void fft1(int n, N *a, int sign) +{ + if (power_of_two(n)) { + fft0(n, a, sign); + } else { + if (sign == 1) swapri(n, a); + bluestein(n, a); + if (sign == 1) swapri(n, a); + } +} + +static void fromrealv(int n, bench_complex *a, N *b) +{ + int i; + + for (i = 0; i < n; ++i) { + fromreal(c_re(a[i]), b[2 * i]); + fromreal(c_im(a[i]), b[2 * i + 1]); + } +} + +static void compare(int n, N *a, N *b, double *err) +{ + int i; + double e1, e2, einf; + double n1, n2, ninf; + + e1 = e2 = einf = 0.0; + n1 = n2 = ninf = 0.0; + +# define DO(x1, x2, xinf, var) { \ + double d = var; \ + if (d < 0) d = -d; \ + x1 += d; x2 += d * d; if (d > xinf) xinf = d; \ +} + + for (i = 0; i < 2 * n; ++i) { + N dd; + sub(a[i], 
b[i], dd); + DO(n1, n2, ninf, toreal(a[i])); + DO(e1, e2, einf, toreal(dd)); + } + +# undef DO + err[0] = e1 / n1; + err[1] = sqrt(e2 / n2); + err[2] = einf / ninf; +} + +void fftaccuracy(int n, bench_complex *a, bench_complex *ffta, + int sign, double err[6]) +{ + N *b = (N *)bench_malloc(2 * n * sizeof(N)); + N *fftb = (N *)bench_malloc(2 * n * sizeof(N)); + N mn, ninv; + int i; + + fromreal(n, mn); inv(mn, ninv); + + /* forward error */ + fromrealv(n, a, b); fromrealv(n, ffta, fftb); + fft1(n, b, sign); + compare(n, b, fftb, err); + + /* backward error */ + fromrealv(n, a, b); fromrealv(n, ffta, fftb); + for (i = 0; i < 2 * n; ++i) mul(fftb[i], ninv, fftb[i]); + fft1(n, fftb, -sign); + compare(n, b, fftb, err + 3); + + bench_free(fftb); + bench_free(b); +} + +void fftaccuracy_done(void) +{ + if (cached_bluestein_w) bench_free(cached_bluestein_w); + if (cached_bluestein_y) bench_free(cached_bluestein_y); + cached_bluestein_w = 0; + cached_bluestein_y = 0; + cached_bluestein_n = -1; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/my-getopt.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/my-getopt.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include + +#include "config.h" +#include "my-getopt.h" + +int my_optind = 1; +const char *my_optarg = 0; +static const char *scan_pointer = 0; + +void my_usage(const char *progname, const struct my_option *opt) +{ + int i; + size_t col = 0; + + fprintf(stdout, "Usage: %s", progname); + col += (strlen(progname) + 7); + for (i = 0; opt[i].long_name; i++) { + size_t option_len; + + option_len = strlen(opt[i].long_name); + if (col >= 80 - (option_len + 16)) { + fputs("\n\t", stdout); + col = 8; + } + fprintf(stdout, " [--%s", opt[i].long_name); + col += (option_len + 4); + if (opt[i].short_name < 128) { + fprintf(stdout, " | -%c", opt[i].short_name); + col += 5; + } + switch (opt[i].argtype) { + case REQARG: + fputs(" arg]", stdout); + col += 5; + break; + case OPTARG: + fputs(" [arg]]", stdout); + col += 10; + break; + default: + fputs("]", stdout); + col++; + } + } + + fputs ("\n", stdout); +} + +int my_getopt(int argc, char *argv[], const struct my_option *optarray) +{ + const char *p; + const struct my_option *l; + + if (scan_pointer && *scan_pointer) { + /* continue a previously scanned argv[] element */ + p = scan_pointer; + goto short_option; + } else { + /* new argv[] element */ + if (my_optind >= argc) + return -1; /* no more options */ + + p = argv[my_optind]; + + if (*p++ != '-') + return (-1); /* not an option */ + + if (!*p) + return (-1); /* string is exactly '-' */ + + ++my_optind; + } + + if (*p == '-') { + /* long option */ + scan_pointer = 0; + my_optarg = 0; + + ++p; + + for (l = optarray; l->short_name; ++l) { + size_t len = strlen(l->long_name); + if (!strncmp(l->long_name, p, len) && + (!p[len] || p[len] == '=')) { + switch (l->argtype) { + case NOARG: + goto ok; + case OPTARG: + if (p[len] == '=') + my_optarg 
= p + len + 1; + goto ok; + case REQARG: + if (p[len] == '=') { + my_optarg = p + len + 1; + goto ok; + } + if (my_optind >= argc) { + fprintf(stderr, + "option --%s requires an argument\n", + l->long_name); + return '?'; + } + my_optarg = argv[my_optind]; + ++my_optind; + goto ok; + } + } + } + } else { + short_option: + scan_pointer = 0; + my_optarg = 0; + + for (l = optarray; l->short_name; ++l) { + if (l->short_name == (char)l->short_name && + *p == l->short_name) { + ++p; + switch (l->argtype) { + case NOARG: + scan_pointer = p; + goto ok; + case OPTARG: + if (*p) + my_optarg = p; + goto ok; + case REQARG: + if (*p) { + my_optarg = p; + } else { + if (my_optind >= argc) { + fprintf(stderr, + "option -%c requires an argument\n", + l->short_name); + return '?'; + } + my_optarg = argv[my_optind]; + ++my_optind; + } + goto ok; + } + } + } + } + + fprintf(stderr, "unrecognized option %s\n", argv[my_optind - 1]); + return '?'; + + ok: + return l->short_name; +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/my-getopt.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/my-getopt.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __MY_GETOPT_H__ +#define __MY_GETOPT_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +enum { REQARG, OPTARG, NOARG }; + +struct my_option { + const char *long_name; + int argtype; + int short_name; +}; + +extern int my_optind; +extern const char *my_optarg; + +extern void my_usage(const char *progname, const struct my_option *opt); +extern int my_getopt(int argc, char *argv[], const struct my_option *optarray); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __MY_GETOPT_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/ovtpvt.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/ovtpvt.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +#include +#include +#include +#include "bench.h" + +void ovtpvt(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + if (verbose >= 0) + vfprintf(stdout, format, ap); + va_end(ap); + fflush(stdout); +} + +void ovtpvt_err(const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + if (verbose >= 0) { + fflush(stdout); + vfprintf(stderr, format, ap); + } + va_end(ap); + fflush(stdout); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/pow2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/pow2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,6 @@ +#include "bench.h" + +int power_of_two(int n) +{ + return (((n) > 0) && (((n) & ((n) - 1)) == 0)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +/* NOTE(review): the four bare #include directives below have no header name in this copy (apparently lost in extraction); restore them from upstream. */ + +#include "config.h" +#include "bench.h" +#include +#include +#include +#include + +int always_pad_real = 0; /* by default, only pad in-place case */ +/* How a dimension length n is mapped when deriving strides: SAME keeps n, PADDED gives 2*(n/2+1), HALFISH gives n/2+1 (see transform_n). */ +typedef enum { + SAME, PADDED, HALFISH +} n_transform; + +/* funny transformations for last dimension of PROBLEM_REAL */ +static int transform_n(int n, n_transform nt) +{ + switch (nt) { + case SAME: return n; + case PADDED: return 2*(n/2+1); + case HALFISH: return (n/2+1); + default: BENCH_ASSERT(0); return 0; + } +} + +/* do what I mean: walking t from its last dimension to its first, replace every zero input or output stride by the stride of the previously visited dimension times that dimension length (the length is transformed by nti or nto when the previous dimension is dt). The pointer last_iodim supplies the starting dimension and receives the last one visited. Tensors of non-finite rank or of rank below 1 are returned untouched. */ +static bench_tensor *dwim(bench_tensor *t, bench_iodim **last_iodim, + n_transform nti, n_transform nto, + bench_iodim *dt) +{ + int i; + bench_iodim *d, *d1; + + if (!FINITE_RNK(t->rnk) || t->rnk < 1) + return t; + + i = t->rnk; + d1 = *last_iodim; + + while (--i >= 0) { + d = t->dims + i; + if (!d->is) + d->is = d1->is * transform_n(d1->n, d1==dt ? nti : SAME); + if (!d->os) + d->os = d1->os * transform_n(d1->n, d1==dt ? 
nto : SAME); + d1 = d; + } + + *last_iodim = d1; + return t; +} +/* For a tensor of finite rank 2 or more, rewrite the output strides of the first two dimensions: dims[0].os takes the value of dims[1].os, then dims[1].os becomes that value times dims[0].n. Input strides are not touched. */ +static void transpose_tensor(bench_tensor *t) +{ + if (!FINITE_RNK(t->rnk) || t->rnk < 2) + return; + + t->dims[0].os = t->dims[1].os; + t->dims[1].os = t->dims[0].os * t->dims[0].n; +} +/* Parse an optionally signed decimal integer at s into n (at least one digit is asserted), then apply an optional k or K suffix (times 1024) and an optional m or M suffix (times 1024*1024). Returns a pointer to the first unconsumed character. No overflow check is made. */ +static const char *parseint(const char *s, int *n) +{ + int sign = 1; + + *n = 0; + + if (*s == '-') { + sign = -1; + ++s; + } else if (*s == '+') { + sign = +1; + ++s; + } + + BENCH_ASSERT(isdigit(*s)); + while (isdigit(*s)) { + *n = *n * 10 + (*s - '0'); + ++s; + } + + *n *= sign; + + if (*s == 'k' || *s == 'K') { + *n *= 1024; + ++s; + } + + if (*s == 'm' || *s == 'M') { + *n *= 1024 * 1024; + ++s; + } + + return s; +} +/* Linked-list node holding one parsed dimension (car), its r2r kind (k) and the next node (cdr). */ +struct dimlist { bench_iodim car; r2r_kind_t k; struct dimlist *cdr; }; +/* Parse a list of dimensions separated by x or X, each written n[:is[:os]] followed by an optional kind code (f, b, h, eNN or oNN; R2R_R2HC when absent). Unspecified strides are stored as 0. The resulting tensor is returned through tp; when k is non-null and at least one dimension was read, an array of the per-dimension kinds is allocated and returned through k. Returns a pointer to the first unconsumed character. 
*/ +static const char *parsetensor(const char *s, bench_tensor **tp, + r2r_kind_t **k) +{ + struct dimlist *l = 0, *m; + bench_tensor *t; + int rnk = 0; + + L1: + m = (struct dimlist *)bench_malloc(sizeof(struct dimlist)); + /* nconc onto l */ + m->cdr = l; l = m; + ++rnk; + + s = parseint(s, &m->car.n); + + if (*s == ':') { + /* read input stride */ + ++s; + s = parseint(s, &m->car.is); + if (*s == ':') { + /* read output stride */ + ++s; + s = parseint(s, &m->car.os); + } else { + /* default */ + m->car.os = m->car.is; + } + } else { + m->car.is = 0; + m->car.os = 0; + } + + if (*s == 'f' || *s == 'F') { + m->k = R2R_R2HC; + ++s; + } + else if (*s == 'b' || *s == 'B') { + m->k = R2R_HC2R; + ++s; + } + else if (*s == 'h' || *s == 'H') { + m->k = R2R_DHT; + ++s; + } + else if (*s == 'e' || *s == 'E' || *s == 'o' || *s == 'O') { + char c = *(s++); + int ab; + + s = parseint(s, &ab); + + if (c == 'e' || c == 'E') { + if (ab == 0) + m->k = R2R_REDFT00; + else if (ab == 1) + m->k = R2R_REDFT01; + else if (ab == 10) + m->k = R2R_REDFT10; + else if (ab == 11) + m->k = R2R_REDFT11; + else + BENCH_ASSERT(0); + } + else { + if (ab == 0) + m->k = R2R_RODFT00; + else if (ab == 1) + m->k = R2R_RODFT01; + else if (ab == 10) + m->k = 
R2R_RODFT10; + else if (ab == 11) + m->k = R2R_RODFT11; + else + BENCH_ASSERT(0); + } + } + else + m->k = R2R_R2HC; + + if (*s == 'x' || *s == 'X') { + ++s; + goto L1; + } + + /* now we have a dimlist. Build bench_tensor, etc. */ + + if (k && rnk > 0) { + int i; + *k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * rnk); + for (m = l, i = rnk - 1; i >= 0; --i, m = m->cdr) { + BENCH_ASSERT(m); + (*k)[i] = m->k; + } + } + + t = mktensor(rnk); + while (--rnk >= 0) { + bench_iodim *d = t->dims + rnk; + BENCH_ASSERT(l); + m = l; l = m->cdr; + d->n = m->car.n; + d->is = m->car.is; + d->os = m->car.os; + bench_free(m); + } + + *tp = t; + return s; +} + +/* parse a problem description, return a problem. Leading flag characters are consumed first, case-insensitively: i and o set or clear in_place, d sets destroy_input, a slash sets split, f or - sets sign to -1, b or + sets sign to 1, r, c and k select the real, complex and r2r kinds, t requests transpose_tensor on the result, and the square brackets set scrambled_in and scrambled_out. The size tensor follows, optionally combined with a vector tensor introduced by an asterisk or by v. The whole string must be consumed (asserted). */ +bench_problem *problem_parse(const char *s) +{ + bench_problem *p; + bench_iodim last_iodim0 = {1,1,1}, *last_iodim = &last_iodim0; + bench_iodim *sz_last_iodim; + bench_tensor *sz; + n_transform nti = SAME, nto = SAME; + int transpose = 0; + + p = (bench_problem *) bench_malloc(sizeof(bench_problem)); + p->kind = PROBLEM_COMPLEX; + p->k = 0; + p->sign = -1; + p->in = p->out = 0; + p->inphys = p->outphys = 0; + p->iphyssz = p->ophyssz = 0; + p->in_place = 0; + p->destroy_input = 0; + p->split = 0; + p->userinfo = 0; + p->scrambled_in = p->scrambled_out = 0; + p->sz = p->vecsz = 0; + p->ini = p->outi = 0; + p->pstring = (char *) bench_malloc(sizeof(char) * (strlen(s) + 1)); + strcpy(p->pstring, s); + + L1: + switch (tolower(*s)) { + case 'i': p->in_place = 1; ++s; goto L1; + case 'o': p->in_place = 0; ++s; 
goto L1; + case 'd': p->destroy_input = 1; ++s; goto L1; + case '/': p->split = 1; ++s; goto L1; + case 'f': + case '-': p->sign = -1; ++s; goto L1; + case 'b': + case '+': p->sign = 1; ++s; goto L1; + case 'r': p->kind = PROBLEM_REAL; ++s; goto L1; + case 'c': p->kind = PROBLEM_COMPLEX; ++s; goto L1; + case 'k': p->kind = PROBLEM_R2R; ++s; goto L1; + case 't': transpose = 1; ++s; goto L1; + + /* hack for MPI: */ + case '[': p->scrambled_in = 1; ++s; goto L1; + case 
']': p->scrambled_out = 1; ++s; goto L1; + + default : ; + } + + s = parsetensor(s, &sz, p->kind == PROBLEM_R2R ? &p->k : 0); + + if (p->kind == PROBLEM_REAL) { + if (p->sign < 0) { + nti = p->in_place || always_pad_real ? PADDED : SAME; + nto = HALFISH; + } + else { + nti = HALFISH; + nto = p->in_place || always_pad_real ? PADDED : SAME; + } + } + + sz_last_iodim = sz->dims + sz->rnk - 1; + if (*s == '*') { /* "external" vector */ + ++s; + p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); + s = parsetensor(s, &sz, 0); + p->vecsz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); + } else if (*s == 'v' || *s == 'V') { /* "internal" vector */ + bench_tensor *vecsz; + ++s; + s = parsetensor(s, &vecsz, 0); + p->vecsz = dwim(vecsz, &last_iodim, nti, nto, sz_last_iodim); + p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); + } else { + p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); + p->vecsz = mktensor(0); + } + + if (transpose) { + transpose_tensor(p->sz); + transpose_tensor(p->vecsz); + } + + if (!p->in_place) + p->out = ((bench_real *) p->in) + (1 << 20); /* whatever */ + + BENCH_ASSERT(p->sz && p->vecsz); + BENCH_ASSERT(!*s); + return p; +} +/* Release a problem: problem_free(p) first, then the r2r kind array and the saved description string (each via bench_free0), and finally p itself. 
*/ +void problem_destroy(bench_problem *p) +{ + BENCH_ASSERT(p); + problem_free(p); + bench_free0(p->k); + bench_free0(p->pstring); + bench_free(p); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/report.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/report.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" +#include +#include +#include +/* Reporting callback: receives the problem, the array of st timings and st itself. */ +void (*report)(const bench_problem *p, double *t, int st); + +#undef min +#undef max /* you never know */ +/* Summary statistics over one set of timings. */ +struct stats { + double min; + double max; + double avg; + double median; +}; +/* Fill a with the minimum, maximum, mean and median of t[0..st-1]. The array t is sorted in place as a side effect; st must be at least 1. The median is t[st / 2] of the sorted array. */ +static void mkstat(double *t, int st, struct stats *a) +{ + int i, j; + + a->min = t[0]; + a->max = t[0]; + a->avg = 0.0; + + for (i = 0; i < st; ++i) { + if (t[i] < a->min) + a->min = t[i]; + if (t[i] > a->max) + a->max = t[i]; + a->avg += t[i]; + } + a->avg /= (double)st; + + /* compute median --- silly bubblesort algorithm; each pass must compare up to t[i], otherwise the last sample t[st-1] is never ordered and the median is taken from a partly unsorted array */ + for (i = st - 1; i > 0; --i) { + for (j = 0; j < i; ++j) { + double t0, t1; + if ((t0 = t[j]) > (t1 = t[j + 1])) { + t[j] = t1; + t[j + 1] = t0; + } + } + } + a->median = t[st / 2]; +} +/* Print, in parentheses, the mflops figures computed from the max, avg, min and median times, in that order. 
*/ +void report_mflops(const bench_problem *p, double *t, int st) +{ + struct stats s; + mkstat(t, st, &s); + ovtpvt("(%g %g %g %g)\n", + mflops(p, s.max), mflops(p, s.avg), + mflops(p, s.min), mflops(p, s.median)); +} +/* Print, in parentheses, the min, avg, max and median times. */ +void report_time(const bench_problem *p, double *t, int st) +{ + struct stats s; + UNUSED(p); + mkstat(t, st, &s); + ovtpvt("(%g %g %g %g)\n", s.min, s.avg, s.max, s.median); +} +/* Print the mflops figure for the minimum time, the minimum time itself and the setup time of the problem. */ +void report_benchmark(const bench_problem *p, double *t, int st) +{ + struct stats s; + mkstat(t, st, &s); + ovtpvt("%.5g %.8g %g\n", mflops(p, s.min), s.min, p->setup_time); +} +/* Format the duration x into buf with two decimals, picking the ns, us, ms or s suffix by magnitude. The write is bounded by buflen only when HAVE_SNPRINTF is defined. */ +static void sprintf_time(double x, char *buf, int buflen) +{ +#ifdef HAVE_SNPRINTF +# define MY_SPRINTF(a, b) snprintf(buf, buflen, a, b) +#else +# define 
MY_SPRINTF(a, b) sprintf(buf, a, b) +#endif + if (x < 1.0E-6) + MY_SPRINTF("%.2f ns", x * 1.0E9); + else if (x < 1.0E-3) + MY_SPRINTF("%.2f us", x * 1.0E6); + else if (x < 1.0) + MY_SPRINTF("%.2f ms", x * 1.0E3); + else + MY_SPRINTF("%.2f s", x); +#undef MY_SPRINTF +} +/* Human-readable report: problem string, setup time, best time and the mflops figure (labelled fp-move/us when the problem size is 1). When verbose is nonzero, also print the number of measurements, the minimum measurement duration and the min, max, avg and median times. */ +void report_verbose(const bench_problem *p, double *t, int st) +{ + struct stats s; + char bmin[64], bmax[64], bavg[64], bmedian[64], btmin[64]; + char bsetup[64]; + int copyp = tensor_sz(p->sz) == 1; + + mkstat(t, st, &s); + + sprintf_time(s.min, bmin, 64); + sprintf_time(s.max, bmax, 64); + sprintf_time(s.avg, bavg, 64); + sprintf_time(s.median, bmedian, 64); + sprintf_time(time_min, btmin, 64); + sprintf_time(p->setup_time, bsetup, 64); + + ovtpvt("Problem: %s, setup: %s, time: %s, %s: %.5g\n", + p->pstring, bsetup, bmin, + copyp ? 
"fp-move/us" : "``mflops''", + mflops(p, s.min)); + + if (verbose) { + ovtpvt("Took %d measurements for at least %s each.\n", st, btmin); + ovtpvt("Time: min %s, max %s, avg %s, median %s\n", + bmin, bmax, bavg, bmedian); + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/speed.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/speed.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" + +int no_speed_allocation = 0; /* 1 to not allocate array data in speed() */ +/* Benchmark the problem described by param. The problem is parsed, its arrays are allocated and zeroed (unless no_speed_allocation), and setup() is timed into setup_time. Unless setup_only is set, doit() is then timed time_repeat times with an iteration count that doubles from 1 until the fastest run lasts at least time_min; a negative measurement or an exhausted iteration range restarts the search. The timings are divided by the final iteration count and handed to report(). */ +void speed(const char *param, int setup_only) +{ + double *t; + int iter = 0, k; + bench_problem *p; + double tmin, y; + + t = (double *) bench_malloc(time_repeat * sizeof(double)); + + for (k = 0; k < time_repeat; ++k) + t[k] = 0; + + p = problem_parse(param); + BENCH_ASSERT(can_do(p)); + if (!no_speed_allocation) { + problem_alloc(p); + problem_zero(p); + } + + timer_start(LIBBENCH_TIMER); + setup(p); + p->setup_time = bench_cost_postprocess(timer_stop(LIBBENCH_TIMER)); + + /* reset the input to zero again, because the planner in paranoid + mode sets it to random values, thus making the benchmark + diverge. 
*/ + if (!no_speed_allocation) + problem_zero(p); + + if (setup_only) + goto done; + + start_over: + for (iter = 1; iter < (1<<30); iter *= 2) { + tmin = 1.0e20; + for (k = 0; k < time_repeat; ++k) { + timer_start(LIBBENCH_TIMER); + doit(iter, p); + y = bench_cost_postprocess(timer_stop(LIBBENCH_TIMER)); + if (y < 0) /* yes, it happens */ + goto start_over; + t[k] = y; + if (y < tmin) + tmin = y; + } + + if (tmin >= time_min) + goto done; + } + + goto start_over; /* this also happens */ + + done: + done(p); + + if (iter) + for (k = 0; k < time_repeat; ++k) + t[k] /= iter; + else + for (k = 0; k < time_repeat; ++k) + t[k] = 0; + + report(p, t, time_repeat); + + if (!no_speed_allocation) + problem_destroy(p); + bench_free(t); + return; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/tensor.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/tensor.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute 
of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "bench.h" +#include +/* Allocate a tensor of rank rnk (asserted non-negative). The dims array is allocated only for a finite positive rank and is null otherwise; its entries are left uninitialised. NOTE(review): the bare #include above has no header name in this copy; qsort is used below, so presumably stdlib.h -- confirm against upstream. */ +bench_tensor *mktensor(int rnk) +{ + bench_tensor *x; + + BENCH_ASSERT(rnk >= 0); + + x = (bench_tensor *)bench_malloc(sizeof(bench_tensor)); + if (FINITE_RNK(rnk) && rnk > 0) + x->dims = (bench_iodim *)bench_malloc(sizeof(bench_iodim) * rnk); + else + x->dims = 0; + + x->rnk = rnk; + return x; +} +/* Free a tensor together with its dims array, if it has one. */ +void tensor_destroy(bench_tensor *sz) +{ + bench_free0(sz->dims); + bench_free(sz); +} +/* Product of all dimension lengths: 0 for a non-finite rank, 1 for rank 0. No overflow check is made. 
*/ +int tensor_sz(const bench_tensor *sz) +{ + int i, n = 1; + + if (!FINITE_RNK(sz->rnk)) + return 0; + + for (i = 0; i < sz->rnk; ++i) + n *= sz->dims[i].n; + return n; +} + + +/* total order among bench_iodim's */ +static int dimcmp(const bench_iodim *a, const bench_iodim *b) +{ + if (b->is != a->is) + return (b->is - a->is); /* shorter strides go later */ + if (b->os != a->os) + return (b->os - a->os); /* shorter strides go later */ + return (int)(a->n - b->n); /* larger n's go later */ +} +/* Return a new tensor holding only the dimensions of sz whose length differs from 1, sorted with dimcmp. Asserts that sz has finite rank and that every length is positive. */ +bench_tensor *tensor_compress(const bench_tensor *sz) +{ + int i, rnk; + bench_tensor *x; + + BENCH_ASSERT(FINITE_RNK(sz->rnk)); + for (i = rnk = 0; i < sz->rnk; ++i) { + BENCH_ASSERT(sz->dims[i].n > 0); + if (sz->dims[i].n != 1) + ++rnk; + } + + x = mktensor(rnk); + for (i = rnk = 0; i < sz->rnk; ++i) { + if 
(sz->dims[i].n != 1) + x->dims[rnk++] = sz->dims[i]; + } + + if (rnk) { + /* God knows how qsort() behaves if n==0 */ + qsort(x->dims, (size_t)x->rnk, sizeof(bench_iodim), + (int (*)(const void *, const void *))dimcmp); + } + + return x; +} +/* Nonzero when the tensor has rank 0 or its last dimension has input and output stride 1. */ +int tensor_unitstridep(bench_tensor *t) +{ + BENCH_ASSERT(FINITE_RNK(t->rnk)); + return (t->rnk == 0 || + (t->dims[t->rnk - 1].is == 1 && t->dims[t->rnk - 1].os == 1)); +} + +/* detect screwy real padded rowmajor... ugh */ +int tensor_real_rowmajorp(bench_tensor *t, int sign, int in_place) +{ + int i; + + BENCH_ASSERT(FINITE_RNK(t->rnk)); + + i = t->rnk - 1; + + if (--i >= 0) { + bench_iodim *d = t->dims + i; + if (sign < 0) { + if (d[0].is != d[1].is * (in_place ? 2*(d[1].n/2 + 1) : d[1].n)) + return 0; + if (d[0].os != d[1].os * (d[1].n/2 + 1)) + return 0; + } + else { + if (d[0].is != d[1].is * (d[1].n/2 + 1)) + return 0; + if (d[0].os != d[1].os * (in_place ? 2*(d[1].n/2 + 1) : d[1].n)) + return 0; + } + } + + while (--i >= 0) { + bench_iodim *d = t->dims + i; + if (d[0].is != d[1].is * d[1].n) + return 0; + if (d[0].os != d[1].os * d[1].n) + return 0; + } + return 1; +} +/* Nonzero when, for every dimension but the last, both the input and the output stride equal the stride of the next dimension times its length. */ +int tensor_rowmajorp(bench_tensor *t) +{ + int i; + + BENCH_ASSERT(FINITE_RNK(t->rnk)); + + i = t->rnk - 1; + while (--i >= 0) { + bench_iodim *d = t->dims + i; + if (d[0].is != d[1].is * d[1].n) + return 0; + if (d[0].os != d[1].os * d[1].n) + return 0; + } + return 1; +} +/* Copy rnk dimensions from src to dst; does nothing for a non-finite rank. 
*/ +static void dimcpy(bench_iodim *dst, const bench_iodim *src, int rnk) +{ + int i; + if (FINITE_RNK(rnk)) + for (i = 0; i < rnk; ++i) + dst[i] = src[i]; +} +/* Return a new tensor whose dimensions are those of a followed by those of b. If either rank is non-finite the result is mktensor(RNK_MINFTY). */ +bench_tensor *tensor_append(const bench_tensor *a, const bench_tensor *b) +{ + if (!FINITE_RNK(a->rnk) || !FINITE_RNK(b->rnk)) { + return mktensor(RNK_MINFTY); + } else { + bench_tensor *x = mktensor(a->rnk + b->rnk); + dimcpy(x->dims, a->dims, a->rnk); + dimcpy(x->dims + a->rnk, b->dims, b->rnk); + return x; + } +} + +static int imax(int a, int b) +{ + return (a > b) ? 
a : b; +} + +static int imin(int a, int b) +{ + return (a < b) ? a : b; +} +/* DEFBOUNDS(name, xs) defines a function that computes bounds over the stride field xs of a finite-rank tensor: starting from lb = 0 and ub = 1, each dimension with stride s and length n lowers lb by s*(n-1) when that is negative and raises ub by s*(n-1) when that is positive. The results are stored through lbp and ubp. */ +#define DEFBOUNDS(name, xs) \ +void name(bench_tensor *t, int *lbp, int *ubp) \ +{ \ + int lb = 0; \ + int ub = 1; \ + int i; \ + \ + BENCH_ASSERT(FINITE_RNK(t->rnk)); \ + \ + for (i = 0; i < t->rnk; ++i) { \ + bench_iodim *d = t->dims + i; \ + int n = d->n; \ + int s = d->xs; \ + lb = imin(lb, lb + s * (n - 1)); \ + ub = imax(ub, ub + s * (n - 1)); \ + } \ + \ + *lbp = lb; \ + *ubp = ub; \ +} + +DEFBOUNDS(tensor_ibounds, is) +DEFBOUNDS(tensor_obounds, os) +/* Return a newly allocated copy of sz (same rank, same dimensions). */ +bench_tensor *tensor_copy(const bench_tensor *sz) +{ + bench_tensor *x = mktensor(sz->rnk); + dimcpy(x->dims, sz->dims, sz->rnk); + return x; +} + +/* Like tensor_copy, but copy only rnk dimensions starting with start_dim. */ +bench_tensor *tensor_copy_sub(const bench_tensor *sz, int start_dim, int rnk) +{ + bench_tensor *x; + + BENCH_ASSERT(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk); + x = mktensor(rnk); + dimcpy(x->dims, sz->dims + start_dim, rnk); + return x; +} +/* Return a copy of sz in which the input and output strides of every dimension are exchanged. 
*/ +bench_tensor *tensor_copy_swapio(const bench_tensor *sz) +{ + bench_tensor *x = tensor_copy(sz); + int i; + if (FINITE_RNK(x->rnk)) + for (i = 0; i < x->rnk; ++i) { + int s; + s = x->dims[i].is; + x->dims[i].is = x->dims[i].os; + x->dims[i].os = s; + } + return x; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/timer.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/timer.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +/* NOTE(review): several #include directives in this file have no header name in this copy (apparently lost in extraction). The HAVE_SYS_TIME_H and HAVE_UNISTD_H guards suggest sys/time.h and unistd.h, and the LARGE_INTEGER code suggests windows.h -- confirm against upstream. */ + +#include "bench.h" +#include + +/* + * System-dependent timing functions: + */ +#ifdef HAVE_SYS_TIME_H +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_BSDGETTIMEOFDAY +#ifndef HAVE_GETTIMEOFDAY +#define gettimeofday BSDgettimeofday +#define HAVE_GETTIMEOFDAY 1 +#endif +#endif +/* Minimum duration of a timed run and number of repetitions; both are set by timer_init. */ +double time_min; +int time_repeat; + +#if !defined(HAVE_TIMER) && (defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS) || defined(__CYGWIN__)) +#include +typedef LARGE_INTEGER mytime; +/* Windows variant: current QueryPerformanceCounter value. */ +static mytime get_time(void) +{ + mytime tv; + QueryPerformanceCounter(&tv); + return tv; +} +/* Windows variant: counter difference t1 - t0 divided by the QueryPerformanceFrequency value. 
*/ +static double elapsed(mytime t1, mytime t0) +{ + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + return (((double) t1.QuadPart - (double) t0.QuadPart)) / + ((double) freq.QuadPart); +} + +#define HAVE_TIMER +#endif + + +#if defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_TIMER) +typedef struct timeval mytime; +/* gettimeofday variant: current time as a struct timeval. */ +static mytime get_time(void) +{ + struct timeval tv; + gettimeofday(&tv, 0); + return tv; +} +/* gettimeofday variant: t1 - t0 in seconds, combining the tv_sec difference with the tv_usec difference scaled by 1e-6. */ +static double elapsed(mytime t1, mytime t0) +{ + return ((double) t1.tv_sec - (double) t0.tv_sec) + + ((double) t1.tv_usec - (double) t0.tv_usec) * 1.0E-6; +} + +#define HAVE_TIMER +#endif + +#ifndef HAVE_TIMER +#error "timer not defined" +#endif +/* Return the default value for time_min: a fixed 0.01, no measurement is performed. */ +static double calibrate(void) +{ + /* there seems to be no reasonable way to calibrate the + clock automatically any longer. Grrr... 
*/ + + return 0.01; +} + +/* One-time initialisation: time_repeat becomes repeat (8 when repeat is 0) and time_min becomes tmin (the calibrate() value when tmin is not positive). Calls after the first are ignored. */ +void timer_init(double tmin, int repeat) +{ + static int inited = 0; + + if (inited) + return; + inited = 1; + + if (!repeat) + repeat = 8; + time_repeat = repeat; + + if (tmin > 0) + time_min = tmin; + else + time_min = calibrate(); +} +/* Start time of each of the BENCH_NTIMERS timers. */ +static mytime t0[BENCH_NTIMERS]; +/* Record the current time as the start of timer n (index asserted in range). */ +void timer_start(int n) +{ + BENCH_ASSERT(n >= 0 && n < BENCH_NTIMERS); + t0[n] = get_time(); +} +/* Return the time elapsed since the last timer_start(n) (index asserted in range). */ +double timer_stop(int n) +{ + mytime t1; + BENCH_ASSERT(n >= 0 && n < BENCH_NTIMERS); + t1 = get_time(); + return elapsed(t1, t0[n]); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/useropt.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/useropt.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2000 Matteo Frigo + * Copyright (c) 2000 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include +#include +#include "bench.h" +/* Handler for user options in this file: reports the option as unknown on the error stream and otherwise ignores it. */ +void useropt(const char *arg) +{ + ovtpvt_err("unknown user option: %s. 
Ignoring.\n", arg); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/util.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/util.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2000 Matteo Frigo + * Copyright (c) 2000 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "bench.h" +#include +#include +#include +#include + +#if defined(HAVE_MALLOC_H) +# include +#endif + +#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN +extern void *memalign(size_t, size_t); +#endif + +#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN +extern int posix_memalign(void **, size_t, size_t); +#endif +/* Report a failed assertion (expression text s, at file:line) through ovtpvt_err and terminate via bench_exit(EXIT_FAILURE). */ +void bench_assertion_failed(const char *s, int line, const char *file) +{ + ovtpvt_err("bench: %s:%d: assertion failed: %s\n", file, line, s); + bench_exit(EXIT_FAILURE); +} +/* Random helpers. bench_drand returns drand48() - 0.5 when HAVE_DRAND48 is defined and rand()/RAND_MAX - 0.5 otherwise; bench_srand seeds the matching generator. 
*/ +#ifdef HAVE_DRAND48 +# if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48 +extern double drand48(void); +# endif +double bench_drand(void) +{ + return drand48() - 0.5; +} +# if defined(HAVE_DECL_SRAND48) && !HAVE_DECL_SRAND48 +extern void srand48(long); +# endif +void bench_srand(int seed) +{ + srand48(seed); +} +#else +double bench_drand(void) +{ + double d = rand(); + return (d / (double) 
RAND_MAX) - 0.5; +} +void bench_srand(int seed) +{ + srand(seed); +} +#endif + +/********************************************************** + * DEBUGGING CODE + **********************************************************/ +#ifdef BENCH_DEBUG +static int bench_malloc_cnt = 0; + +/* + * debugging malloc/free. Initialize every malloced and freed area to + * random values, just to make sure we are not using uninitialized + * pointers. Also check for writes past the ends of allocated blocks, + * and a couple of other things. + * + * This code is a quick and dirty hack -- use at your own risk. + */ + +static int bench_malloc_total = 0, bench_malloc_max = 0, bench_malloc_cnt_max = 0; + +#define MAGIC ((size_t)0xABadCafe) +#define PAD_FACTOR 2 +#define TWO_SIZE_T (2 * sizeof(size_t)) + +#define VERBOSE_ALLOCATION 0 + +#if VERBOSE_ALLOCATION +#define WHEN_VERBOSE(a) a +#else +#define WHEN_VERBOSE(a) +#endif +/* Debug allocator: allocates PAD_FACTOR * n payload bytes behind a header of two size_t values (n and MAGIC), fills the whole payload with a known byte pattern, updates the allocation counters and returns the address just past the header. */ +void *bench_malloc(size_t n) +{ + char *p; + size_t i; + + bench_malloc_total += n; + + if (bench_malloc_total > bench_malloc_max) + bench_malloc_max = bench_malloc_total; + + p = (char *) malloc(PAD_FACTOR * n + TWO_SIZE_T); + BENCH_ASSERT(p); + + /* store the size in a known position */ + ((size_t *) p)[0] = n; + ((size_t *) p)[1] = MAGIC; + for (i = 0; i < PAD_FACTOR * n; i++) + p[i + TWO_SIZE_T] = (char) (i ^ 0xDEADBEEF); + + ++bench_malloc_cnt; + + if (bench_malloc_cnt > bench_malloc_cnt_max) + bench_malloc_cnt_max = bench_malloc_cnt; + + /* skip the size we stored previously */ + return (void *) (p + TWO_SIZE_T); +} +/* Debug free: p must be non-null. Checks the header magic, checks that the padding beyond the first n payload bytes still holds the fill pattern, overwrites the block with a second pattern, updates the counters and frees the 
underlying allocation. */ +void bench_free(void *p) +{ + char *q; + + BENCH_ASSERT(p); + + q = ((char *) p) - TWO_SIZE_T; + BENCH_ASSERT(q); + + { + size_t n = ((size_t *) q)[0]; + size_t magic = ((size_t *) q)[1]; + size_t i; + + ((size_t *) q)[0] = 0; /* set to zero to detect duplicate free's */ + + BENCH_ASSERT(magic == MAGIC); + ((size_t *) q)[1] = ~MAGIC; + + bench_malloc_total -= n; + BENCH_ASSERT(bench_malloc_total >= 0); + + /* check for writing past end of array: */ 
+ for (i = n; i < PAD_FACTOR * n; ++i) + if (q[i + TWO_SIZE_T] != (char) (i ^ 0xDEADBEEF)) { + BENCH_ASSERT(0 /* array bounds overwritten */); + } + for (i = 0; i < PAD_FACTOR * n; ++i) + q[i + TWO_SIZE_T] = (char) (i ^ 0xBEEFDEAD); + + --bench_malloc_cnt; + + BENCH_ASSERT(bench_malloc_cnt >= 0); + + BENCH_ASSERT( + (bench_malloc_cnt == 0 && bench_malloc_total == 0) || + (bench_malloc_cnt > 0 && bench_malloc_total > 0)); + + free(q); + } +} + +#else +/********************************************************** + * NON DEBUGGING CODE + **********************************************************/ +/* production version, no hacks */ + +#define MIN_ALIGNMENT 128 /* must be power of two */ + +#define real_free free /* memalign and malloc use ordinary free */ +/* Production allocator: returns a block of at least n bytes (a request of 0 is treated as 1) and asserts on failure. The block is aligned to MIN_ALIGNMENT in every configuration except the plain malloc fallback. */ +void *bench_malloc(size_t n) +{ + void *p; + if (n == 0) n = 1; + +#if defined(WITH_OUR_MALLOC) + /* Our own aligned malloc/free. Assumes sizeof(void*) is + a power of two <= 8 and that malloc is at least + sizeof(void*)-aligned. Assumes size_t = uintptr_t. */ + { + void *p0; + if ((p0 = malloc(n + MIN_ALIGNMENT))) { + p = (void *) (((size_t) p0 + MIN_ALIGNMENT) & (~((size_t) (MIN_ALIGNMENT - 1)))); + *((void **) p - 1) = p0; + } + else + p = (void *) 0; + } +#elif defined(HAVE_MEMALIGN) + p = memalign(MIN_ALIGNMENT, n); +#elif defined(HAVE_POSIX_MEMALIGN) + /* note: posix_memalign is broken in glibc 2.2.5: it constrains + the size, not the alignment, to be (power of two) * sizeof(void*). + The bug seems to have been fixed as of glibc 2.3.1. 
*/ + if (posix_memalign(&p, MIN_ALIGNMENT, n)) + p = (void*) 0; +#elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) + /* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */ + p = (void *) _mm_malloc(n, MIN_ALIGNMENT); +# undef real_free +# define real_free _mm_free +#else + p = malloc(n); +#endif + + BENCH_ASSERT(p); + return p; +} +/* Production free: with WITH_OUR_MALLOC, frees the original malloc pointer stored just below p (null is ignored); otherwise calls real_free, which is free or _mm_free depending on the allocator selected in bench_malloc. */ +void bench_free(void *p) +{ +#ifdef WITH_OUR_MALLOC + if (p) free(*((void **) p - 1)); +#else + real_free(p); +#endif +} + +#endif +/* bench_free that ignores a null pointer. */ +void bench_free0(void *p) +{ + if (p) bench_free(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/verify-dft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/verify-dft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "verify.h" + +/* copy A into B, using output stride of A and input stride of B */ +typedef struct { + dotens2_closure k; + R *ra; R *ia; + R *rb; R *ib; + int scalea, scaleb; +} cpy_closure; +/* Per-element callback installed by cpy: copies the real and imaginary parts of one element from A (output index ondxa times scalea) to B (input index indxb times scaleb). */ +static void cpy0(dotens2_closure *k_, + int indxa, int ondxa, int indxb, int ondxb) +{ + cpy_closure *k = (cpy_closure *)k_; + k->rb[indxb * k->scaleb] = k->ra[ondxa * k->scalea]; + k->ib[indxb * k->scaleb] = k->ia[ondxa * k->scalea]; + UNUSED(indxa); UNUSED(ondxb); +} +/* Copy the real and imaginary arrays of A into those of B by handing a cpy0 closure to bench_dotens2 over the tensors sza and szb. */ +static void cpy(R *ra, R *ia, const bench_tensor *sza, int scalea, + R *rb, R *ib, const bench_tensor *szb, int scaleb) +{ + cpy_closure k; + k.k.apply = cpy0; + k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib; + k.scalea = scalea; k.scaleb = scaleb; + bench_dotens2(sza, szb, &k.k); +} +/* dofft closure that also carries the problem being verified. */ +typedef struct { + dofft_closure k; + bench_problem *p; +} dofft_dft_closure; +/* Apply the transform under test: copy the packed array in into the input layout of the problem, run doit(1, p), optionally copy the problem input back into in (recopy_input), and copy the problem output into the packed array out. Temporary tensors are destroyed before returning. 
*/ +static void dft_apply(dofft_closure *k_, bench_complex *in, bench_complex *out) +{ + dofft_dft_closure *k = (dofft_dft_closure *)k_; + bench_problem *p = k->p; + bench_tensor *totalsz, *pckdsz; + bench_tensor *totalsz_swap, *pckdsz_swap; + bench_real *ri, *ii, *ro, *io; + int totalscale; + + totalsz = tensor_append(p->vecsz, p->sz); + pckdsz = verify_pack(totalsz, 2); + ri = (bench_real *) p->in; + ro = (bench_real *) p->out; + + totalsz_swap = tensor_copy_swapio(totalsz); + pckdsz_swap = tensor_copy_swapio(pckdsz); + + /* confusion: the stride is the distance between complex elements + when using interleaved format, but it is the distance between + real elements when using split format */ + if (p->split) { + ii = p->ini ? (bench_real *) p->ini : ri + p->iphyssz; + io = p->outi ? (bench_real *) p->outi : ro + p->ophyssz; + totalscale = 1; + } else { + ii = p->ini ? 
(bench_real *) p->ini : ri + 1; + io = p->outi ? (bench_real *) p->outi : ro + 1; + totalscale = 2; + } + + cpy(&c_re(in[0]), &c_im(in[0]), pckdsz, 1, + ri, ii, totalsz, totalscale); + after_problem_ccopy_from(p, ri, ii); + doit(1, p); + after_problem_ccopy_to(p, ro, io); + if (k->k.recopy_input) + cpy(ri, ii, totalsz_swap, totalscale, + &c_re(in[0]), &c_im(in[0]), pckdsz_swap, 1); + cpy(ro, io, totalsz, totalscale, + &c_re(out[0]), &c_im(out[0]), pckdsz, 1); + + tensor_destroy(totalsz); + tensor_destroy(pckdsz); + tensor_destroy(totalsz_swap); + tensor_destroy(pckdsz_swap); +} +/* Verify a complex problem (kind asserted): store the results of the impulse and linear tests in e->i and e->l, store in e->s the larger of the TIME_SHIFT and FREQ_SHIFT results of tf_shift, and call preserves_input for problems that are neither in-place nor input-destroying. A rounds value of 0 selects 20. */ +void verify_dft(bench_problem *p, int rounds, double tol, errors *e) +{ + C *inA, *inB, *inC, *outA, *outB, *outC, *tmp; + int n, vecn, N; + dofft_dft_closure k; + + BENCH_ASSERT(p->kind == PROBLEM_COMPLEX); + + k.k.apply = dft_apply; + k.k.recopy_input = 0; + k.p = p; + + if (rounds == 0) + rounds = 20; /* default value */ + + n = tensor_sz(p->sz); + vecn = tensor_sz(p->vecsz); + N = n * vecn; + + inA = (C *) bench_malloc(N * sizeof(C)); + inB = (C *) bench_malloc(N * sizeof(C)); + inC = (C *) bench_malloc(N * sizeof(C)); + outA = (C *) bench_malloc(N * sizeof(C)); + outB = (C *) bench_malloc(N * sizeof(C)); + outC = (C *) bench_malloc(N * sizeof(C)); + tmp = (C *) bench_malloc(N * sizeof(C)); + + e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol); + e->l = linear(&k.k, 0, N, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol); + + e->s = 0.0; + e->s = dmax(e->s, tf_shift(&k.k, 0, p->sz, n, vecn, p->sign, + inA, inB, outA, outB, + tmp, rounds, tol, TIME_SHIFT)); + e->s = dmax(e->s, tf_shift(&k.k, 0, p->sz, n, vecn, p->sign, + inA, inB, outA, outB, + tmp, 
rounds, tol, FREQ_SHIFT)); + + if (!p->in_place && !p->destroy_input) + preserves_input(&k.k, 0, N, inA, inB, outB, rounds); + + bench_free(tmp); + bench_free(outC); + bench_free(outB); + bench_free(outA); + bench_free(inC); + bench_free(inB); + bench_free(inA); +} +/* Run accuracy_test on a complex problem whose size tensor has rank 1 and whose vector tensor has rank 0 (all asserted); results are written to t. */ + +void accuracy_dft(bench_problem *p, 
int rounds, int impulse_rounds, + double t[6]) +{ + dofft_dft_closure k; + int n; + C *a, *b; + + BENCH_ASSERT(p->kind == PROBLEM_COMPLEX); + BENCH_ASSERT(p->sz->rnk == 1); + BENCH_ASSERT(p->vecsz->rnk == 0); + + k.k.apply = dft_apply; + k.k.recopy_input = 0; + k.p = p; + n = tensor_sz(p->sz); + + a = (C *) bench_malloc(n * sizeof(C)); + b = (C *) bench_malloc(n * sizeof(C)); + accuracy_test(&k.k, 0, p->sign, n, a, b, rounds, impulse_rounds, t); + bench_free(b); + bench_free(a); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/verify-lib.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/verify-lib.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +/* NOTE(review): the three bare #include directives below have no header name in this copy (apparently lost in extraction); restore them from upstream. */ + +#include "verify.h" +#include +#include +#include + +/* + * Utility functions: + */ +static double dabs(double x) { return (x < 0.0) ? -x : x; } +static double dmin(double x, double y) { return (x < y) ? 
x : y; } +static double norm2(double x, double y) { return dmax(dabs(x), dabs(y)); } + +double dmax(double x, double y) { return (x > y) ? 
x : y; } +/* Relative error between a and b over n elements: the largest norm2 of an element difference divided by the largest, over all elements, of the smaller of the two element magnitudes. Returns 0.0 when n is not positive; no guard is made against a zero divisor. */ +static double aerror(C *a, C *b, int n) +{ + if (n > 0) { + /* compute the relative Linf error */ + double e = 0.0, mag = 0.0; + int i; + + for (i = 0; i < n; ++i) { + e = dmax(e, norm2(c_re(a[i]) - c_re(b[i]), + c_im(a[i]) - c_im(b[i]))); + mag = dmax(mag, + dmin(norm2(c_re(a[i]), c_im(a[i])), + norm2(c_re(b[i]), c_im(b[i])))); + } + e /= mag; + +#ifdef HAVE_ISNAN + BENCH_ASSERT(!isnan(e)); +#endif + return e; + } else + return 0.0; +} +/* mydrand returns drand48() - 0.5 when HAVE_DRAND48 is defined and rand()/RAND_MAX - 0.5 otherwise. */ +#ifdef HAVE_DRAND48 +# if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48 +extern double drand48(void); +# endif +double mydrand(void) +{ + return drand48() - 0.5; +} +#else +double mydrand(void) +{ + double d = rand(); + return (d / (double) RAND_MAX) - 0.5; +} +#endif +/* Fill the real and imaginary parts of a[0..n-1] with values from mydrand(). */ +void arand(C *a, int n) +{ + int i; + + /* generate random inputs */ + for (i = 0; i < n; ++i) { + c_re(a[i]) = mydrand(); + c_im(a[i]) = mydrand(); + } +} + +/* make array real */ +void mkreal(C *A, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_im(A[i]) = 0.0; + } +} +/* Recursive helper for mkhermitian: writes into Ac the complex conjugate of A, where along each of the rank dimensions index i of A lands at index n0 - i of Ac (index 0 maps to itself). 
*/ +static void assign_conj(C *Ac, C *A, int rank, const bench_iodim *dim, int stride) +{ + if (rank == 0) { + c_re(*Ac) = c_re(*A); + c_im(*Ac) = -c_im(*A); + } + else { + int i, n0 = dim[rank - 1].n, s = stride; + rank -= 1; + stride *= n0; + assign_conj(Ac, A, rank, dim, stride); + for (i = 1; i < n0; ++i) + assign_conj(Ac + (n0 - i) * s, A + i * s, rank, dim, stride); + } +} + +/* make array hermitian */ +void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride) +{ + if (rank == 0) + c_im(*A) = 0.0; + else { + int i, n0 = dim[rank - 1].n, s = stride; + rank -= 1; + stride *= n0; + mkhermitian(A, rank, dim, stride); + for (i = 1; 2*i < n0; ++i) + assign_conj(A + (n0 - i) * s, A + i * s, rank, dim, stride); + if (2*i == n0) + mkhermitian(A + i * s, rank, dim, stride); + } +} +/* One-dimensional convenience wrapper: calls mkhermitian on a with a single dimension of length n and stride 1. */ +void mkhermitian1(C *a, int n) +{ + bench_iodim d; + + d.n = n; + d.is = d.os = 1; + mkhermitian(a, 1, &d, 1); +} + +/* C = A */ +void acopy(C *c, C *a, int n) +{ + int i; + + for (i = 0; i < 
n; ++i) { + c_re(c[i]) = c_re(a[i]); + c_im(c[i]) = c_im(a[i]); + } +} + +/* C = A + B */ +void aadd(C *c, C *a, C *b, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_re(c[i]) = c_re(a[i]) + c_re(b[i]); + c_im(c[i]) = c_im(a[i]) + c_im(b[i]); + } +} + +/* C = A - B */ +void asub(C *c, C *a, C *b, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c_re(c[i]) = c_re(a[i]) - c_re(b[i]); + c_im(c[i]) = c_im(a[i]) - c_im(b[i]); + } +} + +/* B = rotate left A (complex) */ +void arol(C *b, C *a, int n, int nb, int na) +{ + int i, ib, ia; + + for (ib = 0; ib < nb; ++ib) { + for (i = 0; i < n - 1; ++i) + for (ia = 0; ia < na; ++ia) { + C *pb = b + (ib * n + i) * na + ia; + C *pa = a + (ib * n + i + 1) * na + ia; + c_re(*pb) = c_re(*pa); + c_im(*pb) = c_im(*pa); + } + + for (ia = 0; ia < na; ++ia) { + C *pb = b + (ib * n + n - 1) * na + ia; + C *pa = a + ib * n * na + ia; + c_re(*pb) = c_re(*pa); + c_im(*pb) = c_im(*pa); + } + } +} + +void aphase_shift(C *b, C *a, int n, int nb, int na, double sign) +{ + int j, jb, ja; + trigreal twopin; + twopin = K2PI / n; + + for (jb = 0; jb < nb; ++jb) + for (j = 0; j < n; ++j) { + trigreal s = sign * SIN(j * twopin); + trigreal c = COS(j * twopin); + + for (ja = 0; ja < na; ++ja) { + int k = (jb * n + j) * na + ja; + c_re(b[k]) = c_re(a[k]) * c - c_im(a[k]) * s; + c_im(b[k]) = c_re(a[k]) * s + c_im(a[k]) * c; + } + } +} + +/* A = alpha * A (complex, in place) */ +void ascale(C *a, C alpha, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + R xr = c_re(a[i]), xi = c_im(a[i]); + c_re(a[i]) = xr * c_re(alpha) - xi * c_im(alpha); + c_im(a[i]) = xr * c_im(alpha) + xi * c_re(alpha); + } +} + + +double acmp(C *a, C *b, int n, const char *test, double tol) +{ + double d = aerror(a, b, n); + if (d > tol) { + ovtpvt_err("Found relative error %e (%s)\n", d, test); + + { + int i, N; + N = n > 300 && verbose <= 2 ? 
300 : n; + for (i = 0; i < N; ++i) + ovtpvt_err("%8d %16.12f %16.12f %16.12f %16.12f\n", i, + (double) c_re(a[i]), (double) c_im(a[i]), + (double) c_re(b[i]), (double) c_im(b[i])); + } + + bench_exit(EXIT_FAILURE); + } + return d; +} + + +/* + * Implementation of the FFT tester described in + * + * Funda Ergün. Testing multivariate linear functions: Overcoming the + * generator bottleneck. In Proceedings of the Twenty-Seventh Annual + * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, + * Nevada, 29 May--1 June 1995. + * + * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without + * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000). + */ + +static double impulse0(dofft_closure *k, + int n, int vecn, + C *inA, C *inB, C *inC, + C *outA, C *outB, C *outC, + C *tmp, int rounds, double tol) +{ + int N = n * vecn; + double e = 0.0; + int j; + + k->apply(k, inA, tmp); + e = dmax(e, acmp(tmp, outA, N, "impulse 1", tol)); + + for (j = 0; j < rounds; ++j) { + arand(inB, N); + asub(inC, inA, inB, N); + k->apply(k, inB, outB); + k->apply(k, inC, outC); + aadd(tmp, outB, outC, N); + e = dmax(e, acmp(tmp, outA, N, "impulse", tol)); + } + return e; +} + +double impulse(dofft_closure *k, + int n, int vecn, + C *inA, C *inB, C *inC, + C *outA, C *outB, C *outC, + C *tmp, int rounds, double tol) +{ + int i, j; + double e = 0.0; + + /* check impulsive input */ + for (i = 0; i < vecn; ++i) { + R x = (sqrt(n)*(i+1)) / (double)(vecn+1); + for (j = 0; j < n; ++j) { + c_re(inA[j + i * n]) = 0; + c_im(inA[j + i * n]) = 0; + c_re(outA[j + i * n]) = x; + c_im(outA[j + i * n]) = 0; + } + c_re(inA[i * n]) = x; + c_im(inA[i * n]) = 0; + } + + e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol)); + + /* check constant input */ + for (i = 0; i < vecn; ++i) { + R x = (i+1) / ((double)(vecn+1) * sqrt(n)); + for (j = 0; j < n; ++j) { + c_re(inA[j + i * n]) = x; + c_im(inA[j + i * n]) = 0; + c_re(outA[j + i * n]) 
= 0; + c_im(outA[j + i * n]) = 0; + } + c_re(outA[i * n]) = n * x; + c_im(outA[i * n]) = 0; + } + + e = dmax(e, impulse0(k, n, vecn, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol)); + return e; +} + +double linear(dofft_closure *k, int realp, + int n, C *inA, C *inB, C *inC, C *outA, + C *outB, C *outC, C *tmp, int rounds, double tol) +{ + int j; + double e = 0.0; + + for (j = 0; j < rounds; ++j) { + C alpha, beta; + c_re(alpha) = mydrand(); + c_im(alpha) = realp ? 0.0 : mydrand(); + c_re(beta) = mydrand(); + c_im(beta) = realp ? 0.0 : mydrand(); + arand(inA, n); + arand(inB, n); + k->apply(k, inA, outA); + k->apply(k, inB, outB); + + ascale(outA, alpha, n); + ascale(outB, beta, n); + aadd(tmp, outA, outB, n); + ascale(inA, alpha, n); + ascale(inB, beta, n); + aadd(inC, inA, inB, n); + k->apply(k, inC, outC); + + e = dmax(e, acmp(outC, tmp, n, "linear", tol)); + } + return e; +} + + + +double tf_shift(dofft_closure *k, + int realp, const bench_tensor *sz, + int n, int vecn, double sign, + C *inA, C *inB, C *outA, C *outB, C *tmp, + int rounds, double tol, int which_shift) +{ + int nb, na, dim, N = n * vecn; + int i, j; + double e = 0.0; + + /* test 3: check the time-shift property */ + /* the paper performs more tests, but this code should be fine too */ + + nb = 1; + na = n; + + /* check shifts across all SZ dimensions */ + for (dim = 0; dim < sz->rnk; ++dim) { + int ncur = sz->dims[dim].n; + + na /= ncur; + + for (j = 0; j < rounds; ++j) { + arand(inA, N); + + if (which_shift == TIME_SHIFT) { + for (i = 0; i < vecn; ++i) { + if (realp) mkreal(inA + i * n, n); + arol(inB + i * n, inA + i * n, ncur, nb, na); + } + k->apply(k, inA, outA); + k->apply(k, inB, outB); + for (i = 0; i < vecn; ++i) + aphase_shift(tmp + i * n, outB + i * n, ncur, + nb, na, sign); + e = dmax(e, acmp(tmp, outA, N, "time shift", tol)); + } else { + for (i = 0; i < vecn; ++i) { + if (realp) + mkhermitian(inA + i * n, sz->rnk, sz->dims, 1); + aphase_shift(inB + i * n, inA + i * n, ncur, + 
nb, na, -sign); + } + k->apply(k, inA, outA); + k->apply(k, inB, outB); + for (i = 0; i < vecn; ++i) + arol(tmp + i * n, outB + i * n, ncur, nb, na); + e = dmax(e, acmp(tmp, outA, N, "freq shift", tol)); + } + } + + nb *= ncur; + } + return e; +} + + +void preserves_input(dofft_closure *k, aconstrain constrain, + int n, C *inA, C *inB, C *outB, int rounds) +{ + int j; + int recopy_input = k->recopy_input; + + k->recopy_input = 1; + for (j = 0; j < rounds; ++j) { + arand(inA, n); + if (constrain) + constrain(inA, n); + + acopy(inB, inA, n); + k->apply(k, inB, outB); + acmp(inB, inA, n, "preserves_input", 0.0); + } + k->recopy_input = recopy_input; +} + + +/* Make a copy of the size tensor, with the same dimensions, but with + the strides corresponding to a "packed" row-major array with the + given stride. */ +bench_tensor *verify_pack(const bench_tensor *sz, int s) +{ + bench_tensor *x = tensor_copy(sz); + if (FINITE_RNK(x->rnk) && x->rnk > 0) { + int i; + x->dims[x->rnk - 1].is = s; + x->dims[x->rnk - 1].os = s; + for (i = x->rnk - 1; i > 0; --i) { + x->dims[i - 1].is = x->dims[i].is * x->dims[i].n; + x->dims[i - 1].os = x->dims[i].os * x->dims[i].n; + } + } + return x; +} + +static int all_zero(C *a, int n) +{ + int i; + for (i = 0; i < n; ++i) + if (c_re(a[i]) != 0.0 || c_im(a[i]) != 0.0) + return 0; + return 1; +} + +static int one_accuracy_test(dofft_closure *k, aconstrain constrain, + int sign, int n, C *a, C *b, + double t[6]) +{ + double err[6]; + + if (constrain) + constrain(a, n); + + if (all_zero(a, n)) + return 0; + + k->apply(k, a, b); + fftaccuracy(n, a, b, sign, err); + + t[0] += err[0]; + t[1] += err[1] * err[1]; + t[2] = dmax(t[2], err[2]); + t[3] += err[3]; + t[4] += err[4] * err[4]; + t[5] = dmax(t[5], err[5]); + + return 1; +} + +void accuracy_test(dofft_closure *k, aconstrain constrain, + int sign, int n, C *a, C *b, int rounds, int impulse_rounds, + double t[6]) +{ + int r, i; + int ntests = 0; + bench_complex czero = {0, 0}; + + for (i = 0; i 
< 6; ++i) t[i] = 0.0; + + for (r = 0; r < rounds; ++r) { + arand(a, n); + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + /* impulses at beginning of array */ + for (r = 0; r < impulse_rounds; ++r) { + if (r > n - r - 1) + continue; + + caset(a, n, czero); + c_re(a[r]) = c_im(a[r]) = 1.0; + + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + /* impulses at end of array */ + for (r = 0; r < impulse_rounds; ++r) { + if (r <= n - r - 1) + continue; + + caset(a, n, czero); + c_re(a[n - r - 1]) = c_im(a[n - r - 1]) = 1.0; + + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + /* randomly-located impulses */ + for (r = 0; r < impulse_rounds; ++r) { + caset(a, n, czero); + i = rand() % n; + c_re(a[i]) = c_im(a[i]) = 1.0; + + if (one_accuracy_test(k, constrain, sign, n, a, b, t)) + ++ntests; + } + + t[0] /= ntests; + t[1] = sqrt(t[1] / ntests); + t[3] /= ntests; + t[4] = sqrt(t[4] / ntests); + + fftaccuracy_done(); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/verify-r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/verify-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,964 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Lots of ugly duplication from verify-lib.c, plus lots of ugliness in + general for all of the r2r variants...oh well, for now */ + +#include "verify.h" +#include +#include +#include + +typedef struct { + bench_problem *p; + bench_tensor *probsz; + bench_tensor *totalsz; + bench_tensor *pckdsz; + bench_tensor *pckdvecsz; +} info; + +/* + * Utility functions: + */ + +static double dabs(double x) { return (x < 0.0) ? -x : x; } +static double dmin(double x, double y) { return (x < y) ? x : y; } + +static double raerror(R *a, R *b, int n) +{ + if (n > 0) { + /* compute the relative Linf error */ + double e = 0.0, mag = 0.0; + int i; + + for (i = 0; i < n; ++i) { + e = dmax(e, dabs(a[i] - b[i])); + mag = dmax(mag, dmin(dabs(a[i]), dabs(b[i]))); + } + if (dabs(mag) < 1e-14 && dabs(e) < 1e-14) + e = 0.0; + else + e /= mag; + +#ifdef HAVE_ISNAN + BENCH_ASSERT(!isnan(e)); +#endif + return e; + } else + return 0.0; +} + +#define by2pi(m, n) ((K2PI * (m)) / (n)) + +/* + * Improve accuracy by reducing x to range [0..1/8] + * before multiplication by 2 * PI. + */ + +static trigreal bench_sincos(trigreal m, trigreal n, int sinp) +{ + /* waiting for C to get tail recursion... 
*/ + trigreal half_n = n * 0.5; + trigreal quarter_n = half_n * 0.5; + trigreal eighth_n = quarter_n * 0.5; + trigreal sgn = 1.0; + + if (sinp) goto sin; + cos: + if (m < 0) { m = -m; /* goto cos; */ } + if (m > half_n) { m = n - m; goto cos; } + if (m > eighth_n) { m = quarter_n - m; goto sin; } + return sgn * COS(by2pi(m, n)); + + msin: + sgn = -sgn; + sin: + if (m < 0) { m = -m; goto msin; } + if (m > half_n) { m = n - m; goto msin; } + if (m > eighth_n) { m = quarter_n - m; goto cos; } + return sgn * SIN(by2pi(m, n)); +} + +static trigreal cos2pi(int m, int n) +{ + return bench_sincos((trigreal)m, (trigreal)n, 0); +} + +static trigreal sin2pi(int m, int n) +{ + return bench_sincos((trigreal)m, (trigreal)n, 1); +} + +static trigreal cos00(int i, int j, int n) +{ + return cos2pi(i * j, n); +} + +static trigreal cos01(int i, int j, int n) +{ + return cos00(i, 2*j + 1, 2*n); +} + +static trigreal cos10(int i, int j, int n) +{ + return cos00(2*i + 1, j, 2*n); +} + +static trigreal cos11(int i, int j, int n) +{ + return cos00(2*i + 1, 2*j + 1, 4*n); +} + +static trigreal sin00(int i, int j, int n) +{ + return sin2pi(i * j, n); +} + +static trigreal sin01(int i, int j, int n) +{ + return sin00(i, 2*j + 1, 2*n); +} + +static trigreal sin10(int i, int j, int n) +{ + return sin00(2*i + 1, j, 2*n); +} + +static trigreal sin11(int i, int j, int n) +{ + return sin00(2*i + 1, 2*j + 1, 4*n); +} + +static trigreal realhalf(int i, int j, int n) +{ + UNUSED(i); + if (j <= n - j) + return 1.0; + else + return 0.0; +} + +static trigreal coshalf(int i, int j, int n) +{ + if (j <= n - j) + return cos00(i, j, n); + else + return cos00(i, n - j, n); +} + +static trigreal unity(int i, int j, int n) +{ + UNUSED(i); + UNUSED(j); + UNUSED(n); + return 1.0; +} + +typedef trigreal (*trigfun)(int, int, int); + +static void rarand(R *a, int n) +{ + int i; + + /* generate random inputs */ + for (i = 0; i < n; ++i) { + a[i] = mydrand(); + } +} + +/* C = A + B */ +static void raadd(R *c, R *a, R 
*b, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c[i] = a[i] + b[i]; + } +} + +/* C = A - B */ +static void rasub(R *c, R *a, R *b, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + c[i] = a[i] - b[i]; + } +} + +/* B = rotate left A + rotate right A */ +static void rarolr(R *b, R *a, int n, int nb, int na, + r2r_kind_t k) +{ + int isL0 = 0, isL1 = 0, isR0 = 0, isR1 = 0; + int i, ib, ia; + + for (ib = 0; ib < nb; ++ib) { + for (i = 0; i < n - 1; ++i) + for (ia = 0; ia < na; ++ia) + b[(ib * n + i) * na + ia] = + a[(ib * n + i + 1) * na + ia]; + + /* ugly switch to do boundary conditions for various r2r types */ + switch (k) { + /* periodic boundaries */ + case R2R_DHT: + case R2R_R2HC: + for (ia = 0; ia < na; ++ia) { + b[(ib * n + n - 1) * na + ia] = + a[(ib * n + 0) * na + ia]; + b[(ib * n + 0) * na + ia] += + a[(ib * n + n - 1) * na + ia]; + } + break; + + case R2R_HC2R: /* ugh (hermitian halfcomplex boundaries) */ + if (n > 2) { + if (n % 2 == 0) + for (ia = 0; ia < na; ++ia) { + b[(ib * n + n - 1) * na + ia] = 0.0; + b[(ib * n + 0) * na + ia] += + a[(ib * n + 1) * na + ia]; + b[(ib * n + n/2) * na + ia] += + + a[(ib * n + n/2 - 1) * na + ia] + - a[(ib * n + n/2 + 1) * na + ia]; + b[(ib * n + n/2 + 1) * na + ia] += + - a[(ib * n + n/2) * na + ia]; + } + else + for (ia = 0; ia < na; ++ia) { + b[(ib * n + n - 1) * na + ia] = 0.0; + b[(ib * n + 0) * na + ia] += + a[(ib * n + 1) * na + ia]; + b[(ib * n + n/2) * na + ia] += + + a[(ib * n + n/2) * na + ia] + - a[(ib * n + n/2 + 1) * na + ia]; + b[(ib * n + n/2 + 1) * na + ia] += + - a[(ib * n + n/2 + 1) * na + ia] + - a[(ib * n + n/2) * na + ia]; + } + } else /* n <= 2 */ { + for (ia = 0; ia < na; ++ia) { + b[(ib * n + n - 1) * na + ia] = + a[(ib * n + 0) * na + ia]; + b[(ib * n + 0) * na + ia] += + a[(ib * n + n - 1) * na + ia]; + } + } + break; + + /* various even/odd boundary conditions */ + case R2R_REDFT00: + isL1 = isR1 = 1; + goto mirrors; + case R2R_REDFT01: + isL1 = 1; + goto mirrors; + case R2R_REDFT10: 
+ isL0 = isR0 = 1; + goto mirrors; + case R2R_REDFT11: + isL0 = 1; + isR0 = -1; + goto mirrors; + case R2R_RODFT00: + goto mirrors; + case R2R_RODFT01: + isR1 = 1; + goto mirrors; + case R2R_RODFT10: + isL0 = isR0 = -1; + goto mirrors; + case R2R_RODFT11: + isL0 = -1; + isR0 = 1; + goto mirrors; + + mirrors: + + for (ia = 0; ia < na; ++ia) + b[(ib * n + n - 1) * na + ia] = + isR0 * a[(ib * n + n - 1) * na + ia] + + (n > 1 ? isR1 * a[(ib * n + n - 2) * na + ia] + : 0); + + for (ia = 0; ia < na; ++ia) + b[(ib * n) * na + ia] += + isL0 * a[(ib * n) * na + ia] + + (n > 1 ? isL1 * a[(ib * n + 1) * na + ia] : 0); + + } + + for (i = 1; i < n; ++i) + for (ia = 0; ia < na; ++ia) + b[(ib * n + i) * na + ia] += + a[(ib * n + i - 1) * na + ia]; + } +} + +static void raphase_shift(R *b, R *a, int n, int nb, int na, + int n0, int k0, trigfun t) +{ + int j, jb, ja; + + for (jb = 0; jb < nb; ++jb) + for (j = 0; j < n; ++j) { + trigreal c = 2.0 * t(1, j + k0, n0); + + for (ja = 0; ja < na; ++ja) { + int k = (jb * n + j) * na + ja; + b[k] = a[k] * c; + } + } +} + +/* A = alpha * A (real, in place) */ +static void rascale(R *a, R alpha, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + a[i] *= alpha; + } +} + +/* + * compute rdft: + */ + +/* copy real A into real B, using output stride of A and input stride of B */ +typedef struct { + dotens2_closure k; + R *ra; + R *rb; +} cpyr_closure; + +static void cpyr0(dotens2_closure *k_, + int indxa, int ondxa, int indxb, int ondxb) +{ + cpyr_closure *k = (cpyr_closure *)k_; + k->rb[indxb] = k->ra[ondxa]; + UNUSED(indxa); UNUSED(ondxb); +} + +static void cpyr(R *ra, bench_tensor *sza, R *rb, bench_tensor *szb) +{ + cpyr_closure k; + k.k.apply = cpyr0; + k.ra = ra; k.rb = rb; + bench_dotens2(sza, szb, &k.k); +} + +static void dofft(info *nfo, R *in, R *out) +{ + cpyr(in, nfo->pckdsz, (R *) nfo->p->in, nfo->totalsz); + after_problem_rcopy_from(nfo->p, (bench_real *)nfo->p->in); + doit(1, nfo->p); + after_problem_rcopy_to(nfo->p, (bench_real 
*)nfo->p->out); + cpyr((R *) nfo->p->out, nfo->totalsz, out, nfo->pckdsz); +} + +static double racmp(R *a, R *b, int n, const char *test, double tol) +{ + double d = raerror(a, b, n); + if (d > tol) { + ovtpvt_err("Found relative error %e (%s)\n", d, test); + { + int i, N; + N = n > 300 && verbose <= 2 ? 300 : n; + for (i = 0; i < N; ++i) + ovtpvt_err("%8d %16.12f %16.12f\n", i, + (double) a[i], + (double) b[i]); + } + bench_exit(EXIT_FAILURE); + } + return d; +} + +/***********************************************************************/ + +typedef struct { + int n; /* physical size */ + int n0; /* "logical" transform size */ + int i0, k0; /* shifts of input/output */ + trigfun ti, ts; /* impulse/shift trig functions */ +} dim_stuff; + +static void impulse_response(int rnk, dim_stuff *d, R impulse_amp, + R *A, int N) +{ + if (rnk == 0) + A[0] = impulse_amp; + else { + int i; + N /= d->n; + for (i = 0; i < d->n; ++i) { + impulse_response(rnk - 1, d + 1, + impulse_amp * d->ti(d->i0, d->k0 + i, d->n0), + A + i * N, N); + } + } +} + +/***************************************************************************/ + +/* + * Implementation of the FFT tester described in + * + * Funda Ergün. Testing multivariate linear functions: Overcoming the + * generator bottleneck. In Proceedings of the Twenty-Seventh Annual + * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, + * Nevada, 29 May--1 June 1995. + * + * Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without + * the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000). 
+ */ + +static double rlinear(int n, info *nfo, R *inA, R *inB, R *inC, R *outA, + R *outB, R *outC, R *tmp, int rounds, double tol) +{ + double e = 0.0; + int j; + + for (j = 0; j < rounds; ++j) { + R alpha, beta; + alpha = mydrand(); + beta = mydrand(); + rarand(inA, n); + rarand(inB, n); + dofft(nfo, inA, outA); + dofft(nfo, inB, outB); + + rascale(outA, alpha, n); + rascale(outB, beta, n); + raadd(tmp, outA, outB, n); + rascale(inA, alpha, n); + rascale(inB, beta, n); + raadd(inC, inA, inB, n); + dofft(nfo, inC, outC); + + e = dmax(e, racmp(outC, tmp, n, "linear", tol)); + } + return e; +} + +static double rimpulse(dim_stuff *d, R impulse_amp, + int n, int vecn, info *nfo, + R *inA, R *inB, R *inC, + R *outA, R *outB, R *outC, + R *tmp, int rounds, double tol) +{ + double e = 0.0; + int N = n * vecn; + int i; + int j; + + /* test 2: check that the unit impulse is transformed properly */ + + for (i = 0; i < N; ++i) { + /* pls */ + inA[i] = 0.0; + } + for (i = 0; i < vecn; ++i) { + inA[i * n] = (i+1) / (double)(vecn+1); + + /* transform of the pls */ + impulse_response(nfo->probsz->rnk, d, impulse_amp * inA[i * n], + outA + i * n, n); + } + + dofft(nfo, inA, tmp); + e = dmax(e, racmp(tmp, outA, N, "impulse 1", tol)); + + for (j = 0; j < rounds; ++j) { + rarand(inB, N); + rasub(inC, inA, inB, N); + dofft(nfo, inB, outB); + dofft(nfo, inC, outC); + raadd(tmp, outB, outC, N); + e = dmax(e, racmp(tmp, outA, N, "impulse", tol)); + } + return e; +} + +static double t_shift(int n, int vecn, info *nfo, + R *inA, R *inB, R *outA, R *outB, R *tmp, + int rounds, double tol, + dim_stuff *d) +{ + double e = 0.0; + int nb, na, dim, N = n * vecn; + int i, j; + bench_tensor *sz = nfo->probsz; + + /* test 3: check the time-shift property */ + /* the paper performs more tests, but this code should be fine too */ + + nb = 1; + na = n; + + /* check shifts across all SZ dimensions */ + for (dim = 0; dim < sz->rnk; ++dim) { + int ncur = sz->dims[dim].n; + + na /= ncur; + + for (j = 0; 
j < rounds; ++j) { + rarand(inA, N); + + for (i = 0; i < vecn; ++i) { + rarolr(inB + i * n, inA + i*n, ncur, nb,na, + nfo->p->k[dim]); + } + dofft(nfo, inA, outA); + dofft(nfo, inB, outB); + for (i = 0; i < vecn; ++i) + raphase_shift(tmp + i * n, outA + i * n, ncur, + nb, na, d[dim].n0, d[dim].k0, d[dim].ts); + e = dmax(e, racmp(tmp, outB, N, "time shift", tol)); + } + + nb *= ncur; + } + return e; +} + +/***********************************************************************/ + +void verify_r2r(bench_problem *p, int rounds, double tol, errors *e) +{ + R *inA, *inB, *inC, *outA, *outB, *outC, *tmp; + info nfo; + int n, vecn, N; + double impulse_amp = 1.0; + dim_stuff *d; + int i; + + if (rounds == 0) + rounds = 20; /* default value */ + + n = tensor_sz(p->sz); + vecn = tensor_sz(p->vecsz); + N = n * vecn; + + d = (dim_stuff *) bench_malloc(sizeof(dim_stuff) * p->sz->rnk); + for (i = 0; i < p->sz->rnk; ++i) { + int n0, i0, k0; + trigfun ti, ts; + + d[i].n = n0 = p->sz->dims[i].n; + if (p->k[i] > R2R_DHT) + n0 = 2 * (n0 + (p->k[i] == R2R_REDFT00 ? -1 : + (p->k[i] == R2R_RODFT00 ? 1 : 0))); + + switch (p->k[i]) { + case R2R_R2HC: + i0 = k0 = 0; + ti = realhalf; + ts = coshalf; + break; + case R2R_DHT: + i0 = k0 = 0; + ti = unity; + ts = cos00; + break; + case R2R_HC2R: + i0 = k0 = 0; + ti = unity; + ts = cos00; + break; + case R2R_REDFT00: + i0 = k0 = 0; + ti = ts = cos00; + break; + case R2R_REDFT01: + i0 = k0 = 0; + ti = ts = cos01; + break; + case R2R_REDFT10: + i0 = k0 = 0; + ti = cos10; impulse_amp *= 2.0; + ts = cos00; + break; + case R2R_REDFT11: + i0 = k0 = 0; + ti = cos11; impulse_amp *= 2.0; + ts = cos01; + break; + case R2R_RODFT00: + i0 = k0 = 1; + ti = sin00; impulse_amp *= 2.0; + ts = cos00; + break; + case R2R_RODFT01: + i0 = 1; k0 = 0; + ti = sin01; impulse_amp *= n == 1 ? 
1.0 : 2.0; + ts = cos01; + break; + case R2R_RODFT10: + i0 = 0; k0 = 1; + ti = sin10; impulse_amp *= 2.0; + ts = cos00; + break; + case R2R_RODFT11: + i0 = k0 = 0; + ti = sin11; impulse_amp *= 2.0; + ts = cos01; + break; + default: + BENCH_ASSERT(0); + return; + } + + d[i].n0 = n0; + d[i].i0 = i0; + d[i].k0 = k0; + d[i].ti = ti; + d[i].ts = ts; + } + + + inA = (R *) bench_malloc(N * sizeof(R)); + inB = (R *) bench_malloc(N * sizeof(R)); + inC = (R *) bench_malloc(N * sizeof(R)); + outA = (R *) bench_malloc(N * sizeof(R)); + outB = (R *) bench_malloc(N * sizeof(R)); + outC = (R *) bench_malloc(N * sizeof(R)); + tmp = (R *) bench_malloc(N * sizeof(R)); + + nfo.p = p; + nfo.probsz = p->sz; + nfo.totalsz = tensor_append(p->vecsz, nfo.probsz); + nfo.pckdsz = verify_pack(nfo.totalsz, 1); + nfo.pckdvecsz = verify_pack(p->vecsz, tensor_sz(nfo.probsz)); + + e->i = rimpulse(d, impulse_amp, n, vecn, &nfo, + inA, inB, inC, outA, outB, outC, tmp, rounds, tol); + e->l = rlinear(N, &nfo, inA, inB, inC, outA, outB, outC, tmp, rounds,tol); + e->s = t_shift(n, vecn, &nfo, inA, inB, outA, outB, tmp, + rounds, tol, d); + + /* grr, verify-lib.c:preserves_input() only works for complex */ + if (!p->in_place && !p->destroy_input) { + bench_tensor *totalsz_swap, *pckdsz_swap; + totalsz_swap = tensor_copy_swapio(nfo.totalsz); + pckdsz_swap = tensor_copy_swapio(nfo.pckdsz); + + for (i = 0; i < rounds; ++i) { + rarand(inA, N); + dofft(&nfo, inA, outB); + cpyr((R *) nfo.p->in, totalsz_swap, inB, pckdsz_swap); + racmp(inB, inA, N, "preserves_input", 0.0); + } + + tensor_destroy(totalsz_swap); + tensor_destroy(pckdsz_swap); + } + + tensor_destroy(nfo.totalsz); + tensor_destroy(nfo.pckdsz); + tensor_destroy(nfo.pckdvecsz); + bench_free(tmp); + bench_free(outC); + bench_free(outB); + bench_free(outA); + bench_free(inC); + bench_free(inB); + bench_free(inA); + bench_free(d); +} + + +typedef struct { + dofft_closure k; + bench_problem *p; + int n0; +} dofft_r2r_closure; + +static void cpyr1(int n, 
R *in, int is, R *out, int os, R scale) +{ + int i; + for (i = 0; i < n; ++i) + out[i * os] = in[i * is] * scale; +} + +static void mke00(C *a, int n, int c) +{ + int i; + for (i = 1; i + i < n; ++i) + a[n - i][c] = a[i][c]; +} + +static void mkre00(C *a, int n) +{ + mkreal(a, n); + mke00(a, n, 0); +} + +static void mkimag(C *a, int n) +{ + int i; + for (i = 0; i < n; ++i) + c_re(a[i]) = 0.0; +} + +static void mko00(C *a, int n, int c) +{ + int i; + a[0][c] = 0.0; + for (i = 1; i + i < n; ++i) + a[n - i][c] = -a[i][c]; + if (i + i == n) + a[i][c] = 0.0; +} + +static void mkro00(C *a, int n) +{ + mkreal(a, n); + mko00(a, n, 0); +} + +static void mkio00(C *a, int n) +{ + mkimag(a, n); + mko00(a, n, 1); +} + +static void mkre01(C *a, int n) /* n should be be multiple of 4 */ +{ + R a0; + a0 = c_re(a[0]); + mko00(a, n/2, 0); + c_re(a[n/2]) = -(c_re(a[0]) = a0); + mkre00(a, n); +} + +static void mkro01(C *a, int n) /* n should be be multiple of 4 */ +{ + c_re(a[0]) = c_im(a[0]) = 0.0; + mkre00(a, n/2); + mkro00(a, n); +} + +static void mkoddonly(C *a, int n) +{ + int i; + for (i = 0; i < n; i += 2) + c_re(a[i]) = c_im(a[i]) = 0.0; +} + +static void mkre10(C *a, int n) +{ + mkoddonly(a, n); + mkre00(a, n); +} + +static void mkio10(C *a, int n) +{ + mkoddonly(a, n); + mkio00(a, n); +} + +static void mkre11(C *a, int n) +{ + mkoddonly(a, n); + mko00(a, n/2, 0); + mkre00(a, n); +} + +static void mkro11(C *a, int n) +{ + mkoddonly(a, n); + mkre00(a, n/2); + mkro00(a, n); +} + +static void mkio11(C *a, int n) +{ + mkoddonly(a, n); + mke00(a, n/2, 1); + mkio00(a, n); +} + +static void r2r_apply(dofft_closure *k_, bench_complex *in, bench_complex *out) +{ + dofft_r2r_closure *k = (dofft_r2r_closure *)k_; + bench_problem *p = k->p; + bench_real *ri, *ro; + int n, is, os; + + n = p->sz->dims[0].n; + is = p->sz->dims[0].is; + os = p->sz->dims[0].os; + + ri = (bench_real *) p->in; + ro = (bench_real *) p->out; + + switch (p->k[0]) { + case R2R_R2HC: + cpyr1(n, &c_re(in[0]), 2, ri, 
is, 1.0); + break; + case R2R_HC2R: + cpyr1(n/2 + 1, &c_re(in[0]), 2, ri, is, 1.0); + cpyr1((n+1)/2 - 1, &c_im(in[n-1]), -2, ri + is*(n-1), -is, 1.0); + break; + case R2R_REDFT00: + cpyr1(n, &c_re(in[0]), 2, ri, is, 1.0); + break; + case R2R_RODFT00: + cpyr1(n, &c_re(in[1]), 2, ri, is, 1.0); + break; + case R2R_REDFT01: + cpyr1(n, &c_re(in[0]), 2, ri, is, 1.0); + break; + case R2R_REDFT10: + cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0); + break; + case R2R_RODFT01: + cpyr1(n, &c_re(in[1]), 2, ri, is, 1.0); + break; + case R2R_RODFT10: + cpyr1(n, &c_im(in[1]), 4, ri, is, 1.0); + break; + case R2R_REDFT11: + cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0); + break; + case R2R_RODFT11: + cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0); + break; + default: + BENCH_ASSERT(0); /* not yet implemented */ + } + + after_problem_rcopy_from(p, ri); + doit(1, p); + after_problem_rcopy_to(p, ro); + + switch (p->k[0]) { + case R2R_R2HC: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0); + cpyr1(n/2 + 1, ro, os, &c_re(out[0]), 2, 1.0); + cpyr1((n+1)/2 - 1, ro + os*(n-1), -os, &c_im(out[1]), 2, 1.0); + c_im(out[0]) = 0.0; + if (n % 2 == 0) + c_im(out[n/2]) = 0.0; + mkhermitian1(out, n); + break; + case R2R_HC2R: + if (k->k.recopy_input) { + cpyr1(n/2 + 1, ri, is, &c_re(in[0]), 2, 1.0); + cpyr1((n+1)/2 - 1, ri + is*(n-1), -is, &c_im(in[1]), 2,1.0); + } + cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0); + mkreal(out, n); + break; + case R2R_REDFT00: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0); + cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0); + mkre00(out, k->n0); + break; + case R2R_RODFT00: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_im(in[1]), 2, -1.0); + cpyr1(n, ro, os, &c_im(out[1]), 2, -1.0); + mkio00(out, k->n0); + break; + case R2R_REDFT01: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0); + cpyr1(n, ro, os, &c_re(out[1]), 4, 2.0); + mkre10(out, k->n0); + break; + case R2R_REDFT10: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_re(in[1]), 4, 2.0); + 
cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0); + mkre01(out, k->n0); + break; + case R2R_RODFT01: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_re(in[1]), 2, 1.0); + cpyr1(n, ro, os, &c_im(out[1]), 4, -2.0); + mkio10(out, k->n0); + break; + case R2R_RODFT10: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_im(in[1]), 4, -2.0); + cpyr1(n, ro, os, &c_re(out[1]), 2, 1.0); + mkro01(out, k->n0); + break; + case R2R_REDFT11: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_re(in[1]), 4, 2.0); + cpyr1(n, ro, os, &c_re(out[1]), 4, 2.0); + mkre11(out, k->n0); + break; + case R2R_RODFT11: + if (k->k.recopy_input) + cpyr1(n, ri, is, &c_im(in[1]), 4, -2.0); + cpyr1(n, ro, os, &c_im(out[1]), 4, -2.0); + mkio11(out, k->n0); + break; + default: + BENCH_ASSERT(0); /* not yet implemented */ + } +} + +void accuracy_r2r(bench_problem *p, int rounds, int impulse_rounds, + double t[6]) /* accuracy driver for rank-1, non-vector r2r problems; t is passed through to accuracy_test */ +{ + dofft_r2r_closure k; + int n, n0 = 1; + C *a, *b; + aconstrain constrain = 0; + + BENCH_ASSERT(p->kind == PROBLEM_R2R); + BENCH_ASSERT(p->sz->rnk == 1); + BENCH_ASSERT(p->vecsz->rnk == 0); + + k.k.apply = r2r_apply; + k.k.recopy_input = 0; + k.p = p; + n = tensor_sz(p->sz); + + switch (p->k[0]) { /* choose the symmetry constraint and the complex buffer length n0 for this transform kind */ + case R2R_R2HC: constrain = mkreal; n0 = n; break; + case R2R_HC2R: constrain = mkhermitian1; n0 = n; break; + case R2R_REDFT00: constrain = mkre00; n0 = 2*(n-1); break; + case R2R_RODFT00: constrain = mkro00; n0 = 2*(n+1); break; + case R2R_REDFT01: constrain = mkre01; n0 = 4*n; break; + case R2R_REDFT10: constrain = mkre10; n0 = 4*n; break; + case R2R_RODFT01: constrain = mkro01; n0 = 4*n; break; + case R2R_RODFT10: constrain = mkio10; n0 = 4*n; break; + case R2R_REDFT11: constrain = mkre11; n0 = 8*n; break; + case R2R_RODFT11: constrain = mkro11; n0 = 8*n; break; + default: BENCH_ASSERT(0); /* not yet implemented */ + } + k.n0 = n0; /* r2r_apply reads this when re-imposing the symmetry on its output */ + + a = (C *) bench_malloc(n0 * sizeof(C)); + b = (C *) bench_malloc(n0 * sizeof(C)); + accuracy_test(&k.k, constrain, -1, n0, a, b, rounds, impulse_rounds, t); + bench_free(b); +
bench_free(a); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/verify-rdft2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/verify-rdft2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "verify.h" + +/* copy real A into real B, using output stride of A and input stride of B */ +typedef struct { + dotens2_closure k; + R *ra; + R *rb; +} cpyr_closure; + +static void cpyr0(dotens2_closure *k_, + int indxa, int ondxa, int indxb, int ondxb) +{ + cpyr_closure *k = (cpyr_closure *)k_; + k->rb[indxb] = k->ra[ondxa]; /* B at its input index receives A at its output index */ + UNUSED(indxa); UNUSED(ondxb); /* the other two indices are deliberately ignored */ +} + +static void cpyr(R *ra, const bench_tensor *sza, + R *rb, const bench_tensor *szb) +{ + cpyr_closure k; + k.k.apply = cpyr0; + k.ra = ra; k.rb = rb; + bench_dotens2(sza, szb, &k.k); /* presumably invokes cpyr0 once per element pair -- bench_dotens2 is defined elsewhere */ +} + +/* copy unpacked halfcomplex A[n] into packed-complex B[n], using output stride + of A and input stride of B. Only copies non-redundant half; other + half must be copied via mkhermitian.
*/ +typedef struct { + dotens2_closure k; + int n; /* logical length; n/2+1 elements are copied */ + int as; /* stride of A along the copied dimension (0 when A has no such dimension) */ + int scalea; /* multiplier applied to A's offset and stride */ + R *ra, *ia; + R *rb, *ib; +} cpyhc2_closure; + +static void cpyhc20(dotens2_closure *k_, + int indxa, int ondxa, int indxb, int ondxb) +{ + cpyhc2_closure *k = (cpyhc2_closure *)k_; + int i, n = k->n; + int scalea = k->scalea; + int as = k->as * scalea; + R *ra = k->ra + ondxa * scalea, *ia = k->ia + ondxa * scalea; + R *rb = k->rb + indxb, *ib = k->ib + indxb; + UNUSED(indxa); UNUSED(ondxb); + + for (i = 0; i < n/2 + 1; ++i) { /* B is written with a fixed stride of 2 */ + rb[2*i] = ra[as*i]; + ib[2*i] = ia[as*i]; + } +} + +static void cpyhc2(R *ra, R *ia, + const bench_tensor *sza, const bench_tensor *vecsza, + int scalea, + R *rb, R *ib, const bench_tensor *szb) +{ + cpyhc2_closure k; + BENCH_ASSERT(sza->rnk <= 1); + k.k.apply = cpyhc20; + k.n = tensor_sz(sza); + k.scalea = scalea; + if (!FINITE_RNK(sza->rnk) || sza->rnk == 0) + k.as = 0; /* rank 0 or non-finite rank: there is no dims[0] to read */ + else + k.as = sza->dims[0].os; /* forward copy reads A with its output stride */ + k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib; + bench_dotens2(vecsza, szb, &k.k); +} + +/* icpyhc2 is the inverse of cpyhc2 */ + +static void icpyhc20(dotens2_closure *k_, + int indxa, int ondxa, int indxb, int ondxb) +{ + cpyhc2_closure *k = (cpyhc2_closure *)k_; + int i, n = k->n; + int scalea = k->scalea; + int as = k->as * scalea; + R *ra = k->ra + indxa * scalea, *ia = k->ia + indxa * scalea; + R *rb = k->rb + ondxb, *ib = k->ib + ondxb; + UNUSED(ondxa); UNUSED(indxb); + + for (i = 0; i < n/2 + 1; ++i) { /* same element pairs as cpyhc20, copied from B to A */ + ra[as*i] = rb[2*i]; + ia[as*i] = ib[2*i]; + } +} + +static void icpyhc2(R *ra, R *ia, + const bench_tensor *sza, const bench_tensor *vecsza, + int scalea, + R *rb, R *ib, const bench_tensor *szb) +{ + cpyhc2_closure k; + BENCH_ASSERT(sza->rnk <= 1); + k.k.apply = icpyhc20; + k.n = tensor_sz(sza); + k.scalea = scalea; + if (!FINITE_RNK(sza->rnk) || sza->rnk == 0) + k.as = 0; /* rank 0 or non-finite rank: there is no dims[0] to read */ + else + k.as = sza->dims[0].is; /* inverse copy writes A with its input stride */ + k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib; + bench_dotens2(vecsza, szb, &k.k); +} + +typedef struct { + dofft_closure k; + bench_problem *p; +}
dofft_rdft2_closure; + +static void rdft2_apply(dofft_closure *k_, + bench_complex *in, bench_complex *out) +{ + dofft_rdft2_closure *k = (dofft_rdft2_closure *)k_; + bench_problem *p = k->p; + bench_tensor *totalsz, *pckdsz, *totalsz_swap, *pckdsz_swap; + bench_tensor *probsz2, *totalsz2, *pckdsz2; + bench_tensor *probsz2_swap, *totalsz2_swap, *pckdsz2_swap; + bench_real *ri, *ii, *ro, *io; + int n2, totalscale; + + totalsz = tensor_append(p->vecsz, p->sz); + pckdsz = verify_pack(totalsz, 2); + n2 = tensor_sz(totalsz); + if (FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0) + n2 = (n2 / p->sz->dims[p->sz->rnk - 1].n) * + (p->sz->dims[p->sz->rnk - 1].n / 2 + 1); /* the last dimension is counted as n/2+1 instead of n */ + ri = (bench_real *) p->in; + ro = (bench_real *) p->out; + + if (FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0 && n2 > 0) { /* split off the last dimension of the problem */ + probsz2 = tensor_copy_sub(p->sz, p->sz->rnk - 1, 1); + totalsz2 = tensor_copy_sub(totalsz, 0, totalsz->rnk - 1); + pckdsz2 = tensor_copy_sub(pckdsz, 0, pckdsz->rnk - 1); + } + else { + probsz2 = mktensor(0); + totalsz2 = tensor_copy(totalsz); + pckdsz2 = tensor_copy(pckdsz); + } + + totalsz_swap = tensor_copy_swapio(totalsz); + pckdsz_swap = tensor_copy_swapio(pckdsz); + totalsz2_swap = tensor_copy_swapio(totalsz2); + pckdsz2_swap = tensor_copy_swapio(pckdsz2); + probsz2_swap = tensor_copy_swapio(probsz2); + + /* confusion: the stride is the distance between complex elements + when using interleaved format, but it is the distance between + real elements when using split format */ + if (p->split) { /* split format: unless given explicitly, the imaginary array starts n2 reals after the real one */ + ii = p->ini ? (bench_real *) p->ini : ri + n2; + io = p->outi ? (bench_real *) p->outi : ro + n2; + totalscale = 1; + } else { /* interleaved format: unless given explicitly, the imaginary part is the next real */ + ii = p->ini ? (bench_real *) p->ini : ri + 1; + io = p->outi ?
(bench_real *) p->outi : ro + 1; + totalscale = 2; + } + + if (p->sign < 0) { /* R2HC */ + int N, vN, i; + cpyr(&c_re(in[0]), pckdsz, ri, totalsz); + after_problem_rcopy_from(p, ri); + doit(1, p); + after_problem_hccopy_to(p, ro, io); + if (k->k.recopy_input) + cpyr(ri, totalsz_swap, &c_re(in[0]), pckdsz_swap); + cpyhc2(ro, io, probsz2, totalsz2, totalscale, + &c_re(out[0]), &c_im(out[0]), pckdsz2); + N = tensor_sz(p->sz); + vN = tensor_sz(p->vecsz); + for (i = 0; i < vN; ++i) + mkhermitian(out + i*N, p->sz->rnk, p->sz->dims, 1); /* cpyhc2 copied only n/2+1 entries per row; mkhermitian is applied to each of the vN outputs */ + } + else { /* HC2R */ + icpyhc2(ri, ii, probsz2, totalsz2, totalscale, + &c_re(in[0]), &c_im(in[0]), pckdsz2); + after_problem_hccopy_from(p, ri, ii); + doit(1, p); + after_problem_rcopy_to(p, ro); + if (k->k.recopy_input) + cpyhc2(ri, ii, probsz2_swap, totalsz2_swap, totalscale, + &c_re(in[0]), &c_im(in[0]), pckdsz2_swap); + mkreal(out, tensor_sz(pckdsz)); + cpyr(ro, totalsz, &c_re(out[0]), pckdsz); + } + + tensor_destroy(totalsz); /* release every temporary tensor created above */ + tensor_destroy(pckdsz); + tensor_destroy(totalsz_swap); + tensor_destroy(pckdsz_swap); + tensor_destroy(probsz2); + tensor_destroy(totalsz2); + tensor_destroy(pckdsz2); + tensor_destroy(probsz2_swap); + tensor_destroy(totalsz2_swap); + tensor_destroy(pckdsz2_swap); +} + +void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e) /* verifies a PROBLEM_REAL problem; results go to e->i, e->l and e->s */ +{ + C *inA, *inB, *inC, *outA, *outB, *outC, *tmp; + int n, vecn, N; + dofft_rdft2_closure k; + + BENCH_ASSERT(p->kind == PROBLEM_REAL); + + if (!FINITE_RNK(p->sz->rnk) || !FINITE_RNK(p->vecsz->rnk)) + return; /* give up; note that *e is not written on this path */ + + k.k.apply = rdft2_apply; + k.k.recopy_input = 0; + k.p = p; + + if (rounds == 0) + rounds = 20; /* default value */ + + n = tensor_sz(p->sz); + vecn = tensor_sz(p->vecsz); + N = n * vecn; + + inA = (C *) bench_malloc(N * sizeof(C)); + inB = (C *) bench_malloc(N * sizeof(C)); + inC = (C *) bench_malloc(N * sizeof(C)); + outA = (C *) bench_malloc(N * sizeof(C)); + outB = (C *) bench_malloc(N * sizeof(C)); + outC = (C *) bench_malloc(N * sizeof(C));
+ tmp = (C *) bench_malloc(N * sizeof(C)); + + e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol); + e->l = linear(&k.k, 1, N, inA, inB, inC, outA, outB, outC, + tmp, rounds, tol); + + e->s = 0.0; + if (p->sign < 0) /* negative sign (R2HC in rdft2_apply) uses TIME_SHIFT, otherwise FREQ_SHIFT */ + e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign, + inA, inB, outA, outB, + tmp, rounds, tol, TIME_SHIFT)); + else + e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign, + inA, inB, outA, outB, + tmp, rounds, tol, FREQ_SHIFT)); + + if (!p->in_place && !p->destroy_input) /* checked only for out-of-place problems that do not declare destroy_input */ + preserves_input(&k.k, p->sign < 0 ? mkreal : mkhermitian1, + N, inA, inB, outB, rounds); + + bench_free(tmp); + bench_free(outC); + bench_free(outB); + bench_free(outA); + bench_free(inC); + bench_free(inB); + bench_free(inA); +} + +void accuracy_rdft2(bench_problem *p, int rounds, int impulse_rounds, + double t[6]) /* accuracy driver for rank-1, non-vector PROBLEM_REAL problems; t is passed through to accuracy_test */ +{ + dofft_rdft2_closure k; + int n; + C *a, *b; + + BENCH_ASSERT(p->kind == PROBLEM_REAL); + BENCH_ASSERT(p->sz->rnk == 1); + BENCH_ASSERT(p->vecsz->rnk == 0); + + k.k.apply = rdft2_apply; + k.k.recopy_input = 0; + k.p = p; + n = tensor_sz(p->sz); + + a = (C *) bench_malloc(n * sizeof(C)); + b = (C *) bench_malloc(n * sizeof(C)); + accuracy_test(&k.k, p->sign < 0 ? mkreal : mkhermitian1, p->sign, + n, a, b, rounds, impulse_rounds, t); + bench_free(b); + bench_free(a); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/verify.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/verify.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2000 Matteo Frigo + * Copyright (c) 2000 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include +#include + +#include "verify.h" + +void verify_problem(bench_problem *p, int rounds, double tol) +{ + errors e; + const char *pstring = p->pstring ? p->pstring : ""; + + switch (p->kind) { + case PROBLEM_COMPLEX: verify_dft(p, rounds, tol, &e); break; + case PROBLEM_REAL: verify_rdft2(p, rounds, tol, &e); break; + case PROBLEM_R2R: verify_r2r(p, rounds, tol, &e); break; + } + + if (verbose) + ovtpvt("%s %g %g %g\n", pstring, e.l, e.i, e.s); +} + +void verify(const char *param, int rounds, double tol) +{ + bench_problem *p; + + p = problem_parse(param); + problem_alloc(p); + + if (!can_do(p)) { + ovtpvt_err("No can_do for %s\n", p->pstring); + BENCH_ASSERT(0); + } + + problem_zero(p); + setup(p); + + verify_problem(p, rounds, tol); + + done(p); + problem_destroy(p); +} + + +static void do_accuracy(bench_problem *p, int rounds, int impulse_rounds) +{ + double t[6]; + + switch (p->kind) { + case PROBLEM_COMPLEX: + accuracy_dft(p, rounds, impulse_rounds, t); break; + case PROBLEM_REAL: + accuracy_rdft2(p, rounds, impulse_rounds, t); break; + case PROBLEM_R2R: + accuracy_r2r(p, rounds, impulse_rounds, t); break; + } + + /* t[0] : L1 error + t[1] : L2 error + t[2] : Linf error + t[3..5]: L1, L2, Linf backward error */ + ovtpvt("%6.2e %6.2e %6.2e %6.2e %6.2e %6.2e\n", + t[0], t[1], t[2], t[3], t[4], t[5]); +} + +void accuracy(const char *param, int rounds, int impulse_rounds) +{ + bench_problem *p; + p = problem_parse(param); + BENCH_ASSERT(can_do(p)); + problem_alloc(p); + 
problem_zero(p); + setup(p); + do_accuracy(p, rounds, impulse_rounds); + done(p); + problem_destroy(p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/verify.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/verify.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "bench.h" + +typedef bench_real R; +typedef bench_complex C; + +typedef struct dofft_closure_s { + void (*apply)(struct dofft_closure_s *k, + bench_complex *in, bench_complex *out); /* transforms in[] into out[]; implemented per problem kind (e.g. r2r_apply, rdft2_apply) */ + int recopy_input; /* nonzero: apply() also copies the problem's input buffer back into in[] */ +} dofft_closure; + +double dmax(double x, double y); + +typedef void (*aconstrain)(C *a, int n); /* modifies a[0..n-1] in place to impose a constraint, e.g. mkreal or mkhermitian1 */ + +void arand(C *a, int n); +void mkreal(C *A, int n); +void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride); +void mkhermitian1(C *a, int n); +void aadd(C *c, C *a, C *b, int n); +void asub(C *c, C *a, C *b, int n); +void arol(C *b, C *a, int n, int nb, int na); +void aphase_shift(C *b, C *a, int n, int nb, int na, double sign); +void ascale(C *a, C alpha, int n); +double acmp(C *a, C *b, int n, const char *test, double tol); +double mydrand(void); +double impulse(dofft_closure *k, +
int n, int vecn, + C *inA, C *inB, C *inC, + C *outA, C *outB, C *outC, + C *tmp, int rounds, double tol); +double linear(dofft_closure *k, int realp, + int n, C *inA, C *inB, C *inC, C *outA, + C *outB, C *outC, C *tmp, int rounds, double tol); +void preserves_input(dofft_closure *k, aconstrain constrain, + int n, C *inA, C *inB, C *outB, int rounds); + +enum { TIME_SHIFT, FREQ_SHIFT }; /* values for the which_shift argument of tf_shift */ +double tf_shift(dofft_closure *k, int realp, const bench_tensor *sz, + int n, int vecn, double sign, + C *inA, C *inB, C *outA, C *outB, C *tmp, + int rounds, double tol, int which_shift); + +typedef struct dotens2_closure_s { + void (*apply)(struct dotens2_closure_s *k, + int indx0, int ondx0, int indx1, int ondx1); /* receives the input and output indices into each of the two tensors */ +} dotens2_closure; + +void bench_dotens2(const bench_tensor *sz0, + const bench_tensor *sz1, dotens2_closure *k); + +void accuracy_test(dofft_closure *k, aconstrain constrain, + int sign, int n, C *a, C *b, int rounds, int impulse_rounds, + double t[6]); + +void accuracy_dft(bench_problem *p, int rounds, int impulse_rounds, + double t[6]); +void accuracy_rdft2(bench_problem *p, int rounds, int impulse_rounds, + double t[6]); +void accuracy_r2r(bench_problem *p, int rounds, int impulse_rounds, + double t[6]); + +#if defined(BENCHFFT_LDOUBLE) && HAVE_COSL + typedef long double trigreal; +# define COS cosl +# define SIN sinl +# define TAN tanl +# define KTRIG(x) (x##L) /* appends the long double literal suffix */ +#elif defined(BENCHFFT_QUAD) && HAVE_LIBQUADMATH + typedef __float128 trigreal; +# define COS cosq +# define SIN sinq +# define TAN tanq +# define KTRIG(x) (x##Q) /* appends the Q literal suffix */ +extern trigreal cosq(trigreal); +extern trigreal sinq(trigreal); +extern trigreal tanq(trigreal); +#else + typedef double trigreal; +# define COS cos +# define SIN sin +# define TAN tan +# define KTRIG(x) (x) +#endif +#define K2PI KTRIG(6.2831853071795864769252867665590057683943388) /* 2*pi as a trigreal literal */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/libbench2/zero.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/libbench2/zero.c
Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2001 Matteo Frigo + * Copyright (c) 2001 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "bench.h" + +/* set I/O arrays to zero. Default routine */ +void problem_zero(bench_problem *p) +{ + bench_complex czero = {0, 0}; + if (p->kind == PROBLEM_COMPLEX) { /* complex input and output */ + caset((bench_complex *) p->inphys, p->iphyssz, czero); + caset((bench_complex *) p->outphys, p->ophyssz, czero); + } else if (p->kind == PROBLEM_R2R) { /* real input and output */ + aset((bench_real *) p->inphys, p->iphyssz, 0.0); + aset((bench_real *) p->outphys, p->ophyssz, 0.0); + } else if (p->kind == PROBLEM_REAL && p->sign < 0) { /* real input, complex output */ + aset((bench_real *) p->inphys, p->iphyssz, 0.0); + caset((bench_complex *) p->outphys, p->ophyssz, czero); + } else if (p->kind == PROBLEM_REAL && p->sign > 0) { /* complex input, real output */ + caset((bench_complex *) p->inphys, p->iphyssz, czero); + aset((bench_real *) p->outphys, p->ophyssz, 0.0); + } else { + BENCH_ASSERT(0); /* TODO: reached for any other kind, and for PROBLEM_REAL with sign == 0 */ + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/ltmain.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/ltmain.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,9655 @@ + +# libtool (GNU libtool) 2.4.2 +# Written by Gordon Matzigkeit , 1996 + +# Copyright (C) 1996,
1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, +# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, +# or obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# Usage: $progname [OPTION]... [MODE-ARG]... +# +# Provide generalized library-building support services. 
+# +# --config show all configuration variables +# --debug enable verbose shell tracing +# -n, --dry-run display commands without modifying any files +# --features display basic configuration information and exit +# --mode=MODE use operation mode MODE +# --preserve-dup-deps don't remove duplicate dependency libraries +# --quiet, --silent don't print informational messages +# --no-quiet, --no-silent +# print informational messages (default) +# --no-warn don't display warning messages +# --tag=TAG use configuration variables from tag TAG +# -v, --verbose print more informational messages than default +# --no-verbose don't print the extra informational messages +# --version print version information +# -h, --help, --help-all print short, long, or detailed help message +# +# MODE must be one of the following: +# +# clean remove files from the build directory +# compile compile a source file into a libtool object +# execute automatically set library path, then run a program +# finish complete the installation of libtool libraries +# install install libraries or executables +# link create a library or an executable +# uninstall remove libraries from an installed directory +# +# MODE-ARGS vary depending on the MODE. When passed as first option, +# `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that. +# Try `$progname --help --mode=MODE' for a more detailed description of MODE. +# +# When reporting a bug, please describe a test case to reproduce it and +# include the following information: +# +# host-triplet: $host +# shell: $SHELL +# compiler: $LTCC +# compiler flags: $LTCFLAGS +# linker: $LD (gnu? $with_gnu_ld) +# $progname: (GNU libtool) 2.4.2 +# automake: $automake_version +# autoconf: $autoconf_version +# +# Report bugs to <bug-libtool@gnu.org>. +# GNU libtool home page: <http://www.gnu.org/software/libtool/>. +# General help using GNU software: <http://www.gnu.org/gethelp/>.
+ +PROGRAM=libtool +PACKAGE=libtool +VERSION=2.4.2 +TIMESTAMP="" +package_revision=1.3337 + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + +# NLS nuisances: We save the old values to restore during execute mode. +lt_user_locale= +lt_safe_locale= +for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES +do + eval "if test \"\${$lt_var+set}\" = set; then + save_$lt_var=\$$lt_var + $lt_var=C + export $lt_var + lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\" + lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\" + fi" +done +LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL + +$lt_unset CDPATH # NOTE(review): lt_unset is not assigned anywhere in this file before this line -- confirm where it is set + + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. +progpath="$0" + + + +: ${CP="cp -f"} # this and the following ": ${VAR=value}" lines assign a default only when VAR is unset +test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'} +: ${MAKE="make"} +: ${MKDIR="mkdir"} +: ${MV="mv -f"} +: ${RM="rm -f"} +: ${SHELL="${CONFIG_SHELL-/bin/sh}"} +: ${Xsed="$SED -e 1s/^X//"} + +# Global variables: +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. +EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake.
+ +exit_status=$EXIT_SUCCESS + +# Make sure IFS has a sensible default +lt_nl=' +' +IFS=" $lt_nl" + +dirname="s,/[^/]*$,," +basename="s,^.*/,," + +# func_dirname file append nondir_replacement +# Compute the dirname of FILE. If nonempty, add APPEND to the result, +# otherwise set result to NONDIR_REPLACEMENT. +func_dirname () +{ + func_dirname_result=`$ECHO "${1}" | $SED "$dirname"` + if test "X$func_dirname_result" = "X${1}"; then + func_dirname_result="${3}" + else + func_dirname_result="$func_dirname_result${2}" + fi +} # func_dirname may be replaced by extended shell implementation + + +# func_basename file -- value returned in "$func_basename_result" +func_basename () +{ + func_basename_result=`$ECHO "${1}" | $SED "$basename"` +} # func_basename may be replaced by extended shell implementation + + +# func_dirname_and_basename file append nondir_replacement +# perform func_basename and func_dirname in a single function +# call: +# dirname: Compute the dirname of FILE. If nonempty, +# add APPEND to the result, otherwise set result +# to NONDIR_REPLACEMENT. +# value returned in "$func_dirname_result" +# basename: Compute filename of FILE. +# value returned in "$func_basename_result" +# Implementation must be kept synchronized with func_dirname +# and func_basename. For efficiency, we do not delegate to +# those functions but instead duplicate the functionality here. +func_dirname_and_basename () +{ + # Extract subdirectory from the argument. + func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"` + if test "X$func_dirname_result" = "X${1}"; then + func_dirname_result="${3}" + else + func_dirname_result="$func_dirname_result${2}" + fi + func_basename_result=`$ECHO "${1}" | $SED -e "$basename"` +} # func_dirname_and_basename may be replaced by extended shell implementation + + +# func_stripname prefix suffix name +# strip PREFIX and SUFFIX off of NAME.
+# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). Value returned in "$func_stripname_result". +# func_strip_suffix prefix name +func_stripname () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname may be replaced by extended shell implementation + + +# These SED scripts presuppose an absolute path with a trailing slash. pathcar keeps only the first path component; pathcdr removes it. +pathcar='s,^/\([^/]*\).*$,\1,' +pathcdr='s,^/[^/]*,,' +removedotparts=':dotsl + s@/\./@/@g + t dotsl + s,/\.$,/,' +collapseslashes='s@/\{1,\}@/@g' +finalslash='s,/*$,/,' + +# func_normal_abspath PATH +# Remove doubled-up and trailing slashes, "." path components, +# and cancel out any ".." path components in PATH after making +# it an absolute path. +# value returned in "$func_normal_abspath_result" +func_normal_abspath () +{ + # Start from root dir and reassemble the path. + func_normal_abspath_result= + func_normal_abspath_tpath=$1 + func_normal_abspath_altnamespace= + case $func_normal_abspath_tpath in + "") + # Empty path, that just means $cwd. + func_stripname '' '/' "`pwd`" + func_normal_abspath_result=$func_stripname_result + return + ;; + # The next three entries are used to spot a run of precisely + # two leading slashes without using negated character classes; + # we take advantage of case's first-match behaviour. + ///*) + # Unusual form of absolute path, do nothing. + ;; + //*) + # Not necessarily an ordinary path; POSIX reserves leading '//' + # and for example Cygwin uses it to access remote file shares + # over CIFS/SMB, so we conserve a leading double slash if found. + func_normal_abspath_altnamespace=/ + ;; + /*) + # Absolute path, do nothing. + ;; + *) + # Relative path, prepend $cwd.
+ func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath + ;; + esac + # Cancel out all the simple stuff to save iterations. We also want + # the path to end with a slash for ease of parsing, so make sure + # there is one (and only one) here. + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"` + while :; do # each iteration consumes one leading component of tpath + # Processed it all yet? + if test "$func_normal_abspath_tpath" = / ; then + # If we ascended to the root using ".." the result may be empty now. + if test -z "$func_normal_abspath_result" ; then + func_normal_abspath_result=/ + fi + break + fi + func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$pathcar"` + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$pathcdr"` + # Figure out what to do with the component just split off + case $func_normal_abspath_tcomponent in + "") + # Trailing empty path component, ignore it. + ;; + ..) + # Parent dir; strip last assembled component from result. + func_dirname "$func_normal_abspath_result" + func_normal_abspath_result=$func_dirname_result + ;; + *) + # Actual path component, append it. + func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent + ;; + esac + done + # Restore leading double-slash if one was found on entry. + func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result +} + +# func_relative_path SRCDIR DSTDIR +# generates a relative path from SRCDIR to DSTDIR, with a trailing +# slash if non-empty, suitable for immediately appending a filename +# without needing to append a separator.
+# value returned in "$func_relative_path_result" +func_relative_path () +{ + func_relative_path_result= + func_normal_abspath "$1" + func_relative_path_tlibdir=$func_normal_abspath_result + func_normal_abspath "$2" + func_relative_path_tbindir=$func_normal_abspath_result + + # Ascend the tree starting from libdir + while :; do + # check if we have found a prefix of bindir + case $func_relative_path_tbindir in + $func_relative_path_tlibdir) + # found an exact match + func_relative_path_tcancelled= + break + ;; + $func_relative_path_tlibdir*) + # found a matching prefix + func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" + func_relative_path_tcancelled=$func_stripname_result + if test -z "$func_relative_path_result"; then + func_relative_path_result=. + fi + break + ;; + *) + # No match yet: drop the last component of libdir and add one "../" (argument quoted so a path containing whitespace is not word-split) + func_dirname "$func_relative_path_tlibdir" + func_relative_path_tlibdir=${func_dirname_result} + if test "x$func_relative_path_tlibdir" = x ; then + # Have to descend all the way to the root! + func_relative_path_result=../$func_relative_path_result + func_relative_path_tcancelled=$func_relative_path_tbindir + break + fi + func_relative_path_result=../$func_relative_path_result + ;; + esac + done + + # Now calculate path; take care to avoid doubling-up slashes. + func_stripname '' '/' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + func_stripname '/' '/' "$func_relative_path_tcancelled" + if test "x$func_stripname_result" != x ; then + func_relative_path_result=${func_relative_path_result}/${func_stripname_result} + fi + + # Normalisation. If bindir is libdir, return empty string, + # else relative path ending with a slash; either way, target + # file name can be directly appended. + if test !
-z "$func_relative_path_result"; then + func_stripname './' '' "$func_relative_path_result/" + func_relative_path_result=$func_stripname_result + fi +} + +# The name of this program: +func_dirname_and_basename "$progpath" +progname=$func_basename_result + +# Make sure we have an absolute path for reexecution: +case $progpath in + [\\/]*|[A-Za-z]:\\*) ;; # already absolute (leading separator or drive letter): keep as is + *[\\/]*) # relative path that contains a directory separator: resolve its directory with cd and pwd + progdir=$func_dirname_result + progdir=`cd "$progdir" && pwd` + progpath="$progdir/$progname" + ;; + *) # bare command name: search the directories listed in $PATH + save_IFS="$IFS" + IFS=${PATH_SEPARATOR-:} + for progdir in $PATH; do + IFS="$save_IFS" + test -x "$progdir/$progname" && break + done + IFS="$save_IFS" + test -n "$progdir" || progdir=`pwd` + progpath="$progdir/$progname" + ;; +esac + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed="${SED}"' -e 1s/^X//' +sed_quote_subst='s/\([`"$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution that turns a string into a regex matching for the +# string literally. +sed_make_literal_regex='s,[].[^$\\*\/],\\&,g' + +# Sed substitution that converts a w32 file name or path +# which contains forward slashes, into one that contains +# (escaped) backslashes. A very naive implementation. +lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' + +# Re-`\' parameter expansions in output of double_quote_subst that were +# `\'-ed in input to the same. If an odd number of `\' preceded a '$' +# in input to double_quote_subst, that '$' was protected from expansion. +# Since each input `\' is now two `\'s, look for any number of runs of +# four `\'s followed by two `\'s and then a '$'. `\' that '$'.
+bs='\\' +bs2='\\\\' +bs4='\\\\\\\\' +dollar='\$' +sed_double_backslash="\ + s/$bs4/&\\ +/g + s/^$bs2$dollar/$bs&/ + s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g + s/\n//g" + +# Standard options: +opt_dry_run=false +opt_help=false +opt_quiet=false +opt_verbose=false +opt_warning=: + +# func_echo arg... +# Echo program name prefixed message, along with the current mode +# name if it has been set yet. +func_echo () +{ + $ECHO "$progname: ${opt_mode+$opt_mode: }$*" +} + +# func_verbose arg... +# Echo program name prefixed message in verbose mode only. +func_verbose () +{ + $opt_verbose && func_echo ${1+"$@"} + + # A bug in bash halts the script if the last line of a function + # fails when set -e is in force, so we need another command to + # work around that: + : +} + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +# func_error arg... +# Echo program name prefixed message to standard error. +func_error () +{ + $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2 +} + +# func_warning arg... +# Echo program name prefixed warning message to standard error. +func_warning () +{ + $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2 + + # bash bug again: + : +} + +# func_fatal_error arg... +# Echo program name prefixed message to standard error, and exit. +func_fatal_error () +{ + func_error ${1+"$@"} + exit $EXIT_FAILURE +} + +# func_fatal_help arg... +# Echo program name prefixed message to standard error, followed by +# a help hint, and exit. +func_fatal_help () +{ + func_error ${1+"$@"} + func_fatal_error "$help" +} +help="Try \`$progname --help' for more information." ## default + + +# func_grep expression filename +# Check whether EXPRESSION matches any line of FILENAME, without output. +func_grep () +{ + $GREP "$1" "$2" >/dev/null 2>&1 +} + + +# func_mkdir_p directory-path +# Make sure the entire path to DIRECTORY-PATH is available. 
+func_mkdir_p () +{ + my_directory_path="$1" + my_dir_list= + + if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then + + # Protect directory names starting with `-' + case $my_directory_path in + -*) my_directory_path="./$my_directory_path" ;; + esac + + # While some portion of DIR does not yet exist... + while test ! -d "$my_directory_path"; do + # ...make a list in topmost first order. Use a colon delimited + # list incase some portion of path contains whitespace. + my_dir_list="$my_directory_path:$my_dir_list" + + # If the last portion added has no slash in it, the list is done + case $my_directory_path in */*) ;; *) break ;; esac + + # ...otherwise throw away the child directory and loop + my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"` + done + my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'` + + save_mkdir_p_IFS="$IFS"; IFS=':' + for my_dir in $my_dir_list; do + IFS="$save_mkdir_p_IFS" + # mkdir can fail with a `File exist' error if two processes + # try to create one of the directories concurrently. Don't + # stop in that case! + $MKDIR "$my_dir" 2>/dev/null || : + done + IFS="$save_mkdir_p_IFS" + + # Bail out if we (or some other process) failed to create a directory. + test -d "$my_directory_path" || \ + func_fatal_error "Failed to create \`$1'" + fi +} + + +# func_mktempdir [string] +# Make a temporary directory that won't clash with other running +# libtool processes, and avoids race conditions if possible. If +# given, STRING is the basename for that directory. +func_mktempdir () +{ + my_template="${TMPDIR-/tmp}/${1-$progname}" + + if test "$opt_dry_run" = ":"; then + # Return a directory name, but don't create it in dry-run mode + my_tmpdir="${my_template}-$$" + else + + # If mktemp works, use that first and foremost + my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null` + + if test ! 
-d "$my_tmpdir"; then + # Failing that, at least try and use $RANDOM to avoid a race + my_tmpdir="${my_template}-${RANDOM-0}$$" + + save_mktempdir_umask=`umask` + umask 0077 + $MKDIR "$my_tmpdir" + umask $save_mktempdir_umask + fi + + # If we're not in dry-run mode, bomb out on failure + test -d "$my_tmpdir" || \ + func_fatal_error "cannot create temporary directory \`$my_tmpdir'" + fi + + $ECHO "$my_tmpdir" +} + + +# func_quote_for_eval arg +# Aesthetically quote ARG to be evaled later. +# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT +# is double-quoted, suitable for a subsequent eval, whereas +# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters +# which are still active within double quotes backslashified. +func_quote_for_eval () +{ + case $1 in + *[\\\`\"\$]*) + func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;; + *) + func_quote_for_eval_unquoted_result="$1" ;; + esac + + case $func_quote_for_eval_unquoted_result in + # Double-quote args containing shell metacharacters to delay + # word splitting, command substitution and and variable + # expansion for a subsequent eval. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\"" + ;; + *) + func_quote_for_eval_result="$func_quote_for_eval_unquoted_result" + esac +} + + +# func_quote_for_expand arg +# Aesthetically quote ARG to be evaled later; same as above, +# but do not quote variable references. +func_quote_for_expand () +{ + case $1 in + *[\\\`\"]*) + my_arg=`$ECHO "$1" | $SED \ + -e "$double_quote_subst" -e "$sed_double_backslash"` ;; + *) + my_arg="$1" ;; + esac + + case $my_arg in + # Double-quote args containing shell metacharacters to delay + # word splitting and command substitution for a subsequent eval. 
+ # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + my_arg="\"$my_arg\"" + ;; + esac + + func_quote_for_expand_result="$my_arg" +} + + +# func_show_eval cmd [fail_exp] +# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. +func_show_eval () +{ + my_cmd="$1" + my_fail_exp="${2-:}" + + ${opt_silent-false} || { + func_quote_for_expand "$my_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + if ${opt_dry_run-false}; then :; else + eval "$my_cmd" + my_status=$? + if test "$my_status" -eq 0; then :; else + eval "(exit $my_status); $my_fail_exp" + fi + fi +} + + +# func_show_eval_locale cmd [fail_exp] +# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. Use the saved locale for evaluation. +func_show_eval_locale () +{ + my_cmd="$1" + my_fail_exp="${2-:}" + + ${opt_silent-false} || { + func_quote_for_expand "$my_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + if ${opt_dry_run-false}; then :; else + eval "$lt_user_locale + $my_cmd" + my_status=$? + eval "$lt_safe_locale" + if test "$my_status" -eq 0; then :; else + eval "(exit $my_status); $my_fail_exp" + fi + fi +} + +# func_tr_sh +# Turn $1 into a string suitable for a shell variable name. +# Result is stored in $func_tr_sh_result. All characters +# not in the set a-zA-Z0-9_ are replaced with '_'. Further, +# if $1 begins with a digit, a '_' is prepended as well. +func_tr_sh () +{ + case $1 in + [0-9]* | *[!a-zA-Z0-9_]*) + func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'` + ;; + * ) + func_tr_sh_result=$1 + ;; + esac +} + + +# func_version +# Echo version message to standard output and exit. 
+func_version () +{ + $opt_debug + + $SED -n '/(C)/!b go + :more + /\./!{ + N + s/\n# / / + b more + } + :go + /^# '$PROGRAM' (GNU /,/# warranty; / { + s/^# // + s/^# *$// + s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/ + p + }' < "$progpath" + exit $? +} + +# func_usage +# Echo short help message to standard output and exit. +func_usage () +{ + $opt_debug + + $SED -n '/^# Usage:/,/^# *.*--help/ { + s/^# // + s/^# *$// + s/\$progname/'$progname'/ + p + }' < "$progpath" + echo + $ECHO "run \`$progname --help | more' for full usage" + exit $? +} + +# func_help [NOEXIT] +# Echo long help message to standard output and exit, +# unless 'noexit' is passed as argument. +func_help () +{ + $opt_debug + + $SED -n '/^# Usage:/,/# Report bugs to/ { + :print + s/^# // + s/^# *$// + s*\$progname*'$progname'* + s*\$host*'"$host"'* + s*\$SHELL*'"$SHELL"'* + s*\$LTCC*'"$LTCC"'* + s*\$LTCFLAGS*'"$LTCFLAGS"'* + s*\$LD*'"$LD"'* + s/\$with_gnu_ld/'"$with_gnu_ld"'/ + s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/ + s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/ + p + d + } + /^# .* home page:/b print + /^# General help using/b print + ' < "$progpath" + ret=$? + if test -z "$1"; then + exit $ret + fi +} + +# func_missing_arg argname +# Echo program name prefixed message to standard error and set global +# exit_cmd. +func_missing_arg () +{ + $opt_debug + + func_error "missing argument for $1." + exit_cmd=exit +} + + +# func_split_short_opt shortopt +# Set func_split_short_opt_name and func_split_short_opt_arg shell +# variables after splitting SHORTOPT after the 2nd character. 
+func_split_short_opt () +{ + my_sed_short_opt='1s/^\(..\).*$/\1/;q' + my_sed_short_rest='1s/^..\(.*\)$/\1/;q' + + func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"` + func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"` +} # func_split_short_opt may be replaced by extended shell implementation + + +# func_split_long_opt longopt +# Set func_split_long_opt_name and func_split_long_opt_arg shell +# variables after splitting LONGOPT at the `=' sign. +func_split_long_opt () +{ + my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q' + my_sed_long_arg='1s/^--[^=]*=//' + + func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"` + func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"` +} # func_split_long_opt may be replaced by extended shell implementation + +exit_cmd=: + + + + + +magic="%%%MAGIC variable%%%" +magic_exe="%%%MAGIC EXE variable%%%" + +# Global variables. +nonopt= +preserve_args= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" +extracted_archives= +extracted_serial=0 + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. +exec_cmd= + +# func_append var value +# Append VALUE to the end of shell variable VAR. +func_append () +{ + eval "${1}=\$${1}\${2}" +} # func_append may be replaced by extended shell implementation + +# func_append_quoted var value +# Quote VALUE and append to the end of shell variable VAR, separated +# by a space. +func_append_quoted () +{ + func_quote_for_eval "${2}" + eval "${1}=\$${1}\\ \$func_quote_for_eval_result" +} # func_append_quoted may be replaced by extended shell implementation + + +# func_arith arithmetic-term... +func_arith () +{ + func_arith_result=`expr "${@}"` +} # func_arith may be replaced by extended shell implementation + + +# func_len string +# STRING may not start with a hyphen. 
+func_len () +{ + func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len` +} # func_len may be replaced by extended shell implementation + + +# func_lo2o object +func_lo2o () +{ + func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"` +} # func_lo2o may be replaced by extended shell implementation + + +# func_xform libobj-or-source +func_xform () +{ + func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'` +} # func_xform may be replaced by extended shell implementation + + +# func_fatal_configuration arg... +# Echo program name prefixed message to standard error, followed by +# a configuration failure hint, and exit. +func_fatal_configuration () +{ + func_error ${1+"$@"} + func_error "See the $PACKAGE documentation for more information." + func_fatal_error "Fatal configuration error." +} + + +# func_config +# Display the configuration for all the tags in this script. +func_config () +{ + re_begincf='^# ### BEGIN LIBTOOL' + re_endcf='^# ### END LIBTOOL' + + # Default configuration. + $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" + + # Now print the configurations for the tags. + for tagname in $taglist; do + $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" + done + + exit $? +} + +# func_features +# Display the features supported by this script. +func_features () +{ + echo "host: $host" + if test "$build_libtool_libs" = yes; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test "$build_old_libs" = yes; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + + exit $? +} + +# func_enable_tag tagname +# Verify that TAGNAME is valid, and either flag an error and exit, or +# enable the TAGNAME tag. We also add TAGNAME to the global $taglist +# variable here. 
+func_enable_tag () +{ + # Global variable: + tagname="$1" + + re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" + re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" + sed_extractcf="/$re_begincf/,/$re_endcf/p" + + # Validate tagname. + case $tagname in + *[!-_A-Za-z0-9,/]*) + func_fatal_error "invalid tag name: $tagname" + ;; + esac + + # Don't test for the "default" C tag, as we know it's + # there but not specially marked. + case $tagname in + CC) ;; + *) + if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then + taglist="$taglist $tagname" + + # Evaluate the configuration. Be careful to quote the path + # and the sed script, to avoid splitting on whitespace, but + # also don't use non-portable quotes within backquotes within + # quotes we have to do it in 2 steps: + extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` + eval "$extractedcf" + else + func_error "ignoring unknown tag $tagname" + fi + ;; + esac +} + +# func_check_version_match +# Ensure that we are using m4 macros, and libtool script from the same +# release of libtool. +func_check_version_match () +{ + if test "$package_revision" != "$macro_revision"; then + if test "$VERSION" != "$macro_version"; then + if test -z "$macro_version"; then + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from an older release. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + fi + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. 
This is $PACKAGE $VERSION, revision $package_revision, +$progname: but the definition of this LT_INIT comes from revision $macro_revision. +$progname: You should recreate aclocal.m4 with macros from revision $package_revision +$progname: of $PACKAGE $VERSION and run autoconf again. +_LT_EOF + fi + + exit $EXIT_MISMATCH + fi +} + + +# Shorthand for --mode=foo, only valid as the first argument +case $1 in +clean|clea|cle|cl) + shift; set dummy --mode clean ${1+"$@"}; shift + ;; +compile|compil|compi|comp|com|co|c) + shift; set dummy --mode compile ${1+"$@"}; shift + ;; +execute|execut|execu|exec|exe|ex|e) + shift; set dummy --mode execute ${1+"$@"}; shift + ;; +finish|finis|fini|fin|fi|f) + shift; set dummy --mode finish ${1+"$@"}; shift + ;; +install|instal|insta|inst|ins|in|i) + shift; set dummy --mode install ${1+"$@"}; shift + ;; +link|lin|li|l) + shift; set dummy --mode link ${1+"$@"}; shift + ;; +uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) + shift; set dummy --mode uninstall ${1+"$@"}; shift + ;; +esac + + + +# Option defaults: +opt_debug=: +opt_dry_run=false +opt_config=false +opt_preserve_dup_deps=false +opt_features=false +opt_finish=false +opt_help=false +opt_help_all=false +opt_silent=: +opt_warning=: +opt_verbose=: +opt_silent=false +opt_verbose=false + + +# Parse options once, thoroughly. This comes as soon as possible in the +# script to make things like `--version' happen as quickly as we can. 
+{ + # this just eases exit handling + while test $# -gt 0; do + opt="$1" + shift + case $opt in + --debug|-x) opt_debug='set -x' + func_echo "enabling shell trace mode" + $opt_debug + ;; + --dry-run|--dryrun|-n) + opt_dry_run=: + ;; + --config) + opt_config=: +func_config + ;; + --dlopen|-dlopen) + optarg="$1" + opt_dlopen="${opt_dlopen+$opt_dlopen +}$optarg" + shift + ;; + --preserve-dup-deps) + opt_preserve_dup_deps=: + ;; + --features) + opt_features=: +func_features + ;; + --finish) + opt_finish=: +set dummy --mode finish ${1+"$@"}; shift + ;; + --help) + opt_help=: + ;; + --help-all) + opt_help_all=: +opt_help=': help-all' + ;; + --mode) + test $# = 0 && func_missing_arg $opt && break + optarg="$1" + opt_mode="$optarg" +case $optarg in + # Valid mode arguments: + clean|compile|execute|finish|install|link|relink|uninstall) ;; + + # Catch anything else as an error + *) func_error "invalid argument for $opt" + exit_cmd=exit + break + ;; +esac + shift + ;; + --no-silent|--no-quiet) + opt_silent=false +func_append preserve_args " $opt" + ;; + --no-warning|--no-warn) + opt_warning=false +func_append preserve_args " $opt" + ;; + --no-verbose) + opt_verbose=false +func_append preserve_args " $opt" + ;; + --silent|--quiet) + opt_silent=: +func_append preserve_args " $opt" + opt_verbose=false + ;; + --verbose|-v) + opt_verbose=: +func_append preserve_args " $opt" +opt_silent=false + ;; + --tag) + test $# = 0 && func_missing_arg $opt && break + optarg="$1" + opt_tag="$optarg" +func_append preserve_args " $opt $optarg" +func_enable_tag "$optarg" + shift + ;; + + -\?|-h) func_usage ;; + --help) func_help ;; + --version) func_version ;; + + # Separate optargs to long options: + --*=*) + func_split_long_opt "$opt" + set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"} + shift + ;; + + # Separate non-argument short options: + -\?*|-h*|-n*|-v*) + func_split_short_opt "$opt" + set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"} + 
shift + ;; + + --) break ;; + -*) func_fatal_help "unrecognized option \`$opt'" ;; + *) set dummy "$opt" ${1+"$@"}; shift; break ;; + esac + done + + # Validate options: + + # save first non-option argument + if test "$#" -gt 0; then + nonopt="$opt" + shift + fi + + # preserve --debug + test "$opt_debug" = : || func_append preserve_args " --debug" + + case $host in + *cygwin* | *mingw* | *pw32* | *cegcc*) + # don't eliminate duplications in $postdeps and $predeps + opt_duplicate_compiler_generated_deps=: + ;; + *) + opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps + ;; + esac + + $opt_help || { + # Sanity checks first: + func_check_version_match + + if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then + func_fatal_configuration "not configured to build any kind of library" + fi + + # Darwin sucks + eval std_shrext=\"$shrext_cmds\" + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$opt_dlopen" && test "$opt_mode" != execute; then + func_error "unrecognized option \`-dlopen'" + $ECHO "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Change the help message to a mode-specific one. + generic_help="$help" + help="Try \`$progname --help --mode=$opt_mode' for more information." + } + + + # Bail if the options were screwed + $exit_cmd $EXIT_FAILURE +} + + + + +## ----------- ## +## Main. ## +## ----------- ## + +# func_lalib_p file +# True iff FILE is a libtool `.la' library or `.lo' object file. +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_lalib_p () +{ + test -f "$1" && + $SED -e 4q "$1" 2>/dev/null \ + | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 +} + +# func_lalib_unsafe_p file +# True iff FILE is a libtool `.la' library or `.lo' object file. +# This function implements the same check as func_lalib_p without +# resorting to external programs. To this end, it redirects stdin and +# closes it afterwards, without saving the original file descriptor. 
+# As a safety measure, use it only where a negative result would be +# fatal anyway. Works if `file' does not exist. +func_lalib_unsafe_p () +{ + lalib_p=no + if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then + for lalib_p_l in 1 2 3 4 + do + read lalib_p_line + case "$lalib_p_line" in + \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; + esac + done + exec 0<&5 5<&- + fi + test "$lalib_p" = yes +} + +# func_ltwrapper_script_p file +# True iff FILE is a libtool wrapper script +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_script_p () +{ + func_lalib_p "$1" +} + +# func_ltwrapper_executable_p file +# True iff FILE is a libtool wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_executable_p () +{ + func_ltwrapper_exec_suffix= + case $1 in + *.exe) ;; + *) func_ltwrapper_exec_suffix=.exe ;; + esac + $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 +} + +# func_ltwrapper_scriptname file +# Assumes file is an ltwrapper_executable +# uses $file to determine the appropriate filename for a +# temporary ltwrapper_script. +func_ltwrapper_scriptname () +{ + func_dirname_and_basename "$1" "" "." + func_stripname '' '.exe' "$func_basename_result" + func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper" +} + +# func_ltwrapper_p file +# True iff FILE is a libtool wrapper script or wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_p () +{ + func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" +} + + +# func_execute_cmds commands fail_cmd +# Execute tilde-delimited COMMANDS. +# If FAIL_CMD is given, eval that upon failure. +# FAIL_CMD may read-access the current command in variable CMD! 
+func_execute_cmds () +{ + $opt_debug + save_ifs=$IFS; IFS='~' + for cmd in $1; do + IFS=$save_ifs + eval cmd=\"$cmd\" + func_show_eval "$cmd" "${2-:}" + done + IFS=$save_ifs +} + + +# func_source file +# Source FILE, adding directory component if necessary. +# Note that it is not necessary on cygwin/mingw to append a dot to +# FILE even if both FILE and FILE.exe exist: automatic-append-.exe +# behavior happens only for exec(3), not for open(2)! Also, sourcing +# `FILE.' does not work on cygwin managed mounts. +func_source () +{ + $opt_debug + case $1 in + */* | *\\*) . "$1" ;; + *) . "./$1" ;; + esac +} + + +# func_resolve_sysroot PATH +# Replace a leading = in PATH with a sysroot. Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case "$lt_sysroot:$1" in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result="=$func_stripname_result" + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' 
+func_infer_tag () +{ + $opt_debug + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. + eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. 
+ if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with \`--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=${1} + if test "$build_libtool_libs" = yes; then + write_lobj=\'${2}\' + else + write_lobj=none + fi + + if test "$build_old_libs" = yes; then + write_oldobj=\'${3}\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T </dev/null` + if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$lt_sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. 
+func_convert_core_path_wine_to_w32 () +{ + $opt_debug + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result="" + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result" ; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result" + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when +# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or +# (2), returns the Cygwin file name or path in func_cygpath_result (input +# file name or path is assumed to be in w32 format, as previously converted +# from $build's *nix or MSYS format). In case (3), returns the w32 file name +# or path in func_cygpath_result (input file name or path is assumed to be in +# Cygwin format). Returns an empty string on error. +# +# ARGS are passed to cygpath, with the last one being the file name or path to +# be converted. +# +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH +# environment variable; do not put it in $PATH. +func_cygpath () +{ + $opt_debug + if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then + func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` + if test "$?" 
-ne 0; then + # on failure, ensure result is empty + func_cygpath_result= + fi + else + func_cygpath_result= + func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'" + fi +} +#end: func_cygpath + + +# func_convert_core_msys_to_w32 ARG +# Convert file name or path ARG from MSYS format to w32 format. Return +# result in func_convert_core_msys_to_w32_result. +func_convert_core_msys_to_w32 () +{ + $opt_debug + # awkward: cmd appends spaces to result + func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | + $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"` +} +#end: func_convert_core_msys_to_w32 + + +# func_convert_file_check ARG1 ARG2 +# Verify that ARG1 (a file name in $build format) was converted to $host +# format in ARG2. Otherwise, emit an error message, but continue (resetting +# func_to_host_file_result to ARG1). +func_convert_file_check () +{ + $opt_debug + if test -z "$2" && test -n "$1" ; then + func_error "Could not determine host file name corresponding to" + func_error " \`$1'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback: + func_to_host_file_result="$1" + fi +} +# end func_convert_file_check + + +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH +# Verify that FROM_PATH (a path in $build format) was converted to $host +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting +# func_to_host_file_result to a simplistic fallback value (see below). +func_convert_path_check () +{ + $opt_debug + if test -z "$4" && test -n "$3"; then + func_error "Could not determine the host path corresponding to" + func_error " \`$3'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback. This is a deliberately simplistic "conversion" and + # should not be "improved". See libtool.info. 
+ if test "x$1" != "x$2"; then + lt_replace_pathsep_chars="s|$1|$2|g" + func_to_host_path_result=`echo "$3" | + $SED -e "$lt_replace_pathsep_chars"` + else + func_to_host_path_result="$3" + fi + fi +} +# end func_convert_path_check + + +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT +# and appending REPL if ORIG matches BACKPAT. +func_convert_path_front_back_pathsep () +{ + $opt_debug + case $4 in + $1 ) func_to_host_path_result="$3$func_to_host_path_result" + ;; + esac + case $4 in + $2 ) func_append func_to_host_path_result "$3" + ;; + esac +} +# end func_convert_path_front_back_pathsep + + +################################################## +# $build to $host FILE NAME CONVERSION FUNCTIONS # +################################################## +# invoked via `$to_host_file_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# Result will be available in $func_to_host_file_result. + + +# func_to_host_file ARG +# Converts the file name ARG from $build format to $host format. Return result +# in func_to_host_file_result. +func_to_host_file () +{ + $opt_debug + $to_host_file_cmd "$1" +} +# end func_to_host_file + + +# func_to_tool_file ARG LAZY +# converts the file name ARG from $build format to toolchain format. Return +# result in func_to_tool_file_result. If the conversion in use is listed +# in (the comma separated) LAZY, no conversion takes place. +func_to_tool_file () +{ + $opt_debug + case ,$2, in + *,"$to_tool_file_cmd",*) + func_to_tool_file_result=$1 + ;; + *) + $to_tool_file_cmd "$1" + func_to_tool_file_result=$func_to_host_file_result + ;; + esac +} +# end func_to_tool_file + + +# func_convert_file_noop ARG +# Copy ARG to func_to_host_file_result. 
+func_convert_file_noop () +{ + func_to_host_file_result="$1" +} +# end func_convert_file_noop + + +# func_convert_file_msys_to_w32 ARG +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_file_result. +func_convert_file_msys_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_to_host_file_result="$func_convert_core_msys_to_w32_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_w32 + + +# func_convert_file_cygwin_to_w32 ARG +# Convert file name ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_file_cygwin_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + # because $build is cygwin, we call "the" cygpath in $PATH; no need to use + # LT_CYGPATH in this case. + func_to_host_file_result=`cygpath -m "$1"` + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_cygwin_to_w32 + + +# func_convert_file_nix_to_w32 ARG +# Convert file name ARG from *nix to w32 format. Requires a wine environment +# and a working winepath. Returns result in func_to_host_file_result. +func_convert_file_nix_to_w32 () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_file_wine_to_w32 "$1" + func_to_host_file_result="$func_convert_core_file_wine_to_w32_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_w32 + + +# func_convert_file_msys_to_cygwin ARG +# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. 
+func_convert_file_msys_to_cygwin () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_cygpath -u "$func_convert_core_msys_to_w32_result" + func_to_host_file_result="$func_cygpath_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_cygwin + + +# func_convert_file_nix_to_cygwin ARG +# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed +# in a wine environment, working winepath, and LT_CYGPATH set. Returns result +# in func_to_host_file_result. +func_convert_file_nix_to_cygwin () +{ + $opt_debug + func_to_host_file_result="$1" + if test -n "$1"; then + # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. + func_convert_core_file_wine_to_w32 "$1" + func_cygpath -u "$func_convert_core_file_wine_to_w32_result" + func_to_host_file_result="$func_cygpath_result" + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_cygwin + + +############################################# +# $build to $host PATH CONVERSION FUNCTIONS # +############################################# +# invoked via `$to_host_path_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# The result will be available in $func_to_host_path_result. +# +# Path separators are also converted from $build format to $host format. If +# ARG begins or ends with a path separator character, it is preserved (but +# converted to $host format) on output. +# +# All path conversion functions are named using the following convention: +# file name conversion function : func_convert_file_X_to_Y () +# path conversion function : func_convert_path_X_to_Y () +# where, for any given $build/$host combination the 'X_to_Y' value is the +# same. 
If conversion functions are added for new $build/$host combinations, +# the two new functions must follow this pattern, or func_init_to_host_path_cmd +# will break. + + +# func_init_to_host_path_cmd +# Ensures that function "pointer" variable $to_host_path_cmd is set to the +# appropriate value, based on the value of $to_host_file_cmd. +to_host_path_cmd= +func_init_to_host_path_cmd () +{ + $opt_debug + if test -z "$to_host_path_cmd"; then + func_stripname 'func_convert_file_' '' "$to_host_file_cmd" + to_host_path_cmd="func_convert_path_${func_stripname_result}" + fi +} + + +# func_to_host_path ARG +# Converts the path ARG from $build format to $host format. Return result +# in func_to_host_path_result. +func_to_host_path () +{ + $opt_debug + func_init_to_host_path_cmd + $to_host_path_cmd "$1" +} +# end func_to_host_path + + +# func_convert_path_noop ARG +# Copy ARG to func_to_host_path_result. +func_convert_path_noop () +{ + func_to_host_path_result="$1" +} +# end func_convert_path_noop + + +# func_convert_path_msys_to_w32 ARG +# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_path_result. +func_convert_path_msys_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # Remove leading and trailing path separator characters from ARG. MSYS + # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; + # and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result="$func_convert_core_msys_to_w32_result" + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_msys_to_w32 + + +# func_convert_path_cygwin_to_w32 ARG +# Convert path ARG from Cygwin to w32 format. 
Returns result in +# func_to_host_file_result. +func_convert_path_cygwin_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_cygwin_to_w32 + + +# func_convert_path_nix_to_w32 ARG +# Convert path ARG from *nix to w32 format. Requires a wine environment and +# a working winepath. Returns result in func_to_host_file_result. +func_convert_path_nix_to_w32 () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result="$func_convert_core_path_wine_to_w32_result" + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_nix_to_w32 + + +# func_convert_path_msys_to_cygwin ARG +# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. 
+func_convert_path_msys_to_cygwin () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_msys_to_w32_result" + func_to_host_path_result="$func_cygpath_result" + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_msys_to_cygwin + + +# func_convert_path_nix_to_cygwin ARG +# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a +# a wine environment, working winepath, and LT_CYGPATH set. Returns result in +# func_to_host_file_result. +func_convert_path_nix_to_cygwin () +{ + $opt_debug + func_to_host_path_result="$1" + if test -n "$1"; then + # Remove leading and trailing path separator characters from + # ARG. msys behavior is inconsistent here, cygpath turns them + # into '.;' and ';.', and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" + func_to_host_path_result="$func_cygpath_result" + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_nix_to_cygwin + + +# func_mode_compile arg... +func_mode_compile () +{ + $opt_debug + # Get the compilation command and the source file. + base_compile= + srcfile="$nonopt" # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + pie_flag= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". 
Instead, add this to base_compile + lastarg="$arg" + arg_mode=normal + ;; + + target ) + libobj="$arg" + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + test -n "$libobj" && \ + func_fatal_error "you cannot specify \`-o' more than once" + arg_mode=target + continue + ;; + + -pie | -fpie | -fPIE) + func_append pie_flag " $arg" + continue + ;; + + -shared | -static | -prefer-pic | -prefer-non-pic) + func_append later " $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + lastarg= + save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + func_append_quoted lastarg "$arg" + done + IFS="$save_ifs" + func_stripname ' ' '' "$lastarg" + lastarg=$func_stripname_result + + # Add the arguments to base_compile. + func_append base_compile " $lastarg" + continue + ;; + + *) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg="$srcfile" + srcfile="$arg" + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + func_append_quoted base_compile "$lastarg" + done # for arg + + case $arg_mode in + arg) + func_fatal_error "you must specify an argument for -Xcompile" + ;; + target) + func_fatal_error "you must specify a target with \`-o'" + ;; + *) + # Get the name of the library object. + test -z "$libobj" && { + func_basename "$srcfile" + libobj="$func_basename_result" + } + ;; + esac + + # Recognize several different file suffixes. 
+ # If the user specifies -o file.o, it is replaced with file.lo + case $libobj in + *.[cCFSifmso] | \ + *.ada | *.adb | *.ads | *.asm | \ + *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ + *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) + func_xform "$libobj" + libobj=$func_xform_result + ;; + esac + + case $libobj in + *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; + *) + func_fatal_error "cannot determine name of library object from \`$libobj'" + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -shared) + test "$build_libtool_libs" != yes && \ + func_fatal_configuration "can not build a shared library" + build_old_libs=no + continue + ;; + + -static) + build_libtool_libs=no + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + func_quote_for_eval "$libobj" + test "X$libobj" != "X$func_quote_for_eval_result" \ + && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && func_warning "libobj name \`$libobj' may not contain shell special characters." + func_dirname_and_basename "$obj" "/" "" + objname="$func_basename_result" + xdir="$func_dirname_result" + lobj=${xdir}$objdir/$objname + + test -z "$base_compile" && \ + func_fatal_help "you must specify a compilation command" + + # Delete any leftover library objects. 
+ if test "$build_old_libs" = yes; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2* | cegcc*) + pic_mode=default + ;; + esac + if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext} + lockfile="$output_obj.lock" + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + $ECHO "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
+ + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + func_append removelist " $output_obj" + $ECHO "$srcfile" > "$lockfile" + fi + + $opt_dry_run || $RM $removelist + func_append removelist " $lockfile" + trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 + + func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 + srcfile=$func_to_tool_file_result + func_quote_for_eval "$srcfile" + qsrcfile=$func_quote_for_eval_result + + # Only build a PIC object if we are building libtool libraries. + if test "$build_libtool_libs" = yes; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test "$pic_mode" != no; then + command="$base_compile $qsrcfile $pic_flag" + else + # Don't build PIC code + command="$base_compile $qsrcfile" + fi + + func_mkdir_p "$xdir$objdir" + + if test -z "$output_obj"; then + # Place PIC objects in $objdir + func_append command " -o $lobj" + fi + + func_show_eval_locale "$command" \ + 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' + + if test "$need_locks" = warn && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + func_show_eval '$MV "$output_obj" "$lobj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + + # Allow error messages only from the first compilation. 
+ if test "$suppress_opt" = yes; then + suppress_output=' >/dev/null 2>&1' + fi + fi + + # Only build a position-dependent object if we build old libraries. + if test "$build_old_libs" = yes; then + if test "$pic_mode" != yes; then + # Don't build PIC code + command="$base_compile $qsrcfile$pie_flag" + else + command="$base_compile $qsrcfile $pic_flag" + fi + if test "$compiler_c_o" = yes; then + func_append command " -o $obj" + fi + + # Suppress compiler output if we already did a PIC compilation. + func_append command "$suppress_output" + func_show_eval_locale "$command" \ + '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' + + if test "$need_locks" = warn && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + func_show_eval '$MV "$output_obj" "$obj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + fi + + $opt_dry_run || { + func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" + + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + removelist=$lockfile + $RM "$lockfile" + fi + } + + exit $EXIT_SUCCESS +} + +$opt_help || { + test "$opt_mode" = compile && func_mode_compile ${1+"$@"} +} + +func_mode_help () +{ + # We need to display help for each of the modes. + case $opt_mode in + "") + # Generic help is extracted from the usage comments + # at the start of this file. 
+ func_help + ;; + + clean) + $ECHO \ +"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + + compile) + $ECHO \ +"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -no-suppress do not suppress compiler output for multiple passes + -prefer-pic try to build PIC objects only + -prefer-non-pic try to build non-PIC objects only + -shared do not build a \`.o' file suitable for static linking + -static only build a \`.o' file suitable for static linking + -Wc,FLAG pass FLAG directly to the compiler + +COMPILE-COMMAND is a command to be used in creating a \`standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix \`.c' with the +library object suffix, \`.lo'." + ;; + + execute) + $ECHO \ +"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to \`-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." 
+ ;; + + finish) + $ECHO \ +"Usage: $progname [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the \`--dry-run' option if you just want to see what would be executed." + ;; + + install) + $ECHO \ +"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the \`install' or \`cp' program. + +The following components of INSTALL-COMMAND are treated specially: + + -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + + link) + $ECHO \ +"Usage: $progname [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -bindir BINDIR specify path to binaries directory (for systems where + libraries must be found in the PATH setting at runtime) + -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE Use a list of object files found in FILE to specify objects + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -shared only do dynamic linking of libtool libraries + -shrext SUFFIX override the standard shared library file extension + -static do not do any dynamic linking of uninstalled libtool libraries + -static-libtool-libs + do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + -weak LIBNAME declare that the target provides the LIBNAME interface + -Wc,FLAG + -Xcompiler FLAG pass linker-specific FLAG directly to the compiler + -Wl,FLAG + -Xlinker FLAG pass 
linker-specific FLAG directly to the linker + -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) + +All other options (arguments beginning with \`-') are ignored. + +Every other argument is treated as a filename. Files ending in \`.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. + +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." + ;; + + uninstall) + $ECHO \ +"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + + *) + func_fatal_help "invalid operation mode \`$opt_mode'" + ;; + esac + + echo + $ECHO "Try \`$progname --help' for more information about other modes." 
+} + +# Now that we've collected a possible --mode arg, show help if necessary +if $opt_help; then + if test "$opt_help" = :; then + func_mode_help + else + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + func_mode_help + done + } | sed -n '1p; 2,$s/^Usage:/ or: /p' + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + echo + func_mode_help + done + } | + sed '1d + /^When reporting/,/^Report/{ + H + d + } + $x + /information about other modes/d + /more detailed .*MODE/d + s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' + fi + exit $? +fi + + +# func_mode_execute arg... +func_mode_execute () +{ + $opt_debug + # The first argument is the command name. + cmd="$nonopt" + test -z "$cmd" && \ + func_fatal_help "you must specify a COMMAND" + + # Handle -dlopen flags immediately. + for file in $opt_dlopen; do + test -f "$file" \ + || func_fatal_help "\`$file' is not a file" + + dir= + case $file in + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "\`$lib' is not a valid libtool archive" + + # Read the libtool library. + dlname= + library_names= + func_source "$file" + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && \ + func_warning "\`$file' was not linked with \`-export-dynamic'" + continue + fi + + func_dirname "$file" "" "." + dir="$func_dirname_result" + + if test -f "$dir/$objdir/$dlname"; then + func_append dir "/$objdir" + else + if test ! -f "$dir/$dlname"; then + func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" + fi + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + func_dirname "$file" "" "." 
+ dir="$func_dirname_result" + ;; + + *) + func_warning "\`-dlopen' is ignored for non-libtool libraries and objects" + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir="$absdir" + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic="$magic" + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -* | *.la | *.lo ) ;; + *) + # Do a test to see if this is really a libtool program. + if func_ltwrapper_script_p "$file"; then + func_source "$file" + # Transform arg to wrapped name. + file="$progdir/$program" + elif func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + func_source "$func_ltwrapper_scriptname_result" + # Transform arg to wrapped name. + file="$progdir/$program" + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + func_append_quoted args "$file" + done + + if test "X$opt_dry_run" = Xfalse; then + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES + do + eval "if test \"\${save_$lt_var+set}\" = set; then + $lt_var=\$save_$lt_var; export $lt_var + else + $lt_unset $lt_var + fi" + done + + # Now prepare to actually exec the command. + exec_cmd="\$cmd$args" + else + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" + echo "export $shlibpath_var" + fi + $ECHO "$cmd$args" + exit $EXIT_SUCCESS + fi +} + +test "$opt_mode" = execute && func_mode_execute ${1+"$@"} + + +# func_mode_finish arg... 
+func_mode_finish () +{ + $opt_debug + libs= + libdirs= + admincmds= + + for opt in "$nonopt" ${1+"$@"} + do + if test -d "$opt"; then + func_append libdirs " $opt" + + elif test -f "$opt"; then + if func_lalib_unsafe_p "$opt"; then + func_append libs " $opt" + else + func_warning "\`$opt' is not a valid libtool archive" + fi + + else + func_fatal_error "invalid argument \`$opt'" + fi + done + + if test -n "$libs"; then + if test -n "$lt_sysroot"; then + sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` + sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" + else + sysroot_cmd= + fi + + # Remove sysroot references + if $opt_dry_run; then + for lib in $libs; do + echo "removing references to $lt_sysroot and \`=' prefixes from $lib" + done + else + tmpdir=`func_mktempdir` + for lib in $libs; do + sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ + > $tmpdir/tmp-la + mv -f $tmpdir/tmp-la $lib + done + ${RM}r "$tmpdir" + fi + fi + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + func_execute_cmds "$finish_cmds" 'admincmds="$admincmds +'"$cmd"'"' + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $opt_dry_run || eval "$cmds" || func_append admincmds " + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. 
+ $opt_silent && exit $EXIT_SUCCESS + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + $ECHO " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the \`-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the \`$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the \`$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + $ECHO " - use the \`$flag' linker flag" + fi + if test -n "$admincmds"; then + $ECHO " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" + fi + echo + + echo "See any operating system documentation about shared libraries for" + case $host in + solaris2.[6789]|solaris2.1[0-9]) + echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" + echo "pages." + ;; + *) + echo "more information, such as the ld(1) and ld.so(8) manual pages." + ;; + esac + echo "----------------------------------------------------------------------" + fi + exit $EXIT_SUCCESS +} + +test "$opt_mode" = finish && func_mode_finish ${1+"$@"} + + +# func_mode_install arg... +func_mode_install () +{ + $opt_debug + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). 
+ if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac; then + # Aesthetically quote it. + func_quote_for_eval "$nonopt" + install_prog="$func_quote_for_eval_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_for_eval "$arg" + func_append install_prog "$func_quote_for_eval_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=no + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=yes ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test "x$prev" = x-m && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. 
+ func_quote_for_eval "$arg" + func_append install_prog " $func_quote_for_eval_result" + if test -n "$arg2"; then + func_quote_for_eval "$arg2" + fi + func_append install_shared_prog " $func_quote_for_eval_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the \`$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_for_eval "$install_override_mode" + func_append install_shared_prog " -m $func_quote_for_eval_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=yes + if test "$isdir" = yes; then + destdir="$dest" + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir="$func_dirname_result" + destname="$func_basename_result" + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "\`$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "\`$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "\`$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir="$func_dirname_result" + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking \`$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. 
+ set dummy $library_names; shift + if test -n "$1"; then + realname="$1" + shift + + srcname="$realname" + test -n "$relink_command" && srcname="$realname"T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme="$stripme" + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme="" + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try `ln -sf' first, because the `ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name="$func_basename_result" + instname="$dir/$name"i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + func_basename "$file" + destfile="$func_basename_result" + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to \`$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + func_basename "$file" + destfile="$func_basename_result" + destfile="$destdir/$destfile" + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext="" + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=".exe" + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script \`$wrapper'" + + finalize=yes + for lib in $notinst_deplibs; do + # Check to see that each library is installed. 
+ libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "\`$lib' has not been installed in \`$libdir'" + finalize=no + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + $opt_dry_run || { + if test "$finalize" = yes; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file="$func_basename_result" + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_silent || { + func_quote_for_expand "$relink_command" + eval "func_echo $func_quote_for_expand_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink \`$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file="$outputname" + else + func_warning "cannot relink \`$file'" + fi + } + else + # Install the binary that we compiled earlier. + file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name="$func_basename_result" + + # Set up the ranlib parameters. 
+ oldlib="$destdir/$name" + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' + + if test -n "$stripme" && test -n "$old_striplib"; then + func_show_eval "$old_striplib $tool_oldlib" 'exit $?' + fi + + # Do each command in the postinstall commands. + func_execute_cmds "$old_postinstall_cmds" 'exit $?' + done + + test -n "$future_libdirs" && \ + func_warning "remember to run \`$progname --finish$future_libdirs'" + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + $opt_dry_run && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi +} + +test "$opt_mode" = install && func_mode_install ${1+"$@"} + + +# func_generate_dlsyms outputname originator pic_p +# Extract symbols from dlprefiles and create ${outputname}S.o with +# a dlpreopen symbol table. +func_generate_dlsyms () +{ + $opt_debug + my_outputname="$1" + my_originator="$2" + my_pic_p="${3-no}" + my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'` + my_dlsyms= + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + my_dlsyms="${my_outputname}S.c" + else + func_error "not configured to extract global symbols from dlpreopened files" + fi + fi + + if test -n "$my_dlsyms"; then + case $my_dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist="$output_objdir/${my_outputname}.nm" + + func_show_eval "$RM $nlist ${nlist}S ${nlist}T" + + # Parse the name list into a source file. + func_verbose "creating $output_objdir/$my_dlsyms" + + $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ +/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. 
*/ +/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#endif + +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +/* External symbol declarations for the compiler. */\ +" + + if test "$dlself" = yes; then + func_verbose "generating symbol list for \`$output'" + + $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. 
+ progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` + for progfile in $progfiles; do + func_to_tool_file "$progfile" func_convert_file_msys_to_w32 + func_verbose "extracting global C symbols from \`$func_to_tool_file_result'" + $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $opt_dry_run || { + eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + if test -n "$export_symbols_regex"; then + $opt_dry_run || { + eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols="$output_objdir/$outputname.exp" + $opt_dry_run || { + $RM $export_symbols + eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' + ;; + esac + } + else + $opt_dry_run || { + eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' + eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' + ;; + esac + } + fi + fi + + for dlprefile in $dlprefiles; do + func_verbose "extracting global C symbols from \`$dlprefile'" + func_basename "$dlprefile" + name="$func_basename_result" + case $host in + *cygwin* | *mingw* | *cegcc* ) + # if an import library, we need to obtain dlname + if func_win32_import_lib_p "$dlprefile"; then + func_tr_sh "$dlprefile" + eval "curr_lafile=\$libfile_$func_tr_sh_result" + 
dlprefile_dlbasename="" + if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then + # Use subshell, to avoid clobbering current variable values + dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` + if test -n "$dlprefile_dlname" ; then + func_basename "$dlprefile_dlname" + dlprefile_dlbasename="$func_basename_result" + else + # no lafile. user explicitly requested -dlpreopen . + $sharedlib_from_linklib_cmd "$dlprefile" + dlprefile_dlbasename=$sharedlib_from_linklib_result + fi + fi + $opt_dry_run || { + if test -n "$dlprefile_dlbasename" ; then + eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' + else + func_warning "Could not compute DLL name from $name" + eval '$ECHO ": $name " >> "$nlist"' + fi + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | + $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" + } + else # not an import lib + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + fi + ;; + *) + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + ;; + esac + done + + $opt_dry_run || { + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $MV "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if $GREP -v "^: " < "$nlist" | + if sort -k 3 /dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : + else + $GREP -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$my_dlsyms" + fi + + echo >> "$output_objdir/$my_dlsyms" "\ + +/* The mapping between symbol names and symbols. */ +typedef struct { + const char *name; + void *address; +} lt_dlsymlist; +extern LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[]; +LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[] = +{\ + { \"$my_originator\", (void *) 0 }," + + case $need_lib_prefix in + no) + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + *) + eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + esac + echo >> "$output_objdir/$my_dlsyms" "\ + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_${my_prefix}_LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + } # !$opt_dry_run + + pic_flag_for_symtable= + case "$compile_command " in + *" -static "*) ;; + *) + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. 
+ *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; + *-*-hpux*) + pic_flag_for_symtable=" $pic_flag" ;; + *) + if test "X$my_pic_p" != Xno; then + pic_flag_for_symtable=" $pic_flag" + fi + ;; + esac + ;; + esac + symtab_cflags= + for arg in $LTCFLAGS; do + case $arg in + -pie | -fpie | -fPIE) ;; + *) func_append symtab_cflags " $arg" ;; + esac + done + + # Now compile the dynamic symbol file. + func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' + + # Clean up the generated files. + func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"' + + # Transform the symbol file into the correct name. + symfileobj="$output_objdir/${my_outputname}S.$objext" + case $host in + *cygwin* | *mingw* | *cegcc* ) + if test -f "$output_objdir/$my_outputname.def"; then + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + else + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + fi + ;; + *) + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + ;; + esac + ;; + *) + func_fatal_error "unknown suffix for \`$my_dlsyms'" + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. 
+ compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` + fi +} + +# func_win32_libid arg +# return the library type of file 'arg' +# +# Need a lot of goo to handle *both* DLLs and import libs +# Has to be a shell function in order to 'eat' the argument +# that is supplied when $file_magic_command is called. +# Despite the name, also deal with 64 bit binaries. +func_win32_libid () +{ + $opt_debug + win32_libid_type="unknown" + win32_fileres=`file -L $1 2>/dev/null` + case $win32_fileres in + *ar\ archive\ import\ library*) # definitely import + win32_libid_type="x86 archive import" + ;; + *ar\ archive*) # could be an import, or static + # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. + if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | + $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then + func_to_tool_file "$1" func_convert_file_msys_to_w32 + win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | + $SED -n -e ' + 1,100{ + / I /{ + s,.*,import, + p + q + } + }'` + case $win32_nmres in + import*) win32_libid_type="x86 archive import";; + *) win32_libid_type="x86 archive static";; + esac + fi + ;; + *DLL*) + win32_libid_type="x86 DLL" + ;; + *executable*) # but shell scripts are "executable" too... + case $win32_fileres in + *MS\ Windows\ PE\ Intel*) + win32_libid_type="x86 DLL" + ;; + esac + ;; + esac + $ECHO "$win32_libid_type" +} + +# func_cygming_dll_for_implib ARG +# +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib () +{ + $opt_debug + sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` +} + +# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs +# +# This is the core of a fallback implementation of a +# platform-specific function to extract the name of the +# DLL associated with the specified import library LIBNAME. +# +# SECTION_NAME is either .idata$6 or .idata$7, depending +# on the platform and compiler that created the implib. +# +# Echoes the name of the DLL associated with the +# specified import library. +func_cygming_dll_for_implib_fallback_core () +{ + $opt_debug + match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` + $OBJDUMP -s --section "$1" "$2" 2>/dev/null | + $SED '/^Contents of section '"$match_literal"':/{ + # Place marker at beginning of archive member dllname section + s/.*/====MARK====/ + p + d + } + # These lines can sometimes be longer than 43 characters, but + # are always uninteresting + /:[ ]*file format pe[i]\{,1\}-/d + /^In archive [^:]*:/d + # Ensure marker is printed + /^====MARK====/p + # Remove all lines with less than 43 characters + /^.\{43\}/!d + # From remaining lines, remove first 43 characters + s/^.\{43\}//' | + $SED -n ' + # Join marker and all lines until next marker into a single line + /^====MARK====/ b para + H + $ b para + b + :para + x + s/\n//g + # Remove the marker + s/^====MARK====// + # Remove trailing dots and whitespace + s/[\. \t]*$// + # Print + /./p' | + # we now have a list, one entry per line, of the stringified + # contents of the appropriate section of all members of the + # archive which possess that section. Heuristic: eliminate + # all those which have a first or second character that is + # a '.' (that is, objdump's representation of an unprintable + # character.)
This should work for all archives with less than + # 0x302f exports -- but will fail for DLLs whose name actually + # begins with a literal '.' or a single character followed by + # a '.'. + # + # Of those that remain, print the first one. + $SED -e '/^\./d;/^.\./d;q' +} + +# func_cygming_gnu_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is a GNU/binutils-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_gnu_implib_p () +{ + $opt_debug + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` + test -n "$func_cygming_gnu_implib_tmp" +} + +# func_cygming_ms_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is an MS-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_ms_implib_p () +{ + $opt_debug + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` + test -n "$func_cygming_ms_implib_tmp" +} + +# func_cygming_dll_for_implib_fallback ARG +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# +# This fallback implementation is for use when $DLLTOOL +# does not support the --identify-strict option. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib_fallback () +{ + $opt_debug + if func_cygming_gnu_implib_p "$1" ; then + # binutils import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` + elif func_cygming_ms_implib_p "$1" ; then + # ms-generated import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` + else + # unknown + sharedlib_from_linklib_result="" + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + $opt_debug + f_ex_an_ar_dir="$1"; shift + f_ex_an_ar_oldlib="$1" + if test "$lock_old_archive_extraction" = yes; then + lockfile=$f_ex_an_ar_oldlib.lock + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + fi + func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ + 'stat=$?; rm -f "$lockfile"; exit $stat' + if test "$lock_old_archive_extraction" = yes; then + $opt_dry_run || rm -f "$lockfile" + fi + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" + fi +} + + +# func_extract_archives gentop oldlib ... +func_extract_archives () +{ + $opt_debug + my_gentop="$1"; shift + my_oldlibs=${1+"$@"} + my_oldobjs="" + my_xlib="" + my_xabs="" + my_xdir="" + + for my_xlib in $my_oldlibs; do + # Extract the objects. 
+ case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + func_basename "$my_xlib" + my_xlib="$func_basename_result" + my_xlib_u=$my_xlib + while :; do + case " $extracted_archives " in + *" $my_xlib_u "*) + func_arith $extracted_serial + 1 + extracted_serial=$func_arith_result + my_xlib_u=lt$extracted_serial-$my_xlib ;; + *) break ;; + esac + done + extracted_archives="$extracted_archives $my_xlib_u" + my_xdir="$my_gentop/$my_xlib_u" + + func_mkdir_p "$my_xdir" + + case $host in + *-darwin*) + func_verbose "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + $opt_dry_run || { + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? + darwin_archive=$my_xabs + darwin_curdir=`pwd` + darwin_base_archive=`basename "$darwin_archive"` + darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` + if test -n "$darwin_arches"; then + darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches ; do + func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}" + $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}" + cd "unfat-$$/${darwin_base_archive}-${darwin_arch}" + func_extract_an_archive "`pwd`" "${darwin_base_archive}" + cd "$darwin_curdir" + $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" + done # $darwin_arches + ## Okay now we've a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u` + darwin_file= + darwin_files= + for darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` + $LIPO -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + $RM -rf unfat-$$ 
+ cd "$darwin_orig_dir" + else + cd $darwin_orig_dir + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + } # !$opt_dry_run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` + done + + func_extract_archives_result="$my_oldobjs" +} + + +# func_emit_wrapper [arg=no] +# +# Emit a libtool wrapper script on stdout. +# Don't directly open a file because we may want to +# incorporate the script contents within a cygwin/mingw +# wrapper executable. Must ONLY be called from within +# func_mode_link because it depends on a number of variables +# set therein. +# +# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR +# variable will take. If 'yes', then the emitted script +# will assume that the directory in which it is stored is +# the $objdir directory. This is a cygwin/mingw-specific +# behavior. +func_emit_wrapper () +{ + func_emit_wrapper_arg1=${1-no} + + $ECHO "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='$sed_quote_subst' + +# Be Bourne compatible +if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=\"$qECHO\" + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ which is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options which match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. 
+ lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. 
+func_exec_program () +{ + case \" \$* \" in + *\\ --lt-*) + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done ;; + esac + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. + thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + $ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. + if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. 
+ \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat < +#include +#ifdef _MSC_VER +# include +# include +# include +#else +# include +# include +# ifdef __CYGWIN__ +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +/* declarations of non-ANSI functions */ +#if defined(__MINGW32__) +# ifdef __STRICT_ANSI__ +int _putenv (const char *); +# endif +#elif defined(__CYGWIN__) +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined (other platforms) ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined(_MSC_VER) +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +# ifndef _INTPTR_T_DEFINED +# define _INTPTR_T_DEFINED +# define intptr_t int +# endif +#elif defined(__MINGW32__) +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined(__CYGWIN__) +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined (other platforms) ... 
*/ +#endif + +#if defined(PATH_MAX) +# define LT_PATHMAX PATH_MAX +#elif defined(MAXPATHLEN) +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \ + defined (__OS2__) +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free ((void *) stale); stale = 0; } \ +} while (0) + +#if defined(LT_DEBUGWRAPPER) +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char 
*file, int line, const char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. +*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + int tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? 
*/ +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = q - p; + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if (check_executable 
(concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, __LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (strcmp (str, pat) == 0) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + int len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + int orig_value_len = strlen (orig_value); + int add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, const 
char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + /* some systems can't cope with a ':'-terminated path #' */ + int len = strlen (new_value); + while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[len-1] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). + Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special. 
+ - But 2*n+1 backslashes followed by a double quote become + n backslashes followed by a double quote (n >= 0): + \" -> " + \\\" -> \" + \\\\\" -> \\" + */ +#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +char ** +prepare_spawn (char **argv) +{ + size_t argc; + char **new_argv; + size_t i; + + /* Count number of arguments. */ + for (argc = 0; argv[argc] != NULL; argc++) + ; + + /* Allocate new argument vector. */ + new_argv = XMALLOC (char *, argc + 1); + + /* Put quoted arguments into the new argument vector. */ + for (i = 0; i < argc; i++) + { + const char *string = argv[i]; + + if (string[0] == '\0') + new_argv[i] = xstrdup ("\"\""); + else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) + { + int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); + size_t length; + unsigned int backslashes; + const char *s; + char *quoted_string; + char *p; + + length = 0; + backslashes = 0; + if (quote_around) + length++; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + length += backslashes + 1; + length++; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + length += backslashes + 1; + + quoted_string = XMALLOC (char, length + 1); + + p = quoted_string; + backslashes = 0; + if (quote_around) + *p++ = '"'; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + { + unsigned int j; + for (j = backslashes + 1; j > 0; j--) + *p++ = '\\'; + } + *p++ = c; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + { + unsigned int j; + for (j = backslashes; j > 0; j--) + *p++ = '\\'; + *p++ = '"'; + } + *p = '\0'; + + new_argv[i] = quoted_string; + } + else + new_argv[i] = (char *) string; + } + 
new_argv[argc] = NULL; + + return new_argv; +} +EOF + ;; + esac + + cat <<"EOF" +void lt_dump_script (FILE* f) +{ +EOF + func_emit_wrapper yes | + $SED -n -e ' +s/^\(.\{79\}\)\(..*\)/\1\ +\2/ +h +s/\([\\"]\)/\\\1/g +s/$/\\n/ +s/\([^\n]*\).*/ fputs ("\1", f);/p +g +D' + cat <<"EOF" +} +EOF +} +# end: func_emit_cwrapperexe_src + +# func_win32_import_lib_p ARG +# True if ARG is an import lib, as indicated by $file_magic_cmd +func_win32_import_lib_p () +{ + $opt_debug + case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in + *import*) : ;; + *) false ;; + esac +} + +# func_mode_link arg... +func_mode_link () +{ + $opt_debug + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # which system we are compiling for in order to pass an extra + # flag for every libtool invocation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll which has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. 
+ allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=no + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module="${wl}-single_module" + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test "$build_libtool_libs" != yes && \ + func_fatal_configuration "can not build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. 
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg="$1" + shift + func_quote_for_eval "$arg" + qarg=$func_quote_for_eval_unquoted_result + func_append libtool_args " $func_quote_for_eval_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir="$arg" + prev= + continue + ;; + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=yes + fi + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + test -f "$arg" \ + || func_fatal_error "symbol file \`$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir="$arg" + prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. 
+ + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test "$pic_object" = none && + test "$non_pic_object" = none; then + func_fatal_error "cannot find name of object for \`$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. + non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "\`$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file \`$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + precious_regex) + precious_files_regex="$arg" + prev= + continue + ;; + release) + release="-$arg" + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds="$arg" + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg="$arg" + + case $arg in + -all-static) + if test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. 
+ func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "\`-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between \`-L' and \`$1'" + else + func_fatal_error "need path for \`-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of \`$dir'" + dir="$absdir" + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test "X$arg" = "X-lc" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. 
+ test "X$arg" = "X-lc" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test "X$arg" = "X-lc" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test "X$arg" = "X-lc" && continue + ;; + esac + elif test "X$arg" = "X-lc_r"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module="${wl}-multi_module" + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. 
+ func_warning "\`-no-install' is ignored for $host" + func_warning "assuming \`-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. + continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. 
+ continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + func_quote_for_eval "$flag" + func_append arg " $func_quote_for_eval_result" + func_append compiler_flags " $func_quote_for_eval_result" + done + IFS="$save_ifs" + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + func_quote_for_eval "$flag" + func_append arg " $wl$func_quote_for_eval_result" + func_append compiler_flags " $wl$func_quote_for_eval_result" + func_append linker_flags " $func_quote_for_eval_result" + done + IFS="$save_ifs" + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time 
optimization + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-flto*|-fwhopr*|-fuse-linker-plugin) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + # Some other compiler flag. + -* | +*) + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test "$pic_object" = none && + test "$non_pic_object" = none; then + func_fatal_error "cannot find name of object for \`$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. 
+ non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir="$func_dirname_result" + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "\`$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_for_eval "$arg" + arg="$func_quote_for_eval_result" + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the \`$prevarg' option requires an argument" + + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname="$func_basename_result" + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + func_dirname "$output" "/" "" + output_objdir="$func_dirname_result$objdir" + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. 
-la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps ; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test "$linkmode" = lib; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test "$linkmode,$pass" = "lib,link"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs="$tmp_deplibs" + fi + + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan"; then + libs="$deplibs" + deplibs= + fi + if test "$linkmode" = prog; then + case $pass in + dlopen) libs="$dlfiles" ;; + dlpreopen) libs="$dlprefiles" ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + if test "$linkmode,$pass" = "lib,dlpreopen"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs="$dlprefiles" + fi + if test "$pass" = dlopen; then + # Collect dlpreopened libraries + save_deplibs="$deplibs" + deplibs= + fi + + for deplib in $libs; do + lib= + found=no + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test "$linkmode" = lib ; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test "$linkmode" != lib && test "$linkmode" != prog; then + func_warning "\`-l' is ignored for archives/objects" + 
continue + fi + func_stripname '-l' '' "$deplib" + name=$func_stripname_result + if test "$linkmode" = lib; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib="$searchdir/lib${name}${search_ext}" + if test -f "$lib"; then + if test "$search_ext" = ".la"; then + found=yes + else + found=no + fi + break 2 + fi + done + done + if test "$found" != yes; then + # deplib doesn't seem to be a libtool library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + else # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll="$l" + done + if test "X$ll" = "X$old_library" ; then # only static version available + found=no + func_dirname "$lib" "" "." 
+ ladir="$func_dirname_result" + lib=$ladir/$old_library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + fi + ;; # -l + *.ltframework) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test "$linkmode" = lib ; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test "$pass" = conv && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + if test "$pass" = scan; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "\`-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test "$pass" = link; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. 
+ case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=no + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=yes + fi + ;; + pass_all) + valid_a_lib=yes + ;; + esac + if test "$valid_a_lib" != yes; then + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." + else + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" 
+ deplibs="$deplib $deplibs" + fi + ;; + esac + continue + ;; + prog) + if test "$pass" != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + elif test "$linkmode" = prog; then + if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac # case $deplib + + if test "$found" = yes || test -f "$lib"; then : + else + func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'" + fi + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "\`$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." 
+ ladir="$func_dirname_result" + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan" || + { test "$linkmode" != prog && test "$linkmode" != lib; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test "$pass" = conv; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for \`$lib'" + fi + # It is a libtool convenience library, so add in its objects. 
+ func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + elif test "$linkmode" != prog && test "$linkmode" != lib; then + func_fatal_error "\`$lib' is not a convenience library" + fi + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + if test -n "$old_library" && + { test "$prefer_static_libs" = yes || + test "$prefer_static_libs,$installed" = "built,no"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib="$l" + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for \`$lib'" + fi + + # This library was specified with -dlopen. + if test "$pass" = dlopen; then + if test -z "$libdir"; then + func_fatal_error "cannot -dlopen a convenience library: \`$lib'" + fi + if test -z "$dlname" || + test "$dlopen_support" != yes || + test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of \`$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir="$ladir" + fi + ;; + esac + func_basename "$lib" + laname="$func_basename_result" + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library \`$lib' was moved." + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$lt_sysroot$libdir" + absdir="$lt_sysroot$libdir" + fi + test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir="$ladir" + absdir="$abs_ladir" + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test "$pass" = dlpreopen; then + if test -z "$libdir" && test "$linkmode" = prog; then + func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'" + fi + case "$host" in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. 
+ # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test "$linkmode" = lib; then + deplibs="$dir/$old_library $deplibs" + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test "$linkmode" = prog && test "$pass" != link; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test -z "$library_names" || + test "$build_libtool_libs" = no; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if test "$linkalldeplibs" = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test "$linkmode,$pass" = "prog,link"; then + if test -n "$library_names" && + { { test "$prefer_static_libs" = no || + test "$prefer_static_libs,$installed" = "built,yes"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then + # Make sure the rpath contains only unique directories. 
+ case "$temp_rpath:" in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... + + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test "$use_static_libs" = built && test "$installed" = yes; then + use_static_libs=no + fi + if test -n "$library_names" && + { test "$use_static_libs" = no || test -z "$old_library"; }; then + case $host in + *cygwin* | *mingw* | *cegcc*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test "$installed" = no; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! 
+ dlopenmodule="" + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule="$dlpremoduletest" + break + fi + done + if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then + echo + if test "$linkmode" = prog; then + $ECHO "*** Warning: Linking the executable $output against the loadable module" + else + $ECHO "*** Warning: Linking the shared library $output against the loadable module" + fi + $ECHO "*** $linklib is not portable!" + fi + if test "$linkmode" = lib && + test "$hardcode_into_libs" = yes; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname="$1" + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname="$dlname" + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw* | *cegcc*) + func_arith $current - $age + major=$func_arith_result + versuffix="-$major" + ;; + esac + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot="$soname" + func_basename "$soroot" + soname="$func_basename_result" + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from \`$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for \`$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test "$linkmode" = prog || test "$opt_mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + case $host in + *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;; + *-*-sysv4*uw2*) add_dir="-L$dir" ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir="-L$dir" ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we can not + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null ; then + if test "X$dlopenmodule" != "X$lib"; then + $ECHO "*** Warning: lib $linklib is a module, not a shared library" + if test -z "$old_library" ; then + echo + echo "*** And there doesn't seem to be a static archive available" + echo 
"*** The link will probably fail, sorry" + else + add="$dir/$old_library" + fi + elif test -n "$old_library"; then + add="$dir/$old_library" + fi + fi + esac + elif test "$hardcode_minus_L" = no; then + case $host in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes && + test "$hardcode_direct_absolute" = no; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$absdir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test "$linkmode" = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && + test "$hardcode_minus_L" != yes && + test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test "$linkmode" = prog || test "$opt_mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test "$hardcode_direct" = yes && + test "$hardcode_direct_absolute" = no; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add="-l$name" + elif test "$hardcode_automatic" = yes; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib" ; then + add="$inst_prefix_dir$libdir/$linklib" + else + add="$libdir/$linklib" + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + fi + + if test "$linkmode" = prog; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test "$linkmode" = prog; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test "$hardcode_direct" != unsupported; then + test -n "$old_library" && linklib="$old_library" + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test "$build_libtool_libs" = yes; then + # Not a shared library + if test "$deplibs_check_method" != pass_all; then + # We're trying link a shared library against a static one + # but the system doesn't support it. 
+ + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + $ECHO "*** Warning: This system can not link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test "$module" = yes; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test "$linkmode" = lib; then + if test -n "$dependency_libs" && + { test "$hardcode_into_libs" != yes || + test "$build_old_libs" = yes || + test "$link_static" = yes; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs="$temp_deplibs" + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps ; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test "$link_all_deplibs" != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path="$deplib" ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of \`$dir'" + absdir="$dir" + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names" ; then + for tmp in $deplibrary_names ; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl" ; then + depdepl="$absdir/$objdir/$depdepl" + darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}" + func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}" + path= + fi + fi + ;; + *) + path="-L$absdir/$objdir" + ;; + esac + else + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "\`$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "\`$deplib' seems to be moved" + + path="-L$absdir" + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test "$pass" = link; then + if test "$linkmode" = "prog"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs="$newdependency_libs" + if test "$pass" = dlpreopen; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + 
deplibs="$deplib $deplibs" + done + fi + if test "$pass" != dlopen; then + if test "$pass" != conv; then + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + fi + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. 
+ case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs ; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i="" + ;; + esac + if test -n "$i" ; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test "$linkmode" = prog; then + dlfiles="$newdlfiles" + fi + if test "$linkmode" = prog || test "$linkmode" = lib; then + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + func_warning "\`-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "\`-l' and \`-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "\`-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "\`-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "\`-version-info/-version-number' is ignored for archives" + + test -n "$release" && \ + func_warning "\`-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "\`-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs="$output" + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form `libNAME.la'. 
+ case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test "$module" = no && \ + func_fatal_help "libtool library \`$output' must begin with \`lib'" + + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs" + else + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test "$dlself" != no && \ + func_warning "\`-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test "$#" -gt 1 && \ + func_warning "ignoring multiple \`-rpath's for a libtool library" + + install_libdir="$1" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. + # Some compilers have problems with a `.al' extension so + # convenience libraries should have the same extension an + # archive normally would. + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "\`-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "\`-release' is ignored for convenience libraries" + else + + # Parse the version information argument. 
+ save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + shift + IFS="$save_ifs" + + test -n "$7" && \ + func_fatal_help "too many parameters to \`-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major="$1" + number_minor="$2" + number_revision="$3" + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # which has an extra 1 added just for fun + # + case $version_type in + # correct linux to gnu/linux during the next big refactor + darwin|linux|osf|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age="$number_minor" + revision="$number_revision" + ;; + freebsd-aout|freebsd-elf|qnx|sunos) + current="$number_major" + revision="$number_minor" + age="0" + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age="$number_minor" + revision="$number_minor" + lt_irix_increment=no + ;; + esac + ;; + no) + current="$1" + revision="$2" + age="$3" + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT \`$current' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION \`$revision' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE \`$age' must be a nonnegative integer" + func_fatal_error "\`$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE \`$age' is greater than the current interface number \`$current'" + func_fatal_error "\`$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix="$major.$age.$revision" + # Darwin ld doesn't like 0 for these options... 
+ func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current" + ;; + + irix | nonstopux) + if test "X$lt_irix_increment" = "Xno"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring="$verstring_prefix$major.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test "$loop" -ne 0; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring="$verstring_prefix$major.$iface:$verstring" + done + + # Before this point, $major must not contain `.'. + major=.$major + versuffix="$major.$revision" + ;; + + linux) # correct to gnu/linux during the next big refactor + func_arith $current - $age + major=.$func_arith_result + versuffix="$major.$age.$revision" + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$age + while test "$loop" -ne 0; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. 
+ func_append verstring ":${current}.0" + ;; + + qnx) + major=".$current" + versuffix=".$current" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 filesystems. + func_arith $current - $age + major=$func_arith_result + versuffix="-$major" + ;; + + *) + func_fatal_configuration "unknown library version type \`$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring="0.0" + ;; + esac + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + func_warning "undefined symbols not allowed in $host shared libraries" + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + + fi + + func_generate_dlsyms "$libname" "$libname" "yes" + func_append libobjs " $symfileobj" + test "X$libobjs" = "X " && libobjs= + + if test "$opt_mode" != relink; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. 
+ removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) + if test "X$precious_files_regex" != "X"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. + if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test "$build_libtool_need_lc" = "yes"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. 
+ name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $opt_dry_run || $RM conftest.c + cat > conftest.c </dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? 
+ potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $ECHO "*** with $libname but no candidates were found. (...for file magic test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib="" + ;; + esac + fi + if test -n "$a_deplib" ; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib="$potent_lib" # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs="" + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + for i in $predeps $postdeps ; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test "X$deplibs_check_method" = "Xnone"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + $ECHO "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." 
+ fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test "$allow_undefined" = no; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs="$new_libs" + + # All the library-specific variables (install_libdir is set above). 
+ library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + # Remove ${wl} instances when linking with ld. + # FIXME: should test the right _cmds variable. + case $archive_cmds in + *\$LD\ *) wl= ;; + esac + if test "$hardcode_into_libs" = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$opt_mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath="$finalize_shlibpath" + test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname="$1" + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib="$output_objdir/$realname" + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols="$output_objdir/$libname.uexp" + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + if test "x`$SED 1q $export_symbols`" != xEXPORTS; then + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. 
+ orig_export_symbols="$export_symbols" + export_symbols= + always_export_symbols=yes + fi + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs="$IFS"; IFS='~' + for cmd1 in $cmds; do + IFS="$save_ifs" + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test "$try_normal_branch" = yes \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' + skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=${output_objdir}/${output_la}.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. 
+ break + fi + done + IFS="$save_ifs" + if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols="$export_symbols" + test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs="$tmp_deplibs" + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test "$compiler_needs_object" = yes && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. 
+ whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test "$opt_mode" = relink; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test "X$skipped_export" != "X:" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. 
+ # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then + output=${output_objdir}/${output_la}.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then + output=${output_objdir}/${output_la}.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test "$compiler_needs_object" = yes; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-${k}.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. 
+ for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test "X$objlist" = X || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test "$k" -eq 1 ; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-${k}.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-${k}.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\${concat_cmds}$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + if ${skipped_export-false}; then + func_verbose "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + fi + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. 
+ save_ifs="$IFS"; IFS='~' + for cmd in $concat_cmds; do + IFS="$save_ifs" + $opt_silent || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + if ${skipped_export-false}; then + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols="$export_symbols" + test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + fi + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $opt_silent || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + # Restore the uninstalled library and exit + if test "$opt_mode" = relink; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? 
+ + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. + dlname="$soname" + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + func_warning "\`-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "\`-l' and \`-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "\`-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "\`-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "\`-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "\`-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object \`$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj="$output" + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec and hope we can get by with + # turning comma into space.. 
+ wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + else + gentop="$output_objdir/${obj}x" + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test "$build_libtool_libs" != yes && libobjs="$non_pic_objects" + + # Create the old-style object. + reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test + + output="$obj" + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + if test "$build_libtool_libs" != yes; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + fi + + if test -n "$pic_flag" || test "$pic_mode" != default; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output="$libobj" + func_execute_cmds "$reload_cmds" 'exit $?' 
+ fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "\`-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "\`-release' is ignored for programs" + + test "$preload" = yes \ + && test "$dlopen_support" = unknown \ + && test "$dlopen_self" = unknown \ + && test "$dlopen_self_static" = unknown && \ + func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test "$tagname" = CXX ; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " ${wl}-bind_at_load" + func_append finalize_command " ${wl}-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs="$new_libs" + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath="$rpath" + + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" "no" + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=yes + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=no + ;; + *cygwin* | *mingw* ) + if test "$build_libtool_libs" != yes; then + wrappers_required=no + fi + ;; + *) + if test "$need_relink" = no || test "$build_libtool_libs" != yes; then + wrappers_required=no + fi + ;; + esac + if test "$wrappers_required" = no; then + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. 
+ if test -f "$output_objdir/${outputname}S.${objext}"; then + func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"' + fi + + exit $exit_status + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' 
+ fi + + exit $EXIT_SUCCESS + fi + + if test "$hardcode_action" = relink; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "\`$output' will be relinked during installation" + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. 
+ if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + relink_command="(cd `pwd`; $relink_command)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* ) + func_dirname_and_basename "$output" "" "." + output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource="$output_path/$objdir/lt-$output_name.c" + cwrapper="$output_path/$output_name.exe" + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. 
+ $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host" ; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save $symfileobj" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$old_deplibs $non_pic_objects" + if test "$preload" = yes && test -f "$symfileobj"; then + func_append oldobjs " $symfileobj" + fi + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. 
We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop="$output_objdir/${outputname}x" + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase="$func_basename_result" + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." 
+ for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj" ; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test "X$oldobjs" = "X" ; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. 
+ case $output in + *.la) + old_library= + test "$build_old_libs" = yes && old_library="$libname.$libext" + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + if test "$hardcode_automatic" = yes ; then + relink_command= + fi + + # Only create the output if not a dry run. + $opt_dry_run || { + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name="$func_basename_result" + func_resolve_sysroot "$deplib" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` + test -z "$libdir" && \ + func_fatal_error "\`$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs 
" $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "\`$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name="$func_basename_result" + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "\`$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles="$newdlprefiles" + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles="$newdlprefiles" + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. 
See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test "x$bindir" != x ; + then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result$dlname + else + # Otherwise fall back on heuristic. + tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that can not go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test "$installed" = no && test "$need_relink" = yes; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' 
+ ;; + esac + exit $EXIT_SUCCESS +} + +{ test "$opt_mode" = link || test "$opt_mode" = relink; } && + func_mode_link ${1+"$@"} + + +# func_mode_uninstall arg... +func_mode_uninstall () +{ + $opt_debug + RM="$nonopt" + files= + rmforce= + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=yes ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." + dir="$func_dirname_result" + if test "X$dir" = X.; then + odir="$objdir" + else + odir="$dir/$objdir" + fi + func_basename "$file" + name="$func_basename_result" + test "$opt_mode" = uninstall && odir="$dir" + + # Remember odir for removal later, being careful to avoid duplicates + if test "$opt_mode" = clean; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif test "$rmforce" = yes; then + continue + fi + + rmfiles="$file" + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. 
+ for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case "$opt_mode" in + clean) + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. + if test -n "$pic_object" && + test "$pic_object" != none; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && + test "$non_pic_object" != none; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test "$opt_mode" = clean ; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. 
+ if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name" ; then + func_append rmfiles " $odir/lt-${noexename}.c" + fi + fi + fi + ;; + esac + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the ${objdir}s in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +{ test "$opt_mode" = uninstall || test "$opt_mode" = clean; } && + func_mode_uninstall ${1+"$@"} + +test -z "$opt_mode" && { + help="$generic_help" + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode \`$opt_mode'" + +if test -n "$exec_cmd"; then + eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# in which we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. 
+# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: +# vi:sw=2 + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,8 @@ +EXTRA_DIST = acx_mpi.m4 acx_pthread.m4 ax_cc_maxopt.m4 \ +ax_check_compiler_flags.m4 ax_compiler_vendor.m4 \ +ax_gcc_aligns_stack.m4 ax_gcc_version.m4 ax_openmp.m4 + +# libtool sticks a bunch of extra .m4 files in this directory, +# but they don't seem to be needed for the distributed tarball +# (they aren't needed for configure && make, and boostrapping +# will regenerate them anyway). diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,467 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = m4 +DIST_COMMON = 
$(srcdir)/Makefile.in $(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = 
@DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = 
@am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +EXTRA_DIST = acx_mpi.m4 acx_pthread.m4 ax_cc_maxopt.m4 \ +ax_check_compiler_flags.m4 ax_compiler_vendor.m4 \ +ax_gcc_aligns_stack.m4 ax_gcc_version.m4 ax_openmp.m4 + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu m4/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu m4/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am + + +# libtool sticks a bunch of extra .m4 files in this directory, +# but they don't seem to be needed for the distributed tarball +# (they aren't needed for configure && make, and boostrapping +# will regenerate them anyway). + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/acx_mpi.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/acx_mpi.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,106 @@ +dnl @synopsis ACX_MPI([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +dnl @summary figure out how to compile/link code with MPI +dnl @category InstalledPackages +dnl +dnl This macro tries to find out how to compile programs that +dnl use MPI (Message Passing Interface), a standard API for +dnl parallel process communication (see http://www-unix.mcs.anl.gov/mpi/) +dnl +dnl On success, it sets the MPICC, MPICXX, or MPIF77 output variable to +dnl the name of the MPI compiler, depending upon the current language. +dnl (This may just be $CC/$CXX/$F77, but is more often something like +dnl mpicc/mpiCC/mpif77.) It also sets MPILIBS to any libraries that are +dnl needed for linking MPI (e.g. -lmpi, if a special MPICC/MPICXX/MPIF77 +dnl was not found). +dnl +dnl If you want to compile everything with MPI, you should set: +dnl +dnl CC="$MPICC" #OR# CXX="$MPICXX" #OR# F77="$MPIF77" +dnl LIBS="$MPILIBS $LIBS" +dnl +dnl NOTE: The above assumes that you will use $CC (or whatever) +dnl for linking as well as for compiling. (This is the +dnl default for automake and most Makefiles.) +dnl +dnl The user can force a particular library/compiler by setting the +dnl MPICC/MPICXX/MPIF77 and/or MPILIBS environment variables. +dnl +dnl ACTION-IF-FOUND is a list of shell commands to run if an MPI +dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands +dnl to run it if it is not found. If ACTION-IF-FOUND is not specified, +dnl the default action will define HAVE_MPI. +dnl +dnl @version 2005-09-02 +dnl @license GPLWithACException +dnl @author Steven G. 
Johnson + +AC_DEFUN([ACX_MPI], [ +AC_PREREQ(2.50) dnl for AC_LANG_CASE + +AC_LANG_CASE([C], [ + AC_REQUIRE([AC_PROG_CC]) + AC_ARG_VAR(MPICC,[MPI C compiler command]) + AC_CHECK_PROGS(MPICC, mpicc hcc mpcc mpcc_r mpxlc cmpicc, $CC) + acx_mpi_save_CC="$CC" + CC="$MPICC" + AC_SUBST(MPICC) +], +[C++], [ + AC_REQUIRE([AC_PROG_CXX]) + AC_ARG_VAR(MPICXX,[MPI C++ compiler command]) + AC_CHECK_PROGS(MPICXX, mpic++ mpiCC mpicxx mpCC hcp mpxlC mpxlC_r cmpic++, $CXX) + acx_mpi_save_CXX="$CXX" + CXX="$MPICXX" + AC_SUBST(MPICXX) +], +[Fortran 77], [ + AC_REQUIRE([AC_PROG_F77]) + AC_ARG_VAR(MPIF77,[MPI Fortran compiler command]) + AC_CHECK_PROGS(MPIF77, mpif77 hf77 mpxlf mpf77 mpif90 mpf90 mpxlf90 mpxlf95 mpxlf_r cmpifc cmpif90c, $F77) + acx_mpi_save_F77="$F77" + F77="$MPIF77" + AC_SUBST(MPIF77) +]) + +if test x = x"$MPILIBS"; then + AC_LANG_CASE([C], [AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "])], + [C++], [AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "])], + [Fortran 77], [AC_MSG_CHECKING([for MPI_Init]) + AC_TRY_LINK([],[ call MPI_Init], [MPILIBS=" " + AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)])]) +fi +if test x = x"$MPILIBS"; then + AC_CHECK_LIB(mpi, MPI_Init, [MPILIBS="-lmpi"]) +fi +if test x = x"$MPILIBS"; then + AC_CHECK_LIB(mpich, MPI_Init, [MPILIBS="-lmpich"]) +fi + +dnl We have to use AC_TRY_COMPILE and not AC_CHECK_HEADER because the +dnl latter uses $CPP, not $CC (which may be mpicc). 
+AC_LANG_CASE([C], [if test x != x"$MPILIBS"; then + AC_MSG_CHECKING([for mpi.h]) + AC_TRY_COMPILE([#include ],[],[AC_MSG_RESULT(yes)], [MPILIBS="" + AC_MSG_RESULT(no)]) +fi], +[C++], [if test x != x"$MPILIBS"; then + AC_MSG_CHECKING([for mpi.h]) + AC_TRY_COMPILE([#include ],[],[AC_MSG_RESULT(yes)], [MPILIBS="" + AC_MSG_RESULT(no)]) +fi]) + +AC_LANG_CASE([C], [CC="$acx_mpi_save_CC"], + [C++], [CXX="$acx_mpi_save_CXX"], + [Fortran 77], [F77="$acx_mpi_save_F77"]) + +AC_SUBST(MPILIBS) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x = x"$MPILIBS"; then + $2 + : +else + ifelse([$1],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$1]) + : +fi +])dnl ACX_MPI diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/acx_pthread.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/acx_pthread.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,245 @@ +dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +dnl @summary figure out how to build C programs using POSIX threads +dnl @category InstalledPackages +dnl +dnl This macro figures out how to build C programs using POSIX +dnl threads. It sets the PTHREAD_LIBS output variable to the threads +dnl library and linker flags, and the PTHREAD_CFLAGS output variable +dnl to any special C compiler flags that are needed. (The user can also +dnl force certain compiler flags/libs to be tested by setting these +dnl environment variables.) +dnl +dnl Also sets PTHREAD_CC to any special C compiler that is needed for +dnl multi-threaded programs (defaults to the value of CC otherwise). +dnl (This is necessary on AIX to use the special cc_r compiler alias.) +dnl +dnl NOTE: You are assumed to not only compile your program with these +dnl flags, but also link it with them as well. e.g. you should link +dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
$PTHREAD_LIBS $LIBS +dnl +dnl If you are only building threads programs, you may wish to +dnl use these variables in your default LIBS, CFLAGS, and CC: +dnl +dnl LIBS="$PTHREAD_LIBS $LIBS" +dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +dnl CC="$PTHREAD_CC" +dnl +dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute +dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE +dnl to that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +dnl +dnl ACTION-IF-FOUND is a list of shell commands to run if a threads +dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands +dnl to run it if it is not found. If ACTION-IF-FOUND is not specified, +dnl the default action will define HAVE_PTHREAD. +dnl +dnl Please let the authors know if this macro fails on any platform, +dnl or if you have any other suggestions or comments. This macro was +dnl based on work by SGJ on autoconf scripts for FFTW (www.fftw.org) +dnl (with help from M. Frigo), as well as ac_pthread and hb_pthread +dnl macros posted by Alejandro Forero Cuervo to the autoconf macro +dnl repository. We are also grateful for the helpful feedback of +dnl numerous users. +dnl +dnl @version 2006-09-15 +dnl @license GPLWithACException +dnl @author Steven G. Johnson + +AC_DEFUN([ACX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_LANG_SAVE +AC_LANG_C +acx_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on True64 or Sequent). +# It gets checked for in the link test anyway. 
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) + AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes) + AC_MSG_RESULT($acx_pthread_ok) + if test x"$acx_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mt -mthreads pthread --thread-safe pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# (where it should come before -mthreads to avoid spurious warnings) +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case "${host_cpu}-${host_os}" in + *solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags" + ;; +esac + +if test x"$acx_pthread_ok" = xno; then +for flag in $acx_pthread_flags; do + + case $flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $flag]) + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no) + if test x"$acx_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$flag]) + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. 
+ AC_TRY_LINK([#include <pthread.h>], + [pthread_t th; pthread_join(th, (void**) 0); + pthread_attr_init((pthread_attr_t*) 0); + pthread_cleanup_push((void(*)(void *)) 0, (void*) 0); + pthread_create((pthread_t*) 0, (pthread_attr_t*) 0, + (void*(*)(void *)) 0, (void*) 0); + pthread_cleanup_pop(0); ], + [acx_pthread_ok=yes]) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + AC_MSG_RESULT($acx_pthread_ok) + if test "x$acx_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$acx_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_MSG_CHECKING([for joinable pthread attribute]) + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;], + [attr_name=$attr; break]) + done + AC_MSG_RESULT($attr_name) + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + fi + + AC_MSG_CHECKING([if more special flags are required for pthreads]) + flag=no + case "${host_cpu}-${host_os}" in + *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";; + *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";; + esac + AC_MSG_RESULT(${flag}) + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC}) + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + +AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_CC) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$acx_pthread_ok" = xyes; then +
ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) + : +else + acx_pthread_ok=no + $2 +fi +AC_LANG_RESTORE +])dnl ACX_PTHREAD diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ax_cc_maxopt.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ax_cc_maxopt.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,128 @@ +dnl @synopsis AX_CC_MAXOPT +dnl @summary turn on optimization flags for the C compiler +dnl @category C +dnl +dnl Try to turn on "good" C optimization flags for various compilers +dnl and architectures, for some definition of "good". (In our case, +dnl good for FFTW and hopefully for other scientific codes. Modify +dnl as needed.) +dnl +dnl The user can override the flags by setting the CFLAGS environment +dnl variable. +dnl +dnl Note also that the flags assume that ANSI C aliasing rules are +dnl followed by the code (e.g. for gcc's -fstrict-aliasing), and that +dnl floating-point computations can be re-ordered as needed. +dnl +dnl Requires macros: AX_CHECK_COMPILER_FLAGS, AX_COMPILER_VENDOR, +dnl +dnl @version 2011-06-22 +dnl @license GPLWithACException +dnl @author Steven G. Johnson and Matteo Frigo. +AC_DEFUN([AX_CC_MAXOPT], +[ +AC_REQUIRE([AC_PROG_CC]) +AC_REQUIRE([AX_COMPILER_VENDOR]) +AC_REQUIRE([AC_CANONICAL_HOST]) + +# Try to determine "good" native compiler flags if none specified via CFLAGS +if test "$ac_test_CFLAGS" != "set"; then + CFLAGS="" + case $ax_cv_c_compiler_vendor in + dec) CFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" + ;; + + sun) CFLAGS="-native -fast -xO5 -dalign" + ;; + + hp) CFLAGS="+Oall +Optrs_ansi +DSnative" + ;; + + ibm) xlc_opt="-qtune=auto" + AX_CHECK_COMPILER_FLAGS($xlc_opt, + CFLAGS="-O3 -qansialias -w $xlc_opt", + [CFLAGS="-O3 -qansialias -w" + echo "******************************************************" + echo "* You seem to have the IBM C compiler. 
It is *" + echo "* recommended for best performance that you use: *" + echo "* *" + echo "* CFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" + echo "* ^^^ ^^^ *" + echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" + echo "* CPU you have. (Set the CFLAGS environment var. *" + echo "* and re-run configure.) For more info, man cc. *" + echo "******************************************************"]) + ;; + + intel) CFLAGS="-O3" + # Intel seems to have changed the spelling of this flag recently + icc_ansi_alias="unknown" + for flag in -ansi-alias -ansi_alias; do + AX_CHECK_COMPILER_FLAGS($flag, [icc_ansi_alias=$flag; break]) + done + if test "x$icc_ansi_alias" != xunknown; then + CFLAGS="$CFLAGS $icc_ansi_alias" + fi + AX_CHECK_COMPILER_FLAGS(-malign-double, CFLAGS="$CFLAGS -malign-double") + # We used to check for architecture flags here, e.g. -xHost etc., + # but these flags are problematic. On icc-12.0.0, "-mavx -xHost" + # overrides -mavx with -xHost, generating SSE2 code instead of AVX + # code. ICC does not seem to support -mtune=host or equivalent + # non-ABI changing flag. + ;; + + gnu) + # Default optimization flags for gcc on all systems. + # Somehow -O3 does not imply -fomit-frame-pointer on ia32 + CFLAGS="-O3 -fomit-frame-pointer" + + # tune for the host by default + AX_CHECK_COMPILER_FLAGS(-mtune=native, CFLAGS="$CFLAGS -mtune=native") + + # -malign-double for x86 systems + AX_CHECK_COMPILER_FLAGS(-malign-double, CFLAGS="$CFLAGS -malign-double") + + # -fstrict-aliasing for gcc-2.95+ + AX_CHECK_COMPILER_FLAGS(-fstrict-aliasing, + CFLAGS="$CFLAGS -fstrict-aliasing") + + # -fno-schedule-insns is pretty much required on all risc + # processors. + # + # gcc performs one pass of instruction scheduling, then a pass of + # register allocation, then another pass of instruction + # scheduling. The first pass reorders instructions in a way that + # is pretty much the worst possible for the purposes of register + # allocation. We disable the first pass. 
+ AX_CHECK_COMPILER_FLAGS(-fno-schedule-insns, CFLAGS="$CFLAGS -fno-schedule-insns") + + # note that we enable "unsafe" fp optimization with other compilers, too + AX_CHECK_COMPILER_FLAGS(-ffast-math, CFLAGS="$CFLAGS -ffast-math") + + ;; + esac + + if test -z "$CFLAGS"; then + echo "" + echo "********************************************************" + echo "* WARNING: Don't know the best CFLAGS for this system *" + echo "* Use ./configure CFLAGS=... to specify your own flags *" + echo "* (otherwise, a default of CFLAGS=-O3 will be used) *" + echo "********************************************************" + echo "" + CFLAGS="-O3" + fi + + AX_CHECK_COMPILER_FLAGS($CFLAGS, [], [ + echo "" + echo "********************************************************" + echo "* WARNING: The guessed CFLAGS don't seem to work with *" + echo "* your compiler. *" + echo "* Use ./configure CFLAGS=... to specify your own flags *" + echo "********************************************************" + echo "" + CFLAGS="" + ]) + +fi +]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ax_check_compiler_flags.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ax_check_compiler_flags.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,40 @@ +dnl @synopsis AX_CHECK_COMPILER_FLAGS(FLAGS, [ACTION-SUCCESS], [ACTION-FAILURE]) +dnl @summary check whether FLAGS are accepted by the compiler +dnl @category Misc +dnl +dnl Check whether the given compiler FLAGS work with the current language's +dnl compiler, or whether they give an error. (Warnings, however, are +dnl ignored.) +dnl +dnl ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +dnl success/failure. +dnl +dnl @version 2005-05-30 +dnl @license GPLWithACException +dnl @author Steven G. Johnson and Matteo Frigo. 
+AC_DEFUN([AX_CHECK_COMPILER_FLAGS], +[AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX +AC_MSG_CHECKING([whether _AC_LANG compiler accepts $1]) +dnl Some hackery here since AC_CACHE_VAL can't handle a non-literal varname: +AS_LITERAL_IF([$1], + [AC_CACHE_VAL(AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1), [ + ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$1" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, + AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) + _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS])], + [ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$1" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, + eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) + _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS]) +eval ax_check_compiler_flags=$AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1) +AC_MSG_RESULT($ax_check_compiler_flags) +if test "x$ax_check_compiler_flags" = xyes; then + m4_default([$2], :) +else + m4_default([$3], :) +fi +])dnl AX_CHECK_COMPILER_FLAGS diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ax_compiler_vendor.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ax_compiler_vendor.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +dnl @synopsis AX_COMPILER_VENDOR +dnl @summary find the vendor (gnu, intel, etc.) of the C/C++ compiler +dnl @category C +dnl @category C++ +dnl +dnl Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm, +dnl sun, hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi, +dnl microsoft, watcom, etc. The vendor is returned in the cache variable +dnl $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++. +dnl +dnl @version 2007-08-01 +dnl @license GPLWithACException +dnl @author Steven G. 
Johnson with Matteo Frigo + +AC_DEFUN([AX_COMPILER_VENDOR], +[ +AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor, + [ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=unknown + # note: don't check for gcc first since some other compilers define __GNUC__ + for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ pathscale:__PATHCC__,__PATHSCALE__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do + vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[ +#if !($vencpp) + thisisanerror; +#endif +])], [ax_cv_]_AC_LANG_ABBREV[_compiler_vendor=`echo $ventest | cut -d: -f1`; break]) + done + ]) +]) + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ax_gcc_aligns_stack.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ax_gcc_aligns_stack.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,50 @@ +dnl @synopsis AX_GCC_ALIGNS_STACK([ACTION-IF-YES], [ACTION-IF-NO]) +dnl @summary check whether gcc can align stack to 8-byte boundary +dnl @category Misc +dnl +dnl Check to see if we are using a version of gcc that aligns the stack +dnl (true in gcc-2.95+, which have the -mpreferred-stack-boundary flag). +dnl Also, however, checks whether main() is correctly aligned by the +dnl OS/libc/..., as well as for a bug in the stack alignment of gcc-2.95.x +dnl (see http://gcc.gnu.org/ml/gcc-bugs/1999-11/msg00259.html). +dnl +dnl ACTION-IF-YES/ACTION-IF-NO are shell commands to execute if we are +dnl using gcc and the stack is/isn't aligned, respectively. +dnl +dnl Requires macro: AX_CHECK_COMPILER_FLAGS, AX_GCC_VERSION +dnl +dnl @version 2005-05-30 +dnl @license GPLWithACException +dnl @author Steven G. 
Johnson +AC_DEFUN([AX_GCC_ALIGNS_STACK], +[ +AC_REQUIRE([AC_PROG_CC]) +ax_gcc_aligns_stack=no +if test "$GCC" = "yes"; then +AX_CHECK_COMPILER_FLAGS(-mpreferred-stack-boundary=4, [ + AC_MSG_CHECKING([whether the stack is at least 8-byte aligned by gcc]) + save_CFLAGS="$CFLAGS" + CFLAGS="-O" + AX_CHECK_COMPILER_FLAGS(-malign-double, CFLAGS="$CFLAGS -malign-double") + AC_TRY_RUN([#include <stdlib.h> +# include <stdio.h> + struct yuck { int blechh; }; + int one(void) { return 1; } + struct yuck ick(void) { struct yuck y; y.blechh = 3; return y; } +# define CHK_ALIGN(x) if ((((long) &(x)) & 0x7)) { fprintf(stderr, "bad alignment of " #x "\n"); exit(1); } + void blah(int foo) { double foobar; CHK_ALIGN(foobar); } + int main2(void) {double ok1; struct yuck y; double ok2; CHK_ALIGN(ok1); + CHK_ALIGN(ok2); y = ick(); blah(one()); return 0;} + int main(void) { if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); return main2(); } + ], [ax_gcc_aligns_stack=yes; ax_gcc_stack_align_bug=no], + ax_gcc_stack_align_bug=yes, [AX_GCC_VERSION(3,0,0, ax_gcc_stack_align_bug=no, ax_gcc_stack_align_bug=yes)]) + CFLAGS="$save_CFLAGS" + AC_MSG_RESULT($ax_gcc_aligns_stack) +]) +fi +if test "$ax_gcc_aligns_stack" = yes; then + m4_default([$1], :) +else + m4_default([$2], :) +fi +]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ax_gcc_version.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ax_gcc_version.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,38 @@ +dnl @synopsis AX_GCC_VERSION(MAJOR, MINOR, PATCHLEVEL, [ACTION-SUCCESS], [ACTION-FAILURE]) +dnl @summary check whether gcc is at least version MAJOR.MINOR.PATCHLEVEL +dnl @category InstalledPackages +dnl +dnl Check whether we are using gcc and, if so, whether its version +dnl is at least MAJOR.MINOR.PATCHLEVEL +dnl +dnl ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +dnl success/failure. +dnl +dnl @version 2005-05-30 +dnl @license GPLWithACException +dnl @author Steven G.
Johnson and Matteo Frigo. +AC_DEFUN([AX_GCC_VERSION], +[ +AC_REQUIRE([AC_PROG_CC]) +AC_CACHE_CHECK(whether we are using gcc $1.$2.$3 or later, ax_cv_gcc_$1_$2_$3, +[ +ax_cv_gcc_$1_$2_$3=no +if test "$GCC" = "yes"; then +dnl The semicolon after "yes" below is to pacify NeXT's syntax-checking cpp. +AC_EGREP_CPP(yes, [ +#ifdef __GNUC__ +# if (__GNUC__ > $1) || (__GNUC__ == $1 && __GNUC_MINOR__ > $2) \ + || (__GNUC__ == $1 && __GNUC_MINOR__ == $2 && __GNUC_PATCHLEVEL__ >= $3) + yes; +# endif +#endif +], [ax_cv_gcc_$1_$2_$3=yes]) +fi +]) +if test "$ax_cv_gcc_$1_$2_$3" = yes; then + m4_default([$4], :) +else + m4_default([$5], :) +fi +]) + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ax_openmp.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ax_openmp.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,66 @@ +dnl @synopsis AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +dnl @summary determine how to compile programs using OpenMP +dnl @category InstalledPackages +dnl +dnl This macro tries to find out how to compile programs that +dnl use OpenMP, a standard API and set of compiler directives for +dnl parallel programming (see http://www.openmp.org/). +dnl +dnl On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_FFLAGS +dnl output variable to the flag (e.g. -omp) used both to compile *and* link +dnl OpenMP programs in the current language. +dnl +dnl NOTE: You are assumed to not only compile your program with these +dnl flags, but also link it with them as well. +dnl +dnl If you want to compile everything with OpenMP, you should set: +dnl +dnl CFLAGS="$CFLAGS $OPENMP_CFLAGS" +dnl #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" +dnl #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" +dnl +dnl (depending on the selected language). +dnl +dnl The user can override the default choice by setting the corresponding +dnl environment variable (e.g. OPENMP_CFLAGS). 
+dnl +dnl ACTION-IF-FOUND is a list of shell commands to run if an OpenMP +dnl flag is found, and ACTION-IF-NOT-FOUND is a list of commands +dnl to run if it is not found. If ACTION-IF-FOUND is not specified, +dnl the default action will define HAVE_OPENMP. +dnl +dnl @version 2006-11-20 +dnl @license GPLWithACException +dnl @author Steven G. Johnson + +AC_DEFUN([AX_OPENMP], [ +AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX + +AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS +ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown +# Flags to try: -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI), +# -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none +ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none" +if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then + ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" +fi +for ax_openmp_flag in $ax_openmp_flags; do + case $ax_openmp_flag in + none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS ;; + *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; + esac + AC_TRY_LINK_FUNC(omp_set_num_threads, + [ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break]) +done +[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS +]) +if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then + m4_default([$2],:) +else + if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then + OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp + fi + m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) +fi +AC_SUBST(OPENMP_[]_AC_LANG_PREFIX[]FLAGS) +])dnl AX_OPENMP diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/libtool.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/libtool.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,7982 @@ +# libtool.m4 - Configure libtool for the host system.
-*-Autoconf-*- +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is part of GNU Libtool. +# +# GNU Libtool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Libtool; see the file COPYING. If not, a copy +# can be downloaded from http://www.gnu.org/licenses/gpl.html, or +# obtained by writing to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +]) + +# serial 57 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less than VERSION.
+m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl +m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ltmain" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_CC_BASENAME(CC) +# ------------------- +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
+m4_defun([_LT_CC_BASENAME], +[for cc_temp in $1""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'. +m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl +dnl +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl +_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on 
zsh, and set the options which allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). +libext=a + +with_gnu_ld="$lt_cv_prog_gnu_ld" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. 
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from `configure', and `config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# `config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. +m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain="$ac_aux_dir/ltmain.sh" +])# _LT_PROG_LTMAIN + + +## ------------------------------------- ## +## Accumulate code for creating libtool. ## +## ------------------------------------- ## + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the `libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. 
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. +m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + +## ------------------------ ## +## FIXME: Eliminate VARNAME ## +## ------------------------ ## + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. 
+m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# -------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) 
+m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to `config.status' so that its +# declaration there will have the same value as in `configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags="_LT_TAGS"dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for. 
+# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the `libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. +m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into `config.status', and then the shell code to quote escape them in +# for loops in `config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. 
+m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# `#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). +m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test $lt_write_fail = 0 && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), incase it is used in configure for compilation +# tests. 
+AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +\`$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to ." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2011 Free Software Foundation, Inc. +This config.lt script is free software; the Free Software Foundation +gives unlimited permision to copy, distribute and modify it." 
+ +while test $[#] != 0 +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try \`$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try \`$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test "$silent" = yes && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options which allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST + fi + + cfgfile="${ofile}T" + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL + +# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +_LT_COPYING +_LT_LIBTOOL_TAGS + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # it finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + _LT_PROG_REPLACE_SHELLFNS + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (ie after a #). 
+# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + TIMESTAMP='$TIMESTAMP' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Go], [_LT_LANG(GO)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +m4_ifndef([AC_PROG_GO], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_GO. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ +m4_defun([AC_PROG_GO], +[AC_LANG_PUSH(Go)dnl +AC_ARG_VAR([GOC], [Go compiler command])dnl +AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl +_AC_ARG_VAR_LDFLAGS()dnl +AC_CHECK_TOOL(GOC, gccgo) +if test -z "$GOC"; then + if test -n "$ac_tool_prefix"; then + AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) + fi +fi +if test -z "$GOC"; then + AC_CHECK_PROG(GOC, gccgo, gccgo, false) +fi +])#m4_defun +])#m4_ifndef + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([AC_PROG_GO], + [LT_LANG(GO)], + [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. 
+m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test $_lt_result -eq 0; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS="$save_LDFLAGS" + ]) + + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c 
-Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? + case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[[012]]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES([TAG]) +# --------------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + 
_LT_TAGVAR(hardcode_automatic, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test "$lt_cv_ld_force_load" = "yes"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], + [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined" + case $cc_basename in + ifort*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test "$_lt_dar_can_shared" = "yes"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + m4_if([$1], [CXX], +[ if test "$lt_cv_apple_cc_single_mod" != "yes"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname 
\$verstring${_lt_dsymutil}" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test "${lt_cv_aix_libpath+set}" = set; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib" + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. +# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script which will find a shell with a builtin +# printf (which we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. 
+func_echo_all () +{ + $ECHO "$*" +} + +case "$ECHO" in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[ --with-sysroot[=DIR] Search for dependent libraries within DIR + (or the compiler's sysroot if not specified).], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. 
+lt_sysroot= +case ${with_sysroot} in #( + yes) + if test "$GCC" = yes; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([${with_sysroot}]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and in which our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. + echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. 
+ echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; + ppc64-*linux*|powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + ppc*-*linux*|powerpc*-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. 
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD="${LD-ld}_sol2" + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks="$enable_libtool_lock" +])# _LT_ENABLE_LOCK + + +# _LT_PROG_AR +# ----------- +m4_defun([_LT_PROG_AR], +[AC_CHECK_TOOLS(AR, [ar], false) +: ${AR=ar} +: ${AR_FLAGS=cru} +_LT_DECL([], [AR], [1], [The archiver]) +_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) + +AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], + [lt_cv_ar_at_file=no + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], + [echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([lt_ar_try]) + if test "$ac_status" -eq 0; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + AC_TRY_EVAL([lt_ar_try]) + if test "$ac_status" -ne 0; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + ]) + ]) + +if test "x$lt_cv_ar_at_file" = xno; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi +_LT_DECL([], [archiver_list_spec], [1], + [How to feed a file listing to the archiver]) +])# _LT_PROG_AR + + +# _LT_CMD_OLD_ARCHIVE +# ------------------- +m4_defun([_LT_CMD_OLD_ARCHIVE], +[_LT_PROG_AR + +AC_CHECK_TOOL(STRIP, strip, :) +test -z "$STRIP" && STRIP=: +_LT_DECL([], [STRIP], [1], [A symbol stripping program]) + +AC_CHECK_TOOL(RANLIB, ranlib, :) +test -z "$RANLIB" && RANLIB=: +_LT_DECL([], [RANLIB], [1], + [Commands used to install an old-style archive]) + +# Determine commands to create old-style static archives. 
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac +_LT_DECL([], [old_postinstall_cmds], [2]) +_LT_DECL([], [old_postuninstall_cmds], [2]) +_LT_TAGDECL([], [old_archive_cmds], [2], + [Commands used to build an old-style archive]) +_LT_DECL([], [lock_old_archive_extraction], [0], + [Whether to use a lock for old archive extraction]) +])# _LT_CMD_OLD_ARCHIVE + + +# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([_LT_COMPILER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. 
+ lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $RM conftest* +]) + +if test x"[$]$2" = xyes; then + m4_if([$5], , :, [$5]) +else + m4_if([$6], , :, [$6]) +fi +])# _LT_COMPILER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) + + +# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------- +# Check whether the given linker option works +AC_DEFUN([_LT_LINKER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $3" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&AS_MESSAGE_LOG_FD + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $RM -r conftest* + LDFLAGS="$save_LDFLAGS" +]) + +if test x"[$]$2" = xyes; then + m4_if([$4], , :, [$4]) +else + m4_if([$5], , :, [$5]) +fi +])# _LT_LINKER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) + + +# LT_CMD_MAX_LEN +#--------------- +AC_DEFUN([LT_CMD_MAX_LEN], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). 
+ lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8 ; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac +]) +# Quote the value: unquoted, an empty result leaves "test -n" with no operand, which is always true, so "none" could never be reported. +if test -n "$lt_cv_sys_max_cmd_len" ; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +max_cmd_len=$lt_cv_sys_max_cmd_len +_LT_DECL([], [max_cmd_len], [0], + [What is the maximum length of a command?]) +])# LT_CMD_MAX_LEN + +# Old name: +AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) + + +# _LT_HEADER_DLFCN +# ---------------- +m4_defun([_LT_HEADER_DLFCN], +[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl +])# _LT_HEADER_DLFCN + + +# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ---------------------------------------------------------------- +m4_defun([_LT_TRY_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test "$cross_compiling" = yes; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +[#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +}] +_LT_EOF + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_TRY_DLOPEN_SELF + + +# LT_SYS_DLOPEN_SELF +# ------------------ +AC_DEFUN([LT_SYS_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + 
[lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +_LT_DECL([dlopen_support], [enable_dlopen], [0], + [Whether dlopen is supported]) +_LT_DECL([dlopen_self], [enable_dlopen_self], [0], + [Whether dlopen of programs is supported]) +_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], + [Whether dlopen of statically linked programs is supported]) +])# LT_SYS_DLOPEN_SELF + +# Old name: +AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) +dnl aclocal-1.4 backwards compatibility: +dnl 
AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) + + +# _LT_COMPILER_C_O([TAGNAME]) +# --------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler. +# This macro does not hard code the compiler like AC_PROG_CC_C_O. +m4_defun([_LT_COMPILER_C_O], +[m4_require([_LT_DECL_SED])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 
2>&AS_MESSAGE_LOG_FD + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* +]) +_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], + [Does compiler simultaneously support -c and -o options?]) +])# _LT_COMPILER_C_O + + +# _LT_COMPILER_FILE_LOCKS([TAGNAME]) +# ---------------------------------- +# Check to see if we can do hard links to lock some files if needed +m4_defun([_LT_COMPILER_FILE_LOCKS], +[m4_require([_LT_ENABLE_LOCK])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_COMPILER_C_O([$1]) + +hard_links="nottested" +if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test "$hard_links" = no; then + AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) +])# _LT_COMPILER_FILE_LOCKS + + +# _LT_CHECK_OBJDIR +# ---------------- +m4_defun([_LT_CHECK_OBJDIR], +[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], +[rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null]) +objdir=$lt_cv_objdir +_LT_DECL([], [objdir], [0], + [The name of the directory that contains temporary libtool files])dnl +m4_pattern_allow([LT_OBJDIR])dnl +AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/", + [Define to the sub-directory in which libtool stores uninstalled libraries.]) +])# _LT_CHECK_OBJDIR + + +# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) +# -------------------------------------- +# Check hardcoding attributes. +m4_defun([_LT_LINKER_HARDCODE_LIBPATH], +[AC_MSG_CHECKING([how to hardcode library paths into programs]) +_LT_TAGVAR(hardcode_action, $1)= +if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || + test -n "$_LT_TAGVAR(runpath_var, $1)" || + test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then + + # We can hardcode non-existent directories. + if test "$_LT_TAGVAR(hardcode_direct, $1)" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no && + test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then + # Linking always hardcodes the temporary library directory. + _LT_TAGVAR(hardcode_action, $1)=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + _LT_TAGVAR(hardcode_action, $1)=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ _LT_TAGVAR(hardcode_action, $1)=unsupported +fi +AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) + +if test "$_LT_TAGVAR(hardcode_action, $1)" = relink || + test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi +_LT_TAGDECL([], [hardcode_action], [0], + [How to hardcode a shared library path into an executable]) +])# _LT_LINKER_HARDCODE_LIBPATH + + +# _LT_CMD_STRIPLIB +# ---------------- +m4_defun([_LT_CMD_STRIPLIB], +[m4_require([_LT_DECL_EGREP]) +striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac +fi +_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) +_LT_DECL([], [striplib], [1]) +])# _LT_CMD_STRIPLIB + + +# _LT_SYS_DYNAMIC_LINKER([TAG]) +# ----------------------------- +# PORTME Fill in your ld.so characteristics +m4_defun([_LT_SYS_DYNAMIC_LINKER], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_OBJDUMP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) +m4_if([$1], + [], [ +if test "$GCC" = yes; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + case $host_os in + mingw* 
| cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;; + *) lt_sed_strip_eq="s,=/,/,g" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's,/\([[A-Za-z]]:\),\1,g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix[[4-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + library_names_spec='${libname}.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec="$LIB" + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[23]].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=yes + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + 
+hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. 
Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[[89]] | openbsd2.[[89]].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + 
shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then + sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +fi +if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then + sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" +fi + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. 
First name is the real one, the rest are links. + The last name is the one that the linker finds with -lNAME]]) +_LT_DECL([], [soname_spec], [1], + [[The coded name of the library, if different from the real name]]) +_LT_DECL([], [install_override_mode], [1], + [Permission mode override for installation of shared libraries]) +_LT_DECL([], [postinstall_cmds], [2], + [Command to use after installation of a shared archive]) +_LT_DECL([], [postuninstall_cmds], [2], + [Command to use after uninstallation of a shared archive]) +_LT_DECL([], [finish_cmds], [2], + [Commands used to finish a libtool library installation in a directory]) +_LT_DECL([], [finish_eval], [1], + [[As "finish_cmds", except a single script fragment to be evaled but + not shown]]) +_LT_DECL([], [hardcode_into_libs], [0], + [Whether we should hardcode library paths into libraries]) +_LT_DECL([], [sys_lib_search_path_spec], [2], + [Compile-time system search path for libraries]) +_LT_DECL([], [sys_lib_dlsearch_path_spec], [2], + [Run-time system search path for libraries]) +])# _LT_SYS_DYNAMIC_LINKER + + +# _LT_PATH_TOOL_PREFIX(TOOL) +# -------------------------- +# find a file program which can recognize shared library +AC_DEFUN([_LT_PATH_TOOL_PREFIX], +[m4_require([_LT_DECL_EGREP])dnl +AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, +[case $MAGIC_CMD in +[[\\/*] | ?:[\\/]*]) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD="$MAGIC_CMD" + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="m4_if([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$1; then + lt_cv_path_MAGIC_CMD="$ac_dir/$1" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS="$lt_save_ifs" + MAGIC_CMD="$lt_save_MAGIC_CMD" + ;; +esac]) +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + AC_MSG_RESULT($MAGIC_CMD) +else + AC_MSG_RESULT(no) +fi +_LT_DECL([], [MAGIC_CMD], [0], + [Used to examine libraries when file_magic_cmd begins with "file"])dnl +])# _LT_PATH_TOOL_PREFIX + +# Old name: +AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) + + +# _LT_PATH_MAGIC +# -------------- +# find a file program which can recognize a shared library +m4_defun([_LT_PATH_MAGIC], +[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) + else + MAGIC_CMD=: + fi +fi +])# _LT_PATH_MAGIC + + +# LT_PATH_LD +# ---------- +# find the pathname to the GNU or non-GNU linker +AC_DEFUN([LT_PATH_LD], +[AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PROG_ECHO_BACKSLASH])dnl + +AC_ARG_WITH([gnu-ld], + [AS_HELP_STRING([--with-gnu-ld], + [assume the C compiler uses GNU ld @<:@default=no@:>@])], + [test "$withval" = no || with_gnu_ld=yes], + [with_gnu_ld=no])dnl + +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by $CC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | ?:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. 
+ case `"$lt_cv_path_LD" -v 2>&1 &1 /dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. + lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? 
shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + 
lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# find the pathname to a BSD- or MS-compatible name lister +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the 
test. + lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + : ${lt_cv_path_NM=no} +fi]) +if test "$lt_cv_path_NM" != "no"; then + NM="$lt_cv_path_NM" +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. 
+ else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test "$DUMPBIN" != ":"; then + NM="$DUMPBIN" + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
+# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh + # decide which to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd="$ECHO" + ;; +esac +]) +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# locate the manifest tool +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest*]) +if test "x$lt_cv_path_mainfest_tool" != xyes; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# LT_LIB_M +# -------- +# check for math library +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM="-lm") + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test "$GCC" = yes; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_PROG_AWK])dnl 
+AC_REQUIRE([LT_PATH_NM])dnl +AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test "$host_cpu" = ia64; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. 
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'" +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function + # and D for any global variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? 
\"T \" : \"D \"};"\ +" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ +" s[1]~/^[@?]/{print s[1], s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) +/* DATA imports from DLLs on WIN32 con't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined(__osf__) +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. 
+ eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support. 
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test "$GXX" = yes; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. 
+ if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + if test "$host_cpu" != ia64; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. 
+ ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64 which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test "$GCC" = yes; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. 
+ if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. 
+ _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + # old Intel for x86_64 which still supported -KPIC. + ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ F* | *Sun*Fortran*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Intel*\ [[CF]]*Compiler*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *Portland\ Group*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, 
$1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. +# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. 
+m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global defined + # symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl*) + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, 
$1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. 
+ _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test "$with_gnu_ld" = yes; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test "$lt_use_gnu_ld_interface" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. 
Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. 
+ # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test "$host_os" = linux-dietlibc; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test "$tmp_diet" = no + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' 
-i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to AIX nm, but means don't demangle with GNU nm + # Also, AIX nm treats weak defined symbols like other global + # defined symbols, whereas GNU nm marks them as "W". + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. 
In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' + + if test "$GCC" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared libraries. 
+ _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. + _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes && test "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + 
# but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS="$save_LDFLAGS"]) + if test "$lt_cv_irix_exported_symbol" = yes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' + fi + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry 
${output_objdir}/so_locations -exports_file $export_symbols -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + else + case $host_os in + openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC 
-shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + ;; + esac + fi + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC 
-shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test "$GCC" = yes; then + wlarc='${wl}' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='${wl}' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. GCC discards it without `$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test "$GCC" = yes; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs 
$deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + 
_LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test x$host_vendor = xsni; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? +# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. 
+ libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + [Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + 
[Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. + This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e impossible to change by setting ${shlibpath_var} if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) +_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with 
templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify filename containing input files]) +dnl FIXME: Not yet implemented +dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], +dnl [Compiler flag to generate thread safe objects]) +])# _LT_LINKER_SHLIBS + + +# _LT_LANG_C_CONFIG([TAG]) +# ------------------------ +# Ensure that the configuration variables for a C compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to `libtool'. +m4_defun([_LT_LANG_C_CONFIG], +[m4_require([_LT_DECL_EGREP])dnl +lt_save_CC="$CC" +AC_LANG_PUSH(C) + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + +_LT_TAG_COMPILER +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... 
+if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + LT_SYS_DLOPEN_SELF + _LT_CMD_STRIPLIB + + # Report which library types will actually be built + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_CONFIG($1) +fi +AC_LANG_POP +CC="$lt_save_CC" +])# _LT_LANG_C_CONFIG + + +# _LT_LANG_CXX_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a C++ compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to `libtool'. 
+m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test "X$CXX" != "Xno" && + ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || + (test "X$CXX" != "Xg++"))) ; then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test "$_lt_caught_CXX_error" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test "$GXX" = yes; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test "$GXX" = yes; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test "$with_gnu_ld" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='${wl}' + + # ancient GNU ld didn't support --whole-archive et al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should be + # overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' + + if test "$GXX" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an empty + # executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + if test "$with_gnu_ld" = yes; then + # We only use this code for GNU lds that support --whole-archive. + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared + # libraries. 
+ _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl*) + # Native MSVC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; + else + $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile="$lt_outputfile.exe" + lt_tool_outputfile="$lt_tool_outputfile.exe" + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... + _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + gnu*) + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, 
$1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test $with_gnu_ld = no; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. 
+ ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes; then + if test $with_gnu_ld = no; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test "$GXX" = yes; then + if test "$with_gnu_ld" = no; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. 
+ _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects 
$compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + if test "x$supports_anon_versioning" = xyes; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; 
func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd2*) + # C++ shared libraries are fairly broken + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. 
This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + else + # g++ 2.7 appears to require `-G' NOT `-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, 
which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + + _LT_TAGVAR(GCC, $1)="$GXX" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## 
There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test "$_lt_caught_CXX_error" != yes + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# strip PREFIX and SUFFIX off of NAME. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. +m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case ${2} in + .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; + *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. 
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar () { + a = 0; + } +}; +_LT_EOF +], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF +package foo +func foo() { +} +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file.
+ pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case ${prev}${p} in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space. + if test "x$p" = "x-L" || + test "x$p" = "x-R"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test "$pre_test_object_deps_done" = no; then + case ${prev} in + -L | -R) + # Internal compiler library paths should come after those + # provided by the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}" + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)="${prev}${p}" + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output.
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test "$pre_test_object_deps_done" = no; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)="$p" + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)="$p" + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f conftest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; + +linux*) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + if test "$solaris_use_stlport4" != yes; then + _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; + +solaris*) + case $cc_basename in + CC* | sunCC*) + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it.
+ case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + + # Adding this requires a known-good setup of shared libraries for + # Sun compiler versions before 5.6, else PIC objects from an old + # archive will be linked into the output, leading to subtle bugs. + if test "$solaris_use_stlport4" != yes; then + _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test "X$F77" = "Xno"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_F77" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. 
+ lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)="$G77" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC="$lt_save_CC" + CFLAGS="$lt_save_CFLAGS" +fi # test "$_lt_disable_F77" != yes + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. 
+m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test "X$FC" = "Xno"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test "$_lt_disable_FC" != yes; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. 
+ lt_save_CC="$CC" + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test "$can_build_shared" = "no" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test "$enable_shared" = yes || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu" + _LT_TAGVAR(LD, $1)="$LD" + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test "$_lt_disable_FC" != yes + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler compiler +# are suitably defined. 
These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)="$LD" +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in. +_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_GO_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Go compiler +# are suitably defined. 
These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_GO_CONFIG], +[AC_REQUIRE([LT_PROG_GO])dnl +AC_LANG_SAVE + +# Source file extension for Go test sources. +ac_ext=go + +# Object file extension for compiled Go test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="package main; func main() { }" + +# Code to be used in simple link tests +lt_simple_link_test_code='package main; func main() { }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GOC-"gccgo"} +CFLAGS=$GOFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)="$LD" +_LT_CC_BASENAME([$compiler]) + +# Go did not exist at the time GCC didn't implicitly link libc in. +_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GO_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. 
These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to `libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code="$lt_simple_compile_test_code" + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC="$CC" +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_GO +# ---------- +AC_DEFUN([LT_PROG_GO], +[AC_CHECK_TOOL(GOC, gccgo,) +]) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# 
available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# -------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! -f $lt_ac_sed && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. 
+ if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test $lt_ac_count -gt 10 && break + lt_ac_count=`expr $lt_ac_count + 1` + if test $lt_ac_count -gt $lt_ac_max; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell is Bourne or XSI compatible, +# or has some other useful features. +m4_defun([_LT_CHECK_SHELL_FEATURES], +[AC_MSG_CHECKING([whether the shell understands some XSI constructs]) +# Try some XSI features +xsi_shell=no +( _lt_dummy="a/b/c" + test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ + = c,a/b,b/c, \ + && eval 'test $(( 1 + 1 )) -eq 2 \ + && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ + && xsi_shell=yes +AC_MSG_RESULT([$xsi_shell]) +_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell']) + +AC_MSG_CHECKING([whether the shell understands "+="]) +lt_shell_append=no +( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \ + >/dev/null 2>&1 \ + && lt_shell_append=yes +AC_MSG_RESULT([$lt_shell_append]) +_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append']) + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based 
system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY) +# ------------------------------------------------------ +# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and +# '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY. +m4_defun([_LT_PROG_FUNCTION_REPLACE], +[dnl { +sed -e '/^$1 ()$/,/^} # $1 /c\ +$1 ()\ +{\ +m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1]) +} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") +test 0 -eq $? || _lt_function_replace_fail=: +]) + + +# _LT_PROG_REPLACE_SHELLFNS +# ------------------------- +# Replace existing portable implementations of several shell functions with +# equivalent extended shell implementations where those features are available.. +m4_defun([_LT_PROG_REPLACE_SHELLFNS], +[if test x"$xsi_shell" = xyes; then + _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl + case ${1} in + */*) func_dirname_result="${1%/*}${2}" ;; + * ) func_dirname_result="${3}" ;; + esac]) + + _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl + func_basename_result="${1##*/}"]) + + _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl + case ${1} in + */*) func_dirname_result="${1%/*}${2}" ;; + * ) func_dirname_result="${3}" ;; + esac + func_basename_result="${1##*/}"]) + + _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl + # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are + # positional parameters, so assign one to ordinary parameter first. 
+ func_stripname_result=${3} + func_stripname_result=${func_stripname_result#"${1}"} + func_stripname_result=${func_stripname_result%"${2}"}]) + + _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl + func_split_long_opt_name=${1%%=*} + func_split_long_opt_arg=${1#*=}]) + + _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl + func_split_short_opt_arg=${1#??} + func_split_short_opt_name=${1%"$func_split_short_opt_arg"}]) + + _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl + case ${1} in + *.lo) func_lo2o_result=${1%.lo}.${objext} ;; + *) func_lo2o_result=${1} ;; + esac]) + + _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo]) + + _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))]) + + _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}]) +fi + +if test x"$lt_shell_append" = xyes; then + _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"]) + + _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl + func_quote_for_eval "${2}" +dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \ + eval "${1}+=\\\\ \\$func_quote_for_eval_result"]) + + # Save a `func_append' function call where possible by direct use of '+=' + sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? || _lt_function_replace_fail=: +else + # Save a `func_append' function call even when '+=' is not available + sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ + && mv -f "$cfgfile.tmp" "$cfgfile" \ + || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") + test 0 -eq $? 
|| _lt_function_replace_fail=: +fi + +if test x"$_lt_function_replace_fail" = x":"; then + AC_MSG_WARN([Unable to substitute extended shell functions in $ofile]) +fi +]) + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine which file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. +m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd +AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[#assume ordinary cross tools, or native build. 
+lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ltoptions.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ltoptions.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,384 @@ +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 7 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Set option OPTION-NAME for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. Other OPTION-NAMEs are +# saved as a flag. +m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option `$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 
+m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME +# are set. +m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about +# the unknown option and exit. +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair. For example, if neither + dnl `shared' nor `disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + ]) +])# _LT_SET_OPTIONS + + +## --------------------------------- ## +## Macros to handle LT_INIT options. 
## +## --------------------------------- ## + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's. +LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and supports the `shared' and +# `disable-shared' LT_INIT 
options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the `static' and +# `disable-static' LT_INIT options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
+m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the `fast-install' +# and `disable-fast-install' LT_INIT options. +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
+m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the `fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the `disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the `pic-only' and `no-pic' +# LT_INIT options. +# MODE is either `yes' or `no'. If omitted, it defaults to `both'. 
+m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for lt_pkg in $withval; do + IFS="$lt_save_ifs" + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [pic_mode=default]) + +test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the `pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + +## ----------------- ## +## LTDL_INIT Options ## +## ----------------- ## + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ltsugar.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ltsugar.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,123 @@ +# ltsugar.m4 -- libtool m4 base layer. 
-*-Autoconf-*- +# +# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as will still need to support +# Autoconf-2.59 which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. +# Note that neither SEPARATOR nor STRING are expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
+m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
+m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/ltversion.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/ltversion.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# @configure_input@ + +# serial 3337 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.4.2]) +m4_define([LT_PACKAGE_REVISION], [1.3337]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.4.2' +macro_revision='1.3337' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/m4/lt~obsolete.m4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/m4/lt~obsolete.m4 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], 
[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) +m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/missing --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/missing Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2012-06-26.16; # UTC + +# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if we it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. 
+if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. + exit $st +fi + +perl_URL=http://www.perl.org/ +flex_URL=http://flex.sourceforge.net/ +gnu_software_URL=http://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'automa4te' program to be rebuilt." 
+ program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." + echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." + ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). 
+exit $st + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,100 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/rdft -I$(top_srcdir)/api -I$(top_srcdir)/tests \ +-I$(top_srcdir)/libbench2 + +if MPI +lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_mpi.la +include_HEADERS = fftw3-mpi.h +nodist_include_HEADERS = fftw3-mpi.f03 fftw3l-mpi.f03 +noinst_PROGRAMS = mpi-bench +endif + +CC=@MPICC@ + +EXTRA_DIST = testsched.c f03api.sh f03-wrap.sh genf03-wrap.pl fftw3-mpi.f03.in fftw3l-mpi.f03.in +BUILT_SOURCES = fftw3-mpi.f03.in fftw3-mpi.f03 fftw3l-mpi.f03.in fftw3l-mpi.f03 f03-wrap.c +CLEANFILES = fftw3-mpi.f03 fftw3l-mpi.f03 + +TRANSPOSE_SRC = transpose-alltoall.c transpose-pairwise.c transpose-recurse.c transpose-problem.c transpose-solve.c mpi-transpose.h +DFT_SRC = dft-serial.c dft-rank-geq2.c dft-rank-geq2-transposed.c dft-rank1.c dft-rank1-bigvec.c dft-problem.c dft-solve.c mpi-dft.h +RDFT_SRC = rdft-serial.c rdft-rank-geq2.c rdft-rank-geq2-transposed.c rdft-rank1-bigvec.c rdft-problem.c rdft-solve.c mpi-rdft.h +RDFT2_SRC = rdft2-serial.c rdft2-rank-geq2.c rdft2-rank-geq2-transposed.c rdft2-problem.c rdft2-solve.c mpi-rdft2.h +SRC = any-true.c api.c block.c choose-radix.c conf.c dtensor.c fftw3-mpi.h ifftw-mpi.h rearrange.c wisdom-api.c f03-wrap.c + +libfftw3@PREC_SUFFIX@_mpi_la_SOURCES = $(SRC) $(TRANSPOSE_SRC) $(DFT_SRC) $(RDFT_SRC) $(RDFT2_SRC) + +libfftw3@PREC_SUFFIX@_mpi_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +libfftw3@PREC_SUFFIX@_mpi_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la @MPILIBS@ + +if THREADS +mpi_bench_CFLAGS = $(PTHREAD_CFLAGS) +if !COMBINED_THREADS 
+LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la +endif +else +if OPENMP +mpi_bench_CFLAGS = $(OPENMP_CFLAGS) +LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la +endif +endif + +mpi_bench_SOURCES = mpi-bench.c $(top_srcdir)/tests/fftw-bench.c $(top_srcdir)/tests/hook.c +mpi_bench_LDADD = libfftw3@PREC_SUFFIX@_mpi.la $(LIBFFTWTHREADS) $(top_builddir)/libfftw3@PREC_SUFFIX@.la $(top_builddir)/libbench2/libbench2.a $(MPILIBS) $(THREADLIBS) + +CHECK = $(top_srcdir)/tests/check.pl +NUMCHECK=10 +CHECKSIZE=10000 +CHECKOPTS = --verbose --random --maxsize=$(CHECKSIZE) -c=$(NUMCHECK) $(CHECK_PL_OPTS) + +if MPI + +check-local: mpi-bench$(EXEEXT) + perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 1 `pwd`/mpi-bench" + @echo "--------------------------------------------------------------" + @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 1 CPU" + @echo "--------------------------------------------------------------" + perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 2 `pwd`/mpi-bench" + @echo "--------------------------------------------------------------" + @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 2 CPUs" + @echo "--------------------------------------------------------------" + perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 3 `pwd`/mpi-bench" + @echo "--------------------------------------------------------------" + @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 3 CPUs" + @echo "--------------------------------------------------------------" + perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 4 `pwd`/mpi-bench" + @echo "--------------------------------------------------------------" + @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 4 CPUs" + @echo "--------------------------------------------------------------" +if SMP + perl -w $(CHECK) $(CHECKOPTS) --mpi --nthreads=2 "$(MPIRUN) -np 3 `pwd`/mpi-bench" + @echo "--------------------------------------------------------------" + @echo " 
MPI FFTW threaded transforms passed "$(NUMCHECK)" tests!" + @echo "--------------------------------------------------------------" +endif + +bigcheck: mpi-bench$(EXEEXT) + $(MAKE) $(AM_MAKEFLAGS) NUMCHECK=100 CHECKSIZE=60000 check-local + +smallcheck: mpi-bench$(EXEEXT) + $(MAKE) $(AM_MAKEFLAGS) NUMCHECK=2 check-local + +endif + +fftw3-mpi.f03: fftw3-mpi.f03.in + sed 's/C_MPI_FINT/@C_MPI_FINT@/' $(srcdir)/fftw3-mpi.f03.in > $@ + +fftw3l-mpi.f03: fftw3l-mpi.f03.in + sed 's/C_MPI_FINT/@C_MPI_FINT@/' $(srcdir)/fftw3l-mpi.f03.in > $@ + +if MAINTAINER_MODE + +fftw3-mpi.f03.in: fftw3-mpi.h f03api.sh $(top_srcdir)/api/genf03.pl + sh $(srcdir)/f03api.sh d f > $@ + +fftw3l-mpi.f03.in: fftw3-mpi.h f03api.sh $(top_srcdir)/api/genf03.pl + sh $(srcdir)/f03api.sh l | grep -v parameter | sed 's/fftw3.f03/fftw3l.f03/' > $@ + +f03-wrap.c: fftw3-mpi.h f03-wrap.sh genf03-wrap.pl + sh $(srcdir)/f03-wrap.sh > $@ + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,913 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@MPI_TRUE@noinst_PROGRAMS = mpi-bench$(EXEEXT) +subdir = mpi +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp $(am__include_HEADERS_DIST) +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" \ + "$(DESTDIR)$(includedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libfftw3@PREC_SUFFIX@_mpi_la_DEPENDENCIES = \ + ../libfftw3@PREC_SUFFIX@.la +am__objects_1 = any-true.lo api.lo block.lo choose-radix.lo conf.lo \ + dtensor.lo rearrange.lo wisdom-api.lo f03-wrap.lo +am__objects_2 = transpose-alltoall.lo transpose-pairwise.lo \ + transpose-recurse.lo transpose-problem.lo transpose-solve.lo +am__objects_3 = dft-serial.lo dft-rank-geq2.lo \ + dft-rank-geq2-transposed.lo dft-rank1.lo dft-rank1-bigvec.lo \ + dft-problem.lo dft-solve.lo +am__objects_4 = rdft-serial.lo rdft-rank-geq2.lo \ + rdft-rank-geq2-transposed.lo rdft-rank1-bigvec.lo \ + rdft-problem.lo rdft-solve.lo +am__objects_5 = rdft2-serial.lo rdft2-rank-geq2.lo \ + rdft2-rank-geq2-transposed.lo rdft2-problem.lo rdft2-solve.lo +am_libfftw3@PREC_SUFFIX@_mpi_la_OBJECTS = $(am__objects_1) \ + $(am__objects_2) $(am__objects_3) $(am__objects_4) \ + $(am__objects_5) +libfftw3@PREC_SUFFIX@_mpi_la_OBJECTS = \ + $(am_libfftw3@PREC_SUFFIX@_mpi_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libfftw3@PREC_SUFFIX@_mpi_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libfftw3@PREC_SUFFIX@_mpi_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@MPI_TRUE@am_libfftw3@PREC_SUFFIX@_mpi_la_rpath = -rpath $(libdir) +PROGRAMS = $(noinst_PROGRAMS) +am_mpi_bench_OBJECTS = mpi_bench-mpi-bench.$(OBJEXT) \ + mpi_bench-fftw-bench.$(OBJEXT) mpi_bench-hook.$(OBJEXT) +mpi_bench_OBJECTS = $(am_mpi_bench_OBJECTS) +am__DEPENDENCIES_1 = +mpi_bench_DEPENDENCIES = libfftw3@PREC_SUFFIX@_mpi.la \ + $(LIBFFTWTHREADS) $(top_builddir)/libfftw3@PREC_SUFFIX@.la \ + $(top_builddir)/libbench2/libbench2.a $(am__DEPENDENCIES_1) \ + 
$(am__DEPENDENCIES_1) +mpi_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(mpi_bench_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libfftw3@PREC_SUFFIX@_mpi_la_SOURCES) $(mpi_bench_SOURCES) +DIST_SOURCES = $(libfftw3@PREC_SUFFIX@_mpi_la_SOURCES) \ + $(mpi_bench_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__include_HEADERS_DIST = fftw3-mpi.h +HEADERS = $(include_HEADERS) $(nodist_include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. 
+am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @MPICC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ 
+OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = 
@sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/rdft -I$(top_srcdir)/api -I$(top_srcdir)/tests \ +-I$(top_srcdir)/libbench2 + +@MPI_TRUE@lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_mpi.la +@MPI_TRUE@include_HEADERS = fftw3-mpi.h +@MPI_TRUE@nodist_include_HEADERS = fftw3-mpi.f03 fftw3l-mpi.f03 +EXTRA_DIST = testsched.c f03api.sh f03-wrap.sh genf03-wrap.pl fftw3-mpi.f03.in fftw3l-mpi.f03.in +BUILT_SOURCES = fftw3-mpi.f03.in fftw3-mpi.f03 fftw3l-mpi.f03.in fftw3l-mpi.f03 f03-wrap.c +CLEANFILES = fftw3-mpi.f03 fftw3l-mpi.f03 +TRANSPOSE_SRC = transpose-alltoall.c transpose-pairwise.c transpose-recurse.c transpose-problem.c transpose-solve.c mpi-transpose.h +DFT_SRC = dft-serial.c dft-rank-geq2.c dft-rank-geq2-transposed.c dft-rank1.c dft-rank1-bigvec.c dft-problem.c dft-solve.c mpi-dft.h +RDFT_SRC = rdft-serial.c rdft-rank-geq2.c rdft-rank-geq2-transposed.c rdft-rank1-bigvec.c rdft-problem.c rdft-solve.c mpi-rdft.h +RDFT2_SRC = rdft2-serial.c rdft2-rank-geq2.c rdft2-rank-geq2-transposed.c rdft2-problem.c rdft2-solve.c mpi-rdft2.h +SRC = any-true.c api.c block.c choose-radix.c conf.c dtensor.c fftw3-mpi.h ifftw-mpi.h rearrange.c wisdom-api.c f03-wrap.c +libfftw3@PREC_SUFFIX@_mpi_la_SOURCES = $(SRC) $(TRANSPOSE_SRC) $(DFT_SRC) $(RDFT_SRC) $(RDFT2_SRC) +libfftw3@PREC_SUFFIX@_mpi_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +libfftw3@PREC_SUFFIX@_mpi_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la @MPILIBS@ +@OPENMP_TRUE@@THREADS_FALSE@mpi_bench_CFLAGS = $(OPENMP_CFLAGS) +@THREADS_TRUE@mpi_bench_CFLAGS = $(PTHREAD_CFLAGS) +@COMBINED_THREADS_FALSE@@THREADS_TRUE@LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la +@OPENMP_TRUE@@THREADS_FALSE@LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la 
+mpi_bench_SOURCES = mpi-bench.c $(top_srcdir)/tests/fftw-bench.c $(top_srcdir)/tests/hook.c +mpi_bench_LDADD = libfftw3@PREC_SUFFIX@_mpi.la $(LIBFFTWTHREADS) $(top_builddir)/libfftw3@PREC_SUFFIX@.la $(top_builddir)/libbench2/libbench2.a $(MPILIBS) $(THREADLIBS) +CHECK = $(top_srcdir)/tests/check.pl +NUMCHECK = 10 +CHECKSIZE = 10000 +CHECKOPTS = --verbose --random --maxsize=$(CHECKSIZE) -c=$(NUMCHECK) $(CHECK_PL_OPTS) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu mpi/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu mpi/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f 
$${locs}; \ + rm -f $${locs}; \ + } + +libfftw3@PREC_SUFFIX@_mpi.la: $(libfftw3@PREC_SUFFIX@_mpi_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_mpi_la_DEPENDENCIES) $(EXTRA_libfftw3@PREC_SUFFIX@_mpi_la_DEPENDENCIES) + $(AM_V_CCLD)$(libfftw3@PREC_SUFFIX@_mpi_la_LINK) $(am_libfftw3@PREC_SUFFIX@_mpi_la_rpath) $(libfftw3@PREC_SUFFIX@_mpi_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_mpi_la_LIBADD) $(LIBS) + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +mpi-bench$(EXEEXT): $(mpi_bench_OBJECTS) $(mpi_bench_DEPENDENCIES) $(EXTRA_mpi_bench_DEPENDENCIES) + @rm -f mpi-bench$(EXEEXT) + $(AM_V_CCLD)$(mpi_bench_LINK) $(mpi_bench_OBJECTS) $(mpi_bench_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any-true.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/block.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/choose-radix.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-rank-geq2-transposed.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-rank-geq2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-rank1-bigvec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-rank1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-serial.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-solve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dtensor.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/f03-wrap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_bench-fftw-bench.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_bench-hook.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_bench-mpi-bench.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-rank-geq2-transposed.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-rank-geq2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-rank1-bigvec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-serial.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-solve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-rank-geq2-transposed.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-rank-geq2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-serial.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-solve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rearrange.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose-alltoall.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose-pairwise.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose-problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose-recurse.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose-solve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wisdom-api.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mpi_bench-mpi-bench.o: mpi-bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -MT mpi_bench-mpi-bench.o -MD -MP -MF $(DEPDIR)/mpi_bench-mpi-bench.Tpo -c -o mpi_bench-mpi-bench.o `test -f 'mpi-bench.c' || echo '$(srcdir)/'`mpi-bench.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mpi_bench-mpi-bench.Tpo $(DEPDIR)/mpi_bench-mpi-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mpi-bench.c' object='mpi_bench-mpi-bench.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -c -o mpi_bench-mpi-bench.o `test -f 'mpi-bench.c' || echo '$(srcdir)/'`mpi-bench.c + 
+mpi_bench-mpi-bench.obj: mpi-bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -MT mpi_bench-mpi-bench.obj -MD -MP -MF $(DEPDIR)/mpi_bench-mpi-bench.Tpo -c -o mpi_bench-mpi-bench.obj `if test -f 'mpi-bench.c'; then $(CYGPATH_W) 'mpi-bench.c'; else $(CYGPATH_W) '$(srcdir)/mpi-bench.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mpi_bench-mpi-bench.Tpo $(DEPDIR)/mpi_bench-mpi-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mpi-bench.c' object='mpi_bench-mpi-bench.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -c -o mpi_bench-mpi-bench.obj `if test -f 'mpi-bench.c'; then $(CYGPATH_W) 'mpi-bench.c'; else $(CYGPATH_W) '$(srcdir)/mpi-bench.c'; fi` + +mpi_bench-fftw-bench.o: $(top_srcdir)/tests/fftw-bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -MT mpi_bench-fftw-bench.o -MD -MP -MF $(DEPDIR)/mpi_bench-fftw-bench.Tpo -c -o mpi_bench-fftw-bench.o `test -f '$(top_srcdir)/tests/fftw-bench.c' || echo '$(srcdir)/'`$(top_srcdir)/tests/fftw-bench.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mpi_bench-fftw-bench.Tpo $(DEPDIR)/mpi_bench-fftw-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(top_srcdir)/tests/fftw-bench.c' object='mpi_bench-fftw-bench.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -c -o mpi_bench-fftw-bench.o `test -f '$(top_srcdir)/tests/fftw-bench.c' || echo 
'$(srcdir)/'`$(top_srcdir)/tests/fftw-bench.c + +mpi_bench-fftw-bench.obj: $(top_srcdir)/tests/fftw-bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -MT mpi_bench-fftw-bench.obj -MD -MP -MF $(DEPDIR)/mpi_bench-fftw-bench.Tpo -c -o mpi_bench-fftw-bench.obj `if test -f '$(top_srcdir)/tests/fftw-bench.c'; then $(CYGPATH_W) '$(top_srcdir)/tests/fftw-bench.c'; else $(CYGPATH_W) '$(srcdir)/$(top_srcdir)/tests/fftw-bench.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mpi_bench-fftw-bench.Tpo $(DEPDIR)/mpi_bench-fftw-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(top_srcdir)/tests/fftw-bench.c' object='mpi_bench-fftw-bench.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -c -o mpi_bench-fftw-bench.obj `if test -f '$(top_srcdir)/tests/fftw-bench.c'; then $(CYGPATH_W) '$(top_srcdir)/tests/fftw-bench.c'; else $(CYGPATH_W) '$(srcdir)/$(top_srcdir)/tests/fftw-bench.c'; fi` + +mpi_bench-hook.o: $(top_srcdir)/tests/hook.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -MT mpi_bench-hook.o -MD -MP -MF $(DEPDIR)/mpi_bench-hook.Tpo -c -o mpi_bench-hook.o `test -f '$(top_srcdir)/tests/hook.c' || echo '$(srcdir)/'`$(top_srcdir)/tests/hook.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mpi_bench-hook.Tpo $(DEPDIR)/mpi_bench-hook.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(top_srcdir)/tests/hook.c' object='mpi_bench-hook.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -c -o mpi_bench-hook.o `test -f '$(top_srcdir)/tests/hook.c' || echo '$(srcdir)/'`$(top_srcdir)/tests/hook.c + +mpi_bench-hook.obj: $(top_srcdir)/tests/hook.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -MT mpi_bench-hook.obj -MD -MP -MF $(DEPDIR)/mpi_bench-hook.Tpo -c -o mpi_bench-hook.obj `if test -f '$(top_srcdir)/tests/hook.c'; then $(CYGPATH_W) '$(top_srcdir)/tests/hook.c'; else $(CYGPATH_W) '$(srcdir)/$(top_srcdir)/tests/hook.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/mpi_bench-hook.Tpo $(DEPDIR)/mpi_bench-hook.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(top_srcdir)/tests/hook.c' object='mpi_bench-hook.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_bench_CFLAGS) $(CFLAGS) -c -o mpi_bench-hook.obj `if test -f '$(top_srcdir)/tests/hook.c'; then $(CYGPATH_W) '$(top_srcdir)/tests/hook.c'; else $(CYGPATH_W) '$(srcdir)/$(top_srcdir)/tests/hook.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || 
list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) +install-nodist_includeHEADERS: $(nodist_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-nodist_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" 
in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +@MPI_FALSE@check-local: +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-local +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS install-nodist_includeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES \ + uninstall-nodist_includeHEADERS + +.MAKE: all check check-am install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am check-local clean \ + clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-includeHEADERS install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-nodist_includeHEADERS install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + 
maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS + + +@MPI_TRUE@check-local: mpi-bench$(EXEEXT) +@MPI_TRUE@ perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 1 `pwd`/mpi-bench" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 1 CPU" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 2 `pwd`/mpi-bench" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 2 CPUs" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 3 `pwd`/mpi-bench" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 3 CPUs" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ perl -w $(CHECK) $(CHECKOPTS) --mpi "$(MPIRUN) -np 4 `pwd`/mpi-bench" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@ @echo " MPI FFTW transforms passed "$(NUMCHECK)" tests, 4 CPUs" +@MPI_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@@SMP_TRUE@ perl -w $(CHECK) $(CHECKOPTS) --mpi --nthreads=2 "$(MPIRUN) -np 3 `pwd`/mpi-bench" +@MPI_TRUE@@SMP_TRUE@ @echo "--------------------------------------------------------------" +@MPI_TRUE@@SMP_TRUE@ @echo " MPI FFTW threaded transforms passed "$(NUMCHECK)" tests!" 
+@MPI_TRUE@@SMP_TRUE@ @echo "--------------------------------------------------------------" + +@MPI_TRUE@bigcheck: mpi-bench$(EXEEXT) +@MPI_TRUE@ $(MAKE) $(AM_MAKEFLAGS) NUMCHECK=100 CHECKSIZE=60000 check-local + +@MPI_TRUE@smallcheck: mpi-bench$(EXEEXT) +@MPI_TRUE@ $(MAKE) $(AM_MAKEFLAGS) NUMCHECK=2 check-local + +fftw3-mpi.f03: fftw3-mpi.f03.in + sed 's/C_MPI_FINT/@C_MPI_FINT@/' $(srcdir)/fftw3-mpi.f03.in > $@ + +fftw3l-mpi.f03: fftw3l-mpi.f03.in + sed 's/C_MPI_FINT/@C_MPI_FINT@/' $(srcdir)/fftw3l-mpi.f03.in > $@ + +@MAINTAINER_MODE_TRUE@fftw3-mpi.f03.in: fftw3-mpi.h f03api.sh $(top_srcdir)/api/genf03.pl +@MAINTAINER_MODE_TRUE@ sh $(srcdir)/f03api.sh d f > $@ + +@MAINTAINER_MODE_TRUE@fftw3l-mpi.f03.in: fftw3-mpi.h f03api.sh $(top_srcdir)/api/genf03.pl +@MAINTAINER_MODE_TRUE@ sh $(srcdir)/f03api.sh l | grep -v parameter | sed 's/fftw3.f03/fftw3l.f03/' > $@ + +@MAINTAINER_MODE_TRUE@f03-wrap.c: fftw3-mpi.h f03-wrap.sh genf03-wrap.pl +@MAINTAINER_MODE_TRUE@ sh $(srcdir)/f03-wrap.sh > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/any-true.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/any-true.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* During planning, if any process fails to create a plan then + all of the processes must fail. This synchronization is implemented + by the following routine, which returns the logical OR of condition over every process in comm (a collective MPI_Allreduce with MPI_LOR), so all of them get the same answer. + + Instead of + if (failure) goto nada; + we instead do: + if (any_true(failure, comm)) goto nada; +*/ + +int XM(any_true)(int condition, MPI_Comm comm) +{ + int result; + MPI_Allreduce(&condition, &result, 1, MPI_INT, MPI_LOR, comm); + return result; +} + +/***********************************************************************/ + +#if defined(FFTW_DEBUG) +/* for debugging, we include an assertion to make sure that + MPI problems all produce equal hashes, as checked by this routine: it calls X(md5end) on its by-value copy m, broadcasts the four words m.s[0..3] held by rank 0, and returns nonzero only when every process in comm holds those same four words (MPI_LAND reduction). */ + +int XM(md5_equal)(md5 m, MPI_Comm comm) +{ + unsigned long s0[4]; + int i, eq_me, eq_all; + + X(md5end)(&m); + for (i = 0; i < 4; ++i) s0[i] = m.s[i]; + MPI_Bcast(s0, 4, MPI_UNSIGNED_LONG, 0, comm); + for (i = 0; i < 4 && s0[i] == m.s[i]; ++i) ; + eq_me = i == 4; + MPI_Allreduce(&eq_me, &eq_all, 1, MPI_INT, MPI_LAND, comm); + return eq_all; +} +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/api.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/api.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,907 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "fftw3-mpi.h" +#include "ifftw-mpi.h" +#include "mpi-transpose.h" +#include "mpi-dft.h" +#include "mpi-rdft.h" +#include "mpi-rdft2.h" + +/* Convert API flags to internal MPI flags. */ +#define MPI_FLAGS(f) ((f) >> 27) + +/*************************************************************************/ + +static int mpi_inited = 0; + +static MPI_Comm problem_comm(const problem *p) { + switch (p->adt->problem_kind) { + case PROBLEM_MPI_DFT: + return ((const problem_mpi_dft *) p)->comm; + case PROBLEM_MPI_RDFT: + return ((const problem_mpi_rdft *) p)->comm; + case PROBLEM_MPI_RDFT2: + return ((const problem_mpi_rdft2 *) p)->comm; + case PROBLEM_MPI_TRANSPOSE: + return ((const problem_mpi_transpose *) p)->comm; + default: + return MPI_COMM_NULL; + } +} + +/* used to synchronize cost measurements (timing or estimation) + across all processes for an MPI problem, which is critical to + ensure that all processes decide to use the same MPI plans + (whereas serial plans need not be synchronized). For a non-MPI problem (problem_comm gives MPI_COMM_NULL) t is returned unchanged; otherwise the result is t reduced over the communicator with MPI_SUM when k == COST_SUM and with MPI_MAX for any other k. */ +static double cost_hook(const problem *p, double t, cost_kind k) +{ + MPI_Comm comm = problem_comm(p); + double tsum; + if (comm == MPI_COMM_NULL) return t; + MPI_Allreduce(&t, &tsum, 1, MPI_DOUBLE, + k == COST_SUM ? MPI_SUM : MPI_MAX, comm); + return tsum; +} + +/* Used to reject wisdom that is not in sync across all processes + for an MPI problem, which is critical to ensure that all processes + decide to use the same MPI plans.
(Even though costs are synchronized, + above, out-of-sync wisdom may result from plans being produced + by communicators that do not span all processes, either from a + user-specified communicator or e.g. from transpose-recurse. */ +static int wisdom_ok_hook(const problem *p, flags_t flags) +{ + MPI_Comm comm = problem_comm(p); + int eq_me, eq_all; + /* unpack flags bitfield, since MPI communications may involve + byte-order changes and MPI cannot do this for bit fields */ +#if SIZEOF_UNSIGNED_INT >= 4 /* must be big enough to hold 20-bit fields */ + unsigned int f[5]; +#else + unsigned long f[5]; /* at least 32 bits as per C standard */ +#endif + + if (comm == MPI_COMM_NULL) return 1; /* non-MPI wisdom is always ok */ + + if (XM(any_true)(0, comm)) return 0; /* some process had nowisdom_hook */ + + /* otherwise, check that the flags and solver index are identical + on all processes in this problem's communicator: the five unpacked fields held by rank 0 are broadcast, each process compares them with its own values, and the return value is the logical AND (MPI_LAND) of those comparisons over the communicator. + + TO DO: possibly we can relax strict equality, but it is + critical to ensure that any flags which affect what plan is + created (and whether the solver is applicable) are the same, + e.g. DESTROY_INPUT, NO_UGLY, etcetera. (If the MPI algorithm + differs between processes, deadlocks/crashes generally result.) */ + f[0] = flags.l; + f[1] = flags.hash_info; + f[2] = flags.timelimit_impatience; + f[3] = flags.u; + f[4] = flags.slvndx; + MPI_Bcast(f, 5, + SIZEOF_UNSIGNED_INT >= 4 ? MPI_UNSIGNED : MPI_UNSIGNED_LONG, + 0, comm); + eq_me = f[0] == flags.l && f[1] == flags.hash_info + && f[2] == flags.timelimit_impatience + && f[3] == flags.u && f[4] == flags.slvndx; + MPI_Allreduce(&eq_me, &eq_all, 1, MPI_INT, MPI_LAND, comm); + return eq_all; +} + +/* This hook is called when wisdom is not found. The any_true here + matches up with the any_true in wisdom_ok_hook, in order to handle + the case where some processes had wisdom (and called wisdom_ok_hook) + and some processes didn't have wisdom (and called nowisdom_hook).
*/ +static void nowisdom_hook(const problem *p) +{ + MPI_Comm comm = problem_comm(p); + if (comm == MPI_COMM_NULL) return; /* nothing to do for non-MPI p */ + XM(any_true)(1, comm); /* signal nowisdom to any wisdom_ok_hook */ +} + +/* needed to synchronize planner bogosity flag, in case non-MPI problems + on a subset of processes encountered bogus wisdom */ +static wisdom_state_t bogosity_hook(wisdom_state_t state, const problem *p) +{ + MPI_Comm comm = problem_comm(p); + if (comm != MPI_COMM_NULL /* an MPI problem */ + && XM(any_true)(state == WISDOM_IS_BOGUS, comm)) /* bogus somewhere */ + return WISDOM_IS_BOGUS; + return state; +} + +void XM(init)(void) +{ + if (!mpi_inited) { + planner *plnr = X(the_planner)(); + plnr->cost_hook = cost_hook; + plnr->wisdom_ok_hook = wisdom_ok_hook; + plnr->nowisdom_hook = nowisdom_hook; + plnr->bogosity_hook = bogosity_hook; + XM(conf_standard)(plnr); + mpi_inited = 1; + } +} + +void XM(cleanup)(void) +{ + X(cleanup)(); + mpi_inited = 0; +} + +/*************************************************************************/ + +static dtensor *mkdtensor_api(int rnk, const XM(ddim) *dims0) +{ + dtensor *x = XM(mkdtensor)(rnk); + int i; + for (i = 0; i < rnk; ++i) { + x->dims[i].n = dims0[i].n; + x->dims[i].b[IB] = dims0[i].ib; + x->dims[i].b[OB] = dims0[i].ob; + } + return x; +} + +static dtensor *default_sz(int rnk, const XM(ddim) *dims0, int n_pes, + int rdft2) +{ + dtensor *sz = XM(mkdtensor)(rnk); + dtensor *sz0 = mkdtensor_api(rnk, dims0); + block_kind k; + int i; + + for (i = 0; i < rnk; ++i) + sz->dims[i].n = dims0[i].n; + + if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1; + + for (i = 0; i < rnk; ++i) { + sz->dims[i].b[IB] = dims0[i].ib ? dims0[i].ib : sz->dims[i].n; + sz->dims[i].b[OB] = dims0[i].ob ? dims0[i].ob : sz->dims[i].n; + } + + /* If we haven't used all of the processes yet, and some of the + block sizes weren't specified (i.e. 
0), then set the + unspecified blocks so as to use as many processes as + possible with as few distributed dimensions as possible. */ + FORALL_BLOCK_KIND(k) { + INT nb = XM(num_blocks_total)(sz, k); + INT np = n_pes / nb; + for (i = 0; i < rnk && np > 1; ++i) + if (!sz0->dims[i].b[k]) { + sz->dims[i].b[k] = XM(default_block)(sz->dims[i].n, np); + nb *= XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[k]); + np = n_pes / nb; + } + } + + if (rdft2) sz->dims[rnk-1].n = dims0[rnk-1].n; + + /* punt for 1d prime */ + if (rnk == 1 && X(is_prime)(sz->dims[0].n)) + sz->dims[0].b[IB] = sz->dims[0].b[OB] = sz->dims[0].n; + + XM(dtensor_destroy)(sz0); + sz0 = XM(dtensor_canonical)(sz, 0); + XM(dtensor_destroy)(sz); + return sz0; +} + +/* allocate simple local (serial) dims array corresponding to n[rnk] */ +static XM(ddim) *simple_dims(int rnk, const ptrdiff_t *n) +{ + XM(ddim) *dims = (XM(ddim) *) MALLOC(sizeof(XM(ddim)) * rnk, + TENSORS); + int i; + for (i = 0; i < rnk; ++i) + dims[i].n = dims[i].ib = dims[i].ob = n[i]; + return dims; +} + +/*************************************************************************/ + +static void local_size(int my_pe, const dtensor *sz, block_kind k, + ptrdiff_t *local_n, ptrdiff_t *local_start) +{ + int i; + if (my_pe >= XM(num_blocks_total)(sz, k)) + for (i = 0; i < sz->rnk; ++i) + local_n[i] = local_start[i] = 0; + else { + XM(block_coords)(sz, k, my_pe, local_start); + for (i = 0; i < sz->rnk; ++i) { + local_n[i] = XM(block)(sz->dims[i].n, sz->dims[i].b[k], + local_start[i]); + local_start[i] *= sz->dims[i].b[k]; + } + } +} + +static INT prod(int rnk, const ptrdiff_t *local_n) +{ + int i; + INT N = 1; + for (i = 0; i < rnk; ++i) N *= local_n[i]; + return N; +} + +ptrdiff_t XM(local_size_guru)(int rnk, const XM(ddim) *dims0, + ptrdiff_t howmany, MPI_Comm comm, + ptrdiff_t *local_n_in, + ptrdiff_t *local_start_in, + ptrdiff_t *local_n_out, + ptrdiff_t *local_start_out, + int sign, unsigned flags) +{ + INT N; + int my_pe, n_pes, i; + dtensor *sz; 
+ + if (rnk == 0) + return howmany; + + MPI_Comm_rank(comm, &my_pe); + MPI_Comm_size(comm, &n_pes); + sz = default_sz(rnk, dims0, n_pes, 0); + + /* Now, we must figure out how much local space the user should + allocate (or at least an upper bound). This depends strongly + on the exact algorithms we employ...ugh! FIXME: get this info + from the solvers somehow? */ + N = 1; /* never return zero allocation size */ + if (rnk > 1 && XM(is_block1d)(sz, IB) && XM(is_block1d)(sz, OB)) { + INT Nafter; + ddim odims[2]; + + /* dft-rank-geq2-transposed */ + odims[0] = sz->dims[0]; odims[1] = sz->dims[1]; /* save */ + /* we may need extra space for transposed intermediate data */ + for (i = 0; i < 2; ++i) + if (XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[IB]) == 1 && + XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[OB]) == 1) { + sz->dims[i].b[IB] + = XM(default_block)(sz->dims[i].n, n_pes); + sz->dims[1-i].b[IB] = sz->dims[1-i].n; + local_size(my_pe, sz, IB, local_n_in, local_start_in); + N = X(imax)(N, prod(rnk, local_n_in)); + sz->dims[i] = odims[i]; + sz->dims[1-i] = odims[1-i]; + break; + } + + /* dft-rank-geq2 */ + Nafter = howmany; + for (i = 1; i < sz->rnk; ++i) Nafter *= sz->dims[i].n; + N = X(imax)(N, (sz->dims[0].n + * XM(block)(Nafter, XM(default_block)(Nafter, n_pes), + my_pe) + howmany - 1) / howmany); + + /* dft-rank-geq2 with dimensions swapped */ + Nafter = howmany * sz->dims[0].n; + for (i = 2; i < sz->rnk; ++i) Nafter *= sz->dims[i].n; + N = X(imax)(N, (sz->dims[1].n + * XM(block)(Nafter, XM(default_block)(Nafter, n_pes), + my_pe) + howmany - 1) / howmany); + } + else if (rnk == 1) { + if (howmany >= n_pes && !MPI_FLAGS(flags)) { /* dft-rank1-bigvec */ + ptrdiff_t n[2], start[2]; + dtensor *sz2 = XM(mkdtensor)(2); + sz2->dims[0] = sz->dims[0]; + sz2->dims[0].b[IB] = sz->dims[0].n; + sz2->dims[1].n = sz2->dims[1].b[OB] = howmany; + sz2->dims[1].b[IB] = XM(default_block)(howmany, n_pes); + local_size(my_pe, sz2, IB, n, start); + XM(dtensor_destroy)(sz2); + N = 
X(imax)(N, (prod(2, n) + howmany - 1) / howmany); + } + else { /* dft-rank1 */ + INT r, m, rblock[2], mblock[2]; + + /* Since the 1d transforms are so different, we require + the user to call local_size_1d for this case. Ugh. */ + CK(sign == FFTW_FORWARD || sign == FFTW_BACKWARD); + + if ((r = XM(choose_radix)(sz->dims[0], n_pes, flags, sign, + rblock, mblock))) { + m = sz->dims[0].n / r; + if (flags & FFTW_MPI_SCRAMBLED_IN) + sz->dims[0].b[IB] = rblock[IB] * m; + else { /* !SCRAMBLED_IN */ + sz->dims[0].b[IB] = r * mblock[IB]; + N = X(imax)(N, rblock[IB] * m); + } + if (flags & FFTW_MPI_SCRAMBLED_OUT) + sz->dims[0].b[OB] = r * mblock[OB]; + else { /* !SCRAMBLED_OUT */ + N = X(imax)(N, r * mblock[OB]); + sz->dims[0].b[OB] = rblock[OB] * m; + } + } + } + } + + local_size(my_pe, sz, IB, local_n_in, local_start_in); + local_size(my_pe, sz, OB, local_n_out, local_start_out); + + /* at least, make sure we have enough space to store input & output */ + N = X(imax)(N, X(imax)(prod(rnk, local_n_in), prod(rnk, local_n_out))); + + XM(dtensor_destroy)(sz); + return N * howmany; +} + +ptrdiff_t XM(local_size_many_transposed)(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t xblock, ptrdiff_t yblock, + MPI_Comm comm, + ptrdiff_t *local_nx, + ptrdiff_t *local_x_start, + ptrdiff_t *local_ny, + ptrdiff_t *local_y_start) +{ + ptrdiff_t N; + XM(ddim) *dims; + ptrdiff_t *local; + + if (rnk == 0) { + *local_nx = *local_ny = 1; + *local_x_start = *local_y_start = 0; + return howmany; + } + + dims = simple_dims(rnk, n); + local = (ptrdiff_t *) MALLOC(sizeof(ptrdiff_t) * rnk * 4, TENSORS); + + /* default 1d block distribution, with transposed output + if yblock < n[1] */ + dims[0].ib = xblock; + if (rnk > 1) { + if (yblock < n[1]) + dims[1].ob = yblock; + else + dims[0].ob = xblock; + } + else + dims[0].ob = xblock; /* FIXME: 1d not really supported here + since we don't have flags/sign */ + + N = XM(local_size_guru)(rnk, dims, howmany, comm, + local, local + rnk, + local + 
2*rnk, local + 3*rnk, + 0, 0); + *local_nx = local[0]; + *local_x_start = local[rnk]; + if (rnk > 1) { + *local_ny = local[2*rnk + 1]; + *local_y_start = local[3*rnk + 1]; + } + else { + *local_ny = *local_nx; + *local_y_start = *local_x_start; + } + X(ifree)(local); + X(ifree)(dims); + return N; +} + +ptrdiff_t XM(local_size_many)(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t xblock, + MPI_Comm comm, + ptrdiff_t *local_nx, + ptrdiff_t *local_x_start) +{ + ptrdiff_t local_ny, local_y_start; + return XM(local_size_many_transposed)(rnk, n, howmany, + xblock, rnk > 1 + ? n[1] : FFTW_MPI_DEFAULT_BLOCK, + comm, + local_nx, local_x_start, + &local_ny, &local_y_start); +} + + +ptrdiff_t XM(local_size_transposed)(int rnk, const ptrdiff_t *n, + MPI_Comm comm, + ptrdiff_t *local_nx, + ptrdiff_t *local_x_start, + ptrdiff_t *local_ny, + ptrdiff_t *local_y_start) +{ + return XM(local_size_many_transposed)(rnk, n, 1, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + comm, + local_nx, local_x_start, + local_ny, local_y_start); +} + +ptrdiff_t XM(local_size)(int rnk, const ptrdiff_t *n, + MPI_Comm comm, + ptrdiff_t *local_nx, + ptrdiff_t *local_x_start) +{ + return XM(local_size_many)(rnk, n, 1, FFTW_MPI_DEFAULT_BLOCK, comm, + local_nx, local_x_start); +} + +ptrdiff_t XM(local_size_many_1d)(ptrdiff_t nx, ptrdiff_t howmany, + MPI_Comm comm, int sign, unsigned flags, + ptrdiff_t *local_nx, ptrdiff_t *local_x_start, + ptrdiff_t *local_ny, ptrdiff_t *local_y_start) +{ + XM(ddim) d; + d.n = nx; + d.ib = d.ob = FFTW_MPI_DEFAULT_BLOCK; + return XM(local_size_guru)(1, &d, howmany, comm, + local_nx, local_x_start, + local_ny, local_y_start, sign, flags); +} + +ptrdiff_t XM(local_size_1d)(ptrdiff_t nx, + MPI_Comm comm, int sign, unsigned flags, + ptrdiff_t *local_nx, ptrdiff_t *local_x_start, + ptrdiff_t *local_ny, ptrdiff_t *local_y_start) +{ + return XM(local_size_many_1d)(nx, 1, comm, sign, flags, + local_nx, local_x_start, + local_ny, local_y_start); +} + +ptrdiff_t 
XM(local_size_2d_transposed)(ptrdiff_t nx, ptrdiff_t ny, + MPI_Comm comm, + ptrdiff_t *local_nx, + ptrdiff_t *local_x_start, + ptrdiff_t *local_ny, + ptrdiff_t *local_y_start) +{ + ptrdiff_t n[2]; + n[0] = nx; n[1] = ny; + return XM(local_size_transposed)(2, n, comm, + local_nx, local_x_start, + local_ny, local_y_start); +} + +ptrdiff_t XM(local_size_2d)(ptrdiff_t nx, ptrdiff_t ny, MPI_Comm comm, + ptrdiff_t *local_nx, ptrdiff_t *local_x_start) +{ + ptrdiff_t n[2]; + n[0] = nx; n[1] = ny; + return XM(local_size)(2, n, comm, local_nx, local_x_start); +} + +ptrdiff_t XM(local_size_3d_transposed)(ptrdiff_t nx, ptrdiff_t ny, + ptrdiff_t nz, + MPI_Comm comm, + ptrdiff_t *local_nx, + ptrdiff_t *local_x_start, + ptrdiff_t *local_ny, + ptrdiff_t *local_y_start) +{ + ptrdiff_t n[3]; + n[0] = nx; n[1] = ny; n[2] = nz; + return XM(local_size_transposed)(3, n, comm, + local_nx, local_x_start, + local_ny, local_y_start); +} + +ptrdiff_t XM(local_size_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz, + MPI_Comm comm, + ptrdiff_t *local_nx, ptrdiff_t *local_x_start) +{ + ptrdiff_t n[3]; + n[0] = nx; n[1] = ny; n[2] = nz; + return XM(local_size)(3, n, comm, local_nx, local_x_start); +} + +/*************************************************************************/ +/* Transpose API */ + +X(plan) XM(plan_many_transpose)(ptrdiff_t nx, ptrdiff_t ny, + ptrdiff_t howmany, + ptrdiff_t xblock, ptrdiff_t yblock, + R *in, R *out, + MPI_Comm comm, unsigned flags) +{ + int n_pes; + XM(init)(); + + if (howmany < 0 || xblock < 0 || yblock < 0 || + nx <= 0 || ny <= 0) return 0; + + MPI_Comm_size(comm, &n_pes); + if (!xblock) xblock = XM(default_block)(nx, n_pes); + if (!yblock) yblock = XM(default_block)(ny, n_pes); + if (n_pes < XM(num_blocks)(nx, xblock) + || n_pes < XM(num_blocks)(ny, yblock)) + return 0; + + return + X(mkapiplan)(FFTW_FORWARD, flags, + XM(mkproblem_transpose)(nx, ny, howmany, + in, out, xblock, yblock, + comm, MPI_FLAGS(flags))); +} + +X(plan) XM(plan_transpose)(ptrdiff_t nx, 
ptrdiff_t ny, R *in, R *out, + MPI_Comm comm, unsigned flags) + +{ + return XM(plan_many_transpose)(nx, ny, 1, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + in, out, comm, flags); +} + +/*************************************************************************/ +/* Complex DFT API */ + +X(plan) XM(plan_guru_dft)(int rnk, const XM(ddim) *dims0, + ptrdiff_t howmany, + C *in, C *out, + MPI_Comm comm, int sign, unsigned flags) +{ + int n_pes, i; + dtensor *sz; + + XM(init)(); + + if (howmany < 0 || rnk < 1) return 0; + for (i = 0; i < rnk; ++i) + if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0) + return 0; + + MPI_Comm_size(comm, &n_pes); + sz = default_sz(rnk, dims0, n_pes, 0); + + if (XM(num_blocks_total)(sz, IB) > n_pes + || XM(num_blocks_total)(sz, OB) > n_pes) { + XM(dtensor_destroy)(sz); + return 0; + } + + return + X(mkapiplan)(sign, flags, + XM(mkproblem_dft_d)(sz, howmany, + (R *) in, (R *) out, + comm, sign, + MPI_FLAGS(flags))); +} + +X(plan) XM(plan_many_dft)(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + C *in, C *out, + MPI_Comm comm, int sign, unsigned flags) +{ + XM(ddim) *dims = simple_dims(rnk, n); + X(plan) pln; + + if (rnk == 1) { + dims[0].ib = iblock; + dims[0].ob = oblock; + } + else if (rnk > 1) { + dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock; + dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock; + } + + pln = XM(plan_guru_dft)(rnk,dims,howmany, in,out, comm, sign, flags); + X(ifree)(dims); + return pln; +} + +X(plan) XM(plan_dft)(int rnk, const ptrdiff_t *n, C *in, C *out, + MPI_Comm comm, int sign, unsigned flags) +{ + return XM(plan_many_dft)(rnk, n, 1, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + in, out, comm, sign, flags); +} + +X(plan) XM(plan_dft_1d)(ptrdiff_t nx, C *in, C *out, + MPI_Comm comm, int sign, unsigned flags) +{ + return XM(plan_dft)(1, &nx, in, out, comm, sign, flags); +} + +X(plan) XM(plan_dft_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, C 
*out, + MPI_Comm comm, int sign, unsigned flags) +{ + ptrdiff_t n[2]; + n[0] = nx; n[1] = ny; + return XM(plan_dft)(2, n, in, out, comm, sign, flags); +} + +X(plan) XM(plan_dft_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz, + C *in, C *out, + MPI_Comm comm, int sign, unsigned flags) +{ + ptrdiff_t n[3]; + n[0] = nx; n[1] = ny; n[2] = nz; + return XM(plan_dft)(3, n, in, out, comm, sign, flags); +} + +/*************************************************************************/ +/* R2R API */ + +X(plan) XM(plan_guru_r2r)(int rnk, const XM(ddim) *dims0, + ptrdiff_t howmany, + R *in, R *out, + MPI_Comm comm, const X(r2r_kind) *kind, + unsigned flags) +{ + int n_pes, i; + dtensor *sz; + rdft_kind *k; + X(plan) pln; + + XM(init)(); + + if (howmany < 0 || rnk < 1) return 0; + for (i = 0; i < rnk; ++i) + if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0) + return 0; + + k = X(map_r2r_kind)(rnk, kind); + + MPI_Comm_size(comm, &n_pes); + sz = default_sz(rnk, dims0, n_pes, 0); + + if (XM(num_blocks_total)(sz, IB) > n_pes + || XM(num_blocks_total)(sz, OB) > n_pes) { + XM(dtensor_destroy)(sz); + return 0; + } + + pln = X(mkapiplan)(0, flags, + XM(mkproblem_rdft_d)(sz, howmany, + in, out, + comm, k, MPI_FLAGS(flags))); + X(ifree0)(k); + return pln; +} + +X(plan) XM(plan_many_r2r)(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + R *in, R *out, + MPI_Comm comm, const X(r2r_kind) *kind, + unsigned flags) +{ + XM(ddim) *dims = simple_dims(rnk, n); + X(plan) pln; + + if (rnk == 1) { + dims[0].ib = iblock; + dims[0].ob = oblock; + } + else if (rnk > 1) { + dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock; + dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock; + } + + pln = XM(plan_guru_r2r)(rnk,dims,howmany, in,out, comm, kind, flags); + X(ifree)(dims); + return pln; +} + +X(plan) XM(plan_r2r)(int rnk, const ptrdiff_t *n, R *in, R *out, + MPI_Comm comm, + const X(r2r_kind) *kind, + unsigned flags) +{ + return 
XM(plan_many_r2r)(rnk, n, 1, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + in, out, comm, kind, flags); +} + +X(plan) XM(plan_r2r_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, R *out, + MPI_Comm comm, + X(r2r_kind) kindx, X(r2r_kind) kindy, + unsigned flags) +{ + ptrdiff_t n[2]; + X(r2r_kind) kind[2]; + n[0] = nx; n[1] = ny; + kind[0] = kindx; kind[1] = kindy; + return XM(plan_r2r)(2, n, in, out, comm, kind, flags); +} + +X(plan) XM(plan_r2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz, + R *in, R *out, + MPI_Comm comm, + X(r2r_kind) kindx, X(r2r_kind) kindy, + X(r2r_kind) kindz, + unsigned flags) +{ + ptrdiff_t n[3]; + X(r2r_kind) kind[3]; + n[0] = nx; n[1] = ny; n[2] = nz; + kind[0] = kindx; kind[1] = kindy; kind[2] = kindz; + return XM(plan_r2r)(3, n, in, out, comm, kind, flags); +} + +/*************************************************************************/ +/* R2C/C2R API */ + +static X(plan) plan_guru_rdft2(int rnk, const XM(ddim) *dims0, + ptrdiff_t howmany, + R *r, C *c, + MPI_Comm comm, rdft_kind kind, unsigned flags) +{ + int n_pes, i; + dtensor *sz; + R *cr = (R *) c; + + XM(init)(); + + if (howmany < 0 || rnk < 2) return 0; + for (i = 0; i < rnk; ++i) + if (dims0[i].n < 1 || dims0[i].ib < 0 || dims0[i].ob < 0) + return 0; + + MPI_Comm_size(comm, &n_pes); + sz = default_sz(rnk, dims0, n_pes, 1); + + sz->dims[rnk-1].n = dims0[rnk-1].n / 2 + 1; + if (XM(num_blocks_total)(sz, IB) > n_pes + || XM(num_blocks_total)(sz, OB) > n_pes) { + XM(dtensor_destroy)(sz); + return 0; + } + sz->dims[rnk-1].n = dims0[rnk-1].n; + + if (kind == R2HC) + return X(mkapiplan)(0, flags, + XM(mkproblem_rdft2_d)(sz, howmany, + r, cr, comm, R2HC, + MPI_FLAGS(flags))); + else + return X(mkapiplan)(0, flags, + XM(mkproblem_rdft2_d)(sz, howmany, + cr, r, comm, HC2R, + MPI_FLAGS(flags))); +} + +X(plan) XM(plan_many_dft_r2c)(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + R *in, C *out, + MPI_Comm comm, unsigned flags) +{ + XM(ddim) *dims = 
simple_dims(rnk, n); + X(plan) pln; + + if (rnk == 1) { + dims[0].ib = iblock; + dims[0].ob = oblock; + } + else if (rnk > 1) { + dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock; + dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock; + } + + pln = plan_guru_rdft2(rnk,dims,howmany, in,out, comm, R2HC, flags); + X(ifree)(dims); + return pln; +} + +X(plan) XM(plan_many_dft_c2r)(int rnk, const ptrdiff_t *n, + ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + C *in, R *out, + MPI_Comm comm, unsigned flags) +{ + XM(ddim) *dims = simple_dims(rnk, n); + X(plan) pln; + + if (rnk == 1) { + dims[0].ib = iblock; + dims[0].ob = oblock; + } + else if (rnk > 1) { + dims[0 != (flags & FFTW_MPI_TRANSPOSED_IN)].ib = iblock; + dims[0 != (flags & FFTW_MPI_TRANSPOSED_OUT)].ob = oblock; + } + + pln = plan_guru_rdft2(rnk,dims,howmany, out,in, comm, HC2R, flags); + X(ifree)(dims); + return pln; +} + +X(plan) XM(plan_dft_r2c)(int rnk, const ptrdiff_t *n, R *in, C *out, + MPI_Comm comm, unsigned flags) +{ + return XM(plan_many_dft_r2c)(rnk, n, 1, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + in, out, comm, flags); +} + +X(plan) XM(plan_dft_r2c_2d)(ptrdiff_t nx, ptrdiff_t ny, R *in, C *out, + MPI_Comm comm, unsigned flags) +{ + ptrdiff_t n[2]; + n[0] = nx; n[1] = ny; + return XM(plan_dft_r2c)(2, n, in, out, comm, flags); +} + +X(plan) XM(plan_dft_r2c_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz, + R *in, C *out, MPI_Comm comm, unsigned flags) +{ + ptrdiff_t n[3]; + n[0] = nx; n[1] = ny; n[2] = nz; + return XM(plan_dft_r2c)(3, n, in, out, comm, flags); +} + +X(plan) XM(plan_dft_c2r)(int rnk, const ptrdiff_t *n, C *in, R *out, + MPI_Comm comm, unsigned flags) +{ + return XM(plan_many_dft_c2r)(rnk, n, 1, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + in, out, comm, flags); +} + +X(plan) XM(plan_dft_c2r_2d)(ptrdiff_t nx, ptrdiff_t ny, C *in, R *out, + MPI_Comm comm, unsigned flags) +{ + ptrdiff_t n[2]; + n[0] = nx; n[1] = ny; + return XM(plan_dft_c2r)(2, n, 
in, out, comm, flags); +} + +X(plan) XM(plan_dft_c2r_3d)(ptrdiff_t nx, ptrdiff_t ny, ptrdiff_t nz, + C *in, R *out, MPI_Comm comm, unsigned flags) +{ + ptrdiff_t n[3]; + n[0] = nx; n[1] = ny; n[2] = nz; + return XM(plan_dft_c2r)(3, n, in, out, comm, flags); +} + +/*************************************************************************/ +/* New-array execute functions */ + +void XM(execute_dft)(const X(plan) p, C *in, C *out) { + /* internally, MPI plans are just rdft plans */ + X(execute_r2r)(p, (R*) in, (R*) out); +} + +void XM(execute_dft_r2c)(const X(plan) p, R *in, C *out) { + /* internally, MPI plans are just rdft plans */ + X(execute_r2r)(p, in, (R*) out); +} + +void XM(execute_dft_c2r)(const X(plan) p, C *in, R *out) { + /* internally, MPI plans are just rdft plans */ + X(execute_r2r)(p, (R*) in, out); +} + +void XM(execute_r2r)(const X(plan) p, R *in, R *out) { + /* internally, MPI plans are just rdft plans */ + X(execute_r2r)(p, in, out); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/block.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/block.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +INT XM(num_blocks)(INT n, INT block) +{ + return (n + block - 1) / block; +} + +int XM(num_blocks_ok)(INT n, INT block, MPI_Comm comm) +{ + int n_pes; + MPI_Comm_size(comm, &n_pes); + return n_pes >= XM(num_blocks)(n, block); +} + +/* Pick a default block size for dividing a problem of size n among + n_pes processes. Divide as equally as possible, while minimizing + the maximum block size among the processes as well as the number of + processes with nonzero blocks. */ +INT XM(default_block)(INT n, int n_pes) +{ + return ((n + n_pes - 1) / n_pes); +} + +/* For a given block size and dimension n, compute the block size + on the given process. */ +INT XM(block)(INT n, INT block, int which_block) +{ + INT d = n - which_block * block; + return d <= 0 ? 0 : (d > block ? block : d); +} + +static INT num_blocks_kind(const ddim *dim, block_kind k) +{ + return XM(num_blocks)(dim->n, dim->b[k]); +} + +INT XM(num_blocks_total)(const dtensor *sz, block_kind k) +{ + if (FINITE_RNK(sz->rnk)) { + int i; + INT ntot = 1; + for (i = 0; i < sz->rnk; ++i) + ntot *= num_blocks_kind(sz->dims + i, k); + return ntot; + } + else + return 0; +} + +int XM(idle_process)(const dtensor *sz, block_kind k, int which_pe) +{ + return (which_pe >= XM(num_blocks_total)(sz, k)); +} + +/* Given a non-idle process which_pe, computes the coordinate + vector coords[rnk] giving the coordinates of a block in the + matrix of blocks. k specifies whether we are talking about + the input or output data distribution. 
*/ +void XM(block_coords)(const dtensor *sz, block_kind k, int which_pe, + INT *coords) +{ + int i; + A(!XM(idle_process)(sz, k, which_pe) && FINITE_RNK(sz->rnk)); + for (i = sz->rnk - 1; i >= 0; --i) { + INT nb = num_blocks_kind(sz->dims + i, k); + coords[i] = which_pe % nb; + which_pe /= nb; + } +} + +INT XM(total_block)(const dtensor *sz, block_kind k, int which_pe) +{ + if (XM(idle_process)(sz, k, which_pe)) + return 0; + else { + int i; + INT N = 1, *coords; + STACK_MALLOC(INT*, coords, sizeof(INT) * sz->rnk); + XM(block_coords)(sz, k, which_pe, coords); + for (i = 0; i < sz->rnk; ++i) + N *= XM(block)(sz->dims[i].n, sz->dims[i].b[k], coords[i]); + STACK_FREE(coords); + return N; + } +} + +/* returns whether sz is local for dims >= dim */ +int XM(is_local_after)(int dim, const dtensor *sz, block_kind k) +{ + if (FINITE_RNK(sz->rnk)) + for (; dim < sz->rnk; ++dim) + if (XM(num_blocks)(sz->dims[dim].n, sz->dims[dim].b[k]) > 1) + return 0; + return 1; +} + +int XM(is_local)(const dtensor *sz, block_kind k) +{ + return XM(is_local_after)(0, sz, k); +} + +/* Return whether sz is distributed for k according to a simple + 1d block distribution in the first or second dimensions */ +int XM(is_block1d)(const dtensor *sz, block_kind k) +{ + int i; + if (!FINITE_RNK(sz->rnk)) return 0; + for (i = 0; i < sz->rnk && num_blocks_kind(sz->dims + i, k) == 1; ++i) ; + return(i < sz->rnk && i < 2 && XM(is_local_after)(i + 1, sz, k)); + +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/choose-radix.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/choose-radix.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, 
or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* Return the radix r for a 1d MPI transform of a distributed dimension d, + with the given flags and transform size. That is, decomposes d.n + as r * m, Cooley-Tukey style. Also computes the block sizes rblock + and mblock. Returns 0 if such a decomposition is not feasible. + This is unfortunately somewhat complicated. + + A distributed Cooley-Tukey algorithm works as follows (see dft-rank1.c): + + d.n is initially distributed as an m x r array with block size mblock[IB]. + Then it is internally transposed to an r x m array with block size + rblock[IB]. Then it is internally transposed to m x r again with block + size mblock[OB]. Finally, it is transposed to r x m with block size + rblock[IB]. + + If flags & SCRAMBLED_IN, then the first transpose is skipped (the array + starts out as r x m). If flags & SCRAMBLED_OUT, then the last transpose + is skipped (the array ends up as m x r). To make sure the forward + and backward transforms use the same "scrambling" format, we swap r + and m when sign != FFT_SIGN. + + There are some downsides to this, especially in the case where + either m or r is not divisible by n_pes. For one thing, it means + that in general we can't use the same block size for the input and + output. For another thing, it means that we can't in general honor + a user's "requested" block sizes in d.b[]. Therefore, for simplicity, + we simply ignore d.b[] for now. 
+*/ +INT XM(choose_radix)(ddim d, int n_pes, unsigned flags, int sign, + INT rblock[2], INT mblock[2]) +{ + INT r, m; + + UNUSED(flags); /* we would need this if we paid attention to d.b[*] */ + + /* If n_pes is a factor of d.n, then choose r to be d.n / n_pes. + This not only ensures that the input (the m dimension) is + equally distributed if possible, and at the r dimension is + maximally equally distributed (if d.n/n_pes >= n_pes), it also + makes one of the local transpositions in the algorithm + trivial. */ + if (d.n % n_pes == 0 /* it's good if n_pes divides d.n ...*/ + && d.n / n_pes >= n_pes /* .. unless we can't use n_pes processes */) + r = d.n / n_pes; + else { /* n_pes does not divide d.n, pick a factor close to sqrt(d.n) */ + for (r = X(isqrt)(d.n); d.n % r != 0; ++r) + ; + } + if (r == 1 || r == d.n) return 0; /* punt if we can't reduce size */ + + if (sign != FFT_SIGN) { /* swap {m,r} so that scrambling is reversible */ + m = r; + r = d.n / m; + } + else + m = d.n / r; + + rblock[IB] = rblock[OB] = XM(default_block)(r, n_pes); + mblock[IB] = mblock[OB] = XM(default_block)(m, n_pes); + + return r; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/conf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/conf.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "mpi-transpose.h" +#include "mpi-dft.h" +#include "mpi-rdft.h" +#include "mpi-rdft2.h" + +static const solvtab s = +{ + SOLVTAB(XM(transpose_pairwise_register)), + SOLVTAB(XM(transpose_alltoall_register)), + SOLVTAB(XM(transpose_recurse_register)), + SOLVTAB(XM(dft_rank_geq2_register)), + SOLVTAB(XM(dft_rank_geq2_transposed_register)), + SOLVTAB(XM(dft_serial_register)), + SOLVTAB(XM(dft_rank1_bigvec_register)), + SOLVTAB(XM(dft_rank1_register)), + SOLVTAB(XM(rdft_rank_geq2_register)), + SOLVTAB(XM(rdft_rank_geq2_transposed_register)), + SOLVTAB(XM(rdft_serial_register)), + SOLVTAB(XM(rdft_rank1_bigvec_register)), + SOLVTAB(XM(rdft2_rank_geq2_register)), + SOLVTAB(XM(rdft2_rank_geq2_transposed_register)), + SOLVTAB(XM(rdft2_serial_register)), + SOLVTAB_END +}; + +void XM(conf_standard)(planner *p) +{ + X(solvtab_exec)(s, p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-dft.h" + +static void destroy(problem *ego_) +{ + problem_mpi_dft *ego = (problem_mpi_dft *) ego_; + XM(dtensor_destroy)(ego->sz); + MPI_Comm_free(&ego->comm); + X(ifree)(ego_); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + int i; + X(md5puts)(m, "mpi-dft"); + X(md5int)(m, p->I == p->O); + /* don't include alignment -- may differ between processes + X(md5int)(m, X(alignment_of)(p->I)); + X(md5int)(m, X(alignment_of)(p->O)); + ... note that applicability of MPI plans does not depend + on alignment (although optimality may, in principle). */ + XM(dtensor_md5)(m, p->sz); + X(md5INT)(m, p->vn); + X(md5int)(m, p->sign); + X(md5int)(m, p->flags); + MPI_Comm_size(p->comm, &i); X(md5int)(m, i); + A(XM(md5_equal)(*m, p->comm)); +} + +static void print(const problem *ego_, printer *p) +{ + const problem_mpi_dft *ego = (const problem_mpi_dft *) ego_; + int i; + p->print(p, "(mpi-dft %d %d %d ", + ego->I == ego->O, + X(alignment_of)(ego->I), + X(alignment_of)(ego->O)); + XM(dtensor_print)(ego->sz, p); + p->print(p, " %D %d %d", ego->vn, ego->sign, ego->flags); + MPI_Comm_size(ego->comm, &i); p->print(p, " %d)", i); +} + +static void zero(const problem *ego_) +{ + const problem_mpi_dft *ego = (const problem_mpi_dft *) ego_; + R *I = ego->I; + INT i, N; + int my_pe; + + MPI_Comm_rank(ego->comm, &my_pe); + N = 2 * ego->vn * XM(total_block)(ego->sz, IB, my_pe); + for (i = 0; i < N; ++i) I[i] = K(0.0); +} + +static const problem_adt padt = +{ + PROBLEM_MPI_DFT, + hash, + zero, + print, + destroy +}; + +problem *XM(mkproblem_dft)(const dtensor *sz, INT vn, + R *I, R *O, + MPI_Comm comm, + int sign, + unsigned flags) +{ + problem_mpi_dft *ego = + (problem_mpi_dft 
*)X(mkproblem)(sizeof(problem_mpi_dft), &padt); + int n_pes; + + A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk)); + MPI_Comm_size(comm, &n_pes); + A(n_pes >= XM(num_blocks_total)(sz, IB) + && n_pes >= XM(num_blocks_total)(sz, OB)); + A(vn >= 0); + A(sign == -1 || sign == 1); + + /* enforce pointer equality if untainted pointers are equal */ + if (UNTAINT(I) == UNTAINT(O)) + I = O = JOIN_TAINT(I, O); + + ego->sz = XM(dtensor_canonical)(sz, 1); + ego->vn = vn; + ego->I = I; + ego->O = O; + ego->sign = sign; + + /* canonicalize: replace TRANSPOSED_IN with TRANSPOSED_OUT by + swapping the first two dimensions (for rnk > 1) */ + if ((flags & TRANSPOSED_IN) && ego->sz->rnk > 1) { + ddim dim0 = ego->sz->dims[0]; + ego->sz->dims[0] = ego->sz->dims[1]; + ego->sz->dims[1] = dim0; + flags &= ~TRANSPOSED_IN; + flags ^= TRANSPOSED_OUT; + } + ego->flags = flags; + + MPI_Comm_dup(comm, &ego->comm); + + return &(ego->super); +} + +problem *XM(mkproblem_dft_d)(dtensor *sz, INT vn, + R *I, R *O, + MPI_Comm comm, + int sign, + unsigned flags) +{ + problem *p = XM(mkproblem_dft)(sz, vn, I, O, comm, sign, flags); + XM(dtensor_destroy)(sz); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-rank-geq2-transposed.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-rank-geq2-transposed.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex DFTs of rank >= 2, for the case where we are distributed + across the first dimension only, and the output is transposed both + in data distribution and in ordering (for the first 2 dimensions). + + (Note that we don't have to handle the case where the input is + transposed, since this is equivalent to transposed output with the + first two dimensions swapped, and is automatically canonicalized as + such by dft-problem.c. */ + +#include "mpi-dft.h" +#include "mpi-transpose.h" +#include "dft.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_dft super; + + plan *cld1, *cldt, *cld2; + INT roff, ioff; + int preserve_input; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_dft *cld1, *cld2; + plan_rdft *cldt; + INT roff = ego->roff, ioff = ego->ioff; + + /* DFT local dimensions */ + cld1 = (plan_dft *) ego->cld1; + if (ego->preserve_input) { + cld1->apply(ego->cld1, I+roff, I+ioff, O+roff, O+ioff); + I = O; + } + else + cld1->apply(ego->cld1, I+roff, I+ioff, I+roff, I+ioff); + + /* global transpose */ + cldt = (plan_rdft *) ego->cldt; + cldt->apply(ego->cldt, I, O); + + /* DFT final local dimension */ + cld2 = (plan_dft *) ego->cld2; + cld2->apply(ego->cld2, O+roff, O+ioff, O+roff, O+ioff); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + return (1 + && p->sz->rnk > 1 + && p->flags == TRANSPOSED_OUT + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && XM(is_local_after)(1, p->sz, IB) + && 
XM(is_local_after)(2, p->sz, OB) + && XM(num_blocks)(p->sz->dims[0].n, p->sz->dims[0].b[OB]) == 1 + && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */ + || !XM(dft_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cldt, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cldt); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-dft-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", + ego->cld1, ego->cldt, ego->cld2); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_dft *p; + P *pln; + plan *cld1 = 0, *cldt = 0, *cld2 = 0; + R *ri, *ii, *ro, *io, *I, *O; + tensor *sz; + int i, my_pe, n_pes; + INT nrest; + static const plan_adt padt = { + XM(dft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_dft *) p_; + + X(extract_reim)(p->sign, I = p->I, &ri, &ii); + X(extract_reim)(p->sign, O = p->O, &ro, &io); + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) + I = O; + else { + ro = ri; + io = ii; + } + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */ + i = p->sz->rnk - 2; A(i >= 0); + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = 2 * p->vn; + for (--i; i >= 0; --i) { + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is; + } + nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n; + { + INT is = sz->dims[0].n * 
sz->dims[0].is; + INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe); + cld1 = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(sz, + X(mktensor_2d)(b, is, is, + p->vn, 2, 2), + ri, ii, ro, io)); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + nrest *= p->vn; + cldt = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + p->sz->dims[0].n, p->sz->dims[1].n, nrest * 2, + I, O, + p->sz->dims[0].b[IB], p->sz->dims[1].b[OB], + p->comm, 0)); + if (XM(any_true)(!cldt, p->comm)) goto nada; + + X(extract_reim)(p->sign, O, &ro, &io); + { + INT is = p->sz->dims[0].n * nrest * 2; + INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe); + cld2 = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)( + p->sz->dims[0].n, + nrest * 2, nrest * 2), + X(mktensor_2d)(b, is, is, + nrest, 2, 2), + ro, io, ro, io)); + if (XM(any_true)(!cld2, p->comm)) goto nada; + } + + pln = MKPLAN_MPI_DFT(P, &padt, apply); + pln->cld1 = cld1; + pln->cldt = cldt; + pln->cld2 = cld2; + pln->preserve_input = ego->preserve_input ? 
2 : NO_DESTROY_INPUTP(plnr); + pln->roff = ri - p->I; + pln->ioff = ii - p->I; + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + X(ops_add2)(&cldt->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cldt); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(dft_rank_geq2_transposed_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-rank-geq2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-rank-geq2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex DFTs of rank >= 2, for the case where we are distributed + across the first dimension only, and the output is not transposed. 
*/ + +#include "mpi-dft.h" +#include "dft.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_dft super; + + plan *cld1, *cld2; + INT roff, ioff; + int preserve_input; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_dft *cld1; + plan_rdft *cld2; + INT roff = ego->roff, ioff = ego->ioff; + + /* DFT local dimensions */ + cld1 = (plan_dft *) ego->cld1; + if (ego->preserve_input) { + cld1->apply(ego->cld1, I+roff, I+ioff, O+roff, O+ioff); + I = O; + } + else + cld1->apply(ego->cld1, I+roff, I+ioff, I+roff, I+ioff); + + /* DFT non-local dimension (via dft-rank1-bigvec, usually): */ + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, I, O); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + return (1 + && p->sz->rnk > 1 + && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */ + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && XM(is_local_after)(1, p->sz, IB) + && XM(is_local_after)(1, p->sz, OB) + && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */ + || !XM(dft_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-dft-rank-geq2%s%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", ego->cld1, ego->cld2); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_dft *p; + P *pln; + plan *cld1 = 0, *cld2 
= 0; + R *ri, *ii, *ro, *io, *I, *O; + tensor *sz; + dtensor *sz2; + int i, my_pe, n_pes; + INT nrest; + static const plan_adt padt = { + XM(dft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_dft *) p_; + + X(extract_reim)(p->sign, I = p->I, &ri, &ii); + X(extract_reim)(p->sign, O = p->O, &ro, &io); + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) + I = O; + else { + ro = ri; + io = ii; + } + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */ + i = p->sz->rnk - 2; A(i >= 0); + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = 2 * p->vn; + for (--i; i >= 0; --i) { + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is; + } + nrest = X(tensor_sz)(sz); + { + INT is = sz->dims[0].n * sz->dims[0].is; + INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe); + cld1 = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(sz, + X(mktensor_2d)(b, is, is, + p->vn, 2, 2), + ri, ii, ro, io)); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */ + sz2->dims[0] = p->sz->dims[0]; + cld2 = X(mkplan_d)(plnr, XM(mkproblem_dft_d)(sz2, nrest * p->vn, + I, O, p->comm, p->sign, + RANK1_BIGVEC_ONLY)); + if (XM(any_true)(!cld2, p->comm)) goto nada; + + pln = MKPLAN_MPI_DFT(P, &padt, apply); + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->preserve_input = ego->preserve_input ? 
2 : NO_DESTROY_INPUTP(plnr); + pln->roff = ri - p->I; + pln->ioff = ii - p->I; + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(dft_rank_geq2_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-rank1-bigvec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-rank1-bigvec.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex DFTs of rank == 1 when the vector length vn is >= # processes. + In this case, we don't need to use a six-step type algorithm, and can + instead transpose the DFT dimension with the vector dimension to + make the DFT local. 
*/ + +#include "mpi-dft.h" +#include "mpi-transpose.h" +#include "dft.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ + rearrangement rearrange; +} S; + +typedef struct { + plan_mpi_dft super; + + plan *cldt_before, *cld, *cldt_after; + INT roff, ioff; + int preserve_input; + rearrangement rearrange; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + plan_rdft *cldt_before, *cldt_after; + INT roff = ego->roff, ioff = ego->ioff; + + /* global transpose */ + cldt_before = (plan_rdft *) ego->cldt_before; + cldt_before->apply(ego->cldt_before, I, O); + + if (ego->preserve_input) I = O; + + /* 1d DFT(s) */ + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, O+roff, O+ioff, I+roff, I+ioff); + + /* global transpose */ + cldt_after = (plan_rdft *) ego->cldt_after; + cldt_after->apply(ego->cldt_after, I, O); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + int n_pes; + MPI_Comm_size(p->comm, &n_pes); + return (1 + && p->sz->rnk == 1 + && !(p->flags & ~RANK1_BIGVEC_ONLY) + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && (p->vn >= n_pes /* TODO: relax this, using more memory? 
*/ + || (p->flags & RANK1_BIGVEC_ONLY)) + + && XM(rearrange_applicable)(ego->rearrange, + p->sz->dims[0], p->vn, n_pes) + + && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */ + || !XM(dft_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldt_before, wakefulness); + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldt_after, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldt_after); + X(plan_destroy_internal)(ego->cld); + X(plan_destroy_internal)(ego->cldt_before); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const char descrip[][16] = { "contig", "discontig", "square-after", + "square-middle", "square-before" }; + p->print(p, "(mpi-dft-rank1-bigvec/%s%s %(%p%) %(%p%) %(%p%))", + descrip[ego->rearrange], ego->preserve_input==2 ?"/p":"", + ego->cldt_before, ego->cld, ego->cldt_after); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_dft *p; + P *pln; + plan *cld = 0, *cldt_before = 0, *cldt_after = 0; + R *ri, *ii, *ro, *io, *I, *O; + INT yblock, yb, nx, ny, vn; + int my_pe, n_pes; + static const plan_adt padt = { + XM(dft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_dft *) p_; + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + nx = p->sz->dims[0].n; + if (!(ny = XM(rearrange_ny)(ego->rearrange, p->sz->dims[0],p->vn,n_pes))) + return (plan *) 0; + vn = p->vn / ny; + A(ny * vn == p->vn); + + yblock = XM(default_block)(ny, n_pes); + cldt_before = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + nx, ny, vn*2, + I = p->I, O = p->O, + p->sz->dims[0].b[IB], yblock, + p->comm, 0)); + if (XM(any_true)(!cldt_before, p->comm)) goto nada; + if 
(ego->preserve_input || NO_DESTROY_INPUTP(plnr)) { I = O; } + + X(extract_reim)(p->sign, I, &ri, &ii); + X(extract_reim)(p->sign, O, &ro, &io); + + yb = XM(block)(ny, yblock, my_pe); + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)(nx, vn*2, vn*2), + X(mktensor_2d)(yb, vn*2*nx, vn*2*nx, + vn, 2, 2), + ro, io, ri, ii)); + if (XM(any_true)(!cld, p->comm)) goto nada; + + cldt_after = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + ny, nx, vn*2, + I, O, + yblock, p->sz->dims[0].b[OB], + p->comm, 0)); + if (XM(any_true)(!cldt_after, p->comm)) goto nada; + + pln = MKPLAN_MPI_DFT(P, &padt, apply); + + pln->cldt_before = cldt_before; + pln->cld = cld; + pln->cldt_after = cldt_after; + pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr); + pln->roff = ro - p->O; + pln->ioff = io - p->O; + pln->rearrange = ego->rearrange; + + X(ops_add)(&cldt_before->ops, &cld->ops, &pln->super.super.ops); + X(ops_add2)(&cldt_after->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldt_after); + X(plan_destroy_internal)(cld); + X(plan_destroy_internal)(cldt_before); + return (plan *) 0; +} + +static solver *mksolver(rearrangement rearrange, int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->rearrange = rearrange; + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(dft_rank1_bigvec_register)(planner *p) +{ + rearrangement rearrange; + int preserve_input; + FORALL_REARRANGE(rearrange) + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(rearrange, preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-rank1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-rank1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts 
Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex DFTs of rank == 1 via six-step algorithm. */ + +#include "mpi-dft.h" +#include "mpi-transpose.h" +#include "dft.h" + +typedef struct { + solver super; + rdftapply apply; /* apply_ddft_first or apply_ddft_last */ + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_dft super; + + triggen *t; + plan *cldt, *cld_ddft, *cld_dft; + INT roff, ioff; + int preserve_input; + INT vn, xmin, xmax, xs, m, r; +} P; + +static void do_twiddle(triggen *t, INT ir, INT m, INT vn, R *xr, R *xi) +{ + void (*rotate)(triggen *, INT, R, R, R *) = t->rotate; + INT im, iv; + for (im = 0; im < m; ++im) + for (iv = 0; iv < vn; ++iv) { + /* TODO: modify/inline rotate function + so that it can do whole vn vector at once? */ + R c[2]; + rotate(t, ir * im, *xr, *xi, c); + *xr = c[0]; *xi = c[1]; + xr += 2; xi += 2; + } +} + +/* radix-r DFT of size r*m. This is equivalent to an m x r 2d DFT, + plus twiddle factors between the size-m and size-r 1d DFTs, where + the m dimension is initially distributed. The output is transposed + to r x m where the r dimension is distributed. 
+ + This algorithm follows the general sequence: + global transpose (m x r -> r x m) + DFTs of size m + multiply by twiddles + global transpose (r x m -> m x r) + DFTs of size r + global transpose (m x r -> r x m) + where the multiplication by twiddles can come before or after + the middle transpose. The first/last transposes are omitted + for SCRAMBLED_IN/OUT formats, respectively. + + However, we wish to exploit our dft-rank1-bigvec solver, which + solves a vector of distributed DFTs via transpose+dft+transpose. + Therefore, we can group *either* the DFTs of size m *or* the + DFTs of size r with their surrounding transposes as a single + distributed-DFT (ddft) plan. These two variations correspond to + apply_ddft_first or apply_ddft_last, respectively. +*/ + +static void apply_ddft_first(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_dft *cld_dft; + plan_rdft *cldt, *cld_ddft; + INT roff, ioff, im, mmax, ms, r, vn; + triggen *t; + R *dI, *dO; + + /* distributed size-m DFTs, with output in m x r format */ + cld_ddft = (plan_rdft *) ego->cld_ddft; + cld_ddft->apply(ego->cld_ddft, I, O); + + cldt = (plan_rdft *) ego->cldt; + if (ego->preserve_input || !cldt) I = O; + + /* twiddle multiplications, followed by 1d DFTs of size-r */ + cld_dft = (plan_dft *) ego->cld_dft; + roff = ego->roff; ioff = ego->ioff; + mmax = ego->xmax; ms = ego->xs; + t = ego->t; r = ego->r; vn = ego->vn; + dI = O; dO = I; + for (im = ego->xmin; im <= mmax; ++im) { + do_twiddle(t, im, r, vn, dI+roff, dI+ioff); + cld_dft->apply((plan *) cld_dft, dI+roff, dI+ioff, dO+roff, dO+ioff); + dI += ms; dO += ms; + } + + /* final global transpose (m x r -> r x m), if not SCRAMBLED_OUT */ + if (cldt) + cldt->apply((plan *) cldt, I, O); +} + +static void apply_ddft_last(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_dft *cld_dft; + plan_rdft *cldt, *cld_ddft; + INT roff, ioff, ir, rmax, rs, m, vn; + triggen *t; + R *dI, *dO0, *dO; + + /* initial global 
transpose (m x r -> r x m), if not SCRAMBLED_IN */ + cldt = (plan_rdft *) ego->cldt; + if (cldt) { + cldt->apply((plan *) cldt, I, O); + dI = O; + } + else + dI = I; + if (ego->preserve_input) dO = O; else dO = I; + dO0 = dO; + + /* 1d DFTs of size m, followed by twiddle multiplications */ + cld_dft = (plan_dft *) ego->cld_dft; + roff = ego->roff; ioff = ego->ioff; + rmax = ego->xmax; rs = ego->xs; + t = ego->t; m = ego->m; vn = ego->vn; + for (ir = ego->xmin; ir <= rmax; ++ir) { + cld_dft->apply((plan *) cld_dft, dI+roff, dI+ioff, dO+roff, dO+ioff); + do_twiddle(t, ir, m, vn, dO+roff, dO+ioff); + dI += rs; dO += rs; + } + + /* distributed size-r DFTs, with output in r x m format */ + cld_ddft = (plan_rdft *) ego->cld_ddft; + cld_ddft->apply(ego->cld_ddft, dO0, O); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr, + INT *r, INT rblock[2], INT mblock[2]) +{ + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + int n_pes; + MPI_Comm_size(p->comm, &n_pes); + return (1 + && p->sz->rnk == 1 + + && ONLY_SCRAMBLEDP(p->flags) + + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + + && (!(p->flags & SCRAMBLED_IN) || ego->apply == apply_ddft_last) + && (!(p->flags & SCRAMBLED_OUT) || ego->apply == apply_ddft_first) + + && (!NO_SLOWP(plnr) /* slow if dft-serial is applicable */ + || !XM(dft_serial_applicable)(p)) + + /* disallow if dft-rank1-bigvec is applicable since the + data distribution may be slightly different (ugh!) 
*/ + && (p->vn < n_pes || p->flags) + + && (*r = XM(choose_radix)(p->sz->dims[0], n_pes, + p->flags, p->sign, + rblock, mblock)) + + /* ddft_first or last has substantial advantages in the + bigvec transpositions for the common case where + n_pes == n/r or r, respectively */ + && (!NO_UGLYP(plnr) + || !(*r == n_pes && ego->apply == apply_ddft_first) + || !(p->sz->dims[0].n / *r == n_pes + && ego->apply == apply_ddft_last)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldt, wakefulness); + X(plan_awake)(ego->cld_dft, wakefulness); + X(plan_awake)(ego->cld_ddft, wakefulness); + + switch (wakefulness) { + case SLEEPY: + X(triggen_destroy)(ego->t); ego->t = 0; + break; + default: + ego->t = X(mktriggen)(AWAKE_SQRTN_TABLE, ego->r * ego->m); + break; + } +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldt); + X(plan_destroy_internal)(ego->cld_dft); + X(plan_destroy_internal)(ego->cld_ddft); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-dft-rank1/%D%s%s%(%p%)%(%p%)%(%p%))", + ego->r, + ego->super.apply == apply_ddft_first ? 
"/first" : "/last", + ego->preserve_input==2 ?"/p":"", + ego->cld_ddft, ego->cld_dft, ego->cldt); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_dft *p; + P *pln; + plan *cld_dft = 0, *cld_ddft = 0, *cldt = 0; + R *ri, *ii, *ro, *io, *I, *O; + INT r, rblock[2], m, mblock[2], rp, mp, mpblock[2], mpb; + int my_pe, n_pes, preserve_input, ddft_first; + dtensor *sz; + static const plan_adt padt = { + XM(dft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr, &r, rblock, mblock)) + return (plan *) 0; + + p = (const problem_mpi_dft *) p_; + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + m = p->sz->dims[0].n / r; + + /* some hackery so that we can plan both ddft_first and ddft_last + as if they were ddft_first */ + if ((ddft_first = (ego->apply == apply_ddft_first))) { + rp = r; mp = m; + mpblock[IB] = mblock[IB]; mpblock[OB] = mblock[OB]; + mpb = XM(block)(mp, mpblock[OB], my_pe); + } + else { + rp = m; mp = r; + mpblock[IB] = rblock[IB]; mpblock[OB] = rblock[OB]; + mpb = XM(block)(mp, mpblock[IB], my_pe); + } + + preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr); + + sz = XM(mkdtensor)(1); + sz->dims[0].n = mp; + sz->dims[0].b[IB] = mpblock[IB]; + sz->dims[0].b[OB] = mpblock[OB]; + I = (ddft_first || !preserve_input) ? p->I : p->O; + O = p->O; + cld_ddft = X(mkplan_d)(plnr, XM(mkproblem_dft_d)(sz, rp * p->vn, + I, O, p->comm, p->sign, + RANK1_BIGVEC_ONLY)); + if (XM(any_true)(!cld_ddft, p->comm)) goto nada; + + I = TAINT((ddft_first || !p->flags) ? p->O : p->I, rp * p->vn * 2); + O = TAINT((preserve_input || (ddft_first && p->flags)) ? 
p->O : p->I, + rp * p->vn * 2); + X(extract_reim)(p->sign, I, &ri, &ii); + X(extract_reim)(p->sign, O, &ro, &io); + cld_dft = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)(rp, p->vn*2,p->vn*2), + X(mktensor_1d)(p->vn, 2, 2), + ri, ii, ro, io)); + if (XM(any_true)(!cld_dft, p->comm)) goto nada; + + if (!p->flags) { /* !(SCRAMBLED_IN or SCRAMBLED_OUT) */ + I = (ddft_first && preserve_input) ? p->O : p->I; + O = p->O; + cldt = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + m, r, p->vn * 2, + I, O, + ddft_first ? mblock[OB] : mblock[IB], + ddft_first ? rblock[OB] : rblock[IB], + p->comm, 0)); + if (XM(any_true)(!cldt, p->comm)) goto nada; + } + + pln = MKPLAN_MPI_DFT(P, &padt, ego->apply); + + pln->cld_ddft = cld_ddft; + pln->cld_dft = cld_dft; + pln->cldt = cldt; + pln->preserve_input = preserve_input; + X(extract_reim)(p->sign, p->O, &ro, &io); + pln->roff = ro - p->O; + pln->ioff = io - p->O; + pln->vn = p->vn; + pln->m = m; + pln->r = r; + pln->xmin = (ddft_first ? mblock[OB] : rblock[IB]) * my_pe; + pln->xmax = pln->xmin + mpb - 1; + pln->xs = rp * p->vn * 2; + pln->t = 0; + + X(ops_add)(&cld_ddft->ops, &cld_dft->ops, &pln->super.super.ops); + if (cldt) X(ops_add2)(&cldt->ops, &pln->super.super.ops); + { + double n0 = (1 + pln->xmax - pln->xmin) * (mp - 1) * pln->vn; + pln->super.super.ops.mul += 8 * n0; + pln->super.super.ops.add += 4 * n0; + pln->super.super.ops.other += 8 * n0; + } + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldt); + X(plan_destroy_internal)(cld_dft); + X(plan_destroy_internal)(cld_ddft); + return (plan *) 0; +} + +static solver *mksolver(rdftapply apply, int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->apply = apply; + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(dft_rank1_register)(planner *p) +{ + rdftapply apply[] = { apply_ddft_first, apply_ddft_last }; + unsigned int iapply; + int preserve_input; + 
for (iapply = 0; iapply < sizeof(apply) / sizeof(apply[0]); ++iapply) + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(apply[iapply], preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-serial.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-serial.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* "MPI" DFTs where all of the data is on one processor...just + call through to serial API. 
*/ + +#include "mpi-dft.h" +#include "dft.h" + +typedef struct { + plan_mpi_dft super; + plan *cld; + INT roff, ioff; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + INT roff = ego->roff, ioff = ego->ioff; + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, I+roff, I+ioff, O+roff, O+ioff); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-dft-serial %(%p%))", ego->cld); +} + +int XM(dft_serial_applicable)(const problem_mpi_dft *p) +{ + return (1 + && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */ + && ((XM(is_local)(p->sz, IB) && XM(is_local)(p->sz, OB)) + || p->vn == 0)); +} + +static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) +{ + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + P *pln; + plan *cld; + int my_pe; + R *ri, *ii, *ro, *io; + static const plan_adt padt = { + XM(dft_solve), awake, print, destroy + }; + + UNUSED(ego); + + /* check whether applicable: */ + if (!XM(dft_serial_applicable)(p)) + return (plan *) 0; + + X(extract_reim)(p->sign, p->I, &ri, &ii); + X(extract_reim)(p->sign, p->O, &ro, &io); + + MPI_Comm_rank(p->comm, &my_pe); + if (my_pe == 0 && p->vn > 0) { + int i, rnk = p->sz->rnk; + tensor *sz = X(mktensor)(p->sz->rnk); + sz->dims[rnk - 1].is = sz->dims[rnk - 1].os = 2 * p->vn; + sz->dims[rnk - 1].n = p->sz->dims[rnk - 1].n; + for (i = rnk - 1; i > 0; --i) { + sz->dims[i - 1].is = sz->dims[i - 1].os = + sz->dims[i].is * sz->dims[i].n; + sz->dims[i - 1].n = p->sz->dims[i - 1].n; + } + + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(sz, + X(mktensor_1d)(p->vn, 2, 2), + ri, ii, ro, io)); + } + else { /* idle process: make nop plan */ + cld = 
X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_0d)(), + X(mktensor_1d)(0,0,0), + ri, ii, ro, io)); + } + if (XM(any_true)(!cld, p->comm)) return (plan *) 0; + + pln = MKPLAN_MPI_DFT(P, &padt, apply); + pln->cld = cld; + pln->roff = ro - p->O; + pln->ioff = io - p->O; + X(ops_cpy)(&cld->ops, &pln->super.super.ops); + return &(pln->super.super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_MPI_DFT, mkplan, 0 }; + return MKSOLVER(solver, &sadt); +} + +void XM(dft_serial_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dft-solve.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dft-solve.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-dft.h" + +/* use the apply() operation for MPI_DFT problems */ +void XM(dft_solve)(const plan *ego_, const problem *p_) +{ + const plan_mpi_dft *ego = (const plan_mpi_dft *) ego_; + const problem_mpi_dft *p = (const problem_mpi_dft *) p_; + ego->apply(ego_, UNTAINT(p->I), UNTAINT(p->O)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/dtensor.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/dtensor.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +dtensor *XM(mkdtensor)(int rnk) +{ + dtensor *x; + + A(rnk >= 0); + +#if defined(STRUCT_HACK_KR) + if (FINITE_RNK(rnk) && rnk > 1) + x = (dtensor *)MALLOC(sizeof(dtensor) + (rnk - 1) * sizeof(ddim), + TENSORS); + else + x = (dtensor *)MALLOC(sizeof(dtensor), TENSORS); +#elif defined(STRUCT_HACK_C99) + if (FINITE_RNK(rnk)) + x = (dtensor *)MALLOC(sizeof(dtensor) + rnk * sizeof(ddim), + TENSORS); + else + x = (dtensor *)MALLOC(sizeof(dtensor), TENSORS); +#else + x = (dtensor *)MALLOC(sizeof(dtensor), TENSORS); + if (FINITE_RNK(rnk) && rnk > 0) + x->dims = (ddim *)MALLOC(sizeof(ddim) * rnk, TENSORS); + else + x->dims = 0; +#endif + + x->rnk = rnk; + return x; +} + +void XM(dtensor_destroy)(dtensor *sz) +{ +#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR) + X(ifree0)(sz->dims); +#endif + X(ifree)(sz); +} + +void XM(dtensor_md5)(md5 *p, const dtensor *t) +{ + int i; + X(md5int)(p, t->rnk); + if (FINITE_RNK(t->rnk)) { + for (i = 0; i < t->rnk; ++i) { + const ddim *q = t->dims + i; + X(md5INT)(p, q->n); + X(md5INT)(p, q->b[IB]); + X(md5INT)(p, q->b[OB]); + } + } +} + +dtensor *XM(dtensor_copy)(const dtensor *sz) +{ + dtensor *x = XM(mkdtensor)(sz->rnk); + int i; + if (FINITE_RNK(sz->rnk)) + for (i = 0; i < sz->rnk; ++i) + x->dims[i] = sz->dims[i]; + return x; +} + +dtensor *XM(dtensor_canonical)(const dtensor *sz, int compress) +{ + int i, rnk; + dtensor *x; + block_kind k; + + if (!FINITE_RNK(sz->rnk)) + return XM(mkdtensor)(sz->rnk); + for (i = rnk = 0; i < sz->rnk; ++i) { + if (sz->dims[i].n <= 0) + return XM(mkdtensor)(RNK_MINFTY); + else if (!compress || sz->dims[i].n > 1) + ++rnk; + } + x = XM(mkdtensor)(rnk); + for (i = rnk = 0; i < sz->rnk; ++i) { + if (!compress || sz->dims[i].n > 1) { + x->dims[rnk].n 
= sz->dims[i].n; + FORALL_BLOCK_KIND(k) { + if (XM(num_blocks)(sz->dims[i].n, sz->dims[i].b[k]) == 1) + x->dims[rnk].b[k] = sz->dims[i].n; + else + x->dims[rnk].b[k] = sz->dims[i].b[k]; + } + ++rnk; + } + } + return x; +} + +int XM(dtensor_validp)(const dtensor *sz) +{ + int i; + if (sz->rnk < 0) return 0; + if (FINITE_RNK(sz->rnk)) + for (i = 0; i < sz->rnk; ++i) + if (sz->dims[i].n < 0 + || sz->dims[i].b[IB] <= 0 + || sz->dims[i].b[OB] <= 0) + return 0; + return 1; +} + +void XM(dtensor_print)(const dtensor *t, printer *p) +{ + if (FINITE_RNK(t->rnk)) { + int i; + int first = 1; + p->print(p, "("); + for (i = 0; i < t->rnk; ++i) { + const ddim *d = t->dims + i; + p->print(p, "%s(%D %D %D)", + first ? "" : " ", + d->n, d->b[IB], d->b[OB]); + first = 0; + } + p->print(p, ")"); + } else { + p->print(p, "rank-minfty"); + } + +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/f03-wrap.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/f03-wrap.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,284 @@ +/* Generated automatically. DO NOT EDIT! 
*/ + +#include "fftw3-mpi.h" +#include "ifftw-mpi.h" + +FFTW_EXTERN ptrdiff_t XM(local_size_many_transposed_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start); +FFTW_EXTERN ptrdiff_t XM(local_size_many_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t block0, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start); +FFTW_EXTERN ptrdiff_t XM(local_size_transposed_f03)(int rnk, const ptrdiff_t * n, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start); +FFTW_EXTERN ptrdiff_t XM(local_size_f03)(int rnk, const ptrdiff_t * n, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start); +FFTW_EXTERN ptrdiff_t XM(local_size_many_1d_f03)(ptrdiff_t n0, ptrdiff_t howmany, MPI_Fint f_comm, int sign, unsigned flags, ptrdiff_t * local_ni, ptrdiff_t * local_i_start, ptrdiff_t * local_no, ptrdiff_t * local_o_start); +FFTW_EXTERN ptrdiff_t XM(local_size_1d_f03)(ptrdiff_t n0, MPI_Fint f_comm, int sign, unsigned flags, ptrdiff_t * local_ni, ptrdiff_t * local_i_start, ptrdiff_t * local_no, ptrdiff_t * local_o_start); +FFTW_EXTERN ptrdiff_t XM(local_size_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start); +FFTW_EXTERN ptrdiff_t XM(local_size_2d_transposed_f03)(ptrdiff_t n0, ptrdiff_t n1, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start); +FFTW_EXTERN ptrdiff_t XM(local_size_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start); +FFTW_EXTERN ptrdiff_t XM(local_size_3d_transposed_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start); +FFTW_EXTERN X(plan) 
XM(plan_many_transpose_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, R * in, R * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_transpose_f03)(ptrdiff_t n0, ptrdiff_t n1, R * in, R * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_many_dft_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t block, ptrdiff_t tblock, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_f03)(int rnk, const ptrdiff_t * n, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_1d_f03)(ptrdiff_t n0, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_many_r2r_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t iblock, ptrdiff_t oblock, R * in, R * out, MPI_Fint f_comm, const X(r2r_kind) * kind, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_r2r_f03)(int rnk, const ptrdiff_t * n, R * in, R * out, MPI_Fint f_comm, const X(r2r_kind) * kind, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_r2r_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, R * in, R * out, MPI_Fint f_comm, X(r2r_kind) kind0, X(r2r_kind) kind1, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_r2r_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R * in, R * out, MPI_Fint f_comm, X(r2r_kind) kind0, X(r2r_kind) kind1, X(r2r_kind) kind2, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_many_dft_r2c_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t iblock, ptrdiff_t oblock, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) 
XM(plan_dft_r2c_f03)(int rnk, const ptrdiff_t * n, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_r2c_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_r2c_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_many_dft_c2r_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t iblock, ptrdiff_t oblock, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_c2r_f03)(int rnk, const ptrdiff_t * n, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_c2r_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN X(plan) XM(plan_dft_c2r_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags); +FFTW_EXTERN void XM(gather_wisdom_f03)(MPI_Fint f_comm_); +FFTW_EXTERN void XM(broadcast_wisdom_f03)(MPI_Fint f_comm_); + +ptrdiff_t XM(local_size_many_transposed_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_many_transposed)(rnk,n,howmany,block0,block1,comm,local_n0,local_0_start,local_n1,local_1_start); +} + +ptrdiff_t XM(local_size_many_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t block0, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_many)(rnk,n,howmany,block0,comm,local_n0,local_0_start); +} + +ptrdiff_t XM(local_size_transposed_f03)(int rnk, const ptrdiff_t * n, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * 
local_n1, ptrdiff_t * local_1_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_transposed)(rnk,n,comm,local_n0,local_0_start,local_n1,local_1_start); +} + +ptrdiff_t XM(local_size_f03)(int rnk, const ptrdiff_t * n, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size)(rnk,n,comm,local_n0,local_0_start); +} + +ptrdiff_t XM(local_size_many_1d_f03)(ptrdiff_t n0, ptrdiff_t howmany, MPI_Fint f_comm, int sign, unsigned flags, ptrdiff_t * local_ni, ptrdiff_t * local_i_start, ptrdiff_t * local_no, ptrdiff_t * local_o_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_many_1d)(n0,howmany,comm,sign,flags,local_ni,local_i_start,local_no,local_o_start); +} + +ptrdiff_t XM(local_size_1d_f03)(ptrdiff_t n0, MPI_Fint f_comm, int sign, unsigned flags, ptrdiff_t * local_ni, ptrdiff_t * local_i_start, ptrdiff_t * local_no, ptrdiff_t * local_o_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_1d)(n0,comm,sign,flags,local_ni,local_i_start,local_no,local_o_start); +} + +ptrdiff_t XM(local_size_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_2d)(n0,n1,comm,local_n0,local_0_start); +} + +ptrdiff_t XM(local_size_2d_transposed_f03)(ptrdiff_t n0, ptrdiff_t n1, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_2d_transposed)(n0,n1,comm,local_n0,local_0_start,local_n1,local_1_start); +} + +ptrdiff_t XM(local_size_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_3d)(n0,n1,n2,comm,local_n0,local_0_start); 
+} + +ptrdiff_t XM(local_size_3d_transposed_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Fint f_comm, ptrdiff_t * local_n0, ptrdiff_t * local_0_start, ptrdiff_t * local_n1, ptrdiff_t * local_1_start) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(local_size_3d_transposed)(n0,n1,n2,comm,local_n0,local_0_start,local_n1,local_1_start); +} + +X(plan) XM(plan_many_transpose_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, R * in, R * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_many_transpose)(n0,n1,howmany,block0,block1,in,out,comm,flags); +} + +X(plan) XM(plan_transpose_f03)(ptrdiff_t n0, ptrdiff_t n1, R * in, R * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_transpose)(n0,n1,in,out,comm,flags); +} + +X(plan) XM(plan_many_dft_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t block, ptrdiff_t tblock, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_many_dft)(rnk,n,howmany,block,tblock,in,out,comm,sign,flags); +} + +X(plan) XM(plan_dft_f03)(int rnk, const ptrdiff_t * n, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft)(rnk,n,in,out,comm,sign,flags); +} + +X(plan) XM(plan_dft_1d_f03)(ptrdiff_t n0, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_1d)(n0,in,out,comm,sign,flags); +} + +X(plan) XM(plan_dft_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_2d)(n0,n1,in,out,comm,sign,flags); +} + +X(plan) XM(plan_dft_3d_f03)(ptrdiff_t n0, 
ptrdiff_t n1, ptrdiff_t n2, X(complex) * in, X(complex) * out, MPI_Fint f_comm, int sign, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_3d)(n0,n1,n2,in,out,comm,sign,flags); +} + +X(plan) XM(plan_many_r2r_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t iblock, ptrdiff_t oblock, R * in, R * out, MPI_Fint f_comm, const X(r2r_kind) * kind, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_many_r2r)(rnk,n,howmany,iblock,oblock,in,out,comm,kind,flags); +} + +X(plan) XM(plan_r2r_f03)(int rnk, const ptrdiff_t * n, R * in, R * out, MPI_Fint f_comm, const X(r2r_kind) * kind, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_r2r)(rnk,n,in,out,comm,kind,flags); +} + +X(plan) XM(plan_r2r_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, R * in, R * out, MPI_Fint f_comm, X(r2r_kind) kind0, X(r2r_kind) kind1, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_r2r_2d)(n0,n1,in,out,comm,kind0,kind1,flags); +} + +X(plan) XM(plan_r2r_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R * in, R * out, MPI_Fint f_comm, X(r2r_kind) kind0, X(r2r_kind) kind1, X(r2r_kind) kind2, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_r2r_3d)(n0,n1,n2,in,out,comm,kind0,kind1,kind2,flags); +} + +X(plan) XM(plan_many_dft_r2c_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t iblock, ptrdiff_t oblock, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_many_dft_r2c)(rnk,n,howmany,iblock,oblock,in,out,comm,flags); +} + +X(plan) XM(plan_dft_r2c_f03)(int rnk, const ptrdiff_t * n, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_r2c)(rnk,n,in,out,comm,flags); +} + +X(plan) XM(plan_dft_r2c_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, R * in, X(complex) * 
out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_r2c_2d)(n0,n1,in,out,comm,flags); +} + +X(plan) XM(plan_dft_r2c_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R * in, X(complex) * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_r2c_3d)(n0,n1,n2,in,out,comm,flags); +} + +X(plan) XM(plan_many_dft_c2r_f03)(int rnk, const ptrdiff_t * n, ptrdiff_t howmany, ptrdiff_t iblock, ptrdiff_t oblock, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_many_dft_c2r)(rnk,n,howmany,iblock,oblock,in,out,comm,flags); +} + +X(plan) XM(plan_dft_c2r_f03)(int rnk, const ptrdiff_t * n, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_c2r)(rnk,n,in,out,comm,flags); +} + +X(plan) XM(plan_dft_c2r_2d_f03)(ptrdiff_t n0, ptrdiff_t n1, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_c2r_2d)(n0,n1,in,out,comm,flags); +} + +X(plan) XM(plan_dft_c2r_3d_f03)(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, X(complex) * in, R * out, MPI_Fint f_comm, unsigned flags) +{ + MPI_Comm comm; + + comm = MPI_Comm_f2c(f_comm); + return XM(plan_dft_c2r_3d)(n0,n1,n2,in,out,comm,flags); +} + +void XM(gather_wisdom_f03)(MPI_Fint f_comm_) +{ + MPI_Comm comm_; + + comm_ = MPI_Comm_f2c(f_comm_); + XM(gather_wisdom)(comm_); +} + +void XM(broadcast_wisdom_f03)(MPI_Fint f_comm_) +{ + MPI_Comm comm_; + + comm_ = MPI_Comm_f2c(f_comm_); + XM(broadcast_wisdom)(comm_); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/f03-wrap.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/f03-wrap.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,22 @@ +#! /bin/sh + +# Script to generate Fortran 2003 wrappers for FFTW's MPI functions. 
This +# is necessary because MPI provides no way to deal with C MPI_Comm handles +# from Fortran (where MPI_Comm == integer), but does provide a way to +# deal with Fortran MPI_Comm handles from C (via MPI_Comm_f2c). So, +# every FFTW function that takes an MPI_Comm argument needs a wrapper +# function that takes a Fortran integer and converts it to MPI_Comm. + +echo "/* Generated automatically. DO NOT EDIT! */" +echo + +echo "#include \"fftw3-mpi.h\"" +echo "#include \"ifftw-mpi.h\"" +echo + +# Declare prototypes using FFTW_EXTERN, important for Windows DLLs +grep -v 'mpi.h' fftw3-mpi.h | gcc -E - |grep "fftw_mpi_init" |tr ';' '\n' | grep "MPI_Comm" | perl genf03-wrap.pl | grep "MPI_Fint" | sed 's/^/FFTW_EXTERN /;s/$/;/' + +grep -v 'mpi.h' fftw3-mpi.h | gcc -E - |grep "fftw_mpi_init" |tr ';' '\n' | grep "MPI_Comm" | perl genf03-wrap.pl + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/f03api.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/f03api.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,43 @@ +#! /bin/sh + +# Script to generate Fortran 2003 interface declarations for FFTW's MPI +# interface from the fftw3-mpi.h header file. + +# This is designed so that the Fortran caller can do: +# use, intrinsic :: iso_c_binding +# implicit none +# include 'fftw3-mpi.f03' +# and then call the C FFTW MPI functions directly, with type checking. +# +# One caveat: because there is no standard way to conver MPI_Comm objects +# from Fortran (= integer) to C (= opaque type), the Fortran interface +# technically calls C wrapper functions (also auto-generated) which +# call MPI_Comm_f2c to convert the communicators as needed. + +echo "! Generated automatically. DO NOT EDIT!" +echo + +echo " include 'fftw3.f03'" +echo + +# Extract constants +perl -pe 's/#define +([A-Z0-9_]+) +\(([+-]?[0-9]+)U?\)/\n integer\(C_INTPTR_T\), parameter :: \1 = \2\n/g' < fftw3-mpi.h | grep 'integer(C_INTPTR_T)' +perl -pe 'if (/#define +([A-Z0-9_]+) +\(([0-9]+)U? 
*<< *([0-9]+)\)/) { print "\n integer\(C_INT\), parameter :: $1 = ",$2 << $3,"\n"; }' < fftw3-mpi.h | grep 'integer(C_INT)' + +# Extract function declarations +for p in $*; do + if test "$p" = "d"; then p=""; fi + + echo + cat < + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +struct fftw_mpi_ddim_do_not_use_me { + ptrdiff_t n; /* dimension size */ + ptrdiff_t ib; /* input block */ + ptrdiff_t ob; /* output block */ +}; + +/* + huge second-order macro that defines prototypes for all API + functions. We expand this macro for each supported precision + + XM: name-mangling macro (MPI) + X: name-mangling macro (serial) + R: real data type + C: complex data type +*/ + +#define FFTW_MPI_DEFINE_API(XM, X, R, C) \ + \ +typedef struct fftw_mpi_ddim_do_not_use_me XM(ddim); \ + \ +FFTW_EXTERN void XM(init)(void); \ +FFTW_EXTERN void XM(cleanup)(void); \ + \ +FFTW_EXTERN ptrdiff_t XM(local_size_many_transposed) \ + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ + ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_many) \ + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ + ptrdiff_t block0, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_transposed) \ + (int rnk, const ptrdiff_t *n, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size) \ + (int rnk, const ptrdiff_t *n, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_many_1d)( \ + ptrdiff_t n0, ptrdiff_t howmany, \ + MPI_Comm comm, int sign, unsigned flags, \ + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, \ + ptrdiff_t *local_no, ptrdiff_t *local_o_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_1d)( \ + ptrdiff_t n0, MPI_Comm comm, int sign, 
unsigned flags, \ + ptrdiff_t *local_ni, ptrdiff_t *local_i_start, \ + ptrdiff_t *local_no, ptrdiff_t *local_o_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_2d)( \ + ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_2d_transposed)( \ + ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_3d)( \ + ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ +FFTW_EXTERN ptrdiff_t XM(local_size_3d_transposed)( \ + ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, \ + ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ + ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ + \ +FFTW_EXTERN X(plan) XM(plan_many_transpose) \ + (ptrdiff_t n0, ptrdiff_t n1, \ + ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, \ + R *in, R *out, MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_transpose) \ + (ptrdiff_t n0, ptrdiff_t n1, \ + R *in, R *out, MPI_Comm comm, unsigned flags); \ + \ +FFTW_EXTERN X(plan) XM(plan_many_dft) \ + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ + ptrdiff_t block, ptrdiff_t tblock, C *in, C *out, \ + MPI_Comm comm, int sign, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft) \ + (int rnk, const ptrdiff_t *n, C *in, C *out, \ + MPI_Comm comm, int sign, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_1d) \ + (ptrdiff_t n0, C *in, C *out, \ + MPI_Comm comm, int sign, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_2d) \ + (ptrdiff_t n0, ptrdiff_t n1, C *in, C *out, \ + MPI_Comm comm, int sign, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_3d) \ + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, C *in, C *out, \ + MPI_Comm comm, int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) XM(plan_many_r2r) \ + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ + 
ptrdiff_t iblock, ptrdiff_t oblock, R *in, R *out, \ + MPI_Comm comm, const X(r2r_kind) *kind, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_r2r) \ + (int rnk, const ptrdiff_t *n, R *in, R *out, \ + MPI_Comm comm, const X(r2r_kind) *kind, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_r2r_2d) \ + (ptrdiff_t n0, ptrdiff_t n1, R *in, R *out, MPI_Comm comm, \ + X(r2r_kind) kind0, X(r2r_kind) kind1, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_r2r_3d) \ + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, \ + R *in, R *out, MPI_Comm comm, X(r2r_kind) kind0, \ + X(r2r_kind) kind1, X(r2r_kind) kind2, unsigned flags); \ + \ +FFTW_EXTERN X(plan) XM(plan_many_dft_r2c) \ + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ + ptrdiff_t iblock, ptrdiff_t oblock, R *in, C *out, \ + MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_r2c) \ + (int rnk, const ptrdiff_t *n, R *in, C *out, \ + MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_r2c_2d) \ + (ptrdiff_t n0, ptrdiff_t n1, R *in, C *out, \ + MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_r2c_3d) \ + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R *in, C *out, \ + MPI_Comm comm, unsigned flags); \ + \ +FFTW_EXTERN X(plan) XM(plan_many_dft_c2r) \ + (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ + ptrdiff_t iblock, ptrdiff_t oblock, C *in, R *out, \ + MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_c2r) \ + (int rnk, const ptrdiff_t *n, C *in, R *out, \ + MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_c2r_2d) \ + (ptrdiff_t n0, ptrdiff_t n1, C *in, R *out, \ + MPI_Comm comm, unsigned flags); \ +FFTW_EXTERN X(plan) XM(plan_dft_c2r_3d) \ + (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, C *in, R *out, \ + MPI_Comm comm, unsigned flags); \ + \ +FFTW_EXTERN void XM(gather_wisdom)(MPI_Comm comm_); \ +FFTW_EXTERN void XM(broadcast_wisdom)(MPI_Comm comm_); \ + \ +FFTW_EXTERN void XM(execute_dft)(X(plan) p, C *in, C *out); \ +FFTW_EXTERN void 
XM(execute_dft_r2c)(X(plan) p, R *in, C *out); \ +FFTW_EXTERN void XM(execute_dft_c2r)(X(plan) p, C *in, R *out); \ +FFTW_EXTERN void XM(execute_r2r)(X(plan) p, R *in, R *out); + + + +/* end of FFTW_MPI_DEFINE_API macro */ + +#define FFTW_MPI_MANGLE_DOUBLE(name) FFTW_MANGLE_DOUBLE(FFTW_CONCAT(mpi_,name)) +#define FFTW_MPI_MANGLE_FLOAT(name) FFTW_MANGLE_FLOAT(FFTW_CONCAT(mpi_,name)) +#define FFTW_MPI_MANGLE_LONG_DOUBLE(name) FFTW_MANGLE_LONG_DOUBLE(FFTW_CONCAT(mpi_,name)) + +FFTW_MPI_DEFINE_API(FFTW_MPI_MANGLE_DOUBLE, FFTW_MANGLE_DOUBLE, double, fftw_complex) +FFTW_MPI_DEFINE_API(FFTW_MPI_MANGLE_FLOAT, FFTW_MANGLE_FLOAT, float, fftwf_complex) +FFTW_MPI_DEFINE_API(FFTW_MPI_MANGLE_LONG_DOUBLE, FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) + +#define FFTW_MPI_DEFAULT_BLOCK (0) + +/* MPI-specific flags */ +#define FFTW_MPI_SCRAMBLED_IN (1U << 27) +#define FFTW_MPI_SCRAMBLED_OUT (1U << 28) +#define FFTW_MPI_TRANSPOSED_IN (1U << 29) +#define FFTW_MPI_TRANSPOSED_OUT (1U << 30) + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* FFTW3_MPI_H */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/fftw3l-mpi.f03.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/fftw3l-mpi.f03.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,405 @@ +! Generated automatically. DO NOT EDIT! 
+ + include 'fftw3l.f03' + + + type, bind(C) :: fftwl_mpi_ddim + integer(C_INTPTR_T) n, ib, ob + end type fftwl_mpi_ddim + + interface + subroutine fftwl_mpi_init() bind(C, name='fftwl_mpi_init') + import + end subroutine fftwl_mpi_init + + subroutine fftwl_mpi_cleanup() bind(C, name='fftwl_mpi_cleanup') + import + end subroutine fftwl_mpi_cleanup + + integer(C_INTPTR_T) function fftwl_mpi_local_size_many_transposed(rnk,n,howmany,block0,block1,comm,local_n0,local_0_start, & + local_n1,local_1_start) & + bind(C, name='fftwl_mpi_local_size_many_transposed_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: block0 + integer(C_INTPTR_T), value :: block1 + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + integer(C_INTPTR_T), intent(out) :: local_n1 + integer(C_INTPTR_T), intent(out) :: local_1_start + end function fftwl_mpi_local_size_many_transposed + + integer(C_INTPTR_T) function fftwl_mpi_local_size_many(rnk,n,howmany,block0,comm,local_n0,local_0_start) & + bind(C, name='fftwl_mpi_local_size_many_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: block0 + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + end function fftwl_mpi_local_size_many + + integer(C_INTPTR_T) function fftwl_mpi_local_size_transposed(rnk,n,comm,local_n0,local_0_start,local_n1,local_1_start) & + bind(C, name='fftwl_mpi_local_size_transposed_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + 
integer(C_INTPTR_T), intent(out) :: local_n1 + integer(C_INTPTR_T), intent(out) :: local_1_start + end function fftwl_mpi_local_size_transposed + + integer(C_INTPTR_T) function fftwl_mpi_local_size(rnk,n,comm,local_n0,local_0_start) bind(C, name='fftwl_mpi_local_size_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + end function fftwl_mpi_local_size + + integer(C_INTPTR_T) function fftwl_mpi_local_size_many_1d(n0,howmany,comm,sign,flags,local_ni,local_i_start,local_no, & + local_o_start) bind(C, name='fftwl_mpi_local_size_many_1d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: howmany + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + integer(C_INTPTR_T), intent(out) :: local_ni + integer(C_INTPTR_T), intent(out) :: local_i_start + integer(C_INTPTR_T), intent(out) :: local_no + integer(C_INTPTR_T), intent(out) :: local_o_start + end function fftwl_mpi_local_size_many_1d + + integer(C_INTPTR_T) function fftwl_mpi_local_size_1d(n0,comm,sign,flags,local_ni,local_i_start,local_no,local_o_start) & + bind(C, name='fftwl_mpi_local_size_1d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + integer(C_INTPTR_T), intent(out) :: local_ni + integer(C_INTPTR_T), intent(out) :: local_i_start + integer(C_INTPTR_T), intent(out) :: local_no + integer(C_INTPTR_T), intent(out) :: local_o_start + end function fftwl_mpi_local_size_1d + + integer(C_INTPTR_T) function fftwl_mpi_local_size_2d(n0,n1,comm,local_n0,local_0_start) & + bind(C, name='fftwl_mpi_local_size_2d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), 
intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + end function fftwl_mpi_local_size_2d + + integer(C_INTPTR_T) function fftwl_mpi_local_size_2d_transposed(n0,n1,comm,local_n0,local_0_start,local_n1,local_1_start) & + bind(C, name='fftwl_mpi_local_size_2d_transposed_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + integer(C_INTPTR_T), intent(out) :: local_n1 + integer(C_INTPTR_T), intent(out) :: local_1_start + end function fftwl_mpi_local_size_2d_transposed + + integer(C_INTPTR_T) function fftwl_mpi_local_size_3d(n0,n1,n2,comm,local_n0,local_0_start) & + bind(C, name='fftwl_mpi_local_size_3d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: n2 + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + end function fftwl_mpi_local_size_3d + + integer(C_INTPTR_T) function fftwl_mpi_local_size_3d_transposed(n0,n1,n2,comm,local_n0,local_0_start,local_n1,local_1_start) & + bind(C, name='fftwl_mpi_local_size_3d_transposed_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: n2 + integer(C_MPI_FINT), value :: comm + integer(C_INTPTR_T), intent(out) :: local_n0 + integer(C_INTPTR_T), intent(out) :: local_0_start + integer(C_INTPTR_T), intent(out) :: local_n1 + integer(C_INTPTR_T), intent(out) :: local_1_start + end function fftwl_mpi_local_size_3d_transposed + + type(C_PTR) function fftwl_mpi_plan_many_transpose(n0,n1,howmany,block0,block1,in,out,comm,flags) & + bind(C, name='fftwl_mpi_plan_many_transpose_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: block0 + 
integer(C_INTPTR_T), value :: block1 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_many_transpose + + type(C_PTR) function fftwl_mpi_plan_transpose(n0,n1,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_transpose_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_transpose + + type(C_PTR) function fftwl_mpi_plan_many_dft(rnk,n,howmany,block,tblock,in,out,comm,sign,flags) & + bind(C, name='fftwl_mpi_plan_many_dft_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: block + integer(C_INTPTR_T), value :: tblock + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_mpi_plan_many_dft + + type(C_PTR) function fftwl_mpi_plan_dft(rnk,n,in,out,comm,sign,flags) bind(C, name='fftwl_mpi_plan_dft_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft + + type(C_PTR) function fftwl_mpi_plan_dft_1d(n0,in,out,comm,sign,flags) bind(C, name='fftwl_mpi_plan_dft_1d_f03') + import + integer(C_INTPTR_T), value :: n0 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), 
intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_1d + + type(C_PTR) function fftwl_mpi_plan_dft_2d(n0,n1,in,out,comm,sign,flags) bind(C, name='fftwl_mpi_plan_dft_2d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_2d + + type(C_PTR) function fftwl_mpi_plan_dft_3d(n0,n1,n2,in,out,comm,sign,flags) bind(C, name='fftwl_mpi_plan_dft_3d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: n2 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: sign + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_3d + + type(C_PTR) function fftwl_mpi_plan_many_r2r(rnk,n,howmany,iblock,oblock,in,out,comm,kind,flags) & + bind(C, name='fftwl_mpi_plan_many_r2r_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: iblock + integer(C_INTPTR_T), value :: oblock + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwl_mpi_plan_many_r2r + + type(C_PTR) function fftwl_mpi_plan_r2r(rnk,n,in,out,comm,kind,flags) bind(C, name='fftwl_mpi_plan_r2r_f03') + import + integer(C_INT), value :: rnk + 
integer(C_INTPTR_T), dimension(*), intent(in) :: n + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_FFTW_R2R_KIND), dimension(*), intent(in) :: kind + integer(C_INT), value :: flags + end function fftwl_mpi_plan_r2r + + type(C_PTR) function fftwl_mpi_plan_r2r_2d(n0,n1,in,out,comm,kind0,kind1,flags) bind(C, name='fftwl_mpi_plan_r2r_2d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_INT), value :: flags + end function fftwl_mpi_plan_r2r_2d + + type(C_PTR) function fftwl_mpi_plan_r2r_3d(n0,n1,n2,in,out,comm,kind0,kind1,kind2,flags) & + bind(C, name='fftwl_mpi_plan_r2r_3d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: n2 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_FFTW_R2R_KIND), value :: kind0 + integer(C_FFTW_R2R_KIND), value :: kind1 + integer(C_FFTW_R2R_KIND), value :: kind2 + integer(C_INT), value :: flags + end function fftwl_mpi_plan_r2r_3d + + type(C_PTR) function fftwl_mpi_plan_many_dft_r2c(rnk,n,howmany,iblock,oblock,in,out,comm,flags) & + bind(C, name='fftwl_mpi_plan_many_dft_r2c_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: iblock + integer(C_INTPTR_T), value :: oblock + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end 
function fftwl_mpi_plan_many_dft_r2c + + type(C_PTR) function fftwl_mpi_plan_dft_r2c(rnk,n,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_dft_r2c_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_r2c + + type(C_PTR) function fftwl_mpi_plan_dft_r2c_2d(n0,n1,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_dft_r2c_2d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_r2c_2d + + type(C_PTR) function fftwl_mpi_plan_dft_r2c_3d(n0,n1,n2,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_dft_r2c_3d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: n2 + real(C_LONG_DOUBLE), dimension(*), intent(out) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_r2c_3d + + type(C_PTR) function fftwl_mpi_plan_many_dft_c2r(rnk,n,howmany,iblock,oblock,in,out,comm,flags) & + bind(C, name='fftwl_mpi_plan_many_dft_c2r_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + integer(C_INTPTR_T), value :: howmany + integer(C_INTPTR_T), value :: iblock + integer(C_INTPTR_T), value :: oblock + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_many_dft_c2r + + type(C_PTR) 
function fftwl_mpi_plan_dft_c2r(rnk,n,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_dft_c2r_f03') + import + integer(C_INT), value :: rnk + integer(C_INTPTR_T), dimension(*), intent(in) :: n + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_c2r + + type(C_PTR) function fftwl_mpi_plan_dft_c2r_2d(n0,n1,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_dft_c2r_2d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_c2r_2d + + type(C_PTR) function fftwl_mpi_plan_dft_c2r_3d(n0,n1,n2,in,out,comm,flags) bind(C, name='fftwl_mpi_plan_dft_c2r_3d_f03') + import + integer(C_INTPTR_T), value :: n0 + integer(C_INTPTR_T), value :: n1 + integer(C_INTPTR_T), value :: n2 + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + integer(C_MPI_FINT), value :: comm + integer(C_INT), value :: flags + end function fftwl_mpi_plan_dft_c2r_3d + + subroutine fftwl_mpi_gather_wisdom(comm_) bind(C, name='fftwl_mpi_gather_wisdom_f03') + import + integer(C_MPI_FINT), value :: comm_ + end subroutine fftwl_mpi_gather_wisdom + + subroutine fftwl_mpi_broadcast_wisdom(comm_) bind(C, name='fftwl_mpi_broadcast_wisdom_f03') + import + integer(C_MPI_FINT), value :: comm_ + end subroutine fftwl_mpi_broadcast_wisdom + + subroutine fftwl_mpi_execute_dft(p,in,out) bind(C, name='fftwl_mpi_execute_dft') + import + type(C_PTR), value :: p + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(inout) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + end subroutine fftwl_mpi_execute_dft + + 
subroutine fftwl_mpi_execute_dft_r2c(p,in,out) bind(C, name='fftwl_mpi_execute_dft_r2c') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: in + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(out) :: out + end subroutine fftwl_mpi_execute_dft_r2c + + subroutine fftwl_mpi_execute_dft_c2r(p,in,out) bind(C, name='fftwl_mpi_execute_dft_c2r') + import + type(C_PTR), value :: p + complex(C_LONG_DOUBLE_COMPLEX), dimension(*), intent(inout) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + end subroutine fftwl_mpi_execute_dft_c2r + + subroutine fftwl_mpi_execute_r2r(p,in,out) bind(C, name='fftwl_mpi_execute_r2r') + import + type(C_PTR), value :: p + real(C_LONG_DOUBLE), dimension(*), intent(inout) :: in + real(C_LONG_DOUBLE), dimension(*), intent(out) :: out + end subroutine fftwl_mpi_execute_r2r + + end interface diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/genf03-wrap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/genf03-wrap.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,78 @@ +#!/usr/bin/perl -w +# Generate Fortran 2003 wrappers (which translate MPI_Comm from f2c) from +# function declarations of the form (one per line): +# extern <type> fftw_mpi_<name>(...args...) +# extern <type> fftw_mpi_<name>(...args...) +# ... +# with no line breaks within a given function. (It's too much work to +# write a general parser, since we just have to handle FFTW's header files.) +# Each declaration has at least one MPI_Comm argument. 
+ +sub canonicalize_type { # normalize one C type string taken from a declaration + my($type); + ($type) = @_; + $type =~ s/ +/ /g; # squeeze runs of spaces + $type =~ s/^ //; + $type =~ s/ $//; + $type =~ s/([^\* ])\*/$1 \*/g; # put a space before a '*' that directly follows a type token + $type =~ s/double/R/; + $type =~ s/fftw_([A-Za-z0-9_]+)/X($1)/; # fftw_foo becomes X(foo); $1 rather than \1 keeps -w quiet + return $type; +} + +while (<>) { # each input line is expected to hold one whole declaration + next if /^ *$/; + if (/^ *extern +([a-zA-Z_0-9 ]+[ \*]) *fftw_mpi_([a-zA-Z_0-9]+) *\((.*)\) *$/) { + $ret = &canonicalize_type($1); + $name = $2; + + $args = $3; + + + print "\n$ret XM(${name}_f03)("; + + $comma = ""; + foreach $arg (split(/ *, */, $args)) { + $arg =~ /^([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) *$/; + $argtype = &canonicalize_type($1); + $argname = $2; + print $comma; + if ($argtype eq "MPI_Comm") { + print "MPI_Fint f_$argname"; # wrapper receives the communicator as an MPI_Fint named f_<arg> + } + else { + print "$argtype $argname"; + } + $comma = ", "; + } + print ")\n{\n"; + + print " MPI_Comm "; # declare one local MPI_Comm per communicator argument + $comma = ""; + foreach $arg (split(/ *, */, $args)) { + $arg =~ /^([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) *$/; + $argtype = &canonicalize_type($1); + $argname = $2; + if ($argtype eq "MPI_Comm") { + print "$comma$argname"; + $comma = ", "; + } + } + print ";\n\n"; + + foreach $arg (split(/ *, */, $args)) { + $arg =~ /^([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) *$/; + $argtype = &canonicalize_type($1); + $argname = $2; + if ($argtype eq "MPI_Comm") { + print " $argname = MPI_Comm_f2c(f_$argname);\n"; # generated C converts the MPI_Fint to an MPI_Comm + } + } + + $argnames = $args; + $argnames =~ s/([a-zA-Z_0-9 ]+[ \*]) *([a-zA-Z_0-9]+) */$2/g; # keep only the argument names for the forwarded call + print " "; + print "return " if ($ret ne "void"); + print "XM($name)($argnames);\n}\n"; + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/ifftw-mpi.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/ifftw-mpi.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free 
Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* FFTW-MPI internal header file */ +#ifndef __IFFTW_MPI_H__ +#define __IFFTW_MPI_H__ + +#include "ifftw.h" +#include "rdft.h" + +#include + +/* mpi problem flags: problem-dependent meaning, but in general + SCRAMBLED means some reordering *within* the dimensions, while + TRANSPOSED means some reordering *of* the dimensions */ +#define SCRAMBLED_IN (1 << 0) +#define SCRAMBLED_OUT (1 << 1) +#define TRANSPOSED_IN (1 << 2) +#define TRANSPOSED_OUT (1 << 3) +#define RANK1_BIGVEC_ONLY (1 << 4) /* for rank=1, allow only bigvec solver */ + +#define ONLY_SCRAMBLEDP(flags) (!((flags) & ~(SCRAMBLED_IN|SCRAMBLED_OUT))) +#define ONLY_TRANSPOSEDP(flags) (!((flags) & ~(TRANSPOSED_IN|TRANSPOSED_OUT))) + +#if defined(FFTW_SINGLE) +# define FFTW_MPI_TYPE MPI_FLOAT +#elif defined(FFTW_LDOUBLE) +# define FFTW_MPI_TYPE MPI_LONG_DOUBLE +#elif defined(FFTW_QUAD) +# error MPI quad-precision type is unknown +#else +# define FFTW_MPI_TYPE MPI_DOUBLE +#endif + +/* all fftw-mpi identifiers start with fftw_mpi (or fftwf_mpi etc.) */ +#define XM(name) X(CONCAT(mpi_, name)) + +/***********************************************************************/ +/* block distributions */ + +/* a distributed dimension of length n with input and output block + sizes ib and ob, respectively. */ +typedef enum { IB = 0, OB } block_kind; +typedef struct { + INT n; + INT b[2]; /* b[IB], b[OB] */ +} ddim; + +/* Loop over k in {IB, OB}. 
Note: need explicit casts for C++. */ +#define FORALL_BLOCK_KIND(k) for (k = IB; k <= OB; k = (block_kind) (((int) k) + 1)) + +/* unlike tensors in the serial FFTW, the ordering of the dtensor + dimensions matters - both the array and the block layout are + row-major order. */ +typedef struct { + int rnk; /* NOTE(review): presumably the number of entries in dims -- confirm in dtensor.c */ +#if defined(STRUCT_HACK_KR) + ddim dims[1]; /* one-element trailing array when STRUCT_HACK_KR is defined */ +#elif defined(STRUCT_HACK_C99) + ddim dims[]; /* C99 flexible array member */ +#else + ddim *dims; /* neither hack macro defined: dims is a plain pointer */ +#endif +} dtensor; + + +/* dtensor.c: */ +dtensor *XM(mkdtensor)(int rnk); +void XM(dtensor_destroy)(dtensor *sz); +dtensor *XM(dtensor_copy)(const dtensor *sz); +dtensor *XM(dtensor_canonical)(const dtensor *sz, int compress); +int XM(dtensor_validp)(const dtensor *sz); +void XM(dtensor_md5)(md5 *p, const dtensor *t); +void XM(dtensor_print)(const dtensor *t, printer *p); + +/* block.c: */ + +/* for a single distributed dimension: */ +INT XM(num_blocks)(INT n, INT block); +int XM(num_blocks_ok)(INT n, INT block, MPI_Comm comm); +INT XM(default_block)(INT n, int n_pes); +INT XM(block)(INT n, INT block, int which_block); + +/* for multiple distributed dimensions: */ +INT XM(num_blocks_total)(const dtensor *sz, block_kind k); +int XM(idle_process)(const dtensor *sz, block_kind k, int which_pe); +void XM(block_coords)(const dtensor *sz, block_kind k, int which_pe, + INT *coords); +INT XM(total_block)(const dtensor *sz, block_kind k, int which_pe); +int XM(is_local_after)(int dim, const dtensor *sz, block_kind k); +int XM(is_local)(const dtensor *sz, block_kind k); +int XM(is_block1d)(const dtensor *sz, block_kind k); + +/* choose-radix.c */ +INT XM(choose_radix)(ddim d, int n_pes, unsigned flags, int sign, + INT rblock[2], INT mblock[2]); + +/***********************************************************************/ +/* any_true.c */ +int XM(any_true)(int condition, MPI_Comm comm); +int XM(md5_equal)(md5 m, MPI_Comm comm); + +/* conf.c */ +void XM(conf_standard)(planner *p); + +/***********************************************************************/ +/* 
rearrange.c */ + +/* Different ways to rearrange the vector dimension vn during transposition, + reflecting different tradeoffs between ease of transposition and + contiguity during the subsequent DFTs. + + TODO: can we pare this down to CONTIG and DISCONTIG, at least + in MEASURE mode? SQUARE_MIDDLE is also used for 1d destroy-input DFTs. */ +typedef enum { + CONTIG = 0, /* vn x 1: make subsequent DFTs contiguous */ + DISCONTIG, /* P x (vn/P) for P processes */ + SQUARE_BEFORE, /* try to get square transpose at beginning */ + SQUARE_MIDDLE, /* try to get square transpose in the middle */ + SQUARE_AFTER /* try to get square transpose at end */ +} rearrangement; /* order matters: FORALL_REARRANGE walks CONTIG..SQUARE_MIDDLE by adding 1 */ + +/* skipping SQUARE_AFTER since it doesn't seem to offer any advantage + over SQUARE_BEFORE */ +#define FORALL_REARRANGE(rearrange) for (rearrange = CONTIG; rearrange <= SQUARE_MIDDLE; rearrange = (rearrangement) (((int) rearrange) + 1)) + +int XM(rearrange_applicable)(rearrangement rearrange, + ddim dim0, INT vn, int n_pes); +INT XM(rearrange_ny)(rearrangement rearrange, ddim dim0, INT vn, int n_pes); + +/***********************************************************************/ + +#endif /* __IFFTW_MPI_H__ */ + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/mpi-bench.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/mpi-bench.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,844 @@ +/**************************************************************************/ +/* NOTE to users: this is the FFTW-MPI self-test and benchmark program. + It is probably NOT a good place to learn FFTW usage, since it has a + lot of added complexity in order to exercise and test the full API, + etcetera. We suggest reading the manual. 
*/ +/**************************************************************************/ + +#include +#include +#include +#include "fftw3-mpi.h" +#include "fftw-bench.h" + +#if defined(BENCHFFT_SINGLE) +# define BENCH_MPI_TYPE MPI_FLOAT +#elif defined(BENCHFFT_LDOUBLE) +# define BENCH_MPI_TYPE MPI_LONG_DOUBLE +#elif defined(BENCHFFT_QUAD) +# error MPI quad-precision type is unknown +#else +# define BENCH_MPI_TYPE MPI_DOUBLE +#endif + +#if SIZEOF_PTRDIFF_T == SIZEOF_INT +# define FFTW_MPI_PTRDIFF_T MPI_INT +#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG +# define FFTW_MPI_PTRDIFF_T MPI_LONG +#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG_LONG +# define FFTW_MPI_PTRDIFF_T MPI_LONG_LONG +#else +# error MPI type for ptrdiff_t is unknown +# define FFTW_MPI_PTRDIFF_T MPI_LONG +#endif + +static const char *mkversion(void) { return FFTW(version); } +static const char *mkcc(void) { return FFTW(cc); } +static const char *mkcodelet_optim(void) { return FFTW(codelet_optim); } +static const char *mknproc(void) { + static char buf[32]; + int ncpus; + MPI_Comm_size(MPI_COMM_WORLD, &ncpus); +#ifdef HAVE_SNPRINTF + snprintf(buf, 32, "%d", ncpus); +#else + sprintf(buf, "%d", ncpus); +#endif + return buf; +} + +BEGIN_BENCH_DOC +BENCH_DOC("name", "fftw3_mpi") +BENCH_DOCF("version", mkversion) +BENCH_DOCF("cc", mkcc) +BENCH_DOCF("codelet-optim", mkcodelet_optim) +BENCH_DOCF("nproc", mknproc) +END_BENCH_DOC + +static int n_pes = 1, my_pe = 0; + +/* global variables describing the shape of the data and its distribution */ +static int rnk; +static ptrdiff_t vn, iNtot, oNtot; +static ptrdiff_t *local_ni=0, *local_starti=0; +static ptrdiff_t *local_no=0, *local_starto=0; +static ptrdiff_t *all_local_ni=0, *all_local_starti=0; /* n_pes x rnk arrays */ +static ptrdiff_t *all_local_no=0, *all_local_starto=0; /* n_pes x rnk arrays */ +static ptrdiff_t *istrides = 0, *ostrides = 0; +static ptrdiff_t *total_ni=0, *total_no=0; +static int *isend_cnt = 0, *isend_off = 0; /* for MPI_Scatterv */ +static int *orecv_cnt = 0, 
*orecv_off = 0; /* for MPI_Gatherv */ + +static bench_real *local_in = 0, *local_out = 0; +static bench_real *all_local_in = 0, *all_local_out = 0; +static int all_local_in_alloc = 0, all_local_out_alloc = 0; +static FFTW(plan) plan_scramble_in = 0, plan_unscramble_out = 0; + +static void alloc_rnk(int rnk_) { + rnk = rnk_; + bench_free(local_ni); + if (rnk == 0) + local_ni = 0; + else + local_ni = (ptrdiff_t *) bench_malloc(sizeof(ptrdiff_t) * rnk + * (8 + n_pes * 4)); + + local_starti = local_ni + rnk; + local_no = local_ni + 2 * rnk; + local_starto = local_ni + 3 * rnk; + istrides = local_ni + 4 * rnk; + ostrides = local_ni + 5 * rnk; + total_ni = local_ni + 6 * rnk; + total_no = local_ni + 7 * rnk; + all_local_ni = local_ni + 8 * rnk; + all_local_starti = local_ni + (8 + n_pes) * rnk; + all_local_no = local_ni + (8 + 2 * n_pes) * rnk; + all_local_starto = local_ni + (8 + 3 * n_pes) * rnk; +} + +static void setup_gather_scatter(void) +{ + int i, j; + ptrdiff_t off; + + MPI_Gather(local_ni, rnk, FFTW_MPI_PTRDIFF_T, + all_local_ni, rnk, FFTW_MPI_PTRDIFF_T, + 0, MPI_COMM_WORLD); + MPI_Bcast(all_local_ni, rnk*n_pes, FFTW_MPI_PTRDIFF_T, 0, MPI_COMM_WORLD); + MPI_Gather(local_starti, rnk, FFTW_MPI_PTRDIFF_T, + all_local_starti, rnk, FFTW_MPI_PTRDIFF_T, + 0, MPI_COMM_WORLD); + MPI_Bcast(all_local_starti, rnk*n_pes, FFTW_MPI_PTRDIFF_T, 0, MPI_COMM_WORLD); + + MPI_Gather(local_no, rnk, FFTW_MPI_PTRDIFF_T, + all_local_no, rnk, FFTW_MPI_PTRDIFF_T, + 0, MPI_COMM_WORLD); + MPI_Bcast(all_local_no, rnk*n_pes, FFTW_MPI_PTRDIFF_T, 0, MPI_COMM_WORLD); + MPI_Gather(local_starto, rnk, FFTW_MPI_PTRDIFF_T, + all_local_starto, rnk, FFTW_MPI_PTRDIFF_T, + 0, MPI_COMM_WORLD); + MPI_Bcast(all_local_starto, rnk*n_pes, FFTW_MPI_PTRDIFF_T, 0, MPI_COMM_WORLD); + + off = 0; + for (i = 0; i < n_pes; ++i) { + ptrdiff_t N = vn; + for (j = 0; j < rnk; ++j) + N *= all_local_ni[i * rnk + j]; + isend_cnt[i] = N; + isend_off[i] = off; + off += N; + } + iNtot = off; + all_local_in_alloc = 1; + + 
istrides[rnk - 1] = vn; + for (j = rnk - 2; j >= 0; --j) + istrides[j] = total_ni[j + 1] * istrides[j + 1]; + + off = 0; + for (i = 0; i < n_pes; ++i) { + ptrdiff_t N = vn; + for (j = 0; j < rnk; ++j) + N *= all_local_no[i * rnk + j]; + orecv_cnt[i] = N; + orecv_off[i] = off; + off += N; + } + oNtot = off; + all_local_out_alloc = 1; + + ostrides[rnk - 1] = vn; + for (j = rnk - 2; j >= 0; --j) + ostrides[j] = total_no[j + 1] * ostrides[j + 1]; +} + +static void copy_block_out(const bench_real *in, + int rnk, ptrdiff_t *n, ptrdiff_t *start, + ptrdiff_t is, ptrdiff_t *os, ptrdiff_t vn, + bench_real *out) +{ + ptrdiff_t i; + if (rnk == 0) { + for (i = 0; i < vn; ++i) + out[i] = in[i]; + } + else if (rnk == 1) { /* this case is just an optimization */ + ptrdiff_t j; + out += start[0] * os[0]; + for (j = 0; j < n[0]; ++j) { + for (i = 0; i < vn; ++i) + out[i] = in[i]; + in += is; + out += os[0]; + } + } + else { + /* we should do n[0] for locality, but this way is simpler to code */ + for (i = 0; i < n[rnk - 1]; ++i) + copy_block_out(in + i * is, + rnk - 1, n, start, is * n[rnk - 1], os, vn, + out + (start[rnk - 1] + i) * os[rnk - 1]); + } +} + +static void copy_block_in(bench_real *in, + int rnk, ptrdiff_t *n, ptrdiff_t *start, + ptrdiff_t is, ptrdiff_t *os, ptrdiff_t vn, + const bench_real *out) +{ + ptrdiff_t i; + if (rnk == 0) { + for (i = 0; i < vn; ++i) + in[i] = out[i]; + } + else if (rnk == 1) { /* this case is just an optimization */ + ptrdiff_t j; + out += start[0] * os[0]; + for (j = 0; j < n[0]; ++j) { + for (i = 0; i < vn; ++i) + in[i] = out[i]; + in += is; + out += os[0]; + } + } + else { + /* we should do n[0] for locality, but this way is simpler to code */ + for (i = 0; i < n[rnk - 1]; ++i) + copy_block_in(in + i * is, + rnk - 1, n, start, is * n[rnk - 1], os, vn, + out + (start[rnk - 1] + i) * os[rnk - 1]); + } +} + +static void do_scatter_in(bench_real *in) +{ + bench_real *ali; + int i; + if (all_local_in_alloc) { + bench_free(all_local_in); + 
all_local_in = (bench_real*) bench_malloc(iNtot*sizeof(bench_real)); + all_local_in_alloc = 0; + } + ali = all_local_in; + for (i = 0; i < n_pes; ++i) { + copy_block_in(ali, + rnk, all_local_ni + i * rnk, + all_local_starti + i * rnk, + vn, istrides, vn, + in); + ali += isend_cnt[i]; + } + MPI_Scatterv(all_local_in, isend_cnt, isend_off, BENCH_MPI_TYPE, + local_in, isend_cnt[my_pe], BENCH_MPI_TYPE, + 0, MPI_COMM_WORLD); +} + +static void do_gather_out(bench_real *out) +{ + bench_real *alo; + int i; + + if (all_local_out_alloc) { + bench_free(all_local_out); + all_local_out = (bench_real*) bench_malloc(oNtot*sizeof(bench_real)); + all_local_out_alloc = 0; + } + MPI_Gatherv(local_out, orecv_cnt[my_pe], BENCH_MPI_TYPE, + all_local_out, orecv_cnt, orecv_off, BENCH_MPI_TYPE, + 0, MPI_COMM_WORLD); + MPI_Bcast(all_local_out, oNtot, BENCH_MPI_TYPE, 0, MPI_COMM_WORLD); + alo = all_local_out; + for (i = 0; i < n_pes; ++i) { + copy_block_out(alo, + rnk, all_local_no + i * rnk, + all_local_starto + i * rnk, + vn, ostrides, vn, + out); + alo += orecv_cnt[i]; + } +} + +static void alloc_local(ptrdiff_t nreal, int inplace) +{ + bench_free(local_in); + if (local_out != local_in) bench_free(local_out); + local_in = local_out = 0; + if (nreal > 0) { + ptrdiff_t i; + local_in = (bench_real*) bench_malloc(nreal * sizeof(bench_real)); + if (inplace) + local_out = local_in; + else + local_out = (bench_real*) bench_malloc(nreal * sizeof(bench_real)); + for (i = 0; i < nreal; ++i) local_in[i] = local_out[i] = 0.0; + } +} + +void after_problem_rcopy_from(bench_problem *p, bench_real *ri) +{ + UNUSED(p); + do_scatter_in(ri); + if (plan_scramble_in) FFTW(execute)(plan_scramble_in); +} + +void after_problem_rcopy_to(bench_problem *p, bench_real *ro) +{ + UNUSED(p); + if (plan_unscramble_out) FFTW(execute)(plan_unscramble_out); + do_gather_out(ro); +} + +void after_problem_ccopy_from(bench_problem *p, bench_real *ri, bench_real *ii) +{ + UNUSED(ii); + after_problem_rcopy_from(p, ri); +} + 
+void after_problem_ccopy_to(bench_problem *p, bench_real *ro, bench_real *io) +{ + UNUSED(io); + after_problem_rcopy_to(p, ro); +} + +void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii) +{ + UNUSED(ii); + after_problem_rcopy_from(p, ri); +} + +void after_problem_hccopy_to(bench_problem *p, bench_real *ro, bench_real *io) +{ + UNUSED(io); + after_problem_rcopy_to(p, ro); +} + +static FFTW(plan) mkplan_transpose_local(ptrdiff_t nx, ptrdiff_t ny, + ptrdiff_t vn, + bench_real *in, bench_real *out) +{ + FFTW(iodim64) hdims[3]; + FFTW(r2r_kind) k[3]; + FFTW(plan) pln; + + hdims[0].n = nx; + hdims[0].is = ny * vn; + hdims[0].os = vn; + hdims[1].n = ny; + hdims[1].is = vn; + hdims[1].os = nx * vn; + hdims[2].n = vn; + hdims[2].is = 1; + hdims[2].os = 1; + k[0] = k[1] = k[2] = FFTW_R2HC; + pln = FFTW(plan_guru64_r2r)(0, 0, 3, hdims, in, out, k, FFTW_ESTIMATE); + BENCH_ASSERT(pln != 0); + return pln; +} + +static int tensor_rowmajor_transposedp(bench_tensor *t) +{ + bench_iodim *d; + int i; + + BENCH_ASSERT(FINITE_RNK(t->rnk)); + if (t->rnk < 2) + return 0; + + d = t->dims; + if (d[0].is != d[1].is * d[1].n + || d[0].os != d[1].is + || d[1].os != d[0].os * d[0].n) + return 0; + if (t->rnk > 2 && d[1].is != d[2].is * d[2].n) + return 0; + for (i = 2; i + 1 < t->rnk; ++i) { + d = t->dims + i; + if (d[0].is != d[1].is * d[1].n + || d[0].os != d[1].os * d[1].n) + return 0; + } + + if (t->rnk > 2 && t->dims[t->rnk-1].is != t->dims[t->rnk-1].os) + return 0; + return 1; +} + +static int tensor_contiguousp(bench_tensor *t, int s) +{ + return (t->dims[t->rnk-1].is == s + && ((tensor_rowmajorp(t) && + t->dims[t->rnk-1].is == t->dims[t->rnk-1].os) + || tensor_rowmajor_transposedp(t))); +} + +static FFTW(plan) mkplan_complex(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln = 0; + int i; + ptrdiff_t ntot; + + vn = p->vecsz->rnk == 1 ? 
p->vecsz->dims[0].n : 1; + + if (p->sz->rnk < 1 + || p->split + || !tensor_contiguousp(p->sz, vn) + || tensor_rowmajor_transposedp(p->sz) + || p->vecsz->rnk > 1 + || (p->vecsz->rnk == 1 && (p->vecsz->dims[0].is != 1 + || p->vecsz->dims[0].os != 1))) + return 0; + + alloc_rnk(p->sz->rnk); + for (i = 0; i < rnk; ++i) { + total_ni[i] = total_no[i] = p->sz->dims[i].n; + local_ni[i] = local_no[i] = total_ni[i]; + local_starti[i] = local_starto[i] = 0; + } + if (rnk > 1) { + ptrdiff_t n, start, nT, startT; + ntot = FFTW(mpi_local_size_many_transposed) + (p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + MPI_COMM_WORLD, + &n, &start, &nT, &startT); + if (flags & FFTW_MPI_TRANSPOSED_IN) { + local_ni[1] = nT; + local_starti[1] = startT; + } + else { + local_ni[0] = n; + local_starti[0] = start; + } + if (flags & FFTW_MPI_TRANSPOSED_OUT) { + local_no[1] = nT; + local_starto[1] = startT; + } + else { + local_no[0] = n; + local_starto[0] = start; + } + } + else if (rnk == 1) { + ntot = FFTW(mpi_local_size_many_1d) + (total_ni[0], vn, MPI_COMM_WORLD, p->sign, flags, + local_ni, local_starti, local_no, local_starto); + } + alloc_local(ntot * 2, p->in == p->out); + + pln = FFTW(mpi_plan_many_dft)(p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + (FFTW(complex) *) local_in, + (FFTW(complex) *) local_out, + MPI_COMM_WORLD, p->sign, flags); + + vn *= 2; + + if (rnk > 1) { + ptrdiff_t nrest = 1; + for (i = 2; i < rnk; ++i) nrest *= p->sz->dims[i].n; + if (flags & FFTW_MPI_TRANSPOSED_IN) + plan_scramble_in = mkplan_transpose_local( + p->sz->dims[0].n, local_ni[1], vn * nrest, + local_in, local_in); + if (flags & FFTW_MPI_TRANSPOSED_OUT) + plan_unscramble_out = mkplan_transpose_local( + local_no[1], p->sz->dims[0].n, vn * nrest, + local_out, local_out); + } + + return pln; +} + +static int tensor_real_contiguousp(bench_tensor *t, int sign, int s) +{ + return (t->dims[t->rnk-1].is == s + && ((tensor_real_rowmajorp(t, sign, 1) && 
+ t->dims[t->rnk-1].is == t->dims[t->rnk-1].os))); +} + +static FFTW(plan) mkplan_real(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln = 0; + int i; + ptrdiff_t ntot; + + vn = p->vecsz->rnk == 1 ? p->vecsz->dims[0].n : 1; + + if (p->sz->rnk < 2 + || p->split + || !tensor_real_contiguousp(p->sz, p->sign, vn) + || tensor_rowmajor_transposedp(p->sz) + || p->vecsz->rnk > 1 + || (p->vecsz->rnk == 1 && (p->vecsz->dims[0].is != 1 + || p->vecsz->dims[0].os != 1))) + return 0; + + alloc_rnk(p->sz->rnk); + for (i = 0; i < rnk; ++i) { + total_ni[i] = total_no[i] = p->sz->dims[i].n; + local_ni[i] = local_no[i] = total_ni[i]; + local_starti[i] = local_starto[i] = 0; + } + local_ni[rnk-1] = local_no[rnk-1] = total_ni[rnk-1] = total_no[rnk-1] + = p->sz->dims[rnk-1].n / 2 + 1; + { + ptrdiff_t n, start, nT, startT; + ntot = FFTW(mpi_local_size_many_transposed) + (p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + MPI_COMM_WORLD, + &n, &start, &nT, &startT); + if (flags & FFTW_MPI_TRANSPOSED_IN) { + local_ni[1] = nT; + local_starti[1] = startT; + } + else { + local_ni[0] = n; + local_starti[0] = start; + } + if (flags & FFTW_MPI_TRANSPOSED_OUT) { + local_no[1] = nT; + local_starto[1] = startT; + } + else { + local_no[0] = n; + local_starto[0] = start; + } + } + alloc_local(ntot * 2, p->in == p->out); + + total_ni[rnk - 1] = p->sz->dims[rnk - 1].n; + if (p->sign < 0) + pln = FFTW(mpi_plan_many_dft_r2c)(p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + local_in, + (FFTW(complex) *) local_out, + MPI_COMM_WORLD, flags); + else + pln = FFTW(mpi_plan_many_dft_c2r)(p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + (FFTW(complex) *) local_in, + local_out, + MPI_COMM_WORLD, flags); + + total_ni[rnk - 1] = p->sz->dims[rnk - 1].n / 2 + 1; + vn *= 2; + + { + ptrdiff_t nrest = 1; + for (i = 2; i < rnk; ++i) nrest *= total_ni[i]; + if (flags & FFTW_MPI_TRANSPOSED_IN) + plan_scramble_in = 
mkplan_transpose_local( + total_ni[0], local_ni[1], vn * nrest, + local_in, local_in); + if (flags & FFTW_MPI_TRANSPOSED_OUT) + plan_unscramble_out = mkplan_transpose_local( + local_no[1], total_ni[0], vn * nrest, + local_out, local_out); + } + + return pln; +} + +static FFTW(plan) mkplan_transpose(bench_problem *p, unsigned flags) +{ + ptrdiff_t ntot, nx, ny; + int ix=0, iy=1, i; + const bench_iodim *d = p->vecsz->dims; + FFTW(plan) pln; + + if (p->vecsz->rnk == 3) { + for (i = 0; i < 3; ++i) + if (d[i].is == 1 && d[i].os == 1) { + vn = d[i].n; + ix = (i + 1) % 3; + iy = (i + 2) % 3; + break; + } + if (i == 3) return 0; + } + else { + vn = 1; + ix = 0; + iy = 1; + } + + if (d[ix].is == d[iy].n * vn && d[ix].os == vn + && d[iy].os == d[ix].n * vn && d[iy].is == vn) { + nx = d[ix].n; + ny = d[iy].n; + } + else if (d[iy].is == d[ix].n * vn && d[iy].os == vn + && d[ix].os == d[iy].n * vn && d[ix].is == vn) { + nx = d[iy].n; + ny = d[ix].n; + } + else + return 0; + + alloc_rnk(2); + ntot = vn * FFTW(mpi_local_size_2d_transposed)(nx, ny, MPI_COMM_WORLD, + &local_ni[0], + &local_starti[0], + &local_no[0], + &local_starto[0]); + local_ni[1] = ny; + local_starti[1] = 0; + local_no[1] = nx; + local_starto[1] = 0; + total_ni[0] = nx; total_ni[1] = ny; + total_no[1] = nx; total_no[0] = ny; + alloc_local(ntot, p->in == p->out); + + pln = FFTW(mpi_plan_many_transpose)(nx, ny, vn, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + local_in, local_out, + MPI_COMM_WORLD, flags); + + if (flags & FFTW_MPI_TRANSPOSED_IN) + plan_scramble_in = mkplan_transpose_local(local_ni[0], ny, vn, + local_in, local_in); + if (flags & FFTW_MPI_TRANSPOSED_OUT) + plan_unscramble_out = mkplan_transpose_local + (nx, local_no[0], vn, local_out, local_out); + +#if 0 + if (pln && vn == 1) { + int i, j; + bench_real *ri = (bench_real *) p->in; + bench_real *ro = (bench_real *) p->out; + if (!ri || !ro) return pln; + setup_gather_scatter(); + for (i = 0; i < nx * ny; ++i) + ri[i] = i; + 
after_problem_rcopy_from(p, ri); + FFTW(execute)(pln); + after_problem_rcopy_to(p, ro); + if (my_pe == 0) { + for (i = 0; i < nx; ++i) { + for (j = 0; j < ny; ++j) + printf(" %3g", ro[j * nx + i]); + printf("\n"); + } + } + } +#endif + + return pln; +} + +static FFTW(plan) mkplan_r2r(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln = 0; + int i; + ptrdiff_t ntot; + FFTW(r2r_kind) *k; + + if ((p->sz->rnk == 0 || (p->sz->rnk == 1 && p->sz->dims[0].n == 1)) + && p->vecsz->rnk >= 2 && p->vecsz->rnk <= 3) + return mkplan_transpose(p, flags); + + vn = p->vecsz->rnk == 1 ? p->vecsz->dims[0].n : 1; + + if (p->sz->rnk < 1 + || p->split + || !tensor_contiguousp(p->sz, vn) + || tensor_rowmajor_transposedp(p->sz) + || p->vecsz->rnk > 1 + || (p->vecsz->rnk == 1 && (p->vecsz->dims[0].is != 1 + || p->vecsz->dims[0].os != 1))) + return 0; + + alloc_rnk(p->sz->rnk); + for (i = 0; i < rnk; ++i) { + total_ni[i] = total_no[i] = p->sz->dims[i].n; + local_ni[i] = local_no[i] = total_ni[i]; + local_starti[i] = local_starto[i] = 0; + } + if (rnk > 1) { + ptrdiff_t n, start, nT, startT; + ntot = FFTW(mpi_local_size_many_transposed) + (p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + MPI_COMM_WORLD, + &n, &start, &nT, &startT); + if (flags & FFTW_MPI_TRANSPOSED_IN) { + local_ni[1] = nT; + local_starti[1] = startT; + } + else { + local_ni[0] = n; + local_starti[0] = start; + } + if (flags & FFTW_MPI_TRANSPOSED_OUT) { + local_no[1] = nT; + local_starto[1] = startT; + } + else { + local_no[0] = n; + local_starto[0] = start; + } + } + else if (rnk == 1) { + ntot = FFTW(mpi_local_size_many_1d) + (total_ni[0], vn, MPI_COMM_WORLD, p->sign, flags, + local_ni, local_starti, local_no, local_starto); + } + alloc_local(ntot, p->in == p->out); + + k = (FFTW(r2r_kind) *) bench_malloc(sizeof(FFTW(r2r_kind)) * p->sz->rnk); + for (i = 0; i < p->sz->rnk; ++i) + switch (p->k[i]) { + case R2R_R2HC: k[i] = FFTW_R2HC; break; + case R2R_HC2R: k[i] = FFTW_HC2R; break; + case 
R2R_DHT: k[i] = FFTW_DHT; break; + case R2R_REDFT00: k[i] = FFTW_REDFT00; break; + case R2R_REDFT01: k[i] = FFTW_REDFT01; break; + case R2R_REDFT10: k[i] = FFTW_REDFT10; break; + case R2R_REDFT11: k[i] = FFTW_REDFT11; break; + case R2R_RODFT00: k[i] = FFTW_RODFT00; break; + case R2R_RODFT01: k[i] = FFTW_RODFT01; break; + case R2R_RODFT10: k[i] = FFTW_RODFT10; break; + case R2R_RODFT11: k[i] = FFTW_RODFT11; break; + default: BENCH_ASSERT(0); + } + + pln = FFTW(mpi_plan_many_r2r)(p->sz->rnk, total_ni, vn, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + local_in, local_out, + MPI_COMM_WORLD, k, flags); + bench_free(k); + + if (rnk > 1) { + ptrdiff_t nrest = 1; + for (i = 2; i < rnk; ++i) nrest *= p->sz->dims[i].n; + if (flags & FFTW_MPI_TRANSPOSED_IN) + plan_scramble_in = mkplan_transpose_local( + p->sz->dims[0].n, local_ni[1], vn * nrest, + local_in, local_in); + if (flags & FFTW_MPI_TRANSPOSED_OUT) + plan_unscramble_out = mkplan_transpose_local( + local_no[1], p->sz->dims[0].n, vn * nrest, + local_out, local_out); + } + + return pln; +} + +FFTW(plan) mkplan(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln = 0; + FFTW(destroy_plan)(plan_scramble_in); plan_scramble_in = 0; + FFTW(destroy_plan)(plan_unscramble_out); plan_unscramble_out = 0; + if (p->scrambled_in) { + if (p->sz->rnk == 1 && p->sz->dims[0].n != 1) + flags |= FFTW_MPI_SCRAMBLED_IN; + else + flags |= FFTW_MPI_TRANSPOSED_IN; + } + if (p->scrambled_out) { + if (p->sz->rnk == 1 && p->sz->dims[0].n != 1) + flags |= FFTW_MPI_SCRAMBLED_OUT; + else + flags |= FFTW_MPI_TRANSPOSED_OUT; + } + switch (p->kind) { + case PROBLEM_COMPLEX: + pln =mkplan_complex(p, flags); + break; + case PROBLEM_REAL: + pln = mkplan_real(p, flags); + break; + case PROBLEM_R2R: + pln = mkplan_r2r(p, flags); + break; + default: BENCH_ASSERT(0); + } + if (pln) setup_gather_scatter(); + return pln; +} + +void main_init(int *argc, char ***argv) +{ +#ifdef HAVE_SMP +# if MPI_VERSION >= 2 /* for MPI_Init_thread */ + int provided; + 
MPI_Init_thread(argc, argv, MPI_THREAD_FUNNELED, &provided); + threads_ok = provided >= MPI_THREAD_FUNNELED; +# else + MPI_Init(argc, argv); + threads_ok = 0; +# endif +#else + MPI_Init(argc, argv); +#endif + MPI_Comm_rank(MPI_COMM_WORLD, &my_pe); + MPI_Comm_size(MPI_COMM_WORLD, &n_pes); + if (my_pe != 0) verbose = -999; + no_speed_allocation = 1; /* so we can benchmark transforms > memory */ + always_pad_real = 1; /* out-of-place real transforms are padded */ + isend_cnt = (int *) bench_malloc(sizeof(int) * n_pes); + isend_off = (int *) bench_malloc(sizeof(int) * n_pes); + orecv_cnt = (int *) bench_malloc(sizeof(int) * n_pes); + orecv_off = (int *) bench_malloc(sizeof(int) * n_pes); + + /* init_threads must be called before any other FFTW function, + including mpi_init, because it has to register the threads hooks + before the planner is initalized */ +#ifdef HAVE_SMP + if (threads_ok) { BENCH_ASSERT(FFTW(init_threads)()); } +#endif + FFTW(mpi_init)(); +} + +void initial_cleanup(void) +{ + alloc_rnk(0); + alloc_local(0, 0); + bench_free(all_local_in); all_local_in = 0; + bench_free(all_local_out); all_local_out = 0; + bench_free(isend_off); isend_off = 0; + bench_free(isend_cnt); isend_cnt = 0; + bench_free(orecv_off); orecv_off = 0; + bench_free(orecv_cnt); orecv_cnt = 0; + FFTW(destroy_plan)(plan_scramble_in); plan_scramble_in = 0; + FFTW(destroy_plan)(plan_unscramble_out); plan_unscramble_out = 0; +} + +void final_cleanup(void) +{ + MPI_Finalize(); +} + +void bench_exit(int status) +{ + MPI_Abort(MPI_COMM_WORLD, status); +} + +double bench_cost_postprocess(double cost) +{ + double cost_max; + MPI_Allreduce(&cost, &cost_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + return cost_max; +} + + +int import_wisdom(FILE *f) +{ + int success = 1, sall; + if (my_pe == 0) success = FFTW(import_wisdom_from_file)(f); + FFTW(mpi_broadcast_wisdom)(MPI_COMM_WORLD); + MPI_Allreduce(&success, &sall, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD); + return sall; +} + +void 
export_wisdom(FILE *f) +{ + FFTW(mpi_gather_wisdom)(MPI_COMM_WORLD); + if (my_pe == 0) FFTW(export_wisdom_to_file)(f); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/mpi-dft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/mpi-dft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* problem.c: */ +typedef struct { + problem super; + dtensor *sz; + INT vn; /* vector length (vector stride 1) */ + R *I, *O; /* contiguous interleaved arrays */ + + int sign; /* FFTW_FORWARD / FFTW_BACKWARD */ + unsigned flags; /* TRANSPOSED_IN/OUT meaningful for rnk>1 only + SCRAMBLED_IN/OUT meaningful for 1d transforms only */ + + MPI_Comm comm; +} problem_mpi_dft; + +problem *XM(mkproblem_dft)(const dtensor *sz, INT vn, + R *I, R *O, MPI_Comm comm, + int sign, unsigned flags); +problem *XM(mkproblem_dft_d)(dtensor *sz, INT vn, + R *I, R *O, MPI_Comm comm, + int sign, unsigned flags); + +/* solve.c: */ +void XM(dft_solve)(const plan *ego_, const problem *p_); + +/* plans have same operands as rdft plans, so just re-use */ +typedef plan_rdft plan_mpi_dft; +#define 
MKPLAN_MPI_DFT(type, adt, apply) \ + (type *)X(mkplan_rdft)(sizeof(type), adt, apply) + +int XM(dft_serial_applicable)(const problem_mpi_dft *p); + +/* various solvers */ +void XM(dft_rank_geq2_register)(planner *p); +void XM(dft_rank_geq2_transposed_register)(planner *p); +void XM(dft_serial_register)(planner *p); +void XM(dft_rank1_bigvec_register)(planner *p); +void XM(dft_rank1_register)(planner *p); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/mpi-rdft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/mpi-rdft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* problem.c: */ +typedef struct { + problem super; + dtensor *sz; + INT vn; /* vector length (vector stride 1) */ + R *I, *O; /* contiguous interleaved arrays */ + + + unsigned flags; /* TRANSPOSED_IN/OUT meaningful for rnk>1 only + SCRAMBLED_IN/OUT meaningful for 1d transforms only */ + + MPI_Comm comm; + +#if defined(STRUCT_HACK_KR) + rdft_kind kind[1]; +#elif defined(STRUCT_HACK_C99) + rdft_kind kind[]; +#else + rdft_kind *kind; +#endif +} problem_mpi_rdft; + +problem *XM(mkproblem_rdft)(const dtensor *sz, INT vn, + R *I, R *O, MPI_Comm comm, + const rdft_kind *kind, unsigned flags); +problem *XM(mkproblem_rdft_d)(dtensor *sz, INT vn, + R *I, R *O, MPI_Comm comm, + const rdft_kind *kind, unsigned flags); + +/* solve.c: */ +void XM(rdft_solve)(const plan *ego_, const problem *p_); + +/* plans have same operands as rdft plans, so just re-use */ +typedef plan_rdft plan_mpi_rdft; +#define MKPLAN_MPI_RDFT(type, adt, apply) \ + (type *)X(mkplan_rdft)(sizeof(type), adt, apply) + +int XM(rdft_serial_applicable)(const problem_mpi_rdft *p); + +/* various solvers */ +void XM(rdft_rank_geq2_register)(planner *p); +void XM(rdft_rank_geq2_transposed_register)(planner *p); +void XM(rdft_serial_register)(planner *p); +void XM(rdft_rank1_bigvec_register)(planner *p); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/mpi-rdft2.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/mpi-rdft2.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* r2c and c2r transforms. The sz dtensor, as usual, gives the size + of the "logical" complex array. For the last dimension N, however, + only N/2+1 complex numbers are stored for the complex data. Moreover, + for the real data, the last dimension is *always* padded to a size + 2*(N/2+1). (Contrast this with the serial API, where there is only + padding for in-place plans.) 
*/ + +/* problem.c: */ +typedef struct { + problem super; + dtensor *sz; + INT vn; /* vector length (vector stride 1) */ + R *I, *O; /* contiguous interleaved arrays */ + + rdft_kind kind; /* assert(kind < DHT) */ + unsigned flags; /* TRANSPOSED_IN/OUT meaningful for rnk>1 only + SCRAMBLED_IN/OUT meaningful for 1d transforms only */ + + MPI_Comm comm; +} problem_mpi_rdft2; + +problem *XM(mkproblem_rdft2)(const dtensor *sz, INT vn, + R *I, R *O, MPI_Comm comm, + rdft_kind kind, unsigned flags); +problem *XM(mkproblem_rdft2_d)(dtensor *sz, INT vn, + R *I, R *O, MPI_Comm comm, + rdft_kind kind, unsigned flags); + +/* solve.c: */ +void XM(rdft2_solve)(const plan *ego_, const problem *p_); + +/* plans have same operands as rdft plans, so just re-use */ +typedef plan_rdft plan_mpi_rdft2; +#define MKPLAN_MPI_RDFT2(type, adt, apply) \ + (type *)X(mkplan_rdft)(sizeof(type), adt, apply) + +int XM(rdft2_serial_applicable)(const problem_mpi_rdft2 *p); + +/* various solvers */ +void XM(rdft2_rank_geq2_register)(planner *p); +void XM(rdft2_rank_geq2_transposed_register)(planner *p); +void XM(rdft2_serial_register)(planner *p); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/mpi-transpose.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/mpi-transpose.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* tproblem.c: */ +typedef struct { + problem super; + INT vn; /* vector length (vector stride 1) */ + INT nx, ny; /* nx x ny transposed to ny x nx */ + R *I, *O; /* contiguous real arrays (both same size!) */ + + unsigned flags; /* TRANSPOSED_IN: input is *locally* transposed + TRANSPOSED_OUT: output is *locally* transposed */ + + INT block, tblock; /* block size, slab decomposition; + tblock is for transposed blocks on output */ + + MPI_Comm comm; +} problem_mpi_transpose; + +problem *XM(mkproblem_transpose)(INT nx, INT ny, INT vn, + R *I, R *O, + INT block, INT tblock, + MPI_Comm comm, + unsigned flags); + +/* tsolve.c: */ +void XM(transpose_solve)(const plan *ego_, const problem *p_); + +/* plans have same operands as rdft plans, so just re-use */ +typedef plan_rdft plan_mpi_transpose; +#define MKPLAN_MPI_TRANSPOSE(type, adt, apply) \ + (type *)X(mkplan_rdft)(sizeof(type), adt, apply) + +/* transpose-pairwise.c: */ +int XM(mkplans_posttranspose)(const problem_mpi_transpose *p, planner *plnr, + R *I, R *O, int my_pe, + plan **cld2, plan **cld2rest, plan **cld3, + INT *rest_Ioff, INT *rest_Ooff); +/* various solvers */ +void XM(transpose_pairwise_register)(planner *p); +void XM(transpose_alltoall_register)(planner *p); +void XM(transpose_recurse_register)(planner *p); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft-problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft-problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-rdft.h" + +static void destroy(problem *ego_) +{ + problem_mpi_rdft *ego = (problem_mpi_rdft *) ego_; + XM(dtensor_destroy)(ego->sz); + MPI_Comm_free(&ego->comm); +#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR) + X(ifree0)(ego->kind); +#endif + X(ifree)(ego_); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_; + int i; + X(md5puts)(m, "mpi-dft"); + X(md5int)(m, p->I == p->O); + /* don't include alignment -- may differ between processes + X(md5int)(m, X(alignment_of)(p->I)); + X(md5int)(m, X(alignment_of)(p->O)); + ... note that applicability of MPI plans does not depend + on alignment (although optimality may, in principle). 
*/ + XM(dtensor_md5)(m, p->sz); + X(md5INT)(m, p->vn); + for (i = 0; i < p->sz->rnk; ++i) + X(md5int)(m, p->kind[i]); + X(md5int)(m, p->flags); + MPI_Comm_size(p->comm, &i); X(md5int)(m, i); + A(XM(md5_equal)(*m, p->comm)); +} + +static void print(const problem *ego_, printer *p) +{ + const problem_mpi_rdft *ego = (const problem_mpi_rdft *) ego_; + int i; + p->print(p, "(mpi-rdft %d %d %d ", + ego->I == ego->O, + X(alignment_of)(ego->I), + X(alignment_of)(ego->O)); + XM(dtensor_print)(ego->sz, p); + for (i = 0; i < ego->sz->rnk; ++i) + p->print(p, " %d", (int)ego->kind[i]); + p->print(p, " %D %d", ego->vn, ego->flags); + MPI_Comm_size(ego->comm, &i); p->print(p, " %d)", i); +} + +static void zero(const problem *ego_) +{ + const problem_mpi_rdft *ego = (const problem_mpi_rdft *) ego_; + R *I = ego->I; + INT i, N; + int my_pe; + + MPI_Comm_rank(ego->comm, &my_pe); + N = ego->vn * XM(total_block)(ego->sz, IB, my_pe); + for (i = 0; i < N; ++i) I[i] = K(0.0); +} + +static const problem_adt padt = +{ + PROBLEM_MPI_RDFT, + hash, + zero, + print, + destroy +}; + +problem *XM(mkproblem_rdft)(const dtensor *sz, INT vn, + R *I, R *O, + MPI_Comm comm, + const rdft_kind *kind, unsigned flags) +{ + problem_mpi_rdft *ego; + int i, rnk = sz->rnk; + int n_pes; + + A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk)); + MPI_Comm_size(comm, &n_pes); + A(n_pes >= XM(num_blocks_total)(sz, IB) + && n_pes >= XM(num_blocks_total)(sz, OB)); + A(vn >= 0); + +#if defined(STRUCT_HACK_KR) + ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft) + + sizeof(rdft_kind) + * (rnk > 0 ? 
rnk - 1 : 0), &padt); +#elif defined(STRUCT_HACK_C99) + ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft) + + sizeof(rdft_kind) * rnk, &padt); +#else + ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft), &padt); + ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS); +#endif + + /* enforce pointer equality if untainted pointers are equal */ + if (UNTAINT(I) == UNTAINT(O)) + I = O = JOIN_TAINT(I, O); + + ego->sz = XM(dtensor_canonical)(sz, 0); + ego->vn = vn; + ego->I = I; + ego->O = O; + for (i = 0; i< ego->sz->rnk; ++i) + ego->kind[i] = kind[i]; + + /* canonicalize: replace TRANSPOSED_IN with TRANSPOSED_OUT by + swapping the first two dimensions (for rnk > 1) */ + if ((flags & TRANSPOSED_IN) && ego->sz->rnk > 1) { + rdft_kind k = ego->kind[0]; + ddim dim0 = ego->sz->dims[0]; + ego->sz->dims[0] = ego->sz->dims[1]; + ego->sz->dims[1] = dim0; + ego->kind[0] = ego->kind[1]; + ego->kind[1] = k; + flags &= ~TRANSPOSED_IN; + flags ^= TRANSPOSED_OUT; + } + ego->flags = flags; + + MPI_Comm_dup(comm, &ego->comm); + + return &(ego->super); +} + +problem *XM(mkproblem_rdft_d)(dtensor *sz, INT vn, + R *I, R *O, + MPI_Comm comm, + const rdft_kind *kind, unsigned flags) +{ + problem *p = XM(mkproblem_rdft)(sz, vn, I, O, comm, kind, flags); + XM(dtensor_destroy)(sz); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft-rank-geq2-transposed.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft-rank-geq2-transposed.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex RDFTs of rank >= 2, for the case where we are distributed + across the first dimension only, and the output is transposed both + in data distribution and in ordering (for the first 2 dimensions). + + (Note that we don't have to handle the case where the input is + transposed, since this is equivalent to transposed output with the + first two dimensions swapped, and is automatically canonicalized as + such by rdft-problem.c. */ + +#include "mpi-rdft.h" +#include "mpi-transpose.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_rdft super; + + plan *cld1, *cldt, *cld2; + INT roff, ioff; + int preserve_input; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld1, *cld2, *cldt; + + /* RDFT local dimensions */ + cld1 = (plan_rdft *) ego->cld1; + if (ego->preserve_input) { + cld1->apply(ego->cld1, I, O); + I = O; + } + else + cld1->apply(ego->cld1, I, I); + + /* global transpose */ + cldt = (plan_rdft *) ego->cldt; + cldt->apply(ego->cldt, I, O); + + /* RDFT final local dimension */ + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, O, O); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_; + return (1 + && p->sz->rnk > 1 + && p->flags == TRANSPOSED_OUT + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != 
p->O)) + && XM(is_local_after)(1, p->sz, IB) + && XM(is_local_after)(2, p->sz, OB) + && XM(num_blocks)(p->sz->dims[0].n, p->sz->dims[0].b[OB]) == 1 + && (!NO_SLOWP(plnr) /* slow if rdft-serial is applicable */ + || !XM(rdft_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cldt, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cldt); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-rdft-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", + ego->cld1, ego->cldt, ego->cld2); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_rdft *p; + P *pln; + plan *cld1 = 0, *cldt = 0, *cld2 = 0; + R *I, *O, *I2; + tensor *sz; + int i, my_pe, n_pes; + INT nrest; + static const plan_adt padt = { + XM(rdft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_rdft *) p_; + + I2 = I = p->I; + O = p->O; + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) + I = O; + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */ + i = p->sz->rnk - 2; A(i >= 0); + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = p->vn; + for (--i; i >= 0; --i) { + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is; + } + nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n; + { + INT is = sz->dims[0].n * sz->dims[0].is; + INT b = XM(block)(p->sz->dims[0].n, 
p->sz->dims[0].b[IB], my_pe); + cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)(sz, + X(mktensor_2d)(b, is, is, + p->vn, 1, 1), + I2, I, p->kind + 1)); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + nrest *= p->vn; + cldt = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + p->sz->dims[0].n, p->sz->dims[1].n, nrest, + I, O, + p->sz->dims[0].b[IB], p->sz->dims[1].b[OB], + p->comm, 0)); + if (XM(any_true)(!cldt, p->comm)) goto nada; + + { + INT is = p->sz->dims[0].n * nrest; + INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe); + cld2 = X(mkplan_d)(plnr, + X(mkproblem_rdft_1_d)(X(mktensor_1d)( + p->sz->dims[0].n, + nrest, nrest), + X(mktensor_2d)(b, is, is, + nrest, 1, 1), + O, O, p->kind[0])); + if (XM(any_true)(!cld2, p->comm)) goto nada; + } + + pln = MKPLAN_MPI_RDFT(P, &padt, apply); + pln->cld1 = cld1; + pln->cldt = cldt; + pln->cld2 = cld2; + pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr); + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + X(ops_add2)(&cldt->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cldt); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(rdft_rank_geq2_transposed_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft-rank-geq2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft-rank-geq2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex RDFTs of rank >= 2, for the case where we are distributed + across the first dimension only, and the output is not transposed. */ + +#include "mpi-rdft.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_rdft super; + + plan *cld1, *cld2; + int preserve_input; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld1, *cld2; + + /* RDFT local dimensions */ + cld1 = (plan_rdft *) ego->cld1; + if (ego->preserve_input) { + cld1->apply(ego->cld1, I, O); + I = O; + } + else + cld1->apply(ego->cld1, I, I); + + /* RDFT non-local dimension (via rdft-rank1-bigvec, usually): */ + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, I, O); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_; + return (1 + && p->sz->rnk > 1 + && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */ + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && XM(is_local_after)(1, p->sz, IB) + && XM(is_local_after)(1, p->sz, OB) + && (!NO_SLOWP(plnr) /* slow if rdft-serial is applicable */ 
+ || !XM(rdft_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-rdft-rank-geq2%s%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", ego->cld1, ego->cld2); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_rdft *p; + P *pln; + plan *cld1 = 0, *cld2 = 0; + R *I, *O, *I2; + tensor *sz; + dtensor *sz2; + int i, my_pe, n_pes; + INT nrest; + static const plan_adt padt = { + XM(rdft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_rdft *) p_; + + I2 = I = p->I; + O = p->O; + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) + I = O; + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */ + i = p->sz->rnk - 2; A(i >= 0); + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = p->vn; + for (--i; i >= 0; --i) { + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is; + } + nrest = X(tensor_sz)(sz); + { + INT is = sz->dims[0].n * sz->dims[0].is; + INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe); + cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)(sz, + X(mktensor_2d)(b, is, is, + p->vn, 1, 1), + I2, I, p->kind + 1)); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */ + sz2->dims[0] = p->sz->dims[0]; + cld2 = X(mkplan_d)(plnr, XM(mkproblem_rdft_d)(sz2, nrest * 
p->vn, + I, O, + p->comm, p->kind, + RANK1_BIGVEC_ONLY)); + if (XM(any_true)(!cld2, p->comm)) goto nada; + + pln = MKPLAN_MPI_RDFT(P, &padt, apply); + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr); + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(rdft_rank_geq2_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft-rank1-bigvec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft-rank1-bigvec.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex RDFTs of rank == 1 when the vector length vn is >= # processes. + In this case, we don't need to use a six-step type algorithm, and can + instead transpose the RDFT dimension with the vector dimension to + make the RDFT local. */ + +#include "mpi-rdft.h" +#include "mpi-transpose.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ + rearrangement rearrange; +} S; + +typedef struct { + plan_mpi_rdft super; + + plan *cldt_before, *cld, *cldt_after; + int preserve_input; + rearrangement rearrange; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld, *cldt_before, *cldt_after; + + /* global transpose */ + cldt_before = (plan_rdft *) ego->cldt_before; + cldt_before->apply(ego->cldt_before, I, O); + + if (ego->preserve_input) I = O; + + /* 1d RDFT(s) */ + cld = (plan_rdft *) ego->cld; + cld->apply(ego->cld, O, I); + + /* global transpose */ + cldt_after = (plan_rdft *) ego->cldt_after; + cldt_after->apply(ego->cldt_after, I, O); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_; + int n_pes; + MPI_Comm_size(p->comm, &n_pes); + return (1 + && p->sz->rnk == 1 + && !(p->flags & ~RANK1_BIGVEC_ONLY) + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + +#if 0 /* don't need this check since no other rank-1 rdft solver */ + && (p->vn >= n_pes /* TODO: relax this, using more memory? 
*/ + || (p->flags & RANK1_BIGVEC_ONLY)) +#endif + + && XM(rearrange_applicable)(ego->rearrange, + p->sz->dims[0], p->vn, n_pes) + + && (!NO_SLOWP(plnr) /* slow if rdft-serial is applicable */ + || !XM(rdft_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldt_before, wakefulness); + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldt_after, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldt_after); + X(plan_destroy_internal)(ego->cld); + X(plan_destroy_internal)(ego->cldt_before); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const char descrip[][16] = { "contig", "discontig", "square-after", + "square-middle", "square-before" }; + p->print(p, "(mpi-rdft-rank1-bigvec/%s%s %(%p%) %(%p%) %(%p%))", + descrip[ego->rearrange], ego->preserve_input==2 ?"/p":"", + ego->cldt_before, ego->cld, ego->cldt_after); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_rdft *p; + P *pln; + plan *cld = 0, *cldt_before = 0, *cldt_after = 0; + R *I, *O; + INT yblock, yb, nx, ny, vn; + int my_pe, n_pes; + static const plan_adt padt = { + XM(rdft_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_rdft *) p_; + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + nx = p->sz->dims[0].n; + if (!(ny = XM(rearrange_ny)(ego->rearrange, p->sz->dims[0],p->vn,n_pes))) + return (plan *) 0; + vn = p->vn / ny; + A(ny * vn == p->vn); + + yblock = XM(default_block)(ny, n_pes); + cldt_before = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + nx, ny, vn, + I = p->I, O = p->O, + p->sz->dims[0].b[IB], yblock, + p->comm, 0)); + if (XM(any_true)(!cldt_before, p->comm)) goto nada; + if (ego->preserve_input || 
NO_DESTROY_INPUTP(plnr)) { I = O; } + + yb = XM(block)(ny, yblock, my_pe); + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_1_d)(X(mktensor_1d)(nx, vn, vn), + X(mktensor_2d)(yb, vn*nx, vn*nx, + vn, 1, 1), + O, I, p->kind[0])); + if (XM(any_true)(!cld, p->comm)) goto nada; + + cldt_after = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + ny, nx, vn, + I, O, + yblock, p->sz->dims[0].b[OB], + p->comm, 0)); + if (XM(any_true)(!cldt_after, p->comm)) goto nada; + + pln = MKPLAN_MPI_RDFT(P, &padt, apply); + + pln->cldt_before = cldt_before; + pln->cld = cld; + pln->cldt_after = cldt_after; + pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr); + pln->rearrange = ego->rearrange; + + X(ops_add)(&cldt_before->ops, &cld->ops, &pln->super.super.ops); + X(ops_add2)(&cldt_after->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldt_after); + X(plan_destroy_internal)(cld); + X(plan_destroy_internal)(cldt_before); + return (plan *) 0; +} + +static solver *mksolver(rearrangement rearrange, int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->rearrange = rearrange; + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(rdft_rank1_bigvec_register)(planner *p) +{ + rearrangement rearrange; + int preserve_input; + FORALL_REARRANGE(rearrange) + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(rearrange, preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft-serial.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft-serial.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* "MPI" RDFTs where all of the data is on one processor...just + call through to serial API. */ + +#include "mpi-rdft.h" + +typedef struct { + plan_mpi_rdft super; + plan *cld; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply(ego->cld, I, O); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-rdft-serial %(%p%))", ego->cld); +} + +int XM(rdft_serial_applicable)(const problem_mpi_rdft *p) +{ + return (1 + && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */ + && ((XM(is_local)(p->sz, IB) && XM(is_local)(p->sz, OB)) + || p->vn == 0)); +} + +static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) +{ + const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_; + P *pln; + plan *cld; + int my_pe; + static const plan_adt padt = { + XM(rdft_solve), awake, print, destroy + }; + + UNUSED(ego); + + /* check whether applicable: */ + if (!XM(rdft_serial_applicable)(p)) + return (plan *) 0; + + MPI_Comm_rank(p->comm, &my_pe); + if (my_pe == 0 && p->vn > 0) { + int 
i, rnk = p->sz->rnk; + tensor *sz = X(mktensor)(rnk); + rdft_kind *kind + = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS); + sz->dims[rnk - 1].is = sz->dims[rnk - 1].os = p->vn; + sz->dims[rnk - 1].n = p->sz->dims[rnk - 1].n; + for (i = rnk - 1; i > 0; --i) { + sz->dims[i - 1].is = sz->dims[i - 1].os = + sz->dims[i].is * sz->dims[i].n; + sz->dims[i - 1].n = p->sz->dims[i - 1].n; + } + for (i = 0; i < rnk; ++i) + kind[i] = p->kind[i]; + + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)(sz, + X(mktensor_1d)(p->vn, 1, 1), + p->I, p->O, kind)); + X(ifree0)(kind); + } + else { /* idle process: make nop plan */ + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_1d)(0,0,0), + p->I, p->O)); + } + if (XM(any_true)(!cld, p->comm)) return (plan *) 0; + + pln = MKPLAN_MPI_RDFT(P, &padt, apply); + pln->cld = cld; + X(ops_cpy)(&cld->ops, &pln->super.super.ops); + return &(pln->super.super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT, mkplan, 0 }; + return MKSOLVER(solver, &sadt); +} + +void XM(rdft_serial_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft-solve.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft-solve.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-rdft.h" + +/* use the apply() operation for MPI_RDFT problems */ +void XM(rdft_solve)(const plan *ego_, const problem *p_) +{ + const plan_mpi_rdft *ego = (const plan_mpi_rdft *) ego_; + const problem_mpi_rdft *p = (const problem_mpi_rdft *) p_; + ego->apply(ego_, UNTAINT(p->I), UNTAINT(p->O)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft2-problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft2-problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-rdft2.h" + +static void destroy(problem *ego_) +{ + problem_mpi_rdft2 *ego = (problem_mpi_rdft2 *) ego_; + XM(dtensor_destroy)(ego->sz); + MPI_Comm_free(&ego->comm); + X(ifree)(ego_); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_; + int i; + X(md5puts)(m, "mpi-rdft2"); + X(md5int)(m, p->I == p->O); + /* don't include alignment -- may differ between processes + X(md5int)(m, X(alignment_of)(p->I)); + X(md5int)(m, X(alignment_of)(p->O)); + ... note that applicability of MPI plans does not depend + on alignment (although optimality may, in principle). */ + XM(dtensor_md5)(m, p->sz); + X(md5INT)(m, p->vn); + X(md5int)(m, p->kind); + X(md5int)(m, p->flags); + MPI_Comm_size(p->comm, &i); X(md5int)(m, i); + A(XM(md5_equal)(*m, p->comm)); +} + +static void print(const problem *ego_, printer *p) +{ + const problem_mpi_rdft2 *ego = (const problem_mpi_rdft2 *) ego_; + int i; + p->print(p, "(mpi-rdft2 %d %d %d ", + ego->I == ego->O, + X(alignment_of)(ego->I), + X(alignment_of)(ego->O)); + XM(dtensor_print)(ego->sz, p); + p->print(p, " %D %d %d", ego->vn, (int) ego->kind, ego->flags); + MPI_Comm_size(ego->comm, &i); p->print(p, " %d)", i); +} + +static void zero(const problem *ego_) +{ + const problem_mpi_rdft2 *ego = (const problem_mpi_rdft2 *) ego_; + R *I = ego->I; + dtensor *sz; + INT i, N; + int my_pe; + + sz = XM(dtensor_copy)(ego->sz); + sz->dims[sz->rnk - 1].n = sz->dims[sz->rnk - 1].n / 2 + 1; + MPI_Comm_rank(ego->comm, &my_pe); + N = 2 * ego->vn * XM(total_block)(sz, IB, my_pe); + XM(dtensor_destroy)(sz); + for (i = 0; i < N; ++i) I[i] = K(0.0); +} + +static const problem_adt padt = +{ + PROBLEM_MPI_RDFT2, + hash, + zero, + print, + destroy +}; + +problem 
*XM(mkproblem_rdft2)(const dtensor *sz, INT vn, + R *I, R *O, + MPI_Comm comm, + rdft_kind kind, + unsigned flags) +{ + problem_mpi_rdft2 *ego = + (problem_mpi_rdft2 *)X(mkproblem)(sizeof(problem_mpi_rdft2), &padt); + int n_pes; + + A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk) && sz->rnk > 1); + MPI_Comm_size(comm, &n_pes); + A(vn >= 0); + A(kind == R2HC || kind == HC2R); + + /* enforce pointer equality if untainted pointers are equal */ + if (UNTAINT(I) == UNTAINT(O)) + I = O = JOIN_TAINT(I, O); + + ego->sz = XM(dtensor_canonical)(sz, 0); +#ifdef FFTW_DEBUG + ego->sz->dims[sz->rnk - 1].n = sz->dims[sz->rnk - 1].n / 2 + 1; + A(n_pes >= XM(num_blocks_total)(ego->sz, IB) + && n_pes >= XM(num_blocks_total)(ego->sz, OB)); + ego->sz->dims[sz->rnk - 1].n = sz->dims[sz->rnk - 1].n; +#endif + + ego->vn = vn; + ego->I = I; + ego->O = O; + ego->kind = kind; + + /* We only support TRANSPOSED_OUT for r2c and TRANSPOSED_IN for + c2r transforms. */ + + ego->flags = flags; + + MPI_Comm_dup(comm, &ego->comm); + + return &(ego->super); +} + +problem *XM(mkproblem_rdft2_d)(dtensor *sz, INT vn, + R *I, R *O, + MPI_Comm comm, + rdft_kind kind, + unsigned flags) +{ + problem *p = XM(mkproblem_rdft2)(sz, vn, I, O, comm, kind, flags); + XM(dtensor_destroy)(sz); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft2-rank-geq2-transposed.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft2-rank-geq2-transposed.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Real-input (r2c) DFTs of rank >= 2, for the case where we are distributed + across the first dimension only, and the output is transposed both + in data distribution and in ordering (for the first 2 dimensions). + + Conversely, real-output (c2r) DFTs where the input is transposed. + + We don't currently support transposed-input r2c or transposed-output + c2r transforms. */ + +#include "mpi-rdft2.h" +#include "mpi-transpose.h" +#include "rdft.h" +#include "dft.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_rdft2 super; + + plan *cld1, *cldt, *cld2; + INT vn; + int preserve_input; +} P; + +static void apply_r2c(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld1; + plan_dft *cld2; + plan_rdft *cldt; + + /* RDFT2 local dimensions */ + cld1 = (plan_rdft2 *) ego->cld1; + if (ego->preserve_input) { + cld1->apply(ego->cld1, I, I+ego->vn, O, O+1); + I = O; + } + else + cld1->apply(ego->cld1, I, I+ego->vn, I, I+1); + + /* global transpose */ + cldt = (plan_rdft *) ego->cldt; + cldt->apply(ego->cldt, I, O); + + /* DFT final local dimension */ + cld2 = (plan_dft *) ego->cld2; + cld2->apply(ego->cld2, O, O+1, O, O+1); +} + +static void apply_c2r(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld1; + plan_dft *cld2; + plan_rdft *cldt; + + /* IDFT local dimensions */ + cld2 = (plan_dft *) ego->cld2; + if (ego->preserve_input) { + 
cld2->apply(ego->cld2, I+1, I, O+1, O); + I = O; + } + else + cld2->apply(ego->cld2, I+1, I, I+1, I); + + /* global transpose */ + cldt = (plan_rdft *) ego->cldt; + cldt->apply(ego->cldt, I, O); + + /* RDFT2 final local dimension */ + cld1 = (plan_rdft2 *) ego->cld1; + cld1->apply(ego->cld1, O, O+ego->vn, O, O+1); +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_; + return (1 + && p->sz->rnk > 1 + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && ((p->flags == TRANSPOSED_OUT && p->kind == R2HC + && XM(is_local_after)(1, p->sz, IB) + && XM(is_local_after)(2, p->sz, OB) + && XM(num_blocks)(p->sz->dims[0].n, + p->sz->dims[0].b[OB]) == 1) + || + (p->flags == TRANSPOSED_IN && p->kind == HC2R + && XM(is_local_after)(1, p->sz, OB) + && XM(is_local_after)(2, p->sz, IB) + && XM(num_blocks)(p->sz->dims[0].n, + p->sz->dims[0].b[IB]) == 1)) + && (!NO_SLOWP(plnr) /* slow if rdft2-serial is applicable */ + || !XM(rdft2_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cldt, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cldt); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-rdft2-rank-geq2-transposed%s%(%p%)%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", + ego->cld1, ego->cldt, ego->cld2); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_rdft2 *p; + P *pln; + plan *cld1 = 0, *cldt = 0, *cld2 = 0; + R *r0, *r1, *cr, *ci, *ri, *ii, *ro, *io, *I, *O; + tensor *sz; + int i, my_pe, n_pes; + INT 
nrest, n1, b1; + static const plan_adt padt = { + XM(rdft2_solve), awake, print, destroy + }; + block_kind k1, k2; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_rdft2 *) p_; + + I = p->I; O = p->O; + if (p->kind == R2HC) { + k1 = IB; k2 = OB; + r1 = (r0 = I) + p->vn; + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) { + ci = (cr = O) + 1; + I = O; + } + else + ci = (cr = I) + 1; + io = ii = (ro = ri = O) + 1; + } + else { + k1 = OB; k2 = IB; + r1 = (r0 = O) + p->vn; + ci = (cr = O) + 1; + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) { + ri = (ii = I) + 1; + ro = (io = O) + 1; + I = O; + } + else + ro = ri = (io = ii = I) + 1; + } + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */ + i = p->sz->rnk - 2; A(i >= 0); + sz->dims[i].n = p->sz->dims[i+1].n / 2 + 1; + sz->dims[i].is = sz->dims[i].os = 2 * p->vn; + for (--i; i >= 0; --i) { + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is; + } + nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n; + { + INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC); + INT is = sz->dims[0].n * sz->dims[0].is; + INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[k1], my_pe); + sz->dims[p->sz->rnk - 2].n = p->sz->dims[p->sz->rnk - 1].n; + cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)(sz, + X(mktensor_2d)(b, is, is, + p->vn,ivs,ovs), + r0, r1, cr, ci, p->kind)); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + nrest *= p->vn; + n1 = p->sz->dims[1].n; + b1 = p->sz->dims[1].b[k2]; + if (p->sz->rnk == 2) { /* n1 dimension is cut in ~half */ + n1 = n1 / 2 + 1; + b1 = b1 == p->sz->dims[1].n ? 
n1 : b1; + } + + if (p->kind == R2HC) + cldt = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + p->sz->dims[0].n, n1, nrest * 2, + I, O, + p->sz->dims[0].b[IB], b1, + p->comm, 0)); + else + cldt = X(mkplan_d)(plnr, + XM(mkproblem_transpose)( + n1, p->sz->dims[0].n, nrest * 2, + I, O, + b1, p->sz->dims[0].b[OB], + p->comm, 0)); + if (XM(any_true)(!cldt, p->comm)) goto nada; + + { + INT is = p->sz->dims[0].n * nrest * 2; + INT b = XM(block)(n1, b1, my_pe); + cld2 = X(mkplan_d)(plnr, + X(mkproblem_dft_d)(X(mktensor_1d)( + p->sz->dims[0].n, + nrest * 2, nrest * 2), + X(mktensor_2d)(b, is, is, + nrest, 2, 2), + ri, ii, ro, io)); + if (XM(any_true)(!cld2, p->comm)) goto nada; + } + + pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r); + pln->cld1 = cld1; + pln->cldt = cldt; + pln->cld2 = cld2; + pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr); + pln->vn = p->vn; + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + X(ops_add2)(&cldt->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cldt); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(rdft2_rank_geq2_transposed_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft2-rank-geq2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft2-rank-geq2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Complex RDFT2s of rank >= 2, for the case where we are distributed + across the first dimension only, and the output is not transposed. */ + +#include "mpi-dft.h" +#include "mpi-rdft2.h" +#include "rdft.h" + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_rdft2 super; + + plan *cld1, *cld2; + INT vn; + int preserve_input; +} P; + +static void apply_r2c(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld1; + plan_rdft *cld2; + + /* RDFT2 local dimensions */ + cld1 = (plan_rdft2 *) ego->cld1; + if (ego->preserve_input) { + cld1->apply(ego->cld1, I, I+ego->vn, O, O+1); + I = O; + } + else + cld1->apply(ego->cld1, I, I+ego->vn, I, I+1); + + /* DFT non-local dimension (via dft-rank1-bigvec, usually): */ + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, I, O); +} + +static void apply_c2r(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld1; + plan_rdft *cld2; + + /* DFT non-local dimension (via dft-rank1-bigvec, usually): */ + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, I, O); + + /* RDFT2 local dimensions */ + cld1 = (plan_rdft2 *) ego->cld1; + cld1->apply(ego->cld1, O, O+ego->vn, O, O+1); + +} + 
+static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_; + return (1 + && p->sz->rnk > 1 + && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */ + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O + && p->kind == R2HC)) + && XM(is_local_after)(1, p->sz, IB) + && XM(is_local_after)(1, p->sz, OB) + && (!NO_SLOWP(plnr) /* slow if rdft2-serial is applicable */ + || !XM(rdft2_serial_applicable)(p)) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-rdft2-rank-geq2%s%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", ego->cld1, ego->cld2); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_rdft2 *p; + P *pln; + plan *cld1 = 0, *cld2 = 0; + R *r0, *r1, *cr, *ci, *I, *O; + tensor *sz; + dtensor *sz2; + int i, my_pe, n_pes; + INT nrest; + static const plan_adt padt = { + XM(rdft2_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_rdft2 *) p_; + + I = p->I; O = p->O; + if (p->kind == R2HC) { + r1 = (r0 = p->I) + p->vn; + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) { + ci = (cr = p->O) + 1; + I = O; + } + else + ci = (cr = p->I) + 1; + } + else { + r1 = (r0 = p->O) + p->vn; + ci = (cr = p->O) + 1; + } + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */ + i = p->sz->rnk - 2; A(i >= 0); + sz->dims[i].is = 
sz->dims[i].os = 2 * p->vn; + sz->dims[i].n = p->sz->dims[i+1].n / 2 + 1; + for (--i; i >= 0; --i) { + sz->dims[i].n = p->sz->dims[i+1].n; + sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is; + } + nrest = X(tensor_sz)(sz); + { + INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC); + INT is = sz->dims[0].n * sz->dims[0].is; + INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe); + sz->dims[p->sz->rnk - 2].n = p->sz->dims[p->sz->rnk - 1].n; + cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)(sz, + X(mktensor_2d)(b, is, is, + p->vn,ivs,ovs), + r0, r1, cr, ci, p->kind)); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */ + sz2->dims[0] = p->sz->dims[0]; + cld2 = X(mkplan_d)(plnr, XM(mkproblem_dft_d)(sz2, nrest * p->vn, + I, O, p->comm, + p->kind == R2HC ? + FFT_SIGN : -FFT_SIGN, + RANK1_BIGVEC_ONLY)); + if (XM(any_true)(!cld2, p->comm)) goto nada; + + pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r); + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->preserve_input = ego->preserve_input ? 
2 : NO_DESTROY_INPUTP(plnr); + pln->vn = p->vn; + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(rdft2_rank_geq2_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft2-serial.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft2-serial.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* "MPI" DFTs where all of the data is on one processor...just + call through to serial API. 
*/ + +#include "mpi-rdft2.h" +#include "rdft.h" + +typedef struct { + plan_mpi_rdft2 super; + plan *cld; + INT vn; +} P; + +static void apply_r2c(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld; + cld = (plan_rdft2 *) ego->cld; + cld->apply(ego->cld, I, I+ego->vn, O, O+1); +} + +static void apply_c2r(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld; + cld = (plan_rdft2 *) ego->cld; + cld->apply(ego->cld, O, O+ego->vn, I, I+1); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-rdft2-serial %(%p%))", ego->cld); +} + +int XM(rdft2_serial_applicable)(const problem_mpi_rdft2 *p) +{ + return (1 + && p->flags == 0 /* TRANSPOSED/SCRAMBLED_IN/OUT not supported */ + && ((XM(is_local)(p->sz, IB) && XM(is_local)(p->sz, OB)) + || p->vn == 0)); +} + +static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) +{ + const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_; + P *pln; + plan *cld; + int my_pe; + R *r0, *r1, *cr, *ci; + static const plan_adt padt = { + XM(rdft2_solve), awake, print, destroy + }; + + UNUSED(ego); + + /* check whether applicable: */ + if (!XM(rdft2_serial_applicable)(p)) + return (plan *) 0; + + if (p->kind == R2HC) { + r1 = (r0 = p->I) + p->vn; + ci = (cr = p->O) + 1; + } + else { + r1 = (r0 = p->O) + p->vn; + ci = (cr = p->I) + 1; + } + + MPI_Comm_rank(p->comm, &my_pe); + if (my_pe == 0 && p->vn > 0) { + INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC); + int i, rnk = p->sz->rnk; + tensor *sz = X(mktensor)(p->sz->rnk); + sz->dims[rnk - 1].is = sz->dims[rnk - 1].os = 2 * p->vn; + sz->dims[rnk - 1].n = p->sz->dims[rnk - 1].n / 2 + 1; + for (i = rnk - 1; i > 0; 
--i) { + sz->dims[i - 1].is = sz->dims[i - 1].os = + sz->dims[i].is * sz->dims[i].n; + sz->dims[i - 1].n = p->sz->dims[i - 1].n; + } + sz->dims[rnk - 1].n = p->sz->dims[rnk - 1].n; + + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)(sz, + X(mktensor_1d)(p->vn,ivs,ovs), + r0, r1, cr, ci, p->kind)); + } + else { /* idle process: make nop plan */ + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)(X(mktensor_0d)(), + X(mktensor_1d)(0,0,0), + cr, ci, cr, ci, HC2R)); + } + if (XM(any_true)(!cld, p->comm)) return (plan *) 0; + + pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r); + pln->cld = cld; + pln->vn = p->vn; + X(ops_cpy)(&cld->ops, &pln->super.super.ops); + return &(pln->super.super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_MPI_RDFT2, mkplan, 0 }; + return MKSOLVER(solver, &sadt); +} + +void XM(rdft2_serial_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rdft2-solve.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rdft2-solve.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-rdft2.h" + +/* use the apply() operation for MPI_RDFT2 problems: downcast the generic plan and problem to their MPI rdft2 types, then run the plan's apply() on the problem's I and O arrays */ +void XM(rdft2_solve)(const plan *ego_, const problem *p_) +{ + const plan_mpi_rdft2 *ego = (const plan_mpi_rdft2 *) ego_; + const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_; + ego->apply(ego_, UNTAINT(p->I), UNTAINT(p->O)); /* NOTE(review): UNTAINT presumably strips planner tag bits from the pointers -- confirm against its definition */ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/rearrange.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/rearrange.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw-mpi.h" + +/* common functions for rearrangements of the data for the *-rank1-bigvec + solvers */ + +static int div_mult(INT b, INT a) { /* nonzero iff a > b and a is a multiple of b */ + return (a > b && a % b == 0); +} +static int div_mult2(INT b, INT a, INT n) { /* nonzero iff a is a proper multiple of b and b is a proper multiple of n */ + return (div_mult(b, a) && div_mult(n, b)); +} + +int XM(rearrange_applicable)(rearrangement rearrange, + ddim dim0, INT vn, int n_pes) +{ /* returns nonzero iff the divisibility condition tied to `rearrange` holds; values not tested below (e.g. CONTIG) impose no condition */ + /* note: it is important that cases other than CONTIG be + applicable only when the resulting transpose dimension + is divisible by n_pes; otherwise, the allocation size + returned by the API will be incorrect */ + return ((rearrange != DISCONTIG || div_mult(n_pes, vn)) + && (rearrange != SQUARE_BEFORE + || div_mult2(dim0.b[IB], vn, n_pes)) + && (rearrange != SQUARE_AFTER + || (dim0.b[IB] != dim0.b[OB] /* SQUARE_AFTER additionally requires different input and output block sizes */ + && div_mult2(dim0.b[OB], vn, n_pes))) + && (rearrange != SQUARE_MIDDLE + || div_mult(dim0.n * n_pes, vn))); +} + +INT XM(rearrange_ny)(rearrangement rearrange, ddim dim0, INT vn, int n_pes) +{ /* maps each rearrangement to a size: vn, n_pes, a block size of dim0, or dim0.n * n_pes. NOTE(review): presumably the "transpose dimension" mentioned in rearrange_applicable -- confirm */ + switch (rearrange) { + case CONTIG: + return vn; + case DISCONTIG: + return n_pes; + case SQUARE_BEFORE: + return dim0.b[IB]; + case SQUARE_AFTER: + return dim0.b[OB]; + case SQUARE_MIDDLE: + return dim0.n * n_pes; + } + return 0; /* reached only when `rearrange` matches none of the cases above */ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/testsched.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/testsched.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 1999-2003, 2007-8 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + *
(at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/**********************************************************************/ +/* This is a modified and combined version of the sched.c and + test_sched.c files shipped with FFTW 2, written to implement and + test various all-to-all communications scheduling patterns. + + It is not used in FFTW 3, but I keep it around in case we ever want + to play with this again or to change algorithms. In particular, I + used it to implement and test the fill1_comm_sched routine in + transpose-pairwise.c, which allows us to create a schedule for one + process at a time and is much more compact than the FFTW 2 code. + + Note that the scheduling algorithm is somewhat modified from that + of FFTW 2. Originally, I thought that one "stall" in the schedule + was unavoidable for odd numbers of processes, since this is the + case for the soccer-timetabling problem. However, because of the + self-communication step, we can use the self-communication to fill + in the stalls. (Thanks to Ralf Wildenhues for pointing this out.) + This greatly simplifies the process re-sorting algorithm. */ + +/**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> + +/* This file contains routines to compute communications schedules for + all-to-all communications (complete exchanges) that are performed + in-place. (That is, the block that processor x sends to processor + y gets replaced on processor x by a block received from processor y.)
+ + A schedule, int **sched, is a two-dimensional array where + sched[pe][i] is the processor that pe expects to exchange a message + with on the i-th step of the exchange. sched[pe][i] == -1 for the + i after the last exchange scheduled on pe. + + Here, processors (pe's, for processing elements) are numbered from + 0 to npes-1. + + There are a couple of constraints that a schedule should satisfy + (besides the obvious one that every processor has to communicate + with every other processor exactly once). + + * First, and most importantly, there must be no deadlocks. + + * Second, we would like to overlap communications as much as possible, + so that all exchanges occur in parallel. It turns out that perfect + overlap is possible for any number of processes (npes). + + It turns out that this scheduling problem is actually well-studied, + and good solutions are known. The problem is known as a + "time-tabling" problem, and is specifically the problem of + scheduling a sports competition (where n teams must compete exactly + once with every other team). The problem is discussed and + algorithms are presented in: + + [1] J. A. M. Schreuder, "Constructing Timetables for Sport + Competitions," Mathematical Programming Study 13, pp. 58-67 (1980). + + [2] A. Schaerf, "Scheduling Sport Tournaments using Constraint + Logic Programming," Proc. of 12th Europ. Conf. on + Artif. Intell. (ECAI-96), pp. 634-639 (Budapest 1996). + http://hermes.dis.uniroma1.it/~aschaerf/publications.html + + (These people actually impose a lot of additional constraints that + we don't care about, so they are solving harder problems. [1] gives + a simple enough algorithm for our purposes, though.) + + In the timetabling problem, N teams can all play one another in N-1 + steps if N is even, and N steps if N is odd. Here, however, + there is a "self-communication" step (a team must also "play itself") + and so we can always make an optimal N-step schedule regardless of N.
+ + However, we have to do more: for a particular processor, the + communications schedule must be sorted in ascending or descending + order of processor index. (This is necessary so that the data + coming in for the transpose does not overwrite data that will be + sent later; for that processor the incoming and outgoing blocks are + of different non-zero sizes.) Fortunately, because the schedule + is stall free, each parallel step of the schedule is independent + of every other step, and we can reorder the steps arbitrarily + to achieve any desired order on a particular process. +*/ + +void free_comm_schedule(int **sched, int npes) /* frees the npes per-process rows of sched, then sched itself; does nothing when sched is NULL */ +{ + if (sched) { + int i; + + for (i = 0; i < npes; ++i) + free(sched[i]); + free(sched); + } +} + +void empty_comm_schedule(int **sched, int npes) /* resets every row to the empty schedule by storing the -1 terminator at index 0 */ +{ + int i; + for (i = 0; i < npes; ++i) + sched[i][0] = -1; +} + +extern void fill_comm_schedule(int **sched, int npes); /* forward declaration: the definition appears further down in this file */ + +/* Create a new communications schedule for a given number of processors. + The schedule is initialized to a deadlock-free, maximum overlap + schedule. Returns NULL on an error (may print a message to + stderr if there is a program bug detected).
*/ +int **make_comm_schedule(int npes) +{ + int **sched; + int i; + + sched = (int **) malloc(sizeof(int *) * npes); /* one row pointer per process */ + if (!sched) + return NULL; + + for (i = 0; i < npes; ++i) + sched[i] = NULL; /* so free_comm_schedule is safe if a later row allocation fails */ + + for (i = 0; i < npes; ++i) { + sched[i] = (int *) malloc(sizeof(int) * 10 * (npes + 1)); /* generously sized: a complete row needs only npes entries plus the -1 terminator */ + if (!sched[i]) { + free_comm_schedule(sched,npes); + return NULL; + } + } + + empty_comm_schedule(sched,npes); + fill_comm_schedule(sched,npes); + + if (!check_comm_schedule(sched,npes)) { /* reject a schedule that fails the checks; NOTE(review): no declaration of check_comm_schedule is visible before this call -- confirm */ + free_comm_schedule(sched,npes); + return NULL; + } + + return sched; +} + +static void add_dest_to_comm_schedule(int **sched, int pe, int dest) /* appends dest to pe's row and re-terminates the row with -1 */ +{ + int i; + + for (i = 0; sched[pe][i] != -1; ++i) /* find the current terminator */ + ; + + sched[pe][i] = dest; + sched[pe][i+1] = -1; +} + +static void add_pair_to_comm_schedule(int **sched, int pe1, int pe2) /* records the exchange in both participants' rows; a self-pair is recorded only once */ +{ + add_dest_to_comm_schedule(sched, pe1, pe2); + if (pe1 != pe2) + add_dest_to_comm_schedule(sched, pe2, pe1); +} + +/* Simplification of algorithm presented in [1] (we have fewer + constraints). Produces a perfect schedule (npes steps). */ + +void fill_comm_schedule(int **sched, int npes) +{ + int pe, i, n; + + if (npes % 2 == 0) { + n = npes; + for (pe = 0; pe < npes; ++pe) /* even npes: every process first gets its self-exchange */ + add_pair_to_comm_schedule(sched,pe,pe); + } + else + n = npes + 1; /* odd npes: round up to an even n */ + + for (pe = 0; pe < n - 1; ++pe) { + add_pair_to_comm_schedule(sched, pe, npes % 2 == 0 ? npes - 1 : pe); /* even npes: pe meets the last process; odd npes: pe does its self-exchange */ + + for (i = 1; i < n/2; ++i) { + int pe_a, pe_b; + + pe_a = pe - i; + if (pe_a < 0) + pe_a += n - 1; /* wrap around modulo n - 1 */ + + pe_b = (pe + i) % (n - 1); + + add_pair_to_comm_schedule(sched,pe_a,pe_b); /* pair the processes at distance i on either side of pe */ + } + } +} + +/* given an array sched[npes], fills it with the communications + schedule for process pe.
*/ +void fill1_comm_sched(int *sched, int which_pe, int npes) +{ + int pe, i, n, s = 0; + if (npes % 2 == 0) { + n = npes; + sched[s++] = which_pe; + } + else + n = npes + 1; + for (pe = 0; pe < n - 1; ++pe) { + if (npes % 2 == 0) { + if (pe == which_pe) sched[s++] = npes - 1; + else if (npes - 1 == which_pe) sched[s++] = pe; + } + else if (pe == which_pe) sched[s++] = pe; + + if (pe != which_pe && which_pe < n - 1) { + i = (pe - which_pe + (n - 1)) % (n - 1); + if (i < n/2) + sched[s++] = (pe + i) % (n - 1); + + i = (which_pe - pe + (n - 1)) % (n - 1); + if (i < n/2) + sched[s++] = (pe - i + (n - 1)) % (n - 1); + } + } + if (s != npes) { + fprintf(stderr, "bug in fill1_com_schedule (%d, %d/%d)\n", + s, which_pe, npes); + exit(EXIT_FAILURE); + } +} + +/* sort the communication schedule sched for npes so that the schedule + on process sortpe is ascending or descending (!ascending). */ +static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending) +{ + int *sortsched, i; + sortsched = (int *) malloc(npes * sizeof(int) * 2); + fill1_comm_sched(sortsched, sortpe, npes); + if (ascending) + for (i = 0; i < npes; ++i) + sortsched[npes + sortsched[i]] = sched[i]; + else + for (i = 0; i < npes; ++i) + sortsched[2*npes - 1 - sortsched[i]] = sched[i]; + for (i = 0; i < npes; ++i) + sched[i] = sortsched[npes + i]; + free(sortsched); +} + +/* Below, we have various checks in case of bugs: */ + +/* check for deadlocks by simulating the schedule and looking for + cycles in the dependency list; returns 0 if there are deadlocks + (or other errors) */ +static int check_schedule_deadlock(int **sched, int npes) +{ + int *step, *depend, *visited, pe, pe2, period, done = 0; + int counter = 0; + + /* step[pe] is the step in the schedule that a given pe is on */ + step = (int *) malloc(sizeof(int) * npes); + + /* depend[pe] is the pe' that pe is currently waiting for a message + from (-1 if none) */ + depend = (int *) malloc(sizeof(int) * npes); + + /* visited[pe] tells 
whether we have visited the current pe already + when we are looking for cycles. */ + visited = (int *) malloc(sizeof(int) * npes); + + if (!step || !depend || !visited) { + free(step); free(depend); free(visited); + return 0; + } + + for (pe = 0; pe < npes; ++pe) + step[pe] = 0; + + while (!done) { + ++counter; + + for (pe = 0; pe < npes; ++pe) + depend[pe] = sched[pe][step[pe]]; + + /* now look for cycles in the dependencies with period > 2: */ + for (pe = 0; pe < npes; ++pe) + if (depend[pe] != -1) { + for (pe2 = 0; pe2 < npes; ++pe2) + visited[pe2] = 0; + + period = 0; + pe2 = pe; + do { + visited[pe2] = period + 1; + pe2 = depend[pe2]; + period++; + } while (pe2 != -1 && !visited[pe2]); + + if (pe2 == -1) { + fprintf(stderr, + "BUG: unterminated cycle in schedule!\n"); + free(step); free(depend); + free(visited); + return 0; + } + if (period - (visited[pe2] - 1) > 2) { + fprintf(stderr,"BUG: deadlock in schedule!\n"); + free(step); free(depend); + free(visited); + return 0; + } + + if (pe2 == pe) + step[pe]++; + } + + done = 1; + for (pe = 0; pe < npes; ++pe) + if (sched[pe][step[pe]] != -1) { + done = 0; + break; + } + } + + free(step); free(depend); free(visited); + return (counter > 0 ? counter : 1); +} + +/* sanity checks; prints message and returns 0 on failure. + undocumented feature: the return value on success is actually the + number of steps required for the schedule to complete, counting + stalls. 
*/ +int check_comm_schedule(int **sched, int npes) +{ + int pe, i, comm_pe; + + for (pe = 0; pe < npes; ++pe) { + for (comm_pe = 0; comm_pe < npes; ++comm_pe) { + for (i = 0; sched[pe][i] != -1 && sched[pe][i] != comm_pe; ++i) + ; + if (sched[pe][i] == -1) { + fprintf(stderr,"BUG: schedule never sends message from " + "%d to %d.\n",pe,comm_pe); + return 0; /* never send message to comm_pe */ + } + } + for (i = 0; sched[pe][i] != -1; ++i) + ; + if (i != npes) { + fprintf(stderr,"BUG: schedule sends too many messages from " + "%d\n",pe); + return 0; + } + } + return check_schedule_deadlock(sched,npes); +} + +/* invert the order of all the schedules; this has no effect on + its required properties. */ +void invert_comm_schedule(int **sched, int npes) +{ + int pe, i; + + for (pe = 0; pe < npes; ++pe) + for (i = 0; i < npes/2; ++i) { + int dummy = sched[pe][i]; + sched[pe][i] = sched[pe][npes-1-i]; + sched[pe][npes-1-i] = dummy; + } +} + +/* Sort the schedule for sort_pe in ascending order of processor + index. Unfortunately, for odd npes (when schedule has a stall + to begin with) this will introduce an extra stall due to + the motion of the self-communication past a stall. We could + fix this if it were really important. Actually, we don't + get an extra stall when sort_pe == 0 or npes-1, which is sufficient + for our purposes. */ +void sort_comm_schedule(int **sched, int npes, int sort_pe) +{ + int i,j,pe; + + /* Note that we can do this sort in O(npes) swaps because we know + that the numbers we are sorting are just 0...npes-1. But we'll + just do a bubble sort for simplicity here. 
*/ + + for (i = 0; i < npes - 1; ++i) + for (j = i + 1; j < npes; ++j) + if (sched[sort_pe][i] > sched[sort_pe][j]) { + for (pe = 0; pe < npes; ++pe) { + int s = sched[pe][i]; + sched[pe][i] = sched[pe][j]; + sched[pe][j] = s; + } + } +} + +/* print the schedule (for debugging purposes) */ +void print_comm_schedule(int **sched, int npes) +{ + int pe, i, width; + + if (npes < 10) + width = 1; + else if (npes < 100) + width = 2; + else + width = 3; + + for (pe = 0; pe < npes; ++pe) { + printf("pe %*d schedule:", width, pe); + for (i = 0; sched[pe][i] != -1; ++i) + printf(" %*d",width,sched[pe][i]); + printf("\n"); + } +} + +int main(int argc, char **argv) +{ + int **sched; + int npes = -1, sortpe = -1, steps, i; + + if (argc >= 2) { + npes = atoi(argv[1]); + if (npes <= 0) { + fprintf(stderr,"npes must be positive!"); + return 1; + } + } + if (argc >= 3) { + sortpe = atoi(argv[2]); + if (sortpe < 0 || sortpe >= npes) { + fprintf(stderr,"sortpe must be between 0 and npes-1.\n"); + return 1; + } + } + + if (npes != -1) { + printf("Computing schedule for npes = %d:\n",npes); + sched = make_comm_schedule(npes); + if (!sched) { + fprintf(stderr,"Out of memory!"); + return 6; + } + + if (steps = check_comm_schedule(sched,npes)) + printf("schedule OK (takes %d steps to complete).\n", steps); + else + printf("schedule not OK.\n"); + + print_comm_schedule(sched, npes); + + if (sortpe != -1) { + printf("\nRe-creating schedule for pe = %d...\n", sortpe); + int *sched1 = (int*) malloc(sizeof(int) * npes); + for (i = 0; i < npes; ++i) sched1[i] = -1; + fill1_comm_sched(sched1, sortpe, npes); + printf(" ="); + for (i = 0; i < npes; ++i) + printf(" %*d", npes < 10 ? 1 : (npes < 100 ? 
2 : 3), + sched1[i]); + printf("\n"); + + printf("\nSorting schedule for sortpe = %d...\n", sortpe); + sort_comm_schedule(sched,npes,sortpe); + + if (steps = check_comm_schedule(sched,npes)) + printf("schedule OK (takes %d steps to complete).\n", + steps); + else + printf("schedule not OK.\n"); + + print_comm_schedule(sched, npes); + + printf("\nInverting schedule...\n"); + invert_comm_schedule(sched,npes); + + if (steps = check_comm_schedule(sched,npes)) + printf("schedule OK (takes %d steps to complete).\n", + steps); + else + printf("schedule not OK.\n"); + + print_comm_schedule(sched, npes); + + free_comm_schedule(sched,npes); + + free(sched1); + } + } + else { + printf("Doing infinite tests...\n"); + for (npes = 1; ; ++npes) { + int *sched1 = (int*) malloc(sizeof(int) * npes); + printf("npes = %d...",npes); + sched = make_comm_schedule(npes); + if (!sched) { + fprintf(stderr,"Out of memory!\n"); + return 5; + } + for (sortpe = 0; sortpe < npes; ++sortpe) { + empty_comm_schedule(sched,npes); + fill_comm_schedule(sched,npes); + if (!check_comm_schedule(sched,npes)) { + fprintf(stderr, + "\n -- fill error for sortpe = %d!\n",sortpe); + return 2; + } + + for (i = 0; i < npes; ++i) sched1[i] = -1; + fill1_comm_sched(sched1, sortpe, npes); + for (i = 0; i < npes; ++i) + if (sched1[i] != sched[sortpe][i]) + fprintf(stderr, + "\n -- fill1 error for pe = %d!\n", + sortpe); + + sort_comm_schedule(sched,npes,sortpe); + if (!check_comm_schedule(sched,npes)) { + fprintf(stderr, + "\n -- sort error for sortpe = %d!\n",sortpe); + return 3; + } + invert_comm_schedule(sched,npes); + if (!check_comm_schedule(sched,npes)) { + fprintf(stderr, + "\n -- invert error for sortpe = %d!\n", + sortpe); + return 4; + } + } + free_comm_schedule(sched,npes); + printf("OK\n"); + if (npes % 50 == 0) + printf("(...Hit Ctrl-C to stop...)\n"); + free(sched1); + } + } + + return 0; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/transpose-alltoall.c --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/transpose-alltoall.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* plans for distributed out-of-place transpose using MPI_Alltoall, + and which destroy the input array (unless TRANSPOSED_IN is used) */ + +#include "mpi-transpose.h" +#include + +typedef struct { + solver super; + int copy_transposed_in; /* whether to copy the input for TRANSPOSED_IN, + which makes the final transpose out-of-place + but costs an extra copy and requires us + to destroy the input */ +} S; + +typedef struct { + plan_mpi_transpose super; + + plan *cld1, *cld2, *cld2rest, *cld3; + + MPI_Comm comm; + int *send_block_sizes, *send_block_offsets; + int *recv_block_sizes, *recv_block_offsets; + + INT rest_Ioff, rest_Ooff; + + int equal_blocks; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld1, *cld2, *cld2rest, *cld3; + + /* transpose locally to get contiguous chunks */ + cld1 = (plan_rdft *) ego->cld1; + if (cld1) { + cld1->apply(ego->cld1, I, O); + + /* transpose chunks globally */ + if (ego->equal_blocks) + MPI_Alltoall(O, 
ego->send_block_sizes[0], FFTW_MPI_TYPE, + I, ego->recv_block_sizes[0], FFTW_MPI_TYPE, + ego->comm); + else + MPI_Alltoallv(O, ego->send_block_sizes, ego->send_block_offsets, + FFTW_MPI_TYPE, + I, ego->recv_block_sizes, ego->recv_block_offsets, + FFTW_MPI_TYPE, + ego->comm); + } + else { /* TRANSPOSED_IN, no need to destroy input */ + /* transpose chunks globally */ + if (ego->equal_blocks) + MPI_Alltoall(I, ego->send_block_sizes[0], FFTW_MPI_TYPE, + O, ego->recv_block_sizes[0], FFTW_MPI_TYPE, + ego->comm); + else + MPI_Alltoallv(I, ego->send_block_sizes, ego->send_block_offsets, + FFTW_MPI_TYPE, + O, ego->recv_block_sizes, ego->recv_block_offsets, + FFTW_MPI_TYPE, + ego->comm); + I = O; /* final transpose (if any) is in-place */ + } + + /* transpose locally, again, to get ordinary row-major */ + cld2 = (plan_rdft *) ego->cld2; + if (cld2) { + cld2->apply(ego->cld2, I, O); + cld2rest = (plan_rdft *) ego->cld2rest; + if (cld2rest) { /* leftover from unequal block sizes */ + cld2rest->apply(ego->cld2rest, + I + ego->rest_Ioff, O + ego->rest_Ooff); + cld3 = (plan_rdft *) ego->cld3; + if (cld3) + cld3->apply(ego->cld3, O, O); + /* else TRANSPOSED_OUT is true and user wants O transposed */ + } + } +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_transpose *p = (const problem_mpi_transpose *) p_; + return (1 + && p->I != p->O + && (!NO_DESTROY_INPUTP(plnr) || + ((p->flags & TRANSPOSED_IN) && !ego->copy_transposed_in)) + && ((p->flags & TRANSPOSED_IN) || !ego->copy_transposed_in) + && ONLY_TRANSPOSEDP(p->flags) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); + X(plan_awake)(ego->cld2rest, wakefulness); + X(plan_awake)(ego->cld3, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(ifree0)(ego->send_block_sizes); + MPI_Comm_free(&ego->comm); + 
X(plan_destroy_internal)(ego->cld3); + X(plan_destroy_internal)(ego->cld2rest); + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-transpose-alltoall%s%(%p%)%(%p%)%(%p%)%(%p%))", + ego->equal_blocks ? "/e" : "", + ego->cld1, ego->cld2, ego->cld2rest, ego->cld3); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_transpose *p; + P *pln; + plan *cld1 = 0, *cld2 = 0, *cld2rest = 0, *cld3 = 0; + INT b, bt, vn, rest_Ioff, rest_Ooff; + R *I; + int *sbs, *sbo, *rbs, *rbo; + int pe, my_pe, n_pes; + int equal_blocks = 1; + static const plan_adt padt = { + XM(transpose_solve), awake, print, destroy + }; + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_transpose *) p_; + vn = p->vn; + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + b = XM(block)(p->nx, p->block, my_pe); + + if (p->flags & TRANSPOSED_IN) { /* I is already transposed */ + if (ego->copy_transposed_in) { + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_1d) + (b * p->ny * vn, 1, 1), + I = p->I, p->O), + 0, 0, NO_SLOW); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + else + I = p->O; /* final transpose is in-place */ + } + else { /* transpose b x ny x vn -> ny x b x vn */ + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_3d) + (b, p->ny * vn, vn, + p->ny, vn, b * vn, + vn, 1, 1), + I = p->I, p->O), + 0, 0, NO_SLOW); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + + if (XM(any_true)(!XM(mkplans_posttranspose)(p, plnr, I, p->O, my_pe, + &cld2, &cld2rest, &cld3, + &rest_Ioff, &rest_Ooff), + p->comm)) goto nada; + + pln = MKPLAN_MPI_TRANSPOSE(P, &padt, apply); + + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->cld2rest = cld2rest; + pln->rest_Ioff = rest_Ioff; + pln->rest_Ooff = rest_Ooff; + pln->cld3 = 
cld3; + + MPI_Comm_dup(p->comm, &pln->comm); + + /* Compute sizes/offsets of blocks to send for all-to-all command. */ + sbs = (int *) MALLOC(4 * n_pes * sizeof(int), PLANS); + sbo = sbs + n_pes; + rbs = sbo + n_pes; + rbo = rbs + n_pes; + b = XM(block)(p->nx, p->block, my_pe); + bt = XM(block)(p->ny, p->tblock, my_pe); + for (pe = 0; pe < n_pes; ++pe) { + INT db, dbt; /* destination block sizes */ + db = XM(block)(p->nx, p->block, pe); + dbt = XM(block)(p->ny, p->tblock, pe); + if (db != p->block || dbt != p->tblock) + equal_blocks = 0; + + /* MPI requires type "int" here; apparently it + has no 64-bit API? Grrr. */ + sbs[pe] = (int) (b * dbt * vn); + sbo[pe] = (int) (pe * (b * p->tblock) * vn); + rbs[pe] = (int) (db * bt * vn); + rbo[pe] = (int) (pe * (p->block * bt) * vn); + } + pln->send_block_sizes = sbs; + pln->send_block_offsets = sbo; + pln->recv_block_sizes = rbs; + pln->recv_block_offsets = rbo; + pln->equal_blocks = equal_blocks; + + X(ops_zero)(&pln->super.super.ops); + if (cld1) X(ops_add2)(&cld1->ops, &pln->super.super.ops); + if (cld2) X(ops_add2)(&cld2->ops, &pln->super.super.ops); + if (cld2rest) X(ops_add2)(&cld2rest->ops, &pln->super.super.ops); + if (cld3) X(ops_add2)(&cld3->ops, &pln->super.super.ops); + /* FIXME: should MPI operations be counted in "other" somehow? 
*/ + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld3); + X(plan_destroy_internal)(cld2rest); + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int copy_transposed_in) +{ + static const solver_adt sadt = { PROBLEM_MPI_TRANSPOSE, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->copy_transposed_in = copy_transposed_in; + return &(slv->super); +} + +void XM(transpose_alltoall_register)(planner *p) +{ + int cti; + for (cti = 0; cti <= 1; ++cti) + REGISTER_SOLVER(p, mksolver(cti)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/transpose-pairwise.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/transpose-pairwise.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,486 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Distributed transposes using a sequence of carefully scheduled + pairwise exchanges. This has the advantage that it can be done + in-place, or out-of-place while preserving the input, using buffer + space proportional to the local size divided by the number of + processes (i.e. 
to the total array size divided by the number of + processes squared). */ + +#include "mpi-transpose.h" +#include + +typedef struct { + solver super; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_transpose super; + + plan *cld1, *cld2, *cld2rest, *cld3; + INT rest_Ioff, rest_Ooff; + + int n_pes, my_pe, *sched; + INT *send_block_sizes, *send_block_offsets; + INT *recv_block_sizes, *recv_block_offsets; + MPI_Comm comm; + int preserve_input; +} P; + +static void transpose_chunks(int *sched, int n_pes, int my_pe, + INT *sbs, INT *sbo, INT *rbs, INT *rbo, + MPI_Comm comm, + R *I, R *O) +{ + if (sched) { + int i; + MPI_Status status; + + /* TODO: explore non-synchronous send/recv? */ + + if (I == O) { + R *buf = (R*) MALLOC(sizeof(R) * sbs[0], BUFFERS); + + for (i = 0; i < n_pes; ++i) { + int pe = sched[i]; + if (my_pe == pe) { + if (rbo[pe] != sbo[pe]) + memmove(O + rbo[pe], O + sbo[pe], + sbs[pe] * sizeof(R)); + } + else { + memcpy(buf, O + sbo[pe], sbs[pe] * sizeof(R)); + MPI_Sendrecv(buf, (int) (sbs[pe]), FFTW_MPI_TYPE, + pe, (my_pe * n_pes + pe) & 0xffff, + O + rbo[pe], (int) (rbs[pe]), + FFTW_MPI_TYPE, + pe, (pe * n_pes + my_pe) & 0xffff, + comm, &status); + } + } + + X(ifree)(buf); + } + else { /* I != O */ + for (i = 0; i < n_pes; ++i) { + int pe = sched[i]; + if (my_pe == pe) + memcpy(O + rbo[pe], I + sbo[pe], sbs[pe] * sizeof(R)); + else + MPI_Sendrecv(I + sbo[pe], (int) (sbs[pe]), + FFTW_MPI_TYPE, + pe, (my_pe * n_pes + pe) & 0xffff, + O + rbo[pe], (int) (rbs[pe]), + FFTW_MPI_TYPE, + pe, (pe * n_pes + my_pe) & 0xffff, + comm, &status); + } + } + } +} + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld1, *cld2, *cld2rest, *cld3; + + /* transpose locally to get contiguous chunks */ + cld1 = (plan_rdft *) ego->cld1; + if (cld1) { + cld1->apply(ego->cld1, I, O); + + if (ego->preserve_input) I = O; + + /* transpose chunks globally */ + 
transpose_chunks(ego->sched, ego->n_pes, ego->my_pe, + ego->send_block_sizes, ego->send_block_offsets, + ego->recv_block_sizes, ego->recv_block_offsets, + ego->comm, O, I); + } + else if (ego->preserve_input) { + /* transpose chunks globally */ + transpose_chunks(ego->sched, ego->n_pes, ego->my_pe, + ego->send_block_sizes, ego->send_block_offsets, + ego->recv_block_sizes, ego->recv_block_offsets, + ego->comm, I, O); + + I = O; + } + else { + /* transpose chunks globally */ + transpose_chunks(ego->sched, ego->n_pes, ego->my_pe, + ego->send_block_sizes, ego->send_block_offsets, + ego->recv_block_sizes, ego->recv_block_offsets, + ego->comm, I, I); + } + + /* transpose locally, again, to get ordinary row-major; + this may take two transposes if the block sizes are unequal + (3 subplans, two of which operate on disjoint data) */ + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, I, O); + cld2rest = (plan_rdft *) ego->cld2rest; + if (cld2rest) { + cld2rest->apply(ego->cld2rest, + I + ego->rest_Ioff, O + ego->rest_Ooff); + cld3 = (plan_rdft *) ego->cld3; + if (cld3) + cld3->apply(ego->cld3, O, O); + /* else TRANSPOSED_OUT is true and user wants O transposed */ + } +} + +static int applicable(const S *ego, const problem *p_, + const planner *plnr) +{ + const problem_mpi_transpose *p = (const problem_mpi_transpose *) p_; + /* Note: this is *not* UGLY for out-of-place, destroy-input plans; + the planner often prefers transpose-pairwise to transpose-alltoall, + at least with LAM MPI on my machine. 
*/ + return (1 + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && ONLY_TRANSPOSEDP(p->flags)); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); + X(plan_awake)(ego->cld2rest, wakefulness); + X(plan_awake)(ego->cld3, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(ifree0)(ego->sched); + X(ifree0)(ego->send_block_sizes); + MPI_Comm_free(&ego->comm); + X(plan_destroy_internal)(ego->cld3); + X(plan_destroy_internal)(ego->cld2rest); + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-transpose-pairwise%s%(%p%)%(%p%)%(%p%)%(%p%))", + ego->preserve_input==2 ?"/p":"", + ego->cld1, ego->cld2, ego->cld2rest, ego->cld3); +} + +/* Given a process which_pe and a number of processes npes, fills + the array sched[npes] with a sequence of processes to communicate + with for a deadlock-free, optimum-overlap all-to-all communication. + (All processes must call this routine to get their own schedules.) + The schedule can be re-ordered arbitrarily as long as all processes + apply the same permutation to their schedules. + + The algorithm here is based upon the one described in: + J. A. M. Schreuder, "Constructing timetables for sport + competitions," Mathematical Programming Study 13, pp. 58-67 (1980). + In a sport competition, you have N teams and want every team to + play every other team in as short a time as possible (maximum overlap + between games). This timetabling problem is therefore identical + to that of an all-to-all communications problem. 
In our case, there + is one wrinkle: as part of the schedule, the process must do + some data transfer with itself (local data movement), analogous + to a requirement that each team "play itself" in addition to other + teams. With this wrinkle, it turns out that an optimal timetable + (N parallel games) can be constructed for any N, not just for even + N as in the original problem described by Schreuder. +*/ +static void fill1_comm_sched(int *sched, int which_pe, int npes) +{ + int pe, i, n, s = 0; + A(which_pe >= 0 && which_pe < npes); + if (npes % 2 == 0) { + n = npes; + sched[s++] = which_pe; + } + else + n = npes + 1; + for (pe = 0; pe < n - 1; ++pe) { + if (npes % 2 == 0) { + if (pe == which_pe) sched[s++] = npes - 1; + else if (npes - 1 == which_pe) sched[s++] = pe; + } + else if (pe == which_pe) sched[s++] = pe; + + if (pe != which_pe && which_pe < n - 1) { + i = (pe - which_pe + (n - 1)) % (n - 1); + if (i < n/2) + sched[s++] = (pe + i) % (n - 1); + + i = (which_pe - pe + (n - 1)) % (n - 1); + if (i < n/2) + sched[s++] = (pe - i + (n - 1)) % (n - 1); + } + } + A(s == npes); +} + +/* Sort the communication schedule sched for npes so that the schedule + on process sortpe is ascending or descending (!ascending). This is + necessary to allow in-place transposes when the problem does not + divide equally among the processes. In this case there is one + process where the incoming blocks are bigger/smaller than the + outgoing blocks and thus have to be received in + descending/ascending order, respectively, to avoid overwriting data + before it is sent. 
*/ +static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending) +{ + int *sortsched, i; + sortsched = (int *) MALLOC(npes * sizeof(int) * 2, OTHER); + fill1_comm_sched(sortsched, sortpe, npes); + if (ascending) + for (i = 0; i < npes; ++i) + sortsched[npes + sortsched[i]] = sched[i]; + else + for (i = 0; i < npes; ++i) + sortsched[2*npes - 1 - sortsched[i]] = sched[i]; + for (i = 0; i < npes; ++i) + sched[i] = sortsched[npes + i]; + X(ifree)(sortsched); +} + +/* make the plans to do the post-MPI transpositions (shared with + transpose-alltoall) */ +int XM(mkplans_posttranspose)(const problem_mpi_transpose *p, planner *plnr, + R *I, R *O, int my_pe, + plan **cld2, plan **cld2rest, plan **cld3, + INT *rest_Ioff, INT *rest_Ooff) +{ + INT vn = p->vn; + INT b = p->block; + INT bt = XM(block)(p->ny, p->tblock, my_pe); + INT nxb = p->nx / b; /* number of equal-sized blocks */ + INT nxr = p->nx - nxb * b; /* leftover rows after equal blocks */ + + *cld2 = *cld2rest = *cld3 = NULL; + *rest_Ioff = *rest_Ooff = 0; + + if (!(p->flags & TRANSPOSED_OUT) && (nxr == 0 || I != O)) { + INT nx = p->nx * vn; + b *= vn; + *cld2 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_3d) + (nxb, bt * b, b, + bt, b, nx, + b, 1, 1), + I, O), + 0, 0, NO_SLOW); + if (!*cld2) goto nada; + + if (nxr > 0) { + *rest_Ioff = nxb * bt * b; + *rest_Ooff = nxb * b; + b = nxr * vn; + *cld2rest = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_2d) + (bt, b, nx, + b, 1, 1), + I + *rest_Ioff, + O + *rest_Ooff), + 0, 0, NO_SLOW); + if (!*cld2rest) goto nada; + } + } + else { + *cld2 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_4d) + (nxb, bt * b * vn, bt * b * vn, + bt, b * vn, vn, + b, vn, bt * vn, + vn, 1, 1), + I, O), + 0, 0, NO_SLOW); + if (!*cld2) goto nada; + + *rest_Ioff = *rest_Ooff = nxb * bt * b * vn; + *cld2rest = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d) + (bt, nxr * vn, vn, + nxr, vn, bt * vn, + vn, 1, 1), + I + *rest_Ioff, O + 
*rest_Ooff), + 0, 0, NO_SLOW); + if (!*cld2rest) goto nada; + + if (!(p->flags & TRANSPOSED_OUT)) { + *cld3 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d) + (p->nx, bt * vn, vn, + bt, vn, p->nx * vn, + vn, 1, 1), + O, O), + 0, 0, NO_SLOW); + if (!*cld3) goto nada; + } + } + + return 1; + +nada: + X(plan_destroy_internal)(*cld3); + X(plan_destroy_internal)(*cld2rest); + X(plan_destroy_internal)(*cld2); + return 0; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_transpose *p; + P *pln; + plan *cld1 = 0, *cld2 = 0, *cld2rest = 0, *cld3 = 0; + INT b, bt, vn, rest_Ioff, rest_Ooff; + INT *sbs, *sbo, *rbs, *rbo; + int pe, my_pe, n_pes, sort_pe = -1, ascending = 1; + R *I, *O; + static const plan_adt padt = { + XM(transpose_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_mpi_transpose *) p_; + vn = p->vn; + I = p->I; O = p->O; + + MPI_Comm_rank(p->comm, &my_pe); + MPI_Comm_size(p->comm, &n_pes); + + b = XM(block)(p->nx, p->block, my_pe); + + if (!(p->flags & TRANSPOSED_IN)) { /* b x ny x vn -> ny x b x vn */ + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_3d) + (b, p->ny * vn, vn, + p->ny, vn, b * vn, + vn, 1, 1), + I, O), + 0, 0, NO_SLOW); + if (XM(any_true)(!cld1, p->comm)) goto nada; + } + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) I = O; + + if (XM(any_true)(!XM(mkplans_posttranspose)(p, plnr, I, O, my_pe, + &cld2, &cld2rest, &cld3, + &rest_Ioff, &rest_Ooff), + p->comm)) goto nada; + + pln = MKPLAN_MPI_TRANSPOSE(P, &padt, apply); + + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->cld2rest = cld2rest; + pln->rest_Ioff = rest_Ioff; + pln->rest_Ooff = rest_Ooff; + pln->cld3 = cld3; + pln->preserve_input = ego->preserve_input ? 
2 : NO_DESTROY_INPUTP(plnr); + + MPI_Comm_dup(p->comm, &pln->comm); + + n_pes = (int) X(imax)(XM(num_blocks)(p->nx, p->block), + XM(num_blocks)(p->ny, p->tblock)); + + /* Compute sizes/offsets of blocks to exchange between processors */ + sbs = (INT *) MALLOC(4 * n_pes * sizeof(INT), PLANS); + sbo = sbs + n_pes; + rbs = sbo + n_pes; + rbo = rbs + n_pes; + b = XM(block)(p->nx, p->block, my_pe); + bt = XM(block)(p->ny, p->tblock, my_pe); + for (pe = 0; pe < n_pes; ++pe) { + INT db, dbt; /* destination block sizes */ + db = XM(block)(p->nx, p->block, pe); + dbt = XM(block)(p->ny, p->tblock, pe); + + sbs[pe] = b * dbt * vn; + sbo[pe] = pe * (b * p->tblock) * vn; + rbs[pe] = db * bt * vn; + rbo[pe] = pe * (p->block * bt) * vn; + + if (db * dbt > 0 && db * p->tblock != p->block * dbt) { + A(sort_pe == -1); /* only one process should need sorting */ + sort_pe = pe; + ascending = db * p->tblock > p->block * dbt; + } + } + pln->n_pes = n_pes; + pln->my_pe = my_pe; + pln->send_block_sizes = sbs; + pln->send_block_offsets = sbo; + pln->recv_block_sizes = rbs; + pln->recv_block_offsets = rbo; + + if (my_pe >= n_pes) { + pln->sched = 0; /* this process is not doing anything */ + } + else { + pln->sched = (int *) MALLOC(n_pes * sizeof(int), PLANS); + fill1_comm_sched(pln->sched, my_pe, n_pes); + if (sort_pe >= 0) + sort1_comm_sched(pln->sched, n_pes, sort_pe, ascending); + } + + X(ops_zero)(&pln->super.super.ops); + if (cld1) X(ops_add2)(&cld1->ops, &pln->super.super.ops); + if (cld2) X(ops_add2)(&cld2->ops, &pln->super.super.ops); + if (cld2rest) X(ops_add2)(&cld2rest->ops, &pln->super.super.ops); + if (cld3) X(ops_add2)(&cld3->ops, &pln->super.super.ops); + /* FIXME: should MPI operations be counted in "other" somehow? 
*/ + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld3); + X(plan_destroy_internal)(cld2rest); + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input) +{ + static const solver_adt sadt = { PROBLEM_MPI_TRANSPOSE, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + return &(slv->super); +} + +void XM(transpose_pairwise_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) + REGISTER_SOLVER(p, mksolver(preserve_input)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/transpose-problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/transpose-problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-transpose.h" + +static void destroy(problem *ego_) +{ + problem_mpi_transpose *ego = (problem_mpi_transpose *) ego_; + MPI_Comm_free(&ego->comm); + X(ifree)(ego_); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_mpi_transpose *p = (const problem_mpi_transpose *) p_; + int i; + X(md5puts)(m, "mpi-transpose"); + X(md5int)(m, p->I == p->O); + /* don't include alignment -- may differ between processes + X(md5int)(m, X(alignment_of)(p->I)); + X(md5int)(m, X(alignment_of)(p->O)); + ... note that applicability of MPI plans does not depend + on alignment (although optimality may, in principle). */ + X(md5INT)(m, p->vn); + X(md5INT)(m, p->nx); + X(md5INT)(m, p->ny); + X(md5INT)(m, p->block); + X(md5INT)(m, p->tblock); + MPI_Comm_size(p->comm, &i); X(md5int)(m, i); + A(XM(md5_equal)(*m, p->comm)); +} + +static void print(const problem *ego_, printer *p) +{ + const problem_mpi_transpose *ego = (const problem_mpi_transpose *) ego_; + int i; + MPI_Comm_size(ego->comm, &i); + p->print(p, "(mpi-transpose %d %d %d %D %D %D %D %D %d)", + ego->I == ego->O, + X(alignment_of)(ego->I), + X(alignment_of)(ego->O), + ego->vn, + ego->nx, ego->ny, + ego->block, ego->tblock, + i); +} + +static void zero(const problem *ego_) +{ + const problem_mpi_transpose *ego = (const problem_mpi_transpose *) ego_; + R *I = ego->I; + INT i, N = ego->vn * ego->ny; + int my_pe; + + MPI_Comm_rank(ego->comm, &my_pe); + N *= XM(block)(ego->nx, ego->block, my_pe); + + for (i = 0; i < N; ++i) I[i] = K(0.0); +} + +static const problem_adt padt = +{ + PROBLEM_MPI_TRANSPOSE, + hash, + zero, + print, + destroy +}; + +problem *XM(mkproblem_transpose)(INT nx, INT ny, INT vn, + R *I, R *O, + INT block, INT tblock, + MPI_Comm comm, + unsigned flags) +{ + 
problem_mpi_transpose *ego = + (problem_mpi_transpose *)X(mkproblem)(sizeof(problem_mpi_transpose), &padt); + + A(nx > 0 && ny > 0 && vn > 0); + A(block > 0 && XM(num_blocks_ok)(nx, block, comm) + && tblock > 0 && XM(num_blocks_ok)(ny, tblock, comm)); + + /* enforce pointer equality if untainted pointers are equal */ + if (UNTAINT(I) == UNTAINT(O)) + I = O = JOIN_TAINT(I, O); + + ego->nx = nx; + ego->ny = ny; + ego->vn = vn; + ego->I = I; + ego->O = O; + ego->block = block > nx ? nx : block; + ego->tblock = tblock > ny ? ny : tblock; + + /* canonicalize flags: we can freely assume that the data is + "transposed" if one of the dimensions is 1. */ + if (ego->block == 1) + flags |= TRANSPOSED_IN; + if (ego->tblock == 1) + flags |= TRANSPOSED_OUT; + ego->flags = flags; + + MPI_Comm_dup(comm, &ego->comm); + + return &(ego->super); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/transpose-recurse.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/transpose-recurse.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Recursive "radix-r" distributed transpose, which breaks a transpose + over p processes into p/r transposes over r processes plus r + transposes over p/r processes. If performed recursively, this + produces a total of O(p log p) messages vs. O(p^2) messages for a + direct approach. + + However, this is not necessarily an improvement. The total size of + all the messages is actually increased from O(N) to O(N log p) + where N is the total data size. Also, the amount of local data + rearrangement is increased. So, it's not clear, a priori, what the + best algorithm will be, and we'll leave it to the planner. (In + theory and practice, it looks like this becomes advantageous for + large p, in the limit where the message sizes are small and + latency-dominated.) +*/ + +#include "mpi-transpose.h" +#include + +typedef struct { + solver super; + int (*radix)(int np); + const char *nam; + int preserve_input; /* preserve input even if DESTROY_INPUT was passed */ +} S; + +typedef struct { + plan_mpi_transpose super; + + plan *cld1, *cldtr, *cldtm; + int preserve_input; + + int r; /* "radix" */ + const char *nam; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld1, *cldtr, *cldtm; + + cld1 = (plan_rdft *) ego->cld1; + if (cld1) cld1->apply((plan *) cld1, I, O); + + if (ego->preserve_input) I = O; + + cldtr = (plan_rdft *) ego->cldtr; + if (cldtr) cldtr->apply((plan *) cldtr, O, I); + + cldtm = (plan_rdft *) ego->cldtm; + if (cldtm) cldtm->apply((plan *) cldtm, I, O); +} + +static int radix_sqrt(int np) +{ + int r; + for (r = (int) (X(isqrt)(np)); np % r != 0; ++r) + ; + return r; +} + +static int radix_first(int np) +{ + int r = (int) (X(first_divisor)(np)); + return (r >= (int) 
(X(isqrt)(np)) ? 0 : r); +} + +/* the local allocated space on process pe required for the given transpose + dimensions and block sizes */ +static INT transpose_space(INT nx, INT ny, INT block, INT tblock, int pe) +{ + return X(imax)(XM(block)(nx, block, pe) * ny, + nx * XM(block)(ny, tblock, pe)); +} + +/* check whether the recursive transposes fit within the space + that must have been allocated on each process for this transpose; + this must be modified if the subdivision in mkplan is changed! */ +static int enough_space(INT nx, INT ny, INT block, INT tblock, + int r, int n_pes) +{ + int pe; + int m = n_pes / r; + for (pe = 0; pe < n_pes; ++pe) { + INT space = transpose_space(nx, ny, block, tblock, pe); + INT b1 = XM(block)(nx, r * block, pe / r); + INT b2 = XM(block)(ny, m * tblock, pe % r); + if (transpose_space(b1, ny, block, m*tblock, pe % r) > space + || transpose_space(nx, b2, r*block, tblock, pe / r) > space) + return 0; + } + return 1; +} + +/* In theory, transpose-recurse becomes advantageous for message sizes + below some minimum, assuming that the time is dominated by + communications. In practice, we want to constrain the minimum + message size for transpose-recurse to keep the planning time down. + I've set this conservatively according to some simple experiments + on a Cray XT3 where the crossover message size was 128, although on + a larger-latency machine the crossover will be larger. 
*/ +#define SMALL_MESSAGE 2048 + +static int applicable(const S *ego, const problem *p_, + const planner *plnr, int *r) +{ + const problem_mpi_transpose *p = (const problem_mpi_transpose *) p_; + int n_pes; + MPI_Comm_size(p->comm, &n_pes); + return (1 + && p->tblock * n_pes == p->ny + && (!ego->preserve_input || (!NO_DESTROY_INPUTP(plnr) + && p->I != p->O)) + && (*r = ego->radix(n_pes)) && *r < n_pes && *r > 1 + && enough_space(p->nx, p->ny, p->block, p->tblock, *r, n_pes) + && (!CONSERVE_MEMORYP(plnr) || *r > 8 + || !X(toobig)((p->nx * (p->ny / n_pes) * p->vn) / *r)) + && (!NO_SLOWP(plnr) || + (p->nx * (p->ny / n_pes) * p->vn) / n_pes <= SMALL_MESSAGE) + && ONLY_TRANSPOSEDP(p->flags) + ); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cldtr, wakefulness); + X(plan_awake)(ego->cldtm, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldtm); + X(plan_destroy_internal)(ego->cldtr); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(mpi-transpose-recurse/%s/%d%s%(%p%)%(%p%)%(%p%))", + ego->nam, ego->r, ego->preserve_input==2 ?"/p":"", + ego->cld1, ego->cldtr, ego->cldtm); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_mpi_transpose *p; + P *pln; + plan *cld1 = 0, *cldtr = 0, *cldtm = 0; + R *I, *O; + int me, np, r, m; + INT b; + MPI_Comm comm2; + static const plan_adt padt = { + XM(transpose_solve), awake, print, destroy + }; + + UNUSED(ego); + + if (!applicable(ego, p_, plnr, &r)) + return (plan *) 0; + + p = (const problem_mpi_transpose *) p_; + + MPI_Comm_size(p->comm, &np); + MPI_Comm_rank(p->comm, &me); + m = np / r; + A(r * m == np); + + I = p->I; O = p->O; + + b = XM(block)(p->nx, p->block, me); + A(p->tblock * np == 
p->ny); /* this is currently required for cld1 */ + if (p->flags & TRANSPOSED_IN) { + /* m x r x (bt x b x vn) -> r x m x (bt x b x vn) */ + INT vn = p->vn * b * p->tblock; + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_3d) + (m, r*vn, vn, + r, vn, m*vn, + vn, 1, 1), + I, O), + 0, 0, NO_SLOW); + } + else if (I != O) { /* combine cld1 with TRANSPOSED_IN permutation */ + /* b x m x r x bt x vn -> r x m x bt x b x vn */ + INT vn = p->vn; + INT bt = p->tblock; + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_5d) + (b, m*r*bt*vn, vn, + m, r*bt*vn, bt*b*vn, + r, bt*vn, m*bt*b*vn, + bt, vn, b*vn, + vn, 1, 1), + I, O), + 0, 0, NO_SLOW); + } + else { /* TRANSPOSED_IN permutation must be separate for in-place */ + /* b x (m x r) x bt x vn -> b x (r x m) x bt x vn */ + INT vn = p->vn * p->tblock; + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_0_d)(X(mktensor_4d) + (m, r*vn, vn, + r, vn, m*vn, + vn, 1, 1, + b, np*vn, np*vn), + I, O), + 0, 0, NO_SLOW); + } + if (XM(any_true)(!cld1, p->comm)) goto nada; + + if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) I = O; + + b = XM(block)(p->nx, r * p->block, me / r); + MPI_Comm_split(p->comm, me / r, me, &comm2); + if (b) + cldtr = X(mkplan_d)(plnr, XM(mkproblem_transpose) + (b, p->ny, p->vn, + O, I, p->block, m * p->tblock, comm2, + p->I != p->O + ? TRANSPOSED_IN : (p->flags & TRANSPOSED_IN))); + MPI_Comm_free(&comm2); + if (XM(any_true)(b && !cldtr, p->comm)) goto nada; + + b = XM(block)(p->ny, m * p->tblock, me % r); + MPI_Comm_split(p->comm, me % r, me, &comm2); + if (b) + cldtm = X(mkplan_d)(plnr, XM(mkproblem_transpose) + (p->nx, b, p->vn, + I, O, r * p->block, p->tblock, comm2, + TRANSPOSED_IN | (p->flags & TRANSPOSED_OUT))); + MPI_Comm_free(&comm2); + if (XM(any_true)(b && !cldtm, p->comm)) goto nada; + + pln = MKPLAN_MPI_TRANSPOSE(P, &padt, apply); + + pln->cld1 = cld1; + pln->cldtr = cldtr; + pln->cldtm = cldtm; + pln->preserve_input = ego->preserve_input ? 
2 : NO_DESTROY_INPUTP(plnr); + pln->r = r; + pln->nam = ego->nam; + + pln->super.super.ops = cld1->ops; + if (cldtr) X(ops_add2)(&cldtr->ops, &pln->super.super.ops); + if (cldtm) X(ops_add2)(&cldtm->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldtm); + X(plan_destroy_internal)(cldtr); + X(plan_destroy_internal)(cld1); + return (plan *) 0; +} + +static solver *mksolver(int preserve_input, + int (*radix)(int np), const char *nam) +{ + static const solver_adt sadt = { PROBLEM_MPI_TRANSPOSE, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->preserve_input = preserve_input; + slv->radix = radix; + slv->nam = nam; + return &(slv->super); +} + +void XM(transpose_recurse_register)(planner *p) +{ + int preserve_input; + for (preserve_input = 0; preserve_input <= 1; ++preserve_input) { + REGISTER_SOLVER(p, mksolver(preserve_input, radix_sqrt, "sqrt")); + REGISTER_SOLVER(p, mksolver(preserve_input, radix_first, "first")); + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/transpose-solve.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/transpose-solve.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "mpi-transpose.h" + +/* use the apply() operation for MPI_TRANSPOSE problems */ +void XM(transpose_solve)(const plan *ego_, const problem *p_) +{ + const plan_mpi_transpose *ego = (const plan_mpi_transpose *) ego_; + const problem_mpi_transpose *p = (const problem_mpi_transpose *) p_; + ego->apply(ego_, UNTAINT(p->I), UNTAINT(p->O)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/mpi/wisdom-api.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/mpi/wisdom-api.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "fftw3-mpi.h" +#include "ifftw-mpi.h" +#include + +#if SIZEOF_SIZE_T == SIZEOF_UNSIGNED_INT +# define FFTW_MPI_SIZE_T MPI_UNSIGNED +#elif SIZEOF_SIZE_T == SIZEOF_UNSIGNED_LONG +# define FFTW_MPI_SIZE_T MPI_UNSIGNED_LONG +#elif SIZEOF_SIZE_T == SIZEOF_UNSIGNED_LONG_LONG +# define FFTW_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG +#else +# error MPI type for size_t is unknown +# define FFTW_MPI_SIZE_T MPI_UNSIGNED_LONG +#endif + +/* Import wisdom from all processes to process 0, as prelude to + exporting a single wisdom file (this is convenient when we are + running on identical processors, to avoid the annoyance of having + per-process wisdom files). In order to make the time for this + operation logarithmic in the number of processors (rather than + linear), we employ a tree reduction algorithm. This means that the + wisdom is modified on processes other than root, which shouldn't + matter in practice. 
*/ +void XM(gather_wisdom)(MPI_Comm comm_) +{ + MPI_Comm comm, comm2; + int my_pe, n_pes; + char *wis; + size_t wislen; + MPI_Status status; + + MPI_Comm_dup(comm_, &comm); + MPI_Comm_rank(comm, &my_pe); + MPI_Comm_size(comm, &n_pes); + + if (n_pes > 2) { /* recursively split into even/odd processes */ + MPI_Comm_split(comm, my_pe % 2, my_pe, &comm2); + XM(gather_wisdom)(comm2); + MPI_Comm_free(&comm2); + } + if (n_pes > 1 && my_pe < 2) { /* import process 1 -> 0 */ + if (my_pe == 1) { + wis = X(export_wisdom_to_string)(); + wislen = strlen(wis) + 1; + MPI_Send(&wislen, 1, FFTW_MPI_SIZE_T, 0, 111, comm); + MPI_Send(wis, wislen, MPI_CHAR, 0, 222, comm); + free(wis); + } + else /* my_pe == 0 */ { + MPI_Recv(&wislen, 1, FFTW_MPI_SIZE_T, 1, 111, comm, &status); + wis = (char *) MALLOC(wislen * sizeof(char), OTHER); + MPI_Recv(wis, wislen, MPI_CHAR, 1, 222, comm, &status); + if (!X(import_wisdom_from_string)(wis)) + MPI_Abort(comm, 1); + X(ifree)(wis); + } + } + MPI_Comm_free(&comm); +} + +/* broadcast wisdom from process 0 to all other processes; this + is useful so that we can import wisdom once and not worry + about parallel I/O or process-specific wisdom, although of + course it assumes that all the processes have identical + performance characteristics (i.e. identical hardware). 
*/ +void XM(broadcast_wisdom)(MPI_Comm comm_) +{ + MPI_Comm comm; + int my_pe; + char *wis; + size_t wislen; + + MPI_Comm_dup(comm_, &comm); + MPI_Comm_rank(comm, &my_pe); + + if (my_pe != 0) { + MPI_Bcast(&wislen, 1, FFTW_MPI_SIZE_T, 0, comm); + wis = (char *) MALLOC(wislen * sizeof(char), OTHER); + MPI_Bcast(wis, wislen, MPI_CHAR, 0, comm); + if (!X(import_wisdom_from_string)(wis)) + MPI_Abort(comm, 1); + X(ifree)(wis); + } + else /* my_pe == 0 */ { + wis = X(export_wisdom_to_string)(); + wislen = strlen(wis) + 1; + MPI_Bcast(&wislen, 1, FFTW_MPI_SIZE_T, 0, comm); + MPI_Bcast(wis, wislen, MPI_CHAR, 0, comm); + X(free)(wis); + } + MPI_Comm_free(&comm); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,18 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft +SUBDIRS = scalar simd + +noinst_LTLIBRARIES = librdft.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = codelet-rdft.h rdft.h + +RDFT2 = buffered2.c direct2.c nop2.c rank0-rdft2.c rank-geq2-rdft2.c \ +plan2.c problem2.c solve2.c vrank-geq1-rdft2.c rdft2-rdft.c \ +rdft2-tensor-max-index.c rdft2-inplace-strides.c rdft2-strides.c \ +khc2c.c ct-hc2c.h ct-hc2c.c ct-hc2c-direct.c + +librdft_la_SOURCES = hc2hc.h hc2hc.c dft-r2hc.c dht-r2hc.c dht-rader.c \ +buffered.c codelet-rdft.h conf.c direct-r2r.c direct-r2c.c generic.c \ +hc2hc-direct.c hc2hc-generic.c khc2hc.c kr2c.c kr2r.c indirect.c nop.c \ +plan.c problem.c rank0.c rank-geq2.c rdft.h rdft-dht.c solve.c \ +vrank-geq1.c vrank3-transpose.c $(RDFT2) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,787 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = 
$(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = rdft +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_la_LIBADD = +am__objects_1 = buffered2.lo direct2.lo nop2.lo rank0-rdft2.lo \ + rank-geq2-rdft2.lo plan2.lo problem2.lo solve2.lo \ + vrank-geq1-rdft2.lo rdft2-rdft.lo rdft2-tensor-max-index.lo \ + rdft2-inplace-strides.lo rdft2-strides.lo khc2c.lo ct-hc2c.lo \ + ct-hc2c-direct.lo +am_librdft_la_OBJECTS = hc2hc.lo dft-r2hc.lo dht-r2hc.lo dht-rader.lo \ + buffered.lo conf.lo direct-r2r.lo direct-r2c.lo generic.lo \ + hc2hc-direct.lo hc2hc-generic.lo khc2hc.lo kr2c.lo kr2r.lo \ + indirect.lo nop.lo plan.lo problem.lo rank0.lo rank-geq2.lo \ + rdft-dht.lo solve.lo vrank-geq1.lo vrank3-transpose.lo 
\ + $(am__objects_1) +librdft_la_OBJECTS = $(am_librdft_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_la_SOURCES) +DIST_SOURCES = $(librdft_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) 
>/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = 
@ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = 
@SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft +SUBDIRS = scalar simd +noinst_LTLIBRARIES = librdft.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = codelet-rdft.h rdft.h +RDFT2 = buffered2.c direct2.c nop2.c rank0-rdft2.c rank-geq2-rdft2.c \ +plan2.c problem2.c solve2.c vrank-geq1-rdft2.c rdft2-rdft.c \ +rdft2-tensor-max-index.c rdft2-inplace-strides.c rdft2-strides.c \ +khc2c.c ct-hc2c.h ct-hc2c.c 
ct-hc2c-direct.c + +librdft_la_SOURCES = hc2hc.h hc2hc.c dft-r2hc.c dht-r2hc.c dht-rader.c \ +buffered.c codelet-rdft.h conf.c direct-r2r.c direct-r2c.c generic.c \ +hc2hc-direct.c hc2hc-generic.c khc2hc.c kr2c.c kr2r.c indirect.c nop.c \ +plan.c problem.c rank0.c rank-geq2.c rdft.h rdft-dht.c solve.c \ +vrank-geq1.c vrank3-transpose.c $(RDFT2) + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + 
+librdft.la: $(librdft_la_OBJECTS) $(librdft_la_DEPENDENCIES) $(EXTRA_librdft_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(librdft_la_OBJECTS) $(librdft_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffered.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffered2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ct-hc2c-direct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ct-hc2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dft-r2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dht-r2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dht-rader.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/direct-r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/direct-r2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/direct2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/generic.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2hc-direct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2hc-generic.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/indirect.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/khc2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/khc2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kr2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kr2r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nop.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nop2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/plan2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rank-geq2-rdft2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rank-geq2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rank0-rdft2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rank0.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft-dht.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-inplace-strides.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-rdft.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-strides.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdft2-tensor-max-index.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solve.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solve2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vrank-geq1-rdft2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vrank-geq1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vrank3-transpose.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec 
install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/buffered.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/buffered.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" + +typedef struct { + solver super; + int maxnbuf_ndx; +} S; + +static const INT maxnbufs[] = { 8, 256 }; + +typedef struct { + plan_rdft super; + + plan *cld, *cldcpy, *cldrest; + INT n, vl, nbuf, bufdist; + INT ivs_by_nbuf, ovs_by_nbuf; +} P; + +/* transform a vector input with the help of bufs */ +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld = (plan_rdft *) ego->cld; + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + plan_rdft *cldrest; + INT i, vl = ego->vl, nbuf = ego->nbuf; + INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; + R *bufs; + + bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS); + + for (i = nbuf; i <= vl; i += nbuf) { + /* transform to bufs: */ + cld->apply((plan *) cld, I, bufs); + I += ivs_by_nbuf; + + /* copy back */ + cldcpy->apply((plan *) cldcpy, bufs, O); + O += ovs_by_nbuf; + } + + X(ifree)(bufs); + + /* Do the remaining transforms, if any: */ + cldrest = (plan_rdft *) ego->cldrest; + cldrest->apply((plan *) cldrest, I, O); +} + +/* for hc2r problems, copy the input into buffer, and then + transform buffer->output, which allows for destruction of the + buffer */ +static void apply_hc2r(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld = (plan_rdft *) ego->cld; + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + plan_rdft *cldrest; + INT i, vl = ego->vl, nbuf = ego->nbuf; + INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; + R *bufs; + + bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS); + + for (i = nbuf; i <= vl; i += nbuf) { + /* copy input into bufs: */ + cldcpy->apply((plan *) cldcpy, I, bufs); + I += ivs_by_nbuf; + + /* transform to output 
*/ + cld->apply((plan *) cld, bufs, O); + O += ovs_by_nbuf; + } + + X(ifree)(bufs); + + /* Do the remaining transforms, if any: */ + cldrest = (plan_rdft *) ego->cldrest; + cldrest->apply((plan *) cldrest, I, O); +} + + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldcpy, wakefulness); + X(plan_awake)(ego->cldrest, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldrest); + X(plan_destroy_internal)(ego->cldcpy); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))", + ego->n, ego->nbuf, + ego->vl, ego->bufdist % ego->n, + ego->cld, ego->cldcpy, ego->cldrest); +} + +static int applicable0(const S *ego, const problem *p_, const planner *plnr) +{ + const problem_rdft *p = (const problem_rdft *) p_; + iodim *d = p->sz->dims; + + if (1 + && p->vecsz->rnk <= 1 + && p->sz->rnk == 1 + ) { + INT vl, ivs, ovs; + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + + if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr)) + return 0; + + /* if this solver is redundant, in the sense that a solver + of lower index generates the same plan, then prune this + solver */ + if (X(nbuf_redundant)(d[0].n, vl, + ego->maxnbuf_ndx, + maxnbufs, NELEM(maxnbufs))) + return 0; + + if (p->I != p->O) { + if (p->kind[0] == HC2R) { + /* Allow HC2R problems only if the input is to be + preserved. This solver sets NO_DESTROY_INPUT, + which prevents infinite loops */ + return (NO_DESTROY_INPUTP(plnr)); + } else { + /* + In principle, the buffered transforms might be useful + when working out of place. However, in order to + prevent infinite loops in the planner, we require + that the output stride of the buffered transforms be + greater than 1. 
+ */ + return (d[0].os > 1); + } + } + + /* + * If the problem is in place, the input/output strides must + * be the same or the whole thing must fit in the buffer. + */ + if (X(tensor_inplace_strides2)(p->sz, p->vecsz)) + return 1; + + if (/* fits into buffer: */ + ((p->vecsz->rnk == 0) + || + (X(nbuf)(d[0].n, p->vecsz->dims[0].n, + maxnbufs[ego->maxnbuf_ndx]) + == p->vecsz->dims[0].n))) + return 1; + } + + return 0; +} + +static int applicable(const S *ego, const problem *p_, const planner *plnr) +{ + const problem_rdft *p; + + if (NO_BUFFERINGP(plnr)) return 0; + + if (!applicable0(ego, p_, plnr)) return 0; + + p = (const problem_rdft *) p_; + if (p->kind[0] == HC2R) { + if (NO_UGLYP(plnr)) { + /* UGLY if in-place and too big, since the problem + could be solved via transpositions */ + if (p->I == p->O && X(toobig)(p->sz->dims[0].n)) + return 0; + } + } else { + if (NO_UGLYP(plnr)) { + if (p->I != p->O) return 0; + if (X(toobig)(p->sz->dims[0].n)) return 0; + } + } + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const S *ego = (const S *)ego_; + plan *cld = (plan *) 0; + plan *cldcpy = (plan *) 0; + plan *cldrest = (plan *) 0; + const problem_rdft *p = (const problem_rdft *) p_; + R *bufs = (R *) 0; + INT nbuf = 0, bufdist, n, vl; + INT ivs, ovs; + int hc2rp; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego, p_, plnr)) + goto nada; + + n = X(tensor_sz)(p->sz); + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + hc2rp = (p->kind[0] == HC2R); + + nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]); + bufdist = X(bufdist)(n, vl); + A(nbuf > 0); + + /* initial allocation for the purpose of planning */ + bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); + + if (hc2rp) { + /* allow destruction of buffer */ + cld = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_d)( + X(mktensor_1d)(n, 1, p->sz->dims[0].os), + X(mktensor_1d)(nbuf, bufdist, ovs), + bufs, 
TAINT(p->O, ovs * nbuf), p->kind), + 0, 0, NO_DESTROY_INPUT); + if (!cld) goto nada; + + /* copying input into buffer buffer is a rank-0 transform: */ + cldcpy = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_2d)(nbuf, ivs, bufdist, + n, p->sz->dims[0].is, 1), + TAINT(p->I, ivs * nbuf), bufs)); + if (!cldcpy) goto nada; + } else { + /* allow destruction of input if problem is in place */ + cld = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_d)( + X(mktensor_1d)(n, p->sz->dims[0].is, 1), + X(mktensor_1d)(nbuf, ivs, bufdist), + TAINT(p->I, ivs * nbuf), bufs, p->kind), + 0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0); + if (!cld) goto nada; + + /* copying back from the buffer is a rank-0 transform: */ + cldcpy = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_2d)(nbuf, bufdist, ovs, + n, 1, p->sz->dims[0].os), + bufs, TAINT(p->O, ovs * nbuf))); + if (!cldcpy) goto nada; + } + + /* deallocate buffers, let apply() allocate them for real */ + X(ifree)(bufs); + bufs = 0; + + /* plan the leftover transforms (cldrest): */ + { + INT id = ivs * (nbuf * (vl / nbuf)); + INT od = ovs * (nbuf * (vl / nbuf)); + cldrest = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)( + X(tensor_copy)(p->sz), + X(mktensor_1d)(vl % nbuf, ivs, ovs), + p->I + id, p->O + od, p->kind)); + } + if (!cldrest) goto nada; + + pln = MKPLAN_RDFT(P, &padt, hc2rp ? 
apply_hc2r : apply); + pln->cld = cld; + pln->cldcpy = cldcpy; + pln->cldrest = cldrest; + pln->n = n; + pln->vl = vl; + pln->ivs_by_nbuf = ivs * nbuf; + pln->ovs_by_nbuf = ovs * nbuf; + + pln->nbuf = nbuf; + pln->bufdist = bufdist; + + { + opcnt t; + X(ops_add)(&cld->ops, &cldcpy->ops, &t); + X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops); + } + + return &(pln->super.super); + + nada: + X(ifree0)(bufs); + X(plan_destroy_internal)(cldrest); + X(plan_destroy_internal)(cldcpy); + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +static solver *mksolver(int maxnbuf_ndx) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->maxnbuf_ndx = maxnbuf_ndx; + return &(slv->super); +} + +void X(rdft_buffered_register)(planner *p) +{ + size_t i; + for (i = 0; i < NELEM(maxnbufs); ++i) + REGISTER_SOLVER(p, mksolver(i)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/buffered2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/buffered2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* buffering of rdft2. 
We always buffer the complex array */ + +#include "rdft.h" +#include "dft.h" + +typedef struct { + solver super; + int maxnbuf_ndx; +} S; + +static const INT maxnbufs[] = { 8, 256 }; + +typedef struct { + plan_rdft2 super; + + plan *cld, *cldcpy, *cldrest; + INT n, vl, nbuf, bufdist; + INT ivs_by_nbuf, ovs_by_nbuf; + INT ioffset, roffset; +} P; + +/* transform a vector input with the help of bufs */ +static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld = (plan_rdft2 *) ego->cld; + plan_dft *cldcpy = (plan_dft *) ego->cldcpy; + INT i, vl = ego->vl, nbuf = ego->nbuf; + INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; + R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS); + R *bufr = bufs + ego->roffset; + R *bufi = bufs + ego->ioffset; + plan_rdft2 *cldrest; + + for (i = nbuf; i <= vl; i += nbuf) { + /* transform to bufs: */ + cld->apply((plan *) cld, r0, r1, bufr, bufi); + r0 += ivs_by_nbuf; r1 += ivs_by_nbuf; + + /* copy back */ + cldcpy->apply((plan *) cldcpy, bufr, bufi, cr, ci); + cr += ovs_by_nbuf; ci += ovs_by_nbuf; + } + + X(ifree)(bufs); + + /* Do the remaining transforms, if any: */ + cldrest = (plan_rdft2 *) ego->cldrest; + cldrest->apply((plan *) cldrest, r0, r1, cr, ci); +} + +/* for hc2r problems, copy the input into buffer, and then + transform buffer->output, which allows for destruction of the + buffer */ +static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld = (plan_rdft2 *) ego->cld; + plan_dft *cldcpy = (plan_dft *) ego->cldcpy; + INT i, vl = ego->vl, nbuf = ego->nbuf; + INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; + R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS); + R *bufr = bufs + ego->roffset; + R *bufi = bufs + ego->ioffset; + plan_rdft2 *cldrest; + + for (i = nbuf; i <= vl; i += nbuf) { + /* copy input into bufs: */ + 
cldcpy->apply((plan *) cldcpy, cr, ci, bufr, bufi); + cr += ivs_by_nbuf; ci += ivs_by_nbuf; + + /* transform to output */ + cld->apply((plan *) cld, r0, r1, bufr, bufi); + r0 += ovs_by_nbuf; r1 += ovs_by_nbuf; + } + + X(ifree)(bufs); + + /* Do the remaining transforms, if any: */ + cldrest = (plan_rdft2 *) ego->cldrest; + cldrest->apply((plan *) cldrest, r0, r1, cr, ci); +} + + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldcpy, wakefulness); + X(plan_awake)(ego->cldrest, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldrest); + X(plan_destroy_internal)(ego->cldcpy); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(rdft2-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))", + ego->n, ego->nbuf, + ego->vl, ego->bufdist % ego->n, + ego->cld, ego->cldcpy, ego->cldrest); +} + +static int applicable0(const S *ego, const problem *p_, const planner *plnr) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + iodim *d = p->sz->dims; + + if (1 + && p->vecsz->rnk <= 1 + && p->sz->rnk == 1 + + /* we assume even n throughout */ + && (d[0].n % 2) == 0 + + /* and we only consider these two cases */ + && (p->kind == R2HC || p->kind == HC2R) + + ) { + INT vl, ivs, ovs; + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + + if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr)) + return 0; + + /* if this solver is redundant, in the sense that a solver + of lower index generates the same plan, then prune this + solver */ + if (X(nbuf_redundant)(d[0].n, vl, + ego->maxnbuf_ndx, + maxnbufs, NELEM(maxnbufs))) + return 0; + + if (p->r0 != p->cr) { + if (p->kind == HC2R) { + /* Allow HC2R problems only if the input is to be + preserved. 
This solver sets NO_DESTROY_INPUT, + which prevents infinite loops */ + return (NO_DESTROY_INPUTP(plnr)); + } else { + /* + In principle, the buffered transforms might be useful + when working out of place. However, in order to + prevent infinite loops in the planner, we require + that the output stride of the buffered transforms be + greater than 2. + */ + return (d[0].os > 2); + } + } + + /* + * If the problem is in place, the input/output strides must + * be the same or the whole thing must fit in the buffer. + */ + if (X(rdft2_inplace_strides(p, RNK_MINFTY))) + return 1; + + if (/* fits into buffer: */ + ((p->vecsz->rnk == 0) + || + (X(nbuf)(d[0].n, p->vecsz->dims[0].n, + maxnbufs[ego->maxnbuf_ndx]) + == p->vecsz->dims[0].n))) + return 1; + } + + return 0; +} + +static int applicable(const S *ego, const problem *p_, const planner *plnr) +{ + const problem_rdft2 *p; + + if (NO_BUFFERINGP(plnr)) return 0; + + if (!applicable0(ego, p_, plnr)) return 0; + + p = (const problem_rdft2 *) p_; + if (p->kind == HC2R) { + if (NO_UGLYP(plnr)) { + /* UGLY if in-place and too big, since the problem + could be solved via transpositions */ + if (p->r0 == p->cr && X(toobig)(p->sz->dims[0].n)) + return 0; + } + } else { + if (NO_UGLYP(plnr)) { + if (p->r0 != p->cr || X(toobig)(p->sz->dims[0].n)) + return 0; + } + } + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const S *ego = (const S *)ego_; + plan *cld = (plan *) 0; + plan *cldcpy = (plan *) 0; + plan *cldrest = (plan *) 0; + const problem_rdft2 *p = (const problem_rdft2 *) p_; + R *bufs = (R *) 0; + INT nbuf = 0, bufdist, n, vl; + INT ivs, ovs, ioffset, roffset, id, od; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + if (!applicable(ego, p_, plnr)) + goto nada; + + n = X(tensor_sz)(p->sz); + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + + nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]); + bufdist = X(bufdist)(n + 2, vl); /* 
complex-side rdft2 stores N+2 + real numbers */ + A(nbuf > 0); + + /* attempt to keep real and imaginary part in the same order, + so as to allow optimizations in the the copy plan */ + roffset = (p->cr - p->ci > 0) ? (INT)1 : (INT)0; + ioffset = 1 - roffset; + + /* initial allocation for the purpose of planning */ + bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); + + id = ivs * (nbuf * (vl / nbuf)); + od = ovs * (nbuf * (vl / nbuf)); + + if (p->kind == R2HC) { + /* allow destruction of input if problem is in place */ + cld = X(mkplan_f_d)( + plnr, + X(mkproblem_rdft2_d)( + X(mktensor_1d)(n, p->sz->dims[0].is, 2), + X(mktensor_1d)(nbuf, ivs, bufdist), + TAINT(p->r0, ivs * nbuf), TAINT(p->r1, ivs * nbuf), + bufs + roffset, bufs + ioffset, p->kind), + 0, 0, (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0); + if (!cld) goto nada; + + /* copying back from the buffer is a rank-0 DFT: */ + cldcpy = X(mkplan_d)( + plnr, + X(mkproblem_dft_d)( + X(mktensor_0d)(), + X(mktensor_2d)(nbuf, bufdist, ovs, + n/2+1, 2, p->sz->dims[0].os), + bufs + roffset, bufs + ioffset, + TAINT(p->cr, ovs * nbuf), TAINT(p->ci, ovs * nbuf) )); + if (!cldcpy) goto nada; + + X(ifree)(bufs); bufs = 0; + + cldrest = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)( + X(tensor_copy)(p->sz), + X(mktensor_1d)(vl % nbuf, ivs, ovs), + p->r0 + id, p->r1 + id, + p->cr + od, p->ci + od, + p->kind)); + if (!cldrest) goto nada; + pln = MKPLAN_RDFT2(P, &padt, apply_r2hc); + } else { + /* allow destruction of buffer */ + cld = X(mkplan_f_d)( + plnr, + X(mkproblem_rdft2_d)( + X(mktensor_1d)(n, 2, p->sz->dims[0].os), + X(mktensor_1d)(nbuf, bufdist, ovs), + TAINT(p->r0, ovs * nbuf), TAINT(p->r1, ovs * nbuf), + bufs + roffset, bufs + ioffset, p->kind), + 0, 0, NO_DESTROY_INPUT); + if (!cld) goto nada; + + /* copying input into buffer is a rank-0 DFT: */ + cldcpy = X(mkplan_d)( + plnr, + X(mkproblem_dft_d)( + X(mktensor_0d)(), + X(mktensor_2d)(nbuf, ivs, bufdist, + n/2+1, p->sz->dims[0].is, 2), + TAINT(p->cr, ivs * nbuf), 
TAINT(p->ci, ivs * nbuf), + bufs + roffset, bufs + ioffset)); + if (!cldcpy) goto nada; + + X(ifree)(bufs); bufs = 0; + + cldrest = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)( + X(tensor_copy)(p->sz), + X(mktensor_1d)(vl % nbuf, ivs, ovs), + p->r0 + od, p->r1 + od, + p->cr + id, p->ci + id, + p->kind)); + if (!cldrest) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, apply_hc2r); + } + + pln->cld = cld; + pln->cldcpy = cldcpy; + pln->cldrest = cldrest; + pln->n = n; + pln->vl = vl; + pln->ivs_by_nbuf = ivs * nbuf; + pln->ovs_by_nbuf = ovs * nbuf; + pln->roffset = roffset; + pln->ioffset = ioffset; + + pln->nbuf = nbuf; + pln->bufdist = bufdist; + + { + opcnt t; + X(ops_add)(&cld->ops, &cldcpy->ops, &t); + X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops); + } + + return &(pln->super.super); + + nada: + X(ifree0)(bufs); + X(plan_destroy_internal)(cldrest); + X(plan_destroy_internal)(cldcpy); + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +static solver *mksolver(int maxnbuf_ndx) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->maxnbuf_ndx = maxnbuf_ndx; + return &(slv->super); +} + +void X(rdft2_buffered_register)(planner *p) +{ + size_t i; + for (i = 0; i < NELEM(maxnbufs); ++i) + REGISTER_SOLVER(p, mksolver(i)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/codelet-rdft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/codelet-rdft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* + * This header file must include every file or define every + * type or macro which is required to compile a codelet. + */ + +#ifndef __RDFT_CODELET_H__ +#define __RDFT_CODELET_H__ + +#include "ifftw.h" + +/************************************************************** + * types of codelets + **************************************************************/ + +/* FOOab, with a,b in {0,1}, denotes the FOO transform + where a/b say whether the input/output are shifted by + half a sample/slot. */ + +typedef enum { + R2HC00, R2HC01, R2HC10, R2HC11, + HC2R00, HC2R01, HC2R10, HC2R11, + DHT, + REDFT00, REDFT01, REDFT10, REDFT11, /* real-even == DCT's */ + RODFT00, RODFT01, RODFT10, RODFT11 /* real-odd == DST's */ +} rdft_kind; + +/* standard R2HC/HC2R transforms are unshifted */ +#define R2HC R2HC00 +#define HC2R HC2R00 + +#define R2HCII R2HC01 +#define HC2RIII HC2R10 + +/* (k) >= R2HC00 produces a warning under gcc because checking x >= 0 + is superfluous for unsigned values...but it is needed because other + compilers (e.g. icc) may define the enum to be a signed int...grrr. 
*/ +#define R2HC_KINDP(k) ((k) >= R2HC00 && (k) <= R2HC11) /* uses kr2hc_genus */ +#define HC2R_KINDP(k) ((k) >= HC2R00 && (k) <= HC2R11) /* uses khc2r_genus */ + +#define R2R_KINDP(k) ((k) >= DHT) /* uses kr2r_genus */ + +#define REDFT_KINDP(k) ((k) >= REDFT00 && (k) <= REDFT11) +#define RODFT_KINDP(k) ((k) >= RODFT00 && (k) <= RODFT11) +#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11) + +/* codelets with real input (output) and complex output (input) */ +typedef struct kr2c_desc_s kr2c_desc; + +typedef struct { + rdft_kind kind; + INT vl; +} kr2c_genus; + +struct kr2c_desc_s { + INT n; /* size of transform computed */ + const char *nam; + opcnt ops; + const kr2c_genus *genus; +}; + +typedef void (*kr2c) (R *R0, R *R1, R *Cr, R *Ci, + stride rs, stride csr, stride csi, + INT vl, INT ivs, INT ovs); +void X(kr2c_register)(planner *p, kr2c codelet, const kr2c_desc *desc); + +/* half-complex to half-complex DIT/DIF codelets: */ +typedef struct hc2hc_desc_s hc2hc_desc; + +typedef struct { + rdft_kind kind; + INT vl; +} hc2hc_genus; + +struct hc2hc_desc_s { + INT radix; + const char *nam; + const tw_instr *tw; + const hc2hc_genus *genus; + opcnt ops; +}; + +typedef void (*khc2hc) (R *rioarray, R *iioarray, const R *W, + stride rs, INT mb, INT me, INT ms); +void X(khc2hc_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc); + +/* half-complex to rdft2-complex DIT/DIF codelets: */ +typedef struct hc2c_desc_s hc2c_desc; + +typedef enum { + HC2C_VIA_RDFT, + HC2C_VIA_DFT +} hc2c_kind; + +typedef struct { + int (*okp)( + const R *Rp, const R *Ip, const R *Rm, const R *Im, + INT rs, INT mb, INT me, INT ms, + const planner *plnr); + rdft_kind kind; + INT vl; +} hc2c_genus; + +struct hc2c_desc_s { + INT radix; + const char *nam; + const tw_instr *tw; + const hc2c_genus *genus; + opcnt ops; +}; + +typedef void (*khc2c) (R *Rp, R *Ip, R *Rm, R *Im, const R *W, + stride rs, INT mb, INT me, INT ms); +void X(khc2c_register)(planner *p, khc2c codelet, const 
hc2c_desc *desc, + hc2c_kind hc2ckind); + +extern const solvtab X(solvtab_rdft_r2cf); +extern const solvtab X(solvtab_rdft_r2cb); +extern const solvtab X(solvtab_rdft_sse2); +extern const solvtab X(solvtab_rdft_avx); +extern const solvtab X(solvtab_rdft_altivec); +extern const solvtab X(solvtab_rdft_neon); + +/* real-input & output DFT-like codelets (DHT, etc.) */ +typedef struct kr2r_desc_s kr2r_desc; + +typedef struct { + INT vl; +} kr2r_genus; + +struct kr2r_desc_s { + INT n; /* size of transform computed */ + const char *nam; + opcnt ops; + const kr2r_genus *genus; + rdft_kind kind; +}; + +typedef void (*kr2r) (const R *I, R *O, stride is, stride os, + INT vl, INT ivs, INT ovs); +void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc); + +extern const solvtab X(solvtab_rdft_r2r); + +#endif /* __RDFT_CODELET_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/conf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/conf.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" + +static const solvtab s = +{ + SOLVTAB(X(rdft_indirect_register)), + SOLVTAB(X(rdft_rank0_register)), + SOLVTAB(X(rdft_vrank3_transpose_register)), + SOLVTAB(X(rdft_vrank_geq1_register)), + + SOLVTAB(X(rdft_nop_register)), + SOLVTAB(X(rdft_buffered_register)), + SOLVTAB(X(rdft_generic_register)), + SOLVTAB(X(rdft_rank_geq2_register)), + + SOLVTAB(X(dft_r2hc_register)), + + SOLVTAB(X(rdft_dht_register)), + SOLVTAB(X(dht_r2hc_register)), + SOLVTAB(X(dht_rader_register)), + + SOLVTAB(X(rdft2_vrank_geq1_register)), + SOLVTAB(X(rdft2_nop_register)), + SOLVTAB(X(rdft2_rank0_register)), + SOLVTAB(X(rdft2_buffered_register)), + SOLVTAB(X(rdft2_rank_geq2_register)), + SOLVTAB(X(rdft2_rdft_register)), + + SOLVTAB(X(hc2hc_generic_register)), + + SOLVTAB_END +}; + +void X(rdft_conf_standard)(planner *p) +{ + X(solvtab_exec)(s, p); + X(solvtab_exec)(X(solvtab_rdft_r2cf), p); + X(solvtab_exec)(X(solvtab_rdft_r2cb), p); + X(solvtab_exec)(X(solvtab_rdft_r2r), p); + +#if HAVE_SSE2 + if (X(have_simd_sse2)()) + X(solvtab_exec)(X(solvtab_rdft_sse2), p); +#endif +#if HAVE_AVX + if (X(have_simd_avx)()) + X(solvtab_exec)(X(solvtab_rdft_avx), p); +#endif +#if HAVE_ALTIVEC + if (X(have_simd_altivec)()) + X(solvtab_exec)(X(solvtab_rdft_altivec), p); +#endif +#if HAVE_NEON + if (X(have_simd_neon)()) + X(solvtab_exec)(X(solvtab_rdft_neon), p); +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/ct-hc2c-direct.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/ct-hc2c-direct.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct-hc2c.h" + +typedef struct { + hc2c_solver super; + const hc2c_desc *desc; + int bufferedp; + khc2c k; +} S; + +typedef struct { + plan_hc2c super; + khc2c k; + plan *cld0, *cldm; /* children for 0th and middle butterflies */ + INT r, m, v, extra_iter; + INT ms, vs; + stride rs, brs; + twid *td; + const S *slv; +} P; + +/************************************************************* + Nonbuffered code + *************************************************************/ +static void apply(const plan *ego_, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld0 = (plan_rdft2 *) ego->cld0; + plan_rdft2 *cldm = (plan_rdft2 *) ego->cldm; + INT i, m = ego->m, v = ego->v; + INT ms = ego->ms, vs = ego->vs; + + for (i = 0; i < v; ++i, cr += vs, ci += vs) { + cld0->apply((plan *) cld0, cr, ci, cr, ci); + ego->k(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms, + ego->td->W, ego->rs, 1, (m+1)/2, ms); + cldm->apply((plan *) cldm, cr + (m/2)*ms, ci + (m/2)*ms, + cr + (m/2)*ms, ci + (m/2)*ms); + } +} + +static void apply_extra_iter(const plan *ego_, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld0 = (plan_rdft2 *) ego->cld0; + plan_rdft2 *cldm = (plan_rdft2 *) ego->cldm; + INT i, m = ego->m, v = ego->v; + INT ms = ego->ms, vs = ego->vs; + INT mm = 
(m-1)/2; + + for (i = 0; i < v; ++i, cr += vs, ci += vs) { + cld0->apply((plan *) cld0, cr, ci, cr, ci); + + /* for 4-way SIMD when (m+1)/2-1 is odd: iterate over an + even vector length MM-1, and then execute the last + iteration as a 2-vector with vector stride 0. The + twiddle factors of the second half of the last iteration + are bogus, but we only store the results of the first + half. */ + ego->k(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms, + ego->td->W, ego->rs, 1, mm, ms); + ego->k(cr + mm*ms, ci + mm*ms, cr + (m-mm)*ms, ci + (m-mm)*ms, + ego->td->W, ego->rs, mm, mm+2, 0); + cldm->apply((plan *) cldm, cr + (m/2)*ms, ci + (m/2)*ms, + cr + (m/2)*ms, ci + (m/2)*ms); + } + +} + +/************************************************************* + Buffered code + *************************************************************/ + +/* should not be 2^k to avoid associativity conflicts */ +static INT compute_batchsize(INT radix) +{ + /* round up to multiple of 4 */ + radix += 3; + radix &= -4; + + return (radix + 2); +} + +static void dobatch(const P *ego, R *Rp, R *Ip, R *Rm, R *Im, + INT mb, INT me, INT extra_iter, R *bufp) +{ + INT b = WS(ego->brs, 1); + INT rs = WS(ego->rs, 1); + INT ms = ego->ms; + R *bufm = bufp + b - 2; + + X(cpy2d_pair_ci)(Rp + mb * ms, Ip + mb * ms, bufp, bufp + 1, + ego->r / 2, rs, b, + me - mb, ms, 2); + X(cpy2d_pair_ci)(Rm - mb * ms, Im - mb * ms, bufm, bufm + 1, + ego->r / 2, rs, b, + me - mb, -ms, -2); + ego->k(bufp, bufp + 1, bufm, bufm + 1, ego->td->W, + ego->brs, mb, me + extra_iter, 2); + X(cpy2d_pair_co)(bufp, bufp + 1, Rp + mb * ms, Ip + mb * ms, + ego->r / 2, b, rs, + me - mb, 2, ms); + X(cpy2d_pair_co)(bufm, bufm + 1, Rm - mb * ms, Im - mb * ms, + ego->r / 2, b, rs, + me - mb, -2, -ms); +} + +static void apply_buf(const plan *ego_, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft2 *cld0 = (plan_rdft2 *) ego->cld0; + plan_rdft2 *cldm = (plan_rdft2 *) ego->cldm; + INT i, j, ms = ego->ms, v = ego->v; + INT batchsz = 
compute_batchsize(ego->r); + R *buf; + INT mb = 1, me = (ego->m+1) / 2; + size_t bufsz = ego->r * batchsz * 2 * sizeof(R); + + BUF_ALLOC(R *, buf, bufsz); + + for (i = 0; i < v; ++i, cr += ego->vs, ci += ego->vs) { + R *Rp = cr; + R *Ip = ci; + R *Rm = cr + ego->m * ms; + R *Im = ci + ego->m * ms; + + cld0->apply((plan *) cld0, Rp, Ip, Rp, Ip); + + for (j = mb; j + batchsz < me; j += batchsz) + dobatch(ego, Rp, Ip, Rm, Im, j, j + batchsz, 0, buf); + + dobatch(ego, Rp, Ip, Rm, Im, j, me, ego->extra_iter, buf); + + cldm->apply((plan *) cldm, + Rp + me * ms, Ip + me * ms, + Rp + me * ms, Ip + me * ms); + + } + + BUF_FREE(buf, bufsz); +} + +/************************************************************* + common code + *************************************************************/ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld0, wakefulness); + X(plan_awake)(ego->cldm, wakefulness); + X(twiddle_awake)(wakefulness, &ego->td, ego->slv->desc->tw, + ego->r * ego->m, ego->r, + (ego->m - 1) / 2 + ego->extra_iter); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld0); + X(plan_destroy_internal)(ego->cldm); + X(stride_destroy)(ego->rs); + X(stride_destroy)(ego->brs); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *slv = ego->slv; + const hc2c_desc *e = slv->desc; + + if (slv->bufferedp) + p->print(p, "(hc2c-directbuf/%D-%D/%D/%D%v \"%s\"%(%p%)%(%p%))", + compute_batchsize(ego->r), + ego->r, X(twiddle_length)(ego->r, e->tw), + ego->extra_iter, ego->v, e->nam, + ego->cld0, ego->cldm); + else + p->print(p, "(hc2c-direct-%D/%D/%D%v \"%s\"%(%p%)%(%p%))", + ego->r, X(twiddle_length)(ego->r, e->tw), + ego->extra_iter, ego->v, e->nam, + ego->cld0, ego->cldm); +} + +static int applicable0(const S *ego, rdft_kind kind, + INT r, INT rs, + INT m, INT ms, + INT v, INT vs, + const R *cr, const R *ci, + const planner *plnr, 
+ INT *extra_iter) +{ + const hc2c_desc *e = ego->desc; + UNUSED(v); + + return ( + 1 + && r == e->radix + && kind == e->genus->kind + + /* first v-loop iteration */ + && ((*extra_iter = 0, + e->genus->okp(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms, + rs, 1, (m+1)/2, ms, plnr)) + || + (*extra_iter = 1, + ((e->genus->okp(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms, + rs, 1, (m-1)/2, ms, plnr)) + && + (e->genus->okp(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms, + rs, (m-1)/2, (m-1)/2 + 2, 0, plnr))))) + + /* subsequent v-loop iterations */ + && (cr += vs, ci += vs, 1) + + && e->genus->okp(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms, + rs, 1, (m+1)/2 - *extra_iter, ms, plnr) + ); +} + +static int applicable0_buf(const S *ego, rdft_kind kind, + INT r, INT rs, + INT m, INT ms, + INT v, INT vs, + const R *cr, const R *ci, + const planner *plnr, INT *extra_iter) +{ + const hc2c_desc *e = ego->desc; + INT batchsz, brs; + UNUSED(v); UNUSED(rs); UNUSED(ms); UNUSED(vs); + + return ( + 1 + && r == e->radix + && kind == e->genus->kind + + /* ignore cr, ci, use buffer */ + && (cr = (const R *)0, ci = cr + 1, + batchsz = compute_batchsize(r), + brs = 4 * batchsz, 1) + + && e->genus->okp(cr, ci, cr + brs - 2, ci + brs - 2, + brs, 1, 1+batchsz, 2, plnr) + + && ((*extra_iter = 0, + e->genus->okp(cr, ci, cr + brs - 2, ci + brs - 2, + brs, 1, 1 + (((m-1)/2) % batchsz), 2, plnr)) + || + (*extra_iter = 1, + e->genus->okp(cr, ci, cr + brs - 2, ci + brs - 2, + brs, 1, 1 + 1 + (((m-1)/2) % batchsz), 2, plnr))) + + ); +} + +static int applicable(const S *ego, rdft_kind kind, + INT r, INT rs, + INT m, INT ms, + INT v, INT vs, + R *cr, R *ci, + const planner *plnr, INT *extra_iter) +{ + if (ego->bufferedp) { + if (!applicable0_buf(ego, kind, r, rs, m, ms, v, vs, cr, ci, plnr, + extra_iter)) + return 0; + } else { + if (!applicable0(ego, kind, r, rs, m, ms, v, vs, cr, ci, plnr, + extra_iter)) + return 0; + } + + if (NO_UGLYP(plnr) && X(ct_uglyp)((ego->bufferedp? 
(INT)512 : (INT)16), + v, m * r, r)) + return 0; + + return 1; +} + +static plan *mkcldw(const hc2c_solver *ego_, rdft_kind kind, + INT r, INT rs, + INT m, INT ms, + INT v, INT vs, + R *cr, R *ci, + planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + const hc2c_desc *e = ego->desc; + plan *cld0 = 0, *cldm = 0; + INT imid = (m / 2) * ms; + INT extra_iter; + + static const plan_adt padt = { + 0, awake, print, destroy + }; + + if (!applicable(ego, kind, r, rs, m, ms, v, vs, cr, ci, plnr, + &extra_iter)) + return (plan *)0; + + cld0 = X(mkplan_d)( + plnr, + X(mkproblem_rdft2_d)(X(mktensor_1d)(r, rs, rs), + X(mktensor_0d)(), + TAINT(cr, vs), TAINT(ci, vs), + TAINT(cr, vs), TAINT(ci, vs), + kind)); + if (!cld0) goto nada; + + cldm = X(mkplan_d)( + plnr, + X(mkproblem_rdft2_d)(((m % 2) ? + X(mktensor_0d)() : X(mktensor_1d)(r, rs, rs) ), + X(mktensor_0d)(), + TAINT(cr + imid, vs), TAINT(ci + imid, vs), + TAINT(cr + imid, vs), TAINT(ci + imid, vs), + kind == R2HC ? R2HCII : HC2RIII)); + if (!cldm) goto nada; + + if (ego->bufferedp) + pln = MKPLAN_HC2C(P, &padt, apply_buf); + else + pln = MKPLAN_HC2C(P, &padt, extra_iter ? 
apply_extra_iter : apply); + + pln->k = ego->k; + pln->td = 0; + pln->r = r; pln->rs = X(mkstride)(r, rs); + pln->m = m; pln->ms = ms; + pln->v = v; pln->vs = vs; + pln->slv = ego; + pln->brs = X(mkstride)(r, 4 * compute_batchsize(r)); + pln->cld0 = cld0; + pln->cldm = cldm; + pln->extra_iter = extra_iter; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(v * (((m - 1) / 2) / e->genus->vl), + &e->ops, &pln->super.super.ops); + X(ops_madd2)(v, &cld0->ops, &pln->super.super.ops); + X(ops_madd2)(v, &cldm->ops, &pln->super.super.ops); + + if (ego->bufferedp) + pln->super.super.ops.other += 4 * r * m * v; + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld0); + X(plan_destroy_internal)(cldm); + return 0; +} + +static void regone(planner *plnr, khc2c codelet, + const hc2c_desc *desc, + hc2c_kind hc2ckind, + int bufferedp) +{ + S *slv = (S *)X(mksolver_hc2c)(sizeof(S), desc->radix, hc2ckind, mkcldw); + slv->k = codelet; + slv->desc = desc; + slv->bufferedp = bufferedp; + REGISTER_SOLVER(plnr, &(slv->super.super)); +} + +void X(regsolver_hc2c_direct)(planner *plnr, khc2c codelet, + const hc2c_desc *desc, + hc2c_kind hc2ckind) +{ + regone(plnr, codelet, desc, hc2ckind, /* bufferedp */0); + regone(plnr, codelet, desc, hc2ckind, /* bufferedp */1); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/ct-hc2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/ct-hc2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ct-hc2c.h" +#include "dft.h" + +typedef struct { + plan_rdft2 super; + plan *cld; + plan *cldw; + INT r; +} P; + +static void apply_dit(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld; + plan_hc2c *cldw; + UNUSED(r1); + + cld = (plan_rdft *) ego->cld; + cld->apply(ego->cld, r0, cr); + + cldw = (plan_hc2c *) ego->cldw; + cldw->apply(ego->cldw, cr, ci); +} + +static void apply_dif(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld; + plan_hc2c *cldw; + UNUSED(r1); + + cldw = (plan_hc2c *) ego->cldw; + cldw->apply(ego->cldw, cr, ci); + + cld = (plan_rdft *) ego->cld; + cld->apply(ego->cld, cr, r0); +} + +static void apply_dit_dft(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + plan_hc2c *cldw; + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, r0, r1, cr, ci); + + cldw = (plan_hc2c *) ego->cldw; + cldw->apply(ego->cldw, cr, ci); +} + +static void apply_dif_dft(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + plan_hc2c *cldw; + + cldw = (plan_hc2c *) ego->cldw; + cldw->apply(ego->cldw, cr, ci); + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ci, cr, r1, r0); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldw, wakefulness); +} + +static void destroy(plan 
*ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldw); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(rdft2-ct-%s/%D%(%p%)%(%p%))", + (ego->super.apply == apply_dit || + ego->super.apply == apply_dit_dft) + ? "dit" : "dif", + ego->r, ego->cldw, ego->cld); +} + +static int applicable0(const hc2c_solver *ego, const problem *p_, planner *plnr) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + INT r; + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + + && (/* either the problem is R2HC, which is solved by DIT */ + (p->kind == R2HC) + || + /* or the problem is HC2R, in which case it is solved + by DIF, which destroys the input */ + (p->kind == HC2R && + (p->r0 == p->cr || !NO_DESTROY_INPUTP(plnr)))) + + && ((r = X(choose_radix)(ego->r, p->sz->dims[0].n)) > 0) + && p->sz->dims[0].n > r); +} + +int X(hc2c_applicable)(const hc2c_solver *ego, const problem *p_, + planner *plnr) +{ + const problem_rdft2 *p; + + if (!applicable0(ego, p_, plnr)) + return 0; + + p = (const problem_rdft2 *) p_; + + return (0 + || p->vecsz->rnk == 0 + || !NO_VRECURSEP(plnr) + ); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const hc2c_solver *ego = (const hc2c_solver *) ego_; + const problem_rdft2 *p; + P *pln = 0; + plan *cld = 0, *cldw = 0; + INT n, r, m, v, ivs, ovs; + iodim *d; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + if (!X(hc2c_applicable)(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_rdft2 *) p_; + d = p->sz->dims; + n = d[0].n; + r = X(choose_radix)(ego->r, n); + A((r % 2) == 0); + m = n / r; + + X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs); + + switch (p->kind) { + case R2HC: + cldw = ego->mkcldw(ego, R2HC, + r, m * d[0].os, + m, d[0].os, + v, ovs, + p->cr, p->ci, plnr); + if (!cldw) goto nada; + + switch (ego->hc2ckind) { + case HC2C_VIA_RDFT: + cld = 
X(mkplan_d)( + plnr, + X(mkproblem_rdft_1_d)( + X(mktensor_1d)(m, (r/2)*d[0].is, d[0].os), + X(mktensor_3d)( + 2, p->r1 - p->r0, p->ci - p->cr, + r / 2, d[0].is, m * d[0].os, + v, ivs, ovs), + p->r0, p->cr, R2HC) + ); + if (!cld) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, apply_dit); + break; + + case HC2C_VIA_DFT: + cld = X(mkplan_d)( + plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(m, (r/2)*d[0].is, d[0].os), + X(mktensor_2d)( + r / 2, d[0].is, m * d[0].os, + v, ivs, ovs), + p->r0, p->r1, p->cr, p->ci) + ); + if (!cld) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, apply_dit_dft); + break; + } + break; + + case HC2R: + cldw = ego->mkcldw(ego, HC2R, + r, m * d[0].is, + m, d[0].is, + v, ivs, + p->cr, p->ci, plnr); + if (!cldw) goto nada; + + switch (ego->hc2ckind) { + case HC2C_VIA_RDFT: + cld = X(mkplan_d)( + plnr, + X(mkproblem_rdft_1_d)( + X(mktensor_1d)(m, d[0].is, (r/2)*d[0].os), + X(mktensor_3d)( + 2, p->ci - p->cr, p->r1 - p->r0, + r / 2, m * d[0].is, d[0].os, + v, ivs, ovs), + p->cr, p->r0, HC2R) + ); + if (!cld) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, apply_dif); + break; + + case HC2C_VIA_DFT: + cld = X(mkplan_d)( + plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(m, d[0].is, (r/2)*d[0].os), + X(mktensor_2d)( + r / 2, m * d[0].is, d[0].os, + v, ivs, ovs), + p->ci, p->cr, p->r1, p->r0) + ); + if (!cld) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, apply_dif_dft); + break; + } + break; + + default: + A(0); + } + + pln->cld = cld; + pln->cldw = cldw; + pln->r = r; + X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops); + + /* inherit could_prune_now_p attribute from cldw */ + pln->super.super.could_prune_now_p = cldw->could_prune_now_p; + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldw); + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +hc2c_solver *X(mksolver_hc2c)(size_t size, INT r, + hc2c_kind hc2ckind, + hc2c_mkinferior mkcldw) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + hc2c_solver *slv = 
(hc2c_solver *)X(mksolver)(size, &sadt); + slv->r = r; + slv->hc2ckind = hc2ckind; + slv->mkcldw = mkcldw; + return slv; +} + +plan *X(mkplan_hc2c)(size_t size, const plan_adt *adt, hc2capply apply) +{ + plan_hc2c *ego; + + ego = (plan_hc2c *) X(mkplan)(size, adt); + ego->apply = apply; + + return &(ego->super); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/ct-hc2c.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/ct-hc2c.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "rdft.h" + +typedef void (*hc2capply) (const plan *ego, R *cr, R *ci); +typedef struct hc2c_solver_s hc2c_solver; +typedef plan *(*hc2c_mkinferior)(const hc2c_solver *ego, rdft_kind kind, + INT r, INT rs, + INT m, INT ms, + INT v, INT vs, + R *cr, R *ci, + planner *plnr); + +typedef struct { + plan super; + hc2capply apply; +} plan_hc2c; + +extern plan *X(mkplan_hc2c)(size_t size, const plan_adt *adt, + hc2capply apply); + +#define MKPLAN_HC2C(type, adt, apply) \ + (type *)X(mkplan_hc2c)(sizeof(type), adt, apply) + +struct hc2c_solver_s { + solver super; + INT r; + + hc2c_mkinferior mkcldw; + hc2c_kind hc2ckind; +}; + +hc2c_solver *X(mksolver_hc2c)(size_t size, INT r, + hc2c_kind hc2ckind, + hc2c_mkinferior mkcldw); + +void X(regsolver_hc2c_direct)(planner *plnr, khc2c codelet, + const hc2c_desc *desc, + hc2c_kind hc2ckind); diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/dft-r2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/dft-r2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Compute the complex DFT by combining R2HC RDFTs on the real + and imaginary parts. This could be useful for people just wanting + to link to the real codelets and not the complex ones. It could + also even be faster than the complex algorithms for split (as opposed + to interleaved) real/imag complex data. */ + +#include "rdft.h" +#include "dft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_dft super; + plan *cld; + INT ishift, oshift; + INT os; + INT n; +} P; + +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + INT n; + + UNUSED(ii); + + { /* transform vector of real & imag parts: */ + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, ri + ego->ishift, ro + ego->oshift); + } + + n = ego->n; + if (n > 1) { + INT i, os = ego->os; + for (i = 1; i < (n + 1)/2; ++i) { + E rop, iop, iom, rom; + rop = ro[os * i]; + iop = io[os * i]; + rom = ro[os * (n - i)]; + iom = io[os * (n - i)]; + ro[os * i] = rop - iom; + io[os * i] = iop + rom; + ro[os * (n - i)] = rop + iom; + io[os * (n - i)] = iop - rom; + } + } +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(dft-r2hc-%D%(%p%))", ego->n, ego->cld); +} + + +static int applicable0(const problem *p_) +{ + const problem_dft *p = (const problem_dft *) p_; + return ((p->sz->rnk == 1 && p->vecsz->rnk == 0) + || (p->sz->rnk == 0 && FINITE_RNK(p->vecsz->rnk)) + ); +} + +static int splitp(R *r, R *i, INT n, INT s) +{ + 
return ((r > i ? (r - i) : (i - r)) >= n * (s > 0 ? s : 0-s)); +} + +static int applicable(const problem *p_, const planner *plnr) +{ + if (!applicable0(p_)) return 0; + + { + const problem_dft *p = (const problem_dft *) p_; + + /* rank-0 problems are always OK */ + if (p->sz->rnk == 0) return 1; + + /* this solver is ok for split arrays */ + if (p->sz->rnk == 1 && + splitp(p->ri, p->ii, p->sz->dims[0].n, p->sz->dims[0].is) && + splitp(p->ro, p->io, p->sz->dims[0].n, p->sz->dims[0].os)) + return 1; + + return !(NO_DFT_R2HCP(plnr)); + } +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_dft *p; + plan *cld; + INT ishift = 0, oshift = 0; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + UNUSED(ego_); + if (!applicable(p_, plnr)) + return (plan *)0; + + p = (const problem_dft *) p_; + + { + tensor *ri_vec = X(mktensor_1d)(2, p->ii - p->ri, p->io - p->ro); + tensor *cld_vec = X(tensor_append)(ri_vec, p->vecsz); + int i; + for (i = 0; i < cld_vec->rnk; ++i) { /* make all istrides > 0 */ + if (cld_vec->dims[i].is < 0) { + INT nm1 = cld_vec->dims[i].n - 1; + ishift -= nm1 * (cld_vec->dims[i].is *= -1); + oshift -= nm1 * (cld_vec->dims[i].os *= -1); + } + } + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_1)(p->sz, cld_vec, + p->ri + ishift, + p->ro + oshift, R2HC)); + X(tensor_destroy2)(ri_vec, cld_vec); + } + if (!cld) return (plan *)0; + + pln = MKPLAN_DFT(P, &padt, apply); + + if (p->sz->rnk == 0) { + pln->n = 1; + pln->os = 0; + } + else { + pln->n = p->sz->dims[0].n; + pln->os = p->sz->dims[0].os; + } + pln->ishift = ishift; + pln->oshift = oshift; + + pln->cld = cld; + + pln->super.super.ops = cld->ops; + pln->super.super.ops.other += 8 * ((pln->n - 1)/2); + pln->super.super.ops.add += 4 * ((pln->n - 1)/2); + pln->super.super.ops.other += 1; /* estimator hack for nop plans */ + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const 
solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(dft_r2hc_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/dht-r2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/dht-r2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Solve a DHT problem (Discrete Hartley Transform) via post-processing + of an R2HC problem. 
*/ + +#include "rdft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld; + INT os; + INT n; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT os = ego->os; + INT i, n = ego->n; + + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, I, O); + } + + for (i = 1; i < n - i; ++i) { + E a, b; + a = O[os * i]; + b = O[os * (n - i)]; +#if FFT_SIGN == -1 + O[os * i] = a - b; + O[os * (n - i)] = a + b; +#else + O[os * i] = a + b; + O[os * (n - i)] = a - b; +#endif + } +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(dht-r2hc-%D%(%p%))", ego->n, ego->cld); +} + +static int applicable0(const problem *p_, const planner *plnr) +{ + const problem_rdft *p = (const problem_rdft *) p_; + return (1 + && !NO_DHT_R2HCP(plnr) + && p->sz->rnk == 1 + && p->vecsz->rnk == 0 + && p->kind[0] == DHT + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + UNUSED(ego); + return (!NO_SLOWP(plnr) && applicable0(p, plnr)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *cld; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + /* NO_DHT_R2HC stops infinite loops with rdft-dht.c */ + cld = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_1)(p->sz, p->vecsz, + p->I, p->O, R2HC), + NO_DHT_R2HC, 0, 0); + if (!cld) return (plan *)0; + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->n = p->sz->dims[0].n; + pln->os = p->sz->dims[0].os; + pln->cld = cld; + + pln->super.super.ops = 
cld->ops; + pln->super.super.ops.other += 4 * ((pln->n - 1)/2); + pln->super.super.ops.add += 2 * ((pln->n - 1)/2); + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(dht_r2hc_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/dht-rader.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/dht-rader.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "rdft.h" + +/* + * Compute DHTs of prime sizes using Rader's trick: turn them + * into convolutions of size n - 1, which we then perform via a pair + * of FFTs. (We can then do prime real FFTs via rdft-dht.c.) + * + * Optionally (determined by the "pad" field of the solver), we can + * perform the (cyclic) convolution by zero-padding to a size + * >= 2*(n-1) - 1. This is advantageous if n-1 has large prime factors. 
+ * + */ + +typedef struct { + solver super; + int pad; +} S; + +typedef struct { + plan_rdft super; + + plan *cld1, *cld2; + R *omega; + INT n, npad, g, ginv; + INT is, os; + plan *cld_omega; +} P; + +static rader_tl *omegas = 0; + +/***************************************************************************/ + +/* If R2HC_ONLY_CONV is 1, we use a trick to perform the convolution + purely in terms of R2HC transforms, as opposed to R2HC followed by H2RC. + This requires a few more operations, but allows us to share the same + plan/codelets for both Rader children. */ +#define R2HC_ONLY_CONV 1 + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT n = ego->n; /* prime */ + INT npad = ego->npad; /* == n - 1 for unpadded Rader; always even */ + INT is = ego->is, os; + INT k, gpower, g; + R *buf, *omega; + R r0; + + buf = (R *) MALLOC(sizeof(R) * npad, BUFFERS); + + /* First, permute the input, storing in buf: */ + g = ego->g; + for (gpower = 1, k = 0; k < n - 1; ++k, gpower = MULMOD(gpower, g, n)) { + buf[k] = I[gpower * is]; + } + /* gpower == g^(n-1) mod n == 1 */; + + A(n - 1 <= npad); + for (k = n - 1; k < npad; ++k) /* optionally, zero-pad convolution */ + buf[k] = 0; + + os = ego->os; + + /* compute RDFT of buf, storing in buf (i.e., in-place): */ + { + plan_rdft *cld = (plan_rdft *) ego->cld1; + cld->apply((plan *) cld, buf, buf); + } + + /* set output DC component: */ + O[0] = (r0 = I[0]) + buf[0]; + + /* now, multiply by omega: */ + omega = ego->omega; + buf[0] *= omega[0]; + for (k = 1; k < npad/2; ++k) { + E rB, iB, rW, iW, a, b; + rW = omega[k]; + iW = omega[npad - k]; + rB = buf[k]; + iB = buf[npad - k]; + a = rW * rB - iW * iB; + b = rW * iB + iW * rB; +#if R2HC_ONLY_CONV + buf[k] = a + b; + buf[npad - k] = a - b; +#else + buf[k] = a; + buf[npad - k] = b; +#endif + } + /* Nyquist component: */ + A(k + k == npad); /* since npad is even */ + buf[k] *= omega[k]; + + /* this will add input[0] to all of the outputs 
after the ifft */ + buf[0] += r0; + + /* inverse FFT: */ + { + plan_rdft *cld = (plan_rdft *) ego->cld2; + cld->apply((plan *) cld, buf, buf); + } + + /* do inverse permutation to unshuffle the output: */ + A(gpower == 1); +#if R2HC_ONLY_CONV + O[os] = buf[0]; + gpower = g = ego->ginv; + A(npad == n - 1 || npad/2 >= n - 1); + if (npad == n - 1) { + for (k = 1; k < npad/2; ++k, gpower = MULMOD(gpower, g, n)) { + O[gpower * os] = buf[k] + buf[npad - k]; + } + O[gpower * os] = buf[k]; + ++k, gpower = MULMOD(gpower, g, n); + for (; k < npad; ++k, gpower = MULMOD(gpower, g, n)) { + O[gpower * os] = buf[npad - k] - buf[k]; + } + } + else { + for (k = 1; k < n - 1; ++k, gpower = MULMOD(gpower, g, n)) { + O[gpower * os] = buf[k] + buf[npad - k]; + } + } +#else + g = ego->ginv; + for (k = 0; k < n - 1; ++k, gpower = MULMOD(gpower, g, n)) { + O[gpower * os] = buf[k]; + } +#endif + A(gpower == 1); + + X(ifree)(buf); +} + +static R *mkomega(enum wakefulness wakefulness, + plan *p_, INT n, INT npad, INT ginv) +{ + plan_rdft *p = (plan_rdft *) p_; + R *omega; + INT i, gpower; + trigreal scale; + triggen *t; + + if ((omega = X(rader_tl_find)(n, npad + 1, ginv, omegas))) + return omega; + + omega = (R *)MALLOC(sizeof(R) * npad, TWIDDLES); + + scale = npad; /* normalization for convolution */ + + t = X(mktriggen)(wakefulness, n); + for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) { + trigreal w[2]; + t->cexpl(t, gpower, w); + omega[i] = (w[0] + w[1]) / scale; + } + X(triggen_destroy)(t); + A(gpower == 1); + + A(npad == n - 1 || npad >= 2*(n - 1) - 1); + + for (; i < npad; ++i) + omega[i] = K(0.0); + if (npad > n - 1) + for (i = 1; i < n-1; ++i) + omega[npad - i] = omega[n - 1 - i]; + + p->apply(p_, omega, omega); + + X(rader_tl_insert)(n, npad + 1, ginv, omega, &omegas); + return omega; +} + +static void free_omega(R *omega) +{ + X(rader_tl_delete)(omega, &omegas); +} + +/***************************************************************************/ + +static 
void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); + X(plan_awake)(ego->cld_omega, wakefulness); + + switch (wakefulness) { + case SLEEPY: + free_omega(ego->omega); + ego->omega = 0; + break; + default: + ego->g = X(find_generator)(ego->n); + ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n); + A(MULMOD(ego->g, ego->ginv, ego->n) == 1); + + A(!ego->omega); + ego->omega = mkomega(wakefulness, + ego->cld_omega,ego->n,ego->npad,ego->ginv); + break; + } +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld_omega); + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + + p->print(p, "(dht-rader-%D/%D%ois=%oos=%(%p%)", + ego->n, ego->npad, ego->is, ego->os, ego->cld1); + if (ego->cld2 != ego->cld1) + p->print(p, "%(%p%)", ego->cld2); + if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2) + p->print(p, "%(%p%)", ego->cld_omega); + p->putchr(p, ')'); +} + +static int applicable(const solver *ego, const problem *p_, const planner *plnr) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego); + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk == 0 + && p->kind[0] == DHT + && X(is_prime)(p->sz->dims[0].n) + && p->sz->dims[0].n > 2 + && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW) + /* proclaim the solver SLOW if p-1 is not easily + factorizable. 
Unlike in the complex case where + Bluestein can solve the problem, in the DHT case we + may have no other choice */ + && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1)) + ); +} + +static INT choose_transform_size(INT minsz) +{ + static const INT primes[] = { 2, 3, 5, 0 }; + while (!X(factors_into)(minsz, primes) || minsz % 2) + ++minsz; + return minsz; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + P *pln; + INT n, npad; + INT is, os; + plan *cld1 = (plan *) 0; + plan *cld2 = (plan *) 0; + plan *cld_omega = (plan *) 0; + R *buf = (R *) 0; + problem *cldp; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *) 0; + + n = p->sz->dims[0].n; + is = p->sz->dims[0].is; + os = p->sz->dims[0].os; + + if (ego->pad) + npad = choose_transform_size(2 * (n - 1) - 1); + else + npad = n - 1; + + /* initial allocation for the purpose of planning */ + buf = (R *) MALLOC(sizeof(R) * npad, BUFFERS); + + cld1 = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_1_d)(X(mktensor_1d)(npad, 1, 1), + X(mktensor_1d)(1, 0, 0), + buf, buf, + R2HC), + NO_SLOW, 0, 0); + if (!cld1) goto nada; + + cldp = + X(mkproblem_rdft_1_d)( + X(mktensor_1d)(npad, 1, 1), + X(mktensor_1d)(1, 0, 0), + buf, buf, +#if R2HC_ONLY_CONV + R2HC +#else + HC2R +#endif + ); + if (!(cld2 = X(mkplan_f_d)(plnr, cldp, NO_SLOW, 0, 0))) + goto nada; + + /* plan for omega */ + cld_omega = X(mkplan_f_d)(plnr, + X(mkproblem_rdft_1_d)( + X(mktensor_1d)(npad, 1, 1), + X(mktensor_1d)(1, 0, 0), + buf, buf, R2HC), + NO_SLOW, ESTIMATE, 0); + if (!cld_omega) goto nada; + + /* deallocate buffers; let awake() or apply() allocate them for real */ + X(ifree)(buf); + buf = 0; + + pln = MKPLAN_RDFT(P, &padt, apply); + pln->cld1 = cld1; + pln->cld2 = cld2; + pln->cld_omega = cld_omega; + pln->omega = 0; + pln->n = n; + 
pln->npad = npad; + pln->is = is; + pln->os = os; + + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + pln->super.super.ops.other += (npad/2-1)*6 + npad + n + (n-1) * ego->pad; + pln->super.super.ops.add += (npad/2-1)*2 + 2 + (n-1) * ego->pad; + pln->super.super.ops.mul += (npad/2-1)*4 + 2 + ego->pad; +#if R2HC_ONLY_CONV + pln->super.super.ops.other += n-2 - ego->pad; + pln->super.super.ops.add += (npad/2-1)*2 + (n-2) - ego->pad; +#endif + + return &(pln->super.super); + + nada: + X(ifree0)(buf); + X(plan_destroy_internal)(cld_omega); + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + return 0; +} + +/* constructors */ + +static solver *mksolver(int pad) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->pad = pad; + return &(slv->super); +} + +void X(dht_rader_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver(0)); + REGISTER_SOLVER(p, mksolver(1)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/direct-r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/direct-r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* direct RDFT solver, using r2c codelets */ + +#include "rdft.h" + +typedef struct { + solver super; + const kr2c_desc *desc; + kr2c k; + int bufferedp; +} S; + +typedef struct { + plan_rdft super; + + stride rs, csr, csi; + stride brs, bcsr, bcsi; + INT n, vl, rs0, ivs, ovs, ioffset, bioffset; + kr2c k; + const S *slv; +} P; + +/************************************************************* + Nonbuffered code + *************************************************************/ +static void apply_r2hc(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + ASSERT_ALIGNED_DOUBLE; + ego->k(I, I + ego->rs0, O, O + ego->ioffset, + ego->rs, ego->csr, ego->csi, + ego->vl, ego->ivs, ego->ovs); +} + +static void apply_hc2r(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + ASSERT_ALIGNED_DOUBLE; + ego->k(O, O + ego->rs0, I, I + ego->ioffset, + ego->rs, ego->csr, ego->csi, + ego->vl, ego->ivs, ego->ovs); +} + +/************************************************************* + Buffered code + *************************************************************/ +/* should not be 2^k to avoid associativity conflicts */ +static INT compute_batchsize(INT radix) +{ + /* round up to multiple of 4 */ + radix += 3; + radix &= -4; + + return (radix + 2); +} + +static void dobatch_r2hc(const P *ego, R *I, R *O, R *buf, INT batchsz) +{ + X(cpy2d_ci)(I, buf, + ego->n, ego->rs0, WS(ego->bcsr /* hack */, 1), + batchsz, ego->ivs, 1, 1); + + if (IABS(WS(ego->csr, 1)) < IABS(ego->ovs)) { + /* transform directly to output */ + ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), + O, O + ego->ioffset, + ego->brs, ego->csr, ego->csi, + batchsz, 1, ego->ovs); + } else { + /* transform to buffer and copy back */ + ego->k(buf, buf + WS(ego->bcsr /* hack 
*/, 1), + buf, buf + ego->bioffset, + ego->brs, ego->bcsr, ego->bcsi, + batchsz, 1, 1); + X(cpy2d_co)(buf, O, + ego->n, WS(ego->bcsr, 1), WS(ego->csr, 1), + batchsz, 1, ego->ovs, 1); + } +} + +static void dobatch_hc2r(const P *ego, R *I, R *O, R *buf, INT batchsz) +{ + if (IABS(WS(ego->csr, 1)) < IABS(ego->ivs)) { + /* transform directly from input */ + ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), + I, I + ego->ioffset, + ego->brs, ego->csr, ego->csi, + batchsz, ego->ivs, 1); + } else { + /* copy into buffer and transform in place */ + X(cpy2d_ci)(I, buf, + ego->n, WS(ego->csr, 1), WS(ego->bcsr, 1), + batchsz, ego->ivs, 1, 1); + ego->k(buf, buf + WS(ego->bcsr /* hack */, 1), + buf, buf + ego->bioffset, + ego->brs, ego->bcsr, ego->bcsi, + batchsz, 1, 1); + } + X(cpy2d_co)(buf, O, + ego->n, WS(ego->bcsr /* hack */, 1), ego->rs0, + batchsz, 1, ego->ovs, 1); +} + +static void iterate(const P *ego, R *I, R *O, + void (*dobatch)(const P *ego, R *I, R *O, + R *buf, INT batchsz)) +{ + R *buf; + INT vl = ego->vl; + INT n = ego->n; + INT i; + INT batchsz = compute_batchsize(n); + size_t bufsz = n * batchsz * sizeof(R); + + BUF_ALLOC(R *, buf, bufsz); + + for (i = 0; i < vl - batchsz; i += batchsz) { + dobatch(ego, I, O, buf, batchsz); + I += batchsz * ego->ivs; + O += batchsz * ego->ovs; + } + dobatch(ego, I, O, buf, vl - i); + + BUF_FREE(buf, bufsz); +} + +static void apply_buf_r2hc(const plan *ego_, R *I, R *O) +{ + iterate((const P *) ego_, I, O, dobatch_r2hc); +} + +static void apply_buf_hc2r(const plan *ego_, R *I, R *O) +{ + iterate((const P *) ego_, I, O, dobatch_hc2r); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(stride_destroy)(ego->rs); + X(stride_destroy)(ego->csr); + X(stride_destroy)(ego->csi); + X(stride_destroy)(ego->brs); + X(stride_destroy)(ego->bcsr); + X(stride_destroy)(ego->bcsi); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->slv; + + if (ego->slv->bufferedp) + 
p->print(p, "(rdft-%s-directbuf/%D-r2c-%D%v \"%s\")", + X(rdft_kind_str)(s->desc->genus->kind), + /* hack */ WS(ego->bcsr, 1), ego->n, + ego->vl, s->desc->nam); + + else + p->print(p, "(rdft-%s-direct-r2c-%D%v \"%s\")", + X(rdft_kind_str)(s->desc->genus->kind), ego->n, + ego->vl, s->desc->nam); +} + +static INT ioffset(rdft_kind kind, INT sz, INT s) +{ + return(s * ((kind == R2HC || kind == HC2R) ? sz : (sz - 1))); +} + +static int applicable(const solver *ego_, const problem *p_) +{ + const S *ego = (const S *) ego_; + const kr2c_desc *desc = ego->desc; + const problem_rdft *p = (const problem_rdft *) p_; + INT vl, ivs, ovs; + + return ( + 1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->sz->dims[0].n == desc->n + && p->kind[0] == desc->genus->kind + + /* check strides etc */ + && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) + + && (0 + /* can operate out-of-place */ + || p->I != p->O + + /* computing one transform */ + || vl == 1 + + /* can operate in-place as long as strides are the same */ + || X(tensor_inplace_strides2)(p->sz, p->vecsz) + ) + ); +} + +static int applicable_buf(const solver *ego_, const problem *p_) +{ + const S *ego = (const S *) ego_; + const kr2c_desc *desc = ego->desc; + const problem_rdft *p = (const problem_rdft *) p_; + INT vl, ivs, ovs, batchsz; + + return ( + 1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->sz->dims[0].n == desc->n + && p->kind[0] == desc->genus->kind + + /* check strides etc */ + && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) + + && (batchsz = compute_batchsize(desc->n), 1) + + && (0 + /* can operate out-of-place */ + || p->I != p->O + + /* can operate in-place as long as strides are the same */ + || X(tensor_inplace_strides2)(p->sz, p->vecsz) + + /* can do it if the problem fits in the buffer, no matter + what the strides are */ + || vl <= batchsz + ) + ); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + const problem_rdft *p; 
+ iodim *d; + INT rs, cs, b, n; + + static const plan_adt padt = { + X(rdft_solve), X(null_awake), print, destroy + }; + + UNUSED(plnr); + + if (ego->bufferedp) { + if (!applicable_buf(ego_, p_)) + return (plan *)0; + } else { + if (!applicable(ego_, p_)) + return (plan *)0; + } + + p = (const problem_rdft *) p_; + + if (R2HC_KINDP(p->kind[0])) { + rs = p->sz->dims[0].is; cs = p->sz->dims[0].os; + pln = MKPLAN_RDFT(P, &padt, + ego->bufferedp ? apply_buf_r2hc : apply_r2hc); + } else { + rs = p->sz->dims[0].os; cs = p->sz->dims[0].is; + pln = MKPLAN_RDFT(P, &padt, + ego->bufferedp ? apply_buf_hc2r : apply_hc2r); + } + + d = p->sz->dims; + n = d[0].n; + + pln->k = ego->k; + pln->n = n; + + pln->rs0 = rs; + pln->rs = X(mkstride)(n, 2 * rs); + pln->csr = X(mkstride)(n, cs); + pln->csi = X(mkstride)(n, -cs); + pln->ioffset = ioffset(p->kind[0], n, cs); + + b = compute_batchsize(n); + pln->brs = X(mkstride)(n, 2 * b); + pln->bcsr = X(mkstride)(n, b); + pln->bcsi = X(mkstride)(n, -b); + pln->bioffset = ioffset(p->kind[0], n, b); + + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + + pln->slv = ego; + X(ops_zero)(&pln->super.super.ops); + + X(ops_madd2)(pln->vl / ego->desc->genus->vl, + &ego->desc->ops, + &pln->super.super.ops); + + if (ego->bufferedp) + pln->super.super.ops.other += 2 * n * pln->vl; + + pln->super.super.could_prune_now_p = !ego->bufferedp; + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(kr2c k, const kr2c_desc *desc, int bufferedp) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->k = k; + slv->desc = desc; + slv->bufferedp = bufferedp; + return &(slv->super); +} + +solver *X(mksolver_rdft_r2c_direct)(kr2c k, const kr2c_desc *desc) +{ + return mksolver(k, desc, 0); +} + +solver *X(mksolver_rdft_r2c_directbuf)(kr2c k, const kr2c_desc *desc) +{ + return mksolver(k, desc, 1); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/direct-r2r.c --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/direct-r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* direct RDFT solver, using r2r codelets */ + +#include "rdft.h" + +typedef struct { + solver super; + const kr2r_desc *desc; + kr2r k; +} S; + +typedef struct { + plan_rdft super; + + INT vl, ivs, ovs; + stride is, os; + kr2r k; + const S *slv; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + ASSERT_ALIGNED_DOUBLE; + ego->k(I, O, ego->is, ego->os, ego->vl, ego->ivs, ego->ovs); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(stride_destroy)(ego->is); + X(stride_destroy)(ego->os); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->slv; + + p->print(p, "(rdft-%s-direct-r2r-%D%v \"%s\")", + X(rdft_kind_str)(s->desc->kind), s->desc->n, + ego->vl, s->desc->nam); +} + +static int applicable(const solver *ego_, const problem *p_) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + INT vl; + INT ivs, ovs; + + return ( + 1 + 
&& p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->sz->dims[0].n == ego->desc->n + && p->kind[0] == ego->desc->kind + + /* check strides etc */ + && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) + + && (0 + /* can operate out-of-place */ + || p->I != p->O + + /* computing one transform */ + || vl == 1 + + /* can operate in-place as long as strides are the same */ + || X(tensor_inplace_strides2)(p->sz, p->vecsz) + ) + ); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + const problem_rdft *p; + iodim *d; + + static const plan_adt padt = { + X(rdft_solve), X(null_awake), print, destroy + }; + + UNUSED(plnr); + + if (!applicable(ego_, p_)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + + pln = MKPLAN_RDFT(P, &padt, apply); + + d = p->sz->dims; + + pln->k = ego->k; + + pln->is = X(mkstride)(d->n, d->is); + pln->os = X(mkstride)(d->n, d->os); + + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + + pln->slv = ego; + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl / ego->desc->genus->vl, + &ego->desc->ops, + &pln->super.super.ops); + + pln->super.super.could_prune_now_p = 1; + + return &(pln->super.super); +} + +/* constructor */ +solver *X(mksolver_rdft_r2r_direct)(kr2r k, const kr2r_desc *desc) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->k = k; + slv->desc = desc; + return &(slv->super); +} + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/direct2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/direct2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either 
version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+/* direct RDFT2 R2HC/HC2R solver, if we have a codelet */
+
+#include "rdft.h"
+/* solver: one r2c codelet plus the descriptor that applicable() matches problems against */
+typedef struct {
+     solver super;
+     const kr2c_desc *desc;
+     kr2c k;
+} S;
+/* plan: values captured by mkplan() so that apply()/apply_r2hc() only have to call the codelet */
+typedef struct {
+     plan_rdft2 super;
+
+     stride rs, cs; /* built by mkplan() from the size dimension's is/os; swapped for non-R2HC kinds */
+     INT vl; /* vl, ivs, ovs are filled in by X(tensor_tornk1) from p->vecsz */
+     INT ivs, ovs;
+     kr2c k; /* codelet copied from the solver */
+     const S *slv;
+     INT ilast; /* offset into ci cleared by apply_r2hc(): 0 for odd n, (n/2) * os for even n */
+} P;
+/* generic apply: a single codelet call; mkplan() selects it for every kind except R2HC */
+static void apply(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
+{
+     const P *ego = (const P *) ego_;
+     ASSERT_ALIGNED_DOUBLE;
+     ego->k(r0, r1, cr, ci,
+            ego->rs, ego->cs, ego->cs,
+            ego->vl, ego->ivs, ego->ovs);
+}
+/* R2HC apply: same codelet call, then store 0 into ci[0] and ci[ilast] of each of the vl outputs */
+static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
+{
+     const P *ego = (const P *) ego_;
+     INT i, vl = ego->vl, ovs = ego->ovs;
+     ASSERT_ALIGNED_DOUBLE;
+     ego->k(r0, r1, cr, ci,
+            ego->rs, ego->cs, ego->cs,
+            vl, ego->ivs, ovs);
+     for (i = 0; i < vl; ++i, ci += ovs) /* ci advances by the output vector stride */
+          ci[0] = ci[ego->ilast] = 0;
+}
+/* plan destructor: releases the two strides created with X(mkstride) in mkplan() */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(stride_destroy)(ego->rs);
+     X(stride_destroy)(ego->cs);
+}
+/* plan printer: emits the kind string, the codelet size n, the vector length and the codelet name */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     const S *s = ego->slv;
+
+     p->print(p, "(rdft2-%s-direct-%D%v \"%s\")",
+              X(rdft_kind_str)(s->desc->genus->kind), s->desc->n,
+              ego->vl, s->desc->nam);
+}
+/* nonzero iff the codelet fits p_: rank-1 size desc->n, same kind, vector rank <= 1, usable strides */
+static int applicable(const solver *ego_, const problem *p_)
+{
+     const S *ego = (const S *) ego_;
+     const kr2c_desc *desc = ego->desc;
+     const problem_rdft2 *p = (const problem_rdft2 *) p_;
+     INT vl;
+     INT ivs, ovs;
+
+     return (
+          1
+          && p->sz->rnk == 1
+          && p->vecsz->rnk <= 1
+          && p->sz->dims[0].n == desc->n
+          && p->kind == desc->genus->kind
+
+          /* check strides etc */
+          && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
+
+          && (0
+              /* can operate out-of-place */
+              || p->r0 != p->cr
+
+              /*
+               * can compute one transform in-place, no matter
+               * what the strides are.
+               */
+              || p->vecsz->rnk == 0
+
+              /* can operate in-place as long as strides are the same */
+              || X(rdft2_inplace_strides)(p, RNK_MINFTY)
+               )
+          );
+}
+/* solver entry point: returns a new plan for p_, or a null plan when applicable() rejects it */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     P *pln;
+     const problem_rdft2 *p;
+     iodim *d;
+     int r2hc_kindp;
+
+     static const plan_adt padt = {
+          X(rdft2_solve), X(null_awake), print, destroy
+     };
+
+     UNUSED(plnr);
+
+     if (!applicable(ego_, p_))
+          return (plan *)0;
+
+     p = (const problem_rdft2 *) p_;
+
+     r2hc_kindp = R2HC_KINDP(p->kind);
+     A(r2hc_kindp || HC2R_KINDP(p->kind));
+
+     pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply);
+
+     d = p->sz->dims;
+
+     pln->k = ego->k;
+
+     pln->rs = X(mkstride)(d->n, r2hc_kindp ? d->is : d->os);
+     pln->cs = X(mkstride)(d->n, r2hc_kindp ? d->os : d->is);
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     /* Nyquist freq., if any */
+     pln->ilast = (d->n % 2) ? 0 : (d->n/2) * d->os;
+
+     pln->slv = ego;
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl / ego->desc->genus->vl,
+                  &ego->desc->ops,
+                  &pln->super.super.ops);
+     if (p->kind == R2HC)
+          pln->super.super.ops.other += 2 * pln->vl; /* + 2 stores */
+
+     pln->super.super.could_prune_now_p = 1;
+     return &(pln->super.super);
+}
+
+/* constructor: allocates a solver bound to codelet k and descriptor desc */
+solver *X(mksolver_rdft2_direct)(kr2c k, const kr2c_desc *desc)
+{
+     static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->k = k;
+     slv->desc = desc;
+     return &(slv->super);
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/generic.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/rdft/generic.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+/* rdft solver for single transforms of odd prime size, evaluated as dot products with a twiddle table */
+#include "rdft.h"
+/* solver: handles one transform kind; X(rdft_generic_register) creates one for R2HC and one for HC2R */
+typedef struct {
+     solver super;
+     rdft_kind kind;
+} S;
+/* plan: transform size, input/output strides and kind, plus the twiddle table managed by awake() */
+typedef struct {
+     plan_rdft super;
+     twid *td;
+     INT n, is, os;
+     rdft_kind kind;
+} P;
+
+/***************************************************************************/
+/* *or0 = x[0] + sum of x[odd] * w[even]; *oi1 = sum of x[even >= 2] * w[odd], over the (n-1)/2 pairs */
+static void cdot_r2hc(INT n, const E *x, const R *w, R *or0, R *oi1)
+{
+     INT i;
+
+     E rr = x[0], ri = 0;
+     x += 1;
+     for (i = 1; i + i < n; ++i) {
+          rr += x[0] * w[0];
+          ri += x[1] * w[1];
+          x += 2; w += 2;
+     }
+     *or0 = rr;
+     *oi1 = ri;
+}
+/* packs xr into o as xr[0] followed by (sum, signed difference) of xr[i], xr[n-i]; *pr = total of sums */
+static void hartley_r2hc(INT n, const R *xr, INT xs, E *o, R *pr)
+{
+     INT i;
+     E sr;
+     o[0] = sr = xr[0]; o += 1;
+     for (i = 1; i + i < n; ++i) {
+          R a, b;
+          a = xr[i * xs];
+          b = xr[(n - i) * xs];
+          sr += (o[0] = a + b);
+#if FFT_SIGN == -1
+          o[1] = b - a;
+#else
+          o[1] = a - b;
+#endif
+          o += 2;
+     }
+     *pr = sr;
+}
+/* R2HC apply: pack I into a scratch buffer (writing O[0]), then one cdot_r2hc per pair O[i], O[n-i] */
+static void apply_r2hc(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT i;
+     INT n = ego->n, is = ego->is, os = ego->os;
+     const R *W = ego->td->W;
+     E *buf;
+     size_t bufsz = n * sizeof(E);
+
+     BUF_ALLOC(E *, buf, bufsz);
+     hartley_r2hc(n, I, is, buf, O);
+
+     for (i = 1; i + i < n; ++i) {
+          cdot_r2hc(n, buf, W, O + i * os, O + (n - i) * os);
+          W += n - 1; /* each cdot call reads n - 1 twiddle values */
+     }
+
+     BUF_FREE(buf, bufsz);
+}
+
+/* like cdot_r2hc, but writes the sum and difference of the two accumulators (order set by FFT_SIGN) */
+static void cdot_hc2r(INT n, const E *x, const R *w, R *or0, R *or1)
+{
+     INT i;
+
+     E rr = x[0], ii = 0;
+     x += 1;
+     for (i = 1; i + i < n; ++i) {
+          rr += x[0] * w[0];
+          ii += x[1] * w[1];
+          x += 2; w += 2;
+     }
+#if FFT_SIGN == -1
+     *or0 = rr - ii;
+     *or1 = rr + ii;
+#else
+     *or0 = rr + ii;
+     *or1 = rr - ii;
+#endif
+}
+/* packs x into o as x[0] followed by the doubled x[i] and x[n-i]; *pr = x[0] + sum of doubled x[i] */
+static void hartley_hc2r(INT n, const R *x, INT xs, E *o, R *pr)
+{
+     INT i;
+     E sr;
+
+     o[0] = sr = x[0]; o += 1;
+     for (i = 1; i + i < n; ++i) {
+          sr += (o[0] = x[i * xs] + x[i * xs]);
+          o[1] = x[(n - i) * xs] + x[(n - i) * xs];
+          o += 2;
+     }
+     *pr = sr;
+}
+/* HC2R apply: same structure as apply_r2hc, using the hc2r packing and dot-product helpers */
+static void apply_hc2r(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT i;
+     INT n = ego->n, is = ego->is, os = ego->os;
+     const R *W = ego->td->W;
+     E *buf;
+     size_t bufsz = n * sizeof(E);
+
+     BUF_ALLOC(E *, buf, bufsz);
+     hartley_hc2r(n, I, is, buf, O);
+
+     for (i = 1; i + i < n; ++i) {
+          cdot_hc2r(n, buf, W, O + i * os, O + (n - i) * os);
+          W += n - 1;
+     }
+
+     BUF_FREE(buf, bufsz);
+}
+
+
+/***************************************************************************/
+/* hands ego->td to X(twiddle_awake) with the half_tw instruction list for an n x n table */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+     static const tw_instr half_tw[] = {
+          { TW_HALF, 1, 0 },
+          { TW_NEXT, 1, 0 }
+     };
+
+     X(twiddle_awake)(wakefulness, &ego->td, half_tw, ego->n, ego->n,
+                      (ego->n - 1) / 2);
+}
+/* plan printer: emits the plan name with "r2hc" or "hc2r" and the size n */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+
+     p->print(p, "(rdft-generic-%s-%D)",
+              ego->kind == R2HC ? "r2hc" : "hc2r",
+              ego->n);
+}
+/* nonzero iff p_ is one rank-1 transform of odd prime size and matching kind, within planner size limits */
+static int applicable(const S *ego, const problem *p_,
+                      const planner *plnr)
+{
+     const problem_rdft *p = (const problem_rdft *) p_;
+     return (1
+             && p->sz->rnk == 1
+             && p->vecsz->rnk == 0
+             && (p->sz->dims[0].n % 2) == 1
+             && CIMPLIES(NO_LARGE_GENERICP(plnr), p->sz->dims[0].n < GENERIC_MIN_BAD)
+             && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > GENERIC_MAX_SLOW)
+             && X(is_prime)(p->sz->dims[0].n)
+             && p->kind[0] == ego->kind
+          );
+}
+/* solver entry point: returns a new plan, or a null plan when applicable() rejects the problem */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *)ego_;
+     const problem_rdft *p;
+     P *pln;
+     INT n;
+
+     static const plan_adt padt = {
+          X(rdft_solve), awake, print, X(plan_null_destroy)
+     };
+
+     if (!applicable(ego, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+     pln = MKPLAN_RDFT(P, &padt,
+                       R2HC_KINDP(p->kind[0]) ? apply_r2hc : apply_hc2r);
+
+     pln->n = n = p->sz->dims[0].n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->td = 0; /* twiddle table is created later through awake() */
+     pln->kind = ego->kind;
+
+     pln->super.super.ops.add = (n-1) * 2.5;
+     pln->super.super.ops.mul = 0;
+     pln->super.super.ops.fma = 0.5 * (n-1) * (n-1) ;
+#if 0 /* these are nice pipelined sequential loads and should cost nothing */
+     pln->super.super.ops.other = (n-1)*(2 + 1 + (n-1)); /* approximate */
+#endif
+
+     return &(pln->super.super);
+}
+/* allocates a solver that only accepts problems of the given kind */
+static solver *mksolver(rdft_kind kind)
+{
+     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->kind = kind;
+     return &(slv->super);
+}
+/* registers one solver per direction (R2HC and HC2R) with planner p */
+void X(rdft_generic_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver(R2HC));
+     REGISTER_SOLVER(p, mksolver(HC2R));
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/hc2hc-direct.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/rdft/hc2hc-direct.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/* hc2hc twiddle step executed directly by a codelet, in an unbuffered and a buffered variant */
+#include "hc2hc.h"
+/* solver: the codelet, its descriptor, and whether plans should use the buffered apply */
+typedef struct {
+     hc2hc_solver super;
+     const hc2hc_desc *desc;
+     khc2hc k;
+     int bufferedp;
+} S;
+/* plan: codelet, two child plans, loop bounds and strides, all filled in by mkcldw() */
+typedef struct {
+     plan_hc2hc super;
+     khc2hc k;
+     plan *cld0, *cldm; /* children for 0th and middle butterflies */
+     INT r, m, v; /* radix, m and vector count exactly as passed to mkcldw() */
+     INT ms, vs, mb, me; /* ms/vs: pointer steps in units of R; mb/me: bounds handed to the codelet */
+     stride rs, brs; /* rs strides by m * ms; brs strides by 2 * compute_batchsize(r) (buffer layout) */
+     twid *td;
+     const S *slv;
+} P;
+
+/*************************************************************
+  Nonbuffered code
+ *************************************************************/
+static void apply(const plan *ego_, R *IO) /* per vector: cld0, codelet in place, then cldm */
+{
+     const P *ego = (const P *) ego_;
+     plan_rdft *cld0 = (plan_rdft *) ego->cld0;
+     plan_rdft *cldm = (plan_rdft *) ego->cldm;
+     INT i, m = ego->m, v = ego->v;
+     INT mb = ego->mb, me = ego->me;
+     INT ms = ego->ms, vs = ego->vs;
+
+     for (i = 0; i < v; ++i, IO += vs) {
+          cld0->apply((plan *) cld0, IO, IO);
+          ego->k(IO + ms * mb, IO + (m - mb) * ms,
+                 ego->td->W, ego->rs, mb, me, ms);
+          cldm->apply((plan *) cldm, IO + (m/2) * ms, IO + (m/2) * ms);
+     }
+}
+
+/*************************************************************
+  Buffered code
+ *************************************************************/
+
+/* should not be 2^k to avoid associativity conflicts; returns radix rounded up to a multiple of 4, plus 2 */
+static INT compute_batchsize(INT radix)
+{
+     /* round up to multiple of 4 */
+     radix += 3;
+     radix &= -4;
+
+     return (radix + 2);
+}
+/* one batch [mb, me): X(cpy2d_ci) both halves into the buffer, run the codelet there, X(cpy2d_co) back */
+static void dobatch(const P *ego, R *IOp, R *IOm,
+                    INT mb, INT me, R *bufp)
+{
+     INT b = WS(ego->brs, 1);
+     INT rs = WS(ego->rs, 1);
+     INT r = ego->r;
+     INT ms = ego->ms;
+     R *bufm = bufp + b - 1;
+
+     X(cpy2d_ci)(IOp + mb * ms, bufp, r, rs, b, me - mb, ms, 1, 1);
+     X(cpy2d_ci)(IOm - mb * ms, bufm, r, rs, b, me - mb, -ms, -1, 1);
+
+     ego->k(bufp, bufm, ego->td->W, ego->brs, mb, me, 1);
+
+     X(cpy2d_co)(bufp, IOp + mb * ms, r, b, rs, me - mb, 1, ms, 1);
+     X(cpy2d_co)(bufm, IOm - mb * ms, r, b, rs, me - mb, -1, -ms, 1);
+}
+/* buffered apply: like apply(), but the codelet runs via dobatch() in chunks of batchsz */
+static void apply_buf(const plan *ego_, R *IO)
+{
+     const P *ego = (const P *) ego_;
+     plan_rdft *cld0 = (plan_rdft *) ego->cld0;
+     plan_rdft *cldm = (plan_rdft *) ego->cldm;
+     INT i, j, m = ego->m, v = ego->v, r = ego->r;
+     INT mb = ego->mb, me = ego->me, ms = ego->ms;
+     INT batchsz = compute_batchsize(r);
+     R *buf;
+     size_t bufsz = r * batchsz * 2 * sizeof(R);
+
+     BUF_ALLOC(R *, buf, bufsz);
+
+     for (i = 0; i < v; ++i, IO += ego->vs) {
+          R *IOp = IO;
+          R *IOm = IO + m * ms;
+
+          cld0->apply((plan *) cld0, IO, IO);
+
+          for (j = mb; j + batchsz < me; j += batchsz)
+               dobatch(ego, IOp, IOm, j, j + batchsz, buf);
+
+          dobatch(ego, IOp, IOm, j, me, buf); /* final, possibly shorter, batch */
+
+          cldm->apply((plan *) cldm, IO + ms * (m/2), IO + ms * (m/2));
+     }
+
+     BUF_FREE(buf, bufsz);
+}
+/* forwards the wakefulness change to both children and to the twiddle table */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+
+     X(plan_awake)(ego->cld0, wakefulness);
+     X(plan_awake)(ego->cldm, wakefulness);
+     X(twiddle_awake)(wakefulness, &ego->td, ego->slv->desc->tw,
+                      ego->r * ego->m, ego->r, (ego->m - 1) / 2);
+}
+/* plan destructor: destroys both child plans and both strides */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld0);
+     X(plan_destroy_internal)(ego->cldm);
+     X(stride_destroy)(ego->rs);
+     X(stride_destroy)(ego->brs);
+}
+/* plan printer: the buffered form additionally reports the batch size */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     const S *slv = ego->slv;
+     const hc2hc_desc *e = slv->desc;
+     INT batchsz = compute_batchsize(ego->r);
+
+     if (slv->bufferedp)
+          p->print(p, "(hc2hc-directbuf/%D-%D/%D%v \"%s\"%(%p%)%(%p%))",
+                   batchsz, ego->r, X(twiddle_length)(ego->r, e->tw),
+                   ego->v, e->nam, ego->cld0, ego->cldm);
+     else
+          p->print(p, "(hc2hc-direct-%D/%D%v \"%s\"%(%p%)%(%p%))",
+                   ego->r, X(twiddle_length)(ego->r, e->tw), ego->v, e->nam,
+                   ego->cld0, ego->cldm);
+}
+/* nonzero iff the requested radix and kind are the ones the codelet descriptor declares */
+static int applicable0(const S *ego, rdft_kind kind, INT r)
+{
+     const hc2hc_desc *e = ego->desc;
+
+     return (1
+             && r == e->radix
+             && kind == e->genus->kind
+          );
+}
+/* applicable0() plus the planner's NO_UGLY test, with threshold 512 when buffered and 16 otherwise */
+static int applicable(const S *ego, rdft_kind kind, INT r, INT m, INT v,
+                      const planner *plnr)
+{
+     if (!applicable0(ego, kind, r))
+          return 0;
+
+     if (NO_UGLYP(plnr) && X(ct_uglyp)((ego->bufferedp? (INT)512 : (INT)16),
+                                       v, m * r, r))
+          return 0;
+
+     return 1;
+}
+/* CLDMP: the range [mstart, mstart+mcount) ends at m/2 + 1; CLD0P: the range starts at 0 */
+#define CLDMP(m, mstart, mcount) (2 * ((mstart) + (mcount)) == (m) + 2)
+#define CLD0P(mstart) ((mstart) == 0)
+/* hc2hc_mkinferior callback: plans both children and builds the plan; returns 0 on any failure */
+static plan *mkcldw(const hc2hc_solver *ego_,
+                    rdft_kind kind, INT r, INT m, INT ms, INT v, INT vs,
+                    INT mstart, INT mcount,
+                    R *IO, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     P *pln;
+     const hc2hc_desc *e = ego->desc;
+     plan *cld0 = 0, *cldm = 0;
+     INT imid = (m / 2) * ms;
+     INT rs = m * ms;
+
+     static const plan_adt padt = {
+          0, awake, print, destroy
+     };
+
+     if (!applicable(ego, kind, r, m, v, plnr))
+          return (plan *)0;
+
+     cld0 = X(mkplan_d)(
+          plnr,
+          X(mkproblem_rdft_1_d)((CLD0P(mstart) ?
+                                 X(mktensor_1d)(r, rs, rs) : X(mktensor_0d)()),
+                                X(mktensor_0d)(),
+                                TAINT(IO, vs), TAINT(IO, vs),
+                                kind));
+     if (!cld0) goto nada;
+
+     cldm = X(mkplan_d)(
+          plnr,
+          X(mkproblem_rdft_1_d)((CLDMP(m, mstart, mcount) ?
+                                 X(mktensor_1d)(r, rs, rs) : X(mktensor_0d)()),
+                                X(mktensor_0d)(),
+                                TAINT(IO + imid, vs), TAINT(IO + imid, vs),
+                                kind == R2HC ? R2HCII : HC2RIII));
+     if (!cldm) goto nada;
+
+     pln = MKPLAN_HC2HC(P, &padt, ego->bufferedp ? apply_buf : apply);
+
+     pln->k = ego->k;
+     pln->td = 0;
+     pln->r = r; pln->rs = X(mkstride)(r, rs);
+     pln->m = m; pln->ms = ms;
+     pln->v = v; pln->vs = vs;
+     pln->slv = ego;
+     pln->brs = X(mkstride)(r, 2 * compute_batchsize(r));
+     pln->cld0 = cld0;
+     pln->cldm = cldm;
+     pln->mb = mstart + CLD0P(mstart); /* skip index 0 when cld0 covers it */
+     pln->me = mstart + mcount - CLDMP(m, mstart, mcount); /* stop before the middle when cldm covers it */
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(v * ((pln->me - pln->mb) / e->genus->vl),
+                  &e->ops, &pln->super.super.ops);
+     X(ops_madd2)(v, &cld0->ops, &pln->super.super.ops);
+     X(ops_madd2)(v, &cldm->ops, &pln->super.super.ops);
+
+     if (ego->bufferedp)
+          pln->super.super.ops.other += 4 * r * (pln->me - pln->mb) * v;
+
+     pln->super.super.could_prune_now_p =
+          (!ego->bufferedp && r >= 5 && r < 64 && m >= r);
+
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cld0);
+     X(plan_destroy_internal)(cldm);
+     return 0;
+}
+/* registers one solver for the codelet with the given bufferedp, and a second via the hook if it is set */
+static void regone(planner *plnr, khc2hc codelet, const hc2hc_desc *desc,
+                   int bufferedp)
+{
+     S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), desc->radix, mkcldw);
+     slv->k = codelet;
+     slv->desc = desc;
+     slv->bufferedp = bufferedp;
+     REGISTER_SOLVER(plnr, &(slv->super.super));
+     if (X(mksolver_hc2hc_hook)) {
+          slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), desc->radix, mkcldw);
+          slv->k = codelet;
+          slv->desc = desc;
+          slv->bufferedp = bufferedp;
+          REGISTER_SOLVER(plnr, &(slv->super.super));
+     }
+}
+/* public entry point: registers the codelet in both its unbuffered and buffered forms */
+void X(regsolver_hc2hc_direct)(planner *plnr, khc2hc codelet,
+                               const hc2hc_desc *desc)
+{
+     regone(plnr, codelet, desc, /* bufferedp */0);
+     regone(plnr, codelet, desc, /* bufferedp */1);
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/hc2hc-generic.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/rdft/hc2hc-generic.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can
redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/* express a hc2hc problem in terms of rdft + multiplication by
+   twiddle factors */
+
+#include "hc2hc.h"
+/* no solver-specific state is needed, so the base hc2hc solver type is used directly */
+typedef hc2hc_solver S;
+/* plan: geometry passed to mkcldw(), the adjusted range mstart1/mcount1, two child plans, twiddles */
+typedef struct {
+     plan_hc2hc super;
+
+     INT r, m, s, vl, vs, mstart1, mcount1;
+     plan *cld0; /* child planned over IO itself; a rank-0 size tensor unless mstart == 0 */
+     plan *cld; /* child planned over IO + s * mstart1 */
+     twid *td;
+} P;
+
+
+/**************************************************************/
+static void mktwiddle(P *ego, enum wakefulness wakefulness) /* forwards to X(twiddle_awake) */
+{
+     static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } };
+
+     /* note that R and M are swapped, to allow for sequential
+        access both to data and twiddles */
+     X(twiddle_awake)(wakefulness, &ego->td, tw,
+                      ego->r * ego->m, ego->m, ego->r);
+}
+/* in place: multiplies each (*pr, *pi) pair, as a complex number, by the twiddle (W[0], sign * W[1]) */
+static void bytwiddle(const P *ego, R *IO, R sign)
+{
+     INT i, j, k;
+     INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
+     INT ms = m * s;
+     INT mstart1 = ego->mstart1, mcount1 = ego->mcount1;
+     INT wrem = 2 * ((m-1)/2 - mcount1); /* twiddle values skipped per k because only mcount1 columns are used */
+
+     for (i = 0; i < vl; ++i, IO += vs) {
+          const R *W = ego->td->W;
+
+          A(m % 2 == 1);
+          for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) {
+               /* pr := IO + (j + mstart1) * s + k * ms */
+               R *pr = IO + mstart1 * s + k * ms;
+
+               /* pi := IO + (m - j - mstart1) * s + k * ms */
+               R *pi = IO - mstart1 * s + (k + 1) * ms;
+
+               for (j = 0; j < mcount1; ++j, pr += s, pi -= s) {
+                    E xr = *pr;
+                    E xi = *pi;
+                    E wr = W[0];
+                    E wi = sign * W[1];
+                    *pr = xr * wr - xi * wi;
+                    *pi = xi * wr + xr * wi;
+                    W += 2;
+               }
+               W += wrem;
+          }
+     }
+}
+/* exchanges *pr and *pi (addresses given in the comments below) for every k with 2k < r, j in [jstart, jend) */
+static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend)
+{
+     INT k;
+     INT ms = m * s;
+     INT js = jstart * s;
+     for (k = 0; k + k < r; ++k) {
+          /* pr := IO + (m - j) * s + k * ms */
+          R *pr = IO + (k + 1) * ms - js;
+          /* pi := IO + (m - j) * s + (r - 1 - k) * ms */
+          R *pi = IO + (r - k) * ms - js;
+          INT j;
+          for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) {
+               R t = *pr;
+               *pr = *pi;
+               *pi = t;
+          }
+     }
+}
+/* last step of apply_dit(): sum/difference updates between rows k and r - k, followed by swapri() */
+static void reorder_dit(const P *ego, R *IO)
+{
+     INT i, k;
+     INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
+     INT ms = m * s;
+     INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
+
+     for (i = 0; i < vl; ++i, IO += vs) {
+          for (k = 1; k + k < r; ++k) {
+               R *p0 = IO + k * ms;
+               R *p1 = IO + (r - k) * ms;
+               INT j;
+
+               for (j = mstart1; j < mend1; ++j) {
+                    E rp, ip, im, rm;
+                    rp = p0[j * s];
+                    im = p1[ms - j * s];
+                    rm = p1[j * s];
+                    ip = p0[ms - j * s];
+                    p0[j * s] = rp - im;
+                    p1[ms - j * s] = rp + im;
+                    p1[j * s] = rm - ip;
+                    p0[ms - j * s] = ip + rm;
+               }
+          }
+
+          swapri(IO, r, m, s, mstart1, mend1);
+     }
+}
+/* first step of apply_dif(): swapri() first, then sum/difference updates on values scaled by 0.5 */
+static void reorder_dif(const P *ego, R *IO)
+{
+     INT i, k;
+     INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
+     INT ms = m * s;
+     INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
+
+     for (i = 0; i < vl; ++i, IO += vs) {
+          swapri(IO, r, m, s, mstart1, mend1);
+
+          for (k = 1; k + k < r; ++k) {
+               R *p0 = IO + k * ms;
+               R *p1 = IO + (r - k) * ms;
+               const R half = K(0.5);
+               INT j;
+
+               for (j = mstart1; j < mend1; ++j) {
+                    E rp, ip, im, rm;
+                    rp = half * p0[j * s];
+                    im = half * p1[ms - j * s];
+                    rm = half * p1[j * s];
+                    ip = half * p0[ms - j * s];
+                    p0[j * s] = rp + im;
+                    p1[ms - j * s] = im - rp;
+                    p1[j * s] = rm + ip;
+                    p0[ms - j * s] = ip - rm;
+               }
+          }
+     }
+}
+/* nonzero iff kind is R2HC or HC2R, both m and r are odd, and the planner does not forbid slow plans */
+static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr)
+{
+     return (1
+             && (kind == R2HC || kind == HC2R)
+             && (m % 2)
+             && (r % 2)
+             && !NO_SLOWP(plnr)
+          );
+}
+
+/**************************************************************/
+/* DIT order: bytwiddle with sign -1, child cld0, child cld at offset mstart1 * s, then reorder_dit */
+static void apply_dit(const plan *ego_, R *IO)
+{
+     const P *ego = (const P *) ego_;
+     INT start;
+     plan_rdft *cld, *cld0;
+
+     bytwiddle(ego, IO, K(-1.0));
+
+     cld0 = (plan_rdft *) ego->cld0;
+     cld0->apply(ego->cld0, IO, IO);
+
+     start = ego->mstart1 * ego->s;
+     cld = (plan_rdft *) ego->cld;
+     cld->apply(ego->cld, IO + start, IO + start);
+
+     reorder_dit(ego, IO);
+}
+/* DIF order: reorder_dif, child cld0, child cld at offset mstart1 * s, then bytwiddle with sign +1 */
+static void apply_dif(const plan *ego_, R *IO)
+{
+     const P *ego = (const P *) ego_;
+     INT start;
+     plan_rdft *cld, *cld0;
+
+     reorder_dif(ego, IO);
+
+     cld0 = (plan_rdft *) ego->cld0;
+     cld0->apply(ego->cld0, IO, IO);
+
+     start = ego->mstart1 * ego->s;
+     cld = (plan_rdft *) ego->cld;
+     cld->apply(ego->cld, IO + start, IO + start);
+
+     bytwiddle(ego, IO, K(1.0));
+}
+
+/* forwards the wakefulness change to both children and to the twiddle table */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+     X(plan_awake)(ego->cld0, wakefulness);
+     X(plan_awake)(ego->cld, wakefulness);
+     mktwiddle(ego, wakefulness);
+}
+/* plan destructor: destroys both child plans */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+     X(plan_destroy_internal)(ego->cld0);
+}
+/* plan printer: reports "dit" or "dif" according to which apply function the plan carries */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))",
+              ego->super.apply == apply_dit ? "dit" : "dif",
+              ego->r, ego->m, ego->vl, ego->cld0, ego->cld);
+}
+/* hc2hc_mkinferior callback: plans both children and builds the plan; null plan on any failure */
+static plan *mkcldw(const hc2hc_solver *ego_,
+                    rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs,
+                    INT mstart, INT mcount,
+                    R *IO, planner *plnr)
+{
+     P *pln;
+     plan *cld0 = 0, *cld = 0;
+     INT mstart1, mcount1, mstride;
+
+     static const plan_adt padt = {
+          0, awake, print, destroy
+     };
+
+     UNUSED(ego_);
+
+     A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2);
+
+     if (!applicable(kind, r, m, plnr))
+          return (plan *)0;
+
+     A(m % 2);
+     mstart1 = mstart + (mstart == 0); /* index 0, when present, is left to cld0 */
+     mcount1 = mcount - (mstart == 0);
+     mstride = m - (mstart + mcount - 1) - mstart1;
+
+     /* 0th (DC) transform (vl of these), if mstart == 0 */
+     cld0 = X(mkplan_d)(plnr,
+                        X(mkproblem_rdft_1_d)(
+                             mstart == 0 ? X(mktensor_1d)(r, m * s, m * s)
+                             : X(mktensor_0d)(),
+                             X(mktensor_1d)(vl, vs, vs),
+                             IO, IO, kind)
+          );
+     if (!cld0) goto nada;
+
+     /* twiddle transforms: there are 2 x mcount1 x vl of these
+        (where 2 corresponds to the real and imaginary parts) ...
+        the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */
+     cld = X(mkplan_d)(plnr,
+                       X(mkproblem_rdft_1_d)(
+                            X(mktensor_1d)(r, m * s, m * s),
+                            X(mktensor_3d)(2, mstride * s, mstride * s,
+                                           mcount1, s, s,
+                                           vl, vs, vs),
+                            IO + s * mstart1, IO + s * mstart1, kind)
+          );
+     if (!cld) goto nada;
+
+     pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif);
+     pln->cld = cld;
+     pln->cld0 = cld0;
+     pln->r = r;
+     pln->m = m;
+     pln->s = s;
+     pln->vl = vl;
+     pln->vs = vs;
+     pln->td = 0;
+     pln->mstart1 = mstart1;
+     pln->mcount1 = mcount1;
+
+     {
+          double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl;
+          pln->super.super.ops = cld->ops; /* starts from cld's counts only; cld0->ops is not added here */
+          pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0;
+          pln->super.super.ops.add += 4.0 * n0;
+          pln->super.super.ops.other += 11.0 * n0;
+     }
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cld);
+     X(plan_destroy_internal)(cld0);
+     return (plan *) 0;
+}
+/* registers a solver for r built with X(mksolver_hc2hc), and a second one via the hook if it is set */
+static void regsolver(planner *plnr, INT r)
+{
+     S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw);
+     REGISTER_SOLVER(plnr, &(slv->super));
+     if (X(mksolver_hc2hc_hook)) {
+          slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw);
+          REGISTER_SOLVER(plnr, &(slv->super));
+     }
+}
+/* public entry point: registers the generic solver with r = 0 */
+void X(hc2hc_generic_register)(planner *p)
+{
+     regsolver(p, 0);
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/hc2hc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/rdft/hc2hc.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+/* rdft solver that pairs a child rdft plan (cld) with an hc2hc twiddle plan (cldw) */
+#include "hc2hc.h"
+/* optional alternate solver constructor; defined here as a null pointer */
+hc2hc_solver *(*X(mksolver_hc2hc_hook))(size_t, INT, hc2hc_mkinferior) = 0;
+/* plan: the child rdft plan, the twiddle plan produced by the solver's mkcldw, and the radix used */
+typedef struct {
+     plan_rdft super;
+     plan *cld;
+     plan *cldw;
+     INT r;
+} P;
+/* DIT order: run the child rdft from I to O, then the twiddle plan in place on O */
+static void apply_dit(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     plan_rdft *cld;
+     plan_hc2hc *cldw;
+
+     cld = (plan_rdft *) ego->cld;
+     cld->apply(ego->cld, I, O);
+
+     cldw = (plan_hc2hc *) ego->cldw;
+     cldw->apply(ego->cldw, O);
+}
+/* DIF order: run the twiddle plan in place on I, then the child rdft from I to O */
+static void apply_dif(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     plan_rdft *cld;
+     plan_hc2hc *cldw;
+
+     cldw = (plan_hc2hc *) ego->cldw;
+     cldw->apply(ego->cldw, I);
+
+     cld = (plan_rdft *) ego->cld;
+     cld->apply(ego->cld, I, O);
+}
+/* forwards the wakefulness change to both child plans */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+     X(plan_awake)(ego->cld, wakefulness);
+     X(plan_awake)(ego->cldw, wakefulness);
+}
+/* plan destructor: destroys both child plans */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cldw);
+     X(plan_destroy_internal)(ego->cld);
+}
+/* plan printer: reports "dit" or "dif", the radix, and both child plans */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(rdft-ct-%s/%D%(%p%)%(%p%))",
+              ego->super.apply == apply_dit ? "dit" : "dif",
+              ego->r, ego->cldw, ego->cld);
+}
+/* nonzero iff p_ is a rank-1 R2HC/HC2R problem (vector rank <= 1) with a usable radix r, 0 < r < n */
+static int applicable0(const hc2hc_solver *ego, const problem *p_, planner *plnr)
+{
+     const problem_rdft *p = (const problem_rdft *) p_;
+     INT r;
+
+     return (1
+             && p->sz->rnk == 1
+             && p->vecsz->rnk <= 1
+
+             && (/* either the problem is R2HC, which is solved by DIT */
+                  (p->kind[0] == R2HC)
+                  ||
+                  /* or the problem is HC2R, in which case it is solved
+                     by DIF, which destroys the input */
+                  (p->kind[0] == HC2R &&
+                   (p->I == p->O || !NO_DESTROY_INPUTP(plnr))))
+
+             && ((r = X(choose_radix)(ego->r, p->sz->dims[0].n)) > 0)
+             && p->sz->dims[0].n > r);
+}
+/* applicable0(), and additionally a vector problem is accepted only if NO_VRECURSE is not set */
+int X(hc2hc_applicable)(const hc2hc_solver *ego, const problem *p_, planner *plnr)
+{
+     const problem_rdft *p;
+
+     if (!applicable0(ego, p_, plnr))
+          return 0;
+
+     p = (const problem_rdft *) p_;
+
+     return (0
+             || p->vecsz->rnk == 0
+             || !NO_VRECURSEP(plnr)
+          );
+}
+/* solver entry point: splits n into r * m, plans cldw and cld for the kind; null plan on failure */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
+     const problem_rdft *p;
+     P *pln = 0;
+     plan *cld = 0, *cldw = 0;
+     INT n, r, m, v, ivs, ovs;
+     iodim *d;
+
+     static const plan_adt padt = {
+          X(rdft_solve), awake, print, destroy
+     };
+
+     if (NO_NONTHREADEDP(plnr) || !X(hc2hc_applicable)(ego, p_, plnr))
+          return (plan *) 0;
+
+     p = (const problem_rdft *) p_;
+     d = p->sz->dims;
+     n = d[0].n;
+     r = X(choose_radix)(ego->r, n);
+     m = n / r;
+
+     X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs);
+
+     switch (p->kind[0]) {
+         case R2HC: /* twiddle plan works on the output array, using output strides */
+              cldw = ego->mkcldw(ego,
+                                 R2HC, r, m, d[0].os, v, ovs, 0, (m+2)/2,
+                                 p->O, plnr);
+              if (!cldw) goto nada;
+
+              cld = X(mkplan_d)(plnr,
+                                X(mkproblem_rdft_d)(
+                                     X(mktensor_1d)(m, r * d[0].is, d[0].os),
+                                     X(mktensor_2d)(r, d[0].is, m * d[0].os,
+                                                    v, ivs, ovs),
+                                     p->I, p->O, p->kind)
+                   );
+              if (!cld) goto nada;
+
+              pln = MKPLAN_RDFT(P, &padt, apply_dit);
+              break;
+
+         case HC2R: /* twiddle plan works on the input array, using input strides */
+              cldw = ego->mkcldw(ego,
+                                 HC2R, r, m, d[0].is, v, ivs, 0, (m+2)/2,
+                                 p->I, plnr);
+              if (!cldw) goto nada;
+
+              cld = X(mkplan_d)(plnr,
+                                X(mkproblem_rdft_d)(
+                                     X(mktensor_1d)(m, d[0].is, r * d[0].os),
+                                     X(mktensor_2d)(r, m * d[0].is, d[0].os,
+                                                    v, ivs, ovs),
+                                     p->I, p->O, p->kind)
+                   );
+              if (!cld) goto nada;
+
+              pln = MKPLAN_RDFT(P, &padt, apply_dif);
+              break;
+
+         default:
+              A(0);
+     }
+
+     pln->cld = cld;
+     pln->cldw = cldw;
+     pln->r = r;
+     X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops);
+
+     /* inherit could_prune_now_p attribute from cldw */
+     pln->super.super.could_prune_now_p = cldw->could_prune_now_p;
+
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cldw);
+     X(plan_destroy_internal)(cld);
+     return (plan *) 0;
+}
+/* allocates an hc2hc solver of `size` bytes and records the radix r and the mkcldw callback */
+hc2hc_solver *X(mksolver_hc2hc)(size_t size, INT r, hc2hc_mkinferior mkcldw)
+{
+     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
+     hc2hc_solver *slv = (hc2hc_solver *)X(mksolver)(size, &sadt);
+     slv->r = r;
+     slv->mkcldw = mkcldw;
+     return slv;
+}
+/* allocates an hc2hc plan of `size` bytes via X(mkplan) and stores its apply function */
+plan *X(mkplan_hc2hc)(size_t size, const plan_adt *adt, hc2hcapply apply)
+{
+     plan_hc2hc *ego;
+
+     ego = (plan_hc2hc *) X(mkplan)(size, adt);
+     ego->apply = apply;
+
+     return &(ego->super);
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/hc2hc.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/rdft/hc2hc.h Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+/* shared declarations for the hc2hc solvers: plan and solver types, constructors, registration hooks */
+#include "rdft.h"
+/* hc2hcapply: in-place apply function of an hc2hc plan; hc2hc_mkinferior: callback that builds such a plan */
+typedef void (*hc2hcapply) (const plan *ego, R *IO);
+typedef struct hc2hc_solver_s hc2hc_solver;
+typedef plan *(*hc2hc_mkinferior)(const hc2hc_solver *ego,
+                                  rdft_kind kind, INT r, INT m, INT s,
+                                  INT vl, INT vs, INT mstart, INT mcount,
+                                  R *IO, planner *plnr);
+/* base type of every hc2hc plan: the generic plan header followed by its apply function */
+typedef struct {
+     plan super;
+     hc2hcapply apply;
+} plan_hc2hc;
+/* allocates a plan of `size` bytes with the given adt and apply; normally used through MKPLAN_HC2HC */
+extern plan *X(mkplan_hc2hc)(size_t size, const plan_adt *adt,
+                             hc2hcapply apply);
+/* typed wrapper: allocates sizeof(type) bytes and casts the result to type * */
+#define MKPLAN_HC2HC(type, adt, apply) \ + (type *)X(mkplan_hc2hc)(sizeof(type), adt, apply)
+/* base type of every hc2hc solver: the radix setting r and the callback that builds the twiddle plan */
+struct hc2hc_solver_s {
+     solver super;
+     INT r;
+
+     hc2hc_mkinferior mkcldw;
+};
+/* solver constructor, and an optional alternate constructor that callers test for null before using */
+hc2hc_solver *X(mksolver_hc2hc)(size_t size, INT r, hc2hc_mkinferior mkcldw);
+extern hc2hc_solver *(*X(mksolver_hc2hc_hook))(size_t, INT, hc2hc_mkinferior);
+/* registers the solvers for one hc2hc codelet with the planner */
+void X(regsolver_hc2hc_direct)(planner *plnr, khc2hc codelet,
+                               const hc2hc_desc *desc);
+/* applicability test, declared here so that it is visible outside hc2hc.c */
+int X(hc2hc_applicable)(const hc2hc_solver *, const problem *, planner *);
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/indirect.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/rdft/indirect.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + + +/* solvers/plans for vectors of small RDFT's that cannot be done + in-place directly. Use a rank-0 plan to rearrange the data + before or after the transform. Can also change an out-of-place + plan into a copy + in-place (where the in-place transform + is e.g. unit stride). */ + +/* FIXME: merge with rank-geq2.c(?), since this is just a special case + of a rank split where the first/second transform has rank 0. */ + +#include "rdft.h" + +typedef problem *(*mkcld_t) (const problem_rdft *p); +/* per-variant table: the apply routine installed in the plan, the constructor of the child transform problem, and the name emitted by print() */ +typedef struct { + rdftapply apply; + problem *(*mkcld)(const problem_rdft *p); + const char *nam; +} ndrct_adt; + +typedef struct { + solver super; + const ndrct_adt *adt; +} S; + +typedef struct { + plan_rdft super; + plan *cldcpy, *cld; + const S *slv; +} P; + +/*-----------------------------------------------------------------------*/ +/* first rearrange, then transform */ +static void apply_before(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + + { + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + cldcpy->apply(ego->cldcpy, I, O); + } + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply(ego->cld, O, O); + } +} +/* child problem for the before variant: same kind as p, input and output both p->O, with sz and vecsz copied through X(tensor_copy_inplace) using INPLACE_OS */ +static problem *mkcld_before(const problem_rdft *p) +{ + return X(mkproblem_rdft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_OS), + X(tensor_copy_inplace)(p->vecsz, INPLACE_OS), + p->O, p->O, p->kind); +} + +static const ndrct_adt adt_before = +{ + apply_before, mkcld_before, "rdft-indirect-before" +}; + +/*-----------------------------------------------------------------------*/ +/* first transform, then rearrange */ + +static void apply_after(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + + { + plan_rdft *cld = (plan_rdft *) 
ego->cld; + cld->apply(ego->cld, I, I); + } + { + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + cldcpy->apply(ego->cldcpy, I, O); + } +} +/* child problem for the after variant: same kind as p, input and output both p->I, with sz and vecsz copied through X(tensor_copy_inplace) using INPLACE_IS */ +static problem *mkcld_after(const problem_rdft *p) +{ + return X(mkproblem_rdft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_IS), + X(tensor_copy_inplace)(p->vecsz, INPLACE_IS), + p->I, p->I, p->kind); +} + +static const ndrct_adt adt_after = +{ + apply_after, mkcld_after, "rdft-indirect-after" +}; + +/*-----------------------------------------------------------------------*/ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); + X(plan_destroy_internal)(ego->cldcpy); +} +/* pass the wakefulness change on to both child plans */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldcpy, wakefulness); + X(plan_awake)(ego->cld, wakefulness); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->slv; + p->print(p, "(%s%(%p%)%(%p%))", s->adt->nam, ego->cld, ego->cldcpy); +} + +static int applicable0(const solver *ego_, const problem *p_, + const planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + return (1 + && FINITE_RNK(p->vecsz->rnk) + + /* problem must be a nontrivial transform, not just a copy */ + && p->sz->rnk > 0 + + && (0 + + /* problem must be in-place & require some + rearrangement of the data */ + || (p->I == p->O + && !(X(tensor_inplace_strides2)(p->sz, p->vecsz))) + + /* or problem must be out of place, transforming + from stride 1/2 to bigger stride, for apply_after */ + || (p->I != p->O && ego->adt->apply == apply_after + && !NO_DESTROY_INPUTP(plnr) + && X(tensor_min_istride)(p->sz) <= 2 + && X(tensor_min_ostride)(p->sz) > 2) + + /* or problem must be out of place, transforming + to stride 1/2 from bigger stride, for apply_before */ + || (p->I != p->O && ego->adt->apply == apply_before + && X(tensor_min_ostride)(p->sz) <= 2 + && X(tensor_min_istride)(p->sz) > 2) 
+ + ) + ); +} +/* applicable0, with one extra rule: when NO_INDIRECT_OP_P(plnr) holds, out-of-place problems are rejected */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr) +{ + if (!applicable0(ego_, p_, plnr)) return 0; + + if (NO_INDIRECT_OP_P(plnr)) { + const problem_rdft *p = (const problem_rdft *)p_; + if (p->I != p->O) return 0; + } + + return 1; +} +/* plan the copy child (zero-dimensional problem from p->I to p->O over vecsz appended with sz) and the transform child built by adt->mkcld; if either fails, both are passed to X(plan_destroy_internal) and a null plan is returned */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const problem_rdft *p = (const problem_rdft *) p_; + const S *ego = (const S *) ego_; + P *pln; + plan *cld = 0, *cldcpy = 0; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *) 0; + + cldcpy = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(tensor_append)(p->vecsz, p->sz), + p->I, p->O)); + if (!cldcpy) goto nada; + + cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0); + if (!cld) goto nada; + + pln = MKPLAN_RDFT(P, &padt, ego->adt->apply); + pln->cld = cld; + pln->cldcpy = cldcpy; + pln->slv = ego; + X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld); + X(plan_destroy_internal)(cldcpy); + return (plan *)0; +} + +static solver *mksolver(const ndrct_adt *adt) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->adt = adt; + return &(slv->super); +} +/* register one solver for each entry of adts[] (the before and after variants) */ +void X(rdft_indirect_register)(planner *p) +{ + unsigned i; + static const ndrct_adt *const adts[] = { + &adt_before, &adt_after + }; + + for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i) + REGISTER_SOLVER(p, mksolver(adts[i])); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/khc2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/khc2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it 
under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ct-hc2c.h" +/* register an hc2c codelet with planner p by forwarding every argument unchanged to X(regsolver_hc2c_direct) */ +void X(khc2c_register)(planner *p, khc2c codelet, const hc2c_desc *desc, + hc2c_kind hc2ckind) +{ + X(regsolver_hc2c_direct)(p, codelet, desc, hc2ckind); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/khc2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/khc2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "hc2hc.h" +/* register an hc2hc codelet with planner p by forwarding every argument unchanged to X(regsolver_hc2hc_direct) */ +void X(khc2hc_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc) +{ + X(regsolver_hc2hc_direct)(p, codelet, desc); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/kr2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/kr2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" +/* register three solvers built from the same codelet and descriptor, in this order: rdft r2c direct, rdft r2c directbuf, rdft2 direct */ +void X(kr2c_register)(planner *p, kr2c codelet, const kr2c_desc *desc) +{ + REGISTER_SOLVER(p, X(mksolver_rdft_r2c_direct)(codelet, desc)); + REGISTER_SOLVER(p, X(mksolver_rdft_r2c_directbuf)(codelet, desc)); + REGISTER_SOLVER(p, X(mksolver_rdft2_direct)(codelet, desc)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/kr2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/kr2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" +/* register the solver that X(mksolver_rdft_r2r_direct) builds from the codelet and its descriptor */ +void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc) +{ + REGISTER_SOLVER(p, X(mksolver_rdft_r2r_direct)(codelet, desc)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/nop.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/nop.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for vrank -infty RDFTs (nothing to do) */ + +#include "rdft.h" +/* apply routine of the no-op plan: every argument is only marked UNUSED */ +static void apply(const plan *ego_, R *I, R *O) +{ + UNUSED(ego_); + UNUSED(I); + UNUSED(O); +} +/* nonzero when the problem falls into one of the two cases listed inside the return expression */ +static int applicable(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + return 0 + /* case 1 : -infty vector rank */ + || (p->vecsz->rnk == RNK_MINFTY) + + /* case 2 : rank-0 in-place rdft */ + || (1 + && p->sz->rnk == 0 + && FINITE_RNK(p->vecsz->rnk) + && p->O == p->I + && X(tensor_inplace_strides)(p->vecsz) + ); +} + +static void print(const plan *ego, printer *p) +{ + UNUSED(ego); + p->print(p, "(rdft-nop)"); +} +/* return a plan that uses the empty apply above and has its op count zeroed, or a null plan when applicable() rejects the problem; the planner argument is unused */ +static plan *mkplan(const solver *ego, const problem *p, planner *plnr) +{ + static const plan_adt padt = { + X(rdft_solve), X(null_awake), print, X(plan_null_destroy) + }; + plan_rdft *pln; + + UNUSED(plnr); + + if (!applicable(ego, p)) + return (plan *) 0; + pln = MKPLAN_RDFT(plan_rdft, &padt, apply); + X(ops_zero)(&pln->super.ops); + + return &(pln->super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + return MKSOLVER(solver, &sadt); +} + +void X(rdft_nop_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/nop2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/nop2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of 
the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for vrank -infty RDFT2s (nothing to do), as well as in-place + rank-0 HC2R. Note that in-place rank-0 R2HC is *not* a no-op, because + we have to set the imaginary parts of the output to zero. */ + +#include "rdft.h" +/* apply routine of the no-op rdft2 plan: every argument is only marked UNUSED */ +static void apply(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + UNUSED(ego_); + UNUSED(r0); + UNUSED(r1); + UNUSED(cr); + UNUSED(ci); +} + +static int applicable(const solver *ego_, const problem *p_) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + UNUSED(ego_); + + return(0 + /* case 1 : -infty vector rank */ + || (p->vecsz->rnk == RNK_MINFTY) + + /* case 2 : rank-0 in-place rdft, except that + R2HC is not a no-op because it sets the imaginary + part to 0 */ + || (1 + && p->kind != R2HC + && p->sz->rnk == 0 + && FINITE_RNK(p->vecsz->rnk) + && (p->r0 == p->cr) + && X(rdft2_inplace_strides)(p, RNK_MINFTY) + )); +} + +static void print(const plan *ego, printer *p) +{ + UNUSED(ego); + p->print(p, "(rdft2-nop)"); +} +/* return a plan that uses the empty apply above and has its op count zeroed, or a null plan when applicable() rejects the problem; the planner argument is unused */ +static plan *mkplan(const solver *ego, const problem *p, planner *plnr) +{ + static const plan_adt padt = { + X(rdft2_solve), X(null_awake), print, X(plan_null_destroy) + }; + plan_rdft2 *pln; + + UNUSED(plnr); + + if (!applicable(ego, p)) + return (plan *) 0; + pln = MKPLAN_RDFT2(plan_rdft2, &padt, apply); + X(ops_zero)(&pln->super.ops); + + return &(pln->super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + return 
MKSOLVER(solver, &sadt); +} + +void X(rdft2_nop_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/plan.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/plan.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" +/* create a plan through X(mkplan)(size, adt), store the rdft apply routine in it, and return the address of its embedded base plan */ +plan *X(mkplan_rdft)(size_t size, const plan_adt *adt, rdftapply apply) +{ + plan_rdft *ego; + + ego = (plan_rdft *) X(mkplan)(size, adt); + ego->apply = apply; + + return &(ego->super); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/plan2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/plan2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" +/* create a plan through X(mkplan)(size, adt), store the rdft2 apply routine in it, and return the address of its embedded base plan */ +plan *X(mkplan_rdft2)(size_t size, const plan_adt *adt, rdft2apply apply) +{ + plan_rdft2 *ego; + + ego = (plan_rdft2 *) X(mkplan)(size, adt); + ego->apply = apply; + + return &(ego->super); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/problem.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/problem.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" +#include <stddef.h> + +static void destroy(problem *ego_) +{ + problem_rdft *ego = (problem_rdft *) ego_; +#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR) + X(ifree0)(ego->kind); +#endif + X(tensor_destroy2)(ego->vecsz, ego->sz); + X(ifree)(ego_); +} +/* feed the first rnk entries of kind[] into the md5 state */ +static void kind_hash(md5 *m, const rdft_kind *kind, int rnk) +{ + int i; + for (i = 0; i < rnk; ++i) + X(md5int)(m, kind[i]); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_rdft *p = (const problem_rdft *) p_; + X(md5puts)(m, "rdft"); + X(md5int)(m, p->I == p->O); + kind_hash(m, p->kind, p->sz->rnk); + X(md5int)(m, X(alignment_of)(p->I)); + X(md5int)(m, X(alignment_of)(p->O)); + X(tensor_md5)(m, p->sz); + X(tensor_md5)(m, p->vecsz); +} +/* store K(0.0) into every element of I addressed by the rnk dimensions in dims, stepping by each dimension's is stride; rank RNK_MINFTY touches nothing and rank 0 writes I[0] only */ +static void recur(const iodim *dims, int rnk, R *I) +{ + if (rnk == RNK_MINFTY) + return; + else if (rnk == 0) + I[0] = K(0.0); + else if (rnk > 0) { + INT i, n = dims[0].n, is = dims[0].is; + + if (rnk == 1) { + /* this case is redundant but faster */ + for (i = 0; i < n; ++i) + I[i * is] = K(0.0); + } else { + for (i = 0; i < n; ++i) + recur(dims + 1, rnk - 1, I + i * is); + } + } +} + +void X(rdft_zerotens)(tensor *sz, R *I) +{ + recur(sz->dims, sz->rnk, I); +} + +#define KSTR_LEN 8 +/* printable name for an rdft_kind, looked up in kstr[]; the assertion requires kind to index inside the table */ +const char *X(rdft_kind_str)(rdft_kind kind) +{ + static const char kstr[][KSTR_LEN] = { + "r2hc", "r2hc01", "r2hc10", "r2hc11", + "hc2r", "hc2r01", "hc2r10", "hc2r11", + "dht", + "redft00", "redft01", "redft10", "redft11", + "rodft00", "rodft01", "rodft10", "rodft11" + }; + A(kind >= 0 && kind < sizeof(kstr) / KSTR_LEN); + return kstr[kind]; +} + +static void print(const problem *ego_, printer *p) +{ + const problem_rdft *ego = (const problem_rdft *) ego_; + int i; + p->print(p, "(rdft %d %D %T %T", + X(alignment_of)(ego->I), + 
(INT)(ego->O - ego->I), + ego->sz, + ego->vecsz); + for (i = 0; i < ego->sz->rnk; ++i) + p->print(p, " %d", (int)ego->kind[i]); + p->print(p, ")"); +} +/* zero the problem's input array over the tensor formed by appending sz to vecsz; the temporary tensor is destroyed afterwards */ +static void zero(const problem *ego_) +{ + const problem_rdft *ego = (const problem_rdft *) ego_; + tensor *sz = X(tensor_append)(ego->vecsz, ego->sz); + X(rdft_zerotens)(sz, UNTAINT(ego->I)); + X(tensor_destroy)(sz); +} + +static const problem_adt padt = +{ + PROBLEM_RDFT, + hash, + zero, + print, + destroy +}; + +/* Dimensions of size 1 that are not REDFT/RODFT are no-ops and can be + eliminated. REDFT/RODFT unit dimensions often have factors of 2.0 + and suchlike from normalization and phases, although in principle + these constant factors from different dimensions could be combined. */ +static int nontrivial(const iodim *d, rdft_kind kind) +{ + return (d->n > 1 || kind == R2HC11 || kind == HC2R11 + || (REODFT_KINDP(kind) && kind != REDFT01 && kind != RODFT01)); +} +/* build an rdft problem from sz, vecsz and one kind per dimension of sz: an in-place problem that fails X(tensor_inplace_locations) yields X(mkproblem_unsolvable)(); dimensions rejected by nontrivial() are dropped; the kept dimensions are sorted with X(dimcmp), swapping their kinds alongside; the caller's tensors are not destroyed here */ +problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz, + R *I, R *O, const rdft_kind *kind) +{ + problem_rdft *ego; + int rnk = sz->rnk; + int i; + + A(X(tensor_kosherp)(sz)); + A(X(tensor_kosherp)(vecsz)); + A(FINITE_RNK(sz->rnk)); + + if (UNTAINT(I) == UNTAINT(O)) + I = O = JOIN_TAINT(I, O); + + if (I == O && !X(tensor_inplace_locations)(sz, vecsz)) + return X(mkproblem_unsolvable)(); + + for (i = rnk = 0; i < sz->rnk; ++i) { + A(sz->dims[i].n > 0); + if (nontrivial(sz->dims + i, kind[i])) + ++rnk; + } + +#if defined(STRUCT_HACK_KR) + ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft) + + sizeof(rdft_kind) + * (rnk > 0 ? 
rnk - 1 : 0), &padt); +#elif defined(STRUCT_HACK_C99) + ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft) + + sizeof(rdft_kind) * rnk, &padt); +#else + ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft), &padt); + ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS); +#endif + + /* do compression and sorting as in X(tensor_compress), but take + transform kind into account (sigh) */ + ego->sz = X(mktensor)(rnk); + for (i = rnk = 0; i < sz->rnk; ++i) { + if (nontrivial(sz->dims + i, kind[i])) { + ego->kind[rnk] = kind[i]; + ego->sz->dims[rnk++] = sz->dims[i]; + } + } + for (i = 0; i + 1 < rnk; ++i) { + int j; + for (j = i + 1; j < rnk; ++j) + if (X(dimcmp)(ego->sz->dims + i, ego->sz->dims + j) > 0) { + iodim dswap; + rdft_kind kswap; + dswap = ego->sz->dims[i]; + ego->sz->dims[i] = ego->sz->dims[j]; + ego->sz->dims[j] = dswap; + kswap = ego->kind[i]; + ego->kind[i] = ego->kind[j]; + ego->kind[j] = kswap; + } + } + + for (i = 0; i < rnk; ++i) + if (ego->sz->dims[i].n == 2 && (ego->kind[i] == REDFT00 + || ego->kind[i] == DHT + || ego->kind[i] == HC2R)) + ego->kind[i] = R2HC; /* size-2 transforms are equivalent */ + + ego->vecsz = X(tensor_compress_contiguous)(vecsz); + ego->I = I; + ego->O = O; + + A(FINITE_RNK(ego->sz->rnk)); + + return &(ego->super); +} + +/* Same as X(mkproblem_rdft), but also destroy input tensors. 
*/ +problem *X(mkproblem_rdft_d)(tensor *sz, tensor *vecsz, + R *I, R *O, const rdft_kind *kind) +{ + problem *p = X(mkproblem_rdft)(sz, vecsz, I, O, kind); + X(tensor_destroy2)(vecsz, sz); + return p; +} + +/* As above, but for rnk <= 1 only and takes a scalar kind parameter */ +problem *X(mkproblem_rdft_1)(const tensor *sz, const tensor *vecsz, + R *I, R *O, rdft_kind kind) +{ + A(sz->rnk <= 1); + return X(mkproblem_rdft)(sz, vecsz, I, O, &kind); +} +/* scalar-kind variant that forwards to X(mkproblem_rdft_d), so sz and vecsz are destroyed; asserts sz->rnk <= 1 */ +problem *X(mkproblem_rdft_1_d)(tensor *sz, tensor *vecsz, + R *I, R *O, rdft_kind kind) +{ + A(sz->rnk <= 1); + return X(mkproblem_rdft_d)(sz, vecsz, I, O, &kind); +} + +/* create a zero-dimensional problem */ +problem *X(mkproblem_rdft_0_d)(tensor *vecsz, R *I, R *O) +{ + return X(mkproblem_rdft_d)(X(mktensor_0d)(), vecsz, I, O, + (const rdft_kind *)0); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/problem2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/problem2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "dft.h" +#include "rdft.h" +#include <stddef.h> + +static void destroy(problem *ego_) +{ + problem_rdft2 *ego = (problem_rdft2 *) ego_; + X(tensor_destroy2)(ego->vecsz, ego->sz); + X(ifree)(ego_); +} + +static void hash(const problem *p_, md5 *m) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + X(md5puts)(m, "rdft2"); + X(md5int)(m, p->r0 == p->cr); + X(md5INT)(m, p->r1 - p->r0); + X(md5INT)(m, p->ci - p->cr); + X(md5int)(m, X(alignment_of)(p->r0)); + X(md5int)(m, X(alignment_of)(p->r1)); + X(md5int)(m, X(alignment_of)(p->cr)); + X(md5int)(m, X(alignment_of)(p->ci)); + X(md5int)(m, p->kind); + X(tensor_md5)(m, p->sz); + X(tensor_md5)(m, p->vecsz); +} + +static void print(const problem *ego_, printer *p) +{ + const problem_rdft2 *ego = (const problem_rdft2 *) ego_; + p->print(p, "(rdft2 %d %d %T %T)", + (int)(ego->cr == ego->r0), + (int)(ego->kind), + ego->sz, + ego->vecsz); +} +/* zero real data held in the two arrays I0 and I1: in the rank-1 case the n elements are cleared pairwise (one from I0, one from I1, both advanced by is) with a final lone I0 element when n is odd; higher ranks recurse over the leading dimension */ +static void recur(const iodim *dims, int rnk, R *I0, R *I1) +{ + if (rnk == RNK_MINFTY) + return; + else if (rnk == 0) + I0[0] = K(0.0); + else if (rnk > 0) { + INT i, n = dims[0].n, is = dims[0].is; + + if (rnk == 1) { + for (i = 0; i < n - 1; i += 2) { + *I0 = *I1 = K(0.0); + I0 += is; I1 += is; + } + if (i < n) + *I0 = K(0.0); + } else { + for (i = 0; i < n; ++i) + recur(dims + 1, rnk - 1, I0 + i * is, I1 + i * is); + } + } +} +/* walk the vector dimensions vdims and call recur() on the transform dimensions at every vector position */ +static void vrecur(const iodim *vdims, int vrnk, + const iodim *dims, int rnk, R *I0, R *I1) +{ + if (vrnk == RNK_MINFTY) + return; + else if (vrnk == 0) + recur(dims, rnk, I0, I1); + else if (vrnk > 0) { + INT i, n = vdims[0].n, is = vdims[0].is; + + for (i = 0; i < n; ++i) + vrecur(vdims + 1, vrnk - 1, + dims, rnk, I0 + i * is, I1 + i * is); + } +} +/* complex length matching real_n real elements: real_n / 2 + 1 for R2HC and HC2R, (real_n + 1) / 2 for R2HCII and HC2RIII; any other kind trips the assertion */ +INT X(rdft2_complex_n)(INT real_n, rdft_kind kind) +{ + switch (kind) { 
+ case R2HC: + case HC2R: + return (real_n / 2) + 1; + case R2HCII: + case HC2RIII: + return (real_n + 1) / 2; + default: + /* can't happen */ + A(0); + return 0; + } +} +/* zero the problem's input: the real arrays r0/r1 for R2HC kinds, otherwise the complex arrays cr/ci over a copy of sz whose last dimension is shrunk to the complex length */ +static void zero(const problem *ego_) +{ + const problem_rdft2 *ego = (const problem_rdft2 *) ego_; + if (R2HC_KINDP(ego->kind)) { + /* FIXME: can we avoid the double recursion somehow? */ + vrecur(ego->vecsz->dims, ego->vecsz->rnk, + ego->sz->dims, ego->sz->rnk, + UNTAINT(ego->r0), UNTAINT(ego->r1)); + } else { + tensor *sz; + tensor *sz2 = X(tensor_copy)(ego->sz); + int rnk = sz2->rnk; + if (rnk > 0) /* ~half as many complex outputs */ + sz2->dims[rnk-1].n = + X(rdft2_complex_n)(sz2->dims[rnk-1].n, ego->kind); + sz = X(tensor_append)(ego->vecsz, sz2); + X(tensor_destroy)(sz2); + X(dft_zerotens)(sz, UNTAINT(ego->cr), UNTAINT(ego->ci)); + X(tensor_destroy)(sz); + } +} + +static const problem_adt padt = +{ + PROBLEM_RDFT2, + hash, + zero, + print, + destroy +}; +/* build an rdft2 problem of kind R2HC, R2HCII, HC2R or HC2RIII; returns X(mkproblem_unsolvable)() when r0 and ci untaint to the same address; the caller's tensors are not destroyed here */ +problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz, + R *r0, R *r1, R *cr, R *ci, + rdft_kind kind) +{ + problem_rdft2 *ego; + + A(kind == R2HC || kind == R2HCII || kind == HC2R || kind == HC2RIII); + A(X(tensor_kosherp)(sz)); + A(X(tensor_kosherp)(vecsz)); + A(FINITE_RNK(sz->rnk)); + + /* require in-place problems to use r0 == cr */ + if (UNTAINT(r0) == UNTAINT(ci)) + return X(mkproblem_unsolvable)(); + + /* FIXME: should check UNTAINT(r1) == UNTAINT(cr) but + only if odd elements exist, which requires compressing the + tensors first */ + + if (UNTAINT(r0) == UNTAINT(cr)) + r0 = cr = JOIN_TAINT(r0, cr); + + ego = (problem_rdft2 *)X(mkproblem)(sizeof(problem_rdft2), &padt); + + if (sz->rnk > 1) { /* have to compress rnk-1 dims separately, ugh */ + tensor *szc = X(tensor_copy_except)(sz, sz->rnk - 1); + tensor *szr = X(tensor_copy_sub)(sz, sz->rnk - 1, 1); + tensor *szcc = X(tensor_compress)(szc); + if (szcc->rnk > 0) + ego->sz = X(tensor_append)(szcc, szr); + else + ego->sz = X(tensor_compress)(szr); + 
X(tensor_destroy2)(szc, szr); X(tensor_destroy)(szcc); + } else { + ego->sz = X(tensor_compress)(sz); + } + ego->vecsz = X(tensor_compress_contiguous)(vecsz); + ego->r0 = r0; + ego->r1 = r1; + ego->cr = cr; + ego->ci = ci; + ego->kind = kind; + + A(FINITE_RNK(ego->sz->rnk)); + return &(ego->super); + +} + +/* Same as X(mkproblem_rdft2), but also destroy input tensors. */ +problem *X(mkproblem_rdft2_d)(tensor *sz, tensor *vecsz, + R *r0, R *r1, R *cr, R *ci, rdft_kind kind) +{ + problem *p = X(mkproblem_rdft2)(sz, vecsz, r0, r1, cr, ci, kind); + X(tensor_destroy2)(vecsz, sz); + return p; +} + +/* Same as X(mkproblem_rdft2_d), but with only one R pointer. + Used by the API. */ +problem *X(mkproblem_rdft2_d_3pointers)(tensor *sz, tensor *vecsz, + R *r0, R *cr, R *ci, rdft_kind kind) +{ + problem *p; + int rnk = sz->rnk; + R *r1; + + if (rnk == 0) + r1 = r0; + else if (R2HC_KINDP(kind)) { + r1 = r0 + sz->dims[rnk-1].is; + sz->dims[rnk-1].is *= 2; + } else { + r1 = r0 + sz->dims[rnk-1].os; + sz->dims[rnk-1].os *= 2; + } + + p = X(mkproblem_rdft2)(sz, vecsz, r0, r1, cr, ci, kind); + X(tensor_destroy2)(vecsz, sz); + return p; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rank-geq2-rdft2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rank-geq2-rdft2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for RDFT2 of rank >= 2 (multidimensional) */ + +#include "rdft.h" +#include "dft.h" + +typedef struct { + solver super; + int spltrnk; + const int *buddies; + int nbuddies; +} S; + +typedef struct { + plan_dft super; + plan *cldr, *cldc; + const S *solver; +} P; +/* R2HC direction: run the rdft2 child first, then the complex dft child in place on (cr, ci) */ +static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + + { + plan_rdft2 *cldr = (plan_rdft2 *) ego->cldr; + cldr->apply((plan *) cldr, r0, r1, cr, ci); + } + + { + plan_dft *cldc = (plan_dft *) ego->cldc; + cldc->apply((plan *) cldc, cr, ci, cr, ci); + } +} +/* HC2R direction: run the complex dft child in place with the two arrays passed in swapped order (ci, cr), then the rdft2 child */ +static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + + { + plan_dft *cldc = (plan_dft *) ego->cldc; + cldc->apply((plan *) cldc, ci, cr, ci, cr); + } + + { + plan_rdft2 *cldr = (plan_rdft2 *) ego->cldr; + cldr->apply((plan *) cldr, r0, r1, cr, ci); + } + +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cldr, wakefulness); + X(plan_awake)(ego->cldc, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldr); + X(plan_destroy_internal)(ego->cldc); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + p->print(p, "(rdft2-rank>=2/%d%(%p%)%(%p%))", + s->spltrnk, ego->cldr, ego->cldc); +} +/* pick the split rank into *rp through X(pickdim); returns 0 when no dimension is picked or when the resulting split would not reduce the rank */ +static int picksplit(const S *ego, const tensor *sz, int *rp) +{ + A(sz->rnk > 1); /* cannot split rnk <= 1 */ + if (!X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp)) + return 0; + *rp += 1; /* convert from dim. 
index to rank */ + if (*rp >= sz->rnk) /* split must reduce rank */ + return 0; + return 1; +} + +static int applicable0(const solver *ego_, const problem *p_, int *rp, + const planner *plnr) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + const S *ego = (const S *)ego_; + return (1 + && FINITE_RNK(p->sz->rnk) && FINITE_RNK(p->vecsz->rnk) + + /* FIXME: multidimensional R2HCII ? */ + && (p->kind == R2HC || p->kind == HC2R) + + && p->sz->rnk >= 2 + && picksplit(ego, p->sz, rp) + && (0 + + /* can work out-of-place, but HC2R destroys input */ + || (p->r0 != p->cr && + (p->kind == R2HC || !NO_DESTROY_INPUTP(plnr))) + + /* FIXME: what are sufficient conditions for inplace? */ + || (p->r0 == p->cr)) + ); +} + +/* TODO: revise this. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *rp) +{ + const S *ego = (const S *)ego_; + + if (!applicable0(ego_, p_, rp, plnr)) return 0; + + if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0])) + return 0; + + if (NO_UGLYP(plnr)) { + const problem_rdft2 *p = (const problem_rdft2 *) p_; + + /* Heuristic: if the vector stride is greater than the transform + size, don't use (prefer to do the vector loop first with a + vrank-geq1 plan). */ + if (p->vecsz->rnk > 0 && + X(tensor_min_stride)(p->vecsz) + > X(rdft2_tensor_max_index)(p->sz, p->kind)) + return 0; + } + + return 1; +} +/* split p->sz at spltrnk into sz1 and sz2: cldr is the rdft2 over sz2 with sz1 appended to the vector tensor, cldc is the in-place complex dft over sz1 with the halved sz2 appended to the vector tensor; the four temporary tensors are destroyed on both the success and the nada paths */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft2 *p; + P *pln; + plan *cldr = 0, *cldc = 0; + tensor *sz1, *sz2, *vecszi, *sz2i; + int spltrnk; + inplace_kind k; + problem *cldp; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &spltrnk)) + return (plan *) 0; + + p = (const problem_rdft2 *) p_; + X(tensor_split)(p->sz, &sz1, spltrnk, &sz2); + + k = p->kind == R2HC ? 
INPLACE_OS : INPLACE_IS; + vecszi = X(tensor_copy_inplace)(p->vecsz, k); + sz2i = X(tensor_copy_inplace)(sz2, k); + + /* complex data is ~half of real */ + sz2i->dims[sz2i->rnk - 1].n = sz2i->dims[sz2i->rnk - 1].n/2 + 1; + + cldr = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)(X(tensor_copy)(sz2), + X(tensor_append)(p->vecsz, sz1), + p->r0, p->r1, + p->cr, p->ci, p->kind)); + if (!cldr) goto nada; + + if (p->kind == R2HC) + cldp = X(mkproblem_dft_d)(X(tensor_copy_inplace)(sz1, k), + X(tensor_append)(vecszi, sz2i), + p->cr, p->ci, p->cr, p->ci); + else /* HC2R must swap re/im parts to get IDFT */ + cldp = X(mkproblem_dft_d)(X(tensor_copy_inplace)(sz1, k), + X(tensor_append)(vecszi, sz2i), + p->ci, p->cr, p->ci, p->cr); + cldc = X(mkplan_d)(plnr, cldp); + if (!cldc) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply_hc2r); + + pln->cldr = cldr; + pln->cldc = cldc; + + pln->solver = ego; + X(ops_add)(&cldr->ops, &cldc->ops, &pln->super.super.ops); + + X(tensor_destroy4)(sz2i, vecszi, sz2, sz1); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cldr); + X(plan_destroy_internal)(cldc); + X(tensor_destroy4)(sz2i, vecszi, sz2, sz1); + return (plan *) 0; +} + +static solver *mksolver(int spltrnk, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->spltrnk = spltrnk; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} +/* register one solver per entry of buddies[], each given the whole buddies array and its own entry as spltrnk */ +void X(rdft2_rank_geq2_register)(planner *p) +{ + int i; + static const int buddies[] = { 1, 0, -2 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); + + /* FIXME: Should we try more buddies? See also dft/rank-geq2. 
*/ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rank-geq2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rank-geq2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for RDFT of rank >= 2 (multidimensional) */ + +/* FIXME: this solver cannot strictly be applied to multidimensional + DHTs, since the latter are not separable...up to rnk-1 additional + post-processing passes may be required. See also: + + R. N. Bracewell, O. Buneman, H. Hao, and J. Villasenor, "Fast + two-dimensional Hartley transform," Proc. IEEE 74, 1282-1283 (1986). + + H. Hao and R. N. Bracewell, "A three-dimensional DFT algorithm + using the fast Hartley transform," Proc. IEEE 75(2), 264-266 (1987). +*/ + +#include "rdft.h" + +typedef struct { + solver super; + int spltrnk; + const int *buddies; + int nbuddies; +} S; + +typedef struct { + plan_rdft super; + + plan *cld1, *cld2; + const S *solver; +} P; + +/* Compute multi-dimensional RDFT by applying the two cld plans + (lower-rnk RDFTs). 
*/ +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld1, *cld2; + + cld1 = (plan_rdft *) ego->cld1; + cld1->apply(ego->cld1, I, O); + + cld2 = (plan_rdft *) ego->cld2; + cld2->apply(ego->cld2, O, O); +} + + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + p->print(p, "(rdft-rank>=2/%d%(%p%)%(%p%))", + s->spltrnk, ego->cld1, ego->cld2); +} + +static int picksplit(const S *ego, const tensor *sz, int *rp) +{ + A(sz->rnk > 1); /* cannot split rnk <= 1 */ + if (!X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp)) + return 0; + *rp += 1; /* convert from dim. index to rank */ + if (*rp >= sz->rnk) /* split must reduce rank */ + return 0; + return 1; +} + +static int applicable0(const solver *ego_, const problem *p_, int *rp) +{ + const problem_rdft *p = (const problem_rdft *) p_; + const S *ego = (const S *)ego_; + return (1 + && FINITE_RNK(p->sz->rnk) && FINITE_RNK(p->vecsz->rnk) + && p->sz->rnk >= 2 + && picksplit(ego, p->sz, rp) + ); +} + +/* TODO: revise this. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *rp) +{ + const S *ego = (const S *)ego_; + + if (!applicable0(ego_, p_, rp)) return 0; + + if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0])) + return 0; + + if (NO_UGLYP(plnr)) { + /* Heuristic: if the vector stride is greater than the transform + sz, don't use (prefer to do the vector loop first with a + vrank-geq1 plan). 
*/ + const problem_rdft *p = (const problem_rdft *) p_; + + if (p->vecsz->rnk > 0 && + X(tensor_min_stride)(p->vecsz) > X(tensor_max_index)(p->sz)) + return 0; + } + + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p; + P *pln; + plan *cld1 = 0, *cld2 = 0; + tensor *sz1, *sz2, *vecszi, *sz2i; + int spltrnk; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &spltrnk)) + return (plan *) 0; + + p = (const problem_rdft *) p_; + X(tensor_split)(p->sz, &sz1, spltrnk, &sz2); + vecszi = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS); + sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS); + + cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)(X(tensor_copy)(sz2), + X(tensor_append)(p->vecsz, sz1), + p->I, p->O, p->kind + spltrnk)); + if (!cld1) goto nada; + + cld2 = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)( + X(tensor_copy_inplace)(sz1, INPLACE_OS), + X(tensor_append)(vecszi, sz2i), + p->O, p->O, p->kind)); + if (!cld2) goto nada; + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->cld1 = cld1; + pln->cld2 = cld2; + + pln->solver = ego; + X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); + + X(tensor_destroy4)(sz2, sz1, vecszi, sz2i); + + return &(pln->super.super); + + nada: + X(plan_destroy_internal)(cld2); + X(plan_destroy_internal)(cld1); + X(tensor_destroy4)(sz2, sz1, vecszi, sz2i); + return (plan *) 0; +} + +static solver *mksolver(int spltrnk, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->spltrnk = spltrnk; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} + +void X(rdft_rank_geq2_register)(planner *p) +{ + int i; + static const int buddies[] = { 1, 0, -2 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, 
mksolver(buddies[i], buddies, nbuddies)); + + /* FIXME: Should we try more buddies? See also dft/rank-geq2. */ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rank0-rdft2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rank0-rdft2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for rank-0 RDFT2 (copy operations, plus setting 0 imag. 
parts) */ + +#include "rdft.h" + +#ifdef HAVE_STRING_H +#include <string.h> /* for memcpy() */ +#endif + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + INT vl; + INT ivs, ovs; + plan *cldcpy; +} P; + +static int applicable(const problem *p_) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + return (1 + && p->sz->rnk == 0 + && (p->kind == HC2R + || + (1 + && p->kind == R2HC + + && p->vecsz->rnk <= 1 + + && ((p->r0 != p->cr) + || + X(rdft2_inplace_strides)(p, RNK_MINFTY)) )) + ); +} + +static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + INT i, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + + UNUSED(r1); /* rank-0 has no real odd-index elements */ + + for (i = 4; i <= vl; i += 4) { + R x0, x1, x2, x3; + x0 = *r0; r0 += ivs; + x1 = *r0; r0 += ivs; + x2 = *r0; r0 += ivs; + x3 = *r0; r0 += ivs; + *cr = x0; cr += ovs; + *ci = K(0.0); ci += ovs; + *cr = x1; cr += ovs; + *ci = K(0.0); ci += ovs; + *cr = x2; cr += ovs; + *ci = K(0.0); ci += ovs; + *cr = x3; cr += ovs; + *ci = K(0.0); ci += ovs; + } + for (; i < vl + 4; ++i) { + R x0; + x0 = *r0; r0 += ivs; + *cr = x0; cr += ovs; + *ci = K(0.0); ci += ovs; + } +} + +/* in-place r2hc rank-0: set imaginary parts of output to 0 */ +static void apply_r2hc_inplace(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + INT i, vl = ego->vl; + INT ovs = ego->ovs; + + UNUSED(r0); UNUSED(r1); UNUSED(cr); + + for (i = 4; i <= vl; i += 4) { + *ci = K(0.0); ci += ovs; + *ci = K(0.0); ci += ovs; + *ci = K(0.0); ci += ovs; + *ci = K(0.0); ci += ovs; + } + for (; i < vl + 4; ++i) { + *ci = K(0.0); ci += ovs; + } +} + +/* a rank-0 HC2R rdft2 problem is just a copy from cr to r0, + so we can use a rank-0 rdft plan */ +static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + UNUSED(ci); + UNUSED(r1); + 
cldcpy->apply((plan *) cldcpy, cr, r0); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + if (ego->cldcpy) + X(plan_awake)(ego->cldcpy, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + if (ego->cldcpy) + X(plan_destroy_internal)(ego->cldcpy); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + if (ego->cldcpy) + p->print(p, "(rdft2-hc2r-rank0%(%p%))", ego->cldcpy); + else + p->print(p, "(rdft2-r2hc-rank0%v)", ego->vl); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const problem_rdft2 *p; + plan *cldcpy = (plan *) 0; + P *pln; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + UNUSED(ego_); + + if (!applicable(p_)) + return (plan *) 0; + + p = (const problem_rdft2 *) p_; + + if (p->kind == HC2R) { + cldcpy = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(tensor_copy)(p->vecsz), + p->cr, p->r0)); + if (!cldcpy) return (plan *) 0; + } + + pln = MKPLAN_RDFT2(P, &padt, + p->kind == R2HC ? + (p->r0 == p->cr ? 
apply_r2hc_inplace : apply_r2hc) + : apply_hc2r); + + if (p->kind == R2HC) + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + pln->cldcpy = cldcpy; + + if (p->kind == R2HC) { + /* vl loads, 2*vl stores */ + X(ops_other)(3 * pln->vl, &pln->super.super.ops); + } + else { + pln->super.super.ops = cldcpy->ops; + } + + return &(pln->super.super); +} + +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(rdft2_rank0_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rank0.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rank0.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* plans for rank-0 RDFTs (copy operations) */ + +#include "rdft.h" + +#ifdef HAVE_STRING_H +#include <string.h> /* for memcpy() */ +#endif + +#define MAXRNK 32 /* FIXME: should malloc() */ + +typedef struct { + plan_rdft super; + INT vl; + int rnk; + iodim d[MAXRNK]; + const char *nam; +} P; + +typedef struct { + solver super; + rdftapply apply; + int (*applicable)(const P *pln, const problem_rdft *p); + const char *nam; +} S; + +/* copy up to MAXRNK dimensions from problem into plan. If a + contiguous dimension exists, save its length in pln->vl */ +static int fill_iodim(P *pln, const problem_rdft *p) +{ + int i; + const tensor *vecsz = p->vecsz; + + pln->vl = 1; + pln->rnk = 0; + for (i = 0; i < vecsz->rnk; ++i) { + /* extract contiguous dimensions */ + if (pln->vl == 1 && + vecsz->dims[i].is == 1 && vecsz->dims[i].os == 1) + pln->vl = vecsz->dims[i].n; + else if (pln->rnk == MAXRNK) + return 0; + else + pln->d[pln->rnk++] = vecsz->dims[i]; + } + + return 1; +} + +/* generic higher-rank copy routine, calls cpy2d() to do the real work */ +static void copy(const iodim *d, int rnk, INT vl, + R *I, R *O, + cpy2d_func cpy2d) +{ + A(rnk >= 2); + if (rnk == 2) + cpy2d(I, O, d[0].n, d[0].is, d[0].os, d[1].n, d[1].is, d[1].os, vl); + else { + INT i; + for (i = 0; i < d[0].n; ++i, I += d[0].is, O += d[0].os) + copy(d + 1, rnk - 1, vl, I, O, cpy2d); + } +} + +/* FIXME: should be more general */ +static int transposep(const P *pln) +{ + int i; + + for (i = 0; i < pln->rnk - 2; ++i) + if (pln->d[i].is != pln->d[i].os) + return 0; + + return (pln->d[i].n == pln->d[i+1].n && + pln->d[i].is == pln->d[i+1].os && + pln->d[i].os == pln->d[i+1].is); +} + +/* generic higher-rank transpose routine, calls transpose2d() to do + * the real work */ +static void 
transpose(const iodim *d, int rnk, INT vl, + R *I, + transpose_func transpose2d) +{ + A(rnk >= 2); + if (rnk == 2) + transpose2d(I, d[0].n, d[0].is, d[0].os, vl); + else { + INT i; + for (i = 0; i < d[0].n; ++i, I += d[0].is) + transpose(d + 1, rnk - 1, vl, I, transpose2d); + } +} + +/**************************************************************/ +/* rank 0,1,2, out of place, iterative */ +static void apply_iter(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + + switch (ego->rnk) { + case 0: + X(cpy1d)(I, O, ego->vl, 1, 1, 1); + break; + case 1: + X(cpy1d)(I, O, + ego->d[0].n, ego->d[0].is, ego->d[0].os, + ego->vl); + break; + default: + copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_ci)); + break; + } +} + +static int applicable_iter(const P *pln, const problem_rdft *p) +{ + UNUSED(pln); + return (p->I != p->O); +} + +/**************************************************************/ +/* out of place, write contiguous output */ +static void apply_cpy2dco(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_co)); +} + +static int applicable_cpy2dco(const P *pln, const problem_rdft *p) +{ + int rnk = pln->rnk; + return (1 + && p->I != p->O + && rnk >= 2 + + /* must not duplicate apply_iter */ + && (X(iabs)(pln->d[rnk - 2].is) <= X(iabs)(pln->d[rnk - 1].is) + || + X(iabs)(pln->d[rnk - 2].os) <= X(iabs)(pln->d[rnk - 1].os)) + ); +} + +/**************************************************************/ +/* out of place, tiled, no buffering */ +static void apply_tiled(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_tiled)); +} + +static int applicable_tiled(const P *pln, const problem_rdft *p) +{ + return (1 + && p->I != p->O + && pln->rnk >= 2 + + /* somewhat arbitrary */ + && X(compute_tilesz)(pln->vl, 1) > 4 + ); +} + +/**************************************************************/ +/* out of place, tiled, with buffer */ 
+static void apply_tiledbuf(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + copy(ego->d, ego->rnk, ego->vl, I, O, X(cpy2d_tiledbuf)); +} + +#define applicable_tiledbuf applicable_tiled + +/**************************************************************/ +/* rank 0, out of place, using memcpy */ +static void apply_memcpy(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + + A(ego->rnk == 0); + memcpy(O, I, ego->vl * sizeof(R)); +} + +static int applicable_memcpy(const P *pln, const problem_rdft *p) +{ + return (1 + && p->I != p->O + && pln->rnk == 0 + && pln->vl > 2 /* do not bother memcpy-ing complex numbers */ + ); +} + +/**************************************************************/ +/* rank > 0 vecloop, out of place, using memcpy (e.g. out-of-place + transposes of vl-tuples ... for large vl it should be more + efficient to use memcpy than the tiled stuff). */ + +static void memcpy_loop(INT cpysz, int rnk, const iodim *d, R *I, R *O) +{ + INT i, n = d->n, is = d->is, os = d->os; + if (rnk == 1) + for (i = 0; i < n; ++i, I += is, O += os) + memcpy(O, I, cpysz); + else { + --rnk; ++d; + for (i = 0; i < n; ++i, I += is, O += os) + memcpy_loop(cpysz, rnk, d, I, O); + } +} + +static void apply_memcpy_loop(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + memcpy_loop(ego->vl * sizeof(R), ego->rnk, ego->d, I, O); +} + +static int applicable_memcpy_loop(const P *pln, const problem_rdft *p) +{ + return (p->I != p->O + && pln->rnk > 0 + && pln->vl > 2 /* do not bother memcpy-ing complex numbers */); +} + +/**************************************************************/ +/* rank 2, in place, square transpose, iterative */ +static void apply_ip_sq(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + UNUSED(O); + transpose(ego->d, ego->rnk, ego->vl, I, X(transpose)); +} + + +static int applicable_ip_sq(const P *pln, const problem_rdft *p) +{ + return (1 + && p->I == p->O + && pln->rnk >= 2 + && 
transposep(pln)); +} + +/**************************************************************/ +/* rank 2, in place, square transpose, tiled */ +static void apply_ip_sq_tiled(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + UNUSED(O); + transpose(ego->d, ego->rnk, ego->vl, I, X(transpose_tiled)); +} + +static int applicable_ip_sq_tiled(const P *pln, const problem_rdft *p) +{ + return (1 + && applicable_ip_sq(pln, p) + + /* somewhat arbitrary */ + && X(compute_tilesz)(pln->vl, 2) > 4 + ); +} + +/**************************************************************/ +/* rank 2, in place, square transpose, tiled, buffered */ +static void apply_ip_sq_tiledbuf(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + UNUSED(O); + transpose(ego->d, ego->rnk, ego->vl, I, X(transpose_tiledbuf)); +} + +#define applicable_ip_sq_tiledbuf applicable_ip_sq_tiled + +/**************************************************************/ +static int applicable(const S *ego, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + P pln; + return (1 + && p->sz->rnk == 0 + && FINITE_RNK(p->vecsz->rnk) + && fill_iodim(&pln, p) + && ego->applicable(&pln, p) + ); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + int i; + p->print(p, "(%s/%D", ego->nam, ego->vl); + for (i = 0; i < ego->rnk; ++i) + p->print(p, "%v", ego->d[i].n); + p->print(p, ")"); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const problem_rdft *p; + const S *ego = (const S *) ego_; + P *pln; + int retval; + + static const plan_adt padt = { + X(rdft_solve), X(null_awake), print, X(plan_null_destroy) + }; + + UNUSED(plnr); + + if (!applicable(ego, p_)) + return (plan *) 0; + + p = (const problem_rdft *) p_; + pln = MKPLAN_RDFT(P, &padt, ego->apply); + + retval = fill_iodim(pln, p); + (void)retval; /* UNUSED unless DEBUG */ + A(retval); + A(pln->vl > 0); /* because FINITE_RNK(p->vecsz->rnk) holds */ + 
pln->nam = ego->nam; + + /* X(tensor_sz)(p->vecsz) loads, X(tensor_sz)(p->vecsz) stores */ + X(ops_other)(2 * X(tensor_sz)(p->vecsz), &pln->super.super.ops); + return &(pln->super.super); +} + + +void X(rdft_rank0_register)(planner *p) +{ + unsigned i; + static struct { + rdftapply apply; + int (*applicable)(const P *, const problem_rdft *); + const char *nam; + } tab[] = { + { apply_memcpy, applicable_memcpy, "rdft-rank0-memcpy" }, + { apply_memcpy_loop, applicable_memcpy_loop, + "rdft-rank0-memcpy-loop" }, + { apply_iter, applicable_iter, "rdft-rank0-iter-ci" }, + { apply_cpy2dco, applicable_cpy2dco, "rdft-rank0-iter-co" }, + { apply_tiled, applicable_tiled, "rdft-rank0-tiled" }, + { apply_tiledbuf, applicable_tiledbuf, "rdft-rank0-tiledbuf" }, + { apply_ip_sq, applicable_ip_sq, "rdft-rank0-ip-sq" }, + { + apply_ip_sq_tiled, + applicable_ip_sq_tiled, + "rdft-rank0-ip-sq-tiled" + }, + { + apply_ip_sq_tiledbuf, + applicable_ip_sq_tiledbuf, + "rdft-rank0-ip-sq-tiledbuf" + }, + }; + + for (i = 0; i < sizeof(tab) / sizeof(tab[0]); ++i) { + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->apply = tab[i].apply; + slv->applicable = tab[i].applicable; + slv->nam = tab[i].nam; + REGISTER_SOLVER(p, &(slv->super)); + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rdft-dht.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rdft-dht.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Solve an R2HC/HC2R problem via post/pre processing of a DHT. This + is mainly useful because we can use Rader to compute DHTs of prime + sizes. It also allows us to express hc2r problems in terms of r2hc + (via dht-r2hc), and to do hc2r problems without destroying the input. */ + +#include "rdft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld; + INT is, os; + INT n; +} P; + +static void apply_r2hc(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT os; + INT i, n; + + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, I, O); + } + + n = ego->n; + os = ego->os; + for (i = 1; i < n - i; ++i) { + E a, b; + a = K(0.5) * O[os * i]; + b = K(0.5) * O[os * (n - i)]; + O[os * i] = a + b; +#if FFT_SIGN == -1 + O[os * (n - i)] = b - a; +#else + O[os * (n - i)] = a - b; +#endif + } +} + +/* hc2r, destroying input as usual */ +static void apply_hc2r(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is; + INT i, n = ego->n; + + for (i = 1; i < n - i; ++i) { + E a, b; + a = I[is * i]; + b = I[is * (n - i)]; +#if FFT_SIGN == -1 + I[is * i] = a - b; + I[is * (n - i)] = a + b; +#else + I[is * i] = a + b; + I[is * (n - i)] = a - b; +#endif + } + + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, I, O); + } +} + +/* hc2r, without destroying input */ +static void apply_hc2r_save(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = 
ego->is, os = ego->os; + INT i, n = ego->n; + + O[0] = I[0]; + for (i = 1; i < n - i; ++i) { + E a, b; + a = I[is * i]; + b = I[is * (n - i)]; +#if FFT_SIGN == -1 + O[os * i] = a - b; + O[os * (n - i)] = a + b; +#else + O[os * i] = a + b; + O[os * (n - i)] = a - b; +#endif + } + if (i == n - i) + O[os * i] = I[is * i]; + + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, O, O); + } +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(%s-dht-%D%(%p%))", + ego->super.apply == apply_r2hc ? "r2hc" : "hc2r", + ego->n, ego->cld); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk == 0 + && (p->kind[0] == R2HC || p->kind[0] == HC2R) + + /* hack: size-2 DHT etc. 
are defined as being equivalent + to size-2 R2HC in problem.c, so we need this to prevent + infinite loops for size 2 in EXHAUSTIVE mode: */ + && p->sz->dims[0].n > 2 + ); +} + +static int applicable(const solver *ego, const problem *p_, + const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p_)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + problem *cldp; + plan *cld; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + if (p->kind[0] == R2HC || !NO_DESTROY_INPUTP(plnr)) + cldp = X(mkproblem_rdft_1)(p->sz, p->vecsz, p->I, p->O, DHT); + else { + tensor *sz = X(tensor_copy_inplace)(p->sz, INPLACE_OS); + cldp = X(mkproblem_rdft_1)(sz, p->vecsz, p->O, p->O, DHT); + X(tensor_destroy)(sz); + } + cld = X(mkplan_d)(plnr, cldp); + if (!cld) return (plan *)0; + + pln = MKPLAN_RDFT(P, &padt, p->kind[0] == R2HC ? + apply_r2hc : (NO_DESTROY_INPUTP(plnr) ? + apply_hc2r_save : apply_hc2r)); + pln->n = p->sz->dims[0].n; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + pln->cld = cld; + + pln->super.super.ops = cld->ops; + pln->super.super.ops.other += 4 * ((pln->n - 1)/2); + pln->super.super.ops.add += 2 * ((pln->n - 1)/2); + if (p->kind[0] == R2HC) + pln->super.super.ops.mul += 2 * ((pln->n - 1)/2); + if (pln->super.apply == apply_hc2r_save) + pln->super.super.ops.other += 2 + (pln->n % 2 ? 
0 : 2); + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(rdft_dht_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rdft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rdft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __RDFT_H__ +#define __RDFT_H__ + +#include "ifftw.h" +#include "codelet-rdft.h" + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/* problem.c: problem_rdft is the base problem followed by the tensors sz and vecsz, the data pointers I and O, and kind, which is declared as a one-element array, an unsized array or a pointer depending on STRUCT_HACK_KR / STRUCT_HACK_C99. */ +typedef struct { + problem super; + tensor *sz, *vecsz; + R *I, *O; +#if defined(STRUCT_HACK_KR) + rdft_kind kind[1]; +#elif defined(STRUCT_HACK_C99) + rdft_kind kind[]; +#else + rdft_kind *kind; +#endif +} problem_rdft; + +void X(rdft_zerotens)(tensor *sz, R *I); +problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz, + R *I, R *O, const rdft_kind *kind); +problem *X(mkproblem_rdft_d)(tensor *sz, tensor *vecsz, + R *I, R *O, const rdft_kind *kind); +problem *X(mkproblem_rdft_0_d)(tensor *vecsz, R *I, R *O); +problem *X(mkproblem_rdft_1)(const tensor *sz, const tensor *vecsz, + R *I, R *O, rdft_kind kind); +problem *X(mkproblem_rdft_1_d)(tensor *sz, tensor *vecsz, + R *I, R *O, rdft_kind kind); + +const char *X(rdft_kind_str)(rdft_kind kind); + +/* solve.c: */ +void X(rdft_solve)(const plan *ego_, const problem *p_); + +/* plan.c: rdftapply is the type of a plan's apply callback (the plan plus two R pointers, I and O); plan_rdft is the base plan followed by that callback. */ +typedef void (*rdftapply) (const plan *ego, R *I, R *O); + +typedef struct { + plan super; + rdftapply apply; +} plan_rdft; + +plan *X(mkplan_rdft)(size_t size, const plan_adt *adt, rdftapply apply); +/* MKPLAN_RDFT passes sizeof(type) to mkplan_rdft and casts the returned plan pointer to a pointer to type. */ +#define MKPLAN_RDFT(type, adt, apply) \ + (type *)X(mkplan_rdft)(sizeof(type), adt, apply) + +/* various solvers */ + +solver *X(mksolver_rdft_r2c_direct)(kr2c k, const kr2c_desc *desc); +solver *X(mksolver_rdft_r2c_directbuf)(kr2c k, const kr2c_desc *desc); +solver *X(mksolver_rdft_r2r_direct)(kr2r k, const kr2r_desc *desc); + +void X(rdft_rank0_register)(planner *p); +void X(rdft_vrank3_transpose_register)(planner *p); +void X(rdft_rank_geq2_register)(planner *p); +void X(rdft_indirect_register)(planner *p); +void
X(rdft_vrank_geq1_register)(planner *p); +void X(rdft_buffered_register)(planner *p); +void X(rdft_generic_register)(planner *p); +void X(rdft_rader_hc2hc_register)(planner *p); +void X(rdft_dht_register)(planner *p); +void X(dht_r2hc_register)(planner *p); +void X(dht_rader_register)(planner *p); +void X(dft_r2hc_register)(planner *p); +void X(rdft_nop_register)(planner *p); +void X(hc2hc_generic_register)(planner *p); + +/****************************************************************************/ +/* problem2.c: */ +/* + An RDFT2 problem transforms a 1d real array r[n] with stride is/os + to/from an "unpacked" complex array {rio,iio}[n/2 + 1] with stride + os/is. R0 points to the first even element of the real array. + R1 points to the first odd element of the real array. + + Strides on the real side of the transform express distances + between consecutive elements of the same array (even or odd). + E.g., for a contiguous input + + R0 R1 R2 R3 ... + + the input stride would be 2, not 1. This convention is necessary + for hc2c codelets to work, since they transpose even/odd with + real/imag. + + Multidimensional transforms use complex DFTs for the + noncontiguous dimensions. vecsz has the usual interpretation.
+*/ +typedef struct { + problem super; + tensor *sz; + tensor *vecsz; + R *r0, *r1; + R *cr, *ci; + rdft_kind kind; /* assert(kind < DHT) */ +} problem_rdft2; + +problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz, + R *r0, R *r1, R *cr, R *ci, rdft_kind kind); +problem *X(mkproblem_rdft2_d)(tensor *sz, tensor *vecsz, + R *r0, R *r1, R *cr, R *ci, rdft_kind kind); +problem *X(mkproblem_rdft2_d_3pointers)(tensor *sz, tensor *vecsz, + R *r, R *cr, R *ci, rdft_kind kind); +int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim); +INT X(rdft2_tensor_max_index)(const tensor *sz, rdft_kind k); +void X(rdft2_strides)(rdft_kind kind, const iodim *d, INT *rs, INT *cs); +INT X(rdft2_complex_n)(INT real_n, rdft_kind kind); + +/* verify.c: */ +void X(rdft2_verify)(plan *pln, const problem_rdft2 *p, int rounds); + +/* solve.c: */ +void X(rdft2_solve)(const plan *ego_, const problem *p_); + +/* plan.c: rdft2apply is the type of an rdft2 plan's apply callback (the plan plus four R pointers: r0, r1, cr, ci); plan_rdft2 is the base plan followed by that callback. */ +typedef void (*rdft2apply) (const plan *ego, R *r0, R *r1, R *cr, R *ci); + +typedef struct { + plan super; + rdft2apply apply; +} plan_rdft2; + +plan *X(mkplan_rdft2)(size_t size, const plan_adt *adt, rdft2apply apply); +/* MKPLAN_RDFT2 passes sizeof(type) to mkplan_rdft2 and casts the returned plan pointer to a pointer to type. */ +#define MKPLAN_RDFT2(type, adt, apply) \ + (type *)X(mkplan_rdft2)(sizeof(type), adt, apply) + +/* various solvers */ + +solver *X(mksolver_rdft2_direct)(kr2c k, const kr2c_desc *desc); + +void X(rdft2_vrank_geq1_register)(planner *p); +void X(rdft2_buffered_register)(planner *p); +void X(rdft2_rdft_register)(planner *p); +void X(rdft2_nop_register)(planner *p); +void X(rdft2_rank0_register)(planner *p); +void X(rdft2_rank_geq2_register)(planner *p); + +/****************************************************************************/ + +/* configurations */ +void X(rdft_conf_standard)(planner *p); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* __RDFT_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rdft2-inplace-strides.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++
b/fft/fftw/fftw-3.3.4/rdft/rdft2-inplace-strides.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" + +/* Check if the vecsz/sz strides are consistent with the problem + being in-place for vecsz.dim[vdim], or for all dimensions + if vdim == RNK_MINFTY. We can't just use tensor_inplace_strides + because rdft transforms have the unfortunate property of + differing input and output sizes. This routine is not + exhaustive; we only return 1 for the most common case. 
*/ +int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim) +{ + INT N, Nc; + INT rs, cs; + int i; +/* every transform dimension except the last must have identical input and output strides */ + for (i = 0; i + 1 < p->sz->rnk; ++i) + if (p->sz->dims[i].is != p->sz->dims[i].os) + return 0; +/* a vector tensor whose rank is zero or fails FINITE_RNK leaves nothing else to check */ + if (!FINITE_RNK(p->vecsz->rnk) || p->vecsz->rnk == 0) + return 1; + if (!FINITE_RNK(vdim)) { /* check all vector dimensions */ + for (vdim = 0; vdim < p->vecsz->rnk; ++vdim) + if (!X(rdft2_inplace_strides)(p, vdim)) + return 0; + return 1; + } +/* from here on a single vector dimension, vdim, is examined */ + A(vdim < p->vecsz->rnk); + if (p->sz->rnk == 0) + return(p->vecsz->dims[vdim].is == p->vecsz->dims[vdim].os); +/* N is tensor_sz of sz; Nc is N with the last dimension's n replaced by n/2 + 1; rs and cs are the last dimension's strides as returned by rdft2_strides */ + N = X(tensor_sz)(p->sz); + Nc = (N / p->sz->dims[p->sz->rnk-1].n) * + (p->sz->dims[p->sz->rnk-1].n/2 + 1); + X(rdft2_strides)(p->kind, p->sz->dims + p->sz->rnk - 1, &rs, &cs); + + /* the factor of 2 comes from the fact that RS is the stride + of p->r0 and p->r1, which is twice as large as the strides + in the r2r case */ + return(p->vecsz->dims[vdim].is == p->vecsz->dims[vdim].os + && (X(iabs)(2 * p->vecsz->dims[vdim].os) + >= X(imax)(2 * Nc * X(iabs)(cs), N * X(iabs)(rs)))); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rdft2-rdft.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rdft2-rdft.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" +/* S: solver record; it holds nothing but the base solver. */ +typedef struct { + solver super; +} S; +/* P: plan record. cld is applied as a plan_rdft to whole batches of nbuf vectors and cldrest is applied as a plan_rdft2 to the vectors left over; n, vl, ivs, ovs, cs, nbuf and bufdist are filled in by mkplan. */ +typedef struct { + plan_rdft2 super; + + plan *cld, *cldrest; + INT n, vl, nbuf, bufdist; + INT cs, ivs, ovs; +} P; + +/***************************************************************************/ + +/* FIXME: have alternate copy functions that push a vector loop inside + the n loops? */ + +/* copy halfcomplex array r (contiguous) to complex (strided) array rio/iio. r[i] becomes the real part and r[n - i] the imaginary part of output element i, written at index i * os; the imaginary part of element 0 and, for even n, of element n/2 is stored as zero. */ +static void hc2c(INT n, R *r, R *rio, R *iio, INT os) +{ + INT i; + + rio[0] = r[0]; + iio[0] = 0; + + for (i = 1; i + i < n; ++i) { + rio[i * os] = r[i]; + iio[i * os] = r[n - i]; + } + + if (i + i == n) { /* store the Nyquist frequency */ + rio[i * os] = r[i]; + iio[i * os] = K(0.0); + } +} + +/* reverse of hc2c: gathers rio[i * is] into r[i] and iio[i * is] into r[n - i]; iio[0] and, for even n, the imaginary part of element n/2 are never read. */ +static void c2hc(INT n, R *rio, R *iio, INT is, R *r) +{ + INT i; + + r[0] = rio[0]; + + for (i = 1; i + i < n; ++i) { + r[i] = rio[i * is]; + r[n - i] = iio[i * is]; + } + + if (i + i == n) /* store the Nyquist frequency */ + r[i] = rio[i * is]; +} + +/***************************************************************************/ +/* R2HC apply. Each loop iteration runs the child rdft plan once from r0 into a scratch buffer, then unpacks nbuf halfcomplex results (bufdist apart) into cr/ci with hc2c. The loop body runs vl / nbuf times; the remaining vectors are handed to cldrest. r1 is only advanced and forwarded to cldrest. The scratch buffer is allocated and freed on every call. */ +static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld = (plan_rdft *) ego->cld; + INT i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist; + INT n = ego->n; + INT ivs = ego->ivs, ovs = ego->ovs, os = ego->cs; + R *bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); + plan_rdft2 *cldrest; + + for (i = nbuf; i <= vl; i += nbuf) { + /* transform to bufs: */ + cld->apply((plan *) cld, r0, bufs); + r0 += ivs * nbuf; r1 += ivs * nbuf; + + /* copy back */ + for (j = 0; j < nbuf; ++j, cr += ovs, ci += ovs) + hc2c(n, bufs + j*bufdist, cr, ci, os); + } + + X(ifree)(bufs); + + /* Do the remaining
transforms, if any: */ + cldrest = (plan_rdft2 *) ego->cldrest; + cldrest->apply((plan *) cldrest, r0, r1, cr, ci); +} +/* HC2R apply. Each loop iteration packs nbuf complex inputs from cr/ci into a scratch buffer with c2hc, then runs the child rdft plan once from the buffer into r0. The loop body runs vl / nbuf times; the remaining vectors are handed to cldrest. The scratch buffer is allocated and freed on every call. */ +static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld = (plan_rdft *) ego->cld; + INT i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist; + INT n = ego->n; + INT ivs = ego->ivs, ovs = ego->ovs, is = ego->cs; + R *bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); + plan_rdft2 *cldrest; + + for (i = nbuf; i <= vl; i += nbuf) { + /* copy to bufs */ + for (j = 0; j < nbuf; ++j, cr += ivs, ci += ivs) + c2hc(n, cr, ci, is, bufs + j*bufdist); + + /* transform back: */ + cld->apply((plan *) cld, bufs, r0); + r0 += ovs * nbuf; r1 += ovs * nbuf; + } + + X(ifree)(bufs); + + /* Do the remaining transforms, if any: */ + cldrest = (plan_rdft2 *) ego->cldrest; + cldrest->apply((plan *) cldrest, r0, r1, cr, ci); +} +/* Forwards the wakefulness value to both child plans. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldrest, wakefulness); +} +/* Destroys both child plans, cldrest first. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldrest); + X(plan_destroy_internal)(ego->cld); +} +/* Prints the plan: the direction string is chosen by comparing the apply pointer with apply_r2hc, and is followed by n, nbuf, vl, bufdist % n and the two child plans. */ +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(rdft2-rdft-%s-%D%v/%D-%D%(%p%)%(%p%))", + ego->super.apply == apply_r2hc ?
"r2hc" : "hc2r", + ego->n, ego->nbuf, + ego->vl, ego->bufdist % ego->n, + ego->cld, ego->cldrest); +} +/* Lower bound on nbuf. Returns 1 when r0 != cr or when rdft2_inplace_strides accepts the problem; otherwise handles the overlapping layout described in the comment inside the body, and falls back to vl. NOTE(review): one of (cr - ci) and (ci - cr) is always <= 0, so the || test in the body passes for any pointer pair provided X(iabs) never returns a negative value; presumably a bound on the distance between cr and ci was intended - confirm before relying on it. NOTE(review): X(rdft2_inplace_strides(p, RNK_MINFTY)) puts the whole call inside X(), unlike the X(name)(args) form used elsewhere in this file; presumably equivalent if X only prefixes its first token - confirm against the X macro. */ +static INT min_nbuf(const problem_rdft2 *p, INT n, INT vl) +{ + INT is, os, ivs, ovs; + + if (p->r0 != p->cr) + return 1; + if (X(rdft2_inplace_strides(p, RNK_MINFTY))) + return 1; + A(p->vecsz->rnk == 1); /* rank 0 and MINFTY are inplace */ + + X(rdft2_strides)(p->kind, p->sz->dims, &is, &os); + X(rdft2_strides)(p->kind, p->vecsz->dims, &ivs, &ovs); + + /* handle one potentially common case: "contiguous" real and + complex arrays, which overlap because of the differing sizes. */ + if (n * X(iabs)(is) <= X(iabs)(ivs) + && (n/2 + 1) * X(iabs)(os) <= X(iabs)(ovs) + && ( ((p->cr - p->ci) <= X(iabs)(os)) || + ((p->ci - p->cr) <= X(iabs)(os)) ) + && ivs > 0 && ovs > 0) { + INT vsmin = X(imin)(ivs, ovs); + INT vsmax = X(imax)(ivs, ovs); + return(((vsmax - vsmin) * vl + vsmin - 1) / vsmin); + } + + return vl; /* punt: just buffer the whole vector */ +} +/* Structural test: vector rank at most 1, transform rank exactly 1, kind R2HC or HC2R, r1 - r0 equal to half of dims[0].is (R2HC) or dims[0].os (HC2R), and not both X(toobig)(n) and CONSERVE_MEMORYP(plnr). */ +static int applicable0(const problem *p_, const S *ego, const planner *plnr) +{ + const problem_rdft2 *p = (const problem_rdft2 *) p_; + UNUSED(ego); + return(1 + && p->vecsz->rnk <= 1 + && p->sz->rnk == 1 + + /* FIXME: does it make sense to do R2HCII ? */ + && (p->kind == R2HC || p->kind == HC2R) + + /* real strides must allow for reduction to rdft */ + && (2 * (p->r1 - p->r0) == + (((p->kind == R2HC) ?
p->sz->dims[0].is : p->sz->dims[0].os))) + + && !(X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr)) + ); +} +/* Planner-flag test on top of applicable0: refuses when NO_BUFFERINGP(plnr) holds; when NO_UGLYP(plnr) holds it also refuses problems with r0 != cr or with X(toobig)(n). */ +static int applicable(const problem *p_, const S *ego, const planner *plnr) +{ + const problem_rdft2 *p; + + if (NO_BUFFERINGP(plnr)) return 0; + + if (!applicable0(p_, ego, plnr)) return 0; + + p = (const problem_rdft2 *) p_; + if (NO_UGLYP(plnr)) { + if (p->r0 != p->cr) return 0; + if (X(toobig)(p->sz->dims[0].n)) return 0; + } + return 1; +} +/* Builds the plan. nbuf is the larger of X(nbuf)(n, vl, 0) and min_nbuf. The batched child is planned against a scratch buffer that is freed as soon as that child has been planned; cldrest is then planned for the vl % nbuf leftover vectors, whose data starts id / od elements past the batched part. Any failure jumps to nada, which frees the buffer and both child plans and returns a null plan. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + P *pln; + plan *cld = (plan *) 0; + plan *cldrest = (plan *) 0; + const problem_rdft2 *p = (const problem_rdft2 *) p_; + R *bufs = (R *) 0; + INT nbuf = 0, bufdist, n, vl; + INT ivs, ovs, rs, id, od; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + if (!applicable(p_, ego, plnr)) + goto nada; + + n = p->sz->dims[0].n; + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + + nbuf = X(imax)(X(nbuf)(n, vl, 0), min_nbuf(p, n, vl)); + bufdist = X(bufdist)(n, vl); + A(nbuf > 0); + + /* initial allocation for the purpose of planning */ + bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); + + id = ivs * (nbuf * (vl / nbuf)); + od = ovs * (nbuf * (vl / nbuf)); + + if (p->kind == R2HC) { + cld = X(mkplan_f_d)( + plnr, + X(mkproblem_rdft_d)( + X(mktensor_1d)(n, p->sz->dims[0].is/2, 1), + X(mktensor_1d)(nbuf, ivs, bufdist), + TAINT(p->r0, ivs * nbuf), bufs, &p->kind), + 0, 0, (p->r0 == p->cr) ?
NO_DESTROY_INPUT : 0); + if (!cld) goto nada; + X(ifree)(bufs); bufs = 0; + + cldrest = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)( + X(tensor_copy)(p->sz), + X(mktensor_1d)(vl % nbuf, ivs, ovs), + p->r0 + id, p->r1 + id, + p->cr + od, p->ci + od, + p->kind)); + if (!cldrest) goto nada; + + pln = MKPLAN_RDFT2(P, &padt, apply_r2hc); + } else { + A(p->kind == HC2R); + cld = X(mkplan_f_d)( + plnr, + X(mkproblem_rdft_d)( + X(mktensor_1d)(n, 1, p->sz->dims[0].os/2), + X(mktensor_1d)(nbuf, bufdist, ovs), + bufs, TAINT(p->r0, ovs * nbuf), &p->kind), + 0, 0, NO_DESTROY_INPUT); /* always ok to destroy bufs */ + if (!cld) goto nada; + X(ifree)(bufs); bufs = 0; + + cldrest = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)( + X(tensor_copy)(p->sz), + X(mktensor_1d)(vl % nbuf, ivs, ovs), + p->r0 + od, p->r1 + od, + p->cr + id, p->ci + id, + p->kind)); + if (!cldrest) goto nada; + pln = MKPLAN_RDFT2(P, &padt, apply_hc2r); + } + + pln->cld = cld; + pln->cldrest = cldrest; + pln->n = n; + pln->vl = vl; + pln->ivs = ivs; + pln->ovs = ovs; + X(rdft2_strides)(p->kind, &p->sz->dims[0], &rs, &pln->cs); + pln->nbuf = nbuf; + pln->bufdist = bufdist; + + X(ops_madd)(vl / nbuf, &cld->ops, &cldrest->ops, + &pln->super.super.ops); + pln->super.super.ops.other += (p->kind == R2HC ?
(n + 2) : n) * vl; + + return &(pln->super.super); + + nada: + X(ifree0)(bufs); + X(plan_destroy_internal)(cldrest); + X(plan_destroy_internal)(cld); + return (plan *) 0; +} +/* Creates the solver with MKSOLVER, using an adt that names PROBLEM_RDFT2 and mkplan. */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} +/* Registers a newly created solver with planner p. */ +void X(rdft2_rdft_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rdft2-strides.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rdft2-strides.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "rdft.h" + +/* Deal with annoyance because the tensor (is,os) applies to + (r,rio/iio) for R2HC and vice-versa for HC2R. We originally had + (is,os) always apply to (r,rio/iio), but this causes other + headaches with the tensor functions.
*/ +void X(rdft2_strides)(rdft_kind kind, const iodim *d, INT *rs, INT *cs) +{ /* stores the real-side stride of d in *rs and the complex-side stride in *cs */ + if (kind == R2HC) { /* is belongs to the real side, os to the complex side */ + *rs = d->is; + *cs = d->os; + } + else { /* kind is expected to be HC2R (checked with A): os belongs to the real side, is to the complex side */ + A(kind == HC2R); + *rs = d->os; + *cs = d->is; + } +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/rdft2-tensor-max-index.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/rdft2-tensor-max-index.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" + +/* like X(tensor_max_index), but takes into account the special n/2+1 + final dimension for the complex output/input of an R2HC/HC2R transform.
*/ +INT X(rdft2_tensor_max_index)(const tensor *sz, rdft_kind k) +{ + int i; + INT n = 0; +/* every dimension but the last adds (n - 1) * imax(iabs(is), iabs(os)); the last adds imax((n - 1) * iabs(real-side stride), (n/2) * iabs(complex-side stride)) */ + A(FINITE_RNK(sz->rnk)); + for (i = 0; i + 1 < sz->rnk; ++i) { + const iodim *p = sz->dims + i; + n += (p->n - 1) * X(imax)(X(iabs)(p->is), X(iabs)(p->os)); + } + if (i < sz->rnk) { + const iodim *p = sz->dims + i; + INT is, os; + X(rdft2_strides)(k, p, &is, &os); /* is receives the real-side stride, os the complex-side stride */ + n += X(imax)((p->n - 1) * X(iabs)(is), (p->n/2) * X(iabs)(os)); + } + return n; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,7 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft +SUBDIRS = r2cf r2cb r2r +noinst_LTLIBRARIES = librdft_scalar.la + +librdft_scalar_la_SOURCES = hb.h r2cb.h r2cbIII.h hf.h hfb.c r2c.c \ +r2cf.h r2cfII.h r2r.c r2r.h hc2c.c hc2cf.h hc2cb.h + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,730 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?)
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = rdft/scalar +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + 
$(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_scalar_la_LIBADD = +am_librdft_scalar_la_OBJECTS = hfb.lo r2c.lo r2r.lo hc2c.lo +librdft_scalar_la_OBJECTS = $(am_librdft_scalar_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) 
\ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_scalar_la_SOURCES) +DIST_SOURCES = $(librdft_scalar_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ 
+host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft +SUBDIRS = r2cf r2cb r2r +noinst_LTLIBRARIES = librdft_scalar.la +librdft_scalar_la_SOURCES = hb.h r2cb.h r2cbIII.h hf.h hfb.c r2c.c \ +r2cf.h r2cfII.h r2r.c r2r.h hc2c.c hc2cf.h hc2cb.h + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/scalar/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/scalar/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_scalar.la: $(librdft_scalar_la_OBJECTS) $(librdft_scalar_la_DEPENDENCIES) $(EXTRA_librdft_scalar_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(librdft_scalar_la_OBJECTS) $(librdft_scalar_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hfb.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2c.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2r.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec 
install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/hb.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/hb.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_hb_genus) +extern const hc2hc_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/hc2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/hc2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-rdft.h" +#include "hc2cf.h" + +static int okp(const R *Rp, const R *Ip, const R *Rm, const R *Im, + INT rs, INT mb, INT me, INT ms, + const planner *plnr) +{ + UNUSED(Rp); UNUSED(Ip); UNUSED(Rm); UNUSED(Im); + UNUSED(rs); UNUSED(mb); UNUSED(me); UNUSED(ms); UNUSED(plnr); + + return 1; +} + +const hc2c_genus GENUS = { okp, R2HC, 1 }; + +#undef GENUS +#include "hc2cb.h" + +const hc2c_genus GENUS = { okp, HC2R, 1 }; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/hc2cb.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/hc2cb.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_hc2cb_genus) +extern const hc2c_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/hc2cf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/hc2cf.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_hc2cf_genus) +extern const hc2c_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/hf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/hf.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_hf_genus) +extern const hc2hc_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/hfb.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/hfb.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-rdft.h" +#include "hf.h" + +const hc2hc_genus GENUS = { R2HC, 1 }; + +#undef GENUS +#include "hb.h" + +const hc2hc_genus GENUS = { HC2R, 1 }; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2c.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-rdft.h" + +#include "r2cf.h" +const kr2c_genus GENUS = { R2HC, 1 }; +#undef GENUS + +#include "r2cfII.h" +const kr2c_genus GENUS = { R2HCII, 1 }; +#undef GENUS + +#include "r2cb.h" +const kr2c_genus GENUS = { HC2R, 1 }; +#undef GENUS + +#include "r2cbIII.h" +const kr2c_genus GENUS = { HC2RIII, 1 }; +#undef GENUS diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_r2cb_genus) +extern const kr2c_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,110 @@ +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/scalar +noinst_LTLIBRARIES = librdft_scalar_r2cb.la + +########################################################################### +# r2cb_<n> is a hard-coded complex-to-real FFT of size <n> (base cases +# of real-output FFT recursion) +R2CB = r2cb_2.c r2cb_3.c r2cb_4.c r2cb_5.c r2cb_6.c r2cb_7.c r2cb_8.c \ +r2cb_9.c r2cb_10.c r2cb_11.c r2cb_12.c r2cb_13.c r2cb_14.c r2cb_15.c \ +r2cb_16.c r2cb_32.c r2cb_64.c r2cb_128.c r2cb_20.c r2cb_25.c +# r2cb_30.c r2cb_40.c r2cb_50.c + +########################################################################### +# hb_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF +# step for a real-output FFT. Every hb codelet must have a +# corresponding r2cbIII codelet (see below)!
+HB = hb_2.c hb_3.c hb_4.c hb_5.c hb_6.c hb_7.c hb_8.c hb_9.c \ +hb_10.c hb_12.c hb_15.c hb_16.c hb_32.c hb_64.c \ +hb_20.c hb_25.c # hb_30.c hb_40.c hb_50.c + +# like hb, but generates part of its trig table on the fly (good for large n) +HB2 = hb2_4.c hb2_8.c hb2_16.c hb2_32.c \ +hb2_5.c hb2_20.c hb2_25.c + +# an r2cb transform where the output is shifted by half a sample (input +# is multiplied by a phase). This is needed as part of the DIF recursion; +# every hb_<r> or hb2_<r> codelet should have a corresponding r2cbIII_<r> +R2CBIII = r2cbIII_2.c r2cbIII_3.c r2cbIII_4.c r2cbIII_5.c r2cbIII_6.c \ +r2cbIII_7.c r2cbIII_8.c r2cbIII_9.c r2cbIII_10.c r2cbIII_12.c \ +r2cbIII_15.c r2cbIII_16.c r2cbIII_32.c r2cbIII_64.c \ +r2cbIII_20.c r2cbIII_25.c # r2cbIII_30.c r2cbIII_40.c r2cbIII_50.c + +########################################################################### +# hc2cb_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF +# step for a real-input FFT with rdft2-style output. <r> must be even. +HC2CB = hc2cb_2.c hc2cb_4.c hc2cb_6.c hc2cb_8.c hc2cb_10.c hc2cb_12.c \ +hc2cb_16.c hc2cb_32.c \ +hc2cb_20.c # hc2cb_30.c + +HC2CBDFT = hc2cbdft_2.c hc2cbdft_4.c hc2cbdft_6.c hc2cbdft_8.c \ +hc2cbdft_10.c hc2cbdft_12.c hc2cbdft_16.c hc2cbdft_32.c \ +hc2cbdft_20.c # hc2cbdft_30.c + +# like hc2cb, but generates part of its trig table on the fly (good +# for large n) +HC2CB2 = hc2cb2_4.c hc2cb2_8.c hc2cb2_16.c hc2cb2_32.c \ +hc2cb2_20.c # hc2cb2_30.c +HC2CBDFT2 = hc2cbdft2_4.c hc2cbdft2_8.c hc2cbdft2_16.c hc2cbdft2_32.c \ +hc2cbdft2_20.c # hc2cbdft2_30.c + +########################################################################### +ALL_CODELETS = $(R2CB) $(HB) $(HB2) $(R2CBIII) $(HC2CB) $(HC2CB2) \ +$(HC2CBDFT) $(HC2CBDFT2) + +BUILT_SOURCES= $(ALL_CODELETS) $(CODLIST) + +librdft_scalar_r2cb_la_SOURCES = $(BUILT_SOURCES) + +SOLVTAB_NAME = X(solvtab_rdft_r2cb) +XRENAME=X + +# special rules for regenerating codelets.
+include $(top_srcdir)/support/Makefile.codelets + +if MAINTAINER_MODE +FLAGS_R2CB=$(RDFT_FLAGS_COMMON) -sign 1 +FLAGS_HB=$(RDFT_FLAGS_COMMON) -sign 1 +FLAGS_HB2=$(RDFT_FLAGS_COMMON) -sign 1 -twiddle-log3 -precompute-twiddles +FLAGS_HC2CB=$(RDFT_FLAGS_COMMON) -sign 1 +FLAGS_HC2CB2=$(RDFT_FLAGS_COMMON) -sign 1 -twiddle-log3 -precompute-twiddles +FLAGS_R2CBIII=$(RDFT_FLAGS_COMMON) -sign 1 + +r2cb_%.c: $(CODELET_DEPS) $(GEN_R2CB) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CB) $(FLAGS_R2CB) -n $* -name r2cb_$* -include "r2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +hb_%.c: $(CODELET_DEPS) $(GEN_HC2HC) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HB) -n $* -dif -name hb_$* -include "hb.h") | $(ADD_DATE) | $(INDENT) >$@ + +hb2_%.c: $(CODELET_DEPS) $(GEN_HC2HC) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HB2) -n $* -dif -name hb2_$* -include "hb.h") | $(ADD_DATE) | $(INDENT) >$@ + +r2cbIII_%.c: $(CODELET_DEPS) $(GEN_R2CB) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CB) $(FLAGS_R2CB) -n $* -name r2cbIII_$* -dft-III -include "r2cbIII.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cb_%.c: $(CODELET_DEPS) $(GEN_HC2C) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CB) -n $* -dif -name hc2cb_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cb2_%.c: $(CODELET_DEPS) $(GEN_HC2C) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CB2) -n $* -dif -name hc2cb2_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cbdft_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CB) -n $* -dif -name hc2cbdft_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cbdft2_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CB) -n $* -dif -name hc2cbdft2_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/Makefile.in --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,907 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/support/Makefile.codelets \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +subdir = rdft/scalar/r2cb +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps 
= $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_scalar_r2cb_la_LIBADD = +am__objects_1 = r2cb_2.lo r2cb_3.lo r2cb_4.lo r2cb_5.lo r2cb_6.lo \ + r2cb_7.lo r2cb_8.lo r2cb_9.lo r2cb_10.lo r2cb_11.lo r2cb_12.lo \ + r2cb_13.lo r2cb_14.lo r2cb_15.lo r2cb_16.lo r2cb_32.lo \ + r2cb_64.lo r2cb_128.lo r2cb_20.lo r2cb_25.lo +am__objects_2 = hb_2.lo hb_3.lo hb_4.lo hb_5.lo hb_6.lo hb_7.lo \ + hb_8.lo hb_9.lo hb_10.lo hb_12.lo hb_15.lo hb_16.lo hb_32.lo \ + hb_64.lo hb_20.lo hb_25.lo +am__objects_3 = hb2_4.lo hb2_8.lo hb2_16.lo hb2_32.lo hb2_5.lo \ + hb2_20.lo hb2_25.lo +am__objects_4 = r2cbIII_2.lo r2cbIII_3.lo r2cbIII_4.lo r2cbIII_5.lo \ + r2cbIII_6.lo r2cbIII_7.lo r2cbIII_8.lo r2cbIII_9.lo \ + r2cbIII_10.lo r2cbIII_12.lo r2cbIII_15.lo r2cbIII_16.lo \ + r2cbIII_32.lo r2cbIII_64.lo r2cbIII_20.lo r2cbIII_25.lo +am__objects_5 = hc2cb_2.lo hc2cb_4.lo hc2cb_6.lo hc2cb_8.lo \ + hc2cb_10.lo hc2cb_12.lo hc2cb_16.lo hc2cb_32.lo hc2cb_20.lo +am__objects_6 = hc2cb2_4.lo hc2cb2_8.lo hc2cb2_16.lo hc2cb2_32.lo \ + hc2cb2_20.lo +am__objects_7 = hc2cbdft_2.lo hc2cbdft_4.lo hc2cbdft_6.lo \ + hc2cbdft_8.lo hc2cbdft_10.lo hc2cbdft_12.lo hc2cbdft_16.lo \ + hc2cbdft_32.lo hc2cbdft_20.lo +am__objects_8 = hc2cbdft2_4.lo hc2cbdft2_8.lo hc2cbdft2_16.lo \ + hc2cbdft2_32.lo hc2cbdft2_20.lo +am__objects_9 = 
$(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) +am__objects_10 = codlist.lo +am__objects_11 = $(am__objects_9) $(am__objects_10) +am_librdft_scalar_r2cb_la_OBJECTS = $(am__objects_11) +librdft_scalar_r2cb_la_OBJECTS = $(am_librdft_scalar_r2cb_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_scalar_r2cb_la_SOURCES) +DIST_SOURCES = $(librdft_scalar_r2cb_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from 
the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ 
+NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix 
= @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/scalar + +noinst_LTLIBRARIES = librdft_scalar_r2cb.la + +########################################################################### +# r2cb_<n> is a hard-coded complex-to-real FFT of size <n> (base cases +# of real-output FFT recursion) +R2CB = r2cb_2.c r2cb_3.c r2cb_4.c r2cb_5.c r2cb_6.c r2cb_7.c r2cb_8.c \ +r2cb_9.c r2cb_10.c r2cb_11.c r2cb_12.c r2cb_13.c r2cb_14.c r2cb_15.c \ +r2cb_16.c r2cb_32.c r2cb_64.c r2cb_128.c r2cb_20.c r2cb_25.c + +# r2cb_30.c r2cb_40.c r2cb_50.c + +########################################################################### +# hb_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF +# step for a real-output FFT. Every hb codelet must have a +# corresponding r2cbIII codelet (see below)! +HB = hb_2.c hb_3.c hb_4.c hb_5.c hb_6.c hb_7.c hb_8.c hb_9.c \ +hb_10.c hb_12.c hb_15.c hb_16.c hb_32.c hb_64.c \ +hb_20.c hb_25.c # hb_30.c hb_40.c hb_50.c + + +# like hb, but generates part of its trig table on the fly (good for large n) +HB2 = hb2_4.c hb2_8.c hb2_16.c hb2_32.c \ +hb2_5.c hb2_20.c hb2_25.c + + +# an r2cb transform where the output is shifted by half a sample (input +# is multiplied by a phase).
This is needed as part of the DIF recursion; +# every hb_<r> or hb2_<r> codelet should have a corresponding r2cbIII_<r> +R2CBIII = r2cbIII_2.c r2cbIII_3.c r2cbIII_4.c r2cbIII_5.c r2cbIII_6.c \ +r2cbIII_7.c r2cbIII_8.c r2cbIII_9.c r2cbIII_10.c r2cbIII_12.c \ +r2cbIII_15.c r2cbIII_16.c r2cbIII_32.c r2cbIII_64.c \ +r2cbIII_20.c r2cbIII_25.c # r2cbIII_30.c r2cbIII_40.c r2cbIII_50.c + + +########################################################################### +# hc2cb_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF +# step for a real-input FFT with rdft2-style output. <r> must be even. +HC2CB = hc2cb_2.c hc2cb_4.c hc2cb_6.c hc2cb_8.c hc2cb_10.c hc2cb_12.c \ +hc2cb_16.c hc2cb_32.c \ +hc2cb_20.c # hc2cb_30.c + +HC2CBDFT = hc2cbdft_2.c hc2cbdft_4.c hc2cbdft_6.c hc2cbdft_8.c \ +hc2cbdft_10.c hc2cbdft_12.c hc2cbdft_16.c hc2cbdft_32.c \ +hc2cbdft_20.c # hc2cbdft_30.c + + +# like hc2cb, but generates part of its trig table on the fly (good +# for large n) +HC2CB2 = hc2cb2_4.c hc2cb2_8.c hc2cb2_16.c hc2cb2_32.c \ +hc2cb2_20.c # hc2cb2_30.c + +HC2CBDFT2 = hc2cbdft2_4.c hc2cbdft2_8.c hc2cbdft2_16.c hc2cbdft2_32.c \ +hc2cbdft2_20.c # hc2cbdft2_30.c + + +########################################################################### +ALL_CODELETS = $(R2CB) $(HB) $(HB2) $(R2CBIII) $(HC2CB) $(HC2CB2) \ +$(HC2CBDFT) $(HC2CBDFT2) + +BUILT_SOURCES = $(ALL_CODELETS) $(CODLIST) +librdft_scalar_r2cb_la_SOURCES = $(BUILT_SOURCES) +SOLVTAB_NAME = X(solvtab_rdft_r2cb) +XRENAME = X +CODLIST = codlist.c +CODELET_NAME = codelet_ +@MAINTAINER_MODE_TRUE@INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +@MAINTAINER_MODE_TRUE@TWOVERS = sh ${top_srcdir}/support/twovers.sh +@MAINTAINER_MODE_TRUE@GENFFTDIR = ${top_builddir}/genfft +@MAINTAINER_MODE_TRUE@GEN_NOTW = ${GENFFTDIR}/gen_notw.native +@MAINTAINER_MODE_TRUE@GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE_C =
${GENFFTDIR}/gen_twiddle_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +@MAINTAINER_MODE_TRUE@GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +@MAINTAINER_MODE_TRUE@GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +@MAINTAINER_MODE_TRUE@GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +@MAINTAINER_MODE_TRUE@GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +@MAINTAINER_MODE_TRUE@GEN_R2R = ${GENFFTDIR}/gen_r2r.native +@MAINTAINER_MODE_TRUE@PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +@MAINTAINER_MODE_TRUE@PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +@MAINTAINER_MODE_TRUE@ADD_DATE = sed -e s/@DATE@/"`date`"/ +@MAINTAINER_MODE_TRUE@COPYRIGHT = ${top_srcdir}/COPYRIGHT +@MAINTAINER_MODE_TRUE@CODELET_DEPS = $(COPYRIGHT) $(PRELUDE) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_DFT = cat $(COPYRIGHT) $(PRELUDE_DFT) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_RDFT = cat $(COPYRIGHT) $(PRELUDE_RDFT) +@MAINTAINER_MODE_TRUE@FLAGS_COMMON = -compact -variables 4 +@MAINTAINER_MODE_TRUE@DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +@MAINTAINER_MODE_TRUE@RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# special rules for regenerating codelets. 
+@MAINTAINER_MODE_TRUE@FLAGS_R2CB = $(RDFT_FLAGS_COMMON) -sign 1 +@MAINTAINER_MODE_TRUE@FLAGS_HB = $(RDFT_FLAGS_COMMON) -sign 1 +@MAINTAINER_MODE_TRUE@FLAGS_HB2 = $(RDFT_FLAGS_COMMON) -sign 1 -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_HC2CB = $(RDFT_FLAGS_COMMON) -sign 1 +@MAINTAINER_MODE_TRUE@FLAGS_HC2CB2 = $(RDFT_FLAGS_COMMON) -sign 1 -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_R2CBIII = $(RDFT_FLAGS_COMMON) -sign 1 +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/support/Makefile.codelets $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/scalar/r2cb/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/scalar/r2cb/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/support/Makefile.codelets: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_scalar_r2cb.la: $(librdft_scalar_r2cb_la_OBJECTS) $(librdft_scalar_r2cb_la_DEPENDENCIES) $(EXTRA_librdft_scalar_r2cb_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(librdft_scalar_r2cb_la_OBJECTS) $(librdft_scalar_r2cb_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb2_8.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hb_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb2_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb2_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hc2cb_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cb_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft2_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft2_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdft_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/r2cbIII_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cbIII_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cb_9.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) 
$(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic maintainer-clean-local mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am + + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; \ + 
echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? +maintainer-clean-local: + rm -f $(CODLIST) + +# cancel the hideous builtin rules that cause an infinite loop +@MAINTAINER_MODE_TRUE@%: %.o +@MAINTAINER_MODE_TRUE@%: %.s +@MAINTAINER_MODE_TRUE@%: %.c +@MAINTAINER_MODE_TRUE@%: %.S + +@MAINTAINER_MODE_TRUE@r2cb_%.c: $(CODELET_DEPS) $(GEN_R2CB) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CB) $(FLAGS_R2CB) -n $* -name r2cb_$* -include "r2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hb_%.c: $(CODELET_DEPS) $(GEN_HC2HC) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HB) -n $* -dif -name hb_$* -include "hb.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hb2_%.c: $(CODELET_DEPS) $(GEN_HC2HC) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HB2) -n $* -dif -name hb2_$* -include "hb.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@r2cbIII_%.c: $(CODELET_DEPS) $(GEN_R2CB) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CB) $(FLAGS_R2CB) -n $* -name r2cbIII_$* -dft-III -include "r2cbIII.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cb_%.c: $(CODELET_DEPS) $(GEN_HC2C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); 
$(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CB) -n $* -dif -name hc2cb_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cb2_%.c: $(CODELET_DEPS) $(GEN_HC2C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CB2) -n $* -dif -name hc2cb2_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cbdft_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CB) -n $* -dif -name hc2cbdft_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cbdft2_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CB) -n $* -dif -name hc2cbdft2_$* -include "hc2cb.h") | $(ADD_DATE) | $(INDENT) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,183 @@ +#include "ifftw.h" + + +extern void X(codelet_r2cb_2)(planner *); +extern void X(codelet_r2cb_3)(planner *); +extern void X(codelet_r2cb_4)(planner *); +extern void X(codelet_r2cb_5)(planner *); +extern void X(codelet_r2cb_6)(planner *); +extern void X(codelet_r2cb_7)(planner *); +extern void X(codelet_r2cb_8)(planner *); +extern void X(codelet_r2cb_9)(planner *); +extern void X(codelet_r2cb_10)(planner *); +extern void X(codelet_r2cb_11)(planner *); +extern void X(codelet_r2cb_12)(planner *); +extern void X(codelet_r2cb_13)(planner *); +extern void X(codelet_r2cb_14)(planner *); +extern void X(codelet_r2cb_15)(planner *); +extern void X(codelet_r2cb_16)(planner *); +extern void X(codelet_r2cb_32)(planner *); +extern void X(codelet_r2cb_64)(planner *); +extern void 
X(codelet_r2cb_128)(planner *); +extern void X(codelet_r2cb_20)(planner *); +extern void X(codelet_r2cb_25)(planner *); +extern void X(codelet_hb_2)(planner *); +extern void X(codelet_hb_3)(planner *); +extern void X(codelet_hb_4)(planner *); +extern void X(codelet_hb_5)(planner *); +extern void X(codelet_hb_6)(planner *); +extern void X(codelet_hb_7)(planner *); +extern void X(codelet_hb_8)(planner *); +extern void X(codelet_hb_9)(planner *); +extern void X(codelet_hb_10)(planner *); +extern void X(codelet_hb_12)(planner *); +extern void X(codelet_hb_15)(planner *); +extern void X(codelet_hb_16)(planner *); +extern void X(codelet_hb_32)(planner *); +extern void X(codelet_hb_64)(planner *); +extern void X(codelet_hb_20)(planner *); +extern void X(codelet_hb_25)(planner *); +extern void X(codelet_hb2_4)(planner *); +extern void X(codelet_hb2_8)(planner *); +extern void X(codelet_hb2_16)(planner *); +extern void X(codelet_hb2_32)(planner *); +extern void X(codelet_hb2_5)(planner *); +extern void X(codelet_hb2_20)(planner *); +extern void X(codelet_hb2_25)(planner *); +extern void X(codelet_r2cbIII_2)(planner *); +extern void X(codelet_r2cbIII_3)(planner *); +extern void X(codelet_r2cbIII_4)(planner *); +extern void X(codelet_r2cbIII_5)(planner *); +extern void X(codelet_r2cbIII_6)(planner *); +extern void X(codelet_r2cbIII_7)(planner *); +extern void X(codelet_r2cbIII_8)(planner *); +extern void X(codelet_r2cbIII_9)(planner *); +extern void X(codelet_r2cbIII_10)(planner *); +extern void X(codelet_r2cbIII_12)(planner *); +extern void X(codelet_r2cbIII_15)(planner *); +extern void X(codelet_r2cbIII_16)(planner *); +extern void X(codelet_r2cbIII_32)(planner *); +extern void X(codelet_r2cbIII_64)(planner *); +extern void X(codelet_r2cbIII_20)(planner *); +extern void X(codelet_r2cbIII_25)(planner *); +extern void X(codelet_hc2cb_2)(planner *); +extern void X(codelet_hc2cb_4)(planner *); +extern void X(codelet_hc2cb_6)(planner *); +extern void X(codelet_hc2cb_8)(planner 
*); +extern void X(codelet_hc2cb_10)(planner *); +extern void X(codelet_hc2cb_12)(planner *); +extern void X(codelet_hc2cb_16)(planner *); +extern void X(codelet_hc2cb_32)(planner *); +extern void X(codelet_hc2cb_20)(planner *); +extern void X(codelet_hc2cb2_4)(planner *); +extern void X(codelet_hc2cb2_8)(planner *); +extern void X(codelet_hc2cb2_16)(planner *); +extern void X(codelet_hc2cb2_32)(planner *); +extern void X(codelet_hc2cb2_20)(planner *); +extern void X(codelet_hc2cbdft_2)(planner *); +extern void X(codelet_hc2cbdft_4)(planner *); +extern void X(codelet_hc2cbdft_6)(planner *); +extern void X(codelet_hc2cbdft_8)(planner *); +extern void X(codelet_hc2cbdft_10)(planner *); +extern void X(codelet_hc2cbdft_12)(planner *); +extern void X(codelet_hc2cbdft_16)(planner *); +extern void X(codelet_hc2cbdft_32)(planner *); +extern void X(codelet_hc2cbdft_20)(planner *); +extern void X(codelet_hc2cbdft2_4)(planner *); +extern void X(codelet_hc2cbdft2_8)(planner *); +extern void X(codelet_hc2cbdft2_16)(planner *); +extern void X(codelet_hc2cbdft2_32)(planner *); +extern void X(codelet_hc2cbdft2_20)(planner *); + + +extern const solvtab X(solvtab_rdft_r2cb); +const solvtab X(solvtab_rdft_r2cb) = { + SOLVTAB(X(codelet_r2cb_2)), + SOLVTAB(X(codelet_r2cb_3)), + SOLVTAB(X(codelet_r2cb_4)), + SOLVTAB(X(codelet_r2cb_5)), + SOLVTAB(X(codelet_r2cb_6)), + SOLVTAB(X(codelet_r2cb_7)), + SOLVTAB(X(codelet_r2cb_8)), + SOLVTAB(X(codelet_r2cb_9)), + SOLVTAB(X(codelet_r2cb_10)), + SOLVTAB(X(codelet_r2cb_11)), + SOLVTAB(X(codelet_r2cb_12)), + SOLVTAB(X(codelet_r2cb_13)), + SOLVTAB(X(codelet_r2cb_14)), + SOLVTAB(X(codelet_r2cb_15)), + SOLVTAB(X(codelet_r2cb_16)), + SOLVTAB(X(codelet_r2cb_32)), + SOLVTAB(X(codelet_r2cb_64)), + SOLVTAB(X(codelet_r2cb_128)), + SOLVTAB(X(codelet_r2cb_20)), + SOLVTAB(X(codelet_r2cb_25)), + SOLVTAB(X(codelet_hb_2)), + SOLVTAB(X(codelet_hb_3)), + SOLVTAB(X(codelet_hb_4)), + SOLVTAB(X(codelet_hb_5)), + SOLVTAB(X(codelet_hb_6)), + SOLVTAB(X(codelet_hb_7)), + 
SOLVTAB(X(codelet_hb_8)), + SOLVTAB(X(codelet_hb_9)), + SOLVTAB(X(codelet_hb_10)), + SOLVTAB(X(codelet_hb_12)), + SOLVTAB(X(codelet_hb_15)), + SOLVTAB(X(codelet_hb_16)), + SOLVTAB(X(codelet_hb_32)), + SOLVTAB(X(codelet_hb_64)), + SOLVTAB(X(codelet_hb_20)), + SOLVTAB(X(codelet_hb_25)), + SOLVTAB(X(codelet_hb2_4)), + SOLVTAB(X(codelet_hb2_8)), + SOLVTAB(X(codelet_hb2_16)), + SOLVTAB(X(codelet_hb2_32)), + SOLVTAB(X(codelet_hb2_5)), + SOLVTAB(X(codelet_hb2_20)), + SOLVTAB(X(codelet_hb2_25)), + SOLVTAB(X(codelet_r2cbIII_2)), + SOLVTAB(X(codelet_r2cbIII_3)), + SOLVTAB(X(codelet_r2cbIII_4)), + SOLVTAB(X(codelet_r2cbIII_5)), + SOLVTAB(X(codelet_r2cbIII_6)), + SOLVTAB(X(codelet_r2cbIII_7)), + SOLVTAB(X(codelet_r2cbIII_8)), + SOLVTAB(X(codelet_r2cbIII_9)), + SOLVTAB(X(codelet_r2cbIII_10)), + SOLVTAB(X(codelet_r2cbIII_12)), + SOLVTAB(X(codelet_r2cbIII_15)), + SOLVTAB(X(codelet_r2cbIII_16)), + SOLVTAB(X(codelet_r2cbIII_32)), + SOLVTAB(X(codelet_r2cbIII_64)), + SOLVTAB(X(codelet_r2cbIII_20)), + SOLVTAB(X(codelet_r2cbIII_25)), + SOLVTAB(X(codelet_hc2cb_2)), + SOLVTAB(X(codelet_hc2cb_4)), + SOLVTAB(X(codelet_hc2cb_6)), + SOLVTAB(X(codelet_hc2cb_8)), + SOLVTAB(X(codelet_hc2cb_10)), + SOLVTAB(X(codelet_hc2cb_12)), + SOLVTAB(X(codelet_hc2cb_16)), + SOLVTAB(X(codelet_hc2cb_32)), + SOLVTAB(X(codelet_hc2cb_20)), + SOLVTAB(X(codelet_hc2cb2_4)), + SOLVTAB(X(codelet_hc2cb2_8)), + SOLVTAB(X(codelet_hc2cb2_16)), + SOLVTAB(X(codelet_hc2cb2_32)), + SOLVTAB(X(codelet_hc2cb2_20)), + SOLVTAB(X(codelet_hc2cbdft_2)), + SOLVTAB(X(codelet_hc2cbdft_4)), + SOLVTAB(X(codelet_hc2cbdft_6)), + SOLVTAB(X(codelet_hc2cbdft_8)), + SOLVTAB(X(codelet_hc2cbdft_10)), + SOLVTAB(X(codelet_hc2cbdft_12)), + SOLVTAB(X(codelet_hc2cbdft_16)), + SOLVTAB(X(codelet_hc2cbdft_32)), + SOLVTAB(X(codelet_hc2cbdft_20)), + SOLVTAB(X(codelet_hc2cbdft2_4)), + SOLVTAB(X(codelet_hc2cbdft2_8)), + SOLVTAB(X(codelet_hc2cbdft2_16)), + SOLVTAB(X(codelet_hc2cbdft2_32)), + SOLVTAB(X(codelet_hc2cbdft2_20)), + SOLVTAB_END +}; diff -r 
8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 16 -dif -name hb2_16 -include hb.h */ + +/* + * This function contains 196 FP additions, 134 FP multiplications, + * (or, 104 additions, 42 multiplications, 92 fused multiply/add), + * 114 stack variables, 3 constants, and 64 memory accesses + */ +#include "hb.h" + +static void hb2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for 
(m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { + E Tv, TB, TF, Ty, T1J, T1O, T1N, T1K; + { + E Tw, T2z, T2C, Tx, T3f, T3l, T2F, T3r, Tz; + Tv = W[0]; + Tw = W[2]; + T2z = W[6]; + T2C = W[7]; + TB = W[4]; + Tx = Tv * Tw; + T3f = Tv * T2z; + T3l = Tv * T2C; + T2F = Tv * TB; + T3r = Tw * TB; + TF = W[5]; + Ty = W[1]; + Tz = W[3]; + { + E T2G, T3z, T3m, T3g, T3L, T3s, T1V, TA, T3w, T3Q, T30, T3C, TE, T1X, T1D; + E TG, T1G, T1o, T2p, T1Y, T2u, T2c, T1Z, TL, T1t, T2d, T35, T3n, T3R, T3F; + E T20, T1w, T3M, Tf, T3h, T2L, T2e, TW, T3N, T3I, T2Q, T36, T2V, T37, T1d; + E Tu, T3S, T18, T1z, T1i, T24, T2g, T27, T2h, TQ, TV; + { + E TH, T3, T2I, TU, T32, T1s, T1p, T6, TM, Ta, Tb, T33, TK, T2J, TP; + E Tc, T4, T5; + { + E TS, TT, T1q, T1r; + { + E T1, T1n, TC, T2b, T1W, T2, T3v, T2Z, TD; + T1 = cr[0]; + T3v = Tw * TF; + T2Z = Tv * TF; + T2G = FNMS(Ty, TF, T2F); + T3z = FMA(Ty, TF, T2F); + T3m = FNMS(Ty, T2z, T3l); + T3g = FMA(Ty, T2C, T3f); + T3L = FNMS(Tz, TF, T3r); + T3s = FMA(Tz, TF, T3r); + T1V = FMA(Ty, Tz, Tx); + TA = FNMS(Ty, Tz, Tx); + TD = Tv * Tz; + T3w = FNMS(Tz, TB, T3v); + T3Q = FMA(Tz, TB, T3v); + T30 = FMA(Ty, TB, T2Z); + T3C = FNMS(Ty, TB, T2Z); + T1n = TA * TF; + TC = TA * TB; + T2b = T1V * TF; + T1W = T1V * TB; + TE = FMA(Ty, Tw, TD); + T1X = FNMS(Ty, Tw, TD); + T2 = ci[WS(rs, 7)]; + TS = ci[WS(rs, 9)]; + T1D = FMA(TE, TF, TC); + TG = FNMS(TE, TF, TC); + T1G = FNMS(TE, TB, T1n); + T1o = FMA(TE, TB, T1n); + T2p = FMA(T1X, TF, T1W); + T1Y = FNMS(T1X, TF, T1W); + T2u = FNMS(T1X, TB, T2b); + T2c = FMA(T1X, TB, T2b); + TH = T1 - T2; + T3 = T1 + T2; + TT = cr[WS(rs, 14)]; + } + T1q = ci[WS(rs, 15)]; + T1r = cr[WS(rs, 8)]; + T4 = cr[WS(rs, 4)]; + T2I = TS - TT; + TU = TS + TT; + T32 = T1q - T1r; + T1s = T1q + T1r; + T5 = ci[WS(rs, 3)]; + } + { + E TI, TJ, T8, T9, TN, TO; + T8 = cr[WS(rs, 2)]; + T9 = ci[WS(rs, 5)]; + TI = ci[WS(rs, 11)]; + T1p = T4 - T5; + T6 = T4 + T5; + TM = T8 - T9; + Ta = 
T8 + T9; + TJ = cr[WS(rs, 12)]; + TN = ci[WS(rs, 13)]; + TO = cr[WS(rs, 10)]; + Tb = ci[WS(rs, 1)]; + T33 = TI - TJ; + TK = TI + TJ; + T2J = TN - TO; + TP = TN + TO; + Tc = cr[WS(rs, 6)]; + } + { + E TR, Td, T3D, T34; + T1Z = TH + TK; + TL = TH - TK; + T1t = T1p + T1s; + T2d = T1s - T1p; + TR = Tb - Tc; + Td = Tb + Tc; + T3D = T32 + T33; + T34 = T32 - T33; + { + E Te, T2K, T1u, T1v, T31, T3E, T2H, T7; + Te = Ta + Td; + T31 = Ta - Td; + T3E = T2J + T2I; + T2K = T2I - T2J; + TQ = TM - TP; + T1u = TM + TP; + T1v = TR + TU; + TV = TR - TU; + T35 = T31 + T34; + T3n = T34 - T31; + T3R = T3D - T3E; + T3F = T3D + T3E; + T2H = T3 - T6; + T7 = T3 + T6; + T20 = T1u + T1v; + T1w = T1u - T1v; + T3M = T7 - Te; + Tf = T7 + Te; + T3h = T2H - T2K; + T2L = T2H + T2K; + } + } + } + { + E T1e, Ti, T2N, T1c, T2O, T1h, T19, Tl, T13, Tp, Tq, T2S, T11, T2T, T16; + E Tr, Tj, Tk, Tm, TY, Tt; + { + E T1a, T1b, Tg, Th, T1f, T1g; + Tg = cr[WS(rs, 1)]; + Th = ci[WS(rs, 6)]; + T1a = ci[WS(rs, 14)]; + T2e = TQ - TV; + TW = TQ + TV; + T1e = Tg - Th; + Ti = Tg + Th; + T1b = cr[WS(rs, 9)]; + T1f = ci[WS(rs, 10)]; + T1g = cr[WS(rs, 13)]; + Tj = cr[WS(rs, 5)]; + T2N = T1a - T1b; + T1c = T1a + T1b; + T2O = T1f - T1g; + T1h = T1f + T1g; + Tk = ci[WS(rs, 2)]; + } + { + E TZ, T10, Tn, To, T14, T15; + Tn = ci[0]; + To = cr[WS(rs, 7)]; + TZ = ci[WS(rs, 8)]; + T19 = Tj - Tk; + Tl = Tj + Tk; + T13 = Tn - To; + Tp = Tn + To; + T10 = cr[WS(rs, 15)]; + T14 = ci[WS(rs, 12)]; + T15 = cr[WS(rs, 11)]; + Tq = cr[WS(rs, 3)]; + T2S = TZ - T10; + T11 = TZ + T10; + T2T = T14 - T15; + T16 = T14 + T15; + Tr = ci[WS(rs, 4)]; + } + { + E T2P, T2U, T2M, Ts, T3G, T3H, T2R; + T2P = T2N - T2O; + T3G = T2N + T2O; + T3H = T2S + T2T; + T2U = T2S - T2T; + Tm = Ti + Tl; + T2M = Ti - Tl; + TY = Tq - Tr; + Ts = Tq + Tr; + T3N = T3H - T3G; + T3I = T3G + T3H; + Tt = Tp + Ts; + T2R = Tp - Ts; + T2Q = T2M - T2P; + T36 = T2M + T2P; + T2V = T2R + T2U; + T37 = T2U - T2R; + } + { + E T25, T26, T22, T23, T12, T17; + T12 = TY - T11; + T25 = TY + 
T11; + T26 = T13 + T16; + T17 = T13 - T16; + T22 = T1c - T19; + T1d = T19 + T1c; + Tu = Tm + Tt; + T3S = Tm - Tt; + T18 = FNMS(KP414213562, T17, T12); + T1z = FMA(KP414213562, T12, T17); + T1i = T1e - T1h; + T23 = T1e + T1h; + T24 = FNMS(KP414213562, T23, T22); + T2g = FMA(KP414213562, T22, T23); + T27 = FNMS(KP414213562, T26, T25); + T2h = FMA(KP414213562, T25, T26); + } + } + { + E T1j, T1y, T3V, T3X, T3W, T38, T3i, T3o, T2W, T3K, T3B, T3A; + cr[0] = Tf + Tu; + T3A = Tf - Tu; + T1j = FMA(KP414213562, T1i, T1d); + T1y = FNMS(KP414213562, T1d, T1i); + T3K = T3C * T3A; + T3B = T3z * T3A; + { + E T3O, T3T, T3J, T3P, T3U; + T3O = T3M - T3N; + T3V = T3M + T3N; + T3X = T3S + T3R; + T3T = T3R - T3S; + ci[0] = T3F + T3I; + T3J = T3F - T3I; + T3P = T3L * T3O; + T3U = T3L * T3T; + T3W = TA * T3V; + cr[WS(rs, 8)] = FNMS(T3C, T3J, T3B); + ci[WS(rs, 8)] = FMA(T3z, T3J, T3K); + cr[WS(rs, 12)] = FNMS(T3Q, T3T, T3P); + ci[WS(rs, 12)] = FMA(T3Q, T3O, T3U); + T38 = T36 + T37; + T3i = T37 - T36; + T3o = T2Q - T2V; + T2W = T2Q + T2V; + } + { + E T2q, T21, T28, T2w, T2v, T2f, T2i, T2r; + { + E T2Y, T3a, T3c, T3d, T39, T3e, T3b, T2X, T3Y; + cr[WS(rs, 4)] = FNMS(TE, T3X, T3W); + T3Y = TA * T3X; + { + E T3t, T3j, T3x, T3p; + T3t = FMA(KP707106781, T3i, T3h); + T3j = FNMS(KP707106781, T3i, T3h); + T3x = FMA(KP707106781, T3o, T3n); + T3p = FNMS(KP707106781, T3o, T3n); + ci[WS(rs, 4)] = FMA(TE, T3V, T3Y); + { + E T3u, T3k, T3y, T3q; + T3u = T3s * T3t; + T3k = T3g * T3j; + T3y = T3s * T3x; + T3q = T3g * T3p; + cr[WS(rs, 6)] = FNMS(T3w, T3x, T3u); + cr[WS(rs, 14)] = FNMS(T3m, T3p, T3k); + ci[WS(rs, 6)] = FMA(T3w, T3t, T3y); + ci[WS(rs, 14)] = FMA(T3m, T3j, T3q); + T3b = FMA(KP707106781, T2W, T2L); + T2X = FNMS(KP707106781, T2W, T2L); + } + } + T2Y = T2G * T2X; + T3a = T30 * T2X; + T3c = T1V * T3b; + T3d = FMA(KP707106781, T38, T35); + T39 = FNMS(KP707106781, T38, T35); + T3e = T1X * T3b; + T2q = FMA(KP707106781, T20, T1Z); + T21 = FNMS(KP707106781, T20, T1Z); + cr[WS(rs, 2)] = FNMS(T1X, T3d, 
T3c); + ci[WS(rs, 10)] = FMA(T2G, T39, T3a); + cr[WS(rs, 10)] = FNMS(T30, T39, T2Y); + ci[WS(rs, 2)] = FMA(T1V, T3d, T3e); + T28 = T24 + T27; + T2w = T27 - T24; + T2v = FNMS(KP707106781, T2e, T2d); + T2f = FMA(KP707106781, T2e, T2d); + T2i = T2g - T2h; + T2r = T2g + T2h; + } + { + E TX, T1k, T1x, T1A; + T1J = FMA(KP707106781, TW, TL); + TX = FNMS(KP707106781, TW, TL); + { + E T2l, T29, T2n, T2j; + T2l = FNMS(KP923879532, T28, T21); + T29 = FMA(KP923879532, T28, T21); + T2n = FMA(KP923879532, T2i, T2f); + T2j = FNMS(KP923879532, T2i, T2f); + { + E T2o, T2m, T2k, T2a; + T2o = Tz * T2l; + T2m = Tw * T2l; + T2k = T2c * T29; + T2a = T1Y * T29; + ci[WS(rs, 3)] = FMA(Tw, T2n, T2o); + cr[WS(rs, 3)] = FNMS(Tz, T2n, T2m); + ci[WS(rs, 11)] = FMA(T1Y, T2j, T2k); + cr[WS(rs, 11)] = FNMS(T2c, T2j, T2a); + T1k = T18 - T1j; + T1O = T1j + T18; + } + } + T1N = FMA(KP707106781, T1w, T1t); + T1x = FNMS(KP707106781, T1w, T1t); + T1A = T1y - T1z; + T1K = T1y + T1z; + { + E T1E, T1l, T1H, T1B; + T1E = FMA(KP923879532, T1k, TX); + T1l = FNMS(KP923879532, T1k, TX); + T1H = FMA(KP923879532, T1A, T1x); + T1B = FNMS(KP923879532, T1A, T1x); + { + E T1I, T1F, T1C, T1m; + T1I = T1G * T1E; + T1F = T1D * T1E; + T1C = T1o * T1l; + T1m = TG * T1l; + ci[WS(rs, 5)] = FMA(T1D, T1H, T1I); + cr[WS(rs, 5)] = FNMS(T1G, T1H, T1F); + ci[WS(rs, 13)] = FMA(TG, T1B, T1C); + cr[WS(rs, 13)] = FNMS(T1o, T1B, T1m); + } + } + { + E T2A, T2s, T2D, T2x; + T2A = FMA(KP923879532, T2r, T2q); + T2s = FNMS(KP923879532, T2r, T2q); + T2D = FNMS(KP923879532, T2w, T2v); + T2x = FMA(KP923879532, T2w, T2v); + { + E T2B, T2t, T2E, T2y; + T2B = T2z * T2A; + T2t = T2p * T2s; + T2E = T2z * T2D; + T2y = T2p * T2x; + cr[WS(rs, 15)] = FNMS(T2C, T2D, T2B); + cr[WS(rs, 7)] = FNMS(T2u, T2x, T2t); + ci[WS(rs, 15)] = FMA(T2C, T2A, T2E); + ci[WS(rs, 7)] = FMA(T2u, T2s, T2y); + } + } + } + } + } + } + } + { + E T1L, T1R, T1P, T1T; + T1L = FNMS(KP923879532, T1K, T1J); + T1R = FMA(KP923879532, T1K, T1J); + T1P = FNMS(KP923879532, T1O, T1N); + 
T1T = FMA(KP923879532, T1O, T1N); + { + E T1S, T1M, T1U, T1Q; + T1S = Tv * T1R; + T1M = TB * T1L; + T1U = Tv * T1T; + T1Q = TB * T1P; + cr[WS(rs, 1)] = FNMS(Ty, T1T, T1S); + cr[WS(rs, 9)] = FNMS(TF, T1P, T1M); + ci[WS(rs, 1)] = FMA(Ty, T1R, T1U); + ci[WS(rs, 9)] = FMA(TF, T1L, T1Q); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hb2_16", twinstr, &GENUS, {104, 42, 92, 0} }; + +void X(codelet_hb2_16) (planner *p) { + X(khc2hc_register) (p, hb2_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 16 -dif -name hb2_16 -include hb.h */ + +/* + * This function contains 196 FP additions, 108 FP multiplications, + * (or, 156 additions, 68 multiplications, 40 fused multiply/add), + * 80 stack variables, 3 constants, and 64 memory accesses + */ +#include "hb.h" + +static void hb2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { + E Tv, Ty, T1l, T1n, T1p, T1t, T27, T25, Tz, Tw, TB, T21, T1P, T1H, T1X; + E T17, T1L, T1N, T1v, T1w, T1x, T1B, T2F, T2T, T2b, T2R, T3j, T3x, T35, T3t; + { + E TA, T1J, T15, T1G, Tx, T1K, T16, T1F; + { + E T1m, T1s, T1o, T1r; + Tv = W[0]; + Ty = W[1]; + T1l = W[2]; + T1n = W[3]; + T1m = Tv * T1l; + T1s = Ty * T1l; + T1o = Ty * T1n; + T1r = Tv * T1n; + T1p = T1m + T1o; + T1t = T1r - T1s; + T27 = T1r + T1s; + T25 = T1m - T1o; + Tz = W[5]; + TA = Ty * Tz; + T1J = T1l * Tz; + T15 = Tv * Tz; + T1G = T1n * Tz; + Tw = W[4]; + 
Tx = Tv * Tw; + T1K = T1n * Tw; + T16 = Ty * Tw; + T1F = T1l * Tw; + } + TB = Tx - TA; + T21 = T1J + T1K; + T1P = T15 - T16; + T1H = T1F + T1G; + T1X = T1F - T1G; + T17 = T15 + T16; + T1L = T1J - T1K; + T1N = Tx + TA; + T1v = W[6]; + T1w = W[7]; + T1x = FMA(Tv, T1v, Ty * T1w); + T1B = FNMS(Ty, T1v, Tv * T1w); + { + E T2D, T2E, T29, T2a; + T2D = T25 * Tz; + T2E = T27 * Tw; + T2F = T2D + T2E; + T2T = T2D - T2E; + T29 = T25 * Tw; + T2a = T27 * Tz; + T2b = T29 - T2a; + T2R = T29 + T2a; + } + { + E T3h, T3i, T33, T34; + T3h = T1p * Tz; + T3i = T1t * Tw; + T3j = T3h + T3i; + T3x = T3h - T3i; + T33 = T1p * Tw; + T34 = T1t * Tz; + T35 = T33 - T34; + T3t = T33 + T34; + } + } + { + E T7, T36, T3k, TC, T1f, T2e, T2I, T1Q, Te, TJ, T1R, T18, T2L, T37, T2l; + E T3l, Tm, T1T, TT, T1h, T2A, T2N, T3b, T3n, Tt, T1U, T12, T1i, T2t, T2O; + E T3e, T3o; + { + E T3, T2c, T1e, T2d, T6, T2G, T1b, T2H; + { + E T1, T2, T1c, T1d; + T1 = cr[0]; + T2 = ci[WS(rs, 7)]; + T3 = T1 + T2; + T2c = T1 - T2; + T1c = ci[WS(rs, 11)]; + T1d = cr[WS(rs, 12)]; + T1e = T1c - T1d; + T2d = T1c + T1d; + } + { + E T4, T5, T19, T1a; + T4 = cr[WS(rs, 4)]; + T5 = ci[WS(rs, 3)]; + T6 = T4 + T5; + T2G = T4 - T5; + T19 = ci[WS(rs, 15)]; + T1a = cr[WS(rs, 8)]; + T1b = T19 - T1a; + T2H = T19 + T1a; + } + T7 = T3 + T6; + T36 = T2c + T2d; + T3k = T2H - T2G; + TC = T3 - T6; + T1f = T1b - T1e; + T2e = T2c - T2d; + T2I = T2G + T2H; + T1Q = T1b + T1e; + } + { + E Ta, T2f, TI, T2g, Td, T2i, TF, T2j; + { + E T8, T9, TG, TH; + T8 = cr[WS(rs, 2)]; + T9 = ci[WS(rs, 5)]; + Ta = T8 + T9; + T2f = T8 - T9; + TG = ci[WS(rs, 13)]; + TH = cr[WS(rs, 10)]; + TI = TG - TH; + T2g = TG + TH; + } + { + E Tb, Tc, TD, TE; + Tb = ci[WS(rs, 1)]; + Tc = cr[WS(rs, 6)]; + Td = Tb + Tc; + T2i = Tb - Tc; + TD = ci[WS(rs, 9)]; + TE = cr[WS(rs, 14)]; + TF = TD - TE; + T2j = TD + TE; + } + Te = Ta + Td; + TJ = TF - TI; + T1R = TI + TF; + T18 = Ta - Td; + { + E T2J, T2K, T2h, T2k; + T2J = T2f + T2g; + T2K = T2i + T2j; + T2L = KP707106781 * (T2J - T2K); + 
T37 = KP707106781 * (T2J + T2K); + T2h = T2f - T2g; + T2k = T2i - T2j; + T2l = KP707106781 * (T2h + T2k); + T3l = KP707106781 * (T2h - T2k); + } + } + { + E Ti, T2x, TR, T2y, Tl, T2u, TO, T2v, TL, TS; + { + E Tg, Th, TP, TQ; + Tg = cr[WS(rs, 1)]; + Th = ci[WS(rs, 6)]; + Ti = Tg + Th; + T2x = Tg - Th; + TP = ci[WS(rs, 10)]; + TQ = cr[WS(rs, 13)]; + TR = TP - TQ; + T2y = TP + TQ; + } + { + E Tj, Tk, TM, TN; + Tj = cr[WS(rs, 5)]; + Tk = ci[WS(rs, 2)]; + Tl = Tj + Tk; + T2u = Tj - Tk; + TM = ci[WS(rs, 14)]; + TN = cr[WS(rs, 9)]; + TO = TM - TN; + T2v = TM + TN; + } + Tm = Ti + Tl; + T1T = TO + TR; + TL = Ti - Tl; + TS = TO - TR; + TT = TL - TS; + T1h = TL + TS; + { + E T2w, T2z, T39, T3a; + T2w = T2u + T2v; + T2z = T2x - T2y; + T2A = FMA(KP923879532, T2w, KP382683432 * T2z); + T2N = FNMS(KP382683432, T2w, KP923879532 * T2z); + T39 = T2x + T2y; + T3a = T2v - T2u; + T3b = FNMS(KP923879532, T3a, KP382683432 * T39); + T3n = FMA(KP382683432, T3a, KP923879532 * T39); + } + } + { + E Tp, T2q, T10, T2r, Ts, T2n, TX, T2o, TU, T11; + { + E Tn, To, TY, TZ; + Tn = ci[0]; + To = cr[WS(rs, 7)]; + Tp = Tn + To; + T2q = Tn - To; + TY = ci[WS(rs, 12)]; + TZ = cr[WS(rs, 11)]; + T10 = TY - TZ; + T2r = TY + TZ; + } + { + E Tq, Tr, TV, TW; + Tq = cr[WS(rs, 3)]; + Tr = ci[WS(rs, 4)]; + Ts = Tq + Tr; + T2n = Tq - Tr; + TV = ci[WS(rs, 8)]; + TW = cr[WS(rs, 15)]; + TX = TV - TW; + T2o = TV + TW; + } + Tt = Tp + Ts; + T1U = TX + T10; + TU = Tp - Ts; + T11 = TX - T10; + T12 = TU + T11; + T1i = T11 - TU; + { + E T2p, T2s, T3c, T3d; + T2p = T2n - T2o; + T2s = T2q - T2r; + T2t = FNMS(KP382683432, T2s, KP923879532 * T2p); + T2O = FMA(KP382683432, T2p, KP923879532 * T2s); + T3c = T2q + T2r; + T3d = T2n + T2o; + T3e = FNMS(KP923879532, T3d, KP382683432 * T3c); + T3o = FMA(KP382683432, T3d, KP923879532 * T3c); + } + } + { + E Tf, Tu, T1O, T1S, T1V, T1W; + Tf = T7 + Te; + Tu = Tm + Tt; + T1O = Tf - Tu; + T1S = T1Q + T1R; + T1V = T1T + T1U; + T1W = T1S - T1V; + cr[0] = Tf + Tu; + ci[0] = T1S + T1V; + 
cr[WS(rs, 8)] = FNMS(T1P, T1W, T1N * T1O); + ci[WS(rs, 8)] = FMA(T1P, T1O, T1N * T1W); + } + { + E T3g, T3r, T3q, T3s; + { + E T38, T3f, T3m, T3p; + T38 = T36 - T37; + T3f = T3b + T3e; + T3g = T38 - T3f; + T3r = T38 + T3f; + T3m = T3k + T3l; + T3p = T3n - T3o; + T3q = T3m - T3p; + T3s = T3m + T3p; + } + cr[WS(rs, 11)] = FNMS(T3j, T3q, T35 * T3g); + ci[WS(rs, 11)] = FMA(T3j, T3g, T35 * T3q); + cr[WS(rs, 3)] = FNMS(T1n, T3s, T1l * T3r); + ci[WS(rs, 3)] = FMA(T1n, T3r, T1l * T3s); + } + { + E T3w, T3B, T3A, T3C; + { + E T3u, T3v, T3y, T3z; + T3u = T36 + T37; + T3v = T3n + T3o; + T3w = T3u - T3v; + T3B = T3u + T3v; + T3y = T3k - T3l; + T3z = T3b - T3e; + T3A = T3y + T3z; + T3C = T3y - T3z; + } + cr[WS(rs, 7)] = FNMS(T3x, T3A, T3t * T3w); + ci[WS(rs, 7)] = FMA(T3t, T3A, T3x * T3w); + cr[WS(rs, 15)] = FNMS(T1w, T3C, T1v * T3B); + ci[WS(rs, 15)] = FMA(T1v, T3C, T1w * T3B); + } + { + E T14, T1q, T1k, T1u; + { + E TK, T13, T1g, T1j; + TK = TC + TJ; + T13 = KP707106781 * (TT + T12); + T14 = TK - T13; + T1q = TK + T13; + T1g = T18 + T1f; + T1j = KP707106781 * (T1h + T1i); + T1k = T1g - T1j; + T1u = T1g + T1j; + } + cr[WS(rs, 10)] = FNMS(T17, T1k, TB * T14); + ci[WS(rs, 10)] = FMA(T17, T14, TB * T1k); + cr[WS(rs, 2)] = FNMS(T1t, T1u, T1p * T1q); + ci[WS(rs, 2)] = FMA(T1t, T1q, T1p * T1u); + } + { + E T1A, T1I, T1E, T1M; + { + E T1y, T1z, T1C, T1D; + T1y = TC - TJ; + T1z = KP707106781 * (T1i - T1h); + T1A = T1y - T1z; + T1I = T1y + T1z; + T1C = T1f - T18; + T1D = KP707106781 * (TT - T12); + T1E = T1C - T1D; + T1M = T1C + T1D; + } + cr[WS(rs, 14)] = FNMS(T1B, T1E, T1x * T1A); + ci[WS(rs, 14)] = FMA(T1x, T1E, T1B * T1A); + cr[WS(rs, 6)] = FNMS(T1L, T1M, T1H * T1I); + ci[WS(rs, 6)] = FMA(T1H, T1M, T1L * T1I); + } + { + E T2C, T2S, T2Q, T2U; + { + E T2m, T2B, T2M, T2P; + T2m = T2e - T2l; + T2B = T2t - T2A; + T2C = T2m - T2B; + T2S = T2m + T2B; + T2M = T2I - T2L; + T2P = T2N - T2O; + T2Q = T2M - T2P; + T2U = T2M + T2P; + } + cr[WS(rs, 13)] = FNMS(T2F, T2Q, T2b * T2C); + ci[WS(rs, 
13)] = FMA(T2F, T2C, T2b * T2Q); + cr[WS(rs, 5)] = FNMS(T2T, T2U, T2R * T2S); + ci[WS(rs, 5)] = FMA(T2T, T2S, T2R * T2U); + } + { + E T2X, T31, T30, T32; + { + E T2V, T2W, T2Y, T2Z; + T2V = T2e + T2l; + T2W = T2N + T2O; + T2X = T2V - T2W; + T31 = T2V + T2W; + T2Y = T2I + T2L; + T2Z = T2A + T2t; + T30 = T2Y - T2Z; + T32 = T2Y + T2Z; + } + cr[WS(rs, 9)] = FNMS(Tz, T30, Tw * T2X); + ci[WS(rs, 9)] = FMA(Tw, T30, Tz * T2X); + cr[WS(rs, 1)] = FNMS(Ty, T32, Tv * T31); + ci[WS(rs, 1)] = FMA(Tv, T32, Ty * T31); + } + { + E T20, T26, T24, T28; + { + E T1Y, T1Z, T22, T23; + T1Y = T7 - Te; + T1Z = T1U - T1T; + T20 = T1Y - T1Z; + T26 = T1Y + T1Z; + T22 = T1Q - T1R; + T23 = Tm - Tt; + T24 = T22 - T23; + T28 = T23 + T22; + } + cr[WS(rs, 12)] = FNMS(T21, T24, T1X * T20); + ci[WS(rs, 12)] = FMA(T1X, T24, T21 * T20); + cr[WS(rs, 4)] = FNMS(T27, T28, T25 * T26); + ci[WS(rs, 4)] = FMA(T25, T28, T27 * T26); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hb2_16", twinstr, &GENUS, {156, 68, 40, 0} }; + +void X(codelet_hb2_16) (planner *p) { + X(khc2hc_register) (p, hb2_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1087 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:29 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 20 -dif -name hb2_20 -include hb.h */ + +/* + * This function contains 276 FP additions, 198 FP multiplications, + * (or, 136 additions, 58 multiplications, 140 fused multiply/add), + * 153 stack variables, 4 constants, and 80 memory accesses + */ +#include "hb.h" + +static void hb2_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) { + E T1S, T1O, T1s, TI, T24, T1Y, T2g, T2k, TS, TR, T1I, T26, T1o, T20, T1F; + E T25, TT, T1Z; + { + E TD, TH, TE, T1L, T1N, T1X, TG, T1V, T2Y, T2b, T29, T2s, T36, T3e, T31; + E T2o, T3b, T5b, T2c, T2U, T4y, T4u, T2f, T5g, T47, T5p, T4b, T5l; + { + E T1r, TF, T2T, T1M, T1R, T2X, T2r, T4x; + TD = W[0]; + TH = W[3]; + TE = W[2]; + T1L = W[6]; + T1N = W[7]; + T1r = TD * TH; + TF = TD * TE; + T2T = TE * T1L; 
+ T1M = TD * T1L; + T1R = TD * T1N; + T2X = TE * T1N; + T1X = W[5]; + TG = W[1]; + T1V = W[4]; + T2Y = FNMS(TH, T1L, T2X); + T2r = TD * T1X; + { + E T23, T2n, T1W, T2a; + T23 = TE * T1X; + T1S = FNMS(TG, T1L, T1R); + T1O = FMA(TG, T1N, T1M); + T2b = FMA(TG, TE, T1r); + T1s = FNMS(TG, TE, T1r); + T29 = FNMS(TG, TH, TF); + TI = FMA(TG, TH, TF); + T2n = TD * T1V; + T1W = TE * T1V; + T2s = FMA(TG, T1V, T2r); + T36 = FNMS(TG, T1V, T2r); + T3e = FMA(TH, T1V, T23); + T24 = FNMS(TH, T1V, T23); + T2a = T29 * T1V; + T31 = FMA(TG, T1X, T2n); + T2o = FNMS(TG, T1X, T2n); + T3b = FNMS(TH, T1X, T1W); + T1Y = FMA(TH, T1X, T1W); + T5b = FNMS(T2b, T1X, T2a); + T2c = FMA(T2b, T1X, T2a); + T2U = FMA(TH, T1N, T2T); + } + T4x = T29 * T1N; + { + E T4t, T2d, T2j, T2e; + T4t = T29 * T1L; + T2e = T29 * T1X; + T4y = FNMS(T2b, T1L, T4x); + T4u = FMA(T2b, T1N, T4t); + T2f = FNMS(T2b, T1V, T2e); + T5g = FMA(T2b, T1V, T2e); + T2d = T2c * T1L; + T2j = T2c * T1N; + T47 = TI * T1V; + T2g = FMA(T2f, T1N, T2d); + T2k = FNMS(T2f, T1L, T2j); + T5p = TI * T1N; + T4b = TI * T1X; + T5l = TI * T1L; + } + } + { + E T4f, T48, T4c, T4k, T5m, T5q, T3j, T4B, T7, TJ, T4V, T3V, T1z, T2H, T3x; + E T42, T18, T3q, T43, T1n, T2D, T53, T52, T2A, T1H, T4R, T4X, T4W, T4O, T1G; + E T2O, T3I, T2P, T3P, T2K, T2M, T1C, T1E, TC, T2w, T40, T3Y, T4K, T4I, TQ; + { + E T1y, T3U, T1v, T3T; + { + E T3h, T3, T1t, T3i, T6, T1u; + { + E T1w, T1x, T1, T2, T4, T5; + T1 = cr[0]; + T2 = ci[WS(rs, 9)]; + T1w = ci[WS(rs, 14)]; + T4f = FNMS(T1s, T1X, T47); + T48 = FMA(T1s, T1X, T47); + T4c = FNMS(T1s, T1V, T4b); + T4k = FMA(T1s, T1V, T4b); + T5m = FMA(T1s, T1N, T5l); + T5q = FNMS(T1s, T1L, T5p); + T3h = T1 - T2; + T3 = T1 + T2; + T1x = cr[WS(rs, 15)]; + T4 = cr[WS(rs, 5)]; + T5 = ci[WS(rs, 4)]; + T1t = ci[WS(rs, 19)]; + T3i = T1w + T1x; + T1y = T1w - T1x; + T3U = T4 - T5; + T6 = T4 + T5; + T1u = cr[WS(rs, 10)]; + } + T3j = T3h + T3i; + T4B = T3h - T3i; + T7 = T3 + T6; + TJ = T3 - T6; + T1v = T1t - T1u; + T3T = T1t + T1u; + } + { + E T3m, 
T4C, Te, TK, T4M, T3L, T1f, T2y, TO, TA, T4Q, T3H, T3w, T4G, T2C; + E T17, T3p, T4D, Tl, TL, T3O, T4N, T1m, T2z, T3t, T4F, Tt, TN, T3E, T4P; + E T10, T2B; + { + E T3u, T13, T3v, T16; + { + E T1e, T3K, T1b, T3J; + { + E T3k, Ta, T19, T3l, Td, T1a; + { + E T1c, T1d, T8, T9, Tb, Tc; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 5)]; + T4V = T3U + T3T; + T3V = T3T - T3U; + T1z = T1v - T1y; + T2H = T1v + T1y; + T3k = T8 - T9; + Ta = T8 + T9; + T1c = ci[WS(rs, 10)]; + T1d = cr[WS(rs, 19)]; + Tb = cr[WS(rs, 9)]; + Tc = ci[0]; + T19 = ci[WS(rs, 15)]; + T3l = T1c + T1d; + T1e = T1c - T1d; + T3K = Tb - Tc; + Td = Tb + Tc; + T1a = cr[WS(rs, 14)]; + } + T3m = T3k + T3l; + T4C = T3k - T3l; + Te = Ta + Td; + TK = Ta - Td; + T1b = T19 - T1a; + T3J = T19 + T1a; + } + { + E Tw, T14, T3F, Tz, T3G, T15; + { + E Tx, Ty, Tu, Tv, T11, T12; + Tu = ci[WS(rs, 7)]; + Tv = cr[WS(rs, 2)]; + T4M = T3K + T3J; + T3L = T3J - T3K; + T1f = T1b - T1e; + T2y = T1b + T1e; + T3u = Tu - Tv; + Tw = Tu + Tv; + Tx = ci[WS(rs, 2)]; + Ty = cr[WS(rs, 7)]; + T11 = ci[WS(rs, 17)]; + T12 = cr[WS(rs, 12)]; + T14 = ci[WS(rs, 12)]; + T3F = Tx - Ty; + Tz = Tx + Ty; + T3G = T11 + T12; + T13 = T11 - T12; + T15 = cr[WS(rs, 17)]; + } + TO = Tw - Tz; + TA = Tw + Tz; + T4Q = T3F - T3G; + T3H = T3F + T3G; + T3v = T14 + T15; + T16 = T14 - T15; + } + } + { + E Ti, T3n, Th, T3o, T1l, Tj, T1g, T1h; + { + E Tf, Tg, T1j, T1k; + Tf = ci[WS(rs, 3)]; + T3w = T3u - T3v; + T4G = T3u + T3v; + T2C = T13 + T16; + T17 = T13 - T16; + Tg = cr[WS(rs, 6)]; + T1j = ci[WS(rs, 18)]; + T1k = cr[WS(rs, 11)]; + Ti = cr[WS(rs, 1)]; + T3n = Tf - Tg; + Th = Tf + Tg; + T3o = T1j + T1k; + T1l = T1j - T1k; + Tj = ci[WS(rs, 8)]; + T1g = ci[WS(rs, 13)]; + T1h = cr[WS(rs, 16)]; + } + { + E T3M, Tk, T3N, T1i; + T3p = T3n + T3o; + T4D = T3n - T3o; + T3M = Ti - Tj; + Tk = Ti + Tj; + T3N = T1g + T1h; + T1i = T1g - T1h; + Tl = Th + Tk; + TL = Th - Tk; + T3O = T3M + T3N; + T4N = T3M - T3N; + T1m = T1i - T1l; + T2z = T1i + T1l; + } + } + { + E Tq, T3r, Tp, T3s, TZ, Tr, 
TU, TV; + { + E Tn, To, TX, TY; + Tn = cr[WS(rs, 8)]; + To = ci[WS(rs, 1)]; + TX = ci[WS(rs, 16)]; + TY = cr[WS(rs, 13)]; + Tq = ci[WS(rs, 6)]; + T3r = Tn - To; + Tp = Tn + To; + T3s = TX + TY; + TZ = TX - TY; + Tr = cr[WS(rs, 3)]; + TU = ci[WS(rs, 11)]; + TV = cr[WS(rs, 18)]; + } + { + E T3D, Ts, T3C, TW; + T3t = T3r - T3s; + T4F = T3r + T3s; + T3D = Tq - Tr; + Ts = Tq + Tr; + T3C = TU + TV; + TW = TU - TV; + Tt = Tp + Ts; + TN = Tp - Ts; + T3E = T3C - T3D; + T4P = T3D + T3C; + T10 = TW - TZ; + T2B = TW + TZ; + } + } + } + { + E T1B, T1A, T2J, T4H, T4E, T2I, TM, TP; + T3x = T3t + T3w; + T42 = T3t - T3w; + T18 = T10 - T17; + T1B = T10 + T17; + T3q = T3m + T3p; + T43 = T3m - T3p; + T1n = T1f - T1m; + T1A = T1f + T1m; + T2J = T2B + T2C; + T2D = T2B - T2C; + T53 = T4F - T4G; + T4H = T4F + T4G; + T4E = T4C + T4D; + T52 = T4C - T4D; + T2A = T2y - T2z; + T2I = T2y + T2z; + TM = TK + TL; + T1H = TK - TL; + T4R = T4P - T4Q; + T4X = T4P + T4Q; + T4W = T4M + T4N; + T4O = T4M - T4N; + T1G = TN - TO; + TP = TN + TO; + { + E Tm, T3X, TB, T3W; + Tm = Te + Tl; + T2O = Te - Tl; + T3I = T3E + T3H; + T3X = T3E - T3H; + TB = Tt + TA; + T2P = Tt - TA; + T3P = T3L + T3O; + T3W = T3L - T3O; + T2K = T2I + T2J; + T2M = T2I - T2J; + T1C = T1A + T1B; + T1E = T1A - T1B; + TC = Tm + TB; + T2w = Tm - TB; + T40 = T3W - T3X; + T3Y = T3W + T3X; + T4K = T4E - T4H; + T4I = T4E + T4H; + TS = TM - TP; + TQ = TM + TP; + } + } + } + } + { + E T3A, T3y, T50, T1D, T2t, T2p, T4J, T5t, T5v, T4Z, T4Y; + cr[0] = T7 + TC; + T3A = T3q - T3x; + T3y = T3q + T3x; + T50 = T4W - T4X; + T4Y = T4W + T4X; + ci[0] = T2H + T2K; + T1D = FNMS(KP250000000, T1C, T1z); + T2t = T1z + T1C; + T2p = TJ + TQ; + TR = FNMS(KP250000000, TQ, TJ); + T4J = FNMS(KP250000000, T4I, T4B); + T5t = T4B + T4I; + T5v = T4V + T4Y; + T4Z = FNMS(KP250000000, T4Y, T4V); + { + E T4m, T44, T4i, T4p, T49, T3R, T4j, T4a, T3S, T4l, T41, T4q; + { + E T3z, T4v, T4w, T3Z, T4z; + T3z = FNMS(KP250000000, T3y, T3j); + T4v = T3j + T3y; + { + E T2u, T2q, T5u, 
T5w; + T2u = T2s * T2p; + T2q = T2o * T2p; + T5u = T2c * T5t; + T5w = T2c * T5v; + ci[WS(rs, 10)] = FMA(T2o, T2t, T2u); + cr[WS(rs, 10)] = FNMS(T2s, T2t, T2q); + cr[WS(rs, 5)] = FNMS(T2f, T5v, T5u); + ci[WS(rs, 5)] = FMA(T2f, T5t, T5w); + T4w = T4u * T4v; + } + T3Z = FNMS(KP250000000, T3Y, T3V); + T4z = T3V + T3Y; + { + E T3Q, T4h, T4A, T4g, T3B; + T3Q = FNMS(KP618033988, T3P, T3I); + T4h = FMA(KP618033988, T3I, T3P); + cr[WS(rs, 15)] = FNMS(T4y, T4z, T4w); + T4A = T4u * T4z; + T4m = FMA(KP618033988, T42, T43); + T44 = FNMS(KP618033988, T43, T42); + T4g = FMA(KP559016994, T3A, T3z); + T3B = FNMS(KP559016994, T3A, T3z); + ci[WS(rs, 15)] = FMA(T4y, T4v, T4A); + T4i = FNMS(KP951056516, T4h, T4g); + T4p = FMA(KP951056516, T4h, T4g); + T49 = FMA(KP951056516, T3Q, T3B); + T3R = FNMS(KP951056516, T3Q, T3B); + } + T4j = T4f * T4i; + T4a = T48 * T49; + T3S = TE * T3R; + T4l = FMA(KP559016994, T40, T3Z); + T41 = FNMS(KP559016994, T40, T3Z); + T4q = T1L * T4p; + } + { + E T5d, T4S, T54, T5i, T4L, T5c; + T5d = FNMS(KP618033988, T4O, T4R); + T4S = FMA(KP618033988, T4R, T4O); + { + E T4n, T4r, T4d, T45; + T4n = FMA(KP951056516, T4m, T4l); + T4r = FNMS(KP951056516, T4m, T4l); + T4d = FNMS(KP951056516, T44, T41); + T45 = FMA(KP951056516, T44, T41); + { + E T4o, T4s, T4e, T46; + T4o = T4f * T4n; + cr[WS(rs, 11)] = FNMS(T4k, T4n, T4j); + T4s = T1L * T4r; + cr[WS(rs, 19)] = FNMS(T1N, T4r, T4q); + T4e = T48 * T4d; + cr[WS(rs, 7)] = FNMS(T4c, T4d, T4a); + T46 = TE * T45; + cr[WS(rs, 3)] = FNMS(TH, T45, T3S); + ci[WS(rs, 11)] = FMA(T4k, T4i, T4o); + ci[WS(rs, 19)] = FMA(T1N, T4p, T4s); + ci[WS(rs, 7)] = FMA(T4c, T49, T4e); + ci[WS(rs, 3)] = FMA(TH, T3R, T46); + } + } + T54 = FMA(KP618033988, T53, T52); + T5i = FNMS(KP618033988, T52, T53); + T4L = FMA(KP559016994, T4K, T4J); + T5c = FNMS(KP559016994, T4K, T4J); + { + E T38, T2Q, T33, T2E, T2v, T37, T2N, T5h, T51, T2L, T2x, T32; + T38 = FNMS(KP618033988, T2O, T2P); + T2Q = FMA(KP618033988, T2P, T2O); + T5h = FNMS(KP559016994, T50, T4Z); + 
T51 = FMA(KP559016994, T50, T4Z); + { + E T5e, T5n, T57, T4T; + T5e = FNMS(KP951056516, T5d, T5c); + T5n = FMA(KP951056516, T5d, T5c); + T57 = FMA(KP951056516, T4S, T4L); + T4T = FNMS(KP951056516, T4S, T4L); + { + E T5j, T5r, T59, T55; + T5j = FMA(KP951056516, T5i, T5h); + T5r = FNMS(KP951056516, T5i, T5h); + T59 = FNMS(KP951056516, T54, T51); + T55 = FMA(KP951056516, T54, T51); + { + E T5f, T5o, T58, T4U; + T5f = T5b * T5e; + T5o = T5m * T5n; + T58 = T1V * T57; + T4U = TD * T4T; + { + E T5k, T5s, T5a, T56; + T5k = T5b * T5j; + T5s = T5m * T5r; + T5a = T1V * T59; + T56 = TD * T55; + cr[WS(rs, 13)] = FNMS(T5g, T5j, T5f); + cr[WS(rs, 17)] = FNMS(T5q, T5r, T5o); + cr[WS(rs, 9)] = FNMS(T1X, T59, T58); + cr[WS(rs, 1)] = FNMS(TG, T55, T4U); + ci[WS(rs, 13)] = FMA(T5g, T5e, T5k); + ci[WS(rs, 17)] = FMA(T5q, T5n, T5s); + ci[WS(rs, 9)] = FMA(T1X, T57, T5a); + ci[WS(rs, 1)] = FMA(TG, T4T, T56); + } + } + } + } + T2L = FNMS(KP250000000, T2K, T2H); + T33 = FNMS(KP618033988, T2A, T2D); + T2E = FMA(KP618033988, T2D, T2A); + T2v = FNMS(KP250000000, TC, T7); + T37 = FNMS(KP559016994, T2M, T2L); + T2N = FMA(KP559016994, T2M, T2L); + T1I = FNMS(KP618033988, T1H, T1G); + T26 = FMA(KP618033988, T1G, T1H); + T2x = FMA(KP559016994, T2w, T2v); + T32 = FNMS(KP559016994, T2w, T2v); + { + E T3f, T39, T2R, T2Z; + T3f = FNMS(KP951056516, T38, T37); + T39 = FMA(KP951056516, T38, T37); + T2R = FNMS(KP951056516, T2Q, T2N); + T2Z = FMA(KP951056516, T2Q, T2N); + { + E T3c, T34, T2F, T2V; + T3c = FMA(KP951056516, T33, T32); + T34 = FNMS(KP951056516, T33, T32); + T2F = FMA(KP951056516, T2E, T2x); + T2V = FNMS(KP951056516, T2E, T2x); + { + E T3a, T35, T3g, T3d; + T3a = T36 * T34; + T35 = T31 * T34; + T3g = T3e * T3c; + T3d = T3b * T3c; + { + E T30, T2W, T2S, T2G; + T30 = T2Y * T2V; + T2W = T2U * T2V; + T2S = T2b * T2F; + T2G = T29 * T2F; + ci[WS(rs, 8)] = FMA(T31, T39, T3a); + cr[WS(rs, 8)] = FNMS(T36, T39, T35); + ci[WS(rs, 12)] = FMA(T3b, T3f, T3g); + cr[WS(rs, 12)] = FNMS(T3e, T3f, T3d); + 
ci[WS(rs, 16)] = FMA(T2U, T2Z, T30); + cr[WS(rs, 16)] = FNMS(T2Y, T2Z, T2W); + ci[WS(rs, 4)] = FMA(T29, T2R, T2S); + cr[WS(rs, 4)] = FNMS(T2b, T2R, T2G); + } + } + } + } + T1o = FNMS(KP618033988, T1n, T18); + T20 = FMA(KP618033988, T18, T1n); + T1F = FNMS(KP559016994, T1E, T1D); + T25 = FMA(KP559016994, T1E, T1D); + } + } + } + } + } + } + TT = FNMS(KP559016994, TS, TR); + T1Z = FMA(KP559016994, TS, TR); + { + E T2l, T27, T1J, T1T; + T2l = FNMS(KP951056516, T26, T25); + T27 = FMA(KP951056516, T26, T25); + T1J = FNMS(KP951056516, T1I, T1F); + T1T = FMA(KP951056516, T1I, T1F); + { + E T2h, T21, T1p, T1P; + T2h = FMA(KP951056516, T20, T1Z); + T21 = FNMS(KP951056516, T20, T1Z); + T1p = FMA(KP951056516, T1o, TT); + T1P = FNMS(KP951056516, T1o, TT); + { + E T28, T22, T2m, T2i; + T28 = T24 * T21; + T22 = T1Y * T21; + T2m = T2k * T2h; + T2i = T2g * T2h; + { + E T1U, T1Q, T1K, T1q; + T1U = T1S * T1P; + T1Q = T1O * T1P; + T1K = T1s * T1p; + T1q = TI * T1p; + ci[WS(rs, 6)] = FMA(T1Y, T27, T28); + cr[WS(rs, 6)] = FNMS(T24, T27, T22); + ci[WS(rs, 14)] = FMA(T2g, T2l, T2m); + cr[WS(rs, 14)] = FNMS(T2k, T2l, T2i); + ci[WS(rs, 18)] = FMA(T1O, T1T, T1U); + cr[WS(rs, 18)] = FNMS(T1S, T1T, T1Q); + ci[WS(rs, 2)] = FMA(TI, T1J, T1K); + cr[WS(rs, 2)] = FNMS(T1s, T1J, T1q); + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hb2_20", twinstr, &GENUS, {136, 58, 140, 0} }; + +void X(codelet_hb2_20) (planner *p) { + X(khc2hc_register) (p, hb2_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 20 -dif -name hb2_20 -include hb.h */ + +/* + * This function contains 276 FP additions, 164 FP multiplications, + * (or, 204 additions, 92 multiplications, 72 fused multiply/add), + * 137 stack variables, 4 constants, and 80 
memory accesses + */ +#include "hb.h" + +static void hb2_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) { + E TD, TG, TE, TH, TJ, T1t, T27, T25, T1T, T1R, T1V, T2j, T2Z, T21, T2X; + E T2T, T2n, T2P, T3V, T41, T3R, T3X, T29, T2c, T4H, T4L, T1L, T1M, T1N, T2d; + E T4R, T1P, T4P, T49, T2N, T2f, T47, T2L; + { + E T1U, T2l, T1Z, T2i, T1S, T2m, T20, T2h; + { + E TF, T1s, TI, T1r; + TD = W[0]; + TG = W[1]; + TE = W[2]; + TH = W[3]; + TF = TD * TE; + T1s = TG * TE; + TI = TG * TH; + T1r = TD * TH; + TJ = TF + TI; + T1t = T1r - T1s; + T27 = T1r + T1s; + T25 = TF - TI; + T1T = W[5]; + T1U = TH * T1T; + T2l = TD * T1T; + T1Z = TE * T1T; + T2i = TG * T1T; + T1R = W[4]; + T1S = TE * T1R; + T2m = TG * T1R; + T20 = TH * T1R; + T2h = TD * T1R; + } + T1V = T1S + T1U; + T2j = T2h - T2i; + T2Z = T1Z + T20; + T21 = T1Z - T20; + T2X = T1S - T1U; + T2T = T2l - T2m; + T2n = T2l + T2m; + T2P = T2h + T2i; + { + E T3T, T3U, T3P, T3Q; + T3T = TJ * T1T; + T3U = T1t * T1R; + T3V = T3T - T3U; + T41 = T3T + T3U; + T3P = TJ * T1R; + T3Q = T1t * T1T; + T3R = T3P + T3Q; + T3X = T3P - T3Q; + { + E T26, T28, T2a, T2b; + T26 = T25 * T1R; + T28 = T27 * T1T; + T29 = T26 + T28; + T2a = T25 * T1T; + T2b = T27 * T1R; + T2c = T2a - T2b; + T4H = T26 - T28; + T4L = T2a + T2b; + T1L = W[6]; + T1M = W[7]; + T1N = FMA(TD, T1L, TG * T1M); + T2d = FMA(T29, T1L, T2c * T1M); + T4R = FNMS(T1t, T1L, TJ * T1M); + T1P = FNMS(TG, T1L, TD * T1M); + T4P = FMA(TJ, T1L, T1t * T1M); + T49 = FNMS(T27, T1L, T25 * T1M); + T2N = FNMS(TH, T1L, TE * T1M); + T2f = FNMS(T2c, T1L, T29 * T1M); + T47 = 
FMA(T25, T1L, T27 * T1M); + T2L = FMA(TE, T1L, TH * T1M); + } + } + } + { + E T7, T4i, T4x, TK, T1D, T3i, T3E, T2D, T19, T3L, T3M, T1o, T2x, T4C, T4B; + E T2u, T1v, T4r, T4o, T1u, T2H, T37, T2I, T3e, T3p, T3w, T3x, Tm, TB, TC; + E T4u, T4v, T4y, T2A, T2B, T2E, T1E, T1F, T1G, T4d, T4g, T4j, T3F, T3G, T3H; + E TN, TQ, TR, T48, T4a; + { + E T3, T3g, T1C, T3h, T6, T3D, T1z, T3C; + { + E T1, T2, T1A, T1B; + T1 = cr[0]; + T2 = ci[WS(rs, 9)]; + T3 = T1 + T2; + T3g = T1 - T2; + T1A = ci[WS(rs, 14)]; + T1B = cr[WS(rs, 15)]; + T1C = T1A - T1B; + T3h = T1A + T1B; + } + { + E T4, T5, T1x, T1y; + T4 = cr[WS(rs, 5)]; + T5 = ci[WS(rs, 4)]; + T6 = T4 + T5; + T3D = T4 - T5; + T1x = ci[WS(rs, 19)]; + T1y = cr[WS(rs, 10)]; + T1z = T1x - T1y; + T3C = T1x + T1y; + } + T7 = T3 + T6; + T4i = T3g - T3h; + T4x = T3D + T3C; + TK = T3 - T6; + T1D = T1z - T1C; + T3i = T3g + T3h; + T3E = T3C - T3D; + T2D = T1z + T1C; + } + { + E Te, T4b, T4m, TL, T11, T33, T3l, T2s, TA, T4f, T4q, TP, T1n, T3d, T3v; + E T2w, Tl, T4c, T4n, TM, T18, T36, T3o, T2t, Tt, T4e, T4p, TO, T1g, T3a; + E T3s, T2v; + { + E Ta, T3j, T10, T3k, Td, T32, TX, T31; + { + E T8, T9, TY, TZ; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 5)]; + Ta = T8 + T9; + T3j = T8 - T9; + TY = ci[WS(rs, 10)]; + TZ = cr[WS(rs, 19)]; + T10 = TY - TZ; + T3k = TY + TZ; + } + { + E Tb, Tc, TV, TW; + Tb = cr[WS(rs, 9)]; + Tc = ci[0]; + Td = Tb + Tc; + T32 = Tb - Tc; + TV = ci[WS(rs, 15)]; + TW = cr[WS(rs, 14)]; + TX = TV - TW; + T31 = TV + TW; + } + Te = Ta + Td; + T4b = T3j - T3k; + T4m = T32 + T31; + TL = Ta - Td; + T11 = TX - T10; + T33 = T31 - T32; + T3l = T3j + T3k; + T2s = TX + T10; + } + { + E Tw, T3t, Tz, T3b, T1j, T3c, T1m, T3u; + { + E Tu, Tv, Tx, Ty; + Tu = ci[WS(rs, 7)]; + Tv = cr[WS(rs, 2)]; + Tw = Tu + Tv; + T3t = Tu - Tv; + Tx = ci[WS(rs, 2)]; + Ty = cr[WS(rs, 7)]; + Tz = Tx + Ty; + T3b = Tx - Ty; + } + { + E T1h, T1i, T1k, T1l; + T1h = ci[WS(rs, 17)]; + T1i = cr[WS(rs, 12)]; + T1j = T1h - T1i; + T3c = T1h + T1i; + T1k = ci[WS(rs, 12)]; + T1l 
= cr[WS(rs, 17)]; + T1m = T1k - T1l; + T3u = T1k + T1l; + } + TA = Tw + Tz; + T4f = T3t + T3u; + T4q = T3b - T3c; + TP = Tw - Tz; + T1n = T1j - T1m; + T3d = T3b + T3c; + T3v = T3t - T3u; + T2w = T1j + T1m; + } + { + E Th, T3m, T17, T3n, Tk, T34, T14, T35; + { + E Tf, Tg, T15, T16; + Tf = ci[WS(rs, 3)]; + Tg = cr[WS(rs, 6)]; + Th = Tf + Tg; + T3m = Tf - Tg; + T15 = ci[WS(rs, 18)]; + T16 = cr[WS(rs, 11)]; + T17 = T15 - T16; + T3n = T15 + T16; + } + { + E Ti, Tj, T12, T13; + Ti = cr[WS(rs, 1)]; + Tj = ci[WS(rs, 8)]; + Tk = Ti + Tj; + T34 = Ti - Tj; + T12 = ci[WS(rs, 13)]; + T13 = cr[WS(rs, 16)]; + T14 = T12 - T13; + T35 = T12 + T13; + } + Tl = Th + Tk; + T4c = T3m - T3n; + T4n = T34 - T35; + TM = Th - Tk; + T18 = T14 - T17; + T36 = T34 + T35; + T3o = T3m + T3n; + T2t = T14 + T17; + } + { + E Tp, T3q, T1f, T3r, Ts, T39, T1c, T38; + { + E Tn, To, T1d, T1e; + Tn = cr[WS(rs, 8)]; + To = ci[WS(rs, 1)]; + Tp = Tn + To; + T3q = Tn - To; + T1d = ci[WS(rs, 16)]; + T1e = cr[WS(rs, 13)]; + T1f = T1d - T1e; + T3r = T1d + T1e; + } + { + E Tq, Tr, T1a, T1b; + Tq = ci[WS(rs, 6)]; + Tr = cr[WS(rs, 3)]; + Ts = Tq + Tr; + T39 = Tq - Tr; + T1a = ci[WS(rs, 11)]; + T1b = cr[WS(rs, 18)]; + T1c = T1a - T1b; + T38 = T1a + T1b; + } + Tt = Tp + Ts; + T4e = T3q + T3r; + T4p = T39 + T38; + TO = Tp - Ts; + T1g = T1c - T1f; + T3a = T38 - T39; + T3s = T3q - T3r; + T2v = T1c + T1f; + } + T19 = T11 - T18; + T3L = T3l - T3o; + T3M = T3s - T3v; + T1o = T1g - T1n; + T2x = T2v - T2w; + T4C = T4e - T4f; + T4B = T4b - T4c; + T2u = T2s - T2t; + T1v = TO - TP; + T4r = T4p - T4q; + T4o = T4m - T4n; + T1u = TL - TM; + T2H = Te - Tl; + T37 = T33 + T36; + T2I = Tt - TA; + T3e = T3a + T3d; + T3p = T3l + T3o; + T3w = T3s + T3v; + T3x = T3p + T3w; + Tm = Te + Tl; + TB = Tt + TA; + TC = Tm + TB; + T4u = T4m + T4n; + T4v = T4p + T4q; + T4y = T4u + T4v; + T2A = T2s + T2t; + T2B = T2v + T2w; + T2E = T2A + T2B; + T1E = T11 + T18; + T1F = T1g + T1n; + T1G = T1E + T1F; + T4d = T4b + T4c; + T4g = T4e + T4f; + T4j = T4d + 
T4g; + T3F = T33 - T36; + T3G = T3a - T3d; + T3H = T3F + T3G; + TN = TL + TM; + TQ = TO + TP; + TR = TN + TQ; + } + cr[0] = T7 + TC; + ci[0] = T2D + T2E; + { + E T2k, T2o, T4T, T4U; + T2k = TK + TR; + T2o = T1D + T1G; + cr[WS(rs, 10)] = FNMS(T2n, T2o, T2j * T2k); + ci[WS(rs, 10)] = FMA(T2n, T2k, T2j * T2o); + T4T = T4i + T4j; + T4U = T4x + T4y; + cr[WS(rs, 5)] = FNMS(T2c, T4U, T29 * T4T); + ci[WS(rs, 5)] = FMA(T29, T4U, T2c * T4T); + } + T48 = T3i + T3x; + T4a = T3E + T3H; + cr[WS(rs, 15)] = FNMS(T49, T4a, T47 * T48); + ci[WS(rs, 15)] = FMA(T47, T4a, T49 * T48); + { + E T2y, T2J, T2V, T2R, T2G, T2U, T2r, T2Q; + T2y = FMA(KP951056516, T2u, KP587785252 * T2x); + T2J = FMA(KP951056516, T2H, KP587785252 * T2I); + T2V = FNMS(KP951056516, T2I, KP587785252 * T2H); + T2R = FNMS(KP951056516, T2x, KP587785252 * T2u); + { + E T2C, T2F, T2p, T2q; + T2C = KP559016994 * (T2A - T2B); + T2F = FNMS(KP250000000, T2E, T2D); + T2G = T2C + T2F; + T2U = T2F - T2C; + T2p = KP559016994 * (Tm - TB); + T2q = FNMS(KP250000000, TC, T7); + T2r = T2p + T2q; + T2Q = T2q - T2p; + } + { + E T2z, T2K, T2Y, T30; + T2z = T2r + T2y; + T2K = T2G - T2J; + cr[WS(rs, 4)] = FNMS(T27, T2K, T25 * T2z); + ci[WS(rs, 4)] = FMA(T27, T2z, T25 * T2K); + T2Y = T2Q - T2R; + T30 = T2V + T2U; + cr[WS(rs, 12)] = FNMS(T2Z, T30, T2X * T2Y); + ci[WS(rs, 12)] = FMA(T2Z, T2Y, T2X * T30); + } + { + E T2M, T2O, T2S, T2W; + T2M = T2r - T2y; + T2O = T2J + T2G; + cr[WS(rs, 16)] = FNMS(T2N, T2O, T2L * T2M); + ci[WS(rs, 16)] = FMA(T2N, T2M, T2L * T2O); + T2S = T2Q + T2R; + T2W = T2U - T2V; + cr[WS(rs, 8)] = FNMS(T2T, T2W, T2P * T2S); + ci[WS(rs, 8)] = FMA(T2T, T2S, T2P * T2W); + } + } + { + E T4s, T4D, T4N, T4I, T4A, T4M, T4l, T4J; + T4s = FMA(KP951056516, T4o, KP587785252 * T4r); + T4D = FMA(KP951056516, T4B, KP587785252 * T4C); + T4N = FNMS(KP951056516, T4C, KP587785252 * T4B); + T4I = FNMS(KP951056516, T4r, KP587785252 * T4o); + { + E T4w, T4z, T4h, T4k; + T4w = KP559016994 * (T4u - T4v); + T4z = FNMS(KP250000000, T4y, T4x); + 
T4A = T4w + T4z; + T4M = T4z - T4w; + T4h = KP559016994 * (T4d - T4g); + T4k = FNMS(KP250000000, T4j, T4i); + T4l = T4h + T4k; + T4J = T4k - T4h; + } + { + E T4t, T4E, T4Q, T4S; + T4t = T4l - T4s; + T4E = T4A + T4D; + cr[WS(rs, 1)] = FNMS(TG, T4E, TD * T4t); + ci[WS(rs, 1)] = FMA(TD, T4E, TG * T4t); + T4Q = T4J - T4I; + T4S = T4M + T4N; + cr[WS(rs, 17)] = FNMS(T4R, T4S, T4P * T4Q); + ci[WS(rs, 17)] = FMA(T4P, T4S, T4R * T4Q); + } + { + E T4F, T4G, T4K, T4O; + T4F = T4s + T4l; + T4G = T4A - T4D; + cr[WS(rs, 9)] = FNMS(T1T, T4G, T1R * T4F); + ci[WS(rs, 9)] = FMA(T1R, T4G, T1T * T4F); + T4K = T4I + T4J; + T4O = T4M - T4N; + cr[WS(rs, 13)] = FNMS(T4L, T4O, T4H * T4K); + ci[WS(rs, 13)] = FMA(T4H, T4O, T4L * T4K); + } + } + { + E T1p, T1w, T22, T1X, T1J, T23, TU, T1W; + T1p = FNMS(KP951056516, T1o, KP587785252 * T19); + T1w = FNMS(KP951056516, T1v, KP587785252 * T1u); + T22 = FMA(KP951056516, T1u, KP587785252 * T1v); + T1X = FMA(KP951056516, T19, KP587785252 * T1o); + { + E T1H, T1I, TS, TT; + T1H = FNMS(KP250000000, T1G, T1D); + T1I = KP559016994 * (T1E - T1F); + T1J = T1H - T1I; + T23 = T1I + T1H; + TS = FNMS(KP250000000, TR, TK); + TT = KP559016994 * (TN - TQ); + TU = TS - TT; + T1W = TT + TS; + } + { + E T1q, T1K, T2e, T2g; + T1q = TU - T1p; + T1K = T1w + T1J; + cr[WS(rs, 2)] = FNMS(T1t, T1K, TJ * T1q); + ci[WS(rs, 2)] = FMA(T1t, T1q, TJ * T1K); + T2e = T1W + T1X; + T2g = T23 - T22; + cr[WS(rs, 14)] = FNMS(T2f, T2g, T2d * T2e); + ci[WS(rs, 14)] = FMA(T2f, T2e, T2d * T2g); + } + { + E T1O, T1Q, T1Y, T24; + T1O = TU + T1p; + T1Q = T1J - T1w; + cr[WS(rs, 18)] = FNMS(T1P, T1Q, T1N * T1O); + ci[WS(rs, 18)] = FMA(T1P, T1O, T1N * T1Q); + T1Y = T1W - T1X; + T24 = T22 + T23; + cr[WS(rs, 6)] = FNMS(T21, T24, T1V * T1Y); + ci[WS(rs, 6)] = FMA(T21, T1Y, T1V * T24); + } + } + { + E T3f, T3N, T43, T3Z, T3K, T42, T3A, T3Y; + T3f = FNMS(KP951056516, T3e, KP587785252 * T37); + T3N = FNMS(KP951056516, T3M, KP587785252 * T3L); + T43 = FMA(KP951056516, T3L, KP587785252 * T3M); + T3Z = 
FMA(KP951056516, T37, KP587785252 * T3e); + { + E T3I, T3J, T3y, T3z; + T3I = FNMS(KP250000000, T3H, T3E); + T3J = KP559016994 * (T3F - T3G); + T3K = T3I - T3J; + T42 = T3J + T3I; + T3y = FNMS(KP250000000, T3x, T3i); + T3z = KP559016994 * (T3p - T3w); + T3A = T3y - T3z; + T3Y = T3z + T3y; + } + { + E T3B, T3O, T45, T46; + T3B = T3f + T3A; + T3O = T3K - T3N; + cr[WS(rs, 3)] = FNMS(TH, T3O, TE * T3B); + ci[WS(rs, 3)] = FMA(TE, T3O, TH * T3B); + T45 = T3Z + T3Y; + T46 = T42 - T43; + cr[WS(rs, 19)] = FNMS(T1M, T46, T1L * T45); + ci[WS(rs, 19)] = FMA(T1L, T46, T1M * T45); + } + { + E T3S, T3W, T40, T44; + T3S = T3A - T3f; + T3W = T3K + T3N; + cr[WS(rs, 7)] = FNMS(T3V, T3W, T3R * T3S); + ci[WS(rs, 7)] = FMA(T3R, T3W, T3V * T3S); + T40 = T3Y - T3Z; + T44 = T42 + T43; + cr[WS(rs, 11)] = FNMS(T41, T44, T3X * T40); + ci[WS(rs, 11)] = FMA(T3X, T44, T41 * T40); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hb2_20", twinstr, &GENUS, {204, 92, 72, 0} }; + +void X(codelet_hb2_20) (planner *p) { + X(khc2hc_register) (p, hb2_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1682 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:30 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 25 -dif -name hb2_25 -include hb.h */ + +/* + * This function contains 440 FP additions, 434 FP multiplications, + * (or, 84 additions, 78 multiplications, 356 fused multiply/add), + * 234 stack variables, 47 constants, and 100 memory accesses + */ +#include "hb.h" + +static void hb2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP833417178, +0.833417178328688677408962550243238843138996060); + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP992114701, 
+0.992114701314477831049793042785778521453036709); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP943557151, +0.943557151597354104399655195398983005179443399); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP062914667, +0.062914667253649757225485955897349402364686947); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + 
DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) { + E TN, TQ, T4e, T2y, T4i, T3U, T4u, T4o, T4G, T4C, T2F, T41, T3Q, T4q, T3a; + E T3F, T4a, T4w, T46, T44; + { + E TT, TO, TR, T23, T2d, T2x, TP, TV, T2p, T85, T4d, T25, TX; + TN = W[0]; + TT = W[4]; + TO = W[2]; + TR = W[3]; + T23 = W[6]; + T2d = TN * TT; + T2x = TO * TT; + TP = TN * TO; + TV = TN * TR; + T2p = TT * T23; + T85 = TN * T23; + T4d = TO * T23; + T25 = W[7]; + TQ = W[1]; + TX = W[5]; + { + E T86, T4n, TW, T4l, TS, T71, T2q, T4z, T2e, T8a, T2u, T76, T2k, T4B, T6E; + E T6U, T6Y, T5T, T8i, T1I, T2a, T26, TY, T8d, T8s, T8o, T5C, T5w, T7g, T7c; + E T5M, T5I, T9, T40, T1R, T3X, T6H, T7F, T5W, T7n, T4N, T68, T1S, T1k, T1T; + E T1D, T1Y, T1Z, T10, TM, T7K, T7A, T6p, T6w, T4X, T56, T3K, T2U, T7x, T7J; + E T6v, T6i, T50, T57, T3L, T39, T4Q, T59, T3O, T3E, T67, T7t, T7H, T6y, T63; + E T4T, T5a, T3N, T3p, T66, T7o; + { + E T2A, T2z, T6G, T2E, T5V, T6F; + { + E T1, T1J, T3Y, T3Z, T8, T2C, T1M, T1P, T2D, T4h, T89, T2t, T3W, T1Q, T3V; + T1 = cr[0]; + T4e = FMA(TR, T25, T4d); + T4h = TO * T25; + T89 = TN * T25; + T2t = TT * T25; + T86 = FMA(TQ, T25, T85); + T4n = FNMS(TQ, TO, TV); + TW = FMA(TQ, TO, TV); + T4l = FMA(TQ, TR, TP); + TS = FNMS(TQ, TR, TP); + T71 = FNMS(TR, TX, T2x); + T2y = FMA(TR, TX, T2x); + T2q = FMA(TX, T25, T2p); + T4z = FMA(TQ, TX, T2d); + T2e = FNMS(TQ, TX, T2d); + { + E T3T, T2j, T4t, T6T; + T3T = TO * TX; + T2j = TN * TX; + T4i = 
FNMS(TR, T23, T4h); + T8a = FNMS(TQ, T23, T89); + T2u = FNMS(TX, T23, T2t); + T4t = T4l * TX; + T6T = T4l * T23; + { + E T6X, T4m, T1H, T29; + T6X = T4l * T25; + T4m = T4l * TT; + T1H = TS * TX; + T29 = TS * T25; + { + E T24, TU, T4F, T4A; + T24 = TS * T23; + TU = TS * TT; + T4F = T4z * T25; + T4A = T4z * T23; + { + E T8r, T8n, T5B, T5v; + T8r = T2y * T25; + T8n = T2y * T23; + T5B = T2e * T25; + T5v = T2e * T23; + T3U = FNMS(TR, TT, T3T); + T76 = FMA(TR, TT, T3T); + T2k = FMA(TQ, TT, T2j); + T4B = FNMS(TQ, TT, T2j); + T4u = FMA(T4n, TT, T4t); + T6E = FNMS(T4n, TT, T4t); + T6U = FMA(T4n, T25, T6T); + T6Y = FNMS(T4n, T23, T6X); + T5T = FMA(T4n, TX, T4m); + T4o = FNMS(T4n, TX, T4m); + T8i = FMA(TW, TT, T1H); + T1I = FNMS(TW, TT, T1H); + T2a = FNMS(TW, T23, T29); + T26 = FMA(TW, T25, T24); + TY = FMA(TW, TX, TU); + T8d = FNMS(TW, TX, TU); + T8s = FNMS(T3U, T23, T8r); + T8o = FMA(T3U, T25, T8n); + T5C = FNMS(T2k, T23, T5B); + T5w = FMA(T2k, T25, T5v); + T4G = FNMS(T4B, T23, T4F); + T4C = FMA(T4B, T25, T4A); + { + E T7f, T7b, T5L, T5H; + T7f = T5T * T25; + T7b = T5T * T23; + T5L = TY * T25; + T5H = TY * T23; + T7g = FNMS(T6E, T23, T7f); + T7c = FMA(T6E, T25, T7b); + T5M = FNMS(T1I, T23, T5L); + T5I = FMA(T1I, T25, T5H); + T1J = ci[WS(rs, 24)]; + } + } + } + } + } + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 5)]; + T3 = ci[WS(rs, 4)]; + T5 = cr[WS(rs, 10)]; + T6 = ci[WS(rs, 9)]; + { + E T1K, T4, T7, T1L, T1N, T1O; + T1K = ci[WS(rs, 19)]; + T3Y = T2 - T3; + T4 = T2 + T3; + T3Z = T5 - T6; + T7 = T5 + T6; + T1L = cr[WS(rs, 20)]; + T1N = ci[WS(rs, 14)]; + T1O = cr[WS(rs, 15)]; + T8 = T4 + T7; + T2A = T4 - T7; + T2C = T1K + T1L; + T1M = T1K - T1L; + T1P = T1N - T1O; + T2D = T1N + T1O; + } + } + T2z = FNMS(KP250000000, T8, T1); + T9 = T1 + T8; + T3W = T1M - T1P; + T1Q = T1M + T1P; + T40 = FMA(KP618033988, T3Z, T3Y); + T6G = FNMS(KP618033988, T3Y, T3Z); + T2E = FMA(KP618033988, T2D, T2C); + T5V = FNMS(KP618033988, T2C, T2D); + T1R = T1J + T1Q; + T3V = FNMS(KP250000000, T1Q, T1J); + 
T6F = FNMS(KP559016994, T3W, T3V); + T3X = FMA(KP559016994, T3W, T3V); + } + { + E T2S, T6n, T2H, T2G, Ti, T5Y, T3C, T3r, TK, T3q, T30, T6d, T33, Tr, T32; + E T3v, T61, T3y, T1C, T3x, T2L, T6k, T2O, T1a, T2N, T6g, T37, T2W, Tt, T1j; + E T2V, Tx, T3g, T3j, Tw, T3l, T1t, T3i, Ty; + { + E T1u, T1v, T1A, T3u, T1w; + { + E TC, TI, T3B, TD, TE; + { + E Ta, Te, Tf, Tb, Tc, T5U, T2B, T2R, Tg; + Ta = cr[WS(rs, 1)]; + T5U = FNMS(KP559016994, T2A, T2z); + T2B = FMA(KP559016994, T2A, T2z); + T6H = FNMS(KP951056516, T6G, T6F); + T7F = FMA(KP951056516, T6G, T6F); + Te = cr[WS(rs, 11)]; + T5W = FMA(KP951056516, T5V, T5U); + T7n = FNMS(KP951056516, T5V, T5U); + T4N = FMA(KP951056516, T2E, T2B); + T2F = FNMS(KP951056516, T2E, T2B); + Tf = ci[WS(rs, 8)]; + Tb = cr[WS(rs, 6)]; + Tc = ci[WS(rs, 3)]; + TC = cr[WS(rs, 3)]; + T2R = Tf - Te; + Tg = Te + Tf; + { + E T2Q, Td, Th, TG, TH; + T2Q = Tb - Tc; + Td = Tb + Tc; + TG = ci[WS(rs, 11)]; + TH = ci[WS(rs, 6)]; + T2S = FNMS(KP618033988, T2R, T2Q); + T6n = FMA(KP618033988, T2Q, T2R); + Th = Td + Tg; + T2H = Td - Tg; + TI = TG + TH; + T3B = TG - TH; + T2G = FNMS(KP250000000, Th, Ta); + Ti = Ta + Th; + TD = cr[WS(rs, 8)]; + TE = ci[WS(rs, 1)]; + } + } + { + E Tj, Tk, Tp, T2Z, TJ, Tl; + Tj = cr[WS(rs, 4)]; + { + E Tn, To, T3A, TF; + Tn = ci[WS(rs, 10)]; + To = ci[WS(rs, 5)]; + T3A = TD - TE; + TF = TD + TE; + Tk = cr[WS(rs, 9)]; + Tp = Tn + To; + T2Z = To - Tn; + T5Y = FNMS(KP618033988, T3A, T3B); + T3C = FMA(KP618033988, T3B, T3A); + T3r = TI - TF; + TJ = TF + TI; + Tl = ci[0]; + } + T1u = ci[WS(rs, 21)]; + TK = TC + TJ; + T3q = FNMS(KP250000000, TJ, TC); + { + E T1y, Tm, T2Y, T1z, Tq; + T1y = cr[WS(rs, 13)]; + Tm = Tk + Tl; + T2Y = Tl - Tk; + T1z = cr[WS(rs, 18)]; + T1v = ci[WS(rs, 16)]; + T30 = FMA(KP618033988, T2Z, T2Y); + T6d = FNMS(KP618033988, T2Y, T2Z); + T33 = Tm - Tp; + Tq = Tm + Tp; + T1A = T1y + T1z; + T3u = T1z - T1y; + Tr = Tj + Tq; + T32 = FMS(KP250000000, Tq, Tj); + T1w = cr[WS(rs, 23)]; + } + } + } + { + E T1b, T1c, T1h, 
T36, T1d; + { + E T12, T13, T18, T2K, T1B, T14; + T12 = ci[WS(rs, 23)]; + { + E T16, T17, T3t, T1x; + T16 = ci[WS(rs, 13)]; + T17 = cr[WS(rs, 16)]; + T3t = T1v + T1w; + T1x = T1v - T1w; + T13 = ci[WS(rs, 18)]; + T18 = T16 - T17; + T2K = T16 + T17; + T3v = FMA(KP618033988, T3u, T3t); + T61 = FNMS(KP618033988, T3t, T3u); + T3y = T1x + T1A; + T1B = T1x - T1A; + T14 = cr[WS(rs, 21)]; + } + T1b = ci[WS(rs, 20)]; + T1C = T1u + T1B; + T3x = FMS(KP250000000, T1B, T1u); + { + E T1f, T15, T2J, T1g, T19; + T1f = cr[WS(rs, 14)]; + T15 = T13 - T14; + T2J = T13 + T14; + T1g = cr[WS(rs, 19)]; + T1c = ci[WS(rs, 15)]; + T2L = FMA(KP618033988, T2K, T2J); + T6k = FNMS(KP618033988, T2J, T2K); + T2O = T15 - T18; + T19 = T15 + T18; + T1h = T1f + T1g; + T36 = T1g - T1f; + T1a = T12 + T19; + T2N = FNMS(KP250000000, T19, T12); + T1d = cr[WS(rs, 24)]; + } + } + { + E T1l, T1p, T1o, T3e, T1i, T1q; + T1l = ci[WS(rs, 22)]; + { + E T1m, T1n, T35, T1e; + T1m = ci[WS(rs, 17)]; + T1n = cr[WS(rs, 22)]; + T35 = T1c + T1d; + T1e = T1c - T1d; + T1p = ci[WS(rs, 12)]; + T1o = T1m - T1n; + T3e = T1m + T1n; + T6g = FNMS(KP618033988, T35, T36); + T37 = FMA(KP618033988, T36, T35); + T2W = T1e + T1h; + T1i = T1e - T1h; + T1q = cr[WS(rs, 17)]; + } + Tt = cr[WS(rs, 2)]; + T1j = T1b + T1i; + T2V = FMS(KP250000000, T1i, T1b); + { + E Tu, T1r, T3f, Tv, T1s; + Tu = cr[WS(rs, 7)]; + T1r = T1p - T1q; + T3f = T1p + T1q; + Tv = ci[WS(rs, 2)]; + Tx = cr[WS(rs, 12)]; + T3g = FMA(KP618033988, T3f, T3e); + T68 = FNMS(KP618033988, T3e, T3f); + T3j = T1o - T1r; + T1s = T1o + T1r; + Tw = Tu + Tv; + T3l = Tu - Tv; + T1t = T1l + T1s; + T3i = FMS(KP250000000, T1s, T1l); + Ty = ci[WS(rs, 7)]; + } + } + } + } + { + E T3n, T65, T3c, T3b, T2P, T2M, T4W; + { + E TA, T3m, Tz, TB, Ts; + T3m = Ty - Tx; + Tz = Tx + Ty; + T1S = T1a + T1j; + T1k = T1a - T1j; + T3n = FNMS(KP618033988, T3m, T3l); + T65 = FMA(KP618033988, T3l, T3m); + TA = Tw + Tz; + T3c = Tz - Tw; + T3b = FNMS(KP250000000, TA, Tt); + TB = Tt + TA; + T1T = T1t + T1C; + T1D = 
T1t - T1C; + T1Y = Ti - Tr; + Ts = Ti + Tr; + { + E T2I, T6j, T6m, TL; + T2I = FMA(KP559016994, T2H, T2G); + T6j = FNMS(KP559016994, T2H, T2G); + T6m = FNMS(KP559016994, T2O, T2N); + T2P = FMA(KP559016994, T2O, T2N); + TL = TB + TK; + T1Z = TB - TK; + { + E T6l, T7y, T6o, T7z; + T6l = FMA(KP951056516, T6k, T6j); + T7y = FNMS(KP951056516, T6k, T6j); + T6o = FMA(KP951056516, T6n, T6m); + T7z = FNMS(KP951056516, T6n, T6m); + T10 = Ts - TL; + TM = Ts + TL; + T2M = FNMS(KP951056516, T2L, T2I); + T4W = FMA(KP951056516, T2L, T2I); + T7K = FMA(KP939062505, T7y, T7z); + T7A = FNMS(KP939062505, T7z, T7y); + T6p = FNMS(KP549754652, T6o, T6l); + T6w = FMA(KP549754652, T6l, T6o); + } + } + } + { + E T34, T31, T4Y, T60, T3s, T3z, T5X; + { + E T2X, T6c, T6f, T4V, T2T; + T2X = FNMS(KP559016994, T2W, T2V); + T6c = FMA(KP559016994, T2W, T2V); + T6f = FMA(KP559016994, T33, T32); + T34 = FNMS(KP559016994, T33, T32); + T4V = FNMS(KP951056516, T2S, T2P); + T2T = FMA(KP951056516, T2S, T2P); + { + E T7w, T6e, T7v, T6h; + T7w = FMA(KP951056516, T6d, T6c); + T6e = FNMS(KP951056516, T6d, T6c); + T7v = FMA(KP951056516, T6g, T6f); + T6h = FNMS(KP951056516, T6g, T6f); + T4X = FNMS(KP634619297, T4W, T4V); + T56 = FMA(KP634619297, T4V, T4W); + T3K = FMA(KP256756360, T2M, T2T); + T2U = FNMS(KP256756360, T2T, T2M); + T7x = FMA(KP126329378, T7w, T7v); + T7J = FNMS(KP126329378, T7v, T7w); + T6v = FNMS(KP470564281, T6e, T6h); + T6i = FMA(KP470564281, T6h, T6e); + T31 = FMA(KP951056516, T30, T2X); + T4Y = FNMS(KP951056516, T30, T2X); + } + T60 = FMA(KP559016994, T3r, T3q); + T3s = FNMS(KP559016994, T3r, T3q); + T3z = FNMS(KP559016994, T3y, T3x); + T5X = FMA(KP559016994, T3y, T3x); + } + { + E T5Z, T7r, T4Z, T38; + T4Z = FNMS(KP951056516, T37, T34); + T38 = FMA(KP951056516, T37, T34); + { + E T4O, T3w, T4P, T3D; + T4O = FMA(KP951056516, T3v, T3s); + T3w = FNMS(KP951056516, T3v, T3s); + T4P = FMA(KP951056516, T3C, T3z); + T3D = FNMS(KP951056516, T3C, T3z); + T50 = FNMS(KP827271945, T4Z, T4Y); + T57 = 
FMA(KP827271945, T4Y, T4Z); + T3L = FMA(KP634619297, T31, T38); + T39 = FNMS(KP634619297, T38, T31); + T4Q = FNMS(KP126329378, T4P, T4O); + T59 = FMA(KP126329378, T4O, T4P); + T3O = FNMS(KP939062505, T3w, T3D); + T3E = FMA(KP939062505, T3D, T3w); + T5Z = FMA(KP951056516, T5Y, T5X); + T7r = FNMS(KP951056516, T5Y, T5X); + } + { + E T3d, T3k, T64, T7s, T62; + T67 = FMA(KP559016994, T3c, T3b); + T3d = FNMS(KP559016994, T3c, T3b); + T3k = FNMS(KP559016994, T3j, T3i); + T64 = FMA(KP559016994, T3j, T3i); + T7s = FNMS(KP951056516, T61, T60); + T62 = FMA(KP951056516, T61, T60); + { + E T4S, T3h, T4R, T3o; + T4S = FMA(KP951056516, T3g, T3d); + T3h = FNMS(KP951056516, T3g, T3d); + T4R = FMA(KP951056516, T3n, T3k); + T3o = FNMS(KP951056516, T3n, T3k); + T7t = FNMS(KP827271945, T7s, T7r); + T7H = FMA(KP827271945, T7r, T7s); + T6y = FNMS(KP062914667, T5Z, T62); + T63 = FMA(KP062914667, T62, T5Z); + T4T = FNMS(KP470564281, T4S, T4R); + T5a = FMA(KP470564281, T4R, T4S); + T3N = FNMS(KP549754652, T3h, T3o); + T3p = FMA(KP549754652, T3o, T3h); + T66 = FNMS(KP951056516, T65, T64); + T7o = FMA(KP951056516, T65, T64); + } + } + } + } + } + } + } + { + E T7q, T7G, T6J, T6I, T6q, T6b, T6B, T73, T6Q, T78, T6z, T6a; + cr[0] = T9 + TM; + { + E T1U, T2l, T1X, T2g, T1E, TZ, T2m, T20, T2v, T2n; + { + E T1W, T7p, T69, T1V; + T1W = T1S - T1T; + T1U = T1S + T1T; + T7p = FNMS(KP951056516, T68, T67); + T69 = FMA(KP951056516, T68, T67); + T1V = FNMS(KP250000000, T1U, T1R); + T7q = FMA(KP062914667, T7p, T7o); + T7G = FNMS(KP062914667, T7o, T7p); + T6z = FNMS(KP634619297, T66, T69); + T6a = FMA(KP634619297, T69, T66); + T2l = FNMS(KP559016994, T1W, T1V); + T1X = FMA(KP559016994, T1W, T1V); + T2g = FNMS(KP618033988, T1k, T1D); + T1E = FMA(KP618033988, T1D, T1k); + TZ = FNMS(KP250000000, TM, T9); + T2m = FNMS(KP618033988, T1Y, T1Z); + T20 = FMA(KP618033988, T1Z, T1Y); + } + ci[0] = T1R + T1U; + T2v = FMA(KP951056516, T2m, T2l); + T2n = FNMS(KP951056516, T2m, T2l); + { + E T2b, T21, T2f, T11; + T2b = 
FNMS(KP951056516, T20, T1X); + T21 = FMA(KP951056516, T20, T1X); + T2f = FNMS(KP559016994, T10, TZ); + T11 = FMA(KP559016994, T10, TZ); + { + E T2h, T2r, T27, T1F; + T2h = FMA(KP951056516, T2g, T2f); + T2r = FNMS(KP951056516, T2g, T2f); + T27 = FMA(KP951056516, T1E, T11); + T1F = FNMS(KP951056516, T1E, T11); + { + E T2o, T2i, T2w, T2s; + T2o = T2k * T2h; + T2i = T2e * T2h; + T2w = T2u * T2r; + T2s = T2q * T2r; + { + E T2c, T28, T22, T1G; + T2c = T2a * T27; + T28 = T26 * T27; + T22 = T1I * T1F; + T1G = TY * T1F; + ci[WS(rs, 15)] = FMA(T2q, T2v, T2w); + cr[WS(rs, 15)] = FNMS(T2u, T2v, T2s); + ci[WS(rs, 20)] = FMA(T26, T2b, T2c); + cr[WS(rs, 20)] = FNMS(T2a, T2b, T28); + ci[WS(rs, 5)] = FMA(TY, T21, T22); + cr[WS(rs, 5)] = FNMS(T1I, T21, T1G); + cr[WS(rs, 10)] = FNMS(T2k, T2n, T2i); + ci[WS(rs, 10)] = FMA(T2e, T2n, T2o); + } + } + } + } + } + { + E T6x, T6A, T6O, T6P; + T6x = FMA(KP968479752, T6w, T6v); + T6J = FNMS(KP968479752, T6w, T6v); + T6I = FMA(KP845997307, T6z, T6y); + T6A = FNMS(KP845997307, T6z, T6y); + T6O = FNMS(KP968479752, T6p, T6i); + T6q = FMA(KP968479752, T6p, T6i); + T6b = FMA(KP845997307, T6a, T63); + T6P = FNMS(KP845997307, T6a, T63); + T6B = FNMS(KP681693190, T6A, T6x); + T73 = FMA(KP560319534, T6x, T6A); + T6Q = FMA(KP681693190, T6P, T6O); + T78 = FNMS(KP560319534, T6O, T6P); + } + { + E T7U, T8f, T7B, T7u, T82, T8k, T7Y, T7M; + { + E T7L, T7I, T80, T81; + { + E T7S, T6r, T6t, T6K, T6M, T7T, T6s, T7j; + T7S = FNMS(KP734762448, T7K, T7J); + T7L = FMA(KP734762448, T7K, T7J); + T6r = FMA(KP906616052, T6q, T6b); + T6t = FNMS(KP906616052, T6q, T6b); + T6K = FNMS(KP906616052, T6J, T6I); + T6M = FMA(KP906616052, T6J, T6I); + T7I = FMA(KP772036680, T7H, T7G); + T7T = FNMS(KP772036680, T7H, T7G); + T6s = FNMS(KP249506682, T6r, T5W); + T7j = FMA(KP998026728, T6r, T5W); + { + E T6L, T7l, T72, T6u; + T6L = FNMS(KP249506682, T6K, T6H); + T7l = FMA(KP998026728, T6K, T6H); + T72 = FMA(KP557913902, T6t, T6s); + T6u = FNMS(KP557913902, T6t, T6s); + { + E T7k, 
T6N, T77, T7m; + T7k = T4l * T7j; + T6N = FNMS(KP557913902, T6M, T6L); + T77 = FMA(KP557913902, T6M, T6L); + T7m = T4l * T7l; + { + E T74, T7d, T6V, T6C; + T74 = FNMS(KP949179823, T73, T72); + T7d = FMA(KP949179823, T73, T72); + T6V = FMA(KP860541664, T6B, T6u); + T6C = FNMS(KP860541664, T6B, T6u); + cr[WS(rs, 2)] = FNMS(T4n, T7l, T7k); + { + E T7h, T79, T6R, T6Z; + T7h = FNMS(KP949179823, T78, T77); + T79 = FMA(KP949179823, T78, T77); + T6R = FNMS(KP860541664, T6Q, T6N); + T6Z = FMA(KP860541664, T6Q, T6N); + ci[WS(rs, 2)] = FMA(T4n, T7j, T7m); + { + E T75, T7e, T6W, T6D; + T75 = T71 * T74; + T7e = T7c * T7d; + T6W = T6U * T6V; + T6D = T5T * T6C; + { + E T7a, T7i, T70, T6S; + T7a = T71 * T79; + T7i = T7c * T7h; + T70 = T6U * T6Z; + T6S = T5T * T6R; + cr[WS(rs, 12)] = FNMS(T76, T79, T75); + cr[WS(rs, 17)] = FNMS(T7g, T7h, T7e); + cr[WS(rs, 22)] = FNMS(T6Y, T6Z, T6W); + cr[WS(rs, 7)] = FNMS(T6E, T6R, T6D); + ci[WS(rs, 12)] = FMA(T76, T74, T7a); + ci[WS(rs, 17)] = FMA(T7g, T7d, T7i); + ci[WS(rs, 22)] = FMA(T6Y, T6V, T70); + ci[WS(rs, 7)] = FMA(T6E, T6C, T6S); + T7U = FNMS(KP621716863, T7T, T7S); + T8f = FMA(KP614372930, T7S, T7T); + } + } + } + } + } + } + } + T80 = FNMS(KP734762448, T7A, T7x); + T7B = FMA(KP734762448, T7A, T7x); + T7u = FMA(KP772036680, T7t, T7q); + T81 = FNMS(KP772036680, T7t, T7q); + T82 = FNMS(KP621716863, T81, T80); + T8k = FMA(KP614372930, T80, T81); + T7Y = FNMS(KP994076283, T7L, T7I); + T7M = FMA(KP994076283, T7L, T7I); + } + { + E T5y, T5c, T51, T4U, T5f, T5E, T5o, T5i, T5k; + { + E T5h, T5g, T5m, T5n, T58, T5b; + T5h = FMA(KP912575812, T57, T56); + T58 = FNMS(KP912575812, T57, T56); + T5b = FNMS(KP912018591, T5a, T59); + T5g = FMA(KP912018591, T5a, T59); + { + E T7X, T7N, T7C, T7Q; + T7X = FNMS(KP249506682, T7M, T7F); + T7N = FMA(KP998026728, T7M, T7F); + T7C = FMA(KP994076283, T7B, T7u); + T7Q = FNMS(KP994076283, T7B, T7u); + T5y = FMA(KP525970792, T58, T5b); + T5c = FNMS(KP726211448, T5b, T58); + { + E T7Z, T8j, T7P, T7D; + T7Z = 
FNMS(KP557913902, T7Y, T7X); + T8j = FMA(KP557913902, T7Y, T7X); + T7P = FNMS(KP249506682, T7C, T7n); + T7D = FMA(KP998026728, T7C, T7n); + { + E T8b, T83, T8t, T8l; + T8b = FMA(KP943557151, T82, T7Z); + T83 = FNMS(KP943557151, T82, T7Z); + T8t = FMA(KP949179823, T8k, T8j); + T8l = FNMS(KP949179823, T8k, T8j); + { + E T8e, T7R, T7O, T7E; + T8e = FMA(KP557913902, T7Q, T7P); + T7R = FNMS(KP557913902, T7Q, T7P); + T7O = TR * T7D; + T7E = TO * T7D; + { + E T8g, T8p, T7V, T87; + T8g = FMA(KP949179823, T8f, T8e); + T8p = FNMS(KP949179823, T8f, T8e); + T7V = FMA(KP943557151, T7U, T7R); + T87 = FNMS(KP943557151, T7U, T7R); + ci[WS(rs, 3)] = FMA(TO, T7N, T7O); + cr[WS(rs, 3)] = FNMS(TR, T7N, T7E); + { + E T8m, T8h, T8u, T8q; + T8m = T8i * T8g; + T8h = T8d * T8g; + T8u = T8s * T8p; + T8q = T8o * T8p; + { + E T84, T7W, T8c, T88; + T84 = T4B * T7V; + T7W = T4z * T7V; + T8c = T8a * T87; + T88 = T86 * T87; + ci[WS(rs, 13)] = FMA(T8d, T8l, T8m); + cr[WS(rs, 13)] = FNMS(T8i, T8l, T8h); + ci[WS(rs, 18)] = FMA(T8o, T8t, T8u); + cr[WS(rs, 18)] = FNMS(T8s, T8t, T8q); + ci[WS(rs, 8)] = FMA(T4z, T83, T84); + cr[WS(rs, 8)] = FNMS(T4B, T83, T7W); + ci[WS(rs, 23)] = FMA(T86, T8b, T8c); + cr[WS(rs, 23)] = FNMS(T8a, T8b, T88); + } + } + } + } + } + } + } + T51 = FMA(KP912575812, T50, T4X); + T5m = FNMS(KP912575812, T50, T4X); + T5n = FMA(KP912018591, T4T, T4Q); + T4U = FNMS(KP912018591, T4T, T4Q); + T41 = FMA(KP951056516, T40, T3X); + T5f = FNMS(KP951056516, T40, T3X); + T5E = FMA(KP525970792, T5m, T5n); + T5o = FNMS(KP726211448, T5n, T5m); + T5i = FMA(KP851038619, T5h, T5g); + T5k = FNMS(KP851038619, T5h, T5g); + } + { + E T42, T43, T48, T49, T3M, T3P; + T3M = FMA(KP871714437, T3L, T3K); + T42 = FNMS(KP871714437, T3L, T3K); + T43 = FMA(KP831864738, T3O, T3N); + T3P = FNMS(KP831864738, T3O, T3N); + { + E T5R, T5j, T54, T52; + T5R = FMA(KP992114701, T5i, T5f); + T5j = FNMS(KP248028675, T5i, T5f); + T54 = FNMS(KP851038619, T51, T4U); + T52 = FMA(KP851038619, T51, T4U); + T3Q = 
FNMS(KP559154169, T3P, T3M); + T4q = FMA(KP683113946, T3M, T3P); + { + E T5D, T5l, T5P, T53; + T5D = FMA(KP554608978, T5k, T5j); + T5l = FNMS(KP554608978, T5k, T5j); + T5P = FNMS(KP992114701, T52, T4N); + T53 = FMA(KP248028675, T52, T4N); + { + E T5p, T5t, T5F, T5N; + T5p = FNMS(KP803003575, T5o, T5l); + T5t = FMA(KP803003575, T5o, T5l); + T5F = FNMS(KP943557151, T5E, T5D); + T5N = FMA(KP943557151, T5E, T5D); + { + E T55, T5x, T5S, T5Q; + T55 = FMA(KP554608978, T54, T53); + T5x = FNMS(KP554608978, T54, T53); + T5S = TW * T5P; + T5Q = TS * T5P; + { + E T5J, T5z, T5r, T5d; + T5J = FMA(KP943557151, T5y, T5x); + T5z = FNMS(KP943557151, T5y, T5x); + T5r = FMA(KP803003575, T5c, T55); + T5d = FNMS(KP803003575, T5c, T55); + ci[WS(rs, 4)] = FMA(TS, T5R, T5S); + cr[WS(rs, 4)] = FNMS(TW, T5R, T5Q); + { + E T5G, T5A, T5O, T5K; + T5G = T5C * T5z; + T5A = T5w * T5z; + T5O = T5M * T5J; + T5K = T5I * T5J; + { + E T5q, T5e, T5u, T5s; + T5q = TX * T5d; + T5e = TT * T5d; + T5u = T25 * T5r; + T5s = T23 * T5r; + ci[WS(rs, 14)] = FMA(T5w, T5F, T5G); + cr[WS(rs, 14)] = FNMS(T5C, T5F, T5A); + ci[WS(rs, 19)] = FMA(T5I, T5N, T5O); + cr[WS(rs, 19)] = FNMS(T5M, T5N, T5K); + ci[WS(rs, 9)] = FMA(TT, T5p, T5q); + cr[WS(rs, 9)] = FNMS(TX, T5p, T5e); + ci[WS(rs, 24)] = FMA(T23, T5t, T5u); + cr[WS(rs, 24)] = FNMS(T25, T5t, T5s); + } + } + } + } + } + } + } + T48 = FNMS(KP871714437, T39, T2U); + T3a = FMA(KP871714437, T39, T2U); + T3F = FMA(KP831864738, T3E, T3p); + T49 = FNMS(KP831864738, T3E, T3p); + T4a = FMA(KP559154169, T49, T48); + T4w = FNMS(KP683113946, T48, T49); + T46 = FMA(KP904730450, T43, T42); + T44 = FNMS(KP904730450, T43, T42); + } + } + } + } + } + } + { + E T45, T4L, T3G, T3I; + T45 = FNMS(KP242145790, T44, T41); + T4L = FMA(KP968583161, T44, T41); + T3G = FMA(KP904730450, T3F, T3a); + T3I = FNMS(KP904730450, T3F, T3a); + { + E T4v, T47, T4J, T3H; + T4v = FNMS(KP541454447, T46, T45); + T47 = FMA(KP541454447, T46, T45); + T4J = FMA(KP968583161, T3G, T2F); + T3H = FNMS(KP242145790, 
T3G, T2F); + { + E T4b, T4j, T4x, T4H; + T4b = FMA(KP921177326, T4a, T47); + T4j = FNMS(KP921177326, T4a, T47); + T4x = FNMS(KP833417178, T4w, T4v); + T4H = FMA(KP833417178, T4w, T4v); + { + E T3J, T4p, T4M, T4K; + T3J = FMA(KP541454447, T3I, T3H); + T4p = FNMS(KP541454447, T3I, T3H); + T4M = TQ * T4J; + T4K = TN * T4J; + { + E T4D, T4r, T4f, T3R; + T4D = FMA(KP833417178, T4q, T4p); + T4r = FNMS(KP833417178, T4q, T4p); + T4f = FMA(KP921177326, T3Q, T3J); + T3R = FNMS(KP921177326, T3Q, T3J); + ci[WS(rs, 1)] = FMA(TN, T4L, T4M); + cr[WS(rs, 1)] = FNMS(TQ, T4L, T4K); + { + E T4y, T4s, T4I, T4E; + T4y = T4u * T4r; + T4s = T4o * T4r; + T4I = T4G * T4D; + T4E = T4C * T4D; + { + E T4c, T3S, T4k, T4g; + T4c = T3U * T3R; + T3S = T2y * T3R; + T4k = T4i * T4f; + T4g = T4e * T4f; + ci[WS(rs, 11)] = FMA(T4o, T4x, T4y); + cr[WS(rs, 11)] = FNMS(T4u, T4x, T4s); + ci[WS(rs, 16)] = FMA(T4C, T4H, T4I); + cr[WS(rs, 16)] = FNMS(T4G, T4H, T4E); + ci[WS(rs, 6)] = FMA(T2y, T4b, T4c); + cr[WS(rs, 6)] = FNMS(T3U, T4b, T3S); + ci[WS(rs, 21)] = FMA(T4e, T4j, T4k); + cr[WS(rs, 21)] = FNMS(T4i, T4j, T4g); + } + } + } + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 24}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hb2_25", twinstr, &GENUS, {84, 78, 356, 0} }; + +void X(codelet_hb2_25) (planner *p) { + X(khc2hc_register) (p, hb2_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 25 -dif -name hb2_25 -include hb.h */ + +/* + * This function contains 440 FP additions, 340 FP multiplications, + * (or, 280 additions, 180 multiplications, 160 fused multiply/add), + * 155 stack variables, 20 constants, and 100 memory accesses + */ +#include "hb.h" + +static void hb2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP998026728, 
+0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) { + E TN, TQ, TO, TR, TT, TY, T2t, T2r, TZ, TU, T4f, T4l, T2d, T4v, T5m; + E T2j, T5l, T4X, T2v, T11, T3R, T1L, T5d, T6x, T5h, T6t, T25, T26, T27, T29; + E T6D, T7v, T49, T7l, T7p, T7t, T2p, T2n, T4b, T4p, T5n, T6B, T5b, T5p, T6p; + E T6r, T59, T4r; + { + E T2c, T4j, T2h, T4e, T2b, T4k, T2i, T4d; + { + E TP, TX, TS, TW; + TN = W[0]; + TQ = W[1]; + TO = W[2]; + TR = W[3]; + TP = TN * TO; + TX = TQ * TO; + TS = TQ * TR; + TW = TN * TR; + TT = TP - TS; + TY = TW + TX; + T2t = TW - TX; + T2r = TP 
+ TS; + TZ = W[5]; + T2c = TQ * TZ; + T4j = TO * TZ; + T2h = TN * TZ; + T4e = TR * TZ; + TU = W[4]; + T2b = TN * TU; + T4k = TR * TU; + T2i = TQ * TU; + T4d = TO * TU; + } + T4f = T4d - T4e; + T4l = T4j + T4k; + { + E T2s, T2u, TV, T10, T3P, T3Q, T1J, T1K; + T2d = T2b - T2c; + T4v = T2b + T2c; + T5m = T4j - T4k; + T2j = T2h + T2i; + T5l = T4d + T4e; + T4X = T2h - T2i; + T2s = T2r * TU; + T2u = T2t * TZ; + T2v = T2s + T2u; + TV = TT * TU; + T10 = TY * TZ; + T11 = TV + T10; + T3P = T2r * TZ; + T3Q = T2t * TU; + T3R = T3P - T3Q; + T1J = TT * TZ; + T1K = TY * TU; + T1L = T1J - T1K; + T5d = TV - T10; + T6x = T3P + T3Q; + T5h = T1J + T1K; + T6t = T2s - T2u; + T25 = W[6]; + T26 = W[7]; + T27 = FMA(TT, T25, TY * T26); + T29 = FNMS(TY, T25, TT * T26); + T6D = FNMS(T4X, T25, T4v * T26); + T7v = FNMS(T1L, T25, T11 * T26); + T49 = FMA(T2r, T25, T2t * T26); + T7l = FMA(T2d, T25, T2j * T26); + T7p = FNMS(T2j, T25, T2d * T26); + T7t = FMA(T11, T25, T1L * T26); + T2p = FNMS(TZ, T25, TU * T26); + T2n = FMA(TU, T25, TZ * T26); + T4b = FNMS(T2t, T25, T2r * T26); + T4p = FMA(T2v, T25, T3R * T26); + T5n = FMA(T5l, T25, T5m * T26); + T6B = FMA(T4v, T25, T4X * T26); + T5b = FNMS(TQ, T25, TN * T26); + T5p = FNMS(T5m, T25, T5l * T26); + T6p = FMA(TO, T25, TR * T26); + T6r = FNMS(TR, T25, TO * T26); + T59 = FMA(TN, T25, TQ * T26); + T4r = FNMS(T3R, T25, T2v * T26); + } + } + { + E T9, T6i, T40, T3z, T5Y, Ti, Tr, Ts, T1d, T1m, T1P, T2K, T4P, T3H, T4y; + E T5G, T71, T65, T6N, T5z, T70, T64, T6K, T2Z, T4Q, T3I, T4B, T20, T5Z, T3C; + E T43, T6j, TB, TK, TL, T1w, T1F, T1Q, T3f, T4S, T3K, T4F, T5V, T74, T68; + E T6U, T5O, T73, T67, T6R, T3u, T4T, T3L, T4I; + { + E T1, T4, T7, T8, T3Z, T3Y, T3x, T3y; + T1 = cr[0]; + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 5)]; + T3 = ci[WS(rs, 4)]; + T4 = T2 + T3; + T5 = cr[WS(rs, 10)]; + T6 = ci[WS(rs, 9)]; + T7 = T5 + T6; + T8 = T4 + T7; + T3Z = T5 - T6; + T3Y = T2 - T3; + } + T9 = T1 + T8; + T6i = FMA(KP951056516, T3Y, KP587785252 * T3Z); + T40 = 
FNMS(KP951056516, T3Z, KP587785252 * T3Y); + T3x = FNMS(KP250000000, T8, T1); + T3y = KP559016994 * (T4 - T7); + T3z = T3x - T3y; + T5Y = T3y + T3x; + } + { + E Ta, T2x, T5w, T2F, Th, T2w, T1e, T2P, T5B, T2X, T1l, T2O, Tj, T2N, T5D; + E T2T, Tq, T2S, T15, T2B, T5u, T2H, T1c, T2G; + { + E Tg, T2E, Td, T2D; + Ta = cr[WS(rs, 1)]; + { + E Te, Tf, Tb, Tc; + Te = cr[WS(rs, 11)]; + Tf = ci[WS(rs, 8)]; + Tg = Te + Tf; + T2E = Te - Tf; + Tb = cr[WS(rs, 6)]; + Tc = ci[WS(rs, 3)]; + Td = Tb + Tc; + T2D = Tb - Tc; + } + T2x = KP559016994 * (Td - Tg); + T5w = FMA(KP951056516, T2D, KP587785252 * T2E); + T2F = FNMS(KP951056516, T2E, KP587785252 * T2D); + Th = Td + Tg; + T2w = FNMS(KP250000000, Th, Ta); + } + { + E T1k, T2W, T1h, T2V; + T1e = ci[WS(rs, 20)]; + { + E T1i, T1j, T1f, T1g; + T1i = cr[WS(rs, 14)]; + T1j = cr[WS(rs, 19)]; + T1k = T1i + T1j; + T2W = T1j - T1i; + T1f = ci[WS(rs, 15)]; + T1g = cr[WS(rs, 24)]; + T1h = T1f - T1g; + T2V = T1f + T1g; + } + T2P = KP559016994 * (T1h + T1k); + T5B = FMA(KP951056516, T2V, KP587785252 * T2W); + T2X = FNMS(KP951056516, T2W, KP587785252 * T2V); + T1l = T1h - T1k; + T2O = FNMS(KP250000000, T1l, T1e); + } + { + E Tp, T2M, Tm, T2L; + Tj = cr[WS(rs, 4)]; + { + E Tn, To, Tk, Tl; + Tn = ci[WS(rs, 10)]; + To = ci[WS(rs, 5)]; + Tp = Tn + To; + T2M = Tn - To; + Tk = cr[WS(rs, 9)]; + Tl = ci[0]; + Tm = Tk + Tl; + T2L = Tk - Tl; + } + T2N = FNMS(KP951056516, T2M, KP587785252 * T2L); + T5D = FMA(KP951056516, T2L, KP587785252 * T2M); + T2T = KP559016994 * (Tm - Tp); + Tq = Tm + Tp; + T2S = FNMS(KP250000000, Tq, Tj); + } + { + E T1b, T2A, T18, T2z; + T15 = ci[WS(rs, 23)]; + { + E T19, T1a, T16, T17; + T19 = ci[WS(rs, 13)]; + T1a = cr[WS(rs, 16)]; + T1b = T19 - T1a; + T2A = T19 + T1a; + T16 = ci[WS(rs, 18)]; + T17 = cr[WS(rs, 21)]; + T18 = T16 - T17; + T2z = T16 + T17; + } + T2B = FNMS(KP951056516, T2A, KP587785252 * T2z); + T5u = FMA(KP951056516, T2z, KP587785252 * T2A); + T2H = KP559016994 * (T18 - T1b); + T1c = T18 + T1b; + T2G = 
FNMS(KP250000000, T1c, T15); + } + Ti = Ta + Th; + Tr = Tj + Tq; + Ts = Ti + Tr; + T1d = T15 + T1c; + T1m = T1e + T1l; + T1P = T1d + T1m; + { + E T2C, T4w, T2J, T4x, T2y, T2I; + T2y = T2w - T2x; + T2C = T2y - T2B; + T4w = T2y + T2B; + T2I = T2G - T2H; + T2J = T2F + T2I; + T4x = T2I - T2F; + T2K = FNMS(KP481753674, T2J, KP876306680 * T2C); + T4P = FMA(KP728968627, T4x, KP684547105 * T4w); + T3H = FMA(KP876306680, T2J, KP481753674 * T2C); + T4y = FNMS(KP684547105, T4x, KP728968627 * T4w); + } + { + E T5C, T6M, T5F, T6L, T5A, T5E; + T5A = T2T + T2S; + T5C = T5A - T5B; + T6M = T5A + T5B; + T5E = T2O + T2P; + T5F = T5D + T5E; + T6L = T5E - T5D; + T5G = FNMS(KP844327925, T5F, KP535826794 * T5C); + T71 = FMA(KP637423989, T6L, KP770513242 * T6M); + T65 = FMA(KP535826794, T5F, KP844327925 * T5C); + T6N = FNMS(KP637423989, T6M, KP770513242 * T6L); + } + { + E T5v, T6I, T5y, T6J, T5t, T5x; + T5t = T2x + T2w; + T5v = T5t - T5u; + T6I = T5t + T5u; + T5x = T2H + T2G; + T5y = T5w + T5x; + T6J = T5x - T5w; + T5z = FNMS(KP248689887, T5y, KP968583161 * T5v); + T70 = FMA(KP535826794, T6J, KP844327925 * T6I); + T64 = FMA(KP968583161, T5y, KP248689887 * T5v); + T6K = FNMS(KP844327925, T6J, KP535826794 * T6I); + } + { + E T2R, T4z, T2Y, T4A, T2Q, T2U; + T2Q = T2O - T2P; + T2R = T2N + T2Q; + T4z = T2Q - T2N; + T2U = T2S - T2T; + T2Y = T2U - T2X; + T4A = T2U + T2X; + T2Z = FMA(KP904827052, T2R, KP425779291 * T2Y); + T4Q = FNMS(KP992114701, T4z, KP125333233 * T4A); + T3I = FNMS(KP425779291, T2R, KP904827052 * T2Y); + T4B = FMA(KP125333233, T4z, KP992114701 * T4A); + } + } + { + E T1S, T1V, T1Y, T1Z, T3B, T3A, T41, T42; + T1S = ci[WS(rs, 24)]; + { + E T1T, T1U, T1W, T1X; + T1T = ci[WS(rs, 19)]; + T1U = cr[WS(rs, 20)]; + T1V = T1T - T1U; + T1W = ci[WS(rs, 14)]; + T1X = cr[WS(rs, 15)]; + T1Y = T1W - T1X; + T1Z = T1V + T1Y; + T3B = T1W + T1X; + T3A = T1T + T1U; + } + T20 = T1S + T1Z; + T5Z = FMA(KP951056516, T3A, KP587785252 * T3B); + T3C = FNMS(KP951056516, T3B, KP587785252 * T3A); + T41 = 
FNMS(KP250000000, T1Z, T1S); + T42 = KP559016994 * (T1V - T1Y); + T43 = T41 - T42; + T6j = T42 + T41; + } + { + E Tt, T32, T5L, T3a, TA, T31, T1o, T36, T5J, T3c, T1v, T3b, TC, T3h, T5S; + E T3p, TJ, T3g, T1x, T3l, T5Q, T3r, T1E, T3q; + { + E Tw, T38, Tz, T39; + Tt = cr[WS(rs, 2)]; + { + E Tu, Tv, Tx, Ty; + Tu = cr[WS(rs, 7)]; + Tv = ci[WS(rs, 2)]; + Tw = Tu + Tv; + T38 = Tu - Tv; + Tx = cr[WS(rs, 12)]; + Ty = ci[WS(rs, 7)]; + Tz = Tx + Ty; + T39 = Tx - Ty; + } + T32 = KP559016994 * (Tw - Tz); + T5L = FMA(KP951056516, T38, KP587785252 * T39); + T3a = FNMS(KP951056516, T39, KP587785252 * T38); + TA = Tw + Tz; + T31 = FNMS(KP250000000, TA, Tt); + } + { + E T1r, T34, T1u, T35; + T1o = ci[WS(rs, 22)]; + { + E T1p, T1q, T1s, T1t; + T1p = ci[WS(rs, 17)]; + T1q = cr[WS(rs, 22)]; + T1r = T1p - T1q; + T34 = T1p + T1q; + T1s = ci[WS(rs, 12)]; + T1t = cr[WS(rs, 17)]; + T1u = T1s - T1t; + T35 = T1s + T1t; + } + T36 = FNMS(KP951056516, T35, KP587785252 * T34); + T5J = FMA(KP951056516, T34, KP587785252 * T35); + T3c = KP559016994 * (T1r - T1u); + T1v = T1r + T1u; + T3b = FNMS(KP250000000, T1v, T1o); + } + { + E TI, T3o, TF, T3n; + TC = cr[WS(rs, 3)]; + { + E TG, TH, TD, TE; + TG = ci[WS(rs, 11)]; + TH = ci[WS(rs, 6)]; + TI = TG + TH; + T3o = TG - TH; + TD = cr[WS(rs, 8)]; + TE = ci[WS(rs, 1)]; + TF = TD + TE; + T3n = TD - TE; + } + T3h = KP559016994 * (TF - TI); + T5S = FMA(KP951056516, T3n, KP587785252 * T3o); + T3p = FNMS(KP951056516, T3o, KP587785252 * T3n); + TJ = TF + TI; + T3g = FNMS(KP250000000, TJ, TC); + } + { + E T1D, T3k, T1A, T3j; + T1x = ci[WS(rs, 21)]; + { + E T1B, T1C, T1y, T1z; + T1B = cr[WS(rs, 13)]; + T1C = cr[WS(rs, 18)]; + T1D = T1B + T1C; + T3k = T1C - T1B; + T1y = ci[WS(rs, 16)]; + T1z = cr[WS(rs, 23)]; + T1A = T1y - T1z; + T3j = T1y + T1z; + } + T3l = FNMS(KP951056516, T3k, KP587785252 * T3j); + T5Q = FMA(KP951056516, T3j, KP587785252 * T3k); + T3r = KP559016994 * (T1A + T1D); + T1E = T1A - T1D; + T3q = FNMS(KP250000000, T1E, T1x); + } + TB = Tt + TA; + TK 
= TC + TJ; + TL = TB + TK; + T1w = T1o + T1v; + T1F = T1x + T1E; + T1Q = T1w + T1F; + { + E T37, T4D, T3e, T4E, T33, T3d; + T33 = T31 - T32; + T37 = T33 - T36; + T4D = T33 + T36; + T3d = T3b - T3c; + T3e = T3a + T3d; + T4E = T3d - T3a; + T3f = FNMS(KP844327925, T3e, KP535826794 * T37); + T4S = FMA(KP062790519, T4E, KP998026728 * T4D); + T3K = FMA(KP535826794, T3e, KP844327925 * T37); + T4F = FNMS(KP998026728, T4E, KP062790519 * T4D); + } + { + E T5R, T6T, T5U, T6S, T5P, T5T; + T5P = T3h + T3g; + T5R = T5P - T5Q; + T6T = T5P + T5Q; + T5T = T3q + T3r; + T5U = T5S + T5T; + T6S = T5T - T5S; + T5V = FNMS(KP684547105, T5U, KP728968627 * T5R); + T74 = FNMS(KP992114701, T6S, KP125333233 * T6T); + T68 = FMA(KP728968627, T5U, KP684547105 * T5R); + T6U = FMA(KP125333233, T6S, KP992114701 * T6T); + } + { + E T5K, T6Q, T5N, T6P, T5I, T5M; + T5I = T32 + T31; + T5K = T5I - T5J; + T6Q = T5I + T5J; + T5M = T3c + T3b; + T5N = T5L + T5M; + T6P = T5M - T5L; + T5O = FNMS(KP481753674, T5N, KP876306680 * T5K); + T73 = FNMS(KP425779291, T6P, KP904827052 * T6Q); + T67 = FMA(KP876306680, T5N, KP481753674 * T5K); + T6R = FMA(KP904827052, T6P, KP425779291 * T6Q); + } + { + E T3m, T4H, T3t, T4G, T3i, T3s; + T3i = T3g - T3h; + T3m = T3i - T3l; + T4H = T3i + T3l; + T3s = T3q - T3r; + T3t = T3p + T3s; + T4G = T3s - T3p; + T3u = FNMS(KP998026728, T3t, KP062790519 * T3m); + T4T = FNMS(KP637423989, T4G, KP770513242 * T4H); + T3L = FMA(KP062790519, T3t, KP998026728 * T3m); + T4I = FMA(KP770513242, T4G, KP637423989 * T4H); + } + } + { + E TM, T14, T2e, T21, T23, T2l, T1H, T2f, T1O, T2k; + { + E T12, T13, T1R, T22; + T12 = KP559016994 * (Ts - TL); + TM = Ts + TL; + T13 = FNMS(KP250000000, TM, T9); + T14 = T12 + T13; + T2e = T13 - T12; + T1R = KP559016994 * (T1P - T1Q); + T21 = T1P + T1Q; + T22 = FNMS(KP250000000, T21, T20); + T23 = T1R + T22; + T2l = T22 - T1R; + } + { + E T1n, T1G, T1M, T1N; + T1n = T1d - T1m; + T1G = T1w - T1F; + T1H = FMA(KP951056516, T1n, KP587785252 * T1G); + T2f = 
FNMS(KP951056516, T1G, KP587785252 * T1n); + T1M = Ti - Tr; + T1N = TB - TK; + T1O = FMA(KP951056516, T1M, KP587785252 * T1N); + T2k = FNMS(KP951056516, T1N, KP587785252 * T1M); + } + { + E T1I, T24, T2o, T2q; + cr[0] = T9 + TM; + ci[0] = T20 + T21; + T1I = T14 - T1H; + T24 = T1O + T23; + cr[WS(rs, 5)] = FNMS(T1L, T24, T11 * T1I); + ci[WS(rs, 5)] = FMA(T1L, T1I, T11 * T24); + T2o = T2e + T2f; + T2q = T2l - T2k; + cr[WS(rs, 15)] = FNMS(T2p, T2q, T2n * T2o); + ci[WS(rs, 15)] = FMA(T2p, T2o, T2n * T2q); + { + E T2g, T2m, T28, T2a; + T2g = T2e - T2f; + T2m = T2k + T2l; + cr[WS(rs, 10)] = FNMS(T2j, T2m, T2d * T2g); + ci[WS(rs, 10)] = FMA(T2j, T2g, T2d * T2m); + T28 = T14 + T1H; + T2a = T23 - T1O; + cr[WS(rs, 20)] = FNMS(T29, T2a, T27 * T28); + ci[WS(rs, 20)] = FMA(T29, T28, T27 * T2a); + } + } + } + { + E T76, T7n, T7a, T7q, T6H, T6W, T6X, T6Y, T7e, T7f, T7d, T7g, T7x, T7y; + { + E T72, T75, T78, T79; + T72 = T70 + T71; + T75 = T73 - T74; + T76 = FMA(KP951056516, T72, KP587785252 * T75); + T7n = FNMS(KP951056516, T75, KP587785252 * T72); + T78 = T6K - T6N; + T79 = T6U - T6R; + T7a = FMA(KP951056516, T78, KP587785252 * T79); + T7q = FNMS(KP951056516, T79, KP587785252 * T78); + } + { + E T6O, T6V, T7b, T7c; + T6H = T5Y + T5Z; + T6O = T6K + T6N; + T6V = T6R + T6U; + T6W = T6O - T6V; + T6X = FNMS(KP250000000, T6W, T6H); + T6Y = KP559016994 * (T6O + T6V); + T7e = T6j - T6i; + T7b = T70 - T71; + T7c = T73 + T74; + T7f = T7b + T7c; + T7d = KP559016994 * (T7b - T7c); + T7g = FNMS(KP250000000, T7f, T7e); + } + T7x = T6H + T6W; + T7y = T7e + T7f; + cr[WS(rs, 4)] = FNMS(TY, T7y, TT * T7x); + ci[WS(rs, 4)] = FMA(TY, T7x, TT * T7y); + { + E T7o, T7u, T7s, T7w, T7m, T7r; + T7m = T6X - T6Y; + T7o = T7m - T7n; + T7u = T7m + T7n; + T7r = T7g - T7d; + T7s = T7q + T7r; + T7w = T7r - T7q; + cr[WS(rs, 14)] = FNMS(T7p, T7s, T7l * T7o); + ci[WS(rs, 14)] = FMA(T7p, T7o, T7l * T7s); + cr[WS(rs, 19)] = FNMS(T7v, T7w, T7t * T7u); + ci[WS(rs, 19)] = FMA(T7v, T7u, T7t * T7w); + } + { + E T77, T7j, 
T7i, T7k, T6Z, T7h; + T6Z = T6X + T6Y; + T77 = T6Z - T76; + T7j = T6Z + T76; + T7h = T7d + T7g; + T7i = T7a + T7h; + T7k = T7h - T7a; + cr[WS(rs, 9)] = FNMS(TZ, T7i, TU * T77); + ci[WS(rs, 9)] = FMA(TZ, T77, TU * T7i); + cr[WS(rs, 24)] = FNMS(T26, T7k, T25 * T7j); + ci[WS(rs, 24)] = FMA(T26, T7j, T25 * T7k); + } + } + { + E T3N, T4h, T3U, T4m, T3D, T3E, T3w, T3F, T44, T45, T3X, T46, T4t, T4u; + { + E T3J, T3M, T3S, T3T; + T3J = T3H - T3I; + T3M = T3K - T3L; + T3N = FMA(KP951056516, T3J, KP587785252 * T3M); + T4h = FNMS(KP951056516, T3M, KP587785252 * T3J); + T3S = T2K + T2Z; + T3T = T3f - T3u; + T3U = FMA(KP951056516, T3S, KP587785252 * T3T); + T4m = FNMS(KP951056516, T3T, KP587785252 * T3S); + } + { + E T30, T3v, T3V, T3W; + T3D = T3z - T3C; + T30 = T2K - T2Z; + T3v = T3f + T3u; + T3E = T30 + T3v; + T3w = KP559016994 * (T30 - T3v); + T3F = FNMS(KP250000000, T3E, T3D); + T44 = T40 + T43; + T3V = T3H + T3I; + T3W = T3K + T3L; + T45 = T3V + T3W; + T3X = KP559016994 * (T3V - T3W); + T46 = FNMS(KP250000000, T45, T44); + } + T4t = T3D + T3E; + T4u = T44 + T45; + cr[WS(rs, 2)] = FNMS(T2t, T4u, T2r * T4t); + ci[WS(rs, 2)] = FMA(T2t, T4t, T2r * T4u); + { + E T4i, T4q, T4o, T4s, T4g, T4n; + T4g = T3F - T3w; + T4i = T4g - T4h; + T4q = T4g + T4h; + T4n = T46 - T3X; + T4o = T4m + T4n; + T4s = T4n - T4m; + cr[WS(rs, 12)] = FNMS(T4l, T4o, T4f * T4i); + ci[WS(rs, 12)] = FMA(T4l, T4i, T4f * T4o); + cr[WS(rs, 17)] = FNMS(T4r, T4s, T4p * T4q); + ci[WS(rs, 17)] = FMA(T4r, T4q, T4p * T4s); + } + { + E T3O, T4a, T48, T4c, T3G, T47; + T3G = T3w + T3F; + T3O = T3G - T3N; + T4a = T3G + T3N; + T47 = T3X + T46; + T48 = T3U + T47; + T4c = T47 - T3U; + cr[WS(rs, 7)] = FNMS(T3R, T48, T2v * T3O); + ci[WS(rs, 7)] = FMA(T3R, T3O, T2v * T48); + cr[WS(rs, 22)] = FNMS(T4b, T4c, T49 * T4a); + ci[WS(rs, 22)] = FMA(T4b, T4a, T49 * T4c); + } + } + { + E T4V, T5f, T50, T5i, T4L, T4M, T4K, T4N, T54, T55, T53, T56, T5r, T5s; + { + E T4R, T4U, T4Y, T4Z; + T4R = T4P - T4Q; + T4U = T4S - T4T; + T4V = 
FMA(KP951056516, T4R, KP587785252 * T4U); + T5f = FNMS(KP951056516, T4U, KP587785252 * T4R); + T4Y = T4y + T4B; + T4Z = T4F + T4I; + T50 = FMA(KP951056516, T4Y, KP587785252 * T4Z); + T5i = FNMS(KP951056516, T4Z, KP587785252 * T4Y); + } + { + E T4C, T4J, T51, T52; + T4L = T3z + T3C; + T4C = T4y - T4B; + T4J = T4F - T4I; + T4M = T4C + T4J; + T4K = KP559016994 * (T4C - T4J); + T4N = FNMS(KP250000000, T4M, T4L); + T54 = T43 - T40; + T51 = T4P + T4Q; + T52 = T4S + T4T; + T55 = T51 + T52; + T53 = KP559016994 * (T51 - T52); + T56 = FNMS(KP250000000, T55, T54); + } + T5r = T4L + T4M; + T5s = T54 + T55; + cr[WS(rs, 3)] = FNMS(TR, T5s, TO * T5r); + ci[WS(rs, 3)] = FMA(TR, T5r, TO * T5s); + { + E T5g, T5o, T5k, T5q, T5e, T5j; + T5e = T4N - T4K; + T5g = T5e - T5f; + T5o = T5e + T5f; + T5j = T56 - T53; + T5k = T5i + T5j; + T5q = T5j - T5i; + cr[WS(rs, 13)] = FNMS(T5h, T5k, T5d * T5g); + ci[WS(rs, 13)] = FMA(T5h, T5g, T5d * T5k); + cr[WS(rs, 18)] = FNMS(T5p, T5q, T5n * T5o); + ci[WS(rs, 18)] = FMA(T5p, T5o, T5n * T5q); + } + { + E T4W, T5a, T58, T5c, T4O, T57; + T4O = T4K + T4N; + T4W = T4O - T4V; + T5a = T4O + T4V; + T57 = T53 + T56; + T58 = T50 + T57; + T5c = T57 - T50; + cr[WS(rs, 8)] = FNMS(T4X, T58, T4v * T4W); + ci[WS(rs, 8)] = FMA(T4X, T4W, T4v * T58); + cr[WS(rs, 23)] = FNMS(T5b, T5c, T59 * T5a); + ci[WS(rs, 23)] = FMA(T5b, T5a, T59 * T5c); + } + } + { + E T6a, T6v, T6e, T6y, T60, T61, T5X, T62, T6k, T6l, T6h, T6m, T6F, T6G; + { + E T66, T69, T6c, T6d; + T66 = T64 - T65; + T69 = T67 - T68; + T6a = FMA(KP951056516, T66, KP587785252 * T69); + T6v = FNMS(KP951056516, T69, KP587785252 * T66); + T6c = T5z - T5G; + T6d = T5O - T5V; + T6e = FMA(KP951056516, T6c, KP587785252 * T6d); + T6y = FNMS(KP951056516, T6d, KP587785252 * T6c); + } + { + E T5H, T5W, T6f, T6g; + T60 = T5Y - T5Z; + T5H = T5z + T5G; + T5W = T5O + T5V; + T61 = T5H + T5W; + T5X = KP559016994 * (T5H - T5W); + T62 = FNMS(KP250000000, T61, T60); + T6k = T6i + T6j; + T6f = T64 + T65; + T6g = T67 + T68; + T6l = T6f + 
T6g; + T6h = KP559016994 * (T6f - T6g); + T6m = FNMS(KP250000000, T6l, T6k); + } + T6F = T60 + T61; + T6G = T6k + T6l; + cr[WS(rs, 1)] = FNMS(TQ, T6G, TN * T6F); + ci[WS(rs, 1)] = FMA(TQ, T6F, TN * T6G); + { + E T6w, T6C, T6A, T6E, T6u, T6z; + T6u = T62 - T5X; + T6w = T6u - T6v; + T6C = T6u + T6v; + T6z = T6m - T6h; + T6A = T6y + T6z; + T6E = T6z - T6y; + cr[WS(rs, 11)] = FNMS(T6x, T6A, T6t * T6w); + ci[WS(rs, 11)] = FMA(T6x, T6w, T6t * T6A); + cr[WS(rs, 16)] = FNMS(T6D, T6E, T6B * T6C); + ci[WS(rs, 16)] = FMA(T6D, T6C, T6B * T6E); + } + { + E T6b, T6q, T6o, T6s, T63, T6n; + T63 = T5X + T62; + T6b = T63 - T6a; + T6q = T63 + T6a; + T6n = T6h + T6m; + T6o = T6e + T6n; + T6s = T6n - T6e; + cr[WS(rs, 6)] = FNMS(T5m, T6o, T5l * T6b); + ci[WS(rs, 6)] = FMA(T5m, T6b, T5l * T6o); + cr[WS(rs, 21)] = FNMS(T6r, T6s, T6p * T6q); + ci[WS(rs, 21)] = FMA(T6r, T6q, T6p * T6s); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 24}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hb2_25", twinstr, &GENUS, {280, 180, 160, 0} }; + +void X(codelet_hb2_25) (planner *p) { + X(khc2hc_register) (p, hb2_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1845 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hb2_32 -include hb.h */ + +/* + * This function contains 488 FP additions, 350 FP multiplications, + * (or, 236 additions, 98 multiplications, 252 fused multiply/add), + * 204 stack variables, 7 constants, and 128 memory accesses + */ +#include "hb.h" + +static void hb2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T5u, T6b, T6e, T5I, T66, T60, T5U, T5R, T67, T5L, T61, T5x, T5A, T5D, T5O; + E T62, T5V, T5P; + { + E T11, T14, T12, T37, T17, T1b, T39, T15, T7C, T8P, T8S, T7I, T98, T7e, T78; + E T8V, T3d, 
T3x, T3a, T3v, T9s, T3G, T4p, T5X, T16, T9m, T3y, T4b, T3C, T4g; + E T5Z, T1a, T4r, T3J, T2O, T1c, T4W, T4s, T3Y, T3K, T3l, T3e, T3i, T3q, T8K; + E T8E, T8m, T7S, T5k, T5e; + { + E T13, T3c, T38, T3F, T7B, T9l, T77, T7d, T9r, T7H; + T11 = W[2]; + T14 = W[3]; + T12 = W[4]; + T37 = W[0]; + T17 = W[6]; + T1b = W[7]; + T13 = T11 * T12; + T3c = T37 * T14; + T38 = T37 * T11; + T3F = T37 * T12; + T7B = T11 * T17; + T9l = T12 * T17; + T77 = T37 * T17; + T7d = T37 * T1b; + T9r = T12 * T1b; + T7H = T11 * T1b; + T39 = W[1]; + T15 = W[5]; + { + E T3I, T19, T5d, T3b, T18, T2N; + T7C = FMA(T14, T1b, T7B); + T8P = FNMS(T14, T1b, T7B); + T8S = FMA(T14, T17, T7H); + T7I = FNMS(T14, T17, T7H); + T98 = FNMS(T39, T17, T7d); + T7e = FMA(T39, T17, T7d); + T78 = FNMS(T39, T1b, T77); + T8V = FMA(T39, T1b, T77); + T3d = FMA(T39, T11, T3c); + T3x = FNMS(T39, T11, T3c); + T3a = FNMS(T39, T14, T38); + T3v = FMA(T39, T14, T38); + T9s = FNMS(T15, T17, T9r); + T3G = FNMS(T39, T15, T3F); + T4p = FMA(T39, T15, T3F); + T5X = FNMS(T14, T15, T13); + T16 = FMA(T14, T15, T13); + T3I = T37 * T15; + T19 = T11 * T15; + T5d = T3v * T12; + T3b = T3a * T12; + T9m = FMA(T15, T1b, T9l); + { + E T3w, T3B, T5t, T5H; + T3w = T3v * T17; + T3B = T3v * T1b; + T5t = T3a * T17; + T5H = T3a * T1b; + T3y = FNMS(T3x, T1b, T3w); + T4b = FMA(T3x, T1b, T3w); + T3C = FMA(T3x, T17, T3B); + T4g = FNMS(T3x, T17, T3B); + T5u = FMA(T3d, T1b, T5t); + T6b = FNMS(T3d, T1b, T5t); + T6e = FMA(T3d, T17, T5H); + T5I = FNMS(T3d, T17, T5H); + T18 = T16 * T17; + T2N = T16 * T1b; + T5Z = FMA(T14, T12, T19); + T1a = FNMS(T14, T12, T19); + } + { + E T3H, T3X, T4q, T4V, T5Y, T65; + T4q = T4p * T17; + T4V = T4p * T1b; + T4r = FNMS(T39, T12, T3I); + T3J = FMA(T39, T12, T3I); + T2O = FNMS(T1a, T17, T2N); + T1c = FMA(T1a, T1b, T18); + T3H = T3G * T17; + T4W = FNMS(T4r, T17, T4V); + T4s = FMA(T4r, T1b, T4q); + T3X = T3G * T1b; + T5Y = T5X * T17; + T65 = T5X * T1b; + T3Y = FNMS(T3J, T17, T3X); + T3K = FMA(T3J, T1b, T3H); + { + E T8J, T8D, T3h, T5j, 
T8l, T7R; + T3h = T3a * T15; + T66 = FNMS(T5Z, T17, T65); + T60 = FMA(T5Z, T1b, T5Y); + T3l = FNMS(T3d, T15, T3b); + T3e = FMA(T3d, T15, T3b); + T3i = FNMS(T3d, T12, T3h); + T3q = FMA(T3d, T12, T3h); + T8J = T3l * T1b; + T8D = T3l * T17; + T5j = T3v * T15; + T8l = T3e * T1b; + T7R = T3e * T17; + T8K = FNMS(T3q, T17, T8J); + T8E = FMA(T3q, T1b, T8D); + T8m = FNMS(T3i, T17, T8l); + T7S = FMA(T3i, T1b, T7R); + T5U = FNMS(T3x, T12, T5j); + T5k = FMA(T3x, T12, T5j); + T5e = FNMS(T3x, T15, T5d); + T5R = FMA(T3x, T15, T5d); + } + } + } + } + { + E T6O, T6i, T7s, T7o, T6j, Tf, T8W, T7V, T99, T8p, T3L, T1t, T3Z, T2X, T5J; + E T4Z, T7t, T6W, T5v, T4v, TZ, T7x, T91, T9d, T28, T3S, T3R, T2h, T5B, T4Q; + E T8v, T8a, T5C, T4N, T6Z, T6J, TK, T7w, T2z, T3P, T94, T9c, T3O, T2I, T5y; + E T4J, T8u, T8h, T5z, T4G, T6Y, T6A, T6p, T6m, T6P, Tu, T9a, T82, T8X, T8s; + E T40, T1Q, T4y, T4B, T3M, T30, T5w, T52; + { + E T6B, T6I, T4L, T4M, T4t, T4u; + { + E T1d, T3, T2P, T6, T6Q, T2S, T6R, T1g, Td, T6U, T1i, Ta, T2V, T1r, T6T; + E T1l; + { + E T4, T5, T2Q, T2R, T1, T2, T1e, T1f; + T1 = cr[0]; + T2 = ci[WS(rs, 15)]; + { + E T6N, T6h, T7r, T7n; + T6N = T5R * T1b; + T6h = T5R * T17; + T7r = T5e * T1b; + T7n = T5e * T17; + T6O = FNMS(T5U, T17, T6N); + T6i = FMA(T5U, T1b, T6h); + T7s = FNMS(T5k, T17, T7r); + T7o = FMA(T5k, T1b, T7n); + T1d = T1 - T2; + T3 = T1 + T2; + } + T4 = cr[WS(rs, 8)]; + T5 = ci[WS(rs, 7)]; + T2Q = ci[WS(rs, 31)]; + T2R = cr[WS(rs, 16)]; + T1e = ci[WS(rs, 23)]; + T2P = T4 - T5; + T6 = T4 + T5; + T6Q = T2Q - T2R; + T2S = T2Q + T2R; + T1f = cr[WS(rs, 24)]; + { + E T1o, T1n, T1p, Tb, Tc; + Tb = ci[WS(rs, 3)]; + Tc = cr[WS(rs, 12)]; + T1o = ci[WS(rs, 19)]; + T6R = T1e - T1f; + T1g = T1e + T1f; + T1n = Tb - Tc; + Td = Tb + Tc; + T1p = cr[WS(rs, 28)]; + { + E T1j, T1k, T8, T9, T1q; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 11)]; + T1q = T1o + T1p; + T6U = T1o - T1p; + T1j = ci[WS(rs, 27)]; + T1i = T8 - T9; + Ta = T8 + T9; + T1k = cr[WS(rs, 20)]; + T2V = T1n + T1q; + T1r = T1n - T1q; 
+ T6T = T1j - T1k; + T1l = T1j + T1k; + } + } + } + { + E T2U, T6V, T6S, T1h, T1s, T4Y, T4X, T2T, T2W; + { + E T7T, T8o, T1m, T7U, T7, Te, T8n; + T7T = T3 - T6; + T7 = T3 + T6; + Te = Ta + Td; + T8o = Ta - Td; + T1m = T1i - T1l; + T2U = T1i + T1l; + T6j = T7 - Te; + Tf = T7 + Te; + T7U = T6U - T6T; + T6V = T6T + T6U; + T6S = T6Q + T6R; + T8n = T6Q - T6R; + T4t = T1d + T1g; + T1h = T1d - T1g; + T8W = T7T + T7U; + T7V = T7T - T7U; + T99 = T8o + T8n; + T8p = T8n - T8o; + T1s = T1m + T1r; + T4Y = T1m - T1r; + } + T4X = T2S - T2P; + T2T = T2P + T2S; + T2W = T2U - T2V; + T4u = T2U + T2V; + T3L = FMA(KP707106781, T1s, T1h); + T1t = FNMS(KP707106781, T1s, T1h); + T3Z = FMA(KP707106781, T2W, T2T); + T2X = FNMS(KP707106781, T2W, T2T); + T5J = FNMS(KP707106781, T4Y, T4X); + T4Z = FMA(KP707106781, T4Y, T4X); + T7t = T6S + T6V; + T6W = T6S - T6V; + } + } + { + E T29, T1S, T1V, T87, TR, T2c, T84, T6E, T1X, TU, T1Y, T6G, T25, T22, TX; + E T1Z; + { + E TO, TN, TP, TL, TM, T6C, T6D; + TL = ci[0]; + TM = cr[WS(rs, 15)]; + TO = cr[WS(rs, 7)]; + T5v = FMA(KP707106781, T4u, T4t); + T4v = FNMS(KP707106781, T4u, T4t); + TN = TL + TM; + T29 = TL - TM; + TP = ci[WS(rs, 8)]; + { + E T2a, T2b, T1T, T1U, TQ; + T1T = ci[WS(rs, 16)]; + T1U = cr[WS(rs, 31)]; + TQ = TO + TP; + T1S = TO - TP; + T2a = ci[WS(rs, 24)]; + T6C = T1T - T1U; + T1V = T1T + T1U; + T2b = cr[WS(rs, 23)]; + T87 = TN - TQ; + TR = TN + TQ; + T2c = T2a + T2b; + T6D = T2a - T2b; + } + { + E T23, T24, TS, TT, TV, TW; + TS = cr[WS(rs, 3)]; + TT = ci[WS(rs, 12)]; + T84 = T6C - T6D; + T6E = T6C + T6D; + T23 = ci[WS(rs, 20)]; + T1X = TS - TT; + TU = TS + TT; + T24 = cr[WS(rs, 27)]; + TV = ci[WS(rs, 4)]; + TW = cr[WS(rs, 11)]; + T1Y = ci[WS(rs, 28)]; + T6G = T23 - T24; + T25 = T23 + T24; + T22 = TV - TW; + TX = TV + TW; + T1Z = cr[WS(rs, 19)]; + } + } + { + E T4O, T1W, T2f, T26, T8Z, T86, T2e, T21, T89, T90; + { + E T85, TY, T6F, T20, T6H, T88; + T4O = T1S + T1V; + T1W = T1S - T1V; + T2f = T22 - T25; + T26 = T22 + T25; + T85 = TU - TX; 
+ TY = TU + TX; + T6F = T1Y - T1Z; + T20 = T1Y + T1Z; + T8Z = T85 + T84; + T86 = T84 - T85; + T6B = TR - TY; + TZ = TR + TY; + T6H = T6F + T6G; + T88 = T6G - T6F; + T2e = T1X - T20; + T21 = T1X + T20; + T7x = T6E + T6H; + T6I = T6E - T6H; + T89 = T87 - T88; + T90 = T87 + T88; + } + { + E T4P, T2d, T27, T2g; + T2d = T29 - T2c; + T4L = T29 + T2c; + T4M = T21 + T26; + T27 = T21 - T26; + T2g = T2e + T2f; + T4P = T2e - T2f; + T91 = FNMS(KP414213562, T90, T8Z); + T9d = FMA(KP414213562, T8Z, T90); + T28 = FNMS(KP707106781, T27, T1W); + T3S = FMA(KP707106781, T27, T1W); + T3R = FMA(KP707106781, T2g, T2d); + T2h = FNMS(KP707106781, T2g, T2d); + T5B = FMA(KP707106781, T4P, T4O); + T4Q = FNMS(KP707106781, T4P, T4O); + T8v = FNMS(KP414213562, T86, T89); + T8a = FMA(KP414213562, T89, T86); + } + } + } + { + E T6s, T6z, T4F, T4E; + { + E T2A, T2j, TC, T8e, T2m, T2D, T6v, T8b, TG, T2o, TF, T6x, T2w, TH, T2p; + E T2q; + { + E Tw, Tx, Tz, TA, T6t, T6u; + Tw = cr[WS(rs, 1)]; + T5C = FMA(KP707106781, T4M, T4L); + T4N = FNMS(KP707106781, T4M, T4L); + T6Z = T6I - T6B; + T6J = T6B + T6I; + Tx = ci[WS(rs, 14)]; + Tz = cr[WS(rs, 9)]; + TA = ci[WS(rs, 6)]; + { + E T2k, Ty, TB, T2l, T2B, T2C; + T2k = ci[WS(rs, 30)]; + T2A = Tw - Tx; + Ty = Tw + Tx; + T2j = Tz - TA; + TB = Tz + TA; + T2l = cr[WS(rs, 17)]; + T2B = ci[WS(rs, 22)]; + T2C = cr[WS(rs, 25)]; + TC = Ty + TB; + T8e = Ty - TB; + T2m = T2k + T2l; + T6t = T2k - T2l; + T6u = T2B - T2C; + T2D = T2B + T2C; + } + { + E TD, TE, T2u, T2v; + TD = cr[WS(rs, 5)]; + T6v = T6t + T6u; + T8b = T6t - T6u; + TE = ci[WS(rs, 10)]; + T2u = ci[WS(rs, 18)]; + T2v = cr[WS(rs, 29)]; + TG = ci[WS(rs, 2)]; + T2o = TD - TE; + TF = TD + TE; + T6x = T2u - T2v; + T2w = T2u + T2v; + TH = cr[WS(rs, 13)]; + T2p = ci[WS(rs, 26)]; + T2q = cr[WS(rs, 21)]; + } + } + { + E T4H, T2n, T2G, T2F, T92, T8d, T2y, T93, T8g, T4I, T2E, T2H; + { + E T2x, T8c, T8f, T2s, T2t, TI; + T4H = T2m - T2j; + T2n = T2j + T2m; + T2t = TG - TH; + TI = TG + TH; + { + E T6w, T2r, TJ, T6y; + T6w 
= T2p - T2q; + T2r = T2p + T2q; + T2G = T2t - T2w; + T2x = T2t + T2w; + T8c = TF - TI; + TJ = TF + TI; + T6y = T6w + T6x; + T8f = T6x - T6w; + T2F = T2o - T2r; + T2s = T2o + T2r; + TK = TC + TJ; + T6s = TC - TJ; + T6z = T6v - T6y; + T7w = T6v + T6y; + } + T92 = T8c + T8b; + T8d = T8b - T8c; + T4F = T2s + T2x; + T2y = T2s - T2x; + T93 = T8e + T8f; + T8g = T8e - T8f; + } + T4E = T2A + T2D; + T2E = T2A - T2D; + T2H = T2F + T2G; + T4I = T2G - T2F; + T2z = FNMS(KP707106781, T2y, T2n); + T3P = FMA(KP707106781, T2y, T2n); + T94 = FMA(KP414213562, T93, T92); + T9c = FNMS(KP414213562, T92, T93); + T3O = FMA(KP707106781, T2H, T2E); + T2I = FNMS(KP707106781, T2H, T2E); + T5y = FMA(KP707106781, T4I, T4H); + T4J = FNMS(KP707106781, T4I, T4H); + T8u = FMA(KP414213562, T8d, T8g); + T8h = FNMS(KP414213562, T8g, T8d); + } + } + { + E T4x, T1O, Tm, T7Z, T80, T4w, T1J, T4A, T1D, Tt, T7X, T7W, T4z, T1y; + { + E Tj, T1K, Ti, T6o, T1N, Tk, T1G, T1H; + { + E Tg, Th, T1L, T1M; + Tg = cr[WS(rs, 2)]; + T5z = FMA(KP707106781, T4F, T4E); + T4G = FNMS(KP707106781, T4F, T4E); + T6Y = T6s + T6z; + T6A = T6s - T6z; + Th = ci[WS(rs, 13)]; + T1L = ci[WS(rs, 21)]; + T1M = cr[WS(rs, 26)]; + Tj = cr[WS(rs, 10)]; + T1K = Tg - Th; + Ti = Tg + Th; + T6o = T1L - T1M; + T1N = T1L + T1M; + Tk = ci[WS(rs, 5)]; + T1G = ci[WS(rs, 29)]; + T1H = cr[WS(rs, 18)]; + } + { + E T1F, Tl, T6n, T1I; + T4x = T1K + T1N; + T1O = T1K - T1N; + T1F = Tj - Tk; + Tl = Tj + Tk; + T6n = T1G - T1H; + T1I = T1G + T1H; + Tm = Ti + Tl; + T7Z = Ti - Tl; + T80 = T6n - T6o; + T6p = T6n + T6o; + T4w = T1I - T1F; + T1J = T1F + T1I; + } + } + { + E Tq, T1z, Tp, T6l, T1C, Tr, T1v, T1w; + { + E Tn, To, T1A, T1B; + Tn = ci[WS(rs, 1)]; + To = cr[WS(rs, 14)]; + T1A = ci[WS(rs, 25)]; + T1B = cr[WS(rs, 22)]; + Tq = cr[WS(rs, 6)]; + T1z = Tn - To; + Tp = Tn + To; + T6l = T1A - T1B; + T1C = T1A + T1B; + Tr = ci[WS(rs, 9)]; + T1v = ci[WS(rs, 17)]; + T1w = cr[WS(rs, 30)]; + } + { + E T1u, Ts, T6k, T1x; + T4A = T1z + T1C; + T1D = T1z - T1C; + T1u = Tq 
- Tr; + Ts = Tq + Tr; + T6k = T1v - T1w; + T1x = T1v + T1w; + Tt = Tp + Ts; + T7X = Tp - Ts; + T7W = T6k - T6l; + T6m = T6k + T6l; + T4z = T1u + T1x; + T1y = T1u - T1x; + } + } + { + E T8r, T8q, T2Z, T1E, T1P, T2Y, T7Y, T81, T50, T51; + T8r = T7X + T7W; + T7Y = T7W - T7X; + T81 = T7Z + T80; + T8q = T7Z - T80; + T6P = Tm - Tt; + Tu = Tm + Tt; + T9a = T81 + T7Y; + T82 = T7Y - T81; + T2Z = FMA(KP414213562, T1y, T1D); + T1E = FNMS(KP414213562, T1D, T1y); + T1P = FMA(KP414213562, T1O, T1J); + T2Y = FNMS(KP414213562, T1J, T1O); + T8X = T8q + T8r; + T8s = T8q - T8r; + T40 = T1P + T1E; + T1Q = T1E - T1P; + T4y = FNMS(KP414213562, T4x, T4w); + T50 = FMA(KP414213562, T4w, T4x); + T51 = FMA(KP414213562, T4z, T4A); + T4B = FNMS(KP414213562, T4A, T4z); + T3M = T2Y + T2Z; + T30 = T2Y - T2Z; + T5w = T50 + T51; + T52 = T50 - T51; + } + } + } + } + { + E T7D, T5K, T4C, T7K, T7J, T7E, T83, T8w, T8t, T8i, T6r, T70, T6X, T6K; + { + E T6q, T8Y, T9e, T9b, T95, T8L, T8Q, T8H, T8M, T8I, T8R; + { + E Tv, T10, T7v, T7y, T7u; + T7D = Tf - Tu; + Tv = Tf + Tu; + T7u = T6p + T6m; + T6q = T6m - T6p; + T5K = T4B - T4y; + T4C = T4y + T4B; + T10 = TK + TZ; + T7K = TK - TZ; + T7J = T7t - T7u; + T7v = T7t + T7u; + T7y = T7w + T7x; + T7E = T7x - T7w; + { + E T9t, T9x, T9p, T9u, T9q, T9y; + { + E T9n, T7z, T9o, T7A, T7q, T7p; + T8Y = FNMS(KP707106781, T8X, T8W); + T9n = FMA(KP707106781, T8X, T8W); + cr[0] = Tv + T10; + T7p = Tv - T10; + ci[0] = T7v + T7y; + T7z = T7v - T7y; + T9o = T9c + T9d; + T9e = T9c - T9d; + T7A = T7s * T7p; + T7q = T7o * T7p; + T9b = FNMS(KP707106781, T9a, T99); + T9t = FMA(KP707106781, T9a, T99); + T9x = FMA(KP923879532, T9o, T9n); + T9p = FNMS(KP923879532, T9o, T9n); + ci[WS(rs, 16)] = FMA(T7o, T7z, T7A); + cr[WS(rs, 16)] = FNMS(T7s, T7z, T7q); + T9u = T94 + T91; + T95 = T91 - T94; + } + T9q = T9m * T9p; + T9y = T3v * T9x; + { + E T8F, T9z, T9v, T8G, T9A, T9w; + T83 = FMA(KP707106781, T82, T7V); + T8F = FNMS(KP707106781, T82, T7V); + T9z = FMA(KP923879532, T9u, T9t); + T9v = 
FNMS(KP923879532, T9u, T9t); + T8G = T8u + T8v; + T8w = T8u - T8v; + T8t = FMA(KP707106781, T8s, T8p); + T8L = FNMS(KP707106781, T8s, T8p); + T9A = T3v * T9z; + cr[WS(rs, 2)] = FNMS(T3x, T9z, T9y); + T9w = T9m * T9v; + cr[WS(rs, 18)] = FNMS(T9s, T9v, T9q); + T8Q = FMA(KP923879532, T8G, T8F); + T8H = FNMS(KP923879532, T8G, T8F); + ci[WS(rs, 2)] = FMA(T3x, T9x, T9A); + ci[WS(rs, 18)] = FMA(T9s, T9p, T9w); + T8M = T8h + T8a; + T8i = T8a - T8h; + } + T8I = T8E * T8H; + T8R = T8P * T8Q; + } + } + { + E T7f, T7j, T7b, T7g, T7c, T7k; + { + E T79, T8T, T8N, T7a, T8U, T8O; + T6r = T6j + T6q; + T79 = T6j - T6q; + T8T = FMA(KP923879532, T8M, T8L); + T8N = FNMS(KP923879532, T8M, T8L); + T7a = T6Z - T6Y; + T70 = T6Y + T6Z; + T6X = T6P + T6W; + T7f = T6W - T6P; + T8U = T8P * T8T; + cr[WS(rs, 30)] = FNMS(T8S, T8T, T8R); + T8O = T8E * T8N; + cr[WS(rs, 14)] = FNMS(T8K, T8N, T8I); + T7j = FMA(KP707106781, T7a, T79); + T7b = FNMS(KP707106781, T7a, T79); + ci[WS(rs, 30)] = FMA(T8S, T8Q, T8U); + ci[WS(rs, 14)] = FMA(T8K, T8H, T8O); + T7g = T6A - T6J; + T6K = T6A + T6J; + } + T7c = T78 * T7b; + T7k = T5X * T7j; + { + E T97, T9g, T9i, T9j, T9f, T9k, T9h, T96; + { + E T7l, T7h, T7m, T7i; + T7l = FMA(KP707106781, T7g, T7f); + T7h = FNMS(KP707106781, T7g, T7f); + T7m = T5X * T7l; + cr[WS(rs, 12)] = FNMS(T5Z, T7l, T7k); + T7i = T78 * T7h; + cr[WS(rs, 28)] = FNMS(T7e, T7h, T7c); + T9h = FMA(KP923879532, T95, T8Y); + T96 = FNMS(KP923879532, T95, T8Y); + ci[WS(rs, 12)] = FMA(T5Z, T7j, T7m); + ci[WS(rs, 28)] = FMA(T7e, T7b, T7i); + } + T97 = T8V * T96; + T9g = T98 * T96; + T9i = T3G * T9h; + T9j = FMA(KP923879532, T9e, T9b); + T9f = FNMS(KP923879532, T9e, T9b); + T9k = T3J * T9h; + cr[WS(rs, 10)] = FNMS(T3J, T9j, T9i); + ci[WS(rs, 26)] = FMA(T8V, T9f, T9g); + cr[WS(rs, 26)] = FNMS(T98, T9f, T97); + ci[WS(rs, 10)] = FMA(T3G, T9j, T9k); + } + } + } + { + E T31, T3r, T1R, T3m, T33, T32, T3s, T2K, T8z, T8j; + { + E T73, T6L, T75, T71; + T73 = FMA(KP707106781, T6K, T6r); + T6L = FNMS(KP707106781, 
T6K, T6r); + T75 = FMA(KP707106781, T70, T6X); + T71 = FNMS(KP707106781, T70, T6X); + { + E T76, T74, T72, T6M; + T76 = T3d * T73; + T74 = T3a * T73; + T72 = T6O * T6L; + T6M = T6i * T6L; + ci[WS(rs, 4)] = FMA(T3a, T75, T76); + cr[WS(rs, 4)] = FNMS(T3d, T75, T74); + ci[WS(rs, 20)] = FMA(T6i, T71, T72); + cr[WS(rs, 20)] = FNMS(T6O, T71, T6M); + } + } + { + E T7N, T7F, T7P, T7L; + T7N = T7D + T7E; + T7F = T7D - T7E; + T7P = T7K + T7J; + T7L = T7J - T7K; + { + E T7O, T7G, T7Q, T7M; + T7O = T4p * T7N; + T7G = T7C * T7F; + T7Q = T4p * T7P; + T7M = T7C * T7L; + cr[WS(rs, 8)] = FNMS(T4r, T7P, T7O); + cr[WS(rs, 24)] = FNMS(T7I, T7L, T7G); + ci[WS(rs, 8)] = FMA(T4r, T7N, T7Q); + ci[WS(rs, 24)] = FMA(T7I, T7F, T7M); + } + } + T31 = FMA(KP923879532, T30, T2X); + T3r = FNMS(KP923879532, T30, T2X); + T8z = FMA(KP923879532, T8i, T83); + T8j = FNMS(KP923879532, T8i, T83); + { + E T8B, T8x, T8C, T8A; + T8B = FMA(KP923879532, T8w, T8t); + T8x = FNMS(KP923879532, T8w, T8t); + T8C = T1a * T8z; + T8A = T16 * T8z; + { + E T8y, T8k, T2i, T2J; + T8y = T8m * T8j; + T8k = T7S * T8j; + ci[WS(rs, 6)] = FMA(T16, T8B, T8C); + cr[WS(rs, 6)] = FNMS(T1a, T8B, T8A); + ci[WS(rs, 22)] = FMA(T7S, T8x, T8y); + cr[WS(rs, 22)] = FNMS(T8m, T8x, T8k); + T1R = FMA(KP923879532, T1Q, T1t); + T3m = FNMS(KP923879532, T1Q, T1t); + T33 = FNMS(KP668178637, T28, T2h); + T2i = FMA(KP668178637, T2h, T28); + T2J = FNMS(KP668178637, T2I, T2z); + T32 = FMA(KP668178637, T2z, T2I); + T3s = T2J + T2i; + T2K = T2i - T2J; + } + } + { + E T5l, T53, T5f, T4D, T4K, T4R, T56, T5g; + T5l = FNMS(KP923879532, T52, T4Z); + T53 = FMA(KP923879532, T52, T4Z); + { + E T3t, T3D, T3f, T2L; + T3t = FNMS(KP831469612, T3s, T3r); + T3D = FMA(KP831469612, T3s, T3r); + T3f = FMA(KP831469612, T2K, T1R); + T2L = FNMS(KP831469612, T2K, T1R); + { + E T3n, T34, T3g, T2M; + T3n = T32 + T33; + T34 = T32 - T33; + T3g = T3e * T3f; + T2M = T1c * T2L; + { + E T3o, T3z, T3j, T35; + T3o = FNMS(KP831469612, T3n, T3m); + T3z = FMA(KP831469612, T3n, T3m); + 
T3j = FMA(KP831469612, T34, T31); + T35 = FNMS(KP831469612, T34, T31); + { + E T3u, T3p, T3E, T3A; + T3u = T3q * T3o; + T3p = T3l * T3o; + T3E = T3C * T3z; + T3A = T3y * T3z; + { + E T3k, T36, T54, T55; + T3k = T3e * T3j; + cr[WS(rs, 5)] = FNMS(T3i, T3j, T3g); + T36 = T1c * T35; + cr[WS(rs, 21)] = FNMS(T2O, T35, T2M); + ci[WS(rs, 13)] = FMA(T3l, T3t, T3u); + cr[WS(rs, 13)] = FNMS(T3q, T3t, T3p); + ci[WS(rs, 29)] = FMA(T3y, T3D, T3E); + cr[WS(rs, 29)] = FNMS(T3C, T3D, T3A); + ci[WS(rs, 5)] = FMA(T3i, T3f, T3k); + ci[WS(rs, 21)] = FMA(T2O, T2L, T36); + T5f = FMA(KP923879532, T4C, T4v); + T4D = FNMS(KP923879532, T4C, T4v); + T4K = FNMS(KP668178637, T4J, T4G); + T54 = FMA(KP668178637, T4G, T4J); + T55 = FMA(KP668178637, T4N, T4Q); + T4R = FNMS(KP668178637, T4Q, T4N); + T56 = T54 - T55; + T5g = T54 + T55; + } + } + } + } + } + { + E T4h, T41, T4c, T3N, T3Q, T3T, T44, T4d; + T4h = FNMS(KP923879532, T40, T3Z); + T41 = FMA(KP923879532, T40, T3Z); + { + E T57, T5b, T5h, T5p; + T57 = FNMS(KP831469612, T56, T53); + T5b = FMA(KP831469612, T56, T53); + T5h = FNMS(KP831469612, T5g, T5f); + T5p = FMA(KP831469612, T5g, T5f); + { + E T5m, T4S, T5i, T5q; + T5m = T4K - T4R; + T4S = T4K + T4R; + T5i = T5e * T5h; + T5q = T17 * T5p; + { + E T5n, T5r, T59, T4T; + T5n = FMA(KP831469612, T5m, T5l); + T5r = FNMS(KP831469612, T5m, T5l); + T59 = FMA(KP831469612, T4S, T4D); + T4T = FNMS(KP831469612, T4S, T4D); + { + E T5o, T5s, T5c, T5a; + T5o = T5e * T5n; + cr[WS(rs, 11)] = FNMS(T5k, T5n, T5i); + T5s = T17 * T5r; + cr[WS(rs, 27)] = FNMS(T1b, T5r, T5q); + T5c = T14 * T59; + T5a = T11 * T59; + { + E T58, T4U, T42, T43; + T58 = T4W * T4T; + T4U = T4s * T4T; + ci[WS(rs, 11)] = FMA(T5k, T5h, T5o); + ci[WS(rs, 27)] = FMA(T1b, T5p, T5s); + ci[WS(rs, 3)] = FMA(T11, T5b, T5c); + cr[WS(rs, 3)] = FNMS(T14, T5b, T5a); + ci[WS(rs, 19)] = FMA(T4s, T57, T58); + cr[WS(rs, 19)] = FNMS(T4W, T57, T4U); + T4c = FNMS(KP923879532, T3M, T3L); + T3N = FMA(KP923879532, T3M, T3L); + T3Q = FNMS(KP198912367, T3P, T3O); 
+ T42 = FMA(KP198912367, T3O, T3P); + T43 = FNMS(KP198912367, T3R, T3S); + T3T = FMA(KP198912367, T3S, T3R); + T44 = T42 + T43; + T4d = T43 - T42; + } + } + } + } + } + T67 = FNMS(KP923879532, T5K, T5J); + T5L = FMA(KP923879532, T5K, T5J); + { + E T45, T49, T4e, T4l; + T45 = FNMS(KP980785280, T44, T41); + T49 = FMA(KP980785280, T44, T41); + T4e = FNMS(KP980785280, T4d, T4c); + T4l = FMA(KP980785280, T4d, T4c); + { + E T4i, T3U, T4f, T4m; + T4i = T3Q - T3T; + T3U = T3Q + T3T; + T4f = T4b * T4e; + T4m = T12 * T4l; + { + E T4j, T4n, T47, T3V; + T4j = FNMS(KP980785280, T4i, T4h); + T4n = FMA(KP980785280, T4i, T4h); + T47 = FMA(KP980785280, T3U, T3N); + T3V = FNMS(KP980785280, T3U, T3N); + { + E T4k, T4o, T4a, T48; + T4k = T4b * T4j; + cr[WS(rs, 25)] = FNMS(T4g, T4j, T4f); + T4o = T12 * T4n; + cr[WS(rs, 9)] = FNMS(T15, T4n, T4m); + T4a = T39 * T47; + T48 = T37 * T47; + { + E T46, T3W, T5M, T5N; + T46 = T3Y * T3V; + T3W = T3K * T3V; + ci[WS(rs, 25)] = FMA(T4g, T4e, T4k); + ci[WS(rs, 9)] = FMA(T15, T4l, T4o); + ci[WS(rs, 1)] = FMA(T37, T49, T4a); + cr[WS(rs, 1)] = FNMS(T39, T49, T48); + ci[WS(rs, 17)] = FMA(T3K, T45, T46); + cr[WS(rs, 17)] = FNMS(T3Y, T45, T3W); + T61 = FMA(KP923879532, T5w, T5v); + T5x = FNMS(KP923879532, T5w, T5v); + T5A = FNMS(KP198912367, T5z, T5y); + T5M = FMA(KP198912367, T5y, T5z); + T5N = FMA(KP198912367, T5B, T5C); + T5D = FNMS(KP198912367, T5C, T5B); + T5O = T5M - T5N; + T62 = T5M + T5N; + } + } + } + } + } + } + } + } + } + } + } + T5V = FMA(KP980785280, T5O, T5L); + T5P = FNMS(KP980785280, T5O, T5L); + { + E T6c, T63, T5E, T68; + T6c = FMA(KP980785280, T62, T61); + T63 = FNMS(KP980785280, T62, T61); + T5E = T5A + T5D; + T68 = T5D - T5A; + { + E T64, T6d, T6f, T69; + T64 = T60 * T63; + T6d = T6b * T6c; + T6f = FNMS(KP980785280, T68, T67); + T69 = FMA(KP980785280, T68, T67); + { + E T5F, T5S, T6a, T6g; + T5F = FMA(KP980785280, T5E, T5x); + T5S = FNMS(KP980785280, T5E, T5x); + T6a = T60 * T69; + cr[WS(rs, 15)] = FNMS(T66, T69, T64); + T6g = T6b * 
T6f; + cr[WS(rs, 31)] = FNMS(T6e, T6f, T6d); + { + E T5W, T5T, T5Q, T5G; + T5W = T5U * T5S; + T5T = T5R * T5S; + T5Q = T5I * T5F; + T5G = T5u * T5F; + ci[WS(rs, 15)] = FMA(T66, T63, T6a); + ci[WS(rs, 31)] = FMA(T6e, T6c, T6g); + ci[WS(rs, 7)] = FMA(T5R, T5V, T5W); + cr[WS(rs, 7)] = FNMS(T5U, T5V, T5T); + ci[WS(rs, 23)] = FMA(T5u, T5P, T5Q); + cr[WS(rs, 23)] = FNMS(T5I, T5P, T5G); + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hb2_32", twinstr, &GENUS, {236, 98, 252, 0} }; + +void X(codelet_hb2_32) (planner *p) { + X(khc2hc_register) (p, hb2_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hb2_32 -include hb.h */ + +/* + * This function contains 488 FP additions, 280 FP multiplications, + * (or, 376 additions, 168 multiplications, 112 fused multiply/add), + * 160 stack variables, 7 constants, and 128 memory accesses + */ +#include "hb.h" + +static void hb2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T11, T14, T12, T15, T17, T2z, T2B, T1c, T18, T1d, T1g, T1k, T2F, T2L, T3t; + E T4H, T3h, T3V, T3b, T4v, T4T, T4X, T6t, T71, T6z, T75, 
T81, T8x, T8f, T8z; + E T2R, T2V, T8p, T8t, T4r, T4t, T53, T69, T3n, T3r, T7P, T7T, T4P, T4R, T6F; + E T6R, T1f, T2X, T1j, T2Y, T1l, T31, T2d, T2Z, T49, T4h, T4c, T4i, T4d, T4n; + E T4f, T4j; + { + E T2P, T3q, T2U, T3l, T2Q, T3p, T2T, T3m, T2D, T3g, T2K, T39, T2E, T3f, T2J; + E T3a; + { + E T13, T1b, T16, T1a; + T11 = W[0]; + T14 = W[1]; + T12 = W[2]; + T15 = W[3]; + T13 = T11 * T12; + T1b = T14 * T12; + T16 = T14 * T15; + T1a = T11 * T15; + T17 = T13 + T16; + T2z = T13 - T16; + T2B = T1a + T1b; + T1c = T1a - T1b; + T18 = W[4]; + T2P = T12 * T18; + T3q = T14 * T18; + T2U = T15 * T18; + T3l = T11 * T18; + T1d = W[5]; + T2Q = T15 * T1d; + T3p = T11 * T1d; + T2T = T12 * T1d; + T3m = T14 * T1d; + T1g = W[6]; + T2D = T11 * T1g; + T3g = T15 * T1g; + T2K = T14 * T1g; + T39 = T12 * T1g; + T1k = W[7]; + T2E = T14 * T1k; + T3f = T12 * T1k; + T2J = T11 * T1k; + T3a = T15 * T1k; + } + T2F = T2D - T2E; + T2L = T2J + T2K; + T3t = T39 - T3a; + T4H = T2J - T2K; + T3h = T3f - T3g; + T3V = T3f + T3g; + T3b = T39 + T3a; + T4v = T2D + T2E; + T4T = FMA(T18, T1g, T1d * T1k); + T4X = FNMS(T1d, T1g, T18 * T1k); + { + E T6r, T6s, T6x, T6y; + T6r = T17 * T1g; + T6s = T1c * T1k; + T6t = T6r - T6s; + T71 = T6r + T6s; + T6x = T17 * T1k; + T6y = T1c * T1g; + T6z = T6x + T6y; + T75 = T6x - T6y; + } + { + E T7Z, T80, T8d, T8e; + T7Z = T2z * T1g; + T80 = T2B * T1k; + T81 = T7Z + T80; + T8x = T7Z - T80; + T8d = T2z * T1k; + T8e = T2B * T1g; + T8f = T8d - T8e; + T8z = T8d + T8e; + T2R = T2P - T2Q; + T2V = T2T + T2U; + T8p = FMA(T2R, T1g, T2V * T1k); + T8t = FNMS(T2V, T1g, T2R * T1k); + } + T4r = T2P + T2Q; + T4t = T2T - T2U; + T53 = FMA(T4r, T1g, T4t * T1k); + T69 = FNMS(T4t, T1g, T4r * T1k); + T3n = T3l + T3m; + T3r = T3p - T3q; + T7P = FMA(T3n, T1g, T3r * T1k); + T7T = FNMS(T3r, T1g, T3n * T1k); + T4P = T3l - T3m; + T4R = T3p + T3q; + T6F = FMA(T4P, T1g, T4R * T1k); + T6R = FNMS(T4R, T1g, T4P * T1k); + { + E T19, T1e, T1h, T1i; + T19 = T17 * T18; + T1e = T1c * T1d; + T1f = T19 + T1e; + T2X = T19 - 
T1e; + T1h = T17 * T1d; + T1i = T1c * T18; + T1j = T1h - T1i; + T2Y = T1h + T1i; + } + T1l = FMA(T1f, T1g, T1j * T1k); + T31 = FNMS(T2Y, T1g, T2X * T1k); + T2d = FNMS(T1j, T1g, T1f * T1k); + T2Z = FMA(T2X, T1g, T2Y * T1k); + { + E T47, T48, T4a, T4b; + T47 = T2z * T18; + T48 = T2B * T1d; + T49 = T47 - T48; + T4h = T47 + T48; + T4a = T2z * T1d; + T4b = T2B * T18; + T4c = T4a + T4b; + T4i = T4a - T4b; + } + T4d = FMA(T49, T1g, T4c * T1k); + T4n = FNMS(T4i, T1g, T4h * T1k); + T4f = FNMS(T4c, T1g, T49 * T1k); + T4j = FMA(T4h, T1g, T4i * T1k); + } + { + E T56, T7b, T7C, T6c, Tf, T1m, T6f, T7c, T3Y, T4I, T2t, T32, T5d, T7D, T3w; + E T4w, Tu, T2e, T7g, T7F, T7j, T7G, T1B, T33, T3z, T40, T5l, T6i, T5s, T6h; + E T3C, T3Z, TK, T1D, T7v, T86, T7y, T85, T1S, T35, T3O, T4C, T5F, T6J, T5M; + E T6K, T3R, T4D, TZ, T1U, T7o, T89, T7r, T88, T29, T36, T3H, T4z, T5Y, T6M; + E T65, T6N, T3K, T4A; + { + E T3, T54, T2o, T58, T2r, T5b, T6, T6a, Ta, T57, T2h, T6b, T2k, T55, Td; + E T5a; + { + E T1, T2, T2m, T2n; + T1 = cr[0]; + T2 = ci[WS(rs, 15)]; + T3 = T1 + T2; + T54 = T1 - T2; + T2m = ci[WS(rs, 27)]; + T2n = cr[WS(rs, 20)]; + T2o = T2m - T2n; + T58 = T2m + T2n; + } + { + E T2p, T2q, T4, T5; + T2p = ci[WS(rs, 19)]; + T2q = cr[WS(rs, 28)]; + T2r = T2p - T2q; + T5b = T2p + T2q; + T4 = cr[WS(rs, 8)]; + T5 = ci[WS(rs, 7)]; + T6 = T4 + T5; + T6a = T4 - T5; + } + { + E T8, T9, T2f, T2g; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 11)]; + Ta = T8 + T9; + T57 = T8 - T9; + T2f = ci[WS(rs, 31)]; + T2g = cr[WS(rs, 16)]; + T2h = T2f - T2g; + T6b = T2f + T2g; + } + { + E T2i, T2j, Tb, Tc; + T2i = ci[WS(rs, 23)]; + T2j = cr[WS(rs, 24)]; + T2k = T2i - T2j; + T55 = T2i + T2j; + Tb = ci[WS(rs, 3)]; + Tc = cr[WS(rs, 12)]; + Td = Tb + Tc; + T5a = Tb - Tc; + } + { + E T7, Te, T2l, T2s; + T56 = T54 - T55; + T7b = T54 + T55; + T7C = T6b - T6a; + T6c = T6a + T6b; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T1m = T7 - Te; + { + E T6d, T6e, T3W, T3X; + T6d = T57 + T58; + T6e = T5a + T5b; + T6f = KP707106781 * 
(T6d - T6e); + T7c = KP707106781 * (T6d + T6e); + T3W = T2h - T2k; + T3X = Ta - Td; + T3Y = T3W - T3X; + T4I = T3X + T3W; + } + T2l = T2h + T2k; + T2s = T2o + T2r; + T2t = T2l - T2s; + T32 = T2l + T2s; + { + E T59, T5c, T3u, T3v; + T59 = T57 - T58; + T5c = T5a - T5b; + T5d = KP707106781 * (T59 + T5c); + T7D = KP707106781 * (T59 - T5c); + T3u = T3 - T6; + T3v = T2r - T2o; + T3w = T3u - T3v; + T4w = T3u + T3v; + } + } + } + { + E Ti, T5p, T1w, T5n, T1z, T5q, Tl, T5m, Tp, T5i, T1p, T5g, T1s, T5j, Ts; + E T5f; + { + E Tg, Th, T1u, T1v; + Tg = cr[WS(rs, 2)]; + Th = ci[WS(rs, 13)]; + Ti = Tg + Th; + T5p = Tg - Th; + T1u = ci[WS(rs, 29)]; + T1v = cr[WS(rs, 18)]; + T1w = T1u - T1v; + T5n = T1u + T1v; + } + { + E T1x, T1y, Tj, Tk; + T1x = ci[WS(rs, 21)]; + T1y = cr[WS(rs, 26)]; + T1z = T1x - T1y; + T5q = T1x + T1y; + Tj = cr[WS(rs, 10)]; + Tk = ci[WS(rs, 5)]; + Tl = Tj + Tk; + T5m = Tj - Tk; + } + { + E Tn, To, T1n, T1o; + Tn = ci[WS(rs, 1)]; + To = cr[WS(rs, 14)]; + Tp = Tn + To; + T5i = Tn - To; + T1n = ci[WS(rs, 17)]; + T1o = cr[WS(rs, 30)]; + T1p = T1n - T1o; + T5g = T1n + T1o; + } + { + E T1q, T1r, Tq, Tr; + T1q = ci[WS(rs, 25)]; + T1r = cr[WS(rs, 22)]; + T1s = T1q - T1r; + T5j = T1q + T1r; + Tq = cr[WS(rs, 6)]; + Tr = ci[WS(rs, 9)]; + Ts = Tq + Tr; + T5f = Tq - Tr; + } + { + E Tm, Tt, T7e, T7f; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T2e = Tm - Tt; + T7e = T5p + T5q; + T7f = T5n - T5m; + T7g = FNMS(KP923879532, T7f, KP382683432 * T7e); + T7F = FMA(KP382683432, T7f, KP923879532 * T7e); + } + { + E T7h, T7i, T1t, T1A; + T7h = T5i + T5j; + T7i = T5f + T5g; + T7j = FNMS(KP923879532, T7i, KP382683432 * T7h); + T7G = FMA(KP382683432, T7i, KP923879532 * T7h); + T1t = T1p + T1s; + T1A = T1w + T1z; + T1B = T1t - T1A; + T33 = T1A + T1t; + } + { + E T3x, T3y, T5h, T5k; + T3x = T1p - T1s; + T3y = Tp - Ts; + T3z = T3x - T3y; + T40 = T3y + T3x; + T5h = T5f - T5g; + T5k = T5i - T5j; + T5l = FNMS(KP382683432, T5k, KP923879532 * T5h); + T6i = FMA(KP382683432, T5h, 
KP923879532 * T5k); + } + { + E T5o, T5r, T3A, T3B; + T5o = T5m + T5n; + T5r = T5p - T5q; + T5s = FMA(KP923879532, T5o, KP382683432 * T5r); + T6h = FNMS(KP382683432, T5o, KP923879532 * T5r); + T3A = Ti - Tl; + T3B = T1w - T1z; + T3C = T3A + T3B; + T3Z = T3A - T3B; + } + } + { + E Ty, T5v, TB, T5G, T1J, T5w, T1G, T5H, TI, T5K, T1Q, T5D, TF, T5J, T1N; + E T5A; + { + E Tw, Tx, T1E, T1F; + Tw = cr[WS(rs, 1)]; + Tx = ci[WS(rs, 14)]; + Ty = Tw + Tx; + T5v = Tw - Tx; + { + E Tz, TA, T1H, T1I; + Tz = cr[WS(rs, 9)]; + TA = ci[WS(rs, 6)]; + TB = Tz + TA; + T5G = Tz - TA; + T1H = ci[WS(rs, 22)]; + T1I = cr[WS(rs, 25)]; + T1J = T1H - T1I; + T5w = T1H + T1I; + } + T1E = ci[WS(rs, 30)]; + T1F = cr[WS(rs, 17)]; + T1G = T1E - T1F; + T5H = T1E + T1F; + { + E TG, TH, T5B, T1O, T1P, T5C; + TG = ci[WS(rs, 2)]; + TH = cr[WS(rs, 13)]; + T5B = TG - TH; + T1O = ci[WS(rs, 18)]; + T1P = cr[WS(rs, 29)]; + T5C = T1O + T1P; + TI = TG + TH; + T5K = T5B + T5C; + T1Q = T1O - T1P; + T5D = T5B - T5C; + } + { + E TD, TE, T5y, T1L, T1M, T5z; + TD = cr[WS(rs, 5)]; + TE = ci[WS(rs, 10)]; + T5y = TD - TE; + T1L = ci[WS(rs, 26)]; + T1M = cr[WS(rs, 21)]; + T5z = T1L + T1M; + TF = TD + TE; + T5J = T5y + T5z; + T1N = T1L - T1M; + T5A = T5y - T5z; + } + } + { + E TC, TJ, T7t, T7u; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T1D = TC - TJ; + T7t = T5H - T5G; + T7u = KP707106781 * (T5A - T5D); + T7v = T7t + T7u; + T86 = T7t - T7u; + } + { + E T7w, T7x, T1K, T1R; + T7w = T5v + T5w; + T7x = KP707106781 * (T5J + T5K); + T7y = T7w - T7x; + T85 = T7w + T7x; + T1K = T1G + T1J; + T1R = T1N + T1Q; + T1S = T1K - T1R; + T35 = T1K + T1R; + } + { + E T3M, T3N, T5x, T5E; + T3M = T1G - T1J; + T3N = TF - TI; + T3O = T3M - T3N; + T4C = T3N + T3M; + T5x = T5v - T5w; + T5E = KP707106781 * (T5A + T5D); + T5F = T5x - T5E; + T6J = T5x + T5E; + } + { + E T5I, T5L, T3P, T3Q; + T5I = T5G + T5H; + T5L = KP707106781 * (T5J - T5K); + T5M = T5I - T5L; + T6K = T5I + T5L; + T3P = Ty - TB; + T3Q = T1Q - T1N; + T3R = T3P - T3Q; + T4D = 
T3P + T3Q; + } + } + { + E TN, T5O, TQ, T5Z, T20, T5P, T1X, T60, TX, T63, T27, T5W, TU, T62, T24; + E T5T; + { + E TL, TM, T1V, T1W; + TL = ci[0]; + TM = cr[WS(rs, 15)]; + TN = TL + TM; + T5O = TL - TM; + { + E TO, TP, T1Y, T1Z; + TO = cr[WS(rs, 7)]; + TP = ci[WS(rs, 8)]; + TQ = TO + TP; + T5Z = TO - TP; + T1Y = ci[WS(rs, 24)]; + T1Z = cr[WS(rs, 23)]; + T20 = T1Y - T1Z; + T5P = T1Y + T1Z; + } + T1V = ci[WS(rs, 16)]; + T1W = cr[WS(rs, 31)]; + T1X = T1V - T1W; + T60 = T1V + T1W; + { + E TV, TW, T5U, T25, T26, T5V; + TV = ci[WS(rs, 4)]; + TW = cr[WS(rs, 11)]; + T5U = TV - TW; + T25 = ci[WS(rs, 20)]; + T26 = cr[WS(rs, 27)]; + T5V = T25 + T26; + TX = TV + TW; + T63 = T5U + T5V; + T27 = T25 - T26; + T5W = T5U - T5V; + } + { + E TS, TT, T5R, T22, T23, T5S; + TS = cr[WS(rs, 3)]; + TT = ci[WS(rs, 12)]; + T5R = TS - TT; + T22 = ci[WS(rs, 28)]; + T23 = cr[WS(rs, 19)]; + T5S = T22 + T23; + TU = TS + TT; + T62 = T5R + T5S; + T24 = T22 - T23; + T5T = T5R - T5S; + } + } + { + E TR, TY, T7m, T7n; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + T1U = TR - TY; + T7m = KP707106781 * (T5T - T5W); + T7n = T5Z + T60; + T7o = T7m - T7n; + T89 = T7n + T7m; + } + { + E T7p, T7q, T21, T28; + T7p = T5O + T5P; + T7q = KP707106781 * (T62 + T63); + T7r = T7p - T7q; + T88 = T7p + T7q; + T21 = T1X + T20; + T28 = T24 + T27; + T29 = T21 - T28; + T36 = T21 + T28; + } + { + E T3F, T3G, T5Q, T5X; + T3F = T1X - T20; + T3G = TU - TX; + T3H = T3F - T3G; + T4z = T3G + T3F; + T5Q = T5O - T5P; + T5X = KP707106781 * (T5T + T5W); + T5Y = T5Q - T5X; + T6M = T5Q + T5X; + } + { + E T61, T64, T3I, T3J; + T61 = T5Z - T60; + T64 = KP707106781 * (T62 - T63); + T65 = T61 - T64; + T6N = T61 + T64; + T3I = TN - TQ; + T3J = T27 - T24; + T3K = T3I - T3J; + T4A = T3I + T3J; + } + } + { + E Tv, T10, T30, T34, T37, T38; + Tv = Tf + Tu; + T10 = TK + TZ; + T30 = Tv - T10; + T34 = T32 + T33; + T37 = T35 + T36; + T38 = T34 - T37; + cr[0] = Tv + T10; + ci[0] = T34 + T37; + cr[WS(rs, 16)] = FNMS(T31, T38, T2Z * T30); + 
ci[WS(rs, 16)] = FMA(T31, T30, T2Z * T38); + } + { + E T3e, T3o, T3k, T3s; + { + E T3c, T3d, T3i, T3j; + T3c = Tf - Tu; + T3d = T36 - T35; + T3e = T3c - T3d; + T3o = T3c + T3d; + T3i = T32 - T33; + T3j = TK - TZ; + T3k = T3i - T3j; + T3s = T3j + T3i; + } + cr[WS(rs, 24)] = FNMS(T3h, T3k, T3b * T3e); + ci[WS(rs, 24)] = FMA(T3b, T3k, T3h * T3e); + cr[WS(rs, 8)] = FNMS(T3r, T3s, T3n * T3o); + ci[WS(rs, 8)] = FMA(T3n, T3s, T3r * T3o); + } + { + E T1C, T2u, T2M, T2G, T2x, T2H, T2b, T2N; + T1C = T1m + T1B; + T2u = T2e + T2t; + T2M = T2t - T2e; + T2G = T1m - T1B; + { + E T2v, T2w, T1T, T2a; + T2v = T1D + T1S; + T2w = T29 - T1U; + T2x = KP707106781 * (T2v + T2w); + T2H = KP707106781 * (T2w - T2v); + T1T = T1D - T1S; + T2a = T1U + T29; + T2b = KP707106781 * (T1T + T2a); + T2N = KP707106781 * (T1T - T2a); + } + { + E T2c, T2y, T2S, T2W; + T2c = T1C - T2b; + T2y = T2u - T2x; + cr[WS(rs, 20)] = FNMS(T2d, T2y, T1l * T2c); + ci[WS(rs, 20)] = FMA(T2d, T2c, T1l * T2y); + T2S = T2G + T2H; + T2W = T2M + T2N; + cr[WS(rs, 12)] = FNMS(T2V, T2W, T2R * T2S); + ci[WS(rs, 12)] = FMA(T2R, T2W, T2V * T2S); + } + { + E T2A, T2C, T2I, T2O; + T2A = T1C + T2b; + T2C = T2u + T2x; + cr[WS(rs, 4)] = FNMS(T2B, T2C, T2z * T2A); + ci[WS(rs, 4)] = FMA(T2B, T2A, T2z * T2C); + T2I = T2G - T2H; + T2O = T2M - T2N; + cr[WS(rs, 28)] = FNMS(T2L, T2O, T2F * T2I); + ci[WS(rs, 28)] = FMA(T2F, T2O, T2L * T2I); + } + } + { + E T4y, T4U, T4K, T4Y, T4F, T4Z, T4N, T4V, T4x, T4J; + T4x = KP707106781 * (T3Z + T40); + T4y = T4w - T4x; + T4U = T4w + T4x; + T4J = KP707106781 * (T3C + T3z); + T4K = T4I - T4J; + T4Y = T4I + T4J; + { + E T4B, T4E, T4L, T4M; + T4B = FNMS(KP382683432, T4A, KP923879532 * T4z); + T4E = FMA(KP923879532, T4C, KP382683432 * T4D); + T4F = T4B - T4E; + T4Z = T4E + T4B; + T4L = FNMS(KP382683432, T4C, KP923879532 * T4D); + T4M = FMA(KP382683432, T4z, KP923879532 * T4A); + T4N = T4L - T4M; + T4V = T4L + T4M; + } + { + E T4G, T4O, T51, T52; + T4G = T4y - T4F; + T4O = T4K - T4N; + cr[WS(rs, 26)] = 
FNMS(T4H, T4O, T4v * T4G); + ci[WS(rs, 26)] = FMA(T4H, T4G, T4v * T4O); + T51 = T4U + T4V; + T52 = T4Y + T4Z; + cr[WS(rs, 2)] = FNMS(T1c, T52, T17 * T51); + ci[WS(rs, 2)] = FMA(T17, T52, T1c * T51); + } + { + E T4Q, T4S, T4W, T50; + T4Q = T4y + T4F; + T4S = T4K + T4N; + cr[WS(rs, 10)] = FNMS(T4R, T4S, T4P * T4Q); + ci[WS(rs, 10)] = FMA(T4R, T4Q, T4P * T4S); + T4W = T4U - T4V; + T50 = T4Y - T4Z; + cr[WS(rs, 18)] = FNMS(T4X, T50, T4T * T4W); + ci[WS(rs, 18)] = FMA(T4T, T50, T4X * T4W); + } + } + { + E T3E, T4k, T42, T4o, T3T, T4p, T45, T4l, T3D, T41; + T3D = KP707106781 * (T3z - T3C); + T3E = T3w - T3D; + T4k = T3w + T3D; + T41 = KP707106781 * (T3Z - T40); + T42 = T3Y - T41; + T4o = T3Y + T41; + { + E T3L, T3S, T43, T44; + T3L = FNMS(KP923879532, T3K, KP382683432 * T3H); + T3S = FMA(KP382683432, T3O, KP923879532 * T3R); + T3T = T3L - T3S; + T4p = T3S + T3L; + T43 = FNMS(KP923879532, T3O, KP382683432 * T3R); + T44 = FMA(KP923879532, T3H, KP382683432 * T3K); + T45 = T43 - T44; + T4l = T43 + T44; + } + { + E T3U, T46, T4s, T4u; + T3U = T3E - T3T; + T46 = T42 - T45; + cr[WS(rs, 30)] = FNMS(T3V, T46, T3t * T3U); + ci[WS(rs, 30)] = FMA(T3V, T3U, T3t * T46); + T4s = T4k + T4l; + T4u = T4o + T4p; + cr[WS(rs, 6)] = FNMS(T4t, T4u, T4r * T4s); + ci[WS(rs, 6)] = FMA(T4r, T4u, T4t * T4s); + } + { + E T4e, T4g, T4m, T4q; + T4e = T3E + T3T; + T4g = T42 + T45; + cr[WS(rs, 14)] = FNMS(T4f, T4g, T4d * T4e); + ci[WS(rs, 14)] = FMA(T4f, T4e, T4d * T4g); + T4m = T4k - T4l; + T4q = T4o - T4p; + cr[WS(rs, 22)] = FNMS(T4n, T4q, T4j * T4m); + ci[WS(rs, 22)] = FMA(T4j, T4q, T4n * T4m); + } + } + { + E T6I, T72, T6X, T73, T6P, T77, T6U, T76; + { + E T6G, T6H, T6V, T6W; + T6G = T56 + T5d; + T6H = T6h + T6i; + T6I = T6G + T6H; + T72 = T6G - T6H; + T6V = FMA(KP195090322, T6J, KP980785280 * T6K); + T6W = FNMS(KP195090322, T6M, KP980785280 * T6N); + T6X = T6V + T6W; + T73 = T6W - T6V; + } + { + E T6L, T6O, T6S, T6T; + T6L = FNMS(KP195090322, T6K, KP980785280 * T6J); + T6O = FMA(KP980785280, T6M, 
KP195090322 * T6N); + T6P = T6L + T6O; + T77 = T6L - T6O; + T6S = T6c + T6f; + T6T = T5s + T5l; + T6U = T6S + T6T; + T76 = T6S - T6T; + } + { + E T6Q, T6Y, T79, T7a; + T6Q = T6I - T6P; + T6Y = T6U - T6X; + cr[WS(rs, 17)] = FNMS(T6R, T6Y, T6F * T6Q); + ci[WS(rs, 17)] = FMA(T6R, T6Q, T6F * T6Y); + T79 = T72 + T73; + T7a = T76 + T77; + cr[WS(rs, 9)] = FNMS(T1d, T7a, T18 * T79); + ci[WS(rs, 9)] = FMA(T18, T7a, T1d * T79); + } + { + E T6Z, T70, T74, T78; + T6Z = T6I + T6P; + T70 = T6U + T6X; + cr[WS(rs, 1)] = FNMS(T14, T70, T11 * T6Z); + ci[WS(rs, 1)] = FMA(T14, T6Z, T11 * T70); + T74 = T72 - T73; + T78 = T76 - T77; + cr[WS(rs, 25)] = FNMS(T75, T78, T71 * T74); + ci[WS(rs, 25)] = FMA(T71, T78, T75 * T74); + } + } + { + E T84, T8q, T8l, T8r, T8b, T8v, T8i, T8u; + { + E T82, T83, T8j, T8k; + T82 = T7b + T7c; + T83 = T7F + T7G; + T84 = T82 - T83; + T8q = T82 + T83; + T8j = FMA(KP195090322, T86, KP980785280 * T85); + T8k = FMA(KP195090322, T89, KP980785280 * T88); + T8l = T8j - T8k; + T8r = T8j + T8k; + } + { + E T87, T8a, T8g, T8h; + T87 = FNMS(KP980785280, T86, KP195090322 * T85); + T8a = FNMS(KP980785280, T89, KP195090322 * T88); + T8b = T87 + T8a; + T8v = T87 - T8a; + T8g = T7C - T7D; + T8h = T7g - T7j; + T8i = T8g + T8h; + T8u = T8g - T8h; + } + { + E T8c, T8m, T8y, T8A; + T8c = T84 - T8b; + T8m = T8i - T8l; + cr[WS(rs, 23)] = FNMS(T8f, T8m, T81 * T8c); + ci[WS(rs, 23)] = FMA(T8f, T8c, T81 * T8m); + T8y = T8q + T8r; + T8A = T8u - T8v; + cr[WS(rs, 31)] = FNMS(T8z, T8A, T8x * T8y); + ci[WS(rs, 31)] = FMA(T8x, T8A, T8z * T8y); + } + { + E T8n, T8o, T8s, T8w; + T8n = T84 + T8b; + T8o = T8i + T8l; + cr[WS(rs, 7)] = FNMS(T1j, T8o, T1f * T8n); + ci[WS(rs, 7)] = FMA(T1j, T8n, T1f * T8o); + T8s = T8q - T8r; + T8w = T8u + T8v; + cr[WS(rs, 15)] = FNMS(T8t, T8w, T8p * T8s); + ci[WS(rs, 15)] = FMA(T8p, T8w, T8t * T8s); + } + } + { + E T5u, T6u, T6n, T6v, T67, T6B, T6k, T6A; + { + E T5e, T5t, T6l, T6m; + T5e = T56 - T5d; + T5t = T5l - T5s; + T5u = T5e + T5t; + T6u = T5e - T5t; + T6l 
= FMA(KP831469612, T5F, KP555570233 * T5M); + T6m = FNMS(KP831469612, T5Y, KP555570233 * T65); + T6n = T6l + T6m; + T6v = T6m - T6l; + } + { + E T5N, T66, T6g, T6j; + T5N = FNMS(KP831469612, T5M, KP555570233 * T5F); + T66 = FMA(KP555570233, T5Y, KP831469612 * T65); + T67 = T5N + T66; + T6B = T5N - T66; + T6g = T6c - T6f; + T6j = T6h - T6i; + T6k = T6g + T6j; + T6A = T6g - T6j; + } + { + E T68, T6o, T6D, T6E; + T68 = T5u - T67; + T6o = T6k - T6n; + cr[WS(rs, 21)] = FNMS(T69, T6o, T53 * T68); + ci[WS(rs, 21)] = FMA(T69, T68, T53 * T6o); + T6D = T6u + T6v; + T6E = T6A + T6B; + cr[WS(rs, 13)] = FNMS(T4c, T6E, T49 * T6D); + ci[WS(rs, 13)] = FMA(T49, T6E, T4c * T6D); + } + { + E T6p, T6q, T6w, T6C; + T6p = T5u + T67; + T6q = T6k + T6n; + cr[WS(rs, 5)] = FNMS(T4i, T6q, T4h * T6p); + ci[WS(rs, 5)] = FMA(T4i, T6p, T4h * T6q); + T6w = T6u - T6v; + T6C = T6A - T6B; + cr[WS(rs, 29)] = FNMS(T6z, T6C, T6t * T6w); + ci[WS(rs, 29)] = FMA(T6t, T6C, T6z * T6w); + } + } + { + E T7l, T7Q, T7L, T7R, T7A, T7V, T7I, T7U; + { + E T7d, T7k, T7J, T7K; + T7d = T7b - T7c; + T7k = T7g + T7j; + T7l = T7d - T7k; + T7Q = T7d + T7k; + T7J = FNMS(KP555570233, T7v, KP831469612 * T7y); + T7K = FMA(KP555570233, T7o, KP831469612 * T7r); + T7L = T7J - T7K; + T7R = T7J + T7K; + } + { + E T7s, T7z, T7E, T7H; + T7s = FNMS(KP555570233, T7r, KP831469612 * T7o); + T7z = FMA(KP831469612, T7v, KP555570233 * T7y); + T7A = T7s - T7z; + T7V = T7z + T7s; + T7E = T7C + T7D; + T7H = T7F - T7G; + T7I = T7E - T7H; + T7U = T7E + T7H; + } + { + E T7B, T7M, T7X, T7Y; + T7B = T7l - T7A; + T7M = T7I - T7L; + cr[WS(rs, 27)] = FNMS(T1k, T7M, T1g * T7B); + ci[WS(rs, 27)] = FMA(T1k, T7B, T1g * T7M); + T7X = T7Q + T7R; + T7Y = T7U + T7V; + cr[WS(rs, 3)] = FNMS(T15, T7Y, T12 * T7X); + ci[WS(rs, 3)] = FMA(T12, T7Y, T15 * T7X); + } + { + E T7N, T7O, T7S, T7W; + T7N = T7l + T7A; + T7O = T7I + T7L; + cr[WS(rs, 11)] = FNMS(T2Y, T7O, T2X * T7N); + ci[WS(rs, 11)] = FMA(T2Y, T7N, T2X * T7O); + T7S = T7Q - T7R; + T7W = T7U - T7V; + 
cr[WS(rs, 19)] = FNMS(T7T, T7W, T7P * T7S); + ci[WS(rs, 19)] = FMA(T7P, T7W, T7T * T7S); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hb2_32", twinstr, &GENUS, {376, 168, 112, 0} }; + +void X(codelet_hb2_32) (planner *p) { + X(khc2hc_register) (p, hb2_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 4 -dif -name hb2_4 -include hb.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 33 stack variables, 0 constants, and 16 memory accesses + */ +#include "hb.h" + +static void hb2_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(8, rs)) { + E Tg, Tc, Te, To, Tn; + { + E T7, Tb, T8, Ta; + T7 = W[0]; + Tb = W[3]; + T8 = W[2]; + Ta = W[1]; + { + E Tj, Tm, T3, T6, Tx, Tr, Tz, Tv, Td; + { + E Tu, T4, Tq, T5, Tp, Tt; + { + E Tk, Tl, T1, T2; + { + E Th, Tf, T9, Ti; + Th = ci[WS(rs, 3)]; + Tf = T7 * Tb; + T9 = T7 * T8; + Ti = cr[WS(rs, 2)]; + Tk = ci[WS(rs, 2)]; + Tg = FNMS(Ta, T8, Tf); + Tc = FMA(Ta, Tb, T9); + Tu = Th + Ti; + Tj = Th - Ti; + Tl = cr[WS(rs, 3)]; + } + T1 = cr[0]; + T2 = ci[WS(rs, 1)]; + T4 = cr[WS(rs, 1)]; + Tm = Tk - Tl; + Tq = Tk + Tl; + T5 = ci[0]; + T3 = T1 + T2; + Tp = T1 - T2; + } + Tt = T4 - T5; + T6 = T4 + T5; + Tx = Tp + Tq; + Tr = Tp - Tq; + Tz = Tu - Tt; + Tv = Tt + Tu; + Td = T3 - T6; + } + { + E Ts, Tw, TA, Ty; + cr[0] = T3 + T6; + Ts = T7 * Tr; + ci[0] = Tj + Tm; + Tw = T7 * Tv; + TA = T8 * Tz; + cr[WS(rs, 1)] = FNMS(Ta, Tv, Ts); + Ty = T8 * Tx; + ci[WS(rs, 1)] = FMA(Ta, Tr, Tw); + ci[WS(rs, 3)] = FMA(Tb, Tx, TA); + Te = Tc * Td; + cr[WS(rs, 
3)] = FNMS(Tb, Tz, Ty); + To = Tg * Td; + Tn = Tj - Tm; + } + } + } + ci[WS(rs, 2)] = FMA(Tc, Tn, To); + cr[WS(rs, 2)] = FNMS(Tg, Tn, Te); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hb2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hb2_4) (planner *p) { + X(khc2hc_register) (p, hb2_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 4 -dif -name hb2_4 -include hb.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "hb.h" + +static void hb2_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(8, rs)) { + E T7, T9, T8, Ta, Tb, Td; + T7 = W[0]; + T9 = W[1]; + T8 = W[2]; + Ta = W[3]; + Tb = FMA(T7, T8, T9 * Ta); + Td = FNMS(T9, T8, T7 * Ta); + { + E T3, Tl, T6, To, Tg, Tp, Tj, Tm, Tc, Tk; + { + E T1, T2, T4, T5; + T1 = cr[0]; + T2 = ci[WS(rs, 1)]; + T3 = T1 + T2; + Tl = T1 - T2; + T4 = cr[WS(rs, 1)]; + T5 = ci[0]; + T6 = T4 + T5; + To = T4 - T5; + } + { + E Te, Tf, Th, Ti; + Te = ci[WS(rs, 3)]; + Tf = cr[WS(rs, 2)]; + Tg = Te - Tf; + Tp = Te + Tf; + Th = ci[WS(rs, 2)]; + Ti = cr[WS(rs, 3)]; + Tj = Th - Ti; + Tm = Th + Ti; + } + cr[0] = T3 + T6; + ci[0] = Tg + Tj; + Tc = T3 - T6; + Tk = Tg - Tj; + cr[WS(rs, 2)] = FNMS(Td, Tk, Tb * Tc); + ci[WS(rs, 2)] = FMA(Td, Tc, Tb * Tk); + { + E Tn, Tq, Tr, Ts; + Tn = Tl - Tm; + Tq = To + Tp; + cr[WS(rs, 1)] = FNMS(T9, Tq, T7 * Tn); + ci[WS(rs, 1)] = FMA(T7, Tq, T9 * Tn); + Tr = Tl + Tm; + Ts = Tp - To; + cr[WS(rs, 3)] = FNMS(Ta, Ts, T8 * Tr); + ci[WS(rs, 3)] = FMA(T8, Ts, Ta * Tr); + } + } 
+ } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hb2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hb2_4) (planner *p) { + X(khc2hc_register) (p, hb2_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:29 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 5 -dif -name hb2_5 -include hb.h */ + +/* + * This function contains 44 FP additions, 40 FP multiplications, + * (or, 14 additions, 10 multiplications, 30 fused multiply/add), + * 51 stack variables, 4 constants, and 20 memory accesses + */ +#include "hb.h" + +static void hb2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) { + E T9, TB, Tz, Tm, T1, TG, TO, TJ, TC, Tn, Tg, To, Tf, Tw, TQ; + E T8, Tb, Th, Ta, Ti, Tp; + T9 = W[0]; + TB = W[3]; + Tz = W[2]; + Tm = W[1]; + { + E T4, Tu, T5, T6; + T1 = cr[0]; + { + E TF, TA, T2, T3; + TF = T9 * TB; + TA = T9 * Tz; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + TG = FMA(Tm, Tz, TF); + TO = FNMS(Tm, Tz, TF); + TJ = FMA(Tm, TB, TA); + TC = FNMS(Tm, TB, TA); + T4 = T2 + T3; + Tu = T2 - T3; + T5 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + } + Tn = ci[WS(rs, 4)]; + { + E Td, Te, T7, Tv; + Td = ci[WS(rs, 3)]; + Te = cr[WS(rs, 4)]; + T7 = T5 + T6; + Tv = T5 - T6; + Tg = ci[WS(rs, 2)]; + To = Td - Te; + Tf = Td + Te; + Tw = FMA(KP618033988, Tv, Tu); + TQ = 
FNMS(KP618033988, Tu, Tv); + T8 = T4 + T7; + Tb = T4 - T7; + Th = cr[WS(rs, 3)]; + } + } + cr[0] = T1 + T8; + Ta = FNMS(KP250000000, T8, T1); + Ti = Tg + Th; + Tp = Tg - Th; + { + E Tc, TK, Ts, Tq; + Tc = FMA(KP559016994, Tb, Ta); + TK = FNMS(KP559016994, Tb, Ta); + Ts = To - Tp; + Tq = To + Tp; + { + E Tj, TL, Tr, TM, TT; + Tj = FMA(KP618033988, Ti, Tf); + TL = FNMS(KP618033988, Tf, Ti); + ci[0] = Tn + Tq; + Tr = FNMS(KP250000000, Tq, Tn); + TM = FMA(KP951056516, TL, TK); + TT = FNMS(KP951056516, TL, TK); + { + E Tk, TD, Tt, TP; + Tk = FNMS(KP951056516, Tj, Tc); + TD = FMA(KP951056516, Tj, Tc); + Tt = FMA(KP559016994, Ts, Tr); + TP = FNMS(KP559016994, Ts, Tr); + { + E TW, TU, TS, TN; + TW = TB * TT; + TU = Tz * TT; + TS = TO * TM; + TN = TJ * TM; + { + E TI, TE, Ty, Tl; + TI = TG * TD; + TE = TC * TD; + Ty = Tm * Tk; + Tl = T9 * Tk; + { + E TR, TV, Tx, TH; + TR = FNMS(KP951056516, TQ, TP); + TV = FMA(KP951056516, TQ, TP); + Tx = FMA(KP951056516, Tw, Tt); + TH = FNMS(KP951056516, Tw, Tt); + ci[WS(rs, 3)] = FMA(Tz, TV, TW); + cr[WS(rs, 3)] = FNMS(TB, TV, TU); + ci[WS(rs, 2)] = FMA(TJ, TR, TS); + cr[WS(rs, 2)] = FNMS(TO, TR, TN); + ci[WS(rs, 4)] = FMA(TC, TH, TI); + cr[WS(rs, 4)] = FNMS(TG, TH, TE); + ci[WS(rs, 1)] = FMA(T9, Tx, Ty); + cr[WS(rs, 1)] = FNMS(Tm, Tx, Tl); + } + } + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hb2_5", twinstr, &GENUS, {14, 10, 30, 0} }; + +void X(codelet_hb2_5) (planner *p) { + X(khc2hc_register) (p, hb2_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 5 -dif -name hb2_5 -include hb.h */ + +/* + * This function contains 44 FP additions, 32 FP multiplications, + * (or, 30 additions, 18 multiplications, 14 fused multiply/add), + * 33 stack variables, 4 constants, and 20 memory accesses + */ 
+#include "hb.h" + +static void hb2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) { + E Th, Tk, Ti, Tl, Tn, TP, Tx, TN; + { + E Tj, Tw, Tm, Tv; + Th = W[0]; + Tk = W[1]; + Ti = W[2]; + Tl = W[3]; + Tj = Th * Ti; + Tw = Tk * Ti; + Tm = Tk * Tl; + Tv = Th * Tl; + Tn = Tj + Tm; + TP = Tv + Tw; + Tx = Tv - Tw; + TN = Tj - Tm; + } + { + E T1, Tp, TK, TA, T8, To, T9, Tt, TI, TC, Tg, TB; + { + E T4, Ty, T7, Tz; + T1 = cr[0]; + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + T4 = T2 + T3; + Ty = T2 - T3; + T5 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + T7 = T5 + T6; + Tz = T5 - T6; + } + Tp = KP559016994 * (T4 - T7); + TK = FMA(KP951056516, Ty, KP587785252 * Tz); + TA = FNMS(KP951056516, Tz, KP587785252 * Ty); + T8 = T4 + T7; + To = FNMS(KP250000000, T8, T1); + } + { + E Tc, Tr, Tf, Ts; + T9 = ci[WS(rs, 4)]; + { + E Ta, Tb, Td, Te; + Ta = ci[WS(rs, 3)]; + Tb = cr[WS(rs, 4)]; + Tc = Ta - Tb; + Tr = Ta + Tb; + Td = ci[WS(rs, 2)]; + Te = cr[WS(rs, 3)]; + Tf = Td - Te; + Ts = Td + Te; + } + Tt = FNMS(KP951056516, Ts, KP587785252 * Tr); + TI = FMA(KP951056516, Tr, KP587785252 * Ts); + TC = KP559016994 * (Tc - Tf); + Tg = Tc + Tf; + TB = FNMS(KP250000000, Tg, T9); + } + cr[0] = T1 + T8; + ci[0] = T9 + Tg; + { + E Tu, TF, TE, TG, Tq, TD; + Tq = To - Tp; + Tu = Tq - Tt; + TF = Tq + Tt; + TD = TB - TC; + TE = TA + TD; + TG = TD - TA; + cr[WS(rs, 2)] = FNMS(Tx, TE, Tn * Tu); + ci[WS(rs, 2)] = FMA(Tn, TE, Tx * Tu); + cr[WS(rs, 3)] = FNMS(Tl, TG, Ti * TF); + ci[WS(rs, 3)] = FMA(Ti, TG, Tl * TF); + } + { + E TJ, TO, TM, TQ, TH, TL; + TH = Tp + To; + 
TJ = TH - TI; + TO = TH + TI; + TL = TC + TB; + TM = TK + TL; + TQ = TL - TK; + cr[WS(rs, 1)] = FNMS(Tk, TM, Th * TJ); + ci[WS(rs, 1)] = FMA(Th, TM, Tk * TJ); + cr[WS(rs, 4)] = FNMS(TP, TQ, TN * TO); + ci[WS(rs, 4)] = FMA(TN, TQ, TP * TO); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hb2_5", twinstr, &GENUS, {30, 18, 14, 0} }; + +void X(codelet_hb2_5) (planner *p) { + X(khc2hc_register) (p, hb2_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 8 -dif -name hb2_8 -include hb.h */ + +/* + * This function contains 74 FP additions, 50 FP multiplications, + * (or, 44 additions, 20 multiplications, 30 fused multiply/add), + * 77 stack variables, 1 constants, and 32 memory accesses + */ +#include "hb.h" + +static void hb2_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Tf, Tg, Tl, Tp, Ti, Tj, T1o, T1u, Tk, T1b, To, T1e, TK, Tq, T13; + E TP, T1p, T7, T1h, T1v, TZ, Tv, Tw, Ta, Tx, T1j, TE, TB, Td, Ty; + E Th, T1n, T1t; + Tf = W[0]; + Tg = W[2]; + Tl = W[4]; + Tp = W[5]; + Ti = W[1]; + Th = Tf * Tg; + T1n = Tf * Tl; + T1t = Tf * Tp; + Tj = W[3]; + { + E Tr, T3, Ts, T1f, TO, TL, T6, Tt; + { + E TM, TN, T4, T5; + { + E T1, Tn, T2, TJ, Tm; + T1 = cr[0]; + T1o = FMA(Ti, Tp, T1n); + T1u = FNMS(Ti, Tl, T1t); + Tk = FMA(Ti, Tj, Th); + T1b = FNMS(Ti, Tj, Th); + Tn = Tf * Tj; + T2 = ci[WS(rs, 3)]; + TM = ci[WS(rs, 7)]; + TJ = Tk * Tp; + Tm = Tk * Tl; + To = FNMS(Ti, Tg, Tn); + T1e = FMA(Ti, Tg, Tn); + Tr = T1 - T2; + T3 = T1 + T2; + TK = FNMS(To, Tl, TJ); + Tq = FMA(To, Tp, Tm); + TN = cr[WS(rs, 4)]; + } + T4 = cr[WS(rs, 2)]; + T5 = ci[WS(rs, 1)]; + Ts = ci[WS(rs, 5)]; + T1f = TM - TN; + TO = TM + TN; + TL = T4 - T5; + T6 = T4 + T5; + 
Tt = cr[WS(rs, 6)]; + } + { + E TC, TD, Tb, Tc; + { + E T8, T1g, Tu, T9; + T8 = cr[WS(rs, 1)]; + T13 = TO - TL; + TP = TL + TO; + T1p = T3 - T6; + T7 = T3 + T6; + T1g = Ts - Tt; + Tu = Ts + Tt; + T9 = ci[WS(rs, 2)]; + TC = ci[WS(rs, 4)]; + T1h = T1f + T1g; + T1v = T1f - T1g; + TZ = Tr + Tu; + Tv = Tr - Tu; + Tw = T8 - T9; + Ta = T8 + T9; + TD = cr[WS(rs, 7)]; + } + Tb = ci[0]; + Tc = cr[WS(rs, 3)]; + Tx = ci[WS(rs, 6)]; + T1j = TC - TD; + TE = TC + TD; + TB = Tb - Tc; + Td = Tb + Tc; + Ty = cr[WS(rs, 5)]; + } + } + { + E TR, TF, Te, T1w; + TR = TB + TE; + TF = TB - TE; + Te = Ta + Td; + T1w = Ta - Td; + { + E Tz, T1i, T1B, T1x, T1c; + Tz = Tx + Ty; + T1i = Tx - Ty; + T1B = T1w + T1v; + T1x = T1v - T1w; + T1c = T7 - Te; + cr[0] = T7 + Te; + { + E T1k, T1q, TQ, TA; + T1k = T1i + T1j; + T1q = T1j - T1i; + TQ = Tw + Tz; + TA = Tw - Tz; + { + E T1y, T1C, T1m, T1d; + T1y = T1o * T1x; + T1C = Tk * T1B; + T1m = T1e * T1c; + T1d = T1b * T1c; + { + E T1z, T1r, T1l, TG, T14; + T1z = T1p + T1q; + T1r = T1p - T1q; + T1l = T1h - T1k; + ci[0] = T1h + T1k; + TG = TA + TF; + T14 = TA - TF; + { + E T10, TS, T1s, T1A; + T10 = TQ + TR; + TS = TQ - TR; + ci[WS(rs, 6)] = FMA(T1u, T1r, T1y); + T1s = T1o * T1r; + ci[WS(rs, 2)] = FMA(To, T1z, T1C); + T1A = Tk * T1z; + ci[WS(rs, 4)] = FMA(T1b, T1l, T1m); + cr[WS(rs, 4)] = FNMS(T1e, T1l, T1d); + { + E T15, T19, TV, TH; + T15 = FMA(KP707106781, T14, T13); + T19 = FNMS(KP707106781, T14, T13); + TV = FMA(KP707106781, TG, Tv); + TH = FNMS(KP707106781, TG, Tv); + { + E TT, TX, T11, T17; + TT = FNMS(KP707106781, TS, TP); + TX = FMA(KP707106781, TS, TP); + T11 = FNMS(KP707106781, T10, TZ); + T17 = FMA(KP707106781, T10, TZ); + cr[WS(rs, 6)] = FNMS(T1u, T1x, T1s); + cr[WS(rs, 2)] = FNMS(To, T1B, T1A); + { + E T1a, T16, TU, TI; + T1a = Tl * T19; + T16 = Tg * T15; + TU = TK * TH; + TI = Tq * TH; + { + E TY, TW, T18, T12; + TY = Ti * TV; + TW = Tf * TV; + T18 = Tl * T17; + T12 = Tg * T11; + ci[WS(rs, 7)] = FMA(Tp, T17, T1a); + ci[WS(rs, 3)] = FMA(Tj, 
T11, T16); + ci[WS(rs, 5)] = FMA(Tq, TT, TU); + cr[WS(rs, 5)] = FNMS(TK, TT, TI); + ci[WS(rs, 1)] = FMA(Tf, TX, TY); + cr[WS(rs, 1)] = FNMS(Ti, TX, TW); + cr[WS(rs, 7)] = FNMS(Tp, T19, T18); + cr[WS(rs, 3)] = FNMS(Tj, T15, T12); + } + } + } + } + } + } + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hb2_8", twinstr, &GENUS, {44, 20, 30, 0} }; + +void X(codelet_hb2_8) (planner *p) { + X(khc2hc_register) (p, hb2_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 8 -dif -name hb2_8 -include hb.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 46 stack variables, 1 constants, and 32 memory accesses + */ +#include "hb.h" + +static void hb2_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Tf, Ti, Tg, Tj, Tl, Tp, TP, TR, TF, TG, TH, T15, TL, TT; + { + E Th, To, Tk, Tn; + Tf = W[0]; + Ti = W[1]; + Tg = W[2]; + Tj = W[3]; + Th = Tf * Tg; + To = Ti * Tg; + Tk = Ti * Tj; + Tn = Tf * Tj; + Tl = Th - Tk; + Tp = Tn + To; + TP = Th + Tk; + TR = Tn - To; + TF = W[4]; + TG = W[5]; + TH = FMA(Tf, TF, Ti * TG); + T15 = FNMS(TR, TF, TP * TG); + TL = FNMS(Ti, TF, Tf * TG); + TT = FMA(TP, TF, TR * TG); + } + { + E T7, T1f, T1i, Tw, TI, TW, T18, TM, Te, T19, T1a, TD, TJ, TZ, T12; + E TN, Tm, TE; + { + E T3, TU, Tv, TV, T6, T16, Ts, T17; + { + E T1, T2, Tt, Tu; + T1 = cr[0]; + T2 = ci[WS(rs, 3)]; + T3 = T1 + T2; + TU = T1 - T2; + Tt = ci[WS(rs, 5)]; + Tu = cr[WS(rs, 6)]; + Tv = Tt - Tu; + TV = Tt + Tu; + } 
+ { + E T4, T5, Tq, Tr; + T4 = cr[WS(rs, 2)]; + T5 = ci[WS(rs, 1)]; + T6 = T4 + T5; + T16 = T4 - T5; + Tq = ci[WS(rs, 7)]; + Tr = cr[WS(rs, 4)]; + Ts = Tq - Tr; + T17 = Tq + Tr; + } + T7 = T3 + T6; + T1f = TU + TV; + T1i = T17 - T16; + Tw = Ts + Tv; + TI = T3 - T6; + TW = TU - TV; + T18 = T16 + T17; + TM = Ts - Tv; + } + { + E Ta, TX, TC, T11, Td, T10, Tz, TY; + { + E T8, T9, TA, TB; + T8 = cr[WS(rs, 1)]; + T9 = ci[WS(rs, 2)]; + Ta = T8 + T9; + TX = T8 - T9; + TA = ci[WS(rs, 4)]; + TB = cr[WS(rs, 7)]; + TC = TA - TB; + T11 = TA + TB; + } + { + E Tb, Tc, Tx, Ty; + Tb = ci[0]; + Tc = cr[WS(rs, 3)]; + Td = Tb + Tc; + T10 = Tb - Tc; + Tx = ci[WS(rs, 6)]; + Ty = cr[WS(rs, 5)]; + Tz = Tx - Ty; + TY = Tx + Ty; + } + Te = Ta + Td; + T19 = TX + TY; + T1a = T10 + T11; + TD = Tz + TC; + TJ = TC - Tz; + TZ = TX - TY; + T12 = T10 - T11; + TN = Ta - Td; + } + cr[0] = T7 + Te; + ci[0] = Tw + TD; + Tm = T7 - Te; + TE = Tw - TD; + cr[WS(rs, 4)] = FNMS(Tp, TE, Tl * Tm); + ci[WS(rs, 4)] = FMA(Tp, Tm, Tl * TE); + { + E TQ, TS, TK, TO; + TQ = TI + TJ; + TS = TN + TM; + cr[WS(rs, 2)] = FNMS(TR, TS, TP * TQ); + ci[WS(rs, 2)] = FMA(TP, TS, TR * TQ); + TK = TI - TJ; + TO = TM - TN; + cr[WS(rs, 6)] = FNMS(TL, TO, TH * TK); + ci[WS(rs, 6)] = FMA(TH, TO, TL * TK); + } + { + E T1h, T1l, T1k, T1m, T1g, T1j; + T1g = KP707106781 * (T19 + T1a); + T1h = T1f - T1g; + T1l = T1f + T1g; + T1j = KP707106781 * (TZ - T12); + T1k = T1i + T1j; + T1m = T1i - T1j; + cr[WS(rs, 3)] = FNMS(Tj, T1k, Tg * T1h); + ci[WS(rs, 3)] = FMA(Tg, T1k, Tj * T1h); + cr[WS(rs, 7)] = FNMS(TG, T1m, TF * T1l); + ci[WS(rs, 7)] = FMA(TF, T1m, TG * T1l); + } + { + E T14, T1d, T1c, T1e, T13, T1b; + T13 = KP707106781 * (TZ + T12); + T14 = TW - T13; + T1d = TW + T13; + T1b = KP707106781 * (T19 - T1a); + T1c = T18 - T1b; + T1e = T18 + T1b; + cr[WS(rs, 5)] = FNMS(T15, T1c, TT * T14); + ci[WS(rs, 5)] = FMA(T15, T14, TT * T1c); + cr[WS(rs, 1)] = FNMS(Ti, T1e, Tf * T1d); + ci[WS(rs, 1)] = FMA(Ti, T1d, Tf * T1e); + } + } + } + } +} + +static 
const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hb2_8", twinstr, &GENUS, {56, 26, 18, 0} }; + +void X(codelet_hb2_8) (planner *p) { + X(khc2hc_register) (p, hb2_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:26 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hb_10 -include hb.h */ + +/* + * This function contains 102 FP additions, 72 FP multiplications, + * (or, 48 additions, 18 multiplications, 54 fused multiply/add), + * 71 stack variables, 4 constants, and 40 memory accesses + */ +#include "hb.h" + +static void hb_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) { + E T21, T1Y, T1X; + { + E T1B, TH, T1g, T3, T1V, T1x, T1G, T1E, TM, TK, T11, TB, T7, T1m, T1J; + E TO, Th, T1h, T6, T8, TF, TG, T1i, T9; + TF = ci[WS(rs, 9)]; + TG = cr[WS(rs, 5)]; + { + E T1u, Tp, Tu, T1s, Tz, T1v, Ts, Tv; + { + E Tx, Ty, Tn, To, Tq, Tr; + Tn = ci[WS(rs, 5)]; + To = cr[WS(rs, 9)]; + Tx = ci[WS(rs, 6)]; + T1B = TF + TG; + TH = TF - TG; + T1u = Tn + To; + Tp = Tn - To; + Ty = cr[WS(rs, 8)]; + Tq = ci[WS(rs, 8)]; + Tr = cr[WS(rs, 6)]; + Tu = ci[WS(rs, 7)]; + T1s = Tx + Ty; + Tz = Tx - Ty; + T1v = Tq + Tr; + Ts = Tq - Tr; + Tv = cr[WS(rs, 7)]; + } + { + E T1, T1w, T1D, TJ, Tt, T1r, Tw, T2; + T1 = cr[0]; + T1w = T1u + T1v; + T1D = T1u - T1v; + TJ = Tp + Ts; + Tt = Tp - Ts; + 
T1r = Tu + Tv; + Tw = Tu - Tv; + T2 = ci[WS(rs, 4)]; + { + E Tb, Tc, Te, Tf; + Tb = cr[WS(rs, 4)]; + { + E T1t, T1C, TI, TA; + T1t = T1r + T1s; + T1C = T1r - T1s; + TI = Tw + Tz; + TA = Tw - Tz; + T1g = T1 - T2; + T3 = T1 + T2; + T1V = FNMS(KP618033988, T1t, T1w); + T1x = FMA(KP618033988, T1w, T1t); + T1G = T1C - T1D; + T1E = T1C + T1D; + TM = TI - TJ; + TK = TI + TJ; + T11 = FMA(KP618033988, Tt, TA); + TB = FNMS(KP618033988, TA, Tt); + Tc = ci[0]; + } + Te = ci[WS(rs, 3)]; + Tf = cr[WS(rs, 1)]; + { + E T4, T1k, Td, T1l, Tg, T5; + T4 = cr[WS(rs, 2)]; + T1k = Tb - Tc; + Td = Tb + Tc; + T1l = Te - Tf; + Tg = Te + Tf; + T5 = ci[WS(rs, 2)]; + T7 = ci[WS(rs, 1)]; + T1m = T1k + T1l; + T1J = T1k - T1l; + TO = Td - Tg; + Th = Td + Tg; + T1h = T4 - T5; + T6 = T4 + T5; + T8 = cr[WS(rs, 3)]; + } + } + } + } + ci[0] = TH + TK; + T1i = T7 - T8; + T9 = T7 + T8; + { + E T2d, T1F, T29, T1I, TP, T2c, T1p, Tl, T1o, Tk, T2b, T2e, T17, T14, T13; + T2d = T1B + T1E; + T1F = FNMS(KP250000000, T1E, T1B); + { + E T1j, Ta, T1n, Ti, T2a; + T29 = W[8]; + T1I = T1h - T1i; + T1j = T1h + T1i; + TP = T6 - T9; + Ta = T6 + T9; + T2c = W[9]; + T1p = T1j - T1m; + T1n = T1j + T1m; + Tl = Ta - Th; + Ti = Ta + Th; + T1o = FNMS(KP250000000, T1n, T1g); + T2a = T1g + T1n; + cr[0] = T3 + Ti; + Tk = FNMS(KP250000000, Ti, T3); + T2b = T29 * T2a; + T2e = T2c * T2a; + } + { + E T16, TQ, T10, Tm, TL; + T16 = FMA(KP618033988, TO, TP); + TQ = FNMS(KP618033988, TP, TO); + cr[WS(rs, 5)] = FNMS(T2c, T2d, T2b); + ci[WS(rs, 5)] = FMA(T29, T2d, T2e); + T10 = FMA(KP559016994, Tl, Tk); + Tm = FNMS(KP559016994, Tl, Tk); + TL = FNMS(KP250000000, TK, TH); + { + E TE, TU, T12, TR, TX, T1d, T1c, T19, TD, T1e, T1b, TW, TT; + { + E TC, T15, T1a, TS, Tj, TN; + TE = W[3]; + TC = FMA(KP951056516, TB, Tm); + TU = FNMS(KP951056516, TB, Tm); + TN = FNMS(KP559016994, TM, TL); + T15 = FMA(KP559016994, TM, TL); + T12 = FMA(KP951056516, T11, T10); + T1a = FNMS(KP951056516, T11, T10); + TS = TE * TC; + TR = FNMS(KP951056516, TQ, TN); + TX 
= FMA(KP951056516, TQ, TN); + Tj = W[2]; + T1d = FMA(KP951056516, T16, T15); + T17 = FNMS(KP951056516, T16, T15); + T1c = W[11]; + T19 = W[10]; + ci[WS(rs, 2)] = FMA(Tj, TR, TS); + TD = Tj * TC; + T1e = T1c * T1a; + T1b = T19 * T1a; + } + cr[WS(rs, 2)] = FNMS(TE, TR, TD); + ci[WS(rs, 6)] = FMA(T19, T1d, T1e); + cr[WS(rs, 6)] = FNMS(T1c, T1d, T1b); + TW = W[15]; + TT = W[14]; + { + E TZ, T18, TY, TV; + T14 = W[7]; + TY = TW * TU; + TV = TT * TU; + TZ = W[6]; + T18 = T14 * T12; + ci[WS(rs, 8)] = FMA(TT, TX, TY); + cr[WS(rs, 8)] = FNMS(TW, TX, TV); + T13 = TZ * T12; + ci[WS(rs, 4)] = FMA(TZ, T17, T18); + } + } + } + { + E T20, T1K, T1q, T1U; + T20 = FNMS(KP618033988, T1I, T1J); + T1K = FMA(KP618033988, T1J, T1I); + cr[WS(rs, 4)] = FNMS(T14, T17, T13); + T1q = FMA(KP559016994, T1p, T1o); + T1U = FNMS(KP559016994, T1p, T1o); + { + E T1A, T1O, T1W, T1R, T1L, T27, T26, T23, T1z, T28, T25, T1Q, T1N; + { + E T1y, T1Z, T24, T1M, T1f, T1H; + T1A = W[1]; + T1O = FMA(KP951056516, T1x, T1q); + T1y = FNMS(KP951056516, T1x, T1q); + T1Z = FNMS(KP559016994, T1G, T1F); + T1H = FMA(KP559016994, T1G, T1F); + T24 = FMA(KP951056516, T1V, T1U); + T1W = FNMS(KP951056516, T1V, T1U); + T1M = T1A * T1y; + T1R = FNMS(KP951056516, T1K, T1H); + T1L = FMA(KP951056516, T1K, T1H); + T1f = W[0]; + T21 = FMA(KP951056516, T20, T1Z); + T27 = FNMS(KP951056516, T20, T1Z); + T26 = W[13]; + T23 = W[12]; + ci[WS(rs, 1)] = FMA(T1f, T1L, T1M); + T1z = T1f * T1y; + T28 = T26 * T24; + T25 = T23 * T24; + } + cr[WS(rs, 1)] = FNMS(T1A, T1L, T1z); + ci[WS(rs, 7)] = FMA(T23, T27, T28); + cr[WS(rs, 7)] = FNMS(T26, T27, T25); + T1Q = W[17]; + T1N = W[16]; + { + E T1T, T22, T1S, T1P; + T1Y = W[5]; + T1S = T1Q * T1O; + T1P = T1N * T1O; + T1T = W[4]; + T22 = T1Y * T1W; + ci[WS(rs, 9)] = FMA(T1N, T1R, T1S); + cr[WS(rs, 9)] = FNMS(T1Q, T1R, T1P); + T1X = T1T * T1W; + ci[WS(rs, 3)] = FMA(T1T, T21, T22); + } + } + } + } + } + cr[WS(rs, 3)] = FNMS(T1Y, T21, T1X); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 
1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 10, "hb_10", twinstr, &GENUS, {48, 18, 54, 0} }; + +void X(codelet_hb_10) (planner *p) { + X(khc2hc_register) (p, hb_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hb_10 -include hb.h */ + +/* + * This function contains 102 FP additions, 60 FP multiplications, + * (or, 72 additions, 30 multiplications, 30 fused multiply/add), + * 41 stack variables, 4 constants, and 40 memory accesses + */ +#include "hb.h" + +static void hb_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) { + E T3, T18, TE, TF, T1B, T1A, T1f, T1t, Ti, Tl, TJ, T1i, Tt, TA, T1w; + E T1v, T1p, T1E, TM, TO; + { + E T1, T2, TH, TI; + T1 = cr[0]; + T2 = ci[WS(rs, 4)]; + T3 = T1 + T2; + T18 = T1 - T2; + { + E T6, T19, Tg, T1d, T9, T1a, Td, T1c; + { + E T4, T5, Te, Tf; + T4 = cr[WS(rs, 2)]; + T5 = ci[WS(rs, 2)]; + T6 = T4 + T5; + T19 = T4 - T5; + Te = ci[WS(rs, 3)]; + Tf = cr[WS(rs, 1)]; + Tg = Te + Tf; + T1d = Te - Tf; + } + { + E T7, T8, Tb, Tc; + T7 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 3)]; + T9 = T7 + T8; + T1a = T7 - T8; + Tb = cr[WS(rs, 4)]; + Tc = ci[0]; + Td = Tb + Tc; + T1c = Tb - Tc; + } + TE = T6 - T9; + TF = Td - Tg; + T1B = T1c - T1d; + T1A = T19 - T1a; + { + E T1b, T1e, Ta, Th; + T1b = T19 + T1a; + T1e = T1c + T1d; + T1f = T1b + T1e; + T1t = KP559016994 * (T1b - T1e); + Ta = T6 + T9; + Th = Td + Tg; + Ti = Ta + Th; + Tl = KP559016994 * (Ta - Th); + } + } + TH = ci[WS(rs, 9)]; + TI = cr[WS(rs, 
5)]; + TJ = TH - TI; + T1i = TH + TI; + { + E Tp, T1j, Tz, T1n, Ts, T1k, Tw, T1m; + { + E Tn, To, Tx, Ty; + Tn = ci[WS(rs, 7)]; + To = cr[WS(rs, 7)]; + Tp = Tn - To; + T1j = Tn + To; + Tx = ci[WS(rs, 8)]; + Ty = cr[WS(rs, 6)]; + Tz = Tx - Ty; + T1n = Tx + Ty; + } + { + E Tq, Tr, Tu, Tv; + Tq = ci[WS(rs, 6)]; + Tr = cr[WS(rs, 8)]; + Ts = Tq - Tr; + T1k = Tq + Tr; + Tu = ci[WS(rs, 5)]; + Tv = cr[WS(rs, 9)]; + Tw = Tu - Tv; + T1m = Tu + Tv; + } + Tt = Tp - Ts; + TA = Tw - Tz; + T1w = T1m + T1n; + T1v = T1j + T1k; + { + E T1l, T1o, TK, TL; + T1l = T1j - T1k; + T1o = T1m - T1n; + T1p = T1l + T1o; + T1E = KP559016994 * (T1l - T1o); + TK = Tp + Ts; + TL = Tw + Tz; + TM = TK + TL; + TO = KP559016994 * (TK - TL); + } + } + } + cr[0] = T3 + Ti; + ci[0] = TJ + TM; + { + E T1g, T1q, T17, T1h; + T1g = T18 + T1f; + T1q = T1i + T1p; + T17 = W[8]; + T1h = W[9]; + cr[WS(rs, 5)] = FNMS(T1h, T1q, T17 * T1g); + ci[WS(rs, 5)] = FMA(T1h, T1g, T17 * T1q); + } + { + E TB, TG, T11, TX, TP, T10, Tm, TW, TN, Tk; + TB = FNMS(KP951056516, TA, KP587785252 * Tt); + TG = FNMS(KP951056516, TF, KP587785252 * TE); + T11 = FMA(KP951056516, TE, KP587785252 * TF); + TX = FMA(KP951056516, Tt, KP587785252 * TA); + TN = FNMS(KP250000000, TM, TJ); + TP = TN - TO; + T10 = TO + TN; + Tk = FNMS(KP250000000, Ti, T3); + Tm = Tk - Tl; + TW = Tl + Tk; + { + E TC, TQ, Tj, TD; + TC = Tm - TB; + TQ = TG + TP; + Tj = W[2]; + TD = W[3]; + cr[WS(rs, 2)] = FNMS(TD, TQ, Tj * TC); + ci[WS(rs, 2)] = FMA(TD, TC, Tj * TQ); + } + { + E T14, T16, T13, T15; + T14 = TW - TX; + T16 = T11 + T10; + T13 = W[10]; + T15 = W[11]; + cr[WS(rs, 6)] = FNMS(T15, T16, T13 * T14); + ci[WS(rs, 6)] = FMA(T15, T14, T13 * T16); + } + { + E TS, TU, TR, TT; + TS = Tm + TB; + TU = TP - TG; + TR = W[14]; + TT = W[15]; + cr[WS(rs, 8)] = FNMS(TT, TU, TR * TS); + ci[WS(rs, 8)] = FMA(TT, TS, TR * TU); + } + { + E TY, T12, TV, TZ; + TY = TW + TX; + T12 = T10 - T11; + TV = W[6]; + TZ = W[7]; + cr[WS(rs, 4)] = FNMS(TZ, T12, TV * TY); + ci[WS(rs, 4)] = 
FMA(TZ, TY, TV * T12); + } + } + { + E T1x, T1C, T1Q, T1N, T1F, T1R, T1u, T1M, T1D, T1s; + T1x = FNMS(KP951056516, T1w, KP587785252 * T1v); + T1C = FNMS(KP951056516, T1B, KP587785252 * T1A); + T1Q = FMA(KP951056516, T1A, KP587785252 * T1B); + T1N = FMA(KP951056516, T1v, KP587785252 * T1w); + T1D = FNMS(KP250000000, T1p, T1i); + T1F = T1D - T1E; + T1R = T1E + T1D; + T1s = FNMS(KP250000000, T1f, T18); + T1u = T1s - T1t; + T1M = T1t + T1s; + { + E T1y, T1G, T1r, T1z; + T1y = T1u - T1x; + T1G = T1C + T1F; + T1r = W[12]; + T1z = W[13]; + cr[WS(rs, 7)] = FNMS(T1z, T1G, T1r * T1y); + ci[WS(rs, 7)] = FMA(T1r, T1G, T1z * T1y); + } + { + E T1U, T1W, T1T, T1V; + T1U = T1M + T1N; + T1W = T1R - T1Q; + T1T = W[16]; + T1V = W[17]; + cr[WS(rs, 9)] = FNMS(T1V, T1W, T1T * T1U); + ci[WS(rs, 9)] = FMA(T1T, T1W, T1V * T1U); + } + { + E T1I, T1K, T1H, T1J; + T1I = T1u + T1x; + T1K = T1F - T1C; + T1H = W[4]; + T1J = W[5]; + cr[WS(rs, 3)] = FNMS(T1J, T1K, T1H * T1I); + ci[WS(rs, 3)] = FMA(T1H, T1K, T1J * T1I); + } + { + E T1O, T1S, T1L, T1P; + T1O = T1M - T1N; + T1S = T1Q + T1R; + T1L = W[0]; + T1P = W[1]; + cr[WS(rs, 1)] = FNMS(T1P, T1S, T1L * T1O); + ci[WS(rs, 1)] = FMA(T1L, T1S, T1P * T1O); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 10, "hb_10", twinstr, &GENUS, {72, 30, 30, 0} }; + +void X(codelet_hb_10) (planner *p) { + X(khc2hc_register) (p, hb_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:26 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hb_12 -include hb.h */ + +/* + * This function contains 118 FP additions, 68 FP multiplications, + * (or, 72 additions, 22 multiplications, 46 fused multiply/add), + * 64 stack variables, 2 constants, and 48 memory accesses + */ +#include "hb.h" + +static void hb_12(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { + E T1U, T1X, T1W, T1Y, T1V; + { + E T18, T20, T2a, T1s, T21, T1b, T29, T1p, TO, T11, To, Tb, Tg, T23, T1f; + E Ty, Tl, Tt, T1z, T2d, T1i, T24, T1w, T2c; + { + E T5, TN, Ta, TI; + { + E T1, TE, TM, T6, TJ, T1o, T4, T17, TH, TK, T7, T8; + T1 = cr[0]; + TE = ci[WS(rs, 11)]; + TM = cr[WS(rs, 6)]; + T6 = ci[WS(rs, 5)]; + { + E T2, T3, TF, TG; + T2 = cr[WS(rs, 4)]; + T3 = ci[WS(rs, 3)]; + TF = ci[WS(rs, 7)]; + TG = cr[WS(rs, 8)]; + TJ = ci[WS(rs, 9)]; + T1o = T2 - T3; + T4 = T2 + T3; + T17 = TF 
+ TG; + TH = TF - TG; + TK = cr[WS(rs, 10)]; + T7 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 2)]; + } + { + E T1a, T1r, T1q, T19, TL, T9, T16, T1n; + T5 = T1 + T4; + T16 = FNMS(KP500000000, T4, T1); + T1a = TJ + TK; + TL = TJ - TK; + T1r = T7 - T8; + T9 = T7 + T8; + T18 = FNMS(KP866025403, T17, T16); + T20 = FMA(KP866025403, T17, T16); + T1q = FMA(KP500000000, TL, TM); + TN = TL - TM; + Ta = T6 + T9; + T19 = FNMS(KP500000000, T9, T6); + T1n = FNMS(KP500000000, TH, TE); + TI = TE + TH; + T2a = FMA(KP866025403, T1r, T1q); + T1s = FNMS(KP866025403, T1r, T1q); + T21 = FNMS(KP866025403, T1a, T19); + T1b = FMA(KP866025403, T1a, T19); + T29 = FNMS(KP866025403, T1o, T1n); + T1p = FMA(KP866025403, T1o, T1n); + } + } + { + E Tc, Tp, Tx, Th, Tu, Tf, T1v, Ts, T1e, Tv, Ti, Tj; + Tc = cr[WS(rs, 3)]; + TO = TI - TN; + T11 = TI + TN; + Tp = ci[WS(rs, 8)]; + To = T5 - Ta; + Tb = T5 + Ta; + Tx = cr[WS(rs, 9)]; + Th = ci[WS(rs, 2)]; + { + E Td, Te, Tq, Tr; + Td = ci[WS(rs, 4)]; + Te = ci[0]; + Tq = cr[WS(rs, 7)]; + Tr = cr[WS(rs, 11)]; + Tu = ci[WS(rs, 10)]; + Tf = Td + Te; + T1v = Td - Te; + Ts = Tq + Tr; + T1e = Tq - Tr; + Tv = ci[WS(rs, 6)]; + Ti = cr[WS(rs, 1)]; + Tj = cr[WS(rs, 5)]; + } + { + E T1h, T1y, T1x, T1g, Tw, Tk, T1d, T1u; + T1d = FNMS(KP500000000, Tf, Tc); + Tg = Tc + Tf; + Tw = Tu + Tv; + T1h = Tv - Tu; + Tk = Ti + Tj; + T1y = Ti - Tj; + T23 = FNMS(KP866025403, T1e, T1d); + T1f = FMA(KP866025403, T1e, T1d); + Ty = Tw - Tx; + T1x = FMA(KP500000000, Tw, Tx); + T1g = FNMS(KP500000000, Tk, Th); + Tl = Th + Tk; + Tt = Tp - Ts; + T1u = FMA(KP500000000, Ts, Tp); + T1z = FNMS(KP866025403, T1y, T1x); + T2d = FMA(KP866025403, T1y, T1x); + T1i = FMA(KP866025403, T1h, T1g); + T24 = FNMS(KP866025403, T1h, T1g); + T1w = FMA(KP866025403, T1v, T1u); + T2c = FNMS(KP866025403, T1v, T1u); + } + } + } + { + E TY, T13, TX, T10; + { + E Tn, T12, TC, Tm, TD, TS, TA, Tz; + Tn = W[16]; + T12 = Tt + Ty; + Tz = Tt - Ty; + TC = W[17]; + Tm = Tg + Tl; + TD = Tg - Tl; + TS = To + Tz; + TA = To - Tz; + { + 
E TV, TU, TW, TT; + { + E TQ, TR, TP, TB; + TV = TO - TD; + TP = TD + TO; + cr[0] = Tb + Tm; + TB = Tn * TA; + TQ = Tn * TP; + TR = W[4]; + cr[WS(rs, 9)] = FNMS(TC, TP, TB); + TU = W[5]; + ci[WS(rs, 9)] = FMA(TC, TA, TQ); + TW = TR * TV; + TT = TR * TS; + } + ci[WS(rs, 3)] = FMA(TU, TS, TW); + cr[WS(rs, 3)] = FNMS(TU, TV, TT); + TY = Tb - Tm; + T13 = T11 - T12; + TX = W[10]; + T10 = W[11]; + ci[0] = T11 + T12; + } + } + { + E T1K, T1Q, T1P, T1L, T2o, T2u, T2t, T2p; + { + E T1E, T1D, T1H, T1F, T1G, T1t, T1k, T1A; + { + E T1c, TZ, T14, T1j; + T1K = T18 - T1b; + T1c = T18 + T1b; + TZ = TX * TY; + T14 = T10 * TY; + T1j = T1f + T1i; + T1Q = T1f - T1i; + T1P = T1p + T1s; + T1t = T1p - T1s; + cr[WS(rs, 6)] = FNMS(T10, T13, TZ); + ci[WS(rs, 6)] = FMA(TX, T13, T14); + T1E = T1c + T1j; + T1k = T1c - T1j; + T1A = T1w - T1z; + T1L = T1w + T1z; + } + { + E T15, T1m, T1B, T1l, T1C; + T15 = W[18]; + T1m = W[19]; + T1D = W[6]; + T1H = T1t + T1A; + T1B = T1t - T1A; + T1l = T15 * T1k; + T1C = T1m * T1k; + T1F = T1D * T1E; + T1G = W[7]; + cr[WS(rs, 10)] = FNMS(T1m, T1B, T1l); + ci[WS(rs, 10)] = FMA(T15, T1B, T1C); + } + { + E T26, T2i, T2l, T2f, T1Z, T28; + { + E T22, T1I, T25, T2b, T2e; + T22 = T20 + T21; + T2o = T20 - T21; + cr[WS(rs, 4)] = FNMS(T1G, T1H, T1F); + T1I = T1G * T1E; + T2u = T23 - T24; + T25 = T23 + T24; + T2b = T29 - T2a; + T2t = T29 + T2a; + T2p = T2c + T2d; + T2e = T2c - T2d; + ci[WS(rs, 4)] = FMA(T1D, T1H, T1I); + T26 = T22 - T25; + T2i = T22 + T25; + T2l = T2b + T2e; + T2f = T2b - T2e; + } + T1Z = W[2]; + T28 = W[3]; + { + E T2h, T2k, T27, T2g, T2j, T2m; + T2h = W[14]; + T2k = W[15]; + T27 = T1Z * T26; + T2g = T28 * T26; + T2j = T2h * T2i; + T2m = T2k * T2i; + cr[WS(rs, 2)] = FNMS(T28, T2f, T27); + ci[WS(rs, 2)] = FMA(T1Z, T2f, T2g); + cr[WS(rs, 8)] = FNMS(T2k, T2l, T2j); + ci[WS(rs, 8)] = FMA(T2h, T2l, T2m); + } + } + } + { + E T2y, T2B, T2A, T2C, T2z; + { + E T2n, T2q, T2v, T2s, T2r, T2x, T2w; + T2n = W[8]; + T2y = T2o + T2p; + T2q = T2o - T2p; + T2B = T2t - 
T2u; + T2v = T2t + T2u; + T2s = W[9]; + T2r = T2n * T2q; + T2x = W[20]; + T2w = T2n * T2v; + T2A = W[21]; + cr[WS(rs, 5)] = FNMS(T2s, T2v, T2r); + T2C = T2x * T2B; + T2z = T2x * T2y; + ci[WS(rs, 5)] = FMA(T2s, T2q, T2w); + } + ci[WS(rs, 11)] = FMA(T2A, T2y, T2C); + cr[WS(rs, 11)] = FNMS(T2A, T2B, T2z); + { + E T1J, T1M, T1R, T1O, T1N, T1T, T1S; + T1J = W[0]; + T1U = T1K + T1L; + T1M = T1K - T1L; + T1X = T1P - T1Q; + T1R = T1P + T1Q; + T1O = W[1]; + T1N = T1J * T1M; + T1T = W[12]; + T1S = T1J * T1R; + T1W = W[13]; + cr[WS(rs, 1)] = FNMS(T1O, T1R, T1N); + T1Y = T1T * T1X; + T1V = T1T * T1U; + ci[WS(rs, 1)] = FMA(T1O, T1M, T1S); + } + } + } + } + } + ci[WS(rs, 7)] = FMA(T1W, T1U, T1Y); + cr[WS(rs, 7)] = FNMS(T1W, T1X, T1V); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 12, "hb_12", twinstr, &GENUS, {72, 22, 46, 0} }; + +void X(codelet_hb_12) (planner *p) { + X(khc2hc_register) (p, hb_12, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hb_12 -include hb.h */ + +/* + * This function contains 118 FP additions, 60 FP multiplications, + * (or, 88 additions, 30 multiplications, 30 fused multiply/add), + * 39 stack variables, 2 constants, and 48 memory accesses + */ +#include "hb.h" + +static void hb_12(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { + E T5, TH, T12, T1M, T1i, T1U, Tg, Tt, T19, T1X, T1p, T1P, Ta, TM, T15; + E T1N, T1l, T1V, Tl, Ty, T1c, T1Y, T1s, T1Q; + { + E T1, TD, T4, T1g, TG, T11, T10, T1h; + T1 = cr[0]; + TD = ci[WS(rs, 11)]; + { + E T2, T3, TE, TF; + T2 = cr[WS(rs, 4)]; + T3 = 
ci[WS(rs, 3)]; + T4 = T2 + T3; + T1g = KP866025403 * (T2 - T3); + TE = ci[WS(rs, 7)]; + TF = cr[WS(rs, 8)]; + TG = TE - TF; + T11 = KP866025403 * (TE + TF); + } + T5 = T1 + T4; + TH = TD + TG; + T10 = FNMS(KP500000000, T4, T1); + T12 = T10 - T11; + T1M = T10 + T11; + T1h = FNMS(KP500000000, TG, TD); + T1i = T1g + T1h; + T1U = T1h - T1g; + } + { + E Tc, Tp, Tf, T17, Ts, T1o, T18, T1n; + Tc = cr[WS(rs, 3)]; + Tp = ci[WS(rs, 8)]; + { + E Td, Te, Tq, Tr; + Td = ci[WS(rs, 4)]; + Te = ci[0]; + Tf = Td + Te; + T17 = KP866025403 * (Td - Te); + Tq = cr[WS(rs, 7)]; + Tr = cr[WS(rs, 11)]; + Ts = Tq + Tr; + T1o = KP866025403 * (Tq - Tr); + } + Tg = Tc + Tf; + Tt = Tp - Ts; + T18 = FMA(KP500000000, Ts, Tp); + T19 = T17 + T18; + T1X = T18 - T17; + T1n = FNMS(KP500000000, Tf, Tc); + T1p = T1n + T1o; + T1P = T1n - T1o; + } + { + E T6, TL, T9, T1j, TK, T14, T13, T1k; + T6 = ci[WS(rs, 5)]; + TL = cr[WS(rs, 6)]; + { + E T7, T8, TI, TJ; + T7 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 2)]; + T9 = T7 + T8; + T1j = KP866025403 * (T7 - T8); + TI = ci[WS(rs, 9)]; + TJ = cr[WS(rs, 10)]; + TK = TI - TJ; + T14 = KP866025403 * (TI + TJ); + } + Ta = T6 + T9; + TM = TK - TL; + T13 = FNMS(KP500000000, T9, T6); + T15 = T13 + T14; + T1N = T13 - T14; + T1k = FMA(KP500000000, TK, TL); + T1l = T1j - T1k; + T1V = T1j + T1k; + } + { + E Th, Tx, Tk, T1a, Tw, T1r, T1b, T1q; + Th = ci[WS(rs, 2)]; + Tx = cr[WS(rs, 9)]; + { + E Ti, Tj, Tu, Tv; + Ti = cr[WS(rs, 1)]; + Tj = cr[WS(rs, 5)]; + Tk = Ti + Tj; + T1a = KP866025403 * (Ti - Tj); + Tu = ci[WS(rs, 10)]; + Tv = ci[WS(rs, 6)]; + Tw = Tu + Tv; + T1r = KP866025403 * (Tv - Tu); + } + Tl = Th + Tk; + Ty = Tw - Tx; + T1b = FMA(KP500000000, Tw, Tx); + T1c = T1a - T1b; + T1Y = T1a + T1b; + T1q = FNMS(KP500000000, Tk, Th); + T1s = T1q + T1r; + T1Q = T1q - T1r; + } + { + E Tb, Tm, TU, TW, TX, TY, TT, TV; + Tb = T5 + Ta; + Tm = Tg + Tl; + TU = Tb - Tm; + TW = TH + TM; + TX = Tt + Ty; + TY = TW - TX; + cr[0] = Tb + Tm; + ci[0] = TW + TX; + TT = W[10]; + TV = W[11]; + 
cr[WS(rs, 6)] = FNMS(TV, TY, TT * TU); + ci[WS(rs, 6)] = FMA(TV, TU, TT * TY); + } + { + E TA, TQ, TO, TS; + { + E To, Tz, TC, TN; + To = T5 - Ta; + Tz = Tt - Ty; + TA = To - Tz; + TQ = To + Tz; + TC = Tg - Tl; + TN = TH - TM; + TO = TC + TN; + TS = TN - TC; + } + { + E Tn, TB, TP, TR; + Tn = W[16]; + TB = W[17]; + cr[WS(rs, 9)] = FNMS(TB, TO, Tn * TA); + ci[WS(rs, 9)] = FMA(Tn, TO, TB * TA); + TP = W[4]; + TR = W[5]; + cr[WS(rs, 3)] = FNMS(TR, TS, TP * TQ); + ci[WS(rs, 3)] = FMA(TP, TS, TR * TQ); + } + } + { + E T28, T2e, T2c, T2g; + { + E T26, T27, T2a, T2b; + T26 = T1M - T1N; + T27 = T1X + T1Y; + T28 = T26 - T27; + T2e = T26 + T27; + T2a = T1U + T1V; + T2b = T1P - T1Q; + T2c = T2a + T2b; + T2g = T2a - T2b; + } + { + E T25, T29, T2d, T2f; + T25 = W[8]; + T29 = W[9]; + cr[WS(rs, 5)] = FNMS(T29, T2c, T25 * T28); + ci[WS(rs, 5)] = FMA(T25, T2c, T29 * T28); + T2d = W[20]; + T2f = W[21]; + cr[WS(rs, 11)] = FNMS(T2f, T2g, T2d * T2e); + ci[WS(rs, 11)] = FMA(T2d, T2g, T2f * T2e); + } + } + { + E T1S, T22, T20, T24; + { + E T1O, T1R, T1W, T1Z; + T1O = T1M + T1N; + T1R = T1P + T1Q; + T1S = T1O - T1R; + T22 = T1O + T1R; + T1W = T1U - T1V; + T1Z = T1X - T1Y; + T20 = T1W - T1Z; + T24 = T1W + T1Z; + } + { + E T1L, T1T, T21, T23; + T1L = W[2]; + T1T = W[3]; + cr[WS(rs, 2)] = FNMS(T1T, T20, T1L * T1S); + ci[WS(rs, 2)] = FMA(T1T, T1S, T1L * T20); + T21 = W[14]; + T23 = W[15]; + cr[WS(rs, 8)] = FNMS(T23, T24, T21 * T22); + ci[WS(rs, 8)] = FMA(T23, T22, T21 * T24); + } + } + { + E T1C, T1I, T1G, T1K; + { + E T1A, T1B, T1E, T1F; + T1A = T12 + T15; + T1B = T1p + T1s; + T1C = T1A - T1B; + T1I = T1A + T1B; + T1E = T1i + T1l; + T1F = T19 + T1c; + T1G = T1E - T1F; + T1K = T1E + T1F; + } + { + E T1z, T1D, T1H, T1J; + T1z = W[18]; + T1D = W[19]; + cr[WS(rs, 10)] = FNMS(T1D, T1G, T1z * T1C); + ci[WS(rs, 10)] = FMA(T1D, T1C, T1z * T1G); + T1H = W[6]; + T1J = W[7]; + cr[WS(rs, 4)] = FNMS(T1J, T1K, T1H * T1I); + ci[WS(rs, 4)] = FMA(T1J, T1I, T1H * T1K); + } + } + { + E T1e, T1w, T1u, T1y; + { 
+ E T16, T1d, T1m, T1t; + T16 = T12 - T15; + T1d = T19 - T1c; + T1e = T16 - T1d; + T1w = T16 + T1d; + T1m = T1i - T1l; + T1t = T1p - T1s; + T1u = T1m + T1t; + T1y = T1m - T1t; + } + { + E TZ, T1f, T1v, T1x; + TZ = W[0]; + T1f = W[1]; + cr[WS(rs, 1)] = FNMS(T1f, T1u, TZ * T1e); + ci[WS(rs, 1)] = FMA(TZ, T1u, T1f * T1e); + T1v = W[12]; + T1x = W[13]; + cr[WS(rs, 7)] = FNMS(T1x, T1y, T1v * T1w); + ci[WS(rs, 7)] = FMA(T1v, T1y, T1x * T1w); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 12, "hb_12", twinstr, &GENUS, {88, 30, 30, 0} }; + +void X(codelet_hb_12) (planner *p) { + X(khc2hc_register) (p, hb_12, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:26 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -dif -name hb_15 -include hb.h */ + +/* + * This function contains 184 FP additions, 140 FP multiplications, + * (or, 72 additions, 28 multiplications, 112 fused multiply/add), + * 93 stack variables, 6 constants, and 60 memory accesses + */ +#include "hb.h" + +static void hb_15(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 28); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) { + E T3v, T3u, T3r, T3w, T3t; + { + E T5, T11, T1C, T2U, T2f, T3f, T19, T18, TS, TH, T14, T16, T3g, T3a, Ts; + E Tv, T37, T3h, T28, T2h, T1M, T21, T2g, T3n, T2X, T1P, T30, T3m, T1J, T2m; + { + E T1, TX, T2, T3, TY, TZ; + T1 = cr[0]; + TX = ci[WS(rs, 14)]; + T2 = cr[WS(rs, 5)]; + T3 = ci[WS(rs, 4)]; + TY = ci[WS(rs, 9)]; + TZ = cr[WS(rs, 10)]; + { + E T1W, T23, T1D, Ta, Tl, T1K, T1Z, T1H, T1G, Tf, TR, T1Y, T26, TI, T1O; + E T1N, Tq, TG, T25, Tx, Ty, Tz, TL, T1E; + { + E Tb, TQ, TN, TO, Te; + { + E T6, Th, Ti, Tj, T9, Tc, Td, Tk; + { + E T7, T8, T2e, T4; 
+ T6 = cr[WS(rs, 3)]; + T2e = T2 - T3; + T4 = T2 + T3; + { + E T1B, T10, T1A, T2d; + T1B = TY + TZ; + T10 = TY - TZ; + T7 = ci[WS(rs, 6)]; + T5 = T1 + T4; + T1A = FNMS(KP500000000, T4, T1); + T11 = TX + T10; + T2d = FNMS(KP500000000, T10, TX); + T1C = FNMS(KP866025403, T1B, T1A); + T2U = FMA(KP866025403, T1B, T1A); + T2f = FMA(KP866025403, T2e, T2d); + T3f = FNMS(KP866025403, T2e, T2d); + T8 = ci[WS(rs, 1)]; + } + Th = cr[WS(rs, 6)]; + Ti = ci[WS(rs, 3)]; + Tj = cr[WS(rs, 1)]; + T9 = T7 + T8; + T1W = T7 - T8; + } + Tb = ci[WS(rs, 2)]; + T23 = Ti - Tj; + Tk = Ti + Tj; + T1D = FNMS(KP500000000, T9, T6); + Ta = T6 + T9; + Tc = cr[WS(rs, 2)]; + Tl = Th + Tk; + T1K = FNMS(KP500000000, Tk, Th); + Td = cr[WS(rs, 7)]; + TQ = cr[WS(rs, 12)]; + TN = ci[WS(rs, 12)]; + TO = ci[WS(rs, 7)]; + Te = Tc + Td; + T1Z = Tc - Td; + } + { + E Tm, TF, TC, TD, Tp, Tn, To, TP, TJ, TK, TE; + Tm = ci[WS(rs, 5)]; + T1H = TO - TN; + TP = TN + TO; + T1G = FNMS(KP500000000, Te, Tb); + Tf = Tb + Te; + Tn = ci[0]; + TR = TP - TQ; + T1Y = FMA(KP500000000, TP, TQ); + To = cr[WS(rs, 4)]; + TF = cr[WS(rs, 9)]; + TC = ci[WS(rs, 10)]; + TD = cr[WS(rs, 14)]; + Tp = Tn + To; + T26 = Tn - To; + TI = ci[WS(rs, 11)]; + T1O = TC + TD; + TE = TC - TD; + T1N = FNMS(KP500000000, Tp, Tm); + Tq = Tm + Tp; + TJ = cr[WS(rs, 8)]; + TG = TE - TF; + T25 = FMA(KP500000000, TE, TF); + TK = cr[WS(rs, 13)]; + Tx = ci[WS(rs, 8)]; + Ty = ci[WS(rs, 13)]; + Tz = cr[WS(rs, 11)]; + TL = TJ + TK; + T1E = TJ - TK; + } + } + { + E Tg, T1L, Tr, T22, T12, T1X, T38, T13, T39, T20; + { + E TA, T1V, TM, TB; + Tg = Ta + Tf; + T19 = Ta - Tf; + T1L = Ty + Tz; + TA = Ty - Tz; + T1V = FMA(KP500000000, TL, TI); + TM = TI - TL; + T18 = Tl - Tq; + Tr = Tl + Tq; + TB = Tx + TA; + T22 = FNMS(KP500000000, TA, Tx); + T12 = TM + TR; + TS = TM - TR; + T1X = FMA(KP866025403, T1W, T1V); + T38 = FNMS(KP866025403, T1W, T1V); + T13 = TB + TG; + TH = TB - TG; + T39 = FMA(KP866025403, T1Z, T1Y); + T20 = FNMS(KP866025403, T1Z, T1Y); + } + { + E T35, T24, 
T27, T36; + T14 = T12 + T13; + T16 = T12 - T13; + T3g = T38 - T39; + T3a = T38 + T39; + T35 = FNMS(KP866025403, T23, T22); + T24 = FMA(KP866025403, T23, T22); + Ts = Tg + Tr; + Tv = Tg - Tr; + T27 = FNMS(KP866025403, T26, T25); + T36 = FMA(KP866025403, T26, T25); + T37 = T35 + T36; + T3h = T35 - T36; + T28 = T24 + T27; + T2h = T24 - T27; + { + E T1F, T1I, T2Y, T2Z, T2V, T2W; + T2V = FNMS(KP866025403, T1E, T1D); + T1F = FMA(KP866025403, T1E, T1D); + T1I = FMA(KP866025403, T1H, T1G); + T2W = FNMS(KP866025403, T1H, T1G); + T2Y = FNMS(KP866025403, T1L, T1K); + T1M = FMA(KP866025403, T1L, T1K); + T21 = T1X + T20; + T2g = T1X - T20; + T3n = T2V - T2W; + T2X = T2V + T2W; + T2Z = FNMS(KP866025403, T1O, T1N); + T1P = FMA(KP866025403, T1O, T1N); + T30 = T2Y + T2Z; + T3m = T2Y - T2Z; + T1J = T1F + T1I; + T2m = T1F - T1I; + } + } + } + } + } + { + E T31, T33, T2n, T1Q; + cr[0] = T5 + Ts; + T31 = T2X + T30; + T33 = T2X - T30; + T2n = T1M - T1P; + T1Q = T1M + T1P; + ci[0] = T11 + T14; + { + E T1T, T1R, T1r, T1o, T1n; + { + E T1q, T1a, TT, T1l, Tu, T17, T1p, T15; + T1q = FMA(KP618033988, T18, T19); + T1a = FNMS(KP618033988, T19, T18); + T1T = T1J - T1Q; + T1R = T1J + T1Q; + T15 = FNMS(KP250000000, T14, T11); + TT = FNMS(KP618033988, TS, TH); + T1l = FMA(KP618033988, TH, TS); + Tu = FNMS(KP250000000, Ts, T5); + T17 = FNMS(KP559016994, T16, T15); + T1p = FMA(KP559016994, T16, T15); + { + E T1h, T1m, T1e, T1x, T1w, T1v, T1g, T1d; + { + E TW, T1b, Tt, T1u, TU, T1k, Tw; + TW = W[5]; + T1k = FMA(KP559016994, Tv, Tu); + Tw = FNMS(KP559016994, Tv, Tu); + T1b = FMA(KP951056516, T1a, T17); + T1h = FNMS(KP951056516, T1a, T17); + Tt = W[4]; + T1m = FNMS(KP951056516, T1l, T1k); + T1u = FMA(KP951056516, T1l, T1k); + T1e = FMA(KP951056516, TT, Tw); + TU = FNMS(KP951056516, TT, Tw); + { + E T1t, TV, T1c, T1y; + T1x = FNMS(KP951056516, T1q, T1p); + T1r = FMA(KP951056516, T1q, T1p); + T1w = W[17]; + T1t = W[16]; + TV = Tt * TU; + T1c = TW * TU; + T1y = T1w * T1u; + T1v = T1t * T1u; + cr[WS(rs, 3)] 
= FNMS(TW, T1b, TV); + ci[WS(rs, 3)] = FMA(Tt, T1b, T1c); + ci[WS(rs, 9)] = FMA(T1t, T1x, T1y); + } + } + cr[WS(rs, 9)] = FNMS(T1w, T1x, T1v); + T1g = W[23]; + T1d = W[22]; + { + E T1j, T1s, T1i, T1f; + T1o = W[11]; + T1i = T1g * T1e; + T1f = T1d * T1e; + T1j = W[10]; + T1s = T1o * T1m; + ci[WS(rs, 12)] = FMA(T1d, T1h, T1i); + cr[WS(rs, 12)] = FNMS(T1g, T1h, T1f); + T1n = T1j * T1m; + ci[WS(rs, 6)] = FMA(T1j, T1r, T1s); + } + } + } + { + E T2v, T2u, T2r, T2w, T2t; + { + E T1S, T2N, T2o, T2E, T2Q, T2P, T2k, T2S, T29, T2z, T2R, T2j, T2O, T2i; + cr[WS(rs, 6)] = FNMS(T1o, T1r, T1n); + T1S = FNMS(KP250000000, T1R, T1C); + T2O = T1C + T1R; + T2N = W[18]; + T2o = FMA(KP618033988, T2n, T2m); + T2E = FNMS(KP618033988, T2m, T2n); + T2Q = W[19]; + T2P = T2N * T2O; + T2i = T2g + T2h; + T2k = T2g - T2h; + T2S = T2Q * T2O; + T29 = FMA(KP618033988, T28, T21); + T2z = FNMS(KP618033988, T21, T28); + T2R = T2f + T2i; + T2j = FNMS(KP250000000, T2i, T2f); + { + E T2D, T2p, T2I, T2A, T2a, T2s, T2c, T1z, T2l, T1U, T2y; + cr[WS(rs, 10)] = FNMS(T2Q, T2R, T2P); + T2l = FMA(KP559016994, T2k, T2j); + T2D = FNMS(KP559016994, T2k, T2j); + T1U = FMA(KP559016994, T1T, T1S); + T2y = FNMS(KP559016994, T1T, T1S); + ci[WS(rs, 10)] = FMA(T2N, T2R, T2S); + T2p = FMA(KP951056516, T2o, T2l); + T2v = FNMS(KP951056516, T2o, T2l); + T2I = FNMS(KP951056516, T2z, T2y); + T2A = FMA(KP951056516, T2z, T2y); + T2a = FNMS(KP951056516, T29, T1U); + T2s = FMA(KP951056516, T29, T1U); + T2c = W[1]; + T1z = W[0]; + { + E T2F, T2L, T2K, T2J; + { + E T2H, T2M, T2q, T2b; + T2F = FNMS(KP951056516, T2E, T2D); + T2L = FMA(KP951056516, T2E, T2D); + T2K = W[25]; + T2q = T2c * T2a; + T2b = T1z * T2a; + T2H = W[24]; + T2M = T2K * T2I; + ci[WS(rs, 1)] = FMA(T1z, T2p, T2q); + cr[WS(rs, 1)] = FNMS(T2c, T2p, T2b); + T2J = T2H * T2I; + ci[WS(rs, 13)] = FMA(T2H, T2L, T2M); + } + { + E T2C, T2x, T2G, T2B; + T2C = W[13]; + cr[WS(rs, 13)] = FNMS(T2K, T2L, T2J); + T2x = W[12]; + T2G = T2C * T2A; + T2u = W[7]; + T2B = T2x * T2A; + T2r = 
W[6]; + ci[WS(rs, 7)] = FMA(T2x, T2F, T2G); + T2w = T2u * T2s; + cr[WS(rs, 7)] = FNMS(T2C, T2F, T2B); + T2t = T2r * T2s; + } + } + } + } + { + E T32, T3N, T3E, T3o, T3Q, T3P, T3k, T3S, T3z, T3b, T3j, T3R, T3O, T3i; + ci[WS(rs, 4)] = FMA(T2r, T2v, T2w); + cr[WS(rs, 4)] = FNMS(T2u, T2v, T2t); + T3O = T2U + T31; + T32 = FNMS(KP250000000, T31, T2U); + T3N = W[8]; + T3E = FMA(KP618033988, T3m, T3n); + T3o = FNMS(KP618033988, T3n, T3m); + T3Q = W[9]; + T3P = T3N * T3O; + T3k = T3g - T3h; + T3i = T3g + T3h; + T3S = T3Q * T3O; + T3z = FMA(KP618033988, T37, T3a); + T3b = FNMS(KP618033988, T3a, T37); + T3j = FNMS(KP250000000, T3i, T3f); + T3R = T3f + T3i; + { + E T3D, T3p, T3A, T3I, T3s, T3c, T3e, T2T, T3l, T3y, T34; + cr[WS(rs, 5)] = FNMS(T3Q, T3R, T3P); + T3D = FMA(KP559016994, T3k, T3j); + T3l = FNMS(KP559016994, T3k, T3j); + T3y = FMA(KP559016994, T33, T32); + T34 = FNMS(KP559016994, T33, T32); + ci[WS(rs, 5)] = FMA(T3N, T3R, T3S); + T3v = FMA(KP951056516, T3o, T3l); + T3p = FNMS(KP951056516, T3o, T3l); + T3A = FNMS(KP951056516, T3z, T3y); + T3I = FMA(KP951056516, T3z, T3y); + T3s = FNMS(KP951056516, T3b, T34); + T3c = FMA(KP951056516, T3b, T34); + T3e = W[3]; + T2T = W[2]; + { + E T3L, T3F, T3K, T3J; + { + E T3H, T3M, T3q, T3d; + T3L = FNMS(KP951056516, T3E, T3D); + T3F = FMA(KP951056516, T3E, T3D); + T3K = W[27]; + T3q = T3e * T3c; + T3d = T2T * T3c; + T3H = W[26]; + T3M = T3K * T3I; + ci[WS(rs, 2)] = FMA(T2T, T3p, T3q); + cr[WS(rs, 2)] = FNMS(T3e, T3p, T3d); + T3J = T3H * T3I; + ci[WS(rs, 14)] = FMA(T3H, T3L, T3M); + } + { + E T3C, T3x, T3G, T3B; + T3C = W[21]; + cr[WS(rs, 14)] = FNMS(T3K, T3L, T3J); + T3x = W[20]; + T3G = T3C * T3A; + T3u = W[15]; + T3B = T3x * T3A; + T3r = W[14]; + ci[WS(rs, 11)] = FMA(T3x, T3F, T3G); + T3w = T3u * T3s; + cr[WS(rs, 11)] = FNMS(T3C, T3F, T3B); + T3t = T3r * T3s; + } + } + } + } + } + } + } + } + ci[WS(rs, 8)] = FMA(T3r, T3v, T3w); + cr[WS(rs, 8)] = FNMS(T3u, T3v, T3t); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 
15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 15, "hb_15", twinstr, &GENUS, {72, 28, 112, 0} }; + +void X(codelet_hb_15) (planner *p) { + X(khc2hc_register) (p, hb_15, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -dif -name hb_15 -include hb.h */ + +/* + * This function contains 184 FP additions, 112 FP multiplications, + * (or, 128 additions, 56 multiplications, 56 fused multiply/add), + * 75 stack variables, 6 constants, and 60 memory accesses + */ +#include "hb.h" + +static void hb_15(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 28); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) { + E T5, T10, T1J, T2C, T2c, T2M, TH, T18, T17, TS, T2Q, T2R, T2S, Tg, Tr; + E Ts, T11, T12, T13, T2N, T2O, T2P, T1u, T1x, T1y, T1W, T1Z, T28, T1P, T1S; + E T27, T1B, T1E, T1F, T2G, T2H, T2I, T2D, T2E, T2F; + { + E T1, TW, T4, T2a, TZ, T1I, T1H, T2b; + T1 = cr[0]; + TW = ci[WS(rs, 14)]; + { + E T2, T3, TX, TY; + T2 = cr[WS(rs, 5)]; + T3 = ci[WS(rs, 4)]; + T4 = T2 + T3; + T2a = KP866025403 * (T2 - T3); + TX = ci[WS(rs, 9)]; + TY = cr[WS(rs, 10)]; + TZ = TX - TY; + T1I = KP866025403 * (TX + TY); + } + T5 = T1 + T4; + T10 = TW + TZ; + T1H = FNMS(KP500000000, T4, T1); + T1J = T1H - T1I; + T2C = T1H + T1I; + T2b = FNMS(KP500000000, TZ, TW); + T2c = T2a + T2b; + T2M = T2b - T2a; + } + { + E Ta, T1N, T1s, Tl, T1U, T1z, Tf, T1Q, T1v, TG, T1R, T1w, Tq, T1X, T1C; + E TM, 
T1V, T1A, TB, T1O, T1t, TR, T1Y, T1D; + { + E T6, T7, T8, T9; + T6 = cr[WS(rs, 3)]; + T7 = ci[WS(rs, 6)]; + T8 = ci[WS(rs, 1)]; + T9 = T7 + T8; + Ta = T6 + T9; + T1N = KP866025403 * (T7 - T8); + T1s = FNMS(KP500000000, T9, T6); + } + { + E Th, Ti, Tj, Tk; + Th = cr[WS(rs, 6)]; + Ti = ci[WS(rs, 3)]; + Tj = cr[WS(rs, 1)]; + Tk = Ti + Tj; + Tl = Th + Tk; + T1U = KP866025403 * (Ti - Tj); + T1z = FNMS(KP500000000, Tk, Th); + } + { + E Tb, Tc, Td, Te; + Tb = ci[WS(rs, 2)]; + Tc = cr[WS(rs, 2)]; + Td = cr[WS(rs, 7)]; + Te = Tc + Td; + Tf = Tb + Te; + T1Q = KP866025403 * (Tc - Td); + T1v = FNMS(KP500000000, Te, Tb); + } + { + E TF, TC, TD, TE; + TF = cr[WS(rs, 12)]; + TC = ci[WS(rs, 12)]; + TD = ci[WS(rs, 7)]; + TE = TC + TD; + TG = TE - TF; + T1R = FMA(KP500000000, TE, TF); + T1w = KP866025403 * (TD - TC); + } + { + E Tm, Tn, To, Tp; + Tm = ci[WS(rs, 5)]; + Tn = ci[0]; + To = cr[WS(rs, 4)]; + Tp = Tn + To; + Tq = Tm + Tp; + T1X = KP866025403 * (Tn - To); + T1C = FNMS(KP500000000, Tp, Tm); + } + { + E TI, TJ, TK, TL; + TI = ci[WS(rs, 8)]; + TJ = ci[WS(rs, 13)]; + TK = cr[WS(rs, 11)]; + TL = TJ - TK; + TM = TI + TL; + T1V = FNMS(KP500000000, TL, TI); + T1A = KP866025403 * (TJ + TK); + } + { + E Tx, Ty, Tz, TA; + Tx = ci[WS(rs, 11)]; + Ty = cr[WS(rs, 8)]; + Tz = cr[WS(rs, 13)]; + TA = Ty + Tz; + TB = Tx - TA; + T1O = FMA(KP500000000, TA, Tx); + T1t = KP866025403 * (Ty - Tz); + } + { + E TQ, TN, TO, TP; + TQ = cr[WS(rs, 9)]; + TN = ci[WS(rs, 10)]; + TO = cr[WS(rs, 14)]; + TP = TN - TO; + TR = TP - TQ; + T1Y = FMA(KP500000000, TP, TQ); + T1D = KP866025403 * (TN + TO); + } + TH = TB - TG; + T18 = Tl - Tq; + T17 = Ta - Tf; + TS = TM - TR; + T2Q = T1V - T1U; + T2R = T1X + T1Y; + T2S = T2Q - T2R; + Tg = Ta + Tf; + Tr = Tl + Tq; + Ts = Tg + Tr; + T11 = TB + TG; + T12 = TM + TR; + T13 = T11 + T12; + T2N = T1O - T1N; + T2O = T1Q + T1R; + T2P = T2N - T2O; + T1u = T1s + T1t; + T1x = T1v + T1w; + T1y = T1u + T1x; + T1W = T1U + T1V; + T1Z = T1X - T1Y; + T28 = T1W + T1Z; + T1P = T1N + 
T1O; + T1S = T1Q - T1R; + T27 = T1P + T1S; + T1B = T1z + T1A; + T1E = T1C + T1D; + T1F = T1B + T1E; + T2G = T1z - T1A; + T2H = T1C - T1D; + T2I = T2G + T2H; + T2D = T1s - T1t; + T2E = T1v - T1w; + T2F = T2D + T2E; + } + cr[0] = T5 + Ts; + ci[0] = T10 + T13; + { + E TT, T19, T1k, T1h, T16, T1l, Tw, T1g; + TT = FNMS(KP951056516, TS, KP587785252 * TH); + T19 = FNMS(KP951056516, T18, KP587785252 * T17); + T1k = FMA(KP951056516, T17, KP587785252 * T18); + T1h = FMA(KP951056516, TH, KP587785252 * TS); + { + E T14, T15, Tu, Tv; + T14 = FNMS(KP250000000, T13, T10); + T15 = KP559016994 * (T11 - T12); + T16 = T14 - T15; + T1l = T15 + T14; + Tu = FNMS(KP250000000, Ts, T5); + Tv = KP559016994 * (Tg - Tr); + Tw = Tu - Tv; + T1g = Tv + Tu; + } + { + E TU, T1a, Tt, TV; + TU = Tw + TT; + T1a = T16 - T19; + Tt = W[4]; + TV = W[5]; + cr[WS(rs, 3)] = FNMS(TV, T1a, Tt * TU); + ci[WS(rs, 3)] = FMA(TV, TU, Tt * T1a); + } + { + E T1o, T1q, T1n, T1p; + T1o = T1g + T1h; + T1q = T1l - T1k; + T1n = W[16]; + T1p = W[17]; + cr[WS(rs, 9)] = FNMS(T1p, T1q, T1n * T1o); + ci[WS(rs, 9)] = FMA(T1p, T1o, T1n * T1q); + } + { + E T1c, T1e, T1b, T1d; + T1c = Tw - TT; + T1e = T19 + T16; + T1b = W[22]; + T1d = W[23]; + cr[WS(rs, 12)] = FNMS(T1d, T1e, T1b * T1c); + ci[WS(rs, 12)] = FMA(T1d, T1c, T1b * T1e); + } + { + E T1i, T1m, T1f, T1j; + T1i = T1g - T1h; + T1m = T1k + T1l; + T1f = W[10]; + T1j = W[11]; + cr[WS(rs, 6)] = FNMS(T1j, T1m, T1f * T1i); + ci[WS(rs, 6)] = FMA(T1j, T1i, T1f * T1m); + } + } + { + E T21, T2n, T26, T2q, T1M, T2y, T2m, T2f, T2A, T2r, T2x, T2z; + { + E T1T, T20, T24, T25; + T1T = T1P - T1S; + T20 = T1W - T1Z; + T21 = FMA(KP951056516, T1T, KP587785252 * T20); + T2n = FNMS(KP951056516, T20, KP587785252 * T1T); + T24 = T1u - T1x; + T25 = T1B - T1E; + T26 = FMA(KP951056516, T24, KP587785252 * T25); + T2q = FNMS(KP951056516, T25, KP587785252 * T24); + } + { + E T1G, T1K, T1L, T29, T2d, T2e; + T1G = KP559016994 * (T1y - T1F); + T1K = T1y + T1F; + T1L = FNMS(KP250000000, T1K, T1J); + T1M = 
T1G + T1L; + T2y = T1J + T1K; + T2m = T1L - T1G; + T29 = KP559016994 * (T27 - T28); + T2d = T27 + T28; + T2e = FNMS(KP250000000, T2d, T2c); + T2f = T29 + T2e; + T2A = T2c + T2d; + T2r = T2e - T29; + } + T2x = W[18]; + T2z = W[19]; + cr[WS(rs, 10)] = FNMS(T2z, T2A, T2x * T2y); + ci[WS(rs, 10)] = FMA(T2z, T2y, T2x * T2A); + { + E T2u, T2w, T2t, T2v; + T2u = T2m + T2n; + T2w = T2r - T2q; + T2t = W[24]; + T2v = W[25]; + cr[WS(rs, 13)] = FNMS(T2v, T2w, T2t * T2u); + ci[WS(rs, 13)] = FMA(T2v, T2u, T2t * T2w); + } + { + E T22, T2g, T1r, T23; + T22 = T1M - T21; + T2g = T26 + T2f; + T1r = W[0]; + T23 = W[1]; + cr[WS(rs, 1)] = FNMS(T23, T2g, T1r * T22); + ci[WS(rs, 1)] = FMA(T23, T22, T1r * T2g); + } + { + E T2i, T2k, T2h, T2j; + T2i = T1M + T21; + T2k = T2f - T26; + T2h = W[6]; + T2j = W[7]; + cr[WS(rs, 4)] = FNMS(T2j, T2k, T2h * T2i); + ci[WS(rs, 4)] = FMA(T2j, T2i, T2h * T2k); + } + { + E T2o, T2s, T2l, T2p; + T2o = T2m - T2n; + T2s = T2q + T2r; + T2l = W[12]; + T2p = W[13]; + cr[WS(rs, 7)] = FNMS(T2p, T2s, T2l * T2o); + ci[WS(rs, 7)] = FMA(T2p, T2o, T2l * T2s); + } + } + { + E T31, T3h, T36, T3k, T2K, T3g, T2Y, T2U, T3l, T39, T2B, T2L; + { + E T2Z, T30, T34, T35; + T2Z = T2N + T2O; + T30 = T2Q + T2R; + T31 = FNMS(KP951056516, T30, KP587785252 * T2Z); + T3h = FMA(KP951056516, T2Z, KP587785252 * T30); + T34 = T2D - T2E; + T35 = T2G - T2H; + T36 = FNMS(KP951056516, T35, KP587785252 * T34); + T3k = FMA(KP951056516, T34, KP587785252 * T35); + } + { + E T2X, T2J, T2W, T38, T2T, T37; + T2X = KP559016994 * (T2F - T2I); + T2J = T2F + T2I; + T2W = FNMS(KP250000000, T2J, T2C); + T2K = T2C + T2J; + T3g = T2X + T2W; + T2Y = T2W - T2X; + T38 = KP559016994 * (T2P - T2S); + T2T = T2P + T2S; + T37 = FNMS(KP250000000, T2T, T2M); + T2U = T2M + T2T; + T3l = T38 + T37; + T39 = T37 - T38; + } + T2B = W[8]; + T2L = W[9]; + cr[WS(rs, 5)] = FNMS(T2L, T2U, T2B * T2K); + ci[WS(rs, 5)] = FMA(T2L, T2K, T2B * T2U); + { + E T3o, T3q, T3n, T3p; + T3o = T3g + T3h; + T3q = T3l - T3k; + T3n = W[26]; + T3p 
= W[27]; + cr[WS(rs, 14)] = FNMS(T3p, T3q, T3n * T3o); + ci[WS(rs, 14)] = FMA(T3n, T3q, T3p * T3o); + } + { + E T32, T3a, T2V, T33; + T32 = T2Y - T31; + T3a = T36 + T39; + T2V = W[2]; + T33 = W[3]; + cr[WS(rs, 2)] = FNMS(T33, T3a, T2V * T32); + ci[WS(rs, 2)] = FMA(T2V, T3a, T33 * T32); + } + { + E T3c, T3e, T3b, T3d; + T3c = T2Y + T31; + T3e = T39 - T36; + T3b = W[14]; + T3d = W[15]; + cr[WS(rs, 8)] = FNMS(T3d, T3e, T3b * T3c); + ci[WS(rs, 8)] = FMA(T3b, T3e, T3d * T3c); + } + { + E T3i, T3m, T3f, T3j; + T3i = T3g - T3h; + T3m = T3k + T3l; + T3f = W[20]; + T3j = W[21]; + cr[WS(rs, 11)] = FNMS(T3j, T3m, T3f * T3i); + ci[WS(rs, 11)] = FMA(T3f, T3m, T3j * T3i); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 15, "hb_15", twinstr, &GENUS, {128, 56, 56, 0} }; + +void X(codelet_hb_15) (planner *p) { + X(khc2hc_register) (p, hb_15, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:26 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hb_16 -include hb.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 104 additions, 30 multiplications, 70 fused multiply/add), + * 78 stack variables, 3 constants, and 64 memory accesses + */ +#include "hb.h" + +static void hb_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* FMA-scheduled 16-point hb codelet: for each m in [mb, me) it reads all 16 cr[] and all 16 ci[] slots (addressed through WS(rs, k)) and overwrites them in place. NOTE(review): R, E, stride, WS, DK, FMA and FNMS come from the included headers, which are not shown here. */ +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */ + DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* numerically sqrt(2) - 1, i.e. tan(pi/8) */ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) { /* each pass steps cr forward by ms and ci backward by ms and consumes 30 entries of W; W is first offset by (mb - 1) * 30 */ + E T1I, T1L, T1K, T1M, T1J; + { + E T1O, TA, T1h, T21, T3b, T2T, T3D, T3r, T1k, T1P, T3y, Tf, T36, T2A, T22; + E TL, T3z, T3u, T2U, T2F, T2K, T2V, T12, Tu, T3E, TX, T1n, T17, T1T, T24; + E T1W, T25; + { + E T2z, TF, TK, T2w; + { + E Tw, T3, T2x, TJ, T2Q, T1g, T1d, T6, TC, TB, Ta, T2R, Tz, TD, Tb; + E Tc; + { + E T1e, T1f, T4, T5; + { + E T1, T2, TH, TI; + T1 = cr[0]; + T2 = ci[WS(rs, 7)]; + TH = ci[WS(rs, 9)]; + TI = cr[WS(rs, 14)]; + T1e = ci[WS(rs, 15)]; + Tw = T1 - T2; + T3 = T1 + T2; + T2x = TH - TI; + TJ = TH + TI; + T1f = cr[WS(rs, 8)]; + T4 = cr[WS(rs, 4)]; + T5 = ci[WS(rs, 3)]; + } + { + E T8, T9, Tx, Ty; + T8 = cr[WS(rs, 2)]; + T2Q = T1e - T1f; + T1g = T1e + T1f; + T1d = T4 - T5; + T6 = T4 + T5; + T9 = ci[WS(rs, 5)]; + Tx = ci[WS(rs, 11)]; + Ty
= cr[WS(rs, 12)]; + TC = ci[WS(rs, 13)]; + TB = T8 - T9; + Ta = T8 + T9; + T2R = Tx - Ty; + Tz = Tx + Ty; + TD = cr[WS(rs, 10)]; + Tb = ci[WS(rs, 1)]; + Tc = cr[WS(rs, 6)]; + } + } + { + E T2y, TE, TG, Te, T2P, T2S, T3p, Td; + T1O = Tw + Tz; + TA = Tw - Tz; + T2y = TC - TD; + TE = TC + TD; + TG = Tb - Tc; + Td = Tb + Tc; + T1h = T1d + T1g; + T21 = T1g - T1d; + Te = Ta + Td; + T2P = Ta - Td; + T2S = T2Q - T2R; + T3p = T2Q + T2R; + { + E T1i, T1j, T3q, T7; + T3q = T2y + T2x; + T2z = T2x - T2y; + TF = TB - TE; + T1i = TB + TE; + T3b = T2S - T2P; + T2T = T2P + T2S; + TK = TG - TJ; + T1j = TG + TJ; + T3D = T3p - T3q; + T3r = T3p + T3q; + T2w = T3 - T6; + T7 = T3 + T6; + T1k = T1i - T1j; + T1P = T1i + T1j; + T3y = T7 - Te; + Tf = T7 + Te; + } + } + } + { + E T13, Ti, T2C, T11, T2D, T16, TY, Tl, TT, TS, Tp, T2H, TQ, TU, Tq; + E Tr; + { + E T14, T15, Tj, Tk; + { + E Tg, Th, TZ, T10; + Tg = cr[WS(rs, 1)]; + T36 = T2w - T2z; + T2A = T2w + T2z; + T22 = TF - TK; + TL = TF + TK; + Th = ci[WS(rs, 6)]; + TZ = ci[WS(rs, 14)]; + T10 = cr[WS(rs, 9)]; + T14 = ci[WS(rs, 10)]; + T13 = Tg - Th; + Ti = Tg + Th; + T2C = TZ - T10; + T11 = TZ + T10; + T15 = cr[WS(rs, 13)]; + Tj = cr[WS(rs, 5)]; + Tk = ci[WS(rs, 2)]; + } + { + E Tn, To, TO, TP; + Tn = ci[0]; + T2D = T14 - T15; + T16 = T14 + T15; + TY = Tj - Tk; + Tl = Tj + Tk; + To = cr[WS(rs, 7)]; + TO = ci[WS(rs, 8)]; + TP = cr[WS(rs, 15)]; + TT = ci[WS(rs, 12)]; + TS = Tn - To; + Tp = Tn + To; + T2H = TO - TP; + TQ = TO + TP; + TU = cr[WS(rs, 11)]; + Tq = cr[WS(rs, 3)]; + Tr = ci[WS(rs, 4)]; + } + } + { + E TV, TN, Tm, Tt; + { + E T2E, T3s, Ts, T3t, T2J, T2B, T2I, T2G; + T2E = T2C - T2D; + T3s = T2C + T2D; + T2I = TT - TU; + TV = TT + TU; + TN = Tq - Tr; + Ts = Tq + Tr; + T3t = T2H + T2I; + T2J = T2H - T2I; + Tm = Ti + Tl; + T2B = Ti - Tl; + Tt = Tp + Ts; + T2G = Tp - Ts; + T3z = T3t - T3s; + T3u = T3s + T3t; + T2U = T2B + T2E; + T2F = T2B - T2E; + T2K = T2G + T2J; + T2V = T2J - T2G; + } + { + E T1U, T1V, T1R, T1S, TR, TW; + TR = TN - TQ;
+ T1U = TN + TQ; + T1V = TS + TV; + TW = TS - TV; + T1R = T11 - TY; + T12 = TY + T11; + Tu = Tm + Tt; + T3E = Tm - Tt; + TX = FNMS(KP414213562, TW, TR); + T1n = FMA(KP414213562, TR, TW); + T17 = T13 - T16; + T1S = T13 + T16; + T1T = FNMS(KP414213562, T1S, T1R); + T24 = FMA(KP414213562, T1R, T1S); + T1W = FNMS(KP414213562, T1V, T1U); + T25 = FMA(KP414213562, T1U, T1V); + } + } + } + } + { + E T18, T1m, T2W, T2L, T3j, T3i, T3h; + { + E T3m, T3v, T3l, T3o; + cr[0] = Tf + Tu; /* output 0 is stored without a W factor (likewise ci[0] a few statements later) */ + T18 = FMA(KP414213562, T17, T12); + T1m = FNMS(KP414213562, T12, T17); + T3m = Tf - Tu; + T3v = T3r - T3u; + T3l = W[14]; + T3o = W[15]; + ci[0] = T3r + T3u; + { + E T3A, T3I, T3L, T3F, T3C, T3G, T3B, T3x, T3n, T3w, T3H, T3K; + T3A = T3y - T3z; + T3I = T3y + T3z; + T3n = T3l * T3m; + T3w = T3o * T3m; + T3L = T3E + T3D; + T3F = T3D - T3E; + T3x = W[22]; + cr[WS(rs, 8)] = FNMS(T3o, T3v, T3n); + ci[WS(rs, 8)] = FMA(T3l, T3v, T3w); + T3C = W[23]; + T3G = T3x * T3F; + T3B = T3x * T3A; + ci[WS(rs, 12)] = FMA(T3C, T3A, T3G); + cr[WS(rs, 12)] = FNMS(T3C, T3F, T3B); + T3H = W[6]; + T3K = W[7]; + { + E T3g, T38, T3d, T35, T3a; + { + E T37, T3c, T3M, T3J; + T37 = T2V - T2U; + T2W = T2U + T2V; + T2L = T2F + T2K; + T3c = T2F - T2K; + T3M = T3H * T3L; + T3J = T3H * T3I; + T3g = FMA(KP707106781, T37, T36); + T38 = FNMS(KP707106781, T37, T36); + ci[WS(rs, 4)] = FMA(T3K, T3I, T3M); + cr[WS(rs, 4)] = FNMS(T3K, T3L, T3J); + T3d = FNMS(KP707106781, T3c, T3b); + T3j = FMA(KP707106781, T3c, T3b); + } + T35 = W[26]; + T3a = W[27]; + { + E T3f, T3e, T39, T3k; + T3f = W[10]; + T3i = W[11]; + T3e = T35 * T3d; + T39 = T35 * T38; + T3k = T3f * T3j; + T3h = T3f * T3g; + ci[WS(rs, 14)] = FMA(T3a, T38, T3e); + cr[WS(rs, 14)] = FNMS(T3a, T3d, T39); + ci[WS(rs, 6)] = FMA(T3i, T3g, T3k); + } + } + } + } + cr[WS(rs, 6)] = FNMS(T3i, T3j, T3h); + { + E T2g, T2m, T2l, T2h, T2d, T29, T2c, T2b, T2e; + { + E T33, T2Z, T32, T31, T34; + { + E T2v, T30, T2M, T2X, T2O, T2N, T2Y; + T2v = W[18]; + T30 = FMA(KP707106781, T2L, T2A); + T2M =
FNMS(KP707106781, T2L, T2A); + T33 = FMA(KP707106781, T2W, T2T); + T2X = FNMS(KP707106781, T2W, T2T); + T2O = W[19]; + T2N = T2v * T2M; + T2Z = W[2]; + T32 = W[3]; + T2Y = T2O * T2M; + cr[WS(rs, 10)] = FNMS(T2O, T2X, T2N); + T31 = T2Z * T30; + T34 = T32 * T30; + ci[WS(rs, 10)] = FMA(T2v, T2X, T2Y); + } + { + E T1Q, T1X, T23, T26; + T2g = FMA(KP707106781, T1P, T1O); + T1Q = FNMS(KP707106781, T1P, T1O); + cr[WS(rs, 2)] = FNMS(T32, T33, T31); + ci[WS(rs, 2)] = FMA(T2Z, T33, T34); + T1X = T1T + T1W; + T2m = T1W - T1T; + T2l = FNMS(KP707106781, T22, T21); + T23 = FMA(KP707106781, T22, T21); + T26 = T24 - T25; + T2h = T24 + T25; + { + E T1N, T2a, T1Y, T27, T20, T1Z, T28; + T1N = W[20]; + T2a = FNMS(KP923879532, T1X, T1Q); + T1Y = FMA(KP923879532, T1X, T1Q); + T2d = FMA(KP923879532, T26, T23); + T27 = FNMS(KP923879532, T26, T23); + T20 = W[21]; + T1Z = T1N * T1Y; + T29 = W[4]; + T2c = W[5]; + T28 = T20 * T1Y; + cr[WS(rs, 11)] = FNMS(T20, T27, T1Z); + T2b = T29 * T2a; + T2e = T2c * T2a; + ci[WS(rs, 11)] = FMA(T1N, T27, T28); + } + } + } + { + E T1y, T1E, T1D, T1z, T1v, T1r, T1u, T1t, T1w; + { + E TM, T19, T1l, T1o; + T1y = FMA(KP707106781, TL, TA); + TM = FNMS(KP707106781, TL, TA); + cr[WS(rs, 3)] = FNMS(T2c, T2d, T2b); + ci[WS(rs, 3)] = FMA(T29, T2d, T2e); + T19 = TX - T18; + T1E = T18 + TX; + T1D = FMA(KP707106781, T1k, T1h); + T1l = FNMS(KP707106781, T1k, T1h); + T1o = T1m - T1n; + T1z = T1m + T1n; + { + E Tv, T1s, T1a, T1p, T1c, T1b, T1q; + Tv = W[24]; + T1s = FMA(KP923879532, T19, TM); + T1a = FNMS(KP923879532, T19, TM); + T1v = FMA(KP923879532, T1o, T1l); + T1p = FNMS(KP923879532, T1o, T1l); + T1c = W[25]; + T1b = Tv * T1a; + T1r = W[8]; + T1u = W[9]; + T1q = T1c * T1a; + cr[WS(rs, 13)] = FNMS(T1c, T1p, T1b); + T1t = T1r * T1s; + T1w = T1u * T1s; + ci[WS(rs, 13)] = FMA(Tv, T1p, T1q); + } + } + { + E T2q, T2t, T2s, T2u, T2r; + cr[WS(rs, 5)] = FNMS(T1u, T1v, T1t); + ci[WS(rs, 5)] = FMA(T1r, T1v, T1w); + { + E T2f, T2i, T2n, T2k, T2j, T2p, T2o; + T2f = W[12]; + T2q =
FMA(KP923879532, T2h, T2g); + T2i = FNMS(KP923879532, T2h, T2g); + T2t = FNMS(KP923879532, T2m, T2l); + T2n = FMA(KP923879532, T2m, T2l); + T2k = W[13]; + T2j = T2f * T2i; + T2p = W[28]; + T2o = T2f * T2n; + T2s = W[29]; + cr[WS(rs, 7)] = FNMS(T2k, T2n, T2j); + T2u = T2p * T2t; + T2r = T2p * T2q; + ci[WS(rs, 7)] = FMA(T2k, T2i, T2o); + } + ci[WS(rs, 15)] = FMA(T2s, T2q, T2u); + cr[WS(rs, 15)] = FNMS(T2s, T2t, T2r); + { + E T1x, T1A, T1F, T1C, T1B, T1H, T1G; + T1x = W[16]; + T1I = FMA(KP923879532, T1z, T1y); + T1A = FNMS(KP923879532, T1z, T1y); + T1L = FMA(KP923879532, T1E, T1D); + T1F = FNMS(KP923879532, T1E, T1D); + T1C = W[17]; + T1B = T1x * T1A; + T1H = W[0]; + T1G = T1x * T1F; + T1K = W[1]; + cr[WS(rs, 9)] = FNMS(T1C, T1F, T1B); + T1M = T1H * T1L; + T1J = T1H * T1I; + ci[WS(rs, 9)] = FMA(T1C, T1A, T1G); + } + } + } + } + } + } + ci[WS(rs, 1)] = FMA(T1K, T1I, T1M); + cr[WS(rs, 1)] = FNMS(T1K, T1L, T1J); /* across this body, output k (k = 1..15) is combined with the pair W[2k - 2], W[2k - 1] before it is stored */ + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hb_16", twinstr, &GENUS, {104, 30, 70, 0} }; /* the trailing {104, 30, 70, 0} repeats the add / mul / fused counts quoted in this variant's header comment */ + +void X(codelet_hb_16) (planner *p) { + X(khc2hc_register) (p, hb_16, &desc); /* hands hb_16 and desc to the planner through khc2hc_register */ +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hb_16 -include hb.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 50 stack variables, 3 constants, and 64 memory accesses + */ +#include "hb.h" + +static void hb_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* variant compiled when HAVE_FMA is not defined; same signature, and it likewise reads and overwrites all 16 cr[] and all 16 ci[] slots in place */ +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(32,
rs)) { /* same stepping as the FMA variant: cr forward by ms, ci backward by ms, 30 entries of W per pass */ + E T7, T2K, T2W, Tw, T17, T1S, T2k, T1w, Te, TD, T1x, T10, T2n, T2L, T1Z; + E T2X, Tm, T1z, TN, T19, T2e, T2p, T2P, T2Z, Tt, T1A, TW, T1a, T27, T2q; + E T2S, T30; + { + E T3, T1Q, T16, T1R, T6, T2i, T13, T2j; + { + E T1, T2, T14, T15; + T1 = cr[0]; + T2 = ci[WS(rs, 7)]; + T3 = T1 + T2; + T1Q = T1 - T2; + T14 = ci[WS(rs, 11)]; + T15 = cr[WS(rs, 12)]; + T16 = T14 - T15; + T1R = T14 + T15; + } + { + E T4, T5, T11, T12; + T4 = cr[WS(rs, 4)]; + T5 = ci[WS(rs, 3)]; + T6 = T4 + T5; + T2i = T4 - T5; + T11 = ci[WS(rs, 15)]; + T12 = cr[WS(rs, 8)]; + T13 = T11 - T12; + T2j = T11 + T12; + } + T7 = T3 + T6; + T2K = T1Q + T1R; + T2W = T2j - T2i; + Tw = T3 - T6; + T17 = T13 - T16; + T1S = T1Q - T1R; + T2k = T2i + T2j; + T1w = T13 + T16; + } + { + E Ta, T1T, TC, T1U, Td, T1W, Tz, T1X; + { + E T8, T9, TA, TB; + T8 = cr[WS(rs, 2)]; + T9 = ci[WS(rs, 5)]; + Ta = T8 + T9; + T1T = T8 - T9; + TA = ci[WS(rs, 13)]; + TB = cr[WS(rs, 10)]; + TC = TA - TB; + T1U = TA + TB; + } + { + E Tb, Tc, Tx, Ty; + Tb = ci[WS(rs, 1)]; + Tc = cr[WS(rs, 6)]; + Td = Tb + Tc; + T1W = Tb - Tc; + Tx = ci[WS(rs, 9)]; + Ty = cr[WS(rs, 14)]; + Tz = Tx - Ty; + T1X = Tx + Ty; + } + Te = Ta + Td; + TD = Tz - TC; + T1x = TC + Tz; + T10 = Ta - Td; + { + E T2l, T2m, T1V, T1Y; + T2l = T1T + T1U; + T2m = T1W + T1X; + T2n = KP707106781 * (T2l - T2m); + T2L = KP707106781 * (T2l + T2m); + T1V = T1T - T1U; + T1Y = T1W - T1X; + T1Z = KP707106781 * (T1V + T1Y); + T2X = KP707106781 * (T1V - T1Y); + } + } + { + E Ti, T2b, TL, T2c, Tl, T28, TI, T29, TF, TM; + { + E Tg, Th, TJ, TK; + Tg = cr[WS(rs, 1)]; + Th = ci[WS(rs, 6)]; + Ti = Tg + Th; + T2b = Tg - Th; + TJ = ci[WS(rs, 10)]; + TK = cr[WS(rs, 13)]; + TL = TJ - TK; + T2c = TJ + TK; + } + { + E Tj, Tk, TG, TH; + Tj = cr[WS(rs, 5)]; + Tk = ci[WS(rs, 2)]; + Tl = Tj + Tk; + T28 = Tj - Tk; + TG = ci[WS(rs, 14)]; + TH = cr[WS(rs, 9)]; + TI = TG - TH; + T29 = TG + TH; + } + Tm = Ti + Tl; + T1z = TI + TL; + TF = Ti - Tl; + TM = TI - TL; + TN = TF - TM; + T19 = TF + TM; + { + E
T2a, T2d, T2N, T2O; + T2a = T28 + T29; + T2d = T2b - T2c; + T2e = FMA(KP923879532, T2a, KP382683432 * T2d); + T2p = FNMS(KP382683432, T2a, KP923879532 * T2d); + T2N = T2b + T2c; + T2O = T29 - T28; + T2P = FNMS(KP923879532, T2O, KP382683432 * T2N); + T2Z = FMA(KP382683432, T2O, KP923879532 * T2N); + } + } + { + E Tp, T24, TU, T25, Ts, T21, TR, T22, TO, TV; + { + E Tn, To, TS, TT; + Tn = ci[0]; + To = cr[WS(rs, 7)]; + Tp = Tn + To; + T24 = Tn - To; + TS = ci[WS(rs, 12)]; + TT = cr[WS(rs, 11)]; + TU = TS - TT; + T25 = TS + TT; + } + { + E Tq, Tr, TP, TQ; + Tq = cr[WS(rs, 3)]; + Tr = ci[WS(rs, 4)]; + Ts = Tq + Tr; + T21 = Tq - Tr; + TP = ci[WS(rs, 8)]; + TQ = cr[WS(rs, 15)]; + TR = TP - TQ; + T22 = TP + TQ; + } + Tt = Tp + Ts; + T1A = TR + TU; + TO = Tp - Ts; + TV = TR - TU; + TW = TO + TV; + T1a = TV - TO; + { + E T23, T26, T2Q, T2R; + T23 = T21 - T22; + T26 = T24 - T25; + T27 = FNMS(KP382683432, T26, KP923879532 * T23); + T2q = FMA(KP382683432, T23, KP923879532 * T26); + T2Q = T24 + T25; + T2R = T21 + T22; + T2S = FNMS(KP923879532, T2R, KP382683432 * T2Q); + T30 = FMA(KP382683432, T2R, KP923879532 * T2Q); + } + } + { + E Tf, Tu, T1u, T1y, T1B, T1C, T1t, T1v; + Tf = T7 + Te; + Tu = Tm + Tt; + T1u = Tf - Tu; + T1y = T1w + T1x; + T1B = T1z + T1A; + T1C = T1y - T1B; + cr[0] = Tf + Tu; + ci[0] = T1y + T1B; + T1t = W[14]; + T1v = W[15]; + cr[WS(rs, 8)] = FNMS(T1v, T1C, T1t * T1u); + ci[WS(rs, 8)] = FMA(T1v, T1u, T1t * T1C); + } + { + E T2U, T34, T32, T36; + { + E T2M, T2T, T2Y, T31; + T2M = T2K - T2L; + T2T = T2P + T2S; + T2U = T2M - T2T; + T34 = T2M + T2T; + T2Y = T2W + T2X; + T31 = T2Z - T30; + T32 = T2Y - T31; + T36 = T2Y + T31; + } + { + E T2J, T2V, T33, T35; + T2J = W[20]; + T2V = W[21]; + cr[WS(rs, 11)] = FNMS(T2V, T32, T2J * T2U); + ci[WS(rs, 11)] = FMA(T2V, T2U, T2J * T32); + T33 = W[4]; + T35 = W[5]; + cr[WS(rs, 3)] = FNMS(T35, T36, T33 * T34); + ci[WS(rs, 3)] = FMA(T35, T34, T33 * T36); + } + } + { + E T3a, T3g, T3e, T3i; + { + E T38, T39, T3c, T3d; + T38 = T2K +
T2L; + T39 = T2Z + T30; + T3a = T38 - T39; + T3g = T38 + T39; + T3c = T2W - T2X; + T3d = T2P - T2S; + T3e = T3c + T3d; + T3i = T3c - T3d; + } + { + E T37, T3b, T3f, T3h; + T37 = W[12]; + T3b = W[13]; + cr[WS(rs, 7)] = FNMS(T3b, T3e, T37 * T3a); + ci[WS(rs, 7)] = FMA(T37, T3e, T3b * T3a); + T3f = W[28]; + T3h = W[29]; + cr[WS(rs, 15)] = FNMS(T3h, T3i, T3f * T3g); + ci[WS(rs, 15)] = FMA(T3f, T3i, T3h * T3g); + } + } + { + E TY, T1e, T1c, T1g; + { + E TE, TX, T18, T1b; + TE = Tw + TD; + TX = KP707106781 * (TN + TW); + TY = TE - TX; + T1e = TE + TX; + T18 = T10 + T17; + T1b = KP707106781 * (T19 + T1a); + T1c = T18 - T1b; + T1g = T18 + T1b; + } + { + E Tv, TZ, T1d, T1f; + Tv = W[18]; + TZ = W[19]; + cr[WS(rs, 10)] = FNMS(TZ, T1c, Tv * TY); + ci[WS(rs, 10)] = FMA(TZ, TY, Tv * T1c); + T1d = W[2]; + T1f = W[3]; + cr[WS(rs, 2)] = FNMS(T1f, T1g, T1d * T1e); + ci[WS(rs, 2)] = FMA(T1f, T1e, T1d * T1g); + } + } + { + E T1k, T1q, T1o, T1s; + { + E T1i, T1j, T1m, T1n; + T1i = Tw - TD; + T1j = KP707106781 * (T1a - T19); + T1k = T1i - T1j; + T1q = T1i + T1j; + T1m = T17 - T10; + T1n = KP707106781 * (TN - TW); + T1o = T1m - T1n; + T1s = T1m + T1n; + } + { + E T1h, T1l, T1p, T1r; + T1h = W[26]; + T1l = W[27]; + cr[WS(rs, 14)] = FNMS(T1l, T1o, T1h * T1k); + ci[WS(rs, 14)] = FMA(T1h, T1o, T1l * T1k); + T1p = W[10]; + T1r = W[11]; + cr[WS(rs, 6)] = FNMS(T1r, T1s, T1p * T1q); + ci[WS(rs, 6)] = FMA(T1p, T1s, T1r * T1q); + } + } + { + E T2g, T2u, T2s, T2w; + { + E T20, T2f, T2o, T2r; + T20 = T1S - T1Z; + T2f = T27 - T2e; + T2g = T20 - T2f; + T2u = T20 + T2f; + T2o = T2k - T2n; + T2r = T2p - T2q; + T2s = T2o - T2r; + T2w = T2o + T2r; + } + { + E T1P, T2h, T2t, T2v; + T1P = W[24]; + T2h = W[25]; + cr[WS(rs, 13)] = FNMS(T2h, T2s, T1P * T2g); + ci[WS(rs, 13)] = FMA(T2h, T2g, T1P * T2s); + T2t = W[8]; + T2v = W[9]; + cr[WS(rs, 5)] = FNMS(T2v, T2w, T2t * T2u); + ci[WS(rs, 5)] = FMA(T2v, T2u, T2t * T2w); + } + } + { + E T2A, T2G, T2E, T2I; + { + E T2y, T2z, T2C, T2D; + T2y = T1S + T1Z; + T2z =
T2p + T2q; + T2A = T2y - T2z; + T2G = T2y + T2z; + T2C = T2k + T2n; + T2D = T2e + T27; + T2E = T2C - T2D; + T2I = T2C + T2D; + } + { + E T2x, T2B, T2F, T2H; + T2x = W[16]; + T2B = W[17]; + cr[WS(rs, 9)] = FNMS(T2B, T2E, T2x * T2A); + ci[WS(rs, 9)] = FMA(T2x, T2E, T2B * T2A); + T2F = W[0]; + T2H = W[1]; + cr[WS(rs, 1)] = FNMS(T2H, T2I, T2F * T2G); + ci[WS(rs, 1)] = FMA(T2F, T2I, T2H * T2G); + } + } + { + E T1G, T1M, T1K, T1O; + { + E T1E, T1F, T1I, T1J; + T1E = T7 - Te; + T1F = T1A - T1z; + T1G = T1E - T1F; + T1M = T1E + T1F; + T1I = T1w - T1x; + T1J = Tm - Tt; + T1K = T1I - T1J; + T1O = T1J + T1I; + } + { + E T1D, T1H, T1L, T1N; + T1D = W[22]; + T1H = W[23]; + cr[WS(rs, 12)] = FNMS(T1H, T1K, T1D * T1G); + ci[WS(rs, 12)] = FMA(T1D, T1K, T1H * T1G); + T1L = W[6]; + T1N = W[7]; + cr[WS(rs, 4)] = FNMS(T1N, T1O, T1L * T1M); + ci[WS(rs, 4)] = FMA(T1L, T1O, T1N * T1M); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hb_16", twinstr, &GENUS, {136, 46, 38, 0} }; /* the trailing {136, 46, 38, 0} repeats the add / mul / fused counts quoted in this variant's header comment */ + +void X(codelet_hb_16) (planner *p) { + X(khc2hc_register) (p, hb_16, &desc); /* hands hb_16 and desc to the planner through khc2hc_register */ +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -dif -name hb_2 -include hb.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "hb.h" + +static void hb_2(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* FMA-scheduled 2-point hb codelet: for each m in [mb, me) it reads cr[0], ci[0], cr[WS(rs, 1)] and ci[WS(rs, 1)] and overwrites the same four slots. NOTE(review): R, E, stride, WS, FMA and FNMS come from the included headers, which are not shown here. */ +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs)) { /* each pass steps cr forward by ms and ci backward by ms and consumes 2 entries of W; W is first offset by (mb - 1) * 2 */ + E T5, T6, T9, T8, T7, Ta; + { + E T1, T2, T3, T4; + T1 = cr[0]; + T2 = ci[0]; + T3 = ci[WS(rs, 1)]; + T4 = cr[WS(rs, 1)]; + T5 = W[0]; + cr[0] = T1 + T2; /* output 0 (cr[0] here, ci[0] just after) is stored without a W factor */ + T6 = T1 - T2; + ci[0] = T3 - T4; + T9 = T3 + T4; + T8 = W[1]; + T7 = T5 * T6; + } + Ta = T8 * T6; + cr[WS(rs, 1)] = FNMS(T8, T9, T7); + ci[WS(rs, 1)] = FMA(T5, T9, Ta); /* assuming FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b (TODO confirm in the headers), output 1 is the pair (T6, T9) multiplied as a complex number by (W[0], W[1]) */ + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 2, "hb_2", twinstr, &GENUS, {4, 2, 2, 0} }; /* the trailing {4, 2, 2, 0} repeats the add / mul / fused counts quoted in the header comment; the leading 2 presumably mirrors the generator's -n 2 */ + +void X(codelet_hb_2) (planner *p) { + X(khc2hc_register) (p, hb_2, &desc); /* hands hb_2 and desc to the planner through khc2hc_register */ +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -dif -name hb_2 -include hb.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory
accesses + */ +#include "hb.h" + +static void hb_2(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* 2-point hb codelet, variant compiled when HAVE_FMA is not defined: for each m in [mb, me) it reads cr[0], ci[0], cr[WS(rs, 1)] and ci[WS(rs, 1)] and overwrites the same four slots. NOTE(review): R, E, stride, WS, FMA and FNMS come from the included headers, which are not shown here. */ +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs)) { /* each pass steps cr forward by ms and ci backward by ms and consumes 2 entries of W; W is first offset by (mb - 1) * 2 */ + E T1, T2, T6, T3, T4, T8, T5, T7; + T1 = cr[0]; + T2 = ci[0]; + T6 = T1 - T2; + T3 = ci[WS(rs, 1)]; + T4 = cr[WS(rs, 1)]; + T8 = T3 + T4; + cr[0] = T1 + T2; /* output 0 (cr[0] here, ci[0] next) is stored without a W factor */ + ci[0] = T3 - T4; + T5 = W[0]; + T7 = W[1]; + cr[WS(rs, 1)] = FNMS(T7, T8, T5 * T6); + ci[WS(rs, 1)] = FMA(T7, T6, T5 * T8); /* assuming FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b (TODO confirm in the headers), output 1 is the pair (T6, T8) multiplied as a complex number by (W[0], W[1]) */ + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 2, "hb_2", twinstr, &GENUS, {4, 2, 2, 0} }; /* the trailing {4, 2, 2, 0} repeats the add / mul / fused counts quoted in this variant's header comment; the leading 2 presumably mirrors the generator's -n 2 */ + +void X(codelet_hb_2) (planner *p) { + X(khc2hc_register) (p, hb_2, &desc); /* hands hb_2 and desc to the planner through khc2hc_register */ +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hb_20 -include hb.h */ + +/* + * This function contains 246 FP additions, 148 FP multiplications, + * (or, 136 additions, 38 multiplications, 110 fused multiply/add), + * 101 stack variables, 4 constants, and 80 memory accesses + */ +#include "hb.h" + +static void hb_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 38, MAKE_VOLATILE_STRIDE(40, rs)) { + E T1T, T1Q, T1P; + { + E T2W, T4e, T7, TE, T3z, T4z, T1t, T2l, T3a, T3G, T13, T33, T3H, T1i, T2g; + E T4H, T4G, T2d, T1B, T4u, T4B, T4A, T4r, T1A, T2s, T3l, T2t, T3s, T2o, T2q; + E T1w, T1y, TC, T29, T3E, T3C, T4n, T4l, TN, TL; + { + E T4, T2U, T3, T2V, T1s, T5, T1n, T1o; + { + E T1, T2, T1q, T1r; + T1 = cr[0]; + T2 = ci[WS(rs, 9)]; + T1q = ci[WS(rs, 14)]; + T1r = cr[WS(rs, 15)]; + T4 = cr[WS(rs, 5)]; + T2U = T1 - T2; + T3 = T1 + T2; + T2V = T1q + T1r; + T1s = T1q - T1r; + T5 = ci[WS(rs, 4)]; + T1n = ci[WS(rs, 19)]; + T1o = cr[WS(rs, 10)]; + } + { + E T3y, T6, T3x, T1p; + T2W = T2U + T2V; + T4e = T2U - T2V; + T3y = T4 - T5; + T6 = T4 + T5; + T3x = T1n + T1o; + T1p = T1n - T1o; + T7 = 
T3 + T6; + TE = T3 - T6; + T3z = T3x - T3y; + T4z = T3y + T3x; + T1t = T1p - T1s; + T2l = T1p + T1s; + } + } + { + E T2Z, T4f, Te, TF, T3o, T4p, T1a, T2b, TJ, TA, T4t, T3k, T4j, T39, T2f; + E T12, T32, T4g, Tl, TG, T3r, T4q, T1h, T2c, T36, T4i, Tt, TI, T3h, T4s; + E TV, T2e; + { + E Tb, T2X, Ta, T2Y, T19, Tc, T14, T15; + { + E T8, T9, T17, T18; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 5)]; + T17 = ci[WS(rs, 10)]; + T18 = cr[WS(rs, 19)]; + Tb = cr[WS(rs, 9)]; + T2X = T8 - T9; + Ta = T8 + T9; + T2Y = T17 + T18; + T19 = T17 - T18; + Tc = ci[0]; + T14 = ci[WS(rs, 15)]; + T15 = cr[WS(rs, 14)]; + } + { + E T3n, Td, T3m, T16; + T2Z = T2X + T2Y; + T4f = T2X - T2Y; + T3n = Tb - Tc; + Td = Tb + Tc; + T3m = T14 + T15; + T16 = T14 - T15; + Te = Ta + Td; + TF = Ta - Td; + T3o = T3m - T3n; + T4p = T3n + T3m; + T1a = T16 - T19; + T2b = T16 + T19; + } + } + { + E TW, T37, Tw, T3i, Tz, TX, TZ, T10; + { + E Tu, Tv, Tx, Ty; + Tu = ci[WS(rs, 7)]; + Tv = cr[WS(rs, 2)]; + Tx = ci[WS(rs, 2)]; + Ty = cr[WS(rs, 7)]; + TW = ci[WS(rs, 17)]; + T37 = Tu - Tv; + Tw = Tu + Tv; + T3i = Tx - Ty; + Tz = Tx + Ty; + TX = cr[WS(rs, 12)]; + TZ = ci[WS(rs, 12)]; + T10 = cr[WS(rs, 17)]; + } + { + E TY, T38, T11, T3j; + TJ = Tw - Tz; + TA = Tw + Tz; + T3j = TW + TX; + TY = TW - TX; + T38 = TZ + T10; + T11 = TZ - T10; + T4t = T3i - T3j; + T3k = T3i + T3j; + T4j = T37 + T38; + T39 = T37 - T38; + T2f = TY + T11; + T12 = TY - T11; + } + } + { + E Ti, T30, Th, T31, T1g, Tj, T1b, T1c; + { + E Tf, Tg, T1e, T1f; + Tf = ci[WS(rs, 3)]; + Tg = cr[WS(rs, 6)]; + T1e = ci[WS(rs, 18)]; + T1f = cr[WS(rs, 11)]; + Ti = cr[WS(rs, 1)]; + T30 = Tf - Tg; + Th = Tf + Tg; + T31 = T1e + T1f; + T1g = T1e - T1f; + Tj = ci[WS(rs, 8)]; + T1b = ci[WS(rs, 13)]; + T1c = cr[WS(rs, 16)]; + } + { + E T3p, Tk, T3q, T1d; + T32 = T30 + T31; + T4g = T30 - T31; + T3p = Ti - Tj; + Tk = Ti + Tj; + T3q = T1b + T1c; + T1d = T1b - T1c; + Tl = Th + Tk; + TG = Th - Tk; + T3r = T3p + T3q; + T4q = T3p - T3q; + T1h = T1d - T1g; + T2c = T1d + T1g; + } + } + 
{ + E Tq, T34, Tp, T35, TU, Tr, TP, TQ; + { + E Tn, To, TS, TT; + Tn = cr[WS(rs, 8)]; + To = ci[WS(rs, 1)]; + TS = ci[WS(rs, 16)]; + TT = cr[WS(rs, 13)]; + Tq = ci[WS(rs, 6)]; + T34 = Tn - To; + Tp = Tn + To; + T35 = TS + TT; + TU = TS - TT; + Tr = cr[WS(rs, 3)]; + TP = ci[WS(rs, 11)]; + TQ = cr[WS(rs, 18)]; + } + { + E T3g, Ts, T3f, TR; + T36 = T34 - T35; + T4i = T34 + T35; + T3g = Tq - Tr; + Ts = Tq + Tr; + T3f = TP + TQ; + TR = TP - TQ; + Tt = Tp + Ts; + TI = Tp - Ts; + T3h = T3f - T3g; + T4s = T3g + T3f; + TV = TR - TU; + T2e = TR + TU; + } + } + { + E T1v, T1u, T2n, T4k, T4h, T2m, TH, TK; + T3a = T36 + T39; + T3G = T36 - T39; + T13 = TV - T12; + T1v = TV + T12; + T33 = T2Z + T32; + T3H = T2Z - T32; + T1i = T1a - T1h; + T1u = T1a + T1h; + T2n = T2e + T2f; + T2g = T2e - T2f; + T4H = T4i - T4j; + T4k = T4i + T4j; + T4h = T4f + T4g; + T4G = T4f - T4g; + T2d = T2b - T2c; + T2m = T2b + T2c; + TH = TF + TG; + T1B = TF - TG; + T4u = T4s - T4t; + T4B = T4s + T4t; + T4A = T4p + T4q; + T4r = T4p - T4q; + T1A = TI - TJ; + TK = TI + TJ; + { + E Tm, T3B, TB, T3A; + Tm = Te + Tl; + T2s = Te - Tl; + T3l = T3h + T3k; + T3B = T3h - T3k; + TB = Tt + TA; + T2t = Tt - TA; + T3s = T3o + T3r; + T3A = T3o - T3r; + T2o = T2m + T2n; + T2q = T2m - T2n; + T1w = T1u + T1v; + T1y = T1u - T1v; + TC = Tm + TB; + T29 = Tm - TB; + T3E = T3A - T3B; + T3C = T3A + T3B; + T4n = T4h - T4k; + T4l = T4h + T4k; + TN = TH - TK; + TL = TH + TK; + } + } + } + { + E T3d, T3b, T4E, T1x, TM, T4m, T58, T5b, T4D, T5a, T5c, T59, T4C; + cr[0] = T7 + TC; + T3d = T33 - T3a; + T3b = T33 + T3a; + T4E = T4A - T4B; + T4C = T4A + T4B; + ci[0] = T2l + T2o; + { + E T25, T22, T21, T24, T23, T26, T57; + T1x = FNMS(KP250000000, T1w, T1t); + T25 = T1t + T1w; + T22 = TE + TL; + TM = FNMS(KP250000000, TL, TE); + T21 = W[18]; + T24 = W[19]; + T4m = FNMS(KP250000000, T4l, T4e); + T58 = T4e + T4l; + T5b = T4z + T4C; + T4D = FNMS(KP250000000, T4C, T4z); + T23 = T21 * T22; + T26 = T24 * T22; + T57 = W[8]; + T5a = W[9]; + cr[WS(rs, 
10)] = FNMS(T24, T25, T23); + ci[WS(rs, 10)] = FMA(T21, T25, T26); + T5c = T57 * T5b; + T59 = T57 * T58; + } + { + E T3U, T3Z, T3W, T40, T3V; + { + E T3c, T48, T4b, T3D, T47, T4a; + T3c = FNMS(KP250000000, T3b, T2W); + T48 = T2W + T3b; + T4b = T3z + T3C; + T3D = FNMS(KP250000000, T3C, T3z); + ci[WS(rs, 5)] = FMA(T5a, T58, T5c); + cr[WS(rs, 5)] = FNMS(T5a, T5b, T59); + T47 = W[28]; + T4a = W[29]; + { + E T3I, T3Y, T42, T3u, T3M, T3X, T3F; + { + E T3T, T3t, T4c, T49, T3e, T3S; + T3T = FMA(KP618033988, T3l, T3s); + T3t = FNMS(KP618033988, T3s, T3l); + T4c = T47 * T4b; + T49 = T47 * T48; + T3I = FNMS(KP618033988, T3H, T3G); + T3Y = FMA(KP618033988, T3G, T3H); + ci[WS(rs, 15)] = FMA(T4a, T48, T4c); + cr[WS(rs, 15)] = FNMS(T4a, T4b, T49); + T3e = FNMS(KP559016994, T3d, T3c); + T3S = FMA(KP559016994, T3d, T3c); + T42 = FMA(KP951056516, T3T, T3S); + T3U = FNMS(KP951056516, T3T, T3S); + T3u = FNMS(KP951056516, T3t, T3e); + T3M = FMA(KP951056516, T3t, T3e); + T3X = FMA(KP559016994, T3E, T3D); + T3F = FNMS(KP559016994, T3E, T3D); + } + { + E T3P, T45, T44, T46, T43; + { + E T3w, T3J, T3v, T3K, T2T, T41; + T2T = W[4]; + T3w = W[5]; + T3J = FMA(KP951056516, T3I, T3F); + T3P = FNMS(KP951056516, T3I, T3F); + T45 = FNMS(KP951056516, T3Y, T3X); + T3Z = FMA(KP951056516, T3Y, T3X); + T3v = T2T * T3u; + T3K = T2T * T3J; + T41 = W[36]; + T44 = W[37]; + cr[WS(rs, 3)] = FNMS(T3w, T3J, T3v); + ci[WS(rs, 3)] = FMA(T3w, T3u, T3K); + T46 = T41 * T45; + T43 = T41 * T42; + } + { + E T3O, T3Q, T3N, T3L, T3R; + T3L = W[12]; + T3O = W[13]; + ci[WS(rs, 19)] = FMA(T44, T42, T46); + cr[WS(rs, 19)] = FNMS(T44, T45, T43); + T3Q = T3L * T3P; + T3N = T3L * T3M; + T3R = W[20]; + T3W = W[21]; + ci[WS(rs, 7)] = FMA(T3O, T3M, T3Q); + cr[WS(rs, 7)] = FNMS(T3O, T3P, T3N); + T40 = T3R * T3Z; + T3V = T3R * T3U; + } + } + } + } + { + E T4U, T4Z, T4W, T50, T4V, T2L, T2I, T2H; + { + E T4T, T4v, T4I, T4Y, T4o, T4S; + T4T = FNMS(KP618033988, T4r, T4u); + T4v = FMA(KP618033988, T4u, T4r); + ci[WS(rs, 11)] = FMA(T3W, 
T3U, T40); + cr[WS(rs, 11)] = FNMS(T3W, T3Z, T3V); + T4I = FMA(KP618033988, T4H, T4G); + T4Y = FNMS(KP618033988, T4G, T4H); + T4o = FMA(KP559016994, T4n, T4m); + T4S = FNMS(KP559016994, T4n, T4m); + { + E T52, T4M, T55, T4P, T54, T56, T53; + { + E T4d, T4w, T4J, T4x, T4y, T4X, T4F, T51, T4K; + T4d = W[0]; + T4X = FNMS(KP559016994, T4E, T4D); + T4F = FMA(KP559016994, T4E, T4D); + T4U = FNMS(KP951056516, T4T, T4S); + T52 = FMA(KP951056516, T4T, T4S); + T4M = FMA(KP951056516, T4v, T4o); + T4w = FNMS(KP951056516, T4v, T4o); + T4Z = FMA(KP951056516, T4Y, T4X); + T55 = FNMS(KP951056516, T4Y, T4X); + T4P = FNMS(KP951056516, T4I, T4F); + T4J = FMA(KP951056516, T4I, T4F); + T4x = T4d * T4w; + T4y = W[1]; + T51 = W[32]; + T4K = T4d * T4J; + T54 = W[33]; + cr[WS(rs, 1)] = FNMS(T4y, T4J, T4x); + T56 = T51 * T55; + T53 = T51 * T52; + ci[WS(rs, 1)] = FMA(T4y, T4w, T4K); + } + { + E T4O, T4Q, T4N, T4L, T4R; + T4L = W[16]; + ci[WS(rs, 17)] = FMA(T54, T52, T56); + cr[WS(rs, 17)] = FNMS(T54, T55, T53); + T4O = W[17]; + T4Q = T4L * T4P; + T4N = T4L * T4M; + T4R = W[24]; + T4W = W[25]; + ci[WS(rs, 9)] = FMA(T4O, T4M, T4Q); + cr[WS(rs, 9)] = FNMS(T4O, T4P, T4N); + T50 = T4R * T4Z; + T4V = T4R * T4U; + } + } + } + { + E T2K, T2u, T2F, T2h, T28, T2J, T2r, T2p; + T2K = FNMS(KP618033988, T2s, T2t); + T2u = FMA(KP618033988, T2t, T2s); + ci[WS(rs, 13)] = FMA(T4W, T4U, T50); + cr[WS(rs, 13)] = FNMS(T4W, T4Z, T4V); + T2p = FNMS(KP250000000, T2o, T2l); + T2F = FNMS(KP618033988, T2d, T2g); + T2h = FMA(KP618033988, T2g, T2d); + T28 = FNMS(KP250000000, TC, T7); + T2J = FNMS(KP559016994, T2q, T2p); + T2r = FMA(KP559016994, T2q, T2p); + { + E T2B, T2G, T2y, T2R, T2Q, T2P, T2A, T2x; + { + E T2k, T2v, T27, T2O, T2i, T2a, T2E; + T2k = W[7]; + T2a = FMA(KP559016994, T29, T28); + T2E = FNMS(KP559016994, T29, T28); + T2B = FMA(KP951056516, T2u, T2r); + T2v = FNMS(KP951056516, T2u, T2r); + T27 = W[6]; + T2O = FMA(KP951056516, T2F, T2E); + T2G = FNMS(KP951056516, T2F, T2E); + T2i = FMA(KP951056516, T2h, 
T2a); + T2y = FNMS(KP951056516, T2h, T2a); + { + E T2N, T2j, T2w, T2S; + T2L = FMA(KP951056516, T2K, T2J); + T2R = FNMS(KP951056516, T2K, T2J); + T2Q = W[23]; + T2N = W[22]; + T2j = T27 * T2i; + T2w = T2k * T2i; + T2S = T2Q * T2O; + T2P = T2N * T2O; + cr[WS(rs, 4)] = FNMS(T2k, T2v, T2j); + ci[WS(rs, 4)] = FMA(T27, T2v, T2w); + ci[WS(rs, 12)] = FMA(T2N, T2R, T2S); + } + } + cr[WS(rs, 12)] = FNMS(T2Q, T2R, T2P); + T2A = W[31]; + T2x = W[30]; + { + E T2D, T2M, T2C, T2z; + T2I = W[15]; + T2C = T2A * T2y; + T2z = T2x * T2y; + T2D = W[14]; + T2M = T2I * T2G; + ci[WS(rs, 16)] = FMA(T2x, T2B, T2C); + cr[WS(rs, 16)] = FNMS(T2A, T2B, T2z); + T2H = T2D * T2G; + ci[WS(rs, 8)] = FMA(T2D, T2L, T2M); + } + } + } + { + E T1S, T1C, T1j, T1N, T1z, T1R; + T1S = FMA(KP618033988, T1A, T1B); + T1C = FNMS(KP618033988, T1B, T1A); + cr[WS(rs, 8)] = FNMS(T2I, T2L, T2H); + T1j = FNMS(KP618033988, T1i, T13); + T1N = FMA(KP618033988, T13, T1i); + T1z = FNMS(KP559016994, T1y, T1x); + T1R = FMA(KP559016994, T1y, T1x); + { + E T1J, T1O, T1G, T1Z, T1Y, T1X, T1I, T1F; + { + E T1m, T1D, TD, T1W, T1k, T1M, TO; + T1m = W[3]; + T1M = FMA(KP559016994, TN, TM); + TO = FNMS(KP559016994, TN, TM); + T1D = FNMS(KP951056516, T1C, T1z); + T1J = FMA(KP951056516, T1C, T1z); + TD = W[2]; + T1O = FNMS(KP951056516, T1N, T1M); + T1W = FMA(KP951056516, T1N, T1M); + T1G = FNMS(KP951056516, T1j, TO); + T1k = FMA(KP951056516, T1j, TO); + { + E T1V, T1l, T1E, T20; + T1Z = FNMS(KP951056516, T1S, T1R); + T1T = FMA(KP951056516, T1S, T1R); + T1Y = W[27]; + T1V = W[26]; + T1l = TD * T1k; + T1E = T1m * T1k; + T20 = T1Y * T1W; + T1X = T1V * T1W; + cr[WS(rs, 2)] = FNMS(T1m, T1D, T1l); + ci[WS(rs, 2)] = FMA(TD, T1D, T1E); + ci[WS(rs, 14)] = FMA(T1V, T1Z, T20); + } + } + cr[WS(rs, 14)] = FNMS(T1Y, T1Z, T1X); + T1I = W[35]; + T1F = W[34]; + { + E T1L, T1U, T1K, T1H; + T1Q = W[11]; + T1K = T1I * T1G; + T1H = T1F * T1G; + T1L = W[10]; + T1U = T1Q * T1O; + ci[WS(rs, 18)] = FMA(T1F, T1J, T1K); + cr[WS(rs, 18)] = FNMS(T1I, T1J, T1H); + 
T1P = T1L * T1O; + ci[WS(rs, 6)] = FMA(T1L, T1T, T1U); + } + } + } + } + } + } + } + cr[WS(rs, 6)] = FNMS(T1Q, T1T, T1P); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hb_20", twinstr, &GENUS, {136, 38, 110, 0} }; + +void X(codelet_hb_20) (planner *p) { + X(khc2hc_register) (p, hb_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hb_20 -include hb.h */ + +/* + * This function contains 246 FP additions, 124 FP multiplications, + * (or, 184 additions, 62 multiplications, 62 fused multiply/add), + * 97 stack variables, 4 constants, and 80 memory accesses + */ +#include "hb.h" + +static void hb_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 38, MAKE_VOLATILE_STRIDE(40, rs)) { + E T7, T3T, T49, TE, T1v, T2T, T3g, T2d, T13, T3n, T3o, T1i, T26, T4e, T4d; + E T23, T1n, T42, T3Z, T1m, T2h, T2I, T2i, T2P, T30, T37, T38, Tm, TB, TC; + E T46, T47, T4a, T2a, T2b, T2e, T1w, T1x, T1y, T3O, T3R, T3U, T3h, T3i, T3j; + E TH, TK, TL; + { + E T3, T2R, T1u, T2S, T6, T3f, T1r, T3e; + { + E T1, T2, T1s, T1t; + T1 = cr[0]; + T2 = ci[WS(rs, 9)]; + T3 = T1 + T2; + T2R = T1 - T2; + T1s = ci[WS(rs, 14)]; + T1t = cr[WS(rs, 15)]; + T1u = T1s - T1t; + T2S = T1s + T1t; + } + { + E T4, T5, T1p, T1q; + T4 = cr[WS(rs, 5)]; + T5 = ci[WS(rs, 4)]; + T6 = T4 + T5; + T3f = T4 - T5; + T1p = ci[WS(rs, 19)]; + T1q = cr[WS(rs, 10)]; + T1r = T1p - T1q; + T3e = T1p + T1q; + } + T7 = T3 + T6; + T3T = T2R - T2S; + T49 = T3f + 
T3e; + TE = T3 - T6; + T1v = T1r - T1u; + T2T = T2R + T2S; + T3g = T3e - T3f; + T2d = T1r + T1u; + } + { + E Te, T3M, T3X, TF, TV, T2E, T2W, T21, TA, T3Q, T41, TJ, T1h, T2O, T36; + E T25, Tl, T3N, T3Y, TG, T12, T2H, T2Z, T22, Tt, T3P, T40, TI, T1a, T2L; + E T33, T24; + { + E Ta, T2U, TU, T2V, Td, T2D, TR, T2C; + { + E T8, T9, TS, TT; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 5)]; + Ta = T8 + T9; + T2U = T8 - T9; + TS = ci[WS(rs, 10)]; + TT = cr[WS(rs, 19)]; + TU = TS - TT; + T2V = TS + TT; + } + { + E Tb, Tc, TP, TQ; + Tb = cr[WS(rs, 9)]; + Tc = ci[0]; + Td = Tb + Tc; + T2D = Tb - Tc; + TP = ci[WS(rs, 15)]; + TQ = cr[WS(rs, 14)]; + TR = TP - TQ; + T2C = TP + TQ; + } + Te = Ta + Td; + T3M = T2U - T2V; + T3X = T2D + T2C; + TF = Ta - Td; + TV = TR - TU; + T2E = T2C - T2D; + T2W = T2U + T2V; + T21 = TR + TU; + } + { + E Tw, T34, Tz, T2M, T1d, T2N, T1g, T35; + { + E Tu, Tv, Tx, Ty; + Tu = ci[WS(rs, 7)]; + Tv = cr[WS(rs, 2)]; + Tw = Tu + Tv; + T34 = Tu - Tv; + Tx = ci[WS(rs, 2)]; + Ty = cr[WS(rs, 7)]; + Tz = Tx + Ty; + T2M = Tx - Ty; + } + { + E T1b, T1c, T1e, T1f; + T1b = ci[WS(rs, 17)]; + T1c = cr[WS(rs, 12)]; + T1d = T1b - T1c; + T2N = T1b + T1c; + T1e = ci[WS(rs, 12)]; + T1f = cr[WS(rs, 17)]; + T1g = T1e - T1f; + T35 = T1e + T1f; + } + TA = Tw + Tz; + T3Q = T34 + T35; + T41 = T2M - T2N; + TJ = Tw - Tz; + T1h = T1d - T1g; + T2O = T2M + T2N; + T36 = T34 - T35; + T25 = T1d + T1g; + } + { + E Th, T2X, T11, T2Y, Tk, T2F, TY, T2G; + { + E Tf, Tg, TZ, T10; + Tf = ci[WS(rs, 3)]; + Tg = cr[WS(rs, 6)]; + Th = Tf + Tg; + T2X = Tf - Tg; + TZ = ci[WS(rs, 18)]; + T10 = cr[WS(rs, 11)]; + T11 = TZ - T10; + T2Y = TZ + T10; + } + { + E Ti, Tj, TW, TX; + Ti = cr[WS(rs, 1)]; + Tj = ci[WS(rs, 8)]; + Tk = Ti + Tj; + T2F = Ti - Tj; + TW = ci[WS(rs, 13)]; + TX = cr[WS(rs, 16)]; + TY = TW - TX; + T2G = TW + TX; + } + Tl = Th + Tk; + T3N = T2X - T2Y; + T3Y = T2F - T2G; + TG = Th - Tk; + T12 = TY - T11; + T2H = T2F + T2G; + T2Z = T2X + T2Y; + T22 = TY + T11; + } + { + E Tp, T31, T19, T32, Ts, 
T2K, T16, T2J; + { + E Tn, To, T17, T18; + Tn = cr[WS(rs, 8)]; + To = ci[WS(rs, 1)]; + Tp = Tn + To; + T31 = Tn - To; + T17 = ci[WS(rs, 16)]; + T18 = cr[WS(rs, 13)]; + T19 = T17 - T18; + T32 = T17 + T18; + } + { + E Tq, Tr, T14, T15; + Tq = ci[WS(rs, 6)]; + Tr = cr[WS(rs, 3)]; + Ts = Tq + Tr; + T2K = Tq - Tr; + T14 = ci[WS(rs, 11)]; + T15 = cr[WS(rs, 18)]; + T16 = T14 - T15; + T2J = T14 + T15; + } + Tt = Tp + Ts; + T3P = T31 + T32; + T40 = T2K + T2J; + TI = Tp - Ts; + T1a = T16 - T19; + T2L = T2J - T2K; + T33 = T31 - T32; + T24 = T16 + T19; + } + T13 = TV - T12; + T3n = T2W - T2Z; + T3o = T33 - T36; + T1i = T1a - T1h; + T26 = T24 - T25; + T4e = T3P - T3Q; + T4d = T3M - T3N; + T23 = T21 - T22; + T1n = TI - TJ; + T42 = T40 - T41; + T3Z = T3X - T3Y; + T1m = TF - TG; + T2h = Te - Tl; + T2I = T2E + T2H; + T2i = Tt - TA; + T2P = T2L + T2O; + T30 = T2W + T2Z; + T37 = T33 + T36; + T38 = T30 + T37; + Tm = Te + Tl; + TB = Tt + TA; + TC = Tm + TB; + T46 = T3X + T3Y; + T47 = T40 + T41; + T4a = T46 + T47; + T2a = T21 + T22; + T2b = T24 + T25; + T2e = T2a + T2b; + T1w = TV + T12; + T1x = T1a + T1h; + T1y = T1w + T1x; + T3O = T3M + T3N; + T3R = T3P + T3Q; + T3U = T3O + T3R; + T3h = T2E - T2H; + T3i = T2L - T2O; + T3j = T3h + T3i; + TH = TF + TG; + TK = TI + TJ; + TL = TH + TK; + } + cr[0] = T7 + TC; + ci[0] = T2d + T2e; + { + E T1U, T1W, T1T, T1V; + T1U = TE + TL; + T1W = T1v + T1y; + T1T = W[18]; + T1V = W[19]; + cr[WS(rs, 10)] = FNMS(T1V, T1W, T1T * T1U); + ci[WS(rs, 10)] = FMA(T1V, T1U, T1T * T1W); + } + { + E T4y, T4A, T4x, T4z; + T4y = T3T + T3U; + T4A = T49 + T4a; + T4x = W[8]; + T4z = W[9]; + cr[WS(rs, 5)] = FNMS(T4z, T4A, T4x * T4y); + ci[WS(rs, 5)] = FMA(T4x, T4A, T4z * T4y); + } + { + E T3I, T3K, T3H, T3J; + T3I = T2T + T38; + T3K = T3g + T3j; + T3H = W[28]; + T3J = W[29]; + cr[WS(rs, 15)] = FNMS(T3J, T3K, T3H * T3I); + ci[WS(rs, 15)] = FMA(T3H, T3K, T3J * T3I); + } + { + E T27, T2j, T2v, T2r, T2g, T2u, T20, T2q; + T27 = FMA(KP951056516, T23, KP587785252 * T26); + T2j = 
FMA(KP951056516, T2h, KP587785252 * T2i); + T2v = FNMS(KP951056516, T2i, KP587785252 * T2h); + T2r = FNMS(KP951056516, T26, KP587785252 * T23); + { + E T2c, T2f, T1Y, T1Z; + T2c = KP559016994 * (T2a - T2b); + T2f = FNMS(KP250000000, T2e, T2d); + T2g = T2c + T2f; + T2u = T2f - T2c; + T1Y = KP559016994 * (Tm - TB); + T1Z = FNMS(KP250000000, TC, T7); + T20 = T1Y + T1Z; + T2q = T1Z - T1Y; + } + { + E T28, T2k, T1X, T29; + T28 = T20 + T27; + T2k = T2g - T2j; + T1X = W[6]; + T29 = W[7]; + cr[WS(rs, 4)] = FNMS(T29, T2k, T1X * T28); + ci[WS(rs, 4)] = FMA(T29, T28, T1X * T2k); + } + { + E T2y, T2A, T2x, T2z; + T2y = T2q - T2r; + T2A = T2v + T2u; + T2x = W[22]; + T2z = W[23]; + cr[WS(rs, 12)] = FNMS(T2z, T2A, T2x * T2y); + ci[WS(rs, 12)] = FMA(T2z, T2y, T2x * T2A); + } + { + E T2m, T2o, T2l, T2n; + T2m = T20 - T27; + T2o = T2j + T2g; + T2l = W[30]; + T2n = W[31]; + cr[WS(rs, 16)] = FNMS(T2n, T2o, T2l * T2m); + ci[WS(rs, 16)] = FMA(T2n, T2m, T2l * T2o); + } + { + E T2s, T2w, T2p, T2t; + T2s = T2q + T2r; + T2w = T2u - T2v; + T2p = W[14]; + T2t = W[15]; + cr[WS(rs, 8)] = FNMS(T2t, T2w, T2p * T2s); + ci[WS(rs, 8)] = FMA(T2t, T2s, T2p * T2w); + } + } + { + E T43, T4f, T4r, T4m, T4c, T4q, T3W, T4n; + T43 = FMA(KP951056516, T3Z, KP587785252 * T42); + T4f = FMA(KP951056516, T4d, KP587785252 * T4e); + T4r = FNMS(KP951056516, T4e, KP587785252 * T4d); + T4m = FNMS(KP951056516, T42, KP587785252 * T3Z); + { + E T48, T4b, T3S, T3V; + T48 = KP559016994 * (T46 - T47); + T4b = FNMS(KP250000000, T4a, T49); + T4c = T48 + T4b; + T4q = T4b - T48; + T3S = KP559016994 * (T3O - T3R); + T3V = FNMS(KP250000000, T3U, T3T); + T3W = T3S + T3V; + T4n = T3V - T3S; + } + { + E T44, T4g, T3L, T45; + T44 = T3W - T43; + T4g = T4c + T4f; + T3L = W[0]; + T45 = W[1]; + cr[WS(rs, 1)] = FNMS(T45, T4g, T3L * T44); + ci[WS(rs, 1)] = FMA(T3L, T4g, T45 * T44); + } + { + E T4u, T4w, T4t, T4v; + T4u = T4n - T4m; + T4w = T4q + T4r; + T4t = W[32]; + T4v = W[33]; + cr[WS(rs, 17)] = FNMS(T4v, T4w, T4t * T4u); + ci[WS(rs, 
17)] = FMA(T4t, T4w, T4v * T4u); + } + { + E T4i, T4k, T4h, T4j; + T4i = T43 + T3W; + T4k = T4c - T4f; + T4h = W[16]; + T4j = W[17]; + cr[WS(rs, 9)] = FNMS(T4j, T4k, T4h * T4i); + ci[WS(rs, 9)] = FMA(T4h, T4k, T4j * T4i); + } + { + E T4o, T4s, T4l, T4p; + T4o = T4m + T4n; + T4s = T4q - T4r; + T4l = W[24]; + T4p = W[25]; + cr[WS(rs, 13)] = FNMS(T4p, T4s, T4l * T4o); + ci[WS(rs, 13)] = FMA(T4l, T4s, T4p * T4o); + } + } + { + E T1j, T1o, T1M, T1J, T1B, T1N, TO, T1I; + T1j = FNMS(KP951056516, T1i, KP587785252 * T13); + T1o = FNMS(KP951056516, T1n, KP587785252 * T1m); + T1M = FMA(KP951056516, T1m, KP587785252 * T1n); + T1J = FMA(KP951056516, T13, KP587785252 * T1i); + { + E T1z, T1A, TM, TN; + T1z = FNMS(KP250000000, T1y, T1v); + T1A = KP559016994 * (T1w - T1x); + T1B = T1z - T1A; + T1N = T1A + T1z; + TM = FNMS(KP250000000, TL, TE); + TN = KP559016994 * (TH - TK); + TO = TM - TN; + T1I = TN + TM; + } + { + E T1k, T1C, TD, T1l; + T1k = TO - T1j; + T1C = T1o + T1B; + TD = W[2]; + T1l = W[3]; + cr[WS(rs, 2)] = FNMS(T1l, T1C, TD * T1k); + ci[WS(rs, 2)] = FMA(T1l, T1k, TD * T1C); + } + { + E T1Q, T1S, T1P, T1R; + T1Q = T1I + T1J; + T1S = T1N - T1M; + T1P = W[26]; + T1R = W[27]; + cr[WS(rs, 14)] = FNMS(T1R, T1S, T1P * T1Q); + ci[WS(rs, 14)] = FMA(T1R, T1Q, T1P * T1S); + } + { + E T1E, T1G, T1D, T1F; + T1E = TO + T1j; + T1G = T1B - T1o; + T1D = W[34]; + T1F = W[35]; + cr[WS(rs, 18)] = FNMS(T1F, T1G, T1D * T1E); + ci[WS(rs, 18)] = FMA(T1F, T1E, T1D * T1G); + } + { + E T1K, T1O, T1H, T1L; + T1K = T1I - T1J; + T1O = T1M + T1N; + T1H = W[10]; + T1L = W[11]; + cr[WS(rs, 6)] = FNMS(T1L, T1O, T1H * T1K); + ci[WS(rs, 6)] = FMA(T1L, T1K, T1H * T1O); + } + } + { + E T2Q, T3p, T3B, T3x, T3m, T3A, T3b, T3w; + T2Q = FNMS(KP951056516, T2P, KP587785252 * T2I); + T3p = FNMS(KP951056516, T3o, KP587785252 * T3n); + T3B = FMA(KP951056516, T3n, KP587785252 * T3o); + T3x = FMA(KP951056516, T2I, KP587785252 * T2P); + { + E T3k, T3l, T39, T3a; + T3k = FNMS(KP250000000, T3j, T3g); + T3l = KP559016994 
* (T3h - T3i); + T3m = T3k - T3l; + T3A = T3l + T3k; + T39 = FNMS(KP250000000, T38, T2T); + T3a = KP559016994 * (T30 - T37); + T3b = T39 - T3a; + T3w = T3a + T39; + } + { + E T3c, T3q, T2B, T3d; + T3c = T2Q + T3b; + T3q = T3m - T3p; + T2B = W[4]; + T3d = W[5]; + cr[WS(rs, 3)] = FNMS(T3d, T3q, T2B * T3c); + ci[WS(rs, 3)] = FMA(T2B, T3q, T3d * T3c); + } + { + E T3E, T3G, T3D, T3F; + T3E = T3x + T3w; + T3G = T3A - T3B; + T3D = W[36]; + T3F = W[37]; + cr[WS(rs, 19)] = FNMS(T3F, T3G, T3D * T3E); + ci[WS(rs, 19)] = FMA(T3D, T3G, T3F * T3E); + } + { + E T3s, T3u, T3r, T3t; + T3s = T3b - T2Q; + T3u = T3m + T3p; + T3r = W[12]; + T3t = W[13]; + cr[WS(rs, 7)] = FNMS(T3t, T3u, T3r * T3s); + ci[WS(rs, 7)] = FMA(T3r, T3u, T3t * T3s); + } + { + E T3y, T3C, T3v, T3z; + T3y = T3w - T3x; + T3C = T3A + T3B; + T3v = W[20]; + T3z = W[21]; + cr[WS(rs, 11)] = FNMS(T3z, T3C, T3v * T3y); + ci[WS(rs, 11)] = FMA(T3v, T3C, T3z * T3y); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hb_20", twinstr, &GENUS, {184, 62, 62, 0} }; + +void X(codelet_hb_20) (planner *p) { + X(khc2hc_register) (p, hb_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1626 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -dif -name hb_25 -include hb.h */ + +/* + * This function contains 400 FP additions, 364 FP multiplications, + * (or, 84 additions, 48 multiplications, 316 fused multiply/add), + * 176 stack variables, 47 constants, and 100 memory accesses + */ +#include "hb.h" + +static void hb_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP833417178, +0.833417178328688677408962550243238843138996060); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP248028675, 
+0.248028675328619457762448260696444630363259177); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP943557151, +0.943557151597354104399655195398983005179443399); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP062914667, +0.062914667253649757225485955897349402364686947); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + 
DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { + E T3w, T3P, T2d, T3y, T3x, T3Q; + { + E T9, T3E, T1F, T3B, T6f, T7d, T5u, T6U, T4k, T2k, T5G, T1G, T19, T1H, T1s; + E T1M, T1N, TP, TM, T7i, T77, T5X, T64, T4u, T4D, T3p, T2z, T74, T7h, T63; + E T5Q, T4x, T4E, T3q, T2O, T4n, T4G, T3t, T3j, T5F, T70, T7f, T66, T5B, T4q; + E T4H, T3s, T34, T5E, T6V; + { + E T2f, T2e, T6e, T2j, T5t, T6d; + { + E T1, T1x, T3C, T3D, T8, T2h, T1A, T1D, T2i, T3A, T1E, T3z; + T1 = cr[0]; + T1x = ci[WS(rs, 24)]; + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 5)]; + T3 = ci[WS(rs, 4)]; + T5 = cr[WS(rs, 10)]; + T6 = ci[WS(rs, 9)]; + { + E T1y, T4, T7, T1z, T1B, T1C; + T1y = ci[WS(rs, 19)]; + T3C = T2 - T3; + T4 = T2 + T3; + T3D = T5 - T6; + T7 = T5 + T6; + T1z = cr[WS(rs, 20)]; + T1B = ci[WS(rs, 14)]; + T1C = cr[WS(rs, 15)]; + T8 = T4 + T7; + T2f = T4 - T7; + T2h = T1y + T1z; + T1A = T1y - T1z; + T1D = T1B - T1C; + T2i = T1B + T1C; + } + } + T2e = FNMS(KP250000000, T8, T1); + T9 = T1 + T8; + T3A = T1A - T1D; + T1E = T1A + T1D; + T3E = FMA(KP618033988, T3D, T3C); + T6e = FNMS(KP618033988, T3C, T3D); + T2j = FMA(KP618033988, T2i, T2h); + T5t = FNMS(KP618033988, T2h, T2i); + T1F = T1x + T1E; + T3z = FNMS(KP250000000, T1E, T1x); + T6d = FNMS(KP559016994, T3A, T3z); + T3B = FMA(KP559016994, T3A, T3z); + } + { + E T2x, T5V, T2m, T2l, Ti, T5w, T3h, T36, TK, T35, T2F, T5L, T2I, Tr, T2H; + E T3a, T5z, T3d, T1r, T3c, T2q, T5S, T2t, TZ, T2s, T5O, T2M, T2B, Tt, T18; + E T2A, Tx, T2V, T2Y, Tw, T30, T1i, T2X, Ty; + 
{ + E T1j, T1k, T1p, T39, T1l; + { + E TC, TI, T3g, TD, TE; + { + E Ta, Te, Tf, Tb, Tc, T5s, T2g, T2w, Tg; + Ta = cr[WS(rs, 1)]; + T5s = FNMS(KP559016994, T2f, T2e); + T2g = FMA(KP559016994, T2f, T2e); + T6f = FNMS(KP951056516, T6e, T6d); + T7d = FMA(KP951056516, T6e, T6d); + Te = cr[WS(rs, 11)]; + T5u = FMA(KP951056516, T5t, T5s); + T6U = FNMS(KP951056516, T5t, T5s); + T4k = FMA(KP951056516, T2j, T2g); + T2k = FNMS(KP951056516, T2j, T2g); + Tf = ci[WS(rs, 8)]; + Tb = cr[WS(rs, 6)]; + Tc = ci[WS(rs, 3)]; + TC = cr[WS(rs, 3)]; + T2w = Tf - Te; + Tg = Te + Tf; + { + E T2v, Td, Th, TG, TH; + T2v = Tb - Tc; + Td = Tb + Tc; + TG = ci[WS(rs, 11)]; + TH = ci[WS(rs, 6)]; + T2x = FNMS(KP618033988, T2w, T2v); + T5V = FMA(KP618033988, T2v, T2w); + Th = Td + Tg; + T2m = Td - Tg; + TI = TG + TH; + T3g = TG - TH; + T2l = FNMS(KP250000000, Th, Ta); + Ti = Ta + Th; + TD = cr[WS(rs, 8)]; + TE = ci[WS(rs, 1)]; + } + } + { + E Tj, Tk, Tp, T2E, TJ, Tl; + Tj = cr[WS(rs, 4)]; + { + E Tn, To, T3f, TF; + Tn = ci[WS(rs, 10)]; + To = ci[WS(rs, 5)]; + T3f = TD - TE; + TF = TD + TE; + Tk = cr[WS(rs, 9)]; + Tp = Tn + To; + T2E = To - Tn; + T5w = FNMS(KP618033988, T3f, T3g); + T3h = FMA(KP618033988, T3g, T3f); + T36 = TI - TF; + TJ = TF + TI; + Tl = ci[0]; + } + T1j = ci[WS(rs, 21)]; + TK = TC + TJ; + T35 = FNMS(KP250000000, TJ, TC); + { + E T1n, Tm, T2D, T1o, Tq; + T1n = cr[WS(rs, 13)]; + Tm = Tk + Tl; + T2D = Tl - Tk; + T1o = cr[WS(rs, 18)]; + T1k = ci[WS(rs, 16)]; + T2F = FMA(KP618033988, T2E, T2D); + T5L = FNMS(KP618033988, T2D, T2E); + T2I = Tm - Tp; + Tq = Tm + Tp; + T1p = T1n + T1o; + T39 = T1o - T1n; + Tr = Tj + Tq; + T2H = FMS(KP250000000, Tq, Tj); + T1l = cr[WS(rs, 23)]; + } + } + } + { + E T10, T11, T16, T2L, T12; + { + E TR, TS, TX, T2p, T1q, TT; + TR = ci[WS(rs, 23)]; + { + E TV, TW, T38, T1m; + TV = ci[WS(rs, 13)]; + TW = cr[WS(rs, 16)]; + T38 = T1k + T1l; + T1m = T1k - T1l; + TS = ci[WS(rs, 18)]; + TX = TV - TW; + T2p = TV + TW; + T3a = FMA(KP618033988, T39, T38); + T5z = 
FNMS(KP618033988, T38, T39); + T3d = T1m + T1p; + T1q = T1m - T1p; + TT = cr[WS(rs, 21)]; + } + T10 = ci[WS(rs, 20)]; + T1r = T1j + T1q; + T3c = FMS(KP250000000, T1q, T1j); + { + E T14, TU, T2o, T15, TY; + T14 = cr[WS(rs, 14)]; + TU = TS - TT; + T2o = TS + TT; + T15 = cr[WS(rs, 19)]; + T11 = ci[WS(rs, 15)]; + T2q = FMA(KP618033988, T2p, T2o); + T5S = FNMS(KP618033988, T2o, T2p); + T2t = TU - TX; + TY = TU + TX; + T16 = T14 + T15; + T2L = T15 - T14; + TZ = TR + TY; + T2s = FNMS(KP250000000, TY, TR); + T12 = cr[WS(rs, 24)]; + } + } + { + E T1a, T1e, T1d, T2T, T17, T1f; + T1a = ci[WS(rs, 22)]; + { + E T1b, T1c, T2K, T13; + T1b = ci[WS(rs, 17)]; + T1c = cr[WS(rs, 22)]; + T2K = T11 + T12; + T13 = T11 - T12; + T1e = ci[WS(rs, 12)]; + T1d = T1b - T1c; + T2T = T1b + T1c; + T5O = FNMS(KP618033988, T2K, T2L); + T2M = FMA(KP618033988, T2L, T2K); + T2B = T13 + T16; + T17 = T13 - T16; + T1f = cr[WS(rs, 17)]; + } + Tt = cr[WS(rs, 2)]; + T18 = T10 + T17; + T2A = FMS(KP250000000, T17, T10); + { + E Tu, T1g, T2U, Tv, T1h; + Tu = cr[WS(rs, 7)]; + T1g = T1e - T1f; + T2U = T1e + T1f; + Tv = ci[WS(rs, 2)]; + Tx = cr[WS(rs, 12)]; + T2V = FMA(KP618033988, T2U, T2T); + T5G = FNMS(KP618033988, T2T, T2U); + T2Y = T1d - T1g; + T1h = T1d + T1g; + Tw = Tu + Tv; + T30 = Tu - Tv; + T1i = T1a + T1h; + T2X = FMS(KP250000000, T1h, T1a); + Ty = ci[WS(rs, 7)]; + } + } + } + } + { + E T32, T5D, T2R, T2Q, T2u, T2r, T4t; + { + E TA, T31, Tz, TB, Ts; + T31 = Ty - Tx; + Tz = Tx + Ty; + T1G = TZ + T18; + T19 = TZ - T18; + T32 = FNMS(KP618033988, T31, T30); + T5D = FMA(KP618033988, T30, T31); + TA = Tw + Tz; + T2R = Tz - Tw; + T2Q = FNMS(KP250000000, TA, Tt); + TB = Tt + TA; + T1H = T1i + T1r; + T1s = T1i - T1r; + T1M = Ti - Tr; + Ts = Ti + Tr; + { + E T2n, T5R, T5U, TL; + T2n = FMA(KP559016994, T2m, T2l); + T5R = FNMS(KP559016994, T2m, T2l); + T5U = FNMS(KP559016994, T2t, T2s); + T2u = FMA(KP559016994, T2t, T2s); + TL = TB + TK; + T1N = TB - TK; + { + E T5T, T75, T5W, T76; + T5T = FMA(KP951056516, T5S, 
T5R); + T75 = FNMS(KP951056516, T5S, T5R); + T5W = FMA(KP951056516, T5V, T5U); + T76 = FNMS(KP951056516, T5V, T5U); + TP = Ts - TL; + TM = Ts + TL; + T2r = FNMS(KP951056516, T2q, T2n); + T4t = FMA(KP951056516, T2q, T2n); + T7i = FMA(KP939062505, T75, T76); + T77 = FNMS(KP939062505, T76, T75); + T5X = FNMS(KP549754652, T5W, T5T); + T64 = FMA(KP549754652, T5T, T5W); + } + } + } + { + E T2J, T2G, T4v, T5y, T37, T3e, T5v; + { + E T2C, T5K, T5N, T4s, T2y; + T2C = FNMS(KP559016994, T2B, T2A); + T5K = FMA(KP559016994, T2B, T2A); + T5N = FMA(KP559016994, T2I, T2H); + T2J = FNMS(KP559016994, T2I, T2H); + T4s = FNMS(KP951056516, T2x, T2u); + T2y = FMA(KP951056516, T2x, T2u); + { + E T73, T5M, T72, T5P; + T73 = FMA(KP951056516, T5L, T5K); + T5M = FNMS(KP951056516, T5L, T5K); + T72 = FMA(KP951056516, T5O, T5N); + T5P = FNMS(KP951056516, T5O, T5N); + T4u = FNMS(KP634619297, T4t, T4s); + T4D = FMA(KP634619297, T4s, T4t); + T3p = FMA(KP256756360, T2r, T2y); + T2z = FNMS(KP256756360, T2y, T2r); + T74 = FMA(KP126329378, T73, T72); + T7h = FNMS(KP126329378, T72, T73); + T63 = FNMS(KP470564281, T5M, T5P); + T5Q = FMA(KP470564281, T5P, T5M); + T2G = FMA(KP951056516, T2F, T2C); + T4v = FNMS(KP951056516, T2F, T2C); + } + T5y = FMA(KP559016994, T36, T35); + T37 = FNMS(KP559016994, T36, T35); + T3e = FNMS(KP559016994, T3d, T3c); + T5v = FMA(KP559016994, T3d, T3c); + } + { + E T5x, T6Y, T4w, T2N; + T4w = FNMS(KP951056516, T2M, T2J); + T2N = FMA(KP951056516, T2M, T2J); + { + E T4l, T3b, T4m, T3i; + T4l = FMA(KP951056516, T3a, T37); + T3b = FNMS(KP951056516, T3a, T37); + T4m = FMA(KP951056516, T3h, T3e); + T3i = FNMS(KP951056516, T3h, T3e); + T4x = FNMS(KP827271945, T4w, T4v); + T4E = FMA(KP827271945, T4v, T4w); + T3q = FMA(KP634619297, T2G, T2N); + T2O = FNMS(KP634619297, T2N, T2G); + T4n = FNMS(KP126329378, T4m, T4l); + T4G = FMA(KP126329378, T4l, T4m); + T3t = FNMS(KP939062505, T3b, T3i); + T3j = FMA(KP939062505, T3i, T3b); + T5x = FMA(KP951056516, T5w, T5v); + T6Y = FNMS(KP951056516, 
T5w, T5v); + } + { + E T2S, T2Z, T5C, T6Z, T5A; + T5F = FMA(KP559016994, T2R, T2Q); + T2S = FNMS(KP559016994, T2R, T2Q); + T2Z = FNMS(KP559016994, T2Y, T2X); + T5C = FMA(KP559016994, T2Y, T2X); + T6Z = FNMS(KP951056516, T5z, T5y); + T5A = FMA(KP951056516, T5z, T5y); + { + E T4p, T2W, T4o, T33; + T4p = FMA(KP951056516, T2V, T2S); + T2W = FNMS(KP951056516, T2V, T2S); + T4o = FMA(KP951056516, T32, T2Z); + T33 = FNMS(KP951056516, T32, T2Z); + T70 = FNMS(KP827271945, T6Z, T6Y); + T7f = FMA(KP827271945, T6Y, T6Z); + T66 = FNMS(KP062914667, T5x, T5A); + T5B = FMA(KP062914667, T5A, T5x); + T4q = FNMS(KP470564281, T4p, T4o); + T4H = FMA(KP470564281, T4o, T4p); + T3s = FNMS(KP549754652, T2W, T33); + T34 = FMA(KP549754652, T33, T2W); + T5E = FNMS(KP951056516, T5D, T5C); + T6V = FMA(KP951056516, T5D, T5C); + } + } + } + } + } + } + } + { + E T6X, T7e, T6A, T6F, T6C, T6G, T6B; + cr[0] = T9 + TM; + { + E T67, T5I, T25, T22, T1X, T26, T21; + { + E T1I, T23, T1L, T1Z, T1t, TO, T24, T1O; + { + E T1K, T6W, T5H, T1J; + T1K = T1G - T1H; + T1I = T1G + T1H; + T6W = FNMS(KP951056516, T5G, T5F); + T5H = FMA(KP951056516, T5G, T5F); + T1J = FNMS(KP250000000, T1I, T1F); + T6X = FMA(KP062914667, T6W, T6V); + T7e = FNMS(KP062914667, T6V, T6W); + T67 = FNMS(KP634619297, T5E, T5H); + T5I = FMA(KP634619297, T5H, T5E); + T23 = FNMS(KP559016994, T1K, T1J); + T1L = FMA(KP559016994, T1K, T1J); + T1Z = FNMS(KP618033988, T19, T1s); + T1t = FMA(KP618033988, T1s, T19); + TO = FNMS(KP250000000, TM, T9); + T24 = FNMS(KP618033988, T1M, T1N); + T1O = FMA(KP618033988, T1N, T1M); + } + { + E T2b, T2a, T1Y, TQ, T27; + ci[0] = T1F + T1I; + T2b = FMA(KP951056516, T24, T23); + T25 = FNMS(KP951056516, T24, T23); + T2a = W[29]; + T1Y = FNMS(KP559016994, TP, TO); + TQ = FMA(KP559016994, TP, TO); + T27 = W[28]; + { + E T1V, T1P, T20, T1S, T1w, T1v, TN, T1Q; + T1V = FNMS(KP951056516, T1O, T1L); + T1P = FMA(KP951056516, T1O, T1L); + { + E T28, T1u, T29, T2c; + T20 = FMA(KP951056516, T1Z, T1Y); + T28 = FNMS(KP951056516, 
T1Z, T1Y); + T1S = FMA(KP951056516, T1t, TQ); + T1u = FNMS(KP951056516, T1t, TQ); + T1w = W[9]; + T29 = T27 * T28; + T2c = T2a * T28; + TN = W[8]; + T1Q = T1w * T1u; + cr[WS(rs, 15)] = FNMS(T2a, T2b, T29); + ci[WS(rs, 15)] = FMA(T27, T2b, T2c); + T1v = TN * T1u; + } + ci[WS(rs, 5)] = FMA(TN, T1P, T1Q); + { + E T1U, T1R, T1W, T1T; + T1U = W[39]; + cr[WS(rs, 5)] = FNMS(T1w, T1P, T1v); + T1R = W[38]; + T1W = T1U * T1S; + T22 = W[19]; + T1T = T1R * T1S; + T1X = W[18]; + ci[WS(rs, 20)] = FMA(T1R, T1V, T1W); + T26 = T22 * T20; + cr[WS(rs, 20)] = FNMS(T1U, T1V, T1T); + T21 = T1X * T20; + } + } + } + } + { + E T6h, T6g, T5Y, T5J, T6z, T69, T6o, T6E; + { + E T6m, T6n, T65, T68; + T65 = FMA(KP968479752, T64, T63); + T6h = FNMS(KP968479752, T64, T63); + ci[WS(rs, 10)] = FMA(T1X, T25, T26); + T68 = FNMS(KP845997307, T67, T66); + T6g = FMA(KP845997307, T67, T66); + cr[WS(rs, 10)] = FNMS(T22, T25, T21); + T6m = FNMS(KP968479752, T5X, T5Q); + T5Y = FMA(KP968479752, T5X, T5Q); + T5J = FMA(KP845997307, T5I, T5B); + T6n = FNMS(KP845997307, T5I, T5B); + T6z = FMA(KP560319534, T65, T68); + T69 = FNMS(KP681693190, T68, T65); + T6o = FMA(KP681693190, T6n, T6m); + T6E = FNMS(KP560319534, T6m, T6n); + } + { + E T62, T6l, T6I, T6L, T6H, T6K; + { + E T6Q, T6O, T6y, T6D, T6S; + { + E T6N, T5Z, T61, T6i, T6k; + T6N = W[2]; + T5Z = FMA(KP906616052, T5Y, T5J); + T61 = FNMS(KP906616052, T5Y, T5J); + T6i = FNMS(KP906616052, T6h, T6g); + T6k = FMA(KP906616052, T6h, T6g); + T6Q = W[3]; + { + E T60, T6j, T6R, T6P; + T60 = FNMS(KP249506682, T5Z, T5u); + T6O = FMA(KP998026728, T5Z, T5u); + T6j = FNMS(KP249506682, T6i, T6f); + T6R = FMA(KP998026728, T6i, T6f); + T6y = FMA(KP557913902, T61, T60); + T62 = FNMS(KP557913902, T61, T60); + T6P = T6N * T6O; + T6l = FNMS(KP557913902, T6k, T6j); + T6D = FMA(KP557913902, T6k, T6j); + T6S = T6N * T6R; + cr[WS(rs, 2)] = FNMS(T6Q, T6R, T6P); + } + } + T6A = FNMS(KP949179823, T6z, T6y); + T6I = FMA(KP949179823, T6z, T6y); + T6L = FNMS(KP949179823, T6E, T6D); + T6F = 
FMA(KP949179823, T6E, T6D); + ci[WS(rs, 2)] = FMA(T6Q, T6O, T6S); + T6H = W[32]; + T6K = W[33]; + } + { + E T6a, T6s, T6v, T6p, T6c, T6q, T6b, T6M, T6J, T5r; + T6a = FNMS(KP860541664, T69, T62); + T6s = FMA(KP860541664, T69, T62); + T6v = FMA(KP860541664, T6o, T6l); + T6p = FNMS(KP860541664, T6o, T6l); + T6M = T6H * T6L; + T6J = T6H * T6I; + T5r = W[12]; + T6c = W[13]; + ci[WS(rs, 17)] = FMA(T6K, T6I, T6M); + cr[WS(rs, 17)] = FNMS(T6K, T6L, T6J); + T6q = T5r * T6p; + T6b = T5r * T6a; + { + E T6r, T6u, T6w, T6t, T6x; + ci[WS(rs, 7)] = FMA(T6c, T6a, T6q); + cr[WS(rs, 7)] = FNMS(T6c, T6p, T6b); + T6r = W[42]; + T6u = W[43]; + T6w = T6r * T6v; + T6t = T6r * T6s; + T6x = W[22]; + T6C = W[23]; + ci[WS(rs, 22)] = FMA(T6u, T6s, T6w); + cr[WS(rs, 22)] = FNMS(T6u, T6v, T6t); + T6G = T6x * T6F; + T6B = T6x * T6A; + } + } + } + } + } + { + E T7u, T7D, T7n, T7w, T7v, T7E; + { + E T78, T7t, T7N, T71, T7C, T7S, T7y, T7k; + { + E T7j, T7g, T7A, T7B, T7r, T7s; + T7r = FNMS(KP734762448, T7i, T7h); + T7j = FMA(KP734762448, T7i, T7h); + T7g = FMA(KP772036680, T7f, T7e); + T7s = FNMS(KP772036680, T7f, T7e); + ci[WS(rs, 12)] = FMA(T6C, T6A, T6G); + cr[WS(rs, 12)] = FNMS(T6C, T6F, T6B); + T7A = FNMS(KP734762448, T77, T74); + T78 = FMA(KP734762448, T77, T74); + T7t = FNMS(KP621716863, T7s, T7r); + T7N = FMA(KP614372930, T7r, T7s); + T71 = FMA(KP772036680, T70, T6X); + T7B = FNMS(KP772036680, T70, T6X); + T7C = FNMS(KP621716863, T7B, T7A); + T7S = FMA(KP614372930, T7A, T7B); + T7y = FNMS(KP994076283, T7j, T7g); + T7k = FMA(KP994076283, T7j, T7g); + } + { + E T7c, T6T, T7x, T7l, T79, T7p; + T7c = W[5]; + T6T = W[4]; + T7x = FNMS(KP249506682, T7k, T7d); + T7l = FMA(KP998026728, T7k, T7d); + T79 = FMA(KP994076283, T78, T71); + T7p = FNMS(KP994076283, T78, T71); + { + E T7z, T7Y, T7Z, T7T, T7q, T7O, T7X, T7L, T7Q, T7P, T7U; + { + E T7V, T80, T7b, T7m, T7W; + { + E T7R, T7o, T7a, T7M; + T7V = W[34]; + T7R = FMA(KP557913902, T7y, T7x); + T7z = FNMS(KP557913902, T7y, T7x); + T7Y = W[35]; + T7o = 
FNMS(KP249506682, T79, T6U); + T7a = FMA(KP998026728, T79, T6U); + T7Z = FMA(KP949179823, T7S, T7R); + T7T = FNMS(KP949179823, T7S, T7R); + T7M = FMA(KP557913902, T7p, T7o); + T7q = FNMS(KP557913902, T7p, T7o); + T7b = T6T * T7a; + T7m = T7c * T7a; + T7W = FNMS(KP949179823, T7N, T7M); + T7O = FMA(KP949179823, T7N, T7M); + } + cr[WS(rs, 3)] = FNMS(T7c, T7l, T7b); + ci[WS(rs, 3)] = FMA(T6T, T7l, T7m); + T80 = T7Y * T7W; + T7X = T7V * T7W; + T7L = W[24]; + T7Q = W[25]; + ci[WS(rs, 18)] = FMA(T7V, T7Z, T80); + } + cr[WS(rs, 18)] = FNMS(T7Y, T7Z, T7X); + T7P = T7L * T7O; + T7U = T7Q * T7O; + { + E T7J, T7F, T7I, T7H, T7K, T7G; + T7u = FMA(KP943557151, T7t, T7q); + T7G = FNMS(KP943557151, T7t, T7q); + cr[WS(rs, 13)] = FNMS(T7Q, T7T, T7P); + ci[WS(rs, 13)] = FMA(T7L, T7T, T7U); + T7J = FMA(KP943557151, T7C, T7z); + T7D = FNMS(KP943557151, T7C, T7z); + T7F = W[44]; + T7I = W[45]; + T7n = W[14]; + T7H = T7F * T7G; + T7K = T7I * T7G; + T7w = W[15]; + T7v = T7n * T7u; + cr[WS(rs, 23)] = FNMS(T7I, T7J, T7H); + ci[WS(rs, 23)] = FMA(T7F, T7J, T7K); + } + } + } + } + T7E = T7w * T7u; + cr[WS(rs, 8)] = FNMS(T7w, T7D, T7v); + { + E T3F, T4K, T4X, T4j, T4M, T4L, T4Y; + { + E T4P, T4O, T4y, T4r, T4J, T57, T4N, T5c, T4W; + { + E T4U, T4V, T4F, T4I; + T4F = FNMS(KP912575812, T4E, T4D); + T4P = FMA(KP912575812, T4E, T4D); + T4O = FMA(KP912018591, T4H, T4G); + T4I = FNMS(KP912018591, T4H, T4G); + ci[WS(rs, 8)] = FMA(T7n, T7D, T7E); + T4y = FMA(KP912575812, T4x, T4u); + T4U = FNMS(KP912575812, T4x, T4u); + T4V = FMA(KP912018591, T4q, T4n); + T4r = FNMS(KP912018591, T4q, T4n); + T4J = FNMS(KP726211448, T4I, T4F); + T57 = FMA(KP525970792, T4F, T4I); + T3F = FMA(KP951056516, T3E, T3B); + T4N = FNMS(KP951056516, T3E, T3B); + T5c = FMA(KP525970792, T4U, T4V); + T4W = FNMS(KP726211448, T4V, T4U); + } + { + E T5o, T4S, T4B, T5l, T5p, T4R, T4A, T5m, T4Q, T4z; + T5o = W[7]; + T4Q = FMA(KP851038619, T4P, T4O); + T4S = FNMS(KP851038619, T4P, T4O); + T4z = FMA(KP851038619, T4y, T4r); + T4B = 
FNMS(KP851038619, T4y, T4r); + T5l = W[6]; + T5p = FMA(KP992114701, T4Q, T4N); + T4R = FNMS(KP248028675, T4Q, T4N); + T4A = FMA(KP248028675, T4z, T4k); + T5m = FNMS(KP992114701, T4z, T4k); + { + E T4T, T4C, T5d, T58, T55, T5a, T59, T5e; + { + E T5f, T5j, T5i, T5h, T5k, T5g; + T5f = W[36]; + { + E T5b, T56, T5n, T5q; + T4T = FNMS(KP554608978, T4S, T4R); + T5b = FMA(KP554608978, T4S, T4R); + T56 = FNMS(KP554608978, T4B, T4A); + T4C = FMA(KP554608978, T4B, T4A); + T5n = T5l * T5m; + T5q = T5o * T5m; + T5j = FMA(KP943557151, T5c, T5b); + T5d = FNMS(KP943557151, T5c, T5b); + T5g = FMA(KP943557151, T57, T56); + T58 = FNMS(KP943557151, T57, T56); + cr[WS(rs, 4)] = FNMS(T5o, T5p, T5n); + ci[WS(rs, 4)] = FMA(T5l, T5p, T5q); + } + T5i = W[37]; + T5h = T5f * T5g; + T55 = W[26]; + T5k = T5i * T5g; + T5a = W[27]; + cr[WS(rs, 19)] = FNMS(T5i, T5j, T5h); + T59 = T55 * T58; + ci[WS(rs, 19)] = FMA(T5f, T5j, T5k); + } + T5e = T5a * T58; + { + E T53, T4Z, T52, T51, T54, T50; + cr[WS(rs, 14)] = FNMS(T5a, T5d, T59); + T4K = FNMS(KP803003575, T4J, T4C); + T50 = FMA(KP803003575, T4J, T4C); + ci[WS(rs, 14)] = FMA(T55, T5d, T5e); + T4X = FNMS(KP803003575, T4W, T4T); + T53 = FMA(KP803003575, T4W, T4T); + T4Z = W[46]; + T52 = W[47]; + T4j = W[16]; + T51 = T4Z * T50; + T54 = T52 * T50; + T4M = W[17]; + T4L = T4j * T4K; + cr[WS(rs, 24)] = FNMS(T52, T53, T51); + ci[WS(rs, 24)] = FMA(T4Z, T53, T54); + } + } + } + } + T4Y = T4M * T4K; + cr[WS(rs, 9)] = FNMS(T4M, T4X, T4L); + { + E T3G, T3H, T2P, T3k, T3Z, T3v, T3O, T44; + { + E T3M, T3N, T3r, T3u; + T3G = FNMS(KP871714437, T3q, T3p); + T3r = FMA(KP871714437, T3q, T3p); + T3u = FNMS(KP831864738, T3t, T3s); + T3H = FMA(KP831864738, T3t, T3s); + ci[WS(rs, 9)] = FMA(T4j, T4X, T4Y); + T3M = FNMS(KP871714437, T2O, T2z); + T2P = FMA(KP871714437, T2O, T2z); + T3k = FMA(KP831864738, T3j, T34); + T3N = FNMS(KP831864738, T3j, T34); + T3Z = FMA(KP683113946, T3r, T3u); + T3v = FNMS(KP559154169, T3u, T3r); + T3O = FMA(KP559154169, T3N, T3M); + T44 = 
FNMS(KP683113946, T3M, T3N); + } + { + E T4g, T3K, T3n, T4d, T3J, T4h, T4e, T3m, T3I, T3l; + T4g = W[1]; + T3K = FMA(KP904730450, T3H, T3G); + T3I = FNMS(KP904730450, T3H, T3G); + T3n = FNMS(KP904730450, T3k, T2P); + T3l = FMA(KP904730450, T3k, T2P); + T4d = W[0]; + T3J = FNMS(KP242145790, T3I, T3F); + T4h = FMA(KP968583161, T3I, T3F); + T4e = FMA(KP968583161, T3l, T2k); + T3m = FNMS(KP242145790, T3l, T2k); + { + E T3L, T3o, T45, T40, T3X, T42, T41, T46; + { + E T47, T4b, T4a, T49, T4c, T48; + T47 = W[30]; + { + E T43, T3Y, T4f, T4i; + T43 = FNMS(KP541454447, T3K, T3J); + T3L = FMA(KP541454447, T3K, T3J); + T3o = FMA(KP541454447, T3n, T3m); + T3Y = FNMS(KP541454447, T3n, T3m); + T4f = T4d * T4e; + T4i = T4g * T4e; + T45 = FNMS(KP833417178, T44, T43); + T4b = FMA(KP833417178, T44, T43); + T40 = FNMS(KP833417178, T3Z, T3Y); + T48 = FMA(KP833417178, T3Z, T3Y); + cr[WS(rs, 1)] = FNMS(T4g, T4h, T4f); + ci[WS(rs, 1)] = FMA(T4d, T4h, T4i); + } + T4a = W[31]; + T49 = T47 * T48; + T3X = W[20]; + T4c = T4a * T48; + T42 = W[21]; + cr[WS(rs, 16)] = FNMS(T4a, T4b, T49); + T41 = T3X * T40; + ci[WS(rs, 16)] = FMA(T47, T4b, T4c); + } + T46 = T42 * T40; + { + E T3V, T3R, T3U, T3T, T3W, T3S; + cr[WS(rs, 11)] = FNMS(T42, T45, T41); + T3S = FMA(KP921177326, T3v, T3o); + T3w = FNMS(KP921177326, T3v, T3o); + ci[WS(rs, 11)] = FMA(T3X, T45, T46); + T3V = FNMS(KP921177326, T3O, T3L); + T3P = FMA(KP921177326, T3O, T3L); + T3R = W[40]; + T3U = W[41]; + T2d = W[10]; + T3T = T3R * T3S; + T3W = T3U * T3S; + T3y = W[11]; + T3x = T2d * T3w; + cr[WS(rs, 21)] = FNMS(T3U, T3V, T3T); + ci[WS(rs, 21)] = FMA(T3R, T3V, T3W); + } + } + } + } + } + } + } + } + cr[WS(rs, 6)] = FNMS(T3y, T3P, T3x); + T3Q = T3y * T3w; + ci[WS(rs, 6)] = FMA(T2d, T3P, T3Q); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 25}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hb_25", twinstr, &GENUS, {84, 48, 316, 0} }; + +void X(codelet_hb_25) (planner *p) { + X(khc2hc_register) (p, hb_25, &desc); 
+} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -dif -name hb_25 -include hb.h */ + +/* + * This function contains 400 FP additions, 280 FP multiplications, + * (or, 260 additions, 140 multiplications, 140 fused multiply/add), + * 107 stack variables, 20 constants, and 100 memory accesses + */ +#include "hb.h" + +static void hb_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, 
rs)) { + E T9, T5Q, T3y, T39, T5v, Ti, Tr, Ts, TZ, T18, T1z, T2k, T4l, T3h, T44; + E T5d, T6C, T5C, T6o, T56, T6B, T5B, T6l, T2z, T4m, T3i, T47, T1K, T5w, T3c; + E T3B, T5R, TB, TK, TL, T1i, T1r, T1A, T2P, T4o, T3k, T4b, T5s, T6F, T5F; + E T6v, T5l, T6E, T5E, T6s, T34, T4p, T3l, T4e; + { + E T1, T4, T7, T8, T3x, T3w, T37, T38; + T1 = cr[0]; + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 5)]; + T3 = ci[WS(rs, 4)]; + T4 = T2 + T3; + T5 = cr[WS(rs, 10)]; + T6 = ci[WS(rs, 9)]; + T7 = T5 + T6; + T8 = T4 + T7; + T3x = T5 - T6; + T3w = T2 - T3; + } + T9 = T1 + T8; + T5Q = FMA(KP951056516, T3w, KP587785252 * T3x); + T3y = FNMS(KP951056516, T3x, KP587785252 * T3w); + T37 = FNMS(KP250000000, T8, T1); + T38 = KP559016994 * (T4 - T7); + T39 = T37 - T38; + T5v = T38 + T37; + } + { + E Ta, T27, T53, T2f, Th, T26, T10, T2p, T58, T2x, T17, T2o, Tj, T2n, T5a; + E T2t, Tq, T2s, TR, T2b, T51, T2h, TY, T2g; + { + E Tg, T2e, Td, T2d; + Ta = cr[WS(rs, 1)]; + { + E Te, Tf, Tb, Tc; + Te = cr[WS(rs, 11)]; + Tf = ci[WS(rs, 8)]; + Tg = Te + Tf; + T2e = Te - Tf; + Tb = cr[WS(rs, 6)]; + Tc = ci[WS(rs, 3)]; + Td = Tb + Tc; + T2d = Tb - Tc; + } + T27 = KP559016994 * (Td - Tg); + T53 = FMA(KP951056516, T2d, KP587785252 * T2e); + T2f = FNMS(KP951056516, T2e, KP587785252 * T2d); + Th = Td + Tg; + T26 = FNMS(KP250000000, Th, Ta); + } + { + E T16, T2w, T13, T2v; + T10 = ci[WS(rs, 20)]; + { + E T14, T15, T11, T12; + T14 = cr[WS(rs, 14)]; + T15 = cr[WS(rs, 19)]; + T16 = T14 + T15; + T2w = T15 - T14; + T11 = ci[WS(rs, 15)]; + T12 = cr[WS(rs, 24)]; + T13 = T11 - T12; + T2v = T11 + T12; + } + T2p = KP559016994 * (T13 + T16); + T58 = FMA(KP951056516, T2v, KP587785252 * T2w); + T2x = FNMS(KP951056516, T2w, KP587785252 * T2v); + T17 = T13 - T16; + T2o = FNMS(KP250000000, T17, T10); + } + { + E Tp, T2m, Tm, T2l; + Tj = cr[WS(rs, 4)]; + { + E Tn, To, Tk, Tl; + Tn = ci[WS(rs, 10)]; + To = ci[WS(rs, 5)]; + Tp = Tn + To; + T2m = Tn - To; + Tk = cr[WS(rs, 9)]; + Tl = ci[0]; + Tm = Tk + Tl; + T2l = Tk - Tl; + } + T2n = 
FNMS(KP951056516, T2m, KP587785252 * T2l); + T5a = FMA(KP951056516, T2l, KP587785252 * T2m); + T2t = KP559016994 * (Tm - Tp); + Tq = Tm + Tp; + T2s = FNMS(KP250000000, Tq, Tj); + } + { + E TX, T2a, TU, T29; + TR = ci[WS(rs, 23)]; + { + E TV, TW, TS, TT; + TV = ci[WS(rs, 13)]; + TW = cr[WS(rs, 16)]; + TX = TV - TW; + T2a = TV + TW; + TS = ci[WS(rs, 18)]; + TT = cr[WS(rs, 21)]; + TU = TS - TT; + T29 = TS + TT; + } + T2b = FNMS(KP951056516, T2a, KP587785252 * T29); + T51 = FMA(KP951056516, T29, KP587785252 * T2a); + T2h = KP559016994 * (TU - TX); + TY = TU + TX; + T2g = FNMS(KP250000000, TY, TR); + } + Ti = Ta + Th; + Tr = Tj + Tq; + Ts = Ti + Tr; + TZ = TR + TY; + T18 = T10 + T17; + T1z = TZ + T18; + { + E T2c, T42, T2j, T43, T28, T2i; + T28 = T26 - T27; + T2c = T28 - T2b; + T42 = T28 + T2b; + T2i = T2g - T2h; + T2j = T2f + T2i; + T43 = T2i - T2f; + T2k = FNMS(KP481753674, T2j, KP876306680 * T2c); + T4l = FMA(KP728968627, T43, KP684547105 * T42); + T3h = FMA(KP876306680, T2j, KP481753674 * T2c); + T44 = FNMS(KP684547105, T43, KP728968627 * T42); + } + { + E T59, T6n, T5c, T6m, T57, T5b; + T57 = T2t + T2s; + T59 = T57 - T58; + T6n = T57 + T58; + T5b = T2o + T2p; + T5c = T5a + T5b; + T6m = T5b - T5a; + T5d = FNMS(KP844327925, T5c, KP535826794 * T59); + T6C = FMA(KP637423989, T6m, KP770513242 * T6n); + T5C = FMA(KP535826794, T5c, KP844327925 * T59); + T6o = FNMS(KP637423989, T6n, KP770513242 * T6m); + } + { + E T52, T6j, T55, T6k, T50, T54; + T50 = T27 + T26; + T52 = T50 - T51; + T6j = T50 + T51; + T54 = T2h + T2g; + T55 = T53 + T54; + T6k = T54 - T53; + T56 = FNMS(KP248689887, T55, KP968583161 * T52); + T6B = FMA(KP535826794, T6k, KP844327925 * T6j); + T5B = FMA(KP968583161, T55, KP248689887 * T52); + T6l = FNMS(KP844327925, T6k, KP535826794 * T6j); + } + { + E T2r, T45, T2y, T46, T2q, T2u; + T2q = T2o - T2p; + T2r = T2n + T2q; + T45 = T2q - T2n; + T2u = T2s - T2t; + T2y = T2u - T2x; + T46 = T2u + T2x; + T2z = FMA(KP904827052, T2r, KP425779291 * T2y); + T4m = 
FNMS(KP992114701, T45, KP125333233 * T46); + T3i = FNMS(KP425779291, T2r, KP904827052 * T2y); + T47 = FMA(KP125333233, T45, KP992114701 * T46); + } + } + { + E T1C, T1F, T1I, T1J, T3b, T3a, T3z, T3A; + T1C = ci[WS(rs, 24)]; + { + E T1D, T1E, T1G, T1H; + T1D = ci[WS(rs, 19)]; + T1E = cr[WS(rs, 20)]; + T1F = T1D - T1E; + T1G = ci[WS(rs, 14)]; + T1H = cr[WS(rs, 15)]; + T1I = T1G - T1H; + T1J = T1F + T1I; + T3b = T1G + T1H; + T3a = T1D + T1E; + } + T1K = T1C + T1J; + T5w = FMA(KP951056516, T3a, KP587785252 * T3b); + T3c = FNMS(KP951056516, T3b, KP587785252 * T3a); + T3z = FNMS(KP250000000, T1J, T1C); + T3A = KP559016994 * (T1F - T1I); + T3B = T3z - T3A; + T5R = T3A + T3z; + } + { + E Tt, T2C, T5i, T2K, TA, T2B, T1a, T2G, T5g, T2M, T1h, T2L, TC, T2R, T5p; + E T2Z, TJ, T2Q, T1j, T2V, T5n, T31, T1q, T30; + { + E Tw, T2I, Tz, T2J; + Tt = cr[WS(rs, 2)]; + { + E Tu, Tv, Tx, Ty; + Tu = cr[WS(rs, 7)]; + Tv = ci[WS(rs, 2)]; + Tw = Tu + Tv; + T2I = Tu - Tv; + Tx = cr[WS(rs, 12)]; + Ty = ci[WS(rs, 7)]; + Tz = Tx + Ty; + T2J = Tx - Ty; + } + T2C = KP559016994 * (Tw - Tz); + T5i = FMA(KP951056516, T2I, KP587785252 * T2J); + T2K = FNMS(KP951056516, T2J, KP587785252 * T2I); + TA = Tw + Tz; + T2B = FNMS(KP250000000, TA, Tt); + } + { + E T1d, T2E, T1g, T2F; + T1a = ci[WS(rs, 22)]; + { + E T1b, T1c, T1e, T1f; + T1b = ci[WS(rs, 17)]; + T1c = cr[WS(rs, 22)]; + T1d = T1b - T1c; + T2E = T1b + T1c; + T1e = ci[WS(rs, 12)]; + T1f = cr[WS(rs, 17)]; + T1g = T1e - T1f; + T2F = T1e + T1f; + } + T2G = FNMS(KP951056516, T2F, KP587785252 * T2E); + T5g = FMA(KP951056516, T2E, KP587785252 * T2F); + T2M = KP559016994 * (T1d - T1g); + T1h = T1d + T1g; + T2L = FNMS(KP250000000, T1h, T1a); + } + { + E TI, T2Y, TF, T2X; + TC = cr[WS(rs, 3)]; + { + E TG, TH, TD, TE; + TG = ci[WS(rs, 11)]; + TH = ci[WS(rs, 6)]; + TI = TG + TH; + T2Y = TG - TH; + TD = cr[WS(rs, 8)]; + TE = ci[WS(rs, 1)]; + TF = TD + TE; + T2X = TD - TE; + } + T2R = KP559016994 * (TF - TI); + T5p = FMA(KP951056516, T2X, KP587785252 * T2Y); + 
T2Z = FNMS(KP951056516, T2Y, KP587785252 * T2X); + TJ = TF + TI; + T2Q = FNMS(KP250000000, TJ, TC); + } + { + E T1p, T2U, T1m, T2T; + T1j = ci[WS(rs, 21)]; + { + E T1n, T1o, T1k, T1l; + T1n = cr[WS(rs, 13)]; + T1o = cr[WS(rs, 18)]; + T1p = T1n + T1o; + T2U = T1o - T1n; + T1k = ci[WS(rs, 16)]; + T1l = cr[WS(rs, 23)]; + T1m = T1k - T1l; + T2T = T1k + T1l; + } + T2V = FNMS(KP951056516, T2U, KP587785252 * T2T); + T5n = FMA(KP951056516, T2T, KP587785252 * T2U); + T31 = KP559016994 * (T1m + T1p); + T1q = T1m - T1p; + T30 = FNMS(KP250000000, T1q, T1j); + } + TB = Tt + TA; + TK = TC + TJ; + TL = TB + TK; + T1i = T1a + T1h; + T1r = T1j + T1q; + T1A = T1i + T1r; + { + E T2H, T49, T2O, T4a, T2D, T2N; + T2D = T2B - T2C; + T2H = T2D - T2G; + T49 = T2D + T2G; + T2N = T2L - T2M; + T2O = T2K + T2N; + T4a = T2N - T2K; + T2P = FNMS(KP844327925, T2O, KP535826794 * T2H); + T4o = FMA(KP062790519, T4a, KP998026728 * T49); + T3k = FMA(KP535826794, T2O, KP844327925 * T2H); + T4b = FNMS(KP998026728, T4a, KP062790519 * T49); + } + { + E T5o, T6u, T5r, T6t, T5m, T5q; + T5m = T2R + T2Q; + T5o = T5m - T5n; + T6u = T5m + T5n; + T5q = T30 + T31; + T5r = T5p + T5q; + T6t = T5q - T5p; + T5s = FNMS(KP684547105, T5r, KP728968627 * T5o); + T6F = FNMS(KP992114701, T6t, KP125333233 * T6u); + T5F = FMA(KP728968627, T5r, KP684547105 * T5o); + T6v = FMA(KP125333233, T6t, KP992114701 * T6u); + } + { + E T5h, T6r, T5k, T6q, T5f, T5j; + T5f = T2C + T2B; + T5h = T5f - T5g; + T6r = T5f + T5g; + T5j = T2M + T2L; + T5k = T5i + T5j; + T6q = T5j - T5i; + T5l = FNMS(KP481753674, T5k, KP876306680 * T5h); + T6E = FNMS(KP425779291, T6q, KP904827052 * T6r); + T5E = FMA(KP876306680, T5k, KP481753674 * T5h); + T6s = FMA(KP904827052, T6q, KP425779291 * T6r); + } + { + E T2W, T4d, T33, T4c, T2S, T32; + T2S = T2Q - T2R; + T2W = T2S - T2V; + T4d = T2S + T2V; + T32 = T30 - T31; + T33 = T2Z + T32; + T4c = T32 - T2Z; + T34 = FNMS(KP998026728, T33, KP062790519 * T2W); + T4p = FNMS(KP637423989, T4c, KP770513242 * T4d); + T3l = 
FMA(KP062790519, T33, KP998026728 * T2W); + T4e = FMA(KP770513242, T4c, KP637423989 * T4d); + } + } + { + E TM, TQ, T1U, T1L, T1N, T1Z, T1t, T1V, T1y, T1Y; + { + E TO, TP, T1B, T1M; + TO = KP559016994 * (Ts - TL); + TM = Ts + TL; + TP = FNMS(KP250000000, TM, T9); + TQ = TO + TP; + T1U = TP - TO; + T1B = KP559016994 * (T1z - T1A); + T1L = T1z + T1A; + T1M = FNMS(KP250000000, T1L, T1K); + T1N = T1B + T1M; + T1Z = T1M - T1B; + } + { + E T19, T1s, T1w, T1x; + T19 = TZ - T18; + T1s = T1i - T1r; + T1t = FMA(KP951056516, T19, KP587785252 * T1s); + T1V = FNMS(KP951056516, T1s, KP587785252 * T19); + T1w = Ti - Tr; + T1x = TB - TK; + T1y = FMA(KP951056516, T1w, KP587785252 * T1x); + T1Y = FNMS(KP951056516, T1x, KP587785252 * T1w); + } + cr[0] = T9 + TM; + ci[0] = T1K + T1L; + { + E T1u, T1O, TN, T1v; + T1u = TQ - T1t; + T1O = T1y + T1N; + TN = W[8]; + T1v = W[9]; + cr[WS(rs, 5)] = FNMS(T1v, T1O, TN * T1u); + ci[WS(rs, 5)] = FMA(T1v, T1u, TN * T1O); + } + { + E T22, T24, T21, T23; + T22 = T1U + T1V; + T24 = T1Z - T1Y; + T21 = W[28]; + T23 = W[29]; + cr[WS(rs, 15)] = FNMS(T23, T24, T21 * T22); + ci[WS(rs, 15)] = FMA(T23, T22, T21 * T24); + } + { + E T1W, T20, T1T, T1X; + T1W = T1U - T1V; + T20 = T1Y + T1Z; + T1T = W[18]; + T1X = W[19]; + cr[WS(rs, 10)] = FNMS(T1X, T20, T1T * T1W); + ci[WS(rs, 10)] = FMA(T1X, T1W, T1T * T20); + } + { + E T1Q, T1S, T1P, T1R; + T1Q = TQ + T1t; + T1S = T1N - T1y; + T1P = W[38]; + T1R = W[39]; + cr[WS(rs, 20)] = FNMS(T1R, T1S, T1P * T1Q); + ci[WS(rs, 20)] = FMA(T1R, T1Q, T1P * T1S); + } + } + { + E T6H, T71, T6M, T74, T6i, T6x, T6y, T6z, T6Q, T6R, T6P, T6S; + { + E T6D, T6G, T6K, T6L; + T6D = T6B + T6C; + T6G = T6E - T6F; + T6H = FMA(KP951056516, T6D, KP587785252 * T6G); + T71 = FNMS(KP951056516, T6G, KP587785252 * T6D); + T6K = T6l - T6o; + T6L = T6v - T6s; + T6M = FMA(KP951056516, T6K, KP587785252 * T6L); + T74 = FNMS(KP951056516, T6L, KP587785252 * T6K); + } + { + E T6p, T6w, T6N, T6O; + T6i = T5v + T5w; + T6p = T6l + T6o; + T6w = T6s + T6v; + 
T6x = T6p - T6w; + T6y = FNMS(KP250000000, T6x, T6i); + T6z = KP559016994 * (T6p + T6w); + T6Q = T5R - T5Q; + T6N = T6B - T6C; + T6O = T6E + T6F; + T6R = T6N + T6O; + T6P = KP559016994 * (T6N - T6O); + T6S = FNMS(KP250000000, T6R, T6Q); + } + { + E T7c, T7e, T7b, T7d; + T7c = T6i + T6x; + T7e = T6Q + T6R; + T7b = W[6]; + T7d = W[7]; + cr[WS(rs, 4)] = FNMS(T7d, T7e, T7b * T7c); + ci[WS(rs, 4)] = FMA(T7d, T7c, T7b * T7e); + } + { + E T72, T78, T76, T7a, T70, T75; + T70 = T6y - T6z; + T72 = T70 - T71; + T78 = T70 + T71; + T75 = T6S - T6P; + T76 = T74 + T75; + T7a = T75 - T74; + { + E T6Z, T73, T77, T79; + T6Z = W[26]; + T73 = W[27]; + cr[WS(rs, 14)] = FNMS(T73, T76, T6Z * T72); + ci[WS(rs, 14)] = FMA(T73, T72, T6Z * T76); + T77 = W[36]; + T79 = W[37]; + cr[WS(rs, 19)] = FNMS(T79, T7a, T77 * T78); + ci[WS(rs, 19)] = FMA(T79, T78, T77 * T7a); + } + } + { + E T6I, T6W, T6U, T6Y, T6A, T6T; + T6A = T6y + T6z; + T6I = T6A - T6H; + T6W = T6A + T6H; + T6T = T6P + T6S; + T6U = T6M + T6T; + T6Y = T6T - T6M; + { + E T6h, T6J, T6V, T6X; + T6h = W[16]; + T6J = W[17]; + cr[WS(rs, 9)] = FNMS(T6J, T6U, T6h * T6I); + ci[WS(rs, 9)] = FMA(T6J, T6I, T6h * T6U); + T6V = W[46]; + T6X = W[47]; + cr[WS(rs, 24)] = FNMS(T6X, T6Y, T6V * T6W); + ci[WS(rs, 24)] = FMA(T6X, T6W, T6V * T6Y); + } + } + } + { + E T3n, T3N, T3s, T3Q, T3d, T3e, T36, T3f, T3C, T3D, T3v, T3E; + { + E T3j, T3m, T3q, T3r; + T3j = T3h - T3i; + T3m = T3k - T3l; + T3n = FMA(KP951056516, T3j, KP587785252 * T3m); + T3N = FNMS(KP951056516, T3m, KP587785252 * T3j); + T3q = T2k + T2z; + T3r = T2P - T34; + T3s = FMA(KP951056516, T3q, KP587785252 * T3r); + T3Q = FNMS(KP951056516, T3r, KP587785252 * T3q); + } + { + E T2A, T35, T3t, T3u; + T3d = T39 - T3c; + T2A = T2k - T2z; + T35 = T2P + T34; + T3e = T2A + T35; + T36 = KP559016994 * (T2A - T35); + T3f = FNMS(KP250000000, T3e, T3d); + T3C = T3y + T3B; + T3t = T3h + T3i; + T3u = T3k + T3l; + T3D = T3t + T3u; + T3v = KP559016994 * (T3t - T3u); + T3E = FNMS(KP250000000, T3D, T3C); + } + { 
+ E T3Y, T40, T3X, T3Z; + T3Y = T3d + T3e; + T40 = T3C + T3D; + T3X = W[2]; + T3Z = W[3]; + cr[WS(rs, 2)] = FNMS(T3Z, T40, T3X * T3Y); + ci[WS(rs, 2)] = FMA(T3Z, T3Y, T3X * T40); + } + { + E T3O, T3U, T3S, T3W, T3M, T3R; + T3M = T3f - T36; + T3O = T3M - T3N; + T3U = T3M + T3N; + T3R = T3E - T3v; + T3S = T3Q + T3R; + T3W = T3R - T3Q; + { + E T3L, T3P, T3T, T3V; + T3L = W[22]; + T3P = W[23]; + cr[WS(rs, 12)] = FNMS(T3P, T3S, T3L * T3O); + ci[WS(rs, 12)] = FMA(T3P, T3O, T3L * T3S); + T3T = W[32]; + T3V = W[33]; + cr[WS(rs, 17)] = FNMS(T3V, T3W, T3T * T3U); + ci[WS(rs, 17)] = FMA(T3V, T3U, T3T * T3W); + } + } + { + E T3o, T3I, T3G, T3K, T3g, T3F; + T3g = T36 + T3f; + T3o = T3g - T3n; + T3I = T3g + T3n; + T3F = T3v + T3E; + T3G = T3s + T3F; + T3K = T3F - T3s; + { + E T25, T3p, T3H, T3J; + T25 = W[12]; + T3p = W[13]; + cr[WS(rs, 7)] = FNMS(T3p, T3G, T25 * T3o); + ci[WS(rs, 7)] = FMA(T3p, T3o, T25 * T3G); + T3H = W[42]; + T3J = W[43]; + cr[WS(rs, 22)] = FNMS(T3J, T3K, T3H * T3I); + ci[WS(rs, 22)] = FMA(T3J, T3I, T3H * T3K); + } + } + } + { + E T4r, T4L, T4w, T4O, T4h, T4i, T4g, T4j, T4A, T4B, T4z, T4C; + { + E T4n, T4q, T4u, T4v; + T4n = T4l - T4m; + T4q = T4o - T4p; + T4r = FMA(KP951056516, T4n, KP587785252 * T4q); + T4L = FNMS(KP951056516, T4q, KP587785252 * T4n); + T4u = T44 + T47; + T4v = T4b + T4e; + T4w = FMA(KP951056516, T4u, KP587785252 * T4v); + T4O = FNMS(KP951056516, T4v, KP587785252 * T4u); + } + { + E T48, T4f, T4x, T4y; + T4h = T39 + T3c; + T48 = T44 - T47; + T4f = T4b - T4e; + T4i = T48 + T4f; + T4g = KP559016994 * (T48 - T4f); + T4j = FNMS(KP250000000, T4i, T4h); + T4A = T3B - T3y; + T4x = T4l + T4m; + T4y = T4o + T4p; + T4B = T4x + T4y; + T4z = KP559016994 * (T4x - T4y); + T4C = FNMS(KP250000000, T4B, T4A); + } + { + E T4W, T4Y, T4V, T4X; + T4W = T4h + T4i; + T4Y = T4A + T4B; + T4V = W[4]; + T4X = W[5]; + cr[WS(rs, 3)] = FNMS(T4X, T4Y, T4V * T4W); + ci[WS(rs, 3)] = FMA(T4X, T4W, T4V * T4Y); + } + { + E T4M, T4S, T4Q, T4U, T4K, T4P; + T4K = T4j - T4g; + 
T4M = T4K - T4L; + T4S = T4K + T4L; + T4P = T4C - T4z; + T4Q = T4O + T4P; + T4U = T4P - T4O; + { + E T4J, T4N, T4R, T4T; + T4J = W[24]; + T4N = W[25]; + cr[WS(rs, 13)] = FNMS(T4N, T4Q, T4J * T4M); + ci[WS(rs, 13)] = FMA(T4N, T4M, T4J * T4Q); + T4R = W[34]; + T4T = W[35]; + cr[WS(rs, 18)] = FNMS(T4T, T4U, T4R * T4S); + ci[WS(rs, 18)] = FMA(T4T, T4S, T4R * T4U); + } + } + { + E T4s, T4G, T4E, T4I, T4k, T4D; + T4k = T4g + T4j; + T4s = T4k - T4r; + T4G = T4k + T4r; + T4D = T4z + T4C; + T4E = T4w + T4D; + T4I = T4D - T4w; + { + E T41, T4t, T4F, T4H; + T41 = W[14]; + T4t = W[15]; + cr[WS(rs, 8)] = FNMS(T4t, T4E, T41 * T4s); + ci[WS(rs, 8)] = FMA(T4t, T4s, T41 * T4E); + T4F = W[44]; + T4H = W[45]; + cr[WS(rs, 23)] = FNMS(T4H, T4I, T4F * T4G); + ci[WS(rs, 23)] = FMA(T4H, T4G, T4F * T4I); + } + } + } + { + E T5H, T63, T5M, T66, T5x, T5y, T5u, T5z, T5S, T5T, T5P, T5U; + { + E T5D, T5G, T5K, T5L; + T5D = T5B - T5C; + T5G = T5E - T5F; + T5H = FMA(KP951056516, T5D, KP587785252 * T5G); + T63 = FNMS(KP951056516, T5G, KP587785252 * T5D); + T5K = T56 - T5d; + T5L = T5l - T5s; + T5M = FMA(KP951056516, T5K, KP587785252 * T5L); + T66 = FNMS(KP951056516, T5L, KP587785252 * T5K); + } + { + E T5e, T5t, T5N, T5O; + T5x = T5v - T5w; + T5e = T56 + T5d; + T5t = T5l + T5s; + T5y = T5e + T5t; + T5u = KP559016994 * (T5e - T5t); + T5z = FNMS(KP250000000, T5y, T5x); + T5S = T5Q + T5R; + T5N = T5B + T5C; + T5O = T5E + T5F; + T5T = T5N + T5O; + T5P = KP559016994 * (T5N - T5O); + T5U = FNMS(KP250000000, T5T, T5S); + } + { + E T6e, T6g, T6d, T6f; + T6e = T5x + T5y; + T6g = T5S + T5T; + T6d = W[0]; + T6f = W[1]; + cr[WS(rs, 1)] = FNMS(T6f, T6g, T6d * T6e); + ci[WS(rs, 1)] = FMA(T6f, T6e, T6d * T6g); + } + { + E T64, T6a, T68, T6c, T62, T67; + T62 = T5z - T5u; + T64 = T62 - T63; + T6a = T62 + T63; + T67 = T5U - T5P; + T68 = T66 + T67; + T6c = T67 - T66; + { + E T61, T65, T69, T6b; + T61 = W[20]; + T65 = W[21]; + cr[WS(rs, 11)] = FNMS(T65, T68, T61 * T64); + ci[WS(rs, 11)] = FMA(T65, T64, T61 * T68); + 
T69 = W[30]; + T6b = W[31]; + cr[WS(rs, 16)] = FNMS(T6b, T6c, T69 * T6a); + ci[WS(rs, 16)] = FMA(T6b, T6a, T69 * T6c); + } + } + { + E T5I, T5Y, T5W, T60, T5A, T5V; + T5A = T5u + T5z; + T5I = T5A - T5H; + T5Y = T5A + T5H; + T5V = T5P + T5U; + T5W = T5M + T5V; + T60 = T5V - T5M; + { + E T4Z, T5J, T5X, T5Z; + T4Z = W[10]; + T5J = W[11]; + cr[WS(rs, 6)] = FNMS(T5J, T5W, T4Z * T5I); + ci[WS(rs, 6)] = FMA(T5J, T5I, T4Z * T5W); + T5X = W[40]; + T5Z = W[41]; + cr[WS(rs, 21)] = FNMS(T5Z, T60, T5X * T5Y); + ci[WS(rs, 21)] = FMA(T5Z, T5Y, T5X * T60); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 25}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hb_25", twinstr, &GENUS, {260, 140, 140, 0} }; + +void X(codelet_hb_25) (planner *p) { + X(khc2hc_register) (p, hb_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 3 -dif -name hb_3 -include hb.h */ + +/* + * This function contains 16 FP additions, 14 FP multiplications, + * (or, 6 additions, 4 multiplications, 10 fused multiply/add), + * 27 stack variables, 2 constants, and 12 memory accesses + */ +#include "hb.h" + +static void hb_3(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs)) { + E Tk, Tj, Tn, Tl, Tm, To; + { + E T1, Td, T7, T8, T4, Tg, T2, T3; + T1 = cr[0]; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + Td = ci[WS(rs, 2)]; + T7 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 2)]; + T4 = T2 + T3; + Tg = T2 - T3; + { + E T5, Tc, Tf, Ta, T9, Te, T6, Th, Ti, Tb; + T5 = W[0]; + T9 = T7 + T8; + Te = T7 - T8; + cr[0] = T1 + T4; + T6 = FNMS(KP500000000, T4, T1); + Tc = W[1]; + ci[0] = Td + Te; + Tf = FNMS(KP500000000, Te, Td); + Tk = FMA(KP866025403, T9, T6); + Ta = FNMS(KP866025403, T9, T6); + Tj = W[2]; + Tn = FNMS(KP866025403, Tg, Tf); + Th = FMA(KP866025403, Tg, Tf); + Ti = Tc * Ta; + Tb = T5 * Ta; + Tl = Tj * Tk; + Tm = W[3]; + ci[WS(rs, 1)] = FMA(T5, Th, Ti); + cr[WS(rs, 1)] = FNMS(Tc, Th, Tb); + } + } + cr[WS(rs, 2)] = FNMS(Tm, Tn, Tl); + To = Tm * Tk; + ci[WS(rs, 2)] = FMA(Tj, Tn, To); + } + } +} + +static const tw_instr 
twinstr[] = { + {TW_FULL, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 3, "hb_3", twinstr, &GENUS, {6, 4, 10, 0} }; + +void X(codelet_hb_3) (planner *p) { + X(khc2hc_register) (p, hb_3, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 3 -dif -name hb_3 -include hb.h */ + +/* + * This function contains 16 FP additions, 12 FP multiplications, + * (or, 10 additions, 6 multiplications, 6 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "hb.h" + +static void hb_3(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs)) { + E T1, T4, Ta, Te, T5, T8, Tb, Tf; + { + E T2, T3, T6, T7; + T1 = cr[0]; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + T4 = T2 + T3; + Ta = FNMS(KP500000000, T4, T1); + Te = KP866025403 * (T2 - T3); + T5 = ci[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + T7 = cr[WS(rs, 2)]; + T8 = T6 - T7; + Tb = KP866025403 * (T6 + T7); + Tf = FNMS(KP500000000, T8, T5); + } + cr[0] = T1 + T4; + ci[0] = T5 + T8; + { + E Tc, Tg, T9, Td; + Tc = Ta - Tb; + Tg = Te + Tf; + T9 = W[0]; + Td = W[1]; + cr[WS(rs, 1)] = FNMS(Td, Tg, T9 * Tc); + ci[WS(rs, 1)] = FMA(T9, Tg, Td * Tc); + } + { + E Ti, Tk, Th, Tj; + Ti = Ta + Tb; + Tk = Tf - Te; + Th = W[2]; + Tj = W[3]; + cr[WS(rs, 2)] = FNMS(Tj, Tk, Th * Ti); + ci[WS(rs, 2)] = FMA(Th, Tk, Tj * Ti); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 3, "hb_3", twinstr, &GENUS, {10, 6, 6, 0} }; + +void X(codelet_hb_3) (planner *p) { + X(khc2hc_register) (p, hb_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1770 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:26 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 236 additions, 62 multiplications, 198 fused multiply/add), + * 135 stack variables, 7 constants, and 128 memory accesses + */ +#include "hb.h" + +static void hb_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + 
DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { + E T5o, T5r, T5q, T5n, T5s, T5p; + { + E T5K, Tf, T8k, T7k, T8x, T7N, T3i, T1i, T3v, T2L, T5f, T4v, T6T, T6m, T52; + E T42, TZ, T6X, T1X, T3p, T8p, T8B, T3o, T26, T58, T4n, T7T, T7z, T59, T4k; + E T6p, T6a, TK, T6W, T2o, T3m, T8s, T8A, T3l, T2x, T55, T4g, T7S, T7G, T56; + E T4d, T6o, T61, T5Q, T5N, T6f, Tu, T8y, T7r, T8l, T7Q, T3w, T1F, T45, T48; + E T3j, T2O, T53, T4y; + { + E T62, T69, T4j, T4i; + { + E T6l, T6i, T40, T41; + { + E T12, T3, T2D, T6, T6g, T2G, T6h, T15, Td, T6k, T1g, T2J, Ta, T17, T1a; + E T6j; + { + E T2E, T2F, T13, T14; + { + E T1, T2, T4, T5; + T1 = cr[0]; + T2 = ci[WS(rs, 15)]; + T4 = cr[WS(rs, 8)]; + T5 = ci[WS(rs, 7)]; + T2E = ci[WS(rs, 31)]; + T12 = T1 - T2; + T3 = T1 + T2; + T2D = T4 - T5; + T6 = T4 + T5; + T2F = cr[WS(rs, 16)]; + } + T13 = ci[WS(rs, 23)]; + T14 = cr[WS(rs, 24)]; + { + E Tb, Tc, T1d, T1e; + Tb = ci[WS(rs, 3)]; + T6g = T2E - T2F; + T2G = T2E + T2F; + T6h = T13 - T14; + T15 = T13 + T14; + Tc = cr[WS(rs, 12)]; + T1d = ci[WS(rs, 19)]; + T1e = cr[WS(rs, 28)]; + { + E T8, T1c, T1f, T9, T18, T19; + T8 = cr[WS(rs, 4)]; + Td = Tb + Tc; + T1c = Tb - Tc; + T6k = T1d - T1e; + T1f = T1d + T1e; + T9 = ci[WS(rs, 11)]; + T18 = ci[WS(rs, 27)]; + T19 = cr[WS(rs, 20)]; + T1g = T1c - T1f; + T2J = T1c + T1f; + Ta = T8 + T9; + T17 = T8 - T9; + T1a = T18 + T19; + T6j = T18 - T19; + } + } + } + { + E T2I, T7M, T7L, T16, T1h, T4u, T4t, T2H, T2K; + { + E T7i, T7, T1b, Te, T7j; + T7i = T3 - T6; + T7 = T3 + T6; + T2I = T17 + T1a; + T1b = T17 - T1a; + Te = Ta + Td; + T7M = Ta - Td; + T7j = T6k - T6j; + T6l = T6j + T6k; + T6i = T6g + T6h; + T7L = T6g - T6h; + T5K = T7 - Te; + Tf = T7 + Te; + T8k = T7i + T7j; + 
T7k = T7i - T7j; + T40 = T12 + T15; + T16 = T12 - T15; + T1h = T1b + T1g; + T4u = T1b - T1g; + } + T4t = T2G - T2D; + T2H = T2D + T2G; + T8x = T7M + T7L; + T7N = T7L - T7M; + T3i = FMA(KP707106781, T1h, T16); + T1i = FNMS(KP707106781, T1h, T16); + T2K = T2I - T2J; + T41 = T2I + T2J; + T3v = FMA(KP707106781, T2K, T2H); + T2L = FNMS(KP707106781, T2K, T2H); + T5f = FNMS(KP707106781, T4u, T4t); + T4v = FMA(KP707106781, T4u, T4t); + } + } + { + E T1Y, T1H, TR, T7w, T1K, T21, T65, T7t, TV, T1M, TU, T67, T1U, TW, T1N; + E T1O; + { + E TL, TM, TO, TP, T63, T64; + TL = ci[0]; + T6T = T6i + T6l; + T6m = T6i - T6l; + T52 = FMA(KP707106781, T41, T40); + T42 = FNMS(KP707106781, T41, T40); + TM = cr[WS(rs, 15)]; + TO = cr[WS(rs, 7)]; + TP = ci[WS(rs, 8)]; + { + E T1I, TN, TQ, T1J, T1Z, T20; + T1I = ci[WS(rs, 16)]; + T1Y = TL - TM; + TN = TL + TM; + T1H = TO - TP; + TQ = TO + TP; + T1J = cr[WS(rs, 31)]; + T1Z = ci[WS(rs, 24)]; + T20 = cr[WS(rs, 23)]; + TR = TN + TQ; + T7w = TN - TQ; + T1K = T1I + T1J; + T63 = T1I - T1J; + T64 = T1Z - T20; + T21 = T1Z + T20; + } + { + E TS, TT, T1S, T1T; + TS = cr[WS(rs, 3)]; + T65 = T63 + T64; + T7t = T63 - T64; + TT = ci[WS(rs, 12)]; + T1S = ci[WS(rs, 20)]; + T1T = cr[WS(rs, 27)]; + TV = ci[WS(rs, 4)]; + T1M = TS - TT; + TU = TS + TT; + T67 = T1S - T1T; + T1U = T1S + T1T; + TW = cr[WS(rs, 11)]; + T1N = ci[WS(rs, 28)]; + T1O = cr[WS(rs, 19)]; + } + } + { + E T4l, T1L, T24, T23, T8n, T7v, T1W, T8o, T7y, T4m, T22, T25; + { + E T1V, T7u, T7x, T1Q, T1R, TX; + T4l = T1H + T1K; + T1L = T1H - T1K; + T1R = TV - TW; + TX = TV + TW; + { + E T66, T1P, TY, T68; + T66 = T1N - T1O; + T1P = T1N + T1O; + T24 = T1R - T1U; + T1V = T1R + T1U; + T7u = TU - TX; + TY = TU + TX; + T68 = T66 + T67; + T7x = T67 - T66; + T23 = T1M - T1P; + T1Q = T1M + T1P; + TZ = TR + TY; + T62 = TR - TY; + T69 = T65 - T68; + T6X = T65 + T68; + } + T8n = T7u + T7t; + T7v = T7t - T7u; + T4j = T1Q + T1V; + T1W = T1Q - T1V; + T8o = T7w + T7x; + T7y = T7w - T7x; + } + T4i = T1Y + T21; + T22 = 
T1Y - T21; + T25 = T23 + T24; + T4m = T23 - T24; + T1X = FNMS(KP707106781, T1W, T1L); + T3p = FMA(KP707106781, T1W, T1L); + T8p = FNMS(KP414213562, T8o, T8n); + T8B = FMA(KP414213562, T8n, T8o); + T3o = FMA(KP707106781, T25, T22); + T26 = FNMS(KP707106781, T25, T22); + T58 = FMA(KP707106781, T4m, T4l); + T4n = FNMS(KP707106781, T4m, T4l); + T7T = FNMS(KP414213562, T7v, T7y); + T7z = FMA(KP414213562, T7y, T7v); + } + } + } + { + E T5T, T60, T4c, T4b; + { + E T2p, T28, TC, T7D, T2b, T2s, T5W, T7A, TG, T2d, TF, T5Y, T2l, TH, T2e; + E T2f; + { + E Tw, Tx, Tz, TA, T5U, T5V; + Tw = cr[WS(rs, 1)]; + T59 = FMA(KP707106781, T4j, T4i); + T4k = FNMS(KP707106781, T4j, T4i); + T6p = T69 - T62; + T6a = T62 + T69; + Tx = ci[WS(rs, 14)]; + Tz = cr[WS(rs, 9)]; + TA = ci[WS(rs, 6)]; + { + E T29, Ty, TB, T2a, T2q, T2r; + T29 = ci[WS(rs, 30)]; + T2p = Tw - Tx; + Ty = Tw + Tx; + T28 = Tz - TA; + TB = Tz + TA; + T2a = cr[WS(rs, 17)]; + T2q = ci[WS(rs, 22)]; + T2r = cr[WS(rs, 25)]; + TC = Ty + TB; + T7D = Ty - TB; + T2b = T29 + T2a; + T5U = T29 - T2a; + T5V = T2q - T2r; + T2s = T2q + T2r; + } + { + E TD, TE, T2j, T2k; + TD = cr[WS(rs, 5)]; + T5W = T5U + T5V; + T7A = T5U - T5V; + TE = ci[WS(rs, 10)]; + T2j = ci[WS(rs, 18)]; + T2k = cr[WS(rs, 29)]; + TG = ci[WS(rs, 2)]; + T2d = TD - TE; + TF = TD + TE; + T5Y = T2j - T2k; + T2l = T2j + T2k; + TH = cr[WS(rs, 13)]; + T2e = ci[WS(rs, 26)]; + T2f = cr[WS(rs, 21)]; + } + } + { + E T4e, T2c, T2v, T2u, T8q, T7C, T2n, T8r, T7F, T4f, T2t, T2w; + { + E T2m, T7B, T7E, T2h, T2i, TI; + T4e = T2b - T28; + T2c = T28 + T2b; + T2i = TG - TH; + TI = TG + TH; + { + E T5X, T2g, TJ, T5Z; + T5X = T2e - T2f; + T2g = T2e + T2f; + T2v = T2i - T2l; + T2m = T2i + T2l; + T7B = TF - TI; + TJ = TF + TI; + T5Z = T5X + T5Y; + T7E = T5Y - T5X; + T2u = T2d - T2g; + T2h = T2d + T2g; + TK = TC + TJ; + T5T = TC - TJ; + T60 = T5W - T5Z; + T6W = T5W + T5Z; + } + T8q = T7B + T7A; + T7C = T7A - T7B; + T4c = T2h + T2m; + T2n = T2h - T2m; + T8r = T7D + T7E; + T7F = T7D - T7E; + } + 
T4b = T2p + T2s; + T2t = T2p - T2s; + T2w = T2u + T2v; + T4f = T2v - T2u; + T2o = FNMS(KP707106781, T2n, T2c); + T3m = FMA(KP707106781, T2n, T2c); + T8s = FMA(KP414213562, T8r, T8q); + T8A = FNMS(KP414213562, T8q, T8r); + T3l = FMA(KP707106781, T2w, T2t); + T2x = FNMS(KP707106781, T2w, T2t); + T55 = FMA(KP707106781, T4f, T4e); + T4g = FNMS(KP707106781, T4f, T4e); + T7S = FMA(KP414213562, T7C, T7F); + T7G = FNMS(KP414213562, T7F, T7C); + } + } + { + E T44, T1D, Tm, T7o, T7p, T43, T1y, T47, T1s, Tt, T7m, T7l, T46, T1n; + { + E Tj, T1z, Ti, T5P, T1C, Tk, T1v, T1w; + { + E Tg, Th, T1A, T1B; + Tg = cr[WS(rs, 2)]; + T56 = FMA(KP707106781, T4c, T4b); + T4d = FNMS(KP707106781, T4c, T4b); + T6o = T5T + T60; + T61 = T5T - T60; + Th = ci[WS(rs, 13)]; + T1A = ci[WS(rs, 21)]; + T1B = cr[WS(rs, 26)]; + Tj = cr[WS(rs, 10)]; + T1z = Tg - Th; + Ti = Tg + Th; + T5P = T1A - T1B; + T1C = T1A + T1B; + Tk = ci[WS(rs, 5)]; + T1v = ci[WS(rs, 29)]; + T1w = cr[WS(rs, 18)]; + } + { + E T1u, Tl, T5O, T1x; + T44 = T1z + T1C; + T1D = T1z - T1C; + T1u = Tj - Tk; + Tl = Tj + Tk; + T5O = T1v - T1w; + T1x = T1v + T1w; + Tm = Ti + Tl; + T7o = Ti - Tl; + T7p = T5O - T5P; + T5Q = T5O + T5P; + T43 = T1x - T1u; + T1y = T1u + T1x; + } + } + { + E Tq, T1o, Tp, T5M, T1r, Tr, T1k, T1l; + { + E Tn, To, T1p, T1q; + Tn = ci[WS(rs, 1)]; + To = cr[WS(rs, 14)]; + T1p = ci[WS(rs, 25)]; + T1q = cr[WS(rs, 22)]; + Tq = cr[WS(rs, 6)]; + T1o = Tn - To; + Tp = Tn + To; + T5M = T1p - T1q; + T1r = T1p + T1q; + Tr = ci[WS(rs, 9)]; + T1k = ci[WS(rs, 17)]; + T1l = cr[WS(rs, 30)]; + } + { + E T1j, Ts, T5L, T1m; + T47 = T1o + T1r; + T1s = T1o - T1r; + T1j = Tq - Tr; + Ts = Tq + Tr; + T5L = T1k - T1l; + T1m = T1k + T1l; + Tt = Tp + Ts; + T7m = Tp - Ts; + T7l = T5L - T5M; + T5N = T5L + T5M; + T46 = T1j + T1m; + T1n = T1j - T1m; + } + } + { + E T7P, T7O, T2N, T1t, T1E, T2M, T7n, T7q, T4w, T4x; + T7P = T7m + T7l; + T7n = T7l - T7m; + T7q = T7o + T7p; + T7O = T7o - T7p; + T6f = Tm - Tt; + Tu = Tm + Tt; + T8y = T7q + T7n; + T7r = 
T7n - T7q; + T2N = FMA(KP414213562, T1n, T1s); + T1t = FNMS(KP414213562, T1s, T1n); + T1E = FMA(KP414213562, T1D, T1y); + T2M = FNMS(KP414213562, T1y, T1D); + T8l = T7O + T7P; + T7Q = T7O - T7P; + T3w = T1E + T1t; + T1F = T1t - T1E; + T45 = FNMS(KP414213562, T44, T43); + T4w = FMA(KP414213562, T43, T44); + T4x = FMA(KP414213562, T46, T47); + T48 = FNMS(KP414213562, T47, T46); + T3j = T2M + T2N; + T2O = T2M - T2N; + T53 = T4w + T4x; + T4y = T4w - T4x; + } + } + } + } + { + E T72, T5g, T49, T78, T77, T73, T7s, T7U, T7R, T7H, T3f, T3e, T3d; + { + E T5R, T8m, T8C, T8z, T8t, T8e, T86, T88, T8h, T8f, T8i, T8c, T8g; + { + E T6P, T6Q, T6Z, T6S, T6R; + { + E Tv, T10, T6V, T6Y, T6U; + T72 = Tf - Tu; + Tv = Tf + Tu; + T6U = T5Q + T5N; + T5R = T5N - T5Q; + T5g = T48 - T45; + T49 = T45 + T48; + T10 = TK + TZ; + T78 = TK - TZ; + T77 = T6T - T6U; + T6V = T6T + T6U; + T6Y = T6W + T6X; + T73 = T6X - T6W; + T6P = W[30]; + cr[0] = Tv + T10; + T6Q = Tv - T10; + ci[0] = T6V + T6Y; + T6Z = T6V - T6Y; + T6S = W[31]; + T6R = T6P * T6Q; + } + { + E T8O, T8W, T8Q, T8Z, T8X, T90, T8U, T8Y; + { + E T8R, T8S, T8M, T8N, T70; + T8M = FMA(KP707106781, T8l, T8k); + T8m = FNMS(KP707106781, T8l, T8k); + T8C = T8A - T8B; + T8N = T8A + T8B; + T70 = T6S * T6Q; + cr[WS(rs, 16)] = FNMS(T6S, T6Z, T6R); + T8R = FMA(KP707106781, T8y, T8x); + T8z = FNMS(KP707106781, T8y, T8x); + T8O = FNMS(KP923879532, T8N, T8M); + T8W = FMA(KP923879532, T8N, T8M); + ci[WS(rs, 16)] = FMA(T6P, T6Z, T70); + T8S = T8s + T8p; + T8t = T8p - T8s; + { + E T8L, T8T, T8P, T8V; + T8L = W[34]; + T8Q = W[35]; + T8V = W[2]; + T8Z = FMA(KP923879532, T8S, T8R); + T8T = FNMS(KP923879532, T8S, T8R); + T8P = T8L * T8O; + T8X = T8V * T8W; + T90 = T8V * T8Z; + T8U = T8L * T8T; + cr[WS(rs, 18)] = FNMS(T8Q, T8T, T8P); + T8Y = W[3]; + } + } + { + E T89, T8a, T84, T85; + T84 = FNMS(KP707106781, T7r, T7k); + T7s = FMA(KP707106781, T7r, T7k); + ci[WS(rs, 18)] = FMA(T8Q, T8O, T8U); + T85 = T7S + T7T; + T7U = T7S - T7T; + ci[WS(rs, 2)] = FMA(T8Y, T8W, 
T90); + cr[WS(rs, 2)] = FNMS(T8Y, T8Z, T8X); + T7R = FMA(KP707106781, T7Q, T7N); + T89 = FNMS(KP707106781, T7Q, T7N); + T8e = FMA(KP923879532, T85, T84); + T86 = FNMS(KP923879532, T85, T84); + T8a = T7G + T7z; + T7H = T7z - T7G; + { + E T83, T8b, T87, T8d; + T83 = W[26]; + T88 = W[27]; + T8d = W[58]; + T8h = FMA(KP923879532, T8a, T89); + T8b = FNMS(KP923879532, T8a, T89); + T87 = T83 * T86; + T8f = T8d * T8e; + T8i = T8d * T8h; + T8c = T83 * T8b; + cr[WS(rs, 14)] = FNMS(T88, T8b, T87); + T8g = W[59]; + } + } + } + } + { + E T5S, T6q, T6n, T6K, T6C, T6b, T6E, T6N, T6L, T6O, T6I, T6M; + { + E T6F, T6G, T6A, T6B; + T6A = T5K - T5R; + T5S = T5K + T5R; + ci[WS(rs, 14)] = FMA(T88, T86, T8c); + T6B = T6p - T6o; + T6q = T6o + T6p; + ci[WS(rs, 30)] = FMA(T8g, T8e, T8i); + cr[WS(rs, 30)] = FNMS(T8g, T8h, T8f); + T6n = T6f + T6m; + T6F = T6m - T6f; + T6K = FMA(KP707106781, T6B, T6A); + T6C = FNMS(KP707106781, T6B, T6A); + T6G = T61 - T6a; + T6b = T61 + T6a; + { + E T6z, T6H, T6D, T6J; + T6z = W[54]; + T6E = W[55]; + T6J = W[22]; + T6N = FMA(KP707106781, T6G, T6F); + T6H = FNMS(KP707106781, T6G, T6F); + T6D = T6z * T6C; + T6L = T6J * T6K; + T6O = T6J * T6N; + T6I = T6z * T6H; + cr[WS(rs, 28)] = FNMS(T6E, T6H, T6D); + T6M = W[23]; + } + } + { + E T8G, T8F, T8J, T8H, T8I, T8u; + ci[WS(rs, 28)] = FMA(T6E, T6C, T6I); + ci[WS(rs, 12)] = FMA(T6M, T6K, T6O); + cr[WS(rs, 12)] = FNMS(T6M, T6N, T6L); + T8G = FMA(KP923879532, T8t, T8m); + T8u = FNMS(KP923879532, T8t, T8m); + { + E T8j, T8w, T8D, T8v, T8E; + T8j = W[50]; + T8w = W[51]; + T8F = W[18]; + T8J = FMA(KP923879532, T8C, T8z); + T8D = FNMS(KP923879532, T8C, T8z); + T8v = T8j * T8u; + T8E = T8w * T8u; + T8H = T8F * T8G; + T8I = W[19]; + cr[WS(rs, 26)] = FNMS(T8w, T8D, T8v); + ci[WS(rs, 26)] = FMA(T8j, T8D, T8E); + } + { + E T6c, T6u, T6x, T6r, T8K, T5J, T6e; + cr[WS(rs, 10)] = FNMS(T8I, T8J, T8H); + T8K = T8I * T8G; + ci[WS(rs, 10)] = FMA(T8F, T8J, T8K); + T6c = FNMS(KP707106781, T6b, T5S); + T6u = FMA(KP707106781, T6b, T5S); + 
T6x = FMA(KP707106781, T6q, T6n); + T6r = FNMS(KP707106781, T6q, T6n); + T5J = W[38]; + T6e = W[39]; + { + E T6t, T6w, T6d, T6s, T6v, T6y; + T6t = W[6]; + T6w = W[7]; + T6d = T5J * T6c; + T6s = T6e * T6c; + T6v = T6t * T6u; + T6y = T6w * T6u; + cr[WS(rs, 20)] = FNMS(T6e, T6r, T6d); + ci[WS(rs, 20)] = FMA(T5J, T6r, T6s); + cr[WS(rs, 4)] = FNMS(T6w, T6x, T6v); + ci[WS(rs, 4)] = FMA(T6t, T6x, T6y); + } + } + } + } + } + { + E T7c, T7f, T7e, T7g, T7d; + { + E T71, T74, T79, T76, T75, T7b, T7a; + T71 = W[46]; + T7c = T72 + T73; + T74 = T72 - T73; + T7f = T78 + T77; + T79 = T77 - T78; + T76 = W[47]; + T75 = T71 * T74; + T7b = W[14]; + T7a = T71 * T79; + T7e = W[15]; + cr[WS(rs, 24)] = FNMS(T76, T79, T75); + T7g = T7b * T7f; + T7d = T7b * T7c; + ci[WS(rs, 24)] = FMA(T76, T74, T7a); + } + { + E T81, T7X, T80, T7Z, T82; + ci[WS(rs, 8)] = FMA(T7e, T7c, T7g); + cr[WS(rs, 8)] = FNMS(T7e, T7f, T7d); + { + E T7h, T7Y, T7I, T7V, T7K, T7J, T7W; + T7h = W[42]; + T7Y = FMA(KP923879532, T7H, T7s); + T7I = FNMS(KP923879532, T7H, T7s); + T81 = FMA(KP923879532, T7U, T7R); + T7V = FNMS(KP923879532, T7U, T7R); + T7K = W[43]; + T7J = T7h * T7I; + T7X = W[10]; + T80 = W[11]; + T7W = T7K * T7I; + cr[WS(rs, 22)] = FNMS(T7K, T7V, T7J); + T7Z = T7X * T7Y; + T82 = T80 * T7Y; + ci[WS(rs, 22)] = FMA(T7h, T7V, T7W); + } + { + E T2P, T37, T1G, T32, T2R, T2Q, T38, T2z, T27, T2y; + T2P = FMA(KP923879532, T2O, T2L); + T37 = FNMS(KP923879532, T2O, T2L); + cr[WS(rs, 6)] = FNMS(T80, T81, T7Z); + ci[WS(rs, 6)] = FMA(T7X, T81, T82); + T1G = FMA(KP923879532, T1F, T1i); + T32 = FNMS(KP923879532, T1F, T1i); + T2R = FNMS(KP668178637, T1X, T26); + T27 = FMA(KP668178637, T26, T1X); + T2y = FNMS(KP668178637, T2x, T2o); + T2Q = FMA(KP668178637, T2o, T2x); + T38 = T2y + T27; + T2z = T27 - T2y; + { + E T2C, T2A, T3c, T34, T2U, T39, T36, T31; + { + E T11, T2W, T2S, T33; + T11 = W[40]; + T2C = W[41]; + T2A = FNMS(KP831469612, T2z, T1G); + T2W = FMA(KP831469612, T2z, T1G); + T2S = T2Q - T2R; + T33 = T2Q + T2R; + { + E 
T2V, T2B, T2T, T2Z, T2X, T2Y, T30; + T2V = W[8]; + T2B = T11 * T2A; + T3c = FMA(KP831469612, T33, T32); + T34 = FNMS(KP831469612, T33, T32); + T2T = FNMS(KP831469612, T2S, T2P); + T2Z = FMA(KP831469612, T2S, T2P); + T2X = T2V * T2W; + T2Y = W[9]; + T30 = T2V * T2Z; + cr[WS(rs, 21)] = FNMS(T2C, T2T, T2B); + T2U = T11 * T2T; + cr[WS(rs, 5)] = FNMS(T2Y, T2Z, T2X); + ci[WS(rs, 5)] = FMA(T2Y, T2W, T30); + } + } + T39 = FNMS(KP831469612, T38, T37); + T3f = FMA(KP831469612, T38, T37); + ci[WS(rs, 21)] = FMA(T2C, T2A, T2U); + T36 = W[25]; + T31 = W[24]; + { + E T3b, T3g, T3a, T35; + T3e = W[57]; + T3a = T36 * T34; + T35 = T31 * T34; + T3b = W[56]; + T3g = T3e * T3c; + ci[WS(rs, 13)] = FMA(T31, T39, T3a); + cr[WS(rs, 13)] = FNMS(T36, T39, T35); + T3d = T3b * T3c; + ci[WS(rs, 29)] = FMA(T3b, T3f, T3g); + } + } + } + } + } + { + E T4G, T4J, T4I, T4F, T4K; + { + E T4z, T4R, T4a, T4M, T4h, T4o, T4C, T4N, T4A, T4B; + T4z = FMA(KP923879532, T4y, T4v); + T4R = FNMS(KP923879532, T4y, T4v); + T4a = FNMS(KP923879532, T49, T42); + T4M = FMA(KP923879532, T49, T42); + cr[WS(rs, 29)] = FNMS(T3e, T3f, T3d); + T4h = FNMS(KP668178637, T4g, T4d); + T4A = FMA(KP668178637, T4d, T4g); + T4B = FMA(KP668178637, T4k, T4n); + T4o = FNMS(KP668178637, T4n, T4k); + T4C = T4A - T4B; + T4N = T4A + T4B; + { + E T4W, T4Z, T4q, T4X, T50, T4Y; + { + E T4L, T4Q, T4O, T4p, T4S, T4P, T4U, T4V, T4T; + T4L = W[20]; + T4Q = W[21]; + T4W = FMA(KP831469612, T4N, T4M); + T4O = FNMS(KP831469612, T4N, T4M); + T4p = T4h + T4o; + T4S = T4h - T4o; + T4P = T4L * T4O; + T4V = W[52]; + T4Z = FNMS(KP831469612, T4S, T4R); + T4T = FMA(KP831469612, T4S, T4R); + T4q = FNMS(KP831469612, T4p, T4a); + T4G = FMA(KP831469612, T4p, T4a); + cr[WS(rs, 11)] = FNMS(T4Q, T4T, T4P); + T4U = T4L * T4T; + T4X = T4V * T4W; + T50 = T4V * T4Z; + T4Y = W[53]; + ci[WS(rs, 11)] = FMA(T4Q, T4O, T4U); + } + { + E T4D, T4s, T3Z, T4E, T4r; + T4J = FMA(KP831469612, T4C, T4z); + T4D = FNMS(KP831469612, T4C, T4z); + T4s = W[37]; + ci[WS(rs, 27)] = 
FMA(T4Y, T4W, T50); + cr[WS(rs, 27)] = FNMS(T4Y, T4Z, T4X); + T3Z = W[36]; + T4E = T4s * T4q; + T4I = W[5]; + T4r = T3Z * T4q; + ci[WS(rs, 19)] = FMA(T3Z, T4D, T4E); + T4F = W[4]; + T4K = T4I * T4G; + cr[WS(rs, 19)] = FNMS(T4s, T4D, T4r); + } + } + } + { + E T3E, T3H, T3G, T3D, T3I; + { + E T3x, T3P, T3k, T3K, T3n, T3q, T3A, T3L, T4H, T3y, T3z; + T3x = FMA(KP923879532, T3w, T3v); + T3P = FNMS(KP923879532, T3w, T3v); + T4H = T4F * T4G; + ci[WS(rs, 3)] = FMA(T4F, T4J, T4K); + T3k = FMA(KP923879532, T3j, T3i); + T3K = FNMS(KP923879532, T3j, T3i); + T3y = FMA(KP198912367, T3l, T3m); + T3n = FNMS(KP198912367, T3m, T3l); + cr[WS(rs, 3)] = FNMS(T4I, T4J, T4H); + T3z = FNMS(KP198912367, T3o, T3p); + T3q = FMA(KP198912367, T3p, T3o); + T3A = T3y + T3z; + T3L = T3z - T3y; + { + E T3U, T3X, T3s, T3V, T3Y, T3W; + { + E T3J, T3O, T3M, T3r, T3Q, T3N, T3S, T3T, T3R; + T3J = W[48]; + T3O = W[49]; + T3U = FMA(KP980785280, T3L, T3K); + T3M = FNMS(KP980785280, T3L, T3K); + T3r = T3n + T3q; + T3Q = T3n - T3q; + T3N = T3J * T3M; + T3T = W[16]; + T3X = FMA(KP980785280, T3Q, T3P); + T3R = FNMS(KP980785280, T3Q, T3P); + T3s = FNMS(KP980785280, T3r, T3k); + T3E = FMA(KP980785280, T3r, T3k); + cr[WS(rs, 25)] = FNMS(T3O, T3R, T3N); + T3S = T3J * T3R; + T3V = T3T * T3U; + T3Y = T3T * T3X; + T3W = W[17]; + ci[WS(rs, 25)] = FMA(T3O, T3M, T3S); + } + { + E T3B, T3u, T3h, T3C, T3t; + T3H = FMA(KP980785280, T3A, T3x); + T3B = FNMS(KP980785280, T3A, T3x); + T3u = W[33]; + ci[WS(rs, 9)] = FMA(T3W, T3U, T3Y); + cr[WS(rs, 9)] = FNMS(T3W, T3X, T3V); + T3h = W[32]; + T3C = T3u * T3s; + T3G = W[1]; + T3t = T3h * T3s; + ci[WS(rs, 17)] = FMA(T3h, T3B, T3C); + T3D = W[0]; + T3I = T3G * T3E; + cr[WS(rs, 17)] = FNMS(T3u, T3B, T3t); + } + } + } + { + E T5h, T5z, T54, T5u, T57, T5a, T5k, T5v, T3F, T5i, T5j; + T5h = FMA(KP923879532, T5g, T5f); + T5z = FNMS(KP923879532, T5g, T5f); + T3F = T3D * T3E; + ci[WS(rs, 1)] = FMA(T3D, T3H, T3I); + T54 = FNMS(KP923879532, T53, T52); + T5u = FMA(KP923879532, T53, T52); + 
T5i = FMA(KP198912367, T55, T56); + T57 = FNMS(KP198912367, T56, T55); + cr[WS(rs, 1)] = FNMS(T3G, T3H, T3F); + T5j = FMA(KP198912367, T58, T59); + T5a = FNMS(KP198912367, T59, T58); + T5k = T5i - T5j; + T5v = T5i + T5j; + { + E T5E, T5H, T5c, T5F, T5I, T5G; + { + E T5t, T5y, T5w, T5b, T5A, T5x, T5C, T5D, T5B; + T5t = W[28]; + T5y = W[29]; + T5E = FMA(KP980785280, T5v, T5u); + T5w = FNMS(KP980785280, T5v, T5u); + T5b = T57 + T5a; + T5A = T5a - T57; + T5x = T5t * T5w; + T5D = W[60]; + T5H = FNMS(KP980785280, T5A, T5z); + T5B = FMA(KP980785280, T5A, T5z); + T5c = FMA(KP980785280, T5b, T54); + T5o = FNMS(KP980785280, T5b, T54); + cr[WS(rs, 15)] = FNMS(T5y, T5B, T5x); + T5C = T5t * T5B; + T5F = T5D * T5E; + T5I = T5D * T5H; + T5G = W[61]; + ci[WS(rs, 15)] = FMA(T5y, T5w, T5C); + } + { + E T5l, T5e, T51, T5m, T5d; + T5r = FMA(KP980785280, T5k, T5h); + T5l = FNMS(KP980785280, T5k, T5h); + T5e = W[45]; + ci[WS(rs, 31)] = FMA(T5G, T5E, T5I); + cr[WS(rs, 31)] = FNMS(T5G, T5H, T5F); + T51 = W[44]; + T5m = T5e * T5c; + T5q = W[13]; + T5d = T51 * T5c; + ci[WS(rs, 23)] = FMA(T51, T5l, T5m); + T5n = W[12]; + T5s = T5q * T5o; + cr[WS(rs, 23)] = FNMS(T5e, T5l, T5d); + } + } + } + } + } + } + } + T5p = T5n * T5o; + ci[WS(rs, 7)] = FMA(T5n, T5r, T5s); + cr[WS(rs, 7)] = FNMS(T5q, T5r, T5p); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {236, 62, 198, 0} }; + +void X(codelet_hb_32) (planner *p) { + X(khc2hc_register) (p, hb_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 98 stack variables, 7 constants, and 128 memory accesses + */ +#include "hb.h" + +static void hb_32(R *cr, R *ci, const R *W, 
stride rs, INT mb, INT me, INT ms) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { + E T4o, T6y, T70, T5u, Tf, T12, T5x, T6z, T3m, T3Y, T29, T2y, T4v, T71, T2U; + E T3M, Tu, T1U, T6D, T73, T6G, T74, T1h, T2z, T2X, T3o, T4D, T5A, T4K, T5z; + E T30, T3n, TK, T1j, T6S, T7w, T6V, T7v, T1y, T2B, T3c, T3S, T4X, T61, T54; + E T62, T3f, T3T, TZ, T1A, T6L, T7z, T6O, T7y, T1P, T2C, T35, T3P, T5g, T64; + E T5n, T65, T38, T3Q; + { + E T3, T4m, T24, T4q, T27, T4t, T6, T5s, Ta, T4p, T1X, T5t, T20, T4n, Td; + E T4s; + { + E T1, T2, T22, T23; + T1 = cr[0]; + T2 = ci[WS(rs, 15)]; + T3 = T1 + T2; + T4m = T1 - T2; + T22 = ci[WS(rs, 27)]; + T23 = cr[WS(rs, 20)]; + T24 = T22 - T23; + T4q = T22 + T23; + } + { + E T25, T26, T4, T5; + T25 = ci[WS(rs, 19)]; + T26 = cr[WS(rs, 28)]; + T27 = T25 - T26; + T4t = T25 + T26; + T4 = cr[WS(rs, 8)]; + T5 = ci[WS(rs, 7)]; + T6 = T4 + T5; + T5s = T4 - T5; + } + { + E T8, T9, T1V, T1W; + T8 = cr[WS(rs, 4)]; + T9 = ci[WS(rs, 11)]; + Ta = T8 + T9; + T4p = T8 - T9; + T1V = ci[WS(rs, 31)]; + T1W = cr[WS(rs, 16)]; + T1X = T1V - T1W; + T5t = T1V + T1W; + } + { + E T1Y, T1Z, Tb, Tc; + T1Y = ci[WS(rs, 23)]; + T1Z = cr[WS(rs, 24)]; + T20 = T1Y - T1Z; + T4n = T1Y + T1Z; + Tb = ci[WS(rs, 3)]; + Tc = cr[WS(rs, 12)]; + Td = Tb + Tc; + T4s = Tb - Tc; + } + { + E T7, Te, T21, T28; + T4o = T4m - T4n; + T6y = T4m + T4n; + T70 = T5t - T5s; + T5u = T5s + T5t; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 
+ Te; + T12 = T7 - Te; + { + E T5v, T5w, T3k, T3l; + T5v = T4p + T4q; + T5w = T4s + T4t; + T5x = KP707106781 * (T5v - T5w); + T6z = KP707106781 * (T5v + T5w); + T3k = T1X - T20; + T3l = Ta - Td; + T3m = T3k - T3l; + T3Y = T3l + T3k; + } + T21 = T1X + T20; + T28 = T24 + T27; + T29 = T21 - T28; + T2y = T21 + T28; + { + E T4r, T4u, T2S, T2T; + T4r = T4p - T4q; + T4u = T4s - T4t; + T4v = KP707106781 * (T4r + T4u); + T71 = KP707106781 * (T4r - T4u); + T2S = T3 - T6; + T2T = T27 - T24; + T2U = T2S - T2T; + T3M = T2S + T2T; + } + } + } + { + E Ti, T4H, T1c, T4F, T1f, T4I, Tl, T4E, Tp, T4A, T15, T4y, T18, T4B, Ts; + E T4x; + { + E Tg, Th, T1a, T1b; + Tg = cr[WS(rs, 2)]; + Th = ci[WS(rs, 13)]; + Ti = Tg + Th; + T4H = Tg - Th; + T1a = ci[WS(rs, 29)]; + T1b = cr[WS(rs, 18)]; + T1c = T1a - T1b; + T4F = T1a + T1b; + } + { + E T1d, T1e, Tj, Tk; + T1d = ci[WS(rs, 21)]; + T1e = cr[WS(rs, 26)]; + T1f = T1d - T1e; + T4I = T1d + T1e; + Tj = cr[WS(rs, 10)]; + Tk = ci[WS(rs, 5)]; + Tl = Tj + Tk; + T4E = Tj - Tk; + } + { + E Tn, To, T13, T14; + Tn = ci[WS(rs, 1)]; + To = cr[WS(rs, 14)]; + Tp = Tn + To; + T4A = Tn - To; + T13 = ci[WS(rs, 17)]; + T14 = cr[WS(rs, 30)]; + T15 = T13 - T14; + T4y = T13 + T14; + } + { + E T16, T17, Tq, Tr; + T16 = ci[WS(rs, 25)]; + T17 = cr[WS(rs, 22)]; + T18 = T16 - T17; + T4B = T16 + T17; + Tq = cr[WS(rs, 6)]; + Tr = ci[WS(rs, 9)]; + Ts = Tq + Tr; + T4x = Tq - Tr; + } + { + E Tm, Tt, T6B, T6C; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T1U = Tm - Tt; + T6B = T4H + T4I; + T6C = T4F - T4E; + T6D = FNMS(KP923879532, T6C, KP382683432 * T6B); + T73 = FMA(KP382683432, T6C, KP923879532 * T6B); + } + { + E T6E, T6F, T19, T1g; + T6E = T4A + T4B; + T6F = T4x + T4y; + T6G = FNMS(KP923879532, T6F, KP382683432 * T6E); + T74 = FMA(KP382683432, T6F, KP923879532 * T6E); + T19 = T15 + T18; + T1g = T1c + T1f; + T1h = T19 - T1g; + T2z = T1g + T19; + } + { + E T2V, T2W, T4z, T4C; + T2V = T15 - T18; + T2W = Tp - Ts; + T2X = T2V - T2W; + T3o = T2W + T2V; + T4z = T4x - T4y; 
+ T4C = T4A - T4B; + T4D = FNMS(KP382683432, T4C, KP923879532 * T4z); + T5A = FMA(KP382683432, T4z, KP923879532 * T4C); + } + { + E T4G, T4J, T2Y, T2Z; + T4G = T4E + T4F; + T4J = T4H - T4I; + T4K = FMA(KP923879532, T4G, KP382683432 * T4J); + T5z = FNMS(KP382683432, T4G, KP923879532 * T4J); + T2Y = Ti - Tl; + T2Z = T1c - T1f; + T30 = T2Y + T2Z; + T3n = T2Y - T2Z; + } + } + { + E Ty, T4N, TB, T4Y, T1p, T4O, T1m, T4Z, TI, T52, T1w, T4V, TF, T51, T1t; + E T4S; + { + E Tw, Tx, T1k, T1l; + Tw = cr[WS(rs, 1)]; + Tx = ci[WS(rs, 14)]; + Ty = Tw + Tx; + T4N = Tw - Tx; + { + E Tz, TA, T1n, T1o; + Tz = cr[WS(rs, 9)]; + TA = ci[WS(rs, 6)]; + TB = Tz + TA; + T4Y = Tz - TA; + T1n = ci[WS(rs, 22)]; + T1o = cr[WS(rs, 25)]; + T1p = T1n - T1o; + T4O = T1n + T1o; + } + T1k = ci[WS(rs, 30)]; + T1l = cr[WS(rs, 17)]; + T1m = T1k - T1l; + T4Z = T1k + T1l; + { + E TG, TH, T4T, T1u, T1v, T4U; + TG = ci[WS(rs, 2)]; + TH = cr[WS(rs, 13)]; + T4T = TG - TH; + T1u = ci[WS(rs, 18)]; + T1v = cr[WS(rs, 29)]; + T4U = T1u + T1v; + TI = TG + TH; + T52 = T4T + T4U; + T1w = T1u - T1v; + T4V = T4T - T4U; + } + { + E TD, TE, T4Q, T1r, T1s, T4R; + TD = cr[WS(rs, 5)]; + TE = ci[WS(rs, 10)]; + T4Q = TD - TE; + T1r = ci[WS(rs, 26)]; + T1s = cr[WS(rs, 21)]; + T4R = T1r + T1s; + TF = TD + TE; + T51 = T4Q + T4R; + T1t = T1r - T1s; + T4S = T4Q - T4R; + } + } + { + E TC, TJ, T6Q, T6R; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T1j = TC - TJ; + T6Q = T4Z - T4Y; + T6R = KP707106781 * (T4S - T4V); + T6S = T6Q + T6R; + T7w = T6Q - T6R; + } + { + E T6T, T6U, T1q, T1x; + T6T = T4N + T4O; + T6U = KP707106781 * (T51 + T52); + T6V = T6T - T6U; + T7v = T6T + T6U; + T1q = T1m + T1p; + T1x = T1t + T1w; + T1y = T1q - T1x; + T2B = T1q + T1x; + } + { + E T3a, T3b, T4P, T4W; + T3a = T1m - T1p; + T3b = TF - TI; + T3c = T3a - T3b; + T3S = T3b + T3a; + T4P = T4N - T4O; + T4W = KP707106781 * (T4S + T4V); + T4X = T4P - T4W; + T61 = T4P + T4W; + } + { + E T50, T53, T3d, T3e; + T50 = T4Y + T4Z; + T53 = KP707106781 * (T51 - T52); 
+ T54 = T50 - T53; + T62 = T50 + T53; + T3d = Ty - TB; + T3e = T1w - T1t; + T3f = T3d - T3e; + T3T = T3d + T3e; + } + } + { + E TN, T56, TQ, T5h, T1G, T57, T1D, T5i, TX, T5l, T1N, T5e, TU, T5k, T1K; + E T5b; + { + E TL, TM, T1B, T1C; + TL = ci[0]; + TM = cr[WS(rs, 15)]; + TN = TL + TM; + T56 = TL - TM; + { + E TO, TP, T1E, T1F; + TO = cr[WS(rs, 7)]; + TP = ci[WS(rs, 8)]; + TQ = TO + TP; + T5h = TO - TP; + T1E = ci[WS(rs, 24)]; + T1F = cr[WS(rs, 23)]; + T1G = T1E - T1F; + T57 = T1E + T1F; + } + T1B = ci[WS(rs, 16)]; + T1C = cr[WS(rs, 31)]; + T1D = T1B - T1C; + T5i = T1B + T1C; + { + E TV, TW, T5c, T1L, T1M, T5d; + TV = ci[WS(rs, 4)]; + TW = cr[WS(rs, 11)]; + T5c = TV - TW; + T1L = ci[WS(rs, 20)]; + T1M = cr[WS(rs, 27)]; + T5d = T1L + T1M; + TX = TV + TW; + T5l = T5c + T5d; + T1N = T1L - T1M; + T5e = T5c - T5d; + } + { + E TS, TT, T59, T1I, T1J, T5a; + TS = cr[WS(rs, 3)]; + TT = ci[WS(rs, 12)]; + T59 = TS - TT; + T1I = ci[WS(rs, 28)]; + T1J = cr[WS(rs, 19)]; + T5a = T1I + T1J; + TU = TS + TT; + T5k = T59 + T5a; + T1K = T1I - T1J; + T5b = T59 - T5a; + } + } + { + E TR, TY, T6J, T6K; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + T1A = TR - TY; + T6J = KP707106781 * (T5b - T5e); + T6K = T5h + T5i; + T6L = T6J - T6K; + T7z = T6K + T6J; + } + { + E T6M, T6N, T1H, T1O; + T6M = T56 + T57; + T6N = KP707106781 * (T5k + T5l); + T6O = T6M - T6N; + T7y = T6M + T6N; + T1H = T1D + T1G; + T1O = T1K + T1N; + T1P = T1H - T1O; + T2C = T1H + T1O; + } + { + E T33, T34, T58, T5f; + T33 = T1D - T1G; + T34 = TU - TX; + T35 = T33 - T34; + T3P = T34 + T33; + T58 = T56 - T57; + T5f = KP707106781 * (T5b + T5e); + T5g = T58 - T5f; + T64 = T58 + T5f; + } + { + E T5j, T5m, T36, T37; + T5j = T5h - T5i; + T5m = KP707106781 * (T5k - T5l); + T5n = T5j - T5m; + T65 = T5j + T5m; + T36 = TN - TQ; + T37 = T1N - T1K; + T38 = T36 - T37; + T3Q = T36 + T37; + } + } + { + E Tv, T10, T2w, T2A, T2D, T2E, T2v, T2x; + Tv = Tf + Tu; + T10 = TK + TZ; + T2w = Tv - T10; + T2A = T2y + T2z; + T2D = T2B + T2C; + T2E 
= T2A - T2D; + cr[0] = Tv + T10; + ci[0] = T2A + T2D; + T2v = W[30]; + T2x = W[31]; + cr[WS(rs, 16)] = FNMS(T2x, T2E, T2v * T2w); + ci[WS(rs, 16)] = FMA(T2x, T2w, T2v * T2E); + } + { + E T2I, T2O, T2M, T2Q; + { + E T2G, T2H, T2K, T2L; + T2G = Tf - Tu; + T2H = T2C - T2B; + T2I = T2G - T2H; + T2O = T2G + T2H; + T2K = T2y - T2z; + T2L = TK - TZ; + T2M = T2K - T2L; + T2Q = T2L + T2K; + } + { + E T2F, T2J, T2N, T2P; + T2F = W[46]; + T2J = W[47]; + cr[WS(rs, 24)] = FNMS(T2J, T2M, T2F * T2I); + ci[WS(rs, 24)] = FMA(T2F, T2M, T2J * T2I); + T2N = W[14]; + T2P = W[15]; + cr[WS(rs, 8)] = FNMS(T2P, T2Q, T2N * T2O); + ci[WS(rs, 8)] = FMA(T2N, T2Q, T2P * T2O); + } + } + { + E T1i, T2a, T2o, T2k, T2d, T2l, T1R, T2p; + T1i = T12 + T1h; + T2a = T1U + T29; + T2o = T29 - T1U; + T2k = T12 - T1h; + { + E T2b, T2c, T1z, T1Q; + T2b = T1j + T1y; + T2c = T1P - T1A; + T2d = KP707106781 * (T2b + T2c); + T2l = KP707106781 * (T2c - T2b); + T1z = T1j - T1y; + T1Q = T1A + T1P; + T1R = KP707106781 * (T1z + T1Q); + T2p = KP707106781 * (T1z - T1Q); + } + { + E T1S, T2e, T11, T1T; + T1S = T1i - T1R; + T2e = T2a - T2d; + T11 = W[38]; + T1T = W[39]; + cr[WS(rs, 20)] = FNMS(T1T, T2e, T11 * T1S); + ci[WS(rs, 20)] = FMA(T1T, T1S, T11 * T2e); + } + { + E T2s, T2u, T2r, T2t; + T2s = T2k + T2l; + T2u = T2o + T2p; + T2r = W[22]; + T2t = W[23]; + cr[WS(rs, 12)] = FNMS(T2t, T2u, T2r * T2s); + ci[WS(rs, 12)] = FMA(T2r, T2u, T2t * T2s); + } + { + E T2g, T2i, T2f, T2h; + T2g = T1i + T1R; + T2i = T2a + T2d; + T2f = W[6]; + T2h = W[7]; + cr[WS(rs, 4)] = FNMS(T2h, T2i, T2f * T2g); + ci[WS(rs, 4)] = FMA(T2h, T2g, T2f * T2i); + } + { + E T2m, T2q, T2j, T2n; + T2m = T2k - T2l; + T2q = T2o - T2p; + T2j = W[54]; + T2n = W[55]; + cr[WS(rs, 28)] = FNMS(T2n, T2q, T2j * T2m); + ci[WS(rs, 28)] = FMA(T2j, T2q, T2n * T2m); + } + } + { + E T3O, T4a, T40, T4e, T3V, T4f, T43, T4b, T3N, T3Z; + T3N = KP707106781 * (T3n + T3o); + T3O = T3M - T3N; + T4a = T3M + T3N; + T3Z = KP707106781 * (T30 + T2X); + T40 = T3Y - T3Z; + T4e = T3Y + 
T3Z; + { + E T3R, T3U, T41, T42; + T3R = FNMS(KP382683432, T3Q, KP923879532 * T3P); + T3U = FMA(KP923879532, T3S, KP382683432 * T3T); + T3V = T3R - T3U; + T4f = T3U + T3R; + T41 = FNMS(KP382683432, T3S, KP923879532 * T3T); + T42 = FMA(KP382683432, T3P, KP923879532 * T3Q); + T43 = T41 - T42; + T4b = T41 + T42; + } + { + E T3W, T44, T3L, T3X; + T3W = T3O - T3V; + T44 = T40 - T43; + T3L = W[50]; + T3X = W[51]; + cr[WS(rs, 26)] = FNMS(T3X, T44, T3L * T3W); + ci[WS(rs, 26)] = FMA(T3X, T3W, T3L * T44); + } + { + E T4i, T4k, T4h, T4j; + T4i = T4a + T4b; + T4k = T4e + T4f; + T4h = W[2]; + T4j = W[3]; + cr[WS(rs, 2)] = FNMS(T4j, T4k, T4h * T4i); + ci[WS(rs, 2)] = FMA(T4h, T4k, T4j * T4i); + } + { + E T46, T48, T45, T47; + T46 = T3O + T3V; + T48 = T40 + T43; + T45 = W[18]; + T47 = W[19]; + cr[WS(rs, 10)] = FNMS(T47, T48, T45 * T46); + ci[WS(rs, 10)] = FMA(T47, T46, T45 * T48); + } + { + E T4c, T4g, T49, T4d; + T4c = T4a - T4b; + T4g = T4e - T4f; + T49 = W[34]; + T4d = W[35]; + cr[WS(rs, 18)] = FNMS(T4d, T4g, T49 * T4c); + ci[WS(rs, 18)] = FMA(T49, T4g, T4d * T4c); + } + } + { + E T32, T3A, T3q, T3E, T3h, T3F, T3t, T3B, T31, T3p; + T31 = KP707106781 * (T2X - T30); + T32 = T2U - T31; + T3A = T2U + T31; + T3p = KP707106781 * (T3n - T3o); + T3q = T3m - T3p; + T3E = T3m + T3p; + { + E T39, T3g, T3r, T3s; + T39 = FNMS(KP923879532, T38, KP382683432 * T35); + T3g = FMA(KP382683432, T3c, KP923879532 * T3f); + T3h = T39 - T3g; + T3F = T3g + T39; + T3r = FNMS(KP923879532, T3c, KP382683432 * T3f); + T3s = FMA(KP923879532, T35, KP382683432 * T38); + T3t = T3r - T3s; + T3B = T3r + T3s; + } + { + E T3i, T3u, T2R, T3j; + T3i = T32 - T3h; + T3u = T3q - T3t; + T2R = W[58]; + T3j = W[59]; + cr[WS(rs, 30)] = FNMS(T3j, T3u, T2R * T3i); + ci[WS(rs, 30)] = FMA(T3j, T3i, T2R * T3u); + } + { + E T3I, T3K, T3H, T3J; + T3I = T3A + T3B; + T3K = T3E + T3F; + T3H = W[10]; + T3J = W[11]; + cr[WS(rs, 6)] = FNMS(T3J, T3K, T3H * T3I); + ci[WS(rs, 6)] = FMA(T3H, T3K, T3J * T3I); + } + { + E T3w, T3y, T3v, 
T3x; + T3w = T32 + T3h; + T3y = T3q + T3t; + T3v = W[26]; + T3x = W[27]; + cr[WS(rs, 14)] = FNMS(T3x, T3y, T3v * T3w); + ci[WS(rs, 14)] = FMA(T3x, T3w, T3v * T3y); + } + { + E T3C, T3G, T3z, T3D; + T3C = T3A - T3B; + T3G = T3E - T3F; + T3z = W[42]; + T3D = W[43]; + cr[WS(rs, 22)] = FNMS(T3D, T3G, T3z * T3C); + ci[WS(rs, 22)] = FMA(T3z, T3G, T3D * T3C); + } + } + { + E T60, T6m, T6f, T6n, T67, T6r, T6c, T6q; + { + E T5Y, T5Z, T6d, T6e; + T5Y = T4o + T4v; + T5Z = T5z + T5A; + T60 = T5Y + T5Z; + T6m = T5Y - T5Z; + T6d = FMA(KP195090322, T61, KP980785280 * T62); + T6e = FNMS(KP195090322, T64, KP980785280 * T65); + T6f = T6d + T6e; + T6n = T6e - T6d; + } + { + E T63, T66, T6a, T6b; + T63 = FNMS(KP195090322, T62, KP980785280 * T61); + T66 = FMA(KP980785280, T64, KP195090322 * T65); + T67 = T63 + T66; + T6r = T63 - T66; + T6a = T5u + T5x; + T6b = T4K + T4D; + T6c = T6a + T6b; + T6q = T6a - T6b; + } + { + E T68, T6g, T5X, T69; + T68 = T60 - T67; + T6g = T6c - T6f; + T5X = W[32]; + T69 = W[33]; + cr[WS(rs, 17)] = FNMS(T69, T6g, T5X * T68); + ci[WS(rs, 17)] = FMA(T69, T68, T5X * T6g); + } + { + E T6u, T6w, T6t, T6v; + T6u = T6m + T6n; + T6w = T6q + T6r; + T6t = W[16]; + T6v = W[17]; + cr[WS(rs, 9)] = FNMS(T6v, T6w, T6t * T6u); + ci[WS(rs, 9)] = FMA(T6t, T6w, T6v * T6u); + } + { + E T6i, T6k, T6h, T6j; + T6i = T60 + T67; + T6k = T6c + T6f; + T6h = W[0]; + T6j = W[1]; + cr[WS(rs, 1)] = FNMS(T6j, T6k, T6h * T6i); + ci[WS(rs, 1)] = FMA(T6j, T6i, T6h * T6k); + } + { + E T6o, T6s, T6l, T6p; + T6o = T6m - T6n; + T6s = T6q - T6r; + T6l = W[48]; + T6p = W[49]; + cr[WS(rs, 25)] = FNMS(T6p, T6s, T6l * T6o); + ci[WS(rs, 25)] = FMA(T6l, T6s, T6p * T6o); + } + } + { + E T7u, T7Q, T7J, T7R, T7B, T7V, T7G, T7U; + { + E T7s, T7t, T7H, T7I; + T7s = T6y + T6z; + T7t = T73 + T74; + T7u = T7s - T7t; + T7Q = T7s + T7t; + T7H = FMA(KP195090322, T7w, KP980785280 * T7v); + T7I = FMA(KP195090322, T7z, KP980785280 * T7y); + T7J = T7H - T7I; + T7R = T7H + T7I; + } + { + E T7x, T7A, T7E, T7F; + T7x = 
FNMS(KP980785280, T7w, KP195090322 * T7v); + T7A = FNMS(KP980785280, T7z, KP195090322 * T7y); + T7B = T7x + T7A; + T7V = T7x - T7A; + T7E = T70 - T71; + T7F = T6D - T6G; + T7G = T7E + T7F; + T7U = T7E - T7F; + } + { + E T7C, T7K, T7r, T7D; + T7C = T7u - T7B; + T7K = T7G - T7J; + T7r = W[44]; + T7D = W[45]; + cr[WS(rs, 23)] = FNMS(T7D, T7K, T7r * T7C); + ci[WS(rs, 23)] = FMA(T7D, T7C, T7r * T7K); + } + { + E T7Y, T80, T7X, T7Z; + T7Y = T7Q + T7R; + T80 = T7U - T7V; + T7X = W[60]; + T7Z = W[61]; + cr[WS(rs, 31)] = FNMS(T7Z, T80, T7X * T7Y); + ci[WS(rs, 31)] = FMA(T7X, T80, T7Z * T7Y); + } + { + E T7M, T7O, T7L, T7N; + T7M = T7u + T7B; + T7O = T7G + T7J; + T7L = W[12]; + T7N = W[13]; + cr[WS(rs, 7)] = FNMS(T7N, T7O, T7L * T7M); + ci[WS(rs, 7)] = FMA(T7N, T7M, T7L * T7O); + } + { + E T7S, T7W, T7P, T7T; + T7S = T7Q - T7R; + T7W = T7U + T7V; + T7P = W[28]; + T7T = W[29]; + cr[WS(rs, 15)] = FNMS(T7T, T7W, T7P * T7S); + ci[WS(rs, 15)] = FMA(T7P, T7W, T7T * T7S); + } + } + { + E T4M, T5M, T5F, T5N, T5p, T5R, T5C, T5Q; + { + E T4w, T4L, T5D, T5E; + T4w = T4o - T4v; + T4L = T4D - T4K; + T4M = T4w + T4L; + T5M = T4w - T4L; + T5D = FMA(KP831469612, T4X, KP555570233 * T54); + T5E = FNMS(KP831469612, T5g, KP555570233 * T5n); + T5F = T5D + T5E; + T5N = T5E - T5D; + } + { + E T55, T5o, T5y, T5B; + T55 = FNMS(KP831469612, T54, KP555570233 * T4X); + T5o = FMA(KP555570233, T5g, KP831469612 * T5n); + T5p = T55 + T5o; + T5R = T55 - T5o; + T5y = T5u - T5x; + T5B = T5z - T5A; + T5C = T5y + T5B; + T5Q = T5y - T5B; + } + { + E T5q, T5G, T4l, T5r; + T5q = T4M - T5p; + T5G = T5C - T5F; + T4l = W[40]; + T5r = W[41]; + cr[WS(rs, 21)] = FNMS(T5r, T5G, T4l * T5q); + ci[WS(rs, 21)] = FMA(T5r, T5q, T4l * T5G); + } + { + E T5U, T5W, T5T, T5V; + T5U = T5M + T5N; + T5W = T5Q + T5R; + T5T = W[24]; + T5V = W[25]; + cr[WS(rs, 13)] = FNMS(T5V, T5W, T5T * T5U); + ci[WS(rs, 13)] = FMA(T5T, T5W, T5V * T5U); + } + { + E T5I, T5K, T5H, T5J; + T5I = T4M + T5p; + T5K = T5C + T5F; + T5H = W[8]; + T5J = W[9]; + 
cr[WS(rs, 5)] = FNMS(T5J, T5K, T5H * T5I); + ci[WS(rs, 5)] = FMA(T5J, T5I, T5H * T5K); + } + { + E T5O, T5S, T5L, T5P; + T5O = T5M - T5N; + T5S = T5Q - T5R; + T5L = W[56]; + T5P = W[57]; + cr[WS(rs, 29)] = FNMS(T5P, T5S, T5L * T5O); + ci[WS(rs, 29)] = FMA(T5L, T5S, T5P * T5O); + } + } + { + E T6I, T7g, T79, T7h, T6X, T7l, T76, T7k; + { + E T6A, T6H, T77, T78; + T6A = T6y - T6z; + T6H = T6D + T6G; + T6I = T6A - T6H; + T7g = T6A + T6H; + T77 = FNMS(KP555570233, T6S, KP831469612 * T6V); + T78 = FMA(KP555570233, T6L, KP831469612 * T6O); + T79 = T77 - T78; + T7h = T77 + T78; + } + { + E T6P, T6W, T72, T75; + T6P = FNMS(KP555570233, T6O, KP831469612 * T6L); + T6W = FMA(KP831469612, T6S, KP555570233 * T6V); + T6X = T6P - T6W; + T7l = T6W + T6P; + T72 = T70 + T71; + T75 = T73 - T74; + T76 = T72 - T75; + T7k = T72 + T75; + } + { + E T6Y, T7a, T6x, T6Z; + T6Y = T6I - T6X; + T7a = T76 - T79; + T6x = W[52]; + T6Z = W[53]; + cr[WS(rs, 27)] = FNMS(T6Z, T7a, T6x * T6Y); + ci[WS(rs, 27)] = FMA(T6Z, T6Y, T6x * T7a); + } + { + E T7o, T7q, T7n, T7p; + T7o = T7g + T7h; + T7q = T7k + T7l; + T7n = W[4]; + T7p = W[5]; + cr[WS(rs, 3)] = FNMS(T7p, T7q, T7n * T7o); + ci[WS(rs, 3)] = FMA(T7n, T7q, T7p * T7o); + } + { + E T7c, T7e, T7b, T7d; + T7c = T6I + T6X; + T7e = T76 + T79; + T7b = W[20]; + T7d = W[21]; + cr[WS(rs, 11)] = FNMS(T7d, T7e, T7b * T7c); + ci[WS(rs, 11)] = FMA(T7d, T7c, T7b * T7e); + } + { + E T7i, T7m, T7f, T7j; + T7i = T7g - T7h; + T7m = T7k - T7l; + T7f = W[36]; + T7j = W[37]; + cr[WS(rs, 19)] = FNMS(T7j, T7m, T7f * T7i); + ci[WS(rs, 19)] = FMA(T7f, T7m, T7j * T7i); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {340, 114, 94, 0} }; + +void X(codelet_hb_32) (planner *p) { + X(khc2hc_register) (p, hb_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_4.c --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hb_4 -include hb.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 27 stack variables, 0 constants, and 16 memory accesses + */ +#include "hb.h" + +static void hb_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs)) { + E T8, Th, Ta, T7, Ti, T9; + { + E Td, Tg, T3, T6, Tu, Tm, Tx, Tr; + { + E Tq, Tl, T4, T5, Tk, Tp; + { + E Tb, Tc, Te, Tf, T1, T2; + Tb = ci[WS(rs, 3)]; + Tc = cr[WS(rs, 2)]; + Te = ci[WS(rs, 2)]; + Tf = cr[WS(rs, 3)]; + T1 = cr[0]; + Tq = Tb + 
Tc; + Td = Tb - Tc; + T2 = ci[WS(rs, 1)]; + Tl = Te + Tf; + Tg = Te - Tf; + T4 = cr[WS(rs, 1)]; + T5 = ci[0]; + T3 = T1 + T2; + Tk = T1 - T2; + } + Tp = T4 - T5; + T6 = T4 + T5; + Tu = Tk + Tl; + Tm = Tk - Tl; + Tx = Tq - Tp; + Tr = Tp + Tq; + T8 = T3 - T6; + } + cr[0] = T3 + T6; + { + E Tj, To, Tw, Tv; + Tj = W[0]; + ci[0] = Td + Tg; + To = W[1]; + { + E Tt, Ts, Tn, Ty; + Tt = W[4]; + Ts = Tj * Tr; + Tn = Tj * Tm; + Tw = W[5]; + Ty = Tt * Tx; + Tv = Tt * Tu; + ci[WS(rs, 1)] = FMA(To, Tm, Ts); + cr[WS(rs, 1)] = FNMS(To, Tr, Tn); + ci[WS(rs, 3)] = FMA(Tw, Tu, Ty); + } + cr[WS(rs, 3)] = FNMS(Tw, Tx, Tv); + Th = Td - Tg; + Ta = W[3]; + T7 = W[2]; + } + } + Ti = Ta * T8; + T9 = T7 * T8; + ci[WS(rs, 2)] = FMA(T7, Th, Ti); + cr[WS(rs, 2)] = FNMS(Ta, Th, T9); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hb_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hb_4) (planner *p) { + X(khc2hc_register) (p, hb_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hb_4 -include hb.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "hb.h" + +static void hb_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs)) { + E T3, Ti, T6, Tm, Tc, Tn, Tf, Tj; + { + E T1, T2, T4, T5; + T1 = cr[0]; + T2 = ci[WS(rs, 1)]; + T3 = T1 + T2; + Ti = T1 - T2; + T4 = cr[WS(rs, 1)]; + T5 = ci[0]; + T6 = T4 + T5; + Tm = T4 - T5; + } + { + E Ta, Tb, Td, Te; + Ta = ci[WS(rs, 3)]; + Tb = cr[WS(rs, 2)]; + Tc = Ta - Tb; + Tn = Ta + Tb; + Td = ci[WS(rs, 2)]; + Te = cr[WS(rs, 3)]; + Tf = Td - Te; + Tj = Td + Te; 
+ } + cr[0] = T3 + T6; + ci[0] = Tc + Tf; + { + E T8, Tg, T7, T9; + T8 = T3 - T6; + Tg = Tc - Tf; + T7 = W[2]; + T9 = W[3]; + cr[WS(rs, 2)] = FNMS(T9, Tg, T7 * T8); + ci[WS(rs, 2)] = FMA(T9, T8, T7 * Tg); + } + { + E Tk, To, Th, Tl; + Tk = Ti - Tj; + To = Tm + Tn; + Th = W[0]; + Tl = W[1]; + cr[WS(rs, 1)] = FNMS(Tl, To, Th * Tk); + ci[WS(rs, 1)] = FMA(Th, To, Tl * Tk); + } + { + E Tq, Ts, Tp, Tr; + Tq = Ti + Tj; + Ts = Tn - Tm; + Tp = W[4]; + Tr = W[5]; + cr[WS(rs, 3)] = FNMS(Tr, Ts, Tp * Tq); + ci[WS(rs, 3)] = FMA(Tp, Ts, Tr * Tq); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hb_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hb_4) (planner *p) { + X(khc2hc_register) (p, hb_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -dif -name hb_5 -include hb.h */ + +/* + * This function contains 40 FP additions, 34 FP multiplications, + * (or, 14 additions, 8 multiplications, 26 fused multiply/add), + * 42 stack variables, 4 constants, and 20 memory accesses + */ +#include "hb.h" + +static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) { + E TQ, TP, TT, TR, TS, TU; + { + E T1, Tn, TM, Tw, Tb, T8, To, Tf, Ta, Tg, Th; + { + E T2, T3, T5, T6, T4, Tu; + T1 = cr[0]; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + T5 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + Tn = ci[WS(rs, 4)]; + T4 = T2 + T3; + Tu = T2 - T3; + { + E T7, Tv, Td, Te; + T7 = T5 + T6; + Tv = T5 - T6; + Td = ci[WS(rs, 3)]; + Te = cr[WS(rs, 4)]; + TM = FNMS(KP618033988, Tu, Tv); + Tw = FMA(KP618033988, Tv, Tu); + Tb = T4 - T7; + T8 = T4 + T7; + To = Td - Te; + Tf = Td + Te; + Ta = FNMS(KP250000000, T8, T1); + Tg = ci[WS(rs, 2)]; + Th = cr[WS(rs, 3)]; + } + } + cr[0] = T1 + T8; + { + E TG, T9, Tm, Tz, TH, TC, TA, Tk, Tt, TL, Tc, Ti, Tp, TI; + TG = FNMS(KP559016994, Tb, Ta); + Tc = 
FMA(KP559016994, Tb, Ta); + T9 = W[0]; + Ti = Tg + Th; + Tp = Tg - Th; + Tm = W[1]; + { + E Ts, Tj, Tr, Tq; + Tz = W[6]; + Ts = To - Tp; + Tq = To + Tp; + Tj = FMA(KP618033988, Ti, Tf); + TH = FNMS(KP618033988, Tf, Ti); + ci[0] = Tn + Tq; + Tr = FNMS(KP250000000, Tq, Tn); + TC = W[7]; + TA = FMA(KP951056516, Tj, Tc); + Tk = FNMS(KP951056516, Tj, Tc); + Tt = FMA(KP559016994, Ts, Tr); + TL = FNMS(KP559016994, Ts, Tr); + } + { + E TE, TB, Ty, Tl, TD, Tx; + TE = TC * TA; + TB = Tz * TA; + Ty = Tm * Tk; + Tl = T9 * Tk; + TD = FNMS(KP951056516, Tw, Tt); + Tx = FMA(KP951056516, Tw, Tt); + TI = FMA(KP951056516, TH, TG); + TQ = FNMS(KP951056516, TH, TG); + ci[WS(rs, 4)] = FMA(Tz, TD, TE); + cr[WS(rs, 4)] = FNMS(TC, TD, TB); + ci[WS(rs, 1)] = FMA(T9, Tx, Ty); + cr[WS(rs, 1)] = FNMS(Tm, Tx, Tl); + } + { + E TF, TK, TN, TJ, TO; + TF = W[2]; + TK = W[3]; + TP = W[4]; + TT = FMA(KP951056516, TM, TL); + TN = FNMS(KP951056516, TM, TL); + TJ = TF * TI; + TO = TK * TI; + TR = TP * TQ; + TS = W[5]; + cr[WS(rs, 2)] = FNMS(TK, TN, TJ); + ci[WS(rs, 2)] = FMA(TF, TN, TO); + } + } + } + cr[WS(rs, 3)] = FNMS(TS, TT, TR); + TU = TS * TQ; + ci[WS(rs, 3)] = FMA(TP, TT, TU); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 5}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hb_5", twinstr, &GENUS, {14, 8, 26, 0} }; + +void X(codelet_hb_5) (planner *p) { + X(khc2hc_register) (p, hb_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -dif -name hb_5 -include hb.h */ + +/* + * This function contains 40 FP additions, 28 FP multiplications, + * (or, 26 additions, 14 multiplications, 14 fused multiply/add), + * 27 stack variables, 4 constants, and 20 memory accesses + */ +#include "hb.h" + +static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, 
+0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) { + E T1, Tj, TG, Ts, T8, Ti, T9, Tn, TD, Tu, Tg, Tt; + { + E T4, Tq, T7, Tr; + T1 = cr[0]; + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + T4 = T2 + T3; + Tq = T2 - T3; + T5 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + T7 = T5 + T6; + Tr = T5 - T6; + } + Tj = KP559016994 * (T4 - T7); + TG = FMA(KP951056516, Tq, KP587785252 * Tr); + Ts = FNMS(KP951056516, Tr, KP587785252 * Tq); + T8 = T4 + T7; + Ti = FNMS(KP250000000, T8, T1); + } + { + E Tc, Tl, Tf, Tm; + T9 = ci[WS(rs, 4)]; + { + E Ta, Tb, Td, Te; + Ta = ci[WS(rs, 3)]; + Tb = cr[WS(rs, 4)]; + Tc = Ta - Tb; + Tl = Ta + Tb; + Td = ci[WS(rs, 2)]; + Te = cr[WS(rs, 3)]; + Tf = Td - Te; + Tm = Td + Te; + } + Tn = FNMS(KP951056516, Tm, KP587785252 * Tl); + TD = FMA(KP951056516, Tl, KP587785252 * Tm); + Tu = KP559016994 * (Tc - Tf); + Tg = Tc + Tf; + Tt = FNMS(KP250000000, Tg, T9); + } + cr[0] = T1 + T8; + ci[0] = T9 + Tg; + { + E To, Ty, Tw, TA, Tk, Tv; + Tk = Ti - Tj; + To = Tk - Tn; + Ty = Tk + Tn; + Tv = Tt - Tu; + Tw = Ts + Tv; + TA = Tv - Ts; + { + E Th, Tp, Tx, Tz; + Th = W[2]; + Tp = W[3]; + cr[WS(rs, 2)] = FNMS(Tp, Tw, Th * To); + ci[WS(rs, 2)] = FMA(Th, Tw, Tp * To); + Tx = W[4]; + Tz = W[5]; + cr[WS(rs, 3)] = FNMS(Tz, TA, Tx * Ty); + ci[WS(rs, 3)] = FMA(Tx, TA, Tz * Ty); + } + } + { + E TE, TK, TI, TM, TC, TH; + TC = Tj + Ti; + TE = TC - TD; + TK = TC + TD; + TH = Tu + Tt; + TI = TG + TH; + TM = TH - TG; + { + E TB, TF, TJ, TL; + TB = W[0]; + TF = W[1]; + cr[WS(rs, 1)] = FNMS(TF, TI, TB * TE); + ci[WS(rs, 1)] = FMA(TB, TI, TF * TE); + TJ = W[6]; + TL = W[7]; + cr[WS(rs, 4)] = FNMS(TL, TM, TJ * TK); + ci[WS(rs, 4)] = FMA(TJ, TM, TL * TK); + } + } + } + } +} + +static const 
tw_instr twinstr[] = { + {TW_FULL, 1, 5}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hb_5", twinstr, &GENUS, {26, 14, 14, 0} }; + +void X(codelet_hb_5) (planner *p) { + X(khc2hc_register) (p, hb_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hb_6 -include hb.h */ + +/* + * This function contains 46 FP additions, 32 FP multiplications, + * (or, 24 additions, 10 multiplications, 22 fused multiply/add), + * 45 stack variables, 2 constants, and 24 memory accesses + */ +#include "hb.h" + +static void hb_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { + E TK, TR, TB, TM, TL, TS; + { + E Td, TN, TO, TJ, Tn, Tk, TC, T3, Tr, T4, T5, T7, T8; + { + E TH, Tg, Tj, TI, Th, Ti, T1, T2; + { + E Tb, Tc, Te, Tf; + Tb = ci[WS(rs, 5)]; + Tc = cr[WS(rs, 3)]; + Te = ci[WS(rs, 3)]; + Tf = cr[WS(rs, 5)]; + Th = ci[WS(rs, 4)]; + Td = Tb - Tc; + TN = Tb + Tc; + Ti = cr[WS(rs, 4)]; + TH = Te + Tf; + Tg = Te - Tf; + } + Tj = Th - Ti; + TI = Th + Ti; + T1 = cr[0]; + T2 = ci[WS(rs, 2)]; + TO = TH - TI; + TJ = TH + TI; + Tn = Tj - Tg; + Tk = Tg + Tj; + TC = T1 - T2; + T3 = T1 + T2; + Tr = FNMS(KP500000000, Tk, Td); + T4 = cr[WS(rs, 2)]; + T5 = ci[0]; + T7 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 1)]; + } + { + E Tl, Tq, TQ, Ts, Ta, T10, TG; + ci[0] = Td + Tk; + { + E T6, TD, T9, TE, TF; + T6 = T4 + T5; + TD = T4 - T5; + T9 = T7 + T8; + TE = T7 - T8; + Tl = W[2]; + Tq = W[3]; + TQ = TD - TE; + TF = TD + TE; 
+ Ts = T6 - T9; + Ta = T6 + T9; + T10 = TC + TF; + TG = FNMS(KP500000000, TF, TC); + } + { + E T13, TP, Tz, TZ, Tw, T14, Tv, Ty; + { + E Tt, T12, T11, Tp, Tm, To, Tu; + T13 = TN + TO; + TP = FNMS(KP500000000, TO, TN); + cr[0] = T3 + Ta; + Tm = FNMS(KP500000000, Ta, T3); + Tz = FMA(KP866025403, Ts, Tr); + Tt = FNMS(KP866025403, Ts, Tr); + TZ = W[4]; + To = FNMS(KP866025403, Tn, Tm); + Tw = FMA(KP866025403, Tn, Tm); + Tu = Tl * Tt; + T12 = W[5]; + T11 = TZ * T10; + Tp = Tl * To; + ci[WS(rs, 2)] = FMA(Tq, To, Tu); + T14 = T12 * T10; + cr[WS(rs, 3)] = FNMS(T12, T13, T11); + cr[WS(rs, 2)] = FNMS(Tq, Tt, Tp); + } + ci[WS(rs, 3)] = FMA(TZ, T13, T14); + Tv = W[6]; + Ty = W[7]; + { + E TX, TT, TW, TV, TY, TU, TA, Tx; + TK = FNMS(KP866025403, TJ, TG); + TU = FMA(KP866025403, TJ, TG); + TA = Tv * Tz; + Tx = Tv * Tw; + TX = FNMS(KP866025403, TQ, TP); + TR = FMA(KP866025403, TQ, TP); + ci[WS(rs, 4)] = FMA(Ty, Tw, TA); + cr[WS(rs, 4)] = FNMS(Ty, Tz, Tx); + TT = W[8]; + TW = W[9]; + TB = W[0]; + TV = TT * TU; + TY = TW * TU; + TM = W[1]; + TL = TB * TK; + cr[WS(rs, 5)] = FNMS(TW, TX, TV); + ci[WS(rs, 5)] = FMA(TT, TX, TY); + } + } + } + } + cr[WS(rs, 1)] = FNMS(TM, TR, TL); + TS = TM * TK; + ci[WS(rs, 1)] = FMA(TB, TR, TS); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 6, "hb_6", twinstr, &GENUS, {24, 10, 22, 0} }; + +void X(codelet_hb_6) (planner *p) { + X(khc2hc_register) (p, hb_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hb_6 -include hb.h */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 27 stack variables, 2 constants, and 24 memory accesses + */ +#include "hb.h" + +static void hb_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, 
+0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { + E T3, Ty, Ta, TO, Tr, TB, Td, TE, Tk, TL, Tn, TH; + { + E T1, T2, Tb, Tc; + T1 = cr[0]; + T2 = ci[WS(rs, 2)]; + T3 = T1 + T2; + Ty = T1 - T2; + { + E T6, Tz, T9, TA; + { + E T4, T5, T7, T8; + T4 = cr[WS(rs, 2)]; + T5 = ci[0]; + T6 = T4 + T5; + Tz = T4 - T5; + T7 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 1)]; + T9 = T7 + T8; + TA = T7 - T8; + } + Ta = T6 + T9; + TO = KP866025403 * (Tz - TA); + Tr = KP866025403 * (T6 - T9); + TB = Tz + TA; + } + Tb = ci[WS(rs, 5)]; + Tc = cr[WS(rs, 3)]; + Td = Tb - Tc; + TE = Tb + Tc; + { + E Tg, TG, Tj, TF; + { + E Te, Tf, Th, Ti; + Te = ci[WS(rs, 3)]; + Tf = cr[WS(rs, 5)]; + Tg = Te - Tf; + TG = Te + Tf; + Th = ci[WS(rs, 4)]; + Ti = cr[WS(rs, 4)]; + Tj = Th - Ti; + TF = Th + Ti; + } + Tk = Tg + Tj; + TL = KP866025403 * (TG + TF); + Tn = KP866025403 * (Tj - Tg); + TH = TF - TG; + } + } + cr[0] = T3 + Ta; + ci[0] = Td + Tk; + { + E TC, TI, Tx, TD; + TC = Ty + TB; + TI = TE - TH; + Tx = W[4]; + TD = W[5]; + cr[WS(rs, 3)] = FNMS(TD, TI, Tx * TC); + ci[WS(rs, 3)] = FMA(TD, TC, Tx * TI); + } + { + E To, Tu, Ts, Tw, Tm, Tq; + Tm = FNMS(KP500000000, Ta, T3); + To = Tm - Tn; + Tu = Tm + Tn; + Tq = FNMS(KP500000000, Tk, Td); + Ts = Tq - Tr; + Tw = Tr + Tq; + { + E Tl, Tp, Tt, Tv; + Tl = W[2]; + Tp = W[3]; + cr[WS(rs, 2)] = FNMS(Tp, Ts, Tl * To); + ci[WS(rs, 2)] = FMA(Tl, Ts, Tp * To); + Tt = W[6]; + Tv = W[7]; + cr[WS(rs, 4)] = FNMS(Tv, Tw, Tt * Tu); + ci[WS(rs, 4)] = FMA(Tt, Tw, Tv * Tu); + } + } + { + E TM, TS, TQ, TU, TK, TP; + TK = FNMS(KP500000000, TB, Ty); + TM = TK - TL; + TS = TK + TL; + TP = FMA(KP500000000, TH, TE); + TQ = TO + TP; + TU = TP - TO; + { + E TJ, TN, TR, TT; + TJ = W[0]; + TN = W[1]; + cr[WS(rs, 1)] = FNMS(TN, TQ, TJ * TM); + ci[WS(rs, 1)] = FMA(TN, TM, TJ * TQ); + 
TR = W[8]; + TT = W[9]; + cr[WS(rs, 5)] = FNMS(TT, TU, TR * TS); + ci[WS(rs, 5)] = FMA(TT, TS, TR * TU); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 6, "hb_6", twinstr, &GENUS, {32, 14, 14, 0} }; + +void X(codelet_hb_6) (planner *p) { + X(khc2hc_register) (p, hb_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3959 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */ + +/* + * This function contains 1038 FP additions, 644 FP multiplications, + * (or, 520 additions, 126 multiplications, 518 fused multiply/add), + * 231 stack variables, 15 constants, and 256 memory accesses + */ +#include "hb.h" + +static void hb_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr 
= cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tcx, Tcw, Tcv; + { + E Thy, Tv, T7n, T5B, TfP, Tey, Tkl, TjB, T6U, T2k, T7o, T2H, TiH, Tia, Tk8; + E Tj8, T6V, T5E, Tbz, T9N, Tb7, T9Q, Tgh, Tev, Tb6, T8G, TbA, T8N, TfO, TcU; + E Tgi, Td5, Ti3, T10, TjC, Tje, TiI, ThF, TeA, Tds, TjD, Tjb, TeB, Tdh, Tgl; + E TfT, Tgk, TfW, T6Z, T7r, T5H, T39, Tbb, TbC, T9S, T8V, T72, T7q, T5G, T3A; + E Tbe, TbD, T9T, T92, ThH, T1w, Tke, Tjq, Tkf, Tjt, TiK, ThO, Tgb, TgT, Tfc; + E Tec, Tg8, TgU, Tfd, Tel, T77, T83, T6i, T5a, T7a, T82, T6j, T5n, Tbj, Tcc; + E Tas, T9f, Tbm, Tcb, Tar, T9m, ThQ, T21, Tkb, Tjj, Tkc, Tjm, TiL, ThX, Tg4; + E TgW, Tf9, TdL, Tg1, TgX, Tfa, TdU, T7e, T80, T6f, T4h, T9q, Tbr, T7h, T7Z; + E T6g, T4u, T9D, T9C, Tbo, T9B, Tbp, T9x; + { + E T3v, T8Z, T8W, T90, T8X, T3y, T3q, T70; + { + E TcQ, TcT, Td4, TcZ; + { + E T24, T5t, T7, T27, T5w, Ti4, Tet, T2i, T5z, Te, Teu, Ti5, T5y, T2d, T8H; + E T2u, Td0, Tm, Ti7, Td3, T8I, T2p, Tq, T2w, Tp, TcV, T2E, Tr, T2x, T2y; + E Tes, Ter; + { + E T1, T2, T4, T5, T5u, T5v; + T1 = cr[0]; + T2 = ci[WS(rs, 31)]; + T4 = cr[WS(rs, 16)]; + T5 = ci[WS(rs, 15)]; + { + E T25, T3, T6, T26; + T25 = ci[WS(rs, 47)]; + T24 = T1 - T2; + T3 = T1 + T2; + T5t = T4 - T5; + T6 = T4 + T5; + T26 = cr[WS(rs, 48)]; + T5u = ci[WS(rs, 63)]; + T5v = cr[WS(rs, 32)]; + TcQ = T3 - T6; + T7 = T3 + T6; + Tes = T25 - T26; + T27 = T25 + T26; + } + Ter = T5u - T5v; + T5w = T5u + T5v; + } + { + E Ta, T29, Tb, TcR, T2h, Tc, T2a, T2b; + { + E T2f, T2g, T8, T9; + T8 = cr[WS(rs, 8)]; + T9 = ci[WS(rs, 23)]; + Ti4 = Ter + Tes; + Tet = Ter - Tes; + T2f = ci[WS(rs, 39)]; + T2g = cr[WS(rs, 56)]; + Ta = T8 + T9; + T29 = T8 - T9; + Tb = ci[WS(rs, 7)]; + TcR = T2f - T2g; + T2h = T2f + T2g; + Tc = cr[WS(rs, 24)]; + T2a = ci[WS(rs, 55)]; + T2b = cr[WS(rs, 40)]; + } + { + E Tj, T2l, Ti, Td1, T2t, Tk, T2m, T2n; + { + E Tg, Th, T2r, T2s; + Tg = cr[WS(rs, 4)]; + { + E T2e, Td, TcS, T2c; + T2e = Tb - Tc; + Td = Tb + Tc; + TcS = T2a - T2b; + T2c = T2a + T2b; + 
T2i = T2e - T2h; + T5z = T2e + T2h; + Te = Ta + Td; + Teu = Ta - Td; + TcT = TcR - TcS; + Ti5 = TcS + TcR; + T5y = T29 + T2c; + T2d = T29 - T2c; + Th = ci[WS(rs, 27)]; + } + T2r = ci[WS(rs, 59)]; + T2s = cr[WS(rs, 36)]; + Tj = cr[WS(rs, 20)]; + T2l = Tg - Th; + Ti = Tg + Th; + Td1 = T2r - T2s; + T2t = T2r + T2s; + Tk = ci[WS(rs, 11)]; + T2m = ci[WS(rs, 43)]; + T2n = cr[WS(rs, 52)]; + } + { + E Tn, To, T2C, T2D; + Tn = ci[WS(rs, 3)]; + { + E T2q, Tl, Td2, T2o; + T2q = Tj - Tk; + Tl = Tj + Tk; + Td2 = T2m - T2n; + T2o = T2m + T2n; + T8H = T2t - T2q; + T2u = T2q + T2t; + Td0 = Ti - Tl; + Tm = Ti + Tl; + Ti7 = Td1 + Td2; + Td3 = Td1 - Td2; + T8I = T2l + T2o; + T2p = T2l - T2o; + To = cr[WS(rs, 28)]; + } + T2C = ci[WS(rs, 35)]; + T2D = cr[WS(rs, 60)]; + Tq = cr[WS(rs, 12)]; + T2w = Tn - To; + Tp = Tn + To; + TcV = T2C - T2D; + T2E = T2C + T2D; + Tr = ci[WS(rs, 19)]; + T2x = ci[WS(rs, 51)]; + T2y = cr[WS(rs, 44)]; + } + } + } + { + E Tj6, T8K, T8L, T9L, T8F, Ti6, T8E, T9M, T5C, T5D, Ti9, Tj7; + { + E T2F, Ti8, T2A, TjA, Tew, Tex, Tjz; + { + E Tf, TcY, TcX, Tu, T5x, T5A; + Tj6 = T7 - Te; + Tf = T7 + Te; + { + E T2B, Ts, TcW, T2z, Tt; + T2B = Tq - Tr; + Ts = Tq + Tr; + TcW = T2x - T2y; + T2z = T2x + T2y; + T8K = T2B + T2E; + T2F = T2B - T2E; + TcY = Tp - Ts; + Tt = Tp + Ts; + TcX = TcV - TcW; + Ti8 = TcV + TcW; + T8L = T2w + T2z; + T2A = T2w - T2z; + Tu = Tm + Tt; + TjA = Tm - Tt; + } + T9L = T5w - T5t; + T5x = T5t + T5w; + T5A = T5y - T5z; + T8F = T5y + T5z; + Td4 = Td0 + Td3; + Tew = Td0 - Td3; + Thy = Tf - Tu; + Tv = Tf + Tu; + T7n = FNMS(KP707106781, T5A, T5x); + T5B = FMA(KP707106781, T5A, T5x); + Tex = TcY + TcX; + TcZ = TcX - TcY; + Ti6 = Ti4 + Ti5; + Tjz = Ti4 - Ti5; + } + { + E T28, T2j, T2v, T2G; + T8E = T24 + T27; + T28 = T24 - T27; + TfP = Tew + Tex; + Tey = Tew - Tex; + Tkl = TjA + Tjz; + TjB = Tjz - TjA; + T2j = T2d + T2i; + T9M = T2d - T2i; + T5C = FMA(KP414213562, T2p, T2u); + T2v = FNMS(KP414213562, T2u, T2p); + T2G = FMA(KP414213562, T2F, T2A); + T5D = 
FNMS(KP414213562, T2A, T2F); + T6U = FNMS(KP707106781, T2j, T28); + T2k = FMA(KP707106781, T2j, T28); + T7o = T2v - T2G; + T2H = T2v + T2G; + Ti9 = Ti7 + Ti8; + Tj7 = Ti8 - Ti7; + } + } + { + E T8J, T9O, T9P, T8M; + TiH = Ti6 + Ti9; + Tia = Ti6 - Ti9; + Tk8 = Tj6 + Tj7; + Tj8 = Tj6 - Tj7; + T8J = FNMS(KP414213562, T8I, T8H); + T9O = FMA(KP414213562, T8H, T8I); + T6V = T5D - T5C; + T5E = T5C + T5D; + Tbz = FNMS(KP707106781, T9M, T9L); + T9N = FMA(KP707106781, T9M, T9L); + T9P = FMA(KP414213562, T8K, T8L); + T8M = FNMS(KP414213562, T8L, T8K); + Tb7 = T9O + T9P; + T9Q = T9O - T9P; + Tgh = Teu + Tet; + Tev = Tet - Teu; + Tb6 = FMA(KP707106781, T8F, T8E); + T8G = FNMS(KP707106781, T8F, T8E); + TbA = T8M - T8J; + T8N = T8J + T8M; + } + } + } + { + E T8S, TC, Tdn, Tdk, ThC, T3e, T8P, T36, T2X, Tda, TY, ThA, Tdf, T35, T2S; + E T3x, T3o, Tdl, TJ, ThD, Tdq, T3w, T3j, T34, TR, Tdc, Td9, Thz, T2N; + { + E TV, T2O, TU, Tdd, T2W, TW, T2P, T2Q; + { + E Tz, T3r, Ty, Tdj, T3u, TA, T3b, T3c; + { + E Tw, Tx, T3s, T3t; + Tw = cr[WS(rs, 2)]; + TfO = TcQ + TcT; + TcU = TcQ - TcT; + Tgi = Td4 + TcZ; + Td5 = TcZ - Td4; + Tx = ci[WS(rs, 29)]; + T3s = ci[WS(rs, 45)]; + T3t = cr[WS(rs, 50)]; + Tz = cr[WS(rs, 18)]; + T3r = Tw - Tx; + Ty = Tw + Tx; + Tdj = T3s - T3t; + T3u = T3s + T3t; + TA = ci[WS(rs, 13)]; + T3b = ci[WS(rs, 61)]; + T3c = cr[WS(rs, 34)]; + } + { + E T3a, TB, Tdi, T3d; + T8S = T3r + T3u; + T3v = T3r - T3u; + T3a = Tz - TA; + TB = Tz + TA; + Tdi = T3b - T3c; + T3d = T3b + T3c; + TC = Ty + TB; + Tdn = Ty - TB; + Tdk = Tdi - Tdj; + ThC = Tdi + Tdj; + T3e = T3a + T3d; + T8P = T3d - T3a; + } + } + { + E TS, TT, T2U, T2V; + TS = cr[WS(rs, 6)]; + TT = ci[WS(rs, 25)]; + T2U = ci[WS(rs, 41)]; + T2V = cr[WS(rs, 54)]; + TV = ci[WS(rs, 9)]; + T2O = TS - TT; + TU = TS + TT; + Tdd = T2U - T2V; + T2W = T2U + T2V; + TW = cr[WS(rs, 22)]; + T2P = ci[WS(rs, 57)]; + T2Q = cr[WS(rs, 38)]; + } + { + E TG, T3f, TF, Tdo, T3n, TH, T3g, T3h; + { + E TD, TE, T3l, T3m; + TD = cr[WS(rs, 10)]; + { + E T2T, 
TX, Tde, T2R; + T2T = TV - TW; + TX = TV + TW; + Tde = T2P - T2Q; + T2R = T2P + T2Q; + T36 = T2T - T2W; + T2X = T2T + T2W; + Tda = TU - TX; + TY = TU + TX; + ThA = Tde + Tdd; + Tdf = Tdd - Tde; + T35 = T2O - T2R; + T2S = T2O + T2R; + TE = ci[WS(rs, 21)]; + } + T3l = ci[WS(rs, 37)]; + T3m = cr[WS(rs, 58)]; + TG = ci[WS(rs, 5)]; + T3f = TD - TE; + TF = TD + TE; + Tdo = T3l - T3m; + T3n = T3l + T3m; + TH = cr[WS(rs, 26)]; + T3g = ci[WS(rs, 53)]; + T3h = cr[WS(rs, 42)]; + } + { + E TO, T30, TN, Td8, T33, TP, T2K, T2L; + { + E TL, TM, T31, T32; + TL = ci[WS(rs, 1)]; + { + E T3k, TI, Tdp, T3i; + T3k = TG - TH; + TI = TG + TH; + Tdp = T3g - T3h; + T3i = T3g + T3h; + T3x = T3k - T3n; + T3o = T3k + T3n; + Tdl = TF - TI; + TJ = TF + TI; + ThD = Tdp + Tdo; + Tdq = Tdo - Tdp; + T3w = T3f - T3i; + T3j = T3f + T3i; + TM = cr[WS(rs, 30)]; + } + T31 = ci[WS(rs, 49)]; + T32 = cr[WS(rs, 46)]; + TO = cr[WS(rs, 14)]; + T30 = TL - TM; + TN = TL + TM; + Td8 = T31 - T32; + T33 = T31 + T32; + TP = ci[WS(rs, 17)]; + T2K = ci[WS(rs, 33)]; + T2L = cr[WS(rs, 62)]; + } + { + E T2J, TQ, Td7, T2M; + T8Z = T30 + T33; + T34 = T30 - T33; + T2J = TO - TP; + TQ = TO + TP; + Td7 = T2K - T2L; + T2M = T2K + T2L; + TR = TN + TQ; + Tdc = TN - TQ; + Td9 = Td7 - Td8; + Thz = Td7 + Td8; + T2N = T2J - T2M; + T8W = T2J + T2M; + } + } + } + } + { + E Tja, Tj9, TfU, TfV, TfR, Tdb, Tdg, TfS; + { + E ThE, ThB, Tdm, Tdr; + { + E Tjc, TK, TZ, Tjd; + Tjc = TC - TJ; + TK = TC + TJ; + TZ = TR + TY; + Tja = TR - TY; + Tjd = ThC - ThD; + ThE = ThC + ThD; + Tj9 = Thz - ThA; + ThB = Thz + ThA; + Ti3 = TK - TZ; + T10 = TK + TZ; + TjC = Tjc - Tjd; + Tje = Tjc + Tjd; + } + TfU = Tdl + Tdk; + Tdm = Tdk - Tdl; + Tdr = Tdn - Tdq; + TfV = Tdn + Tdq; + TiI = ThE + ThB; + ThF = ThB - ThE; + TeA = FMA(KP414213562, Tdm, Tdr); + Tds = FNMS(KP414213562, Tdr, Tdm); + TfR = Tda + Td9; + Tdb = Td9 - Tda; + Tdg = Tdc - Tdf; + TfS = Tdc + Tdf; + } + { + E T2Z, T6X, T37, T2Y; + TjD = Tja + Tj9; + Tjb = Tj9 - Tja; + TeB = FNMS(KP414213562, 
Tdb, Tdg); + Tdh = FMA(KP414213562, Tdg, Tdb); + T90 = T2S + T2X; + T2Y = T2S - T2X; + Tgl = FMA(KP414213562, TfR, TfS); + TfT = FNMS(KP414213562, TfS, TfR); + Tgk = FNMS(KP414213562, TfU, TfV); + TfW = FMA(KP414213562, TfV, TfU); + T2Z = FMA(KP707106781, T2Y, T2N); + T6X = FNMS(KP707106781, T2Y, T2N); + T37 = T35 + T36; + T8X = T35 - T36; + { + E T8Q, T8T, T3p, T6Y, T38; + T3y = T3w + T3x; + T8Q = T3x - T3w; + T8T = T3j + T3o; + T3p = T3j - T3o; + T6Y = FNMS(KP707106781, T37, T34); + T38 = FMA(KP707106781, T37, T34); + { + E Tb9, T8R, Tba, T8U; + Tb9 = FMA(KP707106781, T8Q, T8P); + T8R = FNMS(KP707106781, T8Q, T8P); + Tba = FMA(KP707106781, T8T, T8S); + T8U = FNMS(KP707106781, T8T, T8S); + T6Z = FMA(KP668178637, T6Y, T6X); + T7r = FNMS(KP668178637, T6X, T6Y); + T5H = FMA(KP198912367, T2Z, T38); + T39 = FNMS(KP198912367, T38, T2Z); + Tbb = FNMS(KP198912367, Tba, Tb9); + TbC = FMA(KP198912367, Tb9, Tba); + T9S = FNMS(KP668178637, T8R, T8U); + T8V = FMA(KP668178637, T8U, T8R); + T3q = FMA(KP707106781, T3p, T3e); + T70 = FNMS(KP707106781, T3p, T3e); + } + } + } + } + } + } + { + E T97, Tbk, T9j, T9k, Tbh, T9i, Tbi, T9e; + { + E T9g, T5f, T18, Ted, TdY, ThI, T4A, T95, T9b, T57, T1u, Te1, Te4, ThM, T52; + E T9c, T5h, T4K, TdZ, T1f, ThJ, Teg, T5g, T4F, T1j, Te8, T98, T4W, T4N, T1m; + E Te7, T4Q, T1n, Te6; + { + E T1q, Te3, T4Y, T1t, Te2, T51; + { + E T15, T5b, T14, TdX, T5e, T16, T4x, T4y; + { + E T12, T13, T5c, T5d, T71, T3z; + T12 = cr[WS(rs, 1)]; + T71 = FNMS(KP707106781, T3y, T3v); + T3z = FMA(KP707106781, T3y, T3v); + { + E Tbc, T8Y, Tbd, T91; + Tbc = FMA(KP707106781, T8X, T8W); + T8Y = FNMS(KP707106781, T8X, T8W); + Tbd = FMA(KP707106781, T90, T8Z); + T91 = FNMS(KP707106781, T90, T8Z); + T72 = FNMS(KP668178637, T71, T70); + T7q = FMA(KP668178637, T70, T71); + T5G = FNMS(KP198912367, T3q, T3z); + T3A = FMA(KP198912367, T3z, T3q); + Tbe = FNMS(KP198912367, Tbd, Tbc); + TbD = FMA(KP198912367, Tbc, Tbd); + T9T = FNMS(KP668178637, T8Y, T91); + T92 = FMA(KP668178637, 
T91, T8Y); + T13 = ci[WS(rs, 30)]; + } + T5c = ci[WS(rs, 46)]; + T5d = cr[WS(rs, 49)]; + T15 = cr[WS(rs, 17)]; + T5b = T12 - T13; + T14 = T12 + T13; + TdX = T5c - T5d; + T5e = T5c + T5d; + T16 = ci[WS(rs, 14)]; + T4x = ci[WS(rs, 62)]; + T4y = cr[WS(rs, 33)]; + } + { + E T4w, T17, TdW, T4z; + T9g = T5b + T5e; + T5f = T5b - T5e; + T4w = T15 - T16; + T17 = T15 + T16; + TdW = T4x - T4y; + T4z = T4x + T4y; + T18 = T14 + T17; + Ted = T14 - T17; + TdY = TdW - TdX; + ThI = TdW + TdX; + T4A = T4w + T4z; + T95 = T4z - T4w; + } + } + { + E T1r, T53, T56, T1s, T4Z, T50; + { + E T1o, T1p, T54, T55; + T1o = ci[WS(rs, 2)]; + T1p = cr[WS(rs, 29)]; + T54 = ci[WS(rs, 50)]; + T55 = cr[WS(rs, 45)]; + T1r = cr[WS(rs, 13)]; + T53 = T1o - T1p; + T1q = T1o + T1p; + Te3 = T54 - T55; + T56 = T54 + T55; + T1s = ci[WS(rs, 18)]; + T4Z = ci[WS(rs, 34)]; + T50 = cr[WS(rs, 61)]; + } + T9b = T53 + T56; + T57 = T53 - T56; + T4Y = T1r - T1s; + T1t = T1r + T1s; + Te2 = T4Z - T50; + T51 = T4Z + T50; + } + T1u = T1q + T1t; + Te1 = T1q - T1t; + Te4 = Te2 - Te3; + ThM = Te2 + Te3; + T52 = T4Y - T51; + T9c = T4Y + T51; + { + E T1c, T4B, T1b, Tee, T4J, T1d, T4C, T4D; + { + E T19, T1a, T4H, T4I; + T19 = cr[WS(rs, 9)]; + T1a = ci[WS(rs, 22)]; + T4H = ci[WS(rs, 38)]; + T4I = cr[WS(rs, 57)]; + T1c = ci[WS(rs, 6)]; + T4B = T19 - T1a; + T1b = T19 + T1a; + Tee = T4H - T4I; + T4J = T4H + T4I; + T1d = cr[WS(rs, 25)]; + T4C = ci[WS(rs, 54)]; + T4D = cr[WS(rs, 41)]; + } + { + E T1k, T4S, T4V, T1l, T4O, T4P; + { + E T1h, T1i, T4T, T4U; + T1h = cr[WS(rs, 5)]; + { + E T4G, T1e, Tef, T4E; + T4G = T1c - T1d; + T1e = T1c + T1d; + Tef = T4C - T4D; + T4E = T4C + T4D; + T5h = T4G - T4J; + T4K = T4G + T4J; + TdZ = T1b - T1e; + T1f = T1b + T1e; + ThJ = Tef + Tee; + Teg = Tee - Tef; + T5g = T4B - T4E; + T4F = T4B + T4E; + T1i = ci[WS(rs, 26)]; + } + T4T = ci[WS(rs, 42)]; + T4U = cr[WS(rs, 53)]; + T1k = cr[WS(rs, 21)]; + T4S = T1h - T1i; + T1j = T1h + T1i; + Te8 = T4T - T4U; + T4V = T4T + T4U; + T1l = ci[WS(rs, 10)]; + T4O = 
ci[WS(rs, 58)]; + T4P = cr[WS(rs, 37)]; + } + T98 = T4S + T4V; + T4W = T4S - T4V; + T4N = T1k - T1l; + T1m = T1k + T1l; + Te7 = T4O - T4P; + T4Q = T4O + T4P; + } + } + } + T1n = T1j + T1m; + Te6 = T1j - T1m; + { + E Te9, ThL, T4R, T99; + Te9 = Te7 - Te8; + ThL = Te7 + Te8; + T4R = T4N + T4Q; + T99 = T4Q - T4N; + { + E Tjr, ThK, Tjs, ThN; + { + E T1g, T1v, Tjp, Tjo; + Tjr = T18 - T1f; + T1g = T18 + T1f; + T1v = T1n + T1u; + Tjp = T1n - T1u; + ThK = ThI + ThJ; + Tjo = ThI - ThJ; + ThH = T1g - T1v; + T1w = T1g + T1v; + Tke = Tjp + Tjo; + Tjq = Tjo - Tjp; + Tjs = ThM - ThL; + ThN = ThL + ThM; + } + { + E Tg6, Te0, Tg9, Teh, Tej, Tei, Tga, Teb, Te5, Tea; + Tg6 = TdZ + TdY; + Te0 = TdY - TdZ; + Tkf = Tjr + Tjs; + Tjt = Tjr - Tjs; + TiK = ThK + ThN; + ThO = ThK - ThN; + Tg9 = Ted + Teg; + Teh = Ted - Teg; + Tej = Te4 - Te1; + Te5 = Te1 + Te4; + Tea = Te6 - Te9; + Tei = Te6 + Te9; + Tga = Tea + Te5; + Teb = Te5 - Tea; + { + E T9h, T4M, T78, T96, T5k, T5l, T75, T5j, T76, T59; + { + E T5i, Tg7, Tek, T4L, T4X, T58; + T9h = T4F + T4K; + T4L = T4F - T4K; + Tgb = FNMS(KP707106781, Tga, Tg9); + TgT = FMA(KP707106781, Tga, Tg9); + Tfc = FMA(KP707106781, Teb, Te0); + Tec = FNMS(KP707106781, Teb, Te0); + Tg7 = Tei + Tej; + Tek = Tei - Tej; + T4M = FMA(KP707106781, T4L, T4A); + T78 = FNMS(KP707106781, T4L, T4A); + Tg8 = FNMS(KP707106781, Tg7, Tg6); + TgU = FMA(KP707106781, Tg7, Tg6); + Tfd = FMA(KP707106781, Tek, Teh); + Tel = FNMS(KP707106781, Tek, Teh); + T5i = T5g + T5h; + T96 = T5h - T5g; + T5k = FNMS(KP414213562, T4R, T4W); + T4X = FMA(KP414213562, T4W, T4R); + T58 = FNMS(KP414213562, T57, T52); + T5l = FMA(KP414213562, T52, T57); + T75 = FNMS(KP707106781, T5i, T5f); + T5j = FMA(KP707106781, T5i, T5f); + T76 = T4X - T58; + T59 = T4X + T58; + } + { + E T79, T5m, T9a, T9d; + T77 = FNMS(KP923879532, T76, T75); + T83 = FMA(KP923879532, T76, T75); + T6i = FMA(KP923879532, T59, T4M); + T5a = FNMS(KP923879532, T59, T4M); + T79 = T5l - T5k; + T5m = T5k + T5l; + T97 = FNMS(KP707106781, 
T96, T95); + Tbk = FMA(KP707106781, T96, T95); + T7a = FNMS(KP923879532, T79, T78); + T82 = FMA(KP923879532, T79, T78); + T6j = FMA(KP923879532, T5m, T5j); + T5n = FNMS(KP923879532, T5m, T5j); + T9j = FNMS(KP414213562, T98, T99); + T9a = FMA(KP414213562, T99, T98); + T9d = FMA(KP414213562, T9c, T9b); + T9k = FNMS(KP414213562, T9b, T9c); + Tbh = FMA(KP707106781, T9h, T9g); + T9i = FNMS(KP707106781, T9h, T9g); + Tbi = T9a + T9d; + T9e = T9a - T9d; + } + } + } + } + } + } + { + E T9z, T4m, T1D, TdM, ThR, Tdx, T3H, T9o, T9r, T4e, T1Z, TdA, TdD, ThV, T49; + E T9s, T4o, T3R, Tdy, T1K, ThS, TdP, T4n, T3M, T1O, T3V, TdH, T3U, T1R, T3W; + E T9u, T43; + { + E T1V, T46, TdC, T45, T1Y, T47, T48, TdB; + { + E Tdw, T3D, T3G, Tdv, T4a, T4d; + { + E T4i, T1z, T3E, T4l, T1C, T3F; + { + E T4j, T4k, T1A, T1B; + { + E T1x, Tbl, T9l, T1y; + T1x = ci[0]; + Tbj = FNMS(KP923879532, Tbi, Tbh); + Tcc = FMA(KP923879532, Tbi, Tbh); + Tas = FMA(KP923879532, T9e, T97); + T9f = FNMS(KP923879532, T9e, T97); + Tbl = T9j - T9k; + T9l = T9j + T9k; + T1y = cr[WS(rs, 31)]; + T4j = ci[WS(rs, 48)]; + Tbm = FNMS(KP923879532, Tbl, Tbk); + Tcb = FMA(KP923879532, Tbl, Tbk); + Tar = FNMS(KP923879532, T9l, T9i); + T9m = FMA(KP923879532, T9l, T9i); + T4i = T1x - T1y; + T1z = T1x + T1y; + T4k = cr[WS(rs, 47)]; + } + T1A = cr[WS(rs, 15)]; + T1B = ci[WS(rs, 16)]; + T3E = ci[WS(rs, 32)]; + Tdw = T4j - T4k; + T4l = T4j + T4k; + T3D = T1A - T1B; + T1C = T1A + T1B; + T3F = cr[WS(rs, 63)]; + } + T9z = T4i + T4l; + T4m = T4i - T4l; + T1D = T1z + T1C; + TdM = T1z - T1C; + T3G = T3E + T3F; + Tdv = T3E - T3F; + } + { + E T4b, T4c, T1T, T1U, T1W, T1X; + T1T = ci[WS(rs, 4)]; + T1U = cr[WS(rs, 27)]; + ThR = Tdv + Tdw; + Tdx = Tdv - Tdw; + T3H = T3D - T3G; + T9o = T3D + T3G; + T4a = T1T - T1U; + T1V = T1T + T1U; + T4b = ci[WS(rs, 52)]; + T4c = cr[WS(rs, 43)]; + T1W = cr[WS(rs, 11)]; + T1X = ci[WS(rs, 20)]; + T46 = ci[WS(rs, 36)]; + TdC = T4b - T4c; + T4d = T4b + T4c; + T45 = T1W - T1X; + T1Y = T1W + T1X; + T47 = cr[WS(rs, 
59)]; + } + T9r = T4a + T4d; + T4e = T4a - T4d; + } + T1Z = T1V + T1Y; + TdA = T1V - T1Y; + T48 = T46 + T47; + TdB = T46 - T47; + { + E T3I, T1G, T3J, TdN, T3Q, T3N, T1J, T3K, T3Z, T42; + { + E T3O, T3P, T1E, T1F, T1H, T1I; + T1E = cr[WS(rs, 7)]; + T1F = ci[WS(rs, 24)]; + TdD = TdB - TdC; + ThV = TdB + TdC; + T49 = T45 - T48; + T9s = T45 + T48; + T3I = T1E - T1F; + T1G = T1E + T1F; + T3O = ci[WS(rs, 40)]; + T3P = cr[WS(rs, 55)]; + T1H = ci[WS(rs, 8)]; + T1I = cr[WS(rs, 23)]; + T3J = ci[WS(rs, 56)]; + TdN = T3O - T3P; + T3Q = T3O + T3P; + T3N = T1H - T1I; + T1J = T1H + T1I; + T3K = cr[WS(rs, 39)]; + } + { + E T40, T41, T1P, T1Q; + { + E T1M, TdO, T3L, T1N; + T1M = cr[WS(rs, 3)]; + T4o = T3N - T3Q; + T3R = T3N + T3Q; + Tdy = T1G - T1J; + T1K = T1G + T1J; + TdO = T3J - T3K; + T3L = T3J + T3K; + T1N = ci[WS(rs, 28)]; + T40 = ci[WS(rs, 44)]; + ThS = TdO + TdN; + TdP = TdN - TdO; + T4n = T3I - T3L; + T3M = T3I + T3L; + T3Z = T1M - T1N; + T1O = T1M + T1N; + T41 = cr[WS(rs, 51)]; + } + T1P = cr[WS(rs, 19)]; + T1Q = ci[WS(rs, 12)]; + T3V = ci[WS(rs, 60)]; + TdH = T40 - T41; + T42 = T40 + T41; + T3U = T1P - T1Q; + T1R = T1P + T1Q; + T3W = cr[WS(rs, 35)]; + } + T9u = T3Z + T42; + T43 = T3Z - T42; + } + } + { + E T1S, TdF, T3X, TdG; + T1S = T1O + T1R; + TdF = T1O - T1R; + T3X = T3V + T3W; + TdG = T3V - T3W; + { + E TdI, T3Y, T9v, ThT, ThW; + { + E Tjk, Tji, ThU, Tjh, T1L, T20, Tjl; + Tjk = T1D - T1K; + T1L = T1D + T1K; + T20 = T1S + T1Z; + Tji = T1S - T1Z; + TdI = TdG - TdH; + ThU = TdG + TdH; + T3Y = T3U + T3X; + T9v = T3U - T3X; + ThQ = T1L - T20; + T21 = T1L + T20; + ThT = ThR + ThS; + Tjh = ThR - ThS; + Tjl = ThV - ThU; + ThW = ThU + ThV; + Tkb = Tji + Tjh; + Tjj = Tjh - Tji; + Tkc = Tjk + Tjl; + Tjm = Tjk - Tjl; + } + { + E TfZ, Tdz, Tg2, TdQ, TdS, TdR, Tg3, TdK, TdE, TdJ; + TfZ = Tdy + Tdx; + Tdz = Tdx - Tdy; + Tg2 = TdM + TdP; + TdQ = TdM - TdP; + TdS = TdD - TdA; + TdE = TdA + TdD; + TiL = ThT + ThW; + ThX = ThT - ThW; + TdJ = TdF - TdI; + TdR = TdF + TdI; + Tg3 = TdJ 
+ TdE; + TdK = TdE - TdJ; + { + E T9A, T3T, T7f, T9p, T4r, T4s, T7c, T4q, T7d, T4g; + { + E T4p, Tg0, TdT, T3S, T44, T4f; + T9A = T3M + T3R; + T3S = T3M - T3R; + Tg4 = FNMS(KP707106781, Tg3, Tg2); + TgW = FMA(KP707106781, Tg3, Tg2); + Tf9 = FMA(KP707106781, TdK, Tdz); + TdL = FNMS(KP707106781, TdK, Tdz); + Tg0 = TdR + TdS; + TdT = TdR - TdS; + T3T = FMA(KP707106781, T3S, T3H); + T7f = FNMS(KP707106781, T3S, T3H); + Tg1 = FNMS(KP707106781, Tg0, TfZ); + TgX = FMA(KP707106781, Tg0, TfZ); + Tfa = FMA(KP707106781, TdT, TdQ); + TdU = FNMS(KP707106781, TdT, TdQ); + T4p = T4n + T4o; + T9p = T4n - T4o; + T4r = FNMS(KP414213562, T3Y, T43); + T44 = FMA(KP414213562, T43, T3Y); + T4f = FNMS(KP414213562, T4e, T49); + T4s = FMA(KP414213562, T49, T4e); + T7c = FNMS(KP707106781, T4p, T4m); + T4q = FMA(KP707106781, T4p, T4m); + T7d = T44 - T4f; + T4g = T44 + T4f; + } + { + E T7g, T4t, T9t, T9w; + T7e = FNMS(KP923879532, T7d, T7c); + T80 = FMA(KP923879532, T7d, T7c); + T6f = FMA(KP923879532, T4g, T3T); + T4h = FNMS(KP923879532, T4g, T3T); + T7g = T4s - T4r; + T4t = T4r + T4s; + T9q = FNMS(KP707106781, T9p, T9o); + Tbr = FMA(KP707106781, T9p, T9o); + T7h = FNMS(KP923879532, T7g, T7f); + T7Z = FMA(KP923879532, T7g, T7f); + T6g = FMA(KP923879532, T4t, T4q); + T4u = FNMS(KP923879532, T4t, T4q); + T9D = FNMS(KP414213562, T9r, T9s); + T9t = FMA(KP414213562, T9s, T9r); + T9w = FNMS(KP414213562, T9v, T9u); + T9C = FMA(KP414213562, T9u, T9v); + Tbo = FMA(KP707106781, T9A, T9z); + T9B = FNMS(KP707106781, T9A, T9z); + Tbp = T9w + T9t; + T9x = T9t - T9w; + } + } + } + } + } + } + } + } + { + E Tbq, Tcf, Tav, T9y, Tbt, Tce, Tau, T9F, T6p, T6d, T6c, T6q, Thf, The, Thd; + { + E Tk9, Tkm, TjP, TjO, TjN; + { + E Tj0, TiS, TiU, Tj3, Tj1, Tj4, TiY, Tj2; + { + E TiQ, TiW, TiV, TiR, TiD, TiG, TiN, TiF, TiO; + { + E T11, T22, TiJ, TiE, TiM, Tbs, T9E; + TiQ = Tv - T10; + T11 = Tv + T10; + Tbq = FNMS(KP923879532, Tbp, Tbo); + Tcf = FMA(KP923879532, Tbp, Tbo); + Tav = FMA(KP923879532, T9x, T9q); + T9y = 
FNMS(KP923879532, T9x, T9q); + Tbs = T9C + T9D; + T9E = T9C - T9D; + T22 = T1w + T21; + TiW = T1w - T21; + TiV = TiH - TiI; + TiJ = TiH + TiI; + Tbt = FNMS(KP923879532, Tbs, Tbr); + Tce = FMA(KP923879532, Tbs, Tbr); + Tau = FMA(KP923879532, T9E, T9B); + T9F = FNMS(KP923879532, T9E, T9B); + TiE = T11 - T22; + TiR = TiL - TiK; + TiM = TiK + TiL; + cr[0] = T11 + T22; + TiD = W[62]; + TiG = W[63]; + ci[0] = TiJ + TiM; + TiN = TiJ - TiM; + TiF = TiD * TiE; + TiO = TiG * TiE; + } + cr[WS(rs, 32)] = FNMS(TiG, TiN, TiF); + ci[WS(rs, 32)] = FMA(TiD, TiN, TiO); + Tj0 = TiQ + TiR; + TiS = TiQ - TiR; + { + E TiP, TiX, TiT, TiZ; + TiP = W[94]; + TiU = W[95]; + TiZ = W[30]; + Tj3 = TiW + TiV; + TiX = TiV - TiW; + TiT = TiP * TiS; + Tj1 = TiZ * Tj0; + Tj4 = TiZ * Tj3; + TiY = TiP * TiX; + cr[WS(rs, 48)] = FNMS(TiU, TiX, TiT); + Tj2 = W[31]; + } + } + { + E Tii, Til, Tik, Tih, Tim; + { + E Tib, Tit, Tio, ThG, ThP, ThY, Tie, Tip, Tic, Tid; + Tib = Ti3 + Tia; + Tit = Tia - Ti3; + ci[WS(rs, 48)] = FMA(TiU, TiS, TiY); + Tio = Thy - ThF; + ThG = Thy + ThF; + ci[WS(rs, 16)] = FMA(Tj2, Tj0, Tj4); + cr[WS(rs, 16)] = FNMS(Tj2, Tj3, Tj1); + ThP = ThH - ThO; + Tic = ThH + ThO; + Tid = ThX - ThQ; + ThY = ThQ + ThX; + Tie = Tic + Tid; + Tip = Tid - Tic; + { + E Tiy, TiB, Ti0, Tiz, TiC, TiA; + { + E Tin, Tis, Tiq, ThZ, Tiu, Tir, Tiw, Tix, Tiv; + Tin = W[110]; + Tis = W[111]; + Tiy = FMA(KP707106781, Tip, Tio); + Tiq = FNMS(KP707106781, Tip, Tio); + ThZ = ThP + ThY; + Tiu = ThP - ThY; + Tir = Tin * Tiq; + Tix = W[46]; + TiB = FMA(KP707106781, Tiu, Tit); + Tiv = FNMS(KP707106781, Tiu, Tit); + Ti0 = FNMS(KP707106781, ThZ, ThG); + Tii = FMA(KP707106781, ThZ, ThG); + cr[WS(rs, 56)] = FNMS(Tis, Tiv, Tir); + Tiw = Tin * Tiv; + Tiz = Tix * Tiy; + TiC = Tix * TiB; + TiA = W[47]; + ci[WS(rs, 56)] = FMA(Tis, Tiq, Tiw); + } + { + E Tif, Ti2, Thx, Tig, Ti1; + Til = FMA(KP707106781, Tie, Tib); + Tif = FNMS(KP707106781, Tie, Tib); + Ti2 = W[79]; + ci[WS(rs, 24)] = FMA(TiA, Tiy, TiC); + cr[WS(rs, 24)] = 
FNMS(TiA, TiB, Tiz); + Thx = W[78]; + Tig = Ti2 * Ti0; + Tik = W[15]; + Ti1 = Thx * Ti0; + ci[WS(rs, 40)] = FMA(Thx, Tif, Tig); + Tih = W[14]; + Tim = Tik * Tii; + cr[WS(rs, 40)] = FNMS(Ti2, Tif, Ti1); + } + } + } + { + E TjF, TjI, TjU, Tk2, TjZ, Tk5, Tjw, TjM; + { + E TjX, TjG, Tju, Tjg, TjS, Tjn, TjH, Tjf, TjE, Tij, TjT, Tjv, TjY; + TjE = TjC - TjD; + Tk9 = TjC + TjD; + Tij = Tih * Tii; + ci[WS(rs, 8)] = FMA(Tih, Til, Tim); + Tkm = Tje + Tjb; + Tjf = Tjb - Tje; + TjX = FNMS(KP707106781, TjE, TjB); + TjF = FMA(KP707106781, TjE, TjB); + cr[WS(rs, 8)] = FNMS(Tik, Til, Tij); + TjG = FMA(KP414213562, Tjq, Tjt); + Tju = FNMS(KP414213562, Tjt, Tjq); + Tjg = FMA(KP707106781, Tjf, Tj8); + TjS = FNMS(KP707106781, Tjf, Tj8); + Tjn = FMA(KP414213562, Tjm, Tjj); + TjH = FNMS(KP414213562, Tjj, Tjm); + TjI = TjG - TjH; + TjT = TjG + TjH; + Tjv = Tjn - Tju; + TjY = Tju + Tjn; + TjU = FNMS(KP923879532, TjT, TjS); + Tk2 = FMA(KP923879532, TjT, TjS); + TjZ = FNMS(KP923879532, TjY, TjX); + Tk5 = FMA(KP923879532, TjY, TjX); + Tjw = FNMS(KP923879532, Tjv, Tjg); + TjM = FMA(KP923879532, Tjv, Tjg); + } + { + E Tk4, Tk3, TjR, TjW, TjJ, Tjy, Tj5; + TjR = W[54]; + TjW = W[55]; + { + E Tk1, Tk0, TjV, Tk6; + Tk1 = W[118]; + Tk4 = W[119]; + Tk0 = TjR * TjZ; + TjV = TjR * TjU; + Tk6 = Tk1 * Tk5; + Tk3 = Tk1 * Tk2; + ci[WS(rs, 28)] = FMA(TjW, TjU, Tk0); + cr[WS(rs, 28)] = FNMS(TjW, TjZ, TjV); + ci[WS(rs, 60)] = FMA(Tk4, Tk2, Tk6); + } + cr[WS(rs, 60)] = FNMS(Tk4, Tk5, Tk3); + TjP = FMA(KP923879532, TjI, TjF); + TjJ = FNMS(KP923879532, TjI, TjF); + Tjy = W[87]; + Tj5 = W[86]; + { + E TjL, TjQ, TjK, Tjx; + TjO = W[23]; + TjK = Tjy * Tjw; + Tjx = Tj5 * Tjw; + TjL = W[22]; + TjQ = TjO * TjM; + ci[WS(rs, 44)] = FMA(Tj5, TjJ, TjK); + cr[WS(rs, 44)] = FNMS(Tjy, TjJ, Tjx); + TjN = TjL * TjM; + ci[WS(rs, 12)] = FMA(TjL, TjP, TjQ); + } + } + } + } + } + { + E T5T, T5S, T5R, Tkx, Tkw, Tkv; + { + E Tkn, Tkq, TkC, TkK, TkH, TkN, Tki, Tku; + { + E Tkg, Tko, TkF, Tka, TkA, Tkd, Tkp, TkB, Tkh, TkG; + cr[WS(rs, 
12)] = FNMS(TjO, TjP, TjN); + Tkg = FMA(KP414213562, Tkf, Tke); + Tko = FNMS(KP414213562, Tke, Tkf); + TkF = FMA(KP707106781, Tkm, Tkl); + Tkn = FNMS(KP707106781, Tkm, Tkl); + Tka = FNMS(KP707106781, Tk9, Tk8); + TkA = FMA(KP707106781, Tk9, Tk8); + Tkd = FNMS(KP414213562, Tkc, Tkb); + Tkp = FMA(KP414213562, Tkb, Tkc); + Tkq = Tko - Tkp; + TkB = Tko + Tkp; + Tkh = Tkd - Tkg; + TkG = Tkg + Tkd; + TkC = FNMS(KP923879532, TkB, TkA); + TkK = FMA(KP923879532, TkB, TkA); + TkH = FNMS(KP923879532, TkG, TkF); + TkN = FMA(KP923879532, TkG, TkF); + Tki = FNMS(KP923879532, Tkh, Tka); + Tku = FMA(KP923879532, Tkh, Tka); + } + { + E TkM, TkL, Tkz, TkE, Tkr, Tkk, Tk7; + Tkz = W[70]; + TkE = W[71]; + { + E TkJ, TkI, TkD, TkO; + TkJ = W[6]; + TkM = W[7]; + TkI = Tkz * TkH; + TkD = Tkz * TkC; + TkO = TkJ * TkN; + TkL = TkJ * TkK; + ci[WS(rs, 36)] = FMA(TkE, TkC, TkI); + cr[WS(rs, 36)] = FNMS(TkE, TkH, TkD); + ci[WS(rs, 4)] = FMA(TkM, TkK, TkO); + } + cr[WS(rs, 4)] = FNMS(TkM, TkN, TkL); + Tkx = FMA(KP923879532, Tkq, Tkn); + Tkr = FNMS(KP923879532, Tkq, Tkn); + Tkk = W[103]; + Tk7 = W[102]; + { + E Tkt, Tky, Tks, Tkj; + Tkw = W[39]; + Tks = Tkk * Tki; + Tkj = Tk7 * Tki; + Tkt = W[38]; + Tky = Tkw * Tku; + ci[WS(rs, 52)] = FMA(Tk7, Tkr, Tks); + cr[WS(rs, 52)] = FNMS(Tkk, Tkr, Tkj); + Tkv = Tkt * Tku; + ci[WS(rs, 20)] = FMA(Tkt, Tkx, Tky); + } + } + } + { + E T5J, T5M, T66, T5Y, T69, T63, T5Q, T5q; + { + E T5o, T4v, T61, T5X, T3C, T5W, T62, T5p; + { + E T5K, T5L, T5F, T5I, T2I, T3B; + T5F = FNMS(KP923879532, T5E, T5B); + T6p = FMA(KP923879532, T5E, T5B); + T6d = T5G + T5H; + T5I = T5G - T5H; + cr[WS(rs, 20)] = FNMS(Tkw, Tkx, Tkv); + T5o = FNMS(KP820678790, T5n, T5a); + T5K = FMA(KP820678790, T5a, T5n); + T5L = FNMS(KP820678790, T4h, T4u); + T4v = FMA(KP820678790, T4u, T4h); + T5J = FMA(KP980785280, T5I, T5F); + T61 = FNMS(KP980785280, T5I, T5F); + T2I = FNMS(KP923879532, T2H, T2k); + T6c = FMA(KP923879532, T2H, T2k); + T6q = T3A + T39; + T3B = T39 - T3A; + T5X = T5K + T5L; + T5M = T5K 
- T5L; + T3C = FMA(KP980785280, T3B, T2I); + T5W = FNMS(KP980785280, T3B, T2I); + } + T62 = T5o + T4v; + T5p = T4v - T5o; + T66 = FMA(KP773010453, T5X, T5W); + T5Y = FNMS(KP773010453, T5X, T5W); + T69 = FMA(KP773010453, T62, T61); + T63 = FNMS(KP773010453, T62, T61); + T5Q = FMA(KP773010453, T5p, T3C); + T5q = FNMS(KP773010453, T5p, T3C); + } + { + E T68, T67, T5V, T60, T5N, T5s, T23; + T5V = W[48]; + T60 = W[49]; + { + E T65, T64, T5Z, T6a; + T65 = W[112]; + T68 = W[113]; + T64 = T5V * T63; + T5Z = T5V * T5Y; + T6a = T65 * T69; + T67 = T65 * T66; + ci[WS(rs, 25)] = FMA(T60, T5Y, T64); + cr[WS(rs, 25)] = FNMS(T60, T63, T5Z); + ci[WS(rs, 57)] = FMA(T68, T66, T6a); + } + cr[WS(rs, 57)] = FNMS(T68, T69, T67); + T5T = FMA(KP773010453, T5M, T5J); + T5N = FNMS(KP773010453, T5M, T5J); + T5s = W[81]; + T23 = W[80]; + { + E T5P, T5U, T5O, T5r; + T5S = W[17]; + T5O = T5s * T5q; + T5r = T23 * T5q; + T5P = W[16]; + T5U = T5S * T5Q; + ci[WS(rs, 41)] = FMA(T23, T5N, T5O); + cr[WS(rs, 41)] = FNMS(T5s, T5N, T5r); + T5R = T5P * T5Q; + ci[WS(rs, 9)] = FMA(T5P, T5T, T5U); + } + } + } + { + E Th3, TgR, TgQ, Th4, TgN, TgM, TgL; + { + E TgG, TgF, Tge, Tgu, TgK, TgC, Tgx, Tgr; + { + E Tgp, Tgo, Tgd, Tgn, TfY, TgA, TgB, Tgq; + { + E Tgj, Tgm, Tg5, Tgc, TfQ, TfX; + Tg5 = FMA(KP668178637, Tg4, Tg1); + Tgp = FNMS(KP668178637, Tg1, Tg4); + Tgo = FMA(KP668178637, Tg8, Tgb); + Tgc = FNMS(KP668178637, Tgb, Tg8); + cr[WS(rs, 9)] = FNMS(T5S, T5T, T5R); + Th3 = FMA(KP707106781, Tgi, Tgh); + Tgj = FNMS(KP707106781, Tgi, Tgh); + Tgm = Tgk - Tgl; + TgR = Tgk + Tgl; + TgG = Tgc + Tg5; + Tgd = Tg5 - Tgc; + TfQ = FNMS(KP707106781, TfP, TfO); + TgQ = FMA(KP707106781, TfP, TfO); + Th4 = TfW + TfT; + TfX = TfT - TfW; + Tgn = FMA(KP923879532, Tgm, Tgj); + TgF = FNMS(KP923879532, Tgm, Tgj); + TfY = FMA(KP923879532, TfX, TfQ); + TgA = FNMS(KP923879532, TfX, TfQ); + } + TgB = Tgo + Tgp; + Tgq = Tgo - Tgp; + Tge = FNMS(KP831469612, Tgd, TfY); + Tgu = FMA(KP831469612, Tgd, TfY); + TgK = FMA(KP831469612, TgB, 
TgA); + TgC = FNMS(KP831469612, TgB, TgA); + Tgx = FMA(KP831469612, Tgq, Tgn); + Tgr = FNMS(KP831469612, Tgq, Tgn); + } + { + E Tgw, Tgv, TfN, Tgg, TgH, TgE, Tgz; + TfN = W[82]; + Tgg = W[83]; + { + E Tgt, Tgs, Tgf, Tgy; + Tgt = W[18]; + Tgw = W[19]; + Tgs = TfN * Tgr; + Tgf = TfN * Tge; + Tgy = Tgt * Tgx; + Tgv = Tgt * Tgu; + ci[WS(rs, 42)] = FMA(Tgg, Tge, Tgs); + cr[WS(rs, 42)] = FNMS(Tgg, Tgr, Tgf); + ci[WS(rs, 10)] = FMA(Tgw, Tgu, Tgy); + } + cr[WS(rs, 10)] = FNMS(Tgw, Tgx, Tgv); + TgN = FMA(KP831469612, TgG, TgF); + TgH = FNMS(KP831469612, TgG, TgF); + TgE = W[51]; + Tgz = W[50]; + { + E TgJ, TgO, TgI, TgD; + TgM = W[115]; + TgI = TgE * TgC; + TgD = Tgz * TgC; + TgJ = W[114]; + TgO = TgM * TgK; + ci[WS(rs, 26)] = FMA(Tgz, TgH, TgI); + cr[WS(rs, 26)] = FNMS(TgE, TgH, TgD); + TgL = TgJ * TgK; + ci[WS(rs, 58)] = FMA(TgJ, TgN, TgO); + } + } + } + { + E Th5, Th8, Ths, Thk, Thv, Thp, Thc, Th0; + { + E TgV, TgY, Thn, Thj, TgS, Thi, Th6, Th7, Tho, TgZ; + cr[WS(rs, 58)] = FNMS(TgM, TgN, TgL); + TgV = FNMS(KP198912367, TgU, TgT); + Th6 = FMA(KP198912367, TgT, TgU); + Th7 = FNMS(KP198912367, TgW, TgX); + TgY = FMA(KP198912367, TgX, TgW); + Th5 = FMA(KP923879532, Th4, Th3); + Thn = FNMS(KP923879532, Th4, Th3); + Thj = Th7 - Th6; + Th8 = Th6 + Th7; + TgS = FMA(KP923879532, TgR, TgQ); + Thi = FNMS(KP923879532, TgR, TgQ); + Tho = TgV - TgY; + TgZ = TgV + TgY; + Ths = FMA(KP980785280, Thj, Thi); + Thk = FNMS(KP980785280, Thj, Thi); + Thv = FMA(KP980785280, Tho, Thn); + Thp = FNMS(KP980785280, Tho, Thn); + Thc = FMA(KP980785280, TgZ, TgS); + Th0 = FNMS(KP980785280, TgZ, TgS); + } + { + E Thu, Tht, Thh, Thm, Th9, Th2, TgP; + Thh = W[98]; + Thm = W[99]; + { + E Thr, Thq, Thl, Thw; + Thr = W[34]; + Thu = W[35]; + Thq = Thh * Thp; + Thl = Thh * Thk; + Thw = Thr * Thv; + Tht = Thr * Ths; + ci[WS(rs, 50)] = FMA(Thm, Thk, Thq); + cr[WS(rs, 50)] = FNMS(Thm, Thp, Thl); + ci[WS(rs, 18)] = FMA(Thu, Ths, Thw); + } + cr[WS(rs, 18)] = FNMS(Thu, Thv, Tht); + Thf = FMA(KP980785280, Th8, Th5); 
+ Th9 = FNMS(KP980785280, Th8, Th5); + Th2 = W[67]; + TgP = W[66]; + { + E Thb, Thg, Tha, Th1; + The = W[3]; + Tha = Th2 * Th0; + Th1 = TgP * Th0; + Thb = W[2]; + Thg = The * Thc; + ci[WS(rs, 34)] = FMA(TgP, Th9, Tha); + cr[WS(rs, 34)] = FNMS(Th2, Th9, Th1); + Thd = Thb * Thc; + ci[WS(rs, 2)] = FMA(Thb, Thf, Thg); + } + } + } + } + } + } + { + E Tcl, Tc9, Tc8, Tcm, T9R, T93, T8O, T9U, Tez, Tdt, Td6, TeC, Tfv, Tfu, Tft; + E T8B, T8A, T8z; + { + E TbP, TbO, TbN, T6B, T6A, T6z, TaN, TaM, TaL; + { + E T6r, T6u, T6O, T6G, T6R, T6L, T6y, T6m; + { + E T6k, T6h, T6J, T6F, T6e, T6E, T6s, T6t, T6K, T6l; + cr[WS(rs, 2)] = FNMS(The, Thf, Thd); + T6k = FMA(KP098491403, T6j, T6i); + T6s = FNMS(KP098491403, T6i, T6j); + T6t = FMA(KP098491403, T6f, T6g); + T6h = FNMS(KP098491403, T6g, T6f); + T6r = FNMS(KP980785280, T6q, T6p); + T6J = FMA(KP980785280, T6q, T6p); + T6F = T6s + T6t; + T6u = T6s - T6t; + T6e = FNMS(KP980785280, T6d, T6c); + T6E = FMA(KP980785280, T6d, T6c); + T6K = T6k + T6h; + T6l = T6h - T6k; + T6O = FMA(KP995184726, T6F, T6E); + T6G = FNMS(KP995184726, T6F, T6E); + T6R = FMA(KP995184726, T6K, T6J); + T6L = FNMS(KP995184726, T6K, T6J); + T6y = FMA(KP995184726, T6l, T6e); + T6m = FNMS(KP995184726, T6l, T6e); + } + { + E T6Q, T6P, T6D, T6I, T6v, T6o, T6b; + T6D = W[64]; + T6I = W[65]; + { + E T6N, T6M, T6H, T6S; + T6N = W[0]; + T6Q = W[1]; + T6M = T6D * T6L; + T6H = T6D * T6G; + T6S = T6N * T6R; + T6P = T6N * T6O; + ci[WS(rs, 33)] = FMA(T6I, T6G, T6M); + cr[WS(rs, 33)] = FNMS(T6I, T6L, T6H); + ci[WS(rs, 1)] = FMA(T6Q, T6O, T6S); + } + cr[WS(rs, 1)] = FNMS(T6Q, T6R, T6P); + T6B = FMA(KP995184726, T6u, T6r); + T6v = FNMS(KP995184726, T6u, T6r); + T6o = W[97]; + T6b = W[96]; + { + E T6x, T6C, T6w, T6n; + T6A = W[33]; + T6w = T6o * T6m; + T6n = T6b * T6m; + T6x = W[32]; + T6C = T6A * T6y; + ci[WS(rs, 49)] = FMA(T6b, T6v, T6w); + cr[WS(rs, 49)] = FNMS(T6o, T6v, T6n); + T6z = T6x * T6y; + ci[WS(rs, 17)] = FMA(T6x, T6B, T6C); + } + } + } + { + E TbF, TbI, Tc2, TbU, Tc5, 
TbZ, TbM, Tbw; + { + E Tbn, Tbu, TbX, TbT, Tbg, TbS, TbY, Tbv; + { + E TbG, TbH, TbB, TbE, Tb8, Tbf; + TbB = FMA(KP923879532, TbA, Tbz); + Tcl = FNMS(KP923879532, TbA, Tbz); + Tc9 = TbC + TbD; + TbE = TbC - TbD; + cr[WS(rs, 17)] = FNMS(T6A, T6B, T6z); + Tbn = FNMS(KP820678790, Tbm, Tbj); + TbG = FMA(KP820678790, Tbj, Tbm); + TbH = FMA(KP820678790, Tbq, Tbt); + Tbu = FNMS(KP820678790, Tbt, Tbq); + TbF = FMA(KP980785280, TbE, TbB); + TbX = FNMS(KP980785280, TbE, TbB); + Tb8 = FNMS(KP923879532, Tb7, Tb6); + Tc8 = FMA(KP923879532, Tb7, Tb6); + Tcm = Tbe - Tbb; + Tbf = Tbb + Tbe; + TbT = TbG + TbH; + TbI = TbG - TbH; + Tbg = FNMS(KP980785280, Tbf, Tb8); + TbS = FMA(KP980785280, Tbf, Tb8); + } + TbY = Tbn - Tbu; + Tbv = Tbn + Tbu; + Tc2 = FMA(KP773010453, TbT, TbS); + TbU = FNMS(KP773010453, TbT, TbS); + Tc5 = FNMS(KP773010453, TbY, TbX); + TbZ = FMA(KP773010453, TbY, TbX); + TbM = FMA(KP773010453, Tbv, Tbg); + Tbw = FNMS(KP773010453, Tbv, Tbg); + } + { + E Tc4, Tc3, TbR, TbW, TbJ, Tby, Tb5; + TbR = W[44]; + TbW = W[45]; + { + E Tc1, Tc0, TbV, Tc6; + Tc1 = W[108]; + Tc4 = W[109]; + Tc0 = TbR * TbZ; + TbV = TbR * TbU; + Tc6 = Tc1 * Tc5; + Tc3 = Tc1 * Tc2; + ci[WS(rs, 23)] = FMA(TbW, TbU, Tc0); + cr[WS(rs, 23)] = FNMS(TbW, TbZ, TbV); + ci[WS(rs, 55)] = FMA(Tc4, Tc2, Tc6); + } + cr[WS(rs, 55)] = FNMS(Tc4, Tc5, Tc3); + TbP = FMA(KP773010453, TbI, TbF); + TbJ = FNMS(KP773010453, TbI, TbF); + Tby = W[77]; + Tb5 = W[76]; + { + E TbL, TbQ, TbK, Tbx; + TbO = W[13]; + TbK = Tby * Tbw; + Tbx = Tb5 * Tbw; + TbL = W[12]; + TbQ = TbO * TbM; + ci[WS(rs, 39)] = FMA(Tb5, TbJ, TbK); + cr[WS(rs, 39)] = FNMS(Tby, TbJ, Tbx); + TbN = TbL * TbM; + ci[WS(rs, 7)] = FMA(TbL, TbP, TbQ); + } + } + } + { + E TaD, TaG, Tb0, TaS, Tb3, TaX, TaK, Tay; + { + E Tat, Taw, TaV, TaR, Taq, TaQ, TaW, Tax; + { + E TaE, TaF, TaB, TaC, Tao, Tap; + TaB = FMA(KP923879532, T9Q, T9N); + T9R = FNMS(KP923879532, T9Q, T9N); + T93 = T8V + T92; + TaC = T8V - T92; + cr[WS(rs, 7)] = FNMS(TbO, TbP, TbN); + Tat = 
FNMS(KP303346683, Tas, Tar); + TaE = FMA(KP303346683, Tar, Tas); + TaF = FMA(KP303346683, Tau, Tav); + Taw = FNMS(KP303346683, Tav, Tau); + TaD = FMA(KP831469612, TaC, TaB); + TaV = FNMS(KP831469612, TaC, TaB); + Tao = FNMS(KP923879532, T8N, T8G); + T8O = FMA(KP923879532, T8N, T8G); + T9U = T9S - T9T; + Tap = T9S + T9T; + TaR = TaE + TaF; + TaG = TaE - TaF; + Taq = FMA(KP831469612, Tap, Tao); + TaQ = FNMS(KP831469612, Tap, Tao); + } + TaW = Tat - Taw; + Tax = Tat + Taw; + Tb0 = FMA(KP956940335, TaR, TaQ); + TaS = FNMS(KP956940335, TaR, TaQ); + Tb3 = FNMS(KP956940335, TaW, TaV); + TaX = FMA(KP956940335, TaW, TaV); + TaK = FMA(KP956940335, Tax, Taq); + Tay = FNMS(KP956940335, Tax, Taq); + } + { + E Tb2, Tb1, TaP, TaU, TaH, TaA, Tan; + TaP = W[36]; + TaU = W[37]; + { + E TaZ, TaY, TaT, Tb4; + TaZ = W[100]; + Tb2 = W[101]; + TaY = TaP * TaX; + TaT = TaP * TaS; + Tb4 = TaZ * Tb3; + Tb1 = TaZ * Tb0; + ci[WS(rs, 19)] = FMA(TaU, TaS, TaY); + cr[WS(rs, 19)] = FNMS(TaU, TaX, TaT); + ci[WS(rs, 51)] = FMA(Tb2, Tb0, Tb4); + } + cr[WS(rs, 51)] = FNMS(Tb2, Tb3, Tb1); + TaN = FMA(KP956940335, TaG, TaD); + TaH = FNMS(KP956940335, TaG, TaD); + TaA = W[69]; + Tan = W[68]; + { + E TaJ, TaO, TaI, Taz; + TaM = W[5]; + TaI = TaA * Tay; + Taz = Tan * Tay; + TaJ = W[4]; + TaO = TaM * TaK; + ci[WS(rs, 35)] = FMA(Tan, TaH, TaI); + cr[WS(rs, 35)] = FNMS(TaA, TaH, Taz); + TaL = TaJ * TaK; + ci[WS(rs, 3)] = FMA(TaJ, TaN, TaO); + } + } + } + { + E Tfl, Tfo, TfI, TfA, TfL, TfF, Tfs, Tfg; + { + E Tfe, Tfb, TfD, Tfz, Tf8, Tfy, TfE, Tff; + { + E Tfm, Tfn, Tfj, Tfk, Tf6, Tf7; + Tfj = FNMS(KP707106781, Tey, Tev); + Tez = FMA(KP707106781, Tey, Tev); + Tdt = Tdh - Tds; + Tfk = Tds + Tdh; + cr[WS(rs, 3)] = FNMS(TaM, TaN, TaL); + Tfe = FNMS(KP198912367, Tfd, Tfc); + Tfm = FMA(KP198912367, Tfc, Tfd); + Tfn = FNMS(KP198912367, Tf9, Tfa); + Tfb = FMA(KP198912367, Tfa, Tf9); + Tfl = FNMS(KP923879532, Tfk, Tfj); + TfD = FMA(KP923879532, Tfk, Tfj); + Tf6 = FNMS(KP707106781, Td5, TcU); + Td6 = FMA(KP707106781, 
Td5, TcU); + TeC = TeA - TeB; + Tf7 = TeA + TeB; + Tfz = Tfm + Tfn; + Tfo = Tfm - Tfn; + Tf8 = FNMS(KP923879532, Tf7, Tf6); + Tfy = FMA(KP923879532, Tf7, Tf6); + } + TfE = Tfe + Tfb; + Tff = Tfb - Tfe; + TfI = FMA(KP980785280, Tfz, Tfy); + TfA = FNMS(KP980785280, Tfz, Tfy); + TfL = FMA(KP980785280, TfE, TfD); + TfF = FNMS(KP980785280, TfE, TfD); + Tfs = FMA(KP980785280, Tff, Tf8); + Tfg = FNMS(KP980785280, Tff, Tf8); + } + { + E TfK, TfJ, Tfx, TfC, Tfp, Tfi, Tf5; + Tfx = W[58]; + TfC = W[59]; + { + E TfH, TfG, TfB, TfM; + TfH = W[122]; + TfK = W[123]; + TfG = Tfx * TfF; + TfB = Tfx * TfA; + TfM = TfH * TfL; + TfJ = TfH * TfI; + ci[WS(rs, 30)] = FMA(TfC, TfA, TfG); + cr[WS(rs, 30)] = FNMS(TfC, TfF, TfB); + ci[WS(rs, 62)] = FMA(TfK, TfI, TfM); + } + cr[WS(rs, 62)] = FNMS(TfK, TfL, TfJ); + Tfv = FMA(KP980785280, Tfo, Tfl); + Tfp = FNMS(KP980785280, Tfo, Tfl); + Tfi = W[91]; + Tf5 = W[90]; + { + E Tfr, Tfw, Tfq, Tfh; + Tfu = W[27]; + Tfq = Tfi * Tfg; + Tfh = Tf5 * Tfg; + Tfr = W[26]; + Tfw = Tfu * Tfs; + ci[WS(rs, 46)] = FMA(Tf5, Tfp, Tfq); + cr[WS(rs, 46)] = FNMS(Tfi, Tfp, Tfh); + Tft = Tfr * Tfs; + ci[WS(rs, 14)] = FMA(Tfr, Tfv, Tfw); + } + } + } + } + { + E T89, T7X, T7W, T8a, T7D, T7C, T7B; + { + E T7t, T7w, T7Q, T7I, T7T, T7N, T7A, T7k; + { + E T7b, T7i, T7L, T7H, T74, T7G, T7M, T7j; + { + E T7u, T7v, T7p, T7s, T6W, T73; + T7p = FMA(KP923879532, T7o, T7n); + T89 = FNMS(KP923879532, T7o, T7n); + T7X = T7q + T7r; + T7s = T7q - T7r; + cr[WS(rs, 14)] = FNMS(Tfu, Tfv, Tft); + T7b = FNMS(KP534511135, T7a, T77); + T7u = FMA(KP534511135, T77, T7a); + T7v = FNMS(KP534511135, T7e, T7h); + T7i = FMA(KP534511135, T7h, T7e); + T7t = FMA(KP831469612, T7s, T7p); + T7L = FNMS(KP831469612, T7s, T7p); + T6W = FMA(KP923879532, T6V, T6U); + T7W = FNMS(KP923879532, T6V, T6U); + T8a = T72 + T6Z; + T73 = T6Z - T72; + T7H = T7v - T7u; + T7w = T7u + T7v; + T74 = FMA(KP831469612, T73, T6W); + T7G = FNMS(KP831469612, T73, T6W); + } + T7M = T7b - T7i; + T7j = T7b + T7i; + T7Q = 
FMA(KP881921264, T7H, T7G); + T7I = FNMS(KP881921264, T7H, T7G); + T7T = FMA(KP881921264, T7M, T7L); + T7N = FNMS(KP881921264, T7M, T7L); + T7A = FMA(KP881921264, T7j, T74); + T7k = FNMS(KP881921264, T7j, T74); + } + { + E T7S, T7R, T7F, T7K, T7x, T7m, T6T; + T7F = W[104]; + T7K = W[105]; + { + E T7P, T7O, T7J, T7U; + T7P = W[40]; + T7S = W[41]; + T7O = T7F * T7N; + T7J = T7F * T7I; + T7U = T7P * T7T; + T7R = T7P * T7Q; + ci[WS(rs, 53)] = FMA(T7K, T7I, T7O); + cr[WS(rs, 53)] = FNMS(T7K, T7N, T7J); + ci[WS(rs, 21)] = FMA(T7S, T7Q, T7U); + } + cr[WS(rs, 21)] = FNMS(T7S, T7T, T7R); + T7D = FMA(KP881921264, T7w, T7t); + T7x = FNMS(KP881921264, T7w, T7t); + T7m = W[73]; + T6T = W[72]; + { + E T7z, T7E, T7y, T7l; + T7C = W[9]; + T7y = T7m * T7k; + T7l = T6T * T7k; + T7z = W[8]; + T7E = T7C * T7A; + ci[WS(rs, 37)] = FMA(T6T, T7x, T7y); + cr[WS(rs, 37)] = FNMS(T7m, T7x, T7l); + T7B = T7z * T7A; + ci[WS(rs, 5)] = FMA(T7z, T7D, T7E); + } + } + } + { + E T8u, T8t, T86, T8i, T8y, T8q, T8l, T8f; + { + E T8d, T8c, T85, T8b, T7Y, T8o, T81, T84, T8p, T8e; + T81 = FMA(KP303346683, T80, T7Z); + T8d = FNMS(KP303346683, T7Z, T80); + T8c = FMA(KP303346683, T82, T83); + T84 = FNMS(KP303346683, T83, T82); + cr[WS(rs, 5)] = FNMS(T7C, T7D, T7B); + T8u = T84 + T81; + T85 = T81 - T84; + T8b = FNMS(KP831469612, T8a, T89); + T8t = FMA(KP831469612, T8a, T89); + T7Y = FNMS(KP831469612, T7X, T7W); + T8o = FMA(KP831469612, T7X, T7W); + T8p = T8c + T8d; + T8e = T8c - T8d; + T86 = FNMS(KP956940335, T85, T7Y); + T8i = FMA(KP956940335, T85, T7Y); + T8y = FMA(KP956940335, T8p, T8o); + T8q = FNMS(KP956940335, T8p, T8o); + T8l = FMA(KP956940335, T8e, T8b); + T8f = FNMS(KP956940335, T8e, T8b); + } + { + E T8k, T8j, T7V, T88, T8v, T8s, T8n; + T7V = W[88]; + T88 = W[89]; + { + E T8h, T8g, T87, T8m; + T8h = W[24]; + T8k = W[25]; + T8g = T7V * T8f; + T87 = T7V * T86; + T8m = T8h * T8l; + T8j = T8h * T8i; + ci[WS(rs, 45)] = FMA(T88, T86, T8g); + cr[WS(rs, 45)] = FNMS(T88, T8f, T87); + ci[WS(rs, 13)] = FMA(T8k, 
T8i, T8m); + } + cr[WS(rs, 13)] = FNMS(T8k, T8l, T8j); + T8B = FMA(KP956940335, T8u, T8t); + T8v = FNMS(KP956940335, T8u, T8t); + T8s = W[57]; + T8n = W[56]; + { + E T8x, T8C, T8w, T8r; + T8A = W[121]; + T8w = T8s * T8q; + T8r = T8n * T8q; + T8x = W[120]; + T8C = T8A * T8y; + ci[WS(rs, 29)] = FMA(T8n, T8v, T8w); + cr[WS(rs, 29)] = FNMS(T8s, T8v, T8r); + T8z = T8x * T8y; + ci[WS(rs, 61)] = FMA(T8x, T8B, T8C); + } + } + } + } + { + E Ta5, Ta4, Ta3, TeN, TeM, TeL; + { + E T9V, T9Y, Tai, Taa, Tal, Taf, Ta2, T9I; + { + E T9n, T9G, Tad, Ta9, T94, Ta8, T9W, T9X, Tae, T9H; + cr[WS(rs, 61)] = FNMS(T8A, T8B, T8z); + T9n = FNMS(KP534511135, T9m, T9f); + T9W = FMA(KP534511135, T9f, T9m); + T9X = FMA(KP534511135, T9y, T9F); + T9G = FNMS(KP534511135, T9F, T9y); + T9V = FMA(KP831469612, T9U, T9R); + Tad = FNMS(KP831469612, T9U, T9R); + Ta9 = T9W + T9X; + T9Y = T9W - T9X; + T94 = FNMS(KP831469612, T93, T8O); + Ta8 = FMA(KP831469612, T93, T8O); + Tae = T9G - T9n; + T9H = T9n + T9G; + Tai = FMA(KP881921264, Ta9, Ta8); + Taa = FNMS(KP881921264, Ta9, Ta8); + Tal = FNMS(KP881921264, Tae, Tad); + Taf = FMA(KP881921264, Tae, Tad); + Ta2 = FNMS(KP881921264, T9H, T94); + T9I = FMA(KP881921264, T9H, T94); + } + { + E Tak, Taj, Ta7, Tac, T9Z, T9K, T8D; + Ta7 = W[52]; + Tac = W[53]; + { + E Tah, Tag, Tab, Tam; + Tah = W[116]; + Tak = W[117]; + Tag = Ta7 * Taf; + Tab = Ta7 * Taa; + Tam = Tah * Tal; + Taj = Tah * Tai; + ci[WS(rs, 27)] = FMA(Tac, Taa, Tag); + cr[WS(rs, 27)] = FNMS(Tac, Taf, Tab); + ci[WS(rs, 59)] = FMA(Tak, Tai, Tam); + } + cr[WS(rs, 59)] = FNMS(Tak, Tal, Taj); + Ta5 = FMA(KP881921264, T9Y, T9V); + T9Z = FNMS(KP881921264, T9Y, T9V); + T9K = W[85]; + T8D = W[84]; + { + E Ta1, Ta6, Ta0, T9J; + Ta4 = W[21]; + Ta0 = T9K * T9I; + T9J = T8D * T9I; + Ta1 = W[20]; + Ta6 = Ta4 * Ta2; + ci[WS(rs, 43)] = FMA(T8D, T9Z, Ta0); + cr[WS(rs, 43)] = FNMS(T9K, T9Z, T9J); + Ta3 = Ta1 * Ta2; + ci[WS(rs, 11)] = FMA(Ta1, Ta5, Ta6); + } + } + } + { + E TeD, TeG, Tf0, TeS, Tf3, TeX, TeK, Teo; + { + E 
Tem, TdV, TeV, TeR, Tdu, TeQ, TeE, TeF, TeW, Ten; + cr[WS(rs, 11)] = FNMS(Ta4, Ta5, Ta3); + Tem = FMA(KP668178637, Tel, Tec); + TeE = FNMS(KP668178637, Tec, Tel); + TeF = FMA(KP668178637, TdL, TdU); + TdV = FNMS(KP668178637, TdU, TdL); + TeD = FNMS(KP923879532, TeC, Tez); + TeV = FMA(KP923879532, TeC, Tez); + TeR = TeE + TeF; + TeG = TeE - TeF; + Tdu = FNMS(KP923879532, Tdt, Td6); + TeQ = FMA(KP923879532, Tdt, Td6); + TeW = Tem + TdV; + Ten = TdV - Tem; + Tf0 = FMA(KP831469612, TeR, TeQ); + TeS = FNMS(KP831469612, TeR, TeQ); + Tf3 = FMA(KP831469612, TeW, TeV); + TeX = FNMS(KP831469612, TeW, TeV); + TeK = FMA(KP831469612, Ten, Tdu); + Teo = FNMS(KP831469612, Ten, Tdu); + } + { + E Tf2, Tf1, TeP, TeU, TeH, Teq, TcP; + TeP = W[74]; + TeU = W[75]; + { + E TeZ, TeY, TeT, Tf4; + TeZ = W[10]; + Tf2 = W[11]; + TeY = TeP * TeX; + TeT = TeP * TeS; + Tf4 = TeZ * Tf3; + Tf1 = TeZ * Tf0; + ci[WS(rs, 38)] = FMA(TeU, TeS, TeY); + cr[WS(rs, 38)] = FNMS(TeU, TeX, TeT); + ci[WS(rs, 6)] = FMA(Tf2, Tf0, Tf4); + } + cr[WS(rs, 6)] = FNMS(Tf2, Tf3, Tf1); + TeN = FMA(KP831469612, TeG, TeD); + TeH = FNMS(KP831469612, TeG, TeD); + Teq = W[107]; + TcP = W[106]; + { + E TeJ, TeO, TeI, Tep; + TeM = W[43]; + TeI = Teq * Teo; + Tep = TcP * Teo; + TeJ = W[42]; + TeO = TeM * TeK; + ci[WS(rs, 54)] = FMA(TcP, TeH, TeI); + cr[WS(rs, 54)] = FNMS(Teq, TeH, Tep); + TeL = TeJ * TeK; + ci[WS(rs, 22)] = FMA(TeJ, TeN, TeO); + } + } + } + { + E Tcn, Tcq, TcK, TcC, TcN, TcH, Tcu, Tci; + { + E Tcd, Tcg, TcF, TcB, Tca, TcA, Tco, Tcp, TcG, Tch; + cr[WS(rs, 22)] = FNMS(TeM, TeN, TeL); + Tcd = FNMS(KP098491403, Tcc, Tcb); + Tco = FMA(KP098491403, Tcb, Tcc); + Tcp = FMA(KP098491403, Tce, Tcf); + Tcg = FNMS(KP098491403, Tcf, Tce); + Tcn = FMA(KP980785280, Tcm, Tcl); + TcF = FNMS(KP980785280, Tcm, Tcl); + TcB = Tco + Tcp; + Tcq = Tco - Tcp; + Tca = FNMS(KP980785280, Tc9, Tc8); + TcA = FMA(KP980785280, Tc9, Tc8); + TcG = Tcg - Tcd; + Tch = Tcd + Tcg; + TcK = FMA(KP995184726, TcB, TcA); + TcC = FNMS(KP995184726, TcB, 
TcA); + TcN = FNMS(KP995184726, TcG, TcF); + TcH = FMA(KP995184726, TcG, TcF); + Tcu = FNMS(KP995184726, Tch, Tca); + Tci = FMA(KP995184726, Tch, Tca); + } + { + E TcM, TcL, Tcz, TcE, Tcr, Tck, Tc7; + Tcz = W[60]; + TcE = W[61]; + { + E TcJ, TcI, TcD, TcO; + TcJ = W[124]; + TcM = W[125]; + TcI = Tcz * TcH; + TcD = Tcz * TcC; + TcO = TcJ * TcN; + TcL = TcJ * TcK; + ci[WS(rs, 31)] = FMA(TcE, TcC, TcI); + cr[WS(rs, 31)] = FNMS(TcE, TcH, TcD); + ci[WS(rs, 63)] = FMA(TcM, TcK, TcO); + } + cr[WS(rs, 63)] = FNMS(TcM, TcN, TcL); + Tcx = FMA(KP995184726, Tcq, Tcn); + Tcr = FNMS(KP995184726, Tcq, Tcn); + Tck = W[93]; + Tc7 = W[92]; + { + E Tct, Tcy, Tcs, Tcj; + Tcw = W[29]; + Tcs = Tck * Tci; + Tcj = Tc7 * Tci; + Tct = W[28]; + Tcy = Tcw * Tcu; + ci[WS(rs, 47)] = FMA(Tc7, Tcr, Tcs); + cr[WS(rs, 47)] = FNMS(Tck, Tcr, Tcj); + Tcv = Tct * Tcu; + ci[WS(rs, 15)] = FMA(Tct, Tcx, Tcy); + } + } + } + } + } + } + } + cr[WS(rs, 15)] = FNMS(Tcw, Tcx, Tcv); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 64}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {520, 126, 518, 0} }; + +void X(codelet_hb_64) (planner *p) { + X(khc2hc_register) (p, hb_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 196 stack variables, 15 constants, and 256 memory accesses + */ +#include "hb.h" + +static void hb_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP471396736, 
+0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tf, T8C, Tfa, Thk, Tgg, ThM, T2c, T5O, T4K, T6g, Tag, TdE, TcA, Te6, T7P; + E T94, TK, T7o, T38, T4P, Tfv, Thn, T5W, T6j, Tb0, TdK, Tfs, Tho, T8K, T97; + E Tb7, TdL, TZ, T7l, T2P, T4Q, Tfo, Thq, T5T, T6k, TaH, TdH, Tfl, Thr, T8H; + E T98, TaO, TdI, Tu, T95, Tfh, ThN, Tgj, Thl, T2v, T6h, T4N, T5P, Tav, Te7; + E TcD, TdF, T7S, T8D, T1L, T20, T7A, T7D, T7G, T7H, T40, T62, Tg1, Thv, Tg8; + E Thz, Tg5, Thw, T4t, T5Z, T4j, T60, T4w, T63, TbY, TdS, Tcd, TdQ, TfU, Thy; + E T8P, T9z, T8S, T9A, Tcl, TdP, Tco, TdT, T1g, T1v, T7r, T7u, T7x, T7y, T3j; + E T69, TfI, ThD, TfP, ThG, TfM, ThC, T3M, T66, T3C, T67, T3P, T6a, Tbl, TdZ; + E TbA, TdX, TfB, ThF, T8W, T9C, T8Z, T9D, TbI, TdW, TbL, Te0; + { + E T3, Ta6, T6, Tcu, T4I, Ta7, T4F, Tcv, Td, Tcy, T27, Tae, Ta, Tcx, T2a; + E Tab; + { + E T1, T2, T4D, T4E; + T1 = cr[0]; + T2 = ci[WS(rs, 31)]; + T3 = T1 + T2; + Ta6 = T1 - T2; + { + E T4, T5, T4G, T4H; + T4 = cr[WS(rs, 16)]; + T5 = ci[WS(rs, 15)]; + T6 = T4 + T5; + Tcu = T4 - T5; + T4G = ci[WS(rs, 47)]; + T4H = cr[WS(rs, 48)]; + T4I = T4G - T4H; + Ta7 = T4G + T4H; + } + T4D = ci[WS(rs, 63)]; + T4E = 
cr[WS(rs, 32)]; + T4F = T4D - T4E; + Tcv = T4D + T4E; + { + E Tb, Tc, Tac, T25, T26, Tad; + Tb = ci[WS(rs, 7)]; + Tc = cr[WS(rs, 24)]; + Tac = Tb - Tc; + T25 = ci[WS(rs, 39)]; + T26 = cr[WS(rs, 56)]; + Tad = T25 + T26; + Td = Tb + Tc; + Tcy = Tac + Tad; + T27 = T25 - T26; + Tae = Tac - Tad; + } + { + E T8, T9, Ta9, T28, T29, Taa; + T8 = cr[WS(rs, 8)]; + T9 = ci[WS(rs, 23)]; + Ta9 = T8 - T9; + T28 = ci[WS(rs, 55)]; + T29 = cr[WS(rs, 40)]; + Taa = T28 + T29; + Ta = T8 + T9; + Tcx = Ta9 + Taa; + T2a = T28 - T29; + Tab = Ta9 - Taa; + } + } + { + E T7, Te, Tf8, Tf9; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T8C = T7 - Te; + Tf8 = Ta6 + Ta7; + Tf9 = KP707106781 * (Tcx + Tcy); + Tfa = Tf8 - Tf9; + Thk = Tf8 + Tf9; + } + { + E Tge, Tgf, T24, T2b; + Tge = Tcv - Tcu; + Tgf = KP707106781 * (Tab - Tae); + Tgg = Tge + Tgf; + ThM = Tge - Tgf; + T24 = T3 - T6; + T2b = T27 - T2a; + T2c = T24 + T2b; + T5O = T24 - T2b; + } + { + E T4C, T4J, Ta8, Taf; + T4C = Ta - Td; + T4J = T4F - T4I; + T4K = T4C + T4J; + T6g = T4J - T4C; + Ta8 = Ta6 - Ta7; + Taf = KP707106781 * (Tab + Tae); + Tag = Ta8 - Taf; + TdE = Ta8 + Taf; + } + { + E Tcw, Tcz, T7N, T7O; + Tcw = Tcu + Tcv; + Tcz = KP707106781 * (Tcx - Tcy); + TcA = Tcw - Tcz; + Te6 = Tcw + Tcz; + T7N = T4F + T4I; + T7O = T2a + T27; + T7P = T7N + T7O; + T94 = T7N - T7O; + } + } + { + E TC, Tb1, T2Z, TaQ, T2X, Tb2, T7m, TaR, TJ, Tb4, Tb5, T2Q, T36, TaV, TaY; + E T7n, Tfq, Tfr; + { + E Tw, Tx, Ty, Tz, TA, TB; + Tw = cr[WS(rs, 2)]; + Tx = ci[WS(rs, 29)]; + Ty = Tw + Tx; + Tz = cr[WS(rs, 18)]; + TA = ci[WS(rs, 13)]; + TB = Tz + TA; + TC = Ty + TB; + Tb1 = Tz - TA; + T2Z = Ty - TB; + TaQ = Tw - Tx; + } + { + E T2R, T2S, T2T, T2U, T2V, T2W; + T2R = ci[WS(rs, 61)]; + T2S = cr[WS(rs, 34)]; + T2T = T2R - T2S; + T2U = ci[WS(rs, 45)]; + T2V = cr[WS(rs, 50)]; + T2W = T2U - T2V; + T2X = T2T - T2W; + Tb2 = T2R + T2S; + T7m = T2T + T2W; + TaR = T2U + T2V; + } + { + E TF, TaT, T35, TaU, TI, TaW, T32, TaX; + { + E TD, TE, T33, T34; + TD = cr[WS(rs, 
10)]; + TE = ci[WS(rs, 21)]; + TF = TD + TE; + TaT = TD - TE; + T33 = ci[WS(rs, 53)]; + T34 = cr[WS(rs, 42)]; + T35 = T33 - T34; + TaU = T33 + T34; + } + { + E TG, TH, T30, T31; + TG = ci[WS(rs, 5)]; + TH = cr[WS(rs, 26)]; + TI = TG + TH; + TaW = TG - TH; + T30 = ci[WS(rs, 37)]; + T31 = cr[WS(rs, 58)]; + T32 = T30 - T31; + TaX = T30 + T31; + } + TJ = TF + TI; + Tb4 = TaT + TaU; + Tb5 = TaW + TaX; + T2Q = TF - TI; + T36 = T32 - T35; + TaV = TaT - TaU; + TaY = TaW - TaX; + T7n = T35 + T32; + } + TK = TC + TJ; + T7o = T7m + T7n; + { + E T2Y, T37, Tft, Tfu; + T2Y = T2Q + T2X; + T37 = T2Z + T36; + T38 = FMA(KP923879532, T2Y, KP382683432 * T37); + T4P = FNMS(KP382683432, T2Y, KP923879532 * T37); + Tft = TaQ + TaR; + Tfu = KP707106781 * (Tb4 + Tb5); + Tfv = Tft - Tfu; + Thn = Tft + Tfu; + } + { + E T5U, T5V, TaS, TaZ; + T5U = T2X - T2Q; + T5V = T2Z - T36; + T5W = FMA(KP382683432, T5U, KP923879532 * T5V); + T6j = FNMS(KP923879532, T5U, KP382683432 * T5V); + TaS = TaQ - TaR; + TaZ = KP707106781 * (TaV + TaY); + Tb0 = TaS - TaZ; + TdK = TaS + TaZ; + } + Tfq = Tb2 - Tb1; + Tfr = KP707106781 * (TaV - TaY); + Tfs = Tfq + Tfr; + Tho = Tfq - Tfr; + { + E T8I, T8J, Tb3, Tb6; + T8I = TC - TJ; + T8J = T7m - T7n; + T8K = T8I + T8J; + T97 = T8I - T8J; + Tb3 = Tb1 + Tb2; + Tb6 = KP707106781 * (Tb4 - Tb5); + Tb7 = Tb3 - Tb6; + TdL = Tb3 + Tb6; + } + } + { + E TR, TaI, T2G, Tax, T2E, TaJ, T7j, Tay, TY, TaL, TaM, T2x, T2N, TaC, TaF; + E T7k, Tfj, Tfk; + { + E TL, TM, TN, TO, TP, TQ; + TL = ci[WS(rs, 1)]; + TM = cr[WS(rs, 30)]; + TN = TL + TM; + TO = cr[WS(rs, 14)]; + TP = ci[WS(rs, 17)]; + TQ = TO + TP; + TR = TN + TQ; + TaI = TL - TM; + T2G = TN - TQ; + Tax = TO - TP; + } + { + E T2y, T2z, T2A, T2B, T2C, T2D; + T2y = ci[WS(rs, 33)]; + T2z = cr[WS(rs, 62)]; + T2A = T2y - T2z; + T2B = ci[WS(rs, 49)]; + T2C = cr[WS(rs, 46)]; + T2D = T2B - T2C; + T2E = T2A - T2D; + TaJ = T2B + T2C; + T7j = T2A + T2D; + Tay = T2y + T2z; + } + { + E TU, TaA, T2M, TaB, TX, TaD, T2J, TaE; + { + E TS, TT, T2K, 
T2L; + TS = cr[WS(rs, 6)]; + TT = ci[WS(rs, 25)]; + TU = TS + TT; + TaA = TS - TT; + T2K = ci[WS(rs, 57)]; + T2L = cr[WS(rs, 38)]; + T2M = T2K - T2L; + TaB = T2K + T2L; + } + { + E TV, TW, T2H, T2I; + TV = ci[WS(rs, 9)]; + TW = cr[WS(rs, 22)]; + TX = TV + TW; + TaD = TV - TW; + T2H = ci[WS(rs, 41)]; + T2I = cr[WS(rs, 54)]; + T2J = T2H - T2I; + TaE = T2H + T2I; + } + TY = TU + TX; + TaL = TaA - TaB; + TaM = TaD - TaE; + T2x = TU - TX; + T2N = T2J - T2M; + TaC = TaA + TaB; + TaF = TaD + TaE; + T7k = T2M + T2J; + } + TZ = TR + TY; + T7l = T7j + T7k; + { + E T2F, T2O, Tfm, Tfn; + T2F = T2x + T2E; + T2O = T2G + T2N; + T2P = FNMS(KP382683432, T2O, KP923879532 * T2F); + T4Q = FMA(KP382683432, T2F, KP923879532 * T2O); + Tfm = TaI + TaJ; + Tfn = KP707106781 * (TaC + TaF); + Tfo = Tfm - Tfn; + Thq = Tfm + Tfn; + } + { + E T5R, T5S, Taz, TaG; + T5R = T2E - T2x; + T5S = T2G - T2N; + T5T = FNMS(KP923879532, T5S, KP382683432 * T5R); + T6k = FMA(KP923879532, T5R, KP382683432 * T5S); + Taz = Tax - Tay; + TaG = KP707106781 * (TaC - TaF); + TaH = Taz - TaG; + TdH = Taz + TaG; + } + Tfj = KP707106781 * (TaL - TaM); + Tfk = Tax + Tay; + Tfl = Tfj - Tfk; + Thr = Tfk + Tfj; + { + E T8F, T8G, TaK, TaN; + T8F = T7j - T7k; + T8G = TR - TY; + T8H = T8F - T8G; + T98 = T8G + T8F; + TaK = TaI - TaJ; + TaN = KP707106781 * (TaL + TaM); + TaO = TaK - TaN; + TdI = TaK + TaN; + } + } + { + E Ti, T2j, Tl, T2g, T2d, T2k, Tfc, Tfb, Tat, Taq, Tp, T2s, Ts, T2p, T2m; + E T2t, Tff, Tfe, Tam, Taj; + { + E Tar, Tas, Tao, Tap; + { + E Tg, Th, T2h, T2i; + Tg = cr[WS(rs, 4)]; + Th = ci[WS(rs, 27)]; + Ti = Tg + Th; + Tar = Tg - Th; + T2h = ci[WS(rs, 43)]; + T2i = cr[WS(rs, 52)]; + T2j = T2h - T2i; + Tas = T2h + T2i; + } + { + E Tj, Tk, T2e, T2f; + Tj = cr[WS(rs, 20)]; + Tk = ci[WS(rs, 11)]; + Tl = Tj + Tk; + Tao = Tj - Tk; + T2e = ci[WS(rs, 59)]; + T2f = cr[WS(rs, 36)]; + T2g = T2e - T2f; + Tap = T2e + T2f; + } + T2d = Ti - Tl; + T2k = T2g - T2j; + Tfc = Tap - Tao; + Tfb = Tar + Tas; + Tat = Tar - Tas; + Taq = 
Tao + Tap; + } + { + E Tak, Tal, Tah, Tai; + { + E Tn, To, T2q, T2r; + Tn = ci[WS(rs, 3)]; + To = cr[WS(rs, 28)]; + Tp = Tn + To; + Tak = Tn - To; + T2q = ci[WS(rs, 51)]; + T2r = cr[WS(rs, 44)]; + T2s = T2q - T2r; + Tal = T2q + T2r; + } + { + E Tq, Tr, T2n, T2o; + Tq = cr[WS(rs, 12)]; + Tr = ci[WS(rs, 19)]; + Ts = Tq + Tr; + Tah = Tq - Tr; + T2n = ci[WS(rs, 35)]; + T2o = cr[WS(rs, 60)]; + T2p = T2n - T2o; + Tai = T2n + T2o; + } + T2m = Tp - Ts; + T2t = T2p - T2s; + Tff = Tah + Tai; + Tfe = Tak + Tal; + Tam = Tak - Tal; + Taj = Tah - Tai; + } + { + E Tm, Tt, Tfd, Tfg; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T95 = Tm - Tt; + Tfd = FNMS(KP923879532, Tfc, KP382683432 * Tfb); + Tfg = FNMS(KP923879532, Tff, KP382683432 * Tfe); + Tfh = Tfd + Tfg; + ThN = Tfd - Tfg; + } + { + E Tgh, Tgi, T2l, T2u; + Tgh = FMA(KP382683432, Tfc, KP923879532 * Tfb); + Tgi = FMA(KP382683432, Tff, KP923879532 * Tfe); + Tgj = Tgh - Tgi; + Thl = Tgh + Tgi; + T2l = T2d - T2k; + T2u = T2m + T2t; + T2v = KP707106781 * (T2l + T2u); + T6h = KP707106781 * (T2l - T2u); + } + { + E T4L, T4M, Tan, Tau; + T4L = T2d + T2k; + T4M = T2t - T2m; + T4N = KP707106781 * (T4L + T4M); + T5P = KP707106781 * (T4M - T4L); + Tan = FNMS(KP382683432, Tam, KP923879532 * Taj); + Tau = FMA(KP923879532, Taq, KP382683432 * Tat); + Tav = Tan - Tau; + Te7 = Tau + Tan; + } + { + E TcB, TcC, T7Q, T7R; + TcB = FNMS(KP382683432, Taq, KP923879532 * Tat); + TcC = FMA(KP382683432, Taj, KP923879532 * Tam); + TcD = TcB - TcC; + TdF = TcB + TcC; + T7Q = T2g + T2j; + T7R = T2p + T2s; + T7S = T7Q + T7R; + T8D = T7R - T7Q; + } + } + { + E T1z, T1C, T1D, Tcf, TbO, T4o, T4r, T7B, Tcg, TbP, T1G, T3Y, T1J, T3V, T1K; + E T7C, Tcj, Tci, TbW, TbT, T1S, TfV, TfW, T41, T48, Tc8, Tcb, T7E, T1Z, TfY; + E TfZ, T4a, T4h, Tc1, Tc4, T7F; + { + E T1x, T1y, T1A, T1B; + T1x = ci[0]; + T1y = cr[WS(rs, 31)]; + T1z = T1x + T1y; + T1A = cr[WS(rs, 15)]; + T1B = ci[WS(rs, 16)]; + T1C = T1A + T1B; + T1D = T1z + T1C; + Tcf = T1A - T1B; + TbO = T1x - T1y; + 
} + { + E T4m, T4n, T4p, T4q; + T4m = ci[WS(rs, 32)]; + T4n = cr[WS(rs, 63)]; + T4o = T4m - T4n; + T4p = ci[WS(rs, 48)]; + T4q = cr[WS(rs, 47)]; + T4r = T4p - T4q; + T7B = T4o + T4r; + Tcg = T4m + T4n; + TbP = T4p + T4q; + } + { + E TbR, TbS, TbU, TbV; + { + E T1E, T1F, T3W, T3X; + T1E = cr[WS(rs, 7)]; + T1F = ci[WS(rs, 24)]; + T1G = T1E + T1F; + TbR = T1E - T1F; + T3W = ci[WS(rs, 56)]; + T3X = cr[WS(rs, 39)]; + T3Y = T3W - T3X; + TbS = T3W + T3X; + } + { + E T1H, T1I, T3T, T3U; + T1H = ci[WS(rs, 8)]; + T1I = cr[WS(rs, 23)]; + T1J = T1H + T1I; + TbU = T1H - T1I; + T3T = ci[WS(rs, 40)]; + T3U = cr[WS(rs, 55)]; + T3V = T3T - T3U; + TbV = T3T + T3U; + } + T1K = T1G + T1J; + T7C = T3Y + T3V; + Tcj = TbU + TbV; + Tci = TbR + TbS; + TbW = TbU - TbV; + TbT = TbR - TbS; + } + { + E T1O, Tc9, T47, Tca, T1R, Tc6, T44, Tc7; + { + E T1M, T1N, T45, T46; + T1M = cr[WS(rs, 3)]; + T1N = ci[WS(rs, 28)]; + T1O = T1M + T1N; + Tc9 = T1M - T1N; + T45 = ci[WS(rs, 44)]; + T46 = cr[WS(rs, 51)]; + T47 = T45 - T46; + Tca = T45 + T46; + } + { + E T1P, T1Q, T42, T43; + T1P = cr[WS(rs, 19)]; + T1Q = ci[WS(rs, 12)]; + T1R = T1P + T1Q; + Tc6 = T1P - T1Q; + T42 = ci[WS(rs, 60)]; + T43 = cr[WS(rs, 35)]; + T44 = T42 - T43; + Tc7 = T42 + T43; + } + T1S = T1O + T1R; + TfV = Tc9 + Tca; + TfW = Tc7 - Tc6; + T41 = T1O - T1R; + T48 = T44 - T47; + Tc8 = Tc6 + Tc7; + Tcb = Tc9 - Tca; + T7E = T44 + T47; + } + { + E T1V, Tc2, T4g, Tc3, T1Y, TbZ, T4d, Tc0; + { + E T1T, T1U, T4e, T4f; + T1T = ci[WS(rs, 4)]; + T1U = cr[WS(rs, 27)]; + T1V = T1T + T1U; + Tc2 = T1T - T1U; + T4e = ci[WS(rs, 52)]; + T4f = cr[WS(rs, 43)]; + T4g = T4e - T4f; + Tc3 = T4e + T4f; + } + { + E T1W, T1X, T4b, T4c; + T1W = cr[WS(rs, 11)]; + T1X = ci[WS(rs, 20)]; + T1Y = T1W + T1X; + TbZ = T1W - T1X; + T4b = ci[WS(rs, 36)]; + T4c = cr[WS(rs, 59)]; + T4d = T4b - T4c; + Tc0 = T4b + T4c; + } + T1Z = T1V + T1Y; + TfY = Tc2 + Tc3; + TfZ = TbZ + Tc0; + T4a = T1V - T1Y; + T4h = T4d - T4g; + Tc1 = TbZ - Tc0; + Tc4 = Tc2 - Tc3; + T7F = T4d + T4g; + } 
+ T1L = T1D + T1K; + T20 = T1S + T1Z; + T7A = T1L - T20; + T7D = T7B + T7C; + T7G = T7E + T7F; + T7H = T7D - T7G; + { + E T3S, T3Z, TfX, Tg0; + T3S = T1z - T1C; + T3Z = T3V - T3Y; + T40 = T3S + T3Z; + T62 = T3S - T3Z; + TfX = FNMS(KP923879532, TfW, KP382683432 * TfV); + Tg0 = FNMS(KP923879532, TfZ, KP382683432 * TfY); + Tg1 = TfX + Tg0; + Thv = TfX - Tg0; + } + { + E Tg6, Tg7, Tg3, Tg4; + Tg6 = FMA(KP382683432, TfW, KP923879532 * TfV); + Tg7 = FMA(KP382683432, TfZ, KP923879532 * TfY); + Tg8 = Tg6 - Tg7; + Thz = Tg6 + Tg7; + Tg3 = KP707106781 * (TbT - TbW); + Tg4 = Tcf + Tcg; + Tg5 = Tg3 - Tg4; + Thw = Tg4 + Tg3; + } + { + E T4l, T4s, T49, T4i; + T4l = T1G - T1J; + T4s = T4o - T4r; + T4t = T4l + T4s; + T5Z = T4s - T4l; + T49 = T41 - T48; + T4i = T4a + T4h; + T4j = KP707106781 * (T49 + T4i); + T60 = KP707106781 * (T49 - T4i); + } + { + E T4u, T4v, TbQ, TbX; + T4u = T41 + T48; + T4v = T4h - T4a; + T4w = KP707106781 * (T4u + T4v); + T63 = KP707106781 * (T4v - T4u); + TbQ = TbO - TbP; + TbX = KP707106781 * (TbT + TbW); + TbY = TbQ - TbX; + TdS = TbQ + TbX; + } + { + E Tc5, Tcc, TfS, TfT; + Tc5 = FNMS(KP382683432, Tc4, KP923879532 * Tc1); + Tcc = FMA(KP923879532, Tc8, KP382683432 * Tcb); + Tcd = Tc5 - Tcc; + TdQ = Tcc + Tc5; + TfS = TbO + TbP; + TfT = KP707106781 * (Tci + Tcj); + TfU = TfS - TfT; + Thy = TfS + TfT; + } + { + E T8N, T8O, T8Q, T8R; + T8N = T7B - T7C; + T8O = T1S - T1Z; + T8P = T8N - T8O; + T9z = T8O + T8N; + T8Q = T1D - T1K; + T8R = T7F - T7E; + T8S = T8Q - T8R; + T9A = T8Q + T8R; + } + { + E Tch, Tck, Tcm, Tcn; + Tch = Tcf - Tcg; + Tck = KP707106781 * (Tci - Tcj); + Tcl = Tch - Tck; + TdP = Tch + Tck; + Tcm = FNMS(KP382683432, Tc8, KP923879532 * Tcb); + Tcn = FMA(KP382683432, Tc1, KP923879532 * Tc4); + Tco = Tcm - Tcn; + TdT = Tcm + Tcn; + } + } + { + E T14, T17, T18, TbC, Tbb, T3H, T3K, T7s, TbD, Tbc, T1b, T3h, T1e, T3e, T1f; + E T7t, TbG, TbF, Tbj, Tbg, T1n, TfC, TfD, T3k, T3r, Tbv, Tby, T7v, T1u, TfF; + E TfG, T3t, T3A, Tbo, Tbr, T7w; + { + E T12, T13, 
T15, T16; + T12 = cr[WS(rs, 1)]; + T13 = ci[WS(rs, 30)]; + T14 = T12 + T13; + T15 = cr[WS(rs, 17)]; + T16 = ci[WS(rs, 14)]; + T17 = T15 + T16; + T18 = T14 + T17; + TbC = T15 - T16; + Tbb = T12 - T13; + } + { + E T3F, T3G, T3I, T3J; + T3F = ci[WS(rs, 62)]; + T3G = cr[WS(rs, 33)]; + T3H = T3F - T3G; + T3I = ci[WS(rs, 46)]; + T3J = cr[WS(rs, 49)]; + T3K = T3I - T3J; + T7s = T3H + T3K; + TbD = T3F + T3G; + Tbc = T3I + T3J; + } + { + E Tbe, Tbf, Tbh, Tbi; + { + E T19, T1a, T3f, T3g; + T19 = cr[WS(rs, 9)]; + T1a = ci[WS(rs, 22)]; + T1b = T19 + T1a; + Tbe = T19 - T1a; + T3f = ci[WS(rs, 54)]; + T3g = cr[WS(rs, 41)]; + T3h = T3f - T3g; + Tbf = T3f + T3g; + } + { + E T1c, T1d, T3c, T3d; + T1c = ci[WS(rs, 6)]; + T1d = cr[WS(rs, 25)]; + T1e = T1c + T1d; + Tbh = T1c - T1d; + T3c = ci[WS(rs, 38)]; + T3d = cr[WS(rs, 57)]; + T3e = T3c - T3d; + Tbi = T3c + T3d; + } + T1f = T1b + T1e; + T7t = T3h + T3e; + TbG = Tbh + Tbi; + TbF = Tbe + Tbf; + Tbj = Tbh - Tbi; + Tbg = Tbe - Tbf; + } + { + E T1j, Tbw, T3q, Tbx, T1m, Tbt, T3n, Tbu; + { + E T1h, T1i, T3o, T3p; + T1h = cr[WS(rs, 5)]; + T1i = ci[WS(rs, 26)]; + T1j = T1h + T1i; + Tbw = T1h - T1i; + T3o = ci[WS(rs, 42)]; + T3p = cr[WS(rs, 53)]; + T3q = T3o - T3p; + Tbx = T3o + T3p; + } + { + E T1k, T1l, T3l, T3m; + T1k = cr[WS(rs, 21)]; + T1l = ci[WS(rs, 10)]; + T1m = T1k + T1l; + Tbt = T1k - T1l; + T3l = ci[WS(rs, 58)]; + T3m = cr[WS(rs, 37)]; + T3n = T3l - T3m; + Tbu = T3l + T3m; + } + T1n = T1j + T1m; + TfC = Tbw + Tbx; + TfD = Tbu - Tbt; + T3k = T1j - T1m; + T3r = T3n - T3q; + Tbv = Tbt + Tbu; + Tby = Tbw - Tbx; + T7v = T3n + T3q; + } + { + E T1q, Tbp, T3z, Tbq, T1t, Tbm, T3w, Tbn; + { + E T1o, T1p, T3x, T3y; + T1o = ci[WS(rs, 2)]; + T1p = cr[WS(rs, 29)]; + T1q = T1o + T1p; + Tbp = T1o - T1p; + T3x = ci[WS(rs, 50)]; + T3y = cr[WS(rs, 45)]; + T3z = T3x - T3y; + Tbq = T3x + T3y; + } + { + E T1r, T1s, T3u, T3v; + T1r = cr[WS(rs, 13)]; + T1s = ci[WS(rs, 18)]; + T1t = T1r + T1s; + Tbm = T1r - T1s; + T3u = ci[WS(rs, 34)]; + T3v = cr[WS(rs, 
61)]; + T3w = T3u - T3v; + Tbn = T3u + T3v; + } + T1u = T1q + T1t; + TfF = Tbp + Tbq; + TfG = Tbm + Tbn; + T3t = T1q - T1t; + T3A = T3w - T3z; + Tbo = Tbm - Tbn; + Tbr = Tbp - Tbq; + T7w = T3w + T3z; + } + T1g = T18 + T1f; + T1v = T1n + T1u; + T7r = T1g - T1v; + T7u = T7s + T7t; + T7x = T7v + T7w; + T7y = T7u - T7x; + { + E T3b, T3i, TfE, TfH; + T3b = T14 - T17; + T3i = T3e - T3h; + T3j = T3b + T3i; + T69 = T3b - T3i; + TfE = FNMS(KP923879532, TfD, KP382683432 * TfC); + TfH = FNMS(KP923879532, TfG, KP382683432 * TfF); + TfI = TfE + TfH; + ThD = TfE - TfH; + } + { + E TfN, TfO, TfK, TfL; + TfN = FMA(KP382683432, TfD, KP923879532 * TfC); + TfO = FMA(KP382683432, TfG, KP923879532 * TfF); + TfP = TfN - TfO; + ThG = TfN + TfO; + TfK = TbD - TbC; + TfL = KP707106781 * (Tbg - Tbj); + TfM = TfK + TfL; + ThC = TfK - TfL; + } + { + E T3E, T3L, T3s, T3B; + T3E = T1b - T1e; + T3L = T3H - T3K; + T3M = T3E + T3L; + T66 = T3L - T3E; + T3s = T3k - T3r; + T3B = T3t + T3A; + T3C = KP707106781 * (T3s + T3B); + T67 = KP707106781 * (T3s - T3B); + } + { + E T3N, T3O, Tbd, Tbk; + T3N = T3k + T3r; + T3O = T3A - T3t; + T3P = KP707106781 * (T3N + T3O); + T6a = KP707106781 * (T3O - T3N); + Tbd = Tbb - Tbc; + Tbk = KP707106781 * (Tbg + Tbj); + Tbl = Tbd - Tbk; + TdZ = Tbd + Tbk; + } + { + E Tbs, Tbz, Tfz, TfA; + Tbs = FNMS(KP382683432, Tbr, KP923879532 * Tbo); + Tbz = FMA(KP923879532, Tbv, KP382683432 * Tby); + TbA = Tbs - Tbz; + TdX = Tbz + Tbs; + Tfz = Tbb + Tbc; + TfA = KP707106781 * (TbF + TbG); + TfB = Tfz - TfA; + ThF = Tfz + TfA; + } + { + E T8U, T8V, T8X, T8Y; + T8U = T7s - T7t; + T8V = T1n - T1u; + T8W = T8U - T8V; + T9C = T8V + T8U; + T8X = T18 - T1f; + T8Y = T7w - T7v; + T8Z = T8X - T8Y; + T9D = T8X + T8Y; + } + { + E TbE, TbH, TbJ, TbK; + TbE = TbC + TbD; + TbH = KP707106781 * (TbF - TbG); + TbI = TbE - TbH; + TdW = TbE + TbH; + TbJ = FNMS(KP382683432, Tbv, KP923879532 * Tby); + TbK = FMA(KP382683432, Tbo, KP923879532 * Tbr); + TbL = TbJ - TbK; + Te0 = TbJ + TbK; + } + } + { + E 
T11, T8q, T8n, T8r, T22, T8v, T8k, T8u; + { + E Tv, T10, T8l, T8m; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + T8q = Tv - T10; + T8l = T7u + T7x; + T8m = T7D + T7G; + T8n = T8l + T8m; + T8r = T8m - T8l; + } + { + E T1w, T21, T8i, T8j; + T1w = T1g + T1v; + T21 = T1L + T20; + T22 = T1w + T21; + T8v = T1w - T21; + T8i = T7P + T7S; + T8j = T7o + T7l; + T8k = T8i + T8j; + T8u = T8i - T8j; + } + cr[0] = T11 + T22; + ci[0] = T8k + T8n; + { + E T8g, T8o, T8f, T8h; + T8g = T11 - T22; + T8o = T8k - T8n; + T8f = W[62]; + T8h = W[63]; + cr[WS(rs, 32)] = FNMS(T8h, T8o, T8f * T8g); + ci[WS(rs, 32)] = FMA(T8h, T8g, T8f * T8o); + } + { + E T8s, T8w, T8p, T8t; + T8s = T8q - T8r; + T8w = T8u - T8v; + T8p = W[94]; + T8t = W[95]; + cr[WS(rs, 48)] = FNMS(T8t, T8w, T8p * T8s); + ci[WS(rs, 48)] = FMA(T8p, T8w, T8t * T8s); + } + { + E T8y, T8A, T8x, T8z; + T8y = T8q + T8r; + T8A = T8v + T8u; + T8x = W[30]; + T8z = W[31]; + cr[WS(rs, 16)] = FNMS(T8z, T8A, T8x * T8y); + ci[WS(rs, 16)] = FMA(T8x, T8A, T8z * T8y); + } + } + { + E T9y, T9U, T9N, T9V, T9F, T9Z, T9K, T9Y; + { + E T9w, T9x, T9L, T9M; + T9w = T8C + T8D; + T9x = KP707106781 * (T97 + T98); + T9y = T9w - T9x; + T9U = T9w + T9x; + T9L = FNMS(KP382683432, T9C, KP923879532 * T9D); + T9M = FMA(KP382683432, T9z, KP923879532 * T9A); + T9N = T9L - T9M; + T9V = T9L + T9M; + } + { + E T9B, T9E, T9I, T9J; + T9B = FNMS(KP382683432, T9A, KP923879532 * T9z); + T9E = FMA(KP923879532, T9C, KP382683432 * T9D); + T9F = T9B - T9E; + T9Z = T9E + T9B; + T9I = T95 + T94; + T9J = KP707106781 * (T8K + T8H); + T9K = T9I - T9J; + T9Y = T9I + T9J; + } + { + E T9G, T9O, T9v, T9H; + T9G = T9y - T9F; + T9O = T9K - T9N; + T9v = W[102]; + T9H = W[103]; + cr[WS(rs, 52)] = FNMS(T9H, T9O, T9v * T9G); + ci[WS(rs, 52)] = FMA(T9H, T9G, T9v * T9O); + } + { + E Ta2, Ta4, Ta1, Ta3; + Ta2 = T9U + T9V; + Ta4 = T9Y + T9Z; + Ta1 = W[6]; + Ta3 = W[7]; + cr[WS(rs, 4)] = FNMS(Ta3, Ta4, Ta1 * Ta2); + ci[WS(rs, 4)] = FMA(Ta1, Ta4, Ta3 * Ta2); + } + { + E T9Q, T9S, T9P, 
T9R; + T9Q = T9y + T9F; + T9S = T9K + T9N; + T9P = W[38]; + T9R = W[39]; + cr[WS(rs, 20)] = FNMS(T9R, T9S, T9P * T9Q); + ci[WS(rs, 20)] = FMA(T9R, T9Q, T9P * T9S); + } + { + E T9W, Ta0, T9T, T9X; + T9W = T9U - T9V; + Ta0 = T9Y - T9Z; + T9T = W[70]; + T9X = W[71]; + cr[WS(rs, 36)] = FNMS(T9X, Ta0, T9T * T9W); + ci[WS(rs, 36)] = FMA(T9T, Ta0, T9X * T9W); + } + } + { + E T8M, T9k, T9d, T9l, T91, T9p, T9a, T9o; + { + E T8E, T8L, T9b, T9c; + T8E = T8C - T8D; + T8L = KP707106781 * (T8H - T8K); + T8M = T8E - T8L; + T9k = T8E + T8L; + T9b = FNMS(KP923879532, T8W, KP382683432 * T8Z); + T9c = FMA(KP923879532, T8P, KP382683432 * T8S); + T9d = T9b - T9c; + T9l = T9b + T9c; + } + { + E T8T, T90, T96, T99; + T8T = FNMS(KP923879532, T8S, KP382683432 * T8P); + T90 = FMA(KP382683432, T8W, KP923879532 * T8Z); + T91 = T8T - T90; + T9p = T90 + T8T; + T96 = T94 - T95; + T99 = KP707106781 * (T97 - T98); + T9a = T96 - T99; + T9o = T96 + T99; + } + { + E T92, T9e, T8B, T93; + T92 = T8M - T91; + T9e = T9a - T9d; + T8B = W[118]; + T93 = W[119]; + cr[WS(rs, 60)] = FNMS(T93, T9e, T8B * T92); + ci[WS(rs, 60)] = FMA(T93, T92, T8B * T9e); + } + { + E T9s, T9u, T9r, T9t; + T9s = T9k + T9l; + T9u = T9o + T9p; + T9r = W[22]; + T9t = W[23]; + cr[WS(rs, 12)] = FNMS(T9t, T9u, T9r * T9s); + ci[WS(rs, 12)] = FMA(T9r, T9u, T9t * T9s); + } + { + E T9g, T9i, T9f, T9h; + T9g = T8M + T91; + T9i = T9a + T9d; + T9f = W[54]; + T9h = W[55]; + cr[WS(rs, 28)] = FNMS(T9h, T9i, T9f * T9g); + ci[WS(rs, 28)] = FMA(T9h, T9g, T9f * T9i); + } + { + E T9m, T9q, T9j, T9n; + T9m = T9k - T9l; + T9q = T9o - T9p; + T9j = W[86]; + T9n = W[87]; + cr[WS(rs, 44)] = FNMS(T9n, T9q, T9j * T9m); + ci[WS(rs, 44)] = FMA(T9j, T9q, T9n * T9m); + } + } + { + E T7q, T84, T7X, T85, T7J, T89, T7U, T88; + { + E T7i, T7p, T7V, T7W; + T7i = Tf - Tu; + T7p = T7l - T7o; + T7q = T7i + T7p; + T84 = T7i - T7p; + T7V = T7r + T7y; + T7W = T7H - T7A; + T7X = KP707106781 * (T7V + T7W); + T85 = KP707106781 * (T7W - T7V); + } + { + E T7z, T7I, T7M, T7T; + 
T7z = T7r - T7y; + T7I = T7A + T7H; + T7J = KP707106781 * (T7z + T7I); + T89 = KP707106781 * (T7z - T7I); + T7M = TK - TZ; + T7T = T7P - T7S; + T7U = T7M + T7T; + T88 = T7T - T7M; + } + { + E T7K, T7Y, T7h, T7L; + T7K = T7q - T7J; + T7Y = T7U - T7X; + T7h = W[78]; + T7L = W[79]; + cr[WS(rs, 40)] = FNMS(T7L, T7Y, T7h * T7K); + ci[WS(rs, 40)] = FMA(T7L, T7K, T7h * T7Y); + } + { + E T8c, T8e, T8b, T8d; + T8c = T84 + T85; + T8e = T88 + T89; + T8b = W[46]; + T8d = W[47]; + cr[WS(rs, 24)] = FNMS(T8d, T8e, T8b * T8c); + ci[WS(rs, 24)] = FMA(T8b, T8e, T8d * T8c); + } + { + E T80, T82, T7Z, T81; + T80 = T7q + T7J; + T82 = T7U + T7X; + T7Z = W[14]; + T81 = W[15]; + cr[WS(rs, 8)] = FNMS(T81, T82, T7Z * T80); + ci[WS(rs, 8)] = FMA(T81, T80, T7Z * T82); + } + { + E T86, T8a, T83, T87; + T86 = T84 - T85; + T8a = T88 - T89; + T83 = W[110]; + T87 = W[111]; + cr[WS(rs, 56)] = FNMS(T87, T8a, T83 * T86); + ci[WS(rs, 56)] = FMA(T83, T8a, T87 * T86); + } + } + { + E T6K, T76, T6W, T7a, T6R, T7b, T6Z, T77; + { + E T6I, T6J, T6U, T6V; + T6I = T5O + T5P; + T6J = T6j + T6k; + T6K = T6I - T6J; + T76 = T6I + T6J; + T6U = T6g + T6h; + T6V = T5W + T5T; + T6W = T6U - T6V; + T7a = T6U + T6V; + { + E T6N, T6Y, T6Q, T6X; + { + E T6L, T6M, T6O, T6P; + T6L = T5Z + T60; + T6M = T62 + T63; + T6N = FNMS(KP555570233, T6M, KP831469612 * T6L); + T6Y = FMA(KP555570233, T6L, KP831469612 * T6M); + T6O = T66 + T67; + T6P = T69 + T6a; + T6Q = FMA(KP831469612, T6O, KP555570233 * T6P); + T6X = FNMS(KP555570233, T6O, KP831469612 * T6P); + } + T6R = T6N - T6Q; + T7b = T6Q + T6N; + T6Z = T6X - T6Y; + T77 = T6X + T6Y; + } + } + { + E T6S, T70, T6H, T6T; + T6S = T6K - T6R; + T70 = T6W - T6Z; + T6H = W[106]; + T6T = W[107]; + cr[WS(rs, 54)] = FNMS(T6T, T70, T6H * T6S); + ci[WS(rs, 54)] = FMA(T6T, T6S, T6H * T70); + } + { + E T7e, T7g, T7d, T7f; + T7e = T76 + T77; + T7g = T7a + T7b; + T7d = W[10]; + T7f = W[11]; + cr[WS(rs, 6)] = FNMS(T7f, T7g, T7d * T7e); + ci[WS(rs, 6)] = FMA(T7d, T7g, T7f * T7e); + } + { + E T72, 
T74, T71, T73; + T72 = T6K + T6R; + T74 = T6W + T6Z; + T71 = W[42]; + T73 = W[43]; + cr[WS(rs, 22)] = FNMS(T73, T74, T71 * T72); + ci[WS(rs, 22)] = FMA(T73, T72, T71 * T74); + } + { + E T78, T7c, T75, T79; + T78 = T76 - T77; + T7c = T7a - T7b; + T75 = W[74]; + T79 = W[75]; + cr[WS(rs, 38)] = FNMS(T79, T7c, T75 * T78); + ci[WS(rs, 38)] = FMA(T75, T7c, T79 * T78); + } + } + { + E T3a, T52, T4S, T56, T4z, T57, T4V, T53; + { + E T2w, T39, T4O, T4R; + T2w = T2c - T2v; + T39 = T2P - T38; + T3a = T2w + T39; + T52 = T2w - T39; + T4O = T4K - T4N; + T4R = T4P - T4Q; + T4S = T4O + T4R; + T56 = T4O - T4R; + { + E T3R, T4T, T4y, T4U; + { + E T3D, T3Q, T4k, T4x; + T3D = T3j - T3C; + T3Q = T3M - T3P; + T3R = FNMS(KP831469612, T3Q, KP555570233 * T3D); + T4T = FMA(KP831469612, T3D, KP555570233 * T3Q); + T4k = T40 - T4j; + T4x = T4t - T4w; + T4y = FMA(KP555570233, T4k, KP831469612 * T4x); + T4U = FNMS(KP831469612, T4k, KP555570233 * T4x); + } + T4z = T3R + T4y; + T57 = T3R - T4y; + T4V = T4T + T4U; + T53 = T4U - T4T; + } + } + { + E T4A, T4W, T23, T4B; + T4A = T3a - T4z; + T4W = T4S - T4V; + T23 = W[82]; + T4B = W[83]; + cr[WS(rs, 42)] = FNMS(T4B, T4W, T23 * T4A); + ci[WS(rs, 42)] = FMA(T4B, T4A, T23 * T4W); + } + { + E T5a, T5c, T59, T5b; + T5a = T52 + T53; + T5c = T56 + T57; + T59 = W[50]; + T5b = W[51]; + cr[WS(rs, 26)] = FNMS(T5b, T5c, T59 * T5a); + ci[WS(rs, 26)] = FMA(T59, T5c, T5b * T5a); + } + { + E T4Y, T50, T4X, T4Z; + T4Y = T3a + T4z; + T50 = T4S + T4V; + T4X = W[18]; + T4Z = W[19]; + cr[WS(rs, 10)] = FNMS(T4Z, T50, T4X * T4Y); + ci[WS(rs, 10)] = FMA(T4Z, T4Y, T4X * T50); + } + { + E T54, T58, T51, T55; + T54 = T52 - T53; + T58 = T56 - T57; + T51 = W[114]; + T55 = W[115]; + cr[WS(rs, 58)] = FNMS(T55, T58, T51 * T54); + ci[WS(rs, 58)] = FMA(T51, T58, T55 * T54); + } + } + { + E T5g, T5C, T5s, T5G, T5n, T5H, T5v, T5D; + { + E T5e, T5f, T5q, T5r; + T5e = T2c + T2v; + T5f = T4P + T4Q; + T5g = T5e + T5f; + T5C = T5e - T5f; + T5q = T4K + T4N; + T5r = T38 + T2P; + T5s = T5q + 
T5r; + T5G = T5q - T5r; + { + E T5j, T5t, T5m, T5u; + { + E T5h, T5i, T5k, T5l; + T5h = T3j + T3C; + T5i = T3M + T3P; + T5j = FNMS(KP195090322, T5i, KP980785280 * T5h); + T5t = FMA(KP195090322, T5h, KP980785280 * T5i); + T5k = T40 + T4j; + T5l = T4t + T4w; + T5m = FMA(KP980785280, T5k, KP195090322 * T5l); + T5u = FNMS(KP195090322, T5k, KP980785280 * T5l); + } + T5n = T5j + T5m; + T5H = T5j - T5m; + T5v = T5t + T5u; + T5D = T5u - T5t; + } + } + { + E T5o, T5w, T5d, T5p; + T5o = T5g - T5n; + T5w = T5s - T5v; + T5d = W[66]; + T5p = W[67]; + cr[WS(rs, 34)] = FNMS(T5p, T5w, T5d * T5o); + ci[WS(rs, 34)] = FMA(T5p, T5o, T5d * T5w); + } + { + E T5K, T5M, T5J, T5L; + T5K = T5C + T5D; + T5M = T5G + T5H; + T5J = W[34]; + T5L = W[35]; + cr[WS(rs, 18)] = FNMS(T5L, T5M, T5J * T5K); + ci[WS(rs, 18)] = FMA(T5J, T5M, T5L * T5K); + } + { + E T5y, T5A, T5x, T5z; + T5y = T5g + T5n; + T5A = T5s + T5v; + T5x = W[2]; + T5z = W[3]; + cr[WS(rs, 2)] = FNMS(T5z, T5A, T5x * T5y); + ci[WS(rs, 2)] = FMA(T5z, T5y, T5x * T5A); + } + { + E T5E, T5I, T5B, T5F; + T5E = T5C - T5D; + T5I = T5G - T5H; + T5B = W[98]; + T5F = W[99]; + cr[WS(rs, 50)] = FNMS(T5F, T5I, T5B * T5E); + ci[WS(rs, 50)] = FMA(T5B, T5I, T5F * T5E); + } + } + { + E T5Y, T6w, T6m, T6A, T6d, T6B, T6p, T6x; + { + E T5Q, T5X, T6i, T6l; + T5Q = T5O - T5P; + T5X = T5T - T5W; + T5Y = T5Q - T5X; + T6w = T5Q + T5X; + T6i = T6g - T6h; + T6l = T6j - T6k; + T6m = T6i - T6l; + T6A = T6i + T6l; + { + E T65, T6o, T6c, T6n; + { + E T61, T64, T68, T6b; + T61 = T5Z - T60; + T64 = T62 - T63; + T65 = FNMS(KP980785280, T64, KP195090322 * T61); + T6o = FMA(KP980785280, T61, KP195090322 * T64); + T68 = T66 - T67; + T6b = T69 - T6a; + T6c = FMA(KP195090322, T68, KP980785280 * T6b); + T6n = FNMS(KP980785280, T68, KP195090322 * T6b); + } + T6d = T65 - T6c; + T6B = T6c + T65; + T6p = T6n - T6o; + T6x = T6n + T6o; + } + } + { + E T6e, T6q, T5N, T6f; + T6e = T5Y - T6d; + T6q = T6m - T6p; + T5N = W[122]; + T6f = W[123]; + cr[WS(rs, 62)] = FNMS(T6f, T6q, T5N * 
T6e); + ci[WS(rs, 62)] = FMA(T6f, T6e, T5N * T6q); + } + { + E T6E, T6G, T6D, T6F; + T6E = T6w + T6x; + T6G = T6A + T6B; + T6D = W[26]; + T6F = W[27]; + cr[WS(rs, 14)] = FNMS(T6F, T6G, T6D * T6E); + ci[WS(rs, 14)] = FMA(T6D, T6G, T6F * T6E); + } + { + E T6s, T6u, T6r, T6t; + T6s = T5Y + T6d; + T6u = T6m + T6p; + T6r = W[58]; + T6t = W[59]; + cr[WS(rs, 30)] = FNMS(T6t, T6u, T6r * T6s); + ci[WS(rs, 30)] = FMA(T6t, T6s, T6r * T6u); + } + { + E T6y, T6C, T6v, T6z; + T6y = T6w - T6x; + T6C = T6A - T6B; + T6v = W[90]; + T6z = W[91]; + cr[WS(rs, 46)] = FNMS(T6z, T6C, T6v * T6y); + ci[WS(rs, 46)] = FMA(T6v, T6C, T6z * T6y); + } + } + { + E Tba, Tdw, TcS, Tdi, TcI, Tds, TcW, Td6, Tcr, TcX, TcL, TcT, Tdd, Tdx, Tdl; + E Tdt; + { + E Taw, Tdg, Tb9, Tdh, TaP, Tb8; + Taw = Tag - Tav; + Tdg = TcA + TcD; + TaP = FNMS(KP831469612, TaO, KP555570233 * TaH); + Tb8 = FMA(KP831469612, Tb0, KP555570233 * Tb7); + Tb9 = TaP - Tb8; + Tdh = Tb8 + TaP; + Tba = Taw + Tb9; + Tdw = Tdg - Tdh; + TcS = Taw - Tb9; + Tdi = Tdg + Tdh; + } + { + E TcE, Td4, TcH, Td5, TcF, TcG; + TcE = TcA - TcD; + Td4 = Tag + Tav; + TcF = FNMS(KP831469612, Tb7, KP555570233 * Tb0); + TcG = FMA(KP555570233, TaO, KP831469612 * TaH); + TcH = TcF - TcG; + Td5 = TcF + TcG; + TcI = TcE + TcH; + Tds = Td4 - Td5; + TcW = TcE - TcH; + Td6 = Td4 + Td5; + } + { + E TbN, TcJ, Tcq, TcK; + { + E TbB, TbM, Tce, Tcp; + TbB = Tbl - TbA; + TbM = TbI - TbL; + TbN = FNMS(KP956940335, TbM, KP290284677 * TbB); + TcJ = FMA(KP956940335, TbB, KP290284677 * TbM); + Tce = TbY - Tcd; + Tcp = Tcl - Tco; + Tcq = FMA(KP290284677, Tce, KP956940335 * Tcp); + TcK = FNMS(KP956940335, Tce, KP290284677 * Tcp); + } + Tcr = TbN + Tcq; + TcX = TbN - Tcq; + TcL = TcJ + TcK; + TcT = TcK - TcJ; + } + { + E Td9, Tdj, Tdc, Tdk; + { + E Td7, Td8, Tda, Tdb; + Td7 = Tbl + TbA; + Td8 = TbI + TbL; + Td9 = FNMS(KP471396736, Td8, KP881921264 * Td7); + Tdj = FMA(KP471396736, Td7, KP881921264 * Td8); + Tda = TbY + Tcd; + Tdb = Tcl + Tco; + Tdc = FMA(KP881921264, Tda, 
KP471396736 * Tdb); + Tdk = FNMS(KP471396736, Tda, KP881921264 * Tdb); + } + Tdd = Td9 + Tdc; + Tdx = Td9 - Tdc; + Tdl = Tdj + Tdk; + Tdt = Tdk - Tdj; + } + { + E Tcs, TcM, Ta5, Tct; + Tcs = Tba - Tcr; + TcM = TcI - TcL; + Ta5 = W[88]; + Tct = W[89]; + cr[WS(rs, 45)] = FNMS(Tct, TcM, Ta5 * Tcs); + ci[WS(rs, 45)] = FMA(Tct, Tcs, Ta5 * TcM); + } + { + E Tdu, Tdy, Tdr, Tdv; + Tdu = Tds - Tdt; + Tdy = Tdw - Tdx; + Tdr = W[104]; + Tdv = W[105]; + cr[WS(rs, 53)] = FNMS(Tdv, Tdy, Tdr * Tdu); + ci[WS(rs, 53)] = FMA(Tdr, Tdy, Tdv * Tdu); + } + { + E TdA, TdC, Tdz, TdB; + TdA = Tds + Tdt; + TdC = Tdw + Tdx; + Tdz = W[40]; + TdB = W[41]; + cr[WS(rs, 21)] = FNMS(TdB, TdC, Tdz * TdA); + ci[WS(rs, 21)] = FMA(Tdz, TdC, TdB * TdA); + } + { + E TcO, TcQ, TcN, TcP; + TcO = Tba + Tcr; + TcQ = TcI + TcL; + TcN = W[24]; + TcP = W[25]; + cr[WS(rs, 13)] = FNMS(TcP, TcQ, TcN * TcO); + ci[WS(rs, 13)] = FMA(TcP, TcO, TcN * TcQ); + } + { + E TcU, TcY, TcR, TcV; + TcU = TcS - TcT; + TcY = TcW - TcX; + TcR = W[120]; + TcV = W[121]; + cr[WS(rs, 61)] = FNMS(TcV, TcY, TcR * TcU); + ci[WS(rs, 61)] = FMA(TcR, TcY, TcV * TcU); + } + { + E Tde, Tdm, Td3, Tdf; + Tde = Td6 - Tdd; + Tdm = Tdi - Tdl; + Td3 = W[72]; + Tdf = W[73]; + cr[WS(rs, 37)] = FNMS(Tdf, Tdm, Td3 * Tde); + ci[WS(rs, 37)] = FMA(Tdf, Tde, Td3 * Tdm); + } + { + E Tdo, Tdq, Tdn, Tdp; + Tdo = Td6 + Tdd; + Tdq = Tdi + Tdl; + Tdn = W[8]; + Tdp = W[9]; + cr[WS(rs, 5)] = FNMS(Tdp, Tdq, Tdn * Tdo); + ci[WS(rs, 5)] = FMA(Tdp, Tdo, Tdn * Tdq); + } + { + E Td0, Td2, TcZ, Td1; + Td0 = TcS + TcT; + Td2 = TcW + TcX; + TcZ = W[56]; + Td1 = W[57]; + cr[WS(rs, 29)] = FNMS(Td1, Td2, TcZ * Td0); + ci[WS(rs, 29)] = FMA(TcZ, Td2, Td1 * Td0); + } + } + { + E Tfy, Thc, Tgy, TgY, Tgo, Th8, TgC, TgM, Tgb, TgD, Tgr, Tgz, TgT, Thd, Th1; + E Th9; + { + E Tfi, TgW, Tfx, TgX, Tfp, Tfw; + Tfi = Tfa - Tfh; + TgW = Tgg + Tgj; + Tfp = FNMS(KP555570233, Tfo, KP831469612 * Tfl); + Tfw = FMA(KP831469612, Tfs, KP555570233 * Tfv); + Tfx = Tfp - Tfw; + TgX = Tfw + Tfp; + Tfy 
= Tfi + Tfx; + Thc = TgW - TgX; + Tgy = Tfi - Tfx; + TgY = TgW + TgX; + } + { + E Tgk, TgK, Tgn, TgL, Tgl, Tgm; + Tgk = Tgg - Tgj; + TgK = Tfa + Tfh; + Tgl = FNMS(KP555570233, Tfs, KP831469612 * Tfv); + Tgm = FMA(KP555570233, Tfl, KP831469612 * Tfo); + Tgn = Tgl - Tgm; + TgL = Tgl + Tgm; + Tgo = Tgk + Tgn; + Th8 = TgK - TgL; + TgC = Tgk - Tgn; + TgM = TgK + TgL; + } + { + E TfR, Tgp, Tga, Tgq; + { + E TfJ, TfQ, Tg2, Tg9; + TfJ = TfB - TfI; + TfQ = TfM - TfP; + TfR = FNMS(KP881921264, TfQ, KP471396736 * TfJ); + Tgp = FMA(KP881921264, TfJ, KP471396736 * TfQ); + Tg2 = TfU - Tg1; + Tg9 = Tg5 - Tg8; + Tga = FMA(KP471396736, Tg2, KP881921264 * Tg9); + Tgq = FNMS(KP881921264, Tg2, KP471396736 * Tg9); + } + Tgb = TfR + Tga; + TgD = TfR - Tga; + Tgr = Tgp + Tgq; + Tgz = Tgq - Tgp; + } + { + E TgP, TgZ, TgS, Th0; + { + E TgN, TgO, TgQ, TgR; + TgN = TfB + TfI; + TgO = TfM + TfP; + TgP = FNMS(KP290284677, TgO, KP956940335 * TgN); + TgZ = FMA(KP290284677, TgN, KP956940335 * TgO); + TgQ = TfU + Tg1; + TgR = Tg5 + Tg8; + TgS = FMA(KP956940335, TgQ, KP290284677 * TgR); + Th0 = FNMS(KP290284677, TgQ, KP956940335 * TgR); + } + TgT = TgP + TgS; + Thd = TgP - TgS; + Th1 = TgZ + Th0; + Th9 = Th0 - TgZ; + } + { + E Tgc, Tgs, Tf7, Tgd; + Tgc = Tfy - Tgb; + Tgs = Tgo - Tgr; + Tf7 = W[84]; + Tgd = W[85]; + cr[WS(rs, 43)] = FNMS(Tgd, Tgs, Tf7 * Tgc); + ci[WS(rs, 43)] = FMA(Tgd, Tgc, Tf7 * Tgs); + } + { + E Tha, The, Th7, Thb; + Tha = Th8 - Th9; + The = Thc - Thd; + Th7 = W[100]; + Thb = W[101]; + cr[WS(rs, 51)] = FNMS(Thb, The, Th7 * Tha); + ci[WS(rs, 51)] = FMA(Th7, The, Thb * Tha); + } + { + E Thg, Thi, Thf, Thh; + Thg = Th8 + Th9; + Thi = Thc + Thd; + Thf = W[36]; + Thh = W[37]; + cr[WS(rs, 19)] = FNMS(Thh, Thi, Thf * Thg); + ci[WS(rs, 19)] = FMA(Thf, Thi, Thh * Thg); + } + { + E Tgu, Tgw, Tgt, Tgv; + Tgu = Tfy + Tgb; + Tgw = Tgo + Tgr; + Tgt = W[20]; + Tgv = W[21]; + cr[WS(rs, 11)] = FNMS(Tgv, Tgw, Tgt * Tgu); + ci[WS(rs, 11)] = FMA(Tgv, Tgu, Tgt * Tgw); + } + { + E TgA, TgE, Tgx, TgB; 
+ TgA = Tgy - Tgz; + TgE = TgC - TgD; + Tgx = W[116]; + TgB = W[117]; + cr[WS(rs, 59)] = FNMS(TgB, TgE, Tgx * TgA); + ci[WS(rs, 59)] = FMA(Tgx, TgE, TgB * TgA); + } + { + E TgU, Th2, TgJ, TgV; + TgU = TgM - TgT; + Th2 = TgY - Th1; + TgJ = W[68]; + TgV = W[69]; + cr[WS(rs, 35)] = FNMS(TgV, Th2, TgJ * TgU); + ci[WS(rs, 35)] = FMA(TgV, TgU, TgJ * Th2); + } + { + E Th4, Th6, Th3, Th5; + Th4 = TgM + TgT; + Th6 = TgY + Th1; + Th3 = W[4]; + Th5 = W[5]; + cr[WS(rs, 3)] = FNMS(Th5, Th6, Th3 * Th4); + ci[WS(rs, 3)] = FMA(Th5, Th4, Th3 * Th6); + } + { + E TgG, TgI, TgF, TgH; + TgG = Tgy + Tgz; + TgI = TgC + TgD; + TgF = W[52]; + TgH = W[53]; + cr[WS(rs, 27)] = FNMS(TgH, TgI, TgF * TgG); + ci[WS(rs, 27)] = FMA(TgF, TgI, TgH * TgG); + } + } + { + E TdO, Tf0, Tem, TeM, Tec, TeW, Teq, TeA, Te3, Ter, Tef, Ten, TeH, Tf1, TeP; + E TeX; + { + E TdG, TeK, TdN, TeL, TdJ, TdM; + TdG = TdE - TdF; + TeK = Te6 + Te7; + TdJ = FNMS(KP195090322, TdI, KP980785280 * TdH); + TdM = FMA(KP195090322, TdK, KP980785280 * TdL); + TdN = TdJ - TdM; + TeL = TdM + TdJ; + TdO = TdG - TdN; + Tf0 = TeK + TeL; + Tem = TdG + TdN; + TeM = TeK - TeL; + } + { + E Te8, Tey, Teb, Tez, Te9, Tea; + Te8 = Te6 - Te7; + Tey = TdE + TdF; + Te9 = FNMS(KP195090322, TdL, KP980785280 * TdK); + Tea = FMA(KP980785280, TdI, KP195090322 * TdH); + Teb = Te9 - Tea; + Tez = Te9 + Tea; + Tec = Te8 - Teb; + TeW = Tey + Tez; + Teq = Te8 + Teb; + TeA = Tey - Tez; + } + { + E TdV, Tee, Te2, Ted; + { + E TdR, TdU, TdY, Te1; + TdR = TdP - TdQ; + TdU = TdS - TdT; + TdV = FNMS(KP773010453, TdU, KP634393284 * TdR); + Tee = FMA(KP773010453, TdR, KP634393284 * TdU); + TdY = TdW - TdX; + Te1 = TdZ - Te0; + Te2 = FMA(KP634393284, TdY, KP773010453 * Te1); + Ted = FNMS(KP773010453, TdY, KP634393284 * Te1); + } + Te3 = TdV - Te2; + Ter = Te2 + TdV; + Tef = Ted - Tee; + Ten = Ted + Tee; + } + { + E TeD, TeO, TeG, TeN; + { + E TeB, TeC, TeE, TeF; + TeB = TdP + TdQ; + TeC = TdS + TdT; + TeD = FNMS(KP098017140, TeC, KP995184726 * TeB); + TeO = 
FMA(KP098017140, TeB, KP995184726 * TeC); + TeE = TdW + TdX; + TeF = TdZ + Te0; + TeG = FMA(KP995184726, TeE, KP098017140 * TeF); + TeN = FNMS(KP098017140, TeE, KP995184726 * TeF); + } + TeH = TeD - TeG; + Tf1 = TeG + TeD; + TeP = TeN - TeO; + TeX = TeN + TeO; + } + { + E Te4, Teg, TdD, Te5; + Te4 = TdO - Te3; + Teg = Tec - Tef; + TdD = W[112]; + Te5 = W[113]; + cr[WS(rs, 57)] = FNMS(Te5, Teg, TdD * Te4); + ci[WS(rs, 57)] = FMA(Te5, Te4, TdD * Teg); + } + { + E TeY, Tf2, TeV, TeZ; + TeY = TeW - TeX; + Tf2 = Tf0 - Tf1; + TeV = W[64]; + TeZ = W[65]; + cr[WS(rs, 33)] = FNMS(TeZ, Tf2, TeV * TeY); + ci[WS(rs, 33)] = FMA(TeV, Tf2, TeZ * TeY); + } + { + E Tf4, Tf6, Tf3, Tf5; + Tf4 = TeW + TeX; + Tf6 = Tf0 + Tf1; + Tf3 = W[0]; + Tf5 = W[1]; + cr[WS(rs, 1)] = FNMS(Tf5, Tf6, Tf3 * Tf4); + ci[WS(rs, 1)] = FMA(Tf3, Tf6, Tf5 * Tf4); + } + { + E Tei, Tek, Teh, Tej; + Tei = TdO + Te3; + Tek = Tec + Tef; + Teh = W[48]; + Tej = W[49]; + cr[WS(rs, 25)] = FNMS(Tej, Tek, Teh * Tei); + ci[WS(rs, 25)] = FMA(Tej, Tei, Teh * Tek); + } + { + E Teo, Tes, Tel, Tep; + Teo = Tem - Ten; + Tes = Teq - Ter; + Tel = W[80]; + Tep = W[81]; + cr[WS(rs, 41)] = FNMS(Tep, Tes, Tel * Teo); + ci[WS(rs, 41)] = FMA(Tel, Tes, Tep * Teo); + } + { + E TeI, TeQ, Tex, TeJ; + TeI = TeA - TeH; + TeQ = TeM - TeP; + Tex = W[96]; + TeJ = W[97]; + cr[WS(rs, 49)] = FNMS(TeJ, TeQ, Tex * TeI); + ci[WS(rs, 49)] = FMA(TeJ, TeI, Tex * TeQ); + } + { + E TeS, TeU, TeR, TeT; + TeS = TeA + TeH; + TeU = TeM + TeP; + TeR = W[32]; + TeT = W[33]; + cr[WS(rs, 17)] = FNMS(TeT, TeU, TeR * TeS); + ci[WS(rs, 17)] = FMA(TeT, TeS, TeR * TeU); + } + { + E Teu, Tew, Tet, Tev; + Teu = Tem + Ten; + Tew = Teq + Ter; + Tet = W[16]; + Tev = W[17]; + cr[WS(rs, 9)] = FNMS(Tev, Tew, Tet * Teu); + ci[WS(rs, 9)] = FMA(Tet, Tew, Tev * Teu); + } + } + { + E Thu, TiG, Ti2, Tis, ThS, TiC, Ti6, Tig, ThJ, Ti7, ThV, Ti3, Tin, TiH, Tiv; + E TiD; + { + E Thm, Tiq, Tht, Tir, Thp, Ths; + Thm = Thk - Thl; + Tiq = ThM - ThN; + Thp = FNMS(KP980785280, Tho, 
KP195090322 * Thn); + Ths = FNMS(KP980785280, Thr, KP195090322 * Thq); + Tht = Thp + Ths; + Tir = Thp - Ths; + Thu = Thm - Tht; + TiG = Tiq - Tir; + Ti2 = Thm + Tht; + Tis = Tiq + Tir; + } + { + E ThO, Tie, ThR, Tif, ThP, ThQ; + ThO = ThM + ThN; + Tie = Thk + Thl; + ThP = FMA(KP195090322, Tho, KP980785280 * Thn); + ThQ = FMA(KP195090322, Thr, KP980785280 * Thq); + ThR = ThP - ThQ; + Tif = ThP + ThQ; + ThS = ThO - ThR; + TiC = Tie + Tif; + Ti6 = ThO + ThR; + Tig = Tie - Tif; + } + { + E ThB, ThU, ThI, ThT; + { + E Thx, ThA, ThE, ThH; + Thx = Thv - Thw; + ThA = Thy - Thz; + ThB = FNMS(KP634393284, ThA, KP773010453 * Thx); + ThU = FMA(KP634393284, Thx, KP773010453 * ThA); + ThE = ThC + ThD; + ThH = ThF - ThG; + ThI = FMA(KP773010453, ThE, KP634393284 * ThH); + ThT = FNMS(KP634393284, ThE, KP773010453 * ThH); + } + ThJ = ThB - ThI; + Ti7 = ThI + ThB; + ThV = ThT - ThU; + Ti3 = ThT + ThU; + } + { + E Tij, Tit, Tim, Tiu; + { + E Tih, Tii, Tik, Til; + Tih = ThF + ThG; + Tii = ThC - ThD; + Tij = FNMS(KP995184726, Tii, KP098017140 * Tih); + Tit = FMA(KP098017140, Tii, KP995184726 * Tih); + Tik = Thy + Thz; + Til = Thw + Thv; + Tim = FNMS(KP995184726, Til, KP098017140 * Tik); + Tiu = FMA(KP098017140, Til, KP995184726 * Tik); + } + Tin = Tij + Tim; + TiH = Tij - Tim; + Tiv = Tit - Tiu; + TiD = Tit + Tiu; + } + { + E ThK, ThW, Thj, ThL; + ThK = Thu - ThJ; + ThW = ThS - ThV; + Thj = W[108]; + ThL = W[109]; + cr[WS(rs, 55)] = FNMS(ThL, ThW, Thj * ThK); + ci[WS(rs, 55)] = FMA(ThL, ThK, Thj * ThW); + } + { + E TiE, TiI, TiB, TiF; + TiE = TiC - TiD; + TiI = TiG + TiH; + TiB = W[60]; + TiF = W[61]; + cr[WS(rs, 31)] = FNMS(TiF, TiI, TiB * TiE); + ci[WS(rs, 31)] = FMA(TiB, TiI, TiF * TiE); + } + { + E TiK, TiM, TiJ, TiL; + TiK = TiC + TiD; + TiM = TiG - TiH; + TiJ = W[124]; + TiL = W[125]; + cr[WS(rs, 63)] = FNMS(TiL, TiM, TiJ * TiK); + ci[WS(rs, 63)] = FMA(TiJ, TiM, TiL * TiK); + } + { + E ThY, Ti0, ThX, ThZ; + ThY = Thu + ThJ; + Ti0 = ThS + ThV; + ThX = W[44]; + ThZ = W[45]; + 
cr[WS(rs, 23)] = FNMS(ThZ, Ti0, ThX * ThY); + ci[WS(rs, 23)] = FMA(ThZ, ThY, ThX * Ti0); + } + { + E Ti4, Ti8, Ti1, Ti5; + Ti4 = Ti2 - Ti3; + Ti8 = Ti6 - Ti7; + Ti1 = W[76]; + Ti5 = W[77]; + cr[WS(rs, 39)] = FNMS(Ti5, Ti8, Ti1 * Ti4); + ci[WS(rs, 39)] = FMA(Ti1, Ti8, Ti5 * Ti4); + } + { + E Tio, Tiw, Tid, Tip; + Tio = Tig - Tin; + Tiw = Tis - Tiv; + Tid = W[92]; + Tip = W[93]; + cr[WS(rs, 47)] = FNMS(Tip, Tiw, Tid * Tio); + ci[WS(rs, 47)] = FMA(Tip, Tio, Tid * Tiw); + } + { + E Tiy, TiA, Tix, Tiz; + Tiy = Tig + Tin; + TiA = Tis + Tiv; + Tix = W[28]; + Tiz = W[29]; + cr[WS(rs, 15)] = FNMS(Tiz, TiA, Tix * Tiy); + ci[WS(rs, 15)] = FMA(Tiz, Tiy, Tix * TiA); + } + { + E Tia, Tic, Ti9, Tib; + Tia = Ti2 + Ti3; + Tic = Ti6 + Ti7; + Ti9 = W[12]; + Tib = W[13]; + cr[WS(rs, 7)] = FNMS(Tib, Tic, Ti9 * Tia); + ci[WS(rs, 7)] = FMA(Ti9, Tic, Tib * Tia); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 64}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {808, 270, 230, 0} }; + +void X(codelet_hb_64) (planner *p) { + X(khc2hc_register) (p, hb_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -dif -name hb_7 -include hb.h */ + +/* + * This function contains 72 FP additions, 66 FP multiplications, + * (or, 18 additions, 12 multiplications, 54 fused multiply/add), + * 67 stack variables, 6 constants, and 28 memory accesses + */ +#include "hb.h" + +static void hb_7(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 12); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) { + E T1q, T1p, T1t, T1r, T1s, T1u; + { + E T1, T4, TC, T7, TB, Tt, TD, Ta, TA, T1l, TZ, T1b, Th, Tw, Td; + E TP, Ti, Tj, Tl, Tm, T8, T9, T1a; + T1 = cr[0]; + { + E T2, T3, T5, T6; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + T5 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + T8 = cr[WS(rs, 3)]; + T4 = T2 + T3; + TC = T2 - T3; + T7 = T5 + T6; + TB = T5 - T6; + T9 = ci[WS(rs, 2)]; + } + Tt = ci[WS(rs, 6)]; + TD = FNMS(KP554958132, TC, TB); + T1a = FNMS(KP356895867, T7, T4); + Ta = T8 + T9; + TA = T8 - T9; + { + E Tf, 
Tg, Tc, TO; + Tf = ci[WS(rs, 3)]; + Tg = cr[WS(rs, 4)]; + T1l = FMA(KP554958132, TA, TC); + TZ = FMA(KP554958132, TB, TA); + Tc = FNMS(KP356895867, Ta, T7); + TO = FNMS(KP356895867, T4, Ta); + T1b = FNMS(KP692021471, T1a, Ta); + Th = Tf + Tg; + Tw = Tf - Tg; + Td = FNMS(KP692021471, Tc, T4); + TP = FNMS(KP692021471, TO, T7); + } + Ti = ci[WS(rs, 4)]; + Tj = cr[WS(rs, 5)]; + Tl = ci[WS(rs, 5)]; + Tm = cr[WS(rs, 6)]; + { + E Ty, TS, TX, T1j, T1e, Tp, Tk, Tv; + cr[0] = T1 + T4 + T7 + Ta; + Tk = Ti + Tj; + Tv = Ti - Tj; + { + E Tn, Tu, Tx, TR; + Tn = Tl + Tm; + Tu = Tl - Tm; + Tx = FNMS(KP356895867, Tw, Tv); + TR = FMA(KP554958132, Tk, Th); + { + E TW, T1i, T1d, To; + TW = FNMS(KP356895867, Tu, Tw); + T1i = FNMS(KP356895867, Tv, Tu); + T1d = FMA(KP554958132, Th, Tn); + To = FNMS(KP554958132, Tn, Tk); + Ty = FNMS(KP692021471, Tx, Tu); + TS = FNMS(KP801937735, TR, Tn); + TX = FNMS(KP692021471, TW, Tv); + T1j = FNMS(KP692021471, T1i, Tw); + T1e = FMA(KP801937735, T1d, Tk); + Tp = FNMS(KP801937735, To, Th); + ci[0] = Tt + Tu + Tv + Tw; + } + } + { + E TL, TH, TK, TJ, TM, Te, Tz, TE; + Te = FNMS(KP900968867, Td, T1); + Tz = FNMS(KP900968867, Ty, Tt); + TE = FNMS(KP801937735, TD, TA); + { + E Tb, TI, Tq, TF, Ts, Tr, TG; + Tb = W[4]; + TI = FMA(KP974927912, Tp, Te); + Tq = FNMS(KP974927912, Tp, Te); + TL = FNMS(KP974927912, TE, Tz); + TF = FMA(KP974927912, TE, Tz); + Ts = W[5]; + Tr = Tb * Tq; + TH = W[6]; + TK = W[7]; + TG = Ts * Tq; + cr[WS(rs, 3)] = FNMS(Ts, TF, Tr); + TJ = TH * TI; + TM = TK * TI; + ci[WS(rs, 3)] = FMA(Tb, TF, TG); + } + { + E T14, T13, T17, T15, T16; + { + E TY, TT, T10, TQ; + TQ = FNMS(KP900968867, TP, T1); + cr[WS(rs, 4)] = FNMS(TK, TL, TJ); + ci[WS(rs, 4)] = FMA(TH, TL, TM); + TY = FNMS(KP900968867, TX, Tt); + TT = FNMS(KP974927912, TS, TQ); + T14 = FMA(KP974927912, TS, TQ); + T10 = FNMS(KP801937735, TZ, TC); + { + E TN, TV, T11, TU, T12; + TN = W[2]; + TV = W[3]; + T13 = W[8]; + T11 = FMA(KP974927912, T10, TY); + T17 = FNMS(KP974927912, T10, TY); + 
TU = TN * TT; + T12 = TV * TT; + T15 = T13 * T14; + T16 = W[9]; + cr[WS(rs, 2)] = FNMS(TV, T11, TU); + ci[WS(rs, 2)] = FMA(TN, T11, T12); + } + } + { + E T1k, T1f, T1m, T1c, T18; + T1c = FNMS(KP900968867, T1b, T1); + cr[WS(rs, 5)] = FNMS(T16, T17, T15); + T18 = T16 * T14; + T1k = FNMS(KP900968867, T1j, Tt); + T1f = FNMS(KP974927912, T1e, T1c); + T1q = FMA(KP974927912, T1e, T1c); + ci[WS(rs, 5)] = FMA(T13, T17, T18); + T1m = FMA(KP801937735, T1l, TB); + { + E T19, T1h, T1n, T1g, T1o; + T19 = W[0]; + T1h = W[1]; + T1p = W[10]; + T1t = FNMS(KP974927912, T1m, T1k); + T1n = FMA(KP974927912, T1m, T1k); + T1g = T19 * T1f; + T1o = T1h * T1f; + T1r = T1p * T1q; + T1s = W[11]; + cr[WS(rs, 1)] = FNMS(T1h, T1n, T1g); + ci[WS(rs, 1)] = FMA(T19, T1n, T1o); + } + } + } + } + } + } + cr[WS(rs, 6)] = FNMS(T1s, T1t, T1r); + T1u = T1s * T1q; + ci[WS(rs, 6)] = FMA(T1p, T1t, T1u); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 7, "hb_7", twinstr, &GENUS, {18, 12, 54, 0} }; + +void X(codelet_hb_7) (planner *p) { + X(khc2hc_register) (p, hb_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -dif -name hb_7 -include hb.h */ + +/* + * This function contains 72 FP additions, 60 FP multiplications, + * (or, 36 additions, 24 multiplications, 36 fused multiply/add), + * 36 stack variables, 6 constants, and 28 memory accesses + */ +#include "hb.h" + +static void hb_7(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP433883739, 
+0.433883739117558120475768332848358754609990728); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 12); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) { + E T1, T4, T7, Ta, Tx, TI, TV, TQ, TE, Tm, Tb, Te, Th, Tk, Tq; + E TF, TR, TU, TJ, Tt; + { + E Tu, Tw, Tv, T2, T3; + T1 = cr[0]; + T2 = cr[WS(rs, 1)]; + T3 = ci[0]; + T4 = T2 + T3; + Tu = T2 - T3; + { + E T5, T6, T8, T9; + T5 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 1)]; + T7 = T5 + T6; + Tw = T5 - T6; + T8 = cr[WS(rs, 3)]; + T9 = ci[WS(rs, 2)]; + Ta = T8 + T9; + Tv = T8 - T9; + } + Tx = FMA(KP433883739, Tu, KP974927912 * Tv) - (KP781831482 * Tw); + TI = FMA(KP781831482, Tu, KP974927912 * Tw) + (KP433883739 * Tv); + TV = FNMS(KP781831482, Tv, KP974927912 * Tu) - (KP433883739 * Tw); + TQ = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4); + TE = FMA(KP623489801, T4, T1) + FNMA(KP900968867, Ta, KP222520933 * T7); + Tm = FMA(KP623489801, T7, T1) + FNMA(KP222520933, Ta, KP900968867 * T4); + } + { + E Tp, Tn, To, Tc, Td; + Tb = ci[WS(rs, 6)]; + Tc = ci[WS(rs, 5)]; + Td = cr[WS(rs, 6)]; + Te = Tc - Td; + Tp = Tc + Td; + { + E Tf, Tg, Ti, Tj; + Tf = ci[WS(rs, 4)]; + Tg = cr[WS(rs, 5)]; + Th = Tf - Tg; + Tn = Tf + Tg; + Ti = ci[WS(rs, 3)]; + Tj = cr[WS(rs, 4)]; + Tk = Ti - Tj; + To = Ti + Tj; + } + Tq = FNMS(KP974927912, To, KP781831482 * Tn) - (KP433883739 * Tp); + TF = FMA(KP781831482, Tp, KP974927912 * Tn) + (KP433883739 * To); + TR = FMA(KP433883739, Tn, KP781831482 * To) - (KP974927912 * Tp); + TU = FMA(KP623489801, Tk, Tb) + FNMA(KP900968867, Th, KP222520933 * Te); + TJ = FMA(KP623489801, Te, Tb) + FNMA(KP900968867, Tk, KP222520933 * Th); + Tt = FMA(KP623489801, Th, Tb) + FNMA(KP222520933, Tk, KP900968867 * Te); + } + cr[0] = T1 + T4 + T7 + Ta; + ci[0] = Tb + Te + Th + Tk; + { + E Tr, Ty, Tl, Ts; + Tr = Tm - Tq; + Ty = Tt - Tx; + Tl = W[6]; + Ts = W[7]; + cr[WS(rs, 4)] = FNMS(Ts, Ty, Tl * Tr); + ci[WS(rs, 4)] = FMA(Tl, Ty, Ts * Tr); + } + { + E TY, T10, TX, TZ; + 
TY = TQ + TR; + T10 = TV + TU; + TX = W[2]; + TZ = W[3]; + cr[WS(rs, 2)] = FNMS(TZ, T10, TX * TY); + ci[WS(rs, 2)] = FMA(TX, T10, TZ * TY); + } + { + E TA, TC, Tz, TB; + TA = Tm + Tq; + TC = Tx + Tt; + Tz = W[4]; + TB = W[5]; + cr[WS(rs, 3)] = FNMS(TB, TC, Tz * TA); + ci[WS(rs, 3)] = FMA(Tz, TC, TB * TA); + } + { + E TM, TO, TL, TN; + TM = TE + TF; + TO = TJ - TI; + TL = W[10]; + TN = W[11]; + cr[WS(rs, 6)] = FNMS(TN, TO, TL * TM); + ci[WS(rs, 6)] = FMA(TL, TO, TN * TM); + } + { + E TS, TW, TP, TT; + TS = TQ - TR; + TW = TU - TV; + TP = W[8]; + TT = W[9]; + cr[WS(rs, 5)] = FNMS(TT, TW, TP * TS); + ci[WS(rs, 5)] = FMA(TP, TW, TT * TS); + } + { + E TG, TK, TD, TH; + TG = TE - TF; + TK = TI + TJ; + TD = W[0]; + TH = W[1]; + cr[WS(rs, 1)] = FNMS(TH, TK, TD * TG); + ci[WS(rs, 1)] = FMA(TD, TK, TH * TG); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 7, "hb_7", twinstr, &GENUS, {36, 24, 36, 0} }; + +void X(codelet_hb_7) (planner *p) { + X(khc2hc_register) (p, hb_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hb_8 -include hb.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 44 additions, 14 multiplications, 22 fused multiply/add), + * 52 stack variables, 1 constants, and 32 memory accesses + */ +#include "hb.h" + +static void hb_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) { + E Tw, TH, Tf, Ty, Tx, TI; + { + E TV, TD, T1i, T7, T1b, T1n, TQ, Tk, Tb, Tl, Ta, T1d, Tt, Tc, Tm; + E Tn; + { + E T4, Tg, T3, T19, TC, T5, Th, Ti; + { + E T1, T2, TA, TB; + T1 = cr[0]; + T2 = ci[WS(rs, 3)]; + TA = ci[WS(rs, 7)]; + TB = cr[WS(rs, 4)]; + T4 = cr[WS(rs, 2)]; + Tg = T1 - T2; + T3 = T1 + T2; + T19 = TA - TB; + TC = TA + TB; + T5 = ci[WS(rs, 1)]; + Th = ci[WS(rs, 5)]; + Ti = cr[WS(rs, 6)]; + } + { + E T8, T9, Tr, Ts; + T8 = cr[WS(rs, 1)]; + { + E Tz, T6, T1a, Tj; + Tz = T4 - T5; + T6 = T4 + T5; + T1a = Th - Ti; + Tj = Th + Ti; + TV = TC - Tz; + TD = Tz + TC; + T1i = T3 - T6; + T7 = T3 + T6; + T1b = T19 + T1a; + T1n = T19 - T1a; + TQ = Tg + Tj; + Tk = Tg - Tj; + T9 = ci[WS(rs, 2)]; + } + Tr = ci[WS(rs, 4)]; + Ts = cr[WS(rs, 7)]; + Tb = ci[0]; + Tl = T8 - T9; + Ta = T8 + T9; + T1d = Tr - Ts; + Tt = Tr + Ts; + Tc = cr[WS(rs, 3)]; + Tm = ci[WS(rs, 6)]; + Tn = cr[WS(rs, 5)]; + } + } + { + 
E Te, T1e, Tv, TG, T13, T1k, T1s, T10, T1p, T1v, T1u, T1w, T1t; + { + E TP, T1o, T1j, TR, TU, TX, TW; + TP = W[4]; + { + E Tq, Td, T1c, To; + Tq = Tb - Tc; + Td = Tb + Tc; + T1c = Tm - Tn; + To = Tm + Tn; + { + E Tu, TF, Tp, TE; + Tu = Tq - Tt; + TF = Tq + Tt; + T1o = Ta - Td; + Te = Ta + Td; + T1j = T1d - T1c; + T1e = T1c + T1d; + Tp = Tl - To; + TE = Tl + To; + cr[0] = T7 + Te; + ci[0] = T1b + T1e; + TW = Tp - Tu; + Tv = Tp + Tu; + TR = TE + TF; + TG = TE - TF; + } + } + TU = W[5]; + TX = FMA(KP707106781, TW, TV); + T13 = FNMS(KP707106781, TW, TV); + { + E TS, TY, T1r, TT; + T1k = T1i - T1j; + T1s = T1i + T1j; + TS = FNMS(KP707106781, TR, TQ); + T10 = FMA(KP707106781, TR, TQ); + T1p = T1n - T1o; + T1v = T1o + T1n; + TY = TP * TX; + T1r = W[2]; + TT = TP * TS; + T1u = W[3]; + ci[WS(rs, 3)] = FMA(TU, TS, TY); + T1w = T1r * T1v; + T1t = T1r * T1s; + cr[WS(rs, 3)] = FNMS(TU, TX, TT); + } + } + { + E T1f, T15, T18, T17, T1g, T1h, T1m; + { + E TZ, T12, T16, T14, T11; + ci[WS(rs, 2)] = FMA(T1u, T1s, T1w); + cr[WS(rs, 2)] = FNMS(T1u, T1v, T1t); + TZ = W[12]; + T12 = W[13]; + T1f = T1b - T1e; + T16 = T7 - Te; + T14 = TZ * T13; + T11 = TZ * T10; + T15 = W[6]; + T18 = W[7]; + ci[WS(rs, 7)] = FMA(T12, T10, T14); + cr[WS(rs, 7)] = FNMS(T12, T13, T11); + T17 = T15 * T16; + T1g = T18 * T16; + } + cr[WS(rs, 4)] = FNMS(T18, T1f, T17); + ci[WS(rs, 4)] = FMA(T15, T1f, T1g); + T1h = W[10]; + T1m = W[11]; + { + E TN, TJ, TM, TL, TO, TK, T1q, T1l; + Tw = FNMS(KP707106781, Tv, Tk); + TK = FMA(KP707106781, Tv, Tk); + T1q = T1h * T1p; + T1l = T1h * T1k; + TN = FMA(KP707106781, TG, TD); + TH = FNMS(KP707106781, TG, TD); + ci[WS(rs, 6)] = FMA(T1m, T1k, T1q); + cr[WS(rs, 6)] = FNMS(T1m, T1p, T1l); + TJ = W[0]; + TM = W[1]; + Tf = W[8]; + TL = TJ * TK; + TO = TM * TK; + Ty = W[9]; + Tx = Tf * Tw; + cr[WS(rs, 1)] = FNMS(TM, TN, TL); + ci[WS(rs, 1)] = FMA(TJ, TN, TO); + } + } + } + } + cr[WS(rs, 5)] = FNMS(Ty, TH, Tx); + TI = Ty * Tw; + ci[WS(rs, 5)] = FMA(Tf, TH, TI); + } + } +} + +static 
const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hb_8", twinstr, &GENUS, {44, 14, 22, 0} }; + +void X(codelet_hb_8) (planner *p) { + X(khc2hc_register) (p, hb_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hb_8 -include hb.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 30 stack variables, 1 constants, and 32 memory accesses + */ +#include "hb.h" + +static void hb_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) { + E T7, T18, T1c, To, Ty, TM, TY, TC, Te, TZ, T10, Tv, Tz, TP, TS; + E TD; + { + E T3, TK, Tn, TL, T6, TW, Tk, TX; + { + E T1, T2, Tl, Tm; + T1 = cr[0]; + T2 = ci[WS(rs, 3)]; + T3 = T1 + T2; + TK = T1 - T2; + Tl = ci[WS(rs, 5)]; + Tm = cr[WS(rs, 6)]; + Tn = Tl - Tm; + TL = Tl + Tm; + } + { + E T4, T5, Ti, Tj; + T4 = cr[WS(rs, 2)]; + T5 = ci[WS(rs, 1)]; + T6 = T4 + T5; + TW = T4 - T5; + Ti = ci[WS(rs, 7)]; + Tj = cr[WS(rs, 4)]; + Tk = Ti - Tj; + TX = Ti + Tj; + } + T7 = T3 + T6; + T18 = TK + TL; + T1c = TX - TW; + To = Tk + Tn; + Ty = T3 - T6; + TM = TK - TL; + TY = TW + TX; + TC = Tk - Tn; + } + { + E Ta, TN, Tu, TR, Td, TQ, Tr, TO; + { + E T8, T9, Ts, Tt; + T8 = cr[WS(rs, 1)]; + T9 = ci[WS(rs, 2)]; + Ta = T8 + T9; + TN = T8 - T9; + Ts = ci[WS(rs, 4)]; + Tt = cr[WS(rs, 7)]; + Tu = Ts - Tt; + TR = Ts + Tt; + } + { + E Tb, Tc, Tp, Tq; + Tb = ci[0]; + Tc = cr[WS(rs, 3)]; + Td = Tb + Tc; + TQ = Tb - Tc; + Tp = ci[WS(rs, 6)]; + Tq = cr[WS(rs, 5)]; + Tr = Tp - Tq; + TO = Tp + Tq; + } + Te = Ta + Td; + TZ = TN + TO; + T10 = TQ + TR; + Tv = Tr + Tu; + Tz = Tu - Tr; + TP = TN 
- TO; + TS = TQ - TR; + TD = Ta - Td; + } + cr[0] = T7 + Te; + ci[0] = To + Tv; + { + E Tg, Tw, Tf, Th; + Tg = T7 - Te; + Tw = To - Tv; + Tf = W[6]; + Th = W[7]; + cr[WS(rs, 4)] = FNMS(Th, Tw, Tf * Tg); + ci[WS(rs, 4)] = FMA(Th, Tg, Tf * Tw); + } + { + E TG, TI, TF, TH; + TG = Ty + Tz; + TI = TD + TC; + TF = W[2]; + TH = W[3]; + cr[WS(rs, 2)] = FNMS(TH, TI, TF * TG); + ci[WS(rs, 2)] = FMA(TF, TI, TH * TG); + } + { + E TA, TE, Tx, TB; + TA = Ty - Tz; + TE = TC - TD; + Tx = W[10]; + TB = W[11]; + cr[WS(rs, 6)] = FNMS(TB, TE, Tx * TA); + ci[WS(rs, 6)] = FMA(Tx, TE, TB * TA); + } + { + E T1a, T1g, T1e, T1i, T19, T1d; + T19 = KP707106781 * (TZ + T10); + T1a = T18 - T19; + T1g = T18 + T19; + T1d = KP707106781 * (TP - TS); + T1e = T1c + T1d; + T1i = T1c - T1d; + { + E T17, T1b, T1f, T1h; + T17 = W[4]; + T1b = W[5]; + cr[WS(rs, 3)] = FNMS(T1b, T1e, T17 * T1a); + ci[WS(rs, 3)] = FMA(T17, T1e, T1b * T1a); + T1f = W[12]; + T1h = W[13]; + cr[WS(rs, 7)] = FNMS(T1h, T1i, T1f * T1g); + ci[WS(rs, 7)] = FMA(T1f, T1i, T1h * T1g); + } + } + { + E TU, T14, T12, T16, TT, T11; + TT = KP707106781 * (TP + TS); + TU = TM - TT; + T14 = TM + TT; + T11 = KP707106781 * (TZ - T10); + T12 = TY - T11; + T16 = TY + T11; + { + E TJ, TV, T13, T15; + TJ = W[8]; + TV = W[9]; + cr[WS(rs, 5)] = FNMS(TV, T12, TJ * TU); + ci[WS(rs, 5)] = FMA(TV, TU, TJ * T12); + T13 = W[0]; + T15 = W[1]; + cr[WS(rs, 1)] = FNMS(T15, T16, T13 * T14); + ci[WS(rs, 1)] = FMA(T15, T14, T13 * T16); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hb_8", twinstr, &GENUS, {52, 18, 14, 0} }; + +void X(codelet_hb_8) (planner *p) { + X(khc2hc_register) (p, hb_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hb_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2003, 
2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:26 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -dif -name hb_9 -include hb.h */ + +/* + * This function contains 96 FP additions, 88 FP multiplications, + * (or, 24 additions, 16 multiplications, 72 fused multiply/add), + * 69 stack variables, 10 constants, and 36 memory accesses + */ +#include "hb.h" + +static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP954188894, +0.954188894138671133499268364187245676532219158); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP492403876, +0.492403876506104029683371512294761506835321626); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP777861913, +0.777861913430206160028177977318626690410586096); + DK(KP839099631, +0.839099631177280011763127298123181364687434283); + DK(KP363970234, +0.363970234266202361351047882776834043890471784); + DK(KP176326980, 
+0.176326980708464973471090386868618986121633062); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { + E T1X, T1S, T1U, T1P, T1Y, T1T; + { + E T5, Tl, TQ, T1y, T1b, T1J, Tg, TE, TW, T13, T10, Tz, Tw, TT, T1K; + E T1B, T1L, T1E; + { + E T1, Th, T2, T3, Ti, Tj; + T1 = cr[0]; + Th = ci[WS(rs, 8)]; + T2 = cr[WS(rs, 3)]; + T3 = ci[WS(rs, 2)]; + Ti = ci[WS(rs, 5)]; + Tj = cr[WS(rs, 6)]; + { + E T12, Tb, TZ, TY, Ta, Tq, T11, Tr, Ts, TS, Te, Tt; + { + E T6, Tm, Tn, To, T9, Tc, Td, Tp; + { + E T7, T8, T1a, T4; + T6 = cr[WS(rs, 1)]; + T1a = T2 - T3; + T4 = T2 + T3; + { + E TP, Tk, TO, T19; + TP = Ti + Tj; + Tk = Ti - Tj; + T7 = cr[WS(rs, 4)]; + T5 = T1 + T4; + TO = FNMS(KP500000000, T4, T1); + Tl = Th + Tk; + T19 = FNMS(KP500000000, Tk, Th); + TQ = FNMS(KP866025403, TP, TO); + T1y = FMA(KP866025403, TP, TO); + T1b = FMA(KP866025403, T1a, T19); + T1J = FNMS(KP866025403, T1a, T19); + T8 = ci[WS(rs, 1)]; + } + Tm = ci[WS(rs, 7)]; + Tn = ci[WS(rs, 4)]; + To = cr[WS(rs, 7)]; + T9 = T7 + T8; + T12 = T7 - T8; + } + Tb = cr[WS(rs, 2)]; + TZ = Tn + To; + Tp = Tn - To; + TY = FNMS(KP500000000, T9, T6); + Ta = T6 + T9; + Tc = ci[WS(rs, 3)]; + Td = ci[0]; + Tq = Tm + Tp; + T11 = FMS(KP500000000, Tp, Tm); + Tr = ci[WS(rs, 6)]; + Ts = cr[WS(rs, 5)]; + TS = Td - Tc; + Te = Tc + Td; + Tt = cr[WS(rs, 8)]; + } + { + E T1C, Tv, TR, T1D, T1z, T1A; + { + E TU, Tu, TV, Tf; + TU = FNMS(KP500000000, Te, Tb); + Tf = Tb + Te; + Tu = Ts + Tt; + TV = Ts - Tt; + Tg = Ta + Tf; + TE = Ta - Tf; + TW = FMA(KP866025403, TV, TU); + T1C = FNMS(KP866025403, TV, TU); + Tv = Tr - Tu; + TR = FMA(KP500000000, Tu, Tr); + } + T1z = FMA(KP866025403, T12, T11); + T13 = FNMS(KP866025403, T12, T11); + T10 = FNMS(KP866025403, TZ, TY); + T1A = FMA(KP866025403, TZ, TY); + Tz = Tv - Tq; + Tw = Tq 
+ Tv; + T1D = FMA(KP866025403, TS, TR); + TT = FNMS(KP866025403, TS, TR); + T1K = FNMS(KP176326980, T1z, T1A); + T1B = FMA(KP176326980, T1A, T1z); + T1L = FNMS(KP363970234, T1C, T1D); + T1E = FMA(KP363970234, T1D, T1C); + } + } + } + { + E T1d, T14, T1c, TX; + cr[0] = T5 + Tg; + T1d = FNMS(KP839099631, T10, T13); + T14 = FMA(KP839099631, T13, T10); + T1c = FMA(KP176326980, TT, TW); + TX = FNMS(KP176326980, TW, TT); + ci[0] = Tl + Tw; + { + E TL, TK, TJ, Ty, TD; + Ty = FNMS(KP500000000, Tg, T5); + TD = FNMS(KP500000000, Tw, Tl); + { + E Tx, TC, TA, TI, TF; + Tx = W[10]; + TC = W[11]; + TA = FNMS(KP866025403, Tz, Ty); + TI = FMA(KP866025403, Tz, Ty); + TF = FNMS(KP866025403, TE, TD); + TL = FMA(KP866025403, TE, TD); + { + E TH, TB, TG, TM; + TH = W[4]; + TB = Tx * TA; + TK = W[5]; + TG = Tx * TF; + TM = TH * TL; + TJ = TH * TI; + cr[WS(rs, 6)] = FNMS(TC, TF, TB); + ci[WS(rs, 6)] = FMA(TC, TA, TG); + ci[WS(rs, 3)] = FMA(TK, TI, TM); + } + } + cr[WS(rs, 3)] = FNMS(TK, TL, TJ); + { + E T1k, T1p, T1l, T1q, T1m; + { + E T1e, T1j, T15, T1o; + T1e = FNMS(KP777861913, T1d, T1c); + T1j = FMA(KP777861913, T1d, T1c); + T15 = FNMS(KP777861913, T14, TX); + T1o = FMA(KP777861913, T14, TX); + { + E TN, T16, T1f, T17, T1s, T1v, T18, T1i, T1n, T1r, T1u; + TN = W[0]; + T16 = FNMS(KP984807753, T15, TQ); + T1i = FMA(KP492403876, T15, TQ); + T1f = FMA(KP984807753, T1e, T1b); + T1n = FNMS(KP492403876, T1e, T1b); + T17 = TN * T16; + T1s = FMA(KP852868531, T1j, T1i); + T1k = FNMS(KP852868531, T1j, T1i); + T1v = FMA(KP852868531, T1o, T1n); + T1p = FNMS(KP852868531, T1o, T1n); + T18 = W[1]; + T1r = W[6]; + T1u = W[7]; + { + E T1h, T1g, T1w, T1t; + T1h = W[12]; + cr[WS(rs, 1)] = FNMS(T18, T1f, T17); + T1g = T18 * T16; + T1w = T1r * T1v; + T1t = T1r * T1s; + T1l = T1h * T1k; + ci[WS(rs, 1)] = FMA(TN, T1f, T1g); + ci[WS(rs, 4)] = FMA(T1u, T1s, T1w); + cr[WS(rs, 4)] = FNMS(T1u, T1v, T1t); + T1q = T1h * T1p; + } + T1m = W[13]; + } + } + { + E T1F, T1W, T1R, T1V, T1N, T1M, T1x, T1I; + T1F = 
FNMS(KP954188894, T1E, T1B); + T1W = FMA(KP954188894, T1E, T1B); + T1M = FNMS(KP954188894, T1L, T1K); + T1R = FMA(KP954188894, T1L, T1K); + ci[WS(rs, 7)] = FMA(T1m, T1k, T1q); + cr[WS(rs, 7)] = FNMS(T1m, T1p, T1l); + T1V = FNMS(KP492403876, T1M, T1J); + T1N = FMA(KP984807753, T1M, T1J); + T1x = W[2]; + T1I = W[3]; + { + E T23, T22, T20, T1Z, T24, T21; + T1X = FMA(KP852868531, T1W, T1V); + T23 = FNMS(KP852868531, T1W, T1V); + { + E T1G, T1Q, T1O, T1H; + T1G = FMA(KP984807753, T1F, T1y); + T1Q = FNMS(KP492403876, T1F, T1y); + T1O = T1x * T1N; + T22 = W[15]; + T1H = T1x * T1G; + T20 = FMA(KP852868531, T1R, T1Q); + T1S = FNMS(KP852868531, T1R, T1Q); + ci[WS(rs, 2)] = FMA(T1I, T1G, T1O); + cr[WS(rs, 2)] = FNMS(T1I, T1N, T1H); + T1Z = W[14]; + T24 = T22 * T20; + } + T1U = W[9]; + T21 = T1Z * T20; + ci[WS(rs, 8)] = FMA(T1Z, T23, T24); + T1P = W[8]; + T1Y = T1U * T1S; + cr[WS(rs, 8)] = FNMS(T22, T23, T21); + } + } + } + } + } + } + T1T = T1P * T1S; + ci[WS(rs, 5)] = FMA(T1P, T1X, T1Y); + cr[WS(rs, 5)] = FNMS(T1U, T1X, T1T); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 9}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 9, "hb_9", twinstr, &GENUS, {24, 16, 72, 0} }; + +void X(codelet_hb_9) (planner *p) { + X(khc2hc_register) (p, hb_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -dif -name hb_9 -include hb.h */ + +/* + * This function contains 96 FP additions, 72 FP multiplications, + * (or, 60 additions, 36 multiplications, 36 fused multiply/add), + * 53 stack variables, 8 constants, and 36 memory accesses + */ +#include "hb.h" + +static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP342020143, +0.342020143325668733044099614682259580763083368); + DK(KP939692620, 
+0.939692620785908384054109277324731469936208134); + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { + E T5, Tl, TM, T1o, T16, T1y, Ta, Tf, Tg, Tq, Tv, Tw, TT, T17, T1u; + E T1A, T1r, T1z, T10, T18; + { + E T1, Th, T4, T14, Tk, TL, TK, T15; + T1 = cr[0]; + Th = ci[WS(rs, 8)]; + { + E T2, T3, Ti, Tj; + T2 = cr[WS(rs, 3)]; + T3 = ci[WS(rs, 2)]; + T4 = T2 + T3; + T14 = KP866025403 * (T2 - T3); + Ti = ci[WS(rs, 5)]; + Tj = cr[WS(rs, 6)]; + Tk = Ti - Tj; + TL = KP866025403 * (Ti + Tj); + } + T5 = T1 + T4; + Tl = Th + Tk; + TK = FNMS(KP500000000, T4, T1); + TM = TK - TL; + T1o = TK + TL; + T15 = FNMS(KP500000000, Tk, Th); + T16 = T14 + T15; + T1y = T15 - T14; + } + { + E T6, T9, TN, TQ, Tm, Tp, TO, TR, Tb, Te, TU, TX, Tr, Tu, TV; + E TY; + { + E T7, T8, Tn, To; + T6 = cr[WS(rs, 1)]; + T7 = cr[WS(rs, 4)]; + T8 = ci[WS(rs, 1)]; + T9 = T7 + T8; + TN = FNMS(KP500000000, T9, T6); + TQ = KP866025403 * (T7 - T8); + Tm = ci[WS(rs, 7)]; + Tn = ci[WS(rs, 4)]; + To = cr[WS(rs, 7)]; + Tp = Tn - To; + TO = KP866025403 * (Tn + To); + TR = FNMS(KP500000000, Tp, Tm); + } + { + E Tc, Td, Ts, Tt; + Tb = cr[WS(rs, 2)]; + Tc = ci[WS(rs, 3)]; + Td = ci[0]; + Te = Tc + Td; + TU = FNMS(KP500000000, Te, Tb); + TX = KP866025403 * (Tc - Td); + Tr = ci[WS(rs, 6)]; + Ts = cr[WS(rs, 5)]; + Tt = cr[WS(rs, 8)]; + Tu = Ts + Tt; + TV = KP866025403 * (Ts - Tt); + TY = FMA(KP500000000, Tu, Tr); + } + { + E TP, TS, T1s, T1t; + Ta = T6 + T9; + Tf = Tb + Te; + Tg = Ta + Tf; + Tq = Tm + Tp; + Tv = Tr - Tu; + Tw = Tq + Tv; + TP = TN - TO; + TS = TQ + TR; + TT = FNMS(KP642787609, TS, KP766044443 * TP); + T17 = FMA(KP766044443, TS, KP642787609 * 
TP); + T1s = TU - TV; + T1t = TY - TX; + T1u = FMA(KP939692620, T1s, KP342020143 * T1t); + T1A = FNMS(KP939692620, T1t, KP342020143 * T1s); + { + E T1p, T1q, TW, TZ; + T1p = TN + TO; + T1q = TR - TQ; + T1r = FNMS(KP984807753, T1q, KP173648177 * T1p); + T1z = FMA(KP173648177, T1q, KP984807753 * T1p); + TW = TU + TV; + TZ = TX + TY; + T10 = FNMS(KP984807753, TZ, KP173648177 * TW); + T18 = FMA(KP984807753, TW, KP173648177 * TZ); + } + } + } + cr[0] = T5 + Tg; + ci[0] = Tl + Tw; + { + E TA, TG, TE, TI; + { + E Ty, Tz, TC, TD; + Ty = FNMS(KP500000000, Tg, T5); + Tz = KP866025403 * (Tv - Tq); + TA = Ty - Tz; + TG = Ty + Tz; + TC = FNMS(KP500000000, Tw, Tl); + TD = KP866025403 * (Ta - Tf); + TE = TC - TD; + TI = TD + TC; + } + { + E Tx, TB, TF, TH; + Tx = W[10]; + TB = W[11]; + cr[WS(rs, 6)] = FNMS(TB, TE, Tx * TA); + ci[WS(rs, 6)] = FMA(Tx, TE, TB * TA); + TF = W[4]; + TH = W[5]; + cr[WS(rs, 3)] = FNMS(TH, TI, TF * TG); + ci[WS(rs, 3)] = FMA(TF, TI, TH * TG); + } + } + { + E T1d, T1h, T12, T1c, T1a, T1g, T11, T19, TJ, T13; + T1d = KP866025403 * (T18 - T17); + T1h = KP866025403 * (TT - T10); + T11 = TT + T10; + T12 = TM + T11; + T1c = FNMS(KP500000000, T11, TM); + T19 = T17 + T18; + T1a = T16 + T19; + T1g = FNMS(KP500000000, T19, T16); + TJ = W[0]; + T13 = W[1]; + cr[WS(rs, 1)] = FNMS(T13, T1a, TJ * T12); + ci[WS(rs, 1)] = FMA(T13, T12, TJ * T1a); + { + E T1k, T1m, T1j, T1l; + T1k = T1c + T1d; + T1m = T1h + T1g; + T1j = W[6]; + T1l = W[7]; + cr[WS(rs, 4)] = FNMS(T1l, T1m, T1j * T1k); + ci[WS(rs, 4)] = FMA(T1j, T1m, T1l * T1k); + } + { + E T1e, T1i, T1b, T1f; + T1e = T1c - T1d; + T1i = T1g - T1h; + T1b = W[12]; + T1f = W[13]; + cr[WS(rs, 7)] = FNMS(T1f, T1i, T1b * T1e); + ci[WS(rs, 7)] = FMA(T1b, T1i, T1f * T1e); + } + } + { + E T1F, T1J, T1w, T1E, T1C, T1I, T1v, T1B, T1n, T1x; + T1F = KP866025403 * (T1A - T1z); + T1J = KP866025403 * (T1r + T1u); + T1v = T1r - T1u; + T1w = T1o + T1v; + T1E = FNMS(KP500000000, T1v, T1o); + T1B = T1z + T1A; + T1C = T1y + T1B; + T1I = 
FNMS(KP500000000, T1B, T1y); + T1n = W[2]; + T1x = W[3]; + cr[WS(rs, 2)] = FNMS(T1x, T1C, T1n * T1w); + ci[WS(rs, 2)] = FMA(T1n, T1C, T1x * T1w); + { + E T1M, T1O, T1L, T1N; + T1M = T1F + T1E; + T1O = T1I + T1J; + T1L = W[8]; + T1N = W[9]; + cr[WS(rs, 5)] = FNMS(T1N, T1O, T1L * T1M); + ci[WS(rs, 5)] = FMA(T1N, T1M, T1L * T1O); + } + { + E T1G, T1K, T1D, T1H; + T1G = T1E - T1F; + T1K = T1I - T1J; + T1D = W[14]; + T1H = W[15]; + cr[WS(rs, 8)] = FNMS(T1H, T1K, T1D * T1G); + ci[WS(rs, 8)] = FMA(T1H, T1G, T1D * T1K); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 9}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 9, "hb_9", twinstr, &GENUS, {60, 36, 36, 0} }; + +void X(codelet_hb_9) (planner *p) { + X(khc2hc_register) (p, hb_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,840 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:42 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 16 -dif -name hc2cb2_16 -include hc2cb.h */ + +/* + * This function contains 196 FP additions, 134 FP multiplications, + * (or, 104 additions, 42 multiplications, 92 fused multiply/add), + * 112 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E Tv, TB, TF, Ty, T1J, T1O, T1N, T1K; + { + E Tw, T2z, T2C, Tx, T3f, T3l, T2F, T3r, Tz; + Tv = W[0]; + Tw = W[2]; + T2z = W[6]; + T2C = W[7]; + TB = W[4]; + Tx = Tv * Tw; + T3f = Tv * T2z; + T3l = Tv * T2C; + T2F = Tv * TB; + T3r = Tw * TB; + TF = W[5]; + Ty = W[1]; + Tz = W[3]; + { + E T2G, T3z, T3m, T3g, T3L, T3s, T1V, TA, T3w, T3Q, T30, T3C, TE, T1X, T1D; + E TG, T1G, T1o, T2p, T1Y, T2u, T2c, T1Z, TL, T1t, T2d, T3n, T35, T3R, T3F; + E T1w, T20, T3M, Tf, T3h, T2L, T2e, TW, T2Q, T36, T3I, T3N, T2V, T37, T1d; + E Tu, T3S, T18, T1z, T1i, T24, T2g, T27, T2h; + { + E T2K, TQ, TV, T2H; + { + E TH, T3, T32, T1s, T1p, T6, T33, TK, TM, Ta, TS, T2J, TP, TR, Td; + E TT, TI, 
TJ; + { + E T1q, T1r, T4, T5; + { + E T1, T1n, TC, T2b, T1W, T2, T3v, T2Z, TD; + T1 = Rp[0]; + T3v = Tw * TF; + T2Z = Tv * TF; + T2G = FNMS(Ty, TF, T2F); + T3z = FMA(Ty, TF, T2F); + T3m = FNMS(Ty, T2z, T3l); + T3g = FMA(Ty, T2C, T3f); + T3L = FNMS(Tz, TF, T3r); + T3s = FMA(Tz, TF, T3r); + T1V = FMA(Ty, Tz, Tx); + TA = FNMS(Ty, Tz, Tx); + TD = Tv * Tz; + T3w = FNMS(Tz, TB, T3v); + T3Q = FMA(Tz, TB, T3v); + T30 = FMA(Ty, TB, T2Z); + T3C = FNMS(Ty, TB, T2Z); + T1n = TA * TF; + TC = TA * TB; + T2b = T1V * TF; + T1W = T1V * TB; + TE = FMA(Ty, Tw, TD); + T1X = FNMS(Ty, Tw, TD); + T2 = Rm[WS(rs, 7)]; + T1q = Ip[0]; + T1D = FMA(TE, TF, TC); + TG = FNMS(TE, TF, TC); + T1G = FNMS(TE, TB, T1n); + T1o = FMA(TE, TB, T1n); + T2p = FMA(T1X, TF, T1W); + T1Y = FNMS(T1X, TF, T1W); + T2u = FNMS(T1X, TB, T2b); + T2c = FMA(T1X, TB, T2b); + TH = T1 - T2; + T3 = T1 + T2; + T1r = Im[WS(rs, 7)]; + } + T4 = Rp[WS(rs, 4)]; + T5 = Rm[WS(rs, 3)]; + TI = Ip[WS(rs, 4)]; + T32 = T1q - T1r; + T1s = T1q + T1r; + T1p = T4 - T5; + T6 = T4 + T5; + TJ = Im[WS(rs, 3)]; + } + { + E TN, TO, T8, T9, Tb, Tc; + T8 = Rp[WS(rs, 2)]; + T9 = Rm[WS(rs, 5)]; + TN = Ip[WS(rs, 2)]; + T33 = TI - TJ; + TK = TI + TJ; + TM = T8 - T9; + Ta = T8 + T9; + TO = Im[WS(rs, 5)]; + Tb = Rm[WS(rs, 1)]; + Tc = Rp[WS(rs, 6)]; + TS = Ip[WS(rs, 6)]; + T2J = TN - TO; + TP = TN + TO; + TR = Tb - Tc; + Td = Tb + Tc; + TT = Im[WS(rs, 1)]; + } + { + E T2I, TU, Te, T31, T34, T3D; + T1Z = TH + TK; + TL = TH - TK; + T1t = T1p + T1s; + T2d = T1s - T1p; + T2I = TS - TT; + TU = TS + TT; + Te = Ta + Td; + T31 = Ta - Td; + T34 = T32 - T33; + T3D = T32 + T33; + { + E T1u, T1v, T3E, T7; + T3E = T2J + T2I; + T2K = T2I - T2J; + TQ = TM - TP; + T1u = TM + TP; + T3n = T34 - T31; + T35 = T31 + T34; + TV = TR - TU; + T1v = TR + TU; + T3R = T3D - T3E; + T3F = T3D + T3E; + T2H = T3 - T6; + T7 = T3 + T6; + T1w = T1u - T1v; + T20 = T1u + T1v; + T3M = T7 - Te; + Tf = T7 + Te; + } + } + } + { + E T1e, Ti, T2N, T1c, T19, Tl, T2O, T1h, Tq, T13, Tp, T2S, T11, Tr, 
T14; + E T15; + { + E Tj, Tk, T1f, T1g; + { + E Tg, Th, T1a, T1b; + Tg = Rp[WS(rs, 1)]; + T3h = T2H - T2K; + T2L = T2H + T2K; + T2e = TQ - TV; + TW = TQ + TV; + Th = Rm[WS(rs, 6)]; + T1a = Ip[WS(rs, 1)]; + T1b = Im[WS(rs, 6)]; + Tj = Rp[WS(rs, 5)]; + T1e = Tg - Th; + Ti = Tg + Th; + T2N = T1a - T1b; + T1c = T1a + T1b; + Tk = Rm[WS(rs, 2)]; + T1f = Ip[WS(rs, 5)]; + T1g = Im[WS(rs, 2)]; + } + { + E Tn, To, TZ, T10; + Tn = Rm[0]; + T19 = Tj - Tk; + Tl = Tj + Tk; + T2O = T1f - T1g; + T1h = T1f + T1g; + To = Rp[WS(rs, 7)]; + TZ = Ip[WS(rs, 7)]; + T10 = Im[0]; + Tq = Rp[WS(rs, 3)]; + T13 = Tn - To; + Tp = Tn + To; + T2S = TZ - T10; + T11 = TZ + T10; + Tr = Rm[WS(rs, 4)]; + T14 = Ip[WS(rs, 3)]; + T15 = Im[WS(rs, 4)]; + } + } + { + E TY, T16, Tm, Tt; + { + E T2P, T3G, Ts, T2M, T3H, T2U, T2T, T2R; + T2P = T2N - T2O; + T3G = T2N + T2O; + TY = Tq - Tr; + Ts = Tq + Tr; + T2T = T14 - T15; + T16 = T14 + T15; + T2M = Ti - Tl; + Tm = Ti + Tl; + T3H = T2S + T2T; + T2U = T2S - T2T; + Tt = Tp + Ts; + T2R = Tp - Ts; + T2Q = T2M - T2P; + T36 = T2M + T2P; + T3I = T3G + T3H; + T3N = T3H - T3G; + T2V = T2R + T2U; + T37 = T2U - T2R; + } + { + E T25, T26, T22, T23, T12, T17; + T12 = TY - T11; + T25 = TY + T11; + T26 = T13 + T16; + T17 = T13 - T16; + T22 = T1c - T19; + T1d = T19 + T1c; + Tu = Tm + Tt; + T3S = Tm - Tt; + T18 = FNMS(KP414213562, T17, T12); + T1z = FMA(KP414213562, T12, T17); + T1i = T1e - T1h; + T23 = T1e + T1h; + T24 = FNMS(KP414213562, T23, T22); + T2g = FMA(KP414213562, T22, T23); + T27 = FNMS(KP414213562, T26, T25); + T2h = FMA(KP414213562, T25, T26); + } + } + } + } + { + E T1j, T1y, T3V, T3X, T3W, T38, T3i, T3o, T2W, T3K, T3B, T3A; + Rp[0] = Tf + Tu; + T3A = Tf - Tu; + T1j = FMA(KP414213562, T1i, T1d); + T1y = FNMS(KP414213562, T1d, T1i); + T3K = T3C * T3A; + T3B = T3z * T3A; + { + E T3O, T3T, T3J, T3P, T3U; + T3O = T3M - T3N; + T3V = T3M + T3N; + T3X = T3S + T3R; + T3T = T3R - T3S; + Rm[0] = T3F + T3I; + T3J = T3F - T3I; + T3P = T3L * T3O; + T3U = T3L * T3T; + T3W = TA 
* T3V; + Rp[WS(rs, 4)] = FNMS(T3C, T3J, T3B); + Rm[WS(rs, 4)] = FMA(T3z, T3J, T3K); + Rp[WS(rs, 6)] = FNMS(T3Q, T3T, T3P); + Rm[WS(rs, 6)] = FMA(T3Q, T3O, T3U); + T38 = T36 + T37; + T3i = T37 - T36; + T3o = T2Q - T2V; + T2W = T2Q + T2V; + } + { + E T2q, T21, T28, T2w, T2v, T2f, T2i, T2r; + { + E T2Y, T3a, T3c, T3d, T39, T3e, T3b, T2X, T3Y; + Rp[WS(rs, 2)] = FNMS(TE, T3X, T3W); + T3Y = TA * T3X; + { + E T3t, T3j, T3x, T3p; + T3t = FMA(KP707106781, T3i, T3h); + T3j = FNMS(KP707106781, T3i, T3h); + T3x = FMA(KP707106781, T3o, T3n); + T3p = FNMS(KP707106781, T3o, T3n); + Rm[WS(rs, 2)] = FMA(TE, T3V, T3Y); + { + E T3u, T3k, T3y, T3q; + T3u = T3s * T3t; + T3k = T3g * T3j; + T3y = T3s * T3x; + T3q = T3g * T3p; + Rp[WS(rs, 3)] = FNMS(T3w, T3x, T3u); + Rp[WS(rs, 7)] = FNMS(T3m, T3p, T3k); + Rm[WS(rs, 3)] = FMA(T3w, T3t, T3y); + Rm[WS(rs, 7)] = FMA(T3m, T3j, T3q); + T3b = FMA(KP707106781, T2W, T2L); + T2X = FNMS(KP707106781, T2W, T2L); + } + } + T2Y = T2G * T2X; + T3a = T30 * T2X; + T3c = T1V * T3b; + T3d = FMA(KP707106781, T38, T35); + T39 = FNMS(KP707106781, T38, T35); + T3e = T1X * T3b; + T2q = FMA(KP707106781, T20, T1Z); + T21 = FNMS(KP707106781, T20, T1Z); + Rp[WS(rs, 1)] = FNMS(T1X, T3d, T3c); + Rm[WS(rs, 5)] = FMA(T2G, T39, T3a); + Rp[WS(rs, 5)] = FNMS(T30, T39, T2Y); + Rm[WS(rs, 1)] = FMA(T1V, T3d, T3e); + T28 = T24 + T27; + T2w = T27 - T24; + T2v = FNMS(KP707106781, T2e, T2d); + T2f = FMA(KP707106781, T2e, T2d); + T2i = T2g - T2h; + T2r = T2g + T2h; + } + { + E TX, T1k, T1x, T1A; + T1J = FMA(KP707106781, TW, TL); + TX = FNMS(KP707106781, TW, TL); + { + E T2l, T29, T2n, T2j; + T2l = FNMS(KP923879532, T28, T21); + T29 = FMA(KP923879532, T28, T21); + T2n = FMA(KP923879532, T2i, T2f); + T2j = FNMS(KP923879532, T2i, T2f); + { + E T2o, T2m, T2k, T2a; + T2o = Tz * T2l; + T2m = Tw * T2l; + T2k = T2c * T29; + T2a = T1Y * T29; + Im[WS(rs, 1)] = FMA(Tw, T2n, T2o); + Ip[WS(rs, 1)] = FNMS(Tz, T2n, T2m); + Im[WS(rs, 5)] = FMA(T1Y, T2j, T2k); + Ip[WS(rs, 5)] = FNMS(T2c, T2j, T2a); 
+ T1k = T18 - T1j; + T1O = T1j + T18; + } + } + T1N = FMA(KP707106781, T1w, T1t); + T1x = FNMS(KP707106781, T1w, T1t); + T1A = T1y - T1z; + T1K = T1y + T1z; + { + E T1E, T1l, T1H, T1B; + T1E = FMA(KP923879532, T1k, TX); + T1l = FNMS(KP923879532, T1k, TX); + T1H = FMA(KP923879532, T1A, T1x); + T1B = FNMS(KP923879532, T1A, T1x); + { + E T1I, T1F, T1C, T1m; + T1I = T1G * T1E; + T1F = T1D * T1E; + T1C = T1o * T1l; + T1m = TG * T1l; + Im[WS(rs, 2)] = FMA(T1D, T1H, T1I); + Ip[WS(rs, 2)] = FNMS(T1G, T1H, T1F); + Im[WS(rs, 6)] = FMA(TG, T1B, T1C); + Ip[WS(rs, 6)] = FNMS(T1o, T1B, T1m); + } + } + { + E T2A, T2s, T2D, T2x; + T2A = FMA(KP923879532, T2r, T2q); + T2s = FNMS(KP923879532, T2r, T2q); + T2D = FNMS(KP923879532, T2w, T2v); + T2x = FMA(KP923879532, T2w, T2v); + { + E T2B, T2t, T2E, T2y; + T2B = T2z * T2A; + T2t = T2p * T2s; + T2E = T2z * T2D; + T2y = T2p * T2x; + Ip[WS(rs, 7)] = FNMS(T2C, T2D, T2B); + Ip[WS(rs, 3)] = FNMS(T2u, T2x, T2t); + Im[WS(rs, 7)] = FMA(T2C, T2A, T2E); + Im[WS(rs, 3)] = FMA(T2u, T2s, T2y); + } + } + } + } + } + } + } + { + E T1L, T1R, T1P, T1T; + T1L = FNMS(KP923879532, T1K, T1J); + T1R = FMA(KP923879532, T1K, T1J); + T1P = FNMS(KP923879532, T1O, T1N); + T1T = FMA(KP923879532, T1O, T1N); + { + E T1S, T1M, T1U, T1Q; + T1S = Tv * T1R; + T1M = TB * T1L; + T1U = Tv * T1T; + T1Q = TB * T1P; + Ip[0] = FNMS(Ty, T1T, T1S); + Ip[WS(rs, 4)] = FNMS(TF, T1P, T1M); + Im[0] = FMA(Ty, T1R, T1U); + Im[WS(rs, 4)] = FMA(TF, T1L, T1Q); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cb2_16", twinstr, &GENUS, {104, 42, 92, 0} }; + +void X(codelet_hc2cb2_16) (planner *p) { + X(khc2c_register) (p, hc2cb2_16, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 16 -dif -name hc2cb2_16 
-include hc2cb.h */ + +/* + * This function contains 196 FP additions, 108 FP multiplications, + * (or, 156 additions, 68 multiplications, 40 fused multiply/add), + * 80 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E Tv, Ty, T1l, T1n, T1p, T1t, T27, T25, Tz, Tw, TB, T21, T1P, T1H, T1X; + E T17, T1L, T1N, T1v, T1w, T1x, T1B, T2F, T2T, T2b, T2R, T3j, T3x, T35, T3t; + { + E TA, T1J, T15, T1G, Tx, T1K, T16, T1F; + { + E T1m, T1s, T1o, T1r; + Tv = W[0]; + Ty = W[1]; + T1l = W[2]; + T1n = W[3]; + T1m = Tv * T1l; + T1s = Ty * T1l; + T1o = Ty * T1n; + T1r = Tv * T1n; + T1p = T1m + T1o; + T1t = T1r - T1s; + T27 = T1r + T1s; + T25 = T1m - T1o; + Tz = W[5]; + TA = Ty * Tz; + T1J = T1l * Tz; + T15 = Tv * Tz; + T1G = T1n * Tz; + Tw = W[4]; + Tx = Tv * Tw; + T1K = T1n * Tw; + T16 = Ty * Tw; + T1F = T1l * Tw; + } + TB = Tx - TA; + T21 = T1J + T1K; + T1P = T15 - T16; + T1H = T1F + T1G; + T1X = T1F - T1G; + T17 = T15 + T16; + T1L = T1J - T1K; + T1N = Tx + TA; + T1v = W[6]; + T1w = W[7]; + T1x = FMA(Tv, T1v, Ty * T1w); + T1B = FNMS(Ty, T1v, Tv * T1w); + { + E T2D, T2E, T29, T2a; + T2D = T25 * Tz; + T2E = T27 * Tw; + T2F = T2D + T2E; + T2T = T2D - T2E; + T29 = T25 * Tw; + T2a = T27 * Tz; + T2b = T29 - T2a; + T2R = T29 + T2a; + } + { + E T3h, T3i, T33, T34; + T3h = T1p * Tz; + T3i = T1t * Tw; + T3j = T3h + T3i; + T3x = T3h - T3i; + T33 = T1p * Tw; + T34 = T1t * Tz; + T35 = T33 - T34; + T3t = T33 + T34; + } + } + { + E T7, T36, T3k, TC, T1f, T2e, T2I, T1Q, Te, TJ, T1R, T18, T2L, 
T37, T2l; + E T3l, Tm, T1T, TT, T1h, T2A, T2N, T3b, T3n, Tt, T1U, T12, T1i, T2t, T2O; + E T3e, T3o; + { + E T3, T2c, T1b, T2H, T6, T2G, T1e, T2d; + { + E T1, T2, T19, T1a; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + T3 = T1 + T2; + T2c = T1 - T2; + T19 = Ip[0]; + T1a = Im[WS(rs, 7)]; + T1b = T19 - T1a; + T2H = T19 + T1a; + } + { + E T4, T5, T1c, T1d; + T4 = Rp[WS(rs, 4)]; + T5 = Rm[WS(rs, 3)]; + T6 = T4 + T5; + T2G = T4 - T5; + T1c = Ip[WS(rs, 4)]; + T1d = Im[WS(rs, 3)]; + T1e = T1c - T1d; + T2d = T1c + T1d; + } + T7 = T3 + T6; + T36 = T2c + T2d; + T3k = T2H - T2G; + TC = T3 - T6; + T1f = T1b - T1e; + T2e = T2c - T2d; + T2I = T2G + T2H; + T1Q = T1b + T1e; + } + { + E Ta, T2f, TI, T2g, Td, T2i, TF, T2j; + { + E T8, T9, TG, TH; + T8 = Rp[WS(rs, 2)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + T2f = T8 - T9; + TG = Ip[WS(rs, 2)]; + TH = Im[WS(rs, 5)]; + TI = TG - TH; + T2g = TG + TH; + } + { + E Tb, Tc, TD, TE; + Tb = Rm[WS(rs, 1)]; + Tc = Rp[WS(rs, 6)]; + Td = Tb + Tc; + T2i = Tb - Tc; + TD = Ip[WS(rs, 6)]; + TE = Im[WS(rs, 1)]; + TF = TD - TE; + T2j = TD + TE; + } + Te = Ta + Td; + TJ = TF - TI; + T1R = TI + TF; + T18 = Ta - Td; + { + E T2J, T2K, T2h, T2k; + T2J = T2f + T2g; + T2K = T2i + T2j; + T2L = KP707106781 * (T2J - T2K); + T37 = KP707106781 * (T2J + T2K); + T2h = T2f - T2g; + T2k = T2i - T2j; + T2l = KP707106781 * (T2h + T2k); + T3l = KP707106781 * (T2h - T2k); + } + } + { + E Ti, T2x, TO, T2v, Tl, T2u, TR, T2y, TL, TS; + { + E Tg, Th, TM, TN; + Tg = Rp[WS(rs, 1)]; + Th = Rm[WS(rs, 6)]; + Ti = Tg + Th; + T2x = Tg - Th; + TM = Ip[WS(rs, 1)]; + TN = Im[WS(rs, 6)]; + TO = TM - TN; + T2v = TM + TN; + } + { + E Tj, Tk, TP, TQ; + Tj = Rp[WS(rs, 5)]; + Tk = Rm[WS(rs, 2)]; + Tl = Tj + Tk; + T2u = Tj - Tk; + TP = Ip[WS(rs, 5)]; + TQ = Im[WS(rs, 2)]; + TR = TP - TQ; + T2y = TP + TQ; + } + Tm = Ti + Tl; + T1T = TO + TR; + TL = Ti - Tl; + TS = TO - TR; + TT = TL - TS; + T1h = TL + TS; + { + E T2w, T2z, T39, T3a; + T2w = T2u + T2v; + T2z = T2x - T2y; + T2A = FMA(KP923879532, T2w, 
KP382683432 * T2z); + T2N = FNMS(KP382683432, T2w, KP923879532 * T2z); + T39 = T2x + T2y; + T3a = T2v - T2u; + T3b = FNMS(KP923879532, T3a, KP382683432 * T39); + T3n = FMA(KP382683432, T3a, KP923879532 * T39); + } + } + { + E Tp, T2q, TX, T2o, Ts, T2n, T10, T2r, TU, T11; + { + E Tn, To, TV, TW; + Tn = Rm[0]; + To = Rp[WS(rs, 7)]; + Tp = Tn + To; + T2q = Tn - To; + TV = Ip[WS(rs, 7)]; + TW = Im[0]; + TX = TV - TW; + T2o = TV + TW; + } + { + E Tq, Tr, TY, TZ; + Tq = Rp[WS(rs, 3)]; + Tr = Rm[WS(rs, 4)]; + Ts = Tq + Tr; + T2n = Tq - Tr; + TY = Ip[WS(rs, 3)]; + TZ = Im[WS(rs, 4)]; + T10 = TY - TZ; + T2r = TY + TZ; + } + Tt = Tp + Ts; + T1U = TX + T10; + TU = Tp - Ts; + T11 = TX - T10; + T12 = TU + T11; + T1i = T11 - TU; + { + E T2p, T2s, T3c, T3d; + T2p = T2n - T2o; + T2s = T2q - T2r; + T2t = FNMS(KP382683432, T2s, KP923879532 * T2p); + T2O = FMA(KP382683432, T2p, KP923879532 * T2s); + T3c = T2q + T2r; + T3d = T2n + T2o; + T3e = FNMS(KP923879532, T3d, KP382683432 * T3c); + T3o = FMA(KP382683432, T3d, KP923879532 * T3c); + } + } + { + E Tf, Tu, T1O, T1S, T1V, T1W; + Tf = T7 + Te; + Tu = Tm + Tt; + T1O = Tf - Tu; + T1S = T1Q + T1R; + T1V = T1T + T1U; + T1W = T1S - T1V; + Rp[0] = Tf + Tu; + Rm[0] = T1S + T1V; + Rp[WS(rs, 4)] = FNMS(T1P, T1W, T1N * T1O); + Rm[WS(rs, 4)] = FMA(T1P, T1O, T1N * T1W); + } + { + E T3g, T3r, T3q, T3s; + { + E T38, T3f, T3m, T3p; + T38 = T36 - T37; + T3f = T3b + T3e; + T3g = T38 - T3f; + T3r = T38 + T3f; + T3m = T3k + T3l; + T3p = T3n - T3o; + T3q = T3m - T3p; + T3s = T3m + T3p; + } + Ip[WS(rs, 5)] = FNMS(T3j, T3q, T35 * T3g); + Im[WS(rs, 5)] = FMA(T3j, T3g, T35 * T3q); + Ip[WS(rs, 1)] = FNMS(T1n, T3s, T1l * T3r); + Im[WS(rs, 1)] = FMA(T1n, T3r, T1l * T3s); + } + { + E T3w, T3B, T3A, T3C; + { + E T3u, T3v, T3y, T3z; + T3u = T36 + T37; + T3v = T3n + T3o; + T3w = T3u - T3v; + T3B = T3u + T3v; + T3y = T3k - T3l; + T3z = T3b - T3e; + T3A = T3y + T3z; + T3C = T3y - T3z; + } + Ip[WS(rs, 3)] = FNMS(T3x, T3A, T3t * T3w); + Im[WS(rs, 3)] = FMA(T3t, T3A, 
T3x * T3w); + Ip[WS(rs, 7)] = FNMS(T1w, T3C, T1v * T3B); + Im[WS(rs, 7)] = FMA(T1v, T3C, T1w * T3B); + } + { + E T14, T1q, T1k, T1u; + { + E TK, T13, T1g, T1j; + TK = TC + TJ; + T13 = KP707106781 * (TT + T12); + T14 = TK - T13; + T1q = TK + T13; + T1g = T18 + T1f; + T1j = KP707106781 * (T1h + T1i); + T1k = T1g - T1j; + T1u = T1g + T1j; + } + Rp[WS(rs, 5)] = FNMS(T17, T1k, TB * T14); + Rm[WS(rs, 5)] = FMA(T17, T14, TB * T1k); + Rp[WS(rs, 1)] = FNMS(T1t, T1u, T1p * T1q); + Rm[WS(rs, 1)] = FMA(T1t, T1q, T1p * T1u); + } + { + E T1A, T1I, T1E, T1M; + { + E T1y, T1z, T1C, T1D; + T1y = TC - TJ; + T1z = KP707106781 * (T1i - T1h); + T1A = T1y - T1z; + T1I = T1y + T1z; + T1C = T1f - T18; + T1D = KP707106781 * (TT - T12); + T1E = T1C - T1D; + T1M = T1C + T1D; + } + Rp[WS(rs, 7)] = FNMS(T1B, T1E, T1x * T1A); + Rm[WS(rs, 7)] = FMA(T1x, T1E, T1B * T1A); + Rp[WS(rs, 3)] = FNMS(T1L, T1M, T1H * T1I); + Rm[WS(rs, 3)] = FMA(T1H, T1M, T1L * T1I); + } + { + E T2C, T2S, T2Q, T2U; + { + E T2m, T2B, T2M, T2P; + T2m = T2e - T2l; + T2B = T2t - T2A; + T2C = T2m - T2B; + T2S = T2m + T2B; + T2M = T2I - T2L; + T2P = T2N - T2O; + T2Q = T2M - T2P; + T2U = T2M + T2P; + } + Ip[WS(rs, 6)] = FNMS(T2F, T2Q, T2b * T2C); + Im[WS(rs, 6)] = FMA(T2F, T2C, T2b * T2Q); + Ip[WS(rs, 2)] = FNMS(T2T, T2U, T2R * T2S); + Im[WS(rs, 2)] = FMA(T2T, T2S, T2R * T2U); + } + { + E T2X, T31, T30, T32; + { + E T2V, T2W, T2Y, T2Z; + T2V = T2e + T2l; + T2W = T2N + T2O; + T2X = T2V - T2W; + T31 = T2V + T2W; + T2Y = T2I + T2L; + T2Z = T2A + T2t; + T30 = T2Y - T2Z; + T32 = T2Y + T2Z; + } + Ip[WS(rs, 4)] = FNMS(Tz, T30, Tw * T2X); + Im[WS(rs, 4)] = FMA(Tw, T30, Tz * T2X); + Ip[0] = FNMS(Ty, T32, Tv * T31); + Im[0] = FMA(Tv, T32, Ty * T31); + } + { + E T20, T26, T24, T28; + { + E T1Y, T1Z, T22, T23; + T1Y = T7 - Te; + T1Z = T1U - T1T; + T20 = T1Y - T1Z; + T26 = T1Y + T1Z; + T22 = T1Q - T1R; + T23 = Tm - Tt; + T24 = T22 - T23; + T28 = T23 + T22; + } + Rp[WS(rs, 6)] = FNMS(T21, T24, T1X * T20); + Rm[WS(rs, 6)] = FMA(T1X, T24, T21 * 
T20); + Rp[WS(rs, 2)] = FNMS(T27, T28, T25 * T26); + Rm[WS(rs, 2)] = FMA(T25, T28, T27 * T26); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cb2_16", twinstr, &GENUS, {156, 68, 40, 0} }; + +void X(codelet_hc2cb2_16) (planner *p) { + X(khc2c_register) (p, hc2cb2_16, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1087 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:43 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 20 -dif -name hc2cb2_20 -include hc2cb.h */ + +/* + * This function contains 276 FP additions, 198 FP multiplications, + * (or, 136 additions, 58 multiplications, 140 fused multiply/add), + * 160 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) { + E T1S, T1O, T1s, TI, T24, T1Y, T2g, T2k, TS, TR, T1I, T26, T1o, T20, T1F; + E T25, TT, T1Z; + { + E TD, TH, TE, T1L, T1N, T1X, TG, T1V, T2Y, T2b, T29, T2s, T36, T3e, T31; + E T2o, T3b, T5b, T2c, T2U, T4y, T4u, T2f, T5g, T47, T5p, T4b, T5l; + { + E T1r, TF, T2T, T1M, T1R, T2X, T2r, T4x; + TD = W[0]; + TH = W[3]; + TE = W[2]; + T1L = W[6]; + T1N = W[7]; + T1r = TD * TH; + TF = TD * TE; + T2T = TE * T1L; + T1M = TD * T1L; + T1R = TD * T1N; + T2X = TE * T1N; + T1X = W[5]; + TG = W[1]; + T1V = W[4]; + T2Y = FNMS(TH, T1L, T2X); + T2r = TD * T1X; + { + E T23, T2n, T1W, T2a; + T23 = TE * T1X; + T1S = 
FNMS(TG, T1L, T1R); + T1O = FMA(TG, T1N, T1M); + T2b = FMA(TG, TE, T1r); + T1s = FNMS(TG, TE, T1r); + T29 = FNMS(TG, TH, TF); + TI = FMA(TG, TH, TF); + T2n = TD * T1V; + T1W = TE * T1V; + T2s = FMA(TG, T1V, T2r); + T36 = FNMS(TG, T1V, T2r); + T3e = FMA(TH, T1V, T23); + T24 = FNMS(TH, T1V, T23); + T2a = T29 * T1V; + T31 = FMA(TG, T1X, T2n); + T2o = FNMS(TG, T1X, T2n); + T3b = FNMS(TH, T1X, T1W); + T1Y = FMA(TH, T1X, T1W); + T5b = FNMS(T2b, T1X, T2a); + T2c = FMA(T2b, T1X, T2a); + T2U = FMA(TH, T1N, T2T); + } + T4x = T29 * T1N; + { + E T4t, T2d, T2j, T2e; + T4t = T29 * T1L; + T2e = T29 * T1X; + T4y = FNMS(T2b, T1L, T4x); + T4u = FMA(T2b, T1N, T4t); + T2f = FNMS(T2b, T1V, T2e); + T5g = FMA(T2b, T1V, T2e); + T2d = T2c * T1L; + T2j = T2c * T1N; + T47 = TI * T1V; + T2g = FMA(T2f, T1N, T2d); + T2k = FNMS(T2f, T1L, T2j); + T5p = TI * T1N; + T4b = TI * T1X; + T5l = TI * T1L; + } + } + { + E T4f, T48, T4c, T4k, T5m, T5q, T3V, T4V, TJ, T7, T3j, T4B, T2H, T1z, T3q; + E T43, T1n, T52, T42, T3x, T53, T2D, T18, T2A, T1H, T4R, T4X, T4W, T4O, T1G; + E T2O, T3I, T2P, T3P, T2K, T2M, T1C, T1E, TC, T2w, T40, T3Y, T4K, T4I, TQ; + { + E T3h, T3, T1w, T3T, T1v, T3U, T6, T1x; + { + E T1t, T1u, T1, T2, T4, T5; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + T1t = Ip[0]; + T4f = FNMS(T1s, T1X, T47); + T48 = FMA(T1s, T1X, T47); + T4c = FNMS(T1s, T1V, T4b); + T4k = FMA(T1s, T1V, T4b); + T5m = FMA(T1s, T1N, T5l); + T5q = FNMS(T1s, T1L, T5p); + T3h = T1 - T2; + T3 = T1 + T2; + T1u = Im[WS(rs, 9)]; + T4 = Rp[WS(rs, 5)]; + T5 = Rm[WS(rs, 4)]; + T1w = Ip[WS(rs, 5)]; + T3T = T1t + T1u; + T1v = T1t - T1u; + T3U = T4 - T5; + T6 = T4 + T5; + T1x = Im[WS(rs, 4)]; + } + { + E T3L, T4M, TK, Te, T3m, T4C, T2y, T1f, T3H, T4Q, TO, TA, T3w, T4G, T2C; + E T17, T3O, T4N, TL, Tl, T3p, T4D, T2z, T1m, T3r, Tp, TX, T3C, TW, T3D; + E Ts, TY; + { + E T3u, Tw, T14, T3G, T13, T3F, Tz, T15; + { + E T3k, Ta, T1c, T3J, T1b, T3K, Td, T1d; + { + E T19, T1a, Tb, Tc; + { + E T8, T3i, T1y, T9; + T8 = Rp[WS(rs, 4)]; + T3V = T3T - T3U; + 
T4V = T3U + T3T; + TJ = T3 - T6; + T7 = T3 + T6; + T3i = T1w + T1x; + T1y = T1w - T1x; + T9 = Rm[WS(rs, 5)]; + T19 = Ip[WS(rs, 4)]; + T3j = T3h + T3i; + T4B = T3h - T3i; + T2H = T1v + T1y; + T1z = T1v - T1y; + T3k = T8 - T9; + Ta = T8 + T9; + T1a = Im[WS(rs, 5)]; + } + Tb = Rp[WS(rs, 9)]; + Tc = Rm[0]; + T1c = Ip[WS(rs, 9)]; + T3J = T19 + T1a; + T1b = T19 - T1a; + T3K = Tb - Tc; + Td = Tb + Tc; + T1d = Im[0]; + } + { + E T11, T12, Tx, Ty; + { + E Tu, T3l, T1e, Tv; + Tu = Rm[WS(rs, 7)]; + T3L = T3J - T3K; + T4M = T3K + T3J; + TK = Ta - Td; + Te = Ta + Td; + T3l = T1c + T1d; + T1e = T1c - T1d; + Tv = Rp[WS(rs, 2)]; + T11 = Ip[WS(rs, 2)]; + T3m = T3k + T3l; + T4C = T3k - T3l; + T2y = T1b + T1e; + T1f = T1b - T1e; + T3u = Tu - Tv; + Tw = Tu + Tv; + T12 = Im[WS(rs, 7)]; + } + Tx = Rm[WS(rs, 2)]; + Ty = Rp[WS(rs, 7)]; + T14 = Ip[WS(rs, 7)]; + T3G = T11 + T12; + T13 = T11 - T12; + T3F = Tx - Ty; + Tz = Tx + Ty; + T15 = Im[WS(rs, 2)]; + } + } + { + E T3n, Th, T1j, T3N, T1i, T3M, Tk, T1k; + { + E T1g, T1h, Ti, Tj; + { + E Tf, T3v, T16, Tg; + Tf = Rm[WS(rs, 3)]; + T3H = T3F + T3G; + T4Q = T3F - T3G; + TO = Tw - Tz; + TA = Tw + Tz; + T3v = T14 + T15; + T16 = T14 - T15; + Tg = Rp[WS(rs, 6)]; + T1g = Ip[WS(rs, 6)]; + T3w = T3u - T3v; + T4G = T3u + T3v; + T2C = T13 + T16; + T17 = T13 - T16; + T3n = Tf - Tg; + Th = Tf + Tg; + T1h = Im[WS(rs, 3)]; + } + Ti = Rp[WS(rs, 1)]; + Tj = Rm[WS(rs, 8)]; + T1j = Ip[WS(rs, 1)]; + T3N = T1g + T1h; + T1i = T1g - T1h; + T3M = Ti - Tj; + Tk = Ti + Tj; + T1k = Im[WS(rs, 8)]; + } + { + E TU, TV, Tq, Tr; + { + E Tn, T3o, T1l, To; + Tn = Rp[WS(rs, 8)]; + T3O = T3M + T3N; + T4N = T3M - T3N; + TL = Th - Tk; + Tl = Th + Tk; + T3o = T1j + T1k; + T1l = T1j - T1k; + To = Rm[WS(rs, 1)]; + TU = Ip[WS(rs, 8)]; + T3p = T3n + T3o; + T4D = T3n - T3o; + T2z = T1i + T1l; + T1m = T1i - T1l; + T3r = Tn - To; + Tp = Tn + To; + TV = Im[WS(rs, 1)]; + } + Tq = Rm[WS(rs, 6)]; + Tr = Rp[WS(rs, 3)]; + TX = Ip[WS(rs, 3)]; + T3C = TU + TV; + TW = TU - TV; + T3D = Tq - Tr; + 
Ts = Tq + Tr; + TY = Im[WS(rs, 6)]; + } + } + } + { + E T3E, Tt, T1A, T4E, T4H, T2J, T1B, T2I, TM, TP; + { + E T4P, TN, T3s, TZ; + T3q = T3m + T3p; + T43 = T3m - T3p; + T3E = T3C - T3D; + T4P = T3D + T3C; + TN = Tp - Ts; + Tt = Tp + Ts; + T3s = TX + TY; + TZ = TX - TY; + T1n = T1f - T1m; + T1A = T1f + T1m; + T4E = T4C + T4D; + T52 = T4C - T4D; + { + E T3t, T4F, T2B, T10; + T3t = T3r - T3s; + T4F = T3r + T3s; + T2B = TW + TZ; + T10 = TW - TZ; + T42 = T3t - T3w; + T3x = T3t + T3w; + T4H = T4F + T4G; + T53 = T4F - T4G; + T2D = T2B - T2C; + T2J = T2B + T2C; + T1B = T10 + T17; + T18 = T10 - T17; + T2A = T2y - T2z; + T2I = T2y + T2z; + TM = TK + TL; + T1H = TK - TL; + } + T4R = T4P - T4Q; + T4X = T4P + T4Q; + T4W = T4M + T4N; + T4O = T4M - T4N; + T1G = TN - TO; + TP = TN + TO; + } + { + E Tm, T3X, TB, T3W; + Tm = Te + Tl; + T2O = Te - Tl; + T3I = T3E + T3H; + T3X = T3E - T3H; + TB = Tt + TA; + T2P = Tt - TA; + T3P = T3L + T3O; + T3W = T3L - T3O; + T2K = T2I + T2J; + T2M = T2I - T2J; + T1C = T1A + T1B; + T1E = T1A - T1B; + TC = Tm + TB; + T2w = Tm - TB; + T40 = T3W - T3X; + T3Y = T3W + T3X; + T4K = T4E - T4H; + T4I = T4E + T4H; + TS = TM - TP; + TQ = TM + TP; + } + } + } + } + { + E T3A, T3y, T50, T1D, T2t, T2p, T4J, T5t, T5v, T4Z, T4Y; + Rp[0] = T7 + TC; + T3A = T3q - T3x; + T3y = T3q + T3x; + T50 = T4W - T4X; + T4Y = T4W + T4X; + Rm[0] = T2H + T2K; + T1D = FNMS(KP250000000, T1C, T1z); + T2t = T1z + T1C; + T2p = TJ + TQ; + TR = FNMS(KP250000000, TQ, TJ); + T4J = FNMS(KP250000000, T4I, T4B); + T5t = T4B + T4I; + T5v = T4V + T4Y; + T4Z = FNMS(KP250000000, T4Y, T4V); + { + E T4m, T44, T4i, T4p, T49, T3R, T4j, T4a, T3S, T4l, T41, T4q; + { + E T3z, T4v, T4w, T3Z, T4z; + T3z = FNMS(KP250000000, T3y, T3j); + T4v = T3j + T3y; + { + E T2u, T2q, T5u, T5w; + T2u = T2s * T2p; + T2q = T2o * T2p; + T5u = T2c * T5t; + T5w = T2c * T5v; + Rm[WS(rs, 5)] = FMA(T2o, T2t, T2u); + Rp[WS(rs, 5)] = FNMS(T2s, T2t, T2q); + Ip[WS(rs, 2)] = FNMS(T2f, T5v, T5u); + Im[WS(rs, 2)] = FMA(T2f, T5t, T5w); 
+ T4w = T4u * T4v; + } + T3Z = FNMS(KP250000000, T3Y, T3V); + T4z = T3V + T3Y; + { + E T3Q, T4h, T4A, T4g, T3B; + T3Q = FNMS(KP618033988, T3P, T3I); + T4h = FMA(KP618033988, T3I, T3P); + Ip[WS(rs, 7)] = FNMS(T4y, T4z, T4w); + T4A = T4u * T4z; + T4m = FMA(KP618033988, T42, T43); + T44 = FNMS(KP618033988, T43, T42); + T4g = FMA(KP559016994, T3A, T3z); + T3B = FNMS(KP559016994, T3A, T3z); + Im[WS(rs, 7)] = FMA(T4y, T4v, T4A); + T4i = FNMS(KP951056516, T4h, T4g); + T4p = FMA(KP951056516, T4h, T4g); + T49 = FMA(KP951056516, T3Q, T3B); + T3R = FNMS(KP951056516, T3Q, T3B); + } + T4j = T4f * T4i; + T4a = T48 * T49; + T3S = TE * T3R; + T4l = FMA(KP559016994, T40, T3Z); + T41 = FNMS(KP559016994, T40, T3Z); + T4q = T1L * T4p; + } + { + E T5d, T4S, T54, T5i, T4L, T5c; + T5d = FNMS(KP618033988, T4O, T4R); + T4S = FMA(KP618033988, T4R, T4O); + { + E T4n, T4r, T4d, T45; + T4n = FMA(KP951056516, T4m, T4l); + T4r = FNMS(KP951056516, T4m, T4l); + T4d = FNMS(KP951056516, T44, T41); + T45 = FMA(KP951056516, T44, T41); + { + E T4o, T4s, T4e, T46; + T4o = T4f * T4n; + Ip[WS(rs, 5)] = FNMS(T4k, T4n, T4j); + T4s = T1L * T4r; + Ip[WS(rs, 9)] = FNMS(T1N, T4r, T4q); + T4e = T48 * T4d; + Ip[WS(rs, 3)] = FNMS(T4c, T4d, T4a); + T46 = TE * T45; + Ip[WS(rs, 1)] = FNMS(TH, T45, T3S); + Im[WS(rs, 5)] = FMA(T4k, T4i, T4o); + Im[WS(rs, 9)] = FMA(T1N, T4p, T4s); + Im[WS(rs, 3)] = FMA(T4c, T49, T4e); + Im[WS(rs, 1)] = FMA(TH, T3R, T46); + } + } + T54 = FMA(KP618033988, T53, T52); + T5i = FNMS(KP618033988, T52, T53); + T4L = FMA(KP559016994, T4K, T4J); + T5c = FNMS(KP559016994, T4K, T4J); + { + E T38, T2Q, T33, T2E, T2v, T37, T2N, T5h, T51, T2L, T2x, T32; + T38 = FNMS(KP618033988, T2O, T2P); + T2Q = FMA(KP618033988, T2P, T2O); + T5h = FNMS(KP559016994, T50, T4Z); + T51 = FMA(KP559016994, T50, T4Z); + { + E T5e, T5n, T57, T4T; + T5e = FNMS(KP951056516, T5d, T5c); + T5n = FMA(KP951056516, T5d, T5c); + T57 = FMA(KP951056516, T4S, T4L); + T4T = FNMS(KP951056516, T4S, T4L); + { + E T5j, T5r, T59, T55; + T5j 
= FMA(KP951056516, T5i, T5h); + T5r = FNMS(KP951056516, T5i, T5h); + T59 = FNMS(KP951056516, T54, T51); + T55 = FMA(KP951056516, T54, T51); + { + E T5f, T5o, T58, T4U; + T5f = T5b * T5e; + T5o = T5m * T5n; + T58 = T1V * T57; + T4U = TD * T4T; + { + E T5k, T5s, T5a, T56; + T5k = T5b * T5j; + T5s = T5m * T5r; + T5a = T1V * T59; + T56 = TD * T55; + Ip[WS(rs, 6)] = FNMS(T5g, T5j, T5f); + Ip[WS(rs, 8)] = FNMS(T5q, T5r, T5o); + Ip[WS(rs, 4)] = FNMS(T1X, T59, T58); + Ip[0] = FNMS(TG, T55, T4U); + Im[WS(rs, 6)] = FMA(T5g, T5e, T5k); + Im[WS(rs, 8)] = FMA(T5q, T5n, T5s); + Im[WS(rs, 4)] = FMA(T1X, T57, T5a); + Im[0] = FMA(TG, T4T, T56); + } + } + } + } + T2L = FNMS(KP250000000, T2K, T2H); + T33 = FNMS(KP618033988, T2A, T2D); + T2E = FMA(KP618033988, T2D, T2A); + T2v = FNMS(KP250000000, TC, T7); + T37 = FNMS(KP559016994, T2M, T2L); + T2N = FMA(KP559016994, T2M, T2L); + T1I = FNMS(KP618033988, T1H, T1G); + T26 = FMA(KP618033988, T1G, T1H); + T2x = FMA(KP559016994, T2w, T2v); + T32 = FNMS(KP559016994, T2w, T2v); + { + E T3f, T39, T2R, T2Z; + T3f = FNMS(KP951056516, T38, T37); + T39 = FMA(KP951056516, T38, T37); + T2R = FNMS(KP951056516, T2Q, T2N); + T2Z = FMA(KP951056516, T2Q, T2N); + { + E T3c, T34, T2F, T2V; + T3c = FMA(KP951056516, T33, T32); + T34 = FNMS(KP951056516, T33, T32); + T2F = FMA(KP951056516, T2E, T2x); + T2V = FNMS(KP951056516, T2E, T2x); + { + E T3a, T35, T3g, T3d; + T3a = T36 * T34; + T35 = T31 * T34; + T3g = T3e * T3c; + T3d = T3b * T3c; + { + E T30, T2W, T2S, T2G; + T30 = T2Y * T2V; + T2W = T2U * T2V; + T2S = T2b * T2F; + T2G = T29 * T2F; + Rm[WS(rs, 4)] = FMA(T31, T39, T3a); + Rp[WS(rs, 4)] = FNMS(T36, T39, T35); + Rm[WS(rs, 6)] = FMA(T3b, T3f, T3g); + Rp[WS(rs, 6)] = FNMS(T3e, T3f, T3d); + Rm[WS(rs, 8)] = FMA(T2U, T2Z, T30); + Rp[WS(rs, 8)] = FNMS(T2Y, T2Z, T2W); + Rm[WS(rs, 2)] = FMA(T29, T2R, T2S); + Rp[WS(rs, 2)] = FNMS(T2b, T2R, T2G); + } + } + } + } + T1o = FNMS(KP618033988, T1n, T18); + T20 = FMA(KP618033988, T18, T1n); + T1F = FNMS(KP559016994, T1E, 
T1D); + T25 = FMA(KP559016994, T1E, T1D); + } + } + } + } + } + } + TT = FNMS(KP559016994, TS, TR); + T1Z = FMA(KP559016994, TS, TR); + { + E T2l, T27, T1J, T1T; + T2l = FNMS(KP951056516, T26, T25); + T27 = FMA(KP951056516, T26, T25); + T1J = FNMS(KP951056516, T1I, T1F); + T1T = FMA(KP951056516, T1I, T1F); + { + E T2h, T21, T1p, T1P; + T2h = FMA(KP951056516, T20, T1Z); + T21 = FNMS(KP951056516, T20, T1Z); + T1p = FMA(KP951056516, T1o, TT); + T1P = FNMS(KP951056516, T1o, TT); + { + E T28, T22, T2m, T2i; + T28 = T24 * T21; + T22 = T1Y * T21; + T2m = T2k * T2h; + T2i = T2g * T2h; + { + E T1U, T1Q, T1K, T1q; + T1U = T1S * T1P; + T1Q = T1O * T1P; + T1K = T1s * T1p; + T1q = TI * T1p; + Rm[WS(rs, 3)] = FMA(T1Y, T27, T28); + Rp[WS(rs, 3)] = FNMS(T24, T27, T22); + Rm[WS(rs, 7)] = FMA(T2g, T2l, T2m); + Rp[WS(rs, 7)] = FNMS(T2k, T2l, T2i); + Rm[WS(rs, 9)] = FMA(T1O, T1T, T1U); + Rp[WS(rs, 9)] = FNMS(T1S, T1T, T1Q); + Rm[WS(rs, 1)] = FMA(TI, T1J, T1K); + Rp[WS(rs, 1)] = FNMS(T1s, T1J, T1q); + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cb2_20", twinstr, &GENUS, {136, 58, 140, 0} }; + +void X(codelet_hc2cb2_20) (planner *p) { + X(khc2c_register) (p, hc2cb2_20, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 20 -dif -name hc2cb2_20 -include hc2cb.h */ + +/* + * This function contains 276 FP additions, 164 FP multiplications, + * (or, 204 additions, 92 multiplications, 72 fused multiply/add), + * 137 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, 
+0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) { + E TD, TG, TE, TH, TJ, T1t, T27, T25, T1T, T1R, T1V, T2j, T2Z, T21, T2X; + E T2T, T2n, T2P, T3V, T41, T3R, T3X, T29, T2c, T4H, T4L, T1L, T1M, T1N, T2d; + E T4R, T1P, T4P, T49, T2N, T2f, T47, T2L; + { + E T1U, T2l, T1Z, T2i, T1S, T2m, T20, T2h; + { + E TF, T1s, TI, T1r; + TD = W[0]; + TG = W[1]; + TE = W[2]; + TH = W[3]; + TF = TD * TE; + T1s = TG * TE; + TI = TG * TH; + T1r = TD * TH; + TJ = TF + TI; + T1t = T1r - T1s; + T27 = T1r + T1s; + T25 = TF - TI; + T1T = W[5]; + T1U = TH * T1T; + T2l = TD * T1T; + T1Z = TE * T1T; + T2i = TG * T1T; + T1R = W[4]; + T1S = TE * T1R; + T2m = TG * T1R; + T20 = TH * T1R; + T2h = TD * T1R; + } + T1V = T1S + T1U; + T2j = T2h - T2i; + T2Z = T1Z + T20; + T21 = T1Z - T20; + T2X = T1S - T1U; + T2T = T2l - T2m; + T2n = T2l + T2m; + T2P = T2h + T2i; + { + E T3T, T3U, T3P, T3Q; + T3T = TJ * T1T; + T3U = T1t * T1R; + T3V = T3T - T3U; + T41 = T3T + T3U; + T3P = TJ * T1R; + T3Q = T1t * T1T; + T3R = T3P + T3Q; + T3X = T3P - T3Q; + { + E T26, T28, T2a, T2b; + T26 = T25 * T1R; + T28 = T27 * T1T; + T29 = T26 + T28; + T2a = T25 * T1T; + T2b = T27 * T1R; + T2c = T2a - T2b; + T4H = T26 - T28; + T4L = T2a + T2b; + T1L = W[6]; + T1M = W[7]; + T1N = FMA(TD, T1L, TG * T1M); + T2d = FMA(T29, T1L, T2c * T1M); + T4R = FNMS(T1t, T1L, TJ * T1M); + T1P = FNMS(TG, T1L, TD * T1M); + T4P = FMA(TJ, T1L, T1t * T1M); + T49 = FNMS(T27, T1L, T25 * T1M); + T2N = FNMS(TH, T1L, TE * T1M); + T2f = FNMS(T2c, T1L, T29 * T1M); + T47 = FMA(T25, T1L, T27 * T1M); + T2L = FMA(TE, T1L, TH * T1M); + } + } + } + { + E T7, T4i, T4x, TK, T1D, T3i, T3E, T2D, T19, T3L, T3M, T1o, T2x, T4C, T4B; + E T2u, T1v, T4r, T4o, T1u, T2H, T37, 
T2I, T3e, T3p, T3w, T3x, Tm, TB, TC; + E T4u, T4v, T4y, T2A, T2B, T2E, T1E, T1F, T1G, T4d, T4g, T4j, T3F, T3G, T3H; + E TN, TQ, TR, T48, T4a; + { + E T3, T3g, T1z, T3C, T6, T3D, T1C, T3h; + { + E T1, T2, T1x, T1y; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + T3 = T1 + T2; + T3g = T1 - T2; + T1x = Ip[0]; + T1y = Im[WS(rs, 9)]; + T1z = T1x - T1y; + T3C = T1x + T1y; + } + { + E T4, T5, T1A, T1B; + T4 = Rp[WS(rs, 5)]; + T5 = Rm[WS(rs, 4)]; + T6 = T4 + T5; + T3D = T4 - T5; + T1A = Ip[WS(rs, 5)]; + T1B = Im[WS(rs, 4)]; + T1C = T1A - T1B; + T3h = T1A + T1B; + } + T7 = T3 + T6; + T4i = T3g - T3h; + T4x = T3D + T3C; + TK = T3 - T6; + T1D = T1z - T1C; + T3i = T3g + T3h; + T3E = T3C - T3D; + T2D = T1z + T1C; + } + { + E Te, T4b, T4m, TL, T11, T33, T3l, T2s, TA, T4f, T4q, TP, T1n, T3d, T3v; + E T2w, Tl, T4c, T4n, TM, T18, T36, T3o, T2t, Tt, T4e, T4p, TO, T1g, T3a; + E T3s, T2v; + { + E Ta, T3j, TX, T31, Td, T32, T10, T3k; + { + E T8, T9, TV, TW; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + T3j = T8 - T9; + TV = Ip[WS(rs, 4)]; + TW = Im[WS(rs, 5)]; + TX = TV - TW; + T31 = TV + TW; + } + { + E Tb, Tc, TY, TZ; + Tb = Rp[WS(rs, 9)]; + Tc = Rm[0]; + Td = Tb + Tc; + T32 = Tb - Tc; + TY = Ip[WS(rs, 9)]; + TZ = Im[0]; + T10 = TY - TZ; + T3k = TY + TZ; + } + Te = Ta + Td; + T4b = T3j - T3k; + T4m = T32 + T31; + TL = Ta - Td; + T11 = TX - T10; + T33 = T31 - T32; + T3l = T3j + T3k; + T2s = TX + T10; + } + { + E Tw, T3t, T1j, T3c, Tz, T3b, T1m, T3u; + { + E Tu, Tv, T1h, T1i; + Tu = Rm[WS(rs, 7)]; + Tv = Rp[WS(rs, 2)]; + Tw = Tu + Tv; + T3t = Tu - Tv; + T1h = Ip[WS(rs, 2)]; + T1i = Im[WS(rs, 7)]; + T1j = T1h - T1i; + T3c = T1h + T1i; + } + { + E Tx, Ty, T1k, T1l; + Tx = Rm[WS(rs, 2)]; + Ty = Rp[WS(rs, 7)]; + Tz = Tx + Ty; + T3b = Tx - Ty; + T1k = Ip[WS(rs, 7)]; + T1l = Im[WS(rs, 2)]; + T1m = T1k - T1l; + T3u = T1k + T1l; + } + TA = Tw + Tz; + T4f = T3t + T3u; + T4q = T3b - T3c; + TP = Tw - Tz; + T1n = T1j - T1m; + T3d = T3b + T3c; + T3v = T3t - T3u; + T2w = T1j + T1m; + } + { + E 
Th, T3m, T14, T35, Tk, T34, T17, T3n; + { + E Tf, Tg, T12, T13; + Tf = Rm[WS(rs, 3)]; + Tg = Rp[WS(rs, 6)]; + Th = Tf + Tg; + T3m = Tf - Tg; + T12 = Ip[WS(rs, 6)]; + T13 = Im[WS(rs, 3)]; + T14 = T12 - T13; + T35 = T12 + T13; + } + { + E Ti, Tj, T15, T16; + Ti = Rp[WS(rs, 1)]; + Tj = Rm[WS(rs, 8)]; + Tk = Ti + Tj; + T34 = Ti - Tj; + T15 = Ip[WS(rs, 1)]; + T16 = Im[WS(rs, 8)]; + T17 = T15 - T16; + T3n = T15 + T16; + } + Tl = Th + Tk; + T4c = T3m - T3n; + T4n = T34 - T35; + TM = Th - Tk; + T18 = T14 - T17; + T36 = T34 + T35; + T3o = T3m + T3n; + T2t = T14 + T17; + } + { + E Tp, T3q, T1c, T38, Ts, T39, T1f, T3r; + { + E Tn, To, T1a, T1b; + Tn = Rp[WS(rs, 8)]; + To = Rm[WS(rs, 1)]; + Tp = Tn + To; + T3q = Tn - To; + T1a = Ip[WS(rs, 8)]; + T1b = Im[WS(rs, 1)]; + T1c = T1a - T1b; + T38 = T1a + T1b; + } + { + E Tq, Tr, T1d, T1e; + Tq = Rm[WS(rs, 6)]; + Tr = Rp[WS(rs, 3)]; + Ts = Tq + Tr; + T39 = Tq - Tr; + T1d = Ip[WS(rs, 3)]; + T1e = Im[WS(rs, 6)]; + T1f = T1d - T1e; + T3r = T1d + T1e; + } + Tt = Tp + Ts; + T4e = T3q + T3r; + T4p = T39 + T38; + TO = Tp - Ts; + T1g = T1c - T1f; + T3a = T38 - T39; + T3s = T3q - T3r; + T2v = T1c + T1f; + } + T19 = T11 - T18; + T3L = T3l - T3o; + T3M = T3s - T3v; + T1o = T1g - T1n; + T2x = T2v - T2w; + T4C = T4e - T4f; + T4B = T4b - T4c; + T2u = T2s - T2t; + T1v = TO - TP; + T4r = T4p - T4q; + T4o = T4m - T4n; + T1u = TL - TM; + T2H = Te - Tl; + T37 = T33 + T36; + T2I = Tt - TA; + T3e = T3a + T3d; + T3p = T3l + T3o; + T3w = T3s + T3v; + T3x = T3p + T3w; + Tm = Te + Tl; + TB = Tt + TA; + TC = Tm + TB; + T4u = T4m + T4n; + T4v = T4p + T4q; + T4y = T4u + T4v; + T2A = T2s + T2t; + T2B = T2v + T2w; + T2E = T2A + T2B; + T1E = T11 + T18; + T1F = T1g + T1n; + T1G = T1E + T1F; + T4d = T4b + T4c; + T4g = T4e + T4f; + T4j = T4d + T4g; + T3F = T33 - T36; + T3G = T3a - T3d; + T3H = T3F + T3G; + TN = TL + TM; + TQ = TO + TP; + TR = TN + TQ; + } + Rp[0] = T7 + TC; + Rm[0] = T2D + T2E; + { + E T2k, T2o, T4T, T4U; + T2k = TK + TR; + T2o = T1D + T1G; + 
Rp[WS(rs, 5)] = FNMS(T2n, T2o, T2j * T2k); + Rm[WS(rs, 5)] = FMA(T2n, T2k, T2j * T2o); + T4T = T4i + T4j; + T4U = T4x + T4y; + Ip[WS(rs, 2)] = FNMS(T2c, T4U, T29 * T4T); + Im[WS(rs, 2)] = FMA(T29, T4U, T2c * T4T); + } + T48 = T3i + T3x; + T4a = T3E + T3H; + Ip[WS(rs, 7)] = FNMS(T49, T4a, T47 * T48); + Im[WS(rs, 7)] = FMA(T47, T4a, T49 * T48); + { + E T2y, T2J, T2V, T2R, T2G, T2U, T2r, T2Q; + T2y = FMA(KP951056516, T2u, KP587785252 * T2x); + T2J = FMA(KP951056516, T2H, KP587785252 * T2I); + T2V = FNMS(KP951056516, T2I, KP587785252 * T2H); + T2R = FNMS(KP951056516, T2x, KP587785252 * T2u); + { + E T2C, T2F, T2p, T2q; + T2C = KP559016994 * (T2A - T2B); + T2F = FNMS(KP250000000, T2E, T2D); + T2G = T2C + T2F; + T2U = T2F - T2C; + T2p = KP559016994 * (Tm - TB); + T2q = FNMS(KP250000000, TC, T7); + T2r = T2p + T2q; + T2Q = T2q - T2p; + } + { + E T2z, T2K, T2Y, T30; + T2z = T2r + T2y; + T2K = T2G - T2J; + Rp[WS(rs, 2)] = FNMS(T27, T2K, T25 * T2z); + Rm[WS(rs, 2)] = FMA(T27, T2z, T25 * T2K); + T2Y = T2Q - T2R; + T30 = T2V + T2U; + Rp[WS(rs, 6)] = FNMS(T2Z, T30, T2X * T2Y); + Rm[WS(rs, 6)] = FMA(T2Z, T2Y, T2X * T30); + } + { + E T2M, T2O, T2S, T2W; + T2M = T2r - T2y; + T2O = T2J + T2G; + Rp[WS(rs, 8)] = FNMS(T2N, T2O, T2L * T2M); + Rm[WS(rs, 8)] = FMA(T2N, T2M, T2L * T2O); + T2S = T2Q + T2R; + T2W = T2U - T2V; + Rp[WS(rs, 4)] = FNMS(T2T, T2W, T2P * T2S); + Rm[WS(rs, 4)] = FMA(T2T, T2S, T2P * T2W); + } + } + { + E T4s, T4D, T4N, T4I, T4A, T4M, T4l, T4J; + T4s = FMA(KP951056516, T4o, KP587785252 * T4r); + T4D = FMA(KP951056516, T4B, KP587785252 * T4C); + T4N = FNMS(KP951056516, T4C, KP587785252 * T4B); + T4I = FNMS(KP951056516, T4r, KP587785252 * T4o); + { + E T4w, T4z, T4h, T4k; + T4w = KP559016994 * (T4u - T4v); + T4z = FNMS(KP250000000, T4y, T4x); + T4A = T4w + T4z; + T4M = T4z - T4w; + T4h = KP559016994 * (T4d - T4g); + T4k = FNMS(KP250000000, T4j, T4i); + T4l = T4h + T4k; + T4J = T4k - T4h; + } + { + E T4t, T4E, T4Q, T4S; + T4t = T4l - T4s; + T4E = T4A + T4D; + Ip[0] = 
FNMS(TG, T4E, TD * T4t); + Im[0] = FMA(TD, T4E, TG * T4t); + T4Q = T4J - T4I; + T4S = T4M + T4N; + Ip[WS(rs, 8)] = FNMS(T4R, T4S, T4P * T4Q); + Im[WS(rs, 8)] = FMA(T4P, T4S, T4R * T4Q); + } + { + E T4F, T4G, T4K, T4O; + T4F = T4s + T4l; + T4G = T4A - T4D; + Ip[WS(rs, 4)] = FNMS(T1T, T4G, T1R * T4F); + Im[WS(rs, 4)] = FMA(T1R, T4G, T1T * T4F); + T4K = T4I + T4J; + T4O = T4M - T4N; + Ip[WS(rs, 6)] = FNMS(T4L, T4O, T4H * T4K); + Im[WS(rs, 6)] = FMA(T4H, T4O, T4L * T4K); + } + } + { + E T1p, T1w, T22, T1X, T1J, T23, TU, T1W; + T1p = FNMS(KP951056516, T1o, KP587785252 * T19); + T1w = FNMS(KP951056516, T1v, KP587785252 * T1u); + T22 = FMA(KP951056516, T1u, KP587785252 * T1v); + T1X = FMA(KP951056516, T19, KP587785252 * T1o); + { + E T1H, T1I, TS, TT; + T1H = FNMS(KP250000000, T1G, T1D); + T1I = KP559016994 * (T1E - T1F); + T1J = T1H - T1I; + T23 = T1I + T1H; + TS = FNMS(KP250000000, TR, TK); + TT = KP559016994 * (TN - TQ); + TU = TS - TT; + T1W = TT + TS; + } + { + E T1q, T1K, T2e, T2g; + T1q = TU - T1p; + T1K = T1w + T1J; + Rp[WS(rs, 1)] = FNMS(T1t, T1K, TJ * T1q); + Rm[WS(rs, 1)] = FMA(T1t, T1q, TJ * T1K); + T2e = T1W + T1X; + T2g = T23 - T22; + Rp[WS(rs, 7)] = FNMS(T2f, T2g, T2d * T2e); + Rm[WS(rs, 7)] = FMA(T2f, T2e, T2d * T2g); + } + { + E T1O, T1Q, T1Y, T24; + T1O = TU + T1p; + T1Q = T1J - T1w; + Rp[WS(rs, 9)] = FNMS(T1P, T1Q, T1N * T1O); + Rm[WS(rs, 9)] = FMA(T1P, T1O, T1N * T1Q); + T1Y = T1W - T1X; + T24 = T22 + T23; + Rp[WS(rs, 3)] = FNMS(T21, T24, T1V * T1Y); + Rm[WS(rs, 3)] = FMA(T21, T1Y, T1V * T24); + } + } + { + E T3f, T3N, T43, T3Z, T3K, T42, T3A, T3Y; + T3f = FNMS(KP951056516, T3e, KP587785252 * T37); + T3N = FNMS(KP951056516, T3M, KP587785252 * T3L); + T43 = FMA(KP951056516, T3L, KP587785252 * T3M); + T3Z = FMA(KP951056516, T37, KP587785252 * T3e); + { + E T3I, T3J, T3y, T3z; + T3I = FNMS(KP250000000, T3H, T3E); + T3J = KP559016994 * (T3F - T3G); + T3K = T3I - T3J; + T42 = T3J + T3I; + T3y = FNMS(KP250000000, T3x, T3i); + T3z = KP559016994 * (T3p - T3w); 
+ T3A = T3y - T3z; + T3Y = T3z + T3y; + } + { + E T3B, T3O, T45, T46; + T3B = T3f + T3A; + T3O = T3K - T3N; + Ip[WS(rs, 1)] = FNMS(TH, T3O, TE * T3B); + Im[WS(rs, 1)] = FMA(TE, T3O, TH * T3B); + T45 = T3Z + T3Y; + T46 = T42 - T43; + Ip[WS(rs, 9)] = FNMS(T1M, T46, T1L * T45); + Im[WS(rs, 9)] = FMA(T1L, T46, T1M * T45); + } + { + E T3S, T3W, T40, T44; + T3S = T3A - T3f; + T3W = T3K + T3N; + Ip[WS(rs, 3)] = FNMS(T3V, T3W, T3R * T3S); + Im[WS(rs, 3)] = FMA(T3R, T3W, T3V * T3S); + T40 = T3Y - T3Z; + T44 = T42 + T43; + Ip[WS(rs, 5)] = FNMS(T41, T44, T3X * T40); + Im[WS(rs, 5)] = FMA(T3X, T44, T41 * T40); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cb2_20", twinstr, &GENUS, {204, 92, 72, 0} }; + +void X(codelet_hc2cb2_20) (planner *p) { + X(khc2c_register) (p, hc2cb2_20, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1855 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:42 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hc2cb2_32 -include hc2cb.h */ + +/* + * This function contains 488 FP additions, 350 FP multiplications, + * (or, 236 additions, 98 multiplications, 252 fused multiply/add), + * 204 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) { + E T5u, T6b, T6e, T5I, T66, T60, T5U, T5R, T67, T5L, T61, T5x, T5A, T5D, T5O; + E T62, T5V, T5P; + { + E T11, T14, T12, T37, T17, T1b, T39, T15, T7C, T8P, T8S, T7I, T98, T7e, T78; + E T8V, T3d, T3x, T3a, T3v, T9s, T3G, T4p, T5X, T16, T9m, T3y, T4b, T3C, T4g; + E T5Z, T1a, T4r, T3J, T2O, T1c, T4W, T4s, T3Y, T3K, T3l, T3e, T3i, T3q, T8K; + E T8E, T8m, T7S, T5k, T5e; + { + E T13, T3c, T38, T3F, 
T7B, T9l, T77, T7d, T9r, T7H; + T11 = W[2]; + T14 = W[3]; + T12 = W[4]; + T37 = W[0]; + T17 = W[6]; + T1b = W[7]; + T13 = T11 * T12; + T3c = T37 * T14; + T38 = T37 * T11; + T3F = T37 * T12; + T7B = T11 * T17; + T9l = T12 * T17; + T77 = T37 * T17; + T7d = T37 * T1b; + T9r = T12 * T1b; + T7H = T11 * T1b; + T39 = W[1]; + T15 = W[5]; + { + E T3I, T19, T5d, T3b, T18, T2N; + T7C = FMA(T14, T1b, T7B); + T8P = FNMS(T14, T1b, T7B); + T8S = FMA(T14, T17, T7H); + T7I = FNMS(T14, T17, T7H); + T98 = FNMS(T39, T17, T7d); + T7e = FMA(T39, T17, T7d); + T78 = FNMS(T39, T1b, T77); + T8V = FMA(T39, T1b, T77); + T3d = FMA(T39, T11, T3c); + T3x = FNMS(T39, T11, T3c); + T3a = FNMS(T39, T14, T38); + T3v = FMA(T39, T14, T38); + T9s = FNMS(T15, T17, T9r); + T3G = FNMS(T39, T15, T3F); + T4p = FMA(T39, T15, T3F); + T5X = FNMS(T14, T15, T13); + T16 = FMA(T14, T15, T13); + T3I = T37 * T15; + T19 = T11 * T15; + T5d = T3v * T12; + T3b = T3a * T12; + T9m = FMA(T15, T1b, T9l); + { + E T3w, T3B, T5t, T5H; + T3w = T3v * T17; + T3B = T3v * T1b; + T5t = T3a * T17; + T5H = T3a * T1b; + T3y = FNMS(T3x, T1b, T3w); + T4b = FMA(T3x, T1b, T3w); + T3C = FMA(T3x, T17, T3B); + T4g = FNMS(T3x, T17, T3B); + T5u = FMA(T3d, T1b, T5t); + T6b = FNMS(T3d, T1b, T5t); + T6e = FMA(T3d, T17, T5H); + T5I = FNMS(T3d, T17, T5H); + T18 = T16 * T17; + T2N = T16 * T1b; + T5Z = FMA(T14, T12, T19); + T1a = FNMS(T14, T12, T19); + } + { + E T3H, T3X, T4q, T4V, T5Y, T65; + T4q = T4p * T17; + T4V = T4p * T1b; + T4r = FNMS(T39, T12, T3I); + T3J = FMA(T39, T12, T3I); + T2O = FNMS(T1a, T17, T2N); + T1c = FMA(T1a, T1b, T18); + T3H = T3G * T17; + T4W = FNMS(T4r, T17, T4V); + T4s = FMA(T4r, T1b, T4q); + T3X = T3G * T1b; + T5Y = T5X * T17; + T65 = T5X * T1b; + T3Y = FNMS(T3J, T17, T3X); + T3K = FMA(T3J, T1b, T3H); + { + E T8J, T8D, T3h, T5j, T8l, T7R; + T3h = T3a * T15; + T66 = FNMS(T5Z, T17, T65); + T60 = FMA(T5Z, T1b, T5Y); + T3l = FNMS(T3d, T15, T3b); + T3e = FMA(T3d, T15, T3b); + T3i = FNMS(T3d, T12, T3h); + T3q = FMA(T3d, T12, T3h); + 
T8J = T3l * T1b; + T8D = T3l * T17; + T5j = T3v * T15; + T8l = T3e * T1b; + T7R = T3e * T17; + T8K = FNMS(T3q, T17, T8J); + T8E = FMA(T3q, T1b, T8D); + T8m = FNMS(T3i, T17, T8l); + T7S = FMA(T3i, T1b, T7R); + T5U = FNMS(T3x, T12, T5j); + T5k = FMA(T3x, T12, T5j); + T5e = FNMS(T3x, T15, T5d); + T5R = FMA(T3x, T15, T5d); + } + } + } + } + { + E T6O, T6i, T7s, T7o, T6j, Tf, T8W, T7V, T99, T8p, T3L, T1t, T3Z, T2X, T5J; + E T4Z, T7t, T6W, T5v, T4v, TZ, T7x, T91, T9d, T28, T3S, T3R, T2h, T5B, T4Q; + E T8v, T8a, T5C, T4N, T6Z, T6J, TK, T7w, T3P, T2z, T9c, T94, T3O, T2I, T5y; + E T4J, T8u, T8h, T5z, T4G, T6Y, T6A, T6P, Tu, T9a, T82, T8X, T8s, T4y, T40; + E T1Q, T3M, T30, T4B, T5w, T52, T7u, T6q; + { + E T6B, T6I, T4M, T4L, T4t, T4u, T6s, T6z; + { + E T1d, T3, T6Q, T2S, T2P, T6, T6R, T1g, Td, T6U, T1i, Ta, T2V, T1r, T6T; + E T1l; + { + E T2Q, T2R, T4, T5, T1, T2, T1e, T1f; + T1 = Rp[0]; + T2 = Rm[WS(rs, 15)]; + { + E T6N, T6h, T7r, T7n; + T6N = T5R * T1b; + T6h = T5R * T17; + T7r = T5e * T1b; + T7n = T5e * T17; + T6O = FNMS(T5U, T17, T6N); + T6i = FMA(T5U, T1b, T6h); + T7s = FNMS(T5k, T17, T7r); + T7o = FMA(T5k, T1b, T7n); + T1d = T1 - T2; + T3 = T1 + T2; + } + T2Q = Ip[0]; + T2R = Im[WS(rs, 15)]; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + T1e = Ip[WS(rs, 8)]; + T6Q = T2Q - T2R; + T2S = T2Q + T2R; + T2P = T4 - T5; + T6 = T4 + T5; + T1f = Im[WS(rs, 7)]; + { + E T1o, T1n, T1p, Tb, Tc; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + T1o = Ip[WS(rs, 12)]; + T6R = T1e - T1f; + T1g = T1e + T1f; + T1n = Tb - Tc; + Td = Tb + Tc; + T1p = Im[WS(rs, 3)]; + { + E T1j, T1k, T8, T9, T1q; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + T1q = T1o + T1p; + T6U = T1o - T1p; + T1j = Ip[WS(rs, 4)]; + T1i = T8 - T9; + Ta = T8 + T9; + T1k = Im[WS(rs, 11)]; + T2V = T1n + T1q; + T1r = T1n - T1q; + T6T = T1j - T1k; + T1l = T1j + T1k; + } + } + } + { + E T2U, T6V, T6S, T1h, T1s, T4Y, T4X, T2T, T2W; + { + E T7T, T8o, T1m, T7U, T7, Te, T8n; + T7T = T3 - T6; + T7 = T3 + T6; + Te = Ta + Td; + T8o = Ta 
- Td; + T1m = T1i - T1l; + T2U = T1i + T1l; + T6j = T7 - Te; + Tf = T7 + Te; + T7U = T6U - T6T; + T6V = T6T + T6U; + T6S = T6Q + T6R; + T8n = T6Q - T6R; + T4t = T1d + T1g; + T1h = T1d - T1g; + T8W = T7T + T7U; + T7V = T7T - T7U; + T99 = T8o + T8n; + T8p = T8n - T8o; + T1s = T1m + T1r; + T4Y = T1m - T1r; + } + T4X = T2S - T2P; + T2T = T2P + T2S; + T2W = T2U - T2V; + T4u = T2U + T2V; + T3L = FMA(KP707106781, T1s, T1h); + T1t = FNMS(KP707106781, T1s, T1h); + T3Z = FMA(KP707106781, T2W, T2T); + T2X = FNMS(KP707106781, T2W, T2T); + T5J = FNMS(KP707106781, T4Y, T4X); + T4Z = FMA(KP707106781, T4Y, T4X); + T7t = T6S + T6V; + T6W = T6S - T6V; + } + } + { + E T29, T1S, T1V, T87, TR, T2c, T84, T6E, TU, T23, T6F, T22, TX, T24, T2e; + E T21; + { + E TO, TN, TP, TL, TM; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + TO = Rp[WS(rs, 7)]; + T5v = FMA(KP707106781, T4u, T4t); + T4v = FNMS(KP707106781, T4u, T4t); + TN = TL + TM; + T29 = TL - TM; + TP = Rm[WS(rs, 8)]; + { + E T6C, T6D, T1X, T20; + { + E T2a, T2b, T1T, T1U, TQ; + T1T = Ip[WS(rs, 15)]; + T1U = Im[0]; + TQ = TO + TP; + T1S = TO - TP; + T2a = Ip[WS(rs, 7)]; + T6C = T1T - T1U; + T1V = T1T + T1U; + T2b = Im[WS(rs, 8)]; + T87 = TN - TQ; + TR = TN + TQ; + T2c = T2a + T2b; + T6D = T2a - T2b; + } + { + E T1Y, T1Z, TS, TT, TV, TW; + TS = Rp[WS(rs, 3)]; + TT = Rm[WS(rs, 12)]; + T84 = T6C - T6D; + T6E = T6C + T6D; + T1Y = Ip[WS(rs, 3)]; + T1X = TS - TT; + TU = TS + TT; + T1Z = Im[WS(rs, 12)]; + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + T23 = Ip[WS(rs, 11)]; + T6F = T1Y - T1Z; + T20 = T1Y + T1Z; + T22 = TV - TW; + TX = TV + TW; + T24 = Im[WS(rs, 4)]; + } + T2e = T1X - T20; + T21 = T1X + T20; + } + } + { + E TY, T85, T25, T6G; + TY = TU + TX; + T85 = TU - TX; + T25 = T23 + T24; + T6G = T23 - T24; + { + E T4O, T1W, T2f, T8Z, T86, T89, T90, T27, T88, T26, T6H, T4P, T2d, T2g; + T4O = T1S + T1V; + T1W = T1S - T1V; + TZ = TR + TY; + T6B = TR - TY; + T88 = T6G - T6F; + T6H = T6F + T6G; + T26 = T22 + T25; + T2f = T22 - T25; + T6I = T6E - T6H; + 
T7x = T6E + T6H; + T8Z = T85 + T84; + T86 = T84 - T85; + T89 = T87 - T88; + T90 = T87 + T88; + T27 = T21 - T26; + T4M = T21 + T26; + T4L = T29 + T2c; + T2d = T29 - T2c; + T2g = T2e + T2f; + T4P = T2e - T2f; + T91 = FNMS(KP414213562, T90, T8Z); + T9d = FMA(KP414213562, T8Z, T90); + T28 = FNMS(KP707106781, T27, T1W); + T3S = FMA(KP707106781, T27, T1W); + T3R = FMA(KP707106781, T2g, T2d); + T2h = FNMS(KP707106781, T2g, T2d); + T5B = FMA(KP707106781, T4P, T4O); + T4Q = FNMS(KP707106781, T4P, T4O); + T8v = FNMS(KP414213562, T86, T89); + T8a = FMA(KP414213562, T89, T86); + } + } + } + { + E T2A, T2j, TC, T8e, T2m, T2D, T6v, T8b, TF, T6w, T2F, T2s, T2t, TI, T6x; + E T2w, TJ, T8c; + { + E Tw, Tx, Tz, TA, T6t, T6u; + Tw = Rp[WS(rs, 1)]; + T5C = FMA(KP707106781, T4M, T4L); + T4N = FNMS(KP707106781, T4M, T4L); + T6Z = T6I - T6B; + T6J = T6B + T6I; + Tx = Rm[WS(rs, 14)]; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + { + E T2k, Ty, TB, T2l, T2B, T2C; + T2k = Ip[WS(rs, 1)]; + T2A = Tw - Tx; + Ty = Tw + Tx; + T2j = Tz - TA; + TB = Tz + TA; + T2l = Im[WS(rs, 14)]; + T2B = Ip[WS(rs, 9)]; + T2C = Im[WS(rs, 6)]; + TC = Ty + TB; + T8e = Ty - TB; + T2m = T2k + T2l; + T6t = T2k - T2l; + T6u = T2B - T2C; + T2D = T2B + T2C; + } + { + E TG, T2o, T2r, TH, T2u, T2v; + { + E TD, TE, T2p, T2q; + TD = Rp[WS(rs, 5)]; + T6v = T6t + T6u; + T8b = T6t - T6u; + TE = Rm[WS(rs, 10)]; + T2p = Ip[WS(rs, 5)]; + T2q = Im[WS(rs, 10)]; + TG = Rm[WS(rs, 2)]; + T2o = TD - TE; + TF = TD + TE; + T6w = T2p - T2q; + T2r = T2p + T2q; + TH = Rp[WS(rs, 13)]; + T2u = Ip[WS(rs, 13)]; + T2v = Im[WS(rs, 2)]; + } + T2F = T2o - T2r; + T2s = T2o + T2r; + T2t = TG - TH; + TI = TG + TH; + T6x = T2u - T2v; + T2w = T2u + T2v; + } + } + TJ = TF + TI; + T8c = TF - TI; + { + E T8f, T6y, T2x, T2G; + T8f = T6x - T6w; + T6y = T6w + T6x; + T2x = T2t + T2w; + T2G = T2t - T2w; + { + E T4H, T2n, T2y, T4F, T8d, T92, T93, T8g; + T6s = TC - TJ; + TK = TC + TJ; + T7w = T6v + T6y; + T6z = T6v - T6y; + T4H = T2m - T2j; + T2n = T2j + T2m; + T2y 
= T2s - T2x; + T4F = T2s + T2x; + T8d = T8b - T8c; + T92 = T8c + T8b; + T93 = T8e + T8f; + T8g = T8e - T8f; + { + E T4E, T2E, T2H, T4I; + T4E = T2A + T2D; + T2E = T2A - T2D; + T3P = FMA(KP707106781, T2y, T2n); + T2z = FNMS(KP707106781, T2y, T2n); + T9c = FNMS(KP414213562, T92, T93); + T94 = FMA(KP414213562, T93, T92); + T2H = T2F + T2G; + T4I = T2G - T2F; + T3O = FMA(KP707106781, T2H, T2E); + T2I = FNMS(KP707106781, T2H, T2E); + T5y = FMA(KP707106781, T4I, T4H); + T4J = FNMS(KP707106781, T4I, T4H); + T8u = FMA(KP414213562, T8d, T8g); + T8h = FNMS(KP414213562, T8g, T8d); + T5z = FMA(KP707106781, T4F, T4E); + T4G = FNMS(KP707106781, T4F, T4E); + } + } + } + } + { + E T4w, T1J, T7Z, Tm, T6p, T80, T4x, T1O, T1z, Tp, T1A, T6k, T1x, T1u, Ts; + E T1B; + { + E T1K, Ti, T1L, T6n, T1I, T1F, Tl, T1M; + { + E T1G, T1H, Tg, Th, Tj, Tk; + Tg = Rp[WS(rs, 2)]; + Th = Rm[WS(rs, 13)]; + T1G = Ip[WS(rs, 2)]; + T6Y = T6s + T6z; + T6A = T6s - T6z; + T1K = Tg - Th; + Ti = Tg + Th; + T1H = Im[WS(rs, 13)]; + Tj = Rp[WS(rs, 10)]; + Tk = Rm[WS(rs, 5)]; + T1L = Ip[WS(rs, 10)]; + T6n = T1G - T1H; + T1I = T1G + T1H; + T1F = Tj - Tk; + Tl = Tj + Tk; + T1M = Im[WS(rs, 5)]; + } + { + E T1v, T1w, Tq, Tr; + { + E Tn, T1N, T6o, To; + Tn = Rm[WS(rs, 1)]; + T4w = T1I - T1F; + T1J = T1F + T1I; + T7Z = Ti - Tl; + Tm = Ti + Tl; + T1N = T1L + T1M; + T6o = T1L - T1M; + To = Rp[WS(rs, 14)]; + T1v = Ip[WS(rs, 14)]; + T6p = T6n + T6o; + T80 = T6n - T6o; + T4x = T1K + T1N; + T1O = T1K - T1N; + T1z = Tn - To; + Tp = Tn + To; + T1w = Im[WS(rs, 1)]; + } + Tq = Rp[WS(rs, 6)]; + Tr = Rm[WS(rs, 9)]; + T1A = Ip[WS(rs, 6)]; + T6k = T1v - T1w; + T1x = T1v + T1w; + T1u = Tq - Tr; + Ts = Tq + Tr; + T1B = Im[WS(rs, 9)]; + } + } + { + E T4z, T6m, T4A, T2Z, T1E, T1P, T2Y, T50, T51; + { + E T1y, T81, T8q, T1D, T7Y, T8r; + { + E T7X, Tt, T1C, T6l, T7W; + T4z = T1u + T1x; + T1y = T1u - T1x; + T7X = Tp - Ts; + Tt = Tp + Ts; + T1C = T1A + T1B; + T6l = T1A - T1B; + T81 = T7Z + T80; + T8q = T7Z - T80; + T6m = T6k + T6l; + T7W = 
T6k - T6l; + T4A = T1z + T1C; + T1D = T1z - T1C; + T6P = Tm - Tt; + Tu = Tm + Tt; + T7Y = T7W - T7X; + T8r = T7X + T7W; + } + T2Z = FMA(KP414213562, T1y, T1D); + T1E = FNMS(KP414213562, T1D, T1y); + T9a = T81 + T7Y; + T82 = T7Y - T81; + T8X = T8q + T8r; + T8s = T8q - T8r; + T1P = FMA(KP414213562, T1O, T1J); + T2Y = FNMS(KP414213562, T1J, T1O); + } + T4y = FNMS(KP414213562, T4x, T4w); + T50 = FMA(KP414213562, T4w, T4x); + T40 = T1P + T1E; + T1Q = T1E - T1P; + T3M = T2Y + T2Z; + T30 = T2Y - T2Z; + T51 = FMA(KP414213562, T4z, T4A); + T4B = FNMS(KP414213562, T4A, T4z); + T5w = T50 + T51; + T52 = T50 - T51; + T7u = T6p + T6m; + T6q = T6m - T6p; + } + } + } + { + E T7D, T7K, T7J, T5K, T4C, T7E, T83, T8w, T8t, T8i, T6r, T70, T6X, T6K; + { + E T8Y, T9e, T9b, T95, T8F, T8G, T8L, T8M; + { + E T7v, T7p, T7y, Tv, T10; + T7D = Tf - Tu; + Tv = Tf + Tu; + T10 = TK + TZ; + T7K = TK - TZ; + T7J = T7t - T7u; + T7v = T7t + T7u; + T5K = T4B - T4y; + T4C = T4y + T4B; + T7p = Tv - T10; + T7E = T7x - T7w; + T7y = T7w + T7x; + Rp[0] = Tv + T10; + { + E T9p, T9x, T9z, T9v; + { + E T9n, T7A, T7q, T7z, T9o, T9t, T9u; + T8Y = FNMS(KP707106781, T8X, T8W); + T9n = FMA(KP707106781, T8X, T8W); + T7A = T7s * T7p; + T7q = T7o * T7p; + Rm[0] = T7v + T7y; + T7z = T7v - T7y; + T9o = T9c + T9d; + T9e = T9c - T9d; + T9b = FNMS(KP707106781, T9a, T99); + T9t = FMA(KP707106781, T9a, T99); + T9u = T94 + T91; + T95 = T91 - T94; + Rm[WS(rs, 8)] = FMA(T7o, T7z, T7A); + Rp[WS(rs, 8)] = FNMS(T7s, T7z, T7q); + T9p = FNMS(KP923879532, T9o, T9n); + T9x = FMA(KP923879532, T9o, T9n); + T9z = FMA(KP923879532, T9u, T9t); + T9v = FNMS(KP923879532, T9u, T9t); + } + { + E T9y, T9q, T9w, T9A; + T9y = T3v * T9x; + T9q = T9m * T9p; + T9w = T9m * T9v; + T9A = T3v * T9z; + Rp[WS(rs, 1)] = FNMS(T3x, T9z, T9y); + Rp[WS(rs, 9)] = FNMS(T9s, T9v, T9q); + Rm[WS(rs, 9)] = FMA(T9s, T9p, T9w); + Rm[WS(rs, 1)] = FMA(T3x, T9x, T9A); + } + } + T83 = FMA(KP707106781, T82, T7V); + T8F = FNMS(KP707106781, T82, T7V); + T8G = T8u + T8v; + T8w 
= T8u - T8v; + T8t = FMA(KP707106781, T8s, T8p); + T8L = FNMS(KP707106781, T8s, T8p); + T8M = T8h + T8a; + T8i = T8a - T8h; + } + { + E T79, T7a, T7f, T7g; + T6r = T6j + T6q; + T79 = T6j - T6q; + { + E T8Q, T8H, T8T, T8N; + T8Q = FMA(KP923879532, T8G, T8F); + T8H = FNMS(KP923879532, T8G, T8F); + T8T = FMA(KP923879532, T8M, T8L); + T8N = FNMS(KP923879532, T8M, T8L); + { + E T8R, T8I, T8U, T8O; + T8R = T8P * T8Q; + T8I = T8E * T8H; + T8U = T8P * T8T; + T8O = T8E * T8N; + Rp[WS(rs, 15)] = FNMS(T8S, T8T, T8R); + Rp[WS(rs, 7)] = FNMS(T8K, T8N, T8I); + Rm[WS(rs, 15)] = FMA(T8S, T8Q, T8U); + Rm[WS(rs, 7)] = FMA(T8K, T8H, T8O); + T7a = T6Z - T6Y; + T70 = T6Y + T6Z; + } + } + T6X = T6P + T6W; + T7f = T6W - T6P; + T7g = T6A - T6J; + T6K = T6A + T6J; + { + E T7j, T7b, T7l, T7h; + T7j = FMA(KP707106781, T7a, T79); + T7b = FNMS(KP707106781, T7a, T79); + T7l = FMA(KP707106781, T7g, T7f); + T7h = FNMS(KP707106781, T7g, T7f); + { + E T7k, T7c, T7m, T7i; + T7k = T5X * T7j; + T7c = T78 * T7b; + T7m = T5X * T7l; + T7i = T78 * T7h; + Rp[WS(rs, 6)] = FNMS(T5Z, T7l, T7k); + Rp[WS(rs, 14)] = FNMS(T7e, T7h, T7c); + Rm[WS(rs, 6)] = FMA(T5Z, T7j, T7m); + Rm[WS(rs, 14)] = FMA(T7e, T7b, T7i); + } + } + { + E T9h, T96, T9j, T9f; + T9h = FMA(KP923879532, T95, T8Y); + T96 = FNMS(KP923879532, T95, T8Y); + T9j = FMA(KP923879532, T9e, T9b); + T9f = FNMS(KP923879532, T9e, T9b); + { + E T9k, T9i, T9g, T97; + T9k = T3J * T9h; + T9i = T3G * T9h; + T9g = T98 * T96; + T97 = T8V * T96; + Rm[WS(rs, 5)] = FMA(T3G, T9j, T9k); + Rp[WS(rs, 5)] = FNMS(T3J, T9j, T9i); + Rm[WS(rs, 13)] = FMA(T8V, T9f, T9g); + Rp[WS(rs, 13)] = FNMS(T98, T9f, T97); + } + } + } + } + { + E T31, T3r, T1R, T3m, T33, T32, T3s, T2K, T8z, T8j; + { + E T73, T6L, T75, T71; + T73 = FMA(KP707106781, T6K, T6r); + T6L = FNMS(KP707106781, T6K, T6r); + T75 = FMA(KP707106781, T70, T6X); + T71 = FNMS(KP707106781, T70, T6X); + { + E T76, T74, T72, T6M; + T76 = T3d * T73; + T74 = T3a * T73; + T72 = T6O * T6L; + T6M = T6i * T6L; + Rm[WS(rs, 2)] = 
FMA(T3a, T75, T76); + Rp[WS(rs, 2)] = FNMS(T3d, T75, T74); + Rm[WS(rs, 10)] = FMA(T6i, T71, T72); + Rp[WS(rs, 10)] = FNMS(T6O, T71, T6M); + } + } + { + E T7N, T7F, T7P, T7L; + T7N = T7D + T7E; + T7F = T7D - T7E; + T7P = T7K + T7J; + T7L = T7J - T7K; + { + E T7O, T7G, T7Q, T7M; + T7O = T4p * T7N; + T7G = T7C * T7F; + T7Q = T4p * T7P; + T7M = T7C * T7L; + Rp[WS(rs, 4)] = FNMS(T4r, T7P, T7O); + Rp[WS(rs, 12)] = FNMS(T7I, T7L, T7G); + Rm[WS(rs, 4)] = FMA(T4r, T7N, T7Q); + Rm[WS(rs, 12)] = FMA(T7I, T7F, T7M); + } + } + T31 = FMA(KP923879532, T30, T2X); + T3r = FNMS(KP923879532, T30, T2X); + T8z = FMA(KP923879532, T8i, T83); + T8j = FNMS(KP923879532, T8i, T83); + { + E T8B, T8x, T8C, T8A; + T8B = FMA(KP923879532, T8w, T8t); + T8x = FNMS(KP923879532, T8w, T8t); + T8C = T1a * T8z; + T8A = T16 * T8z; + { + E T8y, T8k, T2i, T2J; + T8y = T8m * T8j; + T8k = T7S * T8j; + Rm[WS(rs, 3)] = FMA(T16, T8B, T8C); + Rp[WS(rs, 3)] = FNMS(T1a, T8B, T8A); + Rm[WS(rs, 11)] = FMA(T7S, T8x, T8y); + Rp[WS(rs, 11)] = FNMS(T8m, T8x, T8k); + T1R = FMA(KP923879532, T1Q, T1t); + T3m = FNMS(KP923879532, T1Q, T1t); + T33 = FNMS(KP668178637, T28, T2h); + T2i = FMA(KP668178637, T2h, T28); + T2J = FNMS(KP668178637, T2I, T2z); + T32 = FMA(KP668178637, T2z, T2I); + T3s = T2J + T2i; + T2K = T2i - T2J; + } + } + { + E T5l, T53, T5f, T4D, T4K, T4R, T56, T5g; + T5l = FNMS(KP923879532, T52, T4Z); + T53 = FMA(KP923879532, T52, T4Z); + { + E T3t, T3D, T3f, T2L; + T3t = FNMS(KP831469612, T3s, T3r); + T3D = FMA(KP831469612, T3s, T3r); + T3f = FMA(KP831469612, T2K, T1R); + T2L = FNMS(KP831469612, T2K, T1R); + { + E T3n, T34, T3g, T2M; + T3n = T32 + T33; + T34 = T32 - T33; + T3g = T3e * T3f; + T2M = T1c * T2L; + { + E T3o, T3z, T3j, T35; + T3o = FNMS(KP831469612, T3n, T3m); + T3z = FMA(KP831469612, T3n, T3m); + T3j = FMA(KP831469612, T34, T31); + T35 = FNMS(KP831469612, T34, T31); + { + E T3u, T3p, T3E, T3A; + T3u = T3q * T3o; + T3p = T3l * T3o; + T3E = T3C * T3z; + T3A = T3y * T3z; + { + E T3k, T36, T54, T55; + 
T3k = T3e * T3j; + Ip[WS(rs, 2)] = FNMS(T3i, T3j, T3g); + T36 = T1c * T35; + Ip[WS(rs, 10)] = FNMS(T2O, T35, T2M); + Im[WS(rs, 6)] = FMA(T3l, T3t, T3u); + Ip[WS(rs, 6)] = FNMS(T3q, T3t, T3p); + Im[WS(rs, 14)] = FMA(T3y, T3D, T3E); + Ip[WS(rs, 14)] = FNMS(T3C, T3D, T3A); + Im[WS(rs, 2)] = FMA(T3i, T3f, T3k); + Im[WS(rs, 10)] = FMA(T2O, T2L, T36); + T5f = FMA(KP923879532, T4C, T4v); + T4D = FNMS(KP923879532, T4C, T4v); + T4K = FNMS(KP668178637, T4J, T4G); + T54 = FMA(KP668178637, T4G, T4J); + T55 = FMA(KP668178637, T4N, T4Q); + T4R = FNMS(KP668178637, T4Q, T4N); + T56 = T54 - T55; + T5g = T54 + T55; + } + } + } + } + } + { + E T4h, T41, T4c, T3N, T3Q, T3T, T44, T4d; + T4h = FNMS(KP923879532, T40, T3Z); + T41 = FMA(KP923879532, T40, T3Z); + { + E T57, T5b, T5h, T5p; + T57 = FNMS(KP831469612, T56, T53); + T5b = FMA(KP831469612, T56, T53); + T5h = FNMS(KP831469612, T5g, T5f); + T5p = FMA(KP831469612, T5g, T5f); + { + E T5m, T4S, T5i, T5q; + T5m = T4K - T4R; + T4S = T4K + T4R; + T5i = T5e * T5h; + T5q = T17 * T5p; + { + E T5n, T5r, T59, T4T; + T5n = FMA(KP831469612, T5m, T5l); + T5r = FNMS(KP831469612, T5m, T5l); + T59 = FMA(KP831469612, T4S, T4D); + T4T = FNMS(KP831469612, T4S, T4D); + { + E T5o, T5s, T5c, T5a; + T5o = T5e * T5n; + Ip[WS(rs, 5)] = FNMS(T5k, T5n, T5i); + T5s = T17 * T5r; + Ip[WS(rs, 13)] = FNMS(T1b, T5r, T5q); + T5c = T14 * T59; + T5a = T11 * T59; + { + E T58, T4U, T42, T43; + T58 = T4W * T4T; + T4U = T4s * T4T; + Im[WS(rs, 5)] = FMA(T5k, T5h, T5o); + Im[WS(rs, 13)] = FMA(T1b, T5p, T5s); + Im[WS(rs, 1)] = FMA(T11, T5b, T5c); + Ip[WS(rs, 1)] = FNMS(T14, T5b, T5a); + Im[WS(rs, 9)] = FMA(T4s, T57, T58); + Ip[WS(rs, 9)] = FNMS(T4W, T57, T4U); + T4c = FNMS(KP923879532, T3M, T3L); + T3N = FMA(KP923879532, T3M, T3L); + T3Q = FNMS(KP198912367, T3P, T3O); + T42 = FMA(KP198912367, T3O, T3P); + T43 = FNMS(KP198912367, T3R, T3S); + T3T = FMA(KP198912367, T3S, T3R); + T44 = T42 + T43; + T4d = T43 - T42; + } + } + } + } + } + T67 = FNMS(KP923879532, T5K, T5J); + T5L = 
FMA(KP923879532, T5K, T5J); + { + E T45, T49, T4e, T4l; + T45 = FNMS(KP980785280, T44, T41); + T49 = FMA(KP980785280, T44, T41); + T4e = FNMS(KP980785280, T4d, T4c); + T4l = FMA(KP980785280, T4d, T4c); + { + E T4i, T3U, T4f, T4m; + T4i = T3Q - T3T; + T3U = T3Q + T3T; + T4f = T4b * T4e; + T4m = T12 * T4l; + { + E T4j, T4n, T47, T3V; + T4j = FNMS(KP980785280, T4i, T4h); + T4n = FMA(KP980785280, T4i, T4h); + T47 = FMA(KP980785280, T3U, T3N); + T3V = FNMS(KP980785280, T3U, T3N); + { + E T4k, T4o, T4a, T48; + T4k = T4b * T4j; + Ip[WS(rs, 12)] = FNMS(T4g, T4j, T4f); + T4o = T12 * T4n; + Ip[WS(rs, 4)] = FNMS(T15, T4n, T4m); + T4a = T39 * T47; + T48 = T37 * T47; + { + E T46, T3W, T5M, T5N; + T46 = T3Y * T3V; + T3W = T3K * T3V; + Im[WS(rs, 12)] = FMA(T4g, T4e, T4k); + Im[WS(rs, 4)] = FMA(T15, T4l, T4o); + Im[0] = FMA(T37, T49, T4a); + Ip[0] = FNMS(T39, T49, T48); + Im[WS(rs, 8)] = FMA(T3K, T45, T46); + Ip[WS(rs, 8)] = FNMS(T3Y, T45, T3W); + T61 = FMA(KP923879532, T5w, T5v); + T5x = FNMS(KP923879532, T5w, T5v); + T5A = FNMS(KP198912367, T5z, T5y); + T5M = FMA(KP198912367, T5y, T5z); + T5N = FMA(KP198912367, T5B, T5C); + T5D = FNMS(KP198912367, T5C, T5B); + T5O = T5M - T5N; + T62 = T5M + T5N; + } + } + } + } + } + } + } + } + } + } + } + T5V = FMA(KP980785280, T5O, T5L); + T5P = FNMS(KP980785280, T5O, T5L); + { + E T6c, T63, T5E, T68; + T6c = FMA(KP980785280, T62, T61); + T63 = FNMS(KP980785280, T62, T61); + T5E = T5A + T5D; + T68 = T5D - T5A; + { + E T64, T6d, T6f, T69; + T64 = T60 * T63; + T6d = T6b * T6c; + T6f = FNMS(KP980785280, T68, T67); + T69 = FMA(KP980785280, T68, T67); + { + E T5F, T5S, T6a, T6g; + T5F = FMA(KP980785280, T5E, T5x); + T5S = FNMS(KP980785280, T5E, T5x); + T6a = T60 * T69; + Ip[WS(rs, 7)] = FNMS(T66, T69, T64); + T6g = T6b * T6f; + Ip[WS(rs, 15)] = FNMS(T6e, T6f, T6d); + { + E T5W, T5T, T5Q, T5G; + T5W = T5U * T5S; + T5T = T5R * T5S; + T5Q = T5I * T5F; + T5G = T5u * T5F; + Im[WS(rs, 7)] = FMA(T66, T63, T6a); + Im[WS(rs, 15)] = FMA(T6e, T6c, T6g); + 
Im[WS(rs, 3)] = FMA(T5R, T5V, T5W); + Ip[WS(rs, 3)] = FNMS(T5U, T5V, T5T); + Im[WS(rs, 11)] = FMA(T5u, T5P, T5Q); + Ip[WS(rs, 11)] = FNMS(T5I, T5P, T5G); + } + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cb2_32", twinstr, &GENUS, {236, 98, 252, 0} }; + +void X(codelet_hc2cb2_32) (planner *p) { + X(khc2c_register) (p, hc2cb2_32, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hc2cb2_32 -include hc2cb.h */ + +/* + * This function contains 488 FP additions, 280 FP multiplications, + * (or, 376 additions, 168 multiplications, 112 fused multiply/add), + * 160 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) { + E T11, T14, T12, T15, T17, T2z, T2B, T1c, T18, T1d, T1g, T1k, T2F, T2L, T3t; + E T4H, T3h, T3V, T3b, T4v, T4T, T4X, T6t, T71, T6z, T75, T81, T8x, T8f, T8z; + E T2R, T2V, T8p, T8t, T4r, T4t, T53, T69, T3n, T3r, T7P, T7T, T4P, T4R, T6F; + E T6R, T1f, T2X, T1j, T2Y, T1l, T31, T2d, T2Z, T49, 
T4h, T4c, T4i, T4d, T4n; + E T4f, T4j; + { + E T2P, T3q, T2U, T3l, T2Q, T3p, T2T, T3m, T2D, T3g, T2K, T39, T2E, T3f, T2J; + E T3a; + { + E T13, T1b, T16, T1a; + T11 = W[0]; + T14 = W[1]; + T12 = W[2]; + T15 = W[3]; + T13 = T11 * T12; + T1b = T14 * T12; + T16 = T14 * T15; + T1a = T11 * T15; + T17 = T13 + T16; + T2z = T13 - T16; + T2B = T1a + T1b; + T1c = T1a - T1b; + T18 = W[4]; + T2P = T12 * T18; + T3q = T14 * T18; + T2U = T15 * T18; + T3l = T11 * T18; + T1d = W[5]; + T2Q = T15 * T1d; + T3p = T11 * T1d; + T2T = T12 * T1d; + T3m = T14 * T1d; + T1g = W[6]; + T2D = T11 * T1g; + T3g = T15 * T1g; + T2K = T14 * T1g; + T39 = T12 * T1g; + T1k = W[7]; + T2E = T14 * T1k; + T3f = T12 * T1k; + T2J = T11 * T1k; + T3a = T15 * T1k; + } + T2F = T2D - T2E; + T2L = T2J + T2K; + T3t = T39 - T3a; + T4H = T2J - T2K; + T3h = T3f - T3g; + T3V = T3f + T3g; + T3b = T39 + T3a; + T4v = T2D + T2E; + T4T = FMA(T18, T1g, T1d * T1k); + T4X = FNMS(T1d, T1g, T18 * T1k); + { + E T6r, T6s, T6x, T6y; + T6r = T17 * T1g; + T6s = T1c * T1k; + T6t = T6r - T6s; + T71 = T6r + T6s; + T6x = T17 * T1k; + T6y = T1c * T1g; + T6z = T6x + T6y; + T75 = T6x - T6y; + } + { + E T7Z, T80, T8d, T8e; + T7Z = T2z * T1g; + T80 = T2B * T1k; + T81 = T7Z + T80; + T8x = T7Z - T80; + T8d = T2z * T1k; + T8e = T2B * T1g; + T8f = T8d - T8e; + T8z = T8d + T8e; + T2R = T2P - T2Q; + T2V = T2T + T2U; + T8p = FMA(T2R, T1g, T2V * T1k); + T8t = FNMS(T2V, T1g, T2R * T1k); + } + T4r = T2P + T2Q; + T4t = T2T - T2U; + T53 = FMA(T4r, T1g, T4t * T1k); + T69 = FNMS(T4t, T1g, T4r * T1k); + T3n = T3l + T3m; + T3r = T3p - T3q; + T7P = FMA(T3n, T1g, T3r * T1k); + T7T = FNMS(T3r, T1g, T3n * T1k); + T4P = T3l - T3m; + T4R = T3p + T3q; + T6F = FMA(T4P, T1g, T4R * T1k); + T6R = FNMS(T4R, T1g, T4P * T1k); + { + E T19, T1e, T1h, T1i; + T19 = T17 * T18; + T1e = T1c * T1d; + T1f = T19 + T1e; + T2X = T19 - T1e; + T1h = T17 * T1d; + T1i = T1c * T18; + T1j = T1h - T1i; + T2Y = T1h + T1i; + } + T1l = FMA(T1f, T1g, T1j * T1k); + T31 = FNMS(T2Y, T1g, T2X * 
T1k); + T2d = FNMS(T1j, T1g, T1f * T1k); + T2Z = FMA(T2X, T1g, T2Y * T1k); + { + E T47, T48, T4a, T4b; + T47 = T2z * T18; + T48 = T2B * T1d; + T49 = T47 - T48; + T4h = T47 + T48; + T4a = T2z * T1d; + T4b = T2B * T18; + T4c = T4a + T4b; + T4i = T4a - T4b; + } + T4d = FMA(T49, T1g, T4c * T1k); + T4n = FNMS(T4i, T1g, T4h * T1k); + T4f = FNMS(T4c, T1g, T49 * T1k); + T4j = FMA(T4h, T1g, T4i * T1k); + } + { + E T56, T7b, T7C, T6c, Tf, T1m, T6f, T7c, T3Y, T4I, T2t, T32, T5d, T7D, T3w; + E T4w, Tu, T2e, T7g, T7F, T7j, T7G, T1B, T33, T3z, T40, T5l, T6i, T5s, T6h; + E T3C, T3Z, TK, T1D, T7v, T86, T7y, T85, T1S, T35, T3O, T4C, T5F, T6J, T5M; + E T6K, T3R, T4D, TZ, T1U, T7o, T89, T7r, T88, T29, T36, T3H, T4z, T5Y, T6M; + E T65, T6N, T3K, T4A; + { + E T3, T54, T2h, T6b, T6, T6a, T2k, T55, Ta, T57, T2o, T58, Td, T5a, T2r; + E T5b; + { + E T1, T2, T2f, T2g; + T1 = Rp[0]; + T2 = Rm[WS(rs, 15)]; + T3 = T1 + T2; + T54 = T1 - T2; + T2f = Ip[0]; + T2g = Im[WS(rs, 15)]; + T2h = T2f - T2g; + T6b = T2f + T2g; + } + { + E T4, T5, T2i, T2j; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + T6 = T4 + T5; + T6a = T4 - T5; + T2i = Ip[WS(rs, 8)]; + T2j = Im[WS(rs, 7)]; + T2k = T2i - T2j; + T55 = T2i + T2j; + } + { + E T8, T9, T2m, T2n; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + Ta = T8 + T9; + T57 = T8 - T9; + T2m = Ip[WS(rs, 4)]; + T2n = Im[WS(rs, 11)]; + T2o = T2m - T2n; + T58 = T2m + T2n; + } + { + E Tb, Tc, T2p, T2q; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + Td = Tb + Tc; + T5a = Tb - Tc; + T2p = Ip[WS(rs, 12)]; + T2q = Im[WS(rs, 3)]; + T2r = T2p - T2q; + T5b = T2p + T2q; + } + { + E T7, Te, T2l, T2s; + T56 = T54 - T55; + T7b = T54 + T55; + T7C = T6b - T6a; + T6c = T6a + T6b; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T1m = T7 - Te; + { + E T6d, T6e, T3W, T3X; + T6d = T57 + T58; + T6e = T5a + T5b; + T6f = KP707106781 * (T6d - T6e); + T7c = KP707106781 * (T6d + T6e); + T3W = T2h - T2k; + T3X = Ta - Td; + T3Y = T3W - T3X; + T4I = T3X + T3W; + } + T2l = T2h + T2k; + T2s = T2o + 
T2r; + T2t = T2l - T2s; + T32 = T2l + T2s; + { + E T59, T5c, T3u, T3v; + T59 = T57 - T58; + T5c = T5a - T5b; + T5d = KP707106781 * (T59 + T5c); + T7D = KP707106781 * (T59 - T5c); + T3u = T3 - T6; + T3v = T2r - T2o; + T3w = T3u - T3v; + T4w = T3u + T3v; + } + } + } + { + E Ti, T5p, T1w, T5n, Tl, T5m, T1z, T5q, Tp, T5i, T1p, T5g, Ts, T5f, T1s; + E T5j; + { + E Tg, Th, T1u, T1v; + Tg = Rp[WS(rs, 2)]; + Th = Rm[WS(rs, 13)]; + Ti = Tg + Th; + T5p = Tg - Th; + T1u = Ip[WS(rs, 2)]; + T1v = Im[WS(rs, 13)]; + T1w = T1u - T1v; + T5n = T1u + T1v; + } + { + E Tj, Tk, T1x, T1y; + Tj = Rp[WS(rs, 10)]; + Tk = Rm[WS(rs, 5)]; + Tl = Tj + Tk; + T5m = Tj - Tk; + T1x = Ip[WS(rs, 10)]; + T1y = Im[WS(rs, 5)]; + T1z = T1x - T1y; + T5q = T1x + T1y; + } + { + E Tn, To, T1n, T1o; + Tn = Rm[WS(rs, 1)]; + To = Rp[WS(rs, 14)]; + Tp = Tn + To; + T5i = Tn - To; + T1n = Ip[WS(rs, 14)]; + T1o = Im[WS(rs, 1)]; + T1p = T1n - T1o; + T5g = T1n + T1o; + } + { + E Tq, Tr, T1q, T1r; + Tq = Rp[WS(rs, 6)]; + Tr = Rm[WS(rs, 9)]; + Ts = Tq + Tr; + T5f = Tq - Tr; + T1q = Ip[WS(rs, 6)]; + T1r = Im[WS(rs, 9)]; + T1s = T1q - T1r; + T5j = T1q + T1r; + } + { + E Tm, Tt, T7e, T7f; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T2e = Tm - Tt; + T7e = T5p + T5q; + T7f = T5n - T5m; + T7g = FNMS(KP923879532, T7f, KP382683432 * T7e); + T7F = FMA(KP382683432, T7f, KP923879532 * T7e); + } + { + E T7h, T7i, T1t, T1A; + T7h = T5i + T5j; + T7i = T5f + T5g; + T7j = FNMS(KP923879532, T7i, KP382683432 * T7h); + T7G = FMA(KP382683432, T7i, KP923879532 * T7h); + T1t = T1p + T1s; + T1A = T1w + T1z; + T1B = T1t - T1A; + T33 = T1A + T1t; + } + { + E T3x, T3y, T5h, T5k; + T3x = T1p - T1s; + T3y = Tp - Ts; + T3z = T3x - T3y; + T40 = T3y + T3x; + T5h = T5f - T5g; + T5k = T5i - T5j; + T5l = FNMS(KP382683432, T5k, KP923879532 * T5h); + T6i = FMA(KP382683432, T5h, KP923879532 * T5k); + } + { + E T5o, T5r, T3A, T3B; + T5o = T5m + T5n; + T5r = T5p - T5q; + T5s = FMA(KP923879532, T5o, KP382683432 * T5r); + T6h = FNMS(KP382683432, T5o, 
KP923879532 * T5r); + T3A = Ti - Tl; + T3B = T1w - T1z; + T3C = T3A + T3B; + T3Z = T3A - T3B; + } + } + { + E Ty, T5v, T1G, T5H, TB, T5G, T1J, T5w, TI, T5K, T1Q, T5D, TF, T5J, T1N; + E T5A; + { + E Tw, Tx, T1H, T1I; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + Ty = Tw + Tx; + T5v = Tw - Tx; + { + E T1E, T1F, Tz, TA; + T1E = Ip[WS(rs, 1)]; + T1F = Im[WS(rs, 14)]; + T1G = T1E - T1F; + T5H = T1E + T1F; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + TB = Tz + TA; + T5G = Tz - TA; + } + T1H = Ip[WS(rs, 9)]; + T1I = Im[WS(rs, 6)]; + T1J = T1H - T1I; + T5w = T1H + T1I; + { + E TG, TH, T5B, T1O, T1P, T5C; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + T5B = TG - TH; + T1O = Ip[WS(rs, 13)]; + T1P = Im[WS(rs, 2)]; + T5C = T1O + T1P; + TI = TG + TH; + T5K = T5B + T5C; + T1Q = T1O - T1P; + T5D = T5B - T5C; + } + { + E TD, TE, T5y, T1L, T1M, T5z; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + T5y = TD - TE; + T1L = Ip[WS(rs, 5)]; + T1M = Im[WS(rs, 10)]; + T5z = T1L + T1M; + TF = TD + TE; + T5J = T5y + T5z; + T1N = T1L - T1M; + T5A = T5y - T5z; + } + } + { + E TC, TJ, T7t, T7u; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T1D = TC - TJ; + T7t = T5H - T5G; + T7u = KP707106781 * (T5A - T5D); + T7v = T7t + T7u; + T86 = T7t - T7u; + } + { + E T7w, T7x, T1K, T1R; + T7w = T5v + T5w; + T7x = KP707106781 * (T5J + T5K); + T7y = T7w - T7x; + T85 = T7w + T7x; + T1K = T1G + T1J; + T1R = T1N + T1Q; + T1S = T1K - T1R; + T35 = T1K + T1R; + } + { + E T3M, T3N, T5x, T5E; + T3M = T1G - T1J; + T3N = TF - TI; + T3O = T3M - T3N; + T4C = T3N + T3M; + T5x = T5v - T5w; + T5E = KP707106781 * (T5A + T5D); + T5F = T5x - T5E; + T6J = T5x + T5E; + } + { + E T5I, T5L, T3P, T3Q; + T5I = T5G + T5H; + T5L = KP707106781 * (T5J - T5K); + T5M = T5I - T5L; + T6K = T5I + T5L; + T3P = Ty - TB; + T3Q = T1Q - T1N; + T3R = T3P - T3Q; + T4D = T3P + T3Q; + } + } + { + E TN, T5O, T1X, T60, TQ, T5Z, T20, T5P, TX, T63, T27, T5W, TU, T62, T24; + E T5T; + { + E TL, TM, T1Y, T1Z; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + TN 
= TL + TM; + T5O = TL - TM; + { + E T1V, T1W, TO, TP; + T1V = Ip[WS(rs, 15)]; + T1W = Im[0]; + T1X = T1V - T1W; + T60 = T1V + T1W; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + TQ = TO + TP; + T5Z = TO - TP; + } + T1Y = Ip[WS(rs, 7)]; + T1Z = Im[WS(rs, 8)]; + T20 = T1Y - T1Z; + T5P = T1Y + T1Z; + { + E TV, TW, T5U, T25, T26, T5V; + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + T5U = TV - TW; + T25 = Ip[WS(rs, 11)]; + T26 = Im[WS(rs, 4)]; + T5V = T25 + T26; + TX = TV + TW; + T63 = T5U + T5V; + T27 = T25 - T26; + T5W = T5U - T5V; + } + { + E TS, TT, T5R, T22, T23, T5S; + TS = Rp[WS(rs, 3)]; + TT = Rm[WS(rs, 12)]; + T5R = TS - TT; + T22 = Ip[WS(rs, 3)]; + T23 = Im[WS(rs, 12)]; + T5S = T22 + T23; + TU = TS + TT; + T62 = T5R + T5S; + T24 = T22 - T23; + T5T = T5R - T5S; + } + } + { + E TR, TY, T7m, T7n; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + T1U = TR - TY; + T7m = KP707106781 * (T5T - T5W); + T7n = T5Z + T60; + T7o = T7m - T7n; + T89 = T7n + T7m; + } + { + E T7p, T7q, T21, T28; + T7p = T5O + T5P; + T7q = KP707106781 * (T62 + T63); + T7r = T7p - T7q; + T88 = T7p + T7q; + T21 = T1X + T20; + T28 = T24 + T27; + T29 = T21 - T28; + T36 = T21 + T28; + } + { + E T3F, T3G, T5Q, T5X; + T3F = T1X - T20; + T3G = TU - TX; + T3H = T3F - T3G; + T4z = T3G + T3F; + T5Q = T5O - T5P; + T5X = KP707106781 * (T5T + T5W); + T5Y = T5Q - T5X; + T6M = T5Q + T5X; + } + { + E T61, T64, T3I, T3J; + T61 = T5Z - T60; + T64 = KP707106781 * (T62 - T63); + T65 = T61 - T64; + T6N = T61 + T64; + T3I = TN - TQ; + T3J = T27 - T24; + T3K = T3I - T3J; + T4A = T3I + T3J; + } + } + { + E Tv, T10, T30, T34, T37, T38; + Tv = Tf + Tu; + T10 = TK + TZ; + T30 = Tv - T10; + T34 = T32 + T33; + T37 = T35 + T36; + T38 = T34 - T37; + Rp[0] = Tv + T10; + Rm[0] = T34 + T37; + Rp[WS(rs, 8)] = FNMS(T31, T38, T2Z * T30); + Rm[WS(rs, 8)] = FMA(T31, T30, T2Z * T38); + } + { + E T3e, T3o, T3k, T3s; + { + E T3c, T3d, T3i, T3j; + T3c = Tf - Tu; + T3d = T36 - T35; + T3e = T3c - T3d; + T3o = T3c + T3d; + T3i = T32 - T33; + 
T3j = TK - TZ; + T3k = T3i - T3j; + T3s = T3j + T3i; + } + Rp[WS(rs, 12)] = FNMS(T3h, T3k, T3b * T3e); + Rm[WS(rs, 12)] = FMA(T3b, T3k, T3h * T3e); + Rp[WS(rs, 4)] = FNMS(T3r, T3s, T3n * T3o); + Rm[WS(rs, 4)] = FMA(T3n, T3s, T3r * T3o); + } + { + E T1C, T2u, T2M, T2G, T2x, T2H, T2b, T2N; + T1C = T1m + T1B; + T2u = T2e + T2t; + T2M = T2t - T2e; + T2G = T1m - T1B; + { + E T2v, T2w, T1T, T2a; + T2v = T1D + T1S; + T2w = T29 - T1U; + T2x = KP707106781 * (T2v + T2w); + T2H = KP707106781 * (T2w - T2v); + T1T = T1D - T1S; + T2a = T1U + T29; + T2b = KP707106781 * (T1T + T2a); + T2N = KP707106781 * (T1T - T2a); + } + { + E T2c, T2y, T2S, T2W; + T2c = T1C - T2b; + T2y = T2u - T2x; + Rp[WS(rs, 10)] = FNMS(T2d, T2y, T1l * T2c); + Rm[WS(rs, 10)] = FMA(T2d, T2c, T1l * T2y); + T2S = T2G + T2H; + T2W = T2M + T2N; + Rp[WS(rs, 6)] = FNMS(T2V, T2W, T2R * T2S); + Rm[WS(rs, 6)] = FMA(T2R, T2W, T2V * T2S); + } + { + E T2A, T2C, T2I, T2O; + T2A = T1C + T2b; + T2C = T2u + T2x; + Rp[WS(rs, 2)] = FNMS(T2B, T2C, T2z * T2A); + Rm[WS(rs, 2)] = FMA(T2B, T2A, T2z * T2C); + T2I = T2G - T2H; + T2O = T2M - T2N; + Rp[WS(rs, 14)] = FNMS(T2L, T2O, T2F * T2I); + Rm[WS(rs, 14)] = FMA(T2F, T2O, T2L * T2I); + } + } + { + E T4y, T4U, T4K, T4Y, T4F, T4Z, T4N, T4V, T4x, T4J; + T4x = KP707106781 * (T3Z + T40); + T4y = T4w - T4x; + T4U = T4w + T4x; + T4J = KP707106781 * (T3C + T3z); + T4K = T4I - T4J; + T4Y = T4I + T4J; + { + E T4B, T4E, T4L, T4M; + T4B = FNMS(KP382683432, T4A, KP923879532 * T4z); + T4E = FMA(KP923879532, T4C, KP382683432 * T4D); + T4F = T4B - T4E; + T4Z = T4E + T4B; + T4L = FNMS(KP382683432, T4C, KP923879532 * T4D); + T4M = FMA(KP382683432, T4z, KP923879532 * T4A); + T4N = T4L - T4M; + T4V = T4L + T4M; + } + { + E T4G, T4O, T51, T52; + T4G = T4y - T4F; + T4O = T4K - T4N; + Rp[WS(rs, 13)] = FNMS(T4H, T4O, T4v * T4G); + Rm[WS(rs, 13)] = FMA(T4H, T4G, T4v * T4O); + T51 = T4U + T4V; + T52 = T4Y + T4Z; + Rp[WS(rs, 1)] = FNMS(T1c, T52, T17 * T51); + Rm[WS(rs, 1)] = FMA(T17, T52, T1c * T51); + } + { 
+ E T4Q, T4S, T4W, T50; + T4Q = T4y + T4F; + T4S = T4K + T4N; + Rp[WS(rs, 5)] = FNMS(T4R, T4S, T4P * T4Q); + Rm[WS(rs, 5)] = FMA(T4R, T4Q, T4P * T4S); + T4W = T4U - T4V; + T50 = T4Y - T4Z; + Rp[WS(rs, 9)] = FNMS(T4X, T50, T4T * T4W); + Rm[WS(rs, 9)] = FMA(T4T, T50, T4X * T4W); + } + } + { + E T3E, T4k, T42, T4o, T3T, T4p, T45, T4l, T3D, T41; + T3D = KP707106781 * (T3z - T3C); + T3E = T3w - T3D; + T4k = T3w + T3D; + T41 = KP707106781 * (T3Z - T40); + T42 = T3Y - T41; + T4o = T3Y + T41; + { + E T3L, T3S, T43, T44; + T3L = FNMS(KP923879532, T3K, KP382683432 * T3H); + T3S = FMA(KP382683432, T3O, KP923879532 * T3R); + T3T = T3L - T3S; + T4p = T3S + T3L; + T43 = FNMS(KP923879532, T3O, KP382683432 * T3R); + T44 = FMA(KP923879532, T3H, KP382683432 * T3K); + T45 = T43 - T44; + T4l = T43 + T44; + } + { + E T3U, T46, T4s, T4u; + T3U = T3E - T3T; + T46 = T42 - T45; + Rp[WS(rs, 15)] = FNMS(T3V, T46, T3t * T3U); + Rm[WS(rs, 15)] = FMA(T3V, T3U, T3t * T46); + T4s = T4k + T4l; + T4u = T4o + T4p; + Rp[WS(rs, 3)] = FNMS(T4t, T4u, T4r * T4s); + Rm[WS(rs, 3)] = FMA(T4r, T4u, T4t * T4s); + } + { + E T4e, T4g, T4m, T4q; + T4e = T3E + T3T; + T4g = T42 + T45; + Rp[WS(rs, 7)] = FNMS(T4f, T4g, T4d * T4e); + Rm[WS(rs, 7)] = FMA(T4f, T4e, T4d * T4g); + T4m = T4k - T4l; + T4q = T4o - T4p; + Rp[WS(rs, 11)] = FNMS(T4n, T4q, T4j * T4m); + Rm[WS(rs, 11)] = FMA(T4j, T4q, T4n * T4m); + } + } + { + E T6I, T72, T6X, T73, T6P, T77, T6U, T76; + { + E T6G, T6H, T6V, T6W; + T6G = T56 + T5d; + T6H = T6h + T6i; + T6I = T6G + T6H; + T72 = T6G - T6H; + T6V = FMA(KP195090322, T6J, KP980785280 * T6K); + T6W = FNMS(KP195090322, T6M, KP980785280 * T6N); + T6X = T6V + T6W; + T73 = T6W - T6V; + } + { + E T6L, T6O, T6S, T6T; + T6L = FNMS(KP195090322, T6K, KP980785280 * T6J); + T6O = FMA(KP980785280, T6M, KP195090322 * T6N); + T6P = T6L + T6O; + T77 = T6L - T6O; + T6S = T6c + T6f; + T6T = T5s + T5l; + T6U = T6S + T6T; + T76 = T6S - T6T; + } + { + E T6Q, T6Y, T79, T7a; + T6Q = T6I - T6P; + T6Y = T6U - T6X; + Ip[WS(rs, 
8)] = FNMS(T6R, T6Y, T6F * T6Q); + Im[WS(rs, 8)] = FMA(T6R, T6Q, T6F * T6Y); + T79 = T72 + T73; + T7a = T76 + T77; + Ip[WS(rs, 4)] = FNMS(T1d, T7a, T18 * T79); + Im[WS(rs, 4)] = FMA(T18, T7a, T1d * T79); + } + { + E T6Z, T70, T74, T78; + T6Z = T6I + T6P; + T70 = T6U + T6X; + Ip[0] = FNMS(T14, T70, T11 * T6Z); + Im[0] = FMA(T14, T6Z, T11 * T70); + T74 = T72 - T73; + T78 = T76 - T77; + Ip[WS(rs, 12)] = FNMS(T75, T78, T71 * T74); + Im[WS(rs, 12)] = FMA(T71, T78, T75 * T74); + } + } + { + E T84, T8q, T8l, T8r, T8b, T8v, T8i, T8u; + { + E T82, T83, T8j, T8k; + T82 = T7b + T7c; + T83 = T7F + T7G; + T84 = T82 - T83; + T8q = T82 + T83; + T8j = FMA(KP195090322, T86, KP980785280 * T85); + T8k = FMA(KP195090322, T89, KP980785280 * T88); + T8l = T8j - T8k; + T8r = T8j + T8k; + } + { + E T87, T8a, T8g, T8h; + T87 = FNMS(KP980785280, T86, KP195090322 * T85); + T8a = FNMS(KP980785280, T89, KP195090322 * T88); + T8b = T87 + T8a; + T8v = T87 - T8a; + T8g = T7C - T7D; + T8h = T7g - T7j; + T8i = T8g + T8h; + T8u = T8g - T8h; + } + { + E T8c, T8m, T8y, T8A; + T8c = T84 - T8b; + T8m = T8i - T8l; + Ip[WS(rs, 11)] = FNMS(T8f, T8m, T81 * T8c); + Im[WS(rs, 11)] = FMA(T8f, T8c, T81 * T8m); + T8y = T8q + T8r; + T8A = T8u - T8v; + Ip[WS(rs, 15)] = FNMS(T8z, T8A, T8x * T8y); + Im[WS(rs, 15)] = FMA(T8x, T8A, T8z * T8y); + } + { + E T8n, T8o, T8s, T8w; + T8n = T84 + T8b; + T8o = T8i + T8l; + Ip[WS(rs, 3)] = FNMS(T1j, T8o, T1f * T8n); + Im[WS(rs, 3)] = FMA(T1j, T8n, T1f * T8o); + T8s = T8q - T8r; + T8w = T8u + T8v; + Ip[WS(rs, 7)] = FNMS(T8t, T8w, T8p * T8s); + Im[WS(rs, 7)] = FMA(T8p, T8w, T8t * T8s); + } + } + { + E T5u, T6u, T6n, T6v, T67, T6B, T6k, T6A; + { + E T5e, T5t, T6l, T6m; + T5e = T56 - T5d; + T5t = T5l - T5s; + T5u = T5e + T5t; + T6u = T5e - T5t; + T6l = FMA(KP831469612, T5F, KP555570233 * T5M); + T6m = FNMS(KP831469612, T5Y, KP555570233 * T65); + T6n = T6l + T6m; + T6v = T6m - T6l; + } + { + E T5N, T66, T6g, T6j; + T5N = FNMS(KP831469612, T5M, KP555570233 * T5F); + T66 = 
FMA(KP555570233, T5Y, KP831469612 * T65); + T67 = T5N + T66; + T6B = T5N - T66; + T6g = T6c - T6f; + T6j = T6h - T6i; + T6k = T6g + T6j; + T6A = T6g - T6j; + } + { + E T68, T6o, T6D, T6E; + T68 = T5u - T67; + T6o = T6k - T6n; + Ip[WS(rs, 10)] = FNMS(T69, T6o, T53 * T68); + Im[WS(rs, 10)] = FMA(T69, T68, T53 * T6o); + T6D = T6u + T6v; + T6E = T6A + T6B; + Ip[WS(rs, 6)] = FNMS(T4c, T6E, T49 * T6D); + Im[WS(rs, 6)] = FMA(T49, T6E, T4c * T6D); + } + { + E T6p, T6q, T6w, T6C; + T6p = T5u + T67; + T6q = T6k + T6n; + Ip[WS(rs, 2)] = FNMS(T4i, T6q, T4h * T6p); + Im[WS(rs, 2)] = FMA(T4i, T6p, T4h * T6q); + T6w = T6u - T6v; + T6C = T6A - T6B; + Ip[WS(rs, 14)] = FNMS(T6z, T6C, T6t * T6w); + Im[WS(rs, 14)] = FMA(T6t, T6C, T6z * T6w); + } + } + { + E T7l, T7Q, T7L, T7R, T7A, T7V, T7I, T7U; + { + E T7d, T7k, T7J, T7K; + T7d = T7b - T7c; + T7k = T7g + T7j; + T7l = T7d - T7k; + T7Q = T7d + T7k; + T7J = FNMS(KP555570233, T7v, KP831469612 * T7y); + T7K = FMA(KP555570233, T7o, KP831469612 * T7r); + T7L = T7J - T7K; + T7R = T7J + T7K; + } + { + E T7s, T7z, T7E, T7H; + T7s = FNMS(KP555570233, T7r, KP831469612 * T7o); + T7z = FMA(KP831469612, T7v, KP555570233 * T7y); + T7A = T7s - T7z; + T7V = T7z + T7s; + T7E = T7C + T7D; + T7H = T7F - T7G; + T7I = T7E - T7H; + T7U = T7E + T7H; + } + { + E T7B, T7M, T7X, T7Y; + T7B = T7l - T7A; + T7M = T7I - T7L; + Ip[WS(rs, 13)] = FNMS(T1k, T7M, T1g * T7B); + Im[WS(rs, 13)] = FMA(T1k, T7B, T1g * T7M); + T7X = T7Q + T7R; + T7Y = T7U + T7V; + Ip[WS(rs, 1)] = FNMS(T15, T7Y, T12 * T7X); + Im[WS(rs, 1)] = FMA(T12, T7Y, T15 * T7X); + } + { + E T7N, T7O, T7S, T7W; + T7N = T7l + T7A; + T7O = T7I + T7L; + Ip[WS(rs, 5)] = FNMS(T2Y, T7O, T2X * T7N); + Im[WS(rs, 5)] = FMA(T2Y, T7N, T2X * T7O); + T7S = T7Q - T7R; + T7W = T7U - T7V; + Ip[WS(rs, 9)] = FNMS(T7T, T7W, T7P * T7S); + Im[WS(rs, 9)] = FMA(T7P, T7W, T7T * T7S); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + 
{TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cb2_32", twinstr, &GENUS, {376, 168, 112, 0} }; + +void X(codelet_hc2cb2_32) (planner *p) { + X(khc2c_register) (p, hc2cb2_32, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:41 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 4 -dif -name hc2cb2_4 -include hc2cb.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 30 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) { + E Tg, Tc, Te, To, Tn; + { + E T7, Tb, T8, Ta; + T7 = W[0]; + Tb = W[3]; + T8 = W[2]; + Ta = W[1]; + { + E Tu, Tj, T3, Tm, Tx, Tr, T6, Tt; + { + E T4, Tp, Tq, T5; + { + E T1, T2, Tk, Tl; + { + E Th, Tf, T9, Ti; + Th = Ip[0]; + Tf = T7 * Tb; + T9 = T7 * T8; + Ti = Im[WS(rs, 1)]; + T1 = Rp[0]; + Tg = FNMS(Ta, T8, Tf); + Tc = FMA(Ta, Tb, T9); + Tu = Th + Ti; + Tj = Th - Ti; + T2 = Rm[WS(rs, 1)]; + } + Tk = Ip[WS(rs, 1)]; + Tl = Im[0]; + T4 = Rp[WS(rs, 1)]; + T3 = T1 + T2; + Tp = T1 - T2; + Tm = Tk - Tl; + Tq = Tk + Tl; + T5 = Rm[0]; + } + Tx = Tp + Tq; + Tr = Tp - Tq; + T6 = T4 + T5; + Tt = T4 - T5; + } + { + E Tz, Tv, Td, Ts, Tw, TA, Ty; + Rm[0] = Tj + Tm; + Ts = T7 * Tr; + Tz = Tu - Tt; + Tv = Tt + Tu; + Rp[0] = T3 + T6; + Td = T3 - T6; + Ip[0] = FNMS(Ta, Tv, Ts); + Tw = T7 * Tv; + TA = T8 * Tz; + Ty = T8 * Tx; + Te = Tc * Td; + Im[0] = FMA(Ta, Tr, Tw); + Im[WS(rs, 1)] = FMA(Tb, 
Tx, TA); + Ip[WS(rs, 1)] = FNMS(Tb, Tz, Ty); + To = Tg * Td; + Tn = Tj - Tm; + } + } + } + Rm[WS(rs, 1)] = FMA(Tc, Tn, To); + Rp[WS(rs, 1)] = FNMS(Tg, Tn, Te); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cb2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hc2cb2_4) (planner *p) { + X(khc2c_register) (p, hc2cb2_4, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 4 -dif -name hc2cb2_4 -include hc2cb.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) { + E T7, T9, T8, Ta, Tb, Td; + T7 = W[0]; + T9 = W[1]; + T8 = W[2]; + Ta = W[3]; + Tb = FMA(T7, T8, T9 * Ta); + Td = FNMS(T9, T8, T7 * Ta); + { + E T3, Tl, Tg, Tp, T6, To, Tj, Tm, Tc, Tk; + { + E T1, T2, Te, Tf; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + T3 = T1 + T2; + Tl = T1 - T2; + Te = Ip[0]; + Tf = Im[WS(rs, 1)]; + Tg = Te - Tf; + Tp = Te + Tf; + } + { + E T4, T5, Th, Ti; + T4 = Rp[WS(rs, 1)]; + T5 = Rm[0]; + T6 = T4 + T5; + To = T4 - T5; + Th = Ip[WS(rs, 1)]; + Ti = Im[0]; + Tj = Th - Ti; + Tm = Th + Ti; + } + Rp[0] = T3 + T6; + Rm[0] = Tg + Tj; + Tc = T3 - T6; + Tk = Tg - Tj; + Rp[WS(rs, 1)] = FNMS(Td, Tk, Tb * Tc); + Rm[WS(rs, 1)] = FMA(Td, Tc, Tb * Tk); + { + E Tn, Tq, Tr, Ts; + Tn = Tl - Tm; + Tq = To + Tp; + Ip[0] = FNMS(T9, Tq, T7 * Tn); + Im[0] = FMA(T7, Tq, T9 * Tn); + Tr = Tl + Tm; + Ts = Tp - To; + Ip[WS(rs, 1)] = 
FNMS(Ta, Ts, T8 * Tr); + Im[WS(rs, 1)] = FMA(T8, Ts, Ta * Tr); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cb2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hc2cb2_4) (planner *p) { + X(khc2c_register) (p, hc2cb2_4, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:41 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 8 -dif -name hc2cb2_8 -include hc2cb.h */ + +/* + * This function contains 74 FP additions, 50 FP multiplications, + * (or, 44 additions, 20 multiplications, 30 fused multiply/add), + * 64 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) { + E Tf, Ti, TK, Tq, TH, TT, TX, TW, TY, TU, TI; + { + E Tg, Tl, Tp, Th, T1n, T1t, Tj; + Tf = W[0]; + Tg = W[2]; + Tl = W[4]; + Tp = W[5]; + Ti = W[1]; + Th = Tf * Tg; + T1n = Tf * Tl; + T1t = Tf * Tp; + Tj = W[3]; + { + E T1o, T1u, Tk, T1b, To, T1e, T13, TP, T1p, T7, T1h, T1v, TZ, Tv, T1i; + E TB, TA, TQ, Te, T1w, TE, T1j; + { + E Tr, T3, Ts, T1f, TO, TL, T6, Tt; + { + E TM, TN, T4, T5; + { + E T1, Tn, T2, TJ, Tm; + T1 = Rp[0]; + T1o = FMA(Ti, Tp, T1n); + T1u = FNMS(Ti, Tl, T1t); + Tk = FMA(Ti, Tj, Th); + T1b = FNMS(Ti, Tj, Th); + Tn = Tf * Tj; + T2 = Rm[WS(rs, 3)]; + TM = Ip[0]; + TJ = Tk * Tp; + Tm = Tk * Tl; + To = FNMS(Ti, Tg, Tn); + T1e = FMA(Ti, Tg, Tn); + Tr = T1 - T2; + T3 = T1 + T2; + TK = FNMS(To, Tl, TJ); + Tq = FMA(To, Tp, Tm); + TN = Im[WS(rs, 3)]; + } + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 1)]; + 
Ts = Ip[WS(rs, 2)]; + T1f = TM - TN; + TO = TM + TN; + TL = T4 - T5; + T6 = T4 + T5; + Tt = Im[WS(rs, 1)]; + } + { + E Tw, Ta, TC, Tz, Td, TD; + { + E Tx, Ty, Tb, Tc; + { + E T8, T1g, Tu, T9; + T8 = Rp[WS(rs, 1)]; + T13 = TO - TL; + TP = TL + TO; + T1p = T3 - T6; + T7 = T3 + T6; + T1g = Ts - Tt; + Tu = Ts + Tt; + T9 = Rm[WS(rs, 2)]; + Tx = Ip[WS(rs, 1)]; + T1h = T1f + T1g; + T1v = T1f - T1g; + TZ = Tr + Tu; + Tv = Tr - Tu; + Tw = T8 - T9; + Ta = T8 + T9; + Ty = Im[WS(rs, 2)]; + } + Tb = Rm[0]; + Tc = Rp[WS(rs, 3)]; + TC = Ip[WS(rs, 3)]; + T1i = Tx - Ty; + Tz = Tx + Ty; + TB = Tb - Tc; + Td = Tb + Tc; + TD = Im[0]; + } + TA = Tw - Tz; + TQ = Tw + Tz; + Te = Ta + Td; + T1w = Ta - Td; + TE = TC + TD; + T1j = TC - TD; + } + } + { + E T1x, T1k, T1r, TG, TS, T19, T15, T17, T11, T16, T12; + { + E T1B, T1z, T10, T1A, T1C; + T1x = T1v - T1w; + T1B = T1w + T1v; + Rp[0] = T7 + Te; + { + E T1q, TR, TF, T14; + T1k = T1i + T1j; + T1q = T1j - T1i; + TR = TB + TE; + TF = TB - TE; + T1r = T1p - T1q; + T1z = T1p + T1q; + Rm[0] = T1h + T1k; + TG = TA + TF; + T14 = TA - TF; + TS = TQ - TR; + T10 = TQ + TR; + T1A = Tk * T1z; + T19 = FNMS(KP707106781, T14, T13); + T15 = FMA(KP707106781, T14, T13); + T1C = Tk * T1B; + } + T17 = FMA(KP707106781, T10, TZ); + T11 = FNMS(KP707106781, T10, TZ); + Rp[WS(rs, 1)] = FNMS(To, T1B, T1A); + T16 = Tg * T15; + Rm[WS(rs, 1)] = FMA(To, T1z, T1C); + } + T12 = Tg * T11; + { + E T1l, T1a, T1c, T18; + Im[WS(rs, 1)] = FMA(Tj, T11, T16); + Ip[WS(rs, 1)] = FNMS(Tj, T15, T12); + T18 = Tl * T17; + T1l = T1h - T1k; + T1a = Tl * T19; + T1c = T7 - Te; + Ip[WS(rs, 3)] = FNMS(Tp, T19, T18); + { + E T1s, T1m, T1d, T1y, TV; + Im[WS(rs, 3)] = FMA(Tp, T17, T1a); + T1m = T1e * T1c; + T1d = T1b * T1c; + T1s = T1o * T1r; + Rm[WS(rs, 2)] = FMA(T1b, T1l, T1m); + Rp[WS(rs, 2)] = FNMS(T1e, T1l, T1d); + Rp[WS(rs, 3)] = FNMS(T1u, T1x, T1s); + T1y = T1o * T1x; + TV = FMA(KP707106781, TG, Tv); + TH = FNMS(KP707106781, TG, Tv); + TT = FNMS(KP707106781, TS, TP); + TX = 
FMA(KP707106781, TS, TP); + Rm[WS(rs, 3)] = FMA(T1u, T1r, T1y); + TW = Tf * TV; + TY = Ti * TV; + } + } + } + } + } + Ip[0] = FNMS(Ti, TX, TW); + Im[0] = FMA(Tf, TX, TY); + TU = TK * TH; + TI = Tq * TH; + Im[WS(rs, 2)] = FMA(Tq, TT, TU); + Ip[WS(rs, 2)] = FNMS(TK, TT, TI); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cb2_8", twinstr, &GENUS, {44, 20, 30, 0} }; + +void X(codelet_hc2cb2_8) (planner *p) { + X(khc2c_register) (p, hc2cb2_8, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 8 -dif -name hc2cb2_8 -include hc2cb.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 46 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) { + E Tf, Ti, Tg, Tj, Tl, Tp, TP, TR, TF, TG, TH, T15, TL, TT; + { + E Th, To, Tk, Tn; + Tf = W[0]; + Ti = W[1]; + Tg = W[2]; + Tj = W[3]; + Th = Tf * Tg; + To = Ti * Tg; + Tk = Ti * Tj; + Tn = Tf * Tj; + Tl = Th - Tk; + Tp = Tn + To; + TP = Th + Tk; + TR = Tn - To; + TF = W[4]; + TG = W[5]; + TH = FMA(Tf, TF, Ti * TG); + T15 = FNMS(TR, TF, TP * TG); + TL = FNMS(Ti, TF, Tf * TG); + TT = FMA(TP, TF, TR * TG); + } + { + E T7, T1f, T1i, Tw, TI, TW, T18, TM, Te, T19, T1a, TD, TJ, TZ, T12; + E TN, Tm, TE; + { + E T3, TU, Ts, T17, T6, T16, Tv, TV; + { + E T1, T2, Tq, Tr; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + T3 = T1 + T2; + TU = T1 - T2; 
+ Tq = Ip[0]; + Tr = Im[WS(rs, 3)]; + Ts = Tq - Tr; + T17 = Tq + Tr; + } + { + E T4, T5, Tt, Tu; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 1)]; + T6 = T4 + T5; + T16 = T4 - T5; + Tt = Ip[WS(rs, 2)]; + Tu = Im[WS(rs, 1)]; + Tv = Tt - Tu; + TV = Tt + Tu; + } + T7 = T3 + T6; + T1f = TU + TV; + T1i = T17 - T16; + Tw = Ts + Tv; + TI = T3 - T6; + TW = TU - TV; + T18 = T16 + T17; + TM = Ts - Tv; + } + { + E Ta, TX, Tz, TY, Td, T10, TC, T11; + { + E T8, T9, Tx, Ty; + T8 = Rp[WS(rs, 1)]; + T9 = Rm[WS(rs, 2)]; + Ta = T8 + T9; + TX = T8 - T9; + Tx = Ip[WS(rs, 1)]; + Ty = Im[WS(rs, 2)]; + Tz = Tx - Ty; + TY = Tx + Ty; + } + { + E Tb, Tc, TA, TB; + Tb = Rm[0]; + Tc = Rp[WS(rs, 3)]; + Td = Tb + Tc; + T10 = Tb - Tc; + TA = Ip[WS(rs, 3)]; + TB = Im[0]; + TC = TA - TB; + T11 = TA + TB; + } + Te = Ta + Td; + T19 = TX + TY; + T1a = T10 + T11; + TD = Tz + TC; + TJ = TC - Tz; + TZ = TX - TY; + T12 = T10 - T11; + TN = Ta - Td; + } + Rp[0] = T7 + Te; + Rm[0] = Tw + TD; + Tm = T7 - Te; + TE = Tw - TD; + Rp[WS(rs, 2)] = FNMS(Tp, TE, Tl * Tm); + Rm[WS(rs, 2)] = FMA(Tp, Tm, Tl * TE); + { + E TQ, TS, TK, TO; + TQ = TI + TJ; + TS = TN + TM; + Rp[WS(rs, 1)] = FNMS(TR, TS, TP * TQ); + Rm[WS(rs, 1)] = FMA(TP, TS, TR * TQ); + TK = TI - TJ; + TO = TM - TN; + Rp[WS(rs, 3)] = FNMS(TL, TO, TH * TK); + Rm[WS(rs, 3)] = FMA(TH, TO, TL * TK); + } + { + E T1h, T1l, T1k, T1m, T1g, T1j; + T1g = KP707106781 * (T19 + T1a); + T1h = T1f - T1g; + T1l = T1f + T1g; + T1j = KP707106781 * (TZ - T12); + T1k = T1i + T1j; + T1m = T1i - T1j; + Ip[WS(rs, 1)] = FNMS(Tj, T1k, Tg * T1h); + Im[WS(rs, 1)] = FMA(Tg, T1k, Tj * T1h); + Ip[WS(rs, 3)] = FNMS(TG, T1m, TF * T1l); + Im[WS(rs, 3)] = FMA(TF, T1m, TG * T1l); + } + { + E T14, T1d, T1c, T1e, T13, T1b; + T13 = KP707106781 * (TZ + T12); + T14 = TW - T13; + T1d = TW + T13; + T1b = KP707106781 * (T19 - T1a); + T1c = T18 - T1b; + T1e = T18 + T1b; + Ip[WS(rs, 2)] = FNMS(T15, T1c, TT * T14); + Im[WS(rs, 2)] = FMA(T15, T14, TT * T1c); + Ip[0] = FNMS(Ti, T1e, Tf * T1d); + Im[0] = 
FMA(Ti, T1d, Tf * T1e); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cb2_8", twinstr, &GENUS, {56, 26, 18, 0} }; + +void X(codelet_hc2cb2_8) (planner *p) { + X(khc2c_register) (p, hc2cb2_8, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:37 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cb_10 -include hc2cb.h */ + +/* + * This function contains 102 FP additions, 72 FP multiplications, + * (or, 48 additions, 18 multiplications, 54 fused multiply/add), + * 71 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T21, T1Y, T1X; + { + E T1B, TH, T1g, T3, T1V, T1x, T1G, T1E, TM, TK, T11, TB, T7, T1m, T1J; + E TO, Th, T1h, T6, T8, TF, TG, T1i, T9; + TF = Ip[0]; + TG = Im[WS(rs, 4)]; + { + E T1u, Tp, Tu, T1s, Tz, T1v, Ts, Tv; + { + E Tx, Ty, Tn, To, Tq, Tr; + Tn = Ip[WS(rs, 4)]; + To = Im[0]; + Tx = Ip[WS(rs, 3)]; + T1B = TF + TG; + TH = TF - TG; + T1u = Tn + To; + Tp = Tn - To; + Ty = Im[WS(rs, 1)]; + Tq = Ip[WS(rs, 1)]; + Tr = Im[WS(rs, 3)]; + Tu = Ip[WS(rs, 2)]; + T1s = Tx + Ty; + Tz = Tx - Ty; + T1v = Tq + Tr; + Ts = Tq - Tr; + Tv = Im[WS(rs, 2)]; + } + { + E T1, T1w, T1D, TJ, Tt, T1r, Tw, T2; + T1 = Rp[0]; + T1w = T1u + T1v; + T1D = T1u - 
T1v; + TJ = Tp + Ts; + Tt = Tp - Ts; + T1r = Tu + Tv; + Tw = Tu - Tv; + T2 = Rm[WS(rs, 4)]; + { + E Tb, Tc, Te, Tf; + Tb = Rp[WS(rs, 4)]; + { + E T1t, T1C, TI, TA; + T1t = T1r + T1s; + T1C = T1r - T1s; + TI = Tw + Tz; + TA = Tw - Tz; + T1g = T1 - T2; + T3 = T1 + T2; + T1V = FNMS(KP618033988, T1t, T1w); + T1x = FMA(KP618033988, T1w, T1t); + T1G = T1C - T1D; + T1E = T1C + T1D; + TM = TI - TJ; + TK = TI + TJ; + T11 = FMA(KP618033988, Tt, TA); + TB = FNMS(KP618033988, TA, Tt); + Tc = Rm[0]; + } + Te = Rm[WS(rs, 3)]; + Tf = Rp[WS(rs, 1)]; + { + E T4, T1k, Td, T1l, Tg, T5; + T4 = Rp[WS(rs, 2)]; + T1k = Tb - Tc; + Td = Tb + Tc; + T1l = Te - Tf; + Tg = Te + Tf; + T5 = Rm[WS(rs, 2)]; + T7 = Rm[WS(rs, 1)]; + T1m = T1k + T1l; + T1J = T1k - T1l; + TO = Td - Tg; + Th = Td + Tg; + T1h = T4 - T5; + T6 = T4 + T5; + T8 = Rp[WS(rs, 3)]; + } + } + } + } + Rm[0] = TH + TK; + T1i = T7 - T8; + T9 = T7 + T8; + { + E T2d, T1F, T29, T1I, TP, T2c, T1p, Tl, T1o, Tk, T2b, T2e, T17, T14, T13; + T2d = T1B + T1E; + T1F = FNMS(KP250000000, T1E, T1B); + { + E T1j, Ta, T1n, Ti, T2a; + T29 = W[8]; + T1I = T1h - T1i; + T1j = T1h + T1i; + TP = T6 - T9; + Ta = T6 + T9; + T2c = W[9]; + T1p = T1j - T1m; + T1n = T1j + T1m; + Tl = Ta - Th; + Ti = Ta + Th; + T1o = FNMS(KP250000000, T1n, T1g); + T2a = T1g + T1n; + Rp[0] = T3 + Ti; + Tk = FNMS(KP250000000, Ti, T3); + T2b = T29 * T2a; + T2e = T2c * T2a; + } + { + E T16, TQ, T10, Tm, TL; + T16 = FMA(KP618033988, TO, TP); + TQ = FNMS(KP618033988, TP, TO); + Ip[WS(rs, 2)] = FNMS(T2c, T2d, T2b); + Im[WS(rs, 2)] = FMA(T29, T2d, T2e); + T10 = FMA(KP559016994, Tl, Tk); + Tm = FNMS(KP559016994, Tl, Tk); + TL = FNMS(KP250000000, TK, TH); + { + E TE, TU, T12, TR, TX, T1d, T1c, T19, TD, T1e, T1b, TW, TT; + { + E TC, T15, T1a, TS, Tj, TN; + TE = W[3]; + TC = FMA(KP951056516, TB, Tm); + TU = FNMS(KP951056516, TB, Tm); + TN = FNMS(KP559016994, TM, TL); + T15 = FMA(KP559016994, TM, TL); + T12 = FMA(KP951056516, T11, T10); + T1a = FNMS(KP951056516, T11, T10); + TS = TE * TC; 
+ TR = FNMS(KP951056516, TQ, TN); + TX = FMA(KP951056516, TQ, TN); + Tj = W[2]; + T1d = FMA(KP951056516, T16, T15); + T17 = FNMS(KP951056516, T16, T15); + T1c = W[11]; + T19 = W[10]; + Rm[WS(rs, 1)] = FMA(Tj, TR, TS); + TD = Tj * TC; + T1e = T1c * T1a; + T1b = T19 * T1a; + } + Rp[WS(rs, 1)] = FNMS(TE, TR, TD); + Rm[WS(rs, 3)] = FMA(T19, T1d, T1e); + Rp[WS(rs, 3)] = FNMS(T1c, T1d, T1b); + TW = W[15]; + TT = W[14]; + { + E TZ, T18, TY, TV; + T14 = W[7]; + TY = TW * TU; + TV = TT * TU; + TZ = W[6]; + T18 = T14 * T12; + Rm[WS(rs, 4)] = FMA(TT, TX, TY); + Rp[WS(rs, 4)] = FNMS(TW, TX, TV); + T13 = TZ * T12; + Rm[WS(rs, 2)] = FMA(TZ, T17, T18); + } + } + } + { + E T20, T1K, T1q, T1U; + T20 = FNMS(KP618033988, T1I, T1J); + T1K = FMA(KP618033988, T1J, T1I); + Rp[WS(rs, 2)] = FNMS(T14, T17, T13); + T1q = FMA(KP559016994, T1p, T1o); + T1U = FNMS(KP559016994, T1p, T1o); + { + E T1A, T1O, T1W, T1R, T1L, T27, T26, T23, T1z, T28, T25, T1Q, T1N; + { + E T1y, T1Z, T24, T1M, T1f, T1H; + T1A = W[1]; + T1O = FMA(KP951056516, T1x, T1q); + T1y = FNMS(KP951056516, T1x, T1q); + T1Z = FNMS(KP559016994, T1G, T1F); + T1H = FMA(KP559016994, T1G, T1F); + T24 = FMA(KP951056516, T1V, T1U); + T1W = FNMS(KP951056516, T1V, T1U); + T1M = T1A * T1y; + T1R = FNMS(KP951056516, T1K, T1H); + T1L = FMA(KP951056516, T1K, T1H); + T1f = W[0]; + T21 = FMA(KP951056516, T20, T1Z); + T27 = FNMS(KP951056516, T20, T1Z); + T26 = W[13]; + T23 = W[12]; + Im[0] = FMA(T1f, T1L, T1M); + T1z = T1f * T1y; + T28 = T26 * T24; + T25 = T23 * T24; + } + Ip[0] = FNMS(T1A, T1L, T1z); + Im[WS(rs, 3)] = FMA(T23, T27, T28); + Ip[WS(rs, 3)] = FNMS(T26, T27, T25); + T1Q = W[17]; + T1N = W[16]; + { + E T1T, T22, T1S, T1P; + T1Y = W[5]; + T1S = T1Q * T1O; + T1P = T1N * T1O; + T1T = W[4]; + T22 = T1Y * T1W; + Im[WS(rs, 4)] = FMA(T1N, T1R, T1S); + Ip[WS(rs, 4)] = FNMS(T1Q, T1R, T1P); + T1X = T1T * T1W; + Im[WS(rs, 1)] = FMA(T1T, T21, T22); + } + } + } + } + } + Ip[WS(rs, 1)] = FNMS(T1Y, T21, T1X); + } + } +} + +static const tw_instr 
twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cb_10", twinstr, &GENUS, {48, 18, 54, 0} }; + +void X(codelet_hc2cb_10) (planner *p) { + X(khc2c_register) (p, hc2cb_10, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cb_10 -include hc2cb.h */ + +/* + * This function contains 102 FP additions, 60 FP multiplications, + * (or, 72 additions, 30 multiplications, 30 fused multiply/add), + * 39 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T3, T18, TJ, T1i, TE, TF, T1B, T1A, T1f, T1t, Ti, Tl, Tt, TA, T1w; + E T1v, T1p, T1E, TM, TO; + { + E T1, T2, TH, TI; + T1 = Rp[0]; + T2 = Rm[WS(rs, 4)]; + T3 = T1 + T2; + T18 = T1 - T2; + TH = Ip[0]; + TI = Im[WS(rs, 4)]; + TJ = TH - TI; + T1i = TH + TI; + } + { + E T6, T19, Tg, T1d, T9, T1a, Td, T1c; + { + E T4, T5, Te, Tf; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 2)]; + T6 = T4 + T5; + T19 = T4 - T5; + Te = Rm[WS(rs, 3)]; + Tf = Rp[WS(rs, 1)]; + Tg = Te + Tf; + T1d = Te - Tf; + } + { + E T7, T8, Tb, Tc; + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 3)]; + T9 = T7 + T8; + T1a = T7 - T8; + Tb = Rp[WS(rs, 4)]; + Tc = Rm[0]; + Td = Tb + Tc; + T1c = Tb - Tc; + } + TE = T6 - T9; + TF = Td - Tg; + T1B = T1c - T1d; + T1A = T19 - T1a; + { + E T1b, T1e, Ta, Th; + T1b = T19 + T1a; + T1e = T1c + T1d; + T1f = 
T1b + T1e; + T1t = KP559016994 * (T1b - T1e); + Ta = T6 + T9; + Th = Td + Tg; + Ti = Ta + Th; + Tl = KP559016994 * (Ta - Th); + } + } + { + E Tp, T1j, Tz, T1n, Ts, T1k, Tw, T1m; + { + E Tn, To, Tx, Ty; + Tn = Ip[WS(rs, 2)]; + To = Im[WS(rs, 2)]; + Tp = Tn - To; + T1j = Tn + To; + Tx = Ip[WS(rs, 1)]; + Ty = Im[WS(rs, 3)]; + Tz = Tx - Ty; + T1n = Tx + Ty; + } + { + E Tq, Tr, Tu, Tv; + Tq = Ip[WS(rs, 3)]; + Tr = Im[WS(rs, 1)]; + Ts = Tq - Tr; + T1k = Tq + Tr; + Tu = Ip[WS(rs, 4)]; + Tv = Im[0]; + Tw = Tu - Tv; + T1m = Tu + Tv; + } + Tt = Tp - Ts; + TA = Tw - Tz; + T1w = T1m + T1n; + T1v = T1j + T1k; + { + E T1l, T1o, TK, TL; + T1l = T1j - T1k; + T1o = T1m - T1n; + T1p = T1l + T1o; + T1E = KP559016994 * (T1l - T1o); + TK = Tp + Ts; + TL = Tw + Tz; + TM = TK + TL; + TO = KP559016994 * (TK - TL); + } + } + Rp[0] = T3 + Ti; + Rm[0] = TJ + TM; + { + E T1g, T1q, T17, T1h; + T1g = T18 + T1f; + T1q = T1i + T1p; + T17 = W[8]; + T1h = W[9]; + Ip[WS(rs, 2)] = FNMS(T1h, T1q, T17 * T1g); + Im[WS(rs, 2)] = FMA(T1h, T1g, T17 * T1q); + } + { + E TB, TG, T11, TX, TP, T10, Tm, TW, TN, Tk; + TB = FNMS(KP951056516, TA, KP587785252 * Tt); + TG = FNMS(KP951056516, TF, KP587785252 * TE); + T11 = FMA(KP951056516, TE, KP587785252 * TF); + TX = FMA(KP951056516, Tt, KP587785252 * TA); + TN = FNMS(KP250000000, TM, TJ); + TP = TN - TO; + T10 = TO + TN; + Tk = FNMS(KP250000000, Ti, T3); + Tm = Tk - Tl; + TW = Tl + Tk; + { + E TC, TQ, Tj, TD; + TC = Tm - TB; + TQ = TG + TP; + Tj = W[2]; + TD = W[3]; + Rp[WS(rs, 1)] = FNMS(TD, TQ, Tj * TC); + Rm[WS(rs, 1)] = FMA(TD, TC, Tj * TQ); + } + { + E T14, T16, T13, T15; + T14 = TW - TX; + T16 = T11 + T10; + T13 = W[10]; + T15 = W[11]; + Rp[WS(rs, 3)] = FNMS(T15, T16, T13 * T14); + Rm[WS(rs, 3)] = FMA(T15, T14, T13 * T16); + } + { + E TS, TU, TR, TT; + TS = Tm + TB; + TU = TP - TG; + TR = W[14]; + TT = W[15]; + Rp[WS(rs, 4)] = FNMS(TT, TU, TR * TS); + Rm[WS(rs, 4)] = FMA(TT, TS, TR * TU); + } + { + E TY, T12, TV, TZ; + TY = TW + TX; + T12 = T10 - T11; + TV = 
W[6]; + TZ = W[7]; + Rp[WS(rs, 2)] = FNMS(TZ, T12, TV * TY); + Rm[WS(rs, 2)] = FMA(TZ, TY, TV * T12); + } + } + { + E T1x, T1C, T1Q, T1N, T1F, T1R, T1u, T1M, T1D, T1s; + T1x = FNMS(KP951056516, T1w, KP587785252 * T1v); + T1C = FNMS(KP951056516, T1B, KP587785252 * T1A); + T1Q = FMA(KP951056516, T1A, KP587785252 * T1B); + T1N = FMA(KP951056516, T1v, KP587785252 * T1w); + T1D = FNMS(KP250000000, T1p, T1i); + T1F = T1D - T1E; + T1R = T1E + T1D; + T1s = FNMS(KP250000000, T1f, T18); + T1u = T1s - T1t; + T1M = T1t + T1s; + { + E T1y, T1G, T1r, T1z; + T1y = T1u - T1x; + T1G = T1C + T1F; + T1r = W[12]; + T1z = W[13]; + Ip[WS(rs, 3)] = FNMS(T1z, T1G, T1r * T1y); + Im[WS(rs, 3)] = FMA(T1r, T1G, T1z * T1y); + } + { + E T1U, T1W, T1T, T1V; + T1U = T1M + T1N; + T1W = T1R - T1Q; + T1T = W[16]; + T1V = W[17]; + Ip[WS(rs, 4)] = FNMS(T1V, T1W, T1T * T1U); + Im[WS(rs, 4)] = FMA(T1T, T1W, T1V * T1U); + } + { + E T1I, T1K, T1H, T1J; + T1I = T1u + T1x; + T1K = T1F - T1C; + T1H = W[4]; + T1J = W[5]; + Ip[WS(rs, 1)] = FNMS(T1J, T1K, T1H * T1I); + Im[WS(rs, 1)] = FMA(T1H, T1K, T1J * T1I); + } + { + E T1O, T1S, T1L, T1P; + T1O = T1M - T1N; + T1S = T1Q + T1R; + T1L = W[0]; + T1P = W[1]; + Ip[0] = FNMS(T1P, T1S, T1L * T1O); + Im[0] = FMA(T1L, T1S, T1P * T1O); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cb_10", twinstr, &GENUS, {72, 30, 30, 0} }; + +void X(codelet_hc2cb_10) (planner *p) { + X(khc2c_register) (p, hc2cb_10, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * 
it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:38 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cb_12 -include hc2cb.h */ + +/* + * This function contains 118 FP additions, 68 FP multiplications, + * (or, 72 additions, 22 multiplications, 46 fused multiply/add), + * 64 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E T1U, T1X, T1W, T1Y, T1V; + { + E T18, T20, T21, T1b, T2a, T1s, T29, T1p, TO, T11, To, Tb, Tg, T23, T1f; + E Tl, Ty, Tt, T1i, T24, T1z, T2d, T1w, T2c; + { + E T5, Ta, TN, TI; + { + E T1, TE, T6, TM, T7, T1o, T4, T17, TH, T8, TJ, TK; + T1 = Rp[0]; + TE = Ip[0]; + T6 = Rm[WS(rs, 5)]; + TM = Im[WS(rs, 5)]; + { + E T2, T3, TF, TG; + T2 = Rp[WS(rs, 
4)]; + T3 = Rm[WS(rs, 3)]; + TF = Ip[WS(rs, 4)]; + TG = Im[WS(rs, 3)]; + T7 = Rm[WS(rs, 1)]; + T1o = T2 - T3; + T4 = T2 + T3; + T17 = TF + TG; + TH = TF - TG; + T8 = Rp[WS(rs, 2)]; + TJ = Ip[WS(rs, 2)]; + TK = Im[WS(rs, 1)]; + } + { + E T1r, T1a, T19, T1q, T9, TL, T16, T1n; + T5 = T1 + T4; + T16 = FNMS(KP500000000, T4, T1); + T1r = T7 - T8; + T9 = T7 + T8; + T1a = TJ + TK; + TL = TJ - TK; + T18 = FNMS(KP866025403, T17, T16); + T20 = FMA(KP866025403, T17, T16); + T19 = FNMS(KP500000000, T9, T6); + Ta = T6 + T9; + TN = TL - TM; + T1q = FMA(KP500000000, TL, TM); + T1n = FNMS(KP500000000, TH, TE); + TI = TE + TH; + T21 = FNMS(KP866025403, T1a, T19); + T1b = FMA(KP866025403, T1a, T19); + T2a = FMA(KP866025403, T1r, T1q); + T1s = FNMS(KP866025403, T1r, T1q); + T29 = FNMS(KP866025403, T1o, T1n); + T1p = FMA(KP866025403, T1o, T1n); + } + } + { + E Tc, Tp, Th, Tx, Ti, Tf, T1v, Ts, T1e, Tj, Tu, Tv; + Tc = Rp[WS(rs, 3)]; + TO = TI - TN; + T11 = TI + TN; + Tp = Ip[WS(rs, 3)]; + To = T5 - Ta; + Tb = T5 + Ta; + Th = Rm[WS(rs, 2)]; + Tx = Im[WS(rs, 2)]; + { + E Td, Te, Tq, Tr; + Td = Rm[WS(rs, 4)]; + Te = Rm[0]; + Tq = Im[WS(rs, 4)]; + Tr = Im[0]; + Ti = Rp[WS(rs, 1)]; + Tf = Td + Te; + T1v = Td - Te; + Ts = Tq + Tr; + T1e = Tq - Tr; + Tj = Rp[WS(rs, 5)]; + Tu = Ip[WS(rs, 1)]; + Tv = Ip[WS(rs, 5)]; + } + { + E T1y, T1h, T1g, T1x, Tk, Tw, T1d, T1u; + T1d = FNMS(KP500000000, Tf, Tc); + Tg = Tc + Tf; + Tk = Ti + Tj; + T1y = Ti - Tj; + Tw = Tu + Tv; + T1h = Tv - Tu; + T23 = FNMS(KP866025403, T1e, T1d); + T1f = FMA(KP866025403, T1e, T1d); + Tl = Th + Tk; + T1g = FNMS(KP500000000, Tk, Th); + T1x = FMA(KP500000000, Tw, Tx); + Ty = Tw - Tx; + Tt = Tp - Ts; + T1u = FMA(KP500000000, Ts, Tp); + T1i = FMA(KP866025403, T1h, T1g); + T24 = FNMS(KP866025403, T1h, T1g); + T1z = FNMS(KP866025403, T1y, T1x); + T2d = FMA(KP866025403, T1y, T1x); + T1w = FMA(KP866025403, T1v, T1u); + T2c = FNMS(KP866025403, T1v, T1u); + } + } + } + { + E TY, T13, TX, T10; + { + E Tn, T12, TC, Tm, TD, TS, TA, Tz; + Tn 
= W[16]; + T12 = Tt + Ty; + Tz = Tt - Ty; + TC = W[17]; + Tm = Tg + Tl; + TD = Tg - Tl; + TS = To + Tz; + TA = To - Tz; + { + E TV, TU, TW, TT; + { + E TQ, TR, TP, TB; + TV = TO - TD; + TP = TD + TO; + Rp[0] = Tb + Tm; + TB = Tn * TA; + TQ = Tn * TP; + TR = W[4]; + Ip[WS(rs, 4)] = FNMS(TC, TP, TB); + TU = W[5]; + Im[WS(rs, 4)] = FMA(TC, TA, TQ); + TW = TR * TV; + TT = TR * TS; + } + Im[WS(rs, 1)] = FMA(TU, TS, TW); + Ip[WS(rs, 1)] = FNMS(TU, TV, TT); + TY = Tb - Tm; + T13 = T11 - T12; + TX = W[10]; + T10 = W[11]; + Rm[0] = T11 + T12; + } + } + { + E T1K, T1Q, T1P, T1L, T2o, T2u, T2t, T2p; + { + E T1E, T1D, T1H, T1F, T1G, T1t, T1k, T1A; + { + E T1c, TZ, T14, T1j; + T1K = T18 - T1b; + T1c = T18 + T1b; + TZ = TX * TY; + T14 = T10 * TY; + T1j = T1f + T1i; + T1Q = T1f - T1i; + T1P = T1p + T1s; + T1t = T1p - T1s; + Rp[WS(rs, 3)] = FNMS(T10, T13, TZ); + Rm[WS(rs, 3)] = FMA(TX, T13, T14); + T1E = T1c + T1j; + T1k = T1c - T1j; + T1A = T1w - T1z; + T1L = T1w + T1z; + } + { + E T15, T1m, T1B, T1l, T1C; + T15 = W[18]; + T1m = W[19]; + T1D = W[6]; + T1H = T1t + T1A; + T1B = T1t - T1A; + T1l = T15 * T1k; + T1C = T1m * T1k; + T1F = T1D * T1E; + T1G = W[7]; + Rp[WS(rs, 5)] = FNMS(T1m, T1B, T1l); + Rm[WS(rs, 5)] = FMA(T15, T1B, T1C); + } + { + E T26, T2i, T2l, T2f, T1Z, T28; + { + E T22, T1I, T25, T2b, T2e; + T22 = T20 + T21; + T2o = T20 - T21; + Rp[WS(rs, 2)] = FNMS(T1G, T1H, T1F); + T1I = T1G * T1E; + T2u = T23 - T24; + T25 = T23 + T24; + T2b = T29 - T2a; + T2t = T29 + T2a; + T2p = T2c + T2d; + T2e = T2c - T2d; + Rm[WS(rs, 2)] = FMA(T1D, T1H, T1I); + T26 = T22 - T25; + T2i = T22 + T25; + T2l = T2b + T2e; + T2f = T2b - T2e; + } + T1Z = W[2]; + T28 = W[3]; + { + E T2h, T2k, T27, T2g, T2j, T2m; + T2h = W[14]; + T2k = W[15]; + T27 = T1Z * T26; + T2g = T28 * T26; + T2j = T2h * T2i; + T2m = T2k * T2i; + Rp[WS(rs, 1)] = FNMS(T28, T2f, T27); + Rm[WS(rs, 1)] = FMA(T1Z, T2f, T2g); + Rp[WS(rs, 4)] = FNMS(T2k, T2l, T2j); + Rm[WS(rs, 4)] = FMA(T2h, T2l, T2m); + } + } + } + { + E T2y, T2B, 
T2A, T2C, T2z; + { + E T2n, T2q, T2v, T2s, T2r, T2x, T2w; + T2n = W[8]; + T2y = T2o + T2p; + T2q = T2o - T2p; + T2B = T2t - T2u; + T2v = T2t + T2u; + T2s = W[9]; + T2r = T2n * T2q; + T2x = W[20]; + T2w = T2n * T2v; + T2A = W[21]; + Ip[WS(rs, 2)] = FNMS(T2s, T2v, T2r); + T2C = T2x * T2B; + T2z = T2x * T2y; + Im[WS(rs, 2)] = FMA(T2s, T2q, T2w); + } + Im[WS(rs, 5)] = FMA(T2A, T2y, T2C); + Ip[WS(rs, 5)] = FNMS(T2A, T2B, T2z); + { + E T1J, T1M, T1R, T1O, T1N, T1T, T1S; + T1J = W[0]; + T1U = T1K + T1L; + T1M = T1K - T1L; + T1X = T1P - T1Q; + T1R = T1P + T1Q; + T1O = W[1]; + T1N = T1J * T1M; + T1T = W[12]; + T1S = T1J * T1R; + T1W = W[13]; + Ip[0] = FNMS(T1O, T1R, T1N); + T1Y = T1T * T1X; + T1V = T1T * T1U; + Im[0] = FMA(T1O, T1M, T1S); + } + } + } + } + } + Im[WS(rs, 3)] = FMA(T1W, T1U, T1Y); + Ip[WS(rs, 3)] = FNMS(T1W, T1X, T1V); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cb_12", twinstr, &GENUS, {72, 22, 46, 0} }; + +void X(codelet_hc2cb_12) (planner *p) { + X(khc2c_register) (p, hc2cb_12, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cb_12 -include hc2cb.h */ + +/* + * This function contains 118 FP additions, 60 FP multiplications, + * (or, 88 additions, 30 multiplications, 30 fused multiply/add), + * 39 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E T5, TH, T12, T1M, T1i, T1U, Tl, Ty, T1c, T1Y, T1s, T1Q, Ta, TM, T15; 
+ E T1N, T1l, T1V, Tg, Tt, T19, T1X, T1p, T1P; + { + E T1, TD, T4, T1g, TG, T11, T10, T1h; + T1 = Rp[0]; + TD = Ip[0]; + { + E T2, T3, TE, TF; + T2 = Rp[WS(rs, 4)]; + T3 = Rm[WS(rs, 3)]; + T4 = T2 + T3; + T1g = KP866025403 * (T2 - T3); + TE = Ip[WS(rs, 4)]; + TF = Im[WS(rs, 3)]; + TG = TE - TF; + T11 = KP866025403 * (TE + TF); + } + T5 = T1 + T4; + TH = TD + TG; + T10 = FNMS(KP500000000, T4, T1); + T12 = T10 - T11; + T1M = T10 + T11; + T1h = FNMS(KP500000000, TG, TD); + T1i = T1g + T1h; + T1U = T1h - T1g; + } + { + E Th, Tx, Tk, T1a, Tw, T1r, T1b, T1q; + Th = Rm[WS(rs, 2)]; + Tx = Im[WS(rs, 2)]; + { + E Ti, Tj, Tu, Tv; + Ti = Rp[WS(rs, 1)]; + Tj = Rp[WS(rs, 5)]; + Tk = Ti + Tj; + T1a = KP866025403 * (Ti - Tj); + Tu = Ip[WS(rs, 1)]; + Tv = Ip[WS(rs, 5)]; + Tw = Tu + Tv; + T1r = KP866025403 * (Tv - Tu); + } + Tl = Th + Tk; + Ty = Tw - Tx; + T1b = FMA(KP500000000, Tw, Tx); + T1c = T1a - T1b; + T1Y = T1a + T1b; + T1q = FNMS(KP500000000, Tk, Th); + T1s = T1q + T1r; + T1Q = T1q - T1r; + } + { + E T6, TL, T9, T1j, TK, T14, T13, T1k; + T6 = Rm[WS(rs, 5)]; + TL = Im[WS(rs, 5)]; + { + E T7, T8, TI, TJ; + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 2)]; + T9 = T7 + T8; + T1j = KP866025403 * (T7 - T8); + TI = Ip[WS(rs, 2)]; + TJ = Im[WS(rs, 1)]; + TK = TI - TJ; + T14 = KP866025403 * (TI + TJ); + } + Ta = T6 + T9; + TM = TK - TL; + T13 = FNMS(KP500000000, T9, T6); + T15 = T13 + T14; + T1N = T13 - T14; + T1k = FMA(KP500000000, TK, TL); + T1l = T1j - T1k; + T1V = T1j + T1k; + } + { + E Tc, Tp, Tf, T17, Ts, T1o, T18, T1n; + Tc = Rp[WS(rs, 3)]; + Tp = Ip[WS(rs, 3)]; + { + E Td, Te, Tq, Tr; + Td = Rm[WS(rs, 4)]; + Te = Rm[0]; + Tf = Td + Te; + T17 = KP866025403 * (Td - Te); + Tq = Im[WS(rs, 4)]; + Tr = Im[0]; + Ts = Tq + Tr; + T1o = KP866025403 * (Tq - Tr); + } + Tg = Tc + Tf; + Tt = Tp - Ts; + T18 = FMA(KP500000000, Ts, Tp); + T19 = T17 + T18; + T1X = T18 - T17; + T1n = FNMS(KP500000000, Tf, Tc); + T1p = T1n + T1o; + T1P = T1n - T1o; + } + { + E Tb, Tm, TU, TW, TX, TY, TT, TV; + Tb = T5 + 
Ta; + Tm = Tg + Tl; + TU = Tb - Tm; + TW = TH + TM; + TX = Tt + Ty; + TY = TW - TX; + Rp[0] = Tb + Tm; + Rm[0] = TW + TX; + TT = W[10]; + TV = W[11]; + Rp[WS(rs, 3)] = FNMS(TV, TY, TT * TU); + Rm[WS(rs, 3)] = FMA(TV, TU, TT * TY); + } + { + E TA, TQ, TO, TS; + { + E To, Tz, TC, TN; + To = T5 - Ta; + Tz = Tt - Ty; + TA = To - Tz; + TQ = To + Tz; + TC = Tg - Tl; + TN = TH - TM; + TO = TC + TN; + TS = TN - TC; + } + { + E Tn, TB, TP, TR; + Tn = W[16]; + TB = W[17]; + Ip[WS(rs, 4)] = FNMS(TB, TO, Tn * TA); + Im[WS(rs, 4)] = FMA(Tn, TO, TB * TA); + TP = W[4]; + TR = W[5]; + Ip[WS(rs, 1)] = FNMS(TR, TS, TP * TQ); + Im[WS(rs, 1)] = FMA(TP, TS, TR * TQ); + } + } + { + E T28, T2e, T2c, T2g; + { + E T26, T27, T2a, T2b; + T26 = T1M - T1N; + T27 = T1X + T1Y; + T28 = T26 - T27; + T2e = T26 + T27; + T2a = T1U + T1V; + T2b = T1P - T1Q; + T2c = T2a + T2b; + T2g = T2a - T2b; + } + { + E T25, T29, T2d, T2f; + T25 = W[8]; + T29 = W[9]; + Ip[WS(rs, 2)] = FNMS(T29, T2c, T25 * T28); + Im[WS(rs, 2)] = FMA(T25, T2c, T29 * T28); + T2d = W[20]; + T2f = W[21]; + Ip[WS(rs, 5)] = FNMS(T2f, T2g, T2d * T2e); + Im[WS(rs, 5)] = FMA(T2d, T2g, T2f * T2e); + } + } + { + E T1S, T22, T20, T24; + { + E T1O, T1R, T1W, T1Z; + T1O = T1M + T1N; + T1R = T1P + T1Q; + T1S = T1O - T1R; + T22 = T1O + T1R; + T1W = T1U - T1V; + T1Z = T1X - T1Y; + T20 = T1W - T1Z; + T24 = T1W + T1Z; + } + { + E T1L, T1T, T21, T23; + T1L = W[2]; + T1T = W[3]; + Rp[WS(rs, 1)] = FNMS(T1T, T20, T1L * T1S); + Rm[WS(rs, 1)] = FMA(T1T, T1S, T1L * T20); + T21 = W[14]; + T23 = W[15]; + Rp[WS(rs, 4)] = FNMS(T23, T24, T21 * T22); + Rm[WS(rs, 4)] = FMA(T23, T22, T21 * T24); + } + } + { + E T1C, T1I, T1G, T1K; + { + E T1A, T1B, T1E, T1F; + T1A = T12 + T15; + T1B = T1p + T1s; + T1C = T1A - T1B; + T1I = T1A + T1B; + T1E = T1i + T1l; + T1F = T19 + T1c; + T1G = T1E - T1F; + T1K = T1E + T1F; + } + { + E T1z, T1D, T1H, T1J; + T1z = W[18]; + T1D = W[19]; + Rp[WS(rs, 5)] = FNMS(T1D, T1G, T1z * T1C); + Rm[WS(rs, 5)] = FMA(T1D, T1C, T1z * T1G); + T1H = 
W[6]; + T1J = W[7]; + Rp[WS(rs, 2)] = FNMS(T1J, T1K, T1H * T1I); + Rm[WS(rs, 2)] = FMA(T1J, T1I, T1H * T1K); + } + } + { + E T1e, T1w, T1u, T1y; + { + E T16, T1d, T1m, T1t; + T16 = T12 - T15; + T1d = T19 - T1c; + T1e = T16 - T1d; + T1w = T16 + T1d; + T1m = T1i - T1l; + T1t = T1p - T1s; + T1u = T1m + T1t; + T1y = T1m - T1t; + } + { + E TZ, T1f, T1v, T1x; + TZ = W[0]; + T1f = W[1]; + Ip[0] = FNMS(T1f, T1u, TZ * T1e); + Im[0] = FMA(TZ, T1u, T1f * T1e); + T1v = W[12]; + T1x = W[13]; + Ip[WS(rs, 3)] = FNMS(T1x, T1y, T1v * T1w); + Im[WS(rs, 3)] = FMA(T1v, T1y, T1x * T1w); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cb_12", twinstr, &GENUS, {88, 30, 30, 0} }; + +void X(codelet_hc2cb_12) (planner *p) { + X(khc2c_register) (p, hc2cb_12, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:38 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hc2cb_16 -include hc2cb.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 104 additions, 30 multiplications, 70 fused multiply/add), + * 78 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T1I, T1L, T1K, T1M, T1J; + { + E T1O, TA, T1h, T21, T3b, T2T, T3D, T3r, T1k, T1P, T3y, Tf, T36, T2A, T22; + E TL, T2F, T2U, T3u, T3z, T2K, T2V, T12, Tu, T3E, TX, T1n, T17, T1T, T24; + E T1W, T25; + { + E T2z, TF, TK, T2w; + { + E Tw, T3, T2Q, T1g, T1d, T6, T2R, Tz, Tb, TB, Ta, T2y, TE, Tc, TH; + E TI; + { + E T4, T5, Tx, Ty; + { + E T1, T2, T1e, T1f; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + T1e = Ip[0]; + T1f = Im[WS(rs, 7)]; + T4 = Rp[WS(rs, 4)]; + Tw = T1 - T2; + T3 = T1 + T2; + T2Q = T1e - T1f; + T1g = T1e + T1f; + T5 = Rm[WS(rs, 3)]; + Tx = Ip[WS(rs, 4)]; + Ty = Im[WS(rs, 3)]; + } + { + E T8, T9, TC, TD; + T8 = Rp[WS(rs, 2)]; + T1d = T4 - T5; + T6 = T4 + T5; + T2R = Tx - Ty; + Tz = Tx + Ty; + T9 = 
Rm[WS(rs, 5)]; + TC = Ip[WS(rs, 2)]; + TD = Im[WS(rs, 5)]; + Tb = Rm[WS(rs, 1)]; + TB = T8 - T9; + Ta = T8 + T9; + T2y = TC - TD; + TE = TC + TD; + Tc = Rp[WS(rs, 6)]; + TH = Ip[WS(rs, 6)]; + TI = Im[WS(rs, 1)]; + } + } + { + E TG, T2x, TJ, Te, T2P, T2S, T3p, Td; + T1O = Tw + Tz; + TA = Tw - Tz; + TG = Tb - Tc; + Td = Tb + Tc; + T2x = TH - TI; + TJ = TH + TI; + T1h = T1d + T1g; + T21 = T1g - T1d; + Te = Ta + Td; + T2P = Ta - Td; + T2S = T2Q - T2R; + T3p = T2Q + T2R; + { + E T1i, T1j, T3q, T7; + T3q = T2y + T2x; + T2z = T2x - T2y; + TF = TB - TE; + T1i = TB + TE; + T3b = T2S - T2P; + T2T = T2P + T2S; + TK = TG - TJ; + T1j = TG + TJ; + T3D = T3p - T3q; + T3r = T3p + T3q; + T2w = T3 - T6; + T7 = T3 + T6; + T1k = T1i - T1j; + T1P = T1i + T1j; + T3y = T7 - Te; + Tf = T7 + Te; + } + } + } + { + E T13, Ti, T2C, T11, TY, Tl, T2D, T16, Tq, TS, Tp, T2H, TQ, Tr, TT; + E TU; + { + E Tj, Tk, T14, T15; + { + E Tg, Th, TZ, T10; + Tg = Rp[WS(rs, 1)]; + T36 = T2w - T2z; + T2A = T2w + T2z; + T22 = TF - TK; + TL = TF + TK; + Th = Rm[WS(rs, 6)]; + TZ = Ip[WS(rs, 1)]; + T10 = Im[WS(rs, 6)]; + Tj = Rp[WS(rs, 5)]; + T13 = Tg - Th; + Ti = Tg + Th; + T2C = TZ - T10; + T11 = TZ + T10; + Tk = Rm[WS(rs, 2)]; + T14 = Ip[WS(rs, 5)]; + T15 = Im[WS(rs, 2)]; + } + { + E Tn, To, TO, TP; + Tn = Rm[0]; + TY = Tj - Tk; + Tl = Tj + Tk; + T2D = T14 - T15; + T16 = T14 + T15; + To = Rp[WS(rs, 7)]; + TO = Ip[WS(rs, 7)]; + TP = Im[0]; + Tq = Rp[WS(rs, 3)]; + TS = Tn - To; + Tp = Tn + To; + T2H = TO - TP; + TQ = TO + TP; + Tr = Rm[WS(rs, 4)]; + TT = Ip[WS(rs, 3)]; + TU = Im[WS(rs, 4)]; + } + } + { + E TN, TV, Tm, Tt; + { + E T2E, T3s, Ts, T2B, T3t, T2J, T2I, T2G; + T2E = T2C - T2D; + T3s = T2C + T2D; + TN = Tq - Tr; + Ts = Tq + Tr; + T2I = TT - TU; + TV = TT + TU; + T2B = Ti - Tl; + Tm = Ti + Tl; + T3t = T2H + T2I; + T2J = T2H - T2I; + Tt = Tp + Ts; + T2G = Tp - Ts; + T2F = T2B - T2E; + T2U = T2B + T2E; + T3u = T3s + T3t; + T3z = T3t - T3s; + T2K = T2G + T2J; + T2V = T2J - T2G; + } + { + E T1U, T1V, T1R, 
T1S, TR, TW; + TR = TN - TQ; + T1U = TN + TQ; + T1V = TS + TV; + TW = TS - TV; + T1R = T11 - TY; + T12 = TY + T11; + Tu = Tm + Tt; + T3E = Tm - Tt; + TX = FNMS(KP414213562, TW, TR); + T1n = FMA(KP414213562, TR, TW); + T17 = T13 - T16; + T1S = T13 + T16; + T1T = FNMS(KP414213562, T1S, T1R); + T24 = FMA(KP414213562, T1R, T1S); + T1W = FNMS(KP414213562, T1V, T1U); + T25 = FMA(KP414213562, T1U, T1V); + } + } + } + } + { + E T18, T1m, T2W, T2L, T3j, T3i, T3h; + { + E T3m, T3v, T3l, T3o; + Rp[0] = Tf + Tu; + T18 = FMA(KP414213562, T17, T12); + T1m = FNMS(KP414213562, T12, T17); + T3m = Tf - Tu; + T3v = T3r - T3u; + T3l = W[14]; + T3o = W[15]; + Rm[0] = T3r + T3u; + { + E T3A, T3I, T3L, T3F, T3C, T3G, T3B, T3x, T3n, T3w, T3H, T3K; + T3A = T3y - T3z; + T3I = T3y + T3z; + T3n = T3l * T3m; + T3w = T3o * T3m; + T3L = T3E + T3D; + T3F = T3D - T3E; + T3x = W[22]; + Rp[WS(rs, 4)] = FNMS(T3o, T3v, T3n); + Rm[WS(rs, 4)] = FMA(T3l, T3v, T3w); + T3C = W[23]; + T3G = T3x * T3F; + T3B = T3x * T3A; + Rm[WS(rs, 6)] = FMA(T3C, T3A, T3G); + Rp[WS(rs, 6)] = FNMS(T3C, T3F, T3B); + T3H = W[6]; + T3K = W[7]; + { + E T3g, T38, T3d, T35, T3a; + { + E T37, T3c, T3M, T3J; + T37 = T2V - T2U; + T2W = T2U + T2V; + T2L = T2F + T2K; + T3c = T2F - T2K; + T3M = T3H * T3L; + T3J = T3H * T3I; + T3g = FMA(KP707106781, T37, T36); + T38 = FNMS(KP707106781, T37, T36); + Rm[WS(rs, 2)] = FMA(T3K, T3I, T3M); + Rp[WS(rs, 2)] = FNMS(T3K, T3L, T3J); + T3d = FNMS(KP707106781, T3c, T3b); + T3j = FMA(KP707106781, T3c, T3b); + } + T35 = W[26]; + T3a = W[27]; + { + E T3f, T3e, T39, T3k; + T3f = W[10]; + T3i = W[11]; + T3e = T35 * T3d; + T39 = T35 * T38; + T3k = T3f * T3j; + T3h = T3f * T3g; + Rm[WS(rs, 7)] = FMA(T3a, T38, T3e); + Rp[WS(rs, 7)] = FNMS(T3a, T3d, T39); + Rm[WS(rs, 3)] = FMA(T3i, T3g, T3k); + } + } + } + } + Rp[WS(rs, 3)] = FNMS(T3i, T3j, T3h); + { + E T2g, T2m, T2l, T2h, T2d, T29, T2c, T2b, T2e; + { + E T33, T2Z, T32, T31, T34; + { + E T2v, T30, T2M, T2X, T2O, T2N, T2Y; + T2v = W[18]; + T30 = 
FMA(KP707106781, T2L, T2A); + T2M = FNMS(KP707106781, T2L, T2A); + T33 = FMA(KP707106781, T2W, T2T); + T2X = FNMS(KP707106781, T2W, T2T); + T2O = W[19]; + T2N = T2v * T2M; + T2Z = W[2]; + T32 = W[3]; + T2Y = T2O * T2M; + Rp[WS(rs, 5)] = FNMS(T2O, T2X, T2N); + T31 = T2Z * T30; + T34 = T32 * T30; + Rm[WS(rs, 5)] = FMA(T2v, T2X, T2Y); + } + { + E T1Q, T1X, T23, T26; + T2g = FMA(KP707106781, T1P, T1O); + T1Q = FNMS(KP707106781, T1P, T1O); + Rp[WS(rs, 1)] = FNMS(T32, T33, T31); + Rm[WS(rs, 1)] = FMA(T2Z, T33, T34); + T1X = T1T + T1W; + T2m = T1W - T1T; + T2l = FNMS(KP707106781, T22, T21); + T23 = FMA(KP707106781, T22, T21); + T26 = T24 - T25; + T2h = T24 + T25; + { + E T1N, T2a, T1Y, T27, T20, T1Z, T28; + T1N = W[20]; + T2a = FNMS(KP923879532, T1X, T1Q); + T1Y = FMA(KP923879532, T1X, T1Q); + T2d = FMA(KP923879532, T26, T23); + T27 = FNMS(KP923879532, T26, T23); + T20 = W[21]; + T1Z = T1N * T1Y; + T29 = W[4]; + T2c = W[5]; + T28 = T20 * T1Y; + Ip[WS(rs, 5)] = FNMS(T20, T27, T1Z); + T2b = T29 * T2a; + T2e = T2c * T2a; + Im[WS(rs, 5)] = FMA(T1N, T27, T28); + } + } + } + { + E T1y, T1E, T1D, T1z, T1v, T1r, T1u, T1t, T1w; + { + E TM, T19, T1l, T1o; + T1y = FMA(KP707106781, TL, TA); + TM = FNMS(KP707106781, TL, TA); + Ip[WS(rs, 1)] = FNMS(T2c, T2d, T2b); + Im[WS(rs, 1)] = FMA(T29, T2d, T2e); + T19 = TX - T18; + T1E = T18 + TX; + T1D = FMA(KP707106781, T1k, T1h); + T1l = FNMS(KP707106781, T1k, T1h); + T1o = T1m - T1n; + T1z = T1m + T1n; + { + E Tv, T1s, T1a, T1p, T1c, T1b, T1q; + Tv = W[24]; + T1s = FMA(KP923879532, T19, TM); + T1a = FNMS(KP923879532, T19, TM); + T1v = FMA(KP923879532, T1o, T1l); + T1p = FNMS(KP923879532, T1o, T1l); + T1c = W[25]; + T1b = Tv * T1a; + T1r = W[8]; + T1u = W[9]; + T1q = T1c * T1a; + Ip[WS(rs, 6)] = FNMS(T1c, T1p, T1b); + T1t = T1r * T1s; + T1w = T1u * T1s; + Im[WS(rs, 6)] = FMA(Tv, T1p, T1q); + } + } + { + E T2q, T2t, T2s, T2u, T2r; + Ip[WS(rs, 2)] = FNMS(T1u, T1v, T1t); + Im[WS(rs, 2)] = FMA(T1r, T1v, T1w); + { + E T2f, T2i, T2n, T2k, T2j, T2p, 
T2o; + T2f = W[12]; + T2q = FMA(KP923879532, T2h, T2g); + T2i = FNMS(KP923879532, T2h, T2g); + T2t = FNMS(KP923879532, T2m, T2l); + T2n = FMA(KP923879532, T2m, T2l); + T2k = W[13]; + T2j = T2f * T2i; + T2p = W[28]; + T2o = T2f * T2n; + T2s = W[29]; + Ip[WS(rs, 3)] = FNMS(T2k, T2n, T2j); + T2u = T2p * T2t; + T2r = T2p * T2q; + Im[WS(rs, 3)] = FMA(T2k, T2i, T2o); + } + Im[WS(rs, 7)] = FMA(T2s, T2q, T2u); + Ip[WS(rs, 7)] = FNMS(T2s, T2t, T2r); + { + E T1x, T1A, T1F, T1C, T1B, T1H, T1G; + T1x = W[16]; + T1I = FMA(KP923879532, T1z, T1y); + T1A = FNMS(KP923879532, T1z, T1y); + T1L = FMA(KP923879532, T1E, T1D); + T1F = FNMS(KP923879532, T1E, T1D); + T1C = W[17]; + T1B = T1x * T1A; + T1H = W[0]; + T1G = T1x * T1F; + T1K = W[1]; + Ip[WS(rs, 4)] = FNMS(T1C, T1F, T1B); + T1M = T1H * T1L; + T1J = T1H * T1I; + Im[WS(rs, 4)] = FMA(T1C, T1A, T1G); + } + } + } + } + } + } + Im[0] = FMA(T1K, T1I, T1M); + Ip[0] = FNMS(T1K, T1L, T1J); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cb_16", twinstr, &GENUS, {104, 30, 70, 0} }; + +void X(codelet_hc2cb_16) (planner *p) { + X(khc2c_register) (p, hc2cb_16, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hc2cb_16 -include hc2cb.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 50 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp 
+ ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T7, T2K, T2W, Tw, T17, T1S, T2k, T1w, Te, TD, T1x, T10, T2n, T2L, T1Z; + E T2X, Tm, T1z, TN, T19, T2e, T2p, T2P, T2Z, Tt, T1A, TW, T1a, T27, T2q; + E T2S, T30; + { + E T3, T1Q, T13, T2j, T6, T2i, T16, T1R; + { + E T1, T2, T11, T12; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + T3 = T1 + T2; + T1Q = T1 - T2; + T11 = Ip[0]; + T12 = Im[WS(rs, 7)]; + T13 = T11 - T12; + T2j = T11 + T12; + } + { + E T4, T5, T14, T15; + T4 = Rp[WS(rs, 4)]; + T5 = Rm[WS(rs, 3)]; + T6 = T4 + T5; + T2i = T4 - T5; + T14 = Ip[WS(rs, 4)]; + T15 = Im[WS(rs, 3)]; + T16 = T14 - T15; + T1R = T14 + T15; + } + T7 = T3 + T6; + T2K = T1Q + T1R; + T2W = T2j - T2i; + Tw = T3 - T6; + T17 = T13 - T16; + T1S = T1Q - T1R; + T2k = T2i + T2j; + T1w = T13 + T16; + } + { + E Ta, T1T, TC, T1U, Td, T1W, Tz, T1X; + { + E T8, T9, TA, TB; + T8 = Rp[WS(rs, 2)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + T1T = T8 - T9; + TA = Ip[WS(rs, 2)]; + TB = Im[WS(rs, 5)]; + TC = TA - TB; + T1U = TA + TB; + } + { + E Tb, Tc, Tx, Ty; + Tb = Rm[WS(rs, 1)]; + Tc = Rp[WS(rs, 6)]; + Td = Tb + Tc; + T1W = Tb - Tc; + Tx = Ip[WS(rs, 6)]; + Ty = Im[WS(rs, 1)]; + Tz = Tx - Ty; + T1X = Tx + Ty; + } + Te = Ta + Td; + TD = Tz - TC; + T1x = TC + Tz; + T10 = Ta - Td; + { + E T2l, T2m, T1V, T1Y; + T2l = T1T + T1U; + T2m = T1W + T1X; + T2n = KP707106781 * (T2l - T2m); + T2L = KP707106781 * (T2l + T2m); + T1V = T1T - T1U; + T1Y = T1W - T1X; + T1Z = KP707106781 * (T1V + T1Y); + T2X = KP707106781 * (T1V - T1Y); + } + } + { + E Ti, T2b, TI, T29, Tl, T28, TL, T2c, TF, TM; + { + E Tg, Th, TG, TH; + Tg = Rp[WS(rs, 1)]; + Th = Rm[WS(rs, 6)]; + Ti = Tg + Th; + T2b = Tg - Th; + TG = Ip[WS(rs, 1)]; + TH = Im[WS(rs, 6)]; + TI = TG - TH; + T29 = TG + TH; + } + { + E Tj, Tk, TJ, TK; + Tj = Rp[WS(rs, 5)]; + Tk = Rm[WS(rs, 2)]; + Tl = Tj + Tk; + T28 = Tj - Tk; + TJ = Ip[WS(rs, 5)]; + TK = Im[WS(rs, 2)]; + TL = TJ - TK; + T2c = TJ + TK; + } + Tm = Ti + Tl; + T1z = TI + TL; + TF = 
Ti - Tl; + TM = TI - TL; + TN = TF - TM; + T19 = TF + TM; + { + E T2a, T2d, T2N, T2O; + T2a = T28 + T29; + T2d = T2b - T2c; + T2e = FMA(KP923879532, T2a, KP382683432 * T2d); + T2p = FNMS(KP382683432, T2a, KP923879532 * T2d); + T2N = T2b + T2c; + T2O = T29 - T28; + T2P = FNMS(KP923879532, T2O, KP382683432 * T2N); + T2Z = FMA(KP382683432, T2O, KP923879532 * T2N); + } + } + { + E Tp, T24, TR, T22, Ts, T21, TU, T25, TO, TV; + { + E Tn, To, TP, TQ; + Tn = Rm[0]; + To = Rp[WS(rs, 7)]; + Tp = Tn + To; + T24 = Tn - To; + TP = Ip[WS(rs, 7)]; + TQ = Im[0]; + TR = TP - TQ; + T22 = TP + TQ; + } + { + E Tq, Tr, TS, TT; + Tq = Rp[WS(rs, 3)]; + Tr = Rm[WS(rs, 4)]; + Ts = Tq + Tr; + T21 = Tq - Tr; + TS = Ip[WS(rs, 3)]; + TT = Im[WS(rs, 4)]; + TU = TS - TT; + T25 = TS + TT; + } + Tt = Tp + Ts; + T1A = TR + TU; + TO = Tp - Ts; + TV = TR - TU; + TW = TO + TV; + T1a = TV - TO; + { + E T23, T26, T2Q, T2R; + T23 = T21 - T22; + T26 = T24 - T25; + T27 = FNMS(KP382683432, T26, KP923879532 * T23); + T2q = FMA(KP382683432, T23, KP923879532 * T26); + T2Q = T24 + T25; + T2R = T21 + T22; + T2S = FNMS(KP923879532, T2R, KP382683432 * T2Q); + T30 = FMA(KP382683432, T2R, KP923879532 * T2Q); + } + } + { + E Tf, Tu, T1u, T1y, T1B, T1C, T1t, T1v; + Tf = T7 + Te; + Tu = Tm + Tt; + T1u = Tf - Tu; + T1y = T1w + T1x; + T1B = T1z + T1A; + T1C = T1y - T1B; + Rp[0] = Tf + Tu; + Rm[0] = T1y + T1B; + T1t = W[14]; + T1v = W[15]; + Rp[WS(rs, 4)] = FNMS(T1v, T1C, T1t * T1u); + Rm[WS(rs, 4)] = FMA(T1v, T1u, T1t * T1C); + } + { + E T2U, T34, T32, T36; + { + E T2M, T2T, T2Y, T31; + T2M = T2K - T2L; + T2T = T2P + T2S; + T2U = T2M - T2T; + T34 = T2M + T2T; + T2Y = T2W + T2X; + T31 = T2Z - T30; + T32 = T2Y - T31; + T36 = T2Y + T31; + } + { + E T2J, T2V, T33, T35; + T2J = W[20]; + T2V = W[21]; + Ip[WS(rs, 5)] = FNMS(T2V, T32, T2J * T2U); + Im[WS(rs, 5)] = FMA(T2V, T2U, T2J * T32); + T33 = W[4]; + T35 = W[5]; + Ip[WS(rs, 1)] = FNMS(T35, T36, T33 * T34); + Im[WS(rs, 1)] = FMA(T35, T34, T33 * T36); + } + } + { + E T3a, 
T3g, T3e, T3i; + { + E T38, T39, T3c, T3d; + T38 = T2K + T2L; + T39 = T2Z + T30; + T3a = T38 - T39; + T3g = T38 + T39; + T3c = T2W - T2X; + T3d = T2P - T2S; + T3e = T3c + T3d; + T3i = T3c - T3d; + } + { + E T37, T3b, T3f, T3h; + T37 = W[12]; + T3b = W[13]; + Ip[WS(rs, 3)] = FNMS(T3b, T3e, T37 * T3a); + Im[WS(rs, 3)] = FMA(T37, T3e, T3b * T3a); + T3f = W[28]; + T3h = W[29]; + Ip[WS(rs, 7)] = FNMS(T3h, T3i, T3f * T3g); + Im[WS(rs, 7)] = FMA(T3f, T3i, T3h * T3g); + } + } + { + E TY, T1e, T1c, T1g; + { + E TE, TX, T18, T1b; + TE = Tw + TD; + TX = KP707106781 * (TN + TW); + TY = TE - TX; + T1e = TE + TX; + T18 = T10 + T17; + T1b = KP707106781 * (T19 + T1a); + T1c = T18 - T1b; + T1g = T18 + T1b; + } + { + E Tv, TZ, T1d, T1f; + Tv = W[18]; + TZ = W[19]; + Rp[WS(rs, 5)] = FNMS(TZ, T1c, Tv * TY); + Rm[WS(rs, 5)] = FMA(TZ, TY, Tv * T1c); + T1d = W[2]; + T1f = W[3]; + Rp[WS(rs, 1)] = FNMS(T1f, T1g, T1d * T1e); + Rm[WS(rs, 1)] = FMA(T1f, T1e, T1d * T1g); + } + } + { + E T1k, T1q, T1o, T1s; + { + E T1i, T1j, T1m, T1n; + T1i = Tw - TD; + T1j = KP707106781 * (T1a - T19); + T1k = T1i - T1j; + T1q = T1i + T1j; + T1m = T17 - T10; + T1n = KP707106781 * (TN - TW); + T1o = T1m - T1n; + T1s = T1m + T1n; + } + { + E T1h, T1l, T1p, T1r; + T1h = W[26]; + T1l = W[27]; + Rp[WS(rs, 7)] = FNMS(T1l, T1o, T1h * T1k); + Rm[WS(rs, 7)] = FMA(T1h, T1o, T1l * T1k); + T1p = W[10]; + T1r = W[11]; + Rp[WS(rs, 3)] = FNMS(T1r, T1s, T1p * T1q); + Rm[WS(rs, 3)] = FMA(T1p, T1s, T1r * T1q); + } + } + { + E T2g, T2u, T2s, T2w; + { + E T20, T2f, T2o, T2r; + T20 = T1S - T1Z; + T2f = T27 - T2e; + T2g = T20 - T2f; + T2u = T20 + T2f; + T2o = T2k - T2n; + T2r = T2p - T2q; + T2s = T2o - T2r; + T2w = T2o + T2r; + } + { + E T1P, T2h, T2t, T2v; + T1P = W[24]; + T2h = W[25]; + Ip[WS(rs, 6)] = FNMS(T2h, T2s, T1P * T2g); + Im[WS(rs, 6)] = FMA(T2h, T2g, T1P * T2s); + T2t = W[8]; + T2v = W[9]; + Ip[WS(rs, 2)] = FNMS(T2v, T2w, T2t * T2u); + Im[WS(rs, 2)] = FMA(T2v, T2u, T2t * T2w); + } + } + { + E T2A, T2G, T2E, T2I; + { + E 
T2y, T2z, T2C, T2D; + T2y = T1S + T1Z; + T2z = T2p + T2q; + T2A = T2y - T2z; + T2G = T2y + T2z; + T2C = T2k + T2n; + T2D = T2e + T27; + T2E = T2C - T2D; + T2I = T2C + T2D; + } + { + E T2x, T2B, T2F, T2H; + T2x = W[16]; + T2B = W[17]; + Ip[WS(rs, 4)] = FNMS(T2B, T2E, T2x * T2A); + Im[WS(rs, 4)] = FMA(T2x, T2E, T2B * T2A); + T2F = W[0]; + T2H = W[1]; + Ip[0] = FNMS(T2H, T2I, T2F * T2G); + Im[0] = FMA(T2F, T2I, T2H * T2G); + } + } + { + E T1G, T1M, T1K, T1O; + { + E T1E, T1F, T1I, T1J; + T1E = T7 - Te; + T1F = T1A - T1z; + T1G = T1E - T1F; + T1M = T1E + T1F; + T1I = T1w - T1x; + T1J = Tm - Tt; + T1K = T1I - T1J; + T1O = T1J + T1I; + } + { + E T1D, T1H, T1L, T1N; + T1D = W[22]; + T1H = W[23]; + Rp[WS(rs, 6)] = FNMS(T1H, T1K, T1D * T1G); + Rm[WS(rs, 6)] = FMA(T1D, T1K, T1H * T1G); + T1L = W[6]; + T1N = W[7]; + Rp[WS(rs, 2)] = FNMS(T1N, T1O, T1L * T1M); + Rm[WS(rs, 2)] = FMA(T1L, T1O, T1N * T1M); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cb_16", twinstr, &GENUS, {136, 46, 38, 0} }; + +void X(codelet_hc2cb_16) (planner *p) { + X(khc2c_register) (p, hc2cb_16, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:37 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -dif -name hc2cb_2 -include hc2cb.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T5, T6, T9, T8, T7, Ta; + { + E T1, T2, T3, T4; + T1 = Rp[0]; + T2 = Rm[0]; + T3 = Ip[0]; + T4 = Im[0]; + T5 = W[0]; + Rp[0] = T1 + T2; + T6 = T1 - T2; + Rm[0] = T3 - T4; + T9 = T3 + T4; + T8 = W[1]; + T7 = T5 * T6; + } + Ta = T8 * T6; + Ip[0] = FNMS(T8, T9, T7); + Im[0] = FMA(T5, T9, Ta); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cb_2", twinstr, &GENUS, {4, 2, 2, 0} }; + +void X(codelet_hc2cb_2) (planner *p) { + X(khc2c_register) (p, hc2cb_2, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 
-pipeline-latency 4 -sign 1 -n 2 -dif -name hc2cb_2 -include hc2cb.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T1, T2, T6, T3, T4, T8, T5, T7; + T1 = Rp[0]; + T2 = Rm[0]; + T6 = T1 - T2; + T3 = Ip[0]; + T4 = Im[0]; + T8 = T3 + T4; + Rp[0] = T1 + T2; + Rm[0] = T3 - T4; + T5 = W[0]; + T7 = W[1]; + Ip[0] = FNMS(T7, T8, T5 * T6); + Im[0] = FMA(T7, T6, T5 * T8); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cb_2", twinstr, &GENUS, {4, 2, 2, 0} }; + +void X(codelet_hc2cb_2) (planner *p) { + X(khc2c_register) (p, hc2cb_2, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:39 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cb_20 -include hc2cb.h */ + +/* + * This function contains 246 FP additions, 148 FP multiplications, + * (or, 136 additions, 38 multiplications, 110 fused multiply/add), + * 112 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T1T, T1Q, T1P; + { + E T3z, T4z, TE, T7, T2W, T4e, T2l, T1t, T33, T3H, T3G, T3a, T1i, T2g, T13; + E T4H, T4G, T2d, T1B, T4u, T4B, T4A, T4r, T1A, T2s, T3l, T2t, T3s, T2o, T2q; + E T1w, T1y, TC, T29, T3E, T3C, T4n, T4l, TN, TL; + { + E T4, T2U, T3, T3x, T1p, T5, T1q, T1r; + { + E T1, T2, T1n, T1o; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + T1n = Ip[0]; + T1o = Im[WS(rs, 9)]; + T4 = Rp[WS(rs, 5)]; + T2U = T1 - T2; + T3 = T1 + T2; + T3x = T1n + T1o; + T1p = T1n - T1o; + T5 = Rm[WS(rs, 4)]; + T1q = Ip[WS(rs, 5)]; + T1r = Im[WS(rs, 4)]; + } + { + E T3o, T4p, TF, Te, T2Z, T4f, T2b, T1a, T3k, T4t, TJ, TA, T39, T4j, T2f; + E T12, T3r, T4q, TG, 
Tl, T32, T4g, T2c, T1h, Tq, T34, Tp, T3f, TR, Tr; + E TS, TT; + { + E Tx, T37, Tw, T3j, TY, Ty, TZ, T10; + { + E Tb, T2X, Ta, T3m, T16, Tc, T17, T18; + { + E T8, T9, T14, T15; + T8 = Rp[WS(rs, 4)]; + { + E T3y, T6, T2V, T1s; + T3y = T4 - T5; + T6 = T4 + T5; + T2V = T1q + T1r; + T1s = T1q - T1r; + T3z = T3x - T3y; + T4z = T3y + T3x; + TE = T3 - T6; + T7 = T3 + T6; + T2W = T2U + T2V; + T4e = T2U - T2V; + T2l = T1p + T1s; + T1t = T1p - T1s; + T9 = Rm[WS(rs, 5)]; + } + T14 = Ip[WS(rs, 4)]; + T15 = Im[WS(rs, 5)]; + Tb = Rp[WS(rs, 9)]; + T2X = T8 - T9; + Ta = T8 + T9; + T3m = T14 + T15; + T16 = T14 - T15; + Tc = Rm[0]; + T17 = Ip[WS(rs, 9)]; + T18 = Im[0]; + } + { + E Tu, Tv, TW, TX; + Tu = Rm[WS(rs, 7)]; + { + E T3n, Td, T2Y, T19; + T3n = Tb - Tc; + Td = Tb + Tc; + T2Y = T17 + T18; + T19 = T17 - T18; + T3o = T3m - T3n; + T4p = T3n + T3m; + TF = Ta - Td; + Te = Ta + Td; + T2Z = T2X + T2Y; + T4f = T2X - T2Y; + T2b = T16 + T19; + T1a = T16 - T19; + Tv = Rp[WS(rs, 2)]; + } + TW = Ip[WS(rs, 2)]; + TX = Im[WS(rs, 7)]; + Tx = Rm[WS(rs, 2)]; + T37 = Tu - Tv; + Tw = Tu + Tv; + T3j = TW + TX; + TY = TW - TX; + Ty = Rp[WS(rs, 7)]; + TZ = Ip[WS(rs, 7)]; + T10 = Im[WS(rs, 2)]; + } + } + { + E Ti, T30, Th, T3q, T1d, Tj, T1e, T1f; + { + E Tf, Tg, T1b, T1c; + Tf = Rm[WS(rs, 3)]; + { + E T3i, Tz, T38, T11; + T3i = Tx - Ty; + Tz = Tx + Ty; + T38 = TZ + T10; + T11 = TZ - T10; + T3k = T3i + T3j; + T4t = T3i - T3j; + TJ = Tw - Tz; + TA = Tw + Tz; + T39 = T37 - T38; + T4j = T37 + T38; + T2f = TY + T11; + T12 = TY - T11; + Tg = Rp[WS(rs, 6)]; + } + T1b = Ip[WS(rs, 6)]; + T1c = Im[WS(rs, 3)]; + Ti = Rp[WS(rs, 1)]; + T30 = Tf - Tg; + Th = Tf + Tg; + T3q = T1b + T1c; + T1d = T1b - T1c; + Tj = Rm[WS(rs, 8)]; + T1e = Ip[WS(rs, 1)]; + T1f = Im[WS(rs, 8)]; + } + { + E Tn, To, TP, TQ; + Tn = Rp[WS(rs, 8)]; + { + E T3p, Tk, T31, T1g; + T3p = Ti - Tj; + Tk = Ti + Tj; + T31 = T1e + T1f; + T1g = T1e - T1f; + T3r = T3p + T3q; + T4q = T3p - T3q; + TG = Th - Tk; + Tl = Th + Tk; + T32 = T30 + T31; + T4g = 
T30 - T31; + T2c = T1d + T1g; + T1h = T1d - T1g; + To = Rm[WS(rs, 1)]; + } + TP = Ip[WS(rs, 8)]; + TQ = Im[WS(rs, 1)]; + Tq = Rm[WS(rs, 6)]; + T34 = Tn - To; + Tp = Tn + To; + T3f = TP + TQ; + TR = TP - TQ; + Tr = Rp[WS(rs, 3)]; + TS = Ip[WS(rs, 3)]; + TT = Im[WS(rs, 6)]; + } + } + } + { + E T3h, Tt, T1u, T2n, T1v, T4k, T4h, T2m, TH, TK, T4s, TI; + T33 = T2Z + T32; + T3H = T2Z - T32; + { + E T3g, Ts, T35, TU; + T3g = Tq - Tr; + Ts = Tq + Tr; + T35 = TS + TT; + TU = TS - TT; + T3h = T3f - T3g; + T4s = T3g + T3f; + TI = Tp - Ts; + Tt = Tp + Ts; + { + E T36, T4i, T2e, TV; + T36 = T34 - T35; + T4i = T34 + T35; + T2e = TR + TU; + TV = TR - TU; + T3G = T36 - T39; + T3a = T36 + T39; + T1u = T1a + T1h; + T1i = T1a - T1h; + T2g = T2e - T2f; + T2n = T2e + T2f; + T1v = TV + T12; + T13 = TV - T12; + T4H = T4i - T4j; + T4k = T4i + T4j; + } + } + T4h = T4f + T4g; + T4G = T4f - T4g; + T2d = T2b - T2c; + T2m = T2b + T2c; + TH = TF + TG; + T1B = TF - TG; + T4u = T4s - T4t; + T4B = T4s + T4t; + T4A = T4p + T4q; + T4r = T4p - T4q; + T1A = TI - TJ; + TK = TI + TJ; + { + E Tm, T3B, TB, T3A; + Tm = Te + Tl; + T2s = Te - Tl; + T3l = T3h + T3k; + T3B = T3h - T3k; + TB = Tt + TA; + T2t = Tt - TA; + T3s = T3o + T3r; + T3A = T3o - T3r; + T2o = T2m + T2n; + T2q = T2m - T2n; + T1w = T1u + T1v; + T1y = T1u - T1v; + TC = Tm + TB; + T29 = Tm - TB; + T3E = T3A - T3B; + T3C = T3A + T3B; + T4n = T4h - T4k; + T4l = T4h + T4k; + TN = TH - TK; + TL = TH + TK; + } + } + } + } + { + E T3d, T3b, T4E, T1x, TM, T4m, T58, T5b, T4D, T5a, T5c, T59, T4C; + Rp[0] = T7 + TC; + T3d = T33 - T3a; + T3b = T33 + T3a; + T4E = T4A - T4B; + T4C = T4A + T4B; + Rm[0] = T2l + T2o; + { + E T25, T22, T21, T24, T23, T26, T57; + T1x = FNMS(KP250000000, T1w, T1t); + T25 = T1t + T1w; + T22 = TE + TL; + TM = FNMS(KP250000000, TL, TE); + T21 = W[18]; + T24 = W[19]; + T4m = FNMS(KP250000000, T4l, T4e); + T58 = T4e + T4l; + T5b = T4z + T4C; + T4D = FNMS(KP250000000, T4C, T4z); + T23 = T21 * T22; + T26 = T24 * T22; + T57 = W[8]; + T5a 
= W[9]; + Rp[WS(rs, 5)] = FNMS(T24, T25, T23); + Rm[WS(rs, 5)] = FMA(T21, T25, T26); + T5c = T57 * T5b; + T59 = T57 * T58; + } + { + E T3U, T3Z, T3W, T40, T3V; + { + E T3c, T48, T4b, T3D, T47, T4a; + T3c = FNMS(KP250000000, T3b, T2W); + T48 = T2W + T3b; + T4b = T3z + T3C; + T3D = FNMS(KP250000000, T3C, T3z); + Im[WS(rs, 2)] = FMA(T5a, T58, T5c); + Ip[WS(rs, 2)] = FNMS(T5a, T5b, T59); + T47 = W[28]; + T4a = W[29]; + { + E T3I, T3Y, T42, T3u, T3M, T3X, T3F; + { + E T3T, T3t, T4c, T49, T3e, T3S; + T3T = FMA(KP618033988, T3l, T3s); + T3t = FNMS(KP618033988, T3s, T3l); + T4c = T47 * T4b; + T49 = T47 * T48; + T3I = FNMS(KP618033988, T3H, T3G); + T3Y = FMA(KP618033988, T3G, T3H); + Im[WS(rs, 7)] = FMA(T4a, T48, T4c); + Ip[WS(rs, 7)] = FNMS(T4a, T4b, T49); + T3e = FNMS(KP559016994, T3d, T3c); + T3S = FMA(KP559016994, T3d, T3c); + T42 = FMA(KP951056516, T3T, T3S); + T3U = FNMS(KP951056516, T3T, T3S); + T3u = FNMS(KP951056516, T3t, T3e); + T3M = FMA(KP951056516, T3t, T3e); + T3X = FMA(KP559016994, T3E, T3D); + T3F = FNMS(KP559016994, T3E, T3D); + } + { + E T3P, T45, T44, T46, T43; + { + E T3w, T3J, T3v, T3K, T2T, T41; + T2T = W[4]; + T3w = W[5]; + T3J = FMA(KP951056516, T3I, T3F); + T3P = FNMS(KP951056516, T3I, T3F); + T45 = FNMS(KP951056516, T3Y, T3X); + T3Z = FMA(KP951056516, T3Y, T3X); + T3v = T2T * T3u; + T3K = T2T * T3J; + T41 = W[36]; + T44 = W[37]; + Ip[WS(rs, 1)] = FNMS(T3w, T3J, T3v); + Im[WS(rs, 1)] = FMA(T3w, T3u, T3K); + T46 = T41 * T45; + T43 = T41 * T42; + } + { + E T3O, T3Q, T3N, T3L, T3R; + T3L = W[12]; + T3O = W[13]; + Im[WS(rs, 9)] = FMA(T44, T42, T46); + Ip[WS(rs, 9)] = FNMS(T44, T45, T43); + T3Q = T3L * T3P; + T3N = T3L * T3M; + T3R = W[20]; + T3W = W[21]; + Im[WS(rs, 3)] = FMA(T3O, T3M, T3Q); + Ip[WS(rs, 3)] = FNMS(T3O, T3P, T3N); + T40 = T3R * T3Z; + T3V = T3R * T3U; + } + } + } + } + { + E T4U, T4Z, T4W, T50, T4V, T2L, T2I, T2H; + { + E T4T, T4v, T4I, T4Y, T4o, T4S; + T4T = FNMS(KP618033988, T4r, T4u); + T4v = FMA(KP618033988, T4u, T4r); + Im[WS(rs, 
5)] = FMA(T3W, T3U, T40); + Ip[WS(rs, 5)] = FNMS(T3W, T3Z, T3V); + T4I = FMA(KP618033988, T4H, T4G); + T4Y = FNMS(KP618033988, T4G, T4H); + T4o = FMA(KP559016994, T4n, T4m); + T4S = FNMS(KP559016994, T4n, T4m); + { + E T52, T4M, T55, T4P, T54, T56, T53; + { + E T4d, T4w, T4J, T4x, T4y, T4X, T4F, T51, T4K; + T4d = W[0]; + T4X = FNMS(KP559016994, T4E, T4D); + T4F = FMA(KP559016994, T4E, T4D); + T4U = FNMS(KP951056516, T4T, T4S); + T52 = FMA(KP951056516, T4T, T4S); + T4M = FMA(KP951056516, T4v, T4o); + T4w = FNMS(KP951056516, T4v, T4o); + T4Z = FMA(KP951056516, T4Y, T4X); + T55 = FNMS(KP951056516, T4Y, T4X); + T4P = FNMS(KP951056516, T4I, T4F); + T4J = FMA(KP951056516, T4I, T4F); + T4x = T4d * T4w; + T4y = W[1]; + T51 = W[32]; + T4K = T4d * T4J; + T54 = W[33]; + Ip[0] = FNMS(T4y, T4J, T4x); + T56 = T51 * T55; + T53 = T51 * T52; + Im[0] = FMA(T4y, T4w, T4K); + } + { + E T4O, T4Q, T4N, T4L, T4R; + T4L = W[16]; + Im[WS(rs, 8)] = FMA(T54, T52, T56); + Ip[WS(rs, 8)] = FNMS(T54, T55, T53); + T4O = W[17]; + T4Q = T4L * T4P; + T4N = T4L * T4M; + T4R = W[24]; + T4W = W[25]; + Im[WS(rs, 4)] = FMA(T4O, T4M, T4Q); + Ip[WS(rs, 4)] = FNMS(T4O, T4P, T4N); + T50 = T4R * T4Z; + T4V = T4R * T4U; + } + } + } + { + E T2K, T2u, T2F, T2h, T28, T2J, T2r, T2p; + T2K = FNMS(KP618033988, T2s, T2t); + T2u = FMA(KP618033988, T2t, T2s); + Im[WS(rs, 6)] = FMA(T4W, T4U, T50); + Ip[WS(rs, 6)] = FNMS(T4W, T4Z, T4V); + T2p = FNMS(KP250000000, T2o, T2l); + T2F = FNMS(KP618033988, T2d, T2g); + T2h = FMA(KP618033988, T2g, T2d); + T28 = FNMS(KP250000000, TC, T7); + T2J = FNMS(KP559016994, T2q, T2p); + T2r = FMA(KP559016994, T2q, T2p); + { + E T2B, T2G, T2y, T2R, T2Q, T2P, T2A, T2x; + { + E T2k, T2v, T27, T2O, T2i, T2a, T2E; + T2k = W[7]; + T2a = FMA(KP559016994, T29, T28); + T2E = FNMS(KP559016994, T29, T28); + T2B = FMA(KP951056516, T2u, T2r); + T2v = FNMS(KP951056516, T2u, T2r); + T27 = W[6]; + T2O = FMA(KP951056516, T2F, T2E); + T2G = FNMS(KP951056516, T2F, T2E); + T2i = FMA(KP951056516, T2h, T2a); + 
T2y = FNMS(KP951056516, T2h, T2a); + { + E T2N, T2j, T2w, T2S; + T2L = FMA(KP951056516, T2K, T2J); + T2R = FNMS(KP951056516, T2K, T2J); + T2Q = W[23]; + T2N = W[22]; + T2j = T27 * T2i; + T2w = T2k * T2i; + T2S = T2Q * T2O; + T2P = T2N * T2O; + Rp[WS(rs, 2)] = FNMS(T2k, T2v, T2j); + Rm[WS(rs, 2)] = FMA(T27, T2v, T2w); + Rm[WS(rs, 6)] = FMA(T2N, T2R, T2S); + } + } + Rp[WS(rs, 6)] = FNMS(T2Q, T2R, T2P); + T2A = W[31]; + T2x = W[30]; + { + E T2D, T2M, T2C, T2z; + T2I = W[15]; + T2C = T2A * T2y; + T2z = T2x * T2y; + T2D = W[14]; + T2M = T2I * T2G; + Rm[WS(rs, 8)] = FMA(T2x, T2B, T2C); + Rp[WS(rs, 8)] = FNMS(T2A, T2B, T2z); + T2H = T2D * T2G; + Rm[WS(rs, 4)] = FMA(T2D, T2L, T2M); + } + } + } + { + E T1S, T1C, T1j, T1N, T1z, T1R; + T1S = FMA(KP618033988, T1A, T1B); + T1C = FNMS(KP618033988, T1B, T1A); + Rp[WS(rs, 4)] = FNMS(T2I, T2L, T2H); + T1j = FNMS(KP618033988, T1i, T13); + T1N = FMA(KP618033988, T13, T1i); + T1z = FNMS(KP559016994, T1y, T1x); + T1R = FMA(KP559016994, T1y, T1x); + { + E T1J, T1O, T1G, T1Z, T1Y, T1X, T1I, T1F; + { + E T1m, T1D, TD, T1W, T1k, T1M, TO; + T1m = W[3]; + T1M = FMA(KP559016994, TN, TM); + TO = FNMS(KP559016994, TN, TM); + T1D = FNMS(KP951056516, T1C, T1z); + T1J = FMA(KP951056516, T1C, T1z); + TD = W[2]; + T1O = FNMS(KP951056516, T1N, T1M); + T1W = FMA(KP951056516, T1N, T1M); + T1G = FNMS(KP951056516, T1j, TO); + T1k = FMA(KP951056516, T1j, TO); + { + E T1V, T1l, T1E, T20; + T1Z = FNMS(KP951056516, T1S, T1R); + T1T = FMA(KP951056516, T1S, T1R); + T1Y = W[27]; + T1V = W[26]; + T1l = TD * T1k; + T1E = T1m * T1k; + T20 = T1Y * T1W; + T1X = T1V * T1W; + Rp[WS(rs, 1)] = FNMS(T1m, T1D, T1l); + Rm[WS(rs, 1)] = FMA(TD, T1D, T1E); + Rm[WS(rs, 7)] = FMA(T1V, T1Z, T20); + } + } + Rp[WS(rs, 7)] = FNMS(T1Y, T1Z, T1X); + T1I = W[35]; + T1F = W[34]; + { + E T1L, T1U, T1K, T1H; + T1Q = W[11]; + T1K = T1I * T1G; + T1H = T1F * T1G; + T1L = W[10]; + T1U = T1Q * T1O; + Rm[WS(rs, 9)] = FMA(T1F, T1J, T1K); + Rp[WS(rs, 9)] = FNMS(T1I, T1J, T1H); + T1P = T1L * T1O; 
+ Rm[WS(rs, 3)] = FMA(T1L, T1T, T1U); + } + } + } + } + } + } + } + Rp[WS(rs, 3)] = FNMS(T1Q, T1T, T1P); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cb_20", twinstr, &GENUS, {136, 38, 110, 0} }; + +void X(codelet_hc2cb_20) (planner *p) { + X(khc2c_register) (p, hc2cb_20, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cb_20 -include hc2cb.h */ + +/* + * This function contains 246 FP additions, 124 FP multiplications, + * (or, 184 additions, 62 multiplications, 62 fused multiply/add), + * 97 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T7, T3T, T49, TE, T1v, T2T, T3g, T2d, T13, T3n, T3o, T1i, T26, T4e, T4d; + E T23, T1n, T42, T3Z, T1m, T2h, T2I, T2i, T2P, T30, T37, T38, Tm, TB, TC; + E T46, T47, T4a, T2a, T2b, T2e, T1w, T1x, T1y, T3O, T3R, T3U, T3h, T3i, T3j; + E TH, TK, TL; + { + E T3, T2R, T1r, T3e, T6, T3f, T1u, T2S; + { + E T1, T2, T1p, T1q; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + T3 = T1 + T2; + T2R = T1 - T2; + T1p = Ip[0]; + T1q = Im[WS(rs, 9)]; + T1r = T1p - T1q; + T3e = T1p + T1q; + } + { + E T4, T5, T1s, T1t; + T4 = Rp[WS(rs, 5)]; + T5 = Rm[WS(rs, 4)]; + T6 = T4 + T5; + T3f = T4 - T5; + T1s = Ip[WS(rs, 5)]; + T1t = Im[WS(rs, 4)]; + T1u = T1s - T1t; + T2S = T1s + T1t; + } + 
T7 = T3 + T6; + T3T = T2R - T2S; + T49 = T3f + T3e; + TE = T3 - T6; + T1v = T1r - T1u; + T2T = T2R + T2S; + T3g = T3e - T3f; + T2d = T1r + T1u; + } + { + E Te, T3M, T3X, TF, TV, T2E, T2W, T21, TA, T3Q, T41, TJ, T1h, T2O, T36; + E T25, Tl, T3N, T3Y, TG, T12, T2H, T2Z, T22, Tt, T3P, T40, TI, T1a, T2L; + E T33, T24; + { + E Ta, T2U, TR, T2C, Td, T2D, TU, T2V; + { + E T8, T9, TP, TQ; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + T2U = T8 - T9; + TP = Ip[WS(rs, 4)]; + TQ = Im[WS(rs, 5)]; + TR = TP - TQ; + T2C = TP + TQ; + } + { + E Tb, Tc, TS, TT; + Tb = Rp[WS(rs, 9)]; + Tc = Rm[0]; + Td = Tb + Tc; + T2D = Tb - Tc; + TS = Ip[WS(rs, 9)]; + TT = Im[0]; + TU = TS - TT; + T2V = TS + TT; + } + Te = Ta + Td; + T3M = T2U - T2V; + T3X = T2D + T2C; + TF = Ta - Td; + TV = TR - TU; + T2E = T2C - T2D; + T2W = T2U + T2V; + T21 = TR + TU; + } + { + E Tw, T34, T1d, T2N, Tz, T2M, T1g, T35; + { + E Tu, Tv, T1b, T1c; + Tu = Rm[WS(rs, 7)]; + Tv = Rp[WS(rs, 2)]; + Tw = Tu + Tv; + T34 = Tu - Tv; + T1b = Ip[WS(rs, 2)]; + T1c = Im[WS(rs, 7)]; + T1d = T1b - T1c; + T2N = T1b + T1c; + } + { + E Tx, Ty, T1e, T1f; + Tx = Rm[WS(rs, 2)]; + Ty = Rp[WS(rs, 7)]; + Tz = Tx + Ty; + T2M = Tx - Ty; + T1e = Ip[WS(rs, 7)]; + T1f = Im[WS(rs, 2)]; + T1g = T1e - T1f; + T35 = T1e + T1f; + } + TA = Tw + Tz; + T3Q = T34 + T35; + T41 = T2M - T2N; + TJ = Tw - Tz; + T1h = T1d - T1g; + T2O = T2M + T2N; + T36 = T34 - T35; + T25 = T1d + T1g; + } + { + E Th, T2X, TY, T2G, Tk, T2F, T11, T2Y; + { + E Tf, Tg, TW, TX; + Tf = Rm[WS(rs, 3)]; + Tg = Rp[WS(rs, 6)]; + Th = Tf + Tg; + T2X = Tf - Tg; + TW = Ip[WS(rs, 6)]; + TX = Im[WS(rs, 3)]; + TY = TW - TX; + T2G = TW + TX; + } + { + E Ti, Tj, TZ, T10; + Ti = Rp[WS(rs, 1)]; + Tj = Rm[WS(rs, 8)]; + Tk = Ti + Tj; + T2F = Ti - Tj; + TZ = Ip[WS(rs, 1)]; + T10 = Im[WS(rs, 8)]; + T11 = TZ - T10; + T2Y = TZ + T10; + } + Tl = Th + Tk; + T3N = T2X - T2Y; + T3Y = T2F - T2G; + TG = Th - Tk; + T12 = TY - T11; + T2H = T2F + T2G; + T2Z = T2X + T2Y; + T22 = TY + T11; + } + { + E 
Tp, T31, T16, T2J, Ts, T2K, T19, T32; + { + E Tn, To, T14, T15; + Tn = Rp[WS(rs, 8)]; + To = Rm[WS(rs, 1)]; + Tp = Tn + To; + T31 = Tn - To; + T14 = Ip[WS(rs, 8)]; + T15 = Im[WS(rs, 1)]; + T16 = T14 - T15; + T2J = T14 + T15; + } + { + E Tq, Tr, T17, T18; + Tq = Rm[WS(rs, 6)]; + Tr = Rp[WS(rs, 3)]; + Ts = Tq + Tr; + T2K = Tq - Tr; + T17 = Ip[WS(rs, 3)]; + T18 = Im[WS(rs, 6)]; + T19 = T17 - T18; + T32 = T17 + T18; + } + Tt = Tp + Ts; + T3P = T31 + T32; + T40 = T2K + T2J; + TI = Tp - Ts; + T1a = T16 - T19; + T2L = T2J - T2K; + T33 = T31 - T32; + T24 = T16 + T19; + } + T13 = TV - T12; + T3n = T2W - T2Z; + T3o = T33 - T36; + T1i = T1a - T1h; + T26 = T24 - T25; + T4e = T3P - T3Q; + T4d = T3M - T3N; + T23 = T21 - T22; + T1n = TI - TJ; + T42 = T40 - T41; + T3Z = T3X - T3Y; + T1m = TF - TG; + T2h = Te - Tl; + T2I = T2E + T2H; + T2i = Tt - TA; + T2P = T2L + T2O; + T30 = T2W + T2Z; + T37 = T33 + T36; + T38 = T30 + T37; + Tm = Te + Tl; + TB = Tt + TA; + TC = Tm + TB; + T46 = T3X + T3Y; + T47 = T40 + T41; + T4a = T46 + T47; + T2a = T21 + T22; + T2b = T24 + T25; + T2e = T2a + T2b; + T1w = TV + T12; + T1x = T1a + T1h; + T1y = T1w + T1x; + T3O = T3M + T3N; + T3R = T3P + T3Q; + T3U = T3O + T3R; + T3h = T2E - T2H; + T3i = T2L - T2O; + T3j = T3h + T3i; + TH = TF + TG; + TK = TI + TJ; + TL = TH + TK; + } + Rp[0] = T7 + TC; + Rm[0] = T2d + T2e; + { + E T1U, T1W, T1T, T1V; + T1U = TE + TL; + T1W = T1v + T1y; + T1T = W[18]; + T1V = W[19]; + Rp[WS(rs, 5)] = FNMS(T1V, T1W, T1T * T1U); + Rm[WS(rs, 5)] = FMA(T1V, T1U, T1T * T1W); + } + { + E T4y, T4A, T4x, T4z; + T4y = T3T + T3U; + T4A = T49 + T4a; + T4x = W[8]; + T4z = W[9]; + Ip[WS(rs, 2)] = FNMS(T4z, T4A, T4x * T4y); + Im[WS(rs, 2)] = FMA(T4x, T4A, T4z * T4y); + } + { + E T3I, T3K, T3H, T3J; + T3I = T2T + T38; + T3K = T3g + T3j; + T3H = W[28]; + T3J = W[29]; + Ip[WS(rs, 7)] = FNMS(T3J, T3K, T3H * T3I); + Im[WS(rs, 7)] = FMA(T3H, T3K, T3J * T3I); + } + { + E T27, T2j, T2v, T2r, T2g, T2u, T20, T2q; + T27 = FMA(KP951056516, T23, KP587785252 
* T26); + T2j = FMA(KP951056516, T2h, KP587785252 * T2i); + T2v = FNMS(KP951056516, T2i, KP587785252 * T2h); + T2r = FNMS(KP951056516, T26, KP587785252 * T23); + { + E T2c, T2f, T1Y, T1Z; + T2c = KP559016994 * (T2a - T2b); + T2f = FNMS(KP250000000, T2e, T2d); + T2g = T2c + T2f; + T2u = T2f - T2c; + T1Y = KP559016994 * (Tm - TB); + T1Z = FNMS(KP250000000, TC, T7); + T20 = T1Y + T1Z; + T2q = T1Z - T1Y; + } + { + E T28, T2k, T1X, T29; + T28 = T20 + T27; + T2k = T2g - T2j; + T1X = W[6]; + T29 = W[7]; + Rp[WS(rs, 2)] = FNMS(T29, T2k, T1X * T28); + Rm[WS(rs, 2)] = FMA(T29, T28, T1X * T2k); + } + { + E T2y, T2A, T2x, T2z; + T2y = T2q - T2r; + T2A = T2v + T2u; + T2x = W[22]; + T2z = W[23]; + Rp[WS(rs, 6)] = FNMS(T2z, T2A, T2x * T2y); + Rm[WS(rs, 6)] = FMA(T2z, T2y, T2x * T2A); + } + { + E T2m, T2o, T2l, T2n; + T2m = T20 - T27; + T2o = T2j + T2g; + T2l = W[30]; + T2n = W[31]; + Rp[WS(rs, 8)] = FNMS(T2n, T2o, T2l * T2m); + Rm[WS(rs, 8)] = FMA(T2n, T2m, T2l * T2o); + } + { + E T2s, T2w, T2p, T2t; + T2s = T2q + T2r; + T2w = T2u - T2v; + T2p = W[14]; + T2t = W[15]; + Rp[WS(rs, 4)] = FNMS(T2t, T2w, T2p * T2s); + Rm[WS(rs, 4)] = FMA(T2t, T2s, T2p * T2w); + } + } + { + E T43, T4f, T4r, T4m, T4c, T4q, T3W, T4n; + T43 = FMA(KP951056516, T3Z, KP587785252 * T42); + T4f = FMA(KP951056516, T4d, KP587785252 * T4e); + T4r = FNMS(KP951056516, T4e, KP587785252 * T4d); + T4m = FNMS(KP951056516, T42, KP587785252 * T3Z); + { + E T48, T4b, T3S, T3V; + T48 = KP559016994 * (T46 - T47); + T4b = FNMS(KP250000000, T4a, T49); + T4c = T48 + T4b; + T4q = T4b - T48; + T3S = KP559016994 * (T3O - T3R); + T3V = FNMS(KP250000000, T3U, T3T); + T3W = T3S + T3V; + T4n = T3V - T3S; + } + { + E T44, T4g, T3L, T45; + T44 = T3W - T43; + T4g = T4c + T4f; + T3L = W[0]; + T45 = W[1]; + Ip[0] = FNMS(T45, T4g, T3L * T44); + Im[0] = FMA(T3L, T4g, T45 * T44); + } + { + E T4u, T4w, T4t, T4v; + T4u = T4n - T4m; + T4w = T4q + T4r; + T4t = W[32]; + T4v = W[33]; + Ip[WS(rs, 8)] = FNMS(T4v, T4w, T4t * T4u); + Im[WS(rs, 8)] = 
FMA(T4t, T4w, T4v * T4u); + } + { + E T4i, T4k, T4h, T4j; + T4i = T43 + T3W; + T4k = T4c - T4f; + T4h = W[16]; + T4j = W[17]; + Ip[WS(rs, 4)] = FNMS(T4j, T4k, T4h * T4i); + Im[WS(rs, 4)] = FMA(T4h, T4k, T4j * T4i); + } + { + E T4o, T4s, T4l, T4p; + T4o = T4m + T4n; + T4s = T4q - T4r; + T4l = W[24]; + T4p = W[25]; + Ip[WS(rs, 6)] = FNMS(T4p, T4s, T4l * T4o); + Im[WS(rs, 6)] = FMA(T4l, T4s, T4p * T4o); + } + } + { + E T1j, T1o, T1M, T1J, T1B, T1N, TO, T1I; + T1j = FNMS(KP951056516, T1i, KP587785252 * T13); + T1o = FNMS(KP951056516, T1n, KP587785252 * T1m); + T1M = FMA(KP951056516, T1m, KP587785252 * T1n); + T1J = FMA(KP951056516, T13, KP587785252 * T1i); + { + E T1z, T1A, TM, TN; + T1z = FNMS(KP250000000, T1y, T1v); + T1A = KP559016994 * (T1w - T1x); + T1B = T1z - T1A; + T1N = T1A + T1z; + TM = FNMS(KP250000000, TL, TE); + TN = KP559016994 * (TH - TK); + TO = TM - TN; + T1I = TN + TM; + } + { + E T1k, T1C, TD, T1l; + T1k = TO - T1j; + T1C = T1o + T1B; + TD = W[2]; + T1l = W[3]; + Rp[WS(rs, 1)] = FNMS(T1l, T1C, TD * T1k); + Rm[WS(rs, 1)] = FMA(T1l, T1k, TD * T1C); + } + { + E T1Q, T1S, T1P, T1R; + T1Q = T1I + T1J; + T1S = T1N - T1M; + T1P = W[26]; + T1R = W[27]; + Rp[WS(rs, 7)] = FNMS(T1R, T1S, T1P * T1Q); + Rm[WS(rs, 7)] = FMA(T1R, T1Q, T1P * T1S); + } + { + E T1E, T1G, T1D, T1F; + T1E = TO + T1j; + T1G = T1B - T1o; + T1D = W[34]; + T1F = W[35]; + Rp[WS(rs, 9)] = FNMS(T1F, T1G, T1D * T1E); + Rm[WS(rs, 9)] = FMA(T1F, T1E, T1D * T1G); + } + { + E T1K, T1O, T1H, T1L; + T1K = T1I - T1J; + T1O = T1M + T1N; + T1H = W[10]; + T1L = W[11]; + Rp[WS(rs, 3)] = FNMS(T1L, T1O, T1H * T1K); + Rm[WS(rs, 3)] = FMA(T1L, T1K, T1H * T1O); + } + } + { + E T2Q, T3p, T3B, T3x, T3m, T3A, T3b, T3w; + T2Q = FNMS(KP951056516, T2P, KP587785252 * T2I); + T3p = FNMS(KP951056516, T3o, KP587785252 * T3n); + T3B = FMA(KP951056516, T3n, KP587785252 * T3o); + T3x = FMA(KP951056516, T2I, KP587785252 * T2P); + { + E T3k, T3l, T39, T3a; + T3k = FNMS(KP250000000, T3j, T3g); + T3l = KP559016994 * (T3h - 
T3i); + T3m = T3k - T3l; + T3A = T3l + T3k; + T39 = FNMS(KP250000000, T38, T2T); + T3a = KP559016994 * (T30 - T37); + T3b = T39 - T3a; + T3w = T3a + T39; + } + { + E T3c, T3q, T2B, T3d; + T3c = T2Q + T3b; + T3q = T3m - T3p; + T2B = W[4]; + T3d = W[5]; + Ip[WS(rs, 1)] = FNMS(T3d, T3q, T2B * T3c); + Im[WS(rs, 1)] = FMA(T2B, T3q, T3d * T3c); + } + { + E T3E, T3G, T3D, T3F; + T3E = T3x + T3w; + T3G = T3A - T3B; + T3D = W[36]; + T3F = W[37]; + Ip[WS(rs, 9)] = FNMS(T3F, T3G, T3D * T3E); + Im[WS(rs, 9)] = FMA(T3D, T3G, T3F * T3E); + } + { + E T3s, T3u, T3r, T3t; + T3s = T3b - T2Q; + T3u = T3m + T3p; + T3r = W[12]; + T3t = W[13]; + Ip[WS(rs, 3)] = FNMS(T3t, T3u, T3r * T3s); + Im[WS(rs, 3)] = FMA(T3r, T3u, T3t * T3s); + } + { + E T3y, T3C, T3v, T3z; + T3y = T3w - T3x; + T3C = T3A + T3B; + T3v = W[20]; + T3z = W[21]; + Ip[WS(rs, 5)] = FNMS(T3z, T3C, T3v * T3y); + Im[WS(rs, 5)] = FMA(T3v, T3C, T3z * T3y); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cb_20", twinstr, &GENUS, {184, 62, 62, 0} }; + +void X(codelet_hc2cb_20) (planner *p) { + X(khc2c_register) (p, hc2cb_20, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1770 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:38 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include hc2cb.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 236 additions, 62 multiplications, 198 fused multiply/add), + * 137 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T5o, T5r, T5q, T5n, T5s, T5p; + { + E T5K, Tf, T8k, T7k, T8x, T7N, T3i, T1i, T3v, T2L, T5f, T4v, T6T, T6m, T52; + E T42, TZ, T6X, T3p, T1X, T8B, T8p, T3o, T26, T58, T4n, T7T, 
T7z, T59, T4k; + E T6p, T6a, TK, T6W, T8s, T8A, T2o, T3m, T3l, T2x, T55, T4g, T7S, T7G, T56; + E T4d, T6o, T61, T5Q, T5N, T6f, Tu, T8y, T7r, T8l, T7Q, T3w, T1F, T45, T48; + E T3j, T2O, T53, T4y, T62, T69; + { + E T6l, T6i, T40, T41; + { + E T12, T3, T6g, T2G, T2D, T6, T6h, T15, Td, T6k, T1g, T2J, Ta, T17, T1a; + E T6j; + { + E T4, T5, T13, T14; + { + E T1, T2, T2E, T2F; + T1 = Rp[0]; + T2 = Rm[WS(rs, 15)]; + T2E = Ip[0]; + T2F = Im[WS(rs, 15)]; + T4 = Rp[WS(rs, 8)]; + T12 = T1 - T2; + T3 = T1 + T2; + T6g = T2E - T2F; + T2G = T2E + T2F; + T5 = Rm[WS(rs, 7)]; + } + T13 = Ip[WS(rs, 8)]; + T14 = Im[WS(rs, 7)]; + { + E Tb, Tc, T1d, T1e; + Tb = Rm[WS(rs, 3)]; + T2D = T4 - T5; + T6 = T4 + T5; + T6h = T13 - T14; + T15 = T13 + T14; + Tc = Rp[WS(rs, 12)]; + T1d = Ip[WS(rs, 12)]; + T1e = Im[WS(rs, 3)]; + { + E T8, T1c, T1f, T9, T18, T19; + T8 = Rp[WS(rs, 4)]; + Td = Tb + Tc; + T1c = Tb - Tc; + T6k = T1d - T1e; + T1f = T1d + T1e; + T9 = Rm[WS(rs, 11)]; + T18 = Ip[WS(rs, 4)]; + T19 = Im[WS(rs, 11)]; + T1g = T1c - T1f; + T2J = T1c + T1f; + Ta = T8 + T9; + T17 = T8 - T9; + T1a = T18 + T19; + T6j = T18 - T19; + } + } + } + { + E T2I, T7M, T7L, T16, T1h, T4u, T4t, T2H, T2K; + { + E T7i, T7, T1b, Te, T7j; + T7i = T3 - T6; + T7 = T3 + T6; + T2I = T17 + T1a; + T1b = T17 - T1a; + Te = Ta + Td; + T7M = Ta - Td; + T7j = T6k - T6j; + T6l = T6j + T6k; + T6i = T6g + T6h; + T7L = T6g - T6h; + T5K = T7 - Te; + Tf = T7 + Te; + T8k = T7i + T7j; + T7k = T7i - T7j; + T40 = T12 + T15; + T16 = T12 - T15; + T1h = T1b + T1g; + T4u = T1b - T1g; + } + T4t = T2G - T2D; + T2H = T2D + T2G; + T8x = T7M + T7L; + T7N = T7L - T7M; + T3i = FMA(KP707106781, T1h, T16); + T1i = FNMS(KP707106781, T1h, T16); + T2K = T2I - T2J; + T41 = T2I + T2J; + T3v = FMA(KP707106781, T2K, T2H); + T2L = FNMS(KP707106781, T2K, T2H); + T5f = FNMS(KP707106781, T4u, T4t); + T4v = FMA(KP707106781, T4u, T4t); + } + } + { + E T1Y, T1H, TR, T7w, T1K, T21, T65, T7t, TU, T66, T23, T1Q, T1R, TX, T67; + E T1U, TY, T7u; + { + E TL, TM, TO, 
TP, T63, T64; + TL = Rm[0]; + T6T = T6i + T6l; + T6m = T6i - T6l; + T52 = FMA(KP707106781, T41, T40); + T42 = FNMS(KP707106781, T41, T40); + TM = Rp[WS(rs, 15)]; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + { + E T1I, TN, TQ, T1J, T1Z, T20; + T1I = Ip[WS(rs, 15)]; + T1Y = TL - TM; + TN = TL + TM; + T1H = TO - TP; + TQ = TO + TP; + T1J = Im[0]; + T1Z = Ip[WS(rs, 7)]; + T20 = Im[WS(rs, 8)]; + TR = TN + TQ; + T7w = TN - TQ; + T1K = T1I + T1J; + T63 = T1I - T1J; + T64 = T1Z - T20; + T21 = T1Z + T20; + } + { + E TV, T1M, T1P, TW, T1S, T1T; + { + E TS, TT, T1N, T1O; + TS = Rp[WS(rs, 3)]; + T65 = T63 + T64; + T7t = T63 - T64; + TT = Rm[WS(rs, 12)]; + T1N = Ip[WS(rs, 3)]; + T1O = Im[WS(rs, 12)]; + TV = Rm[WS(rs, 4)]; + T1M = TS - TT; + TU = TS + TT; + T66 = T1N - T1O; + T1P = T1N + T1O; + TW = Rp[WS(rs, 11)]; + T1S = Ip[WS(rs, 11)]; + T1T = Im[WS(rs, 4)]; + } + T23 = T1M - T1P; + T1Q = T1M + T1P; + T1R = TV - TW; + TX = TV + TW; + T67 = T1S - T1T; + T1U = T1S + T1T; + } + } + TY = TU + TX; + T7u = TU - TX; + { + E T7x, T68, T1V, T24; + T7x = T67 - T66; + T68 = T66 + T67; + T1V = T1R + T1U; + T24 = T1R - T1U; + { + E T4l, T1L, T1W, T4j, T7v, T8n, T8o, T7y; + T62 = TR - TY; + TZ = TR + TY; + T6X = T65 + T68; + T69 = T65 - T68; + T4l = T1H + T1K; + T1L = T1H - T1K; + T1W = T1Q - T1V; + T4j = T1Q + T1V; + T7v = T7t - T7u; + T8n = T7u + T7t; + T8o = T7w + T7x; + T7y = T7w - T7x; + { + E T4i, T22, T25, T4m; + T4i = T1Y + T21; + T22 = T1Y - T21; + T3p = FMA(KP707106781, T1W, T1L); + T1X = FNMS(KP707106781, T1W, T1L); + T8B = FMA(KP414213562, T8n, T8o); + T8p = FNMS(KP414213562, T8o, T8n); + T25 = T23 + T24; + T4m = T23 - T24; + T3o = FMA(KP707106781, T25, T22); + T26 = FNMS(KP707106781, T25, T22); + T58 = FMA(KP707106781, T4m, T4l); + T4n = FNMS(KP707106781, T4m, T4l); + T7T = FNMS(KP414213562, T7v, T7y); + T7z = FMA(KP414213562, T7y, T7v); + T59 = FMA(KP707106781, T4j, T4i); + T4k = FNMS(KP707106781, T4j, T4i); + } + } + } + } + } + { + E T5T, T60, T4c, T4b; + { + E T2p, T28, 
T2b, T7D, TC, T2s, T7A, T5W, TF, T2j, T5X, T2i, TI, T2k, T2u; + E T2h; + { + E Tz, Ty, TA, Tw, Tx; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + Tz = Rp[WS(rs, 9)]; + T6p = T69 - T62; + T6a = T62 + T69; + Ty = Tw + Tx; + T2p = Tw - Tx; + TA = Rm[WS(rs, 6)]; + { + E T5U, T5V, T2d, T2g; + { + E T2q, T2r, T29, T2a, TB; + T29 = Ip[WS(rs, 1)]; + T2a = Im[WS(rs, 14)]; + TB = Tz + TA; + T28 = Tz - TA; + T2q = Ip[WS(rs, 9)]; + T5U = T29 - T2a; + T2b = T29 + T2a; + T2r = Im[WS(rs, 6)]; + T7D = Ty - TB; + TC = Ty + TB; + T2s = T2q + T2r; + T5V = T2q - T2r; + } + { + E T2e, T2f, TD, TE, TG, TH; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + T7A = T5U - T5V; + T5W = T5U + T5V; + T2e = Ip[WS(rs, 5)]; + T2d = TD - TE; + TF = TD + TE; + T2f = Im[WS(rs, 10)]; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + T2j = Ip[WS(rs, 13)]; + T5X = T2e - T2f; + T2g = T2e + T2f; + T2i = TG - TH; + TI = TG + TH; + T2k = Im[WS(rs, 2)]; + } + T2u = T2d - T2g; + T2h = T2d + T2g; + } + } + { + E TJ, T7B, T2l, T5Y; + TJ = TF + TI; + T7B = TF - TI; + T2l = T2j + T2k; + T5Y = T2j - T2k; + { + E T4e, T2c, T2v, T8q, T7C, T7F, T8r, T2n, T7E, T2m, T5Z, T4f, T2t, T2w; + T4e = T2b - T28; + T2c = T28 + T2b; + TK = TC + TJ; + T5T = TC - TJ; + T7E = T5Y - T5X; + T5Z = T5X + T5Y; + T2m = T2i + T2l; + T2v = T2i - T2l; + T60 = T5W - T5Z; + T6W = T5W + T5Z; + T8q = T7B + T7A; + T7C = T7A - T7B; + T7F = T7D - T7E; + T8r = T7D + T7E; + T2n = T2h - T2m; + T4c = T2h + T2m; + T4b = T2p + T2s; + T2t = T2p - T2s; + T2w = T2u + T2v; + T4f = T2v - T2u; + T8s = FMA(KP414213562, T8r, T8q); + T8A = FNMS(KP414213562, T8q, T8r); + T2o = FNMS(KP707106781, T2n, T2c); + T3m = FMA(KP707106781, T2n, T2c); + T3l = FMA(KP707106781, T2w, T2t); + T2x = FNMS(KP707106781, T2w, T2t); + T55 = FMA(KP707106781, T4f, T4e); + T4g = FNMS(KP707106781, T4f, T4e); + T7S = FMA(KP414213562, T7C, T7F); + T7G = FNMS(KP414213562, T7F, T7C); + } + } + } + { + E T43, T1y, T7o, Tm, T7p, T44, T1D, Tq, T1o, Tp, T5L, T1m, Tr, T1p, T1q; + { + E Tj, T1z, Ti, 
T5O, T1x, Tk, T1A, T1B; + { + E Tg, Th, T1v, T1w; + Tg = Rp[WS(rs, 2)]; + T56 = FMA(KP707106781, T4c, T4b); + T4d = FNMS(KP707106781, T4c, T4b); + T6o = T5T + T60; + T61 = T5T - T60; + Th = Rm[WS(rs, 13)]; + T1v = Ip[WS(rs, 2)]; + T1w = Im[WS(rs, 13)]; + Tj = Rp[WS(rs, 10)]; + T1z = Tg - Th; + Ti = Tg + Th; + T5O = T1v - T1w; + T1x = T1v + T1w; + Tk = Rm[WS(rs, 5)]; + T1A = Ip[WS(rs, 10)]; + T1B = Im[WS(rs, 5)]; + } + { + E Tn, To, T1k, T1l; + Tn = Rm[WS(rs, 1)]; + { + E T1u, Tl, T5P, T1C; + T1u = Tj - Tk; + Tl = Tj + Tk; + T5P = T1A - T1B; + T1C = T1A + T1B; + T43 = T1x - T1u; + T1y = T1u + T1x; + T7o = Ti - Tl; + Tm = Ti + Tl; + T5Q = T5O + T5P; + T7p = T5O - T5P; + T44 = T1z + T1C; + T1D = T1z - T1C; + To = Rp[WS(rs, 14)]; + } + T1k = Ip[WS(rs, 14)]; + T1l = Im[WS(rs, 1)]; + Tq = Rp[WS(rs, 6)]; + T1o = Tn - To; + Tp = Tn + To; + T5L = T1k - T1l; + T1m = T1k + T1l; + Tr = Rm[WS(rs, 9)]; + T1p = Ip[WS(rs, 6)]; + T1q = Im[WS(rs, 9)]; + } + } + { + E T46, T47, T7P, T7O, T2N, T1t, T1E, T2M, T4w, T4x; + { + E T1n, Tt, T1s, T7n, T7q, T7m, T7l; + { + E T1j, Ts, T5M, T1r; + T1j = Tq - Tr; + Ts = Tq + Tr; + T5M = T1p - T1q; + T1r = T1p + T1q; + T46 = T1j + T1m; + T1n = T1j - T1m; + T7m = Tp - Ts; + Tt = Tp + Ts; + T5N = T5L + T5M; + T7l = T5L - T5M; + T47 = T1o + T1r; + T1s = T1o - T1r; + } + T7P = T7m + T7l; + T7n = T7l - T7m; + T7q = T7o + T7p; + T7O = T7o - T7p; + T6f = Tm - Tt; + Tu = Tm + Tt; + T8y = T7q + T7n; + T7r = T7n - T7q; + T2N = FMA(KP414213562, T1n, T1s); + T1t = FNMS(KP414213562, T1s, T1n); + T1E = FMA(KP414213562, T1D, T1y); + T2M = FNMS(KP414213562, T1y, T1D); + } + T8l = T7O + T7P; + T7Q = T7O - T7P; + T3w = T1E + T1t; + T1F = T1t - T1E; + T45 = FNMS(KP414213562, T44, T43); + T4w = FMA(KP414213562, T43, T44); + T4x = FMA(KP414213562, T46, T47); + T48 = FNMS(KP414213562, T47, T46); + T3j = T2M + T2N; + T2O = T2M - T2N; + T53 = T4w + T4x; + T4y = T4w - T4x; + } + } + } + { + E T72, T5g, T49, T78, T77, T73, T7s, T7U, T7R, T7H, T3f, T3e, T3d; + { + E T5R, 
T8m, T8C, T8z, T8t, T8e, T86, T88, T8h, T8f, T8i, T8c, T8g; + { + E T6P, T6Q, T6Z, T6S, T6R; + { + E Tv, T10, T6V, T6Y, T6U; + T72 = Tf - Tu; + Tv = Tf + Tu; + T6U = T5Q + T5N; + T5R = T5N - T5Q; + T5g = T48 - T45; + T49 = T45 + T48; + T10 = TK + TZ; + T78 = TK - TZ; + T77 = T6T - T6U; + T6V = T6T + T6U; + T6Y = T6W + T6X; + T73 = T6X - T6W; + T6P = W[30]; + Rp[0] = Tv + T10; + T6Q = Tv - T10; + Rm[0] = T6V + T6Y; + T6Z = T6V - T6Y; + T6S = W[31]; + T6R = T6P * T6Q; + } + { + E T8O, T8W, T8Q, T8Z, T8X, T90, T8U, T8Y; + { + E T8R, T8S, T8M, T8N, T70; + T8M = FMA(KP707106781, T8l, T8k); + T8m = FNMS(KP707106781, T8l, T8k); + T8C = T8A - T8B; + T8N = T8A + T8B; + T70 = T6S * T6Q; + Rp[WS(rs, 8)] = FNMS(T6S, T6Z, T6R); + T8R = FMA(KP707106781, T8y, T8x); + T8z = FNMS(KP707106781, T8y, T8x); + T8O = FNMS(KP923879532, T8N, T8M); + T8W = FMA(KP923879532, T8N, T8M); + Rm[WS(rs, 8)] = FMA(T6P, T6Z, T70); + T8S = T8s + T8p; + T8t = T8p - T8s; + { + E T8L, T8T, T8P, T8V; + T8L = W[34]; + T8Q = W[35]; + T8V = W[2]; + T8Z = FMA(KP923879532, T8S, T8R); + T8T = FNMS(KP923879532, T8S, T8R); + T8P = T8L * T8O; + T8X = T8V * T8W; + T90 = T8V * T8Z; + T8U = T8L * T8T; + Rp[WS(rs, 9)] = FNMS(T8Q, T8T, T8P); + T8Y = W[3]; + } + } + { + E T89, T8a, T84, T85; + T84 = FNMS(KP707106781, T7r, T7k); + T7s = FMA(KP707106781, T7r, T7k); + Rm[WS(rs, 9)] = FMA(T8Q, T8O, T8U); + T85 = T7S + T7T; + T7U = T7S - T7T; + Rm[WS(rs, 1)] = FMA(T8Y, T8W, T90); + Rp[WS(rs, 1)] = FNMS(T8Y, T8Z, T8X); + T7R = FMA(KP707106781, T7Q, T7N); + T89 = FNMS(KP707106781, T7Q, T7N); + T8e = FMA(KP923879532, T85, T84); + T86 = FNMS(KP923879532, T85, T84); + T8a = T7G + T7z; + T7H = T7z - T7G; + { + E T83, T8b, T87, T8d; + T83 = W[26]; + T88 = W[27]; + T8d = W[58]; + T8h = FMA(KP923879532, T8a, T89); + T8b = FNMS(KP923879532, T8a, T89); + T87 = T83 * T86; + T8f = T8d * T8e; + T8i = T8d * T8h; + T8c = T83 * T8b; + Rp[WS(rs, 7)] = FNMS(T88, T8b, T87); + T8g = W[59]; + } + } + } + } + { + E T5S, T6q, T6n, T6K, T6C, T6b, 
T6E, T6N, T6L, T6O, T6I, T6M; + { + E T6F, T6G, T6A, T6B; + T6A = T5K - T5R; + T5S = T5K + T5R; + Rm[WS(rs, 7)] = FMA(T88, T86, T8c); + T6B = T6p - T6o; + T6q = T6o + T6p; + Rm[WS(rs, 15)] = FMA(T8g, T8e, T8i); + Rp[WS(rs, 15)] = FNMS(T8g, T8h, T8f); + T6n = T6f + T6m; + T6F = T6m - T6f; + T6K = FMA(KP707106781, T6B, T6A); + T6C = FNMS(KP707106781, T6B, T6A); + T6G = T61 - T6a; + T6b = T61 + T6a; + { + E T6z, T6H, T6D, T6J; + T6z = W[54]; + T6E = W[55]; + T6J = W[22]; + T6N = FMA(KP707106781, T6G, T6F); + T6H = FNMS(KP707106781, T6G, T6F); + T6D = T6z * T6C; + T6L = T6J * T6K; + T6O = T6J * T6N; + T6I = T6z * T6H; + Rp[WS(rs, 14)] = FNMS(T6E, T6H, T6D); + T6M = W[23]; + } + } + { + E T8G, T8F, T8J, T8H, T8I, T8u; + Rm[WS(rs, 14)] = FMA(T6E, T6C, T6I); + Rm[WS(rs, 6)] = FMA(T6M, T6K, T6O); + Rp[WS(rs, 6)] = FNMS(T6M, T6N, T6L); + T8G = FMA(KP923879532, T8t, T8m); + T8u = FNMS(KP923879532, T8t, T8m); + { + E T8j, T8w, T8D, T8v, T8E; + T8j = W[50]; + T8w = W[51]; + T8F = W[18]; + T8J = FMA(KP923879532, T8C, T8z); + T8D = FNMS(KP923879532, T8C, T8z); + T8v = T8j * T8u; + T8E = T8w * T8u; + T8H = T8F * T8G; + T8I = W[19]; + Rp[WS(rs, 13)] = FNMS(T8w, T8D, T8v); + Rm[WS(rs, 13)] = FMA(T8j, T8D, T8E); + } + { + E T6c, T6u, T6x, T6r, T8K, T5J, T6e; + Rp[WS(rs, 5)] = FNMS(T8I, T8J, T8H); + T8K = T8I * T8G; + Rm[WS(rs, 5)] = FMA(T8F, T8J, T8K); + T6c = FNMS(KP707106781, T6b, T5S); + T6u = FMA(KP707106781, T6b, T5S); + T6x = FMA(KP707106781, T6q, T6n); + T6r = FNMS(KP707106781, T6q, T6n); + T5J = W[38]; + T6e = W[39]; + { + E T6t, T6w, T6d, T6s, T6v, T6y; + T6t = W[6]; + T6w = W[7]; + T6d = T5J * T6c; + T6s = T6e * T6c; + T6v = T6t * T6u; + T6y = T6w * T6u; + Rp[WS(rs, 10)] = FNMS(T6e, T6r, T6d); + Rm[WS(rs, 10)] = FMA(T5J, T6r, T6s); + Rp[WS(rs, 2)] = FNMS(T6w, T6x, T6v); + Rm[WS(rs, 2)] = FMA(T6t, T6x, T6y); + } + } + } + } + } + { + E T7c, T7f, T7e, T7g, T7d; + { + E T71, T74, T79, T76, T75, T7b, T7a; + T71 = W[46]; + T7c = T72 + T73; + T74 = T72 - T73; + T7f = T78 + T77; 
+ T79 = T77 - T78; + T76 = W[47]; + T75 = T71 * T74; + T7b = W[14]; + T7a = T71 * T79; + T7e = W[15]; + Rp[WS(rs, 12)] = FNMS(T76, T79, T75); + T7g = T7b * T7f; + T7d = T7b * T7c; + Rm[WS(rs, 12)] = FMA(T76, T74, T7a); + } + { + E T81, T7X, T80, T7Z, T82; + Rm[WS(rs, 4)] = FMA(T7e, T7c, T7g); + Rp[WS(rs, 4)] = FNMS(T7e, T7f, T7d); + { + E T7h, T7Y, T7I, T7V, T7K, T7J, T7W; + T7h = W[42]; + T7Y = FMA(KP923879532, T7H, T7s); + T7I = FNMS(KP923879532, T7H, T7s); + T81 = FMA(KP923879532, T7U, T7R); + T7V = FNMS(KP923879532, T7U, T7R); + T7K = W[43]; + T7J = T7h * T7I; + T7X = W[10]; + T80 = W[11]; + T7W = T7K * T7I; + Rp[WS(rs, 11)] = FNMS(T7K, T7V, T7J); + T7Z = T7X * T7Y; + T82 = T80 * T7Y; + Rm[WS(rs, 11)] = FMA(T7h, T7V, T7W); + } + { + E T2P, T37, T1G, T32, T2R, T2Q, T38, T2z, T27, T2y; + T2P = FMA(KP923879532, T2O, T2L); + T37 = FNMS(KP923879532, T2O, T2L); + Rp[WS(rs, 3)] = FNMS(T80, T81, T7Z); + Rm[WS(rs, 3)] = FMA(T7X, T81, T82); + T1G = FMA(KP923879532, T1F, T1i); + T32 = FNMS(KP923879532, T1F, T1i); + T2R = FNMS(KP668178637, T1X, T26); + T27 = FMA(KP668178637, T26, T1X); + T2y = FNMS(KP668178637, T2x, T2o); + T2Q = FMA(KP668178637, T2o, T2x); + T38 = T2y + T27; + T2z = T27 - T2y; + { + E T2C, T2A, T3c, T34, T2U, T39, T36, T31; + { + E T11, T2W, T2S, T33; + T11 = W[40]; + T2C = W[41]; + T2A = FNMS(KP831469612, T2z, T1G); + T2W = FMA(KP831469612, T2z, T1G); + T2S = T2Q - T2R; + T33 = T2Q + T2R; + { + E T2V, T2B, T2T, T2Z, T2X, T2Y, T30; + T2V = W[8]; + T2B = T11 * T2A; + T3c = FMA(KP831469612, T33, T32); + T34 = FNMS(KP831469612, T33, T32); + T2T = FNMS(KP831469612, T2S, T2P); + T2Z = FMA(KP831469612, T2S, T2P); + T2X = T2V * T2W; + T2Y = W[9]; + T30 = T2V * T2Z; + Ip[WS(rs, 10)] = FNMS(T2C, T2T, T2B); + T2U = T11 * T2T; + Ip[WS(rs, 2)] = FNMS(T2Y, T2Z, T2X); + Im[WS(rs, 2)] = FMA(T2Y, T2W, T30); + } + } + T39 = FNMS(KP831469612, T38, T37); + T3f = FMA(KP831469612, T38, T37); + Im[WS(rs, 10)] = FMA(T2C, T2A, T2U); + T36 = W[25]; + T31 = W[24]; + { + E T3b, 
T3g, T3a, T35; + T3e = W[57]; + T3a = T36 * T34; + T35 = T31 * T34; + T3b = W[56]; + T3g = T3e * T3c; + Im[WS(rs, 6)] = FMA(T31, T39, T3a); + Ip[WS(rs, 6)] = FNMS(T36, T39, T35); + T3d = T3b * T3c; + Im[WS(rs, 14)] = FMA(T3b, T3f, T3g); + } + } + } + } + } + { + E T4G, T4J, T4I, T4F, T4K; + { + E T4z, T4R, T4a, T4M, T4h, T4o, T4C, T4N, T4A, T4B; + T4z = FMA(KP923879532, T4y, T4v); + T4R = FNMS(KP923879532, T4y, T4v); + T4a = FNMS(KP923879532, T49, T42); + T4M = FMA(KP923879532, T49, T42); + Ip[WS(rs, 14)] = FNMS(T3e, T3f, T3d); + T4h = FNMS(KP668178637, T4g, T4d); + T4A = FMA(KP668178637, T4d, T4g); + T4B = FMA(KP668178637, T4k, T4n); + T4o = FNMS(KP668178637, T4n, T4k); + T4C = T4A - T4B; + T4N = T4A + T4B; + { + E T4W, T4Z, T4q, T4X, T50, T4Y; + { + E T4L, T4Q, T4O, T4p, T4S, T4P, T4U, T4V, T4T; + T4L = W[20]; + T4Q = W[21]; + T4W = FMA(KP831469612, T4N, T4M); + T4O = FNMS(KP831469612, T4N, T4M); + T4p = T4h + T4o; + T4S = T4h - T4o; + T4P = T4L * T4O; + T4V = W[52]; + T4Z = FNMS(KP831469612, T4S, T4R); + T4T = FMA(KP831469612, T4S, T4R); + T4q = FNMS(KP831469612, T4p, T4a); + T4G = FMA(KP831469612, T4p, T4a); + Ip[WS(rs, 5)] = FNMS(T4Q, T4T, T4P); + T4U = T4L * T4T; + T4X = T4V * T4W; + T50 = T4V * T4Z; + T4Y = W[53]; + Im[WS(rs, 5)] = FMA(T4Q, T4O, T4U); + } + { + E T4D, T4s, T3Z, T4E, T4r; + T4J = FMA(KP831469612, T4C, T4z); + T4D = FNMS(KP831469612, T4C, T4z); + T4s = W[37]; + Im[WS(rs, 13)] = FMA(T4Y, T4W, T50); + Ip[WS(rs, 13)] = FNMS(T4Y, T4Z, T4X); + T3Z = W[36]; + T4E = T4s * T4q; + T4I = W[5]; + T4r = T3Z * T4q; + Im[WS(rs, 9)] = FMA(T3Z, T4D, T4E); + T4F = W[4]; + T4K = T4I * T4G; + Ip[WS(rs, 9)] = FNMS(T4s, T4D, T4r); + } + } + } + { + E T3E, T3H, T3G, T3D, T3I; + { + E T3x, T3P, T3k, T3K, T3n, T3q, T3A, T3L, T4H, T3y, T3z; + T3x = FMA(KP923879532, T3w, T3v); + T3P = FNMS(KP923879532, T3w, T3v); + T4H = T4F * T4G; + Im[WS(rs, 1)] = FMA(T4F, T4J, T4K); + T3k = FMA(KP923879532, T3j, T3i); + T3K = FNMS(KP923879532, T3j, T3i); + T3y = FMA(KP198912367, 
T3l, T3m); + T3n = FNMS(KP198912367, T3m, T3l); + Ip[WS(rs, 1)] = FNMS(T4I, T4J, T4H); + T3z = FNMS(KP198912367, T3o, T3p); + T3q = FMA(KP198912367, T3p, T3o); + T3A = T3y + T3z; + T3L = T3z - T3y; + { + E T3U, T3X, T3s, T3V, T3Y, T3W; + { + E T3J, T3O, T3M, T3r, T3Q, T3N, T3S, T3T, T3R; + T3J = W[48]; + T3O = W[49]; + T3U = FMA(KP980785280, T3L, T3K); + T3M = FNMS(KP980785280, T3L, T3K); + T3r = T3n + T3q; + T3Q = T3n - T3q; + T3N = T3J * T3M; + T3T = W[16]; + T3X = FMA(KP980785280, T3Q, T3P); + T3R = FNMS(KP980785280, T3Q, T3P); + T3s = FNMS(KP980785280, T3r, T3k); + T3E = FMA(KP980785280, T3r, T3k); + Ip[WS(rs, 12)] = FNMS(T3O, T3R, T3N); + T3S = T3J * T3R; + T3V = T3T * T3U; + T3Y = T3T * T3X; + T3W = W[17]; + Im[WS(rs, 12)] = FMA(T3O, T3M, T3S); + } + { + E T3B, T3u, T3h, T3C, T3t; + T3H = FMA(KP980785280, T3A, T3x); + T3B = FNMS(KP980785280, T3A, T3x); + T3u = W[33]; + Im[WS(rs, 4)] = FMA(T3W, T3U, T3Y); + Ip[WS(rs, 4)] = FNMS(T3W, T3X, T3V); + T3h = W[32]; + T3C = T3u * T3s; + T3G = W[1]; + T3t = T3h * T3s; + Im[WS(rs, 8)] = FMA(T3h, T3B, T3C); + T3D = W[0]; + T3I = T3G * T3E; + Ip[WS(rs, 8)] = FNMS(T3u, T3B, T3t); + } + } + } + { + E T5h, T5z, T54, T5u, T57, T5a, T5k, T5v, T3F, T5i, T5j; + T5h = FMA(KP923879532, T5g, T5f); + T5z = FNMS(KP923879532, T5g, T5f); + T3F = T3D * T3E; + Im[0] = FMA(T3D, T3H, T3I); + T54 = FNMS(KP923879532, T53, T52); + T5u = FMA(KP923879532, T53, T52); + T5i = FMA(KP198912367, T55, T56); + T57 = FNMS(KP198912367, T56, T55); + Ip[0] = FNMS(T3G, T3H, T3F); + T5j = FMA(KP198912367, T58, T59); + T5a = FNMS(KP198912367, T59, T58); + T5k = T5i - T5j; + T5v = T5i + T5j; + { + E T5E, T5H, T5c, T5F, T5I, T5G; + { + E T5t, T5y, T5w, T5b, T5A, T5x, T5C, T5D, T5B; + T5t = W[28]; + T5y = W[29]; + T5E = FMA(KP980785280, T5v, T5u); + T5w = FNMS(KP980785280, T5v, T5u); + T5b = T57 + T5a; + T5A = T5a - T57; + T5x = T5t * T5w; + T5D = W[60]; + T5H = FNMS(KP980785280, T5A, T5z); + T5B = FMA(KP980785280, T5A, T5z); + T5c = FMA(KP980785280, T5b, T54); 
+ T5o = FNMS(KP980785280, T5b, T54); + Ip[WS(rs, 7)] = FNMS(T5y, T5B, T5x); + T5C = T5t * T5B; + T5F = T5D * T5E; + T5I = T5D * T5H; + T5G = W[61]; + Im[WS(rs, 7)] = FMA(T5y, T5w, T5C); + } + { + E T5l, T5e, T51, T5m, T5d; + T5r = FMA(KP980785280, T5k, T5h); + T5l = FNMS(KP980785280, T5k, T5h); + T5e = W[45]; + Im[WS(rs, 15)] = FMA(T5G, T5E, T5I); + Ip[WS(rs, 15)] = FNMS(T5G, T5H, T5F); + T51 = W[44]; + T5m = T5e * T5c; + T5q = W[13]; + T5d = T51 * T5c; + Im[WS(rs, 11)] = FMA(T51, T5l, T5m); + T5n = W[12]; + T5s = T5q * T5o; + Ip[WS(rs, 11)] = FNMS(T5e, T5l, T5d); + } + } + } + } + } + } + } + T5p = T5n * T5o; + Im[WS(rs, 3)] = FMA(T5n, T5r, T5s); + Ip[WS(rs, 3)] = FNMS(T5q, T5r, T5p); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cb_32", twinstr, &GENUS, {236, 62, 198, 0} }; + +void X(codelet_hc2cb_32) (planner *p) { + X(khc2c_register) (p, hc2cb_32, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include hc2cb.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 98 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + 
((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T4o, T6y, T70, T5u, Tf, T12, T5x, T6z, T3m, T3Y, T29, T2y, T4v, T71, T2U; + E T3M, Tu, T1U, T6D, T73, T6G, T74, T1h, T2z, T2X, T3o, T4D, T5A, T4K, T5z; + E T30, T3n, TK, T1j, T6S, T7w, T6V, T7v, T1y, T2B, T3c, T3S, T4X, T61, T54; + E T62, T3f, T3T, TZ, T1A, T6L, T7z, T6O, T7y, T1P, T2C, T35, T3P, T5g, T64; + E T5n, T65, T38, T3Q; + { + E T3, T4m, T1X, T5t, T6, T5s, T20, T4n, Ta, T4p, T24, T4q, Td, T4s, T27; + E T4t; + { + E T1, T2, T1V, T1W; + T1 = Rp[0]; + T2 = Rm[WS(rs, 15)]; + T3 = T1 + T2; + T4m = T1 - T2; + T1V = Ip[0]; + T1W = Im[WS(rs, 15)]; + T1X = T1V - T1W; + T5t = T1V + T1W; + } + { + E T4, T5, T1Y, T1Z; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + T6 = T4 + T5; + T5s = T4 - T5; + T1Y = Ip[WS(rs, 8)]; + T1Z = Im[WS(rs, 7)]; + T20 = T1Y - T1Z; + T4n = T1Y + T1Z; + } + { + E T8, T9, T22, T23; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + Ta = T8 + T9; + T4p = T8 - T9; + T22 = Ip[WS(rs, 4)]; + T23 = Im[WS(rs, 11)]; + T24 = T22 - T23; + T4q = T22 + T23; + } + { + E Tb, Tc, T25, T26; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + Td = Tb + Tc; + T4s = Tb - Tc; + T25 = Ip[WS(rs, 12)]; + T26 = Im[WS(rs, 3)]; + T27 = T25 - T26; + T4t = T25 + T26; + } + { + E T7, Te, T21, T28; + T4o = T4m - T4n; + T6y = T4m + T4n; + T70 = T5t - T5s; + T5u = T5s + T5t; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T12 = T7 - Te; + { + E T5v, T5w, T3k, T3l; + T5v = T4p + T4q; + T5w = T4s + T4t; + T5x = KP707106781 * (T5v - T5w); + T6z = KP707106781 * (T5v + T5w); + T3k = T1X - T20; + T3l = Ta - Td; + T3m = T3k - T3l; + T3Y = T3l + T3k; + } + T21 = T1X + T20; + T28 = T24 + T27; + T29 = T21 - T28; + T2y = T21 + T28; + { + E T4r, T4u, T2S, T2T; + T4r = T4p - T4q; + T4u = T4s - T4t; + T4v = KP707106781 * (T4r + T4u); + T71 = KP707106781 * (T4r - T4u); + T2S = T3 - T6; + T2T = T27 - T24; + T2U = T2S - T2T; + T3M = T2S + T2T; + } + } + } + { + 
E Ti, T4H, T1c, T4F, Tl, T4E, T1f, T4I, Tp, T4A, T15, T4y, Ts, T4x, T18; + E T4B; + { + E Tg, Th, T1a, T1b; + Tg = Rp[WS(rs, 2)]; + Th = Rm[WS(rs, 13)]; + Ti = Tg + Th; + T4H = Tg - Th; + T1a = Ip[WS(rs, 2)]; + T1b = Im[WS(rs, 13)]; + T1c = T1a - T1b; + T4F = T1a + T1b; + } + { + E Tj, Tk, T1d, T1e; + Tj = Rp[WS(rs, 10)]; + Tk = Rm[WS(rs, 5)]; + Tl = Tj + Tk; + T4E = Tj - Tk; + T1d = Ip[WS(rs, 10)]; + T1e = Im[WS(rs, 5)]; + T1f = T1d - T1e; + T4I = T1d + T1e; + } + { + E Tn, To, T13, T14; + Tn = Rm[WS(rs, 1)]; + To = Rp[WS(rs, 14)]; + Tp = Tn + To; + T4A = Tn - To; + T13 = Ip[WS(rs, 14)]; + T14 = Im[WS(rs, 1)]; + T15 = T13 - T14; + T4y = T13 + T14; + } + { + E Tq, Tr, T16, T17; + Tq = Rp[WS(rs, 6)]; + Tr = Rm[WS(rs, 9)]; + Ts = Tq + Tr; + T4x = Tq - Tr; + T16 = Ip[WS(rs, 6)]; + T17 = Im[WS(rs, 9)]; + T18 = T16 - T17; + T4B = T16 + T17; + } + { + E Tm, Tt, T6B, T6C; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T1U = Tm - Tt; + T6B = T4H + T4I; + T6C = T4F - T4E; + T6D = FNMS(KP923879532, T6C, KP382683432 * T6B); + T73 = FMA(KP382683432, T6C, KP923879532 * T6B); + } + { + E T6E, T6F, T19, T1g; + T6E = T4A + T4B; + T6F = T4x + T4y; + T6G = FNMS(KP923879532, T6F, KP382683432 * T6E); + T74 = FMA(KP382683432, T6F, KP923879532 * T6E); + T19 = T15 + T18; + T1g = T1c + T1f; + T1h = T19 - T1g; + T2z = T1g + T19; + } + { + E T2V, T2W, T4z, T4C; + T2V = T15 - T18; + T2W = Tp - Ts; + T2X = T2V - T2W; + T3o = T2W + T2V; + T4z = T4x - T4y; + T4C = T4A - T4B; + T4D = FNMS(KP382683432, T4C, KP923879532 * T4z); + T5A = FMA(KP382683432, T4z, KP923879532 * T4C); + } + { + E T4G, T4J, T2Y, T2Z; + T4G = T4E + T4F; + T4J = T4H - T4I; + T4K = FMA(KP923879532, T4G, KP382683432 * T4J); + T5z = FNMS(KP382683432, T4G, KP923879532 * T4J); + T2Y = Ti - Tl; + T2Z = T1c - T1f; + T30 = T2Y + T2Z; + T3n = T2Y - T2Z; + } + } + { + E Ty, T4N, T1m, T4Z, TB, T4Y, T1p, T4O, TI, T52, T1w, T4V, TF, T51, T1t; + E T4S; + { + E Tw, Tx, T1n, T1o; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + Ty = Tw + 
Tx; + T4N = Tw - Tx; + { + E T1k, T1l, Tz, TA; + T1k = Ip[WS(rs, 1)]; + T1l = Im[WS(rs, 14)]; + T1m = T1k - T1l; + T4Z = T1k + T1l; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + TB = Tz + TA; + T4Y = Tz - TA; + } + T1n = Ip[WS(rs, 9)]; + T1o = Im[WS(rs, 6)]; + T1p = T1n - T1o; + T4O = T1n + T1o; + { + E TG, TH, T4T, T1u, T1v, T4U; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + T4T = TG - TH; + T1u = Ip[WS(rs, 13)]; + T1v = Im[WS(rs, 2)]; + T4U = T1u + T1v; + TI = TG + TH; + T52 = T4T + T4U; + T1w = T1u - T1v; + T4V = T4T - T4U; + } + { + E TD, TE, T4Q, T1r, T1s, T4R; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + T4Q = TD - TE; + T1r = Ip[WS(rs, 5)]; + T1s = Im[WS(rs, 10)]; + T4R = T1r + T1s; + TF = TD + TE; + T51 = T4Q + T4R; + T1t = T1r - T1s; + T4S = T4Q - T4R; + } + } + { + E TC, TJ, T6Q, T6R; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T1j = TC - TJ; + T6Q = T4Z - T4Y; + T6R = KP707106781 * (T4S - T4V); + T6S = T6Q + T6R; + T7w = T6Q - T6R; + } + { + E T6T, T6U, T1q, T1x; + T6T = T4N + T4O; + T6U = KP707106781 * (T51 + T52); + T6V = T6T - T6U; + T7v = T6T + T6U; + T1q = T1m + T1p; + T1x = T1t + T1w; + T1y = T1q - T1x; + T2B = T1q + T1x; + } + { + E T3a, T3b, T4P, T4W; + T3a = T1m - T1p; + T3b = TF - TI; + T3c = T3a - T3b; + T3S = T3b + T3a; + T4P = T4N - T4O; + T4W = KP707106781 * (T4S + T4V); + T4X = T4P - T4W; + T61 = T4P + T4W; + } + { + E T50, T53, T3d, T3e; + T50 = T4Y + T4Z; + T53 = KP707106781 * (T51 - T52); + T54 = T50 - T53; + T62 = T50 + T53; + T3d = Ty - TB; + T3e = T1w - T1t; + T3f = T3d - T3e; + T3T = T3d + T3e; + } + } + { + E TN, T56, T1D, T5i, TQ, T5h, T1G, T57, TX, T5l, T1N, T5e, TU, T5k, T1K; + E T5b; + { + E TL, TM, T1E, T1F; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + TN = TL + TM; + T56 = TL - TM; + { + E T1B, T1C, TO, TP; + T1B = Ip[WS(rs, 15)]; + T1C = Im[0]; + T1D = T1B - T1C; + T5i = T1B + T1C; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + TQ = TO + TP; + T5h = TO - TP; + } + T1E = Ip[WS(rs, 7)]; + T1F = Im[WS(rs, 8)]; + T1G = T1E - 
T1F; + T57 = T1E + T1F; + { + E TV, TW, T5c, T1L, T1M, T5d; + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + T5c = TV - TW; + T1L = Ip[WS(rs, 11)]; + T1M = Im[WS(rs, 4)]; + T5d = T1L + T1M; + TX = TV + TW; + T5l = T5c + T5d; + T1N = T1L - T1M; + T5e = T5c - T5d; + } + { + E TS, TT, T59, T1I, T1J, T5a; + TS = Rp[WS(rs, 3)]; + TT = Rm[WS(rs, 12)]; + T59 = TS - TT; + T1I = Ip[WS(rs, 3)]; + T1J = Im[WS(rs, 12)]; + T5a = T1I + T1J; + TU = TS + TT; + T5k = T59 + T5a; + T1K = T1I - T1J; + T5b = T59 - T5a; + } + } + { + E TR, TY, T6J, T6K; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + T1A = TR - TY; + T6J = KP707106781 * (T5b - T5e); + T6K = T5h + T5i; + T6L = T6J - T6K; + T7z = T6K + T6J; + } + { + E T6M, T6N, T1H, T1O; + T6M = T56 + T57; + T6N = KP707106781 * (T5k + T5l); + T6O = T6M - T6N; + T7y = T6M + T6N; + T1H = T1D + T1G; + T1O = T1K + T1N; + T1P = T1H - T1O; + T2C = T1H + T1O; + } + { + E T33, T34, T58, T5f; + T33 = T1D - T1G; + T34 = TU - TX; + T35 = T33 - T34; + T3P = T34 + T33; + T58 = T56 - T57; + T5f = KP707106781 * (T5b + T5e); + T5g = T58 - T5f; + T64 = T58 + T5f; + } + { + E T5j, T5m, T36, T37; + T5j = T5h - T5i; + T5m = KP707106781 * (T5k - T5l); + T5n = T5j - T5m; + T65 = T5j + T5m; + T36 = TN - TQ; + T37 = T1N - T1K; + T38 = T36 - T37; + T3Q = T36 + T37; + } + } + { + E Tv, T10, T2w, T2A, T2D, T2E, T2v, T2x; + Tv = Tf + Tu; + T10 = TK + TZ; + T2w = Tv - T10; + T2A = T2y + T2z; + T2D = T2B + T2C; + T2E = T2A - T2D; + Rp[0] = Tv + T10; + Rm[0] = T2A + T2D; + T2v = W[30]; + T2x = W[31]; + Rp[WS(rs, 8)] = FNMS(T2x, T2E, T2v * T2w); + Rm[WS(rs, 8)] = FMA(T2x, T2w, T2v * T2E); + } + { + E T2I, T2O, T2M, T2Q; + { + E T2G, T2H, T2K, T2L; + T2G = Tf - Tu; + T2H = T2C - T2B; + T2I = T2G - T2H; + T2O = T2G + T2H; + T2K = T2y - T2z; + T2L = TK - TZ; + T2M = T2K - T2L; + T2Q = T2L + T2K; + } + { + E T2F, T2J, T2N, T2P; + T2F = W[46]; + T2J = W[47]; + Rp[WS(rs, 12)] = FNMS(T2J, T2M, T2F * T2I); + Rm[WS(rs, 12)] = FMA(T2F, T2M, T2J * T2I); + T2N = W[14]; + T2P = 
W[15]; + Rp[WS(rs, 4)] = FNMS(T2P, T2Q, T2N * T2O); + Rm[WS(rs, 4)] = FMA(T2N, T2Q, T2P * T2O); + } + } + { + E T1i, T2a, T2o, T2k, T2d, T2l, T1R, T2p; + T1i = T12 + T1h; + T2a = T1U + T29; + T2o = T29 - T1U; + T2k = T12 - T1h; + { + E T2b, T2c, T1z, T1Q; + T2b = T1j + T1y; + T2c = T1P - T1A; + T2d = KP707106781 * (T2b + T2c); + T2l = KP707106781 * (T2c - T2b); + T1z = T1j - T1y; + T1Q = T1A + T1P; + T1R = KP707106781 * (T1z + T1Q); + T2p = KP707106781 * (T1z - T1Q); + } + { + E T1S, T2e, T11, T1T; + T1S = T1i - T1R; + T2e = T2a - T2d; + T11 = W[38]; + T1T = W[39]; + Rp[WS(rs, 10)] = FNMS(T1T, T2e, T11 * T1S); + Rm[WS(rs, 10)] = FMA(T1T, T1S, T11 * T2e); + } + { + E T2s, T2u, T2r, T2t; + T2s = T2k + T2l; + T2u = T2o + T2p; + T2r = W[22]; + T2t = W[23]; + Rp[WS(rs, 6)] = FNMS(T2t, T2u, T2r * T2s); + Rm[WS(rs, 6)] = FMA(T2r, T2u, T2t * T2s); + } + { + E T2g, T2i, T2f, T2h; + T2g = T1i + T1R; + T2i = T2a + T2d; + T2f = W[6]; + T2h = W[7]; + Rp[WS(rs, 2)] = FNMS(T2h, T2i, T2f * T2g); + Rm[WS(rs, 2)] = FMA(T2h, T2g, T2f * T2i); + } + { + E T2m, T2q, T2j, T2n; + T2m = T2k - T2l; + T2q = T2o - T2p; + T2j = W[54]; + T2n = W[55]; + Rp[WS(rs, 14)] = FNMS(T2n, T2q, T2j * T2m); + Rm[WS(rs, 14)] = FMA(T2j, T2q, T2n * T2m); + } + } + { + E T3O, T4a, T40, T4e, T3V, T4f, T43, T4b, T3N, T3Z; + T3N = KP707106781 * (T3n + T3o); + T3O = T3M - T3N; + T4a = T3M + T3N; + T3Z = KP707106781 * (T30 + T2X); + T40 = T3Y - T3Z; + T4e = T3Y + T3Z; + { + E T3R, T3U, T41, T42; + T3R = FNMS(KP382683432, T3Q, KP923879532 * T3P); + T3U = FMA(KP923879532, T3S, KP382683432 * T3T); + T3V = T3R - T3U; + T4f = T3U + T3R; + T41 = FNMS(KP382683432, T3S, KP923879532 * T3T); + T42 = FMA(KP382683432, T3P, KP923879532 * T3Q); + T43 = T41 - T42; + T4b = T41 + T42; + } + { + E T3W, T44, T3L, T3X; + T3W = T3O - T3V; + T44 = T40 - T43; + T3L = W[50]; + T3X = W[51]; + Rp[WS(rs, 13)] = FNMS(T3X, T44, T3L * T3W); + Rm[WS(rs, 13)] = FMA(T3X, T3W, T3L * T44); + } + { + E T4i, T4k, T4h, T4j; + T4i = T4a + T4b; + T4k = 
T4e + T4f; + T4h = W[2]; + T4j = W[3]; + Rp[WS(rs, 1)] = FNMS(T4j, T4k, T4h * T4i); + Rm[WS(rs, 1)] = FMA(T4h, T4k, T4j * T4i); + } + { + E T46, T48, T45, T47; + T46 = T3O + T3V; + T48 = T40 + T43; + T45 = W[18]; + T47 = W[19]; + Rp[WS(rs, 5)] = FNMS(T47, T48, T45 * T46); + Rm[WS(rs, 5)] = FMA(T47, T46, T45 * T48); + } + { + E T4c, T4g, T49, T4d; + T4c = T4a - T4b; + T4g = T4e - T4f; + T49 = W[34]; + T4d = W[35]; + Rp[WS(rs, 9)] = FNMS(T4d, T4g, T49 * T4c); + Rm[WS(rs, 9)] = FMA(T49, T4g, T4d * T4c); + } + } + { + E T32, T3A, T3q, T3E, T3h, T3F, T3t, T3B, T31, T3p; + T31 = KP707106781 * (T2X - T30); + T32 = T2U - T31; + T3A = T2U + T31; + T3p = KP707106781 * (T3n - T3o); + T3q = T3m - T3p; + T3E = T3m + T3p; + { + E T39, T3g, T3r, T3s; + T39 = FNMS(KP923879532, T38, KP382683432 * T35); + T3g = FMA(KP382683432, T3c, KP923879532 * T3f); + T3h = T39 - T3g; + T3F = T3g + T39; + T3r = FNMS(KP923879532, T3c, KP382683432 * T3f); + T3s = FMA(KP923879532, T35, KP382683432 * T38); + T3t = T3r - T3s; + T3B = T3r + T3s; + } + { + E T3i, T3u, T2R, T3j; + T3i = T32 - T3h; + T3u = T3q - T3t; + T2R = W[58]; + T3j = W[59]; + Rp[WS(rs, 15)] = FNMS(T3j, T3u, T2R * T3i); + Rm[WS(rs, 15)] = FMA(T3j, T3i, T2R * T3u); + } + { + E T3I, T3K, T3H, T3J; + T3I = T3A + T3B; + T3K = T3E + T3F; + T3H = W[10]; + T3J = W[11]; + Rp[WS(rs, 3)] = FNMS(T3J, T3K, T3H * T3I); + Rm[WS(rs, 3)] = FMA(T3H, T3K, T3J * T3I); + } + { + E T3w, T3y, T3v, T3x; + T3w = T32 + T3h; + T3y = T3q + T3t; + T3v = W[26]; + T3x = W[27]; + Rp[WS(rs, 7)] = FNMS(T3x, T3y, T3v * T3w); + Rm[WS(rs, 7)] = FMA(T3x, T3w, T3v * T3y); + } + { + E T3C, T3G, T3z, T3D; + T3C = T3A - T3B; + T3G = T3E - T3F; + T3z = W[42]; + T3D = W[43]; + Rp[WS(rs, 11)] = FNMS(T3D, T3G, T3z * T3C); + Rm[WS(rs, 11)] = FMA(T3z, T3G, T3D * T3C); + } + } + { + E T60, T6m, T6f, T6n, T67, T6r, T6c, T6q; + { + E T5Y, T5Z, T6d, T6e; + T5Y = T4o + T4v; + T5Z = T5z + T5A; + T60 = T5Y + T5Z; + T6m = T5Y - T5Z; + T6d = FMA(KP195090322, T61, KP980785280 * T62); + T6e 
= FNMS(KP195090322, T64, KP980785280 * T65); + T6f = T6d + T6e; + T6n = T6e - T6d; + } + { + E T63, T66, T6a, T6b; + T63 = FNMS(KP195090322, T62, KP980785280 * T61); + T66 = FMA(KP980785280, T64, KP195090322 * T65); + T67 = T63 + T66; + T6r = T63 - T66; + T6a = T5u + T5x; + T6b = T4K + T4D; + T6c = T6a + T6b; + T6q = T6a - T6b; + } + { + E T68, T6g, T5X, T69; + T68 = T60 - T67; + T6g = T6c - T6f; + T5X = W[32]; + T69 = W[33]; + Ip[WS(rs, 8)] = FNMS(T69, T6g, T5X * T68); + Im[WS(rs, 8)] = FMA(T69, T68, T5X * T6g); + } + { + E T6u, T6w, T6t, T6v; + T6u = T6m + T6n; + T6w = T6q + T6r; + T6t = W[16]; + T6v = W[17]; + Ip[WS(rs, 4)] = FNMS(T6v, T6w, T6t * T6u); + Im[WS(rs, 4)] = FMA(T6t, T6w, T6v * T6u); + } + { + E T6i, T6k, T6h, T6j; + T6i = T60 + T67; + T6k = T6c + T6f; + T6h = W[0]; + T6j = W[1]; + Ip[0] = FNMS(T6j, T6k, T6h * T6i); + Im[0] = FMA(T6j, T6i, T6h * T6k); + } + { + E T6o, T6s, T6l, T6p; + T6o = T6m - T6n; + T6s = T6q - T6r; + T6l = W[48]; + T6p = W[49]; + Ip[WS(rs, 12)] = FNMS(T6p, T6s, T6l * T6o); + Im[WS(rs, 12)] = FMA(T6l, T6s, T6p * T6o); + } + } + { + E T7u, T7Q, T7J, T7R, T7B, T7V, T7G, T7U; + { + E T7s, T7t, T7H, T7I; + T7s = T6y + T6z; + T7t = T73 + T74; + T7u = T7s - T7t; + T7Q = T7s + T7t; + T7H = FMA(KP195090322, T7w, KP980785280 * T7v); + T7I = FMA(KP195090322, T7z, KP980785280 * T7y); + T7J = T7H - T7I; + T7R = T7H + T7I; + } + { + E T7x, T7A, T7E, T7F; + T7x = FNMS(KP980785280, T7w, KP195090322 * T7v); + T7A = FNMS(KP980785280, T7z, KP195090322 * T7y); + T7B = T7x + T7A; + T7V = T7x - T7A; + T7E = T70 - T71; + T7F = T6D - T6G; + T7G = T7E + T7F; + T7U = T7E - T7F; + } + { + E T7C, T7K, T7r, T7D; + T7C = T7u - T7B; + T7K = T7G - T7J; + T7r = W[44]; + T7D = W[45]; + Ip[WS(rs, 11)] = FNMS(T7D, T7K, T7r * T7C); + Im[WS(rs, 11)] = FMA(T7D, T7C, T7r * T7K); + } + { + E T7Y, T80, T7X, T7Z; + T7Y = T7Q + T7R; + T80 = T7U - T7V; + T7X = W[60]; + T7Z = W[61]; + Ip[WS(rs, 15)] = FNMS(T7Z, T80, T7X * T7Y); + Im[WS(rs, 15)] = FMA(T7X, T80, T7Z * T7Y); + 
} + { + E T7M, T7O, T7L, T7N; + T7M = T7u + T7B; + T7O = T7G + T7J; + T7L = W[12]; + T7N = W[13]; + Ip[WS(rs, 3)] = FNMS(T7N, T7O, T7L * T7M); + Im[WS(rs, 3)] = FMA(T7N, T7M, T7L * T7O); + } + { + E T7S, T7W, T7P, T7T; + T7S = T7Q - T7R; + T7W = T7U + T7V; + T7P = W[28]; + T7T = W[29]; + Ip[WS(rs, 7)] = FNMS(T7T, T7W, T7P * T7S); + Im[WS(rs, 7)] = FMA(T7P, T7W, T7T * T7S); + } + } + { + E T4M, T5M, T5F, T5N, T5p, T5R, T5C, T5Q; + { + E T4w, T4L, T5D, T5E; + T4w = T4o - T4v; + T4L = T4D - T4K; + T4M = T4w + T4L; + T5M = T4w - T4L; + T5D = FMA(KP831469612, T4X, KP555570233 * T54); + T5E = FNMS(KP831469612, T5g, KP555570233 * T5n); + T5F = T5D + T5E; + T5N = T5E - T5D; + } + { + E T55, T5o, T5y, T5B; + T55 = FNMS(KP831469612, T54, KP555570233 * T4X); + T5o = FMA(KP555570233, T5g, KP831469612 * T5n); + T5p = T55 + T5o; + T5R = T55 - T5o; + T5y = T5u - T5x; + T5B = T5z - T5A; + T5C = T5y + T5B; + T5Q = T5y - T5B; + } + { + E T5q, T5G, T4l, T5r; + T5q = T4M - T5p; + T5G = T5C - T5F; + T4l = W[40]; + T5r = W[41]; + Ip[WS(rs, 10)] = FNMS(T5r, T5G, T4l * T5q); + Im[WS(rs, 10)] = FMA(T5r, T5q, T4l * T5G); + } + { + E T5U, T5W, T5T, T5V; + T5U = T5M + T5N; + T5W = T5Q + T5R; + T5T = W[24]; + T5V = W[25]; + Ip[WS(rs, 6)] = FNMS(T5V, T5W, T5T * T5U); + Im[WS(rs, 6)] = FMA(T5T, T5W, T5V * T5U); + } + { + E T5I, T5K, T5H, T5J; + T5I = T4M + T5p; + T5K = T5C + T5F; + T5H = W[8]; + T5J = W[9]; + Ip[WS(rs, 2)] = FNMS(T5J, T5K, T5H * T5I); + Im[WS(rs, 2)] = FMA(T5J, T5I, T5H * T5K); + } + { + E T5O, T5S, T5L, T5P; + T5O = T5M - T5N; + T5S = T5Q - T5R; + T5L = W[56]; + T5P = W[57]; + Ip[WS(rs, 14)] = FNMS(T5P, T5S, T5L * T5O); + Im[WS(rs, 14)] = FMA(T5L, T5S, T5P * T5O); + } + } + { + E T6I, T7g, T79, T7h, T6X, T7l, T76, T7k; + { + E T6A, T6H, T77, T78; + T6A = T6y - T6z; + T6H = T6D + T6G; + T6I = T6A - T6H; + T7g = T6A + T6H; + T77 = FNMS(KP555570233, T6S, KP831469612 * T6V); + T78 = FMA(KP555570233, T6L, KP831469612 * T6O); + T79 = T77 - T78; + T7h = T77 + T78; + } + { + E T6P, 
T6W, T72, T75; + T6P = FNMS(KP555570233, T6O, KP831469612 * T6L); + T6W = FMA(KP831469612, T6S, KP555570233 * T6V); + T6X = T6P - T6W; + T7l = T6W + T6P; + T72 = T70 + T71; + T75 = T73 - T74; + T76 = T72 - T75; + T7k = T72 + T75; + } + { + E T6Y, T7a, T6x, T6Z; + T6Y = T6I - T6X; + T7a = T76 - T79; + T6x = W[52]; + T6Z = W[53]; + Ip[WS(rs, 13)] = FNMS(T6Z, T7a, T6x * T6Y); + Im[WS(rs, 13)] = FMA(T6Z, T6Y, T6x * T7a); + } + { + E T7o, T7q, T7n, T7p; + T7o = T7g + T7h; + T7q = T7k + T7l; + T7n = W[4]; + T7p = W[5]; + Ip[WS(rs, 1)] = FNMS(T7p, T7q, T7n * T7o); + Im[WS(rs, 1)] = FMA(T7n, T7q, T7p * T7o); + } + { + E T7c, T7e, T7b, T7d; + T7c = T6I + T6X; + T7e = T76 + T79; + T7b = W[20]; + T7d = W[21]; + Ip[WS(rs, 5)] = FNMS(T7d, T7e, T7b * T7c); + Im[WS(rs, 5)] = FMA(T7d, T7c, T7b * T7e); + } + { + E T7i, T7m, T7f, T7j; + T7i = T7g - T7h; + T7m = T7k - T7l; + T7f = W[36]; + T7j = W[37]; + Ip[WS(rs, 9)] = FNMS(T7j, T7m, T7f * T7i); + Im[WS(rs, 9)] = FMA(T7f, T7m, T7j * T7i); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cb_32", twinstr, &GENUS, {340, 114, 94, 0} }; + +void X(codelet_hc2cb_32) (planner *p) { + X(khc2c_register) (p, hc2cb_32, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:37 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hc2cb_4 -include hc2cb.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 25 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Th, Ta, T7, Ti, T9; + { + E Tq, Td, T3, Tg, Tu, Tm, T6, Tp; + { + E Tk, T4, Tl, T5; + { + E Tb, Tc, T1, T2, Te, Tf; + Tb = Ip[0]; + Tc = Im[WS(rs, 1)]; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + Te = Ip[WS(rs, 1)]; + Tq = Tb + Tc; + Td = Tb - Tc; + Tf = Im[0]; + Tk = T1 - T2; + T3 = T1 + T2; + T4 = Rp[WS(rs, 1)]; + Tg = Te - Tf; + Tl = Te + Tf; + T5 = Rm[0]; + } + Tu = Tk + Tl; + Tm = Tk - Tl; + T6 = T4 + T5; + Tp = T4 - T5; + } + Rm[0] = Td + Tg; + { + E Tx, Tr, T8, Tn, Ts, To, Tj; + Tj = W[0]; + Tx = Tq - Tp; + Tr = Tp + Tq; + Rp[0] = T3 + T6; + T8 = T3 - T6; + Tn = Tj * Tm; + Ts = Tj * Tr; + To = W[1]; + { + E Tt, Tw, Ty, Tv; + Tt = W[4]; + Tw = W[5]; + Th = Td - 
Tg; + Im[0] = FMA(To, Tm, Ts); + Ip[0] = FNMS(To, Tr, Tn); + Ty = Tt * Tx; + Tv = Tt * Tu; + Ta = W[3]; + T7 = W[2]; + Im[WS(rs, 1)] = FMA(Tw, Tu, Ty); + Ip[WS(rs, 1)] = FNMS(Tw, Tx, Tv); + Ti = Ta * T8; + T9 = T7 * T8; + } + } + } + Rm[WS(rs, 1)] = FMA(T7, Th, Ti); + Rp[WS(rs, 1)] = FNMS(Ta, Th, T9); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cb_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hc2cb_4) (planner *p) { + X(khc2c_register) (p, hc2cb_4, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hc2cb_4 -include hc2cb.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E T3, Ti, Tc, Tn, T6, Tm, Tf, Tj; + { + E T1, T2, Ta, Tb; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + T3 = T1 + T2; + Ti = T1 - T2; + Ta = Ip[0]; + Tb = Im[WS(rs, 1)]; + Tc = Ta - Tb; + Tn = Ta + Tb; + } + { + E T4, T5, Td, Te; + T4 = Rp[WS(rs, 1)]; + T5 = Rm[0]; + T6 = T4 + T5; + Tm = T4 - T5; + Td = Ip[WS(rs, 1)]; + Te = Im[0]; + Tf = Td - Te; + Tj = Td + Te; + } + Rp[0] = T3 + T6; + Rm[0] = Tc + Tf; + { + E T8, Tg, T7, T9; + T8 = T3 - T6; + Tg = Tc - Tf; + T7 = W[2]; + T9 = W[3]; + Rp[WS(rs, 1)] = FNMS(T9, Tg, T7 * T8); + Rm[WS(rs, 1)] = FMA(T9, T8, T7 * Tg); + } + { + E Tk, To, Th, Tl; + Tk = Ti - Tj; + To = Tm + Tn; + Th = W[0]; + Tl = W[1]; + Ip[0] = FNMS(Tl, To, Th * Tk); + Im[0] = FMA(Th, To, Tl * Tk); + } + { + E Tq, Ts, Tp, Tr; + Tq = Ti + Tj; 
+ Ts = Tn - Tm; + Tp = W[4]; + Tr = W[5]; + Ip[WS(rs, 1)] = FNMS(Tr, Ts, Tp * Tq); + Im[WS(rs, 1)] = FMA(Tp, Ts, Tr * Tq); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cb_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hc2cb_4) (planner *p) { + X(khc2c_register) (p, hc2cb_4, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:37 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cb_6 -include hc2cb.h */ + +/* + * This function contains 46 FP additions, 32 FP multiplications, + * (or, 24 additions, 10 multiplications, 22 fused multiply/add), + * 45 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E TK, TR, TB, TM, TL, TS; + { + E Td, TN, TO, TJ, Tn, Tk, TC, T3, Tr, T7, T8, T4, T5; + { + E TI, Tj, Tg, TH, Te, Tf, T1, T2; + { + E Tb, Tc, Th, Ti; + Tb = Ip[0]; + Tc = Im[WS(rs, 2)]; + Th = Ip[WS(rs, 1)]; + Ti = Im[WS(rs, 1)]; + Te = Ip[WS(rs, 2)]; + Td = Tb - Tc; + TN = Tb + Tc; + Tf = Im[0]; + TI = Th + Ti; + Tj = Th - Ti; + } + Tg = Te - Tf; + TH = Te + Tf; + T1 = Rp[0]; + T2 = Rm[WS(rs, 2)]; + TO = TH - TI; + TJ = TH + TI; + Tn = Tj - Tg; + Tk = Tg + Tj; + TC = T1 - T2; + T3 = T1 + T2; + Tr = FNMS(KP500000000, Tk, Td); + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 1)]; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[0]; + } + { + E Tl, Tq, TQ, Ts, Ta, T10, TG; + Rm[0] = Td + Tk; + { + E T9, TE, T6, TD, TF; + T9 = T7 + T8; + TE = T7 - T8; + T6 = T4 + T5; + TD = T4 - T5; + Tl = W[2]; + Tq = 
W[3]; + TQ = TD - TE; + TF = TD + TE; + Ts = T6 - T9; + Ta = T6 + T9; + T10 = TC + TF; + TG = FNMS(KP500000000, TF, TC); + } + { + E T13, TP, Tz, TZ, Tw, T14, Tv, Ty; + { + E Tt, T12, T11, Tp, Tm, To, Tu; + T13 = TN + TO; + TP = FNMS(KP500000000, TO, TN); + Rp[0] = T3 + Ta; + Tm = FNMS(KP500000000, Ta, T3); + Tz = FMA(KP866025403, Ts, Tr); + Tt = FNMS(KP866025403, Ts, Tr); + TZ = W[4]; + To = FNMS(KP866025403, Tn, Tm); + Tw = FMA(KP866025403, Tn, Tm); + Tu = Tl * Tt; + T12 = W[5]; + T11 = TZ * T10; + Tp = Tl * To; + Rm[WS(rs, 1)] = FMA(Tq, To, Tu); + T14 = T12 * T10; + Ip[WS(rs, 1)] = FNMS(T12, T13, T11); + Rp[WS(rs, 1)] = FNMS(Tq, Tt, Tp); + } + Im[WS(rs, 1)] = FMA(TZ, T13, T14); + Tv = W[6]; + Ty = W[7]; + { + E TX, TT, TW, TV, TY, TU, TA, Tx; + TK = FNMS(KP866025403, TJ, TG); + TU = FMA(KP866025403, TJ, TG); + TA = Tv * Tz; + Tx = Tv * Tw; + TX = FNMS(KP866025403, TQ, TP); + TR = FMA(KP866025403, TQ, TP); + Rm[WS(rs, 2)] = FMA(Ty, Tw, TA); + Rp[WS(rs, 2)] = FNMS(Ty, Tz, Tx); + TT = W[8]; + TW = W[9]; + TB = W[0]; + TV = TT * TU; + TY = TW * TU; + TM = W[1]; + TL = TB * TK; + Ip[WS(rs, 2)] = FNMS(TW, TX, TV); + Im[WS(rs, 2)] = FMA(TT, TX, TY); + } + } + } + } + Ip[0] = FNMS(TM, TR, TL); + TS = TM * TK; + Im[0] = FMA(TB, TR, TS); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cb_6", twinstr, &GENUS, {24, 10, 22, 0} }; + +void X(codelet_hc2cb_6) (planner *p) { + X(khc2c_register) (p, hc2cb_6, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cb_6 -include hc2cb.h */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 25 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT 
mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E T3, Ty, Td, TE, Ta, TO, Tr, TB, Tk, TL, Tn, TH; + { + E T1, T2, Tb, Tc; + T1 = Rp[0]; + T2 = Rm[WS(rs, 2)]; + T3 = T1 + T2; + Ty = T1 - T2; + Tb = Ip[0]; + Tc = Im[WS(rs, 2)]; + Td = Tb - Tc; + TE = Tb + Tc; + } + { + E T6, Tz, T9, TA; + { + E T4, T5, T7, T8; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[0]; + T6 = T4 + T5; + Tz = T4 - T5; + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 1)]; + T9 = T7 + T8; + TA = T7 - T8; + } + Ta = T6 + T9; + TO = KP866025403 * (Tz - TA); + Tr = KP866025403 * (T6 - T9); + TB = Tz + TA; + } + { + E Tg, TG, Tj, TF; + { + E Te, Tf, Th, Ti; + Te = Ip[WS(rs, 2)]; + Tf = Im[0]; + Tg = Te - Tf; + TG = Te + Tf; + Th = Ip[WS(rs, 1)]; + Ti = Im[WS(rs, 1)]; + Tj = Th - Ti; + TF = Th + Ti; + } + Tk = Tg + Tj; + TL = KP866025403 * (TG + TF); + Tn = KP866025403 * (Tj - Tg); + TH = TF - TG; + } + Rp[0] = T3 + Ta; + Rm[0] = Td + Tk; + { + E TC, TI, Tx, TD; + TC = Ty + TB; + TI = TE - TH; + Tx = W[4]; + TD = W[5]; + Ip[WS(rs, 1)] = FNMS(TD, TI, Tx * TC); + Im[WS(rs, 1)] = FMA(TD, TC, Tx * TI); + } + { + E To, Tu, Ts, Tw, Tm, Tq; + Tm = FNMS(KP500000000, Ta, T3); + To = Tm - Tn; + Tu = Tm + Tn; + Tq = FNMS(KP500000000, Tk, Td); + Ts = Tq - Tr; + Tw = Tr + Tq; + { + E Tl, Tp, Tt, Tv; + Tl = W[2]; + Tp = W[3]; + Rp[WS(rs, 1)] = FNMS(Tp, Ts, Tl * To); + Rm[WS(rs, 1)] = FMA(Tl, Ts, Tp * To); + Tt = W[6]; + Tv = W[7]; + Rp[WS(rs, 2)] = FNMS(Tv, Tw, Tt * Tu); + Rm[WS(rs, 2)] = FMA(Tt, Tw, Tv * Tu); + } + } + { + E TM, TS, TQ, TU, TK, TP; + TK = FNMS(KP500000000, TB, Ty); + TM = TK - TL; + TS = TK + TL; + TP = FMA(KP500000000, TH, TE); + TQ = TO + TP; + TU = TP - TO; + { + E TJ, TN, TR, TT; + TJ = W[0]; + TN = W[1]; + Ip[0] = FNMS(TN, TQ, TJ * 
TM); + Im[0] = FMA(TN, TM, TJ * TQ); + TR = W[8]; + TT = W[9]; + Ip[WS(rs, 2)] = FNMS(TT, TU, TR * TS); + Im[WS(rs, 2)] = FMA(TT, TS, TR * TU); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cb_6", twinstr, &GENUS, {32, 14, 14, 0} }; + +void X(codelet_hc2cb_6) (planner *p) { + X(khc2c_register) (p, hc2cb_6, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cb_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:37 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hc2cb_8 -include hc2cb.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 44 additions, 14 multiplications, 22 fused multiply/add), + * 52 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E Tw, TH, Tf, Ty, Tx, TI; + { + E TV, TD, T1i, T7, T1b, T1n, TQ, Tk, Tp, TE, Te, T1o, T1e, T1j, Tu; + E TF; + { + E T4, Tg, T3, T19, TC, T5, Th, Ti; + { + E T1, T2, TA, TB; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + TA = Ip[0]; + TB = Im[WS(rs, 3)]; + T4 = Rp[WS(rs, 2)]; + Tg = T1 - T2; + T3 = T1 + T2; + T19 = TA - TB; + TC = TA + TB; + T5 = Rm[WS(rs, 1)]; + Th = Ip[WS(rs, 2)]; + Ti = Im[WS(rs, 1)]; + } + { + E Tb, Tl, Ta, T1c, To, Tc, Tr, Ts; + { + E T8, T9, Tm, Tn; + T8 = Rp[WS(rs, 1)]; + { + E Tz, T6, T1a, Tj; + Tz = T4 - T5; + T6 = T4 + T5; + T1a = Th - Ti; + Tj = Th + Ti; + TV = TC - Tz; + TD = Tz + TC; + T1i = T3 - T6; + T7 = T3 + T6; + T1b = T19 + T1a; + T1n = T19 - T1a; + TQ = Tg + Tj; + Tk = Tg - Tj; + T9 = Rm[WS(rs, 2)]; + } + Tm = Ip[WS(rs, 1)]; + Tn = Im[WS(rs, 2)]; + Tb = Rm[0]; + Tl = T8 - T9; + Ta = T8 + T9; + T1c = Tm - Tn; + To = 
Tm + Tn; + Tc = Rp[WS(rs, 3)]; + Tr = Ip[WS(rs, 3)]; + Ts = Im[0]; + } + { + E Tq, Td, T1d, Tt; + Tp = Tl - To; + TE = Tl + To; + Tq = Tb - Tc; + Td = Tb + Tc; + T1d = Tr - Ts; + Tt = Tr + Ts; + Te = Ta + Td; + T1o = Ta - Td; + T1e = T1c + T1d; + T1j = T1d - T1c; + Tu = Tq - Tt; + TF = Tq + Tt; + } + } + } + { + E TG, Tv, T10, T13, T1s, T1k, T1p, T1v, T1u, T1w, T1t, TR, TW; + Rp[0] = T7 + Te; + Rm[0] = T1b + T1e; + TG = TE - TF; + TR = TE + TF; + TW = Tp - Tu; + Tv = Tp + Tu; + { + E TP, TS, TX, TU, T1r, TT, TY; + TP = W[4]; + T10 = FMA(KP707106781, TR, TQ); + TS = FNMS(KP707106781, TR, TQ); + TX = FMA(KP707106781, TW, TV); + T13 = FNMS(KP707106781, TW, TV); + TU = W[5]; + T1s = T1i + T1j; + T1k = T1i - T1j; + TT = TP * TS; + TY = TP * TX; + T1p = T1n - T1o; + T1v = T1o + T1n; + T1r = W[2]; + Ip[WS(rs, 1)] = FNMS(TU, TX, TT); + Im[WS(rs, 1)] = FMA(TU, TS, TY); + T1u = W[3]; + T1w = T1r * T1v; + T1t = T1r * T1s; + } + { + E T1f, T15, T18, T17, T1g, T1h, T1m; + { + E TZ, T12, T16, T14, T11; + Rm[WS(rs, 1)] = FMA(T1u, T1s, T1w); + Rp[WS(rs, 1)] = FNMS(T1u, T1v, T1t); + TZ = W[12]; + T12 = W[13]; + T1f = T1b - T1e; + T16 = T7 - Te; + T14 = TZ * T13; + T11 = TZ * T10; + T15 = W[6]; + T18 = W[7]; + Im[WS(rs, 3)] = FMA(T12, T10, T14); + Ip[WS(rs, 3)] = FNMS(T12, T13, T11); + T17 = T15 * T16; + T1g = T18 * T16; + } + Rp[WS(rs, 2)] = FNMS(T18, T1f, T17); + Rm[WS(rs, 2)] = FMA(T15, T1f, T1g); + T1h = W[10]; + T1m = W[11]; + { + E TN, TJ, TM, TL, TO, TK, T1q, T1l; + Tw = FNMS(KP707106781, Tv, Tk); + TK = FMA(KP707106781, Tv, Tk); + T1q = T1h * T1p; + T1l = T1h * T1k; + TN = FMA(KP707106781, TG, TD); + TH = FNMS(KP707106781, TG, TD); + Rm[WS(rs, 3)] = FMA(T1m, T1k, T1q); + Rp[WS(rs, 3)] = FNMS(T1m, T1p, T1l); + TJ = W[0]; + TM = W[1]; + Tf = W[8]; + TL = TJ * TK; + TO = TM * TK; + Ty = W[9]; + Tx = Tf * Tw; + Ip[0] = FNMS(TM, TN, TL); + Im[0] = FMA(TJ, TN, TO); + } + } + } + } + Ip[WS(rs, 2)] = FNMS(Ty, TH, Tx); + TI = Ty * Tw; + Im[WS(rs, 2)] = FMA(Tf, TH, TI); + } + } +} + 
+static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cb_8", twinstr, &GENUS, {44, 14, 22, 0} }; + +void X(codelet_hc2cb_8) (planner *p) { + X(khc2c_register) (p, hc2cb_8, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hc2cb_8 -include hc2cb.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 30 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cb_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T7, T18, T1c, To, Ty, TM, TY, TC, Te, TZ, T10, Tv, Tz, TP, TS; + E TD; + { + E T3, TK, Tk, TX, T6, TW, Tn, TL; + { + E T1, T2, Ti, Tj; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + T3 = T1 + T2; + TK = T1 - T2; + Ti = Ip[0]; + Tj = Im[WS(rs, 3)]; + Tk = Ti - Tj; + TX = Ti + Tj; + } + { + E T4, T5, Tl, Tm; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 1)]; + T6 = T4 + T5; + TW = T4 - T5; + Tl = Ip[WS(rs, 2)]; + Tm = Im[WS(rs, 1)]; + Tn = Tl - Tm; + TL = Tl + Tm; + } + T7 = T3 + T6; + T18 = TK + TL; + T1c = TX - TW; + To = Tk + Tn; + Ty = T3 - T6; + TM = TK - TL; + TY = TW + TX; + TC = Tk - Tn; + } + { + E Ta, TN, Tr, TO, Td, TQ, Tu, TR; + { + E T8, T9, Tp, Tq; + T8 = Rp[WS(rs, 1)]; + T9 = Rm[WS(rs, 2)]; + Ta = T8 + T9; + TN = T8 - T9; + Tp = Ip[WS(rs, 1)]; + Tq = Im[WS(rs, 2)]; + Tr = Tp - Tq; + TO = Tp + Tq; + } + { + E Tb, Tc, Ts, Tt; + Tb = Rm[0]; + Tc = Rp[WS(rs, 3)]; + Td = Tb + Tc; + TQ = Tb - Tc; + Ts = Ip[WS(rs, 3)]; + Tt = Im[0]; + Tu = Ts - Tt; + TR = Ts + Tt; + } + Te = Ta + Td; + TZ = 
TN + TO; + T10 = TQ + TR; + Tv = Tr + Tu; + Tz = Tu - Tr; + TP = TN - TO; + TS = TQ - TR; + TD = Ta - Td; + } + Rp[0] = T7 + Te; + Rm[0] = To + Tv; + { + E Tg, Tw, Tf, Th; + Tg = T7 - Te; + Tw = To - Tv; + Tf = W[6]; + Th = W[7]; + Rp[WS(rs, 2)] = FNMS(Th, Tw, Tf * Tg); + Rm[WS(rs, 2)] = FMA(Th, Tg, Tf * Tw); + } + { + E TG, TI, TF, TH; + TG = Ty + Tz; + TI = TD + TC; + TF = W[2]; + TH = W[3]; + Rp[WS(rs, 1)] = FNMS(TH, TI, TF * TG); + Rm[WS(rs, 1)] = FMA(TF, TI, TH * TG); + } + { + E TA, TE, Tx, TB; + TA = Ty - Tz; + TE = TC - TD; + Tx = W[10]; + TB = W[11]; + Rp[WS(rs, 3)] = FNMS(TB, TE, Tx * TA); + Rm[WS(rs, 3)] = FMA(Tx, TE, TB * TA); + } + { + E T1a, T1g, T1e, T1i, T19, T1d; + T19 = KP707106781 * (TZ + T10); + T1a = T18 - T19; + T1g = T18 + T19; + T1d = KP707106781 * (TP - TS); + T1e = T1c + T1d; + T1i = T1c - T1d; + { + E T17, T1b, T1f, T1h; + T17 = W[4]; + T1b = W[5]; + Ip[WS(rs, 1)] = FNMS(T1b, T1e, T17 * T1a); + Im[WS(rs, 1)] = FMA(T17, T1e, T1b * T1a); + T1f = W[12]; + T1h = W[13]; + Ip[WS(rs, 3)] = FNMS(T1h, T1i, T1f * T1g); + Im[WS(rs, 3)] = FMA(T1f, T1i, T1h * T1g); + } + } + { + E TU, T14, T12, T16, TT, T11; + TT = KP707106781 * (TP + TS); + TU = TM - TT; + T14 = TM + TT; + T11 = KP707106781 * (TZ - T10); + T12 = TY - T11; + T16 = TY + T11; + { + E TJ, TV, T13, T15; + TJ = W[8]; + TV = W[9]; + Ip[WS(rs, 2)] = FNMS(TV, T12, TJ * TU); + Im[WS(rs, 2)] = FMA(TV, TU, TJ * T12); + T13 = W[0]; + T15 = W[1]; + Ip[0] = FNMS(T15, T16, T13 * T14); + Im[0] = FMA(T15, T14, T13 * T16); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cb_8", twinstr, &GENUS, {52, 18, 14, 0} }; + +void X(codelet_hc2cb_8) (planner *p) { + X(khc2c_register) (p, hc2cb_8, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:46 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hc2cbdft2_16 -include hc2cb.h */ + +/* + * This function contains 206 FP additions, 100 FP multiplications, + * (or, 136 additions, 30 multiplications, 70 fused multiply/add), + * 97 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 
30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T3w, T3z, T2Y, T3D, T3x, T3m, T3u, T3C, T3y, T3o, T3k, T3E, T3A; + { + E T20, Tf, T3Q, T32, T3V, T3f, T2a, TN, T2f, T1m, T3G, T2G, T3L, T2T, T26; + E T1F, T3M, T2N, T3H, T2W, T25, Tu, T1n, T1o, T3R, T3i, T2g, T1a, T21, T1y; + E T3W, T39; + { + E T2R, T1B, T2S, T1E; + { + E T1e, T3, T1C, TA, Tx, T6, T1D, T1h, Td, T1A, TL, T1k, Ta, TC, TF; + E T1z; + { + E T4, T5, T1f, T1g; + { + E T1, T2, Ty, Tz; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + Ty = Ip[0]; + Tz = Im[WS(rs, 7)]; + T4 = Rp[WS(rs, 4)]; + T1e = T1 - T2; + T3 = T1 + T2; + T1C = Ty - Tz; + TA = Ty + Tz; + T5 = Rm[WS(rs, 3)]; + } + T1f = Ip[WS(rs, 4)]; + T1g = Im[WS(rs, 3)]; + { + E Tb, Tc, TI, TJ; + Tb = Rm[WS(rs, 1)]; + Tx = T4 - T5; + T6 = T4 + T5; + T1D = T1f - T1g; + T1h = T1f + T1g; + Tc = Rp[WS(rs, 6)]; + TI = Im[WS(rs, 1)]; + TJ = Ip[WS(rs, 6)]; + { + E T8, TH, TK, T9, TD, TE; + T8 = Rp[WS(rs, 2)]; + Td = Tb + Tc; + TH = Tb - Tc; + T1A = TJ - TI; + TK = TI + TJ; + T9 = Rm[WS(rs, 5)]; + TD = Ip[WS(rs, 2)]; + TE = Im[WS(rs, 5)]; + TL = TH + TK; + T1k = TH - TK; + Ta = T8 + T9; + TC = T8 - T9; + TF = TD + TE; + T1z = TD - TE; + } + } + } + { + E T2E, TB, T1l, T1i, T3d, T3e, TM, T2F; + { + E T7, TG, Te, T30, T31, T1j; + T2E = T3 - T6; + T7 = T3 + T6; + T1j = TC - TF; + TG = TC + TF; + Te = Ta + Td; + T2R = Ta - Td; + TB = Tx + TA; + T30 = TA - Tx; + T31 = T1j - T1k; + T1l = T1j + T1k; + T1i = T1e - T1h; + T3d = T1e + T1h; + T20 = T7 - Te; + Tf = T7 + Te; + T3Q = FNMS(KP707106781, T31, T30); + T32 = FMA(KP707106781, T31, T30); + T3e = TG + TL; + TM = TG - TL; + } + T3V = FMA(KP707106781, T3e, T3d); + T3f = FNMS(KP707106781, T3e, T3d); + T2a = FNMS(KP707106781, TM, TB); + TN = FMA(KP707106781, TM, TB); + T2F = T1A - T1z; + T1B = T1z + T1A; + T2f = FNMS(KP707106781, T1l, T1i); + T1m = FMA(KP707106781, T1l, T1i); + T3G = T2E - T2F; + T2G = T2E + T2F; + T2S = T1C - T1D; + T1E = T1C + T1D; + } + } + { + E T34, TS, T2H, Tm, T1u, T2I, T33, TX, Tq, T14, Tp, T1v, T12, Tr, T15; + E 
T16; + { + E Tj, TT, Ti, T1s, TR, Tk, TU, TV; + { + E Tg, Th, TP, TQ; + Tg = Rp[WS(rs, 1)]; + T3L = T2S - T2R; + T2T = T2R + T2S; + T26 = T1E - T1B; + T1F = T1B + T1E; + Th = Rm[WS(rs, 6)]; + TP = Ip[WS(rs, 1)]; + TQ = Im[WS(rs, 6)]; + Tj = Rp[WS(rs, 5)]; + TT = Tg - Th; + Ti = Tg + Th; + T1s = TP - TQ; + TR = TP + TQ; + Tk = Rm[WS(rs, 2)]; + TU = Ip[WS(rs, 5)]; + TV = Im[WS(rs, 2)]; + } + { + E Tn, To, T10, T11; + Tn = Rm[0]; + { + E TO, Tl, T1t, TW; + TO = Tj - Tk; + Tl = Tj + Tk; + T1t = TU - TV; + TW = TU + TV; + T34 = TR - TO; + TS = TO + TR; + T2H = Ti - Tl; + Tm = Ti + Tl; + T1u = T1s + T1t; + T2I = T1s - T1t; + T33 = TT + TW; + TX = TT - TW; + To = Rp[WS(rs, 7)]; + } + T10 = Im[0]; + T11 = Ip[WS(rs, 7)]; + Tq = Rp[WS(rs, 3)]; + T14 = Tn - To; + Tp = Tn + To; + T1v = T11 - T10; + T12 = T10 + T11; + Tr = Rm[WS(rs, 4)]; + T15 = Ip[WS(rs, 3)]; + T16 = Im[WS(rs, 4)]; + } + } + { + E T13, T1x, T18, T35, T3g, T3h, T38, TY, T19; + { + E T2U, T2J, T37, Tt, T36, T2V, T2M, T2K, T2L; + T2U = T2H + T2I; + T2J = T2H - T2I; + { + E TZ, Ts, T1w, T17; + TZ = Tq - Tr; + Ts = Tq + Tr; + T1w = T15 - T16; + T17 = T15 + T16; + T37 = TZ + T12; + T13 = TZ - T12; + T2K = Tp - Ts; + Tt = Tp + Ts; + T1x = T1v + T1w; + T2L = T1v - T1w; + T36 = T14 + T17; + T18 = T14 - T17; + } + T2V = T2L - T2K; + T2M = T2K + T2L; + T3M = T2J - T2M; + T2N = T2J + T2M; + T3H = T2V - T2U; + T2W = T2U + T2V; + T35 = FMA(KP414213562, T34, T33); + T3g = FNMS(KP414213562, T33, T34); + T25 = Tm - Tt; + Tu = Tm + Tt; + T3h = FNMS(KP414213562, T36, T37); + T38 = FMA(KP414213562, T37, T36); + } + T1n = FNMS(KP414213562, TS, TX); + TY = FMA(KP414213562, TX, TS); + T19 = FNMS(KP414213562, T18, T13); + T1o = FMA(KP414213562, T13, T18); + T3R = T3h - T3g; + T3i = T3g + T3h; + T2g = T19 - TY; + T1a = TY + T19; + T21 = T1x - T1u; + T1y = T1u + T1x; + T3W = T35 + T38; + T39 = T35 - T38; + } + } + } + { + E T27, T22, T2c, T2u, T2x, T2h, T2s, T2A, T2w, T2B, T2v; + { + E T1K, Tv, T1G, T1N, T1Q, T1b, T2b, T1p, Tw, T1d; + 
T1K = Tf - Tu; + Tv = Tf + Tu; + T1G = T1y + T1F; + T1N = T1F - T1y; + T1Q = FNMS(KP923879532, T1a, TN); + T1b = FMA(KP923879532, T1a, TN); + T2b = T1n - T1o; + T1p = T1n + T1o; + Tw = W[0]; + T1d = W[1]; + { + E T1T, T1O, T1W, T1S, T1X, T1R; + { + E T1J, T1M, T1L, T1V, T1P, T1q; + T1T = FNMS(KP923879532, T1p, T1m); + T1q = FMA(KP923879532, T1p, T1m); + { + E T1c, T1I, T1H, T1r; + T1c = Tw * T1b; + T1J = W[14]; + T1H = Tw * T1q; + T1r = FMA(T1d, T1q, T1c); + T1M = W[15]; + T1L = T1J * T1K; + T1I = FNMS(T1d, T1b, T1H); + Rm[0] = Tv + T1r; + Rp[0] = Tv - T1r; + T1V = T1M * T1K; + Im[0] = T1I - T1G; + Ip[0] = T1G + T1I; + T1P = W[16]; + } + T1O = FNMS(T1M, T1N, T1L); + T1W = FMA(T1J, T1N, T1V); + T1S = W[17]; + T1X = T1P * T1T; + T1R = T1P * T1Q; + } + { + E T2r, T2n, T2q, T2p, T2z, T2t, T2o, T1Y, T1U; + T27 = T25 + T26; + T2r = T26 - T25; + T2o = T20 - T21; + T22 = T20 + T21; + T1Y = FNMS(T1S, T1Q, T1X); + T1U = FMA(T1S, T1T, T1R); + T2n = W[22]; + T2q = W[23]; + Im[WS(rs, 4)] = T1Y - T1W; + Ip[WS(rs, 4)] = T1W + T1Y; + Rm[WS(rs, 4)] = T1O + T1U; + Rp[WS(rs, 4)] = T1O - T1U; + T2p = T2n * T2o; + T2z = T2q * T2o; + T2c = FMA(KP923879532, T2b, T2a); + T2u = FNMS(KP923879532, T2b, T2a); + T2x = FNMS(KP923879532, T2g, T2f); + T2h = FMA(KP923879532, T2g, T2f); + T2t = W[24]; + T2s = FNMS(T2q, T2r, T2p); + T2A = FMA(T2n, T2r, T2z); + T2w = W[25]; + T2B = T2t * T2x; + T2v = T2t * T2u; + } + } + } + { + E T28, T2k, T2e, T2l, T2d; + { + E T1Z, T24, T23, T2j, T29, T2C, T2y; + T2C = FNMS(T2w, T2u, T2B); + T2y = FMA(T2w, T2x, T2v); + T1Z = W[6]; + T24 = W[7]; + Im[WS(rs, 6)] = T2C - T2A; + Ip[WS(rs, 6)] = T2A + T2C; + Rm[WS(rs, 6)] = T2s + T2y; + Rp[WS(rs, 6)] = T2s - T2y; + T23 = T1Z * T22; + T2j = T24 * T22; + T29 = W[8]; + T28 = FNMS(T24, T27, T23); + T2k = FMA(T1Z, T27, T2j); + T2e = W[9]; + T2l = T29 * T2h; + T2d = T29 * T2c; + } + { + E T4a, T4d, T3O, T4h, T4b, T40, T48, T4g, T4c, T42, T3Y; + { + E T3N, T47, T43, T46, T3F, T45, T4f, T3K, T3J, T3S, T3X, T3Z, T49, T41, T3T; 
+ E T3U; + { + E T44, T3I, T2m, T2i, T3P; + T44 = FNMS(KP707106781, T3H, T3G); + T3I = FMA(KP707106781, T3H, T3G); + T2m = FNMS(T2e, T2c, T2l); + T2i = FMA(T2e, T2h, T2d); + T3N = FMA(KP707106781, T3M, T3L); + T47 = FNMS(KP707106781, T3M, T3L); + Im[WS(rs, 2)] = T2m - T2k; + Ip[WS(rs, 2)] = T2k + T2m; + Rm[WS(rs, 2)] = T28 + T2i; + Rp[WS(rs, 2)] = T28 - T2i; + T43 = W[26]; + T46 = W[27]; + T3F = W[10]; + T45 = T43 * T44; + T4f = T46 * T44; + T3K = W[11]; + T3J = T3F * T3I; + T4a = FNMS(KP923879532, T3R, T3Q); + T3S = FMA(KP923879532, T3R, T3Q); + T3X = FNMS(KP923879532, T3W, T3V); + T4d = FMA(KP923879532, T3W, T3V); + T3Z = T3K * T3I; + T3P = W[12]; + T49 = W[28]; + T41 = T3P * T3X; + T3T = T3P * T3S; + } + T3O = FNMS(T3K, T3N, T3J); + T4h = T49 * T4d; + T4b = T49 * T4a; + T40 = FMA(T3F, T3N, T3Z); + T3U = W[13]; + T48 = FNMS(T46, T47, T45); + T4g = FMA(T43, T47, T4f); + T4c = W[29]; + T42 = FNMS(T3U, T3S, T41); + T3Y = FMA(T3U, T3X, T3T); + } + { + E T3t, T2X, T3p, T3s, T2D, T3r, T3B, T2Q, T2P, T3a, T3j, T3l, T3v, T3n, T3b; + E T3c; + { + E T2O, T3q, T4i, T4e, T2Z; + T4i = FNMS(T4c, T4a, T4h); + T4e = FMA(T4c, T4d, T4b); + Im[WS(rs, 3)] = T42 - T40; + Ip[WS(rs, 3)] = T40 + T42; + Rm[WS(rs, 3)] = T3O + T3Y; + Rp[WS(rs, 3)] = T3O - T3Y; + Im[WS(rs, 7)] = T4i - T4g; + Ip[WS(rs, 7)] = T4g + T4i; + Rm[WS(rs, 7)] = T48 + T4e; + Rp[WS(rs, 7)] = T48 - T4e; + T3t = FNMS(KP707106781, T2W, T2T); + T2X = FMA(KP707106781, T2W, T2T); + T2O = FMA(KP707106781, T2N, T2G); + T3q = FNMS(KP707106781, T2N, T2G); + T3p = W[18]; + T3s = W[19]; + T2D = W[2]; + T3r = T3p * T3q; + T3B = T3s * T3q; + T2Q = W[3]; + T2P = T2D * T2O; + T3a = FMA(KP923879532, T39, T32); + T3w = FNMS(KP923879532, T39, T32); + T3z = FMA(KP923879532, T3i, T3f); + T3j = FNMS(KP923879532, T3i, T3f); + T3l = T2Q * T2O; + T2Z = W[4]; + T3v = W[20]; + T3n = T2Z * T3j; + T3b = T2Z * T3a; + } + T2Y = FNMS(T2Q, T2X, T2P); + T3D = T3v * T3z; + T3x = T3v * T3w; + T3m = FMA(T2D, T2X, T3l); + T3c = W[5]; + T3u = FNMS(T3s, 
T3t, T3r); + T3C = FMA(T3p, T3t, T3B); + T3y = W[21]; + T3o = FNMS(T3c, T3a, T3n); + T3k = FMA(T3c, T3j, T3b); + } + } + } + } + } + T3E = FNMS(T3y, T3w, T3D); + T3A = FMA(T3y, T3z, T3x); + Im[WS(rs, 1)] = T3o - T3m; + Ip[WS(rs, 1)] = T3m + T3o; + Rm[WS(rs, 1)] = T2Y + T3k; + Rp[WS(rs, 1)] = T2Y - T3k; + Im[WS(rs, 5)] = T3E - T3C; + Ip[WS(rs, 5)] = T3C + T3E; + Rm[WS(rs, 5)] = T3u + T3A; + Rp[WS(rs, 5)] = T3u - T3A; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cbdft2_16", twinstr, &GENUS, {136, 30, 70, 0} }; + +void X(codelet_hc2cbdft2_16) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_16, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hc2cbdft2_16 -include hc2cb.h */ + +/* + * This function contains 206 FP additions, 84 FP multiplications, + * (or, 168 additions, 46 multiplications, 38 fused multiply/add), + * 60 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E TB, T2L, T30, T1n, Tf, T1U, T2H, T3p, T1E, T1Z, TM, T31, T2s, T3k, T1i; + E T2M, Tu, T1Y, T2Q, T2X, T2T, T2Y, TY, T1d, T19, T1e, T2v, T2C, T2y, T2D; + E T1x, T1V; + { + E T3, T1j, TA, T1B, T6, Tx, T1m, T1C, Ta, TC, TF, T1y, Td, TH, TK; + E T1z; + { + E T1, T2, Ty, Tz; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + T3 = T1 + T2; + T1j = T1 - T2; + Ty = Ip[0]; + Tz = Im[WS(rs, 7)]; + TA = Ty + Tz; + 
T1B = Ty - Tz; + } + { + E T4, T5, T1k, T1l; + T4 = Rp[WS(rs, 4)]; + T5 = Rm[WS(rs, 3)]; + T6 = T4 + T5; + Tx = T4 - T5; + T1k = Ip[WS(rs, 4)]; + T1l = Im[WS(rs, 3)]; + T1m = T1k + T1l; + T1C = T1k - T1l; + } + { + E T8, T9, TD, TE; + T8 = Rp[WS(rs, 2)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + TC = T8 - T9; + TD = Ip[WS(rs, 2)]; + TE = Im[WS(rs, 5)]; + TF = TD + TE; + T1y = TD - TE; + } + { + E Tb, Tc, TI, TJ; + Tb = Rm[WS(rs, 1)]; + Tc = Rp[WS(rs, 6)]; + Td = Tb + Tc; + TH = Tb - Tc; + TI = Im[WS(rs, 1)]; + TJ = Ip[WS(rs, 6)]; + TK = TI + TJ; + T1z = TJ - TI; + } + { + E T7, Te, TG, TL; + TB = Tx + TA; + T2L = TA - Tx; + T30 = T1j + T1m; + T1n = T1j - T1m; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T1U = T7 - Te; + { + E T2F, T2G, T1A, T1D; + T2F = Ta - Td; + T2G = T1B - T1C; + T2H = T2F + T2G; + T3p = T2G - T2F; + T1A = T1y + T1z; + T1D = T1B + T1C; + T1E = T1A + T1D; + T1Z = T1D - T1A; + } + TG = TC + TF; + TL = TH + TK; + TM = KP707106781 * (TG - TL); + T31 = KP707106781 * (TG + TL); + { + E T2q, T2r, T1g, T1h; + T2q = T3 - T6; + T2r = T1z - T1y; + T2s = T2q + T2r; + T3k = T2q - T2r; + T1g = TC - TF; + T1h = TH - TK; + T1i = KP707106781 * (T1g + T1h); + T2M = KP707106781 * (T1g - T1h); + } + } + } + { + E Ti, TT, TR, T1r, Tl, TO, TW, T1s, Tp, T14, T12, T1u, Ts, TZ, T17; + E T1v; + { + E Tg, Th, TP, TQ; + Tg = Rp[WS(rs, 1)]; + Th = Rm[WS(rs, 6)]; + Ti = Tg + Th; + TT = Tg - Th; + TP = Ip[WS(rs, 1)]; + TQ = Im[WS(rs, 6)]; + TR = TP + TQ; + T1r = TP - TQ; + } + { + E Tj, Tk, TU, TV; + Tj = Rp[WS(rs, 5)]; + Tk = Rm[WS(rs, 2)]; + Tl = Tj + Tk; + TO = Tj - Tk; + TU = Ip[WS(rs, 5)]; + TV = Im[WS(rs, 2)]; + TW = TU + TV; + T1s = TU - TV; + } + { + E Tn, To, T10, T11; + Tn = Rm[0]; + To = Rp[WS(rs, 7)]; + Tp = Tn + To; + T14 = Tn - To; + T10 = Im[0]; + T11 = Ip[WS(rs, 7)]; + T12 = T10 + T11; + T1u = T11 - T10; + } + { + E Tq, Tr, T15, T16; + Tq = Rp[WS(rs, 3)]; + Tr = Rm[WS(rs, 4)]; + Ts = Tq + Tr; + TZ = Tq - Tr; + T15 = Ip[WS(rs, 3)]; + T16 = Im[WS(rs, 4)]; + 
T17 = T15 + T16; + T1v = T15 - T16; + } + { + E Tm, Tt, T2O, T2P; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T1Y = Tm - Tt; + T2O = TR - TO; + T2P = TT + TW; + T2Q = FMA(KP382683432, T2O, KP923879532 * T2P); + T2X = FNMS(KP923879532, T2O, KP382683432 * T2P); + } + { + E T2R, T2S, TS, TX; + T2R = TZ + T12; + T2S = T14 + T17; + T2T = FMA(KP382683432, T2R, KP923879532 * T2S); + T2Y = FNMS(KP923879532, T2R, KP382683432 * T2S); + TS = TO + TR; + TX = TT - TW; + TY = FMA(KP923879532, TS, KP382683432 * TX); + T1d = FNMS(KP382683432, TS, KP923879532 * TX); + } + { + E T13, T18, T2t, T2u; + T13 = TZ - T12; + T18 = T14 - T17; + T19 = FNMS(KP382683432, T18, KP923879532 * T13); + T1e = FMA(KP382683432, T13, KP923879532 * T18); + T2t = Ti - Tl; + T2u = T1r - T1s; + T2v = T2t - T2u; + T2C = T2t + T2u; + } + { + E T2w, T2x, T1t, T1w; + T2w = Tp - Ts; + T2x = T1u - T1v; + T2y = T2w + T2x; + T2D = T2x - T2w; + T1t = T1r + T1s; + T1w = T1u + T1v; + T1x = T1t + T1w; + T1V = T1w - T1t; + } + } + { + E Tv, T1F, T1b, T1N, T1p, T1P, T1L, T1R; + Tv = Tf + Tu; + T1F = T1x + T1E; + { + E TN, T1a, T1f, T1o; + TN = TB + TM; + T1a = TY + T19; + T1b = TN + T1a; + T1N = TN - T1a; + T1f = T1d + T1e; + T1o = T1i + T1n; + T1p = T1f + T1o; + T1P = T1o - T1f; + { + E T1I, T1K, T1H, T1J; + T1I = Tf - Tu; + T1K = T1E - T1x; + T1H = W[14]; + T1J = W[15]; + T1L = FNMS(T1J, T1K, T1H * T1I); + T1R = FMA(T1J, T1I, T1H * T1K); + } + } + { + E T1q, T1G, Tw, T1c; + Tw = W[0]; + T1c = W[1]; + T1q = FMA(Tw, T1b, T1c * T1p); + T1G = FNMS(T1c, T1b, Tw * T1p); + Rp[0] = Tv - T1q; + Ip[0] = T1F + T1G; + Rm[0] = Tv + T1q; + Im[0] = T1G - T1F; + } + { + E T1Q, T1S, T1M, T1O; + T1M = W[16]; + T1O = W[17]; + T1Q = FMA(T1M, T1N, T1O * T1P); + T1S = FNMS(T1O, T1N, T1M * T1P); + Rp[WS(rs, 4)] = T1L - T1Q; + Ip[WS(rs, 4)] = T1R + T1S; + Rm[WS(rs, 4)] = T1L + T1Q; + Im[WS(rs, 4)] = T1S - T1R; + } + } + { + E T25, T2j, T29, T2l, T21, T2b, T2h, T2n; + { + E T23, T24, T27, T28; + T23 = TB - TM; + T24 = T1d - T1e; + T25 = 
T23 + T24; + T2j = T23 - T24; + T27 = T19 - TY; + T28 = T1n - T1i; + T29 = T27 + T28; + T2l = T28 - T27; + } + { + E T1W, T20, T1T, T1X; + T1W = T1U + T1V; + T20 = T1Y + T1Z; + T1T = W[6]; + T1X = W[7]; + T21 = FNMS(T1X, T20, T1T * T1W); + T2b = FMA(T1X, T1W, T1T * T20); + } + { + E T2e, T2g, T2d, T2f; + T2e = T1U - T1V; + T2g = T1Z - T1Y; + T2d = W[22]; + T2f = W[23]; + T2h = FNMS(T2f, T2g, T2d * T2e); + T2n = FMA(T2f, T2e, T2d * T2g); + } + { + E T2a, T2c, T22, T26; + T22 = W[8]; + T26 = W[9]; + T2a = FMA(T22, T25, T26 * T29); + T2c = FNMS(T26, T25, T22 * T29); + Rp[WS(rs, 2)] = T21 - T2a; + Ip[WS(rs, 2)] = T2b + T2c; + Rm[WS(rs, 2)] = T21 + T2a; + Im[WS(rs, 2)] = T2c - T2b; + } + { + E T2m, T2o, T2i, T2k; + T2i = W[24]; + T2k = W[25]; + T2m = FMA(T2i, T2j, T2k * T2l); + T2o = FNMS(T2k, T2j, T2i * T2l); + Rp[WS(rs, 6)] = T2h - T2m; + Ip[WS(rs, 6)] = T2n + T2o; + Rm[WS(rs, 6)] = T2h + T2m; + Im[WS(rs, 6)] = T2o - T2n; + } + } + { + E T2A, T38, T2I, T3a, T2V, T3d, T33, T3f, T2z, T2E; + T2z = KP707106781 * (T2v + T2y); + T2A = T2s + T2z; + T38 = T2s - T2z; + T2E = KP707106781 * (T2C + T2D); + T2I = T2E + T2H; + T3a = T2H - T2E; + { + E T2N, T2U, T2Z, T32; + T2N = T2L + T2M; + T2U = T2Q - T2T; + T2V = T2N + T2U; + T3d = T2N - T2U; + T2Z = T2X + T2Y; + T32 = T30 - T31; + T33 = T2Z + T32; + T3f = T32 - T2Z; + } + { + E T2J, T35, T34, T36; + { + E T2p, T2B, T2K, T2W; + T2p = W[2]; + T2B = W[3]; + T2J = FNMS(T2B, T2I, T2p * T2A); + T35 = FMA(T2B, T2A, T2p * T2I); + T2K = W[4]; + T2W = W[5]; + T34 = FMA(T2K, T2V, T2W * T33); + T36 = FNMS(T2W, T2V, T2K * T33); + } + Rp[WS(rs, 1)] = T2J - T34; + Ip[WS(rs, 1)] = T35 + T36; + Rm[WS(rs, 1)] = T2J + T34; + Im[WS(rs, 1)] = T36 - T35; + } + { + E T3b, T3h, T3g, T3i; + { + E T37, T39, T3c, T3e; + T37 = W[18]; + T39 = W[19]; + T3b = FNMS(T39, T3a, T37 * T38); + T3h = FMA(T39, T38, T37 * T3a); + T3c = W[20]; + T3e = W[21]; + T3g = FMA(T3c, T3d, T3e * T3f); + T3i = FNMS(T3e, T3d, T3c * T3f); + } + Rp[WS(rs, 5)] = T3b - T3g; + 
Ip[WS(rs, 5)] = T3h + T3i; + Rm[WS(rs, 5)] = T3b + T3g; + Im[WS(rs, 5)] = T3i - T3h; + } + } + { + E T3m, T3E, T3q, T3G, T3v, T3J, T3z, T3L, T3l, T3o; + T3l = KP707106781 * (T2D - T2C); + T3m = T3k + T3l; + T3E = T3k - T3l; + T3o = KP707106781 * (T2v - T2y); + T3q = T3o + T3p; + T3G = T3p - T3o; + { + E T3t, T3u, T3x, T3y; + T3t = T2L - T2M; + T3u = T2X - T2Y; + T3v = T3t + T3u; + T3J = T3t - T3u; + T3x = T31 + T30; + T3y = T2Q + T2T; + T3z = T3x - T3y; + T3L = T3y + T3x; + } + { + E T3r, T3B, T3A, T3C; + { + E T3j, T3n, T3s, T3w; + T3j = W[10]; + T3n = W[11]; + T3r = FNMS(T3n, T3q, T3j * T3m); + T3B = FMA(T3n, T3m, T3j * T3q); + T3s = W[12]; + T3w = W[13]; + T3A = FMA(T3s, T3v, T3w * T3z); + T3C = FNMS(T3w, T3v, T3s * T3z); + } + Rp[WS(rs, 3)] = T3r - T3A; + Ip[WS(rs, 3)] = T3B + T3C; + Rm[WS(rs, 3)] = T3r + T3A; + Im[WS(rs, 3)] = T3C - T3B; + } + { + E T3H, T3N, T3M, T3O; + { + E T3D, T3F, T3I, T3K; + T3D = W[26]; + T3F = W[27]; + T3H = FNMS(T3F, T3G, T3D * T3E); + T3N = FMA(T3F, T3E, T3D * T3G); + T3I = W[28]; + T3K = W[29]; + T3M = FMA(T3I, T3J, T3K * T3L); + T3O = FNMS(T3K, T3J, T3I * T3L); + } + Rp[WS(rs, 7)] = T3H - T3M; + Ip[WS(rs, 7)] = T3N + T3O; + Rm[WS(rs, 7)] = T3H + T3M; + Im[WS(rs, 7)] = T3O - T3N; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cbdft2_16", twinstr, &GENUS, {168, 46, 38, 0} }; + +void X(codelet_hc2cbdft2_16) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_16, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1135 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * 
it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:47 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cbdft2_20 -include hc2cb.h */ + +/* + * This function contains 286 FP additions, 148 FP multiplications, + * (or, 176 additions, 38 multiplications, 110 fused multiply/add), + * 122 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T5s, T5v, T5t, T5z, T5q, T5y, T5u, T5A, T5w; + { + E T3T, T27, T2o, T41, T2p, T40, TU, T15, T2Q, T1N, T2L, T1w, T59, T4n, T5e; + E T4A, T2m, T24, T2Z, T2h, T4J, T3P, T3Y, T3W, T2d, TJ, T3H, T2c, 
TD, T52; + E T3G, T1E, T4f, T5I, T4e, T4w, T5L, T4v, T1J, T1H; + { + E T1A, T3, T25, TI, TF, T6, T26, T1D, TO, T47, T3z, Te, T1S, T3M, T1e; + E T4k, TZ, T4a, T3C, Tt, T1Z, T3J, T1p, T4h, T14, T4b, T3D, TA, T22, T3K; + E T1u, T4i, Ti, T1f, Th, T1T, TS, Tj, T1g, T1h; + { + E T4, T5, T1B, T1C; + { + E T1, T2, TG, TH; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + TG = Ip[0]; + TH = Im[WS(rs, 9)]; + T4 = Rp[WS(rs, 5)]; + T1A = T1 - T2; + T3 = T1 + T2; + T25 = TG - TH; + TI = TG + TH; + T5 = Rm[WS(rs, 4)]; + T1B = Ip[WS(rs, 5)]; + T1C = Im[WS(rs, 4)]; + } + { + E Tq, T1l, Tp, T1X, TY, Tr, T1m, T1n; + { + E Tb, T1a, Ta, T1Q, TN, Tc, T1b, T1c; + { + E T8, T9, TL, TM; + T8 = Rp[WS(rs, 4)]; + TF = T4 - T5; + T6 = T4 + T5; + T26 = T1B - T1C; + T1D = T1B + T1C; + T9 = Rm[WS(rs, 5)]; + TL = Ip[WS(rs, 4)]; + TM = Im[WS(rs, 5)]; + Tb = Rp[WS(rs, 9)]; + T1a = T8 - T9; + Ta = T8 + T9; + T1Q = TL - TM; + TN = TL + TM; + Tc = Rm[0]; + T1b = Ip[WS(rs, 9)]; + T1c = Im[0]; + } + { + E Tn, To, TW, TX; + Tn = Rp[WS(rs, 8)]; + { + E TK, Td, T1R, T1d; + TK = Tb - Tc; + Td = Tb + Tc; + T1R = T1b - T1c; + T1d = T1b + T1c; + TO = TK + TN; + T47 = TN - TK; + T3z = Ta - Td; + Te = Ta + Td; + T1S = T1Q + T1R; + T3M = T1Q - T1R; + T1e = T1a - T1d; + T4k = T1a + T1d; + To = Rm[WS(rs, 1)]; + } + TW = Ip[WS(rs, 8)]; + TX = Im[WS(rs, 1)]; + Tq = Rm[WS(rs, 6)]; + T1l = Tn - To; + Tp = Tn + To; + T1X = TW - TX; + TY = TW + TX; + Tr = Rp[WS(rs, 3)]; + T1m = Im[WS(rs, 6)]; + T1n = Ip[WS(rs, 3)]; + } + } + { + E Tx, T1q, Tw, T20, T13, Ty, T1r, T1s; + { + E Tu, Tv, T11, T12; + Tu = Rm[WS(rs, 7)]; + { + E TV, Ts, T1Y, T1o; + TV = Tq - Tr; + Ts = Tq + Tr; + T1Y = T1n - T1m; + T1o = T1m + T1n; + TZ = TV + TY; + T4a = TY - TV; + T3C = Tp - Ts; + Tt = Tp + Ts; + T1Z = T1X + T1Y; + T3J = T1X - T1Y; + T1p = T1l + T1o; + T4h = T1l - T1o; + Tv = Rp[WS(rs, 2)]; + } + T11 = Im[WS(rs, 7)]; + T12 = Ip[WS(rs, 2)]; + Tx = Rm[WS(rs, 2)]; + T1q = Tu - Tv; + Tw = Tu + Tv; + T20 = T12 - T11; + T13 = T11 + T12; + Ty = Rp[WS(rs, 7)]; + 
T1r = Im[WS(rs, 2)]; + T1s = Ip[WS(rs, 7)]; + } + { + E Tf, Tg, TQ, TR; + Tf = Rm[WS(rs, 3)]; + { + E T10, Tz, T21, T1t; + T10 = Tx - Ty; + Tz = Tx + Ty; + T21 = T1s - T1r; + T1t = T1r + T1s; + T14 = T10 - T13; + T4b = T10 + T13; + T3D = Tw - Tz; + TA = Tw + Tz; + T22 = T20 + T21; + T3K = T20 - T21; + T1u = T1q + T1t; + T4i = T1q - T1t; + Tg = Rp[WS(rs, 6)]; + } + TQ = Im[WS(rs, 3)]; + TR = Ip[WS(rs, 6)]; + Ti = Rp[WS(rs, 1)]; + T1f = Tf - Tg; + Th = Tf + Tg; + T1T = TR - TQ; + TS = TQ + TR; + Tj = Rm[WS(rs, 8)]; + T1g = Ip[WS(rs, 1)]; + T1h = Im[WS(rs, 8)]; + } + } + } + } + { + E T1V, T3N, TB, T3B, Tm, T3E, T1F, T1G, T4t, T4j, T4m, T4s, T4c, T4y, T4z; + E T49, T3y, T7; + { + E TT, T48, T1j, T4l, T3A, Tl; + T3T = T25 - T26; + T27 = T25 + T26; + { + E TP, Tk, T1U, T1i; + TP = Ti - Tj; + Tk = Ti + Tj; + T1U = T1g - T1h; + T1i = T1g + T1h; + TT = TP - TS; + T48 = TP + TS; + T3A = Th - Tk; + Tl = Th + Tk; + T1V = T1T + T1U; + T3N = T1T - T1U; + T1j = T1f - T1i; + T4l = T1f + T1i; + T2o = Tt - TA; + TB = Tt + TA; + } + T41 = T3z - T3A; + T3B = T3z + T3A; + Tm = Te + Tl; + T2p = Te - Tl; + { + E T1L, T1M, T1k, T1v; + T40 = T3C - T3D; + T3E = T3C + T3D; + TU = TO + TT; + T1L = TO - TT; + T1M = TZ - T14; + T15 = TZ + T14; + T1F = T1e + T1j; + T1k = T1e - T1j; + T1v = T1p - T1u; + T1G = T1p + T1u; + T4t = T4h + T4i; + T4j = T4h - T4i; + T2Q = FNMS(KP618033988, T1L, T1M); + T1N = FMA(KP618033988, T1M, T1L); + T2L = FNMS(KP618033988, T1k, T1v); + T1w = FMA(KP618033988, T1v, T1k); + T4m = T4k - T4l; + T4s = T4k + T4l; + T4c = T4a - T4b; + T4y = T4a + T4b; + T4z = T47 + T48; + T49 = T47 - T48; + } + } + { + E T2g, T1W, T23, T2f; + T2g = T1S - T1V; + T1W = T1S + T1V; + T59 = FMA(KP618033988, T4j, T4m); + T4n = FNMS(KP618033988, T4m, T4j); + T5e = FMA(KP618033988, T4y, T4z); + T4A = FNMS(KP618033988, T4z, T4y); + T23 = T1Z + T22; + T2f = T1Z - T22; + { + E T3V, T3L, T3O, T3U; + T3V = T3J + T3K; + T3L = T3J - T3K; + T2m = T1W - T23; + T24 = T1W + T23; + T2Z = FMA(KP618033988, 
T2f, T2g); + T2h = FNMS(KP618033988, T2g, T2f); + T3O = T3M - T3N; + T3U = T3M + T3N; + T3y = T3 - T6; + T7 = T3 + T6; + T4J = FMA(KP618033988, T3L, T3O); + T3P = FNMS(KP618033988, T3O, T3L); + T3Y = T3U - T3V; + T3W = T3U + T3V; + } + } + { + E T46, TC, T3F, T4r, T4d, T4u; + TC = Tm + TB; + T2d = Tm - TB; + TJ = TF + TI; + T46 = TI - TF; + T3H = T3B - T3E; + T3F = T3B + T3E; + T2c = FNMS(KP250000000, TC, T7); + TD = T7 + TC; + T52 = T3y + T3F; + T3G = FNMS(KP250000000, T3F, T3y); + T4r = T1A + T1D; + T1E = T1A - T1D; + T4f = T49 - T4c; + T4d = T49 + T4c; + T5I = T46 + T4d; + T4e = FNMS(KP250000000, T4d, T46); + T4w = T4s - T4t; + T4u = T4s + T4t; + T5L = T4u + T4r; + T4v = FNMS(KP250000000, T4u, T4r); + T1J = T1F - T1G; + T1H = T1F + T1G; + } + } + } + { + E T38, T3b, T39, T3f, T36, T3e, T3a; + { + E T28, T3r, T3o, T3v, T3p, T2b, T2k, T35, T3l, T2H, T2r, T2j, T2z, T2D, T2G; + E T2X, T2F, T2T, T32, T3h, T3k, T31, T3d, T3j, T3t, T1x, T2u, T1O, T2x, T2v; + E T1y, T2B, T29, T2J, T2M, T2R, T2N, T2V; + { + E T2l, T1I, T18, T2q, T34, T17, T16, T3n; + T28 = T24 + T27; + T2l = FNMS(KP250000000, T24, T27); + T3r = T1H + T1E; + T1I = FNMS(KP250000000, T1H, T1E); + T18 = TU - T15; + T16 = TU + T15; + T3n = W[8]; + T2q = FNMS(KP618033988, T2p, T2o); + T34 = FMA(KP618033988, T2o, T2p); + T17 = FNMS(KP250000000, T16, TJ); + T3o = TJ + T16; + T3v = T3n * T3r; + T3p = T3n * T3o; + { + E T2Y, T2E, T3i, T30; + { + E T2e, T33, T2n, T2i; + T2Y = FMA(KP559016994, T2d, T2c); + T2e = FNMS(KP559016994, T2d, T2c); + T2b = W[14]; + T2k = W[15]; + T33 = FMA(KP559016994, T2m, T2l); + T2n = FNMS(KP559016994, T2m, T2l); + T2E = FMA(KP951056516, T2h, T2e); + T2i = FNMS(KP951056516, T2h, T2e); + T35 = FMA(KP951056516, T34, T33); + T3l = FNMS(KP951056516, T34, T33); + T2H = FNMS(KP951056516, T2q, T2n); + T2r = FMA(KP951056516, T2q, T2n); + T2j = T2b * T2i; + T2z = T2k * T2i; + T2D = W[22]; + T2G = W[23]; + } + T2X = W[30]; + T2F = T2D * T2E; + T2T = T2G * T2E; + T3i = FMA(KP951056516, T2Z, T2Y); + 
T30 = FNMS(KP951056516, T2Z, T2Y); + T32 = W[31]; + T3h = W[6]; + T3k = W[7]; + T31 = T2X * T30; + T3d = T32 * T30; + T3j = T3h * T3i; + T3t = T3k * T3i; + } + { + E T2K, T2P, TE, T19, T1K, T2t, T37; + T2K = FNMS(KP559016994, T18, T17); + T19 = FMA(KP559016994, T18, T17); + T1K = FMA(KP559016994, T1J, T1I); + T2P = FNMS(KP559016994, T1J, T1I); + TE = W[0]; + T2t = W[16]; + T1x = FMA(KP951056516, T1w, T19); + T2u = FNMS(KP951056516, T1w, T19); + T1O = FNMS(KP951056516, T1N, T1K); + T2x = FMA(KP951056516, T1N, T1K); + T2v = T2t * T2u; + T1y = TE * T1x; + T2B = T2t * T2x; + T29 = TE * T1O; + T2J = W[24]; + T37 = W[32]; + T2M = FMA(KP951056516, T2L, T2K); + T38 = FNMS(KP951056516, T2L, T2K); + T2R = FNMS(KP951056516, T2Q, T2P); + T3b = FMA(KP951056516, T2Q, T2P); + T39 = T37 * T38; + T2N = T2J * T2M; + T3f = T37 * T3b; + } + } + T2V = T2J * T2R; + { + E T3m, T3u, T3q, T2a, T1P, T1z; + T1z = W[1]; + T3m = FNMS(T3k, T3l, T3j); + T3u = FMA(T3h, T3l, T3t); + T3q = W[9]; + T2a = FNMS(T1z, T1x, T29); + T1P = FMA(T1z, T1O, T1y); + { + E T2s, T2A, T2w, T3w, T3s; + T2s = FNMS(T2k, T2r, T2j); + T3w = FNMS(T3q, T3o, T3v); + T3s = FMA(T3q, T3r, T3p); + Im[0] = T2a - T28; + Ip[0] = T28 + T2a; + Rm[0] = TD + T1P; + Rp[0] = TD - T1P; + Im[WS(rs, 2)] = T3w - T3u; + Ip[WS(rs, 2)] = T3u + T3w; + Rm[WS(rs, 2)] = T3m + T3s; + Rp[WS(rs, 2)] = T3m - T3s; + T2A = FMA(T2b, T2r, T2z); + T2w = W[17]; + { + E T2I, T2U, T2O, T2C, T2y, T2W, T2S; + T2I = FNMS(T2G, T2H, T2F); + T2U = FMA(T2D, T2H, T2T); + T2O = W[25]; + T2C = FNMS(T2w, T2u, T2B); + T2y = FMA(T2w, T2x, T2v); + T36 = FNMS(T32, T35, T31); + T2W = FNMS(T2O, T2M, T2V); + T2S = FMA(T2O, T2R, T2N); + Im[WS(rs, 4)] = T2C - T2A; + Ip[WS(rs, 4)] = T2A + T2C; + Rm[WS(rs, 4)] = T2s + T2y; + Rp[WS(rs, 4)] = T2s - T2y; + Im[WS(rs, 6)] = T2W - T2U; + Ip[WS(rs, 6)] = T2U + T2W; + Rm[WS(rs, 6)] = T2I + T2S; + Rp[WS(rs, 6)] = T2I - T2S; + T3e = FMA(T2X, T35, T3d); + T3a = W[33]; + } + } + } + } + { + E T55, T51, T54, T53, T5h, T5P, T5J, T3x, T4P, 
T5F, T5p, T43, T3R, T3S, T5l; + E T5o, T4D, T5n, T5x, T4H, T4M, T5B, T5E, T4L, T4X, T5D, T5N, T4S, T4o, T4V; + E T4B, T4T, T4p, T4Z, T4F, T57, T5a, T5f, T5b, T5j; + { + E T3X, T4O, T42, T3g, T3c, T5H; + T55 = T3W + T3T; + T3X = FNMS(KP250000000, T3W, T3T); + T51 = W[18]; + T3g = FNMS(T3a, T38, T3f); + T3c = FMA(T3a, T3b, T39); + T54 = W[19]; + T53 = T51 * T52; + Im[WS(rs, 8)] = T3g - T3e; + Ip[WS(rs, 8)] = T3e + T3g; + Rm[WS(rs, 8)] = T36 + T3c; + Rp[WS(rs, 8)] = T36 - T3c; + T5h = T54 * T52; + T5H = W[28]; + T4O = FMA(KP618033988, T40, T41); + T42 = FNMS(KP618033988, T41, T40); + T5P = T5H * T5L; + T5J = T5H * T5I; + { + E T4I, T5m, T3Q, T3I, T3Z, T4N, T4K, T5C; + T3I = FNMS(KP559016994, T3H, T3G); + T4I = FMA(KP559016994, T3H, T3G); + T3Z = FNMS(KP559016994, T3Y, T3X); + T4N = FMA(KP559016994, T3Y, T3X); + T3x = W[2]; + T5m = FNMS(KP951056516, T3P, T3I); + T3Q = FMA(KP951056516, T3P, T3I); + T4P = FMA(KP951056516, T4O, T4N); + T5F = FNMS(KP951056516, T4O, T4N); + T5p = FMA(KP951056516, T42, T3Z); + T43 = FNMS(KP951056516, T42, T3Z); + T3R = T3x * T3Q; + T3S = W[3]; + T5l = W[34]; + T5o = W[35]; + T4D = T3S * T3Q; + T5n = T5l * T5m; + T5x = T5o * T5m; + T4K = FNMS(KP951056516, T4J, T4I); + T5C = FMA(KP951056516, T4J, T4I); + T4H = W[10]; + T4M = W[11]; + T5B = W[26]; + T5E = W[27]; + T4L = T4H * T4K; + T4X = T4M * T4K; + T5D = T5B * T5C; + T5N = T5E * T5C; + } + { + E T58, T5d, T45, T4g, T4x, T4R, T5r; + T4g = FNMS(KP559016994, T4f, T4e); + T58 = FMA(KP559016994, T4f, T4e); + T5d = FMA(KP559016994, T4w, T4v); + T4x = FNMS(KP559016994, T4w, T4v); + T45 = W[4]; + T4R = W[12]; + T4S = FNMS(KP951056516, T4n, T4g); + T4o = FMA(KP951056516, T4n, T4g); + T4V = FMA(KP951056516, T4A, T4x); + T4B = FNMS(KP951056516, T4A, T4x); + T4T = T4R * T4S; + T4p = T45 * T4o; + T4Z = T4R * T4V; + T4F = T45 * T4B; + T57 = W[20]; + T5r = W[36]; + T5s = FNMS(KP951056516, T59, T58); + T5a = FMA(KP951056516, T59, T58); + T5v = FMA(KP951056516, T5e, T5d); + T5f = FNMS(KP951056516, T5e, T5d); 
+ T5t = T5r * T5s; + T5b = T57 * T5a; + T5z = T5r * T5v; + } + } + T5j = T57 * T5f; + { + E T44, T4E, T5G, T5O, T5K, T4G, T4C, T4q; + T44 = FNMS(T3S, T43, T3R); + T4E = FMA(T3x, T43, T4D); + T4q = W[5]; + T5G = FNMS(T5E, T5F, T5D); + T5O = FMA(T5B, T5F, T5N); + T5K = W[29]; + T4G = FNMS(T4q, T4o, T4F); + T4C = FMA(T4q, T4B, T4p); + { + E T4Q, T4Y, T4U, T5Q, T5M; + T4Q = FNMS(T4M, T4P, T4L); + T5Q = FNMS(T5K, T5I, T5P); + T5M = FMA(T5K, T5L, T5J); + Im[WS(rs, 1)] = T4G - T4E; + Ip[WS(rs, 1)] = T4E + T4G; + Rm[WS(rs, 1)] = T44 + T4C; + Rp[WS(rs, 1)] = T44 - T4C; + Im[WS(rs, 7)] = T5Q - T5O; + Ip[WS(rs, 7)] = T5O + T5Q; + Rm[WS(rs, 7)] = T5G + T5M; + Rp[WS(rs, 7)] = T5G - T5M; + T4Y = FMA(T4H, T4P, T4X); + T4U = W[13]; + { + E T56, T5i, T5c, T50, T4W, T5k, T5g; + T56 = FNMS(T54, T55, T53); + T5i = FMA(T51, T55, T5h); + T5c = W[21]; + T50 = FNMS(T4U, T4S, T4Z); + T4W = FMA(T4U, T4V, T4T); + T5q = FNMS(T5o, T5p, T5n); + T5k = FNMS(T5c, T5a, T5j); + T5g = FMA(T5c, T5f, T5b); + Im[WS(rs, 3)] = T50 - T4Y; + Ip[WS(rs, 3)] = T4Y + T50; + Rm[WS(rs, 3)] = T4Q + T4W; + Rp[WS(rs, 3)] = T4Q - T4W; + Im[WS(rs, 5)] = T5k - T5i; + Ip[WS(rs, 5)] = T5i + T5k; + Rm[WS(rs, 5)] = T56 + T5g; + Rp[WS(rs, 5)] = T56 - T5g; + T5y = FMA(T5l, T5p, T5x); + T5u = W[37]; + } + } + } + } + } + } + T5A = FNMS(T5u, T5s, T5z); + T5w = FMA(T5u, T5v, T5t); + Im[WS(rs, 9)] = T5A - T5y; + Ip[WS(rs, 9)] = T5y + T5A; + Rm[WS(rs, 9)] = T5q + T5w; + Rp[WS(rs, 9)] = T5q - T5w; + } + } +} + /* Twiddle instruction table referenced by desc below. NOTE(review): TW_FULL and TW_NEXT are defined outside this chunk; presumably the first entry requests the full twiddle set for radix 20 and the second terminates the list -- confirm. */ +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + /* Codelet descriptor: the literal 20, the codelet name string, the twiddle table, the address of GENUS (declared elsewhere) and a four-number tuple. NOTE(review): the tuple is presumably the operation counts of this FMA variant -- confirm. */ +static const hc2c_desc desc = { 20, "hc2cbdft2_20", twinstr, &GENUS, {176, 38, 110, 0} }; + /* Registration hook: passes hc2cbdft2_20 and its descriptor to planner p through X(khc2c_register) with the HC2C_VIA_DFT flag. */ +void X(codelet_hc2cbdft2_20) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_20, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + /* From here to the matching #endif: the alternative implementation compiled when the HAVE_FMA branch is not taken. */ +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cbdft2_20 -include hc2cb.h */ + +/* + * This function contains 286 FP additions, 124 FP 
multiplications, + * (or, 224 additions, 62 multiplications, 62 fused multiply/add), + * 89 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + /* hc2cbdft2_20, variant on the #else side of HAVE_FMA. For each m in [mb, me) it reads ten values at stride rs (slots 0 through 9 via WS) from each of Rp, Ip, Rm and Im, combines them with the 38 twiddle values W[0] through W[37], and stores ten results back into each of the same four arrays. Between iterations Rp and Ip advance by ms, Rm and Im step back by ms, and W advances by 38; before the first iteration W is offset by (mb - 1) times 38. All loads of an iteration precede its first store. FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are macros defined outside this chunk. NOTE(review): per the generator flags (-n 20, -sign 1, -dif) this is presumably the size-20 backward DIF half-complex codelet -- confirm against the genfft documentation. */ +static void hc2cbdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* one quarter */ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2 pi / 5) */ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi / 5) */ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5) / 4 */ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { /* one pass per m; the E locals below are straight-line temporaries */ + E T7, T3N, T4a, T16, T1G, T3g, T3D, T26, T1k, T3A, T3B, T1v, T2e, T48, T47; + E T2d, T1L, T43, T40, T1K, T2l, T3t, T2m, T3w, T3n, T3p, TC, T2b, T4d, T4f; + E T23, T2j, T1B, T1H, T3U, T3W, T3G, T3I, T11, T17; + { /* slots 0 and 5 of Rp/Ip paired with slots 9 and 4 of Rm/Im: sums and differences */ + E T3, T1C, T15, T24, T6, T12, T1F, T25; + { + E T1, T2, T13, T14; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + T3 = T1 + T2; + T1C = T1 - T2; + T13 = Ip[0]; + T14 = Im[WS(rs, 9)]; + T15 = T13 + T14; + T24 = T13 - T14; + } + { + E T4, T5, T1D, T1E; + T4 = Rp[WS(rs, 5)]; + T5 = Rm[WS(rs, 4)]; + T6 = T4 + T5; + T12 = T4 - T5; + T1D = Ip[WS(rs, 5)]; + T1E = Im[WS(rs, 4)]; + T1F = T1D + T1E; + T25 = T1D - T1E; + } + T7 = T3 + T6; + T3N = T15 - T12; + T4a = T1C + T1F; + T16 = T12 + T15; + T1G = T1C - T1F; + T3g = T3 - T6; + T3D = T24 - T25; + T26 = T24 + T25; + } + { /* remaining 32 loads, in four groups of eight, followed by cross-group combinations */ + E Te, T3O, T3Y, TJ, T1e, T3h, T3r, T1R, TA, T3S, T42, TZ, T1u, T3l, T3v; + E T21, Tl, T3P, T3Z, TO, T1j, T3i, T3s, T1U, Tt, T3R, T41, TU, T1p, T3k; + E T3u, T1Y; + { + E Ta, T1a, TI, T1P, Td, TF, T1d, T1Q; + { + E T8, T9, TG, TH; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + T1a = T8 - T9; + TG = Ip[WS(rs, 4)]; + TH = Im[WS(rs, 5)]; + TI = TG + TH; + T1P = TG - TH; + } + { + E Tb, Tc, T1b, T1c; + Tb = Rp[WS(rs, 9)]; + Tc = Rm[0]; + Td = Tb + Tc; + TF = Tb - Tc; + T1b = 
Ip[WS(rs, 9)]; + T1c = Im[0]; + T1d = T1b + T1c; + T1Q = T1b - T1c; + } + Te = Ta + Td; + T3O = TI - TF; + T3Y = T1a + T1d; + TJ = TF + TI; + T1e = T1a - T1d; + T3h = Ta - Td; + T3r = T1P - T1Q; + T1R = T1P + T1Q; + } + { + E Tw, T1q, TY, T1Z, Tz, TV, T1t, T20; + { + E Tu, Tv, TW, TX; + Tu = Rm[WS(rs, 7)]; + Tv = Rp[WS(rs, 2)]; + Tw = Tu + Tv; + T1q = Tu - Tv; + TW = Im[WS(rs, 7)]; + TX = Ip[WS(rs, 2)]; + TY = TW + TX; + T1Z = TX - TW; + } + { + E Tx, Ty, T1r, T1s; + Tx = Rm[WS(rs, 2)]; + Ty = Rp[WS(rs, 7)]; + Tz = Tx + Ty; + TV = Tx - Ty; + T1r = Im[WS(rs, 2)]; + T1s = Ip[WS(rs, 7)]; + T1t = T1r + T1s; + T20 = T1s - T1r; + } + TA = Tw + Tz; + T3S = TV + TY; + T42 = T1q - T1t; + TZ = TV - TY; + T1u = T1q + T1t; + T3l = Tw - Tz; + T3v = T1Z - T20; + T21 = T1Z + T20; + } + { + E Th, T1f, TN, T1S, Tk, TK, T1i, T1T; + { + E Tf, Tg, TL, TM; + Tf = Rm[WS(rs, 3)]; + Tg = Rp[WS(rs, 6)]; + Th = Tf + Tg; + T1f = Tf - Tg; + TL = Im[WS(rs, 3)]; + TM = Ip[WS(rs, 6)]; + TN = TL + TM; + T1S = TM - TL; + } + { + E Ti, Tj, T1g, T1h; + Ti = Rp[WS(rs, 1)]; + Tj = Rm[WS(rs, 8)]; + Tk = Ti + Tj; + TK = Ti - Tj; + T1g = Ip[WS(rs, 1)]; + T1h = Im[WS(rs, 8)]; + T1i = T1g + T1h; + T1T = T1g - T1h; + } + Tl = Th + Tk; + T3P = TK + TN; + T3Z = T1f + T1i; + TO = TK - TN; + T1j = T1f - T1i; + T3i = Th - Tk; + T3s = T1S - T1T; + T1U = T1S + T1T; + } + { + E Tp, T1l, TT, T1W, Ts, TQ, T1o, T1X; + { + E Tn, To, TR, TS; + Tn = Rp[WS(rs, 8)]; + To = Rm[WS(rs, 1)]; + Tp = Tn + To; + T1l = Tn - To; + TR = Ip[WS(rs, 8)]; + TS = Im[WS(rs, 1)]; + TT = TR + TS; + T1W = TR - TS; + } + { + E Tq, Tr, T1m, T1n; + Tq = Rm[WS(rs, 6)]; + Tr = Rp[WS(rs, 3)]; + Ts = Tq + Tr; + TQ = Tq - Tr; + T1m = Im[WS(rs, 6)]; + T1n = Ip[WS(rs, 3)]; + T1o = T1m + T1n; + T1X = T1n - T1m; + } + Tt = Tp + Ts; + T3R = TT - TQ; + T41 = T1l - T1o; + TU = TQ + TT; + T1p = T1l + T1o; + T3k = Tp - Ts; + T3u = T1W - T1X; + T1Y = T1W + T1X; + } + T1k = T1e - T1j; + T3A = T3h - T3i; + T3B = T3k - T3l; + T1v = T1p - T1u; + T2e = T1Y - T21; 
+ T48 = T3R + T3S; + T47 = T3O + T3P; + T2d = T1R - T1U; + T1L = TU - TZ; + T43 = T41 - T42; + T40 = T3Y - T3Z; + T1K = TJ - TO; + T2l = Te - Tl; + T3t = T3r - T3s; + T2m = Tt - TA; + T3w = T3u - T3v; + { /* pairwise sums, plus their differences scaled by KP559016994 */ + E T3j, T3m, Tm, TB; + T3j = T3h + T3i; + T3m = T3k + T3l; + T3n = T3j + T3m; + T3p = KP559016994 * (T3j - T3m); + Tm = Te + Tl; + TB = Tt + TA; + TC = Tm + TB; + T2b = KP559016994 * (Tm - TB); + } + { + E T4b, T4c, T3Q, T3T; + T4b = T3Y + T3Z; + T4c = T41 + T42; + T4d = T4b + T4c; + T4f = KP559016994 * (T4b - T4c); + { + E T1V, T22, T1z, T1A; + T1V = T1R + T1U; + T22 = T1Y + T21; + T23 = T1V + T22; + T2j = KP559016994 * (T1V - T22); + T1z = T1e + T1j; + T1A = T1p + T1u; + T1B = KP559016994 * (T1z - T1A); + T1H = T1z + T1A; + } + T3Q = T3O - T3P; + T3T = T3R - T3S; + T3U = T3Q + T3T; + T3W = KP559016994 * (T3Q - T3T); + { + E T3E, T3F, TP, T10; + T3E = T3r + T3s; + T3F = T3u + T3v; + T3G = T3E + T3F; + T3I = KP559016994 * (T3E - T3F); + TP = TJ + TO; + T10 = TU + TZ; + T11 = KP559016994 * (TP - T10); + T17 = TP + T10; + } + } + } + { /* first output group: twiddle by W and store slots 0, 2, 4, 6, 8 */ + E TD, T27, T3c, T3e, T2o, T36, T2A, T2U, T1N, T2Z, T2t, T2J, T1x, T2X, T2r; + E T2F, T2g, T34, T2y, T2Q; + TD = T7 + TC; + T27 = T23 + T26; + { + E T39, T3b, T38, T3a; + T39 = T16 + T17; + T3b = T1H + T1G; + T38 = W[8]; + T3a = W[9]; + T3c = FMA(T38, T39, T3a * T3b); + T3e = FNMS(T3a, T39, T38 * T3b); + } + { + E T2n, T2S, T2k, T2T, T2i; + T2n = FNMS(KP951056516, T2m, KP587785252 * T2l); + T2S = FMA(KP951056516, T2l, KP587785252 * T2m); + T2i = FNMS(KP250000000, T23, T26); + T2k = T2i - T2j; + T2T = T2j + T2i; + T2o = T2k - T2n; + T36 = T2T - T2S; + T2A = T2n + T2k; + T2U = T2S + T2T; + } + { + E T1M, T2H, T1J, T2I, T1I; + T1M = FMA(KP951056516, T1K, KP587785252 * T1L); + T2H = FNMS(KP951056516, T1L, KP587785252 * T1K); + T1I = FNMS(KP250000000, T1H, T1G); + T1J = T1B + T1I; + T2I = T1I - T1B; + T1N = T1J - T1M; + T2Z = T2I - T2H; + T2t = T1M + T1J; + T2J = T2H + T2I; + } + { + E T1w, T2E, T19, T2D, T18; + T1w = FMA(KP951056516, 
T1k, KP587785252 * T1v); + T2E = FNMS(KP951056516, T1v, KP587785252 * T1k); + T18 = FNMS(KP250000000, T17, T16); + T19 = T11 + T18; + T2D = T18 - T11; + T1x = T19 + T1w; + T2X = T2D + T2E; + T2r = T19 - T1w; + T2F = T2D - T2E; + } + { + E T2f, T2P, T2c, T2O, T2a; + T2f = FNMS(KP951056516, T2e, KP587785252 * T2d); + T2P = FMA(KP951056516, T2d, KP587785252 * T2e); + T2a = FNMS(KP250000000, TC, T7); + T2c = T2a - T2b; + T2O = T2b + T2a; + T2g = T2c + T2f; + T34 = T2O + T2P; + T2y = T2c - T2f; + T2Q = T2O - T2P; + } + { /* slot 0 */ + E T1O, T28, TE, T1y; + TE = W[0]; + T1y = W[1]; + T1O = FMA(TE, T1x, T1y * T1N); + T28 = FNMS(T1y, T1x, TE * T1N); + Rp[0] = TD - T1O; + Ip[0] = T27 + T28; + Rm[0] = TD + T1O; + Im[0] = T28 - T27; + } + { /* slot 2 */ + E T37, T3d, T33, T35; + T33 = W[6]; + T35 = W[7]; + T37 = FNMS(T35, T36, T33 * T34); + T3d = FMA(T35, T34, T33 * T36); + Rp[WS(rs, 2)] = T37 - T3c; + Ip[WS(rs, 2)] = T3d + T3e; + Rm[WS(rs, 2)] = T37 + T3c; + Im[WS(rs, 2)] = T3e - T3d; + } + { /* slot 4 */ + E T2p, T2v, T2u, T2w; + { + E T29, T2h, T2q, T2s; + T29 = W[14]; + T2h = W[15]; + T2p = FNMS(T2h, T2o, T29 * T2g); + T2v = FMA(T2h, T2g, T29 * T2o); + T2q = W[16]; + T2s = W[17]; + T2u = FMA(T2q, T2r, T2s * T2t); + T2w = FNMS(T2s, T2r, T2q * T2t); + } + Rp[WS(rs, 4)] = T2p - T2u; + Ip[WS(rs, 4)] = T2v + T2w; + Rm[WS(rs, 4)] = T2p + T2u; + Im[WS(rs, 4)] = T2w - T2v; + } + { /* slot 6 */ + E T2B, T2L, T2K, T2M; + { + E T2x, T2z, T2C, T2G; + T2x = W[22]; + T2z = W[23]; + T2B = FNMS(T2z, T2A, T2x * T2y); + T2L = FMA(T2z, T2y, T2x * T2A); + T2C = W[24]; + T2G = W[25]; + T2K = FMA(T2C, T2F, T2G * T2J); + T2M = FNMS(T2G, T2F, T2C * T2J); + } + Rp[WS(rs, 6)] = T2B - T2K; + Ip[WS(rs, 6)] = T2L + T2M; + Rm[WS(rs, 6)] = T2B + T2K; + Im[WS(rs, 6)] = T2M - T2L; + } + { /* slot 8 */ + E T2V, T31, T30, T32; + { + E T2N, T2R, T2W, T2Y; + T2N = W[30]; + T2R = W[31]; + T2V = FNMS(T2R, T2U, T2N * T2Q); + T31 = FMA(T2R, T2Q, T2N * T2U); + T2W = W[32]; + T2Y = W[33]; + T30 = FMA(T2W, T2X, T2Y * T2Z); + T32 = FNMS(T2Y, T2X, T2W * T2Z); + } + Rp[WS(rs, 8)] 
= T2V - T30; + Ip[WS(rs, 8)] = T31 + T32; + Rm[WS(rs, 8)] = T2V + T30; + Im[WS(rs, 8)] = T32 - T31; + } + } + { /* second output group: twiddle by W and store slots 5, 7, 1, 3, 9 */ + E T4F, T4P, T5c, T5e, T3y, T54, T4o, T4S, T4h, T4Z, T4x, T4N, T45, T4X, T4v; + E T4J, T3K, T56, T4s, T4U; + { + E T4C, T4E, T4B, T4D; + T4C = T3g + T3n; + T4E = T3G + T3D; + T4B = W[18]; + T4D = W[19]; + T4F = FNMS(T4D, T4E, T4B * T4C); + T4P = FMA(T4D, T4C, T4B * T4E); + } + { + E T59, T5b, T58, T5a; + T59 = T3N + T3U; + T5b = T4d + T4a; + T58 = W[28]; + T5a = W[29]; + T5c = FMA(T58, T59, T5a * T5b); + T5e = FNMS(T5a, T59, T58 * T5b); + } + { + E T3x, T4n, T3q, T4m, T3o; + T3x = FNMS(KP951056516, T3w, KP587785252 * T3t); + T4n = FMA(KP951056516, T3t, KP587785252 * T3w); + T3o = FNMS(KP250000000, T3n, T3g); + T3q = T3o - T3p; + T4m = T3p + T3o; + T3y = T3q - T3x; + T54 = T4m + T4n; + T4o = T4m - T4n; + T4S = T3q + T3x; + } + { + E T49, T4M, T4g, T4L, T4e; + T49 = FNMS(KP951056516, T48, KP587785252 * T47); + T4M = FMA(KP951056516, T47, KP587785252 * T48); + T4e = FNMS(KP250000000, T4d, T4a); + T4g = T4e - T4f; + T4L = T4f + T4e; + T4h = T49 + T4g; + T4Z = T4M + T4L; + T4x = T4g - T49; + T4N = T4L - T4M; + } + { + E T44, T4I, T3X, T4H, T3V; + T44 = FNMS(KP951056516, T43, KP587785252 * T40); + T4I = FMA(KP951056516, T40, KP587785252 * T43); + T3V = FNMS(KP250000000, T3U, T3N); + T3X = T3V - T3W; + T4H = T3W + T3V; + T45 = T3X - T44; + T4X = T4H - T4I; + T4v = T3X + T44; + T4J = T4H + T4I; + } + { + E T3C, T4q, T3J, T4r, T3H; + T3C = FNMS(KP951056516, T3B, KP587785252 * T3A); + T4q = FMA(KP951056516, T3A, KP587785252 * T3B); + T3H = FNMS(KP250000000, T3G, T3D); + T3J = T3H - T3I; + T4r = T3I + T3H; + T3K = T3C + T3J; + T56 = T4r - T4q; + T4s = T4q + T4r; + T4U = T3J - T3C; + } + { /* slot 5 */ + E T4O, T4Q, T4G, T4K; + T4G = W[20]; + T4K = W[21]; + T4O = FMA(T4G, T4J, T4K * T4N); + T4Q = FNMS(T4K, T4J, T4G * T4N); + Rp[WS(rs, 5)] = T4F - T4O; + Ip[WS(rs, 5)] = T4P + T4Q; + Rm[WS(rs, 5)] = T4F + T4O; + Im[WS(rs, 5)] = T4Q - T4P; + } + { /* slot 7 */ + E T57, T5d, T53, T55; + T53 = 
W[26]; + T55 = W[27]; + T57 = FNMS(T55, T56, T53 * T54); + T5d = FMA(T55, T54, T53 * T56); + Rp[WS(rs, 7)] = T57 - T5c; + Ip[WS(rs, 7)] = T5d + T5e; + Rm[WS(rs, 7)] = T57 + T5c; + Im[WS(rs, 7)] = T5e - T5d; + } + { /* slot 1 */ + E T3L, T4j, T4i, T4k; + { + E T3f, T3z, T3M, T46; + T3f = W[2]; + T3z = W[3]; + T3L = FNMS(T3z, T3K, T3f * T3y); + T4j = FMA(T3z, T3y, T3f * T3K); + T3M = W[4]; + T46 = W[5]; + T4i = FMA(T3M, T45, T46 * T4h); + T4k = FNMS(T46, T45, T3M * T4h); + } + Rp[WS(rs, 1)] = T3L - T4i; + Ip[WS(rs, 1)] = T4j + T4k; + Rm[WS(rs, 1)] = T3L + T4i; + Im[WS(rs, 1)] = T4k - T4j; + } + { /* slot 3 */ + E T4t, T4z, T4y, T4A; + { + E T4l, T4p, T4u, T4w; + T4l = W[10]; + T4p = W[11]; + T4t = FNMS(T4p, T4s, T4l * T4o); + T4z = FMA(T4p, T4o, T4l * T4s); + T4u = W[12]; + T4w = W[13]; + T4y = FMA(T4u, T4v, T4w * T4x); + T4A = FNMS(T4w, T4v, T4u * T4x); + } + Rp[WS(rs, 3)] = T4t - T4y; + Ip[WS(rs, 3)] = T4z + T4A; + Rm[WS(rs, 3)] = T4t + T4y; + Im[WS(rs, 3)] = T4A - T4z; + } + { /* slot 9 */ + E T4V, T51, T50, T52; + { + E T4R, T4T, T4W, T4Y; + T4R = W[34]; + T4T = W[35]; + T4V = FNMS(T4T, T4U, T4R * T4S); + T51 = FMA(T4T, T4S, T4R * T4U); + T4W = W[36]; + T4Y = W[37]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T52 = FNMS(T4Y, T4X, T4W * T4Z); + } + Rp[WS(rs, 9)] = T4V - T50; + Ip[WS(rs, 9)] = T51 + T52; + Rm[WS(rs, 9)] = T4V + T50; + Im[WS(rs, 9)] = T52 - T51; + } + } + } + } +} + /* Twiddle instruction table referenced by desc below. NOTE(review): TW_FULL and TW_NEXT are defined outside this chunk; presumably the first entry requests the full twiddle set for radix 20 and the second terminates the list -- confirm. */ +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + /* Codelet descriptor: the literal 20, the codelet name string, the twiddle table, the address of GENUS (declared elsewhere) and a four-number tuple whose first three entries equal the addition, multiplication and fused multiply/add counts quoted in the generated header comment of this variant. */ +static const hc2c_desc desc = { 20, "hc2cbdft2_20", twinstr, &GENUS, {224, 62, 62, 0} }; + /* Registration hook: passes hc2cbdft2_20 and its descriptor to planner p through X(khc2c_register) with the HC2C_VIA_DFT flag. */ +void X(codelet_hc2cbdft2_20) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_20, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1888 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts 
Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:47 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft2_32 -include hc2cb.h */ + +/* + * This function contains 498 FP additions, 260 FP multiplications, + * (or, 300 additions, 62 multiplications, 198 fused multiply/add), + * 165 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < 
me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T8e, T8h, T7S, T8l, T8f, T84, T8c, T8k, T8g, T86, T82, T8m, T8i; + { + E T4B, T3h, T3K, Tv, T8Y, T6T, T8L, T7i, T8X, T7f, T4Y, T1G, T4K, T1j, T4X; + E T2M, T8C, T6d, T8o, T66, T8K, T6M, T4L, T2P, T4C, T3o, T5q, T4q, T8p, T6C; + E T8B, T6z, T72, T2u, T75, T10, T3P, T3a, T3L, T4t, T4E, T8F, T8t, T4F, T4w; + E T8E, T8w, T6E, T6l, T6F, T6s, T76, T4P, T51, T2R, T28, T8P, T90, T7k, T71; + E T2p, T4R, T2x, T73, T6x, T6y; + { + E T3l, T16, T3m, T2H, T2E, T13, T64, T7, T3i, T2J, T1c, T3j, T1h, T2K, Te; + E T1z, T6R, T6a, Tt, T3g, T6b, T1E, T6Q, Tj, T1p, Ti, T3b, T1n, Tk, T1q; + E T1r; + { + E T1, T2, T4, T5; + { + E T14, T15, T2F, T2G; + T14 = Ip[0]; + T15 = Im[WS(rs, 15)]; + T2F = Ip[WS(rs, 8)]; + T2G = Im[WS(rs, 7)]; + T1 = Rp[0]; + T3l = T14 - T15; + T16 = T14 + T15; + T3m = T2F - T2G; + T2H = T2F + T2G; + T2 = Rm[WS(rs, 15)]; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + } + { + E T1b, T1e, T18, Ta, T1f, Tb, Tc, T8, T9, T1g, T1d, Td; + { + E T19, T3, T6, T1a; + T19 = Ip[WS(rs, 4)]; + T2E = T1 - T2; + T3 = T1 + T2; + T13 = T4 - T5; + T6 = T4 + T5; + T1a = Im[WS(rs, 11)]; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + T64 = T3 - T6; + T7 = T3 + T6; + T1b = T19 + T1a; + T3i = T19 - T1a; + } + T1e = Im[WS(rs, 3)]; + T18 = T8 - T9; + Ta = T8 + T9; + T1f = Ip[WS(rs, 12)]; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + T2J = T18 - T1b; + T1c = T18 + T1b; + T1g = T1e + T1f; + T3j = T1f - T1e; + T1d = Tb - Tc; + Td = Tb + Tc; + T1h = T1d + T1g; + T2K = T1d - T1g; + T6x = Ta - Td; + Te = Ta + Td; + } + { + E Tq, T1A, Tp, T3e, T1y, Tr, T1B, T1C; + { + E Tn, To, T1w, T1x; + Tn = Rm[WS(rs, 1)]; + To = Rp[WS(rs, 14)]; + T1w = Im[WS(rs, 1)]; + T1x = Ip[WS(rs, 14)]; + Tq = Rp[WS(rs, 6)]; + T1A = Tn - To; + Tp = Tn + To; + T3e = T1x - T1w; + T1y = T1w + T1x; + Tr = Rm[WS(rs, 9)]; + T1B = Ip[WS(rs, 6)]; + T1C = Im[WS(rs, 9)]; + } + { + E Tg, Th, T1l, T1m; + Tg = 
Rp[WS(rs, 2)]; + { + E T1v, Ts, T3f, T1D; + T1v = Tq - Tr; + Ts = Tq + Tr; + T3f = T1B - T1C; + T1D = T1B + T1C; + T1z = T1v - T1y; + T6R = T1v + T1y; + T6a = Tp - Ts; + Tt = Tp + Ts; + T3g = T3e + T3f; + T6b = T3e - T3f; + T1E = T1A - T1D; + T6Q = T1A + T1D; + Th = Rm[WS(rs, 13)]; + } + T1l = Ip[WS(rs, 2)]; + T1m = Im[WS(rs, 13)]; + Tj = Rp[WS(rs, 10)]; + T1p = Tg - Th; + Ti = Tg + Th; + T3b = T1l - T1m; + T1n = T1l + T1m; + Tk = Rm[WS(rs, 5)]; + T1q = Ip[WS(rs, 10)]; + T1r = Im[WS(rs, 5)]; + } + } + } + { + E T4o, T67, T68, T4p, T2I, T1i, T2N, T1u, T1F, T2O, T6K, T17; + { + E Tf, T1o, T1t, Tu, T7g, T6P, T6S, T7h, T7d, T7e; + { + E T6O, T6N, T1k, Tl; + T4o = T7 - Te; + Tf = T7 + Te; + T1k = Tj - Tk; + Tl = Tj + Tk; + { + E T3c, T1s, Tm, T3d; + T3c = T1q - T1r; + T1s = T1q + T1r; + T1o = T1k + T1n; + T6O = T1n - T1k; + T67 = Ti - Tl; + Tm = Ti + Tl; + T3d = T3b + T3c; + T68 = T3b - T3c; + T1t = T1p - T1s; + T6N = T1p + T1s; + T4B = Tm - Tt; + Tu = Tm + Tt; + T4p = T3g - T3d; + T3h = T3d + T3g; + } + T7g = FNMS(KP414213562, T6N, T6O); + T6P = FMA(KP414213562, T6O, T6N); + T6S = FMA(KP414213562, T6R, T6Q); + T7h = FNMS(KP414213562, T6Q, T6R); + } + T3K = Tf - Tu; + Tv = Tf + Tu; + T8Y = T6P + T6S; + T6T = T6P - T6S; + T2I = T2E - T2H; + T7d = T2E + T2H; + T7e = T1c + T1h; + T1i = T1c - T1h; + T2N = FNMS(KP414213562, T1o, T1t); + T1u = FMA(KP414213562, T1t, T1o); + T8L = T7h - T7g; + T7i = T7g + T7h; + T8X = FMA(KP707106781, T7e, T7d); + T7f = FNMS(KP707106781, T7e, T7d); + T1F = FNMS(KP414213562, T1E, T1z); + T2O = FMA(KP414213562, T1z, T1E); + T6K = T16 - T13; + T17 = T13 + T16; + } + { + E T6L, T6A, T6B, T65, T3k, T2L, T69, T6c, T3n; + T4Y = T1F - T1u; + T1G = T1u + T1F; + T4K = FNMS(KP707106781, T1i, T17); + T1j = FMA(KP707106781, T1i, T17); + T2L = T2J + T2K; + T6L = T2J - T2K; + T6A = T67 + T68; + T69 = T67 - T68; + T6c = T6a + T6b; + T6B = T6b - T6a; + T4X = FNMS(KP707106781, T2L, T2I); + T2M = FMA(KP707106781, T2L, T2I); + T8C = T69 - T6c; + T6d = T69 + T6c; + 
T65 = T3j - T3i; + T3k = T3i + T3j; + T8o = T64 - T65; + T66 = T64 + T65; + T8K = FNMS(KP707106781, T6L, T6K); + T6M = FMA(KP707106781, T6L, T6K); + T3n = T3l + T3m; + T6y = T3l - T3m; + T4L = T2N - T2O; + T2P = T2N + T2O; + T4C = T3n - T3k; + T3o = T3k + T3n; + T5q = T4o - T4p; + T4q = T4o + T4p; + T8p = T6B - T6A; + T6C = T6A + T6B; + } + } + } + { + E T1M, T6V, T6f, TC, T31, T6j, T23, T6Y, T2v, T2i, TY, T6p, T6n, T35, T2n; + E T2w, T24, T1R, TJ, T6i, T6g, T2Y, T1W, T25, T2q, TN, T2r, T36, T2c, T29; + E TQ, T2s; + { + E TU, T2k, T33, T2j, TX, T2l, T2m, T34; + { + E T1Z, Ty, T20, T2Z, T1L, T1I, TB, T21, T2e, T2h; + { + E T1J, T1K, Tw, Tx, Tz, TA; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + T1J = Ip[WS(rs, 1)]; + T8B = T6y - T6x; + T6z = T6x + T6y; + T1Z = Tw - Tx; + Ty = Tw + Tx; + T1K = Im[WS(rs, 14)]; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + T20 = Ip[WS(rs, 9)]; + T2Z = T1J - T1K; + T1L = T1J + T1K; + T1I = Tz - TA; + TB = Tz + TA; + T21 = Im[WS(rs, 6)]; + } + { + E T2f, T2g, TV, TW; + { + E TS, T30, T22, TT; + TS = Rp[WS(rs, 3)]; + T1M = T1I + T1L; + T6V = T1L - T1I; + T6f = Ty - TB; + TC = Ty + TB; + T30 = T20 - T21; + T22 = T20 + T21; + TT = Rm[WS(rs, 12)]; + T2f = Ip[WS(rs, 3)]; + T31 = T2Z + T30; + T6j = T2Z - T30; + T23 = T1Z - T22; + T6Y = T1Z + T22; + T2e = TS - TT; + TU = TS + TT; + T2g = Im[WS(rs, 12)]; + } + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + T2k = Im[WS(rs, 4)]; + T33 = T2f - T2g; + T2h = T2f + T2g; + T2j = TV - TW; + TX = TV + TW; + T2l = Ip[WS(rs, 11)]; + } + T2v = T2e - T2h; + T2i = T2e + T2h; + } + TY = TU + TX; + T6p = TU - TX; + T2m = T2k + T2l; + T34 = T2l - T2k; + { + E TF, T1T, T2W, T1S, TI, T1U, T1N, T1Q, T1V, T2X; + { + E T1O, T1P, TD, TE, TG, TH; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + T6n = T34 - T33; + T35 = T33 + T34; + T2n = T2j + T2m; + T2w = T2j - T2m; + T1N = TD - TE; + TF = TD + TE; + T1O = Ip[WS(rs, 5)]; + T1P = Im[WS(rs, 10)]; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + T1T = Im[WS(rs, 2)]; + T2W = 
T1O - T1P; + T1Q = T1O + T1P; + T1S = TG - TH; + TI = TG + TH; + T1U = Ip[WS(rs, 13)]; + } + T24 = T1N - T1Q; + T1R = T1N + T1Q; + TJ = TF + TI; + T6i = TF - TI; + T1V = T1T + T1U; + T2X = T1U - T1T; + { + E T2a, T2b, TL, TM, TO, TP; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + T6g = T2X - T2W; + T2Y = T2W + T2X; + T1W = T1S + T1V; + T25 = T1S - T1V; + T2q = TL - TM; + TN = TL + TM; + T2a = Im[0]; + T2b = Ip[WS(rs, 15)]; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + T2r = Ip[WS(rs, 7)]; + T36 = T2b - T2a; + T2c = T2a + T2b; + T29 = TO - TP; + TQ = TO + TP; + T2s = Im[WS(rs, 8)]; + } + } + } + { + E T2d, T4u, T4v, T6r, T6o, T6k, T8u, T8v, T6h; + { + E T4r, T6m, T32, T4s, T6q, T39, T8r, T8s; + { + E TK, TR, T37, T2t, TZ, T38; + T4r = TC - TJ; + TK = TC + TJ; + T2d = T29 - T2c; + T72 = T29 + T2c; + T6m = TN - TQ; + TR = TN + TQ; + T37 = T2r - T2s; + T2t = T2r + T2s; + T32 = T2Y + T31; + T4s = T31 - T2Y; + T4u = TR - TY; + TZ = TR + TY; + T38 = T36 + T37; + T6q = T36 - T37; + T2u = T2q - T2t; + T75 = T2q + T2t; + T10 = TK + TZ; + T3P = TK - TZ; + T4v = T38 - T35; + T39 = T35 + T38; + } + T8r = T6q - T6p; + T6r = T6p + T6q; + T3a = T32 + T39; + T3L = T39 - T32; + T8s = T6m - T6n; + T6o = T6m + T6n; + T4t = T4r - T4s; + T4E = T4r + T4s; + T8F = FNMS(KP414213562, T8r, T8s); + T8t = FMA(KP414213562, T8s, T8r); + T6k = T6i + T6j; + T8u = T6j - T6i; + T8v = T6f - T6g; + T6h = T6f + T6g; + } + { + E T6Z, T1Y, T4O, T26, T6W, T1X, T2o, T4N, T27; + T4F = T4v - T4u; + T4w = T4u + T4v; + T8E = FMA(KP414213562, T8u, T8v); + T8w = FNMS(KP414213562, T8v, T8u); + T6Z = T1R + T1W; + T1X = T1R - T1W; + T6E = FMA(KP414213562, T6h, T6k); + T6l = FNMS(KP414213562, T6k, T6h); + T6F = FNMS(KP414213562, T6o, T6r); + T6s = FMA(KP414213562, T6r, T6o); + T1Y = FMA(KP707106781, T1X, T1M); + T4O = FNMS(KP707106781, T1X, T1M); + T26 = T24 + T25; + T6W = T25 - T24; + T76 = T2i + T2n; + T2o = T2i - T2n; + T4N = FNMS(KP707106781, T26, T23); + T27 = FMA(KP707106781, T26, T23); + { + E T8O, T6X, T8N, T70; + 
T8O = FMA(KP707106781, T6W, T6V); + T6X = FNMS(KP707106781, T6W, T6V); + T8N = FMA(KP707106781, T6Z, T6Y); + T70 = FNMS(KP707106781, T6Z, T6Y); + T4P = FMA(KP668178637, T4O, T4N); + T51 = FNMS(KP668178637, T4N, T4O); + T2R = FNMS(KP198912367, T1Y, T27); + T28 = FMA(KP198912367, T27, T1Y); + T8P = FMA(KP198912367, T8O, T8N); + T90 = FNMS(KP198912367, T8N, T8O); + T7k = FNMS(KP668178637, T6X, T70); + T71 = FMA(KP668178637, T70, T6X); + T2p = FMA(KP707106781, T2o, T2d); + T4R = FNMS(KP707106781, T2o, T2d); + } + T2x = T2v + T2w; + T73 = T2v - T2w; + } + } + } + { + E T8S, T91, T7l, T78, T5U, T5X, T5y, T61, T5V, T5K, T5S, T60, T5W, T5M, T5I; + { + E T4S, T50, T4e, T4h, T3S, T4l, T4f, T44, T4c, T4k, T4g, T46, T42; + { + E T3Q, T3U, T40, T3Z, T3V, T3A, T3D, T3H, T3B, T3y, T3G, T3C; + { + E T11, T3t, T3w, T3q, T3x, T3v, T3F, T12, T2B, T2U, T3z, T2C; + { + E T3u, T2S, T2z, T3p, T4Q, T2y; + T3u = Tv - T10; + T11 = Tv + T10; + T4Q = FNMS(KP707106781, T2x, T2u); + T2y = FMA(KP707106781, T2x, T2u); + { + E T8R, T74, T8Q, T77; + T8R = FMA(KP707106781, T73, T72); + T74 = FNMS(KP707106781, T73, T72); + T8Q = FMA(KP707106781, T76, T75); + T77 = FNMS(KP707106781, T76, T75); + T4S = FNMS(KP668178637, T4R, T4Q); + T50 = FMA(KP668178637, T4Q, T4R); + T2S = FMA(KP198912367, T2p, T2y); + T2z = FNMS(KP198912367, T2y, T2p); + T8S = FMA(KP198912367, T8R, T8Q); + T91 = FNMS(KP198912367, T8Q, T8R); + T7l = FNMS(KP668178637, T74, T77); + T78 = FMA(KP668178637, T77, T74); + T3Q = T3o - T3h; + T3p = T3h + T3o; + } + T3t = W[30]; + T3w = W[31]; + T3q = T3a + T3p; + T3x = T3p - T3a; + T3v = T3t * T3u; + T3F = T3w * T3u; + { + E T1H, T2A, T2Q, T2T; + T3U = FNMS(KP923879532, T1G, T1j); + T1H = FMA(KP923879532, T1G, T1j); + T2A = T28 + T2z; + T40 = T2z - T28; + T3Z = FNMS(KP923879532, T2P, T2M); + T2Q = FMA(KP923879532, T2P, T2M); + T2T = T2R + T2S; + T3V = T2R - T2S; + T12 = W[0]; + T3A = FNMS(KP980785280, T2A, T1H); + T2B = FMA(KP980785280, T2A, T1H); + T3D = FNMS(KP980785280, T2T, T2Q); + T2U = 
FMA(KP980785280, T2T, T2Q); + T3z = W[32]; + T2C = T12 * T2B; + } + } + { + E T2V, T3s, T2D, T3r; + T2D = W[1]; + T3r = T12 * T2U; + T3H = T3z * T3D; + T3B = T3z * T3A; + T2V = FMA(T2D, T2U, T2C); + T3s = FNMS(T2D, T2B, T3r); + T3y = FNMS(T3w, T3x, T3v); + T3G = FMA(T3t, T3x, T3F); + Rm[0] = T11 + T2V; + Rp[0] = T11 - T2V; + Im[0] = T3s - T3q; + Ip[0] = T3q + T3s; + T3C = W[33]; + } + } + { + E T4b, T3R, T47, T4a, T3J, T49, T4j, T3O, T3N, T43, T3W, T3T, T41, T4d, T3X; + E T45, T3Y; + { + E T3M, T48, T3I, T3E; + T3M = T3K + T3L; + T48 = T3K - T3L; + T3I = FNMS(T3C, T3A, T3H); + T3E = FMA(T3C, T3D, T3B); + T4b = T3Q - T3P; + T3R = T3P + T3Q; + Im[WS(rs, 8)] = T3I - T3G; + Ip[WS(rs, 8)] = T3G + T3I; + Rm[WS(rs, 8)] = T3y + T3E; + Rp[WS(rs, 8)] = T3y - T3E; + T47 = W[46]; + T4a = W[47]; + T3J = W[14]; + T49 = T47 * T48; + T4j = T4a * T48; + T3O = W[15]; + T3N = T3J * T3M; + T43 = T3O * T3M; + T3W = FMA(KP980785280, T3V, T3U); + T4e = FNMS(KP980785280, T3V, T3U); + T3T = W[16]; + T4h = FNMS(KP980785280, T40, T3Z); + T41 = FMA(KP980785280, T40, T3Z); + T4d = W[48]; + T3X = T3T * T3W; + } + T3S = FNMS(T3O, T3R, T3N); + T45 = T3T * T41; + T4l = T4d * T4h; + T4f = T4d * T4e; + T44 = FMA(T3J, T3R, T43); + T3Y = W[17]; + T4c = FNMS(T4a, T4b, T49); + T4k = FMA(T47, T4b, T4j); + T4g = W[49]; + T46 = FNMS(T3Y, T3W, T45); + T42 = FMA(T3Y, T41, T3X); + } + } + { + E T5v, T5r, T5w, T5A, T5G, T5F, T5B, T5g, T5j, T4I, T5n, T5h, T56, T5e, T5m; + E T5i, T58, T54; + { + E T4n, T4A, T5d, T4H, T59, T5c, T55, T4z, T5b, T5l, T4J, T4U, T53, T5f, T4V; + E T57, T4W; + { + E T4D, T4G, T4m, T4i, T5a, T4y, T4x; + T5v = T4C - T4B; + T4D = T4B + T4C; + T4m = FNMS(T4g, T4e, T4l); + T4i = FMA(T4g, T4h, T4f); + Im[WS(rs, 4)] = T46 - T44; + Ip[WS(rs, 4)] = T44 + T46; + Rm[WS(rs, 4)] = T3S + T42; + Rp[WS(rs, 4)] = T3S - T42; + Im[WS(rs, 12)] = T4m - T4k; + Ip[WS(rs, 12)] = T4k + T4m; + Rm[WS(rs, 12)] = T4c + T4i; + Rp[WS(rs, 12)] = T4c - T4i; + T4G = T4E + T4F; + T5r = T4F - T4E; + T5w = T4t - T4w; + 
T4x = T4t + T4w; + T4n = W[6]; + T4A = W[7]; + T5d = FNMS(KP707106781, T4G, T4D); + T4H = FMA(KP707106781, T4G, T4D); + T5a = FNMS(KP707106781, T4x, T4q); + T4y = FMA(KP707106781, T4x, T4q); + T59 = W[38]; + T5c = W[39]; + { + E T4M, T4T, T4Z, T52; + T4M = FMA(KP923879532, T4L, T4K); + T5A = FNMS(KP923879532, T4L, T4K); + T55 = T4A * T4y; + T4z = T4n * T4y; + T5b = T59 * T5a; + T5l = T5c * T5a; + T5G = T4P + T4S; + T4T = T4P - T4S; + T4Z = FMA(KP923879532, T4Y, T4X); + T5F = FNMS(KP923879532, T4Y, T4X); + T5B = T51 + T50; + T52 = T50 - T51; + T4J = W[8]; + T4U = FMA(KP831469612, T4T, T4M); + T5g = FNMS(KP831469612, T4T, T4M); + T53 = FMA(KP831469612, T52, T4Z); + T5j = FNMS(KP831469612, T52, T4Z); + T5f = W[40]; + T4V = T4J * T4U; + } + } + T4I = FNMS(T4A, T4H, T4z); + T57 = T4J * T53; + T5n = T5f * T5j; + T5h = T5f * T5g; + T56 = FMA(T4n, T4H, T55); + T4W = W[9]; + T5e = FNMS(T5c, T5d, T5b); + T5m = FMA(T59, T5d, T5l); + T5i = W[41]; + T58 = FNMS(T4W, T4U, T57); + T54 = FMA(T4W, T53, T4V); + } + { + E T5p, T5u, T5x, T5R, T5N, T5Q, T5J, T5t, T5P, T5Z, T5z, T5C, T5H, T5T, T5D; + E T5L, T5E; + { + E T5o, T5k, T5s, T5O; + T5o = FNMS(T5i, T5g, T5n); + T5k = FMA(T5i, T5j, T5h); + Im[WS(rs, 2)] = T58 - T56; + Ip[WS(rs, 2)] = T56 + T58; + Rm[WS(rs, 2)] = T4I + T54; + Rp[WS(rs, 2)] = T4I - T54; + Im[WS(rs, 10)] = T5o - T5m; + Ip[WS(rs, 10)] = T5m + T5o; + Rm[WS(rs, 10)] = T5e + T5k; + Rp[WS(rs, 10)] = T5e - T5k; + T5p = W[22]; + T5u = W[23]; + T5x = FMA(KP707106781, T5w, T5v); + T5R = FNMS(KP707106781, T5w, T5v); + T5s = FMA(KP707106781, T5r, T5q); + T5O = FNMS(KP707106781, T5r, T5q); + T5N = W[54]; + T5Q = W[55]; + T5J = T5u * T5s; + T5t = T5p * T5s; + T5P = T5N * T5O; + T5Z = T5Q * T5O; + T5z = W[24]; + T5U = FMA(KP831469612, T5B, T5A); + T5C = FNMS(KP831469612, T5B, T5A); + T5X = FMA(KP831469612, T5G, T5F); + T5H = FNMS(KP831469612, T5G, T5F); + T5T = W[56]; + T5D = T5z * T5C; + } + T5y = FNMS(T5u, T5x, T5t); + T5L = T5z * T5H; + T61 = T5T * T5X; + T5V = T5T * T5U; + 
T5K = FMA(T5p, T5x, T5J); + T5E = W[25]; + T5S = FNMS(T5Q, T5R, T5P); + T60 = FMA(T5N, T5R, T5Z); + T5W = W[57]; + T5M = FNMS(T5E, T5C, T5L); + T5I = FMA(T5E, T5H, T5D); + } + } + } + { + E T7P, T7L, T7K, T7Q, T7U, T80, T7Z, T7V, T9v, T9r, T9q, T9w, T9A, T9G, T9F; + E T9B, T9g, T9j, T8I, T9n, T9h, T96, T9e, T9m, T9i, T98, T94; + { + E T7A, T7D, T6I, T7H, T7B, T7q, T7y, T7G, T7C, T7s, T7o; + { + E T63, T7x, T6H, T6w, T7t, T7w, T6v, T7p, T7v, T7F, T6J, T7a, T7n, T7z, T7b; + E T7r, T7c; + { + E T6D, T6G, T62, T5Y; + T7P = FNMS(KP707106781, T6C, T6z); + T6D = FMA(KP707106781, T6C, T6z); + T62 = FNMS(T5W, T5U, T61); + T5Y = FMA(T5W, T5X, T5V); + Im[WS(rs, 6)] = T5M - T5K; + Ip[WS(rs, 6)] = T5K + T5M; + Rm[WS(rs, 6)] = T5y + T5I; + Rp[WS(rs, 6)] = T5y - T5I; + Im[WS(rs, 14)] = T62 - T60; + Ip[WS(rs, 14)] = T60 + T62; + Rm[WS(rs, 14)] = T5S + T5Y; + Rp[WS(rs, 14)] = T5S - T5Y; + T6G = T6E + T6F; + T7L = T6F - T6E; + { + E T6e, T6t, T7u, T6u; + T7K = FNMS(KP707106781, T6d, T66); + T6e = FMA(KP707106781, T6d, T66); + T6t = T6l + T6s; + T7Q = T6l - T6s; + T63 = W[2]; + T7x = FNMS(KP923879532, T6G, T6D); + T6H = FMA(KP923879532, T6G, T6D); + T7u = FNMS(KP923879532, T6t, T6e); + T6u = FMA(KP923879532, T6t, T6e); + T6w = W[3]; + T7t = W[34]; + T7w = W[35]; + T6v = T63 * T6u; + T7p = T6w * T6u; + T7v = T7t * T7u; + T7F = T7w * T7u; + } + { + E T6U, T79, T7j, T7m; + T7U = FNMS(KP923879532, T6T, T6M); + T6U = FMA(KP923879532, T6T, T6M); + T79 = T71 - T78; + T80 = T71 + T78; + T7Z = FMA(KP923879532, T7i, T7f); + T7j = FNMS(KP923879532, T7i, T7f); + T7m = T7k + T7l; + T7V = T7k - T7l; + T6J = W[4]; + T7A = FNMS(KP831469612, T79, T6U); + T7a = FMA(KP831469612, T79, T6U); + T7D = FNMS(KP831469612, T7m, T7j); + T7n = FMA(KP831469612, T7m, T7j); + T7z = W[36]; + T7b = T6J * T7a; + } + } + T6I = FNMS(T6w, T6H, T6v); + T7r = T6J * T7n; + T7H = T7z * T7D; + T7B = T7z * T7A; + T7q = FMA(T63, T6H, T7p); + T7c = W[5]; + T7y = FNMS(T7w, T7x, T7v); + T7G = FMA(T7t, T7x, T7F); + T7C = W[37]; + 
T7s = FNMS(T7c, T7a, T7r); + T7o = FMA(T7c, T7n, T7b); + } + { + E T8n, T9d, T8H, T8A, T99, T9c, T8z, T95, T9b, T9l, T8J, T8U, T93, T9f, T8V; + E T97, T8W; + { + E T8D, T8G, T7I, T7E; + T9v = FNMS(KP707106781, T8C, T8B); + T8D = FMA(KP707106781, T8C, T8B); + T7I = FNMS(T7C, T7A, T7H); + T7E = FMA(T7C, T7D, T7B); + Im[WS(rs, 1)] = T7s - T7q; + Ip[WS(rs, 1)] = T7q + T7s; + Rm[WS(rs, 1)] = T6I + T7o; + Rp[WS(rs, 1)] = T6I - T7o; + Im[WS(rs, 9)] = T7I - T7G; + Ip[WS(rs, 9)] = T7G + T7I; + Rm[WS(rs, 9)] = T7y + T7E; + Rp[WS(rs, 9)] = T7y - T7E; + T8G = T8E - T8F; + T9r = T8E + T8F; + { + E T8q, T8x, T9a, T8y; + T9q = FNMS(KP707106781, T8p, T8o); + T8q = FMA(KP707106781, T8p, T8o); + T8x = T8t - T8w; + T9w = T8w + T8t; + T8n = W[10]; + T9d = FNMS(KP923879532, T8G, T8D); + T8H = FMA(KP923879532, T8G, T8D); + T9a = FNMS(KP923879532, T8x, T8q); + T8y = FMA(KP923879532, T8x, T8q); + T8A = W[11]; + T99 = W[42]; + T9c = W[43]; + T8z = T8n * T8y; + T95 = T8A * T8y; + T9b = T99 * T9a; + T9l = T9c * T9a; + } + { + E T8M, T8T, T8Z, T92; + T9A = FNMS(KP923879532, T8L, T8K); + T8M = FMA(KP923879532, T8L, T8K); + T8T = T8P - T8S; + T9G = T8P + T8S; + T9F = FMA(KP923879532, T8Y, T8X); + T8Z = FNMS(KP923879532, T8Y, T8X); + T92 = T90 + T91; + T9B = T91 - T90; + T8J = W[12]; + T9g = FNMS(KP980785280, T8T, T8M); + T8U = FMA(KP980785280, T8T, T8M); + T9j = FMA(KP980785280, T92, T8Z); + T93 = FNMS(KP980785280, T92, T8Z); + T9f = W[44]; + T8V = T8J * T8U; + } + } + T8I = FNMS(T8A, T8H, T8z); + T97 = T8J * T93; + T9n = T9f * T9j; + T9h = T9f * T9g; + T96 = FMA(T8n, T8H, T95); + T8W = W[13]; + T9e = FNMS(T9c, T9d, T9b); + T9m = FMA(T99, T9d, T9l); + T9i = W[45]; + T98 = FNMS(T8W, T8U, T97); + T94 = FMA(T8W, T93, T8V); + } + } + { + E T9U, T9X, T9y, Ta1, T9V, T9K, T9S, Ta0, T9W, T9M, T9I; + { + E T9p, T9R, T9x, T9u, T9N, T9Q, T9t, T9J, T9P, T9Z, T9z, T9C, T9H, T9T, T9D; + E T9L, T9E; + { + E T9o, T9k, T9O, T9s; + T9o = FNMS(T9i, T9g, T9n); + T9k = FMA(T9i, T9j, T9h); + Im[WS(rs, 3)] = T98 - 
T96; + Ip[WS(rs, 3)] = T96 + T98; + Rm[WS(rs, 3)] = T8I + T94; + Rp[WS(rs, 3)] = T8I - T94; + Im[WS(rs, 11)] = T9o - T9m; + Ip[WS(rs, 11)] = T9m + T9o; + Rm[WS(rs, 11)] = T9e + T9k; + Rp[WS(rs, 11)] = T9e - T9k; + T9p = W[26]; + T9R = FMA(KP923879532, T9w, T9v); + T9x = FNMS(KP923879532, T9w, T9v); + T9O = FMA(KP923879532, T9r, T9q); + T9s = FNMS(KP923879532, T9r, T9q); + T9u = W[27]; + T9N = W[58]; + T9Q = W[59]; + T9t = T9p * T9s; + T9J = T9u * T9s; + T9P = T9N * T9O; + T9Z = T9Q * T9O; + T9z = W[28]; + T9U = FNMS(KP980785280, T9B, T9A); + T9C = FMA(KP980785280, T9B, T9A); + T9X = FMA(KP980785280, T9G, T9F); + T9H = FNMS(KP980785280, T9G, T9F); + T9T = W[60]; + T9D = T9z * T9C; + } + T9y = FNMS(T9u, T9x, T9t); + T9L = T9z * T9H; + Ta1 = T9T * T9X; + T9V = T9T * T9U; + T9K = FMA(T9p, T9x, T9J); + T9E = W[29]; + T9S = FNMS(T9Q, T9R, T9P); + Ta0 = FMA(T9N, T9R, T9Z); + T9W = W[61]; + T9M = FNMS(T9E, T9C, T9L); + T9I = FMA(T9E, T9H, T9D); + } + { + E T7J, T8b, T7R, T7O, T87, T8a, T7N, T83, T89, T8j, T7T, T7W, T81, T8d, T7X; + E T85, T7Y; + { + E Ta2, T9Y, T88, T7M; + Ta2 = FNMS(T9W, T9U, Ta1); + T9Y = FMA(T9W, T9X, T9V); + Im[WS(rs, 7)] = T9M - T9K; + Ip[WS(rs, 7)] = T9K + T9M; + Rm[WS(rs, 7)] = T9y + T9I; + Rp[WS(rs, 7)] = T9y - T9I; + Im[WS(rs, 15)] = Ta2 - Ta0; + Ip[WS(rs, 15)] = Ta0 + Ta2; + Rm[WS(rs, 15)] = T9S + T9Y; + Rp[WS(rs, 15)] = T9S - T9Y; + T7J = W[18]; + T8b = FNMS(KP923879532, T7Q, T7P); + T7R = FMA(KP923879532, T7Q, T7P); + T88 = FNMS(KP923879532, T7L, T7K); + T7M = FMA(KP923879532, T7L, T7K); + T7O = W[19]; + T87 = W[50]; + T8a = W[51]; + T7N = T7J * T7M; + T83 = T7O * T7M; + T89 = T87 * T88; + T8j = T8a * T88; + T7T = W[20]; + T8e = FNMS(KP831469612, T7V, T7U); + T7W = FMA(KP831469612, T7V, T7U); + T8h = FMA(KP831469612, T80, T7Z); + T81 = FNMS(KP831469612, T80, T7Z); + T8d = W[52]; + T7X = T7T * T7W; + } + T7S = FNMS(T7O, T7R, T7N); + T85 = T7T * T81; + T8l = T8d * T8h; + T8f = T8d * T8e; + T84 = FMA(T7J, T7R, T83); + T7Y = W[21]; + T8c = 
FNMS(T8a, T8b, T89); + T8k = FMA(T87, T8b, T8j); + T8g = W[53]; + T86 = FNMS(T7Y, T7W, T85); + T82 = FMA(T7Y, T81, T7X); + } + } + } + } + } + T8m = FNMS(T8g, T8e, T8l); + T8i = FMA(T8g, T8h, T8f); + Im[WS(rs, 5)] = T86 - T84; + Ip[WS(rs, 5)] = T84 + T86; + Rm[WS(rs, 5)] = T7S + T82; + Rp[WS(rs, 5)] = T7S - T82; + Im[WS(rs, 13)] = T8m - T8k; + Ip[WS(rs, 13)] = T8k + T8m; + Rm[WS(rs, 13)] = T8c + T8i; + Rp[WS(rs, 13)] = T8c - T8i; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cbdft2_32", twinstr, &GENUS, {300, 62, 198, 0} }; + +void X(codelet_hc2cbdft2_32) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_32, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft2_32 -include hc2cb.h */ + +/* + * This function contains 498 FP additions, 208 FP multiplications, + * (or, 404 additions, 114 multiplications, 94 fused multiply/add), + * 102 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tf, T4a, T6h, T7Z, T6P, T8e, T1j, T4v, T2R, T4L, T5C, T7E, T6a, T7U, T3n; + E T4q, TZ, T38, T2p, 
T4B, T7M, T7R, T2y, T4C, T5Y, T63, T6C, T86, T4i, T4n; + E T6z, T85, TK, T31, T1Y, T4y, T7J, T7Q, T27, T4z, T5R, T62, T6v, T83, T4f; + E T4m, T6s, T82, Tu, T4p, T6o, T8f, T6M, T80, T1G, T4K, T2I, T4w, T5J, T7T; + E T67, T7F, T3g, T4b; + { + E T3, T2M, T16, T3k, T6, T13, T2P, T3l, Td, T3i, T1h, T2K, Ta, T3h, T1c; + E T2J; + { + E T1, T2, T2N, T2O; + T1 = Rp[0]; + T2 = Rm[WS(rs, 15)]; + T3 = T1 + T2; + T2M = T1 - T2; + { + E T14, T15, T4, T5; + T14 = Ip[0]; + T15 = Im[WS(rs, 15)]; + T16 = T14 + T15; + T3k = T14 - T15; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + T6 = T4 + T5; + T13 = T4 - T5; + } + T2N = Ip[WS(rs, 8)]; + T2O = Im[WS(rs, 7)]; + T2P = T2N + T2O; + T3l = T2N - T2O; + { + E Tb, Tc, T1d, T1e, T1f, T1g; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + T1d = Tb - Tc; + T1e = Im[WS(rs, 3)]; + T1f = Ip[WS(rs, 12)]; + T1g = T1e + T1f; + Td = Tb + Tc; + T3i = T1f - T1e; + T1h = T1d + T1g; + T2K = T1d - T1g; + } + { + E T8, T9, T18, T19, T1a, T1b; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + T18 = T8 - T9; + T19 = Ip[WS(rs, 4)]; + T1a = Im[WS(rs, 11)]; + T1b = T19 + T1a; + Ta = T8 + T9; + T3h = T19 - T1a; + T1c = T18 + T1b; + T2J = T18 - T1b; + } + } + { + E T7, Te, T6f, T6g; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T4a = T7 - Te; + T6f = T16 - T13; + T6g = KP707106781 * (T2J - T2K); + T6h = T6f + T6g; + T7Z = T6f - T6g; + } + { + E T6N, T6O, T17, T1i; + T6N = T2M + T2P; + T6O = KP707106781 * (T1c + T1h); + T6P = T6N - T6O; + T8e = T6O + T6N; + T17 = T13 + T16; + T1i = KP707106781 * (T1c - T1h); + T1j = T17 + T1i; + T4v = T17 - T1i; + } + { + E T2L, T2Q, T5A, T5B; + T2L = KP707106781 * (T2J + T2K); + T2Q = T2M - T2P; + T2R = T2L + T2Q; + T4L = T2Q - T2L; + T5A = T3 - T6; + T5B = T3i - T3h; + T5C = T5A + T5B; + T7E = T5A - T5B; + } + { + E T68, T69, T3j, T3m; + T68 = Ta - Td; + T69 = T3k - T3l; + T6a = T68 + T69; + T7U = T69 - T68; + T3j = T3h + T3i; + T3m = T3k + T3l; + T3n = T3j + T3m; + T4q = T3m - T3j; + } + } + { + E TR, T5S, T29, T2t, T2c, T5W, 
T2w, T37, TY, T5T, T5V, T2i, T2n, T2r, T34; + E T2q, T6A, T6B; + { + E TL, TM, TN, TO, TP, TQ; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + TN = TL + TM; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + TQ = TO + TP; + TR = TN + TQ; + T5S = TN - TQ; + T29 = TO - TP; + T2t = TL - TM; + } + { + E T2a, T2b, T35, T2u, T2v, T36; + T2a = Im[0]; + T2b = Ip[WS(rs, 15)]; + T35 = T2b - T2a; + T2u = Ip[WS(rs, 7)]; + T2v = Im[WS(rs, 8)]; + T36 = T2u - T2v; + T2c = T2a + T2b; + T5W = T35 - T36; + T2w = T2u + T2v; + T37 = T35 + T36; + } + { + E TU, T2e, T2h, T32, TX, T2j, T2m, T33; + { + E TS, TT, T2f, T2g; + TS = Rp[WS(rs, 3)]; + TT = Rm[WS(rs, 12)]; + TU = TS + TT; + T2e = TS - TT; + T2f = Ip[WS(rs, 3)]; + T2g = Im[WS(rs, 12)]; + T2h = T2f + T2g; + T32 = T2f - T2g; + } + { + E TV, TW, T2k, T2l; + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + TX = TV + TW; + T2j = TV - TW; + T2k = Im[WS(rs, 4)]; + T2l = Ip[WS(rs, 11)]; + T2m = T2k + T2l; + T33 = T2l - T2k; + } + TY = TU + TX; + T5T = T33 - T32; + T5V = TU - TX; + T2i = T2e + T2h; + T2n = T2j + T2m; + T2r = T2j - T2m; + T34 = T32 + T33; + T2q = T2e - T2h; + } + TZ = TR + TY; + T38 = T34 + T37; + { + E T2d, T2o, T7K, T7L; + T2d = T29 - T2c; + T2o = KP707106781 * (T2i - T2n); + T2p = T2d + T2o; + T4B = T2d - T2o; + T7K = T5S - T5T; + T7L = T5W - T5V; + T7M = FMA(KP382683432, T7K, KP923879532 * T7L); + T7R = FNMS(KP923879532, T7K, KP382683432 * T7L); + } + { + E T2s, T2x, T5U, T5X; + T2s = KP707106781 * (T2q + T2r); + T2x = T2t - T2w; + T2y = T2s + T2x; + T4C = T2x - T2s; + T5U = T5S + T5T; + T5X = T5V + T5W; + T5Y = FMA(KP923879532, T5U, KP382683432 * T5X); + T63 = FNMS(KP382683432, T5U, KP923879532 * T5X); + } + T6A = T2t + T2w; + T6B = KP707106781 * (T2i + T2n); + T6C = T6A - T6B; + T86 = T6B + T6A; + { + E T4g, T4h, T6x, T6y; + T4g = TR - TY; + T4h = T37 - T34; + T4i = T4g + T4h; + T4n = T4h - T4g; + T6x = KP707106781 * (T2q - T2r); + T6y = T29 + T2c; + T6z = T6x - T6y; + T85 = T6y + T6x; + } + } + { + E TC, T5L, T1I, T22, T1L, T5P, T25, 
T30, TJ, T5M, T5O, T1R, T1W, T20, T2X; + E T1Z, T6t, T6u; + { + E Tw, Tx, Ty, Tz, TA, TB; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + Ty = Tw + Tx; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + TB = Tz + TA; + TC = Ty + TB; + T5L = Ty - TB; + T1I = Tz - TA; + T22 = Tw - Tx; + } + { + E T1J, T1K, T2Y, T23, T24, T2Z; + T1J = Ip[WS(rs, 1)]; + T1K = Im[WS(rs, 14)]; + T2Y = T1J - T1K; + T23 = Ip[WS(rs, 9)]; + T24 = Im[WS(rs, 6)]; + T2Z = T23 - T24; + T1L = T1J + T1K; + T5P = T2Y - T2Z; + T25 = T23 + T24; + T30 = T2Y + T2Z; + } + { + E TF, T1N, T1Q, T2V, TI, T1S, T1V, T2W; + { + E TD, TE, T1O, T1P; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + TF = TD + TE; + T1N = TD - TE; + T1O = Ip[WS(rs, 5)]; + T1P = Im[WS(rs, 10)]; + T1Q = T1O + T1P; + T2V = T1O - T1P; + } + { + E TG, TH, T1T, T1U; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + TI = TG + TH; + T1S = TG - TH; + T1T = Im[WS(rs, 2)]; + T1U = Ip[WS(rs, 13)]; + T1V = T1T + T1U; + T2W = T1U - T1T; + } + TJ = TF + TI; + T5M = T2W - T2V; + T5O = TF - TI; + T1R = T1N + T1Q; + T1W = T1S + T1V; + T20 = T1S - T1V; + T2X = T2V + T2W; + T1Z = T1N - T1Q; + } + TK = TC + TJ; + T31 = T2X + T30; + { + E T1M, T1X, T7H, T7I; + T1M = T1I + T1L; + T1X = KP707106781 * (T1R - T1W); + T1Y = T1M + T1X; + T4y = T1M - T1X; + T7H = T5L - T5M; + T7I = T5P - T5O; + T7J = FNMS(KP923879532, T7I, KP382683432 * T7H); + T7Q = FMA(KP923879532, T7H, KP382683432 * T7I); + } + { + E T21, T26, T5N, T5Q; + T21 = KP707106781 * (T1Z + T20); + T26 = T22 - T25; + T27 = T21 + T26; + T4z = T26 - T21; + T5N = T5L + T5M; + T5Q = T5O + T5P; + T5R = FNMS(KP382683432, T5Q, KP923879532 * T5N); + T62 = FMA(KP382683432, T5N, KP923879532 * T5Q); + } + T6t = T22 + T25; + T6u = KP707106781 * (T1R + T1W); + T6v = T6t - T6u; + T83 = T6u + T6t; + { + E T4d, T4e, T6q, T6r; + T4d = TC - TJ; + T4e = T30 - T2X; + T4f = T4d - T4e; + T4m = T4d + T4e; + T6q = T1L - T1I; + T6r = KP707106781 * (T1Z - T20); + T6s = T6q + T6r; + T82 = T6q - T6r; + } + } + { + E Ti, T3a, Tl, T3b, T1o, 
T1t, T6j, T6i, T5E, T5D, Tp, T3d, Ts, T3e, T1z; + E T1E, T6m, T6l, T5H, T5G; + { + E T1p, T1n, T1k, T1s; + { + E Tg, Th, T1l, T1m; + Tg = Rp[WS(rs, 2)]; + Th = Rm[WS(rs, 13)]; + Ti = Tg + Th; + T1p = Tg - Th; + T1l = Ip[WS(rs, 2)]; + T1m = Im[WS(rs, 13)]; + T1n = T1l + T1m; + T3a = T1l - T1m; + } + { + E Tj, Tk, T1q, T1r; + Tj = Rp[WS(rs, 10)]; + Tk = Rm[WS(rs, 5)]; + Tl = Tj + Tk; + T1k = Tj - Tk; + T1q = Ip[WS(rs, 10)]; + T1r = Im[WS(rs, 5)]; + T1s = T1q + T1r; + T3b = T1q - T1r; + } + T1o = T1k + T1n; + T1t = T1p - T1s; + T6j = T1p + T1s; + T6i = T1n - T1k; + T5E = T3a - T3b; + T5D = Ti - Tl; + } + { + E T1A, T1y, T1v, T1D; + { + E Tn, To, T1w, T1x; + Tn = Rm[WS(rs, 1)]; + To = Rp[WS(rs, 14)]; + Tp = Tn + To; + T1A = Tn - To; + T1w = Im[WS(rs, 1)]; + T1x = Ip[WS(rs, 14)]; + T1y = T1w + T1x; + T3d = T1x - T1w; + } + { + E Tq, Tr, T1B, T1C; + Tq = Rp[WS(rs, 6)]; + Tr = Rm[WS(rs, 9)]; + Ts = Tq + Tr; + T1v = Tq - Tr; + T1B = Ip[WS(rs, 6)]; + T1C = Im[WS(rs, 9)]; + T1D = T1B + T1C; + T3e = T1B - T1C; + } + T1z = T1v - T1y; + T1E = T1A - T1D; + T6m = T1A + T1D; + T6l = T1v + T1y; + T5H = T3d - T3e; + T5G = Tp - Ts; + } + { + E Tm, Tt, T6k, T6n; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T4p = Tm - Tt; + T6k = FMA(KP382683432, T6i, KP923879532 * T6j); + T6n = FMA(KP382683432, T6l, KP923879532 * T6m); + T6o = T6k - T6n; + T8f = T6k + T6n; + } + { + E T6K, T6L, T1u, T1F; + T6K = FNMS(KP923879532, T6i, KP382683432 * T6j); + T6L = FNMS(KP923879532, T6l, KP382683432 * T6m); + T6M = T6K + T6L; + T80 = T6K - T6L; + T1u = FMA(KP923879532, T1o, KP382683432 * T1t); + T1F = FNMS(KP382683432, T1E, KP923879532 * T1z); + T1G = T1u + T1F; + T4K = T1F - T1u; + } + { + E T2G, T2H, T5F, T5I; + T2G = FNMS(KP382683432, T1o, KP923879532 * T1t); + T2H = FMA(KP382683432, T1z, KP923879532 * T1E); + T2I = T2G + T2H; + T4w = T2G - T2H; + T5F = T5D - T5E; + T5I = T5G + T5H; + T5J = KP707106781 * (T5F + T5I); + T7T = KP707106781 * (T5F - T5I); + } + { + E T65, T66, T3c, T3f; + T65 = T5D + 
T5E; + T66 = T5H - T5G; + T67 = KP707106781 * (T65 + T66); + T7F = KP707106781 * (T66 - T65); + T3c = T3a + T3b; + T3f = T3d + T3e; + T3g = T3c + T3f; + T4b = T3f - T3c; + } + } + { + E T11, T3s, T3p, T3u, T3K, T40, T3G, T3Y, T2T, T43, T3z, T3P, T2B, T45, T3x; + E T3T; + { + E Tv, T10, T3E, T3F; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + T3s = Tv - T10; + { + E T39, T3o, T3I, T3J; + T39 = T31 + T38; + T3o = T3g + T3n; + T3p = T39 + T3o; + T3u = T3o - T39; + T3I = TK - TZ; + T3J = T3n - T3g; + T3K = T3I + T3J; + T40 = T3J - T3I; + } + T3E = Tf - Tu; + T3F = T38 - T31; + T3G = T3E + T3F; + T3Y = T3E - T3F; + { + E T2S, T3N, T2F, T3O, T2D, T2E; + T2S = T2I + T2R; + T3N = T1j - T1G; + T2D = FNMS(KP195090322, T1Y, KP980785280 * T27); + T2E = FMA(KP195090322, T2p, KP980785280 * T2y); + T2F = T2D + T2E; + T3O = T2D - T2E; + T2T = T2F + T2S; + T43 = T3N - T3O; + T3z = T2S - T2F; + T3P = T3N + T3O; + } + { + E T1H, T3S, T2A, T3R, T28, T2z; + T1H = T1j + T1G; + T3S = T2R - T2I; + T28 = FMA(KP980785280, T1Y, KP195090322 * T27); + T2z = FNMS(KP195090322, T2y, KP980785280 * T2p); + T2A = T28 + T2z; + T3R = T2z - T28; + T2B = T1H + T2A; + T45 = T3S - T3R; + T3x = T1H - T2A; + T3T = T3R + T3S; + } + } + { + E T2U, T3q, T12, T2C; + T12 = W[0]; + T2C = W[1]; + T2U = FMA(T12, T2B, T2C * T2T); + T3q = FNMS(T2C, T2B, T12 * T2T); + Rp[0] = T11 - T2U; + Ip[0] = T3p + T3q; + Rm[0] = T11 + T2U; + Im[0] = T3q - T3p; + } + { + E T41, T47, T46, T48; + { + E T3X, T3Z, T42, T44; + T3X = W[46]; + T3Z = W[47]; + T41 = FNMS(T3Z, T40, T3X * T3Y); + T47 = FMA(T3Z, T3Y, T3X * T40); + T42 = W[48]; + T44 = W[49]; + T46 = FMA(T42, T43, T44 * T45); + T48 = FNMS(T44, T43, T42 * T45); + } + Rp[WS(rs, 12)] = T41 - T46; + Ip[WS(rs, 12)] = T47 + T48; + Rm[WS(rs, 12)] = T41 + T46; + Im[WS(rs, 12)] = T48 - T47; + } + { + E T3v, T3B, T3A, T3C; + { + E T3r, T3t, T3w, T3y; + T3r = W[30]; + T3t = W[31]; + T3v = FNMS(T3t, T3u, T3r * T3s); + T3B = FMA(T3t, T3s, T3r * T3u); + T3w = W[32]; + T3y = W[33]; + 
T3A = FMA(T3w, T3x, T3y * T3z); + T3C = FNMS(T3y, T3x, T3w * T3z); + } + Rp[WS(rs, 8)] = T3v - T3A; + Ip[WS(rs, 8)] = T3B + T3C; + Rm[WS(rs, 8)] = T3v + T3A; + Im[WS(rs, 8)] = T3C - T3B; + } + { + E T3L, T3V, T3U, T3W; + { + E T3D, T3H, T3M, T3Q; + T3D = W[14]; + T3H = W[15]; + T3L = FNMS(T3H, T3K, T3D * T3G); + T3V = FMA(T3H, T3G, T3D * T3K); + T3M = W[16]; + T3Q = W[17]; + T3U = FMA(T3M, T3P, T3Q * T3T); + T3W = FNMS(T3Q, T3P, T3M * T3T); + } + Rp[WS(rs, 4)] = T3L - T3U; + Ip[WS(rs, 4)] = T3V + T3W; + Rm[WS(rs, 4)] = T3L + T3U; + Im[WS(rs, 4)] = T3W - T3V; + } + } + { + E T7O, T8m, T7W, T8o, T8E, T8U, T8A, T8S, T8h, T8X, T8t, T8J, T89, T8Z, T8r; + E T8N; + { + E T7G, T7N, T8y, T8z; + T7G = T7E + T7F; + T7N = T7J + T7M; + T7O = T7G + T7N; + T8m = T7G - T7N; + { + E T7S, T7V, T8C, T8D; + T7S = T7Q + T7R; + T7V = T7T + T7U; + T7W = T7S + T7V; + T8o = T7V - T7S; + T8C = T7J - T7M; + T8D = T7U - T7T; + T8E = T8C + T8D; + T8U = T8D - T8C; + } + T8y = T7E - T7F; + T8z = T7R - T7Q; + T8A = T8y + T8z; + T8S = T8y - T8z; + { + E T8g, T8H, T8d, T8I, T8b, T8c; + T8g = T8e - T8f; + T8H = T7Z - T80; + T8b = FNMS(KP980785280, T82, KP195090322 * T83); + T8c = FNMS(KP980785280, T85, KP195090322 * T86); + T8d = T8b + T8c; + T8I = T8b - T8c; + T8h = T8d + T8g; + T8X = T8H - T8I; + T8t = T8g - T8d; + T8J = T8H + T8I; + } + { + E T81, T8L, T88, T8M, T84, T87; + T81 = T7Z + T80; + T8L = T8f + T8e; + T84 = FMA(KP195090322, T82, KP980785280 * T83); + T87 = FMA(KP195090322, T85, KP980785280 * T86); + T88 = T84 - T87; + T8M = T84 + T87; + T89 = T81 + T88; + T8Z = T8M + T8L; + T8r = T81 - T88; + T8N = T8L - T8M; + } + } + { + E T7X, T8j, T8i, T8k; + { + E T7D, T7P, T7Y, T8a; + T7D = W[10]; + T7P = W[11]; + T7X = FNMS(T7P, T7W, T7D * T7O); + T8j = FMA(T7P, T7O, T7D * T7W); + T7Y = W[12]; + T8a = W[13]; + T8i = FMA(T7Y, T89, T8a * T8h); + T8k = FNMS(T8a, T89, T7Y * T8h); + } + Rp[WS(rs, 3)] = T7X - T8i; + Ip[WS(rs, 3)] = T8j + T8k; + Rm[WS(rs, 3)] = T7X + T8i; + Im[WS(rs, 3)] = T8k - T8j; + 
} + { + E T8V, T91, T90, T92; + { + E T8R, T8T, T8W, T8Y; + T8R = W[58]; + T8T = W[59]; + T8V = FNMS(T8T, T8U, T8R * T8S); + T91 = FMA(T8T, T8S, T8R * T8U); + T8W = W[60]; + T8Y = W[61]; + T90 = FMA(T8W, T8X, T8Y * T8Z); + T92 = FNMS(T8Y, T8X, T8W * T8Z); + } + Rp[WS(rs, 15)] = T8V - T90; + Ip[WS(rs, 15)] = T91 + T92; + Rm[WS(rs, 15)] = T8V + T90; + Im[WS(rs, 15)] = T92 - T91; + } + { + E T8p, T8v, T8u, T8w; + { + E T8l, T8n, T8q, T8s; + T8l = W[42]; + T8n = W[43]; + T8p = FNMS(T8n, T8o, T8l * T8m); + T8v = FMA(T8n, T8m, T8l * T8o); + T8q = W[44]; + T8s = W[45]; + T8u = FMA(T8q, T8r, T8s * T8t); + T8w = FNMS(T8s, T8r, T8q * T8t); + } + Rp[WS(rs, 11)] = T8p - T8u; + Ip[WS(rs, 11)] = T8v + T8w; + Rm[WS(rs, 11)] = T8p + T8u; + Im[WS(rs, 11)] = T8w - T8v; + } + { + E T8F, T8P, T8O, T8Q; + { + E T8x, T8B, T8G, T8K; + T8x = W[26]; + T8B = W[27]; + T8F = FNMS(T8B, T8E, T8x * T8A); + T8P = FMA(T8B, T8A, T8x * T8E); + T8G = W[28]; + T8K = W[29]; + T8O = FMA(T8G, T8J, T8K * T8N); + T8Q = FNMS(T8K, T8J, T8G * T8N); + } + Rp[WS(rs, 7)] = T8F - T8O; + Ip[WS(rs, 7)] = T8P + T8Q; + Rm[WS(rs, 7)] = T8F + T8O; + Im[WS(rs, 7)] = T8Q - T8P; + } + } + { + E T4k, T4S, T4s, T4U, T5a, T5q, T56, T5o, T4N, T5t, T4Z, T5f, T4F, T5v, T4X; + E T5j; + { + E T4c, T4j, T54, T55; + T4c = T4a + T4b; + T4j = KP707106781 * (T4f + T4i); + T4k = T4c + T4j; + T4S = T4c - T4j; + { + E T4o, T4r, T58, T59; + T4o = KP707106781 * (T4m + T4n); + T4r = T4p + T4q; + T4s = T4o + T4r; + T4U = T4r - T4o; + T58 = KP707106781 * (T4f - T4i); + T59 = T4q - T4p; + T5a = T58 + T59; + T5q = T59 - T58; + } + T54 = T4a - T4b; + T55 = KP707106781 * (T4n - T4m); + T56 = T54 + T55; + T5o = T54 - T55; + { + E T4M, T5d, T4J, T5e, T4H, T4I; + T4M = T4K + T4L; + T5d = T4v - T4w; + T4H = FNMS(KP831469612, T4y, KP555570233 * T4z); + T4I = FMA(KP831469612, T4B, KP555570233 * T4C); + T4J = T4H + T4I; + T5e = T4H - T4I; + T4N = T4J + T4M; + T5t = T5d - T5e; + T4Z = T4M - T4J; + T5f = T5d + T5e; + } + { + E T4x, T5i, T4E, T5h, T4A, 
T4D; + T4x = T4v + T4w; + T5i = T4L - T4K; + T4A = FMA(KP555570233, T4y, KP831469612 * T4z); + T4D = FNMS(KP831469612, T4C, KP555570233 * T4B); + T4E = T4A + T4D; + T5h = T4D - T4A; + T4F = T4x + T4E; + T5v = T5i - T5h; + T4X = T4x - T4E; + T5j = T5h + T5i; + } + } + { + E T4t, T4P, T4O, T4Q; + { + E T49, T4l, T4u, T4G; + T49 = W[6]; + T4l = W[7]; + T4t = FNMS(T4l, T4s, T49 * T4k); + T4P = FMA(T4l, T4k, T49 * T4s); + T4u = W[8]; + T4G = W[9]; + T4O = FMA(T4u, T4F, T4G * T4N); + T4Q = FNMS(T4G, T4F, T4u * T4N); + } + Rp[WS(rs, 2)] = T4t - T4O; + Ip[WS(rs, 2)] = T4P + T4Q; + Rm[WS(rs, 2)] = T4t + T4O; + Im[WS(rs, 2)] = T4Q - T4P; + } + { + E T5r, T5x, T5w, T5y; + { + E T5n, T5p, T5s, T5u; + T5n = W[54]; + T5p = W[55]; + T5r = FNMS(T5p, T5q, T5n * T5o); + T5x = FMA(T5p, T5o, T5n * T5q); + T5s = W[56]; + T5u = W[57]; + T5w = FMA(T5s, T5t, T5u * T5v); + T5y = FNMS(T5u, T5t, T5s * T5v); + } + Rp[WS(rs, 14)] = T5r - T5w; + Ip[WS(rs, 14)] = T5x + T5y; + Rm[WS(rs, 14)] = T5r + T5w; + Im[WS(rs, 14)] = T5y - T5x; + } + { + E T4V, T51, T50, T52; + { + E T4R, T4T, T4W, T4Y; + T4R = W[38]; + T4T = W[39]; + T4V = FNMS(T4T, T4U, T4R * T4S); + T51 = FMA(T4T, T4S, T4R * T4U); + T4W = W[40]; + T4Y = W[41]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T52 = FNMS(T4Y, T4X, T4W * T4Z); + } + Rp[WS(rs, 10)] = T4V - T50; + Ip[WS(rs, 10)] = T51 + T52; + Rm[WS(rs, 10)] = T4V + T50; + Im[WS(rs, 10)] = T52 - T51; + } + { + E T5b, T5l, T5k, T5m; + { + E T53, T57, T5c, T5g; + T53 = W[22]; + T57 = W[23]; + T5b = FNMS(T57, T5a, T53 * T56); + T5l = FMA(T57, T56, T53 * T5a); + T5c = W[24]; + T5g = W[25]; + T5k = FMA(T5c, T5f, T5g * T5j); + T5m = FNMS(T5g, T5f, T5c * T5j); + } + Rp[WS(rs, 6)] = T5b - T5k; + Ip[WS(rs, 6)] = T5l + T5m; + Rm[WS(rs, 6)] = T5b + T5k; + Im[WS(rs, 6)] = T5m - T5l; + } + } + { + E T60, T6W, T6c, T6Y, T7e, T7u, T7a, T7s, T6R, T7x, T73, T7j, T6F, T7z, T71; + E T7n; + { + E T5K, T5Z, T78, T79; + T5K = T5C + T5J; + T5Z = T5R + T5Y; + T60 = T5K + T5Z; + T6W = T5K - T5Z; + { + E T64, T6b, 
T7c, T7d; + T64 = T62 + T63; + T6b = T67 + T6a; + T6c = T64 + T6b; + T6Y = T6b - T64; + T7c = T5R - T5Y; + T7d = T6a - T67; + T7e = T7c + T7d; + T7u = T7d - T7c; + } + T78 = T5C - T5J; + T79 = T63 - T62; + T7a = T78 + T79; + T7s = T78 - T79; + { + E T6Q, T7h, T6J, T7i, T6H, T6I; + T6Q = T6M + T6P; + T7h = T6h - T6o; + T6H = FNMS(KP555570233, T6s, KP831469612 * T6v); + T6I = FMA(KP555570233, T6z, KP831469612 * T6C); + T6J = T6H + T6I; + T7i = T6H - T6I; + T6R = T6J + T6Q; + T7x = T7h - T7i; + T73 = T6Q - T6J; + T7j = T7h + T7i; + } + { + E T6p, T7m, T6E, T7l, T6w, T6D; + T6p = T6h + T6o; + T7m = T6P - T6M; + T6w = FMA(KP831469612, T6s, KP555570233 * T6v); + T6D = FNMS(KP555570233, T6C, KP831469612 * T6z); + T6E = T6w + T6D; + T7l = T6D - T6w; + T6F = T6p + T6E; + T7z = T7m - T7l; + T71 = T6p - T6E; + T7n = T7l + T7m; + } + } + { + E T6d, T6T, T6S, T6U; + { + E T5z, T61, T6e, T6G; + T5z = W[2]; + T61 = W[3]; + T6d = FNMS(T61, T6c, T5z * T60); + T6T = FMA(T61, T60, T5z * T6c); + T6e = W[4]; + T6G = W[5]; + T6S = FMA(T6e, T6F, T6G * T6R); + T6U = FNMS(T6G, T6F, T6e * T6R); + } + Rp[WS(rs, 1)] = T6d - T6S; + Ip[WS(rs, 1)] = T6T + T6U; + Rm[WS(rs, 1)] = T6d + T6S; + Im[WS(rs, 1)] = T6U - T6T; + } + { + E T7v, T7B, T7A, T7C; + { + E T7r, T7t, T7w, T7y; + T7r = W[50]; + T7t = W[51]; + T7v = FNMS(T7t, T7u, T7r * T7s); + T7B = FMA(T7t, T7s, T7r * T7u); + T7w = W[52]; + T7y = W[53]; + T7A = FMA(T7w, T7x, T7y * T7z); + T7C = FNMS(T7y, T7x, T7w * T7z); + } + Rp[WS(rs, 13)] = T7v - T7A; + Ip[WS(rs, 13)] = T7B + T7C; + Rm[WS(rs, 13)] = T7v + T7A; + Im[WS(rs, 13)] = T7C - T7B; + } + { + E T6Z, T75, T74, T76; + { + E T6V, T6X, T70, T72; + T6V = W[34]; + T6X = W[35]; + T6Z = FNMS(T6X, T6Y, T6V * T6W); + T75 = FMA(T6X, T6W, T6V * T6Y); + T70 = W[36]; + T72 = W[37]; + T74 = FMA(T70, T71, T72 * T73); + T76 = FNMS(T72, T71, T70 * T73); + } + Rp[WS(rs, 9)] = T6Z - T74; + Ip[WS(rs, 9)] = T75 + T76; + Rm[WS(rs, 9)] = T6Z + T74; + Im[WS(rs, 9)] = T76 - T75; + } + { + E T7f, T7p, T7o, T7q; + 
{ + E T77, T7b, T7g, T7k; + T77 = W[18]; + T7b = W[19]; + T7f = FNMS(T7b, T7e, T77 * T7a); + T7p = FMA(T7b, T7a, T77 * T7e); + T7g = W[20]; + T7k = W[21]; + T7o = FMA(T7g, T7j, T7k * T7n); + T7q = FNMS(T7k, T7j, T7g * T7n); + } + Rp[WS(rs, 5)] = T7f - T7o; + Ip[WS(rs, 5)] = T7p + T7q; + Rm[WS(rs, 5)] = T7f + T7o; + Im[WS(rs, 5)] = T7q - T7p; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cbdft2_32", twinstr, &GENUS, {404, 114, 94, 0} }; + +void X(codelet_hc2cbdft2_32) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_32, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:45 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hc2cbdft2_4 -include hc2cb.h */ + +/* + * This function contains 30 FP additions, 12 FP multiplications, + * (or, 24 additions, 6 multiplications, 6 fused multiply/add), + * 35 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" +/* hc2cbdft2_4, variant compiled when HAVE_FMA is defined (generated with -fma, statements reordered for pipelining). For each m in [mb, me) it loads eight values (Rp, Ip, Rm and Im at offsets 0 and WS(rs, 1)), combines them with the six twiddle values W[0] to W[5], and stores eight results back to the same eight locations. Per iteration Rp and Ip advance by ms, Rm and Im move back by ms, and W advances by 6 after an initial offset of (mb - 1) * 6. R, E, stride, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are not defined in this file; presumably they come from the two included headers -- confirm there. */ +static void hc2cbdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Ty, TB, Tw, TE, TA, TF, Tz, TG, TC; + { + E T4, Tg, T3, Tm, Tc, T5, Th, Ti; + { + E T1, T2, Ta, Tb; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + Ta = Ip[0]; + Tb = Im[WS(rs, 1)]; + T4 = Rp[WS(rs, 1)]; + Tg = T1 - T2; + T3 = T1 + T2; + Tm = Ta - Tb; + Tc = Ta + Tb; + T5 = Rm[0]; + Th = Ip[WS(rs, 1)]; + Ti = Im[0]; /* last of the eight loads; every load precedes the first store, so the in-place overwrite below does not disturb the inputs of this iteration */ + } + { + E T8, Td, T7, Ts, To, Tv, Tk, Te, Tf; + T8 = W[0]; /* W[0] here and W[1] (loaded into Tf below) are combined with Td and Tk to form Tl and Tq, which feed the four offset-0 outputs */ + { + E T9, T6, Tn, Tj; + T9 = T4 - T5; + T6 = T4 + T5; + Tn = Th - Ti; + Tj = Th + Ti; + Ty = Tc - T9; + Td = T9 + Tc; + T7 = T3 + T6; + Ts = T3 - T6; + To = Tm + Tn; + Tv = Tm - Tn; + TB = Tg + Tj; + Tk = Tg - Tj; + Te = T8 * Td; + } + Tf = W[1]; + { + E Tr, Tu, Tt, TD, Tx, Tp, Tl, Tq; + Tr = W[2]; /* W[2] and W[3] (Tu) are combined with Ts and Tv to form Tw and TE */ + Tp = T8 * Tk; + Tu = W[3]; + Tl = FMA(Tf, Tk, Te); + Tt = Tr * Ts; + Tq = FNMS(Tf, Td, Tp); + TD = Tu * Ts; + Rm[0] = T7 + Tl; /* first stores: the four outputs at offset 0 */ + Rp[0] = T7 - Tl; + Im[0] = Tq - To; + Ip[0] = To + Tq; + Tx = W[4]; /* W[4] and W[5] (TA) are combined with Ty and TB to form TC and TG */ + Tw = FNMS(Tu, Tv, Tt); + TE = FMA(Tr, Tv, 
TD); + TA = W[5]; + TF = Tx * TB; + Tz = Tx * Ty; + } + } + } + TG = FNMS(TA, Ty, TF); + TC = FMA(TA, TB, Tz); + Im[WS(rs, 1)] = TG - TE; /* remaining stores: the four outputs at offset WS(rs, 1) */ + Ip[WS(rs, 1)] = TE + TG; + Rm[WS(rs, 1)] = Tw + TC; + Rp[WS(rs, 1)] = Tw - TC; + } + } +} +/* Twiddle instruction table; referenced by desc below. */ +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; +/* Codelet descriptor: the value 4 (presumably the codelet size -- confirm against hc2c_desc), the codelet name, the twiddle table, a pointer to GENUS, and the counts {24, 6, 6, 0}, which match the 24 additions, 6 multiplications and 6 fused multiply/adds quoted in the generator summary for this variant. */ +static const hc2c_desc desc = { 4, "hc2cbdft2_4", twinstr, &GENUS, {24, 6, 6, 0} }; +/* Registration entry point: passes this codelet and desc to X(khc2c_register) for planner p, with HC2C_VIA_DFT as the last argument. */ +void X(codelet_hc2cbdft2_4) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_4, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hc2cbdft2_4 -include hc2cb.h */ + +/* + * This function contains 30 FP additions, 12 FP multiplications, + * (or, 24 additions, 6 multiplications, 6 fused multiply/add), + * 19 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" +/* hc2cbdft2_4, variant compiled when HAVE_FMA is not defined. Same signature, same loop stepping, same eight loads, same eight store locations and same use of W[0] to W[5] as the HAVE_FMA variant, but generated without the -fma and reordering flags, so the statements stay grouped: loads with sums and differences first, then one block per group of twiddle-weighted outputs. */ +static void hc2cbdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E T3, Tl, T6, Tm, Td, Tj, Tx, Tv, Ts, Tq; + { + E Tf, Tc, T9, Ti; + { + E T1, T2, Ta, Tb; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + T3 = T1 + T2; + Tf = T1 - T2; + Ta = Ip[0]; + Tb = Im[WS(rs, 1)]; + Tc = Ta + Tb; + Tl = Ta - Tb; + } + { + E T4, T5, Tg, Th; + T4 = Rp[WS(rs, 1)]; + T5 = Rm[0]; + T6 = T4 + T5; + T9 = T4 - T5; + Tg = Ip[WS(rs, 1)]; + Th = Im[0]; + Ti = Tg + Th; + Tm = Tg - Th; + } + Td = T9 + Tc; /* Td, Tj, Tx, Tv, Ts and Tq are the six values multiplied by twiddles in the two blocks below */ + Tj = Tf - Ti; + Tx = Tf + Ti; + Tv = Tc - T9; + Ts = Tl - Tm; + Tq = T3 - T6; + } + { + E T7, Tn, Tk, To, T8, Te; + T7 = T3 + T6; + Tn = Tl + Tm; + T8 = W[0]; /* W[0] and W[1] weight Td and Tj, giving Tk and To for the four offset-0 outputs */ + Te = W[1]; + Tk = FMA(T8, Td, Te * Tj); + To = FNMS(Te, Td, T8 * Tj); + Rp[0] = T7 - Tk; + Ip[0] = Tn + To; + Rm[0] = T7 + Tk; + Im[0] = To - Tn; + } + { + E Tt, Tz, Ty, TA; + { + E Tp, Tr, Tu, Tw; + Tp = W[2]; /* W[2] and W[3] produce Tt and Tz from Ts and Tq; W[4] and W[5] produce Ty and TA from Tv and Tx */ + Tr = W[3]; + Tt = FNMS(Tr, Ts, Tp 
* Tq); + Tz = FMA(Tr, Tq, Tp * Ts); + Tu = W[4]; + Tw = W[5]; + Ty = FMA(Tu, Tv, Tw * Tx); + TA = FNMS(Tw, Tv, Tu * Tx); + } + Rp[WS(rs, 1)] = Tt - Ty; /* the four outputs at offset WS(rs, 1) */ + Ip[WS(rs, 1)] = Tz + TA; + Rm[WS(rs, 1)] = Tt + Ty; + Im[WS(rs, 1)] = TA - Tz; + } + } + } +} +/* Twiddle instruction table; referenced by desc below. */ +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; +/* Codelet descriptor: the value 4 (presumably the codelet size -- confirm against hc2c_desc), the codelet name, the twiddle table, a pointer to GENUS, and the counts {24, 6, 6, 0}, which match the 24 additions, 6 multiplications and 6 fused multiply/adds quoted in the generator summary for this variant. */ +static const hc2c_desc desc = { 4, "hc2cbdft2_4", twinstr, &GENUS, {24, 6, 6, 0} }; +/* Registration entry point: passes this codelet and desc to X(khc2c_register) for planner p, with HC2C_VIA_DFT as the last argument. */ +void X(codelet_hc2cbdft2_4) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_4, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:45 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hc2cbdft2_8 -include hc2cb.h */ + +/* + * This function contains 82 FP additions, 36 FP multiplications, + * (or, 60 additions, 14 multiplications, 22 fused multiply/add), + * 55 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T1m, T1r, T1i, T1u, T1o, T1v, T1n, T1w, T1s; + { + E T1k, Tl, T1p, TE, TP, T1g, TM, T1b, T1f, T1a, TU, Tf, T1l, TH, Tw; + E T1q; + { + E TA, T3, TN, Tk, Th, T6, TO, TD, Tb, Tm, Ta, TK, Tp, Tc, Ts; + E Tt; + { + E T4, T5, TB, TC; + { + E T1, T2, Ti, Tj; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + Ti = Ip[0]; + Tj = Im[WS(rs, 3)]; + T4 = Rp[WS(rs, 2)]; + TA = T1 - T2; + T3 = T1 + T2; + TN = Ti - Tj; + Tk = Ti + Tj; + T5 = Rm[WS(rs, 1)]; + TB = Ip[WS(rs, 2)]; + TC = Im[WS(rs, 1)]; + } + { + E T8, T9, Tn, To; + T8 = Rp[WS(rs, 1)]; + Th = T4 - T5; + T6 = T4 + T5; + TO = TB - TC; + TD = TB + TC; + T9 = Rm[WS(rs, 2)]; + Tn = Ip[WS(rs, 1)]; + To = Im[WS(rs, 2)]; + Tb = Rm[0]; + Tm = T8 - T9; + Ta = T8 + T9; + TK = Tn - To; + Tp = Tn + To; + Tc = Rp[WS(rs, 3)]; + Ts = Im[0]; + Tt = Ip[WS(rs, 3)]; + } + } + { + E Tr, Td, Tu, TL, Te, T7; + T1k = 
Tk - Th; + Tl = Th + Tk; + Tr = Tb - Tc; + Td = Tb + Tc; + TL = Tt - Ts; + Tu = Ts + Tt; + T1p = TA + TD; + TE = TA - TD; + TP = TN + TO; + T1g = TN - TO; + TM = TK + TL; + T1b = TL - TK; + T1f = Ta - Td; + Te = Ta + Td; + T1a = T3 - T6; + T7 = T3 + T6; + { + E Tq, TF, TG, Tv; + Tq = Tm + Tp; + TF = Tm - Tp; + TG = Tr - Tu; + Tv = Tr + Tu; + TU = T7 - Te; + Tf = T7 + Te; + T1l = TF - TG; + TH = TF + TG; + Tw = Tq - Tv; + T1q = Tq + Tv; + } + } + } + { + E TX, T10, T1c, T13, T1h, T1E, T1H, T1C, T1K, T1G, T1L, T1F; + { + E TQ, Tx, T1y, TI, Tg, Tz; + TX = TP - TM; + TQ = TM + TP; + Tx = FMA(KP707106781, Tw, Tl); + T10 = FNMS(KP707106781, Tw, Tl); + T1c = T1a + T1b; + T1y = T1a - T1b; + T13 = FNMS(KP707106781, TH, TE); + TI = FMA(KP707106781, TH, TE); + Tg = W[0]; + Tz = W[1]; + { + E T1B, T1A, T1x, T1J, T1z, T1D; + { + E TR, Ty, TS, TJ; + T1B = T1g - T1f; + T1h = T1f + T1g; + T1A = W[11]; + TR = Tg * TI; + Ty = Tg * Tx; + T1x = W[10]; + T1J = T1A * T1y; + TS = FNMS(Tz, Tx, TR); + TJ = FMA(Tz, TI, Ty); + T1z = T1x * T1y; + T1m = FMA(KP707106781, T1l, T1k); + T1E = FNMS(KP707106781, T1l, T1k); + Im[0] = TS - TQ; + Ip[0] = TQ + TS; + Rm[0] = Tf + TJ; + Rp[0] = Tf - TJ; + T1H = FMA(KP707106781, T1q, T1p); + T1r = FNMS(KP707106781, T1q, T1p); + T1D = W[12]; + } + T1C = FNMS(T1A, T1B, T1z); + T1K = FMA(T1x, T1B, T1J); + T1G = W[13]; + T1L = T1D * T1H; + T1F = T1D * T1E; + } + } + { + E TY, T16, T12, T17, T11; + { + E TW, TT, T15, TV, TZ, T1M, T1I; + TW = W[7]; + T1M = FNMS(T1G, T1E, T1L); + T1I = FMA(T1G, T1H, T1F); + TT = W[6]; + T15 = TW * TU; + Im[WS(rs, 3)] = T1M - T1K; + Ip[WS(rs, 3)] = T1K + T1M; + Rm[WS(rs, 3)] = T1C + T1I; + Rp[WS(rs, 3)] = T1C - T1I; + TV = TT * TU; + TZ = W[8]; + TY = FNMS(TW, TX, TV); + T16 = FMA(TT, TX, T15); + T12 = W[9]; + T17 = TZ * T13; + T11 = TZ * T10; + } + { + E T1e, T19, T1t, T1d, T1j, T18, T14; + T1e = W[3]; + T18 = FNMS(T12, T10, T17); + T14 = FMA(T12, T13, T11); + T19 = W[2]; + T1t = T1e * T1c; + Im[WS(rs, 2)] = T18 - T16; + 
Ip[WS(rs, 2)] = T16 + T18; + Rm[WS(rs, 2)] = TY + T14; + Rp[WS(rs, 2)] = TY - T14; + T1d = T19 * T1c; + T1j = W[4]; + T1i = FNMS(T1e, T1h, T1d); + T1u = FMA(T19, T1h, T1t); + T1o = W[5]; + T1v = T1j * T1r; + T1n = T1j * T1m; + } + } + } + } + T1w = FNMS(T1o, T1m, T1v); + T1s = FMA(T1o, T1r, T1n); + Im[WS(rs, 1)] = T1w - T1u; + Ip[WS(rs, 1)] = T1u + T1w; + Rm[WS(rs, 1)] = T1i + T1s; + Rp[WS(rs, 1)] = T1i - T1s; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cbdft2_8", twinstr, &GENUS, {60, 14, 22, 0} }; + +void X(codelet_hc2cbdft2_8) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_8, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hc2cbdft2_8 -include hc2cb.h */ + +/* + * This function contains 82 FP additions, 32 FP multiplications, + * (or, 68 additions, 18 multiplications, 14 fused multiply/add), + * 30 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T7, T1d, T1h, Tl, TG, T14, T19, TO, Te, TL, T18, T15, TB, T1e, Tw; + E T1i; + { + E T3, TC, Tk, TM, T6, Th, TF, TN; + { + E T1, T2, Ti, Tj; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + T3 = T1 + T2; + TC = T1 - T2; + Ti = Ip[0]; + Tj = Im[WS(rs, 3)]; + Tk = Ti + Tj; + TM = Ti - Tj; + } + { + E T4, T5, TD, TE; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 1)]; + T6 = T4 + T5; + Th = T4 - T5; + TD = Ip[WS(rs, 2)]; + TE = Im[WS(rs, 1)]; + TF = TD + TE; + TN = TD - TE; + } + T7 = T3 + T6; + T1d = Tk - Th; + T1h = TC + TF; + Tl = Th + Tk; + TG = TC - TF; + T14 
= T3 - T6; + T19 = TM - TN; + TO = TM + TN; + } + { + E Ta, Tm, Tp, TJ, Td, Tr, Tu, TK; + { + E T8, T9, Tn, To; + T8 = Rp[WS(rs, 1)]; + T9 = Rm[WS(rs, 2)]; + Ta = T8 + T9; + Tm = T8 - T9; + Tn = Ip[WS(rs, 1)]; + To = Im[WS(rs, 2)]; + Tp = Tn + To; + TJ = Tn - To; + } + { + E Tb, Tc, Ts, Tt; + Tb = Rm[0]; + Tc = Rp[WS(rs, 3)]; + Td = Tb + Tc; + Tr = Tb - Tc; + Ts = Im[0]; + Tt = Ip[WS(rs, 3)]; + Tu = Ts + Tt; + TK = Tt - Ts; + } + Te = Ta + Td; + TL = TJ + TK; + T18 = Ta - Td; + T15 = TK - TJ; + { + E Tz, TA, Tq, Tv; + Tz = Tm - Tp; + TA = Tr - Tu; + TB = KP707106781 * (Tz + TA); + T1e = KP707106781 * (Tz - TA); + Tq = Tm + Tp; + Tv = Tr + Tu; + Tw = KP707106781 * (Tq - Tv); + T1i = KP707106781 * (Tq + Tv); + } + } + { + E Tf, TP, TI, TQ; + Tf = T7 + Te; + TP = TL + TO; + { + E Tx, TH, Tg, Ty; + Tx = Tl + Tw; + TH = TB + TG; + Tg = W[0]; + Ty = W[1]; + TI = FMA(Tg, Tx, Ty * TH); + TQ = FNMS(Ty, Tx, Tg * TH); + } + Rp[0] = Tf - TI; + Ip[0] = TP + TQ; + Rm[0] = Tf + TI; + Im[0] = TQ - TP; + } + { + E T1r, T1x, T1w, T1y; + { + E T1o, T1q, T1n, T1p; + T1o = T14 - T15; + T1q = T19 - T18; + T1n = W[10]; + T1p = W[11]; + T1r = FNMS(T1p, T1q, T1n * T1o); + T1x = FMA(T1p, T1o, T1n * T1q); + } + { + E T1t, T1v, T1s, T1u; + T1t = T1d - T1e; + T1v = T1i + T1h; + T1s = W[12]; + T1u = W[13]; + T1w = FMA(T1s, T1t, T1u * T1v); + T1y = FNMS(T1u, T1t, T1s * T1v); + } + Rp[WS(rs, 3)] = T1r - T1w; + Ip[WS(rs, 3)] = T1x + T1y; + Rm[WS(rs, 3)] = T1r + T1w; + Im[WS(rs, 3)] = T1y - T1x; + } + { + E TV, T11, T10, T12; + { + E TS, TU, TR, TT; + TS = T7 - Te; + TU = TO - TL; + TR = W[6]; + TT = W[7]; + TV = FNMS(TT, TU, TR * TS); + T11 = FMA(TT, TS, TR * TU); + } + { + E TX, TZ, TW, TY; + TX = Tl - Tw; + TZ = TG - TB; + TW = W[8]; + TY = W[9]; + T10 = FMA(TW, TX, TY * TZ); + T12 = FNMS(TY, TX, TW * TZ); + } + Rp[WS(rs, 2)] = TV - T10; + Ip[WS(rs, 2)] = T11 + T12; + Rm[WS(rs, 2)] = TV + T10; + Im[WS(rs, 2)] = T12 - T11; + } + { + E T1b, T1l, T1k, T1m; + { + E T16, T1a, T13, T17; + T16 = T14 + 
T15; + T1a = T18 + T19; + T13 = W[2]; + T17 = W[3]; + T1b = FNMS(T17, T1a, T13 * T16); + T1l = FMA(T17, T16, T13 * T1a); + } + { + E T1f, T1j, T1c, T1g; + T1f = T1d + T1e; + T1j = T1h - T1i; + T1c = W[4]; + T1g = W[5]; + T1k = FMA(T1c, T1f, T1g * T1j); + T1m = FNMS(T1g, T1f, T1c * T1j); + } + Rp[WS(rs, 1)] = T1b - T1k; + Ip[WS(rs, 1)] = T1l + T1m; + Rm[WS(rs, 1)] = T1b + T1k; + Im[WS(rs, 1)] = T1m - T1l; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cbdft2_8", twinstr, &GENUS, {68, 18, 14, 0} }; + +void X(codelet_hc2cbdft2_8) (planner *p) { + X(khc2c_register) (p, hc2cbdft2_8, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:44 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cbdft_10 -include hc2cb.h */ + +/* + * This function contains 122 FP additions, 72 FP multiplications, + * (or, 68 additions, 18 multiplications, 54 fused multiply/add), + * 95 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T2d, T2f; + { + E T1g, TQ, T1z, TZ, Tu, T23, T1p, T14, Tt, T27, T13, Tj, Tz, T1i, T18; + E TJ, TS, T19, Ty, TA; + { + E Tl, T3, T7, Tm, T6, Tr, TY, T1n, Th, T8, T1, T2; + T1 = Rp[0]; + T2 = Rm[WS(rs, 4)]; + { + E Te, Tp, Td, Tf, Tb, Tc; + Tb = Rp[WS(rs, 4)]; + Tc = Rm[0]; + Te = Rm[WS(rs, 3)]; + Tl = T1 - T2; + T3 = T1 + T2; + Tp = Tb - Tc; + Td = Tb + Tc; + Tf = Rp[WS(rs, 1)]; + { + E T4, T5, Tq, Tg; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 2)]; + T7 = Rm[WS(rs, 1)]; + Tq = Te - Tf; + Tg = Te + Tf; + Tm = T4 - T5; + T6 = T4 + T5; + Tr = Tp + Tq; + TY = Tp - Tq; + T1n = Td - Tg; + Th = Td + Tg; + T8 = Rp[WS(rs, 3)]; + } + } + 
{ + E TO, Tn, T9, TP; + TO = Ip[0]; + Tn = T7 - T8; + T9 = T7 + T8; + TP = Im[WS(rs, 4)]; + { + E TG, TH, TF, T16, TD, TE, Ti; + TD = Ip[WS(rs, 4)]; + { + E TX, To, T1o, Ta, Ts; + TX = Tm - Tn; + To = Tm + Tn; + T1o = T6 - T9; + Ta = T6 + T9; + T1g = TO - TP; + TQ = TO + TP; + T1z = FNMS(KP618033988, TX, TY); + TZ = FMA(KP618033988, TY, TX); + Ts = To + Tr; + Tu = To - Tr; + T23 = FMA(KP618033988, T1n, T1o); + T1p = FNMS(KP618033988, T1o, T1n); + Ti = Ta + Th; + T14 = Ta - Th; + Tt = FNMS(KP250000000, Ts, Tl); + T27 = Tl + Ts; + TE = Im[0]; + } + T13 = FNMS(KP250000000, Ti, T3); + Tj = T3 + Ti; + TG = Im[WS(rs, 3)]; + TH = Ip[WS(rs, 1)]; + TF = TD + TE; + T16 = TD - TE; + { + E Tw, T17, TI, Tx; + Tw = Ip[WS(rs, 2)]; + T17 = TH - TG; + TI = TG + TH; + Tx = Im[WS(rs, 2)]; + Tz = Im[WS(rs, 1)]; + T1i = T16 + T17; + T18 = T16 - T17; + TJ = TF + TI; + TS = TF - TI; + T19 = Tw - Tx; + Ty = Tw + Tx; + TA = Ip[WS(rs, 3)]; + } + } + } + } + { + E T26, T2y, T2a, T28, T1q, T1K, T24, T2k, T10, T1Q, T1A, T2q, T29, Tk, TN; + E T2c, T1M, T1P, T2w, TM, T1O, T1S, T1s, T1x, T2m, T2p, T1w, T1C, T2o, T2s; + E T12, T1f, T1G, T1J, T1I, T1E, T1e, T1U, T1W, T21, T2g, T2j, T20, T2e, T2i; + E T2u, T1a, TB; + T1a = TA - Tz; + TB = Tz + TA; + { + E T1Y, T1c, T1u, T1t, T1N, TL, TK, Tv, T2n, T1v; + { + E T1l, TV, T1k, TU, T1b, T1h; + T26 = W[9]; + T1b = T19 - T1a; + T1h = T19 + T1a; + { + E TC, TR, T1j, TT; + TC = Ty + TB; + TR = Ty - TB; + T1Y = FMA(KP618033988, T18, T1b); + T1c = FNMS(KP618033988, T1b, T18); + T1j = T1h + T1i; + T1l = T1h - T1i; + T1u = FNMS(KP618033988, TC, TJ); + TK = FMA(KP618033988, TJ, TC); + TT = TR + TS; + TV = TR - TS; + T2y = T1g + T1j; + T1k = FNMS(KP250000000, T1j, T1g); + T2a = TQ + TT; + TU = FNMS(KP250000000, TT, TQ); + T28 = T26 * T27; + } + { + E T22, T1m, T1y, TW; + T22 = FMA(KP559016994, T1l, T1k); + T1m = FNMS(KP559016994, T1l, T1k); + T1y = FNMS(KP559016994, TV, TU); + TW = FMA(KP559016994, TV, TU); + T1q = FNMS(KP951056516, T1p, T1m); + T1K = 
FMA(KP951056516, T1p, T1m); + T24 = FNMS(KP951056516, T23, T22); + T2k = FMA(KP951056516, T23, T22); + T10 = FMA(KP951056516, TZ, TW); + T1Q = FNMS(KP951056516, TZ, TW); + T1A = FMA(KP951056516, T1z, T1y); + T2q = FNMS(KP951056516, T1z, T1y); + T29 = W[8]; + } + } + Tv = FMA(KP559016994, Tu, Tt); + T1t = FNMS(KP559016994, Tu, Tt); + Tk = W[1]; + TN = W[0]; + T2c = T29 * T27; + T1N = FMA(KP951056516, TK, Tv); + TL = FNMS(KP951056516, TK, Tv); + T1M = W[17]; + T1P = W[16]; + T2w = TN * TL; + TM = Tk * TL; + T1O = T1M * T1N; + T1S = T1P * T1N; + T2n = FMA(KP951056516, T1u, T1t); + T1v = FNMS(KP951056516, T1u, T1t); + T1s = W[5]; + T1x = W[4]; + T2m = W[13]; + T2p = W[12]; + T1w = T1s * T1v; + T1C = T1x * T1v; + T2o = T2m * T2n; + T2s = T2p * T2n; + { + E T1X, T1d, T1H, T15, T2h, T1Z; + T1X = FMA(KP559016994, T14, T13); + T15 = FNMS(KP559016994, T14, T13); + T12 = W[2]; + T1f = W[3]; + T1G = W[14]; + T1d = FMA(KP951056516, T1c, T15); + T1H = FNMS(KP951056516, T1c, T15); + T1J = W[15]; + T1I = T1G * T1H; + T1E = T1f * T1d; + T1e = T12 * T1d; + T1U = T1J * T1H; + T2h = FNMS(KP951056516, T1Y, T1X); + T1Z = FMA(KP951056516, T1Y, T1X); + T1W = W[6]; + T21 = W[7]; + T2g = W[10]; + T2j = W[11]; + T20 = T1W * T1Z; + T2e = T21 * T1Z; + T2i = T2g * T2h; + T2u = T2j * T2h; + } + } + { + E T1D, T1F, T1L, T1R; + { + E T11, T2x, T1r, T1B; + T11 = FMA(TN, T10, TM); + T2x = FNMS(Tk, T10, T2w); + T1r = FNMS(T1f, T1q, T1e); + T1B = FMA(T1x, T1A, T1w); + Rm[0] = Tj + T11; + Rp[0] = Tj - T11; + Ip[0] = T2x + T2y; + Im[0] = T2x - T2y; + Rp[WS(rs, 1)] = T1r - T1B; + Rm[WS(rs, 1)] = T1B + T1r; + T1D = FNMS(T1s, T1A, T1C); + T1F = FMA(T12, T1q, T1E); + T1L = FNMS(T1J, T1K, T1I); + T1R = FMA(T1P, T1Q, T1O); + } + { + E T1T, T1V, T2t, T2v; + T1T = FNMS(T1M, T1Q, T1S); + Ip[WS(rs, 1)] = T1D + T1F; + Im[WS(rs, 1)] = T1D - T1F; + Rm[WS(rs, 4)] = T1R + T1L; + Rp[WS(rs, 4)] = T1L - T1R; + T1V = FMA(T1G, T1K, T1U); + T2t = FNMS(T2m, T2q, T2s); + T2v = FMA(T2g, T2k, T2u); + { + E T2l, T2r, T25, T2b; + 
T2l = FNMS(T2j, T2k, T2i); + Ip[WS(rs, 4)] = T1T + T1V; + Im[WS(rs, 4)] = T1T - T1V; + Ip[WS(rs, 3)] = T2t + T2v; + Im[WS(rs, 3)] = T2t - T2v; + T2r = FMA(T2p, T2q, T2o); + T25 = FNMS(T21, T24, T20); + T2b = FMA(T29, T2a, T28); + T2d = FNMS(T26, T2a, T2c); + Rm[WS(rs, 3)] = T2r + T2l; + Rp[WS(rs, 3)] = T2l - T2r; + Rm[WS(rs, 2)] = T2b + T25; + Rp[WS(rs, 2)] = T25 - T2b; + T2f = FMA(T1W, T24, T2e); + } + } + } + } + } + Ip[WS(rs, 2)] = T2d + T2f; + Im[WS(rs, 2)] = T2d - T2f; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cbdft_10", twinstr, &GENUS, {68, 18, 54, 0} }; + +void X(codelet_hc2cbdft_10) (planner *p) { + X(khc2c_register) (p, hc2cbdft_10, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cbdft_10 -include hc2cb.h */ + +/* + * This function contains 122 FP additions, 60 FP multiplications, + * (or, 92 additions, 30 multiplications, 30 fused multiply/add), + * 61 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T3, TS, TR, T13, Ti, T12, TT, TU, T1g, T1T, Tr, T1s, TJ, T1h, TG; + E T1m, TK, TL, T1k, T1l, T1b, T1P, TY, T1w; + { + E Td, To, Tg, Tp, Th, TQ, T6, Tl, T9, Tm, Ta, TP, T1, T2; + T1 = Rp[0]; + T2 = Rm[WS(rs, 4)]; + T3 = T1 + T2; + TS = T1 - T2; + { + E Tb, Tc, Te, Tf; + 
Tb = Rp[WS(rs, 4)]; + Tc = Rm[0]; + Td = Tb + Tc; + To = Tb - Tc; + Te = Rm[WS(rs, 3)]; + Tf = Rp[WS(rs, 1)]; + Tg = Te + Tf; + Tp = Te - Tf; + } + Th = Td + Tg; + TQ = To + Tp; + { + E T4, T5, T7, T8; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 2)]; + T6 = T4 + T5; + Tl = T4 - T5; + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 3)]; + T9 = T7 + T8; + Tm = T7 - T8; + } + Ta = T6 + T9; + TP = Tl + Tm; + TR = KP559016994 * (TP - TQ); + T13 = KP559016994 * (Ta - Th); + Ti = Ta + Th; + T12 = FNMS(KP250000000, Ti, T3); + TT = TP + TQ; + TU = FNMS(KP250000000, TT, TS); + { + E T1e, T1f, Tn, Tq; + T1e = T6 - T9; + T1f = Td - Tg; + T1g = FNMS(KP951056516, T1f, KP587785252 * T1e); + T1T = FMA(KP951056516, T1e, KP587785252 * T1f); + Tn = Tl - Tm; + Tq = To - Tp; + Tr = FMA(KP951056516, Tn, KP587785252 * Tq); + T1s = FNMS(KP951056516, Tq, KP587785252 * Tn); + } + } + { + E TB, T18, TE, T19, TF, T1j, Tu, T15, Tx, T16, Ty, T1i, TH, TI; + TH = Ip[0]; + TI = Im[WS(rs, 4)]; + TJ = TH + TI; + T1h = TH - TI; + { + E Tz, TA, TC, TD; + Tz = Ip[WS(rs, 4)]; + TA = Im[0]; + TB = Tz + TA; + T18 = Tz - TA; + TC = Im[WS(rs, 3)]; + TD = Ip[WS(rs, 1)]; + TE = TC + TD; + T19 = TD - TC; + } + TF = TB - TE; + T1j = T18 + T19; + { + E Ts, Tt, Tv, Tw; + Ts = Ip[WS(rs, 2)]; + Tt = Im[WS(rs, 2)]; + Tu = Ts + Tt; + T15 = Ts - Tt; + Tv = Im[WS(rs, 1)]; + Tw = Ip[WS(rs, 3)]; + Tx = Tv + Tw; + T16 = Tw - Tv; + } + Ty = Tu - Tx; + T1i = T15 + T16; + TG = KP559016994 * (Ty - TF); + T1m = KP559016994 * (T1i - T1j); + TK = Ty + TF; + TL = FNMS(KP250000000, TK, TJ); + T1k = T1i + T1j; + T1l = FNMS(KP250000000, T1k, T1h); + { + E T17, T1a, TW, TX; + T17 = T15 - T16; + T1a = T18 - T19; + T1b = FNMS(KP951056516, T1a, KP587785252 * T17); + T1P = FMA(KP951056516, T17, KP587785252 * T1a); + TW = Tu + Tx; + TX = TB + TE; + TY = FMA(KP951056516, TW, KP587785252 * TX); + T1w = FNMS(KP951056516, TX, KP587785252 * TW); + } + } + { + E Tj, T2g, TN, T1H, T1U, T26, TZ, T1J, T1Q, T24, T1c, T1C, T1t, T29, T1o; + E T1E, T1x, T2b, T20, 
T21, TM, T1S, TV; + Tj = T3 + Ti; + T2g = T1h + T1k; + TM = TG + TL; + TN = Tr + TM; + T1H = TM - Tr; + T1S = T1m + T1l; + T1U = T1S - T1T; + T26 = T1T + T1S; + TV = TR + TU; + TZ = TV - TY; + T1J = TV + TY; + { + E T1O, T14, T1r, T1n, T1v; + T1O = T13 + T12; + T1Q = T1O + T1P; + T24 = T1O - T1P; + T14 = T12 - T13; + T1c = T14 - T1b; + T1C = T14 + T1b; + T1r = TL - TG; + T1t = T1r - T1s; + T29 = T1s + T1r; + T1n = T1l - T1m; + T1o = T1g + T1n; + T1E = T1n - T1g; + T1v = TU - TR; + T1x = T1v + T1w; + T2b = T1v - T1w; + { + E T1X, T1Z, T1W, T1Y; + T1X = TS + TT; + T1Z = TJ + TK; + T1W = W[9]; + T1Y = W[8]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T21 = FNMS(T1W, T1Z, T1Y * T1X); + } + } + { + E T10, T2f, Tk, TO; + Tk = W[0]; + TO = W[1]; + T10 = FMA(Tk, TN, TO * TZ); + T2f = FNMS(TO, TN, Tk * TZ); + Rp[0] = Tj - T10; + Ip[0] = T2f + T2g; + Rm[0] = Tj + T10; + Im[0] = T2f - T2g; + } + { + E T1V, T22, T1N, T1R; + T1N = W[6]; + T1R = W[7]; + T1V = FNMS(T1R, T1U, T1N * T1Q); + T22 = FMA(T1R, T1Q, T1N * T1U); + Rp[WS(rs, 2)] = T1V - T20; + Ip[WS(rs, 2)] = T21 + T22; + Rm[WS(rs, 2)] = T20 + T1V; + Im[WS(rs, 2)] = T21 - T22; + } + { + E T1p, T1A, T1y, T1z; + { + E T11, T1d, T1q, T1u; + T11 = W[2]; + T1d = W[3]; + T1p = FNMS(T1d, T1o, T11 * T1c); + T1A = FMA(T1d, T1c, T11 * T1o); + T1q = W[4]; + T1u = W[5]; + T1y = FMA(T1q, T1t, T1u * T1x); + T1z = FNMS(T1u, T1t, T1q * T1x); + } + Rp[WS(rs, 1)] = T1p - T1y; + Ip[WS(rs, 1)] = T1z + T1A; + Rm[WS(rs, 1)] = T1y + T1p; + Im[WS(rs, 1)] = T1z - T1A; + } + { + E T1F, T1M, T1K, T1L; + { + E T1B, T1D, T1G, T1I; + T1B = W[14]; + T1D = W[15]; + T1F = FNMS(T1D, T1E, T1B * T1C); + T1M = FMA(T1D, T1C, T1B * T1E); + T1G = W[16]; + T1I = W[17]; + T1K = FMA(T1G, T1H, T1I * T1J); + T1L = FNMS(T1I, T1H, T1G * T1J); + } + Rp[WS(rs, 4)] = T1F - T1K; + Ip[WS(rs, 4)] = T1L + T1M; + Rm[WS(rs, 4)] = T1K + T1F; + Im[WS(rs, 4)] = T1L - T1M; + } + { + E T27, T2e, T2c, T2d; + { + E T23, T25, T28, T2a; + T23 = W[10]; + T25 = W[11]; + T27 = FNMS(T25, T26, T23 * 
T24); + T2e = FMA(T25, T24, T23 * T26); + T28 = W[12]; + T2a = W[13]; + T2c = FMA(T28, T29, T2a * T2b); + T2d = FNMS(T2a, T29, T28 * T2b); + } + Rp[WS(rs, 3)] = T27 - T2c; + Ip[WS(rs, 3)] = T2d + T2e; + Rm[WS(rs, 3)] = T2c + T27; + Im[WS(rs, 3)] = T2d - T2e; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cbdft_10", twinstr, &GENUS, {92, 30, 30, 0} }; + +void X(codelet_hc2cbdft_10) (planner *p) { + X(khc2c_register) (p, hc2cbdft_10, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,635 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:44 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */ + +/* + * This function contains 142 FP additions, 68 FP multiplications, + * (or, 96 additions, 22 multiplications, 46 fused multiply/add), + * 81 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E T2S, T2V, T2w, T2Z, T2T, T2I, T2Q, T2Y, T2U, T2K, T2G, T30, T2W; + { + E Tb, T1Z, T2D, T1E, T1N, T2y, TD, T2t, T1U, T1e, T2o, TY, T1f, TI, T1g; + E TN, Tm, T1V, T2z, T1H, T1Q, T2E, T19, T2u; + { + E T1c, TU, T1d, TX; + { + E Tu, T6, TT, TS, T5, Tt, Tw, Tx, TB, T9, Ty; + { + E T1, Tp, Tq, Tr, T4, T2, T3, T7, T8, Ts; + T1 = Rp[0]; + T2 = Rp[WS(rs, 4)]; + T3 = Rm[WS(rs, 3)]; + Tp = Ip[0]; + Tq = Ip[WS(rs, 4)]; + Tr = Im[WS(rs, 3)]; + T4 = T2 + T3; + Tu = T2 - T3; + T6 = Rm[WS(rs, 5)]; + TT = Tr + Tq; + Ts = Tq - Tr; + TS = FNMS(KP500000000, T4, T1); + T5 = T1 + T4; + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 2)]; + T1c = Tp + Ts; + Tt = FNMS(KP500000000, Ts, Tp); + Tw = Im[WS(rs, 5)]; + Tx = Im[WS(rs, 1)]; + TB = T7 - T8; + T9 = T7 + T8; + Ty = Ip[WS(rs, 2)]; + } + { 
+ E T1L, Tv, Ta, TV, TW, Tz; + T1L = FNMS(KP866025403, Tu, Tt); + Tv = FMA(KP866025403, Tu, Tt); + Ta = T6 + T9; + TV = FNMS(KP500000000, T9, T6); + TW = Tx + Ty; + Tz = Tx - Ty; + { + E TC, T1M, T1C, TA, T1D; + T1C = FMA(KP866025403, TT, TS); + TU = FNMS(KP866025403, TT, TS); + T1d = Tw + Tz; + TA = FNMS(KP500000000, Tz, Tw); + T1D = FNMS(KP866025403, TW, TV); + TX = FMA(KP866025403, TW, TV); + Tb = T5 + Ta; + T1Z = T5 - Ta; + TC = FNMS(KP866025403, TB, TA); + T1M = FMA(KP866025403, TB, TA); + T2D = T1C - T1D; + T1E = T1C + T1D; + T1N = T1L - T1M; + T2y = T1L + T1M; + TD = Tv + TC; + T2t = Tv - TC; + } + } + } + { + E T12, Th, TH, TE, Tg, T11, T14, TK, T17, Tk, TL; + { + E Tc, TZ, TF, TG, Tf, Td, Te, Ti, Tj, T10; + Tc = Rp[WS(rs, 3)]; + T1U = T1c + T1d; + T1e = T1c - T1d; + T2o = TU + TX; + TY = TU - TX; + Td = Rm[WS(rs, 4)]; + Te = Rm[0]; + TZ = Ip[WS(rs, 3)]; + TF = Im[WS(rs, 4)]; + TG = Im[0]; + Tf = Td + Te; + T12 = Td - Te; + Th = Rm[WS(rs, 2)]; + TH = TF - TG; + T10 = TF + TG; + TE = FNMS(KP500000000, Tf, Tc); + Tg = Tc + Tf; + Ti = Rp[WS(rs, 1)]; + Tj = Rp[WS(rs, 5)]; + T1f = TZ - T10; + T11 = FMA(KP500000000, T10, TZ); + T14 = Im[WS(rs, 2)]; + TK = Ip[WS(rs, 5)]; + T17 = Ti - Tj; + Tk = Ti + Tj; + TL = Ip[WS(rs, 1)]; + } + { + E T1O, T13, Tl, TJ, TM, T15; + T1O = FNMS(KP866025403, T12, T11); + T13 = FMA(KP866025403, T12, T11); + Tl = Th + Tk; + TJ = FNMS(KP500000000, Tk, Th); + TM = TK - TL; + T15 = TK + TL; + { + E T18, T1P, T1F, T16, T1G; + T1F = FNMS(KP866025403, TH, TE); + TI = FMA(KP866025403, TH, TE); + T1g = T15 - T14; + T16 = FMA(KP500000000, T15, T14); + T1G = FNMS(KP866025403, TM, TJ); + TN = FMA(KP866025403, TM, TJ); + Tm = Tg + Tl; + T1V = Tg - Tl; + T18 = FNMS(KP866025403, T17, T16); + T1P = FMA(KP866025403, T17, T16); + T2z = T1F - T1G; + T1H = T1F + T1G; + T1Q = T1O - T1P; + T2E = T1O + T1P; + T19 = T13 + T18; + T2u = T13 - T18; + } + } + } + } + { + E T20, T2p, T1v, T1s, T1q, T1y, T1u, T1z, T1t; + { + E T1m, Tn, T1a, T1p, T1i, To, TP, TR, 
T1h, TO; + T1m = Tb - Tm; + Tn = Tb + Tm; + T20 = T1f - T1g; + T1h = T1f + T1g; + T2p = TI + TN; + TO = TI - TN; + T1a = TY - T19; + T1v = TY + T19; + T1p = T1e - T1h; + T1i = T1e + T1h; + To = W[0]; + T1s = TD - TO; + TP = TD + TO; + TR = W[1]; + { + E T1l, T1o, T1n, T1x, T1r; + { + E T1j, TQ, T1k, T1b; + T1j = To * T1a; + TQ = To * TP; + T1l = W[10]; + T1k = FNMS(TR, TP, T1j); + T1b = FMA(TR, T1a, TQ); + T1o = W[11]; + T1n = T1l * T1m; + Im[0] = T1k - T1i; + Ip[0] = T1i + T1k; + Rm[0] = Tn + T1b; + Rp[0] = Tn - T1b; + T1x = T1o * T1m; + T1r = W[12]; + } + T1q = FNMS(T1o, T1p, T1n); + T1y = FMA(T1l, T1p, T1x); + T1u = W[13]; + T1z = T1r * T1v; + T1t = T1r * T1s; + } + } + { + E T2e, T2h, T1S, T2j, T2f, T26, T2c, T2m, T2g, T24, T22; + { + E T2b, T1R, T27, T2a, T1B, T29, T2l, T1K, T1J, T1W, T21, T25, T2d, T23, T1X; + E T1Y; + { + E T1I, T28, T1A, T1w, T1T; + T1A = FNMS(T1u, T1s, T1z); + T1w = FMA(T1u, T1v, T1t); + T1I = T1E - T1H; + T28 = T1E + T1H; + T2b = T1N + T1Q; + T1R = T1N - T1Q; + Im[WS(rs, 3)] = T1A - T1y; + Ip[WS(rs, 3)] = T1y + T1A; + Rm[WS(rs, 3)] = T1q + T1w; + Rp[WS(rs, 3)] = T1q - T1w; + T27 = W[14]; + T2a = W[15]; + T1B = W[2]; + T29 = T27 * T28; + T2l = T2a * T28; + T1K = W[3]; + T1J = T1B * T1I; + T1W = T1U - T1V; + T2e = T1V + T1U; + T2h = T1Z - T20; + T21 = T1Z + T20; + T25 = T1K * T1I; + T1T = W[4]; + T2d = W[16]; + T23 = T1T * T21; + T1X = T1T * T1W; + } + T1S = FNMS(T1K, T1R, T1J); + T2j = T2d * T2h; + T2f = T2d * T2e; + T26 = FMA(T1B, T1R, T25); + T1Y = W[5]; + T2c = FNMS(T2a, T2b, T29); + T2m = FMA(T27, T2b, T2l); + T2g = W[17]; + T24 = FNMS(T1Y, T1W, T23); + T22 = FMA(T1Y, T21, T1X); + } + { + E T2L, T2O, T2P, T2v, T2N, T2X, T2n, T2s, T2A, T2F, T2r, T2H, T2R, T2J, T2B; + E T2C; + { + E T2q, T2k, T2i, T2M, T2x; + T2k = FNMS(T2g, T2e, T2j); + T2i = FMA(T2g, T2h, T2f); + Im[WS(rs, 1)] = T24 - T26; + Ip[WS(rs, 1)] = T24 + T26; + Rm[WS(rs, 1)] = T22 + T1S; + Rp[WS(rs, 1)] = T1S - T22; + Im[WS(rs, 4)] = T2k - T2m; + Ip[WS(rs, 4)] = T2k + T2m; + 
Rm[WS(rs, 4)] = T2i + T2c; + Rp[WS(rs, 4)] = T2c - T2i; + T2q = T2o + T2p; + T2M = T2o - T2p; + T2L = W[18]; + T2O = W[19]; + T2P = T2t - T2u; + T2v = T2t + T2u; + T2N = T2L * T2M; + T2X = T2O * T2M; + T2n = W[6]; + T2s = W[7]; + T2S = T2y - T2z; + T2A = T2y + T2z; + T2F = T2D - T2E; + T2V = T2D + T2E; + T2r = T2n * T2q; + T2H = T2s * T2q; + T2x = W[8]; + T2R = W[20]; + T2J = T2x * T2F; + T2B = T2x * T2A; + } + T2w = FNMS(T2s, T2v, T2r); + T2Z = T2R * T2V; + T2T = T2R * T2S; + T2I = FMA(T2n, T2v, T2H); + T2C = W[9]; + T2Q = FNMS(T2O, T2P, T2N); + T2Y = FMA(T2L, T2P, T2X); + T2U = W[21]; + T2K = FNMS(T2C, T2A, T2J); + T2G = FMA(T2C, T2F, T2B); + } + } + } + } + T30 = FNMS(T2U, T2S, T2Z); + T2W = FMA(T2U, T2V, T2T); + Im[WS(rs, 2)] = T2K - T2I; + Ip[WS(rs, 2)] = T2I + T2K; + Rm[WS(rs, 2)] = T2w + T2G; + Rp[WS(rs, 2)] = T2w - T2G; + Im[WS(rs, 5)] = T30 - T2Y; + Ip[WS(rs, 5)] = T2Y + T30; + Rm[WS(rs, 5)] = T2Q + T2W; + Rp[WS(rs, 5)] = T2Q - T2W; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {96, 22, 46, 0} }; + +void X(codelet_hc2cbdft_12) (planner *p) { + X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */ + +/* + * This function contains 142 FP additions, 60 FP multiplications, + * (or, 112 additions, 30 multiplications, 30 fused multiply/add), + * 47 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm 
- ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E Tv, T1E, TC, T1F, TW, T1x, TT, T1w, T1d, T1N, Tb, T1R, TI, T1z, TN; + E T1A, T17, T1I, T12, T1H, T1g, T1S, Tm, T1O; + { + E T1, Tq, T6, TA, T4, Tp, Tt, TS, T9, Tw, Tz, TV; + T1 = Rp[0]; + Tq = Ip[0]; + T6 = Rm[WS(rs, 5)]; + TA = Im[WS(rs, 5)]; + { + E T2, T3, Tr, Ts; + T2 = Rp[WS(rs, 4)]; + T3 = Rm[WS(rs, 3)]; + T4 = T2 + T3; + Tp = KP866025403 * (T2 - T3); + Tr = Im[WS(rs, 3)]; + Ts = Ip[WS(rs, 4)]; + Tt = Tr - Ts; + TS = KP866025403 * (Tr + Ts); + } + { + E T7, T8, Tx, Ty; + T7 = Rm[WS(rs, 1)]; + T8 = Rp[WS(rs, 2)]; + T9 = T7 + T8; + Tw = KP866025403 * (T7 - T8); + Tx = Im[WS(rs, 1)]; + Ty = Ip[WS(rs, 2)]; + Tz = Tx - Ty; + TV = KP866025403 * (Tx + Ty); + } + { + E Tu, TB, TU, TR; + Tu = FMA(KP500000000, Tt, Tq); + Tv = Tp + Tu; + T1E = Tu - Tp; + TB = FMS(KP500000000, Tz, TA); + TC = Tw + TB; + T1F = TB - Tw; + TU = FNMS(KP500000000, T9, T6); + TW = TU + TV; + T1x = TU - TV; + TR = FNMS(KP500000000, T4, T1); + TT = TR - TS; + T1w = TR + TS; + { + E T1b, T1c, T5, Ta; + T1b = Tq - Tt; + T1c = Tz + TA; + T1d = T1b - T1c; + T1N = T1b + T1c; + T5 = T1 + T4; + Ta = T6 + T9; + Tb = T5 + Ta; + T1R = T5 - Ta; + } + } + } + { + E Tc, T10, Th, T15, Tf, TY, TH, TZ, Tk, T13, TM, T14; + Tc = Rp[WS(rs, 3)]; + T10 = Ip[WS(rs, 3)]; + Th = Rm[WS(rs, 2)]; + T15 = Im[WS(rs, 2)]; + { + E Td, Te, TF, TG; + Td = Rm[WS(rs, 4)]; + Te = Rm[0]; + Tf = Td + Te; + TY = KP866025403 * (Td - Te); + TF = Im[WS(rs, 4)]; + TG = Im[0]; + TH = KP866025403 * (TF - TG); + TZ = TF + TG; + } + { + E Ti, Tj, TK, TL; + Ti = Rp[WS(rs, 1)]; + Tj = Rp[WS(rs, 5)]; + Tk = Ti + Tj; + T13 = KP866025403 * (Ti - Tj); + TK = Ip[WS(rs, 5)]; + TL = Ip[WS(rs, 1)]; + TM = KP866025403 * (TK - TL); + T14 = TK + TL; + } + { + E TE, TJ, T16, T11; + TE = FNMS(KP500000000, Tf, Tc); + TI = TE + TH; + T1z = TE - TH; + TJ = FNMS(KP500000000, Tk, Th); + TN = TJ + TM; + T1A = TJ - TM; + T16 = FMA(KP500000000, T14, T15); + T17 = T13 - T16; + T1I = T13 + T16; + 
T11 = FMA(KP500000000, TZ, T10); + T12 = TY + T11; + T1H = T11 - TY; + { + E T1e, T1f, Tg, Tl; + T1e = T10 - TZ; + T1f = T14 - T15; + T1g = T1e + T1f; + T1S = T1e - T1f; + Tg = Tc + Tf; + Tl = Th + Tk; + Tm = Tg + Tl; + T1O = Tg - Tl; + } + } + } + { + E Tn, T1h, TP, T1p, T19, T1r, T1n, T1t; + Tn = Tb + Tm; + T1h = T1d + T1g; + { + E TD, TO, TX, T18; + TD = Tv - TC; + TO = TI - TN; + TP = TD + TO; + T1p = TD - TO; + TX = TT - TW; + T18 = T12 - T17; + T19 = TX - T18; + T1r = TX + T18; + { + E T1k, T1m, T1j, T1l; + T1k = Tb - Tm; + T1m = T1d - T1g; + T1j = W[10]; + T1l = W[11]; + T1n = FNMS(T1l, T1m, T1j * T1k); + T1t = FMA(T1l, T1k, T1j * T1m); + } + } + { + E T1a, T1i, To, TQ; + To = W[0]; + TQ = W[1]; + T1a = FMA(To, TP, TQ * T19); + T1i = FNMS(TQ, TP, To * T19); + Rp[0] = Tn - T1a; + Ip[0] = T1h + T1i; + Rm[0] = Tn + T1a; + Im[0] = T1i - T1h; + } + { + E T1s, T1u, T1o, T1q; + T1o = W[12]; + T1q = W[13]; + T1s = FMA(T1o, T1p, T1q * T1r); + T1u = FNMS(T1q, T1p, T1o * T1r); + Rp[WS(rs, 3)] = T1n - T1s; + Ip[WS(rs, 3)] = T1t + T1u; + Rm[WS(rs, 3)] = T1n + T1s; + Im[WS(rs, 3)] = T1u - T1t; + } + } + { + E T1C, T1Y, T1K, T20, T1U, T1V, T26, T27; + { + E T1y, T1B, T1G, T1J; + T1y = T1w + T1x; + T1B = T1z + T1A; + T1C = T1y - T1B; + T1Y = T1y + T1B; + T1G = T1E + T1F; + T1J = T1H - T1I; + T1K = T1G - T1J; + T20 = T1G + T1J; + } + { + E T1P, T1T, T1M, T1Q; + T1P = T1N - T1O; + T1T = T1R + T1S; + T1M = W[4]; + T1Q = W[5]; + T1U = FMA(T1M, T1P, T1Q * T1T); + T1V = FNMS(T1Q, T1P, T1M * T1T); + } + { + E T23, T25, T22, T24; + T23 = T1O + T1N; + T25 = T1R - T1S; + T22 = W[16]; + T24 = W[17]; + T26 = FMA(T22, T23, T24 * T25); + T27 = FNMS(T24, T23, T22 * T25); + } + { + E T1L, T1W, T1v, T1D; + T1v = W[2]; + T1D = W[3]; + T1L = FNMS(T1D, T1K, T1v * T1C); + T1W = FMA(T1D, T1C, T1v * T1K); + Rp[WS(rs, 1)] = T1L - T1U; + Ip[WS(rs, 1)] = T1V + T1W; + Rm[WS(rs, 1)] = T1U + T1L; + Im[WS(rs, 1)] = T1V - T1W; + } + { + E T21, T28, T1X, T1Z; + T1X = W[14]; + T1Z = W[15]; + T21 = 
FNMS(T1Z, T20, T1X * T1Y); + T28 = FMA(T1Z, T1Y, T1X * T20); + Rp[WS(rs, 4)] = T21 - T26; + Ip[WS(rs, 4)] = T27 + T28; + Rm[WS(rs, 4)] = T26 + T21; + Im[WS(rs, 4)] = T27 - T28; + } + } + { + E T2c, T2u, T2p, T2B, T2g, T2w, T2l, T2z; + { + E T2a, T2b, T2n, T2o; + T2a = TT + TW; + T2b = TI + TN; + T2c = T2a + T2b; + T2u = T2a - T2b; + T2n = T1w - T1x; + T2o = T1H + T1I; + T2p = T2n - T2o; + T2B = T2n + T2o; + } + { + E T2e, T2f, T2j, T2k; + T2e = Tv + TC; + T2f = T12 + T17; + T2g = T2e + T2f; + T2w = T2e - T2f; + T2j = T1E - T1F; + T2k = T1z - T1A; + T2l = T2j + T2k; + T2z = T2j - T2k; + } + { + E T2h, T2r, T2q, T2s; + { + E T29, T2d, T2i, T2m; + T29 = W[6]; + T2d = W[7]; + T2h = FNMS(T2d, T2g, T29 * T2c); + T2r = FMA(T2d, T2c, T29 * T2g); + T2i = W[8]; + T2m = W[9]; + T2q = FMA(T2i, T2l, T2m * T2p); + T2s = FNMS(T2m, T2l, T2i * T2p); + } + Rp[WS(rs, 2)] = T2h - T2q; + Ip[WS(rs, 2)] = T2r + T2s; + Rm[WS(rs, 2)] = T2h + T2q; + Im[WS(rs, 2)] = T2s - T2r; + } + { + E T2x, T2D, T2C, T2E; + { + E T2t, T2v, T2y, T2A; + T2t = W[18]; + T2v = W[19]; + T2x = FNMS(T2v, T2w, T2t * T2u); + T2D = FMA(T2v, T2u, T2t * T2w); + T2y = W[20]; + T2A = W[21]; + T2C = FMA(T2y, T2z, T2A * T2B); + T2E = FNMS(T2A, T2z, T2y * T2B); + } + Rp[WS(rs, 5)] = T2x - T2C; + Ip[WS(rs, 5)] = T2D + T2E; + Rm[WS(rs, 5)] = T2x + T2C; + Im[WS(rs, 5)] = T2E - T2D; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {112, 30, 30, 0} }; + +void X(codelet_hc2cbdft_12) (planner *p) { + X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts 
Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:45 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hc2cbdft_16 -include hc2cb.h */ + +/* + * This function contains 206 FP additions, 100 FP multiplications, + * (or, 136 additions, 30 multiplications, 70 fused multiply/add), + * 97 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T3w, T3z, T2Y, T3D, T3x, T3m, T3u, T3C, T3y, T3o, T3k, T3E, T3A; + { + E T20, Tf, T3Q, T32, T3V, T3f, T2a, TN, T2f, T1m, T3G, T2G, T3L, T2T, T26; + E T1F, T3M, 
T2N, T3H, T2W, T25, Tu, T1n, T1o, T3R, T3i, T2g, T1a, T21, T1y; + E T3W, T39; + { + E T2R, T1B, T2S, T1E; + { + E T1e, T3, T1C, TA, Tx, T6, T1D, T1h, Td, T1A, TL, T1k, Ta, TC, TF; + E T1z; + { + E T4, T5, T1f, T1g; + { + E T1, T2, Ty, Tz; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + Ty = Ip[0]; + Tz = Im[WS(rs, 7)]; + T4 = Rp[WS(rs, 4)]; + T1e = T1 - T2; + T3 = T1 + T2; + T1C = Ty - Tz; + TA = Ty + Tz; + T5 = Rm[WS(rs, 3)]; + } + T1f = Ip[WS(rs, 4)]; + T1g = Im[WS(rs, 3)]; + { + E Tb, Tc, TI, TJ; + Tb = Rm[WS(rs, 1)]; + Tx = T4 - T5; + T6 = T4 + T5; + T1D = T1f - T1g; + T1h = T1f + T1g; + Tc = Rp[WS(rs, 6)]; + TI = Im[WS(rs, 1)]; + TJ = Ip[WS(rs, 6)]; + { + E T8, TH, TK, T9, TD, TE; + T8 = Rp[WS(rs, 2)]; + Td = Tb + Tc; + TH = Tb - Tc; + T1A = TJ - TI; + TK = TI + TJ; + T9 = Rm[WS(rs, 5)]; + TD = Ip[WS(rs, 2)]; + TE = Im[WS(rs, 5)]; + TL = TH + TK; + T1k = TH - TK; + Ta = T8 + T9; + TC = T8 - T9; + TF = TD + TE; + T1z = TD - TE; + } + } + } + { + E T2E, TB, T1l, T1i, T3d, T3e, TM, T2F; + { + E T7, TG, Te, T30, T31, T1j; + T2E = T3 - T6; + T7 = T3 + T6; + T1j = TC - TF; + TG = TC + TF; + Te = Ta + Td; + T2R = Ta - Td; + TB = Tx + TA; + T30 = TA - Tx; + T31 = T1j - T1k; + T1l = T1j + T1k; + T1i = T1e - T1h; + T3d = T1e + T1h; + T20 = T7 - Te; + Tf = T7 + Te; + T3Q = FNMS(KP707106781, T31, T30); + T32 = FMA(KP707106781, T31, T30); + T3e = TG + TL; + TM = TG - TL; + } + T3V = FMA(KP707106781, T3e, T3d); + T3f = FNMS(KP707106781, T3e, T3d); + T2a = FNMS(KP707106781, TM, TB); + TN = FMA(KP707106781, TM, TB); + T2F = T1A - T1z; + T1B = T1z + T1A; + T2f = FNMS(KP707106781, T1l, T1i); + T1m = FMA(KP707106781, T1l, T1i); + T3G = T2E - T2F; + T2G = T2E + T2F; + T2S = T1C - T1D; + T1E = T1C + T1D; + } + } + { + E T34, TS, T2H, Tm, T1u, T2I, T33, TX, Tq, T14, Tp, T1v, T12, Tr, T15; + E T16; + { + E Tj, TT, Ti, T1s, TR, Tk, TU, TV; + { + E Tg, Th, TP, TQ; + Tg = Rp[WS(rs, 1)]; + T3L = T2S - T2R; + T2T = T2R + T2S; + T26 = T1E - T1B; + T1F = T1B + T1E; + Th = Rm[WS(rs, 6)]; + TP = 
Ip[WS(rs, 1)]; + TQ = Im[WS(rs, 6)]; + Tj = Rp[WS(rs, 5)]; + TT = Tg - Th; + Ti = Tg + Th; + T1s = TP - TQ; + TR = TP + TQ; + Tk = Rm[WS(rs, 2)]; + TU = Ip[WS(rs, 5)]; + TV = Im[WS(rs, 2)]; + } + { + E Tn, To, T10, T11; + Tn = Rm[0]; + { + E TO, Tl, T1t, TW; + TO = Tj - Tk; + Tl = Tj + Tk; + T1t = TU - TV; + TW = TU + TV; + T34 = TR - TO; + TS = TO + TR; + T2H = Ti - Tl; + Tm = Ti + Tl; + T1u = T1s + T1t; + T2I = T1s - T1t; + T33 = TT + TW; + TX = TT - TW; + To = Rp[WS(rs, 7)]; + } + T10 = Im[0]; + T11 = Ip[WS(rs, 7)]; + Tq = Rp[WS(rs, 3)]; + T14 = Tn - To; + Tp = Tn + To; + T1v = T11 - T10; + T12 = T10 + T11; + Tr = Rm[WS(rs, 4)]; + T15 = Ip[WS(rs, 3)]; + T16 = Im[WS(rs, 4)]; + } + } + { + E T13, T1x, T18, T35, T3g, T3h, T38, TY, T19; + { + E T2U, T2J, T37, Tt, T36, T2V, T2M, T2K, T2L; + T2U = T2H + T2I; + T2J = T2H - T2I; + { + E TZ, Ts, T1w, T17; + TZ = Tq - Tr; + Ts = Tq + Tr; + T1w = T15 - T16; + T17 = T15 + T16; + T37 = TZ + T12; + T13 = TZ - T12; + T2K = Tp - Ts; + Tt = Tp + Ts; + T1x = T1v + T1w; + T2L = T1v - T1w; + T36 = T14 + T17; + T18 = T14 - T17; + } + T2V = T2L - T2K; + T2M = T2K + T2L; + T3M = T2J - T2M; + T2N = T2J + T2M; + T3H = T2V - T2U; + T2W = T2U + T2V; + T35 = FMA(KP414213562, T34, T33); + T3g = FNMS(KP414213562, T33, T34); + T25 = Tm - Tt; + Tu = Tm + Tt; + T3h = FNMS(KP414213562, T36, T37); + T38 = FMA(KP414213562, T37, T36); + } + T1n = FNMS(KP414213562, TS, TX); + TY = FMA(KP414213562, TX, TS); + T19 = FNMS(KP414213562, T18, T13); + T1o = FMA(KP414213562, T13, T18); + T3R = T3h - T3g; + T3i = T3g + T3h; + T2g = T19 - TY; + T1a = TY + T19; + T21 = T1x - T1u; + T1y = T1u + T1x; + T3W = T35 + T38; + T39 = T35 - T38; + } + } + } + { + E T27, T22, T2c, T2u, T2x, T2h, T2s, T2A, T2w, T2B, T2v; + { + E T1K, Tv, T1G, T1N, T1Q, T1b, T2b, T1p, Tw, T1d; + T1K = Tf - Tu; + Tv = Tf + Tu; + T1G = T1y + T1F; + T1N = T1F - T1y; + T1Q = FNMS(KP923879532, T1a, TN); + T1b = FMA(KP923879532, T1a, TN); + T2b = T1n - T1o; + T1p = T1n + T1o; + Tw = W[0]; + T1d 
= W[1]; + { + E T1T, T1O, T1W, T1S, T1X, T1R; + { + E T1J, T1M, T1L, T1V, T1P, T1q; + T1T = FNMS(KP923879532, T1p, T1m); + T1q = FMA(KP923879532, T1p, T1m); + { + E T1c, T1I, T1H, T1r; + T1c = Tw * T1b; + T1J = W[14]; + T1H = Tw * T1q; + T1r = FMA(T1d, T1q, T1c); + T1M = W[15]; + T1L = T1J * T1K; + T1I = FNMS(T1d, T1b, T1H); + Rm[0] = Tv + T1r; + Rp[0] = Tv - T1r; + T1V = T1M * T1K; + Im[0] = T1I - T1G; + Ip[0] = T1G + T1I; + T1P = W[16]; + } + T1O = FNMS(T1M, T1N, T1L); + T1W = FMA(T1J, T1N, T1V); + T1S = W[17]; + T1X = T1P * T1T; + T1R = T1P * T1Q; + } + { + E T2r, T2n, T2q, T2p, T2z, T2t, T2o, T1Y, T1U; + T27 = T25 + T26; + T2r = T26 - T25; + T2o = T20 - T21; + T22 = T20 + T21; + T1Y = FNMS(T1S, T1Q, T1X); + T1U = FMA(T1S, T1T, T1R); + T2n = W[22]; + T2q = W[23]; + Im[WS(rs, 4)] = T1Y - T1W; + Ip[WS(rs, 4)] = T1W + T1Y; + Rm[WS(rs, 4)] = T1O + T1U; + Rp[WS(rs, 4)] = T1O - T1U; + T2p = T2n * T2o; + T2z = T2q * T2o; + T2c = FMA(KP923879532, T2b, T2a); + T2u = FNMS(KP923879532, T2b, T2a); + T2x = FNMS(KP923879532, T2g, T2f); + T2h = FMA(KP923879532, T2g, T2f); + T2t = W[24]; + T2s = FNMS(T2q, T2r, T2p); + T2A = FMA(T2n, T2r, T2z); + T2w = W[25]; + T2B = T2t * T2x; + T2v = T2t * T2u; + } + } + } + { + E T28, T2k, T2e, T2l, T2d; + { + E T1Z, T24, T23, T2j, T29, T2C, T2y; + T2C = FNMS(T2w, T2u, T2B); + T2y = FMA(T2w, T2x, T2v); + T1Z = W[6]; + T24 = W[7]; + Im[WS(rs, 6)] = T2C - T2A; + Ip[WS(rs, 6)] = T2A + T2C; + Rm[WS(rs, 6)] = T2s + T2y; + Rp[WS(rs, 6)] = T2s - T2y; + T23 = T1Z * T22; + T2j = T24 * T22; + T29 = W[8]; + T28 = FNMS(T24, T27, T23); + T2k = FMA(T1Z, T27, T2j); + T2e = W[9]; + T2l = T29 * T2h; + T2d = T29 * T2c; + } + { + E T4a, T4d, T3O, T4h, T4b, T40, T48, T4g, T4c, T42, T3Y; + { + E T3N, T47, T43, T46, T3F, T45, T4f, T3K, T3J, T3S, T3X, T3Z, T49, T41, T3T; + E T3U; + { + E T44, T3I, T2m, T2i, T3P; + T44 = FNMS(KP707106781, T3H, T3G); + T3I = FMA(KP707106781, T3H, T3G); + T2m = FNMS(T2e, T2c, T2l); + T2i = FMA(T2e, T2h, T2d); + T3N = FMA(KP707106781, 
T3M, T3L); + T47 = FNMS(KP707106781, T3M, T3L); + Im[WS(rs, 2)] = T2m - T2k; + Ip[WS(rs, 2)] = T2k + T2m; + Rm[WS(rs, 2)] = T28 + T2i; + Rp[WS(rs, 2)] = T28 - T2i; + T43 = W[26]; + T46 = W[27]; + T3F = W[10]; + T45 = T43 * T44; + T4f = T46 * T44; + T3K = W[11]; + T3J = T3F * T3I; + T4a = FNMS(KP923879532, T3R, T3Q); + T3S = FMA(KP923879532, T3R, T3Q); + T3X = FNMS(KP923879532, T3W, T3V); + T4d = FMA(KP923879532, T3W, T3V); + T3Z = T3K * T3I; + T3P = W[12]; + T49 = W[28]; + T41 = T3P * T3X; + T3T = T3P * T3S; + } + T3O = FNMS(T3K, T3N, T3J); + T4h = T49 * T4d; + T4b = T49 * T4a; + T40 = FMA(T3F, T3N, T3Z); + T3U = W[13]; + T48 = FNMS(T46, T47, T45); + T4g = FMA(T43, T47, T4f); + T4c = W[29]; + T42 = FNMS(T3U, T3S, T41); + T3Y = FMA(T3U, T3X, T3T); + } + { + E T3t, T2X, T3p, T3s, T2D, T3r, T3B, T2Q, T2P, T3a, T3j, T3l, T3v, T3n, T3b; + E T3c; + { + E T2O, T3q, T4i, T4e, T2Z; + T4i = FNMS(T4c, T4a, T4h); + T4e = FMA(T4c, T4d, T4b); + Im[WS(rs, 3)] = T42 - T40; + Ip[WS(rs, 3)] = T40 + T42; + Rm[WS(rs, 3)] = T3O + T3Y; + Rp[WS(rs, 3)] = T3O - T3Y; + Im[WS(rs, 7)] = T4i - T4g; + Ip[WS(rs, 7)] = T4g + T4i; + Rm[WS(rs, 7)] = T48 + T4e; + Rp[WS(rs, 7)] = T48 - T4e; + T3t = FNMS(KP707106781, T2W, T2T); + T2X = FMA(KP707106781, T2W, T2T); + T2O = FMA(KP707106781, T2N, T2G); + T3q = FNMS(KP707106781, T2N, T2G); + T3p = W[18]; + T3s = W[19]; + T2D = W[2]; + T3r = T3p * T3q; + T3B = T3s * T3q; + T2Q = W[3]; + T2P = T2D * T2O; + T3a = FMA(KP923879532, T39, T32); + T3w = FNMS(KP923879532, T39, T32); + T3z = FMA(KP923879532, T3i, T3f); + T3j = FNMS(KP923879532, T3i, T3f); + T3l = T2Q * T2O; + T2Z = W[4]; + T3v = W[20]; + T3n = T2Z * T3j; + T3b = T2Z * T3a; + } + T2Y = FNMS(T2Q, T2X, T2P); + T3D = T3v * T3z; + T3x = T3v * T3w; + T3m = FMA(T2D, T2X, T3l); + T3c = W[5]; + T3u = FNMS(T3s, T3t, T3r); + T3C = FMA(T3p, T3t, T3B); + T3y = W[21]; + T3o = FNMS(T3c, T3a, T3n); + T3k = FMA(T3c, T3j, T3b); + } + } + } + } + } + T3E = FNMS(T3y, T3w, T3D); + T3A = FMA(T3y, T3z, T3x); + Im[WS(rs, 
1)] = T3o - T3m; + Ip[WS(rs, 1)] = T3m + T3o; + Rm[WS(rs, 1)] = T2Y + T3k; + Rp[WS(rs, 1)] = T2Y - T3k; + Im[WS(rs, 5)] = T3E - T3C; + Ip[WS(rs, 5)] = T3C + T3E; + Rm[WS(rs, 5)] = T3u + T3A; + Rp[WS(rs, 5)] = T3u - T3A; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cbdft_16", twinstr, &GENUS, {136, 30, 70, 0} }; + +void X(codelet_hc2cbdft_16) (planner *p) { + X(khc2c_register) (p, hc2cbdft_16, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -dif -name hc2cbdft_16 -include hc2cb.h */ + +/* + * This function contains 206 FP additions, 84 FP multiplications, + * (or, 168 additions, 46 multiplications, 38 fused multiply/add), + * 60 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E TB, T2L, T30, T1n, Tf, T1U, T2H, T3p, T1E, T1Z, TM, T31, T2s, T3k, T1i; + E T2M, Tu, T1Y, T2Q, T2X, T2T, T2Y, TY, T1d, T19, T1e, T2v, T2C, T2y, T2D; + E T1x, T1V; + { + E T3, T1j, TA, T1B, T6, Tx, T1m, T1C, Ta, TC, TF, T1y, Td, TH, TK; + E T1z; + { + E T1, T2, Ty, Tz; + T1 = Rp[0]; + T2 = Rm[WS(rs, 7)]; + T3 = T1 + T2; + T1j = T1 - T2; + Ty = Ip[0]; + Tz = Im[WS(rs, 7)]; + TA = Ty + Tz; + T1B = Ty - Tz; + } + { + E T4, T5, T1k, T1l; + T4 = Rp[WS(rs, 4)]; + T5 = Rm[WS(rs, 3)]; + T6 = T4 + T5; + Tx = T4 - T5; + T1k = Ip[WS(rs, 4)]; + T1l = Im[WS(rs, 3)]; + T1m = T1k + T1l; + T1C = T1k - T1l; 
+ } + { + E T8, T9, TD, TE; + T8 = Rp[WS(rs, 2)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + TC = T8 - T9; + TD = Ip[WS(rs, 2)]; + TE = Im[WS(rs, 5)]; + TF = TD + TE; + T1y = TD - TE; + } + { + E Tb, Tc, TI, TJ; + Tb = Rm[WS(rs, 1)]; + Tc = Rp[WS(rs, 6)]; + Td = Tb + Tc; + TH = Tb - Tc; + TI = Im[WS(rs, 1)]; + TJ = Ip[WS(rs, 6)]; + TK = TI + TJ; + T1z = TJ - TI; + } + { + E T7, Te, TG, TL; + TB = Tx + TA; + T2L = TA - Tx; + T30 = T1j + T1m; + T1n = T1j - T1m; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T1U = T7 - Te; + { + E T2F, T2G, T1A, T1D; + T2F = Ta - Td; + T2G = T1B - T1C; + T2H = T2F + T2G; + T3p = T2G - T2F; + T1A = T1y + T1z; + T1D = T1B + T1C; + T1E = T1A + T1D; + T1Z = T1D - T1A; + } + TG = TC + TF; + TL = TH + TK; + TM = KP707106781 * (TG - TL); + T31 = KP707106781 * (TG + TL); + { + E T2q, T2r, T1g, T1h; + T2q = T3 - T6; + T2r = T1z - T1y; + T2s = T2q + T2r; + T3k = T2q - T2r; + T1g = TC - TF; + T1h = TH - TK; + T1i = KP707106781 * (T1g + T1h); + T2M = KP707106781 * (T1g - T1h); + } + } + } + { + E Ti, TT, TR, T1r, Tl, TO, TW, T1s, Tp, T14, T12, T1u, Ts, TZ, T17; + E T1v; + { + E Tg, Th, TP, TQ; + Tg = Rp[WS(rs, 1)]; + Th = Rm[WS(rs, 6)]; + Ti = Tg + Th; + TT = Tg - Th; + TP = Ip[WS(rs, 1)]; + TQ = Im[WS(rs, 6)]; + TR = TP + TQ; + T1r = TP - TQ; + } + { + E Tj, Tk, TU, TV; + Tj = Rp[WS(rs, 5)]; + Tk = Rm[WS(rs, 2)]; + Tl = Tj + Tk; + TO = Tj - Tk; + TU = Ip[WS(rs, 5)]; + TV = Im[WS(rs, 2)]; + TW = TU + TV; + T1s = TU - TV; + } + { + E Tn, To, T10, T11; + Tn = Rm[0]; + To = Rp[WS(rs, 7)]; + Tp = Tn + To; + T14 = Tn - To; + T10 = Im[0]; + T11 = Ip[WS(rs, 7)]; + T12 = T10 + T11; + T1u = T11 - T10; + } + { + E Tq, Tr, T15, T16; + Tq = Rp[WS(rs, 3)]; + Tr = Rm[WS(rs, 4)]; + Ts = Tq + Tr; + TZ = Tq - Tr; + T15 = Ip[WS(rs, 3)]; + T16 = Im[WS(rs, 4)]; + T17 = T15 + T16; + T1v = T15 - T16; + } + { + E Tm, Tt, T2O, T2P; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T1Y = Tm - Tt; + T2O = TR - TO; + T2P = TT + TW; + T2Q = FMA(KP382683432, T2O, KP923879532 
* T2P); + T2X = FNMS(KP923879532, T2O, KP382683432 * T2P); + } + { + E T2R, T2S, TS, TX; + T2R = TZ + T12; + T2S = T14 + T17; + T2T = FMA(KP382683432, T2R, KP923879532 * T2S); + T2Y = FNMS(KP923879532, T2R, KP382683432 * T2S); + TS = TO + TR; + TX = TT - TW; + TY = FMA(KP923879532, TS, KP382683432 * TX); + T1d = FNMS(KP382683432, TS, KP923879532 * TX); + } + { + E T13, T18, T2t, T2u; + T13 = TZ - T12; + T18 = T14 - T17; + T19 = FNMS(KP382683432, T18, KP923879532 * T13); + T1e = FMA(KP382683432, T13, KP923879532 * T18); + T2t = Ti - Tl; + T2u = T1r - T1s; + T2v = T2t - T2u; + T2C = T2t + T2u; + } + { + E T2w, T2x, T1t, T1w; + T2w = Tp - Ts; + T2x = T1u - T1v; + T2y = T2w + T2x; + T2D = T2x - T2w; + T1t = T1r + T1s; + T1w = T1u + T1v; + T1x = T1t + T1w; + T1V = T1w - T1t; + } + } + { + E Tv, T1F, T1b, T1N, T1p, T1P, T1L, T1R; + Tv = Tf + Tu; + T1F = T1x + T1E; + { + E TN, T1a, T1f, T1o; + TN = TB + TM; + T1a = TY + T19; + T1b = TN + T1a; + T1N = TN - T1a; + T1f = T1d + T1e; + T1o = T1i + T1n; + T1p = T1f + T1o; + T1P = T1o - T1f; + { + E T1I, T1K, T1H, T1J; + T1I = Tf - Tu; + T1K = T1E - T1x; + T1H = W[14]; + T1J = W[15]; + T1L = FNMS(T1J, T1K, T1H * T1I); + T1R = FMA(T1J, T1I, T1H * T1K); + } + } + { + E T1q, T1G, Tw, T1c; + Tw = W[0]; + T1c = W[1]; + T1q = FMA(Tw, T1b, T1c * T1p); + T1G = FNMS(T1c, T1b, Tw * T1p); + Rp[0] = Tv - T1q; + Ip[0] = T1F + T1G; + Rm[0] = Tv + T1q; + Im[0] = T1G - T1F; + } + { + E T1Q, T1S, T1M, T1O; + T1M = W[16]; + T1O = W[17]; + T1Q = FMA(T1M, T1N, T1O * T1P); + T1S = FNMS(T1O, T1N, T1M * T1P); + Rp[WS(rs, 4)] = T1L - T1Q; + Ip[WS(rs, 4)] = T1R + T1S; + Rm[WS(rs, 4)] = T1L + T1Q; + Im[WS(rs, 4)] = T1S - T1R; + } + } + { + E T25, T2j, T29, T2l, T21, T2b, T2h, T2n; + { + E T23, T24, T27, T28; + T23 = TB - TM; + T24 = T1d - T1e; + T25 = T23 + T24; + T2j = T23 - T24; + T27 = T19 - TY; + T28 = T1n - T1i; + T29 = T27 + T28; + T2l = T28 - T27; + } + { + E T1W, T20, T1T, T1X; + T1W = T1U + T1V; + T20 = T1Y + T1Z; + T1T = W[6]; + T1X = W[7]; + 
T21 = FNMS(T1X, T20, T1T * T1W); + T2b = FMA(T1X, T1W, T1T * T20); + } + { + E T2e, T2g, T2d, T2f; + T2e = T1U - T1V; + T2g = T1Z - T1Y; + T2d = W[22]; + T2f = W[23]; + T2h = FNMS(T2f, T2g, T2d * T2e); + T2n = FMA(T2f, T2e, T2d * T2g); + } + { + E T2a, T2c, T22, T26; + T22 = W[8]; + T26 = W[9]; + T2a = FMA(T22, T25, T26 * T29); + T2c = FNMS(T26, T25, T22 * T29); + Rp[WS(rs, 2)] = T21 - T2a; + Ip[WS(rs, 2)] = T2b + T2c; + Rm[WS(rs, 2)] = T21 + T2a; + Im[WS(rs, 2)] = T2c - T2b; + } + { + E T2m, T2o, T2i, T2k; + T2i = W[24]; + T2k = W[25]; + T2m = FMA(T2i, T2j, T2k * T2l); + T2o = FNMS(T2k, T2j, T2i * T2l); + Rp[WS(rs, 6)] = T2h - T2m; + Ip[WS(rs, 6)] = T2n + T2o; + Rm[WS(rs, 6)] = T2h + T2m; + Im[WS(rs, 6)] = T2o - T2n; + } + } + { + E T2A, T38, T2I, T3a, T2V, T3d, T33, T3f, T2z, T2E; + T2z = KP707106781 * (T2v + T2y); + T2A = T2s + T2z; + T38 = T2s - T2z; + T2E = KP707106781 * (T2C + T2D); + T2I = T2E + T2H; + T3a = T2H - T2E; + { + E T2N, T2U, T2Z, T32; + T2N = T2L + T2M; + T2U = T2Q - T2T; + T2V = T2N + T2U; + T3d = T2N - T2U; + T2Z = T2X + T2Y; + T32 = T30 - T31; + T33 = T2Z + T32; + T3f = T32 - T2Z; + } + { + E T2J, T35, T34, T36; + { + E T2p, T2B, T2K, T2W; + T2p = W[2]; + T2B = W[3]; + T2J = FNMS(T2B, T2I, T2p * T2A); + T35 = FMA(T2B, T2A, T2p * T2I); + T2K = W[4]; + T2W = W[5]; + T34 = FMA(T2K, T2V, T2W * T33); + T36 = FNMS(T2W, T2V, T2K * T33); + } + Rp[WS(rs, 1)] = T2J - T34; + Ip[WS(rs, 1)] = T35 + T36; + Rm[WS(rs, 1)] = T2J + T34; + Im[WS(rs, 1)] = T36 - T35; + } + { + E T3b, T3h, T3g, T3i; + { + E T37, T39, T3c, T3e; + T37 = W[18]; + T39 = W[19]; + T3b = FNMS(T39, T3a, T37 * T38); + T3h = FMA(T39, T38, T37 * T3a); + T3c = W[20]; + T3e = W[21]; + T3g = FMA(T3c, T3d, T3e * T3f); + T3i = FNMS(T3e, T3d, T3c * T3f); + } + Rp[WS(rs, 5)] = T3b - T3g; + Ip[WS(rs, 5)] = T3h + T3i; + Rm[WS(rs, 5)] = T3b + T3g; + Im[WS(rs, 5)] = T3i - T3h; + } + } + { + E T3m, T3E, T3q, T3G, T3v, T3J, T3z, T3L, T3l, T3o; + T3l = KP707106781 * (T2D - T2C); + T3m = T3k + T3l; + T3E = 
T3k - T3l; + T3o = KP707106781 * (T2v - T2y); + T3q = T3o + T3p; + T3G = T3p - T3o; + { + E T3t, T3u, T3x, T3y; + T3t = T2L - T2M; + T3u = T2X - T2Y; + T3v = T3t + T3u; + T3J = T3t - T3u; + T3x = T31 + T30; + T3y = T2Q + T2T; + T3z = T3x - T3y; + T3L = T3y + T3x; + } + { + E T3r, T3B, T3A, T3C; + { + E T3j, T3n, T3s, T3w; + T3j = W[10]; + T3n = W[11]; + T3r = FNMS(T3n, T3q, T3j * T3m); + T3B = FMA(T3n, T3m, T3j * T3q); + T3s = W[12]; + T3w = W[13]; + T3A = FMA(T3s, T3v, T3w * T3z); + T3C = FNMS(T3w, T3v, T3s * T3z); + } + Rp[WS(rs, 3)] = T3r - T3A; + Ip[WS(rs, 3)] = T3B + T3C; + Rm[WS(rs, 3)] = T3r + T3A; + Im[WS(rs, 3)] = T3C - T3B; + } + { + E T3H, T3N, T3M, T3O; + { + E T3D, T3F, T3I, T3K; + T3D = W[26]; + T3F = W[27]; + T3H = FNMS(T3F, T3G, T3D * T3E); + T3N = FMA(T3F, T3E, T3D * T3G); + T3I = W[28]; + T3K = W[29]; + T3M = FMA(T3I, T3J, T3K * T3L); + T3O = FNMS(T3K, T3J, T3I * T3L); + } + Rp[WS(rs, 7)] = T3H - T3M; + Ip[WS(rs, 7)] = T3N + T3O; + Rm[WS(rs, 7)] = T3H + T3M; + Im[WS(rs, 7)] = T3O - T3N; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cbdft_16", twinstr, &GENUS, {168, 46, 38, 0} }; + +void X(codelet_hc2cbdft_16) (planner *p) { + X(khc2c_register) (p, hc2cbdft_16, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:44 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -dif -name hc2cbdft_2 -include hc2cb.h */ + +/* + * This function contains 10 FP additions, 4 FP multiplications, + * (or, 8 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T9, Ta, T3, Tc, T7, T4; + { + E T1, T2, T5, T6; + T1 = Ip[0]; + T2 = Im[0]; + T5 = Rp[0]; + T6 = Rm[0]; + T9 = W[1]; + Ta = T1 + T2; + T3 = T1 - T2; + Tc = T5 + T6; + T7 = T5 - T6; + T4 = W[0]; + } + { + E Td, T8, Te, Tb; + Td = T9 * T7; + T8 = T4 * T7; + Te = FMA(T4, Ta, Td); + Tb = FNMS(T9, Ta, T8); + Rm[0] = Tc + Te; + Rp[0] = Tc - Te; + Im[0] = Tb - T3; + Ip[0] = T3 + Tb; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cbdft_2", twinstr, &GENUS, {8, 2, 2, 0} }; + +void X(codelet_hc2cbdft_2) (planner *p) { + X(khc2c_register) (p, hc2cbdft_2, &desc, HC2C_VIA_DFT); +} 
+#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -dif -name hc2cbdft_2 -include hc2cb.h */ + +/* + * This function contains 10 FP additions, 4 FP multiplications, + * (or, 8 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T3, T9, T7, Tb; + { + E T1, T2, T5, T6; + T1 = Ip[0]; + T2 = Im[0]; + T3 = T1 - T2; + T9 = T1 + T2; + T5 = Rp[0]; + T6 = Rm[0]; + T7 = T5 - T6; + Tb = T5 + T6; + } + { + E Ta, Tc, T4, T8; + T4 = W[0]; + T8 = W[1]; + Ta = FNMS(T8, T9, T4 * T7); + Tc = FMA(T8, T7, T4 * T9); + Ip[0] = T3 + Ta; + Rp[0] = Tb - Tc; + Im[0] = Ta - T3; + Rm[0] = Tb + Tc; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cbdft_2", twinstr, &GENUS, {8, 2, 2, 0} }; + +void X(codelet_hc2cbdft_2) (planner *p) { + X(khc2c_register) (p, hc2cbdft_2, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1135 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:45 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cbdft_20 -include hc2cb.h */ + +/* + * This function contains 286 FP additions, 148 FP multiplications, + * (or, 176 additions, 38 multiplications, 110 fused multiply/add), + * 122 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T5s, T5v, T5t, T5z, T5q, T5y, T5u, T5A, T5w; + { + E T3T, T27, T2o, T41, T2p, T40, TU, T15, T2Q, T1N, T2L, T1w, T59, T4n, T5e; + E T4A, T2m, T24, T2Z, T2h, T4J, T3P, T3Y, T3W, T2d, TJ, T3H, T2c, TD, T52; + E T3G, T1E, T4f, T5I, T4e, T4w, T5L, T4v, T1J, T1H; + { + E T1A, T3, T25, TI, TF, T6, T26, T1D, TO, T47, T3z, Te, T1S, T3M, T1e; + E T4k, TZ, T4a, T3C, Tt, T1Z, T3J, 
T1p, T4h, T14, T4b, T3D, TA, T22, T3K; + E T1u, T4i, Ti, T1f, Th, T1T, TS, Tj, T1g, T1h; + { + E T4, T5, T1B, T1C; + { + E T1, T2, TG, TH; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + TG = Ip[0]; + TH = Im[WS(rs, 9)]; + T4 = Rp[WS(rs, 5)]; + T1A = T1 - T2; + T3 = T1 + T2; + T25 = TG - TH; + TI = TG + TH; + T5 = Rm[WS(rs, 4)]; + T1B = Ip[WS(rs, 5)]; + T1C = Im[WS(rs, 4)]; + } + { + E Tq, T1l, Tp, T1X, TY, Tr, T1m, T1n; + { + E Tb, T1a, Ta, T1Q, TN, Tc, T1b, T1c; + { + E T8, T9, TL, TM; + T8 = Rp[WS(rs, 4)]; + TF = T4 - T5; + T6 = T4 + T5; + T26 = T1B - T1C; + T1D = T1B + T1C; + T9 = Rm[WS(rs, 5)]; + TL = Ip[WS(rs, 4)]; + TM = Im[WS(rs, 5)]; + Tb = Rp[WS(rs, 9)]; + T1a = T8 - T9; + Ta = T8 + T9; + T1Q = TL - TM; + TN = TL + TM; + Tc = Rm[0]; + T1b = Ip[WS(rs, 9)]; + T1c = Im[0]; + } + { + E Tn, To, TW, TX; + Tn = Rp[WS(rs, 8)]; + { + E TK, Td, T1R, T1d; + TK = Tb - Tc; + Td = Tb + Tc; + T1R = T1b - T1c; + T1d = T1b + T1c; + TO = TK + TN; + T47 = TN - TK; + T3z = Ta - Td; + Te = Ta + Td; + T1S = T1Q + T1R; + T3M = T1Q - T1R; + T1e = T1a - T1d; + T4k = T1a + T1d; + To = Rm[WS(rs, 1)]; + } + TW = Ip[WS(rs, 8)]; + TX = Im[WS(rs, 1)]; + Tq = Rm[WS(rs, 6)]; + T1l = Tn - To; + Tp = Tn + To; + T1X = TW - TX; + TY = TW + TX; + Tr = Rp[WS(rs, 3)]; + T1m = Im[WS(rs, 6)]; + T1n = Ip[WS(rs, 3)]; + } + } + { + E Tx, T1q, Tw, T20, T13, Ty, T1r, T1s; + { + E Tu, Tv, T11, T12; + Tu = Rm[WS(rs, 7)]; + { + E TV, Ts, T1Y, T1o; + TV = Tq - Tr; + Ts = Tq + Tr; + T1Y = T1n - T1m; + T1o = T1m + T1n; + TZ = TV + TY; + T4a = TY - TV; + T3C = Tp - Ts; + Tt = Tp + Ts; + T1Z = T1X + T1Y; + T3J = T1X - T1Y; + T1p = T1l + T1o; + T4h = T1l - T1o; + Tv = Rp[WS(rs, 2)]; + } + T11 = Im[WS(rs, 7)]; + T12 = Ip[WS(rs, 2)]; + Tx = Rm[WS(rs, 2)]; + T1q = Tu - Tv; + Tw = Tu + Tv; + T20 = T12 - T11; + T13 = T11 + T12; + Ty = Rp[WS(rs, 7)]; + T1r = Im[WS(rs, 2)]; + T1s = Ip[WS(rs, 7)]; + } + { + E Tf, Tg, TQ, TR; + Tf = Rm[WS(rs, 3)]; + { + E T10, Tz, T21, T1t; + T10 = Tx - Ty; + Tz = Tx + Ty; + T21 = T1s - T1r; + 
T1t = T1r + T1s; + T14 = T10 - T13; + T4b = T10 + T13; + T3D = Tw - Tz; + TA = Tw + Tz; + T22 = T20 + T21; + T3K = T20 - T21; + T1u = T1q + T1t; + T4i = T1q - T1t; + Tg = Rp[WS(rs, 6)]; + } + TQ = Im[WS(rs, 3)]; + TR = Ip[WS(rs, 6)]; + Ti = Rp[WS(rs, 1)]; + T1f = Tf - Tg; + Th = Tf + Tg; + T1T = TR - TQ; + TS = TQ + TR; + Tj = Rm[WS(rs, 8)]; + T1g = Ip[WS(rs, 1)]; + T1h = Im[WS(rs, 8)]; + } + } + } + } + { + E T1V, T3N, TB, T3B, Tm, T3E, T1F, T1G, T4t, T4j, T4m, T4s, T4c, T4y, T4z; + E T49, T3y, T7; + { + E TT, T48, T1j, T4l, T3A, Tl; + T3T = T25 - T26; + T27 = T25 + T26; + { + E TP, Tk, T1U, T1i; + TP = Ti - Tj; + Tk = Ti + Tj; + T1U = T1g - T1h; + T1i = T1g + T1h; + TT = TP - TS; + T48 = TP + TS; + T3A = Th - Tk; + Tl = Th + Tk; + T1V = T1T + T1U; + T3N = T1T - T1U; + T1j = T1f - T1i; + T4l = T1f + T1i; + T2o = Tt - TA; + TB = Tt + TA; + } + T41 = T3z - T3A; + T3B = T3z + T3A; + Tm = Te + Tl; + T2p = Te - Tl; + { + E T1L, T1M, T1k, T1v; + T40 = T3C - T3D; + T3E = T3C + T3D; + TU = TO + TT; + T1L = TO - TT; + T1M = TZ - T14; + T15 = TZ + T14; + T1F = T1e + T1j; + T1k = T1e - T1j; + T1v = T1p - T1u; + T1G = T1p + T1u; + T4t = T4h + T4i; + T4j = T4h - T4i; + T2Q = FNMS(KP618033988, T1L, T1M); + T1N = FMA(KP618033988, T1M, T1L); + T2L = FNMS(KP618033988, T1k, T1v); + T1w = FMA(KP618033988, T1v, T1k); + T4m = T4k - T4l; + T4s = T4k + T4l; + T4c = T4a - T4b; + T4y = T4a + T4b; + T4z = T47 + T48; + T49 = T47 - T48; + } + } + { + E T2g, T1W, T23, T2f; + T2g = T1S - T1V; + T1W = T1S + T1V; + T59 = FMA(KP618033988, T4j, T4m); + T4n = FNMS(KP618033988, T4m, T4j); + T5e = FMA(KP618033988, T4y, T4z); + T4A = FNMS(KP618033988, T4z, T4y); + T23 = T1Z + T22; + T2f = T1Z - T22; + { + E T3V, T3L, T3O, T3U; + T3V = T3J + T3K; + T3L = T3J - T3K; + T2m = T1W - T23; + T24 = T1W + T23; + T2Z = FMA(KP618033988, T2f, T2g); + T2h = FNMS(KP618033988, T2g, T2f); + T3O = T3M - T3N; + T3U = T3M + T3N; + T3y = T3 - T6; + T7 = T3 + T6; + T4J = FMA(KP618033988, T3L, T3O); + T3P = 
FNMS(KP618033988, T3O, T3L); + T3Y = T3U - T3V; + T3W = T3U + T3V; + } + } + { + E T46, TC, T3F, T4r, T4d, T4u; + TC = Tm + TB; + T2d = Tm - TB; + TJ = TF + TI; + T46 = TI - TF; + T3H = T3B - T3E; + T3F = T3B + T3E; + T2c = FNMS(KP250000000, TC, T7); + TD = T7 + TC; + T52 = T3y + T3F; + T3G = FNMS(KP250000000, T3F, T3y); + T4r = T1A + T1D; + T1E = T1A - T1D; + T4f = T49 - T4c; + T4d = T49 + T4c; + T5I = T46 + T4d; + T4e = FNMS(KP250000000, T4d, T46); + T4w = T4s - T4t; + T4u = T4s + T4t; + T5L = T4u + T4r; + T4v = FNMS(KP250000000, T4u, T4r); + T1J = T1F - T1G; + T1H = T1F + T1G; + } + } + } + { + E T38, T3b, T39, T3f, T36, T3e, T3a; + { + E T28, T3r, T3o, T3v, T3p, T2b, T2k, T35, T3l, T2H, T2r, T2j, T2z, T2D, T2G; + E T2X, T2F, T2T, T32, T3h, T3k, T31, T3d, T3j, T3t, T1x, T2u, T1O, T2x, T2v; + E T1y, T2B, T29, T2J, T2M, T2R, T2N, T2V; + { + E T2l, T1I, T18, T2q, T34, T17, T16, T3n; + T28 = T24 + T27; + T2l = FNMS(KP250000000, T24, T27); + T3r = T1H + T1E; + T1I = FNMS(KP250000000, T1H, T1E); + T18 = TU - T15; + T16 = TU + T15; + T3n = W[8]; + T2q = FNMS(KP618033988, T2p, T2o); + T34 = FMA(KP618033988, T2o, T2p); + T17 = FNMS(KP250000000, T16, TJ); + T3o = TJ + T16; + T3v = T3n * T3r; + T3p = T3n * T3o; + { + E T2Y, T2E, T3i, T30; + { + E T2e, T33, T2n, T2i; + T2Y = FMA(KP559016994, T2d, T2c); + T2e = FNMS(KP559016994, T2d, T2c); + T2b = W[14]; + T2k = W[15]; + T33 = FMA(KP559016994, T2m, T2l); + T2n = FNMS(KP559016994, T2m, T2l); + T2E = FMA(KP951056516, T2h, T2e); + T2i = FNMS(KP951056516, T2h, T2e); + T35 = FMA(KP951056516, T34, T33); + T3l = FNMS(KP951056516, T34, T33); + T2H = FNMS(KP951056516, T2q, T2n); + T2r = FMA(KP951056516, T2q, T2n); + T2j = T2b * T2i; + T2z = T2k * T2i; + T2D = W[22]; + T2G = W[23]; + } + T2X = W[30]; + T2F = T2D * T2E; + T2T = T2G * T2E; + T3i = FMA(KP951056516, T2Z, T2Y); + T30 = FNMS(KP951056516, T2Z, T2Y); + T32 = W[31]; + T3h = W[6]; + T3k = W[7]; + T31 = T2X * T30; + T3d = T32 * T30; + T3j = T3h * T3i; + T3t = T3k * T3i; + } + { 
+ E T2K, T2P, TE, T19, T1K, T2t, T37; + T2K = FNMS(KP559016994, T18, T17); + T19 = FMA(KP559016994, T18, T17); + T1K = FMA(KP559016994, T1J, T1I); + T2P = FNMS(KP559016994, T1J, T1I); + TE = W[0]; + T2t = W[16]; + T1x = FMA(KP951056516, T1w, T19); + T2u = FNMS(KP951056516, T1w, T19); + T1O = FNMS(KP951056516, T1N, T1K); + T2x = FMA(KP951056516, T1N, T1K); + T2v = T2t * T2u; + T1y = TE * T1x; + T2B = T2t * T2x; + T29 = TE * T1O; + T2J = W[24]; + T37 = W[32]; + T2M = FMA(KP951056516, T2L, T2K); + T38 = FNMS(KP951056516, T2L, T2K); + T2R = FNMS(KP951056516, T2Q, T2P); + T3b = FMA(KP951056516, T2Q, T2P); + T39 = T37 * T38; + T2N = T2J * T2M; + T3f = T37 * T3b; + } + } + T2V = T2J * T2R; + { + E T3m, T3u, T3q, T2a, T1P, T1z; + T1z = W[1]; + T3m = FNMS(T3k, T3l, T3j); + T3u = FMA(T3h, T3l, T3t); + T3q = W[9]; + T2a = FNMS(T1z, T1x, T29); + T1P = FMA(T1z, T1O, T1y); + { + E T2s, T2A, T2w, T3w, T3s; + T2s = FNMS(T2k, T2r, T2j); + T3w = FNMS(T3q, T3o, T3v); + T3s = FMA(T3q, T3r, T3p); + Im[0] = T2a - T28; + Ip[0] = T28 + T2a; + Rm[0] = TD + T1P; + Rp[0] = TD - T1P; + Im[WS(rs, 2)] = T3w - T3u; + Ip[WS(rs, 2)] = T3u + T3w; + Rm[WS(rs, 2)] = T3m + T3s; + Rp[WS(rs, 2)] = T3m - T3s; + T2A = FMA(T2b, T2r, T2z); + T2w = W[17]; + { + E T2I, T2U, T2O, T2C, T2y, T2W, T2S; + T2I = FNMS(T2G, T2H, T2F); + T2U = FMA(T2D, T2H, T2T); + T2O = W[25]; + T2C = FNMS(T2w, T2u, T2B); + T2y = FMA(T2w, T2x, T2v); + T36 = FNMS(T32, T35, T31); + T2W = FNMS(T2O, T2M, T2V); + T2S = FMA(T2O, T2R, T2N); + Im[WS(rs, 4)] = T2C - T2A; + Ip[WS(rs, 4)] = T2A + T2C; + Rm[WS(rs, 4)] = T2s + T2y; + Rp[WS(rs, 4)] = T2s - T2y; + Im[WS(rs, 6)] = T2W - T2U; + Ip[WS(rs, 6)] = T2U + T2W; + Rm[WS(rs, 6)] = T2I + T2S; + Rp[WS(rs, 6)] = T2I - T2S; + T3e = FMA(T2X, T35, T3d); + T3a = W[33]; + } + } + } + } + { + E T55, T51, T54, T53, T5h, T5P, T5J, T3x, T4P, T5F, T5p, T43, T3R, T3S, T5l; + E T5o, T4D, T5n, T5x, T4H, T4M, T5B, T5E, T4L, T4X, T5D, T5N, T4S, T4o, T4V; + E T4B, T4T, T4p, T4Z, T4F, T57, T5a, T5f, T5b, T5j; + 
{ + E T3X, T4O, T42, T3g, T3c, T5H; + T55 = T3W + T3T; + T3X = FNMS(KP250000000, T3W, T3T); + T51 = W[18]; + T3g = FNMS(T3a, T38, T3f); + T3c = FMA(T3a, T3b, T39); + T54 = W[19]; + T53 = T51 * T52; + Im[WS(rs, 8)] = T3g - T3e; + Ip[WS(rs, 8)] = T3e + T3g; + Rm[WS(rs, 8)] = T36 + T3c; + Rp[WS(rs, 8)] = T36 - T3c; + T5h = T54 * T52; + T5H = W[28]; + T4O = FMA(KP618033988, T40, T41); + T42 = FNMS(KP618033988, T41, T40); + T5P = T5H * T5L; + T5J = T5H * T5I; + { + E T4I, T5m, T3Q, T3I, T3Z, T4N, T4K, T5C; + T3I = FNMS(KP559016994, T3H, T3G); + T4I = FMA(KP559016994, T3H, T3G); + T3Z = FNMS(KP559016994, T3Y, T3X); + T4N = FMA(KP559016994, T3Y, T3X); + T3x = W[2]; + T5m = FNMS(KP951056516, T3P, T3I); + T3Q = FMA(KP951056516, T3P, T3I); + T4P = FMA(KP951056516, T4O, T4N); + T5F = FNMS(KP951056516, T4O, T4N); + T5p = FMA(KP951056516, T42, T3Z); + T43 = FNMS(KP951056516, T42, T3Z); + T3R = T3x * T3Q; + T3S = W[3]; + T5l = W[34]; + T5o = W[35]; + T4D = T3S * T3Q; + T5n = T5l * T5m; + T5x = T5o * T5m; + T4K = FNMS(KP951056516, T4J, T4I); + T5C = FMA(KP951056516, T4J, T4I); + T4H = W[10]; + T4M = W[11]; + T5B = W[26]; + T5E = W[27]; + T4L = T4H * T4K; + T4X = T4M * T4K; + T5D = T5B * T5C; + T5N = T5E * T5C; + } + { + E T58, T5d, T45, T4g, T4x, T4R, T5r; + T4g = FNMS(KP559016994, T4f, T4e); + T58 = FMA(KP559016994, T4f, T4e); + T5d = FMA(KP559016994, T4w, T4v); + T4x = FNMS(KP559016994, T4w, T4v); + T45 = W[4]; + T4R = W[12]; + T4S = FNMS(KP951056516, T4n, T4g); + T4o = FMA(KP951056516, T4n, T4g); + T4V = FMA(KP951056516, T4A, T4x); + T4B = FNMS(KP951056516, T4A, T4x); + T4T = T4R * T4S; + T4p = T45 * T4o; + T4Z = T4R * T4V; + T4F = T45 * T4B; + T57 = W[20]; + T5r = W[36]; + T5s = FNMS(KP951056516, T59, T58); + T5a = FMA(KP951056516, T59, T58); + T5v = FMA(KP951056516, T5e, T5d); + T5f = FNMS(KP951056516, T5e, T5d); + T5t = T5r * T5s; + T5b = T57 * T5a; + T5z = T5r * T5v; + } + } + T5j = T57 * T5f; + { + E T44, T4E, T5G, T5O, T5K, T4G, T4C, T4q; + T44 = FNMS(T3S, T43, T3R); + 
T4E = FMA(T3x, T43, T4D); + T4q = W[5]; + T5G = FNMS(T5E, T5F, T5D); + T5O = FMA(T5B, T5F, T5N); + T5K = W[29]; + T4G = FNMS(T4q, T4o, T4F); + T4C = FMA(T4q, T4B, T4p); + { + E T4Q, T4Y, T4U, T5Q, T5M; + T4Q = FNMS(T4M, T4P, T4L); + T5Q = FNMS(T5K, T5I, T5P); + T5M = FMA(T5K, T5L, T5J); + Im[WS(rs, 1)] = T4G - T4E; + Ip[WS(rs, 1)] = T4E + T4G; + Rm[WS(rs, 1)] = T44 + T4C; + Rp[WS(rs, 1)] = T44 - T4C; + Im[WS(rs, 7)] = T5Q - T5O; + Ip[WS(rs, 7)] = T5O + T5Q; + Rm[WS(rs, 7)] = T5G + T5M; + Rp[WS(rs, 7)] = T5G - T5M; + T4Y = FMA(T4H, T4P, T4X); + T4U = W[13]; + { + E T56, T5i, T5c, T50, T4W, T5k, T5g; + T56 = FNMS(T54, T55, T53); + T5i = FMA(T51, T55, T5h); + T5c = W[21]; + T50 = FNMS(T4U, T4S, T4Z); + T4W = FMA(T4U, T4V, T4T); + T5q = FNMS(T5o, T5p, T5n); + T5k = FNMS(T5c, T5a, T5j); + T5g = FMA(T5c, T5f, T5b); + Im[WS(rs, 3)] = T50 - T4Y; + Ip[WS(rs, 3)] = T4Y + T50; + Rm[WS(rs, 3)] = T4Q + T4W; + Rp[WS(rs, 3)] = T4Q - T4W; + Im[WS(rs, 5)] = T5k - T5i; + Ip[WS(rs, 5)] = T5i + T5k; + Rm[WS(rs, 5)] = T56 + T5g; + Rp[WS(rs, 5)] = T56 - T5g; + T5y = FMA(T5l, T5p, T5x); + T5u = W[37]; + } + } + } + } + } + } + T5A = FNMS(T5u, T5s, T5z); + T5w = FMA(T5u, T5v, T5t); + Im[WS(rs, 9)] = T5A - T5y; + Ip[WS(rs, 9)] = T5y + T5A; + Rm[WS(rs, 9)] = T5q + T5w; + Rp[WS(rs, 9)] = T5q - T5w; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cbdft_20", twinstr, &GENUS, {176, 38, 110, 0} }; + +void X(codelet_hc2cbdft_20) (planner *p) { + X(khc2c_register) (p, hc2cbdft_20, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cbdft_20 -include hc2cb.h */ + +/* + * This function contains 286 FP additions, 124 FP multiplications, + * (or, 224 additions, 62 multiplications, 62 fused multiply/add), + * 89 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cb.h" + +static 
void hc2cbdft_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T7, T3N, T4a, T16, T1G, T3g, T3D, T26, T1k, T3A, T3B, T1v, T2e, T48, T47; + E T2d, T1L, T43, T40, T1K, T2l, T3t, T2m, T3w, T3n, T3p, TC, T2b, T4d, T4f; + E T23, T2j, T1B, T1H, T3U, T3W, T3G, T3I, T11, T17; + { + E T3, T1C, T15, T24, T6, T12, T1F, T25; + { + E T1, T2, T13, T14; + T1 = Rp[0]; + T2 = Rm[WS(rs, 9)]; + T3 = T1 + T2; + T1C = T1 - T2; + T13 = Ip[0]; + T14 = Im[WS(rs, 9)]; + T15 = T13 + T14; + T24 = T13 - T14; + } + { + E T4, T5, T1D, T1E; + T4 = Rp[WS(rs, 5)]; + T5 = Rm[WS(rs, 4)]; + T6 = T4 + T5; + T12 = T4 - T5; + T1D = Ip[WS(rs, 5)]; + T1E = Im[WS(rs, 4)]; + T1F = T1D + T1E; + T25 = T1D - T1E; + } + T7 = T3 + T6; + T3N = T15 - T12; + T4a = T1C + T1F; + T16 = T12 + T15; + T1G = T1C - T1F; + T3g = T3 - T6; + T3D = T24 - T25; + T26 = T24 + T25; + } + { + E Te, T3O, T3Y, TJ, T1e, T3h, T3r, T1R, TA, T3S, T42, TZ, T1u, T3l, T3v; + E T21, Tl, T3P, T3Z, TO, T1j, T3i, T3s, T1U, Tt, T3R, T41, TU, T1p, T3k; + E T3u, T1Y; + { + E Ta, T1a, TI, T1P, Td, TF, T1d, T1Q; + { + E T8, T9, TG, TH; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 5)]; + Ta = T8 + T9; + T1a = T8 - T9; + TG = Ip[WS(rs, 4)]; + TH = Im[WS(rs, 5)]; + TI = TG + TH; + T1P = TG - TH; + } + { + E Tb, Tc, T1b, T1c; + Tb = Rp[WS(rs, 9)]; + Tc = Rm[0]; + Td = Tb + Tc; + TF = Tb - Tc; + T1b = Ip[WS(rs, 9)]; + T1c = Im[0]; + T1d = T1b + T1c; + T1Q = T1b - T1c; + } + Te = Ta + Td; + T3O = TI - TF; + T3Y = T1a + T1d; + TJ = TF + TI; + T1e = T1a - T1d; + T3h = Ta - Td; + T3r = T1P 
- T1Q; + T1R = T1P + T1Q; + } + { + E Tw, T1q, TY, T1Z, Tz, TV, T1t, T20; + { + E Tu, Tv, TW, TX; + Tu = Rm[WS(rs, 7)]; + Tv = Rp[WS(rs, 2)]; + Tw = Tu + Tv; + T1q = Tu - Tv; + TW = Im[WS(rs, 7)]; + TX = Ip[WS(rs, 2)]; + TY = TW + TX; + T1Z = TX - TW; + } + { + E Tx, Ty, T1r, T1s; + Tx = Rm[WS(rs, 2)]; + Ty = Rp[WS(rs, 7)]; + Tz = Tx + Ty; + TV = Tx - Ty; + T1r = Im[WS(rs, 2)]; + T1s = Ip[WS(rs, 7)]; + T1t = T1r + T1s; + T20 = T1s - T1r; + } + TA = Tw + Tz; + T3S = TV + TY; + T42 = T1q - T1t; + TZ = TV - TY; + T1u = T1q + T1t; + T3l = Tw - Tz; + T3v = T1Z - T20; + T21 = T1Z + T20; + } + { + E Th, T1f, TN, T1S, Tk, TK, T1i, T1T; + { + E Tf, Tg, TL, TM; + Tf = Rm[WS(rs, 3)]; + Tg = Rp[WS(rs, 6)]; + Th = Tf + Tg; + T1f = Tf - Tg; + TL = Im[WS(rs, 3)]; + TM = Ip[WS(rs, 6)]; + TN = TL + TM; + T1S = TM - TL; + } + { + E Ti, Tj, T1g, T1h; + Ti = Rp[WS(rs, 1)]; + Tj = Rm[WS(rs, 8)]; + Tk = Ti + Tj; + TK = Ti - Tj; + T1g = Ip[WS(rs, 1)]; + T1h = Im[WS(rs, 8)]; + T1i = T1g + T1h; + T1T = T1g - T1h; + } + Tl = Th + Tk; + T3P = TK + TN; + T3Z = T1f + T1i; + TO = TK - TN; + T1j = T1f - T1i; + T3i = Th - Tk; + T3s = T1S - T1T; + T1U = T1S + T1T; + } + { + E Tp, T1l, TT, T1W, Ts, TQ, T1o, T1X; + { + E Tn, To, TR, TS; + Tn = Rp[WS(rs, 8)]; + To = Rm[WS(rs, 1)]; + Tp = Tn + To; + T1l = Tn - To; + TR = Ip[WS(rs, 8)]; + TS = Im[WS(rs, 1)]; + TT = TR + TS; + T1W = TR - TS; + } + { + E Tq, Tr, T1m, T1n; + Tq = Rm[WS(rs, 6)]; + Tr = Rp[WS(rs, 3)]; + Ts = Tq + Tr; + TQ = Tq - Tr; + T1m = Im[WS(rs, 6)]; + T1n = Ip[WS(rs, 3)]; + T1o = T1m + T1n; + T1X = T1n - T1m; + } + Tt = Tp + Ts; + T3R = TT - TQ; + T41 = T1l - T1o; + TU = TQ + TT; + T1p = T1l + T1o; + T3k = Tp - Ts; + T3u = T1W - T1X; + T1Y = T1W + T1X; + } + T1k = T1e - T1j; + T3A = T3h - T3i; + T3B = T3k - T3l; + T1v = T1p - T1u; + T2e = T1Y - T21; + T48 = T3R + T3S; + T47 = T3O + T3P; + T2d = T1R - T1U; + T1L = TU - TZ; + T43 = T41 - T42; + T40 = T3Y - T3Z; + T1K = TJ - TO; + T2l = Te - Tl; + T3t = T3r - T3s; + T2m = Tt - TA; + T3w 
= T3u - T3v; + { + E T3j, T3m, Tm, TB; + T3j = T3h + T3i; + T3m = T3k + T3l; + T3n = T3j + T3m; + T3p = KP559016994 * (T3j - T3m); + Tm = Te + Tl; + TB = Tt + TA; + TC = Tm + TB; + T2b = KP559016994 * (Tm - TB); + } + { + E T4b, T4c, T3Q, T3T; + T4b = T3Y + T3Z; + T4c = T41 + T42; + T4d = T4b + T4c; + T4f = KP559016994 * (T4b - T4c); + { + E T1V, T22, T1z, T1A; + T1V = T1R + T1U; + T22 = T1Y + T21; + T23 = T1V + T22; + T2j = KP559016994 * (T1V - T22); + T1z = T1e + T1j; + T1A = T1p + T1u; + T1B = KP559016994 * (T1z - T1A); + T1H = T1z + T1A; + } + T3Q = T3O - T3P; + T3T = T3R - T3S; + T3U = T3Q + T3T; + T3W = KP559016994 * (T3Q - T3T); + { + E T3E, T3F, TP, T10; + T3E = T3r + T3s; + T3F = T3u + T3v; + T3G = T3E + T3F; + T3I = KP559016994 * (T3E - T3F); + TP = TJ + TO; + T10 = TU + TZ; + T11 = KP559016994 * (TP - T10); + T17 = TP + T10; + } + } + } + { + E TD, T27, T3c, T3e, T2o, T36, T2A, T2U, T1N, T2Z, T2t, T2J, T1x, T2X, T2r; + E T2F, T2g, T34, T2y, T2Q; + TD = T7 + TC; + T27 = T23 + T26; + { + E T39, T3b, T38, T3a; + T39 = T16 + T17; + T3b = T1H + T1G; + T38 = W[8]; + T3a = W[9]; + T3c = FMA(T38, T39, T3a * T3b); + T3e = FNMS(T3a, T39, T38 * T3b); + } + { + E T2n, T2S, T2k, T2T, T2i; + T2n = FNMS(KP951056516, T2m, KP587785252 * T2l); + T2S = FMA(KP951056516, T2l, KP587785252 * T2m); + T2i = FNMS(KP250000000, T23, T26); + T2k = T2i - T2j; + T2T = T2j + T2i; + T2o = T2k - T2n; + T36 = T2T - T2S; + T2A = T2n + T2k; + T2U = T2S + T2T; + } + { + E T1M, T2H, T1J, T2I, T1I; + T1M = FMA(KP951056516, T1K, KP587785252 * T1L); + T2H = FNMS(KP951056516, T1L, KP587785252 * T1K); + T1I = FNMS(KP250000000, T1H, T1G); + T1J = T1B + T1I; + T2I = T1I - T1B; + T1N = T1J - T1M; + T2Z = T2I - T2H; + T2t = T1M + T1J; + T2J = T2H + T2I; + } + { + E T1w, T2E, T19, T2D, T18; + T1w = FMA(KP951056516, T1k, KP587785252 * T1v); + T2E = FNMS(KP951056516, T1v, KP587785252 * T1k); + T18 = FNMS(KP250000000, T17, T16); + T19 = T11 + T18; + T2D = T18 - T11; + T1x = T19 + T1w; + T2X = T2D + T2E; + 
T2r = T19 - T1w; + T2F = T2D - T2E; + } + { + E T2f, T2P, T2c, T2O, T2a; + T2f = FNMS(KP951056516, T2e, KP587785252 * T2d); + T2P = FMA(KP951056516, T2d, KP587785252 * T2e); + T2a = FNMS(KP250000000, TC, T7); + T2c = T2a - T2b; + T2O = T2b + T2a; + T2g = T2c + T2f; + T34 = T2O + T2P; + T2y = T2c - T2f; + T2Q = T2O - T2P; + } + { + E T1O, T28, TE, T1y; + TE = W[0]; + T1y = W[1]; + T1O = FMA(TE, T1x, T1y * T1N); + T28 = FNMS(T1y, T1x, TE * T1N); + Rp[0] = TD - T1O; + Ip[0] = T27 + T28; + Rm[0] = TD + T1O; + Im[0] = T28 - T27; + } + { + E T37, T3d, T33, T35; + T33 = W[6]; + T35 = W[7]; + T37 = FNMS(T35, T36, T33 * T34); + T3d = FMA(T35, T34, T33 * T36); + Rp[WS(rs, 2)] = T37 - T3c; + Ip[WS(rs, 2)] = T3d + T3e; + Rm[WS(rs, 2)] = T37 + T3c; + Im[WS(rs, 2)] = T3e - T3d; + } + { + E T2p, T2v, T2u, T2w; + { + E T29, T2h, T2q, T2s; + T29 = W[14]; + T2h = W[15]; + T2p = FNMS(T2h, T2o, T29 * T2g); + T2v = FMA(T2h, T2g, T29 * T2o); + T2q = W[16]; + T2s = W[17]; + T2u = FMA(T2q, T2r, T2s * T2t); + T2w = FNMS(T2s, T2r, T2q * T2t); + } + Rp[WS(rs, 4)] = T2p - T2u; + Ip[WS(rs, 4)] = T2v + T2w; + Rm[WS(rs, 4)] = T2p + T2u; + Im[WS(rs, 4)] = T2w - T2v; + } + { + E T2B, T2L, T2K, T2M; + { + E T2x, T2z, T2C, T2G; + T2x = W[22]; + T2z = W[23]; + T2B = FNMS(T2z, T2A, T2x * T2y); + T2L = FMA(T2z, T2y, T2x * T2A); + T2C = W[24]; + T2G = W[25]; + T2K = FMA(T2C, T2F, T2G * T2J); + T2M = FNMS(T2G, T2F, T2C * T2J); + } + Rp[WS(rs, 6)] = T2B - T2K; + Ip[WS(rs, 6)] = T2L + T2M; + Rm[WS(rs, 6)] = T2B + T2K; + Im[WS(rs, 6)] = T2M - T2L; + } + { + E T2V, T31, T30, T32; + { + E T2N, T2R, T2W, T2Y; + T2N = W[30]; + T2R = W[31]; + T2V = FNMS(T2R, T2U, T2N * T2Q); + T31 = FMA(T2R, T2Q, T2N * T2U); + T2W = W[32]; + T2Y = W[33]; + T30 = FMA(T2W, T2X, T2Y * T2Z); + T32 = FNMS(T2Y, T2X, T2W * T2Z); + } + Rp[WS(rs, 8)] = T2V - T30; + Ip[WS(rs, 8)] = T31 + T32; + Rm[WS(rs, 8)] = T2V + T30; + Im[WS(rs, 8)] = T32 - T31; + } + } + { + E T4F, T4P, T5c, T5e, T3y, T54, T4o, T4S, T4h, T4Z, T4x, T4N, T45, T4X, T4v; 
+ E T4J, T3K, T56, T4s, T4U; + { + E T4C, T4E, T4B, T4D; + T4C = T3g + T3n; + T4E = T3G + T3D; + T4B = W[18]; + T4D = W[19]; + T4F = FNMS(T4D, T4E, T4B * T4C); + T4P = FMA(T4D, T4C, T4B * T4E); + } + { + E T59, T5b, T58, T5a; + T59 = T3N + T3U; + T5b = T4d + T4a; + T58 = W[28]; + T5a = W[29]; + T5c = FMA(T58, T59, T5a * T5b); + T5e = FNMS(T5a, T59, T58 * T5b); + } + { + E T3x, T4n, T3q, T4m, T3o; + T3x = FNMS(KP951056516, T3w, KP587785252 * T3t); + T4n = FMA(KP951056516, T3t, KP587785252 * T3w); + T3o = FNMS(KP250000000, T3n, T3g); + T3q = T3o - T3p; + T4m = T3p + T3o; + T3y = T3q - T3x; + T54 = T4m + T4n; + T4o = T4m - T4n; + T4S = T3q + T3x; + } + { + E T49, T4M, T4g, T4L, T4e; + T49 = FNMS(KP951056516, T48, KP587785252 * T47); + T4M = FMA(KP951056516, T47, KP587785252 * T48); + T4e = FNMS(KP250000000, T4d, T4a); + T4g = T4e - T4f; + T4L = T4f + T4e; + T4h = T49 + T4g; + T4Z = T4M + T4L; + T4x = T4g - T49; + T4N = T4L - T4M; + } + { + E T44, T4I, T3X, T4H, T3V; + T44 = FNMS(KP951056516, T43, KP587785252 * T40); + T4I = FMA(KP951056516, T40, KP587785252 * T43); + T3V = FNMS(KP250000000, T3U, T3N); + T3X = T3V - T3W; + T4H = T3W + T3V; + T45 = T3X - T44; + T4X = T4H - T4I; + T4v = T3X + T44; + T4J = T4H + T4I; + } + { + E T3C, T4q, T3J, T4r, T3H; + T3C = FNMS(KP951056516, T3B, KP587785252 * T3A); + T4q = FMA(KP951056516, T3A, KP587785252 * T3B); + T3H = FNMS(KP250000000, T3G, T3D); + T3J = T3H - T3I; + T4r = T3I + T3H; + T3K = T3C + T3J; + T56 = T4r - T4q; + T4s = T4q + T4r; + T4U = T3J - T3C; + } + { + E T4O, T4Q, T4G, T4K; + T4G = W[20]; + T4K = W[21]; + T4O = FMA(T4G, T4J, T4K * T4N); + T4Q = FNMS(T4K, T4J, T4G * T4N); + Rp[WS(rs, 5)] = T4F - T4O; + Ip[WS(rs, 5)] = T4P + T4Q; + Rm[WS(rs, 5)] = T4F + T4O; + Im[WS(rs, 5)] = T4Q - T4P; + } + { + E T57, T5d, T53, T55; + T53 = W[26]; + T55 = W[27]; + T57 = FNMS(T55, T56, T53 * T54); + T5d = FMA(T55, T54, T53 * T56); + Rp[WS(rs, 7)] = T57 - T5c; + Ip[WS(rs, 7)] = T5d + T5e; + Rm[WS(rs, 7)] = T57 + T5c; + Im[WS(rs, 7)] 
= T5e - T5d; + } + { + E T3L, T4j, T4i, T4k; + { + E T3f, T3z, T3M, T46; + T3f = W[2]; + T3z = W[3]; + T3L = FNMS(T3z, T3K, T3f * T3y); + T4j = FMA(T3z, T3y, T3f * T3K); + T3M = W[4]; + T46 = W[5]; + T4i = FMA(T3M, T45, T46 * T4h); + T4k = FNMS(T46, T45, T3M * T4h); + } + Rp[WS(rs, 1)] = T3L - T4i; + Ip[WS(rs, 1)] = T4j + T4k; + Rm[WS(rs, 1)] = T3L + T4i; + Im[WS(rs, 1)] = T4k - T4j; + } + { + E T4t, T4z, T4y, T4A; + { + E T4l, T4p, T4u, T4w; + T4l = W[10]; + T4p = W[11]; + T4t = FNMS(T4p, T4s, T4l * T4o); + T4z = FMA(T4p, T4o, T4l * T4s); + T4u = W[12]; + T4w = W[13]; + T4y = FMA(T4u, T4v, T4w * T4x); + T4A = FNMS(T4w, T4v, T4u * T4x); + } + Rp[WS(rs, 3)] = T4t - T4y; + Ip[WS(rs, 3)] = T4z + T4A; + Rm[WS(rs, 3)] = T4t + T4y; + Im[WS(rs, 3)] = T4A - T4z; + } + { + E T4V, T51, T50, T52; + { + E T4R, T4T, T4W, T4Y; + T4R = W[34]; + T4T = W[35]; + T4V = FNMS(T4T, T4U, T4R * T4S); + T51 = FMA(T4T, T4S, T4R * T4U); + T4W = W[36]; + T4Y = W[37]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T52 = FNMS(T4Y, T4X, T4W * T4Z); + } + Rp[WS(rs, 9)] = T4V - T50; + Ip[WS(rs, 9)] = T51 + T52; + Rm[WS(rs, 9)] = T4V + T50; + Im[WS(rs, 9)] = T52 - T51; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cbdft_20", twinstr, &GENUS, {224, 62, 62, 0} }; + +void X(codelet_hc2cbdft_20) (planner *p) { + X(khc2c_register) (p, hc2cbdft_20, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1888 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:45 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft_32 -include hc2cb.h */ + +/* + * This function contains 498 FP additions, 260 FP multiplications, + * (or, 300 additions, 62 multiplications, 198 fused multiply/add), + * 165 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T8e, T8h, T7S, T8l, T8f, T84, T8c, T8k, T8g, T86, T82, T8m, T8i; + { + E T4B, 
T3h, T3K, Tv, T8Y, T6T, T8L, T7i, T8X, T7f, T4Y, T1G, T4K, T1j, T4X; + E T2M, T8C, T6d, T8o, T66, T8K, T6M, T4L, T2P, T4C, T3o, T5q, T4q, T8p, T6C; + E T8B, T6z, T72, T2u, T75, T10, T3P, T3a, T3L, T4t, T4E, T8F, T8t, T4F, T4w; + E T8E, T8w, T6E, T6l, T6F, T6s, T76, T4P, T51, T2R, T28, T8P, T90, T7k, T71; + E T2p, T4R, T2x, T73, T6x, T6y; + { + E T3l, T16, T3m, T2H, T2E, T13, T64, T7, T3i, T2J, T1c, T3j, T1h, T2K, Te; + E T1z, T6R, T6a, Tt, T3g, T6b, T1E, T6Q, Tj, T1p, Ti, T3b, T1n, Tk, T1q; + E T1r; + { + E T1, T2, T4, T5; + { + E T14, T15, T2F, T2G; + T14 = Ip[0]; + T15 = Im[WS(rs, 15)]; + T2F = Ip[WS(rs, 8)]; + T2G = Im[WS(rs, 7)]; + T1 = Rp[0]; + T3l = T14 - T15; + T16 = T14 + T15; + T3m = T2F - T2G; + T2H = T2F + T2G; + T2 = Rm[WS(rs, 15)]; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + } + { + E T1b, T1e, T18, Ta, T1f, Tb, Tc, T8, T9, T1g, T1d, Td; + { + E T19, T3, T6, T1a; + T19 = Ip[WS(rs, 4)]; + T2E = T1 - T2; + T3 = T1 + T2; + T13 = T4 - T5; + T6 = T4 + T5; + T1a = Im[WS(rs, 11)]; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + T64 = T3 - T6; + T7 = T3 + T6; + T1b = T19 + T1a; + T3i = T19 - T1a; + } + T1e = Im[WS(rs, 3)]; + T18 = T8 - T9; + Ta = T8 + T9; + T1f = Ip[WS(rs, 12)]; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + T2J = T18 - T1b; + T1c = T18 + T1b; + T1g = T1e + T1f; + T3j = T1f - T1e; + T1d = Tb - Tc; + Td = Tb + Tc; + T1h = T1d + T1g; + T2K = T1d - T1g; + T6x = Ta - Td; + Te = Ta + Td; + } + { + E Tq, T1A, Tp, T3e, T1y, Tr, T1B, T1C; + { + E Tn, To, T1w, T1x; + Tn = Rm[WS(rs, 1)]; + To = Rp[WS(rs, 14)]; + T1w = Im[WS(rs, 1)]; + T1x = Ip[WS(rs, 14)]; + Tq = Rp[WS(rs, 6)]; + T1A = Tn - To; + Tp = Tn + To; + T3e = T1x - T1w; + T1y = T1w + T1x; + Tr = Rm[WS(rs, 9)]; + T1B = Ip[WS(rs, 6)]; + T1C = Im[WS(rs, 9)]; + } + { + E Tg, Th, T1l, T1m; + Tg = Rp[WS(rs, 2)]; + { + E T1v, Ts, T3f, T1D; + T1v = Tq - Tr; + Ts = Tq + Tr; + T3f = T1B - T1C; + T1D = T1B + T1C; + T1z = T1v - T1y; + T6R = T1v + T1y; + T6a = Tp - Ts; + Tt = Tp + Ts; + T3g = T3e + T3f; + 
T6b = T3e - T3f; + T1E = T1A - T1D; + T6Q = T1A + T1D; + Th = Rm[WS(rs, 13)]; + } + T1l = Ip[WS(rs, 2)]; + T1m = Im[WS(rs, 13)]; + Tj = Rp[WS(rs, 10)]; + T1p = Tg - Th; + Ti = Tg + Th; + T3b = T1l - T1m; + T1n = T1l + T1m; + Tk = Rm[WS(rs, 5)]; + T1q = Ip[WS(rs, 10)]; + T1r = Im[WS(rs, 5)]; + } + } + } + { + E T4o, T67, T68, T4p, T2I, T1i, T2N, T1u, T1F, T2O, T6K, T17; + { + E Tf, T1o, T1t, Tu, T7g, T6P, T6S, T7h, T7d, T7e; + { + E T6O, T6N, T1k, Tl; + T4o = T7 - Te; + Tf = T7 + Te; + T1k = Tj - Tk; + Tl = Tj + Tk; + { + E T3c, T1s, Tm, T3d; + T3c = T1q - T1r; + T1s = T1q + T1r; + T1o = T1k + T1n; + T6O = T1n - T1k; + T67 = Ti - Tl; + Tm = Ti + Tl; + T3d = T3b + T3c; + T68 = T3b - T3c; + T1t = T1p - T1s; + T6N = T1p + T1s; + T4B = Tm - Tt; + Tu = Tm + Tt; + T4p = T3g - T3d; + T3h = T3d + T3g; + } + T7g = FNMS(KP414213562, T6N, T6O); + T6P = FMA(KP414213562, T6O, T6N); + T6S = FMA(KP414213562, T6R, T6Q); + T7h = FNMS(KP414213562, T6Q, T6R); + } + T3K = Tf - Tu; + Tv = Tf + Tu; + T8Y = T6P + T6S; + T6T = T6P - T6S; + T2I = T2E - T2H; + T7d = T2E + T2H; + T7e = T1c + T1h; + T1i = T1c - T1h; + T2N = FNMS(KP414213562, T1o, T1t); + T1u = FMA(KP414213562, T1t, T1o); + T8L = T7h - T7g; + T7i = T7g + T7h; + T8X = FMA(KP707106781, T7e, T7d); + T7f = FNMS(KP707106781, T7e, T7d); + T1F = FNMS(KP414213562, T1E, T1z); + T2O = FMA(KP414213562, T1z, T1E); + T6K = T16 - T13; + T17 = T13 + T16; + } + { + E T6L, T6A, T6B, T65, T3k, T2L, T69, T6c, T3n; + T4Y = T1F - T1u; + T1G = T1u + T1F; + T4K = FNMS(KP707106781, T1i, T17); + T1j = FMA(KP707106781, T1i, T17); + T2L = T2J + T2K; + T6L = T2J - T2K; + T6A = T67 + T68; + T69 = T67 - T68; + T6c = T6a + T6b; + T6B = T6b - T6a; + T4X = FNMS(KP707106781, T2L, T2I); + T2M = FMA(KP707106781, T2L, T2I); + T8C = T69 - T6c; + T6d = T69 + T6c; + T65 = T3j - T3i; + T3k = T3i + T3j; + T8o = T64 - T65; + T66 = T64 + T65; + T8K = FNMS(KP707106781, T6L, T6K); + T6M = FMA(KP707106781, T6L, T6K); + T3n = T3l + T3m; + T6y = T3l - T3m; + T4L = T2N - T2O; 
+ T2P = T2N + T2O; + T4C = T3n - T3k; + T3o = T3k + T3n; + T5q = T4o - T4p; + T4q = T4o + T4p; + T8p = T6B - T6A; + T6C = T6A + T6B; + } + } + } + { + E T1M, T6V, T6f, TC, T31, T6j, T23, T6Y, T2v, T2i, TY, T6p, T6n, T35, T2n; + E T2w, T24, T1R, TJ, T6i, T6g, T2Y, T1W, T25, T2q, TN, T2r, T36, T2c, T29; + E TQ, T2s; + { + E TU, T2k, T33, T2j, TX, T2l, T2m, T34; + { + E T1Z, Ty, T20, T2Z, T1L, T1I, TB, T21, T2e, T2h; + { + E T1J, T1K, Tw, Tx, Tz, TA; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + T1J = Ip[WS(rs, 1)]; + T8B = T6y - T6x; + T6z = T6x + T6y; + T1Z = Tw - Tx; + Ty = Tw + Tx; + T1K = Im[WS(rs, 14)]; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + T20 = Ip[WS(rs, 9)]; + T2Z = T1J - T1K; + T1L = T1J + T1K; + T1I = Tz - TA; + TB = Tz + TA; + T21 = Im[WS(rs, 6)]; + } + { + E T2f, T2g, TV, TW; + { + E TS, T30, T22, TT; + TS = Rp[WS(rs, 3)]; + T1M = T1I + T1L; + T6V = T1L - T1I; + T6f = Ty - TB; + TC = Ty + TB; + T30 = T20 - T21; + T22 = T20 + T21; + TT = Rm[WS(rs, 12)]; + T2f = Ip[WS(rs, 3)]; + T31 = T2Z + T30; + T6j = T2Z - T30; + T23 = T1Z - T22; + T6Y = T1Z + T22; + T2e = TS - TT; + TU = TS + TT; + T2g = Im[WS(rs, 12)]; + } + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + T2k = Im[WS(rs, 4)]; + T33 = T2f - T2g; + T2h = T2f + T2g; + T2j = TV - TW; + TX = TV + TW; + T2l = Ip[WS(rs, 11)]; + } + T2v = T2e - T2h; + T2i = T2e + T2h; + } + TY = TU + TX; + T6p = TU - TX; + T2m = T2k + T2l; + T34 = T2l - T2k; + { + E TF, T1T, T2W, T1S, TI, T1U, T1N, T1Q, T1V, T2X; + { + E T1O, T1P, TD, TE, TG, TH; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + T6n = T34 - T33; + T35 = T33 + T34; + T2n = T2j + T2m; + T2w = T2j - T2m; + T1N = TD - TE; + TF = TD + TE; + T1O = Ip[WS(rs, 5)]; + T1P = Im[WS(rs, 10)]; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + T1T = Im[WS(rs, 2)]; + T2W = T1O - T1P; + T1Q = T1O + T1P; + T1S = TG - TH; + TI = TG + TH; + T1U = Ip[WS(rs, 13)]; + } + T24 = T1N - T1Q; + T1R = T1N + T1Q; + TJ = TF + TI; + T6i = TF - TI; + T1V = T1T + T1U; + T2X = T1U - T1T; + { + 
E T2a, T2b, TL, TM, TO, TP; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + T6g = T2X - T2W; + T2Y = T2W + T2X; + T1W = T1S + T1V; + T25 = T1S - T1V; + T2q = TL - TM; + TN = TL + TM; + T2a = Im[0]; + T2b = Ip[WS(rs, 15)]; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + T2r = Ip[WS(rs, 7)]; + T36 = T2b - T2a; + T2c = T2a + T2b; + T29 = TO - TP; + TQ = TO + TP; + T2s = Im[WS(rs, 8)]; + } + } + } + { + E T2d, T4u, T4v, T6r, T6o, T6k, T8u, T8v, T6h; + { + E T4r, T6m, T32, T4s, T6q, T39, T8r, T8s; + { + E TK, TR, T37, T2t, TZ, T38; + T4r = TC - TJ; + TK = TC + TJ; + T2d = T29 - T2c; + T72 = T29 + T2c; + T6m = TN - TQ; + TR = TN + TQ; + T37 = T2r - T2s; + T2t = T2r + T2s; + T32 = T2Y + T31; + T4s = T31 - T2Y; + T4u = TR - TY; + TZ = TR + TY; + T38 = T36 + T37; + T6q = T36 - T37; + T2u = T2q - T2t; + T75 = T2q + T2t; + T10 = TK + TZ; + T3P = TK - TZ; + T4v = T38 - T35; + T39 = T35 + T38; + } + T8r = T6q - T6p; + T6r = T6p + T6q; + T3a = T32 + T39; + T3L = T39 - T32; + T8s = T6m - T6n; + T6o = T6m + T6n; + T4t = T4r - T4s; + T4E = T4r + T4s; + T8F = FNMS(KP414213562, T8r, T8s); + T8t = FMA(KP414213562, T8s, T8r); + T6k = T6i + T6j; + T8u = T6j - T6i; + T8v = T6f - T6g; + T6h = T6f + T6g; + } + { + E T6Z, T1Y, T4O, T26, T6W, T1X, T2o, T4N, T27; + T4F = T4v - T4u; + T4w = T4u + T4v; + T8E = FMA(KP414213562, T8u, T8v); + T8w = FNMS(KP414213562, T8v, T8u); + T6Z = T1R + T1W; + T1X = T1R - T1W; + T6E = FMA(KP414213562, T6h, T6k); + T6l = FNMS(KP414213562, T6k, T6h); + T6F = FNMS(KP414213562, T6o, T6r); + T6s = FMA(KP414213562, T6r, T6o); + T1Y = FMA(KP707106781, T1X, T1M); + T4O = FNMS(KP707106781, T1X, T1M); + T26 = T24 + T25; + T6W = T25 - T24; + T76 = T2i + T2n; + T2o = T2i - T2n; + T4N = FNMS(KP707106781, T26, T23); + T27 = FMA(KP707106781, T26, T23); + { + E T8O, T6X, T8N, T70; + T8O = FMA(KP707106781, T6W, T6V); + T6X = FNMS(KP707106781, T6W, T6V); + T8N = FMA(KP707106781, T6Z, T6Y); + T70 = FNMS(KP707106781, T6Z, T6Y); + T4P = FMA(KP668178637, T4O, T4N); + T51 = FNMS(KP668178637, 
T4N, T4O); + T2R = FNMS(KP198912367, T1Y, T27); + T28 = FMA(KP198912367, T27, T1Y); + T8P = FMA(KP198912367, T8O, T8N); + T90 = FNMS(KP198912367, T8N, T8O); + T7k = FNMS(KP668178637, T6X, T70); + T71 = FMA(KP668178637, T70, T6X); + T2p = FMA(KP707106781, T2o, T2d); + T4R = FNMS(KP707106781, T2o, T2d); + } + T2x = T2v + T2w; + T73 = T2v - T2w; + } + } + } + { + E T8S, T91, T7l, T78, T5U, T5X, T5y, T61, T5V, T5K, T5S, T60, T5W, T5M, T5I; + { + E T4S, T50, T4e, T4h, T3S, T4l, T4f, T44, T4c, T4k, T4g, T46, T42; + { + E T3Q, T3U, T40, T3Z, T3V, T3A, T3D, T3H, T3B, T3y, T3G, T3C; + { + E T11, T3t, T3w, T3q, T3x, T3v, T3F, T12, T2B, T2U, T3z, T2C; + { + E T3u, T2S, T2z, T3p, T4Q, T2y; + T3u = Tv - T10; + T11 = Tv + T10; + T4Q = FNMS(KP707106781, T2x, T2u); + T2y = FMA(KP707106781, T2x, T2u); + { + E T8R, T74, T8Q, T77; + T8R = FMA(KP707106781, T73, T72); + T74 = FNMS(KP707106781, T73, T72); + T8Q = FMA(KP707106781, T76, T75); + T77 = FNMS(KP707106781, T76, T75); + T4S = FNMS(KP668178637, T4R, T4Q); + T50 = FMA(KP668178637, T4Q, T4R); + T2S = FMA(KP198912367, T2p, T2y); + T2z = FNMS(KP198912367, T2y, T2p); + T8S = FMA(KP198912367, T8R, T8Q); + T91 = FNMS(KP198912367, T8Q, T8R); + T7l = FNMS(KP668178637, T74, T77); + T78 = FMA(KP668178637, T77, T74); + T3Q = T3o - T3h; + T3p = T3h + T3o; + } + T3t = W[30]; + T3w = W[31]; + T3q = T3a + T3p; + T3x = T3p - T3a; + T3v = T3t * T3u; + T3F = T3w * T3u; + { + E T1H, T2A, T2Q, T2T; + T3U = FNMS(KP923879532, T1G, T1j); + T1H = FMA(KP923879532, T1G, T1j); + T2A = T28 + T2z; + T40 = T2z - T28; + T3Z = FNMS(KP923879532, T2P, T2M); + T2Q = FMA(KP923879532, T2P, T2M); + T2T = T2R + T2S; + T3V = T2R - T2S; + T12 = W[0]; + T3A = FNMS(KP980785280, T2A, T1H); + T2B = FMA(KP980785280, T2A, T1H); + T3D = FNMS(KP980785280, T2T, T2Q); + T2U = FMA(KP980785280, T2T, T2Q); + T3z = W[32]; + T2C = T12 * T2B; + } + } + { + E T2V, T3s, T2D, T3r; + T2D = W[1]; + T3r = T12 * T2U; + T3H = T3z * T3D; + T3B = T3z * T3A; + T2V = FMA(T2D, T2U, T2C); + T3s = 
FNMS(T2D, T2B, T3r); + T3y = FNMS(T3w, T3x, T3v); + T3G = FMA(T3t, T3x, T3F); + Rm[0] = T11 + T2V; + Rp[0] = T11 - T2V; + Im[0] = T3s - T3q; + Ip[0] = T3q + T3s; + T3C = W[33]; + } + } + { + E T4b, T3R, T47, T4a, T3J, T49, T4j, T3O, T3N, T43, T3W, T3T, T41, T4d, T3X; + E T45, T3Y; + { + E T3M, T48, T3I, T3E; + T3M = T3K + T3L; + T48 = T3K - T3L; + T3I = FNMS(T3C, T3A, T3H); + T3E = FMA(T3C, T3D, T3B); + T4b = T3Q - T3P; + T3R = T3P + T3Q; + Im[WS(rs, 8)] = T3I - T3G; + Ip[WS(rs, 8)] = T3G + T3I; + Rm[WS(rs, 8)] = T3y + T3E; + Rp[WS(rs, 8)] = T3y - T3E; + T47 = W[46]; + T4a = W[47]; + T3J = W[14]; + T49 = T47 * T48; + T4j = T4a * T48; + T3O = W[15]; + T3N = T3J * T3M; + T43 = T3O * T3M; + T3W = FMA(KP980785280, T3V, T3U); + T4e = FNMS(KP980785280, T3V, T3U); + T3T = W[16]; + T4h = FNMS(KP980785280, T40, T3Z); + T41 = FMA(KP980785280, T40, T3Z); + T4d = W[48]; + T3X = T3T * T3W; + } + T3S = FNMS(T3O, T3R, T3N); + T45 = T3T * T41; + T4l = T4d * T4h; + T4f = T4d * T4e; + T44 = FMA(T3J, T3R, T43); + T3Y = W[17]; + T4c = FNMS(T4a, T4b, T49); + T4k = FMA(T47, T4b, T4j); + T4g = W[49]; + T46 = FNMS(T3Y, T3W, T45); + T42 = FMA(T3Y, T41, T3X); + } + } + { + E T5v, T5r, T5w, T5A, T5G, T5F, T5B, T5g, T5j, T4I, T5n, T5h, T56, T5e, T5m; + E T5i, T58, T54; + { + E T4n, T4A, T5d, T4H, T59, T5c, T55, T4z, T5b, T5l, T4J, T4U, T53, T5f, T4V; + E T57, T4W; + { + E T4D, T4G, T4m, T4i, T5a, T4y, T4x; + T5v = T4C - T4B; + T4D = T4B + T4C; + T4m = FNMS(T4g, T4e, T4l); + T4i = FMA(T4g, T4h, T4f); + Im[WS(rs, 4)] = T46 - T44; + Ip[WS(rs, 4)] = T44 + T46; + Rm[WS(rs, 4)] = T3S + T42; + Rp[WS(rs, 4)] = T3S - T42; + Im[WS(rs, 12)] = T4m - T4k; + Ip[WS(rs, 12)] = T4k + T4m; + Rm[WS(rs, 12)] = T4c + T4i; + Rp[WS(rs, 12)] = T4c - T4i; + T4G = T4E + T4F; + T5r = T4F - T4E; + T5w = T4t - T4w; + T4x = T4t + T4w; + T4n = W[6]; + T4A = W[7]; + T5d = FNMS(KP707106781, T4G, T4D); + T4H = FMA(KP707106781, T4G, T4D); + T5a = FNMS(KP707106781, T4x, T4q); + T4y = FMA(KP707106781, T4x, T4q); + T59 = W[38]; + 
T5c = W[39]; + { + E T4M, T4T, T4Z, T52; + T4M = FMA(KP923879532, T4L, T4K); + T5A = FNMS(KP923879532, T4L, T4K); + T55 = T4A * T4y; + T4z = T4n * T4y; + T5b = T59 * T5a; + T5l = T5c * T5a; + T5G = T4P + T4S; + T4T = T4P - T4S; + T4Z = FMA(KP923879532, T4Y, T4X); + T5F = FNMS(KP923879532, T4Y, T4X); + T5B = T51 + T50; + T52 = T50 - T51; + T4J = W[8]; + T4U = FMA(KP831469612, T4T, T4M); + T5g = FNMS(KP831469612, T4T, T4M); + T53 = FMA(KP831469612, T52, T4Z); + T5j = FNMS(KP831469612, T52, T4Z); + T5f = W[40]; + T4V = T4J * T4U; + } + } + T4I = FNMS(T4A, T4H, T4z); + T57 = T4J * T53; + T5n = T5f * T5j; + T5h = T5f * T5g; + T56 = FMA(T4n, T4H, T55); + T4W = W[9]; + T5e = FNMS(T5c, T5d, T5b); + T5m = FMA(T59, T5d, T5l); + T5i = W[41]; + T58 = FNMS(T4W, T4U, T57); + T54 = FMA(T4W, T53, T4V); + } + { + E T5p, T5u, T5x, T5R, T5N, T5Q, T5J, T5t, T5P, T5Z, T5z, T5C, T5H, T5T, T5D; + E T5L, T5E; + { + E T5o, T5k, T5s, T5O; + T5o = FNMS(T5i, T5g, T5n); + T5k = FMA(T5i, T5j, T5h); + Im[WS(rs, 2)] = T58 - T56; + Ip[WS(rs, 2)] = T56 + T58; + Rm[WS(rs, 2)] = T4I + T54; + Rp[WS(rs, 2)] = T4I - T54; + Im[WS(rs, 10)] = T5o - T5m; + Ip[WS(rs, 10)] = T5m + T5o; + Rm[WS(rs, 10)] = T5e + T5k; + Rp[WS(rs, 10)] = T5e - T5k; + T5p = W[22]; + T5u = W[23]; + T5x = FMA(KP707106781, T5w, T5v); + T5R = FNMS(KP707106781, T5w, T5v); + T5s = FMA(KP707106781, T5r, T5q); + T5O = FNMS(KP707106781, T5r, T5q); + T5N = W[54]; + T5Q = W[55]; + T5J = T5u * T5s; + T5t = T5p * T5s; + T5P = T5N * T5O; + T5Z = T5Q * T5O; + T5z = W[24]; + T5U = FMA(KP831469612, T5B, T5A); + T5C = FNMS(KP831469612, T5B, T5A); + T5X = FMA(KP831469612, T5G, T5F); + T5H = FNMS(KP831469612, T5G, T5F); + T5T = W[56]; + T5D = T5z * T5C; + } + T5y = FNMS(T5u, T5x, T5t); + T5L = T5z * T5H; + T61 = T5T * T5X; + T5V = T5T * T5U; + T5K = FMA(T5p, T5x, T5J); + T5E = W[25]; + T5S = FNMS(T5Q, T5R, T5P); + T60 = FMA(T5N, T5R, T5Z); + T5W = W[57]; + T5M = FNMS(T5E, T5C, T5L); + T5I = FMA(T5E, T5H, T5D); + } + } + } + { + E T7P, T7L, T7K, T7Q, 
T7U, T80, T7Z, T7V, T9v, T9r, T9q, T9w, T9A, T9G, T9F; + E T9B, T9g, T9j, T8I, T9n, T9h, T96, T9e, T9m, T9i, T98, T94; + { + E T7A, T7D, T6I, T7H, T7B, T7q, T7y, T7G, T7C, T7s, T7o; + { + E T63, T7x, T6H, T6w, T7t, T7w, T6v, T7p, T7v, T7F, T6J, T7a, T7n, T7z, T7b; + E T7r, T7c; + { + E T6D, T6G, T62, T5Y; + T7P = FNMS(KP707106781, T6C, T6z); + T6D = FMA(KP707106781, T6C, T6z); + T62 = FNMS(T5W, T5U, T61); + T5Y = FMA(T5W, T5X, T5V); + Im[WS(rs, 6)] = T5M - T5K; + Ip[WS(rs, 6)] = T5K + T5M; + Rm[WS(rs, 6)] = T5y + T5I; + Rp[WS(rs, 6)] = T5y - T5I; + Im[WS(rs, 14)] = T62 - T60; + Ip[WS(rs, 14)] = T60 + T62; + Rm[WS(rs, 14)] = T5S + T5Y; + Rp[WS(rs, 14)] = T5S - T5Y; + T6G = T6E + T6F; + T7L = T6F - T6E; + { + E T6e, T6t, T7u, T6u; + T7K = FNMS(KP707106781, T6d, T66); + T6e = FMA(KP707106781, T6d, T66); + T6t = T6l + T6s; + T7Q = T6l - T6s; + T63 = W[2]; + T7x = FNMS(KP923879532, T6G, T6D); + T6H = FMA(KP923879532, T6G, T6D); + T7u = FNMS(KP923879532, T6t, T6e); + T6u = FMA(KP923879532, T6t, T6e); + T6w = W[3]; + T7t = W[34]; + T7w = W[35]; + T6v = T63 * T6u; + T7p = T6w * T6u; + T7v = T7t * T7u; + T7F = T7w * T7u; + } + { + E T6U, T79, T7j, T7m; + T7U = FNMS(KP923879532, T6T, T6M); + T6U = FMA(KP923879532, T6T, T6M); + T79 = T71 - T78; + T80 = T71 + T78; + T7Z = FMA(KP923879532, T7i, T7f); + T7j = FNMS(KP923879532, T7i, T7f); + T7m = T7k + T7l; + T7V = T7k - T7l; + T6J = W[4]; + T7A = FNMS(KP831469612, T79, T6U); + T7a = FMA(KP831469612, T79, T6U); + T7D = FNMS(KP831469612, T7m, T7j); + T7n = FMA(KP831469612, T7m, T7j); + T7z = W[36]; + T7b = T6J * T7a; + } + } + T6I = FNMS(T6w, T6H, T6v); + T7r = T6J * T7n; + T7H = T7z * T7D; + T7B = T7z * T7A; + T7q = FMA(T63, T6H, T7p); + T7c = W[5]; + T7y = FNMS(T7w, T7x, T7v); + T7G = FMA(T7t, T7x, T7F); + T7C = W[37]; + T7s = FNMS(T7c, T7a, T7r); + T7o = FMA(T7c, T7n, T7b); + } + { + E T8n, T9d, T8H, T8A, T99, T9c, T8z, T95, T9b, T9l, T8J, T8U, T93, T9f, T8V; + E T97, T8W; + { + E T8D, T8G, T7I, T7E; + T9v = FNMS(KP707106781, 
T8C, T8B); + T8D = FMA(KP707106781, T8C, T8B); + T7I = FNMS(T7C, T7A, T7H); + T7E = FMA(T7C, T7D, T7B); + Im[WS(rs, 1)] = T7s - T7q; + Ip[WS(rs, 1)] = T7q + T7s; + Rm[WS(rs, 1)] = T6I + T7o; + Rp[WS(rs, 1)] = T6I - T7o; + Im[WS(rs, 9)] = T7I - T7G; + Ip[WS(rs, 9)] = T7G + T7I; + Rm[WS(rs, 9)] = T7y + T7E; + Rp[WS(rs, 9)] = T7y - T7E; + T8G = T8E - T8F; + T9r = T8E + T8F; + { + E T8q, T8x, T9a, T8y; + T9q = FNMS(KP707106781, T8p, T8o); + T8q = FMA(KP707106781, T8p, T8o); + T8x = T8t - T8w; + T9w = T8w + T8t; + T8n = W[10]; + T9d = FNMS(KP923879532, T8G, T8D); + T8H = FMA(KP923879532, T8G, T8D); + T9a = FNMS(KP923879532, T8x, T8q); + T8y = FMA(KP923879532, T8x, T8q); + T8A = W[11]; + T99 = W[42]; + T9c = W[43]; + T8z = T8n * T8y; + T95 = T8A * T8y; + T9b = T99 * T9a; + T9l = T9c * T9a; + } + { + E T8M, T8T, T8Z, T92; + T9A = FNMS(KP923879532, T8L, T8K); + T8M = FMA(KP923879532, T8L, T8K); + T8T = T8P - T8S; + T9G = T8P + T8S; + T9F = FMA(KP923879532, T8Y, T8X); + T8Z = FNMS(KP923879532, T8Y, T8X); + T92 = T90 + T91; + T9B = T91 - T90; + T8J = W[12]; + T9g = FNMS(KP980785280, T8T, T8M); + T8U = FMA(KP980785280, T8T, T8M); + T9j = FMA(KP980785280, T92, T8Z); + T93 = FNMS(KP980785280, T92, T8Z); + T9f = W[44]; + T8V = T8J * T8U; + } + } + T8I = FNMS(T8A, T8H, T8z); + T97 = T8J * T93; + T9n = T9f * T9j; + T9h = T9f * T9g; + T96 = FMA(T8n, T8H, T95); + T8W = W[13]; + T9e = FNMS(T9c, T9d, T9b); + T9m = FMA(T99, T9d, T9l); + T9i = W[45]; + T98 = FNMS(T8W, T8U, T97); + T94 = FMA(T8W, T93, T8V); + } + } + { + E T9U, T9X, T9y, Ta1, T9V, T9K, T9S, Ta0, T9W, T9M, T9I; + { + E T9p, T9R, T9x, T9u, T9N, T9Q, T9t, T9J, T9P, T9Z, T9z, T9C, T9H, T9T, T9D; + E T9L, T9E; + { + E T9o, T9k, T9O, T9s; + T9o = FNMS(T9i, T9g, T9n); + T9k = FMA(T9i, T9j, T9h); + Im[WS(rs, 3)] = T98 - T96; + Ip[WS(rs, 3)] = T96 + T98; + Rm[WS(rs, 3)] = T8I + T94; + Rp[WS(rs, 3)] = T8I - T94; + Im[WS(rs, 11)] = T9o - T9m; + Ip[WS(rs, 11)] = T9m + T9o; + Rm[WS(rs, 11)] = T9e + T9k; + Rp[WS(rs, 11)] = T9e - T9k; 
+ T9p = W[26]; + T9R = FMA(KP923879532, T9w, T9v); + T9x = FNMS(KP923879532, T9w, T9v); + T9O = FMA(KP923879532, T9r, T9q); + T9s = FNMS(KP923879532, T9r, T9q); + T9u = W[27]; + T9N = W[58]; + T9Q = W[59]; + T9t = T9p * T9s; + T9J = T9u * T9s; + T9P = T9N * T9O; + T9Z = T9Q * T9O; + T9z = W[28]; + T9U = FNMS(KP980785280, T9B, T9A); + T9C = FMA(KP980785280, T9B, T9A); + T9X = FMA(KP980785280, T9G, T9F); + T9H = FNMS(KP980785280, T9G, T9F); + T9T = W[60]; + T9D = T9z * T9C; + } + T9y = FNMS(T9u, T9x, T9t); + T9L = T9z * T9H; + Ta1 = T9T * T9X; + T9V = T9T * T9U; + T9K = FMA(T9p, T9x, T9J); + T9E = W[29]; + T9S = FNMS(T9Q, T9R, T9P); + Ta0 = FMA(T9N, T9R, T9Z); + T9W = W[61]; + T9M = FNMS(T9E, T9C, T9L); + T9I = FMA(T9E, T9H, T9D); + } + { + E T7J, T8b, T7R, T7O, T87, T8a, T7N, T83, T89, T8j, T7T, T7W, T81, T8d, T7X; + E T85, T7Y; + { + E Ta2, T9Y, T88, T7M; + Ta2 = FNMS(T9W, T9U, Ta1); + T9Y = FMA(T9W, T9X, T9V); + Im[WS(rs, 7)] = T9M - T9K; + Ip[WS(rs, 7)] = T9K + T9M; + Rm[WS(rs, 7)] = T9y + T9I; + Rp[WS(rs, 7)] = T9y - T9I; + Im[WS(rs, 15)] = Ta2 - Ta0; + Ip[WS(rs, 15)] = Ta0 + Ta2; + Rm[WS(rs, 15)] = T9S + T9Y; + Rp[WS(rs, 15)] = T9S - T9Y; + T7J = W[18]; + T8b = FNMS(KP923879532, T7Q, T7P); + T7R = FMA(KP923879532, T7Q, T7P); + T88 = FNMS(KP923879532, T7L, T7K); + T7M = FMA(KP923879532, T7L, T7K); + T7O = W[19]; + T87 = W[50]; + T8a = W[51]; + T7N = T7J * T7M; + T83 = T7O * T7M; + T89 = T87 * T88; + T8j = T8a * T88; + T7T = W[20]; + T8e = FNMS(KP831469612, T7V, T7U); + T7W = FMA(KP831469612, T7V, T7U); + T8h = FMA(KP831469612, T80, T7Z); + T81 = FNMS(KP831469612, T80, T7Z); + T8d = W[52]; + T7X = T7T * T7W; + } + T7S = FNMS(T7O, T7R, T7N); + T85 = T7T * T81; + T8l = T8d * T8h; + T8f = T8d * T8e; + T84 = FMA(T7J, T7R, T83); + T7Y = W[21]; + T8c = FNMS(T8a, T8b, T89); + T8k = FMA(T87, T8b, T8j); + T8g = W[53]; + T86 = FNMS(T7Y, T7W, T85); + T82 = FMA(T7Y, T81, T7X); + } + } + } + } + } + T8m = FNMS(T8g, T8e, T8l); + T8i = FMA(T8g, T8h, T8f); + Im[WS(rs, 5)] = T86 
- T84; + Ip[WS(rs, 5)] = T84 + T86; + Rm[WS(rs, 5)] = T7S + T82; + Rp[WS(rs, 5)] = T7S - T82; + Im[WS(rs, 13)] = T8m - T8k; + Ip[WS(rs, 13)] = T8k + T8m; + Rm[WS(rs, 13)] = T8c + T8i; + Rp[WS(rs, 13)] = T8c - T8i; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cbdft_32", twinstr, &GENUS, {300, 62, 198, 0} }; + +void X(codelet_hc2cbdft_32) (planner *p) { + X(khc2c_register) (p, hc2cbdft_32, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft_32 -include hc2cb.h */ + +/* + * This function contains 498 FP additions, 208 FP multiplications, + * (or, 404 additions, 114 multiplications, 94 fused multiply/add), + * 102 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tf, T4a, T6h, T7Z, T6P, T8e, T1j, T4v, T2R, T4L, T5C, T7E, T6a, T7U, T3n; + E T4q, TZ, T38, T2p, T4B, T7M, T7R, T2y, T4C, T5Y, T63, T6C, T86, T4i, T4n; + E T6z, T85, TK, T31, T1Y, T4y, T7J, T7Q, T27, T4z, T5R, T62, T6v, T83, T4f; + E T4m, T6s, T82, Tu, T4p, T6o, T8f, T6M, T80, T1G, T4K, T2I, T4w, T5J, T7T; + E T67, T7F, 
T3g, T4b; + { + E T3, T2M, T16, T3k, T6, T13, T2P, T3l, Td, T3i, T1h, T2K, Ta, T3h, T1c; + E T2J; + { + E T1, T2, T2N, T2O; + T1 = Rp[0]; + T2 = Rm[WS(rs, 15)]; + T3 = T1 + T2; + T2M = T1 - T2; + { + E T14, T15, T4, T5; + T14 = Ip[0]; + T15 = Im[WS(rs, 15)]; + T16 = T14 + T15; + T3k = T14 - T15; + T4 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 7)]; + T6 = T4 + T5; + T13 = T4 - T5; + } + T2N = Ip[WS(rs, 8)]; + T2O = Im[WS(rs, 7)]; + T2P = T2N + T2O; + T3l = T2N - T2O; + { + E Tb, Tc, T1d, T1e, T1f, T1g; + Tb = Rm[WS(rs, 3)]; + Tc = Rp[WS(rs, 12)]; + T1d = Tb - Tc; + T1e = Im[WS(rs, 3)]; + T1f = Ip[WS(rs, 12)]; + T1g = T1e + T1f; + Td = Tb + Tc; + T3i = T1f - T1e; + T1h = T1d + T1g; + T2K = T1d - T1g; + } + { + E T8, T9, T18, T19, T1a, T1b; + T8 = Rp[WS(rs, 4)]; + T9 = Rm[WS(rs, 11)]; + T18 = T8 - T9; + T19 = Ip[WS(rs, 4)]; + T1a = Im[WS(rs, 11)]; + T1b = T19 + T1a; + Ta = T8 + T9; + T3h = T19 - T1a; + T1c = T18 + T1b; + T2J = T18 - T1b; + } + } + { + E T7, Te, T6f, T6g; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T4a = T7 - Te; + T6f = T16 - T13; + T6g = KP707106781 * (T2J - T2K); + T6h = T6f + T6g; + T7Z = T6f - T6g; + } + { + E T6N, T6O, T17, T1i; + T6N = T2M + T2P; + T6O = KP707106781 * (T1c + T1h); + T6P = T6N - T6O; + T8e = T6O + T6N; + T17 = T13 + T16; + T1i = KP707106781 * (T1c - T1h); + T1j = T17 + T1i; + T4v = T17 - T1i; + } + { + E T2L, T2Q, T5A, T5B; + T2L = KP707106781 * (T2J + T2K); + T2Q = T2M - T2P; + T2R = T2L + T2Q; + T4L = T2Q - T2L; + T5A = T3 - T6; + T5B = T3i - T3h; + T5C = T5A + T5B; + T7E = T5A - T5B; + } + { + E T68, T69, T3j, T3m; + T68 = Ta - Td; + T69 = T3k - T3l; + T6a = T68 + T69; + T7U = T69 - T68; + T3j = T3h + T3i; + T3m = T3k + T3l; + T3n = T3j + T3m; + T4q = T3m - T3j; + } + } + { + E TR, T5S, T29, T2t, T2c, T5W, T2w, T37, TY, T5T, T5V, T2i, T2n, T2r, T34; + E T2q, T6A, T6B; + { + E TL, TM, TN, TO, TP, TQ; + TL = Rm[0]; + TM = Rp[WS(rs, 15)]; + TN = TL + TM; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 8)]; + TQ = TO + TP; + TR = TN + TQ; + 
T5S = TN - TQ; + T29 = TO - TP; + T2t = TL - TM; + } + { + E T2a, T2b, T35, T2u, T2v, T36; + T2a = Im[0]; + T2b = Ip[WS(rs, 15)]; + T35 = T2b - T2a; + T2u = Ip[WS(rs, 7)]; + T2v = Im[WS(rs, 8)]; + T36 = T2u - T2v; + T2c = T2a + T2b; + T5W = T35 - T36; + T2w = T2u + T2v; + T37 = T35 + T36; + } + { + E TU, T2e, T2h, T32, TX, T2j, T2m, T33; + { + E TS, TT, T2f, T2g; + TS = Rp[WS(rs, 3)]; + TT = Rm[WS(rs, 12)]; + TU = TS + TT; + T2e = TS - TT; + T2f = Ip[WS(rs, 3)]; + T2g = Im[WS(rs, 12)]; + T2h = T2f + T2g; + T32 = T2f - T2g; + } + { + E TV, TW, T2k, T2l; + TV = Rm[WS(rs, 4)]; + TW = Rp[WS(rs, 11)]; + TX = TV + TW; + T2j = TV - TW; + T2k = Im[WS(rs, 4)]; + T2l = Ip[WS(rs, 11)]; + T2m = T2k + T2l; + T33 = T2l - T2k; + } + TY = TU + TX; + T5T = T33 - T32; + T5V = TU - TX; + T2i = T2e + T2h; + T2n = T2j + T2m; + T2r = T2j - T2m; + T34 = T32 + T33; + T2q = T2e - T2h; + } + TZ = TR + TY; + T38 = T34 + T37; + { + E T2d, T2o, T7K, T7L; + T2d = T29 - T2c; + T2o = KP707106781 * (T2i - T2n); + T2p = T2d + T2o; + T4B = T2d - T2o; + T7K = T5S - T5T; + T7L = T5W - T5V; + T7M = FMA(KP382683432, T7K, KP923879532 * T7L); + T7R = FNMS(KP923879532, T7K, KP382683432 * T7L); + } + { + E T2s, T2x, T5U, T5X; + T2s = KP707106781 * (T2q + T2r); + T2x = T2t - T2w; + T2y = T2s + T2x; + T4C = T2x - T2s; + T5U = T5S + T5T; + T5X = T5V + T5W; + T5Y = FMA(KP923879532, T5U, KP382683432 * T5X); + T63 = FNMS(KP382683432, T5U, KP923879532 * T5X); + } + T6A = T2t + T2w; + T6B = KP707106781 * (T2i + T2n); + T6C = T6A - T6B; + T86 = T6B + T6A; + { + E T4g, T4h, T6x, T6y; + T4g = TR - TY; + T4h = T37 - T34; + T4i = T4g + T4h; + T4n = T4h - T4g; + T6x = KP707106781 * (T2q - T2r); + T6y = T29 + T2c; + T6z = T6x - T6y; + T85 = T6y + T6x; + } + } + { + E TC, T5L, T1I, T22, T1L, T5P, T25, T30, TJ, T5M, T5O, T1R, T1W, T20, T2X; + E T1Z, T6t, T6u; + { + E Tw, Tx, Ty, Tz, TA, TB; + Tw = Rp[WS(rs, 1)]; + Tx = Rm[WS(rs, 14)]; + Ty = Tw + Tx; + Tz = Rp[WS(rs, 9)]; + TA = Rm[WS(rs, 6)]; + TB = Tz + TA; + TC = Ty + 
TB; + T5L = Ty - TB; + T1I = Tz - TA; + T22 = Tw - Tx; + } + { + E T1J, T1K, T2Y, T23, T24, T2Z; + T1J = Ip[WS(rs, 1)]; + T1K = Im[WS(rs, 14)]; + T2Y = T1J - T1K; + T23 = Ip[WS(rs, 9)]; + T24 = Im[WS(rs, 6)]; + T2Z = T23 - T24; + T1L = T1J + T1K; + T5P = T2Y - T2Z; + T25 = T23 + T24; + T30 = T2Y + T2Z; + } + { + E TF, T1N, T1Q, T2V, TI, T1S, T1V, T2W; + { + E TD, TE, T1O, T1P; + TD = Rp[WS(rs, 5)]; + TE = Rm[WS(rs, 10)]; + TF = TD + TE; + T1N = TD - TE; + T1O = Ip[WS(rs, 5)]; + T1P = Im[WS(rs, 10)]; + T1Q = T1O + T1P; + T2V = T1O - T1P; + } + { + E TG, TH, T1T, T1U; + TG = Rm[WS(rs, 2)]; + TH = Rp[WS(rs, 13)]; + TI = TG + TH; + T1S = TG - TH; + T1T = Im[WS(rs, 2)]; + T1U = Ip[WS(rs, 13)]; + T1V = T1T + T1U; + T2W = T1U - T1T; + } + TJ = TF + TI; + T5M = T2W - T2V; + T5O = TF - TI; + T1R = T1N + T1Q; + T1W = T1S + T1V; + T20 = T1S - T1V; + T2X = T2V + T2W; + T1Z = T1N - T1Q; + } + TK = TC + TJ; + T31 = T2X + T30; + { + E T1M, T1X, T7H, T7I; + T1M = T1I + T1L; + T1X = KP707106781 * (T1R - T1W); + T1Y = T1M + T1X; + T4y = T1M - T1X; + T7H = T5L - T5M; + T7I = T5P - T5O; + T7J = FNMS(KP923879532, T7I, KP382683432 * T7H); + T7Q = FMA(KP923879532, T7H, KP382683432 * T7I); + } + { + E T21, T26, T5N, T5Q; + T21 = KP707106781 * (T1Z + T20); + T26 = T22 - T25; + T27 = T21 + T26; + T4z = T26 - T21; + T5N = T5L + T5M; + T5Q = T5O + T5P; + T5R = FNMS(KP382683432, T5Q, KP923879532 * T5N); + T62 = FMA(KP382683432, T5N, KP923879532 * T5Q); + } + T6t = T22 + T25; + T6u = KP707106781 * (T1R + T1W); + T6v = T6t - T6u; + T83 = T6u + T6t; + { + E T4d, T4e, T6q, T6r; + T4d = TC - TJ; + T4e = T30 - T2X; + T4f = T4d - T4e; + T4m = T4d + T4e; + T6q = T1L - T1I; + T6r = KP707106781 * (T1Z - T20); + T6s = T6q + T6r; + T82 = T6q - T6r; + } + } + { + E Ti, T3a, Tl, T3b, T1o, T1t, T6j, T6i, T5E, T5D, Tp, T3d, Ts, T3e, T1z; + E T1E, T6m, T6l, T5H, T5G; + { + E T1p, T1n, T1k, T1s; + { + E Tg, Th, T1l, T1m; + Tg = Rp[WS(rs, 2)]; + Th = Rm[WS(rs, 13)]; + Ti = Tg + Th; + T1p = Tg - Th; + T1l = 
Ip[WS(rs, 2)]; + T1m = Im[WS(rs, 13)]; + T1n = T1l + T1m; + T3a = T1l - T1m; + } + { + E Tj, Tk, T1q, T1r; + Tj = Rp[WS(rs, 10)]; + Tk = Rm[WS(rs, 5)]; + Tl = Tj + Tk; + T1k = Tj - Tk; + T1q = Ip[WS(rs, 10)]; + T1r = Im[WS(rs, 5)]; + T1s = T1q + T1r; + T3b = T1q - T1r; + } + T1o = T1k + T1n; + T1t = T1p - T1s; + T6j = T1p + T1s; + T6i = T1n - T1k; + T5E = T3a - T3b; + T5D = Ti - Tl; + } + { + E T1A, T1y, T1v, T1D; + { + E Tn, To, T1w, T1x; + Tn = Rm[WS(rs, 1)]; + To = Rp[WS(rs, 14)]; + Tp = Tn + To; + T1A = Tn - To; + T1w = Im[WS(rs, 1)]; + T1x = Ip[WS(rs, 14)]; + T1y = T1w + T1x; + T3d = T1x - T1w; + } + { + E Tq, Tr, T1B, T1C; + Tq = Rp[WS(rs, 6)]; + Tr = Rm[WS(rs, 9)]; + Ts = Tq + Tr; + T1v = Tq - Tr; + T1B = Ip[WS(rs, 6)]; + T1C = Im[WS(rs, 9)]; + T1D = T1B + T1C; + T3e = T1B - T1C; + } + T1z = T1v - T1y; + T1E = T1A - T1D; + T6m = T1A + T1D; + T6l = T1v + T1y; + T5H = T3d - T3e; + T5G = Tp - Ts; + } + { + E Tm, Tt, T6k, T6n; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T4p = Tm - Tt; + T6k = FMA(KP382683432, T6i, KP923879532 * T6j); + T6n = FMA(KP382683432, T6l, KP923879532 * T6m); + T6o = T6k - T6n; + T8f = T6k + T6n; + } + { + E T6K, T6L, T1u, T1F; + T6K = FNMS(KP923879532, T6i, KP382683432 * T6j); + T6L = FNMS(KP923879532, T6l, KP382683432 * T6m); + T6M = T6K + T6L; + T80 = T6K - T6L; + T1u = FMA(KP923879532, T1o, KP382683432 * T1t); + T1F = FNMS(KP382683432, T1E, KP923879532 * T1z); + T1G = T1u + T1F; + T4K = T1F - T1u; + } + { + E T2G, T2H, T5F, T5I; + T2G = FNMS(KP382683432, T1o, KP923879532 * T1t); + T2H = FMA(KP382683432, T1z, KP923879532 * T1E); + T2I = T2G + T2H; + T4w = T2G - T2H; + T5F = T5D - T5E; + T5I = T5G + T5H; + T5J = KP707106781 * (T5F + T5I); + T7T = KP707106781 * (T5F - T5I); + } + { + E T65, T66, T3c, T3f; + T65 = T5D + T5E; + T66 = T5H - T5G; + T67 = KP707106781 * (T65 + T66); + T7F = KP707106781 * (T66 - T65); + T3c = T3a + T3b; + T3f = T3d + T3e; + T3g = T3c + T3f; + T4b = T3f - T3c; + } + } + { + E T11, T3s, T3p, T3u, T3K, T40, 
T3G, T3Y, T2T, T43, T3z, T3P, T2B, T45, T3x; + E T3T; + { + E Tv, T10, T3E, T3F; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + T3s = Tv - T10; + { + E T39, T3o, T3I, T3J; + T39 = T31 + T38; + T3o = T3g + T3n; + T3p = T39 + T3o; + T3u = T3o - T39; + T3I = TK - TZ; + T3J = T3n - T3g; + T3K = T3I + T3J; + T40 = T3J - T3I; + } + T3E = Tf - Tu; + T3F = T38 - T31; + T3G = T3E + T3F; + T3Y = T3E - T3F; + { + E T2S, T3N, T2F, T3O, T2D, T2E; + T2S = T2I + T2R; + T3N = T1j - T1G; + T2D = FNMS(KP195090322, T1Y, KP980785280 * T27); + T2E = FMA(KP195090322, T2p, KP980785280 * T2y); + T2F = T2D + T2E; + T3O = T2D - T2E; + T2T = T2F + T2S; + T43 = T3N - T3O; + T3z = T2S - T2F; + T3P = T3N + T3O; + } + { + E T1H, T3S, T2A, T3R, T28, T2z; + T1H = T1j + T1G; + T3S = T2R - T2I; + T28 = FMA(KP980785280, T1Y, KP195090322 * T27); + T2z = FNMS(KP195090322, T2y, KP980785280 * T2p); + T2A = T28 + T2z; + T3R = T2z - T28; + T2B = T1H + T2A; + T45 = T3S - T3R; + T3x = T1H - T2A; + T3T = T3R + T3S; + } + } + { + E T2U, T3q, T12, T2C; + T12 = W[0]; + T2C = W[1]; + T2U = FMA(T12, T2B, T2C * T2T); + T3q = FNMS(T2C, T2B, T12 * T2T); + Rp[0] = T11 - T2U; + Ip[0] = T3p + T3q; + Rm[0] = T11 + T2U; + Im[0] = T3q - T3p; + } + { + E T41, T47, T46, T48; + { + E T3X, T3Z, T42, T44; + T3X = W[46]; + T3Z = W[47]; + T41 = FNMS(T3Z, T40, T3X * T3Y); + T47 = FMA(T3Z, T3Y, T3X * T40); + T42 = W[48]; + T44 = W[49]; + T46 = FMA(T42, T43, T44 * T45); + T48 = FNMS(T44, T43, T42 * T45); + } + Rp[WS(rs, 12)] = T41 - T46; + Ip[WS(rs, 12)] = T47 + T48; + Rm[WS(rs, 12)] = T41 + T46; + Im[WS(rs, 12)] = T48 - T47; + } + { + E T3v, T3B, T3A, T3C; + { + E T3r, T3t, T3w, T3y; + T3r = W[30]; + T3t = W[31]; + T3v = FNMS(T3t, T3u, T3r * T3s); + T3B = FMA(T3t, T3s, T3r * T3u); + T3w = W[32]; + T3y = W[33]; + T3A = FMA(T3w, T3x, T3y * T3z); + T3C = FNMS(T3y, T3x, T3w * T3z); + } + Rp[WS(rs, 8)] = T3v - T3A; + Ip[WS(rs, 8)] = T3B + T3C; + Rm[WS(rs, 8)] = T3v + T3A; + Im[WS(rs, 8)] = T3C - T3B; + } + { + E T3L, T3V, T3U, 
T3W; + { + E T3D, T3H, T3M, T3Q; + T3D = W[14]; + T3H = W[15]; + T3L = FNMS(T3H, T3K, T3D * T3G); + T3V = FMA(T3H, T3G, T3D * T3K); + T3M = W[16]; + T3Q = W[17]; + T3U = FMA(T3M, T3P, T3Q * T3T); + T3W = FNMS(T3Q, T3P, T3M * T3T); + } + Rp[WS(rs, 4)] = T3L - T3U; + Ip[WS(rs, 4)] = T3V + T3W; + Rm[WS(rs, 4)] = T3L + T3U; + Im[WS(rs, 4)] = T3W - T3V; + } + } + { + E T7O, T8m, T7W, T8o, T8E, T8U, T8A, T8S, T8h, T8X, T8t, T8J, T89, T8Z, T8r; + E T8N; + { + E T7G, T7N, T8y, T8z; + T7G = T7E + T7F; + T7N = T7J + T7M; + T7O = T7G + T7N; + T8m = T7G - T7N; + { + E T7S, T7V, T8C, T8D; + T7S = T7Q + T7R; + T7V = T7T + T7U; + T7W = T7S + T7V; + T8o = T7V - T7S; + T8C = T7J - T7M; + T8D = T7U - T7T; + T8E = T8C + T8D; + T8U = T8D - T8C; + } + T8y = T7E - T7F; + T8z = T7R - T7Q; + T8A = T8y + T8z; + T8S = T8y - T8z; + { + E T8g, T8H, T8d, T8I, T8b, T8c; + T8g = T8e - T8f; + T8H = T7Z - T80; + T8b = FNMS(KP980785280, T82, KP195090322 * T83); + T8c = FNMS(KP980785280, T85, KP195090322 * T86); + T8d = T8b + T8c; + T8I = T8b - T8c; + T8h = T8d + T8g; + T8X = T8H - T8I; + T8t = T8g - T8d; + T8J = T8H + T8I; + } + { + E T81, T8L, T88, T8M, T84, T87; + T81 = T7Z + T80; + T8L = T8f + T8e; + T84 = FMA(KP195090322, T82, KP980785280 * T83); + T87 = FMA(KP195090322, T85, KP980785280 * T86); + T88 = T84 - T87; + T8M = T84 + T87; + T89 = T81 + T88; + T8Z = T8M + T8L; + T8r = T81 - T88; + T8N = T8L - T8M; + } + } + { + E T7X, T8j, T8i, T8k; + { + E T7D, T7P, T7Y, T8a; + T7D = W[10]; + T7P = W[11]; + T7X = FNMS(T7P, T7W, T7D * T7O); + T8j = FMA(T7P, T7O, T7D * T7W); + T7Y = W[12]; + T8a = W[13]; + T8i = FMA(T7Y, T89, T8a * T8h); + T8k = FNMS(T8a, T89, T7Y * T8h); + } + Rp[WS(rs, 3)] = T7X - T8i; + Ip[WS(rs, 3)] = T8j + T8k; + Rm[WS(rs, 3)] = T7X + T8i; + Im[WS(rs, 3)] = T8k - T8j; + } + { + E T8V, T91, T90, T92; + { + E T8R, T8T, T8W, T8Y; + T8R = W[58]; + T8T = W[59]; + T8V = FNMS(T8T, T8U, T8R * T8S); + T91 = FMA(T8T, T8S, T8R * T8U); + T8W = W[60]; + T8Y = W[61]; + T90 = FMA(T8W, T8X, T8Y * 
T8Z); + T92 = FNMS(T8Y, T8X, T8W * T8Z); + } + Rp[WS(rs, 15)] = T8V - T90; + Ip[WS(rs, 15)] = T91 + T92; + Rm[WS(rs, 15)] = T8V + T90; + Im[WS(rs, 15)] = T92 - T91; + } + { + E T8p, T8v, T8u, T8w; + { + E T8l, T8n, T8q, T8s; + T8l = W[42]; + T8n = W[43]; + T8p = FNMS(T8n, T8o, T8l * T8m); + T8v = FMA(T8n, T8m, T8l * T8o); + T8q = W[44]; + T8s = W[45]; + T8u = FMA(T8q, T8r, T8s * T8t); + T8w = FNMS(T8s, T8r, T8q * T8t); + } + Rp[WS(rs, 11)] = T8p - T8u; + Ip[WS(rs, 11)] = T8v + T8w; + Rm[WS(rs, 11)] = T8p + T8u; + Im[WS(rs, 11)] = T8w - T8v; + } + { + E T8F, T8P, T8O, T8Q; + { + E T8x, T8B, T8G, T8K; + T8x = W[26]; + T8B = W[27]; + T8F = FNMS(T8B, T8E, T8x * T8A); + T8P = FMA(T8B, T8A, T8x * T8E); + T8G = W[28]; + T8K = W[29]; + T8O = FMA(T8G, T8J, T8K * T8N); + T8Q = FNMS(T8K, T8J, T8G * T8N); + } + Rp[WS(rs, 7)] = T8F - T8O; + Ip[WS(rs, 7)] = T8P + T8Q; + Rm[WS(rs, 7)] = T8F + T8O; + Im[WS(rs, 7)] = T8Q - T8P; + } + } + { + E T4k, T4S, T4s, T4U, T5a, T5q, T56, T5o, T4N, T5t, T4Z, T5f, T4F, T5v, T4X; + E T5j; + { + E T4c, T4j, T54, T55; + T4c = T4a + T4b; + T4j = KP707106781 * (T4f + T4i); + T4k = T4c + T4j; + T4S = T4c - T4j; + { + E T4o, T4r, T58, T59; + T4o = KP707106781 * (T4m + T4n); + T4r = T4p + T4q; + T4s = T4o + T4r; + T4U = T4r - T4o; + T58 = KP707106781 * (T4f - T4i); + T59 = T4q - T4p; + T5a = T58 + T59; + T5q = T59 - T58; + } + T54 = T4a - T4b; + T55 = KP707106781 * (T4n - T4m); + T56 = T54 + T55; + T5o = T54 - T55; + { + E T4M, T5d, T4J, T5e, T4H, T4I; + T4M = T4K + T4L; + T5d = T4v - T4w; + T4H = FNMS(KP831469612, T4y, KP555570233 * T4z); + T4I = FMA(KP831469612, T4B, KP555570233 * T4C); + T4J = T4H + T4I; + T5e = T4H - T4I; + T4N = T4J + T4M; + T5t = T5d - T5e; + T4Z = T4M - T4J; + T5f = T5d + T5e; + } + { + E T4x, T5i, T4E, T5h, T4A, T4D; + T4x = T4v + T4w; + T5i = T4L - T4K; + T4A = FMA(KP555570233, T4y, KP831469612 * T4z); + T4D = FNMS(KP831469612, T4C, KP555570233 * T4B); + T4E = T4A + T4D; + T5h = T4D - T4A; + T4F = T4x + T4E; + T5v = T5i - 
T5h; + T4X = T4x - T4E; + T5j = T5h + T5i; + } + } + { + E T4t, T4P, T4O, T4Q; + { + E T49, T4l, T4u, T4G; + T49 = W[6]; + T4l = W[7]; + T4t = FNMS(T4l, T4s, T49 * T4k); + T4P = FMA(T4l, T4k, T49 * T4s); + T4u = W[8]; + T4G = W[9]; + T4O = FMA(T4u, T4F, T4G * T4N); + T4Q = FNMS(T4G, T4F, T4u * T4N); + } + Rp[WS(rs, 2)] = T4t - T4O; + Ip[WS(rs, 2)] = T4P + T4Q; + Rm[WS(rs, 2)] = T4t + T4O; + Im[WS(rs, 2)] = T4Q - T4P; + } + { + E T5r, T5x, T5w, T5y; + { + E T5n, T5p, T5s, T5u; + T5n = W[54]; + T5p = W[55]; + T5r = FNMS(T5p, T5q, T5n * T5o); + T5x = FMA(T5p, T5o, T5n * T5q); + T5s = W[56]; + T5u = W[57]; + T5w = FMA(T5s, T5t, T5u * T5v); + T5y = FNMS(T5u, T5t, T5s * T5v); + } + Rp[WS(rs, 14)] = T5r - T5w; + Ip[WS(rs, 14)] = T5x + T5y; + Rm[WS(rs, 14)] = T5r + T5w; + Im[WS(rs, 14)] = T5y - T5x; + } + { + E T4V, T51, T50, T52; + { + E T4R, T4T, T4W, T4Y; + T4R = W[38]; + T4T = W[39]; + T4V = FNMS(T4T, T4U, T4R * T4S); + T51 = FMA(T4T, T4S, T4R * T4U); + T4W = W[40]; + T4Y = W[41]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T52 = FNMS(T4Y, T4X, T4W * T4Z); + } + Rp[WS(rs, 10)] = T4V - T50; + Ip[WS(rs, 10)] = T51 + T52; + Rm[WS(rs, 10)] = T4V + T50; + Im[WS(rs, 10)] = T52 - T51; + } + { + E T5b, T5l, T5k, T5m; + { + E T53, T57, T5c, T5g; + T53 = W[22]; + T57 = W[23]; + T5b = FNMS(T57, T5a, T53 * T56); + T5l = FMA(T57, T56, T53 * T5a); + T5c = W[24]; + T5g = W[25]; + T5k = FMA(T5c, T5f, T5g * T5j); + T5m = FNMS(T5g, T5f, T5c * T5j); + } + Rp[WS(rs, 6)] = T5b - T5k; + Ip[WS(rs, 6)] = T5l + T5m; + Rm[WS(rs, 6)] = T5b + T5k; + Im[WS(rs, 6)] = T5m - T5l; + } + } + { + E T60, T6W, T6c, T6Y, T7e, T7u, T7a, T7s, T6R, T7x, T73, T7j, T6F, T7z, T71; + E T7n; + { + E T5K, T5Z, T78, T79; + T5K = T5C + T5J; + T5Z = T5R + T5Y; + T60 = T5K + T5Z; + T6W = T5K - T5Z; + { + E T64, T6b, T7c, T7d; + T64 = T62 + T63; + T6b = T67 + T6a; + T6c = T64 + T6b; + T6Y = T6b - T64; + T7c = T5R - T5Y; + T7d = T6a - T67; + T7e = T7c + T7d; + T7u = T7d - T7c; + } + T78 = T5C - T5J; + T79 = T63 - T62; + T7a = 
T78 + T79; + T7s = T78 - T79; + { + E T6Q, T7h, T6J, T7i, T6H, T6I; + T6Q = T6M + T6P; + T7h = T6h - T6o; + T6H = FNMS(KP555570233, T6s, KP831469612 * T6v); + T6I = FMA(KP555570233, T6z, KP831469612 * T6C); + T6J = T6H + T6I; + T7i = T6H - T6I; + T6R = T6J + T6Q; + T7x = T7h - T7i; + T73 = T6Q - T6J; + T7j = T7h + T7i; + } + { + E T6p, T7m, T6E, T7l, T6w, T6D; + T6p = T6h + T6o; + T7m = T6P - T6M; + T6w = FMA(KP831469612, T6s, KP555570233 * T6v); + T6D = FNMS(KP555570233, T6C, KP831469612 * T6z); + T6E = T6w + T6D; + T7l = T6D - T6w; + T6F = T6p + T6E; + T7z = T7m - T7l; + T71 = T6p - T6E; + T7n = T7l + T7m; + } + } + { + E T6d, T6T, T6S, T6U; + { + E T5z, T61, T6e, T6G; + T5z = W[2]; + T61 = W[3]; + T6d = FNMS(T61, T6c, T5z * T60); + T6T = FMA(T61, T60, T5z * T6c); + T6e = W[4]; + T6G = W[5]; + T6S = FMA(T6e, T6F, T6G * T6R); + T6U = FNMS(T6G, T6F, T6e * T6R); + } + Rp[WS(rs, 1)] = T6d - T6S; + Ip[WS(rs, 1)] = T6T + T6U; + Rm[WS(rs, 1)] = T6d + T6S; + Im[WS(rs, 1)] = T6U - T6T; + } + { + E T7v, T7B, T7A, T7C; + { + E T7r, T7t, T7w, T7y; + T7r = W[50]; + T7t = W[51]; + T7v = FNMS(T7t, T7u, T7r * T7s); + T7B = FMA(T7t, T7s, T7r * T7u); + T7w = W[52]; + T7y = W[53]; + T7A = FMA(T7w, T7x, T7y * T7z); + T7C = FNMS(T7y, T7x, T7w * T7z); + } + Rp[WS(rs, 13)] = T7v - T7A; + Ip[WS(rs, 13)] = T7B + T7C; + Rm[WS(rs, 13)] = T7v + T7A; + Im[WS(rs, 13)] = T7C - T7B; + } + { + E T6Z, T75, T74, T76; + { + E T6V, T6X, T70, T72; + T6V = W[34]; + T6X = W[35]; + T6Z = FNMS(T6X, T6Y, T6V * T6W); + T75 = FMA(T6X, T6W, T6V * T6Y); + T70 = W[36]; + T72 = W[37]; + T74 = FMA(T70, T71, T72 * T73); + T76 = FNMS(T72, T71, T70 * T73); + } + Rp[WS(rs, 9)] = T6Z - T74; + Ip[WS(rs, 9)] = T75 + T76; + Rm[WS(rs, 9)] = T6Z + T74; + Im[WS(rs, 9)] = T76 - T75; + } + { + E T7f, T7p, T7o, T7q; + { + E T77, T7b, T7g, T7k; + T77 = W[18]; + T7b = W[19]; + T7f = FNMS(T7b, T7e, T77 * T7a); + T7p = FMA(T7b, T7a, T77 * T7e); + T7g = W[20]; + T7k = W[21]; + T7o = FMA(T7g, T7j, T7k * T7n); + T7q = FNMS(T7k, T7j, 
T7g * T7n); + } + Rp[WS(rs, 5)] = T7f - T7o; + Ip[WS(rs, 5)] = T7p + T7q; + Rm[WS(rs, 5)] = T7f + T7o; + Im[WS(rs, 5)] = T7q - T7p; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cbdft_32", twinstr, &GENUS, {404, 114, 94, 0} }; + +void X(codelet_hc2cbdft_32) (planner *p) { + X(khc2c_register) (p, hc2cbdft_32, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:44 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hc2cbdft_4 -include hc2cb.h */ + +/* + * This function contains 30 FP additions, 12 FP multiplications, + * (or, 24 additions, 6 multiplications, 6 fused multiply/add), + * 35 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Ty, TB, Tw, TE, TA, TF, Tz, TG, TC; + { + E T4, Tg, T3, Tm, Tc, T5, Th, Ti; + { + E T1, T2, Ta, Tb; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + Ta = Ip[0]; + Tb = Im[WS(rs, 1)]; + T4 = Rp[WS(rs, 1)]; + Tg = T1 - T2; + T3 = T1 + T2; + Tm = Ta - Tb; + Tc = Ta + Tb; + T5 = Rm[0]; + Th = Ip[WS(rs, 1)]; + Ti = Im[0]; + } + { + E T8, Td, T7, Ts, To, Tv, Tk, Te, Tf; + T8 = W[0]; + { + E T9, T6, Tn, Tj; + T9 = T4 - T5; + T6 = T4 + T5; + Tn = Th - Ti; + Tj = Th + Ti; + Ty = Tc - T9; + Td = T9 + Tc; + T7 = T3 + T6; + Ts = T3 - T6; + To = Tm + Tn; + Tv = Tm - Tn; + TB = Tg + Tj; + Tk = Tg - Tj; + Te = T8 * Td; + } + Tf = W[1]; + { + E Tr, Tu, Tt, TD, Tx, Tp, Tl, Tq; + Tr = W[2]; + Tp = T8 * Tk; + Tu = W[3]; + Tl = FMA(Tf, Tk, Te); + Tt = Tr * Ts; + Tq = FNMS(Tf, Td, Tp); + TD = Tu * Ts; + Rm[0] = T7 + Tl; + Rp[0] = T7 - Tl; + Im[0] = Tq - To; + Ip[0] = To + Tq; + Tx = W[4]; + Tw = FNMS(Tu, Tv, Tt); + TE = FMA(Tr, Tv, 
TD); + TA = W[5]; + TF = Tx * TB; + Tz = Tx * Ty; + } + } + } + TG = FNMS(TA, Ty, TF); + TC = FMA(TA, TB, Tz); + Im[WS(rs, 1)] = TG - TE; + Ip[WS(rs, 1)] = TE + TG; + Rm[WS(rs, 1)] = Tw + TC; + Rp[WS(rs, 1)] = Tw - TC; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cbdft_4", twinstr, &GENUS, {24, 6, 6, 0} }; + +void X(codelet_hc2cbdft_4) (planner *p) { + X(khc2c_register) (p, hc2cbdft_4, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -dif -name hc2cbdft_4 -include hc2cb.h */ + +/* + * This function contains 30 FP additions, 12 FP multiplications, + * (or, 24 additions, 6 multiplications, 6 fused multiply/add), + * 19 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E T3, Tl, T6, Tm, Td, Tj, Tx, Tv, Ts, Tq; + { + E Tf, Tc, T9, Ti; + { + E T1, T2, Ta, Tb; + T1 = Rp[0]; + T2 = Rm[WS(rs, 1)]; + T3 = T1 + T2; + Tf = T1 - T2; + Ta = Ip[0]; + Tb = Im[WS(rs, 1)]; + Tc = Ta + Tb; + Tl = Ta - Tb; + } + { + E T4, T5, Tg, Th; + T4 = Rp[WS(rs, 1)]; + T5 = Rm[0]; + T6 = T4 + T5; + T9 = T4 - T5; + Tg = Ip[WS(rs, 1)]; + Th = Im[0]; + Ti = Tg + Th; + Tm = Tg - Th; + } + Td = T9 + Tc; + Tj = Tf - Ti; + Tx = Tf + Ti; + Tv = Tc - T9; + Ts = Tl - Tm; + Tq = T3 - T6; + } + { + E T7, Tn, Tk, To, T8, Te; + T7 = T3 + T6; + Tn = Tl + Tm; + T8 = W[0]; + Te = W[1]; + Tk = FMA(T8, Td, Te * Tj); + To = FNMS(Te, Td, T8 * Tj); + Rp[0] = T7 - Tk; + Ip[0] = Tn + To; + Rm[0] = T7 + Tk; + Im[0] = To - Tn; + } + { + E Tt, Tz, Ty, TA; + { + E Tp, Tr, Tu, Tw; + Tp = W[2]; + Tr = W[3]; + Tt = FNMS(Tr, Ts, Tp * 
Tq); + Tz = FMA(Tr, Tq, Tp * Ts); + Tu = W[4]; + Tw = W[5]; + Ty = FMA(Tu, Tv, Tw * Tx); + TA = FNMS(Tw, Tv, Tu * Tx); + } + Rp[WS(rs, 1)] = Tt - Ty; + Ip[WS(rs, 1)] = Tz + TA; + Rm[WS(rs, 1)] = Tt + Ty; + Im[WS(rs, 1)] = TA - Tz; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cbdft_4", twinstr, &GENUS, {24, 6, 6, 0} }; + +void X(codelet_hc2cbdft_4) (planner *p) { + X(khc2c_register) (p, hc2cbdft_4, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:44 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cbdft_6 -include hc2cb.h */ + +/* + * This function contains 58 FP additions, 32 FP multiplications, + * (or, 36 additions, 10 multiplications, 22 fused multiply/add), + * 52 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E T18, T1b, T16, T1e, T1a, T1f, T19, T1g, T1c; + { + E Tw, T4, TV, Tj, TP, TH, Tr, TY, T5, T6, Ta, Ty; + { + E Tg, TF, Tf, TD, Tp, Th; + { + E Td, Te, Tn, To; + Td = Ip[WS(rs, 1)]; + Te = Im[WS(rs, 1)]; + Tn = Ip[0]; + To = Im[WS(rs, 2)]; + Tg = Ip[WS(rs, 2)]; + TF = Te + Td; + Tf = Td - Te; + TD = Tn + To; + Tp = Tn - To; + Th = Im[0]; + } + { + E T2, T3, T8, T9; + T2 = Rp[0]; + T3 = Rm[WS(rs, 2)]; + { + E Tq, TE, Ti, TG; + T8 = Rm[WS(rs, 1)]; + TE = Tg + Th; + Ti = Tg - Th; + Tw = T2 - T3; + T4 = T2 + T3; + TG = TE - TF; + TV = TF + TE; + Tq = Tf + Ti; + Tj = Tf - Ti; + TP = FNMS(KP500000000, TG, TD); + TH = TD + TG; + T9 = Rp[WS(rs, 1)]; + Tr = FNMS(KP500000000, Tq, Tp); + TY = Tp + Tq; + } + T5 = Rp[WS(rs, 2)]; + T6 = Rm[0]; + Ta = T8 + T9; + Ty = T8 - T9; + } + 
} + { + E TO, TT, Ts, TA, TR, Tc, TN, TW, TS, Tx, T7; + Tx = T5 - T6; + T7 = T5 + T6; + TO = W[0]; + TT = W[1]; + { + E Tz, TQ, Tb, TU; + Tz = Tx + Ty; + TQ = Tx - Ty; + Tb = T7 + Ta; + Ts = T7 - Ta; + TU = FNMS(KP500000000, Tz, Tw); + TA = Tw + Tz; + TR = FMA(KP866025403, TQ, TP); + T18 = FNMS(KP866025403, TQ, TP); + Tc = FNMS(KP500000000, Tb, T4); + TN = T4 + Tb; + T1b = FMA(KP866025403, TV, TU); + TW = FNMS(KP866025403, TV, TU); + TS = TO * TR; + } + { + E T15, Tt, T12, T1, Tm, TI, TM, Tl, TJ; + { + E Tv, TC, TB, TL, Tk, TZ, TX, T10; + T15 = FMA(KP866025403, Ts, Tr); + Tt = FNMS(KP866025403, Ts, Tr); + TZ = TO * TW; + TX = FMA(TT, TW, TS); + Tv = W[4]; + TC = W[5]; + T10 = FNMS(TT, TR, TZ); + Rm[0] = TN + TX; + Rp[0] = TN - TX; + TB = Tv * TA; + Im[0] = T10 - TY; + Ip[0] = TY + T10; + TL = TC * TA; + Tk = FNMS(KP866025403, Tj, Tc); + T12 = FMA(KP866025403, Tj, Tc); + T1 = W[3]; + Tm = W[2]; + TI = FNMS(TC, TH, TB); + TM = FMA(Tv, TH, TL); + Tl = T1 * Tk; + TJ = Tm * Tk; + } + { + E T11, T14, T13, T1d, T17, Tu, TK; + Tu = FMA(Tm, Tt, Tl); + TK = FNMS(T1, Tt, TJ); + T11 = W[6]; + T14 = W[7]; + Im[WS(rs, 1)] = TI - Tu; + Ip[WS(rs, 1)] = Tu + TI; + Rm[WS(rs, 1)] = TK + TM; + Rp[WS(rs, 1)] = TK - TM; + T13 = T11 * T12; + T1d = T14 * T12; + T17 = W[8]; + T16 = FNMS(T14, T15, T13); + T1e = FMA(T11, T15, T1d); + T1a = W[9]; + T1f = T17 * T1b; + T19 = T17 * T18; + } + } + } + } + T1g = FNMS(T1a, T18, T1f); + T1c = FMA(T1a, T1b, T19); + Im[WS(rs, 2)] = T1g - T1e; + Ip[WS(rs, 2)] = T1e + T1g; + Rm[WS(rs, 2)] = T16 + T1c; + Rp[WS(rs, 2)] = T16 - T1c; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cbdft_6", twinstr, &GENUS, {36, 10, 22, 0} }; + +void X(codelet_hc2cbdft_6) (planner *p) { + X(khc2c_register) (p, hc2cbdft_6, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name 
hc2cbdft_6 -include hc2cb.h */ + +/* + * This function contains 58 FP additions, 28 FP multiplications, + * (or, 44 additions, 14 multiplications, 14 fused multiply/add), + * 29 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E T4, Tv, Tr, TL, Tb, Tc, Ty, TP, To, TB, Tj, TQ, Tp, Tq, TE; + E TM; + { + E Ta, Tx, T7, Tw, T2, T3; + T2 = Rp[0]; + T3 = Rm[WS(rs, 2)]; + T4 = T2 + T3; + Tv = T2 - T3; + { + E T8, T9, T5, T6; + T8 = Rm[WS(rs, 1)]; + T9 = Rp[WS(rs, 1)]; + Ta = T8 + T9; + Tx = T8 - T9; + T5 = Rp[WS(rs, 2)]; + T6 = Rm[0]; + T7 = T5 + T6; + Tw = T5 - T6; + } + Tr = KP866025403 * (T7 - Ta); + TL = KP866025403 * (Tw - Tx); + Tb = T7 + Ta; + Tc = FNMS(KP500000000, Tb, T4); + Ty = Tw + Tx; + TP = FNMS(KP500000000, Ty, Tv); + } + { + E Tf, TC, Ti, TD, Td, Te; + Td = Ip[WS(rs, 1)]; + Te = Im[WS(rs, 1)]; + Tf = Td - Te; + TC = Te + Td; + { + E Tm, Tn, Tg, Th; + Tm = Ip[0]; + Tn = Im[WS(rs, 2)]; + To = Tm - Tn; + TB = Tm + Tn; + Tg = Ip[WS(rs, 2)]; + Th = Im[0]; + Ti = Tg - Th; + TD = Tg + Th; + } + Tj = KP866025403 * (Tf - Ti); + TQ = KP866025403 * (TC + TD); + Tp = Tf + Ti; + Tq = FNMS(KP500000000, Tp, To); + TE = TC - TD; + TM = FMA(KP500000000, TE, TB); + } + { + E TJ, TT, TS, TU; + TJ = T4 + Tb; + TT = To + Tp; + { + E TN, TR, TK, TO; + TN = TL + TM; + TR = TP - TQ; + TK = W[0]; + TO = W[1]; + TS = FMA(TK, TN, TO * TR); + TU = FNMS(TO, TN, TK * TR); + } + Rp[0] = TJ - TS; + Ip[0] = TT + TU; + Rm[0] = TJ + TS; + Im[0] = TU - TT; + } + { + E TZ, T15, T14, T16; + { + E TW, TY, TV, TX; + TW = Tc + Tj; + TY = Tr + Tq; + TV = W[6]; + 
TX = W[7]; + TZ = FNMS(TX, TY, TV * TW); + T15 = FMA(TX, TW, TV * TY); + } + { + E T11, T13, T10, T12; + T11 = TM - TL; + T13 = TP + TQ; + T10 = W[8]; + T12 = W[9]; + T14 = FMA(T10, T11, T12 * T13); + T16 = FNMS(T12, T11, T10 * T13); + } + Rp[WS(rs, 2)] = TZ - T14; + Ip[WS(rs, 2)] = T15 + T16; + Rm[WS(rs, 2)] = TZ + T14; + Im[WS(rs, 2)] = T16 - T15; + } + { + E Tt, TH, TG, TI; + { + E Tk, Ts, T1, Tl; + Tk = Tc - Tj; + Ts = Tq - Tr; + T1 = W[3]; + Tl = W[2]; + Tt = FMA(T1, Tk, Tl * Ts); + TH = FNMS(T1, Ts, Tl * Tk); + } + { + E Tz, TF, Tu, TA; + Tz = Tv + Ty; + TF = TB - TE; + Tu = W[4]; + TA = W[5]; + TG = FNMS(TA, TF, Tu * Tz); + TI = FMA(TA, Tz, Tu * TF); + } + Ip[WS(rs, 1)] = Tt + TG; + Rp[WS(rs, 1)] = TH - TI; + Im[WS(rs, 1)] = TG - Tt; + Rm[WS(rs, 1)] = TH + TI; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cbdft_6", twinstr, &GENUS, {44, 14, 14, 0} }; + +void X(codelet_hc2cbdft_6) (planner *p) { + X(khc2c_register) (p, hc2cbdft_6, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/hc2cbdft_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:44 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hc2cbdft_8 -include hc2cb.h */ + +/* + * This function contains 82 FP additions, 36 FP multiplications, + * (or, 60 additions, 14 multiplications, 22 fused multiply/add), + * 55 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T1m, T1r, T1i, T1u, T1o, T1v, T1n, T1w, T1s; + { + E T1k, Tl, T1p, TE, TP, T1g, TM, T1b, T1f, T1a, TU, Tf, T1l, TH, Tw; + E T1q; + { + E TA, T3, TN, Tk, Th, T6, TO, TD, Tb, Tm, Ta, TK, Tp, Tc, Ts; + E Tt; + { + E T4, T5, TB, TC; + { + E T1, T2, Ti, Tj; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + Ti = Ip[0]; + Tj = Im[WS(rs, 3)]; + T4 = Rp[WS(rs, 2)]; + TA = T1 - T2; + T3 = T1 + T2; + TN = Ti - Tj; + Tk = Ti + Tj; + T5 = Rm[WS(rs, 1)]; + TB = Ip[WS(rs, 2)]; + TC = Im[WS(rs, 1)]; + } + { + E T8, T9, Tn, To; + T8 = Rp[WS(rs, 1)]; + Th = T4 - T5; + T6 = T4 + T5; + TO = TB - TC; + TD = TB + TC; + T9 = Rm[WS(rs, 2)]; + Tn = Ip[WS(rs, 1)]; + To = Im[WS(rs, 2)]; + Tb = Rm[0]; + Tm = T8 - T9; + Ta = T8 + T9; + TK = Tn - To; + Tp = Tn + To; + Tc = Rp[WS(rs, 3)]; + Ts = Im[0]; + Tt = Ip[WS(rs, 3)]; + } + } + { + E Tr, Td, Tu, TL, Te, T7; + T1k = Tk 
- Th; + Tl = Th + Tk; + Tr = Tb - Tc; + Td = Tb + Tc; + TL = Tt - Ts; + Tu = Ts + Tt; + T1p = TA + TD; + TE = TA - TD; + TP = TN + TO; + T1g = TN - TO; + TM = TK + TL; + T1b = TL - TK; + T1f = Ta - Td; + Te = Ta + Td; + T1a = T3 - T6; + T7 = T3 + T6; + { + E Tq, TF, TG, Tv; + Tq = Tm + Tp; + TF = Tm - Tp; + TG = Tr - Tu; + Tv = Tr + Tu; + TU = T7 - Te; + Tf = T7 + Te; + T1l = TF - TG; + TH = TF + TG; + Tw = Tq - Tv; + T1q = Tq + Tv; + } + } + } + { + E TX, T10, T1c, T13, T1h, T1E, T1H, T1C, T1K, T1G, T1L, T1F; + { + E TQ, Tx, T1y, TI, Tg, Tz; + TX = TP - TM; + TQ = TM + TP; + Tx = FMA(KP707106781, Tw, Tl); + T10 = FNMS(KP707106781, Tw, Tl); + T1c = T1a + T1b; + T1y = T1a - T1b; + T13 = FNMS(KP707106781, TH, TE); + TI = FMA(KP707106781, TH, TE); + Tg = W[0]; + Tz = W[1]; + { + E T1B, T1A, T1x, T1J, T1z, T1D; + { + E TR, Ty, TS, TJ; + T1B = T1g - T1f; + T1h = T1f + T1g; + T1A = W[11]; + TR = Tg * TI; + Ty = Tg * Tx; + T1x = W[10]; + T1J = T1A * T1y; + TS = FNMS(Tz, Tx, TR); + TJ = FMA(Tz, TI, Ty); + T1z = T1x * T1y; + T1m = FMA(KP707106781, T1l, T1k); + T1E = FNMS(KP707106781, T1l, T1k); + Im[0] = TS - TQ; + Ip[0] = TQ + TS; + Rm[0] = Tf + TJ; + Rp[0] = Tf - TJ; + T1H = FMA(KP707106781, T1q, T1p); + T1r = FNMS(KP707106781, T1q, T1p); + T1D = W[12]; + } + T1C = FNMS(T1A, T1B, T1z); + T1K = FMA(T1x, T1B, T1J); + T1G = W[13]; + T1L = T1D * T1H; + T1F = T1D * T1E; + } + } + { + E TY, T16, T12, T17, T11; + { + E TW, TT, T15, TV, TZ, T1M, T1I; + TW = W[7]; + T1M = FNMS(T1G, T1E, T1L); + T1I = FMA(T1G, T1H, T1F); + TT = W[6]; + T15 = TW * TU; + Im[WS(rs, 3)] = T1M - T1K; + Ip[WS(rs, 3)] = T1K + T1M; + Rm[WS(rs, 3)] = T1C + T1I; + Rp[WS(rs, 3)] = T1C - T1I; + TV = TT * TU; + TZ = W[8]; + TY = FNMS(TW, TX, TV); + T16 = FMA(TT, TX, T15); + T12 = W[9]; + T17 = TZ * T13; + T11 = TZ * T10; + } + { + E T1e, T19, T1t, T1d, T1j, T18, T14; + T1e = W[3]; + T18 = FNMS(T12, T10, T17); + T14 = FMA(T12, T13, T11); + T19 = W[2]; + T1t = T1e * T1c; + Im[WS(rs, 2)] = T18 - T16; + Ip[WS(rs, 
2)] = T16 + T18; + Rm[WS(rs, 2)] = TY + T14; + Rp[WS(rs, 2)] = TY - T14; + T1d = T19 * T1c; + T1j = W[4]; + T1i = FNMS(T1e, T1h, T1d); + T1u = FMA(T19, T1h, T1t); + T1o = W[5]; + T1v = T1j * T1r; + T1n = T1j * T1m; + } + } + } + } + T1w = FNMS(T1o, T1m, T1v); + T1s = FMA(T1o, T1r, T1n); + Im[WS(rs, 1)] = T1w - T1u; + Ip[WS(rs, 1)] = T1u + T1w; + Rm[WS(rs, 1)] = T1i + T1s; + Rp[WS(rs, 1)] = T1i - T1s; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cbdft_8", twinstr, &GENUS, {60, 14, 22, 0} }; + +void X(codelet_hc2cbdft_8) (planner *p) { + X(khc2c_register) (p, hc2cbdft_8, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -dif -name hc2cbdft_8 -include hc2cb.h */ + +/* + * This function contains 82 FP additions, 32 FP multiplications, + * (or, 68 additions, 18 multiplications, 14 fused multiply/add), + * 30 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cb.h" + +static void hc2cbdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T7, T1d, T1h, Tl, TG, T14, T19, TO, Te, TL, T18, T15, TB, T1e, Tw; + E T1i; + { + E T3, TC, Tk, TM, T6, Th, TF, TN; + { + E T1, T2, Ti, Tj; + T1 = Rp[0]; + T2 = Rm[WS(rs, 3)]; + T3 = T1 + T2; + TC = T1 - T2; + Ti = Ip[0]; + Tj = Im[WS(rs, 3)]; + Tk = Ti + Tj; + TM = Ti - Tj; + } + { + E T4, T5, TD, TE; + T4 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 1)]; + T6 = T4 + T5; + Th = T4 - T5; + TD = Ip[WS(rs, 2)]; + TE = Im[WS(rs, 1)]; + TF = TD + TE; + TN = TD - TE; + } + T7 = T3 + T6; + T1d = Tk - Th; + T1h = TC + TF; + Tl = Th + Tk; + TG = TC - TF; + T14 = T3 - T6; + 
T19 = TM - TN; + TO = TM + TN; + } + { + E Ta, Tm, Tp, TJ, Td, Tr, Tu, TK; + { + E T8, T9, Tn, To; + T8 = Rp[WS(rs, 1)]; + T9 = Rm[WS(rs, 2)]; + Ta = T8 + T9; + Tm = T8 - T9; + Tn = Ip[WS(rs, 1)]; + To = Im[WS(rs, 2)]; + Tp = Tn + To; + TJ = Tn - To; + } + { + E Tb, Tc, Ts, Tt; + Tb = Rm[0]; + Tc = Rp[WS(rs, 3)]; + Td = Tb + Tc; + Tr = Tb - Tc; + Ts = Im[0]; + Tt = Ip[WS(rs, 3)]; + Tu = Ts + Tt; + TK = Tt - Ts; + } + Te = Ta + Td; + TL = TJ + TK; + T18 = Ta - Td; + T15 = TK - TJ; + { + E Tz, TA, Tq, Tv; + Tz = Tm - Tp; + TA = Tr - Tu; + TB = KP707106781 * (Tz + TA); + T1e = KP707106781 * (Tz - TA); + Tq = Tm + Tp; + Tv = Tr + Tu; + Tw = KP707106781 * (Tq - Tv); + T1i = KP707106781 * (Tq + Tv); + } + } + { + E Tf, TP, TI, TQ; + Tf = T7 + Te; + TP = TL + TO; + { + E Tx, TH, Tg, Ty; + Tx = Tl + Tw; + TH = TB + TG; + Tg = W[0]; + Ty = W[1]; + TI = FMA(Tg, Tx, Ty * TH); + TQ = FNMS(Ty, Tx, Tg * TH); + } + Rp[0] = Tf - TI; + Ip[0] = TP + TQ; + Rm[0] = Tf + TI; + Im[0] = TQ - TP; + } + { + E T1r, T1x, T1w, T1y; + { + E T1o, T1q, T1n, T1p; + T1o = T14 - T15; + T1q = T19 - T18; + T1n = W[10]; + T1p = W[11]; + T1r = FNMS(T1p, T1q, T1n * T1o); + T1x = FMA(T1p, T1o, T1n * T1q); + } + { + E T1t, T1v, T1s, T1u; + T1t = T1d - T1e; + T1v = T1i + T1h; + T1s = W[12]; + T1u = W[13]; + T1w = FMA(T1s, T1t, T1u * T1v); + T1y = FNMS(T1u, T1t, T1s * T1v); + } + Rp[WS(rs, 3)] = T1r - T1w; + Ip[WS(rs, 3)] = T1x + T1y; + Rm[WS(rs, 3)] = T1r + T1w; + Im[WS(rs, 3)] = T1y - T1x; + } + { + E TV, T11, T10, T12; + { + E TS, TU, TR, TT; + TS = T7 - Te; + TU = TO - TL; + TR = W[6]; + TT = W[7]; + TV = FNMS(TT, TU, TR * TS); + T11 = FMA(TT, TS, TR * TU); + } + { + E TX, TZ, TW, TY; + TX = Tl - Tw; + TZ = TG - TB; + TW = W[8]; + TY = W[9]; + T10 = FMA(TW, TX, TY * TZ); + T12 = FNMS(TY, TX, TW * TZ); + } + Rp[WS(rs, 2)] = TV - T10; + Ip[WS(rs, 2)] = T11 + T12; + Rm[WS(rs, 2)] = TV + T10; + Im[WS(rs, 2)] = T12 - T11; + } + { + E T1b, T1l, T1k, T1m; + { + E T16, T1a, T13, T17; + T16 = T14 + T15; + T1a = 
T18 + T19; + T13 = W[2]; + T17 = W[3]; + T1b = FNMS(T17, T1a, T13 * T16); + T1l = FMA(T17, T16, T13 * T1a); + } + { + E T1f, T1j, T1c, T1g; + T1f = T1d + T1e; + T1j = T1h - T1i; + T1c = W[4]; + T1g = W[5]; + T1k = FMA(T1c, T1f, T1g * T1j); + T1m = FNMS(T1g, T1f, T1c * T1j); + } + Rp[WS(rs, 1)] = T1b - T1k; + Ip[WS(rs, 1)] = T1l + T1m; + Rm[WS(rs, 1)] = T1b + T1k; + Im[WS(rs, 1)] = T1m - T1l; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cbdft_8", twinstr, &GENUS, {68, 18, 14, 0} }; + +void X(codelet_hc2cbdft_8) (planner *p) { + X(khc2c_register) (p, hc2cbdft_8, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:33 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -name r2cbIII_10 -dft-III -include r2cbIII.h */ + +/* + * This function contains 32 FP additions, 28 FP multiplications, + * (or, 14 additions, 10 multiplications, 18 fused multiply/add), + * 38 stack variables, 5 constants, and 20 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E Tq, Ti, Tk, Tu, Tw, Tp, Tb, Tj, Tr, Tv; + { + E T1, To, Ts, Tt, T8, Ta, Te, Tl, Tm, Th, Tn, T9; + T1 = Cr[WS(csr, 2)]; + To = Ci[WS(csi, 2)]; + { + E T2, T3, T5, T6; + T2 = Cr[WS(csr, 4)]; + T3 = Cr[0]; + T5 = Cr[WS(csr, 3)]; + T6 = Cr[WS(csr, 1)]; + { + E Tc, T4, T7, Td, Tf, Tg; + Tc = Ci[WS(csi, 3)]; + Ts = T2 - T3; + T4 = T2 + T3; + Tt = T5 - T6; + T7 = T5 + T6; + Td = Ci[WS(csi, 1)]; + Tf = Ci[WS(csi, 4)]; + Tg = Ci[0]; + T8 = T4 + T7; + Ta = T7 - T4; + Te = Tc - Td; + Tl = Tc + Td; + Tm = Tf + Tg; + Th = Tf - 
Tg; + } + } + R0[0] = KP2_000000000 * (T1 + T8); + Tn = Tl - Tm; + Tq = Tl + Tm; + Ti = FMA(KP618033988, Th, Te); + Tk = FNMS(KP618033988, Te, Th); + R1[WS(rs, 2)] = KP2_000000000 * (Tn - To); + T9 = FMS(KP250000000, T8, T1); + Tu = FMA(KP618033988, Tt, Ts); + Tw = FNMS(KP618033988, Ts, Tt); + Tp = FMA(KP250000000, Tn, To); + Tb = FNMS(KP559016994, Ta, T9); + Tj = FMA(KP559016994, Ta, T9); + } + Tr = FMA(KP559016994, Tq, Tp); + Tv = FNMS(KP559016994, Tq, Tp); + R0[WS(rs, 2)] = -(KP2_000000000 * (FNMS(KP951056516, Tk, Tj))); + R0[WS(rs, 3)] = KP2_000000000 * (FMA(KP951056516, Tk, Tj)); + R0[WS(rs, 4)] = -(KP2_000000000 * (FNMS(KP951056516, Ti, Tb))); + R0[WS(rs, 1)] = KP2_000000000 * (FMA(KP951056516, Ti, Tb)); + R1[WS(rs, 1)] = KP2_000000000 * (FMA(KP951056516, Tw, Tv)); + R1[WS(rs, 3)] = KP2_000000000 * (FNMS(KP951056516, Tw, Tv)); + R1[WS(rs, 4)] = -(KP2_000000000 * (FNMS(KP951056516, Tu, Tr))); + R1[0] = -(KP2_000000000 * (FMA(KP951056516, Tu, Tr))); + } + } +} + +static const kr2c_desc desc = { 10, "r2cbIII_10", {14, 10, 18, 0}, &GENUS }; + +void X(codelet_r2cbIII_10) (planner *p) { + X(kr2c_register) (p, r2cbIII_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -name r2cbIII_10 -dft-III -include r2cbIII.h */ + +/* + * This function contains 32 FP additions, 16 FP multiplications, + * (or, 26 additions, 10 multiplications, 6 fused multiply/add), + * 22 stack variables, 5 constants, and 20 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, 
+1.118033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E T1, To, T8, Tq, Ta, Tp, Te, Ts, Th, Tn; + T1 = Cr[WS(csr, 2)]; + To = Ci[WS(csi, 2)]; + { + E T2, T3, T4, T5, T6, T7; + T2 = Cr[WS(csr, 4)]; + T3 = Cr[0]; + T4 = T2 + T3; + T5 = Cr[WS(csr, 3)]; + T6 = Cr[WS(csr, 1)]; + T7 = T5 + T6; + T8 = T4 + T7; + Tq = T5 - T6; + Ta = KP1_118033988 * (T7 - T4); + Tp = T2 - T3; + } + { + E Tc, Td, Tm, Tf, Tg, Tl; + Tc = Ci[WS(csi, 4)]; + Td = Ci[0]; + Tm = Tc + Td; + Tf = Ci[WS(csi, 1)]; + Tg = Ci[WS(csi, 3)]; + Tl = Tg + Tf; + Te = Tc - Td; + Ts = KP1_118033988 * (Tl + Tm); + Th = Tf - Tg; + Tn = Tl - Tm; + } + R0[0] = KP2_000000000 * (T1 + T8); + R1[WS(rs, 2)] = KP2_000000000 * (Tn - To); + { + E Ti, Tj, Tb, Tk, T9; + Ti = FNMS(KP1_902113032, Th, KP1_175570504 * Te); + Tj = FMA(KP1_175570504, Th, KP1_902113032 * Te); + T9 = FNMS(KP2_000000000, T1, KP500000000 * T8); + Tb = T9 - Ta; + Tk = T9 + Ta; + R0[WS(rs, 1)] = Tb + Ti; + R0[WS(rs, 3)] = Tk + Tj; + R0[WS(rs, 4)] = Ti - Tb; + R0[WS(rs, 2)] = Tj - Tk; + } + { + E Tr, Tv, Tu, Tw, Tt; + Tr = FMA(KP1_902113032, Tp, KP1_175570504 * Tq); + Tv = FNMS(KP1_175570504, Tp, KP1_902113032 * Tq); + Tt = FMA(KP500000000, Tn, KP2_000000000 * To); + Tu = Ts + Tt; + Tw = Tt - Ts; + R1[0] = -(Tr + Tu); + R1[WS(rs, 3)] = Tw - Tv; + R1[WS(rs, 4)] = Tr - Tu; + R1[WS(rs, 1)] = Tv + Tw; + } + } + } +} + +static const kr2c_desc desc = { 10, "r2cbIII_10", {26, 10, 6, 0}, &GENUS }; + +void X(codelet_r2cbIII_10) (planner *p) { + X(kr2c_register) (p, r2cbIII_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,232 @@ +/* + * Copyright (c) 
2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:34 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include r2cbIII.h */ + +/* + * This function contains 42 FP additions, 20 FP multiplications, + * (or, 30 additions, 8 multiplications, 12 fused multiply/add), + * 37 stack variables, 4 constants, and 24 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), 
MAKE_VOLATILE_STRIDE(48, csi)) { + E TE, TD, TF, TG; + { + E Tx, T6, Te, Tb, T5, Tw, Ts, To, Th, Ti, T9, TA; + { + E T1, Tq, Tc, Td, T4, T2, T3, T7, T8, Tr; + T1 = Cr[WS(csr, 1)]; + T2 = Cr[WS(csr, 5)]; + T3 = Cr[WS(csr, 2)]; + Tq = Ci[WS(csi, 1)]; + Tc = Ci[WS(csi, 5)]; + Td = Ci[WS(csi, 2)]; + T4 = T2 + T3; + Tx = T2 - T3; + T6 = Cr[WS(csr, 4)]; + Te = Tc + Td; + Tr = Td - Tc; + Tb = FNMS(KP2_000000000, T1, T4); + T5 = T1 + T4; + T7 = Cr[0]; + Tw = FMA(KP2_000000000, Tq, Tr); + Ts = Tq - Tr; + T8 = Cr[WS(csr, 3)]; + To = Ci[WS(csi, 4)]; + Th = Ci[0]; + Ti = Ci[WS(csi, 3)]; + T9 = T7 + T8; + TA = T7 - T8; + } + { + E Tl, Tm, Tv, TC; + { + E Tf, Ty, Tk, TB; + { + E Tj, Tn, Tg, Ta; + Tl = FNMS(KP1_732050807, Te, Tb); + Tf = FMA(KP1_732050807, Te, Tb); + Tj = Th + Ti; + Tn = Ti - Th; + Tg = FNMS(KP2_000000000, T6, T9); + Ta = T6 + T9; + { + E Tu, Tt, Tz, Tp; + Ty = FMA(KP1_732050807, Tx, Tw); + TE = FNMS(KP1_732050807, Tx, Tw); + Tz = FMA(KP2_000000000, To, Tn); + Tp = Tn - To; + Tm = FMA(KP1_732050807, Tj, Tg); + Tk = FNMS(KP1_732050807, Tj, Tg); + Tu = T5 - Ta; + R0[0] = KP2_000000000 * (T5 + Ta); + Tt = Tp - Ts; + R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp); + Tv = Tk - Tf; + TD = FMA(KP1_732050807, TA, Tz); + TB = FNMS(KP1_732050807, TA, Tz); + R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt); + R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu); + } + } + R0[WS(rs, 2)] = Tf + Tk; + TC = Ty + TB; + R0[WS(rs, 5)] = TB - Ty; + } + R1[WS(rs, 3)] = KP707106781 * (Tv + TC); + R1[0] = KP707106781 * (Tv - TC); + TF = Tl - Tm; + R0[WS(rs, 4)] = -(Tl + Tm); + } + } + R0[WS(rs, 1)] = TD - TE; + TG = TE + TD; + R1[WS(rs, 5)] = KP707106781 * (TF - TG); + R1[WS(rs, 2)] = KP707106781 * (TF + TG); + } + } +} + +static const kr2c_desc desc = { 12, "r2cbIII_12", {30, 8, 12, 0}, &GENUS }; + +void X(codelet_r2cbIII_12) (planner *p) { + X(kr2c_register) (p, r2cbIII_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 
-sign 1 -n 12 -name r2cbIII_12 -dft-III -include r2cbIII.h */ + +/* + * This function contains 42 FP additions, 20 FP multiplications, + * (or, 38 additions, 16 multiplications, 4 fused multiply/add), + * 25 stack variables, 4 constants, and 24 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu; + { + E T1, T2, T3, T4; + T1 = Cr[WS(csr, 1)]; + T2 = Cr[WS(csr, 5)]; + T3 = Cr[WS(csr, 2)]; + T4 = T2 + T3; + T5 = T1 + T4; + Tw = KP866025403 * (T2 - T3); + Tb = FNMS(KP500000000, T4, T1); + } + { + E Tq, Tc, Td, Tr; + Tq = Ci[WS(csi, 1)]; + Tc = Ci[WS(csi, 5)]; + Td = Ci[WS(csi, 2)]; + Tr = Td - Tc; + Te = KP866025403 * (Tc + Td); + Tx = FMA(KP500000000, Tr, Tq); + Ts = Tq - Tr; + } + { + E T6, T7, T8, T9; + T6 = Cr[WS(csr, 4)]; + T7 = Cr[0]; + T8 = Cr[WS(csr, 3)]; + T9 = T7 + T8; + Ta = T6 + T9; + TA = KP866025403 * (T7 - T8); + Tg = FNMS(KP500000000, T9, T6); + } + { + E To, Th, Ti, Tn; + To = Ci[WS(csi, 4)]; + Th = Ci[0]; + Ti = Ci[WS(csi, 3)]; + Tn = Ti - Th; + Tj = KP866025403 * (Th + Ti); + Tz = FMA(KP500000000, Tn, To); + Tp = Tn - To; + } + R0[0] = KP2_000000000 * (T5 + Ta); + R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp); + Tt = Tp - Ts; + Tu = T5 - Ta; + R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu); + R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt); + { + E Tf, Tk, Tv, Ty, TB, TC; + Tf = Tb - Te; + Tk = Tg + Tj; + Tv = Tf - Tk; + Ty = Tw + 
Tx; + TB = Tz - TA; + TC = Ty + TB; + R0[WS(rs, 2)] = -(KP2_000000000 * (Tf + Tk)); + R0[WS(rs, 5)] = KP2_000000000 * (TB - Ty); + R1[0] = KP1_414213562 * (Tv - TC); + R1[WS(rs, 3)] = KP1_414213562 * (Tv + TC); + } + { + E Tl, Tm, TF, TD, TE, TG; + Tl = Tb + Te; + Tm = Tg - Tj; + TF = Tm - Tl; + TD = TA + Tz; + TE = Tx - Tw; + TG = TE + TD; + R0[WS(rs, 4)] = KP2_000000000 * (Tl + Tm); + R1[WS(rs, 2)] = KP1_414213562 * (TF + TG); + R0[WS(rs, 1)] = KP2_000000000 * (TD - TE); + R1[WS(rs, 5)] = KP1_414213562 * (TF - TG); + } + } + } +} + +static const kr2c_desc desc = { 12, "r2cbIII_12", {38, 16, 4, 0}, &GENUS }; + +void X(codelet_r2cbIII_12) (planner *p) { + X(kr2c_register) (p, r2cbIII_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:34 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cbIII_15 -dft-III -include r2cbIII.h */ + +/* + * This function contains 64 FP additions, 43 FP multiplications, + * (or, 21 additions, 0 multiplications, 43 fused multiply/add), + * 48 stack variables, 9 constants, and 30 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_15, variant built when HAVE_FMA is defined: performs v iterations. Each iteration reads Cr[0] and Cr[WS(csr, k)] for k = 1..7 plus Ci[0] and Ci[WS(csi, k)] for k = 1..6, then stores R0[0], R0[WS(rs, k)] for k = 1..7 and R1[0], R1[WS(rs, k)] for k = 1..6 (the 30 accesses tallied above). Between iterations R0 and R1 advance by ovs while Cr and Ci advance by ivs. Within this function Cr and Ci are only read and R0 and R1 are only written. NOTE(review): presumably the n = 15 kernel selected by the generator flags quoted above; R, E, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE are defined outside this chunk. */ +static void r2cbIII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E TX, Tv, To, TW, Tl, Tx, Ty, Tw; + { + E TA, Tk, T6, T5, Tz, Th, TI, Tp, Tu, TK, TR, Tn, Td, Tq; + { + E T1, T2, T3, Ti, Tj; + Ti = Ci[WS(csi, 4)]; + Tj = Ci[WS(csi, 1)]; + T1 = Cr[WS(csr, 7)]; + T2 = Cr[WS(csr, 4)]; + T3 = Cr[WS(csr, 1)]; 
+ TA = FNMS(KP618033988, Ti, Tj); + Tk = FMA(KP618033988, Tj, Ti); + { + E T7, TP, Tc, T8; + T6 = Cr[WS(csr, 2)]; + { + E T4, Tg, Ta, Tb, Tf; + T4 = T2 + T3; + Tg = T2 - T3; + Ta = Cr[WS(csr, 3)]; + Tb = Cr[WS(csr, 6)]; + T7 = Cr[0]; + Tf = FNMS(KP500000000, T4, T1); + T5 = FMA(KP2_000000000, T4, T1); + TP = Ta - Tb; + Tc = Ta + Tb; + Tz = FNMS(KP1_118033988, Tg, Tf); + Th = FMA(KP1_118033988, Tg, Tf); + T8 = Cr[WS(csr, 5)]; + } + TI = Ci[WS(csi, 2)]; + { + E Ts, Tt, TQ, T9; + Ts = Ci[WS(csi, 3)]; + Tt = Ci[WS(csi, 6)]; + TQ = T7 - T8; + T9 = T7 + T8; + Tp = Ci[0]; + Tu = Ts - Tt; + TK = Ts + Tt; + TX = FMA(KP618033988, TP, TQ); + TR = FNMS(KP618033988, TQ, TP); + Tn = T9 - Tc; + Td = T9 + Tc; + Tq = Ci[WS(csi, 5)]; + } + } + } + { + E TB, TF, TO, TG, TE; + { + E Tm, T11, TN, TD, TM, T12, TC; + TB = FNMS(KP1_902113032, TA, Tz); + TF = FMA(KP1_902113032, TA, Tz); + { + E Te, Tr, TJ, TL; + Tm = FNMS(KP250000000, Td, T6); + Te = T6 + Td; + Tr = Tp + Tq; + TJ = Tq - Tp; + R0[0] = FMA(KP2_000000000, Te, T5); + T11 = Te - T5; + TN = TJ + TK; + TL = TJ - TK; + Tv = FMA(KP618033988, Tu, Tr); + TD = FNMS(KP618033988, Tr, Tu); + TM = FNMS(KP250000000, TL, TI); + T12 = TL + TI; + } + TC = FNMS(KP559016994, Tn, Tm); + To = FMA(KP559016994, Tn, Tm); + R1[WS(rs, 2)] = FMA(KP1_732050807, T12, T11); + R0[WS(rs, 5)] = FMS(KP1_732050807, T12, T11); + TW = FMA(KP559016994, TN, TM); + TO = FNMS(KP559016994, TN, TM); + TG = FNMS(KP951056516, TD, TC); + TE = FMA(KP951056516, TD, TC); + } + Tl = FNMS(KP1_902113032, Tk, Th); + Tx = FMA(KP1_902113032, Tk, Th); + { + E TS, TU, TT, TH; + TS = FMA(KP951056516, TR, TO); + TU = FNMS(KP951056516, TR, TO); + TT = TF - TG; + R1[WS(rs, 1)] = -(FMA(KP2_000000000, TG, TF)); + TH = TB - TE; + R0[WS(rs, 6)] = FMA(KP2_000000000, TE, TB); + R1[WS(rs, 6)] = -(FMA(KP1_732050807, TU, TT)); + R0[WS(rs, 4)] = FNMS(KP1_732050807, TU, TT); + R1[WS(rs, 3)] = -(FMA(KP1_732050807, TS, TH)); + R0[WS(rs, 1)] = FNMS(KP1_732050807, TS, TH); + } + } + } + Ty = 
FNMS(KP951056516, Tv, To); + Tw = FMA(KP951056516, Tv, To); + { + E T10, TY, TV, TZ; + T10 = FMA(KP951056516, TX, TW); + TY = FNMS(KP951056516, TX, TW); + TV = Ty - Tx; + R0[WS(rs, 3)] = FMA(KP2_000000000, Ty, Tx); + TZ = Tl - Tw; + R1[WS(rs, 4)] = -(FMA(KP2_000000000, Tw, Tl)); + R1[WS(rs, 5)] = FMA(KP1_732050807, TY, TV); + R1[0] = FNMS(KP1_732050807, TY, TV); + R0[WS(rs, 2)] = FMA(KP1_732050807, T10, TZ); + R0[WS(rs, 7)] = FNMS(KP1_732050807, T10, TZ); + } + } + } +} + /* Descriptor for the HAVE_FMA variant: size 15, its name string, and an operation-count tuple whose first three entries repeat the 21 additions, 0 multiplications and 43 fused multiply-adds quoted in the header comment. GENUS is not defined in this chunk (presumably supplied by r2cbIII.h). */ +static const kr2c_desc desc = { 15, "r2cbIII_15", {21, 0, 43, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_15 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_15) (planner *p) { + X(kr2c_register) (p, r2cbIII_15, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cbIII_15 -dft-III -include r2cbIII.h */ + +/* + * This function contains 64 FP additions, 26 FP multiplications, + * (or, 49 additions, 11 multiplications, 15 fused multiply/add), + * 47 stack variables, 14 constants, and 30 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_15, variant built when HAVE_FMA is not defined: same signature, same v-iteration loop with ovs and ivs pointer advances, and the same Cr, Ci, R0 and R1 index sets as the variant above. It is written with explicit products such as KP1_175570504 * Tu and declares its own 14 DK constants. */ +static void r2cbIII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP433012701, +0.433012701892219323381861585376468091735701313); + DK(KP968245836, +0.968245836551854221294816349945599902708230426); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP1_647278207, +1.647278207092663851754840078556380006059321028); + DK(KP1_018073920, +1.018073920910254366901961726787815297021466329); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, 
+1.118033988749894848204586834365638117720309180); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E Tv, TD, T5, Ts, TC, T6, Tf, TW, TK, Td, Tg, TP, To, TN, TA; + E TO, TQ, Tt, Tu, T12, Te, T11; + Tt = Ci[WS(csi, 4)]; + Tu = Ci[WS(csi, 1)]; + Tv = FMA(KP1_902113032, Tt, KP1_175570504 * Tu); + TD = FNMS(KP1_175570504, Tt, KP1_902113032 * Tu); + { + E T1, T4, Tq, T2, T3, Tr; + T1 = Cr[WS(csr, 7)]; + T2 = Cr[WS(csr, 4)]; + T3 = Cr[WS(csr, 1)]; + T4 = T2 + T3; + Tq = KP1_118033988 * (T2 - T3); + T5 = FMA(KP2_000000000, T4, T1); + Tr = FNMS(KP500000000, T4, T1); + Ts = Tq + Tr; + TC = Tr - Tq; + } + { + E Tc, TJ, T9, TI; + T6 = Cr[WS(csr, 2)]; + { + E Ta, Tb, T7, T8; + Ta = Cr[WS(csr, 3)]; + Tb = Cr[WS(csr, 6)]; + Tc = Ta + Tb; + TJ = Ta - Tb; + T7 = Cr[0]; + T8 = Cr[WS(csr, 5)]; + T9 = T7 + T8; + TI = T7 - T8; + } + Tf = KP559016994 * (T9 - Tc); + TW = FNMS(KP1_647278207, TJ, KP1_018073920 * TI); + TK = FMA(KP1_647278207, TI, KP1_018073920 * TJ); + Td = T9 + Tc; + Tg = FNMS(KP250000000, Td, T6); + } + { + E Tn, TM, Tk, TL; + TP = Ci[WS(csi, 2)]; + { + E Tl, Tm, Ti, Tj; + Tl = Ci[WS(csi, 3)]; + Tm = Ci[WS(csi, 6)]; + Tn = Tl - Tm; + TM = Tl + Tm; + Ti = Ci[0]; + Tj = Ci[WS(csi, 5)]; + Tk = Ti + Tj; + TL = Ti - Tj; + } + To = FMA(KP951056516, Tk, KP587785252 * Tn); + TN = KP968245836 * (TL - TM); + TA = FNMS(KP587785252, Tk, KP951056516 * Tn); + TO = TL + TM; + TQ = FMA(KP433012701, TO, KP1_732050807 * TP); + } + T12 = KP1_732050807 * (TP - TO); + Te = T6 + Td; + T11 = Te - T5; + R0[0] = FMA(KP2_000000000, Te, T5); + R0[WS(rs, 5)] = T12 - T11; + R1[WS(rs, 2)] = T11 + T12; + { + E TE, TG, TB, TF, TY, T10, Tz, TX, TV, TZ; + TE = TC - TD; + TG = TC + TD; + Tz = Tg - Tf; + TB = Tz + 
TA; + TF = TA - Tz; + TX = TN + TQ; + TY = TW - TX; + T10 = TW + TX; + R0[WS(rs, 6)] = FMA(KP2_000000000, TB, TE); + R1[WS(rs, 1)] = FMS(KP2_000000000, TF, TG); + TV = TE - TB; + R0[WS(rs, 1)] = TV + TY; + R1[WS(rs, 3)] = TY - TV; + TZ = TF + TG; + R0[WS(rs, 4)] = TZ - T10; + R1[WS(rs, 6)] = -(TZ + T10); + } + { + E Tw, Ty, Tp, Tx, TS, TU, Th, TR, TH, TT; + Tw = Ts - Tv; + Ty = Ts + Tv; + Th = Tf + Tg; + Tp = Th + To; + Tx = Th - To; + TR = TN - TQ; + TS = TK + TR; + TU = TR - TK; + R1[WS(rs, 4)] = -(FMA(KP2_000000000, Tp, Tw)); + R0[WS(rs, 3)] = FMA(KP2_000000000, Tx, Ty); + TH = Tx - Ty; + R1[WS(rs, 5)] = TH - TS; + R1[0] = TH + TS; + TT = Tw - Tp; + R0[WS(rs, 2)] = TT - TU; + R0[WS(rs, 7)] = TT + TU; + } + } + } +} + /* Descriptor for the non-FMA variant: size 15, its name string, and an operation-count tuple whose first three entries repeat the 49 additions, 11 multiplications and 15 fused multiply-adds quoted in its header comment. */ +static const kr2c_desc desc = { 15, "r2cbIII_15", {49, 11, 15, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_15 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_15) (planner *p) { + X(kr2c_register) (p, r2cbIII_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:34 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -name r2cbIII_16 -dft-III -include r2cbIII.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 46 additions, 16 multiplications, 20 fused multiply/add), + * 55 stack variables, 9 constants, and 32 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_16, variant built when HAVE_FMA is defined: performs v iterations. Each iteration reads Cr[0], Ci[0] and Cr[WS(csr, k)], Ci[WS(csi, k)] for k = 1..7, then stores eight results each into R0 and R1 (index 0 and WS(rs, k), k = 1..7), which accounts for the 32 memory accesses tallied above. Between iterations R0 and R1 advance by ovs while Cr and Ci advance by ivs. Within this function Cr and Ci are only read and R0 and R1 are only written. NOTE(review): presumably the n = 16 kernel selected by the generator flags quoted above; R, E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this chunk. */ +static void r2cbIII_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E TA, TD, Tv, TG, TE, TF; + { + E TK, TP, T7, T13, TW, TH, Tj, TC, To, Te, TX, TS, T12, Tt, TB; + { + E T4, Tf, T3, TU, Tz, T5, Tg, Th; + { + E T1, T2, Tx, Ty; + T1 = Cr[0]; + T2 = Cr[WS(csr, 7)]; + Tx = Ci[0]; + Ty = Ci[WS(csi, 7)]; + 
T4 = Cr[WS(csr, 4)]; + Tf = T1 - T2; + T3 = T1 + T2; + TU = Ty - Tx; + Tz = Tx + Ty; + T5 = Cr[WS(csr, 3)]; + Tg = Ci[WS(csi, 4)]; + Th = Ci[WS(csi, 3)]; + } + { + E Tb, Tk, Ta, TR, Tn, Tc, Tq, Tr; + { + E T8, T9, Tl, Tm; + T8 = Cr[WS(csr, 2)]; + { + E Tw, T6, TV, Ti; + Tw = T4 - T5; + T6 = T4 + T5; + TV = Th - Tg; + Ti = Tg + Th; + TK = Tw - Tz; + TA = Tw + Tz; + TP = T3 - T6; + T7 = T3 + T6; + T13 = TV + TU; + TW = TU - TV; + TH = Tf + Ti; + Tj = Tf - Ti; + T9 = Cr[WS(csr, 5)]; + } + Tl = Ci[WS(csi, 2)]; + Tm = Ci[WS(csi, 5)]; + Tb = Cr[WS(csr, 1)]; + Tk = T8 - T9; + Ta = T8 + T9; + TR = Tl - Tm; + Tn = Tl + Tm; + Tc = Cr[WS(csr, 6)]; + Tq = Ci[WS(csi, 1)]; + Tr = Ci[WS(csi, 6)]; + } + TC = Tk + Tn; + To = Tk - Tn; + { + E Tp, Td, TQ, Ts; + Tp = Tb - Tc; + Td = Tb + Tc; + TQ = Tr - Tq; + Ts = Tq + Tr; + Te = Ta + Td; + TX = Ta - Td; + TS = TQ - TR; + T12 = TR + TQ; + Tt = Tp - Ts; + TB = Tp + Ts; + } + } + } + { + E T10, TT, TY, TZ; + R0[0] = KP2_000000000 * (T7 + Te); + R0[WS(rs, 4)] = KP2_000000000 * (T13 - T12); + T10 = TP - TS; + TT = TP + TS; + TY = TW - TX; + TZ = TX + TW; + { + E T11, T14, TI, TL, Tu; + T11 = T7 - Te; + T14 = T12 + T13; + R0[WS(rs, 5)] = KP1_847759065 * (FNMS(KP414213562, TT, TY)); + R0[WS(rs, 1)] = KP1_847759065 * (FMA(KP414213562, TY, TT)); + R0[WS(rs, 6)] = KP1_414213562 * (T14 - T11); + R0[WS(rs, 2)] = KP1_414213562 * (T11 + T14); + TD = TB - TC; + TI = TC + TB; + TL = To - Tt; + Tu = To + Tt; + { + E TO, TJ, TN, TM; + R0[WS(rs, 7)] = -(KP1_847759065 * (FNMS(KP414213562, TZ, T10))); + R0[WS(rs, 3)] = KP1_847759065 * (FMA(KP414213562, T10, TZ)); + TO = FMA(KP707106781, TI, TH); + TJ = FNMS(KP707106781, TI, TH); + TN = FMA(KP707106781, TL, TK); + TM = FNMS(KP707106781, TL, TK); + Tv = FMA(KP707106781, Tu, Tj); + TG = FNMS(KP707106781, Tu, Tj); + R1[WS(rs, 3)] = KP1_961570560 * (FMA(KP198912367, TO, TN)); + R1[WS(rs, 7)] = -(KP1_961570560 * (FNMS(KP198912367, TN, TO))); + R1[WS(rs, 5)] = KP1_662939224 * (FNMS(KP668178637, TJ, TM)); + 
R1[WS(rs, 1)] = KP1_662939224 * (FMA(KP668178637, TM, TJ)); + } + } + } + } + TE = FNMS(KP707106781, TD, TA); + TF = FMA(KP707106781, TD, TA); + R1[WS(rs, 2)] = -(KP1_662939224 * (FNMS(KP668178637, TG, TF))); + R1[WS(rs, 6)] = -(KP1_662939224 * (FMA(KP668178637, TF, TG))); + R1[WS(rs, 4)] = -(KP1_961570560 * (FMA(KP198912367, Tv, TE))); + R1[0] = KP1_961570560 * (FNMS(KP198912367, TE, Tv)); + } + } +} + /* Descriptor for the HAVE_FMA variant: size 16, its name string, and an operation-count tuple whose first three entries repeat the 46 additions, 16 multiplications and 20 fused multiply-adds quoted in the header comment. GENUS is not defined in this chunk (presumably supplied by r2cbIII.h). */ +static const kr2c_desc desc = { 16, "r2cbIII_16", {46, 16, 20, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_16 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_16) (planner *p) { + X(kr2c_register) (p, r2cbIII_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -name r2cbIII_16 -dft-III -include r2cbIII.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 54 additions, 20 multiplications, 12 fused multiply/add), + * 40 stack variables, 9 constants, and 32 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_16, variant built when HAVE_FMA is not defined: same signature, same v-iteration loop with ovs and ivs pointer advances, and the same Cr, Ci, R0 and R1 index sets as the variant above. It is written with explicit products such as KP765366864 * TY and declares its own 9 DK constants. */ +static void r2cbIII_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E T7, TW, T13, Tj, TD, TK, TP, TH, Te, TX, T12, To, Tt, Tx, TS; + 
E Tw, TT, TY; + { + E T3, Tf, TC, TV, T6, Tz, Ti, TU; + { + E T1, T2, TA, TB; + T1 = Cr[0]; + T2 = Cr[WS(csr, 7)]; + T3 = T1 + T2; + Tf = T1 - T2; + TA = Ci[0]; + TB = Ci[WS(csi, 7)]; + TC = TA + TB; + TV = TB - TA; + } + { + E T4, T5, Tg, Th; + T4 = Cr[WS(csr, 4)]; + T5 = Cr[WS(csr, 3)]; + T6 = T4 + T5; + Tz = T4 - T5; + Tg = Ci[WS(csi, 4)]; + Th = Ci[WS(csi, 3)]; + Ti = Tg + Th; + TU = Tg - Th; + } + T7 = T3 + T6; + TW = TU + TV; + T13 = TV - TU; + Tj = Tf - Ti; + TD = Tz + TC; + TK = Tz - TC; + TP = T3 - T6; + TH = Tf + Ti; + } + { + E Ta, Tk, Tn, TR, Td, Tp, Ts, TQ; + { + E T8, T9, Tl, Tm; + T8 = Cr[WS(csr, 2)]; + T9 = Cr[WS(csr, 5)]; + Ta = T8 + T9; + Tk = T8 - T9; + Tl = Ci[WS(csi, 2)]; + Tm = Ci[WS(csi, 5)]; + Tn = Tl + Tm; + TR = Tl - Tm; + } + { + E Tb, Tc, Tq, Tr; + Tb = Cr[WS(csr, 1)]; + Tc = Cr[WS(csr, 6)]; + Td = Tb + Tc; + Tp = Tb - Tc; + Tq = Ci[WS(csi, 1)]; + Tr = Ci[WS(csi, 6)]; + Ts = Tq + Tr; + TQ = Tr - Tq; + } + Te = Ta + Td; + TX = Ta - Td; + T12 = TR + TQ; + To = Tk - Tn; + Tt = Tp - Ts; + Tx = Tp + Ts; + TS = TQ - TR; + Tw = Tk + Tn; + } + R0[0] = KP2_000000000 * (T7 + Te); + R0[WS(rs, 4)] = KP2_000000000 * (T13 - T12); + TT = TP + TS; + TY = TW - TX; + R0[WS(rs, 1)] = FMA(KP1_847759065, TT, KP765366864 * TY); + R0[WS(rs, 5)] = FNMS(KP765366864, TT, KP1_847759065 * TY); + { + E T11, T14, TZ, T10; + T11 = T7 - Te; + T14 = T12 + T13; + R0[WS(rs, 2)] = KP1_414213562 * (T11 + T14); + R0[WS(rs, 6)] = KP1_414213562 * (T14 - T11); + TZ = TP - TS; + T10 = TX + TW; + R0[WS(rs, 3)] = FMA(KP765366864, TZ, KP1_847759065 * T10); + R0[WS(rs, 7)] = FNMS(KP1_847759065, TZ, KP765366864 * T10); + } + { + E TJ, TN, TM, TO, TI, TL; + TI = KP707106781 * (Tw + Tx); + TJ = TH - TI; + TN = TH + TI; + TL = KP707106781 * (To - Tt); + TM = TK - TL; + TO = TL + TK; + R1[WS(rs, 1)] = FMA(KP1_662939224, TJ, KP1_111140466 * TM); + R1[WS(rs, 7)] = FNMS(KP1_961570560, TN, KP390180644 * TO); + R1[WS(rs, 5)] = FNMS(KP1_111140466, TJ, KP1_662939224 * TM); + R1[WS(rs, 3)] = 
FMA(KP390180644, TN, KP1_961570560 * TO); + } + { + E Tv, TF, TE, TG, Tu, Ty; + Tu = KP707106781 * (To + Tt); + Tv = Tj + Tu; + TF = Tj - Tu; + Ty = KP707106781 * (Tw - Tx); + TE = Ty + TD; + TG = Ty - TD; + R1[0] = FNMS(KP390180644, TE, KP1_961570560 * Tv); + R1[WS(rs, 6)] = FNMS(KP1_662939224, TF, KP1_111140466 * TG); + R1[WS(rs, 4)] = -(FMA(KP390180644, Tv, KP1_961570560 * TE)); + R1[WS(rs, 2)] = FMA(KP1_111140466, TF, KP1_662939224 * TG); + } + } + } +} + /* Descriptor for the non-FMA variant: size 16, its name string, and an operation-count tuple whose first three entries repeat the 54 additions, 20 multiplications and 12 fused multiply-adds quoted in its header comment. */ +static const kr2c_desc desc = { 16, "r2cbIII_16", {54, 20, 12, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_16 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_16) (planner *p) { + X(kr2c_register) (p, r2cbIII_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -name r2cbIII_2 -dft-III -include r2cbIII.h */ + +/* + * This function contains 0 FP additions, 2 FP multiplications, + * (or, 0 additions, 2 multiplications, 0 fused multiply/add), + * 4 stack variables, 1 constants, and 4 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_2, variant built when HAVE_FMA is defined: performs v iterations. Each iteration stores R0[0] = KP2_000000000 * Cr[0] and R1[0] = -(KP2_000000000 * Ci[0]); the stride arguments rs, csr and csi appear only in MAKE_VOLATILE_STRIDE. Between iterations R0 and R1 advance by ovs while Cr and Ci advance by ivs. NOTE(review): R, E, DK and MAKE_VOLATILE_STRIDE are defined outside this chunk. */ +static void r2cbIII_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = Cr[0]; + T2 = Ci[0]; + R0[0] = KP2_000000000 * T1; + R1[0] = -(KP2_000000000 * T2); + } + } +} + /* Descriptor for the HAVE_FMA variant: size 2, its name string, and an operation-count tuple whose first three entries repeat the 0 additions, 2 multiplications and 0 fused multiply-adds quoted in the header comment. GENUS is not defined in this chunk (presumably supplied by r2cbIII.h). */ +static const kr2c_desc desc = { 2, "r2cbIII_2", {0, 2, 0, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_2 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_2) (planner *p) { + X(kr2c_register) (p, r2cbIII_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -name r2cbIII_2 -dft-III -include r2cbIII.h */ + +/* + * This function contains 0 FP additions, 2 FP multiplications, + * (or, 0 additions, 2 multiplications, 0 fused multiply/add), + * 4 stack variables, 1 constants, and 4 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_2, variant built when HAVE_FMA is not defined: the function body is the same statement sequence as the HAVE_FMA variant above (two loads, two scaled stores per iteration). */ +static void r2cbIII_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, 
+2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = Cr[0]; + T2 = Ci[0]; + R0[0] = KP2_000000000 * T1; + R1[0] = -(KP2_000000000 * T2); + } + } +} + /* Descriptor for the non-FMA variant: identical contents to the HAVE_FMA descriptor above (size 2, name string, counts 0, 2, 0, 0). */ +static const kr2c_desc desc = { 2, "r2cbIII_2", {0, 2, 0, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_2 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_2) (planner *p) { + X(kr2c_register) (p, r2cbIII_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:36 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -name r2cbIII_20 -dft-III -include r2cbIII.h */ + +/* + * This function contains 94 FP additions, 56 FP multiplications, + * (or, 58 additions, 20 multiplications, 36 fused multiply/add), + * 59 stack variables, 6 constants, and 40 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_20, variant built when HAVE_FMA is defined: performs v iterations. Each iteration reads Cr[0], Ci[0] and Cr[WS(csr, k)], Ci[WS(csi, k)] for k = 1..9, then stores ten results each into R0 and R1 (index 0 and WS(rs, k), k = 1..9), which accounts for the 40 memory accesses tallied above. Between iterations R0 and R1 advance by ovs while Cr and Ci advance by ivs. Within this function Cr and Ci are only read and R0 and R1 are only written. NOTE(review): presumably the n = 20 kernel selected by the generator flags quoted above; R, E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this chunk. */ +static void r2cbIII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E TZ, TD, TW, Tw, Tt, TF, T1f, T1b; + { + E T1l, Tk, T9, Tj, Ta, TV, TI, Ts, TU, T1t, T11, Tx, T13, TC, T1a; + E T1i, Th, Tv, Ty; + { + E TQ, TS, Tr, Tm, Tn; + { + E T1, T5, T6, T2, T3, T7, TY; + T1 = Cr[WS(csr, 2)]; + T5 = Cr[WS(csr, 9)]; + T6 = Cr[WS(csr, 5)]; + T2 = Cr[WS(csr, 6)]; + T3 = Cr[WS(csr, 1)]; + TQ = Ci[WS(csi, 2)]; + T7 = T5 + T6; + TY = T5 - T6; + { + E T4, TX, T8, Tp, Tq; + T4 = T2 + T3; + TX = T2 - T3; + Tp = Ci[WS(csi, 5)]; + 
Tq = Ci[WS(csi, 9)]; + T1l = FNMS(KP618033988, TX, TY); + TZ = FMA(KP618033988, TY, TX); + Tk = T4 - T7; + T8 = T4 + T7; + TS = Tp + Tq; + Tr = Tp - Tq; + T9 = T1 + T8; + Tj = FNMS(KP250000000, T8, T1); + Tm = Ci[WS(csi, 6)]; + Tn = Ci[WS(csi, 1)]; + } + } + { + E Tb, T19, Tg, Tc; + Ta = Cr[WS(csr, 7)]; + { + E Te, Tf, To, TR, TT; + Te = Cr[0]; + Tf = Cr[WS(csr, 4)]; + To = Tm + Tn; + TR = Tm - Tn; + Tb = Cr[WS(csr, 3)]; + T19 = Te - Tf; + Tg = Te + Tf; + TT = TR - TS; + TV = TR + TS; + TI = FNMS(KP618033988, To, Tr); + Ts = FMA(KP618033988, Tr, To); + TU = FNMS(KP250000000, TT, TQ); + T1t = TT + TQ; + Tc = Cr[WS(csr, 8)]; + } + T11 = Ci[WS(csi, 7)]; + { + E TA, TB, Td, T18; + TA = Ci[WS(csi, 4)]; + TB = Ci[0]; + Td = Tb + Tc; + T18 = Tb - Tc; + Tx = Ci[WS(csi, 3)]; + T13 = TB + TA; + TC = TA - TB; + T1a = FMA(KP618033988, T19, T18); + T1i = FNMS(KP618033988, T18, T19); + Th = Td + Tg; + Tv = Td - Tg; + Ty = Ci[WS(csi, 8)]; + } + } + } + { + E Tu, T1w, T16, TL, T15, T1u; + { + E Ti, T12, Tz, T14; + Tu = FNMS(KP250000000, Th, Ta); + Ti = Ta + Th; + T12 = Tx - Ty; + Tz = Tx + Ty; + T1w = T9 - Ti; + T14 = T12 - T13; + T16 = T12 + T13; + TL = FNMS(KP618033988, Tz, TC); + TD = FMA(KP618033988, TC, Tz); + T15 = FNMS(KP250000000, T14, T11); + T1u = T14 + T11; + R0[0] = KP2_000000000 * (T9 + Ti); + } + { + E Tl, TJ, TN, T1q, T1m, TK, T1h, T17, TH, T1k, T1v; + Tl = FMA(KP559016994, Tk, Tj); + TH = FNMS(KP559016994, Tk, Tj); + T1k = FNMS(KP559016994, TV, TU); + TW = FMA(KP559016994, TV, TU); + R0[WS(rs, 5)] = KP2_000000000 * (T1u - T1t); + T1v = T1t + T1u; + TJ = FNMS(KP951056516, TI, TH); + TN = FMA(KP951056516, TI, TH); + T1q = FMA(KP951056516, T1l, T1k); + T1m = FNMS(KP951056516, T1l, T1k); + R1[WS(rs, 7)] = KP1_414213562 * (T1w + T1v); + R1[WS(rs, 2)] = KP1_414213562 * (T1v - T1w); + Tw = FMA(KP559016994, Tv, Tu); + TK = FNMS(KP559016994, Tv, Tu); + T1h = FNMS(KP559016994, T16, T15); + T17 = FMA(KP559016994, T16, T15); + { + E TM, TO, T1j, T1r; + TM = FMA(KP951056516, 
TL, TK); + TO = FNMS(KP951056516, TL, TK); + T1j = FMA(KP951056516, T1i, T1h); + T1r = FNMS(KP951056516, T1i, T1h); + Tt = FNMS(KP951056516, Ts, Tl); + TF = FMA(KP951056516, Ts, Tl); + { + E T1n, T1p, T1s, T1o; + T1n = TN - TO; + R0[WS(rs, 6)] = -(KP2_000000000 * (TN + TO)); + T1p = TM - TJ; + R0[WS(rs, 4)] = KP2_000000000 * (TJ + TM); + T1s = T1q + T1r; + R0[WS(rs, 9)] = KP2_000000000 * (T1r - T1q); + T1o = T1m + T1j; + R0[WS(rs, 1)] = KP2_000000000 * (T1j - T1m); + R1[WS(rs, 6)] = KP1_414213562 * (T1p + T1s); + R1[WS(rs, 1)] = KP1_414213562 * (T1p - T1s); + R1[WS(rs, 3)] = KP1_414213562 * (T1n + T1o); + R1[WS(rs, 8)] = KP1_414213562 * (T1n - T1o); + T1f = FMA(KP951056516, T1a, T17); + T1b = FNMS(KP951056516, T1a, T17); + } + } + } + } + } + { + E TE, TG, T10, T1e; + TE = FMA(KP951056516, TD, Tw); + TG = FNMS(KP951056516, TD, Tw); + T10 = FMA(KP951056516, TZ, TW); + T1e = FNMS(KP951056516, TZ, TW); + { + E T1d, TP, T1g, T1c; + T1d = TF - TG; + R0[WS(rs, 2)] = -(KP2_000000000 * (TF + TG)); + TP = Tt - TE; + R0[WS(rs, 8)] = KP2_000000000 * (Tt + TE); + T1g = T1e + T1f; + R0[WS(rs, 7)] = KP2_000000000 * (T1e - T1f); + T1c = T10 + T1b; + R0[WS(rs, 3)] = KP2_000000000 * (T10 - T1b); + R1[WS(rs, 9)] = -(KP1_414213562 * (T1d + T1g)); + R1[WS(rs, 4)] = KP1_414213562 * (T1d - T1g); + R1[WS(rs, 5)] = -(KP1_414213562 * (TP + T1c)); + R1[0] = KP1_414213562 * (TP - T1c); + } + } + } + } +} + /* Descriptor for the HAVE_FMA variant: size 20, its name string, and an operation-count tuple whose first three entries repeat the 58 additions, 20 multiplications and 36 fused multiply-adds quoted in the header comment. GENUS is not defined in this chunk (presumably supplied by r2cbIII.h). */ +static const kr2c_desc desc = { 20, "r2cbIII_20", {58, 20, 36, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_20 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_20) (planner *p) { + X(kr2c_register) (p, r2cbIII_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -name r2cbIII_20 -dft-III -include r2cbIII.h */ + +/* + * This function contains 94 FP additions, 44 FP multiplications, + * (or, 82 additions, 32 multiplications, 12 fused multiply/add), + * 43 stack variables, 6 constants, and 40 memory accesses + */ +#include "r2cbIII.h" + /* r2cbIII_20, variant built when HAVE_FMA is not defined: same signature, same v-iteration loop with ovs and ivs pointer advances, and the same Cr, Ci, R0 and R1 index sets as the variant above. It is written with explicit products such as KP587785252 * T11 and declares its own 6 DK constants. */ +static void 
r2cbIII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E T1, Tj, T1k, T13, T8, Tk, T17, Ts, T16, TI, T18, T19, Ta, Tu, T1i; + E TS, Th, Tv, TX, TD, TV, TL, TW, TY; + { + E T7, T12, T4, T11; + T1 = Cr[WS(csr, 2)]; + { + E T5, T6, T2, T3; + T5 = Cr[WS(csr, 9)]; + T6 = Cr[WS(csr, 5)]; + T7 = T5 + T6; + T12 = T5 - T6; + T2 = Cr[WS(csr, 6)]; + T3 = Cr[WS(csr, 1)]; + T4 = T2 + T3; + T11 = T2 - T3; + } + Tj = KP559016994 * (T4 - T7); + T1k = FNMS(KP951056516, T12, KP587785252 * T11); + T13 = FMA(KP951056516, T11, KP587785252 * T12); + T8 = T4 + T7; + Tk = FNMS(KP250000000, T8, T1); + } + { + E Tr, T15, To, T14; + T17 = Ci[WS(csi, 2)]; + { + E Tp, Tq, Tm, Tn; + Tp = Ci[WS(csi, 5)]; + Tq = Ci[WS(csi, 9)]; + Tr = Tp - Tq; + T15 = Tp + Tq; + Tm = Ci[WS(csi, 6)]; + Tn = Ci[WS(csi, 1)]; + To = Tm + Tn; + T14 = Tm - Tn; + } + Ts = FMA(KP951056516, To, KP587785252 * Tr); + T16 = KP559016994 * (T14 + T15); + TI = FNMS(KP951056516, Tr, KP587785252 * To); + T18 = T14 - T15; + T19 = FNMS(KP250000000, T18, T17); + } + { + E Tg, TR, Td, TQ; + Ta = Cr[WS(csr, 7)]; + { + E Te, Tf, Tb, Tc; + Te = Cr[0]; + Tf = Cr[WS(csr, 4)]; + Tg = Te + Tf; + TR = Te - Tf; + Tb = Cr[WS(csr, 3)]; + Tc = Cr[WS(csr, 8)]; + Td = Tb + Tc; + TQ = Tb - Tc; + } + Tu = KP559016994 * (Td - Tg); + T1i = FNMS(KP951056516, TR, KP587785252 * TQ); + TS = 
FMA(KP951056516, TQ, KP587785252 * TR); + Th = Td + Tg; + Tv = FNMS(KP250000000, Th, Ta); + } + { + E TC, TU, Tz, TT; + TX = Ci[WS(csi, 7)]; + { + E TA, TB, Tx, Ty; + TA = Ci[WS(csi, 4)]; + TB = Ci[0]; + TC = TA - TB; + TU = TB + TA; + Tx = Ci[WS(csi, 3)]; + Ty = Ci[WS(csi, 8)]; + Tz = Tx + Ty; + TT = Ty - Tx; + } + TD = FMA(KP951056516, Tz, KP587785252 * TC); + TV = KP559016994 * (TT - TU); + TL = FNMS(KP587785252, Tz, KP951056516 * TC); + TW = TT + TU; + TY = FMA(KP250000000, TW, TX); + } + { + E T9, Ti, T1w, T1t, T1u, T1v; + T9 = T1 + T8; + Ti = Ta + Th; + T1w = T9 - Ti; + T1t = T18 + T17; + T1u = TX - TW; + T1v = T1t + T1u; + R0[0] = KP2_000000000 * (T9 + Ti); + R0[WS(rs, 5)] = KP2_000000000 * (T1u - T1t); + R1[WS(rs, 2)] = KP1_414213562 * (T1v - T1w); + R1[WS(rs, 7)] = KP1_414213562 * (T1w + T1v); + } + { + E TJ, TO, T1m, T1q, TM, TN, T1j, T1r; + { + E TH, T1l, TK, T1h; + TH = Tk - Tj; + TJ = TH + TI; + TO = TH - TI; + T1l = T19 - T16; + T1m = T1k + T1l; + T1q = T1l - T1k; + TK = Tv - Tu; + TM = TK + TL; + TN = TL - TK; + T1h = TV + TY; + T1j = T1h - T1i; + T1r = T1i + T1h; + } + R0[WS(rs, 4)] = KP2_000000000 * (TJ + TM); + R0[WS(rs, 6)] = KP2_000000000 * (TN - TO); + R0[WS(rs, 9)] = KP2_000000000 * (T1r - T1q); + R0[WS(rs, 1)] = KP2_000000000 * (T1j - T1m); + { + E T1p, T1s, T1n, T1o; + T1p = TM - TJ; + T1s = T1q + T1r; + R1[WS(rs, 1)] = KP1_414213562 * (T1p - T1s); + R1[WS(rs, 6)] = KP1_414213562 * (T1p + T1s); + T1n = TO + TN; + T1o = T1m + T1j; + R1[WS(rs, 8)] = KP1_414213562 * (T1n - T1o); + R1[WS(rs, 3)] = KP1_414213562 * (T1n + T1o); + } + } + { + E Tt, TG, T1b, T1f, TE, TF, T10, T1e; + { + E Tl, T1a, Tw, TZ; + Tl = Tj + Tk; + Tt = Tl - Ts; + TG = Tl + Ts; + T1a = T16 + T19; + T1b = T13 + T1a; + T1f = T1a - T13; + Tw = Tu + Tv; + TE = Tw + TD; + TF = TD - Tw; + TZ = TV - TY; + T10 = TS + TZ; + T1e = TZ - TS; + } + R0[WS(rs, 8)] = KP2_000000000 * (Tt + TE); + R0[WS(rs, 2)] = KP2_000000000 * (TF - TG); + R0[WS(rs, 7)] = KP2_000000000 * (T1f + T1e); + 
R0[WS(rs, 3)] = KP2_000000000 * (T1b + T10); + { + E T1d, T1g, TP, T1c; + T1d = TG + TF; + T1g = T1e - T1f; + R1[WS(rs, 4)] = KP1_414213562 * (T1d + T1g); + R1[WS(rs, 9)] = KP1_414213562 * (T1g - T1d); + TP = Tt - TE; + T1c = T10 - T1b; + R1[0] = KP1_414213562 * (TP + T1c); + R1[WS(rs, 5)] = KP1_414213562 * (T1c - TP); + } + } + } + } +} + /* Descriptor for the non-FMA variant: size 20, its name string, and an operation-count tuple whose first three entries repeat the 82 additions, 32 multiplications and 12 fused multiply-adds quoted in its header comment. */ +static const kr2c_desc desc = { 20, "r2cbIII_20", {82, 32, 12, 0}, &GENUS }; + /* Registration entry point: passes r2cbIII_20 and desc to the planner p via X(kr2c_register). */ +void X(codelet_r2cbIII_20) (planner *p) { + X(kr2c_register) (p, r2cbIII_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,618 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:36 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cbIII_25 -dft-III -include r2cbIII.h */ + +/* + * This function contains 152 FP additions, 120 FP multiplications, + * (or, 32 additions, 0 multiplications, 120 fused multiply/add), + * 115 stack variables, 44 constants, and 50 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP979740652, +0.979740652857618686258237536568998933733477632); + DK(KP438153340, +0.438153340021931793654057951961031291699532119); + DK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DK(KP963507348, +0.963507348203430549974383005744259307057084020); + DK(KP1_721083328, +1.721083328735889354196523361841037632825608373); + DK(KP1_606007150, +1.606007150877320829666881187140752009270929701); + DK(KP1_011627398, +1.011627398597394192215998921771049272931807941); + DK(KP641441904, +0.641441904830606407298806329068862424939687989); + DK(KP595480289, +0.595480289600000014706716770488118292997907308); + DK(KP452413526, +0.452413526233009763856834323966348796985206956); + DK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DK(KP933137358, +0.933137358350283770603023973254446451924190884); + DK(KP1_666834356, +1.666834356657377354817925100486477686277992119); + DK(KP1_842354653, +1.842354653930286640500894870830132058718564461); + DK(KP1_082908895, +1.082908895072625554092571180165639018104066379); + 
DK(KP576710603, +0.576710603632765877371579268136471017090111488); + DK(KP662318342, +0.662318342759882818626911127577439236802190210); + DK(KP484291580, +0.484291580564315559745084187732367906918006201); + DK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DK(KP1_898359647, +1.898359647016882523151110931686726543423167685); + DK(KP1_386580726, +1.386580726567734802700860150804827247498955921); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP1_115827804, +1.115827804063668528375399296931134075984874304); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP499013364, +0.499013364214135780976168403431725276668452610); + DK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP730409924, +0.730409924561256563751459444999838399157094302); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP451418159, +0.451418159099103183892477933432151804893354132); + DK(KP846146756, +0.846146756728608505452954290121135880883743802); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP062914667, +0.062914667253649757225485955897349402364686947); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E T1P, T2c, T2a, T24, T26, T25, T27, T2b; + { + E T1O, TS, T5, T1N, TP, Te, TA, T2i, T1V, T17, T1B, T2h, T1S, T10, T1C; + E T1a, T19, Tn, T1h, T1l, T1Y, T1e, T21, TJ, T1g; + { + E T1, T2, T3, TQ, TR; + TQ = Ci[WS(csi, 7)]; + TR = Ci[WS(csi, 2)]; + T1 = Cr[WS(csr, 12)]; + T2 = Cr[WS(csr, 7)]; + T3 = Cr[WS(csr, 2)]; + T1O = FNMS(KP618033988, TQ, TR); + TS = FMA(KP618033988, TR, TQ); + { + E TV, TU, T1U, T16, T12, T1R, TZ, T11; + { + E T6, Tz, T14, T15, TX, Tu, Td, Tx, TY, T4, TO, Ty; + T6 = Cr[WS(csr, 11)]; + T4 = T2 + T3; + TO = T3 - T2; + Tz = Ci[WS(csi, 11)]; + { + E Ta, T9, Tb, T7, T8, TN; + T7 = Cr[WS(csr, 6)]; + T8 = Cr[WS(csr, 8)]; + TN = FNMS(KP500000000, T4, T1); + T5 = FMA(KP2_000000000, T4, T1); + Ta = Cr[WS(csr, 1)]; + T14 = T8 - T7; + T9 = T7 + T8; + T1N = FMA(KP1_118033988, TO, TN); + TP = FNMS(KP1_118033988, TO, TN); + Tb = Cr[WS(csr, 3)]; + { + E Tv, Tw, Ts, Tt, Tc; + Ts = Ci[WS(csi, 8)]; + Tt = Ci[WS(csi, 6)]; + T15 = Tb - Ta; + Tc = Ta + Tb; + Tv = Ci[WS(csi, 3)]; + TX = Tt + Ts; + Tu = Ts - Tt; + Tw = Ci[WS(csi, 1)]; + Td = T9 + Tc; + TV = Tc - T9; + Tx = Tv - Tw; + TY = Tw + Tv; + } + } + Te = T6 + Td; + TU = FMS(KP250000000, Td, T6); + T1U = FNMS(KP618033988, T14, T15); + T16 = FMA(KP618033988, T15, T14); + T12 = Tx - Tu; + Ty = Tu + Tx; + T1R = FNMS(KP618033988, TX, TY); + TZ = FMA(KP618033988, TY, TX); + TA = Ty - Tz; + T11 = FMA(KP250000000, Ty, Tz); + } + { + E Tf, TI, T1j, T1k, Tm, T1c, TD, TG, T1d, TH; + Tf = Cr[WS(csr, 10)]; + TI = Ci[WS(csi, 10)]; + { + E T13, T1T, TW, T1Q; + T13 = FMA(KP559016994, T12, T11); + T1T = FNMS(KP559016994, T12, T11); + TW = FMA(KP559016994, TV, TU); + T1Q = FNMS(KP559016994, TV, TU); + T2i = FMA(KP951056516, T1U, T1T); + T1V = FNMS(KP951056516, T1U, T1T); + 
T17 = FMA(KP951056516, T16, T13); + T1B = FNMS(KP951056516, T16, T13); + T2h = FNMS(KP951056516, T1R, T1Q); + T1S = FMA(KP951056516, T1R, T1Q); + T10 = FNMS(KP951056516, TZ, TW); + T1C = FMA(KP951056516, TZ, TW); + { + E Tg, Th, Tj, Tk; + Tg = Cr[WS(csr, 5)]; + Th = Cr[WS(csr, 9)]; + Tj = Cr[0]; + Tk = Cr[WS(csr, 4)]; + { + E TB, Ti, Tl, TC, TE, TF; + TB = Ci[WS(csi, 9)]; + T1j = Tg - Th; + Ti = Tg + Th; + T1k = Tk - Tj; + Tl = Tj + Tk; + TC = Ci[WS(csi, 5)]; + TE = Ci[WS(csi, 4)]; + TF = Ci[0]; + Tm = Ti + Tl; + T1a = Ti - Tl; + T1c = TC + TB; + TD = TB - TC; + TG = TE - TF; + T1d = TF + TE; + } + } + } + T19 = FMS(KP250000000, Tm, Tf); + Tn = Tf + Tm; + T1h = TD - TG; + TH = TD + TG; + T1l = FNMS(KP618033988, T1k, T1j); + T1Y = FMA(KP618033988, T1j, T1k); + T1e = FMA(KP618033988, T1d, T1c); + T21 = FNMS(KP618033988, T1c, T1d); + TJ = TH - TI; + T1g = FMA(KP250000000, TH, TI); + } + } + } + { + E T1Z, T1m, T1y, T22, T1f, T1z, T2j, T2g, T2d, T2q, T2s; + { + E Tq, To, T2e, T2f; + Tq = Tn - Te; + To = Te + Tn; + { + E T1i, T1X, T1b, T20; + T1i = FNMS(KP559016994, T1h, T1g); + T1X = FMA(KP559016994, T1h, T1g); + T1b = FNMS(KP559016994, T1a, T19); + T20 = FMA(KP559016994, T1a, T19); + T2e = FMA(KP951056516, T1Y, T1X); + T1Z = FNMS(KP951056516, T1Y, T1X); + T1m = FNMS(KP951056516, T1l, T1i); + T1y = FMA(KP951056516, T1l, T1i); + T2f = FNMS(KP951056516, T21, T20); + T22 = FMA(KP951056516, T21, T20); + T1f = FNMS(KP951056516, T1e, T1b); + T1z = FMA(KP951056516, T1e, T1b); + } + { + E T2o, TK, TM, T2p, Tr, TL, Tp; + T2o = FMA(KP939062505, T2h, T2i); + T2j = FNMS(KP939062505, T2i, T2h); + R0[0] = FMA(KP2_000000000, To, T5); + Tp = FNMS(KP500000000, To, T5); + TK = FMA(KP618033988, TJ, TA); + TM = FNMS(KP618033988, TA, TJ); + T2g = FNMS(KP062914667, T2f, T2e); + T2p = FMA(KP062914667, T2e, T2f); + Tr = FNMS(KP1_118033988, Tq, Tp); + TL = FMA(KP1_118033988, Tq, Tp); + T2d = FMA(KP1_902113032, T1O, T1N); + T1P = FNMS(KP1_902113032, T1O, T1N); + T2q = FMA(KP846146756, T2p, 
T2o); + T2s = FNMS(KP451418159, T2o, T2p); + R0[WS(rs, 10)] = FMA(KP1_902113032, TK, Tr); + R1[WS(rs, 2)] = FMS(KP1_902113032, TK, Tr); + R1[WS(rs, 7)] = FMS(KP1_902113032, TM, TL); + R0[WS(rs, 5)] = FMA(KP1_902113032, TM, TL); + } + } + { + E T18, T1n, T1x, TT, T2m, T1w, T1u, T2l, T1s, T1t, T2k; + T18 = FNMS(KP256756360, T17, T10); + T1s = FMA(KP256756360, T10, T17); + T1t = FMA(KP549754652, T1f, T1m); + T1n = FNMS(KP549754652, T1m, T1f); + T1x = FNMS(KP1_902113032, TS, TP); + TT = FMA(KP1_902113032, TS, TP); + T2m = FMA(KP730409924, T2j, T2g); + T2k = FNMS(KP730409924, T2j, T2g); + T1w = FNMS(KP683113946, T1s, T1t); + T1u = FMA(KP559154169, T1t, T1s); + R1[WS(rs, 1)] = -(FMA(KP1_996053456, T2k, T2d)); + T2l = FNMS(KP499013364, T2k, T2d); + { + E T1K, T1M, T1G, T1E; + { + E T1D, T1A, T1q, T1p, T1v, T1r; + { + E T1I, T1J, T2n, T2r, T1o; + T1I = FMA(KP634619297, T1B, T1C); + T1D = FNMS(KP634619297, T1C, T1B); + T1A = FMA(KP470564281, T1z, T1y); + T1J = FNMS(KP470564281, T1y, T1z); + T2n = FNMS(KP1_115827804, T2m, T2l); + T2r = FMA(KP1_115827804, T2m, T2l); + T1q = FNMS(KP904730450, T1n, T18); + T1o = FMA(KP904730450, T1n, T18); + R1[WS(rs, 11)] = FMS(KP1_386580726, T2q, T2n); + R0[WS(rs, 4)] = FMA(KP1_386580726, T2q, T2n); + R0[WS(rs, 9)] = FMA(KP1_898359647, T2s, T2r); + R1[WS(rs, 6)] = FMS(KP1_898359647, T2s, T2r); + R1[0] = FMS(KP1_937166322, T1o, TT); + T1p = FMA(KP484291580, T1o, TT); + T1K = FMA(KP662318342, T1J, T1I); + T1M = FNMS(KP576710603, T1I, T1J); + } + T1v = FMA(KP1_082908895, T1q, T1p); + T1r = FNMS(KP1_082908895, T1q, T1p); + R1[WS(rs, 10)] = FMS(KP1_842354653, T1u, T1r); + R0[WS(rs, 3)] = FMA(KP1_842354653, T1u, T1r); + R0[WS(rs, 8)] = FMA(KP1_666834356, T1w, T1v); + R1[WS(rs, 5)] = FMS(KP1_666834356, T1w, T1v); + T1G = FNMS(KP933137358, T1D, T1A); + T1E = FMA(KP933137358, T1D, T1A); + } + { + E T23, T28, T29, T1W, T1F, T1H, T1L; + T23 = FNMS(KP634619297, T22, T1Z); + T28 = FMA(KP634619297, T1Z, T22); + T29 = FMA(KP549754652, T1S, T1V); + T1W = 
FNMS(KP549754652, T1V, T1S); + R0[WS(rs, 2)] = FMA(KP1_809654104, T1E, T1x); + T1F = FNMS(KP452413526, T1E, T1x); + T2c = FMA(KP595480289, T28, T29); + T2a = FNMS(KP641441904, T29, T28); + T1H = FNMS(KP1_011627398, T1G, T1F); + T1L = FMA(KP1_011627398, T1G, T1F); + R0[WS(rs, 12)] = FNMS(KP1_606007150, T1K, T1H); + R1[WS(rs, 4)] = -(FMA(KP1_606007150, T1K, T1H)); + R1[WS(rs, 9)] = -(FMA(KP1_721083328, T1M, T1L)); + R0[WS(rs, 7)] = FNMS(KP1_721083328, T1M, T1L); + T24 = FNMS(KP963507348, T23, T1W); + T26 = FMA(KP963507348, T23, T1W); + } + } + } + } + } + R0[WS(rs, 1)] = FNMS(KP1_752613360, T24, T1P); + T25 = FMA(KP438153340, T24, T1P); + T27 = FMA(KP979740652, T26, T25); + T2b = FNMS(KP979740652, T26, T25); + R1[WS(rs, 8)] = -(FMA(KP1_606007150, T2a, T27)); + R0[WS(rs, 6)] = FNMS(KP1_606007150, T2a, T27); + R1[WS(rs, 3)] = -(FMA(KP1_666834356, T2c, T2b)); + R0[WS(rs, 11)] = FNMS(KP1_666834356, T2c, T2b); + } + } +} + +static const kr2c_desc desc = { 25, "r2cbIII_25", {32, 0, 120, 0}, &GENUS }; + +void X(codelet_r2cbIII_25) (planner *p) { + X(kr2c_register) (p, r2cbIII_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cbIII_25 -dft-III -include r2cbIII.h */ + +/* + * This function contains 152 FP additions, 98 FP multiplications, + * (or, 100 additions, 46 multiplications, 52 fused multiply/add), + * 65 stack variables, 21 constants, and 50 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP998026728, 
+0.998026728428271561952336806863450553336905220); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E TS, T1O, T5, TP, T1N, TI, TH, Te, T17, T2h, T1y, T1V, T10, T2g, T1x; + E T1S, Tz, Ty, Tn, T1m, T2e, T1B, T22, T1f, T2d, T1A, T1Z, TQ, TR; + TQ = Ci[WS(csi, 2)]; + TR = Ci[WS(csi, 7)]; + TS = FNMS(KP1_175570504, TR, KP1_902113032 * TQ); + T1O = FMA(KP1_902113032, TR, KP1_175570504 * TQ); + { + E T1, T4, TN, T2, T3, TO; + T1 = Cr[WS(csr, 12)]; + T2 = Cr[WS(csr, 7)]; + T3 = Cr[WS(csr, 2)]; + T4 = T2 + T3; + TN = KP1_118033988 * (T3 - T2); + T5 = FMA(KP2_000000000, T4, T1); + TO = FMS(KP500000000, T4, T1); + TP = TN - TO; + T1N = TO + TN; + } + { + E T6, Td, T15, TU, T14, T11, TX, TY; + T6 = Cr[WS(csr, 11)]; + TI = Ci[WS(csi, 11)]; + { + E T7, T8, T9, Ta, Tb, Tc; + T7 = Cr[WS(csr, 6)]; + T8 = 
Cr[WS(csr, 8)]; + T9 = T7 + T8; + Ta = Cr[WS(csr, 1)]; + Tb = Cr[WS(csr, 3)]; + Tc = Ta + Tb; + Td = T9 + Tc; + T15 = Ta - Tb; + TU = KP559016994 * (Tc - T9); + T14 = T8 - T7; + } + { + E TB, TC, TD, TE, TF, TG; + TB = Ci[WS(csi, 6)]; + TC = Ci[WS(csi, 8)]; + TD = TB - TC; + TE = Ci[WS(csi, 1)]; + TF = Ci[WS(csi, 3)]; + TG = TE - TF; + TH = TD + TG; + T11 = KP559016994 * (TD - TG); + TX = TB + TC; + TY = TE + TF; + } + Te = T6 + Td; + { + E T16, T1T, T13, T1U, T12; + T16 = FMA(KP587785252, T14, KP951056516 * T15); + T1T = FNMS(KP587785252, T15, KP951056516 * T14); + T12 = FNMS(KP250000000, TH, TI); + T13 = T11 - T12; + T1U = T11 + T12; + T17 = T13 - T16; + T2h = T1T - T1U; + T1y = T16 + T13; + T1V = T1T + T1U; + } + { + E TZ, T1R, TW, T1Q, TV; + TZ = FNMS(KP951056516, TY, KP587785252 * TX); + T1R = FMA(KP951056516, TX, KP587785252 * TY); + TV = FMS(KP250000000, Td, T6); + TW = TU - TV; + T1Q = TV + TU; + T10 = TW + TZ; + T2g = T1Q + T1R; + T1x = TZ - TW; + T1S = T1Q - T1R; + } + } + { + E Tf, Tm, T1k, T19, T1j, T1g, T1c, T1d; + Tf = Cr[WS(csr, 10)]; + Tz = Ci[WS(csi, 10)]; + { + E Tg, Th, Ti, Tj, Tk, Tl; + Tg = Cr[WS(csr, 5)]; + Th = Cr[WS(csr, 9)]; + Ti = Tg + Th; + Tj = Cr[0]; + Tk = Cr[WS(csr, 4)]; + Tl = Tj + Tk; + Tm = Ti + Tl; + T1k = Tj - Tk; + T19 = KP559016994 * (Tl - Ti); + T1j = Th - Tg; + } + { + E Ts, Tt, Tu, Tv, Tw, Tx; + Ts = Ci[WS(csi, 4)]; + Tt = Ci[0]; + Tu = Ts - Tt; + Tv = Ci[WS(csi, 5)]; + Tw = Ci[WS(csi, 9)]; + Tx = Tv - Tw; + Ty = Tu - Tx; + T1g = KP559016994 * (Tx + Tu); + T1c = Tv + Tw; + T1d = Tt + Ts; + } + Tn = Tf + Tm; + { + E T1l, T20, T1i, T21, T1h; + T1l = FMA(KP587785252, T1j, KP951056516 * T1k); + T20 = FNMS(KP587785252, T1k, KP951056516 * T1j); + T1h = FMA(KP250000000, Ty, Tz); + T1i = T1g - T1h; + T21 = T1g + T1h; + T1m = T1i - T1l; + T2e = T21 - T20; + T1B = T1l + T1i; + T22 = T20 + T21; + } + { + E T1e, T1Y, T1b, T1X, T1a; + T1e = FNMS(KP951056516, T1d, KP587785252 * T1c); + T1Y = FMA(KP951056516, T1c, KP587785252 * T1d); + T1a 
= FMS(KP250000000, Tm, Tf); + T1b = T19 - T1a; + T1X = T1a + T19; + T1f = T1b + T1e; + T2d = T1X + T1Y; + T1A = T1e - T1b; + T1Z = T1X - T1Y; + } + } + { + E Tq, To, Tp, TK, TM, TA, TJ, TL, Tr; + Tq = KP1_118033988 * (Tn - Te); + To = Te + Tn; + Tp = FMS(KP500000000, To, T5); + TA = Ty - Tz; + TJ = TH + TI; + TK = FNMS(KP1_902113032, TJ, KP1_175570504 * TA); + TM = FMA(KP1_175570504, TJ, KP1_902113032 * TA); + R0[0] = FMA(KP2_000000000, To, T5); + TL = Tq - Tp; + R0[WS(rs, 5)] = TL + TM; + R1[WS(rs, 7)] = TM - TL; + Tr = Tp + Tq; + R1[WS(rs, 2)] = Tr + TK; + R0[WS(rs, 10)] = TK - Tr; + } + { + E T2q, T2s, T2k, T2j, T2l, T2m, T2r, T2n; + { + E T2o, T2p, T2f, T2i; + T2o = FNMS(KP904827052, T2d, KP425779291 * T2e); + T2p = FNMS(KP535826794, T2h, KP844327925 * T2g); + T2q = FNMS(KP1_902113032, T2p, KP1_175570504 * T2o); + T2s = FMA(KP1_175570504, T2p, KP1_902113032 * T2o); + T2k = T1N + T1O; + T2f = FMA(KP425779291, T2d, KP904827052 * T2e); + T2i = FMA(KP535826794, T2g, KP844327925 * T2h); + T2j = T2f - T2i; + T2l = FMA(KP500000000, T2j, T2k); + T2m = KP1_118033988 * (T2i + T2f); + } + R0[WS(rs, 2)] = FMS(KP2_000000000, T2j, T2k); + T2r = T2m - T2l; + R0[WS(rs, 7)] = T2r + T2s; + R1[WS(rs, 9)] = T2s - T2r; + T2n = T2l + T2m; + R1[WS(rs, 4)] = T2n + T2q; + R0[WS(rs, 12)] = T2q - T2n; + } + { + E T1u, T1w, TT, T1o, T1p, T1q, T1v, T1r; + { + E T1s, T1t, T18, T1n; + T1s = FMA(KP481753674, T10, KP876306680 * T17); + T1t = FMA(KP844327925, T1f, KP535826794 * T1m); + T1u = FMA(KP1_902113032, T1s, KP1_175570504 * T1t); + T1w = FNMS(KP1_175570504, T1s, KP1_902113032 * T1t); + TT = TP - TS; + T18 = FNMS(KP481753674, T17, KP876306680 * T10); + T1n = FNMS(KP844327925, T1m, KP535826794 * T1f); + T1o = T18 + T1n; + T1p = FMS(KP500000000, T1o, TT); + T1q = KP1_118033988 * (T1n - T18); + } + R0[WS(rs, 1)] = FMA(KP2_000000000, T1o, TT); + T1v = T1q - T1p; + R0[WS(rs, 6)] = T1v + T1w; + R1[WS(rs, 8)] = T1w - T1v; + T1r = T1p + T1q; + R1[WS(rs, 3)] = T1r + T1u; + R0[WS(rs, 11)] = T1u - 
T1r; + } + { + E T1H, T1L, T1E, T1D, T1I, T1J, T1M, T1K; + { + E T1F, T1G, T1z, T1C; + T1F = FNMS(KP062790519, T1B, KP998026728 * T1A); + T1G = FNMS(KP684547105, T1x, KP728968627 * T1y); + T1H = FNMS(KP1_902113032, T1G, KP1_175570504 * T1F); + T1L = FMA(KP1_175570504, T1G, KP1_902113032 * T1F); + T1E = TP + TS; + T1z = FMA(KP728968627, T1x, KP684547105 * T1y); + T1C = FMA(KP062790519, T1A, KP998026728 * T1B); + T1D = T1z + T1C; + T1I = FMA(KP500000000, T1D, T1E); + T1J = KP1_118033988 * (T1C - T1z); + } + R1[WS(rs, 1)] = FMS(KP2_000000000, T1D, T1E); + T1M = T1J - T1I; + R0[WS(rs, 9)] = T1L - T1M; + R1[WS(rs, 6)] = T1L + T1M; + T1K = T1I + T1J; + R1[WS(rs, 11)] = T1H - T1K; + R0[WS(rs, 4)] = T1H + T1K; + } + { + E T2a, T2c, T1P, T24, T25, T26, T2b, T27; + { + E T28, T29, T1W, T23; + T28 = FMA(KP248689887, T1S, KP968583161 * T1V); + T29 = FMA(KP481753674, T1Z, KP876306680 * T22); + T2a = FMA(KP1_902113032, T28, KP1_175570504 * T29); + T2c = FNMS(KP1_175570504, T28, KP1_902113032 * T29); + T1P = T1N - T1O; + T1W = FNMS(KP248689887, T1V, KP968583161 * T1S); + T23 = FNMS(KP481753674, T22, KP876306680 * T1Z); + T24 = T1W + T23; + T25 = FMS(KP500000000, T24, T1P); + T26 = KP1_118033988 * (T23 - T1W); + } + R1[0] = FMA(KP2_000000000, T24, T1P); + T2b = T26 - T25; + R1[WS(rs, 5)] = T2b + T2c; + R0[WS(rs, 8)] = T2c - T2b; + T27 = T25 + T26; + R0[WS(rs, 3)] = T27 + T2a; + R1[WS(rs, 10)] = T2a - T27; + } + } + } +} + +static const kr2c_desc desc = { 25, "r2cbIII_25", {100, 46, 52, 0}, &GENUS }; + +void X(codelet_r2cbIII_25) (planner *p) { + X(kr2c_register) (p, r2cbIII_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program 
is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 3 -name r2cbIII_3 -dft-III -include r2cbIII.h */ + +/* + * This function contains 4 FP additions, 3 FP multiplications, + * (or, 1 additions, 0 multiplications, 3 fused multiply/add), + * 7 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { + E T4, T1, T2, T3; + T4 = Ci[0]; + T1 = Cr[WS(csr, 1)]; + T2 = Cr[0]; + R0[0] = FMA(KP2_000000000, T2, T1); + T3 = T2 - T1; + R1[0] = FNMS(KP1_732050807, T4, T3); + R0[WS(rs, 1)] = -(FMA(KP1_732050807, T4, T3)); + } + } +} + +static const 
kr2c_desc desc = { 3, "r2cbIII_3", {1, 0, 3, 0}, &GENUS }; + +void X(codelet_r2cbIII_3) (planner *p) { + X(kr2c_register) (p, r2cbIII_3, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 3 -name r2cbIII_3 -dft-III -include r2cbIII.h */ + +/* + * This function contains 4 FP additions, 2 FP multiplications, + * (or, 3 additions, 1 multiplications, 1 fused multiply/add), + * 8 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { + E T5, T1, T2, T3, T4; + T4 = Ci[0]; + T5 = KP1_732050807 * T4; + T1 = Cr[WS(csr, 1)]; + T2 = Cr[0]; + T3 = T2 - T1; + R0[0] = FMA(KP2_000000000, T2, T1); + R0[WS(rs, 1)] = -(T3 + T5); + R1[0] = T3 - T5; + } + } +} + +static const kr2c_desc desc = { 3, "r2cbIII_3", {3, 1, 1, 0}, &GENUS }; + +void X(codelet_r2cbIII_3) (planner *p) { + X(kr2c_register) (p, r2cbIII_3, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,695 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your 
option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:35 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cbIII_32 -dft-III -include r2cbIII.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 106 additions, 32 multiplications, 68 fused multiply/add), + * 101 stack variables, 18 constants, and 64 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + 
DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { + E T1N, T1K, T1Q, T1H, T1O, T1P; + { + E T1I, T1e, T1Z, T7, T2E, T2i, T1x, Tz, Te, T2j, T22, T2F, T1h, T1y, TK; + E T1J, Tm, T2B, TX, Tp, T2m, T28, T1M, T1C, T1k, TW, TY, T2a, T14, T15; + E Ts, TZ; + { + E TE, T1g, TJ, T1f; + { + E T4, Tv, T3, T2g, T1d, T5, Tw, Tx; + { + E T1, T2, T1b, T1c; + T1 = Cr[0]; + T2 = Cr[WS(csr, 15)]; + T1b = Ci[0]; + T1c = Ci[WS(csi, 15)]; + T4 = Cr[WS(csr, 8)]; + Tv = T1 - T2; + T3 = T1 + T2; + T2g = T1c - T1b; + T1d = T1b + T1c; + T5 = Cr[WS(csr, 7)]; + Tw = Ci[WS(csi, 8)]; + Tx = Ci[WS(csi, 7)]; + } + { + E Tb, TA, Ta, T20, TD, Tc, TG, TH; + { + E T8, T9, TB, TC; + T8 = Cr[WS(csr, 4)]; + { + E T1a, T6, T2h, Ty; + T1a = T4 - T5; + T6 = T4 + T5; + T2h = Tx - Tw; + Ty = Tw + Tx; + T1I = T1a - T1d; + T1e = T1a + T1d; + T1Z = T3 - T6; + T7 = T3 + T6; + T2E = T2h + T2g; + T2i = T2g - T2h; + T1x = Tv + Ty; + Tz = Tv - Ty; + T9 = Cr[WS(csr, 11)]; + } + TB = Ci[WS(csi, 4)]; + TC = Ci[WS(csi, 11)]; + Tb = Cr[WS(csr, 3)]; + TA = T8 - T9; + Ta = T8 + T9; + T20 = TC - TB; + TD = TB + TC; + Tc = Cr[WS(csr, 12)]; + TG = Ci[WS(csi, 3)]; + TH = Ci[WS(csi, 12)]; + } + { + E TF, Td, T21, TI; + TE = TA - TD; + T1g = TA + TD; + TF = Tb - Tc; + Td = Tb + Tc; + T21 = TG - TH; + TI = TG + TH; + Te = Ta + Td; + T2j = Ta - Td; + T22 = T20 - T21; + T2F = T20 + T21; + TJ = TF - TI; + 
T1f = TF + TI; + } + } + } + { + E TM, Ti, TN, T25, TU, TR, Tl, TO; + { + E TS, TT, Tg, Th, Tj, Tk; + Tg = Cr[WS(csr, 2)]; + Th = Cr[WS(csr, 13)]; + T1h = T1f - T1g; + T1y = T1g + T1f; + TK = TE + TJ; + T1J = TE - TJ; + TM = Tg - Th; + Ti = Tg + Th; + TS = Ci[WS(csi, 2)]; + TT = Ci[WS(csi, 13)]; + Tj = Cr[WS(csr, 10)]; + Tk = Cr[WS(csr, 5)]; + TN = Ci[WS(csi, 10)]; + T25 = TS - TT; + TU = TS + TT; + TR = Tj - Tk; + Tl = Tj + Tk; + TO = Ci[WS(csi, 5)]; + } + { + E T12, T13, Tq, Tr; + { + E Tn, T1A, TV, T24, T26, TP, To, T27, T1B, TQ; + Tn = Cr[WS(csr, 1)]; + T1A = TR - TU; + TV = TR + TU; + T24 = Ti - Tl; + Tm = Ti + Tl; + T26 = TN - TO; + TP = TN + TO; + To = Cr[WS(csr, 14)]; + T12 = Ci[WS(csi, 1)]; + T27 = T25 - T26; + T2B = T26 + T25; + T1B = TM + TP; + TQ = TM - TP; + TX = Tn - To; + Tp = Tn + To; + T2m = T24 + T27; + T28 = T24 - T27; + T1M = FNMS(KP414213562, T1A, T1B); + T1C = FMA(KP414213562, T1B, T1A); + T1k = FMA(KP414213562, TQ, TV); + TW = FNMS(KP414213562, TV, TQ); + T13 = Ci[WS(csi, 14)]; + } + Tq = Cr[WS(csr, 6)]; + Tr = Cr[WS(csr, 9)]; + TY = Ci[WS(csi, 6)]; + T2a = T13 - T12; + T14 = T12 + T13; + T15 = Tq - Tr; + Ts = Tq + Tr; + TZ = Ci[WS(csi, 9)]; + } + } + } + { + E T1L, T1F, T23, T2n, T2k, T2e, T1p, T1t, T1s, T1i, T1o, T19, T1l, T1q; + { + E T2z, T2G, T2H, T2C, T1j, T17, T2r, T2s, T2u, T2v, T2K, T2D; + { + E T2L, T2d, T2l, T2O; + { + E Tf, T2N, Tu, T2M; + { + E T1D, T16, T29, Tt, T2b, T10; + T2z = T7 - Te; + Tf = T7 + Te; + T1D = T15 + T14; + T16 = T14 - T15; + T29 = Tp - Ts; + Tt = Tp + Ts; + T2b = TY - TZ; + T10 = TY + TZ; + T2N = T2F + T2E; + T2G = T2E - T2F; + T2H = Tm - Tt; + Tu = Tm + Tt; + { + E T2c, T2A, T1E, T11; + T2c = T2a - T2b; + T2A = T2b + T2a; + T1E = TX + T10; + T11 = TX - T10; + T2L = Tf - Tu; + T2d = T29 + T2c; + T2l = T29 - T2c; + T2C = T2A - T2B; + T2M = T2B + T2A; + T1L = FMA(KP414213562, T1D, T1E); + T1F = FNMS(KP414213562, T1E, T1D); + T1j = FMA(KP414213562, T11, T16); + T17 = FNMS(KP414213562, T16, T11); + T2O = T2M + 
T2N; + } + } + R0[0] = KP2_000000000 * (Tf + Tu); + R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M); + } + T23 = T1Z + T22; + T2r = T1Z - T22; + R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L); + R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O); + T2s = T2m + T2l; + T2n = T2l - T2m; + T2k = T2i - T2j; + T2u = T2j + T2i; + T2v = T28 - T2d; + T2e = T28 + T2d; + } + { + E T2y, T2t, T2x, T2w; + T2y = FMA(KP707106781, T2s, T2r); + T2t = FNMS(KP707106781, T2s, T2r); + T2x = FMA(KP707106781, T2v, T2u); + T2w = FNMS(KP707106781, T2v, T2u); + R0[WS(rs, 7)] = KP1_961570560 * (FMA(KP198912367, T2y, T2x)); + R0[WS(rs, 15)] = -(KP1_961570560 * (FNMS(KP198912367, T2x, T2y))); + R0[WS(rs, 11)] = KP1_662939224 * (FNMS(KP668178637, T2t, T2w)); + R0[WS(rs, 3)] = KP1_662939224 * (FMA(KP668178637, T2w, T2t)); + T2K = T2z - T2C; + T2D = T2z + T2C; + } + { + E TL, T18, T2J, T2I; + T1p = FNMS(KP707106781, TK, Tz); + TL = FMA(KP707106781, TK, Tz); + T18 = TW + T17; + T1t = TW - T17; + T1s = FMA(KP707106781, T1h, T1e); + T1i = FNMS(KP707106781, T1h, T1e); + T2J = T2H + T2G; + T2I = T2G - T2H; + T1o = FNMS(KP923879532, T18, TL); + T19 = FMA(KP923879532, T18, TL); + R0[WS(rs, 6)] = KP1_847759065 * (FMA(KP414213562, T2K, T2J)); + R0[WS(rs, 14)] = -(KP1_847759065 * (FNMS(KP414213562, T2J, T2K))); + R0[WS(rs, 10)] = KP1_847759065 * (FNMS(KP414213562, T2D, T2I)); + R0[WS(rs, 2)] = KP1_847759065 * (FMA(KP414213562, T2I, T2D)); + T1l = T1j - T1k; + T1q = T1k + T1j; + } + } + { + E T1z, T1U, T1Y, T1T, T1V, T1G; + { + E T1w, T1r, T1n, T1m; + T1n = FMA(KP923879532, T1l, T1i); + T1m = FNMS(KP923879532, T1l, T1i); + T1w = FMA(KP923879532, T1q, T1p); + T1r = FNMS(KP923879532, T1q, T1p); + R1[WS(rs, 4)] = -(KP1_546020906 * (FNMS(KP820678790, T1o, T1n))); + R1[WS(rs, 12)] = -(KP1_546020906 * (FMA(KP820678790, T1n, T1o))); + R1[WS(rs, 8)] = -(KP1_990369453 * (FMA(KP098491403, T19, T1m))); + R1[0] = KP1_990369453 * (FNMS(KP098491403, T1m, T19)); + { + E T1R, T1S, T1v, T1u; + T1z = FNMS(KP707106781, T1y, T1x); + T1R = 
FMA(KP707106781, T1y, T1x); + T1S = T1M + T1L; + T1N = T1L - T1M; + T1K = FNMS(KP707106781, T1J, T1I); + T1U = FMA(KP707106781, T1J, T1I); + T1v = FNMS(KP923879532, T1t, T1s); + T1u = FMA(KP923879532, T1t, T1s); + T1Y = FMA(KP923879532, T1S, T1R); + T1T = FNMS(KP923879532, T1S, T1R); + R1[WS(rs, 6)] = -(KP1_913880671 * (FNMS(KP303346683, T1w, T1v))); + R1[WS(rs, 14)] = -(KP1_913880671 * (FMA(KP303346683, T1v, T1w))); + R1[WS(rs, 10)] = -(KP1_763842528 * (FMA(KP534511135, T1r, T1u))); + R1[WS(rs, 2)] = KP1_763842528 * (FNMS(KP534511135, T1u, T1r)); + T1V = T1C + T1F; + T1G = T1C - T1F; + } + } + { + E T2q, T2f, T1X, T1W, T2p, T2o; + T1X = FMA(KP923879532, T1V, T1U); + T1W = FNMS(KP923879532, T1V, T1U); + T2q = FNMS(KP707106781, T2e, T23); + T2f = FMA(KP707106781, T2e, T23); + R1[WS(rs, 7)] = KP1_990369453 * (FMA(KP098491403, T1Y, T1X)); + R1[WS(rs, 15)] = -(KP1_990369453 * (FNMS(KP098491403, T1X, T1Y))); + R1[WS(rs, 11)] = KP1_546020906 * (FNMS(KP820678790, T1T, T1W)); + R1[WS(rs, 3)] = KP1_546020906 * (FMA(KP820678790, T1W, T1T)); + T2p = FNMS(KP707106781, T2n, T2k); + T2o = FMA(KP707106781, T2n, T2k); + T1Q = FNMS(KP923879532, T1G, T1z); + T1H = FMA(KP923879532, T1G, T1z); + R0[WS(rs, 5)] = KP1_662939224 * (FMA(KP668178637, T2q, T2p)); + R0[WS(rs, 13)] = -(KP1_662939224 * (FNMS(KP668178637, T2p, T2q))); + R0[WS(rs, 9)] = KP1_961570560 * (FNMS(KP198912367, T2f, T2o)); + R0[WS(rs, 1)] = KP1_961570560 * (FMA(KP198912367, T2o, T2f)); + } + } + } + } + T1O = FMA(KP923879532, T1N, T1K); + T1P = FNMS(KP923879532, T1N, T1K); + R1[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T1Q, T1P)); + R1[WS(rs, 13)] = -(KP1_763842528 * (FNMS(KP534511135, T1P, T1Q))); + R1[WS(rs, 9)] = KP1_913880671 * (FNMS(KP303346683, T1H, T1O)); + R1[WS(rs, 1)] = KP1_913880671 * (FMA(KP303346683, T1O, T1H)); + } + } +} + +static const kr2c_desc desc = { 32, "r2cbIII_32", {106, 32, 68, 0}, &GENUS }; + +void X(codelet_r2cbIII_32) (planner *p) { + X(kr2c_register) (p, r2cbIII_32, &desc); +} + +#else 
/* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cbIII_32 -dft-III -include r2cbIII.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 138 additions, 48 multiplications, 36 fused multiply/add), + * 66 stack variables, 19 constants, and 64 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP580569354, +0.580569354508924735272384751634790549382952557); + DK(KP942793473, +0.942793473651995297112775251810508755314920638); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP1_268786568, +1.268786568327290996430343226450986741351374190); + DK(KP196034280, +0.196034280659121203988391127777283691722273346); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), 
MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { + E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK; + E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M; + E T2d, T2m; + { + E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g; + { + E T1, T2, T1h, T1i; + T1 = Cr[0]; + T2 = Cr[WS(csr, 15)]; + T3 = T1 + T2; + Tv = T1 - T2; + T1h = Ci[0]; + T1i = Ci[WS(csi, 15)]; + T1j = T1h + T1i; + T2h = T1i - T1h; + } + { + E T4, T5, Tw, Tx; + T4 = Cr[WS(csr, 8)]; + T5 = Cr[WS(csr, 7)]; + T6 = T4 + T5; + T1g = T4 - T5; + Tw = Ci[WS(csi, 8)]; + Tx = Ci[WS(csi, 7)]; + Ty = Tw + Tx; + T2g = Tw - Tx; + } + T7 = T3 + T6; + T2i = T2g + T2h; + T2F = T2h - T2g; + Tz = Tv - Ty; + T1k = T1g + T1j; + T1I = T1g - T1j; + T1Z = T3 - T6; + T1x = Tv + Ty; + } + { + E Ta, TA, TD, T21, Td, TF, TI, T20; + { + E T8, T9, TB, TC; + T8 = Cr[WS(csr, 4)]; + T9 = Cr[WS(csr, 11)]; + Ta = T8 + T9; + TA = T8 - T9; + TB = Ci[WS(csi, 4)]; + TC = Ci[WS(csi, 11)]; + TD = TB + TC; + T21 = TB - TC; + } + { + E Tb, Tc, TG, TH; + Tb = Cr[WS(csr, 3)]; + Tc = Cr[WS(csr, 12)]; + Td = Tb + Tc; + TF = Tb - Tc; + TG = Ci[WS(csi, 3)]; + TH = Ci[WS(csi, 12)]; + TI = TG + TH; + T20 = TH - TG; + } + Te = Ta + Td; + T22 = T20 - T21; + T2E = T21 + T20; + T2j = Ta - Td; + { + E T1d, T1e, TE, TJ; + T1d = TA + TD; + T1e = TF + TI; + T1f = KP707106781 * (T1d - T1e); + T1y = KP707106781 * (T1d + T1e); + TE = TA - TD; + TJ = TF - TI; + TK = KP707106781 * (TE + TJ); + T1J = KP707106781 * (TE - TJ); + } + } + { + E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV; + { + E Tg, Th, TS, TT; + Tg = Cr[WS(csr, 2)]; + Th = Cr[WS(csr, 13)]; + Ti = Tg + Th; + TM = Tg - Th; + TS = Ci[WS(csi, 2)]; + TT = Ci[WS(csi, 13)]; + TU = TS + TT; + T25 = TS - TT; + } + { + E Tj, Tk, TN, TO; + Tj = Cr[WS(csr, 10)]; + Tk = Cr[WS(csr, 5)]; + Tl = Tj + Tk; + TR = Tj - Tk; + TN = Ci[WS(csi, 10)]; + TO = Ci[WS(csi, 5)]; + TP = TN + TO; + T26 = TN - TO; + } + Tm = Ti + Tl; + T2B = T26 + T25; + TQ = TM - TP; + TV = TR + TU; + 
TW = FNMS(KP382683432, TV, KP923879532 * TQ); + T1a = FMA(KP382683432, TQ, KP923879532 * TV); + { + E T1A, T1B, T24, T27; + T1A = TM + TP; + T1B = TU - TR; + T1C = FNMS(KP923879532, T1B, KP382683432 * T1A); + T1L = FMA(KP923879532, T1A, KP382683432 * T1B); + T24 = Ti - Tl; + T27 = T25 - T26; + T28 = T24 - T27; + T2l = T24 + T27; + } + } + { + E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16; + { + E Tn, To, T13, T14; + Tn = Cr[WS(csr, 1)]; + To = Cr[WS(csr, 14)]; + Tp = Tn + To; + TX = Tn - To; + T13 = Ci[WS(csi, 1)]; + T14 = Ci[WS(csi, 14)]; + T15 = T13 + T14; + T2a = T14 - T13; + } + { + E Tq, Tr, TY, TZ; + Tq = Cr[WS(csr, 6)]; + Tr = Cr[WS(csr, 9)]; + Ts = Tq + Tr; + T12 = Tq - Tr; + TY = Ci[WS(csi, 6)]; + TZ = Ci[WS(csi, 9)]; + T10 = TY + TZ; + T2b = TY - TZ; + } + Tt = Tp + Ts; + T2A = T2b + T2a; + T11 = TX - T10; + T16 = T12 - T15; + T17 = FMA(KP923879532, T11, KP382683432 * T16); + T1b = FNMS(KP382683432, T11, KP923879532 * T16); + { + E T1D, T1E, T29, T2c; + T1D = TX + T10; + T1E = T12 + T15; + T1F = FNMS(KP923879532, T1E, KP382683432 * T1D); + T1M = FMA(KP923879532, T1D, KP382683432 * T1E); + T29 = Tp - Ts; + T2c = T2a - T2b; + T2d = T29 + T2c; + T2m = T2c - T29; + } + } + { + E Tf, Tu, T2L, T2M, T2N, T2O; + Tf = T7 + Te; + Tu = Tm + Tt; + T2L = Tf - Tu; + T2M = T2B + T2A; + T2N = T2F - T2E; + T2O = T2M + T2N; + R0[0] = KP2_000000000 * (Tf + Tu); + R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M); + R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O); + R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L); + } + { + E T2t, T2x, T2w, T2y; + { + E T2r, T2s, T2u, T2v; + T2r = T1Z - T22; + T2s = KP707106781 * (T2m - T2l); + T2t = T2r + T2s; + T2x = T2r - T2s; + T2u = T2j + T2i; + T2v = KP707106781 * (T28 - T2d); + T2w = T2u - T2v; + T2y = T2v + T2u; + } + R0[WS(rs, 3)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w); + R0[WS(rs, 15)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y); + R0[WS(rs, 11)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w); + R0[WS(rs, 7)] = FMA(KP390180644, 
T2x, KP1_961570560 * T2y); + } + { + E T2D, T2J, T2I, T2K; + { + E T2z, T2C, T2G, T2H; + T2z = T7 - Te; + T2C = T2A - T2B; + T2D = T2z + T2C; + T2J = T2z - T2C; + T2G = T2E + T2F; + T2H = Tm - Tt; + T2I = T2G - T2H; + T2K = T2H + T2G; + } + R0[WS(rs, 2)] = FMA(KP1_847759065, T2D, KP765366864 * T2I); + R0[WS(rs, 14)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K); + R0[WS(rs, 10)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I); + R0[WS(rs, 6)] = FMA(KP765366864, T2J, KP1_847759065 * T2K); + } + { + E T19, T1n, T1m, T1o; + { + E TL, T18, T1c, T1l; + TL = Tz + TK; + T18 = TW + T17; + T19 = TL + T18; + T1n = TL - T18; + T1c = T1a + T1b; + T1l = T1f + T1k; + T1m = T1c + T1l; + T1o = T1c - T1l; + } + R1[0] = FNMS(KP196034280, T1m, KP1_990369453 * T19); + R1[WS(rs, 12)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o); + R1[WS(rs, 8)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m)); + R1[WS(rs, 4)] = FMA(KP1_268786568, T1n, KP1_546020906 * T1o); + } + { + E T1r, T1v, T1u, T1w; + { + E T1p, T1q, T1s, T1t; + T1p = Tz - TK; + T1q = T1b - T1a; + T1r = T1p + T1q; + T1v = T1p - T1q; + T1s = T1f - T1k; + T1t = TW - T17; + T1u = T1s - T1t; + T1w = T1t + T1s; + } + R1[WS(rs, 2)] = FMA(KP1_763842528, T1r, KP942793473 * T1u); + R1[WS(rs, 14)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w); + R1[WS(rs, 10)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u); + R1[WS(rs, 6)] = FMA(KP580569354, T1v, KP1_913880671 * T1w); + } + { + E T1T, T1X, T1W, T1Y; + { + E T1R, T1S, T1U, T1V; + T1R = T1x + T1y; + T1S = T1L + T1M; + T1T = T1R - T1S; + T1X = T1R + T1S; + T1U = T1J + T1I; + T1V = T1C - T1F; + T1W = T1U - T1V; + T1Y = T1V + T1U; + } + R1[WS(rs, 3)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W); + R1[WS(rs, 15)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y); + R1[WS(rs, 11)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W); + R1[WS(rs, 7)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y); + } + { + E T2f, T2p, T2o, T2q; + { + E T23, T2e, T2k, T2n; + T23 = T1Z + T22; + T2e = KP707106781 * (T28 + 
T2d); + T2f = T23 + T2e; + T2p = T23 - T2e; + T2k = T2i - T2j; + T2n = KP707106781 * (T2l + T2m); + T2o = T2k - T2n; + T2q = T2n + T2k; + } + R0[WS(rs, 1)] = FMA(KP1_961570560, T2f, KP390180644 * T2o); + R0[WS(rs, 13)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q); + R0[WS(rs, 9)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o); + R0[WS(rs, 5)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q); + } + { + E T1H, T1P, T1O, T1Q; + { + E T1z, T1G, T1K, T1N; + T1z = T1x - T1y; + T1G = T1C + T1F; + T1H = T1z + T1G; + T1P = T1z - T1G; + T1K = T1I - T1J; + T1N = T1L - T1M; + T1O = T1K - T1N; + T1Q = T1N + T1K; + } + R1[WS(rs, 1)] = FMA(KP1_913880671, T1H, KP580569354 * T1O); + R1[WS(rs, 13)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q); + R1[WS(rs, 9)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O); + R1[WS(rs, 5)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q); + } + } + } +} + +static const kr2c_desc desc = { 32, "r2cbIII_32", {138, 48, 36, 0}, &GENUS }; + +void X(codelet_r2cbIII_32) (planner *p) { + X(kr2c_register) (p, r2cbIII_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -name r2cbIII_4 -dft-III -include r2cbIII.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 6 additions, 4 multiplications, 0 fused multiply/add), + * 9 stack variables, 2 constants, and 8 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) { + E T1, T2, T4, T5, T3, T6; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + T4 = Ci[0]; + T5 = Ci[WS(csi, 1)]; + R0[0] = KP2_000000000 * (T1 + T2); + T3 = T1 - T2; + R0[WS(rs, 1)] = KP2_000000000 * (T5 - T4); + T6 = T4 + T5; + R1[WS(rs, 1)] = -(KP1_414213562 * (T3 + T6)); + R1[0] = KP1_414213562 * (T3 - T6); + } + } +} + +static const kr2c_desc desc = { 4, "r2cbIII_4", {6, 4, 0, 0}, &GENUS }; + +void X(codelet_r2cbIII_4) (planner *p) { + X(kr2c_register) (p, r2cbIII_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -name r2cbIII_4 -dft-III -include r2cbIII.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 6 
additions, 4 multiplications, 0 fused multiply/add), + * 9 stack variables, 2 constants, and 8 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) { + E T1, T2, T3, T4, T5, T6; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + T3 = T1 - T2; + T4 = Ci[0]; + T5 = Ci[WS(csi, 1)]; + T6 = T4 + T5; + R0[0] = KP2_000000000 * (T1 + T2); + R0[WS(rs, 1)] = KP2_000000000 * (T5 - T4); + R1[0] = KP1_414213562 * (T3 - T6); + R1[WS(rs, 1)] = -(KP1_414213562 * (T3 + T6)); + } + } +} + +static const kr2c_desc desc = { 4, "r2cbIII_4", {6, 4, 0, 0}, &GENUS }; + +void X(codelet_r2cbIII_4) (planner *p) { + X(kr2c_register) (p, r2cbIII_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -name r2cbIII_5 -dft-III -include r2cbIII.h */ + +/* + * This function contains 12 FP additions, 10 FP multiplications, + * (or, 2 additions, 0 multiplications, 10 fused multiply/add), + * 18 stack variables, 5 constants, and 10 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) { + E T1, T2, T3, Tc, Ta, T8, T9; + T8 = Ci[WS(csi, 1)]; + T9 = Ci[0]; + T1 = Cr[WS(csr, 2)]; + T2 = Cr[WS(csr, 1)]; + T3 = Cr[0]; + Tc = FMS(KP618033988, T8, T9); + Ta = FMA(KP618033988, T9, T8); + { + E T6, T4, T5, T7, Tb; + T6 = T3 - T2; + T4 = T2 + T3; + R0[0] = FMA(KP2_000000000, T4, T1); + T5 = FNMS(KP500000000, T4, T1); + T7 = FNMS(KP1_118033988, T6, T5); + Tb = FMA(KP1_118033988, T6, T5); + R0[WS(rs, 2)] = FNMS(KP1_902113032, Ta, T7); + R1[0] = -(FMA(KP1_902113032, Ta, T7)); + R1[WS(rs, 1)] = FMS(KP1_902113032, 
Tc, Tb); + R0[WS(rs, 1)] = FMA(KP1_902113032, Tc, Tb); + } + } + } +} + +static const kr2c_desc desc = { 5, "r2cbIII_5", {2, 0, 10, 0}, &GENUS }; + +void X(codelet_r2cbIII_5) (planner *p) { + X(kr2c_register) (p, r2cbIII_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -name r2cbIII_5 -dft-III -include r2cbIII.h */ + +/* + * This function contains 12 FP additions, 7 FP multiplications, + * (or, 8 additions, 3 multiplications, 4 fused multiply/add), + * 18 stack variables, 5 constants, and 10 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) { + E Ta, Tc, T1, T4, T5, T6, Tb, T7; + { + E T8, T9, T2, T3; + T8 = Ci[WS(csi, 1)]; + T9 = Ci[0]; + Ta = FMA(KP1_902113032, T8, KP1_175570504 * T9); + Tc = FNMS(KP1_902113032, T9, KP1_175570504 * T8); + T1 = Cr[WS(csr, 2)]; + T2 = Cr[WS(csr, 1)]; + T3 = Cr[0]; + T4 = T2 + T3; + T5 = FMS(KP500000000, T4, T1); + T6 = KP1_118033988 * (T3 - T2); + } + R0[0] = FMA(KP2_000000000, T4, T1); + Tb = T6 - T5; + R0[WS(rs, 1)] = Tb + Tc; + R1[WS(rs, 1)] = Tc - Tb; + T7 = T5 + T6; + R1[0] = T7 - Ta; + R0[WS(rs, 2)] = -(T7 + Ta); + } + } +} + +static const kr2c_desc desc = { 5, "r2cbIII_5", {8, 3, 4, 0}, &GENUS }; + +void X(codelet_r2cbIII_5) (planner *p) { + X(kr2c_register) (p, r2cbIII_5, &desc); +} 
+ +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -name r2cbIII_6 -dft-III -include r2cbIII.h */ + +/* + * This function contains 12 FP additions, 8 FP multiplications, + * (or, 6 additions, 2 multiplications, 6 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 
1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) { + E T1, T8, T2, T3, T5, T6; + T1 = Cr[WS(csr, 1)]; + T8 = Ci[WS(csi, 1)]; + T2 = Cr[WS(csr, 2)]; + T3 = Cr[0]; + T5 = Ci[WS(csi, 2)]; + T6 = Ci[0]; + { + E T4, Ta, T7, Tc, Tb, T9; + T4 = T2 + T3; + Ta = T2 - T3; + T7 = T5 + T6; + Tc = T5 - T6; + Tb = FNMS(KP2_000000000, T1, T4); + R0[0] = KP2_000000000 * (T1 + T4); + T9 = FMA(KP2_000000000, T8, T7); + R1[WS(rs, 1)] = KP2_000000000 * (T8 - T7); + R0[WS(rs, 2)] = FMS(KP1_732050807, Tc, Tb); + R0[WS(rs, 1)] = FMA(KP1_732050807, Tc, Tb); + R1[WS(rs, 2)] = FMS(KP1_732050807, Ta, T9); + R1[0] = -(FMA(KP1_732050807, Ta, T9)); + } + } + } +} + +static const kr2c_desc desc = { 6, "r2cbIII_6", {6, 2, 6, 0}, &GENUS }; + +void X(codelet_r2cbIII_6) (planner *p) { + X(kr2c_register) (p, r2cbIII_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -name r2cbIII_6 -dft-III -include r2cbIII.h */ + +/* + * This function contains 12 FP additions, 6 FP multiplications, + * (or, 10 additions, 4 multiplications, 2 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) { + E T1, T6, T4, T5, T9, Tb, Ta, Tc; + T1 = Cr[WS(csr, 1)]; + T6 = Ci[WS(csi, 1)]; + { + E T2, T3, T7, T8; + T2 = Cr[WS(csr, 2)]; + T3 = Cr[0]; + T4 = T2 + T3; + T5 = KP1_732050807 * (T2 - T3); + T7 = Ci[WS(csi, 2)]; + T8 = 
Ci[0]; + T9 = T7 + T8; + Tb = KP1_732050807 * (T7 - T8); + } + R0[0] = KP2_000000000 * (T1 + T4); + R1[WS(rs, 1)] = KP2_000000000 * (T6 - T9); + Ta = FMA(KP2_000000000, T6, T9); + R1[0] = -(T5 + Ta); + R1[WS(rs, 2)] = T5 - Ta; + Tc = FMS(KP2_000000000, T1, T4); + R0[WS(rs, 1)] = Tb - Tc; + R0[WS(rs, 2)] = Tc + Tb; + } + } +} + +static const kr2c_desc desc = { 6, "r2cbIII_6", {10, 4, 2, 0}, &GENUS }; + +void X(codelet_r2cbIII_6) (planner *p) { + X(kr2c_register) (p, r2cbIII_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1545 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:35 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include r2cbIII.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 238 additions, 64 multiplications, 196 fused multiply/add), + * 165 stack variables, 36 constants, and 128 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP357805721, +0.357805721314524104672487743774474392487532769); + DK(KP1_883088130, +1.883088130366041556825018805199004714371179592); + DK(KP472964775, +0.472964775891319928124438237972992463904131113); + DK(KP1_807978586, +1.807978586246886663172400594461074097420264050); + DK(KP049126849, +0.049126849769467254105343321271313617079695752); + DK(KP1_997590912, +1.997590912410344785429543209518201388886407229); + DK(KP906347169, +0.906347169019147157946142717268914412664134293); + DK(KP1_481902250, +1.481902250709918182351233794990325459457910619); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP250486960, +0.250486960191305461595702160124721208578685568); + DK(KP1_940062506, +1.940062506389087985207968414572200502913731924); + DK(KP599376933, +0.599376933681923766271389869014404232837890546); + DK(KP1_715457220, +1.715457220000544139804539968569540274084981599); + DK(KP148335987, +0.148335987538347428753676511486911367000625355); + DK(KP1_978353019, +1.978353019929561946903347476032486127967379067); + 
DK(KP741650546, +0.741650546272035369581266691172079863842265220); + DK(KP1_606415062, +1.606415062961289819613353025926283847759138854); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E T43, T4b, T49, T4e, T3T, T46, T40, T4a; + { + E T3t, T15, T2E, T3U, T6b, Tf, T6Q, T6u, T5J, T4L, T3V, T1g, T5U, T5q, T3u; + E T2H, T6v, Tu, T5r, T4V, T6R, T6e, T2K, T1s, T2J, T1D, T3X, T3B, T5s, T4Q; + E T3Y, T3y, T6g, TK, T5M, T57, T6N, T6j, T35, T1W, T34, T25, T4i, T3J, T5N; + E T52, T4j, T3G, T6l, TZ, T3L, T5P, T5i, T6M, 
T6o, T3M, T38, T2n, T37, T2w; + E T4l, T3Q, T5Q, T5d; + { + E T3x, T3w, T3E, T3F; + { + E T5p, T5o, T2G, T2F; + { + E T11, T3, T5m, T2D, T2A, T6, T5n, T14, Tb, T16, Ta, T4I, T19, Tc, T1c; + E T1d; + { + E T4, T5, T12, T13; + { + E T1, T2, T2B, T2C; + T1 = Cr[0]; + T2 = Cr[WS(csr, 31)]; + T2B = Ci[0]; + T2C = Ci[WS(csi, 31)]; + T4 = Cr[WS(csr, 16)]; + T11 = T1 - T2; + T3 = T1 + T2; + T5m = T2C - T2B; + T2D = T2B + T2C; + T5 = Cr[WS(csr, 15)]; + T12 = Ci[WS(csi, 16)]; + T13 = Ci[WS(csi, 15)]; + } + { + E T8, T9, T17, T18; + T8 = Cr[WS(csr, 8)]; + T2A = T4 - T5; + T6 = T4 + T5; + T5n = T13 - T12; + T14 = T12 + T13; + T9 = Cr[WS(csr, 23)]; + T17 = Ci[WS(csi, 8)]; + T18 = Ci[WS(csi, 23)]; + Tb = Cr[WS(csr, 7)]; + T16 = T8 - T9; + Ta = T8 + T9; + T4I = T18 - T17; + T19 = T17 + T18; + Tc = Cr[WS(csr, 24)]; + T1c = Ci[WS(csi, 7)]; + T1d = Ci[WS(csi, 24)]; + } + } + { + E T1b, T4J, T1e, T4H, T7, Te, Td; + T3t = T11 + T14; + T15 = T11 - T14; + T1b = Tb - Tc; + Td = Tb + Tc; + T4J = T1c - T1d; + T1e = T1c + T1d; + T2E = T2A + T2D; + T3U = T2A - T2D; + T4H = T3 - T6; + T7 = T3 + T6; + Te = Ta + Td; + T5p = Ta - Td; + { + E T4K, T6s, T6t, T1a, T1f; + T5o = T5m - T5n; + T6s = T5n + T5m; + T6t = T4I + T4J; + T4K = T4I - T4J; + T6b = T7 - Te; + Tf = T7 + Te; + T6Q = T6t + T6s; + T6u = T6s - T6t; + T2G = T16 + T19; + T1a = T16 - T19; + T1f = T1b - T1e; + T2F = T1b + T1e; + T5J = T4H - T4K; + T4L = T4H + T4K; + T3V = T1a - T1f; + T1g = T1a + T1f; + } + } + } + { + E T1i, Ti, T4O, T1q, T1n, Tl, T4N, T1l, Tq, T1t, Tp, T4T, T1A, Tr, T1u; + E T1v; + { + E Tj, Tk, T1j, T1k; + { + E Tg, Th, T1o, T1p; + Tg = Cr[WS(csr, 4)]; + T5U = T5p + T5o; + T5q = T5o - T5p; + T3u = T2G + T2F; + T2H = T2F - T2G; + Th = Cr[WS(csr, 27)]; + T1o = Ci[WS(csi, 4)]; + T1p = Ci[WS(csi, 27)]; + Tj = Cr[WS(csr, 20)]; + T1i = Tg - Th; + Ti = Tg + Th; + T4O = T1p - T1o; + T1q = T1o + T1p; + Tk = Cr[WS(csr, 11)]; + T1j = Ci[WS(csi, 20)]; + T1k = Ci[WS(csi, 11)]; + } + { + E Tn, To, T1y, T1z; + Tn = Cr[WS(csr, 3)]; + 
T1n = Tj - Tk; + Tl = Tj + Tk; + T4N = T1k - T1j; + T1l = T1j + T1k; + To = Cr[WS(csr, 28)]; + T1y = Ci[WS(csi, 3)]; + T1z = Ci[WS(csi, 28)]; + Tq = Cr[WS(csr, 12)]; + T1t = Tn - To; + Tp = Tn + To; + T4T = T1y - T1z; + T1A = T1y + T1z; + Tr = Cr[WS(csr, 19)]; + T1u = Ci[WS(csi, 12)]; + T1v = Ci[WS(csi, 19)]; + } + } + { + E T4M, T1B, T1w, T4P, T1m, T1r, Tm, Ts, T4S; + T4M = Ti - Tl; + Tm = Ti + Tl; + T1B = Tq - Tr; + Ts = Tq + Tr; + T4S = T1v - T1u; + T1w = T1u + T1v; + { + E T6c, Tt, T4R, T6d, T4U; + T6c = T4N + T4O; + T4P = T4N - T4O; + Tt = Tp + Ts; + T4R = Tp - Ts; + T6d = T4S + T4T; + T4U = T4S - T4T; + T3x = T1i + T1l; + T1m = T1i - T1l; + T6v = Tm - Tt; + Tu = Tm + Tt; + T5r = T4R - T4U; + T4V = T4R + T4U; + T6R = T6c + T6d; + T6e = T6c - T6d; + T1r = T1n + T1q; + T3w = T1n - T1q; + } + { + E T3A, T3z, T1x, T1C; + T3A = T1t + T1w; + T1x = T1t - T1w; + T1C = T1A - T1B; + T3z = T1B + T1A; + T2K = FMA(KP414213562, T1m, T1r); + T1s = FNMS(KP414213562, T1r, T1m); + T2J = FMA(KP414213562, T1x, T1C); + T1D = FNMS(KP414213562, T1C, T1x); + T3X = FMA(KP414213562, T3z, T3A); + T3B = FNMS(KP414213562, T3A, T3z); + T5s = T4M + T4P; + T4Q = T4M - T4P; + } + } + } + } + { + E T1G, Ty, T54, T20, T1X, TB, T53, T1J, TI, T4Z, T1L, TF, T22, T1U, T50; + E T1O; + { + E T1Y, T1Z, Tz, TA, Tw, Tx, T1H, T1I; + Tw = Cr[WS(csr, 2)]; + Tx = Cr[WS(csr, 29)]; + T1Y = Ci[WS(csi, 2)]; + T3Y = FNMS(KP414213562, T3w, T3x); + T3y = FMA(KP414213562, T3x, T3w); + T1G = Tw - Tx; + Ty = Tw + Tx; + T1Z = Ci[WS(csi, 29)]; + Tz = Cr[WS(csr, 18)]; + TA = Cr[WS(csr, 13)]; + T1H = Ci[WS(csi, 18)]; + T54 = T1Y - T1Z; + T20 = T1Y + T1Z; + T1X = Tz - TA; + TB = Tz + TA; + T1I = Ci[WS(csi, 13)]; + { + E T1R, T1Q, T1S, TG, TH; + TG = Cr[WS(csr, 5)]; + TH = Cr[WS(csr, 26)]; + T1R = Ci[WS(csi, 5)]; + T53 = T1H - T1I; + T1J = T1H + T1I; + T1Q = TG - TH; + TI = TG + TH; + T1S = Ci[WS(csi, 26)]; + { + E T1M, T1N, TD, TE, T1T; + TD = Cr[WS(csr, 10)]; + TE = Cr[WS(csr, 21)]; + T1T = T1R + T1S; + T4Z = T1S - T1R; 
+ T1M = Ci[WS(csi, 10)]; + T1L = TD - TE; + TF = TD + TE; + T1N = Ci[WS(csi, 21)]; + T22 = T1Q + T1T; + T1U = T1Q - T1T; + T50 = T1M - T1N; + T1O = T1M + T1N; + } + } + } + { + E T4Y, T23, T51, T1K, T1V, T3I, T3H, T21, T24; + { + E T56, T1P, T6h, T55, TC, TJ, T6i; + T4Y = Ty - TB; + TC = Ty + TB; + TJ = TF + TI; + T56 = TF - TI; + T1P = T1L - T1O; + T23 = T1L + T1O; + T6h = T53 + T54; + T55 = T53 - T54; + T6g = TC - TJ; + TK = TC + TJ; + T6i = T50 + T4Z; + T51 = T4Z - T50; + T3E = T1G + T1J; + T1K = T1G - T1J; + T5M = T56 + T55; + T57 = T55 - T56; + T6N = T6i + T6h; + T6j = T6h - T6i; + T1V = T1P + T1U; + T3I = T1P - T1U; + } + T3H = T1X - T20; + T21 = T1X + T20; + T24 = T22 - T23; + T3F = T23 + T22; + T35 = FNMS(KP707106781, T1V, T1K); + T1W = FMA(KP707106781, T1V, T1K); + T34 = FMA(KP707106781, T24, T21); + T25 = FNMS(KP707106781, T24, T21); + T4i = FMA(KP707106781, T3I, T3H); + T3J = FNMS(KP707106781, T3I, T3H); + T5N = T4Y - T51; + T52 = T4Y + T51; + } + } + { + E T27, TN, T5f, T2q, T2r, TQ, T5e, T2a, TX, T5a, T2c, TU, T2t, T2l, T5b; + E T2f; + { + E T2o, T2p, TO, TP, TL, TM, T28, T29; + TL = Cr[WS(csr, 1)]; + TM = Cr[WS(csr, 30)]; + T2o = Ci[WS(csi, 1)]; + T4j = FMA(KP707106781, T3F, T3E); + T3G = FNMS(KP707106781, T3F, T3E); + T27 = TL - TM; + TN = TL + TM; + T2p = Ci[WS(csi, 30)]; + TO = Cr[WS(csr, 14)]; + TP = Cr[WS(csr, 17)]; + T28 = Ci[WS(csi, 14)]; + T5f = T2p - T2o; + T2q = T2o + T2p; + T2r = TO - TP; + TQ = TO + TP; + T29 = Ci[WS(csi, 17)]; + { + E T2i, T2h, T2j, TV, TW; + TV = Cr[WS(csr, 9)]; + TW = Cr[WS(csr, 22)]; + T2i = Ci[WS(csi, 9)]; + T5e = T28 - T29; + T2a = T28 + T29; + T2h = TV - TW; + TX = TV + TW; + T2j = Ci[WS(csi, 22)]; + { + E T2d, T2e, TS, TT, T2k; + TS = Cr[WS(csr, 6)]; + TT = Cr[WS(csr, 25)]; + T2k = T2i + T2j; + T5a = T2j - T2i; + T2d = Ci[WS(csi, 6)]; + T2c = TS - TT; + TU = TS + TT; + T2e = Ci[WS(csi, 25)]; + T2t = T2h + T2k; + T2l = T2h - T2k; + T5b = T2d - T2e; + T2f = T2d + T2e; + } + } + } + { + E T59, T2u, T5c, T2b, T2m, T3P, 
T3O, T2s, T2v; + { + E T5h, T2g, T6m, T5g, TR, TY, T6n; + T59 = TN - TQ; + TR = TN + TQ; + TY = TU + TX; + T5h = TU - TX; + T2g = T2c - T2f; + T2u = T2c + T2f; + T6m = T5e + T5f; + T5g = T5e - T5f; + T6l = TR - TY; + TZ = TR + TY; + T6n = T5b + T5a; + T5c = T5a - T5b; + T3L = T27 + T2a; + T2b = T27 - T2a; + T5P = T5h + T5g; + T5i = T5g - T5h; + T6M = T6n + T6m; + T6o = T6m - T6n; + T2m = T2g + T2l; + T3P = T2g - T2l; + } + T3O = T2r + T2q; + T2s = T2q - T2r; + T2v = T2t - T2u; + T3M = T2u + T2t; + T38 = FNMS(KP707106781, T2m, T2b); + T2n = FMA(KP707106781, T2m, T2b); + T37 = FNMS(KP707106781, T2v, T2s); + T2w = FMA(KP707106781, T2v, T2s); + T4l = FMA(KP707106781, T3P, T3O); + T3Q = FNMS(KP707106781, T3P, T3O); + T5Q = T59 - T5c; + T5d = T59 + T5c; + } + } + } + { + E T4m, T3N, T5t, T5L, T63, T4W, T5Y, T5X, T66, T5W, T67, T5S; + { + E T6T, T6S, T6W, T6P; + { + E T6L, T6O, T6Y, T6X, T6Z, Tv, T10, T70; + T6L = Tf - Tu; + Tv = Tf + Tu; + T10 = TK + TZ; + T6T = TK - TZ; + T6O = T6M - T6N; + T6Y = T6N + T6M; + T4m = FMA(KP707106781, T3M, T3L); + T3N = FNMS(KP707106781, T3M, T3L); + T6X = Tv - T10; + T6S = T6Q - T6R; + T6Z = T6R + T6Q; + R0[0] = KP2_000000000 * (Tv + T10); + R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y); + T70 = T6Y + T6Z; + T6W = T6L - T6O; + T6P = T6L + T6O; + R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X); + R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70); + } + { + E T6D, T6f, T6w, T6G, T6p, T6x, T6y, T6k, T6V, T6U; + T6D = T6b - T6e; + T6f = T6b + T6e; + T6w = T6u - T6v; + T6G = T6v + T6u; + T6V = T6T + T6S; + T6U = T6S - T6T; + T6p = T6l + T6o; + T6x = T6l - T6o; + R0[WS(rs, 12)] = KP1_847759065 * (FMA(KP414213562, T6W, T6V)); + R0[WS(rs, 28)] = -(KP1_847759065 * (FNMS(KP414213562, T6V, T6W))); + R0[WS(rs, 20)] = KP1_847759065 * (FNMS(KP414213562, T6P, T6U)); + R0[WS(rs, 4)] = KP1_847759065 * (FMA(KP414213562, T6U, T6P)); + T6y = T6g + T6j; + T6k = T6g - T6j; + { + E T5V, T5K, T5O, T5R; + T5t = T5r - T5s; + T5K = T5s + T5r; + { + E T6E, T6z, T6H, T6q; + 
T6E = T6y + T6x; + T6z = T6x - T6y; + T6H = T6k - T6p; + T6q = T6k + T6p; + { + E T6F, T6K, T6B, T6A; + T6F = FNMS(KP707106781, T6E, T6D); + T6K = FMA(KP707106781, T6E, T6D); + T6B = FNMS(KP707106781, T6z, T6w); + T6A = FMA(KP707106781, T6z, T6w); + { + E T6I, T6J, T6C, T6r; + T6I = FNMS(KP707106781, T6H, T6G); + T6J = FMA(KP707106781, T6H, T6G); + T6C = FNMS(KP707106781, T6q, T6f); + T6r = FMA(KP707106781, T6q, T6f); + R0[WS(rs, 22)] = KP1_662939224 * (FNMS(KP668178637, T6F, T6I)); + R0[WS(rs, 6)] = KP1_662939224 * (FMA(KP668178637, T6I, T6F)); + R0[WS(rs, 30)] = -(KP1_961570560 * (FNMS(KP198912367, T6J, T6K))); + R0[WS(rs, 14)] = KP1_961570560 * (FMA(KP198912367, T6K, T6J)); + R0[WS(rs, 26)] = -(KP1_662939224 * (FNMS(KP668178637, T6B, T6C))); + R0[WS(rs, 10)] = KP1_662939224 * (FMA(KP668178637, T6C, T6B)); + R0[WS(rs, 18)] = KP1_961570560 * (FNMS(KP198912367, T6r, T6A)); + R0[WS(rs, 2)] = KP1_961570560 * (FMA(KP198912367, T6A, T6r)); + T5L = FNMS(KP707106781, T5K, T5J); + T63 = FMA(KP707106781, T5K, T5J); + } + } + } + T5V = T4Q - T4V; + T4W = T4Q + T4V; + T5Y = FNMS(KP414213562, T5M, T5N); + T5O = FMA(KP414213562, T5N, T5M); + T5R = FNMS(KP414213562, T5Q, T5P); + T5X = FMA(KP414213562, T5P, T5Q); + T66 = FMA(KP707106781, T5V, T5U); + T5W = FNMS(KP707106781, T5V, T5U); + T67 = T5O + T5R; + T5S = T5O - T5R; + } + } + } + { + E T1h, T2L, T2I, T3h, T3p, T1E, T3n, T3s, T3b, T3k, T3e, T3o; + { + E T4X, T5B, T5v, T5w, T5E, T5u, T5F, T5k, T58, T5j; + { + E T68, T69, T62, T5T, T64, T5Z; + T68 = FNMS(KP923879532, T67, T66); + T69 = FMA(KP923879532, T67, T66); + T62 = FNMS(KP923879532, T5S, T5L); + T5T = FMA(KP923879532, T5S, T5L); + T64 = T5Y + T5X; + T5Z = T5X - T5Y; + T4X = FMA(KP707106781, T4W, T4L); + T5B = FNMS(KP707106781, T4W, T4L); + { + E T65, T6a, T61, T60; + T65 = FNMS(KP923879532, T64, T63); + T6a = FMA(KP923879532, T64, T63); + T61 = FNMS(KP923879532, T5Z, T5W); + T60 = FMA(KP923879532, T5Z, T5W); + R0[WS(rs, 23)] = KP1_546020906 * (FNMS(KP820678790, T65, 
T68)); + R0[WS(rs, 7)] = KP1_546020906 * (FMA(KP820678790, T68, T65)); + R0[WS(rs, 31)] = -(KP1_990369453 * (FNMS(KP098491403, T69, T6a))); + R0[WS(rs, 15)] = KP1_990369453 * (FMA(KP098491403, T6a, T69)); + R0[WS(rs, 27)] = -(KP1_763842528 * (FNMS(KP534511135, T61, T62))); + R0[WS(rs, 11)] = KP1_763842528 * (FMA(KP534511135, T62, T61)); + R0[WS(rs, 19)] = KP1_913880671 * (FNMS(KP303346683, T5T, T60)); + R0[WS(rs, 3)] = KP1_913880671 * (FMA(KP303346683, T60, T5T)); + } + } + T5v = FNMS(KP414213562, T52, T57); + T58 = FMA(KP414213562, T57, T52); + T5j = FNMS(KP414213562, T5i, T5d); + T5w = FMA(KP414213562, T5d, T5i); + T5E = FNMS(KP707106781, T5t, T5q); + T5u = FMA(KP707106781, T5t, T5q); + T5F = T58 - T5j; + T5k = T58 + T5j; + { + E T3l, T33, T3c, T3m, T3a, T3d; + { + E T39, T3f, T3g, T36; + { + E T31, T5G, T5H, T5A, T5l, T5C, T5x, T32; + T1h = FMA(KP707106781, T1g, T15); + T31 = FNMS(KP707106781, T1g, T15); + T5G = FNMS(KP923879532, T5F, T5E); + T5H = FMA(KP923879532, T5F, T5E); + T5A = FNMS(KP923879532, T5k, T4X); + T5l = FMA(KP923879532, T5k, T4X); + T5C = T5w - T5v; + T5x = T5v + T5w; + T32 = T2K + T2J; + T2L = T2J - T2K; + T39 = FNMS(KP668178637, T38, T37); + T3f = FMA(KP668178637, T37, T38); + { + E T5D, T5I, T5z, T5y; + T5D = FNMS(KP923879532, T5C, T5B); + T5I = FMA(KP923879532, T5C, T5B); + T5z = FNMS(KP923879532, T5x, T5u); + T5y = FMA(KP923879532, T5x, T5u); + T3l = FMA(KP923879532, T32, T31); + T33 = FNMS(KP923879532, T32, T31); + R0[WS(rs, 21)] = KP1_763842528 * (FNMS(KP534511135, T5D, T5G)); + R0[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T5G, T5D)); + R0[WS(rs, 29)] = -(KP1_913880671 * (FNMS(KP303346683, T5H, T5I))); + R0[WS(rs, 13)] = KP1_913880671 * (FMA(KP303346683, T5I, T5H)); + R0[WS(rs, 25)] = -(KP1_546020906 * (FNMS(KP820678790, T5z, T5A))); + R0[WS(rs, 9)] = KP1_546020906 * (FMA(KP820678790, T5A, T5z)); + R0[WS(rs, 17)] = KP1_990369453 * (FNMS(KP098491403, T5l, T5y)); + R0[WS(rs, 1)] = KP1_990369453 * (FMA(KP098491403, T5y, T5l)); + T3g = 
FMA(KP668178637, T34, T35); + T36 = FNMS(KP668178637, T35, T34); + } + } + T2I = FNMS(KP707106781, T2H, T2E); + T3c = FMA(KP707106781, T2H, T2E); + T3m = T3g + T3f; + T3h = T3f - T3g; + T3p = T39 - T36; + T3a = T36 + T39; + T3d = T1s - T1D; + T1E = T1s + T1D; + } + T3n = FNMS(KP831469612, T3m, T3l); + T3s = FMA(KP831469612, T3m, T3l); + T3b = FNMS(KP831469612, T3a, T33); + T3k = FMA(KP831469612, T3a, T33); + T3e = FMA(KP923879532, T3d, T3c); + T3o = FNMS(KP923879532, T3d, T3c); + } + } + { + E T3v, T3Z, T3W, T4v, T4D, T3C, T4B, T4G, T4p, T4y, T4s, T4C; + { + E T4z, T4h, T4q, T4A, T4o, T4r; + { + E T4n, T4t, T4u, T4k, T4f, T4g; + T3v = FNMS(KP707106781, T3u, T3t); + T4f = FMA(KP707106781, T3u, T3t); + T4g = T3Y + T3X; + T3Z = T3X - T3Y; + { + E T3r, T3q, T3i, T3j; + T3r = FNMS(KP831469612, T3p, T3o); + T3q = FMA(KP831469612, T3p, T3o); + T3i = FNMS(KP831469612, T3h, T3e); + T3j = FMA(KP831469612, T3h, T3e); + R1[WS(rs, 22)] = -(KP1_606415062 * (FMA(KP741650546, T3n, T3q))); + R1[WS(rs, 6)] = KP1_606415062 * (FNMS(KP741650546, T3q, T3n)); + R1[WS(rs, 30)] = -(KP1_978353019 * (FMA(KP148335987, T3r, T3s))); + R1[WS(rs, 14)] = -(KP1_978353019 * (FNMS(KP148335987, T3s, T3r))); + R1[WS(rs, 26)] = -(KP1_715457220 * (FMA(KP599376933, T3j, T3k))); + R1[WS(rs, 10)] = -(KP1_715457220 * (FNMS(KP599376933, T3k, T3j))); + R1[WS(rs, 18)] = -(KP1_940062506 * (FMA(KP250486960, T3b, T3i))); + R1[WS(rs, 2)] = KP1_940062506 * (FNMS(KP250486960, T3i, T3b)); + T4z = FMA(KP923879532, T4g, T4f); + T4h = FNMS(KP923879532, T4g, T4f); + } + T4n = FNMS(KP198912367, T4m, T4l); + T4t = FMA(KP198912367, T4l, T4m); + T4u = FNMS(KP198912367, T4i, T4j); + T4k = FMA(KP198912367, T4j, T4i); + T3W = FNMS(KP707106781, T3V, T3U); + T4q = FMA(KP707106781, T3V, T3U); + T4A = T4u + T4t; + T4v = T4t - T4u; + T4D = T4k + T4n; + T4o = T4k - T4n; + T4r = T3y + T3B; + T3C = T3y - T3B; + } + T4B = FNMS(KP980785280, T4A, T4z); + T4G = FMA(KP980785280, T4A, T4z); + T4p = FMA(KP980785280, T4o, T4h); + T4y = 
FNMS(KP980785280, T4o, T4h); + T4s = FNMS(KP923879532, T4r, T4q); + T4C = FMA(KP923879532, T4r, T4q); + } + { + E T2P, T2X, T2V, T30, T2z, T2S, T2M, T2W; + { + E T2T, T1F, T2U, T2y; + { + E T2x, T2N, T2O, T26; + { + E T4F, T4E, T4w, T4x; + T4F = FMA(KP980785280, T4D, T4C); + T4E = FNMS(KP980785280, T4D, T4C); + T4w = FMA(KP980785280, T4v, T4s); + T4x = FNMS(KP980785280, T4v, T4s); + R1[WS(rs, 23)] = KP1_481902250 * (FNMS(KP906347169, T4B, T4E)); + R1[WS(rs, 7)] = KP1_481902250 * (FMA(KP906347169, T4E, T4B)); + R1[WS(rs, 31)] = -(KP1_997590912 * (FNMS(KP049126849, T4F, T4G))); + R1[WS(rs, 15)] = KP1_997590912 * (FMA(KP049126849, T4G, T4F)); + R1[WS(rs, 27)] = -(KP1_807978586 * (FNMS(KP472964775, T4x, T4y))); + R1[WS(rs, 11)] = KP1_807978586 * (FMA(KP472964775, T4y, T4x)); + R1[WS(rs, 19)] = KP1_883088130 * (FNMS(KP357805721, T4p, T4w)); + R1[WS(rs, 3)] = KP1_883088130 * (FMA(KP357805721, T4w, T4p)); + T2T = FNMS(KP923879532, T1E, T1h); + T1F = FMA(KP923879532, T1E, T1h); + } + T2x = FNMS(KP198912367, T2w, T2n); + T2N = FMA(KP198912367, T2n, T2w); + T2O = FMA(KP198912367, T1W, T25); + T26 = FNMS(KP198912367, T25, T1W); + T2U = T2O + T2N; + T2P = T2N - T2O; + T2X = T26 - T2x; + T2y = T26 + T2x; + } + T2V = FNMS(KP980785280, T2U, T2T); + T30 = FMA(KP980785280, T2U, T2T); + T2z = FMA(KP980785280, T2y, T1F); + T2S = FNMS(KP980785280, T2y, T1F); + T2M = FNMS(KP923879532, T2L, T2I); + T2W = FMA(KP923879532, T2L, T2I); + } + { + E T47, T3D, T48, T3S; + { + E T3K, T41, T42, T3R; + { + E T2Z, T2Y, T2Q, T2R; + T2Z = FNMS(KP980785280, T2X, T2W); + T2Y = FMA(KP980785280, T2X, T2W); + T2Q = FNMS(KP980785280, T2P, T2M); + T2R = FMA(KP980785280, T2P, T2M); + R1[WS(rs, 20)] = -(KP1_807978586 * (FMA(KP472964775, T2V, T2Y))); + R1[WS(rs, 4)] = KP1_807978586 * (FNMS(KP472964775, T2Y, T2V)); + R1[WS(rs, 28)] = -(KP1_883088130 * (FMA(KP357805721, T2Z, T30))); + R1[WS(rs, 12)] = -(KP1_883088130 * (FNMS(KP357805721, T30, T2Z))); + R1[WS(rs, 24)] = -(KP1_481902250 * (FMA(KP906347169, T2R, 
T2S))); + R1[WS(rs, 8)] = -(KP1_481902250 * (FNMS(KP906347169, T2S, T2R))); + R1[WS(rs, 16)] = -(KP1_997590912 * (FMA(KP049126849, T2z, T2Q))); + R1[0] = KP1_997590912 * (FNMS(KP049126849, T2Q, T2z)); + T47 = FNMS(KP923879532, T3C, T3v); + T3D = FMA(KP923879532, T3C, T3v); + } + T3K = FMA(KP668178637, T3J, T3G); + T41 = FNMS(KP668178637, T3G, T3J); + T42 = FMA(KP668178637, T3N, T3Q); + T3R = FNMS(KP668178637, T3Q, T3N); + T48 = T42 - T41; + T43 = T41 + T42; + T4b = T3K - T3R; + T3S = T3K + T3R; + } + T49 = FNMS(KP831469612, T48, T47); + T4e = FMA(KP831469612, T48, T47); + T3T = FMA(KP831469612, T3S, T3D); + T46 = FNMS(KP831469612, T3S, T3D); + T40 = FMA(KP923879532, T3Z, T3W); + T4a = FNMS(KP923879532, T3Z, T3W); + } + } + } + } + } + } + { + E T4d, T4c, T44, T45; + T4d = FMA(KP831469612, T4b, T4a); + T4c = FNMS(KP831469612, T4b, T4a); + T44 = FMA(KP831469612, T43, T40); + T45 = FNMS(KP831469612, T43, T40); + R1[WS(rs, 21)] = KP1_715457220 * (FNMS(KP599376933, T49, T4c)); + R1[WS(rs, 5)] = KP1_715457220 * (FMA(KP599376933, T4c, T49)); + R1[WS(rs, 29)] = -(KP1_940062506 * (FNMS(KP250486960, T4d, T4e))); + R1[WS(rs, 13)] = KP1_940062506 * (FMA(KP250486960, T4e, T4d)); + R1[WS(rs, 25)] = -(KP1_606415062 * (FNMS(KP741650546, T45, T46))); + R1[WS(rs, 9)] = KP1_606415062 * (FMA(KP741650546, T46, T45)); + R1[WS(rs, 17)] = KP1_978353019 * (FNMS(KP148335987, T3T, T44)); + R1[WS(rs, 1)] = KP1_978353019 * (FMA(KP148335987, T44, T3T)); + } + } + } +} + +static const kr2c_desc desc = { 64, "r2cbIII_64", {238, 64, 196, 0}, &GENUS }; + +void X(codelet_r2cbIII_64) (planner *p) { + X(kr2c_register) (p, r2cbIII_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include r2cbIII.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 342 additions, 116 multiplications, 92 fused multiply/add), + * 130 stack variables, 39 constants, 
and 128 memory accesses + */ +#include "r2cbIII.h" + +static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_343117909, +1.343117909694036801250753700854843606457501264); + DK(KP1_481902250, +1.481902250709918182351233794990325459457910619); + DK(KP1_807978586, +1.807978586246886663172400594461074097420264050); + DK(KP855110186, +0.855110186860564188641933713777597068609157259); + DK(KP1_997590912, +1.997590912410344785429543209518201388886407229); + DK(KP098135348, +0.098135348654836028509909953885365316629490726); + DK(KP673779706, +0.673779706784440101378506425238295140955533559); + DK(KP1_883088130, +1.883088130366041556825018805199004714371179592); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP1_191398608, +1.191398608984866686934073057659939779023852677); + DK(KP1_606415062, +1.606415062961289819613353025926283847759138854); + DK(KP1_715457220, +1.715457220000544139804539968569540274084981599); + DK(KP1_028205488, +1.028205488386443453187387677937631545216098241); + DK(KP1_978353019, +1.978353019929561946903347476032486127967379067); + DK(KP293460948, +0.293460948910723503317700259293435639412430633); + DK(KP485960359, +0.485960359806527779896548324154942236641981567); + DK(KP1_940062506, +1.940062506389087985207968414572200502913731924); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP1_268786568, +1.268786568327290996430343226450986741351374190); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP942793473, +0.942793473651995297112775251810508755314920638); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP196034280, +0.196034280659121203988391127777283691722273346); + 
DK(KP580569354, +0.580569354508924735272384751634790549382952557); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E T15, T3t, T3U, T2N, Tf, T6b, T6u, T6R, T4L, T5J, T1g, T3V, T5q, T5U, T2I; + E T3u, Tu, T6v, T4V, T5s, T6e, T6Q, T1s, T2D, T1D, T2E, T3B, T3Y, T4Q, T5r; + E T3y, T3X, TK, T6g, T57, T5N, T6j, T6N, T1W, T34, T25, T35, T3J, T4j, T52; + E T5M, T3G, T4i, TZ, T6l, T5i, T5Q, T6o, T6M, T2n, T37, T2w, T38, T3Q, T4m; + E T5d, T5P, T3N, T4l; + { + E T3, T11, T2M, T5n, T6, T2J, T14, T5m, Ta, T16, T19, T4J, Td, T1b, T1e; + E T4I; + { + E T1, T2, T2K, T2L; + T1 = Cr[0]; + T2 = Cr[WS(csr, 31)]; + T3 = T1 + T2; + T11 = T1 - T2; + T2K = Ci[0]; + T2L = Ci[WS(csi, 31)]; + T2M = T2K + T2L; + T5n = T2L - T2K; + } + { + E T4, T5, T12, T13; + T4 = Cr[WS(csr, 16)]; + T5 = Cr[WS(csr, 15)]; + T6 = T4 + T5; + T2J = T4 - T5; + T12 = Ci[WS(csi, 16)]; + T13 = Ci[WS(csi, 15)]; + T14 = T12 + T13; + T5m = T12 - T13; + } + { + E T8, T9, T17, T18; + T8 = Cr[WS(csr, 8)]; + T9 = Cr[WS(csr, 23)]; + Ta = T8 + 
T9; + T16 = T8 - T9; + T17 = Ci[WS(csi, 8)]; + T18 = Ci[WS(csi, 23)]; + T19 = T17 + T18; + T4J = T17 - T18; + } + { + E Tb, Tc, T1c, T1d; + Tb = Cr[WS(csr, 7)]; + Tc = Cr[WS(csr, 24)]; + Td = Tb + Tc; + T1b = Tb - Tc; + T1c = Ci[WS(csi, 7)]; + T1d = Ci[WS(csi, 24)]; + T1e = T1c + T1d; + T4I = T1d - T1c; + } + { + E T7, Te, T1a, T1f; + T15 = T11 - T14; + T3t = T11 + T14; + T3U = T2J - T2M; + T2N = T2J + T2M; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T6b = T7 - Te; + { + E T6s, T6t, T4H, T4K; + T6s = T4J + T4I; + T6t = T5n - T5m; + T6u = T6s + T6t; + T6R = T6t - T6s; + T4H = T3 - T6; + T4K = T4I - T4J; + T4L = T4H + T4K; + T5J = T4H - T4K; + } + T1a = T16 - T19; + T1f = T1b - T1e; + T1g = KP707106781 * (T1a + T1f); + T3V = KP707106781 * (T1a - T1f); + { + E T5o, T5p, T2G, T2H; + T5o = T5m + T5n; + T5p = Ta - Td; + T5q = T5o - T5p; + T5U = T5p + T5o; + T2G = T16 + T19; + T2H = T1b + T1e; + T2I = KP707106781 * (T2G - T2H); + T3u = KP707106781 * (T2G + T2H); + } + } + } + { + E Ti, T1i, T1q, T4N, Tl, T1n, T1l, T4O, Tp, T1t, T1B, T4S, Ts, T1y, T1w; + E T4T; + { + E Tg, Th, T1o, T1p; + Tg = Cr[WS(csr, 4)]; + Th = Cr[WS(csr, 27)]; + Ti = Tg + Th; + T1i = Tg - Th; + T1o = Ci[WS(csi, 4)]; + T1p = Ci[WS(csi, 27)]; + T1q = T1o + T1p; + T4N = T1o - T1p; + } + { + E Tj, Tk, T1j, T1k; + Tj = Cr[WS(csr, 20)]; + Tk = Cr[WS(csr, 11)]; + Tl = Tj + Tk; + T1n = Tj - Tk; + T1j = Ci[WS(csi, 20)]; + T1k = Ci[WS(csi, 11)]; + T1l = T1j + T1k; + T4O = T1j - T1k; + } + { + E Tn, To, T1z, T1A; + Tn = Cr[WS(csr, 3)]; + To = Cr[WS(csr, 28)]; + Tp = Tn + To; + T1t = Tn - To; + T1z = Ci[WS(csi, 3)]; + T1A = Ci[WS(csi, 28)]; + T1B = T1z + T1A; + T4S = T1A - T1z; + } + { + E Tq, Tr, T1u, T1v; + Tq = Cr[WS(csr, 12)]; + Tr = Cr[WS(csr, 19)]; + Ts = Tq + Tr; + T1y = Tq - Tr; + T1u = Ci[WS(csi, 12)]; + T1v = Ci[WS(csi, 19)]; + T1w = T1u + T1v; + T4T = T1u - T1v; + } + { + E Tm, Tt, T4R, T4U; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T6v = Tm - Tt; + T4R = Tp - Ts; + T4U = T4S - T4T; + 
T4V = T4R + T4U; + T5s = T4U - T4R; + } + { + E T6c, T6d, T1m, T1r; + T6c = T4T + T4S; + T6d = T4O + T4N; + T6e = T6c - T6d; + T6Q = T6d + T6c; + T1m = T1i - T1l; + T1r = T1n + T1q; + T1s = FNMS(KP382683432, T1r, KP923879532 * T1m); + T2D = FMA(KP382683432, T1m, KP923879532 * T1r); + } + { + E T1x, T1C, T3z, T3A; + T1x = T1t - T1w; + T1C = T1y - T1B; + T1D = FMA(KP923879532, T1x, KP382683432 * T1C); + T2E = FNMS(KP382683432, T1x, KP923879532 * T1C); + T3z = T1t + T1w; + T3A = T1y + T1B; + T3B = FNMS(KP923879532, T3A, KP382683432 * T3z); + T3Y = FMA(KP923879532, T3z, KP382683432 * T3A); + } + { + E T4M, T4P, T3w, T3x; + T4M = Ti - Tl; + T4P = T4N - T4O; + T4Q = T4M - T4P; + T5r = T4M + T4P; + T3w = T1i + T1l; + T3x = T1q - T1n; + T3y = FNMS(KP923879532, T3x, KP382683432 * T3w); + T3X = FMA(KP923879532, T3w, KP382683432 * T3x); + } + } + { + E Ty, T1G, T23, T54, TB, T20, T1J, T55, TI, T4Z, T1U, T1Y, TF, T50, T1P; + E T1X; + { + E Tw, Tx, T1H, T1I; + Tw = Cr[WS(csr, 2)]; + Tx = Cr[WS(csr, 29)]; + Ty = Tw + Tx; + T1G = Tw - Tx; + { + E T21, T22, Tz, TA; + T21 = Ci[WS(csi, 2)]; + T22 = Ci[WS(csi, 29)]; + T23 = T21 + T22; + T54 = T21 - T22; + Tz = Cr[WS(csr, 18)]; + TA = Cr[WS(csr, 13)]; + TB = Tz + TA; + T20 = Tz - TA; + } + T1H = Ci[WS(csi, 18)]; + T1I = Ci[WS(csi, 13)]; + T1J = T1H + T1I; + T55 = T1H - T1I; + { + E TG, TH, T1Q, T1R, T1S, T1T; + TG = Cr[WS(csr, 5)]; + TH = Cr[WS(csr, 26)]; + T1Q = TG - TH; + T1R = Ci[WS(csi, 5)]; + T1S = Ci[WS(csi, 26)]; + T1T = T1R + T1S; + TI = TG + TH; + T4Z = T1S - T1R; + T1U = T1Q - T1T; + T1Y = T1Q + T1T; + } + { + E TD, TE, T1L, T1M, T1N, T1O; + TD = Cr[WS(csr, 10)]; + TE = Cr[WS(csr, 21)]; + T1L = TD - TE; + T1M = Ci[WS(csi, 10)]; + T1N = Ci[WS(csi, 21)]; + T1O = T1M + T1N; + TF = TD + TE; + T50 = T1M - T1N; + T1P = T1L - T1O; + T1X = T1L + T1O; + } + } + { + E TC, TJ, T53, T56; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T6g = TC - TJ; + T53 = TF - TI; + T56 = T54 - T55; + T57 = T53 + T56; + T5N = T56 - T53; + } + { + E 
T6h, T6i, T1K, T1V; + T6h = T55 + T54; + T6i = T50 + T4Z; + T6j = T6h - T6i; + T6N = T6i + T6h; + T1K = T1G - T1J; + T1V = KP707106781 * (T1P + T1U); + T1W = T1K + T1V; + T34 = T1K - T1V; + } + { + E T1Z, T24, T3H, T3I; + T1Z = KP707106781 * (T1X - T1Y); + T24 = T20 + T23; + T25 = T1Z + T24; + T35 = T24 - T1Z; + T3H = KP707106781 * (T1P - T1U); + T3I = T23 - T20; + T3J = T3H + T3I; + T4j = T3I - T3H; + } + { + E T4Y, T51, T3E, T3F; + T4Y = Ty - TB; + T51 = T4Z - T50; + T52 = T4Y + T51; + T5M = T4Y - T51; + T3E = T1G + T1J; + T3F = KP707106781 * (T1X + T1Y); + T3G = T3E - T3F; + T4i = T3E + T3F; + } + } + { + E TN, T27, T2u, T5f, TQ, T2r, T2a, T5g, TX, T5a, T2l, T2p, TU, T5b, T2g; + E T2o; + { + E TL, TM, T28, T29; + TL = Cr[WS(csr, 1)]; + TM = Cr[WS(csr, 30)]; + TN = TL + TM; + T27 = TL - TM; + { + E T2s, T2t, TO, TP; + T2s = Ci[WS(csi, 1)]; + T2t = Ci[WS(csi, 30)]; + T2u = T2s + T2t; + T5f = T2t - T2s; + TO = Cr[WS(csr, 14)]; + TP = Cr[WS(csr, 17)]; + TQ = TO + TP; + T2r = TO - TP; + } + T28 = Ci[WS(csi, 14)]; + T29 = Ci[WS(csi, 17)]; + T2a = T28 + T29; + T5g = T28 - T29; + { + E TV, TW, T2h, T2i, T2j, T2k; + TV = Cr[WS(csr, 9)]; + TW = Cr[WS(csr, 22)]; + T2h = TV - TW; + T2i = Ci[WS(csi, 9)]; + T2j = Ci[WS(csi, 22)]; + T2k = T2i + T2j; + TX = TV + TW; + T5a = T2j - T2i; + T2l = T2h - T2k; + T2p = T2h + T2k; + } + { + E TS, TT, T2c, T2d, T2e, T2f; + TS = Cr[WS(csr, 6)]; + TT = Cr[WS(csr, 25)]; + T2c = TS - TT; + T2d = Ci[WS(csi, 6)]; + T2e = Ci[WS(csi, 25)]; + T2f = T2d + T2e; + TU = TS + TT; + T5b = T2d - T2e; + T2g = T2c - T2f; + T2o = T2c + T2f; + } + } + { + E TR, TY, T5e, T5h; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + T6l = TR - TY; + T5e = TU - TX; + T5h = T5f - T5g; + T5i = T5e + T5h; + T5Q = T5h - T5e; + } + { + E T6m, T6n, T2b, T2m; + T6m = T5g + T5f; + T6n = T5b + T5a; + T6o = T6m - T6n; + T6M = T6n + T6m; + T2b = T27 - T2a; + T2m = KP707106781 * (T2g + T2l); + T2n = T2b + T2m; + T37 = T2b - T2m; + } + { + E T2q, T2v, T3O, T3P; + T2q = 
KP707106781 * (T2o - T2p); + T2v = T2r - T2u; + T2w = T2q + T2v; + T38 = T2v - T2q; + T3O = KP707106781 * (T2g - T2l); + T3P = T2r + T2u; + T3Q = T3O - T3P; + T4m = T3O + T3P; + } + { + E T59, T5c, T3L, T3M; + T59 = TN - TQ; + T5c = T5a - T5b; + T5d = T59 + T5c; + T5P = T59 - T5c; + T3L = T27 + T2a; + T3M = KP707106781 * (T2o + T2p); + T3N = T3L - T3M; + T4l = T3L + T3M; + } + } + { + E Tv, T10, T6X, T6Y, T6Z, T70; + Tv = Tf + Tu; + T10 = TK + TZ; + T6X = Tv - T10; + T6Y = T6N + T6M; + T6Z = T6R - T6Q; + T70 = T6Y + T6Z; + R0[0] = KP2_000000000 * (Tv + T10); + R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y); + R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70); + R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X); + } + { + E T6P, T6V, T6U, T6W; + { + E T6L, T6O, T6S, T6T; + T6L = Tf - Tu; + T6O = T6M - T6N; + T6P = T6L + T6O; + T6V = T6L - T6O; + T6S = T6Q + T6R; + T6T = TK - TZ; + T6U = T6S - T6T; + T6W = T6T + T6S; + } + R0[WS(rs, 4)] = FMA(KP1_847759065, T6P, KP765366864 * T6U); + R0[WS(rs, 28)] = FNMS(KP1_847759065, T6V, KP765366864 * T6W); + R0[WS(rs, 20)] = FNMS(KP765366864, T6P, KP1_847759065 * T6U); + R0[WS(rs, 12)] = FMA(KP765366864, T6V, KP1_847759065 * T6W); + } + { + E T6f, T6w, T6G, T6D, T6z, T6E, T6q, T6H; + T6f = T6b + T6e; + T6w = T6u - T6v; + T6G = T6v + T6u; + T6D = T6b - T6e; + { + E T6x, T6y, T6k, T6p; + T6x = T6g + T6j; + T6y = T6o - T6l; + T6z = KP707106781 * (T6x + T6y); + T6E = KP707106781 * (T6y - T6x); + T6k = T6g - T6j; + T6p = T6l + T6o; + T6q = KP707106781 * (T6k + T6p); + T6H = KP707106781 * (T6k - T6p); + } + { + E T6r, T6A, T6J, T6K; + T6r = T6f + T6q; + T6A = T6w - T6z; + R0[WS(rs, 2)] = FMA(KP1_961570560, T6r, KP390180644 * T6A); + R0[WS(rs, 18)] = FNMS(KP390180644, T6r, KP1_961570560 * T6A); + T6J = T6D - T6E; + T6K = T6H + T6G; + R0[WS(rs, 14)] = FMA(KP390180644, T6J, KP1_961570560 * T6K); + R0[WS(rs, 30)] = FNMS(KP1_961570560, T6J, KP390180644 * T6K); + } + { + E T6B, T6C, T6F, T6I; + T6B = T6f - T6q; + T6C = T6z + T6w; + R0[WS(rs, 10)] = 
FMA(KP1_111140466, T6B, KP1_662939224 * T6C); + R0[WS(rs, 26)] = FNMS(KP1_662939224, T6B, KP1_111140466 * T6C); + T6F = T6D + T6E; + T6I = T6G - T6H; + R0[WS(rs, 6)] = FMA(KP1_662939224, T6F, KP1_111140466 * T6I); + R0[WS(rs, 22)] = FNMS(KP1_111140466, T6F, KP1_662939224 * T6I); + } + } + { + E T5L, T63, T5W, T66, T5S, T67, T5Z, T64, T5K, T5V; + T5K = KP707106781 * (T5s - T5r); + T5L = T5J + T5K; + T63 = T5J - T5K; + T5V = KP707106781 * (T4Q - T4V); + T5W = T5U - T5V; + T66 = T5V + T5U; + { + E T5O, T5R, T5X, T5Y; + T5O = FNMS(KP923879532, T5N, KP382683432 * T5M); + T5R = FMA(KP382683432, T5P, KP923879532 * T5Q); + T5S = T5O + T5R; + T67 = T5O - T5R; + T5X = FMA(KP923879532, T5M, KP382683432 * T5N); + T5Y = FNMS(KP923879532, T5P, KP382683432 * T5Q); + T5Z = T5X + T5Y; + T64 = T5Y - T5X; + } + { + E T5T, T60, T69, T6a; + T5T = T5L + T5S; + T60 = T5W - T5Z; + R0[WS(rs, 3)] = FMA(KP1_913880671, T5T, KP580569354 * T60); + R0[WS(rs, 19)] = FNMS(KP580569354, T5T, KP1_913880671 * T60); + T69 = T63 - T64; + T6a = T67 + T66; + R0[WS(rs, 15)] = FMA(KP196034280, T69, KP1_990369453 * T6a); + R0[WS(rs, 31)] = FNMS(KP1_990369453, T69, KP196034280 * T6a); + } + { + E T61, T62, T65, T68; + T61 = T5L - T5S; + T62 = T5Z + T5W; + R0[WS(rs, 11)] = FMA(KP942793473, T61, KP1_763842528 * T62); + R0[WS(rs, 27)] = FNMS(KP1_763842528, T61, KP942793473 * T62); + T65 = T63 + T64; + T68 = T66 - T67; + R0[WS(rs, 7)] = FMA(KP1_546020906, T65, KP1_268786568 * T68); + R0[WS(rs, 23)] = FNMS(KP1_268786568, T65, KP1_546020906 * T68); + } + } + { + E T4X, T5B, T5u, T5E, T5k, T5F, T5x, T5C, T4W, T5t; + T4W = KP707106781 * (T4Q + T4V); + T4X = T4L + T4W; + T5B = T4L - T4W; + T5t = KP707106781 * (T5r + T5s); + T5u = T5q - T5t; + T5E = T5t + T5q; + { + E T58, T5j, T5v, T5w; + T58 = FNMS(KP382683432, T57, KP923879532 * T52); + T5j = FMA(KP923879532, T5d, KP382683432 * T5i); + T5k = T58 + T5j; + T5F = T58 - T5j; + T5v = FMA(KP382683432, T52, KP923879532 * T57); + T5w = FNMS(KP382683432, T5d, KP923879532 * 
T5i); + T5x = T5v + T5w; + T5C = T5w - T5v; + } + { + E T5l, T5y, T5H, T5I; + T5l = T4X + T5k; + T5y = T5u - T5x; + R0[WS(rs, 1)] = FMA(KP1_990369453, T5l, KP196034280 * T5y); + R0[WS(rs, 17)] = FNMS(KP196034280, T5l, KP1_990369453 * T5y); + T5H = T5B - T5C; + T5I = T5F + T5E; + R0[WS(rs, 13)] = FMA(KP580569354, T5H, KP1_913880671 * T5I); + R0[WS(rs, 29)] = FNMS(KP1_913880671, T5H, KP580569354 * T5I); + } + { + E T5z, T5A, T5D, T5G; + T5z = T4X - T5k; + T5A = T5x + T5u; + R0[WS(rs, 9)] = FMA(KP1_268786568, T5z, KP1_546020906 * T5A); + R0[WS(rs, 25)] = FNMS(KP1_546020906, T5z, KP1_268786568 * T5A); + T5D = T5B + T5C; + T5G = T5E - T5F; + R0[WS(rs, 5)] = FMA(KP1_763842528, T5D, KP942793473 * T5G); + R0[WS(rs, 21)] = FNMS(KP942793473, T5D, KP1_763842528 * T5G); + } + } + { + E T33, T3l, T3h, T3m, T3a, T3p, T3e, T3o; + { + E T31, T32, T3f, T3g; + T31 = T15 - T1g; + T32 = T2E - T2D; + T33 = T31 + T32; + T3l = T31 - T32; + T3f = FMA(KP831469612, T34, KP555570233 * T35); + T3g = FNMS(KP831469612, T37, KP555570233 * T38); + T3h = T3f + T3g; + T3m = T3g - T3f; + } + { + E T36, T39, T3c, T3d; + T36 = FNMS(KP831469612, T35, KP555570233 * T34); + T39 = FMA(KP555570233, T37, KP831469612 * T38); + T3a = T36 + T39; + T3p = T36 - T39; + T3c = T2I - T2N; + T3d = T1s - T1D; + T3e = T3c - T3d; + T3o = T3d + T3c; + } + { + E T3b, T3i, T3r, T3s; + T3b = T33 + T3a; + T3i = T3e - T3h; + R1[WS(rs, 2)] = FMA(KP1_940062506, T3b, KP485960359 * T3i); + R1[WS(rs, 18)] = FNMS(KP485960359, T3b, KP1_940062506 * T3i); + T3r = T3l - T3m; + T3s = T3p + T3o; + R1[WS(rs, 14)] = FMA(KP293460948, T3r, KP1_978353019 * T3s); + R1[WS(rs, 30)] = FNMS(KP1_978353019, T3r, KP293460948 * T3s); + } + { + E T3j, T3k, T3n, T3q; + T3j = T33 - T3a; + T3k = T3h + T3e; + R1[WS(rs, 10)] = FMA(KP1_028205488, T3j, KP1_715457220 * T3k); + R1[WS(rs, 26)] = FNMS(KP1_715457220, T3j, KP1_028205488 * T3k); + T3n = T3l + T3m; + T3q = T3o - T3p; + R1[WS(rs, 6)] = FMA(KP1_606415062, T3n, KP1_191398608 * T3q); + R1[WS(rs, 22)] = 
FNMS(KP1_191398608, T3n, KP1_606415062 * T3q); + } + } + { + E T4h, T4z, T4v, T4A, T4o, T4D, T4s, T4C; + { + E T4f, T4g, T4t, T4u; + T4f = T3t + T3u; + T4g = T3X + T3Y; + T4h = T4f - T4g; + T4z = T4f + T4g; + T4t = FMA(KP980785280, T4i, KP195090322 * T4j); + T4u = FMA(KP980785280, T4l, KP195090322 * T4m); + T4v = T4t - T4u; + T4A = T4t + T4u; + } + { + E T4k, T4n, T4q, T4r; + T4k = FNMS(KP980785280, T4j, KP195090322 * T4i); + T4n = FNMS(KP980785280, T4m, KP195090322 * T4l); + T4o = T4k + T4n; + T4D = T4k - T4n; + T4q = T3V + T3U; + T4r = T3y - T3B; + T4s = T4q - T4r; + T4C = T4r + T4q; + } + { + E T4p, T4w, T4F, T4G; + T4p = T4h + T4o; + T4w = T4s - T4v; + R1[WS(rs, 3)] = FMA(KP1_883088130, T4p, KP673779706 * T4w); + R1[WS(rs, 19)] = FNMS(KP673779706, T4p, KP1_883088130 * T4w); + T4F = T4z + T4A; + T4G = T4D + T4C; + R1[WS(rs, 15)] = FMA(KP098135348, T4F, KP1_997590912 * T4G); + R1[WS(rs, 31)] = FNMS(KP1_997590912, T4F, KP098135348 * T4G); + } + { + E T4x, T4y, T4B, T4E; + T4x = T4h - T4o; + T4y = T4v + T4s; + R1[WS(rs, 11)] = FMA(KP855110186, T4x, KP1_807978586 * T4y); + R1[WS(rs, 27)] = FNMS(KP1_807978586, T4x, KP855110186 * T4y); + T4B = T4z - T4A; + T4E = T4C - T4D; + R1[WS(rs, 7)] = FMA(KP1_481902250, T4B, KP1_343117909 * T4E); + R1[WS(rs, 23)] = FNMS(KP1_343117909, T4B, KP1_481902250 * T4E); + } + } + { + E T1F, T2T, T2P, T2W, T2y, T2X, T2C, T2U; + { + E T1h, T1E, T2F, T2O; + T1h = T15 + T1g; + T1E = T1s + T1D; + T1F = T1h + T1E; + T2T = T1h - T1E; + T2F = T2D + T2E; + T2O = T2I + T2N; + T2P = T2F + T2O; + T2W = T2F - T2O; + } + { + E T26, T2x, T2A, T2B; + T26 = FNMS(KP195090322, T25, KP980785280 * T1W); + T2x = FMA(KP980785280, T2n, KP195090322 * T2w); + T2y = T26 + T2x; + T2X = T26 - T2x; + T2A = FMA(KP195090322, T1W, KP980785280 * T25); + T2B = FNMS(KP195090322, T2n, KP980785280 * T2w); + T2C = T2A + T2B; + T2U = T2B - T2A; + } + { + E T2z, T2Q, T2Z, T30; + T2z = T1F + T2y; + T2Q = T2C + T2P; + R1[0] = FNMS(KP098135348, T2Q, KP1_997590912 * T2z); + 
R1[WS(rs, 16)] = -(FMA(KP098135348, T2z, KP1_997590912 * T2Q)); + T2Z = T2T - T2U; + T30 = T2X + T2W; + R1[WS(rs, 12)] = FMA(KP673779706, T2Z, KP1_883088130 * T30); + R1[WS(rs, 28)] = FNMS(KP1_883088130, T2Z, KP673779706 * T30); + } + { + E T2R, T2S, T2V, T2Y; + T2R = T1F - T2y; + T2S = T2C - T2P; + R1[WS(rs, 8)] = FMA(KP1_343117909, T2R, KP1_481902250 * T2S); + R1[WS(rs, 24)] = FNMS(KP1_481902250, T2R, KP1_343117909 * T2S); + T2V = T2T + T2U; + T2Y = T2W - T2X; + R1[WS(rs, 4)] = FMA(KP1_807978586, T2V, KP855110186 * T2Y); + R1[WS(rs, 20)] = FNMS(KP855110186, T2V, KP1_807978586 * T2Y); + } + } + { + E T3D, T47, T43, T48, T3S, T4b, T40, T4a; + { + E T3v, T3C, T41, T42; + T3v = T3t - T3u; + T3C = T3y + T3B; + T3D = T3v + T3C; + T47 = T3v - T3C; + T41 = FMA(KP555570233, T3G, KP831469612 * T3J); + T42 = FNMS(KP555570233, T3N, KP831469612 * T3Q); + T43 = T41 + T42; + T48 = T42 - T41; + } + { + E T3K, T3R, T3W, T3Z; + T3K = FNMS(KP555570233, T3J, KP831469612 * T3G); + T3R = FMA(KP831469612, T3N, KP555570233 * T3Q); + T3S = T3K + T3R; + T4b = T3K - T3R; + T3W = T3U - T3V; + T3Z = T3X - T3Y; + T40 = T3W - T3Z; + T4a = T3Z + T3W; + } + { + E T3T, T44, T4d, T4e; + T3T = T3D + T3S; + T44 = T40 - T43; + R1[WS(rs, 1)] = FMA(KP1_978353019, T3T, KP293460948 * T44); + R1[WS(rs, 17)] = FNMS(KP293460948, T3T, KP1_978353019 * T44); + T4d = T47 - T48; + T4e = T4b + T4a; + R1[WS(rs, 13)] = FMA(KP485960359, T4d, KP1_940062506 * T4e); + R1[WS(rs, 29)] = FNMS(KP1_940062506, T4d, KP485960359 * T4e); + } + { + E T45, T46, T49, T4c; + T45 = T3D - T3S; + T46 = T43 + T40; + R1[WS(rs, 9)] = FMA(KP1_191398608, T45, KP1_606415062 * T46); + R1[WS(rs, 25)] = FNMS(KP1_606415062, T45, KP1_191398608 * T46); + T49 = T47 + T48; + T4c = T4a - T4b; + R1[WS(rs, 5)] = FMA(KP1_715457220, T49, KP1_028205488 * T4c); + R1[WS(rs, 21)] = FNMS(KP1_028205488, T49, KP1_715457220 * T4c); + } + } + } + } +} + +static const kr2c_desc desc = { 64, "r2cbIII_64", {342, 116, 92, 0}, &GENUS }; + +void X(codelet_r2cbIII_64) 
(planner *p) { + X(kr2c_register) (p, r2cbIII_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -name r2cbIII_7 -dft-III -include r2cbIII.h */ + +/* + * This function contains 24 FP additions, 22 FP multiplications, + * (or, 2 additions, 0 multiplications, 22 fused multiply/add), + * 31 stack variables, 7 constants, and 14 memory accesses + */ +#include "r2cbIII.h" +/* r2cbIII_7, HAVE_FMA variant. Runs v iterations; each reads Cr[k] for k = 0..3 and Ci[k] for k = 0..2 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] for k = 0..3 and R1[k] for k = 0..2 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Generator flags: -n 7 -dft-III -sign 1; presumably the size-7 backward (complex-to-real) type-III transform -- TODO confirm against the FFTW manual. */ +static void r2cbIII_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); + DK(KP1_801937735,
+1.801937735804838252472204639014890102331838324); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { + E Tn, Td, Tg, Ti, Tl, T8; + { + E T1, T9, Tb, Ta, T2, T4, Th, Tm, Tc, T3, Te; + T1 = Cr[WS(csr, 3)]; + T9 = Ci[WS(csi, 1)]; + Tb = Ci[0]; + Ta = Ci[WS(csi, 2)]; + T2 = Cr[WS(csr, 2)]; + T4 = Cr[0]; + Th = FMA(KP554958132, T9, Tb); + Tm = FNMS(KP554958132, Ta, T9); + Tc = FMA(KP554958132, Tb, Ta); + T3 = Cr[WS(csr, 1)]; + Te = FNMS(KP356895867, T2, T4); + Tn = FNMS(KP801937735, Tm, Tb); + { + E Tf, Tk, T7, T5, Tj, T6; + Td = FMA(KP801937735, Tc, T9); + T5 = T2 + T3 + T4; + Tj = FNMS(KP356895867, T4, T3); + T6 = FNMS(KP356895867, T3, T2); + Tf = FNMS(KP692021471, Te, T3); + R0[0] = FMA(KP2_000000000, T5, T1); + Tk = FNMS(KP692021471, Tj, T2); + T7 = FNMS(KP692021471, T6, T4); + Tg = FNMS(KP1_801937735, Tf, T1); + Ti = FNMS(KP801937735, Th, Ta); + Tl = FNMS(KP1_801937735, Tk, T1); + T8 = FNMS(KP1_801937735, T7, T1); + } + } + R1[WS(rs, 2)] = FMS(KP1_949855824, Ti, Tg); + R0[WS(rs, 1)] = FMA(KP1_949855824, Ti, Tg); + R0[WS(rs, 2)] = FNMS(KP1_949855824, Tn, Tl); + R1[WS(rs, 1)] = -(FMA(KP1_949855824, Tn, Tl)); + R0[WS(rs, 3)] = FNMS(KP1_949855824, Td, T8); + R1[0] = -(FMA(KP1_949855824, Td, T8)); + } + } +} + +static const kr2c_desc desc = { 7, "r2cbIII_7", {2, 0, 22, 0}, &GENUS }; +/* Planner hook: passes r2cbIII_7 and desc to X(kr2c_register). The first three entries of {2, 0, 22, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cbIII_7) (planner *p) { + X(kr2c_register) (p, r2cbIII_7, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency
4 -sign 1 -n 7 -name r2cbIII_7 -dft-III -include r2cbIII.h */ + +/* + * This function contains 24 FP additions, 19 FP multiplications, + * (or, 9 additions, 4 multiplications, 15 fused multiply/add), + * 21 stack variables, 7 constants, and 14 memory accesses + */ +#include "r2cbIII.h" +/* r2cbIII_7, variant compiled when HAVE_FMA is not defined. Runs v iterations; each reads Cr[k] for k = 0..3 and Ci[k] for k = 0..2 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] for k = 0..3 and R1[k] for k = 0..2 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Generator flags: -n 7 -dft-III -sign 1; presumably the size-7 backward (complex-to-real) type-III transform -- TODO confirm against the FFTW manual. */ +static void r2cbIII_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_246979603, +1.246979603717467061050009768008479621264549462); + DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); + DK(KP445041867, +0.445041867912628808577805128993589518932711138); + DK(KP867767478, +0.867767478235116240951536665696717509219981456); + DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); + DK(KP1_563662964, +1.563662964936059617416889053348115500464669037); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { + E T9, Td, Tb, T1, T4, T2, T3, T5, Tc, Ta, T6, T8, T7; + T6 = Ci[WS(csi, 2)]; + T8 = Ci[0]; + T7 = Ci[WS(csi, 1)]; + T9 = FMA(KP1_563662964, T6, KP1_949855824 * T7) + (KP867767478 * T8); + Td = FNMS(KP1_949855824, T8, KP1_563662964 * T7) - (KP867767478 * T6); + Tb = FNMS(KP1_563662964, T8, KP1_949855824 * T6) - (KP867767478 * T7); + T1 = Cr[WS(csr, 3)]; + T4 = Cr[0]; + T2 = Cr[WS(csr, 2)]; + T3 = Cr[WS(csr, 1)]; + T5 = FMA(KP445041867, T3, KP1_801937735 * T4) + FNMA(KP1_246979603, T2, T1); + Tc = FMA(KP1_801937735, T2, KP445041867 * T4) + FNMA(KP1_246979603, T3, T1); + Ta = FMA(KP1_246979603, T4, T1) + FNMA(KP1_801937735, T3, KP445041867 * T2); + R1[0] = T5 - T9; + R0[WS(rs, 3)] = -(T5 + T9); + R0[WS(rs, 2)] = Td - Tc; + R1[WS(rs, 1)] = Tc + Td; + R1[WS(rs, 2)] = Tb - Ta; + R0[WS(rs, 1)] = Ta + Tb; + R0[0] = FMA(KP2_000000000, T2 + T3 + T4, T1); + } + } +} + +static const kr2c_desc desc = {
7, "r2cbIII_7", {9, 4, 15, 0}, &GENUS }; +/* Planner hook: passes r2cbIII_7 and desc to X(kr2c_register). The first three entries of {9, 4, 15, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cbIII_7) (planner *p) { + X(kr2c_register) (p, r2cbIII_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:33 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -name r2cbIII_8 -dft-III -include r2cbIII.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 18 additions, 8 multiplications, 4 fused multiply/add), + * 23 stack variables, 4 constants, and 16 memory accesses + */ +#include "r2cbIII.h" +/* r2cbIII_8, HAVE_FMA variant. Runs v iterations; each reads Cr[k] and Ci[k] for k = 0..3 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] and R1[k] for k = 0..3 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Generator flags: -n 8 -dft-III -sign 1; presumably the size-8 backward (complex-to-real) type-III transform -- TODO confirm against the FFTW manual. */ +static void r2cbIII_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { + E T4, T7, T3, Tl, Tf, T5, T8, T9, T6, Tc; + { + E T1, T2, Td, Te; + T1 = Cr[0]; + T2 = Cr[WS(csr, 3)]; + Td = Ci[0]; + Te = Ci[WS(csi, 3)]; + T4 = Cr[WS(csr, 2)]; + T7 = T1 - T2; + T3 = T1 + T2; + Tl = Te - Td; + Tf = Td + Te; + T5 = Cr[WS(csr, 1)]; + T8 = Ci[WS(csi, 2)]; + T9 = Ci[WS(csi, 1)]; + } + T6 = T4 + T5; + Tc = T4 - T5; + { + E Ta, Tk, Tg, Th; + Ta = T8 + T9; + Tk = T8 - T9; + Tg = Tc + Tf; + Th = Tc - Tf; + { + E Tj, Tm, Tb, Ti; + Tj = T3 - T6; + R0[0] = KP2_000000000 * (T3 + T6); + Tm = Tk + Tl; + R0[WS(rs, 2)] = KP2_000000000 * (Tl - Tk); + Tb = T7 - Ta; + Ti = T7 + Ta; +
R0[WS(rs, 3)] = KP1_414213562 * (Tm - Tj); + R0[WS(rs, 1)] = KP1_414213562 * (Tj + Tm); + R1[WS(rs, 3)] = -(KP1_847759065 * (FNMS(KP414213562, Th, Ti))); + R1[WS(rs, 1)] = KP1_847759065 * (FMA(KP414213562, Ti, Th)); + R1[WS(rs, 2)] = -(KP1_847759065 * (FMA(KP414213562, Tb, Tg))); + R1[0] = KP1_847759065 * (FNMS(KP414213562, Tg, Tb)); + } + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cbIII_8", {18, 8, 4, 0}, &GENUS }; +/* Planner hook: passes r2cbIII_8 and desc to X(kr2c_register). The first three entries of {18, 8, 4, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cbIII_8) (planner *p) { + X(kr2c_register) (p, r2cbIII_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -name r2cbIII_8 -dft-III -include r2cbIII.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 18 additions, 8 multiplications, 4 fused multiply/add), + * 19 stack variables, 4 constants, and 16 memory accesses + */ +#include "r2cbIII.h" +/* r2cbIII_8, variant compiled when HAVE_FMA is not defined. Runs v iterations; each reads Cr[k] and Ci[k] for k = 0..3 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] and R1[k] for k = 0..3 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Presumably the size-8 backward type-III transform -- TODO confirm against the FFTW manual. */ +static void r2cbIII_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { + E T3, T7, Tf, Tl, T6, Tc, Ta, Tk, Tb, Tg; + { + E T1, T2, Td, Te; + T1 = Cr[0]; + T2 = Cr[WS(csr, 3)]; + T3 = T1 + T2; + T7 = T1 - T2; + Td = Ci[0]; + Te = Ci[WS(csi, 3)]; + Tf = Td + Te; + Tl = Te - Td; + } + { + E T4, T5, T8, T9; + T4 = Cr[WS(csr, 2)]; + T5 = Cr[WS(csr, 1)]; + T6 = T4 + T5; + Tc = T4 - T5; + T8 = Ci[WS(csi, 2)]; + T9 = Ci[WS(csi, 1)]; + Ta = T8 + T9; + Tk = T8 - T9; + } + R0[0] = KP2_000000000 * (T3 + T6); + R0[WS(rs, 2)] = KP2_000000000 * (Tl - Tk); +
Tb = T7 - Ta; + Tg = Tc + Tf; + R1[0] = FNMS(KP765366864, Tg, KP1_847759065 * Tb); + R1[WS(rs, 2)] = -(FMA(KP765366864, Tb, KP1_847759065 * Tg)); + { + E Th, Ti, Tj, Tm; + Th = T7 + Ta; + Ti = Tc - Tf; + R1[WS(rs, 1)] = FMA(KP765366864, Th, KP1_847759065 * Ti); + R1[WS(rs, 3)] = FNMS(KP1_847759065, Th, KP765366864 * Ti); + Tj = T3 - T6; + Tm = Tk + Tl; + R0[WS(rs, 1)] = KP1_414213562 * (Tj + Tm); + R0[WS(rs, 3)] = KP1_414213562 * (Tm - Tj); + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cbIII_8", {18, 8, 4, 0}, &GENUS }; +/* Planner hook: passes r2cbIII_8 and desc to X(kr2c_register). */ +void X(codelet_r2cbIII_8) (planner *p) { + X(kr2c_register) (p, r2cbIII_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cbIII_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:33 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -name r2cbIII_9 -dft-III -include r2cbIII.h */ + +/* + * This function contains 32 FP additions, 24 FP multiplications, + * (or, 8 additions, 0 multiplications, 24 fused multiply/add), + * 40 stack variables, 12 constants, and 18 memory accesses + */ +#include "r2cbIII.h" +/* r2cbIII_9, HAVE_FMA variant. Runs v iterations; each reads Cr[k] for k = 0..4 and Ci[k] for k = 0..3 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] for k = 0..4 and R1[k] for k = 0..3 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Generator flags: -n 9 -dft-III -sign 1; presumably the size-9 backward (complex-to-real) type-III transform -- TODO confirm against the FFTW manual. */ +static void r2cbIII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_326827896, +1.326827896337876792410842639271782594433726619); + DK(KP1_705737063, +1.705737063904886419256501927880148143872040591); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP1_532088886, +1.532088886237956070404785301110833347871664914); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP1_969615506, +1.969615506024416118733486049179046027341286503); + DK(KP839099631, +0.839099631177280011763127298123181364687434283); + DK(KP176326980, +0.176326980708464973471090386868618986121633062); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { + E T4, Td, T3, Th, Tr, Tm, T7,
Tc, Tj, Tg, T1, T2; + Tg = Ci[WS(csi, 1)]; + T1 = Cr[WS(csr, 4)]; + T2 = Cr[WS(csr, 1)]; + T4 = Cr[WS(csr, 3)]; + Td = Ci[WS(csi, 3)]; + { + E T5, Tf, T6, Ta, Tb; + T5 = Cr[0]; + Tf = T2 - T1; + T3 = FMA(KP2_000000000, T2, T1); + T6 = Cr[WS(csr, 2)]; + Ta = Ci[WS(csi, 2)]; + Tb = Ci[0]; + Th = FNMS(KP1_732050807, Tg, Tf); + Tr = FMA(KP1_732050807, Tg, Tf); + Tm = T5 - T6; + T7 = T5 + T6; + Tc = Ta - Tb; + Tj = Tb + Ta; + } + { + E Tw, Tq, Tv, Tp, Ti, T8; + Ti = FNMS(KP500000000, T7, T4); + T8 = T4 + T7; + { + E Te, Tl, Tt, Tk, T9; + Te = Tc - Td; + Tl = FMA(KP500000000, Tc, Td); + Tt = FNMS(KP866025403, Tj, Ti); + Tk = FMA(KP866025403, Tj, Ti); + T9 = T8 - T3; + R0[0] = FMA(KP2_000000000, T8, T3); + { + E Ts, Tn, Tu, To; + Ts = FMA(KP866025403, Tm, Tl); + Tn = FNMS(KP866025403, Tm, Tl); + R0[WS(rs, 3)] = FMS(KP1_732050807, Te, T9); + R1[WS(rs, 1)] = FMA(KP1_732050807, Te, T9); + Tu = FMA(KP176326980, Tt, Ts); + Tw = FNMS(KP176326980, Ts, Tt); + To = FMA(KP839099631, Tn, Tk); + Tq = FNMS(KP839099631, Tk, Tn); + R0[WS(rs, 1)] = FMS(KP1_969615506, Tu, Tr); + Tv = FMA(KP984807753, Tu, Tr); + R1[0] = FNMS(KP1_532088886, To, Th); + Tp = FMA(KP766044443, To, Th); + } + } + R0[WS(rs, 4)] = FMS(KP1_705737063, Tw, Tv); + R1[WS(rs, 2)] = FMA(KP1_705737063, Tw, Tv); + R0[WS(rs, 2)] = FMS(KP1_326827896, Tq, Tp); + R1[WS(rs, 3)] = FMA(KP1_326827896, Tq, Tp); + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cbIII_9", {8, 0, 24, 0}, &GENUS }; +/* Planner hook: passes r2cbIII_9 and desc to X(kr2c_register). The first three entries of {8, 0, 24, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cbIII_9) (planner *p) { + X(kr2c_register) (p, r2cbIII_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -name r2cbIII_9 -dft-III -include r2cbIII.h */ + +/* + * This function contains 32 FP additions, 18 FP multiplications, + * (or, 22 additions, 8 multiplications, 10 fused multiply/add), + * 35 stack variables, 12 constants, and 18 memory accesses + */ +#include "r2cbIII.h" +/* r2cbIII_9, variant compiled when HAVE_FMA is not defined. Runs v iterations; each reads Cr[k] for k = 0..4 and Ci[k] for k = 0..3 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] for k = 0..4 and R1[k] for k = 0..3 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Presumably the size-9 backward type-III transform -- TODO confirm against the FFTW manual. */ +static void r2cbIII_9(R *R0, R *R1, R *Cr, R
*Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP1_326827896, +1.326827896337876792410842639271782594433726619); + DK(KP1_113340798, +1.113340798452838732905825904094046265936583811); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP1_705737063, +1.705737063904886419256501927880148143872040591); + DK(KP300767466, +0.300767466360870593278543795225003852144476517); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { + E T3, Ts, Ti, Td, Tc, T8, To, Tu, Tl, Tt, T9, Te; + { + E Th, T1, T2, Tf, Tg; + Tg = Ci[WS(csi, 1)]; + Th = KP1_732050807 * Tg; + T1 = Cr[WS(csr, 4)]; + T2 = Cr[WS(csr, 1)]; + Tf = T2 - T1; + T3 = FMA(KP2_000000000, T2, T1); + Ts = Tf - Th; + Ti = Tf + Th; + } + { + E T4, T7, Tm, Tk, Tn, Tj; + T4 = Cr[WS(csr, 3)]; + Td = Ci[WS(csi, 3)]; + { + E T5, T6, Ta, Tb; + T5 = Cr[0]; + T6 = Cr[WS(csr, 2)]; + T7 = T5 + T6; + Tm = KP866025403 * (T6 - T5); + Ta = Ci[WS(csi, 2)]; + Tb = Ci[0]; + Tc = Ta - Tb; + Tk = KP866025403 * (Tb + Ta); + } + T8 = T4 + T7; + Tn = FMA(KP500000000, Tc, Td); + To = Tm - Tn; + Tu = Tm + Tn; + Tj = FMS(KP500000000, T7, T4); + Tl = Tj + Tk; + Tt = Tj - Tk; + } + R0[0] = FMA(KP2_000000000, T8, T3); + T9 = T8 - T3; + Te = KP1_732050807 * (Tc - Td); + R1[WS(rs, 1)] = T9 + Te; + R0[WS(rs, 3)] = Te - T9; + { + E Tr, Tp, Tq, Tx, Tv, Tw; + Tr = FNMS(KP1_705737063, Tl,
KP300767466 * To); + Tp = FMA(KP173648177, Tl, KP984807753 * To); + Tq = Ti - Tp; + R0[WS(rs, 1)] = -(FMA(KP2_000000000, Tp, Ti)); + R0[WS(rs, 4)] = Tr - Tq; + R1[WS(rs, 2)] = Tq + Tr; + Tx = FMA(KP1_113340798, Tt, KP1_326827896 * Tu); + Tv = FNMS(KP642787609, Tu, KP766044443 * Tt); + Tw = Tv - Ts; + R1[0] = FMA(KP2_000000000, Tv, Ts); + R1[WS(rs, 3)] = Tx - Tw; + R0[WS(rs, 2)] = Tw + Tx; + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cbIII_9", {22, 8, 10, 0}, &GENUS }; +/* Planner hook: passes r2cbIII_9 and desc to X(kr2c_register). The first three entries of {22, 8, 10, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cbIII_9) (planner *p) { + X(kr2c_register) (p, r2cbIII_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -name r2cb_10 -include r2cb.h */ + +/* + * This function contains 34 FP additions, 20 FP multiplications, + * (or, 14 additions, 0 multiplications, 20 fused multiply/add), + * 30 stack variables, 5 constants, and 20 memory accesses + */ +#include "r2cb.h" +/* r2cb_10, HAVE_FMA variant. Runs v iterations; each reads Cr[k] for k = 0..5 and Ci[k] for k = 1..4 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] and R1[k] for k = 0..4 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Generator flags: -n 10 -sign 1; presumably the size-10 backward (complex-to-real) transform -- TODO confirm against the FFTW manual. */ +static void r2cb_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E Tb, T3, Tc, T6, Tq, To, Ty, Tw, Td, T9; + { + E Tu, Tn, T7, Tv, Tk, T8; + { + E T1, T2, Tl, Tm; + T1 = Cr[0]; + T2 = Cr[WS(csr, 5)]; + Tl = Ci[WS(csi, 2)]; + Tm = Ci[WS(csi, 3)]; + { + E Ti, Tj, T4, T5; + Ti = Ci[WS(csi, 4)]; + Tb = T1 + T2; + T3 = T1 - T2; + Tu = Tl + Tm; + Tn = Tl - Tm; + Tj = Ci[WS(csi, 1)]; + T4 = Cr[WS(csr, 2)]; + T5 = Cr[WS(csr, 3)]; + T7 = Cr[WS(csr, 4)]; + Tv = Ti + Tj; + Tk = Ti - Tj; + Tc = T4 + T5; + T6 = T4 - T5; + T8 = Cr[WS(csr, 1)]; + } + } + Tq = FMA(KP618033988, Tk, Tn); + To = FNMS(KP618033988,
Tn, Tk); + Ty = FNMS(KP618033988, Tu, Tv); + Tw = FMA(KP618033988, Tv, Tu); + Td = T7 + T8; + T9 = T7 - T8; + } + { + E Te, Tg, Ta, Ts, Tf, Tr; + Te = Tc + Td; + Tg = Tc - Td; + Ta = T6 + T9; + Ts = T6 - T9; + Tf = FNMS(KP500000000, Te, Tb); + R0[0] = FMA(KP2_000000000, Te, Tb); + Tr = FNMS(KP500000000, Ta, T3); + R1[WS(rs, 2)] = FMA(KP2_000000000, Ta, T3); + { + E Th, Tp, Tt, Tx; + Th = FNMS(KP1_118033988, Tg, Tf); + Tp = FMA(KP1_118033988, Tg, Tf); + Tt = FMA(KP1_118033988, Ts, Tr); + Tx = FNMS(KP1_118033988, Ts, Tr); + R0[WS(rs, 3)] = FNMS(KP1_902113032, Tq, Tp); + R0[WS(rs, 2)] = FMA(KP1_902113032, Tq, Tp); + R0[WS(rs, 1)] = FMA(KP1_902113032, To, Th); + R0[WS(rs, 4)] = FNMS(KP1_902113032, To, Th); + R1[WS(rs, 1)] = FNMS(KP1_902113032, Ty, Tx); + R1[WS(rs, 3)] = FMA(KP1_902113032, Ty, Tx); + R1[WS(rs, 4)] = FMA(KP1_902113032, Tw, Tt); + R1[0] = FNMS(KP1_902113032, Tw, Tt); + } + } + } + } +} + +static const kr2c_desc desc = { 10, "r2cb_10", {14, 0, 20, 0}, &GENUS }; +/* Planner hook: passes r2cb_10 and desc to X(kr2c_register). The first three entries of {14, 0, 20, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cb_10) (planner *p) { + X(kr2c_register) (p, r2cb_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -name r2cb_10 -include r2cb.h */ + +/* + * This function contains 34 FP additions, 14 FP multiplications, + * (or, 26 additions, 6 multiplications, 8 fused multiply/add), + * 26 stack variables, 5 constants, and 20 memory accesses + */ +#include "r2cb.h" +/* r2cb_10, variant compiled when HAVE_FMA is not defined. Runs v iterations; each reads Cr[k] for k = 0..5 and Ci[k] for k = 1..4 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] and R1[k] for k = 0..4 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Presumably the size-10 backward transform -- TODO confirm against the FFTW manual. */ +static void r2cb_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs,
Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E T3, Tb, Tn, Tv, Tk, Tu, Ta, Ts, Te, Tg, Ti, Tj; + { + E T1, T2, Tl, Tm; + T1 = Cr[0]; + T2 = Cr[WS(csr, 5)]; + T3 = T1 - T2; + Tb = T1 + T2; + Tl = Ci[WS(csi, 4)]; + Tm = Ci[WS(csi, 1)]; + Tn = Tl - Tm; + Tv = Tl + Tm; + } + Ti = Ci[WS(csi, 2)]; + Tj = Ci[WS(csi, 3)]; + Tk = Ti - Tj; + Tu = Ti + Tj; + { + E T6, Tc, T9, Td; + { + E T4, T5, T7, T8; + T4 = Cr[WS(csr, 2)]; + T5 = Cr[WS(csr, 3)]; + T6 = T4 - T5; + Tc = T4 + T5; + T7 = Cr[WS(csr, 4)]; + T8 = Cr[WS(csr, 1)]; + T9 = T7 - T8; + Td = T7 + T8; + } + Ta = T6 + T9; + Ts = KP1_118033988 * (T6 - T9); + Te = Tc + Td; + Tg = KP1_118033988 * (Tc - Td); + } + R1[WS(rs, 2)] = FMA(KP2_000000000, Ta, T3); + R0[0] = FMA(KP2_000000000, Te, Tb); + { + E To, Tq, Th, Tp, Tf; + To = FNMS(KP1_902113032, Tn, KP1_175570504 * Tk); + Tq = FMA(KP1_902113032, Tk, KP1_175570504 * Tn); + Tf = FNMS(KP500000000, Te, Tb); + Th = Tf - Tg; + Tp = Tg + Tf; + R0[WS(rs, 1)] = Th - To; + R0[WS(rs, 2)] = Tp + Tq; + R0[WS(rs, 4)] = Th + To; + R0[WS(rs, 3)] = Tp - Tq; + } + { + E Tw, Ty, Tt, Tx, Tr; + Tw = FNMS(KP1_902113032, Tv, KP1_175570504 * Tu); + Ty = FMA(KP1_902113032, Tu, KP1_175570504 * Tv); + Tr = FNMS(KP500000000, Ta, T3); + Tt = Tr - Ts; + Tx = Ts + Tr; + R1[WS(rs, 3)] = Tt - Tw; + R1[WS(rs, 4)] = Tx + Ty; + R1[WS(rs, 1)] = Tt + Tw; + R1[0] = Tx - Ty; + } + } + } +} + +static const kr2c_desc desc = { 10, "r2cb_10", {26, 6, 8, 0}, &GENUS }; +/* Planner hook: passes r2cb_10 and desc to X(kr2c_register). The first three entries of {26, 6, 8, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cb_10) (planner *p) { + X(kr2c_register) (p, r2cb_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 11 -name r2cb_11 -include r2cb.h */ + +/* + * This function contains 60 FP additions, 56 FP multiplications, + * (or, 4 additions, 0 multiplications, 56 fused multiply/add), + * 53 stack variables, 11 constants, and 22 memory accesses + */ +#include "r2cb.h" +/* r2cb_11, HAVE_FMA variant. Runs v iterations; each reads Cr[k] for k = 0..5 and Ci[k] for k = 1..5 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] for k = 0..5 and R1[k] for k = 0..4 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Generator flags: -n 11 -sign 1; presumably the size-11 backward (complex-to-real) transform -- TODO confirm against the FFTW manual. */ +static void r2cb_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_979642883, +1.979642883761865464752184075553437574753038744); + DK(KP1_918985947, +1.918985947228994779780736114132655398124909697); + DK(KP876768831, +0.876768831002589333891339807079336796764054852); + DK(KP918985947, +0.918985947228994779780736114132655398124909697); + DK(KP778434453, +0.778434453334651800608337670740821884709317477); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP634356270, +0.634356270682424498893150776899916060542806975); + DK(KP342584725, +0.342584725681637509502641509861112333758894680); + DK(KP830830026,
+0.830830026003772851058548298459246407048009821); + DK(KP715370323, +0.715370323453429719112414662767260662417897278); + DK(KP521108558, +0.521108558113202722944698153526659300680427422); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) { + E Tf, Tq, Tt, Tu; + { + E T1, Td, Th, Te, Tg, T2, Ts, TK, TB, TT, Tj, T6, T3, T4, T5; + E Tr; + T1 = Cr[0]; + Td = Ci[WS(csi, 3)]; + Th = Ci[WS(csi, 5)]; + Te = Ci[WS(csi, 2)]; + Tf = Ci[WS(csi, 4)]; + Tg = Ci[WS(csi, 1)]; + Tr = FMA(KP521108558, Td, Th); + T2 = Cr[WS(csr, 1)]; + { + E TJ, TA, TS, Ti; + TJ = FMA(KP521108558, Tf, Td); + TA = FNMS(KP521108558, Te, Tf); + TS = FMS(KP521108558, Tg, Te); + Ti = FMA(KP521108558, Th, Tg); + Ts = FNMS(KP715370323, Tr, Te); + TK = FMA(KP715370323, TJ, Tg); + TB = FMA(KP715370323, TA, Th); + TT = FMA(KP715370323, TS, Td); + Tj = FMA(KP715370323, Ti, Tf); + T6 = Cr[WS(csr, 5)]; + } + T3 = Cr[WS(csr, 2)]; + T4 = Cr[WS(csr, 3)]; + T5 = Cr[WS(csr, 4)]; + { + E TG, Tx, To, Tl, Tb, TU, TQ, TP, Ta; + { + E Tk, TE, Tv, T8; + Tk = FMA(KP830830026, Tj, Te); + TE = FNMS(KP342584725, T3, T6); + Tv = FNMS(KP342584725, T2, T4); + T8 = FNMS(KP342584725, T4, T3); + { + E T7, Tm, TN, TF; + T7 = T2 + T3 + T4 + T5 + T6; + Tm = FNMS(KP342584725, T5, T2); + TN = FNMS(KP342584725, T6, T5); + TF = FNMS(KP634356270, TE, T2); + { + E Tw, T9, Tn, TO; + Tw = FNMS(KP634356270, Tv, T6); + T9 = FNMS(KP634356270, T8, T5); + R0[0] = FMA(KP2_000000000, T7, T1); + Tn = FNMS(KP634356270, Tm, T3); + TO = FNMS(KP634356270, TN, T4); + TG = FNMS(KP778434453, TF, T4); + Tx = FNMS(KP778434453, Tw, T5); + Ta = FNMS(KP778434453, T9, T2); + To = FNMS(KP778434453, Tn, T6); + TP = FNMS(KP778434453, TO, T3); + Tl = FMA(KP918985947, Tk, Td); + } + } + } + Tb = FNMS(KP876768831, Ta, T6); + TU = FNMS(KP830830026, TT, Tf); + TQ = FNMS(KP876768831, TP, T2); + { + E TI, TL, Ty, TC; + { + E Tc,
TV, TR, TH; + TH = FNMS(KP876768831, TG, T5); + Tc = FNMS(KP1_918985947, Tb, T1); + TV = FNMS(KP918985947, TU, Th); + TR = FNMS(KP1_918985947, TQ, T1); + TI = FNMS(KP1_918985947, TH, T1); + R0[WS(rs, 5)] = FMA(KP1_979642883, Tl, Tc); + R1[0] = FNMS(KP1_979642883, Tl, Tc); + R0[WS(rs, 3)] = FMA(KP1_979642883, TV, TR); + R1[WS(rs, 2)] = FNMS(KP1_979642883, TV, TR); + TL = FNMS(KP830830026, TK, Th); + } + Ty = FNMS(KP876768831, Tx, T3); + TC = FNMS(KP830830026, TB, Td); + { + E TM, Tz, TD, Tp; + Tp = FNMS(KP876768831, To, T4); + TM = FMA(KP918985947, TL, Te); + Tz = FNMS(KP1_918985947, Ty, T1); + TD = FNMS(KP918985947, TC, Tg); + Tq = FNMS(KP1_918985947, Tp, T1); + R0[WS(rs, 2)] = FMA(KP1_979642883, TM, TI); + R1[WS(rs, 3)] = FNMS(KP1_979642883, TM, TI); + R0[WS(rs, 4)] = FMA(KP1_979642883, TD, Tz); + R1[WS(rs, 1)] = FNMS(KP1_979642883, TD, Tz); + Tt = FMA(KP830830026, Ts, Tg); + } + } + } + } + Tu = FNMS(KP918985947, Tt, Tf); + R0[WS(rs, 1)] = FMA(KP1_979642883, Tu, Tq); + R1[WS(rs, 4)] = FNMS(KP1_979642883, Tu, Tq); + } + } +} + +static const kr2c_desc desc = { 11, "r2cb_11", {4, 0, 56, 0}, &GENUS }; +/* Planner hook: passes r2cb_11 and desc to X(kr2c_register). The first three entries of {4, 0, 56, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cb_11) (planner *p) { + X(kr2c_register) (p, r2cb_11, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 11 -name r2cb_11 -include r2cb.h */ + +/* + * This function contains 60 FP additions, 51 FP multiplications, + * (or, 19 additions, 10 multiplications, 41 fused multiply/add), + * 33 stack variables, 11 constants, and 22 memory accesses + */ +#include "r2cb.h" +/* r2cb_11, variant compiled when HAVE_FMA is not defined. Runs v iterations; each reads Cr[k] for k = 0..5 and Ci[k] for k = 1..5 (k > 0 addressed through WS(csr, k) / WS(csi, k)), writes R0[k] for k = 0..5 and R1[k] for k = 0..4 (through WS(rs, k)), and the for-increment then advances R0/R1 by ovs and Cr/Ci by ivs. Presumably the size-11 backward transform -- TODO confirm against the FFTW manual. */ +static void r2cb_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_918985947, +1.918985947228994779780736114132655398124909697); + DK(KP1_309721467, +1.309721467890570128113850144932587106367582399); + DK(KP284629676, +0.284629676546570280887585337232739337582102722); +
DK(KP830830026, +0.830830026003772851058548298459246407048009821); + DK(KP1_682507065, +1.682507065662362337723623297838735435026584997); + DK(KP563465113, +0.563465113682859395422835830693233798071555798); + DK(KP1_511499148, +1.511499148708516567548071687944688840359434890); + DK(KP1_979642883, +1.979642883761865464752184075553437574753038744); + DK(KP1_819263990, +1.819263990709036742823430766158056920120482102); + DK(KP1_081281634, +1.081281634911195164215271908637383390863541216); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) { + E Td, Tl, Tf, Th, Tj, T1, T2, T6, T5, T4, T3, T7, Tk, Te, Tg; + E Ti; + { + E T8, Tc, T9, Ta, Tb; + T8 = Ci[WS(csi, 2)]; + Tc = Ci[WS(csi, 1)]; + T9 = Ci[WS(csi, 4)]; + Ta = Ci[WS(csi, 5)]; + Tb = Ci[WS(csi, 3)]; + Td = FMA(KP1_081281634, T8, KP1_819263990 * T9) + FNMA(KP1_979642883, Ta, KP1_511499148 * Tb) - (KP563465113 * Tc); + Tl = FMA(KP1_979642883, T8, KP1_819263990 * Ta) + FNMA(KP563465113, T9, KP1_081281634 * Tb) - (KP1_511499148 * Tc); + Tf = FMA(KP563465113, T8, KP1_819263990 * Tb) + FNMA(KP1_511499148, Ta, KP1_081281634 * T9) - (KP1_979642883 * Tc); + Th = FMA(KP1_081281634, Tc, KP1_819263990 * T8) + FMA(KP1_979642883, Tb, KP1_511499148 * T9) + (KP563465113 * Ta); + Tj = FMA(KP563465113, Tb, KP1_979642883 * T9) + FNMS(KP1_511499148, T8, KP1_081281634 * Ta) - (KP1_819263990 * Tc); + } + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + T6 = Cr[WS(csr, 5)]; + T5 = Cr[WS(csr, 4)]; + T4 = Cr[WS(csr, 3)]; + T3 = Cr[WS(csr, 2)]; + T7 = FMA(KP1_682507065, T3, T1) + FNMS(KP284629676, T6, KP830830026 * T5) + FNMA(KP1_309721467, T4, KP1_918985947 * T2); + Tk = FMA(KP1_682507065, T4, T1) + FNMS(KP1_918985947, T5, KP830830026 * T6) + FNMA(KP284629676, T3, KP1_309721467 * T2); + Te = FMA(KP830830026, T4, T1) + FNMS(KP1_309721467, T6, KP1_682507065 * T5) + FNMA(KP1_918985947, T3, KP284629676 * T2); +
Tg = FMA(KP1_682507065, T2, T1) + FNMS(KP1_918985947, T6, KP830830026 * T3) + FNMA(KP1_309721467, T5, KP284629676 * T4); + Ti = FMA(KP830830026, T2, T1) + FNMS(KP284629676, T5, KP1_682507065 * T6) + FNMA(KP1_918985947, T4, KP1_309721467 * T3); + R0[WS(rs, 3)] = T7 - Td; + R0[WS(rs, 4)] = Te - Tf; + R0[WS(rs, 2)] = Tk + Tl; + R1[WS(rs, 2)] = T7 + Td; + R1[WS(rs, 3)] = Tk - Tl; + R0[WS(rs, 1)] = Ti + Tj; + R1[WS(rs, 1)] = Te + Tf; + R0[WS(rs, 5)] = Tg + Th; + R1[0] = Tg - Th; + R1[WS(rs, 4)] = Ti - Tj; + R0[0] = FMA(KP2_000000000, T2 + T3 + T4 + T5 + T6, T1); + } + } +} + +static const kr2c_desc desc = { 11, "r2cb_11", {19, 10, 41, 0}, &GENUS }; +/* Planner hook: passes r2cb_11 and desc to X(kr2c_register). The first three entries of {19, 10, 41, 0} match the add / multiply / fused multiply-add counts quoted in the generator comment. */ +void X(codelet_r2cb_11) (planner *p) { + X(kr2c_register) (p, r2cb_11, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cb_12 -include r2cb.h */ + +/* + * This function contains 38 FP additions, 16 FP multiplications, + * (or, 22 additions, 0 multiplications, 16 fused multiply/add), + * 31 stack variables, 2 constants, and 24 memory accesses + */ +#include "r2cb.h" + +static void r2cb_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ /* r2cb_12, variant compiled when HAVE_FMA is defined. Runs v iterations; each one reads Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..5)] and writes the 12 values R0[0], R0[WS(rs, 1..5)], R1[0] and R1[WS(rs, 1..5)]. After every iteration Cr and Ci advance by ivs and R0 and R1 advance by ovs. NOTE(review): the generator line above (gen_r2cb, -sign 1, -n 12) suggests this is the size-12 complex-to-real backward transform kernel; confirm against r2cb.h. */ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E Ts, Tr; /* Declared at loop-body scope: both are assigned inside the nested blocks and consumed by the last two stores after those blocks close. */ + { + E Tz, Te, Tn, Tk, Tc, Tw, Ty, Th, T4, T3, Td, T5; + { + E T8, Tu, Tl, Tm, Tb, T9, Ta, T1, T2, Tv; /* This block performs all twelve input loads, interleaved with the first sums and differences. */ + T8 = Cr[WS(csr, 3)]; + T9 = Cr[WS(csr, 5)]; + Ta = Cr[WS(csr, 1)]; + Tu = Ci[WS(csi, 3)]; + Tl = Ci[WS(csi, 5)]; + Tm = Ci[WS(csi, 1)]; + Tb = T9 + Ta; + Tz = T9 - Ta; + Te = Ci[WS(csi, 4)]; + Tn = Tl - Tm; + Tv = Tl + Tm; + Tk = FNMS(KP2_000000000, T8, Tb); + Tc = T8 + Tb; + T1 = Cr[0]; + T2 = Cr[WS(csr, 4)]; + Tw = Tu - Tv; + Ty = FMA(KP2_000000000, Tu, Tv); + Th = Ci[WS(csi, 2)]; + T4 = Cr[WS(csr, 6)]; + T3 = FMA(KP2_000000000, T2, T1); + Td = T1 - T2; + T5 = Cr[WS(csr, 2)]; + } + { + E To, Tp, Tf, Tg, T6, TA, TC; + To = FMA(KP1_732050807, Tn, Tk); + Ts = FNMS(KP1_732050807, Tn, Tk); + Tp = FNMS(KP1_732050807, Te, Td); + 
Tf = FMA(KP1_732050807, Te, Td); + Tg = T4 - T5; + T6 = FMA(KP2_000000000, T5, T4); + TA = FMA(KP1_732050807, Tz, Ty); + TC = FNMS(KP1_732050807, Tz, Ty); + { + E Tt, T7, Ti, Tq, Tj, TB, Tx; /* Ten of the twelve outputs are stored in this block, mostly as sum/difference pairs of two temporaries. */ + Tt = T3 - T6; + T7 = T3 + T6; + Ti = FNMS(KP1_732050807, Th, Tg); + Tq = FMA(KP1_732050807, Th, Tg); + R0[0] = FMA(KP2_000000000, Tc, T7); + R0[WS(rs, 3)] = FNMS(KP2_000000000, Tc, T7); + Tj = Tf + Ti; + TB = Tf - Ti; + Tr = Tp + Tq; + Tx = Tp - Tq; + R1[WS(rs, 5)] = TB + TC; + R1[WS(rs, 2)] = TB - TC; + R0[WS(rs, 4)] = Tj - To; + R0[WS(rs, 1)] = Tj + To; + R1[WS(rs, 3)] = Tx + TA; + R1[0] = Tx - TA; + R1[WS(rs, 4)] = FNMS(KP2_000000000, Tw, Tt); + R1[WS(rs, 1)] = FMA(KP2_000000000, Tw, Tt); + } + } + } + R0[WS(rs, 2)] = Tr - Ts; + R0[WS(rs, 5)] = Tr + Ts; + } + } +} + +static const kr2c_desc desc = { 12, "r2cb_12", {22, 0, 16, 0}, &GENUS }; /* 22, 0, 16 repeat the addition / multiplication / fused multiply-add counts from this variant's header comment. */ + +void X(codelet_r2cb_12) (planner *p) { /* Passes the r2cb_12 kernel and its descriptor to the planner p through X(kr2c_register). */ + X(kr2c_register) (p, r2cb_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cb_12 -include r2cb.h */ + +/* + * This function contains 38 FP additions, 10 FP multiplications, + * (or, 34 additions, 6 multiplications, 4 fused multiply/add), + * 25 stack variables, 2 constants, and 24 memory accesses + */ +#include "r2cb.h" + +static void r2cb_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ /* r2cb_12, variant compiled when HAVE_FMA is not defined; its generator line omits the -fma, -reorder-insns and -schedule-for-pipeline flags used for the other branch. It reads the same inputs (Cr[0], Cr[WS(csr, 1..6)], Ci[WS(csi, 1..5)]) and writes the same 12 outputs (R0[0], R0[WS(rs, 1..5)], R1[0], R1[WS(rs, 1..5)]) per iteration, for v iterations, advancing Cr and Ci by ivs and R0 and R1 by ovs. */ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E T8, Tb, Tm, TA, Tw, Tx, Tp, TB, T3, Tr, Tg, T6, Ts, Tk; + { + E T9, Ta, Tn, To; /* First block: loads Cr[3], Cr[5], Cr[1], Ci[3], Ci[5], Ci[1] and forms Tb, Tm, TA, Tx, Tp, TB from them. */ + T8 = Cr[WS(csr, 3)]; + T9 = Cr[WS(csr, 5)]; + Ta = Cr[WS(csr, 1)]; + Tb = T9 + Ta; + Tm = FMS(KP2_000000000, T8, Tb); + TA = KP1_732050807 * 
(T9 - Ta); + Tw = Ci[WS(csi, 3)]; + Tn = Ci[WS(csi, 5)]; + To = Ci[WS(csi, 1)]; + Tx = Tn + To; + Tp = KP1_732050807 * (Tn - To); + TB = FMA(KP2_000000000, Tw, Tx); + } + { + E Tf, T1, T2, Td, Te; /* Second block: loads Ci[4], Cr[0], Cr[4] and forms T3, Tr, Tg. */ + Te = Ci[WS(csi, 4)]; + Tf = KP1_732050807 * Te; + T1 = Cr[0]; + T2 = Cr[WS(csr, 4)]; + Td = T1 - T2; + T3 = FMA(KP2_000000000, T2, T1); + Tr = Td - Tf; + Tg = Td + Tf; + } + { + E Tj, T4, T5, Th, Ti; /* Third block: loads Ci[2], Cr[6], Cr[2] and forms T6, Ts, Tk. */ + Ti = Ci[WS(csi, 2)]; + Tj = KP1_732050807 * Ti; + T4 = Cr[WS(csr, 6)]; + T5 = Cr[WS(csr, 2)]; + Th = T4 - T5; + T6 = FMA(KP2_000000000, T5, T4); + Ts = Th + Tj; + Tk = Th - Tj; + } + { + E T7, Tc, Tz, TC; /* Final block: every output is stored here, each pair being the sum and the difference of the same two temporaries. */ + T7 = T3 + T6; + Tc = KP2_000000000 * (T8 + Tb); + R0[WS(rs, 3)] = T7 - Tc; + R0[0] = T7 + Tc; + { + E Tl, Tq, TD, TE; + Tl = Tg + Tk; + Tq = Tm - Tp; + R0[WS(rs, 1)] = Tl - Tq; + R0[WS(rs, 4)] = Tl + Tq; + TD = Tg - Tk; + TE = TB - TA; + R1[WS(rs, 2)] = TD - TE; + R1[WS(rs, 5)] = TD + TE; + } + Tz = Tr - Ts; + TC = TA + TB; + R1[0] = Tz - TC; + R1[WS(rs, 3)] = Tz + TC; + { + E Tv, Ty, Tt, Tu; + Tv = T3 - T6; + Ty = KP2_000000000 * (Tw - Tx); + R1[WS(rs, 4)] = Tv - Ty; + R1[WS(rs, 1)] = Tv + Ty; + Tt = Tr + Ts; + Tu = Tm + Tp; + R0[WS(rs, 5)] = Tt - Tu; + R0[WS(rs, 2)] = Tt + Tu; + } + } + } + } +} + +static const kr2c_desc desc = { 12, "r2cb_12", {34, 6, 4, 0}, &GENUS }; /* 34, 6, 4 repeat the addition / multiplication / fused multiply-add counts from this variant's header comment. */ + +void X(codelet_r2cb_12) (planner *p) { /* Passes the r2cb_12 kernel and its descriptor to the planner p through X(kr2c_register). */ + X(kr2c_register) (p, r2cb_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3181 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any 
later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 128 -name r2cb_128 -include r2cb.h */ + +/* + * This function contains 956 FP additions, 540 FP multiplications, + * (or, 416 additions, 0 multiplications, 540 fused multiply/add), + * 242 stack variables, 36 constants, and 256 memory accesses + */ +#include "r2cb.h" + +static void r2cb_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_715457220, +1.715457220000544139804539968569540274084981599); + DK(KP1_606415062, +1.606415062961289819613353025926283847759138854); + DK(KP599376933, +0.599376933681923766271389869014404232837890546); + DK(KP741650546, +0.741650546272035369581266691172079863842265220); + DK(KP1_978353019, +1.978353019929561946903347476032486127967379067); + DK(KP1_940062506, +1.940062506389087985207968414572200502913731924); + DK(KP148335987, +0.148335987538347428753676511486911367000625355); + DK(KP250486960, +0.250486960191305461595702160124721208578685568); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP1_807978586, +1.807978586246886663172400594461074097420264050); + DK(KP1_481902250, +1.481902250709918182351233794990325459457910619); + DK(KP472964775, 
+0.472964775891319928124438237972992463904131113); + DK(KP906347169, +0.906347169019147157946142717268914412664134293); + DK(KP1_997590912, +1.997590912410344785429543209518201388886407229); + DK(KP1_883088130, +1.883088130366041556825018805199004714371179592); + DK(KP049126849, +0.049126849769467254105343321271313617079695752); + DK(KP357805721, +0.357805721314524104672487743774474392487532769); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(512, rs), MAKE_VOLATILE_STRIDE(512, csr), MAKE_VOLATILE_STRIDE(512, csi)) { + E T9H, T9I, T9X, T9Y; + { + E Tdr, T9, Tcl, Ta9, T6b, T2d, T91, 
T7j, Tg, Tds, Tcm, Tae, T92, T7m, T6c; + E T2o, Tdu, Tw, Tco, Tap, TeM, Tdx, T6f, T2G, T6e, T2P, T94, T7t, Tcp, Tak; + E T95, T7q, TdM, T1i, TcL, TbD, Tf0, Te6, T6q, T42, T6B, T5t, T9r, T8j, TcA; + E TaY, T9g, T7S, TdA, TM, Tcv, TaN, TeP, TdI, T6i, T38, T6l, T3F, T9b, T7J; + E Tcs, Taw, T98, T7y, T1N, TeW, T6x, T4H, Te8, TdV, T6w, T4Q, T9j, T86, TcO; + E TcI, T9k, T83, TbI, Tbl, T22, TeV, Te0, Te9, T58, T6u, T6t, T5h, T9m, T8d; + E TcP, TcF, T9n, T8a, TbJ, Tbw, Te3, T1x, TcB, TbG, Tf1, TdP, T6C, T4p, T6r; + E T5w, T9h, T8m, TcM, Tb9, T9s, T7Z, TaB, TaG, TdF, T11, Tct, TaQ, TeQ, TdD; + E T6m, T3v, T7B, T7E, T6j, T3I, T99, T7M; + { + E TaU, TaX, T7Q, T7R, Tbk, Tbf; + { + E Td, T2e, Tc, Tab, T2m, Te, T2f, T2g; + { + E T7h, T27, T2c, T7i; + { + E T4, T26, T29, T25, T3, T28, T8, T2a; + T4 = Cr[WS(csr, 32)]; + T26 = Ci[WS(csi, 32)]; + { + E T1, T2, T6, T7; + T1 = Cr[0]; + T2 = Cr[WS(csr, 64)]; + T6 = Cr[WS(csr, 16)]; + T7 = Cr[WS(csr, 48)]; + T29 = Ci[WS(csi, 16)]; + T25 = T1 - T2; + T3 = T1 + T2; + T28 = T6 - T7; + T8 = T6 + T7; + T2a = Ci[WS(csi, 48)]; + } + { + E Ta7, T5, Ta8, T2b; + Ta7 = FNMS(KP2_000000000, T4, T3); + T5 = FMA(KP2_000000000, T4, T3); + T7h = FMA(KP2_000000000, T26, T25); + T27 = FNMS(KP2_000000000, T26, T25); + Ta8 = T29 - T2a; + T2b = T29 + T2a; + Tdr = FNMS(KP2_000000000, T8, T5); + T9 = FMA(KP2_000000000, T8, T5); + Tcl = FMA(KP2_000000000, Ta8, Ta7); + Ta9 = FNMS(KP2_000000000, Ta8, Ta7); + T2c = T28 - T2b; + T7i = T28 + T2b; + } + } + { + E Ta, Tb, T2k, T2l; + Ta = Cr[WS(csr, 8)]; + T6b = FNMS(KP1_414213562, T2c, T27); + T2d = FMA(KP1_414213562, T2c, T27); + T91 = FMA(KP1_414213562, T7i, T7h); + T7j = FNMS(KP1_414213562, T7i, T7h); + Tb = Cr[WS(csr, 56)]; + T2k = Ci[WS(csi, 8)]; + T2l = Ci[WS(csi, 56)]; + Td = Cr[WS(csr, 40)]; + T2e = Ta - Tb; + Tc = Ta + Tb; + Tab = T2k - T2l; + T2m = T2k + T2l; + Te = Cr[WS(csr, 24)]; + T2f = Ci[WS(csi, 40)]; + T2g = Ci[WS(csi, 24)]; + } + } + { + E Tag, Taj, T7o, T7p; + { + E T2q, Tk, Tam, T2K, T2H, Tn, Tan, 
T2t, Tu, Tah, T2E, T2N, Tr, T2v, T2y; + E Tai; + { + E Tl, Tm, T2r, T2s; + { + E Ti, Tj, T2j, Tf, T2I, T2J; + Ti = Cr[WS(csr, 4)]; + T2j = Td - Te; + Tf = Td + Te; + { + E Tac, T2h, T7k, T2n; + Tac = T2f - T2g; + T2h = T2f + T2g; + T7k = T2m - T2j; + T2n = T2j + T2m; + { + E Taa, Tad, T7l, T2i; + Taa = Tc - Tf; + Tg = Tc + Tf; + Tad = Tab - Tac; + Tds = Tac + Tab; + T7l = T2e + T2h; + T2i = T2e - T2h; + Tcm = Taa + Tad; + Tae = Taa - Tad; + T92 = FMA(KP414213562, T7k, T7l); + T7m = FNMS(KP414213562, T7l, T7k); + T6c = FMA(KP414213562, T2i, T2n); + T2o = FNMS(KP414213562, T2n, T2i); + Tj = Cr[WS(csr, 60)]; + } + } + T2I = Ci[WS(csi, 4)]; + T2J = Ci[WS(csi, 60)]; + Tl = Cr[WS(csr, 36)]; + T2q = Ti - Tj; + Tk = Ti + Tj; + Tam = T2I - T2J; + T2K = T2I + T2J; + Tm = Cr[WS(csr, 28)]; + } + T2r = Ci[WS(csi, 36)]; + T2s = Ci[WS(csi, 28)]; + { + E Ts, Tt, T2B, T2C; + Ts = Cr[WS(csr, 12)]; + T2H = Tl - Tm; + Tn = Tl + Tm; + Tan = T2r - T2s; + T2t = T2r + T2s; + Tt = Cr[WS(csr, 52)]; + T2B = Ci[WS(csi, 12)]; + T2C = Ci[WS(csi, 52)]; + { + E Tp, T2A, T2D, Tq, T2w, T2x; + Tp = Cr[WS(csr, 20)]; + Tu = Ts + Tt; + T2A = Ts - Tt; + Tah = T2C - T2B; + T2D = T2B + T2C; + Tq = Cr[WS(csr, 44)]; + T2w = Ci[WS(csi, 20)]; + T2x = Ci[WS(csi, 44)]; + T2E = T2A - T2D; + T2N = T2A + T2D; + Tr = Tp + Tq; + T2v = Tp - Tq; + T2y = T2w + T2x; + Tai = T2w - T2x; + } + } + } + { + E T2M, Tdv, Tdw, T2u, T2F, T7s, T7r, T2L, T2O; + { + E To, T2z, Tv, Tal, Tao; + Tag = Tk - Tn; + To = Tk + Tn; + T2M = T2v + T2y; + T2z = T2v - T2y; + Tv = Tr + Tu; + Tal = Tr - Tu; + Tao = Tam - Tan; + Tdv = Tan + Tam; + Tdu = To - Tv; + Tw = To + Tv; + Tco = Tao - Tal; + Tap = Tal + Tao; + Tdw = Tai + Tah; + Taj = Tah - Tai; + T7o = T2q + T2t; + T2u = T2q - T2t; + T2F = T2z + T2E; + T7s = T2E - T2z; + } + T7r = T2K - T2H; + T2L = T2H + T2K; + TeM = Tdw + Tdv; + Tdx = Tdv - Tdw; + T6f = FNMS(KP707106781, T2F, T2u); + T2G = FMA(KP707106781, T2F, T2u); + T2O = T2M - T2N; + T7p = T2M + T2N; + T6e = FNMS(KP707106781, T2O, 
T2L); + T2P = FMA(KP707106781, T2O, T2L); + T94 = FMA(KP707106781, T7s, T7r); + T7t = FNMS(KP707106781, T7s, T7r); + } + } + { + E T3M, T16, TbA, T5o, T5l, T19, TbB, T3P, T1g, TaV, T40, T5r, T1d, T3R, T3U; + E TaW; + { + E T17, T18, T3N, T3O; + { + E T14, T15, T5m, T5n; + T14 = Cr[WS(csr, 1)]; + Tcp = Tag - Taj; + Tak = Tag + Taj; + T95 = FMA(KP707106781, T7p, T7o); + T7q = FNMS(KP707106781, T7p, T7o); + T15 = Cr[WS(csr, 63)]; + T5m = Ci[WS(csi, 1)]; + T5n = Ci[WS(csi, 63)]; + T17 = Cr[WS(csr, 33)]; + T3M = T14 - T15; + T16 = T14 + T15; + TbA = T5m - T5n; + T5o = T5m + T5n; + T18 = Cr[WS(csr, 31)]; + } + T3N = Ci[WS(csi, 33)]; + T3O = Ci[WS(csi, 31)]; + { + E T1e, T1f, T3X, T3Y; + T1e = Cr[WS(csr, 15)]; + T5l = T17 - T18; + T19 = T17 + T18; + TbB = T3N - T3O; + T3P = T3N + T3O; + T1f = Cr[WS(csr, 49)]; + T3X = Ci[WS(csi, 15)]; + T3Y = Ci[WS(csi, 49)]; + { + E T1b, T3W, T3Z, T1c, T3S, T3T; + T1b = Cr[WS(csr, 17)]; + T1g = T1e + T1f; + T3W = T1e - T1f; + TaV = T3Y - T3X; + T3Z = T3X + T3Y; + T1c = Cr[WS(csr, 47)]; + T3S = Ci[WS(csi, 17)]; + T3T = Ci[WS(csi, 47)]; + T40 = T3W - T3Z; + T5r = T3W + T3Z; + T1d = T1b + T1c; + T3R = T1b - T1c; + T3U = T3S + T3T; + TaW = T3S - T3T; + } + } + } + { + E T5q, Te4, Te5, T3Q, T41, T8i, T8h, T5p, T5s; + { + E T1a, T3V, T1h, Tbz, TbC; + TaU = T16 - T19; + T1a = T16 + T19; + T5q = T3R + T3U; + T3V = T3R - T3U; + T1h = T1d + T1g; + Tbz = T1d - T1g; + TbC = TbA - TbB; + Te4 = TbB + TbA; + TdM = T1a - T1h; + T1i = T1a + T1h; + TcL = TbC - Tbz; + TbD = Tbz + TbC; + Te5 = TaW + TaV; + TaX = TaV - TaW; + T7Q = T3M + T3P; + T3Q = T3M - T3P; + T41 = T3V + T40; + T8i = T40 - T3V; + } + T8h = T5o - T5l; + T5p = T5l + T5o; + Tf0 = Te5 + Te4; + Te6 = Te4 - Te5; + T6q = FNMS(KP707106781, T41, T3Q); + T42 = FMA(KP707106781, T41, T3Q); + T5s = T5q - T5r; + T7R = T5q + T5r; + T6B = FNMS(KP707106781, T5s, T5p); + T5t = FMA(KP707106781, T5s, T5p); + T9r = FMA(KP707106781, T8i, T8h); + T8j = FNMS(KP707106781, T8i, T8h); + } + } + } + } + { + E Tas, 
Tav, T7w, T7x; + { + E T2S, TA, TaK, T3A, T3x, TD, TaL, T2V, TK, Tat, T36, T3D, TH, T2X, T30; + E Tau; + { + E TB, TC, T2T, T2U; + { + E Ty, Tz, T3y, T3z; + Ty = Cr[WS(csr, 2)]; + TcA = TaU - TaX; + TaY = TaU + TaX; + T9g = FMA(KP707106781, T7R, T7Q); + T7S = FNMS(KP707106781, T7R, T7Q); + Tz = Cr[WS(csr, 62)]; + T3y = Ci[WS(csi, 2)]; + T3z = Ci[WS(csi, 62)]; + TB = Cr[WS(csr, 34)]; + T2S = Ty - Tz; + TA = Ty + Tz; + TaK = T3y - T3z; + T3A = T3y + T3z; + TC = Cr[WS(csr, 30)]; + } + T2T = Ci[WS(csi, 34)]; + T2U = Ci[WS(csi, 30)]; + { + E TI, TJ, T33, T34; + TI = Cr[WS(csr, 14)]; + T3x = TB - TC; + TD = TB + TC; + TaL = T2T - T2U; + T2V = T2T + T2U; + TJ = Cr[WS(csr, 50)]; + T33 = Ci[WS(csi, 14)]; + T34 = Ci[WS(csi, 50)]; + { + E TF, T32, T35, TG, T2Y, T2Z; + TF = Cr[WS(csr, 18)]; + TK = TI + TJ; + T32 = TI - TJ; + Tat = T34 - T33; + T35 = T33 + T34; + TG = Cr[WS(csr, 46)]; + T2Y = Ci[WS(csi, 18)]; + T2Z = Ci[WS(csi, 46)]; + T36 = T32 - T35; + T3D = T32 + T35; + TH = TF + TG; + T2X = TF - TG; + T30 = T2Y + T2Z; + Tau = T2Y - T2Z; + } + } + } + { + E T3C, TdG, TdH, T2W, T37, T7I, T7H, T3B, T3E; + { + E TE, T31, TL, TaJ, TaM; + Tas = TA - TD; + TE = TA + TD; + T3C = T2X + T30; + T31 = T2X - T30; + TL = TH + TK; + TaJ = TH - TK; + TaM = TaK - TaL; + TdG = TaL + TaK; + TdA = TE - TL; + TM = TE + TL; + Tcv = TaM - TaJ; + TaN = TaJ + TaM; + TdH = Tau + Tat; + Tav = Tat - Tau; + T7w = T2S + T2V; + T2W = T2S - T2V; + T37 = T31 + T36; + T7I = T36 - T31; + } + T7H = T3A - T3x; + T3B = T3x + T3A; + TeP = TdH + TdG; + TdI = TdG - TdH; + T6i = FNMS(KP707106781, T37, T2W); + T38 = FMA(KP707106781, T37, T2W); + T3E = T3C - T3D; + T7x = T3C + T3D; + T6l = FNMS(KP707106781, T3E, T3B); + T3F = FMA(KP707106781, T3E, T3B); + T9b = FMA(KP707106781, T7I, T7H); + T7J = FNMS(KP707106781, T7I, T7H); + } + } + { + E T4r, T4I, T1F, Tbb, T4u, T4L, Tbj, TdS, T1I, Tbd, T4N, T4A, T4B, T1L, Tbc; + E T4E, T1M, Tbg; + { + E T1z, T1A, T1C, T1D, Tbi, Tbh; + T1z = Cr[WS(csr, 5)]; + Tcs = Tas - Tav; + 
Taw = Tas + Tav; + T98 = FMA(KP707106781, T7x, T7w); + T7y = FNMS(KP707106781, T7x, T7w); + T1A = Cr[WS(csr, 59)]; + T1C = Cr[WS(csr, 37)]; + T1D = Cr[WS(csr, 27)]; + { + E T4s, T1B, T1E, T4t, T4J, T4K; + T4s = Ci[WS(csi, 37)]; + T4r = T1z - T1A; + T1B = T1z + T1A; + T4I = T1C - T1D; + T1E = T1C + T1D; + T4t = Ci[WS(csi, 27)]; + T4J = Ci[WS(csi, 5)]; + T4K = Ci[WS(csi, 59)]; + T1F = T1B + T1E; + Tbb = T1B - T1E; + T4u = T4s + T4t; + Tbi = T4s - T4t; + Tbh = T4J - T4K; + T4L = T4J + T4K; + } + { + E T1J, T4w, T4z, T1K, T4C, T4D; + { + E T1G, T1H, T4x, T4y; + T1G = Cr[WS(csr, 21)]; + Tbj = Tbh - Tbi; + TdS = Tbi + Tbh; + T1H = Cr[WS(csr, 43)]; + T4x = Ci[WS(csi, 21)]; + T4y = Ci[WS(csi, 43)]; + T1J = Cr[WS(csr, 11)]; + T4w = T1G - T1H; + T1I = T1G + T1H; + Tbd = T4x - T4y; + T4z = T4x + T4y; + T1K = Cr[WS(csr, 53)]; + T4C = Ci[WS(csi, 11)]; + T4D = Ci[WS(csi, 53)]; + } + T4N = T4w + T4z; + T4A = T4w - T4z; + T4B = T1J - T1K; + T1L = T1J + T1K; + Tbc = T4D - T4C; + T4E = T4C + T4D; + } + } + T1M = T1I + T1L; + Tbg = T1I - T1L; + { + E TdT, Tbe, T4F, T4O; + TdT = Tbd + Tbc; + Tbe = Tbc - Tbd; + T4F = T4B - T4E; + T4O = T4B + T4E; + { + E TdR, TdU, T81, T4v, T4G, T85; + TdR = T1F - T1M; + T1N = T1F + T1M; + TeW = TdT + TdS; + TdU = TdS - TdT; + T81 = T4r + T4u; + T4v = T4r - T4u; + T4G = T4A + T4F; + T85 = T4F - T4A; + { + E T84, T4M, T4P, T82, TcG, TcH; + T84 = T4L - T4I; + T4M = T4I + T4L; + T6x = FNMS(KP707106781, T4G, T4v); + T4H = FMA(KP707106781, T4G, T4v); + Te8 = TdR + TdU; + TdV = TdR - TdU; + T4P = T4N - T4O; + T82 = T4N + T4O; + Tbk = Tbg + Tbj; + TcG = Tbj - Tbg; + T6w = FNMS(KP707106781, T4P, T4M); + T4Q = FMA(KP707106781, T4P, T4M); + T9j = FMA(KP707106781, T85, T84); + T86 = FNMS(KP707106781, T85, T84); + TcH = Tbb - Tbe; + Tbf = Tbb + Tbe; + TcO = FMA(KP414213562, TcG, TcH); + TcI = FNMS(KP414213562, TcH, TcG); + T9k = FMA(KP707106781, T82, T81); + T83 = FNMS(KP707106781, T82, T81); + } + } + } + } + } + { + E T88, T89, Tbv, Tbq; + { + E T4S, T59, T4V, 
Tbm, T1U, T5c, TdX, Tbu, T1X, T53, Tbo, T52, T20, T54, T5e; + E T51; + { + E T1R, T1Q, T1S, T1O, T1P; + T1O = Cr[WS(csr, 3)]; + T1P = Cr[WS(csr, 61)]; + T1R = Cr[WS(csr, 29)]; + TbI = FMA(KP414213562, Tbf, Tbk); + Tbl = FNMS(KP414213562, Tbk, Tbf); + T1Q = T1O + T1P; + T4S = T1O - T1P; + T1S = Cr[WS(csr, 35)]; + { + E Tbt, Tbs, T4X, T50; + { + E T5a, T5b, T4T, T4U, T1T; + T4T = Ci[WS(csi, 29)]; + T4U = Ci[WS(csi, 35)]; + T1T = T1R + T1S; + T59 = T1R - T1S; + T5a = Ci[WS(csi, 3)]; + Tbt = T4T - T4U; + T4V = T4T + T4U; + T5b = Ci[WS(csi, 61)]; + Tbm = T1Q - T1T; + T1U = T1Q + T1T; + T5c = T5a + T5b; + Tbs = T5b - T5a; + } + { + E T4Y, T4Z, T1V, T1W, T1Y, T1Z; + T1V = Cr[WS(csr, 13)]; + T1W = Cr[WS(csr, 51)]; + TdX = Tbt + Tbs; + Tbu = Tbs - Tbt; + T4Y = Ci[WS(csi, 13)]; + T4X = T1V - T1W; + T1X = T1V + T1W; + T4Z = Ci[WS(csi, 51)]; + T1Y = Cr[WS(csr, 19)]; + T1Z = Cr[WS(csr, 45)]; + T53 = Ci[WS(csi, 19)]; + Tbo = T4Y - T4Z; + T50 = T4Y + T4Z; + T52 = T1Y - T1Z; + T20 = T1Y + T1Z; + T54 = Ci[WS(csi, 45)]; + } + T5e = T4X + T50; + T51 = T4X - T50; + } + } + { + E T21, Tbr, T55, Tbn; + T21 = T1X + T20; + Tbr = T1X - T20; + T55 = T53 + T54; + Tbn = T54 - T53; + { + E T4W, TdW, Tbp, T5f, TdZ, T57, T8c, TdY, T56; + T88 = T4S + T4V; + T4W = T4S - T4V; + T22 = T1U + T21; + TdW = T1U - T21; + TdY = Tbo + Tbn; + Tbp = Tbn - Tbo; + T56 = T52 - T55; + T5f = T52 + T55; + TeV = TdY + TdX; + TdZ = TdX - TdY; + T57 = T51 + T56; + T8c = T56 - T51; + { + E T8b, T5d, T5g, TcD, TcE; + T8b = T59 + T5c; + T5d = T59 - T5c; + T5g = T5e - T5f; + T89 = T5e + T5f; + Te0 = TdW + TdZ; + Te9 = TdZ - TdW; + T58 = FMA(KP707106781, T57, T4W); + T6u = FNMS(KP707106781, T57, T4W); + T6t = FNMS(KP707106781, T5g, T5d); + T5h = FMA(KP707106781, T5g, T5d); + Tbv = Tbr + Tbu; + TcD = Tbu - Tbr; + TcE = Tbm - Tbp; + Tbq = Tbm + Tbp; + T9m = FNMS(KP707106781, T8c, T8b); + T8d = FMA(KP707106781, T8c, T8b); + TcP = FNMS(KP414213562, TcD, TcE); + TcF = FMA(KP414213562, TcE, TcD); + } + } + } + } + { + E Tb3, 
Tb8, T7V, T7Y; + { + E T7T, T4c, TaZ, T1p, TdO, Tb2, T7U, T47, T1t, T4e, T1s, Tb5, T4m, T1u, T4f; + E T4g; + { + E T1m, T43, T1l, Tb0, T4b, T1n, T44, T45; + { + E T1j, T1k, T49, T4a; + T1j = Cr[WS(csr, 9)]; + T9n = FMA(KP707106781, T89, T88); + T8a = FNMS(KP707106781, T89, T88); + TbJ = FNMS(KP414213562, Tbq, Tbv); + Tbw = FMA(KP414213562, Tbv, Tbq); + T1k = Cr[WS(csr, 55)]; + T49 = Ci[WS(csi, 9)]; + T4a = Ci[WS(csi, 55)]; + T1m = Cr[WS(csr, 41)]; + T43 = T1j - T1k; + T1l = T1j + T1k; + Tb0 = T49 - T4a; + T4b = T49 + T4a; + T1n = Cr[WS(csr, 23)]; + T44 = Ci[WS(csi, 41)]; + T45 = Ci[WS(csi, 23)]; + } + { + E T1q, T1r, T4k, T4l; + T1q = Cr[WS(csr, 7)]; + { + E T48, T1o, Tb1, T46; + T48 = T1m - T1n; + T1o = T1m + T1n; + Tb1 = T44 - T45; + T46 = T44 + T45; + T7T = T4b - T48; + T4c = T48 + T4b; + TaZ = T1l - T1o; + T1p = T1l + T1o; + TdO = Tb1 + Tb0; + Tb2 = Tb0 - Tb1; + T7U = T43 + T46; + T47 = T43 - T46; + T1r = Cr[WS(csr, 57)]; + } + T4k = Ci[WS(csi, 7)]; + T4l = Ci[WS(csi, 57)]; + T1t = Cr[WS(csr, 25)]; + T4e = T1q - T1r; + T1s = T1q + T1r; + Tb5 = T4l - T4k; + T4m = T4k + T4l; + T1u = Cr[WS(csr, 39)]; + T4f = Ci[WS(csi, 25)]; + T4g = Ci[WS(csi, 39)]; + } + } + { + E T7W, TdN, T7X, T5u, T4d, T4o, T5v, T8k, T8l; + { + E T4n, T1w, T4i, TbE, TbF, Tb4, Tb7; + { + E T4j, T1v, Tb6, T4h; + T4j = T1t - T1u; + T1v = T1t + T1u; + Tb6 = T4f - T4g; + T4h = T4f + T4g; + T7W = T4j + T4m; + T4n = T4j - T4m; + Tb4 = T1s - T1v; + T1w = T1s + T1v; + TdN = Tb6 + Tb5; + Tb7 = Tb5 - Tb6; + T7X = T4e + T4h; + T4i = T4e - T4h; + } + Tb3 = TaZ - Tb2; + TbE = TaZ + Tb2; + TbF = Tb7 - Tb4; + Tb8 = Tb4 + Tb7; + Te3 = T1p - T1w; + T1x = T1p + T1w; + TcB = TbE - TbF; + TbG = TbE + TbF; + T5u = FMA(KP414213562, T47, T4c); + T4d = FNMS(KP414213562, T4c, T47); + T4o = FMA(KP414213562, T4n, T4i); + T5v = FNMS(KP414213562, T4i, T4n); + } + Tf1 = TdO + TdN; + TdP = TdN - TdO; + T6C = T4o - T4d; + T4p = T4d + T4o; + T7V = FNMS(KP414213562, T7U, T7T); + T8k = FMA(KP414213562, T7T, T7U); + T8l = 
FMA(KP414213562, T7W, T7X); + T7Y = FNMS(KP414213562, T7X, T7W); + T6r = T5u - T5v; + T5w = T5u + T5v; + T9h = T8k + T8l; + T8m = T8k - T8l; + } + } + { + E T7z, T3i, Tax, TT, TdC, TaA, T7A, T3d, TX, T3k, TW, TaD, T3s, TY, T3l; + E T3m; + { + E TQ, T39, TP, Tay, T3h, TR, T3a, T3b; + { + E TN, TO, T3f, T3g; + TN = Cr[WS(csr, 10)]; + TcM = Tb8 - Tb3; + Tb9 = Tb3 + Tb8; + T9s = T7V - T7Y; + T7Z = T7V + T7Y; + TO = Cr[WS(csr, 54)]; + T3f = Ci[WS(csi, 10)]; + T3g = Ci[WS(csi, 54)]; + TQ = Cr[WS(csr, 42)]; + T39 = TN - TO; + TP = TN + TO; + Tay = T3f - T3g; + T3h = T3f + T3g; + TR = Cr[WS(csr, 22)]; + T3a = Ci[WS(csi, 42)]; + T3b = Ci[WS(csi, 22)]; + } + { + E TU, TV, T3q, T3r; + TU = Cr[WS(csr, 6)]; + { + E T3e, TS, Taz, T3c; + T3e = TQ - TR; + TS = TQ + TR; + Taz = T3a - T3b; + T3c = T3a + T3b; + T7z = T3h - T3e; + T3i = T3e + T3h; + Tax = TP - TS; + TT = TP + TS; + TdC = Taz + Tay; + TaA = Tay - Taz; + T7A = T39 + T3c; + T3d = T39 - T3c; + TV = Cr[WS(csr, 58)]; + } + T3q = Ci[WS(csi, 6)]; + T3r = Ci[WS(csi, 58)]; + TX = Cr[WS(csr, 26)]; + T3k = TU - TV; + TW = TU + TV; + TaD = T3r - T3q; + T3s = T3q + T3r; + TY = Cr[WS(csr, 38)]; + T3l = Ci[WS(csi, 26)]; + T3m = Ci[WS(csi, 38)]; + } + } + { + E T7C, TdB, T7D, T3G, T3j, T3u, T3H, T7K, T7L; + { + E T3t, T10, T3o, TaO, TaP, TaC, TaF; + { + E T3p, TZ, TaE, T3n; + T3p = TX - TY; + TZ = TX + TY; + TaE = T3l - T3m; + T3n = T3l + T3m; + T7C = T3p + T3s; + T3t = T3p - T3s; + TaC = TW - TZ; + T10 = TW + TZ; + TdB = TaE + TaD; + TaF = TaD - TaE; + T7D = T3k + T3n; + T3o = T3k - T3n; + } + TaB = Tax - TaA; + TaO = Tax + TaA; + TaP = TaF - TaC; + TaG = TaC + TaF; + TdF = TT - T10; + T11 = TT + T10; + Tct = TaO - TaP; + TaQ = TaO + TaP; + T3G = FMA(KP414213562, T3d, T3i); + T3j = FNMS(KP414213562, T3i, T3d); + T3u = FMA(KP414213562, T3t, T3o); + T3H = FNMS(KP414213562, T3o, T3t); + } + TeQ = TdC + TdB; + TdD = TdB - TdC; + T6m = T3u - T3j; + T3v = T3j + T3u; + T7B = FNMS(KP414213562, T7A, T7z); + T7K = FMA(KP414213562, T7z, T7A); + 
T7L = FMA(KP414213562, T7C, T7D); + T7E = FNMS(KP414213562, T7D, T7C); + T6j = T3G - T3H; + T3I = T3G + T3H; + T99 = T7K + T7L; + T7M = T7K - T7L; + } + } + } + } + } + { + E Tcw, T9c, T7F, Tev, Teu, TeD, Tep, TeG, Tez, TeE, Tes; + { + E TbX, TbY, Tc7, TbP, Tar, Tc5, Tc1, Tc0, Tc4, Tba, TbS, TbL, TbQ, TaS, Tbx; + E Tc8; + { + E TeO, TaH, TeR, TeL, TeU, TeZ, Tf2, TeX, Tfh, Tfn, Tfo, Tfm; + { + E T12, Tfg, Tfj, Tx, Tff, T24, Tfi, Tfk, Th, T1y, T23; + TeO = TM - T11; + T12 = TM + T11; + Tcw = TaG - TaB; + TaH = TaB + TaG; + T9c = T7B - T7E; + T7F = T7B + T7E; + Tfg = TeQ + TeP; + TeR = TeP - TeQ; + TeL = FNMS(KP2_000000000, Tg, T9); + Th = FMA(KP2_000000000, Tg, T9); + T1y = T1i + T1x; + TeU = T1i - T1x; + TeZ = T1N - T22; + T23 = T1N + T22; + Tfj = Tf1 + Tf0; + Tf2 = Tf0 - Tf1; + Tx = FMA(KP2_000000000, Tw, Th); + Tff = FNMS(KP2_000000000, Tw, Th); + T24 = T1y + T23; + Tfi = T1y - T23; + TeX = TeV - TeW; + Tfk = TeW + TeV; + { + E T13, Tfp, Tfl, Tfq; + T13 = FMA(KP2_000000000, T12, Tx); + Tfp = FNMS(KP2_000000000, T12, Tx); + Tfh = FNMS(KP2_000000000, Tfg, Tff); + Tfn = FMA(KP2_000000000, Tfg, Tff); + Tfl = Tfj - Tfk; + Tfq = Tfk + Tfj; + R0[0] = FMA(KP2_000000000, T24, T13); + R0[WS(rs, 32)] = FNMS(KP2_000000000, T24, T13); + R0[WS(rs, 48)] = FMA(KP2_000000000, Tfq, Tfp); + R0[WS(rs, 16)] = FNMS(KP2_000000000, Tfq, Tfp); + Tfo = Tfi + Tfl; + Tfm = Tfi - Tfl; + } + } + { + E Tf7, TeN, Tfa, Tf3, Tf8, TeS; + R0[WS(rs, 8)] = FMA(KP1_414213562, Tfm, Tfh); + R0[WS(rs, 40)] = FNMS(KP1_414213562, Tfm, Tfh); + R0[WS(rs, 56)] = FMA(KP1_414213562, Tfo, Tfn); + R0[WS(rs, 24)] = FNMS(KP1_414213562, Tfo, Tfn); + Tf7 = FMA(KP2_000000000, TeM, TeL); + TeN = FNMS(KP2_000000000, TeM, TeL); + Tfa = Tf2 - TeZ; + Tf3 = TeZ + Tf2; + Tf8 = TeO + TeR; + TeS = TeO - TeR; + { + E TbH, TbK, TaI, TaR; + { + E Taf, Tf9, Tfd, Tf5, TeT, Tfb, TeY, Taq; + TbX = FNMS(KP1_414213562, Tae, Ta9); + Taf = FMA(KP1_414213562, Tae, Ta9); + Tf9 = FNMS(KP1_414213562, Tf8, Tf7); + Tfd = FMA(KP1_414213562, Tf8, 
Tf7); + Tf5 = FNMS(KP1_414213562, TeS, TeN); + TeT = FMA(KP1_414213562, TeS, TeN); + Tfb = TeU - TeX; + TeY = TeU + TeX; + Taq = FNMS(KP414213562, Tap, Tak); + TbY = FMA(KP414213562, Tak, Tap); + Tc7 = FNMS(KP707106781, TbG, TbD); + TbH = FMA(KP707106781, TbG, TbD); + { + E Tfc, Tfe, Tf6, Tf4; + Tfc = FNMS(KP414213562, Tfb, Tfa); + Tfe = FMA(KP414213562, Tfa, Tfb); + Tf6 = FMA(KP414213562, TeY, Tf3); + Tf4 = FNMS(KP414213562, Tf3, TeY); + TbP = FNMS(KP1_847759065, Taq, Taf); + Tar = FMA(KP1_847759065, Taq, Taf); + R0[WS(rs, 44)] = FMA(KP1_847759065, Tfc, Tf9); + R0[WS(rs, 12)] = FNMS(KP1_847759065, Tfc, Tf9); + R0[WS(rs, 60)] = FMA(KP1_847759065, Tfe, Tfd); + R0[WS(rs, 28)] = FNMS(KP1_847759065, Tfe, Tfd); + R0[WS(rs, 52)] = FMA(KP1_847759065, Tf6, Tf5); + R0[WS(rs, 20)] = FNMS(KP1_847759065, Tf6, Tf5); + R0[WS(rs, 4)] = FMA(KP1_847759065, Tf4, TeT); + R0[WS(rs, 36)] = FNMS(KP1_847759065, Tf4, TeT); + TbK = TbI + TbJ; + Tc5 = TbI - TbJ; + } + } + Tc1 = FNMS(KP707106781, TaH, Taw); + TaI = FMA(KP707106781, TaH, Taw); + TaR = FMA(KP707106781, TaQ, TaN); + Tc0 = FNMS(KP707106781, TaQ, TaN); + Tc4 = FNMS(KP707106781, Tb9, TaY); + Tba = FMA(KP707106781, Tb9, TaY); + TbS = FNMS(KP923879532, TbK, TbH); + TbL = FMA(KP923879532, TbK, TbH); + TbQ = FMA(KP198912367, TaI, TaR); + TaS = FNMS(KP198912367, TaR, TaI); + Tbx = Tbl + Tbw; + Tc8 = Tbw - Tbl; + } + } + } + { + E Ten, Teo, Tex, Tef, Tdz, Ter, Teq, TdQ, Tei, Teb, Teg, TdK, Te1, Tey; + { + E Te7, Tea, TdE, TdJ; + { + E Tdt, TbR, TbV, TbN, TaT, TbT, Tby, Tdy; + Ten = FMA(KP2_000000000, Tds, Tdr); + Tdt = FNMS(KP2_000000000, Tds, Tdr); + TbR = FNMS(KP1_961570560, TbQ, TbP); + TbV = FMA(KP1_961570560, TbQ, TbP); + TbN = FNMS(KP1_961570560, TaS, Tar); + TaT = FMA(KP1_961570560, TaS, Tar); + TbT = FNMS(KP923879532, Tbx, Tba); + Tby = FMA(KP923879532, Tbx, Tba); + Tdy = Tdu - Tdx; + Teo = Tdu + Tdx; + Tex = Te6 - Te3; + Te7 = Te3 + Te6; + { + E TbU, TbW, TbO, TbM; + TbU = FNMS(KP820678790, TbT, TbS); + TbW = FMA(KP820678790, 
TbS, TbT); + TbO = FMA(KP098491403, Tby, TbL); + TbM = FNMS(KP098491403, TbL, Tby); + Tef = FNMS(KP1_414213562, Tdy, Tdt); + Tdz = FMA(KP1_414213562, Tdy, Tdt); + R0[WS(rs, 41)] = FMA(KP1_546020906, TbU, TbR); + R0[WS(rs, 9)] = FNMS(KP1_546020906, TbU, TbR); + R0[WS(rs, 57)] = FMA(KP1_546020906, TbW, TbV); + R0[WS(rs, 25)] = FNMS(KP1_546020906, TbW, TbV); + R0[WS(rs, 49)] = FMA(KP1_990369453, TbO, TbN); + R0[WS(rs, 17)] = FNMS(KP1_990369453, TbO, TbN); + R0[WS(rs, 1)] = FMA(KP1_990369453, TbM, TaT); + R0[WS(rs, 33)] = FNMS(KP1_990369453, TbM, TaT); + Tea = Te8 + Te9; + Tev = Te8 - Te9; + } + } + Ter = TdA - TdD; + TdE = TdA + TdD; + TdJ = TdF + TdI; + Teq = TdI - TdF; + Teu = TdM - TdP; + TdQ = TdM + TdP; + Tei = FNMS(KP707106781, Tea, Te7); + Teb = FMA(KP707106781, Tea, Te7); + Teg = FMA(KP414213562, TdE, TdJ); + TdK = FNMS(KP414213562, TdJ, TdE); + Te1 = TdV + Te0; + Tey = Te0 - TdV; + } + { + E Tcd, TbZ, Tcg, Tc9, Tce, Tc2; + { + E Teh, Tel, Ted, TdL, Tej, Te2; + Teh = FNMS(KP1_847759065, Teg, Tef); + Tel = FMA(KP1_847759065, Teg, Tef); + Ted = FNMS(KP1_847759065, TdK, Tdz); + TdL = FMA(KP1_847759065, TdK, Tdz); + Tej = FNMS(KP707106781, Te1, TdQ); + Te2 = FMA(KP707106781, Te1, TdQ); + { + E Tek, Tem, Tee, Tec; + Tek = FNMS(KP668178637, Tej, Tei); + Tem = FMA(KP668178637, Tei, Tej); + Tee = FMA(KP198912367, Te2, Teb); + Tec = FNMS(KP198912367, Teb, Te2); + Tcd = FMA(KP1_847759065, TbY, TbX); + TbZ = FNMS(KP1_847759065, TbY, TbX); + R0[WS(rs, 42)] = FMA(KP1_662939224, Tek, Teh); + R0[WS(rs, 10)] = FNMS(KP1_662939224, Tek, Teh); + R0[WS(rs, 58)] = FMA(KP1_662939224, Tem, Tel); + R0[WS(rs, 26)] = FNMS(KP1_662939224, Tem, Tel); + R0[WS(rs, 50)] = FMA(KP1_961570560, Tee, Ted); + R0[WS(rs, 18)] = FNMS(KP1_961570560, Tee, Ted); + R0[WS(rs, 2)] = FMA(KP1_961570560, Tec, TdL); + R0[WS(rs, 34)] = FNMS(KP1_961570560, Tec, TdL); + } + } + Tcg = FMA(KP923879532, Tc8, Tc7); + Tc9 = FNMS(KP923879532, Tc8, Tc7); + Tce = FMA(KP668178637, Tc0, Tc1); + Tc2 = FNMS(KP668178637, Tc1, 
Tc0); + { + E Tcf, Tcj, Tcb, Tc3, Tch, Tc6; + Tcf = FNMS(KP1_662939224, Tce, Tcd); + Tcj = FMA(KP1_662939224, Tce, Tcd); + Tcb = FMA(KP1_662939224, Tc2, TbZ); + Tc3 = FNMS(KP1_662939224, Tc2, TbZ); + Tch = FMA(KP923879532, Tc5, Tc4); + Tc6 = FNMS(KP923879532, Tc5, Tc4); + { + E Tci, Tck, Tcc, Tca; + Tci = FNMS(KP303346683, Tch, Tcg); + Tck = FMA(KP303346683, Tcg, Tch); + Tcc = FMA(KP534511135, Tc6, Tc9); + Tca = FNMS(KP534511135, Tc9, Tc6); + TeD = FMA(KP1_414213562, Teo, Ten); + Tep = FNMS(KP1_414213562, Teo, Ten); + R0[WS(rs, 45)] = FMA(KP1_913880671, Tci, Tcf); + R0[WS(rs, 13)] = FNMS(KP1_913880671, Tci, Tcf); + R0[WS(rs, 61)] = FMA(KP1_913880671, Tck, Tcj); + R0[WS(rs, 29)] = FNMS(KP1_913880671, Tck, Tcj); + R0[WS(rs, 53)] = FMA(KP1_763842528, Tcc, Tcb); + R0[WS(rs, 21)] = FNMS(KP1_763842528, Tcc, Tcb); + R0[WS(rs, 5)] = FMA(KP1_763842528, Tca, Tc3); + R0[WS(rs, 37)] = FNMS(KP1_763842528, Tca, Tc3); + } + } + TeG = FMA(KP707106781, Tey, Tex); + Tez = FNMS(KP707106781, Tey, Tex); + TeE = FMA(KP414213562, Teq, Ter); + Tes = FNMS(KP414213562, Ter, Teq); + } + } + } + { + E T5L, T5M, T61, T62; + { + E Td3, Td4, Tdd, TcV, Tcr, Tdb, Td7, Td6, Tda, TcC, TcY, TcR, TcW, Tcy, TcJ; + E Tde; + { + E TcN, TcQ, Tcu, Tcx; + { + E Tcn, TeF, TeJ, TeB, Tet, TeH, Tew, Tcq; + Td3 = FMA(KP1_414213562, Tcm, Tcl); + Tcn = FNMS(KP1_414213562, Tcm, Tcl); + TeF = FNMS(KP1_847759065, TeE, TeD); + TeJ = FMA(KP1_847759065, TeE, TeD); + TeB = FMA(KP1_847759065, Tes, Tep); + Tet = FNMS(KP1_847759065, Tes, Tep); + TeH = FMA(KP707106781, Tev, Teu); + Tew = FNMS(KP707106781, Tev, Teu); + Tcq = FNMS(KP414213562, Tcp, Tco); + Td4 = FMA(KP414213562, Tco, Tcp); + Tdd = FMA(KP707106781, TcM, TcL); + TcN = FNMS(KP707106781, TcM, TcL); + { + E TeI, TeK, TeC, TeA; + TeI = FNMS(KP198912367, TeH, TeG); + TeK = FMA(KP198912367, TeG, TeH); + TeC = FMA(KP668178637, Tew, Tez); + TeA = FNMS(KP668178637, Tez, Tew); + TcV = FMA(KP1_847759065, Tcq, Tcn); + Tcr = FNMS(KP1_847759065, Tcq, Tcn); + R0[WS(rs, 46)] = 
FMA(KP1_961570560, TeI, TeF); + R0[WS(rs, 14)] = FNMS(KP1_961570560, TeI, TeF); + R0[WS(rs, 62)] = FMA(KP1_961570560, TeK, TeJ); + R0[WS(rs, 30)] = FNMS(KP1_961570560, TeK, TeJ); + R0[WS(rs, 54)] = FMA(KP1_662939224, TeC, TeB); + R0[WS(rs, 22)] = FNMS(KP1_662939224, TeC, TeB); + R0[WS(rs, 6)] = FMA(KP1_662939224, TeA, Tet); + R0[WS(rs, 38)] = FNMS(KP1_662939224, TeA, Tet); + TcQ = TcO - TcP; + Tdb = TcO + TcP; + } + } + Td7 = FMA(KP707106781, Tct, Tcs); + Tcu = FNMS(KP707106781, Tct, Tcs); + Tcx = FNMS(KP707106781, Tcw, Tcv); + Td6 = FMA(KP707106781, Tcw, Tcv); + Tda = FMA(KP707106781, TcB, TcA); + TcC = FNMS(KP707106781, TcB, TcA); + TcY = FNMS(KP923879532, TcQ, TcN); + TcR = FMA(KP923879532, TcQ, TcN); + TcW = FMA(KP668178637, Tcu, Tcx); + Tcy = FNMS(KP668178637, Tcx, Tcu); + TcJ = TcF - TcI; + Tde = TcI + TcF; + } + { + E Tdj, Td5, Tdm, Tdf, Tdk, Td8; + { + E TcX, Td1, TcT, Tcz, TcZ, TcK; + TcX = FNMS(KP1_662939224, TcW, TcV); + Td1 = FMA(KP1_662939224, TcW, TcV); + TcT = FNMS(KP1_662939224, Tcy, Tcr); + Tcz = FMA(KP1_662939224, Tcy, Tcr); + TcZ = FNMS(KP923879532, TcJ, TcC); + TcK = FMA(KP923879532, TcJ, TcC); + { + E Td0, Td2, TcU, TcS; + Td0 = FNMS(KP534511135, TcZ, TcY); + Td2 = FMA(KP534511135, TcY, TcZ); + TcU = FMA(KP303346683, TcK, TcR); + TcS = FNMS(KP303346683, TcR, TcK); + Tdj = FMA(KP1_847759065, Td4, Td3); + Td5 = FNMS(KP1_847759065, Td4, Td3); + R0[WS(rs, 43)] = FMA(KP1_763842528, Td0, TcX); + R0[WS(rs, 11)] = FNMS(KP1_763842528, Td0, TcX); + R0[WS(rs, 59)] = FMA(KP1_763842528, Td2, Td1); + R0[WS(rs, 27)] = FNMS(KP1_763842528, Td2, Td1); + R0[WS(rs, 51)] = FMA(KP1_913880671, TcU, TcT); + R0[WS(rs, 19)] = FNMS(KP1_913880671, TcU, TcT); + R0[WS(rs, 3)] = FMA(KP1_913880671, TcS, Tcz); + R0[WS(rs, 35)] = FNMS(KP1_913880671, TcS, Tcz); + } + } + Tdm = FMA(KP923879532, Tde, Tdd); + Tdf = FNMS(KP923879532, Tde, Tdd); + Tdk = FMA(KP198912367, Td6, Td7); + Td8 = FNMS(KP198912367, Td7, Td6); + { + E T5F, T2R, T5G, T3K, T64, T5S, T5X, T5x, T5U, T4q, T4R, T63, 
T5P, T5i, T5V; + E T5A; + { + E T5N, T5O, T5R, T3w, T3J, T5Q, T5y, T5z; + { + E T2p, Tdl, Tdp, Tdh, Td9, Tdn, Tdc, T2Q; + T5N = FNMS(KP1_847759065, T2o, T2d); + T2p = FMA(KP1_847759065, T2o, T2d); + Tdl = FNMS(KP1_961570560, Tdk, Tdj); + Tdp = FMA(KP1_961570560, Tdk, Tdj); + Tdh = FMA(KP1_961570560, Td8, Td5); + Td9 = FNMS(KP1_961570560, Td8, Td5); + Tdn = FMA(KP923879532, Tdb, Tda); + Tdc = FNMS(KP923879532, Tdb, Tda); + T2Q = FNMS(KP198912367, T2P, T2G); + T5O = FMA(KP198912367, T2G, T2P); + T5R = FNMS(KP923879532, T3v, T38); + T3w = FMA(KP923879532, T3v, T38); + { + E Tdo, Tdq, Tdi, Tdg; + Tdo = FNMS(KP098491403, Tdn, Tdm); + Tdq = FMA(KP098491403, Tdm, Tdn); + Tdi = FMA(KP820678790, Tdc, Tdf); + Tdg = FNMS(KP820678790, Tdf, Tdc); + T5F = FNMS(KP1_961570560, T2Q, T2p); + T2R = FMA(KP1_961570560, T2Q, T2p); + R0[WS(rs, 47)] = FMA(KP1_990369453, Tdo, Tdl); + R0[WS(rs, 15)] = FNMS(KP1_990369453, Tdo, Tdl); + R0[WS(rs, 63)] = FMA(KP1_990369453, Tdq, Tdp); + R0[WS(rs, 31)] = FNMS(KP1_990369453, Tdq, Tdp); + R0[WS(rs, 55)] = FMA(KP1_546020906, Tdi, Tdh); + R0[WS(rs, 23)] = FNMS(KP1_546020906, Tdi, Tdh); + R0[WS(rs, 7)] = FMA(KP1_546020906, Tdg, Td9); + R0[WS(rs, 39)] = FNMS(KP1_546020906, Tdg, Td9); + T3J = FMA(KP923879532, T3I, T3F); + T5Q = FNMS(KP923879532, T3I, T3F); + } + } + T5G = FMA(KP098491403, T3w, T3J); + T3K = FNMS(KP098491403, T3J, T3w); + T64 = FMA(KP820678790, T5Q, T5R); + T5S = FNMS(KP820678790, T5R, T5Q); + T5X = FNMS(KP923879532, T5w, T5t); + T5x = FMA(KP923879532, T5w, T5t); + T5U = FNMS(KP923879532, T4p, T42); + T4q = FMA(KP923879532, T4p, T42); + T4R = FNMS(KP198912367, T4Q, T4H); + T5y = FMA(KP198912367, T4H, T4Q); + T63 = FMA(KP1_961570560, T5O, T5N); + T5P = FNMS(KP1_961570560, T5O, T5N); + T5z = FNMS(KP198912367, T58, T5h); + T5i = FMA(KP198912367, T5h, T58); + T5V = T5y - T5z; + T5A = T5y + T5z; + } + { + E T5W, T5I, T5Z, T5J; + { + E T5D, T3L, T67, T5B, T5Y, T5j, T65, T69, T66, T5k; + T5D = FNMS(KP1_990369453, T3K, T2R); + T3L = 
FMA(KP1_990369453, T3K, T2R); + T5W = FNMS(KP980785280, T5V, T5U); + T67 = FMA(KP980785280, T5V, T5U); + T5I = FNMS(KP980785280, T5A, T5x); + T5B = FMA(KP980785280, T5A, T5x); + T5Y = T5i - T4R; + T5j = T4R + T5i; + T65 = FNMS(KP1_546020906, T64, T63); + T69 = FMA(KP1_546020906, T64, T63); + T5Z = FNMS(KP980785280, T5Y, T5X); + T66 = FMA(KP980785280, T5Y, T5X); + T5J = FNMS(KP980785280, T5j, T4q); + T5k = FMA(KP980785280, T5j, T4q); + { + E T68, T6a, T5E, T5C; + T68 = FNMS(KP357805721, T67, T66); + T6a = FMA(KP357805721, T66, T67); + T5E = FMA(KP049126849, T5k, T5B); + T5C = FNMS(KP049126849, T5B, T5k); + R1[WS(rs, 60)] = FMA(KP1_883088130, T6a, T69); + R1[WS(rs, 28)] = FNMS(KP1_883088130, T6a, T69); + R1[WS(rs, 44)] = FMA(KP1_883088130, T68, T65); + R1[WS(rs, 12)] = FNMS(KP1_883088130, T68, T65); + R1[0] = FMA(KP1_997590912, T5C, T3L); + R1[WS(rs, 32)] = FNMS(KP1_997590912, T5C, T3L); + R1[WS(rs, 16)] = FNMS(KP1_997590912, T5E, T5D); + R1[WS(rs, 48)] = FMA(KP1_997590912, T5E, T5D); + } + } + { + E T5H, T5K, T5T, T60; + T5L = FMA(KP1_990369453, T5G, T5F); + T5H = FNMS(KP1_990369453, T5G, T5F); + T5K = FNMS(KP906347169, T5J, T5I); + T5M = FMA(KP906347169, T5I, T5J); + T61 = FMA(KP1_546020906, T5S, T5P); + T5T = FNMS(KP1_546020906, T5S, T5P); + T60 = FNMS(KP472964775, T5Z, T5W); + T62 = FMA(KP472964775, T5W, T5Z); + R1[WS(rs, 40)] = FMA(KP1_481902250, T5K, T5H); + R1[WS(rs, 8)] = FNMS(KP1_481902250, T5K, T5H); + R1[WS(rs, 4)] = FMA(KP1_807978586, T60, T5T); + R1[WS(rs, 36)] = FNMS(KP1_807978586, T60, T5T); + } + } + } + } + } + { + E T8B, T8C, T8R, T8S; + { + E T8v, T7v, T8w, T7O, T8N, T8n, T8U, T8I, T8T, T8F, T8K, T80, T87, T8e, T8L; + E T8q; + { + E T8D, T8E, T8H, T8G, T8o, T8p; + { + E T7n, T7u, T7G, T7N; + T8D = FMA(KP1_847759065, T7m, T7j); + T7n = FNMS(KP1_847759065, T7m, T7j); + R1[WS(rs, 52)] = FMA(KP1_807978586, T62, T61); + R1[WS(rs, 20)] = FNMS(KP1_807978586, T62, T61); + R1[WS(rs, 56)] = FMA(KP1_481902250, T5M, T5L); + R1[WS(rs, 24)] = FNMS(KP1_481902250, 
T5M, T5L); + T7u = FNMS(KP668178637, T7t, T7q); + T8E = FMA(KP668178637, T7q, T7t); + T8H = FMA(KP923879532, T7F, T7y); + T7G = FNMS(KP923879532, T7F, T7y); + T7N = FMA(KP923879532, T7M, T7J); + T8G = FNMS(KP923879532, T7M, T7J); + T8v = FNMS(KP1_662939224, T7u, T7n); + T7v = FMA(KP1_662939224, T7u, T7n); + T8w = FMA(KP303346683, T7G, T7N); + T7O = FNMS(KP303346683, T7N, T7G); + } + T8N = FNMS(KP923879532, T8m, T8j); + T8n = FMA(KP923879532, T8m, T8j); + T8U = FMA(KP534511135, T8G, T8H); + T8I = FNMS(KP534511135, T8H, T8G); + T8T = FMA(KP1_662939224, T8E, T8D); + T8F = FNMS(KP1_662939224, T8E, T8D); + T8K = FMA(KP923879532, T7Z, T7S); + T80 = FNMS(KP923879532, T7Z, T7S); + T87 = FNMS(KP668178637, T86, T83); + T8o = FMA(KP668178637, T83, T86); + T8p = FMA(KP668178637, T8a, T8d); + T8e = FNMS(KP668178637, T8d, T8a); + T8L = T8o + T8p; + T8q = T8o - T8p; + } + { + E T8M, T8y, T8P, T8z; + { + E T8t, T7P, T8X, T8r, T8O, T8f, T8V, T8Z, T8W, T8g; + T8t = FNMS(KP1_913880671, T7O, T7v); + T7P = FMA(KP1_913880671, T7O, T7v); + T8M = FNMS(KP831469612, T8L, T8K); + T8X = FMA(KP831469612, T8L, T8K); + T8y = FNMS(KP831469612, T8q, T8n); + T8r = FMA(KP831469612, T8q, T8n); + T8O = T8e - T87; + T8f = T87 + T8e; + T8V = FNMS(KP1_763842528, T8U, T8T); + T8Z = FMA(KP1_763842528, T8U, T8T); + T8P = FNMS(KP831469612, T8O, T8N); + T8W = FMA(KP831469612, T8O, T8N); + T8z = FNMS(KP831469612, T8f, T80); + T8g = FMA(KP831469612, T8f, T80); + { + E T8Y, T90, T8u, T8s; + T8Y = FNMS(KP250486960, T8X, T8W); + T90 = FMA(KP250486960, T8W, T8X); + T8u = FMA(KP148335987, T8g, T8r); + T8s = FNMS(KP148335987, T8r, T8g); + R1[WS(rs, 61)] = FMA(KP1_940062506, T90, T8Z); + R1[WS(rs, 29)] = FNMS(KP1_940062506, T90, T8Z); + R1[WS(rs, 45)] = FMA(KP1_940062506, T8Y, T8V); + R1[WS(rs, 13)] = FNMS(KP1_940062506, T8Y, T8V); + R1[WS(rs, 1)] = FMA(KP1_978353019, T8s, T7P); + R1[WS(rs, 33)] = FNMS(KP1_978353019, T8s, T7P); + R1[WS(rs, 17)] = FNMS(KP1_978353019, T8u, T8t); + R1[WS(rs, 49)] = FMA(KP1_978353019, 
T8u, T8t); + } + } + { + E T8x, T8A, T8J, T8Q; + T8B = FMA(KP1_913880671, T8w, T8v); + T8x = FNMS(KP1_913880671, T8w, T8v); + T8A = FNMS(KP741650546, T8z, T8y); + T8C = FMA(KP741650546, T8y, T8z); + T8R = FMA(KP1_763842528, T8I, T8F); + T8J = FNMS(KP1_763842528, T8I, T8F); + T8Q = FNMS(KP599376933, T8P, T8M); + T8S = FMA(KP599376933, T8M, T8P); + R1[WS(rs, 41)] = FMA(KP1_606415062, T8A, T8x); + R1[WS(rs, 9)] = FNMS(KP1_606415062, T8A, T8x); + R1[WS(rs, 5)] = FMA(KP1_715457220, T8Q, T8J); + R1[WS(rs, 37)] = FNMS(KP1_715457220, T8Q, T8J); + } + } + } + { + E T6R, T6S, T77, T78; + { + E T6L, T6h, T6M, T6o, T73, T6D, T7a, T6Y, T79, T6V, T70, T6s, T6y, T6v, T71; + E T6G; + { + E T6T, T6U, T6X, T6W, T6E, T6F; + { + E T6d, T6g, T6k, T6n; + T6T = FMA(KP1_847759065, T6c, T6b); + T6d = FNMS(KP1_847759065, T6c, T6b); + R1[WS(rs, 53)] = FMA(KP1_715457220, T8S, T8R); + R1[WS(rs, 21)] = FNMS(KP1_715457220, T8S, T8R); + R1[WS(rs, 57)] = FMA(KP1_606415062, T8C, T8B); + R1[WS(rs, 25)] = FNMS(KP1_606415062, T8C, T8B); + T6g = FNMS(KP668178637, T6f, T6e); + T6U = FMA(KP668178637, T6e, T6f); + T6X = FMA(KP923879532, T6j, T6i); + T6k = FNMS(KP923879532, T6j, T6i); + T6n = FNMS(KP923879532, T6m, T6l); + T6W = FMA(KP923879532, T6m, T6l); + T6L = FMA(KP1_662939224, T6g, T6d); + T6h = FNMS(KP1_662939224, T6g, T6d); + T6M = FMA(KP534511135, T6k, T6n); + T6o = FNMS(KP534511135, T6n, T6k); + } + T73 = FMA(KP923879532, T6C, T6B); + T6D = FNMS(KP923879532, T6C, T6B); + T7a = FMA(KP303346683, T6W, T6X); + T6Y = FNMS(KP303346683, T6X, T6W); + T79 = FMA(KP1_662939224, T6U, T6T); + T6V = FNMS(KP1_662939224, T6U, T6T); + T70 = FMA(KP923879532, T6r, T6q); + T6s = FNMS(KP923879532, T6r, T6q); + T6y = FNMS(KP668178637, T6x, T6w); + T6E = FMA(KP668178637, T6w, T6x); + T6F = FNMS(KP668178637, T6t, T6u); + T6v = FMA(KP668178637, T6u, T6t); + T71 = T6E + T6F; + T6G = T6E - T6F; + } + { + E T72, T6O, T75, T6P; + { + E T6J, T6p, T7d, T6H, T74, T6z, T7b, T7f, T7c, T6A; + T6J = FNMS(KP1_763842528, T6o, T6h); + 
T6p = FMA(KP1_763842528, T6o, T6h); + T72 = FNMS(KP831469612, T71, T70); + T7d = FMA(KP831469612, T71, T70); + T6O = FNMS(KP831469612, T6G, T6D); + T6H = FMA(KP831469612, T6G, T6D); + T74 = T6y + T6v; + T6z = T6v - T6y; + T7b = FNMS(KP1_913880671, T7a, T79); + T7f = FMA(KP1_913880671, T7a, T79); + T75 = FNMS(KP831469612, T74, T73); + T7c = FMA(KP831469612, T74, T73); + T6P = FNMS(KP831469612, T6z, T6s); + T6A = FMA(KP831469612, T6z, T6s); + { + E T7e, T7g, T6K, T6I; + T7e = FNMS(KP148335987, T7d, T7c); + T7g = FMA(KP148335987, T7c, T7d); + T6K = FMA(KP250486960, T6A, T6H); + T6I = FNMS(KP250486960, T6H, T6A); + R1[WS(rs, 62)] = FMA(KP1_978353019, T7g, T7f); + R1[WS(rs, 30)] = FNMS(KP1_978353019, T7g, T7f); + R1[WS(rs, 46)] = FMA(KP1_978353019, T7e, T7b); + R1[WS(rs, 14)] = FNMS(KP1_978353019, T7e, T7b); + R1[WS(rs, 2)] = FMA(KP1_940062506, T6I, T6p); + R1[WS(rs, 34)] = FNMS(KP1_940062506, T6I, T6p); + R1[WS(rs, 18)] = FNMS(KP1_940062506, T6K, T6J); + R1[WS(rs, 50)] = FMA(KP1_940062506, T6K, T6J); + } + } + { + E T6N, T6Q, T6Z, T76; + T6R = FMA(KP1_763842528, T6M, T6L); + T6N = FNMS(KP1_763842528, T6M, T6L); + T6Q = FNMS(KP599376933, T6P, T6O); + T6S = FMA(KP599376933, T6O, T6P); + T77 = FMA(KP1_913880671, T6Y, T6V); + T6Z = FNMS(KP1_913880671, T6Y, T6V); + T76 = FNMS(KP741650546, T75, T72); + T78 = FMA(KP741650546, T72, T75); + R1[WS(rs, 42)] = FMA(KP1_715457220, T6Q, T6N); + R1[WS(rs, 10)] = FNMS(KP1_715457220, T6Q, T6N); + R1[WS(rs, 6)] = FMA(KP1_606415062, T76, T6Z); + R1[WS(rs, 38)] = FNMS(KP1_606415062, T76, T6Z); + } + } + } + { + E T9B, T97, T9C, T9e, T9T, T9t, Ta0, T9O, T9Z, T9L, T9Q, T9i, T9l, T9o, T9R; + E T9w; + { + E T9J, T9K, T9N, T9M, T9u, T9v; + { + E T93, T96, T9a, T9d; + T9J = FMA(KP1_847759065, T92, T91); + T93 = FNMS(KP1_847759065, T92, T91); + R1[WS(rs, 54)] = FMA(KP1_606415062, T78, T77); + R1[WS(rs, 22)] = FNMS(KP1_606415062, T78, T77); + R1[WS(rs, 58)] = FMA(KP1_715457220, T6S, T6R); + R1[WS(rs, 26)] = FNMS(KP1_715457220, T6S, T6R); + T96 = 
FNMS(KP198912367, T95, T94); + T9K = FMA(KP198912367, T94, T95); + T9N = FMA(KP923879532, T99, T98); + T9a = FNMS(KP923879532, T99, T98); + T9d = FNMS(KP923879532, T9c, T9b); + T9M = FMA(KP923879532, T9c, T9b); + T9B = FMA(KP1_961570560, T96, T93); + T97 = FNMS(KP1_961570560, T96, T93); + T9C = FMA(KP820678790, T9a, T9d); + T9e = FNMS(KP820678790, T9d, T9a); + } + T9T = FMA(KP923879532, T9s, T9r); + T9t = FNMS(KP923879532, T9s, T9r); + Ta0 = FMA(KP098491403, T9M, T9N); + T9O = FNMS(KP098491403, T9N, T9M); + T9Z = FMA(KP1_961570560, T9K, T9J); + T9L = FNMS(KP1_961570560, T9K, T9J); + T9Q = FMA(KP923879532, T9h, T9g); + T9i = FNMS(KP923879532, T9h, T9g); + T9l = FNMS(KP198912367, T9k, T9j); + T9u = FMA(KP198912367, T9j, T9k); + T9v = FMA(KP198912367, T9m, T9n); + T9o = FNMS(KP198912367, T9n, T9m); + T9R = T9u + T9v; + T9w = T9u - T9v; + } + { + E T9S, T9E, T9V, T9F; + { + E T9z, T9f, Ta3, T9x, T9U, T9p, Ta1, Ta5, Ta2, T9q; + T9z = FNMS(KP1_546020906, T9e, T97); + T9f = FMA(KP1_546020906, T9e, T97); + T9S = FNMS(KP980785280, T9R, T9Q); + Ta3 = FMA(KP980785280, T9R, T9Q); + T9E = FNMS(KP980785280, T9w, T9t); + T9x = FMA(KP980785280, T9w, T9t); + T9U = T9l - T9o; + T9p = T9l + T9o; + Ta1 = FNMS(KP1_990369453, Ta0, T9Z); + Ta5 = FMA(KP1_990369453, Ta0, T9Z); + T9V = FNMS(KP980785280, T9U, T9T); + Ta2 = FMA(KP980785280, T9U, T9T); + T9F = FMA(KP980785280, T9p, T9i); + T9q = FNMS(KP980785280, T9p, T9i); + { + E Ta4, Ta6, T9A, T9y; + Ta4 = FNMS(KP049126849, Ta3, Ta2); + Ta6 = FMA(KP049126849, Ta2, Ta3); + T9A = FMA(KP357805721, T9q, T9x); + T9y = FNMS(KP357805721, T9x, T9q); + R1[WS(rs, 63)] = FMA(KP1_997590912, Ta6, Ta5); + R1[WS(rs, 31)] = FNMS(KP1_997590912, Ta6, Ta5); + R1[WS(rs, 47)] = FMA(KP1_997590912, Ta4, Ta1); + R1[WS(rs, 15)] = FNMS(KP1_997590912, Ta4, Ta1); + R1[WS(rs, 3)] = FMA(KP1_883088130, T9y, T9f); + R1[WS(rs, 35)] = FNMS(KP1_883088130, T9y, T9f); + R1[WS(rs, 19)] = FNMS(KP1_883088130, T9A, T9z); + R1[WS(rs, 51)] = FMA(KP1_883088130, T9A, T9z); + } + } + { 
+ E T9D, T9G, T9P, T9W; + T9H = FMA(KP1_546020906, T9C, T9B); + T9D = FNMS(KP1_546020906, T9C, T9B); + T9G = FNMS(KP472964775, T9F, T9E); + T9I = FMA(KP472964775, T9E, T9F); + T9X = FMA(KP1_990369453, T9O, T9L); + T9P = FNMS(KP1_990369453, T9O, T9L); + T9W = FNMS(KP906347169, T9V, T9S); + T9Y = FMA(KP906347169, T9S, T9V); + R1[WS(rs, 43)] = FMA(KP1_807978586, T9G, T9D); + R1[WS(rs, 11)] = FNMS(KP1_807978586, T9G, T9D); + R1[WS(rs, 7)] = FMA(KP1_481902250, T9W, T9P); + R1[WS(rs, 39)] = FNMS(KP1_481902250, T9W, T9P); + } + } + } + } + } + } + } + } + R1[WS(rs, 55)] = FMA(KP1_481902250, T9Y, T9X); + R1[WS(rs, 23)] = FNMS(KP1_481902250, T9Y, T9X); + R1[WS(rs, 59)] = FMA(KP1_807978586, T9I, T9H); + R1[WS(rs, 27)] = FNMS(KP1_807978586, T9I, T9H); + } + } +} + /* Descriptor passed to the registration call below; it carries the value 128, the name string "r2cb_128", a four-number tuple and a pointer to GENUS. NOTE(review): 128 is presumably the transform size and {416, 0, 540, 0} presumably the operation counts of this variant; confirm against the kr2c_desc definition. */ +static const kr2c_desc desc = { 128, "r2cb_128", {416, 0, 540, 0}, &GENUS }; + /* Registration entry point: hands the r2cb_128 function and its descriptor desc to X(kr2c_register) for the planner p. */ +void X(codelet_r2cb_128) (planner *p) { + X(kr2c_register) (p, r2cb_128, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 128 -name r2cb_128 -include r2cb.h */ + +/* + * This function contains 956 FP additions, 342 FP multiplications, + * (or, 812 additions, 198 multiplications, 144 fused multiply/add), + * 198 stack variables, 39 constants, and 256 memory accesses + */ +#include "r2cb.h" + +static void r2cb_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_028205488, +1.028205488386443453187387677937631545216098241); + DK(KP1_715457220, +1.715457220000544139804539968569540274084981599); + DK(KP1_606415062, +1.606415062961289819613353025926283847759138854); + DK(KP1_191398608, +1.191398608984866686934073057659939779023852677); + DK(KP1_940062506, +1.940062506389087985207968414572200502913731924); + DK(KP485960359, +0.485960359806527779896548324154942236641981567); + DK(KP293460948, +0.293460948910723503317700259293435639412430633); + DK(KP1_978353019, 
+1.978353019929561946903347476032486127967379067); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP855110186, +0.855110186860564188641933713777597068609157259); + DK(KP1_807978586, +1.807978586246886663172400594461074097420264050); + DK(KP1_481902250, +1.481902250709918182351233794990325459457910619); + DK(KP1_343117909, +1.343117909694036801250753700854843606457501264); + DK(KP1_883088130, +1.883088130366041556825018805199004714371179592); + DK(KP673779706, +0.673779706784440101378506425238295140955533559); + DK(KP098135348, +0.098135348654836028509909953885365316629490726); + DK(KP1_997590912, +1.997590912410344785429543209518201388886407229); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP580569354, +0.580569354508924735272384751634790549382952557); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP942793473, +0.942793473651995297112775251810508755314920638); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_268786568, +1.268786568327290996430343226450986741351374190); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP196034280, +0.196034280659121203988391127777283691722273346); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP765366864, 
+0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(512, rs), MAKE_VOLATILE_STRIDE(512, csr), MAKE_VOLATILE_STRIDE(512, csi)) { + E Ta, T6q, T2a, T5k, T8x, Tbx, TcF, Ten, Th, T6r, T2j, T5l, T8E, Tby, TcI; + E Teo, Tx, T6t, TcM, Teq, TcP, Ter, T2t, T5n, T2C, T5o, T8Q, TbA, T8X, TbB; + E T6w, T7L, T1j, T6L, Tde, TeC, TdL, TeR, T3v, T5z, T4I, T5O, T9O, TbM, TaV; + E Tc1, T78, T7Z, TN, T6z, TcU, Teu, Td8, Tey, T2N, T5r, T3j, T5v, T9a, TbE; + E T9A, TbI, T6H, T7O, T1O, T7V, T48, T4u, Tds, TeG, T5E, T5K, Taf, TbP, Tdp; + E TeF, T6U, T72, Tam, TbQ, T23, T7U, T4r, T4v, Tdz, TeJ, T5H, T5L, Tay, TbS; + E Tdw, TeI, T6Z, T73, TaF, TbT, T1y, T75, Tdl, TeQ, TdI, TeD, T3O, T5N, T4z; + E T5A, Ta3, Tc0, TaO, TbN, T6O, T80, T12, T6E, Td1, Tex, Td5, Tev, T36, T5u; + E T3a, T5s, T9p, TbH, T9t, TbF, T6C, T7P; + { + E T5, T8s, T3, T8q, T9, T8u, T29, T8v, T6, T26; + { + E T4, T8r, T1, T2; + T4 = Cr[WS(csr, 32)]; + T5 = KP2_000000000 * T4; + T8r = Ci[WS(csi, 32)]; + T8s = KP2_000000000 * T8r; + T1 = Cr[0]; + T2 = Cr[WS(csr, 64)]; + T3 = T1 + T2; + T8q = T1 - T2; + { + E T7, T8, T27, T28; + T7 = Cr[WS(csr, 16)]; + T8 = Cr[WS(csr, 48)]; + T9 = KP2_000000000 * (T7 + T8); + T8u = T7 - T8; + T27 = Ci[WS(csi, 16)]; + T28 = Ci[WS(csi, 48)]; + T29 = KP2_000000000 * (T27 - T28); + T8v = T27 + T28; + } + } + T6 = T3 + T5; + Ta = T6 + T9; + T6q = T6 - T9; + T26 = T3 - T5; + T2a = T26 - T29; + T5k = T26 + T29; + { + E T8t, T8w, TcD, TcE; + T8t = T8q - T8s; + T8w = KP1_414213562 * (T8u - T8v); + T8x = T8t + T8w; + Tbx = T8t - T8w; + TcD = T8q + T8s; + TcE = KP1_414213562 * (T8u + T8v); + TcF = TcD - TcE; + Ten = TcD + TcE; + } + } + { + E Td, T8y, T2e, T8C, Tg, T8B, 
T2h, T8z, T2b, T2i; + { + E Tb, Tc, T2c, T2d; + Tb = Cr[WS(csr, 8)]; + Tc = Cr[WS(csr, 56)]; + Td = Tb + Tc; + T8y = Tb - Tc; + T2c = Ci[WS(csi, 8)]; + T2d = Ci[WS(csi, 56)]; + T2e = T2c - T2d; + T8C = T2c + T2d; + } + { + E Te, Tf, T2f, T2g; + Te = Cr[WS(csr, 40)]; + Tf = Cr[WS(csr, 24)]; + Tg = Te + Tf; + T8B = Te - Tf; + T2f = Ci[WS(csi, 40)]; + T2g = Ci[WS(csi, 24)]; + T2h = T2f - T2g; + T8z = T2f + T2g; + } + Th = KP2_000000000 * (Td + Tg); + T6r = KP2_000000000 * (T2h + T2e); + T2b = Td - Tg; + T2i = T2e - T2h; + T2j = KP1_414213562 * (T2b - T2i); + T5l = KP1_414213562 * (T2b + T2i); + { + E T8A, T8D, TcG, TcH; + T8A = T8y - T8z; + T8D = T8B + T8C; + T8E = FNMS(KP765366864, T8D, KP1_847759065 * T8A); + Tby = FMA(KP765366864, T8A, KP1_847759065 * T8D); + TcG = T8y + T8z; + TcH = T8C - T8B; + TcI = FNMS(KP1_847759065, TcH, KP765366864 * TcG); + Teo = FMA(KP1_847759065, TcG, KP765366864 * TcH); + } + } + { + E Tl, T8G, T2x, T8V, To, T8U, T2A, T8H, Tv, T8S, T2o, T8O, Ts, T8R, T2r; + E T8L; + { + E Tj, Tk, T2y, T2z; + Tj = Cr[WS(csr, 4)]; + Tk = Cr[WS(csr, 60)]; + Tl = Tj + Tk; + T8G = Tj - Tk; + { + E T2v, T2w, Tm, Tn; + T2v = Ci[WS(csi, 4)]; + T2w = Ci[WS(csi, 60)]; + T2x = T2v - T2w; + T8V = T2v + T2w; + Tm = Cr[WS(csr, 36)]; + Tn = Cr[WS(csr, 28)]; + To = Tm + Tn; + T8U = Tm - Tn; + } + T2y = Ci[WS(csi, 36)]; + T2z = Ci[WS(csi, 28)]; + T2A = T2y - T2z; + T8H = T2y + T2z; + { + E Tt, Tu, T8M, T2m, T2n, T8N; + Tt = Cr[WS(csr, 12)]; + Tu = Cr[WS(csr, 52)]; + T8M = Tt - Tu; + T2m = Ci[WS(csi, 52)]; + T2n = Ci[WS(csi, 12)]; + T8N = T2n + T2m; + Tv = Tt + Tu; + T8S = T8M + T8N; + T2o = T2m - T2n; + T8O = T8M - T8N; + } + { + E Tq, Tr, T8J, T2p, T2q, T8K; + Tq = Cr[WS(csr, 20)]; + Tr = Cr[WS(csr, 44)]; + T8J = Tq - Tr; + T2p = Ci[WS(csi, 20)]; + T2q = Ci[WS(csi, 44)]; + T8K = T2p + T2q; + Ts = Tq + Tr; + T8R = T8J + T8K; + T2r = T2p - T2q; + T8L = T8J - T8K; + } + } + { + E Tp, Tw, TcK, TcL; + Tp = Tl + To; + Tw = Ts + Tv; + Tx = KP2_000000000 * (Tp + Tw); + T6t = Tp 
- Tw; + TcK = T8G + T8H; + TcL = KP707106781 * (T8R + T8S); + TcM = TcK - TcL; + Teq = TcK + TcL; + } + { + E TcN, TcO, T2l, T2s; + TcN = KP707106781 * (T8L - T8O); + TcO = T8V - T8U; + TcP = TcN + TcO; + Ter = TcO - TcN; + T2l = Tl - To; + T2s = T2o - T2r; + T2t = T2l + T2s; + T5n = T2l - T2s; + } + { + E T2u, T2B, T8I, T8P; + T2u = Ts - Tv; + T2B = T2x - T2A; + T2C = T2u + T2B; + T5o = T2B - T2u; + T8I = T8G - T8H; + T8P = KP707106781 * (T8L + T8O); + T8Q = T8I + T8P; + TbA = T8I - T8P; + } + { + E T8T, T8W, T6u, T6v; + T8T = KP707106781 * (T8R - T8S); + T8W = T8U + T8V; + T8X = T8T + T8W; + TbB = T8W - T8T; + T6u = T2A + T2x; + T6v = T2r + T2o; + T6w = T6u - T6v; + T7L = KP2_000000000 * (T6v + T6u); + } + } + { + E T17, T9E, T4D, TaT, T1a, TaS, T4G, T9F, T1h, TaQ, T3q, T9M, T1e, TaP, T3t; + E T9J; + { + E T15, T16, T4E, T4F; + T15 = Cr[WS(csr, 1)]; + T16 = Cr[WS(csr, 63)]; + T17 = T15 + T16; + T9E = T15 - T16; + { + E T4B, T4C, T18, T19; + T4B = Ci[WS(csi, 1)]; + T4C = Ci[WS(csi, 63)]; + T4D = T4B - T4C; + TaT = T4B + T4C; + T18 = Cr[WS(csr, 33)]; + T19 = Cr[WS(csr, 31)]; + T1a = T18 + T19; + TaS = T18 - T19; + } + T4E = Ci[WS(csi, 33)]; + T4F = Ci[WS(csi, 31)]; + T4G = T4E - T4F; + T9F = T4E + T4F; + { + E T1f, T1g, T9K, T3o, T3p, T9L; + T1f = Cr[WS(csr, 15)]; + T1g = Cr[WS(csr, 49)]; + T9K = T1f - T1g; + T3o = Ci[WS(csi, 49)]; + T3p = Ci[WS(csi, 15)]; + T9L = T3p + T3o; + T1h = T1f + T1g; + TaQ = T9K + T9L; + T3q = T3o - T3p; + T9M = T9K - T9L; + } + { + E T1c, T1d, T9H, T3r, T3s, T9I; + T1c = Cr[WS(csr, 17)]; + T1d = Cr[WS(csr, 47)]; + T9H = T1c - T1d; + T3r = Ci[WS(csi, 17)]; + T3s = Ci[WS(csi, 47)]; + T9I = T3r + T3s; + T1e = T1c + T1d; + TaP = T9H + T9I; + T3t = T3r - T3s; + T9J = T9H - T9I; + } + } + { + E T1b, T1i, Tdc, Tdd; + T1b = T17 + T1a; + T1i = T1e + T1h; + T1j = T1b + T1i; + T6L = T1b - T1i; + Tdc = T9E + T9F; + Tdd = KP707106781 * (TaP + TaQ); + Tde = Tdc - Tdd; + TeC = Tdc + Tdd; + } + { + E TdJ, TdK, T3n, T3u; + TdJ = KP707106781 * (T9J - 
T9M); + TdK = TaT - TaS; + TdL = TdJ + TdK; + TeR = TdK - TdJ; + T3n = T17 - T1a; + T3u = T3q - T3t; + T3v = T3n + T3u; + T5z = T3n - T3u; + } + { + E T4A, T4H, T9G, T9N; + T4A = T1e - T1h; + T4H = T4D - T4G; + T4I = T4A + T4H; + T5O = T4H - T4A; + T9G = T9E - T9F; + T9N = KP707106781 * (T9J + T9M); + T9O = T9G + T9N; + TbM = T9G - T9N; + } + { + E TaR, TaU, T76, T77; + TaR = KP707106781 * (TaP - TaQ); + TaU = TaS + TaT; + TaV = TaR + TaU; + Tc1 = TaU - TaR; + T76 = T4G + T4D; + T77 = T3t + T3q; + T78 = T76 - T77; + T7Z = T77 + T76; + } + } + { + E TB, T90, T3e, T9y, TE, T9x, T3h, T91, TL, T9v, T2I, T98, TI, T9u, T2L; + E T95; + { + E Tz, TA, T3f, T3g; + Tz = Cr[WS(csr, 2)]; + TA = Cr[WS(csr, 62)]; + TB = Tz + TA; + T90 = Tz - TA; + { + E T3c, T3d, TC, TD; + T3c = Ci[WS(csi, 2)]; + T3d = Ci[WS(csi, 62)]; + T3e = T3c - T3d; + T9y = T3c + T3d; + TC = Cr[WS(csr, 34)]; + TD = Cr[WS(csr, 30)]; + TE = TC + TD; + T9x = TC - TD; + } + T3f = Ci[WS(csi, 34)]; + T3g = Ci[WS(csi, 30)]; + T3h = T3f - T3g; + T91 = T3f + T3g; + { + E TJ, TK, T96, T2G, T2H, T97; + TJ = Cr[WS(csr, 14)]; + TK = Cr[WS(csr, 50)]; + T96 = TJ - TK; + T2G = Ci[WS(csi, 50)]; + T2H = Ci[WS(csi, 14)]; + T97 = T2H + T2G; + TL = TJ + TK; + T9v = T96 + T97; + T2I = T2G - T2H; + T98 = T96 - T97; + } + { + E TG, TH, T93, T2J, T2K, T94; + TG = Cr[WS(csr, 18)]; + TH = Cr[WS(csr, 46)]; + T93 = TG - TH; + T2J = Ci[WS(csi, 18)]; + T2K = Ci[WS(csi, 46)]; + T94 = T2J + T2K; + TI = TG + TH; + T9u = T93 + T94; + T2L = T2J - T2K; + T95 = T93 - T94; + } + } + { + E TF, TM, TcS, TcT; + TF = TB + TE; + TM = TI + TL; + TN = TF + TM; + T6z = TF - TM; + TcS = T90 + T91; + TcT = KP707106781 * (T9u + T9v); + TcU = TcS - TcT; + Teu = TcS + TcT; + } + { + E Td6, Td7, T2F, T2M; + Td6 = KP707106781 * (T95 - T98); + Td7 = T9y - T9x; + Td8 = Td6 + Td7; + Tey = Td7 - Td6; + T2F = TB - TE; + T2M = T2I - T2L; + T2N = T2F + T2M; + T5r = T2F - T2M; + } + { + E T3b, T3i, T92, T99; + T3b = TI - TL; + T3i = T3e - T3h; + T3j = T3b + T3i; + T5v 
= T3i - T3b; + T92 = T90 - T91; + T99 = KP707106781 * (T95 + T98); + T9a = T92 + T99; + TbE = T92 - T99; + } + { + E T9w, T9z, T6F, T6G; + T9w = KP707106781 * (T9u - T9v); + T9z = T9x + T9y; + T9A = T9w + T9z; + TbI = T9z - T9w; + T6F = T3h + T3e; + T6G = T2L + T2I; + T6H = T6F - T6G; + T7O = T6G + T6F; + } + } + { + E T1G, Taj, T3Q, Ta5, T46, Tak, T6R, Ta6, T1N, Tag, Tah, T3X, T3Z, Taa, Tad; + E T6S, Tdn, Tdo; + { + E T1A, T1B, T1C, T1D, T1E, T1F; + T1A = Cr[WS(csr, 5)]; + T1B = Cr[WS(csr, 59)]; + T1C = T1A + T1B; + T1D = Cr[WS(csr, 37)]; + T1E = Cr[WS(csr, 27)]; + T1F = T1D + T1E; + T1G = T1C + T1F; + Taj = T1D - T1E; + T3Q = T1C - T1F; + Ta5 = T1A - T1B; + } + { + E T40, T41, T42, T43, T44, T45; + T40 = Ci[WS(csi, 5)]; + T41 = Ci[WS(csi, 59)]; + T42 = T40 - T41; + T43 = Ci[WS(csi, 37)]; + T44 = Ci[WS(csi, 27)]; + T45 = T43 - T44; + T46 = T42 - T45; + Tak = T40 + T41; + T6R = T45 + T42; + Ta6 = T43 + T44; + } + { + E T1J, Ta8, T3W, Ta9, T1M, Tab, T3T, Tac; + { + E T1H, T1I, T3U, T3V; + T1H = Cr[WS(csr, 21)]; + T1I = Cr[WS(csr, 43)]; + T1J = T1H + T1I; + Ta8 = T1H - T1I; + T3U = Ci[WS(csi, 21)]; + T3V = Ci[WS(csi, 43)]; + T3W = T3U - T3V; + Ta9 = T3U + T3V; + } + { + E T1K, T1L, T3R, T3S; + T1K = Cr[WS(csr, 11)]; + T1L = Cr[WS(csr, 53)]; + T1M = T1K + T1L; + Tab = T1K - T1L; + T3R = Ci[WS(csi, 53)]; + T3S = Ci[WS(csi, 11)]; + T3T = T3R - T3S; + Tac = T3S + T3R; + } + T1N = T1J + T1M; + Tag = Ta8 + Ta9; + Tah = Tab + Tac; + T3X = T3T - T3W; + T3Z = T1J - T1M; + Taa = Ta8 - Ta9; + Tad = Tab - Tac; + T6S = T3W + T3T; + } + T1O = T1G + T1N; + T7V = T6S + T6R; + { + E T3Y, T47, Tdq, Tdr; + T3Y = T3Q + T3X; + T47 = T3Z + T46; + T48 = FNMS(KP382683432, T47, KP923879532 * T3Y); + T4u = FMA(KP382683432, T3Y, KP923879532 * T47); + Tdq = KP707106781 * (Taa - Tad); + Tdr = Tak - Taj; + Tds = Tdq + Tdr; + TeG = Tdr - Tdq; + } + { + E T5C, T5D, Ta7, Tae; + T5C = T3Q - T3X; + T5D = T46 - T3Z; + T5E = FNMS(KP923879532, T5D, KP382683432 * T5C); + T5K = FMA(KP923879532, T5C, 
KP382683432 * T5D); + Ta7 = Ta5 - Ta6; + Tae = KP707106781 * (Taa + Tad); + Taf = Ta7 + Tae; + TbP = Ta7 - Tae; + } + Tdn = Ta5 + Ta6; + Tdo = KP707106781 * (Tag + Tah); + Tdp = Tdn - Tdo; + TeF = Tdn + Tdo; + { + E T6Q, T6T, Tai, Tal; + T6Q = T1G - T1N; + T6T = T6R - T6S; + T6U = T6Q - T6T; + T72 = T6Q + T6T; + Tai = KP707106781 * (Tag - Tah); + Tal = Taj + Tak; + Tam = Tai + Tal; + TbQ = Tal - Tai; + } + } + { + E T1V, TaC, T49, Tao, T4p, TaD, T6W, Tap, T22, Taz, TaA, T4g, T4i, Tat, Taw; + E T6X, Tdu, Tdv; + { + E T1P, T1Q, T1R, T1S, T1T, T1U; + T1P = Cr[WS(csr, 3)]; + T1Q = Cr[WS(csr, 61)]; + T1R = T1P + T1Q; + T1S = Cr[WS(csr, 29)]; + T1T = Cr[WS(csr, 35)]; + T1U = T1S + T1T; + T1V = T1R + T1U; + TaC = T1S - T1T; + T49 = T1R - T1U; + Tao = T1P - T1Q; + } + { + E T4j, T4k, T4l, T4m, T4n, T4o; + T4j = Ci[WS(csi, 61)]; + T4k = Ci[WS(csi, 3)]; + T4l = T4j - T4k; + T4m = Ci[WS(csi, 29)]; + T4n = Ci[WS(csi, 35)]; + T4o = T4m - T4n; + T4p = T4l - T4o; + TaD = T4k + T4j; + T6W = T4o + T4l; + Tap = T4m + T4n; + } + { + E T1Y, Tar, T4f, Tas, T21, Tau, T4c, Tav; + { + E T1W, T1X, T4d, T4e; + T1W = Cr[WS(csr, 13)]; + T1X = Cr[WS(csr, 51)]; + T1Y = T1W + T1X; + Tar = T1W - T1X; + T4d = Ci[WS(csi, 13)]; + T4e = Ci[WS(csi, 51)]; + T4f = T4d - T4e; + Tas = T4d + T4e; + } + { + E T1Z, T20, T4a, T4b; + T1Z = Cr[WS(csr, 19)]; + T20 = Cr[WS(csr, 45)]; + T21 = T1Z + T20; + Tau = T1Z - T20; + T4a = Ci[WS(csi, 45)]; + T4b = Ci[WS(csi, 19)]; + T4c = T4a - T4b; + Tav = T4b + T4a; + } + T22 = T1Y + T21; + Taz = Tar + Tas; + TaA = Tau + Tav; + T4g = T4c - T4f; + T4i = T1Y - T21; + Tat = Tar - Tas; + Taw = Tau - Tav; + T6X = T4f + T4c; + } + T23 = T1V + T22; + T7U = T6X + T6W; + { + E T4h, T4q, Tdx, Tdy; + T4h = T49 + T4g; + T4q = T4i + T4p; + T4r = FMA(KP923879532, T4h, KP382683432 * T4q); + T4v = FNMS(KP382683432, T4h, KP923879532 * T4q); + Tdx = KP707106781 * (Tat - Taw); + Tdy = TaC + TaD; + Tdz = Tdx - Tdy; + TeJ = Tdx + Tdy; + } + { + E T5F, T5G, Taq, Tax; + T5F = T49 - T4g; + T5G = 
T4p - T4i; + T5H = FMA(KP382683432, T5F, KP923879532 * T5G); + T5L = FNMS(KP923879532, T5F, KP382683432 * T5G); + Taq = Tao - Tap; + Tax = KP707106781 * (Tat + Taw); + Tay = Taq + Tax; + TbS = Taq - Tax; + } + Tdu = Tao + Tap; + Tdv = KP707106781 * (Taz + TaA); + Tdw = Tdu - Tdv; + TeI = Tdu + Tdv; + { + E T6V, T6Y, TaB, TaE; + T6V = T1V - T22; + T6Y = T6W - T6X; + T6Z = T6V + T6Y; + T73 = T6Y - T6V; + TaB = KP707106781 * (Taz - TaA); + TaE = TaC - TaD; + TaF = TaB + TaE; + TbT = TaE - TaB; + } + } + { + E T1m, T3z, T1p, T3C, T3w, T3D, Tdg, Tdf, T9U, T9R, T1t, T3I, T1w, T3L, T3F; + E T3M, Tdj, Tdi, Ta1, T9Y; + { + E T9P, T9T, T9S, T9Q; + { + E T1k, T1l, T3x, T3y; + T1k = Cr[WS(csr, 9)]; + T1l = Cr[WS(csr, 55)]; + T1m = T1k + T1l; + T9P = T1k - T1l; + T3x = Ci[WS(csi, 9)]; + T3y = Ci[WS(csi, 55)]; + T3z = T3x - T3y; + T9T = T3x + T3y; + } + { + E T1n, T1o, T3A, T3B; + T1n = Cr[WS(csr, 41)]; + T1o = Cr[WS(csr, 23)]; + T1p = T1n + T1o; + T9S = T1n - T1o; + T3A = Ci[WS(csi, 41)]; + T3B = Ci[WS(csi, 23)]; + T3C = T3A - T3B; + T9Q = T3A + T3B; + } + T3w = T1m - T1p; + T3D = T3z - T3C; + Tdg = T9T - T9S; + Tdf = T9P + T9Q; + T9U = T9S + T9T; + T9R = T9P - T9Q; + } + { + E T9W, Ta0, T9Z, T9X; + { + E T1r, T1s, T3G, T3H; + T1r = Cr[WS(csr, 7)]; + T1s = Cr[WS(csr, 57)]; + T1t = T1r + T1s; + T9W = T1r - T1s; + T3G = Ci[WS(csi, 57)]; + T3H = Ci[WS(csi, 7)]; + T3I = T3G - T3H; + Ta0 = T3H + T3G; + } + { + E T1u, T1v, T3J, T3K; + T1u = Cr[WS(csr, 25)]; + T1v = Cr[WS(csr, 39)]; + T1w = T1u + T1v; + T9Z = T1u - T1v; + T3J = Ci[WS(csi, 25)]; + T3K = Ci[WS(csi, 39)]; + T3L = T3J - T3K; + T9X = T3J + T3K; + } + T3F = T1t - T1w; + T3M = T3I - T3L; + Tdj = T9Z + Ta0; + Tdi = T9W + T9X; + Ta1 = T9Z - Ta0; + T9Y = T9W - T9X; + } + { + E T1q, T1x, Tdh, Tdk; + T1q = T1m + T1p; + T1x = T1t + T1w; + T1y = T1q + T1x; + T75 = T1q - T1x; + Tdh = FNMS(KP923879532, Tdg, KP382683432 * Tdf); + Tdk = FNMS(KP923879532, Tdj, KP382683432 * Tdi); + Tdl = Tdh + Tdk; + TeQ = Tdh - Tdk; + } + { + E TdG, 
TdH, T3E, T3N; + TdG = FMA(KP923879532, Tdf, KP382683432 * Tdg); + TdH = FMA(KP923879532, Tdi, KP382683432 * Tdj); + TdI = TdG - TdH; + TeD = TdG + TdH; + T3E = T3w - T3D; + T3N = T3F + T3M; + T3O = KP707106781 * (T3E + T3N); + T5N = KP707106781 * (T3E - T3N); + } + { + E T4x, T4y, T9V, Ta2; + T4x = T3w + T3D; + T4y = T3M - T3F; + T4z = KP707106781 * (T4x + T4y); + T5A = KP707106781 * (T4y - T4x); + T9V = FNMS(KP382683432, T9U, KP923879532 * T9R); + Ta2 = FMA(KP923879532, T9Y, KP382683432 * Ta1); + Ta3 = T9V + Ta2; + Tc0 = T9V - Ta2; + } + { + E TaM, TaN, T6M, T6N; + TaM = FMA(KP382683432, T9R, KP923879532 * T9U); + TaN = FNMS(KP382683432, T9Y, KP923879532 * Ta1); + TaO = TaM + TaN; + TbN = TaN - TaM; + T6M = T3L + T3I; + T6N = T3C + T3z; + T6O = T6M - T6N; + T80 = T6N + T6M; + } + } + { + E TQ, T2R, TT, T2U, T2O, T2V, TcW, TcV, T9g, T9d, TX, T30, T10, T33, T2X; + E T34, TcZ, TcY, T9n, T9k; + { + E T9b, T9f, T9e, T9c; + { + E TO, TP, T2P, T2Q; + TO = Cr[WS(csr, 10)]; + TP = Cr[WS(csr, 54)]; + TQ = TO + TP; + T9b = TO - TP; + T2P = Ci[WS(csi, 10)]; + T2Q = Ci[WS(csi, 54)]; + T2R = T2P - T2Q; + T9f = T2P + T2Q; + } + { + E TR, TS, T2S, T2T; + TR = Cr[WS(csr, 42)]; + TS = Cr[WS(csr, 22)]; + TT = TR + TS; + T9e = TR - TS; + T2S = Ci[WS(csi, 42)]; + T2T = Ci[WS(csi, 22)]; + T2U = T2S - T2T; + T9c = T2S + T2T; + } + T2O = TQ - TT; + T2V = T2R - T2U; + TcW = T9f - T9e; + TcV = T9b + T9c; + T9g = T9e + T9f; + T9d = T9b - T9c; + } + { + E T9i, T9m, T9l, T9j; + { + E TV, TW, T2Y, T2Z; + TV = Cr[WS(csr, 6)]; + TW = Cr[WS(csr, 58)]; + TX = TV + TW; + T9i = TV - TW; + T2Y = Ci[WS(csi, 58)]; + T2Z = Ci[WS(csi, 6)]; + T30 = T2Y - T2Z; + T9m = T2Z + T2Y; + } + { + E TY, TZ, T31, T32; + TY = Cr[WS(csr, 26)]; + TZ = Cr[WS(csr, 38)]; + T10 = TY + TZ; + T9l = TY - TZ; + T31 = Ci[WS(csi, 26)]; + T32 = Ci[WS(csi, 38)]; + T33 = T31 - T32; + T9j = T31 + T32; + } + T2X = TX - T10; + T34 = T30 - T33; + TcZ = T9l + T9m; + TcY = T9i + T9j; + T9n = T9l - T9m; + T9k = T9i - T9j; + } + { + E TU, 
T11, TcX, Td0; + TU = TQ + TT; + T11 = TX + T10; + T12 = TU + T11; + T6E = TU - T11; + TcX = FNMS(KP923879532, TcW, KP382683432 * TcV); + Td0 = FNMS(KP923879532, TcZ, KP382683432 * TcY); + Td1 = TcX + Td0; + Tex = TcX - Td0; + } + { + E Td3, Td4, T2W, T35; + Td3 = FMA(KP923879532, TcV, KP382683432 * TcW); + Td4 = FMA(KP923879532, TcY, KP382683432 * TcZ); + Td5 = Td3 - Td4; + Tev = Td3 + Td4; + T2W = T2O - T2V; + T35 = T2X + T34; + T36 = KP707106781 * (T2W + T35); + T5u = KP707106781 * (T2W - T35); + } + { + E T38, T39, T9h, T9o; + T38 = T2O + T2V; + T39 = T34 - T2X; + T3a = KP707106781 * (T38 + T39); + T5s = KP707106781 * (T39 - T38); + T9h = FNMS(KP382683432, T9g, KP923879532 * T9d); + T9o = FMA(KP923879532, T9k, KP382683432 * T9n); + T9p = T9h + T9o; + TbH = T9h - T9o; + } + { + E T9r, T9s, T6A, T6B; + T9r = FMA(KP382683432, T9d, KP923879532 * T9g); + T9s = FNMS(KP382683432, T9k, KP923879532 * T9n); + T9t = T9r + T9s; + TbF = T9s - T9r; + T6A = T33 + T30; + T6B = T2U + T2R; + T6C = T6A - T6B; + T7P = T6B + T6A; + } + } + { + E T13, T8f, Ty, T8e, T25, T8h, T8k, T8p, Ti, T14, T8o; + T13 = KP2_000000000 * (TN + T12); + T8f = KP2_000000000 * (T7P + T7O); + Ti = Ta + Th; + Ty = Ti + Tx; + T8e = Ti - Tx; + { + E T1z, T24, T8i, T8j; + T1z = T1j + T1y; + T24 = T1O + T23; + T25 = KP2_000000000 * (T1z + T24); + T8h = T1z - T24; + T8i = T80 + T7Z; + T8j = T7V + T7U; + T8k = T8i - T8j; + T8p = KP2_000000000 * (T8j + T8i); + } + T14 = Ty + T13; + R0[WS(rs, 32)] = T14 - T25; + R0[0] = T14 + T25; + T8o = Ty - T13; + R0[WS(rs, 16)] = T8o - T8p; + R0[WS(rs, 48)] = T8o + T8p; + { + E T8g, T8l, T8m, T8n; + T8g = T8e - T8f; + T8l = KP1_414213562 * (T8h - T8k); + R0[WS(rs, 40)] = T8g - T8l; + R0[WS(rs, 8)] = T8g + T8l; + T8m = T8e + T8f; + T8n = KP1_414213562 * (T8h + T8k); + R0[WS(rs, 24)] = T8m - T8n; + R0[WS(rs, 56)] = T8m + T8n; + } + } + { + E T7M, T86, T82, T8a, T7R, T87, T7X, T89, T7K, T7Y, T81; + T7K = Ta - Th; + T7M = T7K - T7L; + T86 = T7K + T7L; + T7Y = T1O - T23; + T81 = 
T7Z - T80; + T82 = T7Y + T81; + T8a = T81 - T7Y; + { + E T7N, T7Q, T7T, T7W; + T7N = TN - T12; + T7Q = T7O - T7P; + T7R = KP1_414213562 * (T7N - T7Q); + T87 = KP1_414213562 * (T7N + T7Q); + T7T = T1j - T1y; + T7W = T7U - T7V; + T7X = T7T + T7W; + T89 = T7T - T7W; + } + { + E T7S, T83, T8c, T8d; + T7S = T7M + T7R; + T83 = FNMS(KP765366864, T82, KP1_847759065 * T7X); + R0[WS(rs, 36)] = T7S - T83; + R0[WS(rs, 4)] = T7S + T83; + T8c = T86 + T87; + T8d = FMA(KP1_847759065, T89, KP765366864 * T8a); + R0[WS(rs, 28)] = T8c - T8d; + R0[WS(rs, 60)] = T8c + T8d; + } + { + E T84, T85, T88, T8b; + T84 = T7M - T7R; + T85 = FMA(KP765366864, T7X, KP1_847759065 * T82); + R0[WS(rs, 20)] = T84 - T85; + R0[WS(rs, 52)] = T84 + T85; + T88 = T86 - T87; + T8b = FNMS(KP1_847759065, T8a, KP765366864 * T89); + R0[WS(rs, 44)] = T88 - T8b; + R0[WS(rs, 12)] = T88 + T8b; + } + } + { + E T2E, T4O, T4K, T4S, T3l, T4P, T4t, T4R; + { + E T2k, T2D, T4w, T4J; + T2k = T2a + T2j; + T2D = FNMS(KP765366864, T2C, KP1_847759065 * T2t); + T2E = T2k + T2D; + T4O = T2k - T2D; + T4w = T4u + T4v; + T4J = T4z + T4I; + T4K = T4w + T4J; + T4S = T4J - T4w; + } + { + E T37, T3k, T3P, T4s; + T37 = T2N + T36; + T3k = T3a + T3j; + T3l = FNMS(KP390180644, T3k, KP1_961570560 * T37); + T4P = FMA(KP390180644, T37, KP1_961570560 * T3k); + T3P = T3v + T3O; + T4s = T48 + T4r; + T4t = T3P + T4s; + T4R = T3P - T4s; + } + { + E T3m, T4L, T4U, T4V; + T3m = T2E + T3l; + T4L = FNMS(KP196034280, T4K, KP1_990369453 * T4t); + R0[WS(rs, 33)] = T3m - T4L; + R0[WS(rs, 1)] = T3m + T4L; + T4U = T4O + T4P; + T4V = FMA(KP1_546020906, T4R, KP1_268786568 * T4S); + R0[WS(rs, 25)] = T4U - T4V; + R0[WS(rs, 57)] = T4U + T4V; + } + { + E T4M, T4N, T4Q, T4T; + T4M = T2E - T3l; + T4N = FMA(KP196034280, T4t, KP1_990369453 * T4K); + R0[WS(rs, 17)] = T4M - T4N; + R0[WS(rs, 49)] = T4M + T4N; + T4Q = T4O - T4P; + T4T = FNMS(KP1_546020906, T4S, KP1_268786568 * T4R); + R0[WS(rs, 41)] = T4Q - T4T; + R0[WS(rs, 9)] = T4Q + T4T; + } + } + { + E T6y, T7e, T7a, 
T7i, T6J, T7f, T71, T7h; + { + E T6s, T6x, T74, T79; + T6s = T6q - T6r; + T6x = KP1_414213562 * (T6t - T6w); + T6y = T6s + T6x; + T7e = T6s - T6x; + T74 = KP707106781 * (T72 + T73); + T79 = T75 + T78; + T7a = T74 + T79; + T7i = T79 - T74; + } + { + E T6D, T6I, T6P, T70; + T6D = T6z + T6C; + T6I = T6E + T6H; + T6J = FNMS(KP765366864, T6I, KP1_847759065 * T6D); + T7f = FMA(KP765366864, T6D, KP1_847759065 * T6I); + T6P = T6L + T6O; + T70 = KP707106781 * (T6U + T6Z); + T71 = T6P + T70; + T7h = T6P - T70; + } + { + E T6K, T7b, T7k, T7l; + T6K = T6y + T6J; + T7b = FNMS(KP390180644, T7a, KP1_961570560 * T71); + R0[WS(rs, 34)] = T6K - T7b; + R0[WS(rs, 2)] = T6K + T7b; + T7k = T7e + T7f; + T7l = FMA(KP1_662939224, T7h, KP1_111140466 * T7i); + R0[WS(rs, 26)] = T7k - T7l; + R0[WS(rs, 58)] = T7k + T7l; + } + { + E T7c, T7d, T7g, T7j; + T7c = T6y - T6J; + T7d = FMA(KP390180644, T71, KP1_961570560 * T7a); + R0[WS(rs, 18)] = T7c - T7d; + R0[WS(rs, 50)] = T7c + T7d; + T7g = T7e - T7f; + T7j = FNMS(KP1_662939224, T7i, KP1_111140466 * T7h); + R0[WS(rs, 42)] = T7g - T7j; + R0[WS(rs, 10)] = T7g + T7j; + } + } + { + E T4Y, T5c, T58, T5g, T51, T5d, T55, T5f; + { + E T4W, T4X, T56, T57; + T4W = T2a - T2j; + T4X = FMA(KP765366864, T2t, KP1_847759065 * T2C); + T4Y = T4W - T4X; + T5c = T4W + T4X; + T56 = T48 - T4r; + T57 = T4I - T4z; + T58 = T56 + T57; + T5g = T57 - T56; + } + { + E T4Z, T50, T53, T54; + T4Z = T2N - T36; + T50 = T3j - T3a; + T51 = FNMS(KP1_662939224, T50, KP1_111140466 * T4Z); + T5d = FMA(KP1_662939224, T4Z, KP1_111140466 * T50); + T53 = T3v - T3O; + T54 = T4v - T4u; + T55 = T53 + T54; + T5f = T53 - T54; + } + { + E T52, T59, T5i, T5j; + T52 = T4Y + T51; + T59 = FNMS(KP942793473, T58, KP1_763842528 * T55); + R0[WS(rs, 37)] = T52 - T59; + R0[WS(rs, 5)] = T52 + T59; + T5i = T5c + T5d; + T5j = FMA(KP1_913880671, T5f, KP580569354 * T5g); + R0[WS(rs, 29)] = T5i - T5j; + R0[WS(rs, 61)] = T5i + T5j; + } + { + E T5a, T5b, T5e, T5h; + T5a = T4Y - T51; + T5b = FMA(KP942793473, T55, 
KP1_763842528 * T58); + R0[WS(rs, 21)] = T5a - T5b; + R0[WS(rs, 53)] = T5a + T5b; + T5e = T5c - T5d; + T5h = FNMS(KP1_913880671, T5g, KP580569354 * T5f); + R0[WS(rs, 45)] = T5e - T5h; + R0[WS(rs, 13)] = T5e + T5h; + } + } + { + E T7o, T7C, T7y, T7G, T7r, T7D, T7v, T7F; + { + E T7m, T7n, T7w, T7x; + T7m = T6q + T6r; + T7n = KP1_414213562 * (T6t + T6w); + T7o = T7m - T7n; + T7C = T7m + T7n; + T7w = KP707106781 * (T6U - T6Z); + T7x = T78 - T75; + T7y = T7w + T7x; + T7G = T7x - T7w; + } + { + E T7p, T7q, T7t, T7u; + T7p = T6z - T6C; + T7q = T6H - T6E; + T7r = FNMS(KP1_847759065, T7q, KP765366864 * T7p); + T7D = FMA(KP1_847759065, T7p, KP765366864 * T7q); + T7t = T6L - T6O; + T7u = KP707106781 * (T73 - T72); + T7v = T7t + T7u; + T7F = T7t - T7u; + } + { + E T7s, T7z, T7I, T7J; + T7s = T7o + T7r; + T7z = FNMS(KP1_111140466, T7y, KP1_662939224 * T7v); + R0[WS(rs, 38)] = T7s - T7z; + R0[WS(rs, 6)] = T7s + T7z; + T7I = T7C + T7D; + T7J = FMA(KP1_961570560, T7F, KP390180644 * T7G); + R0[WS(rs, 30)] = T7I - T7J; + R0[WS(rs, 62)] = T7I + T7J; + } + { + E T7A, T7B, T7E, T7H; + T7A = T7o - T7r; + T7B = FMA(KP1_111140466, T7v, KP1_662939224 * T7y); + R0[WS(rs, 22)] = T7A - T7B; + R0[WS(rs, 54)] = T7A + T7B; + T7E = T7C - T7D; + T7H = FNMS(KP1_961570560, T7G, KP390180644 * T7F); + R0[WS(rs, 46)] = T7E - T7H; + R0[WS(rs, 14)] = T7E + T7H; + } + } + { + E T5q, T5U, T5Q, T5Y, T5x, T5V, T5J, T5X; + { + E T5m, T5p, T5M, T5P; + T5m = T5k - T5l; + T5p = FNMS(KP1_847759065, T5o, KP765366864 * T5n); + T5q = T5m + T5p; + T5U = T5m - T5p; + T5M = T5K + T5L; + T5P = T5N + T5O; + T5Q = T5M + T5P; + T5Y = T5P - T5M; + } + { + E T5t, T5w, T5B, T5I; + T5t = T5r + T5s; + T5w = T5u + T5v; + T5x = FNMS(KP1_111140466, T5w, KP1_662939224 * T5t); + T5V = FMA(KP1_111140466, T5t, KP1_662939224 * T5w); + T5B = T5z + T5A; + T5I = T5E + T5H; + T5J = T5B + T5I; + T5X = T5B - T5I; + } + { + E T5y, T5R, T60, T61; + T5y = T5q + T5x; + T5R = FNMS(KP580569354, T5Q, KP1_913880671 * T5J); + R0[WS(rs, 35)] = T5y - 
T5R; + R0[WS(rs, 3)] = T5y + T5R; + T60 = T5U + T5V; + T61 = FMA(KP1_763842528, T5X, KP942793473 * T5Y); + R0[WS(rs, 27)] = T60 - T61; + R0[WS(rs, 59)] = T60 + T61; + } + { + E T5S, T5T, T5W, T5Z; + T5S = T5q - T5x; + T5T = FMA(KP580569354, T5J, KP1_913880671 * T5Q); + R0[WS(rs, 19)] = T5S - T5T; + R0[WS(rs, 51)] = T5S + T5T; + T5W = T5U - T5V; + T5Z = FNMS(KP1_763842528, T5Y, KP942793473 * T5X); + R0[WS(rs, 43)] = T5W - T5Z; + R0[WS(rs, 11)] = T5W + T5Z; + } + } + { + E T64, T6i, T6e, T6m, T67, T6j, T6b, T6l; + { + E T62, T63, T6c, T6d; + T62 = T5k + T5l; + T63 = FMA(KP1_847759065, T5n, KP765366864 * T5o); + T64 = T62 - T63; + T6i = T62 + T63; + T6c = T5E - T5H; + T6d = T5O - T5N; + T6e = T6c + T6d; + T6m = T6d - T6c; + } + { + E T65, T66, T69, T6a; + T65 = T5r - T5s; + T66 = T5v - T5u; + T67 = FNMS(KP1_961570560, T66, KP390180644 * T65); + T6j = FMA(KP1_961570560, T65, KP390180644 * T66); + T69 = T5z - T5A; + T6a = T5L - T5K; + T6b = T69 + T6a; + T6l = T69 - T6a; + } + { + E T68, T6f, T6o, T6p; + T68 = T64 + T67; + T6f = FNMS(KP1_268786568, T6e, KP1_546020906 * T6b); + R0[WS(rs, 39)] = T68 - T6f; + R0[WS(rs, 7)] = T68 + T6f; + T6o = T6i + T6j; + T6p = FMA(KP1_990369453, T6l, KP196034280 * T6m); + R0[WS(rs, 31)] = T6o - T6p; + R0[WS(rs, 63)] = T6o + T6p; + } + { + E T6g, T6h, T6k, T6n; + T6g = T64 - T67; + T6h = FMA(KP1_268786568, T6b, KP1_546020906 * T6e); + R0[WS(rs, 23)] = T6g - T6h; + R0[WS(rs, 55)] = T6g + T6h; + T6k = T6i - T6j; + T6n = FNMS(KP1_990369453, T6m, KP196034280 * T6l); + R0[WS(rs, 47)] = T6k - T6n; + R0[WS(rs, 15)] = T6k + T6n; + } + } + { + E T8Z, Tb1, T9C, Tb2, Tbe, Tbq, Tbb, Tbp, TaX, Tbs, Tb5, Tbi, TaI, Tbt, Tb4; + E Tbl; + { + E T8F, T8Y, Tb9, Tba; + T8F = T8x + T8E; + T8Y = FNMS(KP390180644, T8X, KP1_961570560 * T8Q); + T8Z = T8F + T8Y; + Tb1 = T8F - T8Y; + { + E T9q, T9B, Tbc, Tbd; + T9q = T9a + T9p; + T9B = T9t + T9A; + T9C = FNMS(KP196034280, T9B, KP1_990369453 * T9q); + Tb2 = FMA(KP196034280, T9q, KP1_990369453 * T9B); + Tbc = T9a - 
T9p; + Tbd = T9A - T9t; + Tbe = FNMS(KP1_546020906, Tbd, KP1_268786568 * Tbc); + Tbq = FMA(KP1_546020906, Tbc, KP1_268786568 * Tbd); + } + Tb9 = T8x - T8E; + Tba = FMA(KP390180644, T8Q, KP1_961570560 * T8X); + Tbb = Tb9 - Tba; + Tbp = Tb9 + Tba; + { + E TaW, Tbg, TaL, Tbh, TaJ, TaK; + TaW = TaO + TaV; + Tbg = T9O - Ta3; + TaJ = FMA(KP195090322, Taf, KP980785280 * Tam); + TaK = FNMS(KP195090322, Tay, KP980785280 * TaF); + TaL = TaJ + TaK; + Tbh = TaK - TaJ; + TaX = TaL + TaW; + Tbs = Tbg - Tbh; + Tb5 = TaW - TaL; + Tbi = Tbg + Tbh; + } + { + E Ta4, Tbk, TaH, Tbj, Tan, TaG; + Ta4 = T9O + Ta3; + Tbk = TaV - TaO; + Tan = FNMS(KP195090322, Tam, KP980785280 * Taf); + TaG = FMA(KP980785280, Tay, KP195090322 * TaF); + TaH = Tan + TaG; + Tbj = Tan - TaG; + TaI = Ta4 + TaH; + Tbt = Tbk - Tbj; + Tb4 = Ta4 - TaH; + Tbl = Tbj + Tbk; + } + } + { + E T9D, TaY, Tbr, Tbu; + T9D = T8Z + T9C; + TaY = FNMS(KP098135348, TaX, KP1_997590912 * TaI); + R1[WS(rs, 32)] = T9D - TaY; + R1[0] = T9D + TaY; + Tbr = Tbp - Tbq; + Tbu = FNMS(KP1_883088130, Tbt, KP673779706 * Tbs); + R1[WS(rs, 44)] = Tbr - Tbu; + R1[WS(rs, 12)] = Tbr + Tbu; + } + { + E Tbv, Tbw, TaZ, Tb0; + Tbv = Tbp + Tbq; + Tbw = FMA(KP1_883088130, Tbs, KP673779706 * Tbt); + R1[WS(rs, 28)] = Tbv - Tbw; + R1[WS(rs, 60)] = Tbv + Tbw; + TaZ = T8Z - T9C; + Tb0 = FMA(KP098135348, TaI, KP1_997590912 * TaX); + R1[WS(rs, 16)] = TaZ - Tb0; + R1[WS(rs, 48)] = TaZ + Tb0; + } + { + E Tb3, Tb6, Tbf, Tbm; + Tb3 = Tb1 - Tb2; + Tb6 = FNMS(KP1_481902250, Tb5, KP1_343117909 * Tb4); + R1[WS(rs, 40)] = Tb3 - Tb6; + R1[WS(rs, 8)] = Tb3 + Tb6; + Tbf = Tbb + Tbe; + Tbm = FNMS(KP855110186, Tbl, KP1_807978586 * Tbi); + R1[WS(rs, 36)] = Tbf - Tbm; + R1[WS(rs, 4)] = Tbf + Tbm; + } + { + E Tbn, Tbo, Tb7, Tb8; + Tbn = Tbb - Tbe; + Tbo = FMA(KP855110186, Tbi, KP1_807978586 * Tbl); + R1[WS(rs, 20)] = Tbn - Tbo; + R1[WS(rs, 52)] = Tbn + Tbo; + Tb7 = Tb1 + Tb2; + Tb8 = FMA(KP1_481902250, Tb4, KP1_343117909 * Tb5); + R1[WS(rs, 24)] = Tb7 - Tb8; + R1[WS(rs, 56)] = 
Tb7 + Tb8; + } + } + { + E TcR, TdR, Tda, TdS, Te4, Teg, Te1, Tef, TdN, Tei, TdV, Te8, TdC, Tej, TdU; + E Teb; + { + E TcJ, TcQ, TdZ, Te0; + TcJ = TcF + TcI; + TcQ = FNMS(KP1_111140466, TcP, KP1_662939224 * TcM); + TcR = TcJ + TcQ; + TdR = TcJ - TcQ; + { + E Td2, Td9, Te2, Te3; + Td2 = TcU + Td1; + Td9 = Td5 + Td8; + Tda = FNMS(KP580569354, Td9, KP1_913880671 * Td2); + TdS = FMA(KP580569354, Td2, KP1_913880671 * Td9); + Te2 = TcU - Td1; + Te3 = Td8 - Td5; + Te4 = FNMS(KP1_763842528, Te3, KP942793473 * Te2); + Teg = FMA(KP1_763842528, Te2, KP942793473 * Te3); + } + TdZ = TcF - TcI; + Te0 = FMA(KP1_111140466, TcM, KP1_662939224 * TcP); + Te1 = TdZ - Te0; + Tef = TdZ + Te0; + { + E TdM, Te6, TdF, Te7, TdD, TdE; + TdM = TdI + TdL; + Te6 = Tde - Tdl; + TdD = FMA(KP555570233, Tdp, KP831469612 * Tds); + TdE = FNMS(KP555570233, Tdw, KP831469612 * Tdz); + TdF = TdD + TdE; + Te7 = TdE - TdD; + TdN = TdF + TdM; + Tei = Te6 - Te7; + TdV = TdM - TdF; + Te8 = Te6 + Te7; + } + { + E Tdm, Tea, TdB, Te9, Tdt, TdA; + Tdm = Tde + Tdl; + Tea = TdL - TdI; + Tdt = FNMS(KP555570233, Tds, KP831469612 * Tdp); + TdA = FMA(KP831469612, Tdw, KP555570233 * Tdz); + TdB = Tdt + TdA; + Te9 = Tdt - TdA; + TdC = Tdm + TdB; + Tej = Tea - Te9; + TdU = Tdm - TdB; + Teb = Te9 + Tea; + } + } + { + E Tdb, TdO, Teh, Tek; + Tdb = TcR + Tda; + TdO = FNMS(KP293460948, TdN, KP1_978353019 * TdC); + R1[WS(rs, 33)] = Tdb - TdO; + R1[WS(rs, 1)] = Tdb + TdO; + Teh = Tef - Teg; + Tek = FNMS(KP1_940062506, Tej, KP485960359 * Tei); + R1[WS(rs, 45)] = Teh - Tek; + R1[WS(rs, 13)] = Teh + Tek; + } + { + E Tel, Tem, TdP, TdQ; + Tel = Tef + Teg; + Tem = FMA(KP1_940062506, Tei, KP485960359 * Tej); + R1[WS(rs, 29)] = Tel - Tem; + R1[WS(rs, 61)] = Tel + Tem; + TdP = TcR - Tda; + TdQ = FMA(KP293460948, TdC, KP1_978353019 * TdN); + R1[WS(rs, 17)] = TdP - TdQ; + R1[WS(rs, 49)] = TdP + TdQ; + } + { + E TdT, TdW, Te5, Tec; + TdT = TdR - TdS; + TdW = FNMS(KP1_606415062, TdV, KP1_191398608 * TdU); + R1[WS(rs, 41)] = TdT - TdW; + 
R1[WS(rs, 9)] = TdT + TdW; + Te5 = Te1 + Te4; + Tec = FNMS(KP1_028205488, Teb, KP1_715457220 * Te8); + R1[WS(rs, 37)] = Te5 - Tec; + R1[WS(rs, 5)] = Te5 + Tec; + } + { + E Ted, Tee, TdX, TdY; + Ted = Te1 - Te4; + Tee = FMA(KP1_028205488, Te8, KP1_715457220 * Teb); + R1[WS(rs, 21)] = Ted - Tee; + R1[WS(rs, 53)] = Ted + Tee; + TdX = TdR + TdS; + TdY = FMA(KP1_606415062, TdU, KP1_191398608 * TdV); + R1[WS(rs, 25)] = TdX - TdY; + R1[WS(rs, 57)] = TdX + TdY; + } + } + { + E TbD, Tc7, TbK, Tc8, Tck, Tcw, Tch, Tcv, Tc3, Tcy, Tcb, Tco, TbW, Tcz, Tca; + E Tcr; + { + E Tbz, TbC, Tcf, Tcg; + Tbz = Tbx - Tby; + TbC = FNMS(KP1_662939224, TbB, KP1_111140466 * TbA); + TbD = Tbz + TbC; + Tc7 = Tbz - TbC; + { + E TbG, TbJ, Tci, Tcj; + TbG = TbE + TbF; + TbJ = TbH + TbI; + TbK = FNMS(KP942793473, TbJ, KP1_763842528 * TbG); + Tc8 = FMA(KP942793473, TbG, KP1_763842528 * TbJ); + Tci = TbE - TbF; + Tcj = TbI - TbH; + Tck = FNMS(KP1_913880671, Tcj, KP580569354 * Tci); + Tcw = FMA(KP1_913880671, Tci, KP580569354 * Tcj); + } + Tcf = Tbx + Tby; + Tcg = FMA(KP1_662939224, TbA, KP1_111140466 * TbB); + Tch = Tcf - Tcg; + Tcv = Tcf + Tcg; + { + E Tc2, Tcm, TbZ, Tcn, TbX, TbY; + Tc2 = Tc0 + Tc1; + Tcm = TbM - TbN; + TbX = FMA(KP831469612, TbP, KP555570233 * TbQ); + TbY = FNMS(KP831469612, TbS, KP555570233 * TbT); + TbZ = TbX + TbY; + Tcn = TbY - TbX; + Tc3 = TbZ + Tc2; + Tcy = Tcm - Tcn; + Tcb = Tc2 - TbZ; + Tco = Tcm + Tcn; + } + { + E TbO, Tcq, TbV, Tcp, TbR, TbU; + TbO = TbM + TbN; + Tcq = Tc1 - Tc0; + TbR = FNMS(KP831469612, TbQ, KP555570233 * TbP); + TbU = FMA(KP555570233, TbS, KP831469612 * TbT); + TbV = TbR + TbU; + Tcp = TbR - TbU; + TbW = TbO + TbV; + Tcz = Tcq - Tcp; + Tca = TbO - TbV; + Tcr = Tcp + Tcq; + } + } + { + E TbL, Tc4, Tcx, TcA; + TbL = TbD + TbK; + Tc4 = FNMS(KP485960359, Tc3, KP1_940062506 * TbW); + R1[WS(rs, 34)] = TbL - Tc4; + R1[WS(rs, 2)] = TbL + Tc4; + Tcx = Tcv - Tcw; + TcA = FNMS(KP1_978353019, Tcz, KP293460948 * Tcy); + R1[WS(rs, 46)] = Tcx - TcA; + R1[WS(rs, 14)] 
= Tcx + TcA; + } + { + E TcB, TcC, Tc5, Tc6; + TcB = Tcv + Tcw; + TcC = FMA(KP1_978353019, Tcy, KP293460948 * Tcz); + R1[WS(rs, 30)] = TcB - TcC; + R1[WS(rs, 62)] = TcB + TcC; + Tc5 = TbD - TbK; + Tc6 = FMA(KP485960359, TbW, KP1_940062506 * Tc3); + R1[WS(rs, 18)] = Tc5 - Tc6; + R1[WS(rs, 50)] = Tc5 + Tc6; + } + { + E Tc9, Tcc, Tcl, Tcs; + Tc9 = Tc7 - Tc8; + Tcc = FNMS(KP1_715457220, Tcb, KP1_028205488 * Tca); + R1[WS(rs, 42)] = Tc9 - Tcc; + R1[WS(rs, 10)] = Tc9 + Tcc; + Tcl = Tch + Tck; + Tcs = FNMS(KP1_191398608, Tcr, KP1_606415062 * Tco); + R1[WS(rs, 38)] = Tcl - Tcs; + R1[WS(rs, 6)] = Tcl + Tcs; + } + { + E Tct, Tcu, Tcd, Tce; + Tct = Tch - Tck; + Tcu = FMA(KP1_191398608, Tco, KP1_606415062 * Tcr); + R1[WS(rs, 22)] = Tct - Tcu; + R1[WS(rs, 54)] = Tct + Tcu; + Tcd = Tc7 + Tc8; + Tce = FMA(KP1_715457220, Tca, KP1_028205488 * Tcb); + R1[WS(rs, 26)] = Tcd - Tce; + R1[WS(rs, 58)] = Tcd + Tce; + } + } + { + E Tet, TeX, TeA, TeY, Tfa, Tfm, Tf7, Tfl, TeT, Tfo, Tf1, Tfe, TeM, Tfp, Tf0; + E Tfh; + { + E Tep, Tes, Tf5, Tf6; + Tep = Ten - Teo; + Tes = FNMS(KP1_961570560, Ter, KP390180644 * Teq); + Tet = Tep + Tes; + TeX = Tep - Tes; + { + E Tew, Tez, Tf8, Tf9; + Tew = Teu - Tev; + Tez = Tex + Tey; + TeA = FNMS(KP1_268786568, Tez, KP1_546020906 * Tew); + TeY = FMA(KP1_268786568, Tew, KP1_546020906 * Tez); + Tf8 = Teu + Tev; + Tf9 = Tey - Tex; + Tfa = FNMS(KP1_990369453, Tf9, KP196034280 * Tf8); + Tfm = FMA(KP1_990369453, Tf8, KP196034280 * Tf9); + } + Tf5 = Ten + Teo; + Tf6 = FMA(KP1_961570560, Teq, KP390180644 * Ter); + Tf7 = Tf5 - Tf6; + Tfl = Tf5 + Tf6; + { + E TeS, Tfc, TeP, Tfd, TeN, TeO; + TeS = TeQ + TeR; + Tfc = TeC + TeD; + TeN = FMA(KP980785280, TeF, KP195090322 * TeG); + TeO = FMA(KP980785280, TeI, KP195090322 * TeJ); + TeP = TeN - TeO; + Tfd = TeN + TeO; + TeT = TeP + TeS; + Tfo = Tfc + Tfd; + Tf1 = TeS - TeP; + Tfe = Tfc - Tfd; + } + { + E TeE, Tfg, TeL, Tff, TeH, TeK; + TeE = TeC - TeD; + Tfg = TeR - TeQ; + TeH = FNMS(KP980785280, TeG, KP195090322 * TeF); + TeK 
= FNMS(KP980785280, TeJ, KP195090322 * TeI); + TeL = TeH + TeK; + Tff = TeH - TeK; + TeM = TeE + TeL; + Tfp = Tfg - Tff; + Tf0 = TeE - TeL; + Tfh = Tff + Tfg; + } + } + { + E TeB, TeU, Tfn, Tfq; + TeB = Tet + TeA; + TeU = FNMS(KP673779706, TeT, KP1_883088130 * TeM); + R1[WS(rs, 35)] = TeB - TeU; + R1[WS(rs, 3)] = TeB + TeU; + Tfn = Tfl - Tfm; + Tfq = FNMS(KP1_997590912, Tfp, KP098135348 * Tfo); + R1[WS(rs, 47)] = Tfn - Tfq; + R1[WS(rs, 15)] = Tfn + Tfq; + } + { + E Tfr, Tfs, TeV, TeW; + Tfr = Tfl + Tfm; + Tfs = FMA(KP1_997590912, Tfo, KP098135348 * Tfp); + R1[WS(rs, 31)] = Tfr - Tfs; + R1[WS(rs, 63)] = Tfr + Tfs; + TeV = Tet - TeA; + TeW = FMA(KP673779706, TeM, KP1_883088130 * TeT); + R1[WS(rs, 19)] = TeV - TeW; + R1[WS(rs, 51)] = TeV + TeW; + } + { + E TeZ, Tf2, Tfb, Tfi; + TeZ = TeX - TeY; + Tf2 = FNMS(KP1_807978586, Tf1, KP855110186 * Tf0); + R1[WS(rs, 43)] = TeZ - Tf2; + R1[WS(rs, 11)] = TeZ + Tf2; + Tfb = Tf7 + Tfa; + Tfi = FNMS(KP1_343117909, Tfh, KP1_481902250 * Tfe); + R1[WS(rs, 39)] = Tfb - Tfi; + R1[WS(rs, 7)] = Tfb + Tfi; + } + { + E Tfj, Tfk, Tf3, Tf4; + Tfj = Tf7 - Tfa; + Tfk = FMA(KP1_343117909, Tfe, KP1_481902250 * Tfh); + R1[WS(rs, 23)] = Tfj - Tfk; + R1[WS(rs, 55)] = Tfj + Tfk; + Tf3 = TeX + TeY; + Tf4 = FMA(KP1_807978586, Tf0, KP855110186 * Tf1); + R1[WS(rs, 27)] = Tf3 - Tf4; + R1[WS(rs, 59)] = Tf3 + Tf4; + } + } + } + } +} + +static const kr2c_desc desc = { 128, "r2cb_128", {812, 198, 144, 0}, &GENUS }; /* descriptor passed to X(kr2c_register) below together with r2cb_128; NOTE(review): 128 is presumably the transform size and {812, 198, 144, 0} presumably operation counts (adds, muls, fmas, other) - confirm against the kr2c_desc definition */ + +void X(codelet_r2cb_128) (planner *p) { /* registration hook: hands the r2cb_128 function and its descriptor to planner p through X(kr2c_register) */ + X(kr2c_register) (p, r2cb_128, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under
the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 13 -name r2cb_13 -include r2cb.h */ + +/* + * This function contains 76 FP additions, 58 FP multiplications, + * (or, 18 additions, 0 multiplications, 58 fused multiply/add), + * 76 stack variables, 26 constants, and 26 memory accesses + */ +#include "r2cb.h" + +static void r2cb_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* HAVE_FMA build of r2cb_13: on each of the v loop passes it loads Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..6)] and stores R0[0], R0[WS(rs, 1..6)], R1[0] and R1[WS(rs, 1..5)], i.e. 13 loads and 13 stores; nothing is returned. NOTE(review): the name and the gen_r2cb generator line suggest the size-13 backward (complex to real) transform - confirm against the FFTW codelet documentation */ +{ + DK(KP968287244, +0.968287244361984016049539446938120421179794516); + DK(KP875502302, +0.875502302409147941146295545768755143177842006); + DK(KP1_150281458, +1.150281458948006242736771094910906776922003215); + DK(KP1_040057143, +1.040057143777729238234261000998465604986476278); + DK(KP1_200954543, +1.200954543865330565851538506669526018704025697); + DK(KP769338817, +0.769338817572980603471413688209101117038278899); + DK(KP600925212, +0.600925212577331548853203544578415991041882762); + DK(KP1_033041561, +1.033041561246979445681802577138034271410067244); + DK(KP1_007074065, +1.007074065727533254493747707736933954186697125); + DK(KP503537032,
+0.503537032863766627246873853868466977093348562); + DK(KP581704778, +0.581704778510515730456870384989698884939833902); + DK(KP859542535, +0.859542535098774820163672132761689612766401925); + DK(KP166666666, +0.166666666666666666666666666666666666666666667); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP301479260, +0.301479260047709873958013540496673347309208464); + DK(KP226109445, +0.226109445035782405468510155372505010481906348); + DK(KP686558370, +0.686558370781754340655719594850823015421401653); + DK(KP514918778, +0.514918778086315755491789696138117261566051239); + DK(KP957805992, +0.957805992594665126462521754605754580515587217); + DK(KP522026385, +0.522026385161275033714027226654165028300441940); + DK(KP853480001, +0.853480001859823990758994934970528322872359049); + DK(KP038632954, +0.038632954644348171955506895830342264440241080); + DK(KP612264650, +0.612264650376756543746494474777125408779395514); + DK(KP302775637, +0.302775637731994646559610633735247973125648287); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) { /* one pass per loop count: i runs from v down to 1 while R0 and R1 step by ovs and Cr and Ci step by ivs; MAKE_VOLATILE_STRIDE is a macro defined outside this file, its purpose is not visible here */ + E TW, T14, TS, TO, T18, T1e, TY, TX, TQ, Tq, TP, Tl, T1d, Tr; + { + E T1, TN, T16, TJ, TV, TG, TU, Tf, T2, T3, Tb, Ti, T4; + { + E Ts, TB, Tx, Ty, Tv, TE, Tt, Tu, Tz, TC; + Ts = Ci[WS(csi, 5)]; + Tt = Ci[WS(csi, 2)]; + Tu = Ci[WS(csi, 6)]; + TB = Ci[WS(csi, 1)]; + Tx = Ci[WS(csi, 3)]; + Ty = Ci[WS(csi, 4)]; + Tv = Tt + Tu; + TE = Tu - Tt; + T1 = Cr[0]; + Tz = Tx + Ty; + TC = Tx - Ty; + { + E TL, Tw, T7, Ta; + TL = Ts + Tv; + Tw = FNMS(KP500000000, Tv, Ts); + T7 = Cr[WS(csr, 5)]; + { + E TD, TM, TA, TH; + TD = FNMS(KP500000000, TC, TB); + TM = TB + TC; + TA = FMA(KP866025403, Tz, Tw); + TH = FNMS(KP866025403, Tz, Tw); + TN =
FMA(KP302775637, TM, TL); + T16 = FNMS(KP302775637, TL, TM); + { + E TF, TI, T8, T9; + TF = FMA(KP866025403, TE, TD); + TI = FNMS(KP866025403, TE, TD); + T8 = Cr[WS(csr, 2)]; + T9 = Cr[WS(csr, 6)]; + TJ = FNMS(KP612264650, TI, TH); + TV = FMA(KP612264650, TH, TI); + TG = FNMS(KP038632954, TF, TA); + TU = FMA(KP038632954, TA, TF); + Tf = T8 - T9; + Ta = T8 + T9; + } + } + T2 = Cr[WS(csr, 1)]; + T3 = Cr[WS(csr, 3)]; + Tb = T7 + Ta; + Ti = FMS(KP500000000, Ta, T7); + T4 = Cr[WS(csr, 4)]; + } + } + { + E T17, TK, T5, Te, Tk, Td; + TW = FMA(KP853480001, TV, TU); + T17 = FNMS(KP853480001, TV, TU); + TK = FNMS(KP853480001, TJ, TG); + T14 = FMA(KP853480001, TJ, TG); + T5 = T3 + T4; + Te = T3 - T4; + { + E Tn, Tg, Th, T6; + TS = FNMS(KP522026385, TK, TN); + TO = FMA(KP957805992, TN, TK); + Tn = Te - Tf; + Tg = Te + Tf; + Th = FNMS(KP500000000, T5, T2); + T6 = T2 + T5; + T18 = FNMS(KP522026385, T17, T16); + T1e = FMA(KP957805992, T16, T17); + { + E Tm, Tj, Tc, Tp, To; + Tm = Th + Ti; + Tj = Th - Ti; + Tc = T6 + Tb; + Tp = T6 - Tb; + To = FNMS(KP514918778, Tn, Tm); + TY = FMA(KP686558370, Tm, Tn); + TX = FNMS(KP226109445, Tg, Tj); + Tk = FMA(KP301479260, Tj, Tg); + R0[0] = FMA(KP2_000000000, Tc, T1); /* Tc is the sum of the six Cr[WS(csr, 1..6)] loads and T1 is Cr[0]; assuming FMA(a, b, c) means a times b plus c (macro not defined in this file - confirm), this stores Cr[0] plus twice that sum */ + Td = FNMS(KP166666666, Tc, T1); + TQ = FNMS(KP859542535, To, Tp); + Tq = FMA(KP581704778, Tp, To); + } + } + TP = FNMS(KP503537032, Tk, Td); + Tl = FMA(KP1_007074065, Tk, Td); + } + } + T1d = FNMS(KP1_033041561, Tq, Tl); + Tr = FMA(KP1_033041561, Tq, Tl); + { + E T13, TR, T19, TZ; + T13 = FNMS(KP600925212, TQ, TP); + TR = FMA(KP600925212, TQ, TP); + T19 = FMA(KP769338817, TY, TX); + TZ = FNMS(KP769338817, TY, TX); + R0[WS(rs, 4)] = FMA(KP1_200954543, T1e, T1d); + R1[WS(rs, 2)] = FNMS(KP1_200954543, T1e, T1d); + R0[WS(rs, 6)] = FMA(KP1_200954543, TO, Tr); + R1[0] = FNMS(KP1_200954543, TO, Tr); + { + E T1b, T15, T11, TT; + T1b = FNMS(KP1_040057143, T14, T13); + T15 = FMA(KP1_040057143, T14, T13); + T11 = FMA(KP1_150281458, TS, TR); + TT = FNMS(KP1_150281458, TS, TR); + { + E T1c,
T1a, T12, T10; + T1c = FMA(KP875502302, T19, T18); + T1a = FNMS(KP875502302, T19, T18); + T12 = FMA(KP968287244, TZ, TW); + T10 = FNMS(KP968287244, TZ, TW); + R1[WS(rs, 5)] = FMA(KP1_150281458, T1c, T1b); + R0[WS(rs, 3)] = FNMS(KP1_150281458, T1c, T1b); + R1[WS(rs, 3)] = FMA(KP1_150281458, T1a, T15); + R0[WS(rs, 1)] = FNMS(KP1_150281458, T1a, T15); + R0[WS(rs, 5)] = FMA(KP1_040057143, T12, T11); + R0[WS(rs, 2)] = FNMS(KP1_040057143, T12, T11); + R1[WS(rs, 4)] = FMA(KP1_040057143, T10, TT); + R1[WS(rs, 1)] = FNMS(KP1_040057143, T10, TT); + } + } + } + } + } +} + +static const kr2c_desc desc = { 13, "r2cb_13", {18, 0, 58, 0}, &GENUS }; /* descriptor for the HAVE_FMA build; 18, 0 and 58 equal the addition, multiplication and fused multiply/add counts quoted in the header comment of this variant; NOTE(review): meaning of the fourth count and of GENUS is defined outside this file */ + +void X(codelet_r2cb_13) (planner *p) { /* registration hook: hands the r2cb_13 function and its descriptor to planner p through X(kr2c_register) */ + X(kr2c_register) (p, r2cb_13, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 13 -name r2cb_13 -include r2cb.h */ + +/* + * This function contains 76 FP additions, 35 FP multiplications, + * (or, 56 additions, 15 multiplications, 20 fused multiply/add), + * 56 stack variables, 19 constants, and 26 memory accesses + */ +#include "r2cb.h" + +static void r2cb_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* build used when HAVE_FMA is not defined; same signature as the FMA build and the same access pattern: each of the v passes loads Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..6)] and stores R0[0], R0[WS(rs, 1..6)], R1[0] and R1[WS(rs, 1..5)]. NOTE(review): presumably numerically equivalent to the FMA build since both come from the same generator with -n 13 - confirm */ +{ + DK(KP1_007074065, +1.007074065727533254493747707736933954186697125); + DK(KP227708958, +0.227708958111581597949308691735310621069285120); + DK(KP531932498, +0.531932498429674575175042127684371897596660533); + DK(KP774781170, +0.774781170935234584261351932853525703557550433); + DK(KP265966249, +0.265966249214837287587521063842185948798330267); + DK(KP516520780, +0.516520780623489722840901288569017135705033622); + DK(KP151805972, +0.151805972074387731966205794490207080712856746); + DK(KP503537032, +0.503537032863766627246873853868466977093348562); + DK(KP166666666, +0.166666666666666666666666666666666666666666667); + DK(KP600925212, +0.600925212577331548853203544578415991041882762); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); +
DK(KP256247671, +0.256247671582936600958684654061725059144125175); + DK(KP156891391, +0.156891391051584611046832726756003269660212636); + DK(KP348277202, +0.348277202304271810011321589858529485233929352); + DK(KP1_150281458, +1.150281458948006242736771094910906776922003215); + DK(KP300238635, +0.300238635966332641462884626667381504676006424); + DK(KP011599105, +0.011599105605768290721655456654083252189827041); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) { /* same loop header as the FMA build: i runs from v down to 1, outputs step by ovs and inputs by ivs */ + E TG, TS, TR, T15, TJ, TT, T1, Tm, Tc, Td, Tg, Tj, Tk, Tn, To; + E Tp; + { + E Ts, Tv, Tw, TE, TC, TB, Tz, TD, TA, TF; + { + E Tt, Tu, Tx, Ty; + Ts = Ci[WS(csi, 1)]; + Tt = Ci[WS(csi, 3)]; + Tu = Ci[WS(csi, 4)]; + Tv = Tt - Tu; + Tw = FMS(KP2_000000000, Ts, Tv); + TE = KP1_732050807 * (Tt + Tu); + TC = Ci[WS(csi, 5)]; + Tx = Ci[WS(csi, 6)]; + Ty = Ci[WS(csi, 2)]; + TB = Tx + Ty; + Tz = KP1_732050807 * (Tx - Ty); + TD = FNMS(KP2_000000000, TC, TB); + } + TA = Tw + Tz; + TF = TD - TE; + TG = FMA(KP011599105, TA, KP300238635 * TF); + TS = FNMS(KP011599105, TF, KP300238635 * TA); + { + E TP, TQ, TH, TI; + TP = Ts + Tv; + TQ = TB + TC; + TR = FNMS(KP348277202, TQ, KP1_150281458 * TP); + T15 = FMA(KP348277202, TP, KP1_150281458 * TQ); + TH = Tw - Tz; + TI = TE + TD; + TJ = FMA(KP156891391, TH, KP256247671 * TI); + TT = FNMS(KP256247671, TH, KP156891391 * TI); + } + } + { + E Tb, Ti, Tf, T6, Th, Te; + T1 = Cr[0]; + { + E T7, T8, T9, Ta; + T7 = Cr[WS(csr, 5)]; + T8 = Cr[WS(csr, 2)]; + T9 = Cr[WS(csr, 6)]; + Ta = T8 + T9; + Tb = T7 + Ta; + Ti = FNMS(KP500000000, Ta, T7); + Tf = T8 - T9; + } + { + E T2, T3, T4, T5; + T2 = Cr[WS(csr, 1)]; + T3 = Cr[WS(csr, 3)]; + T4 = Cr[WS(csr, 4)]; + T5 = T3 + T4; + T6 = T2 + T5; + Th =
FNMS(KP500000000, T5, T2); + Te = T3 - T4; + } + Tm = KP600925212 * (T6 - Tb); + Tc = T6 + Tb; + Td = FNMS(KP166666666, Tc, T1); + Tg = Te + Tf; + Tj = Th + Ti; + Tk = FMA(KP503537032, Tg, KP151805972 * Tj); + Tn = Th - Ti; + To = Te - Tf; + Tp = FNMS(KP265966249, To, KP516520780 * Tn); + } + R0[0] = FMA(KP2_000000000, Tc, T1); /* Tc is the sum of the six Cr[WS(csr, 1..6)] loads and T1 is Cr[0]; assuming FMA(a, b, c) means a times b plus c (macro not defined in this file - confirm), this stores Cr[0] plus twice that sum */ + { + E TK, T1b, TV, T12, T16, T18, TO, T1a, Tr, T17, T11, T13; + { + E TU, T14, TM, TN; + TK = KP1_732050807 * (TG + TJ); + T1b = KP1_732050807 * (TS - TT); + TU = TS + TT; + TV = TR - TU; + T12 = FMA(KP2_000000000, TU, TR); + T14 = TG - TJ; + T16 = FMS(KP2_000000000, T14, T15); + T18 = T14 + T15; + TM = FMA(KP774781170, To, KP531932498 * Tn); + TN = FNMS(KP1_007074065, Tj, KP227708958 * Tg); + TO = TM - TN; + T1a = TM + TN; + { + E Tl, Tq, TZ, T10; + Tl = Td - Tk; + Tq = Tm - Tp; + Tr = Tl - Tq; + T17 = Tq + Tl; + TZ = FMA(KP2_000000000, Tk, Td); + T10 = FMA(KP2_000000000, Tp, Tm); + T11 = TZ - T10; + T13 = T10 + TZ; + } + } + R1[WS(rs, 2)] = T11 - T12; + R0[WS(rs, 6)] = T13 - T16; + R1[0] = T13 + T16; + R0[WS(rs, 4)] = T11 + T12; + { + E TL, TW, T19, T1c; + TL = Tr - TK; + TW = TO - TV; + R1[WS(rs, 3)] = TL - TW; + R0[WS(rs, 1)] = TL + TW; + T19 = T17 - T18; + T1c = T1a + T1b; + R1[WS(rs, 1)] = T19 - T1c; + R1[WS(rs, 4)] = T1c + T19; + } + { + E T1d, T1e, TX, TY; + T1d = T1a - T1b; + T1e = T17 + T18; + R0[WS(rs, 2)] = T1d + T1e; + R0[WS(rs, 5)] = T1e - T1d; + TX = Tr + TK; + TY = TO + TV; + R0[WS(rs, 3)] = TX - TY; + R1[WS(rs, 5)] = TX + TY; + } + } + } + } +} + +static const kr2c_desc desc = { 13, "r2cb_13", {56, 15, 20, 0}, &GENUS }; /* descriptor for the non-FMA build; 56, 15 and 20 equal the addition, multiplication and fused multiply/add counts quoted in the header comment of this variant; NOTE(review): meaning of the fourth count and of GENUS is defined outside this file */ + +void X(codelet_r2cb_13) (planner *p) { /* registration hook: hands the r2cb_13 function and its descriptor to planner p through X(kr2c_register) */ + X(kr2c_register) (p, r2cb_13, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c)
2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 14 -name r2cb_14 -include r2cb.h */ + +/* + * This function contains 62 FP additions, 44 FP multiplications, + * (or, 18 additions, 0 multiplications, 44 fused multiply/add), + * 58 stack variables, 7 constants, and 28 memory accesses + */ +#include "r2cb.h" + +static void r2cb_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); + DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + for (i = v; i > 0; i = 
i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { + E Te, TO, TT, TG, TJ, TD, TR, TE; + { + E T3, TK, To, TM, Tu, TL, Tr, TS, TA, TN, TX, TF, Tv, T7, Tf; + E T6, Th, Tc, T8, T1, T2; + T1 = Cr[0]; + T2 = Cr[WS(csr, 7)]; + { + E Ts, Tt, Tp, Tq, Tm, Tn; + Tm = Ci[WS(csi, 4)]; + Tn = Ci[WS(csi, 3)]; + Ts = Ci[WS(csi, 6)]; + Te = T1 + T2; + T3 = T1 - T2; + TK = Tm + Tn; + To = Tm - Tn; + Tt = Ci[WS(csi, 1)]; + Tp = Ci[WS(csi, 2)]; + Tq = Ci[WS(csi, 5)]; + { + E T4, T5, Ta, Tb; + T4 = Cr[WS(csr, 2)]; + TM = Ts + Tt; + Tu = Ts - Tt; + TL = Tp + Tq; + Tr = Tp - Tq; + TS = FMA(KP554958132, TK, TM); + TA = FMA(KP554958132, To, Tu); + TN = FMA(KP554958132, TM, TL); + TX = FNMS(KP554958132, TL, TK); + TF = FNMS(KP554958132, Tr, To); + Tv = FMA(KP554958132, Tu, Tr); + T5 = Cr[WS(csr, 5)]; + Ta = Cr[WS(csr, 6)]; + Tb = Cr[WS(csr, 1)]; + T7 = Cr[WS(csr, 4)]; + Tf = T4 + T5; + T6 = T4 - T5; + Th = Ta + Tb; + Tc = Ta - Tb; + T8 = Cr[WS(csr, 3)]; + } + } + { + E Tw, Tx, TP, Tg, T9, TY, TC, TI, TQ; + Tw = FMA(KP801937735, Tv, To); + Tx = FNMS(KP356895867, Tf, Th); + TP = FNMS(KP356895867, T6, Tc); + Tg = T7 + T8; + T9 = T7 - T8; + TY = FNMS(KP801937735, TX, TM); + { + E TB, TH, TV, Ty, Tl, Ti, TW, Tz; + TB = FNMS(KP801937735, TA, Tr); + Ti = Tf + Tg + Th; + TC = FNMS(KP356895867, Th, Tg); + { + E Tj, Td, TU, Tk; + Tj = FNMS(KP356895867, Tg, Tf); + Td = T6 + T9 + Tc; + TH = FNMS(KP356895867, T9, T6); + TU = FNMS(KP356895867, Tc, T9); + R0[0] = FMA(KP2_000000000, Ti, Te); + Tk = FNMS(KP692021471, Tj, Th); + R1[WS(rs, 3)] = FMA(KP2_000000000, Td, T3); + TV = FNMS(KP692021471, TU, T6); + Ty = FNMS(KP692021471, Tx, Tg); + Tl = FNMS(KP1_801937735, Tk, Te); + } + TO = FMA(KP801937735, TN, TK); + TW = FNMS(KP1_801937735, TV, T3); + Tz = FNMS(KP1_801937735, Ty, Te); + R0[WS(rs, 3)] = FMA(KP1_949855824, Tw, Tl); + R0[WS(rs, 4)] = FNMS(KP1_949855824, Tw, Tl); + R1[WS(rs, 5)] = 
FMA(KP1_949855824, TY, TW); + R1[WS(rs, 1)] = FNMS(KP1_949855824, TY, TW); + R0[WS(rs, 6)] = FMA(KP1_949855824, TB, Tz); + R0[WS(rs, 1)] = FNMS(KP1_949855824, TB, Tz); + TI = FNMS(KP692021471, TH, Tc); + } + TT = FNMS(KP801937735, TS, TL); + TQ = FNMS(KP692021471, TP, T9); + TG = FNMS(KP801937735, TF, Tu); + TJ = FNMS(KP1_801937735, TI, T3); + TD = FNMS(KP692021471, TC, Tf); + TR = FNMS(KP1_801937735, TQ, T3); + } + } + R1[WS(rs, 6)] = FMA(KP1_949855824, TO, TJ); + R1[0] = FNMS(KP1_949855824, TO, TJ); + TE = FNMS(KP1_801937735, TD, Te); + R1[WS(rs, 2)] = FMA(KP1_949855824, TT, TR); + R1[WS(rs, 4)] = FNMS(KP1_949855824, TT, TR); + R0[WS(rs, 2)] = FMA(KP1_949855824, TG, TE); + R0[WS(rs, 5)] = FNMS(KP1_949855824, TG, TE); + } + } +} + +static const kr2c_desc desc = { 14, "r2cb_14", {18, 0, 44, 0}, &GENUS }; + +void X(codelet_r2cb_14) (planner *p) { + X(kr2c_register) (p, r2cb_14, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 14 -name r2cb_14 -include r2cb.h */ + +/* + * This function contains 62 FP additions, 38 FP multiplications, + * (or, 36 additions, 12 multiplications, 26 fused multiply/add), + * 28 stack variables, 7 constants, and 28 memory accesses + */ +#include "r2cb.h" + +static void r2cb_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); + DK(KP445041867, +0.445041867912628808577805128993589518932711138); + DK(KP1_246979603, +1.246979603717467061050009768008479621264549462); + DK(KP867767478, +0.867767478235116240951536665696717509219981456); + DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); + DK(KP1_563662964, +1.563662964936059617416889053348115500464669037); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + 
ivs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { + E T3, Td, T6, Te, Tq, Tz, Tn, Ty, Tc, Tg, Tk, Tx, T9, Tf, T1; + E T2; + T1 = Cr[0]; + T2 = Cr[WS(csr, 7)]; + T3 = T1 - T2; + Td = T1 + T2; + { + E T4, T5, To, Tp; + T4 = Cr[WS(csr, 2)]; + T5 = Cr[WS(csr, 5)]; + T6 = T4 - T5; + Te = T4 + T5; + To = Ci[WS(csi, 2)]; + Tp = Ci[WS(csi, 5)]; + Tq = To - Tp; + Tz = To + Tp; + } + { + E Tl, Tm, Ta, Tb; + Tl = Ci[WS(csi, 6)]; + Tm = Ci[WS(csi, 1)]; + Tn = Tl - Tm; + Ty = Tl + Tm; + Ta = Cr[WS(csr, 6)]; + Tb = Cr[WS(csr, 1)]; + Tc = Ta - Tb; + Tg = Ta + Tb; + } + { + E Ti, Tj, T7, T8; + Ti = Ci[WS(csi, 4)]; + Tj = Ci[WS(csi, 3)]; + Tk = Ti - Tj; + Tx = Ti + Tj; + T7 = Cr[WS(csr, 4)]; + T8 = Cr[WS(csr, 3)]; + T9 = T7 - T8; + Tf = T7 + T8; + } + R1[WS(rs, 3)] = FMA(KP2_000000000, T6 + T9 + Tc, T3); + R0[0] = FMA(KP2_000000000, Te + Tf + Tg, Td); + { + E Tr, Th, TE, TD; + Tr = FNMS(KP1_949855824, Tn, KP1_563662964 * Tk) - (KP867767478 * Tq); + Th = FMA(KP1_246979603, Tf, Td) + FNMA(KP445041867, Tg, KP1_801937735 * Te); + R0[WS(rs, 2)] = Th - Tr; + R0[WS(rs, 5)] = Th + Tr; + TE = FMA(KP867767478, Tx, KP1_563662964 * Ty) - (KP1_949855824 * Tz); + TD = FMA(KP1_246979603, Tc, T3) + FNMA(KP1_801937735, T9, KP445041867 * T6); + R1[WS(rs, 2)] = TD - TE; + R1[WS(rs, 4)] = TD + TE; + } + { + E Tt, Ts, TA, Tw; + Tt = FMA(KP867767478, Tk, KP1_563662964 * Tn) - (KP1_949855824 * Tq); + Ts = FMA(KP1_246979603, Tg, Td) + FNMA(KP1_801937735, Tf, KP445041867 * Te); + R0[WS(rs, 6)] = Ts - Tt; + R0[WS(rs, 1)] = Ts + Tt; + TA = FNMS(KP1_949855824, Ty, KP1_563662964 * Tx) - (KP867767478 * Tz); + Tw = FMA(KP1_246979603, T9, T3) + FNMA(KP445041867, Tc, KP1_801937735 * T6); + R1[WS(rs, 5)] = Tw - TA; + R1[WS(rs, 1)] = Tw + TA; + } + { + E TC, TB, Tv, Tu; + TC = FMA(KP1_563662964, Tz, KP1_949855824 * Tx) + (KP867767478 * Ty); + TB = FMA(KP1_246979603, T6, T3) + FNMA(KP1_801937735, Tc, KP445041867 * T9); + R1[0] = TB - TC; + R1[WS(rs, 6)] = TB + TC; + Tv 
= FMA(KP1_563662964, Tq, KP1_949855824 * Tk) + (KP867767478 * Tn); + Tu = FMA(KP1_246979603, Te, Td) + FNMA(KP1_801937735, Tg, KP445041867 * Tf); + R0[WS(rs, 4)] = Tu - Tv; + R0[WS(rs, 3)] = Tu + Tv; + } + } + } +} + +static const kr2c_desc desc = { 14, "r2cb_14", {36, 12, 26, 0}, &GENUS }; + +void X(codelet_r2cb_14) (planner *p) { + X(kr2c_register) (p, r2cb_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cb_15 -include r2cb.h */ + +/* + * This function contains 64 FP additions, 43 FP multiplications, + * (or, 21 additions, 0 multiplications, 43 fused multiply/add), + * 54 stack variables, 9 constants, and 30 memory accesses + */ +#include "r2cb.h" + +static void r2cb_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E TL, Tz, TM, TK; + { + E T3, Th, Tt, TD, TI, TH, TY, TC, TZ, Tu, Tm, Tv, Tr, Te, TW; + E Tg, T1, T2, T12, T10, TV; + Tg = Ci[WS(csi, 5)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 5)]; + { + E T4, TA, T9, TF, T7, Tj, Tc, Tk, TG, Tq, Tf, Tl, TB; + T4 = Cr[WS(csr, 3)]; + 
TA = Ci[WS(csi, 3)]; + T9 = Cr[WS(csr, 6)]; + Tf = T1 - T2; + T3 = FMA(KP2_000000000, T2, T1); + TF = Ci[WS(csi, 6)]; + { + E Ta, Tb, T5, T6, To, Tp; + T5 = Cr[WS(csr, 7)]; + T6 = Cr[WS(csr, 2)]; + Th = FMA(KP1_732050807, Tg, Tf); + Tt = FNMS(KP1_732050807, Tg, Tf); + Ta = Cr[WS(csr, 4)]; + TD = T5 - T6; + T7 = T5 + T6; + Tb = Cr[WS(csr, 1)]; + To = Ci[WS(csi, 4)]; + Tp = Ci[WS(csi, 1)]; + Tj = Ci[WS(csi, 7)]; + Tc = Ta + Tb; + TI = Ta - Tb; + Tk = Ci[WS(csi, 2)]; + TG = Tp - To; + Tq = To + Tp; + } + Tl = Tj - Tk; + TB = Tj + Tk; + TH = FNMS(KP500000000, TG, TF); + TY = TG + TF; + TC = FMA(KP500000000, TB, TA); + TZ = TA - TB; + { + E Ti, T8, Td, Tn; + Ti = FNMS(KP2_000000000, T4, T7); + T8 = T4 + T7; + Td = T9 + Tc; + Tn = FNMS(KP2_000000000, T9, Tc); + Tu = FNMS(KP1_732050807, Tl, Ti); + Tm = FMA(KP1_732050807, Tl, Ti); + Tv = FNMS(KP1_732050807, Tq, Tn); + Tr = FMA(KP1_732050807, Tq, Tn); + Te = T8 + Td; + TW = T8 - Td; + } + } + T12 = FMA(KP618033988, TY, TZ); + T10 = FNMS(KP618033988, TZ, TY); + TV = FNMS(KP500000000, Te, T3); + R0[0] = FMA(KP2_000000000, Te, T3); + { + E TJ, TE, TT, TP, TU, TS, Ty, Tw, Tx; + { + E TO, Ts, TQ, TN, TR, T11, TX; + TO = Tr - Tm; + Ts = Tm + Tr; + T11 = FMA(KP1_118033988, TW, TV); + TX = FNMS(KP1_118033988, TW, TV); + TQ = FNMS(KP866025403, TI, TH); + TJ = FMA(KP866025403, TI, TH); + TN = FMA(KP250000000, Ts, Th); + R0[WS(rs, 3)] = FNMS(KP1_902113032, T12, T11); + R1[WS(rs, 4)] = FMA(KP1_902113032, T12, T11); + R0[WS(rs, 6)] = FMA(KP1_902113032, T10, TX); + R1[WS(rs, 1)] = FNMS(KP1_902113032, T10, TX); + TR = FNMS(KP866025403, TD, TC); + TE = FMA(KP866025403, TD, TC); + R1[WS(rs, 2)] = Th - Ts; + TT = FMA(KP559016994, TO, TN); + TP = FNMS(KP559016994, TO, TN); + TU = FMA(KP618033988, TQ, TR); + TS = FNMS(KP618033988, TR, TQ); + } + Ty = Tv - Tu; + Tw = Tu + Tv; + R0[WS(rs, 7)] = FMA(KP1_902113032, TU, TT); + R1[WS(rs, 5)] = FNMS(KP1_902113032, TU, TT); + R0[WS(rs, 1)] = FMA(KP1_902113032, TS, TP); + R0[WS(rs, 4)] = 
FNMS(KP1_902113032, TS, TP); + Tx = FMA(KP250000000, Tw, Tt); + R0[WS(rs, 5)] = Tt - Tw; + TL = FNMS(KP559016994, Ty, Tx); + Tz = FMA(KP559016994, Ty, Tx); + TM = FNMS(KP618033988, TE, TJ); + TK = FMA(KP618033988, TJ, TE); + } + } + R1[WS(rs, 3)] = FMA(KP1_902113032, TM, TL); + R1[WS(rs, 6)] = FNMS(KP1_902113032, TM, TL); + R0[WS(rs, 2)] = FMA(KP1_902113032, TK, Tz); + R1[0] = FNMS(KP1_902113032, TK, Tz); + } + } +} + +static const kr2c_desc desc = { 15, "r2cb_15", {21, 0, 43, 0}, &GENUS }; + +void X(codelet_r2cb_15) (planner *p) { + X(kr2c_register) (p, r2cb_15, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cb_15 -include r2cb.h */ + +/* + * This function contains 64 FP additions, 31 FP multiplications, + * (or, 47 additions, 14 multiplications, 17 fused multiply/add), + * 44 stack variables, 7 constants, and 30 memory accesses + */ +#include "r2cb.h" + +static void r2cb_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E T3, Tu, Ti, TB, TZ, T10, TE, TG, TJ, Tn, Tv, Ts, Tw, T8, Td; + E Te; + { + E Th, T1, T2, Tf, Tg; + Tg = Ci[WS(csi, 5)]; + Th = KP1_732050807 * Tg; + T1 = Cr[0]; + T2 = Cr[WS(csr, 5)]; + Tf = T1 - T2; + T3 = 
FMA(KP2_000000000, T2, T1); + Tu = Tf - Th; + Ti = Tf + Th; + } + { + E T4, TD, T9, TI, T5, T6, T7, Ta, Tb, Tc, Tr, TH, Tm, TC, Tj; + E To; + T4 = Cr[WS(csr, 3)]; + TD = Ci[WS(csi, 3)]; + T9 = Cr[WS(csr, 6)]; + TI = Ci[WS(csi, 6)]; + T5 = Cr[WS(csr, 7)]; + T6 = Cr[WS(csr, 2)]; + T7 = T5 + T6; + Ta = Cr[WS(csr, 4)]; + Tb = Cr[WS(csr, 1)]; + Tc = Ta + Tb; + { + E Tp, Tq, Tk, Tl; + Tp = Ci[WS(csi, 4)]; + Tq = Ci[WS(csi, 1)]; + Tr = KP866025403 * (Tp + Tq); + TH = Tp - Tq; + Tk = Ci[WS(csi, 7)]; + Tl = Ci[WS(csi, 2)]; + Tm = KP866025403 * (Tk - Tl); + TC = Tk + Tl; + } + TB = KP866025403 * (T5 - T6); + TZ = TD - TC; + T10 = TI - TH; + TE = FMA(KP500000000, TC, TD); + TG = KP866025403 * (Ta - Tb); + TJ = FMA(KP500000000, TH, TI); + Tj = FNMS(KP500000000, T7, T4); + Tn = Tj - Tm; + Tv = Tj + Tm; + To = FNMS(KP500000000, Tc, T9); + Ts = To - Tr; + Tw = To + Tr; + T8 = T4 + T7; + Td = T9 + Tc; + Te = T8 + Td; + } + R0[0] = FMA(KP2_000000000, Te, T3); + { + E T11, T13, TY, T12, TW, TX; + T11 = FNMS(KP1_902113032, T10, KP1_175570504 * TZ); + T13 = FMA(KP1_902113032, TZ, KP1_175570504 * T10); + TW = FNMS(KP500000000, Te, T3); + TX = KP1_118033988 * (T8 - Td); + TY = TW - TX; + T12 = TX + TW; + R0[WS(rs, 6)] = TY - T11; + R1[WS(rs, 4)] = T12 + T13; + R1[WS(rs, 1)] = TY + T11; + R0[WS(rs, 3)] = T12 - T13; + } + { + E TP, Tt, TO, TT, TV, TR, TS, TU, TQ; + TP = KP1_118033988 * (Tn - Ts); + Tt = Tn + Ts; + TO = FNMS(KP500000000, Tt, Ti); + TR = TE - TB; + TS = TJ - TG; + TT = FNMS(KP1_902113032, TS, KP1_175570504 * TR); + TV = FMA(KP1_902113032, TR, KP1_175570504 * TS); + R1[WS(rs, 2)] = FMA(KP2_000000000, Tt, Ti); + TU = TP + TO; + R1[WS(rs, 5)] = TU - TV; + R0[WS(rs, 7)] = TU + TV; + TQ = TO - TP; + R0[WS(rs, 1)] = TQ - TT; + R0[WS(rs, 4)] = TQ + TT; + } + { + E Tz, Tx, Ty, TL, TN, TF, TK, TM, TA; + Tz = KP1_118033988 * (Tv - Tw); + Tx = Tv + Tw; + Ty = FNMS(KP500000000, Tx, Tu); + TF = TB + TE; + TK = TG + TJ; + TL = FNMS(KP1_902113032, TK, KP1_175570504 * TF); + TN = 
FMA(KP1_902113032, TF, KP1_175570504 * TK); + R0[WS(rs, 5)] = FMA(KP2_000000000, Tx, Tu); + TM = Tz + Ty; + R1[0] = TM - TN; + R0[WS(rs, 2)] = TM + TN; + TA = Ty - Tz; + R1[WS(rs, 3)] = TA - TL; + R1[WS(rs, 6)] = TA + TL; + } + } + } +} + +static const kr2c_desc desc = { 15, "r2cb_15", {47, 14, 17, 0}, &GENUS }; + +void X(codelet_r2cb_15) (planner *p) { + X(kr2c_register) (p, r2cb_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -name r2cb_16 -include r2cb.h */ + +/* + * This function contains 58 FP additions, 32 FP multiplications, + * (or, 26 additions, 0 multiplications, 32 fused multiply/add), + * 47 stack variables, 4 constants, and 32 memory accesses + */ +#include "r2cb.h" + +static void r2cb_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E TN, TS, TF, TI; + { + E T8, TD, Tj, TL, T5, TM, TE, To, Td, Tq, Tc, TP, Ty, Te, Tr; + E Ts; + { + E T4, Ti, T1, T2; + T4 = Cr[WS(csr, 4)]; + Ti = Ci[WS(csi, 4)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 8)]; + { + E Tk, Tn, T6, T7; + T6 = Cr[WS(csr, 2)]; + T7 = Cr[WS(csr, 6)]; + { + E Tl, Th, T3, Tm; + Tl = Ci[WS(csi, 2)]; + Th = T1 - T2; + T3 = T1 + T2; + Tk = T6 - T7; + T8 = T6 + T7; + Tm = Ci[WS(csi, 6)]; + TD = FMA(KP2_000000000, Ti, Th); + Tj = FNMS(KP2_000000000, Ti, Th); + TL = FNMS(KP2_000000000, T4, T3); + T5 = FMA(KP2_000000000, T4, T3); + Tn = Tl + Tm; + TM = Tl - Tm; + } + { + E Ta, Tb, Tw, Tx; + 
Ta = Cr[WS(csr, 1)]; + TE = Tk + Tn; + To = Tk - Tn; + Tb = Cr[WS(csr, 7)]; + Tw = Ci[WS(csi, 1)]; + Tx = Ci[WS(csi, 7)]; + Td = Cr[WS(csr, 5)]; + Tq = Ta - Tb; + Tc = Ta + Tb; + TP = Tw - Tx; + Ty = Tw + Tx; + Te = Cr[WS(csr, 3)]; + Tr = Ci[WS(csi, 5)]; + Ts = Ci[WS(csi, 3)]; + } + } + } + { + E TV, TG, TW, TH, TB, Tp, TA, TC, TJ, TK; + { + E T9, Tz, Tg, Tu, TT, TU, TO, TR; + TV = FNMS(KP2_000000000, T8, T5); + T9 = FMA(KP2_000000000, T8, T5); + { + E Tv, Tf, TQ, Tt; + Tv = Td - Te; + Tf = Td + Te; + TQ = Tr - Ts; + Tt = Tr + Ts; + TG = Ty - Tv; + Tz = Tv + Ty; + TO = Tc - Tf; + Tg = Tc + Tf; + TW = TQ + TP; + TR = TP - TQ; + TH = Tq + Tt; + Tu = Tq - Tt; + } + TN = FNMS(KP2_000000000, TM, TL); + TT = FMA(KP2_000000000, TM, TL); + TU = TO + TR; + TS = TO - TR; + R0[0] = FMA(KP2_000000000, Tg, T9); + R0[WS(rs, 4)] = FNMS(KP2_000000000, Tg, T9); + R0[WS(rs, 7)] = FMA(KP1_414213562, TU, TT); + R0[WS(rs, 3)] = FNMS(KP1_414213562, TU, TT); + TB = FNMS(KP1_414213562, To, Tj); + Tp = FMA(KP1_414213562, To, Tj); + TA = FNMS(KP414213562, Tz, Tu); + TC = FMA(KP414213562, Tu, Tz); + } + R0[WS(rs, 6)] = FMA(KP2_000000000, TW, TV); + R0[WS(rs, 2)] = FNMS(KP2_000000000, TW, TV); + R1[0] = FMA(KP1_847759065, TA, Tp); + R1[WS(rs, 4)] = FNMS(KP1_847759065, TA, Tp); + TF = FNMS(KP1_414213562, TE, TD); + TJ = FMA(KP1_414213562, TE, TD); + TK = FMA(KP414213562, TG, TH); + TI = FNMS(KP414213562, TH, TG); + R1[WS(rs, 6)] = FMA(KP1_847759065, TC, TB); + R1[WS(rs, 2)] = FNMS(KP1_847759065, TC, TB); + R1[WS(rs, 7)] = FMA(KP1_847759065, TK, TJ); + R1[WS(rs, 3)] = FNMS(KP1_847759065, TK, TJ); + } + } + R0[WS(rs, 1)] = FMA(KP1_414213562, TS, TN); + R0[WS(rs, 5)] = FNMS(KP1_414213562, TS, TN); + R1[WS(rs, 5)] = FMA(KP1_847759065, TI, TF); + R1[WS(rs, 1)] = FNMS(KP1_847759065, TI, TF); + } + } +} + +static const kr2c_desc desc = { 16, "r2cb_16", {26, 0, 32, 0}, &GENUS }; + +void X(codelet_r2cb_16) (planner *p) { + X(kr2c_register) (p, r2cb_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated 
by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -name r2cb_16 -include r2cb.h */ + +/* + * This function contains 58 FP additions, 18 FP multiplications, + * (or, 54 additions, 14 multiplications, 4 fused multiply/add), + * 31 stack variables, 4 constants, and 32 memory accesses + */ +#include "r2cb.h" + +static void r2cb_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E T9, TS, Tl, TG, T6, TR, Ti, TD, Td, Tq, Tg, Tt, Tn, Tu, TV; + E TU, TN, TK; + { + E T7, T8, TE, Tj, Tk, TF; + T7 = Cr[WS(csr, 2)]; + T8 = Cr[WS(csr, 6)]; + TE = T7 - T8; + Tj = Ci[WS(csi, 2)]; + Tk = Ci[WS(csi, 6)]; + TF = Tj + Tk; + T9 = KP2_000000000 * (T7 + T8); + TS = KP1_414213562 * (TE + TF); + Tl = KP2_000000000 * (Tj - Tk); + TG = KP1_414213562 * (TE - TF); + } + { + E T5, TC, T3, TA; + { + E T4, TB, T1, T2; + T4 = Cr[WS(csr, 4)]; + T5 = KP2_000000000 * T4; + TB = Ci[WS(csi, 4)]; + TC = KP2_000000000 * TB; + T1 = Cr[0]; + T2 = Cr[WS(csr, 8)]; + T3 = T1 + T2; + TA = T1 - T2; + } + T6 = T3 + T5; + TR = TA + TC; + Ti = T3 - T5; + TD = TA - TC; + } + { + E TI, TM, TL, TJ; + { + E Tb, Tc, To, Tp; + Tb = Cr[WS(csr, 1)]; + Tc = Cr[WS(csr, 7)]; + Td = Tb + Tc; + TI = Tb - Tc; + To = Ci[WS(csi, 1)]; + Tp = Ci[WS(csi, 7)]; + Tq = To - Tp; + TM = To + Tp; + } + { + E Te, Tf, Tr, Ts; + Te = Cr[WS(csr, 5)]; + Tf = Cr[WS(csr, 3)]; + Tg = Te + Tf; + TL = Te - Tf; + Tr = Ci[WS(csi, 5)]; + Ts = Ci[WS(csi, 3)]; + Tt = Tr - Ts; + TJ = Tr + Ts; + } 
+ Tn = Td - Tg; + Tu = Tq - Tt; + TV = TM - TL; + TU = TI + TJ; + TN = TL + TM; + TK = TI - TJ; + } + { + E Ta, Th, TT, TW; + Ta = T6 + T9; + Th = KP2_000000000 * (Td + Tg); + R0[WS(rs, 4)] = Ta - Th; + R0[0] = Ta + Th; + TT = TR - TS; + TW = FNMS(KP1_847759065, TV, KP765366864 * TU); + R1[WS(rs, 5)] = TT - TW; + R1[WS(rs, 1)] = TT + TW; + } + { + E TX, TY, Tm, Tv; + TX = TR + TS; + TY = FMA(KP1_847759065, TU, KP765366864 * TV); + R1[WS(rs, 3)] = TX - TY; + R1[WS(rs, 7)] = TX + TY; + Tm = Ti - Tl; + Tv = KP1_414213562 * (Tn - Tu); + R0[WS(rs, 5)] = Tm - Tv; + R0[WS(rs, 1)] = Tm + Tv; + } + { + E Tw, Tx, TH, TO; + Tw = Ti + Tl; + Tx = KP1_414213562 * (Tn + Tu); + R0[WS(rs, 3)] = Tw - Tx; + R0[WS(rs, 7)] = Tw + Tx; + TH = TD + TG; + TO = FNMS(KP765366864, TN, KP1_847759065 * TK); + R1[WS(rs, 4)] = TH - TO; + R1[0] = TH + TO; + } + { + E TP, TQ, Ty, Tz; + TP = TD - TG; + TQ = FMA(KP765366864, TK, KP1_847759065 * TN); + R1[WS(rs, 2)] = TP - TQ; + R1[WS(rs, 6)] = TP + TQ; + Ty = T6 - T9; + Tz = KP2_000000000 * (Tt + Tq); + R0[WS(rs, 2)] = Ty - Tz; + R0[WS(rs, 6)] = Ty + Tz; + } + } + } +} + +static const kr2c_desc desc = { 16, "r2cb_16", {54, 14, 4, 0}, &GENUS }; + +void X(codelet_r2cb_16) (planner *p) { + X(kr2c_register) (p, r2cb_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -name r2cb_2 -include r2cb.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 3 stack variables, 0 constants, and 4 memory accesses + */ +#include "r2cb.h" + +static void r2cb_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + R0[0] = T1 + T2; + R1[0] = T1 - T2; + } + } +} + +static const kr2c_desc desc = { 2, "r2cb_2", {2, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cb_2) (planner *p) { + X(kr2c_register) (p, r2cb_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 2 -name r2cb_2 -include r2cb.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 3 stack variables, 0 constants, and 4 memory accesses + */ +#include "r2cb.h" + +static void 
r2cb_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + R1[0] = T1 - T2; + R0[0] = T1 + T2; + } + } +} + +static const kr2c_desc desc = { 2, "r2cb_2", {2, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cb_2) (planner *p) { + X(kr2c_register) (p, r2cb_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -name r2cb_20 -include r2cb.h */ + +/* + * This function contains 86 FP additions, 44 FP multiplications, + * (or, 42 additions, 0 multiplications, 44 fused multiply/add), + * 69 stack variables, 5 constants, and 40 memory accesses + */ +#include "r2cb.h" + +static void r2cb_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E TY, T1o, T1m, T14, T12, TX, T1n, T1j, TZ, T13; + { + E Tr, TD, Tl, T5, T1a, T1l, T1d, T1k, TT, T10, TO, T11, TE, TF, Tk; + E TI, TC, T1i, To, TG, T16; + { + E T4, Tq, T1, T2; + T4 = Cr[WS(csr, 5)]; + Tq = Ci[WS(csi, 5)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 10)]; + { + E Ts, T8, T19, TR, T18, Tb, TS, Tv, Tx, Tf, Ty, T1c, TM, T1b, Ti; + E Tz, Tt, Tu, TN, TA; + { + E TP, TQ, T9, Ta; + { + E T6, T7, Tp, T3; + T6 = Cr[WS(csr, 4)]; + T7 = Cr[WS(csr, 6)]; + TP = Ci[WS(csi, 4)]; + Tp = T1 - T2; + T3 = T1 + T2; + Ts = T6 - T7; + T8 = T6 + T7; + 
Tr = FMA(KP2_000000000, Tq, Tp); + TD = FNMS(KP2_000000000, Tq, Tp); + Tl = FMA(KP2_000000000, T4, T3); + T5 = FNMS(KP2_000000000, T4, T3); + TQ = Ci[WS(csi, 6)]; + } + T9 = Cr[WS(csr, 9)]; + Ta = Cr[WS(csr, 1)]; + Tt = Ci[WS(csi, 9)]; + T19 = TP + TQ; + TR = TP - TQ; + T18 = T9 - Ta; + Tb = T9 + Ta; + Tu = Ci[WS(csi, 1)]; + } + { + E TK, TL, Td, Te, Tg, Th; + Td = Cr[WS(csr, 8)]; + Te = Cr[WS(csr, 2)]; + TK = Ci[WS(csi, 8)]; + TS = Tt - Tu; + Tv = Tt + Tu; + Tx = Td - Te; + Tf = Td + Te; + TL = Ci[WS(csi, 2)]; + Tg = Cr[WS(csr, 7)]; + Th = Cr[WS(csr, 3)]; + Ty = Ci[WS(csi, 7)]; + T1c = TK + TL; + TM = TK - TL; + T1b = Tg - Th; + Ti = Tg + Th; + Tz = Ci[WS(csi, 3)]; + } + T1a = T18 + T19; + T1l = T19 - T18; + T1d = T1b + T1c; + T1k = T1c - T1b; + TT = TR - TS; + T10 = TS + TR; + TN = Tz - Ty; + TA = Ty + Tz; + TO = TM - TN; + T11 = TN + TM; + { + E Tm, Tc, Tj, Tn, Tw, TB; + Tm = T8 + Tb; + Tc = T8 - Tb; + Tj = Tf - Ti; + Tn = Tf + Ti; + TE = Ts - Tv; + Tw = Ts + Tv; + TB = Tx - TA; + TF = Tx + TA; + Tk = Tc + Tj; + TI = Tc - Tj; + TC = Tw + TB; + T1i = Tw - TB; + TY = Tm - Tn; + To = Tm + Tn; + } + } + } + R0[WS(rs, 5)] = FMA(KP2_000000000, Tk, T5); + R1[WS(rs, 7)] = FMA(KP2_000000000, TC, Tr); + TG = TE + TF; + T16 = TE - TF; + R0[0] = FMA(KP2_000000000, To, Tl); + { + E TU, TW, T1g, T1e, T15, TV, TJ, TH, T1h, T1f, T17; + TU = FNMS(KP618033988, TT, TO); + TW = FMA(KP618033988, TO, TT); + R1[WS(rs, 2)] = FMA(KP2_000000000, TG, TD); + TH = FNMS(KP500000000, Tk, T5); + T1g = FNMS(KP618033988, T1a, T1d); + T1e = FMA(KP618033988, T1d, T1a); + T15 = FNMS(KP500000000, TG, TD); + TV = FMA(KP1_118033988, TI, TH); + TJ = FNMS(KP1_118033988, TI, TH); + T1o = FMA(KP618033988, T1k, T1l); + T1m = FNMS(KP618033988, T1l, T1k); + R0[WS(rs, 3)] = FNMS(KP1_902113032, TW, TV); + R0[WS(rs, 7)] = FMA(KP1_902113032, TW, TV); + R0[WS(rs, 1)] = FMA(KP1_902113032, TU, TJ); + R0[WS(rs, 9)] = FNMS(KP1_902113032, TU, TJ); + T1f = FNMS(KP1_118033988, T16, T15); + T17 = FMA(KP1_118033988, T16, 
T15); + T1h = FNMS(KP500000000, TC, Tr); + R1[WS(rs, 6)] = FNMS(KP1_902113032, T1g, T1f); + R1[WS(rs, 8)] = FMA(KP1_902113032, T1g, T1f); + R1[WS(rs, 4)] = FMA(KP1_902113032, T1e, T17); + R1[0] = FNMS(KP1_902113032, T1e, T17); + T14 = FNMS(KP618033988, T10, T11); + T12 = FMA(KP618033988, T11, T10); + TX = FNMS(KP500000000, To, Tl); + T1n = FMA(KP1_118033988, T1i, T1h); + T1j = FNMS(KP1_118033988, T1i, T1h); + } + } + R1[WS(rs, 5)] = FNMS(KP1_902113032, T1o, T1n); + R1[WS(rs, 9)] = FMA(KP1_902113032, T1o, T1n); + R1[WS(rs, 3)] = FMA(KP1_902113032, T1m, T1j); + R1[WS(rs, 1)] = FNMS(KP1_902113032, T1m, T1j); + TZ = FMA(KP1_118033988, TY, TX); + T13 = FNMS(KP1_118033988, TY, TX); + R0[WS(rs, 4)] = FNMS(KP1_902113032, T14, T13); + R0[WS(rs, 6)] = FMA(KP1_902113032, T14, T13); + R0[WS(rs, 2)] = FMA(KP1_902113032, T12, TZ); + R0[WS(rs, 8)] = FNMS(KP1_902113032, T12, TZ); + } + } +} + +static const kr2c_desc desc = { 20, "r2cb_20", {42, 0, 44, 0}, &GENUS }; + +void X(codelet_r2cb_20) (planner *p) { + X(kr2c_register) (p, r2cb_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -name r2cb_20 -include r2cb.h */ + +/* + * This function contains 86 FP additions, 30 FP multiplications, + * (or, 70 additions, 14 multiplications, 16 fused multiply/add), + * 50 stack variables, 5 constants, and 40 memory accesses + */ +#include "r2cb.h" + +static void r2cb_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = 
Ci + ivs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E T6, TF, Tm, Tt, TQ, T1n, T1f, T12, T1m, TV, T13, T1c, Td, Tk, Tl; + E Ty, TD, TE, Tn, To, Tp, TG, TH, TI; + { + E T5, Ts, T3, Tq; + { + E T4, Tr, T1, T2; + T4 = Cr[WS(csr, 5)]; + T5 = KP2_000000000 * T4; + Tr = Ci[WS(csi, 5)]; + Ts = KP2_000000000 * Tr; + T1 = Cr[0]; + T2 = Cr[WS(csr, 10)]; + T3 = T1 + T2; + Tq = T1 - T2; + } + T6 = T3 - T5; + TF = Tq - Ts; + Tm = T3 + T5; + Tt = Tq + Ts; + } + { + E T9, Tu, TO, T1b, Tc, T1a, Tx, TP, Tg, Tz, TT, T1e, Tj, T1d, TC; + E TU; + { + E T7, T8, TM, TN; + T7 = Cr[WS(csr, 4)]; + T8 = Cr[WS(csr, 6)]; + T9 = T7 + T8; + Tu = T7 - T8; + TM = Ci[WS(csi, 4)]; + TN = Ci[WS(csi, 6)]; + TO = TM - TN; + T1b = TM + TN; + } + { + E Ta, Tb, Tv, Tw; + Ta = Cr[WS(csr, 9)]; + Tb = Cr[WS(csr, 1)]; + Tc = Ta + Tb; + T1a = Ta - Tb; + Tv = Ci[WS(csi, 9)]; + Tw = Ci[WS(csi, 1)]; + Tx = Tv + Tw; + TP = Tv - Tw; + } + { + E Te, Tf, TR, TS; + Te = Cr[WS(csr, 8)]; + Tf = Cr[WS(csr, 2)]; + Tg = Te + Tf; + Tz = Te - Tf; + TR = Ci[WS(csi, 8)]; + TS = Ci[WS(csi, 2)]; + TT = TR - TS; + T1e = TR + TS; + } + { + E Th, Ti, TA, TB; + Th = Cr[WS(csr, 7)]; + Ti = Cr[WS(csr, 3)]; + Tj = Th + Ti; + T1d = Th - Ti; + TA = Ci[WS(csi, 7)]; + TB = Ci[WS(csi, 3)]; + TC = TA + TB; + TU = TB - TA; + } + TQ = TO - TP; + T1n = T1e - T1d; + T1f = T1d + T1e; + T12 = TP + TO; + T1m = T1b - T1a; + TV = TT - TU; + T13 = TU + TT; + T1c = T1a + T1b; + Td = T9 - Tc; + Tk = Tg - Tj; + Tl = Td + Tk; + Ty = Tu + Tx; + TD = Tz - TC; + TE = Ty + TD; + Tn = T9 + Tc; + To = Tg + Tj; + Tp = Tn + To; + TG = Tu - Tx; + TH = Tz + TC; + TI = TG + TH; + } + R0[WS(rs, 5)] = FMA(KP2_000000000, Tl, T6); + R1[WS(rs, 7)] = FMA(KP2_000000000, TE, Tt); + R1[WS(rs, 2)] = FMA(KP2_000000000, TI, TF); + R0[0] = FMA(KP2_000000000, Tp, Tm); + { + E TW, TY, TL, TX, TJ, TK; + TW = FNMS(KP1_902113032, TV, KP1_175570504 * TQ); + TY = FMA(KP1_902113032, TQ, KP1_175570504 * TV); + TJ = FNMS(KP500000000, 
Tl, T6); + TK = KP1_118033988 * (Td - Tk); + TL = TJ - TK; + TX = TK + TJ; + R0[WS(rs, 1)] = TL - TW; + R0[WS(rs, 7)] = TX + TY; + R0[WS(rs, 9)] = TL + TW; + R0[WS(rs, 3)] = TX - TY; + } + { + E T1g, T1i, T19, T1h, T17, T18; + T1g = FNMS(KP1_902113032, T1f, KP1_175570504 * T1c); + T1i = FMA(KP1_902113032, T1c, KP1_175570504 * T1f); + T17 = FNMS(KP500000000, TI, TF); + T18 = KP1_118033988 * (TG - TH); + T19 = T17 - T18; + T1h = T18 + T17; + R1[WS(rs, 8)] = T19 - T1g; + R1[WS(rs, 4)] = T1h + T1i; + R1[WS(rs, 6)] = T19 + T1g; + R1[0] = T1h - T1i; + } + { + E T1o, T1q, T1l, T1p, T1j, T1k; + T1o = FNMS(KP1_902113032, T1n, KP1_175570504 * T1m); + T1q = FMA(KP1_902113032, T1m, KP1_175570504 * T1n); + T1j = FNMS(KP500000000, TE, Tt); + T1k = KP1_118033988 * (Ty - TD); + T1l = T1j - T1k; + T1p = T1k + T1j; + R1[WS(rs, 3)] = T1l - T1o; + R1[WS(rs, 9)] = T1p + T1q; + R1[WS(rs, 1)] = T1l + T1o; + R1[WS(rs, 5)] = T1p - T1q; + } + { + E T14, T16, T11, T15, TZ, T10; + T14 = FNMS(KP1_902113032, T13, KP1_175570504 * T12); + T16 = FMA(KP1_902113032, T12, KP1_175570504 * T13); + TZ = FNMS(KP500000000, Tp, Tm); + T10 = KP1_118033988 * (Tn - To); + T11 = TZ - T10; + T15 = T10 + TZ; + R0[WS(rs, 6)] = T11 - T14; + R0[WS(rs, 2)] = T15 + T16; + R0[WS(rs, 4)] = T11 + T14; + R0[WS(rs, 8)] = T15 - T16; + } + } + } +} + +static const kr2c_desc desc = { 20, "r2cb_20", {70, 14, 16, 0}, &GENUS }; + +void X(codelet_r2cb_20) (planner *p) { + X(kr2c_register) (p, r2cb_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + 
* the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cb_25 -include r2cb.h */ + +/* + * This function contains 152 FP additions, 120 FP multiplications, + * (or, 32 additions, 0 multiplications, 120 fused multiply/add), + * 115 stack variables, 44 constants, and 50 memory accesses + */ +#include "r2cb.h" + +static void r2cb_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP979740652, +0.979740652857618686258237536568998933733477632); + DK(KP438153340, +0.438153340021931793654057951961031291699532119); + DK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DK(KP963507348, +0.963507348203430549974383005744259307057084020); + DK(KP1_606007150, +1.606007150877320829666881187140752009270929701); + DK(KP1_721083328, +1.721083328735889354196523361841037632825608373); + DK(KP1_011627398, +1.011627398597394192215998921771049272931807941); + DK(KP595480289, +0.595480289600000014706716770488118292997907308); + DK(KP641441904, +0.641441904830606407298806329068862424939687989); + DK(KP452413526, +0.452413526233009763856834323966348796985206956); + DK(KP1_809654104, 
+1.809654104932039055427337295865395187940827822); + DK(KP933137358, +0.933137358350283770603023973254446451924190884); + DK(KP1_666834356, +1.666834356657377354817925100486477686277992119); + DK(KP1_842354653, +1.842354653930286640500894870830132058718564461); + DK(KP1_082908895, +1.082908895072625554092571180165639018104066379); + DK(KP662318342, +0.662318342759882818626911127577439236802190210); + DK(KP576710603, +0.576710603632765877371579268136471017090111488); + DK(KP484291580, +0.484291580564315559745084187732367906918006201); + DK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DK(KP1_898359647, +1.898359647016882523151110931686726543423167685); + DK(KP1_386580726, +1.386580726567734802700860150804827247498955921); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP1_115827804, +1.115827804063668528375399296931134075984874304); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP499013364, +0.499013364214135780976168403431725276668452610); + DK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP730409924, +0.730409924561256563751459444999838399157094302); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP451418159, +0.451418159099103183892477933432151804893354132); + DK(KP846146756, +0.846146756728608505452954290121135880883743802); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP062914667, +0.062914667253649757225485955897349402364686947); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, 
+0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E T1H, T24, T22, T1W, T1Y, T1X, T1Z, T23; + { + E T1G, Tu, T5, T1F, Tr, Te, T2o, T1N, T2a, T1t, TR, T1K, T29, T1u, TG; + E TU, TT, Tn, T1d, T1Q, T2p, T1T, T12, T1P, T1a; + { + E T1, T2, T3, Ts, Tt; + Ts = Ci[WS(csi, 5)]; + Tt = Ci[WS(csi, 10)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 5)]; + T3 = Cr[WS(csr, 10)]; + T1G = FMS(KP618033988, Ts, Tt); + Tu = FMA(KP618033988, Tt, Ts); + { + E Tx, Tw, T1M, TQ, TM, T1J, TF, TL; + { + E T6, TH, TO, TP, TB, TI, Td, TJ, TE, T4, Tq, TK; + T6 = Cr[WS(csr, 1)]; + T4 = T2 + T3; + Tq = T2 - T3; + TH = Ci[WS(csi, 1)]; + { + E Ta, T9, Tb, T7, T8, Tp; + T7 = Cr[WS(csr, 6)]; + T8 = Cr[WS(csr, 4)]; + Tp = FNMS(KP500000000, T4, T1); + T5 = FMA(KP2_000000000, T4, T1); + Ta = Cr[WS(csr, 11)]; + TO = T7 - T8; + T9 = T7 + T8; + T1F = FNMS(KP1_118033988, Tq, Tp); + Tr = FMA(KP1_118033988, Tq, Tp); + Tb = Cr[WS(csr, 9)]; + { + E TC, TD, Tz, TA, Tc; + Tz = Ci[WS(csi, 6)]; + TA = Ci[WS(csi, 4)]; + TP = Tb - Ta; + Tc = Ta + Tb; + TC = Ci[WS(csi, 11)]; + TB = Tz + TA; + TI = Tz - TA; + TD = Ci[WS(csi, 9)]; + Td = T9 + Tc; + Tx = T9 - Tc; + TJ = TC - TD; + TE = TC + TD; + } + } + Te = T6 + Td; + Tw = FNMS(KP250000000, Td, T6); + T1M = FMA(KP618033988, TO, TP); + TQ = FNMS(KP618033988, TP, TO); + TK = TI + TJ; + TM = TI - TJ; + T1J = FNMS(KP618033988, TB, TE); + TF = FMA(KP618033988, TE, TB); + TL = FNMS(KP250000000, TK, TH); + T2o = TK + TH; + } + { + E Tf, T14, 
T1b, T1c, Tm, TY, T15, T16, T11, T17, T19, T18; + Tf = Cr[WS(csr, 2)]; + { + E T1L, TN, T1I, Ty; + T1L = FNMS(KP559016994, TM, TL); + TN = FMA(KP559016994, TM, TL); + T1I = FNMS(KP559016994, Tx, Tw); + Ty = FMA(KP559016994, Tx, Tw); + T1N = FMA(KP951056516, T1M, T1L); + T2a = FNMS(KP951056516, T1M, T1L); + T1t = FNMS(KP951056516, TQ, TN); + TR = FMA(KP951056516, TQ, TN); + T1K = FMA(KP951056516, T1J, T1I); + T29 = FNMS(KP951056516, T1J, T1I); + T1u = FMA(KP951056516, TF, Ty); + TG = FNMS(KP951056516, TF, Ty); + T14 = Ci[WS(csi, 2)]; + } + { + E Tg, Th, Tj, Tk; + Tg = Cr[WS(csr, 7)]; + Th = Cr[WS(csr, 3)]; + Tj = Cr[WS(csr, 12)]; + Tk = Cr[WS(csr, 8)]; + { + E TW, Ti, Tl, TX, TZ, T10; + TW = Ci[WS(csi, 7)]; + T1b = Th - Tg; + Ti = Tg + Th; + T1c = Tj - Tk; + Tl = Tj + Tk; + TX = Ci[WS(csi, 3)]; + TZ = Ci[WS(csi, 12)]; + T10 = Ci[WS(csi, 8)]; + Tm = Ti + Tl; + TU = Tl - Ti; + TY = TW + TX; + T15 = TW - TX; + T16 = TZ - T10; + T11 = TZ + T10; + } + } + TT = FNMS(KP250000000, Tm, Tf); + Tn = Tf + Tm; + T17 = T15 + T16; + T19 = T16 - T15; + T1d = FNMS(KP618033988, T1c, T1b); + T1Q = FMA(KP618033988, T1b, T1c); + T18 = FNMS(KP250000000, T17, T14); + T2p = T17 + T14; + T1T = FNMS(KP618033988, TY, T11); + T12 = FMA(KP618033988, T11, TY); + T1P = FMA(KP559016994, T19, T18); + T1a = FNMS(KP559016994, T19, T18); + } + } + } + { + E T1R, T1e, T1q, T1U, T13, T1r, T2b, T28, T25, T2i, T2k; + { + E T2m, To, T26, T27, TV, T1S; + T2m = Te - Tn; + To = Te + Tn; + TV = FNMS(KP559016994, TU, TT); + T1S = FMA(KP559016994, TU, TT); + T26 = FMA(KP951056516, T1Q, T1P); + T1R = FNMS(KP951056516, T1Q, T1P); + T1e = FNMS(KP951056516, T1d, T1a); + T1q = FMA(KP951056516, T1d, T1a); + T27 = FNMS(KP951056516, T1T, T1S); + T1U = FMA(KP951056516, T1T, T1S); + T13 = FNMS(KP951056516, T12, TV); + T1r = FMA(KP951056516, T12, TV); + { + E T2g, T2q, T2s, T2h, T2n, T2r, T2l; + T2g = FMA(KP939062505, T29, T2a); + T2b = FNMS(KP939062505, T2a, T29); + R0[0] = FMA(KP2_000000000, To, T5); + T2l = 
FNMS(KP500000000, To, T5); + T2q = FMA(KP618033988, T2p, T2o); + T2s = FNMS(KP618033988, T2o, T2p); + T28 = FNMS(KP062914667, T27, T26); + T2h = FMA(KP062914667, T26, T27); + T2n = FMA(KP1_118033988, T2m, T2l); + T2r = FNMS(KP1_118033988, T2m, T2l); + T25 = FMA(KP1_902113032, T1G, T1F); + T1H = FNMS(KP1_902113032, T1G, T1F); + T2i = FMA(KP846146756, T2h, T2g); + T2k = FNMS(KP451418159, T2g, T2h); + R0[WS(rs, 10)] = FMA(KP1_902113032, T2q, T2n); + R1[WS(rs, 2)] = FNMS(KP1_902113032, T2q, T2n); + R0[WS(rs, 5)] = FMA(KP1_902113032, T2s, T2r); + R1[WS(rs, 7)] = FNMS(KP1_902113032, T2s, T2r); + } + } + { + E TS, T1f, T1p, Tv, T2e, T1o, T1m, T2d, T1k, T1l, T2c; + TS = FNMS(KP256756360, TR, TG); + T1k = FMA(KP256756360, TG, TR); + T1l = FMA(KP549754652, T13, T1e); + T1f = FNMS(KP549754652, T1e, T13); + T1p = FMA(KP1_902113032, Tu, Tr); + Tv = FNMS(KP1_902113032, Tu, Tr); + T2e = FMA(KP730409924, T2b, T28); + T2c = FNMS(KP730409924, T2b, T28); + T1o = FNMS(KP683113946, T1k, T1l); + T1m = FMA(KP559154169, T1l, T1k); + R1[WS(rs, 1)] = FNMS(KP1_996053456, T2c, T25); + T2d = FMA(KP499013364, T2c, T25); + { + E T1C, T1E, T1y, T1w; + { + E T1s, T1v, T1i, T1h, T1n, T1j; + { + E T1A, T1B, T2f, T2j, T1g; + T1A = FNMS(KP470564281, T1q, T1r); + T1s = FMA(KP470564281, T1r, T1q); + T1v = FNMS(KP634619297, T1u, T1t); + T1B = FMA(KP634619297, T1t, T1u); + T2f = FMA(KP1_115827804, T2e, T2d); + T2j = FNMS(KP1_115827804, T2e, T2d); + T1i = FNMS(KP904730450, T1f, TS); + T1g = FMA(KP904730450, T1f, TS); + R1[WS(rs, 11)] = FMA(KP1_386580726, T2i, T2f); + R0[WS(rs, 4)] = FNMS(KP1_386580726, T2i, T2f); + R1[WS(rs, 6)] = FMA(KP1_898359647, T2k, T2j); + R0[WS(rs, 9)] = FNMS(KP1_898359647, T2k, T2j); + R1[0] = FMA(KP1_937166322, T1g, Tv); + T1h = FNMS(KP484291580, T1g, Tv); + T1C = FNMS(KP576710603, T1B, T1A); + T1E = FMA(KP662318342, T1A, T1B); + } + T1n = FNMS(KP1_082908895, T1i, T1h); + T1j = FMA(KP1_082908895, T1i, T1h); + R1[WS(rs, 10)] = FMA(KP1_842354653, T1m, T1j); + R0[WS(rs, 3)] = 
FNMS(KP1_842354653, T1m, T1j); + R1[WS(rs, 5)] = FMA(KP1_666834356, T1o, T1n); + R0[WS(rs, 8)] = FNMS(KP1_666834356, T1o, T1n); + T1y = FNMS(KP933137358, T1v, T1s); + T1w = FMA(KP933137358, T1v, T1s); + } + { + E T1O, T20, T21, T1V, T1x, T1z, T1D; + T1O = FNMS(KP549754652, T1N, T1K); + T20 = FMA(KP549754652, T1K, T1N); + T21 = FMA(KP634619297, T1R, T1U); + T1V = FNMS(KP634619297, T1U, T1R); + R0[WS(rs, 2)] = FNMS(KP1_809654104, T1w, T1p); + T1x = FMA(KP452413526, T1w, T1p); + T24 = FNMS(KP641441904, T20, T21); + T22 = FMA(KP595480289, T21, T20); + T1z = FNMS(KP1_011627398, T1y, T1x); + T1D = FMA(KP1_011627398, T1y, T1x); + R1[WS(rs, 9)] = FNMS(KP1_721083328, T1C, T1z); + R0[WS(rs, 7)] = FMA(KP1_721083328, T1C, T1z); + R0[WS(rs, 12)] = FMA(KP1_606007150, T1E, T1D); + R1[WS(rs, 4)] = FNMS(KP1_606007150, T1E, T1D); + T1W = FNMS(KP963507348, T1V, T1O); + T1Y = FMA(KP963507348, T1V, T1O); + } + } + } + } + } + R0[WS(rs, 1)] = FMA(KP1_752613360, T1W, T1H); + T1X = FNMS(KP438153340, T1W, T1H); + T1Z = FMA(KP979740652, T1Y, T1X); + T23 = FNMS(KP979740652, T1Y, T1X); + R0[WS(rs, 11)] = FMA(KP1_666834356, T22, T1Z); + R1[WS(rs, 3)] = FNMS(KP1_666834356, T22, T1Z); + R1[WS(rs, 8)] = FNMS(KP1_606007150, T24, T23); + R0[WS(rs, 6)] = FMA(KP1_606007150, T24, T23); + } + } +} + +static const kr2c_desc desc = { 25, "r2cb_25", {32, 0, 120, 0}, &GENUS }; + +void X(codelet_r2cb_25) (planner *p) { + X(kr2c_register) (p, r2cb_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cb_25 -include r2cb.h */ + +/* + * This function contains 152 FP additions, 98 FP multiplications, + * (or, 100 additions, 46 multiplications, 52 fused multiply/add), + * 65 stack variables, 21 constants, and 50 memory accesses + */ +#include "r2cb.h" + +static void r2cb_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP425779291, 
+0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E Tu, T1G, T5, Tr, T1F, TN, TO, Te, TR, T27, T1r, T1N, TG, T26, T1q; + E T1K, T1a, T1b, Tn, T1e, T2a, T1u, T1U, T13, T29, T1t, T1R, Ts, Tt; + Ts = Ci[WS(csi, 5)]; + Tt = Ci[WS(csi, 10)]; + Tu = FMA(KP1_902113032, Ts, KP1_175570504 * Tt); + T1G = FNMS(KP1_902113032, Tt, KP1_175570504 * Ts); + { + E T1, T4, Tp, T2, T3, Tq; + T1 = Cr[0]; + T2 = Cr[WS(csr, 
5)]; + T3 = Cr[WS(csr, 10)]; + T4 = T2 + T3; + Tp = KP1_118033988 * (T2 - T3); + T5 = FMA(KP2_000000000, T4, T1); + Tq = FNMS(KP500000000, T4, T1); + Tr = Tp + Tq; + T1F = Tq - Tp; + } + { + E T6, Td, TI, Tw, TH, TB, TE, TM; + T6 = Cr[WS(csr, 1)]; + TN = Ci[WS(csi, 1)]; + { + E T7, T8, T9, Ta, Tb, Tc; + T7 = Cr[WS(csr, 6)]; + T8 = Cr[WS(csr, 4)]; + T9 = T7 + T8; + Ta = Cr[WS(csr, 11)]; + Tb = Cr[WS(csr, 9)]; + Tc = Ta + Tb; + Td = T9 + Tc; + TI = Ta - Tb; + Tw = KP559016994 * (T9 - Tc); + TH = T7 - T8; + } + { + E Tz, TA, TK, TC, TD, TL; + Tz = Ci[WS(csi, 6)]; + TA = Ci[WS(csi, 4)]; + TK = Tz - TA; + TC = Ci[WS(csi, 11)]; + TD = Ci[WS(csi, 9)]; + TL = TC - TD; + TB = Tz + TA; + TO = TK + TL; + TE = TC + TD; + TM = KP559016994 * (TK - TL); + } + Te = T6 + Td; + { + E TJ, T1L, TQ, T1M, TP; + TJ = FMA(KP951056516, TH, KP587785252 * TI); + T1L = FNMS(KP951056516, TI, KP587785252 * TH); + TP = FNMS(KP250000000, TO, TN); + TQ = TM + TP; + T1M = TP - TM; + TR = TJ + TQ; + T27 = T1M - T1L; + T1r = TQ - TJ; + T1N = T1L + T1M; + } + { + E TF, T1J, Ty, T1I, Tx; + TF = FMA(KP951056516, TB, KP587785252 * TE); + T1J = FNMS(KP951056516, TE, KP587785252 * TB); + Tx = FNMS(KP250000000, Td, T6); + Ty = Tw + Tx; + T1I = Tx - Tw; + TG = Ty - TF; + T26 = T1I + T1J; + T1q = Ty + TF; + T1K = T1I - T1J; + } + } + { + E Tf, Tm, T15, TT, T14, TY, T11, T19; + Tf = Cr[WS(csr, 2)]; + T1a = Ci[WS(csi, 2)]; + { + E Tg, Th, Ti, Tj, Tk, Tl; + Tg = Cr[WS(csr, 7)]; + Th = Cr[WS(csr, 3)]; + Ti = Tg + Th; + Tj = Cr[WS(csr, 12)]; + Tk = Cr[WS(csr, 8)]; + Tl = Tj + Tk; + Tm = Ti + Tl; + T15 = Tj - Tk; + TT = KP559016994 * (Ti - Tl); + T14 = Tg - Th; + } + { + E TW, TX, T17, TZ, T10, T18; + TW = Ci[WS(csi, 7)]; + TX = Ci[WS(csi, 3)]; + T17 = TW - TX; + TZ = Ci[WS(csi, 12)]; + T10 = Ci[WS(csi, 8)]; + T18 = TZ - T10; + TY = TW + TX; + T1b = T17 + T18; + T11 = TZ + T10; + T19 = KP559016994 * (T17 - T18); + } + Tn = Tf + Tm; + { + E T16, T1S, T1d, T1T, T1c; + T16 = FMA(KP951056516, T14, KP587785252 * T15); + 
T1S = FNMS(KP951056516, T15, KP587785252 * T14); + T1c = FNMS(KP250000000, T1b, T1a); + T1d = T19 + T1c; + T1T = T1c - T19; + T1e = T16 + T1d; + T2a = T1T - T1S; + T1u = T1d - T16; + T1U = T1S + T1T; + } + { + E T12, T1Q, TV, T1P, TU; + T12 = FMA(KP951056516, TY, KP587785252 * T11); + T1Q = FNMS(KP951056516, T11, KP587785252 * TY); + TU = FNMS(KP250000000, Tm, Tf); + TV = TT + TU; + T1P = TU - TT; + T13 = TV - T12; + T29 = T1P + T1Q; + T1t = TV + T12; + T1R = T1P - T1Q; + } + } + { + E T2m, To, T2l, T2q, T2s, T2o, T2p, T2r, T2n; + T2m = KP1_118033988 * (Te - Tn); + To = Te + Tn; + T2l = FNMS(KP500000000, To, T5); + T2o = TO + TN; + T2p = T1b + T1a; + T2q = FNMS(KP1_902113032, T2p, KP1_175570504 * T2o); + T2s = FMA(KP1_902113032, T2o, KP1_175570504 * T2p); + R0[0] = FMA(KP2_000000000, To, T5); + T2r = T2m + T2l; + R1[WS(rs, 2)] = T2r - T2s; + R0[WS(rs, 10)] = T2r + T2s; + T2n = T2l - T2m; + R0[WS(rs, 5)] = T2n - T2q; + R1[WS(rs, 7)] = T2n + T2q; + } + { + E T2i, T2k, T25, T2c, T2d, T2e, T2j, T2f; + { + E T2g, T2h, T28, T2b; + T2g = FMA(KP684547105, T26, KP728968627 * T27); + T2h = FMA(KP998026728, T29, KP062790519 * T2a); + T2i = FNMS(KP1_902113032, T2h, KP1_175570504 * T2g); + T2k = FMA(KP1_902113032, T2g, KP1_175570504 * T2h); + T25 = T1F + T1G; + T28 = FNMS(KP684547105, T27, KP728968627 * T26); + T2b = FNMS(KP998026728, T2a, KP062790519 * T29); + T2c = T28 + T2b; + T2d = FNMS(KP500000000, T2c, T25); + T2e = KP1_118033988 * (T28 - T2b); + } + R1[WS(rs, 1)] = FMA(KP2_000000000, T2c, T25); + T2j = T2e + T2d; + R0[WS(rs, 4)] = T2j - T2k; + R1[WS(rs, 11)] = T2j + T2k; + T2f = T2d - T2e; + R1[WS(rs, 6)] = T2f - T2i; + R0[WS(rs, 9)] = T2f + T2i; + } + { + E T1m, T1o, Tv, T1g, T1h, T1i, T1n, T1j; + { + E T1k, T1l, TS, T1f; + T1k = FMA(KP248689887, TG, KP968583161 * TR); + T1l = FMA(KP481753674, T13, KP876306680 * T1e); + T1m = FNMS(KP1_902113032, T1l, KP1_175570504 * T1k); + T1o = FMA(KP1_902113032, T1k, KP1_175570504 * T1l); + Tv = Tr - Tu; + TS = FNMS(KP248689887, TR, 
KP968583161 * TG); + T1f = FNMS(KP481753674, T1e, KP876306680 * T13); + T1g = TS + T1f; + T1h = FNMS(KP500000000, T1g, Tv); + T1i = KP1_118033988 * (TS - T1f); + } + R1[0] = FMA(KP2_000000000, T1g, Tv); + T1n = T1i + T1h; + R0[WS(rs, 3)] = T1n - T1o; + R1[WS(rs, 10)] = T1n + T1o; + T1j = T1h - T1i; + R1[WS(rs, 5)] = T1j - T1m; + R0[WS(rs, 8)] = T1j + T1m; + } + { + E T1C, T1E, T1p, T1w, T1x, T1y, T1D, T1z; + { + E T1A, T1B, T1s, T1v; + T1A = FMA(KP844327925, T1q, KP535826794 * T1r); + T1B = FNMS(KP425779291, T1u, KP904827052 * T1t); + T1C = FNMS(KP1_902113032, T1B, KP1_175570504 * T1A); + T1E = FMA(KP1_902113032, T1A, KP1_175570504 * T1B); + T1p = Tr + Tu; + T1s = FNMS(KP844327925, T1r, KP535826794 * T1q); + T1v = FMA(KP425779291, T1t, KP904827052 * T1u); + T1w = T1s - T1v; + T1x = FNMS(KP500000000, T1w, T1p); + T1y = KP1_118033988 * (T1s + T1v); + } + R0[WS(rs, 2)] = FMA(KP2_000000000, T1w, T1p); + T1D = T1x + T1y; + R1[WS(rs, 4)] = T1D - T1E; + R0[WS(rs, 12)] = T1E + T1D; + T1z = T1x - T1y; + R0[WS(rs, 7)] = T1z - T1C; + R1[WS(rs, 9)] = T1C + T1z; + } + { + E T22, T24, T1H, T1W, T1X, T1Y, T23, T1Z; + { + E T20, T21, T1O, T1V; + T20 = FMA(KP481753674, T1K, KP876306680 * T1N); + T21 = FMA(KP844327925, T1R, KP535826794 * T1U); + T22 = FNMS(KP1_902113032, T21, KP1_175570504 * T20); + T24 = FMA(KP1_902113032, T20, KP1_175570504 * T21); + T1H = T1F - T1G; + T1O = FNMS(KP481753674, T1N, KP876306680 * T1K); + T1V = FNMS(KP844327925, T1U, KP535826794 * T1R); + T1W = T1O + T1V; + T1X = FNMS(KP500000000, T1W, T1H); + T1Y = KP1_118033988 * (T1O - T1V); + } + R0[WS(rs, 1)] = FMA(KP2_000000000, T1W, T1H); + T23 = T1Y + T1X; + R1[WS(rs, 3)] = T23 - T24; + R0[WS(rs, 11)] = T23 + T24; + T1Z = T1X - T1Y; + R0[WS(rs, 6)] = T1Z - T22; + R1[WS(rs, 8)] = T1Z + T22; + } + } + } +} + +static const kr2c_desc desc = { 25, "r2cb_25", {100, 46, 52, 0}, &GENUS }; + +void X(codelet_r2cb_25) (planner *p) { + X(kr2c_register) (p, r2cb_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b 
-r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 3 -name r2cb_3 -include r2cb.h */ + +/* + * This function contains 4 FP additions, 3 FP multiplications, + * (or, 1 additions, 0 multiplications, 3 fused multiply/add), + * 7 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cb.h" + +static void r2cb_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, 
MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { + E T4, T1, T2, T3; + T4 = Ci[WS(csi, 1)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + R0[0] = FMA(KP2_000000000, T2, T1); + T3 = T1 - T2; + R1[0] = FNMS(KP1_732050807, T4, T3); + R0[WS(rs, 1)] = FMA(KP1_732050807, T4, T3); + } + } +} + +static const kr2c_desc desc = { 3, "r2cb_3", {1, 0, 3, 0}, &GENUS }; + +void X(codelet_r2cb_3) (planner *p) { + X(kr2c_register) (p, r2cb_3, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 3 -name r2cb_3 -include r2cb.h */ + +/* + * This function contains 4 FP additions, 2 FP multiplications, + * (or, 3 additions, 1 multiplications, 1 fused multiply/add), + * 8 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cb.h" + +static void r2cb_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { + E T5, T1, T2, T3, T4; + T4 = Ci[WS(csi, 1)]; + T5 = KP1_732050807 * T4; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + T3 = T1 - T2; + R0[0] = FMA(KP2_000000000, T2, T1); + R0[WS(rs, 1)] = T3 + T5; + R1[0] = T3 - T5; + } + } +} + +static const kr2c_desc desc = { 3, "r2cb_3", {3, 1, 1, 0}, &GENUS }; + +void X(codelet_r2cb_3) (planner *p) { + X(kr2c_register) (p, r2cb_3, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * 
Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cb_32 -include r2cb.h */ + +/* + * This function contains 156 FP additions, 84 FP multiplications, + * (or, 72 additions, 0 multiplications, 84 fused multiply/add), + * 82 stack variables, 9 constants, and 64 memory accesses + */ +#include "r2cb.h" + +static void r2cb_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_414213562, 
+1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { + E T1F, T1C, T1H, T1z, T1G, T1I; + { + E T8, T1t, Tz, T1R, T5, T1S, T1u, TE, T1w, TP, T1U, Tg, T2m, T1X, T1x; + E TK, T1D, T1d, T20, To, T2p, T28, T1A, TW, T11, T1e, Tv, T25, T23, T2q; + E T16, T1f, TA, TD; + { + E T4, Ty, T1, T2, T6, T7; + T4 = Cr[WS(csr, 8)]; + Ty = Ci[WS(csi, 8)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 16)]; + T6 = Cr[WS(csr, 4)]; + T7 = Cr[WS(csr, 12)]; + { + E TB, Tx, T3, TC; + TB = Ci[WS(csi, 4)]; + Tx = T1 - T2; + T3 = T1 + T2; + TA = T6 - T7; + T8 = T6 + T7; + TC = Ci[WS(csi, 12)]; + T1t = FMA(KP2_000000000, Ty, Tx); + Tz = FNMS(KP2_000000000, Ty, Tx); + T1R = FNMS(KP2_000000000, T4, T3); + T5 = FMA(KP2_000000000, T4, T3); + TD = TB + TC; + T1S = TB - TC; + } + } + { + E Td, TG, Tc, T1V, TO, Te, TH, TI; + { + E Ta, Tb, TM, TN; + Ta = Cr[WS(csr, 2)]; + T1u = TA + TD; + TE = TA - TD; + Tb = Cr[WS(csr, 14)]; + TM = Ci[WS(csi, 2)]; + TN = Ci[WS(csi, 14)]; + Td = Cr[WS(csr, 10)]; + TG = Ta - Tb; + Tc = Ta + Tb; + T1V = TM - TN; + TO = TM + TN; + Te = Cr[WS(csr, 6)]; + TH = Ci[WS(csi, 10)]; + TI = Ci[WS(csi, 6)]; + } + { + E Tl, TS, Tk, T26, T1c, Tm, TT, TU; + { + E Ti, Tj, T1a, T1b; + Ti = Cr[WS(csr, 1)]; + { + E TL, Tf, T1W, TJ; + TL = Td - Te; + Tf = Td + Te; + T1W = TH - TI; + TJ = TH + TI; + T1w = TO - TL; + TP = TL + TO; + T1U = Tc - Tf; + Tg = Tc + Tf; + T2m = T1W + T1V; + T1X = T1V - T1W; + T1x = TG + TJ; + TK = TG - TJ; + Tj = Cr[WS(csr, 15)]; + } + T1a = Ci[WS(csi, 1)]; + T1b = Ci[WS(csi, 15)]; + Tl = Cr[WS(csr, 9)]; + TS = Ti - Tj; + Tk = Ti + Tj; + T26 = T1a - T1b; + T1c = T1a + T1b; + Tm = Cr[WS(csr, 7)]; + TT = Ci[WS(csi, 9)]; + TU = Ci[WS(csi, 7)]; + } + { + E Ts, TX, Tr, T22, T10, Tt, T13, T14; + { + E Tp, Tq, TY, TZ; 
+ Tp = Cr[WS(csr, 5)]; + { + E T19, Tn, T27, TV; + T19 = Tl - Tm; + Tn = Tl + Tm; + T27 = TT - TU; + TV = TT + TU; + T1D = T1c - T19; + T1d = T19 + T1c; + T20 = Tk - Tn; + To = Tk + Tn; + T2p = T27 + T26; + T28 = T26 - T27; + T1A = TS + TV; + TW = TS - TV; + Tq = Cr[WS(csr, 11)]; + } + TY = Ci[WS(csi, 5)]; + TZ = Ci[WS(csi, 11)]; + Ts = Cr[WS(csr, 3)]; + TX = Tp - Tq; + Tr = Tp + Tq; + T22 = TY - TZ; + T10 = TY + TZ; + Tt = Cr[WS(csr, 13)]; + T13 = Ci[WS(csi, 3)]; + T14 = Ci[WS(csi, 13)]; + } + { + E T12, Tu, T21, T15; + T11 = TX - T10; + T1e = TX + T10; + T12 = Ts - Tt; + Tu = Ts + Tt; + T21 = T14 - T13; + T15 = T13 + T14; + Tv = Tr + Tu; + T25 = Tr - Tu; + T23 = T21 - T22; + T2q = T22 + T21; + T16 = T12 - T15; + T1f = T12 + T15; + } + } + } + } + { + E T1B, T1E, T1l, T1m, T1p, T1o, T1T, T1Y, T29, T2g, T2j, T2f, T2h, T24; + { + E T1g, T17, T2n, T2t, T2u, T2s; + { + E T2o, Tw, T2w, T2r, T2l, T9, Th, T2v; + T2o = To - Tv; + Tw = To + Tv; + T2w = T2q + T2p; + T2r = T2p - T2q; + T1g = T1e - T1f; + T1B = T1e + T1f; + T17 = T11 + T16; + T1E = T16 - T11; + T2l = FNMS(KP2_000000000, T8, T5); + T9 = FMA(KP2_000000000, T8, T5); + Th = FMA(KP2_000000000, Tg, T9); + T2v = FNMS(KP2_000000000, Tg, T9); + T2n = FNMS(KP2_000000000, T2m, T2l); + T2t = FMA(KP2_000000000, T2m, T2l); + R0[WS(rs, 4)] = FNMS(KP2_000000000, T2w, T2v); + R0[WS(rs, 12)] = FMA(KP2_000000000, T2w, T2v); + R0[0] = FMA(KP2_000000000, Tw, Th); + R0[WS(rs, 8)] = FNMS(KP2_000000000, Tw, Th); + T2u = T2o + T2r; + T2s = T2o - T2r; + } + { + E T1j, TR, T18, T1h, TF, TQ; + T1l = FNMS(KP1_414213562, TE, Tz); + TF = FMA(KP1_414213562, TE, Tz); + TQ = FNMS(KP414213562, TP, TK); + T1m = FMA(KP414213562, TK, TP); + R0[WS(rs, 2)] = FMA(KP1_414213562, T2s, T2n); + R0[WS(rs, 10)] = FNMS(KP1_414213562, T2s, T2n); + R0[WS(rs, 6)] = FNMS(KP1_414213562, T2u, T2t); + R0[WS(rs, 14)] = FMA(KP1_414213562, T2u, T2t); + T1j = FNMS(KP1_847759065, TQ, TF); + TR = FMA(KP1_847759065, TQ, TF); + T1p = FNMS(KP707106781, T17, TW); + T18 = 
FMA(KP707106781, T17, TW); + T1h = FMA(KP707106781, T1g, T1d); + T1o = FNMS(KP707106781, T1g, T1d); + { + E T2d, T2e, T1k, T1i; + T1T = FNMS(KP2_000000000, T1S, T1R); + T2d = FMA(KP2_000000000, T1S, T1R); + T2e = T1U + T1X; + T1Y = T1U - T1X; + T29 = T25 + T28; + T2g = T28 - T25; + T1k = FMA(KP198912367, T18, T1h); + T1i = FNMS(KP198912367, T1h, T18); + T2j = FMA(KP1_414213562, T2e, T2d); + T2f = FNMS(KP1_414213562, T2e, T2d); + R1[WS(rs, 4)] = FNMS(KP1_961570560, T1k, T1j); + R1[WS(rs, 12)] = FMA(KP1_961570560, T1k, T1j); + R1[0] = FMA(KP1_961570560, T1i, TR); + R1[WS(rs, 8)] = FNMS(KP1_961570560, T1i, TR); + T2h = T20 - T23; + T24 = T20 + T23; + } + } + } + { + E T1v, T1y, T1M, T1P, T1L, T1N; + { + E T1r, T1n, T2k, T2i; + T2k = FMA(KP414213562, T2g, T2h); + T2i = FNMS(KP414213562, T2h, T2g); + T1r = FMA(KP1_847759065, T1m, T1l); + T1n = FNMS(KP1_847759065, T1m, T1l); + R0[WS(rs, 7)] = FNMS(KP1_847759065, T2k, T2j); + R0[WS(rs, 15)] = FMA(KP1_847759065, T2k, T2j); + R0[WS(rs, 11)] = FMA(KP1_847759065, T2i, T2f); + R0[WS(rs, 3)] = FNMS(KP1_847759065, T2i, T2f); + { + E T1J, T1K, T1s, T1q; + T1v = FNMS(KP1_414213562, T1u, T1t); + T1J = FMA(KP1_414213562, T1u, T1t); + T1K = FMA(KP414213562, T1w, T1x); + T1y = FNMS(KP414213562, T1x, T1w); + T1F = FNMS(KP707106781, T1E, T1D); + T1M = FMA(KP707106781, T1E, T1D); + T1s = FMA(KP668178637, T1o, T1p); + T1q = FNMS(KP668178637, T1p, T1o); + T1P = FMA(KP1_847759065, T1K, T1J); + T1L = FNMS(KP1_847759065, T1K, T1J); + R1[WS(rs, 6)] = FNMS(KP1_662939224, T1s, T1r); + R1[WS(rs, 14)] = FMA(KP1_662939224, T1s, T1r); + R1[WS(rs, 10)] = FMA(KP1_662939224, T1q, T1n); + R1[WS(rs, 2)] = FNMS(KP1_662939224, T1q, T1n); + T1N = FMA(KP707106781, T1B, T1A); + T1C = FNMS(KP707106781, T1B, T1A); + } + } + { + E T2b, T1Z, T1Q, T1O, T2c, T2a; + T1Q = FMA(KP198912367, T1M, T1N); + T1O = FNMS(KP198912367, T1N, T1M); + T2b = FNMS(KP1_414213562, T1Y, T1T); + T1Z = FMA(KP1_414213562, T1Y, T1T); + R1[WS(rs, 7)] = FNMS(KP1_961570560, T1Q, T1P); + 
R1[WS(rs, 15)] = FMA(KP1_961570560, T1Q, T1P); + R1[WS(rs, 11)] = FMA(KP1_961570560, T1O, T1L); + R1[WS(rs, 3)] = FNMS(KP1_961570560, T1O, T1L); + T2c = FMA(KP414213562, T24, T29); + T2a = FNMS(KP414213562, T29, T24); + T1H = FMA(KP1_847759065, T1y, T1v); + T1z = FNMS(KP1_847759065, T1y, T1v); + R0[WS(rs, 5)] = FNMS(KP1_847759065, T2c, T2b); + R0[WS(rs, 13)] = FMA(KP1_847759065, T2c, T2b); + R0[WS(rs, 1)] = FMA(KP1_847759065, T2a, T1Z); + R0[WS(rs, 9)] = FNMS(KP1_847759065, T2a, T1Z); + } + } + } + } + T1G = FNMS(KP668178637, T1F, T1C); + T1I = FMA(KP668178637, T1C, T1F); + R1[WS(rs, 5)] = FNMS(KP1_662939224, T1I, T1H); + R1[WS(rs, 13)] = FMA(KP1_662939224, T1I, T1H); + R1[WS(rs, 1)] = FMA(KP1_662939224, T1G, T1z); + R1[WS(rs, 9)] = FNMS(KP1_662939224, T1G, T1z); + } + } +} +/* Descriptor for the r2cb_32 variant that precedes the else branch of the HAVE_FMA conditional. NOTE(review): 32 is presumably the transform size and the braces presumably hold the generator operation tally (additions, multiplications, fused multiply/adds, other); confirm against the kr2c_desc declaration. */ +static const kr2c_desc desc = { 32, "r2cb_32", {72, 0, 84, 0}, &GENUS }; +/* Registration hook: passes r2cb_32 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_32) (planner *p) { + X(kr2c_register) (p, r2cb_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cb_32 -include r2cb.h */ + +/* + * This function contains 156 FP additions, 50 FP multiplications, + * (or, 140 additions, 34 multiplications, 16 fused multiply/add), + * 54 stack variables, 9 constants, and 64 memory accesses + */ +#include "r2cb.h" +/* r2cb_32, variant compiled when HAVE_FMA is not defined (the else branch of the HAVE_FMA conditional). Each loop iteration reads Cr[0], Cr[WS(csr, 1..16)] and Ci[WS(csi, 1..15)] and stores R0[0], R0[WS(rs, 1..15)], R1[0] and R1[WS(rs, 1..15)], matching the 64 memory accesses counted above. Presumably the size-32 backward (complex-to-real) codelet, going by the r2cb name and the -n 32 -sign 1 generator flags; confirm against the FFTW codelet documentation. */ +static void r2cb_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, 
+1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E T9, T2c, TB, T1y, T6, T2b, Ty, T1v, Th, T2e, T2f, TD, TK, T1C, T1F; + E T1h, Tp, T2i, T2m, TN, T13, T1K, T1Y, T1k, Tw, TU, T1l, TW, T1V, T2j; + E T1R, T2l; + { + E T7, T8, T1w, Tz, TA, T1x; + T7 = Cr[WS(csr, 4)]; + T8 = Cr[WS(csr, 12)]; + T1w = T7 - T8; + Tz = Ci[WS(csi, 4)]; + TA = Ci[WS(csi, 12)]; + T1x = Tz + TA; + T9 = KP2_000000000 * (T7 + T8); + T2c = KP1_414213562 * (T1w + T1x); + TB = KP2_000000000 * (Tz - TA); + T1y = KP1_414213562 * (T1w - T1x); + } + { + E T5, T1u, T3, T1s; + { + E T4, T1t, T1, T2; + T4 = Cr[WS(csr, 8)]; + T5 = KP2_000000000 * T4; + T1t = Ci[WS(csi, 8)]; + T1u = KP2_000000000 * T1t; + T1 = Cr[0]; + T2 = Cr[WS(csr, 16)]; + T3 = T1 + T2; + T1s = T1 - T2; + } + T6 = T3 + T5; + T2b = T1s + T1u; + Ty = T3 - T5; + T1v = T1s - T1u; + } + { + E Td, T1A, TG, T1E, Tg, T1D, TJ, T1B; + { + E Tb, Tc, TE, TF; + Tb = Cr[WS(csr, 2)]; + Tc = Cr[WS(csr, 14)]; + Td = Tb + Tc; + T1A = Tb - Tc; + TE = Ci[WS(csi, 2)]; + TF = Ci[WS(csi, 14)]; + TG = TE - TF; + T1E = TE + TF; + } + { + E Te, Tf, TH, TI; + Te = Cr[WS(csr, 10)]; + Tf = Cr[WS(csr, 6)]; + Tg = Te + Tf; + T1D = Te - Tf; + TH = Ci[WS(csi, 10)]; + TI = Ci[WS(csi, 6)]; + TJ = TH - TI; + T1B = TH + TI; + } + Th = KP2_000000000 * (Td + Tg); + T2e = T1A + T1B; + T2f = T1E - T1D; + TD = Td - Tg; + TK = TG - TJ; + T1C = T1A - T1B; + T1F = T1D + T1E; + T1h = KP2_000000000 * (TJ + TG); + } + { + E Tl, T1I, TZ, T1X, To, T1W, T12, T1J; + { + E Tj, Tk, TX, TY; + Tj = Cr[WS(csr, 1)]; + Tk = Cr[WS(csr, 15)]; + Tl = Tj + Tk; + T1I = Tj - Tk; + TX = Ci[WS(csi, 1)]; + TY = Ci[WS(csi, 15)]; + TZ = TX - TY; + T1X = TX + TY; + } + { + E Tm, Tn, T10, T11; + Tm = Cr[WS(csr, 9)]; + Tn = Cr[WS(csr, 
7)]; + To = Tm + Tn; + T1W = Tm - Tn; + T10 = Ci[WS(csi, 9)]; + T11 = Ci[WS(csi, 7)]; + T12 = T10 - T11; + T1J = T10 + T11; + } + Tp = Tl + To; + T2i = T1I + T1J; + T2m = T1X - T1W; + TN = Tl - To; + T13 = TZ - T12; + T1K = T1I - T1J; + T1Y = T1W + T1X; + T1k = T12 + TZ; + } + { + E Ts, T1L, TT, T1M, Tv, T1O, TQ, T1P; + { + E Tq, Tr, TR, TS; + Tq = Cr[WS(csr, 5)]; + Tr = Cr[WS(csr, 11)]; + Ts = Tq + Tr; + T1L = Tq - Tr; + TR = Ci[WS(csi, 5)]; + TS = Ci[WS(csi, 11)]; + TT = TR - TS; + T1M = TR + TS; + } + { + E Tt, Tu, TO, TP; + Tt = Cr[WS(csr, 3)]; + Tu = Cr[WS(csr, 13)]; + Tv = Tt + Tu; + T1O = Tt - Tu; + TO = Ci[WS(csi, 13)]; + TP = Ci[WS(csi, 3)]; + TQ = TO - TP; + T1P = TP + TO; + } + Tw = Ts + Tv; + TU = TQ - TT; + T1l = TT + TQ; + TW = Ts - Tv; + { + E T1T, T1U, T1N, T1Q; + T1T = T1L + T1M; + T1U = T1O + T1P; + T1V = KP707106781 * (T1T - T1U); + T2j = KP707106781 * (T1T + T1U); + T1N = T1L - T1M; + T1Q = T1O - T1P; + T1R = KP707106781 * (T1N + T1Q); + T2l = KP707106781 * (T1N - T1Q); + } + } + { + E Tx, T1r, Ti, T1q, Ta; + Tx = KP2_000000000 * (Tp + Tw); + T1r = KP2_000000000 * (T1l + T1k); + Ta = T6 + T9; + Ti = Ta + Th; + T1q = Ta - Th; + R0[WS(rs, 8)] = Ti - Tx; + R0[WS(rs, 12)] = T1q + T1r; + R0[0] = Ti + Tx; + R0[WS(rs, 4)] = T1q - T1r; + } + { + E T1i, T1o, T1n, T1p, T1g, T1j, T1m; + T1g = T6 - T9; + T1i = T1g - T1h; + T1o = T1g + T1h; + T1j = Tp - Tw; + T1m = T1k - T1l; + T1n = KP1_414213562 * (T1j - T1m); + T1p = KP1_414213562 * (T1j + T1m); + R0[WS(rs, 10)] = T1i - T1n; + R0[WS(rs, 14)] = T1o + T1p; + R0[WS(rs, 2)] = T1i + T1n; + R0[WS(rs, 6)] = T1o - T1p; + } + { + E TM, T16, T15, T17; + { + E TC, TL, TV, T14; + TC = Ty - TB; + TL = KP1_414213562 * (TD - TK); + TM = TC + TL; + T16 = TC - TL; + TV = TN + TU; + T14 = TW + T13; + T15 = FNMS(KP765366864, T14, KP1_847759065 * TV); + T17 = FMA(KP765366864, TV, KP1_847759065 * T14); + } + R0[WS(rs, 9)] = TM - T15; + R0[WS(rs, 13)] = T16 + T17; + R0[WS(rs, 1)] = TM + T15; + R0[WS(rs, 5)] = T16 - T17; + } + 
{ + E T2t, T2x, T2w, T2y; + { + E T2r, T2s, T2u, T2v; + T2r = T2b + T2c; + T2s = FMA(KP1_847759065, T2e, KP765366864 * T2f); + T2t = T2r - T2s; + T2x = T2r + T2s; + T2u = T2i + T2j; + T2v = T2m - T2l; + T2w = FNMS(KP1_961570560, T2v, KP390180644 * T2u); + T2y = FMA(KP1_961570560, T2u, KP390180644 * T2v); + } + R1[WS(rs, 11)] = T2t - T2w; + R1[WS(rs, 15)] = T2x + T2y; + R1[WS(rs, 3)] = T2t + T2w; + R1[WS(rs, 7)] = T2x - T2y; + } + { + E T1a, T1e, T1d, T1f; + { + E T18, T19, T1b, T1c; + T18 = Ty + TB; + T19 = KP1_414213562 * (TD + TK); + T1a = T18 - T19; + T1e = T18 + T19; + T1b = TN - TU; + T1c = T13 - TW; + T1d = FNMS(KP1_847759065, T1c, KP765366864 * T1b); + T1f = FMA(KP1_847759065, T1b, KP765366864 * T1c); + } + R0[WS(rs, 11)] = T1a - T1d; + R0[WS(rs, 15)] = T1e + T1f; + R0[WS(rs, 3)] = T1a + T1d; + R0[WS(rs, 7)] = T1e - T1f; + } + { + E T25, T29, T28, T2a; + { + E T23, T24, T26, T27; + T23 = T1v - T1y; + T24 = FMA(KP765366864, T1C, KP1_847759065 * T1F); + T25 = T23 - T24; + T29 = T23 + T24; + T26 = T1K - T1R; + T27 = T1Y - T1V; + T28 = FNMS(KP1_662939224, T27, KP1_111140466 * T26); + T2a = FMA(KP1_662939224, T26, KP1_111140466 * T27); + } + R1[WS(rs, 10)] = T25 - T28; + R1[WS(rs, 14)] = T29 + T2a; + R1[WS(rs, 2)] = T25 + T28; + R1[WS(rs, 6)] = T29 - T2a; + } + { + E T2h, T2p, T2o, T2q; + { + E T2d, T2g, T2k, T2n; + T2d = T2b - T2c; + T2g = FNMS(KP1_847759065, T2f, KP765366864 * T2e); + T2h = T2d + T2g; + T2p = T2d - T2g; + T2k = T2i - T2j; + T2n = T2l + T2m; + T2o = FNMS(KP1_111140466, T2n, KP1_662939224 * T2k); + T2q = FMA(KP1_111140466, T2k, KP1_662939224 * T2n); + } + R1[WS(rs, 9)] = T2h - T2o; + R1[WS(rs, 13)] = T2p + T2q; + R1[WS(rs, 1)] = T2h + T2o; + R1[WS(rs, 5)] = T2p - T2q; + } + { + E T1H, T21, T20, T22; + { + E T1z, T1G, T1S, T1Z; + T1z = T1v + T1y; + T1G = FNMS(KP765366864, T1F, KP1_847759065 * T1C); + T1H = T1z + T1G; + T21 = T1z - T1G; + T1S = T1K + T1R; + T1Z = T1V + T1Y; + T20 = FNMS(KP390180644, T1Z, KP1_961570560 * T1S); + T22 = 
FMA(KP390180644, T1S, KP1_961570560 * T1Z); + } + R1[WS(rs, 8)] = T1H - T20; + R1[WS(rs, 12)] = T21 + T22; + R1[0] = T1H + T20; + R1[WS(rs, 4)] = T21 - T22; + } + } + } +} +/* Descriptor for this r2cb_32 variant: 140, 34 and 16 repeat the additions, multiplications and fused multiply/add counts from the generator comment that precedes the function. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this chunk. */ +static const kr2c_desc desc = { 32, "r2cb_32", {140, 34, 16, 0}, &GENUS }; +/* Registration hook: passes r2cb_32 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_32) (planner *p) { + X(kr2c_register) (p, r2cb_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -name r2cb_4 -include r2cb.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 2 additions, 0 multiplications, 4 fused multiply/add), + * 8 stack variables, 1 constants, and 8 memory accesses + */ +#include "r2cb.h" +/* r2cb_4, HAVE_FMA variant: each loop iteration loads Cr[0], Cr[WS(csr, 1)], Cr[WS(csr, 2)] and Ci[WS(csi, 1)] and stores R0[0], R0[WS(rs, 1)], R1[0] and R1[WS(rs, 1)], i.e. the 8 memory accesses counted above. Presumably the size-4 backward (complex-to-real) codelet, going by the r2cb name and the -n 4 -sign 1 generator flags; FMA and FNMS are macros defined outside r2cb_4.c. */ +static void r2cb_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E T4, T6, T1, T2, T3, T5; + T4 = Cr[WS(csr, 1)]; + T6 = Ci[WS(csi, 1)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 2)]; + T3 = T1 + T2; + T5 = T1 - T2; + R1[0] = FNMS(KP2_000000000, T6, T5); + R1[WS(rs, 1)] = FMA(KP2_000000000, T6, T5); + R0[0] = FMA(KP2_000000000, T4, T3); + R0[WS(rs, 1)] = FNMS(KP2_000000000, T4, T3); + } + } +} +/* Descriptor for this variant: 2, 0 and 4 repeat the additions, multiplications and fused multiply/add counts from the generator comment above. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this file. */ +static const kr2c_desc desc = { 4, "r2cb_4", {2, 0, 4, 0}, &GENUS }; +/* Registration hook: passes r2cb_4 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_4) (planner *p) { + X(kr2c_register) (p, r2cb_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 4 -name r2cb_4 -include r2cb.h */ + +/* + * This function contains 6 FP additions, 2 FP multiplications, + * (or, 6 additions, 2 multiplications, 0 fused multiply/add), + * 10 stack variables, 1 constants, and 8 memory 
accesses + */ +#include "r2cb.h" +/* r2cb_4, variant compiled when HAVE_FMA is not defined. With T3 = Cr[0] + Cr[WS(csr, 2)], T6 = Cr[0] - Cr[WS(csr, 2)], T5 = KP2_000000000 * Cr[WS(csr, 1)] and T8 = KP2_000000000 * Ci[WS(csi, 1)], it stores R0[0] = T3 + T5, R0[WS(rs, 1)] = T3 - T5, R1[0] = T6 - T8 and R1[WS(rs, 1)] = T6 + T8. Presumably the size-4 backward (complex-to-real) codelet, going by the r2cb name; confirm against the FFTW codelet documentation. */ +static void r2cb_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E T5, T8, T3, T6; + { + E T4, T7, T1, T2; + T4 = Cr[WS(csr, 1)]; + T5 = KP2_000000000 * T4; + T7 = Ci[WS(csi, 1)]; + T8 = KP2_000000000 * T7; + T1 = Cr[0]; + T2 = Cr[WS(csr, 2)]; + T3 = T1 + T2; + T6 = T1 - T2; + } + R0[WS(rs, 1)] = T3 - T5; + R1[WS(rs, 1)] = T6 + T8; + R0[0] = T3 + T5; + R1[0] = T6 - T8; + } + } +} +/* Descriptor for this variant: 6, 2 and 0 repeat the additions, multiplications and fused multiply/add counts from the generator comment that precedes the function. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this file. */ +static const kr2c_desc desc = { 4, "r2cb_4", {6, 2, 0, 0}, &GENUS }; +/* Registration hook: passes r2cb_4 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_4) (planner *p) { + X(kr2c_register) (p, r2cb_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -name r2cb_5 -include r2cb.h */ + +/* + * This function contains 12 FP additions, 10 FP multiplications, + * (or, 2 additions, 0 multiplications, 10 fused multiply/add), + * 18 stack variables, 5 constants, and 10 memory accesses + */ +#include "r2cb.h" +/* r2cb_5, HAVE_FMA variant: each loop iteration loads Cr[0], Cr[WS(csr, 1)], Cr[WS(csr, 2)], Ci[WS(csi, 1)] and Ci[WS(csi, 2)] and stores R0[0], R0[WS(rs, 1)], R0[WS(rs, 2)], R1[0] and R1[WS(rs, 1)], i.e. the 10 memory accesses counted above. Presumably the size-5 backward (complex-to-real) codelet, going by the r2cb name and the -n 5 -sign 1 generator flags; FMA, FMS and FNMS are macros defined outside r2cb_5.c. */ +static void r2cb_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E T1, T2, T3, Tc, Ta, T8, T9; + T8 = Ci[WS(csi, 1)]; + T9 = Ci[WS(csi, 2)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + T3 = Cr[WS(csr, 2)]; + Tc = FMS(KP618033988, T8, T9); + Ta = FMA(KP618033988, T9, T8); + { + E T6, T4, T5, T7, Tb; + T6 = T2 - T3; + T4 = T2 + T3; + R0[0] = FMA(KP2_000000000, T4, T1); + T5 = FNMS(KP500000000, T4, T1); + T7 = FMA(KP1_118033988, T6, T5); + Tb = FNMS(KP1_118033988, T6, T5); + R0[WS(rs, 2)] = FMA(KP1_902113032, Ta, T7); + R1[0] = FNMS(KP1_902113032, Ta, T7); + R1[WS(rs, 1)] = FMA(KP1_902113032, Tc, Tb); + 
R0[WS(rs, 1)] = FNMS(KP1_902113032, Tc, Tb); + } + } + } +} +/* Descriptor for the HAVE_FMA variant: 2, 0 and 10 repeat the additions, multiplications and fused multiply/add counts from the generator comment that precedes the function. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this file. */ +static const kr2c_desc desc = { 5, "r2cb_5", {2, 0, 10, 0}, &GENUS }; +/* Registration hook: passes r2cb_5 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_5) (planner *p) { + X(kr2c_register) (p, r2cb_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -name r2cb_5 -include r2cb.h */ + +/* + * This function contains 12 FP additions, 7 FP multiplications, + * (or, 8 additions, 3 multiplications, 4 fused multiply/add), + * 18 stack variables, 5 constants, and 10 memory accesses + */ +#include "r2cb.h" +/* r2cb_5, variant compiled when HAVE_FMA is not defined: same five loads (Cr[0], Cr[WS(csr, 1)], Cr[WS(csr, 2)], Ci[WS(csi, 1)], Ci[WS(csi, 2)]) and five stores (R0[0], R0[WS(rs, 1)], R0[WS(rs, 2)], R1[0], R1[WS(rs, 1)]) as the HAVE_FMA variant, but it declares KP1_175570504 where that variant declares KP618033988. Presumably the size-5 backward (complex-to-real) codelet, going by the r2cb name; confirm against the FFTW codelet documentation. */ +static void r2cb_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); + DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E Ta, Tc, T1, T4, T5, T6, Tb, T7; + { + E T8, T9, T2, T3; + T8 = Ci[WS(csi, 1)]; + T9 = Ci[WS(csi, 2)]; + Ta = FNMS(KP1_902113032, T9, KP1_175570504 * T8); + Tc = FMA(KP1_902113032, T8, KP1_175570504 * T9); + T1 = Cr[0]; + T2 = Cr[WS(csr, 1)]; + T3 = Cr[WS(csr, 2)]; + T4 = T2 + T3; + T5 = FNMS(KP500000000, T4, T1); + T6 = KP1_118033988 * (T2 - T3); + } + R0[0] = FMA(KP2_000000000, T4, T1); + Tb = T6 + T5; + R1[0] = Tb - Tc; + R0[WS(rs, 2)] = Tb + Tc; + T7 = T5 - T6; + R0[WS(rs, 1)] = T7 - Ta; + R1[WS(rs, 1)] = T7 + Ta; + } + } +} +/* Descriptor for this variant: 8, 3 and 4 repeat the additions, multiplications and fused multiply/add counts from the generator comment that precedes the function. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this file. */ +static const kr2c_desc desc = { 5, "r2cb_5", {8, 3, 4, 0}, &GENUS }; +/* Registration hook: passes r2cb_5 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_5) (planner *p) { + X(kr2c_register) (p, r2cb_5, &desc); +} + +#endif /* HAVE_FMA */ diff -r 
8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -name r2cb_6 -include r2cb.h */ + +/* + * This function contains 14 FP additions, 6 FP multiplications, + * (or, 8 additions, 0 multiplications, 6 fused multiply/add), + * 13 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cb.h" +/* r2cb_6, HAVE_FMA variant: each loop iteration loads Cr[0], Cr[WS(csr, 1)], Cr[WS(csr, 2)], Cr[WS(csr, 3)], Ci[WS(csi, 1)] and Ci[WS(csi, 2)] and stores R0[0], R0[WS(rs, 1)], R0[WS(rs, 2)], R1[0], R1[WS(rs, 1)] and R1[WS(rs, 2)], i.e. the 12 memory accesses counted above. Presumably the size-6 backward (complex-to-real) codelet, going by the r2cb name and the -n 6 -sign 1 generator flags; FMA and FNMS are macros defined outside r2cb_6.c. */ +static void r2cb_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + 
ivs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E T4, T7, T3, Te, Tc, T5; + { + E T1, T2, Ta, Tb; + T1 = Cr[0]; + T2 = Cr[WS(csr, 3)]; + Ta = Ci[WS(csi, 2)]; + Tb = Ci[WS(csi, 1)]; + T4 = Cr[WS(csr, 2)]; + T7 = T1 - T2; + T3 = T1 + T2; + Te = Ta + Tb; + Tc = Ta - Tb; + T5 = Cr[WS(csr, 1)]; + } + { + E T6, T8, Td, T9; + T6 = T4 + T5; + T8 = T5 - T4; + Td = T7 + T8; + R1[WS(rs, 1)] = FNMS(KP2_000000000, T8, T7); + T9 = T3 - T6; + R0[0] = FMA(KP2_000000000, T6, T3); + R1[WS(rs, 2)] = FMA(KP1_732050807, Te, Td); + R1[0] = FNMS(KP1_732050807, Te, Td); + R0[WS(rs, 1)] = FMA(KP1_732050807, Tc, T9); + R0[WS(rs, 2)] = FNMS(KP1_732050807, Tc, T9); + } + } + } +} +/* Descriptor for the HAVE_FMA variant: 8, 0 and 6 repeat the additions, multiplications and fused multiply/add counts from the generator comment that precedes the function. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this file. */ +static const kr2c_desc desc = { 6, "r2cb_6", {8, 0, 6, 0}, &GENUS }; +/* Registration hook: passes r2cb_6 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_6) (planner *p) { + X(kr2c_register) (p, r2cb_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -name r2cb_6 -include r2cb.h */ + +/* + * This function contains 14 FP additions, 4 FP multiplications, + * (or, 12 additions, 2 multiplications, 2 fused multiply/add), + * 17 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cb.h" +/* r2cb_6, variant compiled when HAVE_FMA is not defined: same six loads (Cr[0], Cr[WS(csr, 1..3)], Ci[WS(csi, 1)], Ci[WS(csi, 2)]) and six stores (R0[0], R0[WS(rs, 1)], R0[WS(rs, 2)], R1[0], R1[WS(rs, 1)], R1[WS(rs, 2)]) as the HAVE_FMA variant; the KP1_732050807 scalings are written as plain multiplications and only the two KP2_000000000 terms go through the FMA macro. Presumably the size-6 backward (complex-to-real) codelet, going by the r2cb name; confirm against the FFTW codelet documentation. */ +static void r2cb_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) {/* v iterations; the increment clause advances R0 and R1 by ovs and Cr and Ci by ivs */ + E T3, T7, Tc, Te, T6, T8, T1, T2, T9, Td; + T1 = Cr[0]; + T2 = Cr[WS(csr, 3)]; + T3 = T1 - T2; + T7 = T1 + T2; + { + E Ta, Tb, T4, T5; + Ta = Ci[WS(csi, 2)]; + Tb = Ci[WS(csi, 1)]; + Tc = KP1_732050807 * (Ta - Tb); + Te = KP1_732050807 * (Ta + Tb); + T4 = Cr[WS(csr, 2)]; + T5 = Cr[WS(csr, 
1)]; + T6 = T4 - T5; + T8 = T4 + T5; + } + R1[WS(rs, 1)] = FMA(KP2_000000000, T6, T3); + R0[0] = FMA(KP2_000000000, T8, T7); + T9 = T7 - T8; + R0[WS(rs, 2)] = T9 - Tc; + R0[WS(rs, 1)] = T9 + Tc; + Td = T3 - T6; + R1[0] = Td - Te; + R1[WS(rs, 2)] = Td + Te; + } + } +} +/* Descriptor for this variant: 12, 2 and 2 repeat the additions, multiplications and fused multiply/add counts from the generator comment that precedes the function. NOTE(review): the meaning of the trailing 0 and of GENUS is not visible in this file. */ +static const kr2c_desc desc = { 6, "r2cb_6", {12, 2, 2, 0}, &GENUS }; +/* Registration hook: passes r2cb_6 and &desc to X(kr2c_register) for planner p. */ +void X(codelet_r2cb_6) (planner *p) { + X(kr2c_register) (p, r2cb_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1392 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cb_64 -include r2cb.h */ + +/* + * This function contains 394 FP additions, 216 FP multiplications, + * (or, 178 additions, 0 multiplications, 216 fused multiply/add), + * 143 stack variables, 18 constants, and 128 memory accesses + */ +#include "r2cb.h" + +static void r2cb_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP414213562, 
+0.414213562373095048801688724209698078569671875); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E T3d, T32, T37, T2Z, T3f, T3b, T3c, T35; + { + E T5H, T9, T5j, T4p, T2T, T1b, T3Z, T3j, Tg, T5I, T5k, T4u, T40, T3m, T2U; + E T1m, T3o, T1s, T1J, T3r, T5K, Tw, T5N, T6c, T4A, T5n, T3s, T1D, T5m, T4F; + E T3p, T1M, T3w, T1U, T2z, T3H, T5Q, TM, T6f, T5Y, T5q, T4M, T3I, T25, T5t; + E T53, T3x, T2C, T3A, T5V, T11, T6g, T5T, T55, T4W, T3z, T2E, T2h, T2F, T2s; + E T3L, T3E, T54, T4R; + { + E Td, T1c, Tc, T4r, T1k, Te, T1d, T1e; + { + E T3h, T15, T1a, T3i; + { + E T4, T14, T17, T13, T3, T16, T8, T18; + T4 = Cr[WS(csr, 16)]; + T14 = Ci[WS(csi, 16)]; + { + E T1, T2, T6, T7; + T1 = Cr[0]; + T2 = Cr[WS(csr, 32)]; + T6 = Cr[WS(csr, 8)]; + T7 = Cr[WS(csr, 24)]; + T17 = Ci[WS(csi, 8)]; + T13 = T1 - T2; + T3 = T1 + T2; + T16 = T6 - T7; + T8 = T6 + T7; + T18 = Ci[WS(csi, 24)]; + } + { + E T4n, T5, T4o, T19; + T4n = FNMS(KP2_000000000, T4, T3); + T5 = FMA(KP2_000000000, T4, T3); + T3h = FMA(KP2_000000000, T14, T13); + T15 = FNMS(KP2_000000000, T14, T13); + T4o = T17 - T18; + T19 = T17 + T18; + T5H = FNMS(KP2_000000000, T8, T5); + T9 = FMA(KP2_000000000, T8, T5); + T5j = FMA(KP2_000000000, T4o, T4n); + T4p = FNMS(KP2_000000000, T4o, T4n); + T1a = T16 - T19; + T3i = T16 + T19; + } + } + { + E Ta, Tb, T1i, T1j; + Ta = Cr[WS(csr, 4)]; + T2T = FNMS(KP1_414213562, T1a, T15); + T1b = FMA(KP1_414213562, T1a, T15); + T3Z = FMA(KP1_414213562, T3i, T3h); + T3j = FNMS(KP1_414213562, T3i, T3h); + Tb = Cr[WS(csr, 28)]; + T1i = Ci[WS(csi, 4)]; + T1j = Ci[WS(csi, 28)]; + Td = Cr[WS(csr, 20)]; + T1c = Ta - Tb; + Tc = Ta + Tb; + T4r = T1i - T1j; + T1k = T1i + T1j; + Te = Cr[WS(csr, 12)]; + T1d = Ci[WS(csi, 
20)]; + T1e = Ci[WS(csi, 12)]; + } + } + { + E T4B, T4E, T1K, T1L; + { + E T1o, Tk, T4C, T1I, T1F, Tn, T4D, T1r, Ts, T1t, Tr, T4y, T1w, Tt, T1z; + E T1A; + { + E Tl, Tm, T1p, T1q; + { + E Ti, Tj, T1G, T1H, T1h, Tf; + Ti = Cr[WS(csr, 2)]; + T1h = Td - Te; + Tf = Td + Te; + { + E T4s, T1f, T3k, T1l; + T4s = T1d - T1e; + T1f = T1d + T1e; + T3k = T1k - T1h; + T1l = T1h + T1k; + { + E T4q, T4t, T3l, T1g; + T4q = Tc - Tf; + Tg = Tc + Tf; + T4t = T4r - T4s; + T5I = T4s + T4r; + T3l = T1c + T1f; + T1g = T1c - T1f; + T5k = T4q + T4t; + T4u = T4q - T4t; + T40 = FMA(KP414213562, T3k, T3l); + T3m = FNMS(KP414213562, T3l, T3k); + T2U = FMA(KP414213562, T1g, T1l); + T1m = FNMS(KP414213562, T1l, T1g); + Tj = Cr[WS(csr, 30)]; + } + } + T1G = Ci[WS(csi, 2)]; + T1H = Ci[WS(csi, 30)]; + Tl = Cr[WS(csr, 18)]; + T1o = Ti - Tj; + Tk = Ti + Tj; + T4C = T1G - T1H; + T1I = T1G + T1H; + Tm = Cr[WS(csr, 14)]; + T1p = Ci[WS(csi, 18)]; + T1q = Ci[WS(csi, 14)]; + } + { + E Tp, Tq, T1u, T1v; + Tp = Cr[WS(csr, 10)]; + T1F = Tl - Tm; + Tn = Tl + Tm; + T4D = T1p - T1q; + T1r = T1p + T1q; + Tq = Cr[WS(csr, 22)]; + T1u = Ci[WS(csi, 10)]; + T1v = Ci[WS(csi, 22)]; + Ts = Cr[WS(csr, 6)]; + T1t = Tp - Tq; + Tr = Tp + Tq; + T4y = T1u - T1v; + T1w = T1u + T1v; + Tt = Cr[WS(csr, 26)]; + T1z = Ci[WS(csi, 6)]; + T1A = Ci[WS(csi, 26)]; + } + } + { + E T1y, T4x, T1B, T4w, To, Tv, Tu; + T3o = T1o + T1r; + T1s = T1o - T1r; + T1y = Ts - Tt; + Tu = Ts + Tt; + T4x = T1A - T1z; + T1B = T1z + T1A; + T1J = T1F + T1I; + T3r = T1I - T1F; + T4w = Tk - Tn; + To = Tk + Tn; + Tv = Tr + Tu; + T4B = Tr - Tu; + { + E T4z, T5L, T5M, T1x, T1C; + T4E = T4C - T4D; + T5L = T4D + T4C; + T5M = T4y + T4x; + T4z = T4x - T4y; + T5K = To - Tv; + Tw = To + Tv; + T5N = T5L - T5M; + T6c = T5M + T5L; + T1K = T1t + T1w; + T1x = T1t - T1w; + T1C = T1y - T1B; + T1L = T1y + T1B; + T4A = T4w + T4z; + T5n = T4w - T4z; + T3s = T1C - T1x; + T1D = T1x + T1C; + } + } + } + { + E T4Z, T52, T2A, T2B; + { + E T1Q, TA, T50, T2y, T2v, TD, T51, T1T, TI, T1V, 
TH, T4K, T1Y, TJ, T21; + E T22; + { + E TB, TC, T1R, T1S; + { + E Ty, Tz, T2w, T2x; + Ty = Cr[WS(csr, 1)]; + T5m = T4E - T4B; + T4F = T4B + T4E; + T3p = T1K + T1L; + T1M = T1K - T1L; + Tz = Cr[WS(csr, 31)]; + T2w = Ci[WS(csi, 1)]; + T2x = Ci[WS(csi, 31)]; + TB = Cr[WS(csr, 17)]; + T1Q = Ty - Tz; + TA = Ty + Tz; + T50 = T2w - T2x; + T2y = T2w + T2x; + TC = Cr[WS(csr, 15)]; + T1R = Ci[WS(csi, 17)]; + T1S = Ci[WS(csi, 15)]; + } + { + E TF, TG, T1W, T1X; + TF = Cr[WS(csr, 9)]; + T2v = TB - TC; + TD = TB + TC; + T51 = T1R - T1S; + T1T = T1R + T1S; + TG = Cr[WS(csr, 23)]; + T1W = Ci[WS(csi, 9)]; + T1X = Ci[WS(csi, 23)]; + TI = Cr[WS(csr, 7)]; + T1V = TF - TG; + TH = TF + TG; + T4K = T1W - T1X; + T1Y = T1W + T1X; + TJ = Cr[WS(csr, 25)]; + T21 = Ci[WS(csi, 7)]; + T22 = Ci[WS(csi, 25)]; + } + } + { + E T20, T4J, T23, T4I, TE, TL, TK; + T3w = T1Q + T1T; + T1U = T1Q - T1T; + T20 = TI - TJ; + TK = TI + TJ; + T4J = T22 - T21; + T23 = T21 + T22; + T2z = T2v + T2y; + T3H = T2y - T2v; + T4I = TA - TD; + TE = TA + TD; + TL = TH + TK; + T4Z = TH - TK; + { + E T4L, T5W, T5X, T1Z, T24; + T52 = T50 - T51; + T5W = T51 + T50; + T5X = T4K + T4J; + T4L = T4J - T4K; + T5Q = TE - TL; + TM = TE + TL; + T6f = T5X + T5W; + T5Y = T5W - T5X; + T2A = T1V + T1Y; + T1Z = T1V - T1Y; + T24 = T20 - T23; + T2B = T20 + T23; + T5q = T4I - T4L; + T4M = T4I + T4L; + T3I = T24 - T1Z; + T25 = T1Z + T24; + } + } + } + { + E T27, TP, T4O, T2f, T2c, TS, T4P, T2a, TX, T2i, TW, T4T, T2q, TY, T2j; + E T2k; + { + E TQ, TR, T28, T29; + { + E TN, TO, T2d, T2e; + TN = Cr[WS(csr, 5)]; + T5t = T52 - T4Z; + T53 = T4Z + T52; + T3x = T2A + T2B; + T2C = T2A - T2B; + TO = Cr[WS(csr, 27)]; + T2d = Ci[WS(csi, 5)]; + T2e = Ci[WS(csi, 27)]; + TQ = Cr[WS(csr, 21)]; + T27 = TN - TO; + TP = TN + TO; + T4O = T2d - T2e; + T2f = T2d + T2e; + TR = Cr[WS(csr, 11)]; + T28 = Ci[WS(csi, 21)]; + T29 = Ci[WS(csi, 11)]; + } + { + E TU, TV, T2o, T2p; + TU = Cr[WS(csr, 3)]; + T2c = TQ - TR; + TS = TQ + TR; + T4P = T28 - T29; + T2a = T28 + T29; + 
TV = Cr[WS(csr, 29)]; + T2o = Ci[WS(csi, 3)]; + T2p = Ci[WS(csi, 29)]; + TX = Cr[WS(csr, 13)]; + T2i = TU - TV; + TW = TU + TV; + T4T = T2p - T2o; + T2q = T2o + T2p; + TY = Cr[WS(csr, 19)]; + T2j = Ci[WS(csi, 13)]; + T2k = Ci[WS(csi, 19)]; + } + } + { + E T4N, T2n, T2l, T4Q, T2b, T2g, TT, TZ, T4U; + T4N = TP - TS; + TT = TP + TS; + T2n = TX - TY; + TZ = TX + TY; + T4U = T2j - T2k; + T2l = T2j + T2k; + { + E T5S, T10, T4S, T4V, T5R; + T5S = T4P + T4O; + T4Q = T4O - T4P; + T10 = TW + TZ; + T4S = TW - TZ; + T4V = T4T - T4U; + T5R = T4U + T4T; + T3A = T27 + T2a; + T2b = T27 - T2a; + T5V = TT - T10; + T11 = TT + T10; + T6g = T5S + T5R; + T5T = T5R - T5S; + T55 = T4V - T4S; + T4W = T4S + T4V; + T2g = T2c + T2f; + T3z = T2f - T2c; + } + { + E T3D, T3C, T2m, T2r; + T3D = T2i + T2l; + T2m = T2i - T2l; + T2r = T2n - T2q; + T3C = T2n + T2q; + T2E = FMA(KP414213562, T2b, T2g); + T2h = FNMS(KP414213562, T2g, T2b); + T2F = FNMS(KP414213562, T2m, T2r); + T2s = FMA(KP414213562, T2r, T2m); + T3L = FMA(KP414213562, T3C, T3D); + T3E = FNMS(KP414213562, T3D, T3C); + T54 = T4N + T4Q; + T4R = T4N - T4Q; + } + } + } + } + } + } + { + E T3K, T3B, T5u, T5r, T5d, T5g; + { + E T6e, T6h, T6b, T5J, T5O, T5Z, T66, T69, T65, T67, T5U, T12, T6m, Th; + T6e = TM - T11; + T12 = TM + T11; + T6m = T6g + T6f; + T6h = T6f - T6g; + T6b = FNMS(KP2_000000000, Tg, T9); + Th = FMA(KP2_000000000, Tg, T9); + T3K = FMA(KP414213562, T3z, T3A); + T3B = FNMS(KP414213562, T3A, T3z); + { + E T63, T64, T6l, Tx; + T5J = FNMS(KP2_000000000, T5I, T5H); + T63 = FMA(KP2_000000000, T5I, T5H); + T64 = T5K + T5N; + T5O = T5K - T5N; + T5Z = T5V + T5Y; + T66 = T5Y - T5V; + T6l = FNMS(KP2_000000000, Tw, Th); + Tx = FMA(KP2_000000000, Tw, Th); + T69 = FMA(KP1_414213562, T64, T63); + T65 = FNMS(KP1_414213562, T64, T63); + R0[WS(rs, 8)] = FNMS(KP2_000000000, T6m, T6l); + R0[WS(rs, 24)] = FMA(KP2_000000000, T6m, T6l); + R0[0] = FMA(KP2_000000000, T12, Tx); + R0[WS(rs, 16)] = FNMS(KP2_000000000, T12, Tx); + T67 = T5Q - T5T; + T5U = 
T5Q + T5T; + } + { + E T6j, T6d, T6a, T68; + T6a = FMA(KP414213562, T66, T67); + T68 = FNMS(KP414213562, T67, T66); + T6j = FMA(KP2_000000000, T6c, T6b); + T6d = FNMS(KP2_000000000, T6c, T6b); + R0[WS(rs, 14)] = FNMS(KP1_847759065, T6a, T69); + R0[WS(rs, 30)] = FMA(KP1_847759065, T6a, T69); + R0[WS(rs, 22)] = FMA(KP1_847759065, T68, T65); + R0[WS(rs, 6)] = FNMS(KP1_847759065, T68, T65); + { + E T61, T5P, T6k, T6i; + T6k = T6e + T6h; + T6i = T6e - T6h; + T61 = FNMS(KP1_414213562, T5O, T5J); + T5P = FMA(KP1_414213562, T5O, T5J); + R0[WS(rs, 12)] = FNMS(KP1_414213562, T6k, T6j); + R0[WS(rs, 28)] = FMA(KP1_414213562, T6k, T6j); + R0[WS(rs, 4)] = FMA(KP1_414213562, T6i, T6d); + R0[WS(rs, 20)] = FNMS(KP1_414213562, T6i, T6d); + { + E T5b, T4v, T5f, T4Y, T5e, T57, T4G, T5c; + { + E T4X, T56, T62, T60; + T5u = T4W - T4R; + T4X = T4R + T4W; + T56 = T54 + T55; + T5r = T54 - T55; + T5b = FNMS(KP1_414213562, T4u, T4p); + T4v = FMA(KP1_414213562, T4u, T4p); + T62 = FMA(KP414213562, T5U, T5Z); + T60 = FNMS(KP414213562, T5Z, T5U); + T5f = FNMS(KP707106781, T4X, T4M); + T4Y = FMA(KP707106781, T4X, T4M); + T5e = FNMS(KP707106781, T56, T53); + T57 = FMA(KP707106781, T56, T53); + R0[WS(rs, 10)] = FNMS(KP1_847759065, T62, T61); + R0[WS(rs, 26)] = FMA(KP1_847759065, T62, T61); + R0[WS(rs, 2)] = FMA(KP1_847759065, T60, T5P); + R0[WS(rs, 18)] = FNMS(KP1_847759065, T60, T5P); + T4G = FNMS(KP414213562, T4F, T4A); + T5c = FMA(KP414213562, T4A, T4F); + } + { + E T5a, T59, T5h, T5i, T58, T4H; + T5a = FMA(KP198912367, T4Y, T57); + T58 = FNMS(KP198912367, T57, T4Y); + T59 = FNMS(KP1_847759065, T4G, T4v); + T4H = FMA(KP1_847759065, T4G, T4v); + T5h = FMA(KP1_847759065, T5c, T5b); + T5d = FNMS(KP1_847759065, T5c, T5b); + T5i = FMA(KP668178637, T5e, T5f); + T5g = FNMS(KP668178637, T5f, T5e); + R0[WS(rs, 1)] = FMA(KP1_961570560, T58, T4H); + R0[WS(rs, 17)] = FNMS(KP1_961570560, T58, T4H); + R0[WS(rs, 29)] = FMA(KP1_662939224, T5i, T5h); + R0[WS(rs, 13)] = FNMS(KP1_662939224, T5i, T5h); + R0[WS(rs, 
25)] = FMA(KP1_961570560, T5a, T59); + R0[WS(rs, 9)] = FNMS(KP1_961570560, T5a, T59); + } + } + } + } + } + { + E T43, T42, T46, T4a, T49, T3V, T3G, T47, T3P, T3v, T3X, T3T, T3U, T3N, T5B; + E T5E; + { + E T5s, T5D, T5z, T5l, T5C, T5v, T5o, T5A; + R0[WS(rs, 21)] = FMA(KP1_662939224, T5g, T5d); + R0[WS(rs, 5)] = FNMS(KP1_662939224, T5g, T5d); + T5s = FNMS(KP707106781, T5r, T5q); + T5D = FMA(KP707106781, T5r, T5q); + T5z = FMA(KP1_414213562, T5k, T5j); + T5l = FNMS(KP1_414213562, T5k, T5j); + T5C = FMA(KP707106781, T5u, T5t); + T5v = FNMS(KP707106781, T5u, T5t); + T5o = FNMS(KP414213562, T5n, T5m); + T5A = FMA(KP414213562, T5m, T5n); + { + E T5y, T5x, T5F, T5G, T5w, T5p; + T5y = FMA(KP668178637, T5s, T5v); + T5w = FNMS(KP668178637, T5v, T5s); + T5x = FMA(KP1_847759065, T5o, T5l); + T5p = FNMS(KP1_847759065, T5o, T5l); + T5F = FMA(KP1_847759065, T5A, T5z); + T5B = FNMS(KP1_847759065, T5A, T5z); + T5G = FMA(KP198912367, T5C, T5D); + T5E = FNMS(KP198912367, T5D, T5C); + R0[WS(rs, 3)] = FMA(KP1_662939224, T5w, T5p); + R0[WS(rs, 19)] = FNMS(KP1_662939224, T5w, T5p); + R0[WS(rs, 31)] = FMA(KP1_961570560, T5G, T5F); + R0[WS(rs, 15)] = FNMS(KP1_961570560, T5G, T5F); + R0[WS(rs, 27)] = FMA(KP1_662939224, T5y, T5x); + R0[WS(rs, 11)] = FNMS(KP1_662939224, T5y, T5x); + } + } + { + E T3R, T3n, T3J, T3S, T3u, T3M; + T3R = FMA(KP1_847759065, T3m, T3j); + T3n = FNMS(KP1_847759065, T3m, T3j); + R0[WS(rs, 23)] = FMA(KP1_961570560, T5E, T5B); + R0[WS(rs, 7)] = FNMS(KP1_961570560, T5E, T5B); + { + E T3q, T3t, T3y, T3F; + T43 = FMA(KP707106781, T3p, T3o); + T3q = FNMS(KP707106781, T3p, T3o); + T3t = FNMS(KP707106781, T3s, T3r); + T42 = FMA(KP707106781, T3s, T3r); + T46 = FMA(KP707106781, T3x, T3w); + T3y = FNMS(KP707106781, T3x, T3w); + T3F = T3B + T3E; + T4a = T3B - T3E; + T49 = FMA(KP707106781, T3I, T3H); + T3J = FNMS(KP707106781, T3I, T3H); + T3S = FMA(KP668178637, T3q, T3t); + T3u = FNMS(KP668178637, T3t, T3q); + T3V = FMA(KP923879532, T3F, T3y); + T3G = FNMS(KP923879532, T3F, T3y); 
+ T3M = T3K - T3L; + T47 = T3K + T3L; + } + T3P = FNMS(KP1_662939224, T3u, T3n); + T3v = FMA(KP1_662939224, T3u, T3n); + T3X = FMA(KP1_662939224, T3S, T3R); + T3T = FNMS(KP1_662939224, T3S, T3R); + T3U = FNMS(KP923879532, T3M, T3J); + T3N = FMA(KP923879532, T3M, T3J); + } + { + E T2X, T2W, T30, T34, T33, T2P, T2u, T31, T2J, T1P, T2R, T2N, T2O, T2H; + { + E T2L, T1n, T2D, T2M, T1O, T2G; + T2L = FNMS(KP1_847759065, T1m, T1b); + T1n = FMA(KP1_847759065, T1m, T1b); + { + E T3W, T3Y, T3Q, T3O; + T3W = FNMS(KP534511135, T3V, T3U); + T3Y = FMA(KP534511135, T3U, T3V); + T3Q = FMA(KP303346683, T3G, T3N); + T3O = FNMS(KP303346683, T3N, T3G); + R1[WS(rs, 21)] = FMA(KP1_763842528, T3W, T3T); + R1[WS(rs, 5)] = FNMS(KP1_763842528, T3W, T3T); + R1[WS(rs, 29)] = FMA(KP1_763842528, T3Y, T3X); + R1[WS(rs, 13)] = FNMS(KP1_763842528, T3Y, T3X); + R1[WS(rs, 25)] = FMA(KP1_913880671, T3Q, T3P); + R1[WS(rs, 9)] = FNMS(KP1_913880671, T3Q, T3P); + R1[WS(rs, 1)] = FMA(KP1_913880671, T3O, T3v); + R1[WS(rs, 17)] = FNMS(KP1_913880671, T3O, T3v); + } + { + E T1E, T1N, T26, T2t; + T2X = FNMS(KP707106781, T1D, T1s); + T1E = FMA(KP707106781, T1D, T1s); + T1N = FMA(KP707106781, T1M, T1J); + T2W = FNMS(KP707106781, T1M, T1J); + T30 = FNMS(KP707106781, T25, T1U); + T26 = FMA(KP707106781, T25, T1U); + T2t = T2h + T2s; + T34 = T2s - T2h; + T33 = FNMS(KP707106781, T2C, T2z); + T2D = FMA(KP707106781, T2C, T2z); + T2M = FMA(KP198912367, T1E, T1N); + T1O = FNMS(KP198912367, T1N, T1E); + T2P = FNMS(KP923879532, T2t, T26); + T2u = FMA(KP923879532, T2t, T26); + T2G = T2E + T2F; + T31 = T2E - T2F; + } + T2J = FNMS(KP1_961570560, T1O, T1n); + T1P = FMA(KP1_961570560, T1O, T1n); + T2R = FMA(KP1_961570560, T2M, T2L); + T2N = FNMS(KP1_961570560, T2M, T2L); + T2O = FNMS(KP923879532, T2G, T2D); + T2H = FMA(KP923879532, T2G, T2D); + } + { + E T4j, T48, T4d, T45, T4l, T4h, T4i, T4b; + { + E T4f, T41, T4g, T44; + T4f = FMA(KP1_847759065, T40, T3Z); + T41 = FNMS(KP1_847759065, T40, T3Z); + { + E T2Q, T2S, T2K, T2I; + 
T2Q = FNMS(KP820678790, T2P, T2O); + T2S = FMA(KP820678790, T2O, T2P); + T2K = FMA(KP098491403, T2u, T2H); + T2I = FNMS(KP098491403, T2H, T2u); + R1[WS(rs, 20)] = FMA(KP1_546020906, T2Q, T2N); + R1[WS(rs, 4)] = FNMS(KP1_546020906, T2Q, T2N); + R1[WS(rs, 28)] = FMA(KP1_546020906, T2S, T2R); + R1[WS(rs, 12)] = FNMS(KP1_546020906, T2S, T2R); + R1[WS(rs, 24)] = FMA(KP1_990369453, T2K, T2J); + R1[WS(rs, 8)] = FNMS(KP1_990369453, T2K, T2J); + R1[0] = FMA(KP1_990369453, T2I, T1P); + R1[WS(rs, 16)] = FNMS(KP1_990369453, T2I, T1P); + } + T4g = FMA(KP198912367, T42, T43); + T44 = FNMS(KP198912367, T43, T42); + T4j = FMA(KP923879532, T47, T46); + T48 = FNMS(KP923879532, T47, T46); + T4d = FMA(KP1_961570560, T44, T41); + T45 = FNMS(KP1_961570560, T44, T41); + T4l = FMA(KP1_961570560, T4g, T4f); + T4h = FNMS(KP1_961570560, T4g, T4f); + T4i = FMA(KP923879532, T4a, T49); + T4b = FNMS(KP923879532, T4a, T49); + } + { + E T39, T2V, T3a, T2Y; + T39 = FMA(KP1_847759065, T2U, T2T); + T2V = FNMS(KP1_847759065, T2U, T2T); + { + E T4k, T4m, T4e, T4c; + T4k = FNMS(KP098491403, T4j, T4i); + T4m = FMA(KP098491403, T4i, T4j); + T4e = FMA(KP820678790, T48, T4b); + T4c = FNMS(KP820678790, T4b, T48); + R1[WS(rs, 23)] = FMA(KP1_990369453, T4k, T4h); + R1[WS(rs, 7)] = FNMS(KP1_990369453, T4k, T4h); + R1[WS(rs, 31)] = FMA(KP1_990369453, T4m, T4l); + R1[WS(rs, 15)] = FNMS(KP1_990369453, T4m, T4l); + R1[WS(rs, 27)] = FMA(KP1_546020906, T4e, T4d); + R1[WS(rs, 11)] = FNMS(KP1_546020906, T4e, T4d); + R1[WS(rs, 3)] = FMA(KP1_546020906, T4c, T45); + R1[WS(rs, 19)] = FNMS(KP1_546020906, T4c, T45); + } + T3a = FMA(KP668178637, T2W, T2X); + T2Y = FNMS(KP668178637, T2X, T2W); + T3d = FMA(KP923879532, T31, T30); + T32 = FNMS(KP923879532, T31, T30); + T37 = FMA(KP1_662939224, T2Y, T2V); + T2Z = FNMS(KP1_662939224, T2Y, T2V); + T3f = FMA(KP1_662939224, T3a, T39); + T3b = FNMS(KP1_662939224, T3a, T39); + T3c = FMA(KP923879532, T34, T33); + T35 = FNMS(KP923879532, T34, T33); + } + } + } + } + } + } + { + E T3g, 
T3e, T36, T38; + T3g = FMA(KP303346683, T3c, T3d); + T3e = FNMS(KP303346683, T3d, T3c); + T36 = FNMS(KP534511135, T35, T32); + T38 = FMA(KP534511135, T32, T35); + R1[WS(rs, 22)] = FMA(KP1_913880671, T3e, T3b); + R1[WS(rs, 6)] = FNMS(KP1_913880671, T3e, T3b); + R1[WS(rs, 30)] = FMA(KP1_913880671, T3g, T3f); + R1[WS(rs, 14)] = FNMS(KP1_913880671, T3g, T3f); + R1[WS(rs, 26)] = FMA(KP1_763842528, T38, T37); + R1[WS(rs, 10)] = FNMS(KP1_763842528, T38, T37); + R1[WS(rs, 2)] = FMA(KP1_763842528, T36, T2Z); + R1[WS(rs, 18)] = FNMS(KP1_763842528, T36, T2Z); + } + } + } +} + /* Descriptor for the HAVE_FMA build of r2cb_64: size 64, codelet name, four counts and &GENUS. NOTE(review): the counts are presumably {adds, muls, fmas, other}, by analogy with the non-FMA descriptor below -- confirm against the kr2c_desc definition. */ +static const kr2c_desc desc = { 64, "r2cb_64", {178, 0, 216, 0}, &GENUS }; + /* Hands r2cb_64 and its descriptor to the planner p through kr2c_register. */ +void X(codelet_r2cb_64) (planner *p) { + X(kr2c_register) (p, r2cb_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cb_64 -include r2cb.h */ + +/* + * This function contains 394 FP additions, 134 FP multiplications, + * (or, 342 additions, 82 multiplications, 52 fused multiply/add), + * 110 stack variables, 19 constants, and 128 memory accesses + */ +#include "r2cb.h" + /* r2cb_64, build without HAVE_FMA. Each loop pass loads Cr[0], Cr[WS(csr, k)] for k = 1..32 and Ci[WS(csi, k)] for k = 1..31 (64 loads), then stores R0[0], R1[0] and R0[WS(rs, k)], R1[WS(rs, k)] for k = 1..31 (64 stores). v is the number of passes; Cr and Ci advance by ivs and R0 and R1 by ovs after every pass. NOTE(review): presumably the size-64 backward complex-to-real kernel, judging by the generator flags (gen_r2cb, -sign 1, -n 64) -- confirm. */ +static void r2cb_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ /* DK declares the 19 named constants used below; each KP name spells the leading digits of its value. */ + DK(KP1_268786568, +1.268786568327290996430343226450986741351374190); + DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); + DK(KP196034280, +0.196034280659121203988391127777283691722273346); + DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); + DK(KP942793473, +0.942793473651995297112775251810508755314920638); + DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); + DK(KP580569354, +0.580569354508924735272384751634790549382952557); + DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP390180644,
+0.390180644032256535696569736954044481855383236); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { /* one pass per transform: i counts down from v while the four pointers step by ovs / ivs */ + E Ta, T2S, T18, T2u, T3F, T4V, T5l, T61, Th, T2T, T1h, T2v, T3M, T4W, T5o; + E T62, T3Q, T5q, T5u, T44, Tp, Tw, T2V, T2W, T2X, T2Y, T3X, T5t, T1r, T2x; + E T41, T5r, T1A, T2y, T4a, T5y, T5N, T4H, TN, T31, T4E, T5z, T39, T3q, T1L; + E T2B, T4h, T5M, T2h, T2F, T12, T36, T5D, T5J, T5G, T5K, T1U, T26, T23, T27; + E T4p, T4z, T4w, T4A, T34, T3r; + { /* loads Cr at 0, 8, 16, 24, 32 and Ci at 8, 16, 24; sets Ta, T2S, T18, T2u, T3F, T4V, T5l, T61 */ + E T5, T3A, T3, T3y, T9, T3C, T17, T3D, T6, T14; + { + E T4, T3z, T1, T2; + T4 = Cr[WS(csr, 16)]; + T5 = KP2_000000000 * T4; + T3z = Ci[WS(csi, 16)]; + T3A = KP2_000000000 * T3z; + T1 = Cr[0]; + T2 = Cr[WS(csr, 32)]; + T3 = T1 + T2; + T3y = T1 - T2; + { + E T7, T8, T15, T16; + T7 = Cr[WS(csr, 8)]; + T8 = Cr[WS(csr, 24)]; + T9 = KP2_000000000 * (T7 + T8); + T3C = T7 - T8; + T15 = Ci[WS(csi, 8)]; + T16 = Ci[WS(csi, 24)]; + T17 = KP2_000000000 * (T15 - T16); + T3D = T15 + T16; + } + } + T6 = T3 + T5; + Ta = T6 + T9; + T2S = T6 - T9; + T14 = T3 - T5; + T18 = T14 - T17; + T2u = T14 + T17; + { + E T3B, T3E, T5j, T5k; + T3B = T3y - T3A; + T3E = KP1_414213562 * (T3C - T3D); + T3F = T3B + T3E; + T4V = T3B - T3E; + T5j = T3y + T3A; + T5k = KP1_414213562 * (T3C + T3D); + T5l = T5j - T5k; + T61 = T5j + T5k; + } + } + { /* loads Cr and Ci at 4, 12, 20, 28; sets Th, T2T, T1h, T2v, T3M, T4W, T5o, T62 */ + E Td, T3G, T1c,
T3K, Tg, T3J, T1f, T3H, T19, T1g; + { + E Tb, Tc, T1a, T1b; + Tb = Cr[WS(csr, 4)]; + Tc = Cr[WS(csr, 28)]; + Td = Tb + Tc; + T3G = Tb - Tc; + T1a = Ci[WS(csi, 4)]; + T1b = Ci[WS(csi, 28)]; + T1c = T1a - T1b; + T3K = T1a + T1b; + } + { + E Te, Tf, T1d, T1e; + Te = Cr[WS(csr, 20)]; + Tf = Cr[WS(csr, 12)]; + Tg = Te + Tf; + T3J = Te - Tf; + T1d = Ci[WS(csi, 20)]; + T1e = Ci[WS(csi, 12)]; + T1f = T1d - T1e; + T3H = T1d + T1e; + } + Th = KP2_000000000 * (Td + Tg); + T2T = KP2_000000000 * (T1f + T1c); + T19 = Td - Tg; + T1g = T1c - T1f; + T1h = KP1_414213562 * (T19 - T1g); + T2v = KP1_414213562 * (T19 + T1g); + { + E T3I, T3L, T5m, T5n; + T3I = T3G - T3H; + T3L = T3J + T3K; + T3M = FNMS(KP765366864, T3L, KP1_847759065 * T3I); + T4W = FMA(KP765366864, T3I, KP1_847759065 * T3L); + T5m = T3G + T3H; + T5n = T3K - T3J; + T5o = FNMS(KP1_847759065, T5n, KP765366864 * T5m); + T62 = FMA(KP1_847759065, T5m, KP765366864 * T5n); + } + } + { /* loads Cr and Ci at 2, 6, 10, 14, 18, 22, 26, 30 and combines them into the T3Q .. T2y group declared above */ + E Tl, T3O, T1v, T43, To, T42, T1y, T3P, Ts, T3R, T1p, T3S, Tv, T3U, T1m; + E T3V; + { + E Tj, Tk, T1t, T1u; + Tj = Cr[WS(csr, 2)]; + Tk = Cr[WS(csr, 30)]; + Tl = Tj + Tk; + T3O = Tj - Tk; + T1t = Ci[WS(csi, 2)]; + T1u = Ci[WS(csi, 30)]; + T1v = T1t - T1u; + T43 = T1t + T1u; + } + { + E Tm, Tn, T1w, T1x; + Tm = Cr[WS(csr, 18)]; + Tn = Cr[WS(csr, 14)]; + To = Tm + Tn; + T42 = Tm - Tn; + T1w = Ci[WS(csi, 18)]; + T1x = Ci[WS(csi, 14)]; + T1y = T1w - T1x; + T3P = T1w + T1x; + } + { + E Tq, Tr, T1n, T1o; + Tq = Cr[WS(csr, 10)]; + Tr = Cr[WS(csr, 22)]; + Ts = Tq + Tr; + T3R = Tq - Tr; + T1n = Ci[WS(csi, 10)]; + T1o = Ci[WS(csi, 22)]; + T1p = T1n - T1o; + T3S = T1n + T1o; + } + { + E Tt, Tu, T1k, T1l; + Tt = Cr[WS(csr, 6)]; + Tu = Cr[WS(csr, 26)]; + Tv = Tt + Tu; + T3U = Tt - Tu; + T1k = Ci[WS(csi, 26)]; + T1l = Ci[WS(csi, 6)]; + T1m = T1k - T1l; + T3V = T1l + T1k; + } + T3Q = T3O - T3P; + T5q = T3O + T3P; + T5u = T43 - T42; + T44 = T42 + T43; + Tp = Tl + To; + Tw = Ts + Tv; + T2V = Tp - Tw; + { + E T3T, T3W, T1j, T1q; + T2W = T1y + T1v; + T2X = T1p +
T1m; + T2Y = T2W - T2X; + T3T = T3R - T3S; + T3W = T3U - T3V; + T3X = KP707106781 * (T3T + T3W); + T5t = KP707106781 * (T3T - T3W); + T1j = Tl - To; + T1q = T1m - T1p; + T1r = T1j + T1q; + T2x = T1j - T1q; + { + E T3Z, T40, T1s, T1z; + T3Z = T3R + T3S; + T40 = T3U + T3V; + T41 = KP707106781 * (T3Z - T40); + T5r = KP707106781 * (T3Z + T40); + T1s = Ts - Tv; + T1z = T1v - T1y; + T1A = T1s + T1z; + T2y = T1z - T1s; + } + } + } + { /* loads Cr and Ci at 1, 7, 9, 15, 17, 23, 25, 31 and combines them into the T4a .. T2F group declared above */ + E TB, T48, T2c, T4G, TE, T4F, T2f, T49, TI, T4b, T1J, T4c, TL, T4e, T1G; + E T4f; + { + E Tz, TA, T2a, T2b; + Tz = Cr[WS(csr, 1)]; + TA = Cr[WS(csr, 31)]; + TB = Tz + TA; + T48 = Tz - TA; + T2a = Ci[WS(csi, 1)]; + T2b = Ci[WS(csi, 31)]; + T2c = T2a - T2b; + T4G = T2a + T2b; + } + { + E TC, TD, T2d, T2e; + TC = Cr[WS(csr, 17)]; + TD = Cr[WS(csr, 15)]; + TE = TC + TD; + T4F = TC - TD; + T2d = Ci[WS(csi, 17)]; + T2e = Ci[WS(csi, 15)]; + T2f = T2d - T2e; + T49 = T2d + T2e; + } + { + E TG, TH, T1H, T1I; + TG = Cr[WS(csr, 9)]; + TH = Cr[WS(csr, 23)]; + TI = TG + TH; + T4b = TG - TH; + T1H = Ci[WS(csi, 9)]; + T1I = Ci[WS(csi, 23)]; + T1J = T1H - T1I; + T4c = T1H + T1I; + } + { + E TJ, TK, T1E, T1F; + TJ = Cr[WS(csr, 7)]; + TK = Cr[WS(csr, 25)]; + TL = TJ + TK; + T4e = TJ - TK; + T1E = Ci[WS(csi, 25)]; + T1F = Ci[WS(csi, 7)]; + T1G = T1E - T1F; + T4f = T1F + T1E; + } + { + E TF, TM, T1D, T1K; + T4a = T48 - T49; + T5y = T48 + T49; + T5N = T4G - T4F; + T4H = T4F + T4G; + TF = TB + TE; + TM = TI + TL; + TN = TF + TM; + T31 = TF - TM; + { + E T4C, T4D, T37, T38; + T4C = T4b + T4c; + T4D = T4e + T4f; + T4E = KP707106781 * (T4C - T4D); + T5z = KP707106781 * (T4C + T4D); + T37 = T2f + T2c; + T38 = T1J + T1G; + T39 = T37 - T38; + T3q = T38 + T37; + } + T1D = TB - TE; + T1K = T1G - T1J; + T1L = T1D + T1K; + T2B = T1D - T1K; + { + E T4d, T4g, T29, T2g; + T4d = T4b - T4c; + T4g = T4e - T4f; + T4h = KP707106781 * (T4d + T4g); + T5M = KP707106781 * (T4d - T4g); + T29 = TI - TL; + T2g = T2c - T2f; + T2h = T29 + T2g; + T2F = T2g - T29; + } + } + } + { /* loads Cr and Ci at 3, 5, 11, 13, 19, 21, 27, 29 and combines them into the T12 .. T3r group declared above */ + E TQ,
T4j, T1P, T4n, TT, T4m, T1S, T4k, TX, T4q, T1Y, T4u, T10, T4t, T21; + E T4r; + { + E TO, TP, T1N, T1O; + TO = Cr[WS(csr, 5)]; + TP = Cr[WS(csr, 27)]; + TQ = TO + TP; + T4j = TO - TP; + T1N = Ci[WS(csi, 5)]; + T1O = Ci[WS(csi, 27)]; + T1P = T1N - T1O; + T4n = T1N + T1O; + } + { + E TR, TS, T1Q, T1R; + TR = Cr[WS(csr, 21)]; + TS = Cr[WS(csr, 11)]; + TT = TR + TS; + T4m = TR - TS; + T1Q = Ci[WS(csi, 21)]; + T1R = Ci[WS(csi, 11)]; + T1S = T1Q - T1R; + T4k = T1Q + T1R; + } + { + E TV, TW, T1W, T1X; + TV = Cr[WS(csr, 3)]; + TW = Cr[WS(csr, 29)]; + TX = TV + TW; + T4q = TV - TW; + T1W = Ci[WS(csi, 29)]; + T1X = Ci[WS(csi, 3)]; + T1Y = T1W - T1X; + T4u = T1X + T1W; + } + { + E TY, TZ, T1Z, T20; + TY = Cr[WS(csr, 13)]; + TZ = Cr[WS(csr, 19)]; + T10 = TY + TZ; + T4t = TY - TZ; + T1Z = Ci[WS(csi, 13)]; + T20 = Ci[WS(csi, 19)]; + T21 = T1Z - T20; + T4r = T1Z + T20; + } + { + E TU, T11, T5B, T5C; + TU = TQ + TT; + T11 = TX + T10; + T12 = TU + T11; + T36 = TU - T11; + T5B = T4j + T4k; + T5C = T4n - T4m; + T5D = FNMS(KP923879532, T5C, KP382683432 * T5B); + T5J = FMA(KP923879532, T5B, KP382683432 * T5C); + } + { + E T5E, T5F, T1M, T1T; + T5E = T4q + T4r; + T5F = T4t + T4u; + T5G = FNMS(KP923879532, T5F, KP382683432 * T5E); + T5K = FMA(KP923879532, T5E, KP382683432 * T5F); + T1M = TQ - TT; + T1T = T1P - T1S; + T1U = T1M - T1T; + T26 = T1M + T1T; + } + { + E T1V, T22, T4l, T4o; + T1V = TX - T10; + T22 = T1Y - T21; + T23 = T1V + T22; + T27 = T22 - T1V; + T4l = T4j - T4k; + T4o = T4m + T4n; + T4p = FNMS(KP382683432, T4o, KP923879532 * T4l); + T4z = FMA(KP382683432, T4l, KP923879532 * T4o); + } + { + E T4s, T4v, T32, T33; + T4s = T4q - T4r; + T4v = T4t - T4u; + T4w = FMA(KP923879532, T4s, KP382683432 * T4v); + T4A = FNMS(KP382683432, T4s, KP923879532 * T4v); + T32 = T21 + T1Y; + T33 = T1S + T1P; + T34 = T32 - T33; + T3r = T33 + T32; + } + } + { /* stores R0 at 0, 8, 16, 24 */ + E T13, T3x, Ty, T3w, Ti, Tx; + T13 = KP2_000000000 * (TN + T12); + T3x = KP2_000000000 * (T3r + T3q); + Ti = Ta + Th; + Tx = KP2_000000000 *
(Tp + Tw); + Ty = Ti + Tx; + T3w = Ti - Tx; + R0[WS(rs, 16)] = Ty - T13; + R0[WS(rs, 24)] = T3w + T3x; + R0[0] = Ty + T13; + R0[WS(rs, 8)] = T3w - T3x; + } + { /* stores R0 at 6, 14, 22, 30 */ + E T3g, T3k, T3j, T3l; + { + E T3e, T3f, T3h, T3i; + T3e = T2S + T2T; + T3f = KP1_414213562 * (T2V + T2Y); + T3g = T3e - T3f; + T3k = T3e + T3f; + T3h = T31 - T34; + T3i = T39 - T36; + T3j = FNMS(KP1_847759065, T3i, KP765366864 * T3h); + T3l = FMA(KP1_847759065, T3h, KP765366864 * T3i); + } + R0[WS(rs, 22)] = T3g - T3j; + R0[WS(rs, 30)] = T3k + T3l; + R0[WS(rs, 6)] = T3g + T3j; + R0[WS(rs, 14)] = T3k - T3l; + } + { /* stores R0 at 4, 12, 20, 28 */ + E T3o, T3u, T3t, T3v; + { + E T3m, T3n, T3p, T3s; + T3m = Ta - Th; + T3n = KP2_000000000 * (T2X + T2W); + T3o = T3m - T3n; + T3u = T3m + T3n; + T3p = TN - T12; + T3s = T3q - T3r; + T3t = KP1_414213562 * (T3p - T3s); + T3v = KP1_414213562 * (T3p + T3s); + } + R0[WS(rs, 20)] = T3o - T3t; + R0[WS(rs, 28)] = T3u + T3v; + R0[WS(rs, 4)] = T3o + T3t; + R0[WS(rs, 12)] = T3u - T3v; + } + { /* stores R0 at 2, 10, 18, 26 */ + E T30, T3c, T3b, T3d; + { + E T2U, T2Z, T35, T3a; + T2U = T2S - T2T; + T2Z = KP1_414213562 * (T2V - T2Y); + T30 = T2U + T2Z; + T3c = T2U - T2Z; + T35 = T31 + T34; + T3a = T36 + T39; + T3b = FNMS(KP765366864, T3a, KP1_847759065 * T35); + T3d = FMA(KP765366864, T35, KP1_847759065 * T3a); + } + R0[WS(rs, 18)] = T30 - T3b; + R0[WS(rs, 26)] = T3c + T3d; + R0[WS(rs, 2)] = T30 + T3b; + R0[WS(rs, 10)] = T3c - T3d; + } + { /* stores R0 at 1, 5, 9, 13, 17, 21, 25, 29 */ + E T25, T2p, T2i, T2q, T1C, T2k, T2o, T2s, T24, T28; + T24 = KP707106781 * (T1U + T23); + T25 = T1L + T24; + T2p = T1L - T24; + T28 = KP707106781 * (T26 + T27); + T2i = T28 + T2h; + T2q = T2h - T28; + { + E T1i, T1B, T2m, T2n; + T1i = T18 + T1h; + T1B = FNMS(KP765366864, T1A, KP1_847759065 * T1r); + T1C = T1i + T1B; + T2k = T1i - T1B; + T2m = T18 - T1h; + T2n = FMA(KP765366864, T1r, KP1_847759065 * T1A); + T2o = T2m - T2n; + T2s = T2m + T2n; + } + { + E T2j, T2t, T2l, T2r; + T2j = FNMS(KP390180644, T2i, KP1_961570560 * T25); + R0[WS(rs, 17)] = T1C - T2j; + R0[WS(rs, 1)] = T1C + T2j; + T2t =
FMA(KP1_662939224, T2p, KP1_111140466 * T2q); + R0[WS(rs, 13)] = T2s - T2t; + R0[WS(rs, 29)] = T2s + T2t; + T2l = FMA(KP390180644, T25, KP1_961570560 * T2i); + R0[WS(rs, 9)] = T2k - T2l; + R0[WS(rs, 25)] = T2k + T2l; + T2r = FNMS(KP1_662939224, T2q, KP1_111140466 * T2p); + R0[WS(rs, 21)] = T2o - T2r; + R0[WS(rs, 5)] = T2o + T2r; + } + } + { /* stores R0 at 3, 7, 11, 15, 19, 23, 27, 31 */ + E T2D, T2N, T2G, T2O, T2A, T2I, T2M, T2Q, T2C, T2E; + T2C = KP707106781 * (T27 - T26); + T2D = T2B + T2C; + T2N = T2B - T2C; + T2E = KP707106781 * (T1U - T23); + T2G = T2E + T2F; + T2O = T2F - T2E; + { + E T2w, T2z, T2K, T2L; + T2w = T2u - T2v; + T2z = FNMS(KP1_847759065, T2y, KP765366864 * T2x); + T2A = T2w + T2z; + T2I = T2w - T2z; + T2K = T2u + T2v; + T2L = FMA(KP1_847759065, T2x, KP765366864 * T2y); + T2M = T2K - T2L; + T2Q = T2K + T2L; + } + { + E T2H, T2R, T2J, T2P; + T2H = FNMS(KP1_111140466, T2G, KP1_662939224 * T2D); + R0[WS(rs, 19)] = T2A - T2H; + R0[WS(rs, 3)] = T2A + T2H; + T2R = FMA(KP1_961570560, T2N, KP390180644 * T2O); + R0[WS(rs, 15)] = T2Q - T2R; + R0[WS(rs, 31)] = T2Q + T2R; + T2J = FMA(KP1_111140466, T2D, KP1_662939224 * T2G); + R0[WS(rs, 11)] = T2I - T2J; + R0[WS(rs, 27)] = T2I + T2J; + T2P = FNMS(KP1_961570560, T2O, KP390180644 * T2N); + R0[WS(rs, 23)] = T2M - T2P; + R0[WS(rs, 7)] = T2M + T2P; + } + } + { /* stores R1 at 1, 5, 9, 13, 17, 21, 25, 29 */ + E T5p, T5T, T5w, T5U, T5I, T5W, T5P, T5X, T5s, T5v; + T5p = T5l + T5o; + T5T = T5l - T5o; + T5s = T5q - T5r; + T5v = T5t + T5u; + T5w = FNMS(KP1_111140466, T5v, KP1_662939224 * T5s); + T5U = FMA(KP1_111140466, T5s, KP1_662939224 * T5v); + { + E T5A, T5H, T5L, T5O; + T5A = T5y - T5z; + T5H = T5D + T5G; + T5I = T5A + T5H; + T5W = T5A - T5H; + T5L = T5J - T5K; + T5O = T5M + T5N; + T5P = T5L + T5O; + T5X = T5O - T5L; + } + { + E T5x, T5Q, T5Z, T60; + T5x = T5p + T5w; + T5Q = FNMS(KP580569354, T5P, KP1_913880671 * T5I); + R1[WS(rs, 17)] = T5x - T5Q; + R1[WS(rs, 1)] = T5x + T5Q; + T5Z = T5T + T5U; + T60 = FMA(KP1_763842528, T5W, KP942793473 * T5X); + R1[WS(rs, 13)] = T5Z - T60; + R1[WS(rs, 29)] = T5Z + T60;
+ } + { + E T5R, T5S, T5V, T5Y; + T5R = T5p - T5w; + T5S = FMA(KP580569354, T5I, KP1_913880671 * T5P); + R1[WS(rs, 9)] = T5R - T5S; + R1[WS(rs, 25)] = T5R + T5S; + T5V = T5T - T5U; + T5Y = FNMS(KP1_763842528, T5X, KP942793473 * T5W); + R1[WS(rs, 21)] = T5V - T5Y; + R1[WS(rs, 5)] = T5V + T5Y; + } + } + { /* stores R1 at 0, 4, 8, 12, 16, 20, 24, 28 */ + E T3N, T4N, T46, T4O, T4y, T4Q, T4J, T4R, T3Y, T45; + T3N = T3F + T3M; + T4N = T3F - T3M; + T3Y = T3Q + T3X; + T45 = T41 + T44; + T46 = FNMS(KP390180644, T45, KP1_961570560 * T3Y); + T4O = FMA(KP390180644, T3Y, KP1_961570560 * T45); + { + E T4i, T4x, T4B, T4I; + T4i = T4a + T4h; + T4x = T4p + T4w; + T4y = T4i + T4x; + T4Q = T4i - T4x; + T4B = T4z + T4A; + T4I = T4E + T4H; + T4J = T4B + T4I; + T4R = T4I - T4B; + } + { + E T47, T4K, T4T, T4U; + T47 = T3N + T46; + T4K = FNMS(KP196034280, T4J, KP1_990369453 * T4y); + R1[WS(rs, 16)] = T47 - T4K; + R1[0] = T47 + T4K; + T4T = T4N + T4O; + T4U = FMA(KP1_546020906, T4Q, KP1_268786568 * T4R); + R1[WS(rs, 12)] = T4T - T4U; + R1[WS(rs, 28)] = T4T + T4U; + } + { + E T4L, T4M, T4P, T4S; + T4L = T3N - T46; + T4M = FMA(KP196034280, T4y, KP1_990369453 * T4J); + R1[WS(rs, 8)] = T4L - T4M; + R1[WS(rs, 24)] = T4L + T4M; + T4P = T4N - T4O; + T4S = FNMS(KP1_546020906, T4R, KP1_268786568 * T4Q); + R1[WS(rs, 20)] = T4P - T4S; + R1[WS(rs, 4)] = T4P + T4S; + } + } + { /* stores R1 at 3, 7, 11, 15, 19, 23, 27, 31 */ + E T63, T6h, T66, T6i, T6a, T6k, T6d, T6l, T64, T65; + T63 = T61 - T62; + T6h = T61 + T62; + T64 = T5q + T5r; + T65 = T5u - T5t; + T66 = FNMS(KP1_961570560, T65, KP390180644 * T64); + T6i = FMA(KP1_961570560, T64, KP390180644 * T65); + { + E T68, T69, T6b, T6c; + T68 = T5y + T5z; + T69 = T5J + T5K; + T6a = T68 - T69; + T6k = T68 + T69; + T6b = T5D - T5G; + T6c = T5N - T5M; + T6d = T6b + T6c; + T6l = T6c - T6b; + } + { + E T67, T6e, T6n, T6o; + T67 = T63 + T66; + T6e = FNMS(KP1_268786568, T6d, KP1_546020906 * T6a); + R1[WS(rs, 19)] = T67 - T6e; + R1[WS(rs, 3)] = T67 + T6e; + T6n = T6h + T6i; + T6o = FMA(KP1_990369453, T6k, KP196034280 * T6l); + R1[WS(rs, 15)] = T6n - T6o; +
R1[WS(rs, 31)] = T6n + T6o; + } + { + E T6f, T6g, T6j, T6m; + T6f = T63 - T66; + T6g = FMA(KP1_268786568, T6a, KP1_546020906 * T6d); + R1[WS(rs, 11)] = T6f - T6g; + R1[WS(rs, 27)] = T6f + T6g; + T6j = T6h - T6i; + T6m = FNMS(KP1_990369453, T6l, KP196034280 * T6k); + R1[WS(rs, 23)] = T6j - T6m; + R1[WS(rs, 7)] = T6j + T6m; + } + } + { /* stores R1 at 2, 6, 10, 14, 18, 22, 26, 30 */ + E T4X, T5b, T50, T5c, T54, T5e, T57, T5f, T4Y, T4Z; + T4X = T4V - T4W; + T5b = T4V + T4W; + T4Y = T3Q - T3X; + T4Z = T44 - T41; + T50 = FNMS(KP1_662939224, T4Z, KP1_111140466 * T4Y); + T5c = FMA(KP1_662939224, T4Y, KP1_111140466 * T4Z); + { + E T52, T53, T55, T56; + T52 = T4a - T4h; + T53 = T4A - T4z; + T54 = T52 + T53; + T5e = T52 - T53; + T55 = T4p - T4w; + T56 = T4H - T4E; + T57 = T55 + T56; + T5f = T56 - T55; + } + { + E T51, T58, T5h, T5i; + T51 = T4X + T50; + T58 = FNMS(KP942793473, T57, KP1_763842528 * T54); + R1[WS(rs, 18)] = T51 - T58; + R1[WS(rs, 2)] = T51 + T58; + T5h = T5b + T5c; + T5i = FMA(KP1_913880671, T5e, KP580569354 * T5f); + R1[WS(rs, 14)] = T5h - T5i; + R1[WS(rs, 30)] = T5h + T5i; + } + { + E T59, T5a, T5d, T5g; + T59 = T4X - T50; + T5a = FMA(KP942793473, T54, KP1_763842528 * T57); + R1[WS(rs, 10)] = T59 - T5a; + R1[WS(rs, 26)] = T59 + T5a; + T5d = T5b - T5c; + T5g = FNMS(KP1_913880671, T5f, KP580569354 * T5e); + R1[WS(rs, 22)] = T5d - T5g; + R1[WS(rs, 6)] = T5d + T5g; + } + } + } + } +} + /* Descriptor for the non-FMA build: size 64, codelet name, then counts whose first three entries equal the 342 additions, 82 multiplications and 52 fused multiply/adds quoted in the header comment of this variant. */ +static const kr2c_desc desc = { 64, "r2cb_64", {342, 82, 52, 0}, &GENUS }; + /* Hands r2cb_64 and its descriptor to the planner p through kr2c_register. */ +void X(codelet_r2cb_64) (planner *p) { + X(kr2c_register) (p, r2cb_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms
of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -name r2cb_7 -include r2cb.h */ + +/* + * This function contains 24 FP additions, 22 FP multiplications, + * (or, 2 additions, 0 multiplications, 22 fused multiply/add), + * 31 stack variables, 7 constants, and 14 memory accesses + */ +#include "r2cb.h" + /* r2cb_7, HAVE_FMA build. Each loop pass loads Cr[0], Cr[WS(csr, k)] and Ci[WS(csi, k)] for k = 1..3 (7 loads), then stores R0[0], R0[WS(rs, k)] for k = 1..3, R1[0] and R1[WS(rs, k)] for k = 1..2 (7 stores). v is the number of passes; Cr and Ci advance by ivs and R0 and R1 by ovs after every pass. NOTE(review): presumably the size-7 backward complex-to-real kernel, judging by the generator flags (gen_r2cb, -sign 1, -n 7) -- confirm. */ +static void r2cb_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ /* DK declares the 7 named constants used below; each KP name spells the leading digits of its value. */ + DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); + DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28,
csi)) { /* one pass per transform: i counts down from v while the four pointers step by ovs / ivs */ + E Tn, Td, Tg, Ti, Tl, T8; + { /* all seven loads happen in this block; Tn, Td, Ti derive from the Ci values and Tg, Tl, T8 from the Cr values; R0[0] is stored here as well */ + E T1, T9, Tb, Ta, T2, T4, Th, Tm, Tc, T3, Te; + T1 = Cr[0]; + T9 = Ci[WS(csi, 2)]; + Tb = Ci[WS(csi, 3)]; + Ta = Ci[WS(csi, 1)]; + T2 = Cr[WS(csr, 1)]; + T4 = Cr[WS(csr, 3)]; + Th = FMA(KP554958132, T9, Tb); + Tm = FMS(KP554958132, Ta, T9); + Tc = FMA(KP554958132, Tb, Ta); + T3 = Cr[WS(csr, 2)]; + Te = FNMS(KP356895867, T2, T4); + Tn = FMA(KP801937735, Tm, Tb); + { + E Tf, Tk, T7, T5, Tj, T6; + Td = FMA(KP801937735, Tc, T9); + T5 = T2 + T3 + T4; + Tj = FNMS(KP356895867, T4, T3); + T6 = FNMS(KP356895867, T3, T2); + Tf = FNMS(KP692021471, Te, T3); + R0[0] = FMA(KP2_000000000, T5, T1); + Tk = FNMS(KP692021471, Tj, T2); + T7 = FNMS(KP692021471, T6, T4); + Tg = FNMS(KP1_801937735, Tf, T1); + Ti = FNMS(KP801937735, Th, Ta); + Tl = FNMS(KP1_801937735, Tk, T1); + T8 = FNMS(KP1_801937735, T7, T1); + } + } + R1[WS(rs, 2)] = FMA(KP1_949855824, Ti, Tg); /* the six remaining outputs are paired FMA / FNMS combinations of (Ti, Tg), (Tn, Tl) and (Td, T8) */ + R0[WS(rs, 1)] = FNMS(KP1_949855824, Ti, Tg); + R0[WS(rs, 2)] = FMA(KP1_949855824, Tn, Tl); + R1[WS(rs, 1)] = FNMS(KP1_949855824, Tn, Tl); + R0[WS(rs, 3)] = FMA(KP1_949855824, Td, T8); + R1[0] = FNMS(KP1_949855824, Td, T8); + } + } +} + /* Descriptor for the HAVE_FMA build: size 7, codelet name, then counts whose first three entries equal the 2 additions, 0 multiplications and 22 fused multiply/adds quoted in the header comment of this variant. */ +static const kr2c_desc desc = { 7, "r2cb_7", {2, 0, 22, 0}, &GENUS }; + /* Hands r2cb_7 and its descriptor to the planner p through kr2c_register. */ +void X(codelet_r2cb_7) (planner *p) { + X(kr2c_register) (p, r2cb_7, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 7 -name r2cb_7 -include r2cb.h */ + +/* + * This function contains 24 FP additions, 19 FP multiplications, + * (or, 11 additions, 6 multiplications, 13 fused multiply/add), + * 21 stack variables, 7 constants, and 14 memory accesses + */ +#include "r2cb.h" + /* r2cb_7, build without HAVE_FMA. Same signature, loads (Cr at 0..3, Ci at 1..3) and stores (R0 at 0..3, R1 at 0..2) as the HAVE_FMA variant, written with a different constant set. */ +static void r2cb_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ /* DK declares the 7 named constants used below; each KP name spells the leading digits of its value. */ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); + DK(KP445041867, +0.445041867912628808577805128993589518932711138); +
DK(KP1_246979603, +1.246979603717467061050009768008479621264549462); + DK(KP867767478, +0.867767478235116240951536665696717509219981456); + DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); + DK(KP1_563662964, +1.563662964936059617416889053348115500464669037); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { /* one pass per transform: i counts down from v while the four pointers step by ovs / ivs */ + E T9, Td, Tb, T1, T4, T2, T3, T5, Tc, Ta, T6, T8, T7; + T6 = Ci[WS(csi, 2)]; + T8 = Ci[WS(csi, 1)]; + T7 = Ci[WS(csi, 3)]; + T9 = FNMS(KP1_949855824, T7, KP1_563662964 * T6) - (KP867767478 * T8); /* T9, Td, Tb are linear combinations of the three Ci values only */ + Td = FMA(KP867767478, T6, KP1_563662964 * T7) - (KP1_949855824 * T8); + Tb = FMA(KP1_563662964, T8, KP1_949855824 * T6) + (KP867767478 * T7); + T1 = Cr[0]; + T4 = Cr[WS(csr, 3)]; + T2 = Cr[WS(csr, 1)]; + T3 = Cr[WS(csr, 2)]; + T5 = FMA(KP1_246979603, T3, T1) + FNMA(KP445041867, T4, KP1_801937735 * T2); /* T5, Tc, Ta are linear combinations of the four Cr values only */ + Tc = FMA(KP1_246979603, T4, T1) + FNMA(KP1_801937735, T3, KP445041867 * T2); + Ta = FMA(KP1_246979603, T2, T1) + FNMA(KP1_801937735, T4, KP445041867 * T3); + R0[WS(rs, 2)] = T5 - T9; /* each Cr-side term is paired with one Ci-side term to form two outputs (sum and difference) */ + R1[WS(rs, 1)] = T5 + T9; + R0[WS(rs, 1)] = Tc + Td; + R1[WS(rs, 2)] = Tc - Td; + R0[WS(rs, 3)] = Ta + Tb; + R1[0] = Ta - Tb; + R0[0] = FMA(KP2_000000000, T2 + T3 + T4, T1); + } + } +} + /* Descriptor for the non-FMA build: size 7, codelet name, then counts whose first three entries equal the 11 additions, 6 multiplications and 13 fused multiply/adds quoted in the header comment of this variant. */ +static const kr2c_desc desc = { 7, "r2cb_7", {11, 6, 13, 0}, &GENUS }; + /* Hands r2cb_7 and its descriptor to the planner p through kr2c_register. */ +void X(codelet_r2cb_7) (planner *p) { + X(kr2c_register) (p, r2cb_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU
General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -name r2cb_8 -include r2cb.h */ + +/* + * This function contains 20 FP additions, 12 FP multiplications, + * (or, 8 additions, 0 multiplications, 12 fused multiply/add), + * 19 stack variables, 2 constants, and 16 memory accesses + */ +#include "r2cb.h" + +static void r2cb_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* HAVE_FMA build of the size-8 codelet (generator flags "-n 8 -sign 1"): each pass reads Cr[0], Cr[WS(csr, 1..4)] and Ci[WS(csi, 1..3)], and writes R0[0], R0[WS(rs, 1..3)], R1[0] and R1[WS(rs, 1..3)] */ +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numeric value of sqrt(2) */ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { /* v passes; after each one R0/R1 advance by ovs and Cr/Ci advance by ivs */ + E Th, Tb, Tg, Ti; /* temporaries that outlive the nested scopes and feed the four R1 stores */ + { + E T4, Ta, Td, T9, T3, Tc, T8, Te; + T4 = Cr[WS(csr, 2)]; + Ta = Ci[WS(csi, 2)]; + { + E T1, T2, T6, T7; + T1 = Cr[0]; + T2 = Cr[WS(csr, 4)]; + T6 = Cr[WS(csr, 1)]; + T7 = Cr[WS(csr, 3)]; + Td = Ci[WS(csi, 1)]; + T9 = T1 - T2; + T3 = T1 + T2; + Tc = T6 - T7; + T8 = T6 + T7; + Te = Ci[WS(csi, 3)]; + } + { + E Tj, T5, Tk, Tf; + 
Tj = FNMS(KP2_000000000, T4, T3); + T5 = FMA(KP2_000000000, T4, T3); + Th = FMA(KP2_000000000, Ta, T9); + Tb = FNMS(KP2_000000000, Ta, T9); + Tk = Td - Te; + Tf = Td + Te; + R0[0] = FMA(KP2_000000000, T8, T5); + R0[WS(rs, 2)] = FNMS(KP2_000000000, T8, T5); + R0[WS(rs, 3)] = FMA(KP2_000000000, Tk, Tj); + R0[WS(rs, 1)] = FNMS(KP2_000000000, Tk, Tj); + Tg = Tc - Tf; + Ti = Tc + Tf; + } + } + R1[0] = FMA(KP1_414213562, Tg, Tb); + R1[WS(rs, 2)] = FNMS(KP1_414213562, Tg, Tb); + R1[WS(rs, 3)] = FMA(KP1_414213562, Ti, Th); + R1[WS(rs, 1)] = FNMS(KP1_414213562, Ti, Th); + } + } +} + +static const kr2c_desc desc = { 8, "r2cb_8", {8, 0, 12, 0}, &GENUS }; /* size 8, codelet name, then op counts whose first three entries repeat this variant's header: 8 additions, 0 multiplications, 12 fused multiply/add */ + +void X(codelet_r2cb_8) (planner *p) { /* passes r2cb_8 and its descriptor to kr2c_register for planner p */ + X(kr2c_register) (p, r2cb_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 8 -name r2cb_8 -include r2cb.h */ + +/* + * This function contains 20 FP additions, 6 FP multiplications, + * (or, 20 additions, 6 multiplications, 0 fused multiply/add), + * 21 stack variables, 2 constants, and 16 memory accesses + */ +#include "r2cb.h" + +static void r2cb_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* non-FMA build: same set of loads and stores as the HAVE_FMA version, written with plain multiplies, adds and subtracts only */ +{ + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numeric value of sqrt(2) */ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { /* v passes; after each one R0/R1 advance by ovs and Cr/Ci advance by ivs */ + E T5, Tg, T3, Te, T9, Ti, Td, Tj, T6, Ta; + { + E T4, Tf, T1, T2; + T4 = Cr[WS(csr, 2)]; + T5 = KP2_000000000 * T4; + Tf = Ci[WS(csi, 2)]; + Tg = KP2_000000000 * Tf; + T1 = Cr[0]; + T2 = Cr[WS(csr, 4)]; + T3 = T1 + T2; + Te = T1 - T2; + { + E T7, T8, Tb, Tc; + T7 = Cr[WS(csr, 1)]; + T8 = Cr[WS(csr, 3)]; + T9 = KP2_000000000 * (T7 + T8); + Ti = T7 - T8; + Tb = Ci[WS(csi, 1)]; + Tc = Ci[WS(csi, 3)]; + Td = KP2_000000000 * 
(Tb - Tc); + Tj = Tb + Tc; + } + } + T6 = T3 + T5; + R0[WS(rs, 2)] = T6 - T9; + R0[0] = T6 + T9; + Ta = T3 - T5; + R0[WS(rs, 1)] = Ta - Td; + R0[WS(rs, 3)] = Ta + Td; + { + E Th, Tk, Tl, Tm; + Th = Te - Tg; + Tk = KP1_414213562 * (Ti - Tj); + R1[WS(rs, 2)] = Th - Tk; + R1[0] = Th + Tk; + Tl = Te + Tg; + Tm = KP1_414213562 * (Ti + Tj); + R1[WS(rs, 1)] = Tl - Tm; + R1[WS(rs, 3)] = Tl + Tm; + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cb_8", {20, 6, 0, 0}, &GENUS }; /* same descriptor layout; the first three op counts repeat this variant's header: 20 additions, 6 multiplications, 0 fused multiply/add */ + +void X(codelet_r2cb_8) (planner *p) { /* passes r2cb_8 and its descriptor to kr2c_register for planner p */ + X(kr2c_register) (p, r2cb_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cb/r2cb_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:50:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -name r2cb_9 -include r2cb.h */ + +/* + * This function contains 32 FP additions, 24 FP multiplications, + * (or, 8 additions, 0 multiplications, 24 fused multiply/add), + * 40 stack variables, 12 constants, and 18 memory accesses + */ +#include "r2cb.h" + +static void r2cb_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* HAVE_FMA build of the size-9 codelet (generator flags "-n 9 -sign 1"): each pass reads Cr[0], Cr[WS(csr, 1..4)] and Ci[WS(csi, 1..4)], and writes R0[0], R0[WS(rs, 1..4)], R1[0] and R1[WS(rs, 1..3)] */ +{ + DK(KP1_326827896, +1.326827896337876792410842639271782594433726619); + DK(KP1_705737063, +1.705737063904886419256501927880148143872040591); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP1_532088886, +1.532088886237956070404785301110833347871664914); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP1_969615506, +1.969615506024416118733486049179046027341286503); + DK(KP839099631, +0.839099631177280011763127298123181364687434283); + DK(KP176326980, +0.176326980708464973471090386868618986121633062); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numeric value of sqrt(3)/2 */ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numeric value of sqrt(3) */ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { /* v passes; after each one R0/R1 advance by ovs and Cr/Ci advance by ivs */ + E T4, Th, T3, Tb, Tp, Tk, T7, Tf, Ti, Ta, T1, T2; + 
Ta = Ci[WS(csi, 3)]; + T1 = Cr[0]; + T2 = Cr[WS(csr, 3)]; + T4 = Cr[WS(csr, 1)]; + Th = Ci[WS(csi, 1)]; + { + E T5, T9, T6, Td, Te; + T5 = Cr[WS(csr, 4)]; + T9 = T1 - T2; + T3 = FMA(KP2_000000000, T2, T1); + T6 = Cr[WS(csr, 2)]; + Td = Ci[WS(csi, 4)]; + Te = Ci[WS(csi, 2)]; + Tb = FNMS(KP1_732050807, Ta, T9); + Tp = FMA(KP1_732050807, Ta, T9); + Tk = T6 - T5; + T7 = T5 + T6; + Tf = Td + Te; + Ti = Td - Te; + } + { + E Tu, To, Tt, Tn, Tc, T8; + Tc = FNMS(KP500000000, T7, T4); + T8 = T4 + T7; + { + E Tw, Tj, Tr, Tg, Tv; + Tw = Ti + Th; + Tj = FNMS(KP500000000, Ti, Th); + Tr = FMA(KP866025403, Tf, Tc); + Tg = FNMS(KP866025403, Tf, Tc); + Tv = T3 - T8; + R0[0] = FMA(KP2_000000000, T8, T3); + { + E Tq, Tl, Ts, Tm; + Tq = FMA(KP866025403, Tk, Tj); + Tl = FNMS(KP866025403, Tk, Tj); + R0[WS(rs, 3)] = FMA(KP1_732050807, Tw, Tv); + R1[WS(rs, 1)] = FNMS(KP1_732050807, Tw, Tv); + Ts = FNMS(KP176326980, Tr, Tq); + Tu = FMA(KP176326980, Tq, Tr); + Tm = FNMS(KP839099631, Tl, Tg); + To = FMA(KP839099631, Tg, Tl); + R0[WS(rs, 1)] = FNMS(KP1_969615506, Ts, Tp); + Tt = FMA(KP984807753, Ts, Tp); + R1[0] = FMA(KP1_532088886, Tm, Tb); + Tn = FNMS(KP766044443, Tm, Tb); + } + } + R1[WS(rs, 2)] = FNMS(KP1_705737063, Tu, Tt); + R0[WS(rs, 4)] = FMA(KP1_705737063, Tu, Tt); + R0[WS(rs, 2)] = FNMS(KP1_326827896, To, Tn); + R1[WS(rs, 3)] = FMA(KP1_326827896, To, Tn); + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cb_9", {8, 0, 24, 0}, &GENUS }; /* size 9, codelet name, then op counts whose first three entries repeat this variant's header: 8 additions, 0 multiplications, 24 fused multiply/add */ + +void X(codelet_r2cb_9) (planner *p) { /* passes r2cb_9 and its descriptor to kr2c_register for planner p */ + X(kr2c_register) (p, r2cb_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -name r2cb_9 -include r2cb.h */ + +/* + * This function contains 32 FP additions, 18 FP multiplications, + * (or, 22 additions, 8 multiplications, 10 fused multiply/add), + * 35 stack variables, 12 constants, and 18 memory accesses + */ +#include "r2cb.h" + +static void r2cb_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride 
csi, INT v, INT ivs, INT ovs) /* non-FMA build: same set of loads and stores as the HAVE_FMA version; still uses the FMA/FNMS macros in places (its header counts 10 fused multiply/add) */ +{ + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP300767466, +0.300767466360870593278543795225003852144476517); + DK(KP1_705737063, +1.705737063904886419256501927880148143872040591); + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP1_326827896, +1.326827896337876792410842639271782594433726619); + DK(KP1_113340798, +1.113340798452838732905825904094046265936583811); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numeric value of sqrt(3)/2 */ + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numeric value of sqrt(3) */ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { /* v passes; after each one R0/R1 advance by ovs and Cr/Ci advance by ivs */ + E T3, Tq, Tc, Tk, Tj, T8, Tm, Ts, Th, Tr, Tw, Tx; + { + E Tb, T1, T2, T9, Ta; + Ta = Ci[WS(csi, 3)]; + Tb = KP1_732050807 * Ta; + T1 = Cr[0]; + T2 = Cr[WS(csr, 3)]; + T9 = T1 - T2; + T3 = FMA(KP2_000000000, T2, T1); + Tq = T9 + Tb; + Tc = T9 - Tb; + } + { + E T4, T7, Ti, Tg, Tl, Td; + T4 = Cr[WS(csr, 1)]; + Tk = Ci[WS(csi, 1)]; + { + E T5, T6, Te, Tf; + T5 = Cr[WS(csr, 4)]; + T6 = Cr[WS(csr, 2)]; + T7 = T5 + T6; + Ti = KP866025403 * (T5 - T6); + Te = Ci[WS(csi, 4)]; + Tf = Ci[WS(csi, 2)]; + Tg = KP866025403 * (Te + Tf); + Tj = Tf - Te; + } + T8 = T4 + T7; + Tl = FMA(KP500000000, Tj, Tk); + Tm = Ti + Tl; + Ts = Tl - Ti; + Td = FNMS(KP500000000, T7, T4); + Th = Td - Tg; + Tr = Td + Tg; + } + R0[0] = FMA(KP2_000000000, T8, T3); + Tw = T3 - T8; + Tx = KP1_732050807 * (Tk - Tj); + R1[WS(rs, 1)] = Tw - Tx; + R0[WS(rs, 3)] = Tw + Tx; + { + E Tp, Tn, To, Tv, Tt, Tu; + Tp = FMA(KP1_113340798, Th, KP1_326827896 * Tm); + Tn 
= FNMS(KP642787609, Tm, KP766044443 * Th); + To = Tc - Tn; + R1[0] = FMA(KP2_000000000, Tn, Tc); + R1[WS(rs, 3)] = To + Tp; + R0[WS(rs, 2)] = To - Tp; + Tv = FMA(KP1_705737063, Tr, KP300767466 * Ts); + Tt = FNMS(KP984807753, Ts, KP173648177 * Tr); + Tu = Tq - Tt; + R0[WS(rs, 1)] = FMA(KP2_000000000, Tt, Tq); + R0[WS(rs, 4)] = Tu + Tv; + R1[WS(rs, 2)] = Tu - Tv; + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cb_9", {22, 8, 10, 0}, &GENUS }; /* same descriptor layout; the first three op counts repeat this variant's header: 22 additions, 8 multiplications, 10 fused multiply/add */ + +void X(codelet_r2cb_9) (planner *p) { /* passes r2cb_9 and its descriptor to kr2c_register for planner p */ + X(kr2c_register) (p, r2cb_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cbIII.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cbIII.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_r2cbIII_genus) /* GENUS names the kr2c_genus object declared on the next line; X() is defined elsewhere (presumably the project's symbol-renaming macro -- confirm) */ +extern const kr2c_genus GENUS; /* declaration only; the object itself is defined in another translation unit */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_r2cf_genus) /* GENUS names the kr2c_genus object declared on the next line; X() is defined elsewhere (presumably the project's symbol-renaming macro -- confirm) */ +extern const kr2c_genus GENUS; /* declaration only; the object itself is defined in another translation unit */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,110 @@ +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. 
to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/scalar +noinst_LTLIBRARIES = librdft_scalar_r2cf.la + +########################################################################### +# r2cf_<n> is a hard-coded real-to-complex FFT of size <n> (base cases +# of real-input FFT recursion) +R2CF = r2cf_2.c r2cf_3.c r2cf_4.c r2cf_5.c r2cf_6.c r2cf_7.c r2cf_8.c \ +r2cf_9.c r2cf_10.c r2cf_11.c r2cf_12.c r2cf_13.c r2cf_14.c r2cf_15.c \ +r2cf_16.c r2cf_32.c r2cf_64.c r2cf_128.c \ +r2cf_20.c r2cf_25.c # r2cf_30.c r2cf_40.c r2cf_50.c + +########################################################################### +# hf_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT +# step for a real-input FFT. Every hf codelet must have a +# corresponding r2cfII codelet (see below)! +HF = hf_2.c hf_3.c hf_4.c hf_5.c hf_6.c hf_7.c hf_8.c hf_9.c \ +hf_10.c hf_12.c hf_15.c hf_16.c hf_32.c hf_64.c \ +hf_20.c hf_25.c # hf_30.c hf_40.c hf_50.c + +# like hf, but generates part of its trig table on the fly (good for large n) +HF2 = hf2_4.c hf2_8.c hf2_16.c hf2_32.c \ +hf2_5.c hf2_20.c hf2_25.c + +# an r2cf transform where the input is shifted by half a sample (output +# is multiplied by a phase). 
This is needed as part of the DIT recursion; +# every hf_<r> or hf2_<r> codelet should have a corresponding r2cfII_<r> +R2CFII = r2cfII_2.c r2cfII_3.c r2cfII_4.c r2cfII_5.c r2cfII_6.c \ +r2cfII_7.c r2cfII_8.c r2cfII_9.c r2cfII_10.c r2cfII_12.c r2cfII_15.c \ +r2cfII_16.c r2cfII_32.c r2cfII_64.c \ +r2cfII_20.c r2cfII_25.c # r2cfII_30.c r2cfII_40.c r2cfII_50.c + +########################################################################### +# hc2cf_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT +# step for a real-input FFT with rdft2-style output. <r> must be even. +HC2CF = hc2cf_2.c hc2cf_4.c hc2cf_6.c hc2cf_8.c hc2cf_10.c hc2cf_12.c \ +hc2cf_16.c hc2cf_32.c \ +hc2cf_20.c # hc2cf_30.c + +HC2CFDFT = hc2cfdft_2.c hc2cfdft_4.c hc2cfdft_6.c hc2cfdft_8.c \ +hc2cfdft_10.c hc2cfdft_12.c hc2cfdft_16.c hc2cfdft_32.c \ +hc2cfdft_20.c # hc2cfdft_30.c + +# like hc2cf, but generates part of its trig table on the fly (good +# for large n) +HC2CF2 = hc2cf2_4.c hc2cf2_8.c hc2cf2_16.c hc2cf2_32.c \ +hc2cf2_20.c # hc2cf2_30.c +HC2CFDFT2 = hc2cfdft2_4.c hc2cfdft2_8.c hc2cfdft2_16.c hc2cfdft2_32.c \ +hc2cfdft2_20.c # hc2cfdft2_30.c + +########################################################################### +ALL_CODELETS = $(R2CF) $(HF) $(HF2) $(R2CFII) $(HC2CF) $(HC2CF2) \ +$(HC2CFDFT) $(HC2CFDFT2) + +BUILT_SOURCES= $(ALL_CODELETS) $(CODLIST) + +librdft_scalar_r2cf_la_SOURCES = $(BUILT_SOURCES) + +SOLVTAB_NAME = X(solvtab_rdft_r2cf) +XRENAME=X + +# special rules for regenerating codelets. 
+include $(top_srcdir)/support/Makefile.codelets + +if MAINTAINER_MODE +FLAGS_R2CF=$(RDFT_FLAGS_COMMON) +FLAGS_HF=$(RDFT_FLAGS_COMMON) +FLAGS_HF2=$(RDFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +FLAGS_HC2CF=$(RDFT_FLAGS_COMMON) +FLAGS_HC2CF2=$(RDFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +FLAGS_R2CFII=$(RDFT_FLAGS_COMMON) + +r2cf_%.c: $(CODELET_DEPS) $(GEN_R2CF) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CF) $(FLAGS_R2CF) -n $* -name r2cf_$* -include "r2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +hf_%.c: $(CODELET_DEPS) $(GEN_HC2HC) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HF) -n $* -dit -name hf_$* -include "hf.h") | $(ADD_DATE) | $(INDENT) >$@ + +hf2_%.c: $(CODELET_DEPS) $(GEN_HC2HC) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HF2) -n $* -dit -name hf2_$* -include "hf.h") | $(ADD_DATE) | $(INDENT) >$@ + +r2cfII_%.c: $(CODELET_DEPS) $(GEN_R2CF) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CF) $(FLAGS_R2CF) -n $* -name r2cfII_$* -dft-II -include "r2cfII.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cf_%.c: $(CODELET_DEPS) $(GEN_HC2C) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CF) -n $* -dit -name hc2cf_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cf2_%.c: $(CODELET_DEPS) $(GEN_HC2C) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CF2) -n $* -dit -name hc2cf2_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cfdft_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CF) -n $* -dit -name hc2cfdft_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cfdft2_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CF2) -n $* -dit -name hc2cfdft2_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,907 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/support/Makefile.codelets \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +subdir = rdft/scalar/r2cf +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps 
= $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_scalar_r2cf_la_LIBADD = +am__objects_1 = r2cf_2.lo r2cf_3.lo r2cf_4.lo r2cf_5.lo r2cf_6.lo \ + r2cf_7.lo r2cf_8.lo r2cf_9.lo r2cf_10.lo r2cf_11.lo r2cf_12.lo \ + r2cf_13.lo r2cf_14.lo r2cf_15.lo r2cf_16.lo r2cf_32.lo \ + r2cf_64.lo r2cf_128.lo r2cf_20.lo r2cf_25.lo +am__objects_2 = hf_2.lo hf_3.lo hf_4.lo hf_5.lo hf_6.lo hf_7.lo \ + hf_8.lo hf_9.lo hf_10.lo hf_12.lo hf_15.lo hf_16.lo hf_32.lo \ + hf_64.lo hf_20.lo hf_25.lo +am__objects_3 = hf2_4.lo hf2_8.lo hf2_16.lo hf2_32.lo hf2_5.lo \ + hf2_20.lo hf2_25.lo +am__objects_4 = r2cfII_2.lo r2cfII_3.lo r2cfII_4.lo r2cfII_5.lo \ + r2cfII_6.lo r2cfII_7.lo r2cfII_8.lo r2cfII_9.lo r2cfII_10.lo \ + r2cfII_12.lo r2cfII_15.lo r2cfII_16.lo r2cfII_32.lo \ + r2cfII_64.lo r2cfII_20.lo r2cfII_25.lo +am__objects_5 = hc2cf_2.lo hc2cf_4.lo hc2cf_6.lo hc2cf_8.lo \ + hc2cf_10.lo hc2cf_12.lo hc2cf_16.lo hc2cf_32.lo hc2cf_20.lo +am__objects_6 = hc2cf2_4.lo hc2cf2_8.lo hc2cf2_16.lo hc2cf2_32.lo \ + hc2cf2_20.lo +am__objects_7 = hc2cfdft_2.lo hc2cfdft_4.lo hc2cfdft_6.lo \ + hc2cfdft_8.lo hc2cfdft_10.lo hc2cfdft_12.lo hc2cfdft_16.lo \ + hc2cfdft_32.lo hc2cfdft_20.lo +am__objects_8 = hc2cfdft2_4.lo hc2cfdft2_8.lo hc2cfdft2_16.lo \ + hc2cfdft2_32.lo hc2cfdft2_20.lo +am__objects_9 = 
$(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) +am__objects_10 = codlist.lo +am__objects_11 = $(am__objects_9) $(am__objects_10) +am_librdft_scalar_r2cf_la_OBJECTS = $(am__objects_11) +librdft_scalar_r2cf_la_OBJECTS = $(am_librdft_scalar_r2cf_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_scalar_r2cf_la_SOURCES) +DIST_SOURCES = $(librdft_scalar_r2cf_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from 
the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ 
+NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix 
= @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/scalar + +noinst_LTLIBRARIES = librdft_scalar_r2cf.la + +########################################################################### +# r2cf_<n> is a hard-coded real-to-complex FFT of size <n> (base cases +# of real-input FFT recursion) +R2CF = r2cf_2.c r2cf_3.c r2cf_4.c r2cf_5.c r2cf_6.c r2cf_7.c r2cf_8.c \ +r2cf_9.c r2cf_10.c r2cf_11.c r2cf_12.c r2cf_13.c r2cf_14.c r2cf_15.c \ +r2cf_16.c r2cf_32.c r2cf_64.c r2cf_128.c \ +r2cf_20.c r2cf_25.c # r2cf_30.c r2cf_40.c r2cf_50.c + + +########################################################################### +# hf_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT +# step for a real-input FFT. Every hf codelet must have a +# corresponding r2cfII codelet (see below)! +HF = hf_2.c hf_3.c hf_4.c hf_5.c hf_6.c hf_7.c hf_8.c hf_9.c \ +hf_10.c hf_12.c hf_15.c hf_16.c hf_32.c hf_64.c \ +hf_20.c hf_25.c # hf_30.c hf_40.c hf_50.c + + +# like hf, but generates part of its trig table on the fly (good for large n) +HF2 = hf2_4.c hf2_8.c hf2_16.c hf2_32.c \ +hf2_5.c hf2_20.c hf2_25.c + + +# an r2cf transform where the input is shifted by half a sample (output +# is multiplied by a phase).
This is needed as part of the DIT recursion; +# every hf_<r> or hf2_<r> codelet should have a corresponding r2cfII_<r> +R2CFII = r2cfII_2.c r2cfII_3.c r2cfII_4.c r2cfII_5.c r2cfII_6.c \ +r2cfII_7.c r2cfII_8.c r2cfII_9.c r2cfII_10.c r2cfII_12.c r2cfII_15.c \ +r2cfII_16.c r2cfII_32.c r2cfII_64.c \ +r2cfII_20.c r2cfII_25.c # r2cfII_30.c r2cfII_40.c r2cfII_50.c + + +########################################################################### +# hc2cf_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT +# step for a real-input FFT with rdft2-style output. <r> must be even. +HC2CF = hc2cf_2.c hc2cf_4.c hc2cf_6.c hc2cf_8.c hc2cf_10.c hc2cf_12.c \ +hc2cf_16.c hc2cf_32.c \ +hc2cf_20.c # hc2cf_30.c + +HC2CFDFT = hc2cfdft_2.c hc2cfdft_4.c hc2cfdft_6.c hc2cfdft_8.c \ +hc2cfdft_10.c hc2cfdft_12.c hc2cfdft_16.c hc2cfdft_32.c \ +hc2cfdft_20.c # hc2cfdft_30.c + + +# like hc2cf, but generates part of its trig table on the fly (good +# for large n) +HC2CF2 = hc2cf2_4.c hc2cf2_8.c hc2cf2_16.c hc2cf2_32.c \ +hc2cf2_20.c # hc2cf2_30.c + +HC2CFDFT2 = hc2cfdft2_4.c hc2cfdft2_8.c hc2cfdft2_16.c hc2cfdft2_32.c \ +hc2cfdft2_20.c # hc2cfdft2_30.c + + +########################################################################### +ALL_CODELETS = $(R2CF) $(HF) $(HF2) $(R2CFII) $(HC2CF) $(HC2CF2) \ +$(HC2CFDFT) $(HC2CFDFT2) + +BUILT_SOURCES = $(ALL_CODELETS) $(CODLIST) +librdft_scalar_r2cf_la_SOURCES = $(BUILT_SOURCES) +SOLVTAB_NAME = X(solvtab_rdft_r2cf) +XRENAME = X +CODLIST = codlist.c +CODELET_NAME = codelet_ +@MAINTAINER_MODE_TRUE@INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +@MAINTAINER_MODE_TRUE@TWOVERS = sh ${top_srcdir}/support/twovers.sh +@MAINTAINER_MODE_TRUE@GENFFTDIR = ${top_builddir}/genfft +@MAINTAINER_MODE_TRUE@GEN_NOTW = ${GENFFTDIR}/gen_notw.native +@MAINTAINER_MODE_TRUE@GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE_C = ${GENFFTDIR}/gen_twiddle_c.native
+@MAINTAINER_MODE_TRUE@GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +@MAINTAINER_MODE_TRUE@GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +@MAINTAINER_MODE_TRUE@GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +@MAINTAINER_MODE_TRUE@GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +@MAINTAINER_MODE_TRUE@GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +@MAINTAINER_MODE_TRUE@GEN_R2R = ${GENFFTDIR}/gen_r2r.native +@MAINTAINER_MODE_TRUE@PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +@MAINTAINER_MODE_TRUE@PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +@MAINTAINER_MODE_TRUE@ADD_DATE = sed -e s/@DATE@/"`date`"/ +@MAINTAINER_MODE_TRUE@COPYRIGHT = ${top_srcdir}/COPYRIGHT +@MAINTAINER_MODE_TRUE@CODELET_DEPS = $(COPYRIGHT) $(PRELUDE) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_DFT = cat $(COPYRIGHT) $(PRELUDE_DFT) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_RDFT = cat $(COPYRIGHT) $(PRELUDE_RDFT) +@MAINTAINER_MODE_TRUE@FLAGS_COMMON = -compact -variables 4 +@MAINTAINER_MODE_TRUE@DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +@MAINTAINER_MODE_TRUE@RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# special rules for regenerating codelets. 
+@MAINTAINER_MODE_TRUE@FLAGS_R2CF = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_HF = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_HF2 = $(RDFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_HC2CF = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_HC2CF2 = $(RDFT_FLAGS_COMMON) -twiddle-log3 -precompute-twiddles +@MAINTAINER_MODE_TRUE@FLAGS_R2CFII = $(RDFT_FLAGS_COMMON) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/support/Makefile.codelets $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/scalar/r2cf/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/scalar/r2cf/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/support/Makefile.codelets: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_scalar_r2cf.la: $(librdft_scalar_r2cf_la_OBJECTS) $(librdft_scalar_r2cf_la_DEPENDENCIES) $(EXTRA_librdft_scalar_r2cf_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(librdft_scalar_r2cf_la_OBJECTS) $(librdft_scalar_r2cf_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf2_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf2_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hc2cf_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cf_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft2_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft2_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdft_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_4.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf2_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hf_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/r2cfII_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cfII_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_128.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_25.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_64.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r2cf_9.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) 
-MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + 
$(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic maintainer-clean-local mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am + + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; \ + 
echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? +maintainer-clean-local: + rm -f $(CODLIST) + +# cancel the hideous builtin rules that cause an infinite loop +@MAINTAINER_MODE_TRUE@%: %.o +@MAINTAINER_MODE_TRUE@%: %.s +@MAINTAINER_MODE_TRUE@%: %.c +@MAINTAINER_MODE_TRUE@%: %.S + +@MAINTAINER_MODE_TRUE@r2cf_%.c: $(CODELET_DEPS) $(GEN_R2CF) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CF) $(FLAGS_R2CF) -n $* -name r2cf_$* -include "r2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hf_%.c: $(CODELET_DEPS) $(GEN_HC2HC) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HF) -n $* -dit -name hf_$* -include "hf.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hf2_%.c: $(CODELET_DEPS) $(GEN_HC2HC) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2HC) $(FLAGS_HF2) -n $* -dit -name hf2_$* -include "hf.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@r2cfII_%.c: $(CODELET_DEPS) $(GEN_R2CF) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2CF) $(FLAGS_R2CF) -n $* -name r2cfII_$* -dft-II -include "r2cfII.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cf_%.c: $(CODELET_DEPS) $(GEN_HC2C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); 
$(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CF) -n $* -dit -name hc2cf_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cf2_%.c: $(CODELET_DEPS) $(GEN_HC2C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2C) $(FLAGS_HC2CF2) -n $* -dit -name hc2cf2_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cfdft_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CF) -n $* -dit -name hc2cfdft_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cfdft2_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT) $(FLAGS_HC2CF2) -n $* -dit -name hc2cfdft2_$* -include "hc2cf.h") | $(ADD_DATE) | $(INDENT) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,183 @@ +#include "ifftw.h" + + +extern void X(codelet_r2cf_2)(planner *); +extern void X(codelet_r2cf_3)(planner *); +extern void X(codelet_r2cf_4)(planner *); +extern void X(codelet_r2cf_5)(planner *); +extern void X(codelet_r2cf_6)(planner *); +extern void X(codelet_r2cf_7)(planner *); +extern void X(codelet_r2cf_8)(planner *); +extern void X(codelet_r2cf_9)(planner *); +extern void X(codelet_r2cf_10)(planner *); +extern void X(codelet_r2cf_11)(planner *); +extern void X(codelet_r2cf_12)(planner *); +extern void X(codelet_r2cf_13)(planner *); +extern void X(codelet_r2cf_14)(planner *); +extern void X(codelet_r2cf_15)(planner *); +extern void X(codelet_r2cf_16)(planner *); +extern void X(codelet_r2cf_32)(planner *); +extern void X(codelet_r2cf_64)(planner *); +extern void 
X(codelet_r2cf_128)(planner *); +extern void X(codelet_r2cf_20)(planner *); +extern void X(codelet_r2cf_25)(planner *); +extern void X(codelet_hf_2)(planner *); +extern void X(codelet_hf_3)(planner *); +extern void X(codelet_hf_4)(planner *); +extern void X(codelet_hf_5)(planner *); +extern void X(codelet_hf_6)(planner *); +extern void X(codelet_hf_7)(planner *); +extern void X(codelet_hf_8)(planner *); +extern void X(codelet_hf_9)(planner *); +extern void X(codelet_hf_10)(planner *); +extern void X(codelet_hf_12)(planner *); +extern void X(codelet_hf_15)(planner *); +extern void X(codelet_hf_16)(planner *); +extern void X(codelet_hf_32)(planner *); +extern void X(codelet_hf_64)(planner *); +extern void X(codelet_hf_20)(planner *); +extern void X(codelet_hf_25)(planner *); +extern void X(codelet_hf2_4)(planner *); +extern void X(codelet_hf2_8)(planner *); +extern void X(codelet_hf2_16)(planner *); +extern void X(codelet_hf2_32)(planner *); +extern void X(codelet_hf2_5)(planner *); +extern void X(codelet_hf2_20)(planner *); +extern void X(codelet_hf2_25)(planner *); +extern void X(codelet_r2cfII_2)(planner *); +extern void X(codelet_r2cfII_3)(planner *); +extern void X(codelet_r2cfII_4)(planner *); +extern void X(codelet_r2cfII_5)(planner *); +extern void X(codelet_r2cfII_6)(planner *); +extern void X(codelet_r2cfII_7)(planner *); +extern void X(codelet_r2cfII_8)(planner *); +extern void X(codelet_r2cfII_9)(planner *); +extern void X(codelet_r2cfII_10)(planner *); +extern void X(codelet_r2cfII_12)(planner *); +extern void X(codelet_r2cfII_15)(planner *); +extern void X(codelet_r2cfII_16)(planner *); +extern void X(codelet_r2cfII_32)(planner *); +extern void X(codelet_r2cfII_64)(planner *); +extern void X(codelet_r2cfII_20)(planner *); +extern void X(codelet_r2cfII_25)(planner *); +extern void X(codelet_hc2cf_2)(planner *); +extern void X(codelet_hc2cf_4)(planner *); +extern void X(codelet_hc2cf_6)(planner *); +extern void X(codelet_hc2cf_8)(planner *); +extern void 
X(codelet_hc2cf_10)(planner *); +extern void X(codelet_hc2cf_12)(planner *); +extern void X(codelet_hc2cf_16)(planner *); +extern void X(codelet_hc2cf_32)(planner *); +extern void X(codelet_hc2cf_20)(planner *); +extern void X(codelet_hc2cf2_4)(planner *); +extern void X(codelet_hc2cf2_8)(planner *); +extern void X(codelet_hc2cf2_16)(planner *); +extern void X(codelet_hc2cf2_32)(planner *); +extern void X(codelet_hc2cf2_20)(planner *); +extern void X(codelet_hc2cfdft_2)(planner *); +extern void X(codelet_hc2cfdft_4)(planner *); +extern void X(codelet_hc2cfdft_6)(planner *); +extern void X(codelet_hc2cfdft_8)(planner *); +extern void X(codelet_hc2cfdft_10)(planner *); +extern void X(codelet_hc2cfdft_12)(planner *); +extern void X(codelet_hc2cfdft_16)(planner *); +extern void X(codelet_hc2cfdft_32)(planner *); +extern void X(codelet_hc2cfdft_20)(planner *); +extern void X(codelet_hc2cfdft2_4)(planner *); +extern void X(codelet_hc2cfdft2_8)(planner *); +extern void X(codelet_hc2cfdft2_16)(planner *); +extern void X(codelet_hc2cfdft2_32)(planner *); +extern void X(codelet_hc2cfdft2_20)(planner *); + + +extern const solvtab X(solvtab_rdft_r2cf); +const solvtab X(solvtab_rdft_r2cf) = { + SOLVTAB(X(codelet_r2cf_2)), + SOLVTAB(X(codelet_r2cf_3)), + SOLVTAB(X(codelet_r2cf_4)), + SOLVTAB(X(codelet_r2cf_5)), + SOLVTAB(X(codelet_r2cf_6)), + SOLVTAB(X(codelet_r2cf_7)), + SOLVTAB(X(codelet_r2cf_8)), + SOLVTAB(X(codelet_r2cf_9)), + SOLVTAB(X(codelet_r2cf_10)), + SOLVTAB(X(codelet_r2cf_11)), + SOLVTAB(X(codelet_r2cf_12)), + SOLVTAB(X(codelet_r2cf_13)), + SOLVTAB(X(codelet_r2cf_14)), + SOLVTAB(X(codelet_r2cf_15)), + SOLVTAB(X(codelet_r2cf_16)), + SOLVTAB(X(codelet_r2cf_32)), + SOLVTAB(X(codelet_r2cf_64)), + SOLVTAB(X(codelet_r2cf_128)), + SOLVTAB(X(codelet_r2cf_20)), + SOLVTAB(X(codelet_r2cf_25)), + SOLVTAB(X(codelet_hf_2)), + SOLVTAB(X(codelet_hf_3)), + SOLVTAB(X(codelet_hf_4)), + SOLVTAB(X(codelet_hf_5)), + SOLVTAB(X(codelet_hf_6)), + SOLVTAB(X(codelet_hf_7)), + 
SOLVTAB(X(codelet_hf_8)), + SOLVTAB(X(codelet_hf_9)), + SOLVTAB(X(codelet_hf_10)), + SOLVTAB(X(codelet_hf_12)), + SOLVTAB(X(codelet_hf_15)), + SOLVTAB(X(codelet_hf_16)), + SOLVTAB(X(codelet_hf_32)), + SOLVTAB(X(codelet_hf_64)), + SOLVTAB(X(codelet_hf_20)), + SOLVTAB(X(codelet_hf_25)), + SOLVTAB(X(codelet_hf2_4)), + SOLVTAB(X(codelet_hf2_8)), + SOLVTAB(X(codelet_hf2_16)), + SOLVTAB(X(codelet_hf2_32)), + SOLVTAB(X(codelet_hf2_5)), + SOLVTAB(X(codelet_hf2_20)), + SOLVTAB(X(codelet_hf2_25)), + SOLVTAB(X(codelet_r2cfII_2)), + SOLVTAB(X(codelet_r2cfII_3)), + SOLVTAB(X(codelet_r2cfII_4)), + SOLVTAB(X(codelet_r2cfII_5)), + SOLVTAB(X(codelet_r2cfII_6)), + SOLVTAB(X(codelet_r2cfII_7)), + SOLVTAB(X(codelet_r2cfII_8)), + SOLVTAB(X(codelet_r2cfII_9)), + SOLVTAB(X(codelet_r2cfII_10)), + SOLVTAB(X(codelet_r2cfII_12)), + SOLVTAB(X(codelet_r2cfII_15)), + SOLVTAB(X(codelet_r2cfII_16)), + SOLVTAB(X(codelet_r2cfII_32)), + SOLVTAB(X(codelet_r2cfII_64)), + SOLVTAB(X(codelet_r2cfII_20)), + SOLVTAB(X(codelet_r2cfII_25)), + SOLVTAB(X(codelet_hc2cf_2)), + SOLVTAB(X(codelet_hc2cf_4)), + SOLVTAB(X(codelet_hc2cf_6)), + SOLVTAB(X(codelet_hc2cf_8)), + SOLVTAB(X(codelet_hc2cf_10)), + SOLVTAB(X(codelet_hc2cf_12)), + SOLVTAB(X(codelet_hc2cf_16)), + SOLVTAB(X(codelet_hc2cf_32)), + SOLVTAB(X(codelet_hc2cf_20)), + SOLVTAB(X(codelet_hc2cf2_4)), + SOLVTAB(X(codelet_hc2cf2_8)), + SOLVTAB(X(codelet_hc2cf2_16)), + SOLVTAB(X(codelet_hc2cf2_32)), + SOLVTAB(X(codelet_hc2cf2_20)), + SOLVTAB(X(codelet_hc2cfdft_2)), + SOLVTAB(X(codelet_hc2cfdft_4)), + SOLVTAB(X(codelet_hc2cfdft_6)), + SOLVTAB(X(codelet_hc2cfdft_8)), + SOLVTAB(X(codelet_hc2cfdft_10)), + SOLVTAB(X(codelet_hc2cfdft_12)), + SOLVTAB(X(codelet_hc2cfdft_16)), + SOLVTAB(X(codelet_hc2cfdft_32)), + SOLVTAB(X(codelet_hc2cfdft_20)), + SOLVTAB(X(codelet_hc2cfdft2_4)), + SOLVTAB(X(codelet_hc2cfdft2_8)), + SOLVTAB(X(codelet_hc2cfdft2_16)), + SOLVTAB(X(codelet_hc2cfdft2_32)), + SOLVTAB(X(codelet_hc2cfdft2_20)), + SOLVTAB_END +}; diff -r 8db794ca3e0b -r 
26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,827 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -dit -name hc2cf2_16 -include hc2cf.h */ + +/* + * This function contains 196 FP additions, 134 FP multiplications, + * (or, 104 additions, 42 multiplications, 92 fused multiply/add), + * 100 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT 
m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T3S, T3R; + { + E T2, Tf, TM, TO, T3, Tg, TN, TS, T4, Tp, T6, T5, Th; + T2 = W[0]; + Tf = W[2]; + TM = W[6]; + TO = W[7]; + T3 = W[4]; + Tg = T2 * Tf; + TN = T2 * TM; + TS = T2 * TO; + T4 = T2 * T3; + Tp = Tf * T3; + T6 = W[5]; + T5 = W[1]; + Th = W[3]; + { + E TZ, Te, T1U, T3A, T3L, T2D, T1G, T2B, T3h, T1R, T2w, T2I, T3i, Tx, T3M; + E T1Z, T3w, TL, T26, T25, T37, T1d, T2o, T2l, T3c, T1s, T2m, T2t, T3d, TX; + E T10, TV, T2a, TY, T2b; + { + E TF, TP, TT, Tq, TW, Tz, Tu, TI, TC, T1m, T1f, T1p, T1j, Tr, Ts; + E Tv, To, T1W; + { + E Ti, Tm, T1L, T1O, T1D, T1A, T1x, T2z, T1F, T2y; + { + E T1, T7, Tb, T3z, T8, T1z, T9, Tc; + { + E T1i, T1e, T1C, T1y, Tt, Ta, Tl; + T1 = Rp[0]; + Tt = Tf * T6; + Ta = T2 * T6; + T7 = FMA(T5, T6, T4); + TF = FNMS(T5, T6, T4); + TP = FMA(T5, TO, TN); + TT = FNMS(T5, TM, TS); + Tq = FNMS(Th, T6, Tp); + TW = FMA(Th, T6, Tp); + Tz = FMA(T5, Th, Tg); + Ti = FNMS(T5, Th, Tg); + Tl = T2 * Th; + Tu = FMA(Th, T3, Tt); + TZ = FNMS(Th, T3, Tt); + TI = FMA(T5, T3, Ta); + Tb = FNMS(T5, T3, Ta); + T1i = Ti * T6; + T1e = Ti * T3; + T1C = Tz * T6; + T1y = Tz * T3; + Tm = FMA(T5, Tf, Tl); + TC = FNMS(T5, Tf, Tl); + T3z = Rm[0]; + T8 = Rp[WS(rs, 4)]; + T1m = FNMS(Tm, T6, T1e); + T1f = FMA(Tm, T6, T1e); + T1p = FMA(Tm, T3, T1i); + T1j = FNMS(Tm, T3, T1i); + T1L = FNMS(TC, T6, T1y); + T1z = FMA(TC, T6, T1y); + T1O = FMA(TC, T3, T1C); + T1D = FNMS(TC, T3, T1C); + T9 = T7 * T8; + Tc = Rm[WS(rs, 4)]; + } + { + E T1u, T1w, T1v, T2x, T3y, T1B, T1E, Td, T3x; + T1u = Ip[WS(rs, 7)]; + T1w = Im[WS(rs, 7)]; + T1A = Ip[WS(rs, 3)]; + Td = FMA(Tb, Tc, T9); + T3x = T7 * Tc; + T1v = TM * T1u; + T2x = TM * T1w; + Te = T1 + Td; + T1U = T1 - Td; + T3y = FNMS(Tb, T8, T3x); + T1B = T1z * T1A; + T1E = Im[WS(rs, 3)]; + T1x = FMA(TO, T1w, T1v); + T3A = T3y + T3z; + T3L = T3z - T3y; + T2z = T1z * T1E; + T1F = FMA(T1D, T1E, 
T1B); + T2y = FNMS(TO, T1u, T2x); + } + } + { + E T1H, T1I, T1J, T1M, T1P, T2A; + T1H = Ip[WS(rs, 1)]; + T2A = FNMS(T1D, T1A, T2z); + T2D = T1x - T1F; + T1G = T1x + T1F; + T1I = Tf * T1H; + T2B = T2y - T2A; + T3h = T2y + T2A; + T1J = Im[WS(rs, 1)]; + T1M = Ip[WS(rs, 5)]; + T1P = Im[WS(rs, 5)]; + { + E Tj, Tk, Tn, T1V; + { + E T1K, T2F, T1Q, T2H, T2E, T1N, T2G; + Tj = Rp[WS(rs, 2)]; + T1K = FMA(Th, T1J, T1I); + T2E = Tf * T1J; + T1N = T1L * T1M; + T2G = T1L * T1P; + Tk = Ti * Tj; + T2F = FNMS(Th, T1H, T2E); + T1Q = FMA(T1O, T1P, T1N); + T2H = FNMS(T1O, T1M, T2G); + Tn = Rm[WS(rs, 2)]; + Tr = Rp[WS(rs, 6)]; + T1R = T1K + T1Q; + T2w = T1Q - T1K; + T2I = T2F - T2H; + T3i = T2F + T2H; + T1V = Ti * Tn; + Ts = Tq * Tr; + Tv = Rm[WS(rs, 6)]; + } + To = FMA(Tm, Tn, Tk); + T1W = FNMS(Tm, Tj, T1V); + } + } + } + { + E T19, T1b, T18, T2i, T1a, T2j; + { + E TE, T22, TK, T24; + { + E TA, TD, TB, T21, TG, TJ, TH, T23, T1Y, Tw, T1X; + TA = Rp[WS(rs, 1)]; + Tw = FMA(Tu, Tv, Ts); + T1X = Tq * Tv; + TD = Rm[WS(rs, 1)]; + TB = Tz * TA; + Tx = To + Tw; + T3M = To - Tw; + T1Y = FNMS(Tu, Tr, T1X); + T21 = Tz * TD; + TG = Rp[WS(rs, 5)]; + TJ = Rm[WS(rs, 5)]; + T1Z = T1W - T1Y; + T3w = T1W + T1Y; + TH = TF * TG; + T23 = TF * TJ; + TE = FMA(TC, TD, TB); + T22 = FNMS(TC, TA, T21); + TK = FMA(TI, TJ, TH); + T24 = FNMS(TI, TG, T23); + } + { + E T15, T17, T16, T2h; + T15 = Ip[0]; + T17 = Im[0]; + TL = TE + TK; + T26 = TE - TK; + T25 = T22 - T24; + T37 = T22 + T24; + T16 = T2 * T15; + T2h = T2 * T17; + T19 = Ip[WS(rs, 4)]; + T1b = Im[WS(rs, 4)]; + T18 = FMA(T5, T17, T16); + T2i = FNMS(T5, T15, T2h); + T1a = T3 * T19; + T2j = T3 * T1b; + } + } + { + E T1n, T1q, T1l, T2q, T1o, T2r; + { + E T1g, T1k, T1h, T2p, T1c, T2k; + T1g = Ip[WS(rs, 2)]; + T1k = Im[WS(rs, 2)]; + T1c = FMA(T6, T1b, T1a); + T2k = FNMS(T6, T19, T2j); + T1h = T1f * T1g; + T2p = T1f * T1k; + T1d = T18 + T1c; + T2o = T18 - T1c; + T2l = T2i - T2k; + T3c = T2i + T2k; + T1n = Ip[WS(rs, 6)]; + T1q = Im[WS(rs, 6)]; + T1l = FMA(T1j, T1k, 
T1h); + T2q = FNMS(T1j, T1g, T2p); + T1o = T1m * T1n; + T2r = T1m * T1q; + } + { + E TQ, TU, TR, T29, T1r, T2s; + TQ = Rp[WS(rs, 7)]; + TU = Rm[WS(rs, 7)]; + T1r = FMA(T1p, T1q, T1o); + T2s = FNMS(T1p, T1n, T2r); + TR = TP * TQ; + T29 = TP * TU; + T1s = T1l + T1r; + T2m = T1l - T1r; + T2t = T2q - T2s; + T3d = T2q + T2s; + TX = Rp[WS(rs, 3)]; + T10 = Rm[WS(rs, 3)]; + TV = FMA(TT, TU, TR); + T2a = FNMS(TT, TQ, T29); + TY = TW * TX; + T2b = TW * T10; + } + } + } + } + { + E T36, T3G, T3b, T3g, T28, T2d, T3F, T39, T3e, T3q, T3C, T3j, T3u, T3t; + { + E T3D, T1T, T3r, T14, T3E, T3s; + { + E Ty, T3B, T11, T2c, T13, T3v; + T36 = Te - Tx; + Ty = Te + Tx; + T3B = T3w + T3A; + T3G = T3A - T3w; + T11 = FMA(TZ, T10, TY); + T2c = FNMS(TZ, TX, T2b); + { + E T1t, T1S, T12, T38; + T3b = T1d - T1s; + T1t = T1d + T1s; + T1S = T1G + T1R; + T3g = T1G - T1R; + T12 = TV + T11; + T28 = TV - T11; + T2d = T2a - T2c; + T38 = T2a + T2c; + T3D = T1S - T1t; + T1T = T1t + T1S; + T13 = TL + T12; + T3F = T12 - TL; + T39 = T37 - T38; + T3v = T37 + T38; + } + T3e = T3c - T3d; + T3r = T3c + T3d; + T3q = Ty - T13; + T14 = Ty + T13; + T3E = T3B - T3v; + T3C = T3v + T3B; + T3s = T3h + T3i; + T3j = T3h - T3i; + } + Rm[WS(rs, 7)] = T14 - T1T; + Rp[0] = T14 + T1T; + Im[WS(rs, 3)] = T3D - T3E; + T3u = T3r + T3s; + T3t = T3r - T3s; + Ip[WS(rs, 4)] = T3D + T3E; + } + { + E T3m, T3a, T3J, T3H; + Ip[0] = T3u + T3C; + Im[WS(rs, 7)] = T3u - T3C; + Rp[WS(rs, 4)] = T3q + T3t; + Rm[WS(rs, 3)] = T3q - T3t; + T3m = T36 - T39; + T3a = T36 + T39; + T3J = T3G - T3F; + T3H = T3F + T3G; + { + E T2Q, T20, T3N, T3T, T2J, T2C, T3O, T2f, T34, T30, T2W, T2V, T3U, T2T, T2N; + E T2v; + { + E T2R, T27, T2e, T2S; + { + E T3n, T3f, T3o, T3k; + T2Q = T1U + T1Z; + T20 = T1U - T1Z; + T3n = T3e - T3b; + T3f = T3b + T3e; + T3o = T3g + T3j; + T3k = T3g - T3j; + T3N = T3L - T3M; + T3T = T3M + T3L; + { + E T3p, T3I, T3K, T3l; + T3p = T3n - T3o; + T3I = T3n + T3o; + T3K = T3k - T3f; + T3l = T3f + T3k; + Rp[WS(rs, 6)] = FMA(KP707106781, T3p, 
T3m); + Rm[WS(rs, 1)] = FNMS(KP707106781, T3p, T3m); + Ip[WS(rs, 2)] = FMA(KP707106781, T3I, T3H); + Im[WS(rs, 5)] = FMS(KP707106781, T3I, T3H); + Ip[WS(rs, 6)] = FMA(KP707106781, T3K, T3J); + Im[WS(rs, 1)] = FMS(KP707106781, T3K, T3J); + Rp[WS(rs, 2)] = FMA(KP707106781, T3l, T3a); + Rm[WS(rs, 5)] = FNMS(KP707106781, T3l, T3a); + T2R = T26 + T25; + T27 = T25 - T26; + T2e = T28 + T2d; + T2S = T28 - T2d; + } + } + { + E T2Y, T2Z, T2n, T2u; + T2J = T2D - T2I; + T2Y = T2D + T2I; + T2Z = T2B + T2w; + T2C = T2w - T2B; + T3O = T27 + T2e; + T2f = T27 - T2e; + T34 = FMA(KP414213562, T2Y, T2Z); + T30 = FNMS(KP414213562, T2Z, T2Y); + T2W = T2l - T2m; + T2n = T2l + T2m; + T2u = T2o - T2t; + T2V = T2o + T2t; + T3U = T2S - T2R; + T2T = T2R + T2S; + T2N = FNMS(KP414213562, T2n, T2u); + T2v = FMA(KP414213562, T2u, T2n); + } + } + { + E T33, T2X, T3X, T3Y; + { + E T2M, T2g, T2O, T2K, T3V, T3W, T2P, T2L; + T2M = FNMS(KP707106781, T2f, T20); + T2g = FMA(KP707106781, T2f, T20); + T33 = FNMS(KP414213562, T2V, T2W); + T2X = FMA(KP414213562, T2W, T2V); + T2O = FNMS(KP414213562, T2C, T2J); + T2K = FMA(KP414213562, T2J, T2C); + T3V = FMA(KP707106781, T3U, T3T); + T3X = FNMS(KP707106781, T3U, T3T); + T3W = T2O - T2N; + T2P = T2N + T2O; + T3Y = T2K - T2v; + T2L = T2v + T2K; + Ip[WS(rs, 3)] = FMA(KP923879532, T3W, T3V); + Im[WS(rs, 4)] = FMS(KP923879532, T3W, T3V); + Rp[WS(rs, 3)] = FMA(KP923879532, T2L, T2g); + Rm[WS(rs, 4)] = FNMS(KP923879532, T2L, T2g); + Rm[0] = FMA(KP923879532, T2P, T2M); + Rp[WS(rs, 7)] = FNMS(KP923879532, T2P, T2M); + } + { + E T32, T3P, T3Q, T35, T2U, T31; + T32 = FNMS(KP707106781, T2T, T2Q); + T2U = FMA(KP707106781, T2T, T2Q); + T31 = T2X + T30; + T3S = T30 - T2X; + T3R = FNMS(KP707106781, T3O, T3N); + T3P = FMA(KP707106781, T3O, T3N); + Ip[WS(rs, 7)] = FMA(KP923879532, T3Y, T3X); + Im[0] = FMS(KP923879532, T3Y, T3X); + Rp[WS(rs, 1)] = FMA(KP923879532, T31, T2U); + Rm[WS(rs, 6)] = FNMS(KP923879532, T31, T2U); + T3Q = T33 + T34; + T35 = T33 - T34; + Ip[WS(rs, 1)] = 
FMA(KP923879532, T3Q, T3P); + Im[WS(rs, 6)] = FMS(KP923879532, T3Q, T3P); + Rp[WS(rs, 5)] = FMA(KP923879532, T35, T32); + Rm[WS(rs, 2)] = FNMS(KP923879532, T35, T32); + } + } + } + } + } + } + } + Ip[WS(rs, 5)] = FMA(KP923879532, T3S, T3R); + Im[WS(rs, 2)] = FMS(KP923879532, T3S, T3R); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cf2_16", twinstr, &GENUS, {104, 42, 92, 0} }; + +void X(codelet_hc2cf2_16) (planner *p) { + X(khc2c_register) (p, hc2cf2_16, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -dit -name hc2cf2_16 -include hc2cf.h */ + +/* + * This function contains 196 FP additions, 108 FP multiplications, + * (or, 156 additions, 68 multiplications, 40 fused multiply/add), + * 82 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T2, T5, Tg, Ti, Tk, To, TE, TC, T6, T3, T8, TW, TJ, Tt, TU; + E Tc, Tx, TH, TN, TO, TP, TR, T1f, T1k, T1b, T1i, T1y, T1H, T1u, T1F; + { + E T7, Tv, Ta, Ts, T4, Tw, Tb, Tr; + { + E Th, Tn, Tj, Tm; + T2 = W[0]; + T5 = W[1]; + Tg = W[2]; + Ti = W[3]; + Th = T2 * Tg; + Tn = T5 * Tg; + Tj = T5 * Ti; + Tm = T2 * Ti; + Tk = Th - Tj; + To = Tm + Tn; + TE = Tm - Tn; + TC = Th + Tj; + T6 = W[5]; + T7 = T5 * T6; + Tv = Tg * T6; + Ta = T2 * T6; + Ts = Ti * T6; + T3 = 
W[4]; + T4 = T2 * T3; + Tw = Ti * T3; + Tb = T5 * T3; + Tr = Tg * T3; + } + T8 = T4 + T7; + TW = Tv - Tw; + TJ = Ta + Tb; + Tt = Tr - Ts; + TU = Tr + Ts; + Tc = Ta - Tb; + Tx = Tv + Tw; + TH = T4 - T7; + TN = W[6]; + TO = W[7]; + TP = FMA(T2, TN, T5 * TO); + TR = FNMS(T5, TN, T2 * TO); + { + E T1d, T1e, T19, T1a; + T1d = Tk * T6; + T1e = To * T3; + T1f = T1d - T1e; + T1k = T1d + T1e; + T19 = Tk * T3; + T1a = To * T6; + T1b = T19 + T1a; + T1i = T19 - T1a; + } + { + E T1w, T1x, T1s, T1t; + T1w = TC * T6; + T1x = TE * T3; + T1y = T1w - T1x; + T1H = T1w + T1x; + T1s = TC * T3; + T1t = TE * T6; + T1u = T1s + T1t; + T1F = T1s - T1t; + } + } + { + E Tf, T3r, T1N, T3e, TA, T3s, T1Q, T3b, TM, T2M, T1W, T2w, TZ, T2N, T21; + E T2x, T1B, T1K, T2V, T2W, T2X, T2Y, T2j, T2D, T2o, T2E, T18, T1n, T2Q, T2R; + E T2S, T2T, T28, T2A, T2d, T2B; + { + E T1, T3d, Te, T3c, T9, Td; + T1 = Rp[0]; + T3d = Rm[0]; + T9 = Rp[WS(rs, 4)]; + Td = Rm[WS(rs, 4)]; + Te = FMA(T8, T9, Tc * Td); + T3c = FNMS(Tc, T9, T8 * Td); + Tf = T1 + Te; + T3r = T3d - T3c; + T1N = T1 - Te; + T3e = T3c + T3d; + } + { + E Tq, T1O, Tz, T1P; + { + E Tl, Tp, Tu, Ty; + Tl = Rp[WS(rs, 2)]; + Tp = Rm[WS(rs, 2)]; + Tq = FMA(Tk, Tl, To * Tp); + T1O = FNMS(To, Tl, Tk * Tp); + Tu = Rp[WS(rs, 6)]; + Ty = Rm[WS(rs, 6)]; + Tz = FMA(Tt, Tu, Tx * Ty); + T1P = FNMS(Tx, Tu, Tt * Ty); + } + TA = Tq + Tz; + T3s = Tq - Tz; + T1Q = T1O - T1P; + T3b = T1O + T1P; + } + { + E TG, T1S, TL, T1T, T1U, T1V; + { + E TD, TF, TI, TK; + TD = Rp[WS(rs, 1)]; + TF = Rm[WS(rs, 1)]; + TG = FMA(TC, TD, TE * TF); + T1S = FNMS(TE, TD, TC * TF); + TI = Rp[WS(rs, 5)]; + TK = Rm[WS(rs, 5)]; + TL = FMA(TH, TI, TJ * TK); + T1T = FNMS(TJ, TI, TH * TK); + } + TM = TG + TL; + T2M = T1S + T1T; + T1U = T1S - T1T; + T1V = TG - TL; + T1W = T1U - T1V; + T2w = T1V + T1U; + } + { + E TT, T1Y, TY, T1Z, T1X, T20; + { + E TQ, TS, TV, TX; + TQ = Rp[WS(rs, 7)]; + TS = Rm[WS(rs, 7)]; + TT = FMA(TP, TQ, TR * TS); + T1Y = FNMS(TR, TQ, TP * TS); + TV = Rp[WS(rs, 3)]; + TX = 
Rm[WS(rs, 3)]; + TY = FMA(TU, TV, TW * TX); + T1Z = FNMS(TW, TV, TU * TX); + } + TZ = TT + TY; + T2N = T1Y + T1Z; + T1X = TT - TY; + T20 = T1Y - T1Z; + T21 = T1X + T20; + T2x = T1X - T20; + } + { + E T1r, T2k, T1J, T2h, T1A, T2l, T1E, T2g; + { + E T1p, T1q, T1G, T1I; + T1p = Ip[WS(rs, 7)]; + T1q = Im[WS(rs, 7)]; + T1r = FMA(TN, T1p, TO * T1q); + T2k = FNMS(TO, T1p, TN * T1q); + T1G = Ip[WS(rs, 5)]; + T1I = Im[WS(rs, 5)]; + T1J = FMA(T1F, T1G, T1H * T1I); + T2h = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1v, T1z, T1C, T1D; + T1v = Ip[WS(rs, 3)]; + T1z = Im[WS(rs, 3)]; + T1A = FMA(T1u, T1v, T1y * T1z); + T2l = FNMS(T1y, T1v, T1u * T1z); + T1C = Ip[WS(rs, 1)]; + T1D = Im[WS(rs, 1)]; + T1E = FMA(Tg, T1C, Ti * T1D); + T2g = FNMS(Ti, T1C, Tg * T1D); + } + T1B = T1r + T1A; + T1K = T1E + T1J; + T2V = T1B - T1K; + T2W = T2k + T2l; + T2X = T2g + T2h; + T2Y = T2W - T2X; + { + E T2f, T2i, T2m, T2n; + T2f = T1r - T1A; + T2i = T2g - T2h; + T2j = T2f - T2i; + T2D = T2f + T2i; + T2m = T2k - T2l; + T2n = T1E - T1J; + T2o = T2m + T2n; + T2E = T2m - T2n; + } + } + { + E T14, T24, T1m, T2b, T17, T25, T1h, T2a; + { + E T12, T13, T1j, T1l; + T12 = Ip[0]; + T13 = Im[0]; + T14 = FMA(T2, T12, T5 * T13); + T24 = FNMS(T5, T12, T2 * T13); + T1j = Ip[WS(rs, 6)]; + T1l = Im[WS(rs, 6)]; + T1m = FMA(T1i, T1j, T1k * T1l); + T2b = FNMS(T1k, T1j, T1i * T1l); + } + { + E T15, T16, T1c, T1g; + T15 = Ip[WS(rs, 4)]; + T16 = Im[WS(rs, 4)]; + T17 = FMA(T3, T15, T6 * T16); + T25 = FNMS(T6, T15, T3 * T16); + T1c = Ip[WS(rs, 2)]; + T1g = Im[WS(rs, 2)]; + T1h = FMA(T1b, T1c, T1f * T1g); + T2a = FNMS(T1f, T1c, T1b * T1g); + } + T18 = T14 + T17; + T1n = T1h + T1m; + T2Q = T18 - T1n; + T2R = T24 + T25; + T2S = T2a + T2b; + T2T = T2R - T2S; + { + E T26, T27, T29, T2c; + T26 = T24 - T25; + T27 = T1h - T1m; + T28 = T26 + T27; + T2A = T26 - T27; + T29 = T14 - T17; + T2c = T2a - T2b; + T2d = T29 - T2c; + T2B = T29 + T2c; + } + } + { + E T23, T2r, T3A, T3C, T2q, T3B, T2u, T3x; + { + E T1R, T22, T3y, T3z; + T1R = T1N - 
T1Q; + T22 = KP707106781 * (T1W - T21); + T23 = T1R + T22; + T2r = T1R - T22; + T3y = KP707106781 * (T2x - T2w); + T3z = T3s + T3r; + T3A = T3y + T3z; + T3C = T3z - T3y; + } + { + E T2e, T2p, T2s, T2t; + T2e = FMA(KP923879532, T28, KP382683432 * T2d); + T2p = FNMS(KP923879532, T2o, KP382683432 * T2j); + T2q = T2e + T2p; + T3B = T2p - T2e; + T2s = FNMS(KP923879532, T2d, KP382683432 * T28); + T2t = FMA(KP382683432, T2o, KP923879532 * T2j); + T2u = T2s - T2t; + T3x = T2s + T2t; + } + Rm[WS(rs, 4)] = T23 - T2q; + Im[WS(rs, 4)] = T3x - T3A; + Rp[WS(rs, 3)] = T23 + T2q; + Ip[WS(rs, 3)] = T3x + T3A; + Rm[0] = T2r - T2u; + Im[0] = T3B - T3C; + Rp[WS(rs, 7)] = T2r + T2u; + Ip[WS(rs, 7)] = T3B + T3C; + } + { + E T2P, T31, T3m, T3o, T30, T3n, T34, T3j; + { + E T2L, T2O, T3k, T3l; + T2L = Tf - TA; + T2O = T2M - T2N; + T2P = T2L + T2O; + T31 = T2L - T2O; + T3k = TZ - TM; + T3l = T3e - T3b; + T3m = T3k + T3l; + T3o = T3l - T3k; + } + { + E T2U, T2Z, T32, T33; + T2U = T2Q + T2T; + T2Z = T2V - T2Y; + T30 = KP707106781 * (T2U + T2Z); + T3n = KP707106781 * (T2Z - T2U); + T32 = T2T - T2Q; + T33 = T2V + T2Y; + T34 = KP707106781 * (T32 - T33); + T3j = KP707106781 * (T32 + T33); + } + Rm[WS(rs, 5)] = T2P - T30; + Im[WS(rs, 5)] = T3j - T3m; + Rp[WS(rs, 2)] = T2P + T30; + Ip[WS(rs, 2)] = T3j + T3m; + Rm[WS(rs, 1)] = T31 - T34; + Im[WS(rs, 1)] = T3n - T3o; + Rp[WS(rs, 6)] = T31 + T34; + Ip[WS(rs, 6)] = T3n + T3o; + } + { + E T2z, T2H, T3u, T3w, T2G, T3v, T2K, T3p; + { + E T2v, T2y, T3q, T3t; + T2v = T1N + T1Q; + T2y = KP707106781 * (T2w + T2x); + T2z = T2v + T2y; + T2H = T2v - T2y; + T3q = KP707106781 * (T1W + T21); + T3t = T3r - T3s; + T3u = T3q + T3t; + T3w = T3t - T3q; + } + { + E T2C, T2F, T2I, T2J; + T2C = FMA(KP382683432, T2A, KP923879532 * T2B); + T2F = FNMS(KP382683432, T2E, KP923879532 * T2D); + T2G = T2C + T2F; + T3v = T2F - T2C; + T2I = FNMS(KP382683432, T2B, KP923879532 * T2A); + T2J = FMA(KP923879532, T2E, KP382683432 * T2D); + T2K = T2I - T2J; + T3p = T2I + T2J; + } + 
Rm[WS(rs, 6)] = T2z - T2G; + Im[WS(rs, 6)] = T3p - T3u; + Rp[WS(rs, 1)] = T2z + T2G; + Ip[WS(rs, 1)] = T3p + T3u; + Rm[WS(rs, 2)] = T2H - T2K; + Im[WS(rs, 2)] = T3v - T3w; + Rp[WS(rs, 5)] = T2H + T2K; + Ip[WS(rs, 5)] = T3v + T3w; + } + { + E T11, T35, T3g, T3i, T1M, T3h, T38, T39; + { + E TB, T10, T3a, T3f; + TB = Tf + TA; + T10 = TM + TZ; + T11 = TB + T10; + T35 = TB - T10; + T3a = T2M + T2N; + T3f = T3b + T3e; + T3g = T3a + T3f; + T3i = T3f - T3a; + } + { + E T1o, T1L, T36, T37; + T1o = T18 + T1n; + T1L = T1B + T1K; + T1M = T1o + T1L; + T3h = T1L - T1o; + T36 = T2R + T2S; + T37 = T2W + T2X; + T38 = T36 - T37; + T39 = T36 + T37; + } + Rm[WS(rs, 7)] = T11 - T1M; + Im[WS(rs, 7)] = T39 - T3g; + Rp[0] = T11 + T1M; + Ip[0] = T39 + T3g; + Rm[WS(rs, 3)] = T35 - T38; + Im[WS(rs, 3)] = T3h - T3i; + Rp[WS(rs, 4)] = T35 + T38; + Ip[WS(rs, 4)] = T3h + T3i; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cf2_16", twinstr, &GENUS, {156, 68, 40, 0} }; + +void X(codelet_hc2cf2_16) (planner *p) { + X(khc2c_register) (p, hc2cf2_16, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1064 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cf2_20 -include hc2cf.h */ + +/* + * This function contains 276 FP additions, 198 FP multiplications, + * (or, 136 additions, 58 multiplications, 140 fused multiply/add), + * 142 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) { + E T59, T5i, T5k, T5e, T5c, T5d, T5j, T5f; + { + E T2, Th, Tf, T6, T5, Tl, T1p, T1n, Ti, T3, Tt, Tv, T24, T1f, T1D; + E Tb, T1P, Tm, T21, T1b, T7, T1A, Tw, T1H, T13, TA, T1L, T17, T1S, Tq; + E T1o, T2g, T1t, T2c, TO, TK; + { + E T1e, Ta, Tk, Tg; + T2 = W[0]; + Th = W[3]; + Tf = W[2]; + T6 = W[5]; + T5 = W[1]; + Tk = T2 * Th; + Tg = T2 * Tf; + T1e = Tf * T6; 
+ Ta = T2 * T6; + Tl = FMA(T5, Tf, Tk); + T1p = FNMS(T5, Tf, Tk); + T1n = FMA(T5, Th, Tg); + Ti = FNMS(T5, Th, Tg); + T3 = W[4]; + Tt = W[6]; + Tv = W[7]; + { + E Tp, Tj, TN, TJ; + Tp = Ti * T6; + T24 = FMA(Th, T3, T1e); + T1f = FNMS(Th, T3, T1e); + T1D = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + Tj = Ti * T3; + { + E T1a, T4, Tu, T1G; + T1a = Tf * T3; + T4 = T2 * T3; + Tu = Ti * Tt; + T1G = T2 * Tt; + { + E T12, Tz, T1K, T16; + T12 = Tf * Tt; + Tz = Ti * Tv; + T1K = T2 * Tv; + T16 = Tf * Tv; + T1P = FNMS(Tl, T6, Tj); + Tm = FMA(Tl, T6, Tj); + T21 = FNMS(Th, T6, T1a); + T1b = FMA(Th, T6, T1a); + T7 = FNMS(T5, T6, T4); + T1A = FMA(T5, T6, T4); + Tw = FMA(Tl, Tv, Tu); + T1H = FMA(T5, Tv, T1G); + T13 = FMA(Th, Tv, T12); + TA = FNMS(Tl, Tt, Tz); + T1L = FNMS(T5, Tt, T1K); + T17 = FNMS(Th, Tt, T16); + T1S = FMA(Tl, T3, Tp); + Tq = FNMS(Tl, T3, Tp); + } + } + T1o = T1n * T3; + T2g = T1n * Tv; + TN = Tm * Tv; + TJ = Tm * Tt; + T1t = T1n * T6; + T2c = T1n * Tt; + TO = FNMS(Tq, Tt, TN); + TK = FMA(Tq, Tv, TJ); + } + } + { + E Te, T2C, T4L, T57, T58, TD, T2H, T4H, T3J, T3Z, T11, T2v, T2P, T3P, T4d; + E T4z, T3n, T43, T2r, T2z, T3b, T3T, T4n, T4v, T3u, T42, T20, T2y, T34, T3S; + E T4k, T4w, T1c, T19, T1d, T3y, T1w, T2U, T1g, T1j, T1l; + { + E T2d, T2h, T2k, T1q, T1u, T2n, TL, TI, TM, T3F, TZ, T2N, TP, TS, TU; + { + E T1, T4K, T8, T9, Tc; + T1 = Rp[0]; + T4K = Rm[0]; + T8 = Rp[WS(rs, 5)]; + T2d = FMA(T1p, Tv, T2c); + T2h = FNMS(T1p, Tt, T2g); + T2k = FMA(T1p, T6, T1o); + T1q = FNMS(T1p, T6, T1o); + T1u = FMA(T1p, T3, T1t); + T2n = FNMS(T1p, T3, T1t); + T9 = T7 * T8; + Tc = Rm[WS(rs, 5)]; + { + E Tx, Ts, T2F, TC, T2E; + { + E Tn, Tr, To, T2D, T4J, Ty, TB, Td, T4I; + Tn = Ip[WS(rs, 2)]; + Tr = Im[WS(rs, 2)]; + Tx = Ip[WS(rs, 7)]; + Td = FMA(Tb, Tc, T9); + T4I = T7 * Tc; + To = Tm * Tn; + T2D = Tm * Tr; + Te = T1 + Td; + T2C = T1 - Td; + T4J = FNMS(Tb, T8, T4I); + Ty = Tw * Tx; + TB = Im[WS(rs, 7)]; + Ts = FMA(Tq, Tr, To); + T4L = T4J + T4K; + T57 = T4K - T4J; + T2F = Tw * TB; 
+ TC = FMA(TA, TB, Ty); + T2E = FNMS(Tq, Tn, T2D); + } + { + E TF, TG, TH, TW, TY, T2G, T3E, TX, T2M; + TF = Rp[WS(rs, 2)]; + T2G = FNMS(TA, Tx, T2F); + T58 = Ts - TC; + TD = Ts + TC; + TG = Ti * TF; + T2H = T2E - T2G; + T4H = T2E + T2G; + TH = Rm[WS(rs, 2)]; + TW = Ip[WS(rs, 9)]; + TY = Im[WS(rs, 9)]; + TL = Rp[WS(rs, 7)]; + TI = FMA(Tl, TH, TG); + T3E = Ti * TH; + TX = Tt * TW; + T2M = Tt * TY; + TM = TK * TL; + T3F = FNMS(Tl, TF, T3E); + TZ = FMA(Tv, TY, TX); + T2N = FNMS(Tv, TW, T2M); + TP = Rm[WS(rs, 7)]; + TS = Ip[WS(rs, 4)]; + TU = Im[WS(rs, 4)]; + } + } + } + { + E T27, T26, T28, T3j, T2p, T39, T29, T2e, T2i; + { + E T22, T23, T25, T2l, T2o, T3i, T2m, T38; + { + E TR, T2J, T3H, TV, T2L, T4b, T3I; + T22 = Rp[WS(rs, 6)]; + { + E TQ, T3G, TT, T2K; + TQ = FMA(TO, TP, TM); + T3G = TK * TP; + TT = T3 * TS; + T2K = T3 * TU; + TR = TI + TQ; + T2J = TI - TQ; + T3H = FNMS(TO, TL, T3G); + TV = FMA(T6, TU, TT); + T2L = FNMS(T6, TS, T2K); + T23 = T21 * T22; + } + T4b = T3F + T3H; + T3I = T3F - T3H; + { + E T10, T3D, T4c, T2O; + T10 = TV + TZ; + T3D = TZ - TV; + T4c = T2L + T2N; + T2O = T2L - T2N; + T3J = T3D - T3I; + T3Z = T3I + T3D; + T11 = TR - T10; + T2v = TR + T10; + T2P = T2J - T2O; + T3P = T2J + T2O; + T4d = T4b + T4c; + T4z = T4c - T4b; + T25 = Rm[WS(rs, 6)]; + } + } + T2l = Ip[WS(rs, 3)]; + T2o = Im[WS(rs, 3)]; + T27 = Rp[WS(rs, 1)]; + T26 = FMA(T24, T25, T23); + T3i = T21 * T25; + T2m = T2k * T2l; + T38 = T2k * T2o; + T28 = T1n * T27; + T3j = FNMS(T24, T22, T3i); + T2p = FMA(T2n, T2o, T2m); + T39 = FNMS(T2n, T2l, T38); + T29 = Rm[WS(rs, 1)]; + T2e = Ip[WS(rs, 8)]; + T2i = Im[WS(rs, 8)]; + } + { + E T1I, T1F, T1J, T3q, T1Y, T32, T1M, T1Q, T1T; + { + E T1B, T1C, T1E, T1V, T1X, T3p, T1W, T31; + { + E T2b, T35, T3l, T2j, T37, T4l, T3m; + T1B = Rp[WS(rs, 4)]; + { + E T2a, T3k, T2f, T36; + T2a = FMA(T1p, T29, T28); + T3k = T1n * T29; + T2f = T2d * T2e; + T36 = T2d * T2i; + T2b = T26 + T2a; + T35 = T26 - T2a; + T3l = FNMS(T1p, T27, T3k); + T2j = FMA(T2h, T2i, T2f); + 
T37 = FNMS(T2h, T2e, T36); + T1C = T1A * T1B; + } + T4l = T3j + T3l; + T3m = T3j - T3l; + { + E T2q, T3h, T4m, T3a; + T2q = T2j + T2p; + T3h = T2p - T2j; + T4m = T37 + T39; + T3a = T37 - T39; + T3n = T3h - T3m; + T43 = T3m + T3h; + T2r = T2b - T2q; + T2z = T2b + T2q; + T3b = T35 - T3a; + T3T = T35 + T3a; + T4n = T4l + T4m; + T4v = T4m - T4l; + T1E = Rm[WS(rs, 4)]; + } + } + T1V = Ip[WS(rs, 1)]; + T1X = Im[WS(rs, 1)]; + T1I = Rp[WS(rs, 9)]; + T1F = FMA(T1D, T1E, T1C); + T3p = T1A * T1E; + T1W = Tf * T1V; + T31 = Tf * T1X; + T1J = T1H * T1I; + T3q = FNMS(T1D, T1B, T3p); + T1Y = FMA(Th, T1X, T1W); + T32 = FNMS(Th, T1V, T31); + T1M = Rm[WS(rs, 9)]; + T1Q = Ip[WS(rs, 6)]; + T1T = Im[WS(rs, 6)]; + } + { + E T14, T15, T18, T1r, T1v, T3x, T1s, T2T; + { + E T1O, T2Y, T3s, T1U, T30, T4i, T3t; + T14 = Rp[WS(rs, 8)]; + { + E T1N, T3r, T1R, T2Z; + T1N = FMA(T1L, T1M, T1J); + T3r = T1H * T1M; + T1R = T1P * T1Q; + T2Z = T1P * T1T; + T1O = T1F + T1N; + T2Y = T1F - T1N; + T3s = FNMS(T1L, T1I, T3r); + T1U = FMA(T1S, T1T, T1R); + T30 = FNMS(T1S, T1Q, T2Z); + T15 = T13 * T14; + } + T4i = T3q + T3s; + T3t = T3q - T3s; + { + E T1Z, T3o, T4j, T33; + T1Z = T1U + T1Y; + T3o = T1Y - T1U; + T4j = T30 + T32; + T33 = T30 - T32; + T3u = T3o - T3t; + T42 = T3t + T3o; + T20 = T1O - T1Z; + T2y = T1O + T1Z; + T34 = T2Y - T33; + T3S = T2Y + T33; + T4k = T4i + T4j; + T4w = T4j - T4i; + T18 = Rm[WS(rs, 8)]; + } + } + T1r = Ip[WS(rs, 5)]; + T1v = Im[WS(rs, 5)]; + T1c = Rp[WS(rs, 3)]; + T19 = FMA(T17, T18, T15); + T3x = T13 * T18; + T1s = T1q * T1r; + T2T = T1q * T1v; + T1d = T1b * T1c; + T3y = FNMS(T17, T14, T3x); + T1w = FMA(T1u, T1v, T1s); + T2U = FNMS(T1u, T1r, T2T); + T1g = Rm[WS(rs, 3)]; + T1j = Ip[0]; + T1l = Im[0]; + } + } + } + } + { + E T3C, T40, T2W, T3Q, T4M, T4E, T4F, T4U, T4S; + { + E T4W, T2u, T2w, T4g, T4V, T4D, T4B, T54, T56, T4Y, T4u, T4C; + { + E T4x, TE, T53, T1z, T2s, T52, T4A, T4t, T4s, T2t; + { + E T1i, T2Q, T3A, T1m, T2S; + T4x = T4v - T4w; + T4W = T4w + T4v; + { + E T1h, T3z, 
T1k, T2R; + T1h = FMA(T1f, T1g, T1d); + T3z = T1b * T1g; + T1k = T2 * T1j; + T2R = T2 * T1l; + T1i = T19 + T1h; + T2Q = T19 - T1h; + T3A = FNMS(T1f, T1c, T3z); + T1m = FMA(T5, T1l, T1k); + T2S = FNMS(T5, T1j, T2R); + } + TE = Te - TD; + T2u = Te + TD; + { + E T4e, T3B, T1x, T3w; + T4e = T3y + T3A; + T3B = T3y - T3A; + T1x = T1m + T1w; + T3w = T1w - T1m; + { + E T4f, T2V, T1y, T4y; + T4f = T2S + T2U; + T2V = T2S - T2U; + T3C = T3w - T3B; + T40 = T3B + T3w; + T1y = T1i - T1x; + T2w = T1i + T1x; + T2W = T2Q - T2V; + T3Q = T2Q + T2V; + T4g = T4e + T4f; + T4y = T4f - T4e; + T53 = T1y - T11; + T1z = T11 + T1y; + T2s = T20 + T2r; + T52 = T20 - T2r; + T4V = T4z + T4y; + T4A = T4y - T4z; + } + } + } + T4t = T1z - T2s; + T2t = T1z + T2s; + T4D = FMA(KP618033988, T4x, T4A); + T4B = FNMS(KP618033988, T4A, T4x); + T54 = FMA(KP618033988, T53, T52); + T56 = FNMS(KP618033988, T52, T53); + Rm[WS(rs, 9)] = TE + T2t; + T4s = FNMS(KP250000000, T2t, TE); + T4Y = T4L - T4H; + T4M = T4H + T4L; + T4u = FNMS(KP559016994, T4t, T4s); + T4C = FMA(KP559016994, T4t, T4s); + } + { + E T2x, T4Q, T4p, T4r, T4R, T2A, T51, T55; + { + E T4h, T50, T4X, T4o, T4Z; + T4E = T4d + T4g; + T4h = T4d - T4g; + Rm[WS(rs, 1)] = FMA(KP951056516, T4B, T4u); + Rp[WS(rs, 2)] = FNMS(KP951056516, T4B, T4u); + Rp[WS(rs, 6)] = FMA(KP951056516, T4D, T4C); + Rm[WS(rs, 5)] = FNMS(KP951056516, T4D, T4C); + T50 = T4W - T4V; + T4X = T4V + T4W; + T4o = T4k - T4n; + T4F = T4k + T4n; + T2x = T2v + T2w; + T4Q = T2v - T2w; + Im[WS(rs, 9)] = T4X - T4Y; + T4Z = FMA(KP250000000, T4X, T4Y); + T4p = FMA(KP618033988, T4o, T4h); + T4r = FNMS(KP618033988, T4h, T4o); + T4R = T2z - T2y; + T2A = T2y + T2z; + T51 = FNMS(KP559016994, T50, T4Z); + T55 = FMA(KP559016994, T50, T4Z); + } + { + E T49, T48, T2B, T4a, T4q; + T2B = T2x + T2A; + T49 = T2x - T2A; + Ip[WS(rs, 2)] = FMA(KP951056516, T54, T51); + Im[WS(rs, 1)] = FMS(KP951056516, T54, T51); + Ip[WS(rs, 6)] = FMA(KP951056516, T56, T55); + Im[WS(rs, 5)] = FMS(KP951056516, T56, T55); + Rp[0] = 
T2u + T2B; + T48 = FNMS(KP250000000, T2B, T2u); + T4a = FMA(KP559016994, T49, T48); + T4q = FNMS(KP559016994, T49, T48); + T4U = FMA(KP618033988, T4Q, T4R); + T4S = FNMS(KP618033988, T4R, T4Q); + Rm[WS(rs, 3)] = FMA(KP951056516, T4p, T4a); + Rp[WS(rs, 4)] = FNMS(KP951056516, T4p, T4a); + Rp[WS(rs, 8)] = FMA(KP951056516, T4r, T4q); + Rm[WS(rs, 7)] = FNMS(KP951056516, T4r, T4q); + } + } + } + { + E T3O, T5u, T5w, T5o, T5q, T5n; + { + E T5m, T5l, T2I, T4O, T3N, T3L, T2X, T5s, T4N, T5t, T3c, T3v, T3K, T4G; + T5m = T3u + T3n; + T3v = T3n - T3u; + T3K = T3C - T3J; + T5l = T3J + T3C; + T3O = T2C + T2H; + T2I = T2C - T2H; + T4O = T4E - T4F; + T4G = T4E + T4F; + T3N = FMA(KP618033988, T3v, T3K); + T3L = FNMS(KP618033988, T3K, T3v); + T2X = T2P + T2W; + T5s = T2P - T2W; + Ip[0] = T4G + T4M; + T4N = FNMS(KP250000000, T4G, T4M); + T5t = T34 - T3b; + T3c = T34 + T3b; + { + E T3f, T3e, T4P, T4T, T3d, T3M, T3g; + T4P = FMA(KP559016994, T4O, T4N); + T4T = FNMS(KP559016994, T4O, T4N); + T3f = T2X - T3c; + T3d = T2X + T3c; + Ip[WS(rs, 4)] = FMA(KP951056516, T4S, T4P); + Im[WS(rs, 3)] = FMS(KP951056516, T4S, T4P); + Ip[WS(rs, 8)] = FMA(KP951056516, T4U, T4T); + Im[WS(rs, 7)] = FMS(KP951056516, T4U, T4T); + Rm[WS(rs, 4)] = T2I + T3d; + T3e = FNMS(KP250000000, T3d, T2I); + T5u = FMA(KP618033988, T5t, T5s); + T5w = FNMS(KP618033988, T5s, T5t); + T5o = T58 + T57; + T59 = T57 - T58; + T3M = FMA(KP559016994, T3f, T3e); + T3g = FNMS(KP559016994, T3f, T3e); + Rp[WS(rs, 7)] = FNMS(KP951056516, T3L, T3g); + Rp[WS(rs, 3)] = FMA(KP951056516, T3L, T3g); + Rm[0] = FNMS(KP951056516, T3N, T3M); + Rm[WS(rs, 8)] = FMA(KP951056516, T3N, T3M); + T5q = T5l - T5m; + T5n = T5l + T5m; + } + } + { + E T5a, T5b, T47, T45, T5h, T5g, T3V, T3X, T41, T44, T5p, T3W, T46, T3Y; + T5a = T3Z + T40; + T41 = T3Z - T40; + T44 = T42 - T43; + T5b = T42 + T43; + Im[WS(rs, 4)] = T5n - T5o; + T5p = FMA(KP250000000, T5n, T5o); + T47 = FNMS(KP618033988, T41, T44); + T45 = FMA(KP618033988, T44, T41); + { + E T5r, T5v, T3R, T3U; 
+ T5r = FNMS(KP559016994, T5q, T5p); + T5v = FMA(KP559016994, T5q, T5p); + T3R = T3P + T3Q; + T5h = T3P - T3Q; + T5g = T3S - T3T; + T3U = T3S + T3T; + Im[0] = -(FMA(KP951056516, T5u, T5r)); + Im[WS(rs, 8)] = FMS(KP951056516, T5u, T5r); + Ip[WS(rs, 7)] = FMA(KP951056516, T5w, T5v); + Ip[WS(rs, 3)] = FNMS(KP951056516, T5w, T5v); + T3V = T3R + T3U; + T3X = T3R - T3U; + } + Rp[WS(rs, 5)] = T3O + T3V; + T3W = FNMS(KP250000000, T3V, T3O); + T5i = FNMS(KP618033988, T5h, T5g); + T5k = FMA(KP618033988, T5g, T5h); + T46 = FNMS(KP559016994, T3X, T3W); + T3Y = FMA(KP559016994, T3X, T3W); + Rp[WS(rs, 9)] = FNMS(KP951056516, T45, T3Y); + Rp[WS(rs, 1)] = FMA(KP951056516, T45, T3Y); + Rm[WS(rs, 2)] = FNMS(KP951056516, T47, T46); + Rm[WS(rs, 6)] = FMA(KP951056516, T47, T46); + T5e = T5a - T5b; + T5c = T5a + T5b; + } + } + } + } + } + Ip[WS(rs, 5)] = T5c + T59; + T5d = FNMS(KP250000000, T5c, T59); + T5j = FMA(KP559016994, T5e, T5d); + T5f = FNMS(KP559016994, T5e, T5d); + Im[WS(rs, 2)] = -(FMA(KP951056516, T5i, T5f)); + Im[WS(rs, 6)] = FMS(KP951056516, T5i, T5f); + Ip[WS(rs, 9)] = FMA(KP951056516, T5k, T5j); + Ip[WS(rs, 1)] = FNMS(KP951056516, T5k, T5j); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cf2_20", twinstr, &GENUS, {136, 58, 140, 0} }; + +void X(codelet_hc2cf2_20) (planner *p) { + X(khc2c_register) (p, hc2cf2_20, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cf2_20 -include hc2cf.h */ + +/* + * This function contains 276 FP additions, 164 FP multiplications, + * (or, 204 additions, 92 multiplications, 72 fused multiply/add), + * 123 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride 
rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) { + E T2, T5, Tg, Ti, Tk, To, T1h, T1f, T6, T3, T8, T14, T1Q, Tc, T1O; + E T1v, T18, T1t, T1n, T24, T1j, T22, Tq, Tu, T1E, T1G, Tx, Ty, Tz, TJ; + E T1Z, TB, T1X, T1A, TZ, TL, T1y, TX; + { + E T7, T16, Ta, T13, T4, T17, Tb, T12; + { + E Th, Tn, Tj, Tm; + T2 = W[0]; + T5 = W[1]; + Tg = W[2]; + Ti = W[3]; + Th = T2 * Tg; + Tn = T5 * Tg; + Tj = T5 * Ti; + Tm = T2 * Ti; + Tk = Th - Tj; + To = Tm + Tn; + T1h = Tm - Tn; + T1f = Th + Tj; + T6 = W[5]; + T7 = T5 * T6; + T16 = Tg * T6; + Ta = T2 * T6; + T13 = Ti * T6; + T3 = W[4]; + T4 = T2 * T3; + T17 = Ti * T3; + Tb = T5 * T3; + T12 = Tg * T3; + } + T8 = T4 - T7; + T14 = T12 + T13; + T1Q = T16 + T17; + Tc = Ta + Tb; + T1O = T12 - T13; + T1v = Ta - Tb; + T18 = T16 - T17; + T1t = T4 + T7; + { + E T1l, T1m, T1g, T1i; + T1l = T1f * T6; + T1m = T1h * T3; + T1n = T1l + T1m; + T24 = T1l - T1m; + T1g = T1f * T3; + T1i = T1h * T6; + T1j = T1g - T1i; + T22 = T1g + T1i; + { + E Tl, Tp, Ts, Tt; + Tl = Tk * T3; + Tp = To * T6; + Tq = Tl + Tp; + Ts = Tk * T6; + Tt = To * T3; + Tu = Ts - Tt; + T1E = Tl - Tp; + T1G = Ts + Tt; + Tx = W[6]; + Ty = W[7]; + Tz = FMA(Tk, Tx, To * Ty); + TJ = FMA(Tq, Tx, Tu * Ty); + T1Z = FNMS(T1h, Tx, T1f * Ty); + TB = FNMS(To, Tx, Tk * Ty); + T1X = FMA(T1f, Tx, T1h * Ty); + T1A = FNMS(T5, Tx, T2 * Ty); + TZ = FNMS(Ti, Tx, Tg * Ty); + TL = FNMS(Tu, Tx, Tq * Ty); + T1y = FMA(T2, Tx, T5 * Ty); + TX = FMA(Tg, Tx, Ti * Ty); + } + } + } + { + E TF, T2b, T4D, T4M, T2K, T3r, T4a, T4m, T1N, T28, T29, T3J, T3M, T44, T3U; + E T3V, T4j, T2f, T2g, T2h, T2n, 
T2s, T4K, T3g, T3h, T4z, T3n, T3o, T3p, T30; + E T35, T36, TW, T1r, T1s, T3C, T3F, T43, T3X, T3Y, T4k, T2c, T2d, T2e, T2y; + E T2D, T4J, T3d, T3e, T4y, T3k, T3l, T3m, T2P, T2U, T2V; + { + E T1, T48, Te, T47, Tw, T2H, TD, T2I, T9, Td; + T1 = Rp[0]; + T48 = Rm[0]; + T9 = Rp[WS(rs, 5)]; + Td = Rm[WS(rs, 5)]; + Te = FMA(T8, T9, Tc * Td); + T47 = FNMS(Tc, T9, T8 * Td); + { + E Tr, Tv, TA, TC; + Tr = Ip[WS(rs, 2)]; + Tv = Im[WS(rs, 2)]; + Tw = FMA(Tq, Tr, Tu * Tv); + T2H = FNMS(Tu, Tr, Tq * Tv); + TA = Ip[WS(rs, 7)]; + TC = Im[WS(rs, 7)]; + TD = FMA(Tz, TA, TB * TC); + T2I = FNMS(TB, TA, Tz * TC); + } + { + E Tf, TE, T4B, T4C; + Tf = T1 + Te; + TE = Tw + TD; + TF = Tf - TE; + T2b = Tf + TE; + T4B = T48 - T47; + T4C = Tw - TD; + T4D = T4B - T4C; + T4M = T4C + T4B; + } + { + E T2G, T2J, T46, T49; + T2G = T1 - Te; + T2J = T2H - T2I; + T2K = T2G - T2J; + T3r = T2G + T2J; + T46 = T2H + T2I; + T49 = T47 + T48; + T4a = T46 + T49; + T4m = T49 - T46; + } + } + { + E T1D, T3H, T2l, T2W, T27, T3L, T2r, T34, T1M, T3I, T2m, T2Z, T1W, T3K, T2q; + E T31; + { + E T1x, T2j, T1C, T2k; + { + E T1u, T1w, T1z, T1B; + T1u = Rp[WS(rs, 4)]; + T1w = Rm[WS(rs, 4)]; + T1x = FMA(T1t, T1u, T1v * T1w); + T2j = FNMS(T1v, T1u, T1t * T1w); + T1z = Rp[WS(rs, 9)]; + T1B = Rm[WS(rs, 9)]; + T1C = FMA(T1y, T1z, T1A * T1B); + T2k = FNMS(T1A, T1z, T1y * T1B); + } + T1D = T1x + T1C; + T3H = T2j + T2k; + T2l = T2j - T2k; + T2W = T1x - T1C; + } + { + E T21, T32, T26, T33; + { + E T1Y, T20, T23, T25; + T1Y = Ip[WS(rs, 8)]; + T20 = Im[WS(rs, 8)]; + T21 = FMA(T1X, T1Y, T1Z * T20); + T32 = FNMS(T1Z, T1Y, T1X * T20); + T23 = Ip[WS(rs, 3)]; + T25 = Im[WS(rs, 3)]; + T26 = FMA(T22, T23, T24 * T25); + T33 = FNMS(T24, T23, T22 * T25); + } + T27 = T21 + T26; + T3L = T32 + T33; + T2r = T21 - T26; + T34 = T32 - T33; + } + { + E T1I, T2X, T1L, T2Y; + { + E T1F, T1H, T1J, T1K; + T1F = Ip[WS(rs, 6)]; + T1H = Im[WS(rs, 6)]; + T1I = FMA(T1E, T1F, T1G * T1H); + T2X = FNMS(T1G, T1F, T1E * T1H); + T1J = Ip[WS(rs, 1)]; + T1K = 
Im[WS(rs, 1)]; + T1L = FMA(Tg, T1J, Ti * T1K); + T2Y = FNMS(Ti, T1J, Tg * T1K); + } + T1M = T1I + T1L; + T3I = T2X + T2Y; + T2m = T1I - T1L; + T2Z = T2X - T2Y; + } + { + E T1S, T2o, T1V, T2p; + { + E T1P, T1R, T1T, T1U; + T1P = Rp[WS(rs, 6)]; + T1R = Rm[WS(rs, 6)]; + T1S = FMA(T1O, T1P, T1Q * T1R); + T2o = FNMS(T1Q, T1P, T1O * T1R); + T1T = Rp[WS(rs, 1)]; + T1U = Rm[WS(rs, 1)]; + T1V = FMA(T1f, T1T, T1h * T1U); + T2p = FNMS(T1h, T1T, T1f * T1U); + } + T1W = T1S + T1V; + T3K = T2o + T2p; + T2q = T2o - T2p; + T31 = T1S - T1V; + } + T1N = T1D - T1M; + T28 = T1W - T27; + T29 = T1N + T28; + T3J = T3H + T3I; + T3M = T3K + T3L; + T44 = T3J + T3M; + T3U = T3H - T3I; + T3V = T3L - T3K; + T4j = T3V - T3U; + T2f = T1D + T1M; + T2g = T1W + T27; + T2h = T2f + T2g; + T2n = T2l + T2m; + T2s = T2q + T2r; + T4K = T2n + T2s; + T3g = T2l - T2m; + T3h = T2q - T2r; + T4z = T3g + T3h; + T3n = T2W + T2Z; + T3o = T31 + T34; + T3p = T3n + T3o; + T30 = T2W - T2Z; + T35 = T31 - T34; + T36 = T30 + T35; + } + { + E TO, T3A, T2w, T2L, T1q, T3E, T2z, T2T, TV, T3B, T2x, T2O, T1b, T3D, T2C; + E T2Q; + { + E TI, T2u, TN, T2v; + { + E TG, TH, TK, TM; + TG = Rp[WS(rs, 2)]; + TH = Rm[WS(rs, 2)]; + TI = FMA(Tk, TG, To * TH); + T2u = FNMS(To, TG, Tk * TH); + TK = Rp[WS(rs, 7)]; + TM = Rm[WS(rs, 7)]; + TN = FMA(TJ, TK, TL * TM); + T2v = FNMS(TL, TK, TJ * TM); + } + TO = TI + TN; + T3A = T2u + T2v; + T2w = T2u - T2v; + T2L = TI - TN; + } + { + E T1e, T2R, T1p, T2S; + { + E T1c, T1d, T1k, T1o; + T1c = Ip[0]; + T1d = Im[0]; + T1e = FMA(T2, T1c, T5 * T1d); + T2R = FNMS(T5, T1c, T2 * T1d); + T1k = Ip[WS(rs, 5)]; + T1o = Im[WS(rs, 5)]; + T1p = FMA(T1j, T1k, T1n * T1o); + T2S = FNMS(T1n, T1k, T1j * T1o); + } + T1q = T1e + T1p; + T3E = T2R + T2S; + T2z = T1p - T1e; + T2T = T2R - T2S; + } + { + E TR, T2M, TU, T2N; + { + E TP, TQ, TS, TT; + TP = Ip[WS(rs, 4)]; + TQ = Im[WS(rs, 4)]; + TR = FMA(T3, TP, T6 * TQ); + T2M = FNMS(T6, TP, T3 * TQ); + TS = Ip[WS(rs, 9)]; + TT = Im[WS(rs, 9)]; + TU = FMA(Tx, TS, Ty * TT); + 
T2N = FNMS(Ty, TS, Tx * TT); + } + TV = TR + TU; + T3B = T2M + T2N; + T2x = TR - TU; + T2O = T2M - T2N; + } + { + E T11, T2A, T1a, T2B; + { + E TY, T10, T15, T19; + TY = Rp[WS(rs, 8)]; + T10 = Rm[WS(rs, 8)]; + T11 = FMA(TX, TY, TZ * T10); + T2A = FNMS(TZ, TY, TX * T10); + T15 = Rp[WS(rs, 3)]; + T19 = Rm[WS(rs, 3)]; + T1a = FMA(T14, T15, T18 * T19); + T2B = FNMS(T18, T15, T14 * T19); + } + T1b = T11 + T1a; + T3D = T2A + T2B; + T2C = T2A - T2B; + T2Q = T11 - T1a; + } + TW = TO - TV; + T1r = T1b - T1q; + T1s = TW + T1r; + T3C = T3A + T3B; + T3F = T3D + T3E; + T43 = T3C + T3F; + T3X = T3A - T3B; + T3Y = T3D - T3E; + T4k = T3X + T3Y; + T2c = TO + TV; + T2d = T1b + T1q; + T2e = T2c + T2d; + T2y = T2w + T2x; + T2D = T2z - T2C; + T4J = T2D - T2y; + T3d = T2w - T2x; + T3e = T2C + T2z; + T4y = T3d + T3e; + T3k = T2L + T2O; + T3l = T2Q + T2T; + T3m = T3k + T3l; + T2P = T2L - T2O; + T2U = T2Q - T2T; + T2V = T2P + T2U; + } + { + E T3S, T2a, T3R, T40, T42, T3W, T3Z, T41, T3T; + T3S = KP559016994 * (T1s - T29); + T2a = T1s + T29; + T3R = FNMS(KP250000000, T2a, TF); + T3W = T3U + T3V; + T3Z = T3X - T3Y; + T40 = FNMS(KP587785252, T3Z, KP951056516 * T3W); + T42 = FMA(KP951056516, T3Z, KP587785252 * T3W); + Rm[WS(rs, 9)] = TF + T2a; + T41 = T3S + T3R; + Rm[WS(rs, 5)] = T41 - T42; + Rp[WS(rs, 6)] = T41 + T42; + T3T = T3R - T3S; + Rp[WS(rs, 2)] = T3T - T40; + Rm[WS(rs, 1)] = T3T + T40; + } + { + E T4r, T4l, T4q, T4p, T4t, T4n, T4o, T4u, T4s; + T4r = KP559016994 * (T4k + T4j); + T4l = T4j - T4k; + T4q = FMA(KP250000000, T4l, T4m); + T4n = T1r - TW; + T4o = T1N - T28; + T4p = FMA(KP587785252, T4n, KP951056516 * T4o); + T4t = FNMS(KP587785252, T4o, KP951056516 * T4n); + Im[WS(rs, 9)] = T4l - T4m; + T4u = T4r + T4q; + Im[WS(rs, 5)] = T4t - T4u; + Ip[WS(rs, 6)] = T4t + T4u; + T4s = T4q - T4r; + Im[WS(rs, 1)] = T4p - T4s; + Ip[WS(rs, 2)] = T4p + T4s; + } + { + E T3x, T2i, T3y, T3O, T3Q, T3G, T3N, T3P, T3z; + T3x = KP559016994 * (T2e - T2h); + T2i = T2e + T2h; + T3y = FNMS(KP250000000, T2i, 
T2b); + T3G = T3C - T3F; + T3N = T3J - T3M; + T3O = FMA(KP951056516, T3G, KP587785252 * T3N); + T3Q = FNMS(KP587785252, T3G, KP951056516 * T3N); + Rp[0] = T2b + T2i; + T3P = T3y - T3x; + Rm[WS(rs, 7)] = T3P - T3Q; + Rp[WS(rs, 8)] = T3P + T3Q; + T3z = T3x + T3y; + Rp[WS(rs, 4)] = T3z - T3O; + Rm[WS(rs, 3)] = T3z + T3O; + } + { + E T4e, T45, T4f, T4d, T4h, T4b, T4c, T4i, T4g; + T4e = KP559016994 * (T43 - T44); + T45 = T43 + T44; + T4f = FNMS(KP250000000, T45, T4a); + T4b = T2c - T2d; + T4c = T2f - T2g; + T4d = FMA(KP951056516, T4b, KP587785252 * T4c); + T4h = FNMS(KP951056516, T4c, KP587785252 * T4b); + Ip[0] = T45 + T4a; + T4i = T4f - T4e; + Im[WS(rs, 7)] = T4h - T4i; + Ip[WS(rs, 8)] = T4h + T4i; + T4g = T4e + T4f; + Im[WS(rs, 3)] = T4d - T4g; + Ip[WS(rs, 4)] = T4d + T4g; + } + { + E T39, T37, T38, T2F, T3b, T2t, T2E, T3c, T3a; + T39 = KP559016994 * (T2V - T36); + T37 = T2V + T36; + T38 = FNMS(KP250000000, T37, T2K); + T2t = T2n - T2s; + T2E = T2y + T2D; + T2F = FNMS(KP587785252, T2E, KP951056516 * T2t); + T3b = FMA(KP951056516, T2E, KP587785252 * T2t); + Rm[WS(rs, 4)] = T2K + T37; + T3c = T39 + T38; + Rm[WS(rs, 8)] = T3b + T3c; + Rm[0] = T3c - T3b; + T3a = T38 - T39; + Rp[WS(rs, 3)] = T2F + T3a; + Rp[WS(rs, 7)] = T3a - T2F; + } + { + E T4Q, T4L, T4R, T4P, T4U, T4N, T4O, T4T, T4S; + T4Q = KP559016994 * (T4J + T4K); + T4L = T4J - T4K; + T4R = FMA(KP250000000, T4L, T4M); + T4N = T2P - T2U; + T4O = T30 - T35; + T4P = FMA(KP951056516, T4N, KP587785252 * T4O); + T4U = FNMS(KP587785252, T4N, KP951056516 * T4O); + Im[WS(rs, 4)] = T4L - T4M; + T4T = T4Q + T4R; + Ip[WS(rs, 3)] = T4T - T4U; + Ip[WS(rs, 7)] = T4U + T4T; + T4S = T4Q - T4R; + Im[WS(rs, 8)] = T4P + T4S; + Im[0] = T4S - T4P; + } + { + E T3q, T3s, T3t, T3j, T3v, T3f, T3i, T3w, T3u; + T3q = KP559016994 * (T3m - T3p); + T3s = T3m + T3p; + T3t = FNMS(KP250000000, T3s, T3r); + T3f = T3d - T3e; + T3i = T3g - T3h; + T3j = FMA(KP951056516, T3f, KP587785252 * T3i); + T3v = FNMS(KP587785252, T3f, KP951056516 * T3i); + 
Rp[WS(rs, 5)] = T3r + T3s; + T3w = T3t - T3q; + Rm[WS(rs, 6)] = T3v + T3w; + Rm[WS(rs, 2)] = T3w - T3v; + T3u = T3q + T3t; + Rp[WS(rs, 1)] = T3j + T3u; + Rp[WS(rs, 9)] = T3u - T3j; + } + { + E T4A, T4E, T4F, T4x, T4I, T4v, T4w, T4H, T4G; + T4A = KP559016994 * (T4y - T4z); + T4E = T4y + T4z; + T4F = FNMS(KP250000000, T4E, T4D); + T4v = T3n - T3o; + T4w = T3k - T3l; + T4x = FNMS(KP587785252, T4w, KP951056516 * T4v); + T4I = FMA(KP951056516, T4w, KP587785252 * T4v); + Ip[WS(rs, 5)] = T4E + T4D; + T4H = T4A + T4F; + Ip[WS(rs, 1)] = T4H - T4I; + Ip[WS(rs, 9)] = T4I + T4H; + T4G = T4A - T4F; + Im[WS(rs, 6)] = T4x + T4G; + Im[WS(rs, 2)] = T4G - T4x; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cf2_20", twinstr, &GENUS, {204, 92, 72, 0} }; + +void X(codelet_hc2cf2_20) (planner *p) { + X(khc2c_register) (p, hc2cf2_20, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1841 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:25 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cf2_32 -include hc2cf.h */ + +/* + * This function contains 488 FP additions, 350 FP multiplications, + * (or, 236 additions, 98 multiplications, 252 fused multiply/add), + * 181 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) { + E T9A, T9z; + { + E T2, T8, T3, T6, Te, Tr, T18, T4, Ta, Tz, T1n, T10, Ti, T5, Tc; + T2 = W[0]; + T8 = W[4]; + T3 = W[2]; + T6 = W[3]; + Te = W[6]; + Tr = T2 * T8; + T18 = T3 * T8; + T4 = T2 * T3; + Ta = T2 * T6; + Tz = T3 * Te; + T1n = T8 * Te; + T10 = T2 * Te; + Ti = W[7]; + T5 = W[1]; + Tc = W[5]; + { + E T34, T31, T2X, T2T, Tq, T46, T8H, T97, TH, T98, T4b, T8D, TZ, T7f, T4j; + E T6t, T1g, T7g, 
T4q, T6u, T1J, T7m, T6y, T4z, T7l, T8d, T6x, T4G, T2k, T7o; + E T7r, T8e, T6B, T4O, T6A, T4V, T7L, T3G, T6P, T61, T6M, T5E, T8n, T7J, T5s; + E T6I, T2N, T7A, T55, T6F, T7x, T8i, T5L, T62, T43, T7G, T5S, T63, T7O, T8o; + E T2U, T2R, T2V, T58, T3a, T5h, T2Y, T32, T35; + { + E T1K, T23, T1N, T26, T2b, T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J; + E T2F, Td, TP, T3r, T3n, T2w, T2s, T3Q, T3M, T1Z, T1V, T2g, T2c; + { + E T11, T1C, TM, Tb, TJ, T7, T1o, T19, T1w, T1F, T15, T1s, T1d, T1z, TW; + E TS, Ty, T48, TG, T4a; + { + E T1, TA, Ts, TE, Tw, Tn, Tj, T8G, Tk, To, T14; + T1 = Rp[0]; + TA = FMA(T6, Ti, Tz); + T1K = FNMS(T6, Ti, Tz); + T14 = T2 * Ti; + { + E T1r, TD, T1c, Tv; + T1r = T8 * Ti; + TD = T3 * Ti; + T11 = FNMS(T5, Ti, T10); + T1C = FMA(T5, Ti, T10); + TM = FMA(T5, T3, Ta); + Tb = FNMS(T5, T3, Ta); + TJ = FNMS(T5, T6, T4); + T7 = FMA(T5, T6, T4); + T1o = FMA(Tc, Ti, T1n); + T23 = FMA(T6, Tc, T18); + T19 = FNMS(T6, Tc, T18); + T1w = FNMS(T5, Tc, Tr); + Ts = FMA(T5, Tc, Tr); + T1c = T3 * Tc; + Tv = T2 * Tc; + T1F = FNMS(T5, Te, T14); + T15 = FMA(T5, Te, T14); + T1s = FNMS(Tc, Te, T1r); + T1N = FMA(T6, Te, TD); + TE = FNMS(T6, Te, TD); + { + E T1T, T3i, T3e, T1Q; + T1T = TJ * Tc; + T3i = TJ * Ti; + T3e = TJ * Te; + T1Q = TJ * T8; + { + E Tg, T2I, T2E, T9; + Tg = T7 * Tc; + T2I = T7 * Ti; + T2E = T7 * Te; + T9 = T7 * T8; + { + E T3q, T3m, T2v, T2r; + T3q = T19 * Ti; + T3m = T19 * Te; + T2v = T1w * Ti; + T2r = T1w * Te; + { + E T2W, T2S, T3P, T3L; + T2W = T23 * Ti; + T2S = T23 * Te; + T3P = Ts * Ti; + T3L = Ts * Te; + T26 = FNMS(T6, T8, T1c); + T1d = FMA(T6, T8, T1c); + T1z = FMA(T5, T8, Tv); + Tw = FNMS(T5, T8, Tv); + T2b = FNMS(TM, T8, T1T); + T1U = FMA(TM, T8, T1T); + T3C = FNMS(TM, Te, T3i); + T3j = FMA(TM, Te, T3i); + T3z = FMA(TM, Ti, T3e); + T3f = FNMS(TM, Ti, T3e); + T1R = FNMS(TM, Tc, T1Q); + T29 = FMA(TM, Tc, T1Q); + TR = FNMS(Tb, T8, Tg); + Th = FMA(Tb, T8, Tg); + T34 = FMA(Tb, Te, T2I); + T2J = FNMS(Tb, Te, T2I); + T31 = FNMS(Tb, Ti, T2E); + T2F = 
FMA(Tb, Ti, T2E); + Td = FNMS(Tb, Tc, T9); + TP = FMA(Tb, Tc, T9); + T2X = FNMS(T26, Te, T2W); + T2T = FMA(T26, Ti, T2S); + T3r = FNMS(T1d, Te, T3q); + T3n = FMA(T1d, Ti, T3m); + T2w = FNMS(T1z, Te, T2v); + T2s = FMA(T1z, Ti, T2r); + T3Q = FNMS(Tw, Te, T3P); + T3M = FMA(Tw, Ti, T3L); + { + E T1Y, T1S, T2f, T2a; + T1Y = T1R * Ti; + T1S = T1R * Te; + T2f = T29 * Ti; + T2a = T29 * Te; + { + E Tm, Tf, TV, TQ; + Tm = Td * Ti; + Tf = Td * Te; + TV = TP * Ti; + TQ = TP * Te; + T1Z = FNMS(T1U, Te, T1Y); + T1V = FMA(T1U, Ti, T1S); + T2g = FNMS(T2b, Te, T2f); + T2c = FMA(T2b, Ti, T2a); + Tn = FNMS(Th, Te, Tm); + Tj = FMA(Th, Ti, Tf); + TW = FNMS(TR, Te, TV); + TS = FMA(TR, Ti, TQ); + T8G = Rm[0]; + } + } + } + } + } + } + } + Tk = Rp[WS(rs, 8)]; + To = Rm[WS(rs, 8)]; + { + E Tt, Tx, Tu, T47, TB, TF, TC, T49; + { + E Tl, T8E, Tp, T8F; + Tt = Rp[WS(rs, 4)]; + Tx = Rm[WS(rs, 4)]; + Tl = Tj * Tk; + T8E = Tj * To; + Tu = Ts * Tt; + T47 = Ts * Tx; + Tp = FMA(Tn, To, Tl); + T8F = FNMS(Tn, Tk, T8E); + TB = Rp[WS(rs, 12)]; + TF = Rm[WS(rs, 12)]; + Tq = T1 + Tp; + T46 = T1 - Tp; + T8H = T8F + T8G; + T97 = T8G - T8F; + TC = TA * TB; + T49 = TA * TF; + } + Ty = FMA(Tw, Tx, Tu); + T48 = FNMS(Tw, Tt, T47); + TG = FMA(TE, TF, TC); + T4a = FNMS(TE, TB, T49); + } + } + { + E TT, TX, TO, T4f, TU, T4g; + { + E TK, TN, TL, T4e; + TK = Rp[WS(rs, 2)]; + TN = Rm[WS(rs, 2)]; + TH = Ty + TG; + T98 = Ty - TG; + T4b = T48 - T4a; + T8D = T48 + T4a; + TL = TJ * TK; + T4e = TJ * TN; + TT = Rp[WS(rs, 10)]; + TX = Rm[WS(rs, 10)]; + TO = FMA(TM, TN, TL); + T4f = FNMS(TM, TK, T4e); + TU = TS * TT; + T4g = TS * TX; + } + { + E T17, T4m, T1a, T1e, T4d, T4i; + { + E T12, T16, TY, T4h, T13, T4l; + T12 = Rp[WS(rs, 14)]; + T16 = Rm[WS(rs, 14)]; + TY = FMA(TW, TX, TU); + T4h = FNMS(TW, TT, T4g); + T13 = T11 * T12; + T4l = T11 * T16; + TZ = TO + TY; + T4d = TO - TY; + T7f = T4f + T4h; + T4i = T4f - T4h; + T17 = FMA(T15, T16, T13); + T4m = FNMS(T15, T12, T4l); + } + T4j = T4d + T4i; + T6t = T4i - T4d; + T1a = 
Rp[WS(rs, 6)]; + T1e = Rm[WS(rs, 6)]; + { + E T1m, T4B, T1H, T4x, T1x, T1A, T1u, T4D, T1y, T4u; + { + E T1D, T1G, T1E, T4w; + { + E T1f, T4o, T4k, T4p; + { + E T1j, T1l, T1b, T4n, T1k, T4A; + T1j = Rp[WS(rs, 1)]; + T1l = Rm[WS(rs, 1)]; + T1b = T19 * T1a; + T4n = T19 * T1e; + T1k = T7 * T1j; + T4A = T7 * T1l; + T1f = FMA(T1d, T1e, T1b); + T4o = FNMS(T1d, T1a, T4n); + T1m = FMA(Tb, T1l, T1k); + T4B = FNMS(Tb, T1j, T4A); + } + T1g = T17 + T1f; + T4k = T17 - T1f; + T7g = T4m + T4o; + T4p = T4m - T4o; + T1D = Rp[WS(rs, 13)]; + T1G = Rm[WS(rs, 13)]; + T4q = T4k - T4p; + T6u = T4k + T4p; + T1E = T1C * T1D; + T4w = T1C * T1G; + } + { + E T1p, T1t, T1q, T4C; + T1p = Rp[WS(rs, 9)]; + T1t = Rm[WS(rs, 9)]; + T1H = FMA(T1F, T1G, T1E); + T4x = FNMS(T1F, T1D, T4w); + T1q = T1o * T1p; + T4C = T1o * T1t; + T1x = Rp[WS(rs, 5)]; + T1A = Rm[WS(rs, 5)]; + T1u = FMA(T1s, T1t, T1q); + T4D = FNMS(T1s, T1p, T4C); + T1y = T1w * T1x; + T4u = T1w * T1A; + } + } + { + E T4t, T1v, T7j, T4E, T1B, T4v; + T4t = T1m - T1u; + T1v = T1m + T1u; + T7j = T4B + T4D; + T4E = T4B - T4D; + T1B = FMA(T1z, T1A, T1y); + T4v = FNMS(T1z, T1x, T4u); + { + E T4F, T1I, T4y, T7k; + T4F = T1B - T1H; + T1I = T1B + T1H; + T4y = T4v - T4x; + T7k = T4v + T4x; + T1J = T1v + T1I; + T7m = T1v - T1I; + T6y = T4t - T4y; + T4z = T4t + T4y; + T7l = T7j - T7k; + T8d = T7j + T7k; + T6x = T4E + T4F; + T4G = T4E - T4F; + } + } + } + } + } + } + { + E T5C, T3u, T5y, T7H, T5Z, T3F, T60, T5A, T4T, T4U; + { + E T1P, T4Q, T2i, T4M, T21, T4S, T28, T4K; + { + E T1L, T1O, T1W, T20; + T1L = Rp[WS(rs, 15)]; + T1O = Rm[WS(rs, 15)]; + { + E T2d, T2h, T1M, T4P, T2e, T4L; + T2d = Rp[WS(rs, 11)]; + T2h = Rm[WS(rs, 11)]; + T1M = T1K * T1L; + T4P = T1K * T1O; + T2e = T2c * T2d; + T4L = T2c * T2h; + T1P = FMA(T1N, T1O, T1M); + T4Q = FNMS(T1N, T1L, T4P); + T2i = FMA(T2g, T2h, T2e); + T4M = FNMS(T2g, T2d, T4L); + } + T1W = Rp[WS(rs, 7)]; + T20 = Rm[WS(rs, 7)]; + { + E T24, T27, T1X, T4R, T25, T4J; + T24 = Rp[WS(rs, 3)]; + T27 = Rm[WS(rs, 3)]; + T1X = 
T1V * T1W; + T4R = T1V * T20; + T25 = T23 * T24; + T4J = T23 * T27; + T21 = FMA(T1Z, T20, T1X); + T4S = FNMS(T1Z, T1W, T4R); + T28 = FMA(T26, T27, T25); + T4K = FNMS(T26, T24, T4J); + } + } + { + E T4I, T22, T7p, T2j, T7q, T4N; + T4I = T1P - T21; + T22 = T1P + T21; + T7p = T4Q + T4S; + T4T = T4Q - T4S; + T4U = T28 - T2i; + T2j = T28 + T2i; + T7q = T4K + T4M; + T4N = T4K - T4M; + T2k = T22 + T2j; + T7o = T22 - T2j; + T7r = T7p - T7q; + T8e = T7p + T7q; + T6B = T4I - T4N; + T4O = T4I + T4N; + } + } + { + E T3l, T5W, T3E, T3v, T3t, T3w, T3x, T5Y, T3A, T3B, T3D, T3y, T5z; + { + E T3g, T3k, T3h, T5V; + T3g = Ip[WS(rs, 15)]; + T3k = Im[WS(rs, 15)]; + T3A = Ip[WS(rs, 11)]; + T6A = T4T + T4U; + T4V = T4T - T4U; + T3h = T3f * T3g; + T5V = T3f * T3k; + T3B = T3z * T3A; + T3D = Im[WS(rs, 11)]; + T3l = FMA(T3j, T3k, T3h); + T5W = FNMS(T3j, T3g, T5V); + } + { + E T3o, T5B, T3s, T3p, T5X; + T3o = Ip[WS(rs, 7)]; + T3E = FMA(T3C, T3D, T3B); + T5B = T3z * T3D; + T3s = Im[WS(rs, 7)]; + T3p = T3n * T3o; + T3v = Ip[WS(rs, 3)]; + T5C = FNMS(T3C, T3A, T5B); + T5X = T3n * T3s; + T3t = FMA(T3r, T3s, T3p); + T3w = TP * T3v; + T3x = Im[WS(rs, 3)]; + T5Y = FNMS(T3r, T3o, T5X); + } + T3u = T3l + T3t; + T5y = T3l - T3t; + T3y = FMA(TR, T3x, T3w); + T5z = TP * T3x; + T7H = T5W + T5Y; + T5Z = T5W - T5Y; + T3F = T3y + T3E; + T60 = T3E - T3y; + T5A = FNMS(TR, T3v, T5z); + } + { + E T2t, T2q, T2u, T5n, T2L, T53, T2x, T2A, T2C; + { + E T2n, T2o, T2p, T2G, T2K, T5D, T7I, T5m, T2H, T52; + T2n = Ip[0]; + T7L = T3u - T3F; + T3G = T3u + T3F; + T5D = T5A - T5C; + T7I = T5A + T5C; + T6P = T60 - T5Z; + T61 = T5Z + T60; + T6M = T5y - T5D; + T5E = T5y + T5D; + T8n = T7H + T7I; + T7J = T7H - T7I; + T2o = T2 * T2n; + T2p = Im[0]; + T2G = Ip[WS(rs, 12)]; + T2K = Im[WS(rs, 12)]; + T2t = Ip[WS(rs, 8)]; + T2q = FMA(T5, T2p, T2o); + T5m = T2 * T2p; + T2H = T2F * T2G; + T52 = T2F * T2K; + T2u = T2s * T2t; + T5n = FNMS(T5, T2n, T5m); + T2L = FMA(T2J, T2K, T2H); + T53 = FNMS(T2J, T2G, T52); + T2x = Im[WS(rs, 8)]; + T2A 
= Ip[WS(rs, 4)]; + T2C = Im[WS(rs, 4)]; + } + { + E T3N, T3K, T3O, T5H, T41, T5Q, T3R, T3U, T3W; + { + E T3H, T3I, T3J, T3Y, T40, T5G, T3Z, T5P; + { + E T2z, T4Z, T5p, T2D, T51, T7v, T5q; + T3H = Ip[WS(rs, 1)]; + { + E T2y, T5o, T2B, T50; + T2y = FMA(T2w, T2x, T2u); + T5o = T2s * T2x; + T2B = T8 * T2A; + T50 = T8 * T2C; + T2z = T2q + T2y; + T4Z = T2q - T2y; + T5p = FNMS(T2w, T2t, T5o); + T2D = FMA(Tc, T2C, T2B); + T51 = FNMS(Tc, T2A, T50); + T3I = T3 * T3H; + } + T7v = T5n + T5p; + T5q = T5n - T5p; + { + E T2M, T5r, T7w, T54; + T2M = T2D + T2L; + T5r = T2D - T2L; + T7w = T51 + T53; + T54 = T51 - T53; + T5s = T5q - T5r; + T6I = T5q + T5r; + T2N = T2z + T2M; + T7A = T2z - T2M; + T55 = T4Z + T54; + T6F = T4Z - T54; + T7x = T7v - T7w; + T8i = T7v + T7w; + T3J = Im[WS(rs, 1)]; + } + } + T3Y = Ip[WS(rs, 5)]; + T40 = Im[WS(rs, 5)]; + T3N = Ip[WS(rs, 9)]; + T3K = FMA(T6, T3J, T3I); + T5G = T3 * T3J; + T3Z = Td * T3Y; + T5P = Td * T40; + T3O = T3M * T3N; + T5H = FNMS(T6, T3H, T5G); + T41 = FMA(Th, T40, T3Z); + T5Q = FNMS(Th, T3Y, T5P); + T3R = Im[WS(rs, 9)]; + T3U = Ip[WS(rs, 13)]; + T3W = Im[WS(rs, 13)]; + } + { + E T2O, T2P, T2Q, T37, T39, T57, T38, T5g; + { + E T3T, T5F, T5J, T3X, T5O, T7M, T5K; + T2O = Ip[WS(rs, 2)]; + { + E T3S, T5I, T3V, T5N; + T3S = FMA(T3Q, T3R, T3O); + T5I = T3M * T3R; + T3V = Te * T3U; + T5N = Te * T3W; + T3T = T3K + T3S; + T5F = T3K - T3S; + T5J = FNMS(T3Q, T3N, T5I); + T3X = FMA(Ti, T3W, T3V); + T5O = FNMS(Ti, T3U, T5N); + T2P = T29 * T2O; + } + T7M = T5H + T5J; + T5K = T5H - T5J; + { + E T42, T5M, T7N, T5R; + T42 = T3X + T41; + T5M = T3X - T41; + T7N = T5O + T5Q; + T5R = T5O - T5Q; + T5L = T5F + T5K; + T62 = T5K - T5F; + T43 = T3T + T42; + T7G = T42 - T3T; + T5S = T5M - T5R; + T63 = T5M + T5R; + T7O = T7M - T7N; + T8o = T7M + T7N; + T2Q = Im[WS(rs, 2)]; + } + } + T37 = Ip[WS(rs, 6)]; + T39 = Im[WS(rs, 6)]; + T2U = Ip[WS(rs, 10)]; + T2R = FMA(T2b, T2Q, T2P); + T57 = T29 * T2Q; + T38 = T1R * T37; + T5g = T1R * T39; + T2V = T2T * T2U; + T58 = 
FNMS(T2b, T2O, T57); + T3a = FMA(T1U, T39, T38); + T5h = FNMS(T1U, T37, T5g); + T2Y = Im[WS(rs, 10)]; + T32 = Ip[WS(rs, 14)]; + T35 = Im[WS(rs, 14)]; + } + } + } + } + } + { + E T5c, T5t, T5j, T5u, T88, T90, T8Z, T8b; + { + E T7e, T8T, T7y, T7D, T7h, T8U, T8S, T8R; + { + E T8c, T1i, T8A, T8z, T8O, T8J, T8N, T2l, T8L, T45, T8t, T8l, T8u, T8q, T3c; + E T8k, T8p, T8w, T2m; + { + E T8x, T8y, T8j, T8C, T8I; + { + E TI, T30, T56, T5a, T36, T5f, T1h, T7B, T5b; + TI = Tq + TH; + T7e = Tq - TH; + { + E T2Z, T59, T33, T5e; + T2Z = FMA(T2X, T2Y, T2V); + T59 = T2T * T2Y; + T33 = T31 * T32; + T5e = T31 * T35; + T30 = T2R + T2Z; + T56 = T2R - T2Z; + T5a = FNMS(T2X, T2U, T59); + T36 = FMA(T34, T35, T33); + T5f = FNMS(T34, T32, T5e); + T1h = TZ + T1g; + T8T = T1g - TZ; + } + T7B = T58 + T5a; + T5b = T58 - T5a; + { + E T3b, T5d, T7C, T5i; + T3b = T36 + T3a; + T5d = T36 - T3a; + T7C = T5f + T5h; + T5i = T5f - T5h; + T5c = T56 + T5b; + T5t = T5b - T56; + T3c = T30 + T3b; + T7y = T3b - T30; + T5j = T5d - T5i; + T5u = T5d + T5i; + T7D = T7B - T7C; + T8j = T7B + T7C; + T8c = TI - T1h; + T1i = TI + T1h; + } + } + T8k = T8i - T8j; + T8x = T8i + T8j; + T8y = T8n + T8o; + T8p = T8n - T8o; + T7h = T7f - T7g; + T8C = T7f + T7g; + T8I = T8D + T8H; + T8U = T8H - T8D; + T8A = T8x + T8y; + T8z = T8x - T8y; + T8O = T8I - T8C; + T8J = T8C + T8I; + } + { + E T8h, T8m, T3d, T44; + T8h = T2N - T3c; + T3d = T2N + T3c; + T44 = T3G + T43; + T8m = T3G - T43; + T8N = T2k - T1J; + T2l = T1J + T2k; + T8L = T44 - T3d; + T45 = T3d + T44; + T8t = T8k - T8h; + T8l = T8h + T8k; + T8u = T8m + T8p; + T8q = T8m - T8p; + } + T8w = T1i - T2l; + T2m = T1i + T2l; + { + E T8s, T8P, T8Q, T8v; + { + E T8r, T8M, T8K, T8g, T8B, T8f; + T8S = T8q - T8l; + T8r = T8l + T8q; + T8B = T8d + T8e; + T8f = T8d - T8e; + Rp[0] = T2m + T45; + Rm[WS(rs, 15)] = T2m - T45; + Rp[WS(rs, 8)] = T8w + T8z; + Rm[WS(rs, 7)] = T8w - T8z; + T8M = T8J - T8B; + T8K = T8B + T8J; + T8g = T8c + T8f; + T8s = T8c - T8f; + T8R = T8O - T8N; + T8P = T8N + 
T8O; + Ip[WS(rs, 8)] = T8L + T8M; + Im[WS(rs, 7)] = T8L - T8M; + Ip[0] = T8A + T8K; + Im[WS(rs, 15)] = T8A - T8K; + Rp[WS(rs, 4)] = FMA(KP707106781, T8r, T8g); + Rm[WS(rs, 11)] = FNMS(KP707106781, T8r, T8g); + T8Q = T8t + T8u; + T8v = T8t - T8u; + } + Ip[WS(rs, 4)] = FMA(KP707106781, T8Q, T8P); + Im[WS(rs, 11)] = FMS(KP707106781, T8Q, T8P); + Rp[WS(rs, 12)] = FMA(KP707106781, T8v, T8s); + Rm[WS(rs, 3)] = FNMS(KP707106781, T8v, T8s); + } + } + { + E T7P, T7W, T7i, T7K, T8a, T86, T91, T8V, T8W, T7t, T7T, T7F, T92, T7Z, T89; + E T83; + { + E T7X, T7n, T7s, T7Y, T84, T85; + T7P = T7L - T7O; + T84 = T7L + T7O; + Ip[WS(rs, 12)] = FMA(KP707106781, T8S, T8R); + Im[WS(rs, 3)] = FMS(KP707106781, T8S, T8R); + T7W = T7e + T7h; + T7i = T7e - T7h; + T85 = T7J + T7G; + T7K = T7G - T7J; + T7X = T7m + T7l; + T7n = T7l - T7m; + T8a = FMA(KP414213562, T84, T85); + T86 = FNMS(KP414213562, T85, T84); + T91 = T8U - T8T; + T8V = T8T + T8U; + T7s = T7o + T7r; + T7Y = T7o - T7r; + { + E T82, T81, T7z, T7E; + T82 = T7x + T7y; + T7z = T7x - T7y; + T7E = T7A - T7D; + T81 = T7A + T7D; + T8W = T7n + T7s; + T7t = T7n - T7s; + T7T = FNMS(KP414213562, T7z, T7E); + T7F = FMA(KP414213562, T7E, T7z); + T92 = T7Y - T7X; + T7Z = T7X + T7Y; + T89 = FNMS(KP414213562, T81, T82); + T83 = FMA(KP414213562, T82, T81); + } + } + { + E T7S, T7u, T93, T95, T7U, T7Q; + T7S = FNMS(KP707106781, T7t, T7i); + T7u = FMA(KP707106781, T7t, T7i); + T93 = FMA(KP707106781, T92, T91); + T95 = FNMS(KP707106781, T92, T91); + T7U = FNMS(KP414213562, T7K, T7P); + T7Q = FMA(KP414213562, T7P, T7K); + { + E T80, T87, T8X, T8Y; + T88 = FNMS(KP707106781, T7Z, T7W); + T80 = FMA(KP707106781, T7Z, T7W); + { + E T7V, T94, T96, T7R; + T7V = T7T + T7U; + T94 = T7U - T7T; + T96 = T7Q - T7F; + T7R = T7F + T7Q; + Rm[WS(rs, 1)] = FMA(KP923879532, T7V, T7S); + Rp[WS(rs, 14)] = FNMS(KP923879532, T7V, T7S); + Ip[WS(rs, 6)] = FMA(KP923879532, T94, T93); + Im[WS(rs, 9)] = FMS(KP923879532, T94, T93); + Ip[WS(rs, 14)] = FMA(KP923879532, T96, T95); + 
Im[WS(rs, 1)] = FMS(KP923879532, T96, T95); + Rp[WS(rs, 6)] = FMA(KP923879532, T7R, T7u); + Rm[WS(rs, 9)] = FNMS(KP923879532, T7R, T7u); + T87 = T83 + T86; + T90 = T86 - T83; + } + T8Z = FNMS(KP707106781, T8W, T8V); + T8X = FMA(KP707106781, T8W, T8V); + T8Y = T89 + T8a; + T8b = T89 - T8a; + Rp[WS(rs, 2)] = FMA(KP923879532, T87, T80); + Rm[WS(rs, 13)] = FNMS(KP923879532, T87, T80); + Ip[WS(rs, 2)] = FMA(KP923879532, T8Y, T8X); + Im[WS(rs, 13)] = FMS(KP923879532, T8Y, T8X); + } + } + } + } + { + E T6s, T9o, T9n, T6v, T6Q, T6N, T6J, T6G, T9k, T9j; + { + E T6c, T4s, T9i, T4X, T9h, T9b, T9c, T6f, T5U, T6k, T64, T5k, T5v; + { + E T6d, T6e, T99, T9a, T5T; + { + E T4c, T4r, T4H, T4W; + T6s = T46 - T4b; + T4c = T46 + T4b; + Rp[WS(rs, 10)] = FMA(KP923879532, T8b, T88); + Rm[WS(rs, 5)] = FNMS(KP923879532, T8b, T88); + Ip[WS(rs, 10)] = FMA(KP923879532, T90, T8Z); + Im[WS(rs, 5)] = FMS(KP923879532, T90, T8Z); + T4r = T4j + T4q; + T9o = T4q - T4j; + T6d = FNMS(KP414213562, T4z, T4G); + T4H = FMA(KP414213562, T4G, T4z); + T4W = FNMS(KP414213562, T4V, T4O); + T6e = FMA(KP414213562, T4O, T4V); + T9n = T98 + T97; + T99 = T97 - T98; + T6c = FNMS(KP707106781, T4r, T4c); + T4s = FMA(KP707106781, T4r, T4c); + T9i = T4W - T4H; + T4X = T4H + T4W; + T9a = T6t + T6u; + T6v = T6t - T6u; + } + T6Q = T5S - T5L; + T5T = T5L + T5S; + T9h = FNMS(KP707106781, T9a, T99); + T9b = FMA(KP707106781, T9a, T99); + T9c = T6d + T6e; + T6f = T6d - T6e; + T5U = FMA(KP707106781, T5T, T5E); + T6k = FNMS(KP707106781, T5T, T5E); + T64 = T62 + T63; + T6N = T63 - T62; + T6J = T5c - T5j; + T5k = T5c + T5j; + T5v = T5t + T5u; + T6G = T5u - T5t; + } + { + E T6m, T6q, T6j, T6p, T9f, T9g; + { + E T68, T4Y, T6a, T66, T69, T5x, T9d, T6l, T65, T9e, T6b, T67; + T68 = FNMS(KP923879532, T4X, T4s); + T4Y = FMA(KP923879532, T4X, T4s); + T6l = FNMS(KP707106781, T64, T61); + T65 = FMA(KP707106781, T64, T61); + { + E T6h, T5l, T6i, T5w; + T6h = FNMS(KP707106781, T5k, T55); + T5l = FMA(KP707106781, T5k, T55); + T6i = 
FNMS(KP707106781, T5v, T5s); + T5w = FMA(KP707106781, T5v, T5s); + T6m = FMA(KP668178637, T6l, T6k); + T6q = FNMS(KP668178637, T6k, T6l); + T6a = FMA(KP198912367, T5U, T65); + T66 = FNMS(KP198912367, T65, T5U); + T6j = FNMS(KP668178637, T6i, T6h); + T6p = FMA(KP668178637, T6h, T6i); + T69 = FNMS(KP198912367, T5l, T5w); + T5x = FMA(KP198912367, T5w, T5l); + } + T9d = FMA(KP923879532, T9c, T9b); + T9f = FNMS(KP923879532, T9c, T9b); + T9e = T69 + T6a; + T6b = T69 - T6a; + T9g = T66 - T5x; + T67 = T5x + T66; + Ip[WS(rs, 1)] = FMA(KP980785280, T9e, T9d); + Im[WS(rs, 14)] = FMS(KP980785280, T9e, T9d); + Rp[WS(rs, 1)] = FMA(KP980785280, T67, T4Y); + Rm[WS(rs, 14)] = FNMS(KP980785280, T67, T4Y); + Rp[WS(rs, 9)] = FMA(KP980785280, T6b, T68); + Rm[WS(rs, 6)] = FNMS(KP980785280, T6b, T68); + } + { + E T6o, T9l, T9m, T6r, T6g, T6n; + T6o = FMA(KP923879532, T6f, T6c); + T6g = FNMS(KP923879532, T6f, T6c); + T6n = T6j + T6m; + T9k = T6m - T6j; + T9j = FMA(KP923879532, T9i, T9h); + T9l = FNMS(KP923879532, T9i, T9h); + Ip[WS(rs, 9)] = FMA(KP980785280, T9g, T9f); + Im[WS(rs, 6)] = FMS(KP980785280, T9g, T9f); + Rm[WS(rs, 2)] = FMA(KP831469612, T6n, T6g); + Rp[WS(rs, 13)] = FNMS(KP831469612, T6n, T6g); + T9m = T6p + T6q; + T6r = T6p - T6q; + Ip[WS(rs, 13)] = FNMS(KP831469612, T9m, T9l); + Im[WS(rs, 2)] = -(FMA(KP831469612, T9m, T9l)); + Rp[WS(rs, 5)] = FMA(KP831469612, T6r, T6o); + Rm[WS(rs, 10)] = FNMS(KP831469612, T6r, T6o); + } + } + } + { + E T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T6H, T74, T78, T7c, T6W, T6S; + { + E T6Z, T6z, T6C, T70; + T6Z = FNMS(KP414213562, T6x, T6y); + T6z = FMA(KP414213562, T6y, T6x); + Ip[WS(rs, 5)] = FMA(KP831469612, T9k, T9j); + Im[WS(rs, 10)] = FMS(KP831469612, T9k, T9j); + T6Y = FNMS(KP707106781, T6v, T6s); + T6w = FMA(KP707106781, T6v, T6s); + T6C = FNMS(KP414213562, T6B, T6A); + T70 = FMA(KP414213562, T6A, T6B); + T9w = T6z + T6C; + T6D = T6z - T6C; + T9v = FNMS(KP707106781, T9o, T9n); + T9p = FMA(KP707106781, T9o, T9n); + { + E T77, T6O, T76, T6R; 
+ T9q = T70 - T6Z; + T71 = T6Z + T70; + T77 = FMA(KP707106781, T6N, T6M); + T6O = FNMS(KP707106781, T6N, T6M); + T76 = FMA(KP707106781, T6Q, T6P); + T6R = FNMS(KP707106781, T6Q, T6P); + T6H = FNMS(KP707106781, T6G, T6F); + T74 = FMA(KP707106781, T6G, T6F); + T78 = FMA(KP198912367, T77, T76); + T7c = FNMS(KP198912367, T76, T77); + T6W = FNMS(KP668178637, T6O, T6R); + T6S = FMA(KP668178637, T6R, T6O); + } + } + { + E T6U, T6E, T9r, T9t, T73, T6K; + T6U = FNMS(KP923879532, T6D, T6w); + T6E = FMA(KP923879532, T6D, T6w); + T9r = FMA(KP923879532, T9q, T9p); + T9t = FNMS(KP923879532, T9q, T9p); + T73 = FMA(KP707106781, T6J, T6I); + T6K = FNMS(KP707106781, T6J, T6I); + { + E T7a, T9x, T9y, T7d; + { + E T72, T7b, T6V, T6L, T79, T75; + T7a = FMA(KP923879532, T71, T6Y); + T72 = FNMS(KP923879532, T71, T6Y); + T75 = FMA(KP198912367, T74, T73); + T7b = FNMS(KP198912367, T73, T74); + T6V = FNMS(KP668178637, T6H, T6K); + T6L = FMA(KP668178637, T6K, T6H); + T79 = T75 + T78; + T9A = T78 - T75; + T9z = FMA(KP923879532, T9w, T9v); + T9x = FNMS(KP923879532, T9w, T9v); + { + E T6X, T9s, T9u, T6T; + T6X = T6V + T6W; + T9s = T6V - T6W; + T9u = T6S - T6L; + T6T = T6L + T6S; + Rp[WS(rs, 7)] = FMA(KP980785280, T79, T72); + Rm[WS(rs, 8)] = FNMS(KP980785280, T79, T72); + Rp[WS(rs, 11)] = FMA(KP831469612, T6X, T6U); + Rm[WS(rs, 4)] = FNMS(KP831469612, T6X, T6U); + Ip[WS(rs, 3)] = FMA(KP831469612, T9s, T9r); + Im[WS(rs, 12)] = FMS(KP831469612, T9s, T9r); + Ip[WS(rs, 11)] = FMA(KP831469612, T9u, T9t); + Im[WS(rs, 4)] = FMS(KP831469612, T9u, T9t); + Rp[WS(rs, 3)] = FMA(KP831469612, T6T, T6E); + Rm[WS(rs, 12)] = FNMS(KP831469612, T6T, T6E); + T9y = T7c - T7b; + T7d = T7b + T7c; + } + } + Ip[WS(rs, 7)] = FMA(KP980785280, T9y, T9x); + Im[WS(rs, 8)] = FMS(KP980785280, T9y, T9x); + Rm[0] = FMA(KP980785280, T7d, T7a); + Rp[WS(rs, 15)] = FNMS(KP980785280, T7d, T7a); + } + } + } + } + } + } + } + Ip[WS(rs, 15)] = FMA(KP980785280, T9A, T9z); + Im[0] = FMS(KP980785280, T9A, T9z); + } + } +} + +static const 
tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cf2_32", twinstr, &GENUS, {236, 98, 252, 0} }; + +void X(codelet_hc2cf2_32) (planner *p) { + X(khc2c_register) (p, hc2cf2_32, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cf2_32 -include hc2cf.h */ + +/* + * This function contains 488 FP additions, 280 FP multiplications, + * (or, 376 additions, 168 multiplications, 112 fused multiply/add), + * 158 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) { + E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y; + E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d; + E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C; + E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25; + E T1S, T23; + { + E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF; + E T10; + { + E T4, Tc, T7, Tb; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tc = T5 * 
T3; + T7 = T5 * T6; + Tb = T2 * T6; + T8 = T4 + T7; + TM = T4 - T7; + TO = Tb + Tc; + Td = Tb - Tc; + T9 = W[4]; + Ts = T2 * T9; + T1d = T6 * T9; + Tx = T5 * T9; + T18 = T3 * T9; + Te = W[5]; + Tt = T5 * Te; + T1c = T3 * Te; + Tw = T2 * Te; + T19 = T6 * Te; + Th = W[6]; + TB = T3 * Th; + T14 = T5 * Th; + TG = T6 * Th; + TZ = T2 * Th; + Tl = W[7]; + TC = T6 * Tl; + T13 = T2 * Tl; + TF = T3 * Tl; + T10 = T5 * Tl; + } + TD = TB + TC; + TH = TF - TG; + T1y = TZ + T10; + T1H = TF + TG; + T15 = T13 + T14; + T1A = T13 - T14; + T11 = TZ - T10; + T1F = TB - TC; + T1n = FMA(T9, Th, Te * Tl); + T1p = FNMS(Te, Th, T9 * Tl); + { + E T2o, T2p, T2s, T2t; + T2o = T8 * Th; + T2p = Td * Tl; + T2q = T2o + T2p; + T2I = T2o - T2p; + T2s = T8 * Tl; + T2t = Td * Th; + T2u = T2s - T2t; + T2K = T2s + T2t; + } + { + E T2T, T2U, T2X, T2Y; + T2T = TM * Th; + T2U = TO * Tl; + T2V = T2T - T2U; + T3b = T2T + T2U; + T2X = TM * Tl; + T2Y = TO * Th; + T2Z = T2X + T2Y; + T3d = T2X - T2Y; + Tu = Ts + Tt; + Ty = Tw - Tx; + T3l = FMA(Tu, Th, Ty * Tl); + T3n = FNMS(Ty, Th, Tu * Tl); + } + T1t = Ts - Tt; + T1v = Tw + Tx; + T2f = FMA(T1t, Th, T1v * Tl); + T2h = FNMS(T1v, Th, T1t * Tl); + T1a = T18 - T19; + T1e = T1c + T1d; + T32 = FMA(T1a, Th, T1e * Tl); + T34 = FNMS(T1e, Th, T1a * Tl); + T1W = T18 + T19; + T1Y = T1c - T1d; + T2C = FMA(T1W, Th, T1Y * Tl); + T2E = FNMS(T1Y, Th, T1W * Tl); + { + E Ta, Tf, Ti, Tj; + Ta = T8 * T9; + Tf = Td * Te; + Tg = Ta - Tf; + TR = Ta + Tf; + Ti = T8 * Te; + Tj = Td * T9; + Tk = Ti + Tj; + TS = Ti - Tj; + } + Tm = FMA(Tg, Th, Tk * Tl); + TV = FNMS(TS, Th, TR * Tl); + To = FNMS(Tk, Th, Tg * Tl); + TT = FMA(TR, Th, TS * Tl); + { + E T1K, T1L, T1N, T1O; + T1K = TM * T9; + T1L = TO * Te; + T1M = T1K - T1L; + T21 = T1K + T1L; + T1N = TM * Te; + T1O = TO * T9; + T1P = T1N + T1O; + T22 = T1N - T1O; + } + T1Q = FMA(T1M, Th, T1P * Tl); + T25 = FNMS(T22, Th, T21 * Tl); + T1S = FNMS(T1P, Th, T1M * Tl); + T23 = FMA(T21, Th, T22 * Tl); + } + { + E TL, T6f, T8c, T8q, T3F, T5t, T7I, 
T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B; + E T5G, T3h, T6H, T6O, T7o, T4L, T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u; + E T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47; + E T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p; + E T4W, T5R, T55, T5O; + { + E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp; + T1 = Rp[0]; + T7G = Rm[0]; + Tn = Rp[WS(rs, 8)]; + Tp = Rm[WS(rs, 8)]; + Tq = FMA(Tm, Tn, To * Tp); + T7F = FNMS(To, Tn, Tm * Tp); + { + E Tv, Tz, TE, TI; + Tv = Rp[WS(rs, 4)]; + Tz = Rm[WS(rs, 4)]; + TA = FMA(Tu, Tv, Ty * Tz); + T3C = FNMS(Ty, Tv, Tu * Tz); + TE = Rp[WS(rs, 12)]; + TI = Rm[WS(rs, 12)]; + TJ = FMA(TD, TE, TH * TI); + T3D = FNMS(TH, TE, TD * TI); + } + { + E Tr, TK, T8a, T8b; + Tr = T1 + Tq; + TK = TA + TJ; + TL = Tr + TK; + T6f = Tr - TK; + T8a = T7G - T7F; + T8b = TA - TJ; + T8c = T8a - T8b; + T8q = T8b + T8a; + } + { + E T3B, T3E, T7E, T7H; + T3B = T1 - Tq; + T3E = T3C - T3D; + T3F = T3B - T3E; + T5t = T3B + T3E; + T7E = T3C + T3D; + T7H = T7F + T7G; + T7I = T7E + T7H; + T7W = T7H - T7E; + } + } + { + E T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y; + { + E T2c, T2d, T2r, T2v; + T2c = Ip[0]; + T2d = Im[0]; + T2e = FMA(T2, T2c, T5 * T2d); + T4g = FNMS(T5, T2c, T2 * T2d); + T2r = Ip[WS(rs, 12)]; + T2v = Im[WS(rs, 12)]; + T2w = FMA(T2q, T2r, T2u * T2v); + T4z = FNMS(T2u, T2r, T2q * T2v); + } + { + E T2g, T2i, T2l, T2m; + T2g = Ip[WS(rs, 8)]; + T2i = Im[WS(rs, 8)]; + T2j = FMA(T2f, T2g, T2h * T2i); + T4h = FNMS(T2h, T2g, T2f * T2i); + T2l = Ip[WS(rs, 4)]; + T2m = Im[WS(rs, 4)]; + T2n = FMA(T9, T2l, Te * T2m); + T4y = FNMS(Te, T2l, T9 * T2m); + } + { + E T2k, T2x, T6w, T6x; + T2k = T2e + T2j; + T2x = T2n + T2w; + T2y = T2k + T2x; + T6B = T2k - T2x; + T6w = T4g + T4h; + T6x = T4y + T4z; + T6y = T6w - T6x; + T7j = T6w + T6x; + } + { + E T4i, T4j, T4x, T4A; + T4i = T4g - T4h; + T4j = T2n - T2w; + T4k = T4i + T4j; + T5J = T4i - T4j; + T4x = T2e - T2j; + T4A = T4y - T4z; + T4B = T4x - T4A; + T5G = T4x + T4A; + } + 
} + { + E T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I; + { + E T2W, T30, T3c, T3e; + T2W = Ip[WS(rs, 15)]; + T30 = Im[WS(rs, 15)]; + T31 = FMA(T2V, T2W, T2Z * T30); + T4Y = FNMS(T2Z, T2W, T2V * T30); + T3c = Ip[WS(rs, 11)]; + T3e = Im[WS(rs, 11)]; + T3f = FMA(T3b, T3c, T3d * T3e); + T4J = FNMS(T3d, T3c, T3b * T3e); + } + { + E T33, T35, T38, T39; + T33 = Ip[WS(rs, 7)]; + T35 = Im[WS(rs, 7)]; + T36 = FMA(T32, T33, T34 * T35); + T4Z = FNMS(T34, T33, T32 * T35); + T38 = Ip[WS(rs, 3)]; + T39 = Im[WS(rs, 3)]; + T3a = FMA(TR, T38, TS * T39); + T4I = FNMS(TS, T38, TR * T39); + } + { + E T37, T3g, T6M, T6N; + T37 = T31 + T36; + T3g = T3a + T3f; + T3h = T37 + T3g; + T6H = T37 - T3g; + T6M = T4Y + T4Z; + T6N = T4I + T4J; + T6O = T6M - T6N; + T7o = T6M + T6N; + } + { + E T4H, T4K, T50, T51; + T4H = T31 - T36; + T4K = T4I - T4J; + T4L = T4H - T4K; + T5N = T4H + T4K; + T50 = T4Y - T4Z; + T51 = T3a - T3f; + T52 = T50 + T51; + T5Q = T50 - T51; + } + } + { + E TQ, T3G, T1g, T3N, TX, T3H, T17, T3M; + { + E TN, TP, T1b, T1f; + TN = Rp[WS(rs, 2)]; + TP = Rm[WS(rs, 2)]; + TQ = FMA(TM, TN, TO * TP); + T3G = FNMS(TO, TN, TM * TP); + T1b = Rp[WS(rs, 6)]; + T1f = Rm[WS(rs, 6)]; + T1g = FMA(T1a, T1b, T1e * T1f); + T3N = FNMS(T1e, T1b, T1a * T1f); + } + { + E TU, TW, T12, T16; + TU = Rp[WS(rs, 10)]; + TW = Rm[WS(rs, 10)]; + TX = FMA(TT, TU, TV * TW); + T3H = FNMS(TV, TU, TT * TW); + T12 = Rp[WS(rs, 14)]; + T16 = Rm[WS(rs, 14)]; + T17 = FMA(T11, T12, T15 * T16); + T3M = FNMS(T15, T12, T11 * T16); + } + { + E TY, T1h, T6g, T6h; + TY = TQ + TX; + T1h = T17 + T1g; + T1i = TY + T1h; + T7V = T1h - TY; + T6g = T3G + T3H; + T6h = T3M + T3N; + T6i = T6g - T6h; + T7D = T6g + T6h; + } + { + E T3I, T3J, T3L, T3O; + T3I = T3G - T3H; + T3J = TQ - TX; + T3K = T3I - T3J; + T5u = T3J + T3I; + T3L = T17 - T1g; + T3O = T3M - T3N; + T3P = T3L + T3O; + T5v = T3L - T3O; + } + } + { + E T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y; + { + E T1k, T1l, T1z, T1B; + T1k = Rp[WS(rs, 1)]; + T1l = Rm[WS(rs, 1)]; + T1m = FMA(T8, 
T1k, Td * T1l); + T3S = FNMS(Td, T1k, T8 * T1l); + T1z = Rp[WS(rs, 13)]; + T1B = Rm[WS(rs, 13)]; + T1C = FMA(T1y, T1z, T1A * T1B); + T3Z = FNMS(T1A, T1z, T1y * T1B); + } + { + E T1o, T1q, T1u, T1w; + T1o = Rp[WS(rs, 9)]; + T1q = Rm[WS(rs, 9)]; + T1r = FMA(T1n, T1o, T1p * T1q); + T3T = FNMS(T1p, T1o, T1n * T1q); + T1u = Rp[WS(rs, 5)]; + T1w = Rm[WS(rs, 5)]; + T1x = FMA(T1t, T1u, T1v * T1w); + T3Y = FNMS(T1v, T1u, T1t * T1w); + } + { + E T1s, T1D, T6k, T6l; + T1s = T1m + T1r; + T1D = T1x + T1C; + T1E = T1s + T1D; + T6n = T1s - T1D; + T6k = T3S + T3T; + T6l = T3Y + T3Z; + T6m = T6k - T6l; + T7e = T6k + T6l; + } + { + E T3U, T3V, T3X, T40; + T3U = T3S - T3T; + T3V = T1x - T1C; + T3W = T3U + T3V; + T5y = T3U - T3V; + T3X = T1m - T1r; + T40 = T3Y - T3Z; + T41 = T3X - T40; + T5z = T3X + T40; + } + } + { + E T1J, T43, T27, T4a, T1U, T44, T20, T49; + { + E T1G, T1I, T24, T26; + T1G = Rp[WS(rs, 15)]; + T1I = Rm[WS(rs, 15)]; + T1J = FMA(T1F, T1G, T1H * T1I); + T43 = FNMS(T1H, T1G, T1F * T1I); + T24 = Rp[WS(rs, 11)]; + T26 = Rm[WS(rs, 11)]; + T27 = FMA(T23, T24, T25 * T26); + T4a = FNMS(T25, T24, T23 * T26); + } + { + E T1R, T1T, T1X, T1Z; + T1R = Rp[WS(rs, 7)]; + T1T = Rm[WS(rs, 7)]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T44 = FNMS(T1S, T1R, T1Q * T1T); + T1X = Rp[WS(rs, 3)]; + T1Z = Rm[WS(rs, 3)]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T49 = FNMS(T1Y, T1X, T1W * T1Z); + } + { + E T1V, T28, T6q, T6r; + T1V = T1J + T1U; + T28 = T20 + T27; + T29 = T1V + T28; + T6p = T1V - T28; + T6q = T43 + T44; + T6r = T49 + T4a; + T6s = T6q - T6r; + T7f = T6q + T6r; + } + { + E T45, T46, T48, T4b; + T45 = T43 - T44; + T46 = T20 - T27; + T47 = T45 + T46; + T5B = T45 - T46; + T48 = T1J - T1U; + T4b = T49 - T4a; + T4c = T48 - T4b; + T5C = T48 + T4b; + } + } + { + E T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o; + { + E T2z, T2A, T2D, T2F; + T2z = Ip[WS(rs, 2)]; + T2A = Im[WS(rs, 2)]; + T2B = FMA(T21, T2z, T22 * T2A); + T4r = FNMS(T22, T2z, T21 * T2A); + T2D = Ip[WS(rs, 10)]; + T2F = 
Im[WS(rs, 10)]; + T2G = FMA(T2C, T2D, T2E * T2F); + T4s = FNMS(T2E, T2D, T2C * T2F); + } + T4q = T2B - T2G; + T4t = T4r - T4s; + { + E T2J, T2L, T2N, T2O; + T2J = Ip[WS(rs, 14)]; + T2L = Im[WS(rs, 14)]; + T2M = FMA(T2I, T2J, T2K * T2L); + T4m = FNMS(T2K, T2J, T2I * T2L); + T2N = Ip[WS(rs, 6)]; + T2O = Im[WS(rs, 6)]; + T2P = FMA(T1M, T2N, T1P * T2O); + T4n = FNMS(T1P, T2N, T1M * T2O); + } + T4l = T2M - T2P; + T4o = T4m - T4n; + { + E T2H, T2Q, T6C, T6D; + T2H = T2B + T2G; + T2Q = T2M + T2P; + T2R = T2H + T2Q; + T6z = T2Q - T2H; + T6C = T4r + T4s; + T6D = T4m + T4n; + T6E = T6C - T6D; + T7k = T6C + T6D; + } + { + E T4p, T4u, T4C, T4D; + T4p = T4l - T4o; + T4u = T4q + T4t; + T4v = KP707106781 * (T4p - T4u); + T5H = KP707106781 * (T4u + T4p); + T4C = T4t - T4q; + T4D = T4l + T4o; + T4E = KP707106781 * (T4C - T4D); + T5K = KP707106781 * (T4C + T4D); + } + } + { + E T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U; + { + E T3i, T3j, T3m, T3o; + T3i = Ip[WS(rs, 1)]; + T3j = Im[WS(rs, 1)]; + T3k = FMA(T3, T3i, T6 * T3j); + T4M = FNMS(T6, T3i, T3 * T3j); + T3m = Ip[WS(rs, 9)]; + T3o = Im[WS(rs, 9)]; + T3p = FMA(T3l, T3m, T3n * T3o); + T4N = FNMS(T3n, T3m, T3l * T3o); + } + T4O = T4M - T4N; + T4P = T3k - T3p; + { + E T3r, T3s, T3u, T3v; + T3r = Ip[WS(rs, 13)]; + T3s = Im[WS(rs, 13)]; + T3t = FMA(Th, T3r, Tl * T3s); + T4S = FNMS(Tl, T3r, Th * T3s); + T3u = Ip[WS(rs, 5)]; + T3v = Im[WS(rs, 5)]; + T3w = FMA(Tg, T3u, Tk * T3v); + T4T = FNMS(Tk, T3u, Tg * T3v); + } + T4R = T3t - T3w; + T4U = T4S - T4T; + { + E T3q, T3x, T6I, T6J; + T3q = T3k + T3p; + T3x = T3t + T3w; + T3y = T3q + T3x; + T6P = T3x - T3q; + T6I = T4M + T4N; + T6J = T4S + T4T; + T6K = T6I - T6J; + T7p = T6I + T6J; + } + { + E T4Q, T4V, T53, T54; + T4Q = T4O - T4P; + T4V = T4R + T4U; + T4W = KP707106781 * (T4Q - T4V); + T5R = KP707106781 * (T4Q + T4V); + T53 = T4R - T4U; + T54 = T4P + T4O; + T55 = KP707106781 * (T53 - T54); + T5O = KP707106781 * (T54 + T53); + } + } + { + E T2b, T7x, T7K, T7M, T3A, T7L, 
T7A, T7B; + { + E T1j, T2a, T7C, T7J; + T1j = TL + T1i; + T2a = T1E + T29; + T2b = T1j + T2a; + T7x = T1j - T2a; + T7C = T7e + T7f; + T7J = T7D + T7I; + T7K = T7C + T7J; + T7M = T7J - T7C; + } + { + E T2S, T3z, T7y, T7z; + T2S = T2y + T2R; + T3z = T3h + T3y; + T3A = T2S + T3z; + T7L = T3z - T2S; + T7y = T7j + T7k; + T7z = T7o + T7p; + T7A = T7y - T7z; + T7B = T7y + T7z; + } + Rm[WS(rs, 15)] = T2b - T3A; + Im[WS(rs, 15)] = T7B - T7K; + Rp[0] = T2b + T3A; + Ip[0] = T7B + T7K; + Rm[WS(rs, 7)] = T7x - T7A; + Im[WS(rs, 7)] = T7L - T7M; + Rp[WS(rs, 8)] = T7x + T7A; + Ip[WS(rs, 8)] = T7L + T7M; + } + { + E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v; + { + E T7d, T7g, T7O, T7P; + T7d = TL - T1i; + T7g = T7e - T7f; + T7h = T7d + T7g; + T7t = T7d - T7g; + T7O = T29 - T1E; + T7P = T7I - T7D; + T7Q = T7O + T7P; + T7S = T7P - T7O; + } + { + E T7i, T7l, T7n, T7q; + T7i = T2y - T2R; + T7l = T7j - T7k; + T7m = T7i + T7l; + T7u = T7l - T7i; + T7n = T3h - T3y; + T7q = T7o - T7p; + T7r = T7n - T7q; + T7v = T7n + T7q; + } + { + E T7s, T7N, T7w, T7R; + T7s = KP707106781 * (T7m + T7r); + Rm[WS(rs, 11)] = T7h - T7s; + Rp[WS(rs, 4)] = T7h + T7s; + T7N = KP707106781 * (T7u + T7v); + Im[WS(rs, 11)] = T7N - T7Q; + Ip[WS(rs, 4)] = T7N + T7Q; + T7w = KP707106781 * (T7u - T7v); + Rm[WS(rs, 3)] = T7t - T7w; + Rp[WS(rs, 12)] = T7t + T7w; + T7R = KP707106781 * (T7r - T7m); + Im[WS(rs, 3)] = T7R - T7S; + Ip[WS(rs, 12)] = T7R + T7S; + } + } + { + E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R; + E T6V; + { + E T6o, T6t, T6A, T6F; + T6j = T6f - T6i; + T7X = T7V + T7W; + T83 = T7W - T7V; + T6X = T6f + T6i; + T6o = T6m - T6n; + T6t = T6p + T6s; + T6u = KP707106781 * (T6o - T6t); + T7U = KP707106781 * (T6o + T6t); + { + E T75, T76, T6Y, T6Z; + T75 = T6H + T6K; + T76 = T6O + T6P; + T77 = FNMS(KP382683432, T76, KP923879532 * T75); + T7b = FMA(KP923879532, T76, KP382683432 * T75); + T6Y = T6n + T6m; + T6Z = T6p - T6s; + T70 = KP707106781 * (T6Y + T6Z); + T82 = KP707106781 * (T6Z - 
T6Y); + } + T6A = T6y - T6z; + T6F = T6B - T6E; + T6G = FMA(KP923879532, T6A, KP382683432 * T6F); + T6U = FNMS(KP923879532, T6F, KP382683432 * T6A); + { + E T72, T73, T6L, T6Q; + T72 = T6y + T6z; + T73 = T6B + T6E; + T74 = FMA(KP382683432, T72, KP923879532 * T73); + T7a = FNMS(KP382683432, T73, KP923879532 * T72); + T6L = T6H - T6K; + T6Q = T6O - T6P; + T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L); + T6V = FMA(KP382683432, T6Q, KP923879532 * T6L); + } + } + { + E T6v, T6S, T81, T84; + T6v = T6j + T6u; + T6S = T6G + T6R; + Rm[WS(rs, 9)] = T6v - T6S; + Rp[WS(rs, 6)] = T6v + T6S; + T81 = T6U + T6V; + T84 = T82 + T83; + Im[WS(rs, 9)] = T81 - T84; + Ip[WS(rs, 6)] = T81 + T84; + } + { + E T6T, T6W, T85, T86; + T6T = T6j - T6u; + T6W = T6U - T6V; + Rm[WS(rs, 1)] = T6T - T6W; + Rp[WS(rs, 14)] = T6T + T6W; + T85 = T6R - T6G; + T86 = T83 - T82; + Im[WS(rs, 1)] = T85 - T86; + Ip[WS(rs, 14)] = T85 + T86; + } + { + E T71, T78, T7T, T7Y; + T71 = T6X + T70; + T78 = T74 + T77; + Rm[WS(rs, 13)] = T71 - T78; + Rp[WS(rs, 2)] = T71 + T78; + T7T = T7a + T7b; + T7Y = T7U + T7X; + Im[WS(rs, 13)] = T7T - T7Y; + Ip[WS(rs, 2)] = T7T + T7Y; + } + { + E T79, T7c, T7Z, T80; + T79 = T6X - T70; + T7c = T7a - T7b; + Rm[WS(rs, 5)] = T79 - T7c; + Rp[WS(rs, 10)] = T79 + T7c; + T7Z = T77 - T74; + T80 = T7X - T7U; + Im[WS(rs, 5)] = T7Z - T80; + Ip[WS(rs, 10)] = T7Z + T80; + } + } + { + E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57; + E T5b, T3Q, T8p; + T3Q = KP707106781 * (T3K - T3P); + T3R = T3F - T3Q; + T5d = T3F + T3Q; + T8p = KP707106781 * (T5v - T5u); + T8r = T8p + T8q; + T8x = T8q - T8p; + { + E T42, T4d, T5l, T5m; + T42 = FNMS(KP923879532, T41, KP382683432 * T3W); + T4d = FMA(KP382683432, T47, KP923879532 * T4c); + T4e = T42 - T4d; + T8o = T42 + T4d; + T5l = T4L + T4W; + T5m = T52 + T55; + T5n = FNMS(KP555570233, T5m, KP831469612 * T5l); + T5r = FMA(KP831469612, T5m, KP555570233 * T5l); + } + { + E T4w, T4F, T5e, T5f; + T4w = T4k - T4v; + T4F = T4B - T4E; + T4G 
= FMA(KP980785280, T4w, KP195090322 * T4F); + T5a = FNMS(KP980785280, T4F, KP195090322 * T4w); + T5e = FMA(KP923879532, T3W, KP382683432 * T41); + T5f = FNMS(KP923879532, T47, KP382683432 * T4c); + T5g = T5e + T5f; + T8w = T5f - T5e; + } + { + E T5i, T5j, T4X, T56; + T5i = T4k + T4v; + T5j = T4B + T4E; + T5k = FMA(KP555570233, T5i, KP831469612 * T5j); + T5q = FNMS(KP555570233, T5j, KP831469612 * T5i); + T4X = T4L - T4W; + T56 = T52 - T55; + T57 = FNMS(KP980785280, T56, KP195090322 * T4X); + T5b = FMA(KP195090322, T56, KP980785280 * T4X); + } + { + E T4f, T58, T8v, T8y; + T4f = T3R + T4e; + T58 = T4G + T57; + Rm[WS(rs, 8)] = T4f - T58; + Rp[WS(rs, 7)] = T4f + T58; + T8v = T5a + T5b; + T8y = T8w + T8x; + Im[WS(rs, 8)] = T8v - T8y; + Ip[WS(rs, 7)] = T8v + T8y; + } + { + E T59, T5c, T8z, T8A; + T59 = T3R - T4e; + T5c = T5a - T5b; + Rm[0] = T59 - T5c; + Rp[WS(rs, 15)] = T59 + T5c; + T8z = T57 - T4G; + T8A = T8x - T8w; + Im[0] = T8z - T8A; + Ip[WS(rs, 15)] = T8z + T8A; + } + { + E T5h, T5o, T8n, T8s; + T5h = T5d + T5g; + T5o = T5k + T5n; + Rm[WS(rs, 12)] = T5h - T5o; + Rp[WS(rs, 3)] = T5h + T5o; + T8n = T5q + T5r; + T8s = T8o + T8r; + Im[WS(rs, 12)] = T8n - T8s; + Ip[WS(rs, 3)] = T8n + T8s; + } + { + E T5p, T5s, T8t, T8u; + T5p = T5d - T5g; + T5s = T5q - T5r; + Rm[WS(rs, 4)] = T5p - T5s; + Rp[WS(rs, 11)] = T5p + T5s; + T8t = T5n - T5k; + T8u = T8r - T8o; + Im[WS(rs, 4)] = T8t - T8u; + Ip[WS(rs, 11)] = T8t + T8u; + } + } + { + E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T; + E T5X, T5w, T89; + T5w = KP707106781 * (T5u + T5v); + T5x = T5t - T5w; + T5Z = T5t + T5w; + T89 = KP707106781 * (T3K + T3P); + T8d = T89 + T8c; + T8j = T8c - T89; + { + E T5A, T5D, T67, T68; + T5A = FNMS(KP382683432, T5z, KP923879532 * T5y); + T5D = FMA(KP923879532, T5B, KP382683432 * T5C); + T5E = T5A - T5D; + T88 = T5A + T5D; + T67 = T5N + T5O; + T68 = T5Q + T5R; + T69 = FNMS(KP195090322, T68, KP980785280 * T67); + T6d = FMA(KP195090322, T67, KP980785280 * T68); + } + { 
+ E T5I, T5L, T60, T61; + T5I = T5G - T5H; + T5L = T5J - T5K; + T5M = FMA(KP555570233, T5I, KP831469612 * T5L); + T5W = FNMS(KP831469612, T5I, KP555570233 * T5L); + T60 = FMA(KP382683432, T5y, KP923879532 * T5z); + T61 = FNMS(KP382683432, T5B, KP923879532 * T5C); + T62 = T60 + T61; + T8i = T61 - T60; + } + { + E T64, T65, T5P, T5S; + T64 = T5G + T5H; + T65 = T5J + T5K; + T66 = FMA(KP980785280, T64, KP195090322 * T65); + T6c = FNMS(KP195090322, T64, KP980785280 * T65); + T5P = T5N - T5O; + T5S = T5Q - T5R; + T5T = FNMS(KP831469612, T5S, KP555570233 * T5P); + T5X = FMA(KP831469612, T5P, KP555570233 * T5S); + } + { + E T5F, T5U, T8h, T8k; + T5F = T5x + T5E; + T5U = T5M + T5T; + Rm[WS(rs, 10)] = T5F - T5U; + Rp[WS(rs, 5)] = T5F + T5U; + T8h = T5W + T5X; + T8k = T8i + T8j; + Im[WS(rs, 10)] = T8h - T8k; + Ip[WS(rs, 5)] = T8h + T8k; + } + { + E T5V, T5Y, T8l, T8m; + T5V = T5x - T5E; + T5Y = T5W - T5X; + Rm[WS(rs, 2)] = T5V - T5Y; + Rp[WS(rs, 13)] = T5V + T5Y; + T8l = T5T - T5M; + T8m = T8j - T8i; + Im[WS(rs, 2)] = T8l - T8m; + Ip[WS(rs, 13)] = T8l + T8m; + } + { + E T63, T6a, T87, T8e; + T63 = T5Z + T62; + T6a = T66 + T69; + Rm[WS(rs, 14)] = T63 - T6a; + Rp[WS(rs, 1)] = T63 + T6a; + T87 = T6c + T6d; + T8e = T88 + T8d; + Im[WS(rs, 14)] = T87 - T8e; + Ip[WS(rs, 1)] = T87 + T8e; + } + { + E T6b, T6e, T8f, T8g; + T6b = T5Z - T62; + T6e = T6c - T6d; + Rm[WS(rs, 6)] = T6b - T6e; + Rp[WS(rs, 9)] = T6b + T6e; + T8f = T69 - T66; + T8g = T8d - T88; + Im[WS(rs, 6)] = T8f - T8g; + Ip[WS(rs, 9)] = T8f + T8g; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cf2_32", twinstr, &GENUS, {376, 168, 112, 0} }; + +void X(codelet_hc2cf2_32) (planner *p) { + X(khc2c_register) (p, hc2cf2_32, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_4.c --- /dev/null Thu 
Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cf2_4 -include hc2cf.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 33 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) { + E Ti, Tq, To, Te, Ty, TA, Tm, Ts; + { + E T2, T6, T3, T5; + T2 = W[0]; + T6 = W[3]; + T3 = W[2]; + T5 = W[1]; + { + E T1, Tx, Td, Tw, 
Tj, Tl, Ta, T4, Tk, Tr; + T1 = Rp[0]; + Ta = T2 * T6; + T4 = T2 * T3; + Tx = Rm[0]; + { + E T8, Tb, T7, Tc; + T8 = Rp[WS(rs, 1)]; + Tb = FNMS(T5, T3, Ta); + T7 = FMA(T5, T6, T4); + Tc = Rm[WS(rs, 1)]; + { + E Tf, Th, T9, Tv, Tg, Tp; + Tf = Ip[0]; + Th = Im[0]; + T9 = T7 * T8; + Tv = T7 * Tc; + Tg = T2 * Tf; + Tp = T2 * Th; + Td = FMA(Tb, Tc, T9); + Tw = FNMS(Tb, T8, Tv); + Ti = FMA(T5, Th, Tg); + Tq = FNMS(T5, Tf, Tp); + } + Tj = Ip[WS(rs, 1)]; + Tl = Im[WS(rs, 1)]; + } + To = T1 - Td; + Te = T1 + Td; + Ty = Tw + Tx; + TA = Tx - Tw; + Tk = T3 * Tj; + Tr = T3 * Tl; + Tm = FMA(T6, Tl, Tk); + Ts = FNMS(T6, Tj, Tr); + } + } + { + E Tn, Tz, Tu, Tt; + Tn = Ti + Tm; + Tz = Tm - Ti; + Tu = Tq + Ts; + Tt = Tq - Ts; + Ip[WS(rs, 1)] = Tz + TA; + Im[0] = Tz - TA; + Rp[0] = Te + Tn; + Rm[WS(rs, 1)] = Te - Tn; + Rp[WS(rs, 1)] = To + Tt; + Rm[0] = To - Tt; + Ip[0] = Tu + Ty; + Im[WS(rs, 1)] = Tu - Ty; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cf2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hc2cf2_4) (planner *p) { + X(khc2c_register) (p, hc2cf2_4, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cf2_4 -include hc2cf.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) { + E T2, T4, T3, T5, T6, T8; + T2 = W[0]; + T4 = W[1]; + T3 = W[2]; + T5 = W[3]; + T6 = FMA(T2, T3, T4 * 
T5); + T8 = FNMS(T4, T3, T2 * T5); + { + E T1, Tp, Ta, To, Te, Tk, Th, Tl, T7, T9; + T1 = Rp[0]; + Tp = Rm[0]; + T7 = Rp[WS(rs, 1)]; + T9 = Rm[WS(rs, 1)]; + Ta = FMA(T6, T7, T8 * T9); + To = FNMS(T8, T7, T6 * T9); + { + E Tc, Td, Tf, Tg; + Tc = Ip[0]; + Td = Im[0]; + Te = FMA(T2, Tc, T4 * Td); + Tk = FNMS(T4, Tc, T2 * Td); + Tf = Ip[WS(rs, 1)]; + Tg = Im[WS(rs, 1)]; + Th = FMA(T3, Tf, T5 * Tg); + Tl = FNMS(T5, Tf, T3 * Tg); + } + { + E Tb, Ti, Tn, Tq; + Tb = T1 + Ta; + Ti = Te + Th; + Rm[WS(rs, 1)] = Tb - Ti; + Rp[0] = Tb + Ti; + Tn = Tk + Tl; + Tq = To + Tp; + Im[WS(rs, 1)] = Tn - Tq; + Ip[0] = Tn + Tq; + } + { + E Tj, Tm, Tr, Ts; + Tj = T1 - Ta; + Tm = Tk - Tl; + Rm[0] = Tj - Tm; + Rp[WS(rs, 1)] = Tj + Tm; + Tr = Th - Te; + Ts = Tp - To; + Im[0] = Tr - Ts; + Ip[WS(rs, 1)] = Tr + Ts; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cf2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hc2cf2_4) (planner *p) { + X(khc2c_register) (p, hc2cf2_4, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:24 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -dit -name hc2cf2_8 -include hc2cf.h */ + +/* + * This function contains 74 FP additions, 50 FP multiplications, + * (or, 44 additions, 20 multiplications, 30 fused multiply/add), + * 64 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) { + E TS, T1m, TJ, T1l, T1k, Tw, T1w, T1u; + { + E T2, T3, Tl, Tn, T5, T4, Tm, Tr, T6; + T2 = W[0]; + T3 = W[2]; + Tl = W[4]; + Tn = W[5]; + T5 = W[1]; + T4 = T2 * T3; + Tm = T2 * Tl; + Tr = T2 * Tn; + T6 = W[3]; + { + E T1, T1s, TG, Td, T1r, Tu, TY, Tk, TW, T18, T1d, TD, TH, TA, T13; + E TE, T14; + { + E To, Ts, Tf, T7, T8, Ti, Tb, T9, Tc, TC, Ta, TF, TB, Tg, Th; + E Tj; + T1 = Rp[0]; + To = FMA(T5, Tn, Tm); + Ts = FNMS(T5, Tl, Tr); + Tf = FMA(T5, T6, T4); + T7 = FNMS(T5, T6, T4); + Ta = T2 * T6; + T1s = Rm[0]; + T8 = Rp[WS(rs, 2)]; + TF = Tf * Tn; + TB = Tf * Tl; + Ti = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + T9 = T7 * T8; + Tc = Rm[WS(rs, 2)]; + TG = FNMS(Ti, Tl, TF); + TC = FMA(Ti, Tn, TB); + { + E Tp, T1q, Tt, Tq, TX; + Tp = Rp[WS(rs, 3)]; + Td = FMA(Tb, Tc, T9); + T1q = T7 * Tc; + Tt = Rm[WS(rs, 3)]; + Tq = To 
* Tp; + Tg = Rp[WS(rs, 1)]; + T1r = FNMS(Tb, T8, T1q); + TX = To * Tt; + Tu = FMA(Ts, Tt, Tq); + Th = Tf * Tg; + Tj = Rm[WS(rs, 1)]; + TY = FNMS(Ts, Tp, TX); + } + { + E TO, TQ, TN, TP, T1a, T1b; + { + E TK, TM, TL, T19, TV; + TK = Ip[WS(rs, 3)]; + TM = Im[WS(rs, 3)]; + Tk = FMA(Ti, Tj, Th); + TV = Tf * Tj; + TL = Tl * TK; + T19 = Tl * TM; + TO = Ip[WS(rs, 1)]; + TW = FNMS(Ti, Tg, TV); + TQ = Im[WS(rs, 1)]; + TN = FMA(Tn, TM, TL); + TP = T3 * TO; + T1a = FNMS(Tn, TK, T19); + T1b = T3 * TQ; + } + { + E Tx, Tz, Ty, T12, T1c, TR; + Tx = Ip[0]; + TR = FMA(T6, TQ, TP); + Tz = Im[0]; + T1c = FNMS(T6, TO, T1b); + Ty = T2 * Tx; + T18 = TN - TR; + TS = TN + TR; + T12 = T2 * Tz; + T1d = T1a - T1c; + T1m = T1a + T1c; + TD = Ip[WS(rs, 2)]; + TH = Im[WS(rs, 2)]; + TA = FMA(T5, Tz, Ty); + T13 = FNMS(T5, Tx, T12); + TE = TC * TD; + T14 = TC * TH; + } + } + } + { + E Te, T1p, T1t, Tv; + { + E T1g, T10, T1z, T1B, T1A, T1j, T1C, T1f; + { + E T1x, T11, T16, T1y; + { + E TU, TZ, TI, T15; + Te = T1 + Td; + TU = T1 - Td; + TZ = TW - TY; + T1p = TW + TY; + TI = FMA(TG, TH, TE); + T15 = FNMS(TG, TD, T14); + T1t = T1r + T1s; + T1x = T1s - T1r; + T1g = TU - TZ; + T10 = TU + TZ; + T11 = TA - TI; + TJ = TA + TI; + T1l = T13 + T15; + T16 = T13 - T15; + T1y = Tk - Tu; + Tv = Tk + Tu; + } + { + E T1i, T1e, T17, T1h; + T1i = T18 + T1d; + T1e = T18 - T1d; + T17 = T11 + T16; + T1h = T16 - T11; + T1z = T1x - T1y; + T1B = T1y + T1x; + T1A = T1h + T1i; + T1j = T1h - T1i; + T1C = T1e - T17; + T1f = T17 + T1e; + } + } + Rm[0] = FNMS(KP707106781, T1j, T1g); + Im[0] = FMS(KP707106781, T1C, T1B); + Rp[WS(rs, 1)] = FMA(KP707106781, T1f, T10); + Rm[WS(rs, 2)] = FNMS(KP707106781, T1f, T10); + Ip[WS(rs, 1)] = FMA(KP707106781, T1A, T1z); + Im[WS(rs, 2)] = FMS(KP707106781, T1A, T1z); + Rp[WS(rs, 3)] = FMA(KP707106781, T1j, T1g); + Ip[WS(rs, 3)] = FMA(KP707106781, T1C, T1B); + } + T1k = Te - Tv; + Tw = Te + Tv; + T1w = T1t - T1p; + T1u = T1p + T1t; + } + } + } + { + E TT, T1v, T1n, T1o; + TT = TJ + TS; + T1v = TS 
- TJ; + T1n = T1l - T1m; + T1o = T1l + T1m; + Ip[WS(rs, 2)] = T1v + T1w; + Im[WS(rs, 1)] = T1v - T1w; + Rp[0] = Tw + TT; + Rm[WS(rs, 3)] = Tw - TT; + Ip[0] = T1o + T1u; + Im[WS(rs, 3)] = T1o - T1u; + Rp[WS(rs, 2)] = T1k + T1n; + Rm[WS(rs, 1)] = T1k - T1n; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cf2_8", twinstr, &GENUS, {44, 20, 30, 0} }; + +void X(codelet_hc2cf2_8) (planner *p) { + X(khc2c_register) (p, hc2cf2_8, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -dit -name hc2cf2_8 -include hc2cf.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 42 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) { + E T2, T5, T3, T6, T8, Tc, Tg, Ti, Tl, Tm, Tn, Tz, Tp, Tx; + { + E T4, Tb, T7, Ta; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tb = T5 * T3; + T7 = T5 * T6; + Ta = T2 * T6; + T8 = T4 - T7; + Tc = Ta + Tb; + Tg = T4 + T7; + Ti = Ta - Tb; + Tl = W[4]; + Tm = W[5]; + Tn = FMA(T2, Tl, T5 * Tm); + Tz = FNMS(Ti, Tl, Tg * Tm); + Tp = FNMS(T5, Tl, T2 * Tm); + Tx = FMA(Tg, Tl, Ti * Tm); + } + { + E Tf, T1i, TL, T1d, TJ, T17, TV, TY, Ts, T1j, TO, T1a, TC, T16, TQ; + E TT; + { + E T1, T1c, Te, T1b, T9, Td; + T1 = Rp[0]; + T1c = Rm[0]; + T9 = Rp[WS(rs, 2)]; + Td = Rm[WS(rs, 2)]; + Te = FMA(T8, T9, Tc * Td); + T1b = FNMS(Tc, T9, T8 * 
Td); + Tf = T1 + Te; + T1i = T1c - T1b; + TL = T1 - Te; + T1d = T1b + T1c; + } + { + E TF, TW, TI, TX; + { + E TD, TE, TG, TH; + TD = Ip[WS(rs, 3)]; + TE = Im[WS(rs, 3)]; + TF = FMA(Tl, TD, Tm * TE); + TW = FNMS(Tm, TD, Tl * TE); + TG = Ip[WS(rs, 1)]; + TH = Im[WS(rs, 1)]; + TI = FMA(T3, TG, T6 * TH); + TX = FNMS(T6, TG, T3 * TH); + } + TJ = TF + TI; + T17 = TW + TX; + TV = TF - TI; + TY = TW - TX; + } + { + E Tk, TM, Tr, TN; + { + E Th, Tj, To, Tq; + Th = Rp[WS(rs, 1)]; + Tj = Rm[WS(rs, 1)]; + Tk = FMA(Tg, Th, Ti * Tj); + TM = FNMS(Ti, Th, Tg * Tj); + To = Rp[WS(rs, 3)]; + Tq = Rm[WS(rs, 3)]; + Tr = FMA(Tn, To, Tp * Tq); + TN = FNMS(Tp, To, Tn * Tq); + } + Ts = Tk + Tr; + T1j = Tk - Tr; + TO = TM - TN; + T1a = TM + TN; + } + { + E Tw, TR, TB, TS; + { + E Tu, Tv, Ty, TA; + Tu = Ip[0]; + Tv = Im[0]; + Tw = FMA(T2, Tu, T5 * Tv); + TR = FNMS(T5, Tu, T2 * Tv); + Ty = Ip[WS(rs, 2)]; + TA = Im[WS(rs, 2)]; + TB = FMA(Tx, Ty, Tz * TA); + TS = FNMS(Tz, Ty, Tx * TA); + } + TC = Tw + TB; + T16 = TR + TS; + TQ = Tw - TB; + TT = TR - TS; + } + { + E Tt, TK, T1f, T1g; + Tt = Tf + Ts; + TK = TC + TJ; + Rm[WS(rs, 3)] = Tt - TK; + Rp[0] = Tt + TK; + { + E T19, T1e, T15, T18; + T19 = T16 + T17; + T1e = T1a + T1d; + Im[WS(rs, 3)] = T19 - T1e; + Ip[0] = T19 + T1e; + T15 = Tf - Ts; + T18 = T16 - T17; + Rm[WS(rs, 1)] = T15 - T18; + Rp[WS(rs, 2)] = T15 + T18; + } + T1f = TJ - TC; + T1g = T1d - T1a; + Im[WS(rs, 1)] = T1f - T1g; + Ip[WS(rs, 2)] = T1f + T1g; + { + E T11, T1k, T14, T1h, T12, T13; + T11 = TL - TO; + T1k = T1i - T1j; + T12 = TT - TQ; + T13 = TV + TY; + T14 = KP707106781 * (T12 - T13); + T1h = KP707106781 * (T12 + T13); + Rm[0] = T11 - T14; + Ip[WS(rs, 1)] = T1h + T1k; + Rp[WS(rs, 3)] = T11 + T14; + Im[WS(rs, 2)] = T1h - T1k; + } + { + E TP, T1m, T10, T1l, TU, TZ; + TP = TL + TO; + T1m = T1j + T1i; + TU = TQ + TT; + TZ = TV - TY; + T10 = KP707106781 * (TU + TZ); + T1l = KP707106781 * (TZ - TU); + Rm[WS(rs, 2)] = TP - T10; + Ip[WS(rs, 3)] = T1l + T1m; + Rp[WS(rs, 1)] = TP + T10; 
+ Im[0] = T1l - T1m; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cf2_8", twinstr, &GENUS, {56, 26, 18, 0} }; + +void X(codelet_hc2cf2_8) (planner *p) { + X(khc2c_register) (p, hc2cf2_8, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:22 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cf_10 -include hc2cf.h */ + +/* + * This function contains 102 FP additions, 72 FP multiplications, + * (or, 48 additions, 18 multiplications, 54 fused multiply/add), + * 70 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T1X, T21, T20, T22; + { + E T26, T1U, T8, T12, T1n, T1P, T24, T1K, T1Y, T18, T10, T2b, T1H, T23, T15; + E T1Z, T2a, Tz, T1O, T1y; + { + E T1, T1T, T3, T6, T2, T5; + T1 = Rp[0]; + T1T = Rm[0]; + T3 = Ip[WS(rs, 2)]; + T6 = Im[WS(rs, 2)]; + T2 = W[8]; + T5 = W[9]; + { + E T1l, TY, T1h, T1J, TM, T16, T1j, TS; + { + E TF, T1e, TO, TR, T1g, TL, TN, TQ, T1i, TP; + { + E TU, TX, TT, TW; + { + E TB, TE, T1R, T4, TA, TD; + TB = Rp[WS(rs, 2)]; + TE = Rm[WS(rs, 2)]; + T1R = T2 * T6; + T4 = T2 * T3; + TA = W[6]; + TD = W[7]; + { + E T1S, T7, T1d, TC; + T1S = FNMS(T5, T3, T1R); + T7 = FMA(T5, T6, T4); + T1d = TA * TE; + TC = TA * TB; + T26 = T1T - T1S; + 
T1U = T1S + T1T; + T8 = T1 - T7; + T12 = T1 + T7; + TF = FMA(TD, TE, TC); + T1e = FNMS(TD, TB, T1d); + } + } + TU = Ip[0]; + TX = Im[0]; + TT = W[0]; + TW = W[1]; + { + E TH, TK, TJ, T1f, TI, T1k, TV, TG; + TH = Ip[WS(rs, 4)]; + TK = Im[WS(rs, 4)]; + T1k = TT * TX; + TV = TT * TU; + TG = W[16]; + TJ = W[17]; + T1l = FNMS(TW, TU, T1k); + TY = FMA(TW, TX, TV); + T1f = TG * TK; + TI = TG * TH; + TO = Rp[WS(rs, 3)]; + TR = Rm[WS(rs, 3)]; + T1g = FNMS(TJ, TH, T1f); + TL = FMA(TJ, TK, TI); + TN = W[10]; + TQ = W[11]; + } + } + T1h = T1e + T1g; + T1J = T1g - T1e; + TM = TF - TL; + T16 = TF + TL; + T1i = TN * TR; + TP = TN * TO; + T1j = FNMS(TQ, TO, T1i); + TS = FMA(TQ, TR, TP); + } + { + E T1p, Te, T1w, Tx, Tn, Tq, Tp, T1r, Tk, T1t, To; + { + E Tt, Tw, Tv, T1v, Tu; + { + E Ta, Td, T9, Tc, T1o, Tb, Ts; + Ta = Rp[WS(rs, 1)]; + Td = Rm[WS(rs, 1)]; + { + E T1I, T1m, TZ, T17; + T1I = T1l - T1j; + T1m = T1j + T1l; + TZ = TS - TY; + T17 = TS + TY; + T1n = T1h - T1m; + T1P = T1h + T1m; + T24 = T1J + T1I; + T1K = T1I - T1J; + T1Y = T16 - T17; + T18 = T16 + T17; + T10 = TM + TZ; + T2b = TZ - TM; + T9 = W[2]; + } + Tc = W[3]; + Tt = Ip[WS(rs, 1)]; + Tw = Im[WS(rs, 1)]; + T1o = T9 * Td; + Tb = T9 * Ta; + Ts = W[4]; + Tv = W[5]; + T1p = FNMS(Tc, Ta, T1o); + Te = FMA(Tc, Td, Tb); + T1v = Ts * Tw; + Tu = Ts * Tt; + } + { + E Tg, Tj, Tf, Ti, T1q, Th, Tm; + Tg = Ip[WS(rs, 3)]; + Tj = Im[WS(rs, 3)]; + T1w = FNMS(Tv, Tt, T1v); + Tx = FMA(Tv, Tw, Tu); + Tf = W[12]; + Ti = W[13]; + Tn = Rp[WS(rs, 4)]; + Tq = Rm[WS(rs, 4)]; + T1q = Tf * Tj; + Th = Tf * Tg; + Tm = W[14]; + Tp = W[15]; + T1r = FNMS(Ti, Tg, T1q); + Tk = FMA(Ti, Tj, Th); + T1t = Tm * Tq; + To = Tm * Tn; + } + } + { + E T1s, T1G, Tl, T13, T1u, Tr; + T1s = T1p + T1r; + T1G = T1r - T1p; + Tl = Te - Tk; + T13 = Te + Tk; + T1u = FNMS(Tp, Tn, T1t); + Tr = FMA(Tp, Tq, To); + { + E T1x, T1F, T14, Ty; + T1x = T1u + T1w; + T1F = T1w - T1u; + T14 = Tr + Tx; + Ty = Tr - Tx; + T1H = T1F - T1G; + T23 = T1G + T1F; + T15 = T13 + T14; + T1Z = T13 
- T14; + T2a = Ty - Tl; + Tz = Tl + Ty; + T1O = T1s + T1x; + T1y = T1s - T1x; + } + } + } + } + } + { + E T2c, T2e, T29, T2d; + { + E T1D, T11, T25, T28, T27; + T1D = Tz - T10; + T11 = Tz + T10; + T25 = T23 + T24; + T28 = T24 - T23; + { + E T1N, T1L, T1C, T1M, T1E; + T1N = FNMS(KP618033988, T1H, T1K); + T1L = FMA(KP618033988, T1K, T1H); + Rm[WS(rs, 4)] = T8 + T11; + T1C = FNMS(KP250000000, T11, T8); + T1M = FNMS(KP559016994, T1D, T1C); + T1E = FMA(KP559016994, T1D, T1C); + T27 = FMA(KP250000000, T25, T26); + T2c = FMA(KP618033988, T2b, T2a); + T2e = FNMS(KP618033988, T2a, T2b); + Rp[WS(rs, 1)] = FMA(KP951056516, T1L, T1E); + Rm[0] = FNMS(KP951056516, T1L, T1E); + Rp[WS(rs, 3)] = FMA(KP951056516, T1N, T1M); + Rm[WS(rs, 2)] = FNMS(KP951056516, T1N, T1M); + } + Im[WS(rs, 4)] = T25 - T26; + T29 = FMA(KP559016994, T28, T27); + T2d = FNMS(KP559016994, T28, T27); + } + { + E T1c, T1A, T1z, T1B, T19, T1b, T1a, T1Q, T1W, T1V; + T19 = T15 + T18; + T1b = T15 - T18; + Ip[WS(rs, 3)] = FMA(KP951056516, T2e, T2d); + Im[WS(rs, 2)] = FMS(KP951056516, T2e, T2d); + Ip[WS(rs, 1)] = FMA(KP951056516, T2c, T29); + Im[0] = FMS(KP951056516, T2c, T29); + T1a = FNMS(KP250000000, T19, T12); + Rp[0] = T12 + T19; + T1c = FNMS(KP559016994, T1b, T1a); + T1A = FMA(KP559016994, T1b, T1a); + T1z = FNMS(KP618033988, T1y, T1n); + T1B = FMA(KP618033988, T1n, T1y); + T1Q = T1O + T1P; + T1W = T1O - T1P; + Rm[WS(rs, 3)] = FMA(KP951056516, T1B, T1A); + Rp[WS(rs, 4)] = FNMS(KP951056516, T1B, T1A); + Rm[WS(rs, 1)] = FMA(KP951056516, T1z, T1c); + Rp[WS(rs, 2)] = FNMS(KP951056516, T1z, T1c); + T1V = FNMS(KP250000000, T1Q, T1U); + Ip[0] = T1Q + T1U; + T1X = FNMS(KP559016994, T1W, T1V); + T21 = FMA(KP559016994, T1W, T1V); + T20 = FNMS(KP618033988, T1Z, T1Y); + T22 = FMA(KP618033988, T1Y, T1Z); + } + } + } + Ip[WS(rs, 4)] = FMA(KP951056516, T22, T21); + Im[WS(rs, 3)] = FMS(KP951056516, T22, T21); + Ip[WS(rs, 2)] = FMA(KP951056516, T20, T1X); + Im[WS(rs, 1)] = FMS(KP951056516, T20, T1X); + } + } +} + +static const 
tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cf_10", twinstr, &GENUS, {48, 18, 54, 0} }; + +void X(codelet_hc2cf_10) (planner *p) { + X(khc2c_register) (p, hc2cf_10, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cf_10 -include hc2cf.h */ + +/* + * This function contains 102 FP additions, 60 FP multiplications, + * (or, 72 additions, 30 multiplications, 30 fused multiply/add), + * 45 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T7, T1O, TT, T1C, TF, TQ, TR, T1r, T1s, T1L, TX, TY, TZ, T16, T19; + E T1y, Ti, Tt, Tu, T1o, T1p, T1M, TU, TV, TW, T1d, T1g, T1x; + { + E T1, T1B, T6, T1A; + T1 = Rp[0]; + T1B = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Ip[WS(rs, 2)]; + T5 = Im[WS(rs, 2)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T1A = FNMS(T4, T3, T2 * T5); + } + T7 = T1 - T6; + T1O = T1B - T1A; + TT = T1 + T6; + T1C = T1A + T1B; + } + { + E Tz, T14, TP, T18, TE, T15, TK, T17; + { + E Tw, Ty, Tv, Tx; + Tw = Rp[WS(rs, 2)]; + Ty = Rm[WS(rs, 2)]; + Tv = W[6]; + Tx = W[7]; + Tz = FMA(Tv, Tw, Tx * Ty); + T14 = FNMS(Tx, Tw, Tv * Ty); + } + { + E TM, TO, TL, TN; + TM = Ip[0]; + TO = Im[0]; + TL = W[0]; + TN = W[1]; + TP = FMA(TL, TM, TN * TO); + T18 = FNMS(TN, TM, TL * TO); + } + { + E TB, TD, TA, TC; + TB = Ip[WS(rs, 
4)]; + TD = Im[WS(rs, 4)]; + TA = W[16]; + TC = W[17]; + TE = FMA(TA, TB, TC * TD); + T15 = FNMS(TC, TB, TA * TD); + } + { + E TH, TJ, TG, TI; + TH = Rp[WS(rs, 3)]; + TJ = Rm[WS(rs, 3)]; + TG = W[10]; + TI = W[11]; + TK = FMA(TG, TH, TI * TJ); + T17 = FNMS(TI, TH, TG * TJ); + } + TF = Tz - TE; + TQ = TK - TP; + TR = TF + TQ; + T1r = T14 - T15; + T1s = T18 - T17; + T1L = T1s - T1r; + TX = Tz + TE; + TY = TK + TP; + TZ = TX + TY; + T16 = T14 + T15; + T19 = T17 + T18; + T1y = T16 + T19; + } + { + E Tc, T1b, Ts, T1f, Th, T1c, Tn, T1e; + { + E T9, Tb, T8, Ta; + T9 = Rp[WS(rs, 1)]; + Tb = Rm[WS(rs, 1)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + T1b = FNMS(Ta, T9, T8 * Tb); + } + { + E Tp, Tr, To, Tq; + Tp = Ip[WS(rs, 1)]; + Tr = Im[WS(rs, 1)]; + To = W[4]; + Tq = W[5]; + Ts = FMA(To, Tp, Tq * Tr); + T1f = FNMS(Tq, Tp, To * Tr); + } + { + E Te, Tg, Td, Tf; + Te = Ip[WS(rs, 3)]; + Tg = Im[WS(rs, 3)]; + Td = W[12]; + Tf = W[13]; + Th = FMA(Td, Te, Tf * Tg); + T1c = FNMS(Tf, Te, Td * Tg); + } + { + E Tk, Tm, Tj, Tl; + Tk = Rp[WS(rs, 4)]; + Tm = Rm[WS(rs, 4)]; + Tj = W[14]; + Tl = W[15]; + Tn = FMA(Tj, Tk, Tl * Tm); + T1e = FNMS(Tl, Tk, Tj * Tm); + } + Ti = Tc - Th; + Tt = Tn - Ts; + Tu = Ti + Tt; + T1o = T1b - T1c; + T1p = T1e - T1f; + T1M = T1o + T1p; + TU = Tc + Th; + TV = Tn + Ts; + TW = TU + TV; + T1d = T1b + T1c; + T1g = T1e + T1f; + T1x = T1d + T1g; + } + { + E T1l, TS, T1m, T1u, T1w, T1q, T1t, T1v, T1n; + T1l = KP559016994 * (Tu - TR); + TS = Tu + TR; + T1m = FNMS(KP250000000, TS, T7); + T1q = T1o - T1p; + T1t = T1r + T1s; + T1u = FMA(KP951056516, T1q, KP587785252 * T1t); + T1w = FNMS(KP587785252, T1q, KP951056516 * T1t); + Rm[WS(rs, 4)] = T7 + TS; + T1v = T1m - T1l; + Rm[WS(rs, 2)] = T1v - T1w; + Rp[WS(rs, 3)] = T1v + T1w; + T1n = T1l + T1m; + Rm[0] = T1n - T1u; + Rp[WS(rs, 1)] = T1n + T1u; + } + { + E T1S, T1N, T1T, T1R, T1V, T1P, T1Q, T1W, T1U; + T1S = KP559016994 * (T1M + T1L); + T1N = T1L - T1M; + T1T = FMA(KP250000000, T1N, T1O); + T1P = TQ - TF; 
+ T1Q = Ti - Tt; + T1R = FNMS(KP951056516, T1Q, KP587785252 * T1P); + T1V = FMA(KP587785252, T1Q, KP951056516 * T1P); + Im[WS(rs, 4)] = T1N - T1O; + T1W = T1T - T1S; + Im[WS(rs, 2)] = T1V - T1W; + Ip[WS(rs, 3)] = T1V + T1W; + T1U = T1S + T1T; + Im[0] = T1R - T1U; + Ip[WS(rs, 1)] = T1R + T1U; + } + { + E T12, T10, T11, T1i, T1k, T1a, T1h, T1j, T13; + T12 = KP559016994 * (TW - TZ); + T10 = TW + TZ; + T11 = FNMS(KP250000000, T10, TT); + T1a = T16 - T19; + T1h = T1d - T1g; + T1i = FNMS(KP587785252, T1h, KP951056516 * T1a); + T1k = FMA(KP951056516, T1h, KP587785252 * T1a); + Rp[0] = TT + T10; + T1j = T12 + T11; + Rp[WS(rs, 4)] = T1j - T1k; + Rm[WS(rs, 3)] = T1j + T1k; + T13 = T11 - T12; + Rp[WS(rs, 2)] = T13 - T1i; + Rm[WS(rs, 1)] = T13 + T1i; + } + { + E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I; + T1H = KP559016994 * (T1x - T1y); + T1z = T1x + T1y; + T1G = FNMS(KP250000000, T1z, T1C); + T1D = TX - TY; + T1E = TU - TV; + T1F = FNMS(KP587785252, T1E, KP951056516 * T1D); + T1J = FMA(KP951056516, T1E, KP587785252 * T1D); + Ip[0] = T1z + T1C; + T1K = T1H + T1G; + Im[WS(rs, 3)] = T1J - T1K; + Ip[WS(rs, 4)] = T1J + T1K; + T1I = T1G - T1H; + Im[WS(rs, 1)] = T1F - T1I; + Ip[WS(rs, 2)] = T1F + T1I; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cf_10", twinstr, &GENUS, {72, 30, 30, 0} }; + +void X(codelet_hc2cf_10) (planner *p) { + X(khc2c_register) (p, hc2cf_10, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:22 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cf_12 -include hc2cf.h */ + +/* + * This function contains 118 FP additions, 68 FP multiplications, + * (or, 72 additions, 22 multiplications, 46 fused multiply/add), + * 84 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E T2n, T2u; + { + E T1, T2i, T2e, Tl, T1Y, T10, T1S, TG, T2f, T1s, T2s, Ty, T1Z, T1H, T21; + E T1d, TI, TL, T2h, T1l, T2p, Te, TJ, T1w, TO, TR, TN, TK, TQ; + { + E TW, TZ, TY, T1X, TX; + T1 = Rp[0]; + T2i = Rm[0]; + { + E Th, Tk, Tg, Tj, T2d, Ti, TV; + Th = Rp[WS(rs, 3)]; + Tk = Rm[WS(rs, 3)]; + Tg = W[10]; + Tj = W[11]; + TW = Ip[WS(rs, 4)]; + TZ = Im[WS(rs, 4)]; + T2d = Tg * Tk; + Ti = Tg * Th; + 
TV = W[16]; + TY = W[17]; + T2e = FNMS(Tj, Th, T2d); + Tl = FMA(Tj, Tk, Ti); + T1X = TV * TZ; + TX = TV * TW; + } + { + E Tn, Tq, Tt, T1o, To, Tw, Ts, Tp, Tv; + { + E TC, TF, TB, TE, T1R, TD, Tm; + TC = Ip[WS(rs, 1)]; + TF = Im[WS(rs, 1)]; + T1Y = FNMS(TY, TW, T1X); + T10 = FMA(TY, TZ, TX); + TB = W[4]; + TE = W[5]; + Tn = Rp[WS(rs, 5)]; + Tq = Rm[WS(rs, 5)]; + T1R = TB * TF; + TD = TB * TC; + Tm = W[18]; + Tt = Rp[WS(rs, 1)]; + T1S = FNMS(TE, TC, T1R); + TG = FMA(TE, TF, TD); + T1o = Tm * Tq; + To = Tm * Tn; + Tw = Rm[WS(rs, 1)]; + Ts = W[2]; + Tp = W[19]; + Tv = W[3]; + } + { + E T12, T15, T13, T1D, T18, T1b, T17, T14, T1a; + { + E T1p, Tr, T1r, Tx, T1q, Tu, T11; + T12 = Ip[0]; + T1q = Ts * Tw; + Tu = Ts * Tt; + T1p = FNMS(Tp, Tn, T1o); + Tr = FMA(Tp, Tq, To); + T1r = FNMS(Tv, Tt, T1q); + Tx = FMA(Tv, Tw, Tu); + T15 = Im[0]; + T11 = W[0]; + T2f = T1p + T1r; + T1s = T1p - T1r; + T2s = Tx - Tr; + Ty = Tr + Tx; + T13 = T11 * T12; + T1D = T11 * T15; + } + T18 = Ip[WS(rs, 2)]; + T1b = Im[WS(rs, 2)]; + T17 = W[8]; + T14 = W[1]; + T1a = W[9]; + { + E T3, T6, T4, T1h, T9, Tc, T8, T5, Tb; + { + E T1E, T16, T1G, T1c, T1F, T19, T2; + T3 = Rp[WS(rs, 2)]; + T1F = T17 * T1b; + T19 = T17 * T18; + T1E = FNMS(T14, T12, T1D); + T16 = FMA(T14, T15, T13); + T1G = FNMS(T1a, T18, T1F); + T1c = FMA(T1a, T1b, T19); + T6 = Rm[WS(rs, 2)]; + T2 = W[6]; + T1Z = T1E + T1G; + T1H = T1E - T1G; + T21 = T1c - T16; + T1d = T16 + T1c; + T4 = T2 * T3; + T1h = T2 * T6; + } + T9 = Rp[WS(rs, 4)]; + Tc = Rm[WS(rs, 4)]; + T8 = W[14]; + T5 = W[7]; + Tb = W[15]; + { + E T1i, T7, T1k, Td, T1j, Ta, TH; + TI = Ip[WS(rs, 3)]; + T1j = T8 * Tc; + Ta = T8 * T9; + T1i = FNMS(T5, T3, T1h); + T7 = FMA(T5, T6, T4); + T1k = FNMS(Tb, T9, T1j); + Td = FMA(Tb, Tc, Ta); + TL = Im[WS(rs, 3)]; + TH = W[12]; + T2h = T1i + T1k; + T1l = T1i - T1k; + T2p = Td - T7; + Te = T7 + Td; + TJ = TH * TI; + T1w = TH * TL; + } + TO = Ip[WS(rs, 5)]; + TR = Im[WS(rs, 5)]; + TN = W[20]; + TK = W[13]; + TQ = W[21]; + } + } + } + } + { + E 
T1g, T1n, T2r, T1A, T1V, T28, TA, T2o, T1v, T1C, T1U, T29, T2m, T2k, T2l; + E T1f, T2a, T20; + { + E T2g, T1T, TT, T2j, TU, T1e; + { + E Tf, T1x, TM, T1z, TS, Tz, T1y, TP; + T1g = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T1y = TN * TR; + TP = TN * TO; + T1x = FNMS(TK, TI, T1w); + TM = FMA(TK, TL, TJ); + T1z = FNMS(TQ, TO, T1y); + TS = FMA(TQ, TR, TP); + Tz = Tl + Ty; + T1n = FNMS(KP500000000, Ty, Tl); + T2r = FNMS(KP500000000, T2f, T2e); + T2g = T2e + T2f; + T1T = T1x + T1z; + T1A = T1x - T1z; + T1V = TS - TM; + TT = TM + TS; + T28 = Tf - Tz; + TA = Tf + Tz; + T2j = T2h + T2i; + T2o = FNMS(KP500000000, T2h, T2i); + } + T1v = FNMS(KP500000000, TT, TG); + TU = TG + TT; + T1e = T10 + T1d; + T1C = FNMS(KP500000000, T1d, T10); + T1U = FNMS(KP500000000, T1T, T1S); + T29 = T1S + T1T; + T2m = T2j - T2g; + T2k = T2g + T2j; + T2l = TU - T1e; + T1f = TU + T1e; + T2a = T1Y + T1Z; + T20 = FNMS(KP500000000, T1Z, T1Y); + } + { + E T1m, T1K, T2z, T2q, T2y, T2t, T1L, T1t, T1B, T1N, T2c, T2b; + Im[WS(rs, 2)] = T2l - T2m; + Ip[WS(rs, 3)] = T2l + T2m; + Rp[0] = TA + T1f; + Rm[WS(rs, 5)] = TA - T1f; + T2c = T29 + T2a; + T2b = T29 - T2a; + T1m = FNMS(KP866025403, T1l, T1g); + T1K = FMA(KP866025403, T1l, T1g); + Ip[0] = T2c + T2k; + Im[WS(rs, 5)] = T2c - T2k; + Rm[WS(rs, 2)] = T28 + T2b; + Rp[WS(rs, 3)] = T28 - T2b; + T2z = FNMS(KP866025403, T2p, T2o); + T2q = FMA(KP866025403, T2p, T2o); + T2y = FNMS(KP866025403, T2s, T2r); + T2t = FMA(KP866025403, T2s, T2r); + T1L = FMA(KP866025403, T1s, T1n); + T1t = FNMS(KP866025403, T1s, T1n); + T1B = FNMS(KP866025403, T1A, T1v); + T1N = FMA(KP866025403, T1A, T1v); + { + E T1Q, T2C, T23, T24, T2B, T27, T2v, T2w; + { + E T1u, T25, T26, T1O, T1I, T2A, T2x, T1W, T22, T1M, T1J, T1P; + T1Q = T1m - T1t; + T1u = T1m + T1t; + T25 = FMA(KP866025403, T1V, T1U); + T1W = FNMS(KP866025403, T1V, T1U); + T26 = FMA(KP866025403, T21, T20); + T22 = FNMS(KP866025403, T21, T20); + T1O = FMA(KP866025403, T1H, T1C); + T1I = FNMS(KP866025403, T1H, T1C); + T2A = T2y + 
T2z; + T2C = T2z - T2y; + T23 = T1W - T22; + T2x = T1W + T22; + T1M = T1K + T1L; + T24 = T1K - T1L; + T2B = T1I - T1B; + T1J = T1B + T1I; + T1P = T1N + T1O; + T2n = T1O - T1N; + Ip[WS(rs, 2)] = T2A - T2x; + Im[WS(rs, 3)] = -(T2x + T2A); + Rm[WS(rs, 3)] = T1u + T1J; + Rp[WS(rs, 2)] = T1u - T1J; + Rm[WS(rs, 1)] = T1M - T1P; + Rp[WS(rs, 4)] = T1M + T1P; + T27 = T25 - T26; + T2v = T25 + T26; + T2w = T2t + T2q; + T2u = T2q - T2t; + } + Ip[WS(rs, 4)] = T2v + T2w; + Im[WS(rs, 1)] = T2v - T2w; + Rp[WS(rs, 5)] = T1Q + T23; + Rm[0] = T1Q - T23; + Ip[WS(rs, 5)] = T2B + T2C; + Im[0] = T2B - T2C; + Rp[WS(rs, 1)] = T24 + T27; + Rm[WS(rs, 4)] = T24 - T27; + } + } + } + } + Ip[WS(rs, 1)] = T2n + T2u; + Im[WS(rs, 4)] = T2n - T2u; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cf_12", twinstr, &GENUS, {72, 22, 46, 0} }; + +void X(codelet_hc2cf_12) (planner *p) { + X(khc2c_register) (p, hc2cf_12, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cf_12 -include hc2cf.h */ + +/* + * This function contains 118 FP additions, 60 FP multiplications, + * (or, 88 additions, 30 multiplications, 30 fused multiply/add), + * 47 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E T1, T1W, T18, T22, Tc, T15, T1V, T23, TR, T1E, T1o, T1D, T12, T1l, T1F; + E T1G, Ti, T1S, T1d, T25, Tt, T1a, T1T, T26, TA, T1y, T1j, T1B, TL, T1g; + E T1z, T1A; + { + E T6, T16, Tb, T17; + 
T1 = Rp[0]; + T1W = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 2)]; + T2 = W[6]; + T4 = W[7]; + T6 = FMA(T2, T3, T4 * T5); + T16 = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = Rp[WS(rs, 4)]; + Ta = Rm[WS(rs, 4)]; + T7 = W[14]; + T9 = W[15]; + Tb = FMA(T7, T8, T9 * Ta); + T17 = FNMS(T9, T8, T7 * Ta); + } + T18 = KP866025403 * (T16 - T17); + T22 = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + T15 = FNMS(KP500000000, Tc, T1); + T1V = T16 + T17; + T23 = FNMS(KP500000000, T1V, T1W); + } + { + E T11, T1n, TW, T1m; + { + E TO, TQ, TN, TP; + TO = Ip[WS(rs, 4)]; + TQ = Im[WS(rs, 4)]; + TN = W[16]; + TP = W[17]; + TR = FMA(TN, TO, TP * TQ); + T1E = FNMS(TP, TO, TN * TQ); + } + { + E TY, T10, TX, TZ; + TY = Ip[WS(rs, 2)]; + T10 = Im[WS(rs, 2)]; + TX = W[8]; + TZ = W[9]; + T11 = FMA(TX, TY, TZ * T10); + T1n = FNMS(TZ, TY, TX * T10); + } + { + E TT, TV, TS, TU; + TT = Ip[0]; + TV = Im[0]; + TS = W[0]; + TU = W[1]; + TW = FMA(TS, TT, TU * TV); + T1m = FNMS(TU, TT, TS * TV); + } + T1o = KP866025403 * (T1m - T1n); + T1D = KP866025403 * (T11 - TW); + T12 = TW + T11; + T1l = FNMS(KP500000000, T12, TR); + T1F = T1m + T1n; + T1G = FNMS(KP500000000, T1F, T1E); + } + { + E Ts, T1c, Tn, T1b; + { + E Tf, Th, Te, Tg; + Tf = Rp[WS(rs, 3)]; + Th = Rm[WS(rs, 3)]; + Te = W[10]; + Tg = W[11]; + Ti = FMA(Te, Tf, Tg * Th); + T1S = FNMS(Tg, Tf, Te * Th); + } + { + E Tp, Tr, To, Tq; + Tp = Rp[WS(rs, 1)]; + Tr = Rm[WS(rs, 1)]; + To = W[2]; + Tq = W[3]; + Ts = FMA(To, Tp, Tq * Tr); + T1c = FNMS(Tq, Tp, To * Tr); + } + { + E Tk, Tm, Tj, Tl; + Tk = Rp[WS(rs, 5)]; + Tm = Rm[WS(rs, 5)]; + Tj = W[18]; + Tl = W[19]; + Tn = FMA(Tj, Tk, Tl * Tm); + T1b = FNMS(Tl, Tk, Tj * Tm); + } + T1d = KP866025403 * (T1b - T1c); + T25 = KP866025403 * (Ts - Tn); + Tt = Tn + Ts; + T1a = FNMS(KP500000000, Tt, Ti); + T1T = T1b + T1c; + T26 = FNMS(KP500000000, T1T, T1S); + } + { + E TK, T1i, TF, T1h; + { + E Tx, Tz, Tw, Ty; + Tx = Ip[WS(rs, 1)]; + Tz = Im[WS(rs, 1)]; + Tw = W[4]; + Ty = 
W[5]; + TA = FMA(Tw, Tx, Ty * Tz); + T1y = FNMS(Ty, Tx, Tw * Tz); + } + { + E TH, TJ, TG, TI; + TH = Ip[WS(rs, 5)]; + TJ = Im[WS(rs, 5)]; + TG = W[20]; + TI = W[21]; + TK = FMA(TG, TH, TI * TJ); + T1i = FNMS(TI, TH, TG * TJ); + } + { + E TC, TE, TB, TD; + TC = Ip[WS(rs, 3)]; + TE = Im[WS(rs, 3)]; + TB = W[12]; + TD = W[13]; + TF = FMA(TB, TC, TD * TE); + T1h = FNMS(TD, TC, TB * TE); + } + T1j = KP866025403 * (T1h - T1i); + T1B = KP866025403 * (TK - TF); + TL = TF + TK; + T1g = FNMS(KP500000000, TL, TA); + T1z = T1h + T1i; + T1A = FNMS(KP500000000, T1z, T1y); + } + { + E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R; + { + E Td, Tu, T1U, T1X; + Td = T1 + Tc; + Tu = Ti + Tt; + Tv = Td + Tu; + T1N = Td - Tu; + T1U = T1S + T1T; + T1X = T1V + T1W; + T1Y = T1U + T1X; + T20 = T1X - T1U; + } + { + E TM, T13, T1O, T1P; + TM = TA + TL; + T13 = TR + T12; + T14 = TM + T13; + T1Z = TM - T13; + T1O = T1y + T1z; + T1P = T1E + T1F; + T1Q = T1O - T1P; + T1R = T1O + T1P; + } + Rm[WS(rs, 5)] = Tv - T14; + Im[WS(rs, 5)] = T1R - T1Y; + Rp[0] = Tv + T14; + Ip[0] = T1R + T1Y; + Rp[WS(rs, 3)] = T1N - T1Q; + Ip[WS(rs, 3)] = T1Z + T20; + Rm[WS(rs, 2)] = T1N + T1Q; + Im[WS(rs, 2)] = T1Z - T20; + } + { + E T1t, T1J, T28, T2a, T1w, T21, T1M, T29; + { + E T1r, T1s, T24, T27; + T1r = T15 + T18; + T1s = T1a + T1d; + T1t = T1r + T1s; + T1J = T1r - T1s; + T24 = T22 + T23; + T27 = T25 + T26; + T28 = T24 - T27; + T2a = T27 + T24; + } + { + E T1u, T1v, T1K, T1L; + T1u = T1g + T1j; + T1v = T1l + T1o; + T1w = T1u + T1v; + T21 = T1v - T1u; + T1K = T1B + T1A; + T1L = T1D + T1G; + T1M = T1K - T1L; + T29 = T1K + T1L; + } + Rm[WS(rs, 1)] = T1t - T1w; + Im[WS(rs, 1)] = T29 - T2a; + Rp[WS(rs, 4)] = T1t + T1w; + Ip[WS(rs, 4)] = T29 + T2a; + Rm[WS(rs, 4)] = T1J - T1M; + Im[WS(rs, 4)] = T21 - T28; + Rp[WS(rs, 1)] = T1J + T1M; + Ip[WS(rs, 1)] = T21 + T28; + } + { + E T1f, T1x, T2e, T2g, T1q, T2f, T1I, T2b; + { + E T19, T1e, T2c, T2d; + T19 = T15 - T18; + T1e = T1a - T1d; + T1f = T19 + T1e; + T1x = T19 - T1e; + T2c = T26 - 
T25; + T2d = T23 - T22; + T2e = T2c + T2d; + T2g = T2d - T2c; + } + { + E T1k, T1p, T1C, T1H; + T1k = T1g - T1j; + T1p = T1l - T1o; + T1q = T1k + T1p; + T2f = T1p - T1k; + T1C = T1A - T1B; + T1H = T1D - T1G; + T1I = T1C + T1H; + T2b = T1H - T1C; + } + Rp[WS(rs, 2)] = T1f - T1q; + Ip[WS(rs, 2)] = T2b + T2e; + Rm[WS(rs, 3)] = T1f + T1q; + Im[WS(rs, 3)] = T2b - T2e; + Rm[0] = T1x - T1I; + Im[0] = T2f - T2g; + Rp[WS(rs, 5)] = T1x + T1I; + Ip[WS(rs, 5)] = T2f + T2g; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cf_12", twinstr, &GENUS, {88, 30, 30, 0} }; + +void X(codelet_hc2cf_12) (planner *p) { + X(khc2c_register) (p, hc2cf_12, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,785 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:22 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hc2cf_16 -include hc2cf.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 104 additions, 30 multiplications, 70 fused multiply/add), + * 97 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T3G, T3F; + { + E T3z, T3o, T8, T1I, T2p, T35, T2r, T1s, T2w, T36, T2k, T1F, T3k, T1N, T3A; + E Tl, T1T, T2V, T1U, Tz, T29, T30, T2c, T11, TB, TE, T2h, T31, T2a, T1e; + E TC, T1X, TH, TK, TG, TD, TJ; + { + E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti; + { + E T1h, T1k, T1n, T2l, T1i, T1q, T1m, T1j, T1p; + { + E T1, T3n, T3, T6, T2, T5; + T1 = Rp[0]; + T3n = Rm[0]; + T3 = Rp[WS(rs, 4)]; + T6 = Rm[WS(rs, 4)]; + T2 = W[14]; + T5 = W[15]; + { + E T3l, T4, T1g, T3m, T7; + T1h = Ip[WS(rs, 7)]; + T1k = Im[WS(rs, 7)]; + T3l = T2 * T6; + T4 = T2 * T3; + T1g = W[28]; + T1n = Ip[WS(rs, 3)]; + T3m = FNMS(T5, T3, T3l); + T7 = FMA(T5, T6, T4); + T2l = T1g * T1k; + T1i = T1g * T1h; + T3z = T3n - T3m; + T3o = T3m + T3n; + T8 = T1 
+ T7; + T1I = T1 - T7; + T1q = Im[WS(rs, 3)]; + T1m = W[12]; + } + T1j = W[29]; + T1p = W[13]; + } + { + E T1u, T1x, T1v, T2s, T1A, T1D, T1z, T1w, T1C; + { + E T2m, T1l, T2o, T1r, T2n, T1o, T1t; + T1u = Ip[WS(rs, 1)]; + T2n = T1m * T1q; + T1o = T1m * T1n; + T2m = FNMS(T1j, T1h, T2l); + T1l = FMA(T1j, T1k, T1i); + T2o = FNMS(T1p, T1n, T2n); + T1r = FMA(T1p, T1q, T1o); + T1x = Im[WS(rs, 1)]; + T1t = W[4]; + T2p = T2m - T2o; + T35 = T2m + T2o; + T2r = T1l - T1r; + T1s = T1l + T1r; + T1v = T1t * T1u; + T2s = T1t * T1x; + } + T1A = Ip[WS(rs, 5)]; + T1D = Im[WS(rs, 5)]; + T1z = W[20]; + T1w = W[5]; + T1C = W[21]; + { + E T2t, T1y, T2v, T1E, T2u, T1B, T9; + Ta = Rp[WS(rs, 2)]; + T2u = T1z * T1D; + T1B = T1z * T1A; + T2t = FNMS(T1w, T1u, T2s); + T1y = FMA(T1w, T1x, T1v); + T2v = FNMS(T1C, T1A, T2u); + T1E = FMA(T1C, T1D, T1B); + Td = Rm[WS(rs, 2)]; + T9 = W[6]; + T2w = T2t - T2v; + T36 = T2t + T2v; + T2k = T1E - T1y; + T1F = T1y + T1E; + Tb = T9 * Ta; + T1J = T9 * Td; + } + Tg = Rp[WS(rs, 6)]; + Tj = Rm[WS(rs, 6)]; + Tf = W[22]; + Tc = W[7]; + Ti = W[23]; + } + } + { + E TQ, TT, TR, T25, TW, TZ, TV, TS, TY; + { + E To, Tr, Tp, T1P, Tu, Tx, Tt, Tq, Tw; + { + E T1K, Te, T1M, Tk, T1L, Th, Tn; + To = Rp[WS(rs, 1)]; + T1L = Tf * Tj; + Th = Tf * Tg; + T1K = FNMS(Tc, Ta, T1J); + Te = FMA(Tc, Td, Tb); + T1M = FNMS(Ti, Tg, T1L); + Tk = FMA(Ti, Tj, Th); + Tr = Rm[WS(rs, 1)]; + Tn = W[2]; + T3k = T1K + T1M; + T1N = T1K - T1M; + T3A = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T1P = Tn * Tr; + } + Tu = Rp[WS(rs, 5)]; + Tx = Rm[WS(rs, 5)]; + Tt = W[18]; + Tq = W[3]; + Tw = W[19]; + { + E T1Q, Ts, T1S, Ty, T1R, Tv, TP; + TQ = Ip[0]; + T1R = Tt * Tx; + Tv = Tt * Tu; + T1Q = FNMS(Tq, To, T1P); + Ts = FMA(Tq, Tr, Tp); + T1S = FNMS(Tw, Tu, T1R); + Ty = FMA(Tw, Tx, Tv); + TT = Im[0]; + TP = W[0]; + T1T = T1Q - T1S; + T2V = T1Q + T1S; + T1U = Ts - Ty; + Tz = Ts + Ty; + TR = TP * TQ; + T25 = TP * TT; + } + TW = Ip[WS(rs, 4)]; + TZ = Im[WS(rs, 4)]; + TV = W[16]; + TS = W[1]; + TY = W[17]; + } + 
{ + E T13, T16, T14, T2d, T19, T1c, T18, T15, T1b; + { + E T26, TU, T28, T10, T27, TX, T12; + T13 = Ip[WS(rs, 2)]; + T27 = TV * TZ; + TX = TV * TW; + T26 = FNMS(TS, TQ, T25); + TU = FMA(TS, TT, TR); + T28 = FNMS(TY, TW, T27); + T10 = FMA(TY, TZ, TX); + T16 = Im[WS(rs, 2)]; + T12 = W[8]; + T29 = T26 - T28; + T30 = T26 + T28; + T2c = TU - T10; + T11 = TU + T10; + T14 = T12 * T13; + T2d = T12 * T16; + } + T19 = Ip[WS(rs, 6)]; + T1c = Im[WS(rs, 6)]; + T18 = W[24]; + T15 = W[9]; + T1b = W[25]; + { + E T2e, T17, T2g, T1d, T2f, T1a, TA; + TB = Rp[WS(rs, 7)]; + T2f = T18 * T1c; + T1a = T18 * T19; + T2e = FNMS(T15, T13, T2d); + T17 = FMA(T15, T16, T14); + T2g = FNMS(T1b, T19, T2f); + T1d = FMA(T1b, T1c, T1a); + TE = Rm[WS(rs, 7)]; + TA = W[26]; + T2h = T2e - T2g; + T31 = T2e + T2g; + T2a = T17 - T1d; + T1e = T17 + T1d; + TC = TA * TB; + T1X = TA * TE; + } + TH = Rp[WS(rs, 3)]; + TK = Rm[WS(rs, 3)]; + TG = W[10]; + TD = W[27]; + TJ = W[11]; + } + } + } + { + E T2U, T3u, T2Z, T21, T1W, T34, T2X, T3f, T32, T3t, T1H, T3q, T3e, TO, T3g; + E T37, T3r, T3s, T3h, T3i; + { + E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI; + T2U = T8 - Tl; + Tm = T8 + Tl; + T1Z = TG * TK; + TI = TG * TH; + T1Y = FNMS(TD, TB, T1X); + TF = FMA(TD, TE, TC); + T20 = FNMS(TJ, TH, T1Z); + TL = FMA(TJ, TK, TI); + T3p = T3k + T3o; + T3u = T3o - T3k; + { + E T1f, TM, T1G, T3j, T2W, TN; + T2Z = T11 - T1e; + T1f = T11 + T1e; + T21 = T1Y - T20; + T2W = T1Y + T20; + T1W = TF - TL; + TM = TF + TL; + T1G = T1s + T1F; + T34 = T1s - T1F; + T2X = T2V - T2W; + T3j = T2V + T2W; + T3f = T30 + T31; + T32 = T30 - T31; + T3t = TM - Tz; + TN = Tz + TM; + T3r = T1G - T1f; + T1H = T1f + T1G; + T3s = T3p - T3j; + T3q = T3j + T3p; + T3e = Tm - TN; + TO = Tm + TN; + T3g = T35 + T36; + T37 = T35 - T36; + } + } + Im[WS(rs, 3)] = T3r - T3s; + Ip[WS(rs, 4)] = T3r + T3s; + Rp[0] = TO + T1H; + Rm[WS(rs, 7)] = TO - T1H; + T3h = T3f - T3g; + T3i = T3f + T3g; + { + E T3a, T2Y, T3x, T3v, T3b, T33; + Ip[0] = T3i + T3q; + Im[WS(rs, 7)] = T3i - T3q; + 
Rp[WS(rs, 4)] = T3e + T3h; + Rm[WS(rs, 3)] = T3e - T3h; + T3a = T2U - T2X; + T2Y = T2U + T2X; + T3x = T3u - T3t; + T3v = T3t + T3u; + T3b = T32 - T2Z; + T33 = T2Z + T32; + { + E T2E, T1O, T3B, T3H, T2x, T2q, T3C, T23, T2S, T2O, T2K, T2J, T3I, T2H, T2B; + E T2j; + { + E T2F, T1V, T22, T2G, T3c, T38; + T2E = T1I + T1N; + T1O = T1I - T1N; + T3B = T3z - T3A; + T3H = T3A + T3z; + T3c = T34 + T37; + T38 = T34 - T37; + T2F = T1U + T1T; + T1V = T1T - T1U; + { + E T3d, T3w, T3y, T39; + T3d = T3b - T3c; + T3w = T3b + T3c; + T3y = T38 - T33; + T39 = T33 + T38; + Rp[WS(rs, 6)] = FMA(KP707106781, T3d, T3a); + Rm[WS(rs, 1)] = FNMS(KP707106781, T3d, T3a); + Ip[WS(rs, 2)] = FMA(KP707106781, T3w, T3v); + Im[WS(rs, 5)] = FMS(KP707106781, T3w, T3v); + Ip[WS(rs, 6)] = FMA(KP707106781, T3y, T3x); + Im[WS(rs, 1)] = FMS(KP707106781, T3y, T3x); + Rp[WS(rs, 2)] = FMA(KP707106781, T39, T2Y); + Rm[WS(rs, 5)] = FNMS(KP707106781, T39, T2Y); + T22 = T1W + T21; + T2G = T1W - T21; + } + { + E T2M, T2N, T2b, T2i; + T2x = T2r - T2w; + T2M = T2r + T2w; + T2N = T2p + T2k; + T2q = T2k - T2p; + T3C = T1V + T22; + T23 = T1V - T22; + T2S = FMA(KP414213562, T2M, T2N); + T2O = FNMS(KP414213562, T2N, T2M); + T2K = T29 - T2a; + T2b = T29 + T2a; + T2i = T2c - T2h; + T2J = T2c + T2h; + T3I = T2G - T2F; + T2H = T2F + T2G; + T2B = FNMS(KP414213562, T2b, T2i); + T2j = FMA(KP414213562, T2i, T2b); + } + } + { + E T2R, T2L, T3L, T3M; + { + E T2A, T24, T2C, T2y, T3J, T3K, T2D, T2z; + T2A = FNMS(KP707106781, T23, T1O); + T24 = FMA(KP707106781, T23, T1O); + T2R = FNMS(KP414213562, T2J, T2K); + T2L = FMA(KP414213562, T2K, T2J); + T2C = FNMS(KP414213562, T2q, T2x); + T2y = FMA(KP414213562, T2x, T2q); + T3J = FMA(KP707106781, T3I, T3H); + T3L = FNMS(KP707106781, T3I, T3H); + T3K = T2C - T2B; + T2D = T2B + T2C; + T3M = T2y - T2j; + T2z = T2j + T2y; + Ip[WS(rs, 3)] = FMA(KP923879532, T3K, T3J); + Im[WS(rs, 4)] = FMS(KP923879532, T3K, T3J); + Rp[WS(rs, 3)] = FMA(KP923879532, T2z, T24); + Rm[WS(rs, 4)] = FNMS(KP923879532, 
T2z, T24); + Rm[0] = FMA(KP923879532, T2D, T2A); + Rp[WS(rs, 7)] = FNMS(KP923879532, T2D, T2A); + } + { + E T2Q, T3D, T3E, T2T, T2I, T2P; + T2Q = FNMS(KP707106781, T2H, T2E); + T2I = FMA(KP707106781, T2H, T2E); + T2P = T2L + T2O; + T3G = T2O - T2L; + T3F = FNMS(KP707106781, T3C, T3B); + T3D = FMA(KP707106781, T3C, T3B); + Ip[WS(rs, 7)] = FMA(KP923879532, T3M, T3L); + Im[0] = FMS(KP923879532, T3M, T3L); + Rp[WS(rs, 1)] = FMA(KP923879532, T2P, T2I); + Rm[WS(rs, 6)] = FNMS(KP923879532, T2P, T2I); + T3E = T2R + T2S; + T2T = T2R - T2S; + Ip[WS(rs, 1)] = FMA(KP923879532, T3E, T3D); + Im[WS(rs, 6)] = FMS(KP923879532, T3E, T3D); + Rp[WS(rs, 5)] = FMA(KP923879532, T2T, T2Q); + Rm[WS(rs, 2)] = FNMS(KP923879532, T2T, T2Q); + } + } + } + } + } + } + Ip[WS(rs, 5)] = FMA(KP923879532, T3G, T3F); + Im[WS(rs, 2)] = FMS(KP923879532, T3G, T3F); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cf_16", twinstr, &GENUS, {104, 30, 70, 0} }; + +void X(codelet_hc2cf_16) (planner *p) { + X(khc2c_register) (p, hc2cf_16, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hc2cf_16 -include hc2cf.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 52 stack variables, 3 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T7, 
T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H; + E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2j, T24, T2k, TS, T13, T2w, T2x; + E T2y, T2z, T1O, T2g, T1T, T2h; + { + E T1, T2T, T6, T2S; + T1 = Rp[0]; + T2T = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Rp[WS(rs, 4)]; + T5 = Rm[WS(rs, 4)]; + T2 = W[14]; + T4 = W[15]; + T6 = FMA(T2, T3, T4 * T5); + T2S = FNMS(T4, T3, T2 * T5); + } + T7 = T1 + T6; + T37 = T2T - T2S; + T1t = T1 - T6; + T2U = T2S + T2T; + } + { + E Tc, T1u, Th, T1v; + { + E T9, Tb, T8, Ta; + T9 = Rp[WS(rs, 2)]; + Tb = Rm[WS(rs, 2)]; + T8 = W[6]; + Ta = W[7]; + Tc = FMA(T8, T9, Ta * Tb); + T1u = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = Rp[WS(rs, 6)]; + Tg = Rm[WS(rs, 6)]; + Td = W[22]; + Tf = W[23]; + Th = FMA(Td, Te, Tf * Tg); + T1v = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc + Th; + T38 = Tc - Th; + T1w = T1u - T1v; + T2R = T1u + T1v; + } + { + E To, T1y, Tt, T1z, T1A, T1B; + { + E Tl, Tn, Tk, Tm; + Tl = Rp[WS(rs, 1)]; + Tn = Rm[WS(rs, 1)]; + Tk = W[2]; + Tm = W[3]; + To = FMA(Tk, Tl, Tm * Tn); + T1y = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = Rp[WS(rs, 5)]; + Ts = Rm[WS(rs, 5)]; + Tp = W[18]; + Tr = W[19]; + Tt = FMA(Tp, Tq, Tr * Ts); + T1z = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T2s = T1y + T1z; + T1A = T1y - T1z; + T1B = To - Tt; + T1C = T1A - T1B; + T2c = T1B + T1A; + } + { + E Tz, T1E, TE, T1F, T1D, T1G; + { + E Tw, Ty, Tv, Tx; + Tw = Rp[WS(rs, 7)]; + Ty = Rm[WS(rs, 7)]; + Tv = W[26]; + Tx = W[27]; + Tz = FMA(Tv, Tw, Tx * Ty); + T1E = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = Rp[WS(rs, 3)]; + TD = Rm[WS(rs, 3)]; + TA = W[10]; + TC = W[11]; + TE = FMA(TA, TB, TC * TD); + T1F = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T2t = T1E + T1F; + T1D = Tz - TE; + T1G = T1E - T1F; + T1H = T1D + T1G; + T2d = T1D - T1G; + } + { + E T19, T20, T1p, T1X, T1e, T21, T1k, T1W; + { + E T16, T18, T15, T17; + T16 = Ip[WS(rs, 7)]; + T18 = Im[WS(rs, 7)]; + T15 = W[28]; + T17 = W[29]; + T19 = FMA(T15, T16, 
T17 * T18); + T20 = FNMS(T17, T16, T15 * T18); + } + { + E T1m, T1o, T1l, T1n; + T1m = Ip[WS(rs, 5)]; + T1o = Im[WS(rs, 5)]; + T1l = W[20]; + T1n = W[21]; + T1p = FMA(T1l, T1m, T1n * T1o); + T1X = FNMS(T1n, T1m, T1l * T1o); + } + { + E T1b, T1d, T1a, T1c; + T1b = Ip[WS(rs, 3)]; + T1d = Im[WS(rs, 3)]; + T1a = W[12]; + T1c = W[13]; + T1e = FMA(T1a, T1b, T1c * T1d); + T21 = FNMS(T1c, T1b, T1a * T1d); + } + { + E T1h, T1j, T1g, T1i; + T1h = Ip[WS(rs, 1)]; + T1j = Im[WS(rs, 1)]; + T1g = W[4]; + T1i = W[5]; + T1k = FMA(T1g, T1h, T1i * T1j); + T1W = FNMS(T1i, T1h, T1g * T1j); + } + T1f = T19 + T1e; + T1q = T1k + T1p; + T2B = T1f - T1q; + T2C = T20 + T21; + T2D = T1W + T1X; + T2E = T2C - T2D; + { + E T1V, T1Y, T22, T23; + T1V = T19 - T1e; + T1Y = T1W - T1X; + T1Z = T1V - T1Y; + T2j = T1V + T1Y; + T22 = T20 - T21; + T23 = T1k - T1p; + T24 = T22 + T23; + T2k = T22 - T23; + } + } + { + E TM, T1K, T12, T1R, TR, T1L, TX, T1Q; + { + E TJ, TL, TI, TK; + TJ = Ip[0]; + TL = Im[0]; + TI = W[0]; + TK = W[1]; + TM = FMA(TI, TJ, TK * TL); + T1K = FNMS(TK, TJ, TI * TL); + } + { + E TZ, T11, TY, T10; + TZ = Ip[WS(rs, 6)]; + T11 = Im[WS(rs, 6)]; + TY = W[24]; + T10 = W[25]; + T12 = FMA(TY, TZ, T10 * T11); + T1R = FNMS(T10, TZ, TY * T11); + } + { + E TO, TQ, TN, TP; + TO = Ip[WS(rs, 4)]; + TQ = Im[WS(rs, 4)]; + TN = W[16]; + TP = W[17]; + TR = FMA(TN, TO, TP * TQ); + T1L = FNMS(TP, TO, TN * TQ); + } + { + E TU, TW, TT, TV; + TU = Ip[WS(rs, 2)]; + TW = Im[WS(rs, 2)]; + TT = W[8]; + TV = W[9]; + TX = FMA(TT, TU, TV * TW); + T1Q = FNMS(TV, TU, TT * TW); + } + TS = TM + TR; + T13 = TX + T12; + T2w = TS - T13; + T2x = T1K + T1L; + T2y = T1Q + T1R; + T2z = T2x - T2y; + { + E T1M, T1N, T1P, T1S; + T1M = T1K - T1L; + T1N = TX - T12; + T1O = T1M + T1N; + T2g = T1M - T1N; + T1P = TM - TR; + T1S = T1Q - T1R; + T1T = T1P - T1S; + T2h = T1P + T1S; + } + } + { + E T1J, T27, T3g, T3i, T26, T3h, T2a, T3d; + { + E T1x, T1I, T3e, T3f; + T1x = T1t - T1w; + T1I = KP707106781 * (T1C - T1H); + T1J = T1x + T1I; 
+ T27 = T1x - T1I; + T3e = KP707106781 * (T2d - T2c); + T3f = T38 + T37; + T3g = T3e + T3f; + T3i = T3f - T3e; + } + { + E T1U, T25, T28, T29; + T1U = FMA(KP923879532, T1O, KP382683432 * T1T); + T25 = FNMS(KP923879532, T24, KP382683432 * T1Z); + T26 = T1U + T25; + T3h = T25 - T1U; + T28 = FNMS(KP923879532, T1T, KP382683432 * T1O); + T29 = FMA(KP382683432, T24, KP923879532 * T1Z); + T2a = T28 - T29; + T3d = T28 + T29; + } + Rm[WS(rs, 4)] = T1J - T26; + Im[WS(rs, 4)] = T3d - T3g; + Rp[WS(rs, 3)] = T1J + T26; + Ip[WS(rs, 3)] = T3d + T3g; + Rm[0] = T27 - T2a; + Im[0] = T3h - T3i; + Rp[WS(rs, 7)] = T27 + T2a; + Ip[WS(rs, 7)] = T3h + T3i; + } + { + E T2v, T2H, T32, T34, T2G, T33, T2K, T2Z; + { + E T2r, T2u, T30, T31; + T2r = T7 - Ti; + T2u = T2s - T2t; + T2v = T2r + T2u; + T2H = T2r - T2u; + T30 = TF - Tu; + T31 = T2U - T2R; + T32 = T30 + T31; + T34 = T31 - T30; + } + { + E T2A, T2F, T2I, T2J; + T2A = T2w + T2z; + T2F = T2B - T2E; + T2G = KP707106781 * (T2A + T2F); + T33 = KP707106781 * (T2F - T2A); + T2I = T2z - T2w; + T2J = T2B + T2E; + T2K = KP707106781 * (T2I - T2J); + T2Z = KP707106781 * (T2I + T2J); + } + Rm[WS(rs, 5)] = T2v - T2G; + Im[WS(rs, 5)] = T2Z - T32; + Rp[WS(rs, 2)] = T2v + T2G; + Ip[WS(rs, 2)] = T2Z + T32; + Rm[WS(rs, 1)] = T2H - T2K; + Im[WS(rs, 1)] = T33 - T34; + Rp[WS(rs, 6)] = T2H + T2K; + Ip[WS(rs, 6)] = T33 + T34; + } + { + E T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35; + { + E T2b, T2e, T36, T39; + T2b = T1t + T1w; + T2e = KP707106781 * (T2c + T2d); + T2f = T2b + T2e; + T2n = T2b - T2e; + T36 = KP707106781 * (T1C + T1H); + T39 = T37 - T38; + T3a = T36 + T39; + T3c = T39 - T36; + } + { + E T2i, T2l, T2o, T2p; + T2i = FMA(KP382683432, T2g, KP923879532 * T2h); + T2l = FNMS(KP382683432, T2k, KP923879532 * T2j); + T2m = T2i + T2l; + T3b = T2l - T2i; + T2o = FNMS(KP382683432, T2h, KP923879532 * T2g); + T2p = FMA(KP923879532, T2k, KP382683432 * T2j); + T2q = T2o - T2p; + T35 = T2o + T2p; + } + Rm[WS(rs, 6)] = T2f - T2m; + Im[WS(rs, 6)] = T35 - T3a; + 
Rp[WS(rs, 1)] = T2f + T2m; + Ip[WS(rs, 1)] = T35 + T3a; + Rm[WS(rs, 2)] = T2n - T2q; + Im[WS(rs, 2)] = T3b - T3c; + Rp[WS(rs, 5)] = T2n + T2q; + Ip[WS(rs, 5)] = T3b + T3c; + } + { + E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P; + { + E Tj, TG, T2Q, T2V; + Tj = T7 + Ti; + TG = Tu + TF; + TH = Tj + TG; + T2L = Tj - TG; + T2Q = T2s + T2t; + T2V = T2R + T2U; + T2W = T2Q + T2V; + T2Y = T2V - T2Q; + } + { + E T14, T1r, T2M, T2N; + T14 = TS + T13; + T1r = T1f + T1q; + T1s = T14 + T1r; + T2X = T1r - T14; + T2M = T2x + T2y; + T2N = T2C + T2D; + T2O = T2M - T2N; + T2P = T2M + T2N; + } + Rm[WS(rs, 7)] = TH - T1s; + Im[WS(rs, 7)] = T2P - T2W; + Rp[0] = TH + T1s; + Ip[0] = T2P + T2W; + Rm[WS(rs, 3)] = T2L - T2O; + Im[WS(rs, 3)] = T2X - T2Y; + Rp[WS(rs, 4)] = T2L + T2O; + Ip[WS(rs, 4)] = T2X + T2Y; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cf_16", twinstr, &GENUS, {136, 46, 38, 0} }; + +void X(codelet_hc2cf_16) (planner *p) { + X(khc2c_register) (p, hc2cf_16, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:21 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -dit -name hc2cf_2 -include hc2cf.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T1, Ta, T3, T6, T2, T5; + T1 = Rp[0]; + Ta = Rm[0]; + T3 = Ip[0]; + T6 = Im[0]; + T2 = W[0]; + T5 = W[1]; + { + E T8, T4, T9, T7; + T8 = T2 * T6; + T4 = T2 * T3; + T9 = FNMS(T5, T3, T8); + T7 = FMA(T5, T6, T4); + Ip[0] = T9 + Ta; + Im[0] = T9 - Ta; + Rp[0] = T1 + T7; + Rm[0] = T1 - T7; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cf_2", twinstr, &GENUS, {4, 2, 2, 0} }; + +void X(codelet_hc2cf_2) (planner *p) { + X(khc2c_register) (p, hc2cf_2, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 2 -dit -name hc2cf_2 -include hc2cf.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory accesses + */ +#include 
"hc2cf.h" + +static void hc2cf_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T1, T8, T6, T7; + T1 = Rp[0]; + T8 = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Ip[0]; + T5 = Im[0]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + T7 = FNMS(T4, T3, T2 * T5); + } + Rm[0] = T1 - T6; + Im[0] = T7 - T8; + Rp[0] = T1 + T6; + Ip[0] = T7 + T8; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cf_2", twinstr, &GENUS, {4, 2, 2, 0} }; + +void X(codelet_hc2cf_2) (planner *p) { + X(khc2c_register) (p, hc2cf_2, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1029 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:23 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cf_20 -include hc2cf.h */ + +/* + * This function contains 246 FP additions, 148 FP multiplications, + * (or, 136 additions, 38 multiplications, 110 fused multiply/add), + * 97 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T4P, T4Y, T50, T4U, T4S, T4T, T4Z, T4V; + { + E T4N, T4r, T8, T2i, T4n, T2n, T4O, Tl, T2v, T3v, T3T, T4f, TN, T2b, T3F; + E T3p, T2R, T3z, T43, T4b, T27, T2f, T3J, T33, T2K, T3y, T40, T4c, T1G, T2e; + E T3I, T3a, T2C, T3w, T3W, T4e, T1e, T2c, T3G, T3i; + { + E T1, T4q, T3, T6, T2, T5; + T1 = Rp[0]; + T4q = Rm[0]; + T3 = Rp[WS(rs, 5)]; + T6 = Rm[WS(rs, 5)]; + T2 = W[18]; + T5 = W[19]; + { + E Ta, Td, Tg, T2j, Tb, Tj, Tf, Tc, Ti; + { + E T4o, T4, T9, T4p, T7; + Ta = Ip[WS(rs, 2)]; + Td = Im[WS(rs, 2)]; + T4o = T2 * T6; + T4 = T2 * T3; + T9 = W[8]; + Tg = Ip[WS(rs, 7)]; + T4p = FNMS(T5, T3, T4o); + T7 = FMA(T5, T6, T4); + T2j = T9 * Td; + Tb 
= T9 * Ta; + T4N = T4q - T4p; + T4r = T4p + T4q; + T8 = T1 + T7; + T2i = T1 - T7; + Tj = Im[WS(rs, 7)]; + Tf = W[28]; + } + Tc = W[9]; + Ti = W[29]; + { + E T3l, Ts, T2t, TL, TB, TE, TD, T3n, Ty, T2q, TC; + { + E TH, TK, TJ, T2s, TI; + { + E To, Tr, Tp, T3k, Tq, TG; + { + E T2k, Te, T2m, Tk, T2l, Th, Tn; + To = Rp[WS(rs, 2)]; + T2l = Tf * Tj; + Th = Tf * Tg; + T2k = FNMS(Tc, Ta, T2j); + Te = FMA(Tc, Td, Tb); + T2m = FNMS(Ti, Tg, T2l); + Tk = FMA(Ti, Tj, Th); + Tr = Rm[WS(rs, 2)]; + Tn = W[6]; + T4n = T2k + T2m; + T2n = T2k - T2m; + T4O = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T3k = Tn * Tr; + } + Tq = W[7]; + TH = Ip[WS(rs, 9)]; + TK = Im[WS(rs, 9)]; + TG = W[36]; + T3l = FNMS(Tq, To, T3k); + Ts = FMA(Tq, Tr, Tp); + TJ = W[37]; + T2s = TG * TK; + TI = TG * TH; + } + { + E Tu, Tx, Tt, Tw, T3m, Tv, TA; + Tu = Rp[WS(rs, 7)]; + Tx = Rm[WS(rs, 7)]; + T2t = FNMS(TJ, TH, T2s); + TL = FMA(TJ, TK, TI); + Tt = W[26]; + Tw = W[27]; + TB = Ip[WS(rs, 4)]; + TE = Im[WS(rs, 4)]; + T3m = Tt * Tx; + Tv = Tt * Tu; + TA = W[16]; + TD = W[17]; + T3n = FNMS(Tw, Tu, T3m); + Ty = FMA(Tw, Tx, Tv); + T2q = TA * TE; + TC = TA * TB; + } + } + { + E T3o, T3R, Tz, T2p, T2r, TF; + T3o = T3l - T3n; + T3R = T3l + T3n; + Tz = Ts + Ty; + T2p = Ts - Ty; + T2r = FNMS(TD, TB, T2q); + TF = FMA(TD, TE, TC); + { + E T3S, T2u, TM, T3j; + T3S = T2r + T2t; + T2u = T2r - T2t; + TM = TF + TL; + T3j = TL - TF; + T2v = T2p - T2u; + T3v = T2p + T2u; + T3T = T3R + T3S; + T4f = T3S - T3R; + TN = Tz - TM; + T2b = Tz + TM; + T3F = T3o + T3j; + T3p = T3j - T3o; + } + } + } + } + } + { + E T2Z, T1M, T2P, T25, T1V, T1Y, T1X, T31, T1S, T2M, T1W; + { + E T21, T24, T23, T2O, T22; + { + E T1I, T1L, T1H, T1K, T2Y, T1J, T20; + T1I = Rp[WS(rs, 6)]; + T1L = Rm[WS(rs, 6)]; + T1H = W[22]; + T1K = W[23]; + T21 = Ip[WS(rs, 3)]; + T24 = Im[WS(rs, 3)]; + T2Y = T1H * T1L; + T1J = T1H * T1I; + T20 = W[12]; + T23 = W[13]; + T2Z = FNMS(T1K, T1I, T2Y); + T1M = FMA(T1K, T1L, T1J); + T2O = T20 * T24; + T22 = T20 * T21; + } + { + E T1O, 
T1R, T1N, T1Q, T30, T1P, T1U; + T1O = Rp[WS(rs, 1)]; + T1R = Rm[WS(rs, 1)]; + T2P = FNMS(T23, T21, T2O); + T25 = FMA(T23, T24, T22); + T1N = W[2]; + T1Q = W[3]; + T1V = Ip[WS(rs, 8)]; + T1Y = Im[WS(rs, 8)]; + T30 = T1N * T1R; + T1P = T1N * T1O; + T1U = W[32]; + T1X = W[33]; + T31 = FNMS(T1Q, T1O, T30); + T1S = FMA(T1Q, T1R, T1P); + T2M = T1U * T1Y; + T1W = T1U * T1V; + } + } + { + E T32, T41, T1T, T2L, T2N, T1Z; + T32 = T2Z - T31; + T41 = T2Z + T31; + T1T = T1M + T1S; + T2L = T1M - T1S; + T2N = FNMS(T1X, T1V, T2M); + T1Z = FMA(T1X, T1Y, T1W); + { + E T42, T2Q, T26, T2X; + T42 = T2N + T2P; + T2Q = T2N - T2P; + T26 = T1Z + T25; + T2X = T25 - T1Z; + T2R = T2L - T2Q; + T3z = T2L + T2Q; + T43 = T41 + T42; + T4b = T42 - T41; + T27 = T1T - T26; + T2f = T1T + T26; + T3J = T32 + T2X; + T33 = T2X - T32; + } + } + } + { + E T36, T1l, T2I, T1E, T1u, T1x, T1w, T38, T1r, T2F, T1v; + { + E T1A, T1D, T1C, T2H, T1B; + { + E T1h, T1k, T1g, T1j, T35, T1i, T1z; + T1h = Rp[WS(rs, 4)]; + T1k = Rm[WS(rs, 4)]; + T1g = W[14]; + T1j = W[15]; + T1A = Ip[WS(rs, 1)]; + T1D = Im[WS(rs, 1)]; + T35 = T1g * T1k; + T1i = T1g * T1h; + T1z = W[4]; + T1C = W[5]; + T36 = FNMS(T1j, T1h, T35); + T1l = FMA(T1j, T1k, T1i); + T2H = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T37, T1o, T1t; + T1n = Rp[WS(rs, 9)]; + T1q = Rm[WS(rs, 9)]; + T2I = FNMS(T1C, T1A, T2H); + T1E = FMA(T1C, T1D, T1B); + T1m = W[34]; + T1p = W[35]; + T1u = Ip[WS(rs, 6)]; + T1x = Im[WS(rs, 6)]; + T37 = T1m * T1q; + T1o = T1m * T1n; + T1t = W[24]; + T1w = W[25]; + T38 = FNMS(T1p, T1n, T37); + T1r = FMA(T1p, T1q, T1o); + T2F = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T39, T3Y, T1s, T2E, T2G, T1y; + T39 = T36 - T38; + T3Y = T36 + T38; + T1s = T1l + T1r; + T2E = T1l - T1r; + T2G = FNMS(T1w, T1u, T2F); + T1y = FMA(T1w, T1x, T1v); + { + E T3Z, T2J, T1F, T34; + T3Z = T2G + T2I; + T2J = T2G - T2I; + T1F = T1y + T1E; + T34 = T1E - T1y; + T2K = T2E - T2J; + T3y = T2E + T2J; + T40 = T3Y + T3Z; + T4c = T3Z - T3Y; + T1G = T1s 
- T1F; + T2e = T1s + T1F; + T3I = T39 + T34; + T3a = T34 - T39; + } + } + } + { + E T3e, TT, T2A, T1c, T12, T15, T14, T3g, TZ, T2x, T13; + { + E T18, T1b, T1a, T2z, T19; + { + E TP, TS, TO, TR, T3d, TQ, T17; + TP = Rp[WS(rs, 8)]; + TS = Rm[WS(rs, 8)]; + TO = W[30]; + TR = W[31]; + T18 = Ip[WS(rs, 5)]; + T1b = Im[WS(rs, 5)]; + T3d = TO * TS; + TQ = TO * TP; + T17 = W[20]; + T1a = W[21]; + T3e = FNMS(TR, TP, T3d); + TT = FMA(TR, TS, TQ); + T2z = T17 * T1b; + T19 = T17 * T18; + } + { + E TV, TY, TU, TX, T3f, TW, T11; + TV = Rp[WS(rs, 3)]; + TY = Rm[WS(rs, 3)]; + T2A = FNMS(T1a, T18, T2z); + T1c = FMA(T1a, T1b, T19); + TU = W[10]; + TX = W[11]; + T12 = Ip[0]; + T15 = Im[0]; + T3f = TU * TY; + TW = TU * TV; + T11 = W[0]; + T14 = W[1]; + T3g = FNMS(TX, TV, T3f); + TZ = FMA(TX, TY, TW); + T2x = T11 * T15; + T13 = T11 * T12; + } + } + { + E T3h, T3U, T10, T2w, T2y, T16; + T3h = T3e - T3g; + T3U = T3e + T3g; + T10 = TT + TZ; + T2w = TT - TZ; + T2y = FNMS(T14, T12, T2x); + T16 = FMA(T14, T15, T13); + { + E T3V, T2B, T1d, T3c; + T3V = T2y + T2A; + T2B = T2y - T2A; + T1d = T16 + T1c; + T3c = T1c - T16; + T2C = T2w - T2B; + T3w = T2w + T2B; + T3W = T3U + T3V; + T4e = T3V - T3U; + T1e = T10 - T1d; + T2c = T10 + T1d; + T3G = T3h + T3c; + T3i = T3c - T3h; + } + } + } + { + E T4s, T4k, T4l, T45, T47, T3P, T4y, T4A, T3O; + { + E T4C, T4B, T2a, T4j, T4h, T4E, T4M, T4K, T4i, T4a; + { + E Tm, T1f, T4J, T4I, T28, T4d, T4g, T29, T49, T48; + T4C = T4c + T4b; + T4d = T4b - T4c; + T4g = T4e - T4f; + T4B = T4f + T4e; + T2a = T8 + Tl; + Tm = T8 - Tl; + T1f = TN + T1e; + T4J = T1e - TN; + T4I = T1G - T27; + T28 = T1G + T27; + T4j = FMA(KP618033988, T4d, T4g); + T4h = FNMS(KP618033988, T4g, T4d); + T29 = T1f + T28; + T49 = T1f - T28; + T4E = T4r - T4n; + T4s = T4n + T4r; + Rm[WS(rs, 9)] = Tm + T29; + T48 = FNMS(KP250000000, T29, Tm); + T4M = FNMS(KP618033988, T4I, T4J); + T4K = FMA(KP618033988, T4J, T4I); + T4i = FMA(KP559016994, T49, T48); + T4a = FNMS(KP559016994, T49, T48); + } + { + E T2d, 
T4w, T4x, T2g, T2h; + { + E T3X, T4G, T4F, T44, T4D, T4L, T4H; + T4k = T3T + T3W; + T3X = T3T - T3W; + T4G = T4C - T4B; + T4D = T4B + T4C; + Rm[WS(rs, 1)] = FMA(KP951056516, T4h, T4a); + Rp[WS(rs, 2)] = FNMS(KP951056516, T4h, T4a); + Rp[WS(rs, 6)] = FMA(KP951056516, T4j, T4i); + Rm[WS(rs, 5)] = FNMS(KP951056516, T4j, T4i); + Im[WS(rs, 9)] = T4D - T4E; + T4F = FMA(KP250000000, T4D, T4E); + T44 = T40 - T43; + T4l = T40 + T43; + T2d = T2b + T2c; + T4w = T2b - T2c; + T4L = FMA(KP559016994, T4G, T4F); + T4H = FNMS(KP559016994, T4G, T4F); + T45 = FMA(KP618033988, T44, T3X); + T47 = FNMS(KP618033988, T3X, T44); + Ip[WS(rs, 2)] = FMA(KP951056516, T4K, T4H); + Im[WS(rs, 1)] = FMS(KP951056516, T4K, T4H); + Ip[WS(rs, 6)] = FMA(KP951056516, T4M, T4L); + Im[WS(rs, 5)] = FMS(KP951056516, T4M, T4L); + T4x = T2f - T2e; + T2g = T2e + T2f; + } + T2h = T2d + T2g; + T3P = T2d - T2g; + T4y = FNMS(KP618033988, T4x, T4w); + T4A = FMA(KP618033988, T4w, T4x); + Rp[0] = T2a + T2h; + T3O = FNMS(KP250000000, T2h, T2a); + } + } + { + E T3u, T54, T5a, T5c, T56, T53; + { + E T52, T51, T3t, T3r, T2o, T58, T59, T2T, T2V, T4u, T4t, T2U, T3s, T2W; + { + E T3b, T3q, T46, T3Q, T4m; + T52 = T3a + T33; + T3b = T33 - T3a; + T3q = T3i - T3p; + T51 = T3p + T3i; + T46 = FNMS(KP559016994, T3P, T3O); + T3Q = FMA(KP559016994, T3P, T3O); + T4m = T4k + T4l; + T4u = T4k - T4l; + Rm[WS(rs, 3)] = FMA(KP951056516, T45, T3Q); + Rp[WS(rs, 4)] = FNMS(KP951056516, T45, T3Q); + Rp[WS(rs, 8)] = FMA(KP951056516, T47, T46); + Rm[WS(rs, 7)] = FNMS(KP951056516, T47, T46); + Ip[0] = T4m + T4s; + T4t = FNMS(KP250000000, T4m, T4s); + T3t = FMA(KP618033988, T3b, T3q); + T3r = FNMS(KP618033988, T3q, T3b); + } + T3u = T2i + T2n; + T2o = T2i - T2n; + { + E T4v, T4z, T2D, T2S; + T4v = FMA(KP559016994, T4u, T4t); + T4z = FNMS(KP559016994, T4u, T4t); + T2D = T2v + T2C; + T58 = T2v - T2C; + T59 = T2K - T2R; + T2S = T2K + T2R; + Ip[WS(rs, 4)] = FMA(KP951056516, T4y, T4v); + Im[WS(rs, 3)] = FMS(KP951056516, T4y, T4v); + Ip[WS(rs, 8)] = 
FMA(KP951056516, T4A, T4z); + Im[WS(rs, 7)] = FMS(KP951056516, T4A, T4z); + T2T = T2D + T2S; + T2V = T2D - T2S; + } + Rm[WS(rs, 4)] = T2o + T2T; + T2U = FNMS(KP250000000, T2T, T2o); + T54 = T4O + T4N; + T4P = T4N - T4O; + T5a = FMA(KP618033988, T59, T58); + T5c = FNMS(KP618033988, T58, T59); + T3s = FMA(KP559016994, T2V, T2U); + T2W = FNMS(KP559016994, T2V, T2U); + Rp[WS(rs, 7)] = FNMS(KP951056516, T3r, T2W); + Rp[WS(rs, 3)] = FMA(KP951056516, T3r, T2W); + Rm[0] = FNMS(KP951056516, T3t, T3s); + Rm[WS(rs, 8)] = FMA(KP951056516, T3t, T3s); + T56 = T51 - T52; + T53 = T51 + T52; + } + { + E T4Q, T4R, T3N, T3L, T4X, T4W, T3B, T3D, T3H, T3K, T55, T3C, T3M, T3E; + T4Q = T3F + T3G; + T3H = T3F - T3G; + T3K = T3I - T3J; + T4R = T3I + T3J; + Im[WS(rs, 4)] = T53 - T54; + T55 = FMA(KP250000000, T53, T54); + T3N = FNMS(KP618033988, T3H, T3K); + T3L = FMA(KP618033988, T3K, T3H); + { + E T57, T5b, T3x, T3A; + T57 = FNMS(KP559016994, T56, T55); + T5b = FMA(KP559016994, T56, T55); + T3x = T3v + T3w; + T4X = T3v - T3w; + T4W = T3y - T3z; + T3A = T3y + T3z; + Im[0] = -(FMA(KP951056516, T5a, T57)); + Im[WS(rs, 8)] = FMS(KP951056516, T5a, T57); + Ip[WS(rs, 7)] = FMA(KP951056516, T5c, T5b); + Ip[WS(rs, 3)] = FNMS(KP951056516, T5c, T5b); + T3B = T3x + T3A; + T3D = T3x - T3A; + } + Rp[WS(rs, 5)] = T3u + T3B; + T3C = FNMS(KP250000000, T3B, T3u); + T4Y = FNMS(KP618033988, T4X, T4W); + T50 = FMA(KP618033988, T4W, T4X); + T3M = FNMS(KP559016994, T3D, T3C); + T3E = FMA(KP559016994, T3D, T3C); + Rp[WS(rs, 9)] = FNMS(KP951056516, T3L, T3E); + Rp[WS(rs, 1)] = FMA(KP951056516, T3L, T3E); + Rm[WS(rs, 2)] = FNMS(KP951056516, T3N, T3M); + Rm[WS(rs, 6)] = FMA(KP951056516, T3N, T3M); + T4U = T4Q - T4R; + T4S = T4Q + T4R; + } + } + } + } + Ip[WS(rs, 5)] = T4S + T4P; + T4T = FNMS(KP250000000, T4S, T4P); + T4Z = FMA(KP559016994, T4U, T4T); + T4V = FNMS(KP559016994, T4U, T4T); + Im[WS(rs, 2)] = -(FMA(KP951056516, T4Y, T4V)); + Im[WS(rs, 6)] = FMS(KP951056516, T4Y, T4V); + Ip[WS(rs, 9)] = FMA(KP951056516, 
T50, T4Z); + Ip[WS(rs, 1)] = FNMS(KP951056516, T50, T4Z); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cf_20", twinstr, &GENUS, {136, 38, 110, 0} }; + +void X(codelet_hc2cf_20) (planner *p) { + X(khc2c_register) (p, hc2cf_20, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cf_20 -include hc2cf.h */ + +/* + * This function contains 246 FP additions, 124 FP multiplications, + * (or, 184 additions, 62 multiplications, 62 fused multiply/add), + * 85 stack variables, 4 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E Tj, T1R, T4j, T4s, T2q, T37, T3Q, T42, T1r, T1O, T1P, T3p, T3s, T3K, T3A; + E T3B, T3Z, T1V, T1W, T1X, T23, T28, T4q, T2W, T2X, T4f, T33, T34, T35, T2G; + E T2L, T2M, TG, T13, T14, T3i, T3l, T3J, T3D, T3E, T40, T1S, T1T, T1U, T2e; + E T2j, T4p, T2T, T2U, T4e, T30, T31, T32, T2v, T2A, T2B; + { + E T1, T3O, T6, T3N, Tc, T2n, Th, T2o; + T1 = Rp[0]; + T3O = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Rp[WS(rs, 5)]; + T5 = Rm[WS(rs, 5)]; + T2 = W[18]; + T4 = W[19]; + T6 = FMA(T2, T3, T4 * T5); + T3N = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = Ip[WS(rs, 2)]; + Tb = Im[WS(rs, 2)]; + T8 = W[8]; + Ta = W[9]; + Tc = FMA(T8, T9, Ta * Tb); + T2n = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = 
Ip[WS(rs, 7)]; + Tg = Im[WS(rs, 7)]; + Td = W[28]; + Tf = W[29]; + Th = FMA(Td, Te, Tf * Tg); + T2o = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, T4h, T4i; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 - Ti; + T1R = T7 + Ti; + T4h = T3O - T3N; + T4i = Tc - Th; + T4j = T4h - T4i; + T4s = T4i + T4h; + } + { + E T2m, T2p, T3M, T3P; + T2m = T1 - T6; + T2p = T2n - T2o; + T2q = T2m - T2p; + T37 = T2m + T2p; + T3M = T2n + T2o; + T3P = T3N + T3O; + T3Q = T3M + T3P; + T42 = T3P - T3M; + } + } + { + E T1f, T3n, T21, T2C, T1N, T3r, T27, T2K, T1q, T3o, T22, T2F, T1C, T3q, T26; + E T2H; + { + E T19, T1Z, T1e, T20; + { + E T16, T18, T15, T17; + T16 = Rp[WS(rs, 4)]; + T18 = Rm[WS(rs, 4)]; + T15 = W[14]; + T17 = W[15]; + T19 = FMA(T15, T16, T17 * T18); + T1Z = FNMS(T17, T16, T15 * T18); + } + { + E T1b, T1d, T1a, T1c; + T1b = Rp[WS(rs, 9)]; + T1d = Rm[WS(rs, 9)]; + T1a = W[34]; + T1c = W[35]; + T1e = FMA(T1a, T1b, T1c * T1d); + T20 = FNMS(T1c, T1b, T1a * T1d); + } + T1f = T19 + T1e; + T3n = T1Z + T20; + T21 = T1Z - T20; + T2C = T19 - T1e; + } + { + E T1H, T2I, T1M, T2J; + { + E T1E, T1G, T1D, T1F; + T1E = Ip[WS(rs, 8)]; + T1G = Im[WS(rs, 8)]; + T1D = W[32]; + T1F = W[33]; + T1H = FMA(T1D, T1E, T1F * T1G); + T2I = FNMS(T1F, T1E, T1D * T1G); + } + { + E T1J, T1L, T1I, T1K; + T1J = Ip[WS(rs, 3)]; + T1L = Im[WS(rs, 3)]; + T1I = W[12]; + T1K = W[13]; + T1M = FMA(T1I, T1J, T1K * T1L); + T2J = FNMS(T1K, T1J, T1I * T1L); + } + T1N = T1H + T1M; + T3r = T2I + T2J; + T27 = T1H - T1M; + T2K = T2I - T2J; + } + { + E T1k, T2D, T1p, T2E; + { + E T1h, T1j, T1g, T1i; + T1h = Ip[WS(rs, 6)]; + T1j = Im[WS(rs, 6)]; + T1g = W[24]; + T1i = W[25]; + T1k = FMA(T1g, T1h, T1i * T1j); + T2D = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1m, T1o, T1l, T1n; + T1m = Ip[WS(rs, 1)]; + T1o = Im[WS(rs, 1)]; + T1l = W[4]; + T1n = W[5]; + T1p = FMA(T1l, T1m, T1n * T1o); + T2E = FNMS(T1n, T1m, T1l * T1o); + } + T1q = T1k + T1p; + T3o = T2D + T2E; + T22 = T1k - T1p; + T2F = T2D - T2E; + } + { + E T1w, T24, T1B, T25; + { + E 
T1t, T1v, T1s, T1u; + T1t = Rp[WS(rs, 6)]; + T1v = Rm[WS(rs, 6)]; + T1s = W[22]; + T1u = W[23]; + T1w = FMA(T1s, T1t, T1u * T1v); + T24 = FNMS(T1u, T1t, T1s * T1v); + } + { + E T1y, T1A, T1x, T1z; + T1y = Rp[WS(rs, 1)]; + T1A = Rm[WS(rs, 1)]; + T1x = W[2]; + T1z = W[3]; + T1B = FMA(T1x, T1y, T1z * T1A); + T25 = FNMS(T1z, T1y, T1x * T1A); + } + T1C = T1w + T1B; + T3q = T24 + T25; + T26 = T24 - T25; + T2H = T1w - T1B; + } + T1r = T1f - T1q; + T1O = T1C - T1N; + T1P = T1r + T1O; + T3p = T3n + T3o; + T3s = T3q + T3r; + T3K = T3p + T3s; + T3A = T3n - T3o; + T3B = T3r - T3q; + T3Z = T3B - T3A; + T1V = T1f + T1q; + T1W = T1C + T1N; + T1X = T1V + T1W; + T23 = T21 + T22; + T28 = T26 + T27; + T4q = T23 + T28; + T2W = T21 - T22; + T2X = T26 - T27; + T4f = T2W + T2X; + T33 = T2C + T2F; + T34 = T2H + T2K; + T35 = T33 + T34; + T2G = T2C - T2F; + T2L = T2H - T2K; + T2M = T2G + T2L; + } + { + E Tu, T3g, T2c, T2r, T12, T3k, T2f, T2z, TF, T3h, T2d, T2u, TR, T3j, T2i; + E T2w; + { + E To, T2a, Tt, T2b; + { + E Tl, Tn, Tk, Tm; + Tl = Rp[WS(rs, 2)]; + Tn = Rm[WS(rs, 2)]; + Tk = W[6]; + Tm = W[7]; + To = FMA(Tk, Tl, Tm * Tn); + T2a = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = Rp[WS(rs, 7)]; + Ts = Rm[WS(rs, 7)]; + Tp = W[26]; + Tr = W[27]; + Tt = FMA(Tp, Tq, Tr * Ts); + T2b = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T3g = T2a + T2b; + T2c = T2a - T2b; + T2r = To - Tt; + } + { + E TW, T2x, T11, T2y; + { + E TT, TV, TS, TU; + TT = Ip[0]; + TV = Im[0]; + TS = W[0]; + TU = W[1]; + TW = FMA(TS, TT, TU * TV); + T2x = FNMS(TU, TT, TS * TV); + } + { + E TY, T10, TX, TZ; + TY = Ip[WS(rs, 5)]; + T10 = Im[WS(rs, 5)]; + TX = W[20]; + TZ = W[21]; + T11 = FMA(TX, TY, TZ * T10); + T2y = FNMS(TZ, TY, TX * T10); + } + T12 = TW + T11; + T3k = T2x + T2y; + T2f = T11 - TW; + T2z = T2x - T2y; + } + { + E Tz, T2s, TE, T2t; + { + E Tw, Ty, Tv, Tx; + Tw = Ip[WS(rs, 4)]; + Ty = Im[WS(rs, 4)]; + Tv = W[16]; + Tx = W[17]; + Tz = FMA(Tv, Tw, Tx * Ty); + T2s = FNMS(Tx, Tw, Tv * Ty); + } + { + E 
TB, TD, TA, TC; + TB = Ip[WS(rs, 9)]; + TD = Im[WS(rs, 9)]; + TA = W[36]; + TC = W[37]; + TE = FMA(TA, TB, TC * TD); + T2t = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T3h = T2s + T2t; + T2d = Tz - TE; + T2u = T2s - T2t; + } + { + E TL, T2g, TQ, T2h; + { + E TI, TK, TH, TJ; + TI = Rp[WS(rs, 8)]; + TK = Rm[WS(rs, 8)]; + TH = W[30]; + TJ = W[31]; + TL = FMA(TH, TI, TJ * TK); + T2g = FNMS(TJ, TI, TH * TK); + } + { + E TN, TP, TM, TO; + TN = Rp[WS(rs, 3)]; + TP = Rm[WS(rs, 3)]; + TM = W[10]; + TO = W[11]; + TQ = FMA(TM, TN, TO * TP); + T2h = FNMS(TO, TN, TM * TP); + } + TR = TL + TQ; + T3j = T2g + T2h; + T2i = T2g - T2h; + T2w = TL - TQ; + } + TG = Tu - TF; + T13 = TR - T12; + T14 = TG + T13; + T3i = T3g + T3h; + T3l = T3j + T3k; + T3J = T3i + T3l; + T3D = T3g - T3h; + T3E = T3j - T3k; + T40 = T3D + T3E; + T1S = Tu + TF; + T1T = TR + T12; + T1U = T1S + T1T; + T2e = T2c + T2d; + T2j = T2f - T2i; + T4p = T2j - T2e; + T2T = T2c - T2d; + T2U = T2i + T2f; + T4e = T2T + T2U; + T30 = T2r + T2u; + T31 = T2w + T2z; + T32 = T30 + T31; + T2v = T2r - T2u; + T2A = T2w - T2z; + T2B = T2v + T2A; + } + { + E T3y, T1Q, T3x, T3G, T3I, T3C, T3F, T3H, T3z; + T3y = KP559016994 * (T14 - T1P); + T1Q = T14 + T1P; + T3x = FNMS(KP250000000, T1Q, Tj); + T3C = T3A + T3B; + T3F = T3D - T3E; + T3G = FNMS(KP587785252, T3F, KP951056516 * T3C); + T3I = FMA(KP951056516, T3F, KP587785252 * T3C); + Rm[WS(rs, 9)] = Tj + T1Q; + T3H = T3y + T3x; + Rm[WS(rs, 5)] = T3H - T3I; + Rp[WS(rs, 6)] = T3H + T3I; + T3z = T3x - T3y; + Rp[WS(rs, 2)] = T3z - T3G; + Rm[WS(rs, 1)] = T3z + T3G; + } + { + E T47, T41, T46, T45, T49, T43, T44, T4a, T48; + T47 = KP559016994 * (T40 + T3Z); + T41 = T3Z - T40; + T46 = FMA(KP250000000, T41, T42); + T43 = T13 - TG; + T44 = T1r - T1O; + T45 = FMA(KP587785252, T43, KP951056516 * T44); + T49 = FNMS(KP587785252, T44, KP951056516 * T43); + Im[WS(rs, 9)] = T41 - T42; + T4a = T47 + T46; + Im[WS(rs, 5)] = T49 - T4a; + Ip[WS(rs, 6)] = T49 + T4a; + T48 = T46 - T47; + Im[WS(rs, 1)] = T45 - 
T48; + Ip[WS(rs, 2)] = T45 + T48; + } + { + E T3d, T1Y, T3e, T3u, T3w, T3m, T3t, T3v, T3f; + T3d = KP559016994 * (T1U - T1X); + T1Y = T1U + T1X; + T3e = FNMS(KP250000000, T1Y, T1R); + T3m = T3i - T3l; + T3t = T3p - T3s; + T3u = FMA(KP951056516, T3m, KP587785252 * T3t); + T3w = FNMS(KP587785252, T3m, KP951056516 * T3t); + Rp[0] = T1R + T1Y; + T3v = T3e - T3d; + Rm[WS(rs, 7)] = T3v - T3w; + Rp[WS(rs, 8)] = T3v + T3w; + T3f = T3d + T3e; + Rp[WS(rs, 4)] = T3f - T3u; + Rm[WS(rs, 3)] = T3f + T3u; + } + { + E T3U, T3L, T3V, T3T, T3X, T3R, T3S, T3Y, T3W; + T3U = KP559016994 * (T3J - T3K); + T3L = T3J + T3K; + T3V = FNMS(KP250000000, T3L, T3Q); + T3R = T1S - T1T; + T3S = T1V - T1W; + T3T = FMA(KP951056516, T3R, KP587785252 * T3S); + T3X = FNMS(KP951056516, T3S, KP587785252 * T3R); + Ip[0] = T3L + T3Q; + T3Y = T3V - T3U; + Im[WS(rs, 7)] = T3X - T3Y; + Ip[WS(rs, 8)] = T3X + T3Y; + T3W = T3U + T3V; + Im[WS(rs, 3)] = T3T - T3W; + Ip[WS(rs, 4)] = T3T + T3W; + } + { + E T2P, T2N, T2O, T2l, T2R, T29, T2k, T2S, T2Q; + T2P = KP559016994 * (T2B - T2M); + T2N = T2B + T2M; + T2O = FNMS(KP250000000, T2N, T2q); + T29 = T23 - T28; + T2k = T2e + T2j; + T2l = FNMS(KP587785252, T2k, KP951056516 * T29); + T2R = FMA(KP951056516, T2k, KP587785252 * T29); + Rm[WS(rs, 4)] = T2q + T2N; + T2S = T2P + T2O; + Rm[WS(rs, 8)] = T2R + T2S; + Rm[0] = T2S - T2R; + T2Q = T2O - T2P; + Rp[WS(rs, 3)] = T2l + T2Q; + Rp[WS(rs, 7)] = T2Q - T2l; + } + { + E T4w, T4r, T4x, T4v, T4A, T4t, T4u, T4z, T4y; + T4w = KP559016994 * (T4p + T4q); + T4r = T4p - T4q; + T4x = FMA(KP250000000, T4r, T4s); + T4t = T2v - T2A; + T4u = T2G - T2L; + T4v = FMA(KP951056516, T4t, KP587785252 * T4u); + T4A = FNMS(KP587785252, T4t, KP951056516 * T4u); + Im[WS(rs, 4)] = T4r - T4s; + T4z = T4w + T4x; + Ip[WS(rs, 3)] = T4z - T4A; + Ip[WS(rs, 7)] = T4A + T4z; + T4y = T4w - T4x; + Im[WS(rs, 8)] = T4v + T4y; + Im[0] = T4y - T4v; + } + { + E T36, T38, T39, T2Z, T3b, T2V, T2Y, T3c, T3a; + T36 = KP559016994 * (T32 - T35); + T38 = T32 + T35; + T39 = 
FNMS(KP250000000, T38, T37); + T2V = T2T - T2U; + T2Y = T2W - T2X; + T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y); + T3b = FNMS(KP587785252, T2V, KP951056516 * T2Y); + Rp[WS(rs, 5)] = T37 + T38; + T3c = T39 - T36; + Rm[WS(rs, 6)] = T3b + T3c; + Rm[WS(rs, 2)] = T3c - T3b; + T3a = T36 + T39; + Rp[WS(rs, 1)] = T2Z + T3a; + Rp[WS(rs, 9)] = T3a - T2Z; + } + { + E T4g, T4k, T4l, T4d, T4o, T4b, T4c, T4n, T4m; + T4g = KP559016994 * (T4e - T4f); + T4k = T4e + T4f; + T4l = FNMS(KP250000000, T4k, T4j); + T4b = T33 - T34; + T4c = T30 - T31; + T4d = FNMS(KP587785252, T4c, KP951056516 * T4b); + T4o = FMA(KP951056516, T4c, KP587785252 * T4b); + Ip[WS(rs, 5)] = T4k + T4j; + T4n = T4g + T4l; + Ip[WS(rs, 1)] = T4n - T4o; + Ip[WS(rs, 9)] = T4o + T4n; + T4m = T4g - T4l; + Im[WS(rs, 6)] = T4d + T4m; + Im[WS(rs, 2)] = T4m - T4d; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cf_20", twinstr, &GENUS, {184, 62, 62, 0} }; + +void X(codelet_hc2cf_20) (planner *p) { + X(khc2c_register) (p, hc2cf_20, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1771 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:23 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cf_32 -include hc2cf.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 236 additions, 62 multiplications, 198 fused multiply/add), + * 135 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T90, T8Z; + { + E T8x, T87, T8, T3w, T83, T3B, T8y, Tl, T6F, Tz, T3J, T5T, T6G, TM, T3Q; + E T5U, T3Z, T5Y, T7D, T6L, T5X, T46, T6M, T1f, T4e, T61, T7E, T6R, T6O, T1G; + E T60, T4l, T54, T6c, T79, T7N, T32, T7b, T6f, T5r, T4v, T65, T6X, T7I, T29; + E T70, T68, T4S, T5s, T5b, T7O, T7e, T76, T3t, T5t, T5i, T4H, T2y, T4B, T71; + E T2m, T4w, T4F, T2s; + { + E T3X, T1d, T44, T6J, T11, T3T, T3V, T17, T5h, T5c; + { + E Ta, Td, Tg, T3x, Tb, 
Tj, Tf, Tc, Ti; + { + E T1, T86, T3, T6, T2, T5; + T1 = Rp[0]; + T86 = Rm[0]; + T3 = Rp[WS(rs, 8)]; + T6 = Rm[WS(rs, 8)]; + T2 = W[30]; + T5 = W[31]; + { + E T84, T4, T9, T85, T7; + Ta = Rp[WS(rs, 4)]; + Td = Rm[WS(rs, 4)]; + T84 = T2 * T6; + T4 = T2 * T3; + T9 = W[14]; + Tg = Rp[WS(rs, 12)]; + T85 = FNMS(T5, T3, T84); + T7 = FMA(T5, T6, T4); + T3x = T9 * Td; + Tb = T9 * Ta; + T8x = T86 - T85; + T87 = T85 + T86; + T8 = T1 + T7; + T3w = T1 - T7; + Tj = Rm[WS(rs, 12)]; + Tf = W[46]; + } + Tc = W[15]; + Ti = W[47]; + } + { + E Tu, Tx, T3F, Ts, Tw, T3G, Tv; + { + E To, Tr, Tp, T3E, Tq, Tt; + { + E T3y, Te, T3A, Tk, T3z, Th, Tn; + To = Rp[WS(rs, 2)]; + T3z = Tf * Tj; + Th = Tf * Tg; + T3y = FNMS(Tc, Ta, T3x); + Te = FMA(Tc, Td, Tb); + T3A = FNMS(Ti, Tg, T3z); + Tk = FMA(Ti, Tj, Th); + Tr = Rm[WS(rs, 2)]; + Tn = W[6]; + T83 = T3y + T3A; + T3B = T3y - T3A; + T8y = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T3E = Tn * Tr; + } + Tq = W[7]; + Tu = Rp[WS(rs, 10)]; + Tx = Rm[WS(rs, 10)]; + Tt = W[38]; + T3F = FNMS(Tq, To, T3E); + Ts = FMA(Tq, Tr, Tp); + Tw = W[39]; + T3G = Tt * Tx; + Tv = Tt * Tu; + } + { + E T3M, TF, TH, TK, TG, TJ, TE, TD, TC; + { + E TB, T3H, Ty, TA, T3I, T3D, T3L; + TB = Rp[WS(rs, 14)]; + TE = Rm[WS(rs, 14)]; + T3H = FNMS(Tw, Tu, T3G); + Ty = FMA(Tw, Tx, Tv); + TA = W[54]; + TD = W[55]; + T6F = T3F + T3H; + T3I = T3F - T3H; + Tz = Ts + Ty; + T3D = Ts - Ty; + T3L = TA * TE; + TC = TA * TB; + T3J = T3D + T3I; + T5T = T3I - T3D; + T3M = FNMS(TD, TB, T3L); + } + TF = FMA(TD, TE, TC); + TH = Rp[WS(rs, 6)]; + TK = Rm[WS(rs, 6)]; + TG = W[22]; + TJ = W[23]; + { + E TU, T41, T13, T16, T43, T10, T12, T15, T3U, T14; + { + E T19, T1c, T18, T1b, T3P, T3K; + { + E TQ, TT, T3N, TI, TP, TS; + TQ = Rp[WS(rs, 1)]; + TT = Rm[WS(rs, 1)]; + T3N = TG * TK; + TI = TG * TH; + TP = W[2]; + TS = W[3]; + { + E T3O, TL, T40, TR; + T3O = FNMS(TJ, TH, T3N); + TL = FMA(TJ, TK, TI); + T40 = TP * TT; + TR = TP * TQ; + T6G = T3M + T3O; + T3P = T3M - T3O; + TM = TF + TL; + T3K = TF - TL; + 
TU = FMA(TS, TT, TR); + T41 = FNMS(TS, TQ, T40); + } + } + T3Q = T3K - T3P; + T5U = T3K + T3P; + T19 = Rp[WS(rs, 13)]; + T1c = Rm[WS(rs, 13)]; + T18 = W[50]; + T1b = W[51]; + { + E TW, TZ, TY, T42, TX, T3W, T1a, TV; + TW = Rp[WS(rs, 9)]; + TZ = Rm[WS(rs, 9)]; + T3W = T18 * T1c; + T1a = T18 * T19; + TV = W[34]; + TY = W[35]; + T3X = FNMS(T1b, T19, T3W); + T1d = FMA(T1b, T1c, T1a); + T42 = TV * TZ; + TX = TV * TW; + T13 = Rp[WS(rs, 5)]; + T16 = Rm[WS(rs, 5)]; + T43 = FNMS(TY, TW, T42); + T10 = FMA(TY, TZ, TX); + T12 = W[18]; + T15 = W[19]; + } + } + T44 = T41 - T43; + T6J = T41 + T43; + T11 = TU + T10; + T3T = TU - T10; + T3U = T12 * T16; + T14 = T12 * T13; + T3V = FNMS(T15, T13, T3U); + T17 = FMA(T15, T16, T14); + } + } + } + } + { + E T4g, T1l, T4c, T1E, T1u, T1x, T1w, T4i, T1r, T49, T1v; + { + E T1A, T1D, T1C, T4b, T1B; + { + E T1h, T1k, T1g, T1j, T4f, T1i, T1z; + T1h = Rp[WS(rs, 15)]; + T1k = Rm[WS(rs, 15)]; + { + E T6K, T3Y, T1e, T45; + T6K = T3V + T3X; + T3Y = T3V - T3X; + T1e = T17 + T1d; + T45 = T17 - T1d; + T3Z = T3T + T3Y; + T5Y = T3T - T3Y; + T7D = T6J + T6K; + T6L = T6J - T6K; + T5X = T44 + T45; + T46 = T44 - T45; + T6M = T11 - T1e; + T1f = T11 + T1e; + T1g = W[58]; + } + T1j = W[59]; + T1A = Rp[WS(rs, 11)]; + T1D = Rm[WS(rs, 11)]; + T4f = T1g * T1k; + T1i = T1g * T1h; + T1z = W[42]; + T1C = W[43]; + T4g = FNMS(T1j, T1h, T4f); + T1l = FMA(T1j, T1k, T1i); + T4b = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T4h, T1o, T1t; + T1n = Rp[WS(rs, 7)]; + T1q = Rm[WS(rs, 7)]; + T4c = FNMS(T1C, T1A, T4b); + T1E = FMA(T1C, T1D, T1B); + T1m = W[26]; + T1p = W[27]; + T1u = Rp[WS(rs, 3)]; + T1x = Rm[WS(rs, 3)]; + T4h = T1m * T1q; + T1o = T1m * T1n; + T1t = W[10]; + T1w = W[11]; + T4i = FNMS(T1p, T1n, T4h); + T1r = FMA(T1p, T1q, T1o); + T49 = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T4j, T6P, T1s, T48, T4a, T1y; + T4j = T4g - T4i; + T6P = T4g + T4i; + T1s = T1l + T1r; + T48 = T1l - T1r; + T4a = FNMS(T1w, T1u, T49); + T1y = FMA(T1w, T1x, T1v); + { + 
E T6Q, T4d, T4k, T1F; + T6Q = T4a + T4c; + T4d = T4a - T4c; + T4k = T1y - T1E; + T1F = T1y + T1E; + T4e = T48 + T4d; + T61 = T48 - T4d; + T7E = T6P + T6Q; + T6R = T6P - T6Q; + T6O = T1s - T1F; + T1G = T1s + T1F; + T60 = T4j + T4k; + T4l = T4j - T4k; + } + } + } + { + E T5m, T2H, T52, T30, T2Q, T2T, T2S, T5o, T2N, T4Z, T2R; + { + E T2W, T2Z, T2Y, T51, T2X; + { + E T2D, T2G, T2C, T2F, T5l, T2E, T2V; + T2D = Ip[WS(rs, 15)]; + T2G = Im[WS(rs, 15)]; + T2C = W[60]; + T2F = W[61]; + T2W = Ip[WS(rs, 11)]; + T2Z = Im[WS(rs, 11)]; + T5l = T2C * T2G; + T2E = T2C * T2D; + T2V = W[44]; + T2Y = W[45]; + T5m = FNMS(T2F, T2D, T5l); + T2H = FMA(T2F, T2G, T2E); + T51 = T2V * T2Z; + T2X = T2V * T2W; + } + { + E T2J, T2M, T2I, T2L, T5n, T2K, T2P; + T2J = Ip[WS(rs, 7)]; + T2M = Im[WS(rs, 7)]; + T52 = FNMS(T2Y, T2W, T51); + T30 = FMA(T2Y, T2Z, T2X); + T2I = W[28]; + T2L = W[29]; + T2Q = Ip[WS(rs, 3)]; + T2T = Im[WS(rs, 3)]; + T5n = T2I * T2M; + T2K = T2I * T2J; + T2P = W[12]; + T2S = W[13]; + T5o = FNMS(T2L, T2J, T5n); + T2N = FMA(T2L, T2M, T2K); + T4Z = T2P * T2T; + T2R = T2P * T2Q; + } + } + { + E T5p, T77, T2O, T4Y, T50, T2U; + T5p = T5m - T5o; + T77 = T5m + T5o; + T2O = T2H + T2N; + T4Y = T2H - T2N; + T50 = FNMS(T2S, T2Q, T4Z); + T2U = FMA(T2S, T2T, T2R); + { + E T78, T53, T5q, T31; + T78 = T50 + T52; + T53 = T50 - T52; + T5q = T30 - T2U; + T31 = T2U + T30; + T54 = T4Y + T53; + T6c = T4Y - T53; + T79 = T77 - T78; + T7N = T77 + T78; + T32 = T2O + T31; + T7b = T2O - T31; + T6f = T5q - T5p; + T5r = T5p + T5q; + } + } + } + { + E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y; + { + E T23, T26, T25, T4s, T24; + { + E T1K, T1N, T1J, T1M, T4M, T1L, T22; + T1K = Ip[0]; + T1N = Im[0]; + T1J = W[0]; + T1M = W[1]; + T23 = Ip[WS(rs, 12)]; + T26 = Im[WS(rs, 12)]; + T4M = T1J * T1N; + T1L = T1J * T1K; + T22 = W[48]; + T25 = W[49]; + T4N = FNMS(T1M, T1K, T4M); + T1O = FMA(T1M, T1N, T1L); + T4s = T22 * T26; + T24 = T22 * T23; + } + { + E T1Q, T1T, T1P, T1S, T4O, T1R, T1W; + T1Q = Ip[WS(rs, 
8)]; + T1T = Im[WS(rs, 8)]; + T4t = FNMS(T25, T23, T4s); + T27 = FMA(T25, T26, T24); + T1P = W[32]; + T1S = W[33]; + T1X = Ip[WS(rs, 4)]; + T20 = Im[WS(rs, 4)]; + T4O = T1P * T1T; + T1R = T1P * T1Q; + T1W = W[16]; + T1Z = W[17]; + T4P = FNMS(T1S, T1Q, T4O); + T1U = FMA(T1S, T1T, T1R); + T4q = T1W * T20; + T1Y = T1W * T1X; + } + } + { + E T4Q, T6V, T1V, T4p, T4r, T21; + T4Q = T4N - T4P; + T6V = T4N + T4P; + T1V = T1O + T1U; + T4p = T1O - T1U; + T4r = FNMS(T1Z, T1X, T4q); + T21 = FMA(T1Z, T20, T1Y); + { + E T6W, T4u, T4R, T28; + T6W = T4r + T4t; + T4u = T4r - T4t; + T4R = T21 - T27; + T28 = T21 + T27; + T4v = T4p + T4u; + T65 = T4p - T4u; + T6X = T6V - T6W; + T7I = T6V + T6W; + T29 = T1V + T28; + T70 = T1V - T28; + T68 = T4Q + T4R; + T4S = T4Q - T4R; + } + } + } + { + E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i; + { + E T3n, T3q, T3p, T5f, T3o; + { + E T34, T37, T33, T36, T56, T35, T3m; + T34 = Ip[WS(rs, 1)]; + T37 = Im[WS(rs, 1)]; + T33 = W[4]; + T36 = W[5]; + T3n = Ip[WS(rs, 5)]; + T3q = Im[WS(rs, 5)]; + T56 = T33 * T37; + T35 = T33 * T34; + T3m = W[20]; + T3p = W[21]; + T57 = FNMS(T36, T34, T56); + T38 = FMA(T36, T37, T35); + T5f = T3m * T3q; + T3o = T3m * T3n; + } + { + E T3a, T3d, T39, T3c, T58, T3b, T3g; + T3a = Ip[WS(rs, 9)]; + T3d = Im[WS(rs, 9)]; + T5g = FNMS(T3p, T3n, T5f); + T3r = FMA(T3p, T3q, T3o); + T39 = W[36]; + T3c = W[37]; + T3h = Ip[WS(rs, 13)]; + T3k = Im[WS(rs, 13)]; + T58 = T39 * T3d; + T3b = T39 * T3a; + T3g = W[52]; + T3j = W[53]; + T59 = FNMS(T3c, T3a, T58); + T3e = FMA(T3c, T3d, T3b); + T5d = T3g * T3k; + T3i = T3g * T3h; + } + } + { + E T5a, T7c, T3f, T55, T5e, T3l, T7d, T3s; + T5a = T57 - T59; + T7c = T57 + T59; + T3f = T38 + T3e; + T55 = T38 - T3e; + T5e = FNMS(T3j, T3h, T5d); + T3l = FMA(T3j, T3k, T3i); + T5h = T5e - T5g; + T7d = T5e + T5g; + T3s = T3l + T3r; + T5c = T3l - T3r; + T5s = T5a - T55; + T5b = T55 + T5a; + T7O = T7c + T7d; + T7e = T7c - T7d; + T76 = T3s - T3f; + T3t = T3f + T3s; + } + } + { + E T4y, T2f, T2o, T2r, 
T4A, T2l, T2n, T2q, T4E, T2p; + { + E T2u, T2x, T2t, T2w; + { + E T2b, T2e, T2d, T4x, T2c, T2a; + T2b = Ip[WS(rs, 2)]; + T2e = Im[WS(rs, 2)]; + T2a = W[8]; + T5t = T5c + T5h; + T5i = T5c - T5h; + T2d = W[9]; + T4x = T2a * T2e; + T2c = T2a * T2b; + T2u = Ip[WS(rs, 6)]; + T2x = Im[WS(rs, 6)]; + T4y = FNMS(T2d, T2b, T4x); + T2f = FMA(T2d, T2e, T2c); + T2t = W[24]; + T2w = W[25]; + } + { + E T2h, T2k, T2j, T4z, T2i, T4G, T2v, T2g; + T2h = Ip[WS(rs, 10)]; + T2k = Im[WS(rs, 10)]; + T4G = T2t * T2x; + T2v = T2t * T2u; + T2g = W[40]; + T2j = W[41]; + T4H = FNMS(T2w, T2u, T4G); + T2y = FMA(T2w, T2x, T2v); + T4z = T2g * T2k; + T2i = T2g * T2h; + T2o = Ip[WS(rs, 14)]; + T2r = Im[WS(rs, 14)]; + T4A = FNMS(T2j, T2h, T4z); + T2l = FMA(T2j, T2k, T2i); + T2n = W[56]; + T2q = W[57]; + } + } + T4B = T4y - T4A; + T71 = T4y + T4A; + T2m = T2f + T2l; + T4w = T2f - T2l; + T4E = T2n * T2r; + T2p = T2n * T2o; + T4F = FNMS(T2q, T2o, T4E); + T2s = FMA(T2q, T2r, T2p); + } + } + { + E T4T, T4C, T4J, T4U, T7y, T8q, T8p, T7B; + { + E T6E, T8j, T73, T6Y, T6H, T8k, T8i, T8h; + { + E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A; + E T7K, T7P, T7W, T1I; + { + E T7X, T7Y, T7J, T82, T88; + { + E Tm, T4I, T72, T4D, T2z, TN; + T6E = T8 - Tl; + Tm = T8 + Tl; + T4T = T4B - T4w; + T4C = T4w + T4B; + T4I = T4F - T4H; + T72 = T4F + T4H; + T4D = T2s - T2y; + T2z = T2s + T2y; + TN = Tz + TM; + T8j = TM - Tz; + T73 = T71 - T72; + T7J = T71 + T72; + T4J = T4D - T4I; + T4U = T4D + T4I; + T2A = T2m + T2z; + T6Y = T2z - T2m; + T7C = Tm - TN; + TO = Tm + TN; + } + T7K = T7I - T7J; + T7X = T7I + T7J; + T7Y = T7N + T7O; + T7P = T7N - T7O; + T6H = T6F - T6G; + T82 = T6F + T6G; + T88 = T83 + T87; + T8k = T87 - T83; + T80 = T7X + T7Y; + T7Z = T7X - T7Y; + T8e = T88 - T82; + T89 = T82 + T88; + } + { + E T7H, T7M, T2B, T3u; + T7H = T29 - T2A; + T2B = T29 + T2A; + T3u = T32 + T3t; + T7M = T32 - T3t; + T8d = T1G - T1f; + T1H = T1f + T1G; + T8b = T3u - T2B; + T3v = T2B + T3u; + T7T = T7K - T7H; + 
T7L = T7H + T7K; + T7U = T7M + T7P; + T7Q = T7M - T7P; + } + T7W = TO - T1H; + T1I = TO + T1H; + { + E T7S, T8f, T8g, T7V; + { + E T7R, T8c, T8a, T7G, T81, T7F; + T8i = T7Q - T7L; + T7R = T7L + T7Q; + T81 = T7D + T7E; + T7F = T7D - T7E; + Rp[0] = T1I + T3v; + Rm[WS(rs, 15)] = T1I - T3v; + Rp[WS(rs, 8)] = T7W + T7Z; + Rm[WS(rs, 7)] = T7W - T7Z; + T8c = T89 - T81; + T8a = T81 + T89; + T7G = T7C + T7F; + T7S = T7C - T7F; + T8h = T8e - T8d; + T8f = T8d + T8e; + Ip[WS(rs, 8)] = T8b + T8c; + Im[WS(rs, 7)] = T8b - T8c; + Ip[0] = T80 + T8a; + Im[WS(rs, 15)] = T80 - T8a; + Rp[WS(rs, 4)] = FMA(KP707106781, T7R, T7G); + Rm[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G); + T8g = T7T + T7U; + T7V = T7T - T7U; + } + Ip[WS(rs, 4)] = FMA(KP707106781, T8g, T8f); + Im[WS(rs, 11)] = FMS(KP707106781, T8g, T8f); + Rp[WS(rs, 12)] = FMA(KP707106781, T7V, T7S); + Rm[WS(rs, 3)] = FNMS(KP707106781, T7V, T7S); + } + } + { + E T7f, T7m, T6I, T7a, T7A, T7w, T8r, T8l, T8m, T6T, T7j, T75, T8s, T7p, T7z; + E T7t; + { + E T7n, T6N, T6S, T7o, T7u, T7v; + T7f = T7b - T7e; + T7u = T7b + T7e; + Ip[WS(rs, 12)] = FMA(KP707106781, T8i, T8h); + Im[WS(rs, 3)] = FMS(KP707106781, T8i, T8h); + T7m = T6E + T6H; + T6I = T6E - T6H; + T7v = T79 + T76; + T7a = T76 - T79; + T7n = T6M + T6L; + T6N = T6L - T6M; + T7A = FMA(KP414213562, T7u, T7v); + T7w = FNMS(KP414213562, T7v, T7u); + T8r = T8k - T8j; + T8l = T8j + T8k; + T6S = T6O + T6R; + T7o = T6O - T6R; + { + E T7s, T7r, T6Z, T74; + T7s = T6X + T6Y; + T6Z = T6X - T6Y; + T74 = T70 - T73; + T7r = T70 + T73; + T8m = T6N + T6S; + T6T = T6N - T6S; + T7j = FNMS(KP414213562, T6Z, T74); + T75 = FMA(KP414213562, T74, T6Z); + T8s = T7o - T7n; + T7p = T7n + T7o; + T7z = FNMS(KP414213562, T7r, T7s); + T7t = FMA(KP414213562, T7s, T7r); + } + } + { + E T7i, T6U, T8t, T8v, T7k, T7g; + T7i = FNMS(KP707106781, T6T, T6I); + T6U = FMA(KP707106781, T6T, T6I); + T8t = FMA(KP707106781, T8s, T8r); + T8v = FNMS(KP707106781, T8s, T8r); + T7k = FNMS(KP414213562, T7a, T7f); + T7g = 
FMA(KP414213562, T7f, T7a); + { + E T7q, T7x, T8n, T8o; + T7y = FNMS(KP707106781, T7p, T7m); + T7q = FMA(KP707106781, T7p, T7m); + { + E T7l, T8u, T8w, T7h; + T7l = T7j + T7k; + T8u = T7k - T7j; + T8w = T7g - T75; + T7h = T75 + T7g; + Rm[WS(rs, 1)] = FMA(KP923879532, T7l, T7i); + Rp[WS(rs, 14)] = FNMS(KP923879532, T7l, T7i); + Ip[WS(rs, 6)] = FMA(KP923879532, T8u, T8t); + Im[WS(rs, 9)] = FMS(KP923879532, T8u, T8t); + Ip[WS(rs, 14)] = FMA(KP923879532, T8w, T8v); + Im[WS(rs, 1)] = FMS(KP923879532, T8w, T8v); + Rp[WS(rs, 6)] = FMA(KP923879532, T7h, T6U); + Rm[WS(rs, 9)] = FNMS(KP923879532, T7h, T6U); + T7x = T7t + T7w; + T8q = T7w - T7t; + } + T8p = FNMS(KP707106781, T8m, T8l); + T8n = FMA(KP707106781, T8m, T8l); + T8o = T7z + T7A; + T7B = T7z - T7A; + Rp[WS(rs, 2)] = FMA(KP923879532, T7x, T7q); + Rm[WS(rs, 13)] = FNMS(KP923879532, T7x, T7q); + Ip[WS(rs, 2)] = FMA(KP923879532, T8o, T8n); + Im[WS(rs, 13)] = FMS(KP923879532, T8o, T8n); + } + } + } + } + { + E T5S, T8O, T8N, T5V, T6g, T6d, T69, T66, T8K, T8J; + { + E T5C, T3S, T8I, T4n, T8H, T8B, T8C, T5F, T5k, T5K, T5u, T4K, T4V; + { + E T5D, T5E, T8z, T8A, T5j; + { + E T3C, T3R, T47, T4m; + T5S = T3w - T3B; + T3C = T3w + T3B; + Rp[WS(rs, 10)] = FMA(KP923879532, T7B, T7y); + Rm[WS(rs, 5)] = FNMS(KP923879532, T7B, T7y); + Ip[WS(rs, 10)] = FMA(KP923879532, T8q, T8p); + Im[WS(rs, 5)] = FMS(KP923879532, T8q, T8p); + T3R = T3J + T3Q; + T8O = T3Q - T3J; + T5D = FNMS(KP414213562, T3Z, T46); + T47 = FMA(KP414213562, T46, T3Z); + T4m = FNMS(KP414213562, T4l, T4e); + T5E = FMA(KP414213562, T4e, T4l); + T8N = T8y + T8x; + T8z = T8x - T8y; + T5C = FNMS(KP707106781, T3R, T3C); + T3S = FMA(KP707106781, T3R, T3C); + T8I = T4m - T47; + T4n = T47 + T4m; + T8A = T5T + T5U; + T5V = T5T - T5U; + } + T6g = T5i - T5b; + T5j = T5b + T5i; + T8H = FNMS(KP707106781, T8A, T8z); + T8B = FMA(KP707106781, T8A, T8z); + T8C = T5D + T5E; + T5F = T5D - T5E; + T5k = FMA(KP707106781, T5j, T54); + T5K = FNMS(KP707106781, T5j, T54); + T5u = T5s + T5t; + T6d 
= T5t - T5s; + T69 = T4C - T4J; + T4K = T4C + T4J; + T4V = T4T + T4U; + T66 = T4U - T4T; + } + { + E T5M, T5Q, T5J, T5P, T8F, T8G; + { + E T5y, T4o, T5A, T5w, T5z, T4X, T8D, T5L, T5v, T8E, T5B, T5x; + T5y = FNMS(KP923879532, T4n, T3S); + T4o = FMA(KP923879532, T4n, T3S); + T5L = FNMS(KP707106781, T5u, T5r); + T5v = FMA(KP707106781, T5u, T5r); + { + E T5H, T4L, T5I, T4W; + T5H = FNMS(KP707106781, T4K, T4v); + T4L = FMA(KP707106781, T4K, T4v); + T5I = FNMS(KP707106781, T4V, T4S); + T4W = FMA(KP707106781, T4V, T4S); + T5M = FMA(KP668178637, T5L, T5K); + T5Q = FNMS(KP668178637, T5K, T5L); + T5A = FMA(KP198912367, T5k, T5v); + T5w = FNMS(KP198912367, T5v, T5k); + T5J = FNMS(KP668178637, T5I, T5H); + T5P = FMA(KP668178637, T5H, T5I); + T5z = FNMS(KP198912367, T4L, T4W); + T4X = FMA(KP198912367, T4W, T4L); + } + T8D = FMA(KP923879532, T8C, T8B); + T8F = FNMS(KP923879532, T8C, T8B); + T8E = T5z + T5A; + T5B = T5z - T5A; + T8G = T5w - T4X; + T5x = T4X + T5w; + Ip[WS(rs, 1)] = FMA(KP980785280, T8E, T8D); + Im[WS(rs, 14)] = FMS(KP980785280, T8E, T8D); + Rp[WS(rs, 1)] = FMA(KP980785280, T5x, T4o); + Rm[WS(rs, 14)] = FNMS(KP980785280, T5x, T4o); + Rp[WS(rs, 9)] = FMA(KP980785280, T5B, T5y); + Rm[WS(rs, 6)] = FNMS(KP980785280, T5B, T5y); + } + { + E T5O, T8L, T8M, T5R, T5G, T5N; + T5O = FMA(KP923879532, T5F, T5C); + T5G = FNMS(KP923879532, T5F, T5C); + T5N = T5J + T5M; + T8K = T5M - T5J; + T8J = FMA(KP923879532, T8I, T8H); + T8L = FNMS(KP923879532, T8I, T8H); + Ip[WS(rs, 9)] = FMA(KP980785280, T8G, T8F); + Im[WS(rs, 6)] = FMS(KP980785280, T8G, T8F); + Rm[WS(rs, 2)] = FMA(KP831469612, T5N, T5G); + Rp[WS(rs, 13)] = FNMS(KP831469612, T5N, T5G); + T8M = T5P + T5Q; + T5R = T5P - T5Q; + Ip[WS(rs, 13)] = FNMS(KP831469612, T8M, T8L); + Im[WS(rs, 2)] = -(FMA(KP831469612, T8M, T8L)); + Rp[WS(rs, 5)] = FMA(KP831469612, T5R, T5O); + Rm[WS(rs, 10)] = FNMS(KP831469612, T5R, T5O); + } + } + } + { + E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T67, T6u, T6y, T6C, T6m, T6i; + { + E T6p, T5Z, T62, 
T6q; + T6p = FNMS(KP414213562, T5X, T5Y); + T5Z = FMA(KP414213562, T5Y, T5X); + Ip[WS(rs, 5)] = FMA(KP831469612, T8K, T8J); + Im[WS(rs, 10)] = FMS(KP831469612, T8K, T8J); + T6o = FNMS(KP707106781, T5V, T5S); + T5W = FMA(KP707106781, T5V, T5S); + T62 = FNMS(KP414213562, T61, T60); + T6q = FMA(KP414213562, T60, T61); + T8W = T5Z + T62; + T63 = T5Z - T62; + T8V = FNMS(KP707106781, T8O, T8N); + T8P = FMA(KP707106781, T8O, T8N); + { + E T6x, T6e, T6w, T6h; + T8Q = T6q - T6p; + T6r = T6p + T6q; + T6x = FMA(KP707106781, T6d, T6c); + T6e = FNMS(KP707106781, T6d, T6c); + T6w = FMA(KP707106781, T6g, T6f); + T6h = FNMS(KP707106781, T6g, T6f); + T67 = FNMS(KP707106781, T66, T65); + T6u = FMA(KP707106781, T66, T65); + T6y = FMA(KP198912367, T6x, T6w); + T6C = FNMS(KP198912367, T6w, T6x); + T6m = FNMS(KP668178637, T6e, T6h); + T6i = FMA(KP668178637, T6h, T6e); + } + } + { + E T6k, T64, T8R, T8T, T6t, T6a; + T6k = FNMS(KP923879532, T63, T5W); + T64 = FMA(KP923879532, T63, T5W); + T8R = FMA(KP923879532, T8Q, T8P); + T8T = FNMS(KP923879532, T8Q, T8P); + T6t = FMA(KP707106781, T69, T68); + T6a = FNMS(KP707106781, T69, T68); + { + E T6A, T8X, T8Y, T6D; + { + E T6s, T6B, T6l, T6b, T6z, T6v; + T6A = FMA(KP923879532, T6r, T6o); + T6s = FNMS(KP923879532, T6r, T6o); + T6v = FMA(KP198912367, T6u, T6t); + T6B = FNMS(KP198912367, T6t, T6u); + T6l = FNMS(KP668178637, T67, T6a); + T6b = FMA(KP668178637, T6a, T67); + T6z = T6v + T6y; + T90 = T6y - T6v; + T8Z = FMA(KP923879532, T8W, T8V); + T8X = FNMS(KP923879532, T8W, T8V); + { + E T6n, T8S, T8U, T6j; + T6n = T6l + T6m; + T8S = T6l - T6m; + T8U = T6i - T6b; + T6j = T6b + T6i; + Rp[WS(rs, 7)] = FMA(KP980785280, T6z, T6s); + Rm[WS(rs, 8)] = FNMS(KP980785280, T6z, T6s); + Rp[WS(rs, 11)] = FMA(KP831469612, T6n, T6k); + Rm[WS(rs, 4)] = FNMS(KP831469612, T6n, T6k); + Ip[WS(rs, 3)] = FMA(KP831469612, T8S, T8R); + Im[WS(rs, 12)] = FMS(KP831469612, T8S, T8R); + Ip[WS(rs, 11)] = FMA(KP831469612, T8U, T8T); + Im[WS(rs, 4)] = FMS(KP831469612, T8U, T8T); + 
Rp[WS(rs, 3)] = FMA(KP831469612, T6j, T64); + Rm[WS(rs, 12)] = FNMS(KP831469612, T6j, T64); + T8Y = T6C - T6B; + T6D = T6B + T6C; + } + } + Ip[WS(rs, 7)] = FMA(KP980785280, T8Y, T8X); + Im[WS(rs, 8)] = FMS(KP980785280, T8Y, T8X); + Rm[0] = FMA(KP980785280, T6D, T6A); + Rp[WS(rs, 15)] = FNMS(KP980785280, T6D, T6A); + } + } + } + } + } + } + Ip[WS(rs, 15)] = FMA(KP980785280, T90, T8Z); + Im[0] = FMS(KP980785280, T90, T8Z); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cf_32", twinstr, &GENUS, {236, 62, 198, 0} }; + +void X(codelet_hc2cf_32) (planner *p) { + X(khc2c_register) (p, hc2cf_32, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cf_32 -include hc2cf.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 96 stack variables, 7 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41; + E T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, 
T73, T3a, T4U; + E T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x; + E T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P; + E T4m, T5h, T4v, T5e; + { + E T1, T76, T6, T75, Tc, T32, Th, T33; + T1 = Rp[0]; + T76 = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Rp[WS(rs, 8)]; + T5 = Rm[WS(rs, 8)]; + T2 = W[30]; + T4 = W[31]; + T6 = FMA(T2, T3, T4 * T5); + T75 = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = Rp[WS(rs, 4)]; + Tb = Rm[WS(rs, 4)]; + T8 = W[14]; + Ta = W[15]; + Tc = FMA(T8, T9, Ta * Tb); + T32 = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = Rp[WS(rs, 12)]; + Tg = Rm[WS(rs, 12)]; + Td = W[46]; + Tf = W[47]; + Th = FMA(Td, Te, Tf * Tg); + T33 = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, T7A, T7B; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 + Ti; + T5F = T7 - Ti; + T7A = T76 - T75; + T7B = Tc - Th; + T7C = T7A - T7B; + T7Q = T7B + T7A; + } + { + E T31, T34, T74, T77; + T31 = T1 - T6; + T34 = T32 - T33; + T35 = T31 - T34; + T4T = T31 + T34; + T74 = T32 + T33; + T77 = T75 + T76; + T78 = T74 + T77; + T7m = T77 - T74; + } + } + { + E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y; + { + E T1v, T1x, T1u, T1w; + T1v = Ip[0]; + T1x = Im[0]; + T1u = W[0]; + T1w = W[1]; + T1y = FMA(T1u, T1v, T1w * T1x); + T3G = FNMS(T1w, T1v, T1u * T1x); + } + { + E T1L, T1N, T1K, T1M; + T1L = Ip[WS(rs, 12)]; + T1N = Im[WS(rs, 12)]; + T1K = W[48]; + T1M = W[49]; + T1O = FMA(T1K, T1L, T1M * T1N); + T3Z = FNMS(T1M, T1L, T1K * T1N); + } + { + E T1A, T1C, T1z, T1B; + T1A = Ip[WS(rs, 8)]; + T1C = Im[WS(rs, 8)]; + T1z = W[32]; + T1B = W[33]; + T1D = FMA(T1z, T1A, T1B * T1C); + T3H = FNMS(T1B, T1A, T1z * T1C); + } + { + E T1G, T1I, T1F, T1H; + T1G = Ip[WS(rs, 4)]; + T1I = Im[WS(rs, 4)]; + T1F = W[16]; + T1H = W[17]; + T1J = FMA(T1F, T1G, T1H * T1I); + T3Y = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1E, T1P, T5W, T5X; + T1E = T1y + T1D; + T1P = T1J + T1O; + T1Q = T1E + T1P; + T61 = T1E - T1P; + T5W = T3G + T3H; + T5X = T3Y + T3Z; + T5Y = 
T5W - T5X; + T6J = T5W + T5X; + } + { + E T3I, T3J, T3X, T40; + T3I = T3G - T3H; + T3J = T1J - T1O; + T3K = T3I + T3J; + T59 = T3I - T3J; + T3X = T1y - T1D; + T40 = T3Y - T3Z; + T41 = T3X - T40; + T56 = T3X + T40; + } + } + { + E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48; + { + E T2g, T2i, T2f, T2h; + T2g = Ip[WS(rs, 15)]; + T2i = Im[WS(rs, 15)]; + T2f = W[60]; + T2h = W[61]; + T2j = FMA(T2f, T2g, T2h * T2i); + T4o = FNMS(T2h, T2g, T2f * T2i); + } + { + E T2w, T2y, T2v, T2x; + T2w = Ip[WS(rs, 11)]; + T2y = Im[WS(rs, 11)]; + T2v = W[44]; + T2x = W[45]; + T2z = FMA(T2v, T2w, T2x * T2y); + T49 = FNMS(T2x, T2w, T2v * T2y); + } + { + E T2l, T2n, T2k, T2m; + T2l = Ip[WS(rs, 7)]; + T2n = Im[WS(rs, 7)]; + T2k = W[28]; + T2m = W[29]; + T2o = FMA(T2k, T2l, T2m * T2n); + T4p = FNMS(T2m, T2l, T2k * T2n); + } + { + E T2r, T2t, T2q, T2s; + T2r = Ip[WS(rs, 3)]; + T2t = Im[WS(rs, 3)]; + T2q = W[12]; + T2s = W[13]; + T2u = FMA(T2q, T2r, T2s * T2t); + T48 = FNMS(T2s, T2r, T2q * T2t); + } + { + E T2p, T2A, T6c, T6d; + T2p = T2j + T2o; + T2A = T2u + T2z; + T2B = T2p + T2A; + T67 = T2p - T2A; + T6c = T4o + T4p; + T6d = T48 + T49; + T6e = T6c - T6d; + T6O = T6c + T6d; + } + { + E T47, T4a, T4q, T4r; + T47 = T2j - T2o; + T4a = T48 - T49; + T4b = T47 - T4a; + T5d = T47 + T4a; + T4q = T4o - T4p; + T4r = T2u - T2z; + T4s = T4q + T4r; + T5g = T4q - T4r; + } + } + { + E To, T36, TE, T3d, Tt, T37, Tz, T3c; + { + E Tl, Tn, Tk, Tm; + Tl = Rp[WS(rs, 2)]; + Tn = Rm[WS(rs, 2)]; + Tk = W[6]; + Tm = W[7]; + To = FMA(Tk, Tl, Tm * Tn); + T36 = FNMS(Tm, Tl, Tk * Tn); + } + { + E TB, TD, TA, TC; + TB = Rp[WS(rs, 6)]; + TD = Rm[WS(rs, 6)]; + TA = W[22]; + TC = W[23]; + TE = FMA(TA, TB, TC * TD); + T3d = FNMS(TC, TB, TA * TD); + } + { + E Tq, Ts, Tp, Tr; + Tq = Rp[WS(rs, 10)]; + Ts = Rm[WS(rs, 10)]; + Tp = W[38]; + Tr = W[39]; + Tt = FMA(Tp, Tq, Tr * Ts); + T37 = FNMS(Tr, Tq, Tp * Ts); + } + { + E Tw, Ty, Tv, Tx; + Tw = Rp[WS(rs, 14)]; + Ty = Rm[WS(rs, 14)]; + Tv = W[54]; + Tx = W[55]; + Tz = FMA(Tv, Tw, Tx * 
Ty); + T3c = FNMS(Tx, Tw, Tv * Ty); + } + { + E Tu, TF, T5G, T5H; + Tu = To + Tt; + TF = Tz + TE; + TG = Tu + TF; + T7l = TF - Tu; + T5G = T36 + T37; + T5H = T3c + T3d; + T5I = T5G - T5H; + T73 = T5G + T5H; + } + { + E T38, T39, T3b, T3e; + T38 = T36 - T37; + T39 = To - Tt; + T3a = T38 - T39; + T4U = T39 + T38; + T3b = Tz - TE; + T3e = T3c - T3d; + T3f = T3b + T3e; + T4V = T3b - T3e; + } + } + { + E TM, T3i, T12, T3p, TR, T3j, TX, T3o; + { + E TJ, TL, TI, TK; + TJ = Rp[WS(rs, 1)]; + TL = Rm[WS(rs, 1)]; + TI = W[2]; + TK = W[3]; + TM = FMA(TI, TJ, TK * TL); + T3i = FNMS(TK, TJ, TI * TL); + } + { + E TZ, T11, TY, T10; + TZ = Rp[WS(rs, 13)]; + T11 = Rm[WS(rs, 13)]; + TY = W[50]; + T10 = W[51]; + T12 = FMA(TY, TZ, T10 * T11); + T3p = FNMS(T10, TZ, TY * T11); + } + { + E TO, TQ, TN, TP; + TO = Rp[WS(rs, 9)]; + TQ = Rm[WS(rs, 9)]; + TN = W[34]; + TP = W[35]; + TR = FMA(TN, TO, TP * TQ); + T3j = FNMS(TP, TO, TN * TQ); + } + { + E TU, TW, TT, TV; + TU = Rp[WS(rs, 5)]; + TW = Rm[WS(rs, 5)]; + TT = W[18]; + TV = W[19]; + TX = FMA(TT, TU, TV * TW); + T3o = FNMS(TV, TU, TT * TW); + } + { + E TS, T13, T5K, T5L; + TS = TM + TR; + T13 = TX + T12; + T14 = TS + T13; + T5N = TS - T13; + T5K = T3i + T3j; + T5L = T3o + T3p; + T5M = T5K - T5L; + T6E = T5K + T5L; + } + { + E T3k, T3l, T3n, T3q; + T3k = T3i - T3j; + T3l = TX - T12; + T3m = T3k + T3l; + T4Y = T3k - T3l; + T3n = TM - TR; + T3q = T3o - T3p; + T3r = T3n - T3q; + T4Z = T3n + T3q; + } + } + { + E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z; + { + E T16, T18, T15, T17; + T16 = Rp[WS(rs, 15)]; + T18 = Rm[WS(rs, 15)]; + T15 = W[58]; + T17 = W[59]; + T19 = FMA(T15, T16, T17 * T18); + T3t = FNMS(T17, T16, T15 * T18); + } + { + E T1m, T1o, T1l, T1n; + T1m = Rp[WS(rs, 11)]; + T1o = Rm[WS(rs, 11)]; + T1l = W[42]; + T1n = W[43]; + T1p = FMA(T1l, T1m, T1n * T1o); + T3A = FNMS(T1n, T1m, T1l * T1o); + } + { + E T1b, T1d, T1a, T1c; + T1b = Rp[WS(rs, 7)]; + T1d = Rm[WS(rs, 7)]; + T1a = W[26]; + T1c = W[27]; + T1e = FMA(T1a, T1b, T1c * T1d); + 
T3u = FNMS(T1c, T1b, T1a * T1d); + } + { + E T1h, T1j, T1g, T1i; + T1h = Rp[WS(rs, 3)]; + T1j = Rm[WS(rs, 3)]; + T1g = W[10]; + T1i = W[11]; + T1k = FMA(T1g, T1h, T1i * T1j); + T3z = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1f, T1q, T5Q, T5R; + T1f = T19 + T1e; + T1q = T1k + T1p; + T1r = T1f + T1q; + T5P = T1f - T1q; + T5Q = T3t + T3u; + T5R = T3z + T3A; + T5S = T5Q - T5R; + T6F = T5Q + T5R; + } + { + E T3v, T3w, T3y, T3B; + T3v = T3t - T3u; + T3w = T1k - T1p; + T3x = T3v + T3w; + T51 = T3v - T3w; + T3y = T19 - T1e; + T3B = T3z - T3A; + T3C = T3y - T3B; + T52 = T3y + T3B; + } + } + { + E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O; + { + E T1S, T1U, T1R, T1T; + T1S = Ip[WS(rs, 2)]; + T1U = Im[WS(rs, 2)]; + T1R = W[8]; + T1T = W[9]; + T1V = FMA(T1R, T1S, T1T * T1U); + T3R = FNMS(T1T, T1S, T1R * T1U); + } + { + E T1X, T1Z, T1W, T1Y; + T1X = Ip[WS(rs, 10)]; + T1Z = Im[WS(rs, 10)]; + T1W = W[40]; + T1Y = W[41]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T3S = FNMS(T1Y, T1X, T1W * T1Z); + } + T3Q = T1V - T20; + T3T = T3R - T3S; + { + E T23, T25, T22, T24; + T23 = Ip[WS(rs, 14)]; + T25 = Im[WS(rs, 14)]; + T22 = W[56]; + T24 = W[57]; + T26 = FMA(T22, T23, T24 * T25); + T3M = FNMS(T24, T23, T22 * T25); + } + { + E T28, T2a, T27, T29; + T28 = Ip[WS(rs, 6)]; + T2a = Im[WS(rs, 6)]; + T27 = W[24]; + T29 = W[25]; + T2b = FMA(T27, T28, T29 * T2a); + T3N = FNMS(T29, T28, T27 * T2a); + } + T3L = T26 - T2b; + T3O = T3M - T3N; + { + E T21, T2c, T62, T63; + T21 = T1V + T20; + T2c = T26 + T2b; + T2d = T21 + T2c; + T5Z = T2c - T21; + T62 = T3R + T3S; + T63 = T3M + T3N; + T64 = T62 - T63; + T6K = T62 + T63; + } + { + E T3P, T3U, T42, T43; + T3P = T3L - T3O; + T3U = T3Q + T3T; + T3V = KP707106781 * (T3P - T3U); + T57 = KP707106781 * (T3U + T3P); + T42 = T3T - T3Q; + T43 = T3L + T3O; + T44 = KP707106781 * (T42 - T43); + T5a = KP707106781 * (T42 + T43); + } + } + { + E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k; + { + E T2D, T2F, T2C, T2E; + T2D = Ip[WS(rs, 1)]; + 
T2F = Im[WS(rs, 1)]; + T2C = W[4]; + T2E = W[5]; + T2G = FMA(T2C, T2D, T2E * T2F); + T4c = FNMS(T2E, T2D, T2C * T2F); + } + { + E T2I, T2K, T2H, T2J; + T2I = Ip[WS(rs, 9)]; + T2K = Im[WS(rs, 9)]; + T2H = W[36]; + T2J = W[37]; + T2L = FMA(T2H, T2I, T2J * T2K); + T4d = FNMS(T2J, T2I, T2H * T2K); + } + T4e = T4c - T4d; + T4f = T2G - T2L; + { + E T2O, T2Q, T2N, T2P; + T2O = Ip[WS(rs, 13)]; + T2Q = Im[WS(rs, 13)]; + T2N = W[52]; + T2P = W[53]; + T2R = FMA(T2N, T2O, T2P * T2Q); + T4i = FNMS(T2P, T2O, T2N * T2Q); + } + { + E T2T, T2V, T2S, T2U; + T2T = Ip[WS(rs, 5)]; + T2V = Im[WS(rs, 5)]; + T2S = W[20]; + T2U = W[21]; + T2W = FMA(T2S, T2T, T2U * T2V); + T4j = FNMS(T2U, T2T, T2S * T2V); + } + T4h = T2R - T2W; + T4k = T4i - T4j; + { + E T2M, T2X, T68, T69; + T2M = T2G + T2L; + T2X = T2R + T2W; + T2Y = T2M + T2X; + T6f = T2X - T2M; + T68 = T4c + T4d; + T69 = T4i + T4j; + T6a = T68 - T69; + T6P = T68 + T69; + } + { + E T4g, T4l, T4t, T4u; + T4g = T4e - T4f; + T4l = T4h + T4k; + T4m = KP707106781 * (T4g - T4l); + T5h = KP707106781 * (T4g + T4l); + T4t = T4h - T4k; + T4u = T4f + T4e; + T4v = KP707106781 * (T4t - T4u); + T5e = KP707106781 * (T4u + T4t); + } + } + { + E T1t, T6X, T7a, T7c, T30, T7b, T70, T71; + { + E TH, T1s, T72, T79; + TH = Tj + TG; + T1s = T14 + T1r; + T1t = TH + T1s; + T6X = TH - T1s; + T72 = T6E + T6F; + T79 = T73 + T78; + T7a = T72 + T79; + T7c = T79 - T72; + } + { + E T2e, T2Z, T6Y, T6Z; + T2e = T1Q + T2d; + T2Z = T2B + T2Y; + T30 = T2e + T2Z; + T7b = T2Z - T2e; + T6Y = T6J + T6K; + T6Z = T6O + T6P; + T70 = T6Y - T6Z; + T71 = T6Y + T6Z; + } + Rm[WS(rs, 15)] = T1t - T30; + Im[WS(rs, 15)] = T71 - T7a; + Rp[0] = T1t + T30; + Ip[0] = T71 + T7a; + Rm[WS(rs, 7)] = T6X - T70; + Im[WS(rs, 7)] = T7b - T7c; + Rp[WS(rs, 8)] = T6X + T70; + Ip[WS(rs, 8)] = T7b + T7c; + } + { + E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V; + { + E T6D, T6G, T7e, T7f; + T6D = Tj - TG; + T6G = T6E - T6F; + T6H = T6D + T6G; + T6T = T6D - T6G; + T7e = T1r - T14; + T7f = T78 - T73; + T7g = T7e 
+ T7f; + T7i = T7f - T7e; + } + { + E T6I, T6L, T6N, T6Q; + T6I = T1Q - T2d; + T6L = T6J - T6K; + T6M = T6I + T6L; + T6U = T6L - T6I; + T6N = T2B - T2Y; + T6Q = T6O - T6P; + T6R = T6N - T6Q; + T6V = T6N + T6Q; + } + { + E T6S, T7d, T6W, T7h; + T6S = KP707106781 * (T6M + T6R); + Rm[WS(rs, 11)] = T6H - T6S; + Rp[WS(rs, 4)] = T6H + T6S; + T7d = KP707106781 * (T6U + T6V); + Im[WS(rs, 11)] = T7d - T7g; + Ip[WS(rs, 4)] = T7d + T7g; + T6W = KP707106781 * (T6U - T6V); + Rm[WS(rs, 3)] = T6T - T6W; + Rp[WS(rs, 12)] = T6T + T6W; + T7h = KP707106781 * (T6R - T6M); + Im[WS(rs, 3)] = T7h - T7i; + Ip[WS(rs, 12)] = T7h + T7i; + } + } + { + E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h; + E T6l; + { + E T5O, T5T, T60, T65; + T5J = T5F - T5I; + T7n = T7l + T7m; + T7t = T7m - T7l; + T6n = T5F + T5I; + T5O = T5M - T5N; + T5T = T5P + T5S; + T5U = KP707106781 * (T5O - T5T); + T7k = KP707106781 * (T5O + T5T); + { + E T6v, T6w, T6o, T6p; + T6v = T67 + T6a; + T6w = T6e + T6f; + T6x = FNMS(KP382683432, T6w, KP923879532 * T6v); + T6B = FMA(KP923879532, T6w, KP382683432 * T6v); + T6o = T5N + T5M; + T6p = T5P - T5S; + T6q = KP707106781 * (T6o + T6p); + T7s = KP707106781 * (T6p - T6o); + } + T60 = T5Y - T5Z; + T65 = T61 - T64; + T66 = FMA(KP923879532, T60, KP382683432 * T65); + T6k = FNMS(KP923879532, T65, KP382683432 * T60); + { + E T6s, T6t, T6b, T6g; + T6s = T5Y + T5Z; + T6t = T61 + T64; + T6u = FMA(KP382683432, T6s, KP923879532 * T6t); + T6A = FNMS(KP382683432, T6t, KP923879532 * T6s); + T6b = T67 - T6a; + T6g = T6e - T6f; + T6h = FNMS(KP923879532, T6g, KP382683432 * T6b); + T6l = FMA(KP382683432, T6g, KP923879532 * T6b); + } + } + { + E T5V, T6i, T7r, T7u; + T5V = T5J + T5U; + T6i = T66 + T6h; + Rm[WS(rs, 9)] = T5V - T6i; + Rp[WS(rs, 6)] = T5V + T6i; + T7r = T6k + T6l; + T7u = T7s + T7t; + Im[WS(rs, 9)] = T7r - T7u; + Ip[WS(rs, 6)] = T7r + T7u; + } + { + E T6j, T6m, T7v, T7w; + T6j = T5J - T5U; + T6m = T6k - T6l; + Rm[WS(rs, 1)] = T6j - T6m; + Rp[WS(rs, 14)] = 
T6j + T6m; + T7v = T6h - T66; + T7w = T7t - T7s; + Im[WS(rs, 1)] = T7v - T7w; + Ip[WS(rs, 14)] = T7v + T7w; + } + { + E T6r, T6y, T7j, T7o; + T6r = T6n + T6q; + T6y = T6u + T6x; + Rm[WS(rs, 13)] = T6r - T6y; + Rp[WS(rs, 2)] = T6r + T6y; + T7j = T6A + T6B; + T7o = T7k + T7n; + Im[WS(rs, 13)] = T7j - T7o; + Ip[WS(rs, 2)] = T7j + T7o; + } + { + E T6z, T6C, T7p, T7q; + T6z = T6n - T6q; + T6C = T6A - T6B; + Rm[WS(rs, 5)] = T6z - T6C; + Rp[WS(rs, 10)] = T6z + T6C; + T7p = T6x - T6u; + T7q = T7n - T7k; + Im[WS(rs, 5)] = T7p - T7q; + Ip[WS(rs, 10)] = T7p + T7q; + } + } + { + E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x; + E T4B, T3g, T7P; + T3g = KP707106781 * (T3a - T3f); + T3h = T35 - T3g; + T4D = T35 + T3g; + T7P = KP707106781 * (T4V - T4U); + T7R = T7P + T7Q; + T7X = T7Q - T7P; + { + E T3s, T3D, T4L, T4M; + T3s = FNMS(KP923879532, T3r, KP382683432 * T3m); + T3D = FMA(KP382683432, T3x, KP923879532 * T3C); + T3E = T3s - T3D; + T7O = T3s + T3D; + T4L = T4b + T4m; + T4M = T4s + T4v; + T4N = FNMS(KP555570233, T4M, KP831469612 * T4L); + T4R = FMA(KP831469612, T4M, KP555570233 * T4L); + } + { + E T3W, T45, T4E, T4F; + T3W = T3K - T3V; + T45 = T41 - T44; + T46 = FMA(KP980785280, T3W, KP195090322 * T45); + T4A = FNMS(KP980785280, T45, KP195090322 * T3W); + T4E = FMA(KP923879532, T3m, KP382683432 * T3r); + T4F = FNMS(KP923879532, T3x, KP382683432 * T3C); + T4G = T4E + T4F; + T7W = T4F - T4E; + } + { + E T4I, T4J, T4n, T4w; + T4I = T3K + T3V; + T4J = T41 + T44; + T4K = FMA(KP555570233, T4I, KP831469612 * T4J); + T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I); + T4n = T4b - T4m; + T4w = T4s - T4v; + T4x = FNMS(KP980785280, T4w, KP195090322 * T4n); + T4B = FMA(KP195090322, T4w, KP980785280 * T4n); + } + { + E T3F, T4y, T7V, T7Y; + T3F = T3h + T3E; + T4y = T46 + T4x; + Rm[WS(rs, 8)] = T3F - T4y; + Rp[WS(rs, 7)] = T3F + T4y; + T7V = T4A + T4B; + T7Y = T7W + T7X; + Im[WS(rs, 8)] = T7V - T7Y; + Ip[WS(rs, 7)] = T7V + T7Y; + } + { + E T4z, T4C, T7Z, T80; + 
T4z = T3h - T3E; + T4C = T4A - T4B; + Rm[0] = T4z - T4C; + Rp[WS(rs, 15)] = T4z + T4C; + T7Z = T4x - T46; + T80 = T7X - T7W; + Im[0] = T7Z - T80; + Ip[WS(rs, 15)] = T7Z + T80; + } + { + E T4H, T4O, T7N, T7S; + T4H = T4D + T4G; + T4O = T4K + T4N; + Rm[WS(rs, 12)] = T4H - T4O; + Rp[WS(rs, 3)] = T4H + T4O; + T7N = T4Q + T4R; + T7S = T7O + T7R; + Im[WS(rs, 12)] = T7N - T7S; + Ip[WS(rs, 3)] = T7N + T7S; + } + { + E T4P, T4S, T7T, T7U; + T4P = T4D - T4G; + T4S = T4Q - T4R; + Rm[WS(rs, 4)] = T4P - T4S; + Rp[WS(rs, 11)] = T4P + T4S; + T7T = T4N - T4K; + T7U = T7R - T7O; + Im[WS(rs, 4)] = T7T - T7U; + Ip[WS(rs, 11)] = T7T + T7U; + } + } + { + E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j; + E T5n, T4W, T7z; + T4W = KP707106781 * (T4U + T4V); + T4X = T4T - T4W; + T5p = T4T + T4W; + T7z = KP707106781 * (T3a + T3f); + T7D = T7z + T7C; + T7J = T7C - T7z; + { + E T50, T53, T5x, T5y; + T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y); + T53 = FMA(KP923879532, T51, KP382683432 * T52); + T54 = T50 - T53; + T7y = T50 + T53; + T5x = T5d + T5e; + T5y = T5g + T5h; + T5z = FNMS(KP195090322, T5y, KP980785280 * T5x); + T5D = FMA(KP195090322, T5x, KP980785280 * T5y); + } + { + E T58, T5b, T5q, T5r; + T58 = T56 - T57; + T5b = T59 - T5a; + T5c = FMA(KP555570233, T58, KP831469612 * T5b); + T5m = FNMS(KP831469612, T58, KP555570233 * T5b); + T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z); + T5r = FNMS(KP382683432, T51, KP923879532 * T52); + T5s = T5q + T5r; + T7I = T5r - T5q; + } + { + E T5u, T5v, T5f, T5i; + T5u = T56 + T57; + T5v = T59 + T5a; + T5w = FMA(KP980785280, T5u, KP195090322 * T5v); + T5C = FNMS(KP195090322, T5u, KP980785280 * T5v); + T5f = T5d - T5e; + T5i = T5g - T5h; + T5j = FNMS(KP831469612, T5i, KP555570233 * T5f); + T5n = FMA(KP831469612, T5f, KP555570233 * T5i); + } + { + E T55, T5k, T7H, T7K; + T55 = T4X + T54; + T5k = T5c + T5j; + Rm[WS(rs, 10)] = T55 - T5k; + Rp[WS(rs, 5)] = T55 + T5k; + T7H = T5m + T5n; + T7K = T7I + T7J; + Im[WS(rs, 10)] = T7H 
- T7K; + Ip[WS(rs, 5)] = T7H + T7K; + } + { + E T5l, T5o, T7L, T7M; + T5l = T4X - T54; + T5o = T5m - T5n; + Rm[WS(rs, 2)] = T5l - T5o; + Rp[WS(rs, 13)] = T5l + T5o; + T7L = T5j - T5c; + T7M = T7J - T7I; + Im[WS(rs, 2)] = T7L - T7M; + Ip[WS(rs, 13)] = T7L + T7M; + } + { + E T5t, T5A, T7x, T7E; + T5t = T5p + T5s; + T5A = T5w + T5z; + Rm[WS(rs, 14)] = T5t - T5A; + Rp[WS(rs, 1)] = T5t + T5A; + T7x = T5C + T5D; + T7E = T7y + T7D; + Im[WS(rs, 14)] = T7x - T7E; + Ip[WS(rs, 1)] = T7x + T7E; + } + { + E T5B, T5E, T7F, T7G; + T5B = T5p - T5s; + T5E = T5C - T5D; + Rm[WS(rs, 6)] = T5B - T5E; + Rp[WS(rs, 9)] = T5B + T5E; + T7F = T5z - T5w; + T7G = T7D - T7y; + Im[WS(rs, 6)] = T7F - T7G; + Ip[WS(rs, 9)] = T7F + T7G; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cf_32", twinstr, &GENUS, {340, 114, 94, 0} }; + +void X(codelet_hc2cf_32) (planner *p) { + X(khc2c_register) (p, hc2cf_32, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:21 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hc2cf_4 -include hc2cf.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 31 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E To, Te, Tm, T8, Tw, Ty, Tq, Tk; + { + E T1, Tv, Tu, T7, Tg, Tj, Tf, Ti, Tp, Th; + T1 = Rp[0]; + Tv = Rm[0]; + { + E T3, T6, T2, T5; + T3 = Rp[WS(rs, 1)]; + T6 = Rm[WS(rs, 1)]; + T2 = W[2]; + T5 = W[3]; + { + E Ta, Td, Tc, Tn, Tb, Tt, T4, T9; + Ta = Ip[0]; + Td = Im[0]; + Tt = T2 * T6; + T4 = T2 * T3; + T9 = W[0]; + Tc = W[1]; + Tu = FNMS(T5, T3, Tt); + T7 = FMA(T5, T6, T4); + Tn = T9 * Td; + Tb = T9 * Ta; + Tg = Ip[WS(rs, 1)]; + Tj = Im[WS(rs, 1)]; + To = FNMS(Tc, Ta, Tn); + Te = FMA(Tc, Td, Tb); + Tf = W[4]; + Ti = W[5]; + } + } + Tm = T1 - T7; + T8 = T1 + T7; + Tw = Tu + Tv; + Ty = Tv - Tu; + Tp = Tf * Tj; + Th = Tf * Tg; + Tq = FNMS(Ti, Tg, Tp); + Tk = FMA(Ti, Tj, Th); + } + { + E Ts, Tr, Tl, Tx; + Ts = To + Tq; + Tr = To - Tq; + Tl = Te + Tk; + Tx = Tk - Te; + Rp[WS(rs, 1)] = Tm + Tr; + Rm[0] = Tm - Tr; + Ip[0] = Ts + Tw; + Im[WS(rs, 1)] = Ts - Tw; + Ip[WS(rs, 1)] = Tx + Ty; + Im[0] = Tx - Ty; + Rp[0] = T8 + Tl; + 
Rm[WS(rs, 1)] = T8 - Tl; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cf_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hc2cf_4) (planner *p) { + X(khc2c_register) (p, hc2cf_4, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hc2cf_4 -include hc2cf.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E T1, Tp, T6, To, Tc, Tk, Th, Tl; + T1 = Rp[0]; + Tp = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Rp[WS(rs, 1)]; + T5 = Rm[WS(rs, 1)]; + T2 = W[2]; + T4 = W[3]; + T6 = FMA(T2, T3, T4 * T5); + To = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = Ip[0]; + Tb = Im[0]; + T8 = W[0]; + Ta = W[1]; + Tc = FMA(T8, T9, Ta * Tb); + Tk = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = Ip[WS(rs, 1)]; + Tg = Im[WS(rs, 1)]; + Td = W[4]; + Tf = W[5]; + Th = FMA(Td, Te, Tf * Tg); + Tl = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, Tn, Tq; + T7 = T1 + T6; + Ti = Tc + Th; + Rm[WS(rs, 1)] = T7 - Ti; + Rp[0] = T7 + Ti; + Tn = Tk + Tl; + Tq = To + Tp; + Im[WS(rs, 1)] = Tn - Tq; + Ip[0] = Tn + Tq; + } + { + E Tj, Tm, Tr, Ts; + Tj = T1 - T6; + Tm = Tk - Tl; + Rm[0] = Tj - Tm; + Rp[WS(rs, 1)] = Tj + Tm; + Tr = Th - Tc; + Ts = Tp - To; + Im[0] = Tr - Ts; + Ip[WS(rs, 1)] = Tr + Ts; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cf_4", twinstr, 
&GENUS, {16, 6, 6, 0} }; + +void X(codelet_hc2cf_4) (planner *p) { + X(khc2c_register) (p, hc2cf_4, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:21 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hc2cf_6 -include hc2cf.h */ + +/* + * This function contains 46 FP additions, 32 FP multiplications, + * (or, 24 additions, 10 multiplications, 22 fused multiply/add), + * 47 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E TY, TU, T10, TZ; + { + E T1, TX, TW, T7, Tn, Tq, TJ, TS, TB, Tl, To, TK, Tt, Tw, Ts; + E Tp, Tv; + T1 = Rp[0]; + TX = Rm[0]; + { + E T3, T6, T2, T5; + T3 = Ip[WS(rs, 1)]; + T6 = Im[WS(rs, 1)]; + T2 = W[4]; + T5 = W[5]; + { + E Ta, Td, Tg, TF, Tb, Tj, Tf, Tc, Ti, TV, T4, T9; + Ta = Rp[WS(rs, 1)]; + Td = Rm[WS(rs, 1)]; + TV = T2 * T6; + T4 = T2 * T3; + T9 = W[2]; + Tg = Ip[WS(rs, 2)]; + TW = FNMS(T5, T3, TV); + T7 = FMA(T5, T6, T4); + TF = T9 * Td; + Tb = T9 * Ta; + Tj = Im[WS(rs, 2)]; + Tf = W[8]; + Tc = W[3]; + Ti = W[9]; + { + E TG, Te, TI, Tk, TH, Th, Tm; + Tn = Rp[WS(rs, 2)]; + TH = Tf * Tj; + Th = Tf * Tg; + TG = FNMS(Tc, Ta, TF); + Te = FMA(Tc, Td, Tb); + TI = FNMS(Ti, Tg, TH); + Tk = FMA(Ti, Tj, Th); + Tq = Rm[WS(rs, 2)]; + Tm = W[6]; + TJ = TG + TI; + TS = TI - TG; + TB = 
Te + Tk; + Tl = Te - Tk; + To = Tm * Tn; + TK = Tm * Tq; + } + Tt = Ip[0]; + Tw = Im[0]; + Ts = W[0]; + Tp = W[7]; + Tv = W[1]; + } + } + { + E TA, T8, TL, Tr, TN, Tx, T12, TM, Tu; + TA = T1 + T7; + T8 = T1 - T7; + TM = Ts * Tw; + Tu = Ts * Tt; + TL = FNMS(Tp, Tn, TK); + Tr = FMA(Tp, Tq, To); + TN = FNMS(Tv, Tt, TM); + Tx = FMA(Tv, Tw, Tu); + T12 = TX - TW; + TY = TW + TX; + { + E TP, TT, TD, TQ, TE, Tz, T14, T13; + { + E TO, TR, TC, Ty, T11; + TO = TL + TN; + TR = TN - TL; + TC = Tr + Tx; + Ty = Tr - Tx; + TP = TJ - TO; + TU = TJ + TO; + TT = TR - TS; + T11 = TS + TR; + Tz = Tl + Ty; + T14 = Ty - Tl; + Im[WS(rs, 2)] = T11 - T12; + T13 = FMA(KP500000000, T11, T12); + T10 = TB - TC; + TD = TB + TC; + } + Rm[WS(rs, 2)] = T8 + Tz; + TQ = FNMS(KP500000000, Tz, T8); + Im[0] = FMS(KP866025403, T14, T13); + Ip[WS(rs, 1)] = FMA(KP866025403, T14, T13); + TE = FNMS(KP500000000, TD, TA); + Rm[0] = FNMS(KP866025403, TT, TQ); + Rp[WS(rs, 1)] = FMA(KP866025403, TT, TQ); + Rp[0] = TA + TD; + Rm[WS(rs, 1)] = FMA(KP866025403, TP, TE); + Rp[WS(rs, 2)] = FNMS(KP866025403, TP, TE); + } + } + } + Ip[0] = TU + TY; + TZ = FNMS(KP500000000, TU, TY); + Im[WS(rs, 1)] = FMS(KP866025403, T10, TZ); + Ip[WS(rs, 2)] = FMA(KP866025403, T10, TZ); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cf_6", twinstr, &GENUS, {24, 10, 22, 0} }; + +void X(codelet_hc2cf_6) (planner *p) { + X(khc2c_register) (p, hc2cf_6, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hc2cf_6 -include hc2cf.h */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 23 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + 
DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC; + { + E T1, TN, T6, TM; + T1 = Rp[0]; + TN = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Ip[WS(rs, 1)]; + T5 = Im[WS(rs, 1)]; + T2 = W[4]; + T4 = W[5]; + T6 = FMA(T2, T3, T4 * T5); + TM = FNMS(T4, T3, T2 * T5); + } + T7 = T1 - T6; + TS = TN - TM; + Tv = T1 + T6; + TO = TM + TN; + } + { + E Tn, TD, Ts, TE; + { + E Tk, Tm, Tj, Tl; + Tk = Rp[WS(rs, 2)]; + Tm = Rm[WS(rs, 2)]; + Tj = W[6]; + Tl = W[7]; + Tn = FMA(Tj, Tk, Tl * Tm); + TD = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = Ip[0]; + Tr = Im[0]; + To = W[0]; + Tq = W[1]; + Ts = FMA(To, Tp, Tq * Tr); + TE = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn - Ts; + TJ = TE - TD; + Tx = Tn + Ts; + TF = TD + TE; + } + { + E Tc, TA, Th, TB; + { + E T9, Tb, T8, Ta; + T9 = Rp[WS(rs, 1)]; + Tb = Rm[WS(rs, 1)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + TA = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = Ip[WS(rs, 2)]; + Tg = Im[WS(rs, 2)]; + Td = W[8]; + Tf = W[9]; + Th = FMA(Td, Te, Tf * Tg); + TB = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc - Th; + TI = TA - TB; + Tw = Tc + Th; + TC = TA + TB; + } + { + E TK, Tu, TH, TT, TR, TU; + TK = KP866025403 * (TI + TJ); + Tu = Ti + Tt; + TH = FNMS(KP500000000, Tu, T7); + Rm[WS(rs, 2)] = T7 + Tu; + Rp[WS(rs, 1)] = TH + TK; + Rm[0] = TH - TK; + TT = KP866025403 * (Tt - Ti); + TR = TJ - TI; + TU = FMA(KP500000000, TR, TS); + Im[WS(rs, 2)] = TR - TS; + Ip[WS(rs, 1)] = TT + TU; + Im[0] = TT - TU; + } + { + E TG, Ty, Tz, TP, TL, TQ; + TG = KP866025403 * (TC - TF); + Ty = Tw + Tx; + Tz = FNMS(KP500000000, Ty, Tv); + Rp[0] = Tv + Ty; + Rm[WS(rs, 1)] = Tz + TG; + Rp[WS(rs, 2)] = Tz - TG; + TP = KP866025403 * (Tw - Tx); + 
TL = TC + TF; + TQ = FNMS(KP500000000, TL, TO); + Ip[0] = TL + TO; + Ip[WS(rs, 2)] = TP + TQ; + Im[WS(rs, 1)] = TP - TQ; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cf_6", twinstr, &GENUS, {32, 14, 14, 0} }; + +void X(codelet_hc2cf_6) (planner *p) { + X(khc2c_register) (p, hc2cf_6, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cf_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:22 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cf_8 -include hc2cf.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 44 additions, 14 multiplications, 22 fused multiply/add), + * 61 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T1g, T1f, T1e, Tm, T1q, T1o, T1p, TN, T1h, T1i; + { + E T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, T17, TM, T12, Tu, TW; + E Tp, Tx, Tt, Tq, Tw; + { + E T3, T6, T2, T5; + T1 = Rp[0]; + T1m = Rm[0]; + T3 = Rp[WS(rs, 2)]; + T6 = Rm[WS(rs, 2)]; + T2 = W[6]; + T5 = W[7]; + { + E Ta, Td, T9, Tc; + { + E Tg, Tj, Ti, TR, Th, T1k, T4, Tf; + Tg = Rp[WS(rs, 3)]; + Tj = Rm[WS(rs, 3)]; + T1k = T2 * T6; + T4 = T2 * T3; + Tf = W[10]; + Ti = W[11]; + T1l = FNMS(T5, T3, T1k); + T7 = FMA(T5, T6, T4); + TR = Tf * Tj; + Th = Tf * Tg; + Ta = Rp[WS(rs, 1)]; + Td = Rm[WS(rs, 1)]; + TS = FNMS(Ti, Tg, TR); + Tk = FMA(Ti, Tj, Th); + T9 = W[2]; + Tc = W[3]; + } + { + E TB, TE, TH, T13, TC, TK, TG, TD, TJ, TP, Tb, TA, Tn; + TB = Ip[WS(rs, 3)]; + TE = Im[WS(rs, 3)]; + TP = T9 * Td; + Tb = T9 * Ta; + TA = W[12]; + TH = Ip[WS(rs, 1)]; + TQ = FNMS(Tc, Ta, TP); + Te = FMA(Tc, Td, 
Tb); + T13 = TA * TE; + TC = TA * TB; + TK = Im[WS(rs, 1)]; + TG = W[4]; + TD = W[13]; + TJ = W[5]; + { + E T14, TF, T16, TL, T15, TI; + To = Ip[0]; + T15 = TG * TK; + TI = TG * TH; + T14 = FNMS(TD, TB, T13); + TF = FMA(TD, TE, TC); + T16 = FNMS(TJ, TH, T15); + TL = FMA(TJ, TK, TI); + Tr = Im[0]; + Tn = W[0]; + T17 = T14 - T16; + T1g = T14 + T16; + TM = TF + TL; + T12 = TF - TL; + } + Tu = Ip[WS(rs, 2)]; + TW = Tn * Tr; + Tp = Tn * To; + Tx = Im[WS(rs, 2)]; + Tt = W[8]; + Tq = W[1]; + Tw = W[9]; + } + } + } + { + E T8, T1j, T1n, Tz, T1a, TU, Tl, T1b, T1c, T1v, T1t, T1w, T19, T1u, T1d; + { + E T1r, T10, TV, T1s, T11, T18; + { + E TO, TX, Ts, TZ, Ty, TT, TY, Tv; + T8 = T1 + T7; + TO = T1 - T7; + TY = Tt * Tx; + Tv = Tt * Tu; + TX = FNMS(Tq, To, TW); + Ts = FMA(Tq, Tr, Tp); + TZ = FNMS(Tw, Tu, TY); + Ty = FMA(Tw, Tx, Tv); + TT = TQ - TS; + T1j = TQ + TS; + T1n = T1l + T1m; + T1r = T1m - T1l; + T10 = TX - TZ; + T1f = TX + TZ; + Tz = Ts + Ty; + TV = Ts - Ty; + T1a = TO - TT; + TU = TO + TT; + T1s = Te - Tk; + Tl = Te + Tk; + } + T1b = T10 - TV; + T11 = TV + T10; + T18 = T12 - T17; + T1c = T12 + T17; + T1v = T1s + T1r; + T1t = T1r - T1s; + T1w = T18 - T11; + T19 = T11 + T18; + } + Ip[WS(rs, 3)] = FMA(KP707106781, T1w, T1v); + Im[0] = FMS(KP707106781, T1w, T1v); + Rp[WS(rs, 1)] = FMA(KP707106781, T19, TU); + Rm[WS(rs, 2)] = FNMS(KP707106781, T19, TU); + T1u = T1b + T1c; + T1d = T1b - T1c; + Ip[WS(rs, 1)] = FMA(KP707106781, T1u, T1t); + Im[WS(rs, 2)] = FMS(KP707106781, T1u, T1t); + Rp[WS(rs, 3)] = FMA(KP707106781, T1d, T1a); + Rm[0] = FNMS(KP707106781, T1d, T1a); + T1e = T8 - Tl; + Tm = T8 + Tl; + T1q = T1n - T1j; + T1o = T1j + T1n; + T1p = TM - Tz; + TN = Tz + TM; + } + } + Ip[WS(rs, 2)] = T1p + T1q; + Im[WS(rs, 1)] = T1p - T1q; + Rp[0] = Tm + TN; + Rm[WS(rs, 3)] = Tm - TN; + T1h = T1f - T1g; + T1i = T1f + T1g; + Ip[0] = T1i + T1o; + Im[WS(rs, 3)] = T1i - T1o; + Rp[WS(rs, 2)] = T1e + T1h; + Rm[WS(rs, 1)] = T1e - T1h; + } + } +} + +static const tw_instr twinstr[] = { + 
{TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cf_8", twinstr, &GENUS, {44, 14, 22, 0} }; + +void X(codelet_hc2cf_8) (planner *p) { + X(khc2c_register) (p, hc2cf_8, &desc, HC2C_VIA_RDFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cf_8 -include hc2cf.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 28 stack variables, 1 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cf_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T7, T1e, TH, T19, TF, T13, TR, TU, Ti, T1f, TK, T16, Tu, T12, TM; + E TP; + { + E T1, T18, T6, T17; + T1 = Rp[0]; + T18 = Rm[0]; + { + E T3, T5, T2, T4; + T3 = Rp[WS(rs, 2)]; + T5 = Rm[WS(rs, 2)]; + T2 = W[6]; + T4 = W[7]; + T6 = FMA(T2, T3, T4 * T5); + T17 = FNMS(T4, T3, T2 * T5); + } + T7 = T1 + T6; + T1e = T18 - T17; + TH = T1 - T6; + T19 = T17 + T18; + } + { + E Tz, TS, TE, TT; + { + E Tw, Ty, Tv, Tx; + Tw = Ip[WS(rs, 3)]; + Ty = Im[WS(rs, 3)]; + Tv = W[12]; + Tx = W[13]; + Tz = FMA(Tv, Tw, Tx * Ty); + TS = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = Ip[WS(rs, 1)]; + TD = Im[WS(rs, 1)]; + TA = W[4]; + TC = W[5]; + TE = FMA(TA, TB, TC * TD); + TT = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T13 = TS + TT; + TR = Tz - TE; + TU = TS - TT; + } + { + E Tc, TI, Th, TJ; + { + E T9, Tb, T8, Ta; + T9 = Rp[WS(rs, 1)]; + Tb = Rm[WS(rs, 1)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + TI = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = Rp[WS(rs, 3)]; + Tg = Rm[WS(rs, 3)]; + Td = W[10]; + Tf = 
W[11]; + Th = FMA(Td, Te, Tf * Tg); + TJ = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc + Th; + T1f = Tc - Th; + TK = TI - TJ; + T16 = TI + TJ; + } + { + E To, TN, Tt, TO; + { + E Tl, Tn, Tk, Tm; + Tl = Ip[0]; + Tn = Im[0]; + Tk = W[0]; + Tm = W[1]; + To = FMA(Tk, Tl, Tm * Tn); + TN = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = Ip[WS(rs, 2)]; + Ts = Im[WS(rs, 2)]; + Tp = W[8]; + Tr = W[9]; + Tt = FMA(Tp, Tq, Tr * Ts); + TO = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T12 = TN + TO; + TM = To - Tt; + TP = TN - TO; + } + { + E Tj, TG, T1b, T1c; + Tj = T7 + Ti; + TG = Tu + TF; + Rm[WS(rs, 3)] = Tj - TG; + Rp[0] = Tj + TG; + { + E T15, T1a, T11, T14; + T15 = T12 + T13; + T1a = T16 + T19; + Im[WS(rs, 3)] = T15 - T1a; + Ip[0] = T15 + T1a; + T11 = T7 - Ti; + T14 = T12 - T13; + Rm[WS(rs, 1)] = T11 - T14; + Rp[WS(rs, 2)] = T11 + T14; + } + T1b = TF - Tu; + T1c = T19 - T16; + Im[WS(rs, 1)] = T1b - T1c; + Ip[WS(rs, 2)] = T1b + T1c; + { + E TX, T1g, T10, T1d, TY, TZ; + TX = TH - TK; + T1g = T1e - T1f; + TY = TP - TM; + TZ = TR + TU; + T10 = KP707106781 * (TY - TZ); + T1d = KP707106781 * (TY + TZ); + Rm[0] = TX - T10; + Ip[WS(rs, 1)] = T1d + T1g; + Rp[WS(rs, 3)] = TX + T10; + Im[WS(rs, 2)] = T1d - T1g; + } + { + E TL, T1i, TW, T1h, TQ, TV; + TL = TH + TK; + T1i = T1f + T1e; + TQ = TM + TP; + TV = TR - TU; + TW = KP707106781 * (TQ + TV); + T1h = KP707106781 * (TV - TQ); + Rm[WS(rs, 2)] = TL - TW; + Ip[WS(rs, 3)] = T1h + T1i; + Rp[WS(rs, 1)] = TL + TW; + Im[0] = T1h - T1i; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cf_8", twinstr, &GENUS, {52, 18, 14, 0} }; + +void X(codelet_hc2cf_8) (planner *p) { + X(khc2c_register) (p, hc2cf_8, &desc, HC2C_VIA_RDFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_16.c Tue Oct 
06 13:08:39 2015 +0100 @@ -0,0 +1,916 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -dit -name hc2cfdft2_16 -include hc2cf.h */ + +/* + * This function contains 228 FP additions, 166 FP multiplications, + * (or, 136 additions, 74 multiplications, 92 fused multiply/add), + * 103 stack variables, 4 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = 
Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T4p, T4o, T4n, T4s; + { + E T1, T2, Tw, Ty, Th, T3, Tx, TE, Ti, TK, Tj, T4, T5; + T1 = W[0]; + T2 = W[2]; + Tw = W[6]; + Ty = W[7]; + Th = W[4]; + T3 = T1 * T2; + Tx = T1 * Tw; + TE = T1 * Ty; + Ti = T1 * Th; + TK = T2 * Th; + Tj = W[5]; + T4 = W[1]; + T5 = W[3]; + { + E T1v, T2q, T1s, T2s, T38, T3T, T1Y, T3P, T17, T1h, T2x, T2v, T33, T3Q, T3S; + E T1N, Tv, T3A, T2E, T3B, T3L, T2c, T3I, T2S, TW, T3E, T3J, T2n, T3D, T2J; + E T3M, T2X; + { + E TF, Tk, Tz, TL, T6, TR, Tq, Tc, T2h, T25, T2k, T29, T1G, T1M, T2P; + E T2R; + { + E T18, TY, T1d, T13, T1H, T1A, T1K, T1E, T37, T1R, T35, T1X; + { + E T1j, T1o, T1W, T1p, T1m, T1Q, T1U, T1q; + { + E T1k, T1l, T1S, T1T; + { + E T1t, T28, T24, T1D, T1z, T1u, TQ, Tp, Tb; + T1t = Ip[0]; + TQ = T2 * Tj; + Tp = T1 * Tj; + TF = FNMS(T4, Tw, TE); + T1j = FMA(T4, Tj, Ti); + Tk = FNMS(T4, Tj, Ti); + Tz = FMA(T4, Ty, Tx); + T18 = FNMS(T5, Tj, TK); + TL = FMA(T5, Tj, TK); + TY = FNMS(T4, T5, T3); + T6 = FMA(T4, T5, T3); + Tb = T1 * T5; + TR = FNMS(T5, Th, TQ); + T1d = FMA(T5, Th, TQ); + Tq = FMA(T4, Th, Tp); + T1o = FNMS(T4, Th, Tp); + T28 = T6 * Tj; + T24 = T6 * Th; + T1D = TY * Tj; + T1z = TY * Th; + Tc = FNMS(T4, T2, Tb); + T13 = FMA(T4, T2, Tb); + T1u = Im[0]; + T1k = Ip[WS(rs, 4)]; + T2h = FMA(Tc, Tj, T24); + T25 = FNMS(Tc, Tj, T24); + T2k = FNMS(Tc, Th, T28); + T29 = FMA(Tc, Th, T28); + T1H = FNMS(T13, Tj, T1z); + T1A = FMA(T13, Tj, T1z); + T1K = FMA(T13, Th, T1D); + T1E = FNMS(T13, Th, T1D); + T1W = T1t + T1u; + T1v = T1t - T1u; + T1l = Im[WS(rs, 4)]; + } + T1S = Rm[0]; + T1T = Rp[0]; + T1p = Rp[WS(rs, 4)]; + T1m = T1k - T1l; + T1Q = T1k + T1l; + T2q = T1T + T1S; + T1U = T1S - T1T; + T1q = Rm[WS(rs, 4)]; + } + { + E T36, T1V, T1O, T1r, T1n, T1P, T34, T2r; + T36 = T4 * T1U; + T1V = T1 * T1U; + T1O = T1q - T1p; + T1r = T1p + T1q; + T1n = T1j * T1m; + T37 = FMA(T1, T1W, T36); + T2r = T1j * T1r; + T1P = Th * T1O; + T34 = Tj * T1O; + T1s = FNMS(T1o, T1r, T1n); + T2s = 
FMA(T1o, T1m, T2r); + T1R = FNMS(Tj, T1Q, T1P); + T35 = FMA(Th, T1Q, T34); + T1X = FNMS(T4, T1W, T1V); + } + } + { + E T1F, T11, T1e, T16, T1L, T1b, T1f, T1C, T2Z; + { + E T14, T15, TZ, T10, T19, T1a, T1B; + TZ = Ip[WS(rs, 2)]; + T10 = Im[WS(rs, 2)]; + T38 = T35 + T37; + T3T = T37 - T35; + T1Y = T1R + T1X; + T3P = T1X - T1R; + T1F = TZ + T10; + T11 = TZ - T10; + T14 = Rp[WS(rs, 2)]; + T15 = Rm[WS(rs, 2)]; + T19 = Ip[WS(rs, 6)]; + T1a = Im[WS(rs, 6)]; + T1e = Rp[WS(rs, 6)]; + T16 = T14 + T15; + T1B = T15 - T14; + T1L = T19 + T1a; + T1b = T19 - T1a; + T1f = Rm[WS(rs, 6)]; + T1C = T1A * T1B; + T2Z = T1E * T1B; + } + { + E T1J, T31, T2u, T30, T32; + { + E T12, T1g, T1I, T1c, T2w; + T12 = TY * T11; + T1g = T1e + T1f; + T1I = T1f - T1e; + T1c = T18 * T1b; + T17 = FNMS(T13, T16, T12); + T2w = T18 * T1g; + T1J = T1H * T1I; + T31 = T1K * T1I; + T1h = FNMS(T1d, T1g, T1c); + T2x = FMA(T1d, T1b, T2w); + } + T2u = TY * T16; + T30 = FMA(T1A, T1F, T2Z); + T32 = FMA(T1H, T1L, T31); + T1G = FNMS(T1E, T1F, T1C); + T2v = FMA(T13, T11, T2u); + T1M = FNMS(T1K, T1L, T1J); + T33 = T30 + T32; + T3Q = T30 - T32; + } + } + } + { + E Tl, T22, T9, T20, Tf, T2O, Ta, T21, T2A, Tm, Tr, Ts; + { + E T7, T8, Td, Te; + T7 = Ip[WS(rs, 1)]; + T3S = T1G - T1M; + T1N = T1G + T1M; + T8 = Im[WS(rs, 1)]; + Td = Rp[WS(rs, 1)]; + Te = Rm[WS(rs, 1)]; + Tl = Ip[WS(rs, 5)]; + T22 = T7 + T8; + T9 = T7 - T8; + T20 = Td - Te; + Tf = Td + Te; + T2O = T2 * T22; + Ta = T6 * T9; + T21 = T2 * T20; + T2A = T6 * Tf; + Tm = Im[WS(rs, 5)]; + Tr = Rp[WS(rs, 5)]; + Ts = Rm[WS(rs, 5)]; + } + { + E Tg, T2a, Tn, T26, T2Q, T27, T2C, T2B, Tu, Tt, To, T23, T2D, T2b; + Tg = FNMS(Tc, Tf, Ta); + T2a = Tl + Tm; + Tn = Tl - Tm; + T26 = Tr - Ts; + Tt = Tr + Ts; + T2Q = T25 * T2a; + To = Tk * Tn; + T27 = T25 * T26; + T2C = Tk * Tt; + T2B = FMA(Tc, T9, T2A); + Tu = FNMS(Tq, Tt, To); + T23 = FMA(T5, T22, T21); + T2D = FMA(Tq, Tn, T2C); + T2b = FMA(T29, T2a, T27); + Tv = Tg + Tu; + T3A = Tg - Tu; + T2P = FNMS(T5, T20, T2O); + T2E = T2B + 
T2D; + T3B = T2B - T2D; + T3L = T2b - T23; + T2c = T23 + T2b; + T2R = FNMS(T29, T26, T2Q); + } + } + { + E T2f, TC, T2T, TD, T2d, TI, TS, T2e, T2F, T2l, TO, TT; + { + E TG, TH, TA, TB, TM, TN; + TA = Ip[WS(rs, 7)]; + TB = Im[WS(rs, 7)]; + TG = Rp[WS(rs, 7)]; + T3I = T2R - T2P; + T2S = T2P + T2R; + T2f = TA + TB; + TC = TA - TB; + TH = Rm[WS(rs, 7)]; + TM = Ip[WS(rs, 3)]; + T2T = Tw * T2f; + TD = Tz * TC; + T2d = TG - TH; + TI = TG + TH; + TN = Im[WS(rs, 3)]; + TS = Rp[WS(rs, 3)]; + T2e = Tw * T2d; + T2F = Tz * TI; + T2l = TM + TN; + TO = TM - TN; + TT = Rm[WS(rs, 3)]; + } + { + E TJ, T2V, TP, T2i, TU, T2G; + TJ = FNMS(TF, TI, TD); + T2V = T2h * T2l; + TP = TL * TO; + T2i = TS - TT; + TU = TS + TT; + T2G = FMA(TF, TC, T2F); + { + E T2g, T2j, TV, T2H; + T2g = FMA(Ty, T2f, T2e); + T2j = T2h * T2i; + TV = FNMS(TR, TU, TP); + T2H = TL * TU; + { + E T2U, T2m, T2I, T2W; + T2U = FNMS(Ty, T2d, T2T); + T2m = FMA(T2k, T2l, T2j); + TW = TJ + TV; + T3E = TJ - TV; + T2I = FMA(TR, TO, T2H); + T2W = FNMS(T2k, T2i, T2V); + T3J = T2m - T2g; + T2n = T2g + T2m; + T3D = T2G - T2I; + T2J = T2G + T2I; + T3M = T2U - T2W; + T2X = T2U + T2W; + } + } + } + } + } + { + E T3Y, T3x, T3X, T3y, T3r, T3q, T3p, T3u; + { + E T2Y, T3o, TX, T3s, T3i, T39, T3t, T3l, T3e, T1x, T2M, T2p, T3d, T2K, T2t; + E T2y; + { + E T2o, T1Z, T3j, T3k, T1i, T1w, T3g, T3h; + T2Y = T2S + T2X; + T3g = T2X - T2S; + T3h = T2c - T2n; + T2o = T2c + T2n; + T1Z = T1N + T1Y; + T3j = T1Y - T1N; + T3o = Tv - TW; + TX = Tv + TW; + T3s = T3g - T3h; + T3i = T3g + T3h; + T3k = T38 - T33; + T39 = T33 + T38; + T3Y = T17 - T1h; + T1i = T17 + T1h; + T1w = T1s + T1v; + T3x = T1v - T1s; + T3t = T3j + T3k; + T3l = T3j - T3k; + T3e = T1w - T1i; + T1x = T1i + T1w; + T2M = T2o + T1Z; + T2p = T1Z - T2o; + T3d = T2J - T2E; + T2K = T2E + T2J; + T3X = T2q - T2s; + T2t = T2q + T2s; + T2y = T2v + T2x; + T3y = T2v - T2x; + } + { + E T2N, T3c, T3a, T3n, T3b, T2L, T2z, T1y; + T2N = T1x - TX; + T1y = TX + T1x; + T3c = T2Y + T39; + T3a = T2Y - T39; + T3n 
= T2t - T2y; + T2z = T2t + T2y; + Ip[0] = KP500000000 * (T1y + T2p); + Im[WS(rs, 7)] = KP500000000 * (T2p - T1y); + T3b = T2z + T2K; + T2L = T2z - T2K; + { + E T3f, T3m, T3v, T3w; + T3r = T3e - T3d; + T3f = T3d + T3e; + Im[WS(rs, 3)] = KP500000000 * (T3a - T2N); + Ip[WS(rs, 4)] = KP500000000 * (T2N + T3a); + Rp[WS(rs, 4)] = KP500000000 * (T2L + T2M); + Rm[WS(rs, 3)] = KP500000000 * (T2L - T2M); + Rp[0] = KP500000000 * (T3b + T3c); + Rm[WS(rs, 7)] = KP500000000 * (T3b - T3c); + T3m = T3i + T3l; + T3q = T3l - T3i; + T3p = T3n - T3o; + T3v = T3n + T3o; + T3w = T3s + T3t; + T3u = T3s - T3t; + Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP707106781, T3m, T3f))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3m, T3f)); + Rp[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3w, T3v)); + Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP707106781, T3w, T3v)); + } + } + } + { + E T3R, T4b, T3z, T4q, T4g, T3U, T40, T41, T4r, T4j, T4m, T3G, T46, T3O, T4l; + E T3Z, T4c; + { + E T3K, T3N, T4h, T4i, T3C, T3F, T4e, T4f; + Rp[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3q, T3p)); + Rm[WS(rs, 1)] = KP500000000 * (FNMS(KP707106781, T3q, T3p)); + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP707106781, T3u, T3r))); + Ip[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3u, T3r)); + T3K = T3I + T3J; + T4e = T3I - T3J; + T4f = T3M - T3L; + T3N = T3L + T3M; + T3R = T3P - T3Q; + T4h = T3Q + T3P; + T4b = T3y + T3x; + T3z = T3x - T3y; + T4q = FNMS(KP414213562, T4e, T4f); + T4g = FMA(KP414213562, T4f, T4e); + T4i = T3T - T3S; + T3U = T3S + T3T; + T40 = T3B + T3A; + T3C = T3A - T3B; + T3F = T3D + T3E; + T41 = T3D - T3E; + T4r = FNMS(KP414213562, T4h, T4i); + T4j = FMA(KP414213562, T4i, T4h); + T4m = T3C - T3F; + T3G = T3C + T3F; + T46 = FNMS(KP414213562, T3K, T3N); + T3O = FMA(KP414213562, T3N, T3K); + T4l = T3X - T3Y; + T3Z = T3X + T3Y; + } + { + E T45, T3H, T42, T47, T3V; + T45 = FNMS(KP707106781, T3G, T3z); + T3H = FMA(KP707106781, T3G, T3z); + T4c = T41 - T40; + T42 = T40 + T41; + T47 = FMA(KP414213562, T3R, 
T3U); + T3V = FNMS(KP414213562, T3U, T3R); + { + E T49, T43, T48, T4a, T44, T3W; + T49 = FMA(KP707106781, T42, T3Z); + T43 = FNMS(KP707106781, T42, T3Z); + T48 = T46 - T47; + T4a = T46 + T47; + T44 = T3V - T3O; + T3W = T3O + T3V; + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T4a, T49)); + Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP923879532, T4a, T49)); + Rp[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T44, T43)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP923879532, T44, T43)); + Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP923879532, T3W, T3H))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3W, T3H)); + Ip[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T48, T45)); + Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP923879532, T48, T45))); + } + } + { + E T4d, T4k, T4t, T4u; + T4p = FMA(KP707106781, T4c, T4b); + T4d = FNMS(KP707106781, T4c, T4b); + T4k = T4g - T4j; + T4o = T4g + T4j; + T4n = FMA(KP707106781, T4m, T4l); + T4t = FNMS(KP707106781, T4m, T4l); + T4u = T4q + T4r; + T4s = T4q - T4r; + Im[0] = -(KP500000000 * (FNMS(KP923879532, T4k, T4d))); + Ip[WS(rs, 7)] = KP500000000 * (FMA(KP923879532, T4k, T4d)); + Rm[0] = KP500000000 * (FMA(KP923879532, T4u, T4t)); + Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP923879532, T4u, T4t)); + } + } + } + } + } + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4o, T4n)); + Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP923879532, T4o, T4n)); + Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP923879532, T4s, T4p))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4s, T4p)); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cfdft2_16", twinstr, &GENUS, {136, 74, 92, 0} }; + +void X(codelet_hc2cfdft2_16) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_16, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 
-precompute-twiddles -n 16 -dit -name hc2cfdft2_16 -include hc2cf.h */ + +/* + * This function contains 228 FP additions, 124 FP multiplications, + * (or, 188 additions, 84 multiplications, 40 fused multiply/add), + * 91 stack variables, 4 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP461939766, +0.461939766255643378064091594698394143411208313); + DK(KP191341716, +0.191341716182544885864229992015199433380672281); + DK(KP353553390, +0.353553390593273762200422181052424519642417969); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T1, T4, T2, T5, T7, Td, T12, TY, Tk, Ti, Tm, T1l, T1b, TL, T1h; + E Ts, TR, T17, Ty, Tz, TA, TE, T1L, T1Q, T1H, T1O, T24, T2d, T20, T2b; + { + E Tl, TP, Tq, TK, Tj, TQ, Tr, TJ; + { + E T3, Tc, T6, Tb; + T1 = W[0]; + T4 = W[1]; + T2 = W[2]; + T5 = W[3]; + T3 = T1 * T2; + Tc = T4 * T2; + T6 = T4 * T5; + Tb = T1 * T5; + T7 = T3 + T6; + Td = Tb - Tc; + T12 = Tb + Tc; + TY = T3 - T6; + Tk = W[5]; + Tl = T4 * Tk; + TP = T2 * Tk; + Tq = T1 * Tk; + TK = T5 * Tk; + Ti = W[4]; + Tj = T1 * Ti; + TQ = T5 * Ti; + Tr = T4 * Ti; + TJ = T2 * Ti; + } + Tm = Tj - Tl; + T1l = Tq - Tr; + T1b = TP + TQ; + TL = TJ + TK; + T1h = Tj + Tl; + Ts = Tq + Tr; + TR = TP - TQ; + T17 = TJ - TK; + Ty = W[6]; + Tz = W[7]; + TA = FMA(T1, Ty, T4 * Tz); + TE = FNMS(T4, Ty, T1 * Tz); + { + E T1J, T1K, T1F, T1G; + T1J = TY * Tk; + T1K = T12 * Ti; + T1L = T1J - T1K; + T1Q = T1J + T1K; + T1F = TY * Ti; + T1G = T12 * Tk; + T1H = T1F + T1G; + T1O = T1F - T1G; + } + { + E T22, T23, T1Y, T1Z; + T22 = T7 * Tk; + T23 = Td * Ti; + T24 = T22 + T23; + T2d = T22 - T23; + T1Y = T7 * Ti; + T1Z = Td * Tk; + T20 = T1Y - T1Z; + T2b = T1Y + T1Z; + } + } + { + E T1t, T3i, T2l, T3B, 
T1E, T3t, T2M, T3x, T1g, T3C, T2J, T3u, T1T, T3w, T2o; + E T3j, Tx, T3b, T2C, T3q, T27, T3m, T2s, T3c, TW, T3f, T2F, T3n, T2g, T3p; + E T2v, T3e; + { + E T1k, T1C, T1o, T1B, T1s, T1z, T1y, T2j, T1p, T2k; + { + E T1i, T1j, T1m, T1n; + T1i = Ip[WS(rs, 4)]; + T1j = Im[WS(rs, 4)]; + T1k = T1i - T1j; + T1C = T1i + T1j; + T1m = Rp[WS(rs, 4)]; + T1n = Rm[WS(rs, 4)]; + T1o = T1m + T1n; + T1B = T1m - T1n; + } + { + E T1q, T1r, T1w, T1x; + T1q = Ip[0]; + T1r = Im[0]; + T1s = T1q - T1r; + T1z = T1q + T1r; + T1w = Rm[0]; + T1x = Rp[0]; + T1y = T1w - T1x; + T2j = T1x + T1w; + } + T1p = FNMS(T1l, T1o, T1h * T1k); + T1t = T1p + T1s; + T3i = T1s - T1p; + T2k = FMA(T1h, T1o, T1l * T1k); + T2l = T2j + T2k; + T3B = T2j - T2k; + { + E T1A, T1D, T2K, T2L; + T1A = FNMS(T4, T1z, T1 * T1y); + T1D = FMA(Ti, T1B, Tk * T1C); + T1E = T1A - T1D; + T3t = T1D + T1A; + T2K = FNMS(Tk, T1B, Ti * T1C); + T2L = FMA(T4, T1y, T1 * T1z); + T2M = T2K + T2L; + T3x = T2L - T2K; + } + } + { + E T11, T1M, T15, T1I, T1a, T1R, T1e, T1P; + { + E TZ, T10, T13, T14; + TZ = Ip[WS(rs, 2)]; + T10 = Im[WS(rs, 2)]; + T11 = TZ - T10; + T1M = TZ + T10; + T13 = Rp[WS(rs, 2)]; + T14 = Rm[WS(rs, 2)]; + T15 = T13 + T14; + T1I = T13 - T14; + } + { + E T18, T19, T1c, T1d; + T18 = Ip[WS(rs, 6)]; + T19 = Im[WS(rs, 6)]; + T1a = T18 - T19; + T1R = T18 + T19; + T1c = Rp[WS(rs, 6)]; + T1d = Rm[WS(rs, 6)]; + T1e = T1c + T1d; + T1P = T1c - T1d; + } + { + E T16, T1f, T2H, T2I; + T16 = FNMS(T12, T15, TY * T11); + T1f = FNMS(T1b, T1e, T17 * T1a); + T1g = T16 + T1f; + T3C = T16 - T1f; + T2H = FNMS(T1L, T1I, T1H * T1M); + T2I = FNMS(T1Q, T1P, T1O * T1R); + T2J = T2H + T2I; + T3u = T2H - T2I; + } + { + E T1N, T1S, T2m, T2n; + T1N = FMA(T1H, T1I, T1L * T1M); + T1S = FMA(T1O, T1P, T1Q * T1R); + T1T = T1N + T1S; + T3w = T1S - T1N; + T2m = FMA(TY, T15, T12 * T11); + T2n = FMA(T17, T1e, T1b * T1a); + T2o = T2m + T2n; + T3j = T2m - T2n; + } + } + { + E Ta, T1W, Tg, T1V, Tp, T25, Tv, T21; + { + E T8, T9, Te, Tf; + T8 = Ip[WS(rs, 1)]; + T9 = 
Im[WS(rs, 1)]; + Ta = T8 - T9; + T1W = T8 + T9; + Te = Rp[WS(rs, 1)]; + Tf = Rm[WS(rs, 1)]; + Tg = Te + Tf; + T1V = Te - Tf; + } + { + E Tn, To, Tt, Tu; + Tn = Ip[WS(rs, 5)]; + To = Im[WS(rs, 5)]; + Tp = Tn - To; + T25 = Tn + To; + Tt = Rp[WS(rs, 5)]; + Tu = Rm[WS(rs, 5)]; + Tv = Tt + Tu; + T21 = Tt - Tu; + } + { + E Th, Tw, T2A, T2B; + Th = FNMS(Td, Tg, T7 * Ta); + Tw = FNMS(Ts, Tv, Tm * Tp); + Tx = Th + Tw; + T3b = Th - Tw; + T2A = FNMS(T5, T1V, T2 * T1W); + T2B = FNMS(T24, T21, T20 * T25); + T2C = T2A + T2B; + T3q = T2A - T2B; + } + { + E T1X, T26, T2q, T2r; + T1X = FMA(T2, T1V, T5 * T1W); + T26 = FMA(T20, T21, T24 * T25); + T27 = T1X + T26; + T3m = T26 - T1X; + T2q = FMA(T7, Tg, Td * Ta); + T2r = FMA(Tm, Tv, Ts * Tp); + T2s = T2q + T2r; + T3c = T2q - T2r; + } + } + { + E TD, T29, TH, T28, TO, T2e, TU, T2c; + { + E TB, TC, TF, TG; + TB = Ip[WS(rs, 7)]; + TC = Im[WS(rs, 7)]; + TD = TB - TC; + T29 = TB + TC; + TF = Rp[WS(rs, 7)]; + TG = Rm[WS(rs, 7)]; + TH = TF + TG; + T28 = TF - TG; + } + { + E TM, TN, TS, TT; + TM = Ip[WS(rs, 3)]; + TN = Im[WS(rs, 3)]; + TO = TM - TN; + T2e = TM + TN; + TS = Rp[WS(rs, 3)]; + TT = Rm[WS(rs, 3)]; + TU = TS + TT; + T2c = TS - TT; + } + { + E TI, TV, T2D, T2E; + TI = FNMS(TE, TH, TA * TD); + TV = FNMS(TR, TU, TL * TO); + TW = TI + TV; + T3f = TI - TV; + T2D = FNMS(Tz, T28, Ty * T29); + T2E = FNMS(T2d, T2c, T2b * T2e); + T2F = T2D + T2E; + T3n = T2D - T2E; + } + { + E T2a, T2f, T2t, T2u; + T2a = FMA(Ty, T28, Tz * T29); + T2f = FMA(T2b, T2c, T2d * T2e); + T2g = T2a + T2f; + T3p = T2f - T2a; + T2t = FMA(TA, TH, TE * TD); + T2u = FMA(TL, TU, TR * TO); + T2v = T2t + T2u; + T3e = T2t - T2u; + } + } + { + E T1v, T2z, T2O, T2Q, T2i, T2y, T2x, T2P; + { + E TX, T1u, T2G, T2N; + TX = Tx + TW; + T1u = T1g + T1t; + T1v = TX + T1u; + T2z = T1u - TX; + T2G = T2C + T2F; + T2N = T2J + T2M; + T2O = T2G - T2N; + T2Q = T2G + T2N; + } + { + E T1U, T2h, T2p, T2w; + T1U = T1E - T1T; + T2h = T27 + T2g; + T2i = T1U - T2h; + T2y = T2h + T1U; + T2p = T2l + 
T2o; + T2w = T2s + T2v; + T2x = T2p - T2w; + T2P = T2p + T2w; + } + Ip[0] = KP500000000 * (T1v + T2i); + Rp[0] = KP500000000 * (T2P + T2Q); + Im[WS(rs, 7)] = KP500000000 * (T2i - T1v); + Rm[WS(rs, 7)] = KP500000000 * (T2P - T2Q); + Rm[WS(rs, 3)] = KP500000000 * (T2x - T2y); + Im[WS(rs, 3)] = KP500000000 * (T2O - T2z); + Rp[WS(rs, 4)] = KP500000000 * (T2x + T2y); + Ip[WS(rs, 4)] = KP500000000 * (T2z + T2O); + } + { + E T2T, T35, T33, T39, T2W, T36, T2Z, T37; + { + E T2R, T2S, T31, T32; + T2R = T2v - T2s; + T2S = T1t - T1g; + T2T = KP500000000 * (T2R + T2S); + T35 = KP500000000 * (T2S - T2R); + T31 = T2l - T2o; + T32 = Tx - TW; + T33 = KP500000000 * (T31 - T32); + T39 = KP500000000 * (T31 + T32); + } + { + E T2U, T2V, T2X, T2Y; + T2U = T2F - T2C; + T2V = T27 - T2g; + T2W = T2U + T2V; + T36 = T2U - T2V; + T2X = T1T + T1E; + T2Y = T2M - T2J; + T2Z = T2X - T2Y; + T37 = T2X + T2Y; + } + { + E T30, T3a, T34, T38; + T30 = KP353553390 * (T2W + T2Z); + Ip[WS(rs, 2)] = T2T + T30; + Im[WS(rs, 5)] = T30 - T2T; + T3a = KP353553390 * (T36 + T37); + Rm[WS(rs, 5)] = T39 - T3a; + Rp[WS(rs, 2)] = T39 + T3a; + T34 = KP353553390 * (T2Z - T2W); + Rm[WS(rs, 1)] = T33 - T34; + Rp[WS(rs, 6)] = T33 + T34; + T38 = KP353553390 * (T36 - T37); + Ip[WS(rs, 6)] = T35 + T38; + Im[WS(rs, 1)] = T38 - T35; + } + } + { + E T3k, T3Q, T3Z, T3D, T3h, T40, T3X, T45, T3G, T3P, T3s, T3K, T3U, T44, T3z; + E T3L; + { + E T3d, T3g, T3o, T3r; + T3k = KP500000000 * (T3i - T3j); + T3Q = KP500000000 * (T3j + T3i); + T3Z = KP500000000 * (T3B - T3C); + T3D = KP500000000 * (T3B + T3C); + T3d = T3b - T3c; + T3g = T3e + T3f; + T3h = KP353553390 * (T3d + T3g); + T40 = KP353553390 * (T3d - T3g); + { + E T3V, T3W, T3E, T3F; + T3V = T3u + T3t; + T3W = T3x - T3w; + T3X = FNMS(KP461939766, T3W, KP191341716 * T3V); + T45 = FMA(KP461939766, T3V, KP191341716 * T3W); + T3E = T3c + T3b; + T3F = T3e - T3f; + T3G = KP353553390 * (T3E + T3F); + T3P = KP353553390 * (T3F - T3E); + } + T3o = T3m + T3n; + T3r = T3p - T3q; + T3s = 
FMA(KP191341716, T3o, KP461939766 * T3r); + T3K = FNMS(KP191341716, T3r, KP461939766 * T3o); + { + E T3S, T3T, T3v, T3y; + T3S = T3n - T3m; + T3T = T3q + T3p; + T3U = FMA(KP461939766, T3S, KP191341716 * T3T); + T44 = FNMS(KP461939766, T3T, KP191341716 * T3S); + T3v = T3t - T3u; + T3y = T3w + T3x; + T3z = FNMS(KP191341716, T3y, KP461939766 * T3v); + T3L = FMA(KP191341716, T3v, KP461939766 * T3y); + } + } + { + E T3l, T3A, T3N, T3O; + T3l = T3h + T3k; + T3A = T3s + T3z; + Ip[WS(rs, 1)] = T3l + T3A; + Im[WS(rs, 6)] = T3A - T3l; + T3N = T3D + T3G; + T3O = T3K + T3L; + Rm[WS(rs, 6)] = T3N - T3O; + Rp[WS(rs, 1)] = T3N + T3O; + } + { + E T3H, T3I, T3J, T3M; + T3H = T3D - T3G; + T3I = T3z - T3s; + Rm[WS(rs, 2)] = T3H - T3I; + Rp[WS(rs, 5)] = T3H + T3I; + T3J = T3k - T3h; + T3M = T3K - T3L; + Ip[WS(rs, 5)] = T3J + T3M; + Im[WS(rs, 2)] = T3M - T3J; + } + { + E T3R, T3Y, T47, T48; + T3R = T3P + T3Q; + T3Y = T3U + T3X; + Ip[WS(rs, 3)] = T3R + T3Y; + Im[WS(rs, 4)] = T3Y - T3R; + T47 = T3Z + T40; + T48 = T44 + T45; + Rm[WS(rs, 4)] = T47 - T48; + Rp[WS(rs, 3)] = T47 + T48; + } + { + E T41, T42, T43, T46; + T41 = T3Z - T40; + T42 = T3X - T3U; + Rm[0] = T41 - T42; + Rp[WS(rs, 7)] = T41 + T42; + T43 = T3Q - T3P; + T46 = T44 - T45; + Ip[WS(rs, 7)] = T43 + T46; + Im[0] = T46 - T43; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cfdft2_16", twinstr, &GENUS, {188, 84, 40, 0} }; + +void X(codelet_hc2cfdft2_16) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_16, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1191 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 
Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:33 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cfdft2_20 -include hc2cf.h */ + +/* + * This function contains 316 FP additions, 238 FP multiplications, + * (or, 176 additions, 98 multiplications, 140 fused multiply/add), + * 180 stack variables, 5 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, 
MAKE_VOLATILE_STRIDE(80, rs)) { + E T5h, T5C, T5E, T5y, T5w, T5x, T5D, T5z; + { + E Tm, Tq, Tn, T1, T6, Tg, Tp, Tb, T1i, TU, Tr, TW, Tx, T2B, T1A; + E T1u, T2y, T33, T26, T1o, T30, T22, TD, T1Q, T2a, T2e, T2V, T2R, TG, T1V; + E TV, TH, TN, T2t, T12, T2p; + { + E Tw, To, T29, T1h, T1n, T2d, TC, T2U; + Tm = W[0]; + Tq = W[3]; + Tn = W[2]; + T1 = W[6]; + T6 = W[7]; + Tw = Tm * Tq; + To = Tm * Tn; + T29 = Tm * T1; + T1h = Tn * T1; + T1n = Tn * T6; + T2d = Tm * T6; + Tg = W[5]; + Tp = W[1]; + Tb = W[4]; + { + E T21, T25, T1t, T1z; + T1i = FMA(Tq, T6, T1h); + T25 = Tm * Tg; + T1z = Tn * Tg; + TU = FMA(Tp, Tq, To); + Tr = FNMS(Tp, Tq, To); + TW = FNMS(Tp, Tn, Tw); + Tx = FMA(Tp, Tn, Tw); + T1t = Tn * Tb; + T21 = Tm * Tb; + T2B = FMA(Tq, Tb, T1z); + T1A = FNMS(Tq, Tb, T1z); + TC = Tr * Tb; + T1u = FMA(Tq, Tg, T1t); + T2y = FNMS(Tq, Tg, T1t); + T33 = FMA(Tp, Tb, T25); + T26 = FNMS(Tp, Tb, T25); + T1o = FNMS(Tq, T1, T1n); + T30 = FNMS(Tp, Tg, T21); + T22 = FMA(Tp, Tg, T21); + } + TD = FMA(Tx, Tg, TC); + T1Q = FNMS(Tx, Tg, TC); + T2a = FMA(Tp, T6, T29); + T2e = FNMS(Tp, T1, T2d); + T2U = Tr * T6; + { + E T2Q, TE, TM, TF; + T2Q = Tr * T1; + TF = Tr * Tg; + T2V = FNMS(Tx, T1, T2U); + T2R = FMA(Tx, T6, T2Q); + TG = FNMS(Tx, Tb, TF); + T1V = FMA(Tx, Tb, TF); + TE = TD * T1; + TM = TD * T6; + TV = TU * Tb; + TH = FMA(TG, T6, TE); + TN = FNMS(TG, T1, TM); + T2t = TU * T1; + T12 = TU * Tg; + T2p = TU * T6; + } + } + { + E T36, T3Q, T5f, T4D, T5g, T2Y, T4E, T3P, T5R, T5k, T39, TT, T3T, T3m, T49; + E T4X, T5T, T5r, T3c, T2i, T3W, T3B, T4o, T4U, T5U, T5u, T3d, T2J, T3X, T3I; + E T4v, T4V, T5Q, T5n, T3a, T1G, T3U, T3t, T4g, T4Y; + { + E T13, T2m, T2q, T2u, T2f, T9, T2O, TA, T2c, T4k, T3i, T5, T2Z, T1e, T2G; + E T1O, T2W, TQ, T2C, T1Y, T3v, T27, Tj, T1l, T2v, T3g, T1m, T1D, T2n, T1x; + E T2k, T3E, T4c, T2l, T1y, T10, T31, T16, T34, T32, T11, T4B, T3p, T4A, T1T; + E T3n, T1b, T2A, T4q, T1U, Te, Tf, T24, T4i, T1r, T4a, T3C, T2s, T43, Tv; + E T3L, T2N, T45, TL, T3N, T2T, T2E, T1K; + { + E 
T2j, TX, T1B, T1C; + { + E T1c, T1d, T1M, T1N; + { + E T2, T3, T7, T8; + T7 = Rp[WS(rs, 9)]; + T8 = Rm[WS(rs, 9)]; + T2 = Ip[WS(rs, 9)]; + T2j = FMA(TW, Tg, TV); + TX = FNMS(TW, Tg, TV); + T13 = FMA(TW, Tb, T12); + T2m = FNMS(TW, Tb, T12); + T2q = FNMS(TW, T1, T2p); + T2u = FMA(TW, T6, T2t); + T2f = T7 + T8; + T9 = T7 - T8; + T3 = Im[WS(rs, 9)]; + { + E Ty, Tz, T2b, T4; + Ty = Rp[WS(rs, 2)]; + Tz = Rm[WS(rs, 2)]; + T1c = Ip[0]; + T2b = T2 - T3; + T4 = T2 + T3; + T2O = Ty - Tz; + TA = Ty + Tz; + T2c = T2a * T2b; + T4k = T2e * T2b; + T3i = T6 * T4; + T5 = T1 * T4; + T1d = Im[0]; + T1M = Rp[WS(rs, 1)]; + T1N = Rm[WS(rs, 1)]; + } + } + { + E TO, TP, T1W, T1X; + TO = Rp[WS(rs, 7)]; + T2Z = T1c - T1d; + T1e = T1c + T1d; + T2G = T1M + T1N; + T1O = T1M - T1N; + TP = Rm[WS(rs, 7)]; + T1W = Rm[WS(rs, 6)]; + T1X = Rp[WS(rs, 6)]; + { + E Th, Ti, T1j, T1k; + Th = Rm[WS(rs, 4)]; + T2W = TO - TP; + TQ = TO + TP; + T2C = T1X + T1W; + T1Y = T1W - T1X; + Ti = Rp[WS(rs, 4)]; + T1j = Ip[WS(rs, 8)]; + T1k = Im[WS(rs, 8)]; + T3v = T1Q * T1Y; + T27 = Ti + Th; + Tj = Th - Ti; + T1l = T1j - T1k; + T2v = T1j + T1k; + T1B = Rp[WS(rs, 3)]; + T3g = Tb * Tj; + T1m = T1i * T1l; + T1C = Rm[WS(rs, 3)]; + } + } + } + { + E T18, T19, T1R, T1S; + { + E TY, TZ, T1v, T1w, T14, T15; + T1v = Ip[WS(rs, 3)]; + T1w = Im[WS(rs, 3)]; + TY = Ip[WS(rs, 5)]; + T1D = T1B + T1C; + T2n = T1B - T1C; + T1x = T1v - T1w; + T2k = T1v + T1w; + T3E = T2j * T2n; + T4c = T1u * T1D; + T2l = T2j * T2k; + T1y = T1u * T1x; + TZ = Im[WS(rs, 5)]; + T14 = Rp[WS(rs, 5)]; + T15 = Rm[WS(rs, 5)]; + T18 = Rm[0]; + T10 = TY + TZ; + T31 = TY - TZ; + T16 = T14 - T15; + T34 = T14 + T15; + T32 = T30 * T31; + T11 = TX * T10; + T4B = T30 * T34; + T3p = TX * T16; + T19 = Rp[0]; + T1R = Ip[WS(rs, 6)]; + T1S = Im[WS(rs, 6)]; + } + { + E T2r, T23, T1p, T1q; + { + E Tc, T1a, T2z, Td; + Tc = Ip[WS(rs, 4)]; + T1a = T18 - T19; + T4A = T19 + T18; + T1T = T1R + T1S; + T2z = T1R - T1S; + Td = Im[WS(rs, 4)]; + T3n = Tm * T1a; + T1b = Tp * T1a; + T2A = 
T2y * T2z; + T4q = T2B * T2z; + T1U = T1Q * T1T; + T23 = Tc - Td; + Te = Tc + Td; + } + T1p = Rp[WS(rs, 8)]; + T1q = Rm[WS(rs, 8)]; + Tf = Tb * Te; + T24 = T22 * T23; + T4i = T26 * T23; + T1r = T1p + T1q; + T2r = T1q - T1p; + { + E T2M, Tu, Ts, Tt; + Ts = Ip[WS(rs, 2)]; + Tt = Im[WS(rs, 2)]; + T4a = T1i * T1r; + T3C = T2u * T2r; + T2s = T2q * T2r; + T2M = Ts + Tt; + Tu = Ts - Tt; + { + E T2S, TK, TI, TJ, T1I, T1J; + TI = Ip[WS(rs, 7)]; + TJ = Im[WS(rs, 7)]; + T43 = Tx * Tu; + Tv = Tr * Tu; + T3L = TG * T2M; + T2N = TD * T2M; + T2S = TI + TJ; + TK = TI - TJ; + T1I = Ip[WS(rs, 1)]; + T1J = Im[WS(rs, 1)]; + T45 = TN * TK; + TL = TH * TK; + T3N = T2V * T2S; + T2T = T2R * T2S; + T2E = T1I - T1J; + T1K = T1I + T1J; + } + } + } + } + } + { + E T3x, T1L, T2F, T4s, T2P, T2X, T3M, T3O, T35, T4C; + T35 = FNMS(T33, T34, T32); + T4C = FMA(T33, T31, T4B); + T3x = Tq * T1K; + T1L = Tn * T1K; + T2F = TU * T2E; + T4s = TW * T2E; + T36 = T2Z - T35; + T3Q = T35 + T2Z; + T5f = T4A + T4C; + T4D = T4A - T4C; + T2P = FNMS(TG, T2O, T2N); + T2X = FNMS(T2V, T2W, T2T); + T3M = FMA(TD, T2O, T3L); + T3O = FMA(T2R, T2W, T3N); + { + E TB, T5j, Tl, T5i, T47, TR, T3h, T3j; + { + E Ta, Tk, T44, T46; + Ta = FNMS(T6, T9, T5); + T5g = T2P + T2X; + T2Y = T2P - T2X; + T4E = T3O - T3M; + T3P = T3M + T3O; + Tk = FMA(Tg, Tj, Tf); + T44 = FMA(Tr, TA, T43); + T46 = FMA(TH, TQ, T45); + TB = FNMS(Tx, TA, Tv); + T5j = Tk + Ta; + Tl = Ta - Tk; + T5i = T44 + T46; + T47 = T44 - T46; + TR = FNMS(TN, TQ, TL); + T3h = FNMS(Tg, Te, T3g); + T3j = FMA(T1, T9, T3i); + } + { + E T3l, T48, T3k, TS; + T5R = T5i - T5j; + T5k = T5i + T5j; + T3l = TB + TR; + TS = TB - TR; + T48 = T3h + T3j; + T3k = T3h - T3j; + T39 = TS + Tl; + TT = Tl - TS; + T3T = T3l + T3k; + T3m = T3k - T3l; + T49 = T47 + T48; + T4X = T47 - T48; + } + } + { + E T28, T5q, T20, T5p, T4m, T2g, T3w, T3y; + { + E T1P, T1Z, T4j, T4l; + T1P = FNMS(Tq, T1O, T1L); + T1Z = FMA(T1V, T1Y, T1U); + T4j = FMA(T22, T27, T4i); + T4l = FMA(T2a, T2f, T4k); + T28 = FNMS(T26, 
T27, T24); + T5q = T1Z + T1P; + T20 = T1P - T1Z; + T5p = T4j + T4l; + T4m = T4j - T4l; + T2g = FNMS(T2e, T2f, T2c); + T3w = FNMS(T1V, T1T, T3v); + T3y = FMA(Tn, T1O, T3x); + } + { + E T3A, T4n, T3z, T2h; + T5T = T5p - T5q; + T5r = T5p + T5q; + T3A = T28 + T2g; + T2h = T28 - T2g; + T4n = T3w + T3y; + T3z = T3w - T3y; + T3c = T2h + T20; + T2i = T20 - T2h; + T3W = T3A + T3z; + T3B = T3z - T3A; + T4o = T4m + T4n; + T4U = T4m - T4n; + } + } + { + E T2D, T5s, T2x, T5t, T4u, T2H, T3D, T3F; + { + E T2o, T2w, T4r, T4t; + T2o = FNMS(T2m, T2n, T2l); + T2w = FMA(T2u, T2v, T2s); + T4r = FMA(T2y, T2C, T4q); + T4t = FMA(TU, T2G, T4s); + T2D = FNMS(T2B, T2C, T2A); + T5s = T2w + T2o; + T2x = T2o - T2w; + T5t = T4r + T4t; + T4u = T4r - T4t; + T2H = FNMS(TW, T2G, T2F); + T3D = FNMS(T2q, T2v, T3C); + T3F = FMA(T2m, T2k, T3E); + } + { + E T3H, T4p, T3G, T2I; + T5U = T5t - T5s; + T5u = T5s + T5t; + T3H = T2D + T2H; + T2I = T2D - T2H; + T4p = T3D + T3F; + T3G = T3D - T3F; + T3d = T2x + T2I; + T2J = T2x - T2I; + T3X = T3G + T3H; + T3I = T3G - T3H; + T4v = T4p + T4u; + T4V = T4u - T4p; + } + } + { + E T1s, T5m, T1g, T5l, T4e, T1E, T3o, T3q; + { + E T17, T1f, T4b, T4d; + T17 = FNMS(T13, T16, T11); + T1f = FMA(Tm, T1e, T1b); + T4b = FMA(T1o, T1l, T4a); + T4d = FMA(T1A, T1x, T4c); + T1s = FNMS(T1o, T1r, T1m); + T5m = T17 + T1f; + T1g = T17 - T1f; + T5l = T4b + T4d; + T4e = T4b - T4d; + T1E = FNMS(T1A, T1D, T1y); + T3o = FNMS(Tp, T1e, T3n); + T3q = FMA(T13, T10, T3p); + } + { + E T3s, T4f, T3r, T1F; + T5Q = T5l - T5m; + T5n = T5l + T5m; + T3s = T1s + T1E; + T1F = T1s - T1E; + T4f = T3q + T3o; + T3r = T3o - T3q; + T3a = T1F + T1g; + T1G = T1g - T1F; + T3U = T3s + T3r; + T3t = T3r - T3s; + T4g = T4e + T4f; + T4Y = T4e - T4f; + } + } + } + } + { + E T4F, T4G, T4H, T4x, T4z, T41, T4O, T4Q, T40; + { + E T55, T38, T54, T50, T52, T53, T5e, T5c, T51, T4T; + { + E T4W, T37, T4Z, T1H, T5b, T5a, T2K, T2L, T4S, T4R; + T55 = T4U + T4V; + T4W = T4U - T4V; + T37 = T2Y + T36; + T38 = T36 - T2Y; + T54 = T4X + 
T4Y; + T4Z = T4X - T4Y; + T1H = TT + T1G; + T5b = T1G - TT; + T5a = T2J - T2i; + T2K = T2i + T2J; + T50 = FNMS(KP618033988, T4Z, T4W); + T52 = FMA(KP618033988, T4W, T4Z); + T2L = T1H + T2K; + T4S = T1H - T2K; + T53 = T4D - T4E; + T4F = T4D + T4E; + Im[WS(rs, 4)] = KP500000000 * (T2L - T37); + T4R = FMA(KP250000000, T2L, T37); + T5e = FMA(KP618033988, T5a, T5b); + T5c = FNMS(KP618033988, T5b, T5a); + T51 = FNMS(KP559016994, T4S, T4R); + T4T = FMA(KP559016994, T4S, T4R); + } + { + E T3b, T4M, T4N, T3e, T3f; + { + E T4h, T58, T57, T4w, T56, T5d, T59; + T4G = T49 + T4g; + T4h = T49 - T4g; + T58 = T54 - T55; + T56 = T54 + T55; + Ip[WS(rs, 7)] = KP500000000 * (FMA(KP951056516, T50, T4T)); + Ip[WS(rs, 3)] = KP500000000 * (FNMS(KP951056516, T50, T4T)); + Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP951056516, T52, T51))); + Im[0] = -(KP500000000 * (FMA(KP951056516, T52, T51))); + Rm[WS(rs, 4)] = KP500000000 * (T53 + T56); + T57 = FNMS(KP250000000, T56, T53); + T4w = T4o - T4v; + T4H = T4o + T4v; + T3b = T39 + T3a; + T4M = T39 - T3a; + T5d = FMA(KP559016994, T58, T57); + T59 = FNMS(KP559016994, T58, T57); + T4x = FMA(KP618033988, T4w, T4h); + T4z = FNMS(KP618033988, T4h, T4w); + Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5c, T59)); + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5c, T59)); + Rm[0] = KP500000000 * (FNMS(KP951056516, T5e, T5d)); + Rm[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5e, T5d)); + T4N = T3c - T3d; + T3e = T3c + T3d; + } + T3f = T3b + T3e; + T41 = T3b - T3e; + T4O = FMA(KP618033988, T4N, T4M); + T4Q = FNMS(KP618033988, T4M, T4N); + Ip[WS(rs, 5)] = KP500000000 * (T38 + T3f); + T40 = FNMS(KP250000000, T3f, T38); + } + } + { + E T3S, T5Z, T68, T6a, T64, T62; + { + E T60, T61, T5Y, T5W, T3R, T67, T66, T3K, T5O, T4K, T4J, T5N, T5X, T5P; + { + E T5S, T5V, T4y, T42, T4I; + T60 = T5R + T5Q; + T5S = T5Q - T5R; + T5V = T5T - T5U; + T61 = T5T + T5U; + T4y = FNMS(KP559016994, T41, T40); + T42 = FMA(KP559016994, T41, T40); + T4I = T4G + T4H; + T4K = T4G - 
T4H; + Ip[WS(rs, 9)] = KP500000000 * (FMA(KP951056516, T4x, T42)); + Ip[WS(rs, 1)] = KP500000000 * (FNMS(KP951056516, T4x, T42)); + Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP951056516, T4z, T4y))); + Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP951056516, T4z, T4y))); + Rp[WS(rs, 5)] = KP500000000 * (T4F + T4I); + T4J = FNMS(KP250000000, T4I, T4F); + T5Y = FMA(KP618033988, T5S, T5V); + T5W = FNMS(KP618033988, T5V, T5S); + } + T3S = T3Q - T3P; + T3R = T3P + T3Q; + { + E T4L, T4P, T3u, T3J; + T4L = FMA(KP559016994, T4K, T4J); + T4P = FNMS(KP559016994, T4K, T4J); + T3u = T3m + T3t; + T67 = T3t - T3m; + T66 = T3I - T3B; + T3J = T3B + T3I; + Rp[WS(rs, 9)] = KP500000000 * (FNMS(KP951056516, T4O, T4L)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T4O, T4L)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T4Q, T4P)); + Rm[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T4Q, T4P)); + T3K = T3u + T3J; + T5O = T3J - T3u; + } + Im[WS(rs, 9)] = KP500000000 * (T3K - T3R); + T5N = FMA(KP250000000, T3K, T3R); + T5Z = T5f - T5g; + T5h = T5f + T5g; + T68 = FNMS(KP618033988, T67, T66); + T6a = FMA(KP618033988, T66, T67); + T5X = FNMS(KP559016994, T5O, T5N); + T5P = FMA(KP559016994, T5O, T5N); + Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP951056516, T5W, T5P))); + Ip[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5W, T5P)); + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T5Y, T5X))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP951056516, T5Y, T5X)); + T64 = T60 - T61; + T62 = T60 + T61; + } + { + E T5o, T5v, T5M, T5K, T5A, T5B, T3Z, T5G, T5I, T5J, T63, T5F, T5L, T5H; + T5o = T5k + T5n; + T5I = T5k - T5n; + T5J = T5u - T5r; + T5v = T5r + T5u; + Rm[WS(rs, 9)] = KP500000000 * (T5Z + T62); + T63 = FNMS(KP250000000, T62, T5Z); + T5M = FMA(KP618033988, T5I, T5J); + T5K = FNMS(KP618033988, T5J, T5I); + { + E T65, T69, T3V, T3Y; + T65 = FNMS(KP559016994, T64, T63); + T69 = FMA(KP559016994, T64, T63); + T3V = T3T + T3U; + T5A = T3T - T3U; + T5B = T3W - T3X; + T3Y = T3W + T3X; + Rm[WS(rs, 1)] = 
KP500000000 * (FMA(KP951056516, T68, T65)); + Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T68, T65)); + Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP951056516, T6a, T69)); + Rp[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T6a, T69)); + T3Z = T3V + T3Y; + T5G = T3V - T3Y; + } + Ip[0] = KP500000000 * (T3S + T3Z); + T5F = FNMS(KP250000000, T3Z, T3S); + T5C = FMA(KP618033988, T5B, T5A); + T5E = FNMS(KP618033988, T5A, T5B); + T5L = FNMS(KP559016994, T5G, T5F); + T5H = FMA(KP559016994, T5G, T5F); + Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP951056516, T5K, T5H))); + Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T5K, T5H)); + Im[WS(rs, 7)] = -(KP500000000 * (FNMS(KP951056516, T5M, T5L))); + Ip[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5M, T5L)); + T5y = T5o - T5v; + T5w = T5o + T5v; + } + } + } + } + } + Rp[0] = KP500000000 * (T5h + T5w); + T5x = FNMS(KP250000000, T5w, T5h); + T5D = FNMS(KP559016994, T5y, T5x); + T5z = FMA(KP559016994, T5y, T5x); + Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5C, T5z)); + Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T5C, T5z)); + Rm[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5E, T5D)); + Rp[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5E, T5D)); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cfdft2_20", twinstr, &GENUS, {176, 98, 140, 0} }; + +void X(codelet_hc2cfdft2_20) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_20, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cfdft2_20 -include hc2cf.h */ + +/* + * This function contains 316 FP additions, 180 FP multiplications, + * (or, 244 additions, 108 multiplications, 72 fused multiply/add), + * 134 stack variables, 5 constants, and 80 memory accesses + */ +#include "hc2cf.h" + 
+static void hc2cfdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP125000000, +0.125000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP279508497, +0.279508497187473712051146708591409529430077295); + DK(KP293892626, +0.293892626146236564584352977319536384298826219); + DK(KP475528258, +0.475528258147576786058219666689691071702849317); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) { + E T4, T7, Tm, To, Tq, Tu, T1I, T1G, T8, T5, Ta, T1u, T2u, Tg, T2s; + E T21, T1A, T1Z, T1O, T2I, T1K, T2G, Tw, TC, T2a, T2e, TH, TI, TJ, TX; + E T2D, TN, T2B, T26, T1n, TZ, T24, T1j; + { + E T9, T1y, Te, T1t, T6, T1z, Tf, T1s; + { + E Tn, Tt, Tp, Ts; + T4 = W[0]; + T7 = W[1]; + Tm = W[2]; + To = W[3]; + Tn = T4 * Tm; + Tt = T7 * Tm; + Tp = T7 * To; + Ts = T4 * To; + Tq = Tn - Tp; + Tu = Ts + Tt; + T1I = Ts - Tt; + T1G = Tn + Tp; + T8 = W[5]; + T9 = T7 * T8; + T1y = Tm * T8; + Te = T4 * T8; + T1t = To * T8; + T5 = W[4]; + T6 = T4 * T5; + T1z = To * T5; + Tf = T7 * T5; + T1s = Tm * T5; + } + Ta = T6 - T9; + T1u = T1s + T1t; + T2u = T1y + T1z; + Tg = Te + Tf; + T2s = T1s - T1t; + T21 = Te - Tf; + T1A = T1y - T1z; + T1Z = T6 + T9; + { + E T1M, T1N, T1H, T1J; + T1M = T1G * T8; + T1N = T1I * T5; + T1O = T1M + T1N; + T2I = T1M - T1N; + T1H = T1G * T5; + T1J = T1I * T8; + T1K = T1H - T1J; + T2G = T1H + T1J; + { + E Tr, Tv, TA, TB; + Tr = Tq * T5; + Tv = Tu * T8; + Tw = Tr + Tv; + TA = Tq * T8; + TB = Tu * T5; + TC = TA - TB; + T2a = Tr - Tv; + T2e = TA + TB; + TH = W[6]; + TI = W[7]; + TJ = FMA(Tq, TH, Tu * TI); + TX = FMA(Tw, TH, TC * TI); + T2D = FMA(T1G, TH, T1I * TI); + TN = FNMS(Tu, TH, Tq * TI); + T2B = FNMS(T1I, TH, T1G * TI); + T26 = FNMS(T7, TH, T4 * TI); + T1n = FNMS(To, TH, Tm * TI); + TZ = FNMS(TC, TH, Tw * TI); + T24 = FMA(T4, TH, T7 * TI); + T1j = 
FMA(Tm, TH, To * TI); + } + } + } + { + E Tl, T3n, T1i, T2Q, T47, T50, T4S, T5i, T2M, T2T, T4I, T5f, T4L, T5e, T4P; + E T5h, T2r, T2S, T1X, T2P, T31, T3u, T36, T3t, T3E, T4l, T3U, T4j, T3h, T3r; + E T3J, T4m, T3c, T3q, T3P, T4i, TS, T51, T3m, T48; + { + E T3, T45, T1V, T3f, Tz, TF, TW, T3A, TM, TQ, T11, T3B, Td, Tj, T1Q; + E T3e, T19, T3L, T23, T39, T2p, T3S, T2z, T34, T1E, T3G, T2K, T2Y, T1g, T3M; + E T28, T3a, T2i, T3R, T2w, T33, T1r, T3F, T2F, T2X, T4N, T4O; + { + E T1, T2, T1R, T1S, T1T, T1U; + T1 = Ip[0]; + T2 = Im[0]; + T1R = T1 + T2; + T1S = Rp[0]; + T1T = Rm[0]; + T1U = T1S - T1T; + T3 = T1 - T2; + T45 = T1S + T1T; + T1V = FNMS(T7, T1U, T4 * T1R); + T3f = FMA(T4, T1U, T7 * T1R); + } + { + E Tx, Ty, TU, TD, TE, TV; + Tx = Ip[WS(rs, 2)]; + Ty = Im[WS(rs, 2)]; + TU = Tx - Ty; + TD = Rp[WS(rs, 2)]; + TE = Rm[WS(rs, 2)]; + TV = TD + TE; + Tz = Tx + Ty; + TF = TD - TE; + TW = FNMS(Tu, TV, Tq * TU); + T3A = FMA(Tu, TU, Tq * TV); + } + { + E TK, TL, TY, TO, TP, T10; + TK = Ip[WS(rs, 7)]; + TL = Im[WS(rs, 7)]; + TY = TK - TL; + TO = Rp[WS(rs, 7)]; + TP = Rm[WS(rs, 7)]; + T10 = TO + TP; + TM = TK + TL; + TQ = TO - TP; + T11 = FNMS(TZ, T10, TX * TY); + T3B = FMA(TZ, TY, TX * T10); + } + { + E Tb, Tc, T1L, Th, Ti, T1P; + Tb = Ip[WS(rs, 5)]; + Tc = Im[WS(rs, 5)]; + T1L = Tb + Tc; + Th = Rp[WS(rs, 5)]; + Ti = Rm[WS(rs, 5)]; + T1P = Th - Ti; + Td = Tb - Tc; + Tj = Th + Ti; + T1Q = FNMS(T1O, T1P, T1K * T1L); + T3e = FMA(T1K, T1P, T1O * T1L); + } + { + E T15, T20, T18, T22; + { + E T13, T14, T16, T17; + T13 = Ip[WS(rs, 4)]; + T14 = Im[WS(rs, 4)]; + T15 = T13 + T14; + T20 = T13 - T14; + T16 = Rp[WS(rs, 4)]; + T17 = Rm[WS(rs, 4)]; + T18 = T16 - T17; + T22 = T16 + T17; + } + T19 = FNMS(T8, T18, T5 * T15); + T3L = FMA(T21, T20, T1Z * T22); + T23 = FNMS(T21, T22, T1Z * T20); + T39 = FMA(T8, T15, T5 * T18); + } + { + E T2l, T2x, T2o, T2y; + { + E T2j, T2k, T2m, T2n; + T2j = Ip[WS(rs, 1)]; + T2k = Im[WS(rs, 1)]; + T2l = T2j + T2k; + T2x = T2j - T2k; + T2m = Rp[WS(rs, 1)]; + T2n = 
Rm[WS(rs, 1)]; + T2o = T2m - T2n; + T2y = T2m + T2n; + } + T2p = FNMS(To, T2o, Tm * T2l); + T3S = FMA(T1I, T2x, T1G * T2y); + T2z = FNMS(T1I, T2y, T1G * T2x); + T34 = FMA(To, T2l, Tm * T2o); + } + { + E T1x, T2H, T1D, T2J; + { + E T1v, T1w, T1B, T1C; + T1v = Ip[WS(rs, 3)]; + T1w = Im[WS(rs, 3)]; + T1x = T1v - T1w; + T2H = T1v + T1w; + T1B = Rp[WS(rs, 3)]; + T1C = Rm[WS(rs, 3)]; + T1D = T1B + T1C; + T2J = T1B - T1C; + } + T1E = FNMS(T1A, T1D, T1u * T1x); + T3G = FMA(T1u, T1D, T1A * T1x); + T2K = FNMS(T2I, T2J, T2G * T2H); + T2Y = FMA(T2G, T2J, T2I * T2H); + } + { + E T1c, T25, T1f, T27; + { + E T1a, T1b, T1d, T1e; + T1a = Ip[WS(rs, 9)]; + T1b = Im[WS(rs, 9)]; + T1c = T1a + T1b; + T25 = T1a - T1b; + T1d = Rp[WS(rs, 9)]; + T1e = Rm[WS(rs, 9)]; + T1f = T1d - T1e; + T27 = T1d + T1e; + } + T1g = FNMS(TI, T1f, TH * T1c); + T3M = FMA(T26, T25, T24 * T27); + T28 = FNMS(T26, T27, T24 * T25); + T3a = FMA(TI, T1c, TH * T1f); + } + { + E T2d, T2t, T2h, T2v; + { + E T2b, T2c, T2f, T2g; + T2b = Ip[WS(rs, 6)]; + T2c = Im[WS(rs, 6)]; + T2d = T2b + T2c; + T2t = T2b - T2c; + T2f = Rp[WS(rs, 6)]; + T2g = Rm[WS(rs, 6)]; + T2h = T2f - T2g; + T2v = T2f + T2g; + } + T2i = FNMS(T2e, T2h, T2a * T2d); + T3R = FMA(T2u, T2t, T2s * T2v); + T2w = FNMS(T2u, T2v, T2s * T2t); + T33 = FMA(T2e, T2d, T2a * T2h); + } + { + E T1m, T2E, T1q, T2C; + { + E T1k, T1l, T1o, T1p; + T1k = Ip[WS(rs, 8)]; + T1l = Im[WS(rs, 8)]; + T1m = T1k - T1l; + T2E = T1k + T1l; + T1o = Rp[WS(rs, 8)]; + T1p = Rm[WS(rs, 8)]; + T1q = T1o + T1p; + T2C = T1p - T1o; + } + T1r = FNMS(T1n, T1q, T1j * T1m); + T3F = FMA(T1j, T1q, T1n * T1m); + T2F = FMA(T2B, T2C, T2D * T2E); + T2X = FNMS(T2B, T2E, T2D * T2C); + } + { + E Tk, T12, T1h, T46; + Tk = FNMS(Tg, Tj, Ta * Td); + Tl = T3 - Tk; + T3n = Tk + T3; + T12 = TW - T11; + T1h = T19 - T1g; + T1i = T12 - T1h; + T2Q = T12 + T1h; + T46 = FMA(Ta, Tj, Tg * Td); + T47 = T45 - T46; + T50 = T45 + T46; + { + E T4Q, T4R, T2A, T2L; + T4Q = T2F + T2K; + T4R = T3R + T3S; + T4S = T4Q + T4R; + T5i = 
T4R - T4Q; + T2A = T2w - T2z; + T2L = T2F - T2K; + T2M = T2A - T2L; + T2T = T2L + T2A; + } + } + { + E T4G, T4H, T4J, T4K; + T4G = T3A + T3B; + T4H = T19 + T1g; + T4I = T4G + T4H; + T5f = T4G - T4H; + T4J = T3F + T3G; + T4K = T1Q + T1V; + T4L = T4J + T4K; + T5e = T4J - T4K; + } + T4N = T3L + T3M; + T4O = T2i + T2p; + T4P = T4N + T4O; + T5h = T4N - T4O; + { + E T29, T2q, T1F, T1W; + T29 = T23 - T28; + T2q = T2i - T2p; + T2r = T29 - T2q; + T2S = T29 + T2q; + T1F = T1r - T1E; + T1W = T1Q - T1V; + T1X = T1F + T1W; + T2P = T1W - T1F; + } + { + E T3C, T3D, T3N, T3O; + { + E T2Z, T30, T32, T35; + T2Z = T2X - T2Y; + T30 = T2w + T2z; + T31 = T2Z - T30; + T3u = T2Z + T30; + T32 = T23 + T28; + T35 = T33 + T34; + T36 = T32 + T35; + T3t = T32 - T35; + } + T3C = T3A - T3B; + T3D = T3a - T39; + T3E = T3C + T3D; + T4l = T3C - T3D; + { + E T3Q, T3T, T3d, T3g; + T3Q = T2X + T2Y; + T3T = T3R - T3S; + T3U = T3Q + T3T; + T4j = T3T - T3Q; + T3d = T1r + T1E; + T3g = T3e + T3f; + T3h = T3d + T3g; + T3r = T3d - T3g; + } + { + E T3H, T3I, T38, T3b; + T3H = T3F - T3G; + T3I = T3e - T3f; + T3J = T3H + T3I; + T4m = T3H - T3I; + T38 = TW + T11; + T3b = T39 + T3a; + T3c = T38 + T3b; + T3q = T38 - T3b; + } + T3N = T3L - T3M; + T3O = T34 - T33; + T3P = T3N + T3O; + T4i = T3N - T3O; + { + E TG, TR, T3k, T3l; + TG = FNMS(TC, TF, Tw * Tz); + TR = FNMS(TN, TQ, TJ * TM); + TS = TG - TR; + T51 = TG + TR; + T3k = FMA(TC, Tz, Tw * TF); + T3l = FMA(TN, TM, TJ * TQ); + T3m = T3k + T3l; + T48 = T3l - T3k; + } + } + } + { + E T3W, T3Y, TT, T2O, T3x, T3y, T3X, T3z; + { + E T3K, T3V, T1Y, T2N; + T3K = T3E - T3J; + T3V = T3P - T3U; + T3W = FMA(KP475528258, T3K, KP293892626 * T3V); + T3Y = FNMS(KP293892626, T3K, KP475528258 * T3V); + TT = Tl - TS; + T1Y = T1i + T1X; + T2N = T2r + T2M; + T2O = T1Y + T2N; + T3x = KP279508497 * (T1Y - T2N); + T3y = FNMS(KP125000000, T2O, KP500000000 * TT); + } + Ip[WS(rs, 5)] = KP500000000 * (TT + T2O); + T3X = T3x - T3y; + Im[WS(rs, 2)] = T3X - T3Y; + Im[WS(rs, 6)] = T3X + T3Y; + 
T3z = T3x + T3y; + Ip[WS(rs, 1)] = T3z - T3W; + Ip[WS(rs, 9)] = T3z + T3W; + } + { + E T41, T4d, T49, T4a, T44, T4b, T4e, T4c; + { + E T3Z, T40, T42, T43; + T3Z = T1i - T1X; + T40 = T2r - T2M; + T41 = FMA(KP475528258, T3Z, KP293892626 * T40); + T4d = FNMS(KP293892626, T3Z, KP475528258 * T40); + T49 = T47 + T48; + T42 = T3E + T3J; + T43 = T3P + T3U; + T4a = T42 + T43; + T44 = KP279508497 * (T42 - T43); + T4b = FNMS(KP125000000, T4a, KP500000000 * T49); + } + Rp[WS(rs, 5)] = KP500000000 * (T49 + T4a); + T4e = T4b - T44; + Rm[WS(rs, 6)] = T4d + T4e; + Rm[WS(rs, 2)] = T4e - T4d; + T4c = T44 + T4b; + Rp[WS(rs, 1)] = T41 + T4c; + Rp[WS(rs, 9)] = T4c - T41; + } + { + E T4o, T4q, T2W, T2V, T4f, T4g, T4p, T4h; + { + E T4k, T4n, T2R, T2U; + T4k = T4i - T4j; + T4n = T4l - T4m; + T4o = FNMS(KP293892626, T4n, KP475528258 * T4k); + T4q = FMA(KP475528258, T4n, KP293892626 * T4k); + T2W = TS + Tl; + T2R = T2P - T2Q; + T2U = T2S + T2T; + T2V = T2R - T2U; + T4f = FMA(KP500000000, T2W, KP125000000 * T2V); + T4g = KP279508497 * (T2R + T2U); + } + Im[WS(rs, 4)] = KP500000000 * (T2V - T2W); + T4p = T4g - T4f; + Im[0] = T4p - T4q; + Im[WS(rs, 8)] = T4p + T4q; + T4h = T4f + T4g; + Ip[WS(rs, 3)] = T4h - T4o; + Ip[WS(rs, 7)] = T4h + T4o; + } + { + E T4t, T4B, T4u, T4x, T4y, T4z, T4C, T4A; + { + E T4r, T4s, T4v, T4w; + T4r = T2S - T2T; + T4s = T2Q + T2P; + T4t = FNMS(KP293892626, T4s, KP475528258 * T4r); + T4B = FMA(KP475528258, T4s, KP293892626 * T4r); + T4u = T47 - T48; + T4v = T4l + T4m; + T4w = T4i + T4j; + T4x = T4v + T4w; + T4y = FNMS(KP125000000, T4x, KP500000000 * T4u); + T4z = KP279508497 * (T4v - T4w); + } + Rm[WS(rs, 4)] = KP500000000 * (T4u + T4x); + T4C = T4z + T4y; + Rm[WS(rs, 8)] = T4B + T4C; + Rm[0] = T4C - T4B; + T4A = T4y - T4z; + Rp[WS(rs, 3)] = T4t + T4A; + Rp[WS(rs, 7)] = T4A - T4t; + } + { + E T5k, T5m, T3o, T3j, T5b, T5c, T5l, T5d; + { + E T5g, T5j, T37, T3i; + T5g = T5e - T5f; + T5j = T5h - T5i; + T5k = FNMS(KP293892626, T5j, KP475528258 * T5g); + T5m = 
FMA(KP293892626, T5g, KP475528258 * T5j); + T3o = T3m + T3n; + T37 = T31 - T36; + T3i = T3c + T3h; + T3j = T37 - T3i; + T5b = FMA(KP500000000, T3o, KP125000000 * T3j); + T5c = KP279508497 * (T3i + T37); + } + Im[WS(rs, 9)] = KP500000000 * (T3j - T3o); + T5l = T5b - T5c; + Ip[WS(rs, 2)] = T5l + T5m; + Im[WS(rs, 1)] = T5m - T5l; + T5d = T5b + T5c; + Ip[WS(rs, 6)] = T5d + T5k; + Im[WS(rs, 5)] = T5k - T5d; + } + { + E T5w, T5x, T5n, T5q, T5r, T5s, T5y, T5t; + { + E T5u, T5v, T5o, T5p; + T5u = T36 + T31; + T5v = T3c - T3h; + T5w = FNMS(KP293892626, T5v, KP475528258 * T5u); + T5x = FMA(KP475528258, T5v, KP293892626 * T5u); + T5n = T50 - T51; + T5o = T5f + T5e; + T5p = T5h + T5i; + T5q = T5o + T5p; + T5r = FNMS(KP125000000, T5q, KP500000000 * T5n); + T5s = KP279508497 * (T5o - T5p); + } + Rm[WS(rs, 9)] = KP500000000 * (T5n + T5q); + T5y = T5s + T5r; + Rp[WS(rs, 6)] = T5x + T5y; + Rm[WS(rs, 5)] = T5y - T5x; + T5t = T5r - T5s; + Rp[WS(rs, 2)] = T5t - T5w; + Rm[WS(rs, 1)] = T5w + T5t; + } + { + E T4U, T4W, T3p, T3w, T4D, T4E, T4V, T4F; + { + E T4M, T4T, T3s, T3v; + T4M = T4I - T4L; + T4T = T4P - T4S; + T4U = FNMS(KP475528258, T4T, KP293892626 * T4M); + T4W = FMA(KP475528258, T4M, KP293892626 * T4T); + T3p = T3n - T3m; + T3s = T3q + T3r; + T3v = T3t + T3u; + T3w = T3s + T3v; + T4D = FNMS(KP125000000, T3w, KP500000000 * T3p); + T4E = KP279508497 * (T3s - T3v); + } + Ip[0] = KP500000000 * (T3p + T3w); + T4V = T4E + T4D; + Ip[WS(rs, 4)] = T4V + T4W; + Im[WS(rs, 3)] = T4W - T4V; + T4F = T4D - T4E; + Ip[WS(rs, 8)] = T4F + T4U; + Im[WS(rs, 7)] = T4U - T4F; + } + { + E T58, T59, T52, T53, T4Z, T54, T5a, T55; + { + E T56, T57, T4X, T4Y; + T56 = T3q - T3r; + T57 = T3t - T3u; + T58 = FMA(KP475528258, T56, KP293892626 * T57); + T59 = FNMS(KP293892626, T56, KP475528258 * T57); + T52 = T50 + T51; + T4X = T4I + T4L; + T4Y = T4P + T4S; + T53 = T4X + T4Y; + T4Z = KP279508497 * (T4X - T4Y); + T54 = FNMS(KP125000000, T53, KP500000000 * T52); + } + Rp[0] = KP500000000 * (T52 + T53); + T5a = T54 
- T4Z; + Rp[WS(rs, 8)] = T59 + T5a; + Rm[WS(rs, 7)] = T5a - T59; + T55 = T4Z + T54; + Rp[WS(rs, 4)] = T55 - T58; + Rm[WS(rs, 3)] = T58 + T55; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cfdft2_20", twinstr, &GENUS, {244, 108, 72, 0} }; + +void X(codelet_hc2cfdft2_20) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_20, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2012 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include hc2cf.h */ + +/* + * This function contains 552 FP additions, 414 FP multiplications, + * (or, 300 additions, 162 multiplications, 252 fused multiply/add), + * 196 stack variables, 8 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tax, TaA; + { + E T1, Th, T2, T5, Ti, Ty, T1t, T3, Tb, Tj, TY, TK, Tl, T4, Tk; + T1 = W[0]; + Th = W[4]; + T2 = W[2]; + T5 = W[3]; + Ti = W[6]; + Ty = T1 * Th; + T1t = T2 * Th; + T3 = T1 * T2; + Tb = T1 * T5; + Tj = Th * Ti; + TY = T2 * Ti; + TK = T1 * Ti; + Tl = W[7]; + T4 = W[1]; + Tk = W[5]; + { + E T3j, T7Z, T5b, 
T93, T6B, T8V, T4d, T8J, T8r, T6e, T8l, T1T, T8C, T54, T8i; + E T5O, T94, T31, T8K, T6w, T8U, T3Y, T80, T5g, T8B, T69, T8h, T1s, T8q, T4T; + E T8k, T5J, Tx, T8a, T5y, T8d, T4s, T5Y, T8v, T8E, T2k, T82, T6l, T3z, T83; + E T5m, T8X, T8O, T2F, T86, T6q, T3M, T85, T5r, T8Y, T8R, TW, T8e, T8x, T4B; + E T5D, T8b, T63, T8w; + { + E TL, T2l, T1c, Tc, T1a, T6, Tm, T2v, Tz, T2q, TR, Ts, T2A, TF, T1H; + E T1g, T1d, T1F, T34, T3F, T3B, T32, T3w, T3s, T4p, T4l, T2f, T29, T4K, T4S; + E T5G, T5I; + { + E TZ, T2R, T2H, T15, T2W, T2M, T4I, T4E, T3V, T3S, T4Q, T4M, T1n, T1h, T4X; + E T53, T5L, T5N, T5d, T5f; + { + E T1u, T1A, T51, T4Y, T28, T25, T44, T40, T1O, T1I, T3b, T35, T4b, T3i, T45; + E T38, T39, T58, T49, T3e, T41; + { + E T3g, T3h, T36, T37, TQ; + T3g = Ip[0]; + TZ = FNMS(T5, Tl, TY); + T2R = FMA(T5, Tl, TY); + TQ = T1 * Tl; + { + E T14, Tr, T1z, TE; + T14 = T2 * Tl; + Tr = Th * Tl; + TL = FMA(T4, Tl, TK); + T2l = FNMS(T4, Tl, TK); + T1c = FMA(T4, T2, Tb); + Tc = FNMS(T4, T2, Tb); + T1a = FNMS(T4, T5, T3); + T6 = FMA(T4, T5, T3); + Tm = FMA(Tk, Tl, Tj); + T2v = FNMS(T5, Tk, T1t); + T1u = FMA(T5, Tk, T1t); + Tz = FNMS(T4, Tk, Ty); + T2H = FMA(T4, Tk, Ty); + T1z = T2 * Tk; + TE = T1 * Tk; + T2q = FMA(T4, Ti, TQ); + TR = FNMS(T4, Ti, TQ); + T15 = FMA(T5, Ti, T14); + T2W = FNMS(T5, Ti, T14); + Ts = FNMS(Tk, Ti, Tr); + { + E T1f, T4H, T4D, T1b; + T1f = T1a * Tk; + T4H = T1a * Tl; + T4D = T1a * Ti; + T1b = T1a * Th; + { + E T27, T3E, T3A, T24; + T27 = T6 * Tk; + T3E = T6 * Tl; + T3A = T6 * Ti; + T24 = T6 * Th; + { + E T3v, T3r, T4P, T4L; + T3v = T1u * Tl; + T3r = T1u * Ti; + T4P = T2v * Tl; + T4L = T2v * Ti; + { + E T4o, T4k, T43, T3Z; + T4o = T2H * Tl; + T4k = T2H * Ti; + T43 = Tz * Tl; + T3Z = Tz * Ti; + T1A = FNMS(T5, Th, T1z); + T2A = FMA(T5, Th, T1z); + T2M = FNMS(T4, Th, TE); + TF = FMA(T4, Th, TE); + T1H = FNMS(T1c, Th, T1f); + T1g = FMA(T1c, Th, T1f); + T51 = FNMS(T1c, Ti, T4H); + T4I = FMA(T1c, Ti, T4H); + T4Y = FMA(T1c, Tl, T4D); + T4E = FNMS(T1c, Tl, T4D); + T1d = 
FNMS(T1c, Tk, T1b); + T1F = FMA(T1c, Tk, T1b); + T34 = FMA(Tc, Th, T27); + T28 = FNMS(Tc, Th, T27); + T3V = FNMS(Tc, Ti, T3E); + T3F = FMA(Tc, Ti, T3E); + T3S = FMA(Tc, Tl, T3A); + T3B = FNMS(Tc, Tl, T3A); + T25 = FMA(Tc, Tk, T24); + T32 = FNMS(Tc, Tk, T24); + T3w = FNMS(T1A, Ti, T3v); + T3s = FMA(T1A, Tl, T3r); + T4Q = FNMS(T2A, Ti, T4P); + T4M = FMA(T2A, Tl, T4L); + T4p = FNMS(T2M, Ti, T4o); + T4l = FMA(T2M, Tl, T4k); + T44 = FNMS(TF, Ti, T43); + T40 = FMA(TF, Tl, T3Z); + { + E T1m, T1e, T1N, T1G; + T1m = T1d * Tl; + T1e = T1d * Ti; + T1N = T1F * Tl; + T1G = T1F * Ti; + { + E T2e, T26, T3a, T33; + T2e = T25 * Tl; + T26 = T25 * Ti; + T3a = T32 * Tl; + T33 = T32 * Ti; + T1n = FNMS(T1g, Ti, T1m); + T1h = FMA(T1g, Tl, T1e); + T1O = FNMS(T1H, Ti, T1N); + T1I = FMA(T1H, Tl, T1G); + T2f = FNMS(T28, Ti, T2e); + T29 = FMA(T28, Tl, T26); + T3b = FNMS(T34, Ti, T3a); + T35 = FMA(T34, Tl, T33); + T3h = Im[0]; + } + } + } + } + } + } + } + T36 = Ip[WS(rs, 8)]; + T37 = Im[WS(rs, 8)]; + { + E T47, T48, T3c, T3d; + T47 = Rm[0]; + T4b = T3g + T3h; + T3i = T3g - T3h; + T45 = T36 + T37; + T38 = T36 - T37; + T48 = Rp[0]; + T3c = Rp[WS(rs, 8)]; + T3d = Rm[WS(rs, 8)]; + T39 = T35 * T38; + T58 = T48 + T47; + T49 = T47 - T48; + T3e = T3c + T3d; + T41 = T3d - T3c; + } + } + { + E T4W, T1x, T1y, T6a, T4U, T1D, T1P, T4V, T5K, T52, T1L, T1Q; + { + E T1B, T1C, T1J, T1K; + { + E T1v, T6A, T4c, T5a, T6y, T46, T1w, T6z, T4a; + T1v = Ip[WS(rs, 3)]; + T6z = T4 * T49; + T4a = T1 * T49; + { + E T3f, T59, T6x, T42; + T3f = FNMS(T3b, T3e, T39); + T59 = T35 * T3e; + T6x = T44 * T41; + T42 = T40 * T41; + T6A = FMA(T1, T4b, T6z); + T4c = FNMS(T4, T4b, T4a); + T3j = T3f + T3i; + T7Z = T3i - T3f; + T5a = FMA(T3b, T38, T59); + T6y = FMA(T40, T45, T6x); + T46 = FNMS(T44, T45, T42); + T1w = Im[WS(rs, 3)]; + } + T5b = T58 + T5a; + T93 = T58 - T5a; + T6B = T6y + T6A; + T8V = T6A - T6y; + T4d = T46 + T4c; + T8J = T4c - T46; + T4W = T1v + T1w; + T1x = T1v - T1w; + } + T1B = Rp[WS(rs, 3)]; + T1C = Rm[WS(rs, 3)]; + 
T1y = T1u * T1x; + T6a = T25 * T4W; + T1J = Ip[WS(rs, 11)]; + T4U = T1B - T1C; + T1D = T1B + T1C; + T1K = Im[WS(rs, 11)]; + T1P = Rp[WS(rs, 11)]; + T4V = T25 * T4U; + T5K = T1u * T1D; + T52 = T1J + T1K; + T1L = T1J - T1K; + T1Q = Rm[WS(rs, 11)]; + } + { + E T1E, T6c, T1M, T4Z, T1R, T6b; + T1E = FNMS(T1A, T1D, T1y); + T6c = T4Y * T52; + T1M = T1I * T1L; + T4Z = T1P - T1Q; + T1R = T1P + T1Q; + T6b = FNMS(T28, T4U, T6a); + { + E T5M, T6d, T50, T1S; + T4X = FMA(T28, T4W, T4V); + T6d = FNMS(T51, T4Z, T6c); + T50 = T4Y * T4Z; + T1S = FNMS(T1O, T1R, T1M); + T5M = T1I * T1R; + T8r = T6d - T6b; + T6e = T6b + T6d; + T8l = T1E - T1S; + T1T = T1E + T1S; + T53 = FMA(T51, T52, T50); + T5L = FMA(T1A, T1x, T5K); + T5N = FMA(T1O, T1L, T5M); + } + } + } + } + { + E T3Q, T2K, T2P, T2L, T6s, T3P, T5c, T3W, T2U, T2X, T2Y, T2V; + { + E T2I, T2J, T2N, T2O, T2S, T3O, T2T; + T2I = Ip[WS(rs, 4)]; + T8C = T53 - T4X; + T54 = T4X + T53; + T8i = T5L - T5N; + T5O = T5L + T5N; + T2J = Im[WS(rs, 4)]; + T2N = Rp[WS(rs, 4)]; + T2O = Rm[WS(rs, 4)]; + T2S = Ip[WS(rs, 12)]; + T3Q = T2I + T2J; + T2K = T2I - T2J; + T3O = T2O - T2N; + T2P = T2N + T2O; + T2T = Im[WS(rs, 12)]; + T2L = T2H * T2K; + T6s = Tk * T3O; + T3P = Th * T3O; + T5c = T2H * T2P; + T3W = T2S + T2T; + T2U = T2S - T2T; + T2X = Rp[WS(rs, 12)]; + T2Y = Rm[WS(rs, 12)]; + T2V = T2R * T2U; + } + { + E T2Q, T6t, T3T, T2Z, T3R, T6u, T3U; + T2Q = FNMS(T2M, T2P, T2L); + T6t = FMA(Th, T3Q, T6s); + T3T = T2Y - T2X; + T2Z = T2X + T2Y; + T3R = FNMS(Tk, T3Q, T3P); + T5d = FMA(T2M, T2K, T5c); + T6u = T3V * T3T; + T3U = T3S * T3T; + { + E T30, T5e, T6v, T3X; + T30 = FNMS(T2W, T2Z, T2V); + T5e = T2R * T2Z; + T6v = FMA(T3S, T3W, T6u); + T3X = FNMS(T3V, T3W, T3U); + T94 = T2Q - T30; + T31 = T2Q + T30; + T8K = T6t - T6v; + T6w = T6t + T6v; + T8U = T3R - T3X; + T3Y = T3R + T3X; + T5f = FMA(T2W, T2U, T5e); + } + } + } + { + E T4J, T12, T65, T13, T4F, T18, T1o, T4G, T5F, T4R, T1k, T1p; + { + E T16, T17, T10, T11, T1i, T1j; + T10 = Ip[WS(rs, 15)]; + T11 = 
Im[WS(rs, 15)]; + T16 = Rp[WS(rs, 15)]; + T80 = T5d - T5f; + T5g = T5d + T5f; + T4J = T10 + T11; + T12 = T10 - T11; + T17 = Rm[WS(rs, 15)]; + T1i = Ip[WS(rs, 7)]; + T65 = T4E * T4J; + T13 = TZ * T12; + T4F = T16 - T17; + T18 = T16 + T17; + T1j = Im[WS(rs, 7)]; + T1o = Rp[WS(rs, 7)]; + T4G = T4E * T4F; + T5F = TZ * T18; + T4R = T1i + T1j; + T1k = T1i - T1j; + T1p = Rm[WS(rs, 7)]; + } + { + E T19, T67, T1l, T4N, T1q, T66; + T19 = FNMS(T15, T18, T13); + T67 = T4M * T4R; + T1l = T1h * T1k; + T4N = T1o - T1p; + T1q = T1o + T1p; + T66 = FNMS(T4I, T4F, T65); + { + E T5H, T68, T4O, T1r; + T4K = FMA(T4I, T4J, T4G); + T68 = FNMS(T4Q, T4N, T67); + T4O = T4M * T4N; + T1r = FNMS(T1n, T1q, T1l); + T5H = T1h * T1q; + T8B = T66 - T68; + T69 = T66 + T68; + T8h = T19 - T1r; + T1s = T19 + T1r; + T4S = FMA(T4Q, T4R, T4O); + T5G = FMA(T15, T12, T5F); + T5I = FMA(T1n, T1k, T5H); + } + } + } + } + { + E T2c, T3x, T2d, T23, T5j, T3q, T2i, T3t, T6i, T8t, T5V, T5X; + { + E Tn, T4i, T9, T4g, Tf, T5U, Ta, T4h, T5u, To, Tt, Tu; + { + E T7, T8, Td, Te; + T7 = Ip[WS(rs, 1)]; + T8q = T4S - T4K; + T4T = T4K + T4S; + T8k = T5G - T5I; + T5J = T5G + T5I; + T8 = Im[WS(rs, 1)]; + Td = Rp[WS(rs, 1)]; + Te = Rm[WS(rs, 1)]; + Tn = Ip[WS(rs, 9)]; + T4i = T7 + T8; + T9 = T7 - T8; + T4g = Td - Te; + Tf = Td + Te; + T5U = T2 * T4i; + Ta = T6 * T9; + T4h = T2 * T4g; + T5u = T6 * Tf; + To = Im[WS(rs, 9)]; + Tt = Rp[WS(rs, 9)]; + Tu = Rm[WS(rs, 9)]; + } + { + E Tg, T4q, Tp, T4m, Tv, T5W, Tq, T4n, T5w; + Tg = FNMS(Tc, Tf, Ta); + T4q = Tn + To; + Tp = Tn - To; + T4m = Tt - Tu; + Tv = Tt + Tu; + T5W = T4l * T4q; + Tq = Tm * Tp; + T4n = T4l * T4m; + T5w = Tm * Tv; + { + E T5v, Tw, T4j, T5x, T4r; + T5v = FMA(Tc, T9, T5u); + Tw = FNMS(Ts, Tv, Tq); + T4j = FMA(T5, T4i, T4h); + T5x = FMA(Ts, Tp, T5w); + T4r = FMA(T4p, T4q, T4n); + Tx = Tg + Tw; + T8a = Tg - Tw; + T5y = T5v + T5x; + T8d = T5v - T5x; + T4s = T4j + T4r; + T8t = T4r - T4j; + T5V = FNMS(T5, T4g, T5U); + T5X = FNMS(T4p, T4m, T5W); + } + } + } + { + E T3p, 
T1Y, T1Z, T22, T2g, T6h, T3o, T5i, T2h; + { + E T20, T21, T1W, T1X, T8u, T2a, T2b, T3n; + T1W = Ip[WS(rs, 2)]; + T1X = Im[WS(rs, 2)]; + T8u = T5V - T5X; + T5Y = T5V + T5X; + T20 = Rp[WS(rs, 2)]; + T3p = T1W + T1X; + T1Y = T1W - T1X; + T8v = T8t - T8u; + T8E = T8u + T8t; + T21 = Rm[WS(rs, 2)]; + T1Z = T1a * T1Y; + T2a = Ip[WS(rs, 10)]; + T2b = Im[WS(rs, 10)]; + T3n = T21 - T20; + T22 = T20 + T21; + T2g = Rp[WS(rs, 10)]; + T2c = T2a - T2b; + T3x = T2a + T2b; + T6h = T1H * T3n; + T3o = T1F * T3n; + T5i = T1a * T22; + T2d = T29 * T2c; + T2h = Rm[WS(rs, 10)]; + } + T23 = FNMS(T1c, T22, T1Z); + T5j = FMA(T1c, T1Y, T5i); + T3q = FNMS(T1H, T3p, T3o); + T2i = T2g + T2h; + T3t = T2h - T2g; + T6i = FMA(T1F, T3p, T6h); + } + { + E T2y, T3K, T2z, T2u, T5o, T3H, T2D, T3I, T6n; + { + E T3G, T2o, T2p, T2t, T6m, T3D, T5n, T2B, T2C; + { + E T2r, T2s, T2m, T2n, T3C, T2w, T2x; + { + E T8N, T8M, T6j, T3u, T2j; + T2m = Ip[WS(rs, 14)]; + T6j = T3w * T3t; + T3u = T3s * T3t; + T2j = FNMS(T2f, T2i, T2d); + { + E T5k, T6k, T3y, T5l; + T5k = T29 * T2i; + T6k = FMA(T3s, T3x, T6j); + T3y = FNMS(T3w, T3x, T3u); + T2k = T23 + T2j; + T82 = T23 - T2j; + T5l = FMA(T2f, T2c, T5k); + T6l = T6i + T6k; + T8N = T6i - T6k; + T3z = T3q + T3y; + T8M = T3q - T3y; + T83 = T5j - T5l; + T5m = T5j + T5l; + T2n = Im[WS(rs, 14)]; + } + T8X = T8M + T8N; + T8O = T8M - T8N; + } + T2r = Rp[WS(rs, 14)]; + T3G = T2m + T2n; + T2o = T2m - T2n; + T2s = Rm[WS(rs, 14)]; + T2w = Ip[WS(rs, 6)]; + T2x = Im[WS(rs, 6)]; + T2p = T2l * T2o; + T3C = T2s - T2r; + T2t = T2r + T2s; + T2y = T2w - T2x; + T3K = T2w + T2x; + T6m = T3F * T3C; + T3D = T3B * T3C; + T5n = T2l * T2t; + T2z = T2v * T2y; + T2B = Rp[WS(rs, 6)]; + T2C = Rm[WS(rs, 6)]; + } + T2u = FNMS(T2q, T2t, T2p); + T5o = FMA(T2q, T2o, T5n); + T3H = FNMS(T3F, T3G, T3D); + T2D = T2B + T2C; + T3I = T2C - T2B; + T6n = FMA(T3B, T3G, T6m); + } + { + E T4v, TC, T5Z, TD, T4t, TI, TS, T4u, T5z, T4z, TO, TT; + { + E TG, TH, TA, TB, TM, TN; + { + E T8Q, T8P, T6o, T3J, T2E; + TA = 
Ip[WS(rs, 5)]; + T6o = T1g * T3I; + T3J = T1d * T3I; + T2E = FNMS(T2A, T2D, T2z); + { + E T5p, T6p, T3L, T5q; + T5p = T2v * T2D; + T6p = FMA(T1d, T3K, T6o); + T3L = FNMS(T1g, T3K, T3J); + T2F = T2u + T2E; + T86 = T2u - T2E; + T5q = FMA(T2A, T2y, T5p); + T6q = T6n + T6p; + T8Q = T6n - T6p; + T3M = T3H + T3L; + T8P = T3H - T3L; + T85 = T5o - T5q; + T5r = T5o + T5q; + TB = Im[WS(rs, 5)]; + } + T8Y = T8Q - T8P; + T8R = T8P + T8Q; + } + TG = Rp[WS(rs, 5)]; + T4v = TA + TB; + TC = TA - TB; + TH = Rm[WS(rs, 5)]; + TM = Ip[WS(rs, 13)]; + T5Z = T32 * T4v; + TD = Tz * TC; + T4t = TG - TH; + TI = TG + TH; + TN = Im[WS(rs, 13)]; + TS = Rp[WS(rs, 13)]; + T4u = T32 * T4t; + T5z = Tz * TI; + T4z = TM + TN; + TO = TM - TN; + TT = Rm[WS(rs, 13)]; + } + { + E TJ, T61, TP, T4x, TU; + TJ = FNMS(TF, TI, TD); + T61 = Ti * T4z; + TP = TL * TO; + T4x = TS - TT; + TU = TS + TT; + { + E T5A, T60, T5C, T62; + T5A = FMA(TF, TC, T5z); + { + E T4w, T4y, TV, T5B, T4A; + T4w = FMA(T34, T4v, T4u); + T4y = Ti * T4x; + TV = FNMS(TR, TU, TP); + T5B = TL * TU; + T60 = FNMS(T34, T4t, T5Z); + T4A = FMA(Tl, T4z, T4y); + TW = TJ + TV; + T8e = TJ - TV; + T5C = FMA(TR, TO, T5B); + T8x = T4w - T4A; + T4B = T4w + T4A; + T62 = FNMS(Tl, T4x, T61); + } + T5D = T5A + T5C; + T8b = T5A - T5C; + T63 = T60 + T62; + T8w = T62 - T60; + } + } + } + } + } + } + { + E T74, T78, T8F, T8y, T7s, T72, T75, T77, T7r, T71, T7f, T7d, T7c, T7g, T7m; + E T7k, T7j, T7n, T6V, T6Y, T7T, T7W; + { + E T6S, T1V, T6I, T3l, T6H, T5Q, T6R, T5t, T56, T6g, T6N, T4f, T6M, T6W, T6D; + E T6O; + { + E T2G, T3k, T5E, T5P, TX, T1U, T5h, T5s; + T74 = Tx - TW; + TX = Tx + TW; + T1U = T1s + T1T; + T78 = T1s - T1T; + T8F = T8w - T8x; + T8y = T8w + T8x; + T7s = T2k - T2F; + T2G = T2k + T2F; + T6S = TX - T1U; + T1V = TX + T1U; + T3k = T31 + T3j; + T72 = T3j - T31; + T75 = T5y - T5D; + T5E = T5y + T5D; + T5P = T5J + T5O; + T77 = T5J - T5O; + T7r = T5b - T5g; + T5h = T5b + T5g; + T6I = T3k - T2G; + T3l = T2G + T3k; + T6H = T5P - T5E; + T5Q = T5E + T5P; + 
T5s = T5m + T5r; + T71 = T5r - T5m; + { + E T64, T6L, T6f, T4C, T55; + T7f = T4B - T4s; + T4C = T4s + T4B; + T55 = T4T + T54; + T7d = T54 - T4T; + T7c = T63 - T5Y; + T64 = T5Y + T63; + T6R = T5h - T5s; + T5t = T5h + T5s; + T6L = T4C - T55; + T56 = T4C + T55; + T7g = T69 - T6e; + T6f = T69 + T6e; + { + E T6r, T6C, T3N, T4e, T6K; + T7m = T3z - T3M; + T3N = T3z + T3M; + T4e = T3Y + T4d; + T7k = T4d - T3Y; + T6K = T6f - T64; + T6g = T64 + T6f; + T7j = T6q - T6l; + T6r = T6l + T6q; + T6N = T4e - T3N; + T4f = T3N + T4e; + T7n = T6B - T6w; + T6C = T6w + T6B; + T6M = T6K + T6L; + T6W = T6K - T6L; + T6D = T6r + T6C; + T6O = T6C - T6r; + } + } + } + { + E T5T, T6X, T6P, T6E; + { + E T5S, T5R, T6F, T6G, T3m, T57; + T5T = T3l - T1V; + T3m = T1V + T3l; + T57 = T4f - T56; + T5S = T56 + T4f; + T6X = T6N + T6O; + T6P = T6N - T6O; + T5R = T5t - T5Q; + T6F = T5t + T5Q; + Im[WS(rs, 15)] = KP500000000 * (T57 - T3m); + Ip[0] = KP500000000 * (T3m + T57); + T6G = T6g + T6D; + T6E = T6g - T6D; + Rp[0] = KP500000000 * (T6F + T6G); + Rm[WS(rs, 15)] = KP500000000 * (T6F - T6G); + Rp[WS(rs, 8)] = KP500000000 * (T5R + T5S); + Rm[WS(rs, 7)] = KP500000000 * (T5R - T5S); + } + { + E T6U, T6T, T6Z, T70, T6J, T6Q; + T6V = T6I - T6H; + T6J = T6H + T6I; + T6Q = T6M + T6P; + T6U = T6P - T6M; + T6T = T6R - T6S; + T6Z = T6R + T6S; + Im[WS(rs, 7)] = KP500000000 * (T6E - T5T); + Ip[WS(rs, 8)] = KP500000000 * (T5T + T6E); + Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6Q, T6J))); + Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6Q, T6J)); + T70 = T6W + T6X; + T6Y = T6W - T6X; + Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T70, T6Z)); + Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T70, T6Z)); + Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6U, T6T)); + Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6U, T6T)); + } + } + } + { + E T7F, T73, T7P, T7t, T7G, T7w, T7Q, T7a, T7L, T7l, T7K, T7U, T7A, T7i, T7u; + E T76; + Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6Y, T6V))); + 
Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6Y, T6V)); + T7F = T72 - T71; + T73 = T71 + T72; + T7P = T7r - T7s; + T7t = T7r + T7s; + T7u = T75 + T74; + T76 = T74 - T75; + { + E T7I, T7e, T7v, T79, T7J, T7h; + T7v = T77 - T78; + T79 = T77 + T78; + T7I = T7c - T7d; + T7e = T7c + T7d; + T7G = T7v - T7u; + T7w = T7u + T7v; + T7Q = T76 - T79; + T7a = T76 + T79; + T7J = T7g - T7f; + T7h = T7f + T7g; + T7L = T7k - T7j; + T7l = T7j + T7k; + T7K = FMA(KP414213562, T7J, T7I); + T7U = FNMS(KP414213562, T7I, T7J); + T7A = FNMS(KP414213562, T7e, T7h); + T7i = FMA(KP414213562, T7h, T7e); + } + { + E T7z, T7b, T7D, T7x, T7M, T7o; + T7z = FNMS(KP707106781, T7a, T73); + T7b = FMA(KP707106781, T7a, T73); + T7D = FMA(KP707106781, T7w, T7t); + T7x = FNMS(KP707106781, T7w, T7t); + T7M = T7n - T7m; + T7o = T7m + T7n; + { + E T7S, T7R, T7X, T7Y; + { + E T7H, T7V, T7B, T7p, T7O, T7N; + T7T = FMA(KP707106781, T7G, T7F); + T7H = FNMS(KP707106781, T7G, T7F); + T7N = FMA(KP414213562, T7M, T7L); + T7V = FNMS(KP414213562, T7L, T7M); + T7B = FMA(KP414213562, T7l, T7o); + T7p = FNMS(KP414213562, T7o, T7l); + T7O = T7K - T7N; + T7S = T7K + T7N; + T7R = FMA(KP707106781, T7Q, T7P); + T7X = FNMS(KP707106781, T7Q, T7P); + { + E T7C, T7E, T7y, T7q; + T7C = T7A - T7B; + T7E = T7A + T7B; + T7y = T7p - T7i; + T7q = T7i + T7p; + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7O, T7H))); + Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7O, T7H)); + Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T7C, T7z))); + Ip[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T7C, T7z)); + Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T7E, T7D)); + Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T7E, T7D)); + Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T7y, T7x)); + Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T7y, T7x)); + Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T7q, T7b))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T7q, T7b)); + T7Y = T7U + T7V; + T7W = T7U - T7V; + } + } + 
Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7Y, T7X)); + Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7Y, T7X)); + Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7S, T7R)); + Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7S, T7R)); + } + } + } + { + E Ta7, Tat, T9l, T89, T9H, Taj, T9v, T99, T9m, T9c, T9w, T8o, Tao, Tay, Tae; + E Ta3, T9q, T9A, T9g, T8I, T8Z, T8W, Tak, Taa, Tau, T9O, T9r, T8T, Tar, Taz; + E Taf, T9W; + { + E T9M, T9L, T9J, T9I, T8s, T8G, T8D, Ta0, Tam, T9Z, Ta1, T8z, Ta9, T9K; + { + E T9F, T81, Ta5, T95, T96, T97, Ta6, T88, T84, T87; + T9F = T80 + T7Z; + T81 = T7Z - T80; + Ta5 = T93 - T94; + T95 = T93 + T94; + T96 = T83 + T82; + T84 = T82 - T83; + Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7W, T7T))); + Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7W, T7T)); + T87 = T85 + T86; + T97 = T85 - T86; + Ta6 = T84 - T87; + T88 = T84 + T87; + { + E T8j, T9a, T8g, T8m; + { + E T8c, T9G, T98, T8f; + T9M = T8a + T8b; + T8c = T8a - T8b; + Ta7 = FMA(KP707106781, Ta6, Ta5); + Tat = FNMS(KP707106781, Ta6, Ta5); + T9l = FNMS(KP707106781, T88, T81); + T89 = FMA(KP707106781, T88, T81); + T9G = T97 - T96; + T98 = T96 + T97; + T8f = T8d + T8e; + T9L = T8d - T8e; + T9J = T8h + T8i; + T8j = T8h - T8i; + T9H = FMA(KP707106781, T9G, T9F); + Taj = FNMS(KP707106781, T9G, T9F); + T9v = FNMS(KP707106781, T98, T95); + T99 = FMA(KP707106781, T98, T95); + T9a = FMA(KP414213562, T8c, T8f); + T8g = FNMS(KP414213562, T8f, T8c); + T8m = T8k + T8l; + T9I = T8k - T8l; + } + { + E T9X, T9Y, T9b, T8n; + T8s = T8q + T8r; + T9X = T8r - T8q; + T9Y = T8F - T8E; + T8G = T8E + T8F; + T8D = T8B + T8C; + Ta0 = T8B - T8C; + T9b = FNMS(KP414213562, T8j, T8m); + T8n = FMA(KP414213562, T8m, T8j); + Tam = FMA(KP707106781, T9Y, T9X); + T9Z = FNMS(KP707106781, T9Y, T9X); + T9m = T9b - T9a; + T9c = T9a + T9b; + T9w = T8g - T8n; + T8o = T8g + T8n; + Ta1 = T8y - T8v; + T8z = T8v + T8y; + } + } + } + { + E T9o, T8A, Tan, Ta2, T9p, T8H; + Tan = FMA(KP707106781, Ta1, Ta0); + Ta2 
= FNMS(KP707106781, Ta1, Ta0); + T9o = FNMS(KP707106781, T8z, T8s); + T8A = FMA(KP707106781, T8z, T8s); + Tao = FMA(KP198912367, Tan, Tam); + Tay = FNMS(KP198912367, Tam, Tan); + Tae = FMA(KP668178637, T9Z, Ta2); + Ta3 = FNMS(KP668178637, Ta2, T9Z); + T9p = FNMS(KP707106781, T8G, T8D); + T8H = FMA(KP707106781, T8G, T8D); + Ta9 = FNMS(KP414213562, T9I, T9J); + T9K = FMA(KP414213562, T9J, T9I); + T9q = FNMS(KP668178637, T9p, T9o); + T9A = FMA(KP668178637, T9o, T9p); + T9g = FNMS(KP198912367, T8A, T8H); + T8I = FMA(KP198912367, T8H, T8A); + } + { + E T8L, T9T, Tap, T9S, T9U, T8S, Taq, T9V; + { + E T9Q, T9R, Ta8, T9N; + T8L = T8J - T8K; + T9Q = T8K + T8J; + T9R = T8X - T8Y; + T8Z = T8X + T8Y; + T8W = T8U + T8V; + T9T = T8V - T8U; + Ta8 = FMA(KP414213562, T9L, T9M); + T9N = FNMS(KP414213562, T9M, T9L); + Tap = FMA(KP707106781, T9R, T9Q); + T9S = FNMS(KP707106781, T9R, T9Q); + Tak = Ta8 + Ta9; + Taa = Ta8 - Ta9; + Tau = T9N + T9K; + T9O = T9K - T9N; + T9U = T8R - T8O; + T8S = T8O + T8R; + } + Taq = FMA(KP707106781, T9U, T9T); + T9V = FNMS(KP707106781, T9U, T9T); + T9r = FNMS(KP707106781, T8S, T8L); + T8T = FMA(KP707106781, T8S, T8L); + Tar = FMA(KP198912367, Taq, Tap); + Taz = FNMS(KP198912367, Tap, Taq); + Taf = FMA(KP668178637, T9S, T9V); + T9W = FNMS(KP668178637, T9V, T9S); + } + } + { + E T9z, T9C, Tad, Tag; + { + E T9f, T8p, T9j, T9d, T9s, T90; + T9f = FNMS(KP923879532, T8o, T89); + T8p = FMA(KP923879532, T8o, T89); + T9j = FMA(KP923879532, T9c, T99); + T9d = FNMS(KP923879532, T9c, T99); + T9s = FNMS(KP707106781, T8Z, T8W); + T90 = FMA(KP707106781, T8Z, T8W); + { + E T9y, T9x, T9D, T9E; + { + E T9n, T9B, T9h, T91, T9u, T9t; + T9z = FMA(KP923879532, T9m, T9l); + T9n = FNMS(KP923879532, T9m, T9l); + T9t = FMA(KP668178637, T9s, T9r); + T9B = FNMS(KP668178637, T9r, T9s); + T9h = FMA(KP198912367, T8T, T90); + T91 = FNMS(KP198912367, T90, T8T); + T9u = T9q + T9t; + T9y = T9t - T9q; + T9x = FMA(KP923879532, T9w, T9v); + T9D = FNMS(KP923879532, T9w, T9v); + { + E T9i, T9k, 
T9e, T92; + T9i = T9g - T9h; + T9k = T9g + T9h; + T9e = T91 - T8I; + T92 = T8I + T91; + Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T9u, T9n))); + Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T9u, T9n)); + Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T9i, T9f))); + Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T9i, T9f)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T9k, T9j)); + Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T9k, T9j)); + Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T9e, T9d)); + Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T9e, T9d)); + Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T92, T8p))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T92, T8p)); + T9E = T9A + T9B; + T9C = T9A - T9B; + } + } + Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T9E, T9D)); + Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T9E, T9D)); + Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T9y, T9x)); + Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T9y, T9x)); + } + } + { + E Tac, Tab, Tah, Tai, T9P, Ta4; + Tad = FNMS(KP923879532, T9O, T9H); + T9P = FMA(KP923879532, T9O, T9H); + Ta4 = T9W - Ta3; + Tac = Ta3 + T9W; + Tab = FNMS(KP923879532, Taa, Ta7); + Tah = FMA(KP923879532, Taa, Ta7); + Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T9C, T9z))); + Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T9C, T9z)); + Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, Ta4, T9P))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, Ta4, T9P)); + Tai = Tae + Taf; + Tag = Tae - Taf; + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, Tai, Tah)); + Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, Tai, Tah)); + Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, Tac, Tab)); + Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, Tac, Tab)); + } + { + E Taw, Tav, TaB, TaC, Tal, Tas; + Tax = FNMS(KP923879532, Tak, Taj); + Tal = FMA(KP923879532, Tak, Taj); + Tas = Tao - Tar; + Taw = Tao + Tar; + Tav = FNMS(KP923879532, Tau, Tat); + TaB = 
FMA(KP923879532, Tau, Tat); + Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, Tag, Tad))); + Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, Tag, Tad)); + Im[0] = -(KP500000000 * (FNMS(KP980785280, Tas, Tal))); + Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, Tas, Tal)); + TaC = Tay + Taz; + TaA = Tay - Taz; + Rm[0] = KP500000000 * (FMA(KP980785280, TaC, TaB)); + Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, TaC, TaB)); + Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Taw, Tav)); + Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, Taw, Tav)); + } + } + } + } + } + } + Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, TaA, Tax))); + Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, TaA, Tax)); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cfdft2_32", twinstr, &GENUS, {300, 162, 252, 0} }; + +void X(codelet_hc2cfdft2_32) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_32, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include hc2cf.h */ + +/* + * This function contains 552 FP additions, 300 FP multiplications, + * (or, 440 additions, 188 multiplications, 112 fused multiply/add), + * 166 stack variables, 9 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP277785116, +0.277785116509801112371415406974266437187468595); + DK(KP415734806, +0.415734806151272618539394188808952878369280406); + DK(KP097545161, +0.097545161008064133924142434238511120463845809); + DK(KP490392640, +0.490392640201615224563091118067119518486966865); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP191341716, 
+0.191341716182544885864229992015199433380672281); + DK(KP461939766, +0.461939766255643378064091594698394143411208313); + DK(KP353553390, +0.353553390593273762200422181052424519642417969); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) { + E T1, T4, T2, T5, T7, T1b, T1d, Td, Ti, Tk, Tj, Tl, TL, TR, T2h; + E T2O, T16, T2l, T10, T2K, Tm, Tq, T3s, T3K, T3w, T3M, T4e, T4u, T4i, T4w; + E Ty, TE, T3h, T3j, T2q, T2u, T4l, T4n, T1v, T1B, T3E, T3G, T2B, T2F, T3Y; + E T40, T1f, T1G, T1i, T1H, T1j, T1M, T1n, T1I, T23, T2U, T26, T2V, T27, T30; + E T2b, T2W; + { + E Tw, T1A, TD, T1t, Tx, T1z, TC, T1u, TJ, T15, TQ, TY, TK, T14, TP; + E TZ; + { + E T3, Tc, T6, Tb; + T1 = W[0]; + T4 = W[1]; + T2 = W[2]; + T5 = W[3]; + T3 = T1 * T2; + Tc = T4 * T2; + T6 = T4 * T5; + Tb = T1 * T5; + T7 = T3 + T6; + T1b = T3 - T6; + T1d = Tb + Tc; + Td = Tb - Tc; + Ti = W[4]; + Tw = T1 * Ti; + T1A = T5 * Ti; + TD = T4 * Ti; + T1t = T2 * Ti; + Tk = W[5]; + Tx = T4 * Tk; + T1z = T2 * Tk; + TC = T1 * Tk; + T1u = T5 * Tk; + Tj = W[6]; + TJ = T1 * Tj; + T15 = T5 * Tj; + TQ = T4 * Tj; + TY = T2 * Tj; + Tl = W[7]; + TK = T4 * Tl; + T14 = T2 * Tl; + TP = T1 * Tl; + TZ = T5 * Tl; + } + TL = TJ + TK; + TR = TP - TQ; + T2h = TJ - TK; + T2O = T14 - T15; + T16 = T14 + T15; + T2l = TP + TQ; + T10 = TY - TZ; + T2K = TY + TZ; + Tm = FMA(Ti, Tj, Tk * Tl); + Tq = FNMS(Tk, Tj, Ti * Tl); + { + E T3q, T3r, T3u, T3v; + T3q = T7 * Tj; + T3r = Td * Tl; + T3s = T3q + T3r; + T3K = T3q - T3r; + T3u = T7 * Tl; + T3v = Td * Tj; + T3w = T3u - T3v; + T3M = T3u + T3v; + } + { + E T4c, T4d, T4g, T4h; + T4c = T1b * Tj; + T4d = T1d * Tl; + T4e = T4c - T4d; + T4u = T4c + T4d; + T4g = T1b * Tl; + T4h = T1d * Tj; + T4i = T4g + T4h; + T4w = T4g - T4h; + Ty = Tw - Tx; + TE = TC + TD; + T3h = FMA(Ty, Tj, TE * Tl); + T3j = FNMS(TE, Tj, Ty * Tl); + } + 
T2q = T1t - T1u; + T2u = T1z + T1A; + T4l = FMA(T2q, Tj, T2u * Tl); + T4n = FNMS(T2u, Tj, T2q * Tl); + T1v = T1t + T1u; + T1B = T1z - T1A; + T3E = FMA(T1v, Tj, T1B * Tl); + T3G = FNMS(T1B, Tj, T1v * Tl); + T2B = Tw + Tx; + T2F = TC - TD; + T3Y = FMA(T2B, Tj, T2F * Tl); + T40 = FNMS(T2F, Tj, T2B * Tl); + { + E T1c, T1e, T1g, T1h; + T1c = T1b * Ti; + T1e = T1d * Tk; + T1f = T1c - T1e; + T1G = T1c + T1e; + T1g = T1b * Tk; + T1h = T1d * Ti; + T1i = T1g + T1h; + T1H = T1g - T1h; + } + T1j = FMA(T1f, Tj, T1i * Tl); + T1M = FNMS(T1H, Tj, T1G * Tl); + T1n = FNMS(T1i, Tj, T1f * Tl); + T1I = FMA(T1G, Tj, T1H * Tl); + { + E T21, T22, T24, T25; + T21 = T7 * Ti; + T22 = Td * Tk; + T23 = T21 + T22; + T2U = T21 - T22; + T24 = T7 * Tk; + T25 = Td * Ti; + T26 = T24 - T25; + T2V = T24 + T25; + } + T27 = FMA(T23, Tj, T26 * Tl); + T30 = FNMS(T2V, Tj, T2U * Tl); + T2b = FNMS(T26, Tj, T23 * Tl); + T2W = FMA(T2U, Tj, T2V * Tl); + } + { + E T38, T7l, T7S, T8Y, T7Z, T91, T3A, T6k, T4F, T83, T5C, T6n, T2T, T84, T4I; + E T7m, T2g, T4M, T4P, T2z, T3T, T6m, T7O, T7V, T7j, T87, T5v, T6j, T7L, T7U; + E T7g, T86, Tv, TW, T61, T4U, T4X, T62, T4b, T6c, T7v, T7C, T5g, T6f, T74; + E T8G, T7s, T7B, T71, T8F, T1s, T1R, T65, T51, T54, T64, T4A, T6g, T7G, T8U; + E T5n, T6d, T7b, T8J, T7z, T8R, T78, T8I; + { + E T2E, T2I, T3p, T5w, T37, T4D, T3g, T5A, T2N, T2R, T3y, T5x, T2Z, T33, T3l; + E T5z; + { + E T2C, T2D, T3o, T2G, T2H, T3n; + T2C = Ip[WS(rs, 4)]; + T2D = Im[WS(rs, 4)]; + T3o = T2C + T2D; + T2G = Rp[WS(rs, 4)]; + T2H = Rm[WS(rs, 4)]; + T3n = T2G - T2H; + T2E = T2C - T2D; + T2I = T2G + T2H; + T3p = FMA(Ti, T3n, Tk * T3o); + T5w = FNMS(Tk, T3n, Ti * T3o); + } + { + E T35, T36, T3f, T3c, T3d, T3e; + T35 = Ip[0]; + T36 = Im[0]; + T3f = T35 + T36; + T3c = Rm[0]; + T3d = Rp[0]; + T3e = T3c - T3d; + T37 = T35 - T36; + T4D = T3d + T3c; + T3g = FNMS(T4, T3f, T1 * T3e); + T5A = FMA(T4, T3e, T1 * T3f); + } + { + E T2L, T2M, T3x, T2P, T2Q, T3t; + T2L = Ip[WS(rs, 12)]; + T2M = Im[WS(rs, 12)]; + T3x = T2L + T2M; 
+ T2P = Rp[WS(rs, 12)]; + T2Q = Rm[WS(rs, 12)]; + T3t = T2P - T2Q; + T2N = T2L - T2M; + T2R = T2P + T2Q; + T3y = FMA(T3s, T3t, T3w * T3x); + T5x = FNMS(T3w, T3t, T3s * T3x); + } + { + E T2X, T2Y, T3k, T31, T32, T3i; + T2X = Ip[WS(rs, 8)]; + T2Y = Im[WS(rs, 8)]; + T3k = T2X + T2Y; + T31 = Rp[WS(rs, 8)]; + T32 = Rm[WS(rs, 8)]; + T3i = T31 - T32; + T2Z = T2X - T2Y; + T33 = T31 + T32; + T3l = FMA(T3h, T3i, T3j * T3k); + T5z = FNMS(T3j, T3i, T3h * T3k); + } + { + E T34, T7Q, T7R, T4E, T5y, T5B; + T34 = FNMS(T30, T33, T2W * T2Z); + T38 = T34 + T37; + T7l = T37 - T34; + T7Q = T3l + T3g; + T7R = T5w - T5x; + T7S = T7Q - T7R; + T8Y = T7R + T7Q; + { + E T7X, T7Y, T3m, T3z; + T7X = T3y - T3p; + T7Y = T5A - T5z; + T7Z = T7X + T7Y; + T91 = T7Y - T7X; + T3m = T3g - T3l; + T3z = T3p + T3y; + T3A = T3m - T3z; + T6k = T3z + T3m; + } + T4E = FMA(T2W, T33, T30 * T2Z); + T4F = T4D + T4E; + T83 = T4D - T4E; + T5y = T5w + T5x; + T5B = T5z + T5A; + T5C = T5y + T5B; + T6n = T5B - T5y; + { + E T2J, T2S, T4G, T4H; + T2J = FNMS(T2F, T2I, T2B * T2E); + T2S = FNMS(T2O, T2R, T2K * T2N); + T2T = T2J + T2S; + T84 = T2J - T2S; + T4G = FMA(T2B, T2I, T2F * T2E); + T4H = FMA(T2K, T2R, T2O * T2N); + T4I = T4G + T4H; + T7m = T4G - T4H; + } + } + } + { + E T20, T5p, T3D, T4K, T2y, T5t, T3R, T4O, T2f, T5q, T3I, T4L, T2p, T5s, T3O; + E T4N; + { + E T1W, T3C, T1Z, T3B; + { + E T1U, T1V, T1X, T1Y; + T1U = Ip[WS(rs, 2)]; + T1V = Im[WS(rs, 2)]; + T1W = T1U - T1V; + T3C = T1U + T1V; + T1X = Rp[WS(rs, 2)]; + T1Y = Rm[WS(rs, 2)]; + T1Z = T1X + T1Y; + T3B = T1X - T1Y; + } + T20 = FNMS(T1d, T1Z, T1b * T1W); + T5p = FNMS(T1H, T3B, T1G * T3C); + T3D = FMA(T1G, T3B, T1H * T3C); + T4K = FMA(T1b, T1Z, T1d * T1W); + } + { + E T2t, T3Q, T2x, T3P; + { + E T2r, T2s, T2v, T2w; + T2r = Ip[WS(rs, 6)]; + T2s = Im[WS(rs, 6)]; + T2t = T2r - T2s; + T3Q = T2r + T2s; + T2v = Rp[WS(rs, 6)]; + T2w = Rm[WS(rs, 6)]; + T2x = T2v + T2w; + T3P = T2v - T2w; + } + T2y = FNMS(T2u, T2x, T2q * T2t); + T5t = FNMS(T1i, T3P, T1f * T3Q); + T3R = 
FMA(T1f, T3P, T1i * T3Q); + T4O = FMA(T2q, T2x, T2u * T2t); + } + { + E T2a, T3H, T2e, T3F; + { + E T28, T29, T2c, T2d; + T28 = Ip[WS(rs, 10)]; + T29 = Im[WS(rs, 10)]; + T2a = T28 - T29; + T3H = T28 + T29; + T2c = Rp[WS(rs, 10)]; + T2d = Rm[WS(rs, 10)]; + T2e = T2c + T2d; + T3F = T2c - T2d; + } + T2f = FNMS(T2b, T2e, T27 * T2a); + T5q = FNMS(T3G, T3F, T3E * T3H); + T3I = FMA(T3E, T3F, T3G * T3H); + T4L = FMA(T27, T2e, T2b * T2a); + } + { + E T2k, T3N, T2o, T3L; + { + E T2i, T2j, T2m, T2n; + T2i = Ip[WS(rs, 14)]; + T2j = Im[WS(rs, 14)]; + T2k = T2i - T2j; + T3N = T2i + T2j; + T2m = Rp[WS(rs, 14)]; + T2n = Rm[WS(rs, 14)]; + T2o = T2m + T2n; + T3L = T2m - T2n; + } + T2p = FNMS(T2l, T2o, T2h * T2k); + T5s = FNMS(T3M, T3L, T3K * T3N); + T3O = FMA(T3K, T3L, T3M * T3N); + T4N = FMA(T2h, T2o, T2l * T2k); + } + { + E T3J, T3S, T5r, T5u; + T2g = T20 + T2f; + T4M = T4K + T4L; + T4P = T4N + T4O; + T2z = T2p + T2y; + T3J = T3D + T3I; + T3S = T3O + T3R; + T3T = T3J + T3S; + T6m = T3S - T3J; + { + E T7M, T7N, T7h, T7i; + T7M = T5s - T5t; + T7N = T3R - T3O; + T7O = T7M + T7N; + T7V = T7M - T7N; + T7h = T4N - T4O; + T7i = T2p - T2y; + T7j = T7h + T7i; + T87 = T7h - T7i; + } + T5r = T5p + T5q; + T5u = T5s + T5t; + T5v = T5r + T5u; + T6j = T5u - T5r; + { + E T7J, T7K, T7e, T7f; + T7J = T3I - T3D; + T7K = T5p - T5q; + T7L = T7J - T7K; + T7U = T7K + T7J; + T7e = T20 - T2f; + T7f = T4K - T4L; + T7g = T7e - T7f; + T86 = T7f + T7e; + } + } + } + { + E Th, T5a, T3X, T4S, TV, T5e, T49, T4W, Tu, T5b, T42, T4T, TI, T5d, T46; + E T4V; + { + E Ta, T3W, Tg, T3V; + { + E T8, T9, Te, Tf; + T8 = Ip[WS(rs, 1)]; + T9 = Im[WS(rs, 1)]; + Ta = T8 - T9; + T3W = T8 + T9; + Te = Rp[WS(rs, 1)]; + Tf = Rm[WS(rs, 1)]; + Tg = Te + Tf; + T3V = Te - Tf; + } + Th = FNMS(Td, Tg, T7 * Ta); + T5a = FNMS(T5, T3V, T2 * T3W); + T3X = FMA(T2, T3V, T5 * T3W); + T4S = FMA(T7, Tg, Td * Ta); + } + { + E TO, T48, TU, T47; + { + E TM, TN, TS, TT; + TM = Ip[WS(rs, 13)]; + TN = Im[WS(rs, 13)]; + TO = TM - TN; + T48 = TM + TN; + 
TS = Rp[WS(rs, 13)]; + TT = Rm[WS(rs, 13)]; + TU = TS + TT; + T47 = TS - TT; + } + TV = FNMS(TR, TU, TL * TO); + T5e = FNMS(Tl, T47, Tj * T48); + T49 = FMA(Tj, T47, Tl * T48); + T4W = FMA(TL, TU, TR * TO); + } + { + E Tp, T41, Tt, T3Z; + { + E Tn, To, Tr, Ts; + Tn = Ip[WS(rs, 9)]; + To = Im[WS(rs, 9)]; + Tp = Tn - To; + T41 = Tn + To; + Tr = Rp[WS(rs, 9)]; + Ts = Rm[WS(rs, 9)]; + Tt = Tr + Ts; + T3Z = Tr - Ts; + } + Tu = FNMS(Tq, Tt, Tm * Tp); + T5b = FNMS(T40, T3Z, T3Y * T41); + T42 = FMA(T3Y, T3Z, T40 * T41); + T4T = FMA(Tm, Tt, Tq * Tp); + } + { + E TB, T45, TH, T44; + { + E Tz, TA, TF, TG; + Tz = Ip[WS(rs, 5)]; + TA = Im[WS(rs, 5)]; + TB = Tz - TA; + T45 = Tz + TA; + TF = Rp[WS(rs, 5)]; + TG = Rm[WS(rs, 5)]; + TH = TF + TG; + T44 = TF - TG; + } + TI = FNMS(TE, TH, Ty * TB); + T5d = FNMS(T2V, T44, T2U * T45); + T46 = FMA(T2U, T44, T2V * T45); + T4V = FMA(Ty, TH, TE * TB); + } + Tv = Th + Tu; + TW = TI + TV; + T61 = Tv - TW; + T4U = T4S + T4T; + T4X = T4V + T4W; + T62 = T4U - T4X; + { + E T43, T4a, T7t, T7u; + T43 = T3X + T42; + T4a = T46 + T49; + T4b = T43 + T4a; + T6c = T4a - T43; + T7t = T5e - T5d; + T7u = T46 - T49; + T7v = T7t + T7u; + T7C = T7t - T7u; + } + { + E T5c, T5f, T72, T73; + T5c = T5a + T5b; + T5f = T5d + T5e; + T5g = T5c + T5f; + T6f = T5f - T5c; + T72 = T4S - T4T; + T73 = TI - TV; + T74 = T72 + T73; + T8G = T72 - T73; + } + { + E T7q, T7r, T6Z, T70; + T7q = T42 - T3X; + T7r = T5a - T5b; + T7s = T7q - T7r; + T7B = T7r + T7q; + T6Z = Th - Tu; + T70 = T4V - T4W; + T71 = T6Z - T70; + T8F = T6Z + T70; + } + } + { + E T1a, T5h, T4k, T4Z, T1Q, T5l, T4y, T53, T1r, T5i, T4p, T50, T1F, T5k, T4t; + E T52; + { + E T13, T4j, T19, T4f; + { + E T11, T12, T17, T18; + T11 = Ip[WS(rs, 15)]; + T12 = Im[WS(rs, 15)]; + T13 = T11 - T12; + T4j = T11 + T12; + T17 = Rp[WS(rs, 15)]; + T18 = Rm[WS(rs, 15)]; + T19 = T17 + T18; + T4f = T17 - T18; + } + T1a = FNMS(T16, T19, T10 * T13); + T5h = FNMS(T4i, T4f, T4e * T4j); + T4k = FMA(T4e, T4f, T4i * T4j); + T4Z = FMA(T10, T19, 
T16 * T13); + } + { + E T1L, T4x, T1P, T4v; + { + E T1J, T1K, T1N, T1O; + T1J = Ip[WS(rs, 11)]; + T1K = Im[WS(rs, 11)]; + T1L = T1J - T1K; + T4x = T1J + T1K; + T1N = Rp[WS(rs, 11)]; + T1O = Rm[WS(rs, 11)]; + T1P = T1N + T1O; + T4v = T1N - T1O; + } + T1Q = FNMS(T1M, T1P, T1I * T1L); + T5l = FNMS(T4w, T4v, T4u * T4x); + T4y = FMA(T4u, T4v, T4w * T4x); + T53 = FMA(T1I, T1P, T1M * T1L); + } + { + E T1m, T4o, T1q, T4m; + { + E T1k, T1l, T1o, T1p; + T1k = Ip[WS(rs, 7)]; + T1l = Im[WS(rs, 7)]; + T1m = T1k - T1l; + T4o = T1k + T1l; + T1o = Rp[WS(rs, 7)]; + T1p = Rm[WS(rs, 7)]; + T1q = T1o + T1p; + T4m = T1o - T1p; + } + T1r = FNMS(T1n, T1q, T1j * T1m); + T5i = FNMS(T4n, T4m, T4l * T4o); + T4p = FMA(T4l, T4m, T4n * T4o); + T50 = FMA(T1j, T1q, T1n * T1m); + } + { + E T1y, T4s, T1E, T4r; + { + E T1w, T1x, T1C, T1D; + T1w = Ip[WS(rs, 3)]; + T1x = Im[WS(rs, 3)]; + T1y = T1w - T1x; + T4s = T1w + T1x; + T1C = Rp[WS(rs, 3)]; + T1D = Rm[WS(rs, 3)]; + T1E = T1C + T1D; + T4r = T1C - T1D; + } + T1F = FNMS(T1B, T1E, T1v * T1y); + T5k = FNMS(T26, T4r, T23 * T4s); + T4t = FMA(T23, T4r, T26 * T4s); + T52 = FMA(T1v, T1E, T1B * T1y); + } + T1s = T1a + T1r; + T1R = T1F + T1Q; + T65 = T1s - T1R; + T51 = T4Z + T50; + T54 = T52 + T53; + T64 = T51 - T54; + { + E T4q, T4z, T7E, T7F; + T4q = T4k + T4p; + T4z = T4t + T4y; + T4A = T4q + T4z; + T6g = T4z - T4q; + T7E = T5h - T5i; + T7F = T4y - T4t; + T7G = T7E + T7F; + T8U = T7E - T7F; + } + { + E T5j, T5m, T79, T7a; + T5j = T5h + T5i; + T5m = T5k + T5l; + T5n = T5j + T5m; + T6d = T5j - T5m; + T79 = T4Z - T50; + T7a = T1F - T1Q; + T7b = T79 + T7a; + T8J = T79 - T7a; + } + { + E T7x, T7y, T76, T77; + T7x = T4p - T4k; + T7y = T5k - T5l; + T7z = T7x - T7y; + T8R = T7x + T7y; + T76 = T1a - T1r; + T77 = T52 - T53; + T78 = T76 - T77; + T8I = T76 + T77; + } + } + { + E T1T, T5S, T5M, T5W, T5P, T5X, T3a, T5I, T4C, T58, T56, T5H, T5E, T5G, T4R; + E T5R; + { + E TX, T1S, T5K, T5L; + TX = Tv + TW; + T1S = T1s + T1R; + T1T = TX + T1S; + T5S = TX - T1S; + T5K = 
T5n - T5g; + T5L = T4b - T4A; + T5M = T5K + T5L; + T5W = T5K - T5L; + } + { + E T5N, T5O, T2A, T39; + T5N = T3T + T3A; + T5O = T5C - T5v; + T5P = T5N - T5O; + T5X = T5N + T5O; + T2A = T2g + T2z; + T39 = T2T + T38; + T3a = T2A + T39; + T5I = T39 - T2A; + } + { + E T3U, T4B, T4Y, T55; + T3U = T3A - T3T; + T4B = T4b + T4A; + T4C = T3U - T4B; + T58 = T4B + T3U; + T4Y = T4U + T4X; + T55 = T51 + T54; + T56 = T4Y + T55; + T5H = T55 - T4Y; + } + { + E T5o, T5D, T4J, T4Q; + T5o = T5g + T5n; + T5D = T5v + T5C; + T5E = T5o - T5D; + T5G = T5o + T5D; + T4J = T4F + T4I; + T4Q = T4M + T4P; + T4R = T4J + T4Q; + T5R = T4J - T4Q; + } + { + E T3b, T5F, T57, T59; + T3b = T1T + T3a; + Ip[0] = KP500000000 * (T3b + T4C); + Im[WS(rs, 15)] = KP500000000 * (T4C - T3b); + T5F = T4R + T56; + Rm[WS(rs, 15)] = KP500000000 * (T5F - T5G); + Rp[0] = KP500000000 * (T5F + T5G); + T57 = T4R - T56; + Rm[WS(rs, 7)] = KP500000000 * (T57 - T58); + Rp[WS(rs, 8)] = KP500000000 * (T57 + T58); + T59 = T3a - T1T; + Ip[WS(rs, 8)] = KP500000000 * (T59 + T5E); + Im[WS(rs, 7)] = KP500000000 * (T5E - T59); + } + { + E T5J, T5Q, T5Z, T60; + T5J = KP500000000 * (T5H + T5I); + T5Q = KP353553390 * (T5M + T5P); + Ip[WS(rs, 4)] = T5J + T5Q; + Im[WS(rs, 11)] = T5Q - T5J; + T5Z = KP500000000 * (T5R + T5S); + T60 = KP353553390 * (T5W + T5X); + Rm[WS(rs, 11)] = T5Z - T60; + Rp[WS(rs, 4)] = T5Z + T60; + } + { + E T5T, T5U, T5V, T5Y; + T5T = KP500000000 * (T5R - T5S); + T5U = KP353553390 * (T5P - T5M); + Rm[WS(rs, 3)] = T5T - T5U; + Rp[WS(rs, 12)] = T5T + T5U; + T5V = KP500000000 * (T5I - T5H); + T5Y = KP353553390 * (T5W - T5X); + Ip[WS(rs, 12)] = T5V + T5Y; + Im[WS(rs, 3)] = T5Y - T5V; + } + } + { + E T67, T6Q, T6K, T6U, T6N, T6V, T6a, T6G, T6i, T6A, T6t, T6P, T6w, T6F, T6p; + E T6B; + { + E T63, T66, T6I, T6J; + T63 = T61 - T62; + T66 = T64 + T65; + T67 = KP353553390 * (T63 + T66); + T6Q = KP353553390 * (T63 - T66); + T6I = T6d - T6c; + T6J = T6g - T6f; + T6K = FMA(KP461939766, T6I, KP191341716 * T6J); + T6U = 
FNMS(KP461939766, T6J, KP191341716 * T6I); + } + { + E T6L, T6M, T68, T69; + T6L = T6k - T6j; + T6M = T6n - T6m; + T6N = FNMS(KP461939766, T6M, KP191341716 * T6L); + T6V = FMA(KP461939766, T6L, KP191341716 * T6M); + T68 = T4P - T4M; + T69 = T38 - T2T; + T6a = KP500000000 * (T68 + T69); + T6G = KP500000000 * (T69 - T68); + } + { + E T6e, T6h, T6r, T6s; + T6e = T6c + T6d; + T6h = T6f + T6g; + T6i = FMA(KP191341716, T6e, KP461939766 * T6h); + T6A = FNMS(KP191341716, T6h, KP461939766 * T6e); + T6r = T4F - T4I; + T6s = T2g - T2z; + T6t = KP500000000 * (T6r + T6s); + T6P = KP500000000 * (T6r - T6s); + } + { + E T6u, T6v, T6l, T6o; + T6u = T62 + T61; + T6v = T64 - T65; + T6w = KP353553390 * (T6u + T6v); + T6F = KP353553390 * (T6v - T6u); + T6l = T6j + T6k; + T6o = T6m + T6n; + T6p = FNMS(KP191341716, T6o, KP461939766 * T6l); + T6B = FMA(KP191341716, T6l, KP461939766 * T6o); + } + { + E T6b, T6q, T6D, T6E; + T6b = T67 + T6a; + T6q = T6i + T6p; + Ip[WS(rs, 2)] = T6b + T6q; + Im[WS(rs, 13)] = T6q - T6b; + T6D = T6t + T6w; + T6E = T6A + T6B; + Rm[WS(rs, 13)] = T6D - T6E; + Rp[WS(rs, 2)] = T6D + T6E; + } + { + E T6x, T6y, T6z, T6C; + T6x = T6t - T6w; + T6y = T6p - T6i; + Rm[WS(rs, 5)] = T6x - T6y; + Rp[WS(rs, 10)] = T6x + T6y; + T6z = T6a - T67; + T6C = T6A - T6B; + Ip[WS(rs, 10)] = T6z + T6C; + Im[WS(rs, 5)] = T6C - T6z; + } + { + E T6H, T6O, T6X, T6Y; + T6H = T6F + T6G; + T6O = T6K + T6N; + Ip[WS(rs, 6)] = T6H + T6O; + Im[WS(rs, 9)] = T6O - T6H; + T6X = T6P + T6Q; + T6Y = T6U + T6V; + Rm[WS(rs, 9)] = T6X - T6Y; + Rp[WS(rs, 6)] = T6X + T6Y; + } + { + E T6R, T6S, T6T, T6W; + T6R = T6P - T6Q; + T6S = T6N - T6K; + Rm[WS(rs, 1)] = T6R - T6S; + Rp[WS(rs, 14)] = T6R + T6S; + T6T = T6G - T6F; + T6W = T6U - T6V; + Ip[WS(rs, 14)] = T6T + T6W; + Im[WS(rs, 1)] = T6W - T6T; + } + } + { + E T7d, T8w, T7o, T8m, T8c, T8l, T89, T8v, T81, T8B, T8h, T8t, T7I, T8A, T8g; + E T8q; + { + E T75, T7c, T85, T88; + T75 = FNMS(KP191341716, T74, KP461939766 * T71); + T7c = FMA(KP461939766, T78, 
KP191341716 * T7b); + T7d = T75 + T7c; + T8w = T75 - T7c; + { + E T7k, T7n, T8a, T8b; + T7k = KP353553390 * (T7g + T7j); + T7n = KP500000000 * (T7l - T7m); + T7o = T7k + T7n; + T8m = T7n - T7k; + T8a = FMA(KP191341716, T71, KP461939766 * T74); + T8b = FNMS(KP191341716, T78, KP461939766 * T7b); + T8c = T8a + T8b; + T8l = T8b - T8a; + } + T85 = KP500000000 * (T83 + T84); + T88 = KP353553390 * (T86 + T87); + T89 = T85 + T88; + T8v = T85 - T88; + { + E T7T, T8r, T80, T8s, T7P, T7W; + T7P = KP707106781 * (T7L + T7O); + T7T = T7P + T7S; + T8r = T7S - T7P; + T7W = KP707106781 * (T7U + T7V); + T80 = T7W + T7Z; + T8s = T7Z - T7W; + T81 = FNMS(KP097545161, T80, KP490392640 * T7T); + T8B = FMA(KP415734806, T8r, KP277785116 * T8s); + T8h = FMA(KP097545161, T7T, KP490392640 * T80); + T8t = FNMS(KP415734806, T8s, KP277785116 * T8r); + } + { + E T7A, T8o, T7H, T8p, T7w, T7D; + T7w = KP707106781 * (T7s + T7v); + T7A = T7w + T7z; + T8o = T7z - T7w; + T7D = KP707106781 * (T7B + T7C); + T7H = T7D + T7G; + T8p = T7G - T7D; + T7I = FMA(KP490392640, T7A, KP097545161 * T7H); + T8A = FNMS(KP415734806, T8o, KP277785116 * T8p); + T8g = FNMS(KP097545161, T7A, KP490392640 * T7H); + T8q = FMA(KP277785116, T8o, KP415734806 * T8p); + } + } + { + E T7p, T82, T8j, T8k; + T7p = T7d + T7o; + T82 = T7I + T81; + Ip[WS(rs, 1)] = T7p + T82; + Im[WS(rs, 14)] = T82 - T7p; + T8j = T89 + T8c; + T8k = T8g + T8h; + Rm[WS(rs, 14)] = T8j - T8k; + Rp[WS(rs, 1)] = T8j + T8k; + } + { + E T8d, T8e, T8f, T8i; + T8d = T89 - T8c; + T8e = T81 - T7I; + Rm[WS(rs, 6)] = T8d - T8e; + Rp[WS(rs, 9)] = T8d + T8e; + T8f = T7o - T7d; + T8i = T8g - T8h; + Ip[WS(rs, 9)] = T8f + T8i; + Im[WS(rs, 6)] = T8i - T8f; + } + { + E T8n, T8u, T8D, T8E; + T8n = T8l + T8m; + T8u = T8q + T8t; + Ip[WS(rs, 5)] = T8n + T8u; + Im[WS(rs, 10)] = T8u - T8n; + T8D = T8v + T8w; + T8E = T8A + T8B; + Rm[WS(rs, 10)] = T8D - T8E; + Rp[WS(rs, 5)] = T8D + T8E; + } + { + E T8x, T8y, T8z, T8C; + T8x = T8v - T8w; + T8y = T8t - T8q; + Rm[WS(rs, 2)] = T8x - T8y; 
+ Rp[WS(rs, 13)] = T8x + T8y; + T8z = T8m - T8l; + T8C = T8A - T8B; + Ip[WS(rs, 13)] = T8z + T8C; + Im[WS(rs, 2)] = T8C - T8z; + } + } + { + E T8L, T9u, T8O, T9k, T9a, T9j, T97, T9t, T93, T9z, T9f, T9r, T8W, T9y, T9e; + E T9o; + { + E T8H, T8K, T95, T96; + T8H = FNMS(KP461939766, T8G, KP191341716 * T8F); + T8K = FMA(KP191341716, T8I, KP461939766 * T8J); + T8L = T8H + T8K; + T9u = T8H - T8K; + { + E T8M, T8N, T98, T99; + T8M = KP353553390 * (T87 - T86); + T8N = KP500000000 * (T7m + T7l); + T8O = T8M + T8N; + T9k = T8N - T8M; + T98 = FMA(KP461939766, T8F, KP191341716 * T8G); + T99 = FNMS(KP461939766, T8I, KP191341716 * T8J); + T9a = T98 + T99; + T9j = T99 - T98; + } + T95 = KP500000000 * (T83 - T84); + T96 = KP353553390 * (T7g - T7j); + T97 = T95 + T96; + T9t = T95 - T96; + { + E T8Z, T9p, T92, T9q, T8X, T90; + T8X = KP707106781 * (T7V - T7U); + T8Z = T8X + T8Y; + T9p = T8Y - T8X; + T90 = KP707106781 * (T7L - T7O); + T92 = T90 + T91; + T9q = T91 - T90; + T93 = FNMS(KP277785116, T92, KP415734806 * T8Z); + T9z = FMA(KP490392640, T9p, KP097545161 * T9q); + T9f = FMA(KP277785116, T8Z, KP415734806 * T92); + T9r = FNMS(KP490392640, T9q, KP097545161 * T9p); + } + { + E T8S, T9m, T8V, T9n, T8Q, T8T; + T8Q = KP707106781 * (T7C - T7B); + T8S = T8Q + T8R; + T9m = T8R - T8Q; + T8T = KP707106781 * (T7s - T7v); + T8V = T8T + T8U; + T9n = T8U - T8T; + T8W = FMA(KP415734806, T8S, KP277785116 * T8V); + T9y = FNMS(KP490392640, T9m, KP097545161 * T9n); + T9e = FNMS(KP277785116, T8S, KP415734806 * T8V); + T9o = FMA(KP097545161, T9m, KP490392640 * T9n); + } + } + { + E T8P, T94, T9h, T9i; + T8P = T8L + T8O; + T94 = T8W + T93; + Ip[WS(rs, 3)] = T8P + T94; + Im[WS(rs, 12)] = T94 - T8P; + T9h = T97 + T9a; + T9i = T9e + T9f; + Rm[WS(rs, 12)] = T9h - T9i; + Rp[WS(rs, 3)] = T9h + T9i; + } + { + E T9b, T9c, T9d, T9g; + T9b = T97 - T9a; + T9c = T93 - T8W; + Rm[WS(rs, 4)] = T9b - T9c; + Rp[WS(rs, 11)] = T9b + T9c; + T9d = T8O - T8L; + T9g = T9e - T9f; + Ip[WS(rs, 11)] = T9d + T9g; + Im[WS(rs, 4)] 
= T9g - T9d; + } + { + E T9l, T9s, T9B, T9C; + T9l = T9j + T9k; + T9s = T9o + T9r; + Ip[WS(rs, 7)] = T9l + T9s; + Im[WS(rs, 8)] = T9s - T9l; + T9B = T9t + T9u; + T9C = T9y + T9z; + Rm[WS(rs, 8)] = T9B - T9C; + Rp[WS(rs, 7)] = T9B + T9C; + } + { + E T9v, T9w, T9x, T9A; + T9v = T9t - T9u; + T9w = T9r - T9o; + Rm[0] = T9v - T9w; + Rp[WS(rs, 15)] = T9v + T9w; + T9x = T9k - T9j; + T9A = T9y - T9z; + Ip[WS(rs, 15)] = T9x + T9A; + Im[0] = T9A - T9x; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cfdft2_32", twinstr, &GENUS, {440, 188, 112, 0} }; + +void X(codelet_hc2cfdft2_32) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_32, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */ + +/* + * This function contains 32 FP additions, 24 FP multiplications, + * (or, 24 additions, 16 multiplications, 8 fused multiply/add), + * 33 stack variables, 1 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) { + E T1, T5, T2, T4; + T1 = W[0]; + T5 = W[3]; + T2 = W[2]; + T4 = W[1]; + { + E Tc, T6, Tp, Tj, Tw, Tt, T9, TE, To, TC, Ta, Tr, Tf, Tl, Tm; + { + E Th, Tb, T3, Ti; + Th = Ip[0]; + Tb = T1 * T5; + T3 = T1 * T2; + Ti = Im[0]; + Tl = Rm[0]; + Tc = FNMS(T4, T2, Tb); + T6 = FMA(T4, T5, T3); + Tp = Th + Ti; + Tj = Th - Ti; + Tm = Rp[0]; + } + { + E T7, T8, Td, Tn, Te; + T7 = Ip[WS(rs, 1)]; + T8 = Im[WS(rs, 1)]; + Td = Rp[WS(rs, 1)]; + Tw = Tm + Tl; + Tn = Tl - Tm; + Tt = T7 + T8; + T9 = T7 - T8; + Te = Rm[WS(rs, 1)]; + TE = T4 * Tn; + To = T1 * Tn; + TC = T2 * Tt; + Ta = T6 * T9; + Tr = Td - Te; + Tf = Td + Te; + } + { + E Tq, Tk, TB, Ty, Tu, TI, TG, TF; + Tq = FNMS(T4, Tp, To); + TF = FMA(T1, Tp, TE); + { + E Tg, Tx, TD, Ts; + Tg = FNMS(Tc, Tf, Ta); + Tx = T6 * Tf; + TD = FNMS(T5, Tr, TC); + Ts = T2 * Tr; + Tk = Tg 
+ Tj; + TB = Tj - Tg; + Ty = FMA(Tc, T9, Tx); + Tu = FMA(T5, Tt, Ts); + TI = TD + TF; + TG = TD - TF; + } + { + E Tz, TH, Tv, TA; + Tz = Tw - Ty; + TH = Tw + Ty; + Tv = Tq - Tu; + TA = Tu + Tq; + Rp[0] = KP500000000 * (TH + TI); + Rm[WS(rs, 1)] = KP500000000 * (TH - TI); + Rm[0] = KP500000000 * (Tz - TA); + Im[WS(rs, 1)] = KP500000000 * (Tv - Tk); + Ip[0] = KP500000000 * (Tk + Tv); + Im[0] = KP500000000 * (TG - TB); + Rp[WS(rs, 1)] = KP500000000 * (Tz + TA); + Ip[WS(rs, 1)] = KP500000000 * (TB + TG); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cfdft2_4", twinstr, &GENUS, {24, 16, 8, 0} }; + +void X(codelet_hc2cfdft2_4) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_4, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */ + +/* + * This function contains 32 FP additions, 24 FP multiplications, + * (or, 24 additions, 16 multiplications, 8 fused multiply/add), + * 24 stack variables, 1 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) { + E T1, T3, T2, T4, T5, T9; + T1 = W[0]; + T3 = W[1]; + T2 = W[2]; + T4 = W[3]; + T5 = FMA(T1, T2, T3 * T4); + T9 = FNMS(T3, T2, T1 * T4); + { + E Tg, Tr, Tm, Tx, Td, Tw, Tp, Ts; + { + E Te, Tf, Tl, Ti, Tj, Tk; + Te = Ip[0]; + Tf = Im[0]; + Tl = Te + Tf; + Ti = Rm[0]; + Tj = Rp[0]; + Tk = Ti - Tj; + Tg = Te - Tf; + Tr = Tj + Ti; + Tm = FNMS(T3, Tl, T1 * Tk); + Tx = FMA(T3, Tk, T1 * Tl); + } + { + E T8, 
To, Tc, Tn; + { + E T6, T7, Ta, Tb; + T6 = Ip[WS(rs, 1)]; + T7 = Im[WS(rs, 1)]; + T8 = T6 - T7; + To = T6 + T7; + Ta = Rp[WS(rs, 1)]; + Tb = Rm[WS(rs, 1)]; + Tc = Ta + Tb; + Tn = Ta - Tb; + } + Td = FNMS(T9, Tc, T5 * T8); + Tw = FNMS(T4, Tn, T2 * To); + Tp = FMA(T2, Tn, T4 * To); + Ts = FMA(T5, Tc, T9 * T8); + } + { + E Th, Tq, Tz, TA; + Th = Td + Tg; + Tq = Tm - Tp; + Ip[0] = KP500000000 * (Th + Tq); + Im[WS(rs, 1)] = KP500000000 * (Tq - Th); + Tz = Tr + Ts; + TA = Tw + Tx; + Rm[WS(rs, 1)] = KP500000000 * (Tz - TA); + Rp[0] = KP500000000 * (Tz + TA); + } + { + E Tt, Tu, Tv, Ty; + Tt = Tr - Ts; + Tu = Tp + Tm; + Rm[0] = KP500000000 * (Tt - Tu); + Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu); + Tv = Tg - Td; + Ty = Tw - Tx; + Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty); + Im[0] = KP500000000 * (Ty - Tv); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cfdft2_4", twinstr, &GENUS, {24, 16, 8, 0} }; + +void X(codelet_hc2cfdft2_4) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_4, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:31 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -dit -name hc2cfdft2_8 -include hc2cf.h */ + +/* + * This function contains 90 FP additions, 66 FP multiplications, + * (or, 60 additions, 36 multiplications, 30 fused multiply/add), + * 68 stack variables, 2 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) { + E T1G, T1F, T1C, T1D, T1N, T1B, T1R, T1L; + { + E T1, T2, Th, Tj, T4, T3, Ti, Tp, T5; + T1 = W[0]; + T2 = W[2]; + Th = W[4]; + Tj = W[5]; + T4 = W[1]; + T3 = T1 * T2; + Ti = T1 * Th; + Tp = T1 * Tj; + T5 = W[3]; + { + E Tk, Tq, TI, T1a, T1u, TY, TF, TS, T1s, T1c, Tr, T1n, Tg, T16, Tn; + E T13, T1f, Ts, To, T1o; + { + E T6, Tw, Tc, TB, TQ, TM, TC, TR, Tz, TD, TA; + { + E TX, TV, TT, TU; + { + E TG, Tb, TH, TP, TL; + TG = Ip[0]; + Tk = FMA(T4, Tj, Ti); + Tq = FNMS(T4, Th, Tp); + T6 = FMA(T4, T5, T3); + Tw = FNMS(T4, T5, T3); + Tb = T1 * T5; + TH = Im[0]; + TT = Rm[0]; + TP = T6 * Tj; + TL = T6 * Th; + Tc = FNMS(T4, T2, Tb); + TB = FMA(T4, T2, Tb); + TX = TG + TH; + TI = TG - TH; + TU = 
Rp[0]; + TQ = FNMS(Tc, Th, TP); + TM = FMA(Tc, Tj, TL); + } + T1a = TU + TT; + TV = TT - TU; + { + E Tx, Ty, T1t, TW; + Tx = Ip[WS(rs, 2)]; + Ty = Im[WS(rs, 2)]; + T1t = T4 * TV; + TW = T1 * TV; + TC = Rp[WS(rs, 2)]; + TR = Tx + Ty; + Tz = Tx - Ty; + T1u = FMA(T1, TX, T1t); + TY = FNMS(T4, TX, TW); + TD = Rm[WS(rs, 2)]; + } + TA = Tw * Tz; + } + { + E Td, T9, T12, Te, Ta, T1m; + { + E T7, T8, TN, TE, TO, T1r, T1b; + T7 = Ip[WS(rs, 1)]; + T8 = Im[WS(rs, 1)]; + TN = TD - TC; + TE = TC + TD; + Td = Rp[WS(rs, 1)]; + T9 = T7 - T8; + T12 = T7 + T8; + TO = TM * TN; + T1r = TQ * TN; + T1b = Tw * TE; + TF = FNMS(TB, TE, TA); + TS = FNMS(TQ, TR, TO); + T1s = FMA(TM, TR, T1r); + T1c = FMA(TB, Tz, T1b); + Te = Rm[WS(rs, 1)]; + } + Ta = T6 * T9; + T1m = T2 * T12; + { + E Tl, T10, Tf, Tm, T11, T1e; + Tl = Ip[WS(rs, 3)]; + T10 = Td - Te; + Tf = Td + Te; + Tm = Im[WS(rs, 3)]; + Tr = Rp[WS(rs, 3)]; + T11 = T2 * T10; + T1n = FNMS(T5, T10, T1m); + T1e = T6 * Tf; + Tg = FNMS(Tc, Tf, Ta); + T16 = Tl + Tm; + Tn = Tl - Tm; + T13 = FMA(T5, T12, T11); + T1f = FMA(Tc, T9, T1e); + Ts = Rm[WS(rs, 3)]; + } + To = Tk * Tn; + T1o = Th * T16; + } + } + { + E T1z, T1K, T1y, T1k, T1J, T1A, T1x, T1j; + { + E T1w, TK, T1l, T19, T1d, T1i; + { + E TJ, T14, Tt, T1v, T1h; + T1z = TI - TF; + TJ = TF + TI; + T14 = Tr - Ts; + Tt = Tr + Ts; + T1v = T1s + T1u; + T1G = T1u - T1s; + { + E TZ, T1q, Tv, T18, T15; + T1F = TY - TS; + TZ = TS + TY; + T15 = Th * T14; + { + E T1p, T1g, Tu, T17; + T1p = FNMS(Tj, T14, T1o); + T1g = Tk * Tt; + Tu = FNMS(Tq, Tt, To); + T17 = FMA(Tj, T16, T15); + T1C = T1p - T1n; + T1q = T1n + T1p; + T1h = FMA(Tq, Tn, T1g); + T1K = Tg - Tu; + Tv = Tg + Tu; + T18 = T13 + T17; + T1D = T13 - T17; + } + T1w = T1q - T1v; + T1y = T1q + T1v; + TK = Tv + TJ; + T1l = TJ - Tv; + T1k = T18 + TZ; + T19 = TZ - T18; + } + T1J = T1a - T1c; + T1d = T1a + T1c; + T1i = T1f + T1h; + T1A = T1f - T1h; + } + Ip[0] = KP500000000 * (TK + T19); + Im[WS(rs, 3)] = KP500000000 * (T19 - TK); + Im[WS(rs, 1)] = 
KP500000000 * (T1w - T1l); + T1x = T1d + T1i; + T1j = T1d - T1i; + Ip[WS(rs, 2)] = KP500000000 * (T1l + T1w); + } + Rm[WS(rs, 3)] = KP500000000 * (T1x - T1y); + Rp[0] = KP500000000 * (T1x + T1y); + Rp[WS(rs, 2)] = KP500000000 * (T1j + T1k); + Rm[WS(rs, 1)] = KP500000000 * (T1j - T1k); + T1N = T1A + T1z; + T1B = T1z - T1A; + T1R = T1J + T1K; + T1L = T1J - T1K; + } + } + } + { + E T1E, T1O, T1H, T1P; + T1E = T1C + T1D; + T1O = T1C - T1D; + T1H = T1F - T1G; + T1P = T1F + T1G; + { + E T1S, T1Q, T1I, T1M; + T1S = T1O + T1P; + T1Q = T1O - T1P; + T1I = T1E + T1H; + T1M = T1H - T1E; + Im[0] = -(KP500000000 * (FNMS(KP707106781, T1Q, T1N))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1Q, T1N)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1S, T1R)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP707106781, T1S, T1R)); + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1M, T1L)); + Rm[0] = KP500000000 * (FNMS(KP707106781, T1M, T1L)); + Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP707106781, T1I, T1B))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1I, T1B)); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cfdft2_8", twinstr, &GENUS, {60, 36, 30, 0} }; + +void X(codelet_hc2cfdft2_8) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_8, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -dit -name hc2cfdft2_8 -include hc2cf.h */ + +/* + * This function contains 90 FP additions, 56 FP multiplications, + * (or, 72 additions, 38 multiplications, 18 fused multiply/add), + * 51 stack variables, 2 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP353553390, 
+0.353553390593273762200422181052424519642417969); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) { + E T1, T4, T2, T5, Tu, Ty, T7, Td, Ti, Tj, Tk, TP, To, TN; + { + E T3, Tc, T6, Tb; + T1 = W[0]; + T4 = W[1]; + T2 = W[2]; + T5 = W[3]; + T3 = T1 * T2; + Tc = T4 * T2; + T6 = T4 * T5; + Tb = T1 * T5; + Tu = T3 - T6; + Ty = Tb + Tc; + T7 = T3 + T6; + Td = Tb - Tc; + Ti = W[4]; + Tj = W[5]; + Tk = FMA(T1, Ti, T4 * Tj); + TP = FNMS(Td, Ti, T7 * Tj); + To = FNMS(T4, Ti, T1 * Tj); + TN = FMA(T7, Ti, Td * Tj); + } + { + E TF, T11, TC, T12, T1d, T1e, T1q, TM, TR, T1p, Th, Ts, T15, T14, T1a; + E T1b, T1m, TV, TY, T1n; + { + E TD, TE, TL, TI, TJ, TK, Tx, TQ, TB, TO; + TD = Ip[0]; + TE = Im[0]; + TL = TD + TE; + TI = Rm[0]; + TJ = Rp[0]; + TK = TI - TJ; + { + E Tv, Tw, Tz, TA; + Tv = Ip[WS(rs, 2)]; + Tw = Im[WS(rs, 2)]; + Tx = Tv - Tw; + TQ = Tv + Tw; + Tz = Rp[WS(rs, 2)]; + TA = Rm[WS(rs, 2)]; + TB = Tz + TA; + TO = Tz - TA; + } + TF = TD - TE; + T11 = TJ + TI; + TC = FNMS(Ty, TB, Tu * Tx); + T12 = FMA(Tu, TB, Ty * Tx); + T1d = FNMS(TP, TO, TN * TQ); + T1e = FMA(T4, TK, T1 * TL); + T1q = T1e - T1d; + TM = FNMS(T4, TL, T1 * TK); + TR = FMA(TN, TO, TP * TQ); + T1p = TR + TM; + } + { + E Ta, TU, Tg, TT, Tn, TX, Tr, TW; + { + E T8, T9, Te, Tf; + T8 = Ip[WS(rs, 1)]; + T9 = Im[WS(rs, 1)]; + Ta = T8 - T9; + TU = T8 + T9; + Te = Rp[WS(rs, 1)]; + Tf = Rm[WS(rs, 1)]; + Tg = Te + Tf; + TT = Te - Tf; + } + { + E Tl, Tm, Tp, Tq; + Tl = Ip[WS(rs, 3)]; + Tm = Im[WS(rs, 3)]; + Tn = Tl - Tm; + TX = Tl + Tm; + Tp = Rp[WS(rs, 3)]; + Tq = Rm[WS(rs, 3)]; + Tr = Tp + Tq; + TW = Tp - Tq; + } + Th = FNMS(Td, Tg, T7 * Ta); + Ts = FNMS(To, Tr, Tk * Tn); + T15 = FMA(Tk, Tr, To * Tn); + T14 = FMA(T7, Tg, Td * Ta); + T1a = FNMS(T5, TT, T2 * TU); + T1b = FNMS(Tj, TW, Ti * TX); + T1m = T1b - T1a; + TV = FMA(T2, TT, T5 * 
TU); + TY = FMA(Ti, TW, Tj * TX); + T1n = TV - TY; + } + { + E T1l, T1x, T1A, T1C, T1s, T1w, T1v, T1B; + { + E T1j, T1k, T1y, T1z; + T1j = TF - TC; + T1k = T14 - T15; + T1l = KP500000000 * (T1j - T1k); + T1x = KP500000000 * (T1k + T1j); + T1y = T1m - T1n; + T1z = T1p + T1q; + T1A = KP353553390 * (T1y - T1z); + T1C = KP353553390 * (T1y + T1z); + } + { + E T1o, T1r, T1t, T1u; + T1o = T1m + T1n; + T1r = T1p - T1q; + T1s = KP353553390 * (T1o + T1r); + T1w = KP353553390 * (T1r - T1o); + T1t = T11 - T12; + T1u = Th - Ts; + T1v = KP500000000 * (T1t - T1u); + T1B = KP500000000 * (T1t + T1u); + } + Ip[WS(rs, 1)] = T1l + T1s; + Rp[WS(rs, 1)] = T1B + T1C; + Im[WS(rs, 2)] = T1s - T1l; + Rm[WS(rs, 2)] = T1B - T1C; + Rm[0] = T1v - T1w; + Im[0] = T1A - T1x; + Rp[WS(rs, 3)] = T1v + T1w; + Ip[WS(rs, 3)] = T1x + T1A; + } + { + E TH, T19, T1g, T1i, T10, T18, T17, T1h; + { + E Tt, TG, T1c, T1f; + Tt = Th + Ts; + TG = TC + TF; + TH = Tt + TG; + T19 = TG - Tt; + T1c = T1a + T1b; + T1f = T1d + T1e; + T1g = T1c - T1f; + T1i = T1c + T1f; + } + { + E TS, TZ, T13, T16; + TS = TM - TR; + TZ = TV + TY; + T10 = TS - TZ; + T18 = TZ + TS; + T13 = T11 + T12; + T16 = T14 + T15; + T17 = T13 - T16; + T1h = T13 + T16; + } + Ip[0] = KP500000000 * (TH + T10); + Rp[0] = KP500000000 * (T1h + T1i); + Im[WS(rs, 3)] = KP500000000 * (T10 - TH); + Rm[WS(rs, 3)] = KP500000000 * (T1h - T1i); + Rm[WS(rs, 1)] = KP500000000 * (T17 - T18); + Im[WS(rs, 1)] = KP500000000 * (T1g - T19); + Rp[WS(rs, 2)] = KP500000000 * (T17 + T18); + Ip[WS(rs, 2)] = KP500000000 * (T19 + T1g); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cfdft2_8", twinstr, &GENUS, {72, 38, 18, 0} }; + +void X(codelet_hc2cfdft2_8) (planner *p) { + X(khc2c_register) (p, hc2cfdft2_8, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_10.c --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cfdft_10 -include hc2cf.h */ + +/* + * This function contains 122 FP additions, 92 FP multiplications, + * (or, 68 additions, 38 multiplications, 54 fused multiply/add), + * 94 stack variables, 5 constants, and 40 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, 
+0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E T1x, T1I, T1T, T22, T20; + { + E T3, T1u, T1S, T2f, Td, T1w, T14, T1p, T1j, T1q, T1N, T2e, T1z, To, T2i; + E T1H, TQ, T1n, Ty, T1B; + { + E T1h, TW, Tc, T1b, T1g, T1f, T1Q, TV, T7, TS, T1J, TU, Ts, T19, T18; + E T15, Tx, T17, T1O, T1A, Tt, TD, Ti, TE, Tn, TA, T1F, TC, T1y, Tj; + E T11, T12, TJ, TZ, TO, TY, TG, T1L, T1e, T1, T2; + T1 = Ip[0]; + T2 = Im[0]; + { + E Ta, Tb, T1c, T1d; + Ta = Rp[WS(rs, 2)]; + Tb = Rm[WS(rs, 2)]; + T1c = Rm[0]; + T1h = T1 + T2; + T3 = T1 - T2; + T1d = Rp[0]; + TW = Ta + Tb; + Tc = Ta - Tb; + T1b = W[0]; + T1u = T1d + T1c; + T1e = T1c - T1d; + T1g = W[1]; + } + { + E T16, Tp, TT, T5, T6, TB, Tf; + T5 = Ip[WS(rs, 2)]; + T6 = Im[WS(rs, 2)]; + T1f = T1b * T1e; + T1Q = T1g * T1e; + TV = W[7]; + T7 = T5 + T6; + TT = T5 - T6; + TS = W[6]; + { + E Tv, Tw, Tq, Tr; + Tq = Rm[WS(rs, 3)]; + Tr = Rp[WS(rs, 3)]; + T1J = TV * TT; + TU = TS * TT; + Tv = Ip[WS(rs, 3)]; + Ts = Tq - Tr; + T19 = Tr + Tq; + Tw = Im[WS(rs, 3)]; + T18 = W[11]; + T15 = W[10]; + Tx = Tv + Tw; + T16 = Tv - Tw; + Tp = W[12]; + } + { + E Tg, Th, Tl, Tm; + Tg = Ip[WS(rs, 1)]; + T17 = T15 * T16; + T1O = T18 * T16; + T1A = Tp * Tx; + Tt = Tp * Ts; + Th = Im[WS(rs, 1)]; + Tl = Rp[WS(rs, 1)]; + Tm = Rm[WS(rs, 1)]; + TD = W[5]; + Ti = Tg - Th; + TE = Tg + Th; + Tn = Tl + Tm; + TB = Tm - Tl; + TA = W[4]; + Tf = W[2]; + T1F = TD * TB; + } + { + E TH, TI, TM, TN; + TH = Ip[WS(rs, 4)]; + TC = TA * TB; + T1y = Tf * Tn; + Tj = Tf * Ti; + TI = Im[WS(rs, 4)]; + TM = Rp[WS(rs, 4)]; + TN = Rm[WS(rs, 4)]; + T11 = W[17]; + T12 = TH + TI; + TJ = TH - TI; + TZ = TN - TM; + TO = TM + TN; + TY = W[16]; + TG = W[14]; + T1L = T11 * TZ; + } + } + { + E T10, T1D, TK, T4, T9, T1P, T1R, T8, T1v; + T10 = TY * TZ; + T1D = TG * TO; + TK = TG * TJ; + T4 = W[9]; + T9 = W[8]; + T1P = FMA(T15, 
T19, T1O); + T1R = FMA(T1b, T1h, T1Q); + T8 = T4 * T7; + T1v = T9 * T7; + { + E TX, T13, T1a, T1i; + TX = FNMS(TV, TW, TU); + T1S = T1P - T1R; + T2f = T1P + T1R; + Td = FMA(T9, Tc, T8); + T1w = FNMS(T4, Tc, T1v); + T13 = FNMS(T11, T12, T10); + T1a = FNMS(T18, T19, T17); + T1i = FNMS(T1g, T1h, T1f); + { + E T1K, T1M, TF, T1G, TL; + T1K = FMA(TS, TW, T1J); + T14 = TX + T13; + T1p = T13 - TX; + T1j = T1a + T1i; + T1q = T1i - T1a; + T1M = FMA(TY, T12, T1L); + TF = FNMS(TD, TE, TC); + T1G = FMA(TA, TE, T1F); + TL = W[15]; + T1N = T1K - T1M; + T2e = T1K + T1M; + { + E Tk, T1E, TP, Tu; + Tk = W[3]; + T1E = FMA(TL, TJ, T1D); + TP = FNMS(TL, TO, TK); + Tu = W[13]; + T1z = FMA(Tk, Ti, T1y); + To = FNMS(Tk, Tn, Tj); + T2i = T1G + T1E; + T1H = T1E - T1G; + TQ = TF + TP; + T1n = TF - TP; + Ty = FNMS(Tu, Tx, Tt); + T1B = FMA(Tu, Ts, T1A); + } + } + } + } + } + { + E T2p, T1t, T1m, T1C, T2o, T2m, T2k, T2w, T2y, T2n, T2d, T2l; + { + E T2g, Te, T2h, T2u, T1k, TR, T2v, Tz; + T2p = T2e + T2f; + T2g = T2e - T2f; + Te = T3 - Td; + T1t = Td + T3; + Tz = To + Ty; + T1m = Ty - To; + T2h = T1z + T1B; + T1C = T1z - T1B; + T2u = T14 - T1j; + T1k = T14 + T1j; + TR = Tz + TQ; + T2v = Tz - TQ; + { + E T2c, T2b, T2j, T1l; + T2j = T2h - T2i; + T2o = T2h + T2i; + T2c = TR - T1k; + T1l = TR + T1k; + T2m = FMA(KP618033988, T2g, T2j); + T2k = FNMS(KP618033988, T2j, T2g); + T2w = FNMS(KP618033988, T2v, T2u); + T2y = FMA(KP618033988, T2u, T2v); + Ip[0] = KP500000000 * (Te + T1l); + T2b = FNMS(KP250000000, T1l, Te); + T2n = T1u + T1w; + T1x = T1u - T1w; + T2d = FNMS(KP559016994, T2c, T2b); + T2l = FMA(KP559016994, T2c, T2b); + } + } + { + E T1o, T1Y, T28, T2a, T1Z, T1r, T2t, T2x; + { + E T26, T2s, T2q, T27, T2r; + T1I = T1C + T1H; + T26 = T1H - T1C; + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T2k, T2d))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP951056516, T2k, T2d)); + Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP951056516, T2m, T2l))); + Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T2m, T2l)); + 
T2s = T2o - T2p; + T2q = T2o + T2p; + T27 = T1S - T1N; + T1T = T1N + T1S; + T1o = T1m + T1n; + T1Y = T1n - T1m; + Rp[0] = KP500000000 * (T2n + T2q); + T2r = FNMS(KP250000000, T2q, T2n); + T28 = FMA(KP618033988, T27, T26); + T2a = FNMS(KP618033988, T26, T27); + T1Z = T1q - T1p; + T1r = T1p + T1q; + T2t = FNMS(KP559016994, T2s, T2r); + T2x = FMA(KP559016994, T2s, T2r); + } + { + E T24, T23, T1s, T25, T29; + T1s = T1o + T1r; + T24 = T1r - T1o; + Rm[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T2w, T2t)); + Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T2w, T2t)); + Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T2y, T2x)); + Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T2y, T2x)); + Im[WS(rs, 4)] = KP500000000 * (T1s - T1t); + T23 = FMA(KP250000000, T1s, T1t); + T25 = FMA(KP559016994, T24, T23); + T29 = FNMS(KP559016994, T24, T23); + T22 = FNMS(KP618033988, T1Y, T1Z); + T20 = FMA(KP618033988, T1Z, T1Y); + Im[0] = -(KP500000000 * (FNMS(KP951056516, T28, T25))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T28, T25)); + Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP951056516, T2a, T29))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T2a, T29)); + } + } + } + } + { + E T1U, T1W, T1V, T21, T1X; + T1U = T1I + T1T; + T1W = T1I - T1T; + Rm[WS(rs, 4)] = KP500000000 * (T1x + T1U); + T1V = FNMS(KP250000000, T1U, T1x); + T21 = FNMS(KP559016994, T1W, T1V); + T1X = FMA(KP559016994, T1W, T1V); + Rm[0] = KP500000000 * (FNMS(KP951056516, T20, T1X)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T20, T1X)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T22, T21)); + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T22, T21)); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cfdft_10", twinstr, &GENUS, {68, 38, 54, 0} }; + +void X(codelet_hc2cfdft_10) (planner *p) { + X(khc2c_register) (p, hc2cfdft_10, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: 
../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cfdft_10 -include hc2cf.h */ + +/* + * This function contains 122 FP additions, 68 FP multiplications, + * (or, 92 additions, 38 multiplications, 30 fused multiply/add), + * 62 stack variables, 5 constants, and 40 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP293892626, +0.293892626146236564584352977319536384298826219); + DK(KP475528258, +0.475528258147576786058219666689691071702849317); + DK(KP125000000, +0.125000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP279508497, +0.279508497187473712051146708591409529430077295); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) { + E Tw, TL, TM, T1W, T1X, T27, T1Z, T20, T26, TX, T1a, T1b, T1d, T1e, T1f; + E T1q, T1t, T1u, T1x, T1A, T1B, T1g, T1h, T1i, Td, T25, T1k, T1F; + { + E T3, T1D, T19, T1z, T7, Tb, TR, T1v, Tm, T1o, TK, T1s, Tv, T1p, T12; + E T1y, TF, T1r, TW, T1w; + { + E T1, T2, T18, T14, T15, T16, T13, T17; + T1 = Ip[0]; + T2 = Im[0]; + T18 = T1 + T2; + T14 = Rm[0]; + T15 = Rp[0]; + T16 = T14 - T15; + T3 = T1 - T2; + T1D = T15 + T14; + T13 = W[0]; + T17 = W[1]; + T19 = FNMS(T17, T18, T13 * T16); + T1z = FMA(T17, T16, T13 * T18); + } + { + E T5, T6, TO, T9, Ta, TQ, TN, TP; + T5 = Ip[WS(rs, 2)]; + T6 = Im[WS(rs, 2)]; + TO = T5 - T6; + T9 = Rp[WS(rs, 2)]; + Ta = Rm[WS(rs, 2)]; + TQ = T9 + Ta; + T7 = T5 + T6; + Tb = T9 - Ta; + TN = W[6]; + TP = W[7]; + TR = FNMS(TP, TQ, TN * TO); + T1v = FMA(TP, TO, TN * TQ); + } + { + E Th, TJ, Tl, TH; + { + E Tf, Tg, Tj, Tk; + Tf = Ip[WS(rs, 1)]; + Tg = Im[WS(rs, 1)]; + Th = Tf - Tg; + TJ = Tf + Tg; + Tj = Rp[WS(rs, 1)]; + Tk = Rm[WS(rs, 1)]; + Tl = Tj + Tk; + TH = Tj - Tk; + } + { + E Te, Ti, 
TG, TI; + Te = W[2]; + Ti = W[3]; + Tm = FNMS(Ti, Tl, Te * Th); + T1o = FMA(Te, Tl, Ti * Th); + TG = W[4]; + TI = W[5]; + TK = FMA(TG, TH, TI * TJ); + T1s = FNMS(TI, TH, TG * TJ); + } + } + { + E Tq, TZ, Tu, T11; + { + E To, Tp, Ts, Tt; + To = Ip[WS(rs, 3)]; + Tp = Im[WS(rs, 3)]; + Tq = To + Tp; + TZ = To - Tp; + Ts = Rp[WS(rs, 3)]; + Tt = Rm[WS(rs, 3)]; + Tu = Ts - Tt; + T11 = Ts + Tt; + } + { + E Tn, Tr, TY, T10; + Tn = W[13]; + Tr = W[12]; + Tv = FMA(Tn, Tq, Tr * Tu); + T1p = FNMS(Tn, Tu, Tr * Tq); + TY = W[10]; + T10 = W[11]; + T12 = FNMS(T10, T11, TY * TZ); + T1y = FMA(T10, TZ, TY * T11); + } + } + { + E TA, TV, TE, TT; + { + E Ty, Tz, TC, TD; + Ty = Ip[WS(rs, 4)]; + Tz = Im[WS(rs, 4)]; + TA = Ty - Tz; + TV = Ty + Tz; + TC = Rp[WS(rs, 4)]; + TD = Rm[WS(rs, 4)]; + TE = TC + TD; + TT = TC - TD; + } + { + E Tx, TB, TS, TU; + Tx = W[14]; + TB = W[15]; + TF = FNMS(TB, TE, Tx * TA); + T1r = FMA(Tx, TE, TB * TA); + TS = W[16]; + TU = W[17]; + TW = FMA(TS, TT, TU * TV); + T1w = FNMS(TU, TT, TS * TV); + } + } + Tw = Tm - Tv; + TL = TF - TK; + TM = Tw + TL; + T1W = T1v + T1w; + T1X = T1y + T1z; + T27 = T1W + T1X; + T1Z = T1o + T1p; + T20 = T1s + T1r; + T26 = T1Z + T20; + TX = TR - TW; + T1a = T12 + T19; + T1b = TX + T1a; + T1d = T19 - T12; + T1e = TR + TW; + T1f = T1d - T1e; + T1q = T1o - T1p; + T1t = T1r - T1s; + T1u = T1q + T1t; + T1x = T1v - T1w; + T1A = T1y - T1z; + T1B = T1x + T1A; + T1g = Tm + Tv; + T1h = TK + TF; + T1i = T1g + T1h; + { + E Tc, T1E, T4, T8; + T4 = W[9]; + T8 = W[8]; + Tc = FMA(T4, T7, T8 * Tb); + T1E = FNMS(T4, Tb, T8 * T7); + Td = T3 - Tc; + T25 = T1D + T1E; + T1k = Tc + T3; + T1F = T1D - T1E; + } + } + { + E T1U, T1c, T1T, T22, T24, T1Y, T21, T23, T1V; + T1U = KP279508497 * (TM - T1b); + T1c = TM + T1b; + T1T = FNMS(KP125000000, T1c, KP500000000 * Td); + T1Y = T1W - T1X; + T21 = T1Z - T20; + T22 = FNMS(KP293892626, T21, KP475528258 * T1Y); + T24 = FMA(KP475528258, T21, KP293892626 * T1Y); + Ip[0] = KP500000000 * (Td + T1c); + T23 = T1U + T1T; + 
Ip[WS(rs, 4)] = T23 + T24; + Im[WS(rs, 3)] = T24 - T23; + T1V = T1T - T1U; + Ip[WS(rs, 2)] = T1V + T22; + Im[WS(rs, 1)] = T22 - T1V; + } + { + E T2a, T28, T29, T2e, T2g, T2c, T2d, T2f, T2b; + T2a = KP279508497 * (T26 - T27); + T28 = T26 + T27; + T29 = FNMS(KP125000000, T28, KP500000000 * T25); + T2c = TX - T1a; + T2d = Tw - TL; + T2e = FNMS(KP293892626, T2d, KP475528258 * T2c); + T2g = FMA(KP475528258, T2d, KP293892626 * T2c); + Rp[0] = KP500000000 * (T25 + T28); + T2f = T2a + T29; + Rp[WS(rs, 4)] = T2f - T2g; + Rm[WS(rs, 3)] = T2g + T2f; + T2b = T29 - T2a; + Rp[WS(rs, 2)] = T2b - T2e; + Rm[WS(rs, 1)] = T2e + T2b; + } + { + E T1M, T1j, T1L, T1Q, T1S, T1O, T1P, T1R, T1N; + T1M = KP279508497 * (T1i + T1f); + T1j = T1f - T1i; + T1L = FMA(KP500000000, T1k, KP125000000 * T1j); + T1O = T1A - T1x; + T1P = T1q - T1t; + T1Q = FNMS(KP475528258, T1P, KP293892626 * T1O); + T1S = FMA(KP293892626, T1P, KP475528258 * T1O); + Im[WS(rs, 4)] = KP500000000 * (T1j - T1k); + T1R = T1L - T1M; + Ip[WS(rs, 3)] = T1R + T1S; + Im[WS(rs, 2)] = T1S - T1R; + T1N = T1L + T1M; + Ip[WS(rs, 1)] = T1N + T1Q; + Im[0] = T1Q - T1N; + } + { + E T1C, T1G, T1H, T1n, T1J, T1l, T1m, T1K, T1I; + T1C = KP279508497 * (T1u - T1B); + T1G = T1u + T1B; + T1H = FNMS(KP125000000, T1G, KP500000000 * T1F); + T1l = T1g - T1h; + T1m = T1e + T1d; + T1n = FMA(KP475528258, T1l, KP293892626 * T1m); + T1J = FNMS(KP293892626, T1l, KP475528258 * T1m); + Rm[WS(rs, 4)] = KP500000000 * (T1F + T1G); + T1K = T1H - T1C; + Rp[WS(rs, 3)] = T1J + T1K; + Rm[WS(rs, 2)] = T1K - T1J; + T1I = T1C + T1H; + Rp[WS(rs, 1)] = T1n + T1I; + Rm[0] = T1I - T1n; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 10, "hc2cfdft_10", twinstr, &GENUS, {92, 38, 30, 0} }; + +void X(codelet_hc2cfdft_10) (planner *p) { + X(khc2c_register) (p, hc2cfdft_10, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,644 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cfdft_12 -include hc2cf.h */ + +/* + * This function contains 142 FP additions, 92 FP multiplications, + * (or, 96 additions, 46 multiplications, 46 fused multiply/add), + * 71 stack variables, 2 constants, and 48 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = 
Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E T2z, T2M; + { + E To, T1E, T2H, T1m, T1W, Tl, T1J, T2i, T2K, T1B, T2I, T2e, T19, T2E, T2C; + E T27, T1M, Tz, T2B, T1f, T1O, TJ, TT, T1Q; + { + E T2b, T1s, T1A, T2d; + { + E T1u, T1z, T1v, T2c, T1i, Te, T1l, Tj, Tf, T1H, T4, T1o, T1, T1r, T9; + E T1n, T5; + { + E T1x, T1y, T1t, Tm, Tn; + Tm = Ip[0]; + Tn = Im[0]; + T1x = Rp[0]; + T1y = Rm[0]; + T1t = W[0]; + T1u = Tm + Tn; + To = Tm - Tn; + { + E Th, Ti, Tb, Tc, Td; + Tc = Ip[WS(rs, 4)]; + T1z = T1x - T1y; + T1E = T1x + T1y; + Td = Im[WS(rs, 4)]; + T1v = T1t * T1u; + Th = Rp[WS(rs, 4)]; + T2c = T1t * T1z; + T1i = Tc + Td; + Te = Tc - Td; + Ti = Rm[WS(rs, 4)]; + Tb = W[14]; + { + E T7, T8, T2, T3; + T2 = Ip[WS(rs, 2)]; + T1l = Th - Ti; + Tj = Th + Ti; + Tf = Tb * Te; + T3 = Im[WS(rs, 2)]; + T7 = Rp[WS(rs, 2)]; + T1H = Tb * Tj; + T8 = Rm[WS(rs, 2)]; + T4 = T2 - T3; + T1o = T2 + T3; + T1 = W[6]; + T1r = T7 - T8; + T9 = T7 + T8; + T1n = W[8]; + T5 = T1 * T4; + } + } + } + { + E T1F, T2a, T1p, T1h, T1k; + T1F = T1 * T9; + T2a = T1n * T1r; + T1p = T1n * T1o; + T1h = W[16]; + T1k = W[17]; + { + E T1G, Ta, Tk, T1I, T1q, T1w; + { + E T6, Tg, T2G, T1j; + T6 = W[7]; + Tg = W[15]; + T2G = T1h * T1l; + T1j = T1h * T1i; + T1G = FMA(T6, T4, T1F); + Ta = FNMS(T6, T9, T5); + T2H = FMA(T1k, T1i, T2G); + T1m = FNMS(T1k, T1l, T1j); + Tk = FNMS(Tg, Tj, Tf); + T1I = FMA(Tg, Te, T1H); + } + T1q = W[9]; + T1w = W[1]; + T1W = Ta - Tk; + Tl = Ta + Tk; + T1J = T1G + T1I; + T2i = T1I - T1G; + T2b = FMA(T1q, T1o, T2a); + T1s = FNMS(T1q, T1r, T1p); + T1A = FNMS(T1w, T1z, T1v); + T2d = FMA(T1w, T1u, T2c); + } + } + } + { + E T11, Tt, T10, TX, Ty, TZ, T23, T1b, TN, TS, T1e, T1P, TO, T17, TD; + E T16, T13, T14, TI, TA; + { + E Tw, Tx, Tr, Ts, TK; + Tr = Ip[WS(rs, 3)]; + Ts = Im[WS(rs, 3)]; + T2K = T1s - T1A; + T1B = T1s + T1A; + T2I = T2b + T2d; + T2e = T2b - T2d; + Tw = Rp[WS(rs, 3)]; + T11 = Tr + Ts; + Tt = Tr - Ts; + Tx = Rm[WS(rs, 3)]; + T10 = W[12]; + TX = W[13]; + { + E TL, TY, TM, TQ, 
TR; + TL = Ip[WS(rs, 1)]; + Ty = Tw + Tx; + TY = Tx - Tw; + TM = Im[WS(rs, 1)]; + TQ = Rp[WS(rs, 1)]; + TR = Rm[WS(rs, 1)]; + TZ = TX * TY; + T23 = T10 * TY; + T1b = TL + TM; + TN = TL - TM; + TS = TQ + TR; + T1e = TQ - TR; + } + TK = W[2]; + { + E TG, TH, TB, TC; + TB = Ip[WS(rs, 5)]; + TC = Im[WS(rs, 5)]; + TG = Rp[WS(rs, 5)]; + T1P = TK * TS; + TO = TK * TN; + T17 = TB + TC; + TD = TB - TC; + TH = Rm[WS(rs, 5)]; + T16 = W[20]; + T13 = W[21]; + T14 = TH - TG; + TI = TG + TH; + TA = W[18]; + } + } + { + E T12, T1N, TE, T18, T24, T26, T25, T15; + T12 = FMA(T10, T11, TZ); + T15 = T13 * T14; + T25 = T16 * T14; + T1N = TA * TI; + TE = TA * TD; + T18 = FMA(T16, T17, T15); + T24 = FNMS(TX, T11, T23); + T26 = FNMS(T13, T17, T25); + { + E Tv, T1L, Tu, Tq; + Tq = W[10]; + T19 = T12 + T18; + T2E = T18 - T12; + Tv = W[11]; + T2C = T24 + T26; + T27 = T24 - T26; + T1L = Tq * Ty; + Tu = Tq * Tt; + { + E T1d, T2A, T1c, T1a, TF, TP; + T1a = W[4]; + T1d = W[5]; + T1M = FMA(Tv, Tt, T1L); + Tz = FNMS(Tv, Ty, Tu); + T2A = T1a * T1e; + T1c = T1a * T1b; + TF = W[19]; + TP = W[3]; + T2B = FMA(T1d, T1b, T2A); + T1f = FNMS(T1d, T1e, T1c); + T1O = FMA(TF, TD, T1N); + TJ = FNMS(TF, TI, TE); + TT = FNMS(TP, TS, TO); + T1Q = FMA(TP, TN, T1P); + } + } + } + } + } + { + E T2h, T2D, T1Z, T2l, T2J, T22, T2k, T29, T30, T1U, T1V, T1Y, T2Z, T1T; + { + E T2Y, TW, T2V, T1D, T1K, T1S; + { + E Tp, T2W, TU, T1R, T2X, T1g, TV, T1C; + T2h = FNMS(KP500000000, Tl, To); + Tp = Tl + To; + T2W = T2C - T2B; + T2D = FMA(KP500000000, T2C, T2B); + T1Z = TJ - TT; + TU = TJ + TT; + T1R = T1O + T1Q; + T2l = T1Q - T1O; + T2J = FNMS(KP500000000, T2I, T2H); + T2X = T2H + T2I; + T1g = T19 + T1f; + T22 = FNMS(KP500000000, T19, T1f); + T2k = FNMS(KP500000000, TU, Tz); + TV = Tz + TU; + T1C = T1m + T1B; + T29 = FNMS(KP500000000, T1B, T1m); + T2Y = T2W - T2X; + T30 = T2W + T2X; + TW = Tp - TV; + T2V = TV + Tp; + T1U = T1g + T1C; + T1D = T1g - T1C; + T1V = FNMS(KP500000000, T1J, T1E); + T1K = T1E + T1J; + T1S = T1M + T1R; + 
T1Y = FNMS(KP500000000, T1R, T1M); + } + Ip[WS(rs, 3)] = KP500000000 * (TW + T1D); + Im[WS(rs, 2)] = KP500000000 * (T1D - TW); + Im[WS(rs, 5)] = KP500000000 * (T2Y - T2V); + T2Z = T1K - T1S; + T1T = T1K + T1S; + Ip[0] = KP500000000 * (T2V + T2Y); + } + { + E T2v, T1X, T2Q, T2F, T2R, T2L, T2w, T20, T2t, T28, T2p, T2j; + Rm[WS(rs, 2)] = KP500000000 * (T2Z + T30); + Rp[WS(rs, 3)] = KP500000000 * (T2Z - T30); + Rp[0] = KP500000000 * (T1T + T1U); + Rm[WS(rs, 5)] = KP500000000 * (T1T - T1U); + T2v = FMA(KP866025403, T1W, T1V); + T1X = FNMS(KP866025403, T1W, T1V); + T2Q = FMA(KP866025403, T2E, T2D); + T2F = FNMS(KP866025403, T2E, T2D); + T2R = FMA(KP866025403, T2K, T2J); + T2L = FNMS(KP866025403, T2K, T2J); + T2w = FMA(KP866025403, T1Z, T1Y); + T20 = FNMS(KP866025403, T1Z, T1Y); + T2t = FMA(KP866025403, T27, T22); + T28 = FNMS(KP866025403, T27, T22); + T2p = FMA(KP866025403, T2i, T2h); + T2j = FNMS(KP866025403, T2i, T2h); + { + E T2T, T2q, T2s, T2U; + { + E T21, T2f, T2S, T2n, T2P, T2m, T2o, T2g; + T2T = T1X - T20; + T21 = T1X + T20; + T2q = FMA(KP866025403, T2l, T2k); + T2m = FNMS(KP866025403, T2l, T2k); + T2s = FMA(KP866025403, T2e, T29); + T2f = FNMS(KP866025403, T2e, T29); + T2S = T2Q + T2R; + T2U = T2R - T2Q; + T2n = T2j - T2m; + T2P = T2m + T2j; + T2o = T2f - T28; + T2g = T28 + T2f; + Im[WS(rs, 3)] = KP500000000 * (T2S - T2P); + Ip[WS(rs, 2)] = KP500000000 * (T2P + T2S); + Rm[WS(rs, 3)] = KP500000000 * (T21 + T2g); + Rp[WS(rs, 2)] = KP500000000 * (T21 - T2g); + Ip[WS(rs, 5)] = KP500000000 * (T2n + T2o); + Im[0] = KP500000000 * (T2o - T2n); + } + { + E T2y, T2x, T2N, T2O, T2r, T2u; + T2z = T2q + T2p; + T2r = T2p - T2q; + T2u = T2s - T2t; + T2y = T2t + T2s; + T2x = T2v + T2w; + T2N = T2v - T2w; + Rp[WS(rs, 5)] = KP500000000 * (T2T + T2U); + Rm[0] = KP500000000 * (T2T - T2U); + Im[WS(rs, 4)] = KP500000000 * (T2u - T2r); + Ip[WS(rs, 1)] = KP500000000 * (T2r + T2u); + T2O = T2L - T2F; + T2M = T2F + T2L; + Rp[WS(rs, 1)] = KP500000000 * (T2N + T2O); + Rm[WS(rs, 4)] = 
KP500000000 * (T2N - T2O); + Rp[WS(rs, 4)] = KP500000000 * (T2x + T2y); + Rm[WS(rs, 1)] = KP500000000 * (T2x - T2y); + } + } + } + } + } + Im[WS(rs, 1)] = -(KP500000000 * (T2z + T2M)); + Ip[WS(rs, 4)] = KP500000000 * (T2z - T2M); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cfdft_12", twinstr, &GENUS, {96, 46, 46, 0} }; + +void X(codelet_hc2cfdft_12) (planner *p) { + X(khc2c_register) (p, hc2cfdft_12, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cfdft_12 -include hc2cf.h */ + +/* + * This function contains 142 FP additions, 76 FP multiplications, + * (or, 112 additions, 46 multiplications, 30 fused multiply/add), + * 52 stack variables, 3 constants, and 48 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP433012701, +0.433012701892219323381861585376468091735701313); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) { + E Tm, T1t, T1d, T2j, Tj, T1Y, T1w, T1G, T1q, T2q, T1U, T2k, Tw, T1y, T17; + E T2g, TP, T21, T1B, T1J, T12, T2u, T1P, T2h; + { + E Tk, Tl, T1k, T1m, T1n, T1o, T4, T1f, T8, T1h, Th, T1c, Td, T1a, T19; + E T1b; + { + E T2, T3, T6, T7; + Tk = Ip[0]; + Tl = Im[0]; + T1k = Tk + Tl; + T1m = Rp[0]; + T1n = Rm[0]; + T1o = T1m - T1n; + T2 = Ip[WS(rs, 2)]; + T3 = Im[WS(rs, 2)]; + T4 = T2 - T3; + T1f = T2 + T3; + T6 = Rp[WS(rs, 2)]; + T7 = Rm[WS(rs, 2)]; + T8 = T6 + T7; + T1h = T6 - T7; + { + E Tf, Tg, Tb, Tc; + Tf = Rp[WS(rs, 4)]; + Tg = Rm[WS(rs, 4)]; + Th = Tf + Tg; + T1c = Tf - Tg; + Tb = Ip[WS(rs, 
4)]; + Tc = Im[WS(rs, 4)]; + Td = Tb - Tc; + T1a = Tb + Tc; + } + } + Tm = Tk - Tl; + T1t = T1m + T1n; + T19 = W[16]; + T1b = W[17]; + T1d = FNMS(T1b, T1c, T19 * T1a); + T2j = FMA(T19, T1c, T1b * T1a); + { + E T9, T1u, Ti, T1v; + { + E T1, T5, Ta, Te; + T1 = W[6]; + T5 = W[7]; + T9 = FNMS(T5, T8, T1 * T4); + T1u = FMA(T1, T8, T5 * T4); + Ta = W[14]; + Te = W[15]; + Ti = FNMS(Te, Th, Ta * Td); + T1v = FMA(Ta, Th, Te * Td); + } + Tj = T9 + Ti; + T1Y = KP433012701 * (T1v - T1u); + T1w = T1u + T1v; + T1G = KP433012701 * (T9 - Ti); + } + { + E T1i, T1S, T1p, T1T; + { + E T1e, T1g, T1j, T1l; + T1e = W[8]; + T1g = W[9]; + T1i = FNMS(T1g, T1h, T1e * T1f); + T1S = FMA(T1e, T1h, T1g * T1f); + T1j = W[0]; + T1l = W[1]; + T1p = FNMS(T1l, T1o, T1j * T1k); + T1T = FMA(T1j, T1o, T1l * T1k); + } + T1q = T1i + T1p; + T2q = KP433012701 * (T1i - T1p); + T1U = KP433012701 * (T1S - T1T); + T2k = T1S + T1T; + } + } + { + E Tr, TT, Tv, TV, TA, TY, TE, T10, TN, T14, TJ, T16; + { + E Tp, Tq, TC, TD; + Tp = Ip[WS(rs, 3)]; + Tq = Im[WS(rs, 3)]; + Tr = Tp - Tq; + TT = Tp + Tq; + { + E Tt, Tu, Ty, Tz; + Tt = Rp[WS(rs, 3)]; + Tu = Rm[WS(rs, 3)]; + Tv = Tt + Tu; + TV = Tt - Tu; + Ty = Ip[WS(rs, 5)]; + Tz = Im[WS(rs, 5)]; + TA = Ty - Tz; + TY = Ty + Tz; + } + TC = Rp[WS(rs, 5)]; + TD = Rm[WS(rs, 5)]; + TE = TC + TD; + T10 = TC - TD; + { + E TL, TM, TH, TI; + TL = Rp[WS(rs, 1)]; + TM = Rm[WS(rs, 1)]; + TN = TL + TM; + T14 = TM - TL; + TH = Ip[WS(rs, 1)]; + TI = Im[WS(rs, 1)]; + TJ = TH - TI; + T16 = TH + TI; + } + } + { + E To, Ts, T13, T15; + To = W[10]; + Ts = W[11]; + Tw = FNMS(Ts, Tv, To * Tr); + T1y = FMA(To, Tv, Ts * Tr); + T13 = W[5]; + T15 = W[4]; + T17 = FMA(T13, T14, T15 * T16); + T2g = FNMS(T13, T16, T15 * T14); + } + { + E TF, T1z, TO, T1A; + { + E Tx, TB, TG, TK; + Tx = W[18]; + TB = W[19]; + TF = FNMS(TB, TE, Tx * TA); + T1z = FMA(Tx, TE, TB * TA); + TG = W[2]; + TK = W[3]; + TO = FNMS(TK, TN, TG * TJ); + T1A = FMA(TG, TN, TK * TJ); + } + TP = TF + TO; + T21 = KP433012701 * (T1A - 
T1z); + T1B = T1z + T1A; + T1J = KP433012701 * (TF - TO); + } + { + E TW, T1O, T11, T1N; + { + E TS, TU, TX, TZ; + TS = W[12]; + TU = W[13]; + TW = FNMS(TU, TV, TS * TT); + T1O = FMA(TS, TV, TU * TT); + TX = W[20]; + TZ = W[21]; + T11 = FNMS(TZ, T10, TX * TY); + T1N = FMA(TX, T10, TZ * TY); + } + T12 = TW + T11; + T2u = KP433012701 * (T11 - TW); + T1P = KP433012701 * (T1N - T1O); + T2h = T1O + T1N; + } + } + { + E TR, T2f, T2m, T2o, T1s, T1E, T1D, T2n; + { + E Tn, TQ, T2i, T2l; + Tn = Tj + Tm; + TQ = Tw + TP; + TR = Tn - TQ; + T2f = TQ + Tn; + T2i = T2g - T2h; + T2l = T2j + T2k; + T2m = T2i - T2l; + T2o = T2i + T2l; + } + { + E T18, T1r, T1x, T1C; + T18 = T12 + T17; + T1r = T1d + T1q; + T1s = T18 - T1r; + T1E = T18 + T1r; + T1x = T1t + T1w; + T1C = T1y + T1B; + T1D = T1x + T1C; + T2n = T1x - T1C; + } + Ip[WS(rs, 3)] = KP500000000 * (TR + T1s); + Rp[WS(rs, 3)] = KP500000000 * (T2n - T2o); + Im[WS(rs, 2)] = KP500000000 * (T1s - TR); + Rm[WS(rs, 2)] = KP500000000 * (T2n + T2o); + Rm[WS(rs, 5)] = KP500000000 * (T1D - T1E); + Im[WS(rs, 5)] = KP500000000 * (T2m - T2f); + Rp[0] = KP500000000 * (T1D + T1E); + Ip[0] = KP500000000 * (T2f + T2m); + } + { + E T1H, T2b, T2s, T2B, T2v, T2A, T1K, T2c, T1Q, T29, T1Z, T25, T22, T26, T1V; + E T28; + { + E T1F, T2r, T2t, T1I; + T1F = FNMS(KP250000000, T1w, KP500000000 * T1t); + T1H = T1F - T1G; + T2b = T1F + T1G; + T2r = FNMS(KP500000000, T2j, KP250000000 * T2k); + T2s = T2q - T2r; + T2B = T2q + T2r; + T2t = FMA(KP250000000, T2h, KP500000000 * T2g); + T2v = T2t - T2u; + T2A = T2u + T2t; + T1I = FNMS(KP250000000, T1B, KP500000000 * T1y); + T1K = T1I - T1J; + T2c = T1I + T1J; + } + { + E T1M, T1X, T20, T1R; + T1M = FNMS(KP250000000, T12, KP500000000 * T17); + T1Q = T1M - T1P; + T29 = T1P + T1M; + T1X = FNMS(KP250000000, Tj, KP500000000 * Tm); + T1Z = T1X - T1Y; + T25 = T1Y + T1X; + T20 = FNMS(KP250000000, TP, KP500000000 * Tw); + T22 = T20 - T21; + T26 = T21 + T20; + T1R = FNMS(KP250000000, T1q, KP500000000 * T1d); + T1V = T1R - T1U; + 
T28 = T1R + T1U; + } + { + E T1L, T1W, T2p, T2w; + T1L = T1H + T1K; + T1W = T1Q + T1V; + Rp[WS(rs, 2)] = T1L - T1W; + Rm[WS(rs, 3)] = T1L + T1W; + T2p = T22 + T1Z; + T2w = T2s - T2v; + Ip[WS(rs, 2)] = T2p + T2w; + Im[WS(rs, 3)] = T2w - T2p; + } + { + E T23, T24, T2x, T2y; + T23 = T1Z - T22; + T24 = T1V - T1Q; + Ip[WS(rs, 5)] = T23 + T24; + Im[0] = T24 - T23; + T2x = T1H - T1K; + T2y = T2v + T2s; + Rm[0] = T2x - T2y; + Rp[WS(rs, 5)] = T2x + T2y; + } + { + E T27, T2a, T2z, T2C; + T27 = T25 - T26; + T2a = T28 - T29; + Ip[WS(rs, 1)] = T27 + T2a; + Im[WS(rs, 4)] = T2a - T27; + T2z = T2b - T2c; + T2C = T2A - T2B; + Rm[WS(rs, 4)] = T2z - T2C; + Rp[WS(rs, 1)] = T2z + T2C; + } + { + E T2d, T2e, T2D, T2E; + T2d = T2b + T2c; + T2e = T29 + T28; + Rm[WS(rs, 1)] = T2d - T2e; + Rp[WS(rs, 4)] = T2d + T2e; + T2D = T26 + T25; + T2E = T2A + T2B; + Ip[WS(rs, 4)] = T2D + T2E; + Im[WS(rs, 1)] = T2E - T2D; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 12, "hc2cfdft_12", twinstr, &GENUS, {112, 46, 30, 0} }; + +void X(codelet_hc2cfdft_12) (planner *p) { + X(khc2c_register) (p, hc2cfdft_12, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,896 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hc2cfdft_16 -include hc2cf.h */ + +/* + * This function contains 206 FP additions, 132 FP multiplications, + * (or, 136 additions, 62 multiplications, 70 fused multiply/add), + * 96 stack variables, 4 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T4d, T4g; + { + E T1f, T2e, T3D, T1K, T2g, T1c, T3H, T2W, T2j, TR, T3E, T2R, T2l, T11, T3G; + E T1v, T3p, T2s, Tl, T3o, T3w, T2G, T3z, T1Y, T23, T20, T2H, T21, T29, Tz; + E T26, TE, TA, T2v, T2J, T27, Tv, T2u, TB, T22, T28; + { + E T1o, T1u, T2T, T2V; + { + E T1I, T1A, T16, T1C, T1H, T1G, T2U, T1z, T1b, T1x, T1w; + { + E T1d, T1e, T14, T15; + T1d = Ip[0]; + T1e = Im[0]; + T14 = 
Ip[WS(rs, 4)]; + T15 = Im[WS(rs, 4)]; + { + E T1F, T1D, T1E, T19, T1a; + T1D = Rm[0]; + T1I = T1d + T1e; + T1f = T1d - T1e; + T1E = Rp[0]; + T1A = T14 + T15; + T16 = T14 - T15; + T1C = W[0]; + T2e = T1E + T1D; + T1F = T1D - T1E; + T1H = W[1]; + T19 = Rp[WS(rs, 4)]; + T1a = Rm[WS(rs, 4)]; + T1G = T1C * T1F; + T2U = T1H * T1F; + T1z = W[17]; + T1b = T19 + T1a; + T1x = T1a - T19; + T1w = W[16]; + } + } + { + E T2S, T1y, T13, T18; + T2S = T1z * T1x; + T1y = T1w * T1x; + T13 = W[14]; + T18 = W[15]; + { + E T1J, T1B, T2f, T17; + T1J = FNMS(T1H, T1I, T1G); + T1B = FNMS(T1z, T1A, T1y); + T2f = T13 * T1b; + T17 = T13 * T16; + T2T = FMA(T1w, T1A, T2S); + T3D = T1J - T1B; + T1K = T1B + T1J; + T2g = FMA(T18, T16, T2f); + T1c = FNMS(T18, T1b, T17); + T2V = FMA(T1C, T1I, T2U); + } + } + } + { + E T1n, TL, T1m, T1j, TQ, T1l, T2N, TV, T1t, T10, T1q, T1s, T1p, T1r, T2O; + E T2Q; + { + E TO, TP, TJ, TK; + TJ = Ip[WS(rs, 2)]; + TK = Im[WS(rs, 2)]; + TO = Rp[WS(rs, 2)]; + T3H = T2V - T2T; + T2W = T2T + T2V; + T1n = TJ + TK; + TL = TJ - TK; + TP = Rm[WS(rs, 2)]; + T1m = W[9]; + T1j = W[8]; + { + E TT, T1k, TU, TY, TZ; + TT = Ip[WS(rs, 6)]; + TQ = TO + TP; + T1k = TP - TO; + TU = Im[WS(rs, 6)]; + TY = Rp[WS(rs, 6)]; + TZ = Rm[WS(rs, 6)]; + T1l = T1j * T1k; + T2N = T1m * T1k; + TV = TT - TU; + T1t = TT + TU; + T10 = TY + TZ; + T1q = TZ - TY; + T1s = W[25]; + T1p = W[24]; + } + } + { + E TN, T2P, T2i, TM, TI; + TI = W[6]; + TN = W[7]; + T2P = T1s * T1q; + T1r = T1p * T1q; + T2i = TI * TQ; + TM = TI * TL; + T2O = FMA(T1j, T1n, T2N); + T2Q = FMA(T1p, T1t, T2P); + T2j = FMA(TN, TL, T2i); + TR = FNMS(TN, TQ, TM); + } + { + E TX, T2k, TW, TS; + TS = W[22]; + T3E = T2O - T2Q; + T2R = T2O + T2Q; + TX = W[23]; + T2k = TS * T10; + TW = TS * TV; + T1o = FNMS(T1m, T1n, T1l); + T1u = FNMS(T1s, T1t, T1r); + T2l = FMA(TX, TV, T2k); + T11 = FNMS(TX, T10, TW); + } + } + { + E T1Q, T1N, T2C, T1O, T1W, Te, T1T, Tj, Tf, T2q, T2E, T1U, Ta, T2p, Tg; + E T1P, T1V; + { + E T4, T9, T5, T2o, Tb, T1S, T1, T1M, T6; 
+ { + E T2, T3, T7, T8; + T2 = Ip[WS(rs, 1)]; + T3G = T1o - T1u; + T1v = T1o + T1u; + T3 = Im[WS(rs, 1)]; + T7 = Rp[WS(rs, 1)]; + T8 = Rm[WS(rs, 1)]; + T1 = W[2]; + T1Q = T2 + T3; + T4 = T2 - T3; + T1N = T7 - T8; + T9 = T7 + T8; + T1M = W[4]; + T5 = T1 * T4; + } + { + E Tc, Td, Th, Ti; + Tc = Ip[WS(rs, 5)]; + T2o = T1 * T9; + T2C = T1M * T1Q; + T1O = T1M * T1N; + Td = Im[WS(rs, 5)]; + Th = Rp[WS(rs, 5)]; + Ti = Rm[WS(rs, 5)]; + Tb = W[18]; + T1W = Tc + Td; + Te = Tc - Td; + T1T = Th - Ti; + Tj = Th + Ti; + T1S = W[20]; + Tf = Tb * Te; + } + T6 = W[3]; + T2q = Tb * Tj; + T2E = T1S * T1W; + T1U = T1S * T1T; + Ta = FNMS(T6, T9, T5); + T2p = FMA(T6, T4, T2o); + Tg = W[19]; + T1P = W[5]; + T1V = W[21]; + } + { + E Tp, Tu, Tq, T2t, Tw, T25, Tm, T1Z, Tr; + { + E Tn, To, Ts, Tt, T2r, Tk; + Tn = Ip[WS(rs, 7)]; + T2r = FMA(Tg, Te, T2q); + Tk = FNMS(Tg, Tj, Tf); + { + E T2D, T1R, T2F, T1X; + T2D = FNMS(T1P, T1N, T2C); + T1R = FMA(T1P, T1Q, T1O); + T2F = FNMS(T1V, T1T, T2E); + T1X = FMA(T1V, T1W, T1U); + T3p = T2p - T2r; + T2s = T2p + T2r; + Tl = Ta + Tk; + T3o = Ta - Tk; + T3w = T2F - T2D; + T2G = T2D + T2F; + T3z = T1X - T1R; + T1Y = T1R + T1X; + To = Im[WS(rs, 7)]; + } + Ts = Rp[WS(rs, 7)]; + Tt = Rm[WS(rs, 7)]; + Tm = W[26]; + T23 = Tn + To; + Tp = Tn - To; + T20 = Ts - Tt; + Tu = Ts + Tt; + T1Z = W[28]; + Tq = Tm * Tp; + } + { + E Tx, Ty, TC, TD; + Tx = Ip[WS(rs, 3)]; + T2t = Tm * Tu; + T2H = T1Z * T23; + T21 = T1Z * T20; + Ty = Im[WS(rs, 3)]; + TC = Rp[WS(rs, 3)]; + TD = Rm[WS(rs, 3)]; + Tw = W[10]; + T29 = Tx + Ty; + Tz = Tx - Ty; + T26 = TC - TD; + TE = TC + TD; + T25 = W[12]; + TA = Tw * Tz; + } + Tr = W[27]; + T2v = Tw * TE; + T2J = T25 * T29; + T27 = T25 * T26; + Tv = FNMS(Tr, Tu, Tq); + T2u = FMA(Tr, Tp, T2t); + TB = W[11]; + T22 = W[29]; + T28 = W[13]; + } + } + } + { + E T3r, T3s, T3A, T3x, T3M, T3l, T3L, T3m, T3f, T3i; + { + E T3c, TH, T36, T3g, T3h, T39, T32, T1h, T2A, T2d, T2h, T31, T2y, T30, T2Y; + E T2m, T2B, T1i; + { + E T2x, T2M, T1L, T2c, T2X, T12, T1g; + 
{ + E TG, T2b, T34, T2L, T2w, TF, T37, T38, T35; + T2w = FMA(TB, Tz, T2v); + TF = FNMS(TB, TE, TA); + { + E T2I, T24, T2K, T2a; + T2I = FNMS(T22, T20, T2H); + T24 = FMA(T22, T23, T21); + T2K = FNMS(T28, T26, T2J); + T2a = FMA(T28, T29, T27); + T3r = T2u - T2w; + T2x = T2u + T2w; + TG = Tv + TF; + T3s = Tv - TF; + T2L = T2I + T2K; + T3A = T2I - T2K; + T3x = T2a - T24; + T2b = T24 + T2a; + } + T2M = T2G + T2L; + T34 = T2L - T2G; + T37 = T1K - T1v; + T1L = T1v + T1K; + T2c = T1Y + T2b; + T35 = T1Y - T2b; + T3c = Tl - TG; + TH = Tl + TG; + T38 = T2W - T2R; + T2X = T2R + T2W; + T36 = T34 + T35; + T3g = T34 - T35; + T3M = TR - T11; + T12 = TR + T11; + T3h = T37 + T38; + T39 = T37 - T38; + T1g = T1c + T1f; + T3l = T1f - T1c; + } + T32 = T1g - T12; + T1h = T12 + T1g; + T2A = T2c + T1L; + T2d = T1L - T2c; + T3L = T2e - T2g; + T2h = T2e + T2g; + T31 = T2x - T2s; + T2y = T2s + T2x; + T30 = T2M + T2X; + T2Y = T2M - T2X; + T2m = T2j + T2l; + T3m = T2j - T2l; + } + T2B = T1h - TH; + T1i = TH + T1h; + { + E T3e, T3d, T3j, T3k; + { + E T33, T3b, T2z, T2Z, T3a, T2n; + T3f = T32 - T31; + T33 = T31 + T32; + T3b = T2h - T2m; + T2n = T2h + T2m; + Im[WS(rs, 7)] = KP500000000 * (T2d - T1i); + Ip[0] = KP500000000 * (T1i + T2d); + Im[WS(rs, 3)] = KP500000000 * (T2Y - T2B); + Ip[WS(rs, 4)] = KP500000000 * (T2B + T2Y); + T2z = T2n - T2y; + T2Z = T2n + T2y; + T3a = T36 + T39; + T3e = T39 - T36; + T3d = T3b - T3c; + T3j = T3b + T3c; + Rp[WS(rs, 4)] = KP500000000 * (T2z + T2A); + Rm[WS(rs, 3)] = KP500000000 * (T2z - T2A); + Rp[0] = KP500000000 * (T2Z + T30); + Rm[WS(rs, 7)] = KP500000000 * (T2Z - T30); + Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP707106781, T3a, T33))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3a, T33)); + T3k = T3g + T3h; + T3i = T3g - T3h; + } + Rp[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3k, T3j)); + Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP707106781, T3k, T3j)); + Rp[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3e, T3d)); + Rm[WS(rs, 1)] = KP500000000 * 
(FNMS(KP707106781, T3e, T3d)); + } + } + { + E T3Z, T3n, T3F, T3I, T4e, T44, T4f, T47, T4a, T3u, T3U, T3C, T49, T3N, T40; + E T3Q; + { + E T3y, T3B, T3O, T3q, T3t, T3P; + { + E T42, T43, T45, T46; + T3y = T3w + T3x; + T42 = T3w - T3x; + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP707106781, T3i, T3f))); + Ip[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3i, T3f)); + T3Z = T3m + T3l; + T3n = T3l - T3m; + T43 = T3A - T3z; + T3B = T3z + T3A; + T3F = T3D - T3E; + T45 = T3E + T3D; + T46 = T3H - T3G; + T3I = T3G + T3H; + T3O = T3p + T3o; + T3q = T3o - T3p; + T4e = FNMS(KP414213562, T42, T43); + T44 = FMA(KP414213562, T43, T42); + T4f = FNMS(KP414213562, T45, T46); + T47 = FMA(KP414213562, T46, T45); + T3t = T3r + T3s; + T3P = T3r - T3s; + } + T4a = T3q - T3t; + T3u = T3q + T3t; + T3U = FNMS(KP414213562, T3y, T3B); + T3C = FMA(KP414213562, T3B, T3y); + T49 = T3L - T3M; + T3N = T3L + T3M; + T40 = T3P - T3O; + T3Q = T3O + T3P; + } + { + E T3T, T3v, T3X, T3R, T3J, T3V; + T3T = FNMS(KP707106781, T3u, T3n); + T3v = FMA(KP707106781, T3u, T3n); + T3X = FMA(KP707106781, T3Q, T3N); + T3R = FNMS(KP707106781, T3Q, T3N); + T3J = FNMS(KP414213562, T3I, T3F); + T3V = FMA(KP414213562, T3F, T3I); + { + E T4c, T4b, T4h, T4i, T41, T48; + T4d = FMA(KP707106781, T40, T3Z); + T41 = FNMS(KP707106781, T40, T3Z); + T48 = T44 - T47; + T4c = T44 + T47; + { + E T3Y, T3W, T3K, T3S; + T3Y = T3U + T3V; + T3W = T3U - T3V; + T3K = T3C + T3J; + T3S = T3J - T3C; + Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP923879532, T3W, T3T))); + Ip[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T3W, T3T)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3Y, T3X)); + Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP923879532, T3Y, T3X)); + Rp[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T3S, T3R)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP923879532, T3S, T3R)); + Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP923879532, T3K, T3v))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3K, T3v)); + Ip[WS(rs, 7)] = KP500000000 * (FMA(KP923879532, 
T48, T41)); + Im[0] = -(KP500000000 * (FNMS(KP923879532, T48, T41))); + } + T4b = FMA(KP707106781, T4a, T49); + T4h = FNMS(KP707106781, T4a, T49); + T4i = T4e + T4f; + T4g = T4e - T4f; + Rm[0] = KP500000000 * (FMA(KP923879532, T4i, T4h)); + Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP923879532, T4i, T4h)); + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4c, T4b)); + Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP923879532, T4c, T4b)); + } + } + } + } + } + Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP923879532, T4g, T4d))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4g, T4d)); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cfdft_16", twinstr, &GENUS, {136, 62, 70, 0} }; + +void X(codelet_hc2cfdft_16) (planner *p) { + X(khc2c_register) (p, hc2cfdft_16, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hc2cfdft_16 -include hc2cf.h */ + +/* + * This function contains 206 FP additions, 100 FP multiplications, + * (or, 168 additions, 62 multiplications, 38 fused multiply/add), + * 61 stack variables, 4 constants, and 64 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP461939766, +0.461939766255643378064091594698394143411208313); + DK(KP191341716, +0.191341716182544885864229992015199433380672281); + DK(KP353553390, +0.353553390593273762200422181052424519642417969); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) { + E T19, T3h, T21, T2Y, T1o, T3d, T2s, T39, TW, T3i, T24, T2Z, T1z, T3c, T2p; + E T3a, Tj, T2S, T28, T2R, T1L, T36, T2i, T32, TC, T2V, T2b, T2U, T1W, T35; + E T2l, T33; + { + E T10, 
T1m, T14, T1k, T18, T1h, T1f, T1Z; + { + E TY, TZ, T12, T13; + TY = Ip[WS(rs, 4)]; + TZ = Im[WS(rs, 4)]; + T10 = TY - TZ; + T1m = TY + TZ; + T12 = Rp[WS(rs, 4)]; + T13 = Rm[WS(rs, 4)]; + T14 = T12 + T13; + T1k = T12 - T13; + } + { + E T16, T17, T1d, T1e; + T16 = Ip[0]; + T17 = Im[0]; + T18 = T16 - T17; + T1h = T16 + T17; + T1d = Rm[0]; + T1e = Rp[0]; + T1f = T1d - T1e; + T1Z = T1e + T1d; + } + { + E T15, T20, TX, T11; + TX = W[14]; + T11 = W[15]; + T15 = FNMS(T11, T14, TX * T10); + T20 = FMA(TX, T14, T11 * T10); + T19 = T15 + T18; + T3h = T1Z - T20; + T21 = T1Z + T20; + T2Y = T18 - T15; + } + { + E T1i, T2r, T1n, T2q; + { + E T1c, T1g, T1j, T1l; + T1c = W[0]; + T1g = W[1]; + T1i = FNMS(T1g, T1h, T1c * T1f); + T2r = FMA(T1g, T1f, T1c * T1h); + T1j = W[16]; + T1l = W[17]; + T1n = FMA(T1j, T1k, T1l * T1m); + T2q = FNMS(T1l, T1k, T1j * T1m); + } + T1o = T1i - T1n; + T3d = T2r - T2q; + T2s = T2q + T2r; + T39 = T1n + T1i; + } + } + { + E TH, T1s, TL, T1q, TQ, T1x, TU, T1v; + { + E TF, TG, TJ, TK; + TF = Ip[WS(rs, 2)]; + TG = Im[WS(rs, 2)]; + TH = TF - TG; + T1s = TF + TG; + TJ = Rp[WS(rs, 2)]; + TK = Rm[WS(rs, 2)]; + TL = TJ + TK; + T1q = TJ - TK; + } + { + E TO, TP, TS, TT; + TO = Ip[WS(rs, 6)]; + TP = Im[WS(rs, 6)]; + TQ = TO - TP; + T1x = TO + TP; + TS = Rp[WS(rs, 6)]; + TT = Rm[WS(rs, 6)]; + TU = TS + TT; + T1v = TS - TT; + } + { + E TM, T22, TV, T23; + { + E TE, TI, TN, TR; + TE = W[6]; + TI = W[7]; + TM = FNMS(TI, TL, TE * TH); + T22 = FMA(TE, TL, TI * TH); + TN = W[22]; + TR = W[23]; + TV = FNMS(TR, TU, TN * TQ); + T23 = FMA(TN, TU, TR * TQ); + } + TW = TM + TV; + T3i = TM - TV; + T24 = T22 + T23; + T2Z = T22 - T23; + } + { + E T1t, T2n, T1y, T2o; + { + E T1p, T1r, T1u, T1w; + T1p = W[8]; + T1r = W[9]; + T1t = FMA(T1p, T1q, T1r * T1s); + T2n = FNMS(T1r, T1q, T1p * T1s); + T1u = W[24]; + T1w = W[25]; + T1y = FMA(T1u, T1v, T1w * T1x); + T2o = FNMS(T1w, T1v, T1u * T1x); + } + T1z = T1t + T1y; + T3c = T1y - T1t; + T2p = T2n + T2o; + T3a = T2n - T2o; + } + } + { + E 
T4, T1E, T8, T1C, Td, T1J, Th, T1H; + { + E T2, T3, T6, T7; + T2 = Ip[WS(rs, 1)]; + T3 = Im[WS(rs, 1)]; + T4 = T2 - T3; + T1E = T2 + T3; + T6 = Rp[WS(rs, 1)]; + T7 = Rm[WS(rs, 1)]; + T8 = T6 + T7; + T1C = T6 - T7; + } + { + E Tb, Tc, Tf, Tg; + Tb = Ip[WS(rs, 5)]; + Tc = Im[WS(rs, 5)]; + Td = Tb - Tc; + T1J = Tb + Tc; + Tf = Rp[WS(rs, 5)]; + Tg = Rm[WS(rs, 5)]; + Th = Tf + Tg; + T1H = Tf - Tg; + } + { + E T9, T26, Ti, T27; + { + E T1, T5, Ta, Te; + T1 = W[2]; + T5 = W[3]; + T9 = FNMS(T5, T8, T1 * T4); + T26 = FMA(T1, T8, T5 * T4); + Ta = W[18]; + Te = W[19]; + Ti = FNMS(Te, Th, Ta * Td); + T27 = FMA(Ta, Th, Te * Td); + } + Tj = T9 + Ti; + T2S = T26 - T27; + T28 = T26 + T27; + T2R = T9 - Ti; + } + { + E T1F, T2g, T1K, T2h; + { + E T1B, T1D, T1G, T1I; + T1B = W[4]; + T1D = W[5]; + T1F = FMA(T1B, T1C, T1D * T1E); + T2g = FNMS(T1D, T1C, T1B * T1E); + T1G = W[20]; + T1I = W[21]; + T1K = FMA(T1G, T1H, T1I * T1J); + T2h = FNMS(T1I, T1H, T1G * T1J); + } + T1L = T1F + T1K; + T36 = T2g - T2h; + T2i = T2g + T2h; + T32 = T1K - T1F; + } + } + { + E Tn, T1P, Tr, T1N, Tw, T1U, TA, T1S; + { + E Tl, Tm, Tp, Tq; + Tl = Ip[WS(rs, 7)]; + Tm = Im[WS(rs, 7)]; + Tn = Tl - Tm; + T1P = Tl + Tm; + Tp = Rp[WS(rs, 7)]; + Tq = Rm[WS(rs, 7)]; + Tr = Tp + Tq; + T1N = Tp - Tq; + } + { + E Tu, Tv, Ty, Tz; + Tu = Ip[WS(rs, 3)]; + Tv = Im[WS(rs, 3)]; + Tw = Tu - Tv; + T1U = Tu + Tv; + Ty = Rp[WS(rs, 3)]; + Tz = Rm[WS(rs, 3)]; + TA = Ty + Tz; + T1S = Ty - Tz; + } + { + E Ts, T29, TB, T2a; + { + E Tk, To, Tt, Tx; + Tk = W[26]; + To = W[27]; + Ts = FNMS(To, Tr, Tk * Tn); + T29 = FMA(Tk, Tr, To * Tn); + Tt = W[10]; + Tx = W[11]; + TB = FNMS(Tx, TA, Tt * Tw); + T2a = FMA(Tt, TA, Tx * Tw); + } + TC = Ts + TB; + T2V = Ts - TB; + T2b = T29 + T2a; + T2U = T29 - T2a; + } + { + E T1Q, T2j, T1V, T2k; + { + E T1M, T1O, T1R, T1T; + T1M = W[28]; + T1O = W[29]; + T1Q = FMA(T1M, T1N, T1O * T1P); + T2j = FNMS(T1O, T1N, T1M * T1P); + T1R = W[12]; + T1T = W[13]; + T1V = FMA(T1R, T1S, T1T * T1U); + T2k = FNMS(T1T, T1S, 
T1R * T1U); + } + T1W = T1Q + T1V; + T35 = T1V - T1Q; + T2l = T2j + T2k; + T33 = T2j - T2k; + } + } + { + E T1b, T2f, T2u, T2w, T1Y, T2e, T2d, T2v; + { + E TD, T1a, T2m, T2t; + TD = Tj + TC; + T1a = TW + T19; + T1b = TD + T1a; + T2f = T1a - TD; + T2m = T2i + T2l; + T2t = T2p + T2s; + T2u = T2m - T2t; + T2w = T2m + T2t; + } + { + E T1A, T1X, T25, T2c; + T1A = T1o - T1z; + T1X = T1L + T1W; + T1Y = T1A - T1X; + T2e = T1X + T1A; + T25 = T21 + T24; + T2c = T28 + T2b; + T2d = T25 - T2c; + T2v = T25 + T2c; + } + Ip[0] = KP500000000 * (T1b + T1Y); + Rp[0] = KP500000000 * (T2v + T2w); + Im[WS(rs, 7)] = KP500000000 * (T1Y - T1b); + Rm[WS(rs, 7)] = KP500000000 * (T2v - T2w); + Rm[WS(rs, 3)] = KP500000000 * (T2d - T2e); + Im[WS(rs, 3)] = KP500000000 * (T2u - T2f); + Rp[WS(rs, 4)] = KP500000000 * (T2d + T2e); + Ip[WS(rs, 4)] = KP500000000 * (T2f + T2u); + } + { + E T2z, T2L, T2J, T2P, T2C, T2M, T2F, T2N; + { + E T2x, T2y, T2H, T2I; + T2x = T2b - T28; + T2y = T19 - TW; + T2z = KP500000000 * (T2x + T2y); + T2L = KP500000000 * (T2y - T2x); + T2H = T21 - T24; + T2I = Tj - TC; + T2J = KP500000000 * (T2H - T2I); + T2P = KP500000000 * (T2H + T2I); + } + { + E T2A, T2B, T2D, T2E; + T2A = T2l - T2i; + T2B = T1L - T1W; + T2C = T2A + T2B; + T2M = T2A - T2B; + T2D = T1z + T1o; + T2E = T2s - T2p; + T2F = T2D - T2E; + T2N = T2D + T2E; + } + { + E T2G, T2Q, T2K, T2O; + T2G = KP353553390 * (T2C + T2F); + Ip[WS(rs, 2)] = T2z + T2G; + Im[WS(rs, 5)] = T2G - T2z; + T2Q = KP353553390 * (T2M + T2N); + Rm[WS(rs, 5)] = T2P - T2Q; + Rp[WS(rs, 2)] = T2P + T2Q; + T2K = KP353553390 * (T2F - T2C); + Rm[WS(rs, 1)] = T2J - T2K; + Rp[WS(rs, 6)] = T2J + T2K; + T2O = KP353553390 * (T2M - T2N); + Ip[WS(rs, 6)] = T2L + T2O; + Im[WS(rs, 1)] = T2O - T2L; + } + } + { + E T30, T3w, T3F, T3j, T2X, T3G, T3D, T3L, T3m, T3v, T38, T3q, T3A, T3K, T3f; + E T3r; + { + E T2T, T2W, T34, T37; + T30 = KP500000000 * (T2Y - T2Z); + T3w = KP500000000 * (T2Z + T2Y); + T3F = KP500000000 * (T3h - T3i); + T3j = KP500000000 * (T3h + 
T3i); + T2T = T2R - T2S; + T2W = T2U + T2V; + T2X = KP353553390 * (T2T + T2W); + T3G = KP353553390 * (T2T - T2W); + { + E T3B, T3C, T3k, T3l; + T3B = T3a + T39; + T3C = T3d - T3c; + T3D = FNMS(KP461939766, T3C, KP191341716 * T3B); + T3L = FMA(KP461939766, T3B, KP191341716 * T3C); + T3k = T2S + T2R; + T3l = T2U - T2V; + T3m = KP353553390 * (T3k + T3l); + T3v = KP353553390 * (T3l - T3k); + } + T34 = T32 + T33; + T37 = T35 - T36; + T38 = FMA(KP191341716, T34, KP461939766 * T37); + T3q = FNMS(KP191341716, T37, KP461939766 * T34); + { + E T3y, T3z, T3b, T3e; + T3y = T33 - T32; + T3z = T36 + T35; + T3A = FMA(KP461939766, T3y, KP191341716 * T3z); + T3K = FNMS(KP461939766, T3z, KP191341716 * T3y); + T3b = T39 - T3a; + T3e = T3c + T3d; + T3f = FNMS(KP191341716, T3e, KP461939766 * T3b); + T3r = FMA(KP191341716, T3b, KP461939766 * T3e); + } + } + { + E T31, T3g, T3t, T3u; + T31 = T2X + T30; + T3g = T38 + T3f; + Ip[WS(rs, 1)] = T31 + T3g; + Im[WS(rs, 6)] = T3g - T31; + T3t = T3j + T3m; + T3u = T3q + T3r; + Rm[WS(rs, 6)] = T3t - T3u; + Rp[WS(rs, 1)] = T3t + T3u; + } + { + E T3n, T3o, T3p, T3s; + T3n = T3j - T3m; + T3o = T3f - T38; + Rm[WS(rs, 2)] = T3n - T3o; + Rp[WS(rs, 5)] = T3n + T3o; + T3p = T30 - T2X; + T3s = T3q - T3r; + Ip[WS(rs, 5)] = T3p + T3s; + Im[WS(rs, 2)] = T3s - T3p; + } + { + E T3x, T3E, T3N, T3O; + T3x = T3v + T3w; + T3E = T3A + T3D; + Ip[WS(rs, 3)] = T3x + T3E; + Im[WS(rs, 4)] = T3E - T3x; + T3N = T3F + T3G; + T3O = T3K + T3L; + Rm[WS(rs, 4)] = T3N - T3O; + Rp[WS(rs, 3)] = T3N + T3O; + } + { + E T3H, T3I, T3J, T3M; + T3H = T3F - T3G; + T3I = T3D - T3A; + Rm[0] = T3H - T3I; + Rp[WS(rs, 7)] = T3H + T3I; + T3J = T3w - T3v; + T3M = T3K - T3L; + Ip[WS(rs, 7)] = T3J + T3M; + Im[0] = T3M - T3J; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 16, "hc2cfdft_16", twinstr, &GENUS, {168, 62, 38, 0} }; + +void X(codelet_hc2cfdft_16) (planner *p) { + X(khc2c_register) (p, hc2cfdft_16, 
&desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -dit -name hc2cfdft_2 -include hc2cf.h */ + +/* + * This function contains 10 FP additions, 8 FP multiplications, + * (or, 8 additions, 6 multiplications, 2 fused multiply/add), + * 12 stack variables, 1 constants, and 8 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im 
= Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T9, Ta, T3, Tc, T7, T4; + { + E T1, T2, T5, T6; + T1 = Ip[0]; + T2 = Im[0]; + T5 = Rm[0]; + T6 = Rp[0]; + T9 = W[1]; + Ta = T1 + T2; + T3 = T1 - T2; + Tc = T6 + T5; + T7 = T5 - T6; + T4 = W[0]; + } + { + E Td, T8, Te, Tb; + Td = T9 * T7; + T8 = T4 * T7; + Te = FMA(T4, Ta, Td); + Tb = FNMS(T9, Ta, T8); + Rp[0] = KP500000000 * (Tc + Te); + Rm[0] = KP500000000 * (Tc - Te); + Im[0] = KP500000000 * (Tb - T3); + Ip[0] = KP500000000 * (T3 + Tb); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cfdft_2", twinstr, &GENUS, {8, 6, 2, 0} }; + +void X(codelet_hc2cfdft_2) (planner *p) { + X(khc2c_register) (p, hc2cfdft_2, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 2 -dit -name hc2cfdft_2 -include hc2cf.h */ + +/* + * This function contains 10 FP additions, 8 FP multiplications, + * (or, 8 additions, 6 multiplications, 2 fused multiply/add), + * 10 stack variables, 1 constants, and 8 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 2, MAKE_VOLATILE_STRIDE(8, rs)) { + E T3, T9, T7, Tb; + { + E T1, T2, T5, T6; + T1 = Ip[0]; + T2 = Im[0]; + T3 = T1 - T2; + T9 = T1 + T2; + T5 = Rm[0]; + T6 = Rp[0]; + T7 = T5 - T6; + Tb = T6 + T5; + } + { + E Ta, Tc, T4, T8; + T4 = W[0]; + T8 = W[1]; + Ta = FNMS(T8, T9, T4 * T7); + Tc = FMA(T8, T7, T4 * T9); + Ip[0] = KP500000000 * (T3 + Ta); + Rp[0] = KP500000000 * (Tb + Tc); + Im[0] = KP500000000 * (Ta - T3); + Rm[0] = KP500000000 * (Tb - Tc); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 
2}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 2, "hc2cfdft_2", twinstr, &GENUS, {8, 6, 2, 0} }; + +void X(codelet_hc2cfdft_2) (planner *p) { + X(khc2c_register) (p, hc2cfdft_2, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1143 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:29 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cfdft_20 -include hc2cf.h */ + +/* + * This function contains 286 FP additions, 188 FP multiplications, + * (or, 176 additions, 78 multiplications, 110 fused multiply/add), + * 174 stack variables, 5 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T4X, T5i, T5k, T5e, T5c, T5d, T5j, T5f; + { + E T2E, T4W, T3v, T4k, T2M, T3w, T4V, T4j, T2p, T2T, T5a, T5A, T3D, T3o, T4b; + E T4B, T1Y, T2S, T5z, T57, T3h, T3C, T4A, T44, TH, T2P, T50, T5x, T3z, T32; + E T3P, T4D, T3V, T3U, T5w, T53, T2Q, T1o, T3A, T39; + { + E T1V, T9, T2w, Tu, T1, T6, T1R, T1U, T1T, T2Y, T5, T40, T2F, T10, T2C; + E TE, TX, T2m, T1y, T4g, TS, T33, TW, Tw, TB, T2y, T2B, TA, T3L, T2A; + E T3t, T1q, T1v, T2i, T2l, T2k, T3d, T1u, T48, Tm, Tr, T2s, T2v, T2u, T3J; + E Tq, T3r, T20, T1g, T23, T1l, T1h, T3S, T3k, T21, T2H, TL, T2K, TQ, TM; + E T35, 
T4h, T2I, T2f, T2g, T1I, T1D, T2c, T46, T2e, T3b, T1E, T28, T16, T29; + E T1b, T25, T3i, T27, T3Q, T17, T1O, T1P, Tj, T1M, Te, T1L, Tb, T3Y, TV; + E T1d, T1Z; + { + E T1S, T4, T7, T8; + T7 = Rp[WS(rs, 9)]; + T8 = Rm[WS(rs, 9)]; + { + E Ts, Tt, T2, T3; + Ts = Rp[WS(rs, 2)]; + Tt = Rm[WS(rs, 2)]; + T2 = Ip[WS(rs, 9)]; + T1V = T7 + T8; + T9 = T7 - T8; + T2w = Ts - Tt; + Tu = Ts + Tt; + T3 = Im[WS(rs, 9)]; + T1 = W[36]; + T6 = W[37]; + T1R = W[34]; + T1S = T2 - T3; + T4 = T2 + T3; + T1U = W[35]; + } + { + E TY, TZ, TC, TD; + TY = Ip[0]; + T1T = T1R * T1S; + T2Y = T6 * T4; + T5 = T1 * T4; + T40 = T1U * T1S; + TZ = Im[0]; + TC = Rp[WS(rs, 7)]; + TD = Rm[WS(rs, 7)]; + { + E T1w, T1x, TT, TU; + T1w = Rp[WS(rs, 1)]; + T2F = TY - TZ; + T10 = TY + TZ; + T2C = TC - TD; + TE = TC + TD; + T1x = Rm[WS(rs, 1)]; + TT = Rm[0]; + TU = Rp[0]; + TX = W[0]; + T2m = T1w + T1x; + T1y = T1w - T1x; + T4g = TU + TT; + TV = TT - TU; + TS = W[1]; + } + } + } + { + E T2j, T1t, T1r, T1s; + { + E Tx, Ty, T2z, Tz; + Tx = Ip[WS(rs, 7)]; + Ty = Im[WS(rs, 7)]; + T33 = TX * TV; + TW = TS * TV; + Tw = W[26]; + T2z = Tx + Ty; + Tz = Tx - Ty; + TB = W[27]; + T2y = W[28]; + T2B = W[29]; + TA = Tw * Tz; + T3L = TB * Tz; + T2A = T2y * T2z; + T3t = T2B * T2z; + } + T1r = Ip[WS(rs, 1)]; + T1s = Im[WS(rs, 1)]; + T1q = W[4]; + T1v = W[5]; + T2i = W[2]; + T2j = T1r - T1s; + T1t = T1r + T1s; + T2l = W[3]; + { + E T2t, Tp, Tn, To; + Tn = Ip[WS(rs, 2)]; + T2k = T2i * T2j; + T3d = T1v * T1t; + T1u = T1q * T1t; + T48 = T2l * T2j; + To = Im[WS(rs, 2)]; + Tm = W[6]; + Tr = W[7]; + T2s = W[8]; + T2t = Tn + To; + Tp = Tn - To; + T2v = W[9]; + { + E T1e, T1f, T1j, T1k; + T1e = Ip[WS(rs, 3)]; + T2u = T2s * T2t; + T3J = Tr * Tp; + Tq = Tm * Tp; + T3r = T2v * T2t; + T1f = Im[WS(rs, 3)]; + T1j = Rp[WS(rs, 3)]; + T1k = Rm[WS(rs, 3)]; + T1d = W[10]; + T20 = T1e + T1f; + T1g = T1e - T1f; + T23 = T1j - T1k; + T1l = T1j + T1k; + T1Z = W[12]; + T1h = T1d * T1g; + } + } + } + { + E T2d, T1A, TI, T2G, T26, T13; + { + E TJ, TK, TO, 
TP; + TJ = Ip[WS(rs, 5)]; + T3S = T1d * T1l; + T3k = T1Z * T23; + T21 = T1Z * T20; + TK = Im[WS(rs, 5)]; + TO = Rp[WS(rs, 5)]; + TP = Rm[WS(rs, 5)]; + TI = W[20]; + T2H = TJ - TK; + TL = TJ + TK; + T2K = TO + TP; + TQ = TO - TP; + T2G = W[18]; + TM = TI * TL; + } + { + E T1G, T1H, T1B, T1C; + T1G = Rm[WS(rs, 6)]; + T35 = TI * TQ; + T4h = T2G * T2K; + T2I = T2G * T2H; + T1H = Rp[WS(rs, 6)]; + T1B = Ip[WS(rs, 6)]; + T1C = Im[WS(rs, 6)]; + T2f = W[23]; + T2g = T1H + T1G; + T1I = T1G - T1H; + T2d = T1B - T1C; + T1D = T1B + T1C; + T2c = W[22]; + T1A = W[24]; + T46 = T2f * T2d; + } + { + E T14, T15, T19, T1a; + T14 = Ip[WS(rs, 8)]; + T2e = T2c * T2d; + T3b = T1A * T1I; + T1E = T1A * T1D; + T15 = Im[WS(rs, 8)]; + T19 = Rp[WS(rs, 8)]; + T1a = Rm[WS(rs, 8)]; + T28 = W[32]; + T16 = T14 - T15; + T29 = T14 + T15; + T1b = T19 + T1a; + T26 = T1a - T19; + T25 = W[33]; + T13 = W[30]; + T3i = T28 * T26; + } + { + E Th, Ti, Tc, Td; + Th = Rm[WS(rs, 4)]; + T27 = T25 * T26; + T3Q = T13 * T1b; + T17 = T13 * T16; + Ti = Rp[WS(rs, 4)]; + Tc = Ip[WS(rs, 4)]; + Td = Im[WS(rs, 4)]; + T1O = W[15]; + T1P = Ti + Th; + Tj = Th - Ti; + T1M = Tc - Td; + Te = Tc + Td; + T1L = W[14]; + Tb = W[16]; + T3Y = T1O * T1M; + } + } + { + E T1N, T2W, Tf, T2L, T4i; + { + E T2x, T2D, T3s, T3u, T2J; + T2x = FNMS(T2v, T2w, T2u); + T1N = T1L * T1M; + T2W = Tb * Tj; + Tf = Tb * Te; + T2D = FNMS(T2B, T2C, T2A); + T3s = FMA(T2s, T2w, T3r); + T3u = FMA(T2y, T2C, T3t); + T2J = W[19]; + T2E = T2x - T2D; + T4W = T2x + T2D; + T3v = T3s + T3u; + T4k = T3u - T3s; + T2L = FNMS(T2J, T2K, T2I); + T4i = FMA(T2J, T2H, T4h); + } + { + E T42, T43, T45, T4a, T3O, T3N; + { + E T2a, T3j, T47, T3l, T24, T2o, T3n, T49, T22, T2h, T2n; + T2a = FMA(T28, T29, T27); + T3j = FNMS(T25, T29, T3i); + T2M = T2F - T2L; + T3w = T2L + T2F; + T4V = T4g + T4i; + T4j = T4g - T4i; + T22 = W[13]; + T2h = FNMS(T2f, T2g, T2e); + T2n = FNMS(T2l, T2m, T2k); + T47 = FMA(T2c, T2g, T46); + T3l = FMA(T22, T20, T3k); + T24 = FNMS(T22, T23, T21); + T2o = T2h - 
T2n; + T3n = T2h + T2n; + T49 = FMA(T2i, T2m, T48); + { + E T2b, T58, T3m, T59; + T2b = T24 - T2a; + T58 = T2a + T24; + T3m = T3j - T3l; + T45 = T3j + T3l; + T4a = T47 - T49; + T59 = T47 + T49; + T2p = T2b - T2o; + T2T = T2b + T2o; + T5a = T58 + T59; + T5A = T59 - T58; + T3D = T3m + T3n; + T3o = T3m - T3n; + } + } + { + E T1z, T3e, T1Q, T3c, T1J, T1W, T3Z, T41, T1F; + T1z = FNMS(T1v, T1y, T1u); + T3e = FMA(T1q, T1y, T3d); + T1F = W[25]; + T4b = T45 + T4a; + T4B = T4a - T45; + T1Q = FNMS(T1O, T1P, T1N); + T3c = FNMS(T1F, T1D, T3b); + T1J = FMA(T1F, T1I, T1E); + T1W = FNMS(T1U, T1V, T1T); + T3Z = FMA(T1L, T1P, T3Y); + T41 = FMA(T1R, T1V, T40); + { + E T56, T3g, T55, T1K, T1X, T3f; + T56 = T1J + T1z; + T1K = T1z - T1J; + T3g = T1Q + T1W; + T1X = T1Q - T1W; + T55 = T3Z + T41; + T42 = T3Z - T41; + T1Y = T1K - T1X; + T2S = T1X + T1K; + T43 = T3c + T3e; + T3f = T3c - T3e; + T5z = T55 - T56; + T57 = T55 + T56; + T3h = T3f - T3g; + T3C = T3g + T3f; + } + } + { + E Ta, T2Z, T3K, T2X, Tk, TG, T31, T3M, Tg, Tv, TF; + Ta = FNMS(T6, T9, T5); + T4A = T42 - T43; + T44 = T42 + T43; + T2Z = FMA(T1, T9, T2Y); + Tg = W[17]; + Tv = FNMS(Tr, Tu, Tq); + TF = FNMS(TB, TE, TA); + T3K = FMA(Tm, Tu, T3J); + T2X = FNMS(Tg, Te, T2W); + Tk = FMA(Tg, Tj, Tf); + TG = Tv - TF; + T31 = Tv + TF; + T3M = FMA(Tw, TE, T3L); + { + E Tl, T4Z, T30, T4Y; + Tl = Ta - Tk; + T4Z = Tk + Ta; + T30 = T2X - T2Z; + T3O = T2X + T2Z; + T3N = T3K - T3M; + T4Y = T3K + T3M; + TH = Tl - TG; + T2P = TG + Tl; + T50 = T4Y + T4Z; + T5x = T4Y - T4Z; + T3z = T31 + T30; + T32 = T30 - T31; + } + } + { + E T11, T34, T36, TR, T1i, T3R, T1c, TN, T18; + T11 = FMA(TX, T10, TW); + T34 = FNMS(TS, T10, T33); + TN = W[21]; + T3P = T3N + T3O; + T4D = T3N - T3O; + T18 = W[31]; + T36 = FMA(TN, TL, T35); + TR = FNMS(TN, TQ, TM); + T1i = W[11]; + T3R = FMA(T18, T16, T3Q); + T1c = FNMS(T18, T1b, T17); + { + E T52, T12, T3T, T1m; + T52 = TR + T11; + T12 = TR - T11; + T3T = FMA(T1i, T1g, T3S); + T1m = FNMS(T1i, T1l, T1h); + { + E T37, T51, T38, 
T1n; + T3V = T36 + T34; + T37 = T34 - T36; + T51 = T3R + T3T; + T3U = T3R - T3T; + T38 = T1c + T1m; + T1n = T1c - T1m; + T5w = T51 - T52; + T53 = T51 + T52; + T2Q = T1n + T12; + T1o = T12 - T1n; + T3A = T38 + T37; + T39 = T37 - T38; + } + } + } + } + } + } + { + E T4l, T4m, T4n, T4w, T4u; + { + E T4L, T2O, T3W, T4K, T4I, T4G, T4S, T4U, T4J, T4z, T4H; + { + E T4C, T2N, T4R, T1p, T4E, T2q, T4Q; + T4L = T4A + T4B; + T4C = T4A - T4B; + T2N = T2E + T2M; + T2O = T2M - T2E; + T4R = T1o - TH; + T1p = TH + T1o; + T4E = T3U - T3V; + T3W = T3U + T3V; + T2q = T1Y + T2p; + T4Q = T2p - T1Y; + { + E T4y, T4x, T4F, T2r; + T4F = T4D - T4E; + T4K = T4D + T4E; + T4y = T1p - T2q; + T2r = T1p + T2q; + T4I = FMA(KP618033988, T4C, T4F); + T4G = FNMS(KP618033988, T4F, T4C); + T4S = FNMS(KP618033988, T4R, T4Q); + T4U = FMA(KP618033988, T4Q, T4R); + Im[WS(rs, 4)] = KP500000000 * (T2r - T2N); + T4x = FMA(KP250000000, T2r, T2N); + T4J = T4j - T4k; + T4l = T4j + T4k; + T4z = FMA(KP559016994, T4y, T4x); + T4H = FNMS(KP559016994, T4y, T4x); + } + } + { + E T2R, T4s, T4d, T4f, T4t, T2U, T4P, T4T; + { + E T3X, T4O, T4M, T4c, T4N; + T4m = T3P + T3W; + T3X = T3P - T3W; + Ip[WS(rs, 7)] = KP500000000 * (FMA(KP951056516, T4G, T4z)); + Ip[WS(rs, 3)] = KP500000000 * (FNMS(KP951056516, T4G, T4z)); + Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP951056516, T4I, T4H))); + Im[0] = -(KP500000000 * (FMA(KP951056516, T4I, T4H))); + T4O = T4K - T4L; + T4M = T4K + T4L; + T4c = T44 - T4b; + T4n = T44 + T4b; + T2R = T2P + T2Q; + T4s = T2P - T2Q; + Rm[WS(rs, 4)] = KP500000000 * (T4J + T4M); + T4N = FNMS(KP250000000, T4M, T4J); + T4d = FMA(KP618033988, T4c, T3X); + T4f = FNMS(KP618033988, T3X, T4c); + T4t = T2S - T2T; + T2U = T2S + T2T; + T4P = FNMS(KP559016994, T4O, T4N); + T4T = FMA(KP559016994, T4O, T4N); + } + { + E T3H, T3G, T2V, T3I, T4e; + T2V = T2R + T2U; + T3H = T2R - T2U; + Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T4S, T4P)); + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T4S, T4P)); + Rm[0] = 
KP500000000 * (FNMS(KP951056516, T4U, T4T)); + Rm[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T4U, T4T)); + Ip[WS(rs, 5)] = KP500000000 * (T2O + T2V); + T3G = FNMS(KP250000000, T2V, T2O); + T3I = FMA(KP559016994, T3H, T3G); + T4e = FNMS(KP559016994, T3H, T3G); + T4w = FNMS(KP618033988, T4s, T4t); + T4u = FMA(KP618033988, T4t, T4s); + Ip[WS(rs, 9)] = KP500000000 * (FMA(KP951056516, T4d, T3I)); + Ip[WS(rs, 1)] = KP500000000 * (FNMS(KP951056516, T4d, T3I)); + Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP951056516, T4f, T4e))); + Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP951056516, T4f, T4e))); + } + } + } + { + E T3y, T5O, T5Q, T5F, T5K, T5I; + { + E T5G, T5H, T3x, T4q, T5E, T5C, T3a, T5N, T4p, T5M, T3p, T5y, T5B, T4o; + T5G = T5x + T5w; + T5y = T5w - T5x; + T5B = T5z - T5A; + T5H = T5z + T5A; + T3y = T3w - T3v; + T3x = T3v + T3w; + T4q = T4m - T4n; + T4o = T4m + T4n; + T5E = FMA(KP618033988, T5y, T5B); + T5C = FNMS(KP618033988, T5B, T5y); + T3a = T32 + T39; + T5N = T39 - T32; + Rp[WS(rs, 5)] = KP500000000 * (T4l + T4o); + T4p = FNMS(KP250000000, T4o, T4l); + T5M = T3o - T3h; + T3p = T3h + T3o; + { + E T5u, T5t, T4r, T4v, T3q, T5D, T5v; + T4r = FMA(KP559016994, T4q, T4p); + T4v = FNMS(KP559016994, T4q, T4p); + T5u = T3p - T3a; + T3q = T3a + T3p; + Rp[WS(rs, 9)] = KP500000000 * (FNMS(KP951056516, T4u, T4r)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T4u, T4r)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T4w, T4v)); + Rm[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T4w, T4v)); + Im[WS(rs, 9)] = KP500000000 * (T3q - T3x); + T5t = FMA(KP250000000, T3q, T3x); + T5O = FNMS(KP618033988, T5N, T5M); + T5Q = FMA(KP618033988, T5M, T5N); + T5F = T4V - T4W; + T4X = T4V + T4W; + T5D = FNMS(KP559016994, T5u, T5t); + T5v = FMA(KP559016994, T5u, T5t); + Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP951056516, T5C, T5v))); + Ip[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5C, T5v)); + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T5E, T5D))); + Ip[WS(rs, 2)] = KP500000000 * 
(FMA(KP951056516, T5E, T5D)); + T5K = T5G - T5H; + T5I = T5G + T5H; + } + } + { + E T54, T5b, T5s, T5q, T5g, T5h, T3F, T5m, T5o, T5p, T5J, T5l, T5r, T5n; + T54 = T50 + T53; + T5o = T50 - T53; + T5p = T5a - T57; + T5b = T57 + T5a; + Rm[WS(rs, 9)] = KP500000000 * (T5F + T5I); + T5J = FNMS(KP250000000, T5I, T5F); + T5s = FMA(KP618033988, T5o, T5p); + T5q = FNMS(KP618033988, T5p, T5o); + { + E T5L, T5P, T3B, T3E; + T5L = FNMS(KP559016994, T5K, T5J); + T5P = FMA(KP559016994, T5K, T5J); + T3B = T3z + T3A; + T5g = T3z - T3A; + T5h = T3C - T3D; + T3E = T3C + T3D; + Rm[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T5O, T5L)); + Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T5O, T5L)); + Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP951056516, T5Q, T5P)); + Rp[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5Q, T5P)); + T3F = T3B + T3E; + T5m = T3B - T3E; + } + Ip[0] = KP500000000 * (T3y + T3F); + T5l = FNMS(KP250000000, T3F, T3y); + T5i = FMA(KP618033988, T5h, T5g); + T5k = FNMS(KP618033988, T5g, T5h); + T5r = FNMS(KP559016994, T5m, T5l); + T5n = FMA(KP559016994, T5m, T5l); + Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP951056516, T5q, T5n))); + Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T5q, T5n)); + Im[WS(rs, 7)] = -(KP500000000 * (FNMS(KP951056516, T5s, T5r))); + Ip[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5s, T5r)); + T5e = T54 - T5b; + T5c = T54 + T5b; + } + } + } + } + Rp[0] = KP500000000 * (T4X + T5c); + T5d = FNMS(KP250000000, T5c, T4X); + T5j = FNMS(KP559016994, T5e, T5d); + T5f = FMA(KP559016994, T5e, T5d); + Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5i, T5f)); + Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T5i, T5f)); + Rm[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5k, T5j)); + Rp[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5k, T5j)); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cfdft_20", twinstr, &GENUS, {176, 78, 110, 0} }; + +void X(codelet_hc2cfdft_20) 
(planner *p) { + X(khc2c_register) (p, hc2cfdft_20, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cfdft_20 -include hc2cf.h */ + +/* + * This function contains 286 FP additions, 140 FP multiplications, + * (or, 224 additions, 78 multiplications, 62 fused multiply/add), + * 98 stack variables, 5 constants, and 80 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP125000000, +0.125000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP279508497, +0.279508497187473712051146708591409529430077295); + DK(KP293892626, +0.293892626146236564584352977319536384298826219); + DK(KP475528258, +0.475528258147576786058219666689691071702849317); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) { + E T12, T2w, T4o, T4V, T2H, T3a, T4y, T4Y, T1z, T2v, T25, T2y, T2s, T2z, T4v; + E T4X, T4r, T4U, T3A, T3Z, T2X, T37, T3k, T41, T2M, T39, T3v, T3Y, T2S, T36; + E T3p, T42, Td, T4G, T33, T3N, Tw, T4H, T32, T3O; + { + E T3, T3L, T1x, T2V, Th, Tl, TC, T3g, Tq, Tu, TH, T3h, T7, Tb, T1q; + E T2U, TR, T2P, T1F, T3r, T23, T2K, T2f, T3y, T1k, T3m, T2q, T2E, T10, T2Q; + E T1K, T3s, T1U, T2J, T2a, T3x, T1b, T3l, T2l, T2D; + { + E T1, T2, T1s, T1u, T1v, T1w, T1r, T1t; + T1 = Ip[0]; + T2 = Im[0]; + T1s = T1 + T2; + T1u = Rp[0]; + T1v = Rm[0]; + T1w = T1u - T1v; + T3 = T1 - T2; + T3L = T1u + T1v; + T1r = W[0]; + T1t = W[1]; + T1x = FNMS(T1t, T1w, T1r * T1s); + T2V = FMA(T1r, T1w, T1t * T1s); + } + { + E Tf, Tg, Tz, Tj, Tk, TB, Ty, TA; + Tf = Ip[WS(rs, 2)]; + Tg = Im[WS(rs, 2)]; + Tz = Tf - Tg; + Tj = Rp[WS(rs, 2)]; + Tk = Rm[WS(rs, 2)]; + TB = Tj + Tk; + Th = Tf + Tg; + Tl = Tj - Tk; + Ty = W[6]; + TA = 
W[7]; + TC = FNMS(TA, TB, Ty * Tz); + T3g = FMA(TA, Tz, Ty * TB); + } + { + E To, Tp, TE, Ts, Tt, TG, TD, TF; + To = Ip[WS(rs, 7)]; + Tp = Im[WS(rs, 7)]; + TE = To - Tp; + Ts = Rp[WS(rs, 7)]; + Tt = Rm[WS(rs, 7)]; + TG = Ts + Tt; + Tq = To + Tp; + Tu = Ts - Tt; + TD = W[26]; + TF = W[27]; + TH = FNMS(TF, TG, TD * TE); + T3h = FMA(TF, TE, TD * TG); + } + { + E T5, T6, T1n, T9, Ta, T1p, T1m, T1o; + T5 = Ip[WS(rs, 5)]; + T6 = Im[WS(rs, 5)]; + T1n = T5 + T6; + T9 = Rp[WS(rs, 5)]; + Ta = Rm[WS(rs, 5)]; + T1p = T9 - Ta; + T7 = T5 - T6; + Tb = T9 + Ta; + T1m = W[20]; + T1o = W[21]; + T1q = FNMS(T1o, T1p, T1m * T1n); + T2U = FMA(T1m, T1p, T1o * T1n); + } + { + E TM, T1C, TQ, T1E; + { + E TK, TL, TO, TP; + TK = Ip[WS(rs, 4)]; + TL = Im[WS(rs, 4)]; + TM = TK + TL; + T1C = TK - TL; + TO = Rp[WS(rs, 4)]; + TP = Rm[WS(rs, 4)]; + TQ = TO - TP; + T1E = TO + TP; + } + { + E TJ, TN, T1B, T1D; + TJ = W[16]; + TN = W[17]; + TR = FNMS(TN, TQ, TJ * TM); + T2P = FMA(TN, TM, TJ * TQ); + T1B = W[14]; + T1D = W[15]; + T1F = FNMS(T1D, T1E, T1B * T1C); + T3r = FMA(T1D, T1C, T1B * T1E); + } + } + { + E T1Y, T2c, T22, T2e; + { + E T1W, T1X, T20, T21; + T1W = Ip[WS(rs, 1)]; + T1X = Im[WS(rs, 1)]; + T1Y = T1W + T1X; + T2c = T1W - T1X; + T20 = Rp[WS(rs, 1)]; + T21 = Rm[WS(rs, 1)]; + T22 = T20 - T21; + T2e = T20 + T21; + } + { + E T1V, T1Z, T2b, T2d; + T1V = W[4]; + T1Z = W[5]; + T23 = FNMS(T1Z, T22, T1V * T1Y); + T2K = FMA(T1Z, T1Y, T1V * T22); + T2b = W[2]; + T2d = W[3]; + T2f = FNMS(T2d, T2e, T2b * T2c); + T3y = FMA(T2d, T2c, T2b * T2e); + } + } + { + E T1f, T2n, T1j, T2p; + { + E T1d, T1e, T1h, T1i; + T1d = Ip[WS(rs, 3)]; + T1e = Im[WS(rs, 3)]; + T1f = T1d - T1e; + T2n = T1d + T1e; + T1h = Rp[WS(rs, 3)]; + T1i = Rm[WS(rs, 3)]; + T1j = T1h + T1i; + T2p = T1h - T1i; + } + { + E T1c, T1g, T2m, T2o; + T1c = W[10]; + T1g = W[11]; + T1k = FNMS(T1g, T1j, T1c * T1f); + T3m = FMA(T1c, T1j, T1g * T1f); + T2m = W[12]; + T2o = W[13]; + T2q = FNMS(T2o, T2p, T2m * T2n); + T2E = FMA(T2m, T2p, T2o * T2n); + } 
+ } + { + E TV, T1H, TZ, T1J; + { + E TT, TU, TX, TY; + TT = Ip[WS(rs, 9)]; + TU = Im[WS(rs, 9)]; + TV = TT + TU; + T1H = TT - TU; + TX = Rp[WS(rs, 9)]; + TY = Rm[WS(rs, 9)]; + TZ = TX - TY; + T1J = TX + TY; + } + { + E TS, TW, T1G, T1I; + TS = W[36]; + TW = W[37]; + T10 = FNMS(TW, TZ, TS * TV); + T2Q = FMA(TW, TV, TS * TZ); + T1G = W[34]; + T1I = W[35]; + T1K = FNMS(T1I, T1J, T1G * T1H); + T3s = FMA(T1I, T1H, T1G * T1J); + } + } + { + E T1P, T27, T1T, T29; + { + E T1N, T1O, T1R, T1S; + T1N = Ip[WS(rs, 6)]; + T1O = Im[WS(rs, 6)]; + T1P = T1N + T1O; + T27 = T1N - T1O; + T1R = Rp[WS(rs, 6)]; + T1S = Rm[WS(rs, 6)]; + T1T = T1R - T1S; + T29 = T1R + T1S; + } + { + E T1M, T1Q, T26, T28; + T1M = W[24]; + T1Q = W[25]; + T1U = FNMS(T1Q, T1T, T1M * T1P); + T2J = FMA(T1Q, T1P, T1M * T1T); + T26 = W[22]; + T28 = W[23]; + T2a = FNMS(T28, T29, T26 * T27); + T3x = FMA(T28, T27, T26 * T29); + } + } + { + E T16, T2k, T1a, T2i; + { + E T14, T15, T18, T19; + T14 = Ip[WS(rs, 8)]; + T15 = Im[WS(rs, 8)]; + T16 = T14 - T15; + T2k = T14 + T15; + T18 = Rp[WS(rs, 8)]; + T19 = Rm[WS(rs, 8)]; + T1a = T18 + T19; + T2i = T19 - T18; + } + { + E T13, T17, T2h, T2j; + T13 = W[30]; + T17 = W[31]; + T1b = FNMS(T17, T1a, T13 * T16); + T3l = FMA(T13, T1a, T17 * T16); + T2h = W[33]; + T2j = W[32]; + T2l = FMA(T2h, T2i, T2j * T2k); + T2D = FNMS(T2h, T2k, T2j * T2i); + } + } + { + E T2g, T2r, T3n, T3o; + { + E TI, T11, T4m, T4n; + TI = TC - TH; + T11 = TR - T10; + T12 = TI - T11; + T2w = TI + T11; + T4m = T3g + T3h; + T4n = TR + T10; + T4o = T4m + T4n; + T4V = T4m - T4n; + } + { + E T2F, T2G, T4w, T4x; + T2F = T2D - T2E; + T2G = T2a + T2f; + T2H = T2F - T2G; + T3a = T2F + T2G; + T4w = T2l + T2q; + T4x = T3x + T3y; + T4y = T4w + T4x; + T4Y = T4x - T4w; + } + { + E T1l, T1y, T1L, T24; + T1l = T1b - T1k; + T1y = T1q - T1x; + T1z = T1l + T1y; + T2v = T1y - T1l; + T1L = T1F - T1K; + T24 = T1U - T23; + T25 = T1L - T24; + T2y = T1L + T24; + } + T2g = T2a - T2f; + T2r = T2l - T2q; + T2s = T2g - T2r; + T2z = T2r 
+ T2g; + { + E T4t, T4u, T4p, T4q; + T4t = T3r + T3s; + T4u = T1U + T23; + T4v = T4t + T4u; + T4X = T4t - T4u; + T4p = T3l + T3m; + T4q = T1q + T1x; + T4r = T4p + T4q; + T4U = T4p - T4q; + } + { + E T3w, T3z, T2T, T2W; + T3w = T2D + T2E; + T3z = T3x - T3y; + T3A = T3w + T3z; + T3Z = T3z - T3w; + T2T = T1b + T1k; + T2W = T2U + T2V; + T2X = T2T + T2W; + T37 = T2T - T2W; + } + { + E T3i, T3j, T2I, T2L; + T3i = T3g - T3h; + T3j = T2Q - T2P; + T3k = T3i + T3j; + T41 = T3i - T3j; + T2I = T1F + T1K; + T2L = T2J + T2K; + T2M = T2I + T2L; + T39 = T2I - T2L; + } + { + E T3t, T3u, T2O, T2R; + T3t = T3r - T3s; + T3u = T2K - T2J; + T3v = T3t + T3u; + T3Y = T3t - T3u; + T2O = TC + TH; + T2R = T2P + T2Q; + T2S = T2O + T2R; + T36 = T2O - T2R; + } + T3n = T3l - T3m; + T3o = T2U - T2V; + T3p = T3n + T3o; + T42 = T3n - T3o; + { + E Tc, T3M, T4, T8; + T4 = W[18]; + T8 = W[19]; + Tc = FNMS(T8, Tb, T4 * T7); + T3M = FMA(T4, Tb, T8 * T7); + Td = T3 - Tc; + T4G = T3L + T3M; + T33 = Tc + T3; + T3N = T3L - T3M; + } + { + E Tm, T30, Tv, T31; + { + E Te, Ti, Tn, Tr; + Te = W[8]; + Ti = W[9]; + Tm = FNMS(Ti, Tl, Te * Th); + T30 = FMA(Ti, Th, Te * Tl); + Tn = W[28]; + Tr = W[29]; + Tv = FNMS(Tr, Tu, Tn * Tq); + T31 = FMA(Tr, Tq, Tn * Tu); + } + Tw = Tm - Tv; + T4H = Tm + Tv; + T32 = T30 + T31; + T3O = T31 - T30; + } + } + } + { + E T3C, T3E, Tx, T2u, T3d, T3e, T3D, T3f; + { + E T3q, T3B, T1A, T2t; + T3q = T3k - T3p; + T3B = T3v - T3A; + T3C = FMA(KP475528258, T3q, KP293892626 * T3B); + T3E = FNMS(KP293892626, T3q, KP475528258 * T3B); + Tx = Td - Tw; + T1A = T12 + T1z; + T2t = T25 + T2s; + T2u = T1A + T2t; + T3d = KP279508497 * (T1A - T2t); + T3e = FNMS(KP125000000, T2u, KP500000000 * Tx); + } + Ip[WS(rs, 5)] = KP500000000 * (Tx + T2u); + T3D = T3d - T3e; + Im[WS(rs, 2)] = T3D - T3E; + Im[WS(rs, 6)] = T3D + T3E; + T3f = T3d + T3e; + Ip[WS(rs, 1)] = T3f - T3C; + Ip[WS(rs, 9)] = T3f + T3C; + } + { + E T3H, T3T, T3P, T3Q, T3K, T3R, T3U, T3S; + { + E T3F, T3G, T3I, T3J; + T3F = T12 - T1z; + T3G = 
T25 - T2s; + T3H = FMA(KP475528258, T3F, KP293892626 * T3G); + T3T = FNMS(KP293892626, T3F, KP475528258 * T3G); + T3P = T3N + T3O; + T3I = T3k + T3p; + T3J = T3v + T3A; + T3Q = T3I + T3J; + T3K = KP279508497 * (T3I - T3J); + T3R = FNMS(KP125000000, T3Q, KP500000000 * T3P); + } + Rp[WS(rs, 5)] = KP500000000 * (T3P + T3Q); + T3U = T3R - T3K; + Rm[WS(rs, 6)] = T3T + T3U; + Rm[WS(rs, 2)] = T3U - T3T; + T3S = T3K + T3R; + Rp[WS(rs, 1)] = T3H + T3S; + Rp[WS(rs, 9)] = T3S - T3H; + } + { + E T44, T46, T2C, T2B, T3V, T3W, T45, T3X; + { + E T40, T43, T2x, T2A; + T40 = T3Y - T3Z; + T43 = T41 - T42; + T44 = FNMS(KP293892626, T43, KP475528258 * T40); + T46 = FMA(KP475528258, T43, KP293892626 * T40); + T2C = Tw + Td; + T2x = T2v - T2w; + T2A = T2y + T2z; + T2B = T2x - T2A; + T3V = FMA(KP500000000, T2C, KP125000000 * T2B); + T3W = KP279508497 * (T2x + T2A); + } + Im[WS(rs, 4)] = KP500000000 * (T2B - T2C); + T45 = T3W - T3V; + Im[0] = T45 - T46; + Im[WS(rs, 8)] = T45 + T46; + T3X = T3V + T3W; + Ip[WS(rs, 3)] = T3X - T44; + Ip[WS(rs, 7)] = T3X + T44; + } + { + E T49, T4h, T4a, T4d, T4e, T4f, T4i, T4g; + { + E T47, T48, T4b, T4c; + T47 = T2y - T2z; + T48 = T2w + T2v; + T49 = FNMS(KP293892626, T48, KP475528258 * T47); + T4h = FMA(KP475528258, T48, KP293892626 * T47); + T4a = T3N - T3O; + T4b = T41 + T42; + T4c = T3Y + T3Z; + T4d = T4b + T4c; + T4e = FNMS(KP125000000, T4d, KP500000000 * T4a); + T4f = KP279508497 * (T4b - T4c); + } + Rm[WS(rs, 4)] = KP500000000 * (T4a + T4d); + T4i = T4f + T4e; + Rm[WS(rs, 8)] = T4h + T4i; + Rm[0] = T4i - T4h; + T4g = T4e - T4f; + Rp[WS(rs, 3)] = T49 + T4g; + Rp[WS(rs, 7)] = T4g - T49; + } + { + E T50, T52, T34, T2Z, T4R, T4S, T51, T4T; + { + E T4W, T4Z, T2N, T2Y; + T4W = T4U - T4V; + T4Z = T4X - T4Y; + T50 = FNMS(KP293892626, T4Z, KP475528258 * T4W); + T52 = FMA(KP293892626, T4W, KP475528258 * T4Z); + T34 = T32 + T33; + T2N = T2H - T2M; + T2Y = T2S + T2X; + T2Z = T2N - T2Y; + T4R = FMA(KP500000000, T34, KP125000000 * T2Z); + T4S = KP279508497 * (T2Y + 
T2N); + } + Im[WS(rs, 9)] = KP500000000 * (T2Z - T34); + T51 = T4R - T4S; + Ip[WS(rs, 2)] = T51 + T52; + Im[WS(rs, 1)] = T52 - T51; + T4T = T4R + T4S; + Ip[WS(rs, 6)] = T4T + T50; + Im[WS(rs, 5)] = T50 - T4T; + } + { + E T5c, T5d, T53, T56, T57, T58, T5e, T59; + { + E T5a, T5b, T54, T55; + T5a = T2M + T2H; + T5b = T2S - T2X; + T5c = FNMS(KP293892626, T5b, KP475528258 * T5a); + T5d = FMA(KP475528258, T5b, KP293892626 * T5a); + T53 = T4G - T4H; + T54 = T4V + T4U; + T55 = T4X + T4Y; + T56 = T54 + T55; + T57 = FNMS(KP125000000, T56, KP500000000 * T53); + T58 = KP279508497 * (T54 - T55); + } + Rm[WS(rs, 9)] = KP500000000 * (T53 + T56); + T5e = T58 + T57; + Rp[WS(rs, 6)] = T5d + T5e; + Rm[WS(rs, 5)] = T5e - T5d; + T59 = T57 - T58; + Rp[WS(rs, 2)] = T59 - T5c; + Rm[WS(rs, 1)] = T5c + T59; + } + { + E T4A, T4C, T35, T3c, T4j, T4k, T4B, T4l; + { + E T4s, T4z, T38, T3b; + T4s = T4o - T4r; + T4z = T4v - T4y; + T4A = FNMS(KP475528258, T4z, KP293892626 * T4s); + T4C = FMA(KP475528258, T4s, KP293892626 * T4z); + T35 = T33 - T32; + T38 = T36 + T37; + T3b = T39 + T3a; + T3c = T38 + T3b; + T4j = FNMS(KP125000000, T3c, KP500000000 * T35); + T4k = KP279508497 * (T38 - T3b); + } + Ip[0] = KP500000000 * (T35 + T3c); + T4B = T4k + T4j; + Ip[WS(rs, 4)] = T4B + T4C; + Im[WS(rs, 3)] = T4C - T4B; + T4l = T4j - T4k; + Ip[WS(rs, 8)] = T4l + T4A; + Im[WS(rs, 7)] = T4A - T4l; + } + { + E T4O, T4P, T4I, T4J, T4F, T4K, T4Q, T4L; + { + E T4M, T4N, T4D, T4E; + T4M = T36 - T37; + T4N = T39 - T3a; + T4O = FMA(KP475528258, T4M, KP293892626 * T4N); + T4P = FNMS(KP293892626, T4M, KP475528258 * T4N); + T4I = T4G + T4H; + T4D = T4o + T4r; + T4E = T4v + T4y; + T4J = T4D + T4E; + T4F = KP279508497 * (T4D - T4E); + T4K = FNMS(KP125000000, T4J, KP500000000 * T4I); + } + Rp[0] = KP500000000 * (T4I + T4J); + T4Q = T4K - T4F; + Rp[WS(rs, 8)] = T4P + T4Q; + Rm[WS(rs, 7)] = T4Q - T4P; + T4L = T4F + T4K; + Rp[WS(rs, 4)] = T4L - T4O; + Rm[WS(rs, 3)] = T4O + T4L; + } + } + } +} + +static const tw_instr twinstr[] = { 
+ {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 20, "hc2cfdft_20", twinstr, &GENUS, {224, 78, 62, 0} }; + +void X(codelet_hc2cfdft_20) (planner *p) { + X(khc2c_register) (p, hc2cfdft_20, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1943 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:28 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include hc2cf.h */ + +/* + * This function contains 498 FP additions, 324 FP multiplications, + * (or, 300 additions, 126 multiplications, 198 fused multiply/add), + * 172 stack variables, 8 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T9X, Ta0; + { + E T3B, T89, T61, T8l, T2F, T7p, T8t, T4B, T7I, T5e, T7L, T1n, T7R, T5E, T82; + E T4u, T3m, T8k, T5W, T8a, T2r, T8u, T4G, T7q, T59, T7K, T7H, T12, T5z, T81; + E T7Q, T4h, T4Y, T7D, T7A, Tl, T5o, T3Q, T84, T7V, T2V, T4M, T7t, T7s, T1K; + E T5L, T8e, T8n, T38, T7v, T4R, T7w, T25, T5Q, T8h, T8o, T3V, T3S, T5p, T3T; + E T41, Tz, T3Y, TE, TA, 
T51, T5r, T3Z, Tv, T50, TB, T3U, T40; + { + E T49, T46, T5v, T47, T4f, TV, T4c, T10, TW, T57, T5x, T4d, TR, T56, TX; + E T48, T4e; + { + E T4m, T4j, T5A, T4k, T4s, T1g, T4p, T1l, T1h, T5c, T5C, T4q, T1c, T5b, T1i; + E T4l, T4r; + { + E T2E, T4y, T2B, T4A; + { + E T3y, T3z, T3t, T5Z, T3x, T2v, T3r, T3q, T3n, T2A, T3o, T2s; + { + E T2C, T2D, T3w, T3u, T3v; + T2C = Ip[0]; + T2D = Im[0]; + T3u = Rm[0]; + T3v = Rp[0]; + T3y = W[1]; + T3z = T2C + T2D; + T2E = T2C - T2D; + T4y = T3v + T3u; + T3w = T3u - T3v; + T3t = W[0]; + { + E T2y, T2z, T2t, T2u; + T2t = Ip[WS(rs, 8)]; + T2u = Im[WS(rs, 8)]; + T5Z = T3y * T3w; + T3x = T3t * T3w; + T2y = Rp[WS(rs, 8)]; + T2v = T2t - T2u; + T3r = T2t + T2u; + T2z = Rm[WS(rs, 8)]; + T3q = W[33]; + T3n = W[32]; + T2A = T2y + T2z; + T3o = T2z - T2y; + T2s = W[30]; + } + } + { + E T3A, T5X, T4z, T2w, T3s, T3p, T5Y, T60, T2x; + T3A = FNMS(T3y, T3z, T3x); + T3p = T3n * T3o; + T5X = T3q * T3o; + T4z = T2s * T2A; + T2w = T2s * T2v; + T3s = FNMS(T3q, T3r, T3p); + T5Y = FMA(T3n, T3r, T5X); + T60 = FMA(T3t, T3z, T5Z); + T2x = W[31]; + T3B = T3s + T3A; + T89 = T3A - T3s; + T61 = T5Y + T60; + T8l = T60 - T5Y; + T2B = FNMS(T2x, T2A, T2w); + T4A = FMA(T2x, T2v, T4z); + } + } + { + E T16, T1b, T17, T5a, T1d, T4o, T18; + { + E T19, T1a, T13, T4i, T14, T15; + T14 = Ip[WS(rs, 3)]; + T15 = Im[WS(rs, 3)]; + T2F = T2B + T2E; + T7p = T2E - T2B; + T8t = T4y - T4A; + T4B = T4y + T4A; + T4m = T14 + T15; + T16 = T14 - T15; + T19 = Rp[WS(rs, 3)]; + T1a = Rm[WS(rs, 3)]; + T13 = W[10]; + T4i = W[12]; + { + E T1e, T1f, T1j, T1k; + T1e = Ip[WS(rs, 11)]; + T4j = T19 - T1a; + T1b = T19 + T1a; + T17 = T13 * T16; + T5A = T4i * T4m; + T4k = T4i * T4j; + T5a = T13 * T1b; + T1f = Im[WS(rs, 11)]; + T1j = Rp[WS(rs, 11)]; + T1k = Rm[WS(rs, 11)]; + T1d = W[42]; + T4s = T1e + T1f; + T1g = T1e - T1f; + T4p = T1j - T1k; + T1l = T1j + T1k; + T4o = W[44]; + T1h = T1d * T1g; + } + } + T18 = W[11]; + T5c = T1d * T1l; + T5C = T4o * T4s; + T4q = T4o * T4p; + T1c = FNMS(T18, T1b, T17); + 
T5b = FMA(T18, T16, T5a); + T1i = W[43]; + T4l = W[13]; + T4r = W[45]; + } + } + { + E T4D, T2g, T2q, T4F; + { + E T3d, T3e, T2a, T2f, T3a, T5S, T3c, T4C, T2b, T3j, T2k, T3k, T2p, T3h, T3g; + E T2h, T5U, T3b, T27; + { + E T28, T29, T2d, T2e, T5d, T1m; + T28 = Ip[WS(rs, 4)]; + T5d = FMA(T1i, T1g, T5c); + T1m = FNMS(T1i, T1l, T1h); + { + E T5B, T4n, T5D, T4t; + T5B = FNMS(T4l, T4j, T5A); + T4n = FMA(T4l, T4m, T4k); + T5D = FNMS(T4r, T4p, T5C); + T4t = FMA(T4r, T4s, T4q); + T7I = T5b - T5d; + T5e = T5b + T5d; + T7L = T1c - T1m; + T1n = T1c + T1m; + T7R = T5D - T5B; + T5E = T5B + T5D; + T82 = T4t - T4n; + T4u = T4n + T4t; + T29 = Im[WS(rs, 4)]; + } + T2d = Rp[WS(rs, 4)]; + T2e = Rm[WS(rs, 4)]; + T3d = W[17]; + T3e = T28 + T29; + T2a = T28 - T29; + T3b = T2e - T2d; + T2f = T2d + T2e; + T3a = W[16]; + T27 = W[14]; + T5S = T3d * T3b; + } + { + E T2i, T2j, T2n, T2o; + T2i = Ip[WS(rs, 12)]; + T3c = T3a * T3b; + T4C = T27 * T2f; + T2b = T27 * T2a; + T2j = Im[WS(rs, 12)]; + T2n = Rp[WS(rs, 12)]; + T2o = Rm[WS(rs, 12)]; + T3j = W[49]; + T2k = T2i - T2j; + T3k = T2i + T2j; + T2p = T2n + T2o; + T3h = T2o - T2n; + T3g = W[48]; + T2h = W[46]; + T5U = T3j * T3h; + } + { + E T3f, T3i, T4E, T2l; + T3f = FNMS(T3d, T3e, T3c); + T3i = T3g * T3h; + T4E = T2h * T2p; + T2l = T2h * T2k; + { + E T5T, T3l, T5V, T2c, T2m; + T5T = FMA(T3a, T3e, T5S); + T3l = FNMS(T3j, T3k, T3i); + T5V = FMA(T3g, T3k, T5U); + T2c = W[15]; + T2m = W[47]; + T3m = T3f + T3l; + T8k = T3f - T3l; + T5W = T5T + T5V; + T8a = T5T - T5V; + T4D = FMA(T2c, T2a, T4C); + T2g = FNMS(T2c, T2f, T2b); + T2q = FNMS(T2m, T2p, T2l); + T4F = FMA(T2m, T2k, T4E); + } + } + } + { + E TL, TQ, TM, T55, TS, T4b, TN; + { + E TO, TP, TI, T45, TJ, TK; + TJ = Ip[WS(rs, 15)]; + TK = Im[WS(rs, 15)]; + T2r = T2g + T2q; + T8u = T2g - T2q; + T4G = T4D + T4F; + T7q = T4D - T4F; + T49 = TJ + TK; + TL = TJ - TK; + TO = Rp[WS(rs, 15)]; + TP = Rm[WS(rs, 15)]; + TI = W[58]; + T45 = W[60]; + { + E TT, TU, TY, TZ; + TT = Ip[WS(rs, 7)]; + T46 = TO - TP; + 
TQ = TO + TP; + TM = TI * TL; + T5v = T45 * T49; + T47 = T45 * T46; + T55 = TI * TQ; + TU = Im[WS(rs, 7)]; + TY = Rp[WS(rs, 7)]; + TZ = Rm[WS(rs, 7)]; + TS = W[26]; + T4f = TT + TU; + TV = TT - TU; + T4c = TY - TZ; + T10 = TY + TZ; + T4b = W[28]; + TW = TS * TV; + } + } + TN = W[59]; + T57 = TS * T10; + T5x = T4b * T4f; + T4d = T4b * T4c; + TR = FNMS(TN, TQ, TM); + T56 = FMA(TN, TL, T55); + TX = W[27]; + T48 = W[61]; + T4e = W[29]; + } + } + } + { + E T8c, T8d, T8f, T8g; + { + E T3I, T3F, T5k, T3G, T3O, Te, T3L, Tj, Tf, T4W, T5m, T3M, Ta, T4V, Tg; + E T3H, T3N; + { + E T4, T9, T5, T4U, Tb, T3K, T1, T3E, T6; + { + E T2, T3, T7, T8, T58, T11; + T2 = Ip[WS(rs, 1)]; + T58 = FMA(TX, TV, T57); + T11 = FNMS(TX, T10, TW); + { + E T5w, T4a, T5y, T4g; + T5w = FNMS(T48, T46, T5v); + T4a = FMA(T48, T49, T47); + T5y = FNMS(T4e, T4c, T5x); + T4g = FMA(T4e, T4f, T4d); + T59 = T56 + T58; + T7K = T56 - T58; + T7H = TR - T11; + T12 = TR + T11; + T5z = T5w + T5y; + T81 = T5w - T5y; + T7Q = T4g - T4a; + T4h = T4a + T4g; + T3 = Im[WS(rs, 1)]; + } + T7 = Rp[WS(rs, 1)]; + T8 = Rm[WS(rs, 1)]; + T1 = W[2]; + T3I = T2 + T3; + T4 = T2 - T3; + T3F = T7 - T8; + T9 = T7 + T8; + T3E = W[4]; + T5 = T1 * T4; + } + { + E Tc, Td, Th, Ti; + Tc = Ip[WS(rs, 9)]; + T4U = T1 * T9; + T5k = T3E * T3I; + T3G = T3E * T3F; + Td = Im[WS(rs, 9)]; + Th = Rp[WS(rs, 9)]; + Ti = Rm[WS(rs, 9)]; + Tb = W[34]; + T3O = Tc + Td; + Te = Tc - Td; + T3L = Th - Ti; + Tj = Th + Ti; + T3K = W[36]; + Tf = Tb * Te; + } + T6 = W[3]; + T4W = Tb * Tj; + T5m = T3K * T3O; + T3M = T3K * T3L; + Ta = FNMS(T6, T9, T5); + T4V = FMA(T6, T4, T4U); + Tg = W[35]; + T3H = W[5]; + T3N = W[37]; + } + { + E T1t, T2N, T2M, T2J, T1y, T2L, T5H, T4I, T1u, T2S, T1D, T2T, T1I, T2Q, T2P; + E T1A, T5J; + { + E T2K, T1q, T1w, T1x; + { + E T1r, T7U, T7T, T1s, T4X, Tk; + T1r = Ip[WS(rs, 2)]; + T4X = FMA(Tg, Te, T4W); + Tk = FNMS(Tg, Tj, Tf); + { + E T5l, T3J, T5n, T3P; + T5l = FNMS(T3H, T3F, T5k); + T3J = FMA(T3H, T3I, T3G); + T5n = FNMS(T3N, T3L, T5m); + 
T3P = FMA(T3N, T3O, T3M); + T4Y = T4V + T4X; + T7D = T4V - T4X; + T7A = Ta - Tk; + Tl = Ta + Tk; + T7U = T5l - T5n; + T5o = T5l + T5n; + T7T = T3P - T3J; + T3Q = T3J + T3P; + T1s = Im[WS(rs, 2)]; + } + T1w = Rp[WS(rs, 2)]; + T84 = T7U + T7T; + T7V = T7T - T7U; + T1t = T1r - T1s; + T2N = T1r + T1s; + T1x = Rm[WS(rs, 2)]; + } + T2M = W[9]; + T2J = W[8]; + T1y = T1w + T1x; + T2K = T1x - T1w; + T1q = W[6]; + { + E T1B, T1C, T1G, T1H; + T1B = Ip[WS(rs, 10)]; + T2L = T2J * T2K; + T5H = T2M * T2K; + T4I = T1q * T1y; + T1u = T1q * T1t; + T1C = Im[WS(rs, 10)]; + T1G = Rp[WS(rs, 10)]; + T1H = Rm[WS(rs, 10)]; + T2S = W[41]; + T1D = T1B - T1C; + T2T = T1B + T1C; + T1I = T1G + T1H; + T2Q = T1H - T1G; + T2P = W[40]; + T1A = W[38]; + T5J = T2S * T2Q; + } + } + { + E T2R, T4K, T1E, T1z, T4J, T1F, T1v, T2O, T2U; + T1v = W[7]; + T2R = T2P * T2Q; + T4K = T1A * T1I; + T1E = T1A * T1D; + T1z = FNMS(T1v, T1y, T1u); + T4J = FMA(T1v, T1t, T4I); + T1F = W[39]; + T2O = FNMS(T2M, T2N, T2L); + T2U = FNMS(T2S, T2T, T2R); + { + E T5I, T4L, T1J, T5K; + T5I = FMA(T2J, T2N, T5H); + T4L = FMA(T1F, T1D, T4K); + T1J = FNMS(T1F, T1I, T1E); + T8c = T2O - T2U; + T2V = T2O + T2U; + T5K = FMA(T2P, T2T, T5J); + T4M = T4J + T4L; + T7t = T4J - T4L; + T7s = T1z - T1J; + T1K = T1z + T1J; + T8d = T5I - T5K; + T5L = T5I + T5K; + } + } + } + } + { + E T2Z, T30, T1O, T1T, T2W, T5M, T2Y, T4N, T1P, T35, T1Y, T36, T23, T33, T32; + E T1V, T5O, T2X, T1L; + { + E T1M, T1N, T1R, T1S; + T1M = Ip[WS(rs, 14)]; + T8e = T8c - T8d; + T8n = T8c + T8d; + T1N = Im[WS(rs, 14)]; + T1R = Rp[WS(rs, 14)]; + T1S = Rm[WS(rs, 14)]; + T2Z = W[57]; + T30 = T1M + T1N; + T1O = T1M - T1N; + T2X = T1S - T1R; + T1T = T1R + T1S; + T2W = W[56]; + T1L = W[54]; + T5M = T2Z * T2X; + } + { + E T1W, T1X, T21, T22; + T1W = Ip[WS(rs, 6)]; + T2Y = T2W * T2X; + T4N = T1L * T1T; + T1P = T1L * T1O; + T1X = Im[WS(rs, 6)]; + T21 = Rp[WS(rs, 6)]; + T22 = Rm[WS(rs, 6)]; + T35 = W[25]; + T1Y = T1W - T1X; + T36 = T1W + T1X; + T23 = T21 + T22; + T33 = T22 - T21; + 
T32 = W[24]; + T1V = W[22]; + T5O = T35 * T33; + } + { + E T34, T4P, T1Z, T1U, T4O, T20, T1Q, T31, T37; + T1Q = W[55]; + T34 = T32 * T33; + T4P = T1V * T23; + T1Z = T1V * T1Y; + T1U = FNMS(T1Q, T1T, T1P); + T4O = FMA(T1Q, T1O, T4N); + T20 = W[23]; + T31 = FNMS(T2Z, T30, T2Y); + T37 = FNMS(T35, T36, T34); + { + E T5N, T4Q, T24, T5P; + T5N = FMA(T2W, T30, T5M); + T4Q = FMA(T20, T1Y, T4P); + T24 = FNMS(T20, T23, T1Z); + T8f = T31 - T37; + T38 = T31 + T37; + T5P = FMA(T32, T36, T5O); + T7v = T4O - T4Q; + T4R = T4O + T4Q; + T7w = T1U - T24; + T25 = T1U + T24; + T8g = T5N - T5P; + T5Q = T5N + T5P; + } + } + } + { + E Tp, Tu, Tq, T4Z, Tw, T3X, Tm, T3R, Tr; + { + E Tn, To, Ts, Tt; + Tn = Ip[WS(rs, 5)]; + T8h = T8f + T8g; + T8o = T8g - T8f; + To = Im[WS(rs, 5)]; + Ts = Rp[WS(rs, 5)]; + Tt = Rm[WS(rs, 5)]; + Tm = W[18]; + T3V = Tn + To; + Tp = Tn - To; + T3S = Ts - Tt; + Tu = Ts + Tt; + T3R = W[20]; + Tq = Tm * Tp; + } + { + E Tx, Ty, TC, TD; + Tx = Ip[WS(rs, 13)]; + T4Z = Tm * Tu; + T5p = T3R * T3V; + T3T = T3R * T3S; + Ty = Im[WS(rs, 13)]; + TC = Rp[WS(rs, 13)]; + TD = Rm[WS(rs, 13)]; + Tw = W[50]; + T41 = Tx + Ty; + Tz = Tx - Ty; + T3Y = TC - TD; + TE = TC + TD; + T3X = W[52]; + TA = Tw * Tz; + } + Tr = W[19]; + T51 = Tw * TE; + T5r = T3X * T41; + T3Z = T3X * T3Y; + Tv = FNMS(Tr, Tu, Tq); + T50 = FMA(Tr, Tp, T4Z); + TB = W[51]; + T3U = W[21]; + T40 = W[53]; + } + } + } + { + E T6y, T7B, T7E, T6u, T6S, T85, T7Y, T6s, T6v, T6x, T6R, T6r, T6F, T6D, T6C; + E T6G, T6M, T6K, T6J, T6N, T6l, T6o, T7j, T7m; + { + E T6i, T1p, T68, T2H, T67, T5g, T6h, T4T, T4w, T5G, T6d, T3D, T6c, T6m, T63; + E T6e; + { + E T5t, T43, T26, T2G, T54, T5f, T4H, T4S; + { + E T1o, T53, T7W, T7X, TH, T52, TF, T5q; + T6y = T12 - T1n; + T1o = T12 + T1n; + T52 = FMA(TB, Tz, T51); + TF = FNMS(TB, TE, TA); + T5q = FNMS(T3U, T3S, T5p); + { + E T3W, T5s, T42, TG; + T3W = FMA(T3U, T3V, T3T); + T5s = FNMS(T40, T3Y, T5r); + T42 = FMA(T40, T41, T3Z); + T7B = T50 - T52; + T53 = T50 + T52; + T7E = Tv - TF; + TG = Tv + 
TF; + T7W = T5s - T5q; + T5t = T5q + T5s; + T7X = T3W - T42; + T43 = T3W + T42; + TH = Tl + TG; + T6u = Tl - TG; + } + T6S = T1K - T25; + T26 = T1K + T25; + T85 = T7W - T7X; + T7Y = T7W + T7X; + T6i = TH - T1o; + T1p = TH + T1o; + T2G = T2r + T2F; + T6s = T2F - T2r; + T6v = T4Y - T53; + T54 = T4Y + T53; + T5f = T59 + T5e; + T6x = T59 - T5e; + } + T6R = T4B - T4G; + T4H = T4B + T4G; + T68 = T2G - T26; + T2H = T26 + T2G; + T67 = T5f - T54; + T5g = T54 + T5f; + T4S = T4M + T4R; + T6r = T4R - T4M; + { + E T5u, T6b, T5F, T44, T4v; + T6F = T43 - T3Q; + T44 = T3Q + T43; + T4v = T4h + T4u; + T6D = T4u - T4h; + T6C = T5t - T5o; + T5u = T5o + T5t; + T6h = T4H - T4S; + T4T = T4H + T4S; + T6b = T44 - T4v; + T4w = T44 + T4v; + T6G = T5z - T5E; + T5F = T5z + T5E; + { + E T5R, T62, T39, T3C, T6a; + T6M = T2V - T38; + T39 = T2V + T38; + T3C = T3m + T3B; + T6K = T3B - T3m; + T6a = T5F - T5u; + T5G = T5u + T5F; + T6J = T5Q - T5L; + T5R = T5L + T5Q; + T6d = T3C - T39; + T3D = T39 + T3C; + T6N = T61 - T5W; + T62 = T5W + T61; + T6c = T6a + T6b; + T6m = T6a - T6b; + T63 = T5R + T62; + T6e = T62 - T5R; + } + } + } + { + E T5j, T6n, T6f, T64; + { + E T5i, T5h, T65, T66, T2I, T4x; + T5j = T2H - T1p; + T2I = T1p + T2H; + T4x = T3D - T4w; + T5i = T4w + T3D; + T6n = T6d + T6e; + T6f = T6d - T6e; + T5h = T4T - T5g; + T65 = T4T + T5g; + Im[WS(rs, 15)] = KP500000000 * (T4x - T2I); + Ip[0] = KP500000000 * (T2I + T4x); + T66 = T5G + T63; + T64 = T5G - T63; + Rp[0] = KP500000000 * (T65 + T66); + Rm[WS(rs, 15)] = KP500000000 * (T65 - T66); + Rp[WS(rs, 8)] = KP500000000 * (T5h + T5i); + Rm[WS(rs, 7)] = KP500000000 * (T5h - T5i); + } + { + E T6k, T6j, T6p, T6q, T69, T6g; + T6l = T68 - T67; + T69 = T67 + T68; + T6g = T6c + T6f; + T6k = T6f - T6c; + T6j = T6h - T6i; + T6p = T6h + T6i; + Im[WS(rs, 7)] = KP500000000 * (T64 - T5j); + Ip[WS(rs, 8)] = KP500000000 * (T5j + T64); + Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6g, T69))); + Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6g, T69)); + 
T6q = T6m + T6n; + T6o = T6m - T6n; + Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6q, T6p)); + Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T6q, T6p)); + Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6k, T6j)); + Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6k, T6j)); + } + } + } + { + E T75, T6t, T7f, T6T, T76, T6W, T7g, T6A, T7b, T6L, T7a, T7k, T70, T6I, T6U; + E T6w; + Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6o, T6l))); + Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6o, T6l)); + T75 = T6s - T6r; + T6t = T6r + T6s; + T7f = T6R - T6S; + T6T = T6R + T6S; + T6U = T6v + T6u; + T6w = T6u - T6v; + { + E T78, T6E, T6V, T6z, T79, T6H; + T6V = T6x - T6y; + T6z = T6x + T6y; + T78 = T6C - T6D; + T6E = T6C + T6D; + T76 = T6V - T6U; + T6W = T6U + T6V; + T7g = T6w - T6z; + T6A = T6w + T6z; + T79 = T6G - T6F; + T6H = T6F + T6G; + T7b = T6K - T6J; + T6L = T6J + T6K; + T7a = FMA(KP414213562, T79, T78); + T7k = FNMS(KP414213562, T78, T79); + T70 = FNMS(KP414213562, T6E, T6H); + T6I = FMA(KP414213562, T6H, T6E); + } + { + E T6Z, T6B, T73, T6X, T7c, T6O; + T6Z = FNMS(KP707106781, T6A, T6t); + T6B = FMA(KP707106781, T6A, T6t); + T73 = FMA(KP707106781, T6W, T6T); + T6X = FNMS(KP707106781, T6W, T6T); + T7c = T6N - T6M; + T6O = T6M + T6N; + { + E T7i, T7h, T7n, T7o; + { + E T77, T7l, T71, T6P, T7e, T7d; + T7j = FMA(KP707106781, T76, T75); + T77 = FNMS(KP707106781, T76, T75); + T7d = FMA(KP414213562, T7c, T7b); + T7l = FNMS(KP414213562, T7b, T7c); + T71 = FMA(KP414213562, T6L, T6O); + T6P = FNMS(KP414213562, T6O, T6L); + T7e = T7a - T7d; + T7i = T7a + T7d; + T7h = FMA(KP707106781, T7g, T7f); + T7n = FNMS(KP707106781, T7g, T7f); + { + E T72, T74, T6Y, T6Q; + T72 = T70 - T71; + T74 = T70 + T71; + T6Y = T6P - T6I; + T6Q = T6I + T6P; + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7e, T77))); + Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7e, T77)); + Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T72, T6Z))); + Ip[WS(rs, 10)] = KP500000000 
* (FMA(KP923879532, T72, T6Z)); + Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T74, T73)); + Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T74, T73)); + Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T6Y, T6X)); + Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T6Y, T6X)); + Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T6Q, T6B))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T6Q, T6B)); + T7o = T7k + T7l; + T7m = T7k - T7l; + } + } + Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7o, T7n)); + Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7o, T7n)); + Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7i, T7h)); + Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7i, T7h)); + } + } + } + { + E T9x, T9T, T8L, T7z, T97, T9J, T8V, T8z, T8M, T8C, T8W, T7O, T9O, T9Y, T9E; + E T9t, T8Q, T90, T8G, T88, T8p, T8m, T9K, T9A, T9U, T9e, T8R, T8j, T9R, T9Z; + E T9F, T9m; + { + E T9c, T9b, T99, T98, T7S, T86, T83, T9q, T9M, T9p, T9r, T7Z, T9z, T9a; + { + E T95, T7r, T9v, T8v, T8w, T8x, T9w, T7y, T7u, T7x; + T95 = T7q + T7p; + T7r = T7p - T7q; + T9v = T8t - T8u; + T8v = T8t + T8u; + T8w = T7t + T7s; + T7u = T7s - T7t; + Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7m, T7j))); + Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7m, T7j)); + T7x = T7v + T7w; + T8x = T7v - T7w; + T9w = T7u - T7x; + T7y = T7u + T7x; + { + E T7J, T8A, T7G, T7M; + { + E T7C, T96, T8y, T7F; + T9c = T7A + T7B; + T7C = T7A - T7B; + T9x = FMA(KP707106781, T9w, T9v); + T9T = FNMS(KP707106781, T9w, T9v); + T8L = FNMS(KP707106781, T7y, T7r); + T7z = FMA(KP707106781, T7y, T7r); + T96 = T8x - T8w; + T8y = T8w + T8x; + T7F = T7D + T7E; + T9b = T7D - T7E; + T99 = T7H + T7I; + T7J = T7H - T7I; + T97 = FMA(KP707106781, T96, T95); + T9J = FNMS(KP707106781, T96, T95); + T8V = FNMS(KP707106781, T8y, T8v); + T8z = FMA(KP707106781, T8y, T8v); + T8A = FMA(KP414213562, T7C, T7F); + T7G = FNMS(KP414213562, T7F, T7C); + T7M = T7K + T7L; + T98 = T7K - T7L; + } + { + E T9n, T9o, T8B, T7N; + 
T7S = T7Q + T7R; + T9n = T7R - T7Q; + T9o = T85 - T84; + T86 = T84 + T85; + T83 = T81 + T82; + T9q = T81 - T82; + T8B = FNMS(KP414213562, T7J, T7M); + T7N = FMA(KP414213562, T7M, T7J); + T9M = FMA(KP707106781, T9o, T9n); + T9p = FNMS(KP707106781, T9o, T9n); + T8M = T8B - T8A; + T8C = T8A + T8B; + T8W = T7G - T7N; + T7O = T7G + T7N; + T9r = T7Y - T7V; + T7Z = T7V + T7Y; + } + } + } + { + E T8O, T80, T9N, T9s, T8P, T87; + T9N = FMA(KP707106781, T9r, T9q); + T9s = FNMS(KP707106781, T9r, T9q); + T8O = FNMS(KP707106781, T7Z, T7S); + T80 = FMA(KP707106781, T7Z, T7S); + T9O = FMA(KP198912367, T9N, T9M); + T9Y = FNMS(KP198912367, T9M, T9N); + T9E = FMA(KP668178637, T9p, T9s); + T9t = FNMS(KP668178637, T9s, T9p); + T8P = FNMS(KP707106781, T86, T83); + T87 = FMA(KP707106781, T86, T83); + T9z = FNMS(KP414213562, T98, T99); + T9a = FMA(KP414213562, T99, T98); + T8Q = FNMS(KP668178637, T8P, T8O); + T90 = FMA(KP668178637, T8O, T8P); + T8G = FNMS(KP198912367, T80, T87); + T88 = FMA(KP198912367, T87, T80); + } + { + E T8b, T9j, T9P, T9i, T9k, T8i, T9Q, T9l; + { + E T9g, T9h, T9y, T9d; + T8b = T89 - T8a; + T9g = T8a + T89; + T9h = T8n - T8o; + T8p = T8n + T8o; + T8m = T8k + T8l; + T9j = T8l - T8k; + T9y = FMA(KP414213562, T9b, T9c); + T9d = FNMS(KP414213562, T9c, T9b); + T9P = FMA(KP707106781, T9h, T9g); + T9i = FNMS(KP707106781, T9h, T9g); + T9K = T9y + T9z; + T9A = T9y - T9z; + T9U = T9d + T9a; + T9e = T9a - T9d; + T9k = T8h - T8e; + T8i = T8e + T8h; + } + T9Q = FMA(KP707106781, T9k, T9j); + T9l = FNMS(KP707106781, T9k, T9j); + T8R = FNMS(KP707106781, T8i, T8b); + T8j = FMA(KP707106781, T8i, T8b); + T9R = FMA(KP198912367, T9Q, T9P); + T9Z = FNMS(KP198912367, T9P, T9Q); + T9F = FMA(KP668178637, T9i, T9l); + T9m = FNMS(KP668178637, T9l, T9i); + } + } + { + E T8Z, T92, T9D, T9G; + { + E T8F, T7P, T8J, T8D, T8S, T8q; + T8F = FNMS(KP923879532, T7O, T7z); + T7P = FMA(KP923879532, T7O, T7z); + T8J = FMA(KP923879532, T8C, T8z); + T8D = FNMS(KP923879532, T8C, T8z); + T8S = 
FNMS(KP707106781, T8p, T8m); + T8q = FMA(KP707106781, T8p, T8m); + { + E T8Y, T8X, T93, T94; + { + E T8N, T91, T8H, T8r, T8U, T8T; + T8Z = FMA(KP923879532, T8M, T8L); + T8N = FNMS(KP923879532, T8M, T8L); + T8T = FMA(KP668178637, T8S, T8R); + T91 = FNMS(KP668178637, T8R, T8S); + T8H = FMA(KP198912367, T8j, T8q); + T8r = FNMS(KP198912367, T8q, T8j); + T8U = T8Q + T8T; + T8Y = T8T - T8Q; + T8X = FMA(KP923879532, T8W, T8V); + T93 = FNMS(KP923879532, T8W, T8V); + { + E T8I, T8K, T8E, T8s; + T8I = T8G - T8H; + T8K = T8G + T8H; + T8E = T8r - T88; + T8s = T88 + T8r; + Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T8U, T8N))); + Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T8U, T8N)); + Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T8I, T8F))); + Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8I, T8F)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8K, T8J)); + Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T8K, T8J)); + Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8E, T8D)); + Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T8E, T8D)); + Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T8s, T7P))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8s, T7P)); + T94 = T90 + T91; + T92 = T90 - T91; + } + } + Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T94, T93)); + Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T94, T93)); + Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T8Y, T8X)); + Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T8Y, T8X)); + } + } + { + E T9C, T9B, T9H, T9I, T9f, T9u; + T9D = FNMS(KP923879532, T9e, T97); + T9f = FMA(KP923879532, T9e, T97); + T9u = T9m - T9t; + T9C = T9t + T9m; + T9B = FNMS(KP923879532, T9A, T9x); + T9H = FMA(KP923879532, T9A, T9x); + Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T92, T8Z))); + Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T92, T8Z)); + Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, T9u, T9f))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9u, T9f)); + T9I = T9E 
+ T9F; + T9G = T9E - T9F; + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9I, T9H)); + Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, T9I, T9H)); + Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9C, T9B)); + Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, T9C, T9B)); + } + { + E T9W, T9V, Ta1, Ta2, T9L, T9S; + T9X = FNMS(KP923879532, T9K, T9J); + T9L = FMA(KP923879532, T9K, T9J); + T9S = T9O - T9R; + T9W = T9O + T9R; + T9V = FNMS(KP923879532, T9U, T9T); + Ta1 = FMA(KP923879532, T9U, T9T); + Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, T9G, T9D))); + Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9G, T9D)); + Im[0] = -(KP500000000 * (FNMS(KP980785280, T9S, T9L))); + Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, T9S, T9L)); + Ta2 = T9Y + T9Z; + Ta0 = T9Y - T9Z; + Rm[0] = KP500000000 * (FMA(KP980785280, Ta2, Ta1)); + Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, Ta2, Ta1)); + Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, T9W, T9V)); + Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, T9W, T9V)); + } + } + } + } + } + Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, Ta0, T9X))); + Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Ta0, T9X)); + } + } +} +/* Instruction table (type tw_instr) that desc below points at. NOTE(review): presumably describes the layout of the W array for size 32, with TW_NEXT ending the list -- confirm against the tw_instr definition. */ +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; +/* Descriptor passed at registration: 32, the name string "hc2cfdft_32", the twinstr table, &GENUS, and four numbers. NOTE(review): the four numbers look like operation counts -- confirm against the hc2c_desc definition. */ +static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, {300, 126, 198, 0} }; +/* Registration entry point: hands hc2cfdft_32 and &desc to X(khc2c_register) on planner p, passing HC2C_VIA_DFT. */ +void X(codelet_hc2cfdft_32) (planner *p) { + X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include hc2cf.h */ + +/* + * This function contains 498 FP additions, 228 FP multiplications, + * (or, 404 additions, 134 multiplications, 94 fused multiply/add), + * 106 stack variables, 9 constants, and 128 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP277785116, 
+0.277785116509801112371415406974266437187468595); + DK(KP415734806, +0.415734806151272618539394188808952878369280406); + DK(KP097545161, +0.097545161008064133924142434238511120463845809); + DK(KP490392640, +0.490392640201615224563091118067119518486966865); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP191341716, +0.191341716182544885864229992015199433380672281); + DK(KP461939766, +0.461939766255643378064091594698394143411208313); + DK(KP353553390, +0.353553390593273762200422181052424519642417969); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { + E T2S, T5K, T52, T5N, T7p, T8r, T7i, T8o, T2q, T7t, T45, T6L, T2d, T7u, T48; + E T6M, T1A, T4c, T4f, T1T, T3f, T5M, T7e, T7l, T6J, T7x, T4V, T5J, T7b, T7k; + E T6G, T7w, Tj, TC, T5r, T4k, T4n, T5s, T3D, T5C, T6V, T72, T4G, T5F, T6u; + E T86, T6S, T71, T6r, T85, TW, T1f, T5v, T4r, T4u, T5u, T40, T5G, T76, T8k; + E T4N, T5D, T6B, T89, T6Z, T8h, T6y, T88; + { + E T1Y, T22, T2L, T4W, T2p, T43, T2A, T50, T27, T2b, T2Q, T4X, T2h, T2l, T2F; + E T4Z; + { + E T1W, T1X, T2K, T20, T21, T2I, T2H, T2J; + T1W = Ip[WS(rs, 4)]; + T1X = Im[WS(rs, 4)]; + T2K = T1W + T1X; + T20 = Rp[WS(rs, 4)]; + T21 = Rm[WS(rs, 4)]; + T2I = T20 - T21; + T1Y = T1W - T1X; + T22 = T20 + T21; + T2H = W[16]; + T2J = W[17]; + T2L = FMA(T2H, T2I, T2J * T2K); + T4W = FNMS(T2J, T2I, T2H * T2K); + } + { + E T2n, T2o, T2z, T2v, T2w, T2x, T2u, T2y; + T2n = Ip[0]; + T2o = Im[0]; + T2z = T2n + T2o; + T2v = Rm[0]; + T2w = Rp[0]; + T2x = T2v - T2w; + T2p = T2n - T2o; + T43 = T2w + T2v; + T2u = W[0]; + T2y = W[1]; + T2A = FNMS(T2y, T2z, T2u * T2x); + T50 = FMA(T2y, T2x, T2u * T2z); + } + { + E T25, T26, T2P, T29, T2a, T2N, T2M, T2O; + T25 = Ip[WS(rs, 12)]; + T26 = Im[WS(rs, 12)]; + T2P = T25 + T26; + T29 = Rp[WS(rs, 12)]; + T2a = Rm[WS(rs, 12)]; + T2N = 
T29 - T2a; + T27 = T25 - T26; + T2b = T29 + T2a; + T2M = W[48]; + T2O = W[49]; + T2Q = FMA(T2M, T2N, T2O * T2P); + T4X = FNMS(T2O, T2N, T2M * T2P); + } + { + E T2f, T2g, T2E, T2j, T2k, T2C, T2B, T2D; + T2f = Ip[WS(rs, 8)]; + T2g = Im[WS(rs, 8)]; + T2E = T2f + T2g; + T2j = Rp[WS(rs, 8)]; + T2k = Rm[WS(rs, 8)]; + T2C = T2j - T2k; + T2h = T2f - T2g; + T2l = T2j + T2k; + T2B = W[32]; + T2D = W[33]; + T2F = FMA(T2B, T2C, T2D * T2E); + T4Z = FNMS(T2D, T2C, T2B * T2E); + } + { + E T2G, T2R, T7g, T7h; + T2G = T2A - T2F; + T2R = T2L + T2Q; + T2S = T2G - T2R; + T5K = T2R + T2G; + { + E T4Y, T51, T7n, T7o; + T4Y = T4W + T4X; + T51 = T4Z + T50; + T52 = T4Y + T51; + T5N = T51 - T4Y; + T7n = T2Q - T2L; + T7o = T50 - T4Z; + T7p = T7n + T7o; + T8r = T7o - T7n; + } + T7g = T2F + T2A; + T7h = T4W - T4X; + T7i = T7g - T7h; + T8o = T7h + T7g; + { + E T2m, T44, T2e, T2i; + T2e = W[30]; + T2i = W[31]; + T2m = FNMS(T2i, T2l, T2e * T2h); + T44 = FMA(T2e, T2l, T2i * T2h); + T2q = T2m + T2p; + T7t = T43 - T44; + T45 = T43 + T44; + T6L = T2p - T2m; + } + { + E T23, T46, T2c, T47; + { + E T1V, T1Z, T24, T28; + T1V = W[14]; + T1Z = W[15]; + T23 = FNMS(T1Z, T22, T1V * T1Y); + T46 = FMA(T1V, T22, T1Z * T1Y); + T24 = W[46]; + T28 = W[47]; + T2c = FNMS(T28, T2b, T24 * T27); + T47 = FMA(T24, T2b, T28 * T27); + } + T2d = T23 + T2c; + T7u = T23 - T2c; + T48 = T46 + T47; + T6M = T46 - T47; + } + } + } + { + E T1q, T4a, T2X, T4P, T1S, T4e, T3d, T4T, T1z, T4b, T32, T4Q, T1J, T4d, T38; + E T4S; + { + E T1l, T2W, T1p, T2U; + { + E T1j, T1k, T1n, T1o; + T1j = Ip[WS(rs, 2)]; + T1k = Im[WS(rs, 2)]; + T1l = T1j - T1k; + T2W = T1j + T1k; + T1n = Rp[WS(rs, 2)]; + T1o = Rm[WS(rs, 2)]; + T1p = T1n + T1o; + T2U = T1n - T1o; + } + { + E T1i, T1m, T2T, T2V; + T1i = W[6]; + T1m = W[7]; + T1q = FNMS(T1m, T1p, T1i * T1l); + T4a = FMA(T1i, T1p, T1m * T1l); + T2T = W[8]; + T2V = W[9]; + T2X = FMA(T2T, T2U, T2V * T2W); + T4P = FNMS(T2V, T2U, T2T * T2W); + } + } + { + E T1N, T3c, T1R, T3a; + { + E T1L, T1M, T1P, T1Q; + T1L 
= Ip[WS(rs, 6)]; + T1M = Im[WS(rs, 6)]; + T1N = T1L - T1M; + T3c = T1L + T1M; + T1P = Rp[WS(rs, 6)]; + T1Q = Rm[WS(rs, 6)]; + T1R = T1P + T1Q; + T3a = T1P - T1Q; + } + { + E T1K, T1O, T39, T3b; + T1K = W[22]; + T1O = W[23]; + T1S = FNMS(T1O, T1R, T1K * T1N); + T4e = FMA(T1K, T1R, T1O * T1N); + T39 = W[24]; + T3b = W[25]; + T3d = FMA(T39, T3a, T3b * T3c); + T4T = FNMS(T3b, T3a, T39 * T3c); + } + } + { + E T1u, T31, T1y, T2Z; + { + E T1s, T1t, T1w, T1x; + T1s = Ip[WS(rs, 10)]; + T1t = Im[WS(rs, 10)]; + T1u = T1s - T1t; + T31 = T1s + T1t; + T1w = Rp[WS(rs, 10)]; + T1x = Rm[WS(rs, 10)]; + T1y = T1w + T1x; + T2Z = T1w - T1x; + } + { + E T1r, T1v, T2Y, T30; + T1r = W[38]; + T1v = W[39]; + T1z = FNMS(T1v, T1y, T1r * T1u); + T4b = FMA(T1r, T1y, T1v * T1u); + T2Y = W[40]; + T30 = W[41]; + T32 = FMA(T2Y, T2Z, T30 * T31); + T4Q = FNMS(T30, T2Z, T2Y * T31); + } + } + { + E T1E, T37, T1I, T35; + { + E T1C, T1D, T1G, T1H; + T1C = Ip[WS(rs, 14)]; + T1D = Im[WS(rs, 14)]; + T1E = T1C - T1D; + T37 = T1C + T1D; + T1G = Rp[WS(rs, 14)]; + T1H = Rm[WS(rs, 14)]; + T1I = T1G + T1H; + T35 = T1G - T1H; + } + { + E T1B, T1F, T34, T36; + T1B = W[54]; + T1F = W[55]; + T1J = FNMS(T1F, T1I, T1B * T1E); + T4d = FMA(T1B, T1I, T1F * T1E); + T34 = W[56]; + T36 = W[57]; + T38 = FMA(T34, T35, T36 * T37); + T4S = FNMS(T36, T35, T34 * T37); + } + } + { + E T33, T3e, T4R, T4U; + T1A = T1q + T1z; + T4c = T4a + T4b; + T4f = T4d + T4e; + T1T = T1J + T1S; + T33 = T2X + T32; + T3e = T38 + T3d; + T3f = T33 + T3e; + T5M = T3e - T33; + { + E T7c, T7d, T6H, T6I; + T7c = T4S - T4T; + T7d = T3d - T38; + T7e = T7c + T7d; + T7l = T7c - T7d; + T6H = T4d - T4e; + T6I = T1J - T1S; + T6J = T6H + T6I; + T7x = T6H - T6I; + } + T4R = T4P + T4Q; + T4U = T4S + T4T; + T4V = T4R + T4U; + T5J = T4U - T4R; + { + E T79, T7a, T6E, T6F; + T79 = T32 - T2X; + T7a = T4P - T4Q; + T7b = T79 - T7a; + T7k = T7a + T79; + T6E = T1q - T1z; + T6F = T4a - T4b; + T6G = T6E - T6F; + T7w = T6F + T6E; + } + } + } + { + E T9, T4i, T3l, T4A, TB, T4m, 
T3B, T4E, Ti, T4j, T3q, T4B, Ts, T4l, T3w; + E T4D; + { + E T4, T3k, T8, T3i; + { + E T2, T3, T6, T7; + T2 = Ip[WS(rs, 1)]; + T3 = Im[WS(rs, 1)]; + T4 = T2 - T3; + T3k = T2 + T3; + T6 = Rp[WS(rs, 1)]; + T7 = Rm[WS(rs, 1)]; + T8 = T6 + T7; + T3i = T6 - T7; + } + { + E T1, T5, T3h, T3j; + T1 = W[2]; + T5 = W[3]; + T9 = FNMS(T5, T8, T1 * T4); + T4i = FMA(T1, T8, T5 * T4); + T3h = W[4]; + T3j = W[5]; + T3l = FMA(T3h, T3i, T3j * T3k); + T4A = FNMS(T3j, T3i, T3h * T3k); + } + } + { + E Tw, T3A, TA, T3y; + { + E Tu, Tv, Ty, Tz; + Tu = Ip[WS(rs, 13)]; + Tv = Im[WS(rs, 13)]; + Tw = Tu - Tv; + T3A = Tu + Tv; + Ty = Rp[WS(rs, 13)]; + Tz = Rm[WS(rs, 13)]; + TA = Ty + Tz; + T3y = Ty - Tz; + } + { + E Tt, Tx, T3x, T3z; + Tt = W[50]; + Tx = W[51]; + TB = FNMS(Tx, TA, Tt * Tw); + T4m = FMA(Tt, TA, Tx * Tw); + T3x = W[52]; + T3z = W[53]; + T3B = FMA(T3x, T3y, T3z * T3A); + T4E = FNMS(T3z, T3y, T3x * T3A); + } + } + { + E Td, T3p, Th, T3n; + { + E Tb, Tc, Tf, Tg; + Tb = Ip[WS(rs, 9)]; + Tc = Im[WS(rs, 9)]; + Td = Tb - Tc; + T3p = Tb + Tc; + Tf = Rp[WS(rs, 9)]; + Tg = Rm[WS(rs, 9)]; + Th = Tf + Tg; + T3n = Tf - Tg; + } + { + E Ta, Te, T3m, T3o; + Ta = W[34]; + Te = W[35]; + Ti = FNMS(Te, Th, Ta * Td); + T4j = FMA(Ta, Th, Te * Td); + T3m = W[36]; + T3o = W[37]; + T3q = FMA(T3m, T3n, T3o * T3p); + T4B = FNMS(T3o, T3n, T3m * T3p); + } + } + { + E Tn, T3v, Tr, T3t; + { + E Tl, Tm, Tp, Tq; + Tl = Ip[WS(rs, 5)]; + Tm = Im[WS(rs, 5)]; + Tn = Tl - Tm; + T3v = Tl + Tm; + Tp = Rp[WS(rs, 5)]; + Tq = Rm[WS(rs, 5)]; + Tr = Tp + Tq; + T3t = Tp - Tq; + } + { + E Tk, To, T3s, T3u; + Tk = W[18]; + To = W[19]; + Ts = FNMS(To, Tr, Tk * Tn); + T4l = FMA(Tk, Tr, To * Tn); + T3s = W[20]; + T3u = W[21]; + T3w = FMA(T3s, T3t, T3u * T3v); + T4D = FNMS(T3u, T3t, T3s * T3v); + } + } + Tj = T9 + Ti; + TC = Ts + TB; + T5r = Tj - TC; + T4k = T4i + T4j; + T4n = T4l + T4m; + T5s = T4k - T4n; + { + E T3r, T3C, T6T, T6U; + T3r = T3l + T3q; + T3C = T3w + T3B; + T3D = T3r + T3C; + T5C = T3C - T3r; + T6T = T4E - T4D; + 
T6U = T3w - T3B; + T6V = T6T + T6U; + T72 = T6T - T6U; + } + { + E T4C, T4F, T6s, T6t; + T4C = T4A + T4B; + T4F = T4D + T4E; + T4G = T4C + T4F; + T5F = T4F - T4C; + T6s = T4i - T4j; + T6t = Ts - TB; + T6u = T6s + T6t; + T86 = T6s - T6t; + } + { + E T6Q, T6R, T6p, T6q; + T6Q = T3q - T3l; + T6R = T4A - T4B; + T6S = T6Q - T6R; + T71 = T6R + T6Q; + T6p = T9 - Ti; + T6q = T4l - T4m; + T6r = T6p - T6q; + T85 = T6p + T6q; + } + } + { + E TM, T4p, T3I, T4H, T1e, T4t, T3Y, T4L, TV, T4q, T3N, T4I, T15, T4s, T3T; + E T4K; + { + E TH, T3H, TL, T3F; + { + E TF, TG, TJ, TK; + TF = Ip[WS(rs, 15)]; + TG = Im[WS(rs, 15)]; + TH = TF - TG; + T3H = TF + TG; + TJ = Rp[WS(rs, 15)]; + TK = Rm[WS(rs, 15)]; + TL = TJ + TK; + T3F = TJ - TK; + } + { + E TE, TI, T3E, T3G; + TE = W[58]; + TI = W[59]; + TM = FNMS(TI, TL, TE * TH); + T4p = FMA(TE, TL, TI * TH); + T3E = W[60]; + T3G = W[61]; + T3I = FMA(T3E, T3F, T3G * T3H); + T4H = FNMS(T3G, T3F, T3E * T3H); + } + } + { + E T19, T3X, T1d, T3V; + { + E T17, T18, T1b, T1c; + T17 = Ip[WS(rs, 11)]; + T18 = Im[WS(rs, 11)]; + T19 = T17 - T18; + T3X = T17 + T18; + T1b = Rp[WS(rs, 11)]; + T1c = Rm[WS(rs, 11)]; + T1d = T1b + T1c; + T3V = T1b - T1c; + } + { + E T16, T1a, T3U, T3W; + T16 = W[42]; + T1a = W[43]; + T1e = FNMS(T1a, T1d, T16 * T19); + T4t = FMA(T16, T1d, T1a * T19); + T3U = W[44]; + T3W = W[45]; + T3Y = FMA(T3U, T3V, T3W * T3X); + T4L = FNMS(T3W, T3V, T3U * T3X); + } + } + { + E TQ, T3M, TU, T3K; + { + E TO, TP, TS, TT; + TO = Ip[WS(rs, 7)]; + TP = Im[WS(rs, 7)]; + TQ = TO - TP; + T3M = TO + TP; + TS = Rp[WS(rs, 7)]; + TT = Rm[WS(rs, 7)]; + TU = TS + TT; + T3K = TS - TT; + } + { + E TN, TR, T3J, T3L; + TN = W[26]; + TR = W[27]; + TV = FNMS(TR, TU, TN * TQ); + T4q = FMA(TN, TU, TR * TQ); + T3J = W[28]; + T3L = W[29]; + T3N = FMA(T3J, T3K, T3L * T3M); + T4I = FNMS(T3L, T3K, T3J * T3M); + } + } + { + E T10, T3S, T14, T3Q; + { + E TY, TZ, T12, T13; + TY = Ip[WS(rs, 3)]; + TZ = Im[WS(rs, 3)]; + T10 = TY - TZ; + T3S = TY + TZ; + T12 = Rp[WS(rs, 3)]; 
+ T13 = Rm[WS(rs, 3)]; + T14 = T12 + T13; + T3Q = T12 - T13; + } + { + E TX, T11, T3P, T3R; + TX = W[10]; + T11 = W[11]; + T15 = FNMS(T11, T14, TX * T10); + T4s = FMA(TX, T14, T11 * T10); + T3P = W[12]; + T3R = W[13]; + T3T = FMA(T3P, T3Q, T3R * T3S); + T4K = FNMS(T3R, T3Q, T3P * T3S); + } + } + TW = TM + TV; + T1f = T15 + T1e; + T5v = TW - T1f; + T4r = T4p + T4q; + T4u = T4s + T4t; + T5u = T4r - T4u; + { + E T3O, T3Z, T74, T75; + T3O = T3I + T3N; + T3Z = T3T + T3Y; + T40 = T3O + T3Z; + T5G = T3Z - T3O; + T74 = T4H - T4I; + T75 = T3Y - T3T; + T76 = T74 + T75; + T8k = T74 - T75; + } + { + E T4J, T4M, T6z, T6A; + T4J = T4H + T4I; + T4M = T4K + T4L; + T4N = T4J + T4M; + T5D = T4J - T4M; + T6z = T4p - T4q; + T6A = T15 - T1e; + T6B = T6z + T6A; + T89 = T6z - T6A; + } + { + E T6X, T6Y, T6w, T6x; + T6X = T3N - T3I; + T6Y = T4K - T4L; + T6Z = T6X - T6Y; + T8h = T6X + T6Y; + T6w = TM - TV; + T6x = T4s - T4t; + T6y = T6w - T6x; + T88 = T6w + T6x; + } + } + { + E T1h, T5i, T5c, T5m, T5f, T5n, T2s, T58, T42, T4y, T4w, T57, T54, T56, T4h; + E T5h; + { + E TD, T1g, T5a, T5b; + TD = Tj + TC; + T1g = TW + T1f; + T1h = TD + T1g; + T5i = TD - T1g; + T5a = T4N - T4G; + T5b = T3D - T40; + T5c = T5a + T5b; + T5m = T5a - T5b; + } + { + E T5d, T5e, T1U, T2r; + T5d = T3f + T2S; + T5e = T52 - T4V; + T5f = T5d - T5e; + T5n = T5d + T5e; + T1U = T1A + T1T; + T2r = T2d + T2q; + T2s = T1U + T2r; + T58 = T2r - T1U; + } + { + E T3g, T41, T4o, T4v; + T3g = T2S - T3f; + T41 = T3D + T40; + T42 = T3g - T41; + T4y = T41 + T3g; + T4o = T4k + T4n; + T4v = T4r + T4u; + T4w = T4o + T4v; + T57 = T4v - T4o; + } + { + E T4O, T53, T49, T4g; + T4O = T4G + T4N; + T53 = T4V + T52; + T54 = T4O - T53; + T56 = T4O + T53; + T49 = T45 + T48; + T4g = T4c + T4f; + T4h = T49 + T4g; + T5h = T49 - T4g; + } + { + E T2t, T55, T4x, T4z; + T2t = T1h + T2s; + Ip[0] = KP500000000 * (T2t + T42); + Im[WS(rs, 15)] = KP500000000 * (T42 - T2t); + T55 = T4h + T4w; + Rm[WS(rs, 15)] = KP500000000 * (T55 - T56); + Rp[0] = KP500000000 * 
(T55 + T56); + T4x = T4h - T4w; + Rm[WS(rs, 7)] = KP500000000 * (T4x - T4y); + Rp[WS(rs, 8)] = KP500000000 * (T4x + T4y); + T4z = T2s - T1h; + Ip[WS(rs, 8)] = KP500000000 * (T4z + T54); + Im[WS(rs, 7)] = KP500000000 * (T54 - T4z); + } + { + E T59, T5g, T5p, T5q; + T59 = KP500000000 * (T57 + T58); + T5g = KP353553390 * (T5c + T5f); + Ip[WS(rs, 4)] = T59 + T5g; + Im[WS(rs, 11)] = T5g - T59; + T5p = KP500000000 * (T5h + T5i); + T5q = KP353553390 * (T5m + T5n); + Rm[WS(rs, 11)] = T5p - T5q; + Rp[WS(rs, 4)] = T5p + T5q; + } + { + E T5j, T5k, T5l, T5o; + T5j = KP500000000 * (T5h - T5i); + T5k = KP353553390 * (T5f - T5c); + Rm[WS(rs, 3)] = T5j - T5k; + Rp[WS(rs, 12)] = T5j + T5k; + T5l = KP500000000 * (T58 - T57); + T5o = KP353553390 * (T5m - T5n); + Ip[WS(rs, 12)] = T5l + T5o; + Im[WS(rs, 3)] = T5o - T5l; + } + } + { + E T5x, T6g, T6a, T6k, T6d, T6l, T5A, T66, T5I, T60, T5T, T6f, T5W, T65, T5P; + E T61; + { + E T5t, T5w, T68, T69; + T5t = T5r - T5s; + T5w = T5u + T5v; + T5x = KP353553390 * (T5t + T5w); + T6g = KP353553390 * (T5t - T5w); + T68 = T5D - T5C; + T69 = T5G - T5F; + T6a = FMA(KP461939766, T68, KP191341716 * T69); + T6k = FNMS(KP461939766, T69, KP191341716 * T68); + } + { + E T6b, T6c, T5y, T5z; + T6b = T5K - T5J; + T6c = T5N - T5M; + T6d = FNMS(KP461939766, T6c, KP191341716 * T6b); + T6l = FMA(KP461939766, T6b, KP191341716 * T6c); + T5y = T4f - T4c; + T5z = T2q - T2d; + T5A = KP500000000 * (T5y + T5z); + T66 = KP500000000 * (T5z - T5y); + } + { + E T5E, T5H, T5R, T5S; + T5E = T5C + T5D; + T5H = T5F + T5G; + T5I = FMA(KP191341716, T5E, KP461939766 * T5H); + T60 = FNMS(KP191341716, T5H, KP461939766 * T5E); + T5R = T45 - T48; + T5S = T1A - T1T; + T5T = KP500000000 * (T5R + T5S); + T6f = KP500000000 * (T5R - T5S); + } + { + E T5U, T5V, T5L, T5O; + T5U = T5s + T5r; + T5V = T5u - T5v; + T5W = KP353553390 * (T5U + T5V); + T65 = KP353553390 * (T5V - T5U); + T5L = T5J + T5K; + T5O = T5M + T5N; + T5P = FNMS(KP191341716, T5O, KP461939766 * T5L); + T61 = FMA(KP191341716, 
T5L, KP461939766 * T5O); + } + { + E T5B, T5Q, T63, T64; + T5B = T5x + T5A; + T5Q = T5I + T5P; + Ip[WS(rs, 2)] = T5B + T5Q; + Im[WS(rs, 13)] = T5Q - T5B; + T63 = T5T + T5W; + T64 = T60 + T61; + Rm[WS(rs, 13)] = T63 - T64; + Rp[WS(rs, 2)] = T63 + T64; + } + { + E T5X, T5Y, T5Z, T62; + T5X = T5T - T5W; + T5Y = T5P - T5I; + Rm[WS(rs, 5)] = T5X - T5Y; + Rp[WS(rs, 10)] = T5X + T5Y; + T5Z = T5A - T5x; + T62 = T60 - T61; + Ip[WS(rs, 10)] = T5Z + T62; + Im[WS(rs, 5)] = T62 - T5Z; + } + { + E T67, T6e, T6n, T6o; + T67 = T65 + T66; + T6e = T6a + T6d; + Ip[WS(rs, 6)] = T67 + T6e; + Im[WS(rs, 9)] = T6e - T67; + T6n = T6f + T6g; + T6o = T6k + T6l; + Rm[WS(rs, 9)] = T6n - T6o; + Rp[WS(rs, 6)] = T6n + T6o; + } + { + E T6h, T6i, T6j, T6m; + T6h = T6f - T6g; + T6i = T6d - T6a; + Rm[WS(rs, 1)] = T6h - T6i; + Rp[WS(rs, 14)] = T6h + T6i; + T6j = T66 - T65; + T6m = T6k - T6l; + Ip[WS(rs, 14)] = T6j + T6m; + Im[WS(rs, 1)] = T6m - T6j; + } + } + { + E T6D, T7W, T6O, T7M, T7C, T7L, T7z, T7V, T7r, T81, T7H, T7T, T78, T80, T7G; + E T7Q; + { + E T6v, T6C, T7v, T7y; + T6v = FNMS(KP191341716, T6u, KP461939766 * T6r); + T6C = FMA(KP461939766, T6y, KP191341716 * T6B); + T6D = T6v + T6C; + T7W = T6v - T6C; + { + E T6K, T6N, T7A, T7B; + T6K = KP353553390 * (T6G + T6J); + T6N = KP500000000 * (T6L - T6M); + T6O = T6K + T6N; + T7M = T6N - T6K; + T7A = FMA(KP191341716, T6r, KP461939766 * T6u); + T7B = FNMS(KP191341716, T6y, KP461939766 * T6B); + T7C = T7A + T7B; + T7L = T7B - T7A; + } + T7v = KP500000000 * (T7t + T7u); + T7y = KP353553390 * (T7w + T7x); + T7z = T7v + T7y; + T7V = T7v - T7y; + { + E T7j, T7R, T7q, T7S, T7f, T7m; + T7f = KP707106781 * (T7b + T7e); + T7j = T7f + T7i; + T7R = T7i - T7f; + T7m = KP707106781 * (T7k + T7l); + T7q = T7m + T7p; + T7S = T7p - T7m; + T7r = FNMS(KP097545161, T7q, KP490392640 * T7j); + T81 = FMA(KP415734806, T7R, KP277785116 * T7S); + T7H = FMA(KP097545161, T7j, KP490392640 * T7q); + T7T = FNMS(KP415734806, T7S, KP277785116 * T7R); + } + { + E T70, T7O, T77, T7P, 
T6W, T73; + T6W = KP707106781 * (T6S + T6V); + T70 = T6W + T6Z; + T7O = T6Z - T6W; + T73 = KP707106781 * (T71 + T72); + T77 = T73 + T76; + T7P = T76 - T73; + T78 = FMA(KP490392640, T70, KP097545161 * T77); + T80 = FNMS(KP415734806, T7O, KP277785116 * T7P); + T7G = FNMS(KP097545161, T70, KP490392640 * T77); + T7Q = FMA(KP277785116, T7O, KP415734806 * T7P); + } + } + { + E T6P, T7s, T7J, T7K; + T6P = T6D + T6O; + T7s = T78 + T7r; + Ip[WS(rs, 1)] = T6P + T7s; + Im[WS(rs, 14)] = T7s - T6P; + T7J = T7z + T7C; + T7K = T7G + T7H; + Rm[WS(rs, 14)] = T7J - T7K; + Rp[WS(rs, 1)] = T7J + T7K; + } + { + E T7D, T7E, T7F, T7I; + T7D = T7z - T7C; + T7E = T7r - T78; + Rm[WS(rs, 6)] = T7D - T7E; + Rp[WS(rs, 9)] = T7D + T7E; + T7F = T6O - T6D; + T7I = T7G - T7H; + Ip[WS(rs, 9)] = T7F + T7I; + Im[WS(rs, 6)] = T7I - T7F; + } + { + E T7N, T7U, T83, T84; + T7N = T7L + T7M; + T7U = T7Q + T7T; + Ip[WS(rs, 5)] = T7N + T7U; + Im[WS(rs, 10)] = T7U - T7N; + T83 = T7V + T7W; + T84 = T80 + T81; + Rm[WS(rs, 10)] = T83 - T84; + Rp[WS(rs, 5)] = T83 + T84; + } + { + E T7X, T7Y, T7Z, T82; + T7X = T7V - T7W; + T7Y = T7T - T7Q; + Rm[WS(rs, 2)] = T7X - T7Y; + Rp[WS(rs, 13)] = T7X + T7Y; + T7Z = T7M - T7L; + T82 = T80 - T81; + Ip[WS(rs, 13)] = T7Z + T82; + Im[WS(rs, 2)] = T82 - T7Z; + } + } + { + E T8b, T8U, T8e, T8K, T8A, T8J, T8x, T8T, T8t, T8Z, T8F, T8R, T8m, T8Y, T8E; + E T8O; + { + E T87, T8a, T8v, T8w; + T87 = FNMS(KP461939766, T86, KP191341716 * T85); + T8a = FMA(KP191341716, T88, KP461939766 * T89); + T8b = T87 + T8a; + T8U = T87 - T8a; + { + E T8c, T8d, T8y, T8z; + T8c = KP353553390 * (T7x - T7w); + T8d = KP500000000 * (T6M + T6L); + T8e = T8c + T8d; + T8K = T8d - T8c; + T8y = FMA(KP461939766, T85, KP191341716 * T86); + T8z = FNMS(KP461939766, T88, KP191341716 * T89); + T8A = T8y + T8z; + T8J = T8z - T8y; + } + T8v = KP500000000 * (T7t - T7u); + T8w = KP353553390 * (T6G - T6J); + T8x = T8v + T8w; + T8T = T8v - T8w; + { + E T8p, T8P, T8s, T8Q, T8n, T8q; + T8n = KP707106781 * (T7l - T7k); + T8p = 
T8n + T8o; + T8P = T8o - T8n; + T8q = KP707106781 * (T7b - T7e); + T8s = T8q + T8r; + T8Q = T8r - T8q; + T8t = FNMS(KP277785116, T8s, KP415734806 * T8p); + T8Z = FMA(KP490392640, T8P, KP097545161 * T8Q); + T8F = FMA(KP277785116, T8p, KP415734806 * T8s); + T8R = FNMS(KP490392640, T8Q, KP097545161 * T8P); + } + { + E T8i, T8M, T8l, T8N, T8g, T8j; + T8g = KP707106781 * (T72 - T71); + T8i = T8g + T8h; + T8M = T8h - T8g; + T8j = KP707106781 * (T6S - T6V); + T8l = T8j + T8k; + T8N = T8k - T8j; + T8m = FMA(KP415734806, T8i, KP277785116 * T8l); + T8Y = FNMS(KP490392640, T8M, KP097545161 * T8N); + T8E = FNMS(KP277785116, T8i, KP415734806 * T8l); + T8O = FMA(KP097545161, T8M, KP490392640 * T8N); + } + } + { + E T8f, T8u, T8H, T8I; + T8f = T8b + T8e; + T8u = T8m + T8t; + Ip[WS(rs, 3)] = T8f + T8u; + Im[WS(rs, 12)] = T8u - T8f; + T8H = T8x + T8A; + T8I = T8E + T8F; + Rm[WS(rs, 12)] = T8H - T8I; + Rp[WS(rs, 3)] = T8H + T8I; + } + { + E T8B, T8C, T8D, T8G; + T8B = T8x - T8A; + T8C = T8t - T8m; + Rm[WS(rs, 4)] = T8B - T8C; + Rp[WS(rs, 11)] = T8B + T8C; + T8D = T8e - T8b; + T8G = T8E - T8F; + Ip[WS(rs, 11)] = T8D + T8G; + Im[WS(rs, 4)] = T8G - T8D; + } + { + E T8L, T8S, T91, T92; + T8L = T8J + T8K; + T8S = T8O + T8R; + Ip[WS(rs, 7)] = T8L + T8S; + Im[WS(rs, 8)] = T8S - T8L; + T91 = T8T + T8U; + T92 = T8Y + T8Z; + Rm[WS(rs, 8)] = T91 - T92; + Rp[WS(rs, 7)] = T91 + T92; + } + { + E T8V, T8W, T8X, T90; + T8V = T8T - T8U; + T8W = T8R - T8O; + Rm[0] = T8V - T8W; + Rp[WS(rs, 15)] = T8V + T8W; + T8X = T8K - T8J; + T90 = T8Y - T8Z; + Ip[WS(rs, 15)] = T8X + T90; + Im[0] = T90 - T8X; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, {404, 134, 94, 0} }; + +void X(codelet_hc2cfdft_32) (planner *p) { + X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_4.c --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hc2cfdft_4 -include hc2cf.h */ + +/* + * This function contains 30 FP additions, 20 FP multiplications, + * (or, 24 additions, 14 multiplications, 6 fused multiply/add), + * 32 stack variables, 1 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Td, Tu, Tr, T4, Tm, To, T9, T5, TA, Tp, Tv, TD, T6, Tq; + { + E Tk, Tl, Tf, 
TC, Tj, T7, T8, T1, Tn, Tb, Tc; + Tb = Ip[0]; + Tc = Im[0]; + { + E Ti, Tg, Th, T2, T3; + Tg = Rm[0]; + Th = Rp[0]; + Tk = W[1]; + Tl = Tb + Tc; + Td = Tb - Tc; + Tu = Th + Tg; + Ti = Tg - Th; + Tf = W[0]; + T2 = Ip[WS(rs, 1)]; + T3 = Im[WS(rs, 1)]; + TC = Tk * Ti; + Tj = Tf * Ti; + T7 = Rp[WS(rs, 1)]; + Tr = T2 + T3; + T4 = T2 - T3; + T8 = Rm[WS(rs, 1)]; + T1 = W[2]; + Tn = W[4]; + } + Tm = FNMS(Tk, Tl, Tj); + To = T7 - T8; + T9 = T7 + T8; + T5 = T1 * T4; + TA = Tn * Tr; + Tp = Tn * To; + Tv = T1 * T9; + TD = FMA(Tf, Tl, TC); + T6 = W[3]; + Tq = W[5]; + } + { + E Tw, Ta, TB, Ts; + Tw = FMA(T6, T4, Tv); + Ta = FNMS(T6, T9, T5); + TB = FNMS(Tq, To, TA); + Ts = FMA(Tq, Tr, Tp); + { + E TF, Tx, Te, Tz; + TF = Tu + Tw; + Tx = Tu - Tw; + Te = Ta + Td; + Tz = Td - Ta; + { + E TG, TE, Tt, Ty; + TG = TB + TD; + TE = TB - TD; + Tt = Tm - Ts; + Ty = Ts + Tm; + Im[0] = KP500000000 * (TE - Tz); + Ip[WS(rs, 1)] = KP500000000 * (Tz + TE); + Rp[0] = KP500000000 * (TF + TG); + Rm[WS(rs, 1)] = KP500000000 * (TF - TG); + Rp[WS(rs, 1)] = KP500000000 * (Tx + Ty); + Rm[0] = KP500000000 * (Tx - Ty); + Im[WS(rs, 1)] = KP500000000 * (Tt - Te); + Ip[0] = KP500000000 * (Te + Tt); + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cfdft_4", twinstr, &GENUS, {24, 14, 6, 0} }; + +void X(codelet_hc2cfdft_4) (planner *p) { + X(khc2c_register) (p, hc2cfdft_4, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hc2cfdft_4 -include hc2cf.h */ + +/* + * This function contains 30 FP additions, 20 FP multiplications, + * (or, 24 additions, 14 multiplications, 6 fused multiply/add), + * 18 stack variables, 1 constants, and 16 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, 
+0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E Tc, Tr, Tk, Tx, T9, Ts, Tp, Tw; + { + E Ta, Tb, Tj, Tf, Tg, Th, Te, Ti; + Ta = Ip[0]; + Tb = Im[0]; + Tj = Ta + Tb; + Tf = Rm[0]; + Tg = Rp[0]; + Th = Tf - Tg; + Tc = Ta - Tb; + Tr = Tg + Tf; + Te = W[0]; + Ti = W[1]; + Tk = FNMS(Ti, Tj, Te * Th); + Tx = FMA(Ti, Th, Te * Tj); + } + { + E T4, To, T8, Tm; + { + E T2, T3, T6, T7; + T2 = Ip[WS(rs, 1)]; + T3 = Im[WS(rs, 1)]; + T4 = T2 - T3; + To = T2 + T3; + T6 = Rp[WS(rs, 1)]; + T7 = Rm[WS(rs, 1)]; + T8 = T6 + T7; + Tm = T6 - T7; + } + { + E T1, T5, Tl, Tn; + T1 = W[2]; + T5 = W[3]; + T9 = FNMS(T5, T8, T1 * T4); + Ts = FMA(T1, T8, T5 * T4); + Tl = W[4]; + Tn = W[5]; + Tp = FMA(Tl, Tm, Tn * To); + Tw = FNMS(Tn, Tm, Tl * To); + } + } + { + E Td, Tq, Tz, TA; + Td = T9 + Tc; + Tq = Tk - Tp; + Ip[0] = KP500000000 * (Td + Tq); + Im[WS(rs, 1)] = KP500000000 * (Tq - Td); + Tz = Tr + Ts; + TA = Tw + Tx; + Rm[WS(rs, 1)] = KP500000000 * (Tz - TA); + Rp[0] = KP500000000 * (Tz + TA); + } + { + E Tt, Tu, Tv, Ty; + Tt = Tr - Ts; + Tu = Tp + Tk; + Rm[0] = KP500000000 * (Tt - Tu); + Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu); + Tv = Tc - T9; + Ty = Tw - Tx; + Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty); + Im[0] = KP500000000 * (Ty - Tv); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 4, "hc2cfdft_4", twinstr, &GENUS, {24, 14, 6, 0} }; + +void X(codelet_hc2cfdft_4) (planner *p) { + X(khc2c_register) (p, hc2cfdft_4, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo 
Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hc2cfdft_6 -include hc2cf.h */ + +/* + * This function contains 58 FP additions, 44 FP multiplications, + * (or, 36 additions, 22 multiplications, 22 fused multiply/add), + * 42 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E TP, TT, TN, TM, TY, T13; + { + E T3, TQ, TJ, T12, Tu, TB, TX, T10, Tj, Tf, Ti, Td, Th, TU, TS; + { + E TC, TI, TF, TH, TA, Tw, TZ; + { + E T1, T2, TD, TE; + T1 = Ip[0]; + T2 = Im[0]; + TD = 
Rm[0]; + TE = Rp[0]; + TC = W[0]; + T3 = T1 - T2; + TI = T1 + T2; + TQ = TE + TD; + TF = TD - TE; + TH = W[1]; + } + { + E Tr, To, Ts, Tl, Tq; + { + E Tm, Tn, TG, T11; + Tm = Rm[WS(rs, 2)]; + Tn = Rp[WS(rs, 2)]; + TG = TC * TF; + T11 = TH * TF; + Tr = Ip[WS(rs, 2)]; + TA = Tn + Tm; + To = Tm - Tn; + TJ = FNMS(TH, TI, TG); + T12 = FMA(TC, TI, T11); + Ts = Im[WS(rs, 2)]; + } + Tl = W[8]; + Tq = W[9]; + { + E Tz, Ty, TW, Tx, Tt, Tp; + Tw = W[6]; + Tx = Tr - Ts; + Tt = Tr + Ts; + Tp = Tl * To; + Tz = W[7]; + Ty = Tw * Tx; + TW = Tl * Tt; + Tu = FNMS(Tq, Tt, Tp); + TZ = Tz * Tx; + TB = FNMS(Tz, TA, Ty); + TX = FMA(Tq, To, TW); + } + } + { + E T5, T6, Ta, Tb; + T5 = Ip[WS(rs, 1)]; + T10 = FMA(Tw, TA, TZ); + T6 = Im[WS(rs, 1)]; + Ta = Rp[WS(rs, 1)]; + Tb = Rm[WS(rs, 1)]; + { + E T4, Tg, T7, Tc, T9, T8, TR; + T4 = W[5]; + Tg = T5 - T6; + T7 = T5 + T6; + Tj = Ta + Tb; + Tc = Ta - Tb; + T9 = W[4]; + T8 = T4 * T7; + Tf = W[2]; + Ti = W[3]; + TR = T9 * T7; + Td = FMA(T9, Tc, T8); + Th = Tf * Tg; + TU = Ti * Tg; + TS = FNMS(T4, Tc, TR); + } + } + } + { + E Te, T1d, TK, Tv, T1a, T1b, Tk, TV; + TP = Td + T3; + Te = T3 - Td; + Tk = FNMS(Ti, Tj, Th); + TV = FMA(Tf, Tj, TU); + T1d = TQ + TS; + TT = TQ - TS; + TN = TJ - TB; + TK = TB + TJ; + Tv = Tk + Tu; + TM = Tu - Tk; + TY = TV - TX; + T1a = TV + TX; + T1b = T10 + T12; + T13 = T10 - T12; + { + E T1g, TL, T1e, T1c, T19, T1f; + T1g = Tv - TK; + TL = Tv + TK; + T1e = T1a + T1b; + T1c = T1a - T1b; + T19 = FNMS(KP500000000, TL, Te); + Ip[0] = KP500000000 * (Te + TL); + T1f = FNMS(KP500000000, T1e, T1d); + Rp[0] = KP500000000 * (T1d + T1e); + Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP866025403, T1c, T19))); + Ip[WS(rs, 2)] = KP500000000 * (FMA(KP866025403, T1c, T19)); + Rm[WS(rs, 1)] = KP500000000 * (FMA(KP866025403, T1g, T1f)); + Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP866025403, T1g, T1f)); + } + } + } + { + E TO, T16, T14, T18, T17, T15; + TO = TM + TN; + T16 = TN - TM; + T14 = TY + T13; + T18 = T13 - TY; + T17 = FMA(KP500000000, TO, 
TP); + Im[WS(rs, 2)] = KP500000000 * (TO - TP); + T15 = FNMS(KP500000000, T14, TT); + Rm[WS(rs, 2)] = KP500000000 * (TT + T14); + Im[0] = -(KP500000000 * (FNMS(KP866025403, T18, T17))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP866025403, T18, T17)); + Rm[0] = KP500000000 * (FNMS(KP866025403, T16, T15)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP866025403, T16, T15)); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cfdft_6", twinstr, &GENUS, {36, 22, 22, 0} }; + +void X(codelet_hc2cfdft_6) (planner *p) { + X(khc2c_register) (p, hc2cfdft_6, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hc2cfdft_6 -include hc2cf.h */ + +/* + * This function contains 58 FP additions, 36 FP multiplications, + * (or, 44 additions, 22 multiplications, 14 fused multiply/add), + * 40 stack variables, 3 constants, and 24 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP433012701, +0.433012701892219323381861585376468091735701313); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) { + E T3, TM, Tc, TN, Ts, T10, TI, TR, TF, T11, TH, TU; + { + E T1, T2, TD, Tz, TA, TB, T7, Tf, Tb, Th, Tq, Tw, Tm, Tu, T4; + E T8; + { + E T5, T6, T9, Ta; + T1 = Ip[0]; + T2 = Im[0]; + TD = T1 + T2; + Tz = Rm[0]; + TA = Rp[0]; + TB = Tz - TA; + T5 = Ip[WS(rs, 1)]; + T6 = Im[WS(rs, 1)]; + T7 = T5 + T6; + Tf = T5 - T6; + T9 = Rp[WS(rs, 1)]; + Ta = Rm[WS(rs, 1)]; + Tb = T9 - Ta; + Th = T9 + Ta; + { + E To, Tp, Tk, Tl; + To = Rp[WS(rs, 2)]; + Tp = Rm[WS(rs, 2)]; + Tq = To 
- Tp; + Tw = To + Tp; + Tk = Ip[WS(rs, 2)]; + Tl = Im[WS(rs, 2)]; + Tm = Tk + Tl; + Tu = Tk - Tl; + } + } + T3 = T1 - T2; + TM = TA + Tz; + T4 = W[5]; + T8 = W[4]; + Tc = FMA(T4, T7, T8 * Tb); + TN = FNMS(T4, Tb, T8 * T7); + { + E Ti, TP, Tr, TQ; + { + E Te, Tg, Tj, Tn; + Te = W[2]; + Tg = W[3]; + Ti = FNMS(Tg, Th, Te * Tf); + TP = FMA(Tg, Tf, Te * Th); + Tj = W[9]; + Tn = W[8]; + Tr = FMA(Tj, Tm, Tn * Tq); + TQ = FNMS(Tj, Tq, Tn * Tm); + } + Ts = Ti - Tr; + T10 = TP + TQ; + TI = Ti + Tr; + TR = TP - TQ; + } + { + E Tx, TS, TE, TT; + { + E Tt, Tv, Ty, TC; + Tt = W[6]; + Tv = W[7]; + Tx = FNMS(Tv, Tw, Tt * Tu); + TS = FMA(Tv, Tu, Tt * Tw); + Ty = W[0]; + TC = W[1]; + TE = FNMS(TC, TD, Ty * TB); + TT = FMA(TC, TB, Ty * TD); + } + TF = Tx + TE; + T11 = TS + TT; + TH = TE - Tx; + TU = TS - TT; + } + } + { + E T12, Td, TG, TZ; + T12 = KP433012701 * (T10 - T11); + Td = T3 - Tc; + TG = Ts + TF; + TZ = FNMS(KP250000000, TG, KP500000000 * Td); + Ip[0] = KP500000000 * (Td + TG); + Im[WS(rs, 1)] = T12 - TZ; + Ip[WS(rs, 2)] = TZ + T12; + } + { + E T16, T13, T14, T15; + T16 = KP433012701 * (Ts - TF); + T13 = TM + TN; + T14 = T10 + T11; + T15 = FNMS(KP250000000, T14, KP500000000 * T13); + Rp[WS(rs, 2)] = T15 - T16; + Rp[0] = KP500000000 * (T13 + T14); + Rm[WS(rs, 1)] = T16 + T15; + } + { + E TY, TJ, TK, TX; + TY = KP433012701 * (TU - TR); + TJ = TH - TI; + TK = Tc + T3; + TX = FMA(KP500000000, TK, KP250000000 * TJ); + Im[WS(rs, 2)] = KP500000000 * (TJ - TK); + Im[0] = TY - TX; + Ip[WS(rs, 1)] = TX + TY; + } + { + E TL, TO, TV, TW; + TL = KP433012701 * (TI + TH); + TO = TM - TN; + TV = TR + TU; + TW = FNMS(KP250000000, TV, KP500000000 * TO); + Rp[WS(rs, 1)] = TL + TW; + Rm[WS(rs, 2)] = KP500000000 * (TO + TV); + Rm[0] = TW - TL; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 6, "hc2cfdft_6", twinstr, &GENUS, {44, 22, 14, 0} }; + +void X(codelet_hc2cfdft_6) (planner *p) { + X(khc2c_register) (p, 
hc2cfdft_6, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hc2cfdft_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:27 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cfdft_8 -include hc2cf.h */ + +/* + * This function contains 82 FP additions, 52 FP multiplications, + * (or, 60 additions, 30 multiplications, 22 fused multiply/add), + * 55 stack variables, 2 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W 
= W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E T1A, T1w, T1z, T1x, T1H, T1v, T1L, T1F; + { + E Ty, T14, TO, T1o, Tv, TG, T16, T1m, Ta, T19, T1h, TV, T10, TX, TZ; + E Tk, T1i, TY, T1b, TF, TB, T1l; + { + E TH, TN, TK, TM; + { + E Tw, Tx, TI, TJ; + Tw = Ip[0]; + Tx = Im[0]; + TI = Rm[0]; + TJ = Rp[0]; + TH = W[0]; + Ty = Tw - Tx; + TN = Tw + Tx; + T14 = TJ + TI; + TK = TI - TJ; + TM = W[1]; + } + { + E Ts, Tp, Tt, Tm, Tr; + { + E Tn, To, TL, T1n; + Tn = Ip[WS(rs, 2)]; + To = Im[WS(rs, 2)]; + TL = TH * TK; + T1n = TM * TK; + Ts = Rp[WS(rs, 2)]; + TF = Tn + To; + Tp = Tn - To; + TO = FNMS(TM, TN, TL); + T1o = FMA(TH, TN, T1n); + Tt = Rm[WS(rs, 2)]; + } + Tm = W[6]; + Tr = W[7]; + { + E TE, TD, T15, TC, Tu, Tq; + TB = W[8]; + TC = Tt - Ts; + Tu = Ts + Tt; + Tq = Tm * Tp; + TE = W[9]; + TD = TB * TC; + T15 = Tm * Tu; + Tv = FNMS(Tr, Tu, Tq); + T1l = TE * TC; + TG = FNMS(TE, TF, TD); + T16 = FMA(Tr, Tp, T15); + } + } + } + { + E TU, TR, TT, T1g, TS; + { + E T2, T3, T7, T8; + T2 = Ip[WS(rs, 1)]; + T1m = FMA(TB, TF, T1l); + T3 = Im[WS(rs, 1)]; + T7 = Rp[WS(rs, 1)]; + T8 = Rm[WS(rs, 1)]; + { + E T1, T4, T9, T6, T5, TQ, T18; + T1 = W[2]; + TU = T2 + T3; + T4 = T2 - T3; + TR = T7 - T8; + T9 = T7 + T8; + T6 = W[3]; + T5 = T1 * T4; + TQ = W[4]; + T18 = T1 * T9; + TT = W[5]; + Ta = FNMS(T6, T9, T5); + T1g = TQ * TU; + TS = TQ * TR; + T19 = FMA(T6, T4, T18); + } + } + { + E Tc, Td, Th, Ti; + Tc = Ip[WS(rs, 3)]; + T1h = FNMS(TT, TR, T1g); + TV = FMA(TT, TU, TS); + Td = Im[WS(rs, 3)]; + Th = Rp[WS(rs, 3)]; + Ti = Rm[WS(rs, 3)]; + { + E Tb, Te, Tj, Tg, Tf, TW, T1a; + Tb = W[10]; + T10 = Tc + Td; + Te = Tc - Td; + TX = Th - Ti; + Tj = Th + Ti; + Tg = W[11]; + Tf = Tb * Te; + TW = W[12]; + T1a = Tb * Tj; + TZ = W[13]; + Tk = FNMS(Tg, Tj, Tf); + T1i = TW * T10; + TY = TW * TX; + T1b = FMA(Tg, Te, T1a); + } + } + } + { + E T1E, T1t, TA, T1s, T1D, T1u, T1e, T13, T1r, T1d; + { + E TP, T1f, 
T1q, T12, T17, T1c; + { + E Tl, T11, Tz, T1p, T1k, T1j; + T1E = Ta - Tk; + Tl = Ta + Tk; + T1j = FNMS(TZ, TX, T1i); + T11 = FMA(TZ, T10, TY); + Tz = Tv + Ty; + T1t = Ty - Tv; + T1A = T1o - T1m; + T1p = T1m + T1o; + T1k = T1h + T1j; + T1w = T1j - T1h; + T1z = TO - TG; + TP = TG + TO; + T1f = Tz - Tl; + TA = Tl + Tz; + T1s = T1k + T1p; + T1q = T1k - T1p; + T12 = TV + T11; + T1x = TV - T11; + T1D = T14 - T16; + T17 = T14 + T16; + T1c = T19 + T1b; + T1u = T19 - T1b; + } + Im[WS(rs, 1)] = KP500000000 * (T1q - T1f); + T1e = T12 + TP; + T13 = TP - T12; + T1r = T17 + T1c; + T1d = T17 - T1c; + Ip[WS(rs, 2)] = KP500000000 * (T1f + T1q); + } + Im[WS(rs, 3)] = KP500000000 * (T13 - TA); + Ip[0] = KP500000000 * (TA + T13); + Rm[WS(rs, 3)] = KP500000000 * (T1r - T1s); + Rp[0] = KP500000000 * (T1r + T1s); + Rp[WS(rs, 2)] = KP500000000 * (T1d + T1e); + Rm[WS(rs, 1)] = KP500000000 * (T1d - T1e); + T1H = T1u + T1t; + T1v = T1t - T1u; + T1L = T1D + T1E; + T1F = T1D - T1E; + } + } + { + E T1y, T1I, T1B, T1J; + T1y = T1w + T1x; + T1I = T1w - T1x; + T1B = T1z - T1A; + T1J = T1z + T1A; + { + E T1M, T1K, T1C, T1G; + T1M = T1I + T1J; + T1K = T1I - T1J; + T1C = T1y + T1B; + T1G = T1B - T1y; + Im[0] = -(KP500000000 * (FNMS(KP707106781, T1K, T1H))); + Ip[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1K, T1H)); + Rp[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1M, T1L)); + Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP707106781, T1M, T1L)); + Rp[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1G, T1F)); + Rm[0] = KP500000000 * (FNMS(KP707106781, T1G, T1F)); + Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP707106781, T1C, T1v))); + Ip[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1C, T1v)); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2c_desc desc = { 8, "hc2cfdft_8", twinstr, &GENUS, {60, 30, 22, 0} }; + +void X(codelet_hc2cfdft_8) (planner *p) { + X(khc2c_register) (p, hc2cfdft_8, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* 
Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hc2cfdft_8 -include hc2cf.h */ + +/* + * This function contains 82 FP additions, 44 FP multiplications, + * (or, 68 additions, 30 multiplications, 14 fused multiply/add), + * 39 stack variables, 2 constants, and 32 memory accesses + */ +#include "hc2cf.h" + +static void hc2cfdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP353553390, +0.353553390593273762200422181052424519642417969); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { + E Tv, TX, Ts, TY, TE, T1a, TJ, T19, T1l, T1m, T9, T10, Ti, T11, TP; + E T16, TU, T17, T1i, T1j; + { + E Tt, Tu, TD, Tz, TA, TB, Tn, TI, Tr, TG, Tk, To; + Tt = Ip[0]; + Tu = Im[0]; + TD = Tt + Tu; + Tz = Rm[0]; + TA = Rp[0]; + TB = Tz - TA; + { + E Tl, Tm, Tp, Tq; + Tl = Ip[WS(rs, 2)]; + Tm = Im[WS(rs, 2)]; + Tn = Tl - Tm; + TI = Tl + Tm; + Tp = Rp[WS(rs, 2)]; + Tq = Rm[WS(rs, 2)]; + Tr = Tp + Tq; + TG = Tp - Tq; + } + Tv = Tt - Tu; + TX = TA + Tz; + Tk = W[6]; + To = W[7]; + Ts = FNMS(To, Tr, Tk * Tn); + TY = FMA(Tk, Tr, To * Tn); + { + E Ty, TC, TF, TH; + Ty = W[0]; + TC = W[1]; + TE = FNMS(TC, TD, Ty * TB); + T1a = FMA(TC, TB, Ty * TD); + TF = W[8]; + TH = W[9]; + TJ = FMA(TF, TG, TH * TI); + T19 = FNMS(TH, TG, TF * TI); + } + T1l = TJ + TE; + T1m = T1a - T19; + } + { + E T4, TO, T8, TM, Td, TT, Th, TR; + { + E T2, T3, T6, T7; + T2 = Ip[WS(rs, 1)]; + T3 = Im[WS(rs, 1)]; + T4 = T2 - T3; + TO = T2 + T3; + T6 = Rp[WS(rs, 1)]; + T7 = Rm[WS(rs, 1)]; + T8 = T6 + T7; + TM = T6 - T7; + } + { + E Tb, Tc, Tf, Tg; + Tb = Ip[WS(rs, 3)]; + Tc = Im[WS(rs, 3)]; + Td = Tb - Tc; + TT = Tb + Tc; + Tf = Rp[WS(rs, 3)]; + Tg = Rm[WS(rs, 3)]; + Th = Tf + Tg; + TR = Tf - Tg; + } + { + E T1, T5, Ta, Te; + T1 = W[2]; + T5 = 
W[3]; + T9 = FNMS(T5, T8, T1 * T4); + T10 = FMA(T1, T8, T5 * T4); + Ta = W[10]; + Te = W[11]; + Ti = FNMS(Te, Th, Ta * Td); + T11 = FMA(Ta, Th, Te * Td); + { + E TL, TN, TQ, TS; + TL = W[4]; + TN = W[5]; + TP = FMA(TL, TM, TN * TO); + T16 = FNMS(TN, TM, TL * TO); + TQ = W[12]; + TS = W[13]; + TU = FMA(TQ, TR, TS * TT); + T17 = FNMS(TS, TR, TQ * TT); + } + T1i = T17 - T16; + T1j = TP - TU; + } + } + { + E T1h, T1t, T1w, T1y, T1o, T1s, T1r, T1x; + { + E T1f, T1g, T1u, T1v; + T1f = Tv - Ts; + T1g = T10 - T11; + T1h = KP500000000 * (T1f - T1g); + T1t = KP500000000 * (T1g + T1f); + T1u = T1i - T1j; + T1v = T1l + T1m; + T1w = KP353553390 * (T1u - T1v); + T1y = KP353553390 * (T1u + T1v); + } + { + E T1k, T1n, T1p, T1q; + T1k = T1i + T1j; + T1n = T1l - T1m; + T1o = KP353553390 * (T1k + T1n); + T1s = KP353553390 * (T1n - T1k); + T1p = TX - TY; + T1q = T9 - Ti; + T1r = KP500000000 * (T1p - T1q); + T1x = KP500000000 * (T1p + T1q); + } + Ip[WS(rs, 1)] = T1h + T1o; + Rp[WS(rs, 1)] = T1x + T1y; + Im[WS(rs, 2)] = T1o - T1h; + Rm[WS(rs, 2)] = T1x - T1y; + Rm[0] = T1r - T1s; + Im[0] = T1w - T1t; + Rp[WS(rs, 3)] = T1r + T1s; + Ip[WS(rs, 3)] = T1t + T1w; + } + { + E Tx, T15, T1c, T1e, TW, T14, T13, T1d; + { + E Tj, Tw, T18, T1b; + Tj = T9 + Ti; + Tw = Ts + Tv; + Tx = Tj + Tw; + T15 = Tw - Tj; + T18 = T16 + T17; + T1b = T19 + T1a; + T1c = T18 - T1b; + T1e = T18 + T1b; + } + { + E TK, TV, TZ, T12; + TK = TE - TJ; + TV = TP + TU; + TW = TK - TV; + T14 = TV + TK; + TZ = TX + TY; + T12 = T10 + T11; + T13 = TZ - T12; + T1d = TZ + T12; + } + Ip[0] = KP500000000 * (Tx + TW); + Rp[0] = KP500000000 * (T1d + T1e); + Im[WS(rs, 3)] = KP500000000 * (TW - Tx); + Rm[WS(rs, 3)] = KP500000000 * (T1d - T1e); + Rm[WS(rs, 1)] = KP500000000 * (T13 - T14); + Im[WS(rs, 1)] = KP500000000 * (T1c - T15); + Rp[WS(rs, 2)] = KP500000000 * (T13 + T14); + Ip[WS(rs, 2)] = KP500000000 * (T15 + T1c); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const 
hc2c_desc desc = { 8, "hc2cfdft_8", twinstr, &GENUS, {68, 30, 14, 0} }; + +void X(codelet_hc2cfdft_8) (planner *p) { + X(khc2c_register) (p, hc2cfdft_8, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:12 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -dit -name hf2_16 -include hf.h */ + +/* + * This function contains 196 FP additions, 134 FP multiplications, + * (or, 104 additions, 42 multiplications, 92 fused multiply/add), + * 106 stack variables, 3 constants, and 64 memory accesses + */ +#include "hf.h" + +static void hf2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { + E T35, T32; + { + E T2, Tf, TM, TO, T3, Tg, TN, TS, T4, Tp, T6, T5, Th; + T2 = W[0]; + Tf = W[2]; + TM = W[6]; + TO = W[7]; + T3 = W[4]; + Tg = T2 * Tf; + TN = T2 * TM; + TS = T2 * TO; + T4 = T2 * T3; + Tp = Tf * T3; + T6 = W[5]; + T5 = W[1]; + Th = W[3]; + { + E TZ, Te, T1U, T3A, T3M, T2w, T1G, T2I, T3h, T1R, T2D, T2B, T3i, Tx, T3L; + E T1Z, T3w, TL, T21, T26, T38, T1d, T2h, T2s, T3c, T1s, T2t, T2m, T3d, TX; + E T10, TV, T2a, TY, T2b; + { + E TF, TP, TT, Tq, TW, Tz, Tu, TI, TC, T1m, T1f, T1p, T1j, Tr, Ts; + E Tv, To, T1W; + { + E Ti, Tm, T1L, T1O, T1D, T1A, T1x, T2G, T1F, T2F; + { + E T1, T7, Tb, T3z, T8, T1z, T9, Tc; + { + E T1i, T1e, T1C, T1y, Tt, Ta, Tl; + T1 = cr[0]; + Tt = Tf * T6; + Ta = T2 * T6; + T7 = FMA(T5, T6, 
T4); + TF = FNMS(T5, T6, T4); + TP = FMA(T5, TO, TN); + TT = FNMS(T5, TM, TS); + Tq = FNMS(Th, T6, Tp); + TW = FMA(Th, T6, Tp); + Tz = FMA(T5, Th, Tg); + Ti = FNMS(T5, Th, Tg); + Tl = T2 * Th; + Tu = FMA(Th, T3, Tt); + TZ = FNMS(Th, T3, Tt); + TI = FMA(T5, T3, Ta); + Tb = FNMS(T5, T3, Ta); + T1i = Ti * T6; + T1e = Ti * T3; + T1C = Tz * T6; + T1y = Tz * T3; + Tm = FMA(T5, Tf, Tl); + TC = FNMS(T5, Tf, Tl); + T3z = ci[0]; + T8 = cr[WS(rs, 8)]; + T1m = FNMS(Tm, T6, T1e); + T1f = FMA(Tm, T6, T1e); + T1p = FMA(Tm, T3, T1i); + T1j = FNMS(Tm, T3, T1i); + T1L = FNMS(TC, T6, T1y); + T1z = FMA(TC, T6, T1y); + T1O = FMA(TC, T3, T1C); + T1D = FNMS(TC, T3, T1C); + T9 = T7 * T8; + Tc = ci[WS(rs, 8)]; + } + { + E T1u, T1w, T1v, T2E, T3y, T1B, T1E, Td, T3x; + T1u = cr[WS(rs, 15)]; + T1w = ci[WS(rs, 15)]; + T1A = cr[WS(rs, 7)]; + Td = FMA(Tb, Tc, T9); + T3x = T7 * Tc; + T1v = TM * T1u; + T2E = TM * T1w; + Te = T1 + Td; + T1U = T1 - Td; + T3y = FNMS(Tb, T8, T3x); + T1B = T1z * T1A; + T1E = ci[WS(rs, 7)]; + T1x = FMA(TO, T1w, T1v); + T3A = T3y + T3z; + T3M = T3z - T3y; + T2G = T1z * T1E; + T1F = FMA(T1D, T1E, T1B); + T2F = FNMS(TO, T1u, T2E); + } + } + { + E T1H, T1I, T1J, T1M, T1P, T2H; + T1H = cr[WS(rs, 3)]; + T2H = FNMS(T1D, T1A, T2G); + T2w = T1x - T1F; + T1G = T1x + T1F; + T1I = Tf * T1H; + T2I = T2F - T2H; + T3h = T2F + T2H; + T1J = ci[WS(rs, 3)]; + T1M = cr[WS(rs, 11)]; + T1P = ci[WS(rs, 11)]; + { + E Tj, Tk, Tn, T1V; + { + E T1K, T2y, T1Q, T2A, T2x, T1N, T2z; + Tj = cr[WS(rs, 4)]; + T1K = FMA(Th, T1J, T1I); + T2x = Tf * T1J; + T1N = T1L * T1M; + T2z = T1L * T1P; + Tk = Ti * Tj; + T2y = FNMS(Th, T1H, T2x); + T1Q = FMA(T1O, T1P, T1N); + T2A = FNMS(T1O, T1M, T2z); + Tn = ci[WS(rs, 4)]; + Tr = cr[WS(rs, 12)]; + T1R = T1K + T1Q; + T2D = T1Q - T1K; + T2B = T2y - T2A; + T3i = T2y + T2A; + T1V = Ti * Tn; + Ts = Tq * Tr; + Tv = ci[WS(rs, 12)]; + } + To = FMA(Tm, Tn, Tk); + T1W = FNMS(Tm, Tj, T1V); + } + } + } + { + E T19, T1b, T18, T2p, T1a, T2q; + { + E TE, T23, TK, T25; + { + E TA, 
TD, TB, T22, TG, TJ, TH, T24, T1Y, Tw, T1X; + TA = cr[WS(rs, 2)]; + Tw = FMA(Tu, Tv, Ts); + T1X = Tq * Tv; + TD = ci[WS(rs, 2)]; + TB = Tz * TA; + Tx = To + Tw; + T3L = To - Tw; + T1Y = FNMS(Tu, Tr, T1X); + T22 = Tz * TD; + TG = cr[WS(rs, 10)]; + TJ = ci[WS(rs, 10)]; + T1Z = T1W - T1Y; + T3w = T1W + T1Y; + TH = TF * TG; + T24 = TF * TJ; + TE = FMA(TC, TD, TB); + T23 = FNMS(TC, TA, T22); + TK = FMA(TI, TJ, TH); + T25 = FNMS(TI, TG, T24); + } + { + E T15, T17, T16, T2o; + T15 = cr[WS(rs, 1)]; + T17 = ci[WS(rs, 1)]; + TL = TE + TK; + T21 = TE - TK; + T26 = T23 - T25; + T38 = T23 + T25; + T16 = T2 * T15; + T2o = T2 * T17; + T19 = cr[WS(rs, 9)]; + T1b = ci[WS(rs, 9)]; + T18 = FMA(T5, T17, T16); + T2p = FNMS(T5, T15, T2o); + T1a = T3 * T19; + T2q = T3 * T1b; + } + } + { + E T1n, T1q, T1l, T2j, T1o, T2k; + { + E T1g, T1k, T1h, T2i, T1c, T2r; + T1g = cr[WS(rs, 5)]; + T1k = ci[WS(rs, 5)]; + T1c = FMA(T6, T1b, T1a); + T2r = FNMS(T6, T19, T2q); + T1h = T1f * T1g; + T2i = T1f * T1k; + T1d = T18 + T1c; + T2h = T18 - T1c; + T2s = T2p - T2r; + T3c = T2p + T2r; + T1n = cr[WS(rs, 13)]; + T1q = ci[WS(rs, 13)]; + T1l = FMA(T1j, T1k, T1h); + T2j = FNMS(T1j, T1g, T2i); + T1o = T1m * T1n; + T2k = T1m * T1q; + } + { + E TQ, TU, TR, T29, T1r, T2l; + TQ = cr[WS(rs, 14)]; + TU = ci[WS(rs, 14)]; + T1r = FMA(T1p, T1q, T1o); + T2l = FNMS(T1p, T1n, T2k); + TR = TP * TQ; + T29 = TP * TU; + T1s = T1l + T1r; + T2t = T1l - T1r; + T2m = T2j - T2l; + T3d = T2j + T2l; + TX = cr[WS(rs, 6)]; + T10 = ci[WS(rs, 6)]; + TV = FMA(TT, TU, TR); + T2a = FNMS(TT, TQ, T29); + TY = TW * TX; + T2b = TW * T10; + } + } + } + } + { + E T36, T3G, T3b, T3g, T28, T2d, T3F, T39, T3j, T3q, T3C, T3e, T3u, T3t; + { + E T3D, T1T, T3r, T14, T3E, T3s; + { + E Ty, T3B, T11, T2c, T13, T3v; + T36 = Te - Tx; + Ty = Te + Tx; + T3B = T3w + T3A; + T3G = T3A - T3w; + T11 = FMA(TZ, T10, TY); + T2c = FNMS(TZ, TX, T2b); + { + E T1t, T1S, T12, T37; + T3b = T1d - T1s; + T1t = T1d + T1s; + T1S = T1G + T1R; + T3g = T1G - T1R; + T12 = TV + 
T11; + T28 = TV - T11; + T2d = T2a - T2c; + T37 = T2a + T2c; + T3D = T1S - T1t; + T1T = T1t + T1S; + T13 = TL + T12; + T3F = TL - T12; + T39 = T37 - T38; + T3v = T38 + T37; + } + T3j = T3h - T3i; + T3r = T3h + T3i; + T3q = Ty - T13; + T14 = Ty + T13; + T3E = T3B - T3v; + T3C = T3v + T3B; + T3s = T3c + T3d; + T3e = T3c - T3d; + } + ci[WS(rs, 7)] = T14 - T1T; + cr[WS(rs, 12)] = T3D - T3E; + ci[WS(rs, 11)] = T3D + T3E; + T3u = T3s + T3r; + T3t = T3r - T3s; + cr[0] = T14 + T1T; + } + { + E T3m, T3a, T3J, T3H; + ci[WS(rs, 15)] = T3u + T3C; + cr[WS(rs, 8)] = T3u - T3C; + ci[WS(rs, 3)] = T3q + T3t; + cr[WS(rs, 4)] = T3q - T3t; + T3m = T36 + T39; + T3a = T36 - T39; + T3J = T3G - T3F; + T3H = T3F + T3G; + { + E T2Q, T20, T3N, T3T, T2C, T2J, T3U, T2f, T33, T30, T2V, T2W, T3O, T2T, T2N; + E T2v; + { + E T2R, T27, T2e, T2S; + { + E T3n, T3f, T3o, T3k; + T2Q = T1U + T1Z; + T20 = T1U - T1Z; + T3n = T3b - T3e; + T3f = T3b + T3e; + T3o = T3g + T3j; + T3k = T3g - T3j; + T3N = T3L + T3M; + T3T = T3M - T3L; + { + E T3p, T3K, T3I, T3l; + T3p = T3n + T3o; + T3K = T3o - T3n; + T3I = T3k - T3f; + T3l = T3f + T3k; + ci[WS(rs, 1)] = FMA(KP707106781, T3p, T3m); + cr[WS(rs, 6)] = FNMS(KP707106781, T3p, T3m); + ci[WS(rs, 13)] = FMA(KP707106781, T3K, T3J); + cr[WS(rs, 10)] = FMS(KP707106781, T3K, T3J); + ci[WS(rs, 9)] = FMA(KP707106781, T3I, T3H); + cr[WS(rs, 14)] = FMS(KP707106781, T3I, T3H); + cr[WS(rs, 2)] = FMA(KP707106781, T3l, T3a); + ci[WS(rs, 5)] = FNMS(KP707106781, T3l, T3a); + T2R = T21 + T26; + T27 = T21 - T26; + T2e = T28 + T2d; + T2S = T28 - T2d; + } + } + { + E T2Y, T2Z, T2n, T2u; + T2C = T2w - T2B; + T2Y = T2w + T2B; + T2Z = T2I + T2D; + T2J = T2D - T2I; + T3U = T2e - T27; + T2f = T27 + T2e; + T33 = FMA(KP414213562, T2Y, T2Z); + T30 = FNMS(KP414213562, T2Z, T2Y); + T2V = T2h + T2m; + T2n = T2h - T2m; + T2u = T2s + T2t; + T2W = T2s - T2t; + T3O = T2R - T2S; + T2T = T2R + T2S; + T2N = FMA(KP414213562, T2n, T2u); + T2v = FNMS(KP414213562, T2u, T2n); + } + } + { + E T2M, T3S, T31, 
T2P, T3Q, T3R, T3P, T2U; + { + E T2g, T2X, T2O, T2K, T3V, T3X, T3W, T34, T2L, T3Y; + T2M = FNMS(KP707106781, T2f, T20); + T2g = FMA(KP707106781, T2f, T20); + T34 = FNMS(KP414213562, T2V, T2W); + T2X = FMA(KP414213562, T2W, T2V); + T2O = FMA(KP414213562, T2C, T2J); + T2K = FNMS(KP414213562, T2J, T2C); + T3V = FMA(KP707106781, T3U, T3T); + T3X = FNMS(KP707106781, T3U, T3T); + T35 = T33 - T34; + T3W = T34 + T33; + T3S = T2K - T2v; + T2L = T2v + T2K; + T3Y = T30 - T2X; + T31 = T2X + T30; + ci[WS(rs, 14)] = FMA(KP923879532, T3W, T3V); + cr[WS(rs, 9)] = FMS(KP923879532, T3W, T3V); + ci[0] = FMA(KP923879532, T2L, T2g); + cr[WS(rs, 7)] = FNMS(KP923879532, T2L, T2g); + cr[WS(rs, 13)] = FMS(KP923879532, T3Y, T3X); + ci[WS(rs, 10)] = FMA(KP923879532, T3Y, T3X); + T2P = T2N + T2O; + T3Q = T2O - T2N; + } + T32 = FNMS(KP707106781, T2T, T2Q); + T2U = FMA(KP707106781, T2T, T2Q); + T3R = FNMS(KP707106781, T3O, T3N); + T3P = FMA(KP707106781, T3O, T3N); + cr[WS(rs, 3)] = FMA(KP923879532, T2P, T2M); + ci[WS(rs, 4)] = FNMS(KP923879532, T2P, T2M); + cr[WS(rs, 1)] = FMA(KP923879532, T31, T2U); + ci[WS(rs, 6)] = FNMS(KP923879532, T31, T2U); + ci[WS(rs, 8)] = FMA(KP923879532, T3Q, T3P); + cr[WS(rs, 15)] = FMS(KP923879532, T3Q, T3P); + ci[WS(rs, 12)] = FMA(KP923879532, T3S, T3R); + cr[WS(rs, 11)] = FMS(KP923879532, T3S, T3R); + } + } + } + } + } + } + ci[WS(rs, 2)] = FMA(KP923879532, T35, T32); + cr[WS(rs, 5)] = FNMS(KP923879532, T35, T32); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hf2_16", twinstr, &GENUS, {104, 42, 92, 0} }; + +void X(codelet_hf2_16) (planner *p) { + X(khc2hc_register) (p, hf2_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 16 -dit -name hf2_16 -include hf.h */ + +/* + * This function contains 196 FP 
additions, 108 FP multiplications, + * (or, 156 additions, 68 multiplications, 40 fused multiply/add), + * 82 stack variables, 3 constants, and 64 memory accesses + */ +#include "hf.h" + +static void hf2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { + E T2, T5, Tg, Ti, Tk, To, TE, TC, T6, T3, T8, TW, TJ, Tt, TU; + E Tc, Tx, TH, TN, TO, TP, TR, T1f, T1k, T1b, T1i, T1y, T1H, T1u, T1F; + { + E T7, Tv, Ta, Ts, T4, Tw, Tb, Tr; + { + E Th, Tn, Tj, Tm; + T2 = W[0]; + T5 = W[1]; + Tg = W[2]; + Ti = W[3]; + Th = T2 * Tg; + Tn = T5 * Tg; + Tj = T5 * Ti; + Tm = T2 * Ti; + Tk = Th - Tj; + To = Tm + Tn; + TE = Tm - Tn; + TC = Th + Tj; + T6 = W[5]; + T7 = T5 * T6; + Tv = Tg * T6; + Ta = T2 * T6; + Ts = Ti * T6; + T3 = W[4]; + T4 = T2 * T3; + Tw = Ti * T3; + Tb = T5 * T3; + Tr = Tg * T3; + } + T8 = T4 + T7; + TW = Tv - Tw; + TJ = Ta + Tb; + Tt = Tr - Ts; + TU = Tr + Ts; + Tc = Ta - Tb; + Tx = Tv + Tw; + TH = T4 - T7; + TN = W[6]; + TO = W[7]; + TP = FMA(T2, TN, T5 * TO); + TR = FNMS(T5, TN, T2 * TO); + { + E T1d, T1e, T19, T1a; + T1d = Tk * T6; + T1e = To * T3; + T1f = T1d - T1e; + T1k = T1d + T1e; + T19 = Tk * T3; + T1a = To * T6; + T1b = T19 + T1a; + T1i = T19 - T1a; + } + { + E T1w, T1x, T1s, T1t; + T1w = TC * T6; + T1x = TE * T3; + T1y = T1w - T1x; + T1H = T1w + T1x; + T1s = TC * T3; + T1t = TE * T6; + T1u = T1s + T1t; + T1F = T1s - T1t; + } + } + { + E Tf, T3s, T1N, T3e, TA, T3r, T1Q, T3b, TM, T2N, T1W, T2w, TZ, T2M, T21; + E T2x, T1B, T1K, T2V, T2W, T2X, T2Y, T2j, T2E, T2o, T2D, T18, T1n, T2Q, T2R; + E T2S, T2T, T28, T2B, T2d, T2A; + { + E T1, T3d, Te, T3c, T9, Td; + T1 = cr[0]; + T3d = ci[0]; + T9 = cr[WS(rs, 
8)]; + Td = ci[WS(rs, 8)]; + Te = FMA(T8, T9, Tc * Td); + T3c = FNMS(Tc, T9, T8 * Td); + Tf = T1 + Te; + T3s = T3d - T3c; + T1N = T1 - Te; + T3e = T3c + T3d; + } + { + E Tq, T1O, Tz, T1P; + { + E Tl, Tp, Tu, Ty; + Tl = cr[WS(rs, 4)]; + Tp = ci[WS(rs, 4)]; + Tq = FMA(Tk, Tl, To * Tp); + T1O = FNMS(To, Tl, Tk * Tp); + Tu = cr[WS(rs, 12)]; + Ty = ci[WS(rs, 12)]; + Tz = FMA(Tt, Tu, Tx * Ty); + T1P = FNMS(Tx, Tu, Tt * Ty); + } + TA = Tq + Tz; + T3r = Tq - Tz; + T1Q = T1O - T1P; + T3b = T1O + T1P; + } + { + E TG, T1T, TL, T1U, T1S, T1V; + { + E TD, TF, TI, TK; + TD = cr[WS(rs, 2)]; + TF = ci[WS(rs, 2)]; + TG = FMA(TC, TD, TE * TF); + T1T = FNMS(TE, TD, TC * TF); + TI = cr[WS(rs, 10)]; + TK = ci[WS(rs, 10)]; + TL = FMA(TH, TI, TJ * TK); + T1U = FNMS(TJ, TI, TH * TK); + } + TM = TG + TL; + T2N = T1T + T1U; + T1S = TG - TL; + T1V = T1T - T1U; + T1W = T1S - T1V; + T2w = T1S + T1V; + } + { + E TT, T1Y, TY, T1Z, T1X, T20; + { + E TQ, TS, TV, TX; + TQ = cr[WS(rs, 14)]; + TS = ci[WS(rs, 14)]; + TT = FMA(TP, TQ, TR * TS); + T1Y = FNMS(TR, TQ, TP * TS); + TV = cr[WS(rs, 6)]; + TX = ci[WS(rs, 6)]; + TY = FMA(TU, TV, TW * TX); + T1Z = FNMS(TW, TV, TU * TX); + } + TZ = TT + TY; + T2M = T1Y + T1Z; + T1X = TT - TY; + T20 = T1Y - T1Z; + T21 = T1X + T20; + T2x = T1X - T20; + } + { + E T1r, T2f, T1J, T2m, T1A, T2g, T1E, T2l; + { + E T1p, T1q, T1G, T1I; + T1p = cr[WS(rs, 15)]; + T1q = ci[WS(rs, 15)]; + T1r = FMA(TN, T1p, TO * T1q); + T2f = FNMS(TO, T1p, TN * T1q); + T1G = cr[WS(rs, 11)]; + T1I = ci[WS(rs, 11)]; + T1J = FMA(T1F, T1G, T1H * T1I); + T2m = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1v, T1z, T1C, T1D; + T1v = cr[WS(rs, 7)]; + T1z = ci[WS(rs, 7)]; + T1A = FMA(T1u, T1v, T1y * T1z); + T2g = FNMS(T1y, T1v, T1u * T1z); + T1C = cr[WS(rs, 3)]; + T1D = ci[WS(rs, 3)]; + T1E = FMA(Tg, T1C, Ti * T1D); + T2l = FNMS(Ti, T1C, Tg * T1D); + } + T1B = T1r + T1A; + T1K = T1E + T1J; + T2V = T1B - T1K; + T2W = T2f + T2g; + T2X = T2l + T2m; + T2Y = T2W - T2X; + { + E T2h, T2i, T2k, T2n; + T2h = T2f - 
T2g; + T2i = T1E - T1J; + T2j = T2h + T2i; + T2E = T2h - T2i; + T2k = T1r - T1A; + T2n = T2l - T2m; + T2o = T2k - T2n; + T2D = T2k + T2n; + } + } + { + E T14, T29, T1m, T26, T17, T2a, T1h, T25; + { + E T12, T13, T1j, T1l; + T12 = cr[WS(rs, 1)]; + T13 = ci[WS(rs, 1)]; + T14 = FMA(T2, T12, T5 * T13); + T29 = FNMS(T5, T12, T2 * T13); + T1j = cr[WS(rs, 13)]; + T1l = ci[WS(rs, 13)]; + T1m = FMA(T1i, T1j, T1k * T1l); + T26 = FNMS(T1k, T1j, T1i * T1l); + } + { + E T15, T16, T1c, T1g; + T15 = cr[WS(rs, 9)]; + T16 = ci[WS(rs, 9)]; + T17 = FMA(T3, T15, T6 * T16); + T2a = FNMS(T6, T15, T3 * T16); + T1c = cr[WS(rs, 5)]; + T1g = ci[WS(rs, 5)]; + T1h = FMA(T1b, T1c, T1f * T1g); + T25 = FNMS(T1f, T1c, T1b * T1g); + } + T18 = T14 + T17; + T1n = T1h + T1m; + T2Q = T18 - T1n; + T2R = T29 + T2a; + T2S = T25 + T26; + T2T = T2R - T2S; + { + E T24, T27, T2b, T2c; + T24 = T14 - T17; + T27 = T25 - T26; + T28 = T24 - T27; + T2B = T24 + T27; + T2b = T29 - T2a; + T2c = T1h - T1m; + T2d = T2b + T2c; + T2A = T2b - T2c; + } + } + { + E T23, T2r, T3u, T3w, T2q, T3v, T2u, T3p; + { + E T1R, T22, T3q, T3t; + T1R = T1N - T1Q; + T22 = KP707106781 * (T1W + T21); + T23 = T1R + T22; + T2r = T1R - T22; + T3q = KP707106781 * (T2w - T2x); + T3t = T3r + T3s; + T3u = T3q + T3t; + T3w = T3t - T3q; + } + { + E T2e, T2p, T2s, T2t; + T2e = FNMS(KP382683432, T2d, KP923879532 * T28); + T2p = FMA(KP382683432, T2j, KP923879532 * T2o); + T2q = T2e + T2p; + T3v = T2p - T2e; + T2s = FMA(KP923879532, T2d, KP382683432 * T28); + T2t = FNMS(KP923879532, T2j, KP382683432 * T2o); + T2u = T2s + T2t; + T3p = T2t - T2s; + } + cr[WS(rs, 7)] = T23 - T2q; + cr[WS(rs, 11)] = T3v - T3w; + ci[WS(rs, 12)] = T3v + T3w; + ci[0] = T23 + T2q; + ci[WS(rs, 4)] = T2r - T2u; + cr[WS(rs, 15)] = T3p - T3u; + ci[WS(rs, 8)] = T3p + T3u; + cr[WS(rs, 3)] = T2r + T2u; + } + { + E T11, T35, T3g, T3i, T1M, T3h, T38, T39; + { + E TB, T10, T3a, T3f; + TB = Tf + TA; + T10 = TM + TZ; + T11 = TB + T10; + T35 = TB - T10; + T3a = T2N + T2M; + T3f = T3b + 
T3e; + T3g = T3a + T3f; + T3i = T3f - T3a; + } + { + E T1o, T1L, T36, T37; + T1o = T18 + T1n; + T1L = T1B + T1K; + T1M = T1o + T1L; + T3h = T1L - T1o; + T36 = T2W + T2X; + T37 = T2R + T2S; + T38 = T36 - T37; + T39 = T37 + T36; + } + ci[WS(rs, 7)] = T11 - T1M; + cr[WS(rs, 12)] = T3h - T3i; + ci[WS(rs, 11)] = T3h + T3i; + cr[0] = T11 + T1M; + cr[WS(rs, 4)] = T35 - T38; + cr[WS(rs, 8)] = T39 - T3g; + ci[WS(rs, 15)] = T39 + T3g; + ci[WS(rs, 3)] = T35 + T38; + } + { + E T2z, T2H, T3A, T3C, T2G, T3B, T2K, T3x; + { + E T2v, T2y, T3y, T3z; + T2v = T1N + T1Q; + T2y = KP707106781 * (T2w + T2x); + T2z = T2v + T2y; + T2H = T2v - T2y; + T3y = KP707106781 * (T21 - T1W); + T3z = T3s - T3r; + T3A = T3y + T3z; + T3C = T3z - T3y; + } + { + E T2C, T2F, T2I, T2J; + T2C = FMA(KP382683432, T2A, KP923879532 * T2B); + T2F = FNMS(KP382683432, T2E, KP923879532 * T2D); + T2G = T2C + T2F; + T3B = T2F - T2C; + T2I = FNMS(KP923879532, T2A, KP382683432 * T2B); + T2J = FMA(KP923879532, T2E, KP382683432 * T2D); + T2K = T2I + T2J; + T3x = T2J - T2I; + } + ci[WS(rs, 6)] = T2z - T2G; + cr[WS(rs, 13)] = T3B - T3C; + ci[WS(rs, 10)] = T3B + T3C; + cr[WS(rs, 1)] = T2z + T2G; + cr[WS(rs, 5)] = T2H - T2K; + cr[WS(rs, 9)] = T3x - T3A; + ci[WS(rs, 14)] = T3x + T3A; + ci[WS(rs, 2)] = T2H + T2K; + } + { + E T2P, T31, T3m, T3o, T30, T3j, T34, T3n; + { + E T2L, T2O, T3k, T3l; + T2L = Tf - TA; + T2O = T2M - T2N; + T2P = T2L - T2O; + T31 = T2L + T2O; + T3k = TM - TZ; + T3l = T3e - T3b; + T3m = T3k + T3l; + T3o = T3l - T3k; + } + { + E T2U, T2Z, T32, T33; + T2U = T2Q + T2T; + T2Z = T2V - T2Y; + T30 = KP707106781 * (T2U + T2Z); + T3j = KP707106781 * (T2Z - T2U); + T32 = T2Q - T2T; + T33 = T2V + T2Y; + T34 = KP707106781 * (T32 + T33); + T3n = KP707106781 * (T33 - T32); + } + ci[WS(rs, 5)] = T2P - T30; + cr[WS(rs, 10)] = T3n - T3o; + ci[WS(rs, 13)] = T3n + T3o; + cr[WS(rs, 2)] = T2P + T30; + cr[WS(rs, 6)] = T31 - T34; + cr[WS(rs, 14)] = T3j - T3m; + ci[WS(rs, 9)] = T3j + T3m; + ci[WS(rs, 1)] = T31 + T34; + } + } + } + 
} +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hf2_16", twinstr, &GENUS, {156, 68, 40, 0} }; + +void X(codelet_hf2_16) (planner *p) { + X(khc2hc_register) (p, hf2_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1062 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:14 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hf2_20 -include hf.h */ + +/* + * This function contains 276 FP additions, 198 FP multiplications, + * (or, 136 additions, 58 multiplications, 140 fused multiply/add), + * 146 stack variables, 4 constants, and 80 memory accesses + */ +#include "hf.h" + +static void hf2_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) { + E T5o, T5u, T5w, T5q, T5n, T5p, T5v, T5r; + { + E T2, Th, Tf, T6, T5, Tl, T1p, T1n, Ti, T3, Tt, Tv, T24, T1f, T1D; + E Tb, T1P, Tm, T21, T1b, T7, T1A, Tw, T1H, T13, TA, T1L, T17, T1S, Tq; + E T1o, T2g, T1t, T2c, TO, TK; + { + E T1e, Ta, Tk, Tg; + T2 = W[0]; + Th = W[3]; + Tf = W[2]; + T6 = W[5]; + T5 = W[1]; + Tk = T2 * Th; + Tg = T2 * Tf; + T1e = Tf * T6; + Ta = T2 * T6; + Tl = FMA(T5, Tf, Tk); + T1p = FNMS(T5, Tf, Tk); + T1n = FMA(T5, Th, Tg); + Ti = FNMS(T5, Th, Tg); + T3 = W[4]; + Tt = W[6]; + Tv = W[7]; + { + E Tp, Tj, TN, TJ; + Tp = Ti * T6; + T24 = FMA(Th, T3, T1e); + T1f = FNMS(Th, T3, T1e); + T1D = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + Tj = 
Ti * T3; + { + E T1a, T4, Tu, T1G; + T1a = Tf * T3; + T4 = T2 * T3; + Tu = Ti * Tt; + T1G = T2 * Tt; + { + E T12, Tz, T1K, T16; + T12 = Tf * Tt; + Tz = Ti * Tv; + T1K = T2 * Tv; + T16 = Tf * Tv; + T1P = FNMS(Tl, T6, Tj); + Tm = FMA(Tl, T6, Tj); + T21 = FNMS(Th, T6, T1a); + T1b = FMA(Th, T6, T1a); + T7 = FNMS(T5, T6, T4); + T1A = FMA(T5, T6, T4); + Tw = FMA(Tl, Tv, Tu); + T1H = FMA(T5, Tv, T1G); + T13 = FMA(Th, Tv, T12); + TA = FNMS(Tl, Tt, Tz); + T1L = FNMS(T5, Tt, T1K); + T17 = FNMS(Th, Tt, T16); + T1S = FMA(Tl, T3, Tp); + Tq = FNMS(Tl, T3, Tp); + } + } + T1o = T1n * T3; + T2g = T1n * Tv; + TN = Tm * Tv; + TJ = Tm * Tt; + T1t = T1n * T6; + T2c = T1n * Tt; + TO = FNMS(Tq, Tt, TN); + TK = FMA(Tq, Tv, TJ); + } + } + { + E Te, T2C, T4K, T57, T58, TD, T2H, T4L, T3u, T3Z, T11, T2v, T2P, T3P, T4n; + E T4v, T3C, T43, T2r, T2z, T3b, T3T, T4d, T4z, T3J, T42, T20, T2y, T34, T3S; + E T4g, T4y, T1c, T19, T1d, T3j, T1w, T2U, T1g, T1j, T1l; + { + E T2d, T2h, T2k, T1q, T1u, T2n, TL, TI, TM, T3q, TZ, T2N, TP, TS, TU; + { + E T1, T4J, T8, T9, Tc; + T1 = cr[0]; + T4J = ci[0]; + T8 = cr[WS(rs, 10)]; + T2d = FMA(T1p, Tv, T2c); + T2h = FNMS(T1p, Tt, T2g); + T2k = FMA(T1p, T6, T1o); + T1q = FNMS(T1p, T6, T1o); + T1u = FMA(T1p, T3, T1t); + T2n = FNMS(T1p, T3, T1t); + T9 = T7 * T8; + Tc = ci[WS(rs, 10)]; + { + E Tx, Ts, T2F, TC, T2E; + { + E Tn, Tr, To, T2D, T4I, Ty, TB, Td, T4H; + Tn = cr[WS(rs, 5)]; + Tr = ci[WS(rs, 5)]; + Tx = cr[WS(rs, 15)]; + Td = FMA(Tb, Tc, T9); + T4H = T7 * Tc; + To = Tm * Tn; + T2D = Tm * Tr; + Te = T1 + Td; + T2C = T1 - Td; + T4I = FNMS(Tb, T8, T4H); + Ty = Tw * Tx; + TB = ci[WS(rs, 15)]; + Ts = FMA(Tq, Tr, To); + T4K = T4I + T4J; + T57 = T4J - T4I; + T2F = Tw * TB; + TC = FMA(TA, TB, Ty); + T2E = FNMS(Tq, Tn, T2D); + } + { + E TF, TG, TH, TW, TY, T2G, T3p, TX, T2M; + TF = cr[WS(rs, 4)]; + T2G = FNMS(TA, Tx, T2F); + T58 = Ts - TC; + TD = Ts + TC; + TG = Ti * TF; + T2H = T2E - T2G; + T4L = T2E + T2G; + TH = ci[WS(rs, 4)]; + TW = cr[WS(rs, 19)]; + TY = ci[WS(rs, 
19)]; + TL = cr[WS(rs, 14)]; + TI = FMA(Tl, TH, TG); + T3p = Ti * TH; + TX = Tt * TW; + T2M = Tt * TY; + TM = TK * TL; + T3q = FNMS(Tl, TF, T3p); + TZ = FMA(Tv, TY, TX); + T2N = FNMS(Tv, TW, T2M); + TP = ci[WS(rs, 14)]; + TS = cr[WS(rs, 9)]; + TU = ci[WS(rs, 9)]; + } + } + } + { + E T27, T26, T28, T3y, T2p, T39, T29, T2e, T2i; + { + E T22, T23, T25, T2l, T2o, T3x, T2m, T38; + { + E TR, T2J, T3s, TV, T2L, T4m, T3t; + T22 = cr[WS(rs, 12)]; + { + E TQ, T3r, TT, T2K; + TQ = FMA(TO, TP, TM); + T3r = TK * TP; + TT = T3 * TS; + T2K = T3 * TU; + TR = TI + TQ; + T2J = TI - TQ; + T3s = FNMS(TO, TL, T3r); + TV = FMA(T6, TU, TT); + T2L = FNMS(T6, TS, T2K); + T23 = T21 * T22; + } + T4m = T3q + T3s; + T3t = T3q - T3s; + { + E T10, T3o, T4l, T2O; + T10 = TV + TZ; + T3o = TZ - TV; + T4l = T2L + T2N; + T2O = T2L - T2N; + T3u = T3o - T3t; + T3Z = T3t + T3o; + T11 = TR - T10; + T2v = TR + T10; + T2P = T2J - T2O; + T3P = T2J + T2O; + T4n = T4l - T4m; + T4v = T4m + T4l; + T25 = ci[WS(rs, 12)]; + } + } + T2l = cr[WS(rs, 7)]; + T2o = ci[WS(rs, 7)]; + T27 = cr[WS(rs, 2)]; + T26 = FMA(T24, T25, T23); + T3x = T21 * T25; + T2m = T2k * T2l; + T38 = T2k * T2o; + T28 = T1n * T27; + T3y = FNMS(T24, T22, T3x); + T2p = FMA(T2n, T2o, T2m); + T39 = FNMS(T2n, T2l, T38); + T29 = ci[WS(rs, 2)]; + T2e = cr[WS(rs, 17)]; + T2i = ci[WS(rs, 17)]; + } + { + E T1I, T1F, T1J, T3F, T1Y, T32, T1M, T1Q, T1T; + { + E T1B, T1C, T1E, T1V, T1X, T3E, T1W, T31; + { + E T2b, T35, T3A, T2j, T37, T4c, T3B; + T1B = cr[WS(rs, 8)]; + { + E T2a, T3z, T2f, T36; + T2a = FMA(T1p, T29, T28); + T3z = T1n * T29; + T2f = T2d * T2e; + T36 = T2d * T2i; + T2b = T26 + T2a; + T35 = T26 - T2a; + T3A = FNMS(T1p, T27, T3z); + T2j = FMA(T2h, T2i, T2f); + T37 = FNMS(T2h, T2e, T36); + T1C = T1A * T1B; + } + T4c = T3y + T3A; + T3B = T3y - T3A; + { + E T2q, T3w, T4b, T3a; + T2q = T2j + T2p; + T3w = T2p - T2j; + T4b = T37 + T39; + T3a = T37 - T39; + T3C = T3w - T3B; + T43 = T3B + T3w; + T2r = T2b - T2q; + T2z = T2b + T2q; + T3b = T35 - T3a; + T3T 
= T35 + T3a; + T4d = T4b - T4c; + T4z = T4c + T4b; + T1E = ci[WS(rs, 8)]; + } + } + T1V = cr[WS(rs, 3)]; + T1X = ci[WS(rs, 3)]; + T1I = cr[WS(rs, 18)]; + T1F = FMA(T1D, T1E, T1C); + T3E = T1A * T1E; + T1W = Tf * T1V; + T31 = Tf * T1X; + T1J = T1H * T1I; + T3F = FNMS(T1D, T1B, T3E); + T1Y = FMA(Th, T1X, T1W); + T32 = FNMS(Th, T1V, T31); + T1M = ci[WS(rs, 18)]; + T1Q = cr[WS(rs, 13)]; + T1T = ci[WS(rs, 13)]; + } + { + E T14, T15, T18, T1r, T1v, T3i, T1s, T2T; + { + E T1O, T2Y, T3H, T1U, T30, T4f, T3I; + T14 = cr[WS(rs, 16)]; + { + E T1N, T3G, T1R, T2Z; + T1N = FMA(T1L, T1M, T1J); + T3G = T1H * T1M; + T1R = T1P * T1Q; + T2Z = T1P * T1T; + T1O = T1F + T1N; + T2Y = T1F - T1N; + T3H = FNMS(T1L, T1I, T3G); + T1U = FMA(T1S, T1T, T1R); + T30 = FNMS(T1S, T1Q, T2Z); + T15 = T13 * T14; + } + T4f = T3F + T3H; + T3I = T3F - T3H; + { + E T1Z, T3D, T4e, T33; + T1Z = T1U + T1Y; + T3D = T1Y - T1U; + T4e = T30 + T32; + T33 = T30 - T32; + T3J = T3D - T3I; + T42 = T3I + T3D; + T20 = T1O - T1Z; + T2y = T1O + T1Z; + T34 = T2Y - T33; + T3S = T2Y + T33; + T4g = T4e - T4f; + T4y = T4f + T4e; + T18 = ci[WS(rs, 16)]; + } + } + T1r = cr[WS(rs, 11)]; + T1v = ci[WS(rs, 11)]; + T1c = cr[WS(rs, 6)]; + T19 = FMA(T17, T18, T15); + T3i = T13 * T18; + T1s = T1q * T1r; + T2T = T1q * T1v; + T1d = T1b * T1c; + T3j = FNMS(T17, T14, T3i); + T1w = FMA(T1u, T1v, T1s); + T2U = FNMS(T1u, T1r, T2T); + T1g = ci[WS(rs, 6)]; + T1j = cr[WS(rs, 1)]; + T1l = ci[WS(rs, 1)]; + } + } + } + } + { + E T4F, T4Q, T4R, T5a, T4E, T5b, T2I, T5h, T5g, T4W, T4X, T53, T52, T5l, T5m; + E T5s, T2X, T3N, T3L, T3c, T5t; + { + E T2u, T3n, T2w, T2W, T4w, T4r, T4p, T45, T47, T3O, T3R, T4a, T4q, T3U; + { + E T4h, TE, T40, T3Q, T4k, T1z, T2s, T49, T48; + { + E T1i, T2Q, T3l, T1m, T2S, T4j, T3m; + T4h = T4d - T4g; + T4F = T4g + T4d; + { + E T1h, T3k, T1k, T2R; + T1h = FMA(T1f, T1g, T1d); + T3k = T1b * T1g; + T1k = T2 * T1j; + T2R = T2 * T1l; + T1i = T19 + T1h; + T2Q = T19 - T1h; + T3l = FNMS(T1f, T1c, T3k); + T1m = FMA(T5, T1l, T1k); + T2S 
= FNMS(T5, T1j, T2R); + } + TE = Te - TD; + T2u = Te + TD; + T4j = T3j + T3l; + T3m = T3j - T3l; + { + E T1x, T3h, T4i, T2V, T1y; + T1x = T1m + T1w; + T3h = T1w - T1m; + T4i = T2S + T2U; + T2V = T2S - T2U; + T3n = T3h - T3m; + T40 = T3m + T3h; + T1y = T1i - T1x; + T2w = T1i + T1x; + T2W = T2Q - T2V; + T3Q = T2Q + T2V; + T4k = T4i - T4j; + T4w = T4j + T4i; + T4Q = T1y - T11; + T1z = T11 + T1y; + T2s = T20 + T2r; + T4R = T20 - T2r; + } + } + { + E T41, T4o, T44, T2t; + T5a = T3Z + T40; + T41 = T3Z - T40; + T4o = T4k - T4n; + T4E = T4n + T4k; + T5b = T42 + T43; + T44 = T42 - T43; + T49 = T1z - T2s; + T2t = T1z + T2s; + T4r = FMA(KP618033988, T4h, T4o); + T4p = FNMS(KP618033988, T4o, T4h); + T45 = FMA(KP618033988, T44, T41); + T47 = FNMS(KP618033988, T41, T44); + ci[WS(rs, 9)] = TE + T2t; + T48 = FNMS(KP250000000, T2t, TE); + } + T3O = T2C + T2H; + T2I = T2C - T2H; + T5h = T3P - T3Q; + T3R = T3P + T3Q; + T4a = FNMS(KP559016994, T49, T48); + T4q = FMA(KP559016994, T49, T48); + T3U = T3S + T3T; + T5g = T3S - T3T; + } + { + E T2x, T4B, T4D, T2A, T3Y, T46; + { + E T4x, T3X, T3V, T4A, T3W; + T4W = T4v + T4w; + T4x = T4v - T4w; + ci[WS(rs, 1)] = FMA(KP951056516, T4p, T4a); + cr[WS(rs, 2)] = FNMS(KP951056516, T4p, T4a); + cr[WS(rs, 6)] = FMA(KP951056516, T4r, T4q); + ci[WS(rs, 5)] = FNMS(KP951056516, T4r, T4q); + T3X = T3R - T3U; + T3V = T3R + T3U; + T4A = T4y - T4z; + T4X = T4y + T4z; + T2x = T2v + T2w; + T53 = T2v - T2w; + cr[WS(rs, 5)] = T3O + T3V; + T3W = FNMS(KP250000000, T3V, T3O); + T4B = FMA(KP618033988, T4A, T4x); + T4D = FNMS(KP618033988, T4x, T4A); + T52 = T2z - T2y; + T2A = T2y + T2z; + T3Y = FMA(KP559016994, T3X, T3W); + T46 = FNMS(KP559016994, T3X, T3W); + } + { + E T3v, T4t, T4s, T3K, T2B, T4u, T4C; + T3v = T3n - T3u; + T5l = T3u + T3n; + T2B = T2x + T2A; + T4t = T2x - T2A; + cr[WS(rs, 9)] = FNMS(KP951056516, T45, T3Y); + cr[WS(rs, 1)] = FMA(KP951056516, T45, T3Y); + ci[WS(rs, 6)] = FMA(KP951056516, T47, T46); + ci[WS(rs, 2)] = FNMS(KP951056516, T47, T46); + 
cr[0] = T2u + T2B; + T4s = FNMS(KP250000000, T2B, T2u); + T5m = T3J + T3C; + T3K = T3C - T3J; + T5s = T2P - T2W; + T2X = T2P + T2W; + T4u = FMA(KP559016994, T4t, T4s); + T4C = FNMS(KP559016994, T4t, T4s); + T3N = FNMS(KP618033988, T3v, T3K); + T3L = FMA(KP618033988, T3K, T3v); + ci[WS(rs, 3)] = FMA(KP951056516, T4B, T4u); + cr[WS(rs, 4)] = FNMS(KP951056516, T4B, T4u); + cr[WS(rs, 8)] = FMA(KP951056516, T4D, T4C); + ci[WS(rs, 7)] = FNMS(KP951056516, T4D, T4C); + T3c = T34 + T3b; + T5t = T34 - T3b; + } + } + } + { + E T4V, T5i, T5k, T59, T5e, T5c; + { + E T4M, T3f, T4U, T4S, T3e, T3d; + T4V = T4L + T4K; + T4M = T4K - T4L; + T3f = T2X - T3c; + T3d = T2X + T3c; + T4U = FMA(KP618033988, T4Q, T4R); + T4S = FNMS(KP618033988, T4R, T4Q); + ci[WS(rs, 4)] = T2I + T3d; + T3e = FNMS(KP250000000, T3d, T2I); + { + E T4O, T4N, T3g, T3M, T4G, T4T, T4P; + T3g = FMA(KP559016994, T3f, T3e); + T3M = FNMS(KP559016994, T3f, T3e); + T4O = T4F - T4E; + T4G = T4E + T4F; + ci[WS(rs, 8)] = FMA(KP951056516, T3L, T3g); + ci[0] = FNMS(KP951056516, T3L, T3g); + cr[WS(rs, 7)] = FNMS(KP951056516, T3N, T3M); + cr[WS(rs, 3)] = FMA(KP951056516, T3N, T3M); + cr[WS(rs, 10)] = T4G - T4M; + T4N = FMA(KP250000000, T4G, T4M); + T5i = FNMS(KP618033988, T5h, T5g); + T5k = FMA(KP618033988, T5g, T5h); + T59 = T57 - T58; + T5o = T58 + T57; + T4T = FNMS(KP559016994, T4O, T4N); + T4P = FMA(KP559016994, T4O, T4N); + ci[WS(rs, 13)] = FMA(KP951056516, T4S, T4P); + cr[WS(rs, 14)] = FMS(KP951056516, T4S, T4P); + ci[WS(rs, 17)] = FMA(KP951056516, T4U, T4T); + cr[WS(rs, 18)] = FMS(KP951056516, T4U, T4T); + T5e = T5a - T5b; + T5c = T5a + T5b; + } + } + { + E T56, T54, T4Y, T50, T5d, T5f, T5j, T4Z, T55, T51; + ci[WS(rs, 14)] = T5c + T59; + T5d = FNMS(KP250000000, T5c, T59); + T56 = FNMS(KP618033988, T52, T53); + T54 = FMA(KP618033988, T53, T52); + T5f = FNMS(KP559016994, T5e, T5d); + T5j = FMA(KP559016994, T5e, T5d); + cr[WS(rs, 17)] = -(FMA(KP951056516, T5i, T5f)); + cr[WS(rs, 13)] = FMS(KP951056516, T5i, T5f); + 
ci[WS(rs, 18)] = FNMS(KP951056516, T5k, T5j); + ci[WS(rs, 10)] = FMA(KP951056516, T5k, T5j); + T4Y = T4W + T4X; + T50 = T4W - T4X; + ci[WS(rs, 19)] = T4Y + T4V; + T4Z = FNMS(KP250000000, T4Y, T4V); + T5u = FMA(KP618033988, T5t, T5s); + T5w = FNMS(KP618033988, T5s, T5t); + T55 = FMA(KP559016994, T50, T4Z); + T51 = FNMS(KP559016994, T50, T4Z); + ci[WS(rs, 11)] = FMA(KP951056516, T54, T51); + cr[WS(rs, 12)] = FMS(KP951056516, T54, T51); + ci[WS(rs, 15)] = FMA(KP951056516, T56, T55); + cr[WS(rs, 16)] = FMS(KP951056516, T56, T55); + T5q = T5l - T5m; + T5n = T5l + T5m; + } + } + } + } + } + cr[WS(rs, 15)] = T5n - T5o; + T5p = FMA(KP250000000, T5n, T5o); + T5v = FMA(KP559016994, T5q, T5p); + T5r = FNMS(KP559016994, T5q, T5p); + cr[WS(rs, 19)] = -(FMA(KP951056516, T5u, T5r)); + cr[WS(rs, 11)] = FMS(KP951056516, T5u, T5r); + ci[WS(rs, 16)] = FNMS(KP951056516, T5w, T5v); + ci[WS(rs, 12)] = FMA(KP951056516, T5w, T5v); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hf2_20", twinstr, &GENUS, {136, 58, 140, 0} }; + +void X(codelet_hf2_20) (planner *p) { + X(khc2hc_register) (p, hf2_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hf2_20 -include hf.h */ + +/* + * This function contains 276 FP additions, 164 FP multiplications, + * (or, 204 additions, 92 multiplications, 72 fused multiply/add), + * 123 stack variables, 4 constants, and 80 memory accesses + */ +#include "hf.h" + +static void hf2_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, 
+0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) { + E T2, T5, Tg, Ti, Tk, To, T1h, T1f, T6, T3, T8, T14, T1Q, Tc, T1O; + E T1v, T18, T1t, T1n, T24, T1j, T22, Tq, Tu, T1E, T1G, Tx, Ty, Tz, TJ; + E T1Z, TB, T1X, T1A, TZ, TL, T1y, TX; + { + E T7, T16, Ta, T13, T4, T17, Tb, T12; + { + E Th, Tn, Tj, Tm; + T2 = W[0]; + T5 = W[1]; + Tg = W[2]; + Ti = W[3]; + Th = T2 * Tg; + Tn = T5 * Tg; + Tj = T5 * Ti; + Tm = T2 * Ti; + Tk = Th - Tj; + To = Tm + Tn; + T1h = Tm - Tn; + T1f = Th + Tj; + T6 = W[5]; + T7 = T5 * T6; + T16 = Tg * T6; + Ta = T2 * T6; + T13 = Ti * T6; + T3 = W[4]; + T4 = T2 * T3; + T17 = Ti * T3; + Tb = T5 * T3; + T12 = Tg * T3; + } + T8 = T4 - T7; + T14 = T12 + T13; + T1Q = T16 + T17; + Tc = Ta + Tb; + T1O = T12 - T13; + T1v = Ta - Tb; + T18 = T16 - T17; + T1t = T4 + T7; + { + E T1l, T1m, T1g, T1i; + T1l = T1f * T6; + T1m = T1h * T3; + T1n = T1l + T1m; + T24 = T1l - T1m; + T1g = T1f * T3; + T1i = T1h * T6; + T1j = T1g - T1i; + T22 = T1g + T1i; + { + E Tl, Tp, Ts, Tt; + Tl = Tk * T3; + Tp = To * T6; + Tq = Tl + Tp; + Ts = Tk * T6; + Tt = To * T3; + Tu = Ts - Tt; + T1E = Tl - Tp; + T1G = Ts + Tt; + Tx = W[6]; + Ty = W[7]; + Tz = FMA(Tk, Tx, To * Ty); + TJ = FMA(Tq, Tx, Tu * Ty); + T1Z = FNMS(T1h, Tx, T1f * Ty); + TB = FNMS(To, Tx, Tk * Ty); + T1X = FMA(T1f, Tx, T1h * Ty); + T1A = FNMS(T5, Tx, T2 * Ty); + TZ = FNMS(Ti, Tx, Tg * Ty); + TL = FNMS(Tu, Tx, Tq * Ty); + T1y = FMA(T2, Tx, T5 * Ty); + TX = FMA(Tg, Tx, Ti * Ty); + } + } + } + { + E TF, T2b, T4D, T4M, T2K, T3r, T4a, T4m, T1N, T28, T29, T3C, T3F, T43, T3X; + E T3Y, T4o, T2f, T2g, T2h, T2y, T2D, T2E, T3g, T3h, T4z, T3n, T3o, T3p, T33; + E T38, T4K, TW, T1r, T1s, T3J, T3M, T44, T3U, T3V, T4n, T2c, T2d, T2e, T2n; + E T2s, T2t, T3d, T3e, T4y, T3k, T3l, T3m, T2S, T2X, T4J; + { + E T1, T47, Te, T46, Tw, T2H, TD, T2I, T9, Td; + T1 = cr[0]; + T47 = ci[0]; + T9 = cr[WS(rs, 
10)]; + Td = ci[WS(rs, 10)]; + Te = FMA(T8, T9, Tc * Td); + T46 = FNMS(Tc, T9, T8 * Td); + { + E Tr, Tv, TA, TC; + Tr = cr[WS(rs, 5)]; + Tv = ci[WS(rs, 5)]; + Tw = FMA(Tq, Tr, Tu * Tv); + T2H = FNMS(Tu, Tr, Tq * Tv); + TA = cr[WS(rs, 15)]; + TC = ci[WS(rs, 15)]; + TD = FMA(Tz, TA, TB * TC); + T2I = FNMS(TB, TA, Tz * TC); + } + { + E Tf, TE, T4B, T4C; + Tf = T1 + Te; + TE = Tw + TD; + TF = Tf - TE; + T2b = Tf + TE; + T4B = T47 - T46; + T4C = Tw - TD; + T4D = T4B - T4C; + T4M = T4C + T4B; + } + { + E T2G, T2J, T48, T49; + T2G = T1 - Te; + T2J = T2H - T2I; + T2K = T2G - T2J; + T3r = T2G + T2J; + T48 = T46 + T47; + T49 = T2H + T2I; + T4a = T48 - T49; + T4m = T49 + T48; + } + } + { + E T1D, T3A, T2u, T31, T27, T3D, T2C, T37, T1M, T3B, T2x, T32, T1W, T3E, T2z; + E T36; + { + E T1x, T2Z, T1C, T30; + { + E T1u, T1w, T1z, T1B; + T1u = cr[WS(rs, 8)]; + T1w = ci[WS(rs, 8)]; + T1x = FMA(T1t, T1u, T1v * T1w); + T2Z = FNMS(T1v, T1u, T1t * T1w); + T1z = cr[WS(rs, 18)]; + T1B = ci[WS(rs, 18)]; + T1C = FMA(T1y, T1z, T1A * T1B); + T30 = FNMS(T1A, T1z, T1y * T1B); + } + T1D = T1x + T1C; + T3A = T2Z + T30; + T2u = T1x - T1C; + T31 = T2Z - T30; + } + { + E T21, T2A, T26, T2B; + { + E T1Y, T20, T23, T25; + T1Y = cr[WS(rs, 17)]; + T20 = ci[WS(rs, 17)]; + T21 = FMA(T1X, T1Y, T1Z * T20); + T2A = FNMS(T1Z, T1Y, T1X * T20); + T23 = cr[WS(rs, 7)]; + T25 = ci[WS(rs, 7)]; + T26 = FMA(T22, T23, T24 * T25); + T2B = FNMS(T24, T23, T22 * T25); + } + T27 = T21 + T26; + T3D = T2A + T2B; + T2C = T2A - T2B; + T37 = T21 - T26; + } + { + E T1I, T2v, T1L, T2w; + { + E T1F, T1H, T1J, T1K; + T1F = cr[WS(rs, 13)]; + T1H = ci[WS(rs, 13)]; + T1I = FMA(T1E, T1F, T1G * T1H); + T2v = FNMS(T1G, T1F, T1E * T1H); + T1J = cr[WS(rs, 3)]; + T1K = ci[WS(rs, 3)]; + T1L = FMA(Tg, T1J, Ti * T1K); + T2w = FNMS(Ti, T1J, Tg * T1K); + } + T1M = T1I + T1L; + T3B = T2v + T2w; + T2x = T2v - T2w; + T32 = T1I - T1L; + } + { + E T1S, T34, T1V, T35; + { + E T1P, T1R, T1T, T1U; + T1P = cr[WS(rs, 12)]; + T1R = ci[WS(rs, 12)]; + T1S = 
FMA(T1O, T1P, T1Q * T1R); + T34 = FNMS(T1Q, T1P, T1O * T1R); + T1T = cr[WS(rs, 2)]; + T1U = ci[WS(rs, 2)]; + T1V = FMA(T1f, T1T, T1h * T1U); + T35 = FNMS(T1h, T1T, T1f * T1U); + } + T1W = T1S + T1V; + T3E = T34 + T35; + T2z = T1S - T1V; + T36 = T34 - T35; + } + T1N = T1D - T1M; + T28 = T1W - T27; + T29 = T1N + T28; + T3C = T3A - T3B; + T3F = T3D - T3E; + T43 = T3F - T3C; + T3X = T3A + T3B; + T3Y = T3E + T3D; + T4o = T3X + T3Y; + T2f = T1D + T1M; + T2g = T1W + T27; + T2h = T2f + T2g; + T2y = T2u - T2x; + T2D = T2z - T2C; + T2E = T2y + T2D; + T3g = T31 - T32; + T3h = T36 - T37; + T4z = T3g + T3h; + T3n = T2u + T2x; + T3o = T2z + T2C; + T3p = T3n + T3o; + T33 = T31 + T32; + T38 = T36 + T37; + T4K = T33 + T38; + } + { + E TO, T3H, T2j, T2Q, T1q, T3L, T2r, T2T, TV, T3I, T2m, T2R, T1b, T3K, T2o; + E T2W; + { + E TI, T2O, TN, T2P; + { + E TG, TH, TK, TM; + TG = cr[WS(rs, 4)]; + TH = ci[WS(rs, 4)]; + TI = FMA(Tk, TG, To * TH); + T2O = FNMS(To, TG, Tk * TH); + TK = cr[WS(rs, 14)]; + TM = ci[WS(rs, 14)]; + TN = FMA(TJ, TK, TL * TM); + T2P = FNMS(TL, TK, TJ * TM); + } + TO = TI + TN; + T3H = T2O + T2P; + T2j = TI - TN; + T2Q = T2O - T2P; + } + { + E T1e, T2p, T1p, T2q; + { + E T1c, T1d, T1k, T1o; + T1c = cr[WS(rs, 1)]; + T1d = ci[WS(rs, 1)]; + T1e = FMA(T2, T1c, T5 * T1d); + T2p = FNMS(T5, T1c, T2 * T1d); + T1k = cr[WS(rs, 11)]; + T1o = ci[WS(rs, 11)]; + T1p = FMA(T1j, T1k, T1n * T1o); + T2q = FNMS(T1n, T1k, T1j * T1o); + } + T1q = T1e + T1p; + T3L = T2p + T2q; + T2r = T2p - T2q; + T2T = T1p - T1e; + } + { + E TR, T2k, TU, T2l; + { + E TP, TQ, TS, TT; + TP = cr[WS(rs, 9)]; + TQ = ci[WS(rs, 9)]; + TR = FMA(T3, TP, T6 * TQ); + T2k = FNMS(T6, TP, T3 * TQ); + TS = cr[WS(rs, 19)]; + TT = ci[WS(rs, 19)]; + TU = FMA(Tx, TS, Ty * TT); + T2l = FNMS(Ty, TS, Tx * TT); + } + TV = TR + TU; + T3I = T2k + T2l; + T2m = T2k - T2l; + T2R = TR - TU; + } + { + E T11, T2U, T1a, T2V; + { + E TY, T10, T15, T19; + TY = cr[WS(rs, 16)]; + T10 = ci[WS(rs, 16)]; + T11 = FMA(TX, TY, TZ * T10); + T2U = 
FNMS(TZ, TY, TX * T10); + T15 = cr[WS(rs, 6)]; + T19 = ci[WS(rs, 6)]; + T1a = FMA(T14, T15, T18 * T19); + T2V = FNMS(T18, T15, T14 * T19); + } + T1b = T11 + T1a; + T3K = T2U + T2V; + T2o = T11 - T1a; + T2W = T2U - T2V; + } + TW = TO - TV; + T1r = T1b - T1q; + T1s = TW + T1r; + T3J = T3H - T3I; + T3M = T3K - T3L; + T44 = T3J + T3M; + T3U = T3H + T3I; + T3V = T3K + T3L; + T4n = T3U + T3V; + T2c = TO + TV; + T2d = T1b + T1q; + T2e = T2c + T2d; + T2n = T2j - T2m; + T2s = T2o - T2r; + T2t = T2n + T2s; + T3d = T2Q - T2R; + T3e = T2W + T2T; + T4y = T3d + T3e; + T3k = T2j + T2m; + T3l = T2o + T2r; + T3m = T3k + T3l; + T2S = T2Q + T2R; + T2X = T2T - T2W; + T4J = T2X - T2S; + } + { + E T3y, T2a, T3x, T3O, T3Q, T3G, T3N, T3P, T3z; + T3y = KP559016994 * (T1s - T29); + T2a = T1s + T29; + T3x = FNMS(KP250000000, T2a, TF); + T3G = T3C + T3F; + T3N = T3J - T3M; + T3O = FNMS(KP587785252, T3N, KP951056516 * T3G); + T3Q = FMA(KP951056516, T3N, KP587785252 * T3G); + ci[WS(rs, 9)] = TF + T2a; + T3P = T3y + T3x; + ci[WS(rs, 5)] = T3P - T3Q; + cr[WS(rs, 6)] = T3P + T3Q; + T3z = T3x - T3y; + cr[WS(rs, 2)] = T3z - T3O; + ci[WS(rs, 1)] = T3z + T3O; + } + { + E T3q, T3s, T3t, T3j, T3w, T3f, T3i, T3v, T3u; + T3q = KP559016994 * (T3m - T3p); + T3s = T3m + T3p; + T3t = FNMS(KP250000000, T3s, T3r); + T3f = T3d - T3e; + T3i = T3g - T3h; + T3j = FMA(KP951056516, T3f, KP587785252 * T3i); + T3w = FNMS(KP587785252, T3f, KP951056516 * T3i); + cr[WS(rs, 5)] = T3r + T3s; + T3v = T3t - T3q; + ci[WS(rs, 2)] = T3v - T3w; + ci[WS(rs, 6)] = T3w + T3v; + T3u = T3q + T3t; + cr[WS(rs, 1)] = T3j + T3u; + cr[WS(rs, 9)] = T3u - T3j; + } + { + E T3R, T2i, T3S, T40, T42, T3W, T3Z, T41, T3T; + T3R = KP559016994 * (T2e - T2h); + T2i = T2e + T2h; + T3S = FNMS(KP250000000, T2i, T2b); + T3W = T3U - T3V; + T3Z = T3X - T3Y; + T40 = FMA(KP951056516, T3W, KP587785252 * T3Z); + T42 = FNMS(KP587785252, T3W, KP951056516 * T3Z); + cr[0] = T2b + T2i; + T41 = T3S - T3R; + ci[WS(rs, 7)] = T41 - T42; + cr[WS(rs, 8)] = T41 + T42; + 
T3T = T3R + T3S; + cr[WS(rs, 4)] = T3T - T40; + ci[WS(rs, 3)] = T3T + T40; + } + { + E T2F, T2L, T2M, T3a, T3b, T2Y, T39, T3c, T2N; + T2F = KP559016994 * (T2t - T2E); + T2L = T2t + T2E; + T2M = FNMS(KP250000000, T2L, T2K); + T2Y = T2S + T2X; + T39 = T33 - T38; + T3a = FMA(KP951056516, T2Y, KP587785252 * T39); + T3b = FNMS(KP587785252, T2Y, KP951056516 * T39); + ci[WS(rs, 4)] = T2K + T2L; + T3c = T2M - T2F; + cr[WS(rs, 3)] = T3b + T3c; + cr[WS(rs, 7)] = T3c - T3b; + T2N = T2F + T2M; + ci[0] = T2N - T3a; + ci[WS(rs, 8)] = T3a + T2N; + } + { + E T4e, T45, T4f, T4d, T4h, T4b, T4c, T4i, T4g; + T4e = KP559016994 * (T44 + T43); + T45 = T43 - T44; + T4f = FMA(KP250000000, T45, T4a); + T4b = T1r - TW; + T4c = T1N - T28; + T4d = FNMS(KP587785252, T4c, KP951056516 * T4b); + T4h = FMA(KP587785252, T4b, KP951056516 * T4c); + cr[WS(rs, 10)] = T45 - T4a; + T4i = T4f - T4e; + cr[WS(rs, 18)] = T4h - T4i; + ci[WS(rs, 17)] = T4h + T4i; + T4g = T4e + T4f; + cr[WS(rs, 14)] = T4d - T4g; + ci[WS(rs, 13)] = T4d + T4g; + } + { + E T4A, T4E, T4F, T4x, T4H, T4v, T4w, T4I, T4G; + T4A = KP559016994 * (T4y - T4z); + T4E = T4y + T4z; + T4F = FNMS(KP250000000, T4E, T4D); + T4v = T3n - T3o; + T4w = T3k - T3l; + T4x = FNMS(KP587785252, T4w, KP951056516 * T4v); + T4H = FMA(KP951056516, T4w, KP587785252 * T4v); + ci[WS(rs, 14)] = T4E + T4D; + T4I = T4A + T4F; + ci[WS(rs, 10)] = T4H + T4I; + ci[WS(rs, 18)] = T4I - T4H; + T4G = T4A - T4F; + cr[WS(rs, 13)] = T4x + T4G; + cr[WS(rs, 17)] = T4G - T4x; + } + { + E T4r, T4p, T4q, T4l, T4t, T4j, T4k, T4u, T4s; + T4r = KP559016994 * (T4n - T4o); + T4p = T4n + T4o; + T4q = FNMS(KP250000000, T4p, T4m); + T4j = T2c - T2d; + T4k = T2f - T2g; + T4l = FNMS(KP951056516, T4k, KP587785252 * T4j); + T4t = FMA(KP951056516, T4j, KP587785252 * T4k); + ci[WS(rs, 19)] = T4p + T4m; + T4u = T4r + T4q; + cr[WS(rs, 16)] = T4t - T4u; + ci[WS(rs, 15)] = T4t + T4u; + T4s = T4q - T4r; + cr[WS(rs, 12)] = T4l - T4s; + ci[WS(rs, 11)] = T4l + T4s; + } + { + E T4Q, T4L, T4R, T4P, T4T, 
T4N, T4O, T4U, T4S; + T4Q = KP559016994 * (T4J + T4K); + T4L = T4J - T4K; + T4R = FMA(KP250000000, T4L, T4M); + T4N = T2n - T2s; + T4O = T2y - T2D; + T4P = FMA(KP951056516, T4N, KP587785252 * T4O); + T4T = FNMS(KP587785252, T4N, KP951056516 * T4O); + cr[WS(rs, 15)] = T4L - T4M; + T4U = T4Q + T4R; + ci[WS(rs, 12)] = T4T + T4U; + ci[WS(rs, 16)] = T4U - T4T; + T4S = T4Q - T4R; + cr[WS(rs, 11)] = T4P + T4S; + cr[WS(rs, 19)] = T4S - T4P; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 19}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hf2_20", twinstr, &GENUS, {204, 92, 72, 0} }; + +void X(codelet_hf2_20) (planner *p) { + X(khc2hc_register) (p, hf2_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1625 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:14 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -dit -name hf2_25 -include hf.h */ + +/* + * This function contains 440 FP additions, 434 FP multiplications, + * (or, 84 additions, 78 multiplications, 356 fused multiply/add), + * 215 stack variables, 47 constants, and 100 memory accesses + */ +#include "hf.h" + +static void hf2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP062914667, 
+0.062914667253649757225485955897349402364686947); + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP833417178, +0.833417178328688677408962550243238843138996060); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP943557151, +0.943557151597354104399655195398983005179443399); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + 
DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) { + E T7M, T6S, T6Q, T7S, T7Q, T7L, T6R, T6J, T7N, T7R; + { + E T2, T8, T3, T6, Tk, Tv, TS, T4, Ta, TD, T2L, T10, Tm, T5, Tc; + T2 = W[0]; + T8 = W[4]; + T3 = W[2]; + T6 = W[3]; + Tk = W[6]; + Tv = T2 * T8; + TS = T3 * T8; + T4 = T2 * T3; + Ta = T2 * T6; + TD = T8 * Tk; + T2L = T2 * Tk; + T10 = T3 * Tk; + Tm = W[7]; + T5 = W[1]; + Tc = W[5]; + { + E T7u, T7U, T4s, T6a, T4g, TN, T4f, T7q, T8j, T7p, T4G, T6k, T3a, T4z, T6n; + E T6m, T4w, T4a, T4D, T6j, T6C, T54, T6z, T5b, T1v, T3t, T6y, T58, T6B, T51; + E T6v, T5j, T6s, T5q, T21, T3H, T6r, T5n, T6u, T5g, T26, T3K, T4N, T2A, T3U; + E T4U, T2c, T3M, T2k, T3O; + { + E T11, T1b, Tb, T19, T7, T2m, TT, T15, T2Q, TX, T2p, T1g, T2a, T2e, T2i; + E T27, T1c, T1O, T1K, T1q, T1m, T2x, T2t, T1W, T1S, T2G, T3Y, T2N, T4F, T38; + E T48, T4y, T2K, T40, T2S, T41; + { + E T2M, T1j, T1l, T2X, T2U, T35, T31, T7l, T7n, T7m, T2O, T2R; + { + E T1, Tj, T4j, TK, T4q, TC, T4o, Tt, T4l; + { + E TE, Tw, TI, TA, Th, Tr, Tn, Td, Te, Ti, T14, T2P, TH, Tx, TB; + T1 = cr[0]; + T11 = FMA(T6, Tm, T10); + T14 = T3 * Tm; + T2P = T2 * Tm; + TH = T8 * Tm; + T2M = FMA(T5, Tm, T2L); + T1b = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + T19 = FMA(T5, T6, T4); + T7 = FNMS(T5, T6, T4); + T2m = FNMS(T6, Tc, TS); + TT = FMA(T6, Tc, TS); + TE = FMA(Tc, Tm, TD); + T1j = FMA(T5, Tc, Tv); + Tw = FNMS(T5, Tc, Tv); + { + E TW, Tz, T1f, T2d; + TW = T3 * Tc; + Tz = T2 * Tc; + T15 = FNMS(T6, Tk, T14); + T2Q = FNMS(T5, Tk, T2P); + TI = FNMS(Tc, Tk, TH); + T1f = T19 * Tc; + T2d = T19 * Tk; + { + E T2h, T1a, Tg, Tq; + T2h = T19 * Tm; + T1a = T19 * T8; + Tg = T7 * Tc; + Tq = T7 * Tm; + { + E Tl, T9, T1p, T1k; + Tl = T7 * Tk; + T9 = T7 * 
T8; + T1p = T1j * Tm; + T1k = T1j * Tk; + { + E T34, T30, T1N, T1J; + T34 = TT * Tm; + T30 = TT * Tk; + T1N = Tw * Tm; + T1J = Tw * Tk; + TX = FNMS(T6, T8, TW); + T2p = FMA(T6, T8, TW); + TA = FMA(T5, T8, Tz); + T1l = FNMS(T5, T8, Tz); + T1g = FMA(T1b, T8, T1f); + T2a = FNMS(T1b, T8, T1f); + T2e = FMA(T1b, Tm, T2d); + T2i = FNMS(T1b, Tk, T2h); + T27 = FMA(T1b, Tc, T1a); + T1c = FNMS(T1b, Tc, T1a); + T2X = FMA(Tb, T8, Tg); + Th = FNMS(Tb, T8, Tg); + Tr = FNMS(Tb, Tk, Tq); + Tn = FMA(Tb, Tm, Tl); + Td = FMA(Tb, Tc, T9); + T2U = FNMS(Tb, Tc, T9); + T35 = FNMS(TX, Tk, T34); + T31 = FMA(TX, Tm, T30); + T1O = FNMS(TA, Tk, T1N); + T1K = FMA(TA, Tm, T1J); + T1q = FNMS(T1l, Tk, T1p); + T1m = FMA(T1l, Tm, T1k); + { + E T2w, T2s, T1V, T1R; + T2w = T27 * Tm; + T2s = T27 * Tk; + T1V = Td * Tm; + T1R = Td * Tk; + T2x = FNMS(T2a, Tk, T2w); + T2t = FMA(T2a, Tm, T2s); + T1W = FNMS(Th, Tk, T1V); + T1S = FMA(Th, Tm, T1R); + T7l = ci[0]; + Te = cr[WS(rs, 5)]; + Ti = ci[WS(rs, 5)]; + } + } + } + } + } + { + E TF, TJ, Tf, T4i, TG, T4p; + TF = cr[WS(rs, 15)]; + TJ = ci[WS(rs, 15)]; + Tf = Td * Te; + T4i = Td * Ti; + TG = TE * TF; + T4p = TE * TJ; + Tj = FMA(Th, Ti, Tf); + T4j = FNMS(Th, Te, T4i); + TK = FMA(TI, TJ, TG); + T4q = FNMS(TI, TF, T4p); + } + Tx = cr[WS(rs, 10)]; + TB = ci[WS(rs, 10)]; + { + E To, Ts, Ty, T4n, Tp, T4k; + To = cr[WS(rs, 20)]; + Ts = ci[WS(rs, 20)]; + Ty = Tw * Tx; + T4n = Tw * TB; + Tp = Tn * To; + T4k = Tn * Ts; + TC = FMA(TA, TB, Ty); + T4o = FNMS(TA, Tx, T4n); + Tt = FMA(Tr, Ts, Tp); + T4l = FNMS(Tr, To, T4k); + } + } + { + E TL, T7s, T4r, Tu, T7t, T4m, TM; + TL = TC + TK; + T7s = TC - TK; + T4r = T4o - T4q; + T7n = T4o + T4q; + Tu = Tj + Tt; + T7t = Tj - Tt; + T4m = T4j - T4l; + T7m = T4j + T4l; + T7u = FNMS(KP618033988, T7t, T7s); + T7U = FMA(KP618033988, T7s, T7t); + T4s = FMA(KP618033988, T4r, T4m); + T6a = FNMS(KP618033988, T4m, T4r); + T4g = Tu - TL; + TM = Tu + TL; + TN = T1 + TM; + T4f = FNMS(KP250000000, TM, T1); + } + } + { + E T2D, T2F, T7o, T2E, 
T3X; + T2D = cr[WS(rs, 3)]; + T2F = ci[WS(rs, 3)]; + T7q = T7m - T7n; + T7o = T7m + T7n; + T2E = T3 * T2D; + T3X = T3 * T2F; + { + E T2V, T2W, T2Y, T32, T36; + T2V = cr[WS(rs, 13)]; + T8j = T7o + T7l; + T7p = FNMS(KP250000000, T7o, T7l); + T2G = FMA(T6, T2F, T2E); + T3Y = FNMS(T6, T2D, T3X); + T2W = T2U * T2V; + T2Y = ci[WS(rs, 13)]; + T32 = cr[WS(rs, 18)]; + T36 = ci[WS(rs, 18)]; + { + E T2H, T2I, T2J, T3Z; + { + E T2Z, T45, T37, T47, T44, T33, T46; + T2H = cr[WS(rs, 8)]; + T2Z = FMA(T2X, T2Y, T2W); + T44 = T2U * T2Y; + T33 = T31 * T32; + T46 = T31 * T36; + T2I = T1j * T2H; + T45 = FNMS(T2X, T2V, T44); + T37 = FMA(T35, T36, T33); + T47 = FNMS(T35, T32, T46); + T2J = ci[WS(rs, 8)]; + T2N = cr[WS(rs, 23)]; + T4F = T2Z - T37; + T38 = T2Z + T37; + T48 = T45 + T47; + T4y = T47 - T45; + T3Z = T1j * T2J; + T2O = T2M * T2N; + T2R = ci[WS(rs, 23)]; + } + T2K = FMA(T1l, T2J, T2I); + T40 = FNMS(T1l, T2H, T3Z); + } + } + } + T2S = FMA(T2Q, T2R, T2O); + T41 = T2M * T2R; + } + { + E TR, T3h, T1t, T53, T3r, T5a, TZ, T3j, T17, T3l; + { + E T12, T16, T13, T3k; + { + E TO, TP, T4C, T4B, TQ; + { + E T2T, T4E, T42, T4v, T39; + TO = cr[WS(rs, 1)]; + T2T = T2K + T2S; + T4E = T2K - T2S; + T42 = FNMS(T2Q, T2N, T41); + TP = T2 * TO; + T4G = FMA(KP618033988, T4F, T4E); + T6k = FNMS(KP618033988, T4E, T4F); + T4v = T38 - T2T; + T39 = T2T + T38; + { + E T43, T4x, T4u, T49; + T43 = T40 + T42; + T4x = T42 - T40; + T4u = FNMS(KP250000000, T39, T2G); + T3a = T2G + T39; + T4z = FMA(KP618033988, T4y, T4x); + T6n = FNMS(KP618033988, T4x, T4y); + T4C = T48 - T43; + T49 = T43 + T48; + T6m = FMA(KP559016994, T4v, T4u); + T4w = FNMS(KP559016994, T4v, T4u); + T4B = FNMS(KP250000000, T49, T3Y); + T4a = T3Y + T49; + TQ = ci[WS(rs, 1)]; + } + } + { + E T1n, T1r, T1i, T1o, T3o, T3p; + { + E T1d, T1h, T1e, T3n, T3g; + T1d = cr[WS(rs, 11)]; + T1h = ci[WS(rs, 11)]; + T4D = FNMS(KP559016994, T4C, T4B); + T6j = FMA(KP559016994, T4C, T4B); + TR = FMA(T5, TQ, TP); + T3g = T2 * TQ; + T1e = T1c * T1d; + T3n = T1c * 
T1h; + T1n = cr[WS(rs, 16)]; + T3h = FNMS(T5, TO, T3g); + T1r = ci[WS(rs, 16)]; + T1i = FMA(T1g, T1h, T1e); + T1o = T1m * T1n; + T3o = FNMS(T1g, T1d, T3n); + T3p = T1m * T1r; + } + { + E TU, TY, TV, T3i, T3q, T1s; + TU = cr[WS(rs, 6)]; + T1s = FMA(T1q, T1r, T1o); + TY = ci[WS(rs, 6)]; + T3q = FNMS(T1q, T1n, T3p); + TV = TT * TU; + T1t = T1i + T1s; + T53 = T1s - T1i; + T3i = TT * TY; + T3r = T3o + T3q; + T5a = T3q - T3o; + T12 = cr[WS(rs, 21)]; + T16 = ci[WS(rs, 21)]; + TZ = FMA(TX, TY, TV); + T3j = FNMS(TX, TU, T3i); + T13 = T11 * T12; + T3k = T11 * T16; + } + } + } + T17 = FMA(T15, T16, T13); + T3l = FNMS(T15, T12, T3k); + } + { + E T1z, T3v, T5i, T1Z, T3F, T5p, T1D, T3x, T1H, T3z; + { + E T1E, T1G, T1F, T3y; + { + E T1w, T1y, T1x, T57, T50, T56, T4Z, T3u, T18, T52; + T1w = cr[WS(rs, 4)]; + T1y = ci[WS(rs, 4)]; + T18 = TZ + T17; + T52 = T17 - TZ; + { + E T3m, T59, T1u, T3s; + T3m = T3j + T3l; + T59 = T3j - T3l; + T1x = T7 * T1w; + T6C = FNMS(KP618033988, T52, T53); + T54 = FMA(KP618033988, T53, T52); + T1u = T18 + T1t; + T57 = T18 - T1t; + T6z = FMA(KP618033988, T59, T5a); + T5b = FNMS(KP618033988, T5a, T59); + T3s = T3m + T3r; + T50 = T3m - T3r; + T1v = TR + T1u; + T56 = FNMS(KP250000000, T1u, TR); + T3t = T3h + T3s; + T4Z = FNMS(KP250000000, T3s, T3h); + T3u = T7 * T1y; + } + T6y = FNMS(KP559016994, T57, T56); + T58 = FMA(KP559016994, T57, T56); + T6B = FNMS(KP559016994, T50, T4Z); + T51 = FMA(KP559016994, T50, T4Z); + T1z = FMA(Tb, T1y, T1x); + T3v = FNMS(Tb, T1w, T3u); + } + { + E T1Q, T3C, T1Y, T3E; + { + E T1L, T1P, T1T, T1X, T1M, T3B, T1U, T3D; + T1L = cr[WS(rs, 14)]; + T1P = ci[WS(rs, 14)]; + T1T = cr[WS(rs, 19)]; + T1X = ci[WS(rs, 19)]; + T1M = T1K * T1L; + T3B = T1K * T1P; + T1U = T1S * T1T; + T3D = T1S * T1X; + T1Q = FMA(T1O, T1P, T1M); + T3C = FNMS(T1O, T1L, T3B); + T1Y = FMA(T1W, T1X, T1U); + T3E = FNMS(T1W, T1T, T3D); + } + { + E T1A, T1C, T1B, T3w; + T1A = cr[WS(rs, 9)]; + T1C = ci[WS(rs, 9)]; + T5i = T1Y - T1Q; + T1Z = T1Q + T1Y; + T3F = T3C + T3E; 
+ T5p = T3E - T3C; + T1B = T8 * T1A; + T3w = T8 * T1C; + T1E = cr[WS(rs, 24)]; + T1G = ci[WS(rs, 24)]; + T1D = FMA(Tc, T1C, T1B); + T3x = FNMS(Tc, T1A, T3w); + T1F = Tk * T1E; + T3y = Tk * T1G; + } + } + T1H = FMA(Tm, T1G, T1F); + T3z = FNMS(Tm, T1E, T3y); + } + { + E T2f, T2j, T2g, T3N; + { + E T23, T25, T24, T5m, T5f, T5l, T5e, T3J, T1I, T5h; + T23 = cr[WS(rs, 2)]; + T25 = ci[WS(rs, 2)]; + T1I = T1D + T1H; + T5h = T1H - T1D; + { + E T3A, T5o, T20, T3G; + T3A = T3x + T3z; + T5o = T3z - T3x; + T24 = T19 * T23; + T6v = FNMS(KP618033988, T5h, T5i); + T5j = FMA(KP618033988, T5i, T5h); + T20 = T1I + T1Z; + T5m = T1I - T1Z; + T6s = FNMS(KP618033988, T5o, T5p); + T5q = FMA(KP618033988, T5p, T5o); + T3G = T3A + T3F; + T5f = T3F - T3A; + T21 = T1z + T20; + T5l = FNMS(KP250000000, T20, T1z); + T3H = T3v + T3G; + T5e = FNMS(KP250000000, T3G, T3v); + T3J = T19 * T25; + } + T6r = FNMS(KP559016994, T5m, T5l); + T5n = FMA(KP559016994, T5m, T5l); + T6u = FMA(KP559016994, T5f, T5e); + T5g = FNMS(KP559016994, T5f, T5e); + T26 = FMA(T1b, T25, T24); + T3K = FNMS(T1b, T23, T3J); + } + { + E T2r, T3R, T2z, T3T; + { + E T2n, T2q, T2u, T2y, T2o, T3Q, T2v, T3S; + T2n = cr[WS(rs, 12)]; + T2q = ci[WS(rs, 12)]; + T2u = cr[WS(rs, 17)]; + T2y = ci[WS(rs, 17)]; + T2o = T2m * T2n; + T3Q = T2m * T2q; + T2v = T2t * T2u; + T3S = T2t * T2y; + T2r = FMA(T2p, T2q, T2o); + T3R = FNMS(T2p, T2n, T3Q); + T2z = FMA(T2x, T2y, T2v); + T3T = FNMS(T2x, T2u, T3S); + } + { + E T28, T2b, T29, T3L; + T28 = cr[WS(rs, 7)]; + T2b = ci[WS(rs, 7)]; + T4N = T2z - T2r; + T2A = T2r + T2z; + T3U = T3R + T3T; + T4U = T3R - T3T; + T29 = T27 * T28; + T3L = T27 * T2b; + T2f = cr[WS(rs, 22)]; + T2j = ci[WS(rs, 22)]; + T2c = FMA(T2a, T2b, T29); + T3M = FNMS(T2a, T28, T3L); + T2g = T2e * T2f; + T3N = T2e * T2j; + } + } + T2k = FMA(T2i, T2j, T2g); + T3O = FNMS(T2i, T2f, T3N); + } + } + } + } + { + E T8k, T6d, T6g, T8r, T6f, T8l, T6c, T8q, T69, T7r, T5Y, T8g, T8i, T66, T68; + E T5X, T8d, T8h; + { + E T4O, T4V, T22, T4S, T4L, T3b, 
T4e, T4c, T3I; + T8k = T3t + T3H; + T3I = T3t - T3H; + { + E T2l, T4M, T3P, T4T; + T2l = T2c + T2k; + T4M = T2k - T2c; + T3P = T3M + T3O; + T4T = T3O - T3M; + T4O = FMA(KP618033988, T4N, T4M); + T6d = FNMS(KP618033988, T4M, T4N); + { + E T4R, T2B, T4K, T3V; + T4R = T2A - T2l; + T2B = T2l + T2A; + T4V = FNMS(KP618033988, T4U, T4T); + T6g = FMA(KP618033988, T4T, T4U); + T4K = T3U - T3P; + T3V = T3P + T3U; + { + E T4Q, T2C, T4J, T3W, T4b; + T4Q = FNMS(KP250000000, T2B, T26); + T2C = T26 + T2B; + T4J = FNMS(KP250000000, T3V, T3K); + T3W = T3K + T3V; + T8r = T21 - T1v; + T22 = T1v + T21; + T4S = FNMS(KP559016994, T4R, T4Q); + T6f = FMA(KP559016994, T4R, T4Q); + T4b = T3W - T4a; + T8l = T3W + T4a; + T6c = FMA(KP559016994, T4K, T4J); + T4L = FNMS(KP559016994, T4K, T4J); + T8q = T2C - T3a; + T3b = T2C + T3a; + T4e = FNMS(KP618033988, T3I, T4b); + T4c = FMA(KP618033988, T4b, T3I); + } + } + } + { + E T5H, T4t, T7V, T87, T5Q, T5P, T5D, T8e, T5A, T8f, T5K, T60, T8c, T8a, T5u; + E T5w, T5U, T64, T5N, T61; + { + E T3e, T3d, T4h, T3c, T7T; + T4h = FMA(KP559016994, T4g, T4f); + T69 = FNMS(KP559016994, T4g, T4f); + T3c = T22 + T3b; + T3e = T22 - T3b; + T7r = FNMS(KP559016994, T7q, T7p); + T7T = FMA(KP559016994, T7q, T7p); + T5H = FMA(KP951056516, T4s, T4h); + T4t = FNMS(KP951056516, T4s, T4h); + cr[0] = TN + T3c; + T3d = FNMS(KP250000000, T3c, TN); + T7V = FNMS(KP951056516, T7U, T7T); + T87 = FMA(KP951056516, T7U, T7T); + { + E T5S, T5T, T5L, T4I, T5B, T5M, T55, T5J, T5s, T5z, T4X, T5C, T5I, T5c; + { + E T5k, T5r, T4P, T4W; + { + E T4A, T4d, T3f, T4H; + T4A = FMA(KP951056516, T4z, T4w); + T5S = FNMS(KP951056516, T4z, T4w); + T4d = FNMS(KP559016994, T3e, T3d); + T3f = FMA(KP559016994, T3e, T3d); + T5T = FNMS(KP951056516, T4G, T4D); + T4H = FMA(KP951056516, T4G, T4D); + T5k = FNMS(KP951056516, T5j, T5g); + T5L = FMA(KP951056516, T5j, T5g); + cr[WS(rs, 5)] = FMA(KP951056516, T4c, T3f); + ci[WS(rs, 4)] = FNMS(KP951056516, T4c, T3f); + ci[WS(rs, 9)] = FMA(KP951056516, T4e, T4d); + 
cr[WS(rs, 10)] = FNMS(KP951056516, T4e, T4d); + T4I = FNMS(KP126329378, T4H, T4A); + T5B = FMA(KP126329378, T4A, T4H); + T5M = FNMS(KP951056516, T5q, T5n); + T5r = FMA(KP951056516, T5q, T5n); + } + T4P = FNMS(KP951056516, T4O, T4L); + T5Q = FMA(KP951056516, T4O, T4L); + T5P = FNMS(KP951056516, T4V, T4S); + T4W = FMA(KP951056516, T4V, T4S); + T55 = FNMS(KP951056516, T54, T51); + T5J = FMA(KP951056516, T54, T51); + T5s = FMA(KP827271945, T5r, T5k); + T5z = FNMS(KP827271945, T5k, T5r); + T4X = FNMS(KP470564281, T4W, T4P); + T5C = FMA(KP470564281, T4P, T4W); + T5I = FMA(KP951056516, T5b, T58); + T5c = FNMS(KP951056516, T5b, T58); + } + { + E T88, T4Y, T5d, T5y, T89, T5t; + T5D = FNMS(KP912018591, T5C, T5B); + T88 = FMA(KP912018591, T5C, T5B); + T8e = FMA(KP912018591, T4X, T4I); + T4Y = FNMS(KP912018591, T4X, T4I); + T5d = FMA(KP634619297, T5c, T55); + T5y = FNMS(KP634619297, T55, T5c); + T5A = FMA(KP912575812, T5z, T5y); + T89 = FNMS(KP912575812, T5z, T5y); + T8f = FMA(KP912575812, T5s, T5d); + T5t = FNMS(KP912575812, T5s, T5d); + T5K = FMA(KP256756360, T5J, T5I); + T60 = FNMS(KP256756360, T5I, T5J); + T8c = FNMS(KP851038619, T89, T88); + T8a = FMA(KP851038619, T89, T88); + T5u = FNMS(KP851038619, T5t, T4Y); + T5w = FMA(KP851038619, T5t, T4Y); + } + T5U = FMA(KP939062505, T5T, T5S); + T64 = FNMS(KP939062505, T5S, T5T); + T5N = FMA(KP634619297, T5M, T5L); + T61 = FNMS(KP634619297, T5L, T5M); + } + } + { + E T62, T7W, T83, T5O, T5R, T63; + cr[WS(rs, 4)] = FNMS(KP992114701, T5u, T4t); + T62 = FMA(KP871714437, T61, T60); + T7W = FNMS(KP871714437, T61, T60); + T83 = FNMS(KP871714437, T5N, T5K); + T5O = FMA(KP871714437, T5N, T5K); + T5R = FMA(KP549754652, T5Q, T5P); + T63 = FNMS(KP549754652, T5P, T5Q); + ci[WS(rs, 20)] = FNMS(KP992114701, T8a, T87); + { + E T65, T5W, T84, T86, T81, T85, T8b; + { + E T5E, T5G, T82, T80, T7Y, T5v, T7X, T5V, T5F, T5x, T7Z; + T5E = FNMS(KP726211448, T5D, T5A); + T5G = FMA(KP525970792, T5A, T5D); + T65 = FNMS(KP831864738, T64, T63); + T7X = 
FMA(KP831864738, T64, T63); + T82 = FNMS(KP831864738, T5U, T5R); + T5V = FMA(KP831864738, T5U, T5R); + T80 = FNMS(KP904730450, T7X, T7W); + T7Y = FMA(KP904730450, T7X, T7W); + T5Y = FNMS(KP904730450, T5V, T5O); + T5W = FMA(KP904730450, T5V, T5O); + T5v = FMA(KP248028675, T5u, T4t); + ci[WS(rs, 23)] = FMA(KP968583161, T7Y, T7V); + cr[WS(rs, 1)] = FMA(KP968583161, T5W, T5H); + T84 = FNMS(KP683113946, T83, T82); + T86 = FMA(KP559154169, T82, T83); + T5F = FNMS(KP554608978, T5w, T5v); + T5x = FMA(KP554608978, T5w, T5v); + T7Z = FNMS(KP242145790, T7Y, T7V); + ci[WS(rs, 10)] = FNMS(KP943557151, T5G, T5F); + ci[WS(rs, 5)] = FMA(KP943557151, T5G, T5F); + ci[0] = FMA(KP803003575, T5E, T5x); + cr[WS(rs, 9)] = FNMS(KP803003575, T5E, T5x); + T81 = FNMS(KP541454447, T80, T7Z); + T85 = FMA(KP541454447, T80, T7Z); + } + T8g = FNMS(KP525970792, T8f, T8e); + T8i = FMA(KP726211448, T8e, T8f); + ci[WS(rs, 13)] = FMA(KP833417178, T84, T81); + cr[WS(rs, 16)] = FMS(KP833417178, T84, T81); + cr[WS(rs, 21)] = -(FMA(KP921177326, T86, T85)); + ci[WS(rs, 18)] = FNMS(KP921177326, T86, T85); + T8b = FMA(KP248028675, T8a, T87); + T66 = FMA(KP559154169, T65, T62); + T68 = FNMS(KP683113946, T62, T65); + T5X = FNMS(KP242145790, T5W, T5H); + T8d = FNMS(KP554608978, T8c, T8b); + T8h = FMA(KP554608978, T8c, T8b); + } + } + } + } + { + E T8s, T8u, T5Z, T67; + cr[WS(rs, 24)] = -(FMA(KP803003575, T8i, T8h)); + ci[WS(rs, 15)] = FNMS(KP803003575, T8i, T8h); + cr[WS(rs, 19)] = FMS(KP943557151, T8g, T8d); + cr[WS(rs, 14)] = -(FMA(KP943557151, T8g, T8d)); + T5Z = FMA(KP541454447, T5Y, T5X); + T67 = FNMS(KP541454447, T5Y, T5X); + cr[WS(rs, 11)] = FNMS(KP833417178, T68, T67); + ci[WS(rs, 8)] = FMA(KP833417178, T68, T67); + cr[WS(rs, 6)] = FMA(KP921177326, T66, T5Z); + ci[WS(rs, 3)] = FNMS(KP921177326, T66, T5Z); + T8s = FMA(KP618033988, T8r, T8q); + T8u = FNMS(KP618033988, T8q, T8r); + { + E T6X, T6T, T6b, T7H, T7v, T6Y, T72, T71, T6P, T7O, T6M, T7P, T7K, T6G, T6I; + E T6W, T7f, T7d, T76; + { + E T74, T75, 
T6i, T6N, T6L, T6E, T6U, T6l, T6o, T6V, T6t, T6w; + { + E T6e, T8o, T8n, T6h, T8m; + T6X = FNMS(KP951056516, T6d, T6c); + T6e = FMA(KP951056516, T6d, T6c); + T8o = T8k - T8l; + T8m = T8k + T8l; + T6T = FNMS(KP951056516, T6a, T69); + T6b = FMA(KP951056516, T6a, T69); + T7H = FNMS(KP951056516, T7u, T7r); + T7v = FMA(KP951056516, T7u, T7r); + ci[WS(rs, 24)] = T8m + T8j; + T8n = FNMS(KP250000000, T8m, T8j); + T6h = FMA(KP951056516, T6g, T6f); + T6Y = FNMS(KP951056516, T6g, T6f); + { + E T6A, T6D, T8t, T8p; + T74 = FMA(KP951056516, T6z, T6y); + T6A = FNMS(KP951056516, T6z, T6y); + T6D = FMA(KP951056516, T6C, T6B); + T75 = FNMS(KP951056516, T6C, T6B); + T8t = FMA(KP559016994, T8o, T8n); + T8p = FNMS(KP559016994, T8o, T8n); + T6i = FMA(KP062914667, T6h, T6e); + T6N = FNMS(KP062914667, T6e, T6h); + ci[WS(rs, 14)] = FMA(KP951056516, T8s, T8p); + cr[WS(rs, 15)] = FMS(KP951056516, T8s, T8p); + ci[WS(rs, 19)] = FMA(KP951056516, T8u, T8t); + cr[WS(rs, 20)] = FMS(KP951056516, T8u, T8t); + T6L = FNMS(KP939062505, T6A, T6D); + T6E = FMA(KP939062505, T6D, T6A); + } + } + T6U = FMA(KP951056516, T6k, T6j); + T6l = FNMS(KP951056516, T6k, T6j); + T6o = FNMS(KP951056516, T6n, T6m); + T6V = FMA(KP951056516, T6n, T6m); + T72 = FMA(KP951056516, T6s, T6r); + T6t = FNMS(KP951056516, T6s, T6r); + T6w = FMA(KP951056516, T6v, T6u); + T71 = FNMS(KP951056516, T6v, T6u); + { + E T6q, T6F, T6O, T6p; + T6O = FMA(KP827271945, T6l, T6o); + T6p = FNMS(KP827271945, T6o, T6l); + { + E T6K, T6x, T7I, T7J; + T6K = FMA(KP126329378, T6t, T6w); + T6x = FNMS(KP126329378, T6w, T6t); + T7I = FMA(KP772036680, T6O, T6N); + T6P = FNMS(KP772036680, T6O, T6N); + T6q = FMA(KP772036680, T6p, T6i); + T7O = FNMS(KP772036680, T6p, T6i); + T7J = FNMS(KP734762448, T6L, T6K); + T6M = FMA(KP734762448, T6L, T6K); + T6F = FNMS(KP734762448, T6E, T6x); + T7P = FMA(KP734762448, T6E, T6x); + T7K = FMA(KP994076283, T7J, T7I); + T7M = FNMS(KP994076283, T7J, T7I); + } + T6G = FNMS(KP994076283, T6F, T6q); + T6I = FMA(KP994076283, T6F, 
T6q); + } + T6W = FMA(KP062914667, T6V, T6U); + T7f = FNMS(KP062914667, T6U, T6V); + T7d = FNMS(KP549754652, T74, T75); + T76 = FMA(KP549754652, T75, T74); + } + { + E T7h, T7C, T7e, T7D, T7y, T7A, T78, T7a; + { + E T70, T77, T7g, T6Z; + cr[WS(rs, 3)] = FMA(KP998026728, T6G, T6b); + T7g = FNMS(KP634619297, T6X, T6Y); + T6Z = FMA(KP634619297, T6Y, T6X); + { + E T7c, T73, T7w, T7x; + T7c = FMA(KP470564281, T71, T72); + T73 = FNMS(KP470564281, T72, T71); + T7w = FMA(KP845997307, T7g, T7f); + T7h = FNMS(KP845997307, T7g, T7f); + T70 = FMA(KP845997307, T6Z, T6W); + T7C = FNMS(KP845997307, T6Z, T6W); + T7x = FNMS(KP968479752, T7d, T7c); + T7e = FMA(KP968479752, T7d, T7c); + T77 = FMA(KP968479752, T76, T73); + T7D = FNMS(KP968479752, T76, T73); + T7y = FMA(KP906616052, T7x, T7w); + T7A = FNMS(KP906616052, T7x, T7w); + } + ci[WS(rs, 21)] = FNMS(KP998026728, T7K, T7H); + T78 = FMA(KP906616052, T77, T70); + T7a = FNMS(KP906616052, T77, T70); + } + { + E T7G, T7E, T7k, T7i, T79, T7F, T7B, T7z, T6H, T7j, T7b; + T7G = FMA(KP681693190, T7C, T7D); + T7E = FNMS(KP560319534, T7D, T7C); + ci[WS(rs, 22)] = FNMS(KP998026728, T7y, T7v); + cr[WS(rs, 2)] = FMA(KP998026728, T78, T6T); + T7z = FMA(KP249506682, T7y, T7v); + T7k = FNMS(KP560319534, T7e, T7h); + T7i = FMA(KP681693190, T7h, T7e); + T79 = FNMS(KP249506682, T78, T6T); + T7F = FMA(KP557913902, T7A, T7z); + T7B = FNMS(KP557913902, T7A, T7z); + T6S = FMA(KP614372930, T6M, T6P); + T6Q = FNMS(KP621716863, T6P, T6M); + cr[WS(rs, 22)] = FMS(KP860541664, T7G, T7F); + ci[WS(rs, 17)] = FMA(KP860541664, T7G, T7F); + ci[WS(rs, 12)] = FNMS(KP949179823, T7E, T7B); + cr[WS(rs, 17)] = -(FMA(KP949179823, T7E, T7B)); + T7j = FMA(KP557913902, T7a, T79); + T7b = FNMS(KP557913902, T7a, T79); + T6H = FNMS(KP249506682, T6G, T6b); + ci[WS(rs, 7)] = FMA(KP949179823, T7k, T7j); + cr[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j); + cr[WS(rs, 7)] = FMA(KP860541664, T7i, T7b); + ci[WS(rs, 2)] = FNMS(KP860541664, T7i, T7b); + T7S = FMA(KP621716863, T7O, T7P); + 
T7Q = FNMS(KP614372930, T7P, T7O); + T7L = FMA(KP249506682, T7K, T7H); + T6R = FMA(KP557913902, T6I, T6H); + T6J = FNMS(KP557913902, T6I, T6H); + } + } + } + } + } + } + } + ci[WS(rs, 6)] = FNMS(KP949179823, T6S, T6R); + ci[WS(rs, 11)] = FMA(KP949179823, T6S, T6R); + cr[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J); + ci[WS(rs, 1)] = FNMS(KP943557151, T6Q, T6J); + T7N = FNMS(KP557913902, T7M, T7L); + T7R = FMA(KP557913902, T7M, T7L); + cr[WS(rs, 23)] = -(FMA(KP943557151, T7S, T7R)); + ci[WS(rs, 16)] = FNMS(KP943557151, T7S, T7R); + cr[WS(rs, 18)] = FMS(KP949179823, T7Q, T7N); + cr[WS(rs, 13)] = -(FMA(KP949179823, T7Q, T7N)); + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 24}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hf2_25", twinstr, &GENUS, {84, 78, 356, 0} }; + +void X(codelet_hf2_25) (planner *p) { + X(khc2hc_register) (p, hf2_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -dit -name hf2_25 -include hf.h */ + +/* + * This function contains 440 FP additions, 340 FP multiplications, + * (or, 280 additions, 180 multiplications, 160 fused multiply/add), + * 149 stack variables, 20 constants, and 100 memory accesses + */ +#include "hf.h" + +static void hf2_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, 
+0.968583161128631119490168375464735813836012403); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(50, rs)) { + E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g; + E TB, T1f, TV, T1Q, Tg, T1S, Tk, T18, T2s, T1c, T2q, Tn, To, Tp, Tr; + E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V; + E T1X, T1z, T1j; + { + E Tw, TT, Tz, TQ, Tv, TU, TA, TP; + { + E T4, Tc, T7, Tb; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tc = T5 * T3; + T7 = T5 * T6; + Tb = T2 * T6; + T8 = T4 - T7; + Td = Tb + Tc; + T16 = Tb - Tc; + T14 = T4 + T7; + Te = W[5]; + Tw = T5 * Te; + TT = T3 * Te; + Tz = T2 * Te; + TQ = T6 * Te; + T9 = W[4]; + Tv = T2 * T9; + TU = T6 * T9; + TA = T5 * T9; + TP = T3 * T9; + } + T21 = TP - TQ; + T23 = TT + TU; + { + E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj; + Tx = Tv - Tw; + TR = TP + TQ; + T1g = Tz - TA; + TB = Tz + TA; + T1f = Tv + Tw; + TV = TT - TU; + T15 = T14 * T9; + T17 = T16 * Te; + T1Q = T15 + T17; + Ta = T8 * T9; + Tf = Td * Te; + Tg = Ta + Tf; + T1a = T14 * Te; + T1b = T16 * T9; + T1S = T1a - T1b; + Ti 
= T8 * Te; + Tj = Td * T9; + Tk = Ti - Tj; + T18 = T15 - T17; + T2s = Ti + Tj; + T1c = T1a + T1b; + T2q = Ta - Tf; + Tn = W[6]; + To = W[7]; + Tp = FMA(T8, Tn, Td * To); + Tr = FNMS(Td, Tn, T8 * To); + T28 = FNMS(T1S, Tn, T1Q * To); + T2x = FNMS(TV, Tn, TR * To); + TY = FMA(T3, Tn, T6 * To); + T2k = FMA(T2, Tn, T5 * To); + T2m = FNMS(T5, Tn, T2 * To); + T2v = FMA(TR, Tn, TV * To); + TG = FNMS(Te, Tn, T9 * To); + TE = FMA(T9, Tn, Te * To); + T10 = FNMS(T6, Tn, T3 * To); + T1h = FMA(T1f, Tn, T1g * To); + T1E = FMA(Tg, Tn, Tk * To); + T26 = FMA(T1Q, Tn, T1S * To); + T1B = FNMS(TB, Tn, Tx * To); + T1G = FNMS(Tk, Tn, Tg * To); + T1V = FMA(T14, Tn, T16 * To); + T1X = FNMS(T16, Tn, T14 * To); + T1z = FMA(Tx, Tn, TB * To); + T1j = FNMS(T1g, Tn, T1f * To); + } + } + { + E T1, T6v, T2F, T6A, TK, T2G, T6y, T6z, T6u, T71, T2O, T52, T2C, T6k, T4c; + E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N; + E T4B, T59, T1L, T6h, T3r, T5P, T4E, T5d, T3y, T5Q, T4D, T5g, T2d, T6j, T3P; + E T5U, T4I, T5o, T3W, T5T, T4H, T5l; + { + E Tm, T2I, Tt, T2J, Tu, T6w, TD, T2L, TI, T2M, TJ, T6x; + T1 = cr[0]; + T6v = ci[0]; + { + E Th, Tl, Tq, Ts; + Th = cr[WS(rs, 5)]; + Tl = ci[WS(rs, 5)]; + Tm = FMA(Tg, Th, Tk * Tl); + T2I = FNMS(Tk, Th, Tg * Tl); + Tq = cr[WS(rs, 20)]; + Ts = ci[WS(rs, 20)]; + Tt = FMA(Tp, Tq, Tr * Ts); + T2J = FNMS(Tr, Tq, Tp * Ts); + } + Tu = Tm + Tt; + T6w = T2I + T2J; + { + E Ty, TC, TF, TH; + Ty = cr[WS(rs, 10)]; + TC = ci[WS(rs, 10)]; + TD = FMA(Tx, Ty, TB * TC); + T2L = FNMS(TB, Ty, Tx * TC); + TF = cr[WS(rs, 15)]; + TH = ci[WS(rs, 15)]; + TI = FMA(TE, TF, TG * TH); + T2M = FNMS(TG, TF, TE * TH); + } + TJ = TD + TI; + T6x = T2L + T2M; + T2F = KP559016994 * (Tu - TJ); + T6A = KP559016994 * (T6w - T6x); + TK = Tu + TJ; + T2G = FNMS(KP250000000, TK, T1); + T6y = T6w + T6x; + T6z = FNMS(KP250000000, T6y, T6v); + { + E T6s, T6t, T2K, T2N; + T6s = TD - TI; + T6t = Tm - Tt; + T6u = FNMS(KP587785252, T6t, KP951056516 * T6s); + T71 = FMA(KP951056516, 
T6t, KP587785252 * T6s); + T2K = T2I - T2J; + T2N = T2L - T2M; + T2O = FMA(KP951056516, T2K, KP587785252 * T2N); + T52 = FNMS(KP587785252, T2K, KP951056516 * T2N); + } + } + { + E T2g, T48, T3Y, T3Z, T4h, T4g, T43, T46, T49, T2p, T2A, T2B, T2e, T2f; + T2e = cr[WS(rs, 3)]; + T2f = ci[WS(rs, 3)]; + T2g = FMA(T3, T2e, T6 * T2f); + T48 = FNMS(T6, T2e, T3 * T2f); + { + E T2j, T41, T2z, T45, T2o, T42, T2u, T44; + { + E T2h, T2i, T2w, T2y; + T2h = cr[WS(rs, 8)]; + T2i = ci[WS(rs, 8)]; + T2j = FMA(T1f, T2h, T1g * T2i); + T41 = FNMS(T1g, T2h, T1f * T2i); + T2w = cr[WS(rs, 18)]; + T2y = ci[WS(rs, 18)]; + T2z = FMA(T2v, T2w, T2x * T2y); + T45 = FNMS(T2x, T2w, T2v * T2y); + } + { + E T2l, T2n, T2r, T2t; + T2l = cr[WS(rs, 23)]; + T2n = ci[WS(rs, 23)]; + T2o = FMA(T2k, T2l, T2m * T2n); + T42 = FNMS(T2m, T2l, T2k * T2n); + T2r = cr[WS(rs, 13)]; + T2t = ci[WS(rs, 13)]; + T2u = FMA(T2q, T2r, T2s * T2t); + T44 = FNMS(T2s, T2r, T2q * T2t); + } + T3Y = T2j - T2o; + T3Z = T2u - T2z; + T4h = T44 - T45; + T4g = T41 - T42; + T43 = T41 + T42; + T46 = T44 + T45; + T49 = T43 + T46; + T2p = T2j + T2o; + T2A = T2u + T2z; + T2B = T2p + T2A; + } + T2C = T2g + T2B; + T6k = T48 + T49; + { + E T40, T5r, T4b, T5q, T47, T4a; + T40 = FMA(KP951056516, T3Y, KP587785252 * T3Z); + T5r = FNMS(KP587785252, T3Y, KP951056516 * T3Z); + T47 = KP559016994 * (T43 - T46); + T4a = FNMS(KP250000000, T49, T48); + T4b = T47 + T4a; + T5q = T4a - T47; + T4c = T40 + T4b; + T5X = T5r + T5q; + T4L = T4b - T40; + T5s = T5q - T5r; + } + { + E T4i, T5u, T4f, T5t, T4d, T4e; + T4i = FMA(KP951056516, T4g, KP587785252 * T4h); + T5u = FNMS(KP587785252, T4g, KP951056516 * T4h); + T4d = KP559016994 * (T2p - T2A); + T4e = FNMS(KP250000000, T2B, T2g); + T4f = T4d + T4e; + T5t = T4e - T4d; + T4j = T4f - T4i; + T5W = T5t - T5u; + T4K = T4f + T4i; + T5v = T5t + T5u; + } + } + { + E TO, T37, T2V, T2Y, T32, T31, T34, T35, T38, T13, T1m, T1n, TM, TN; + TM = cr[WS(rs, 1)]; + TN = ci[WS(rs, 1)]; + TO = FMA(T2, TM, T5 * TN); + T37 = FNMS(T5, 
TM, T2 * TN); + { + E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W; + { + E TS, TW, T1i, T1k; + TS = cr[WS(rs, 6)]; + TW = ci[WS(rs, 6)]; + TX = FMA(TR, TS, TV * TW); + T2T = FNMS(TV, TS, TR * TW); + T1i = cr[WS(rs, 16)]; + T1k = ci[WS(rs, 16)]; + T1l = FMA(T1h, T1i, T1j * T1k); + T2X = FNMS(T1j, T1i, T1h * T1k); + } + { + E TZ, T11, T19, T1d; + TZ = cr[WS(rs, 21)]; + T11 = ci[WS(rs, 21)]; + T12 = FMA(TY, TZ, T10 * T11); + T2U = FNMS(T10, TZ, TY * T11); + T19 = cr[WS(rs, 11)]; + T1d = ci[WS(rs, 11)]; + T1e = FMA(T18, T19, T1c * T1d); + T2W = FNMS(T1c, T19, T18 * T1d); + } + T2V = T2T - T2U; + T2Y = T2W - T2X; + T32 = T1e - T1l; + T31 = TX - T12; + T34 = T2T + T2U; + T35 = T2W + T2X; + T38 = T34 + T35; + T13 = TX + T12; + T1m = T1e + T1l; + T1n = T13 + T1m; + } + T1o = TO + T1n; + T6g = T37 + T38; + { + E T2Z, T55, T2S, T54, T2Q, T2R; + T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y); + T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y); + T2Q = KP559016994 * (T13 - T1m); + T2R = FNMS(KP250000000, T1n, TO); + T2S = T2Q + T2R; + T54 = T2R - T2Q; + T30 = T2S - T2Z; + T5M = T54 - T55; + T4A = T2S + T2Z; + T56 = T54 + T55; + } + { + E T33, T58, T3a, T57, T36, T39; + T33 = FMA(KP951056516, T31, KP587785252 * T32); + T58 = FNMS(KP587785252, T31, KP951056516 * T32); + T36 = KP559016994 * (T34 - T35); + T39 = FNMS(KP250000000, T38, T37); + T3a = T36 + T39; + T57 = T39 - T36; + T3b = T33 + T3a; + T5N = T58 + T57; + T4B = T3a - T33; + T59 = T57 - T58; + } + } + { + E T1r, T3n, T3d, T3e, T3w, T3v, T3i, T3l, T3o, T1y, T1J, T1K, T1p, T1q; + T1p = cr[WS(rs, 4)]; + T1q = ci[WS(rs, 4)]; + T1r = FMA(T8, T1p, Td * T1q); + T3n = FNMS(Td, T1p, T8 * T1q); + { + E T1u, T3g, T1I, T3k, T1x, T3h, T1D, T3j; + { + E T1s, T1t, T1F, T1H; + T1s = cr[WS(rs, 9)]; + T1t = ci[WS(rs, 9)]; + T1u = FMA(T9, T1s, Te * T1t); + T3g = FNMS(Te, T1s, T9 * T1t); + T1F = cr[WS(rs, 19)]; + T1H = ci[WS(rs, 19)]; + T1I = FMA(T1E, T1F, T1G * T1H); + T3k = FNMS(T1G, T1F, T1E * T1H); + } + { + E T1v, T1w, T1A, T1C; + T1v = 
cr[WS(rs, 24)]; + T1w = ci[WS(rs, 24)]; + T1x = FMA(Tn, T1v, To * T1w); + T3h = FNMS(To, T1v, Tn * T1w); + T1A = cr[WS(rs, 14)]; + T1C = ci[WS(rs, 14)]; + T1D = FMA(T1z, T1A, T1B * T1C); + T3j = FNMS(T1B, T1A, T1z * T1C); + } + T3d = T1x - T1u; + T3e = T1D - T1I; + T3w = T3j - T3k; + T3v = T3g - T3h; + T3i = T3g + T3h; + T3l = T3j + T3k; + T3o = T3i + T3l; + T1y = T1u + T1x; + T1J = T1D + T1I; + T1K = T1y + T1J; + } + T1L = T1r + T1K; + T6h = T3n + T3o; + { + E T3f, T5c, T3q, T5b, T3m, T3p; + T3f = FNMS(KP587785252, T3e, KP951056516 * T3d); + T5c = FMA(KP587785252, T3d, KP951056516 * T3e); + T3m = KP559016994 * (T3i - T3l); + T3p = FNMS(KP250000000, T3o, T3n); + T3q = T3m + T3p; + T5b = T3p - T3m; + T3r = T3f - T3q; + T5P = T5c + T5b; + T4E = T3f + T3q; + T5d = T5b - T5c; + } + { + E T3x, T5f, T3u, T5e, T3s, T3t; + T3x = FMA(KP951056516, T3v, KP587785252 * T3w); + T5f = FNMS(KP587785252, T3v, KP951056516 * T3w); + T3s = KP559016994 * (T1y - T1J); + T3t = FNMS(KP250000000, T1K, T1r); + T3u = T3s + T3t; + T5e = T3t - T3s; + T3y = T3u - T3x; + T5Q = T5e - T5f; + T4D = T3u + T3x; + T5g = T5e + T5f; + } + } + { + E T1P, T3L, T3B, T3C, T3U, T3T, T3G, T3J, T3M, T20, T2b, T2c, T1N, T1O; + T1N = cr[WS(rs, 2)]; + T1O = ci[WS(rs, 2)]; + T1P = FMA(T14, T1N, T16 * T1O); + T3L = FNMS(T16, T1N, T14 * T1O); + { + E T1U, T3E, T2a, T3I, T1Z, T3F, T25, T3H; + { + E T1R, T1T, T27, T29; + T1R = cr[WS(rs, 7)]; + T1T = ci[WS(rs, 7)]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T3E = FNMS(T1S, T1R, T1Q * T1T); + T27 = cr[WS(rs, 17)]; + T29 = ci[WS(rs, 17)]; + T2a = FMA(T26, T27, T28 * T29); + T3I = FNMS(T28, T27, T26 * T29); + } + { + E T1W, T1Y, T22, T24; + T1W = cr[WS(rs, 22)]; + T1Y = ci[WS(rs, 22)]; + T1Z = FMA(T1V, T1W, T1X * T1Y); + T3F = FNMS(T1X, T1W, T1V * T1Y); + T22 = cr[WS(rs, 12)]; + T24 = ci[WS(rs, 12)]; + T25 = FMA(T21, T22, T23 * T24); + T3H = FNMS(T23, T22, T21 * T24); + } + T3B = T1U - T1Z; + T3C = T25 - T2a; + T3U = T3H - T3I; + T3T = T3E - T3F; + T3G = T3E + T3F; + T3J = T3H + 
T3I; + T3M = T3G + T3J; + T20 = T1U + T1Z; + T2b = T25 + T2a; + T2c = T20 + T2b; + } + T2d = T1P + T2c; + T6j = T3L + T3M; + { + E T3D, T5n, T3O, T5m, T3K, T3N; + T3D = FMA(KP951056516, T3B, KP587785252 * T3C); + T5n = FNMS(KP587785252, T3B, KP951056516 * T3C); + T3K = KP559016994 * (T3G - T3J); + T3N = FNMS(KP250000000, T3M, T3L); + T3O = T3K + T3N; + T5m = T3N - T3K; + T3P = T3D + T3O; + T5U = T5n + T5m; + T4I = T3O - T3D; + T5o = T5m - T5n; + } + { + E T3V, T5k, T3S, T5j, T3Q, T3R; + T3V = FMA(KP951056516, T3T, KP587785252 * T3U); + T5k = FNMS(KP587785252, T3T, KP951056516 * T3U); + T3Q = KP559016994 * (T20 - T2b); + T3R = FNMS(KP250000000, T2c, T1P); + T3S = T3Q + T3R; + T5j = T3R - T3Q; + T3W = T3S - T3V; + T5T = T5j - T5k; + T4H = T3S + T3V; + T5l = T5j + T5k; + } + } + { + E T6m, T6o, TL, T2E, T6d, T6e, T6n, T6f; + { + E T6i, T6l, T1M, T2D; + T6i = T6g - T6h; + T6l = T6j - T6k; + T6m = FMA(KP951056516, T6i, KP587785252 * T6l); + T6o = FNMS(KP587785252, T6i, KP951056516 * T6l); + TL = T1 + TK; + T1M = T1o + T1L; + T2D = T2d + T2C; + T2E = T1M + T2D; + T6d = KP559016994 * (T1M - T2D); + T6e = FNMS(KP250000000, T2E, TL); + } + cr[0] = TL + T2E; + T6n = T6e - T6d; + cr[WS(rs, 10)] = T6n - T6o; + ci[WS(rs, 9)] = T6n + T6o; + T6f = T6d + T6e; + ci[WS(rs, 4)] = T6f - T6m; + cr[WS(rs, 5)] = T6f + T6m; + } + { + E T2P, T4z, T72, T7e, T4m, T7j, T4n, T7i, T4U, T77, T4X, T75, T4O, T6Y, T4P; + E T6X, T4s, T7f, T4v, T7d, T2H, T70; + T2H = T2F + T2G; + T2P = T2H - T2O; + T4z = T2H + T2O; + T70 = T6A + T6z; + T72 = T70 - T71; + T7e = T71 + T70; + { + E T3c, T3z, T3A, T3X, T4k, T4l; + T3c = FMA(KP535826794, T30, KP844327925 * T3b); + T3z = FNMS(KP637423989, T3y, KP770513242 * T3r); + T3A = T3c + T3z; + T3X = FNMS(KP425779291, T3W, KP904827052 * T3P); + T4k = FNMS(KP992114701, T4j, KP125333233 * T4c); + T4l = T3X + T4k; + T4m = T3A + T4l; + T7j = T3X - T4k; + T4n = KP559016994 * (T3A - T4l); + T7i = T3z - T3c; + } + { + E T4S, T4T, T73, T4V, T4W, T74; + T4S = 
FNMS(KP248689887, T4A, KP968583161 * T4B); + T4T = FNMS(KP844327925, T4D, KP535826794 * T4E); + T73 = T4S + T4T; + T4V = FNMS(KP481753674, T4H, KP876306680 * T4I); + T4W = FNMS(KP684547105, T4K, KP728968627 * T4L); + T74 = T4V + T4W; + T4U = T4S - T4T; + T77 = KP559016994 * (T73 - T74); + T4X = T4V - T4W; + T75 = T73 + T74; + } + { + E T4C, T4F, T4G, T4J, T4M, T4N; + T4C = FMA(KP968583161, T4A, KP248689887 * T4B); + T4F = FMA(KP535826794, T4D, KP844327925 * T4E); + T4G = T4C + T4F; + T4J = FMA(KP876306680, T4H, KP481753674 * T4I); + T4M = FMA(KP728968627, T4K, KP684547105 * T4L); + T4N = T4J + T4M; + T4O = T4G + T4N; + T6Y = T4J - T4M; + T4P = KP559016994 * (T4G - T4N); + T6X = T4F - T4C; + } + { + E T4q, T4r, T7b, T4t, T4u, T7c; + T4q = FNMS(KP844327925, T30, KP535826794 * T3b); + T4r = FMA(KP770513242, T3y, KP637423989 * T3r); + T7b = T4q + T4r; + T4t = FMA(KP125333233, T4j, KP992114701 * T4c); + T4u = FMA(KP904827052, T3W, KP425779291 * T3P); + T7c = T4u + T4t; + T4s = T4q - T4r; + T7f = T7b - T7c; + T4v = T4t - T4u; + T7d = KP559016994 * (T7b + T7c); + } + cr[WS(rs, 4)] = T2P + T4m; + ci[WS(rs, 23)] = T75 + T72; + ci[WS(rs, 20)] = T7f + T7e; + cr[WS(rs, 1)] = T4z + T4O; + { + E T4w, T4y, T4p, T4x, T4o; + T4w = FMA(KP951056516, T4s, KP587785252 * T4v); + T4y = FNMS(KP587785252, T4s, KP951056516 * T4v); + T4o = FNMS(KP250000000, T4m, T2P); + T4p = T4n + T4o; + T4x = T4o - T4n; + ci[0] = T4p - T4w; + ci[WS(rs, 5)] = T4x + T4y; + cr[WS(rs, 9)] = T4p + T4w; + ci[WS(rs, 10)] = T4x - T4y; + } + { + E T6Z, T79, T78, T7a, T76; + T6Z = FMA(KP587785252, T6X, KP951056516 * T6Y); + T79 = FNMS(KP587785252, T6Y, KP951056516 * T6X); + T76 = FNMS(KP250000000, T75, T72); + T78 = T76 - T77; + T7a = T77 + T76; + cr[WS(rs, 16)] = T6Z - T78; + ci[WS(rs, 18)] = T79 + T7a; + ci[WS(rs, 13)] = T6Z + T78; + cr[WS(rs, 21)] = T79 - T7a; + } + { + E T7k, T7l, T7h, T7m, T7g; + T7k = FMA(KP587785252, T7i, KP951056516 * T7j); + T7l = FNMS(KP587785252, T7j, KP951056516 * T7i); + T7g = 
FNMS(KP250000000, T7f, T7e); + T7h = T7d - T7g; + T7m = T7d + T7g; + cr[WS(rs, 14)] = T7h - T7k; + ci[WS(rs, 15)] = T7l + T7m; + cr[WS(rs, 19)] = T7k + T7h; + cr[WS(rs, 24)] = T7l - T7m; + } + { + E T4Y, T50, T4R, T4Z, T4Q; + T4Y = FMA(KP951056516, T4U, KP587785252 * T4X); + T50 = FNMS(KP587785252, T4U, KP951056516 * T4X); + T4Q = FNMS(KP250000000, T4O, T4z); + T4R = T4P + T4Q; + T4Z = T4Q - T4P; + ci[WS(rs, 3)] = T4R - T4Y; + ci[WS(rs, 8)] = T4Z + T50; + cr[WS(rs, 6)] = T4R + T4Y; + cr[WS(rs, 11)] = T4Z - T50; + } + } + { + E T7p, T7x, T7q, T7t, T7u, T7v, T7y, T7w; + { + E T7n, T7o, T7r, T7s; + T7n = T1L - T1o; + T7o = T2d - T2C; + T7p = FMA(KP587785252, T7n, KP951056516 * T7o); + T7x = FNMS(KP587785252, T7o, KP951056516 * T7n); + T7q = T6y + T6v; + T7r = T6g + T6h; + T7s = T6j + T6k; + T7t = T7r + T7s; + T7u = FNMS(KP250000000, T7t, T7q); + T7v = KP559016994 * (T7r - T7s); + } + ci[WS(rs, 24)] = T7t + T7q; + T7y = T7v + T7u; + cr[WS(rs, 20)] = T7x - T7y; + ci[WS(rs, 19)] = T7x + T7y; + T7w = T7u - T7v; + cr[WS(rs, 15)] = T7p - T7w; + ci[WS(rs, 14)] = T7p + T7w; + } + { + E T53, T5L, T6C, T6O, T5y, T6T, T5z, T6S, T66, T6H, T69, T6F, T60, T6q, T61; + E T6p, T5E, T6P, T5H, T6N, T51, T6B; + T51 = T2G - T2F; + T53 = T51 + T52; + T5L = T51 - T52; + T6B = T6z - T6A; + T6C = T6u + T6B; + T6O = T6B - T6u; + { + E T5a, T5h, T5i, T5p, T5w, T5x; + T5a = FMA(KP728968627, T56, KP684547105 * T59); + T5h = FNMS(KP992114701, T5g, KP125333233 * T5d); + T5i = T5a + T5h; + T5p = FMA(KP062790519, T5l, KP998026728 * T5o); + T5w = FNMS(KP637423989, T5v, KP770513242 * T5s); + T5x = T5p + T5w; + T5y = T5i + T5x; + T6T = T5p - T5w; + T5z = KP559016994 * (T5i - T5x); + T6S = T5h - T5a; + } + { + E T64, T65, T6D, T67, T68, T6E; + T64 = FNMS(KP481753674, T5M, KP876306680 * T5N); + T65 = FMA(KP904827052, T5Q, KP425779291 * T5P); + T6D = T64 - T65; + T67 = FNMS(KP844327925, T5T, KP535826794 * T5U); + T68 = FNMS(KP998026728, T5W, KP062790519 * T5X); + T6E = T67 + T68; + T66 = T64 + T65; + T6H = 
KP559016994 * (T6D - T6E); + T69 = T67 - T68; + T6F = T6D + T6E; + } + { + E T5O, T5R, T5S, T5V, T5Y, T5Z; + T5O = FMA(KP876306680, T5M, KP481753674 * T5N); + T5R = FNMS(KP425779291, T5Q, KP904827052 * T5P); + T5S = T5O + T5R; + T5V = FMA(KP535826794, T5T, KP844327925 * T5U); + T5Y = FMA(KP062790519, T5W, KP998026728 * T5X); + T5Z = T5V + T5Y; + T60 = T5S + T5Z; + T6q = T5V - T5Y; + T61 = KP559016994 * (T5S - T5Z); + T6p = T5R - T5O; + } + { + E T5C, T5D, T6L, T5F, T5G, T6M; + T5C = FNMS(KP684547105, T56, KP728968627 * T59); + T5D = FMA(KP125333233, T5g, KP992114701 * T5d); + T6L = T5C - T5D; + T5F = FNMS(KP998026728, T5l, KP062790519 * T5o); + T5G = FMA(KP770513242, T5v, KP637423989 * T5s); + T6M = T5F - T5G; + T5E = T5C + T5D; + T6P = T6L + T6M; + T5H = T5F + T5G; + T6N = KP559016994 * (T6L - T6M); + } + cr[WS(rs, 3)] = T53 + T5y; + ci[WS(rs, 22)] = T6F + T6C; + ci[WS(rs, 21)] = T6P + T6O; + cr[WS(rs, 2)] = T5L + T60; + { + E T6r, T6J, T6I, T6K, T6G; + T6r = FMA(KP587785252, T6p, KP951056516 * T6q); + T6J = FNMS(KP587785252, T6q, KP951056516 * T6p); + T6G = FNMS(KP250000000, T6F, T6C); + T6I = T6G - T6H; + T6K = T6H + T6G; + cr[WS(rs, 17)] = T6r - T6I; + ci[WS(rs, 17)] = T6J + T6K; + ci[WS(rs, 12)] = T6r + T6I; + cr[WS(rs, 22)] = T6J - T6K; + } + { + E T6a, T6c, T63, T6b, T62; + T6a = FMA(KP951056516, T66, KP587785252 * T69); + T6c = FNMS(KP587785252, T66, KP951056516 * T69); + T62 = FNMS(KP250000000, T60, T5L); + T63 = T61 + T62; + T6b = T62 - T61; + ci[WS(rs, 2)] = T63 - T6a; + ci[WS(rs, 7)] = T6b + T6c; + cr[WS(rs, 7)] = T63 + T6a; + cr[WS(rs, 12)] = T6b - T6c; + } + { + E T5I, T5K, T5B, T5J, T5A; + T5I = FMA(KP951056516, T5E, KP587785252 * T5H); + T5K = FNMS(KP587785252, T5E, KP951056516 * T5H); + T5A = FNMS(KP250000000, T5y, T53); + T5B = T5z + T5A; + T5J = T5A - T5z; + ci[WS(rs, 1)] = T5B - T5I; + ci[WS(rs, 6)] = T5J + T5K; + cr[WS(rs, 8)] = T5B + T5I; + ci[WS(rs, 11)] = T5J - T5K; + } + { + E T6U, T6V, T6R, T6W, T6Q; + T6U = FMA(KP587785252, T6S, 
KP951056516 * T6T); + T6V = FNMS(KP587785252, T6T, KP951056516 * T6S); + T6Q = FNMS(KP250000000, T6P, T6O); + T6R = T6N - T6Q; + T6W = T6N + T6Q; + cr[WS(rs, 13)] = T6R - T6U; + ci[WS(rs, 16)] = T6V + T6W; + cr[WS(rs, 18)] = T6U + T6R; + cr[WS(rs, 23)] = T6V - T6W; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 24}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hf2_25", twinstr, &GENUS, {280, 180, 160, 0} }; + +void X(codelet_hf2_25) (planner *p) { + X(khc2hc_register) (p, hf2_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1842 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:12 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include hf.h */ + +/* + * This function contains 488 FP additions, 350 FP multiplications, + * (or, 236 additions, 98 multiplications, 252 fused multiply/add), + * 181 stack variables, 7 constants, and 128 memory accesses + */ +#include "hf.h" + +static void hf2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T7d, T7a; + { + E T2, T8, T3, T6, Te, Tr, T18, T4, Ta, Tz, T1n, T10, Ti, T5, Tc; + T2 = W[0]; + T8 = W[4]; + T3 = W[2]; + T6 = W[3]; + Te = W[6]; + Tr = T2 * T8; + T18 = T3 * T8; + T4 = T2 * T3; + Ta = T2 * T6; + Tz = T3 * Te; + T1n = T8 * Te; + T10 = T2 * Te; + Ti = W[7]; + T5 = W[1]; + Tc = W[5]; + { + E T34, T31, T2X, T2T, Tq, T46, T8H, T98, TH, T97, T4b, T8D, TZ, T7g, T4j; + E T6t, T1g, T7f, T4q, T6u, T4z, T6y, T1J, T7j, T7m, T8e, T6x, T4G, T2k, 
T7o; + E T7r, T8d, T6B, T4O, T6A, T4V, T6P, T61, T7G, T3G, T6M, T5E, T8n, T7N, T6I; + E T5s, T7v, T2N, T6F, T55, T8i, T7C, T5L, T63, T43, T7O, T5S, T62, T7J, T8o; + E T2U, T2R, T2V, T58, T3a, T5h, T2Y, T32, T35; + { + E T1K, T23, T1N, T26, T2b, T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J; + E T2F, Td, TP, T3r, T3n, T2w, T2s, T3Q, T3M, T1Z, T1V, T2g, T2c; + { + E T11, T1C, TM, Tb, TJ, T7, T1o, T19, T1w, T1F, T15, T1s, T1d, T1z, TW; + E TS, Ty, T48, TG, T4a; + { + E T1, TA, Ts, TE, Tw, Tn, Tj, T8G, Tk, To, T14; + T1 = cr[0]; + TA = FMA(T6, Ti, Tz); + T1K = FNMS(T6, Ti, Tz); + T14 = T2 * Ti; + { + E T1r, TD, T1c, Tv; + T1r = T8 * Ti; + TD = T3 * Ti; + T11 = FNMS(T5, Ti, T10); + T1C = FMA(T5, Ti, T10); + TM = FMA(T5, T3, Ta); + Tb = FNMS(T5, T3, Ta); + TJ = FNMS(T5, T6, T4); + T7 = FMA(T5, T6, T4); + T1o = FMA(Tc, Ti, T1n); + T23 = FMA(T6, Tc, T18); + T19 = FNMS(T6, Tc, T18); + T1w = FNMS(T5, Tc, Tr); + Ts = FMA(T5, Tc, Tr); + T1c = T3 * Tc; + Tv = T2 * Tc; + T1F = FNMS(T5, Te, T14); + T15 = FMA(T5, Te, T14); + T1s = FNMS(Tc, Te, T1r); + T1N = FMA(T6, Te, TD); + TE = FNMS(T6, Te, TD); + { + E T1T, T3i, T3e, T1Q; + T1T = TJ * Tc; + T3i = TJ * Ti; + T3e = TJ * Te; + T1Q = TJ * T8; + { + E Tg, T2I, T2E, T9; + Tg = T7 * Tc; + T2I = T7 * Ti; + T2E = T7 * Te; + T9 = T7 * T8; + { + E T3q, T3m, T2v, T2r; + T3q = T19 * Ti; + T3m = T19 * Te; + T2v = T1w * Ti; + T2r = T1w * Te; + { + E T2W, T2S, T3P, T3L; + T2W = T23 * Ti; + T2S = T23 * Te; + T3P = Ts * Ti; + T3L = Ts * Te; + T26 = FNMS(T6, T8, T1c); + T1d = FMA(T6, T8, T1c); + T1z = FMA(T5, T8, Tv); + Tw = FNMS(T5, T8, Tv); + T2b = FNMS(TM, T8, T1T); + T1U = FMA(TM, T8, T1T); + T3C = FNMS(TM, Te, T3i); + T3j = FMA(TM, Te, T3i); + T3z = FMA(TM, Ti, T3e); + T3f = FNMS(TM, Ti, T3e); + T1R = FNMS(TM, Tc, T1Q); + T29 = FMA(TM, Tc, T1Q); + TR = FNMS(Tb, T8, Tg); + Th = FMA(Tb, T8, Tg); + T34 = FMA(Tb, Te, T2I); + T2J = FNMS(Tb, Te, T2I); + T31 = FNMS(Tb, Ti, T2E); + T2F = FMA(Tb, Ti, T2E); + Td = FNMS(Tb, Tc, T9); + TP = FMA(Tb, 
Tc, T9); + T2X = FNMS(T26, Te, T2W); + T2T = FMA(T26, Ti, T2S); + T3r = FNMS(T1d, Te, T3q); + T3n = FMA(T1d, Ti, T3m); + T2w = FNMS(T1z, Te, T2v); + T2s = FMA(T1z, Ti, T2r); + T3Q = FNMS(Tw, Te, T3P); + T3M = FMA(Tw, Ti, T3L); + { + E T1Y, T1S, T2f, T2a; + T1Y = T1R * Ti; + T1S = T1R * Te; + T2f = T29 * Ti; + T2a = T29 * Te; + { + E Tm, Tf, TV, TQ; + Tm = Td * Ti; + Tf = Td * Te; + TV = TP * Ti; + TQ = TP * Te; + T1Z = FNMS(T1U, Te, T1Y); + T1V = FMA(T1U, Ti, T1S); + T2g = FNMS(T2b, Te, T2f); + T2c = FMA(T2b, Ti, T2a); + Tn = FNMS(Th, Te, Tm); + Tj = FMA(Th, Ti, Tf); + TW = FNMS(TR, Te, TV); + TS = FMA(TR, Ti, TQ); + T8G = ci[0]; + } + } + } + } + } + } + } + Tk = cr[WS(rs, 16)]; + To = ci[WS(rs, 16)]; + { + E Tt, Tx, Tu, T47, TB, TF, TC, T49; + { + E Tl, T8E, Tp, T8F; + Tt = cr[WS(rs, 8)]; + Tx = ci[WS(rs, 8)]; + Tl = Tj * Tk; + T8E = Tj * To; + Tu = Ts * Tt; + T47 = Ts * Tx; + Tp = FMA(Tn, To, Tl); + T8F = FNMS(Tn, Tk, T8E); + TB = cr[WS(rs, 24)]; + TF = ci[WS(rs, 24)]; + Tq = T1 + Tp; + T46 = T1 - Tp; + T8H = T8F + T8G; + T98 = T8G - T8F; + TC = TA * TB; + T49 = TA * TF; + } + Ty = FMA(Tw, Tx, Tu); + T48 = FNMS(Tw, Tt, T47); + TG = FMA(TE, TF, TC); + T4a = FNMS(TE, TB, T49); + } + } + { + E TT, TX, TO, T4f, TU, T4g; + { + E TK, TN, TL, T4e; + TK = cr[WS(rs, 4)]; + TN = ci[WS(rs, 4)]; + TH = Ty + TG; + T97 = Ty - TG; + T4b = T48 - T4a; + T8D = T48 + T4a; + TL = TJ * TK; + T4e = TJ * TN; + TT = cr[WS(rs, 20)]; + TX = ci[WS(rs, 20)]; + TO = FMA(TM, TN, TL); + T4f = FNMS(TM, TK, T4e); + TU = TS * TT; + T4g = TS * TX; + } + { + E T17, T4m, T1a, T1e, T4d, T4i; + { + E T12, T16, TY, T4h, T13, T4l; + T12 = cr[WS(rs, 28)]; + T16 = ci[WS(rs, 28)]; + TY = FMA(TW, TX, TU); + T4h = FNMS(TW, TT, T4g); + T13 = T11 * T12; + T4l = T11 * T16; + TZ = TO + TY; + T4d = TO - TY; + T7g = T4f + T4h; + T4i = T4f - T4h; + T17 = FMA(T15, T16, T13); + T4m = FNMS(T15, T12, T4l); + } + T4j = T4d - T4i; + T6t = T4d + T4i; + T1a = cr[WS(rs, 12)]; + T1e = ci[WS(rs, 12)]; + { + E T1m, T4u, T1H, 
T4E, T1x, T1A, T1u, T4w, T1y, T4B; + { + E T1D, T1G, T1E, T4D; + { + E T1f, T4o, T4k, T4p; + { + E T1j, T1l, T1b, T4n, T1k, T4t; + T1j = cr[WS(rs, 2)]; + T1l = ci[WS(rs, 2)]; + T1b = T19 * T1a; + T4n = T19 * T1e; + T1k = T7 * T1j; + T4t = T7 * T1l; + T1f = FMA(T1d, T1e, T1b); + T4o = FNMS(T1d, T1a, T4n); + T1m = FMA(Tb, T1l, T1k); + T4u = FNMS(Tb, T1j, T4t); + } + T1g = T17 + T1f; + T4k = T17 - T1f; + T7f = T4m + T4o; + T4p = T4m - T4o; + T1D = cr[WS(rs, 26)]; + T1G = ci[WS(rs, 26)]; + T4q = T4k + T4p; + T6u = T4k - T4p; + T1E = T1C * T1D; + T4D = T1C * T1G; + } + { + E T1p, T1t, T1q, T4v; + T1p = cr[WS(rs, 18)]; + T1t = ci[WS(rs, 18)]; + T1H = FMA(T1F, T1G, T1E); + T4E = FNMS(T1F, T1D, T4D); + T1q = T1o * T1p; + T4v = T1o * T1t; + T1x = cr[WS(rs, 10)]; + T1A = ci[WS(rs, 10)]; + T1u = FMA(T1s, T1t, T1q); + T4w = FNMS(T1s, T1p, T4v); + T1y = T1w * T1x; + T4B = T1w * T1A; + } + } + { + E T4A, T1v, T7k, T4x, T1B, T4C; + T4A = T1m - T1u; + T1v = T1m + T1u; + T7k = T4u + T4w; + T4x = T4u - T4w; + T1B = FMA(T1z, T1A, T1y); + T4C = FNMS(T1z, T1x, T4B); + { + E T1I, T4y, T4F, T7l; + T1I = T1B + T1H; + T4y = T1B - T1H; + T4F = T4C - T4E; + T7l = T4C + T4E; + T4z = T4x + T4y; + T6y = T4x - T4y; + T1J = T1v + T1I; + T7j = T1v - T1I; + T7m = T7k - T7l; + T8e = T7k + T7l; + T6x = T4A + T4F; + T4G = T4A - T4F; + } + } + } + } + } + } + { + E T5C, T3u, T5y, T7L, T60, T5V, T3F, T5A, T4P, T4U; + { + E T1P, T4J, T2i, T4T, T21, T4L, T28, T4R; + { + E T1L, T1O, T1W, T20; + T1L = cr[WS(rs, 30)]; + T1O = ci[WS(rs, 30)]; + { + E T2d, T2h, T1M, T4I, T2e, T4S; + T2d = cr[WS(rs, 22)]; + T2h = ci[WS(rs, 22)]; + T1M = T1K * T1L; + T4I = T1K * T1O; + T2e = T2c * T2d; + T4S = T2c * T2h; + T1P = FMA(T1N, T1O, T1M); + T4J = FNMS(T1N, T1L, T4I); + T2i = FMA(T2g, T2h, T2e); + T4T = FNMS(T2g, T2d, T4S); + } + T1W = cr[WS(rs, 14)]; + T20 = ci[WS(rs, 14)]; + { + E T24, T27, T1X, T4K, T25, T4Q; + T24 = cr[WS(rs, 6)]; + T27 = ci[WS(rs, 6)]; + T1X = T1V * T1W; + T4K = T1V * T20; + T25 = T23 * T24; + T4Q 
= T23 * T27; + T21 = FMA(T1Z, T20, T1X); + T4L = FNMS(T1Z, T1W, T4K); + T28 = FMA(T26, T27, T25); + T4R = FNMS(T26, T24, T4Q); + } + } + { + E T22, T7p, T4M, T4N, T2j, T7q; + T4P = T1P - T21; + T22 = T1P + T21; + T7p = T4J + T4L; + T4M = T4J - T4L; + T4N = T28 - T2i; + T2j = T28 + T2i; + T7q = T4R + T4T; + T4U = T4R - T4T; + T2k = T22 + T2j; + T7o = T22 - T2j; + T7r = T7p - T7q; + T8d = T7p + T7q; + T6B = T4M - T4N; + T4O = T4M + T4N; + } + } + { + E T3l, T5X, T3E, T3v, T3t, T3w, T3x, T5Z, T3A, T3B, T3D, T3y, T5z; + { + E T3g, T3k, T3h, T5W; + T3g = cr[WS(rs, 31)]; + T3k = ci[WS(rs, 31)]; + T3A = cr[WS(rs, 23)]; + T6A = T4P + T4U; + T4V = T4P - T4U; + T3h = T3f * T3g; + T5W = T3f * T3k; + T3B = T3z * T3A; + T3D = ci[WS(rs, 23)]; + T3l = FMA(T3j, T3k, T3h); + T5X = FNMS(T3j, T3g, T5W); + } + { + E T3o, T5B, T3s, T3p, T5Y; + T3o = cr[WS(rs, 15)]; + T3E = FMA(T3C, T3D, T3B); + T5B = T3z * T3D; + T3s = ci[WS(rs, 15)]; + T3p = T3n * T3o; + T3v = cr[WS(rs, 7)]; + T5C = FNMS(T3C, T3A, T5B); + T5Y = T3n * T3s; + T3t = FMA(T3r, T3s, T3p); + T3w = TP * T3v; + T3x = ci[WS(rs, 7)]; + T5Z = FNMS(T3r, T3o, T5Y); + } + T3u = T3l + T3t; + T5y = T3l - T3t; + T3y = FMA(TR, T3x, T3w); + T5z = TP * T3x; + T7L = T5X + T5Z; + T60 = T5X - T5Z; + T5V = T3E - T3y; + T3F = T3y + T3E; + T5A = FNMS(TR, T3v, T5z); + } + { + E T2L, T53, T4Z, T2z, T7A, T5q, T2D, T51; + { + E T2q, T5n, T2y, T2A, T2C, T5p, T2B, T50; + { + E T2G, T2K, T2n, T5m, T2t, T5o; + { + E T2o, T2p, T5D, T7M; + T2n = cr[WS(rs, 1)]; + T6P = T60 + T5V; + T61 = T5V - T60; + T7G = T3u - T3F; + T3G = T3u + T3F; + T5D = T5A - T5C; + T7M = T5A + T5C; + T2o = T2 * T2n; + T2p = ci[WS(rs, 1)]; + T6M = T5y + T5D; + T5E = T5y - T5D; + T8n = T7L + T7M; + T7N = T7L - T7M; + T5m = T2 * T2p; + T2q = FMA(T5, T2p, T2o); + } + T2G = cr[WS(rs, 25)]; + T2K = ci[WS(rs, 25)]; + T5n = FNMS(T5, T2n, T5m); + { + E T2x, T2u, T2H, T52; + T2t = cr[WS(rs, 17)]; + T2H = T2F * T2G; + T52 = T2F * T2K; + T2x = ci[WS(rs, 17)]; + T2u = T2s * T2t; + T2L = 
FMA(T2J, T2K, T2H); + T53 = FNMS(T2J, T2G, T52); + T5o = T2s * T2x; + T2y = FMA(T2w, T2x, T2u); + } + T2A = cr[WS(rs, 9)]; + T2C = ci[WS(rs, 9)]; + T5p = FNMS(T2w, T2t, T5o); + } + T4Z = T2q - T2y; + T2z = T2q + T2y; + T2B = T8 * T2A; + T50 = T8 * T2C; + T7A = T5n + T5p; + T5q = T5n - T5p; + T2D = FMA(Tc, T2C, T2B); + T51 = FNMS(Tc, T2A, T50); + } + { + E T3N, T3K, T3O, T5H, T41, T5Q, T3R, T3U, T3W; + { + E T3H, T3I, T3J, T3Y, T40, T5G, T3Z, T5P; + T3H = cr[WS(rs, 3)]; + { + E T5r, T2M, T54, T7B; + T5r = T2D - T2L; + T2M = T2D + T2L; + T54 = T51 - T53; + T7B = T51 + T53; + T6I = T5q - T5r; + T5s = T5q + T5r; + T7v = T2z - T2M; + T2N = T2z + T2M; + T6F = T4Z + T54; + T55 = T4Z - T54; + T8i = T7A + T7B; + T7C = T7A - T7B; + T3I = T3 * T3H; + } + T3J = ci[WS(rs, 3)]; + T3Y = cr[WS(rs, 11)]; + T40 = ci[WS(rs, 11)]; + T3N = cr[WS(rs, 19)]; + T3K = FMA(T6, T3J, T3I); + T5G = T3 * T3J; + T3Z = Td * T3Y; + T5P = Td * T40; + T3O = T3M * T3N; + T5H = FNMS(T6, T3H, T5G); + T41 = FMA(Th, T40, T3Z); + T5Q = FNMS(Th, T3Y, T5P); + T3R = ci[WS(rs, 19)]; + T3U = cr[WS(rs, 27)]; + T3W = ci[WS(rs, 27)]; + } + { + E T2O, T2P, T2Q, T37, T39, T57, T38, T5g; + { + E T3T, T5F, T5J, T3X, T5O, T7I, T5K; + T2O = cr[WS(rs, 5)]; + { + E T3S, T5I, T3V, T5N; + T3S = FMA(T3Q, T3R, T3O); + T5I = T3M * T3R; + T3V = Te * T3U; + T5N = Te * T3W; + T3T = T3K + T3S; + T5F = T3K - T3S; + T5J = FNMS(T3Q, T3N, T5I); + T3X = FMA(Ti, T3W, T3V); + T5O = FNMS(Ti, T3U, T5N); + T2P = T29 * T2O; + } + T7I = T5H + T5J; + T5K = T5H - T5J; + { + E T42, T5M, T7H, T5R; + T42 = T3X + T41; + T5M = T3X - T41; + T7H = T5O + T5Q; + T5R = T5O - T5Q; + T5L = T5F - T5K; + T63 = T5F + T5K; + T43 = T3T + T42; + T7O = T42 - T3T; + T5S = T5M + T5R; + T62 = T5M - T5R; + T7J = T7H - T7I; + T8o = T7I + T7H; + T2Q = ci[WS(rs, 5)]; + } + } + T37 = cr[WS(rs, 13)]; + T39 = ci[WS(rs, 13)]; + T2U = cr[WS(rs, 21)]; + T2R = FMA(T2b, T2Q, T2P); + T57 = T29 * T2Q; + T38 = T1R * T37; + T5g = T1R * T39; + T2V = T2T * T2U; + T58 = FNMS(T2b, T2O, 
T57); + T3a = FMA(T1U, T39, T38); + T5h = FNMS(T1U, T37, T5g); + T2Y = ci[WS(rs, 21)]; + T32 = cr[WS(rs, 29)]; + T35 = ci[WS(rs, 29)]; + } + } + } + } + } + { + E T7e, T8T, T7D, T7y, T7h, T8U, T6s, T9o, T9n, T6v, T6Q, T6N, T6J, T6G, T6o; + E T6r; + { + E T5c, T5t, T5j, T5u, T8s, T8v; + { + E T8c, T1i, T8A, T8z, T8O, T8J, T8N, T2l, T8L, T45, T8t, T8l, T8u, T8q, T3c; + E T8p, T8k, T8w, T2m; + { + E T8x, T8y, T8j, T8C, T8I; + { + E TI, T30, T56, T5a, T36, T5f, T1h, T7x, T5b; + TI = Tq + TH; + T7e = Tq - TH; + { + E T2Z, T59, T33, T5e; + T2Z = FMA(T2X, T2Y, T2V); + T59 = T2T * T2Y; + T33 = T31 * T32; + T5e = T31 * T35; + T30 = T2R + T2Z; + T56 = T2R - T2Z; + T5a = FNMS(T2X, T2U, T59); + T36 = FMA(T34, T35, T33); + T5f = FNMS(T34, T32, T5e); + T1h = TZ + T1g; + T8T = TZ - T1g; + } + T7x = T58 + T5a; + T5b = T58 - T5a; + { + E T3b, T5d, T7w, T5i; + T3b = T36 + T3a; + T5d = T36 - T3a; + T7w = T5f + T5h; + T5i = T5f - T5h; + T5c = T56 - T5b; + T5t = T56 + T5b; + T3c = T30 + T3b; + T7D = T30 - T3b; + T5j = T5d + T5i; + T5u = T5i - T5d; + T7y = T7w - T7x; + T8j = T7x + T7w; + T8c = TI - T1h; + T1i = TI + T1h; + } + } + T8p = T8n - T8o; + T8x = T8n + T8o; + T8y = T8i + T8j; + T8k = T8i - T8j; + T7h = T7f - T7g; + T8C = T7g + T7f; + T8I = T8D + T8H; + T8U = T8H - T8D; + T8A = T8y + T8x; + T8z = T8x - T8y; + T8O = T8I - T8C; + T8J = T8C + T8I; + } + { + E T8h, T8m, T3d, T44; + T8h = T2N - T3c; + T3d = T2N + T3c; + T44 = T3G + T43; + T8m = T3G - T43; + T8N = T1J - T2k; + T2l = T1J + T2k; + T8L = T44 - T3d; + T45 = T3d + T44; + T8t = T8h - T8k; + T8l = T8h + T8k; + T8u = T8m + T8p; + T8q = T8m - T8p; + } + T8w = T1i - T2l; + T2m = T1i + T2l; + { + E T8Q, T8R, T8P, T8S; + { + E T8r, T8M, T8K, T8g, T8B, T8f; + T8Q = T8q - T8l; + T8r = T8l + T8q; + T8B = T8e + T8d; + T8f = T8d - T8e; + cr[0] = T2m + T45; + ci[WS(rs, 15)] = T2m - T45; + ci[WS(rs, 7)] = T8w + T8z; + cr[WS(rs, 8)] = T8w - T8z; + T8M = T8J - T8B; + T8K = T8B + T8J; + T8g = T8c - T8f; + T8s = T8c + T8f; + T8R = T8O - 
T8N; + T8P = T8N + T8O; + ci[WS(rs, 23)] = T8L + T8M; + cr[WS(rs, 24)] = T8L - T8M; + ci[WS(rs, 31)] = T8A + T8K; + cr[WS(rs, 16)] = T8A - T8K; + cr[WS(rs, 4)] = FMA(KP707106781, T8r, T8g); + ci[WS(rs, 11)] = FNMS(KP707106781, T8r, T8g); + } + T8S = T8u - T8t; + T8v = T8t + T8u; + ci[WS(rs, 19)] = FMA(KP707106781, T8Q, T8P); + cr[WS(rs, 28)] = FMS(KP707106781, T8Q, T8P); + ci[WS(rs, 27)] = FMA(KP707106781, T8S, T8R); + cr[WS(rs, 20)] = FMS(KP707106781, T8S, T8R); + } + } + { + E T6c, T4s, T9c, T4X, T9h, T9b, T9i, T6f, T5l, T6h, T6m, T6q, T6a, T66, T5v; + { + E T6d, T4H, T4W, T6e, T99, T9a, T4c, T4r, T5T, T64; + T6s = T46 + T4b; + T4c = T46 - T4b; + T4r = T4j + T4q; + T9o = T4q - T4j; + T6d = FNMS(KP414213562, T4z, T4G); + T4H = FMA(KP414213562, T4G, T4z); + ci[WS(rs, 3)] = FMA(KP707106781, T8v, T8s); + cr[WS(rs, 12)] = FNMS(KP707106781, T8v, T8s); + T6c = FMA(KP707106781, T4r, T4c); + T4s = FNMS(KP707106781, T4r, T4c); + T4W = FNMS(KP414213562, T4V, T4O); + T6e = FMA(KP414213562, T4O, T4V); + T9n = T98 - T97; + T99 = T97 + T98; + T9a = T6t - T6u; + T6v = T6t + T6u; + T9c = T4H + T4W; + T4X = T4H - T4W; + T9h = FNMS(KP707106781, T9a, T99); + T9b = FMA(KP707106781, T9a, T99); + T6Q = T5S - T5L; + T5T = T5L + T5S; + T64 = T62 - T63; + T6N = T63 + T62; + { + E T6k, T5U, T6l, T65, T5k; + T6J = T5j - T5c; + T5k = T5c + T5j; + T9i = T6e - T6d; + T6f = T6d + T6e; + T6k = FMA(KP707106781, T5T, T5E); + T5U = FNMS(KP707106781, T5T, T5E); + T6l = FMA(KP707106781, T64, T61); + T65 = FNMS(KP707106781, T64, T61); + T5l = FNMS(KP707106781, T5k, T55); + T6h = FMA(KP707106781, T5k, T55); + T6m = FNMS(KP198912367, T6l, T6k); + T6q = FMA(KP198912367, T6k, T6l); + T6a = FNMS(KP668178637, T5U, T65); + T66 = FMA(KP668178637, T65, T5U); + T5v = T5t + T5u; + T6G = T5t - T5u; + } + } + { + E T68, T4Y, T9j, T9l, T6i, T5w; + T68 = FNMS(KP923879532, T4X, T4s); + T4Y = FMA(KP923879532, T4X, T4s); + T9j = FMA(KP923879532, T9i, T9h); + T9l = FNMS(KP923879532, T9i, T9h); + T6i = FMA(KP707106781, 
T5v, T5s); + T5w = FNMS(KP707106781, T5v, T5s); + { + E T9g, T9f, T9d, T9e; + { + E T6g, T6p, T69, T5x, T6n, T6j; + T6o = FNMS(KP923879532, T6f, T6c); + T6g = FMA(KP923879532, T6f, T6c); + T6j = FNMS(KP198912367, T6i, T6h); + T6p = FMA(KP198912367, T6h, T6i); + T69 = FNMS(KP668178637, T5l, T5w); + T5x = FMA(KP668178637, T5w, T5l); + T6n = T6j + T6m; + T9g = T6m - T6j; + T9f = FNMS(KP923879532, T9c, T9b); + T9d = FMA(KP923879532, T9c, T9b); + { + E T6b, T9k, T9m, T67; + T6b = T69 + T6a; + T9k = T69 - T6a; + T9m = T66 - T5x; + T67 = T5x + T66; + ci[0] = FMA(KP980785280, T6n, T6g); + cr[WS(rs, 15)] = FNMS(KP980785280, T6n, T6g); + ci[WS(rs, 4)] = FNMS(KP831469612, T6b, T68); + cr[WS(rs, 11)] = FMA(KP831469612, T6b, T68); + ci[WS(rs, 28)] = FMA(KP831469612, T9k, T9j); + cr[WS(rs, 19)] = FMS(KP831469612, T9k, T9j); + ci[WS(rs, 20)] = FMA(KP831469612, T9m, T9l); + cr[WS(rs, 27)] = FMS(KP831469612, T9m, T9l); + cr[WS(rs, 3)] = FMA(KP831469612, T67, T4Y); + ci[WS(rs, 12)] = FNMS(KP831469612, T67, T4Y); + T9e = T6q - T6p; + T6r = T6p + T6q; + } + } + ci[WS(rs, 16)] = FMA(KP980785280, T9e, T9d); + cr[WS(rs, 31)] = FMS(KP980785280, T9e, T9d); + ci[WS(rs, 24)] = FMA(KP980785280, T9g, T9f); + cr[WS(rs, 23)] = FMS(KP980785280, T9g, T9f); + } + } + } + } + { + E T88, T90, T8Z, T8b; + { + E T7K, T7W, T7i, T7P, T8a, T86, T91, T8V, T8W, T7t, T7U, T7F, T92, T7Z, T89; + E T83; + { + E T7X, T7n, T7s, T7Y, T84, T85; + T7K = T7G - T7J; + T84 = T7G + T7J; + cr[WS(rs, 7)] = FMA(KP980785280, T6r, T6o); + ci[WS(rs, 8)] = FNMS(KP980785280, T6r, T6o); + T7W = T7e + T7h; + T7i = T7e - T7h; + T85 = T7O - T7N; + T7P = T7N + T7O; + T7X = T7j - T7m; + T7n = T7j + T7m; + T8a = FMA(KP414213562, T84, T85); + T86 = FNMS(KP414213562, T85, T84); + T91 = T8U - T8T; + T8V = T8T + T8U; + T7s = T7o - T7r; + T7Y = T7o + T7r; + { + E T81, T82, T7z, T7E; + T81 = T7v + T7y; + T7z = T7v - T7y; + T7E = T7C - T7D; + T82 = T7C + T7D; + T8W = T7n - T7s; + T7t = T7n + T7s; + T7U = FNMS(KP414213562, T7z, T7E); + T7F = 
FMA(KP414213562, T7E, T7z); + T92 = T7Y - T7X; + T7Z = T7X + T7Y; + T89 = FMA(KP414213562, T81, T82); + T83 = FNMS(KP414213562, T82, T81); + } + } + { + E T7S, T7u, T93, T95, T7T, T7Q; + T7S = FNMS(KP707106781, T7t, T7i); + T7u = FMA(KP707106781, T7t, T7i); + T93 = FMA(KP707106781, T92, T91); + T95 = FNMS(KP707106781, T92, T91); + T7T = FMA(KP414213562, T7K, T7P); + T7Q = FNMS(KP414213562, T7P, T7K); + { + E T80, T87, T8X, T8Y; + T88 = FNMS(KP707106781, T7Z, T7W); + T80 = FMA(KP707106781, T7Z, T7W); + { + E T7V, T94, T96, T7R; + T7V = T7T - T7U; + T94 = T7U + T7T; + T96 = T7Q - T7F; + T7R = T7F + T7Q; + ci[WS(rs, 5)] = FMA(KP923879532, T7V, T7S); + cr[WS(rs, 10)] = FNMS(KP923879532, T7V, T7S); + ci[WS(rs, 29)] = FMA(KP923879532, T94, T93); + cr[WS(rs, 18)] = FMS(KP923879532, T94, T93); + ci[WS(rs, 21)] = FMA(KP923879532, T96, T95); + cr[WS(rs, 26)] = FMS(KP923879532, T96, T95); + cr[WS(rs, 2)] = FMA(KP923879532, T7R, T7u); + ci[WS(rs, 13)] = FNMS(KP923879532, T7R, T7u); + T87 = T83 + T86; + T90 = T86 - T83; + } + T8Z = FNMS(KP707106781, T8W, T8V); + T8X = FMA(KP707106781, T8W, T8V); + T8Y = T8a - T89; + T8b = T89 + T8a; + ci[WS(rs, 1)] = FMA(KP923879532, T87, T80); + cr[WS(rs, 14)] = FNMS(KP923879532, T87, T80); + ci[WS(rs, 17)] = FMA(KP923879532, T8Y, T8X); + cr[WS(rs, 30)] = FMS(KP923879532, T8Y, T8X); + } + } + } + { + E T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T6O, T76; + { + E T70, T6Z, T6z, T6C; + ci[WS(rs, 25)] = FMA(KP923879532, T90, T8Z); + cr[WS(rs, 22)] = FMS(KP923879532, T90, T8Z); + cr[WS(rs, 6)] = FMA(KP923879532, T8b, T88); + ci[WS(rs, 9)] = FNMS(KP923879532, T8b, T88); + T70 = FNMS(KP414213562, T6x, T6y); + T6z = FMA(KP414213562, T6y, T6x); + T6C = FNMS(KP414213562, T6B, T6A); + T6Z = FMA(KP414213562, T6A, T6B); + T6Y = FNMS(KP707106781, T6v, T6s); + T6w = FMA(KP707106781, T6v, T6s); + T9w = T6z - T6C; + T6D = T6z + T6C; + T9v = FNMS(KP707106781, T9o, T9n); + T9p = FMA(KP707106781, T9o, T9n); + T9q = T70 + T6Z; + T71 = T6Z - T70; + T6O = 
FMA(KP707106781, T6N, T6M); + T76 = FNMS(KP707106781, T6N, T6M); + } + { + E T6U, T9u, T79, T6X, T9s, T9t, T9r, T72; + { + E T6E, T78, T6V, T6S, T75, T6W, T6L, T9x, T9z, T9y, T6T, T9A; + { + E T7c, T7b, T77, T6R; + T6U = FNMS(KP923879532, T6D, T6w); + T6E = FMA(KP923879532, T6D, T6w); + T77 = FNMS(KP707106781, T6Q, T6P); + T6R = FMA(KP707106781, T6Q, T6P); + { + E T73, T6H, T74, T6K; + T73 = FNMS(KP707106781, T6G, T6F); + T6H = FMA(KP707106781, T6G, T6F); + T74 = FNMS(KP707106781, T6J, T6I); + T6K = FMA(KP707106781, T6J, T6I); + T78 = FMA(KP668178637, T77, T76); + T7c = FNMS(KP668178637, T76, T77); + T6V = FMA(KP198912367, T6O, T6R); + T6S = FNMS(KP198912367, T6R, T6O); + T75 = FNMS(KP668178637, T74, T73); + T7b = FMA(KP668178637, T73, T74); + T6W = FNMS(KP198912367, T6H, T6K); + T6L = FMA(KP198912367, T6K, T6H); + } + T9x = FMA(KP923879532, T9w, T9v); + T9z = FNMS(KP923879532, T9w, T9v); + T7d = T7b - T7c; + T9y = T7b + T7c; + } + T9u = T6S - T6L; + T6T = T6L + T6S; + T9A = T78 - T75; + T79 = T75 + T78; + ci[WS(rs, 18)] = FNMS(KP831469612, T9y, T9x); + cr[WS(rs, 29)] = -(FMA(KP831469612, T9y, T9x)); + cr[WS(rs, 1)] = FMA(KP980785280, T6T, T6E); + ci[WS(rs, 14)] = FNMS(KP980785280, T6T, T6E); + cr[WS(rs, 21)] = FMS(KP831469612, T9A, T9z); + ci[WS(rs, 26)] = FMA(KP831469612, T9A, T9z); + T6X = T6V - T6W; + T9s = T6W + T6V; + } + T7a = FNMS(KP923879532, T71, T6Y); + T72 = FMA(KP923879532, T71, T6Y); + T9t = FNMS(KP923879532, T9q, T9p); + T9r = FMA(KP923879532, T9q, T9p); + ci[WS(rs, 6)] = FMA(KP980785280, T6X, T6U); + cr[WS(rs, 9)] = FNMS(KP980785280, T6X, T6U); + ci[WS(rs, 2)] = FMA(KP831469612, T79, T72); + cr[WS(rs, 13)] = FNMS(KP831469612, T79, T72); + ci[WS(rs, 30)] = FMA(KP980785280, T9s, T9r); + cr[WS(rs, 17)] = FMS(KP980785280, T9s, T9r); + ci[WS(rs, 22)] = FMA(KP980785280, T9u, T9t); + cr[WS(rs, 25)] = FMS(KP980785280, T9u, T9t); + } + } + } + } + } + } + cr[WS(rs, 5)] = FMA(KP831469612, T7d, T7a); + ci[WS(rs, 10)] = FNMS(KP831469612, T7d, T7a); + } + } +} + 
+static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hf2_32", twinstr, &GENUS, {236, 98, 252, 0} }; + +void X(codelet_hf2_32) (planner *p) { + X(khc2hc_register) (p, hf2_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include hf.h */ + +/* + * This function contains 488 FP additions, 280 FP multiplications, + * (or, 376 additions, 168 multiplications, 112 fused multiply/add), + * 158 stack variables, 7 constants, and 128 memory accesses + */ +#include "hf.h" + +static void hf2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { + E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y; + E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d; + E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C; + E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25; + E T1S, T23; + { + E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF; + E T10; + { + E T4, Tc, T7, Tb; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tc = T5 * T3; + T7 = T5 * T6; + Tb = T2 * T6; + T8 = T4 + T7; + TM = T4 - 
T7; + TO = Tb + Tc; + Td = Tb - Tc; + T9 = W[4]; + Ts = T2 * T9; + T1d = T6 * T9; + Tx = T5 * T9; + T18 = T3 * T9; + Te = W[5]; + Tt = T5 * Te; + T1c = T3 * Te; + Tw = T2 * Te; + T19 = T6 * Te; + Th = W[6]; + TB = T3 * Th; + T14 = T5 * Th; + TG = T6 * Th; + TZ = T2 * Th; + Tl = W[7]; + TC = T6 * Tl; + T13 = T2 * Tl; + TF = T3 * Tl; + T10 = T5 * Tl; + } + TD = TB + TC; + TH = TF - TG; + T1y = TZ + T10; + T1H = TF + TG; + T15 = T13 + T14; + T1A = T13 - T14; + T11 = TZ - T10; + T1F = TB - TC; + T1n = FMA(T9, Th, Te * Tl); + T1p = FNMS(Te, Th, T9 * Tl); + { + E T2o, T2p, T2s, T2t; + T2o = T8 * Th; + T2p = Td * Tl; + T2q = T2o + T2p; + T2I = T2o - T2p; + T2s = T8 * Tl; + T2t = Td * Th; + T2u = T2s - T2t; + T2K = T2s + T2t; + } + { + E T2T, T2U, T2X, T2Y; + T2T = TM * Th; + T2U = TO * Tl; + T2V = T2T - T2U; + T3b = T2T + T2U; + T2X = TM * Tl; + T2Y = TO * Th; + T2Z = T2X + T2Y; + T3d = T2X - T2Y; + Tu = Ts + Tt; + Ty = Tw - Tx; + T3l = FMA(Tu, Th, Ty * Tl); + T3n = FNMS(Ty, Th, Tu * Tl); + } + T1t = Ts - Tt; + T1v = Tw + Tx; + T2f = FMA(T1t, Th, T1v * Tl); + T2h = FNMS(T1v, Th, T1t * Tl); + T1a = T18 - T19; + T1e = T1c + T1d; + T32 = FMA(T1a, Th, T1e * Tl); + T34 = FNMS(T1e, Th, T1a * Tl); + T1W = T18 + T19; + T1Y = T1c - T1d; + T2C = FMA(T1W, Th, T1Y * Tl); + T2E = FNMS(T1Y, Th, T1W * Tl); + { + E Ta, Tf, Ti, Tj; + Ta = T8 * T9; + Tf = Td * Te; + Tg = Ta - Tf; + TR = Ta + Tf; + Ti = T8 * Te; + Tj = Td * T9; + Tk = Ti + Tj; + TS = Ti - Tj; + } + Tm = FMA(Tg, Th, Tk * Tl); + TV = FNMS(TS, Th, TR * Tl); + To = FNMS(Tk, Th, Tg * Tl); + TT = FMA(TR, Th, TS * Tl); + { + E T1K, T1L, T1N, T1O; + T1K = TM * T9; + T1L = TO * Te; + T1M = T1K - T1L; + T21 = T1K + T1L; + T1N = TM * Te; + T1O = TO * T9; + T1P = T1N + T1O; + T22 = T1N - T1O; + } + T1Q = FMA(T1M, Th, T1P * Tl); + T25 = FNMS(T22, Th, T21 * Tl); + T1S = FNMS(T1P, Th, T1M * Tl); + T23 = FMA(T21, Th, T22 * Tl); + } + { + E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5G, T4B; + E T5J, T3h, T6H, T6O, 
T7o, T4L, T5Q, T52, T5N, T1i, T7V, T6i, T7D, T3K, T5u; + E T3P, T5v, T1E, T6k, T6n, T7f, T3W, T5z, T41, T5y, T29, T6p, T6s, T7e, T47; + E T5C, T4c, T5B, T2R, T6z, T6E, T7k, T4v, T5K, T4E, T5H, T3y, T6P, T6K, T7p; + E T4W, T5O, T55, T5R; + { + E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp; + T1 = cr[0]; + T7G = ci[0]; + Tn = cr[WS(rs, 16)]; + Tp = ci[WS(rs, 16)]; + Tq = FMA(Tm, Tn, To * Tp); + T7F = FNMS(To, Tn, Tm * Tp); + { + E Tv, Tz, TE, TI; + Tv = cr[WS(rs, 8)]; + Tz = ci[WS(rs, 8)]; + TA = FMA(Tu, Tv, Ty * Tz); + T3C = FNMS(Ty, Tv, Tu * Tz); + TE = cr[WS(rs, 24)]; + TI = ci[WS(rs, 24)]; + TJ = FMA(TD, TE, TH * TI); + T3D = FNMS(TH, TE, TD * TI); + } + { + E Tr, TK, T8a, T8b; + Tr = T1 + Tq; + TK = TA + TJ; + TL = Tr + TK; + T6f = Tr - TK; + T8a = TA - TJ; + T8b = T7G - T7F; + T8c = T8a + T8b; + T8q = T8b - T8a; + } + { + E T3B, T3E, T7E, T7H; + T3B = T1 - Tq; + T3E = T3C - T3D; + T3F = T3B + T3E; + T5t = T3B - T3E; + T7E = T3C + T3D; + T7H = T7F + T7G; + T7I = T7E + T7H; + T7W = T7H - T7E; + } + } + { + E T2e, T4x, T2w, T4i, T2j, T4y, T2n, T4h; + { + E T2c, T2d, T2r, T2v; + T2c = cr[WS(rs, 1)]; + T2d = ci[WS(rs, 1)]; + T2e = FMA(T2, T2c, T5 * T2d); + T4x = FNMS(T5, T2c, T2 * T2d); + T2r = cr[WS(rs, 25)]; + T2v = ci[WS(rs, 25)]; + T2w = FMA(T2q, T2r, T2u * T2v); + T4i = FNMS(T2u, T2r, T2q * T2v); + } + { + E T2g, T2i, T2l, T2m; + T2g = cr[WS(rs, 17)]; + T2i = ci[WS(rs, 17)]; + T2j = FMA(T2f, T2g, T2h * T2i); + T4y = FNMS(T2h, T2g, T2f * T2i); + T2l = cr[WS(rs, 9)]; + T2m = ci[WS(rs, 9)]; + T2n = FMA(T9, T2l, Te * T2m); + T4h = FNMS(Te, T2l, T9 * T2m); + } + { + E T2k, T2x, T6w, T6x; + T2k = T2e + T2j; + T2x = T2n + T2w; + T2y = T2k + T2x; + T6B = T2k - T2x; + T6w = T4x + T4y; + T6x = T4h + T4i; + T6y = T6w - T6x; + T7j = T6w + T6x; + } + { + E T4g, T4j, T4z, T4A; + T4g = T2e - T2j; + T4j = T4h - T4i; + T4k = T4g + T4j; + T5G = T4g - T4j; + T4z = T4x - T4y; + T4A = T2n - T2w; + T4B = T4z - T4A; + T5J = T4z + T4A; + } + } + { + E T31, T4H, T3f, T50, T36, T4I, T3a, 
T4Z; + { + E T2W, T30, T3c, T3e; + T2W = cr[WS(rs, 31)]; + T30 = ci[WS(rs, 31)]; + T31 = FMA(T2V, T2W, T2Z * T30); + T4H = FNMS(T2Z, T2W, T2V * T30); + T3c = cr[WS(rs, 23)]; + T3e = ci[WS(rs, 23)]; + T3f = FMA(T3b, T3c, T3d * T3e); + T50 = FNMS(T3d, T3c, T3b * T3e); + } + { + E T33, T35, T38, T39; + T33 = cr[WS(rs, 15)]; + T35 = ci[WS(rs, 15)]; + T36 = FMA(T32, T33, T34 * T35); + T4I = FNMS(T34, T33, T32 * T35); + T38 = cr[WS(rs, 7)]; + T39 = ci[WS(rs, 7)]; + T3a = FMA(TR, T38, TS * T39); + T4Z = FNMS(TS, T38, TR * T39); + } + { + E T37, T3g, T6M, T6N; + T37 = T31 + T36; + T3g = T3a + T3f; + T3h = T37 + T3g; + T6H = T37 - T3g; + T6M = T4H + T4I; + T6N = T4Z + T50; + T6O = T6M - T6N; + T7o = T6M + T6N; + } + { + E T4J, T4K, T4Y, T51; + T4J = T4H - T4I; + T4K = T3a - T3f; + T4L = T4J - T4K; + T5Q = T4J + T4K; + T4Y = T31 - T36; + T51 = T4Z - T50; + T52 = T4Y + T51; + T5N = T4Y - T51; + } + } + { + E TQ, T3H, T1g, T3N, TX, T3I, T17, T3M; + { + E TN, TP, T1b, T1f; + TN = cr[WS(rs, 4)]; + TP = ci[WS(rs, 4)]; + TQ = FMA(TM, TN, TO * TP); + T3H = FNMS(TO, TN, TM * TP); + T1b = cr[WS(rs, 12)]; + T1f = ci[WS(rs, 12)]; + T1g = FMA(T1a, T1b, T1e * T1f); + T3N = FNMS(T1e, T1b, T1a * T1f); + } + { + E TU, TW, T12, T16; + TU = cr[WS(rs, 20)]; + TW = ci[WS(rs, 20)]; + TX = FMA(TT, TU, TV * TW); + T3I = FNMS(TV, TU, TT * TW); + T12 = cr[WS(rs, 28)]; + T16 = ci[WS(rs, 28)]; + T17 = FMA(T11, T12, T15 * T16); + T3M = FNMS(T15, T12, T11 * T16); + } + { + E TY, T1h, T6g, T6h; + TY = TQ + TX; + T1h = T17 + T1g; + T1i = TY + T1h; + T7V = TY - T1h; + T6g = T3M + T3N; + T6h = T3H + T3I; + T6i = T6g - T6h; + T7D = T6h + T6g; + } + { + E T3G, T3J, T3L, T3O; + T3G = TQ - TX; + T3J = T3H - T3I; + T3K = T3G + T3J; + T5u = T3G - T3J; + T3L = T17 - T1g; + T3O = T3M - T3N; + T3P = T3L - T3O; + T5v = T3L + T3O; + } + } + { + E T1m, T3X, T1C, T3U, T1r, T3Y, T1x, T3T; + { + E T1k, T1l, T1z, T1B; + T1k = cr[WS(rs, 2)]; + T1l = ci[WS(rs, 2)]; + T1m = FMA(T8, T1k, Td * T1l); + T3X = FNMS(Td, T1k, T8 * 
T1l); + T1z = cr[WS(rs, 26)]; + T1B = ci[WS(rs, 26)]; + T1C = FMA(T1y, T1z, T1A * T1B); + T3U = FNMS(T1A, T1z, T1y * T1B); + } + { + E T1o, T1q, T1u, T1w; + T1o = cr[WS(rs, 18)]; + T1q = ci[WS(rs, 18)]; + T1r = FMA(T1n, T1o, T1p * T1q); + T3Y = FNMS(T1p, T1o, T1n * T1q); + T1u = cr[WS(rs, 10)]; + T1w = ci[WS(rs, 10)]; + T1x = FMA(T1t, T1u, T1v * T1w); + T3T = FNMS(T1v, T1u, T1t * T1w); + } + { + E T1s, T1D, T6l, T6m; + T1s = T1m + T1r; + T1D = T1x + T1C; + T1E = T1s + T1D; + T6k = T1s - T1D; + T6l = T3X + T3Y; + T6m = T3T + T3U; + T6n = T6l - T6m; + T7f = T6l + T6m; + } + { + E T3S, T3V, T3Z, T40; + T3S = T1m - T1r; + T3V = T3T - T3U; + T3W = T3S + T3V; + T5z = T3S - T3V; + T3Z = T3X - T3Y; + T40 = T1x - T1C; + T41 = T3Z - T40; + T5y = T3Z + T40; + } + } + { + E T1J, T43, T27, T4a, T1U, T44, T20, T49; + { + E T1G, T1I, T24, T26; + T1G = cr[WS(rs, 30)]; + T1I = ci[WS(rs, 30)]; + T1J = FMA(T1F, T1G, T1H * T1I); + T43 = FNMS(T1H, T1G, T1F * T1I); + T24 = cr[WS(rs, 22)]; + T26 = ci[WS(rs, 22)]; + T27 = FMA(T23, T24, T25 * T26); + T4a = FNMS(T25, T24, T23 * T26); + } + { + E T1R, T1T, T1X, T1Z; + T1R = cr[WS(rs, 14)]; + T1T = ci[WS(rs, 14)]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T44 = FNMS(T1S, T1R, T1Q * T1T); + T1X = cr[WS(rs, 6)]; + T1Z = ci[WS(rs, 6)]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T49 = FNMS(T1Y, T1X, T1W * T1Z); + } + { + E T1V, T28, T6q, T6r; + T1V = T1J + T1U; + T28 = T20 + T27; + T29 = T1V + T28; + T6p = T1V - T28; + T6q = T43 + T44; + T6r = T49 + T4a; + T6s = T6q - T6r; + T7e = T6q + T6r; + } + { + E T45, T46, T48, T4b; + T45 = T43 - T44; + T46 = T20 - T27; + T47 = T45 - T46; + T5C = T45 + T46; + T48 = T1J - T1U; + T4b = T49 - T4a; + T4c = T48 + T4b; + T5B = T48 - T4b; + } + } + { + E T2B, T4m, T2G, T4n, T4l, T4o, T2M, T4q, T2P, T4r, T4s, T4t; + { + E T2z, T2A, T2D, T2F; + T2z = cr[WS(rs, 5)]; + T2A = ci[WS(rs, 5)]; + T2B = FMA(T21, T2z, T22 * T2A); + T4m = FNMS(T22, T2z, T21 * T2A); + T2D = cr[WS(rs, 21)]; + T2F = ci[WS(rs, 21)]; + T2G = FMA(T2C, T2D, T2E 
* T2F); + T4n = FNMS(T2E, T2D, T2C * T2F); + } + T4l = T2B - T2G; + T4o = T4m - T4n; + { + E T2J, T2L, T2N, T2O; + T2J = cr[WS(rs, 29)]; + T2L = ci[WS(rs, 29)]; + T2M = FMA(T2I, T2J, T2K * T2L); + T4q = FNMS(T2K, T2J, T2I * T2L); + T2N = cr[WS(rs, 13)]; + T2O = ci[WS(rs, 13)]; + T2P = FMA(T1M, T2N, T1P * T2O); + T4r = FNMS(T1P, T2N, T1M * T2O); + } + T4s = T4q - T4r; + T4t = T2M - T2P; + { + E T2H, T2Q, T6C, T6D; + T2H = T2B + T2G; + T2Q = T2M + T2P; + T2R = T2H + T2Q; + T6z = T2H - T2Q; + T6C = T4q + T4r; + T6D = T4m + T4n; + T6E = T6C - T6D; + T7k = T6D + T6C; + } + { + E T4p, T4u, T4C, T4D; + T4p = T4l + T4o; + T4u = T4s - T4t; + T4v = KP707106781 * (T4p - T4u); + T5K = KP707106781 * (T4p + T4u); + T4C = T4t + T4s; + T4D = T4l - T4o; + T4E = KP707106781 * (T4C - T4D); + T5H = KP707106781 * (T4D + T4C); + } + } + { + E T3k, T4S, T3p, T4T, T4R, T4U, T3t, T4N, T3w, T4O, T4M, T4P; + { + E T3i, T3j, T3m, T3o; + T3i = cr[WS(rs, 3)]; + T3j = ci[WS(rs, 3)]; + T3k = FMA(T3, T3i, T6 * T3j); + T4S = FNMS(T6, T3i, T3 * T3j); + T3m = cr[WS(rs, 19)]; + T3o = ci[WS(rs, 19)]; + T3p = FMA(T3l, T3m, T3n * T3o); + T4T = FNMS(T3n, T3m, T3l * T3o); + } + T4R = T3k - T3p; + T4U = T4S - T4T; + { + E T3r, T3s, T3u, T3v; + T3r = cr[WS(rs, 27)]; + T3s = ci[WS(rs, 27)]; + T3t = FMA(Th, T3r, Tl * T3s); + T4N = FNMS(Tl, T3r, Th * T3s); + T3u = cr[WS(rs, 11)]; + T3v = ci[WS(rs, 11)]; + T3w = FMA(Tg, T3u, Tk * T3v); + T4O = FNMS(Tk, T3u, Tg * T3v); + } + T4M = T3t - T3w; + T4P = T4N - T4O; + { + E T3q, T3x, T6I, T6J; + T3q = T3k + T3p; + T3x = T3t + T3w; + T3y = T3q + T3x; + T6P = T3q - T3x; + T6I = T4N + T4O; + T6J = T4S + T4T; + T6K = T6I - T6J; + T7p = T6J + T6I; + } + { + E T4Q, T4V, T53, T54; + T4Q = T4M + T4P; + T4V = T4R - T4U; + T4W = KP707106781 * (T4Q - T4V); + T5O = KP707106781 * (T4V + T4Q); + T53 = T4R + T4U; + T54 = T4P - T4M; + T55 = KP707106781 * (T53 - T54); + T5R = KP707106781 * (T53 + T54); + } + } + { + E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B; + { + E T1j, T2a, T7C, T7J; 
+ T1j = TL + T1i; + T2a = T1E + T29; + T2b = T1j + T2a; + T7x = T1j - T2a; + T7C = T7f + T7e; + T7J = T7D + T7I; + T7K = T7C + T7J; + T7M = T7J - T7C; + } + { + E T2S, T3z, T7y, T7z; + T2S = T2y + T2R; + T3z = T3h + T3y; + T3A = T2S + T3z; + T7L = T3z - T2S; + T7y = T7o + T7p; + T7z = T7j + T7k; + T7A = T7y - T7z; + T7B = T7z + T7y; + } + ci[WS(rs, 15)] = T2b - T3A; + cr[WS(rs, 24)] = T7L - T7M; + ci[WS(rs, 23)] = T7L + T7M; + cr[0] = T2b + T3A; + cr[WS(rs, 8)] = T7x - T7A; + cr[WS(rs, 16)] = T7B - T7K; + ci[WS(rs, 31)] = T7B + T7K; + ci[WS(rs, 7)] = T7x + T7A; + } + { + E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T; + E T5X, T5w, T89; + T5w = KP707106781 * (T5u + T5v); + T5x = T5t - T5w; + T5Z = T5t + T5w; + T89 = KP707106781 * (T3K - T3P); + T8d = T89 + T8c; + T8j = T8c - T89; + { + E T5A, T5D, T67, T68; + T5A = FMA(KP923879532, T5y, KP382683432 * T5z); + T5D = FNMS(KP923879532, T5C, KP382683432 * T5B); + T5E = T5A + T5D; + T88 = T5A - T5D; + T67 = T5N + T5O; + T68 = T5Q + T5R; + T69 = FNMS(KP980785280, T68, KP195090322 * T67); + T6d = FMA(KP980785280, T67, KP195090322 * T68); + } + { + E T5I, T5L, T60, T61; + T5I = T5G - T5H; + T5L = T5J - T5K; + T5M = FMA(KP831469612, T5I, KP555570233 * T5L); + T5W = FNMS(KP831469612, T5L, KP555570233 * T5I); + T60 = FNMS(KP382683432, T5y, KP923879532 * T5z); + T61 = FMA(KP382683432, T5C, KP923879532 * T5B); + T62 = T60 + T61; + T8i = T61 - T60; + } + { + E T64, T65, T5P, T5S; + T64 = T5G + T5H; + T65 = T5J + T5K; + T66 = FMA(KP195090322, T64, KP980785280 * T65); + T6c = FNMS(KP195090322, T65, KP980785280 * T64); + T5P = T5N - T5O; + T5S = T5Q - T5R; + T5T = FNMS(KP555570233, T5S, KP831469612 * T5P); + T5X = FMA(KP555570233, T5P, KP831469612 * T5S); + } + { + E T5F, T5U, T8h, T8k; + T5F = T5x + T5E; + T5U = T5M + T5T; + ci[WS(rs, 12)] = T5F - T5U; + cr[WS(rs, 3)] = T5F + T5U; + T8h = T5X - T5W; + T8k = T8i + T8j; + cr[WS(rs, 19)] = T8h - T8k; + ci[WS(rs, 28)] = T8h + T8k; + } + { + E T8l, T8m, T5V, 
T5Y; + T8l = T5T - T5M; + T8m = T8j - T8i; + cr[WS(rs, 27)] = T8l - T8m; + ci[WS(rs, 20)] = T8l + T8m; + T5V = T5x - T5E; + T5Y = T5W + T5X; + cr[WS(rs, 11)] = T5V - T5Y; + ci[WS(rs, 4)] = T5V + T5Y; + } + { + E T63, T6a, T87, T8e; + T63 = T5Z - T62; + T6a = T66 + T69; + ci[WS(rs, 8)] = T63 - T6a; + cr[WS(rs, 7)] = T63 + T6a; + T87 = T69 - T66; + T8e = T88 + T8d; + cr[WS(rs, 31)] = T87 - T8e; + ci[WS(rs, 16)] = T87 + T8e; + } + { + E T8f, T8g, T6b, T6e; + T8f = T6d - T6c; + T8g = T8d - T88; + cr[WS(rs, 23)] = T8f - T8g; + ci[WS(rs, 24)] = T8f + T8g; + T6b = T5Z + T62; + T6e = T6c + T6d; + cr[WS(rs, 15)] = T6b - T6e; + ci[0] = T6b + T6e; + } + } + { + E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v; + { + E T7d, T7g, T7O, T7P; + T7d = TL - T1i; + T7g = T7e - T7f; + T7h = T7d - T7g; + T7t = T7d + T7g; + T7O = T1E - T29; + T7P = T7I - T7D; + T7Q = T7O + T7P; + T7S = T7P - T7O; + } + { + E T7i, T7l, T7n, T7q; + T7i = T2y - T2R; + T7l = T7j - T7k; + T7m = T7i + T7l; + T7u = T7i - T7l; + T7n = T3h - T3y; + T7q = T7o - T7p; + T7r = T7n - T7q; + T7v = T7n + T7q; + } + { + E T7s, T7R, T7w, T7N; + T7s = KP707106781 * (T7m + T7r); + ci[WS(rs, 11)] = T7h - T7s; + cr[WS(rs, 4)] = T7h + T7s; + T7R = KP707106781 * (T7v - T7u); + cr[WS(rs, 20)] = T7R - T7S; + ci[WS(rs, 27)] = T7R + T7S; + T7w = KP707106781 * (T7u + T7v); + cr[WS(rs, 12)] = T7t - T7w; + ci[WS(rs, 3)] = T7t + T7w; + T7N = KP707106781 * (T7r - T7m); + cr[WS(rs, 28)] = T7N - T7Q; + ci[WS(rs, 19)] = T7N + T7Q; + } + } + { + E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R; + E T6V; + { + E T6o, T6t, T6A, T6F; + T6j = T6f - T6i; + T7X = T7V + T7W; + T83 = T7W - T7V; + T6X = T6f + T6i; + T6o = T6k + T6n; + T6t = T6p - T6s; + T6u = KP707106781 * (T6o + T6t); + T7U = KP707106781 * (T6o - T6t); + { + E T75, T76, T6Y, T6Z; + T75 = T6O + T6P; + T76 = T6H + T6K; + T77 = FMA(KP382683432, T75, KP923879532 * T76); + T7b = FNMS(KP923879532, T75, KP382683432 * T76); + T6Y = T6k - T6n; + T6Z = T6p + T6s; + T70 = 
KP707106781 * (T6Y + T6Z); + T82 = KP707106781 * (T6Z - T6Y); + } + T6A = T6y - T6z; + T6F = T6B - T6E; + T6G = FMA(KP382683432, T6A, KP923879532 * T6F); + T6U = FNMS(KP923879532, T6A, KP382683432 * T6F); + { + E T72, T73, T6L, T6Q; + T72 = T6B + T6E; + T73 = T6y + T6z; + T74 = FNMS(KP382683432, T73, KP923879532 * T72); + T7a = FMA(KP923879532, T73, KP382683432 * T72); + T6L = T6H - T6K; + T6Q = T6O - T6P; + T6R = FNMS(KP382683432, T6Q, KP923879532 * T6L); + T6V = FMA(KP923879532, T6Q, KP382683432 * T6L); + } + } + { + E T6v, T6S, T81, T84; + T6v = T6j + T6u; + T6S = T6G + T6R; + ci[WS(rs, 13)] = T6v - T6S; + cr[WS(rs, 2)] = T6v + T6S; + T81 = T6V - T6U; + T84 = T82 + T83; + cr[WS(rs, 18)] = T81 - T84; + ci[WS(rs, 29)] = T81 + T84; + } + { + E T85, T86, T6T, T6W; + T85 = T6R - T6G; + T86 = T83 - T82; + cr[WS(rs, 26)] = T85 - T86; + ci[WS(rs, 21)] = T85 + T86; + T6T = T6j - T6u; + T6W = T6U + T6V; + cr[WS(rs, 10)] = T6T - T6W; + ci[WS(rs, 5)] = T6T + T6W; + } + { + E T71, T78, T7T, T7Y; + T71 = T6X + T70; + T78 = T74 + T77; + cr[WS(rs, 14)] = T71 - T78; + ci[WS(rs, 1)] = T71 + T78; + T7T = T7b - T7a; + T7Y = T7U + T7X; + cr[WS(rs, 30)] = T7T - T7Y; + ci[WS(rs, 17)] = T7T + T7Y; + } + { + E T7Z, T80, T79, T7c; + T7Z = T77 - T74; + T80 = T7X - T7U; + cr[WS(rs, 22)] = T7Z - T80; + ci[WS(rs, 25)] = T7Z + T80; + T79 = T6X - T70; + T7c = T7a + T7b; + ci[WS(rs, 9)] = T79 - T7c; + cr[WS(rs, 6)] = T79 + T7c; + } + } + { + E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57; + E T5b, T3Q, T8p; + T3Q = KP707106781 * (T3K + T3P); + T3R = T3F - T3Q; + T5d = T3F + T3Q; + T8p = KP707106781 * (T5v - T5u); + T8r = T8p + T8q; + T8x = T8q - T8p; + { + E T42, T4d, T5l, T5m; + T42 = FNMS(KP923879532, T41, KP382683432 * T3W); + T4d = FMA(KP923879532, T47, KP382683432 * T4c); + T4e = T42 + T4d; + T8o = T4d - T42; + T5l = T52 + T55; + T5m = T4L + T4W; + T5n = FNMS(KP195090322, T5m, KP980785280 * T5l); + T5r = FMA(KP980785280, T5m, KP195090322 * T5l); + } + { + E 
T4w, T4F, T5e, T5f; + T4w = T4k - T4v; + T4F = T4B - T4E; + T4G = FNMS(KP555570233, T4F, KP831469612 * T4w); + T5a = FMA(KP831469612, T4F, KP555570233 * T4w); + T5e = FMA(KP382683432, T41, KP923879532 * T3W); + T5f = FNMS(KP382683432, T47, KP923879532 * T4c); + T5g = T5e + T5f; + T8w = T5e - T5f; + } + { + E T5i, T5j, T4X, T56; + T5i = T4B + T4E; + T5j = T4k + T4v; + T5k = FMA(KP195090322, T5i, KP980785280 * T5j); + T5q = FNMS(KP980785280, T5i, KP195090322 * T5j); + T4X = T4L - T4W; + T56 = T52 - T55; + T57 = FMA(KP555570233, T4X, KP831469612 * T56); + T5b = FNMS(KP831469612, T4X, KP555570233 * T56); + } + { + E T4f, T58, T8v, T8y; + T4f = T3R + T4e; + T58 = T4G + T57; + cr[WS(rs, 13)] = T4f - T58; + ci[WS(rs, 2)] = T4f + T58; + T8v = T5b - T5a; + T8y = T8w + T8x; + cr[WS(rs, 29)] = T8v - T8y; + ci[WS(rs, 18)] = T8v + T8y; + } + { + E T8z, T8A, T59, T5c; + T8z = T57 - T4G; + T8A = T8x - T8w; + cr[WS(rs, 21)] = T8z - T8A; + ci[WS(rs, 26)] = T8z + T8A; + T59 = T3R - T4e; + T5c = T5a + T5b; + ci[WS(rs, 10)] = T59 - T5c; + cr[WS(rs, 5)] = T59 + T5c; + } + { + E T5h, T5o, T8n, T8s; + T5h = T5d + T5g; + T5o = T5k + T5n; + ci[WS(rs, 14)] = T5h - T5o; + cr[WS(rs, 1)] = T5h + T5o; + T8n = T5r - T5q; + T8s = T8o + T8r; + cr[WS(rs, 17)] = T8n - T8s; + ci[WS(rs, 30)] = T8n + T8s; + } + { + E T8t, T8u, T5p, T5s; + T8t = T5n - T5k; + T8u = T8r - T8o; + cr[WS(rs, 25)] = T8t - T8u; + ci[WS(rs, 22)] = T8t + T8u; + T5p = T5d - T5g; + T5s = T5q + T5r; + cr[WS(rs, 9)] = T5p - T5s; + ci[WS(rs, 6)] = T5p + T5s; + } + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 9}, + {TW_CEXP, 1, 27}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hf2_32", twinstr, &GENUS, {376, 168, 112, 0} }; + +void X(codelet_hf2_32) (planner *p) { + X(khc2hc_register) (p, hf2_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:11 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hf2_4 -include hf.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 33 stack variables, 0 constants, and 16 memory accesses + */ +#include "hf.h" + +static void hf2_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(8, rs)) { + E Ti, Tq, To, Te, TA, Ty, Tm, Ts; + { + E T2, T6, T3, T5; + T2 = W[0]; + T6 = W[3]; + T3 = W[2]; + T5 = W[1]; + { + E T1, Tx, Td, Tw, Tj, Tl, Ta, T4, Tk, Tr; + T1 = cr[0]; + Ta = T2 * T6; + T4 = T2 * T3; + Tx = ci[0]; + 
{ + E T8, Tb, T7, Tc; + T8 = cr[WS(rs, 2)]; + Tb = FNMS(T5, T3, Ta); + T7 = FMA(T5, T6, T4); + Tc = ci[WS(rs, 2)]; + { + E Tf, Th, T9, Tv, Tg, Tp; + Tf = cr[WS(rs, 1)]; + Th = ci[WS(rs, 1)]; + T9 = T7 * T8; + Tv = T7 * Tc; + Tg = T2 * Tf; + Tp = T2 * Th; + Td = FMA(Tb, Tc, T9); + Tw = FNMS(Tb, T8, Tv); + Ti = FMA(T5, Th, Tg); + Tq = FNMS(T5, Tf, Tp); + } + Tj = cr[WS(rs, 3)]; + Tl = ci[WS(rs, 3)]; + } + To = T1 - Td; + Te = T1 + Td; + Tk = T3 * Tj; + Tr = T3 * Tl; + TA = Tx - Tw; + Ty = Tw + Tx; + Tm = FMA(T6, Tl, Tk); + Ts = FNMS(T6, Tj, Tr); + } + } + { + E Tn, Tz, Tt, Tu; + Tn = Ti + Tm; + Tz = Tm - Ti; + Tt = Tq - Ts; + Tu = Tq + Ts; + ci[WS(rs, 2)] = Tz + TA; + cr[WS(rs, 3)] = Tz - TA; + cr[0] = Te + Tn; + ci[WS(rs, 1)] = Te - Tn; + ci[WS(rs, 3)] = Tu + Ty; + cr[WS(rs, 2)] = Tu - Ty; + cr[WS(rs, 1)] = To + Tt; + ci[0] = To - Tt; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hf2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hf2_4) (planner *p) { + X(khc2hc_register) (p, hf2_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hf2_4 -include hf.h */ + +/* + * This function contains 24 FP additions, 16 FP multiplications, + * (or, 16 additions, 8 multiplications, 8 fused multiply/add), + * 21 stack variables, 0 constants, and 16 memory accesses + */ +#include "hf.h" + +static void hf2_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(8, rs)) { + E T2, T4, T3, T5, T6, T8; + T2 = W[0]; + T4 = W[1]; + T3 = W[2]; + T5 = W[3]; + T6 = FMA(T2, T3, T4 * T5); + T8 = FNMS(T4, T3, T2 * T5); + { + E T1, Tp, Ta, To, Te, Tk, Th, Tl, T7, T9; + T1 = cr[0]; + Tp = ci[0]; + T7 = cr[WS(rs, 2)]; 
+ T9 = ci[WS(rs, 2)]; + Ta = FMA(T6, T7, T8 * T9); + To = FNMS(T8, T7, T6 * T9); + { + E Tc, Td, Tf, Tg; + Tc = cr[WS(rs, 1)]; + Td = ci[WS(rs, 1)]; + Te = FMA(T2, Tc, T4 * Td); + Tk = FNMS(T4, Tc, T2 * Td); + Tf = cr[WS(rs, 3)]; + Tg = ci[WS(rs, 3)]; + Th = FMA(T3, Tf, T5 * Tg); + Tl = FNMS(T5, Tf, T3 * Tg); + } + { + E Tb, Ti, Tj, Tm; + Tb = T1 + Ta; + Ti = Te + Th; + ci[WS(rs, 1)] = Tb - Ti; + cr[0] = Tb + Ti; + Tj = T1 - Ta; + Tm = Tk - Tl; + ci[0] = Tj - Tm; + cr[WS(rs, 1)] = Tj + Tm; + } + { + E Tn, Tq, Tr, Ts; + Tn = Tk + Tl; + Tq = To + Tp; + cr[WS(rs, 2)] = Tn - Tq; + ci[WS(rs, 3)] = Tn + Tq; + Tr = Th - Te; + Ts = Tp - To; + cr[WS(rs, 3)] = Tr - Ts; + ci[WS(rs, 2)] = Tr + Ts; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hf2_4", twinstr, &GENUS, {16, 8, 8, 0} }; + +void X(codelet_hf2_4) (planner *p) { + X(khc2hc_register) (p, hf2_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:14 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -dit -name hf2_5 -include hf.h */ + +/* + * This function contains 44 FP additions, 40 FP multiplications, + * (or, 14 additions, 10 multiplications, 30 fused multiply/add), + * 47 stack variables, 4 constants, and 20 memory accesses + */ +#include "hf.h" + +static void hf2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) { + E Ta, T1, TL, Tp, TT, Ti, TM, TC, To, TE, Ts, TF, T2, T8, T5; + E TS, Tt, TG; + T2 = W[0]; + Ta = W[3]; + T8 = W[2]; + T5 = W[1]; + { + E Tq, Tr, Te, T9; + T1 = cr[0]; + Te = T2 * Ta; + T9 = T2 * T8; + TL = ci[0]; + { + E T3, Tf, Tm, Tj, Tb, T4, T6, Tc, Tg; + T3 = cr[WS(rs, 1)]; + Tf = FMA(T5, T8, Te); + Tm = FNMS(T5, T8, Te); + Tj = FMA(T5, Ta, T9); + Tb = FNMS(T5, Ta, T9); + T4 = T2 * T3; + T6 = ci[WS(rs, 1)]; + Tc = cr[WS(rs, 4)]; + Tg = ci[WS(rs, 4)]; + { + E Tk, Tl, Tn, TD; + { + E T7, Tz, Th, TB, Ty, Td, TA; + Tk = cr[WS(rs, 2)]; + T7 = FMA(T5, T6, T4); + Ty = T2 * T6; + Td = Tb * Tc; + TA = Tb * Tg; + Tl = Tj * Tk; + Tz = FNMS(T5, T3, Ty); + Th = FMA(Tf, Tg, 
Td); + TB = FNMS(Tf, Tc, TA); + Tn = ci[WS(rs, 2)]; + Tp = cr[WS(rs, 3)]; + TT = Th - T7; + Ti = T7 + Th; + TM = Tz + TB; + TC = Tz - TB; + TD = Tj * Tn; + Tq = T8 * Tp; + Tr = ci[WS(rs, 3)]; + } + To = FMA(Tm, Tn, Tl); + TE = FNMS(Tm, Tk, TD); + } + } + Ts = FMA(Ta, Tr, Tq); + TF = T8 * Tr; + } + TS = To - Ts; + Tt = To + Ts; + TG = FNMS(Ta, Tp, TF); + { + E TU, TW, TV, TR, Tw, Tu; + TU = FMA(KP618033988, TT, TS); + TW = FNMS(KP618033988, TS, TT); + Tw = Ti - Tt; + Tu = Ti + Tt; + { + E TN, TH, Tv, TI, TK; + TN = TE + TG; + TH = TE - TG; + cr[0] = T1 + Tu; + Tv = FNMS(KP250000000, Tu, T1); + TI = FMA(KP618033988, TH, TC); + TK = FNMS(KP618033988, TC, TH); + { + E TQ, TO, Tx, TJ, TP; + TQ = TM - TN; + TO = TM + TN; + Tx = FMA(KP559016994, Tw, Tv); + TJ = FNMS(KP559016994, Tw, Tv); + ci[WS(rs, 4)] = TO + TL; + TP = FNMS(KP250000000, TO, TL); + ci[WS(rs, 1)] = FMA(KP951056516, TK, TJ); + cr[WS(rs, 2)] = FNMS(KP951056516, TK, TJ); + cr[WS(rs, 1)] = FMA(KP951056516, TI, Tx); + ci[0] = FNMS(KP951056516, TI, Tx); + TV = FMA(KP559016994, TQ, TP); + TR = FNMS(KP559016994, TQ, TP); + } + } + ci[WS(rs, 2)] = FMA(KP951056516, TU, TR); + cr[WS(rs, 3)] = FMS(KP951056516, TU, TR); + ci[WS(rs, 3)] = FMA(KP951056516, TW, TV); + cr[WS(rs, 4)] = FMS(KP951056516, TW, TV); + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hf2_5", twinstr, &GENUS, {14, 10, 30, 0} }; + +void X(codelet_hf2_5) (planner *p) { + X(khc2hc_register) (p, hf2_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -dit -name hf2_5 -include hf.h */ + +/* + * This function contains 44 FP additions, 32 FP multiplications, + * (or, 30 additions, 18 multiplications, 14 fused multiply/add), + * 37 stack variables, 4 constants, and 20 memory accesses + */ +#include "hf.h" + +static void hf2_5(R *cr, R 
*ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) { + E T2, T4, T7, T9, Tb, Tl, Tf, Tj; + { + E T8, Te, Ta, Td; + T2 = W[0]; + T4 = W[1]; + T7 = W[2]; + T9 = W[3]; + T8 = T2 * T7; + Te = T4 * T7; + Ta = T4 * T9; + Td = T2 * T9; + Tb = T8 - Ta; + Tl = Td - Te; + Tf = Td + Te; + Tj = T8 + Ta; + } + { + E T1, TI, Ty, TB, TG, TF, TJ, TK, TL, Ti, Tr, Ts; + T1 = cr[0]; + TI = ci[0]; + { + E T6, Tw, Tq, TA, Th, Tx, Tn, Tz; + { + E T3, T5, To, Tp; + T3 = cr[WS(rs, 1)]; + T5 = ci[WS(rs, 1)]; + T6 = FMA(T2, T3, T4 * T5); + Tw = FNMS(T4, T3, T2 * T5); + To = cr[WS(rs, 3)]; + Tp = ci[WS(rs, 3)]; + Tq = FMA(T7, To, T9 * Tp); + TA = FNMS(T9, To, T7 * Tp); + } + { + E Tc, Tg, Tk, Tm; + Tc = cr[WS(rs, 4)]; + Tg = ci[WS(rs, 4)]; + Th = FMA(Tb, Tc, Tf * Tg); + Tx = FNMS(Tf, Tc, Tb * Tg); + Tk = cr[WS(rs, 2)]; + Tm = ci[WS(rs, 2)]; + Tn = FMA(Tj, Tk, Tl * Tm); + Tz = FNMS(Tl, Tk, Tj * Tm); + } + Ty = Tw - Tx; + TB = Tz - TA; + TG = Tn - Tq; + TF = Th - T6; + TJ = Tw + Tx; + TK = Tz + TA; + TL = TJ + TK; + Ti = T6 + Th; + Tr = Tn + Tq; + Ts = Ti + Tr; + } + cr[0] = T1 + Ts; + { + E TC, TE, Tv, TD, Tt, Tu; + TC = FMA(KP951056516, Ty, KP587785252 * TB); + TE = FNMS(KP587785252, Ty, KP951056516 * TB); + Tt = KP559016994 * (Ti - Tr); + Tu = FNMS(KP250000000, Ts, T1); + Tv = Tt + Tu; + TD = Tu - Tt; + ci[0] = Tv - TC; + ci[WS(rs, 1)] = TD + TE; + cr[WS(rs, 1)] = Tv + TC; + cr[WS(rs, 2)] = TD - TE; + } + ci[WS(rs, 4)] = TL + TI; + { + E TH, TP, TO, TQ, TM, TN; + TH = FMA(KP587785252, TF, KP951056516 * TG); + TP = FNMS(KP587785252, TG, KP951056516 * TF); + TM = FNMS(KP250000000, 
TL, TI); + TN = KP559016994 * (TJ - TK); + TO = TM - TN; + TQ = TN + TM; + cr[WS(rs, 3)] = TH - TO; + ci[WS(rs, 3)] = TP + TQ; + ci[WS(rs, 2)] = TH + TO; + cr[WS(rs, 4)] = TP - TQ; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hf2_5", twinstr, &GENUS, {30, 18, 14, 0} }; + +void X(codelet_hf2_5) (planner *p) { + X(khc2hc_register) (p, hf2_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf2_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:12 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -dit -name hf2_8 -include hf.h */ + +/* + * This function contains 74 FP additions, 50 FP multiplications, + * (or, 44 additions, 20 multiplications, 30 fused multiply/add), + * 64 stack variables, 1 constants, and 32 memory accesses + */ +#include "hf.h" + +static void hf2_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E TS, T1l, TJ, T1m, T1k, Tw, T1w, T1u; + { + E T2, T3, Tl, Tn, T5, T4, Tm, Tr, T6; + T2 = W[0]; + T3 = W[2]; + Tl = W[4]; + Tn = W[5]; + T5 = W[1]; + T4 = T2 * T3; + Tm = T2 * Tl; + Tr = T2 * Tn; + T6 = W[3]; + { + E T1, T1s, TG, Td, T1r, Tu, TY, Tk, TW, T18, T1d, TD, TH, TA, T13; + E TE, T14; + { + E To, Ts, Tf, T7, T8, Ti, Tb, T9, Tc, TC, Ta, TF, TB, Tg, Th; + E Tj; + T1 = cr[0]; + To = FMA(T5, Tn, Tm); + Ts = FNMS(T5, Tl, Tr); + Tf = FMA(T5, T6, T4); + T7 = FNMS(T5, T6, T4); + Ta = T2 * T6; + T1s = ci[0]; + T8 = cr[WS(rs, 4)]; + TF = Tf * Tn; + TB = Tf * Tl; + Ti = FNMS(T5, T3, Ta); + Tb = FMA(T5, T3, Ta); + T9 = T7 * T8; + Tc = ci[WS(rs, 4)]; + TG = FNMS(Ti, Tl, TF); + TC = FMA(Ti, Tn, TB); + { + E Tp, T1q, Tt, Tq, TX; + Tp = cr[WS(rs, 6)]; + Td = FMA(Tb, Tc, T9); + T1q = T7 * Tc; + Tt = ci[WS(rs, 6)]; + Tq = To * Tp; + Tg = cr[WS(rs, 2)]; + T1r = FNMS(Tb, T8, 
T1q); + TX = To * Tt; + Tu = FMA(Ts, Tt, Tq); + Th = Tf * Tg; + Tj = ci[WS(rs, 2)]; + TY = FNMS(Ts, Tp, TX); + } + { + E TO, TQ, TN, TP, T1a, T1b; + { + E TK, TM, TL, T19, TV; + TK = cr[WS(rs, 7)]; + TM = ci[WS(rs, 7)]; + Tk = FMA(Ti, Tj, Th); + TV = Tf * Tj; + TL = Tl * TK; + T19 = Tl * TM; + TO = cr[WS(rs, 3)]; + TW = FNMS(Ti, Tg, TV); + TQ = ci[WS(rs, 3)]; + TN = FMA(Tn, TM, TL); + TP = T3 * TO; + T1a = FNMS(Tn, TK, T19); + T1b = T3 * TQ; + } + { + E Tx, Tz, Ty, T12, T1c, TR; + Tx = cr[WS(rs, 1)]; + TR = FMA(T6, TQ, TP); + Tz = ci[WS(rs, 1)]; + T1c = FNMS(T6, TO, T1b); + Ty = T2 * Tx; + T18 = TN - TR; + TS = TN + TR; + T12 = T2 * Tz; + T1d = T1a - T1c; + T1l = T1a + T1c; + TD = cr[WS(rs, 5)]; + TH = ci[WS(rs, 5)]; + TA = FMA(T5, Tz, Ty); + T13 = FNMS(T5, Tx, T12); + TE = TC * TD; + T14 = TC * TH; + } + } + } + { + E Te, T1p, Tv, T1t; + { + E T1g, T10, T1z, T1B, T1C, T1j, T1A, T1f; + { + E T1x, T11, T16, T1y; + { + E TU, TZ, TI, T15; + Te = T1 + Td; + TU = T1 - Td; + TZ = TW - TY; + T1p = TW + TY; + TI = FMA(TG, TH, TE); + T15 = FNMS(TG, TD, T14); + Tv = Tk + Tu; + T1x = Tk - Tu; + T1g = TU - TZ; + T10 = TU + TZ; + T11 = TA - TI; + TJ = TA + TI; + T1m = T13 + T15; + T16 = T13 - T15; + T1y = T1s - T1r; + T1t = T1r + T1s; + } + { + E T1i, T1e, T17, T1h; + T1i = T18 + T1d; + T1e = T18 - T1d; + T17 = T11 + T16; + T1h = T11 - T16; + T1z = T1x + T1y; + T1B = T1y - T1x; + T1C = T1i - T1h; + T1j = T1h + T1i; + T1A = T1e - T17; + T1f = T17 + T1e; + } + } + cr[WS(rs, 3)] = FNMS(KP707106781, T1j, T1g); + cr[WS(rs, 7)] = FMS(KP707106781, T1A, T1z); + cr[WS(rs, 1)] = FMA(KP707106781, T1f, T10); + ci[WS(rs, 2)] = FNMS(KP707106781, T1f, T10); + ci[WS(rs, 6)] = FMA(KP707106781, T1C, T1B); + cr[WS(rs, 5)] = FMS(KP707106781, T1C, T1B); + ci[WS(rs, 4)] = FMA(KP707106781, T1A, T1z); + ci[0] = FMA(KP707106781, T1j, T1g); + } + T1k = Te - Tv; + Tw = Te + Tv; + T1w = T1t - T1p; + T1u = T1p + T1t; + } + } + } + { + E TT, T1v, T1n, T1o; + TT = TJ + TS; + T1v = TS - TJ; + T1n = T1l - T1m; 
+ T1o = T1m + T1l; + ci[WS(rs, 5)] = T1v + T1w; + cr[WS(rs, 6)] = T1v - T1w; + cr[0] = Tw + TT; + ci[WS(rs, 3)] = Tw - TT; + ci[WS(rs, 7)] = T1o + T1u; + cr[WS(rs, 4)] = T1o - T1u; + ci[WS(rs, 1)] = T1k + T1n; + cr[WS(rs, 2)] = T1k - T1n; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hf2_8", twinstr, &GENUS, {44, 20, 30, 0} }; + +void X(codelet_hf2_8) (planner *p) { + X(khc2hc_register) (p, hf2_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 8 -dit -name hf2_8 -include hf.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 42 stack variables, 1 constants, and 32 memory accesses + */ +#include "hf.h" + +static void hf2_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { + E T2, T5, T3, T6, T8, Tc, Tg, Ti, Tl, Tm, Tn, Tz, Tp, Tx; + { + E T4, Tb, T7, Ta; + T2 = W[0]; + T5 = W[1]; + T3 = W[2]; + T6 = W[3]; + T4 = T2 * T3; + Tb = T5 * T3; + T7 = T5 * T6; + Ta = T2 * T6; + T8 = T4 - T7; + Tc = Ta + Tb; + Tg = T4 + T7; + Ti = Ta - Tb; + Tl = W[4]; + Tm = W[5]; + Tn = FMA(T2, Tl, T5 * Tm); + Tz = FNMS(Ti, Tl, Tg * Tm); + Tp = FNMS(T5, Tl, T2 * Tm); + Tx = FMA(Tg, Tl, Ti * Tm); + } + { + E Tf, T1j, TL, T1d, TJ, T16, TV, TY, Ts, T1i, TO, T1a, TC, T17, TQ; + E TT; + { + E T1, T1c, Te, T1b, T9, Td; + T1 = cr[0]; + T1c = ci[0]; + T9 = cr[WS(rs, 4)]; + Td = ci[WS(rs, 4)]; + Te = FMA(T8, T9, Tc * Td); + T1b = FNMS(Tc, T9, T8 * Td); + Tf = T1 + Te; + T1j = T1c - T1b; + TL = T1 - Te; + T1d = T1b + T1c; + } + { + E TF, TW, 
TI, TX; + { + E TD, TE, TG, TH; + TD = cr[WS(rs, 7)]; + TE = ci[WS(rs, 7)]; + TF = FMA(Tl, TD, Tm * TE); + TW = FNMS(Tm, TD, Tl * TE); + TG = cr[WS(rs, 3)]; + TH = ci[WS(rs, 3)]; + TI = FMA(T3, TG, T6 * TH); + TX = FNMS(T6, TG, T3 * TH); + } + TJ = TF + TI; + T16 = TW + TX; + TV = TF - TI; + TY = TW - TX; + } + { + E Tk, TM, Tr, TN; + { + E Th, Tj, To, Tq; + Th = cr[WS(rs, 2)]; + Tj = ci[WS(rs, 2)]; + Tk = FMA(Tg, Th, Ti * Tj); + TM = FNMS(Ti, Th, Tg * Tj); + To = cr[WS(rs, 6)]; + Tq = ci[WS(rs, 6)]; + Tr = FMA(Tn, To, Tp * Tq); + TN = FNMS(Tp, To, Tn * Tq); + } + Ts = Tk + Tr; + T1i = Tk - Tr; + TO = TM - TN; + T1a = TM + TN; + } + { + E Tw, TR, TB, TS; + { + E Tu, Tv, Ty, TA; + Tu = cr[WS(rs, 1)]; + Tv = ci[WS(rs, 1)]; + Tw = FMA(T2, Tu, T5 * Tv); + TR = FNMS(T5, Tu, T2 * Tv); + Ty = cr[WS(rs, 5)]; + TA = ci[WS(rs, 5)]; + TB = FMA(Tx, Ty, Tz * TA); + TS = FNMS(Tz, Ty, Tx * TA); + } + TC = Tw + TB; + T17 = TR + TS; + TQ = Tw - TB; + TT = TR - TS; + } + { + E Tt, TK, T1f, T1g; + Tt = Tf + Ts; + TK = TC + TJ; + ci[WS(rs, 3)] = Tt - TK; + cr[0] = Tt + TK; + T1f = TJ - TC; + T1g = T1d - T1a; + cr[WS(rs, 6)] = T1f - T1g; + ci[WS(rs, 5)] = T1f + T1g; + { + E T11, T1m, T14, T1l, T12, T13; + T11 = TL - TO; + T1m = T1j - T1i; + T12 = TQ - TT; + T13 = TV + TY; + T14 = KP707106781 * (T12 + T13); + T1l = KP707106781 * (T13 - T12); + cr[WS(rs, 3)] = T11 - T14; + ci[WS(rs, 6)] = T1l + T1m; + ci[0] = T11 + T14; + cr[WS(rs, 5)] = T1l - T1m; + } + } + { + E T19, T1e, T15, T18; + T19 = T17 + T16; + T1e = T1a + T1d; + cr[WS(rs, 4)] = T19 - T1e; + ci[WS(rs, 7)] = T19 + T1e; + T15 = Tf - Ts; + T18 = T16 - T17; + cr[WS(rs, 2)] = T15 - T18; + ci[WS(rs, 1)] = T15 + T18; + { + E TP, T1k, T10, T1h, TU, TZ; + TP = TL + TO; + T1k = T1i + T1j; + TU = TQ + TT; + TZ = TV - TY; + T10 = KP707106781 * (TU + TZ); + T1h = KP707106781 * (TZ - TU); + ci[WS(rs, 2)] = TP - T10; + ci[WS(rs, 4)] = T1h + T1k; + cr[WS(rs, 1)] = TP + T10; + cr[WS(rs, 7)] = T1h - T1k; + } + } + } + } + } +} + +static const 
tw_instr twinstr[] = { + {TW_CEXP, 1, 1}, + {TW_CEXP, 1, 3}, + {TW_CEXP, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hf2_8", twinstr, &GENUS, {56, 26, 18, 0} }; + +void X(codelet_hf2_8) (planner *p) { + X(khc2hc_register) (p, hf2_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hf_10 -include hf.h */ + +/* + * This function contains 102 FP additions, 72 FP multiplications, + * (or, 48 additions, 18 multiplications, 54 fused multiply/add), + * 72 stack variables, 4 constants, and 40 memory accesses + */ +#include "hf.h" + +static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) { + E T29, T2d, T2c, T2e; + { + E T23, T1U, T8, T12, T1y, T1P, T25, T1H, T2b, T18, T10, T1Y, T1I, Tl, T13; + E T1J, Ty, T14, T1n, T1O, T24, T1K; + { + E T1, T1R, T3, T6, T2, T5; + T1 = cr[0]; + T1R = ci[0]; + T3 = cr[WS(rs, 5)]; + T6 = ci[WS(rs, 5)]; + T2 = W[8]; + T5 = W[9]; + { + E T1p, TY, T1x, T1F, TM, T16, T1r, TS; + { + E TF, T1w, TO, TR, T1u, TL, TN, TQ, T1q, TP; + { + E TU, TX, TT, TW; + { + E TB, TE, T1S, T4, TA, TD; + TB = cr[WS(rs, 4)]; + TE = ci[WS(rs, 4)]; + T1S = T2 * T6; + T4 = T2 * T3; + TA = W[6]; + TD = W[7]; + { + E T1T, T7, T1v, TC; + T1T = FNMS(T5, T3, T1S); + T7 = FMA(T5, T6, T4); + T1v = TA * TE; + TC = TA * TB; + T23 = T1T + T1R; + T1U = T1R - T1T; + T8 = T1 - T7; + T12 = T1 
+ T7; + TF = FMA(TD, TE, TC); + T1w = FNMS(TD, TB, T1v); + } + } + TU = cr[WS(rs, 1)]; + TX = ci[WS(rs, 1)]; + TT = W[0]; + TW = W[1]; + { + E TH, TK, TJ, T1t, TI, T1o, TV, TG; + TH = cr[WS(rs, 9)]; + TK = ci[WS(rs, 9)]; + T1o = TT * TX; + TV = TT * TU; + TG = W[16]; + TJ = W[17]; + T1p = FNMS(TW, TU, T1o); + TY = FMA(TW, TX, TV); + T1t = TG * TK; + TI = TG * TH; + TO = cr[WS(rs, 6)]; + TR = ci[WS(rs, 6)]; + T1u = FNMS(TJ, TH, T1t); + TL = FMA(TJ, TK, TI); + TN = W[10]; + TQ = W[11]; + } + } + T1x = T1u - T1w; + T1F = T1w + T1u; + TM = TF - TL; + T16 = TF + TL; + T1q = TN * TR; + TP = TN * TO; + T1r = FNMS(TQ, TO, T1q); + TS = FMA(TQ, TR, TP); + } + { + E T1l, Te, T1e, Tx, Tn, Tq, Tp, T1j, Tk, T1f, To; + { + E Tt, Tw, Tv, T1d, Tu; + { + E Ta, Td, T9, Tc, T1k, Tb, Ts; + Ta = cr[WS(rs, 2)]; + Td = ci[WS(rs, 2)]; + { + E T1G, T1s, TZ, T17; + T1G = T1r + T1p; + T1s = T1p - T1r; + TZ = TS - TY; + T17 = TS + TY; + T1y = T1s - T1x; + T1P = T1x + T1s; + T25 = T1F + T1G; + T1H = T1F - T1G; + T2b = T16 - T17; + T18 = T16 + T17; + T10 = TM + TZ; + T1Y = TZ - TM; + T9 = W[2]; + } + Tc = W[3]; + Tt = cr[WS(rs, 3)]; + Tw = ci[WS(rs, 3)]; + T1k = T9 * Td; + Tb = T9 * Ta; + Ts = W[4]; + Tv = W[5]; + T1l = FNMS(Tc, Ta, T1k); + Te = FMA(Tc, Td, Tb); + T1d = Ts * Tw; + Tu = Ts * Tt; + } + { + E Tg, Tj, Tf, Ti, T1i, Th, Tm; + Tg = cr[WS(rs, 7)]; + Tj = ci[WS(rs, 7)]; + T1e = FNMS(Tv, Tt, T1d); + Tx = FMA(Tv, Tw, Tu); + Tf = W[12]; + Ti = W[13]; + Tn = cr[WS(rs, 8)]; + Tq = ci[WS(rs, 8)]; + T1i = Tf * Tj; + Th = Tf * Tg; + Tm = W[14]; + Tp = W[15]; + T1j = FNMS(Ti, Tg, T1i); + Tk = FMA(Ti, Tj, Th); + T1f = Tm * Tq; + To = Tm * Tn; + } + } + { + E T1m, T1g, Tr, T1h; + T1m = T1j - T1l; + T1I = T1l + T1j; + Tl = Te - Tk; + T13 = Te + Tk; + T1g = FNMS(Tp, Tn, T1f); + Tr = FMA(Tp, Tq, To); + T1J = T1g + T1e; + T1h = T1e - T1g; + Ty = Tr - Tx; + T14 = Tr + Tx; + T1n = T1h - T1m; + T1O = T1m + T1h; + } + } + } + } + T24 = T1I + T1J; + T1K = T1I - T1J; + { + E T2a, T15, Tz, T1Z; + T2a = T13 - 
T14; + T15 = T13 + T14; + Tz = Tl + Ty; + T1Z = Ty - Tl; + { + E T1L, T1N, T1E, T1M; + { + E T19, T1D, T1C, T11, T1b; + T19 = T15 + T18; + T1D = T15 - T18; + T11 = Tz + T10; + T1b = Tz - T10; + { + E T1B, T1z, T1a, T1A, T1c; + T1B = FNMS(KP618033988, T1n, T1y); + T1z = FMA(KP618033988, T1y, T1n); + ci[WS(rs, 4)] = T8 + T11; + T1a = FNMS(KP250000000, T11, T8); + T1A = FNMS(KP559016994, T1b, T1a); + T1c = FMA(KP559016994, T1b, T1a); + T1C = FNMS(KP250000000, T19, T12); + T1L = FNMS(KP618033988, T1K, T1H); + T1N = FMA(KP618033988, T1H, T1K); + cr[WS(rs, 1)] = FMA(KP951056516, T1z, T1c); + ci[0] = FNMS(KP951056516, T1z, T1c); + cr[WS(rs, 3)] = FMA(KP951056516, T1B, T1A); + ci[WS(rs, 2)] = FNMS(KP951056516, T1B, T1A); + } + cr[0] = T12 + T19; + T1E = FNMS(KP559016994, T1D, T1C); + T1M = FMA(KP559016994, T1D, T1C); + } + { + E T1X, T21, T20, T22, T1Q, T1W, T1V, T26, T28, T27; + T1Q = T1O + T1P; + T1W = T1P - T1O; + ci[WS(rs, 3)] = FMA(KP951056516, T1N, T1M); + cr[WS(rs, 4)] = FNMS(KP951056516, T1N, T1M); + ci[WS(rs, 1)] = FMA(KP951056516, T1L, T1E); + cr[WS(rs, 2)] = FNMS(KP951056516, T1L, T1E); + T1V = FMA(KP250000000, T1Q, T1U); + cr[WS(rs, 5)] = T1Q - T1U; + T1X = FNMS(KP559016994, T1W, T1V); + T21 = FMA(KP559016994, T1W, T1V); + T20 = FNMS(KP618033988, T1Z, T1Y); + T22 = FMA(KP618033988, T1Y, T1Z); + T26 = T24 + T25; + T28 = T24 - T25; + ci[WS(rs, 8)] = FMA(KP951056516, T22, T21); + cr[WS(rs, 9)] = FMS(KP951056516, T22, T21); + ci[WS(rs, 6)] = FMA(KP951056516, T20, T1X); + cr[WS(rs, 7)] = FMS(KP951056516, T20, T1X); + T27 = FNMS(KP250000000, T26, T23); + ci[WS(rs, 9)] = T26 + T23; + T29 = FMA(KP559016994, T28, T27); + T2d = FNMS(KP559016994, T28, T27); + T2c = FMA(KP618033988, T2b, T2a); + T2e = FNMS(KP618033988, T2a, T2b); + } + } + } + } + ci[WS(rs, 7)] = FMA(KP951056516, T2e, T2d); + cr[WS(rs, 8)] = FMS(KP951056516, T2e, T2d); + ci[WS(rs, 5)] = FMA(KP951056516, T2c, T29); + cr[WS(rs, 6)] = FMS(KP951056516, T2c, T29); + } + } +} + +static const tw_instr twinstr[] = 
{ + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 10, "hf_10", twinstr, &GENUS, {48, 18, 54, 0} }; + +void X(codelet_hf_10) (planner *p) { + X(khc2hc_register) (p, hf_10, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hf_10 -include hf.h */ + +/* + * This function contains 102 FP additions, 60 FP multiplications, + * (or, 72 additions, 30 multiplications, 30 fused multiply/add), + * 45 stack variables, 4 constants, and 40 memory accesses + */ +#include "hf.h" + +static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) { + E T7, T1R, TT, T1C, TF, TQ, TR, T1o, T1p, T1P, TX, TY, TZ, T1d, T1g; + E T1x, Ti, Tt, Tu, T1r, T1s, T1O, TU, TV, TW, T16, T19, T1y; + { + E T1, T1A, T6, T1B; + T1 = cr[0]; + T1A = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 5)]; + T5 = ci[WS(rs, 5)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T1B = FNMS(T4, T3, T2 * T5); + } + T7 = T1 - T6; + T1R = T1B + T1A; + TT = T1 + T6; + T1C = T1A - T1B; + } + { + E Tz, T1b, TP, T1e, TE, T1c, TK, T1f; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 4)]; + Ty = ci[WS(rs, 4)]; + Tv = W[6]; + Tx = W[7]; + Tz = FMA(Tv, Tw, Tx * Ty); + T1b = FNMS(Tx, Tw, Tv * Ty); + } + { + E TM, TO, TL, TN; + TM = cr[WS(rs, 1)]; + TO = ci[WS(rs, 1)]; + TL = W[0]; + TN = W[1]; + TP = FMA(TL, TM, TN * TO); + T1e = FNMS(TN, TM, TL * TO); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 9)]; + TD = ci[WS(rs, 9)]; + TA = W[16]; + TC = W[17]; + TE = FMA(TA, TB, TC * 
TD); + T1c = FNMS(TC, TB, TA * TD); + } + { + E TH, TJ, TG, TI; + TH = cr[WS(rs, 6)]; + TJ = ci[WS(rs, 6)]; + TG = W[10]; + TI = W[11]; + TK = FMA(TG, TH, TI * TJ); + T1f = FNMS(TI, TH, TG * TJ); + } + TF = Tz - TE; + TQ = TK - TP; + TR = TF + TQ; + T1o = T1b + T1c; + T1p = T1f + T1e; + T1P = T1o + T1p; + TX = Tz + TE; + TY = TK + TP; + TZ = TX + TY; + T1d = T1b - T1c; + T1g = T1e - T1f; + T1x = T1g - T1d; + } + { + E Tc, T14, Ts, T18, Th, T15, Tn, T17; + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 2)]; + Tb = ci[WS(rs, 2)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + T14 = FNMS(Ta, T9, T8 * Tb); + } + { + E Tp, Tr, To, Tq; + Tp = cr[WS(rs, 3)]; + Tr = ci[WS(rs, 3)]; + To = W[4]; + Tq = W[5]; + Ts = FMA(To, Tp, Tq * Tr); + T18 = FNMS(Tq, Tp, To * Tr); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 7)]; + Tg = ci[WS(rs, 7)]; + Td = W[12]; + Tf = W[13]; + Th = FMA(Td, Te, Tf * Tg); + T15 = FNMS(Tf, Te, Td * Tg); + } + { + E Tk, Tm, Tj, Tl; + Tk = cr[WS(rs, 8)]; + Tm = ci[WS(rs, 8)]; + Tj = W[14]; + Tl = W[15]; + Tn = FMA(Tj, Tk, Tl * Tm); + T17 = FNMS(Tl, Tk, Tj * Tm); + } + Ti = Tc - Th; + Tt = Tn - Ts; + Tu = Ti + Tt; + T1r = T14 + T15; + T1s = T17 + T18; + T1O = T1r + T1s; + TU = Tc + Th; + TV = Tn + Ts; + TW = TU + TV; + T16 = T14 - T15; + T19 = T17 - T18; + T1y = T16 + T19; + } + { + E T11, TS, T12, T1i, T1k, T1a, T1h, T1j, T13; + T11 = KP559016994 * (Tu - TR); + TS = Tu + TR; + T12 = FNMS(KP250000000, TS, T7); + T1a = T16 - T19; + T1h = T1d + T1g; + T1i = FMA(KP951056516, T1a, KP587785252 * T1h); + T1k = FNMS(KP587785252, T1a, KP951056516 * T1h); + ci[WS(rs, 4)] = T7 + TS; + T1j = T12 - T11; + ci[WS(rs, 2)] = T1j - T1k; + cr[WS(rs, 3)] = T1j + T1k; + T13 = T11 + T12; + ci[0] = T13 - T1i; + cr[WS(rs, 1)] = T13 + T1i; + } + { + E T1m, T10, T1l, T1u, T1w, T1q, T1t, T1v, T1n; + T1m = KP559016994 * (TW - TZ); + T10 = TW + TZ; + T1l = FNMS(KP250000000, T10, TT); + T1q = T1o - T1p; + T1t = T1r - T1s; + T1u = FNMS(KP587785252, T1t, KP951056516 * T1q); + T1w = 
FMA(KP951056516, T1t, KP587785252 * T1q); + cr[0] = TT + T10; + T1v = T1m + T1l; + cr[WS(rs, 4)] = T1v - T1w; + ci[WS(rs, 3)] = T1v + T1w; + T1n = T1l - T1m; + cr[WS(rs, 2)] = T1n - T1u; + ci[WS(rs, 1)] = T1n + T1u; + } + { + E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I; + T1H = KP559016994 * (T1y + T1x); + T1z = T1x - T1y; + T1G = FMA(KP250000000, T1z, T1C); + T1D = Ti - Tt; + T1E = TQ - TF; + T1F = FMA(KP587785252, T1D, KP951056516 * T1E); + T1J = FNMS(KP951056516, T1D, KP587785252 * T1E); + cr[WS(rs, 5)] = T1z - T1C; + T1K = T1H + T1G; + cr[WS(rs, 9)] = T1J - T1K; + ci[WS(rs, 8)] = T1J + T1K; + T1I = T1G - T1H; + cr[WS(rs, 7)] = T1F - T1I; + ci[WS(rs, 6)] = T1F + T1I; + } + { + E T1Q, T1S, T1T, T1N, T1V, T1L, T1M, T1W, T1U; + T1Q = KP559016994 * (T1O - T1P); + T1S = T1O + T1P; + T1T = FNMS(KP250000000, T1S, T1R); + T1L = TU - TV; + T1M = TX - TY; + T1N = FMA(KP951056516, T1L, KP587785252 * T1M); + T1V = FNMS(KP587785252, T1L, KP951056516 * T1M); + ci[WS(rs, 9)] = T1S + T1R; + T1W = T1T - T1Q; + cr[WS(rs, 8)] = T1V - T1W; + ci[WS(rs, 7)] = T1V + T1W; + T1U = T1Q + T1T; + cr[WS(rs, 6)] = T1N - T1U; + ci[WS(rs, 5)] = T1N + T1U; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 10}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 10, "hf_10", twinstr, &GENUS, {72, 30, 30, 0} }; + +void X(codelet_hf_10) (planner *p) { + X(khc2hc_register) (p, hf_10, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or 
+ * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hf_12 -include hf.h */ + +/* + * This function contains 118 FP additions, 68 FP multiplications, + * (or, 72 additions, 22 multiplications, 46 fused multiply/add), + * 84 stack variables, 2 constants, and 48 memory accesses + */ +#include "hf.h" + +static void hf_12(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { + E T2u, T2n; + { + E T1, T2i, T2e, Tl, T1Y, T10, T1S, TG, T2f, T1s, T2s, Ty, T1Z, T1H, T21; + E T1d, TI, TL, T2h, T1l, T2p, Te, TJ, T1w, TO, TR, TN, TK, TQ; + { + E TW, TZ, TY, T1X, TX; + T1 = cr[0]; + T2i = ci[0]; + { + E Th, Tk, Tg, Tj, T2d, Ti, TV; + Th = cr[WS(rs, 6)]; + Tk = ci[WS(rs, 6)]; + Tg = W[10]; + Tj = W[11]; + TW = cr[WS(rs, 9)]; + TZ = ci[WS(rs, 9)]; + T2d = Tg * Tk; + Ti = Tg * Th; + TV = W[16]; + TY = W[17]; + T2e = FNMS(Tj, Th, T2d); + Tl = FMA(Tj, Tk, Ti); + T1X = TV * TZ; + TX = TV * TW; + } + { + E Tn, Tq, Tt, T1o, To, Tw, 
Ts, Tp, Tv; + { + E TC, TF, TB, TE, T1R, TD, Tm; + TC = cr[WS(rs, 3)]; + TF = ci[WS(rs, 3)]; + T1Y = FNMS(TY, TW, T1X); + T10 = FMA(TY, TZ, TX); + TB = W[4]; + TE = W[5]; + Tn = cr[WS(rs, 10)]; + Tq = ci[WS(rs, 10)]; + T1R = TB * TF; + TD = TB * TC; + Tm = W[18]; + Tt = cr[WS(rs, 2)]; + T1S = FNMS(TE, TC, T1R); + TG = FMA(TE, TF, TD); + T1o = Tm * Tq; + To = Tm * Tn; + Tw = ci[WS(rs, 2)]; + Ts = W[2]; + Tp = W[19]; + Tv = W[3]; + } + { + E T12, T15, T13, T1D, T18, T1b, T17, T14, T1a; + { + E T1p, Tr, T1r, Tx, T1q, Tu, T11; + T12 = cr[WS(rs, 1)]; + T1q = Ts * Tw; + Tu = Ts * Tt; + T1p = FNMS(Tp, Tn, T1o); + Tr = FMA(Tp, Tq, To); + T1r = FNMS(Tv, Tt, T1q); + Tx = FMA(Tv, Tw, Tu); + T15 = ci[WS(rs, 1)]; + T11 = W[0]; + T2f = T1p + T1r; + T1s = T1p - T1r; + T2s = Tx - Tr; + Ty = Tr + Tx; + T13 = T11 * T12; + T1D = T11 * T15; + } + T18 = cr[WS(rs, 5)]; + T1b = ci[WS(rs, 5)]; + T17 = W[8]; + T14 = W[1]; + T1a = W[9]; + { + E T3, T6, T4, T1h, T9, Tc, T8, T5, Tb; + { + E T1E, T16, T1G, T1c, T1F, T19, T2; + T3 = cr[WS(rs, 4)]; + T1F = T17 * T1b; + T19 = T17 * T18; + T1E = FNMS(T14, T12, T1D); + T16 = FMA(T14, T15, T13); + T1G = FNMS(T1a, T18, T1F); + T1c = FMA(T1a, T1b, T19); + T6 = ci[WS(rs, 4)]; + T2 = W[6]; + T1Z = T1E + T1G; + T1H = T1E - T1G; + T21 = T1c - T16; + T1d = T16 + T1c; + T4 = T2 * T3; + T1h = T2 * T6; + } + T9 = cr[WS(rs, 8)]; + Tc = ci[WS(rs, 8)]; + T8 = W[14]; + T5 = W[7]; + Tb = W[15]; + { + E T1i, T7, T1k, Td, T1j, Ta, TH; + TI = cr[WS(rs, 7)]; + T1j = T8 * Tc; + Ta = T8 * T9; + T1i = FNMS(T5, T3, T1h); + T7 = FMA(T5, T6, T4); + T1k = FNMS(Tb, T9, T1j); + Td = FMA(Tb, Tc, Ta); + TL = ci[WS(rs, 7)]; + TH = W[12]; + T2h = T1i + T1k; + T1l = T1i - T1k; + T2p = Td - T7; + Te = T7 + Td; + TJ = TH * TI; + T1w = TH * TL; + } + TO = cr[WS(rs, 11)]; + TR = ci[WS(rs, 11)]; + TN = W[20]; + TK = W[13]; + TQ = W[21]; + } + } + } + } + { + E T1g, T1n, T2r, T1A, T1V, T28, TA, T2o, T1v, T1C, T1U, T29, T2m, T2k, T2l; + E T1f, T2a, T20; + { + E T2g, T1T, TT, T2j, TU, T1e; 
+ { + E Tf, T1x, TM, T1z, TS, Tz, T1y, TP; + T1g = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T1y = TN * TR; + TP = TN * TO; + T1x = FNMS(TK, TI, T1w); + TM = FMA(TK, TL, TJ); + T1z = FNMS(TQ, TO, T1y); + TS = FMA(TQ, TR, TP); + Tz = Tl + Ty; + T1n = FNMS(KP500000000, Ty, Tl); + T2r = FNMS(KP500000000, T2f, T2e); + T2g = T2e + T2f; + T1T = T1x + T1z; + T1A = T1x - T1z; + T1V = TS - TM; + TT = TM + TS; + T28 = Tf - Tz; + TA = Tf + Tz; + T2j = T2h + T2i; + T2o = FNMS(KP500000000, T2h, T2i); + } + T1v = FNMS(KP500000000, TT, TG); + TU = TG + TT; + T1e = T10 + T1d; + T1C = FNMS(KP500000000, T1d, T10); + T1U = FNMS(KP500000000, T1T, T1S); + T29 = T1S + T1T; + T2m = T2j - T2g; + T2k = T2g + T2j; + T2l = TU - T1e; + T1f = TU + T1e; + T2a = T1Y + T1Z; + T20 = FNMS(KP500000000, T1Z, T1Y); + } + { + E T1m, T1K, T2y, T2q, T2z, T2t, T1L, T1t, T1B, T1N, T2c, T2b; + ci[WS(rs, 8)] = T2l + T2m; + cr[WS(rs, 9)] = T2l - T2m; + cr[0] = TA + T1f; + ci[WS(rs, 5)] = TA - T1f; + T2c = T29 + T2a; + T2b = T29 - T2a; + T1m = FNMS(KP866025403, T1l, T1g); + T1K = FMA(KP866025403, T1l, T1g); + ci[WS(rs, 11)] = T2c + T2k; + cr[WS(rs, 6)] = T2c - T2k; + ci[WS(rs, 2)] = T28 + T2b; + cr[WS(rs, 3)] = T28 - T2b; + T2y = FMA(KP866025403, T2p, T2o); + T2q = FNMS(KP866025403, T2p, T2o); + T2z = FMA(KP866025403, T2s, T2r); + T2t = FNMS(KP866025403, T2s, T2r); + T1L = FMA(KP866025403, T1s, T1n); + T1t = FNMS(KP866025403, T1s, T1n); + T1B = FNMS(KP866025403, T1A, T1v); + T1N = FMA(KP866025403, T1A, T1v); + { + E T1Q, T23, T27, T2A, T1P, T2x, T24, T1M; + { + E T1u, T25, T26, T1O, T1I, T2w, T2v, T1W, T22, T2B, T1J, T2C; + T1Q = T1m - T1t; + T1u = T1m + T1t; + T25 = FMA(KP866025403, T1V, T1U); + T1W = FNMS(KP866025403, T1V, T1U); + T26 = FMA(KP866025403, T21, T20); + T22 = FNMS(KP866025403, T21, T20); + T1O = FMA(KP866025403, T1H, T1C); + T1I = FNMS(KP866025403, T1H, T1C); + T2w = T2t + T2q; + T2u = T2q - T2t; + T23 = T1W - T22; + T2v = T1W + T22; + T2B = T25 + T26; + T27 = T25 - T26; + T2n = T1I - T1B; + 
T1J = T1B + T1I; + T2C = T2z + T2y; + T2A = T2y - T2z; + ci[WS(rs, 9)] = T2w - T2v; + cr[WS(rs, 8)] = -(T2v + T2w); + ci[WS(rs, 3)] = T1u + T1J; + cr[WS(rs, 2)] = T1u - T1J; + cr[WS(rs, 10)] = T2B - T2C; + ci[WS(rs, 7)] = T2B + T2C; + T1P = T1N + T1O; + T2x = T1O - T1N; + } + T24 = T1K - T1L; + T1M = T1K + T1L; + ci[WS(rs, 10)] = T2x + T2A; + cr[WS(rs, 7)] = T2x - T2A; + cr[WS(rs, 4)] = T1M + T1P; + ci[WS(rs, 1)] = T1M - T1P; + cr[WS(rs, 1)] = T24 + T27; + ci[WS(rs, 4)] = T24 - T27; + cr[WS(rs, 5)] = T1Q + T23; + ci[0] = T1Q - T23; + } + } + } + } + ci[WS(rs, 6)] = T2n + T2u; + cr[WS(rs, 11)] = T2n - T2u; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 12, "hf_12", twinstr, &GENUS, {72, 22, 46, 0} }; + +void X(codelet_hf_12) (planner *p) { + X(khc2hc_register) (p, hf_12, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hf_12 -include hf.h */ + +/* + * This function contains 118 FP additions, 60 FP multiplications, + * (or, 88 additions, 30 multiplications, 30 fused multiply/add), + * 47 stack variables, 2 constants, and 48 memory accesses + */ +#include "hf.h" + +static void hf_12(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { + E T1, T1W, T18, T23, Tc, T15, T1V, T22, TR, T1E, T1o, T1D, T12, T1l, T1F; + E T1G, Ti, T1S, T1d, T26, Tt, T1a, T1T, T25, TA, T1y, T1j, T1B, TL, T1g; + E T1z, T1A; + { + E T6, T16, Tb, T17; + T1 = cr[0]; + T1W = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 4)]; + T5 = ci[WS(rs, 4)]; + T2 = W[6]; + T4 = W[7]; + T6 = FMA(T2, T3, T4 * T5); + T16 = FNMS(T4, T3, T2 * T5); + } + 
{ + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 8)]; + Ta = ci[WS(rs, 8)]; + T7 = W[14]; + T9 = W[15]; + Tb = FMA(T7, T8, T9 * Ta); + T17 = FNMS(T9, T8, T7 * Ta); + } + T18 = KP866025403 * (T16 - T17); + T23 = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + T15 = FNMS(KP500000000, Tc, T1); + T1V = T16 + T17; + T22 = FNMS(KP500000000, T1V, T1W); + } + { + E T11, T1n, TW, T1m; + { + E TO, TQ, TN, TP; + TO = cr[WS(rs, 9)]; + TQ = ci[WS(rs, 9)]; + TN = W[16]; + TP = W[17]; + TR = FMA(TN, TO, TP * TQ); + T1E = FNMS(TP, TO, TN * TQ); + } + { + E TY, T10, TX, TZ; + TY = cr[WS(rs, 5)]; + T10 = ci[WS(rs, 5)]; + TX = W[8]; + TZ = W[9]; + T11 = FMA(TX, TY, TZ * T10); + T1n = FNMS(TZ, TY, TX * T10); + } + { + E TT, TV, TS, TU; + TT = cr[WS(rs, 1)]; + TV = ci[WS(rs, 1)]; + TS = W[0]; + TU = W[1]; + TW = FMA(TS, TT, TU * TV); + T1m = FNMS(TU, TT, TS * TV); + } + T1o = KP866025403 * (T1m - T1n); + T1D = KP866025403 * (T11 - TW); + T12 = TW + T11; + T1l = FNMS(KP500000000, T12, TR); + T1F = T1m + T1n; + T1G = FNMS(KP500000000, T1F, T1E); + } + { + E Ts, T1c, Tn, T1b; + { + E Tf, Th, Te, Tg; + Tf = cr[WS(rs, 6)]; + Th = ci[WS(rs, 6)]; + Te = W[10]; + Tg = W[11]; + Ti = FMA(Te, Tf, Tg * Th); + T1S = FNMS(Tg, Tf, Te * Th); + } + { + E Tp, Tr, To, Tq; + Tp = cr[WS(rs, 2)]; + Tr = ci[WS(rs, 2)]; + To = W[2]; + Tq = W[3]; + Ts = FMA(To, Tp, Tq * Tr); + T1c = FNMS(Tq, Tp, To * Tr); + } + { + E Tk, Tm, Tj, Tl; + Tk = cr[WS(rs, 10)]; + Tm = ci[WS(rs, 10)]; + Tj = W[18]; + Tl = W[19]; + Tn = FMA(Tj, Tk, Tl * Tm); + T1b = FNMS(Tl, Tk, Tj * Tm); + } + T1d = KP866025403 * (T1b - T1c); + T26 = KP866025403 * (Ts - Tn); + Tt = Tn + Ts; + T1a = FNMS(KP500000000, Tt, Ti); + T1T = T1b + T1c; + T25 = FNMS(KP500000000, T1T, T1S); + } + { + E TK, T1i, TF, T1h; + { + E Tx, Tz, Tw, Ty; + Tx = cr[WS(rs, 3)]; + Tz = ci[WS(rs, 3)]; + Tw = W[4]; + Ty = W[5]; + TA = FMA(Tw, Tx, Ty * Tz); + T1y = FNMS(Ty, Tx, Tw * Tz); + } + { + E TH, TJ, TG, TI; + TH = cr[WS(rs, 11)]; + TJ = ci[WS(rs, 11)]; + TG = W[20]; + TI = W[21]; + TK 
= FMA(TG, TH, TI * TJ); + T1i = FNMS(TI, TH, TG * TJ); + } + { + E TC, TE, TB, TD; + TC = cr[WS(rs, 7)]; + TE = ci[WS(rs, 7)]; + TB = W[12]; + TD = W[13]; + TF = FMA(TB, TC, TD * TE); + T1h = FNMS(TD, TC, TB * TE); + } + T1j = KP866025403 * (T1h - T1i); + T1B = KP866025403 * (TK - TF); + TL = TF + TK; + T1g = FNMS(KP500000000, TL, TA); + T1z = T1h + T1i; + T1A = FNMS(KP500000000, T1z, T1y); + } + { + E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R; + { + E Td, Tu, T1U, T1X; + Td = T1 + Tc; + Tu = Ti + Tt; + Tv = Td + Tu; + T1N = Td - Tu; + T1U = T1S + T1T; + T1X = T1V + T1W; + T1Y = T1U + T1X; + T20 = T1X - T1U; + } + { + E TM, T13, T1O, T1P; + TM = TA + TL; + T13 = TR + T12; + T14 = TM + T13; + T1Z = TM - T13; + T1O = T1y + T1z; + T1P = T1E + T1F; + T1Q = T1O - T1P; + T1R = T1O + T1P; + } + ci[WS(rs, 5)] = Tv - T14; + cr[WS(rs, 9)] = T1Z - T20; + ci[WS(rs, 8)] = T1Z + T20; + cr[0] = Tv + T14; + cr[WS(rs, 3)] = T1N - T1Q; + cr[WS(rs, 6)] = T1R - T1Y; + ci[WS(rs, 11)] = T1R + T1Y; + ci[WS(rs, 2)] = T1N + T1Q; + } + { + E T1f, T1x, T28, T2a, T1q, T21, T1I, T29; + { + E T19, T1e, T24, T27; + T19 = T15 - T18; + T1e = T1a - T1d; + T1f = T19 + T1e; + T1x = T19 - T1e; + T24 = T22 - T23; + T27 = T25 - T26; + T28 = T24 - T27; + T2a = T27 + T24; + } + { + E T1k, T1p, T1C, T1H; + T1k = T1g - T1j; + T1p = T1l - T1o; + T1q = T1k + T1p; + T21 = T1p - T1k; + T1C = T1A - T1B; + T1H = T1D - T1G; + T1I = T1C + T1H; + T29 = T1H - T1C; + } + cr[WS(rs, 2)] = T1f - T1q; + cr[WS(rs, 8)] = T29 - T2a; + ci[WS(rs, 9)] = T29 + T2a; + ci[WS(rs, 3)] = T1f + T1q; + ci[0] = T1x - T1I; + cr[WS(rs, 11)] = T21 - T28; + ci[WS(rs, 6)] = T21 + T28; + cr[WS(rs, 5)] = T1x + T1I; + } + { + E T1t, T1J, T2e, T2g, T1w, T2b, T1M, T2f; + { + E T1r, T1s, T2c, T2d; + T1r = T15 + T18; + T1s = T1a + T1d; + T1t = T1r + T1s; + T1J = T1r - T1s; + T2c = T23 + T22; + T2d = T26 + T25; + T2e = T2c - T2d; + T2g = T2d + T2c; + } + { + E T1u, T1v, T1K, T1L; + T1u = T1g + T1j; + T1v = T1l + T1o; + T1w = T1u + T1v; + T2b = T1v - 
T1u; + T1K = T1B + T1A; + T1L = T1D + T1G; + T1M = T1K - T1L; + T2f = T1K + T1L; + } + ci[WS(rs, 1)] = T1t - T1w; + cr[WS(rs, 1)] = T1J + T1M; + cr[WS(rs, 4)] = T1t + T1w; + ci[WS(rs, 4)] = T1J - T1M; + cr[WS(rs, 7)] = T2b - T2e; + ci[WS(rs, 7)] = T2f + T2g; + ci[WS(rs, 10)] = T2b + T2e; + cr[WS(rs, 10)] = T2f - T2g; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 12}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 12, "hf_12", twinstr, &GENUS, {88, 30, 30, 0} }; + +void X(codelet_hf_12) (planner *p) { + X(khc2hc_register) (p, hf_12, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:10 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 15 -dit -name hf_15 -include hf.h */ + +/* + * This function contains 184 FP additions, 140 FP multiplications, + * (or, 72 additions, 28 multiplications, 112 fused multiply/add), + * 97 stack variables, 6 constants, and 60 memory accesses + */ +#include "hf.h" + +static void hf_15(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 28); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) { + E T3v, T3E, T3G, T3A, T3y, T3z, T3F, T3B; + { + E T1G, T3l, T3H, T3k, T1B, Tf, T37, T1y, T2Y, T2M, T2a, T2i, T39, Tz, T2U; + E T2t, T1O, T2e, T3a, TT, T10, T2V, T2z, T1V, T2f, T2C, T12, T15, T14, T21; + E T1c, T1Y, T13; + { + E T2I, T1k, T1m, T1p, T1o, T28, T1w, T25, T1n; + { + E T1, T3i, T9, Tc, Tb, T1D, T7, T1E, Ta, T1j, T1i, T1h; + T1 = cr[0]; + T3i = ci[0]; + { + E T3, T6, T2, T5, T1C, T4, T8; + T3 = cr[WS(rs, 5)]; + T6 = ci[WS(rs, 5)]; + T2 = W[8]; + T5 = W[9]; + T9 = cr[WS(rs, 10)]; + Tc = ci[WS(rs, 10)]; + T1C = T2 * T6; + T4 = T2 * T3; + T8 = W[18]; 
+ Tb = W[19]; + T1D = FNMS(T5, T3, T1C); + T7 = FMA(T5, T6, T4); + T1E = T8 * Tc; + Ta = T8 * T9; + } + { + E T1g, T1F, Td, T1f, T3j, Te, T2H; + T1g = cr[WS(rs, 9)]; + T1j = ci[WS(rs, 9)]; + T1F = FNMS(Tb, T9, T1E); + Td = FMA(Tb, Tc, Ta); + T1f = W[16]; + T1i = W[17]; + T1G = T1D - T1F; + T3j = T1D + T1F; + T3l = Td - T7; + Te = T7 + Td; + T2H = T1f * T1j; + T1h = T1f * T1g; + T3H = T3j + T3i; + T3k = FNMS(KP500000000, T3j, T3i); + T1B = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T2I = FNMS(T1i, T1g, T2H); + } + T1k = FMA(T1i, T1j, T1h); + { + E T1s, T1v, T1r, T1u, T27, T1t, T1l; + T1s = cr[WS(rs, 4)]; + T1v = ci[WS(rs, 4)]; + T1r = W[6]; + T1u = W[7]; + T1m = cr[WS(rs, 14)]; + T1p = ci[WS(rs, 14)]; + T27 = T1r * T1v; + T1t = T1r * T1s; + T1l = W[26]; + T1o = W[27]; + T28 = FNMS(T1u, T1s, T27); + T1w = FMA(T1u, T1v, T1t); + T25 = T1l * T1p; + T1n = T1l * T1m; + } + } + { + E Tl, T2p, Tn, Tq, Tp, T1M, Tx, T1J, To; + { + E Th, Tk, T26, T1q, Tg, Tj; + Th = cr[WS(rs, 3)]; + Tk = ci[WS(rs, 3)]; + T26 = FNMS(T1o, T1m, T25); + T1q = FMA(T1o, T1p, T1n); + Tg = W[4]; + Tj = W[5]; + { + E T29, T2J, T1x, T2L; + T29 = T26 - T28; + T2J = T26 + T28; + T1x = T1q + T1w; + T2L = T1q - T1w; + { + E T2o, Ti, T2K, T24; + T2o = Tg * Tk; + Ti = Tg * Th; + T2K = FNMS(KP500000000, T2J, T2I); + T37 = T2I + T2J; + T24 = FNMS(KP500000000, T1x, T1k); + T1y = T1k + T1x; + Tl = FMA(Tj, Tk, Ti); + T2Y = FMA(KP866025403, T2L, T2K); + T2M = FNMS(KP866025403, T2L, T2K); + T2a = FNMS(KP866025403, T29, T24); + T2i = FMA(KP866025403, T29, T24); + T2p = FNMS(Tj, Th, T2o); + } + } + } + { + E Tt, Tw, Ts, Tv, T1L, Tu, Tm; + Tt = cr[WS(rs, 13)]; + Tw = ci[WS(rs, 13)]; + Ts = W[24]; + Tv = W[25]; + Tn = cr[WS(rs, 8)]; + Tq = ci[WS(rs, 8)]; + T1L = Ts * Tw; + Tu = Ts * Tt; + Tm = W[14]; + Tp = W[15]; + T1M = FNMS(Tv, Tt, T1L); + Tx = FMA(Tv, Tw, Tu); + T1J = Tm * Tq; + To = Tm * Tn; + } + { + E TF, T2v, TH, TK, TJ, T1T, TR, T1Q, TI; + { + E TB, TE, T1K, Tr, TA, TD; + TB = cr[WS(rs, 12)]; + TE = ci[WS(rs, 
12)]; + T1K = FNMS(Tp, Tn, T1J); + Tr = FMA(Tp, Tq, To); + TA = W[22]; + TD = W[23]; + { + E T1N, T2q, Ty, T2s; + T1N = T1K - T1M; + T2q = T1K + T1M; + Ty = Tr + Tx; + T2s = Tr - Tx; + { + E T2u, TC, T2r, T1I; + T2u = TA * TE; + TC = TA * TB; + T2r = FNMS(KP500000000, T2q, T2p); + T39 = T2p + T2q; + T1I = FNMS(KP500000000, Ty, Tl); + Tz = Tl + Ty; + TF = FMA(TD, TE, TC); + T2U = FMA(KP866025403, T2s, T2r); + T2t = FNMS(KP866025403, T2s, T2r); + T1O = FNMS(KP866025403, T1N, T1I); + T2e = FMA(KP866025403, T1N, T1I); + T2v = FNMS(TD, TB, T2u); + } + } + } + { + E TN, TQ, TM, TP, T1S, TO, TG; + TN = cr[WS(rs, 7)]; + TQ = ci[WS(rs, 7)]; + TM = W[12]; + TP = W[13]; + TH = cr[WS(rs, 2)]; + TK = ci[WS(rs, 2)]; + T1S = TM * TQ; + TO = TM * TN; + TG = W[2]; + TJ = W[3]; + T1T = FNMS(TP, TN, T1S); + TR = FMA(TP, TQ, TO); + T1Q = TG * TK; + TI = TG * TH; + } + { + E TW, TZ, T1R, TL, TV, TY; + TW = cr[WS(rs, 6)]; + TZ = ci[WS(rs, 6)]; + T1R = FNMS(TJ, TH, T1Q); + TL = FMA(TJ, TK, TI); + TV = W[10]; + TY = W[11]; + { + E T1U, T2w, TS, T2y; + T1U = T1R - T1T; + T2w = T1R + T1T; + TS = TL + TR; + T2y = TL - TR; + { + E T2B, TX, T2x, T1P; + T2B = TV * TZ; + TX = TV * TW; + T2x = FNMS(KP500000000, T2w, T2v); + T3a = T2v + T2w; + T1P = FNMS(KP500000000, TS, TF); + TT = TF + TS; + T10 = FMA(TY, TZ, TX); + T2V = FMA(KP866025403, T2y, T2x); + T2z = FNMS(KP866025403, T2y, T2x); + T1V = FNMS(KP866025403, T1U, T1P); + T2f = FMA(KP866025403, T1U, T1P); + T2C = FNMS(TY, TW, T2B); + } + } + } + { + E T18, T1b, T17, T1a, T20, T19, T11; + T18 = cr[WS(rs, 1)]; + T1b = ci[WS(rs, 1)]; + T17 = W[0]; + T1a = W[1]; + T12 = cr[WS(rs, 11)]; + T15 = ci[WS(rs, 11)]; + T20 = T17 * T1b; + T19 = T17 * T18; + T11 = W[20]; + T14 = W[21]; + T21 = FNMS(T1a, T18, T20); + T1c = FMA(T1a, T1b, T19); + T1Y = T11 * T15; + T13 = T11 * T12; + } + } + } + } + { + E T3I, T3O, T3w, T2d, T3J, T3P, T3x, T3C, T3D, T3f, T3g, T2Q, T2O, T3r, T3q; + E T2k, T2m; + { + E T3b, T1Z, T16, TU; + T3I = T39 + T3a; + T3b = T39 - T3a; + 
T1Z = FNMS(T14, T12, T1Y); + T16 = FMA(T14, T15, T13); + T3O = TT - Tz; + TU = Tz + TT; + { + E T1H, T2G, T2h, T3e, T3c, T34, T1W, T32, T30, T33, T2b, T2S, T2R; + { + E T2W, T22, T1d, T2F, T2E, T36, T2D; + T2W = T2U - T2V; + T3w = T2U + T2V; + T22 = T1Z - T21; + T2D = T1Z + T21; + T1d = T16 + T1c; + T2F = T16 - T1c; + T2E = FNMS(KP500000000, T2D, T2C); + T36 = T2C + T2D; + T2d = FMA(KP866025403, T1G, T1B); + T1H = FNMS(KP866025403, T1G, T1B); + { + E T1e, T1X, T38, T2X; + T1e = T10 + T1d; + T1X = FNMS(KP500000000, T1d, T10); + T38 = T36 - T37; + T3J = T36 + T37; + T2G = FNMS(KP866025403, T2F, T2E); + T2X = FMA(KP866025403, T2F, T2E); + { + E T1z, T23, T2Z, T1A; + T3P = T1y - T1e; + T1z = T1e + T1y; + T23 = FNMS(KP866025403, T22, T1X); + T2h = FMA(KP866025403, T22, T1X); + T3e = FMA(KP618033988, T38, T3b); + T3c = FNMS(KP618033988, T3b, T38); + T2Z = T2X - T2Y; + T3x = T2X + T2Y; + T1A = TU + T1z; + T34 = TU - T1z; + T3C = T1O - T1V; + T1W = T1O + T1V; + T32 = FNMS(KP618033988, T2W, T2Z); + T30 = FMA(KP618033988, T2Z, T2W); + cr[0] = Tf + T1A; + T33 = FNMS(KP250000000, T1A, Tf); + T2b = T23 + T2a; + T3D = T23 - T2a; + } + } + } + { + E T2A, T2N, T3d, T35, T2c; + T3f = T2t + T2z; + T2A = T2t - T2z; + T2N = T2G - T2M; + T3g = T2G + T2M; + T3d = FMA(KP559016994, T34, T33); + T35 = FNMS(KP559016994, T34, T33); + T2c = T1W + T2b; + T2S = T1W - T2b; + cr[WS(rs, 3)] = FMA(KP951056516, T3c, T35); + ci[WS(rs, 2)] = FNMS(KP951056516, T3c, T35); + cr[WS(rs, 6)] = FMA(KP951056516, T3e, T3d); + ci[WS(rs, 5)] = FNMS(KP951056516, T3e, T3d); + cr[WS(rs, 5)] = T1H + T2c; + T2R = FNMS(KP250000000, T2c, T1H); + T2Q = FNMS(KP618033988, T2A, T2N); + T2O = FMA(KP618033988, T2N, T2A); + } + { + E T2T, T31, T2g, T2j; + T2T = FMA(KP559016994, T2S, T2R); + T31 = FNMS(KP559016994, T2S, T2R); + T2g = T2e + T2f; + T3r = T2e - T2f; + T3q = T2h - T2i; + T2j = T2h + T2i; + ci[WS(rs, 3)] = FMA(KP951056516, T30, T2T); + ci[0] = FNMS(KP951056516, T30, T2T); + ci[WS(rs, 6)] = FMA(KP951056516, T32, 
T31); + cr[WS(rs, 2)] = FNMS(KP951056516, T32, T31); + T2k = T2g + T2j; + T2m = T2g - T2j; + } + } + } + { + E T3m, T3s, T3u, T3o, T3h, T2l, T2n, T2P; + ci[WS(rs, 4)] = T2d + T2k; + T2l = FNMS(KP250000000, T2k, T2d); + T3m = FMA(KP866025403, T3l, T3k); + T3v = FNMS(KP866025403, T3l, T3k); + T3s = FNMS(KP618033988, T3r, T3q); + T3u = FMA(KP618033988, T3q, T3r); + T2n = FMA(KP559016994, T2m, T2l); + T2P = FNMS(KP559016994, T2m, T2l); + ci[WS(rs, 1)] = FMA(KP951056516, T2Q, T2P); + cr[WS(rs, 7)] = FNMS(KP951056516, T2Q, T2P); + cr[WS(rs, 1)] = FMA(KP951056516, T2O, T2n); + cr[WS(rs, 4)] = FNMS(KP951056516, T2O, T2n); + T3o = T3f - T3g; + T3h = T3f + T3g; + { + E T3S, T3Q, T3K, T3M, T3n, T3p, T3t, T3L, T3R, T3N; + cr[WS(rs, 10)] = -(T3h + T3m); + T3n = FNMS(KP250000000, T3h, T3m); + T3S = FNMS(KP618033988, T3O, T3P); + T3Q = FMA(KP618033988, T3P, T3O); + T3p = FNMS(KP559016994, T3o, T3n); + T3t = FMA(KP559016994, T3o, T3n); + ci[WS(rs, 7)] = FMA(KP951056516, T3s, T3p); + cr[WS(rs, 13)] = FMS(KP951056516, T3s, T3p); + ci[WS(rs, 13)] = FNMS(KP951056516, T3u, T3t); + ci[WS(rs, 10)] = FMA(KP951056516, T3u, T3t); + T3K = T3I + T3J; + T3M = T3I - T3J; + ci[WS(rs, 14)] = T3K + T3H; + T3L = FNMS(KP250000000, T3K, T3H); + T3E = FMA(KP618033988, T3D, T3C); + T3G = FNMS(KP618033988, T3C, T3D); + T3R = FNMS(KP559016994, T3M, T3L); + T3N = FMA(KP559016994, T3M, T3L); + ci[WS(rs, 8)] = FMA(KP951056516, T3Q, T3N); + cr[WS(rs, 9)] = FMS(KP951056516, T3Q, T3N); + ci[WS(rs, 11)] = FMA(KP951056516, T3S, T3R); + cr[WS(rs, 12)] = FMS(KP951056516, T3S, T3R); + T3A = T3x - T3w; + T3y = T3w + T3x; + } + } + } + } + ci[WS(rs, 9)] = T3y + T3v; + T3z = FNMS(KP250000000, T3y, T3v); + T3F = FMA(KP559016994, T3A, T3z); + T3B = FNMS(KP559016994, T3A, T3z); + cr[WS(rs, 14)] = -(FMA(KP951056516, T3E, T3B)); + cr[WS(rs, 11)] = FMS(KP951056516, T3E, T3B); + ci[WS(rs, 12)] = FMA(KP951056516, T3G, T3F); + cr[WS(rs, 8)] = FMS(KP951056516, T3G, T3F); + } + } +} + +static const tw_instr twinstr[] = { + 
{TW_FULL, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 15, "hf_15", twinstr, &GENUS, {72, 28, 112, 0} }; + +void X(codelet_hf_15) (planner *p) { + X(khc2hc_register) (p, hf_15, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 15 -dit -name hf_15 -include hf.h */ + +/* + * This function contains 184 FP additions, 112 FP multiplications, + * (or, 128 additions, 56 multiplications, 56 fused multiply/add), + * 65 stack variables, 6 constants, and 60 memory accesses + */ +#include "hf.h" + +static void hf_15(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 28); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) { + E T1q, T2Q, Td, T1n, T2T, T3l, T13, T1k, T1l, T2E, T2F, T3j, T1H, T1T, T2k; + E T2w, T2f, T2v, T1M, T1U, Tu, TL, TM, T2H, T2I, T3i, T1w, T1Q, T29, T2t; + E T24, T2s, T1B, T1R; + { + E T1, T2R, T6, T1o, Tb, T1p, Tc, T2S; + T1 = cr[0]; + T2R = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 5)]; + T5 = ci[WS(rs, 5)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T1o = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 10)]; + Ta = ci[WS(rs, 10)]; + T7 = W[18]; + T9 = W[19]; + Tb = FMA(T7, T8, T9 * Ta); + T1p = FNMS(T9, T8, T7 * Ta); + } + T1q = KP866025403 * (T1o - T1p); + T2Q = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + Td = T1 + Tc; + T1n = FNMS(KP500000000, Tc, T1); + T2S = T1o + T1p; + T2T = FNMS(KP500000000, T2S, T2R); + 
T3l = T2S + T2R; + } + { + E TR, T2c, T18, T2h, TW, T1E, T11, T1F, T12, T2d, T1d, T1J, T1i, T1K, T1j; + E T2i; + { + E TO, TQ, TN, TP; + TO = cr[WS(rs, 6)]; + TQ = ci[WS(rs, 6)]; + TN = W[10]; + TP = W[11]; + TR = FMA(TN, TO, TP * TQ); + T2c = FNMS(TP, TO, TN * TQ); + } + { + E T15, T17, T14, T16; + T15 = cr[WS(rs, 9)]; + T17 = ci[WS(rs, 9)]; + T14 = W[16]; + T16 = W[17]; + T18 = FMA(T14, T15, T16 * T17); + T2h = FNMS(T16, T15, T14 * T17); + } + { + E TT, TV, TS, TU; + TT = cr[WS(rs, 11)]; + TV = ci[WS(rs, 11)]; + TS = W[20]; + TU = W[21]; + TW = FMA(TS, TT, TU * TV); + T1E = FNMS(TU, TT, TS * TV); + } + { + E TY, T10, TX, TZ; + TY = cr[WS(rs, 1)]; + T10 = ci[WS(rs, 1)]; + TX = W[0]; + TZ = W[1]; + T11 = FMA(TX, TY, TZ * T10); + T1F = FNMS(TZ, TY, TX * T10); + } + T12 = TW + T11; + T2d = T1E + T1F; + { + E T1a, T1c, T19, T1b; + T1a = cr[WS(rs, 14)]; + T1c = ci[WS(rs, 14)]; + T19 = W[26]; + T1b = W[27]; + T1d = FMA(T19, T1a, T1b * T1c); + T1J = FNMS(T1b, T1a, T19 * T1c); + } + { + E T1f, T1h, T1e, T1g; + T1f = cr[WS(rs, 4)]; + T1h = ci[WS(rs, 4)]; + T1e = W[6]; + T1g = W[7]; + T1i = FMA(T1e, T1f, T1g * T1h); + T1K = FNMS(T1g, T1f, T1e * T1h); + } + T1j = T1d + T1i; + T2i = T1J + T1K; + { + E T1D, T1G, T2g, T2j; + T13 = TR + T12; + T1k = T18 + T1j; + T1l = T13 + T1k; + T2E = T2c + T2d; + T2F = T2h + T2i; + T3j = T2E + T2F; + T1D = FNMS(KP500000000, T12, TR); + T1G = KP866025403 * (T1E - T1F); + T1H = T1D - T1G; + T1T = T1D + T1G; + T2g = KP866025403 * (T1d - T1i); + T2j = FNMS(KP500000000, T2i, T2h); + T2k = T2g - T2j; + T2w = T2g + T2j; + { + E T2b, T2e, T1I, T1L; + T2b = KP866025403 * (T11 - TW); + T2e = FNMS(KP500000000, T2d, T2c); + T2f = T2b + T2e; + T2v = T2e - T2b; + T1I = FNMS(KP500000000, T1j, T18); + T1L = KP866025403 * (T1J - T1K); + T1M = T1I - T1L; + T1U = T1I + T1L; + } + } + } + { + E Ti, T21, Tz, T26, Tn, T1t, Ts, T1u, Tt, T22, TE, T1y, TJ, T1z, TK; + E T27; + { + E Tf, Th, Te, Tg; + Tf = cr[WS(rs, 3)]; + Th = ci[WS(rs, 3)]; + Te = W[4]; + Tg = W[5]; 
+ Ti = FMA(Te, Tf, Tg * Th); + T21 = FNMS(Tg, Tf, Te * Th); + } + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 12)]; + Ty = ci[WS(rs, 12)]; + Tv = W[22]; + Tx = W[23]; + Tz = FMA(Tv, Tw, Tx * Ty); + T26 = FNMS(Tx, Tw, Tv * Ty); + } + { + E Tk, Tm, Tj, Tl; + Tk = cr[WS(rs, 8)]; + Tm = ci[WS(rs, 8)]; + Tj = W[14]; + Tl = W[15]; + Tn = FMA(Tj, Tk, Tl * Tm); + T1t = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = cr[WS(rs, 13)]; + Tr = ci[WS(rs, 13)]; + To = W[24]; + Tq = W[25]; + Ts = FMA(To, Tp, Tq * Tr); + T1u = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn + Ts; + T22 = T1t + T1u; + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 2)]; + TD = ci[WS(rs, 2)]; + TA = W[2]; + TC = W[3]; + TE = FMA(TA, TB, TC * TD); + T1y = FNMS(TC, TB, TA * TD); + } + { + E TG, TI, TF, TH; + TG = cr[WS(rs, 7)]; + TI = ci[WS(rs, 7)]; + TF = W[12]; + TH = W[13]; + TJ = FMA(TF, TG, TH * TI); + T1z = FNMS(TH, TG, TF * TI); + } + TK = TE + TJ; + T27 = T1y + T1z; + { + E T1s, T1v, T25, T28; + Tu = Ti + Tt; + TL = Tz + TK; + TM = Tu + TL; + T2H = T21 + T22; + T2I = T26 + T27; + T3i = T2H + T2I; + T1s = FNMS(KP500000000, Tt, Ti); + T1v = KP866025403 * (T1t - T1u); + T1w = T1s - T1v; + T1Q = T1s + T1v; + T25 = KP866025403 * (TJ - TE); + T28 = FNMS(KP500000000, T27, T26); + T29 = T25 + T28; + T2t = T28 - T25; + { + E T20, T23, T1x, T1A; + T20 = KP866025403 * (Ts - Tn); + T23 = FNMS(KP500000000, T22, T21); + T24 = T20 + T23; + T2s = T23 - T20; + T1x = FNMS(KP500000000, TK, Tz); + T1A = KP866025403 * (T1y - T1z); + T1B = T1x - T1A; + T1R = T1x + T1A; + } + } + } + { + E T2C, T1m, T2B, T2K, T2M, T2G, T2J, T2L, T2D; + T2C = KP559016994 * (TM - T1l); + T1m = TM + T1l; + T2B = FNMS(KP250000000, T1m, Td); + T2G = T2E - T2F; + T2J = T2H - T2I; + T2K = FNMS(KP587785252, T2J, KP951056516 * T2G); + T2M = FMA(KP951056516, T2J, KP587785252 * T2G); + cr[0] = Td + T1m; + T2L = T2C + T2B; + ci[WS(rs, 5)] = T2L - T2M; + cr[WS(rs, 6)] = T2L + T2M; + T2D = T2B - T2C; + ci[WS(rs, 2)] = T2D - T2K; + cr[WS(rs, 3)] = T2D + T2K; + } 
+ { + E T3k, T3m, T3n, T3h, T3p, T3f, T3g, T3q, T3o; + T3k = KP559016994 * (T3i - T3j); + T3m = T3i + T3j; + T3n = FNMS(KP250000000, T3m, T3l); + T3f = T1k - T13; + T3g = Tu - TL; + T3h = FNMS(KP951056516, T3g, KP587785252 * T3f); + T3p = FMA(KP587785252, T3g, KP951056516 * T3f); + ci[WS(rs, 14)] = T3m + T3l; + T3q = T3n - T3k; + cr[WS(rs, 12)] = T3p - T3q; + ci[WS(rs, 11)] = T3p + T3q; + T3o = T3k + T3n; + cr[WS(rs, 9)] = T3h - T3o; + ci[WS(rs, 8)] = T3h + T3o; + } + { + E T2y, T2A, T1r, T1O, T2p, T2q, T2z, T2r; + { + E T2u, T2x, T1C, T1N; + T2u = T2s - T2t; + T2x = T2v - T2w; + T2y = FMA(KP951056516, T2u, KP587785252 * T2x); + T2A = FNMS(KP587785252, T2u, KP951056516 * T2x); + T1r = T1n - T1q; + T1C = T1w + T1B; + T1N = T1H + T1M; + T1O = T1C + T1N; + T2p = KP559016994 * (T1C - T1N); + T2q = FNMS(KP250000000, T1O, T1r); + } + cr[WS(rs, 5)] = T1r + T1O; + T2z = T2q - T2p; + cr[WS(rs, 2)] = T2z - T2A; + ci[WS(rs, 6)] = T2z + T2A; + T2r = T2p + T2q; + ci[0] = T2r - T2y; + ci[WS(rs, 3)] = T2r + T2y; + } + { + E T35, T3d, T39, T3a, T38, T3b, T3e, T3c; + { + E T33, T34, T36, T37; + T33 = T1w - T1B; + T34 = T1H - T1M; + T35 = FMA(KP951056516, T33, KP587785252 * T34); + T3d = FNMS(KP587785252, T33, KP951056516 * T34); + T39 = T2T - T2Q; + T36 = T2v + T2w; + T37 = T2s + T2t; + T3a = T37 + T36; + T38 = KP559016994 * (T36 - T37); + T3b = FNMS(KP250000000, T3a, T39); + } + ci[WS(rs, 9)] = T3a + T39; + T3e = T38 + T3b; + cr[WS(rs, 8)] = T3d - T3e; + ci[WS(rs, 12)] = T3d + T3e; + T3c = T38 - T3b; + cr[WS(rs, 11)] = T35 + T3c; + cr[WS(rs, 14)] = T3c - T35; + } + { + E T2X, T31, T2U, T2P, T2Y, T2Z, T32, T30; + { + E T2V, T2W, T2N, T2O; + T2V = T1T - T1U; + T2W = T1Q - T1R; + T2X = FNMS(KP587785252, T2W, KP951056516 * T2V); + T31 = FMA(KP951056516, T2W, KP587785252 * T2V); + T2U = T2Q + T2T; + T2N = T2k - T2f; + T2O = T24 + T29; + T2P = T2N - T2O; + T2Y = FMA(KP250000000, T2P, T2U); + T2Z = KP559016994 * (T2O + T2N); + } + cr[WS(rs, 10)] = T2P - T2U; + T32 = T2Z + T2Y; + 
ci[WS(rs, 10)] = T31 + T32; + ci[WS(rs, 13)] = T32 - T31; + T30 = T2Y - T2Z; + cr[WS(rs, 13)] = T2X - T30; + ci[WS(rs, 7)] = T2X + T30; + } + { + E T2m, T2o, T1P, T1W, T1X, T1Y, T1Z, T2n; + { + E T2a, T2l, T1S, T1V; + T2a = T24 - T29; + T2l = T2f + T2k; + T2m = FMA(KP951056516, T2a, KP587785252 * T2l); + T2o = FNMS(KP587785252, T2a, KP951056516 * T2l); + T1P = T1n + T1q; + T1S = T1Q + T1R; + T1V = T1T + T1U; + T1W = T1S + T1V; + T1X = KP559016994 * (T1S - T1V); + T1Y = FNMS(KP250000000, T1W, T1P); + } + ci[WS(rs, 4)] = T1P + T1W; + T1Z = T1X + T1Y; + cr[WS(rs, 4)] = T1Z - T2m; + cr[WS(rs, 1)] = T1Z + T2m; + T2n = T1Y - T1X; + cr[WS(rs, 7)] = T2n - T2o; + ci[WS(rs, 1)] = T2n + T2o; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 15}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 15, "hf_15", twinstr, &GENUS, {128, 56, 56, 0} }; + +void X(codelet_hf_15) (planner *p) { + X(khc2hc_register) (p, hf_15, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,787 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:10 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hf_16 -include hf.h */ + +/* + * This function contains 174 FP additions, 100 FP multiplications, + * (or, 104 additions, 30 multiplications, 70 fused multiply/add), + * 95 stack variables, 3 constants, and 64 memory accesses + */ +#include "hf.h" + +static void hf_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) { + E T2T, T2Q; + { + E T3A, T3o, T8, T1I, T2w, T35, T2k, T1s, T2p, T36, T2r, T1F, T3k, T1N, T3z; + E Tl, T1U, T2W, T1P, Tz, T2g, T30, T25, T11, TB, TE, T2a, T31, T2h, T1e; + E TC, T1X, TH, TK, TG, TD, TJ; + { + E Ta, Td, Tb, T1J, Tg, Tj, Tf, Tc, Ti; + { + E T1h, T1k, T1n, T2s, T1i, T1q, T1m, T1j, T1p; + { + E T1, T3n, T3, T6, T2, T5; + T1 = cr[0]; + T3n = ci[0]; + T3 = cr[WS(rs, 8)]; + T6 = ci[WS(rs, 8)]; + T2 = W[14]; + T5 = W[15]; + { + E T3l, T4, T1g, T3m, T7; + T1h = cr[WS(rs, 15)]; + T1k = ci[WS(rs, 15)]; + T3l = T2 * T6; + T4 = T2 * T3; + T1g = W[28]; + T1n = cr[WS(rs, 7)]; + T3m = FNMS(T5, T3, T3l); + T7 = FMA(T5, T6, T4); + T2s = T1g * T1k; + T1i = T1g * T1h; + T3A = T3n - T3m; + T3o = T3m + T3n; + T8 = T1 + T7; + T1I = T1 - T7; + T1q = ci[WS(rs, 7)]; + 
T1m = W[12]; + } + T1j = W[29]; + T1p = W[13]; + } + { + E T1u, T1x, T1v, T2l, T1A, T1D, T1z, T1w, T1C; + { + E T2t, T1l, T2v, T1r, T2u, T1o, T1t; + T1u = cr[WS(rs, 3)]; + T2u = T1m * T1q; + T1o = T1m * T1n; + T2t = FNMS(T1j, T1h, T2s); + T1l = FMA(T1j, T1k, T1i); + T2v = FNMS(T1p, T1n, T2u); + T1r = FMA(T1p, T1q, T1o); + T1x = ci[WS(rs, 3)]; + T1t = W[4]; + T2w = T2t - T2v; + T35 = T2t + T2v; + T2k = T1l - T1r; + T1s = T1l + T1r; + T1v = T1t * T1u; + T2l = T1t * T1x; + } + T1A = cr[WS(rs, 11)]; + T1D = ci[WS(rs, 11)]; + T1z = W[20]; + T1w = W[5]; + T1C = W[21]; + { + E T2m, T1y, T2o, T1E, T2n, T1B, T9; + Ta = cr[WS(rs, 4)]; + T2n = T1z * T1D; + T1B = T1z * T1A; + T2m = FNMS(T1w, T1u, T2l); + T1y = FMA(T1w, T1x, T1v); + T2o = FNMS(T1C, T1A, T2n); + T1E = FMA(T1C, T1D, T1B); + Td = ci[WS(rs, 4)]; + T9 = W[6]; + T2p = T2m - T2o; + T36 = T2m + T2o; + T2r = T1E - T1y; + T1F = T1y + T1E; + Tb = T9 * Ta; + T1J = T9 * Td; + } + Tg = cr[WS(rs, 12)]; + Tj = ci[WS(rs, 12)]; + Tf = W[22]; + Tc = W[7]; + Ti = W[23]; + } + } + { + E TQ, TT, TR, T2c, TW, TZ, TV, TS, TY; + { + E To, Tr, Tp, T1Q, Tu, Tx, Tt, Tq, Tw; + { + E T1K, Te, T1M, Tk, T1L, Th, Tn; + To = cr[WS(rs, 2)]; + T1L = Tf * Tj; + Th = Tf * Tg; + T1K = FNMS(Tc, Ta, T1J); + Te = FMA(Tc, Td, Tb); + T1M = FNMS(Ti, Tg, T1L); + Tk = FMA(Ti, Tj, Th); + Tr = ci[WS(rs, 2)]; + Tn = W[2]; + T3k = T1K + T1M; + T1N = T1K - T1M; + T3z = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T1Q = Tn * Tr; + } + Tu = cr[WS(rs, 10)]; + Tx = ci[WS(rs, 10)]; + Tt = W[18]; + Tq = W[3]; + Tw = W[19]; + { + E T1R, Ts, T1T, Ty, T1S, Tv, TP; + TQ = cr[WS(rs, 1)]; + T1S = Tt * Tx; + Tv = Tt * Tu; + T1R = FNMS(Tq, To, T1Q); + Ts = FMA(Tq, Tr, Tp); + T1T = FNMS(Tw, Tu, T1S); + Ty = FMA(Tw, Tx, Tv); + TT = ci[WS(rs, 1)]; + TP = W[0]; + T1U = T1R - T1T; + T2W = T1R + T1T; + T1P = Ts - Ty; + Tz = Ts + Ty; + TR = TP * TQ; + T2c = TP * TT; + } + TW = cr[WS(rs, 9)]; + TZ = ci[WS(rs, 9)]; + TV = W[16]; + TS = W[1]; + TY = W[17]; + } + { + E T13, T16, T14, T26, 
T19, T1c, T18, T15, T1b; + { + E T2d, TU, T2f, T10, T2e, TX, T12; + T13 = cr[WS(rs, 5)]; + T2e = TV * TZ; + TX = TV * TW; + T2d = FNMS(TS, TQ, T2c); + TU = FMA(TS, TT, TR); + T2f = FNMS(TY, TW, T2e); + T10 = FMA(TY, TZ, TX); + T16 = ci[WS(rs, 5)]; + T12 = W[8]; + T2g = T2d - T2f; + T30 = T2d + T2f; + T25 = TU - T10; + T11 = TU + T10; + T14 = T12 * T13; + T26 = T12 * T16; + } + T19 = cr[WS(rs, 13)]; + T1c = ci[WS(rs, 13)]; + T18 = W[24]; + T15 = W[9]; + T1b = W[25]; + { + E T27, T17, T29, T1d, T28, T1a, TA; + TB = cr[WS(rs, 14)]; + T28 = T18 * T1c; + T1a = T18 * T19; + T27 = FNMS(T15, T13, T26); + T17 = FMA(T15, T16, T14); + T29 = FNMS(T1b, T19, T28); + T1d = FMA(T1b, T1c, T1a); + TE = ci[WS(rs, 14)]; + TA = W[26]; + T2a = T27 - T29; + T31 = T27 + T29; + T2h = T17 - T1d; + T1e = T17 + T1d; + TC = TA * TB; + T1X = TA * TE; + } + TH = cr[WS(rs, 6)]; + TK = ci[WS(rs, 6)]; + TG = W[10]; + TD = W[27]; + TJ = W[11]; + } + } + } + { + E T2U, T3u, T2Z, T21, T1W, T34, T2X, T37, T3t, T3q, T3e, T32, T3i, T3h; + { + E T3f, T3r, T1H, T3s, TO, T3g; + { + E Tm, T1Y, TF, T20, TL, T3p, T1Z, TI; + T2U = T8 - Tl; + Tm = T8 + Tl; + T1Z = TG * TK; + TI = TG * TH; + T1Y = FNMS(TD, TB, T1X); + TF = FMA(TD, TE, TC); + T20 = FNMS(TJ, TH, T1Z); + TL = FMA(TJ, TK, TI); + T3p = T3k + T3o; + T3u = T3o - T3k; + { + E T1f, TM, T1G, T3j, T2V, TN; + T2Z = T11 - T1e; + T1f = T11 + T1e; + T21 = T1Y - T20; + T2V = T1Y + T20; + T1W = TF - TL; + TM = TF + TL; + T1G = T1s + T1F; + T34 = T1s - T1F; + T2X = T2V - T2W; + T3j = T2W + T2V; + T3f = T35 + T36; + T37 = T35 - T36; + T3t = Tz - TM; + TN = Tz + TM; + T3r = T1G - T1f; + T1H = T1f + T1G; + T3s = T3p - T3j; + T3q = T3j + T3p; + T3e = Tm - TN; + TO = Tm + TN; + T3g = T30 + T31; + T32 = T30 - T31; + } + } + cr[WS(rs, 12)] = T3r - T3s; + ci[WS(rs, 11)] = T3r + T3s; + ci[WS(rs, 7)] = TO - T1H; + T3i = T3g + T3f; + T3h = T3f - T3g; + cr[0] = TO + T1H; + } + { + E T3a, T2Y, T3x, T3v; + ci[WS(rs, 15)] = T3i + T3q; + cr[WS(rs, 8)] = T3i - T3q; + ci[WS(rs, 3)] 
= T3e + T3h; + cr[WS(rs, 4)] = T3e - T3h; + T3a = T2U + T2X; + T2Y = T2U - T2X; + T3x = T3u - T3t; + T3v = T3t + T3u; + { + E T2E, T1O, T3B, T3H, T2q, T2x, T3I, T23, T2R, T2O, T2J, T2K, T3C, T2H, T2B; + E T2j; + { + E T2F, T1V, T22, T2G; + { + E T3b, T33, T3c, T38; + T2E = T1I + T1N; + T1O = T1I - T1N; + T3b = T2Z - T32; + T33 = T2Z + T32; + T3c = T34 + T37; + T38 = T34 - T37; + T3B = T3z + T3A; + T3H = T3A - T3z; + { + E T3d, T3y, T3w, T39; + T3d = T3b + T3c; + T3y = T3c - T3b; + T3w = T38 - T33; + T39 = T33 + T38; + ci[WS(rs, 1)] = FMA(KP707106781, T3d, T3a); + cr[WS(rs, 6)] = FNMS(KP707106781, T3d, T3a); + ci[WS(rs, 13)] = FMA(KP707106781, T3y, T3x); + cr[WS(rs, 10)] = FMS(KP707106781, T3y, T3x); + ci[WS(rs, 9)] = FMA(KP707106781, T3w, T3v); + cr[WS(rs, 14)] = FMS(KP707106781, T3w, T3v); + cr[WS(rs, 2)] = FMA(KP707106781, T39, T2Y); + ci[WS(rs, 5)] = FNMS(KP707106781, T39, T2Y); + T2F = T1P + T1U; + T1V = T1P - T1U; + T22 = T1W + T21; + T2G = T1W - T21; + } + } + { + E T2M, T2N, T2b, T2i; + T2q = T2k - T2p; + T2M = T2k + T2p; + T2N = T2w + T2r; + T2x = T2r - T2w; + T3I = T22 - T1V; + T23 = T1V + T22; + T2R = FMA(KP414213562, T2M, T2N); + T2O = FNMS(KP414213562, T2N, T2M); + T2J = T25 + T2a; + T2b = T25 - T2a; + T2i = T2g + T2h; + T2K = T2g - T2h; + T3C = T2F - T2G; + T2H = T2F + T2G; + T2B = FMA(KP414213562, T2b, T2i); + T2j = FNMS(KP414213562, T2i, T2b); + } + } + { + E T2A, T3G, T2P, T2D, T3E, T3F, T3D, T2I; + { + E T24, T2L, T2C, T2y, T3J, T3L, T3K, T2S, T2z, T3M; + T2A = FNMS(KP707106781, T23, T1O); + T24 = FMA(KP707106781, T23, T1O); + T2S = FNMS(KP414213562, T2J, T2K); + T2L = FMA(KP414213562, T2K, T2J); + T2C = FMA(KP414213562, T2q, T2x); + T2y = FNMS(KP414213562, T2x, T2q); + T3J = FMA(KP707106781, T3I, T3H); + T3L = FNMS(KP707106781, T3I, T3H); + T2T = T2R - T2S; + T3K = T2S + T2R; + T3G = T2y - T2j; + T2z = T2j + T2y; + T3M = T2O - T2L; + T2P = T2L + T2O; + ci[WS(rs, 14)] = FMA(KP923879532, T3K, T3J); + cr[WS(rs, 9)] = FMS(KP923879532, T3K, T3J); + 
ci[0] = FMA(KP923879532, T2z, T24); + cr[WS(rs, 7)] = FNMS(KP923879532, T2z, T24); + cr[WS(rs, 13)] = FMS(KP923879532, T3M, T3L); + ci[WS(rs, 10)] = FMA(KP923879532, T3M, T3L); + T2D = T2B + T2C; + T3E = T2C - T2B; + } + T2Q = FNMS(KP707106781, T2H, T2E); + T2I = FMA(KP707106781, T2H, T2E); + T3F = FNMS(KP707106781, T3C, T3B); + T3D = FMA(KP707106781, T3C, T3B); + cr[WS(rs, 3)] = FMA(KP923879532, T2D, T2A); + ci[WS(rs, 4)] = FNMS(KP923879532, T2D, T2A); + cr[WS(rs, 1)] = FMA(KP923879532, T2P, T2I); + ci[WS(rs, 6)] = FNMS(KP923879532, T2P, T2I); + ci[WS(rs, 8)] = FMA(KP923879532, T3E, T3D); + cr[WS(rs, 15)] = FMS(KP923879532, T3E, T3D); + ci[WS(rs, 12)] = FMA(KP923879532, T3G, T3F); + cr[WS(rs, 11)] = FMS(KP923879532, T3G, T3F); + } + } + } + } + } + ci[WS(rs, 2)] = FMA(KP923879532, T2T, T2Q); + cr[WS(rs, 5)] = FNMS(KP923879532, T2T, T2Q); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hf_16", twinstr, &GENUS, {104, 30, 70, 0} }; + +void X(codelet_hf_16) (planner *p) { + X(khc2hc_register) (p, hf_16, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hf_16 -include hf.h */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 52 stack variables, 3 constants, and 64 memory accesses + */ +#include "hf.h" + +static void hf_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) { + E T7, T38, T1t, T2U, Ti, T37, T1w, T2R, Tu, T2t, T1C, T2c, TF, T2s, 
T1H; + E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2k, T24, T2j, TS, T13, T2w, T2x; + E T2y, T2z, T1O, T2h, T1T, T2g; + { + E T1, T2T, T6, T2S; + T1 = cr[0]; + T2T = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 8)]; + T5 = ci[WS(rs, 8)]; + T2 = W[14]; + T4 = W[15]; + T6 = FMA(T2, T3, T4 * T5); + T2S = FNMS(T4, T3, T2 * T5); + } + T7 = T1 + T6; + T38 = T2T - T2S; + T1t = T1 - T6; + T2U = T2S + T2T; + } + { + E Tc, T1u, Th, T1v; + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 4)]; + Tb = ci[WS(rs, 4)]; + T8 = W[6]; + Ta = W[7]; + Tc = FMA(T8, T9, Ta * Tb); + T1u = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 12)]; + Tg = ci[WS(rs, 12)]; + Td = W[22]; + Tf = W[23]; + Th = FMA(Td, Te, Tf * Tg); + T1v = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc + Th; + T37 = Tc - Th; + T1w = T1u - T1v; + T2R = T1u + T1v; + } + { + E To, T1z, Tt, T1A, T1y, T1B; + { + E Tl, Tn, Tk, Tm; + Tl = cr[WS(rs, 2)]; + Tn = ci[WS(rs, 2)]; + Tk = W[2]; + Tm = W[3]; + To = FMA(Tk, Tl, Tm * Tn); + T1z = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = cr[WS(rs, 10)]; + Ts = ci[WS(rs, 10)]; + Tp = W[18]; + Tr = W[19]; + Tt = FMA(Tp, Tq, Tr * Ts); + T1A = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T2t = T1z + T1A; + T1y = To - Tt; + T1B = T1z - T1A; + T1C = T1y - T1B; + T2c = T1y + T1B; + } + { + E Tz, T1E, TE, T1F, T1D, T1G; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 14)]; + Ty = ci[WS(rs, 14)]; + Tv = W[26]; + Tx = W[27]; + Tz = FMA(Tv, Tw, Tx * Ty); + T1E = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 6)]; + TD = ci[WS(rs, 6)]; + TA = W[10]; + TC = W[11]; + TE = FMA(TA, TB, TC * TD); + T1F = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T2s = T1E + T1F; + T1D = Tz - TE; + T1G = T1E - T1F; + T1H = T1D + T1G; + T2d = T1D - T1G; + } + { + E T19, T1V, T1p, T22, T1e, T1W, T1k, T21; + { + E T16, T18, T15, T17; + T16 = cr[WS(rs, 15)]; + T18 = ci[WS(rs, 15)]; + T15 = W[28]; + T17 = W[29]; + T19 = FMA(T15, T16, T17 * T18); + T1V = FNMS(T17, T16, T15 * T18); + } + { 
+ E T1m, T1o, T1l, T1n; + T1m = cr[WS(rs, 11)]; + T1o = ci[WS(rs, 11)]; + T1l = W[20]; + T1n = W[21]; + T1p = FMA(T1l, T1m, T1n * T1o); + T22 = FNMS(T1n, T1m, T1l * T1o); + } + { + E T1b, T1d, T1a, T1c; + T1b = cr[WS(rs, 7)]; + T1d = ci[WS(rs, 7)]; + T1a = W[12]; + T1c = W[13]; + T1e = FMA(T1a, T1b, T1c * T1d); + T1W = FNMS(T1c, T1b, T1a * T1d); + } + { + E T1h, T1j, T1g, T1i; + T1h = cr[WS(rs, 3)]; + T1j = ci[WS(rs, 3)]; + T1g = W[4]; + T1i = W[5]; + T1k = FMA(T1g, T1h, T1i * T1j); + T21 = FNMS(T1i, T1h, T1g * T1j); + } + T1f = T19 + T1e; + T1q = T1k + T1p; + T2B = T1f - T1q; + T2C = T1V + T1W; + T2D = T21 + T22; + T2E = T2C - T2D; + { + E T1X, T1Y, T20, T23; + T1X = T1V - T1W; + T1Y = T1k - T1p; + T1Z = T1X + T1Y; + T2k = T1X - T1Y; + T20 = T19 - T1e; + T23 = T21 - T22; + T24 = T20 - T23; + T2j = T20 + T23; + } + } + { + E TM, T1P, T12, T1M, TR, T1Q, TX, T1L; + { + E TJ, TL, TI, TK; + TJ = cr[WS(rs, 1)]; + TL = ci[WS(rs, 1)]; + TI = W[0]; + TK = W[1]; + TM = FMA(TI, TJ, TK * TL); + T1P = FNMS(TK, TJ, TI * TL); + } + { + E TZ, T11, TY, T10; + TZ = cr[WS(rs, 13)]; + T11 = ci[WS(rs, 13)]; + TY = W[24]; + T10 = W[25]; + T12 = FMA(TY, TZ, T10 * T11); + T1M = FNMS(T10, TZ, TY * T11); + } + { + E TO, TQ, TN, TP; + TO = cr[WS(rs, 9)]; + TQ = ci[WS(rs, 9)]; + TN = W[16]; + TP = W[17]; + TR = FMA(TN, TO, TP * TQ); + T1Q = FNMS(TP, TO, TN * TQ); + } + { + E TU, TW, TT, TV; + TU = cr[WS(rs, 5)]; + TW = ci[WS(rs, 5)]; + TT = W[8]; + TV = W[9]; + TX = FMA(TT, TU, TV * TW); + T1L = FNMS(TV, TU, TT * TW); + } + TS = TM + TR; + T13 = TX + T12; + T2w = TS - T13; + T2x = T1P + T1Q; + T2y = T1L + T1M; + T2z = T2x - T2y; + { + E T1K, T1N, T1R, T1S; + T1K = TM - TR; + T1N = T1L - T1M; + T1O = T1K - T1N; + T2h = T1K + T1N; + T1R = T1P - T1Q; + T1S = TX - T12; + T1T = T1R + T1S; + T2g = T1R - T1S; + } + } + { + E T1J, T27, T3a, T3c, T26, T3b, T2a, T35; + { + E T1x, T1I, T36, T39; + T1x = T1t - T1w; + T1I = KP707106781 * (T1C + T1H); + T1J = T1x + T1I; + T27 = T1x - T1I; + T36 = 
KP707106781 * (T2c - T2d); + T39 = T37 + T38; + T3a = T36 + T39; + T3c = T39 - T36; + } + { + E T1U, T25, T28, T29; + T1U = FNMS(KP382683432, T1T, KP923879532 * T1O); + T25 = FMA(KP382683432, T1Z, KP923879532 * T24); + T26 = T1U + T25; + T3b = T25 - T1U; + T28 = FMA(KP923879532, T1T, KP382683432 * T1O); + T29 = FNMS(KP923879532, T1Z, KP382683432 * T24); + T2a = T28 + T29; + T35 = T29 - T28; + } + cr[WS(rs, 7)] = T1J - T26; + cr[WS(rs, 11)] = T3b - T3c; + ci[WS(rs, 12)] = T3b + T3c; + ci[0] = T1J + T26; + ci[WS(rs, 4)] = T27 - T2a; + cr[WS(rs, 15)] = T35 - T3a; + ci[WS(rs, 8)] = T35 + T3a; + cr[WS(rs, 3)] = T27 + T2a; + } + { + E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P; + { + E Tj, TG, T2Q, T2V; + Tj = T7 + Ti; + TG = Tu + TF; + TH = Tj + TG; + T2L = Tj - TG; + T2Q = T2t + T2s; + T2V = T2R + T2U; + T2W = T2Q + T2V; + T2Y = T2V - T2Q; + } + { + E T14, T1r, T2M, T2N; + T14 = TS + T13; + T1r = T1f + T1q; + T1s = T14 + T1r; + T2X = T1r - T14; + T2M = T2C + T2D; + T2N = T2x + T2y; + T2O = T2M - T2N; + T2P = T2N + T2M; + } + ci[WS(rs, 7)] = TH - T1s; + cr[WS(rs, 12)] = T2X - T2Y; + ci[WS(rs, 11)] = T2X + T2Y; + cr[0] = TH + T1s; + cr[WS(rs, 4)] = T2L - T2O; + cr[WS(rs, 8)] = T2P - T2W; + ci[WS(rs, 15)] = T2P + T2W; + ci[WS(rs, 3)] = T2L + T2O; + } + { + E T2f, T2n, T3g, T3i, T2m, T3h, T2q, T3d; + { + E T2b, T2e, T3e, T3f; + T2b = T1t + T1w; + T2e = KP707106781 * (T2c + T2d); + T2f = T2b + T2e; + T2n = T2b - T2e; + T3e = KP707106781 * (T1H - T1C); + T3f = T38 - T37; + T3g = T3e + T3f; + T3i = T3f - T3e; + } + { + E T2i, T2l, T2o, T2p; + T2i = FMA(KP382683432, T2g, KP923879532 * T2h); + T2l = FNMS(KP382683432, T2k, KP923879532 * T2j); + T2m = T2i + T2l; + T3h = T2l - T2i; + T2o = FNMS(KP923879532, T2g, KP382683432 * T2h); + T2p = FMA(KP923879532, T2k, KP382683432 * T2j); + T2q = T2o + T2p; + T3d = T2p - T2o; + } + ci[WS(rs, 6)] = T2f - T2m; + cr[WS(rs, 13)] = T3h - T3i; + ci[WS(rs, 10)] = T3h + T3i; + cr[WS(rs, 1)] = T2f + T2m; + cr[WS(rs, 5)] = T2n - T2q; + cr[WS(rs, 9)] = 
T3d - T3g; + ci[WS(rs, 14)] = T3d + T3g; + ci[WS(rs, 2)] = T2n + T2q; + } + { + E T2v, T2H, T32, T34, T2G, T2Z, T2K, T33; + { + E T2r, T2u, T30, T31; + T2r = T7 - Ti; + T2u = T2s - T2t; + T2v = T2r - T2u; + T2H = T2r + T2u; + T30 = Tu - TF; + T31 = T2U - T2R; + T32 = T30 + T31; + T34 = T31 - T30; + } + { + E T2A, T2F, T2I, T2J; + T2A = T2w + T2z; + T2F = T2B - T2E; + T2G = KP707106781 * (T2A + T2F); + T2Z = KP707106781 * (T2F - T2A); + T2I = T2w - T2z; + T2J = T2B + T2E; + T2K = KP707106781 * (T2I + T2J); + T33 = KP707106781 * (T2J - T2I); + } + ci[WS(rs, 5)] = T2v - T2G; + cr[WS(rs, 10)] = T33 - T34; + ci[WS(rs, 13)] = T33 + T34; + cr[WS(rs, 2)] = T2v + T2G; + cr[WS(rs, 6)] = T2H - T2K; + cr[WS(rs, 14)] = T2Z - T32; + ci[WS(rs, 9)] = T2Z + T32; + ci[WS(rs, 1)] = T2H + T2K; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 16}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 16, "hf_16", twinstr, &GENUS, {136, 46, 38, 0} }; + +void X(codelet_hf_16) (planner *p) { + X(khc2hc_register) (p, hf_16, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -dit -name hf_2 -include hf.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 11 stack variables, 0 constants, and 8 memory accesses + */ +#include "hf.h" + +static void hf_2(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs)) { + E T1, Ta, T3, T6, T2, T5; + T1 = cr[0]; + Ta = ci[0]; + T3 = cr[WS(rs, 1)]; + T6 = ci[WS(rs, 1)]; + T2 = W[0]; + T5 = W[1]; + { + E T8, T4, T9, T7; + T8 = T2 * T6; + T4 = T2 * T3; + T9 = FNMS(T5, T3, T8); + T7 = FMA(T5, T6, T4); + ci[WS(rs, 1)] = T9 + Ta; + cr[WS(rs, 1)] = T9 - Ta; + cr[0] = T1 + T7; + ci[0] = T1 - T7; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 2, "hf_2", twinstr, &GENUS, {4, 2, 2, 0} }; + +void X(codelet_hf_2) (planner *p) { + X(khc2hc_register) (p, hf_2, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 2 -dit -name hf_2 -include hf.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 9 stack variables, 0 constants, and 8 memory accesses + */ +#include "hf.h" + +static void hf_2(R *cr, R *ci, const R *W, 
stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 2); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs)) { + E T1, T8, T6, T7; + T1 = cr[0]; + T8 = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 1)]; + T5 = ci[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + T7 = FNMS(T4, T3, T2 * T5); + } + ci[0] = T1 - T6; + cr[0] = T1 + T6; + cr[WS(rs, 1)] = T7 - T8; + ci[WS(rs, 1)] = T7 + T8; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 2}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 2, "hf_2", twinstr, &GENUS, {4, 2, 2, 0} }; + +void X(codelet_hf_2) (planner *p) { + X(khc2hc_register) (p, hf_2, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1027 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:10 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hf_20 -include hf.h */ + +/* + * This function contains 246 FP additions, 148 FP multiplications, + * (or, 136 additions, 38 multiplications, 110 fused multiply/add), + * 100 stack variables, 4 constants, and 80 memory accesses + */ +#include "hf.h" + +static void hf_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 38, MAKE_VOLATILE_STRIDE(40, rs)) { + E T54, T5a, T5c, T56, T53, T55, T5b, T57; + { + E T4N, T4q, T8, T2i, T4r, T2n, T4O, Tl, T2v, T3v, T43, T4b, TN, T2b, T3F; + E T3a, T2R, T3z, T3T, T4f, T27, T2f, T3J, T3i, T2K, T3y, T3W, T4e, T1G, T2e; + E T3I, T3p, T2C, T3w, T40, T4c, T1e, T2c, T3G, T33; + { + E T1, T4p, T3, T6, T2, T5; + T1 = cr[0]; + T4p = ci[0]; + T3 = cr[WS(rs, 10)]; + T6 = ci[WS(rs, 10)]; + T2 = W[18]; + T5 = W[19]; + { + E Ta, Td, Tg, T2j, Tb, Tj, Tf, Tc, Ti; + { + E T4n, T4, T9, T4o, T7; + Ta = cr[WS(rs, 5)]; + Td = ci[WS(rs, 5)]; + T4n = T2 * T6; + T4 = T2 * T3; + T9 = W[8]; + Tg = cr[WS(rs, 15)]; + T4o = FNMS(T5, T3, T4n); + T7 = FMA(T5, T6, T4); + T2j = T9 * Td; + Tb = T9 * Ta; + T4N = T4p - T4o; + T4q = T4o + T4p; 
+ T8 = T1 + T7; + T2i = T1 - T7; + Tj = ci[WS(rs, 15)]; + Tf = W[28]; + } + Tc = W[9]; + Ti = W[29]; + { + E T36, Ts, T2t, TL, TB, TE, TD, T38, Ty, T2q, TC; + { + E TH, TK, TJ, T2s, TI; + { + E To, Tr, Tp, T35, Tq, TG; + { + E T2k, Te, T2m, Tk, T2l, Th, Tn; + To = cr[WS(rs, 4)]; + T2l = Tf * Tj; + Th = Tf * Tg; + T2k = FNMS(Tc, Ta, T2j); + Te = FMA(Tc, Td, Tb); + T2m = FNMS(Ti, Tg, T2l); + Tk = FMA(Ti, Tj, Th); + Tr = ci[WS(rs, 4)]; + Tn = W[6]; + T4r = T2k + T2m; + T2n = T2k - T2m; + T4O = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T35 = Tn * Tr; + } + Tq = W[7]; + TH = cr[WS(rs, 19)]; + TK = ci[WS(rs, 19)]; + TG = W[36]; + T36 = FNMS(Tq, To, T35); + Ts = FMA(Tq, Tr, Tp); + TJ = W[37]; + T2s = TG * TK; + TI = TG * TH; + } + { + E Tu, Tx, Tt, Tw, T37, Tv, TA; + Tu = cr[WS(rs, 14)]; + Tx = ci[WS(rs, 14)]; + T2t = FNMS(TJ, TH, T2s); + TL = FMA(TJ, TK, TI); + Tt = W[26]; + Tw = W[27]; + TB = cr[WS(rs, 9)]; + TE = ci[WS(rs, 9)]; + T37 = Tt * Tx; + Tv = Tt * Tu; + TA = W[16]; + TD = W[17]; + T38 = FNMS(Tw, Tu, T37); + Ty = FMA(Tw, Tx, Tv); + T2q = TA * TE; + TC = TA * TB; + } + } + { + E T39, T42, Tz, T2p, T2r, TF; + T39 = T36 - T38; + T42 = T36 + T38; + Tz = Ts + Ty; + T2p = Ts - Ty; + T2r = FNMS(TD, TB, T2q); + TF = FMA(TD, TE, TC); + { + E T41, T2u, TM, T34; + T41 = T2r + T2t; + T2u = T2r - T2t; + TM = TF + TL; + T34 = TL - TF; + T2v = T2p - T2u; + T3v = T2p + T2u; + T43 = T41 - T42; + T4b = T42 + T41; + TN = Tz - TM; + T2b = Tz + TM; + T3F = T39 + T34; + T3a = T34 - T39; + } + } + } + } + } + { + E T3e, T1M, T2P, T25, T1V, T1Y, T1X, T3g, T1S, T2M, T1W; + { + E T21, T24, T23, T2O, T22; + { + E T1I, T1L, T1H, T1K, T3d, T1J, T20; + T1I = cr[WS(rs, 12)]; + T1L = ci[WS(rs, 12)]; + T1H = W[22]; + T1K = W[23]; + T21 = cr[WS(rs, 7)]; + T24 = ci[WS(rs, 7)]; + T3d = T1H * T1L; + T1J = T1H * T1I; + T20 = W[12]; + T23 = W[13]; + T3e = FNMS(T1K, T1I, T3d); + T1M = FMA(T1K, T1L, T1J); + T2O = T20 * T24; + T22 = T20 * T21; + } + { + E T1O, T1R, T1N, T1Q, T3f, T1P, T1U; + T1O = 
cr[WS(rs, 2)]; + T1R = ci[WS(rs, 2)]; + T2P = FNMS(T23, T21, T2O); + T25 = FMA(T23, T24, T22); + T1N = W[2]; + T1Q = W[3]; + T1V = cr[WS(rs, 17)]; + T1Y = ci[WS(rs, 17)]; + T3f = T1N * T1R; + T1P = T1N * T1O; + T1U = W[32]; + T1X = W[33]; + T3g = FNMS(T1Q, T1O, T3f); + T1S = FMA(T1Q, T1R, T1P); + T2M = T1U * T1Y; + T1W = T1U * T1V; + } + } + { + E T3h, T3S, T1T, T2L, T2N, T1Z; + T3h = T3e - T3g; + T3S = T3e + T3g; + T1T = T1M + T1S; + T2L = T1M - T1S; + T2N = FNMS(T1X, T1V, T2M); + T1Z = FMA(T1X, T1Y, T1W); + { + E T3R, T2Q, T26, T3c; + T3R = T2N + T2P; + T2Q = T2N - T2P; + T26 = T1Z + T25; + T3c = T25 - T1Z; + T2R = T2L - T2Q; + T3z = T2L + T2Q; + T3T = T3R - T3S; + T4f = T3S + T3R; + T27 = T1T - T26; + T2f = T1T + T26; + T3J = T3h + T3c; + T3i = T3c - T3h; + } + } + } + { + E T3l, T1l, T2I, T1E, T1u, T1x, T1w, T3n, T1r, T2F, T1v; + { + E T1A, T1D, T1C, T2H, T1B; + { + E T1h, T1k, T1g, T1j, T3k, T1i, T1z; + T1h = cr[WS(rs, 8)]; + T1k = ci[WS(rs, 8)]; + T1g = W[14]; + T1j = W[15]; + T1A = cr[WS(rs, 3)]; + T1D = ci[WS(rs, 3)]; + T3k = T1g * T1k; + T1i = T1g * T1h; + T1z = W[4]; + T1C = W[5]; + T3l = FNMS(T1j, T1h, T3k); + T1l = FMA(T1j, T1k, T1i); + T2H = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T3m, T1o, T1t; + T1n = cr[WS(rs, 18)]; + T1q = ci[WS(rs, 18)]; + T2I = FNMS(T1C, T1A, T2H); + T1E = FMA(T1C, T1D, T1B); + T1m = W[34]; + T1p = W[35]; + T1u = cr[WS(rs, 13)]; + T1x = ci[WS(rs, 13)]; + T3m = T1m * T1q; + T1o = T1m * T1n; + T1t = W[24]; + T1w = W[25]; + T3n = FNMS(T1p, T1n, T3m); + T1r = FMA(T1p, T1q, T1o); + T2F = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T3o, T3V, T1s, T2E, T2G, T1y; + T3o = T3l - T3n; + T3V = T3l + T3n; + T1s = T1l + T1r; + T2E = T1l - T1r; + T2G = FNMS(T1w, T1u, T2F); + T1y = FMA(T1w, T1x, T1v); + { + E T3U, T2J, T1F, T3j; + T3U = T2G + T2I; + T2J = T2G - T2I; + T1F = T1y + T1E; + T3j = T1E - T1y; + T2K = T2E - T2J; + T3y = T2E + T2J; + T3W = T3U - T3V; + T4e = T3V + T3U; + T1G = T1s - T1F; + T2e = T1s + T1F; + T3I 
= T3o + T3j; + T3p = T3j - T3o; + } + } + } + { + E T2Z, TT, T2A, T1c, T12, T15, T14, T31, TZ, T2x, T13; + { + E T18, T1b, T1a, T2z, T19; + { + E TP, TS, TO, TR, T2Y, TQ, T17; + TP = cr[WS(rs, 16)]; + TS = ci[WS(rs, 16)]; + TO = W[30]; + TR = W[31]; + T18 = cr[WS(rs, 11)]; + T1b = ci[WS(rs, 11)]; + T2Y = TO * TS; + TQ = TO * TP; + T17 = W[20]; + T1a = W[21]; + T2Z = FNMS(TR, TP, T2Y); + TT = FMA(TR, TS, TQ); + T2z = T17 * T1b; + T19 = T17 * T18; + } + { + E TV, TY, TU, TX, T30, TW, T11; + TV = cr[WS(rs, 6)]; + TY = ci[WS(rs, 6)]; + T2A = FNMS(T1a, T18, T2z); + T1c = FMA(T1a, T1b, T19); + TU = W[10]; + TX = W[11]; + T12 = cr[WS(rs, 1)]; + T15 = ci[WS(rs, 1)]; + T30 = TU * TY; + TW = TU * TV; + T11 = W[0]; + T14 = W[1]; + T31 = FNMS(TX, TV, T30); + TZ = FMA(TX, TY, TW); + T2x = T11 * T15; + T13 = T11 * T12; + } + } + { + E T32, T3Z, T10, T2w, T2y, T16; + T32 = T2Z - T31; + T3Z = T2Z + T31; + T10 = TT + TZ; + T2w = TT - TZ; + T2y = FNMS(T14, T12, T2x); + T16 = FMA(T14, T15, T13); + { + E T3Y, T2B, T1d, T2X; + T3Y = T2y + T2A; + T2B = T2y - T2A; + T1d = T16 + T1c; + T2X = T1c - T16; + T2C = T2w - T2B; + T3w = T2w + T2B; + T40 = T3Y - T3Z; + T4c = T3Z + T3Y; + T1e = T10 - T1d; + T2c = T10 + T1d; + T3G = T32 + T2X; + T33 = T2X - T32; + } + } + } + { + E T4l, T4k, T4w, T4x, T4Q, T4R, T2o, T4X, T4W, T4C, T4D, T4J, T4h, T4j, T4I; + E T51, T52, T49, T3r, T3t, T58, T2D, T48, T2S, T59; + { + E T2a, T47, T45, T3u, T3x, T3N, T3L, T3A, T46, T3Q; + { + E Tm, T1f, T28, T3X, T44; + T4l = T3W + T3T; + T3X = T3T - T3W; + T44 = T40 - T43; + T4k = T43 + T40; + T2a = T8 + Tl; + Tm = T8 - Tl; + T1f = TN + T1e; + T4w = T1e - TN; + T4x = T1G - T27; + T28 = T1G + T27; + T47 = FMA(KP618033988, T3X, T44); + T45 = FNMS(KP618033988, T44, T3X); + { + E T3H, T29, T3P, T3K, T3O; + T3H = T3F - T3G; + T4Q = T3F + T3G; + T29 = T1f + T28; + T3P = T1f - T28; + T4R = T3I + T3J; + T3K = T3I - T3J; + T3u = T2i + T2n; + T2o = T2i - T2n; + T4X = T3v - T3w; + T3x = T3v + T3w; + ci[WS(rs, 9)] = Tm + T29; + T3O 
= FNMS(KP250000000, T29, Tm); + T3N = FNMS(KP618033988, T3H, T3K); + T3L = FMA(KP618033988, T3K, T3H); + T3A = T3y + T3z; + T4W = T3y - T3z; + T46 = FMA(KP559016994, T3P, T3O); + T3Q = FNMS(KP559016994, T3P, T3O); + } + } + { + E T2d, T2g, T3b, T3q, T2h; + { + E T4d, T3D, T3C, T4g, T3B, T3M, T3E; + T4C = T4b + T4c; + T4d = T4b - T4c; + T3D = T3x - T3A; + T3B = T3x + T3A; + ci[WS(rs, 1)] = FMA(KP951056516, T45, T3Q); + cr[WS(rs, 2)] = FNMS(KP951056516, T45, T3Q); + cr[WS(rs, 6)] = FMA(KP951056516, T47, T46); + ci[WS(rs, 5)] = FNMS(KP951056516, T47, T46); + cr[WS(rs, 5)] = T3u + T3B; + T3C = FNMS(KP250000000, T3B, T3u); + T4g = T4e - T4f; + T4D = T4e + T4f; + T2d = T2b + T2c; + T4J = T2b - T2c; + T3M = FNMS(KP559016994, T3D, T3C); + T3E = FMA(KP559016994, T3D, T3C); + T4h = FMA(KP618033988, T4g, T4d); + T4j = FNMS(KP618033988, T4d, T4g); + cr[WS(rs, 9)] = FNMS(KP951056516, T3L, T3E); + cr[WS(rs, 1)] = FMA(KP951056516, T3L, T3E); + ci[WS(rs, 6)] = FMA(KP951056516, T3N, T3M); + ci[WS(rs, 2)] = FNMS(KP951056516, T3N, T3M); + T4I = T2f - T2e; + T2g = T2e + T2f; + } + T3b = T33 - T3a; + T51 = T3a + T33; + T52 = T3p + T3i; + T3q = T3i - T3p; + T2h = T2d + T2g; + T49 = T2d - T2g; + T3r = FMA(KP618033988, T3q, T3b); + T3t = FNMS(KP618033988, T3b, T3q); + T58 = T2v - T2C; + T2D = T2v + T2C; + cr[0] = T2a + T2h; + T48 = FNMS(KP250000000, T2h, T2a); + T2S = T2K + T2R; + T59 = T2K - T2R; + } + } + { + E T4B, T4P, T4Y, T50, T4U, T4S; + { + E T4A, T4y, T4s, T4m, T4u, T4t, T4z, T4v; + { + E T2V, T2U, T4i, T4a, T2T, T2W, T3s; + T4i = FNMS(KP559016994, T49, T48); + T4a = FMA(KP559016994, T49, T48); + T2T = T2D + T2S; + T2V = T2D - T2S; + ci[WS(rs, 3)] = FMA(KP951056516, T4h, T4a); + cr[WS(rs, 4)] = FNMS(KP951056516, T4h, T4a); + cr[WS(rs, 8)] = FMA(KP951056516, T4j, T4i); + ci[WS(rs, 7)] = FNMS(KP951056516, T4j, T4i); + ci[WS(rs, 4)] = T2o + T2T; + T2U = FNMS(KP250000000, T2T, T2o); + T4A = FMA(KP618033988, T4w, T4x); + T4y = FNMS(KP618033988, T4x, T4w); + T4B = T4r + T4q; + T4s = 
T4q - T4r; + T2W = FMA(KP559016994, T2V, T2U); + T3s = FNMS(KP559016994, T2V, T2U); + ci[WS(rs, 8)] = FMA(KP951056516, T3r, T2W); + ci[0] = FNMS(KP951056516, T3r, T2W); + cr[WS(rs, 7)] = FNMS(KP951056516, T3t, T3s); + cr[WS(rs, 3)] = FMA(KP951056516, T3t, T3s); + T4m = T4k + T4l; + T4u = T4l - T4k; + } + cr[WS(rs, 10)] = T4m - T4s; + T4t = FMA(KP250000000, T4m, T4s); + T4P = T4N - T4O; + T54 = T4O + T4N; + T4Y = FNMS(KP618033988, T4X, T4W); + T50 = FMA(KP618033988, T4W, T4X); + T4z = FNMS(KP559016994, T4u, T4t); + T4v = FMA(KP559016994, T4u, T4t); + ci[WS(rs, 13)] = FMA(KP951056516, T4y, T4v); + cr[WS(rs, 14)] = FMS(KP951056516, T4y, T4v); + ci[WS(rs, 17)] = FMA(KP951056516, T4A, T4z); + cr[WS(rs, 18)] = FMS(KP951056516, T4A, T4z); + T4U = T4Q - T4R; + T4S = T4Q + T4R; + } + { + E T4M, T4K, T4E, T4G, T4T, T4V, T4Z, T4F, T4L, T4H; + ci[WS(rs, 14)] = T4S + T4P; + T4T = FNMS(KP250000000, T4S, T4P); + T4M = FNMS(KP618033988, T4I, T4J); + T4K = FMA(KP618033988, T4J, T4I); + T4V = FNMS(KP559016994, T4U, T4T); + T4Z = FMA(KP559016994, T4U, T4T); + cr[WS(rs, 17)] = -(FMA(KP951056516, T4Y, T4V)); + cr[WS(rs, 13)] = FMS(KP951056516, T4Y, T4V); + ci[WS(rs, 18)] = FNMS(KP951056516, T50, T4Z); + ci[WS(rs, 10)] = FMA(KP951056516, T50, T4Z); + T4E = T4C + T4D; + T4G = T4C - T4D; + ci[WS(rs, 19)] = T4E + T4B; + T4F = FNMS(KP250000000, T4E, T4B); + T5a = FMA(KP618033988, T59, T58); + T5c = FNMS(KP618033988, T58, T59); + T4L = FMA(KP559016994, T4G, T4F); + T4H = FNMS(KP559016994, T4G, T4F); + ci[WS(rs, 11)] = FMA(KP951056516, T4K, T4H); + cr[WS(rs, 12)] = FMS(KP951056516, T4K, T4H); + ci[WS(rs, 15)] = FMA(KP951056516, T4M, T4L); + cr[WS(rs, 16)] = FMS(KP951056516, T4M, T4L); + T56 = T51 - T52; + T53 = T51 + T52; + } + } + } + } + cr[WS(rs, 15)] = T53 - T54; + T55 = FMA(KP250000000, T53, T54); + T5b = FMA(KP559016994, T56, T55); + T57 = FNMS(KP559016994, T56, T55); + cr[WS(rs, 19)] = -(FMA(KP951056516, T5a, T57)); + cr[WS(rs, 11)] = FMS(KP951056516, T5a, T57); + ci[WS(rs, 16)] = 
FNMS(KP951056516, T5c, T5b); + ci[WS(rs, 12)] = FMA(KP951056516, T5c, T5b); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hf_20", twinstr, &GENUS, {136, 38, 110, 0} }; + +void X(codelet_hf_20) (planner *p) { + X(khc2hc_register) (p, hf_20, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hf_20 -include hf.h */ + +/* + * This function contains 246 FP additions, 124 FP multiplications, + * (or, 184 additions, 62 multiplications, 62 fused multiply/add), + * 85 stack variables, 4 constants, and 80 memory accesses + */ +#include "hf.h" + +static void hf_20(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 38, MAKE_VOLATILE_STRIDE(40, rs)) { + E Tj, T1R, T4j, T4s, T2q, T37, T3Q, T42, T1r, T1O, T1P, T3i, T3l, T3J, T3D; + E T3E, T44, T1V, T1W, T1X, T2e, T2j, T2k, T2W, T2X, T4f, T33, T34, T35, T2J; + E T2O, T4q, TG, T13, T14, T3p, T3s, T3K, T3A, T3B, T43, T1S, T1T, T1U, T23; + E T28, T29, T2T, T2U, T4e, T30, T31, T32, T2y, T2D, T4p; + { + E T1, T3N, T6, T3M, Tc, T2n, Th, T2o; + T1 = cr[0]; + T3N = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 10)]; + T5 = ci[WS(rs, 10)]; + T2 = W[18]; + T4 = W[19]; + T6 = FMA(T2, T3, T4 * T5); + T3M = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 5)]; + Tb = ci[WS(rs, 5)]; + T8 = W[8]; + Ta = W[9]; + Tc = FMA(T8, T9, Ta * Tb); + T2n = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 15)]; + Tg = ci[WS(rs, 15)]; + Td = W[28]; + 
Tf = W[29]; + Th = FMA(Td, Te, Tf * Tg); + T2o = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, T4h, T4i; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 - Ti; + T1R = T7 + Ti; + T4h = T3N - T3M; + T4i = Tc - Th; + T4j = T4h - T4i; + T4s = T4i + T4h; + } + { + E T2m, T2p, T3O, T3P; + T2m = T1 - T6; + T2p = T2n - T2o; + T2q = T2m - T2p; + T37 = T2m + T2p; + T3O = T3M + T3N; + T3P = T2n + T2o; + T3Q = T3O - T3P; + T42 = T3P + T3O; + } + } + { + E T1f, T3g, T2a, T2H, T1N, T3j, T2i, T2N, T1q, T3h, T2d, T2I, T1C, T3k, T2f; + E T2M; + { + E T19, T2F, T1e, T2G; + { + E T16, T18, T15, T17; + T16 = cr[WS(rs, 8)]; + T18 = ci[WS(rs, 8)]; + T15 = W[14]; + T17 = W[15]; + T19 = FMA(T15, T16, T17 * T18); + T2F = FNMS(T17, T16, T15 * T18); + } + { + E T1b, T1d, T1a, T1c; + T1b = cr[WS(rs, 18)]; + T1d = ci[WS(rs, 18)]; + T1a = W[34]; + T1c = W[35]; + T1e = FMA(T1a, T1b, T1c * T1d); + T2G = FNMS(T1c, T1b, T1a * T1d); + } + T1f = T19 + T1e; + T3g = T2F + T2G; + T2a = T19 - T1e; + T2H = T2F - T2G; + } + { + E T1H, T2g, T1M, T2h; + { + E T1E, T1G, T1D, T1F; + T1E = cr[WS(rs, 17)]; + T1G = ci[WS(rs, 17)]; + T1D = W[32]; + T1F = W[33]; + T1H = FMA(T1D, T1E, T1F * T1G); + T2g = FNMS(T1F, T1E, T1D * T1G); + } + { + E T1J, T1L, T1I, T1K; + T1J = cr[WS(rs, 7)]; + T1L = ci[WS(rs, 7)]; + T1I = W[12]; + T1K = W[13]; + T1M = FMA(T1I, T1J, T1K * T1L); + T2h = FNMS(T1K, T1J, T1I * T1L); + } + T1N = T1H + T1M; + T3j = T2g + T2h; + T2i = T2g - T2h; + T2N = T1H - T1M; + } + { + E T1k, T2b, T1p, T2c; + { + E T1h, T1j, T1g, T1i; + T1h = cr[WS(rs, 13)]; + T1j = ci[WS(rs, 13)]; + T1g = W[24]; + T1i = W[25]; + T1k = FMA(T1g, T1h, T1i * T1j); + T2b = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1m, T1o, T1l, T1n; + T1m = cr[WS(rs, 3)]; + T1o = ci[WS(rs, 3)]; + T1l = W[4]; + T1n = W[5]; + T1p = FMA(T1l, T1m, T1n * T1o); + T2c = FNMS(T1n, T1m, T1l * T1o); + } + T1q = T1k + T1p; + T3h = T2b + T2c; + T2d = T2b - T2c; + T2I = T1k - T1p; + } + { + E T1w, T2K, T1B, T2L; + { + E T1t, T1v, T1s, T1u; + T1t = cr[WS(rs, 12)]; + T1v 
= ci[WS(rs, 12)]; + T1s = W[22]; + T1u = W[23]; + T1w = FMA(T1s, T1t, T1u * T1v); + T2K = FNMS(T1u, T1t, T1s * T1v); + } + { + E T1y, T1A, T1x, T1z; + T1y = cr[WS(rs, 2)]; + T1A = ci[WS(rs, 2)]; + T1x = W[2]; + T1z = W[3]; + T1B = FMA(T1x, T1y, T1z * T1A); + T2L = FNMS(T1z, T1y, T1x * T1A); + } + T1C = T1w + T1B; + T3k = T2K + T2L; + T2f = T1w - T1B; + T2M = T2K - T2L; + } + T1r = T1f - T1q; + T1O = T1C - T1N; + T1P = T1r + T1O; + T3i = T3g - T3h; + T3l = T3j - T3k; + T3J = T3l - T3i; + T3D = T3g + T3h; + T3E = T3k + T3j; + T44 = T3D + T3E; + T1V = T1f + T1q; + T1W = T1C + T1N; + T1X = T1V + T1W; + T2e = T2a - T2d; + T2j = T2f - T2i; + T2k = T2e + T2j; + T2W = T2H - T2I; + T2X = T2M - T2N; + T4f = T2W + T2X; + T33 = T2a + T2d; + T34 = T2f + T2i; + T35 = T33 + T34; + T2J = T2H + T2I; + T2O = T2M + T2N; + T4q = T2J + T2O; + } + { + E Tu, T3n, T1Z, T2w, T12, T3r, T27, T2z, TF, T3o, T22, T2x, TR, T3q, T24; + E T2C; + { + E To, T2u, Tt, T2v; + { + E Tl, Tn, Tk, Tm; + Tl = cr[WS(rs, 4)]; + Tn = ci[WS(rs, 4)]; + Tk = W[6]; + Tm = W[7]; + To = FMA(Tk, Tl, Tm * Tn); + T2u = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = cr[WS(rs, 14)]; + Ts = ci[WS(rs, 14)]; + Tp = W[26]; + Tr = W[27]; + Tt = FMA(Tp, Tq, Tr * Ts); + T2v = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T3n = T2u + T2v; + T1Z = To - Tt; + T2w = T2u - T2v; + } + { + E TW, T25, T11, T26; + { + E TT, TV, TS, TU; + TT = cr[WS(rs, 1)]; + TV = ci[WS(rs, 1)]; + TS = W[0]; + TU = W[1]; + TW = FMA(TS, TT, TU * TV); + T25 = FNMS(TU, TT, TS * TV); + } + { + E TY, T10, TX, TZ; + TY = cr[WS(rs, 11)]; + T10 = ci[WS(rs, 11)]; + TX = W[20]; + TZ = W[21]; + T11 = FMA(TX, TY, TZ * T10); + T26 = FNMS(TZ, TY, TX * T10); + } + T12 = TW + T11; + T3r = T25 + T26; + T27 = T25 - T26; + T2z = T11 - TW; + } + { + E Tz, T20, TE, T21; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 9)]; + Ty = ci[WS(rs, 9)]; + Tv = W[16]; + Tx = W[17]; + Tz = FMA(Tv, Tw, Tx * Ty); + T20 = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = 
cr[WS(rs, 19)]; + TD = ci[WS(rs, 19)]; + TA = W[36]; + TC = W[37]; + TE = FMA(TA, TB, TC * TD); + T21 = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T3o = T20 + T21; + T22 = T20 - T21; + T2x = Tz - TE; + } + { + E TL, T2A, TQ, T2B; + { + E TI, TK, TH, TJ; + TI = cr[WS(rs, 16)]; + TK = ci[WS(rs, 16)]; + TH = W[30]; + TJ = W[31]; + TL = FMA(TH, TI, TJ * TK); + T2A = FNMS(TJ, TI, TH * TK); + } + { + E TN, TP, TM, TO; + TN = cr[WS(rs, 6)]; + TP = ci[WS(rs, 6)]; + TM = W[10]; + TO = W[11]; + TQ = FMA(TM, TN, TO * TP); + T2B = FNMS(TO, TN, TM * TP); + } + TR = TL + TQ; + T3q = T2A + T2B; + T24 = TL - TQ; + T2C = T2A - T2B; + } + TG = Tu - TF; + T13 = TR - T12; + T14 = TG + T13; + T3p = T3n - T3o; + T3s = T3q - T3r; + T3K = T3p + T3s; + T3A = T3n + T3o; + T3B = T3q + T3r; + T43 = T3A + T3B; + T1S = Tu + TF; + T1T = TR + T12; + T1U = T1S + T1T; + T23 = T1Z - T22; + T28 = T24 - T27; + T29 = T23 + T28; + T2T = T2w - T2x; + T2U = T2C + T2z; + T4e = T2T + T2U; + T30 = T1Z + T22; + T31 = T24 + T27; + T32 = T30 + T31; + T2y = T2w + T2x; + T2D = T2z - T2C; + T4p = T2D - T2y; + } + { + E T3e, T1Q, T3d, T3u, T3w, T3m, T3t, T3v, T3f; + T3e = KP559016994 * (T14 - T1P); + T1Q = T14 + T1P; + T3d = FNMS(KP250000000, T1Q, Tj); + T3m = T3i + T3l; + T3t = T3p - T3s; + T3u = FNMS(KP587785252, T3t, KP951056516 * T3m); + T3w = FMA(KP951056516, T3t, KP587785252 * T3m); + ci[WS(rs, 9)] = Tj + T1Q; + T3v = T3e + T3d; + ci[WS(rs, 5)] = T3v - T3w; + cr[WS(rs, 6)] = T3v + T3w; + T3f = T3d - T3e; + cr[WS(rs, 2)] = T3f - T3u; + ci[WS(rs, 1)] = T3f + T3u; + } + { + E T36, T38, T39, T2Z, T3c, T2V, T2Y, T3b, T3a; + T36 = KP559016994 * (T32 - T35); + T38 = T32 + T35; + T39 = FNMS(KP250000000, T38, T37); + T2V = T2T - T2U; + T2Y = T2W - T2X; + T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y); + T3c = FNMS(KP587785252, T2V, KP951056516 * T2Y); + cr[WS(rs, 5)] = T37 + T38; + T3b = T39 - T36; + ci[WS(rs, 2)] = T3b - T3c; + ci[WS(rs, 6)] = T3c + T3b; + T3a = T36 + T39; + cr[WS(rs, 1)] = T2Z + T3a; + cr[WS(rs, 
9)] = T3a - T2Z; + } + { + E T3x, T1Y, T3y, T3G, T3I, T3C, T3F, T3H, T3z; + T3x = KP559016994 * (T1U - T1X); + T1Y = T1U + T1X; + T3y = FNMS(KP250000000, T1Y, T1R); + T3C = T3A - T3B; + T3F = T3D - T3E; + T3G = FMA(KP951056516, T3C, KP587785252 * T3F); + T3I = FNMS(KP587785252, T3C, KP951056516 * T3F); + cr[0] = T1R + T1Y; + T3H = T3y - T3x; + ci[WS(rs, 7)] = T3H - T3I; + cr[WS(rs, 8)] = T3H + T3I; + T3z = T3x + T3y; + cr[WS(rs, 4)] = T3z - T3G; + ci[WS(rs, 3)] = T3z + T3G; + } + { + E T2l, T2r, T2s, T2Q, T2R, T2E, T2P, T2S, T2t; + T2l = KP559016994 * (T29 - T2k); + T2r = T29 + T2k; + T2s = FNMS(KP250000000, T2r, T2q); + T2E = T2y + T2D; + T2P = T2J - T2O; + T2Q = FMA(KP951056516, T2E, KP587785252 * T2P); + T2R = FNMS(KP587785252, T2E, KP951056516 * T2P); + ci[WS(rs, 4)] = T2q + T2r; + T2S = T2s - T2l; + cr[WS(rs, 3)] = T2R + T2S; + cr[WS(rs, 7)] = T2S - T2R; + T2t = T2l + T2s; + ci[0] = T2t - T2Q; + ci[WS(rs, 8)] = T2Q + T2t; + } + { + E T3U, T3L, T3V, T3T, T3X, T3R, T3S, T3Y, T3W; + T3U = KP559016994 * (T3K + T3J); + T3L = T3J - T3K; + T3V = FMA(KP250000000, T3L, T3Q); + T3R = T13 - TG; + T3S = T1r - T1O; + T3T = FNMS(KP587785252, T3S, KP951056516 * T3R); + T3X = FMA(KP587785252, T3R, KP951056516 * T3S); + cr[WS(rs, 10)] = T3L - T3Q; + T3Y = T3V - T3U; + cr[WS(rs, 18)] = T3X - T3Y; + ci[WS(rs, 17)] = T3X + T3Y; + T3W = T3U + T3V; + cr[WS(rs, 14)] = T3T - T3W; + ci[WS(rs, 13)] = T3T + T3W; + } + { + E T4g, T4k, T4l, T4d, T4n, T4b, T4c, T4o, T4m; + T4g = KP559016994 * (T4e - T4f); + T4k = T4e + T4f; + T4l = FNMS(KP250000000, T4k, T4j); + T4b = T33 - T34; + T4c = T30 - T31; + T4d = FNMS(KP587785252, T4c, KP951056516 * T4b); + T4n = FMA(KP951056516, T4c, KP587785252 * T4b); + ci[WS(rs, 14)] = T4k + T4j; + T4o = T4g + T4l; + ci[WS(rs, 10)] = T4n + T4o; + ci[WS(rs, 18)] = T4o - T4n; + T4m = T4g - T4l; + cr[WS(rs, 13)] = T4d + T4m; + cr[WS(rs, 17)] = T4m - T4d; + } + { + E T47, T45, T46, T41, T49, T3Z, T40, T4a, T48; + T47 = KP559016994 * (T43 - T44); + T45 = T43 + T44; 
+ T46 = FNMS(KP250000000, T45, T42); + T3Z = T1S - T1T; + T40 = T1V - T1W; + T41 = FNMS(KP951056516, T40, KP587785252 * T3Z); + T49 = FMA(KP951056516, T3Z, KP587785252 * T40); + ci[WS(rs, 19)] = T45 + T42; + T4a = T47 + T46; + cr[WS(rs, 16)] = T49 - T4a; + ci[WS(rs, 15)] = T49 + T4a; + T48 = T46 - T47; + cr[WS(rs, 12)] = T41 - T48; + ci[WS(rs, 11)] = T41 + T48; + } + { + E T4w, T4r, T4x, T4v, T4z, T4t, T4u, T4A, T4y; + T4w = KP559016994 * (T4p + T4q); + T4r = T4p - T4q; + T4x = FMA(KP250000000, T4r, T4s); + T4t = T23 - T28; + T4u = T2e - T2j; + T4v = FMA(KP951056516, T4t, KP587785252 * T4u); + T4z = FNMS(KP587785252, T4t, KP951056516 * T4u); + cr[WS(rs, 15)] = T4r - T4s; + T4A = T4w + T4x; + ci[WS(rs, 12)] = T4z + T4A; + ci[WS(rs, 16)] = T4A - T4z; + T4y = T4w - T4x; + cr[WS(rs, 11)] = T4v + T4y; + cr[WS(rs, 19)] = T4y - T4v; + } + } + } +} + +static const tw_instr twinstr[] = { /* twiddle instruction list referenced by desc below; the meaning of TW_FULL / TW_NEXT is defined outside this file -- TODO confirm against the tw_instr definition */ + {TW_FULL, 1, 20}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 20, "hf_20", twinstr, &GENUS, {184, 62, 62, 0} }; /* descriptor bundling the kernel name "hf_20", the twiddle list and GENUS; 20 is presumably the radix and {184, 62, 62, 0} presumably the add / mul / fma / other operation counts of this variant -- verify against the hc2hc_desc definition */ + +void X(codelet_hf_20) (planner *p) { /* registration hook: passes the hf_20 kernel and its descriptor to X(khc2hc_register) for planner p */ + X(khc2hc_register) (p, hf_20, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1573 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:11 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -dit -name hf_25 -include hf.h */ + +/* + * This function contains 400 FP additions, 364 FP multiplications, + * (or, 84 additions, 48 multiplications, 316 fused multiply/add), + * 178 stack variables, 47 constants, and 100 memory accesses + */ +#include "hf.h" + +static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* FMA build of the generated size-25 hf kernel. Parameters: cr, ci are read and then overwritten in place at offsets WS(rs, k) for k = 0..24; W is the read-only twiddle table; rs is the stride handed to WS(); mb and me bound the loop over m; ms is the per-iteration pointer step. Each pass loads cr[0], ci[0] untouched and combines every pair cr[WS(rs, k)], ci[WS(rs, k)], k = 1..24, with W[2k-2] and W[2k-1] before the butterfly network; all 25 cr slots and all 25 ci slots are stored to. Nothing is returned. */ +{ + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP621716863, +0.621716863012209892444754556304102309693593202); + DK(KP614372930, +0.614372930789563808870829930444362096004872855); + DK(KP557913902, +0.557913902031834264187699648465567037992437152); + DK(KP249506682, +0.249506682107067890488084201715862638334226305); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP968479752, +0.968479752739016373193524836781420152702090879); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP994076283, +0.994076283785401014123185814696322018529298887); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP062914667, +0.062914667253649757225485955897349402364686947); + DK(KP833417178,
+0.833417178328688677408962550243238843138996060); + DK(KP921177326, +0.921177326965143320250447435415066029359282231); + DK(KP541454447, +0.541454447536312777046285590082819509052033189); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP943557151, +0.943557151597354104399655195398983005179443399); + DK(KP554608978, +0.554608978404018097464974850792216217022558774); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP525970792, +0.525970792408939708442463226536226366643874659); + DK(KP726211448, +0.726211448929902658173535992263577167607493062); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP871714437, +0.871714437527667770979999223229522602943903653); + DK(KP549754652, +0.549754652192770074288023275540779861653779767); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP939062505, +0.939062505817492352556001843133229685779824606); + DK(KP256756360, +0.256756360367726783319498520922669048172391148); + DK(KP851038619, +0.851038619207379630836264138867114231259902550); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP912018591, +0.912018591466481957908415381764119056233607330); + DK(KP634619297, +0.634619297544148100711287640319130485732531031); + DK(KP470564281, +0.470564281212251493087595091036643380879947982); + DK(KP827271945, +0.827271945972475634034355757144307982555673741); + DK(KP126329378, +0.126329378446108174786050455341811215027378105); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); +
DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { /* one pass per m in [mb, me): cr steps forward by ms, ci steps backward by ms, W moves on by 48 entries; W is pre-offset by (mb - 1) * 48, so mb == 1 starts at the beginning of W */ + E T7i, T6o, T6m, T7o, T7m, T7h, T6n, T6f, T7j, T7n; + { + E T6W, T5G, T3Y, T3M, T7q, T70, T6V, T7P, Tt, T3L, T5T, T45, T5Q, T4c, T3G; + E T2G, T5P, T49, T5S, T42, T65, T4H, T68, T4A, T2Z, T11, T67, T4x, T64, T4E; + E T5Y, T4W, T61, T4P, T3d, T1z, T60, T4M, T5X, T4T, T3g, T1G, T3q, T4q, T4j; + E T26, T3i, T1M, T3k, T1S; + { + E T3u, T2e, T3E, T44, T4b, T2E, T3w, T2k, T3y, T2q; + { + E T1, T6R, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta; + { + E T3, T6, T2, T5; + T1 = cr[0]; + T6R = ci[0]; + T3 = cr[WS(rs, 5)]; + T6 = ci[WS(rs, 5)]; + T2 = W[8]; + T5 = W[9]; + { + E Tm, Tp, To, T3V, Tn, T3O, T4, Tl; + Tm = cr[WS(rs, 15)]; + Tp = ci[WS(rs, 15)]; + T3O = T2 * T6; + T4 = T2 * T3; + Tl = W[28]; + To = W[29]; + T3P = FNMS(T5, T3, T3O); + T7 = FMA(T5, T6, T4); + T3V = Tl * Tp; + Tn = Tl * Tm; + { + E Tg, Tj, Tf, Ti, T3T, Th, T8; + Tg = cr[WS(rs, 10)]; + Tj = ci[WS(rs, 10)]; + T3W = FNMS(To, Tm, T3V); + Tq = FMA(To, Tp, Tn); + Tf = W[18]; + Ti = W[19]; + T9 = cr[WS(rs, 20)]; + Tc = ci[WS(rs, 20)]; + T3T = Tf * Tj; + Th = Tf * Tg; + T8 = W[38]; + Tb = W[39]; + T3U = FNMS(Ti, Tg, T3T); + Tk = FMA(Ti, Tj, Th); + T3Q = T8 * Tc; + Ta = T8 * T9; + } + } + } + { + E T6T, T3X, T6Y, Tr, T3R, Td; + T6T = T3U + T3W; + T3X = T3U - T3W; + T6Y = Tk - Tq; + Tr = Tk + Tq; + T3R = FNMS(Tb, T9, T3Q); + Td = FMA(Tb, Tc, Ta); + { + E T3S, T6Z, Te, T6U, T6S, Ts; + T3S = T3P - T3R; + T6S = T3P + T3R; + T6Z = T7 - Td; + Te = T7 + Td; + T6W = T6S - T6T; + T6U = T6S + T6T; + T5G = FNMS(KP618033988, T3S, T3X); + T3Y = FMA(KP618033988, T3X, T3S); + T3M = Te - Tr; + Ts = Te + Tr; + T7q = FMA(KP618033988, T6Y, T6Z); + T70 = FNMS(KP618033988, T6Z, T6Y); + T6V = FNMS(KP250000000, T6U, T6R); + T7P = T6U + T6R; + Tt = T1 +
Ts; + T3L = FNMS(KP250000000, Ts, T1); + } + } + } + { + E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n; + { + E T2a, T2d, T29, T2c; + T2a = cr[WS(rs, 3)]; + T2d = ci[WS(rs, 3)]; + T29 = W[4]; + T2c = W[5]; + { + E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f; + T2t = cr[WS(rs, 13)]; + T2w = ci[WS(rs, 13)]; + T3t = T29 * T2d; + T2b = T29 * T2a; + T2s = W[24]; + T2z = cr[WS(rs, 18)]; + T3u = FNMS(T2c, T2a, T3t); + T2e = FMA(T2c, T2d, T2b); + T3A = T2s * T2w; + T2u = T2s * T2t; + T2C = ci[WS(rs, 18)]; + T2y = W[34]; + T2v = W[25]; + T2B = W[35]; + { + E T3B, T2x, T3D, T2D, T3C, T2A; + T2g = cr[WS(rs, 8)]; + T3C = T2y * T2C; + T2A = T2y * T2z; + T3B = FNMS(T2v, T2t, T3A); + T2x = FMA(T2v, T2w, T2u); + T3D = FNMS(T2B, T2z, T3C); + T2D = FMA(T2B, T2C, T2A); + T2j = ci[WS(rs, 8)]; + T2f = W[14]; + T3E = T3B + T3D; + T44 = T3D - T3B; + T4b = T2x - T2D; + T2E = T2x + T2D; + } + T2m = cr[WS(rs, 23)]; + T3v = T2f * T2j; + T2h = T2f * T2g; + T2p = ci[WS(rs, 23)]; + T2l = W[44]; + T2i = W[15]; + T2o = W[45]; + } + } + T3x = T2l * T2p; + T2n = T2l * T2m; + T3w = FNMS(T2i, T2g, T3v); + T2k = FMA(T2i, T2j, T2h); + T3y = FNMS(T2o, T2m, T3x); + T2q = FMA(T2o, T2p, T2n); + } + { + E T2N, Tz, T2X, T4G, T4z, TZ, T2P, TF, T2R, TL; + { + E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI; + { + E Tv, Ty, Tu, Tx; + { + E T48, T41, T47, T40, T43, T3z; + Tv = cr[WS(rs, 1)]; + T43 = T3y - T3w; + T3z = T3w + T3y; + { + E T4a, T2r, T3F, T2F; + T4a = T2k - T2q; + T2r = T2k + T2q; + T5T = FNMS(KP618033988, T43, T44); + T45 = FMA(KP618033988, T44, T43); + T3F = T3z + T3E; + T48 = T3E - T3z; + T5Q = FNMS(KP618033988, T4a, T4b); + T4c = FMA(KP618033988, T4b, T4a); + T2F = T2r + T2E; + T41 = T2E - T2r; + T3G = T3u + T3F; + T47 = FNMS(KP250000000, T3F, T3u); + T2G = T2e + T2F; + T40 = FNMS(KP250000000, T2F, T2e); + Ty = ci[WS(rs, 1)]; + } + T5P = FMA(KP559016994, T48, T47); + T49 = FNMS(KP559016994, T48, T47); + T5S = FMA(KP559016994, T41, T40); + T42 = FNMS(KP559016994, T41,
T40); + Tu = W[0]; + } + Tx = W[1]; + { + E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA; + TO = cr[WS(rs, 11)]; + TR = ci[WS(rs, 11)]; + T2M = Tu * Ty; + Tw = Tu * Tv; + TN = W[20]; + TU = cr[WS(rs, 16)]; + T2N = FNMS(Tx, Tv, T2M); + Tz = FMA(Tx, Ty, Tw); + T2T = TN * TR; + TP = TN * TO; + TX = ci[WS(rs, 16)]; + TT = W[30]; + TQ = W[21]; + TW = W[31]; + { + E T2U, TS, T2W, TY, T2V, TV; + TB = cr[WS(rs, 6)]; + T2V = TT * TX; + TV = TT * TU; + T2U = FNMS(TQ, TO, T2T); + TS = FMA(TQ, TR, TP); + T2W = FNMS(TW, TU, T2V); + TY = FMA(TW, TX, TV); + TE = ci[WS(rs, 6)]; + TA = W[10]; + T2X = T2U + T2W; + T4G = T2W - T2U; + T4z = TY - TS; + TZ = TS + TY; + } + TH = cr[WS(rs, 21)]; + T2O = TA * TE; + TC = TA * TB; + TK = ci[WS(rs, 21)]; + TG = W[40]; + TD = W[11]; + TJ = W[41]; + } + } + T2Q = TG * TK; + TI = TG * TH; + T2P = FNMS(TD, TB, T2O); + TF = FMA(TD, TE, TC); + T2R = FNMS(TJ, TH, T2Q); + TL = FMA(TJ, TK, TI); + } + { + E T31, T17, T3b, T4V, T4O, T1x, T33, T1d, T35, T1j; + { + E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g; + { + E T13, T16, T12, T15; + { + E T4w, T4D, T4v, T4C, T4F, T2S; + T13 = cr[WS(rs, 4)]; + T4F = T2P - T2R; + T2S = T2P + T2R; + { + E T4y, TM, T2Y, T10; + T4y = TL - TF; + TM = TF + TL; + T65 = FMA(KP618033988, T4F, T4G); + T4H = FNMS(KP618033988, T4G, T4F); + T2Y = T2S + T2X; + T4w = T2S - T2X; + T68 = FNMS(KP618033988, T4y, T4z); + T4A = FMA(KP618033988, T4z, T4y); + T10 = TM + TZ; + T4D = TM - TZ; + T2Z = T2N + T2Y; + T4v = FNMS(KP250000000, T2Y, T2N); + T11 = Tz + T10; + T4C = FNMS(KP250000000, T10, Tz); + T16 = ci[WS(rs, 4)]; + } + T67 = FNMS(KP559016994, T4w, T4v); + T4x = FMA(KP559016994, T4w, T4v); + T64 = FNMS(KP559016994, T4D, T4C); + T4E = FMA(KP559016994, T4D, T4C); + T12 = W[6]; + } + T15 = W[7]; + { + E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18; + T1m = cr[WS(rs, 14)]; + T1p = ci[WS(rs, 14)]; + T30 = T12 * T16; + T14 = T12 * T13; + T1l = W[26]; + T1s = cr[WS(rs, 19)]; + T31 = FNMS(T15, T13,
T30); + T17 = FMA(T15, T16, T14); + T37 = T1l * T1p; + T1n = T1l * T1m; + T1v = ci[WS(rs, 19)]; + T1r = W[36]; + T1o = W[27]; + T1u = W[37]; + { + E T38, T1q, T3a, T1w, T39, T1t; + T19 = cr[WS(rs, 9)]; + T39 = T1r * T1v; + T1t = T1r * T1s; + T38 = FNMS(T1o, T1m, T37); + T1q = FMA(T1o, T1p, T1n); + T3a = FNMS(T1u, T1s, T39); + T1w = FMA(T1u, T1v, T1t); + T1c = ci[WS(rs, 9)]; + T18 = W[16]; + T3b = T38 + T3a; + T4V = T3a - T38; + T4O = T1w - T1q; + T1x = T1q + T1w; + } + T1f = cr[WS(rs, 24)]; + T32 = T18 * T1c; + T1a = T18 * T19; + T1i = ci[WS(rs, 24)]; + T1e = W[46]; + T1b = W[17]; + T1h = W[47]; + } + } + T34 = T1e * T1i; + T1g = T1e * T1f; + T33 = FNMS(T1b, T19, T32); + T1d = FMA(T1b, T1c, T1a); + T35 = FNMS(T1h, T1f, T34); + T1j = FMA(T1h, T1i, T1g); + } + { + E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P; + { + E T1C, T1F, T1B, T1E; + { + E T4L, T4S, T4K, T4R, T4U, T36; + T1C = cr[WS(rs, 2)]; + T4U = T35 - T33; + T36 = T33 + T35; + { + E T4N, T1k, T3c, T1y; + T4N = T1j - T1d; + T1k = T1d + T1j; + T5Y = FNMS(KP618033988, T4U, T4V); + T4W = FMA(KP618033988, T4V, T4U); + T3c = T36 + T3b; + T4L = T3b - T36; + T61 = FNMS(KP618033988, T4N, T4O); + T4P = FMA(KP618033988, T4O, T4N); + T1y = T1k + T1x; + T4S = T1k - T1x; + T3d = T31 + T3c; + T4K = FNMS(KP250000000, T3c, T31); + T1z = T17 + T1y; + T4R = FNMS(KP250000000, T1y, T17); + T1F = ci[WS(rs, 2)]; + } + T60 = FMA(KP559016994, T4L, T4K); + T4M = FNMS(KP559016994, T4L, T4K); + T5X = FNMS(KP559016994, T4S, T4R); + T4T = FMA(KP559016994, T4S, T4R); + T1B = W[2]; + } + T1E = W[3]; + { + E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H; + T1V = cr[WS(rs, 12)]; + T1Y = ci[WS(rs, 12)]; + T3f = T1B * T1F; + T1D = T1B * T1C; + T1U = W[22]; + T21 = cr[WS(rs, 17)]; + T3g = FNMS(T1E, T1C, T3f); + T1G = FMA(T1E, T1F, T1D); + T3m = T1U * T1Y; + T1W = T1U * T1V; + T24 = ci[WS(rs, 17)]; + T20 = W[32]; + T1X = W[23]; + T23 = W[33]; + { + E T3n, T1Z, T3p, T25, T3o, T22; + T1I = cr[WS(rs, 7)]; + T3o = T20 *
T24; + T22 = T20 * T21; + T3n = FNMS(T1X, T1V, T3m); + T1Z = FMA(T1X, T1Y, T1W); + T3p = FNMS(T23, T21, T3o); + T25 = FMA(T23, T24, T22); + T1L = ci[WS(rs, 7)]; + T1H = W[12]; + T3q = T3n + T3p; + T4q = T3n - T3p; + T4j = T25 - T1Z; + T26 = T1Z + T25; + } + T1O = cr[WS(rs, 22)]; + T3h = T1H * T1L; + T1J = T1H * T1I; + T1R = ci[WS(rs, 22)]; + T1N = W[42]; + T1K = W[13]; + T1Q = W[43]; + } + } + T3j = T1N * T1R; + T1P = T1N * T1O; + T3i = FNMS(T1K, T1I, T3h); + T1M = FMA(T1K, T1L, T1J); + T3k = FNMS(T1Q, T1O, T3j); + T1S = FMA(T1Q, T1R, T1P); + } + } + } + } + { + E T7Q, T5M, T5J, T7R, T5I, T5L, T7X, T7W, T5F, T6X, T5u, T7M, T7O, T5C, T5E; + E T5t, T7J, T7N; + { + E T4r, T4k, T4h, T4o, T3K, T3I, T1A, T2H, T28; + { + E T3e, T4g, T4n, T4f, T4m, T3H, T4p, T3l; + T7Q = T2Z + T3d; + T3e = T2Z - T3d; + T4p = T3k - T3i; + T3l = T3i + T3k; + { + E T4i, T1T, T3r, T27, T3s; + T4i = T1S - T1M; + T1T = T1M + T1S; + T5M = FMA(KP618033988, T4p, T4q); + T4r = FNMS(KP618033988, T4q, T4p); + T3r = T3l + T3q; + T4g = T3q - T3l; + T5J = FNMS(KP618033988, T4i, T4j); + T4k = FMA(KP618033988, T4j, T4i); + T27 = T1T + T26; + T4n = T26 - T1T; + T3s = T3g + T3r; + T4f = FNMS(KP250000000, T3r, T3g); + T28 = T1G + T27; + T4m = FNMS(KP250000000, T27, T1G); + T3H = T3s - T3G; + T7R = T3s + T3G; + } + T5I = FMA(KP559016994, T4g, T4f); + T4h = FNMS(KP559016994, T4g, T4f); + T5L = FMA(KP559016994, T4n, T4m); + T4o = FNMS(KP559016994, T4n, T4m); + T3K = FNMS(KP618033988, T3e, T3H); + T3I = FMA(KP618033988, T3H, T3e); + } + T1A = T11 + T1z; + T7X = T1z - T11; + T7W = T28 - T2G; + T2H = T28 + T2G; + { + E T3Z, T5d, T7r, T7D, T5h, T5i, T5m, T5l, T59, T7K, T56, T7L, T7I, T7G, T52; + E T50, T5w, T5g, T5q, T5A, T3N, T7p; + T3N = FMA(KP559016994, T3M, T3L); + T5F = FNMS(KP559016994, T3M, T3L); + T6X = FNMS(KP559016994, T6W, T6V); + T7p = FMA(KP559016994, T6W, T6V); + { + E T5o, T5p, T57, T4e, T4Y, T55, T4l, T4s, T4B, T5f, T5e, T4I; + { + E T46, T2K, T2J, T4d, T2I; + T46 = FMA(KP951056516, T45, T42); + T5o
= FNMS(KP951056516, T45, T42); + T2I = T1A + T2H; + T2K = T1A - T2H; + T3Z = FNMS(KP951056516, T3Y, T3N); + T5d = FMA(KP951056516, T3Y, T3N); + T7r = FNMS(KP951056516, T7q, T7p); + T7D = FMA(KP951056516, T7q, T7p); + cr[0] = Tt + T2I; + T2J = FNMS(KP250000000, T2I, Tt); + T5p = FNMS(KP951056516, T4c, T49); + T4d = FMA(KP951056516, T4c, T49); + { + E T4Q, T4X, T2L, T3J; + T4Q = FNMS(KP951056516, T4P, T4M); + T5h = FMA(KP951056516, T4P, T4M); + T5i = FNMS(KP951056516, T4W, T4T); + T4X = FMA(KP951056516, T4W, T4T); + T2L = FMA(KP559016994, T2K, T2J); + T3J = FNMS(KP559016994, T2K, T2J); + T57 = FMA(KP126329378, T46, T4d); + T4e = FNMS(KP126329378, T4d, T46); + cr[WS(rs, 5)] = FMA(KP951056516, T3I, T2L); + ci[WS(rs, 4)] = FNMS(KP951056516, T3I, T2L); + ci[WS(rs, 9)] = FMA(KP951056516, T3K, T3J); + cr[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J); + T4Y = FMA(KP827271945, T4X, T4Q); + T55 = FNMS(KP827271945, T4Q, T4X); + } + } + T4l = FNMS(KP951056516, T4k, T4h); + T5m = FMA(KP951056516, T4k, T4h); + T5l = FNMS(KP951056516, T4r, T4o); + T4s = FMA(KP951056516, T4r, T4o); + T4B = FNMS(KP951056516, T4A, T4x); + T5f = FMA(KP951056516, T4A, T4x); + T5e = FMA(KP951056516, T4H, T4E); + T4I = FNMS(KP951056516, T4H, T4E); + { + E T4u, T4Z, T4t, T58; + T4t = FNMS(KP470564281, T4s, T4l); + T58 = FMA(KP470564281, T4l, T4s); + { + E T4J, T54, T7E, T7F; + T4J = FMA(KP634619297, T4I, T4B); + T54 = FNMS(KP634619297, T4B, T4I); + T59 = FNMS(KP912018591, T58, T57); + T7E = FMA(KP912018591, T58, T57); + T7K = FMA(KP912018591, T4t, T4e); + T4u = FNMS(KP912018591, T4t, T4e); + T56 = FMA(KP912575812, T55, T54); + T7F = FNMS(KP912575812, T55, T54); + T7L = FMA(KP912575812, T4Y, T4J); + T4Z = FNMS(KP912575812, T4Y, T4J); + T7I = FNMS(KP851038619, T7F, T7E); + T7G = FMA(KP851038619, T7F, T7E); + } + T52 = FMA(KP851038619, T4Z, T4u); + T50 = FNMS(KP851038619, T4Z, T4u); + } + T5w = FNMS(KP256756360, T5e, T5f); + T5g = FMA(KP256756360, T5f, T5e); + T5q = FMA(KP939062505, T5p, T5o); + T5A =
FNMS(KP939062505, T5o, T5p); + } + { + E T5y, T7z, T5B, T7y, T7w, T7u, T5s; + { + E T5k, T5r, T5j, T5x; + cr[WS(rs, 4)] = FNMS(KP992114701, T50, T3Z); + T5j = FMA(KP634619297, T5i, T5h); + T5x = FNMS(KP634619297, T5h, T5i); + { + E T5n, T5z, T7s, T7t; + T5n = FMA(KP549754652, T5m, T5l); + T5z = FNMS(KP549754652, T5l, T5m); + T5y = FMA(KP871714437, T5x, T5w); + T7s = FNMS(KP871714437, T5x, T5w); + T7z = FNMS(KP871714437, T5j, T5g); + T5k = FMA(KP871714437, T5j, T5g); + T5B = FNMS(KP831864738, T5A, T5z); + T7t = FMA(KP831864738, T5A, T5z); + T7y = FNMS(KP831864738, T5q, T5n); + T5r = FMA(KP831864738, T5q, T5n); + T7w = FNMS(KP904730450, T7t, T7s); + T7u = FMA(KP904730450, T7t, T7s); + } + ci[WS(rs, 20)] = FNMS(KP992114701, T7G, T7D); + T5u = FNMS(KP904730450, T5r, T5k); + T5s = FMA(KP904730450, T5r, T5k); + } + { + E T5a, T5c, T7A, T7C, T7v, T53, T5b, T51, T7H, T7x, T7B; + T5a = FNMS(KP726211448, T59, T56); + T5c = FMA(KP525970792, T56, T59); + ci[WS(rs, 23)] = FMA(KP968583161, T7u, T7r); + cr[WS(rs, 1)] = FMA(KP968583161, T5s, T5d); + T51 = FMA(KP248028675, T50, T3Z); + T7A = FNMS(KP683113946, T7z, T7y); + T7C = FMA(KP559154169, T7y, T7z); + T7v = FNMS(KP242145790, T7u, T7r); + T53 = FMA(KP554608978, T52, T51); + T5b = FNMS(KP554608978, T52, T51); + T7M = FNMS(KP525970792, T7L, T7K); + T7O = FMA(KP726211448, T7K, T7L); + ci[WS(rs, 10)] = FNMS(KP943557151, T5c, T5b); + ci[WS(rs, 5)] = FMA(KP943557151, T5c, T5b); + ci[0] = FMA(KP803003575, T5a, T53); + cr[WS(rs, 9)] = FNMS(KP803003575, T5a, T53); + T7x = FNMS(KP541454447, T7w, T7v); + T7B = FMA(KP541454447, T7w, T7v); + T7H = FMA(KP248028675, T7G, T7D); + cr[WS(rs, 21)] = -(FMA(KP921177326, T7C, T7B)); + ci[WS(rs, 18)] = FNMS(KP921177326, T7C, T7B); + ci[WS(rs, 13)] = FMA(KP833417178, T7A, T7x); + cr[WS(rs, 16)] = FMS(KP833417178, T7A, T7x); + T5C = FMA(KP559154169, T5B, T5y); + T5E = FNMS(KP683113946, T5y, T5B); + T5t = FNMS(KP242145790, T5s, T5d); + T7J = FNMS(KP554608978, T7I, T7H); + T7N = FMA(KP554608978, T7I,
T7H); + } + } + } + } + { + E T7Y, T80, T5v, T5D; + cr[WS(rs, 24)] = -(FMA(KP803003575, T7O, T7N)); + ci[WS(rs, 15)] = FNMS(KP803003575, T7O, T7N); + cr[WS(rs, 19)] = FMS(KP943557151, T7M, T7J); + cr[WS(rs, 14)] = -(FMA(KP943557151, T7M, T7J)); + T5v = FMA(KP541454447, T5u, T5t); + T5D = FNMS(KP541454447, T5u, T5t); + cr[WS(rs, 11)] = FNMS(KP833417178, T5E, T5D); + ci[WS(rs, 8)] = FMA(KP833417178, T5E, T5D); + cr[WS(rs, 6)] = FMA(KP921177326, T5C, T5v); + ci[WS(rs, 3)] = FNMS(KP921177326, T5C, T5v); + T7Y = FMA(KP618033988, T7X, T7W); + T80 = FNMS(KP618033988, T7W, T7X); + { + E T6t, T6p, T5H, T7d, T71, T6u, T6y, T6x, T6l, T7k, T6i, T7l, T7g, T6c, T6e; + E T6s, T6L, T6J, T6C; + { + E T6A, T6B, T5O, T6j, T6h, T6a, T6q, T5R, T5U, T6r, T5Z, T62; + { + E T5K, T7U, T7T, T5N, T7S; + T6t = FNMS(KP951056516, T5J, T5I); + T5K = FMA(KP951056516, T5J, T5I); + T7U = T7Q - T7R; + T7S = T7Q + T7R; + T6p = FNMS(KP951056516, T5G, T5F); + T5H = FMA(KP951056516, T5G, T5F); + T7d = FNMS(KP951056516, T70, T6X); + T71 = FMA(KP951056516, T70, T6X); + ci[WS(rs, 24)] = T7S + T7P; + T7T = FNMS(KP250000000, T7S, T7P); + T5N = FMA(KP951056516, T5M, T5L); + T6u = FNMS(KP951056516, T5M, T5L); + { + E T66, T69, T7Z, T7V; + T6A = FMA(KP951056516, T65, T64); + T66 = FNMS(KP951056516, T65, T64); + T69 = FMA(KP951056516, T68, T67); + T6B = FNMS(KP951056516, T68, T67); + T7Z = FMA(KP559016994, T7U, T7T); + T7V = FNMS(KP559016994, T7U, T7T); + T5O = FMA(KP062914667, T5N, T5K); + T6j = FNMS(KP062914667, T5K, T5N); + ci[WS(rs, 14)] = FMA(KP951056516, T7Y, T7V); + cr[WS(rs, 15)] = FMS(KP951056516, T7Y, T7V); + ci[WS(rs, 19)] = FMA(KP951056516, T80, T7Z); + cr[WS(rs, 20)] = FMS(KP951056516, T80, T7Z); + T6h = FNMS(KP939062505, T66, T69); + T6a = FMA(KP939062505, T69, T66); + } + } + T6q = FMA(KP951056516, T5Q, T5P); + T5R = FNMS(KP951056516, T5Q, T5P); + T5U = FNMS(KP951056516, T5T, T5S); + T6r = FMA(KP951056516, T5T, T5S); + T6y = FMA(KP951056516, T5Y, T5X); + T5Z = FNMS(KP951056516, T5Y, T5X); + T62 =
FMA(KP951056516, T61, T60); + T6x = FNMS(KP951056516, T61, T60); + { + E T5W, T6b, T6k, T5V; + T6k = FMA(KP827271945, T5R, T5U); + T5V = FNMS(KP827271945, T5U, T5R); + { + E T6g, T63, T7e, T7f; + T6g = FMA(KP126329378, T5Z, T62); + T63 = FNMS(KP126329378, T62, T5Z); + T7e = FMA(KP772036680, T6k, T6j); + T6l = FNMS(KP772036680, T6k, T6j); + T5W = FMA(KP772036680, T5V, T5O); + T7k = FNMS(KP772036680, T5V, T5O); + T7f = FNMS(KP734762448, T6h, T6g); + T6i = FMA(KP734762448, T6h, T6g); + T6b = FNMS(KP734762448, T6a, T63); + T7l = FMA(KP734762448, T6a, T63); + T7g = FMA(KP994076283, T7f, T7e); + T7i = FNMS(KP994076283, T7f, T7e); + } + T6c = FNMS(KP994076283, T6b, T5W); + T6e = FMA(KP994076283, T6b, T5W); + } + T6s = FMA(KP062914667, T6r, T6q); + T6L = FNMS(KP062914667, T6q, T6r); + T6J = FNMS(KP549754652, T6A, T6B); + T6C = FMA(KP549754652, T6B, T6A); + } + { + E T6N, T78, T6K, T79, T74, T76, T6E, T6G; + { + E T6w, T6D, T6M, T6v; + cr[WS(rs, 3)] = FMA(KP998026728, T6c, T5H); + T6M = FNMS(KP634619297, T6t, T6u); + T6v = FMA(KP634619297, T6u, T6t); + { + E T6I, T6z, T72, T73; + T6I = FMA(KP470564281, T6x, T6y); + T6z = FNMS(KP470564281, T6y, T6x); + T72 = FMA(KP845997307, T6M, T6L); + T6N = FNMS(KP845997307, T6M, T6L); + T6w = FMA(KP845997307, T6v, T6s); + T78 = FNMS(KP845997307, T6v, T6s); + T73 = FNMS(KP968479752, T6J, T6I); + T6K = FMA(KP968479752, T6J, T6I); + T6D = FMA(KP968479752, T6C, T6z); + T79 = FNMS(KP968479752, T6C, T6z); + T74 = FMA(KP906616052, T73, T72); + T76 = FNMS(KP906616052, T73, T72); + } + ci[WS(rs, 21)] = FNMS(KP998026728, T7g, T7d); + T6E = FMA(KP906616052, T6D, T6w); + T6G = FNMS(KP906616052, T6D, T6w); + } + { + E T7c, T7a, T6Q, T6O, T6F, T7b, T77, T75, T6d, T6P, T6H; + T7c = FMA(KP681693190, T78, T79); + T7a = FNMS(KP560319534, T79, T78); + ci[WS(rs, 22)] = FNMS(KP998026728, T74, T71); + cr[WS(rs, 2)] = FMA(KP998026728, T6E, T6p); + T75 = FMA(KP249506682, T74, T71); + T6Q = FNMS(KP560319534, T6K, T6N); + T6O = FMA(KP681693190, T6N, T6K); + T6F =
FNMS(KP249506682, T6E, T6p); + T7b = FMA(KP557913902, T76, T75); + T77 = FNMS(KP557913902, T76, T75); + T6o = FMA(KP614372930, T6i, T6l); + T6m = FNMS(KP621716863, T6l, T6i); + cr[WS(rs, 22)] = FMS(KP860541664, T7c, T7b); + ci[WS(rs, 17)] = FMA(KP860541664, T7c, T7b); + ci[WS(rs, 12)] = FNMS(KP949179823, T7a, T77); + cr[WS(rs, 17)] = -(FMA(KP949179823, T7a, T77)); + T6P = FMA(KP557913902, T6G, T6F); + T6H = FNMS(KP557913902, T6G, T6F); + T6d = FNMS(KP249506682, T6c, T5H); + ci[WS(rs, 7)] = FMA(KP949179823, T6Q, T6P); + cr[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P); + cr[WS(rs, 7)] = FMA(KP860541664, T6O, T6H); + ci[WS(rs, 2)] = FNMS(KP860541664, T6O, T6H); + T7o = FMA(KP621716863, T7k, T7l); + T7m = FNMS(KP614372930, T7l, T7k); + T7h = FMA(KP249506682, T7g, T7d); + T6n = FMA(KP557913902, T6e, T6d); + T6f = FNMS(KP557913902, T6e, T6d); + } + } + } + } + } + } + ci[WS(rs, 6)] = FNMS(KP949179823, T6o, T6n); + ci[WS(rs, 11)] = FMA(KP949179823, T6o, T6n); + cr[WS(rs, 8)] = FMA(KP943557151, T6m, T6f); + ci[WS(rs, 1)] = FNMS(KP943557151, T6m, T6f); + T7j = FNMS(KP557913902, T7i, T7h); + T7n = FMA(KP557913902, T7i, T7h); + cr[WS(rs, 23)] = -(FMA(KP943557151, T7o, T7n)); + ci[WS(rs, 16)] = FNMS(KP943557151, T7o, T7n); + cr[WS(rs, 18)] = FMS(KP949179823, T7m, T7j); + cr[WS(rs, 13)] = -(FMA(KP949179823, T7m, T7j)); + } + } +} + +static const tw_instr twinstr[] = { /* twiddle instruction list referenced by desc below; the FMA kernel above reads W[0]..W[47] and steps W by 48 per pass, which presumably corresponds to the TW_FULL entry for 25 -- TODO confirm against the tw_instr definition */ + {TW_FULL, 1, 25}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hf_25", twinstr, &GENUS, {84, 48, 316, 0} }; /* descriptor for the FMA build: {84, 48, 316, ...} repeats the counts in this variant's generator banner (84 additions, 48 multiplications, 316 fused multiply/add); 25 matches the generator's -n 25; the meaning of the trailing 0 is not visible here */ + +void X(codelet_hf_25) (planner *p) { /* registration hook: passes the hf_25 kernel and its descriptor to X(khc2hc_register) for planner p */ + X(khc2hc_register) (p, hf_25, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 25 -dit -name hf_25 -include hf.h */ + +/* + * This function contains 400 FP additions, 280 FP multiplications, + * (or, 260 additions, 140 multiplications, 140 fused multiply/add), + * 101 stack variables, 20 constants, and 100 memory accesses + */ +#include "hf.h" + +static void hf_25(R
*cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { + E T1, T6b, T2l, T6g, To, T2m, T6e, T6f, T6a, T6H, T2u, T4I, T2i, T60, T3S; + E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t; + E T4h, T4P, T1l, T5X, T37, T5v, T4k, T4T, T3e, T5w, T4j, T4W, T1P, T5Z, T3v; + E T5A, T4o, T54, T3C, T5z, T4n, T51; + { + E T6, T2o, Tb, T2p, Tc, T6c, Th, T2r, Tm, T2s, Tn, T6d; + T1 = cr[0]; + T6b = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 5)]; + 
T5 = ci[WS(rs, 5)]; + T2 = W[8]; + T4 = W[9]; + T6 = FMA(T2, T3, T4 * T5); + T2o = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 20)]; + Ta = ci[WS(rs, 20)]; + T7 = W[38]; + T9 = W[39]; + Tb = FMA(T7, T8, T9 * Ta); + T2p = FNMS(T9, T8, T7 * Ta); + } + Tc = T6 + Tb; + T6c = T2o + T2p; + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 10)]; + Tg = ci[WS(rs, 10)]; + Td = W[18]; + Tf = W[19]; + Th = FMA(Td, Te, Tf * Tg); + T2r = FNMS(Tf, Te, Td * Tg); + } + { + E Tj, Tl, Ti, Tk; + Tj = cr[WS(rs, 15)]; + Tl = ci[WS(rs, 15)]; + Ti = W[28]; + Tk = W[29]; + Tm = FMA(Ti, Tj, Tk * Tl); + T2s = FNMS(Tk, Tj, Ti * Tl); + } + Tn = Th + Tm; + T6d = T2r + T2s; + T2l = KP559016994 * (Tc - Tn); + T6g = KP559016994 * (T6c - T6d); + To = Tc + Tn; + T2m = FNMS(KP250000000, To, T1); + T6e = T6c + T6d; + T6f = FNMS(KP250000000, T6e, T6b); + { + E T68, T69, T2q, T2t; + T68 = Th - Tm; + T69 = T6 - Tb; + T6a = FNMS(KP587785252, T69, KP951056516 * T68); + T6H = FMA(KP951056516, T69, KP587785252 * T68); + T2q = T2o - T2p; + T2t = T2r - T2s; + T2u = FMA(KP951056516, T2q, KP587785252 * T2t); + T4I = FNMS(KP587785252, T2q, KP951056516 * T2t); + } + } + { + E T1U, T3O, T3E, T3F, T3X, T3W, T3J, T3M, T3P, T25, T2g, T2h; + { + E T1R, T1T, T1Q, T1S; + T1R = cr[WS(rs, 3)]; + T1T = ci[WS(rs, 3)]; + T1Q = W[4]; + T1S = W[5]; + T1U = FMA(T1Q, T1R, T1S * T1T); + T3O = FNMS(T1S, T1R, T1Q * T1T); + } + { + E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K; + { + E T1W, T1Y, T1V, T1X; + T1W = cr[WS(rs, 8)]; + T1Y = ci[WS(rs, 8)]; + T1V = W[14]; + T1X = W[15]; + T1Z = FMA(T1V, T1W, T1X * T1Y); + T3H = FNMS(T1X, T1W, T1V * T1Y); + } + { + E T2c, T2e, T2b, T2d; + T2c = cr[WS(rs, 18)]; + T2e = ci[WS(rs, 18)]; + T2b = W[34]; + T2d = W[35]; + T2f = FMA(T2b, T2c, T2d * T2e); + T3L = FNMS(T2d, T2c, T2b * T2e); + } + { + E T21, T23, T20, T22; + T21 = cr[WS(rs, 23)]; + T23 = ci[WS(rs, 23)]; + T20 = W[44]; + T22 = W[45]; + T24 = FMA(T20, T21, T22 * T23); + T3I = FNMS(T22, T21, T20 * T23); + } + { + E T27, T29, T26, 
T28; + T27 = cr[WS(rs, 13)]; + T29 = ci[WS(rs, 13)]; + T26 = W[24]; + T28 = W[25]; + T2a = FMA(T26, T27, T28 * T29); + T3K = FNMS(T28, T27, T26 * T29); + } + T3E = T1Z - T24; + T3F = T2a - T2f; + T3X = T3K - T3L; + T3W = T3H - T3I; + T3J = T3H + T3I; + T3M = T3K + T3L; + T3P = T3J + T3M; + T25 = T1Z + T24; + T2g = T2a + T2f; + T2h = T25 + T2g; + } + T2i = T1U + T2h; + T60 = T3O + T3P; + { + E T3G, T57, T3R, T56, T3N, T3Q; + T3G = FMA(KP951056516, T3E, KP587785252 * T3F); + T57 = FNMS(KP587785252, T3E, KP951056516 * T3F); + T3N = KP559016994 * (T3J - T3M); + T3Q = FNMS(KP250000000, T3P, T3O); + T3R = T3N + T3Q; + T56 = T3Q - T3N; + T3S = T3G + T3R; + T5D = T57 + T56; + T4r = T3R - T3G; + T58 = T56 - T57; + } + { + E T3Y, T5a, T3V, T59, T3T, T3U; + T3Y = FMA(KP951056516, T3W, KP587785252 * T3X); + T5a = FNMS(KP587785252, T3W, KP951056516 * T3X); + T3T = KP559016994 * (T25 - T2g); + T3U = FNMS(KP250000000, T2h, T1U); + T3V = T3T + T3U; + T59 = T3U - T3T; + T3Z = T3V - T3Y; + T5C = T59 - T5a; + T4q = T3V + T3Y; + T5b = T59 + T5a; + } + } + { + E Tu, T2N, T2B, T2E, T2I, T2H, T2K, T2L, T2O, TF, TQ, TR; + { + E Tr, Tt, Tq, Ts; + Tr = cr[WS(rs, 1)]; + Tt = ci[WS(rs, 1)]; + Tq = W[0]; + Ts = W[1]; + Tu = FMA(Tq, Tr, Ts * Tt); + T2N = FNMS(Ts, Tr, Tq * Tt); + } + { + E Tz, T2z, TP, T2D, TE, T2A, TK, T2C; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 6)]; + Ty = ci[WS(rs, 6)]; + Tv = W[10]; + Tx = W[11]; + Tz = FMA(Tv, Tw, Tx * Ty); + T2z = FNMS(Tx, Tw, Tv * Ty); + } + { + E TM, TO, TL, TN; + TM = cr[WS(rs, 16)]; + TO = ci[WS(rs, 16)]; + TL = W[30]; + TN = W[31]; + TP = FMA(TL, TM, TN * TO); + T2D = FNMS(TN, TM, TL * TO); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 21)]; + TD = ci[WS(rs, 21)]; + TA = W[40]; + TC = W[41]; + TE = FMA(TA, TB, TC * TD); + T2A = FNMS(TC, TB, TA * TD); + } + { + E TH, TJ, TG, TI; + TH = cr[WS(rs, 11)]; + TJ = ci[WS(rs, 11)]; + TG = W[20]; + TI = W[21]; + TK = FMA(TG, TH, TI * TJ); + T2C = FNMS(TI, TH, TG * TJ); + } + T2B = T2z - T2A; + T2E = T2C - T2D; 
+ T2I = TK - TP; + T2H = Tz - TE; + T2K = T2z + T2A; + T2L = T2C + T2D; + T2O = T2K + T2L; + TF = Tz + TE; + TQ = TK + TP; + TR = TF + TQ; + } + TS = Tu + TR; + T5W = T2N + T2O; + { + E T2F, T4L, T2y, T4K, T2w, T2x; + T2F = FMA(KP951056516, T2B, KP587785252 * T2E); + T4L = FNMS(KP587785252, T2B, KP951056516 * T2E); + T2w = KP559016994 * (TF - TQ); + T2x = FNMS(KP250000000, TR, Tu); + T2y = T2w + T2x; + T4K = T2x - T2w; + T2G = T2y - T2F; + T5s = T4K - T4L; + T4g = T2y + T2F; + T4M = T4K + T4L; + } + { + E T2J, T4O, T2Q, T4N, T2M, T2P; + T2J = FMA(KP951056516, T2H, KP587785252 * T2I); + T4O = FNMS(KP587785252, T2H, KP951056516 * T2I); + T2M = KP559016994 * (T2K - T2L); + T2P = FNMS(KP250000000, T2O, T2N); + T2Q = T2M + T2P; + T4N = T2P - T2M; + T2R = T2J + T2Q; + T5t = T4O + T4N; + T4h = T2Q - T2J; + T4P = T4N - T4O; + } + } + { + E TX, T33, T2T, T2U, T3c, T3b, T2Y, T31, T34, T18, T1j, T1k; + { + E TU, TW, TT, TV; + TU = cr[WS(rs, 4)]; + TW = ci[WS(rs, 4)]; + TT = W[6]; + TV = W[7]; + TX = FMA(TT, TU, TV * TW); + T33 = FNMS(TV, TU, TT * TW); + } + { + E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z; + { + E TZ, T11, TY, T10; + TZ = cr[WS(rs, 9)]; + T11 = ci[WS(rs, 9)]; + TY = W[16]; + T10 = W[17]; + T12 = FMA(TY, TZ, T10 * T11); + T2W = FNMS(T10, TZ, TY * T11); + } + { + E T1f, T1h, T1e, T1g; + T1f = cr[WS(rs, 19)]; + T1h = ci[WS(rs, 19)]; + T1e = W[36]; + T1g = W[37]; + T1i = FMA(T1e, T1f, T1g * T1h); + T30 = FNMS(T1g, T1f, T1e * T1h); + } + { + E T14, T16, T13, T15; + T14 = cr[WS(rs, 24)]; + T16 = ci[WS(rs, 24)]; + T13 = W[46]; + T15 = W[47]; + T17 = FMA(T13, T14, T15 * T16); + T2X = FNMS(T15, T14, T13 * T16); + } + { + E T1a, T1c, T19, T1b; + T1a = cr[WS(rs, 14)]; + T1c = ci[WS(rs, 14)]; + T19 = W[26]; + T1b = W[27]; + T1d = FMA(T19, T1a, T1b * T1c); + T2Z = FNMS(T1b, T1a, T19 * T1c); + } + T2T = T17 - T12; + T2U = T1d - T1i; + T3c = T2Z - T30; + T3b = T2W - T2X; + T2Y = T2W + T2X; + T31 = T2Z + T30; + T34 = T2Y + T31; + T18 = T12 + T17; + T1j = T1d + T1i; + T1k = T18 + 
T1j; + } + T1l = TX + T1k; + T5X = T33 + T34; + { + E T2V, T4S, T36, T4R, T32, T35; + T2V = FNMS(KP587785252, T2U, KP951056516 * T2T); + T4S = FMA(KP587785252, T2T, KP951056516 * T2U); + T32 = KP559016994 * (T2Y - T31); + T35 = FNMS(KP250000000, T34, T33); + T36 = T32 + T35; + T4R = T35 - T32; + T37 = T2V - T36; + T5v = T4S + T4R; + T4k = T2V + T36; + T4T = T4R - T4S; + } + { + E T3d, T4V, T3a, T4U, T38, T39; + T3d = FMA(KP951056516, T3b, KP587785252 * T3c); + T4V = FNMS(KP587785252, T3b, KP951056516 * T3c); + T38 = KP559016994 * (T18 - T1j); + T39 = FNMS(KP250000000, T1k, TX); + T3a = T38 + T39; + T4U = T39 - T38; + T3e = T3a - T3d; + T5w = T4U - T4V; + T4j = T3a + T3d; + T4W = T4U + T4V; + } + } + { + E T1r, T3r, T3h, T3i, T3A, T3z, T3m, T3p, T3s, T1C, T1N, T1O; + { + E T1o, T1q, T1n, T1p; + T1o = cr[WS(rs, 2)]; + T1q = ci[WS(rs, 2)]; + T1n = W[2]; + T1p = W[3]; + T1r = FMA(T1n, T1o, T1p * T1q); + T3r = FNMS(T1p, T1o, T1n * T1q); + } + { + E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n; + { + E T1t, T1v, T1s, T1u; + T1t = cr[WS(rs, 7)]; + T1v = ci[WS(rs, 7)]; + T1s = W[12]; + T1u = W[13]; + T1w = FMA(T1s, T1t, T1u * T1v); + T3k = FNMS(T1u, T1t, T1s * T1v); + } + { + E T1J, T1L, T1I, T1K; + T1J = cr[WS(rs, 17)]; + T1L = ci[WS(rs, 17)]; + T1I = W[32]; + T1K = W[33]; + T1M = FMA(T1I, T1J, T1K * T1L); + T3o = FNMS(T1K, T1J, T1I * T1L); + } + { + E T1y, T1A, T1x, T1z; + T1y = cr[WS(rs, 22)]; + T1A = ci[WS(rs, 22)]; + T1x = W[42]; + T1z = W[43]; + T1B = FMA(T1x, T1y, T1z * T1A); + T3l = FNMS(T1z, T1y, T1x * T1A); + } + { + E T1E, T1G, T1D, T1F; + T1E = cr[WS(rs, 12)]; + T1G = ci[WS(rs, 12)]; + T1D = W[22]; + T1F = W[23]; + T1H = FMA(T1D, T1E, T1F * T1G); + T3n = FNMS(T1F, T1E, T1D * T1G); + } + T3h = T1w - T1B; + T3i = T1H - T1M; + T3A = T3n - T3o; + T3z = T3k - T3l; + T3m = T3k + T3l; + T3p = T3n + T3o; + T3s = T3m + T3p; + T1C = T1w + T1B; + T1N = T1H + T1M; + T1O = T1C + T1N; + } + T1P = T1r + T1O; + T5Z = T3r + T3s; + { + E T3j, T53, T3u, T52, T3q, T3t; + T3j = 
FMA(KP951056516, T3h, KP587785252 * T3i); + T53 = FNMS(KP587785252, T3h, KP951056516 * T3i); + T3q = KP559016994 * (T3m - T3p); + T3t = FNMS(KP250000000, T3s, T3r); + T3u = T3q + T3t; + T52 = T3t - T3q; + T3v = T3j + T3u; + T5A = T53 + T52; + T4o = T3u - T3j; + T54 = T52 - T53; + } + { + E T3B, T50, T3y, T4Z, T3w, T3x; + T3B = FMA(KP951056516, T3z, KP587785252 * T3A); + T50 = FNMS(KP587785252, T3z, KP951056516 * T3A); + T3w = KP559016994 * (T1C - T1N); + T3x = FNMS(KP250000000, T1O, T1r); + T3y = T3w + T3x; + T4Z = T3x - T3w; + T3C = T3y - T3B; + T5z = T4Z - T50; + T4n = T3y + T3B; + T51 = T4Z + T50; + } + } + { + E T62, T64, Tp, T2k, T5T, T5U, T63, T5V; + { + E T5Y, T61, T1m, T2j; + T5Y = T5W - T5X; + T61 = T5Z - T60; + T62 = FMA(KP951056516, T5Y, KP587785252 * T61); + T64 = FNMS(KP587785252, T5Y, KP951056516 * T61); + Tp = T1 + To; + T1m = TS + T1l; + T2j = T1P + T2i; + T2k = T1m + T2j; + T5T = KP559016994 * (T1m - T2j); + T5U = FNMS(KP250000000, T2k, Tp); + } + cr[0] = Tp + T2k; + T63 = T5U - T5T; + cr[WS(rs, 10)] = T63 - T64; + ci[WS(rs, 9)] = T63 + T64; + T5V = T5T + T5U; + ci[WS(rs, 4)] = T5V - T62; + cr[WS(rs, 5)] = T5V + T62; + } + { + E T2v, T4f, T6I, T6U, T42, T6Z, T43, T6Y, T4A, T6N, T4D, T6L, T4u, T6E, T4v; + E T6D, T48, T6V, T4b, T6T, T2n, T6G; + T2n = T2l + T2m; + T2v = T2n - T2u; + T4f = T2n + T2u; + T6G = T6g + T6f; + T6I = T6G - T6H; + T6U = T6H + T6G; + { + E T2S, T3f, T3g, T3D, T40, T41; + T2S = FMA(KP535826794, T2G, KP844327925 * T2R); + T3f = FNMS(KP637423989, T3e, KP770513242 * T37); + T3g = T2S + T3f; + T3D = FNMS(KP425779291, T3C, KP904827052 * T3v); + T40 = FNMS(KP992114701, T3Z, KP125333233 * T3S); + T41 = T3D + T40; + T42 = T3g + T41; + T6Z = T3D - T40; + T43 = KP559016994 * (T3g - T41); + T6Y = T3f - T2S; + } + { + E T4y, T4z, T6J, T4B, T4C, T6K; + T4y = FNMS(KP248689887, T4g, KP968583161 * T4h); + T4z = FNMS(KP844327925, T4j, KP535826794 * T4k); + T6J = T4y + T4z; + T4B = FNMS(KP481753674, T4n, KP876306680 * T4o); + T4C = 
FNMS(KP684547105, T4q, KP728968627 * T4r); + T6K = T4B + T4C; + T4A = T4y - T4z; + T6N = KP559016994 * (T6J - T6K); + T4D = T4B - T4C; + T6L = T6J + T6K; + } + { + E T4i, T4l, T4m, T4p, T4s, T4t; + T4i = FMA(KP968583161, T4g, KP248689887 * T4h); + T4l = FMA(KP535826794, T4j, KP844327925 * T4k); + T4m = T4i + T4l; + T4p = FMA(KP876306680, T4n, KP481753674 * T4o); + T4s = FMA(KP728968627, T4q, KP684547105 * T4r); + T4t = T4p + T4s; + T4u = T4m + T4t; + T6E = T4p - T4s; + T4v = KP559016994 * (T4m - T4t); + T6D = T4l - T4i; + } + { + E T46, T47, T6R, T49, T4a, T6S; + T46 = FNMS(KP844327925, T2G, KP535826794 * T2R); + T47 = FMA(KP770513242, T3e, KP637423989 * T37); + T6R = T46 + T47; + T49 = FMA(KP125333233, T3Z, KP992114701 * T3S); + T4a = FMA(KP904827052, T3C, KP425779291 * T3v); + T6S = T4a + T49; + T48 = T46 - T47; + T6V = T6R - T6S; + T4b = T49 - T4a; + T6T = KP559016994 * (T6R + T6S); + } + cr[WS(rs, 4)] = T2v + T42; + ci[WS(rs, 23)] = T6L + T6I; + ci[WS(rs, 20)] = T6V + T6U; + cr[WS(rs, 1)] = T4f + T4u; + { + E T4c, T4e, T45, T4d, T44; + T4c = FMA(KP951056516, T48, KP587785252 * T4b); + T4e = FNMS(KP587785252, T48, KP951056516 * T4b); + T44 = FNMS(KP250000000, T42, T2v); + T45 = T43 + T44; + T4d = T44 - T43; + ci[0] = T45 - T4c; + ci[WS(rs, 5)] = T4d + T4e; + cr[WS(rs, 9)] = T45 + T4c; + ci[WS(rs, 10)] = T4d - T4e; + } + { + E T6F, T6P, T6O, T6Q, T6M; + T6F = FMA(KP587785252, T6D, KP951056516 * T6E); + T6P = FNMS(KP587785252, T6E, KP951056516 * T6D); + T6M = FNMS(KP250000000, T6L, T6I); + T6O = T6M - T6N; + T6Q = T6N + T6M; + cr[WS(rs, 16)] = T6F - T6O; + ci[WS(rs, 18)] = T6P + T6Q; + ci[WS(rs, 13)] = T6F + T6O; + cr[WS(rs, 21)] = T6P - T6Q; + } + { + E T70, T71, T6X, T72, T6W; + T70 = FMA(KP587785252, T6Y, KP951056516 * T6Z); + T71 = FNMS(KP587785252, T6Z, KP951056516 * T6Y); + T6W = FNMS(KP250000000, T6V, T6U); + T6X = T6T - T6W; + T72 = T6T + T6W; + cr[WS(rs, 14)] = T6X - T70; + ci[WS(rs, 15)] = T71 + T72; + cr[WS(rs, 19)] = T70 + T6X; + cr[WS(rs, 24)] = T71 - 
T72; + } + { + E T4E, T4G, T4x, T4F, T4w; + T4E = FMA(KP951056516, T4A, KP587785252 * T4D); + T4G = FNMS(KP587785252, T4A, KP951056516 * T4D); + T4w = FNMS(KP250000000, T4u, T4f); + T4x = T4v + T4w; + T4F = T4w - T4v; + ci[WS(rs, 3)] = T4x - T4E; + ci[WS(rs, 8)] = T4F + T4G; + cr[WS(rs, 6)] = T4x + T4E; + cr[WS(rs, 11)] = T4F - T4G; + } + } + { + E T75, T7d, T76, T79, T7a, T7b, T7e, T7c; + { + E T73, T74, T77, T78; + T73 = T1l - TS; + T74 = T1P - T2i; + T75 = FMA(KP587785252, T73, KP951056516 * T74); + T7d = FNMS(KP587785252, T74, KP951056516 * T73); + T76 = T6e + T6b; + T77 = T5W + T5X; + T78 = T5Z + T60; + T79 = T77 + T78; + T7a = FNMS(KP250000000, T79, T76); + T7b = KP559016994 * (T77 - T78); + } + ci[WS(rs, 24)] = T79 + T76; + T7e = T7b + T7a; + cr[WS(rs, 20)] = T7d - T7e; + ci[WS(rs, 19)] = T7d + T7e; + T7c = T7a - T7b; + cr[WS(rs, 15)] = T75 - T7c; + ci[WS(rs, 14)] = T75 + T7c; + } + { + E T4J, T5r, T6i, T6u, T5e, T6z, T5f, T6y, T5M, T6n, T5P, T6l, T5G, T66, T5H; + E T65, T5k, T6v, T5n, T6t, T4H, T6h; + T4H = T2m - T2l; + T4J = T4H + T4I; + T5r = T4H - T4I; + T6h = T6f - T6g; + T6i = T6a + T6h; + T6u = T6h - T6a; + { + E T4Q, T4X, T4Y, T55, T5c, T5d; + T4Q = FMA(KP728968627, T4M, KP684547105 * T4P); + T4X = FNMS(KP992114701, T4W, KP125333233 * T4T); + T4Y = T4Q + T4X; + T55 = FMA(KP062790519, T51, KP998026728 * T54); + T5c = FNMS(KP637423989, T5b, KP770513242 * T58); + T5d = T55 + T5c; + T5e = T4Y + T5d; + T6z = T55 - T5c; + T5f = KP559016994 * (T4Y - T5d); + T6y = T4X - T4Q; + } + { + E T5K, T5L, T6j, T5N, T5O, T6k; + T5K = FNMS(KP481753674, T5s, KP876306680 * T5t); + T5L = FMA(KP904827052, T5w, KP425779291 * T5v); + T6j = T5K - T5L; + T5N = FNMS(KP844327925, T5z, KP535826794 * T5A); + T5O = FNMS(KP998026728, T5C, KP062790519 * T5D); + T6k = T5N + T5O; + T5M = T5K + T5L; + T6n = KP559016994 * (T6j - T6k); + T5P = T5N - T5O; + T6l = T6j + T6k; + } + { + E T5u, T5x, T5y, T5B, T5E, T5F; + T5u = FMA(KP876306680, T5s, KP481753674 * T5t); + T5x = FNMS(KP425779291, 
T5w, KP904827052 * T5v); + T5y = T5u + T5x; + T5B = FMA(KP535826794, T5z, KP844327925 * T5A); + T5E = FMA(KP062790519, T5C, KP998026728 * T5D); + T5F = T5B + T5E; + T5G = T5y + T5F; + T66 = T5B - T5E; + T5H = KP559016994 * (T5y - T5F); + T65 = T5x - T5u; + } + { + E T5i, T5j, T6r, T5l, T5m, T6s; + T5i = FNMS(KP684547105, T4M, KP728968627 * T4P); + T5j = FMA(KP125333233, T4W, KP992114701 * T4T); + T6r = T5i - T5j; + T5l = FNMS(KP998026728, T51, KP062790519 * T54); + T5m = FMA(KP770513242, T5b, KP637423989 * T58); + T6s = T5l - T5m; + T5k = T5i + T5j; + T6v = T6r + T6s; + T5n = T5l + T5m; + T6t = KP559016994 * (T6r - T6s); + } + cr[WS(rs, 3)] = T4J + T5e; + ci[WS(rs, 22)] = T6l + T6i; + ci[WS(rs, 21)] = T6v + T6u; + cr[WS(rs, 2)] = T5r + T5G; + { + E T67, T6p, T6o, T6q, T6m; + T67 = FMA(KP587785252, T65, KP951056516 * T66); + T6p = FNMS(KP587785252, T66, KP951056516 * T65); + T6m = FNMS(KP250000000, T6l, T6i); + T6o = T6m - T6n; + T6q = T6n + T6m; + cr[WS(rs, 17)] = T67 - T6o; + ci[WS(rs, 17)] = T6p + T6q; + ci[WS(rs, 12)] = T67 + T6o; + cr[WS(rs, 22)] = T6p - T6q; + } + { + E T5Q, T5S, T5J, T5R, T5I; + T5Q = FMA(KP951056516, T5M, KP587785252 * T5P); + T5S = FNMS(KP587785252, T5M, KP951056516 * T5P); + T5I = FNMS(KP250000000, T5G, T5r); + T5J = T5H + T5I; + T5R = T5I - T5H; + ci[WS(rs, 2)] = T5J - T5Q; + ci[WS(rs, 7)] = T5R + T5S; + cr[WS(rs, 7)] = T5J + T5Q; + cr[WS(rs, 12)] = T5R - T5S; + } + { + E T5o, T5q, T5h, T5p, T5g; + T5o = FMA(KP951056516, T5k, KP587785252 * T5n); + T5q = FNMS(KP587785252, T5k, KP951056516 * T5n); + T5g = FNMS(KP250000000, T5e, T4J); + T5h = T5f + T5g; + T5p = T5g - T5f; + ci[WS(rs, 1)] = T5h - T5o; + ci[WS(rs, 6)] = T5p + T5q; + cr[WS(rs, 8)] = T5h + T5o; + ci[WS(rs, 11)] = T5p - T5q; + } + { + E T6A, T6B, T6x, T6C, T6w; + T6A = FMA(KP587785252, T6y, KP951056516 * T6z); + T6B = FNMS(KP587785252, T6z, KP951056516 * T6y); + T6w = FNMS(KP250000000, T6v, T6u); + T6x = T6t - T6w; + T6C = T6t + T6w; + cr[WS(rs, 13)] = T6x - T6A; + ci[WS(rs, 16)] 
= T6B + T6C; + cr[WS(rs, 18)] = T6A + T6x; + cr[WS(rs, 23)] = T6B - T6C; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 25}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 25, "hf_25", twinstr, &GENUS, {260, 140, 140, 0} }; + +void X(codelet_hf_25) (planner *p) { + X(khc2hc_register) (p, hf_25, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 3 -dit -name hf_3 -include hf.h */ + +/* + * This function contains 16 FP additions, 14 FP multiplications, + * (or, 6 additions, 4 multiplications, 10 fused multiply/add), + * 21 stack variables, 2 constants, and 12 memory accesses + */ +#include "hf.h" + /* hf_3, HAVE_FMA variant: size-3 (-n 3), -dit hc2hc codelet per the generator command above. For each m in [mb, me) it reads cr[0], ci[0], the WS(rs, 1) and WS(rs, 2) entries of cr and ci, and W[0..3], then overwrites those same six cr/ci entries. R, E, stride, INT, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE are defined outside this view. */ +static void hf_3(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs)) { /* W starts offset by (mb - 1) * 4; each pass moves cr forward by ms, ci backward by ms, and W on by 4 */ + E T1, Tl, T9, Tc, Tb, Th, T7, Ti, Ta, Tj, Td; + T1 = cr[0]; + Tl = ci[0]; + { /* loads of inputs 1 and 2 and of W[0..3], interleaved with the first products */ + E T3, T6, T2, T5, Tg, T4, T8; + T3 = cr[WS(rs, 1)]; + T6 = ci[WS(rs, 1)]; + T2 = W[0]; + T5 = W[1]; + T9 = cr[WS(rs, 2)]; + Tc = ci[WS(rs, 2)]; + Tg = T2 * T6; + T4 = T2 * T3; + T8 = W[2]; + Tb = W[3]; + Th = FNMS(T5, T3, Tg); + T7 = FMA(T5, T6, T4); + Ti = T8 * Tc; + Ta = T8 * T9; + } + Tj = FNMS(Tb, T9, Ti); + Td = FMA(Tb, Tc, Ta); + { /* combines the intermediates and stores all six outputs over the input slots */ + E Tk, Te, To, Tn, Tm, Tf; + Tk = Th - Tj; + Tm = Th + Tj; + Te = T7 + Td; + To = Td - T7; + ci[WS(rs, 2)] = Tm + Tl; + Tn = FNMS(KP500000000, Tm, Tl); + cr[0] = T1 + Te; + Tf = FNMS(KP500000000, Te, T1); + ci[WS(rs, 1)] = FMA(KP866025403, To, Tn); + cr[WS(rs, 2)] = FMS(KP866025403, To, Tn); + cr[WS(rs, 1)] = FMA(KP866025403, Tk, Tf); + ci[0] = FNMS(KP866025403, Tk, Tf); + } + } + } +} + +static const tw_instr twinstr[] = { + 
{TW_FULL, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 3, "hf_3", twinstr, &GENUS, {6, 4, 10, 0} }; /* {6, 4, 10, 0} repeats the add / mul / fused multiply-add counts quoted in the comment above this variant */ + +void X(codelet_hf_3) (planner *p) { /* passes hf_3 and desc to X(khc2hc_register) for planner p */ + X(khc2hc_register) (p, hf_3, &desc); +} +#else /* HAVE_FMA */ + /* Non-FMA branch: defines the same names (hf_3, twinstr, desc, codelet_hf_3) as the HAVE_FMA branch, so only one of the two is compiled. */ +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 3 -dit -name hf_3 -include hf.h */ + +/* + * This function contains 16 FP additions, 12 FP multiplications, + * (or, 10 additions, 6 multiplications, 6 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "hf.h" + /* hf_3, non-FMA variant: same signature and the same set of cr/ci/W loads and in-place cr/ci stores per pass as the HAVE_FMA branch; only the intermediate arithmetic is arranged differently. */ +static void hf_3(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs)) { /* W starts offset by (mb - 1) * 4; each pass moves cr forward by ms, ci backward by ms, and W on by 4 */ + E T1, Ti, T6, Te, Tb, Tf, Tc, Tj; + T1 = cr[0]; + Ti = ci[0]; + { /* NOTE(review): if FMA(a, b, c) means a * b plus c and FNMS(a, b, c) means c minus a * b (macros defined outside this view), then (T6, Te) is input 1 multiplied by the conjugate of (W[0], W[1]) -- confirm */ + E T3, T5, T2, T4; + T3 = cr[WS(rs, 1)]; + T5 = ci[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + Te = FNMS(T4, T3, T2 * T5); + } + { /* same pattern for input 2 with (W[2], W[3]), producing (Tb, Tf) */ + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 2)]; + Ta = ci[WS(rs, 2)]; + T7 = W[2]; + T9 = W[3]; + Tb = FMA(T7, T8, T9 * Ta); + Tf = FNMS(T9, T8, T7 * Ta); + } + Tc = T6 + Tb; + Tj = Te + Tf; + { /* combines the intermediates and stores all six outputs over the input slots */ + E Td, Tg, Th, Tk; + cr[0] = T1 + Tc; + Td = FNMS(KP500000000, Tc, T1); + Tg = KP866025403 * (Te - Tf); + ci[0] = Td - Tg; + cr[WS(rs, 1)] = Td + Tg; + ci[WS(rs, 2)] = Tj + Ti; + Th = KP866025403 * (Tb - T6); + Tk = FNMS(KP500000000, Tj, Ti); + cr[WS(rs, 2)] = Th - Tk; + ci[WS(rs, 1)] = Th + Tk; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 3}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 3, "hf_3", twinstr, &GENUS, {10, 6, 6, 0} }; /* {10, 6, 6, 0} repeats the add / mul / fused multiply-add counts quoted in the comment above this variant */ + +void X(codelet_hf_3) (planner *p) { /* passes hf_3 and desc to X(khc2hc_register) for planner p */ + X(khc2hc_register) (p, hf_3, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1769 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:10 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h */ + +/* + * This function contains 434 FP additions, 260 FP multiplications, + * (or, 236 additions, 62 multiplications, 198 fused multiply/add), + * 135 stack variables, 7 constants, and 128 memory accesses + */ +#include "hf.h" + +static void hf_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, 
+0.198912367379658006911597622644676228597850501); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { + E T6D, T6A; + { + E T8y, T87, T8, T3w, T83, T3B, T8x, Tl, T6G, Tz, T3J, T5T, T6F, TM, T3Q; + E T5U, T46, T5X, T7E, T6M, T5Y, T3Z, T6J, T1f, T7D, T6R, T61, T4e, T6O, T1G; + E T60, T4l, T54, T6c, T7d, T7N, T32, T76, T6f, T5r, T4v, T65, T72, T7I, T29; + E T6V, T68, T4S, T5t, T5b, T7O, T79, T7e, T3t, T5s, T5i, T4H, T2y, T4B, T6X; + E T2m, T4w, T4F, T2s; + { + E T44, T1d, T3X, T6K, T11, T40, T42, T17, T5h, T5c; + { + E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti; + { + E T1, T86, T3, T6, T2, T5; + T1 = cr[0]; + T86 = ci[0]; + T3 = cr[WS(rs, 16)]; + T6 = ci[WS(rs, 16)]; + T2 = W[30]; + T5 = W[31]; + { + E T84, T4, T9, T85, T7; + Ta = cr[WS(rs, 8)]; + Td = ci[WS(rs, 8)]; + T84 = T2 * T6; + T4 = T2 * T3; + T9 = W[14]; + Tg = cr[WS(rs, 24)]; + T85 = FNMS(T5, T3, T84); + T7 = FMA(T5, T6, T4); + T3x = T9 * Td; + Tb = T9 * Ta; + T8y = T86 - T85; + T87 = T85 + T86; + T8 = T1 + T7; + T3w = T1 - T7; + Tj = ci[WS(rs, 24)]; + Tf = W[46]; + } + Tc = W[15]; + Ti = W[47]; + } + { + E Tu, Tx, T3F, Ts, Tw, T3G, Tv; + { + E To, Tr, Tp, T3E, Tq, Tt; + { + E T3y, Te, T3A, Tk, T3z, Th, Tn; + To = cr[WS(rs, 4)]; + T3z = Tf * Tj; + Th = Tf * Tg; + T3y = FNMS(Tc, Ta, T3x); + Te = FMA(Tc, Td, Tb); + T3A = FNMS(Ti, Tg, T3z); + Tk = FMA(Ti, Tj, Th); + Tr = ci[WS(rs, 4)]; + Tn = W[6]; + T83 = T3y + T3A; + T3B = T3y - T3A; + T8x = Te - Tk; + Tl = Te + Tk; + Tp = Tn * To; + T3E = Tn * Tr; + } + Tq = W[7]; + Tu = cr[WS(rs, 20)]; + Tx = ci[WS(rs, 20)]; + Tt = W[38]; + T3F = FNMS(Tq, To, T3E); + Ts = FMA(Tq, Tr, Tp); + Tw = W[39]; + T3G = Tt * Tx; + Tv = Tt * Tu; + } + { + E T3M, TF, TH, TK, TG, TJ, TE, TD, TC; + { + E TB, T3H, Ty, TA, T3I, T3D, T3L; + TB = cr[WS(rs, 28)]; + TE = 
ci[WS(rs, 28)]; + T3H = FNMS(Tw, Tu, T3G); + Ty = FMA(Tw, Tx, Tv); + TA = W[54]; + TD = W[55]; + T6G = T3F + T3H; + T3I = T3F - T3H; + Tz = Ts + Ty; + T3D = Ts - Ty; + T3L = TA * TE; + TC = TA * TB; + T3J = T3D - T3I; + T5T = T3D + T3I; + T3M = FNMS(TD, TB, T3L); + } + TF = FMA(TD, TE, TC); + TH = cr[WS(rs, 12)]; + TK = ci[WS(rs, 12)]; + TG = W[22]; + TJ = W[23]; + { + E TU, T3U, T13, T16, T3W, T10, T12, T15, T41, T14; + { + E T19, T1c, T18, T1b, T3P, T3K; + { + E TQ, TT, T3N, TI, TP, TS; + TQ = cr[WS(rs, 2)]; + TT = ci[WS(rs, 2)]; + T3N = TG * TK; + TI = TG * TH; + TP = W[2]; + TS = W[3]; + { + E T3O, TL, T3T, TR; + T3O = FNMS(TJ, TH, T3N); + TL = FMA(TJ, TK, TI); + T3T = TP * TT; + TR = TP * TQ; + T6F = T3M + T3O; + T3P = T3M - T3O; + TM = TF + TL; + T3K = TF - TL; + TU = FMA(TS, TT, TR); + T3U = FNMS(TS, TQ, T3T); + } + } + T3Q = T3K + T3P; + T5U = T3K - T3P; + T19 = cr[WS(rs, 26)]; + T1c = ci[WS(rs, 26)]; + T18 = W[50]; + T1b = W[51]; + { + E TW, TZ, TY, T3V, TX, T43, T1a, TV; + TW = cr[WS(rs, 18)]; + TZ = ci[WS(rs, 18)]; + T43 = T18 * T1c; + T1a = T18 * T19; + TV = W[34]; + TY = W[35]; + T44 = FNMS(T1b, T19, T43); + T1d = FMA(T1b, T1c, T1a); + T3V = TV * TZ; + TX = TV * TW; + T13 = cr[WS(rs, 10)]; + T16 = ci[WS(rs, 10)]; + T3W = FNMS(TY, TW, T3V); + T10 = FMA(TY, TZ, TX); + T12 = W[18]; + T15 = W[19]; + } + } + T3X = T3U - T3W; + T6K = T3U + T3W; + T11 = TU + T10; + T40 = TU - T10; + T41 = T12 * T16; + T14 = T12 * T13; + T42 = FNMS(T15, T13, T41); + T17 = FMA(T15, T16, T14); + } + } + } + } + { + E T49, T1l, T4j, T1E, T1u, T1x, T1w, T4b, T1r, T4g, T1v; + { + E T1A, T1D, T1C, T4i, T1B; + { + E T1h, T1k, T1g, T1j, T48, T1i, T1z; + T1h = cr[WS(rs, 30)]; + T1k = ci[WS(rs, 30)]; + { + E T6L, T45, T1e, T3Y; + T6L = T42 + T44; + T45 = T42 - T44; + T1e = T17 + T1d; + T3Y = T17 - T1d; + T46 = T40 - T45; + T5X = T40 + T45; + T7E = T6K + T6L; + T6M = T6K - T6L; + T5Y = T3X - T3Y; + T3Z = T3X + T3Y; + T6J = T11 - T1e; + T1f = T11 + T1e; + T1g = W[58]; + } + T1j = W[59]; + 
T1A = cr[WS(rs, 22)]; + T1D = ci[WS(rs, 22)]; + T48 = T1g * T1k; + T1i = T1g * T1h; + T1z = W[42]; + T1C = W[43]; + T49 = FNMS(T1j, T1h, T48); + T1l = FMA(T1j, T1k, T1i); + T4i = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T4a, T1o, T1t; + T1n = cr[WS(rs, 14)]; + T1q = ci[WS(rs, 14)]; + T4j = FNMS(T1C, T1A, T4i); + T1E = FMA(T1C, T1D, T1B); + T1m = W[26]; + T1p = W[27]; + T1u = cr[WS(rs, 6)]; + T1x = ci[WS(rs, 6)]; + T4a = T1m * T1q; + T1o = T1m * T1n; + T1t = W[10]; + T1w = W[11]; + T4b = FNMS(T1p, T1n, T4a); + T1r = FMA(T1p, T1q, T1o); + T4g = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T4c, T6P, T1s, T4f, T4h, T1y; + T4c = T49 - T4b; + T6P = T49 + T4b; + T1s = T1l + T1r; + T4f = T1l - T1r; + T4h = FNMS(T1w, T1u, T4g); + T1y = FMA(T1w, T1x, T1v); + { + E T4k, T6Q, T4d, T1F; + T4k = T4h - T4j; + T6Q = T4h + T4j; + T4d = T1y - T1E; + T1F = T1y + T1E; + T7D = T6P + T6Q; + T6R = T6P - T6Q; + T61 = T4c - T4d; + T4e = T4c + T4d; + T6O = T1s - T1F; + T1G = T1s + T1F; + T60 = T4f + T4k; + T4l = T4f - T4k; + } + } + } + { + E T5n, T2H, T52, T30, T2Q, T2T, T2S, T5p, T2N, T4Z, T2R; + { + E T2W, T2Z, T2Y, T51, T2X; + { + E T2D, T2G, T2C, T2F, T5m, T2E, T2V; + T2D = cr[WS(rs, 31)]; + T2G = ci[WS(rs, 31)]; + T2C = W[60]; + T2F = W[61]; + T2W = cr[WS(rs, 23)]; + T2Z = ci[WS(rs, 23)]; + T5m = T2C * T2G; + T2E = T2C * T2D; + T2V = W[44]; + T2Y = W[45]; + T5n = FNMS(T2F, T2D, T5m); + T2H = FMA(T2F, T2G, T2E); + T51 = T2V * T2Z; + T2X = T2V * T2W; + } + { + E T2J, T2M, T2I, T2L, T5o, T2K, T2P; + T2J = cr[WS(rs, 15)]; + T2M = ci[WS(rs, 15)]; + T52 = FNMS(T2Y, T2W, T51); + T30 = FMA(T2Y, T2Z, T2X); + T2I = W[28]; + T2L = W[29]; + T2Q = cr[WS(rs, 7)]; + T2T = ci[WS(rs, 7)]; + T5o = T2I * T2M; + T2K = T2I * T2J; + T2P = W[12]; + T2S = W[13]; + T5p = FNMS(T2L, T2J, T5o); + T2N = FMA(T2L, T2M, T2K); + T4Z = T2P * T2T; + T2R = T2P * T2Q; + } + } + { + E T5q, T7b, T2O, T4Y, T50, T2U; + T5q = T5n - T5p; + T7b = T5n + T5p; + T2O = T2H + T2N; + T4Y = T2H - T2N; + T50 = 
FNMS(T2S, T2Q, T4Z); + T2U = FMA(T2S, T2T, T2R); + { + E T7c, T53, T31, T5l; + T7c = T50 + T52; + T53 = T50 - T52; + T31 = T2U + T30; + T5l = T30 - T2U; + T54 = T4Y - T53; + T6c = T4Y + T53; + T7d = T7b - T7c; + T7N = T7b + T7c; + T32 = T2O + T31; + T76 = T2O - T31; + T6f = T5q + T5l; + T5r = T5l - T5q; + } + } + } + { + E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y; + { + E T23, T26, T25, T4s, T24; + { + E T1K, T1N, T1J, T1M, T4M, T1L, T22; + T1K = cr[WS(rs, 1)]; + T1N = ci[WS(rs, 1)]; + T1J = W[0]; + T1M = W[1]; + T23 = cr[WS(rs, 25)]; + T26 = ci[WS(rs, 25)]; + T4M = T1J * T1N; + T1L = T1J * T1K; + T22 = W[48]; + T25 = W[49]; + T4N = FNMS(T1M, T1K, T4M); + T1O = FMA(T1M, T1N, T1L); + T4s = T22 * T26; + T24 = T22 * T23; + } + { + E T1Q, T1T, T1P, T1S, T4O, T1R, T1W; + T1Q = cr[WS(rs, 17)]; + T1T = ci[WS(rs, 17)]; + T4t = FNMS(T25, T23, T4s); + T27 = FMA(T25, T26, T24); + T1P = W[32]; + T1S = W[33]; + T1X = cr[WS(rs, 9)]; + T20 = ci[WS(rs, 9)]; + T4O = T1P * T1T; + T1R = T1P * T1Q; + T1W = W[16]; + T1Z = W[17]; + T4P = FNMS(T1S, T1Q, T4O); + T1U = FMA(T1S, T1T, T1R); + T4q = T1W * T20; + T1Y = T1W * T1X; + } + } + { + E T4Q, T70, T1V, T4p, T4r, T21; + T4Q = T4N - T4P; + T70 = T4N + T4P; + T1V = T1O + T1U; + T4p = T1O - T1U; + T4r = FNMS(T1Z, T1X, T4q); + T21 = FMA(T1Z, T20, T1Y); + { + E T71, T4u, T4R, T28; + T71 = T4r + T4t; + T4u = T4r - T4t; + T4R = T21 - T27; + T28 = T21 + T27; + T4v = T4p - T4u; + T65 = T4p + T4u; + T72 = T70 - T71; + T7I = T70 + T71; + T29 = T1V + T28; + T6V = T1V - T28; + T68 = T4Q - T4R; + T4S = T4Q + T4R; + } + } + } + { + E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i; + { + E T3n, T3q, T3p, T5f, T3o; + { + E T34, T37, T33, T36, T56, T35, T3m; + T34 = cr[WS(rs, 3)]; + T37 = ci[WS(rs, 3)]; + T33 = W[4]; + T36 = W[5]; + T3n = cr[WS(rs, 11)]; + T3q = ci[WS(rs, 11)]; + T56 = T33 * T37; + T35 = T33 * T34; + T3m = W[20]; + T3p = W[21]; + T57 = FNMS(T36, T34, T56); + T38 = FMA(T36, T37, T35); + T5f = T3m * T3q; + T3o = T3m * 
T3n; + } + { + E T3a, T3d, T39, T3c, T58, T3b, T3g; + T3a = cr[WS(rs, 19)]; + T3d = ci[WS(rs, 19)]; + T5g = FNMS(T3p, T3n, T5f); + T3r = FMA(T3p, T3q, T3o); + T39 = W[36]; + T3c = W[37]; + T3h = cr[WS(rs, 27)]; + T3k = ci[WS(rs, 27)]; + T58 = T39 * T3d; + T3b = T39 * T3a; + T3g = W[52]; + T3j = W[53]; + T59 = FNMS(T3c, T3a, T58); + T3e = FMA(T3c, T3d, T3b); + T5d = T3g * T3k; + T3i = T3g * T3h; + } + } + { + E T5a, T78, T3f, T55, T5e, T3l, T77, T3s; + T5a = T57 - T59; + T78 = T57 + T59; + T3f = T38 + T3e; + T55 = T38 - T3e; + T5e = FNMS(T3j, T3h, T5d); + T3l = FMA(T3j, T3k, T3i); + T5h = T5e - T5g; + T77 = T5e + T5g; + T3s = T3l + T3r; + T5c = T3l - T3r; + T5t = T55 + T5a; + T5b = T55 - T5a; + T7O = T78 + T77; + T79 = T77 - T78; + T7e = T3s - T3f; + T3t = T3f + T3s; + } + } + { + E T4y, T2f, T2o, T2r, T4A, T2l, T2n, T2q, T4E, T2p; + { + E T2u, T2x, T2t, T2w; + { + E T2b, T2e, T2d, T4x, T2c, T2a; + T2b = cr[WS(rs, 5)]; + T2e = ci[WS(rs, 5)]; + T2a = W[8]; + T5s = T5c - T5h; + T5i = T5c + T5h; + T2d = W[9]; + T4x = T2a * T2e; + T2c = T2a * T2b; + T2u = cr[WS(rs, 13)]; + T2x = ci[WS(rs, 13)]; + T4y = FNMS(T2d, T2b, T4x); + T2f = FMA(T2d, T2e, T2c); + T2t = W[24]; + T2w = W[25]; + } + { + E T2h, T2k, T2j, T4z, T2i, T4G, T2v, T2g; + T2h = cr[WS(rs, 21)]; + T2k = ci[WS(rs, 21)]; + T4G = T2t * T2x; + T2v = T2t * T2u; + T2g = W[40]; + T2j = W[41]; + T4H = FNMS(T2w, T2u, T4G); + T2y = FMA(T2w, T2x, T2v); + T4z = T2g * T2k; + T2i = T2g * T2h; + T2o = cr[WS(rs, 29)]; + T2r = ci[WS(rs, 29)]; + T4A = FNMS(T2j, T2h, T4z); + T2l = FMA(T2j, T2k, T2i); + T2n = W[56]; + T2q = W[57]; + } + } + T4B = T4y - T4A; + T6X = T4y + T4A; + T2m = T2f + T2l; + T4w = T2f - T2l; + T4E = T2n * T2r; + T2p = T2n * T2o; + T4F = FNMS(T2q, T2o, T4E); + T2s = FMA(T2q, T2r, T2p); + } + } + { + E T6E, T8j, T6Y, T73, T6H, T8k, T5S, T8O, T8N, T5V, T6g, T6d, T69, T66, T5O; + E T5R; + { + E T4T, T4C, T4J, T4U, T7S, T7V; + { + E T7C, TO, T80, T7Z, T8e, T89, T8d, T1H, T8b, T3v, T7T, T7L, T7U, T7Q, T2A; + E T7P, 
T7K, T7W, T1I; + { + E T7X, T7Y, T7J, T82, T88; + { + E Tm, T4I, T6W, T4D, T2z, TN; + T6E = T8 - Tl; + Tm = T8 + Tl; + T4T = T4w + T4B; + T4C = T4w - T4B; + T4I = T4F - T4H; + T6W = T4F + T4H; + T4D = T2s - T2y; + T2z = T2s + T2y; + TN = Tz + TM; + T8j = Tz - TM; + T6Y = T6W - T6X; + T7J = T6X + T6W; + T4J = T4D + T4I; + T4U = T4I - T4D; + T2A = T2m + T2z; + T73 = T2m - T2z; + T7C = Tm - TN; + TO = Tm + TN; + } + T7P = T7N - T7O; + T7X = T7N + T7O; + T7Y = T7I + T7J; + T7K = T7I - T7J; + T6H = T6F - T6G; + T82 = T6G + T6F; + T88 = T83 + T87; + T8k = T87 - T83; + T80 = T7Y + T7X; + T7Z = T7X - T7Y; + T8e = T88 - T82; + T89 = T82 + T88; + } + { + E T7H, T7M, T2B, T3u; + T7H = T29 - T2A; + T2B = T29 + T2A; + T3u = T32 + T3t; + T7M = T32 - T3t; + T8d = T1f - T1G; + T1H = T1f + T1G; + T8b = T3u - T2B; + T3v = T2B + T3u; + T7T = T7H - T7K; + T7L = T7H + T7K; + T7U = T7M + T7P; + T7Q = T7M - T7P; + } + T7W = TO - T1H; + T1I = TO + T1H; + { + E T8g, T8h, T8f, T8i; + { + E T7R, T8c, T8a, T7G, T81, T7F; + T8g = T7Q - T7L; + T7R = T7L + T7Q; + T81 = T7E + T7D; + T7F = T7D - T7E; + cr[0] = T1I + T3v; + ci[WS(rs, 15)] = T1I - T3v; + ci[WS(rs, 7)] = T7W + T7Z; + cr[WS(rs, 8)] = T7W - T7Z; + T8c = T89 - T81; + T8a = T81 + T89; + T7G = T7C - T7F; + T7S = T7C + T7F; + T8h = T8e - T8d; + T8f = T8d + T8e; + ci[WS(rs, 23)] = T8b + T8c; + cr[WS(rs, 24)] = T8b - T8c; + ci[WS(rs, 31)] = T80 + T8a; + cr[WS(rs, 16)] = T80 - T8a; + cr[WS(rs, 4)] = FMA(KP707106781, T7R, T7G); + ci[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G); + } + T8i = T7U - T7T; + T7V = T7T + T7U; + ci[WS(rs, 19)] = FMA(KP707106781, T8g, T8f); + cr[WS(rs, 28)] = FMS(KP707106781, T8g, T8f); + ci[WS(rs, 27)] = FMA(KP707106781, T8i, T8h); + cr[WS(rs, 20)] = FMS(KP707106781, T8i, T8h); + } + } + { + E T5C, T3S, T8C, T4n, T8H, T8B, T8I, T5F, T4L, T5H, T5M, T5Q, T5A, T5w, T4V; + { + E T5D, T47, T4m, T5E, T8z, T8A, T3C, T3R, T5j, T5u; + T5S = T3w + T3B; + T3C = T3w - T3B; + T3R = T3J + T3Q; + T8O = T3Q - T3J; + T5D = 
FNMS(KP414213562, T3Z, T46); + T47 = FMA(KP414213562, T46, T3Z); + ci[WS(rs, 3)] = FMA(KP707106781, T7V, T7S); + cr[WS(rs, 12)] = FNMS(KP707106781, T7V, T7S); + T5C = FMA(KP707106781, T3R, T3C); + T3S = FNMS(KP707106781, T3R, T3C); + T4m = FNMS(KP414213562, T4l, T4e); + T5E = FMA(KP414213562, T4e, T4l); + T8N = T8y - T8x; + T8z = T8x + T8y; + T8A = T5T - T5U; + T5V = T5T + T5U; + T8C = T47 + T4m; + T4n = T47 - T4m; + T8H = FNMS(KP707106781, T8A, T8z); + T8B = FMA(KP707106781, T8A, T8z); + T6g = T5i - T5b; + T5j = T5b + T5i; + T5u = T5s - T5t; + T6d = T5t + T5s; + { + E T5K, T5k, T5L, T5v, T4K; + T69 = T4J - T4C; + T4K = T4C + T4J; + T8I = T5E - T5D; + T5F = T5D + T5E; + T5K = FMA(KP707106781, T5j, T54); + T5k = FNMS(KP707106781, T5j, T54); + T5L = FMA(KP707106781, T5u, T5r); + T5v = FNMS(KP707106781, T5u, T5r); + T4L = FNMS(KP707106781, T4K, T4v); + T5H = FMA(KP707106781, T4K, T4v); + T5M = FNMS(KP198912367, T5L, T5K); + T5Q = FMA(KP198912367, T5K, T5L); + T5A = FNMS(KP668178637, T5k, T5v); + T5w = FMA(KP668178637, T5v, T5k); + T4V = T4T + T4U; + T66 = T4T - T4U; + } + } + { + E T5y, T4o, T8J, T8L, T5I, T4W; + T5y = FNMS(KP923879532, T4n, T3S); + T4o = FMA(KP923879532, T4n, T3S); + T8J = FMA(KP923879532, T8I, T8H); + T8L = FNMS(KP923879532, T8I, T8H); + T5I = FMA(KP707106781, T4V, T4S); + T4W = FNMS(KP707106781, T4V, T4S); + { + E T8G, T8F, T8D, T8E; + { + E T5G, T5P, T5z, T4X, T5N, T5J; + T5O = FNMS(KP923879532, T5F, T5C); + T5G = FMA(KP923879532, T5F, T5C); + T5J = FNMS(KP198912367, T5I, T5H); + T5P = FMA(KP198912367, T5H, T5I); + T5z = FNMS(KP668178637, T4L, T4W); + T4X = FMA(KP668178637, T4W, T4L); + T5N = T5J + T5M; + T8G = T5M - T5J; + T8F = FNMS(KP923879532, T8C, T8B); + T8D = FMA(KP923879532, T8C, T8B); + { + E T5B, T8K, T8M, T5x; + T5B = T5z + T5A; + T8K = T5z - T5A; + T8M = T5w - T4X; + T5x = T4X + T5w; + ci[0] = FMA(KP980785280, T5N, T5G); + cr[WS(rs, 15)] = FNMS(KP980785280, T5N, T5G); + ci[WS(rs, 4)] = FNMS(KP831469612, T5B, T5y); + cr[WS(rs, 11)] = 
FMA(KP831469612, T5B, T5y); + ci[WS(rs, 28)] = FMA(KP831469612, T8K, T8J); + cr[WS(rs, 19)] = FMS(KP831469612, T8K, T8J); + ci[WS(rs, 20)] = FMA(KP831469612, T8M, T8L); + cr[WS(rs, 27)] = FMS(KP831469612, T8M, T8L); + cr[WS(rs, 3)] = FMA(KP831469612, T5x, T4o); + ci[WS(rs, 12)] = FNMS(KP831469612, T5x, T4o); + T8E = T5Q - T5P; + T5R = T5P + T5Q; + } + } + ci[WS(rs, 16)] = FMA(KP980785280, T8E, T8D); + cr[WS(rs, 31)] = FMS(KP980785280, T8E, T8D); + ci[WS(rs, 24)] = FMA(KP980785280, T8G, T8F); + cr[WS(rs, 23)] = FMS(KP980785280, T8G, T8F); + } + } + } + } + { + E T7y, T8q, T8p, T7B; + { + E T7a, T7m, T6I, T7f, T7A, T7w, T8r, T8l, T8m, T6T, T7k, T75, T8s, T7p, T7z; + E T7t; + { + E T7n, T6N, T6S, T7o, T7u, T7v; + T7a = T76 - T79; + T7u = T76 + T79; + cr[WS(rs, 7)] = FMA(KP980785280, T5R, T5O); + ci[WS(rs, 8)] = FNMS(KP980785280, T5R, T5O); + T7m = T6E + T6H; + T6I = T6E - T6H; + T7v = T7e - T7d; + T7f = T7d + T7e; + T7n = T6J - T6M; + T6N = T6J + T6M; + T7A = FMA(KP414213562, T7u, T7v); + T7w = FNMS(KP414213562, T7v, T7u); + T8r = T8k - T8j; + T8l = T8j + T8k; + T6S = T6O - T6R; + T7o = T6O + T6R; + { + E T7r, T7s, T6Z, T74; + T7r = T6V + T6Y; + T6Z = T6V - T6Y; + T74 = T72 - T73; + T7s = T72 + T73; + T8m = T6N - T6S; + T6T = T6N + T6S; + T7k = FNMS(KP414213562, T6Z, T74); + T75 = FMA(KP414213562, T74, T6Z); + T8s = T7o - T7n; + T7p = T7n + T7o; + T7z = FMA(KP414213562, T7r, T7s); + T7t = FNMS(KP414213562, T7s, T7r); + } + } + { + E T7i, T6U, T8t, T8v, T7j, T7g; + T7i = FNMS(KP707106781, T6T, T6I); + T6U = FMA(KP707106781, T6T, T6I); + T8t = FMA(KP707106781, T8s, T8r); + T8v = FNMS(KP707106781, T8s, T8r); + T7j = FMA(KP414213562, T7a, T7f); + T7g = FNMS(KP414213562, T7f, T7a); + { + E T7q, T7x, T8n, T8o; + T7y = FNMS(KP707106781, T7p, T7m); + T7q = FMA(KP707106781, T7p, T7m); + { + E T7l, T8u, T8w, T7h; + T7l = T7j - T7k; + T8u = T7k + T7j; + T8w = T7g - T75; + T7h = T75 + T7g; + ci[WS(rs, 5)] = FMA(KP923879532, T7l, T7i); + cr[WS(rs, 10)] = FNMS(KP923879532, T7l, 
T7i); + ci[WS(rs, 29)] = FMA(KP923879532, T8u, T8t); + cr[WS(rs, 18)] = FMS(KP923879532, T8u, T8t); + ci[WS(rs, 21)] = FMA(KP923879532, T8w, T8v); + cr[WS(rs, 26)] = FMS(KP923879532, T8w, T8v); + cr[WS(rs, 2)] = FMA(KP923879532, T7h, T6U); + ci[WS(rs, 13)] = FNMS(KP923879532, T7h, T6U); + T7x = T7t + T7w; + T8q = T7w - T7t; + } + T8p = FNMS(KP707106781, T8m, T8l); + T8n = FMA(KP707106781, T8m, T8l); + T8o = T7A - T7z; + T7B = T7z + T7A; + ci[WS(rs, 1)] = FMA(KP923879532, T7x, T7q); + cr[WS(rs, 14)] = FNMS(KP923879532, T7x, T7q); + ci[WS(rs, 17)] = FMA(KP923879532, T8o, T8n); + cr[WS(rs, 30)] = FMS(KP923879532, T8o, T8n); + } + } + } + { + E T6o, T5W, T8W, T63, T8V, T8P, T8Q, T6r, T6e, T6w; + { + E T6q, T6p, T5Z, T62; + ci[WS(rs, 25)] = FMA(KP923879532, T8q, T8p); + cr[WS(rs, 22)] = FMS(KP923879532, T8q, T8p); + cr[WS(rs, 6)] = FMA(KP923879532, T7B, T7y); + ci[WS(rs, 9)] = FNMS(KP923879532, T7B, T7y); + T6q = FNMS(KP414213562, T5X, T5Y); + T5Z = FMA(KP414213562, T5Y, T5X); + T62 = FNMS(KP414213562, T61, T60); + T6p = FMA(KP414213562, T60, T61); + T6o = FNMS(KP707106781, T5V, T5S); + T5W = FMA(KP707106781, T5V, T5S); + T8W = T5Z - T62; + T63 = T5Z + T62; + T8V = FNMS(KP707106781, T8O, T8N); + T8P = FMA(KP707106781, T8O, T8N); + T8Q = T6q + T6p; + T6r = T6p - T6q; + T6e = FMA(KP707106781, T6d, T6c); + T6w = FNMS(KP707106781, T6d, T6c); + } + { + E T6k, T8U, T6z, T6n, T8S, T8T, T8R, T6s; + { + E T64, T6y, T6l, T6i, T6v, T6m, T6b, T8X, T8Z, T8Y, T6j, T90; + { + E T6C, T6B, T6x, T6h; + T6k = FNMS(KP923879532, T63, T5W); + T64 = FMA(KP923879532, T63, T5W); + T6x = FNMS(KP707106781, T6g, T6f); + T6h = FMA(KP707106781, T6g, T6f); + { + E T6t, T67, T6u, T6a; + T6t = FNMS(KP707106781, T66, T65); + T67 = FMA(KP707106781, T66, T65); + T6u = FNMS(KP707106781, T69, T68); + T6a = FMA(KP707106781, T69, T68); + T6y = FMA(KP668178637, T6x, T6w); + T6C = FNMS(KP668178637, T6w, T6x); + T6l = FMA(KP198912367, T6e, T6h); + T6i = FNMS(KP198912367, T6h, T6e); + T6v = FNMS(KP668178637, T6u, 
T6t); + T6B = FMA(KP668178637, T6t, T6u); + T6m = FNMS(KP198912367, T67, T6a); + T6b = FMA(KP198912367, T6a, T67); + } + T8X = FMA(KP923879532, T8W, T8V); + T8Z = FNMS(KP923879532, T8W, T8V); + T6D = T6B - T6C; + T8Y = T6B + T6C; + } + T8U = T6i - T6b; + T6j = T6b + T6i; + T90 = T6y - T6v; + T6z = T6v + T6y; + ci[WS(rs, 18)] = FNMS(KP831469612, T8Y, T8X); + cr[WS(rs, 29)] = -(FMA(KP831469612, T8Y, T8X)); + cr[WS(rs, 1)] = FMA(KP980785280, T6j, T64); + ci[WS(rs, 14)] = FNMS(KP980785280, T6j, T64); + cr[WS(rs, 21)] = FMS(KP831469612, T90, T8Z); + ci[WS(rs, 26)] = FMA(KP831469612, T90, T8Z); + T6n = T6l - T6m; + T8S = T6m + T6l; + } + T6A = FNMS(KP923879532, T6r, T6o); + T6s = FMA(KP923879532, T6r, T6o); + T8T = FNMS(KP923879532, T8Q, T8P); + T8R = FMA(KP923879532, T8Q, T8P); + ci[WS(rs, 6)] = FMA(KP980785280, T6n, T6k); + cr[WS(rs, 9)] = FNMS(KP980785280, T6n, T6k); + ci[WS(rs, 2)] = FMA(KP831469612, T6z, T6s); + cr[WS(rs, 13)] = FNMS(KP831469612, T6z, T6s); + ci[WS(rs, 30)] = FMA(KP980785280, T8S, T8R); + cr[WS(rs, 17)] = FMS(KP980785280, T8S, T8R); + ci[WS(rs, 22)] = FMA(KP980785280, T8U, T8T); + cr[WS(rs, 25)] = FMS(KP980785280, T8U, T8T); + } + } + } + } + } + cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A); + ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hf_32", twinstr, &GENUS, {236, 62, 198, 0} }; + +void X(codelet_hf_32) (planner *p) { + X(khc2hc_register) (p, hf_32, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 96 stack variables, 7 constants, and 128 memory accesses + */ +#include "hf.h" + +static void hf_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT 
ms) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { + E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41; + E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U; + E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x; + E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P; + E T4m, T5e, T4v, T5h; + { + E T1, T76, T6, T75, Tc, T32, Th, T33; + T1 = cr[0]; + T76 = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 16)]; + T5 = ci[WS(rs, 16)]; + T2 = W[30]; + T4 = W[31]; + T6 = FMA(T2, T3, T4 * T5); + T75 = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 8)]; + Tb = ci[WS(rs, 8)]; + T8 = W[14]; + Ta = W[15]; + Tc = FMA(T8, T9, Ta * Tb); + T32 = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 24)]; + Tg = ci[WS(rs, 24)]; + Td = W[46]; + Tf = W[47]; + Th = FMA(Td, Te, Tf * Tg); + T33 = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, T7A, T7B; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = T7 + Ti; + T5F = T7 - Ti; + T7A = Tc - Th; + T7B = T76 - T75; + T7C = T7A + T7B; + T7Q = T7B - T7A; + } + { + E T31, T34, T74, T77; + T31 = T1 - T6; + T34 = T32 - T33; + T35 = T31 + T34; + T4T = T31 - T34; + T74 = T32 + T33; + T77 = T75 + T76; + T78 = T74 + T77; + T7m = T77 - T74; + } + } + { + E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H; + { + E T1v, T1x, T1u, T1w; + T1v = cr[WS(rs, 1)]; + T1x = 
ci[WS(rs, 1)]; + T1u = W[0]; + T1w = W[1]; + T1y = FMA(T1u, T1v, T1w * T1x); + T3X = FNMS(T1w, T1v, T1u * T1x); + } + { + E T1L, T1N, T1K, T1M; + T1L = cr[WS(rs, 25)]; + T1N = ci[WS(rs, 25)]; + T1K = W[48]; + T1M = W[49]; + T1O = FMA(T1K, T1L, T1M * T1N); + T3I = FNMS(T1M, T1L, T1K * T1N); + } + { + E T1A, T1C, T1z, T1B; + T1A = cr[WS(rs, 17)]; + T1C = ci[WS(rs, 17)]; + T1z = W[32]; + T1B = W[33]; + T1D = FMA(T1z, T1A, T1B * T1C); + T3Y = FNMS(T1B, T1A, T1z * T1C); + } + { + E T1G, T1I, T1F, T1H; + T1G = cr[WS(rs, 9)]; + T1I = ci[WS(rs, 9)]; + T1F = W[16]; + T1H = W[17]; + T1J = FMA(T1F, T1G, T1H * T1I); + T3H = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1E, T1P, T5W, T5X; + T1E = T1y + T1D; + T1P = T1J + T1O; + T1Q = T1E + T1P; + T61 = T1E - T1P; + T5W = T3X + T3Y; + T5X = T3H + T3I; + T5Y = T5W - T5X; + T6J = T5W + T5X; + } + { + E T3G, T3J, T3Z, T40; + T3G = T1y - T1D; + T3J = T3H - T3I; + T3K = T3G + T3J; + T56 = T3G - T3J; + T3Z = T3X - T3Y; + T40 = T1J - T1O; + T41 = T3Z - T40; + T59 = T3Z + T40; + } + } + { + E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p; + { + E T2g, T2i, T2f, T2h; + T2g = cr[WS(rs, 31)]; + T2i = ci[WS(rs, 31)]; + T2f = W[60]; + T2h = W[61]; + T2j = FMA(T2f, T2g, T2h * T2i); + T47 = FNMS(T2h, T2g, T2f * T2i); + } + { + E T2w, T2y, T2v, T2x; + T2w = cr[WS(rs, 23)]; + T2y = ci[WS(rs, 23)]; + T2v = W[44]; + T2x = W[45]; + T2z = FMA(T2v, T2w, T2x * T2y); + T4q = FNMS(T2x, T2w, T2v * T2y); + } + { + E T2l, T2n, T2k, T2m; + T2l = cr[WS(rs, 15)]; + T2n = ci[WS(rs, 15)]; + T2k = W[28]; + T2m = W[29]; + T2o = FMA(T2k, T2l, T2m * T2n); + T48 = FNMS(T2m, T2l, T2k * T2n); + } + { + E T2r, T2t, T2q, T2s; + T2r = cr[WS(rs, 7)]; + T2t = ci[WS(rs, 7)]; + T2q = W[12]; + T2s = W[13]; + T2u = FMA(T2q, T2r, T2s * T2t); + T4p = FNMS(T2s, T2r, T2q * T2t); + } + { + E T2p, T2A, T6c, T6d; + T2p = T2j + T2o; + T2A = T2u + T2z; + T2B = T2p + T2A; + T67 = T2p - T2A; + T6c = T47 + T48; + T6d = T4p + T4q; + T6e = T6c - T6d; + T6O = T6c + T6d; + } + { + E T49, T4a, T4o, T4r; + 
T49 = T47 - T48; + T4a = T2u - T2z; + T4b = T49 - T4a; + T5g = T49 + T4a; + T4o = T2j - T2o; + T4r = T4p - T4q; + T4s = T4o + T4r; + T5d = T4o - T4r; + } + } + { + E To, T37, TE, T3d, Tt, T38, Tz, T3c; + { + E Tl, Tn, Tk, Tm; + Tl = cr[WS(rs, 4)]; + Tn = ci[WS(rs, 4)]; + Tk = W[6]; + Tm = W[7]; + To = FMA(Tk, Tl, Tm * Tn); + T37 = FNMS(Tm, Tl, Tk * Tn); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 12)]; + TD = ci[WS(rs, 12)]; + TA = W[22]; + TC = W[23]; + TE = FMA(TA, TB, TC * TD); + T3d = FNMS(TC, TB, TA * TD); + } + { + E Tq, Ts, Tp, Tr; + Tq = cr[WS(rs, 20)]; + Ts = ci[WS(rs, 20)]; + Tp = W[38]; + Tr = W[39]; + Tt = FMA(Tp, Tq, Tr * Ts); + T38 = FNMS(Tr, Tq, Tp * Ts); + } + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 28)]; + Ty = ci[WS(rs, 28)]; + Tv = W[54]; + Tx = W[55]; + Tz = FMA(Tv, Tw, Tx * Ty); + T3c = FNMS(Tx, Tw, Tv * Ty); + } + { + E Tu, TF, T5G, T5H; + Tu = To + Tt; + TF = Tz + TE; + TG = Tu + TF; + T7l = Tu - TF; + T5G = T3c + T3d; + T5H = T37 + T38; + T5I = T5G - T5H; + T73 = T5H + T5G; + } + { + E T36, T39, T3b, T3e; + T36 = To - Tt; + T39 = T37 - T38; + T3a = T36 + T39; + T4U = T36 - T39; + T3b = Tz - TE; + T3e = T3c - T3d; + T3f = T3b - T3e; + T4V = T3b + T3e; + } + } + { + E TM, T3n, T12, T3k, TR, T3o, TX, T3j; + { + E TJ, TL, TI, TK; + TJ = cr[WS(rs, 2)]; + TL = ci[WS(rs, 2)]; + TI = W[2]; + TK = W[3]; + TM = FMA(TI, TJ, TK * TL); + T3n = FNMS(TK, TJ, TI * TL); + } + { + E TZ, T11, TY, T10; + TZ = cr[WS(rs, 26)]; + T11 = ci[WS(rs, 26)]; + TY = W[50]; + T10 = W[51]; + T12 = FMA(TY, TZ, T10 * T11); + T3k = FNMS(T10, TZ, TY * T11); + } + { + E TO, TQ, TN, TP; + TO = cr[WS(rs, 18)]; + TQ = ci[WS(rs, 18)]; + TN = W[34]; + TP = W[35]; + TR = FMA(TN, TO, TP * TQ); + T3o = FNMS(TP, TO, TN * TQ); + } + { + E TU, TW, TT, TV; + TU = cr[WS(rs, 10)]; + TW = ci[WS(rs, 10)]; + TT = W[18]; + TV = W[19]; + TX = FMA(TT, TU, TV * TW); + T3j = FNMS(TV, TU, TT * TW); + } + { + E TS, T13, T5L, T5M; + TS = TM + TR; + T13 = TX + T12; + T14 = TS + T13; + T5K = TS - T13; + 
T5L = T3n + T3o; + T5M = T3j + T3k; + T5N = T5L - T5M; + T6F = T5L + T5M; + } + { + E T3i, T3l, T3p, T3q; + T3i = TM - TR; + T3l = T3j - T3k; + T3m = T3i + T3l; + T4Z = T3i - T3l; + T3p = T3n - T3o; + T3q = TX - T12; + T3r = T3p - T3q; + T4Y = T3p + T3q; + } + } + { + E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z; + { + E T16, T18, T15, T17; + T16 = cr[WS(rs, 30)]; + T18 = ci[WS(rs, 30)]; + T15 = W[58]; + T17 = W[59]; + T19 = FMA(T15, T16, T17 * T18); + T3t = FNMS(T17, T16, T15 * T18); + } + { + E T1m, T1o, T1l, T1n; + T1m = cr[WS(rs, 22)]; + T1o = ci[WS(rs, 22)]; + T1l = W[42]; + T1n = W[43]; + T1p = FMA(T1l, T1m, T1n * T1o); + T3A = FNMS(T1n, T1m, T1l * T1o); + } + { + E T1b, T1d, T1a, T1c; + T1b = cr[WS(rs, 14)]; + T1d = ci[WS(rs, 14)]; + T1a = W[26]; + T1c = W[27]; + T1e = FMA(T1a, T1b, T1c * T1d); + T3u = FNMS(T1c, T1b, T1a * T1d); + } + { + E T1h, T1j, T1g, T1i; + T1h = cr[WS(rs, 6)]; + T1j = ci[WS(rs, 6)]; + T1g = W[10]; + T1i = W[11]; + T1k = FMA(T1g, T1h, T1i * T1j); + T3z = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1f, T1q, T5Q, T5R; + T1f = T19 + T1e; + T1q = T1k + T1p; + T1r = T1f + T1q; + T5P = T1f - T1q; + T5Q = T3t + T3u; + T5R = T3z + T3A; + T5S = T5Q - T5R; + T6E = T5Q + T5R; + } + { + E T3v, T3w, T3y, T3B; + T3v = T3t - T3u; + T3w = T1k - T1p; + T3x = T3v - T3w; + T52 = T3v + T3w; + T3y = T19 - T1e; + T3B = T3z - T3A; + T3C = T3y + T3B; + T51 = T3y - T3B; + } + } + { + E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T; + { + E T1S, T1U, T1R, T1T; + T1S = cr[WS(rs, 5)]; + T1U = ci[WS(rs, 5)]; + T1R = W[8]; + T1T = W[9]; + T1V = FMA(T1R, T1S, T1T * T1U); + T3M = FNMS(T1T, T1S, T1R * T1U); + } + { + E T1X, T1Z, T1W, T1Y; + T1X = cr[WS(rs, 21)]; + T1Z = ci[WS(rs, 21)]; + T1W = W[40]; + T1Y = W[41]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T3N = FNMS(T1Y, T1X, T1W * T1Z); + } + T3L = T1V - T20; + T3O = T3M - T3N; + { + E T23, T25, T22, T24; + T23 = cr[WS(rs, 29)]; + T25 = ci[WS(rs, 29)]; + T22 = W[56]; + T24 = W[57]; + T26 = FMA(T22, T23, T24 * T25); + 
T3Q = FNMS(T24, T23, T22 * T25); + } + { + E T28, T2a, T27, T29; + T28 = cr[WS(rs, 13)]; + T2a = ci[WS(rs, 13)]; + T27 = W[24]; + T29 = W[25]; + T2b = FMA(T27, T28, T29 * T2a); + T3R = FNMS(T29, T28, T27 * T2a); + } + T3S = T3Q - T3R; + T3T = T26 - T2b; + { + E T21, T2c, T62, T63; + T21 = T1V + T20; + T2c = T26 + T2b; + T2d = T21 + T2c; + T5Z = T21 - T2c; + T62 = T3Q + T3R; + T63 = T3M + T3N; + T64 = T62 - T63; + T6K = T63 + T62; + } + { + E T3P, T3U, T42, T43; + T3P = T3L + T3O; + T3U = T3S - T3T; + T3V = KP707106781 * (T3P - T3U); + T5a = KP707106781 * (T3P + T3U); + T42 = T3T + T3S; + T43 = T3L - T3O; + T44 = KP707106781 * (T42 - T43); + T57 = KP707106781 * (T43 + T42); + } + } + { + E T2G, T4i, T2L, T4j, T4h, T4k, T2R, T4d, T2W, T4e, T4c, T4f; + { + E T2D, T2F, T2C, T2E; + T2D = cr[WS(rs, 3)]; + T2F = ci[WS(rs, 3)]; + T2C = W[4]; + T2E = W[5]; + T2G = FMA(T2C, T2D, T2E * T2F); + T4i = FNMS(T2E, T2D, T2C * T2F); + } + { + E T2I, T2K, T2H, T2J; + T2I = cr[WS(rs, 19)]; + T2K = ci[WS(rs, 19)]; + T2H = W[36]; + T2J = W[37]; + T2L = FMA(T2H, T2I, T2J * T2K); + T4j = FNMS(T2J, T2I, T2H * T2K); + } + T4h = T2G - T2L; + T4k = T4i - T4j; + { + E T2O, T2Q, T2N, T2P; + T2O = cr[WS(rs, 27)]; + T2Q = ci[WS(rs, 27)]; + T2N = W[52]; + T2P = W[53]; + T2R = FMA(T2N, T2O, T2P * T2Q); + T4d = FNMS(T2P, T2O, T2N * T2Q); + } + { + E T2T, T2V, T2S, T2U; + T2T = cr[WS(rs, 11)]; + T2V = ci[WS(rs, 11)]; + T2S = W[20]; + T2U = W[21]; + T2W = FMA(T2S, T2T, T2U * T2V); + T4e = FNMS(T2U, T2T, T2S * T2V); + } + T4c = T2R - T2W; + T4f = T4d - T4e; + { + E T2M, T2X, T68, T69; + T2M = T2G + T2L; + T2X = T2R + T2W; + T2Y = T2M + T2X; + T6f = T2M - T2X; + T68 = T4d + T4e; + T69 = T4i + T4j; + T6a = T68 - T69; + T6P = T69 + T68; + } + { + E T4g, T4l, T4t, T4u; + T4g = T4c + T4f; + T4l = T4h - T4k; + T4m = KP707106781 * (T4g - T4l); + T5e = KP707106781 * (T4l + T4g); + T4t = T4h + T4k; + T4u = T4f - T4c; + T4v = KP707106781 * (T4t - T4u); + T5h = KP707106781 * (T4t + T4u); + } + } + { + E T1t, T6X, 
T7a, T7c, T30, T7b, T70, T71; + { + E TH, T1s, T72, T79; + TH = Tj + TG; + T1s = T14 + T1r; + T1t = TH + T1s; + T6X = TH - T1s; + T72 = T6F + T6E; + T79 = T73 + T78; + T7a = T72 + T79; + T7c = T79 - T72; + } + { + E T2e, T2Z, T6Y, T6Z; + T2e = T1Q + T2d; + T2Z = T2B + T2Y; + T30 = T2e + T2Z; + T7b = T2Z - T2e; + T6Y = T6O + T6P; + T6Z = T6J + T6K; + T70 = T6Y - T6Z; + T71 = T6Z + T6Y; + } + ci[WS(rs, 15)] = T1t - T30; + cr[WS(rs, 24)] = T7b - T7c; + ci[WS(rs, 23)] = T7b + T7c; + cr[0] = T1t + T30; + cr[WS(rs, 8)] = T6X - T70; + cr[WS(rs, 16)] = T71 - T7a; + ci[WS(rs, 31)] = T71 + T7a; + ci[WS(rs, 7)] = T6X + T70; + } + { + E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j; + E T5n, T4W, T7z; + T4W = KP707106781 * (T4U + T4V); + T4X = T4T - T4W; + T5p = T4T + T4W; + T7z = KP707106781 * (T3a - T3f); + T7D = T7z + T7C; + T7J = T7C - T7z; + { + E T50, T53, T5x, T5y; + T50 = FMA(KP923879532, T4Y, KP382683432 * T4Z); + T53 = FNMS(KP923879532, T52, KP382683432 * T51); + T54 = T50 + T53; + T7y = T50 - T53; + T5x = T5d + T5e; + T5y = T5g + T5h; + T5z = FNMS(KP980785280, T5y, KP195090322 * T5x); + T5D = FMA(KP980785280, T5x, KP195090322 * T5y); + } + { + E T58, T5b, T5q, T5r; + T58 = T56 - T57; + T5b = T59 - T5a; + T5c = FMA(KP831469612, T58, KP555570233 * T5b); + T5m = FNMS(KP831469612, T5b, KP555570233 * T58); + T5q = FNMS(KP382683432, T4Y, KP923879532 * T4Z); + T5r = FMA(KP382683432, T52, KP923879532 * T51); + T5s = T5q + T5r; + T7I = T5r - T5q; + } + { + E T5u, T5v, T5f, T5i; + T5u = T56 + T57; + T5v = T59 + T5a; + T5w = FMA(KP195090322, T5u, KP980785280 * T5v); + T5C = FNMS(KP195090322, T5v, KP980785280 * T5u); + T5f = T5d - T5e; + T5i = T5g - T5h; + T5j = FNMS(KP555570233, T5i, KP831469612 * T5f); + T5n = FMA(KP555570233, T5f, KP831469612 * T5i); + } + { + E T55, T5k, T7H, T7K; + T55 = T4X + T54; + T5k = T5c + T5j; + ci[WS(rs, 12)] = T55 - T5k; + cr[WS(rs, 3)] = T55 + T5k; + T7H = T5n - T5m; + T7K = T7I + T7J; + cr[WS(rs, 19)] = T7H - T7K; + 
ci[WS(rs, 28)] = T7H + T7K; + } + { + E T7L, T7M, T5l, T5o; + T7L = T5j - T5c; + T7M = T7J - T7I; + cr[WS(rs, 27)] = T7L - T7M; + ci[WS(rs, 20)] = T7L + T7M; + T5l = T4X - T54; + T5o = T5m + T5n; + cr[WS(rs, 11)] = T5l - T5o; + ci[WS(rs, 4)] = T5l + T5o; + } + { + E T5t, T5A, T7x, T7E; + T5t = T5p - T5s; + T5A = T5w + T5z; + ci[WS(rs, 8)] = T5t - T5A; + cr[WS(rs, 7)] = T5t + T5A; + T7x = T5z - T5w; + T7E = T7y + T7D; + cr[WS(rs, 31)] = T7x - T7E; + ci[WS(rs, 16)] = T7x + T7E; + } + { + E T7F, T7G, T5B, T5E; + T7F = T5D - T5C; + T7G = T7D - T7y; + cr[WS(rs, 23)] = T7F - T7G; + ci[WS(rs, 24)] = T7F + T7G; + T5B = T5p + T5s; + T5E = T5C + T5D; + cr[WS(rs, 15)] = T5B - T5E; + ci[0] = T5B + T5E; + } + } + { + E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V; + { + E T6D, T6G, T7e, T7f; + T6D = Tj - TG; + T6G = T6E - T6F; + T6H = T6D - T6G; + T6T = T6D + T6G; + T7e = T14 - T1r; + T7f = T78 - T73; + T7g = T7e + T7f; + T7i = T7f - T7e; + } + { + E T6I, T6L, T6N, T6Q; + T6I = T1Q - T2d; + T6L = T6J - T6K; + T6M = T6I + T6L; + T6U = T6I - T6L; + T6N = T2B - T2Y; + T6Q = T6O - T6P; + T6R = T6N - T6Q; + T6V = T6N + T6Q; + } + { + E T6S, T7h, T6W, T7d; + T6S = KP707106781 * (T6M + T6R); + ci[WS(rs, 11)] = T6H - T6S; + cr[WS(rs, 4)] = T6H + T6S; + T7h = KP707106781 * (T6V - T6U); + cr[WS(rs, 20)] = T7h - T7i; + ci[WS(rs, 27)] = T7h + T7i; + T6W = KP707106781 * (T6U + T6V); + cr[WS(rs, 12)] = T6T - T6W; + ci[WS(rs, 3)] = T6T + T6W; + T7d = KP707106781 * (T6R - T6M); + cr[WS(rs, 28)] = T7d - T7g; + ci[WS(rs, 19)] = T7d + T7g; + } + } + { + E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h; + E T6l; + { + E T5O, T5T, T60, T65; + T5J = T5F - T5I; + T7n = T7l + T7m; + T7t = T7m - T7l; + T6n = T5F + T5I; + T5O = T5K + T5N; + T5T = T5P - T5S; + T5U = KP707106781 * (T5O + T5T); + T7k = KP707106781 * (T5O - T5T); + { + E T6v, T6w, T6o, T6p; + T6v = T6e + T6f; + T6w = T67 + T6a; + T6x = FMA(KP382683432, T6v, KP923879532 * T6w); + T6B = FNMS(KP923879532, T6v, KP382683432 
* T6w); + T6o = T5K - T5N; + T6p = T5P + T5S; + T6q = KP707106781 * (T6o + T6p); + T7s = KP707106781 * (T6p - T6o); + } + T60 = T5Y - T5Z; + T65 = T61 - T64; + T66 = FMA(KP382683432, T60, KP923879532 * T65); + T6k = FNMS(KP923879532, T60, KP382683432 * T65); + { + E T6s, T6t, T6b, T6g; + T6s = T61 + T64; + T6t = T5Y + T5Z; + T6u = FNMS(KP382683432, T6t, KP923879532 * T6s); + T6A = FMA(KP923879532, T6t, KP382683432 * T6s); + T6b = T67 - T6a; + T6g = T6e - T6f; + T6h = FNMS(KP382683432, T6g, KP923879532 * T6b); + T6l = FMA(KP923879532, T6g, KP382683432 * T6b); + } + } + { + E T5V, T6i, T7r, T7u; + T5V = T5J + T5U; + T6i = T66 + T6h; + ci[WS(rs, 13)] = T5V - T6i; + cr[WS(rs, 2)] = T5V + T6i; + T7r = T6l - T6k; + T7u = T7s + T7t; + cr[WS(rs, 18)] = T7r - T7u; + ci[WS(rs, 29)] = T7r + T7u; + } + { + E T7v, T7w, T6j, T6m; + T7v = T6h - T66; + T7w = T7t - T7s; + cr[WS(rs, 26)] = T7v - T7w; + ci[WS(rs, 21)] = T7v + T7w; + T6j = T5J - T5U; + T6m = T6k + T6l; + cr[WS(rs, 10)] = T6j - T6m; + ci[WS(rs, 5)] = T6j + T6m; + } + { + E T6r, T6y, T7j, T7o; + T6r = T6n + T6q; + T6y = T6u + T6x; + cr[WS(rs, 14)] = T6r - T6y; + ci[WS(rs, 1)] = T6r + T6y; + T7j = T6B - T6A; + T7o = T7k + T7n; + cr[WS(rs, 30)] = T7j - T7o; + ci[WS(rs, 17)] = T7j + T7o; + } + { + E T7p, T7q, T6z, T6C; + T7p = T6x - T6u; + T7q = T7n - T7k; + cr[WS(rs, 22)] = T7p - T7q; + ci[WS(rs, 25)] = T7p + T7q; + T6z = T6n - T6q; + T6C = T6A + T6B; + ci[WS(rs, 9)] = T6z - T6C; + cr[WS(rs, 6)] = T6z + T6C; + } + } + { + E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x; + E T4B, T3g, T7P; + T3g = KP707106781 * (T3a + T3f); + T3h = T35 - T3g; + T4D = T35 + T3g; + T7P = KP707106781 * (T4V - T4U); + T7R = T7P + T7Q; + T7X = T7Q - T7P; + { + E T3s, T3D, T4L, T4M; + T3s = FNMS(KP923879532, T3r, KP382683432 * T3m); + T3D = FMA(KP923879532, T3x, KP382683432 * T3C); + T3E = T3s + T3D; + T7O = T3D - T3s; + T4L = T4s + T4v; + T4M = T4b + T4m; + T4N = FNMS(KP195090322, T4M, KP980785280 * T4L); + T4R = 
FMA(KP980785280, T4M, KP195090322 * T4L); + } + { + E T3W, T45, T4E, T4F; + T3W = T3K - T3V; + T45 = T41 - T44; + T46 = FNMS(KP555570233, T45, KP831469612 * T3W); + T4A = FMA(KP831469612, T45, KP555570233 * T3W); + T4E = FMA(KP382683432, T3r, KP923879532 * T3m); + T4F = FNMS(KP382683432, T3x, KP923879532 * T3C); + T4G = T4E + T4F; + T7W = T4E - T4F; + } + { + E T4I, T4J, T4n, T4w; + T4I = T41 + T44; + T4J = T3K + T3V; + T4K = FMA(KP195090322, T4I, KP980785280 * T4J); + T4Q = FNMS(KP980785280, T4I, KP195090322 * T4J); + T4n = T4b - T4m; + T4w = T4s - T4v; + T4x = FMA(KP555570233, T4n, KP831469612 * T4w); + T4B = FNMS(KP831469612, T4n, KP555570233 * T4w); + } + { + E T3F, T4y, T7V, T7Y; + T3F = T3h + T3E; + T4y = T46 + T4x; + cr[WS(rs, 13)] = T3F - T4y; + ci[WS(rs, 2)] = T3F + T4y; + T7V = T4B - T4A; + T7Y = T7W + T7X; + cr[WS(rs, 29)] = T7V - T7Y; + ci[WS(rs, 18)] = T7V + T7Y; + } + { + E T7Z, T80, T4z, T4C; + T7Z = T4x - T46; + T80 = T7X - T7W; + cr[WS(rs, 21)] = T7Z - T80; + ci[WS(rs, 26)] = T7Z + T80; + T4z = T3h - T3E; + T4C = T4A + T4B; + ci[WS(rs, 10)] = T4z - T4C; + cr[WS(rs, 5)] = T4z + T4C; + } + { + E T4H, T4O, T7N, T7S; + T4H = T4D + T4G; + T4O = T4K + T4N; + ci[WS(rs, 14)] = T4H - T4O; + cr[WS(rs, 1)] = T4H + T4O; + T7N = T4R - T4Q; + T7S = T7O + T7R; + cr[WS(rs, 17)] = T7N - T7S; + ci[WS(rs, 30)] = T7N + T7S; + } + { + E T7T, T7U, T4P, T4S; + T7T = T4N - T4K; + T7U = T7R - T7O; + cr[WS(rs, 25)] = T7T - T7U; + ci[WS(rs, 22)] = T7T + T7U; + T4P = T4D - T4G; + T4S = T4Q + T4R; + cr[WS(rs, 9)] = T4P - T4S; + ci[WS(rs, 6)] = T4P + T4S; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 32}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 32, "hf_32", twinstr, &GENUS, {340, 114, 94, 0} }; + +void X(codelet_hf_32) (planner *p) { + X(khc2hc_register) (p, hf_32, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hf_4 -include hf.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 31 stack variables, 0 constants, and 16 memory accesses + */ +#include "hf.h" + +static void hf_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs)) { + E To, Te, Tm, T8, Ty, Tw, Tq, Tk; + { + E T1, Tv, Tu, T7, Tg, Tj, Tf, Ti, Tp, Th; + T1 = cr[0]; + Tv = ci[0]; + { + E T3, T6, T2, T5; + T3 = cr[WS(rs, 2)]; + T6 = ci[WS(rs, 2)]; + T2 = W[2]; + T5 = W[3]; + { + E Ta, Td, Tc, Tn, Tb, Tt, T4, T9; + Ta = cr[WS(rs, 
1)]; + Td = ci[WS(rs, 1)]; + Tt = T2 * T6; + T4 = T2 * T3; + T9 = W[0]; + Tc = W[1]; + Tu = FNMS(T5, T3, Tt); + T7 = FMA(T5, T6, T4); + Tn = T9 * Td; + Tb = T9 * Ta; + Tg = cr[WS(rs, 3)]; + Tj = ci[WS(rs, 3)]; + To = FNMS(Tc, Ta, Tn); + Te = FMA(Tc, Td, Tb); + Tf = W[4]; + Ti = W[5]; + } + } + Tm = T1 - T7; + T8 = T1 + T7; + Tp = Tf * Tj; + Th = Tf * Tg; + Ty = Tv - Tu; + Tw = Tu + Tv; + Tq = FNMS(Ti, Tg, Tp); + Tk = FMA(Ti, Tj, Th); + } + { + E Tr, Ts, Tl, Tx; + Tr = To - Tq; + Ts = To + Tq; + Tl = Te + Tk; + Tx = Tk - Te; + ci[WS(rs, 3)] = Ts + Tw; + cr[WS(rs, 2)] = Ts - Tw; + cr[WS(rs, 1)] = Tm + Tr; + ci[0] = Tm - Tr; + ci[WS(rs, 2)] = Tx + Ty; + cr[WS(rs, 3)] = Tx - Ty; + cr[0] = T8 + Tl; + ci[WS(rs, 1)] = T8 - Tl; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hf_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hf_4) (planner *p) { + X(khc2hc_register) (p, hf_4, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hf_4 -include hf.h */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 13 stack variables, 0 constants, and 16 memory accesses + */ +#include "hf.h" + +static void hf_4(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs)) { + E T1, Tp, T6, To, Tc, Tk, Th, Tl; + T1 = cr[0]; + Tp = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 2)]; + T5 = ci[WS(rs, 2)]; + T2 = W[2]; + T4 = W[3]; + T6 = FMA(T2, T3, T4 * T5); + To = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 1)]; + Tb = ci[WS(rs, 1)]; + T8 = W[0]; + Ta = W[1]; + Tc = FMA(T8, T9, Ta * Tb); + Tk = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = 
cr[WS(rs, 3)]; + Tg = ci[WS(rs, 3)]; + Td = W[4]; + Tf = W[5]; + Th = FMA(Td, Te, Tf * Tg); + Tl = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, Tj, Tm; + T7 = T1 + T6; + Ti = Tc + Th; + ci[WS(rs, 1)] = T7 - Ti; + cr[0] = T7 + Ti; + Tj = T1 - T6; + Tm = Tk - Tl; + ci[0] = Tj - Tm; + cr[WS(rs, 1)] = Tj + Tm; + } + { + E Tn, Tq, Tr, Ts; + Tn = Tk + Tl; + Tq = To + Tp; + cr[WS(rs, 2)] = Tn - Tq; + ci[WS(rs, 3)] = Tn + Tq; + Tr = Th - Tc; + Ts = Tp - To; + cr[WS(rs, 3)] = Tr - Ts; + ci[WS(rs, 2)] = Tr + Ts; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 4}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 4, "hf_4", twinstr, &GENUS, {16, 6, 6, 0} }; + +void X(codelet_hf_4) (planner *p) { + X(khc2hc_register) (p, hf_4, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 5 -dit -name hf_5 -include hf.h */ + +/* + * This function contains 40 FP additions, 34 FP multiplications, + * (or, 14 additions, 8 multiplications, 26 fused multiply/add), + * 43 stack variables, 4 constants, and 20 memory accesses + */ +#include "hf.h" + +static void hf_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) { + E T1, TJ, TK, TA, TR, Te, TC, Tk, TE, Tq; + { + E Tg, Tj, Tm, TB, Th, Tp, Tl, Ti, To, TD, Tn; + T1 = cr[0]; + TJ = ci[0]; + { + E T9, Tc, Ty, Ta, Tb, Tx, T7, Tf, Tz, Td; + { + E T3, T6, T8, Tw, T4, T2, T5; + T3 = cr[WS(rs, 1)]; + T6 = ci[WS(rs, 1)]; + T2 = W[0]; + T9 = cr[WS(rs, 4)]; + Tc = ci[WS(rs, 4)]; + T8 = W[6]; + Tw = T2 * T6; + T4 = T2 * T3; + T5 = W[1]; + Ty = T8 * Tc; + Ta = T8 * T9; + Tb = W[7]; + Tx = FNMS(T5, T3, Tw); + T7 = FMA(T5, T6, T4); + } + Tg = cr[WS(rs, 2)]; + Tz = FNMS(Tb, T9, Ty); + Td = FMA(Tb, Tc, Ta); + Tj = ci[WS(rs, 2)]; + Tf = W[2]; + TK = Tx + Tz; + TA = Tx - Tz; + TR = Td - T7; + Te = T7 + Td; + Tm = cr[WS(rs, 3)]; + TB = Tf * Tj; + Th = Tf * Tg; + Tp = ci[WS(rs, 3)]; + 
Tl = W[4]; + Ti = W[3]; + To = W[5]; + } + TD = Tl * Tp; + Tn = Tl * Tm; + TC = FNMS(Ti, Tg, TB); + Tk = FMA(Ti, Tj, Th); + TE = FNMS(To, Tm, TD); + Tq = FMA(To, Tp, Tn); + } + { + E TG, TI, TO, TS, TU, Tu, TN, Tt, TL, TF; + TL = TC + TE; + TF = TC - TE; + { + E Tr, TQ, TM, Ts; + Tr = Tk + Tq; + TQ = Tk - Tq; + TG = FMA(KP618033988, TF, TA); + TI = FNMS(KP618033988, TA, TF); + TO = TK - TL; + TM = TK + TL; + TS = FMA(KP618033988, TR, TQ); + TU = FNMS(KP618033988, TQ, TR); + Tu = Te - Tr; + Ts = Te + Tr; + ci[WS(rs, 4)] = TM + TJ; + TN = FNMS(KP250000000, TM, TJ); + cr[0] = T1 + Ts; + Tt = FNMS(KP250000000, Ts, T1); + } + { + E TT, TP, Tv, TH; + TT = FMA(KP559016994, TO, TN); + TP = FNMS(KP559016994, TO, TN); + Tv = FMA(KP559016994, Tu, Tt); + TH = FNMS(KP559016994, Tu, Tt); + ci[WS(rs, 2)] = FMA(KP951056516, TS, TP); + cr[WS(rs, 3)] = FMS(KP951056516, TS, TP); + ci[WS(rs, 3)] = FMA(KP951056516, TU, TT); + cr[WS(rs, 4)] = FMS(KP951056516, TU, TT); + ci[WS(rs, 1)] = FMA(KP951056516, TI, TH); + cr[WS(rs, 2)] = FNMS(KP951056516, TI, TH); + cr[WS(rs, 1)] = FMA(KP951056516, TG, Tv); + ci[0] = FNMS(KP951056516, TG, Tv); + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 5}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 5, "hf_5", twinstr, &GENUS, {14, 8, 26, 0} }; + +void X(codelet_hf_5) (planner *p) { + X(khc2hc_register) (p, hf_5, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 5 -dit -name hf_5 -include hf.h */ + +/* + * This function contains 40 FP additions, 28 FP multiplications, + * (or, 26 additions, 14 multiplications, 14 fused multiply/add), + * 29 stack variables, 4 constants, and 20 memory accesses + */ +#include "hf.h" + +static void hf_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + 
DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) { + E T1, TE, Tu, Tx, TC, TB, TF, TG, TH, Tc, Tn, To; + T1 = cr[0]; + TE = ci[0]; + { + E T6, Ts, Tm, Tw, Tb, Tt, Th, Tv; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 1)]; + T5 = ci[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + Ts = FNMS(T4, T3, T2 * T5); + } + { + E Tj, Tl, Ti, Tk; + Tj = cr[WS(rs, 3)]; + Tl = ci[WS(rs, 3)]; + Ti = W[4]; + Tk = W[5]; + Tm = FMA(Ti, Tj, Tk * Tl); + Tw = FNMS(Tk, Tj, Ti * Tl); + } + { + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 4)]; + Ta = ci[WS(rs, 4)]; + T7 = W[6]; + T9 = W[7]; + Tb = FMA(T7, T8, T9 * Ta); + Tt = FNMS(T9, T8, T7 * Ta); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 2)]; + Tg = ci[WS(rs, 2)]; + Td = W[2]; + Tf = W[3]; + Th = FMA(Td, Te, Tf * Tg); + Tv = FNMS(Tf, Te, Td * Tg); + } + Tu = Ts - Tt; + Tx = Tv - Tw; + TC = Th - Tm; + TB = Tb - T6; + TF = Ts + Tt; + TG = Tv + Tw; + TH = TF + TG; + Tc = T6 + Tb; + Tn = Th + Tm; + To = Tc + Tn; + } + cr[0] = T1 + To; + { + E Ty, TA, Tr, Tz, Tp, Tq; + Ty = FMA(KP951056516, Tu, KP587785252 * Tx); + TA = FNMS(KP587785252, Tu, KP951056516 * Tx); + Tp = KP559016994 * (Tc - Tn); + Tq = FNMS(KP250000000, To, T1); + Tr = Tp + Tq; + Tz = Tq - Tp; + ci[0] = Tr - Ty; + ci[WS(rs, 1)] = Tz + TA; + cr[WS(rs, 1)] = Tr + Ty; + cr[WS(rs, 2)] = Tz - TA; + } + ci[WS(rs, 4)] = TH + TE; + { + E TD, TL, TK, TM, TI, TJ; + TD = FMA(KP587785252, TB, KP951056516 * TC); + TL = FNMS(KP587785252, TC, KP951056516 * TB); + TI = FNMS(KP250000000, TH, TE); + TJ = KP559016994 * (TF - TG); + TK = TI - TJ; + TM = TJ + TI; + cr[WS(rs, 3)] = TD - TK; + ci[WS(rs, 3)] = TL + TM; + ci[WS(rs, 2)] = TD + TK; + cr[WS(rs, 4)] = TL - TM; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 5}, + {TW_NEXT, 1, 0} +}; + 
+static const hc2hc_desc desc = { 5, "hf_5", twinstr, &GENUS, {26, 14, 14, 0} }; + +void X(codelet_hf_5) (planner *p) { + X(khc2hc_register) (p, hf_5, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hf_6 -include hf.h */ + +/* + * This function contains 46 FP additions, 32 FP multiplications, + * (or, 24 additions, 10 multiplications, 22 fused multiply/add), + * 47 stack variables, 2 constants, and 24 memory accesses + */ +#include "hf.h" + +static void hf_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { + E T11, T12, T14, T13; + { + E T1, TV, TX, T7, Tn, Tq, TO, TR, TB, Tl, To, TH, Tt, Tw, Ts; + E Tp, Tv; + T1 = cr[0]; + TV = ci[0]; + { + E T3, T6, T2, T5; + T3 = cr[WS(rs, 3)]; + T6 = ci[WS(rs, 3)]; + T2 = W[4]; + T5 = W[5]; + { + E Ta, Td, Tg, TM, Tb, Tj, Tf, Tc, Ti, TW, T4, T9; + Ta = cr[WS(rs, 2)]; + Td = ci[WS(rs, 2)]; + TW = T2 * T6; + T4 = T2 * T3; + T9 = W[2]; + Tg = cr[WS(rs, 5)]; + TX = FNMS(T5, T3, TW); + T7 = FMA(T5, T6, T4); + TM = T9 * Td; + Tb = T9 * Ta; + Tj = ci[WS(rs, 5)]; + Tf = W[8]; + Tc = W[3]; + Ti = W[9]; + { + E TN, Te, TL, Tk, TK, Th, Tm; + Tn = cr[WS(rs, 4)]; + TK = Tf * Tj; + Th = Tf * Tg; + TN = FNMS(Tc, Ta, TM); + Te = FMA(Tc, Td, Tb); + TL = FNMS(Ti, Tg, TK); + Tk = FMA(Ti, Tj, Th); + Tq = ci[WS(rs, 4)]; + Tm = W[6]; + TO = TL - TN; + TR = TN + TL; + TB = Te + Tk; + Tl = Te - Tk; + To = Tm * Tn; + TH = Tm 
* Tq; + } + Tt = cr[WS(rs, 1)]; + Tw = ci[WS(rs, 1)]; + Ts = W[0]; + Tp = W[7]; + Tv = W[1]; + } + } + { + E TA, T8, TI, Tr, TG, Tx, TF, Tu; + TA = T1 + T7; + T8 = T1 - T7; + TF = Ts * Tw; + Tu = Ts * Tt; + TI = FNMS(Tp, Tn, TH); + Tr = FMA(Tp, Tq, To); + TG = FNMS(Tv, Tt, TF); + Tx = FMA(Tv, Tw, Tu); + { + E TY, TU, TP, TT, TD, T10, Tz, TZ, TQ, TE; + T11 = TX + TV; + TY = TV - TX; + { + E TJ, TS, TC, Ty; + TJ = TG - TI; + TS = TI + TG; + TC = Tr + Tx; + Ty = Tr - Tx; + TU = TO + TJ; + TP = TJ - TO; + TT = TR - TS; + T12 = TR + TS; + T14 = TB - TC; + TD = TB + TC; + T10 = Ty - Tl; + Tz = Tl + Ty; + TZ = FMA(KP500000000, TU, TY); + } + cr[0] = TA + TD; + TQ = FNMS(KP500000000, TD, TA); + ci[WS(rs, 2)] = T8 + Tz; + TE = FNMS(KP500000000, Tz, T8); + cr[WS(rs, 3)] = TU - TY; + cr[WS(rs, 2)] = FNMS(KP866025403, TT, TQ); + ci[WS(rs, 1)] = FMA(KP866025403, TT, TQ); + ci[0] = FNMS(KP866025403, TP, TE); + cr[WS(rs, 1)] = FMA(KP866025403, TP, TE); + ci[WS(rs, 4)] = FMA(KP866025403, T10, TZ); + cr[WS(rs, 5)] = FMS(KP866025403, T10, TZ); + } + } + } + ci[WS(rs, 5)] = T12 + T11; + T13 = FNMS(KP500000000, T12, T11); + ci[WS(rs, 3)] = FMA(KP866025403, T14, T13); + cr[WS(rs, 4)] = FMS(KP866025403, T14, T13); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 6, "hf_6", twinstr, &GENUS, {24, 10, 22, 0} }; + +void X(codelet_hf_6) (planner *p) { + X(khc2hc_register) (p, hf_6, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hf_6 -include hf.h */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 23 stack variables, 2 constants, and 24 memory accesses + */ +#include "hf.h" + +static void hf_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + 
DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { + E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC; + { + E T1, TM, T6, TN; + T1 = cr[0]; + TM = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 3)]; + T5 = ci[WS(rs, 3)]; + T2 = W[4]; + T4 = W[5]; + T6 = FMA(T2, T3, T4 * T5); + TN = FNMS(T4, T3, T2 * T5); + } + T7 = T1 - T6; + TS = TN + TM; + Tv = T1 + T6; + TO = TM - TN; + } + { + E Tn, TE, Ts, TD; + { + E Tk, Tm, Tj, Tl; + Tk = cr[WS(rs, 4)]; + Tm = ci[WS(rs, 4)]; + Tj = W[6]; + Tl = W[7]; + Tn = FMA(Tj, Tk, Tl * Tm); + TE = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = cr[WS(rs, 1)]; + Tr = ci[WS(rs, 1)]; + To = W[0]; + Tq = W[1]; + Ts = FMA(To, Tp, Tq * Tr); + TD = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn - Ts; + TJ = TE + TD; + Tx = Tn + Ts; + TF = TD - TE; + } + { + E Tc, TA, Th, TB; + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 2)]; + Tb = ci[WS(rs, 2)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + TA = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 5)]; + Tg = ci[WS(rs, 5)]; + Td = W[8]; + Tf = W[9]; + Th = FMA(Td, Te, Tf * Tg); + TB = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc - Th; + TI = TA + TB; + Tw = Tc + Th; + TC = TA - TB; + } + { + E TG, Tu, Tz, TK, Ty, TH; + TG = KP866025403 * (TC + TF); + Tu = Ti + Tt; + Tz = FNMS(KP500000000, Tu, T7); + ci[WS(rs, 2)] = T7 + Tu; + cr[WS(rs, 1)] = Tz + TG; + ci[0] = Tz - TG; + TK = KP866025403 * (TI - TJ); + Ty = Tw + Tx; + TH = FNMS(KP500000000, Ty, Tv); + cr[0] = Tv + Ty; + ci[WS(rs, 1)] = TH + TK; + cr[WS(rs, 2)] = TH - TK; + } + { + E TP, TL, TQ, TR, TT, TU; + TP = KP866025403 * (Tt - Ti); + TL = TF - TC; + TQ = FMA(KP500000000, TL, TO); + cr[WS(rs, 3)] = TL - TO; + ci[WS(rs, 4)] = TP + TQ; + cr[WS(rs, 5)] = TP - TQ; + TR = KP866025403 * (Tw - Tx); + TT = TI + TJ; + TU = FNMS(KP500000000, TT, TS); + cr[WS(rs, 4)] = TR - 
TU; + ci[WS(rs, 5)] = TT + TS; + ci[WS(rs, 3)] = TR + TU; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 6}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 6, "hf_6", twinstr, &GENUS, {32, 14, 14, 0} }; + +void X(codelet_hf_6) (planner *p) { + X(khc2hc_register) (p, hf_6, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3948 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:10 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -dit -name hf_64 -include hf.h */ + +/* + * This function contains 1038 FP additions, 644 FP multiplications, + * (or, 520 additions, 126 multiplications, 518 fused multiply/add), + * 246 stack variables, 15 constants, and 256 memory accesses + */ +#include "hf.h" + +static void hf_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + 
ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tku, Tky, Tkt, Tkx; + { + E TiV, Tjm, T7e, TcA, TjR, Tkl, Tm, TeM, T7Q, TcI, TeZ, Thr, T1G, TeW, TcJ; + E T7X, T87, TcN, Tf5, Thw, T29, Tf8, TcQ, T8u, Taq, Tdm, Tgc, ThX, T5K, TfS; + E Tdx, Tbj, TcB, T7l, TiP, TeP, Tjl, TN, TcC, T7s, T7B, TcF, TeU, Ths, T7I; + E TcG, T1f, TeR, T8G, TcU, Tfg, ThB, T32, Tfj, TcX, T93, T9h, Td3, TfK, ThM; + E T3X, Tfr, Tde, Taa, Thx, Tfb, Tf6, T2A, T8x, TcO, T8m, TcR, Tfm, ThC, T3t; + E Tfh, T96, TcV, T8V, TcY, ThN, Tfu, TfL, T4o, Tad, Td4, T9w, Tdf, TfV, ThY; + E T6b, Tg9, Tbm, Tdn, TaF, Tdy, ThJ, T4Q, TfN, TfA, Taf, T9M, Td8, Tdh, ThI; + E T5h, TfO, TfF, Tag, Ta1, Tdb, Tdi, ThU, T6D, Tgf, Tg1, Tbo, TaV, Tdr, TdA; + E Tb2, Tds, Tg5, ThT, Tg2, T74, Tdt, Tb9; + { + E T7a, Te, T78, T8, TjQ, TiU, T7c, Tk; + { + E T1, TiT, TiS, T7, Tg, Tj, Tf, Ti, T7b, Th; + T1 = cr[0]; + TiT = ci[0]; + { + E T3, T6, T2, T5; + T3 = cr[WS(rs, 32)]; + T6 = ci[WS(rs, 32)]; + T2 = W[62]; + T5 = W[63]; + { + E Ta, Td, Tc, T79, Tb, TiR, T4, T9; + Ta = cr[WS(rs, 16)]; + Td = ci[WS(rs, 16)]; + TiR = T2 * T6; + T4 = T2 * T3; + T9 = W[30]; + Tc = W[31]; + TiS = FNMS(T5, T3, TiR); + T7 = FMA(T5, T6, T4); + T79 = T9 * Td; + Tb = T9 * Ta; + Tg = cr[WS(rs, 48)]; + Tj = ci[WS(rs, 48)]; + T7a = FNMS(Tc, Ta, T79); + Te = FMA(Tc, Td, Tb); + Tf = W[94]; + Ti = W[95]; + } + } + T78 = T1 - T7; + T8 = T1 + T7; + TjQ = TiT - TiS; + TiU = TiS + TiT; + T7b = Tf * Tj; + Th = Tf * Tg; + T7c = FNMS(Ti, Tg, T7b); + Tk = FMA(Ti, Tj, Th); + } + { + E T7S, T1l, T7O, T1E, T1u, T1x, T1w, T7U, T1r, T7L, T1v; + { + E T1A, T1D, T1C, T7N, T1B; + { + E T1h, T1k, T1g, T1j, T7R, T1i, T1z; + T1h = cr[WS(rs, 60)]; + T1k = ci[WS(rs, 60)]; + { + E T7d, TiQ, Tl, TjP; + T7d = T7a - T7c; + TiQ = T7a + T7c; + Tl = Te + Tk; + TjP = Te - Tk; + TiV = TiQ + TiU; + Tjm = TiU - TiQ; + T7e = T78 - T7d; + TcA = T78 + T7d; + TjR = TjP + TjQ; + Tkl = TjQ - TjP; + Tm = T8 + Tl; + TeM = T8 - Tl; + T1g = W[118]; + } + T1j = W[119]; + T1A = cr[WS(rs, 
44)]; + T1D = ci[WS(rs, 44)]; + T7R = T1g * T1k; + T1i = T1g * T1h; + T1z = W[86]; + T1C = W[87]; + T7S = FNMS(T1j, T1h, T7R); + T1l = FMA(T1j, T1k, T1i); + T7N = T1z * T1D; + T1B = T1z * T1A; + } + { + E T1n, T1q, T1m, T1p, T7T, T1o, T1t; + T1n = cr[WS(rs, 28)]; + T1q = ci[WS(rs, 28)]; + T7O = FNMS(T1C, T1A, T7N); + T1E = FMA(T1C, T1D, T1B); + T1m = W[54]; + T1p = W[55]; + T1u = cr[WS(rs, 12)]; + T1x = ci[WS(rs, 12)]; + T7T = T1m * T1q; + T1o = T1m * T1n; + T1t = W[22]; + T1w = W[23]; + T7U = FNMS(T1p, T1n, T7T); + T1r = FMA(T1p, T1q, T1o); + T7L = T1t * T1x; + T1v = T1t * T1u; + } + } + { + E T7V, TeX, T1s, T7K, T7M, T1y; + T7V = T7S - T7U; + TeX = T7S + T7U; + T1s = T1l + T1r; + T7K = T1l - T1r; + T7M = FNMS(T1w, T1u, T7L); + T1y = FMA(T1w, T1x, T1v); + { + E TeY, T7P, T7W, T1F; + TeY = T7M + T7O; + T7P = T7M - T7O; + T7W = T1y - T1E; + T1F = T1y + T1E; + T7Q = T7K - T7P; + TcI = T7K + T7P; + TeZ = TeX - TeY; + Thr = TeX + TeY; + T1G = T1s + T1F; + TeW = T1s - T1F; + TcJ = T7V - T7W; + T7X = T7V + T7W; + } + } + } + } + { + E T8p, T1O, T85, T27, T1X, T20, T1Z, T8r, T1U, T82, T1Y; + { + E T23, T26, T25, T84, T24; + { + E T1K, T1N, T1J, T1M, T8o, T1L, T22; + T1K = cr[WS(rs, 2)]; + T1N = ci[WS(rs, 2)]; + T1J = W[2]; + T1M = W[3]; + T23 = cr[WS(rs, 50)]; + T26 = ci[WS(rs, 50)]; + T8o = T1J * T1N; + T1L = T1J * T1K; + T22 = W[98]; + T25 = W[99]; + T8p = FNMS(T1M, T1K, T8o); + T1O = FMA(T1M, T1N, T1L); + T84 = T22 * T26; + T24 = T22 * T23; + } + { + E T1Q, T1T, T1P, T1S, T8q, T1R, T1W; + T1Q = cr[WS(rs, 34)]; + T1T = ci[WS(rs, 34)]; + T85 = FNMS(T25, T23, T84); + T27 = FMA(T25, T26, T24); + T1P = W[66]; + T1S = W[67]; + T1X = cr[WS(rs, 18)]; + T20 = ci[WS(rs, 18)]; + T8q = T1P * T1T; + T1R = T1P * T1Q; + T1W = W[34]; + T1Z = W[35]; + T8r = FNMS(T1S, T1Q, T8q); + T1U = FMA(T1S, T1T, T1R); + T82 = T1W * T20; + T1Y = T1W * T1X; + } + } + { + E T8s, Tf3, T1V, T81, T83, T21; + T8s = T8p - T8r; + Tf3 = T8p + T8r; + T1V = T1O + T1U; + T81 = T1O - T1U; + T83 = FNMS(T1Z, T1X, 
T82); + T21 = FMA(T1Z, T20, T1Y); + { + E Tf4, T86, T8t, T28; + Tf4 = T83 + T85; + T86 = T83 - T85; + T8t = T21 - T27; + T28 = T21 + T27; + T87 = T81 - T86; + TcN = T81 + T86; + Tf5 = Tf3 - Tf4; + Thw = Tf3 + Tf4; + T29 = T1V + T28; + Tf8 = T1V - T28; + TcQ = T8s - T8t; + T8u = T8s + T8t; + } + } + } + { + E Tbf, T5p, Tao, T5I, T5y, T5B, T5A, Tbh, T5v, Tal, T5z; + { + E T5E, T5H, T5G, Tan, T5F; + { + E T5l, T5o, T5k, T5n, Tbe, T5m, T5D; + T5l = cr[WS(rs, 63)]; + T5o = ci[WS(rs, 63)]; + T5k = W[124]; + T5n = W[125]; + T5E = cr[WS(rs, 47)]; + T5H = ci[WS(rs, 47)]; + Tbe = T5k * T5o; + T5m = T5k * T5l; + T5D = W[92]; + T5G = W[93]; + Tbf = FNMS(T5n, T5l, Tbe); + T5p = FMA(T5n, T5o, T5m); + Tan = T5D * T5H; + T5F = T5D * T5E; + } + { + E T5r, T5u, T5q, T5t, Tbg, T5s, T5x; + T5r = cr[WS(rs, 31)]; + T5u = ci[WS(rs, 31)]; + Tao = FNMS(T5G, T5E, Tan); + T5I = FMA(T5G, T5H, T5F); + T5q = W[60]; + T5t = W[61]; + T5y = cr[WS(rs, 15)]; + T5B = ci[WS(rs, 15)]; + Tbg = T5q * T5u; + T5s = T5q * T5r; + T5x = W[28]; + T5A = W[29]; + Tbh = FNMS(T5t, T5r, Tbg); + T5v = FMA(T5t, T5u, T5s); + Tal = T5x * T5B; + T5z = T5x * T5y; + } + } + { + E Tbi, Tga, T5w, Tak, Tam, T5C; + Tbi = Tbf - Tbh; + Tga = Tbf + Tbh; + T5w = T5p + T5v; + Tak = T5p - T5v; + Tam = FNMS(T5A, T5y, Tal); + T5C = FMA(T5A, T5B, T5z); + { + E Tgb, Tap, T5J, Tbd; + Tgb = Tam + Tao; + Tap = Tam - Tao; + T5J = T5C + T5I; + Tbd = T5I - T5C; + Taq = Tak - Tap; + Tdm = Tak + Tap; + Tgc = Tga - Tgb; + ThX = Tga + Tgb; + T5K = T5w + T5J; + TfS = T5w - T5J; + Tdx = Tbi + Tbd; + Tbj = Tbd - Tbi; + } + } + } + { + E T7z, T1d, T7G, TeS, T11, T7v, T7x, T17, T7r, T7m; + { + E T7h, Ts, T7q, TL, TB, TE, TD, T7j, Ty, T7n, TC; + { + E TH, TK, TJ, T7p, TI; + { + E To, Tr, Tn, Tq, T7g, Tp, TG; + To = cr[WS(rs, 8)]; + Tr = ci[WS(rs, 8)]; + Tn = W[14]; + Tq = W[15]; + TH = cr[WS(rs, 24)]; + TK = ci[WS(rs, 24)]; + T7g = Tn * Tr; + Tp = Tn * To; + TG = W[46]; + TJ = W[47]; + T7h = FNMS(Tq, To, T7g); + Ts = FMA(Tq, Tr, Tp); + T7p = TG * TK; 
+ TI = TG * TH; + } + { + E Tu, Tx, Tt, Tw, T7i, Tv, TA; + Tu = cr[WS(rs, 40)]; + Tx = ci[WS(rs, 40)]; + T7q = FNMS(TJ, TH, T7p); + TL = FMA(TJ, TK, TI); + Tt = W[78]; + Tw = W[79]; + TB = cr[WS(rs, 56)]; + TE = ci[WS(rs, 56)]; + T7i = Tt * Tx; + Tv = Tt * Tu; + TA = W[110]; + TD = W[111]; + T7j = FNMS(Tw, Tu, T7i); + Ty = FMA(Tw, Tx, Tv); + T7n = TA * TE; + TC = TA * TB; + } + } + { + E T7k, TeO, Tz, T7f, T7o, TF, TeN, TM; + T7k = T7h - T7j; + TeO = T7h + T7j; + Tz = Ts + Ty; + T7f = Ts - Ty; + T7o = FNMS(TD, TB, T7n); + TF = FMA(TD, TE, TC); + T7r = T7o - T7q; + TeN = T7o + T7q; + TM = TF + TL; + T7m = TF - TL; + TcB = T7f + T7k; + T7l = T7f - T7k; + TiP = TeO + TeN; + TeP = TeN - TeO; + Tjl = Tz - TM; + TN = Tz + TM; + } + } + { + E T7D, TU, T13, T16, T7F, T10, T12, T15, T7w, T14; + { + E T19, T1c, T18, T1b; + { + E TQ, TT, TS, T7C, TR, TP; + TQ = cr[WS(rs, 4)]; + TT = ci[WS(rs, 4)]; + TP = W[6]; + TcC = T7m - T7r; + T7s = T7m + T7r; + TS = W[7]; + T7C = TP * TT; + TR = TP * TQ; + T19 = cr[WS(rs, 52)]; + T1c = ci[WS(rs, 52)]; + T7D = FNMS(TS, TQ, T7C); + TU = FMA(TS, TT, TR); + T18 = W[102]; + T1b = W[103]; + } + { + E TW, TZ, TY, T7E, TX, T7y, T1a, TV; + TW = cr[WS(rs, 36)]; + TZ = ci[WS(rs, 36)]; + T7y = T18 * T1c; + T1a = T18 * T19; + TV = W[70]; + TY = W[71]; + T7z = FNMS(T1b, T19, T7y); + T1d = FMA(T1b, T1c, T1a); + T7E = TV * TZ; + TX = TV * TW; + T13 = cr[WS(rs, 20)]; + T16 = ci[WS(rs, 20)]; + T7F = FNMS(TY, TW, T7E); + T10 = FMA(TY, TZ, TX); + T12 = W[38]; + T15 = W[39]; + } + } + T7G = T7D - T7F; + TeS = T7D + T7F; + T11 = TU + T10; + T7v = TU - T10; + T7w = T12 * T16; + T14 = T12 * T13; + T7x = FNMS(T15, T13, T7w); + T17 = FMA(T15, T16, T14); + } + { + E T8Y, T2H, T8E, T30, T2Q, T2T, T2S, T90, T2N, T8B, T2R; + { + E T2W, T2Z, T2Y, T8D, T2X; + { + E T2D, T2G, T2C, T2F, T8X, T2E, T2V; + T2D = cr[WS(rs, 62)]; + T2G = ci[WS(rs, 62)]; + { + E TeT, T7A, T1e, T7H; + TeT = T7x + T7z; + T7A = T7x - T7z; + T1e = T17 + T1d; + T7H = T17 - T1d; + T7B = T7v - T7A; + 
TcF = T7v + T7A; + TeU = TeS - TeT; + Ths = TeS + TeT; + T7I = T7G + T7H; + TcG = T7G - T7H; + T1f = T11 + T1e; + TeR = T11 - T1e; + T2C = W[122]; + } + T2F = W[123]; + T2W = cr[WS(rs, 46)]; + T2Z = ci[WS(rs, 46)]; + T8X = T2C * T2G; + T2E = T2C * T2D; + T2V = W[90]; + T2Y = W[91]; + T8Y = FNMS(T2F, T2D, T8X); + T2H = FMA(T2F, T2G, T2E); + T8D = T2V * T2Z; + T2X = T2V * T2W; + } + { + E T2J, T2M, T2I, T2L, T8Z, T2K, T2P; + T2J = cr[WS(rs, 30)]; + T2M = ci[WS(rs, 30)]; + T8E = FNMS(T2Y, T2W, T8D); + T30 = FMA(T2Y, T2Z, T2X); + T2I = W[58]; + T2L = W[59]; + T2Q = cr[WS(rs, 14)]; + T2T = ci[WS(rs, 14)]; + T8Z = T2I * T2M; + T2K = T2I * T2J; + T2P = W[26]; + T2S = W[27]; + T90 = FNMS(T2L, T2J, T8Z); + T2N = FMA(T2L, T2M, T2K); + T8B = T2P * T2T; + T2R = T2P * T2Q; + } + } + { + E T91, Tfe, T2O, T8A, T8C, T2U; + T91 = T8Y - T90; + Tfe = T8Y + T90; + T2O = T2H + T2N; + T8A = T2H - T2N; + T8C = FNMS(T2S, T2Q, T8B); + T2U = FMA(T2S, T2T, T2R); + { + E Tff, T8F, T92, T31; + Tff = T8C + T8E; + T8F = T8C - T8E; + T92 = T2U - T30; + T31 = T2U + T30; + T8G = T8A - T8F; + TcU = T8A + T8F; + Tfg = Tfe - Tff; + ThB = Tfe + Tff; + T32 = T2O + T31; + Tfj = T2O - T31; + TcX = T91 - T92; + T93 = T91 + T92; + } + } + } + { + E Ta5, T3C, T9f, T3V, T3L, T3O, T3N, Ta7, T3I, T9c, T3M; + { + E T3R, T3U, T3T, T9e, T3S; + { + E T3y, T3B, T3x, T3A, Ta4, T3z, T3Q; + T3y = cr[WS(rs, 1)]; + T3B = ci[WS(rs, 1)]; + T3x = W[0]; + T3A = W[1]; + T3R = cr[WS(rs, 49)]; + T3U = ci[WS(rs, 49)]; + Ta4 = T3x * T3B; + T3z = T3x * T3y; + T3Q = W[96]; + T3T = W[97]; + Ta5 = FNMS(T3A, T3y, Ta4); + T3C = FMA(T3A, T3B, T3z); + T9e = T3Q * T3U; + T3S = T3Q * T3R; + } + { + E T3E, T3H, T3D, T3G, Ta6, T3F, T3K; + T3E = cr[WS(rs, 33)]; + T3H = ci[WS(rs, 33)]; + T9f = FNMS(T3T, T3R, T9e); + T3V = FMA(T3T, T3U, T3S); + T3D = W[64]; + T3G = W[65]; + T3L = cr[WS(rs, 17)]; + T3O = ci[WS(rs, 17)]; + Ta6 = T3D * T3H; + T3F = T3D * T3E; + T3K = W[32]; + T3N = W[33]; + Ta7 = FNMS(T3G, T3E, Ta6); + T3I = FMA(T3G, T3H, T3F); + 
T9c = T3K * T3O; + T3M = T3K * T3L; + } + } + { + E Ta8, TfI, T3J, T9b, T9d, T3P; + Ta8 = Ta5 - Ta7; + TfI = Ta5 + Ta7; + T3J = T3C + T3I; + T9b = T3C - T3I; + T9d = FNMS(T3N, T3L, T9c); + T3P = FMA(T3N, T3O, T3M); + { + E TfJ, T9g, Ta9, T3W; + TfJ = T9d + T9f; + T9g = T9d - T9f; + Ta9 = T3P - T3V; + T3W = T3P + T3V; + T9h = T9b - T9g; + Td3 = T9b + T9g; + TfK = TfI - TfJ; + ThM = TfI + TfJ; + T3X = T3J + T3W; + Tfr = T3J - T3W; + Tde = Ta8 - Ta9; + Taa = Ta8 + Ta9; + } + } + } + } + { + E TaC, T69, Taw, TfU, T5X, Tar, TaA, T63; + { + E T8S, T3r, T8M, Tfl, T3f, T8H, T8Q, T3l; + { + E T8k, T8f, T8v, T8e; + { + E T8a, T2f, T8j, T2y, T2o, T2r, T2q, T8c, T2l, T8g, T2p; + { + E T2u, T2x, T2w, T8i, T2v; + { + E T2b, T2e, T2a, T2d, T89, T2c, T2t; + T2b = cr[WS(rs, 10)]; + T2e = ci[WS(rs, 10)]; + T2a = W[18]; + T2d = W[19]; + T2u = cr[WS(rs, 26)]; + T2x = ci[WS(rs, 26)]; + T89 = T2a * T2e; + T2c = T2a * T2b; + T2t = W[50]; + T2w = W[51]; + T8a = FNMS(T2d, T2b, T89); + T2f = FMA(T2d, T2e, T2c); + T8i = T2t * T2x; + T2v = T2t * T2u; + } + { + E T2h, T2k, T2g, T2j, T8b, T2i, T2n; + T2h = cr[WS(rs, 42)]; + T2k = ci[WS(rs, 42)]; + T8j = FNMS(T2w, T2u, T8i); + T2y = FMA(T2w, T2x, T2v); + T2g = W[82]; + T2j = W[83]; + T2o = cr[WS(rs, 58)]; + T2r = ci[WS(rs, 58)]; + T8b = T2g * T2k; + T2i = T2g * T2h; + T2n = W[114]; + T2q = W[115]; + T8c = FNMS(T2j, T2h, T8b); + T2l = FMA(T2j, T2k, T2i); + T8g = T2n * T2r; + T2p = T2n * T2o; + } + } + { + E T8d, Tfa, T2m, T88, T8h, T2s, Tf9, T2z; + T8d = T8a - T8c; + Tfa = T8a + T8c; + T2m = T2f + T2l; + T88 = T2f - T2l; + T8h = FNMS(T2q, T2o, T8g); + T2s = FMA(T2q, T2r, T2p); + T8k = T8h - T8j; + Tf9 = T8h + T8j; + T2z = T2s + T2y; + T8f = T2s - T2y; + T8v = T88 + T8d; + T8e = T88 - T8d; + Thx = Tfa + Tf9; + Tfb = Tf9 - Tfa; + Tf6 = T2m - T2z; + T2A = T2m + T2z; + } + } + { + E T38, T8J, T3h, T3k, T8L, T3e, T3g, T3j, T8P, T3i; + { + E T3n, T3q, T3m, T3p; + { + E T34, T37, T33, T8w, T8l, T36, T8I, T35; + T34 = cr[WS(rs, 6)]; + T37 = ci[WS(rs, 
6)]; + T33 = W[10]; + T8w = T8k - T8f; + T8l = T8f + T8k; + T36 = W[11]; + T8I = T33 * T37; + T35 = T33 * T34; + T8x = T8v + T8w; + TcO = T8v - T8w; + T8m = T8e + T8l; + TcR = T8l - T8e; + T38 = FMA(T36, T37, T35); + T8J = FNMS(T36, T34, T8I); + } + T3n = cr[WS(rs, 22)]; + T3q = ci[WS(rs, 22)]; + T3m = W[42]; + T3p = W[43]; + { + E T3a, T3d, T3c, T8K, T3b, T8R, T3o, T39; + T3a = cr[WS(rs, 38)]; + T3d = ci[WS(rs, 38)]; + T8R = T3m * T3q; + T3o = T3m * T3n; + T39 = W[74]; + T3c = W[75]; + T8S = FNMS(T3p, T3n, T8R); + T3r = FMA(T3p, T3q, T3o); + T8K = T39 * T3d; + T3b = T39 * T3a; + T3h = cr[WS(rs, 54)]; + T3k = ci[WS(rs, 54)]; + T8L = FNMS(T3c, T3a, T8K); + T3e = FMA(T3c, T3d, T3b); + T3g = W[106]; + T3j = W[107]; + } + } + T8M = T8J - T8L; + Tfl = T8J + T8L; + T3f = T38 + T3e; + T8H = T38 - T3e; + T8P = T3g * T3k; + T3i = T3g * T3h; + T8Q = FNMS(T3j, T3h, T8P); + T3l = FMA(T3j, T3k, T3i); + } + } + { + E T9u, T9p, Tab, T9o; + { + E T9k, T43, T9t, T4m, T4c, T4f, T4e, T9m, T49, T9q, T4d; + { + E T4i, T4l, T4k, T9s, T4j; + { + E T3Z, T42, T3Y, T41, T9j, T40, T4h; + { + E T94, T8N, T8T, Tfk, T8O, T3s, T8U, T95; + T3Z = cr[WS(rs, 9)]; + T94 = T8H + T8M; + T8N = T8H - T8M; + T8T = T8Q - T8S; + Tfk = T8Q + T8S; + T8O = T3l - T3r; + T3s = T3l + T3r; + T42 = ci[WS(rs, 9)]; + Tfm = Tfk - Tfl; + ThC = Tfl + Tfk; + T8U = T8O + T8T; + T95 = T8T - T8O; + T3t = T3f + T3s; + Tfh = T3f - T3s; + T96 = T94 + T95; + TcV = T94 - T95; + T8V = T8N + T8U; + TcY = T8U - T8N; + T3Y = W[16]; + } + T41 = W[17]; + T4i = cr[WS(rs, 25)]; + T4l = ci[WS(rs, 25)]; + T9j = T3Y * T42; + T40 = T3Y * T3Z; + T4h = W[48]; + T4k = W[49]; + T9k = FNMS(T41, T3Z, T9j); + T43 = FMA(T41, T42, T40); + T9s = T4h * T4l; + T4j = T4h * T4i; + } + { + E T45, T48, T44, T47, T9l, T46, T4b; + T45 = cr[WS(rs, 41)]; + T48 = ci[WS(rs, 41)]; + T9t = FNMS(T4k, T4i, T9s); + T4m = FMA(T4k, T4l, T4j); + T44 = W[80]; + T47 = W[81]; + T4c = cr[WS(rs, 57)]; + T4f = ci[WS(rs, 57)]; + T9l = T44 * T48; + T46 = T44 * T45; + T4b = 
W[112]; + T4e = W[113]; + T9m = FNMS(T47, T45, T9l); + T49 = FMA(T47, T48, T46); + T9q = T4b * T4f; + T4d = T4b * T4c; + } + } + { + E T9n, Tft, T4a, T9i, T9r, T4g, Tfs, T4n; + T9n = T9k - T9m; + Tft = T9k + T9m; + T4a = T43 + T49; + T9i = T43 - T49; + T9r = FNMS(T4e, T4c, T9q); + T4g = FMA(T4e, T4f, T4d); + T9u = T9r - T9t; + Tfs = T9r + T9t; + T4n = T4g + T4m; + T9p = T4g - T4m; + Tab = T9i + T9n; + T9o = T9i - T9n; + ThN = Tft + Tfs; + Tfu = Tfs - Tft; + TfL = T4a - T4n; + T4o = T4a + T4n; + } + } + { + E T5Q, Tat, T5Z, T62, Tav, T5W, T5Y, T61, Taz, T60; + { + E T65, T68, T64, T67; + { + E T5M, T5P, T5L, Tac, T9v, T5O, Tas, T5N; + T5M = cr[WS(rs, 7)]; + T5P = ci[WS(rs, 7)]; + T5L = W[12]; + Tac = T9u - T9p; + T9v = T9p + T9u; + T5O = W[13]; + Tas = T5L * T5P; + T5N = T5L * T5M; + Tad = Tab + Tac; + Td4 = Tab - Tac; + T9w = T9o + T9v; + Tdf = T9v - T9o; + T5Q = FMA(T5O, T5P, T5N); + Tat = FNMS(T5O, T5M, Tas); + } + T65 = cr[WS(rs, 23)]; + T68 = ci[WS(rs, 23)]; + T64 = W[44]; + T67 = W[45]; + { + E T5S, T5V, T5U, Tau, T5T, TaB, T66, T5R; + T5S = cr[WS(rs, 39)]; + T5V = ci[WS(rs, 39)]; + TaB = T64 * T68; + T66 = T64 * T65; + T5R = W[76]; + T5U = W[77]; + TaC = FNMS(T67, T65, TaB); + T69 = FMA(T67, T68, T66); + Tau = T5R * T5V; + T5T = T5R * T5S; + T5Z = cr[WS(rs, 55)]; + T62 = ci[WS(rs, 55)]; + Tav = FNMS(T5U, T5S, Tau); + T5W = FMA(T5U, T5V, T5T); + T5Y = W[108]; + T61 = W[109]; + } + } + Taw = Tat - Tav; + TfU = Tat + Tav; + T5X = T5Q + T5W; + Tar = T5Q - T5W; + Taz = T5Y * T62; + T60 = T5Y * T5Z; + TaA = FNMS(T61, T5Z, Taz); + T63 = FMA(T61, T62, T60); + } + } + } + { + E T9T, Td9, TfE, TfB, Tda, Ta0; + { + E T9E, Td6, Tfz, Tfw, Td7, T9L; + { + E T9G, T4v, T9C, T4O, T4E, T4H, T4G, T9I, T4B, T9z, T4F; + { + E T4K, T4N, T4M, T9B, T4L; + { + E T4r, T4u, T4q, T4t, T9F, T4s, T4J; + { + E Tbl, Tax, TaD, TfT, Tay, T6a, TaE, Tbk; + T4r = cr[WS(rs, 5)]; + Tbl = Tar + Taw; + Tax = Tar - Taw; + TaD = TaA - TaC; + TfT = TaA + TaC; + Tay = T63 - T69; + T6a = T63 + T69; + T4u 
= ci[WS(rs, 5)]; + TfV = TfT - TfU; + ThY = TfU + TfT; + TaE = Tay + TaD; + Tbk = Tay - TaD; + T6b = T5X + T6a; + Tg9 = T6a - T5X; + Tbm = Tbk - Tbl; + Tdn = Tbl + Tbk; + TaF = Tax + TaE; + Tdy = TaE - Tax; + T4q = W[8]; + } + T4t = W[9]; + T4K = cr[WS(rs, 53)]; + T4N = ci[WS(rs, 53)]; + T9F = T4q * T4u; + T4s = T4q * T4r; + T4J = W[104]; + T4M = W[105]; + T9G = FNMS(T4t, T4r, T9F); + T4v = FMA(T4t, T4u, T4s); + T9B = T4J * T4N; + T4L = T4J * T4K; + } + { + E T4x, T4A, T4w, T4z, T9H, T4y, T4D; + T4x = cr[WS(rs, 37)]; + T4A = ci[WS(rs, 37)]; + T9C = FNMS(T4M, T4K, T9B); + T4O = FMA(T4M, T4N, T4L); + T4w = W[72]; + T4z = W[73]; + T4E = cr[WS(rs, 21)]; + T4H = ci[WS(rs, 21)]; + T9H = T4w * T4A; + T4y = T4w * T4x; + T4D = W[40]; + T4G = W[41]; + T9I = FNMS(T4z, T4x, T9H); + T4B = FMA(T4z, T4A, T4y); + T9z = T4D * T4H; + T4F = T4D * T4E; + } + } + { + E T9J, Tfx, T4C, T9y, T9A, T4I; + T9J = T9G - T9I; + Tfx = T9G + T9I; + T4C = T4v + T4B; + T9y = T4v - T4B; + T9A = FNMS(T4G, T4E, T9z); + T4I = FMA(T4G, T4H, T4F); + { + E Tfy, T9D, T9K, T4P; + Tfy = T9A + T9C; + T9D = T9A - T9C; + T9K = T4I - T4O; + T4P = T4I + T4O; + T9E = T9y - T9D; + Td6 = T9y + T9D; + Tfz = Tfx - Tfy; + ThJ = Tfx + Tfy; + Tfw = T4C - T4P; + T4Q = T4C + T4P; + Td7 = T9J - T9K; + T9L = T9J + T9K; + } + } + } + { + E T9V, T4W, T9R, T5f, T55, T58, T57, T9X, T52, T9O, T56; + { + E T5b, T5e, T5d, T9Q, T5c; + { + E T4S, T4V, T4R, T4U, T9U, T4T, T5a; + T4S = cr[WS(rs, 61)]; + TfN = Tfw + Tfz; + TfA = Tfw - Tfz; + Taf = FMA(KP414213562, T9E, T9L); + T9M = FNMS(KP414213562, T9L, T9E); + Td8 = FMA(KP414213562, Td7, Td6); + Tdh = FNMS(KP414213562, Td6, Td7); + T4V = ci[WS(rs, 61)]; + T4R = W[120]; + T4U = W[121]; + T5b = cr[WS(rs, 45)]; + T5e = ci[WS(rs, 45)]; + T9U = T4R * T4V; + T4T = T4R * T4S; + T5a = W[88]; + T5d = W[89]; + T9V = FNMS(T4U, T4S, T9U); + T4W = FMA(T4U, T4V, T4T); + T9Q = T5a * T5e; + T5c = T5a * T5b; + } + { + E T4Y, T51, T4X, T50, T9W, T4Z, T54; + T4Y = cr[WS(rs, 29)]; + T51 = ci[WS(rs, 
29)]; + T9R = FNMS(T5d, T5b, T9Q); + T5f = FMA(T5d, T5e, T5c); + T4X = W[56]; + T50 = W[57]; + T55 = cr[WS(rs, 13)]; + T58 = ci[WS(rs, 13)]; + T9W = T4X * T51; + T4Z = T4X * T4Y; + T54 = W[24]; + T57 = W[25]; + T9X = FNMS(T50, T4Y, T9W); + T52 = FMA(T50, T51, T4Z); + T9O = T54 * T58; + T56 = T54 * T55; + } + } + { + E T9Y, TfC, T53, T9N, T9P, T59; + T9Y = T9V - T9X; + TfC = T9V + T9X; + T53 = T4W + T52; + T9N = T4W - T52; + T9P = FNMS(T57, T55, T9O); + T59 = FMA(T57, T58, T56); + { + E TfD, T9S, T9Z, T5g; + TfD = T9P + T9R; + T9S = T9P - T9R; + T9Z = T59 - T5f; + T5g = T59 + T5f; + T9T = T9N - T9S; + Td9 = T9N + T9S; + TfE = TfC - TfD; + ThI = TfC + TfD; + TfB = T53 - T5g; + T5h = T53 + T5g; + Tda = T9Y - T9Z; + Ta0 = T9Y + T9Z; + } + } + } + } + { + E TaN, Tdp, Tg0, TfX, Tdq, TaU; + { + E TaQ, T6i, TaL, T6B, T6r, T6u, T6t, TaS, T6o, TaI, T6s; + { + E T6x, T6A, T6z, TaK, T6y; + { + E T6e, T6h, T6d, T6g, TaP, T6f, T6w; + T6e = cr[WS(rs, 3)]; + TfO = TfE - TfB; + TfF = TfB + TfE; + Tag = FNMS(KP414213562, T9T, Ta0); + Ta1 = FMA(KP414213562, Ta0, T9T); + Tdb = FNMS(KP414213562, Tda, Td9); + Tdi = FMA(KP414213562, Td9, Tda); + T6h = ci[WS(rs, 3)]; + T6d = W[4]; + T6g = W[5]; + T6x = cr[WS(rs, 51)]; + T6A = ci[WS(rs, 51)]; + TaP = T6d * T6h; + T6f = T6d * T6e; + T6w = W[100]; + T6z = W[101]; + TaQ = FNMS(T6g, T6e, TaP); + T6i = FMA(T6g, T6h, T6f); + TaK = T6w * T6A; + T6y = T6w * T6x; + } + { + E T6k, T6n, T6j, T6m, TaR, T6l, T6q; + T6k = cr[WS(rs, 35)]; + T6n = ci[WS(rs, 35)]; + TaL = FNMS(T6z, T6x, TaK); + T6B = FMA(T6z, T6A, T6y); + T6j = W[68]; + T6m = W[69]; + T6r = cr[WS(rs, 19)]; + T6u = ci[WS(rs, 19)]; + TaR = T6j * T6n; + T6l = T6j * T6k; + T6q = W[36]; + T6t = W[37]; + TaS = FNMS(T6m, T6k, TaR); + T6o = FMA(T6m, T6n, T6l); + TaI = T6q * T6u; + T6s = T6q * T6r; + } + } + { + E TaT, TfY, T6p, TaH, TaJ, T6v; + TaT = TaQ - TaS; + TfY = TaQ + TaS; + T6p = T6i + T6o; + TaH = T6i - T6o; + TaJ = FNMS(T6t, T6r, TaI); + T6v = FMA(T6t, T6u, T6s); + { + E TfZ, TaM, T6C, 
TaO; + TfZ = TaJ + TaL; + TaM = TaJ - TaL; + T6C = T6v + T6B; + TaO = T6B - T6v; + TaN = TaH - TaM; + Tdp = TaH + TaM; + Tg0 = TfY - TfZ; + ThU = TfY + TfZ; + TfX = T6p - T6C; + T6D = T6p + T6C; + Tdq = TaT + TaO; + TaU = TaO - TaT; + } + } + } + { + E Tb5, T6J, Tb0, T72, T6S, T6V, T6U, Tb7, T6P, TaX, T6T; + { + E T6Y, T71, T70, TaZ, T6Z; + { + E T6F, T6I, T6E, T6H, Tb4, T6G, T6X; + T6F = cr[WS(rs, 59)]; + Tgf = TfX + Tg0; + Tg1 = TfX - Tg0; + Tbo = FNMS(KP414213562, TaN, TaU); + TaV = FMA(KP414213562, TaU, TaN); + Tdr = FMA(KP414213562, Tdq, Tdp); + TdA = FNMS(KP414213562, Tdp, Tdq); + T6I = ci[WS(rs, 59)]; + T6E = W[116]; + T6H = W[117]; + T6Y = cr[WS(rs, 43)]; + T71 = ci[WS(rs, 43)]; + Tb4 = T6E * T6I; + T6G = T6E * T6F; + T6X = W[84]; + T70 = W[85]; + Tb5 = FNMS(T6H, T6F, Tb4); + T6J = FMA(T6H, T6I, T6G); + TaZ = T6X * T71; + T6Z = T6X * T6Y; + } + { + E T6L, T6O, T6K, T6N, Tb6, T6M, T6R; + T6L = cr[WS(rs, 27)]; + T6O = ci[WS(rs, 27)]; + Tb0 = FNMS(T70, T6Y, TaZ); + T72 = FMA(T70, T71, T6Z); + T6K = W[52]; + T6N = W[53]; + T6S = cr[WS(rs, 11)]; + T6V = ci[WS(rs, 11)]; + Tb6 = T6K * T6O; + T6M = T6K * T6L; + T6R = W[20]; + T6U = W[21]; + Tb7 = FNMS(T6N, T6L, Tb6); + T6P = FMA(T6N, T6O, T6M); + TaX = T6R * T6V; + T6T = T6R * T6S; + } + } + { + E Tb8, Tg3, T6Q, TaW, TaY, T6W; + Tb8 = Tb5 - Tb7; + Tg3 = Tb5 + Tb7; + T6Q = T6J + T6P; + TaW = T6J - T6P; + TaY = FNMS(T6U, T6S, TaX); + T6W = FMA(T6U, T6V, T6T); + { + E Tg4, Tb1, T73, Tb3; + Tg4 = TaY + Tb0; + Tb1 = TaY - Tb0; + T73 = T6W + T72; + Tb3 = T72 - T6W; + Tb2 = TaW - Tb1; + Tds = TaW + Tb1; + Tg5 = Tg3 - Tg4; + ThT = Tg3 + Tg4; + Tg2 = T6Q - T73; + T74 = T6Q + T73; + Tdt = Tb8 + Tb3; + Tb9 = Tb3 - Tb8; + } + } + } + } + } + } + { + E Thq, Tge, Tg6, Tdu, TdB, Tj7, Thv, ThA, Tht, Tj8, ThD, Thy, ThS, Ti0, ThZ; + E ThV, ThH, ThP, ThO, ThK, Tkm, TcD, Tk0, Tk4, TjZ, Tk3, Tik, Tin; + { + E Tbp, Tba, TiI, TiL; + { + E Tio, T1I, Tj1, T3v, Tj2, TiX, TiN, Tir, T76, TiJ, TiC, TiG, T5j, Tit, Tiw; + E TiK; + { + E TiO, 
TiW, Tip, Tiq; + { + E TO, T1H, T2B, T3u; + Thq = Tm - TN; + TO = Tm + TN; + Tge = Tg2 - Tg5; + Tg6 = Tg2 + Tg5; + Tbp = FMA(KP414213562, Tb2, Tb9); + Tba = FNMS(KP414213562, Tb9, Tb2); + Tdu = FNMS(KP414213562, Tdt, Tds); + TdB = FMA(KP414213562, Tds, Tdt); + T1H = T1f + T1G; + Tj7 = T1f - T1G; + Thv = T29 - T2A; + T2B = T29 + T2A; + T3u = T32 + T3t; + ThA = T32 - T3t; + Tht = Thr - Ths; + TiO = Ths + Thr; + Tio = TO - T1H; + T1I = TO + T1H; + Tj1 = T2B - T3u; + T3v = T2B + T3u; + TiW = TiP + TiV; + Tj8 = TiV - TiP; + } + ThD = ThB - ThC; + Tip = ThB + ThC; + Tiq = Thw + Thx; + Thy = Thw - Thx; + { + E T6c, T75, Tiz, TiA; + ThS = T5K - T6b; + T6c = T5K + T6b; + Tj2 = TiW - TiO; + TiX = TiO + TiW; + TiN = Tiq + Tip; + Tir = Tip - Tiq; + T75 = T6D + T74; + Ti0 = T74 - T6D; + ThZ = ThX - ThY; + Tiz = ThX + ThY; + TiA = ThU + ThT; + ThV = ThT - ThU; + { + E T4p, Tiy, TiB, T5i, Tiu, Tiv; + ThH = T3X - T4o; + T4p = T3X + T4o; + T76 = T6c + T75; + Tiy = T6c - T75; + TiJ = Tiz + TiA; + TiB = Tiz - TiA; + T5i = T4Q + T5h; + ThP = T4Q - T5h; + ThO = ThM - ThN; + Tiu = ThM + ThN; + Tiv = ThJ + ThI; + ThK = ThI - ThJ; + TiC = Tiy - TiB; + TiG = Tiy + TiB; + T5j = T4p + T5i; + Tit = T4p - T5i; + Tiw = Tiu - Tiv; + TiK = Tiu + Tiv; + } + } + } + { + E TiZ, TiD, TiH, TiE, Tis, TiM, TiY, Tj0; + { + E T3w, TiF, Tix, T77, Tj5, Tj3, Tj6, Tj4; + TiI = T1I - T3v; + T3w = T1I + T3v; + TiF = Tit - Tiw; + Tix = Tit + Tiw; + T77 = T5j + T76; + TiZ = T76 - T5j; + Tj5 = Tj2 - Tj1; + Tj3 = Tj1 + Tj2; + TiD = Tix + TiC; + Tj4 = TiC - Tix; + cr[0] = T3w + T77; + ci[WS(rs, 31)] = T3w - T77; + Tj6 = TiG - TiF; + TiH = TiF + TiG; + ci[WS(rs, 39)] = FMA(KP707106781, Tj4, Tj3); + cr[WS(rs, 56)] = FMS(KP707106781, Tj4, Tj3); + TiE = Tio + Tir; + Tis = Tio - Tir; + ci[WS(rs, 55)] = FMA(KP707106781, Tj6, Tj5); + cr[WS(rs, 40)] = FMS(KP707106781, Tj6, Tj5); + } + TiL = TiJ - TiK; + TiM = TiK + TiJ; + cr[WS(rs, 8)] = FMA(KP707106781, TiD, Tis); + ci[WS(rs, 23)] = FNMS(KP707106781, TiD, Tis); + ci[WS(rs, 
7)] = FMA(KP707106781, TiH, TiE); + cr[WS(rs, 24)] = FNMS(KP707106781, TiH, TiE); + TiY = TiN + TiX; + Tj0 = TiX - TiN; + ci[WS(rs, 63)] = TiM + TiY; + cr[WS(rs, 32)] = TiM - TiY; + ci[WS(rs, 47)] = TiZ + Tj0; + cr[WS(rs, 48)] = TiZ - Tj0; + } + } + { + E TjW, TbB, Tk2, T99, TbF, TbL, Tbv, Taj, Tcu, Tcy, Tci, Tce, Tcr, Tcx, Tch; + E Tc7, Tcn, Tkg, Tka, TbZ, TbP, T7J, TbO, T7u, Tk7, TjT, TbI, TbM, Tbw, Tbs; + E T7Y, TbQ; + { + E TbX, TbW, TbU, TbT, Tc1, Tc5, Tc4, Tc2, TaG, Tbq, Tbn, Tcb, Tcs, Tca, Tcc; + E Tbb, Tcm, TbV; + { + E T8W, Tbz, T8z, T97, T8n, T8y; + TbX = FNMS(KP707106781, T8m, T87); + T8n = FMA(KP707106781, T8m, T87); + T8y = FMA(KP707106781, T8x, T8u); + TbW = FNMS(KP707106781, T8x, T8u); + TbU = FNMS(KP707106781, T8V, T8G); + T8W = FMA(KP707106781, T8V, T8G); + ci[WS(rs, 15)] = TiI + TiL; + cr[WS(rs, 16)] = TiI - TiL; + Tbz = FMA(KP198912367, T8n, T8y); + T8z = FNMS(KP198912367, T8y, T8n); + T97 = FMA(KP707106781, T96, T93); + TbT = FNMS(KP707106781, T96, T93); + { + E Tae, TbD, Ta3, Tah; + { + E T9x, Ta2, TbA, T98; + Tc1 = FNMS(KP707106781, T9w, T9h); + T9x = FMA(KP707106781, T9w, T9h); + Ta2 = T9M + Ta1; + Tc5 = Ta1 - T9M; + Tc4 = FNMS(KP707106781, Tad, Taa); + Tae = FMA(KP707106781, Tad, Taa); + TbA = FNMS(KP198912367, T8W, T97); + T98 = FMA(KP198912367, T97, T8W); + TbD = FNMS(KP923879532, Ta2, T9x); + Ta3 = FMA(KP923879532, Ta2, T9x); + TjW = Tbz + TbA; + TbB = Tbz - TbA; + Tk2 = T98 - T8z; + T99 = T8z + T98; + Tah = Taf + Tag; + Tc2 = Taf - Tag; + } + { + E Tc8, Tc9, TbE, Tai; + TaG = FMA(KP707106781, TaF, Taq); + Tc8 = FNMS(KP707106781, TaF, Taq); + Tc9 = Tbp - Tbo; + Tbq = Tbo + Tbp; + Tbn = FMA(KP707106781, Tbm, Tbj); + Tcb = FNMS(KP707106781, Tbm, Tbj); + TbE = FNMS(KP923879532, Tah, Tae); + Tai = FMA(KP923879532, Tah, Tae); + Tcs = FMA(KP923879532, Tc9, Tc8); + Tca = FNMS(KP923879532, Tc9, Tc8); + TbF = FMA(KP820678790, TbE, TbD); + TbL = FNMS(KP820678790, TbD, TbE); + Tbv = FMA(KP098491403, Ta3, Tai); + Taj = FNMS(KP098491403, Tai, Ta3); + 
Tcc = Tba - TaV; + Tbb = TaV + Tba; + } + } + } + { + E Tcp, Tc3, Tct, Tcd, Tcq, Tc6; + Tct = FNMS(KP923879532, Tcc, Tcb); + Tcd = FMA(KP923879532, Tcc, Tcb); + Tcp = FMA(KP923879532, Tc2, Tc1); + Tc3 = FNMS(KP923879532, Tc2, Tc1); + Tcu = FMA(KP303346683, Tct, Tcs); + Tcy = FNMS(KP303346683, Tcs, Tct); + Tci = FMA(KP534511135, Tca, Tcd); + Tce = FNMS(KP534511135, Tcd, Tca); + Tcq = FMA(KP923879532, Tc5, Tc4); + Tc6 = FNMS(KP923879532, Tc5, Tc4); + Tcm = FNMS(KP668178637, TbT, TbU); + TbV = FMA(KP668178637, TbU, TbT); + Tcr = FMA(KP303346683, Tcq, Tcp); + Tcx = FNMS(KP303346683, Tcp, Tcq); + Tch = FMA(KP534511135, Tc3, Tc6); + Tc7 = FNMS(KP534511135, Tc6, Tc3); + } + { + E TbG, Tbc, Tcl, TbY; + Tcl = FMA(KP668178637, TbW, TbX); + TbY = FNMS(KP668178637, TbX, TbW); + TbG = FNMS(KP923879532, Tbb, TaG); + Tbc = FMA(KP923879532, Tbb, TaG); + Tcn = Tcl + Tcm; + Tkg = Tcl - Tcm; + Tka = TbY + TbV; + TbZ = TbV - TbY; + { + E T7t, TjS, TbH, Tbr; + Tkm = T7s - T7l; + T7t = T7l + T7s; + TjS = TcB - TcC; + TcD = TcB + TcC; + TbP = FMA(KP414213562, T7B, T7I); + T7J = FNMS(KP414213562, T7I, T7B); + TbH = FNMS(KP923879532, Tbq, Tbn); + Tbr = FMA(KP923879532, Tbq, Tbn); + TbO = FNMS(KP707106781, T7t, T7e); + T7u = FMA(KP707106781, T7t, T7e); + Tk7 = FNMS(KP707106781, TjS, TjR); + TjT = FMA(KP707106781, TjS, TjR); + TbI = FMA(KP820678790, TbH, TbG); + TbM = FNMS(KP820678790, TbG, TbH); + Tbw = FMA(KP098491403, Tbc, Tbr); + Tbs = FNMS(KP098491403, Tbr, Tbc); + T7Y = FMA(KP414213562, T7X, T7Q); + TbQ = FNMS(KP414213562, T7Q, T7X); + } + } + } + { + E Tk1, TjV, Tck, TbS, Tkd, Tcz, Tkh, Tcf, TjY, Tk6, Tke, Tcv, Tki, Tcj; + { + E Tbu, TbC, Tkb, Tkc, Tkj, Tkk, Tbx, TbJ; + { + E Tbt, Tkf, Tk9, T9a, TbK, TbN, Tby; + Tk0 = Tbs - Taj; + Tbt = Taj + Tbs; + { + E Tk8, T7Z, TjU, TbR, T80; + Tk8 = T7Y - T7J; + T7Z = T7J + T7Y; + TjU = TbP + TbQ; + TbR = TbP - TbQ; + Tkf = FNMS(KP923879532, Tk8, Tk7); + Tk9 = FMA(KP923879532, Tk8, Tk7); + Tby = FNMS(KP923879532, T7Z, T7u); + T80 = 
FMA(KP923879532, T7Z, T7u); + Tk1 = FNMS(KP923879532, TjU, TjT); + TjV = FMA(KP923879532, TjU, TjT); + Tck = FMA(KP923879532, TbR, TbO); + TbS = FNMS(KP923879532, TbR, TbO); + T9a = FMA(KP980785280, T99, T80); + Tbu = FNMS(KP980785280, T99, T80); + } + TbC = FMA(KP980785280, TbB, Tby); + TbK = FNMS(KP980785280, TbB, Tby); + TbN = TbL + TbM; + Tk4 = TbL - TbM; + Tkd = FNMS(KP831469612, Tka, Tk9); + Tkb = FMA(KP831469612, Tka, Tk9); + ci[0] = FMA(KP995184726, Tbt, T9a); + cr[WS(rs, 31)] = FNMS(KP995184726, Tbt, T9a); + ci[WS(rs, 8)] = FNMS(KP773010453, TbN, TbK); + cr[WS(rs, 23)] = FMA(KP773010453, TbN, TbK); + Tkc = Tcx - Tcy; + Tcz = Tcx + Tcy; + Tkh = FMA(KP831469612, Tkg, Tkf); + Tkj = FNMS(KP831469612, Tkg, Tkf); + Tkk = Tce - Tc7; + Tcf = Tc7 + Tce; + } + ci[WS(rs, 60)] = FMA(KP956940335, Tkc, Tkb); + cr[WS(rs, 35)] = FMS(KP956940335, Tkc, Tkb); + ci[WS(rs, 52)] = FMA(KP881921264, Tkk, Tkj); + cr[WS(rs, 43)] = FMS(KP881921264, Tkk, Tkj); + Tbx = Tbv + Tbw; + TjY = Tbw - Tbv; + TbJ = TbF + TbI; + Tk6 = TbI - TbF; + cr[WS(rs, 15)] = FMA(KP995184726, Tbx, Tbu); + ci[WS(rs, 16)] = FNMS(KP995184726, Tbx, Tbu); + cr[WS(rs, 7)] = FMA(KP773010453, TbJ, TbC); + ci[WS(rs, 24)] = FNMS(KP773010453, TbJ, TbC); + Tke = Tcu - Tcr; + Tcv = Tcr + Tcu; + Tki = Tci - Tch; + Tcj = Tch + Tci; + } + { + E Tcg, Tco, TjX, Tk5, Tc0, Tcw; + Tcg = FNMS(KP831469612, TbZ, TbS); + Tc0 = FMA(KP831469612, TbZ, TbS); + ci[WS(rs, 44)] = FMA(KP956940335, Tke, Tkd); + cr[WS(rs, 51)] = FMS(KP956940335, Tke, Tkd); + ci[WS(rs, 36)] = FMA(KP881921264, Tki, Tkh); + cr[WS(rs, 59)] = FMS(KP881921264, Tki, Tkh); + Tco = FMA(KP831469612, Tcn, Tck); + Tcw = FNMS(KP831469612, Tcn, Tck); + TjZ = FNMS(KP980785280, TjW, TjV); + TjX = FMA(KP980785280, TjW, TjV); + ci[WS(rs, 4)] = FMA(KP881921264, Tcf, Tc0); + cr[WS(rs, 27)] = FNMS(KP881921264, Tcf, Tc0); + ci[WS(rs, 12)] = FNMS(KP956940335, Tcz, Tcw); + cr[WS(rs, 19)] = FMA(KP956940335, Tcz, Tcw); + Tk3 = FMA(KP980785280, Tk2, Tk1); + Tk5 = FNMS(KP980785280, 
Tk2, Tk1); + ci[WS(rs, 32)] = FMA(KP995184726, TjY, TjX); + cr[WS(rs, 63)] = FMS(KP995184726, TjY, TjX); + ci[WS(rs, 40)] = FMA(KP773010453, Tk6, Tk5); + cr[WS(rs, 55)] = FMS(KP773010453, Tk6, Tk5); + cr[WS(rs, 11)] = FMA(KP881921264, Tcj, Tcg); + ci[WS(rs, 20)] = FNMS(KP881921264, Tcj, Tcg); + cr[WS(rs, 3)] = FMA(KP956940335, Tcv, Tco); + ci[WS(rs, 28)] = FNMS(KP956940335, Tcv, Tco); + } + } + } + } + { + E Ti8, Thu, Tjf, Tj9, Tib, Tjg, Tja, ThF, Tig, ThW, Tif, Til, Ti6, ThR; + ci[WS(rs, 48)] = FMA(KP995184726, Tk0, TjZ); + cr[WS(rs, 47)] = FMS(KP995184726, Tk0, TjZ); + ci[WS(rs, 56)] = FMA(KP773010453, Tk4, Tk3); + cr[WS(rs, 39)] = FMS(KP773010453, Tk4, Tk3); + Ti8 = Thq + Tht; + Thu = Thq - Tht; + Tjf = Tj8 - Tj7; + Tj9 = Tj7 + Tj8; + { + E Tid, ThL, Tie, ThQ; + { + E Ti9, Thz, Tia, ThE; + Ti9 = Thv - Thy; + Thz = Thv + Thy; + Tia = ThA + ThD; + ThE = ThA - ThD; + Tib = Ti9 + Tia; + Tjg = Tia - Ti9; + Tja = Thz - ThE; + ThF = Thz + ThE; + Tid = ThH + ThK; + ThL = ThH - ThK; + } + Tie = ThO + ThP; + ThQ = ThO - ThP; + Tig = ThS + ThV; + ThW = ThS - ThV; + Tif = FNMS(KP414213562, Tie, Tid); + Til = FMA(KP414213562, Tid, Tie); + Ti6 = FNMS(KP414213562, ThL, ThQ); + ThR = FMA(KP414213562, ThQ, ThL); + } + { + E Ti4, ThG, Tjh, Tjj, Tih, Ti1; + Ti4 = FNMS(KP707106781, ThF, Thu); + ThG = FMA(KP707106781, ThF, Thu); + Tjh = FMA(KP707106781, Tjg, Tjf); + Tjj = FNMS(KP707106781, Tjg, Tjf); + Tih = Ti0 - ThZ; + Ti1 = ThZ + Ti0; + { + E Tje, Tjd, Tjb, Tjc; + { + E Tic, Tim, Ti5, Ti2, Tij, Tii; + Tik = FNMS(KP707106781, Tib, Ti8); + Tic = FMA(KP707106781, Tib, Ti8); + Tii = FNMS(KP414213562, Tih, Tig); + Tim = FMA(KP414213562, Tig, Tih); + Ti5 = FMA(KP414213562, ThW, Ti1); + Ti2 = FNMS(KP414213562, Ti1, ThW); + Tij = Tif + Tii; + Tje = Tii - Tif; + Tjd = FNMS(KP707106781, Tja, Tj9); + Tjb = FMA(KP707106781, Tja, Tj9); + { + E Ti7, Tji, Tjk, Ti3; + Ti7 = Ti5 - Ti6; + Tji = Ti6 + Ti5; + Tjk = Ti2 - ThR; + Ti3 = ThR + Ti2; + ci[WS(rs, 3)] = FMA(KP923879532, Tij, Tic); + 
cr[WS(rs, 28)] = FNMS(KP923879532, Tij, Tic); + ci[WS(rs, 11)] = FMA(KP923879532, Ti7, Ti4); + cr[WS(rs, 20)] = FNMS(KP923879532, Ti7, Ti4); + ci[WS(rs, 59)] = FMA(KP923879532, Tji, Tjh); + cr[WS(rs, 36)] = FMS(KP923879532, Tji, Tjh); + ci[WS(rs, 43)] = FMA(KP923879532, Tjk, Tjj); + cr[WS(rs, 52)] = FMS(KP923879532, Tjk, Tjj); + cr[WS(rs, 4)] = FMA(KP923879532, Ti3, ThG); + ci[WS(rs, 27)] = FNMS(KP923879532, Ti3, ThG); + Tjc = Tim - Til; + Tin = Til + Tim; + } + } + ci[WS(rs, 35)] = FMA(KP923879532, Tjc, Tjb); + cr[WS(rs, 60)] = FMS(KP923879532, Tjc, Tjb); + ci[WS(rs, 51)] = FMA(KP923879532, Tje, Tjd); + cr[WS(rs, 44)] = FMS(KP923879532, Tje, Tjd); + } + } + } + { + E Tjy, Tju, Tjt, Tjx; + { + E TjD, TjJ, Tgo, Tf2, Tjp, Tjv, Tha, TgI, Tgg, Tgd, Tgr, Tjw, Tjq, Tfp, Thk; + E Tho, Th7, Th4, Tgv, TgB, Tgl, TfR, TjE, Thd, TjK, TgP, Tgw, Tg8, Thh, Thn; + E Th8, TgX; + { + E TgK, TgJ, TgN, TgM, TfW, Th1, Thi, Th0, Th2, Tg7; + { + E TgE, TeQ, TjB, Tjn, TgF, TgG, TjC, Tf1, TeV, Tf0; + TgE = TeM - TeP; + TeQ = TeM + TeP; + TjB = Tjm - Tjl; + Tjn = Tjl + Tjm; + TgF = TeR + TeU; + TeV = TeR - TeU; + cr[WS(rs, 12)] = FMA(KP923879532, Tin, Tik); + ci[WS(rs, 19)] = FNMS(KP923879532, Tin, Tik); + Tf0 = TeW + TeZ; + TgG = TeW - TeZ; + TjC = Tf0 - TeV; + Tf1 = TeV + Tf0; + { + E Tfi, Tgp, Tfd, Tfn; + { + E Tf7, Tjo, TgH, Tfc; + TgK = Tf5 - Tf6; + Tf7 = Tf5 + Tf6; + TjD = FMA(KP707106781, TjC, TjB); + TjJ = FNMS(KP707106781, TjC, TjB); + Tgo = FMA(KP707106781, Tf1, TeQ); + Tf2 = FNMS(KP707106781, Tf1, TeQ); + Tjo = TgF - TgG; + TgH = TgF + TgG; + Tfc = Tf8 + Tfb; + TgJ = Tf8 - Tfb; + TgN = Tfg - Tfh; + Tfi = Tfg + Tfh; + Tjp = FMA(KP707106781, Tjo, Tjn); + Tjv = FNMS(KP707106781, Tjo, Tjn); + Tha = FNMS(KP707106781, TgH, TgE); + TgI = FMA(KP707106781, TgH, TgE); + Tgp = FNMS(KP414213562, Tf7, Tfc); + Tfd = FMA(KP414213562, Tfc, Tf7); + Tfn = Tfj + Tfm; + TgM = Tfj - Tfm; + } + { + E TgY, TgZ, Tgq, Tfo; + TfW = TfS + TfV; + TgY = TfS - TfV; + TgZ = Tgf + Tge; + Tgg = Tge - Tgf; + Tgd 
= Tg9 - Tgc; + Th1 = Tgc + Tg9; + Tgq = FMA(KP414213562, Tfi, Tfn); + Tfo = FNMS(KP414213562, Tfn, Tfi); + Thi = FNMS(KP707106781, TgZ, TgY); + Th0 = FMA(KP707106781, TgZ, TgY); + Tgr = Tgp + Tgq; + Tjw = Tgq - Tgp; + Tjq = Tfd + Tfo; + Tfp = Tfd - Tfo; + Th2 = Tg6 - Tg1; + Tg7 = Tg1 + Tg6; + } + } + } + { + E TgR, TgV, TgU, TgS, Thc, TgL; + { + E TfM, Tgt, TfH, TfP, Tgu, TfQ; + { + E Tfv, TfG, Thj, Th3; + TgR = Tfr - Tfu; + Tfv = Tfr + Tfu; + TfG = TfA + TfF; + TgV = TfF - TfA; + TgU = TfK - TfL; + TfM = TfK + TfL; + Thj = FNMS(KP707106781, Th2, Th1); + Th3 = FMA(KP707106781, Th2, Th1); + Tgt = FMA(KP707106781, TfG, Tfv); + TfH = FNMS(KP707106781, TfG, Tfv); + Thk = FMA(KP668178637, Thj, Thi); + Tho = FNMS(KP668178637, Thi, Thj); + Th7 = FMA(KP198912367, Th0, Th3); + Th4 = FNMS(KP198912367, Th3, Th0); + TfP = TfN + TfO; + TgS = TfN - TfO; + } + Tgu = FMA(KP707106781, TfP, TfM); + TfQ = FNMS(KP707106781, TfP, TfM); + Thc = FNMS(KP414213562, TgJ, TgK); + TgL = FMA(KP414213562, TgK, TgJ); + Tgv = FNMS(KP198912367, Tgu, Tgt); + TgB = FMA(KP198912367, Tgt, Tgu); + Tgl = FNMS(KP668178637, TfH, TfQ); + TfR = FMA(KP668178637, TfQ, TfH); + } + { + E Thf, TgT, Thb, TgO, Thg, TgW; + Thb = FMA(KP414213562, TgM, TgN); + TgO = FNMS(KP414213562, TgN, TgM); + Thf = FNMS(KP707106781, TgS, TgR); + TgT = FMA(KP707106781, TgS, TgR); + TjE = Thc + Thb; + Thd = Thb - Thc; + TjK = TgL - TgO; + TgP = TgL + TgO; + Thg = FNMS(KP707106781, TgV, TgU); + TgW = FMA(KP707106781, TgV, TgU); + Tgw = FMA(KP707106781, Tg7, TfW); + Tg8 = FNMS(KP707106781, Tg7, TfW); + Thh = FNMS(KP668178637, Thg, Thf); + Thn = FMA(KP668178637, Thf, Thg); + Th8 = FNMS(KP198912367, TgT, TgW); + TgX = FMA(KP198912367, TgW, TgT); + } + } + } + { + E TjH, Th9, TjL, Tjs, TjA, Thl, TjI, Th5, TjM, Thp; + { + E Tgk, Tfq, TgA, Tgs, TjN, Tgy, Tgm, TgD, Tgj, TjO, Tgn, Tgz; + Tgk = FNMS(KP923879532, Tfp, Tf2); + Tfq = FMA(KP923879532, Tfp, Tf2); + TgA = FNMS(KP923879532, Tgr, Tgo); + Tgs = FMA(KP923879532, Tgr, Tgo); + { + E 
TjF, Tgx, Tgh, TjG, TgC, Tgi; + TjH = FNMS(KP923879532, TjE, TjD); + TjF = FMA(KP923879532, TjE, TjD); + Tgx = FMA(KP707106781, Tgg, Tgd); + Tgh = FNMS(KP707106781, Tgg, Tgd); + TjG = Th8 + Th7; + Th9 = Th7 - Th8; + TjL = FMA(KP923879532, TjK, TjJ); + TjN = FNMS(KP923879532, TjK, TjJ); + Tgy = FNMS(KP198912367, Tgx, Tgw); + TgC = FMA(KP198912367, Tgw, Tgx); + Tgm = FNMS(KP668178637, Tg8, Tgh); + Tgi = FMA(KP668178637, Tgh, Tg8); + ci[WS(rs, 61)] = FMA(KP980785280, TjG, TjF); + cr[WS(rs, 34)] = FMS(KP980785280, TjG, TjF); + TgD = TgB + TgC; + Tjs = TgC - TgB; + TjA = Tgi - TfR; + Tgj = TfR + Tgi; + TjO = Thk - Thh; + Thl = Thh + Thk; + } + cr[WS(rs, 14)] = FMA(KP980785280, TgD, TgA); + ci[WS(rs, 17)] = FNMS(KP980785280, TgD, TgA); + cr[WS(rs, 6)] = FMA(KP831469612, Tgj, Tfq); + ci[WS(rs, 25)] = FNMS(KP831469612, Tgj, Tfq); + ci[WS(rs, 53)] = FMA(KP831469612, TjO, TjN); + cr[WS(rs, 42)] = FMS(KP831469612, TjO, TjN); + Tgn = Tgl + Tgm; + Tjy = Tgl - Tgm; + Tgz = Tgv + Tgy; + Tju = Tgy - Tgv; + ci[WS(rs, 9)] = FNMS(KP831469612, Tgn, Tgk); + cr[WS(rs, 22)] = FMA(KP831469612, Tgn, Tgk); + ci[WS(rs, 1)] = FMA(KP980785280, Tgz, Tgs); + cr[WS(rs, 30)] = FNMS(KP980785280, Tgz, Tgs); + TjI = Th4 - TgX; + Th5 = TgX + Th4; + TjM = Thn + Tho; + Thp = Thn - Tho; + } + { + E Th6, The, Tjr, Tjz, TgQ, Thm; + Th6 = FNMS(KP923879532, TgP, TgI); + TgQ = FMA(KP923879532, TgP, TgI); + ci[WS(rs, 45)] = FMA(KP980785280, TjI, TjH); + cr[WS(rs, 50)] = FMS(KP980785280, TjI, TjH); + ci[WS(rs, 37)] = FNMS(KP831469612, TjM, TjL); + cr[WS(rs, 58)] = -(FMA(KP831469612, TjM, TjL)); + The = FMA(KP923879532, Thd, Tha); + Thm = FNMS(KP923879532, Thd, Tha); + Tjt = FNMS(KP923879532, Tjq, Tjp); + Tjr = FMA(KP923879532, Tjq, Tjp); + cr[WS(rs, 2)] = FMA(KP980785280, Th5, TgQ); + ci[WS(rs, 29)] = FNMS(KP980785280, Th5, TgQ); + cr[WS(rs, 10)] = FMA(KP831469612, Thp, Thm); + ci[WS(rs, 21)] = FNMS(KP831469612, Thp, Thm); + Tjx = FMA(KP923879532, Tjw, Tjv); + Tjz = FNMS(KP923879532, Tjw, Tjv); + ci[WS(rs, 33)] 
= FMA(KP980785280, Tjs, Tjr); + cr[WS(rs, 62)] = FMS(KP980785280, Tjs, Tjr); + ci[WS(rs, 41)] = FMA(KP831469612, TjA, Tjz); + cr[WS(rs, 54)] = FMS(KP831469612, TjA, Tjz); + ci[WS(rs, 13)] = FMA(KP980785280, Th9, Th6); + cr[WS(rs, 18)] = FNMS(KP980785280, Th9, Th6); + ci[WS(rs, 5)] = FMA(KP831469612, Thl, The); + cr[WS(rs, 26)] = FNMS(KP831469612, Thl, The); + } + } + } + { + E Tkq, TdN, Tkw, Td1, TdR, TdX, TdI, Tdl, TeG, TeK, Tet, Teq, TeD, TeJ, Teu; + E Tej, Tez, TkK, TkE, Teb, Te2, TcH, Te0, TcE, TkB, Tkn, TdU, TdY, TdH, TdE; + E TcK, Te1; + { + E Te6, Te5, Te9, Te8, Ted, Teh, Teg, Tee, Tdo, TdC, Tdz, Ten, TeE, Tem, Teo; + E Tdv, Tex, Te7; + { + E TcP, TcS, TcW, TcZ; + Te6 = FNMS(KP707106781, TcO, TcN); + TcP = FMA(KP707106781, TcO, TcN); + ci[WS(rs, 49)] = FMA(KP980785280, Tju, Tjt); + cr[WS(rs, 46)] = FMS(KP980785280, Tju, Tjt); + ci[WS(rs, 57)] = FMA(KP831469612, Tjy, Tjx); + cr[WS(rs, 38)] = FMS(KP831469612, Tjy, Tjx); + TcS = FMA(KP707106781, TcR, TcQ); + Te5 = FNMS(KP707106781, TcR, TcQ); + Te9 = FNMS(KP707106781, TcV, TcU); + TcW = FMA(KP707106781, TcV, TcU); + TcZ = FMA(KP707106781, TcY, TcX); + Te8 = FNMS(KP707106781, TcY, TcX); + { + E Tdg, TdP, Tdd, Tdj; + { + E Td5, TdM, TcT, TdL, Td0, Tdc; + Ted = FNMS(KP707106781, Td4, Td3); + Td5 = FMA(KP707106781, Td4, Td3); + TdM = FNMS(KP198912367, TcP, TcS); + TcT = FMA(KP198912367, TcS, TcP); + TdL = FMA(KP198912367, TcW, TcZ); + Td0 = FNMS(KP198912367, TcZ, TcW); + Tdc = Td8 + Tdb; + Teh = Td8 - Tdb; + Teg = FNMS(KP707106781, Tdf, Tde); + Tdg = FMA(KP707106781, Tdf, Tde); + Tkq = TdM + TdL; + TdN = TdL - TdM; + Tkw = TcT - Td0; + Td1 = TcT + Td0; + TdP = FNMS(KP923879532, Tdc, Td5); + Tdd = FMA(KP923879532, Tdc, Td5); + Tdj = Tdh + Tdi; + Tee = Tdi - Tdh; + } + { + E Tek, Tel, TdQ, Tdk; + Tdo = FMA(KP707106781, Tdn, Tdm); + Tek = FNMS(KP707106781, Tdn, Tdm); + Tel = TdB - TdA; + TdC = TdA + TdB; + Tdz = FMA(KP707106781, Tdy, Tdx); + Ten = FNMS(KP707106781, Tdy, Tdx); + TdQ = FNMS(KP923879532, Tdj, Tdg); + Tdk 
= FMA(KP923879532, Tdj, Tdg); + TeE = FMA(KP923879532, Tel, Tek); + Tem = FNMS(KP923879532, Tel, Tek); + TdR = FNMS(KP820678790, TdQ, TdP); + TdX = FMA(KP820678790, TdP, TdQ); + TdI = FNMS(KP098491403, Tdd, Tdk); + Tdl = FMA(KP098491403, Tdk, Tdd); + Teo = Tdu - Tdr; + Tdv = Tdr + Tdu; + } + } + } + { + E TeB, Tef, TeF, Tep, TeC, Tei; + TeF = FNMS(KP923879532, Teo, Ten); + Tep = FMA(KP923879532, Teo, Ten); + TeB = FMA(KP923879532, Tee, Ted); + Tef = FNMS(KP923879532, Tee, Ted); + TeG = FMA(KP303346683, TeF, TeE); + TeK = FNMS(KP303346683, TeE, TeF); + Tet = FMA(KP534511135, Tem, Tep); + Teq = FNMS(KP534511135, Tep, Tem); + TeC = FMA(KP923879532, Teh, Teg); + Tei = FNMS(KP923879532, Teh, Teg); + Tex = FNMS(KP668178637, Te5, Te6); + Te7 = FMA(KP668178637, Te6, Te5); + TeD = FNMS(KP303346683, TeC, TeB); + TeJ = FMA(KP303346683, TeB, TeC); + Teu = FNMS(KP534511135, Tef, Tei); + Tej = FMA(KP534511135, Tei, Tef); + } + { + E TdS, Tdw, Tey, Tea, TdT, TdD; + Tey = FMA(KP668178637, Te8, Te9); + Tea = FNMS(KP668178637, Te9, Te8); + TdS = FNMS(KP923879532, Tdv, Tdo); + Tdw = FMA(KP923879532, Tdv, Tdo); + Tez = Tex + Tey; + TkK = Tey - Tex; + TkE = Te7 + Tea; + Teb = Te7 - Tea; + Te2 = FNMS(KP414213562, TcF, TcG); + TcH = FMA(KP414213562, TcG, TcF); + TdT = FNMS(KP923879532, TdC, Tdz); + TdD = FMA(KP923879532, TdC, Tdz); + Te0 = FNMS(KP707106781, TcD, TcA); + TcE = FMA(KP707106781, TcD, TcA); + TkB = FNMS(KP707106781, Tkm, Tkl); + Tkn = FMA(KP707106781, Tkm, Tkl); + TdU = FMA(KP820678790, TdT, TdS); + TdY = FNMS(KP820678790, TdS, TdT); + TdH = FMA(KP098491403, Tdw, TdD); + TdE = FNMS(KP098491403, TdD, Tdw); + TcK = FNMS(KP414213562, TcJ, TcI); + Te1 = FMA(KP414213562, TcI, TcJ); + } + } + { + E Tkv, Tkp, Tew, Te4, TkH, TeL, TkL, Ter, Tks, TkA, TkI, TeH, TkM, Tev; + { + E TdG, TdO, TkF, TkG, TkN, TkO, TdJ, TdV; + { + E TdF, TkJ, TkD, Td2, TdW, TdZ, TdK; + Tku = TdE - Tdl; + TdF = Tdl + TdE; + { + E TkC, TcL, Tko, Te3, TcM; + TkC = TcH - TcK; + TcL = TcH + TcK; + Tko = Te2 + 
Te1; + Te3 = Te1 - Te2; + TkJ = FNMS(KP923879532, TkC, TkB); + TkD = FMA(KP923879532, TkC, TkB); + TdK = FNMS(KP923879532, TcL, TcE); + TcM = FMA(KP923879532, TcL, TcE); + Tkv = FNMS(KP923879532, Tko, Tkn); + Tkp = FMA(KP923879532, Tko, Tkn); + Tew = FMA(KP923879532, Te3, Te0); + Te4 = FNMS(KP923879532, Te3, Te0); + Td2 = FMA(KP980785280, Td1, TcM); + TdG = FNMS(KP980785280, Td1, TcM); + } + TdO = FMA(KP980785280, TdN, TdK); + TdW = FNMS(KP980785280, TdN, TdK); + TdZ = TdX - TdY; + Tky = TdX + TdY; + TkH = FNMS(KP831469612, TkE, TkD); + TkF = FMA(KP831469612, TkE, TkD); + cr[WS(rs, 1)] = FMA(KP995184726, TdF, Td2); + ci[WS(rs, 30)] = FNMS(KP995184726, TdF, Td2); + cr[WS(rs, 9)] = FMA(KP773010453, TdZ, TdW); + ci[WS(rs, 22)] = FNMS(KP773010453, TdZ, TdW); + TkG = TeJ + TeK; + TeL = TeJ - TeK; + TkL = FMA(KP831469612, TkK, TkJ); + TkN = FNMS(KP831469612, TkK, TkJ); + TkO = Teq - Tej; + Ter = Tej + Teq; + } + ci[WS(rs, 34)] = FNMS(KP956940335, TkG, TkF); + cr[WS(rs, 61)] = -(FMA(KP956940335, TkG, TkF)); + ci[WS(rs, 42)] = FMA(KP881921264, TkO, TkN); + cr[WS(rs, 53)] = FMS(KP881921264, TkO, TkN); + TdJ = TdH - TdI; + Tks = TdI + TdH; + TdV = TdR + TdU; + TkA = TdU - TdR; + ci[WS(rs, 14)] = FMA(KP995184726, TdJ, TdG); + cr[WS(rs, 17)] = FNMS(KP995184726, TdJ, TdG); + ci[WS(rs, 6)] = FMA(KP773010453, TdV, TdO); + cr[WS(rs, 25)] = FNMS(KP773010453, TdV, TdO); + TkI = TeG - TeD; + TeH = TeD + TeG; + TkM = Teu + Tet; + Tev = Tet - Teu; + } + { + E Tes, TeA, Tkr, Tkz, Tec, TeI; + Tes = FNMS(KP831469612, Teb, Te4); + Tec = FMA(KP831469612, Teb, Te4); + ci[WS(rs, 50)] = FMA(KP956940335, TkI, TkH); + cr[WS(rs, 45)] = FMS(KP956940335, TkI, TkH); + ci[WS(rs, 58)] = FMA(KP881921264, TkM, TkL); + cr[WS(rs, 37)] = FMS(KP881921264, TkM, TkL); + TeA = FMA(KP831469612, Tez, Tew); + TeI = FNMS(KP831469612, Tez, Tew); + Tkt = FNMS(KP980785280, Tkq, Tkp); + Tkr = FMA(KP980785280, Tkq, Tkp); + cr[WS(rs, 5)] = FMA(KP881921264, Ter, Tec); + ci[WS(rs, 26)] = FNMS(KP881921264, Ter, Tec); + 
cr[WS(rs, 13)] = FMA(KP956940335, TeL, TeI); + ci[WS(rs, 18)] = FNMS(KP956940335, TeL, TeI); + Tkx = FMA(KP980785280, Tkw, Tkv); + Tkz = FNMS(KP980785280, Tkw, Tkv); + ci[WS(rs, 62)] = FMA(KP995184726, Tks, Tkr); + cr[WS(rs, 33)] = FMS(KP995184726, Tks, Tkr); + ci[WS(rs, 54)] = FMA(KP773010453, TkA, Tkz); + cr[WS(rs, 41)] = FMS(KP773010453, TkA, Tkz); + ci[WS(rs, 10)] = FMA(KP881921264, Tev, Tes); + cr[WS(rs, 21)] = FNMS(KP881921264, Tev, Tes); + ci[WS(rs, 2)] = FMA(KP956940335, TeH, TeA); + cr[WS(rs, 29)] = FNMS(KP956940335, TeH, TeA); + } + } + } + } + } + } + ci[WS(rs, 46)] = FMA(KP995184726, Tku, Tkt); + cr[WS(rs, 49)] = FMS(KP995184726, Tku, Tkt); + ci[WS(rs, 38)] = FNMS(KP773010453, Tky, Tkx); + cr[WS(rs, 57)] = -(FMA(KP773010453, Tky, Tkx)); + } + } +} + +static const tw_instr twinstr[] = { /* Twiddle instruction table referenced by desc: a TW_FULL entry followed by a TW_NEXT entry. NOTE(review): presumably requests the full twiddle set for size 64 - confirm against the tw_instr definition. */ + {TW_FULL, 1, 64}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 64, "hf_64", twinstr, &GENUS, {520, 126, 518, 0} }; /* Descriptor passed at registration: 64, the name "hf_64", the twinstr table, &GENUS and four numbers. NOTE(review): the four numbers look like operation counts (adds, muls, fmas, other) for this HAVE_FMA variant - confirm against hc2hc_desc. */ + +void X(codelet_hf_64) (planner *p) { /* Registers hf_64 together with desc on planner p via X(khc2hc_register). */ + X(khc2hc_register) (p, hf_64, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 64 -dit -name hf_64 -include hf.h */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 176 stack variables, 15 constants, and 256 memory accesses + */ +#include "hf.h" + +static void hf_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726,
+0.995184726672196886244836953109479921575474869); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) { + E Tj, TcL, ThT, Tin, T6b, Taz, TgT, Thn, TG, Thm, TcO, TgO, T6m, Tim, TaC; + E ThQ, T14, Tfr, T6y, T9O, TaG, Tc0, TcU, TeE, T1r, Tfq, T6J, T9P, TaJ, Tc1; + E TcZ, TeF, T1Q, T2d, Tfu, Tfv, Tfw, Tfx, T6Q, TaM, Tdb, TeI, T71, TaQ, T7a; + E TaN, Td6, TeJ, T77, TaP, T2B, T2Y, Tfz, TfA, TfB, TfC, T7h, TaW, Tdm, TeL; + E T7s, TaU, T7B, TaX, Tdh, TeM, T7y, TaT, T5j, TfR, Tec, TeX, TfY, Tgy, T8D; + E Tbl, T8O, Tbx, T9l, Tbm, TdV, Tf0, T9i, Tbw, T3M, TfL, TdL, TeT, TfI, Tgt; + E T7K, Tbd, T7V, Tb3, T8s, Tbe, Tdu, TeQ, T8p, Tb2, T4x, TfJ, TdE, TdM, TfO; + E Tgu, T87, T8u, T8i, T8v, Tba, Tbh, Tdz, TdN, Tb7, Tbg, T64, TfZ, Te5, Ted; + E TfU, Tgz, T90, T9n, T9b, T9o, Tbt, TbA, Te0, Tee, Tbq, Tbz; + { + E T1, TgR, T6, TgQ, Tc, T68, Th, T69; + T1 = cr[0]; + TgR = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 32)]; + T5 = ci[WS(rs, 32)]; + T2 = W[62]; + T4 = W[63]; + T6 = FMA(T2, T3, T4 * T5); + TgQ = FNMS(T4, T3, T2 * T5); + } + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 16)]; + Tb = ci[WS(rs, 16)]; + T8 = W[30]; + Ta = W[31]; + Tc = FMA(T8, T9, Ta * Tb); + T68 = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 48)]; + Tg = ci[WS(rs, 48)]; + Td = W[94]; + Tf = W[95]; + Th = FMA(Td, Te, Tf * Tg); + T69 = FNMS(Tf, Te, Td * Tg); + } + { + E T7, Ti, ThR, ThS; + T7 = T1 + T6; + Ti = Tc + Th; + Tj = 
T7 + Ti; + TcL = T7 - Ti; + ThR = Tc - Th; + ThS = TgR - TgQ; + ThT = ThR + ThS; + Tin = ThS - ThR; + } + { + E T67, T6a, TgP, TgS; + T67 = T1 - T6; + T6a = T68 - T69; + T6b = T67 - T6a; + Taz = T67 + T6a; + TgP = T68 + T69; + TgS = TgQ + TgR; + TgT = TgP + TgS; + Thn = TgS - TgP; + } + } + { + E To, T6d, Tt, T6e, T6c, T6f, Tz, T6i, TE, T6j, T6h, T6k; + { + E Tl, Tn, Tk, Tm; + Tl = cr[WS(rs, 8)]; + Tn = ci[WS(rs, 8)]; + Tk = W[14]; + Tm = W[15]; + To = FMA(Tk, Tl, Tm * Tn); + T6d = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = cr[WS(rs, 40)]; + Ts = ci[WS(rs, 40)]; + Tp = W[78]; + Tr = W[79]; + Tt = FMA(Tp, Tq, Tr * Ts); + T6e = FNMS(Tr, Tq, Tp * Ts); + } + T6c = To - Tt; + T6f = T6d - T6e; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 56)]; + Ty = ci[WS(rs, 56)]; + Tv = W[110]; + Tx = W[111]; + Tz = FMA(Tv, Tw, Tx * Ty); + T6i = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 24)]; + TD = ci[WS(rs, 24)]; + TA = W[46]; + TC = W[47]; + TE = FMA(TA, TB, TC * TD); + T6j = FNMS(TC, TB, TA * TD); + } + T6h = Tz - TE; + T6k = T6i - T6j; + { + E Tu, TF, TcM, TcN; + Tu = To + Tt; + TF = Tz + TE; + TG = Tu + TF; + Thm = Tu - TF; + TcM = T6i + T6j; + TcN = T6d + T6e; + TcO = TcM - TcN; + TgO = TcN + TcM; + } + { + E T6g, T6l, TaA, TaB; + T6g = T6c - T6f; + T6l = T6h + T6k; + T6m = KP707106781 * (T6g + T6l); + Tim = KP707106781 * (T6l - T6g); + TaA = T6c + T6f; + TaB = T6h - T6k; + TaC = KP707106781 * (TaA + TaB); + ThQ = KP707106781 * (TaA - TaB); + } + } + { + E TS, TcR, T6o, T6v, T13, TcS, T6r, T6w, T6s, T6x; + { + E TM, T6t, TR, T6u; + { + E TJ, TL, TI, TK; + TJ = cr[WS(rs, 4)]; + TL = ci[WS(rs, 4)]; + TI = W[6]; + TK = W[7]; + TM = FMA(TI, TJ, TK * TL); + T6t = FNMS(TK, TJ, TI * TL); + } + { + E TO, TQ, TN, TP; + TO = cr[WS(rs, 36)]; + TQ = ci[WS(rs, 36)]; + TN = W[70]; + TP = W[71]; + TR = FMA(TN, TO, TP * TQ); + T6u = FNMS(TP, TO, TN * TQ); + } + TS = TM + TR; + TcR = T6t + T6u; + T6o = TM - TR; + T6v = T6t - T6u; + } + { + E TX, T6p, T12, 
T6q; + { + E TU, TW, TT, TV; + TU = cr[WS(rs, 20)]; + TW = ci[WS(rs, 20)]; + TT = W[38]; + TV = W[39]; + TX = FMA(TT, TU, TV * TW); + T6p = FNMS(TV, TU, TT * TW); + } + { + E TZ, T11, TY, T10; + TZ = cr[WS(rs, 52)]; + T11 = ci[WS(rs, 52)]; + TY = W[102]; + T10 = W[103]; + T12 = FMA(TY, TZ, T10 * T11); + T6q = FNMS(T10, TZ, TY * T11); + } + T13 = TX + T12; + TcS = T6p + T6q; + T6r = T6p - T6q; + T6w = TX - T12; + } + T14 = TS + T13; + Tfr = TcR + TcS; + T6s = T6o - T6r; + T6x = T6v + T6w; + T6y = FNMS(KP382683432, T6x, KP923879532 * T6s); + T9O = FMA(KP923879532, T6x, KP382683432 * T6s); + { + E TaE, TaF, TcQ, TcT; + TaE = T6v - T6w; + TaF = T6o + T6r; + TaG = FMA(KP382683432, TaE, KP923879532 * TaF); + Tc0 = FNMS(KP923879532, TaE, KP382683432 * TaF); + TcQ = TS - T13; + TcT = TcR - TcS; + TcU = TcQ + TcT; + TeE = TcQ - TcT; + } + } + { + E T1f, TcW, T6B, T6E, T1q, TcX, T6C, T6H, T6D, T6I; + { + E T19, T6z, T1e, T6A; + { + E T16, T18, T15, T17; + T16 = cr[WS(rs, 60)]; + T18 = ci[WS(rs, 60)]; + T15 = W[118]; + T17 = W[119]; + T19 = FMA(T15, T16, T17 * T18); + T6z = FNMS(T17, T16, T15 * T18); + } + { + E T1b, T1d, T1a, T1c; + T1b = cr[WS(rs, 28)]; + T1d = ci[WS(rs, 28)]; + T1a = W[54]; + T1c = W[55]; + T1e = FMA(T1a, T1b, T1c * T1d); + T6A = FNMS(T1c, T1b, T1a * T1d); + } + T1f = T19 + T1e; + TcW = T6z + T6A; + T6B = T6z - T6A; + T6E = T19 - T1e; + } + { + E T1k, T6F, T1p, T6G; + { + E T1h, T1j, T1g, T1i; + T1h = cr[WS(rs, 12)]; + T1j = ci[WS(rs, 12)]; + T1g = W[22]; + T1i = W[23]; + T1k = FMA(T1g, T1h, T1i * T1j); + T6F = FNMS(T1i, T1h, T1g * T1j); + } + { + E T1m, T1o, T1l, T1n; + T1m = cr[WS(rs, 44)]; + T1o = ci[WS(rs, 44)]; + T1l = W[86]; + T1n = W[87]; + T1p = FMA(T1l, T1m, T1n * T1o); + T6G = FNMS(T1n, T1m, T1l * T1o); + } + T1q = T1k + T1p; + TcX = T6F + T6G; + T6C = T1k - T1p; + T6H = T6F - T6G; + } + T1r = T1f + T1q; + Tfq = TcW + TcX; + T6D = T6B + T6C; + T6I = T6E - T6H; + T6J = FMA(KP382683432, T6D, KP923879532 * T6I); + T9P = FNMS(KP923879532, T6D, 
KP382683432 * T6I); + { + E TaH, TaI, TcV, TcY; + TaH = T6E + T6H; + TaI = T6B - T6C; + TaJ = FNMS(KP382683432, TaI, KP923879532 * TaH); + Tc1 = FMA(KP923879532, TaI, KP382683432 * TaH); + TcV = T1f - T1q; + TcY = TcW - TcX; + TcZ = TcV - TcY; + TeF = TcV + TcY; + } + } + { + E T1y, T73, T1D, T74, T1E, Td7, T1J, T6N, T1O, T6O, T1P, Td8, T21, Td4, T6R; + E T6U, T2c, Td3, T6W, T6Z; + { + E T1v, T1x, T1u, T1w; + T1v = cr[WS(rs, 2)]; + T1x = ci[WS(rs, 2)]; + T1u = W[2]; + T1w = W[3]; + T1y = FMA(T1u, T1v, T1w * T1x); + T73 = FNMS(T1w, T1v, T1u * T1x); + } + { + E T1A, T1C, T1z, T1B; + T1A = cr[WS(rs, 34)]; + T1C = ci[WS(rs, 34)]; + T1z = W[66]; + T1B = W[67]; + T1D = FMA(T1z, T1A, T1B * T1C); + T74 = FNMS(T1B, T1A, T1z * T1C); + } + T1E = T1y + T1D; + Td7 = T73 + T74; + { + E T1G, T1I, T1F, T1H; + T1G = cr[WS(rs, 18)]; + T1I = ci[WS(rs, 18)]; + T1F = W[34]; + T1H = W[35]; + T1J = FMA(T1F, T1G, T1H * T1I); + T6N = FNMS(T1H, T1G, T1F * T1I); + } + { + E T1L, T1N, T1K, T1M; + T1L = cr[WS(rs, 50)]; + T1N = ci[WS(rs, 50)]; + T1K = W[98]; + T1M = W[99]; + T1O = FMA(T1K, T1L, T1M * T1N); + T6O = FNMS(T1M, T1L, T1K * T1N); + } + T1P = T1J + T1O; + Td8 = T6N + T6O; + { + E T1V, T6S, T20, T6T; + { + E T1S, T1U, T1R, T1T; + T1S = cr[WS(rs, 10)]; + T1U = ci[WS(rs, 10)]; + T1R = W[18]; + T1T = W[19]; + T1V = FMA(T1R, T1S, T1T * T1U); + T6S = FNMS(T1T, T1S, T1R * T1U); + } + { + E T1X, T1Z, T1W, T1Y; + T1X = cr[WS(rs, 42)]; + T1Z = ci[WS(rs, 42)]; + T1W = W[82]; + T1Y = W[83]; + T20 = FMA(T1W, T1X, T1Y * T1Z); + T6T = FNMS(T1Y, T1X, T1W * T1Z); + } + T21 = T1V + T20; + Td4 = T6S + T6T; + T6R = T1V - T20; + T6U = T6S - T6T; + } + { + E T26, T6X, T2b, T6Y; + { + E T23, T25, T22, T24; + T23 = cr[WS(rs, 58)]; + T25 = ci[WS(rs, 58)]; + T22 = W[114]; + T24 = W[115]; + T26 = FMA(T22, T23, T24 * T25); + T6X = FNMS(T24, T23, T22 * T25); + } + { + E T28, T2a, T27, T29; + T28 = cr[WS(rs, 26)]; + T2a = ci[WS(rs, 26)]; + T27 = W[50]; + T29 = W[51]; + T2b = FMA(T27, T28, T29 * T2a); + T6Y = 
FNMS(T29, T28, T27 * T2a); + } + T2c = T26 + T2b; + Td3 = T6X + T6Y; + T6W = T26 - T2b; + T6Z = T6X - T6Y; + } + T1Q = T1E + T1P; + T2d = T21 + T2c; + Tfu = T1Q - T2d; + Tfv = Td7 + Td8; + Tfw = Td4 + Td3; + Tfx = Tfv - Tfw; + { + E T6M, T6P, Td9, Tda; + T6M = T1y - T1D; + T6P = T6N - T6O; + T6Q = T6M - T6P; + TaM = T6M + T6P; + Td9 = Td7 - Td8; + Tda = T21 - T2c; + Tdb = Td9 - Tda; + TeI = Td9 + Tda; + } + { + E T6V, T70, T78, T79; + T6V = T6R - T6U; + T70 = T6W + T6Z; + T71 = KP707106781 * (T6V + T70); + TaQ = KP707106781 * (T70 - T6V); + T78 = T6R + T6U; + T79 = T6Z - T6W; + T7a = KP707106781 * (T78 + T79); + TaN = KP707106781 * (T78 - T79); + } + { + E Td2, Td5, T75, T76; + Td2 = T1E - T1P; + Td5 = Td3 - Td4; + Td6 = Td2 - Td5; + TeJ = Td2 + Td5; + T75 = T73 - T74; + T76 = T1J - T1O; + T77 = T75 + T76; + TaP = T75 - T76; + } + } + { + E T2j, T7u, T2o, T7v, T2p, Tdd, T2u, T7e, T2z, T7f, T2A, Tde, T2M, Tdk, T7i; + E T7l, T2X, Tdj, T7n, T7q; + { + E T2g, T2i, T2f, T2h; + T2g = cr[WS(rs, 62)]; + T2i = ci[WS(rs, 62)]; + T2f = W[122]; + T2h = W[123]; + T2j = FMA(T2f, T2g, T2h * T2i); + T7u = FNMS(T2h, T2g, T2f * T2i); + } + { + E T2l, T2n, T2k, T2m; + T2l = cr[WS(rs, 30)]; + T2n = ci[WS(rs, 30)]; + T2k = W[58]; + T2m = W[59]; + T2o = FMA(T2k, T2l, T2m * T2n); + T7v = FNMS(T2m, T2l, T2k * T2n); + } + T2p = T2j + T2o; + Tdd = T7u + T7v; + { + E T2r, T2t, T2q, T2s; + T2r = cr[WS(rs, 14)]; + T2t = ci[WS(rs, 14)]; + T2q = W[26]; + T2s = W[27]; + T2u = FMA(T2q, T2r, T2s * T2t); + T7e = FNMS(T2s, T2r, T2q * T2t); + } + { + E T2w, T2y, T2v, T2x; + T2w = cr[WS(rs, 46)]; + T2y = ci[WS(rs, 46)]; + T2v = W[90]; + T2x = W[91]; + T2z = FMA(T2v, T2w, T2x * T2y); + T7f = FNMS(T2x, T2w, T2v * T2y); + } + T2A = T2u + T2z; + Tde = T7e + T7f; + { + E T2G, T7j, T2L, T7k; + { + E T2D, T2F, T2C, T2E; + T2D = cr[WS(rs, 6)]; + T2F = ci[WS(rs, 6)]; + T2C = W[10]; + T2E = W[11]; + T2G = FMA(T2C, T2D, T2E * T2F); + T7j = FNMS(T2E, T2D, T2C * T2F); + } + { + E T2I, T2K, T2H, T2J; + T2I = 
cr[WS(rs, 38)]; + T2K = ci[WS(rs, 38)]; + T2H = W[74]; + T2J = W[75]; + T2L = FMA(T2H, T2I, T2J * T2K); + T7k = FNMS(T2J, T2I, T2H * T2K); + } + T2M = T2G + T2L; + Tdk = T7j + T7k; + T7i = T2G - T2L; + T7l = T7j - T7k; + } + { + E T2R, T7o, T2W, T7p; + { + E T2O, T2Q, T2N, T2P; + T2O = cr[WS(rs, 54)]; + T2Q = ci[WS(rs, 54)]; + T2N = W[106]; + T2P = W[107]; + T2R = FMA(T2N, T2O, T2P * T2Q); + T7o = FNMS(T2P, T2O, T2N * T2Q); + } + { + E T2T, T2V, T2S, T2U; + T2T = cr[WS(rs, 22)]; + T2V = ci[WS(rs, 22)]; + T2S = W[42]; + T2U = W[43]; + T2W = FMA(T2S, T2T, T2U * T2V); + T7p = FNMS(T2U, T2T, T2S * T2V); + } + T2X = T2R + T2W; + Tdj = T7o + T7p; + T7n = T2R - T2W; + T7q = T7o - T7p; + } + T2B = T2p + T2A; + T2Y = T2M + T2X; + Tfz = T2B - T2Y; + TfA = Tdd + Tde; + TfB = Tdk + Tdj; + TfC = TfA - TfB; + { + E T7d, T7g, Tdi, Tdl; + T7d = T2j - T2o; + T7g = T7e - T7f; + T7h = T7d - T7g; + TaW = T7d + T7g; + Tdi = T2p - T2A; + Tdl = Tdj - Tdk; + Tdm = Tdi - Tdl; + TeL = Tdi + Tdl; + } + { + E T7m, T7r, T7z, T7A; + T7m = T7i - T7l; + T7r = T7n + T7q; + T7s = KP707106781 * (T7m + T7r); + TaU = KP707106781 * (T7r - T7m); + T7z = T7i + T7l; + T7A = T7q - T7n; + T7B = KP707106781 * (T7z + T7A); + TaX = KP707106781 * (T7z - T7A); + } + { + E Tdf, Tdg, T7w, T7x; + Tdf = Tdd - Tde; + Tdg = T2M - T2X; + Tdh = Tdf - Tdg; + TeM = Tdf + Tdg; + T7w = T7u - T7v; + T7x = T2u - T2z; + T7y = T7w + T7x; + TaT = T7w - T7x; + } + } + { + E T4D, T9e, T4I, T9f, T4J, TdR, T4O, T8A, T4T, T8B, T4U, TdS, T56, Tea, T8E; + E T8H, T5h, Te9, T8J, T8M; + { + E T4A, T4C, T4z, T4B; + T4A = cr[WS(rs, 63)]; + T4C = ci[WS(rs, 63)]; + T4z = W[124]; + T4B = W[125]; + T4D = FMA(T4z, T4A, T4B * T4C); + T9e = FNMS(T4B, T4A, T4z * T4C); + } + { + E T4F, T4H, T4E, T4G; + T4F = cr[WS(rs, 31)]; + T4H = ci[WS(rs, 31)]; + T4E = W[60]; + T4G = W[61]; + T4I = FMA(T4E, T4F, T4G * T4H); + T9f = FNMS(T4G, T4F, T4E * T4H); + } + T4J = T4D + T4I; + TdR = T9e + T9f; + { + E T4L, T4N, T4K, T4M; + T4L = cr[WS(rs, 15)]; + T4N = 
ci[WS(rs, 15)]; + T4K = W[28]; + T4M = W[29]; + T4O = FMA(T4K, T4L, T4M * T4N); + T8A = FNMS(T4M, T4L, T4K * T4N); + } + { + E T4Q, T4S, T4P, T4R; + T4Q = cr[WS(rs, 47)]; + T4S = ci[WS(rs, 47)]; + T4P = W[92]; + T4R = W[93]; + T4T = FMA(T4P, T4Q, T4R * T4S); + T8B = FNMS(T4R, T4Q, T4P * T4S); + } + T4U = T4O + T4T; + TdS = T8A + T8B; + { + E T50, T8F, T55, T8G; + { + E T4X, T4Z, T4W, T4Y; + T4X = cr[WS(rs, 7)]; + T4Z = ci[WS(rs, 7)]; + T4W = W[12]; + T4Y = W[13]; + T50 = FMA(T4W, T4X, T4Y * T4Z); + T8F = FNMS(T4Y, T4X, T4W * T4Z); + } + { + E T52, T54, T51, T53; + T52 = cr[WS(rs, 39)]; + T54 = ci[WS(rs, 39)]; + T51 = W[76]; + T53 = W[77]; + T55 = FMA(T51, T52, T53 * T54); + T8G = FNMS(T53, T52, T51 * T54); + } + T56 = T50 + T55; + Tea = T8F + T8G; + T8E = T50 - T55; + T8H = T8F - T8G; + } + { + E T5b, T8K, T5g, T8L; + { + E T58, T5a, T57, T59; + T58 = cr[WS(rs, 55)]; + T5a = ci[WS(rs, 55)]; + T57 = W[108]; + T59 = W[109]; + T5b = FMA(T57, T58, T59 * T5a); + T8K = FNMS(T59, T58, T57 * T5a); + } + { + E T5d, T5f, T5c, T5e; + T5d = cr[WS(rs, 23)]; + T5f = ci[WS(rs, 23)]; + T5c = W[44]; + T5e = W[45]; + T5g = FMA(T5c, T5d, T5e * T5f); + T8L = FNMS(T5e, T5d, T5c * T5f); + } + T5h = T5b + T5g; + Te9 = T8K + T8L; + T8J = T5b - T5g; + T8M = T8K - T8L; + } + { + E T4V, T5i, Te8, Teb; + T4V = T4J + T4U; + T5i = T56 + T5h; + T5j = T4V + T5i; + TfR = T4V - T5i; + Te8 = T4J - T4U; + Teb = Te9 - Tea; + Tec = Te8 - Teb; + TeX = Te8 + Teb; + } + { + E TfW, TfX, T8z, T8C; + TfW = TdR + TdS; + TfX = Tea + Te9; + TfY = TfW - TfX; + Tgy = TfW + TfX; + T8z = T4D - T4I; + T8C = T8A - T8B; + T8D = T8z - T8C; + Tbl = T8z + T8C; + } + { + E T8I, T8N, T9j, T9k; + T8I = T8E - T8H; + T8N = T8J + T8M; + T8O = KP707106781 * (T8I + T8N); + Tbx = KP707106781 * (T8N - T8I); + T9j = T8E + T8H; + T9k = T8M - T8J; + T9l = KP707106781 * (T9j + T9k); + Tbm = KP707106781 * (T9j - T9k); + } + { + E TdT, TdU, T9g, T9h; + TdT = TdR - TdS; + TdU = T56 - T5h; + TdV = TdT - TdU; + Tf0 = TdT + TdU; + T9g = T9e 
- T9f; + T9h = T4O - T4T; + T9i = T9g + T9h; + Tbw = T9g - T9h; + } + } + { + E T36, T7G, T3b, T7H, T3c, TdH, T3h, T8m, T3m, T8n, T3n, TdI, T3z, Tds, T7L; + E T7O, T3K, Tdr, T7S, T7T; + { + E T33, T35, T32, T34; + T33 = cr[WS(rs, 1)]; + T35 = ci[WS(rs, 1)]; + T32 = W[0]; + T34 = W[1]; + T36 = FMA(T32, T33, T34 * T35); + T7G = FNMS(T34, T33, T32 * T35); + } + { + E T38, T3a, T37, T39; + T38 = cr[WS(rs, 33)]; + T3a = ci[WS(rs, 33)]; + T37 = W[64]; + T39 = W[65]; + T3b = FMA(T37, T38, T39 * T3a); + T7H = FNMS(T39, T38, T37 * T3a); + } + T3c = T36 + T3b; + TdH = T7G + T7H; + { + E T3e, T3g, T3d, T3f; + T3e = cr[WS(rs, 17)]; + T3g = ci[WS(rs, 17)]; + T3d = W[32]; + T3f = W[33]; + T3h = FMA(T3d, T3e, T3f * T3g); + T8m = FNMS(T3f, T3e, T3d * T3g); + } + { + E T3j, T3l, T3i, T3k; + T3j = cr[WS(rs, 49)]; + T3l = ci[WS(rs, 49)]; + T3i = W[96]; + T3k = W[97]; + T3m = FMA(T3i, T3j, T3k * T3l); + T8n = FNMS(T3k, T3j, T3i * T3l); + } + T3n = T3h + T3m; + TdI = T8m + T8n; + { + E T3t, T7M, T3y, T7N; + { + E T3q, T3s, T3p, T3r; + T3q = cr[WS(rs, 9)]; + T3s = ci[WS(rs, 9)]; + T3p = W[16]; + T3r = W[17]; + T3t = FMA(T3p, T3q, T3r * T3s); + T7M = FNMS(T3r, T3q, T3p * T3s); + } + { + E T3v, T3x, T3u, T3w; + T3v = cr[WS(rs, 41)]; + T3x = ci[WS(rs, 41)]; + T3u = W[80]; + T3w = W[81]; + T3y = FMA(T3u, T3v, T3w * T3x); + T7N = FNMS(T3w, T3v, T3u * T3x); + } + T3z = T3t + T3y; + Tds = T7M + T7N; + T7L = T3t - T3y; + T7O = T7M - T7N; + } + { + E T3E, T7Q, T3J, T7R; + { + E T3B, T3D, T3A, T3C; + T3B = cr[WS(rs, 57)]; + T3D = ci[WS(rs, 57)]; + T3A = W[112]; + T3C = W[113]; + T3E = FMA(T3A, T3B, T3C * T3D); + T7Q = FNMS(T3C, T3B, T3A * T3D); + } + { + E T3G, T3I, T3F, T3H; + T3G = cr[WS(rs, 25)]; + T3I = ci[WS(rs, 25)]; + T3F = W[48]; + T3H = W[49]; + T3J = FMA(T3F, T3G, T3H * T3I); + T7R = FNMS(T3H, T3G, T3F * T3I); + } + T3K = T3E + T3J; + Tdr = T7Q + T7R; + T7S = T7Q - T7R; + T7T = T3E - T3J; + } + { + E T3o, T3L, TdJ, TdK; + T3o = T3c + T3n; + T3L = T3z + T3K; + T3M = T3o + T3L; + TfL = 
T3o - T3L; + TdJ = TdH - TdI; + TdK = T3z - T3K; + TdL = TdJ - TdK; + TeT = TdJ + TdK; + } + { + E TfG, TfH, T7I, T7J; + TfG = TdH + TdI; + TfH = Tds + Tdr; + TfI = TfG - TfH; + Tgt = TfG + TfH; + T7I = T7G - T7H; + T7J = T3h - T3m; + T7K = T7I + T7J; + Tbd = T7I - T7J; + } + { + E T7P, T7U, T8q, T8r; + T7P = T7L + T7O; + T7U = T7S - T7T; + T7V = KP707106781 * (T7P + T7U); + Tb3 = KP707106781 * (T7P - T7U); + T8q = T7L - T7O; + T8r = T7T + T7S; + T8s = KP707106781 * (T8q + T8r); + Tbe = KP707106781 * (T8r - T8q); + } + { + E Tdq, Tdt, T8l, T8o; + Tdq = T3c - T3n; + Tdt = Tdr - Tds; + Tdu = Tdq - Tdt; + TeQ = Tdq + Tdt; + T8l = T36 - T3b; + T8o = T8m - T8n; + T8p = T8l - T8o; + Tb2 = T8l + T8o; + } + } + { + E T3X, Tdw, T7Z, T82, T4v, TdB, T8b, T8g, T48, Tdx, T80, T85, T4k, TdA, T8a; + E T8d; + { + E T3R, T7X, T3W, T7Y; + { + E T3O, T3Q, T3N, T3P; + T3O = cr[WS(rs, 5)]; + T3Q = ci[WS(rs, 5)]; + T3N = W[8]; + T3P = W[9]; + T3R = FMA(T3N, T3O, T3P * T3Q); + T7X = FNMS(T3P, T3O, T3N * T3Q); + } + { + E T3T, T3V, T3S, T3U; + T3T = cr[WS(rs, 37)]; + T3V = ci[WS(rs, 37)]; + T3S = W[72]; + T3U = W[73]; + T3W = FMA(T3S, T3T, T3U * T3V); + T7Y = FNMS(T3U, T3T, T3S * T3V); + } + T3X = T3R + T3W; + Tdw = T7X + T7Y; + T7Z = T7X - T7Y; + T82 = T3R - T3W; + } + { + E T4p, T8e, T4u, T8f; + { + E T4m, T4o, T4l, T4n; + T4m = cr[WS(rs, 13)]; + T4o = ci[WS(rs, 13)]; + T4l = W[24]; + T4n = W[25]; + T4p = FMA(T4l, T4m, T4n * T4o); + T8e = FNMS(T4n, T4m, T4l * T4o); + } + { + E T4r, T4t, T4q, T4s; + T4r = cr[WS(rs, 45)]; + T4t = ci[WS(rs, 45)]; + T4q = W[88]; + T4s = W[89]; + T4u = FMA(T4q, T4r, T4s * T4t); + T8f = FNMS(T4s, T4r, T4q * T4t); + } + T4v = T4p + T4u; + TdB = T8e + T8f; + T8b = T4p - T4u; + T8g = T8e - T8f; + } + { + E T42, T83, T47, T84; + { + E T3Z, T41, T3Y, T40; + T3Z = cr[WS(rs, 21)]; + T41 = ci[WS(rs, 21)]; + T3Y = W[40]; + T40 = W[41]; + T42 = FMA(T3Y, T3Z, T40 * T41); + T83 = FNMS(T40, T3Z, T3Y * T41); + } + { + E T44, T46, T43, T45; + T44 = cr[WS(rs, 53)]; + T46 = 
ci[WS(rs, 53)]; + T43 = W[104]; + T45 = W[105]; + T47 = FMA(T43, T44, T45 * T46); + T84 = FNMS(T45, T44, T43 * T46); + } + T48 = T42 + T47; + Tdx = T83 + T84; + T80 = T42 - T47; + T85 = T83 - T84; + } + { + E T4e, T88, T4j, T89; + { + E T4b, T4d, T4a, T4c; + T4b = cr[WS(rs, 61)]; + T4d = ci[WS(rs, 61)]; + T4a = W[120]; + T4c = W[121]; + T4e = FMA(T4a, T4b, T4c * T4d); + T88 = FNMS(T4c, T4b, T4a * T4d); + } + { + E T4g, T4i, T4f, T4h; + T4g = cr[WS(rs, 29)]; + T4i = ci[WS(rs, 29)]; + T4f = W[56]; + T4h = W[57]; + T4j = FMA(T4f, T4g, T4h * T4i); + T89 = FNMS(T4h, T4g, T4f * T4i); + } + T4k = T4e + T4j; + TdA = T88 + T89; + T8a = T88 - T89; + T8d = T4e - T4j; + } + { + E T49, T4w, TdC, TdD; + T49 = T3X + T48; + T4w = T4k + T4v; + T4x = T49 + T4w; + TfJ = T49 - T4w; + TdC = TdA - TdB; + TdD = T4k - T4v; + TdE = TdC - TdD; + TdM = TdD + TdC; + } + { + E TfM, TfN, T81, T86; + TfM = TdA + TdB; + TfN = Tdw + Tdx; + TfO = TfM - TfN; + Tgu = TfN + TfM; + T81 = T7Z + T80; + T86 = T82 - T85; + T87 = FMA(KP923879532, T81, KP382683432 * T86); + T8u = FNMS(KP382683432, T81, KP923879532 * T86); + } + { + E T8c, T8h, Tb8, Tb9; + T8c = T8a + T8b; + T8h = T8d - T8g; + T8i = FNMS(KP382683432, T8h, KP923879532 * T8c); + T8v = FMA(KP382683432, T8c, KP923879532 * T8h); + Tb8 = T8d + T8g; + Tb9 = T8a - T8b; + Tba = FNMS(KP382683432, Tb9, KP923879532 * Tb8); + Tbh = FMA(KP923879532, Tb9, KP382683432 * Tb8); + } + { + E Tdv, Tdy, Tb5, Tb6; + Tdv = T3X - T48; + Tdy = Tdw - Tdx; + Tdz = Tdv + Tdy; + TdN = Tdv - Tdy; + Tb5 = T7Z - T80; + Tb6 = T82 + T85; + Tb7 = FMA(KP382683432, Tb5, KP923879532 * Tb6); + Tbg = FNMS(KP382683432, Tb6, KP923879532 * Tb5); + } + } + { + E T5u, Te2, T8Q, T8X, T62, TdY, T94, T99, T5F, Te3, T8T, T8Y, T5R, TdX, T93; + E T96; + { + E T5o, T8V, T5t, T8W; + { + E T5l, T5n, T5k, T5m; + T5l = cr[WS(rs, 3)]; + T5n = ci[WS(rs, 3)]; + T5k = W[4]; + T5m = W[5]; + T5o = FMA(T5k, T5l, T5m * T5n); + T8V = FNMS(T5m, T5l, T5k * T5n); + } + { + E T5q, T5s, T5p, T5r; + T5q = 
cr[WS(rs, 35)]; + T5s = ci[WS(rs, 35)]; + T5p = W[68]; + T5r = W[69]; + T5t = FMA(T5p, T5q, T5r * T5s); + T8W = FNMS(T5r, T5q, T5p * T5s); + } + T5u = T5o + T5t; + Te2 = T8V + T8W; + T8Q = T5o - T5t; + T8X = T8V - T8W; + } + { + E T5W, T97, T61, T98; + { + E T5T, T5V, T5S, T5U; + T5T = cr[WS(rs, 11)]; + T5V = ci[WS(rs, 11)]; + T5S = W[20]; + T5U = W[21]; + T5W = FMA(T5S, T5T, T5U * T5V); + T97 = FNMS(T5U, T5T, T5S * T5V); + } + { + E T5Y, T60, T5X, T5Z; + T5Y = cr[WS(rs, 43)]; + T60 = ci[WS(rs, 43)]; + T5X = W[84]; + T5Z = W[85]; + T61 = FMA(T5X, T5Y, T5Z * T60); + T98 = FNMS(T5Z, T5Y, T5X * T60); + } + T62 = T5W + T61; + TdY = T97 + T98; + T94 = T5W - T61; + T99 = T97 - T98; + } + { + E T5z, T8R, T5E, T8S; + { + E T5w, T5y, T5v, T5x; + T5w = cr[WS(rs, 19)]; + T5y = ci[WS(rs, 19)]; + T5v = W[36]; + T5x = W[37]; + T5z = FMA(T5v, T5w, T5x * T5y); + T8R = FNMS(T5x, T5w, T5v * T5y); + } + { + E T5B, T5D, T5A, T5C; + T5B = cr[WS(rs, 51)]; + T5D = ci[WS(rs, 51)]; + T5A = W[100]; + T5C = W[101]; + T5E = FMA(T5A, T5B, T5C * T5D); + T8S = FNMS(T5C, T5B, T5A * T5D); + } + T5F = T5z + T5E; + Te3 = T8R + T8S; + T8T = T8R - T8S; + T8Y = T5z - T5E; + } + { + E T5L, T91, T5Q, T92; + { + E T5I, T5K, T5H, T5J; + T5I = cr[WS(rs, 59)]; + T5K = ci[WS(rs, 59)]; + T5H = W[116]; + T5J = W[117]; + T5L = FMA(T5H, T5I, T5J * T5K); + T91 = FNMS(T5J, T5I, T5H * T5K); + } + { + E T5N, T5P, T5M, T5O; + T5N = cr[WS(rs, 27)]; + T5P = ci[WS(rs, 27)]; + T5M = W[52]; + T5O = W[53]; + T5Q = FMA(T5M, T5N, T5O * T5P); + T92 = FNMS(T5O, T5N, T5M * T5P); + } + T5R = T5L + T5Q; + TdX = T91 + T92; + T93 = T91 - T92; + T96 = T5L - T5Q; + } + { + E T5G, T63, Te1, Te4; + T5G = T5u + T5F; + T63 = T5R + T62; + T64 = T5G + T63; + TfZ = T5G - T63; + Te1 = T5u - T5F; + Te4 = Te2 - Te3; + Te5 = Te1 - Te4; + Ted = Te1 + Te4; + } + { + E TfS, TfT, T8U, T8Z; + TfS = TdX + TdY; + TfT = Te2 + Te3; + TfU = TfS - TfT; + Tgz = TfT + TfS; + T8U = T8Q - T8T; + T8Z = T8X + T8Y; + T90 = FNMS(KP382683432, T8Z, KP923879532 * 
T8U); + T9n = FMA(KP923879532, T8Z, KP382683432 * T8U); + } + { + E T95, T9a, Tbr, Tbs; + T95 = T93 + T94; + T9a = T96 - T99; + T9b = FMA(KP382683432, T95, KP923879532 * T9a); + T9o = FNMS(KP382683432, T9a, KP923879532 * T95); + Tbr = T96 + T99; + Tbs = T93 - T94; + Tbt = FNMS(KP382683432, Tbs, KP923879532 * Tbr); + TbA = FMA(KP923879532, Tbs, KP382683432 * Tbr); + } + { + E TdW, TdZ, Tbo, Tbp; + TdW = T5R - T62; + TdZ = TdX - TdY; + Te0 = TdW + TdZ; + Tee = TdZ - TdW; + Tbo = T8X - T8Y; + Tbp = T8Q + T8T; + Tbq = FMA(KP382683432, Tbo, KP923879532 * Tbp); + Tbz = FNMS(KP382683432, Tbp, KP923879532 * Tbo); + } + } + { + E T1t, Tgn, TgK, TgL, TgV, Th1, T30, Th0, T66, TgX, Tgw, TgE, TgB, TgF, Tgq; + E TgM; + { + E TH, T1s, TgI, TgJ; + TH = Tj + TG; + T1s = T14 + T1r; + T1t = TH + T1s; + Tgn = TH - T1s; + TgI = Tgy + Tgz; + TgJ = Tgt + Tgu; + TgK = TgI - TgJ; + TgL = TgJ + TgI; + } + { + E TgN, TgU, T2e, T2Z; + TgN = Tfr + Tfq; + TgU = TgO + TgT; + TgV = TgN + TgU; + Th1 = TgU - TgN; + T2e = T1Q + T2d; + T2Z = T2B + T2Y; + T30 = T2e + T2Z; + Th0 = T2e - T2Z; + } + { + E T4y, T65, Tgs, Tgv; + T4y = T3M + T4x; + T65 = T5j + T64; + T66 = T4y + T65; + TgX = T65 - T4y; + Tgs = T3M - T4x; + Tgv = Tgt - Tgu; + Tgw = Tgs + Tgv; + TgE = Tgs - Tgv; + } + { + E Tgx, TgA, Tgo, Tgp; + Tgx = T5j - T64; + TgA = Tgy - Tgz; + TgB = Tgx - TgA; + TgF = Tgx + TgA; + Tgo = TfA + TfB; + Tgp = Tfv + Tfw; + Tgq = Tgo - Tgp; + TgM = Tgp + Tgo; + } + { + E T31, TgW, TgY, TgH; + T31 = T1t + T30; + ci[WS(rs, 31)] = T31 - T66; + cr[0] = T31 + T66; + TgW = TgM + TgV; + cr[WS(rs, 32)] = TgL - TgW; + ci[WS(rs, 63)] = TgL + TgW; + TgY = TgV - TgM; + cr[WS(rs, 48)] = TgX - TgY; + ci[WS(rs, 47)] = TgX + TgY; + TgH = T1t - T30; + cr[WS(rs, 16)] = TgH - TgK; + ci[WS(rs, 15)] = TgH + TgK; + } + { + E Tgr, TgC, TgZ, Th2; + Tgr = Tgn - Tgq; + TgC = KP707106781 * (Tgw + TgB); + ci[WS(rs, 23)] = Tgr - TgC; + cr[WS(rs, 8)] = Tgr + TgC; + TgZ = KP707106781 * (TgB - Tgw); + Th2 = Th0 + Th1; + cr[WS(rs, 56)] = TgZ 
- Th2; + ci[WS(rs, 39)] = TgZ + Th2; + } + { + E Th3, Th4, TgD, TgG; + Th3 = KP707106781 * (TgF - TgE); + Th4 = Th1 - Th0; + cr[WS(rs, 40)] = Th3 - Th4; + ci[WS(rs, 55)] = Th3 + Th4; + TgD = Tgn + Tgq; + TgG = KP707106781 * (TgE + TgF); + cr[WS(rs, 24)] = TgD - TgG; + ci[WS(rs, 7)] = TgD + TgG; + } + } + { + E T6L, T9x, ThV, Ti1, T7E, Ti0, T9A, ThO, T8y, T9K, T9u, T9E, T9r, T9L, T9v; + E T9H; + { + E T6n, T6K, ThP, ThU; + T6n = T6b + T6m; + T6K = T6y + T6J; + T6L = T6n - T6K; + T9x = T6n + T6K; + ThP = T9O - T9P; + ThU = ThQ + ThT; + ThV = ThP + ThU; + Ti1 = ThU - ThP; + } + { + E T7c, T9y, T7D, T9z; + { + E T72, T7b, T7t, T7C; + T72 = T6Q + T71; + T7b = T77 + T7a; + T7c = FMA(KP195090322, T72, KP980785280 * T7b); + T9y = FNMS(KP195090322, T7b, KP980785280 * T72); + T7t = T7h + T7s; + T7C = T7y + T7B; + T7D = FNMS(KP980785280, T7C, KP195090322 * T7t); + T9z = FMA(KP980785280, T7t, KP195090322 * T7C); + } + T7E = T7c + T7D; + Ti0 = T9z - T9y; + T9A = T9y + T9z; + ThO = T7c - T7D; + } + { + E T8k, T9D, T8x, T9C; + { + E T7W, T8j, T8t, T8w; + T7W = T7K + T7V; + T8j = T87 + T8i; + T8k = T7W - T8j; + T9D = T7W + T8j; + T8t = T8p + T8s; + T8w = T8u + T8v; + T8x = T8t - T8w; + T9C = T8t + T8w; + } + T8y = FMA(KP634393284, T8k, KP773010453 * T8x); + T9K = FMA(KP995184726, T9D, KP098017140 * T9C); + T9u = FNMS(KP773010453, T8k, KP634393284 * T8x); + T9E = FNMS(KP098017140, T9D, KP995184726 * T9C); + } + { + E T9d, T9G, T9q, T9F; + { + E T8P, T9c, T9m, T9p; + T8P = T8D + T8O; + T9c = T90 + T9b; + T9d = T8P - T9c; + T9G = T8P + T9c; + T9m = T9i + T9l; + T9p = T9n + T9o; + T9q = T9m - T9p; + T9F = T9m + T9p; + } + T9r = FNMS(KP634393284, T9q, KP773010453 * T9d); + T9L = FNMS(KP995184726, T9F, KP098017140 * T9G); + T9v = FMA(KP773010453, T9q, KP634393284 * T9d); + T9H = FMA(KP098017140, T9F, KP995184726 * T9G); + } + { + E T7F, T9s, ThZ, Ti2; + T7F = T6L + T7E; + T9s = T8y + T9r; + ci[WS(rs, 24)] = T7F - T9s; + cr[WS(rs, 7)] = T7F + T9s; + ThZ = T9v - T9u; + Ti2 = Ti0 + Ti1; + 
cr[WS(rs, 39)] = ThZ - Ti2; + ci[WS(rs, 56)] = ThZ + Ti2; + } + { + E Ti3, Ti4, T9t, T9w; + Ti3 = T9r - T8y; + Ti4 = Ti1 - Ti0; + cr[WS(rs, 55)] = Ti3 - Ti4; + ci[WS(rs, 40)] = Ti3 + Ti4; + T9t = T6L - T7E; + T9w = T9u + T9v; + cr[WS(rs, 23)] = T9t - T9w; + ci[WS(rs, 8)] = T9t + T9w; + } + { + E T9B, T9I, ThN, ThW; + T9B = T9x + T9A; + T9I = T9E + T9H; + cr[WS(rs, 31)] = T9B - T9I; + ci[0] = T9B + T9I; + ThN = T9L - T9K; + ThW = ThO + ThV; + cr[WS(rs, 63)] = ThN - ThW; + ci[WS(rs, 32)] = ThN + ThW; + } + { + E ThX, ThY, T9J, T9M; + ThX = T9H - T9E; + ThY = ThV - ThO; + cr[WS(rs, 47)] = ThX - ThY; + ci[WS(rs, 48)] = ThX + ThY; + T9J = T9x - T9A; + T9M = T9K + T9L; + ci[WS(rs, 16)] = T9J - T9M; + cr[WS(rs, 15)] = T9J + T9M; + } + } + { + E Tft, Tg7, Tgh, Tgl, Th9, Thf, TfE, Th6, TfQ, Tg4, Tga, The, Tge, Tgk, Tg1; + E Tg5; + { + E Tfp, Tfs, Tgf, Tgg; + Tfp = Tj - TG; + Tfs = Tfq - Tfr; + Tft = Tfp - Tfs; + Tg7 = Tfp + Tfs; + Tgf = TfY + TfZ; + Tgg = TfR + TfU; + Tgh = FMA(KP382683432, Tgf, KP923879532 * Tgg); + Tgl = FNMS(KP923879532, Tgf, KP382683432 * Tgg); + } + { + E Th7, Th8, Tfy, TfD; + Th7 = T14 - T1r; + Th8 = TgT - TgO; + Th9 = Th7 + Th8; + Thf = Th8 - Th7; + Tfy = Tfu + Tfx; + TfD = Tfz - TfC; + TfE = KP707106781 * (Tfy + TfD); + Th6 = KP707106781 * (Tfy - TfD); + } + { + E TfK, TfP, Tg8, Tg9; + TfK = TfI - TfJ; + TfP = TfL - TfO; + TfQ = FMA(KP382683432, TfK, KP923879532 * TfP); + Tg4 = FNMS(KP923879532, TfK, KP382683432 * TfP); + Tg8 = Tfu - Tfx; + Tg9 = Tfz + TfC; + Tga = KP707106781 * (Tg8 + Tg9); + The = KP707106781 * (Tg9 - Tg8); + } + { + E Tgc, Tgd, TfV, Tg0; + Tgc = TfL + TfO; + Tgd = TfI + TfJ; + Tge = FNMS(KP382683432, Tgd, KP923879532 * Tgc); + Tgk = FMA(KP923879532, Tgd, KP382683432 * Tgc); + TfV = TfR - TfU; + Tg0 = TfY - TfZ; + Tg1 = FNMS(KP382683432, Tg0, KP923879532 * TfV); + Tg5 = FMA(KP923879532, Tg0, KP382683432 * TfV); + } + { + E TfF, Tg2, Thd, Thg; + TfF = Tft + TfE; + Tg2 = TfQ + Tg1; + ci[WS(rs, 27)] = TfF - Tg2; + cr[WS(rs, 4)] = TfF 
+ Tg2; + Thd = Tg5 - Tg4; + Thg = The + Thf; + cr[WS(rs, 36)] = Thd - Thg; + ci[WS(rs, 59)] = Thd + Thg; + } + { + E Thh, Thi, Tg3, Tg6; + Thh = Tg1 - TfQ; + Thi = Thf - The; + cr[WS(rs, 52)] = Thh - Thi; + ci[WS(rs, 43)] = Thh + Thi; + Tg3 = Tft - TfE; + Tg6 = Tg4 + Tg5; + cr[WS(rs, 20)] = Tg3 - Tg6; + ci[WS(rs, 11)] = Tg3 + Tg6; + } + { + E Tgb, Tgi, Th5, Tha; + Tgb = Tg7 + Tga; + Tgi = Tge + Tgh; + cr[WS(rs, 28)] = Tgb - Tgi; + ci[WS(rs, 3)] = Tgb + Tgi; + Th5 = Tgl - Tgk; + Tha = Th6 + Th9; + cr[WS(rs, 60)] = Th5 - Tha; + ci[WS(rs, 35)] = Th5 + Tha; + } + { + E Thb, Thc, Tgj, Tgm; + Thb = Tgh - Tge; + Thc = Th9 - Th6; + cr[WS(rs, 44)] = Thb - Thc; + ci[WS(rs, 51)] = Thb + Thc; + Tgj = Tg7 - Tga; + Tgm = Tgk + Tgl; + ci[WS(rs, 19)] = Tgj - Tgm; + cr[WS(rs, 12)] = Tgj + Tgm; + } + } + { + E TeH, Tf9, TeO, Thk, Thp, Thv, Tfc, Thu, Tf3, Tfn, Tf7, Tfj, TeW, Tfm, Tf6; + E Tfg; + { + E TeD, TeG, Tfa, Tfb; + TeD = TcL + TcO; + TeG = KP707106781 * (TeE + TeF); + TeH = TeD - TeG; + Tf9 = TeD + TeG; + { + E TeK, TeN, Thl, Tho; + TeK = FMA(KP923879532, TeI, KP382683432 * TeJ); + TeN = FNMS(KP923879532, TeM, KP382683432 * TeL); + TeO = TeK + TeN; + Thk = TeK - TeN; + Thl = KP707106781 * (TcU - TcZ); + Tho = Thm + Thn; + Thp = Thl + Tho; + Thv = Tho - Thl; + } + Tfa = FNMS(KP382683432, TeI, KP923879532 * TeJ); + Tfb = FMA(KP382683432, TeM, KP923879532 * TeL); + Tfc = Tfa + Tfb; + Thu = Tfb - Tfa; + { + E TeZ, Tfh, Tf2, Tfi, TeY, Tf1; + TeY = KP707106781 * (Te5 + Te0); + TeZ = TeX - TeY; + Tfh = TeX + TeY; + Tf1 = KP707106781 * (Ted + Tee); + Tf2 = Tf0 - Tf1; + Tfi = Tf0 + Tf1; + Tf3 = FNMS(KP555570233, Tf2, KP831469612 * TeZ); + Tfn = FMA(KP980785280, Tfh, KP195090322 * Tfi); + Tf7 = FMA(KP555570233, TeZ, KP831469612 * Tf2); + Tfj = FNMS(KP980785280, Tfi, KP195090322 * Tfh); + } + { + E TeS, Tfe, TeV, Tff, TeR, TeU; + TeR = KP707106781 * (TdN + TdM); + TeS = TeQ - TeR; + Tfe = TeQ + TeR; + TeU = KP707106781 * (Tdz + TdE); + TeV = TeT - TeU; + Tff = TeT + TeU; + TeW = 
FMA(KP831469612, TeS, KP555570233 * TeV); + Tfm = FNMS(KP195090322, Tff, KP980785280 * Tfe); + Tf6 = FNMS(KP831469612, TeV, KP555570233 * TeS); + Tfg = FMA(KP195090322, Tfe, KP980785280 * Tff); + } + } + { + E TeP, Tf4, Tht, Thw; + TeP = TeH + TeO; + Tf4 = TeW + Tf3; + ci[WS(rs, 25)] = TeP - Tf4; + cr[WS(rs, 6)] = TeP + Tf4; + Tht = Tf7 - Tf6; + Thw = Thu + Thv; + cr[WS(rs, 38)] = Tht - Thw; + ci[WS(rs, 57)] = Tht + Thw; + } + { + E Thx, Thy, Tf5, Tf8; + Thx = Tf3 - TeW; + Thy = Thv - Thu; + cr[WS(rs, 54)] = Thx - Thy; + ci[WS(rs, 41)] = Thx + Thy; + Tf5 = TeH - TeO; + Tf8 = Tf6 + Tf7; + cr[WS(rs, 22)] = Tf5 - Tf8; + ci[WS(rs, 9)] = Tf5 + Tf8; + } + { + E Tfd, Tfk, Thj, Thq; + Tfd = Tf9 - Tfc; + Tfk = Tfg + Tfj; + ci[WS(rs, 17)] = Tfd - Tfk; + cr[WS(rs, 14)] = Tfd + Tfk; + Thj = Tfj - Tfg; + Thq = Thk + Thp; + cr[WS(rs, 62)] = Thj - Thq; + ci[WS(rs, 33)] = Thj + Thq; + } + { + E Thr, Ths, Tfl, Tfo; + Thr = Tfn - Tfm; + Ths = Thp - Thk; + cr[WS(rs, 46)] = Thr - Ths; + ci[WS(rs, 49)] = Thr + Ths; + Tfl = Tf9 + Tfc; + Tfo = Tfm + Tfn; + cr[WS(rs, 30)] = Tfl - Tfo; + ci[WS(rs, 1)] = Tfl + Tfo; + } + } + { + E Td1, Ten, Tdo, ThA, ThD, ThJ, Teq, ThI, Teh, TeB, Tel, Tex, TdQ, TeA, Tek; + E Teu; + { + E TcP, Td0, Teo, Tep; + TcP = TcL - TcO; + Td0 = KP707106781 * (TcU + TcZ); + Td1 = TcP - Td0; + Ten = TcP + Td0; + { + E Tdc, Tdn, ThB, ThC; + Tdc = FNMS(KP923879532, Tdb, KP382683432 * Td6); + Tdn = FMA(KP923879532, Tdh, KP382683432 * Tdm); + Tdo = Tdc + Tdn; + ThA = Tdn - Tdc; + ThB = KP707106781 * (TeF - TeE); + ThC = Thn - Thm; + ThD = ThB + ThC; + ThJ = ThC - ThB; + } + Teo = FMA(KP382683432, Tdb, KP923879532 * Td6); + Tep = FNMS(KP382683432, Tdh, KP923879532 * Tdm); + Teq = Teo + Tep; + ThI = Teo - Tep; + { + E Te7, Tew, Teg, Tev, Te6, Tef; + Te6 = KP707106781 * (Te0 - Te5); + Te7 = TdV - Te6; + Tew = TdV + Te6; + Tef = KP707106781 * (Ted - Tee); + Teg = Tec - Tef; + Tev = Tec + Tef; + Teh = FMA(KP555570233, Te7, KP831469612 * Teg); + TeB = FMA(KP980785280, Tew, 
KP195090322 * Tev); + Tel = FNMS(KP831469612, Te7, KP555570233 * Teg); + Tex = FNMS(KP195090322, Tew, KP980785280 * Tev); + } + { + E TdG, Tet, TdP, Tes, TdF, TdO; + TdF = KP707106781 * (Tdz - TdE); + TdG = Tdu - TdF; + Tet = Tdu + TdF; + TdO = KP707106781 * (TdM - TdN); + TdP = TdL - TdO; + Tes = TdL + TdO; + TdQ = FNMS(KP555570233, TdP, KP831469612 * TdG); + TeA = FNMS(KP980785280, Tes, KP195090322 * Tet); + Tek = FMA(KP831469612, TdP, KP555570233 * TdG); + Teu = FMA(KP195090322, Tes, KP980785280 * Tet); + } + } + { + E Tdp, Tei, ThH, ThK; + Tdp = Td1 + Tdo; + Tei = TdQ + Teh; + cr[WS(rs, 26)] = Tdp - Tei; + ci[WS(rs, 5)] = Tdp + Tei; + ThH = Tel - Tek; + ThK = ThI + ThJ; + cr[WS(rs, 58)] = ThH - ThK; + ci[WS(rs, 37)] = ThH + ThK; + } + { + E ThL, ThM, Tej, Tem; + ThL = Teh - TdQ; + ThM = ThJ - ThI; + cr[WS(rs, 42)] = ThL - ThM; + ci[WS(rs, 53)] = ThL + ThM; + Tej = Td1 - Tdo; + Tem = Tek + Tel; + ci[WS(rs, 21)] = Tej - Tem; + cr[WS(rs, 10)] = Tej + Tem; + } + { + E Ter, Tey, Thz, ThE; + Ter = Ten + Teq; + Tey = Teu + Tex; + ci[WS(rs, 29)] = Ter - Tey; + cr[WS(rs, 2)] = Ter + Tey; + Thz = TeB - TeA; + ThE = ThA + ThD; + cr[WS(rs, 34)] = Thz - ThE; + ci[WS(rs, 61)] = Thz + ThE; + } + { + E ThF, ThG, Tez, TeC; + ThF = Tex - Teu; + ThG = ThD - ThA; + cr[WS(rs, 50)] = ThF - ThG; + ci[WS(rs, 45)] = ThF + ThG; + Tez = Ten - Teq; + TeC = TeA + TeB; + cr[WS(rs, 18)] = Tez - TeC; + ci[WS(rs, 13)] = Tez + TeC; + } + } + { + E Tc3, Tcv, TiD, TiJ, Tca, TiI, Tcy, TiA, Tci, TcI, Tcs, TcC, Tcp, TcJ, Tct; + E TcF; + { + E TbZ, Tc2, TiB, TiC; + TbZ = Taz - TaC; + Tc2 = Tc0 + Tc1; + Tc3 = TbZ - Tc2; + Tcv = TbZ + Tc2; + TiB = TaG - TaJ; + TiC = Tin - Tim; + TiD = TiB + TiC; + TiJ = TiC - TiB; + } + { + E Tc6, Tcw, Tc9, Tcx; + { + E Tc4, Tc5, Tc7, Tc8; + Tc4 = TaP - TaQ; + Tc5 = TaM - TaN; + Tc6 = FMA(KP831469612, Tc4, KP555570233 * Tc5); + Tcw = FNMS(KP555570233, Tc4, KP831469612 * Tc5); + Tc7 = TaW - TaX; + Tc8 = TaT - TaU; + Tc9 = FNMS(KP831469612, Tc8, KP555570233 * Tc7); + Tcx 
= FMA(KP555570233, Tc8, KP831469612 * Tc7); + } + Tca = Tc6 + Tc9; + TiI = Tcx - Tcw; + Tcy = Tcw + Tcx; + TiA = Tc6 - Tc9; + } + { + E Tce, TcB, Tch, TcA; + { + E Tcc, Tcd, Tcf, Tcg; + Tcc = Tbd - Tbe; + Tcd = Tb7 - Tba; + Tce = Tcc - Tcd; + TcB = Tcc + Tcd; + Tcf = Tb2 - Tb3; + Tcg = Tbh - Tbg; + Tch = Tcf - Tcg; + TcA = Tcf + Tcg; + } + Tci = FMA(KP471396736, Tce, KP881921264 * Tch); + TcI = FMA(KP956940335, TcB, KP290284677 * TcA); + Tcs = FNMS(KP881921264, Tce, KP471396736 * Tch); + TcC = FNMS(KP290284677, TcB, KP956940335 * TcA); + } + { + E Tcl, TcE, Tco, TcD; + { + E Tcj, Tck, Tcm, Tcn; + Tcj = Tbl - Tbm; + Tck = TbA - Tbz; + Tcl = Tcj - Tck; + TcE = Tcj + Tck; + Tcm = Tbw - Tbx; + Tcn = Tbq - Tbt; + Tco = Tcm - Tcn; + TcD = Tcm + Tcn; + } + Tcp = FNMS(KP471396736, Tco, KP881921264 * Tcl); + TcJ = FNMS(KP956940335, TcD, KP290284677 * TcE); + Tct = FMA(KP881921264, Tco, KP471396736 * Tcl); + TcF = FMA(KP290284677, TcD, KP956940335 * TcE); + } + { + E Tcb, Tcq, TiH, TiK; + Tcb = Tc3 + Tca; + Tcq = Tci + Tcp; + ci[WS(rs, 26)] = Tcb - Tcq; + cr[WS(rs, 5)] = Tcb + Tcq; + TiH = Tct - Tcs; + TiK = TiI + TiJ; + cr[WS(rs, 37)] = TiH - TiK; + ci[WS(rs, 58)] = TiH + TiK; + } + { + E TiL, TiM, Tcr, Tcu; + TiL = Tcp - Tci; + TiM = TiJ - TiI; + cr[WS(rs, 53)] = TiL - TiM; + ci[WS(rs, 42)] = TiL + TiM; + Tcr = Tc3 - Tca; + Tcu = Tcs + Tct; + cr[WS(rs, 21)] = Tcr - Tcu; + ci[WS(rs, 10)] = Tcr + Tcu; + } + { + E Tcz, TcG, Tiz, TiE; + Tcz = Tcv + Tcy; + TcG = TcC + TcF; + cr[WS(rs, 29)] = Tcz - TcG; + ci[WS(rs, 2)] = Tcz + TcG; + Tiz = TcJ - TcI; + TiE = TiA + TiD; + cr[WS(rs, 61)] = Tiz - TiE; + ci[WS(rs, 34)] = Tiz + TiE; + } + { + E TiF, TiG, TcH, TcK; + TiF = TcF - TcC; + TiG = TiD - TiA; + cr[WS(rs, 45)] = TiF - TiG; + ci[WS(rs, 50)] = TiF + TiG; + TcH = Tcv - Tcy; + TcK = TcI + TcJ; + ci[WS(rs, 18)] = TcH - TcK; + cr[WS(rs, 13)] = TcH + TcK; + } + } + { + E TaL, TbJ, Tip, Tiv, Tb0, Tiu, TbM, Tik, Tbk, TbW, TbG, TbQ, TbD, TbX, TbH; + E TbT; + { + E TaD, TaK, Til, Tio; + 
TaD = Taz + TaC; + TaK = TaG + TaJ; + TaL = TaD - TaK; + TbJ = TaD + TaK; + Til = Tc1 - Tc0; + Tio = Tim + Tin; + Tip = Til + Tio; + Tiv = Tio - Til; + } + { + E TaS, TbK, TaZ, TbL; + { + E TaO, TaR, TaV, TaY; + TaO = TaM + TaN; + TaR = TaP + TaQ; + TaS = FNMS(KP980785280, TaR, KP195090322 * TaO); + TbK = FMA(KP195090322, TaR, KP980785280 * TaO); + TaV = TaT + TaU; + TaY = TaW + TaX; + TaZ = FMA(KP980785280, TaV, KP195090322 * TaY); + TbL = FNMS(KP195090322, TaV, KP980785280 * TaY); + } + Tb0 = TaS + TaZ; + Tiu = TbK - TbL; + TbM = TbK + TbL; + Tik = TaZ - TaS; + } + { + E Tbc, TbO, Tbj, TbP; + { + E Tb4, Tbb, Tbf, Tbi; + Tb4 = Tb2 + Tb3; + Tbb = Tb7 + Tba; + Tbc = Tb4 - Tbb; + TbO = Tb4 + Tbb; + Tbf = Tbd + Tbe; + Tbi = Tbg + Tbh; + Tbj = Tbf - Tbi; + TbP = Tbf + Tbi; + } + Tbk = FMA(KP634393284, Tbc, KP773010453 * Tbj); + TbW = FNMS(KP995184726, TbP, KP098017140 * TbO); + TbG = FNMS(KP634393284, Tbj, KP773010453 * Tbc); + TbQ = FMA(KP995184726, TbO, KP098017140 * TbP); + } + { + E Tbv, TbR, TbC, TbS; + { + E Tbn, Tbu, Tby, TbB; + Tbn = Tbl + Tbm; + Tbu = Tbq + Tbt; + Tbv = Tbn - Tbu; + TbR = Tbn + Tbu; + Tby = Tbw + Tbx; + TbB = Tbz + TbA; + TbC = Tby - TbB; + TbS = Tby + TbB; + } + TbD = FNMS(KP773010453, TbC, KP634393284 * Tbv); + TbX = FMA(KP098017140, TbR, KP995184726 * TbS); + TbH = FMA(KP773010453, Tbv, KP634393284 * TbC); + TbT = FNMS(KP098017140, TbS, KP995184726 * TbR); + } + { + E Tb1, TbE, Tit, Tiw; + Tb1 = TaL - Tb0; + TbE = Tbk + TbD; + ci[WS(rs, 22)] = Tb1 - TbE; + cr[WS(rs, 9)] = Tb1 + TbE; + Tit = TbD - Tbk; + Tiw = Tiu + Tiv; + cr[WS(rs, 57)] = Tit - Tiw; + ci[WS(rs, 38)] = Tit + Tiw; + } + { + E Tix, Tiy, TbF, TbI; + Tix = TbH - TbG; + Tiy = Tiv - Tiu; + cr[WS(rs, 41)] = Tix - Tiy; + ci[WS(rs, 54)] = Tix + Tiy; + TbF = TaL + Tb0; + TbI = TbG + TbH; + cr[WS(rs, 25)] = TbF - TbI; + ci[WS(rs, 6)] = TbF + TbI; + } + { + E TbN, TbU, Tij, Tiq; + TbN = TbJ + TbM; + TbU = TbQ + TbT; + ci[WS(rs, 30)] = TbN - TbU; + cr[WS(rs, 1)] = TbN + TbU; + Tij = TbX 
- TbW; + Tiq = Tik + Tip; + cr[WS(rs, 33)] = Tij - Tiq; + ci[WS(rs, 62)] = Tij + Tiq; + } + { + E Tir, Tis, TbV, TbY; + Tir = TbT - TbQ; + Tis = Tip - Tik; + cr[WS(rs, 49)] = Tir - Tis; + ci[WS(rs, 46)] = Tir + Tis; + TbV = TbJ - TbM; + TbY = TbW + TbX; + cr[WS(rs, 17)] = TbV - TbY; + ci[WS(rs, 14)] = TbV + TbY; + } + } + { + E T9R, Taj, Ti9, Tif, T9Y, Tie, Tam, Ti6, Ta6, Taw, Tag, Taq, Tad, Tax, Tah; + E Tat; + { + E T9N, T9Q, Ti7, Ti8; + T9N = T6b - T6m; + T9Q = T9O + T9P; + T9R = T9N - T9Q; + Taj = T9N + T9Q; + Ti7 = T6J - T6y; + Ti8 = ThT - ThQ; + Ti9 = Ti7 + Ti8; + Tif = Ti8 - Ti7; + } + { + E T9U, Tak, T9X, Tal; + { + E T9S, T9T, T9V, T9W; + T9S = T6Q - T71; + T9T = T77 - T7a; + T9U = FNMS(KP831469612, T9T, KP555570233 * T9S); + Tak = FMA(KP831469612, T9S, KP555570233 * T9T); + T9V = T7h - T7s; + T9W = T7y - T7B; + T9X = FMA(KP555570233, T9V, KP831469612 * T9W); + Tal = FNMS(KP555570233, T9W, KP831469612 * T9V); + } + T9Y = T9U + T9X; + Tie = Tak - Tal; + Tam = Tak + Tal; + Ti6 = T9X - T9U; + } + { + E Ta2, Tao, Ta5, Tap; + { + E Ta0, Ta1, Ta3, Ta4; + Ta0 = T8p - T8s; + Ta1 = T87 - T8i; + Ta2 = Ta0 - Ta1; + Tao = Ta0 + Ta1; + Ta3 = T7K - T7V; + Ta4 = T8v - T8u; + Ta5 = Ta3 - Ta4; + Tap = Ta3 + Ta4; + } + Ta6 = FMA(KP471396736, Ta2, KP881921264 * Ta5); + Taw = FNMS(KP956940335, Tap, KP290284677 * Tao); + Tag = FNMS(KP471396736, Ta5, KP881921264 * Ta2); + Taq = FMA(KP956940335, Tao, KP290284677 * Tap); + } + { + E Ta9, Tar, Tac, Tas; + { + E Ta7, Ta8, Taa, Tab; + Ta7 = T8D - T8O; + Ta8 = T9n - T9o; + Ta9 = Ta7 - Ta8; + Tar = Ta7 + Ta8; + Taa = T9i - T9l; + Tab = T9b - T90; + Tac = Taa - Tab; + Tas = Taa + Tab; + } + Tad = FNMS(KP881921264, Tac, KP471396736 * Ta9); + Tax = FMA(KP290284677, Tar, KP956940335 * Tas); + Tah = FMA(KP881921264, Ta9, KP471396736 * Tac); + Tat = FNMS(KP290284677, Tas, KP956940335 * Tar); + } + { + E T9Z, Tae, Tid, Tig; + T9Z = T9R - T9Y; + Tae = Ta6 + Tad; + ci[WS(rs, 20)] = T9Z - Tae; + cr[WS(rs, 11)] = T9Z + Tae; + Tid = Tad - Ta6; + 
Tig = Tie + Tif; + cr[WS(rs, 59)] = Tid - Tig; + ci[WS(rs, 36)] = Tid + Tig; + } + { + E Tih, Tii, Taf, Tai; + Tih = Tah - Tag; + Tii = Tif - Tie; + cr[WS(rs, 43)] = Tih - Tii; + ci[WS(rs, 52)] = Tih + Tii; + Taf = T9R + T9Y; + Tai = Tag + Tah; + cr[WS(rs, 27)] = Taf - Tai; + ci[WS(rs, 4)] = Taf + Tai; + } + { + E Tan, Tau, Ti5, Tia; + Tan = Taj + Tam; + Tau = Taq + Tat; + ci[WS(rs, 28)] = Tan - Tau; + cr[WS(rs, 3)] = Tan + Tau; + Ti5 = Tax - Taw; + Tia = Ti6 + Ti9; + cr[WS(rs, 35)] = Ti5 - Tia; + ci[WS(rs, 60)] = Ti5 + Tia; + } + { + E Tib, Tic, Tav, Tay; + Tib = Tat - Taq; + Tic = Ti9 - Ti6; + cr[WS(rs, 51)] = Tib - Tic; + ci[WS(rs, 44)] = Tib + Tic; + Tav = Taj - Tam; + Tay = Taw + Tax; + cr[WS(rs, 19)] = Tav - Tay; + ci[WS(rs, 12)] = Tav + Tay; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 64}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 64, "hf_64", twinstr, &GENUS, {808, 270, 230, 0} }; + +void X(codelet_hf_64) (planner *p) { + X(khc2hc_register) (p, hf_64, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 7 -dit -name hf_7 -include hf.h */ + +/* + * This function contains 72 FP additions, 66 FP multiplications, + * (or, 18 additions, 12 multiplications, 54 fused multiply/add), + * 62 stack variables, 6 constants, and 28 memory accesses + */ +#include "hf.h" + +static void hf_7(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 12); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) { + E T1, TR, T18, T10, T12, T16, T11, T13; + { + E T19, T1a, T1i, Te, Tt, Tw, T1b, TM, T1h, Tr, Tu, TS, Tz, TC, Ty; + E Tv, TB; + T1 = cr[0]; + T19 = ci[0]; + { + E T9, Tc, TP, Ta, Tb, TO, T7; + { + E T3, T6, T8, TN, T4, T2, T5; + T3 = cr[WS(rs, 1)]; + T6 = ci[WS(rs, 1)]; + T2 = W[0]; + T9 = cr[WS(rs, 6)]; + Tc = ci[WS(rs, 6)]; + T8 = W[10]; + TN = T2 * T6; + T4 = T2 * T3; + T5 = W[1]; + TP = T8 * Tc; + Ta = T8 * T9; + Tb = W[11]; + TO = FNMS(T5, T3, TN); + T7 = FMA(T5, T6, T4); + } + { + E Tg, Tj, Th, TI, Tm, Tp, Tl, Ti, To, TQ, Td, Tf; + Tg = cr[WS(rs, 2)]; + TQ = 
FNMS(Tb, T9, TP); + Td = FMA(Tb, Tc, Ta); + Tj = ci[WS(rs, 2)]; + Tf = W[2]; + T1a = TO + TQ; + TR = TO - TQ; + T1i = Td - T7; + Te = T7 + Td; + Th = Tf * Tg; + TI = Tf * Tj; + Tm = cr[WS(rs, 5)]; + Tp = ci[WS(rs, 5)]; + Tl = W[8]; + Ti = W[3]; + To = W[9]; + { + E TJ, Tk, TL, Tq, TK, Tn, Ts; + Tt = cr[WS(rs, 3)]; + TK = Tl * Tp; + Tn = Tl * Tm; + TJ = FNMS(Ti, Tg, TI); + Tk = FMA(Ti, Tj, Th); + TL = FNMS(To, Tm, TK); + Tq = FMA(To, Tp, Tn); + Tw = ci[WS(rs, 3)]; + Ts = W[4]; + T1b = TJ + TL; + TM = TJ - TL; + T1h = Tq - Tk; + Tr = Tk + Tq; + Tu = Ts * Tt; + TS = Ts * Tw; + } + Tz = cr[WS(rs, 4)]; + TC = ci[WS(rs, 4)]; + Ty = W[6]; + Tv = W[5]; + TB = W[7]; + } + } + { + E TF, TT, Tx, TV, TD, T1q, TU, TA; + TF = FNMS(KP356895867, Tr, Te); + TU = Ty * TC; + TA = Ty * Tz; + TT = FNMS(Tv, Tt, TS); + Tx = FMA(Tv, Tw, Tu); + TV = FNMS(TB, Tz, TU); + TD = FMA(TB, TC, TA); + T1q = FNMS(KP356895867, T1b, T1a); + { + E TW, TE, T1k, T1f; + { + E T1e, T1s, TY, T1p, T1u, TH, T1n, T1j, T1c, T1g; + T1j = FNMS(KP554958132, T1i, T1h); + T1c = TT + TV; + TW = TT - TV; + T1g = TD - Tx; + TE = Tx + TD; + { + E T1d, T1l, T1r, TX; + T1d = FNMS(KP356895867, T1c, T1b); + T1l = FNMS(KP356895867, T1a, T1c); + T1r = FNMS(KP692021471, T1q, T1c); + ci[WS(rs, 6)] = T1a + T1b + T1c + T19; + TX = FMA(KP554958132, TW, TR); + { + E T1o, T1t, TG, T1m; + T1o = FMA(KP554958132, T1h, T1g); + T1t = FMA(KP554958132, T1g, T1i); + TG = FNMS(KP692021471, TF, TE); + cr[0] = T1 + Te + Tr + TE; + T1e = FNMS(KP692021471, T1d, T1a); + T1m = FNMS(KP692021471, T1l, T1b); + T1s = FNMS(KP900968867, T1r, T19); + TY = FMA(KP801937735, TX, TM); + T1p = FNMS(KP801937735, T1o, T1i); + T1u = FMA(KP801937735, T1t, T1h); + TH = FNMS(KP900968867, TG, T1); + T1n = FNMS(KP900968867, T1m, T19); + T1k = FNMS(KP801937735, T1j, T1g); + } + } + ci[WS(rs, 5)] = FMA(KP974927912, T1u, T1s); + cr[WS(rs, 6)] = FMS(KP974927912, T1u, T1s); + cr[WS(rs, 1)] = FMA(KP974927912, TY, TH); + ci[0] = FNMS(KP974927912, TY, TH); + ci[WS(rs, 4)] = 
FMA(KP974927912, T1p, T1n); + cr[WS(rs, 5)] = FMS(KP974927912, T1p, T1n); + T1f = FNMS(KP900968867, T1e, T19); + } + { + E T14, T17, T15, TZ; + T14 = FNMS(KP356895867, TE, Tr); + T17 = FNMS(KP554958132, TR, TM); + TZ = FNMS(KP356895867, Te, TE); + ci[WS(rs, 3)] = FMA(KP974927912, T1k, T1f); + cr[WS(rs, 4)] = FMS(KP974927912, T1k, T1f); + T15 = FNMS(KP692021471, T14, Te); + T18 = FNMS(KP801937735, T17, TW); + T10 = FNMS(KP692021471, TZ, Tr); + T12 = FMA(KP554958132, TM, TW); + T16 = FNMS(KP900968867, T15, T1); + } + } + } + } + T11 = FNMS(KP900968867, T10, T1); + T13 = FNMS(KP801937735, T12, TR); + cr[WS(rs, 3)] = FMA(KP974927912, T18, T16); + ci[WS(rs, 2)] = FNMS(KP974927912, T18, T16); + cr[WS(rs, 2)] = FMA(KP974927912, T13, T11); + ci[WS(rs, 1)] = FNMS(KP974927912, T13, T11); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 7, "hf_7", twinstr, &GENUS, {18, 12, 54, 0} }; + +void X(codelet_hf_7) (planner *p) { + X(khc2hc_register) (p, hf_7, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 7 -dit -name hf_7 -include hf.h */ + +/* + * This function contains 72 FP additions, 60 FP multiplications, + * (or, 36 additions, 24 multiplications, 36 fused multiply/add), + * 29 stack variables, 6 constants, and 28 memory accesses + */ +#include "hf.h" + +static void hf_7(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 12); m < me; m = m + 1, cr = cr 
+ ms, ci = ci - ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) { + E T1, TT, Tc, TV, TC, TO, Tn, TS, TI, TP, Ty, TU, TF, TQ; + T1 = cr[0]; + TT = ci[0]; + { + E T6, TA, Tb, TB; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 1)]; + T5 = ci[WS(rs, 1)]; + T2 = W[0]; + T4 = W[1]; + T6 = FMA(T2, T3, T4 * T5); + TA = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 6)]; + Ta = ci[WS(rs, 6)]; + T7 = W[10]; + T9 = W[11]; + Tb = FMA(T7, T8, T9 * Ta); + TB = FNMS(T9, T8, T7 * Ta); + } + Tc = T6 + Tb; + TV = TA + TB; + TC = TA - TB; + TO = Tb - T6; + } + { + E Th, TG, Tm, TH; + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 2)]; + Tg = ci[WS(rs, 2)]; + Td = W[2]; + Tf = W[3]; + Th = FMA(Td, Te, Tf * Tg); + TG = FNMS(Tf, Te, Td * Tg); + } + { + E Tj, Tl, Ti, Tk; + Tj = cr[WS(rs, 5)]; + Tl = ci[WS(rs, 5)]; + Ti = W[8]; + Tk = W[9]; + Tm = FMA(Ti, Tj, Tk * Tl); + TH = FNMS(Tk, Tj, Ti * Tl); + } + Tn = Th + Tm; + TS = TG + TH; + TI = TG - TH; + TP = Th - Tm; + } + { + E Ts, TD, Tx, TE; + { + E Tp, Tr, To, Tq; + Tp = cr[WS(rs, 3)]; + Tr = ci[WS(rs, 3)]; + To = W[4]; + Tq = W[5]; + Ts = FMA(To, Tp, Tq * Tr); + TD = FNMS(Tq, Tp, To * Tr); + } + { + E Tu, Tw, Tt, Tv; + Tu = cr[WS(rs, 4)]; + Tw = ci[WS(rs, 4)]; + Tt = W[6]; + Tv = W[7]; + Tx = FMA(Tt, Tu, Tv * Tw); + TE = FNMS(Tv, Tu, Tt * Tw); + } + Ty = Ts + Tx; + TU = TD + TE; + TF = TD - TE; + TQ = Tx - Ts; + } + { + E TL, TK, TZ, T10; + cr[0] = T1 + Tc + Tn + Ty; + TL = FMA(KP781831482, TC, KP974927912 * TI) + (KP433883739 * TF); + TK = FMA(KP623489801, Tc, T1) + FNMA(KP900968867, Ty, KP222520933 * Tn); + ci[0] = TK - TL; + cr[WS(rs, 1)] = TK + TL; + ci[WS(rs, 6)] = TV + TS + TU + TT; + TZ = FMA(KP781831482, TO, KP433883739 * TQ) - (KP974927912 * TP); + T10 = FMA(KP623489801, TV, TT) + FNMA(KP900968867, TU, KP222520933 * TS); + cr[WS(rs, 6)] = TZ - T10; + ci[WS(rs, 5)] = TZ + T10; + } + { + E TX, TY, TR, TW; + TX = FMA(KP974927912, TO, KP433883739 * TP) - (KP781831482 * TQ); + TY = FMA(KP623489801, TU, TT) + FNMA(KP900968867, 
TS, KP222520933 * TV); + cr[WS(rs, 5)] = TX - TY; + ci[WS(rs, 4)] = TX + TY; + TR = FMA(KP433883739, TO, KP781831482 * TP) + (KP974927912 * TQ); + TW = FMA(KP623489801, TS, TT) + FNMA(KP222520933, TU, KP900968867 * TV); + cr[WS(rs, 4)] = TR - TW; + ci[WS(rs, 3)] = TR + TW; + } + { + E TN, TM, TJ, Tz; + TN = FMA(KP433883739, TC, KP974927912 * TF) - (KP781831482 * TI); + TM = FMA(KP623489801, Tn, T1) + FNMA(KP222520933, Ty, KP900968867 * Tc); + ci[WS(rs, 2)] = TM - TN; + cr[WS(rs, 3)] = TM + TN; + TJ = FNMS(KP781831482, TF, KP974927912 * TC) - (KP433883739 * TI); + Tz = FMA(KP623489801, Ty, T1) + FNMA(KP900968867, Tn, KP222520933 * Tc); + ci[WS(rs, 1)] = Tz - TJ; + cr[WS(rs, 2)] = Tz + TJ; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 7}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 7, "hf_7", twinstr, &GENUS, {36, 24, 36, 0} }; + +void X(codelet_hf_7) (planner *p) { + X(khc2hc_register) (p, hf_7, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hf_8 -include hf.h */ + +/* + * This function contains 66 FP additions, 36 FP multiplications, + * (or, 44 additions, 14 multiplications, 22 fused multiply/add), + * 61 stack variables, 1 constants, and 32 memory accesses + */ +#include "hf.h" + +static void hf_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) { + E T1f, T1g, T1e, Tm, T1q, T1o, T1p, TN, T1h, T1i; + { + E T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, T17, TM, T12, Tu, TW; + E Tp, Tx, Tt, Tq, Tw; + { + E T3, T6, T2, T5; + T1 = cr[0]; + T1m = ci[0]; + T3 = cr[WS(rs, 4)]; + T6 = ci[WS(rs, 4)]; + T2 = W[6]; + T5 = W[7]; + { + E Ta, Td, T9, Tc; + { + E Tg, Tj, Ti, TR, Th, T1k, T4, Tf; + Tg = cr[WS(rs, 6)]; + Tj = ci[WS(rs, 6)]; + T1k = T2 * T6; + T4 = T2 * T3; + Tf = W[10]; + Ti = W[11]; + T1l = FNMS(T5, T3, T1k); + T7 = FMA(T5, T6, T4); + TR = Tf * Tj; + Th = Tf * Tg; + Ta = cr[WS(rs, 2)]; + Td = ci[WS(rs, 2)]; + TS = FNMS(Ti, Tg, TR); + Tk = FMA(Ti, Tj, Th); + T9 = W[2]; + Tc = W[3]; + } + { + E TB, TE, TH, T13, TC, TK, TG, TD, TJ, TP, Tb, TA, Tn; + TB = cr[WS(rs, 7)]; + TE = ci[WS(rs, 7)]; + TP = T9 * Td; + Tb = T9 * Ta; + TA = W[12]; + TH = cr[WS(rs, 3)]; + TQ = FNMS(Tc, Ta, TP); + Te = FMA(Tc, Td, Tb); + T13 = TA * TE; + TC = TA * TB; + TK = 
ci[WS(rs, 3)]; + TG = W[4]; + TD = W[13]; + TJ = W[5]; + { + E T14, TF, T16, TL, T15, TI; + To = cr[WS(rs, 1)]; + T15 = TG * TK; + TI = TG * TH; + T14 = FNMS(TD, TB, T13); + TF = FMA(TD, TE, TC); + T16 = FNMS(TJ, TH, T15); + TL = FMA(TJ, TK, TI); + Tr = ci[WS(rs, 1)]; + Tn = W[0]; + T17 = T14 - T16; + T1f = T14 + T16; + TM = TF + TL; + T12 = TF - TL; + } + Tu = cr[WS(rs, 5)]; + TW = Tn * Tr; + Tp = Tn * To; + Tx = ci[WS(rs, 5)]; + Tt = W[8]; + Tq = W[1]; + Tw = W[9]; + } + } + } + { + E T8, T1j, Tl, Tz, T1a, TU, T1n, T1b, T1c, T1v, T1t, T1u, T19, T1w, T1d; + { + E T1r, T10, TV, T1s, T11, T18; + { + E TO, TX, Ts, TZ, Ty, TT, TY, Tv; + T8 = T1 + T7; + TO = T1 - T7; + TY = Tt * Tx; + Tv = Tt * Tu; + TX = FNMS(Tq, To, TW); + Ts = FMA(Tq, Tr, Tp); + TZ = FNMS(Tw, Tu, TY); + Ty = FMA(Tw, Tx, Tv); + TT = TQ - TS; + T1j = TQ + TS; + Tl = Te + Tk; + T1r = Te - Tk; + T10 = TX - TZ; + T1g = TX + TZ; + Tz = Ts + Ty; + TV = Ts - Ty; + T1a = TO - TT; + TU = TO + TT; + T1s = T1m - T1l; + T1n = T1l + T1m; + } + T1b = TV - T10; + T11 = TV + T10; + T18 = T12 - T17; + T1c = T12 + T17; + T1v = T1s - T1r; + T1t = T1r + T1s; + T1u = T18 - T11; + T19 = T11 + T18; + } + ci[WS(rs, 4)] = FMA(KP707106781, T1u, T1t); + cr[WS(rs, 7)] = FMS(KP707106781, T1u, T1t); + cr[WS(rs, 1)] = FMA(KP707106781, T19, TU); + ci[WS(rs, 2)] = FNMS(KP707106781, T19, TU); + T1w = T1c - T1b; + T1d = T1b + T1c; + ci[WS(rs, 6)] = FMA(KP707106781, T1w, T1v); + cr[WS(rs, 5)] = FMS(KP707106781, T1w, T1v); + ci[0] = FMA(KP707106781, T1d, T1a); + cr[WS(rs, 3)] = FNMS(KP707106781, T1d, T1a); + T1e = T8 - Tl; + Tm = T8 + Tl; + T1q = T1n - T1j; + T1o = T1j + T1n; + T1p = TM - Tz; + TN = Tz + TM; + } + } + ci[WS(rs, 5)] = T1p + T1q; + cr[WS(rs, 6)] = T1p - T1q; + cr[0] = Tm + TN; + ci[WS(rs, 3)] = Tm - TN; + T1h = T1f - T1g; + T1i = T1g + T1f; + ci[WS(rs, 7)] = T1i + T1o; + cr[WS(rs, 4)] = T1i - T1o; + ci[WS(rs, 1)] = T1e + T1h; + cr[WS(rs, 2)] = T1e - T1h; + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, 
+ {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hf_8", twinstr, &GENUS, {44, 14, 22, 0} }; + +void X(codelet_hf_8) (planner *p) { + X(khc2hc_register) (p, hf_8, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 8 -dit -name hf_8 -include hf.h */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 28 stack variables, 1 constants, and 32 memory accesses + */ +#include "hf.h" + +static void hf_8(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) { + E T7, T1f, TH, T19, TF, T12, TR, TU, Ti, T1e, TK, T16, Tu, T13, TM; + E TP; + { + E T1, T18, T6, T17; + T1 = cr[0]; + T18 = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 4)]; + T5 = ci[WS(rs, 4)]; + T2 = W[6]; + T4 = W[7]; + T6 = FMA(T2, T3, T4 * T5); + T17 = FNMS(T4, T3, T2 * T5); + } + T7 = T1 + T6; + T1f = T18 - T17; + TH = T1 - T6; + T19 = T17 + T18; + } + { + E Tz, TS, TE, TT; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 7)]; + Ty = ci[WS(rs, 7)]; + Tv = W[12]; + Tx = W[13]; + Tz = FMA(Tv, Tw, Tx * Ty); + TS = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 3)]; + TD = ci[WS(rs, 3)]; + TA = W[4]; + TC = W[5]; + TE = FMA(TA, TB, TC * TD); + TT = FNMS(TC, TB, TA * TD); + } + TF = Tz + TE; + T12 = TS + TT; + TR = Tz - TE; + TU = TS - TT; + } + { + E Tc, TI, Th, TJ; + { + E T9, Tb, T8, Ta; + T9 = cr[WS(rs, 2)]; + Tb = ci[WS(rs, 2)]; + T8 = W[2]; + Ta = W[3]; + Tc = FMA(T8, T9, Ta * Tb); + TI = FNMS(Ta, T9, T8 * Tb); + } + { + E Te, Tg, Td, Tf; + Te = cr[WS(rs, 6)]; + Tg = ci[WS(rs, 6)]; + Td = W[10]; + Tf = W[11]; + Th = FMA(Td, Te, Tf * Tg); + TJ = FNMS(Tf, Te, Td * Tg); + } + Ti = Tc + Th; + T1e = 
Tc - Th; + TK = TI - TJ; + T16 = TI + TJ; + } + { + E To, TN, Tt, TO; + { + E Tl, Tn, Tk, Tm; + Tl = cr[WS(rs, 1)]; + Tn = ci[WS(rs, 1)]; + Tk = W[0]; + Tm = W[1]; + To = FMA(Tk, Tl, Tm * Tn); + TN = FNMS(Tm, Tl, Tk * Tn); + } + { + E Tq, Ts, Tp, Tr; + Tq = cr[WS(rs, 5)]; + Ts = ci[WS(rs, 5)]; + Tp = W[8]; + Tr = W[9]; + Tt = FMA(Tp, Tq, Tr * Ts); + TO = FNMS(Tr, Tq, Tp * Ts); + } + Tu = To + Tt; + T13 = TN + TO; + TM = To - Tt; + TP = TN - TO; + } + { + E Tj, TG, T1b, T1c; + Tj = T7 + Ti; + TG = Tu + TF; + ci[WS(rs, 3)] = Tj - TG; + cr[0] = Tj + TG; + T1b = TF - Tu; + T1c = T19 - T16; + cr[WS(rs, 6)] = T1b - T1c; + ci[WS(rs, 5)] = T1b + T1c; + { + E TX, T1i, T10, T1h, TY, TZ; + TX = TH - TK; + T1i = T1f - T1e; + TY = TM - TP; + TZ = TR + TU; + T10 = KP707106781 * (TY + TZ); + T1h = KP707106781 * (TZ - TY); + cr[WS(rs, 3)] = TX - T10; + ci[WS(rs, 6)] = T1h + T1i; + ci[0] = TX + T10; + cr[WS(rs, 5)] = T1h - T1i; + } + } + { + E T15, T1a, T11, T14; + T15 = T13 + T12; + T1a = T16 + T19; + cr[WS(rs, 4)] = T15 - T1a; + ci[WS(rs, 7)] = T15 + T1a; + T11 = T7 - Ti; + T14 = T12 - T13; + cr[WS(rs, 2)] = T11 - T14; + ci[WS(rs, 1)] = T11 + T14; + { + E TL, T1g, TW, T1d, TQ, TV; + TL = TH + TK; + T1g = T1e + T1f; + TQ = TM + TP; + TV = TR - TU; + TW = KP707106781 * (TQ + TV); + T1d = KP707106781 * (TV - TQ); + ci[WS(rs, 2)] = TL - TW; + ci[WS(rs, 4)] = T1d + T1g; + cr[WS(rs, 1)] = TL + TW; + cr[WS(rs, 7)] = T1d - T1g; + } + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 8}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 8, "hf_8", twinstr, &GENUS, {52, 18, 14, 0} }; + +void X(codelet_hf_8) (planner *p) { + X(khc2hc_register) (p, hf_8, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/hf_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * 
Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:09 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -dit -name hf_9 -include hf.h */ + +/* + * This function contains 96 FP additions, 88 FP multiplications, + * (or, 24 additions, 16 multiplications, 72 fused multiply/add), + * 69 stack variables, 10 constants, and 36 memory accesses + */ +#include "hf.h" + +static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP777861913, +0.777861913430206160028177977318626690410586096); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP839099631, +0.839099631177280011763127298123181364687434283); + DK(KP492403876, +0.492403876506104029683371512294761506835321626); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP954188894, +0.954188894138671133499268364187245676532219158); + DK(KP363970234, +0.363970234266202361351047882776834043890471784); + DK(KP176326980, 
+0.176326980708464973471090386868618986121633062); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { + E T20, T1Z; + { + E T1, T1P, T1Q, T10, T1S, Te, TB, T1d, T1a, T19, T1M, TE, T1c, Tz, T1n; + E TC, TH, TK, T1k, TR, TG, TJ, TD; + T1 = cr[0]; + T1P = ci[0]; + { + E T9, Tc, TY, Ta, Tb, TX, T7; + { + E T3, T6, T8, TW, T4, T2, T5; + T3 = cr[WS(rs, 3)]; + T6 = ci[WS(rs, 3)]; + T2 = W[4]; + T9 = cr[WS(rs, 6)]; + Tc = ci[WS(rs, 6)]; + T8 = W[10]; + TW = T2 * T6; + T4 = T2 * T3; + T5 = W[5]; + TY = T8 * Tc; + Ta = T8 * T9; + Tb = W[11]; + TX = FNMS(T5, T3, TW); + T7 = FMA(T5, T6, T4); + } + { + E Th, Tk, Ti, T12, Tn, Tq, Tp, T17, Tx, T14, To, Tj, TZ, Td, Tg; + E TA, Tl, Ty; + Th = cr[WS(rs, 1)]; + TZ = FNMS(Tb, T9, TY); + Td = FMA(Tb, Tc, Ta); + Tk = ci[WS(rs, 1)]; + Tg = W[0]; + T1Q = TX + TZ; + T10 = TX - TZ; + T1S = Td - T7; + Te = T7 + Td; + Ti = Tg * Th; + T12 = Tg * Tk; + { + E Tt, Tw, Ts, Tv, T16, Tu, Tm; + Tt = cr[WS(rs, 7)]; + Tw = ci[WS(rs, 7)]; + Ts = W[12]; + Tv = W[13]; + Tn = cr[WS(rs, 4)]; + Tq = ci[WS(rs, 4)]; + T16 = Ts * Tw; + Tu = Ts * Tt; + Tm = W[6]; + Tp = W[7]; + T17 = FNMS(Tv, Tt, T16); + Tx = FMA(Tv, Tw, Tu); + T14 = Tm * Tq; + To = Tm * Tn; + } + Tj = W[1]; + TB = cr[WS(rs, 2)]; + { + E T15, Tr, T13, T18; + T15 = FNMS(Tp, Tn, T14); + Tr = FMA(Tp, Tq, To); + T13 = FNMS(Tj, Th, T12); + Tl = FMA(Tj, Tk, Ti); + T18 = T15 + T17; + T1d = T15 - T17; + Ty = Tr + Tx; + T1a = Tr - Tx; + T19 = FNMS(KP500000000, T18, T13); + T1M = T13 + T18; + TE = ci[WS(rs, 2)]; + } + T1c = FNMS(KP500000000, Ty, Tl); + Tz = Tl + Ty; + TA = W[2]; + { + E TN, TQ, TP, T1j, TO, TM; + TN = cr[WS(rs, 8)]; + TQ = ci[WS(rs, 8)]; + TM = W[14]; + T1n = TA * TE; + TC = TA * TB; + TP = W[15]; + T1j = TM * TQ; + TO = TM * TN; + TH = cr[WS(rs, 5)]; + TK = 
ci[WS(rs, 5)]; + T1k = FNMS(TP, TN, T1j); + TR = FMA(TP, TQ, TO); + TG = W[8]; + TJ = W[9]; + } + TD = W[3]; + } + } + { + E TV, Tf, T21, T1R, T1l, T1r, T1q, T1N, TT, T1g; + { + E T1o, TF, T1i, TL, T1h, TI, TS, T1p; + TV = FNMS(KP500000000, Te, T1); + Tf = T1 + Te; + T1h = TG * TK; + TI = TG * TH; + T1o = FNMS(TD, TB, T1n); + TF = FMA(TD, TE, TC); + T1i = FNMS(TJ, TH, T1h); + TL = FMA(TJ, TK, TI); + T21 = T1Q + T1P; + T1R = FNMS(KP500000000, T1Q, T1P); + T1p = T1i + T1k; + T1l = T1i - T1k; + TS = TL + TR; + T1r = TR - TL; + T1q = FNMS(KP500000000, T1p, T1o); + T1N = T1o + T1p; + TT = TF + TS; + T1g = FNMS(KP500000000, TS, TF); + } + { + E T11, T1z, T1E, T1D, T1X, T1T, T1I, T1C, T1Y, T1y, T1u, T24, TU; + T24 = TT - Tz; + TU = Tz + TT; + { + E T22, T1O, T1L, T23; + T22 = T1M + T1N; + T1O = T1M - T1N; + T11 = FNMS(KP866025403, T10, TV); + T1z = FMA(KP866025403, T10, TV); + T1L = FNMS(KP500000000, TU, Tf); + cr[0] = Tf + TU; + T23 = FNMS(KP500000000, T22, T21); + ci[WS(rs, 8)] = T22 + T21; + cr[WS(rs, 3)] = FMA(KP866025403, T1O, T1L); + ci[WS(rs, 2)] = FNMS(KP866025403, T1O, T1L); + ci[WS(rs, 5)] = FMA(KP866025403, T24, T23); + cr[WS(rs, 6)] = FMS(KP866025403, T24, T23); + } + { + E T1B, T1m, T1w, T1f, T1s, T1A, T1b, T1e, T1x, T1t; + T1E = FNMS(KP866025403, T1a, T19); + T1b = FMA(KP866025403, T1a, T19); + T1e = FNMS(KP866025403, T1d, T1c); + T1D = FMA(KP866025403, T1d, T1c); + T1B = FMA(KP866025403, T1l, T1g); + T1m = FNMS(KP866025403, T1l, T1g); + T1X = FNMS(KP866025403, T1S, T1R); + T1T = FMA(KP866025403, T1S, T1R); + T1w = FNMS(KP176326980, T1b, T1e); + T1f = FMA(KP176326980, T1e, T1b); + T1s = FNMS(KP866025403, T1r, T1q); + T1A = FMA(KP866025403, T1r, T1q); + T1x = FMA(KP363970234, T1m, T1s); + T1t = FNMS(KP363970234, T1s, T1m); + T1I = FNMS(KP176326980, T1A, T1B); + T1C = FMA(KP176326980, T1B, T1A); + T1Y = FMA(KP954188894, T1x, T1w); + T1y = FNMS(KP954188894, T1x, T1w); + T20 = FMA(KP954188894, T1t, T1f); + T1u = FNMS(KP954188894, T1t, T1f); + } + { + E T1F, T1J, 
T1v, T1U, T1K; + ci[WS(rs, 6)] = FNMS(KP984807753, T1Y, T1X); + T1v = FNMS(KP492403876, T1u, T11); + cr[WS(rs, 2)] = FMA(KP984807753, T1u, T11); + T1F = FMA(KP839099631, T1E, T1D); + T1J = FNMS(KP839099631, T1D, T1E); + ci[WS(rs, 3)] = FNMS(KP852868531, T1y, T1v); + ci[0] = FMA(KP852868531, T1y, T1v); + T1U = FNMS(KP777861913, T1J, T1I); + T1K = FMA(KP777861913, T1J, T1I); + { + E T1G, T1W, T1V, T1H; + T1G = FMA(KP777861913, T1F, T1C); + T1W = FNMS(KP777861913, T1F, T1C); + T1Z = FMA(KP492403876, T1Y, T1X); + T1V = FMA(KP492403876, T1U, T1T); + ci[WS(rs, 7)] = FNMS(KP984807753, T1U, T1T); + T1H = FNMS(KP492403876, T1G, T1z); + cr[WS(rs, 1)] = FMA(KP984807753, T1G, T1z); + ci[WS(rs, 4)] = FMA(KP852868531, T1W, T1V); + cr[WS(rs, 7)] = FMS(KP852868531, T1W, T1V); + cr[WS(rs, 4)] = FMA(KP852868531, T1K, T1H); + ci[WS(rs, 1)] = FNMS(KP852868531, T1K, T1H); + } + } + } + } + } + cr[WS(rs, 8)] = -(FMA(KP852868531, T20, T1Z)); + cr[WS(rs, 5)] = FMS(KP852868531, T20, T1Z); + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 9}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 9, "hf_9", twinstr, &GENUS, {24, 16, 72, 0} }; + +void X(codelet_hf_9) (planner *p) { + X(khc2hc_register) (p, hf_9, &desc); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 9 -dit -name hf_9 -include hf.h */ + +/* + * This function contains 96 FP additions, 72 FP multiplications, + * (or, 60 additions, 36 multiplications, 36 fused multiply/add), + * 41 stack variables, 8 constants, and 36 memory accesses + */ +#include "hf.h" + +static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP342020143, +0.342020143325668733044099614682259580763083368); + DK(KP984807753, 
+0.984807753012208059366743024589523013670643252); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { + E T1, T1B, TQ, T1A, Tc, TN, T1C, T1D, TL, T1x, T19, T1o, T1c, T1n, Tu; + E T1w, TW, T1k, T11, T1l; + { + E T6, TO, Tb, TP; + T1 = cr[0]; + T1B = ci[0]; + { + E T3, T5, T2, T4; + T3 = cr[WS(rs, 3)]; + T5 = ci[WS(rs, 3)]; + T2 = W[4]; + T4 = W[5]; + T6 = FMA(T2, T3, T4 * T5); + TO = FNMS(T4, T3, T2 * T5); + } + { + E T8, Ta, T7, T9; + T8 = cr[WS(rs, 6)]; + Ta = ci[WS(rs, 6)]; + T7 = W[10]; + T9 = W[11]; + Tb = FMA(T7, T8, T9 * Ta); + TP = FNMS(T9, T8, T7 * Ta); + } + TQ = KP866025403 * (TO - TP); + T1A = KP866025403 * (Tb - T6); + Tc = T6 + Tb; + TN = FNMS(KP500000000, Tc, T1); + T1C = TO + TP; + T1D = FNMS(KP500000000, T1C, T1B); + } + { + E Tz, T13, TE, T14, TJ, T15, TK, T16; + { + E Tw, Ty, Tv, Tx; + Tw = cr[WS(rs, 2)]; + Ty = ci[WS(rs, 2)]; + Tv = W[2]; + Tx = W[3]; + Tz = FMA(Tv, Tw, Tx * Ty); + T13 = FNMS(Tx, Tw, Tv * Ty); + } + { + E TB, TD, TA, TC; + TB = cr[WS(rs, 5)]; + TD = ci[WS(rs, 5)]; + TA = W[8]; + TC = W[9]; + TE = FMA(TA, TB, TC * TD); + T14 = FNMS(TC, TB, TA * TD); + } + { + E TG, TI, TF, TH; + TG = cr[WS(rs, 8)]; + TI = ci[WS(rs, 8)]; + TF = W[14]; + TH = W[15]; + TJ = FMA(TF, TG, TH * TI); + T15 = FNMS(TH, TG, TF * TI); + } + TK = TE + TJ; + T16 = T14 + T15; + TL = Tz + TK; + T1x = T13 + T16; + { + E T17, T18, T1a, T1b; + T17 = FNMS(KP500000000, T16, T13); + T18 = KP866025403 * (TJ - TE); + T19 = T17 - T18; + T1o = T18 + T17; + T1a = FNMS(KP500000000, TK, Tz); + T1b = KP866025403 * (T14 - T15); + T1c = T1a - T1b; + T1n = T1a + T1b; + } + } + { + E Ti, TX, Tn, TT, Ts, TU, Tt, TY; + { + E Tf, Th, Te, Tg; + Tf = cr[WS(rs, 1)]; + Th = ci[WS(rs, 1)]; + Te 
= W[0]; + Tg = W[1]; + Ti = FMA(Te, Tf, Tg * Th); + TX = FNMS(Tg, Tf, Te * Th); + } + { + E Tk, Tm, Tj, Tl; + Tk = cr[WS(rs, 4)]; + Tm = ci[WS(rs, 4)]; + Tj = W[6]; + Tl = W[7]; + Tn = FMA(Tj, Tk, Tl * Tm); + TT = FNMS(Tl, Tk, Tj * Tm); + } + { + E Tp, Tr, To, Tq; + Tp = cr[WS(rs, 7)]; + Tr = ci[WS(rs, 7)]; + To = W[12]; + Tq = W[13]; + Ts = FMA(To, Tp, Tq * Tr); + TU = FNMS(Tq, Tp, To * Tr); + } + Tt = Tn + Ts; + TY = TT + TU; + Tu = Ti + Tt; + T1w = TX + TY; + { + E TS, TV, TZ, T10; + TS = FNMS(KP500000000, Tt, Ti); + TV = KP866025403 * (TT - TU); + TW = TS - TV; + T1k = TS + TV; + TZ = FNMS(KP500000000, TY, TX); + T10 = KP866025403 * (Ts - Tn); + T11 = TZ - T10; + T1l = T10 + TZ; + } + } + { + E T1y, Td, TM, T1v; + T1y = KP866025403 * (T1w - T1x); + Td = T1 + Tc; + TM = Tu + TL; + T1v = FNMS(KP500000000, TM, Td); + cr[0] = Td + TM; + cr[WS(rs, 3)] = T1v + T1y; + ci[WS(rs, 2)] = T1v - T1y; + } + { + E TR, T1I, T1e, T1K, T1i, T1H, T1f, T1J; + TR = TN - TQ; + T1I = T1D - T1A; + { + E T12, T1d, T1g, T1h; + T12 = FMA(KP173648177, TW, KP984807753 * T11); + T1d = FNMS(KP939692620, T1c, KP342020143 * T19); + T1e = T12 + T1d; + T1K = KP866025403 * (T1d - T12); + T1g = FNMS(KP984807753, TW, KP173648177 * T11); + T1h = FMA(KP342020143, T1c, KP939692620 * T19); + T1i = KP866025403 * (T1g + T1h); + T1H = T1g - T1h; + } + cr[WS(rs, 2)] = TR + T1e; + ci[WS(rs, 6)] = T1H + T1I; + T1f = FNMS(KP500000000, T1e, TR); + ci[0] = T1f - T1i; + ci[WS(rs, 3)] = T1f + T1i; + T1J = FMS(KP500000000, T1H, T1I); + cr[WS(rs, 5)] = T1J - T1K; + cr[WS(rs, 8)] = T1K + T1J; + } + { + E T1L, T1M, T1N, T1O; + T1L = KP866025403 * (TL - Tu); + T1M = T1C + T1B; + T1N = T1w + T1x; + T1O = FNMS(KP500000000, T1N, T1M); + cr[WS(rs, 6)] = T1L - T1O; + ci[WS(rs, 8)] = T1N + T1M; + ci[WS(rs, 5)] = T1L + T1O; + } + { + E T1j, T1E, T1q, T1z, T1u, T1F, T1r, T1G; + T1j = TN + TQ; + T1E = T1A + T1D; + { + E T1m, T1p, T1s, T1t; + T1m = FMA(KP766044443, T1k, KP642787609 * T1l); + T1p = FMA(KP173648177, T1n, 
KP984807753 * T1o); + T1q = T1m + T1p; + T1z = KP866025403 * (T1p - T1m); + T1s = FNMS(KP642787609, T1k, KP766044443 * T1l); + T1t = FNMS(KP984807753, T1n, KP173648177 * T1o); + T1u = KP866025403 * (T1s - T1t); + T1F = T1s + T1t; + } + cr[WS(rs, 1)] = T1j + T1q; + T1r = FNMS(KP500000000, T1q, T1j); + ci[WS(rs, 1)] = T1r - T1u; + cr[WS(rs, 4)] = T1r + T1u; + ci[WS(rs, 7)] = T1F + T1E; + T1G = FNMS(KP500000000, T1F, T1E); + cr[WS(rs, 7)] = T1z - T1G; + ci[WS(rs, 4)] = T1z + T1G; + } + } + } +} + +static const tw_instr twinstr[] = { + {TW_FULL, 1, 9}, + {TW_NEXT, 1, 0} +}; + +static const hc2hc_desc desc = { 9, "hf_9", twinstr, &GENUS, {60, 36, 36, 0} }; + +void X(codelet_hf_9) (planner *p) { + X(khc2hc_register) (p, hf_9, &desc); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:18 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cfII_10 -dft-II -include r2cfII.h */ + +/* + * This function contains 32 FP additions, 18 FP multiplications, + * (or, 14 additions, 0 multiplications, 18 fused multiply/add), + * 37 stack variables, 4 constants, and 20 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E Tq, Ti, Tk, Tu, Tw, Tp, Tb, Tj, Tr, Tv; + { + E T1, To, Ts, Tt, T8, Ta, Te, Tm, Tl, Th, Tn, T9; + T1 = R0[0]; + To = R1[WS(rs, 2)]; + { + E T2, T3, T5, T6; + T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 3)]; + T5 = R0[WS(rs, 4)]; + T6 = R0[WS(rs, 1)]; + { + E Tc, T4, T7, Td, Tf, Tg; + Tc = R1[0]; + Ts = T2 + T3; + T4 = T2 - T3; + Tt = T5 + T6; + T7 = T5 - T6; + Td = R1[WS(rs, 4)]; + Tf = R1[WS(rs, 1)]; + Tg = R1[WS(rs, 3)]; + T8 = T4 + T7; + Ta = T4 - T7; + Te = Tc - Td; + Tm = Tc + Td; + Tl = Tf + Tg; + Th = Tf - Tg; + } + } + Cr[WS(csr, 2)] = T1 + T8; + Tn = Tl - Tm; + Tq = Tm + Tl; + Ti = 
FMA(KP618033988, Th, Te); + Tk = FNMS(KP618033988, Te, Th); + Ci[WS(csi, 2)] = Tn - To; + T9 = FNMS(KP250000000, T8, T1); + Tu = FMA(KP618033988, Tt, Ts); + Tw = FNMS(KP618033988, Ts, Tt); + Tp = FMA(KP250000000, Tn, To); + Tb = FMA(KP559016994, Ta, T9); + Tj = FNMS(KP559016994, Ta, T9); + } + Tr = FMA(KP559016994, Tq, Tp); + Tv = FNMS(KP559016994, Tq, Tp); + Cr[WS(csr, 1)] = FNMS(KP951056516, Tk, Tj); + Cr[WS(csr, 3)] = FMA(KP951056516, Tk, Tj); + Cr[0] = FMA(KP951056516, Ti, Tb); + Cr[WS(csr, 4)] = FNMS(KP951056516, Ti, Tb); + Ci[WS(csi, 1)] = FNMS(KP951056516, Tw, Tv); + Ci[WS(csi, 3)] = FMA(KP951056516, Tw, Tv); + Ci[WS(csi, 4)] = FMS(KP951056516, Tu, Tr); + Ci[0] = -(FMA(KP951056516, Tu, Tr)); + } + } +} + +static const kr2c_desc desc = { 10, "r2cfII_10", {14, 0, 18, 0}, &GENUS }; + +void X(codelet_r2cfII_10) (planner *p) { + X(kr2c_register) (p, r2cfII_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cfII_10 -dft-II -include r2cfII.h */ + +/* + * This function contains 32 FP additions, 12 FP multiplications, + * (or, 26 additions, 6 multiplications, 6 fused multiply/add), + * 21 stack variables, 4 constants, and 20 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E T1, To, T8, Tq, T9, Tp, Te, Ts, Th, Tn; + T1 = R0[0]; + To = R1[WS(rs, 2)]; + { + E T2, T3, T4, T5, T6, T7; + T2 = R0[WS(rs, 
2)]; + T3 = R0[WS(rs, 3)]; + T4 = T2 - T3; + T5 = R0[WS(rs, 4)]; + T6 = R0[WS(rs, 1)]; + T7 = T5 - T6; + T8 = T4 + T7; + Tq = T5 + T6; + T9 = KP559016994 * (T4 - T7); + Tp = T2 + T3; + } + { + E Tc, Td, Tm, Tf, Tg, Tl; + Tc = R1[0]; + Td = R1[WS(rs, 4)]; + Tm = Tc + Td; + Tf = R1[WS(rs, 1)]; + Tg = R1[WS(rs, 3)]; + Tl = Tf + Tg; + Te = Tc - Td; + Ts = KP559016994 * (Tm + Tl); + Th = Tf - Tg; + Tn = Tl - Tm; + } + Cr[WS(csr, 2)] = T1 + T8; + Ci[WS(csi, 2)] = Tn - To; + { + E Ti, Tk, Tb, Tj, Ta; + Ti = FMA(KP951056516, Te, KP587785252 * Th); + Tk = FNMS(KP587785252, Te, KP951056516 * Th); + Ta = FNMS(KP250000000, T8, T1); + Tb = T9 + Ta; + Tj = Ta - T9; + Cr[WS(csr, 4)] = Tb - Ti; + Cr[WS(csr, 3)] = Tj + Tk; + Cr[0] = Tb + Ti; + Cr[WS(csr, 1)] = Tj - Tk; + } + { + E Tr, Tw, Tu, Tv, Tt; + Tr = FMA(KP951056516, Tp, KP587785252 * Tq); + Tw = FNMS(KP587785252, Tp, KP951056516 * Tq); + Tt = FMA(KP250000000, Tn, To); + Tu = Ts + Tt; + Tv = Tt - Ts; + Ci[0] = -(Tr + Tu); + Ci[WS(csi, 3)] = Tw + Tv; + Ci[WS(csi, 4)] = Tr - Tu; + Ci[WS(csi, 1)] = Tv - Tw; + } + } + } +} + +static const kr2c_desc desc = { 10, "r2cfII_10", {26, 6, 6, 0}, &GENUS }; + +void X(codelet_r2cfII_10) (planner *p) { + X(kr2c_register) (p, r2cfII_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:18 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cfII_12 -dft-II -include r2cfII.h */ + +/* + * This function contains 45 FP additions, 24 FP multiplications, + * (or, 21 additions, 0 multiplications, 24 fused multiply/add), + * 37 stack variables, 3 constants, and 24 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E TD, TB, Tp, T9, Tq, Tr, TE, To, Ts, TC; + { + E T8, T1, Tv, Tm, TF, Tz, Tl, Ta, Tb, Tt, TA, T4, Tc; + { + E Tx, Th, Ti, Tj, Ty, T6, T7, T2, T3, Tk; + Tx = R0[WS(rs, 3)]; + T6 = R0[WS(rs, 5)]; + T7 = R0[WS(rs, 1)]; + Th = R1[WS(rs, 4)]; + Ti = R1[WS(rs, 2)]; + Tj = R1[0]; + Ty = T6 + T7; + T8 = T6 - T7; + T1 = R0[0]; + Tv = Ti - Tj - Th; + Tk = Ti - Tj; + Tm = Ti + Tj; + TF = Tx - Ty; + Tz = FMA(KP500000000, Ty, Tx); 
+ T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 4)]; + Tl = FMA(KP500000000, Tk, Th); + Ta = R1[WS(rs, 1)]; + Tb = R1[WS(rs, 3)]; + Tt = T1 + T3 - T2; + TA = T3 + T2; + T4 = T2 - T3; + Tc = R1[WS(rs, 5)]; + } + { + E Tn, Tg, T5, Tu; + TD = FNMS(KP866025403, TA, Tz); + TB = FMA(KP866025403, TA, Tz); + T5 = FMA(KP500000000, T4, T1); + Tu = Ta + Tc - Tb; + { + E Td, Tf, TG, Tw, Te; + Td = Tb - Tc; + Tf = Tc + Tb; + Tp = FMA(KP866025403, T8, T5); + T9 = FNMS(KP866025403, T8, T5); + TG = Tv - Tu; + Tw = Tu + Tv; + Te = FMA(KP500000000, Td, Ta); + Tq = FMA(KP866025403, Tm, Tl); + Tn = FNMS(KP866025403, Tm, Tl); + Ci[WS(csi, 1)] = FMA(KP707106781, TG, TF); + Ci[WS(csi, 4)] = FMS(KP707106781, TG, TF); + Cr[WS(csr, 4)] = FMA(KP707106781, Tw, Tt); + Cr[WS(csr, 1)] = FNMS(KP707106781, Tw, Tt); + Tg = FNMS(KP866025403, Tf, Te); + Tr = FMA(KP866025403, Tf, Te); + } + TE = Tg + Tn; + To = Tg - Tn; + } + } + Ci[WS(csi, 2)] = FMS(KP707106781, TE, TD); + Ci[WS(csi, 3)] = FMA(KP707106781, TE, TD); + Cr[0] = FMA(KP707106781, To, T9); + Cr[WS(csr, 5)] = FNMS(KP707106781, To, T9); + Ts = Tq - Tr; + TC = Tr + Tq; + Ci[0] = -(FMA(KP707106781, TC, TB)); + Ci[WS(csi, 5)] = FNMS(KP707106781, TC, TB); + Cr[WS(csr, 2)] = FMA(KP707106781, Ts, Tp); + Cr[WS(csr, 3)] = FNMS(KP707106781, Ts, Tp); + } + } +} + +static const kr2c_desc desc = { 12, "r2cfII_12", {21, 0, 24, 0}, &GENUS }; + +void X(codelet_r2cfII_12) (planner *p) { + X(kr2c_register) (p, r2cfII_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cfII_12 -dft-II -include r2cfII.h */ + +/* + * This function contains 43 FP additions, 12 FP multiplications, + * (or, 39 additions, 8 multiplications, 4 fused multiply/add), + * 28 stack variables, 5 constants, and 24 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP353553390, 
+0.353553390593273762200422181052424519642417969); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP612372435, +0.612372435695794524549321018676472847991486870); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E Tx, Tg, T4, Tz, Ty, Tj, TA, T9, Tm, Tl, Te, Tp, To, Tf, TE; + E TF; + { + E T1, T3, T2, Th, Ti; + T1 = R0[0]; + T3 = R0[WS(rs, 2)]; + T2 = R0[WS(rs, 4)]; + Tx = KP866025403 * (T2 + T3); + Tg = FMA(KP500000000, T3 - T2, T1); + T4 = T1 + T2 - T3; + Tz = R0[WS(rs, 3)]; + Th = R0[WS(rs, 5)]; + Ti = R0[WS(rs, 1)]; + Ty = Th + Ti; + Tj = KP866025403 * (Th - Ti); + TA = FMA(KP500000000, Ty, Tz); + } + { + E T5, T6, T7, T8; + T5 = R1[WS(rs, 1)]; + T6 = R1[WS(rs, 5)]; + T7 = R1[WS(rs, 3)]; + T8 = T6 - T7; + T9 = T5 + T8; + Tm = KP612372435 * (T6 + T7); + Tl = FNMS(KP353553390, T8, KP707106781 * T5); + } + { + E Td, Ta, Tb, Tc; + Td = R1[WS(rs, 4)]; + Ta = R1[WS(rs, 2)]; + Tb = R1[0]; + Tc = Ta - Tb; + Te = Tc - Td; + Tp = FMA(KP353553390, Tc, KP707106781 * Td); + To = KP612372435 * (Ta + Tb); + } + Tf = KP707106781 * (T9 + Te); + Cr[WS(csr, 1)] = T4 - Tf; + Cr[WS(csr, 4)] = T4 + Tf; + TE = KP707106781 * (Te - T9); + TF = Tz - Ty; + Ci[WS(csi, 4)] = TE - TF; + Ci[WS(csi, 1)] = TE + TF; + { + E Tk, TB, Tr, Tw, Tn, Tq; + Tk = Tg - Tj; + TB = Tx - TA; + Tn = Tl - Tm; + Tq = To - Tp; + Tr = Tn + Tq; + Tw = Tn - Tq; + Cr[WS(csr, 5)] = Tk - Tr; + Ci[WS(csi, 2)] = Tw + TB; + Cr[0] = Tk + Tr; + Ci[WS(csi, 3)] = Tw - TB; + } + { + E Ts, TD, Tv, TC, Tt, Tu; + Ts = Tg + Tj; + TD = Tx + TA; + Tt = To + Tp; + Tu = Tm + Tl; + Tv = Tt - Tu; + TC = Tu + Tt; + Cr[WS(csr, 3)] = Ts - Tv; + Ci[WS(csi, 5)] = TD - TC; + Cr[WS(csr, 2)] = Ts + Tv; + Ci[0] = -(TC + TD); + } + } + 
} +} + +static const kr2c_desc desc = { 12, "r2cfII_12", {39, 8, 4, 0}, &GENUS }; + +void X(codelet_r2cfII_12) (planner *p) { + X(kr2c_register) (p, r2cfII_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:19 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cfII_15 -dft-II -include r2cfII.h */ + +/* + * This function contains 72 FP additions, 41 FP multiplications, + * (or, 38 additions, 7 multiplications, 34 fused multiply/add), + * 57 stack variables, 12 constants, and 30 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP823639103, +0.823639103546331925877420039278190003029660514); + DK(KP910592997, +0.910592997310029334643087372129977886038870291); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP690983005, +0.690983005625052575897706582817180941139845410); + DK(KP552786404, +0.552786404500042060718165266253744752911876328); + DK(KP447213595, +0.447213595499957939281834733746255247088123672); + DK(KP809016994, +0.809016994374947424102293417182819058860154590); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E T9, TQ, TV, TW, Tw, TJ; + { + E Ta, Tl, Tg, T8, T7, 
TF, TX, TT, Tm, Th, TM, TZ, Tr, Tn, Tj; + E Tz, To, TN, TH, Tp, TO; + Ta = R0[WS(rs, 5)]; + Tl = R1[WS(rs, 2)]; + { + E T1, T2, T5, T3, T4; + T1 = R0[0]; + T2 = R0[WS(rs, 3)]; + T5 = R1[WS(rs, 4)]; + T3 = R0[WS(rs, 6)]; + T4 = R1[WS(rs, 1)]; + { + E Tb, TL, Te, TK, TR, Tf, Ti, Ty; + Tb = R1[0]; + TR = T2 + T5; + Tg = R0[WS(rs, 2)]; + { + E T6, TS, Tc, Td; + T6 = T2 + T3 - T4 - T5; + T8 = (T3 + T5 - T2) - T4; + TS = T3 + T4; + Tc = R1[WS(rs, 3)]; + Td = R1[WS(rs, 6)]; + T7 = FNMS(KP250000000, T6, T1); + TF = T1 + T6; + TX = FNMS(KP618033988, TR, TS); + TT = FMA(KP618033988, TS, TR); + TL = Tc - Td; + Te = Tc + Td; + } + TK = Tg + Tb; + Tm = R0[WS(rs, 7)]; + Tf = Tb - Te; + Th = Tb + Te; + TM = FMA(KP618033988, TL, TK); + TZ = FNMS(KP618033988, TK, TL); + Ti = FMA(KP809016994, Th, Tg); + Ty = FMA(KP447213595, Th, Tf); + Tr = R1[WS(rs, 5)]; + Tn = R0[WS(rs, 1)]; + Tj = FNMS(KP552786404, Ti, Tf); + Tz = FNMS(KP690983005, Ty, Tg); + To = R0[WS(rs, 4)]; + TN = Tr + Tm; + } + } + TH = Ta + Tg - Th; + Tp = Tn + To; + TO = To - Tn; + { + E Tx, TA, TP, T14, T11, Tu, TD; + { + E T10, TI, TC, TY; + T9 = FNMS(KP559016994, T8, T7); + Tx = FMA(KP559016994, T8, T7); + TA = FNMS(KP809016994, Tz, Ta); + TP = FMA(KP618033988, TO, TN); + TY = FNMS(KP618033988, TN, TO); + { + E Tq, Ts, TG, Tt, TB; + Tq = Tm - Tp; + Ts = Tm + Tp; + T14 = TZ - TY; + T10 = TY + TZ; + TG = Ts - Tr - Tl; + Tt = FMA(KP809016994, Ts, Tr); + TB = FMA(KP447213595, Ts, Tq); + T11 = FMA(KP500000000, T10, TX); + Ci[WS(csi, 2)] = KP866025403 * (TH - TG); + TI = TG + TH; + Tu = FNMS(KP552786404, Tt, Tq); + TC = FNMS(KP690983005, TB, Tr); + } + Ci[WS(csi, 1)] = KP951056516 * (T10 - TX); + Cr[WS(csr, 7)] = TF + TI; + Cr[WS(csr, 2)] = FNMS(KP500000000, TI, TF); + TD = FNMS(KP809016994, TC, Tl); + } + { + E TU, Tk, T13, Tv, T12, TE; + TQ = TM - TP; + TU = TP + TM; + T12 = TD + TA; + TE = TA - TD; + Tk = FNMS(KP559016994, Tj, Ta); + TV = FMA(KP500000000, TU, TT); + Ci[WS(csi, 6)] = -(KP951056516 * (FMA(KP910592997, T12, 
T11))); + Ci[WS(csi, 3)] = KP951056516 * (FNMS(KP910592997, T12, T11)); + T13 = FNMS(KP500000000, TE, Tx); + Cr[WS(csr, 1)] = Tx + TE; + Tv = FNMS(KP559016994, Tu, Tl); + Ci[WS(csi, 4)] = KP951056516 * (TT - TU); + Cr[WS(csr, 6)] = FMA(KP823639103, T14, T13); + Cr[WS(csr, 3)] = FNMS(KP823639103, T14, T13); + TW = Tv + Tk; + Tw = Tk - Tv; + } + } + } + Ci[WS(csi, 5)] = -(KP951056516 * (FNMS(KP910592997, TW, TV))); + Ci[0] = -(KP951056516 * (FMA(KP910592997, TW, TV))); + TJ = FNMS(KP500000000, Tw, T9); + Cr[WS(csr, 4)] = T9 + Tw; + Cr[0] = FMA(KP823639103, TQ, TJ); + Cr[WS(csr, 5)] = FNMS(KP823639103, TQ, TJ); + } + } +} + +static const kr2c_desc desc = { 15, "r2cfII_15", {38, 7, 34, 0}, &GENUS }; + +void X(codelet_r2cfII_15) (planner *p) { + X(kr2c_register) (p, r2cfII_15, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cfII_15 -dft-II -include r2cfII.h */ + +/* + * This function contains 72 FP additions, 33 FP multiplications, + * (or, 54 additions, 15 multiplications, 18 fused multiply/add), + * 37 stack variables, 8 constants, and 30 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP809016994, +0.809016994374947424102293417182819058860154590); + DK(KP309016994, +0.309016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), 
MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E T1, T2, Tx, TR, TE, T7, TD, Th, Tm, Tr, TQ, TA, TB, Tf, Te; + E Tu, TS, Td, TH, TO; + T1 = R0[WS(rs, 5)]; + { + E T3, Tv, T6, Tw, T4, T5; + T2 = R0[WS(rs, 2)]; + T3 = R1[0]; + Tv = T2 + T3; + T4 = R1[WS(rs, 3)]; + T5 = R1[WS(rs, 6)]; + T6 = T4 + T5; + Tw = T4 - T5; + Tx = FMA(KP951056516, Tv, KP587785252 * Tw); + TR = FNMS(KP587785252, Tv, KP951056516 * Tw); + TE = KP559016994 * (T3 - T6); + T7 = T3 + T6; + TD = KP250000000 * T7; + } + { + E Ti, Tl, Tj, Tk, Tp, Tq; + Th = R0[0]; + Ti = R1[WS(rs, 4)]; + Tl = R0[WS(rs, 6)]; + Tj = R1[WS(rs, 1)]; + Tk = R0[WS(rs, 3)]; + Tp = Tk + Ti; + Tq = Tl + Tj; + Tm = Ti + Tj - (Tk + Tl); + Tr = FMA(KP951056516, Tp, KP587785252 * Tq); + TQ = FNMS(KP951056516, Tq, KP587785252 * Tp); + TA = FMA(KP250000000, Tm, Th); + TB = KP559016994 * (Tl + Ti - (Tk + Tj)); + } + { + E T9, Tt, Tc, Ts, Ta, Tb, TG; + Tf = R1[WS(rs, 2)]; + T9 = R0[WS(rs, 7)]; + Te = R1[WS(rs, 5)]; + Tt = T9 + Te; + Ta = R0[WS(rs, 1)]; + Tb = R0[WS(rs, 4)]; + Tc = Ta + Tb; + Ts = Ta - Tb; + Tu = FNMS(KP951056516, Tt, KP587785252 * Ts); + TS = FMA(KP951056516, Ts, KP587785252 * Tt); + Td = T9 + Tc; + TG = KP559016994 * (T9 - Tc); + TH = FNMS(KP309016994, Te, TG) + FNMA(KP250000000, Td, Tf); + TO = FMS(KP809016994, Te, Tf) + FNMA(KP250000000, Td, TG); + } + { + E Tn, T8, Tg, To; + Tn = Th - Tm; + T8 = T1 + T2 - T7; + Tg = Td - Te - Tf; + To = T8 + Tg; + Ci[WS(csi, 2)] = KP866025403 * (T8 - Tg); + Cr[WS(csr, 2)] = FNMS(KP500000000, To, Tn); + Cr[WS(csr, 7)] = Tn + To; + } + { + E TM, TX, TT, TV, TP, TU, TN, TW; + TM = TB + TA; + TX = KP866025403 * (TR + TS); + TT = TR - TS; + TV = FMS(KP500000000, TT, TQ); + TN = T1 + TE + FNMS(KP809016994, T2, TD); + TP = TN + TO; + TU = KP866025403 * (TO - TN); + Cr[WS(csr, 1)] = TM + TP; + Ci[WS(csi, 1)] = TQ + TT; + Ci[WS(csi, 6)] = TU - TV; + Ci[WS(csi, 3)] = TU + TV; + TW = FNMS(KP500000000, TP, TM); + Cr[WS(csr, 3)] = TW - TX; + Cr[WS(csr, 6)] = TW + TX; + } + { 
+ E Tz, TC, Ty, TK, TI, TL, TF, TJ; + Tz = KP866025403 * (Tx + Tu); + TC = TA - TB; + Ty = Tu - Tx; + TK = FMS(KP500000000, Ty, Tr); + TF = FMA(KP309016994, T2, T1) + TD - TE; + TI = TF + TH; + TL = KP866025403 * (TH - TF); + Ci[WS(csi, 4)] = Tr + Ty; + Cr[WS(csr, 4)] = TC + TI; + Ci[WS(csi, 5)] = TK - TL; + Ci[0] = TK + TL; + TJ = FNMS(KP500000000, TI, TC); + Cr[0] = Tz + TJ; + Cr[WS(csr, 5)] = TJ - Tz; + } + } + } +} + +static const kr2c_desc desc = { 15, "r2cfII_15", {54, 15, 18, 0}, &GENUS }; + +void X(codelet_r2cfII_15) (planner *p) { + X(kr2c_register) (p, r2cfII_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:19 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -name r2cfII_16 -dft-II -include r2cfII.h */ + +/* + * This function contains 66 FP additions, 48 FP multiplications, + * (or, 18 additions, 0 multiplications, 48 fused multiply/add), + * 54 stack variables, 7 constants, and 32 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E TN, TF, TX, TV, TO, TP, TY, TM, TQ, TW; + { + E TT, TZ, TB, T5, Tu, TK, TJ, Tr, T9, TC, T8, Tl, TH, TG, Ti; + E Ta; + { + E T1, TR, Tn, Ts, To, TS, T4, Tp, T2, T3; + T1 = R0[0]; + TR = R0[WS(rs, 4)]; + T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 6)]; + Tn = R1[WS(rs, 7)]; + Ts = R1[WS(rs, 3)]; + To = R1[WS(rs, 1)]; + TS = T2 + T3; + T4 = T2 - T3; + Tp = R1[WS(rs, 5)]; + { + E Te, Tj, Tf, Tg, Tt, Tq; + 
Te = R1[0]; + TT = FMA(KP707106781, TS, TR); + TZ = FNMS(KP707106781, TS, TR); + TB = FMA(KP707106781, T4, T1); + T5 = FNMS(KP707106781, T4, T1); + Tt = To + Tp; + Tq = To - Tp; + Tj = R1[WS(rs, 4)]; + Tf = R1[WS(rs, 2)]; + Tu = FNMS(KP707106781, Tt, Ts); + TK = FMA(KP707106781, Tt, Ts); + TJ = FMS(KP707106781, Tq, Tn); + Tr = FMA(KP707106781, Tq, Tn); + Tg = R1[WS(rs, 6)]; + { + E T6, T7, Tk, Th; + T6 = R0[WS(rs, 5)]; + T7 = R0[WS(rs, 1)]; + T9 = R0[WS(rs, 3)]; + Tk = Tf + Tg; + Th = Tf - Tg; + TC = FNMS(KP414213562, T6, T7); + T8 = FMA(KP414213562, T7, T6); + Tl = FNMS(KP707106781, Tk, Tj); + TH = FMA(KP707106781, Tk, Tj); + TG = FMA(KP707106781, Th, Te); + Ti = FNMS(KP707106781, Th, Te); + Ta = R0[WS(rs, 7)]; + } + } + } + { + E TE, TU, Ty, Tv, TI, TL; + Ty = FNMS(KP668178637, Tr, Tu); + Tv = FMA(KP668178637, Tu, Tr); + { + E Tw, T14, T12, TA, T11, T13, Tx, Td; + { + E Tz, Tm, TD, Tb, T10, Tc; + Tz = FNMS(KP668178637, Ti, Tl); + Tm = FMA(KP668178637, Tl, Ti); + TD = FMS(KP414213562, T9, Ta); + Tb = FMA(KP414213562, Ta, T9); + Tw = Tm - Tv; + T14 = Tm + Tv; + T10 = TD - TC; + TE = TC + TD; + Tc = T8 - Tb; + TU = T8 + Tb; + T12 = Tz + Ty; + TA = Ty - Tz; + T11 = FMA(KP923879532, T10, TZ); + T13 = FNMS(KP923879532, T10, TZ); + Tx = FNMS(KP923879532, Tc, T5); + Td = FMA(KP923879532, Tc, T5); + } + Ci[WS(csi, 2)] = -(FMA(KP831469612, T14, T13)); + Ci[WS(csi, 5)] = FNMS(KP831469612, T14, T13); + Cr[WS(csr, 1)] = FMA(KP831469612, Tw, Td); + Cr[WS(csr, 6)] = FNMS(KP831469612, Tw, Td); + Cr[WS(csr, 5)] = FNMS(KP831469612, TA, Tx); + Ci[WS(csi, 1)] = FMA(KP831469612, T12, T11); + Cr[WS(csr, 2)] = FMA(KP831469612, TA, Tx); + Ci[WS(csi, 6)] = FMS(KP831469612, T12, T11); + } + TN = FNMS(KP923879532, TE, TB); + TF = FMA(KP923879532, TE, TB); + TX = FNMS(KP923879532, TU, TT); + TV = FMA(KP923879532, TU, TT); + TO = FMA(KP198912367, TG, TH); + TI = FNMS(KP198912367, TH, TG); + TL = FMA(KP198912367, TK, TJ); + TP = FNMS(KP198912367, TJ, TK); + TY = TL - TI; + TM = TI + TL; + } + 
} + Ci[WS(csi, 4)] = FMS(KP980785280, TY, TX); + Ci[WS(csi, 3)] = FMA(KP980785280, TY, TX); + Cr[0] = FMA(KP980785280, TM, TF); + Cr[WS(csr, 7)] = FNMS(KP980785280, TM, TF); + TQ = TO - TP; + TW = TO + TP; + Ci[0] = -(FMA(KP980785280, TW, TV)); + Ci[WS(csi, 7)] = FNMS(KP980785280, TW, TV); + Cr[WS(csr, 3)] = FMA(KP980785280, TQ, TN); + Cr[WS(csr, 4)] = FNMS(KP980785280, TQ, TN); + } + } +} + +static const kr2c_desc desc = { 16, "r2cfII_16", {18, 0, 48, 0}, &GENUS }; + +void X(codelet_r2cfII_16) (planner *p) { + X(kr2c_register) (p, r2cfII_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 16 -name r2cfII_16 -dft-II -include r2cfII.h */ + +/* + * This function contains 66 FP additions, 30 FP multiplications, + * (or, 54 additions, 18 multiplications, 12 fused multiply/add), + * 32 stack variables, 7 constants, and 32 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E T5, T11, TB, TV, Tr, TK, Tu, TJ, Ti, TH, Tl, TG, Tc, T10, TE; + E TS; + { + E T1, TU, T4, TT, T2, T3; + T1 = R0[0]; + TU = R0[WS(rs, 4)]; + T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 6)]; + T4 = KP707106781 * (T2 - T3); + TT = KP707106781 * (T2 + 
T3); + T5 = T1 + T4; + T11 = TU - TT; + TB = T1 - T4; + TV = TT + TU; + } + { + E Tq, Tt, Tp, Ts, Tn, To; + Tq = R1[WS(rs, 7)]; + Tt = R1[WS(rs, 3)]; + Tn = R1[WS(rs, 1)]; + To = R1[WS(rs, 5)]; + Tp = KP707106781 * (Tn - To); + Ts = KP707106781 * (Tn + To); + Tr = Tp - Tq; + TK = Tt - Ts; + Tu = Ts + Tt; + TJ = Tp + Tq; + } + { + E Te, Tk, Th, Tj, Tf, Tg; + Te = R1[0]; + Tk = R1[WS(rs, 4)]; + Tf = R1[WS(rs, 2)]; + Tg = R1[WS(rs, 6)]; + Th = KP707106781 * (Tf - Tg); + Tj = KP707106781 * (Tf + Tg); + Ti = Te + Th; + TH = Tk - Tj; + Tl = Tj + Tk; + TG = Te - Th; + } + { + E T8, TC, Tb, TD; + { + E T6, T7, T9, Ta; + T6 = R0[WS(rs, 1)]; + T7 = R0[WS(rs, 5)]; + T8 = FNMS(KP382683432, T7, KP923879532 * T6); + TC = FMA(KP382683432, T6, KP923879532 * T7); + T9 = R0[WS(rs, 3)]; + Ta = R0[WS(rs, 7)]; + Tb = FNMS(KP923879532, Ta, KP382683432 * T9); + TD = FMA(KP923879532, T9, KP382683432 * Ta); + } + Tc = T8 + Tb; + T10 = Tb - T8; + TE = TC - TD; + TS = TC + TD; + } + { + E Td, TW, Tw, TR, Tm, Tv; + Td = T5 - Tc; + TW = TS + TV; + Tm = FMA(KP195090322, Ti, KP980785280 * Tl); + Tv = FNMS(KP980785280, Tu, KP195090322 * Tr); + Tw = Tm + Tv; + TR = Tv - Tm; + Cr[WS(csr, 4)] = Td - Tw; + Ci[WS(csi, 7)] = TR + TW; + Cr[WS(csr, 3)] = Td + Tw; + Ci[0] = TR - TW; + } + { + E Tx, TY, TA, TX, Ty, Tz; + Tx = T5 + Tc; + TY = TV - TS; + Ty = FNMS(KP195090322, Tl, KP980785280 * Ti); + Tz = FMA(KP980785280, Tr, KP195090322 * Tu); + TA = Ty + Tz; + TX = Tz - Ty; + Cr[WS(csr, 7)] = Tx - TA; + Ci[WS(csi, 3)] = TX + TY; + Cr[0] = Tx + TA; + Ci[WS(csi, 4)] = TX - TY; + } + { + E TF, T12, TM, TZ, TI, TL; + TF = TB + TE; + T12 = T10 - T11; + TI = FMA(KP831469612, TG, KP555570233 * TH); + TL = FMA(KP831469612, TJ, KP555570233 * TK); + TM = TI - TL; + TZ = TI + TL; + Cr[WS(csr, 6)] = TF - TM; + Ci[WS(csi, 2)] = T12 - TZ; + Cr[WS(csr, 1)] = TF + TM; + Ci[WS(csi, 5)] = -(TZ + T12); + } + { + E TN, T14, TQ, T13, TO, TP; + TN = TB - TE; + T14 = T10 + T11; + TO = FNMS(KP555570233, TJ, KP831469612 * TK); + 
TP = FNMS(KP555570233, TG, KP831469612 * TH); + TQ = TO - TP; + T13 = TP + TO; + Cr[WS(csr, 5)] = TN - TQ; + Ci[WS(csi, 1)] = T13 + T14; + Cr[WS(csr, 2)] = TN + TQ; + Ci[WS(csi, 6)] = T13 - T14; + } + } + } +} + +static const kr2c_desc desc = { 16, "r2cfII_16", {54, 18, 12, 0}, &GENUS }; + +void X(codelet_r2cfII_16) (planner *p) { + X(kr2c_register) (p, r2cfII_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:16 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -name r2cfII_2 -dft-II -include r2cfII.h */ + +/* + * This function contains 0 FP additions, 0 FP multiplications, + * (or, 0 additions, 0 multiplications, 0 fused multiply/add), + * 3 stack variables, 0 constants, and 4 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = R0[0]; + T2 = R1[0]; + Cr[0] = T1; + Ci[0] = -T2; + } + } +} + +static const kr2c_desc desc = { 2, "r2cfII_2", {0, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cfII_2) (planner *p) { + X(kr2c_register) (p, r2cfII_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 2 -name r2cfII_2 -dft-II -include r2cfII.h */ + +/* + * This function contains 0 FP additions, 0 FP multiplications, + * (or, 0 additions, 0 multiplications, 0 fused multiply/add), + * 3 stack variables, 0 constants, and 4 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(8, rs), 
MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = R0[0]; + T2 = R1[0]; + Cr[0] = T1; + Ci[0] = -T2; + } + } +} + +static const kr2c_desc desc = { 2, "r2cfII_2", {0, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cfII_2) (planner *p) { + X(kr2c_register) (p, r2cfII_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:21 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -name r2cfII_20 -dft-II -include r2cfII.h */ + +/* + * This function contains 102 FP additions, 63 FP multiplications, + * (or, 39 additions, 0 multiplications, 63 fused multiply/add), + * 67 stack variables, 10 constants, and 40 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP690983005, +0.690983005625052575897706582817180941139845410); + DK(KP552786404, +0.552786404500042060718165266253744752911876328); + DK(KP447213595, +0.447213595499957939281834733746255247088123672); + DK(KP809016994, +0.809016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP381966011, +0.381966011250105151795413165634361882279690820); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E Tv, TK, TN, Th, T1l, T1n, Ts, TH; + { + E Ti, T1d, T1f, T1e, T1g, T1p, TS, Tg, To, T8, T7, T19, T1r, T1k, Tx; + E Tp, TX, Ty, TF, Tr, TV, Tz, TA, TI; + { + E Ta, Tb, Td, Te; + Ti = 
R1[WS(rs, 2)]; + T1d = R0[WS(rs, 5)]; + Ta = R0[WS(rs, 9)]; + Tb = R0[WS(rs, 1)]; + Td = R0[WS(rs, 3)]; + Te = R0[WS(rs, 7)]; + { + E T1, T2, T5, T3, T4, T1i, Tc, Tf; + T1 = R0[0]; + T1f = Ta + Tb; + Tc = Ta - Tb; + T1e = Td + Te; + Tf = Td - Te; + T2 = R0[WS(rs, 4)]; + T5 = R0[WS(rs, 6)]; + T1g = FMA(KP381966011, T1f, T1e); + T1p = FMA(KP381966011, T1e, T1f); + TS = FMA(KP618033988, Tc, Tf); + Tg = FNMS(KP618033988, Tf, Tc); + T3 = R0[WS(rs, 8)]; + T4 = R0[WS(rs, 2)]; + T1i = T2 + T5; + { + E Tj, Tu, Tm, Tt, Tn, Tq, TU; + Tj = R1[WS(rs, 8)]; + To = R1[WS(rs, 6)]; + { + E T6, T1j, Tk, Tl; + T6 = T2 + T3 - T4 - T5; + T8 = (T3 + T5 - T2) - T4; + T1j = T3 + T4; + Tk = R1[0]; + Tl = R1[WS(rs, 4)]; + T7 = FNMS(KP250000000, T6, T1); + T19 = T1 + T6; + T1r = FNMS(KP618033988, T1i, T1j); + T1k = FMA(KP618033988, T1j, T1i); + Tu = Tk - Tl; + Tm = Tk + Tl; + } + Tt = To + Tj; + Tx = R1[WS(rs, 7)]; + Tn = Tj - Tm; + Tp = Tj + Tm; + Tv = FNMS(KP618033988, Tu, Tt); + TX = FMA(KP618033988, Tt, Tu); + Tq = FMA(KP809016994, Tp, To); + TU = FMA(KP447213595, Tp, Tn); + Ty = R1[WS(rs, 1)]; + TF = R1[WS(rs, 3)]; + Tr = FNMS(KP552786404, Tq, Tn); + TV = FNMS(KP690983005, TU, To); + Tz = R1[WS(rs, 5)]; + TA = R1[WS(rs, 9)]; + TI = TF + Ty; + } + } + } + { + E T1w, TJ, TB, T1a; + T1w = T1f + T1d - T1e; + TJ = Tz - TA; + TB = Tz + TA; + T1a = Ti + To - Tp; + { + E T9, T12, TT, T15, TG, TD, T1s, T1u, TW, T11, T10, T1h; + { + E TE, TC, TR, T1b; + T9 = FNMS(KP559016994, T8, T7); + TR = FMA(KP559016994, T8, T7); + TK = FMA(KP618033988, TJ, TI); + T12 = FNMS(KP618033988, TI, TJ); + TE = Ty - TB; + TC = Ty + TB; + TT = FMA(KP951056516, TS, TR); + T15 = FNMS(KP951056516, TS, TR); + TG = FNMS(KP552786404, TF, TE); + T1b = TC - TF - Tx; + { + E TZ, T1q, T1c, T1x; + TZ = FMA(KP447213595, TC, TE); + TD = FMA(KP250000000, TC, Tx); + T1q = FNMS(KP809016994, T1p, T1d); + T1c = T1a + T1b; + T1x = T1a - T1b; + T10 = FNMS(KP690983005, TZ, TF); + T1s = FNMS(KP951056516, T1r, T1q); + T1u = FMA(KP951056516, 
T1r, T1q); + Ci[WS(csi, 7)] = FMA(KP707106781, T1x, T1w); + Ci[WS(csi, 2)] = FMS(KP707106781, T1x, T1w); + Cr[WS(csr, 7)] = FMA(KP707106781, T1c, T19); + Cr[WS(csr, 2)] = FNMS(KP707106781, T1c, T19); + } + } + TW = FNMS(KP809016994, TV, Ti); + T11 = FNMS(KP809016994, T10, Tx); + T1h = FMA(KP809016994, T1g, T1d); + { + E T17, TY, T16, T13; + T17 = FNMS(KP951056516, TX, TW); + TY = FMA(KP951056516, TX, TW); + T16 = FMA(KP951056516, T12, T11); + T13 = FNMS(KP951056516, T12, T11); + TN = FMA(KP951056516, Tg, T9); + Th = FNMS(KP951056516, Tg, T9); + { + E T18, T1v, T1t, T14; + T18 = T16 - T17; + T1v = T17 + T16; + T1t = TY + T13; + T14 = TY - T13; + Cr[WS(csr, 1)] = FMA(KP707106781, T18, T15); + Cr[WS(csr, 8)] = FNMS(KP707106781, T18, T15); + Ci[WS(csi, 3)] = FMA(KP707106781, T1v, T1u); + Ci[WS(csi, 6)] = FMS(KP707106781, T1v, T1u); + Ci[WS(csi, 1)] = FNMS(KP707106781, T1t, T1s); + Ci[WS(csi, 8)] = -(FMA(KP707106781, T1t, T1s)); + Cr[WS(csr, 3)] = FMA(KP707106781, T14, TT); + Cr[WS(csr, 6)] = FNMS(KP707106781, T14, TT); + T1l = FMA(KP951056516, T1k, T1h); + T1n = FNMS(KP951056516, T1k, T1h); + } + } + Ts = FNMS(KP559016994, Tr, Ti); + TH = FNMS(KP559016994, TG, TD); + } + } + } + { + E TO, Tw, TP, TL; + TO = FMA(KP951056516, Tv, Ts); + Tw = FNMS(KP951056516, Tv, Ts); + TP = FMA(KP951056516, TK, TH); + TL = FNMS(KP951056516, TK, TH); + { + E TQ, T1m, T1o, TM; + TQ = TO - TP; + T1m = TO + TP; + T1o = Tw + TL; + TM = Tw - TL; + Cr[WS(csr, 4)] = FMA(KP707106781, TQ, TN); + Cr[WS(csr, 5)] = FNMS(KP707106781, TQ, TN); + Ci[WS(csi, 9)] = FNMS(KP707106781, T1m, T1l); + Ci[0] = -(FMA(KP707106781, T1m, T1l)); + Ci[WS(csi, 5)] = FNMS(KP707106781, T1o, T1n); + Ci[WS(csi, 4)] = -(FMA(KP707106781, T1o, T1n)); + Cr[0] = FMA(KP707106781, TM, Th); + Cr[WS(csr, 9)] = FNMS(KP707106781, TM, Th); + } + } + } + } +} + +static const kr2c_desc desc = { 20, "r2cfII_20", {39, 0, 63, 0}, &GENUS }; + +void X(codelet_r2cfII_20) (planner *p) { + X(kr2c_register) (p, r2cfII_20, &desc); +} + +#else /* 
HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 20 -name r2cfII_20 -dft-II -include r2cfII.h */ + +/* + * This function contains 102 FP additions, 34 FP multiplications, + * (or, 86 additions, 18 multiplications, 16 fused multiply/add), + * 60 stack variables, 13 constants, and 40 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP572061402, +0.572061402817684297600072783580302076536153377); + DK(KP218508012, +0.218508012224410535399650602527877556893735408); + DK(KP309016994, +0.309016994374947424102293417182819058860154590); + DK(KP809016994, +0.809016994374947424102293417182819058860154590); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP176776695, +0.176776695296636881100211090526212259821208984); + DK(KP395284707, +0.395284707521047416499861693054089816714944392); + DK(KP672498511, +0.672498511963957326960058968885748755876783111); + DK(KP415626937, +0.415626937777453428589967464113135184222253485); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E T8, TD, Tm, TN, T9, TC, TY, TE, Te, TF, Tl, TK, T12, TL, Tk; + E TM, T1, T6, Tq, T1l, T1c, Tp, T1f, T1e, T1d, Ty, TW, T1g, T1m, Tx; + E Tu; + T8 = R1[WS(rs, 2)]; + TD = KP707106781 * T8; + Tm = R1[WS(rs, 7)]; + TN = KP707106781 * Tm; + { + E Ta, TA, Td, TB, Tb, Tc; + T9 = R1[WS(rs, 6)]; + Ta = R1[WS(rs, 8)]; + TA = T9 + Ta; + Tb = R1[0]; + Tc = R1[WS(rs, 4)]; + Td = Tb + Tc; + TB = Tb - Tc; + TC 
= FMA(KP415626937, TA, KP672498511 * TB); + TY = FNMS(KP415626937, TB, KP672498511 * TA); + TE = KP395284707 * (Ta - Td); + Te = Ta + Td; + TF = KP176776695 * Te; + } + { + E Tg, TJ, Tj, TI, Th, Ti; + Tg = R1[WS(rs, 1)]; + Tl = R1[WS(rs, 3)]; + TJ = Tg + Tl; + Th = R1[WS(rs, 5)]; + Ti = R1[WS(rs, 9)]; + Tj = Th + Ti; + TI = Th - Ti; + TK = FNMS(KP415626937, TJ, KP672498511 * TI); + T12 = FMA(KP415626937, TI, KP672498511 * TJ); + TL = KP395284707 * (Tg - Tj); + Tk = Tg + Tj; + TM = KP176776695 * Tk; + } + { + E T2, T5, T3, T4, T1a, T1b; + T1 = R0[0]; + T2 = R0[WS(rs, 6)]; + T5 = R0[WS(rs, 8)]; + T3 = R0[WS(rs, 2)]; + T4 = R0[WS(rs, 4)]; + T1a = T4 + T2; + T1b = T5 + T3; + T6 = T2 + T3 - (T4 + T5); + Tq = FMA(KP250000000, T6, T1); + T1l = FNMS(KP951056516, T1b, KP587785252 * T1a); + T1c = FMA(KP951056516, T1a, KP587785252 * T1b); + Tp = KP559016994 * (T5 + T2 - (T4 + T3)); + } + T1f = R0[WS(rs, 5)]; + { + E Tv, Tw, Ts, Tt; + Tv = R0[WS(rs, 9)]; + Tw = R0[WS(rs, 1)]; + Tx = Tv - Tw; + T1e = Tv + Tw; + Ts = R0[WS(rs, 3)]; + Tt = R0[WS(rs, 7)]; + Tu = Ts - Tt; + T1d = Ts + Tt; + } + Ty = FMA(KP951056516, Tu, KP587785252 * Tx); + TW = FNMS(KP951056516, Tx, KP587785252 * Tu); + T1g = FMA(KP809016994, T1d, KP309016994 * T1e) + T1f; + T1m = FNMS(KP809016994, T1e, T1f) - (KP309016994 * T1d); + { + E T7, T1r, To, T1q, Tf, Tn; + T7 = T1 - T6; + T1r = T1e + T1f - T1d; + Tf = T8 + (T9 - Te); + Tn = (Tk - Tl) - Tm; + To = KP707106781 * (Tf + Tn); + T1q = KP707106781 * (Tf - Tn); + Cr[WS(csr, 2)] = T7 - To; + Ci[WS(csi, 2)] = T1q - T1r; + Cr[WS(csr, 7)] = T7 + To; + Ci[WS(csi, 7)] = T1q + T1r; + } + { + E T1h, T1j, TX, T15, T10, T16, T13, T17, TV, TZ, T11; + T1h = T1c - T1g; + T1j = T1c + T1g; + TV = Tq - Tp; + TX = TV - TW; + T15 = TV + TW; + TZ = FMA(KP218508012, T9, TD) + TF - TE; + T10 = TY + TZ; + T16 = TZ - TY; + T11 = FNMS(KP218508012, Tl, TL) - (TM + TN); + T13 = T11 - T12; + T17 = T11 + T12; + { + E T14, T19, T18, T1i; + T14 = T10 + T13; + Cr[WS(csr, 5)] = TX - T14; + 
Cr[WS(csr, 4)] = TX + T14; + T19 = T17 - T16; + Ci[WS(csi, 5)] = T19 - T1h; + Ci[WS(csi, 4)] = T19 + T1h; + T18 = T16 + T17; + Cr[WS(csr, 9)] = T15 - T18; + Cr[0] = T15 + T18; + T1i = T13 - T10; + Ci[0] = T1i - T1j; + Ci[WS(csi, 9)] = T1i + T1j; + } + } + { + E T1n, T1p, Tz, TR, TH, TS, TP, TT, Tr, TG, TO; + T1n = T1l + T1m; + T1p = T1m - T1l; + Tr = Tp + Tq; + Tz = Tr + Ty; + TR = Tr - Ty; + TG = TD + TE + FNMS(KP572061402, T9, TF); + TH = TC + TG; + TS = TC - TG; + TO = TL + TM + FNMS(KP572061402, Tl, TN); + TP = TK - TO; + TT = TK + TO; + { + E TQ, T1o, TU, T1k; + TQ = TH + TP; + Cr[WS(csr, 6)] = Tz - TQ; + Cr[WS(csr, 3)] = Tz + TQ; + T1o = TT - TS; + Ci[WS(csi, 6)] = T1o - T1p; + Ci[WS(csi, 3)] = T1o + T1p; + TU = TS + TT; + Cr[WS(csr, 8)] = TR - TU; + Cr[WS(csr, 1)] = TR + TU; + T1k = TP - TH; + Ci[WS(csi, 8)] = T1k - T1n; + Ci[WS(csi, 1)] = T1k + T1n; + } + } + } + } +} + +static const kr2c_desc desc = { 20, "r2cfII_20", {86, 18, 16, 0}, &GENUS }; + +void X(codelet_r2cfII_20) (planner *p) { + X(kr2c_register) (p, r2cfII_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:21 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -name r2cfII_25 -dft-II -include r2cfII.h */ + +/* + * This function contains 212 FP additions, 177 FP multiplications, + * (or, 47 additions, 12 multiplications, 165 fused multiply/add), + * 163 stack variables, 67 constants, and 50 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP876091699, +0.876091699473550838204498029706869638173524346); + DK(KP792626838, +0.792626838241819413632131824093538848057784557); + DK(KP690668130, +0.690668130712929053565177988380887884042527623); + DK(KP809385824, +0.809385824416008241660603814668679683846476688); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP237294955, +0.237294955877110315393888866460840817927895961); + DK(KP897376177, +0.897376177523557693138608077137219684419427330); + DK(KP997675361, +0.997675361079556513670859573984492383596555031); + DK(KP584303379, +0.584303379262766050358567120694562180043261496); + DK(KP653711795, +0.653711795629256296299985401753308353544378892); + DK(KP591287873, +0.591287873858343558732323717242372865934480959); + DK(KP645989928, +0.645989928319777763844272876603899665178054552); + DK(KP956723877, +0.956723877038460305821989399535483155872969262); + DK(KP952936919, 
+0.952936919628306576880750665357914584765951388); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP945422727, +0.945422727388575946270360266328811958657216298); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP999754674, +0.999754674276473633366203429228112409535557487); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP669429328, +0.669429328479476605641803240971985825917022098); + DK(KP916574801, +0.916574801383451584742370439148878693530976769); + DK(KP829049696, +0.829049696159252993975487806364305442437946767); + DK(KP923225144, +0.923225144846402650453449441572664695995209956); + DK(KP262346850, +0.262346850930607871785420028382979691334784273); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP763583905, +0.763583905359130246362948588764067237776594106); + DK(KP921078979, +0.921078979742360627699756128143719920817673854); + DK(KP904508497, +0.904508497187473712051146708591409529430077295); + DK(KP248028675, +0.248028675328619457762448260696444630363259177); + DK(KP894834959, +0.894834959464455102997960030820114611498661386); + DK(KP982009705, +0.982009705009746369461829878184175962711969869); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + 
DK(KP958953096, +0.958953096729998668045963838399037225970891871); + DK(KP867381224, +0.867381224396525206773171885031575671309956167); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP869845200, +0.869845200362138853122720822420327157933056305); + DK(KP786782374, +0.786782374965295178365099601674911834788448471); + DK(KP120146378, +0.120146378570687701782758537356596213647956445); + DK(KP132830569, +0.132830569247582714407653942074819768844536507); + DK(KP269969613, +0.269969613759572083574752974412347470060951301); + DK(KP244189809, +0.244189809627953270309879511234821255780225091); + DK(KP987388751, +0.987388751065621252324603216482382109400433949); + DK(KP893101515, +0.893101515366181661711202267938416198338079437); + DK(KP494780565, +0.494780565770515410344588413655324772219443730); + DK(KP447533225, +0.447533225982656890041886979663652563063114397); + DK(KP522847744, +0.522847744331509716623755382187077770911012542); + DK(KP578046249, +0.578046249379945007321754579646815604023525655); + DK(KP066152395, +0.066152395967733048213034281011006031460903353); + DK(KP059835404, +0.059835404262124915169548397419498386427871950); + DK(KP667278218, +0.667278218140296670899089292254759909713898805); + DK(KP603558818, +0.603558818296015001454675132653458027918768137); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E T2R, T2T, T2D, T2C, T2H, T2G, T2B, T2P, T2S; + { + E T2A, TJ, T1K, T3l, T2z, TB, T2d, T2l, T1N, T21, T15, T1g, T1s, T1D, T9; + E T25, T1X, T2o, T2g, T1z, T1u, T1j, TQ, Ti, T1a, T2f, T2p, T1U, T24, TX; + E 
T1k, T1v, T1A, T19, Ts, T18, T1P; + { + E Tt, Tw, TZ, Tx, Ty; + { + E T2v, TG, TH, TD, TE, TI, T2x; + T2v = R0[0]; + TG = R0[WS(rs, 10)]; + TH = R1[WS(rs, 2)]; + TD = R0[WS(rs, 5)]; + TE = R1[WS(rs, 7)]; + Tt = R0[WS(rs, 2)]; + TI = TG + TH; + T2x = TG - TH; + { + E TF, T2w, Tu, Tv, T2y; + TF = TD + TE; + T2w = TD - TE; + Tu = R0[WS(rs, 7)]; + Tv = R1[WS(rs, 9)]; + T2A = T2w - T2x; + T2y = T2w + T2x; + TJ = FMA(KP618033988, TI, TF); + T1K = FNMS(KP618033988, TF, TI); + T3l = T2v + T2y; + T2z = FNMS(KP250000000, T2y, T2v); + Tw = Tu - Tv; + TZ = Tu + Tv; + Tx = R0[WS(rs, 12)]; + Ty = R1[WS(rs, 4)]; + } + } + { + E TO, TN, TM, T1V; + { + E T1, T1M, T11, T13, T4, TK, T12, TL, T7, T5, TA, T6, T14, T1L, T8; + T1 = R0[WS(rs, 1)]; + { + E T2, T10, Tz, T3; + T2 = R0[WS(rs, 6)]; + T10 = Tx + Ty; + Tz = Tx - Ty; + T3 = R1[WS(rs, 8)]; + T5 = R0[WS(rs, 11)]; + T1M = FNMS(KP618033988, TZ, T10); + T11 = FMA(KP618033988, T10, TZ); + T13 = Tz - Tw; + TA = Tw + Tz; + T4 = T2 - T3; + TK = T2 + T3; + T6 = R1[WS(rs, 3)]; + } + TB = Tt + TA; + T12 = FNMS(KP250000000, TA, Tt); + TL = T5 + T6; + T7 = T5 - T6; + T14 = FNMS(KP559016994, T13, T12); + T1L = FMA(KP559016994, T13, T12); + T8 = T4 + T7; + TO = T4 - T7; + T2d = FNMS(KP603558818, T1M, T1L); + T2l = FMA(KP667278218, T1L, T1M); + T1N = FMA(KP059835404, T1M, T1L); + T21 = FNMS(KP066152395, T1L, T1M); + T15 = FMA(KP578046249, T14, T11); + T1g = FNMS(KP522847744, T11, T14); + T1s = FMA(KP447533225, T11, T14); + T1D = FNMS(KP494780565, T14, T11); + TN = FNMS(KP250000000, T8, T1); + T9 = T1 + T8; + TM = FMA(KP618033988, TL, TK); + T1V = FNMS(KP618033988, TK, TL); + } + { + E Th, Td, TU, Tc, Te; + Th = R0[WS(rs, 4)]; + { + E Ta, Tb, T1W, TP; + Ta = R0[WS(rs, 9)]; + Tb = R1[WS(rs, 11)]; + T1W = FNMS(KP559016994, TO, TN); + TP = FMA(KP559016994, TO, TN); + Td = R1[WS(rs, 6)]; + TU = Ta + Tb; + Tc = Ta - Tb; + T25 = FNMS(KP893101515, T1V, T1W); + T1X = FMA(KP987388751, T1W, T1V); + T2o = FMA(KP522847744, T1V, T1W); + T2g = FNMS(KP578046249, 
T1W, T1V); + T1z = FMA(KP667278218, TP, TM); + T1u = FNMS(KP603558818, TM, TP); + T1j = FNMS(KP244189809, TM, TP); + TQ = FMA(KP269969613, TP, TM); + Te = R1[WS(rs, 1)]; + } + { + E Tk, T1S, TW, TS, Tn, T16, TR, T17, Tq, To, Tg, Tp, TT, T1T, Tr; + Tk = R0[WS(rs, 3)]; + { + E Tl, TV, Tf, Tm; + Tl = R0[WS(rs, 8)]; + TV = Te - Td; + Tf = Td + Te; + Tm = R1[WS(rs, 10)]; + To = R1[0]; + T1S = FMA(KP618033988, TU, TV); + TW = FNMS(KP618033988, TV, TU); + TS = Tc + Tf; + Tg = Tc - Tf; + Tn = Tl - Tm; + T16 = Tl + Tm; + Tp = R1[WS(rs, 5)]; + } + Ti = Tg + Th; + TR = FNMS(KP250000000, Tg, Th); + T17 = Tp - To; + Tq = To + Tp; + TT = FMA(KP559016994, TS, TR); + T1T = FNMS(KP559016994, TS, TR); + Tr = Tn - Tq; + T1a = Tn + Tq; + T2f = FNMS(KP447533225, T1S, T1T); + T2p = FMA(KP494780565, T1T, T1S); + T1U = FMA(KP132830569, T1T, T1S); + T24 = FNMS(KP120146378, T1S, T1T); + TX = FMA(KP603558818, TW, TT); + T1k = FNMS(KP667278218, TT, TW); + T1v = FNMS(KP786782374, TW, TT); + T1A = FMA(KP869845200, TT, TW); + T19 = FNMS(KP250000000, Tr, Tk); + Ts = Tk + Tr; + T18 = FMA(KP618033988, T17, T16); + T1P = FNMS(KP618033988, T16, T17); + } + } + } + } + { + E T22, T1Q, T1h, T1c, T2O, T2N, T2m, T3a, T3b, T2q, T1y, T3f, T2e, T2h, T3e; + E T1H, T1J; + { + E T3m, T3n, T2k, T2c, T1C, T1r; + { + E Tj, TC, T1O, T1b; + T3m = T9 + Ti; + Tj = T9 - Ti; + TC = Ts - TB; + T3n = TB + Ts; + T1O = FNMS(KP559016994, T1a, T19); + T1b = FMA(KP559016994, T1a, T19); + Ci[WS(csi, 7)] = KP951056516 * (FMA(KP618033988, Tj, TC)); + Ci[WS(csi, 2)] = -(KP951056516 * (FNMS(KP618033988, TC, Tj))); + T22 = FMA(KP869845200, T1O, T1P); + T1Q = FNMS(KP786782374, T1P, T1O); + T2k = FMA(KP066152395, T1O, T1P); + T2c = FNMS(KP059835404, T1P, T1O); + T1C = FNMS(KP120146378, T18, T1b); + T1r = FMA(KP132830569, T1b, T18); + T1h = FNMS(KP893101515, T18, T1b); + T1c = FMA(KP987388751, T1b, T18); + } + { + E T1B, T1E, T1t, T3o, T3q, T1w, T3p; + T1B = FMA(KP912575812, T1A, T1z); + T2O = FNMS(KP912575812, T1A, T1z); + T2N = 
FNMS(KP867381224, T1D, T1C); + T1E = FMA(KP867381224, T1D, T1C); + T1t = FMA(KP958953096, T1s, T1r); + T2R = FNMS(KP958953096, T1s, T1r); + T3o = T3m + T3n; + T3q = T3m - T3n; + T2T = FMA(KP912575812, T1v, T1u); + T1w = FNMS(KP912575812, T1v, T1u); + T2m = FNMS(KP845997307, T2l, T2k); + T3a = FMA(KP845997307, T2l, T2k); + T3b = FNMS(KP982009705, T2p, T2o); + T2q = FMA(KP982009705, T2p, T2o); + T3p = FNMS(KP250000000, T3o, T3l); + Cr[WS(csr, 12)] = T3o + T3l; + { + E T1x, T1F, T1G, T1I; + T1x = FMA(KP894834959, T1w, T1t); + T1F = FNMS(KP894834959, T1w, T1t); + Cr[WS(csr, 7)] = FNMS(KP559016994, T3q, T3p); + Cr[WS(csr, 2)] = FMA(KP559016994, T3q, T3p); + T1y = FMA(KP248028675, T1x, TJ); + T1G = FNMS(KP904508497, T1F, T1E); + T1I = FNMS(KP894834959, T1B, T1F); + T3f = FNMS(KP845997307, T2d, T2c); + T2e = FMA(KP845997307, T2d, T2c); + T2h = FNMS(KP921078979, T2g, T2f); + T3e = FMA(KP921078979, T2g, T2f); + T1H = FMA(KP763583905, T1G, T1B); + T1J = FMA(KP559016994, T1I, T1E); + } + } + } + { + E T1i, T1l, T23, T30, T2Z, T26, T1R, T33, T1f, T1n, T1p, T34, T1Y, T3d, T3k; + E T3i; + { + E T2j, TY, T2s, T2u, T1d, T1m, T1e; + T2D = FMA(KP831864738, T1h, T1g); + T1i = FNMS(KP831864738, T1h, T1g); + { + E T2i, T2n, T2r, T2t; + T2i = FMA(KP906616052, T2h, T2e); + T2n = FNMS(KP906616052, T2h, T2e); + Ci[WS(csi, 4)] = KP951056516 * (FNMS(KP803003575, T1H, T1y)); + Ci[WS(csi, 9)] = KP951056516 * (FNMS(KP992114701, T1J, T1y)); + T2j = FMA(KP262346850, T2i, T1K); + T2r = FNMS(KP923225144, T2q, T2n); + T2t = T2m + T2n; + T2C = FNMS(KP829049696, T1k, T1j); + T1l = FMA(KP829049696, T1k, T1j); + TY = FMA(KP916574801, TX, TQ); + T2H = FNMS(KP916574801, TX, TQ); + T2s = FNMS(KP618033988, T2r, T2m); + T2u = FNMS(KP669429328, T2t, T2q); + T2G = FNMS(KP831864738, T1c, T15); + T1d = FMA(KP831864738, T1c, T15); + } + T23 = FNMS(KP772036680, T22, T21); + T30 = FMA(KP772036680, T22, T21); + Ci[WS(csi, 8)] = KP951056516 * (FMA(KP949179823, T2s, T2j)); + Ci[WS(csi, 3)] = KP951056516 * 
(FNMS(KP876306680, T2u, T2j)); + T1m = FNMS(KP904730450, T1d, TY); + T1e = FMA(KP904730450, T1d, TY); + T2Z = FNMS(KP734762448, T25, T24); + T26 = FMA(KP734762448, T25, T24); + T1R = FMA(KP772036680, T1Q, T1N); + T33 = FNMS(KP772036680, T1Q, T1N); + T1f = FNMS(KP242145790, T1e, TJ); + Ci[0] = -(KP951056516 * (FMA(KP968583161, T1e, TJ))); + T1n = FNMS(KP904508497, T1m, T1l); + T1p = FNMS(KP999754674, T1m, T1i); + T34 = FNMS(KP734762448, T1X, T1U); + T1Y = FMA(KP734762448, T1X, T1U); + } + { + E T2Y, T31, T38, T36, T3c, T3g; + { + E T20, T28, T2a, T29, T2b, T35; + T2Y = FNMS(KP559016994, T2A, T2z); + T2B = FMA(KP559016994, T2A, T2z); + { + E T1o, T1q, T27, T1Z; + T1o = FNMS(KP683113946, T1n, T1i); + T1q = FMA(KP559154169, T1p, T1l); + T27 = FNMS(KP945422727, T1Y, T1R); + T1Z = FMA(KP945422727, T1Y, T1R); + Ci[WS(csi, 5)] = -(KP951056516 * (FNMS(KP876306680, T1o, T1f))); + Ci[WS(csi, 10)] = -(KP951056516 * (FNMS(KP968583161, T1q, T1f))); + T20 = FNMS(KP262346850, T1Z, T1K); + Ci[WS(csi, 1)] = -(KP998026728 * (FMA(KP952936919, T1K, T1Z))); + T28 = FMA(KP956723877, T27, T26); + T2a = T27 - T23; + } + T29 = FMA(KP645989928, T28, T23); + T2b = FMA(KP591287873, T2a, T26); + Ci[WS(csi, 6)] = -(KP951056516 * (FMA(KP949179823, T29, T20))); + Ci[WS(csi, 11)] = -(KP951056516 * (FNMS(KP992114701, T2b, T20))); + T31 = FMA(KP956723877, T30, T2Z); + T35 = FNMS(KP956723877, T30, T2Z); + T38 = FMA(KP618033988, T35, T34); + T36 = T34 + T35; + } + Cr[WS(csr, 1)] = FNMS(KP992114701, T31, T2Y); + T3c = FMA(KP923225144, T3b, T3a); + T3g = FNMS(KP923225144, T3b, T3a); + { + E T32, T37, T3h, T3j, T39; + T32 = FMA(KP248028675, T31, T2Y); + T39 = FNMS(KP653711795, T33, T38); + T37 = FMA(KP584303379, T36, T33); + T3h = FNMS(KP904508497, T3g, T3f); + T3j = FNMS(KP997675361, T3g, T3e); + Cr[WS(csr, 11)] = FNMS(KP897376177, T39, T32); + Cr[WS(csr, 6)] = FMA(KP949179823, T37, T32); + T3d = FNMS(KP237294955, T3c, T2Y); + T3k = FNMS(KP560319534, T3j, T3f); + T3i = FMA(KP681693190, T3h, T3e); + } + } 
+ Cr[WS(csr, 8)] = FMA(KP949179823, T3k, T3d); + Cr[WS(csr, 3)] = FMA(KP860541664, T3i, T3d); + T2P = FNMS(KP809385824, T2O, T2N); + T2S = FMA(KP809385824, T2O, T2N); + } + } + } + { + E T2F, T2K, T2M, T2Q; + T2Q = FMA(KP248028675, T2P, T2B); + { + E T2U, T2W, T2E, T2I; + T2U = FNMS(KP894834959, T2T, T2S); + T2W = T2R + T2S; + T2E = FMA(KP904730450, T2D, T2C); + T2I = FNMS(KP904730450, T2D, T2C); + { + E T2V, T2X, T2J, T2L; + T2V = FNMS(KP618033988, T2U, T2R); + T2X = FNMS(KP690668130, T2W, T2T); + T2F = FNMS(KP242145790, T2E, T2B); + Cr[0] = FMA(KP968583161, T2E, T2B); + T2J = T2H + T2I; + T2L = FMA(KP904730450, T2G, T2I); + Cr[WS(csr, 9)] = FMA(KP897376177, T2V, T2Q); + Cr[WS(csr, 4)] = FNMS(KP803003575, T2X, T2Q); + T2K = FNMS(KP683113946, T2J, T2G); + T2M = FMA(KP618033988, T2L, T2H); + } + } + Cr[WS(csr, 5)] = FMA(KP792626838, T2K, T2F); + Cr[WS(csr, 10)] = FMA(KP876091699, T2M, T2F); + } + } + } +} + +static const kr2c_desc desc = { 25, "r2cfII_25", {47, 12, 165, 0}, &GENUS }; + +void X(codelet_r2cfII_25) (planner *p) { + X(kr2c_register) (p, r2cfII_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 25 -name r2cfII_25 -dft-II -include r2cfII.h */ + +/* + * This function contains 213 FP additions, 148 FP multiplications, + * (or, 126 additions, 61 multiplications, 87 fused multiply/add), + * 94 stack variables, 38 constants, and 50 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP125581039, +0.125581039058626752152356449131262266244969664); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DK(KP728968627, 
+0.728968627421411523146730319055259111372571664); + DK(KP963507348, +0.963507348203430549974383005744259307057084020); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP497379774, +0.497379774329709576484567492012895936835134813); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP1_457937254, +1.457937254842823046293460638110518222745143328); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP250666467, +0.250666467128608490746237519633017587885836494); + DK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP851558583, +0.851558583130145297725004891488503407959946084); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DK(KP293892626, +0.293892626146236564584352977319536384298826219); + DK(KP475528258, 
+0.475528258147576786058219666689691071702849317); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E TE, TR, T2i, T1z, TL, TS, TB, T2d, T1l, T1i, T2c, T9, T23, TZ, TW; + E T22, Ti, T26, T16, T13, T25, Ts, T2a, T1e, T1b, T29, TP, TQ; + { + E TK, T1y, TH, T1x; + TE = R0[0]; + { + E TI, TJ, TF, TG; + TI = R0[WS(rs, 10)]; + TJ = R1[WS(rs, 2)]; + TK = TI - TJ; + T1y = TI + TJ; + TF = R0[WS(rs, 5)]; + TG = R1[WS(rs, 7)]; + TH = TF - TG; + T1x = TF + TG; + } + TR = KP559016994 * (TH - TK); + T2i = FNMS(KP587785252, T1x, KP951056516 * T1y); + T1z = FMA(KP951056516, T1x, KP587785252 * T1y); + TL = TH + TK; + TS = FNMS(KP250000000, TL, TE); + } + { + E Tt, Tw, Tz, TA, T1k, T1j, T1g, T1h; + Tt = R0[WS(rs, 3)]; + { + E Tu, Tv, Tx, Ty; + Tu = R0[WS(rs, 8)]; + Tv = R1[WS(rs, 10)]; + Tw = Tu - Tv; + Tx = R1[0]; + Ty = R1[WS(rs, 5)]; + Tz = Tx + Ty; + TA = Tw - Tz; + T1k = Ty - Tx; + T1j = Tu + Tv; + } + TB = Tt + TA; + T2d = FNMS(KP293892626, T1j, KP475528258 * T1k); + T1l = FMA(KP475528258, T1j, KP293892626 * T1k); + T1g = FNMS(KP250000000, TA, Tt); + T1h = KP559016994 * (Tw + Tz); + T1i = T1g + T1h; + T2c = T1g - T1h; + } + { + E T1, T4, T7, T8, TY, TX, TU, TV; + T1 = R0[WS(rs, 1)]; + { + E T2, T3, T5, T6; + T2 = R0[WS(rs, 6)]; + T3 = R1[WS(rs, 8)]; + T4 = T2 - T3; + T5 = R0[WS(rs, 11)]; + T6 = R1[WS(rs, 3)]; + T7 = T5 - T6; + T8 = T4 + T7; + TY = T5 + T6; + TX = T2 + T3; + } + T9 = T1 + T8; + T23 = FNMS(KP293892626, TX, KP475528258 * TY); + TZ = FMA(KP475528258, TX, KP293892626 * TY); + TU = KP559016994 * (T4 - T7); + TV = FNMS(KP250000000, T8, T1); + 
TW = TU + TV; + T22 = TV - TU; + } + { + E Ta, Td, Tg, Th, T15, T14, T11, T12; + Ta = R0[WS(rs, 4)]; + { + E Tb, Tc, Te, Tf; + Tb = R0[WS(rs, 9)]; + Tc = R1[WS(rs, 11)]; + Td = Tb - Tc; + Te = R1[WS(rs, 1)]; + Tf = R1[WS(rs, 6)]; + Tg = Te + Tf; + Th = Td - Tg; + T15 = Tf - Te; + T14 = Tb + Tc; + } + Ti = Ta + Th; + T26 = FNMS(KP293892626, T14, KP475528258 * T15); + T16 = FMA(KP475528258, T14, KP293892626 * T15); + T11 = FNMS(KP250000000, Th, Ta); + T12 = KP559016994 * (Td + Tg); + T13 = T11 + T12; + T25 = T11 - T12; + } + { + E Tk, Tn, Tq, Tr, T1d, T1c, T19, T1a; + Tk = R0[WS(rs, 2)]; + { + E Tl, Tm, To, Tp; + Tl = R0[WS(rs, 7)]; + Tm = R1[WS(rs, 9)]; + Tn = Tl - Tm; + To = R0[WS(rs, 12)]; + Tp = R1[WS(rs, 4)]; + Tq = To - Tp; + Tr = Tn + Tq; + T1d = To + Tp; + T1c = Tl + Tm; + } + Ts = Tk + Tr; + T2a = FNMS(KP293892626, T1c, KP475528258 * T1d); + T1e = FMA(KP475528258, T1c, KP293892626 * T1d); + T19 = KP559016994 * (Tn - Tq); + T1a = FNMS(KP250000000, Tr, Tk); + T1b = T19 + T1a; + T29 = T1a - T19; + } + TP = TB - Ts; + TQ = T9 - Ti; + Ci[WS(csi, 2)] = FNMS(KP951056516, TQ, KP587785252 * TP); + Ci[WS(csi, 7)] = FMA(KP587785252, TQ, KP951056516 * TP); + { + E TM, TD, TN, Tj, TC, TO; + TM = TE + TL; + Tj = T9 + Ti; + TC = Ts + TB; + TD = KP559016994 * (Tj - TC); + TN = Tj + TC; + Cr[WS(csr, 12)] = TM + TN; + TO = FNMS(KP250000000, TN, TM); + Cr[WS(csr, 2)] = TD + TO; + Cr[WS(csr, 7)] = TO - TD; + } + { + E TT, T1J, T1Y, T1U, T1X, T1P, T1V, T1M, T1W, T1A, T1B, T1r, T1C, T1v, T18; + E T1n, T1o, T1G, T1D; + TT = TR + TS; + { + E T1H, T1I, T1S, T1T; + T1H = FNMS(KP844327925, TW, KP1_071653589 * TZ); + T1I = FNMS(KP1_274847979, T16, KP770513242 * T13); + T1J = T1H - T1I; + T1Y = T1H + T1I; + T1S = FMA(KP125333233, T1i, KP1_984229402 * T1l); + T1T = FMA(KP904827052, T1b, KP851558583 * T1e); + T1U = T1S - T1T; + T1X = T1T + T1S; + } + { + E T1N, T1O, T1K, T1L; + T1N = FMA(KP535826794, TW, KP1_688655851 * TZ); + T1O = FMA(KP637423989, T13, KP1_541026485 * T16); + T1P = T1N 
- T1O; + T1V = T1N + T1O; + T1K = FNMS(KP1_809654104, T1e, KP425779291 * T1b); + T1L = FNMS(KP992114701, T1i, KP250666467 * T1l); + T1M = T1K - T1L; + T1W = T1K + T1L; + } + { + E T1p, T1q, T1t, T1u; + T1p = FMA(KP844327925, T13, KP1_071653589 * T16); + T1q = FMA(KP248689887, TW, KP1_937166322 * TZ); + T1A = T1q + T1p; + T1t = FMA(KP481753674, T1b, KP1_752613360 * T1e); + T1u = FMA(KP684547105, T1i, KP1_457937254 * T1l); + T1B = T1t + T1u; + T1r = T1p - T1q; + T1C = T1A + T1B; + T1v = T1t - T1u; + } + { + E T10, T17, T1f, T1m; + T10 = FNMS(KP497379774, TZ, KP968583161 * TW); + T17 = FNMS(KP1_688655851, T16, KP535826794 * T13); + T18 = T10 + T17; + T1f = FNMS(KP963507348, T1e, KP876306680 * T1b); + T1m = FNMS(KP1_369094211, T1l, KP728968627 * T1i); + T1n = T1f + T1m; + T1o = T18 + T1n; + T1G = T10 - T17; + T1D = T1f - T1m; + } + { + E T1R, T1Q, T20, T1Z; + Cr[0] = TT + T1o; + Ci[0] = -(T1z + T1C); + T1R = KP559016994 * (T1P + T1M); + T1Q = FMA(KP250000000, T1M - T1P, TT); + Cr[WS(csr, 4)] = FMA(KP951056516, T1J, T1Q) + FMA(KP587785252, T1U, T1R); + Cr[WS(csr, 9)] = FMA(KP951056516, T1U, T1Q) + FNMA(KP587785252, T1J, T1R); + T20 = KP559016994 * (T1Y + T1X); + T1Z = FMA(KP250000000, T1X - T1Y, T1z); + Ci[WS(csi, 9)] = FMA(KP587785252, T1V, KP951056516 * T1W) + T1Z - T20; + Ci[WS(csi, 4)] = FMA(KP587785252, T1W, T1Z) + FNMS(KP951056516, T1V, T20); + { + E T1E, T1F, T1s, T1w; + T1E = FMS(KP250000000, T1C, T1z); + T1F = KP559016994 * (T1B - T1A); + Ci[WS(csi, 5)] = FMA(KP951056516, T1D, T1E) + FNMA(KP587785252, T1G, T1F); + Ci[WS(csi, 10)] = FMA(KP951056516, T1G, KP587785252 * T1D) + T1E + T1F; + T1s = FNMS(KP250000000, T1o, TT); + T1w = KP559016994 * (T18 - T1n); + Cr[WS(csr, 5)] = FMA(KP587785252, T1r, T1s) + FMS(KP951056516, T1v, T1w); + Cr[WS(csr, 10)] = T1w + FMA(KP587785252, T1v, T1s) - (KP951056516 * T1r); + } + } + } + { + E T21, T2z, T2L, T2K, T2M, T2F, T2P, T2C, T2Q, T2l, T2o, T2p, T2w, T2u, T28; + E T2f, T2g, T2s, T2h; + T21 = TS - TR; + { + E T2x, T2y, T2I, 
T2J; + T2x = FNMS(KP844327925, T29, KP1_071653589 * T2a); + T2y = FNMS(KP125581039, T2d, KP998026728 * T2c); + T2z = T2x + T2y; + T2L = T2y - T2x; + T2I = FNMS(KP481753674, T22, KP1_752613360 * T23); + T2J = FMA(KP904827052, T25, KP851558583 * T26); + T2K = T2I + T2J; + T2M = T2I - T2J; + } + { + E T2D, T2E, T2A, T2B; + T2D = FMA(KP535826794, T29, KP1_688655851 * T2a); + T2E = FMA(KP062790519, T2c, KP1_996053456 * T2d); + T2F = T2D + T2E; + T2P = T2E - T2D; + T2A = FMA(KP876306680, T22, KP963507348 * T23); + T2B = FNMS(KP425779291, T25, KP1_809654104 * T26); + T2C = T2A + T2B; + T2Q = T2A - T2B; + } + { + E T2j, T2k, T2m, T2n; + T2j = FNMS(KP125333233, T25, KP1_984229402 * T26); + T2k = FMA(KP684547105, T22, KP1_457937254 * T23); + T2l = T2j - T2k; + T2m = FNMS(KP770513242, T2c, KP1_274847979 * T2d); + T2n = FMA(KP998026728, T29, KP125581039 * T2a); + T2o = T2m - T2n; + T2p = T2l + T2o; + T2w = T2k + T2j; + T2u = T2n + T2m; + } + { + E T24, T27, T2b, T2e; + T24 = FNMS(KP1_369094211, T23, KP728968627 * T22); + T27 = FMA(KP992114701, T25, KP250666467 * T26); + T28 = T24 - T27; + T2b = FNMS(KP1_996053456, T2a, KP062790519 * T29); + T2e = FMA(KP637423989, T2c, KP1_541026485 * T2d); + T2f = T2b - T2e; + T2g = T28 + T2f; + T2s = T24 + T27; + T2h = T2b + T2e; + } + { + E T2H, T2G, T2O, T2N; + Cr[WS(csr, 1)] = T21 + T2g; + Ci[WS(csi, 1)] = T2p - T2i; + T2H = KP559016994 * (T2C - T2F); + T2G = FNMS(KP250000000, T2C + T2F, T21); + Cr[WS(csr, 8)] = FMA(KP951056516, T2z, T2G) + FNMA(KP587785252, T2K, T2H); + Cr[WS(csr, 3)] = FMA(KP951056516, T2K, KP587785252 * T2z) + T2G + T2H; + T2O = KP559016994 * (T2M + T2L); + T2N = FMA(KP250000000, T2L - T2M, T2i); + Ci[WS(csi, 3)] = T2N + FMA(KP587785252, T2P, T2O) - (KP951056516 * T2Q); + Ci[WS(csi, 8)] = FMA(KP587785252, T2Q, T2N) + FMS(KP951056516, T2P, T2O); + { + E T2t, T2v, T2q, T2r; + T2t = FNMS(KP250000000, T2g, T21); + T2v = KP559016994 * (T28 - T2f); + Cr[WS(csr, 6)] = FMA(KP951056516, T2u, T2t) + FNMA(KP587785252, T2w, T2v); + 
Cr[WS(csr, 11)] = FMA(KP951056516, T2w, T2v) + FMA(KP587785252, T2u, T2t); + T2q = KP250000000 * T2p; + T2r = KP559016994 * (T2l - T2o); + Ci[WS(csi, 6)] = FMS(KP951056516, T2h, T2i + T2q) + FNMA(KP587785252, T2s, T2r); + Ci[WS(csi, 11)] = FMA(KP951056516, T2s, KP587785252 * T2h) + T2r - (T2i + T2q); + } + } + } + } + } +} + +static const kr2c_desc desc = { 25, "r2cfII_25", {126, 61, 87, 0}, &GENUS }; + +void X(codelet_r2cfII_25) (planner *p) { + X(kr2c_register) (p, r2cfII_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:16 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 3 -name r2cfII_3 -dft-II -include r2cfII.h */ + +/* + * This function contains 4 FP additions, 2 FP multiplications, + * (or, 3 additions, 1 multiplications, 1 fused multiply/add), + * 7 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { + E T3, T1, T2, T4; + T3 = R0[0]; + T1 = R1[0]; + T2 = R0[WS(rs, 1)]; + Ci[0] = -(KP866025403 * (T1 + T2)); + T4 = T2 - T1; + Cr[WS(csr, 1)] = T3 + T4; + Cr[0] = FNMS(KP500000000, T4, T3); + } + } +} + +static const kr2c_desc desc = { 3, "r2cfII_3", {3, 1, 1, 0}, &GENUS }; + +void X(codelet_r2cfII_3) (planner *p) { + X(kr2c_register) (p, r2cfII_3, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 3 -name r2cfII_3 -dft-II -include r2cfII.h */ + +/* + * This function contains 4 FP additions, 2 FP multiplications, + * (or, 3 additions, 1 multiplications, 1 fused multiply/add), + * 7 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cfII.h" + 
+static void r2cfII_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { + E T1, T2, T3, T4; + T1 = R0[0]; + T2 = R1[0]; + T3 = R0[WS(rs, 1)]; + T4 = T2 - T3; + Cr[WS(csr, 1)] = T1 - T4; + Ci[0] = -(KP866025403 * (T2 + T3)); + Cr[0] = FMA(KP500000000, T4, T1); + } + } +} + +static const kr2c_desc desc = { 3, "r2cfII_3", {3, 1, 1, 0}, &GENUS }; + +void X(codelet_r2cfII_3) (planner *p) { + X(kr2c_register) (p, r2cfII_3, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:19 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */ + +/* + * This function contains 174 FP additions, 128 FP multiplications, + * (or, 46 additions, 0 multiplications, 128 fused multiply/add), + * 96 stack variables, 15 constants, and 64 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, 
R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { + E T23, T1S, T21, T1L, T2z, T2x, T1Z, T22; + { + E T2n, T2B, T1z, T5, T1C, T2C, T2o, Tc, T27, T1J, T1l, Tm, T26, T1G, T1k; + E Tv, T1s, T1c, T2e, T1Y, T1r, T15, T2d, T1V, TP, TF, T1M, TC, T1P, TN; + E TO, TI; + { + E T1A, T8, Te, Tj, Tf, T1B, Tb, Tg; + { + E T1, T2l, T2, T3, T9, Ta; + T1 = R0[0]; + T2l = R0[WS(rs, 8)]; + T2 = R0[WS(rs, 4)]; + T3 = R0[WS(rs, 12)]; + { + E T6, T7, T2m, T4; + T6 = R0[WS(rs, 10)]; + T7 = R0[WS(rs, 2)]; + T9 = R0[WS(rs, 6)]; + T2m = T2 + T3; + T4 = T2 - T3; + T1A = FNMS(KP414213562, T6, T7); + T8 = FMA(KP414213562, T7, T6); + T2n = FMA(KP707106781, T2m, T2l); + T2B = FNMS(KP707106781, T2m, T2l); + T1z = FMA(KP707106781, T4, T1); + T5 = FNMS(KP707106781, T4, T1); + Ta = R0[WS(rs, 14)]; + } + Te = R0[WS(rs, 7)]; + Tj = R0[WS(rs, 15)]; + Tf = R0[WS(rs, 3)]; + T1B = FMS(KP414213562, T9, Ta); + Tb = FMA(KP414213562, Ta, T9); + Tg = R0[WS(rs, 11)]; + } + { + E Tn, Ts, To, T1I, Tl, T1H, Ti, Tp, Tk, Th, T1T, T1U; + Tn = R0[WS(rs, 9)]; + T1C = T1A + T1B; + T2C = T1B - T1A; + T2o = T8 + Tb; + Tc = T8 - Tb; + Tk = Tg - Tf; + Th = Tf + Tg; + Ts = R0[WS(rs, 1)]; + To = R0[WS(rs, 5)]; + T1I = FMA(KP707106781, Tk, Tj); + Tl = FNMS(KP707106781, Tk, Tj); + T1H = FMA(KP707106781, Th, Te); + Ti = FNMS(KP707106781, Th, Te); + Tp = R0[WS(rs, 13)]; + { + E TT, T16, TY, T17, TW, TZ, T11, T12, Tt, Tq; + TT = R1[WS(rs, 15)]; + T27 = FNMS(KP198912367, T1H, T1I); + T1J = FMA(KP198912367, T1I, T1H); + T1l = FMA(KP668178637, Ti, Tl); + Tm = FNMS(KP668178637, Tl, Ti); + Tt = To - Tp; + Tq = To + Tp; + T16 = R1[WS(rs, 7)]; + { + E TU, T1F, Tu, T1E, Tr, TV; + TU = R1[WS(rs, 3)]; + T1F = FMA(KP707106781, Tt, Ts); + Tu = FNMS(KP707106781, Tt, Ts); + T1E = FMA(KP707106781, Tq, Tn); + Tr = FNMS(KP707106781, Tq, Tn); + TV = R1[WS(rs, 11)]; + TY = R1[WS(rs, 9)]; + T26 = FNMS(KP198912367, T1E, T1F); + T1G = 
FMA(KP198912367, T1F, T1E); + T1k = FMA(KP668178637, Tr, Tu); + Tv = FNMS(KP668178637, Tu, Tr); + T17 = TU + TV; + TW = TU - TV; + TZ = R1[WS(rs, 1)]; + T11 = R1[WS(rs, 5)]; + T12 = R1[WS(rs, 13)]; + } + { + E TX, T1a, T10, T19, T13, T1W, T18, T1b, T14, T1X; + T1T = FMS(KP707106781, TW, TT); + TX = FMA(KP707106781, TW, TT); + T1a = FNMS(KP414213562, TY, TZ); + T10 = FMA(KP414213562, TZ, TY); + T19 = FMS(KP414213562, T11, T12); + T13 = FMA(KP414213562, T12, T11); + T1W = FMA(KP707106781, T17, T16); + T18 = FNMS(KP707106781, T17, T16); + T1b = T19 - T1a; + T1U = T1a + T19; + T14 = T10 - T13; + T1X = T10 + T13; + T1s = FMA(KP923879532, T1b, T18); + T1c = FNMS(KP923879532, T1b, T18); + T2e = FMA(KP923879532, T1X, T1W); + T1Y = FNMS(KP923879532, T1X, T1W); + T1r = FNMS(KP923879532, T14, TX); + T15 = FMA(KP923879532, T14, TX); + } + } + { + E Ty, TL, TG, TM, TB, TH; + Ty = R1[0]; + TL = R1[WS(rs, 8)]; + { + E Tz, TA, TD, TE; + Tz = R1[WS(rs, 4)]; + T2d = FMA(KP923879532, T1U, T1T); + T1V = FNMS(KP923879532, T1U, T1T); + TA = R1[WS(rs, 12)]; + TD = R1[WS(rs, 10)]; + TE = R1[WS(rs, 2)]; + TG = R1[WS(rs, 6)]; + TM = Tz + TA; + TB = Tz - TA; + TP = FNMS(KP414213562, TD, TE); + TF = FMA(KP414213562, TE, TD); + TH = R1[WS(rs, 14)]; + } + T1M = FMA(KP707106781, TB, Ty); + TC = FNMS(KP707106781, TB, Ty); + T1P = FMA(KP707106781, TM, TL); + TN = FNMS(KP707106781, TM, TL); + TO = FMS(KP414213562, TG, TH); + TI = FMA(KP414213562, TH, TG); + } + } + } + { + E T1j, T1O, T1p, T1R, T1o, T2E, T2D, T1m, T1D, T2w, T2v, T1K, T2i, T2c, T2h; + E T29, T2t, T2r, T2f, T2j; + { + E T2a, T2b, T1g, TS, T1f, Tx, T2N, T2L, T1d, T1h; + { + E Td, TR, TK, Tw, T2J, T2K; + T1j = FMA(KP923879532, Tc, T5); + Td = FNMS(KP923879532, Tc, T5); + { + E T1N, TQ, T1Q, TJ; + T1N = TP + TO; + TQ = TO - TP; + T1Q = TF + TI; + TJ = TF - TI; + T2a = FMA(KP923879532, T1N, T1M); + T1O = FNMS(KP923879532, T1N, T1M); + T1p = FMA(KP923879532, TQ, TN); + TR = FNMS(KP923879532, TQ, TN); + T2b = FMA(KP923879532, T1Q, T1P); + 
T1R = FNMS(KP923879532, T1Q, T1P); + T1o = FMA(KP923879532, TJ, TC); + TK = FNMS(KP923879532, TJ, TC); + Tw = Tm - Tv; + T2E = Tv + Tm; + } + T2D = FMA(KP923879532, T2C, T2B); + T2J = FNMS(KP923879532, T2C, T2B); + T2K = T1k + T1l; + T1m = T1k - T1l; + T1g = FMA(KP534511135, TK, TR); + TS = FNMS(KP534511135, TR, TK); + T1f = FNMS(KP831469612, Tw, Td); + Tx = FMA(KP831469612, Tw, Td); + T2N = FNMS(KP831469612, T2K, T2J); + T2L = FMA(KP831469612, T2K, T2J); + T1d = FNMS(KP534511135, T1c, T15); + T1h = FMA(KP534511135, T15, T1c); + } + { + E T25, T28, T2p, T2q; + T1D = FNMS(KP923879532, T1C, T1z); + T25 = FMA(KP923879532, T1C, T1z); + { + E T2O, T1e, T2M, T1i; + T2O = TS + T1d; + T1e = TS - T1d; + T2M = T1g + T1h; + T1i = T1g - T1h; + Ci[WS(csi, 5)] = FNMS(KP881921264, T2O, T2N); + Ci[WS(csi, 10)] = -(FMA(KP881921264, T2O, T2N)); + Cr[WS(csr, 2)] = FMA(KP881921264, T1e, Tx); + Cr[WS(csr, 13)] = FNMS(KP881921264, T1e, Tx); + Ci[WS(csi, 2)] = -(FMA(KP881921264, T2M, T2L)); + Ci[WS(csi, 13)] = FNMS(KP881921264, T2M, T2L); + Cr[WS(csr, 5)] = FMA(KP881921264, T1i, T1f); + Cr[WS(csr, 10)] = FNMS(KP881921264, T1i, T1f); + T28 = T26 - T27; + T2w = T26 + T27; + } + T2v = FNMS(KP923879532, T2o, T2n); + T2p = FMA(KP923879532, T2o, T2n); + T2q = T1G + T1J; + T1K = T1G - T1J; + T2i = FMA(KP098491403, T2a, T2b); + T2c = FNMS(KP098491403, T2b, T2a); + T2h = FNMS(KP980785280, T28, T25); + T29 = FMA(KP980785280, T28, T25); + T2t = FNMS(KP980785280, T2q, T2p); + T2r = FMA(KP980785280, T2q, T2p); + T2f = FMA(KP098491403, T2e, T2d); + T2j = FNMS(KP098491403, T2d, T2e); + } + } + { + E T1x, T1q, T1v, T1n, T2H, T2F, T1t, T1w; + { + E T2u, T2g, T2s, T2k; + T2u = T2f - T2c; + T2g = T2c + T2f; + T2s = T2i + T2j; + T2k = T2i - T2j; + Ci[WS(csi, 7)] = FMA(KP995184726, T2u, T2t); + Ci[WS(csi, 8)] = FMS(KP995184726, T2u, T2t); + Cr[0] = FMA(KP995184726, T2g, T29); + Cr[WS(csr, 15)] = FNMS(KP995184726, T2g, T29); + Ci[0] = -(FMA(KP995184726, T2s, T2r)); + Ci[WS(csi, 15)] = FNMS(KP995184726, T2s, 
T2r); + Cr[WS(csr, 7)] = FMA(KP995184726, T2k, T2h); + Cr[WS(csr, 8)] = FNMS(KP995184726, T2k, T2h); + } + T1x = FNMS(KP303346683, T1o, T1p); + T1q = FMA(KP303346683, T1p, T1o); + T1v = FNMS(KP831469612, T1m, T1j); + T1n = FMA(KP831469612, T1m, T1j); + T2H = FNMS(KP831469612, T2E, T2D); + T2F = FMA(KP831469612, T2E, T2D); + T1t = FMA(KP303346683, T1s, T1r); + T1w = FNMS(KP303346683, T1r, T1s); + { + E T2I, T1u, T2G, T1y; + T2I = T1q + T1t; + T1u = T1q - T1t; + T2G = T1x + T1w; + T1y = T1w - T1x; + Ci[WS(csi, 6)] = -(FMA(KP956940335, T2I, T2H)); + Ci[WS(csi, 9)] = FNMS(KP956940335, T2I, T2H); + Cr[WS(csr, 1)] = FMA(KP956940335, T1u, T1n); + Cr[WS(csr, 14)] = FNMS(KP956940335, T1u, T1n); + Ci[WS(csi, 1)] = FMA(KP956940335, T2G, T2F); + Ci[WS(csi, 14)] = FMS(KP956940335, T2G, T2F); + Cr[WS(csr, 6)] = FMA(KP956940335, T1y, T1v); + Cr[WS(csr, 9)] = FNMS(KP956940335, T1y, T1v); + } + T23 = FNMS(KP820678790, T1O, T1R); + T1S = FMA(KP820678790, T1R, T1O); + T21 = FNMS(KP980785280, T1K, T1D); + T1L = FMA(KP980785280, T1K, T1D); + T2z = FMA(KP980785280, T2w, T2v); + T2x = FNMS(KP980785280, T2w, T2v); + T1Z = FNMS(KP820678790, T1Y, T1V); + T22 = FMA(KP820678790, T1V, T1Y); + } + } + } + { + E T20, T2A, T24, T2y; + T20 = T1S + T1Z; + T2A = T1Z - T1S; + T24 = T22 - T23; + T2y = T23 + T22; + Ci[WS(csi, 4)] = FMS(KP773010453, T2A, T2z); + Ci[WS(csi, 11)] = FMA(KP773010453, T2A, T2z); + Cr[WS(csr, 3)] = FMA(KP773010453, T20, T1L); + Cr[WS(csr, 12)] = FNMS(KP773010453, T20, T1L); + Ci[WS(csi, 3)] = FMA(KP773010453, T2y, T2x); + Ci[WS(csi, 12)] = FMS(KP773010453, T2y, T2x); + Cr[WS(csr, 4)] = FMA(KP773010453, T24, T21); + Cr[WS(csr, 11)] = FNMS(KP773010453, T24, T21); + } + } + } +} + +static const kr2c_desc desc = { 32, "r2cfII_32", {46, 0, 128, 0}, &GENUS }; + +void X(codelet_r2cfII_32) (planner *p) { + X(kr2c_register) (p, r2cfII_32, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name 
r2cfII_32 -dft-II -include r2cfII.h */ + +/* + * This function contains 174 FP additions, 82 FP multiplications, + * (or, 138 additions, 46 multiplications, 36 fused multiply/add), + * 62 stack variables, 15 constants, and 64 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { + E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G; + E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p; + E T1O, T2a; + { + E T1, T2p, T4, T2o, T2, T3; + T1 = R0[0]; + T2p = R0[WS(rs, 8)]; + T2 = R0[WS(rs, 4)]; + T3 = R0[WS(rs, 12)]; + T4 = KP707106781 * (T2 - T3); + T2o = KP707106781 * (T2 + T3); + T5 = T1 + T4; + T2D = T2p - 
T2o; + T1z = T1 - T4; + T2q = T2o + T2p; + } + { + E T8, T1A, Tb, T1B; + { + E T6, T7, T9, Ta; + T6 = R0[WS(rs, 2)]; + T7 = R0[WS(rs, 10)]; + T8 = FNMS(KP382683432, T7, KP923879532 * T6); + T1A = FMA(KP382683432, T6, KP923879532 * T7); + T9 = R0[WS(rs, 6)]; + Ta = R0[WS(rs, 14)]; + Tb = FNMS(KP923879532, Ta, KP382683432 * T9); + T1B = FMA(KP923879532, T9, KP382683432 * Ta); + } + Tc = T8 + Tb; + T2C = Tb - T8; + T1C = T1A - T1B; + T2n = T1A + T1B; + } + { + E Te, Tk, Th, Tj, Tf, Tg; + Te = R0[WS(rs, 1)]; + Tk = R0[WS(rs, 9)]; + Tf = R0[WS(rs, 5)]; + Tg = R0[WS(rs, 13)]; + Th = KP707106781 * (Tf - Tg); + Tj = KP707106781 * (Tf + Tg); + { + E Ti, Tl, T1H, T1I; + Ti = Te + Th; + Tl = Tj + Tk; + Tm = FNMS(KP195090322, Tl, KP980785280 * Ti); + T1k = FMA(KP195090322, Ti, KP980785280 * Tl); + T1H = Tk - Tj; + T1I = Te - Th; + T1J = FNMS(KP555570233, T1I, KP831469612 * T1H); + T26 = FMA(KP831469612, T1I, KP555570233 * T1H); + } + } + { + E Tq, Tt, Tp, Ts, Tn, To; + Tq = R0[WS(rs, 15)]; + Tt = R0[WS(rs, 7)]; + Tn = R0[WS(rs, 3)]; + To = R0[WS(rs, 11)]; + Tp = KP707106781 * (Tn - To); + Ts = KP707106781 * (Tn + To); + { + E Tr, Tu, T1E, T1F; + Tr = Tp - Tq; + Tu = Ts + Tt; + Tv = FMA(KP980785280, Tr, KP195090322 * Tu); + T1l = FNMS(KP980785280, Tu, KP195090322 * Tr); + T1E = Tt - Ts; + T1F = Tp + Tq; + T1G = FNMS(KP555570233, T1F, KP831469612 * T1E); + T27 = FMA(KP831469612, T1F, KP555570233 * T1E); + } + } + { + E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU; + TW = R1[WS(rs, 15)]; + T1a = R1[WS(rs, 7)]; + TT = R1[WS(rs, 3)]; + TU = R1[WS(rs, 11)]; + TV = KP707106781 * (TT - TU); + T19 = KP707106781 * (TT + TU); + { + E TY, TZ, T11, T12; + TY = R1[WS(rs, 1)]; + TZ = R1[WS(rs, 9)]; + T10 = FNMS(KP382683432, TZ, KP923879532 * TY); + T16 = FMA(KP382683432, TY, KP923879532 * TZ); + T11 = R1[WS(rs, 5)]; + T12 = R1[WS(rs, 13)]; + T13 = FNMS(KP923879532, T12, KP382683432 * T11); + T17 = FMA(KP923879532, T11, KP382683432 * T12); + } + { + E TX, T14, T1W, T1X; + TX = TV - TW; + T14 
= T10 + T13; + T15 = TX + T14; + T1r = TX - T14; + T1W = T13 - T10; + T1X = T1a - T19; + T1Y = T1W - T1X; + T2e = T1W + T1X; + } + { + E T18, T1b, T1T, T1U; + T18 = T16 + T17; + T1b = T19 + T1a; + T1c = T18 + T1b; + T1s = T1b - T18; + T1T = TV + TW; + T1U = T16 - T17; + T1V = T1T + T1U; + T2d = T1U - T1T; + } + } + { + E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA; + Ty = R1[0]; + TP = R1[WS(rs, 8)]; + Tz = R1[WS(rs, 4)]; + TA = R1[WS(rs, 12)]; + TB = KP707106781 * (Tz - TA); + TO = KP707106781 * (Tz + TA); + { + E TD, TE, TG, TH; + TD = R1[WS(rs, 2)]; + TE = R1[WS(rs, 10)]; + TF = FNMS(KP382683432, TE, KP923879532 * TD); + TL = FMA(KP382683432, TD, KP923879532 * TE); + TG = R1[WS(rs, 6)]; + TH = R1[WS(rs, 14)]; + TI = FNMS(KP923879532, TH, KP382683432 * TG); + TM = FMA(KP923879532, TG, KP382683432 * TH); + } + { + E TC, TJ, T1P, T1Q; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T1o = TC - TJ; + T1P = TI - TF; + T1Q = TP - TO; + T1R = T1P - T1Q; + T2b = T1P + T1Q; + } + { + E TN, TQ, T1M, T1N; + TN = TL + TM; + TQ = TO + TP; + TR = TN + TQ; + T1p = TQ - TN; + T1M = Ty - TB; + T1N = TL - TM; + T1O = T1M - T1N; + T2a = T1M + T1N; + } + } + { + E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t; + { + E Td, Tw, T2m, T2r; + Td = T5 + Tc; + Tw = Tm + Tv; + Tx = Td - Tw; + T1f = Td + Tw; + T2m = T1l - T1k; + T2r = T2n + T2q; + T2s = T2m - T2r; + T2u = T2m + T2r; + } + { + E TS, T1d, T1g, T1h; + TS = FMA(KP098017140, TK, KP995184726 * TR); + T1d = FNMS(KP995184726, T1c, KP098017140 * T15); + T1e = TS + T1d; + T2l = T1d - TS; + T1g = FNMS(KP098017140, TR, KP995184726 * TK); + T1h = FMA(KP995184726, T15, KP098017140 * T1c); + T1i = T1g + T1h; + T2t = T1h - T1g; + } + Cr[WS(csr, 8)] = Tx - T1e; + Ci[WS(csi, 8)] = T2t - T2u; + Cr[WS(csr, 7)] = Tx + T1e; + Ci[WS(csi, 7)] = T2t + T2u; + Cr[WS(csr, 15)] = T1f - T1i; + Ci[WS(csi, 15)] = T2l - T2s; + Cr[0] = T1f + T1i; + Ci[0] = T2l + T2s; + } + { + E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N; + { + E T25, T28, T2K, T2L; + T25 = T1z + T1C; + 
T28 = T26 - T27; + T29 = T25 + T28; + T2h = T25 - T28; + T2K = T1J + T1G; + T2L = T2C + T2D; + T2M = T2K - T2L; + T2O = T2K + T2L; + } + { + E T2c, T2f, T2i, T2j; + T2c = FMA(KP956940335, T2a, KP290284677 * T2b); + T2f = FNMS(KP290284677, T2e, KP956940335 * T2d); + T2g = T2c + T2f; + T2J = T2f - T2c; + T2i = FMA(KP290284677, T2d, KP956940335 * T2e); + T2j = FNMS(KP290284677, T2a, KP956940335 * T2b); + T2k = T2i - T2j; + T2N = T2j + T2i; + } + Cr[WS(csr, 14)] = T29 - T2g; + Ci[WS(csi, 14)] = T2N - T2O; + Cr[WS(csr, 1)] = T29 + T2g; + Ci[WS(csi, 1)] = T2N + T2O; + Cr[WS(csr, 9)] = T2h - T2k; + Ci[WS(csi, 9)] = T2J - T2M; + Cr[WS(csr, 6)] = T2h + T2k; + Ci[WS(csi, 6)] = T2J + T2M; + } + { + E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z; + { + E T1j, T1m, T2w, T2x; + T1j = T5 - Tc; + T1m = T1k + T1l; + T1n = T1j + T1m; + T1v = T1j - T1m; + T2w = Tv - Tm; + T2x = T2q - T2n; + T2y = T2w - T2x; + T2A = T2w + T2x; + } + { + E T1q, T1t, T1w, T1x; + T1q = FMA(KP773010453, T1o, KP634393284 * T1p); + T1t = FNMS(KP634393284, T1s, KP773010453 * T1r); + T1u = T1q + T1t; + T2v = T1t - T1q; + T1w = FMA(KP634393284, T1r, KP773010453 * T1s); + T1x = FNMS(KP634393284, T1o, KP773010453 * T1p); + T1y = T1w - T1x; + T2z = T1x + T1w; + } + Cr[WS(csr, 12)] = T1n - T1u; + Ci[WS(csi, 12)] = T2z - T2A; + Cr[WS(csr, 3)] = T1n + T1u; + Ci[WS(csi, 3)] = T2z + T2A; + Cr[WS(csr, 11)] = T1v - T1y; + Ci[WS(csi, 11)] = T2v - T2y; + Cr[WS(csr, 4)] = T1v + T1y; + Ci[WS(csi, 4)] = T2v + T2y; + } + { + E T1L, T21, T2G, T2I, T20, T2H, T24, T2B; + { + E T1D, T1K, T2E, T2F; + T1D = T1z - T1C; + T1K = T1G - T1J; + T1L = T1D + T1K; + T21 = T1D - T1K; + T2E = T2C - T2D; + T2F = T26 + T27; + T2G = T2E - T2F; + T2I = T2F + T2E; + } + { + E T1S, T1Z, T22, T23; + T1S = FMA(KP881921264, T1O, KP471396736 * T1R); + T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y); + T20 = T1S - T1Z; + T2H = T1S + T1Z; + T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y); + T23 = FNMS(KP471396736, T1O, KP881921264 * T1R); + T24 = T22 - T23; + 
T2B = T23 + T22; + } + Cr[WS(csr, 13)] = T1L - T20; + Ci[WS(csi, 13)] = T2B - T2G; + Cr[WS(csr, 2)] = T1L + T20; + Ci[WS(csi, 2)] = T2B + T2G; + Cr[WS(csr, 10)] = T21 - T24; + Ci[WS(csi, 10)] = T2I - T2H; + Cr[WS(csr, 5)] = T21 + T24; + Ci[WS(csi, 5)] = -(T2H + T2I); + } + } + } +} + +static const kr2c_desc desc = { 32, "r2cfII_32", {138, 46, 36, 0}, &GENUS }; + +/* Passes the r2cfII_32 kernel and desc to X(kr2c_register) for planner p. */ void X(codelet_r2cfII_32) (planner *p) { + X(kr2c_register) (p, r2cfII_32, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:16 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -name r2cfII_4 -dft-II -include r2cfII.h */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 2 additions, 0 multiplications, 4 fused multiply/add), + * 8 stack variables, 1 constants, and 8 memory accesses + */ +#include "r2cfII.h" + +/* r2cfII_4, variant compiled when HAVE_FMA is defined. The loop runs v times; each pass loads R0[0], R0[WS(rs, 1)], R1[0] and R1[WS(rs, 1)], then stores Ci[0], Ci[WS(csi, 1)], Cr[0] and Cr[WS(csr, 1)]. After each pass R0 and R1 advance by ivs and Cr and Ci by ovs. NOTE(review): FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are not defined in this codelet; the stores line up with the non-FMA variant after the #else if FMA(a, b, c) is a*b+c and FNMS(a, b, c) is c-a*b -- confirm against the headers. */ static void r2cfII_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) { + E T1, T5, T2, T3, T4, T6; + T1 = R0[0]; + T5 = R0[WS(rs, 1)]; + T2 = R1[0]; + T3 = R1[WS(rs, 1)]; + T4 = T2 - T3; + T6 = T2 + T3; + Ci[0] = -(FMA(KP707106781, T6, T5)); + Ci[WS(csi, 1)] = FNMS(KP707106781, T6, T5); + Cr[0] = FMA(KP707106781, T4, T1); + Cr[WS(csr, 1)] = FNMS(KP707106781, T4, T1); + } + } +} + +/* Descriptor handed to the registration call below: 4 is the -n value from the generator command line, and 2, 0, 4 repeat the additions, multiplications and fused multiply/add counts from the summary comment above. NOTE(review): the trailing 0 and GENUS are not explained in this codelet. */ static const kr2c_desc desc = { 4, "r2cfII_4", {2, 0, 4, 0}, &GENUS }; + +/* Passes r2cfII_4 and desc to X(kr2c_register) for planner p. */ void X(codelet_r2cfII_4) (planner *p) { + X(kr2c_register) (p, r2cfII_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 4 -name r2cfII_4 -dft-II -include r2cfII.h */ + +/* + * This function contains 6 FP additions, 2 FP multiplications, + * (or, 6 additions, 2 multiplications, 0 fused multiply/add), + * 8 stack variables, 1 constants, and 8 memory
accesses + */ +#include "r2cfII.h" + +/* r2cfII_4, variant compiled when HAVE_FMA is not defined. The loop runs v times; each pass multiplies the difference and the sum of R1[0] and R1[WS(rs, 1)] by KP707106781 (T4 and T5), stores R0[0] minus and plus T4 to Cr[WS(csr, 1)] and Cr[0], stores R0[WS(rs, 1)] minus T5 to Ci[WS(csi, 1)], and stores the negated sum of T5 and R0[WS(rs, 1)] to Ci[0]. After each pass R0 and R1 advance by ivs and Cr and Ci by ovs. NOTE(review): DK, WS and MAKE_VOLATILE_STRIDE are not defined in this codelet. */ static void r2cfII_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) { + E T1, T6, T4, T5, T2, T3; + T1 = R0[0]; + T6 = R0[WS(rs, 1)]; + T2 = R1[0]; + T3 = R1[WS(rs, 1)]; + T4 = KP707106781 * (T2 - T3); + T5 = KP707106781 * (T2 + T3); + Cr[WS(csr, 1)] = T1 - T4; + Ci[WS(csi, 1)] = T6 - T5; + Cr[0] = T1 + T4; + Ci[0] = -(T5 + T6); + } + } +} + +/* Descriptor handed to the registration call below: 4 is the -n value from the generator command line, and 6, 2, 0 repeat the additions, multiplications and fused multiply/add counts from the summary comment above. NOTE(review): the trailing 0 and GENUS are not explained in this codelet. */ static const kr2c_desc desc = { 4, "r2cfII_4", {6, 2, 0, 0}, &GENUS }; + +/* Passes r2cfII_4 and desc to X(kr2c_register) for planner p. */ void X(codelet_r2cfII_4) (planner *p) { + X(kr2c_register) (p, r2cfII_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:16 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 5 -name r2cfII_5 -dft-II -include r2cfII.h */ + +/* + * This function contains 12 FP additions, 7 FP multiplications, + * (or, 7 additions, 2 multiplications, 5 fused multiply/add), + * 17 stack variables, 4 constants, and 10 memory accesses + */ +#include "r2cfII.h" + +/* r2cfII_5, variant compiled when HAVE_FMA is defined. The loop runs v times; each pass loads R0[0], R0[WS(rs, 1)], R1[WS(rs, 1)], R0[WS(rs, 2)] and R1[0], then stores Ci[0], Ci[WS(csi, 1)], Cr[WS(csr, 2)], Cr[WS(csr, 1)] and Cr[0]. After each pass R0 and R1 advance by ivs and Cr and Ci by ovs. NOTE(review): FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are not defined in this codelet; confirm their semantics in the headers. */ static void r2cfII_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) { + E T1, T2, T3, T5, T6; + T1 = R0[0]; + T2 = R0[WS(rs, 1)]; + T3 = R1[WS(rs, 1)]; + T5 = R0[WS(rs, 2)]; + T6 = R1[0]; + { + E Tb, T4, Tc, T7, Ta, T8, T9; + Tb = T2 + T3; + T4 = T2 - T3; + Tc = T5 + T6; + T7 = T5 - T6; + Ci[0] = -(KP951056516 * (FMA(KP618033988, Tc, Tb))); + Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP618033988, Tb, Tc))); + Ta = T4 - T7; + T8 = T4 + T7; + T9 = FNMS(KP250000000, T8, T1); + Cr[WS(csr, 2)] = T1 + T8; + Cr[WS(csr, 1)] = FNMS(KP559016994, Ta, T9); + Cr[0] = FMA(KP559016994, Ta, T9); + } + } + } +} + +/* Descriptor handed to the registration call below: 5 is the -n value from the generator command line, and 7, 2, 5 repeat the additions, multiplications and fused multiply/add counts from the summary comment above. NOTE(review): the trailing 0 and GENUS are not explained in this codelet. */ static const kr2c_desc desc = { 5, "r2cfII_5", {7, 2, 5, 0}, &GENUS }; + +/* Passes r2cfII_5 and desc to X(kr2c_register) for planner p. */ void
X(codelet_r2cfII_5) (planner *p) { + X(kr2c_register) (p, r2cfII_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 5 -name r2cfII_5 -dft-II -include r2cfII.h */ + +/* + * This function contains 12 FP additions, 6 FP multiplications, + * (or, 9 additions, 3 multiplications, 3 fused multiply/add), + * 17 stack variables, 4 constants, and 10 memory accesses + */ +#include "r2cfII.h" + +/* r2cfII_5, variant compiled when HAVE_FMA is not defined. The loop runs v times; each pass loads R0[0], R0[WS(rs, 1)], R1[WS(rs, 1)], R0[WS(rs, 2)] and R1[0], then stores Cr[WS(csr, 2)], Ci[WS(csi, 1)], Ci[0], Cr[0] and Cr[WS(csr, 1)]. After each pass R0 and R1 advance by ivs and Cr and Ci by ovs. Three statements are still written with the FMA and FNMS macros, matching the 3 fused multiply/add in the summary comment above. NOTE(review): FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are not defined in this codelet; confirm their semantics in the headers. */ static void r2cfII_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) { + E T8, T3, T6, T9, Tc, Tb, T7, Ta; + T8 = R0[0]; + { + E T1, T2, T4, T5; + T1 = R0[WS(rs, 1)]; + T2 = R1[WS(rs, 1)]; + T3 = T1 - T2; + T4 = R0[WS(rs, 2)]; + T5 = R1[0]; + T6 = T4 - T5; + T9 = T3 + T6; + Tc = T4 + T5; + Tb = T1 + T2; + } + Cr[WS(csr, 2)] = T8 + T9; + Ci[WS(csi, 1)] = FNMS(KP951056516, Tc, KP587785252 * Tb); + Ci[0] = -(FMA(KP951056516, Tb, KP587785252 * Tc)); + T7 = KP559016994 * (T3 - T6); + Ta = FNMS(KP250000000, T9, T8); + Cr[0] = T7 + Ta; + Cr[WS(csr, 1)] = Ta - T7; + } + } +} + +/* Descriptor handed to the registration call below: 5 is the -n value from the generator command line, and 9, 3, 3 repeat the additions, multiplications and fused multiply/add counts from the summary comment above. NOTE(review): the trailing 0 and GENUS are not explained in this codelet. */ static const kr2c_desc desc = { 5, "r2cfII_5", {9, 3, 3, 0}, &GENUS }; + +/* Passes r2cfII_5 and desc to X(kr2c_register) for planner p. */ void X(codelet_r2cfII_5) (planner *p) { + X(kr2c_register) (p, r2cfII_5, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,122 @@ +/* + * Copyright
(c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:17 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -name r2cfII_6 -dft-II -include r2cfII.h */ + +/* + * This function contains 13 FP additions, 6 FP multiplications, + * (or, 7 additions, 0 multiplications, 6 fused multiply/add), + * 15 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cfII.h" + +/* r2cfII_6, variant compiled when HAVE_FMA is defined. The loop runs v times; each pass loads R0[0], R1[WS(rs, 1)], R0[WS(rs, 2)], R0[WS(rs, 1)], R1[WS(rs, 2)] and R1[0], then stores Cr[WS(csr, 1)], Ci[WS(csi, 1)], Cr[WS(csr, 2)], Cr[0], Ci[WS(csi, 2)] and Ci[0]. After each pass R0 and R1 advance by ivs and Cr and Ci by ovs. NOTE(review): FMA, FNMS, FMS, DK, WS and MAKE_VOLATILE_STRIDE are not defined in this codelet; the stores line up with the non-FMA variant after the #else if FMA(a, b, c) is a*b+c, FNMS(a, b, c) is c-a*b and FMS(a, b, c) is a*b-c -- confirm against the headers. */ static void r2cfII_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) { + E T1, T9, T2, T3, T6, T7; + T1 = R0[0]; + T9 = R1[WS(rs, 1)]; + T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 1)]; + T6 = R1[WS(rs, 2)]; + T7 =
R1[0]; + { + E Tc, T4, Ta, T8, T5, Tb; + Cr[WS(csr, 1)] = T1 + T2 - T3; + Tc = T2 + T3; + T4 = T3 - T2; + Ta = T6 + T7; + T8 = T6 - T7; + T5 = FMA(KP500000000, T4, T1); + Tb = FMA(KP500000000, Ta, T9); + Ci[WS(csi, 1)] = T9 - Ta; + Cr[WS(csr, 2)] = FMA(KP866025403, T8, T5); + Cr[0] = FNMS(KP866025403, T8, T5); + Ci[WS(csi, 2)] = FMS(KP866025403, Tc, Tb); + Ci[0] = -(FMA(KP866025403, Tc, Tb)); + } + } + } +} + +/* Descriptor handed to the registration call below: 6 is the -n value from the generator command line, and 7, 0, 6 repeat the additions, multiplications and fused multiply/add counts from the summary comment above. NOTE(review): the trailing 0 and GENUS are not explained in this codelet. */ static const kr2c_desc desc = { 6, "r2cfII_6", {7, 0, 6, 0}, &GENUS }; + +/* Passes r2cfII_6 and desc to X(kr2c_register) for planner p. */ void X(codelet_r2cfII_6) (planner *p) { + X(kr2c_register) (p, r2cfII_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 6 -name r2cfII_6 -dft-II -include r2cfII.h */ + +/* + * This function contains 13 FP additions, 4 FP multiplications, + * (or, 11 additions, 2 multiplications, 2 fused multiply/add), + * 14 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cfII.h" + +/* r2cfII_6, variant compiled when HAVE_FMA is not defined. The loop runs v times; each pass loads R1[WS(rs, 1)], R1[WS(rs, 2)], R1[0], R0[0], R0[WS(rs, 1)] and R0[WS(rs, 2)], then stores Cr[0], Cr[WS(csr, 2)], Ci[WS(csi, 1)], Cr[WS(csr, 1)], Ci[0] and Ci[WS(csi, 2)]. After each pass R0 and R1 advance by ivs and Cr and Ci by ovs. Two statements (T4 and Tb) are still written with the FMA macro, matching the 2 fused multiply/add in the summary comment above. NOTE(review): FMA, DK, WS and MAKE_VOLATILE_STRIDE are not defined in this codelet; confirm their semantics in the headers. */ static void r2cfII_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) { + E Ta, T7, T9, T1, T3, T2, T8, T4, T5, T6, Tb; + Ta = R1[WS(rs, 1)]; + T5 = R1[WS(rs, 2)]; + T6 = R1[0]; + T7 = KP866025403 * (T5 - T6); + T9 = T5 + T6; + T1 = R0[0]; + T3 = R0[WS(rs, 1)]; + T2 = R0[WS(rs, 2)]; + T8 = KP866025403 * (T2 + T3); + T4 = FMA(KP500000000, T3 - T2, T1); + Cr[0] = T4 - T7; + Cr[WS(csr, 2)] = T4 + T7; + Ci[WS(csi, 1)] = Ta - T9; + Cr[WS(csr, 1)] = T1 + T2 - T3; + Tb = FMA(KP500000000, T9, Ta); + Ci[0] = -(T8 + Tb); + Ci[WS(csi, 2)] = T8 - Tb; + } + } +} + +/* Descriptor handed to the registration call below: 6 is the -n value from the generator command line, and 11, 2, 2 repeat the additions, multiplications and fused multiply/add counts from the summary comment above. NOTE(review): the trailing 0 and GENUS are not explained in this codelet. */ static const kr2c_desc desc = { 6, "r2cfII_6", {11, 2, 2, 0}, &GENUS }; + +/* Passes r2cfII_6 and desc to X(kr2c_register) for planner p. */ void
X(codelet_r2cfII_6) (planner *p) { + X(kr2c_register) (p, r2cfII_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1535 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:20 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cfII_64 -dft-II -include r2cfII.h */ + +/* + * This function contains 434 FP additions, 320 FP multiplications, + * (or, 114 additions, 0 multiplications, 320 fused multiply/add), + * 158 stack variables, 31 constants, and 128 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP941544065, +0.941544065183020778412509402599502357185589796); + DK(KP903989293, 
+0.903989293123443331586200297230537048710132025); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP472964775, +0.472964775891319928124438237972992463904131113); + DK(KP357805721, +0.357805721314524104672487743774474392487532769); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP989176509, +0.989176509964780973451673738016243063983689533); + DK(KP803207531, +0.803207531480644909806676512963141923879569427); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP741650546, +0.741650546272035369581266691172079863842265220); + DK(KP148335987, +0.148335987538347428753676511486911367000625355); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP998795456, +0.998795456205172392714771604759100694443203615); + DK(KP740951125, +0.740951125354959091175616897495162729728955309); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP906347169, +0.906347169019147157946142717268914412664134293); + DK(KP049126849, +0.049126849769467254105343321271313617079695752); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP970031253, +0.970031253194543992603984207286100251456865962); + DK(KP857728610, +0.857728610000272069902269984284770137042490799); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP599376933, +0.599376933681923766271389869014404232837890546); + DK(KP250486960, +0.250486960191305461595702160124721208578685568); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + 
DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E T5b, T6q, T6p, T5e; + { + E T5h, T3Z, T35, Tm, T5g, T3W, T34, Tv, T5f, T3T, T6N, T6z, T6j, T65, T33; + E Td, T5z, T4D, T3q, T2C, T5C, T4O, T3n, T2b, T5k, T4b, T3c, TR, T5l, T4e; + E T3b, TK, T5n, T44, T39, T1c, T5o, T47, T38, T15, T5s, T4k, T3j, T1T, T5v; + E T4v, T3g, T1s, T1t, T1y, T5D, T4K, T5A, T4R, T3o, T2F, T3r, T2u, T1C, T1H; + E T1D, T1z, T1w, T1E; + { + E T2A, T26, T4B, T23, T4M, T2y, T2z, T29; + { + E Te, Tj, Tn, Ts, To, Tk, Th, Tp, Tf, Tg; + Te = R0[WS(rs, 14)]; + Tj = R0[WS(rs, 30)]; + Tf = R0[WS(rs, 6)]; + Tg = R0[WS(rs, 22)]; + Tn = R0[WS(rs, 18)]; + Ts = R0[WS(rs, 2)]; + To = R0[WS(rs, 10)]; + Tk = Tg - Tf; + Th = Tf + Tg; + Tp = R0[WS(rs, 26)]; + { + E T3Q, T8, T3P, T5, T6x, T63, T3R, Tb; + { + E T1, T61, T9, T62, T4, Ta; + { + E T3V, Tu, T3U, Tr, T3Y, Tl; + T1 = R0[0]; + T3Y = FMA(KP707106781, Tk, Tj); + Tl = FNMS(KP707106781, Tk, Tj); + { + E T3X, Ti, Tt, Tq; + T3X = FMA(KP707106781, Th, Te); + Ti = FNMS(KP707106781, Th, Te); + Tt = To - Tp; + Tq = To + Tp; + T5h = FNMS(KP198912367, T3X, T3Y); + T3Z = FMA(KP198912367, T3Y, T3X); + T35 = FMA(KP668178637, Ti, Tl); + Tm = FNMS(KP668178637, Tl, Ti); + T3V = FMA(KP707106781, Tt, Ts); + Tu = FNMS(KP707106781, Tt, Ts); + T3U = FMA(KP707106781, Tq, Tn); + Tr = FNMS(KP707106781, Tq, Tn); + T61 = R0[WS(rs, 16)]; + } + { + E T2, T3, T6, T7; + T2 = R0[WS(rs, 8)]; + T5g = FNMS(KP198912367, T3U, T3V); + T3W = FMA(KP198912367, T3V, T3U); + T34 = FMA(KP668178637, Tr, Tu); + Tv = FNMS(KP668178637, Tu, Tr); + T3 = R0[WS(rs, 24)]; + T6 = R0[WS(rs, 20)]; + T7 = R0[WS(rs, 4)]; + T9 = R0[WS(rs, 12)]; + T62 = T2 + T3; + T4 = T2 - T3; + T3Q = FNMS(KP414213562, T6, T7); + T8 = FMA(KP414213562, T7, T6); + Ta = R0[WS(rs, 28)]; + } + } + T3P = 
FMA(KP707106781, T4, T1); + T5 = FNMS(KP707106781, T4, T1); + T6x = FNMS(KP707106781, T62, T61); + T63 = FMA(KP707106781, T62, T61); + T3R = FMS(KP414213562, T9, Ta); + Tb = FMA(KP414213562, Ta, T9); + } + { + E T1Z, T2w, T27, T2x, T22, T28; + T1Z = R1[WS(rs, 31)]; + { + E T3S, T6y, T64, Tc; + T3S = T3Q + T3R; + T6y = T3R - T3Q; + T64 = T8 + Tb; + Tc = T8 - Tb; + T5f = FMA(KP923879532, T3S, T3P); + T3T = FNMS(KP923879532, T3S, T3P); + T6N = FNMS(KP923879532, T6y, T6x); + T6z = FMA(KP923879532, T6y, T6x); + T6j = FNMS(KP923879532, T64, T63); + T65 = FMA(KP923879532, T64, T63); + T33 = FMA(KP923879532, Tc, T5); + Td = FNMS(KP923879532, Tc, T5); + T2w = R1[WS(rs, 15)]; + } + { + E T20, T21, T24, T25; + T20 = R1[WS(rs, 7)]; + T21 = R1[WS(rs, 23)]; + T24 = R1[WS(rs, 19)]; + T25 = R1[WS(rs, 3)]; + T27 = R1[WS(rs, 11)]; + T2x = T20 + T21; + T22 = T20 - T21; + T2A = FNMS(KP414213562, T24, T25); + T26 = FMA(KP414213562, T25, T24); + T28 = R1[WS(rs, 27)]; + } + T4B = FMS(KP707106781, T22, T1Z); + T23 = FMA(KP707106781, T22, T1Z); + T4M = FMA(KP707106781, T2x, T2w); + T2y = FNMS(KP707106781, T2x, T2w); + T2z = FMS(KP414213562, T27, T28); + T29 = FMA(KP414213562, T28, T27); + } + } + } + { + E T1a, T10, T42, TX, T45, T18, T19, T13; + { + E TP, TF, T49, TC, T4c, TN, TO, TI; + { + E Ty, TL, TG, TM, TB, TH; + Ty = R0[WS(rs, 17)]; + { + E T4C, T2B, T4N, T2a; + T4C = T2A + T2z; + T2B = T2z - T2A; + T4N = T26 + T29; + T2a = T26 - T29; + T5z = FMA(KP923879532, T4C, T4B); + T4D = FNMS(KP923879532, T4C, T4B); + T3q = FMA(KP923879532, T2B, T2y); + T2C = FNMS(KP923879532, T2B, T2y); + T5C = FMA(KP923879532, T4N, T4M); + T4O = FNMS(KP923879532, T4N, T4M); + T3n = FNMS(KP923879532, T2a, T23); + T2b = FMA(KP923879532, T2a, T23); + TL = R0[WS(rs, 1)]; + } + { + E Tz, TA, TD, TE; + Tz = R0[WS(rs, 9)]; + TA = R0[WS(rs, 25)]; + TD = R0[WS(rs, 29)]; + TE = R0[WS(rs, 13)]; + TG = R0[WS(rs, 5)]; + TM = Tz - TA; + TB = Tz + TA; + TP = FMA(KP414213562, TD, TE); + TF = FMS(KP414213562, TE, TD); + TH 
= R0[WS(rs, 21)]; + } + T49 = FMA(KP707106781, TB, Ty); + TC = FNMS(KP707106781, TB, Ty); + T4c = FMA(KP707106781, TM, TL); + TN = FNMS(KP707106781, TM, TL); + TO = FMA(KP414213562, TG, TH); + TI = FNMS(KP414213562, TH, TG); + } + { + E TT, T16, T11, T17, TW, T12; + TT = R0[WS(rs, 15)]; + { + E T4a, TQ, T4d, TJ; + T4a = TO + TP; + TQ = TO - TP; + T4d = TI + TF; + TJ = TF - TI; + T5k = FMA(KP923879532, T4a, T49); + T4b = FNMS(KP923879532, T4a, T49); + T3c = FMA(KP923879532, TQ, TN); + TR = FNMS(KP923879532, TQ, TN); + T5l = FMA(KP923879532, T4d, T4c); + T4e = FNMS(KP923879532, T4d, T4c); + T3b = FMA(KP923879532, TJ, TC); + TK = FNMS(KP923879532, TJ, TC); + T16 = R0[WS(rs, 31)]; + } + { + E TU, TV, TY, TZ; + TU = R0[WS(rs, 7)]; + TV = R0[WS(rs, 23)]; + TY = R0[WS(rs, 3)]; + TZ = R0[WS(rs, 19)]; + T11 = R0[WS(rs, 27)]; + T17 = TV - TU; + TW = TU + TV; + T1a = FMA(KP414213562, TY, TZ); + T10 = FMS(KP414213562, TZ, TY); + T12 = R0[WS(rs, 11)]; + } + T42 = FMA(KP707106781, TW, TT); + TX = FNMS(KP707106781, TW, TT); + T45 = FMA(KP707106781, T17, T16); + T18 = FNMS(KP707106781, T17, T16); + T19 = FMA(KP414213562, T11, T12); + T13 = FNMS(KP414213562, T12, T11); + } + } + { + E T1R, T1n, T4i, T1k, T4t, T1P, T1Q, T1q; + { + E T1g, T1N, T1o, T1O, T1j, T1p; + T1g = R1[0]; + { + E T43, T1b, T46, T14; + T43 = T1a + T19; + T1b = T19 - T1a; + T46 = T10 + T13; + T14 = T10 - T13; + T5n = FMA(KP923879532, T43, T42); + T44 = FNMS(KP923879532, T43, T42); + T39 = FMA(KP923879532, T1b, T18); + T1c = FNMS(KP923879532, T1b, T18); + T5o = FMA(KP923879532, T46, T45); + T47 = FNMS(KP923879532, T46, T45); + T38 = FMA(KP923879532, T14, TX); + T15 = FNMS(KP923879532, T14, TX); + T1N = R1[WS(rs, 16)]; + } + { + E T1h, T1i, T1l, T1m; + T1h = R1[WS(rs, 8)]; + T1i = R1[WS(rs, 24)]; + T1l = R1[WS(rs, 20)]; + T1m = R1[WS(rs, 4)]; + T1o = R1[WS(rs, 12)]; + T1O = T1h + T1i; + T1j = T1h - T1i; + T1R = FNMS(KP414213562, T1l, T1m); + T1n = FMA(KP414213562, T1m, T1l); + T1p = R1[WS(rs, 28)]; + } + T4i = 
FMA(KP707106781, T1j, T1g); + T1k = FNMS(KP707106781, T1j, T1g); + T4t = FMA(KP707106781, T1O, T1N); + T1P = FNMS(KP707106781, T1O, T1N); + T1Q = FMS(KP414213562, T1o, T1p); + T1q = FMA(KP414213562, T1p, T1o); + } + { + E T2c, T2h, T2l, T2q, T2m, T2i, T2f, T2n, T2d, T2e; + T2c = R1[WS(rs, 13)]; + { + E T4j, T1S, T4u, T1r; + T4j = T1R + T1Q; + T1S = T1Q - T1R; + T4u = T1n + T1q; + T1r = T1n - T1q; + T5s = FMA(KP923879532, T4j, T4i); + T4k = FNMS(KP923879532, T4j, T4i); + T3j = FMA(KP923879532, T1S, T1P); + T1T = FNMS(KP923879532, T1S, T1P); + T5v = FMA(KP923879532, T4u, T4t); + T4v = FNMS(KP923879532, T4u, T4t); + T3g = FMA(KP923879532, T1r, T1k); + T1s = FNMS(KP923879532, T1r, T1k); + T2h = R1[WS(rs, 29)]; + T2d = R1[WS(rs, 5)]; + T2e = R1[WS(rs, 21)]; + } + T2l = R1[WS(rs, 17)]; + T2q = R1[WS(rs, 1)]; + T2m = R1[WS(rs, 9)]; + T2i = T2d - T2e; + T2f = T2d + T2e; + T2n = R1[WS(rs, 25)]; + { + E T1u, T1v, T2j, T4I; + T1t = R1[WS(rs, 14)]; + T2j = FMA(KP707106781, T2i, T2h); + T4I = FMS(KP707106781, T2i, T2h); + { + E T4H, T2g, T2r, T2o; + T4H = FMA(KP707106781, T2f, T2c); + T2g = FNMS(KP707106781, T2f, T2c); + T2r = T2m - T2n; + T2o = T2m + T2n; + { + E T4J, T4P, T2E, T2k; + T4J = FNMS(KP198912367, T4I, T4H); + T4P = FMA(KP198912367, T4H, T4I); + T2E = FMA(KP668178637, T2g, T2j); + T2k = FNMS(KP668178637, T2j, T2g); + { + E T2s, T4F, T4E, T2p; + T2s = FNMS(KP707106781, T2r, T2q); + T4F = FMA(KP707106781, T2r, T2q); + T4E = FMA(KP707106781, T2o, T2l); + T2p = FNMS(KP707106781, T2o, T2l); + T1y = R1[WS(rs, 30)]; + T1u = R1[WS(rs, 6)]; + { + E T4G, T4Q, T2D, T2t; + T4G = FMA(KP198912367, T4F, T4E); + T4Q = FNMS(KP198912367, T4E, T4F); + T2D = FMA(KP668178637, T2p, T2s); + T2t = FNMS(KP668178637, T2s, T2p); + T5D = T4G + T4J; + T4K = T4G - T4J; + T5A = T4Q + T4P; + T4R = T4P - T4Q; + T3o = T2D - T2E; + T2F = T2D + T2E; + T3r = T2t + T2k; + T2u = T2k - T2t; + T1v = R1[WS(rs, 22)]; + } + } + } + } + T1C = R1[WS(rs, 18)]; + T1H = R1[WS(rs, 2)]; + T1D = R1[WS(rs, 10)]; + T1z 
= T1u - T1v; + T1w = T1u + T1v; + T1E = R1[WS(rs, 26)]; + } + } + } + } + } + { + E T6A, T4r, T4y, T3h, T3k, T36, T6k, T40, T5X, T6c, T6b, T60; + { + E T5w, T5t, T2Z, T6U, T6T, T32; + { + E Tx, T2N, T2v, T6V, T6P, T6Q, T1e, T2G, T31, T2X, T2L, T1Y, T6W, T2Q, T30; + E T2U; + { + E T1W, T1L, T2O, T2P, T2V, T2W, T6O, TS, T1d; + { + E T4q, T4w, T1V, T1B, T1J, T4m, T4l, T1G, Tw, T1A, T4p; + T6A = Tv + Tm; + Tw = Tm - Tv; + T1A = FMA(KP707106781, T1z, T1y); + T4p = FMS(KP707106781, T1z, T1y); + { + E T4o, T1x, T1I, T1F; + T4o = FMA(KP707106781, T1w, T1t); + T1x = FNMS(KP707106781, T1w, T1t); + T1I = T1D - T1E; + T1F = T1D + T1E; + T4q = FNMS(KP198912367, T4p, T4o); + T4w = FMA(KP198912367, T4o, T4p); + T1V = FMA(KP668178637, T1x, T1A); + T1B = FNMS(KP668178637, T1A, T1x); + T1J = FNMS(KP707106781, T1I, T1H); + T4m = FMA(KP707106781, T1I, T1H); + T4l = FMA(KP707106781, T1F, T1C); + T1G = FNMS(KP707106781, T1F, T1C); + Tx = FNMS(KP831469612, Tw, Td); + T2N = FMA(KP831469612, Tw, Td); + } + { + E T4n, T4x, T1U, T1K; + T4n = FMA(KP198912367, T4m, T4l); + T4x = FNMS(KP198912367, T4l, T4m); + T1U = FMA(KP668178637, T1G, T1J); + T1K = FNMS(KP668178637, T1J, T1G); + T5w = T4n + T4q; + T4r = T4n - T4q; + T5t = T4x + T4w; + T4y = T4w - T4x; + T3h = T1U - T1V; + T1W = T1U + T1V; + T3k = T1K + T1B; + T1L = T1B - T1K; + T6O = T34 + T35; + T36 = T34 - T35; + } + } + T2O = FNMS(KP534511135, TK, TR); + TS = FMA(KP534511135, TR, TK); + T1d = FMA(KP534511135, T1c, T15); + T2P = FNMS(KP534511135, T15, T1c); + T2v = FMA(KP831469612, T2u, T2b); + T2V = FNMS(KP831469612, T2u, T2b); + T6V = FNMS(KP831469612, T6O, T6N); + T6P = FMA(KP831469612, T6O, T6N); + T6Q = TS + T1d; + T1e = TS - T1d; + T2W = FMA(KP831469612, T2F, T2C); + T2G = FNMS(KP831469612, T2F, T2C); + { + E T2S, T2T, T1M, T1X; + T2S = FMA(KP831469612, T1L, T1s); + T1M = FNMS(KP831469612, T1L, T1s); + T1X = FNMS(KP831469612, T1W, T1T); + T2T = FMA(KP831469612, T1W, T1T); + T31 = FMA(KP250486960, T2V, T2W); + T2X = FNMS(KP250486960, 
T2W, T2V); + T2L = FNMS(KP599376933, T1M, T1X); + T1Y = FMA(KP599376933, T1X, T1M); + T6W = T2O + T2P; + T2Q = T2O - T2P; + T30 = FMA(KP250486960, T2S, T2T); + T2U = FNMS(KP250486960, T2T, T2S); + } + } + { + E T2J, T1f, T6X, T6Z, T2K, T2H; + T2J = FNMS(KP881921264, T1e, Tx); + T1f = FMA(KP881921264, T1e, Tx); + T6X = FNMS(KP881921264, T6W, T6V); + T6Z = FMA(KP881921264, T6W, T6V); + T2K = FNMS(KP599376933, T2v, T2G); + T2H = FMA(KP599376933, T2G, T2v); + { + E T2R, T2Y, T6R, T6S; + T2Z = FNMS(KP881921264, T2Q, T2N); + T2R = FMA(KP881921264, T2Q, T2N); + { + E T2M, T6Y, T70, T2I; + T2M = T2K - T2L; + T6Y = T2L + T2K; + T70 = T1Y + T2H; + T2I = T1Y - T2H; + Cr[WS(csr, 10)] = FMA(KP857728610, T2M, T2J); + Cr[WS(csr, 21)] = FNMS(KP857728610, T2M, T2J); + Ci[WS(csi, 5)] = FMA(KP857728610, T6Y, T6X); + Ci[WS(csi, 26)] = FMS(KP857728610, T6Y, T6X); + Ci[WS(csi, 21)] = FNMS(KP857728610, T70, T6Z); + Ci[WS(csi, 10)] = -(FMA(KP857728610, T70, T6Z)); + Cr[WS(csr, 5)] = FMA(KP857728610, T2I, T1f); + Cr[WS(csr, 26)] = FNMS(KP857728610, T2I, T1f); + T2Y = T2U - T2X; + T6U = T2U + T2X; + } + T6T = FNMS(KP881921264, T6Q, T6P); + T6R = FMA(KP881921264, T6Q, T6P); + T6S = T30 + T31; + T32 = T30 - T31; + Cr[WS(csr, 2)] = FMA(KP970031253, T2Y, T2R); + Cr[WS(csr, 29)] = FNMS(KP970031253, T2Y, T2R); + Ci[WS(csi, 29)] = FNMS(KP970031253, T6S, T6R); + Ci[WS(csi, 2)] = -(FMA(KP970031253, T6S, T6R)); + } + } + } + { + E T5j, T5L, T5B, T6d, T67, T68, T5q, T5E, T5Z, T5V, T5J, T5y, T6e, T5O, T5Y; + E T5S; + { + E T5M, T5N, T5T, T5U; + { + E T66, T5i, T5m, T5p; + T6k = T5g + T5h; + T5i = T5g - T5h; + Cr[WS(csr, 13)] = FMA(KP970031253, T32, T2Z); + Cr[WS(csr, 18)] = FNMS(KP970031253, T32, T2Z); + Ci[WS(csi, 13)] = FNMS(KP970031253, T6U, T6T); + Ci[WS(csi, 18)] = -(FMA(KP970031253, T6U, T6T)); + T5j = FNMS(KP980785280, T5i, T5f); + T5L = FMA(KP980785280, T5i, T5f); + T66 = T3W + T3Z; + T40 = T3W - T3Z; + T5M = FNMS(KP098491403, T5k, T5l); + T5m = FMA(KP098491403, T5l, T5k); + T5p = 
FMA(KP098491403, T5o, T5n); + T5N = FNMS(KP098491403, T5n, T5o); + T5B = FNMS(KP980785280, T5A, T5z); + T5T = FMA(KP980785280, T5A, T5z); + T6d = FNMS(KP980785280, T66, T65); + T67 = FMA(KP980785280, T66, T65); + T68 = T5m + T5p; + T5q = T5m - T5p; + T5U = FMA(KP980785280, T5D, T5C); + T5E = FNMS(KP980785280, T5D, T5C); + } + { + E T5Q, T5R, T5u, T5x; + T5Q = FMA(KP980785280, T5t, T5s); + T5u = FNMS(KP980785280, T5t, T5s); + T5x = FNMS(KP980785280, T5w, T5v); + T5R = FMA(KP980785280, T5w, T5v); + T5Z = FNMS(KP049126849, T5T, T5U); + T5V = FMA(KP049126849, T5U, T5T); + T5J = FNMS(KP906347169, T5u, T5x); + T5y = FMA(KP906347169, T5x, T5u); + T6e = T5M + T5N; + T5O = T5M - T5N; + T5Y = FMA(KP049126849, T5Q, T5R); + T5S = FNMS(KP049126849, T5R, T5Q); + } + } + { + E T5H, T5r, T6f, T6h, T5I, T5F; + T5H = FNMS(KP995184726, T5q, T5j); + T5r = FMA(KP995184726, T5q, T5j); + T6f = FNMS(KP995184726, T6e, T6d); + T6h = FMA(KP995184726, T6e, T6d); + T5I = FMA(KP906347169, T5B, T5E); + T5F = FNMS(KP906347169, T5E, T5B); + { + E T5P, T5W, T69, T6a; + T5X = FNMS(KP995184726, T5O, T5L); + T5P = FMA(KP995184726, T5O, T5L); + { + E T5K, T6g, T6i, T5G; + T5K = T5I - T5J; + T6g = T5J + T5I; + T6i = T5F - T5y; + T5G = T5y + T5F; + Cr[WS(csr, 8)] = FMA(KP740951125, T5K, T5H); + Cr[WS(csr, 23)] = FNMS(KP740951125, T5K, T5H); + Ci[WS(csi, 7)] = FMA(KP740951125, T6g, T6f); + Ci[WS(csi, 24)] = FMS(KP740951125, T6g, T6f); + Ci[WS(csi, 23)] = FMA(KP740951125, T6i, T6h); + Ci[WS(csi, 8)] = FMS(KP740951125, T6i, T6h); + Cr[WS(csr, 7)] = FMA(KP740951125, T5G, T5r); + Cr[WS(csr, 24)] = FNMS(KP740951125, T5G, T5r); + T5W = T5S + T5V; + T6c = T5V - T5S; + } + T6b = FNMS(KP995184726, T68, T67); + T69 = FMA(KP995184726, T68, T67); + T6a = T5Y + T5Z; + T60 = T5Y - T5Z; + Cr[0] = FMA(KP998795456, T5W, T5P); + Cr[WS(csr, 31)] = FNMS(KP998795456, T5W, T5P); + Ci[WS(csi, 31)] = FNMS(KP998795456, T6a, T69); + Ci[0] = -(FMA(KP998795456, T6a, T69)); + } + } + } + } + { + E T3L, T6G, T6F, T3O; + { + E T37, 
T3z, T3p, T6H, T6B, T6C, T3e, T3s, T3M, T3J, T3w, T3m, T6I, T3C, T3N; + E T3G; + { + E T3B, T3A, T3H, T3I, T3a, T3d; + Cr[WS(csr, 15)] = FMA(KP998795456, T60, T5X); + Cr[WS(csr, 16)] = FNMS(KP998795456, T60, T5X); + Ci[WS(csi, 15)] = FMA(KP998795456, T6c, T6b); + Ci[WS(csi, 16)] = FMS(KP998795456, T6c, T6b); + T37 = FNMS(KP831469612, T36, T33); + T3z = FMA(KP831469612, T36, T33); + T3B = FMA(KP303346683, T38, T39); + T3a = FNMS(KP303346683, T39, T38); + T3d = FNMS(KP303346683, T3c, T3b); + T3A = FMA(KP303346683, T3b, T3c); + T3p = FMA(KP831469612, T3o, T3n); + T3H = FNMS(KP831469612, T3o, T3n); + T6H = FNMS(KP831469612, T6A, T6z); + T6B = FMA(KP831469612, T6A, T6z); + T6C = T3d + T3a; + T3e = T3a - T3d; + T3I = FMA(KP831469612, T3r, T3q); + T3s = FNMS(KP831469612, T3r, T3q); + { + E T3E, T3F, T3i, T3l; + T3E = FMA(KP831469612, T3h, T3g); + T3i = FNMS(KP831469612, T3h, T3g); + T3l = FNMS(KP831469612, T3k, T3j); + T3F = FMA(KP831469612, T3k, T3j); + T3M = FNMS(KP148335987, T3H, T3I); + T3J = FMA(KP148335987, T3I, T3H); + T3w = FMA(KP741650546, T3i, T3l); + T3m = FNMS(KP741650546, T3l, T3i); + T6I = T3A + T3B; + T3C = T3A - T3B; + T3N = FNMS(KP148335987, T3E, T3F); + T3G = FMA(KP148335987, T3F, T3E); + } + } + { + E T3v, T3f, T6J, T6L, T3x, T3t; + T3v = FNMS(KP956940335, T3e, T37); + T3f = FMA(KP956940335, T3e, T37); + T6J = FMA(KP956940335, T6I, T6H); + T6L = FNMS(KP956940335, T6I, T6H); + T3x = FMA(KP741650546, T3p, T3s); + T3t = FNMS(KP741650546, T3s, T3p); + { + E T3D, T3K, T6D, T6E; + T3L = FNMS(KP956940335, T3C, T3z); + T3D = FMA(KP956940335, T3C, T3z); + { + E T3y, T6K, T6M, T3u; + T3y = T3w - T3x; + T6K = T3w + T3x; + T6M = T3m + T3t; + T3u = T3m - T3t; + Cr[WS(csr, 9)] = FMA(KP803207531, T3y, T3v); + Cr[WS(csr, 22)] = FNMS(KP803207531, T3y, T3v); + Ci[WS(csi, 25)] = FNMS(KP803207531, T6K, T6J); + Ci[WS(csi, 6)] = -(FMA(KP803207531, T6K, T6J)); + Ci[WS(csi, 9)] = FNMS(KP803207531, T6M, T6L); + Ci[WS(csi, 22)] = -(FMA(KP803207531, T6M, T6L)); + Cr[WS(csr, 6)] = 
FMA(KP803207531, T3u, T3f); + Cr[WS(csr, 25)] = FNMS(KP803207531, T3u, T3f); + T3K = T3G - T3J; + T6G = T3G + T3J; + } + T6F = FNMS(KP956940335, T6C, T6B); + T6D = FMA(KP956940335, T6C, T6B); + T6E = T3N + T3M; + T3O = T3M - T3N; + Cr[WS(csr, 1)] = FMA(KP989176509, T3K, T3D); + Cr[WS(csr, 30)] = FNMS(KP989176509, T3K, T3D); + Ci[WS(csi, 1)] = FMA(KP989176509, T6E, T6D); + Ci[WS(csi, 30)] = FMS(KP989176509, T6E, T6D); + } + } + } + { + E T41, T4Z, T4L, T6r, T6l, T6m, T4g, T4S, T5c, T59, T4W, T4A, T6s, T52, T5d; + E T56; + { + E T51, T50, T57, T58, T48, T4f; + Cr[WS(csr, 14)] = FMA(KP989176509, T3O, T3L); + Cr[WS(csr, 17)] = FNMS(KP989176509, T3O, T3L); + Ci[WS(csi, 17)] = FNMS(KP989176509, T6G, T6F); + Ci[WS(csi, 14)] = -(FMA(KP989176509, T6G, T6F)); + T41 = FNMS(KP980785280, T40, T3T); + T4Z = FMA(KP980785280, T40, T3T); + T51 = FMA(KP820678790, T44, T47); + T48 = FNMS(KP820678790, T47, T44); + T4f = FNMS(KP820678790, T4e, T4b); + T50 = FMA(KP820678790, T4b, T4e); + T4L = FNMS(KP980785280, T4K, T4D); + T57 = FMA(KP980785280, T4K, T4D); + T6r = FMA(KP980785280, T6k, T6j); + T6l = FNMS(KP980785280, T6k, T6j); + T6m = T4f + T48; + T4g = T48 - T4f; + T58 = FMA(KP980785280, T4R, T4O); + T4S = FNMS(KP980785280, T4R, T4O); + { + E T54, T55, T4s, T4z; + T54 = FMA(KP980785280, T4r, T4k); + T4s = FNMS(KP980785280, T4r, T4k); + T4z = FNMS(KP980785280, T4y, T4v); + T55 = FMA(KP980785280, T4y, T4v); + T5c = FMA(KP357805721, T57, T58); + T59 = FNMS(KP357805721, T58, T57); + T4W = FMA(KP472964775, T4s, T4z); + T4A = FNMS(KP472964775, T4z, T4s); + T6s = T50 + T51; + T52 = T50 - T51; + T5d = FNMS(KP357805721, T54, T55); + T56 = FMA(KP357805721, T55, T54); + } + } + { + E T4V, T4h, T6t, T6v, T4X, T4T; + T4V = FNMS(KP773010453, T4g, T41); + T4h = FMA(KP773010453, T4g, T41); + T6t = FMA(KP773010453, T6s, T6r); + T6v = FNMS(KP773010453, T6s, T6r); + T4X = FNMS(KP472964775, T4L, T4S); + T4T = FMA(KP472964775, T4S, T4L); + { + E T53, T5a, T6n, T6o; + T5b = FNMS(KP773010453, T52, T4Z); + 
T53 = FMA(KP773010453, T52, T4Z); + { + E T4Y, T6u, T6w, T4U; + T4Y = T4W - T4X; + T6u = T4W + T4X; + T6w = T4T - T4A; + T4U = T4A + T4T; + Cr[WS(csr, 11)] = FMA(KP903989293, T4Y, T4V); + Cr[WS(csr, 20)] = FNMS(KP903989293, T4Y, T4V); + Ci[WS(csi, 27)] = FNMS(KP903989293, T6u, T6t); + Ci[WS(csi, 4)] = -(FMA(KP903989293, T6u, T6t)); + Ci[WS(csi, 11)] = FMA(KP903989293, T6w, T6v); + Ci[WS(csi, 20)] = FMS(KP903989293, T6w, T6v); + Cr[WS(csr, 4)] = FMA(KP903989293, T4U, T4h); + Cr[WS(csr, 27)] = FNMS(KP903989293, T4U, T4h); + T5a = T56 + T59; + T6q = T59 - T56; + } + T6p = FNMS(KP773010453, T6m, T6l); + T6n = FMA(KP773010453, T6m, T6l); + T6o = T5d + T5c; + T5e = T5c - T5d; + Cr[WS(csr, 3)] = FMA(KP941544065, T5a, T53); + Cr[WS(csr, 28)] = FNMS(KP941544065, T5a, T53); + Ci[WS(csi, 3)] = FMA(KP941544065, T6o, T6n); + Ci[WS(csi, 28)] = FMS(KP941544065, T6o, T6n); + } + } + } + } + } + } + Cr[WS(csr, 12)] = FMA(KP941544065, T5e, T5b); + Cr[WS(csr, 19)] = FNMS(KP941544065, T5e, T5b); + Ci[WS(csi, 19)] = FMA(KP941544065, T6q, T6p); + Ci[WS(csi, 12)] = FMS(KP941544065, T6q, T6p); + } + } +} + +static const kr2c_desc desc = { 64, "r2cfII_64", {114, 0, 320, 0}, &GENUS }; + +void X(codelet_r2cfII_64) (planner *p) { + X(kr2c_register) (p, r2cfII_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cfII_64 -dft-II -include r2cfII.h */ + +/* + * This function contains 434 FP additions, 206 FP multiplications, + * (or, 342 additions, 114 multiplications, 92 fused multiply/add), + * 118 stack variables, 31 constants, and 128 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP242980179, +0.242980179903263889948274162077471118320990783); + DK(KP970031253, +0.970031253194543992603984207286100251456865962); + DK(KP857728610, +0.857728610000272069902269984284770137042490799); + 
DK(KP514102744, +0.514102744193221726593693838968815772608049120); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP427555093, +0.427555093430282094320966856888798534304578629); + DK(KP903989293, +0.903989293123443331586200297230537048710132025); + DK(KP336889853, +0.336889853392220050689253212619147570477766780); + DK(KP941544065, +0.941544065183020778412509402599502357185589796); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP595699304, +0.595699304492433343467036528829969889511926338); + DK(KP803207531, +0.803207531480644909806676512963141923879569427); + DK(KP146730474, +0.146730474455361751658850129646717819706215317); + DK(KP989176509, +0.989176509964780973451673738016243063983689533); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP049067674, +0.049067674327418014254954976942682658314745363); + DK(KP998795456, +0.998795456205172392714771604759100694443203615); + DK(KP671558954, +0.671558954847018400625376850427421803228750632); + DK(KP740951125, +0.740951125354959091175616897495162729728955309); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + 
ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E Tm, T34, T3Z, T5g, Tv, T35, T3W, T5h, Td, T33, T6B, T6Q, T3T, T5f, T68; + E T6m, T2b, T3n, T4O, T5D, T2F, T3r, T4K, T5z, TK, T3c, T47, T5n, TR, T3b; + E T44, T5o, T15, T38, T4e, T5l, T1c, T39, T4b, T5k, T1s, T3g, T4v, T5w, T1W; + E T3k, T4k, T5s, T2u, T3q, T4R, T5A, T2y, T3o, T4H, T5C, T1L, T3j, T4y, T5t; + E T1P, T3h, T4r, T5v; + { + E Te, Tk, Th, Tj, Tf, Tg; + Te = R0[WS(rs, 2)]; + Tk = R0[WS(rs, 18)]; + Tf = R0[WS(rs, 10)]; + Tg = R0[WS(rs, 26)]; + Th = KP707106781 * (Tf - Tg); + Tj = KP707106781 * (Tf + Tg); + { + E Ti, Tl, T3X, T3Y; + Ti = Te + Th; + Tl = Tj + Tk; + Tm = FNMS(KP195090322, Tl, KP980785280 * Ti); + T34 = FMA(KP195090322, Ti, KP980785280 * Tl); + T3X = Tk - Tj; + T3Y = Te - Th; + T3Z = FNMS(KP555570233, T3Y, KP831469612 * T3X); + T5g = FMA(KP831469612, T3Y, KP555570233 * T3X); + } + } + { + E Tq, Tt, Tp, Ts, Tn, To; + Tq = R0[WS(rs, 30)]; + Tt = R0[WS(rs, 14)]; + Tn = R0[WS(rs, 6)]; + To = R0[WS(rs, 22)]; + Tp = KP707106781 * (Tn - To); + Ts = KP707106781 * (Tn + To); + { + E Tr, Tu, T3U, T3V; + Tr = Tp - Tq; + Tu = Ts + Tt; + Tv = FMA(KP980785280, Tr, KP195090322 * Tu); + T35 = FNMS(KP980785280, Tu, KP195090322 * Tr); + T3U = Tt - Ts; + T3V = Tp + Tq; + T3W = FNMS(KP555570233, T3V, KP831469612 * T3U); + T5h = FMA(KP831469612, T3V, KP555570233 * T3U); + } + } + { + E T1, T66, T4, T65, T8, T3Q, Tb, T3R, T2, T3; + T1 = R0[0]; + T66 = R0[WS(rs, 16)]; + T2 = R0[WS(rs, 8)]; + T3 = R0[WS(rs, 24)]; + T4 = KP707106781 * (T2 - T3); + T65 = KP707106781 * (T2 + T3); + { + E T6, T7, T9, Ta; + T6 = R0[WS(rs, 4)]; + T7 = R0[WS(rs, 20)]; + T8 = FNMS(KP382683432, T7, KP923879532 * T6); + T3Q = FMA(KP382683432, T6, KP923879532 * T7); + T9 = R0[WS(rs, 12)]; + Ta = R0[WS(rs, 28)]; + Tb = FNMS(KP923879532, Ta, KP382683432 * T9); + T3R = FMA(KP923879532, T9, KP382683432 * Ta); + } + { + E T5, Tc, T6z, T6A; + T5 = T1 + T4; + Tc = T8 + 
Tb; + Td = T5 + Tc; + T33 = T5 - Tc; + T6z = Tb - T8; + T6A = T66 - T65; + T6B = T6z - T6A; + T6Q = T6z + T6A; + } + { + E T3P, T3S, T64, T67; + T3P = T1 - T4; + T3S = T3Q - T3R; + T3T = T3P - T3S; + T5f = T3P + T3S; + T64 = T3Q + T3R; + T67 = T65 + T66; + T68 = T64 + T67; + T6m = T67 - T64; + } + } + { + E T22, T2D, T21, T2C, T26, T2z, T29, T2A, T1Z, T20; + T22 = R1[WS(rs, 31)]; + T2D = R1[WS(rs, 15)]; + T1Z = R1[WS(rs, 7)]; + T20 = R1[WS(rs, 23)]; + T21 = KP707106781 * (T1Z - T20); + T2C = KP707106781 * (T1Z + T20); + { + E T24, T25, T27, T28; + T24 = R1[WS(rs, 3)]; + T25 = R1[WS(rs, 19)]; + T26 = FNMS(KP382683432, T25, KP923879532 * T24); + T2z = FMA(KP382683432, T24, KP923879532 * T25); + T27 = R1[WS(rs, 11)]; + T28 = R1[WS(rs, 27)]; + T29 = FNMS(KP923879532, T28, KP382683432 * T27); + T2A = FMA(KP923879532, T27, KP382683432 * T28); + } + { + E T23, T2a, T4M, T4N; + T23 = T21 - T22; + T2a = T26 + T29; + T2b = T23 + T2a; + T3n = T23 - T2a; + T4M = T29 - T26; + T4N = T2D - T2C; + T4O = T4M - T4N; + T5D = T4M + T4N; + } + { + E T2B, T2E, T4I, T4J; + T2B = T2z + T2A; + T2E = T2C + T2D; + T2F = T2B + T2E; + T3r = T2E - T2B; + T4I = T21 + T22; + T4J = T2z - T2A; + T4K = T4I + T4J; + T5z = T4J - T4I; + } + } + { + E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA; + Ty = R0[WS(rs, 1)]; + TP = R0[WS(rs, 17)]; + Tz = R0[WS(rs, 9)]; + TA = R0[WS(rs, 25)]; + TB = KP707106781 * (Tz - TA); + TO = KP707106781 * (Tz + TA); + { + E TD, TE, TG, TH; + TD = R0[WS(rs, 5)]; + TE = R0[WS(rs, 21)]; + TF = FNMS(KP382683432, TE, KP923879532 * TD); + TL = FMA(KP382683432, TD, KP923879532 * TE); + TG = R0[WS(rs, 13)]; + TH = R0[WS(rs, 29)]; + TI = FNMS(KP923879532, TH, KP382683432 * TG); + TM = FMA(KP923879532, TG, KP382683432 * TH); + } + { + E TC, TJ, T45, T46; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T3c = TC - TJ; + T45 = TI - TF; + T46 = TP - TO; + T47 = T45 - T46; + T5n = T45 + T46; + } + { + E TN, TQ, T42, T43; + TN = TL + TM; + TQ = TO + TP; + TR = TN + TQ; + T3b = TQ - TN; + T42 
= Ty - TB; + T43 = TL - TM; + T44 = T42 - T43; + T5o = T42 + T43; + } + } + { + E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU; + TW = R0[WS(rs, 31)]; + T1a = R0[WS(rs, 15)]; + TT = R0[WS(rs, 7)]; + TU = R0[WS(rs, 23)]; + TV = KP707106781 * (TT - TU); + T19 = KP707106781 * (TT + TU); + { + E TY, TZ, T11, T12; + TY = R0[WS(rs, 3)]; + TZ = R0[WS(rs, 19)]; + T10 = FNMS(KP382683432, TZ, KP923879532 * TY); + T16 = FMA(KP382683432, TY, KP923879532 * TZ); + T11 = R0[WS(rs, 11)]; + T12 = R0[WS(rs, 27)]; + T13 = FNMS(KP923879532, T12, KP382683432 * T11); + T17 = FMA(KP923879532, T11, KP382683432 * T12); + } + { + E TX, T14, T4c, T4d; + TX = TV - TW; + T14 = T10 + T13; + T15 = TX + T14; + T38 = TX - T14; + T4c = T13 - T10; + T4d = T1a - T19; + T4e = T4c - T4d; + T5l = T4c + T4d; + } + { + E T18, T1b, T49, T4a; + T18 = T16 + T17; + T1b = T19 + T1a; + T1c = T18 + T1b; + T39 = T1b - T18; + T49 = TV + TW; + T4a = T16 - T17; + T4b = T49 + T4a; + T5k = T4a - T49; + } + } + { + E T1g, T1U, T1j, T1T, T1n, T1Q, T1q, T1R, T1h, T1i; + T1g = R1[0]; + T1U = R1[WS(rs, 16)]; + T1h = R1[WS(rs, 8)]; + T1i = R1[WS(rs, 24)]; + T1j = KP707106781 * (T1h - T1i); + T1T = KP707106781 * (T1h + T1i); + { + E T1l, T1m, T1o, T1p; + T1l = R1[WS(rs, 4)]; + T1m = R1[WS(rs, 20)]; + T1n = FNMS(KP382683432, T1m, KP923879532 * T1l); + T1Q = FMA(KP382683432, T1l, KP923879532 * T1m); + T1o = R1[WS(rs, 12)]; + T1p = R1[WS(rs, 28)]; + T1q = FNMS(KP923879532, T1p, KP382683432 * T1o); + T1R = FMA(KP923879532, T1o, KP382683432 * T1p); + } + { + E T1k, T1r, T4t, T4u; + T1k = T1g + T1j; + T1r = T1n + T1q; + T1s = T1k + T1r; + T3g = T1k - T1r; + T4t = T1q - T1n; + T4u = T1U - T1T; + T4v = T4t - T4u; + T5w = T4t + T4u; + } + { + E T1S, T1V, T4i, T4j; + T1S = T1Q + T1R; + T1V = T1T + T1U; + T1W = T1S + T1V; + T3k = T1V - T1S; + T4i = T1g - T1j; + T4j = T1Q - T1R; + T4k = T4i - T4j; + T5s = T4i + T4j; + } + } + { + E T2g, T4F, T2j, T4E, T2p, T4C, T2s, T4B; + { + E T2c, T2i, T2f, T2h, T2d, T2e; + T2c = R1[WS(rs, 1)]; + T2i 
= R1[WS(rs, 17)]; + T2d = R1[WS(rs, 9)]; + T2e = R1[WS(rs, 25)]; + T2f = KP707106781 * (T2d - T2e); + T2h = KP707106781 * (T2d + T2e); + T2g = T2c + T2f; + T4F = T2c - T2f; + T2j = T2h + T2i; + T4E = T2i - T2h; + } + { + E T2o, T2r, T2n, T2q, T2l, T2m; + T2o = R1[WS(rs, 29)]; + T2r = R1[WS(rs, 13)]; + T2l = R1[WS(rs, 5)]; + T2m = R1[WS(rs, 21)]; + T2n = KP707106781 * (T2l - T2m); + T2q = KP707106781 * (T2l + T2m); + T2p = T2n - T2o; + T4C = T2n + T2o; + T2s = T2q + T2r; + T4B = T2r - T2q; + } + { + E T2k, T2t, T4P, T4Q; + T2k = FNMS(KP195090322, T2j, KP980785280 * T2g); + T2t = FMA(KP980785280, T2p, KP195090322 * T2s); + T2u = T2k + T2t; + T3q = T2t - T2k; + T4P = FMA(KP831469612, T4F, KP555570233 * T4E); + T4Q = FMA(KP831469612, T4C, KP555570233 * T4B); + T4R = T4P + T4Q; + T5A = T4P - T4Q; + } + { + E T2w, T2x, T4D, T4G; + T2w = FNMS(KP980785280, T2s, KP195090322 * T2p); + T2x = FMA(KP195090322, T2g, KP980785280 * T2j); + T2y = T2w - T2x; + T3o = T2x + T2w; + T4D = FNMS(KP555570233, T4C, KP831469612 * T4B); + T4G = FNMS(KP555570233, T4F, KP831469612 * T4E); + T4H = T4D - T4G; + T5C = T4G + T4D; + } + } + { + E T1x, T4p, T1A, T4o, T1G, T4m, T1J, T4l; + { + E T1t, T1z, T1w, T1y, T1u, T1v; + T1t = R1[WS(rs, 2)]; + T1z = R1[WS(rs, 18)]; + T1u = R1[WS(rs, 10)]; + T1v = R1[WS(rs, 26)]; + T1w = KP707106781 * (T1u - T1v); + T1y = KP707106781 * (T1u + T1v); + T1x = T1t + T1w; + T4p = T1t - T1w; + T1A = T1y + T1z; + T4o = T1z - T1y; + } + { + E T1F, T1I, T1E, T1H, T1C, T1D; + T1F = R1[WS(rs, 30)]; + T1I = R1[WS(rs, 14)]; + T1C = R1[WS(rs, 6)]; + T1D = R1[WS(rs, 22)]; + T1E = KP707106781 * (T1C - T1D); + T1H = KP707106781 * (T1C + T1D); + T1G = T1E - T1F; + T4m = T1E + T1F; + T1J = T1H + T1I; + T4l = T1I - T1H; + } + { + E T1B, T1K, T4w, T4x; + T1B = FNMS(KP195090322, T1A, KP980785280 * T1x); + T1K = FMA(KP980785280, T1G, KP195090322 * T1J); + T1L = T1B + T1K; + T3j = T1K - T1B; + T4w = FMA(KP831469612, T4p, KP555570233 * T4o); + T4x = FMA(KP831469612, T4m, KP555570233 * 
T4l); + T4y = T4w + T4x; + T5t = T4w - T4x; + } + { + E T1N, T1O, T4n, T4q; + T1N = FNMS(KP980785280, T1J, KP195090322 * T1G); + T1O = FMA(KP195090322, T1x, KP980785280 * T1A); + T1P = T1N - T1O; + T3h = T1O + T1N; + T4n = FNMS(KP555570233, T4m, KP831469612 * T4l); + T4q = FNMS(KP555570233, T4p, KP831469612 * T4o); + T4r = T4n - T4q; + T5v = T4q + T4n; + } + } + { + E Tx, T2N, T69, T6f, T1e, T6e, T2X, T30, T1Y, T2L, T2Q, T62, T2U, T31, T2H; + E T2K, Tw, T63; + Tw = Tm + Tv; + Tx = Td + Tw; + T2N = Td - Tw; + T63 = T35 - T34; + T69 = T63 - T68; + T6f = T63 + T68; + { + E TS, T1d, T2V, T2W; + TS = FNMS(KP098017140, TR, KP995184726 * TK); + T1d = FMA(KP995184726, T15, KP098017140 * T1c); + T1e = TS + T1d; + T6e = T1d - TS; + T2V = T2b - T2u; + T2W = T2y + T2F; + T2X = FNMS(KP671558954, T2W, KP740951125 * T2V); + T30 = FMA(KP671558954, T2V, KP740951125 * T2W); + } + { + E T1M, T1X, T2O, T2P; + T1M = T1s + T1L; + T1X = T1P - T1W; + T1Y = FMA(KP998795456, T1M, KP049067674 * T1X); + T2L = FNMS(KP049067674, T1M, KP998795456 * T1X); + T2O = FMA(KP098017140, TK, KP995184726 * TR); + T2P = FNMS(KP995184726, T1c, KP098017140 * T15); + T2Q = T2O + T2P; + T62 = T2P - T2O; + } + { + E T2S, T2T, T2v, T2G; + T2S = T1s - T1L; + T2T = T1P + T1W; + T2U = FMA(KP740951125, T2S, KP671558954 * T2T); + T31 = FNMS(KP671558954, T2S, KP740951125 * T2T); + T2v = T2b + T2u; + T2G = T2y - T2F; + T2H = FNMS(KP049067674, T2G, KP998795456 * T2v); + T2K = FMA(KP049067674, T2v, KP998795456 * T2G); + } + { + E T1f, T2I, T6b, T6c; + T1f = Tx + T1e; + T2I = T1Y + T2H; + Cr[WS(csr, 31)] = T1f - T2I; + Cr[0] = T1f + T2I; + T6b = T2L + T2K; + T6c = T62 + T69; + Ci[WS(csi, 31)] = T6b - T6c; + Ci[0] = T6b + T6c; + } + { + E T2J, T2M, T61, T6a; + T2J = Tx - T1e; + T2M = T2K - T2L; + Cr[WS(csr, 16)] = T2J - T2M; + Cr[WS(csr, 15)] = T2J + T2M; + T61 = T2H - T1Y; + T6a = T62 - T69; + Ci[WS(csi, 16)] = T61 - T6a; + Ci[WS(csi, 15)] = T61 + T6a; + } + { + E T2R, T2Y, T6h, T6i; + T2R = T2N + T2Q; + T2Y = T2U + T2X; 
+ Cr[WS(csr, 24)] = T2R - T2Y; + Cr[WS(csr, 7)] = T2R + T2Y; + T6h = T31 + T30; + T6i = T6e + T6f; + Ci[WS(csi, 24)] = T6h - T6i; + Ci[WS(csi, 7)] = T6h + T6i; + } + { + E T2Z, T32, T6d, T6g; + T2Z = T2N - T2Q; + T32 = T30 - T31; + Cr[WS(csr, 23)] = T2Z - T32; + Cr[WS(csr, 8)] = T2Z + T32; + T6d = T2X - T2U; + T6g = T6e - T6f; + Ci[WS(csi, 23)] = T6d - T6g; + Ci[WS(csi, 8)] = T6d + T6g; + } + } + { + E T5j, T5L, T6R, T6X, T5q, T6W, T5V, T5Y, T5y, T5J, T5O, T6O, T5S, T5Z, T5F; + E T5I, T5i, T6P; + T5i = T5g - T5h; + T5j = T5f - T5i; + T5L = T5f + T5i; + T6P = T3Z + T3W; + T6R = T6P - T6Q; + T6X = T6P + T6Q; + { + E T5m, T5p, T5T, T5U; + T5m = FMA(KP290284677, T5k, KP956940335 * T5l); + T5p = FNMS(KP290284677, T5o, KP956940335 * T5n); + T5q = T5m - T5p; + T6W = T5p + T5m; + T5T = T5z + T5A; + T5U = T5C + T5D; + T5V = FNMS(KP146730474, T5U, KP989176509 * T5T); + T5Y = FMA(KP146730474, T5T, KP989176509 * T5U); + } + { + E T5u, T5x, T5M, T5N; + T5u = T5s - T5t; + T5x = T5v - T5w; + T5y = FMA(KP803207531, T5u, KP595699304 * T5x); + T5J = FNMS(KP595699304, T5u, KP803207531 * T5x); + T5M = FMA(KP956940335, T5o, KP290284677 * T5n); + T5N = FNMS(KP290284677, T5l, KP956940335 * T5k); + T5O = T5M + T5N; + T6O = T5N - T5M; + } + { + E T5Q, T5R, T5B, T5E; + T5Q = T5s + T5t; + T5R = T5v + T5w; + T5S = FMA(KP989176509, T5Q, KP146730474 * T5R); + T5Z = FNMS(KP146730474, T5Q, KP989176509 * T5R); + T5B = T5z - T5A; + T5E = T5C - T5D; + T5F = FNMS(KP595699304, T5E, KP803207531 * T5B); + T5I = FMA(KP595699304, T5B, KP803207531 * T5E); + } + { + E T5r, T5G, T6T, T6U; + T5r = T5j + T5q; + T5G = T5y + T5F; + Cr[WS(csr, 25)] = T5r - T5G; + Cr[WS(csr, 6)] = T5r + T5G; + T6T = T5J + T5I; + T6U = T6O + T6R; + Ci[WS(csi, 25)] = T6T - T6U; + Ci[WS(csi, 6)] = T6T + T6U; + } + { + E T5H, T5K, T6N, T6S; + T5H = T5j - T5q; + T5K = T5I - T5J; + Cr[WS(csr, 22)] = T5H - T5K; + Cr[WS(csr, 9)] = T5H + T5K; + T6N = T5F - T5y; + T6S = T6O - T6R; + Ci[WS(csi, 22)] = T6N - T6S; + Ci[WS(csi, 9)] = T6N + T6S; 
+ } + { + E T5P, T5W, T6Z, T70; + T5P = T5L + T5O; + T5W = T5S + T5V; + Cr[WS(csr, 30)] = T5P - T5W; + Cr[WS(csr, 1)] = T5P + T5W; + T6Z = T5Z + T5Y; + T70 = T6W + T6X; + Ci[WS(csi, 30)] = T6Z - T70; + Ci[WS(csi, 1)] = T6Z + T70; + } + { + E T5X, T60, T6V, T6Y; + T5X = T5L - T5O; + T60 = T5Y - T5Z; + Cr[WS(csr, 17)] = T5X - T60; + Cr[WS(csr, 14)] = T5X + T60; + T6V = T5V - T5S; + T6Y = T6W - T6X; + Ci[WS(csi, 17)] = T6V - T6Y; + Ci[WS(csi, 14)] = T6V + T6Y; + } + } + { + E T37, T3z, T6n, T6t, T3e, T6s, T3J, T3M, T3m, T3x, T3C, T6k, T3G, T3N, T3t; + E T3w, T36, T6l; + T36 = T34 + T35; + T37 = T33 - T36; + T3z = T33 + T36; + T6l = Tv - Tm; + T6n = T6l - T6m; + T6t = T6l + T6m; + { + E T3a, T3d, T3H, T3I; + T3a = FMA(KP634393284, T38, KP773010453 * T39); + T3d = FNMS(KP634393284, T3c, KP773010453 * T3b); + T3e = T3a - T3d; + T6s = T3d + T3a; + T3H = T3n + T3o; + T3I = T3q + T3r; + T3J = FNMS(KP336889853, T3I, KP941544065 * T3H); + T3M = FMA(KP336889853, T3H, KP941544065 * T3I); + } + { + E T3i, T3l, T3A, T3B; + T3i = T3g - T3h; + T3l = T3j - T3k; + T3m = FMA(KP903989293, T3i, KP427555093 * T3l); + T3x = FNMS(KP427555093, T3i, KP903989293 * T3l); + T3A = FMA(KP773010453, T3c, KP634393284 * T3b); + T3B = FNMS(KP634393284, T39, KP773010453 * T38); + T3C = T3A + T3B; + T6k = T3B - T3A; + } + { + E T3E, T3F, T3p, T3s; + T3E = T3g + T3h; + T3F = T3j + T3k; + T3G = FMA(KP941544065, T3E, KP336889853 * T3F); + T3N = FNMS(KP336889853, T3E, KP941544065 * T3F); + T3p = T3n - T3o; + T3s = T3q - T3r; + T3t = FNMS(KP427555093, T3s, KP903989293 * T3p); + T3w = FMA(KP427555093, T3p, KP903989293 * T3s); + } + { + E T3f, T3u, T6p, T6q; + T3f = T37 + T3e; + T3u = T3m + T3t; + Cr[WS(csr, 27)] = T3f - T3u; + Cr[WS(csr, 4)] = T3f + T3u; + T6p = T3x + T3w; + T6q = T6k + T6n; + Ci[WS(csi, 27)] = T6p - T6q; + Ci[WS(csi, 4)] = T6p + T6q; + } + { + E T3v, T3y, T6j, T6o; + T3v = T37 - T3e; + T3y = T3w - T3x; + Cr[WS(csr, 20)] = T3v - T3y; + Cr[WS(csr, 11)] = T3v + T3y; + T6j = T3t - T3m; + T6o = 
T6k - T6n; + Ci[WS(csi, 20)] = T6j - T6o; + Ci[WS(csi, 11)] = T6j + T6o; + } + { + E T3D, T3K, T6v, T6w; + T3D = T3z + T3C; + T3K = T3G + T3J; + Cr[WS(csr, 28)] = T3D - T3K; + Cr[WS(csr, 3)] = T3D + T3K; + T6v = T3N + T3M; + T6w = T6s + T6t; + Ci[WS(csi, 28)] = T6v - T6w; + Ci[WS(csi, 3)] = T6v + T6w; + } + { + E T3L, T3O, T6r, T6u; + T3L = T3z - T3C; + T3O = T3M - T3N; + Cr[WS(csr, 19)] = T3L - T3O; + Cr[WS(csr, 12)] = T3L + T3O; + T6r = T3J - T3G; + T6u = T6s - T6t; + Ci[WS(csi, 19)] = T6r - T6u; + Ci[WS(csi, 12)] = T6r + T6u; + } + } + { + E T41, T4Z, T6D, T6J, T4g, T6I, T59, T5d, T4A, T4X, T52, T6y, T56, T5c, T4T; + E T4W, T40, T6C; + T40 = T3W - T3Z; + T41 = T3T + T40; + T4Z = T3T - T40; + T6C = T5g + T5h; + T6D = T6B - T6C; + T6J = T6C + T6B; + { + E T48, T4f, T57, T58; + T48 = FMA(KP881921264, T44, KP471396736 * T47); + T4f = FMA(KP881921264, T4b, KP471396736 * T4e); + T4g = T48 - T4f; + T6I = T48 + T4f; + T57 = T4K + T4H; + T58 = T4R + T4O; + T59 = FMA(KP514102744, T57, KP857728610 * T58); + T5d = FNMS(KP857728610, T57, KP514102744 * T58); + } + { + E T4s, T4z, T50, T51; + T4s = T4k + T4r; + T4z = T4v - T4y; + T4A = FMA(KP970031253, T4s, KP242980179 * T4z); + T4X = FNMS(KP242980179, T4s, KP970031253 * T4z); + T50 = FNMS(KP471396736, T4b, KP881921264 * T4e); + T51 = FNMS(KP471396736, T44, KP881921264 * T47); + T52 = T50 - T51; + T6y = T51 + T50; + } + { + E T54, T55, T4L, T4S; + T54 = T4k - T4r; + T55 = T4y + T4v; + T56 = FMA(KP514102744, T54, KP857728610 * T55); + T5c = FNMS(KP514102744, T55, KP857728610 * T54); + T4L = T4H - T4K; + T4S = T4O - T4R; + T4T = FNMS(KP242980179, T4S, KP970031253 * T4L); + T4W = FMA(KP242980179, T4L, KP970031253 * T4S); + } + { + E T4h, T4U, T6F, T6G; + T4h = T41 + T4g; + T4U = T4A + T4T; + Cr[WS(csr, 29)] = T4h - T4U; + Cr[WS(csr, 2)] = T4h + T4U; + T6F = T4X + T4W; + T6G = T6y + T6D; + Ci[WS(csi, 29)] = T6F - T6G; + Ci[WS(csi, 2)] = T6F + T6G; + } + { + E T4V, T4Y, T6x, T6E; + T4V = T41 - T4g; + T4Y = T4W - T4X; + Cr[WS(csr, 
18)] = T4V - T4Y; + Cr[WS(csr, 13)] = T4V + T4Y; + T6x = T4T - T4A; + T6E = T6y - T6D; + Ci[WS(csi, 18)] = T6x - T6E; + Ci[WS(csi, 13)] = T6x + T6E; + } + { + E T53, T5a, T6L, T6M; + T53 = T4Z - T52; + T5a = T56 - T59; + Cr[WS(csr, 21)] = T53 - T5a; + Cr[WS(csr, 10)] = T53 + T5a; + T6L = T5d - T5c; + T6M = T6J - T6I; + Ci[WS(csi, 21)] = T6L - T6M; + Ci[WS(csi, 10)] = T6L + T6M; + } + { + E T5b, T5e, T6H, T6K; + T5b = T4Z + T52; + T5e = T5c + T5d; + Cr[WS(csr, 26)] = T5b - T5e; + Cr[WS(csr, 5)] = T5b + T5e; + T6H = T56 + T59; + T6K = T6I + T6J; + Ci[WS(csi, 5)] = -(T6H + T6K); + Ci[WS(csi, 26)] = T6K - T6H; + } + } + } + } +} + +static const kr2c_desc desc = { 64, "r2cfII_64", {342, 114, 92, 0}, &GENUS }; + +void X(codelet_r2cfII_64) (planner *p) { + X(kr2c_register) (p, r2cfII_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:17 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 7 -name r2cfII_7 -dft-II -include r2cfII.h */ + +/* + * This function contains 24 FP additions, 18 FP multiplications, + * (or, 9 additions, 3 multiplications, 15 fused multiply/add), + * 25 stack variables, 6 constants, and 14 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { + E Td, Tk; + { + E T4, T3, Te, T5, T9, Tf, T6, Tg, Tj; + Td = R0[0]; + { + E T1, T2, T7, T8; + T1 = R0[WS(rs, 1)]; + T2 = R1[WS(rs, 2)]; + T7 = R1[WS(rs, 1)]; + T8 = R0[WS(rs, 2)]; + T4 = R1[0]; + T3 = T1 + T2; + Te = T1 - T2; + T5 = R0[WS(rs, 3)]; + T9 = T7 + T8; + Tf = T8 - T7; + } + T6 = T4 + T5; + Tg = T5 - T4; + Tj = FNMS(KP356895867, Tf, Te); + { + E Ta, Th, Tl, Tb, Ti, Tm, Tc; + Tb = FNMS(KP554958132, T3, T9); + Ta = FMA(KP554958132, T9, T6); + Th = 
FNMS(KP356895867, Tg, Tf); + Tl = FNMS(KP356895867, Te, Tg); + Ci[WS(csi, 1)] = -(KP974927912 * (FNMS(KP801937735, Tb, T6))); + Ci[WS(csi, 2)] = KP974927912 * (FNMS(KP801937735, Ta, T3)); + Ti = FNMS(KP692021471, Th, Te); + Tm = FNMS(KP692021471, Tl, Tf); + Cr[WS(csr, 3)] = Te + Tg + Tf + Td; + Tc = FMA(KP554958132, T6, T3); + Cr[WS(csr, 1)] = FNMS(KP900968867, Ti, Td); + Cr[WS(csr, 2)] = FNMS(KP900968867, Tm, Td); + Tk = FNMS(KP692021471, Tj, Tg); + Ci[0] = -(KP974927912 * (FMA(KP801937735, Tc, T9))); + } + } + Cr[0] = FNMS(KP900968867, Tk, Td); + } + } +} + +static const kr2c_desc desc = { 7, "r2cfII_7", {9, 3, 15, 0}, &GENUS }; + +void X(codelet_r2cfII_7) (planner *p) { + X(kr2c_register) (p, r2cfII_7, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 7 -name r2cfII_7 -dft-II -include r2cfII.h */ + +/* + * This function contains 24 FP additions, 18 FP multiplications, + * (or, 12 additions, 6 multiplications, 12 fused multiply/add), + * 20 stack variables, 6 constants, and 14 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { + E T1, Ta, Td, T4, Tb, T7, Tc, T8, T9; + T1 = R0[0]; + T8 = R1[0]; + T9 = R0[WS(rs, 3)]; + Ta = T8 - T9; + Td = T8 + T9; + { + E T2, T3, T5, T6; + T2 = 
R0[WS(rs, 1)]; + T3 = R1[WS(rs, 2)]; + T4 = T2 - T3; + Tb = T2 + T3; + T5 = R1[WS(rs, 1)]; + T6 = R0[WS(rs, 2)]; + T7 = T5 - T6; + Tc = T5 + T6; + } + Ci[0] = -(FMA(KP781831482, Tb, KP974927912 * Tc) + (KP433883739 * Td)); + Ci[WS(csi, 1)] = FNMS(KP974927912, Td, KP781831482 * Tc) - (KP433883739 * Tb); + Cr[0] = FMA(KP623489801, T4, T1) + FMA(KP222520933, T7, KP900968867 * Ta); + Ci[WS(csi, 2)] = FNMS(KP781831482, Td, KP974927912 * Tb) - (KP433883739 * Tc); + Cr[WS(csr, 2)] = FMA(KP900968867, T7, T1) + FNMA(KP623489801, Ta, KP222520933 * T4); + Cr[WS(csr, 1)] = FMA(KP222520933, Ta, T1) + FNMA(KP623489801, T7, KP900968867 * T4); + Cr[WS(csr, 3)] = T1 + T4 - (T7 + Ta); + } + } +} + +static const kr2c_desc desc = { 7, "r2cfII_7", {12, 6, 12, 0}, &GENUS }; + +void X(codelet_r2cfII_7) (planner *p) { + X(kr2c_register) (p, r2cfII_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:18 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cfII_8 -dft-II -include r2cfII.h */ + +/* + * This function contains 22 FP additions, 16 FP multiplications, + * (or, 6 additions, 0 multiplications, 16 fused multiply/add), + * 22 stack variables, 3 constants, and 16 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { + E Te, T8, Td, T5, Tj, Tl, Tf, Tb; + { + E T1, Th, T9, Ti, T4, Ta; + T1 = R0[0]; + Th = R0[WS(rs, 2)]; + { + E T2, T3, T6, T7; + T2 = R0[WS(rs, 1)]; + T3 = R0[WS(rs, 3)]; + T6 = R1[0]; + T7 = R1[WS(rs, 2)]; + T9 = R1[WS(rs, 3)]; + Ti = T2 + T3; + T4 = T2 - T3; + Te = FMA(KP414213562, T6, T7); + T8 = FNMS(KP414213562, T7, T6); + Ta = R1[WS(rs, 1)]; + } + Td = FNMS(KP707106781, T4, T1); + T5 = FMA(KP707106781, T4, T1); + Tj = FMA(KP707106781, Ti, Th); + Tl = FNMS(KP707106781, Ti, Th); + Tf = FMA(KP414213562, T9, Ta); + Tb = FMS(KP414213562, Ta, T9); + } + { + E Tk, Tg, Tc, Tm; + Tk = Te + Tf; + Tg = Te - Tf; + Tc = T8 + Tb; + Tm = Tb - T8; + Cr[WS(csr, 1)] = 
FMA(KP923879532, Tg, Td); + Cr[WS(csr, 2)] = FNMS(KP923879532, Tg, Td); + Ci[WS(csi, 3)] = FNMS(KP923879532, Tk, Tj); + Ci[0] = -(FMA(KP923879532, Tk, Tj)); + Ci[WS(csi, 1)] = FMA(KP923879532, Tm, Tl); + Ci[WS(csi, 2)] = FMS(KP923879532, Tm, Tl); + Cr[0] = FMA(KP923879532, Tc, T5); + Cr[WS(csr, 3)] = FNMS(KP923879532, Tc, T5); + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cfII_8", {6, 0, 16, 0}, &GENUS }; + +void X(codelet_r2cfII_8) (planner *p) { + X(kr2c_register) (p, r2cfII_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cfII_8 -dft-II -include r2cfII.h */ + +/* + * This function contains 22 FP additions, 10 FP multiplications, + * (or, 18 additions, 6 multiplications, 4 fused multiply/add), + * 18 stack variables, 3 constants, and 16 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { + E T1, Tj, T4, Ti, T8, Te, Tb, Tf, T2, T3; + T1 = R0[0]; + Tj = R0[WS(rs, 2)]; + T2 = R0[WS(rs, 1)]; + T3 = R0[WS(rs, 3)]; + T4 = KP707106781 * (T2 - T3); + Ti = KP707106781 * (T2 + T3); + { + E T6, T7, T9, Ta; + T6 = R1[0]; + T7 = R1[WS(rs, 2)]; + T8 = FNMS(KP382683432, T7, KP923879532 * T6); + Te = FMA(KP382683432, T6, KP923879532 * T7); + T9 = R1[WS(rs, 1)]; + Ta = R1[WS(rs, 3)]; + Tb = FNMS(KP923879532, Ta, KP382683432 * T9); + Tf = FMA(KP923879532, T9, KP382683432 * Ta); + } + { + E T5, Tc, Th, Tk; + T5 = T1 + T4; + Tc = T8 + Tb; + Cr[WS(csr, 3)] = T5 - Tc; + Cr[0] = 
T5 + Tc; + Th = Te + Tf; + Tk = Ti + Tj; + Ci[0] = -(Th + Tk); + Ci[WS(csi, 3)] = Tk - Th; + } + { + E Td, Tg, Tl, Tm; + Td = T1 - T4; + Tg = Te - Tf; + Cr[WS(csr, 2)] = Td - Tg; + Cr[WS(csr, 1)] = Td + Tg; + Tl = Tb - T8; + Tm = Tj - Ti; + Ci[WS(csi, 2)] = Tl - Tm; + Ci[WS(csi, 1)] = Tl + Tm; + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cfII_8", {18, 6, 4, 0}, &GENUS }; + +void X(codelet_r2cfII_8) (planner *p) { + X(kr2c_register) (p, r2cfII_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cfII_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:18 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cfII_9 -dft-II -include r2cfII.h */ + +/* + * This function contains 42 FP additions, 34 FP multiplications, + * (or, 12 additions, 4 multiplications, 30 fused multiply/add), + * 46 stack variables, 17 constants, and 18 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP879385241, +0.879385241571816768108218554649462939872416269); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP666666666, +0.666666666666666666666666666666666666666666667); + DK(KP673648177, +0.673648177666930348851716626769314796000375677); + DK(KP898197570, +0.898197570222573798468955502359086394667167570); + DK(KP826351822, +0.826351822333069651148283373230685203999624323); + DK(KP907603734, +0.907603734547952313649323976213898122064543220); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP420276625, +0.420276625461206169731530603237061658838781920); + DK(KP315207469, +0.315207469095904627298647952427796244129086440); + DK(KP203604859, +0.203604859554852403062088995281827210665664861); + DK(KP152703644, +0.152703644666139302296566746461370407999248646); + DK(KP726681596, +0.726681596905677465811651808188092531873167623); + DK(KP968908795, 
+0.968908795874236621082202410917456709164223497); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { + E To, T5, Tp, Ta, Ti, Tm, TB, Tq, Tt, Tf, Th; + { + E T1, T6, T4, Tb, Tk, T9, Tc, Td, Tl, Te; + { + E T2, T3, T7, T8; + T1 = R0[0]; + T2 = R0[WS(rs, 3)]; + T3 = R1[WS(rs, 1)]; + T6 = R0[WS(rs, 1)]; + T7 = R0[WS(rs, 4)]; + T8 = R1[WS(rs, 2)]; + T4 = T2 - T3; + To = T2 + T3; + Tb = R0[WS(rs, 2)]; + Tk = T7 + T8; + T9 = T7 - T8; + Tc = R1[0]; + Td = R1[WS(rs, 3)]; + } + T5 = T1 + T4; + Tp = FNMS(KP500000000, T4, T1); + Ta = T6 + T9; + Tl = FNMS(KP500000000, T9, T6); + Te = Tc + Td; + Ti = Tc - Td; + Tm = FMA(KP968908795, Tl, Tk); + TB = FNMS(KP726681596, Tk, Tl); + Tq = FNMS(KP152703644, Tk, Tl); + Tt = FMA(KP203604859, Tl, Tk); + Tf = Tb - Te; + Th = FMA(KP500000000, Te, Tb); + } + { + E Ts, Tr, TA, Tj, Tg; + Ts = FMA(KP315207469, Ti, Th); + Tr = FNMS(KP420276625, Th, Ti); + TA = FMA(KP203604859, Th, Ti); + Tj = FNMS(KP152703644, Ti, Th); + Tg = Ta + Tf; + Ci[WS(csi, 1)] = KP866025403 * (Tf - Ta); + { + E Tu, Tx, TF, TC; + Tu = FNMS(KP907603734, Tt, Ts); + Tx = FNMS(KP826351822, Tr, Tq); + TF = FMA(KP898197570, TB, TA); + TC = FNMS(KP898197570, TB, TA); + { + E TE, Tn, Tv, Ty; + TE = FNMS(KP673648177, Tm, Tj); + Tn = FMA(KP673648177, Tm, Tj); + Cr[WS(csr, 4)] = T5 + Tg; + Cr[WS(csr, 1)] = FNMS(KP500000000, Tg, T5); + Tv = FNMS(KP666666666, Tu, Tr); + Ty = FNMS(KP666666666, Tx, Tt); + Cr[0] = FMA(KP852868531, TF, Tp); + { + E TG, TD, Tw, Tz; + TG = FMA(KP500000000, TF, TE); + Ci[0] = -(KP984807753 * (FMA(KP879385241, To, Tn))); + TD = FNMS(KP666666666, Tn, TC); + Tw = FMA(KP826351822, Tv, Tq); + Tz = FMA(KP907603734, Ty, Ts); + Cr[WS(csr, 3)] = FNMS(KP852868531, TG, Tp); + Ci[WS(csi, 3)] = -(KP866025403 * (FMA(KP852868531, TD, To))); + Cr[WS(csr, 2)] = 
FNMS(KP852868531, Tw, Tp); + Ci[WS(csi, 2)] = KP866025403 * (FNMS(KP939692620, Tz, To)); + } + } + } + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cfII_9", {12, 4, 30, 0}, &GENUS }; + +void X(codelet_r2cfII_9) (planner *p) { + X(kr2c_register) (p, r2cfII_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cfII_9 -dft-II -include r2cfII.h */ + +/* + * This function contains 42 FP additions, 30 FP multiplications, + * (or, 25 additions, 13 multiplications, 17 fused multiply/add), + * 39 stack variables, 14 constants, and 18 memory accesses + */ +#include "r2cfII.h" + +static void r2cfII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP663413948, +0.663413948168938396205421319635891297216863310); + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP556670399, +0.556670399226419366452912952047023132968291906); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP150383733, +0.150383733180435296639271897612501926072238258); + DK(KP813797681, +0.813797681349373692844693217248393223289101568); + DK(KP342020143, +0.342020143325668733044099614682259580763083368); + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP296198132, +0.296198132726023843175338011893050938967728390); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { + E T1, T4, To, Ta, Tl, Tk, Tf, Ti, Th, T2, 
T3, T5, Tg; + T1 = R0[0]; + T2 = R1[WS(rs, 1)]; + T3 = R0[WS(rs, 3)]; + T4 = T2 - T3; + To = T2 + T3; + { + E T6, T7, T8, T9; + T6 = R0[WS(rs, 1)]; + T7 = R1[WS(rs, 2)]; + T8 = R0[WS(rs, 4)]; + T9 = T7 - T8; + Ta = T6 - T9; + Tl = T7 + T8; + Tk = FMA(KP500000000, T9, T6); + } + { + E Tb, Tc, Td, Te; + Tb = R0[WS(rs, 2)]; + Tc = R1[0]; + Td = R1[WS(rs, 3)]; + Te = Tc + Td; + Tf = Tb - Te; + Ti = FMA(KP500000000, Te, Tb); + Th = Tc - Td; + } + Ci[WS(csi, 1)] = KP866025403 * (Tf - Ta); + T5 = T1 - T4; + Tg = Ta + Tf; + Cr[WS(csr, 1)] = FNMS(KP500000000, Tg, T5); + Cr[WS(csr, 4)] = T5 + Tg; + { + E Tr, Tt, Tw, Tv, Tu, Tp, Tq, Ts, Tj, Tm, Tn; + Tr = FMA(KP500000000, T4, T1); + Tt = FMA(KP296198132, Th, KP939692620 * Ti); + Tw = FNMS(KP813797681, Th, KP342020143 * Ti); + Tv = FNMS(KP984807753, Tk, KP150383733 * Tl); + Tu = FMA(KP173648177, Tk, KP852868531 * Tl); + Tp = FNMS(KP556670399, Tl, KP766044443 * Tk); + Tq = FMA(KP852868531, Th, KP173648177 * Ti); + Ts = Tp + Tq; + Tj = FNMS(KP984807753, Ti, KP150383733 * Th); + Tm = FMA(KP642787609, Tk, KP663413948 * Tl); + Tn = Tj - Tm; + Ci[0] = FNMS(KP866025403, To, Tn); + Cr[0] = Tr + Ts; + Ci[WS(csi, 3)] = FNMS(KP500000000, Tn, KP866025403 * ((Tp - Tq) - To)); + Cr[WS(csr, 3)] = FMA(KP866025403, Tm + Tj, Tr) - (KP500000000 * Ts); + Ci[WS(csi, 2)] = FMA(KP866025403, To - (Tu + Tt), KP500000000 * (Tw - Tv)); + Cr[WS(csr, 2)] = FMA(KP500000000, Tt - Tu, Tr) + (KP866025403 * (Tv + Tw)); + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cfII_9", {25, 13, 17, 0}, &GENUS }; + +void X(codelet_r2cfII_9) (planner *p) { + X(kr2c_register) (p, r2cfII_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of 
Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cf_10 -include r2cf.h */ + +/* + * This function contains 34 FP additions, 14 FP multiplications, + * (or, 24 additions, 4 multiplications, 10 fused multiply/add), + * 29 stack variables, 4 constants, and 20 memory accesses + */ +#include "r2cf.h" + +static void r2cf_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E Tt, T3, T7, Tq, T6, Tv, Tp, Tm, Th, T8, T1, T2, T9, Tr; + T1 = R0[0]; + T2 = R1[WS(rs, 2)]; + { 
+ E Te, Tn, Td, Tf, Tb, Tc; + Tb = R0[WS(rs, 2)]; + Tc = R1[WS(rs, 4)]; + Te = R0[WS(rs, 3)]; + Tt = T1 + T2; + T3 = T1 - T2; + Tn = Tb + Tc; + Td = Tb - Tc; + Tf = R1[0]; + { + E T4, T5, To, Tg; + T4 = R0[WS(rs, 1)]; + T5 = R1[WS(rs, 3)]; + T7 = R0[WS(rs, 4)]; + To = Te + Tf; + Tg = Te - Tf; + Tq = T4 + T5; + T6 = T4 - T5; + Tv = Tn + To; + Tp = Tn - To; + Tm = Tg - Td; + Th = Td + Tg; + T8 = R1[WS(rs, 1)]; + } + } + T9 = T7 - T8; + Tr = T7 + T8; + { + E Ty, Tk, Tx, Tj, Tu, Ts; + Tu = Tq + Tr; + Ts = Tq - Tr; + { + E Ta, Tl, Tw, Ti; + Ta = T6 + T9; + Tl = T6 - T9; + Ci[WS(csi, 4)] = KP951056516 * (FMA(KP618033988, Tp, Ts)); + Ci[WS(csi, 2)] = KP951056516 * (FNMS(KP618033988, Ts, Tp)); + Ty = Tu - Tv; + Tw = Tu + Tv; + Ci[WS(csi, 3)] = KP951056516 * (FMA(KP618033988, Tl, Tm)); + Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP618033988, Tm, Tl))); + Tk = Ta - Th; + Ti = Ta + Th; + Cr[0] = Tt + Tw; + Tx = FNMS(KP250000000, Tw, Tt); + Cr[WS(csr, 5)] = T3 + Ti; + Tj = FNMS(KP250000000, Ti, T3); + } + Cr[WS(csr, 4)] = FMA(KP559016994, Ty, Tx); + Cr[WS(csr, 2)] = FNMS(KP559016994, Ty, Tx); + Cr[WS(csr, 3)] = FNMS(KP559016994, Tk, Tj); + Cr[WS(csr, 1)] = FMA(KP559016994, Tk, Tj); + } + } + } +} + +static const kr2c_desc desc = { 10, "r2cf_10", {24, 4, 10, 0}, &GENUS }; + +void X(codelet_r2cf_10) (planner *p) { + X(kr2c_register) (p, r2cf_10, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cf_10 -include r2cf.h */ + +/* + * This function contains 34 FP additions, 12 FP multiplications, + * (or, 28 additions, 6 multiplications, 6 fused multiply/add), + * 26 stack variables, 4 constants, and 20 memory accesses + */ +#include "r2cf.h" + +static void r2cf_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + 
DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) { + E Ti, Tt, Ta, Tn, Td, To, Te, Tv, T3, Tq, T6, Tr, T7, Tu, Tg; + E Th; + Tg = R0[0]; + Th = R1[WS(rs, 2)]; + Ti = Tg - Th; + Tt = Tg + Th; + { + E T8, T9, Tb, Tc; + T8 = R0[WS(rs, 2)]; + T9 = R1[WS(rs, 4)]; + Ta = T8 - T9; + Tn = T8 + T9; + Tb = R0[WS(rs, 3)]; + Tc = R1[0]; + Td = Tb - Tc; + To = Tb + Tc; + } + Te = Ta + Td; + Tv = Tn + To; + { + E T1, T2, T4, T5; + T1 = R0[WS(rs, 1)]; + T2 = R1[WS(rs, 3)]; + T3 = T1 - T2; + Tq = T1 + T2; + T4 = R0[WS(rs, 4)]; + T5 = R1[WS(rs, 1)]; + T6 = T4 - T5; + Tr = T4 + T5; + } + T7 = T3 + T6; + Tu = Tq + Tr; + { + E Tl, Tm, Tf, Tj, Tk; + Tl = Td - Ta; + Tm = T3 - T6; + Ci[WS(csi, 1)] = FNMS(KP951056516, Tm, KP587785252 * Tl); + Ci[WS(csi, 3)] = FMA(KP587785252, Tm, KP951056516 * Tl); + Tf = KP559016994 * (T7 - Te); + Tj = T7 + Te; + Tk = FNMS(KP250000000, Tj, Ti); + Cr[WS(csr, 1)] = Tf + Tk; + Cr[WS(csr, 5)] = Ti + Tj; + Cr[WS(csr, 3)] = Tk - Tf; + } + { + E Tp, Ts, Ty, Tw, Tx; + Tp = Tn - To; + Ts = Tq - Tr; + Ci[WS(csi, 2)] = FNMS(KP587785252, Ts, KP951056516 * Tp); + Ci[WS(csi, 4)] = FMA(KP951056516, Ts, KP587785252 * Tp); + Ty = KP559016994 * (Tu - Tv); + Tw = Tu + Tv; + Tx = FNMS(KP250000000, Tw, Tt); + Cr[WS(csr, 2)] = Tx - Ty; + Cr[0] = Tt + Tw; + Cr[WS(csr, 4)] = Ty + Tx; + } + } + } +} + +static const kr2c_desc desc = { 10, "r2cf_10", {28, 6, 6, 0}, &GENUS }; + +void X(codelet_r2cf_10) (planner *p) { + X(kr2c_register) (p, r2cf_10, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_11.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_11.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 
+1,230 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 11 -name r2cf_11 -include r2cf.h */ + +/* + * This function contains 60 FP additions, 50 FP multiplications, + * (or, 15 additions, 5 multiplications, 45 fused multiply/add), + * 51 stack variables, 10 constants, and 22 memory accesses + */ +#include "r2cf.h" + +static void r2cf_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP959492973, +0.959492973614497389890368057066327699062454848); + DK(KP876768831, +0.876768831002589333891339807079336796764054852); + DK(KP918985947, +0.918985947228994779780736114132655398124909697); + DK(KP989821441, +0.989821441880932732376092037776718787376519372); + DK(KP778434453, +0.778434453334651800608337670740821884709317477); + DK(KP830830026, +0.830830026003772851058548298459246407048009821); + DK(KP715370323, 
+0.715370323453429719112414662767260662417897278); + DK(KP634356270, +0.634356270682424498893150776899916060542806975); + DK(KP342584725, +0.342584725681637509502641509861112333758894680); + DK(KP521108558, +0.521108558113202722944698153526659300680427422); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) { + E T1, Tg, TF, TB, TI, TL, Tz, TA; + { + E T4, TC, TE, T7, TD, Ta, TS, TG, TJ, Td, TP, TM, Ty, Tq, Th; + E Tt, Tl; + T1 = R0[0]; + { + E Tb, Tc, Tx, Tp; + { + E T2, T3, Te, Tf; + T2 = R1[0]; + T3 = R0[WS(rs, 5)]; + Te = R1[WS(rs, 2)]; + Tf = R0[WS(rs, 3)]; + { + E T5, T6, T8, T9; + T5 = R0[WS(rs, 1)]; + T4 = T2 + T3; + TC = T3 - T2; + Tg = Te + Tf; + TE = Tf - Te; + T6 = R1[WS(rs, 4)]; + T8 = R1[WS(rs, 1)]; + T9 = R0[WS(rs, 4)]; + Tb = R0[WS(rs, 2)]; + T7 = T5 + T6; + TD = T5 - T6; + Ta = T8 + T9; + TF = T9 - T8; + Tc = R1[WS(rs, 3)]; + } + } + TS = FMA(KP521108558, TC, TD); + TG = FMA(KP521108558, TF, TE); + TJ = FMA(KP521108558, TE, TC); + Td = Tb + Tc; + TB = Tb - Tc; + Tx = FNMS(KP342584725, Ta, T7); + Tp = FNMS(KP342584725, T4, Ta); + TP = FNMS(KP521108558, TB, TF); + TM = FNMS(KP521108558, TD, TB); + Ty = FNMS(KP634356270, Tx, Td); + Tq = FNMS(KP634356270, Tp, Tg); + Th = FNMS(KP342584725, Tg, Td); + Tt = FNMS(KP342584725, Td, T4); + Tl = FNMS(KP342584725, T7, Tg); + } + { + E Tu, Ts, TN, Tv; + { + E Tm, TU, Tj, Ti, TT; + TT = FMA(KP715370323, TS, TF); + Ti = FNMS(KP634356270, Th, Ta); + Tu = FNMS(KP634356270, Tt, T7); + Tm = FNMS(KP634356270, Tl, T4); + TU = FMA(KP830830026, TT, TB); + Tj = FNMS(KP778434453, Ti, T7); + { + E Tk, TR, To, Tn, TQ, Tr; + TQ = FMA(KP715370323, TP, TC); + Tn = FNMS(KP778434453, Tm, Ta); + Ci[WS(csi, 5)] = KP989821441 * (FMA(KP918985947, TU, TE)); + Tk = FNMS(KP876768831, Tj, T4); + TR = FNMS(KP830830026, TQ, TE); + To = FNMS(KP876768831, Tn, Td); + Tr = FNMS(KP778434453, Tq, Td); + 
Cr[WS(csr, 5)] = FNMS(KP959492973, Tk, T1); + Ci[WS(csi, 4)] = KP989821441 * (FNMS(KP918985947, TR, TD)); + Cr[WS(csr, 4)] = FNMS(KP959492973, To, T1); + Ts = FNMS(KP876768831, Tr, T7); + } + } + TN = FNMS(KP715370323, TM, TE); + Tv = FNMS(KP778434453, Tu, Tg); + Cr[0] = T1 + T4 + T7 + Ta + Td + Tg; + Cr[WS(csr, 3)] = FNMS(KP959492973, Ts, T1); + { + E TO, Tw, TH, TK; + TO = FNMS(KP830830026, TN, TF); + Tw = FNMS(KP876768831, Tv, Ta); + TH = FMA(KP715370323, TG, TD); + TK = FNMS(KP715370323, TJ, TB); + Ci[WS(csi, 3)] = KP989821441 * (FNMS(KP918985947, TO, TC)); + Cr[WS(csr, 2)] = FNMS(KP959492973, Tw, T1); + TI = FNMS(KP830830026, TH, TC); + TL = FMA(KP830830026, TK, TD); + Tz = FNMS(KP778434453, Ty, T4); + } + } + } + Ci[WS(csi, 2)] = KP989821441 * (FMA(KP918985947, TI, TB)); + Ci[WS(csi, 1)] = KP989821441 * (FNMS(KP918985947, TL, TF)); + TA = FNMS(KP876768831, Tz, Tg); + Cr[WS(csr, 1)] = FNMS(KP959492973, TA, T1); + } + } +} + +static const kr2c_desc desc = { 11, "r2cf_11", {15, 5, 45, 0}, &GENUS }; + +void X(codelet_r2cf_11) (planner *p) { + X(kr2c_register) (p, r2cf_11, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 11 -name r2cf_11 -include r2cf.h */ + +/* + * This function contains 60 FP additions, 50 FP multiplications, + * (or, 20 additions, 10 multiplications, 40 fused multiply/add), + * 28 stack variables, 10 constants, and 22 memory accesses + */ +#include "r2cf.h" + +static void r2cf_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP654860733, +0.654860733945285064056925072466293553183791199); + DK(KP142314838, +0.142314838273285140443792668616369668791051361); + DK(KP959492973, +0.959492973614497389890368057066327699062454848); + DK(KP415415013, +0.415415013001886425529274149229623203524004910); + DK(KP841253532, +0.841253532831181168861811648919367717513292498); + DK(KP989821441, 
+0.989821441880932732376092037776718787376519372); + DK(KP909631995, +0.909631995354518371411715383079028460060241051); + DK(KP281732556, +0.281732556841429697711417915346616899035777899); + DK(KP540640817, +0.540640817455597582107635954318691695431770608); + DK(KP755749574, +0.755749574354258283774035843972344420179717445); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) { + E T1, T4, Tl, Tg, Th, Td, Ti, Ta, Tk, T7, Tj, Tb, Tc; + T1 = R0[0]; + { + E T2, T3, Te, Tf; + T2 = R0[WS(rs, 1)]; + T3 = R1[WS(rs, 4)]; + T4 = T2 + T3; + Tl = T3 - T2; + Te = R1[0]; + Tf = R0[WS(rs, 5)]; + Tg = Te + Tf; + Th = Tf - Te; + } + Tb = R1[WS(rs, 1)]; + Tc = R0[WS(rs, 4)]; + Td = Tb + Tc; + Ti = Tc - Tb; + { + E T8, T9, T5, T6; + T8 = R1[WS(rs, 2)]; + T9 = R0[WS(rs, 3)]; + Ta = T8 + T9; + Tk = T9 - T8; + T5 = R0[WS(rs, 2)]; + T6 = R1[WS(rs, 3)]; + T7 = T5 + T6; + Tj = T6 - T5; + } + Ci[WS(csi, 4)] = FMA(KP755749574, Th, KP540640817 * Ti) + FNMS(KP909631995, Tk, KP281732556 * Tj) - (KP989821441 * Tl); + Cr[WS(csr, 4)] = FMA(KP841253532, Td, T1) + FNMS(KP959492973, T7, KP415415013 * Ta) + FNMA(KP142314838, T4, KP654860733 * Tg); + Ci[WS(csi, 2)] = FMA(KP909631995, Th, KP755749574 * Tl) + FNMA(KP540640817, Tk, KP989821441 * Tj) - (KP281732556 * Ti); + Ci[WS(csi, 5)] = FMA(KP281732556, Th, KP755749574 * Ti) + FNMS(KP909631995, Tj, KP989821441 * Tk) - (KP540640817 * Tl); + Ci[WS(csi, 1)] = FMA(KP540640817, Th, KP909631995 * Tl) + FMA(KP989821441, Ti, KP755749574 * Tj) + (KP281732556 * Tk); + Ci[WS(csi, 3)] = FMA(KP989821441, Th, KP540640817 * Tj) + FNMS(KP909631995, Ti, KP755749574 * Tk) - (KP281732556 * Tl); + Cr[WS(csr, 3)] = FMA(KP415415013, Td, T1) + FNMS(KP654860733, Ta, KP841253532 * T7) + FNMA(KP959492973, T4, KP142314838 * Tg); + Cr[WS(csr, 1)] = FMA(KP841253532, Tg, T1) + FNMS(KP959492973, Ta, KP415415013 * T4) + FNMA(KP654860733, 
T7, KP142314838 * Td); + Cr[0] = T1 + Tg + T4 + Td + T7 + Ta; + Cr[WS(csr, 2)] = FMA(KP415415013, Tg, T1) + FNMS(KP142314838, T7, KP841253532 * Ta) + FNMA(KP959492973, Td, KP654860733 * T4); + Cr[WS(csr, 5)] = FMA(KP841253532, T4, T1) + FNMS(KP142314838, Ta, KP415415013 * T7) + FNMA(KP654860733, Td, KP959492973 * Tg); + } + } +} + +static const kr2c_desc desc = { 11, "r2cf_11", {20, 10, 40, 0}, &GENUS }; + +void X(codelet_r2cf_11) (planner *p) { + X(kr2c_register) (p, r2cf_11, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cf_12 -include r2cf.h */ + +/* + * This function contains 38 FP additions, 10 FP multiplications, + * (or, 30 additions, 2 multiplications, 8 fused multiply/add), + * 31 stack variables, 2 constants, and 24 memory accesses + */ +#include "r2cf.h" + +static void r2cf_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E Tm, T6, Ty, Tp, T5, Tk, Tt, Tb, Tc, Td, T9, Tn; + { + E T1, Tg, Th, Ti, T4, T2, T3, T7, T8, Tj; + T1 = R0[0]; + T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 4)]; + Tg = R1[WS(rs, 1)]; + Th = R1[WS(rs, 3)]; + Ti = R1[WS(rs, 5)]; + T4 = T2 + T3; + Tm = T3 - T2; + T6 = R0[WS(rs, 3)]; + Ty = Ti - Th; + Tj = Th + Ti; + Tp = FNMS(KP500000000, T4, T1); + T5 = T1 + T4; + T7 = R0[WS(rs, 5)]; + Tk = FNMS(KP500000000, Tj, Tg); + Tt = Tg + Tj; + T8 = R0[WS(rs, 1)]; + Tb = R1[WS(rs, 4)]; + Tc = R1[0]; + Td = R1[WS(rs, 2)]; + T9 = T7 + T8; + Tn = T8 - T7; + } + { + E Te, Tz, To, TC; + Te = Tc + Td; + Tz = Td - Tc; + To = Tm - Tn; + TC = Tm + Tn; + { + E Ta, Tq, TA, TB; + Ta = T6 + T9; + Tq = FNMS(KP500000000, T9, T6); + TA = Ty - Tz; + TB = Ty + Tz; + { + E Tf, Tu, Tx, 
Tr; + Tf = FNMS(KP500000000, Te, Tb); + Tu = Tb + Te; + Tx = Tp - Tq; + Tr = Tp + Tq; + { + E Tv, Tw, Tl, Ts; + Tv = T5 + Ta; + Cr[WS(csr, 3)] = T5 - Ta; + Ci[WS(csi, 4)] = KP866025403 * (TC + TB); + Ci[WS(csi, 2)] = KP866025403 * (TB - TC); + Tw = Tt + Tu; + Ci[WS(csi, 3)] = Tt - Tu; + Tl = Tf - Tk; + Ts = Tk + Tf; + Cr[WS(csr, 1)] = FMA(KP866025403, TA, Tx); + Cr[WS(csr, 5)] = FNMS(KP866025403, TA, Tx); + Cr[0] = Tv + Tw; + Cr[WS(csr, 6)] = Tv - Tw; + Cr[WS(csr, 4)] = Tr + Ts; + Cr[WS(csr, 2)] = Tr - Ts; + Ci[WS(csi, 5)] = FNMS(KP866025403, To, Tl); + Ci[WS(csi, 1)] = FMA(KP866025403, To, Tl); + } + } + } + } + } + } +} + +static const kr2c_desc desc = { 12, "r2cf_12", {30, 2, 8, 0}, &GENUS }; + +void X(codelet_r2cf_12) (planner *p) { + X(kr2c_register) (p, r2cf_12, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cf_12 -include r2cf.h */ + +/* + * This function contains 38 FP additions, 8 FP multiplications, + * (or, 34 additions, 4 multiplications, 4 fused multiply/add), + * 21 stack variables, 2 constants, and 24 memory accesses + */ +#include "r2cf.h" + +static void r2cf_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { + E T5, Tp, Tb, Tn, Ty, Tt, Ta, Tq, Tc, Ti, Tz, Tu, Td, To; + { + E T1, T2, T3, T4; + T1 = R0[0]; + T2 = R0[WS(rs, 2)]; + T3 = R0[WS(rs, 4)]; + T4 = T2 + T3; + T5 = T1 + T4; + Tp = FNMS(KP500000000, T4, T1); + Tb = T3 - T2; + } + { + E Tj, Tk, Tl, Tm; + Tj = R1[WS(rs, 1)]; + Tk = R1[WS(rs, 3)]; + Tl = R1[WS(rs, 5)]; + Tm = Tk + Tl; + Tn = FNMS(KP500000000, Tm, Tj); + Ty = Tl - 
Tk; + Tt = Tj + Tm; + } + { + E T6, T7, T8, T9; + T6 = R0[WS(rs, 3)]; + T7 = R0[WS(rs, 5)]; + T8 = R0[WS(rs, 1)]; + T9 = T7 + T8; + Ta = T6 + T9; + Tq = FNMS(KP500000000, T9, T6); + Tc = T8 - T7; + } + { + E Te, Tf, Tg, Th; + Te = R1[WS(rs, 4)]; + Tf = R1[0]; + Tg = R1[WS(rs, 2)]; + Th = Tf + Tg; + Ti = FNMS(KP500000000, Th, Te); + Tz = Tg - Tf; + Tu = Te + Th; + } + Cr[WS(csr, 3)] = T5 - Ta; + Ci[WS(csi, 3)] = Tt - Tu; + Td = KP866025403 * (Tb - Tc); + To = Ti - Tn; + Ci[WS(csi, 1)] = Td + To; + Ci[WS(csi, 5)] = To - Td; + { + E Tx, TA, Tv, Tw; + Tx = Tp - Tq; + TA = KP866025403 * (Ty - Tz); + Cr[WS(csr, 5)] = Tx - TA; + Cr[WS(csr, 1)] = Tx + TA; + Tv = T5 + Ta; + Tw = Tt + Tu; + Cr[WS(csr, 6)] = Tv - Tw; + Cr[0] = Tv + Tw; + } + { + E Tr, Ts, TB, TC; + Tr = Tp + Tq; + Ts = Tn + Ti; + Cr[WS(csr, 2)] = Tr - Ts; + Cr[WS(csr, 4)] = Tr + Ts; + TB = Ty + Tz; + TC = Tb + Tc; + Ci[WS(csi, 2)] = KP866025403 * (TB - TC); + Ci[WS(csi, 4)] = KP866025403 * (TC + TB); + } + } + } +} + +static const kr2c_desc desc = { 12, "r2cf_12", {34, 4, 4, 0}, &GENUS }; + +void X(codelet_r2cf_12) (planner *p) { + X(kr2c_register) (p, r2cf_12, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_128.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_128.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3180 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:08 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 128 -name r2cf_128 -include r2cf.h */ + +/* + * This function contains 956 FP additions, 516 FP multiplications, + * (or, 440 additions, 0 multiplications, 516 fused multiply/add), + * 229 stack variables, 31 constants, and 256 memory accesses + */ +#include "r2cf.h" + +static void r2cf_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP989176509, +0.989176509964780973451673738016243063983689533); + DK(KP803207531, +0.803207531480644909806676512963141923879569427); + DK(KP148335987, +0.148335987538347428753676511486911367000625355); + DK(KP741650546, +0.741650546272035369581266691172079863842265220); + DK(KP998795456, +0.998795456205172392714771604759100694443203615); + DK(KP740951125, +0.740951125354959091175616897495162729728955309); + DK(KP049126849, +0.049126849769467254105343321271313617079695752); + DK(KP906347169, +0.906347169019147157946142717268914412664134293); + DK(KP857728610, +0.857728610000272069902269984284770137042490799); + DK(KP970031253, +0.970031253194543992603984207286100251456865962); + DK(KP599376933, +0.599376933681923766271389869014404232837890546); + DK(KP250486960, +0.250486960191305461595702160124721208578685568); + DK(KP941544065, +0.941544065183020778412509402599502357185589796); + DK(KP903989293, +0.903989293123443331586200297230537048710132025); + DK(KP472964775, 
+0.472964775891319928124438237972992463904131113); + DK(KP357805721, +0.357805721314524104672487743774474392487532769); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(512, rs), MAKE_VOLATILE_STRIDE(512, csr), MAKE_VOLATILE_STRIDE(512, csi)) { + E T95, T96; + { + E TcD, TdR, T5P, T8v, T27, T7r, Tf, Ta5, T7s, T5S, T8w, T2e, TdS, TcG, Tbn; + E Tu, TcK, TdU, TK, Ta6, T7w, T8y, T2o, T5U, TcN, TdV, TZ, Ta7, T7z, T8z; + E T2x, T5V, T1g, Taa, Tab, T1v, Tew, TcX, Tex, TcU, T6A, T2M, T9b, T7E, T9a; + E T7H, T6z, T2T, TeO, TdK, TeL, Tdz, T9p, T8d, T6O, T5G, T6L, T4X, Tc3, TaV; + E Tc4, Tbi, T9s, T8o, TeH, Tdp, TeE, Tde, T9i, T7U, T6H, T4r, T6E, T3I, TbW; + E Tao, TbX, TaL, T9l, T85, T1L, Tad, Tae, T20, Tez, Td6, TeA, Td3, T6x, T37; + E T9e, T7L, T9d, T7O, T6w, T3e, TbZ, T3Z, T4s, Tc0, TeF, Tds, T4t, T4g, T87; + E T80, TeI, Tdl, T86, T7X, TaM, TaD, Tb2, Tc6, T8e, T8f, T5e, T5H, Tb9, 
Tc7; + E TeM, TdN, T5I, T5v, T8q, T8j, TeP, TdG; + { + E T7G, T2S, T2P, T7F; + { + E T28, Ti, Tn, T2c, Ts, T29, Tl, To; + { + E T4, T23, T3, T25, Td, T5, T8, T9; + { + E T1, T2, Tb, Tc; + T1 = R0[0]; + T2 = R0[WS(rs, 32)]; + Tb = R0[WS(rs, 56)]; + Tc = R0[WS(rs, 24)]; + T4 = R0[WS(rs, 16)]; + T23 = T1 - T2; + T3 = T1 + T2; + T25 = Tb - Tc; + Td = Tb + Tc; + T5 = R0[WS(rs, 48)]; + T8 = R0[WS(rs, 8)]; + T9 = R0[WS(rs, 40)]; + } + { + E Tq, Tr, Tj, Tk; + { + E Tg, T5N, T6, T24, Ta, Th; + Tg = R0[WS(rs, 4)]; + T5N = T4 - T5; + T6 = T4 + T5; + T24 = T8 - T9; + Ta = T8 + T9; + Th = R0[WS(rs, 36)]; + { + E T7, T26, T5O, Te; + TcD = T3 - T6; + T7 = T3 + T6; + T26 = T24 + T25; + T5O = T25 - T24; + TdR = Td - Ta; + Te = Ta + Td; + T5P = FNMS(KP707106781, T5O, T5N); + T8v = FMA(KP707106781, T5O, T5N); + T27 = FMA(KP707106781, T26, T23); + T7r = FNMS(KP707106781, T26, T23); + Tf = T7 + Te; + Ta5 = T7 - Te; + T28 = Tg - Th; + Ti = Tg + Th; + } + } + Tq = R0[WS(rs, 12)]; + Tr = R0[WS(rs, 44)]; + Tj = R0[WS(rs, 20)]; + Tk = R0[WS(rs, 52)]; + Tn = R0[WS(rs, 60)]; + T2c = Tq - Tr; + Ts = Tq + Tr; + T29 = Tj - Tk; + Tl = Tj + Tk; + To = R0[WS(rs, 28)]; + } + } + { + E T2g, T2l, T2h, TF, TcI, TC, T2i, TI; + { + E Ty, TG, TB, TH; + { + E Tw, T5Q, T2a, TcE, Tm, T2b, Tp, Tx; + Tw = R0[WS(rs, 2)]; + T5Q = FMA(KP414213562, T28, T29); + T2a = FNMS(KP414213562, T29, T28); + TcE = Ti - Tl; + Tm = Ti + Tl; + T2b = Tn - To; + Tp = Tn + To; + Tx = R0[WS(rs, 34)]; + { + E Tz, TA, TD, TE; + Tz = R0[WS(rs, 18)]; + { + E T5R, T2d, TcF, Tt; + T5R = FNMS(KP414213562, T2b, T2c); + T2d = FMA(KP414213562, T2c, T2b); + TcF = Tp - Ts; + Tt = Tp + Ts; + T2g = Tw - Tx; + Ty = Tw + Tx; + T7s = T5Q - T5R; + T5S = T5Q + T5R; + T8w = T2d - T2a; + T2e = T2a + T2d; + TdS = TcF - TcE; + TcG = TcE + TcF; + Tbn = Tt - Tm; + Tu = Tm + Tt; + TA = R0[WS(rs, 50)]; + } + TD = R0[WS(rs, 10)]; + TE = R0[WS(rs, 42)]; + TG = R0[WS(rs, 58)]; + T2l = Tz - TA; + TB = Tz + TA; + T2h = TD - TE; + TF = TD + TE; + TH = R0[WS(rs, 
26)]; + } + } + TcI = Ty - TB; + TC = Ty + TB; + T2i = TG - TH; + TI = TG + TH; + } + { + E T2p, T2u, T2q, TU, TcL, TR, T2r, TX; + { + E TN, TV, TQ, TW; + { + E T2k, T7u, T2n, T7v, TL, TM; + TL = R0[WS(rs, 62)]; + TM = R0[WS(rs, 30)]; + { + E TJ, TcJ, T2m, T2j; + TJ = TF + TI; + TcJ = TI - TF; + T2m = T2h - T2i; + T2j = T2h + T2i; + TcK = FMA(KP414213562, TcJ, TcI); + TdU = FNMS(KP414213562, TcI, TcJ); + TK = TC + TJ; + Ta6 = TC - TJ; + T2k = FMA(KP707106781, T2j, T2g); + T7u = FNMS(KP707106781, T2j, T2g); + T2n = FMA(KP707106781, T2m, T2l); + T7v = FNMS(KP707106781, T2m, T2l); + T2p = TL - TM; + TN = TL + TM; + } + T7w = FMA(KP668178637, T7v, T7u); + T8y = FNMS(KP668178637, T7u, T7v); + T2o = FNMS(KP198912367, T2n, T2k); + T5U = FMA(KP198912367, T2k, T2n); + { + E TO, TP, TS, TT; + TO = R0[WS(rs, 14)]; + TP = R0[WS(rs, 46)]; + TS = R0[WS(rs, 6)]; + TT = R0[WS(rs, 38)]; + TV = R0[WS(rs, 54)]; + T2u = TO - TP; + TQ = TO + TP; + T2q = TS - TT; + TU = TS + TT; + TW = R0[WS(rs, 22)]; + } + } + TcL = TN - TQ; + TR = TN + TQ; + T2r = TV - TW; + TX = TV + TW; + } + { + E T2A, T14, T2N, T17, T1b, T1e, T2D, T2O, T1r, T2I, T1q, T2Q, T2H, TcR, T1n; + E T1s, T15, T16; + { + E T2t, T7x, T2w, T7y, T12, T13; + T12 = R0[WS(rs, 1)]; + T13 = R0[WS(rs, 33)]; + { + E TY, TcM, T2v, T2s; + TY = TU + TX; + TcM = TX - TU; + T2v = T2q - T2r; + T2s = T2q + T2r; + TcN = FNMS(KP414213562, TcM, TcL); + TdV = FMA(KP414213562, TcL, TcM); + TZ = TR + TY; + Ta7 = TR - TY; + T2t = FMA(KP707106781, T2s, T2p); + T7x = FNMS(KP707106781, T2s, T2p); + T2w = FMA(KP707106781, T2v, T2u); + T7y = FNMS(KP707106781, T2v, T2u); + T2A = T12 - T13; + T14 = T12 + T13; + } + T7z = FNMS(KP668178637, T7y, T7x); + T8z = FMA(KP668178637, T7x, T7y); + T2x = FMA(KP198912367, T2w, T2t); + T5V = FNMS(KP198912367, T2t, T2w); + T15 = R0[WS(rs, 17)]; + T16 = R0[WS(rs, 49)]; + } + { + E T1c, T2B, T1d, T19, T1a; + T19 = R0[WS(rs, 9)]; + T1a = R0[WS(rs, 41)]; + T1c = R0[WS(rs, 57)]; + T2N = T15 - T16; + T17 = T15 + T16; + T2B = 
T19 - T1a; + T1b = T19 + T1a; + T1d = R0[WS(rs, 25)]; + { + E T1k, T2F, T1j, T1l, T1h, T1i, T2C; + T1h = R0[WS(rs, 5)]; + T1i = R0[WS(rs, 37)]; + T2C = T1c - T1d; + T1e = T1c + T1d; + T1k = R0[WS(rs, 21)]; + T2F = T1h - T1i; + T1j = T1h + T1i; + T2D = T2B + T2C; + T2O = T2B - T2C; + T1l = R0[WS(rs, 53)]; + { + E T1o, T1p, T2G, T1m; + T1o = R0[WS(rs, 61)]; + T1p = R0[WS(rs, 29)]; + T1r = R0[WS(rs, 13)]; + T2G = T1k - T1l; + T1m = T1k + T1l; + T2I = T1o - T1p; + T1q = T1o + T1p; + T2Q = FMA(KP414213562, T2F, T2G); + T2H = FNMS(KP414213562, T2G, T2F); + TcR = T1j - T1m; + T1n = T1j + T1m; + T1s = R0[WS(rs, 45)]; + } + } + } + { + E TcQ, TcV, T2K, T2R, T1u, TcT, TcW, TcS; + { + E T18, T1f, T1t, T2J; + T18 = T14 + T17; + TcQ = T14 - T17; + TcV = T1e - T1b; + T1f = T1b + T1e; + T1t = T1r + T1s; + T2J = T1r - T1s; + T1g = T18 + T1f; + Taa = T18 - T1f; + T2K = FMA(KP414213562, T2J, T2I); + T2R = FNMS(KP414213562, T2I, T2J); + T1u = T1q + T1t; + TcS = T1q - T1t; + } + TcT = TcR + TcS; + TcW = TcS - TcR; + { + E T7C, T2E, T2L, T7D; + T7C = FNMS(KP707106781, T2D, T2A); + T2E = FMA(KP707106781, T2D, T2A); + Tab = T1u - T1n; + T1v = T1n + T1u; + Tew = FNMS(KP707106781, TcW, TcV); + TcX = FMA(KP707106781, TcW, TcV); + Tex = FNMS(KP707106781, TcT, TcQ); + TcU = FMA(KP707106781, TcT, TcQ); + T2L = T2H + T2K; + T7G = T2K - T2H; + T7D = T2Q - T2R; + T2S = T2Q + T2R; + T2P = FMA(KP707106781, T2O, T2N); + T7F = FNMS(KP707106781, T2O, T2N); + T6A = FNMS(KP923879532, T2L, T2E); + T2M = FMA(KP923879532, T2L, T2E); + T9b = FNMS(KP923879532, T7D, T7C); + T7E = FMA(KP923879532, T7D, T7C); + } + } + } + } + } + } + { + E T83, T84, T8m, T8n; + { + E TaP, T4z, TaQ, T5A, TaS, TaT, T4G, T5B, T4O, T5D, Tbh, Tdw, T4R, Tbc, T4S; + E T4T; + { + E T4x, T4y, T5y, T5z; + T4x = R1[WS(rs, 63)]; + T9a = FNMS(KP923879532, T7G, T7F); + T7H = FMA(KP923879532, T7G, T7F); + T6z = FNMS(KP923879532, T2S, T2P); + T2T = FMA(KP923879532, T2S, T2P); + T4y = R1[WS(rs, 31)]; + T5y = R1[WS(rs, 47)]; + T5z = R1[WS(rs, 
15)]; + { + E T4A, T4B, T4D, T4E; + T4A = R1[WS(rs, 7)]; + TaP = T4x + T4y; + T4z = T4x - T4y; + TaQ = T5z + T5y; + T5A = T5y - T5z; + T4B = R1[WS(rs, 39)]; + T4D = R1[WS(rs, 55)]; + T4E = R1[WS(rs, 23)]; + { + E T4K, Tbf, Tbg, T4N, T4P, T4Q; + { + E T4I, T4C, T4F, T4J, T4L, T4M; + T4I = R1[WS(rs, 3)]; + TaS = T4A + T4B; + T4C = T4A - T4B; + TaT = T4D + T4E; + T4F = T4D - T4E; + T4J = R1[WS(rs, 35)]; + T4L = R1[WS(rs, 51)]; + T4M = R1[WS(rs, 19)]; + T4G = T4C + T4F; + T5B = T4F - T4C; + T4K = T4I - T4J; + Tbf = T4I + T4J; + Tbg = T4M + T4L; + T4N = T4L - T4M; + } + T4P = R1[WS(rs, 59)]; + T4Q = R1[WS(rs, 27)]; + T4O = FMA(KP414213562, T4N, T4K); + T5D = FNMS(KP414213562, T4K, T4N); + Tbh = Tbf + Tbg; + Tdw = Tbf - Tbg; + T4R = T4P - T4Q; + Tbc = T4P + T4Q; + T4S = R1[WS(rs, 43)]; + T4T = R1[WS(rs, 11)]; + } + } + } + { + E T4H, T8b, TaR, Tdv, TdI, TaU, T4U, Tbd, T5C; + T4H = FMA(KP707106781, T4G, T4z); + T8b = FNMS(KP707106781, T4G, T4z); + TaR = TaP + TaQ; + Tdv = TaP - TaQ; + TdI = TaT - TaS; + TaU = TaS + TaT; + T4U = T4S - T4T; + Tbd = T4T + T4S; + T8m = FNMS(KP707106781, T5B, T5A); + T5C = FMA(KP707106781, T5B, T5A); + { + E Tbe, Tdx, T5E, T4V; + Tbe = Tbc + Tbd; + Tdx = Tbc - Tbd; + T5E = FMA(KP414213562, T4R, T4U); + T4V = FNMS(KP414213562, T4U, T4R); + { + E Tdy, TdJ, T5F, T8c, T4W; + Tdy = Tdw + Tdx; + TdJ = Tdx - Tdw; + T5F = T5D + T5E; + T8c = T5E - T5D; + T8n = T4V - T4O; + T4W = T4O + T4V; + TeO = FNMS(KP707106781, TdJ, TdI); + TdK = FMA(KP707106781, TdJ, TdI); + TeL = FNMS(KP707106781, Tdy, Tdv); + Tdz = FMA(KP707106781, Tdy, Tdv); + T9p = FNMS(KP923879532, T8c, T8b); + T8d = FMA(KP923879532, T8c, T8b); + T6O = FNMS(KP923879532, T5F, T5C); + T5G = FMA(KP923879532, T5F, T5C); + T6L = FNMS(KP923879532, T4W, T4H); + T4X = FMA(KP923879532, T4W, T4H); + } + Tc3 = TaR + TaU; + TaV = TaR - TaU; + Tc4 = Tbh + Tbe; + Tbi = Tbe - Tbh; + } + } + } + { + E Tai, T3k, Taj, T4l, Tal, Tam, T4m, T3r, T3D, TaF, T3C, Tdb, TaK, T3z, T4o; + E T3E; + { + E T4j, T4k, T3i, 
T3j; + T3i = R1[0]; + T3j = R1[WS(rs, 32)]; + T4j = R1[WS(rs, 16)]; + T9s = FMA(KP923879532, T8n, T8m); + T8o = FNMS(KP923879532, T8n, T8m); + Tai = T3i + T3j; + T3k = T3i - T3j; + T4k = R1[WS(rs, 48)]; + { + E T3o, T3n, T3p, T3l, T3m; + T3l = R1[WS(rs, 8)]; + T3m = R1[WS(rs, 40)]; + T3o = R1[WS(rs, 56)]; + Taj = T4j + T4k; + T4l = T4j - T4k; + T3n = T3l - T3m; + Tal = T3l + T3m; + T3p = R1[WS(rs, 24)]; + { + E T3w, TaI, T3v, T3x, T3t, T3u, T3q; + T3t = R1[WS(rs, 4)]; + T3u = R1[WS(rs, 36)]; + T3q = T3o - T3p; + Tam = T3o + T3p; + T3w = R1[WS(rs, 20)]; + TaI = T3t + T3u; + T3v = T3t - T3u; + T4m = T3n - T3q; + T3r = T3n + T3q; + T3x = R1[WS(rs, 52)]; + { + E T3A, T3B, TaJ, T3y; + T3A = R1[WS(rs, 60)]; + T3B = R1[WS(rs, 28)]; + T3D = R1[WS(rs, 12)]; + TaJ = T3w + T3x; + T3y = T3w - T3x; + TaF = T3A + T3B; + T3C = T3A - T3B; + Tdb = TaI - TaJ; + TaK = TaI + TaJ; + T3z = FNMS(KP414213562, T3y, T3v); + T4o = FMA(KP414213562, T3v, T3y); + T3E = R1[WS(rs, 44)]; + } + } + } + } + { + E T3s, T7S, Tak, Tda, Tdn, Tan, T3F, TaG, T4n; + T3s = FMA(KP707106781, T3r, T3k); + T7S = FNMS(KP707106781, T3r, T3k); + Tak = Tai + Taj; + Tda = Tai - Taj; + Tdn = Tam - Tal; + Tan = Tal + Tam; + T3F = T3D - T3E; + TaG = T3D + T3E; + T83 = FNMS(KP707106781, T4m, T4l); + T4n = FMA(KP707106781, T4m, T4l); + { + E TaH, Tdc, T4p, T3G; + TaH = TaF + TaG; + Tdc = TaF - TaG; + T4p = FNMS(KP414213562, T3C, T3F); + T3G = FMA(KP414213562, T3F, T3C); + { + E Tdd, Tdo, T4q, T7T, T3H; + Tdd = Tdb + Tdc; + Tdo = Tdc - Tdb; + T4q = T4o + T4p; + T7T = T4o - T4p; + T84 = T3G - T3z; + T3H = T3z + T3G; + TeH = FNMS(KP707106781, Tdo, Tdn); + Tdp = FMA(KP707106781, Tdo, Tdn); + TeE = FNMS(KP707106781, Tdd, Tda); + Tde = FMA(KP707106781, Tdd, Tda); + T9i = FNMS(KP923879532, T7T, T7S); + T7U = FMA(KP923879532, T7T, T7S); + T6H = FNMS(KP923879532, T4q, T4n); + T4r = FMA(KP923879532, T4q, T4n); + T6E = FNMS(KP923879532, T3H, T3s); + T3I = FMA(KP923879532, T3H, T3s); + } + TbW = Tak + Tan; + Tao = Tak - Tan; + TbX = 
TaK + TaH; + TaL = TaH - TaK; + } + } + } + { + E T7N, T3d, T3a, T7M; + { + E T2V, T1z, T38, T1C, T1G, T1J, T2Y, T39, T1W, T33, T1V, T3b, T32, Td0, T1S; + E T1X; + { + E T1A, T1B, T1x, T1y; + T1x = R0[WS(rs, 63)]; + T1y = R0[WS(rs, 31)]; + T1A = R0[WS(rs, 15)]; + T9l = FNMS(KP923879532, T84, T83); + T85 = FMA(KP923879532, T84, T83); + T2V = T1x - T1y; + T1z = T1x + T1y; + T1B = R0[WS(rs, 47)]; + { + E T1H, T2W, T1I, T1E, T1F; + T1E = R0[WS(rs, 7)]; + T1F = R0[WS(rs, 39)]; + T1H = R0[WS(rs, 55)]; + T38 = T1A - T1B; + T1C = T1A + T1B; + T2W = T1E - T1F; + T1G = T1E + T1F; + T1I = R0[WS(rs, 23)]; + { + E T1P, T30, T1O, T1Q, T1M, T1N, T2X; + T1M = R0[WS(rs, 3)]; + T1N = R0[WS(rs, 35)]; + T2X = T1H - T1I; + T1J = T1H + T1I; + T1P = R0[WS(rs, 19)]; + T30 = T1M - T1N; + T1O = T1M + T1N; + T2Y = T2W + T2X; + T39 = T2W - T2X; + T1Q = R0[WS(rs, 51)]; + { + E T1T, T1U, T31, T1R; + T1T = R0[WS(rs, 59)]; + T1U = R0[WS(rs, 27)]; + T1W = R0[WS(rs, 11)]; + T31 = T1P - T1Q; + T1R = T1P + T1Q; + T33 = T1T - T1U; + T1V = T1T + T1U; + T3b = FMA(KP414213562, T30, T31); + T32 = FNMS(KP414213562, T31, T30); + Td0 = T1O - T1R; + T1S = T1O + T1R; + T1X = R0[WS(rs, 43)]; + } + } + } + } + { + E TcZ, Td4, T35, T3c, T1Z, Td2, Td5, Td1; + { + E T1D, T1K, T1Y, T34; + T1D = T1z + T1C; + TcZ = T1z - T1C; + Td4 = T1J - T1G; + T1K = T1G + T1J; + T1Y = T1W + T1X; + T34 = T1W - T1X; + T1L = T1D + T1K; + Tad = T1D - T1K; + T35 = FMA(KP414213562, T34, T33); + T3c = FNMS(KP414213562, T33, T34); + T1Z = T1V + T1Y; + Td1 = T1V - T1Y; + } + Td2 = Td0 + Td1; + Td5 = Td1 - Td0; + { + E T7J, T2Z, T36, T7K; + T7J = FNMS(KP707106781, T2Y, T2V); + T2Z = FMA(KP707106781, T2Y, T2V); + Tae = T1Z - T1S; + T20 = T1S + T1Z; + Tez = FNMS(KP707106781, Td5, Td4); + Td6 = FMA(KP707106781, Td5, Td4); + TeA = FNMS(KP707106781, Td2, TcZ); + Td3 = FMA(KP707106781, Td2, TcZ); + T36 = T32 + T35; + T7N = T35 - T32; + T7K = T3b - T3c; + T3d = T3b + T3c; + T3a = FMA(KP707106781, T39, T38); + T7M = FNMS(KP707106781, T39, T38); + 
T6x = FNMS(KP923879532, T36, T2Z); + T37 = FMA(KP923879532, T36, T2Z); + T9e = FNMS(KP923879532, T7K, T7J); + T7L = FMA(KP923879532, T7K, T7J); + } + } + } + { + E Tav, T7V, T7W, TaC; + { + E T3L, T3W, Tdf, Tar, T42, T4d, Tay, Tdi, T46, Tau, Tdg, T3X, T3S, Taz, T45; + E T47, Taw, Tax; + { + E T3J, T3K, T3U, T3V; + T3J = R1[WS(rs, 2)]; + T9d = FNMS(KP923879532, T7N, T7M); + T7O = FMA(KP923879532, T7N, T7M); + T6w = FNMS(KP923879532, T3d, T3a); + T3e = FMA(KP923879532, T3d, T3a); + T3K = R1[WS(rs, 34)]; + T3U = R1[WS(rs, 18)]; + T3V = R1[WS(rs, 50)]; + { + E T40, Tap, Taq, T41, T4b, T4c; + T40 = R1[WS(rs, 62)]; + T3L = T3J - T3K; + Tap = T3J + T3K; + T3W = T3U - T3V; + Taq = T3U + T3V; + T41 = R1[WS(rs, 30)]; + T4b = R1[WS(rs, 14)]; + T4c = R1[WS(rs, 46)]; + Tdf = Tap - Taq; + Tar = Tap + Taq; + T42 = T40 - T41; + Taw = T40 + T41; + Tax = T4b + T4c; + T4d = T4b - T4c; + } + } + { + E T3M, T3N, T3P, T3Q; + T3M = R1[WS(rs, 10)]; + Tay = Taw + Tax; + Tdi = Taw - Tax; + T3N = R1[WS(rs, 42)]; + T3P = R1[WS(rs, 58)]; + T3Q = R1[WS(rs, 26)]; + { + E T43, Tas, T3O, Tat, T3R, T44; + T43 = R1[WS(rs, 6)]; + Tas = T3M + T3N; + T3O = T3M - T3N; + Tat = T3P + T3Q; + T3R = T3P - T3Q; + T44 = R1[WS(rs, 38)]; + T46 = R1[WS(rs, 54)]; + Tau = Tas + Tat; + Tdg = Tat - Tas; + T3X = T3O - T3R; + T3S = T3O + T3R; + Taz = T43 + T44; + T45 = T43 - T44; + T47 = R1[WS(rs, 22)]; + } + } + { + E Tdq, Tdh, T49, T4e, Tdr, Tdk; + Tav = Tar - Tau; + TbZ = Tar + Tau; + { + E T3T, T3Y, TaA, T48, Tdj, TaB; + T3T = FMA(KP707106781, T3S, T3L); + T7V = FNMS(KP707106781, T3S, T3L); + T7W = FNMS(KP707106781, T3X, T3W); + T3Y = FMA(KP707106781, T3X, T3W); + TaA = T46 + T47; + T48 = T46 - T47; + Tdq = FNMS(KP414213562, Tdf, Tdg); + Tdh = FMA(KP414213562, Tdg, Tdf); + T3Z = FNMS(KP198912367, T3Y, T3T); + T4s = FMA(KP198912367, T3T, T3Y); + Tdj = TaA - Taz; + TaB = Taz + TaA; + T49 = T45 + T48; + T4e = T45 - T48; + TaC = Tay - TaB; + Tc0 = Tay + TaB; + Tdr = FMA(KP414213562, Tdi, Tdj); + Tdk = FNMS(KP414213562, 
Tdj, Tdi); + } + { + E T7Z, T7Y, T4f, T4a; + T7Z = FNMS(KP707106781, T4e, T4d); + T4f = FMA(KP707106781, T4e, T4d); + T4a = FMA(KP707106781, T49, T42); + T7Y = FNMS(KP707106781, T49, T42); + TeF = Tdr - Tdq; + Tds = Tdq + Tdr; + T4t = FNMS(KP198912367, T4a, T4f); + T4g = FMA(KP198912367, T4f, T4a); + T87 = FMA(KP668178637, T7Y, T7Z); + T80 = FNMS(KP668178637, T7Z, T7Y); + TeI = Tdh - Tdk; + Tdl = Tdh + Tdk; + } + } + } + { + E T50, T5b, TdA, TaY, T5h, T5s, Tb5, TdD, T5l, Tb1, TdB, T5c, T57, Tb6, T5k; + E T5m, Tb3, Tb4; + { + E T4Y, T4Z, T59, T5a; + T4Y = R1[WS(rs, 1)]; + T86 = FNMS(KP668178637, T7V, T7W); + T7X = FMA(KP668178637, T7W, T7V); + TaM = TaC - Tav; + TaD = Tav + TaC; + T4Z = R1[WS(rs, 33)]; + T59 = R1[WS(rs, 49)]; + T5a = R1[WS(rs, 17)]; + { + E T5f, TaW, TaX, T5g, T5q, T5r; + T5f = R1[WS(rs, 61)]; + T50 = T4Y - T4Z; + TaW = T4Y + T4Z; + T5b = T59 - T5a; + TaX = T5a + T59; + T5g = R1[WS(rs, 29)]; + T5q = R1[WS(rs, 45)]; + T5r = R1[WS(rs, 13)]; + TdA = TaW - TaX; + TaY = TaW + TaX; + T5h = T5f - T5g; + Tb3 = T5f + T5g; + Tb4 = T5r + T5q; + T5s = T5q - T5r; + } + } + { + E T51, T52, T54, T55; + T51 = R1[WS(rs, 9)]; + Tb5 = Tb3 + Tb4; + TdD = Tb3 - Tb4; + T52 = R1[WS(rs, 41)]; + T54 = R1[WS(rs, 57)]; + T55 = R1[WS(rs, 25)]; + { + E T5i, TaZ, T53, Tb0, T56, T5j; + T5i = R1[WS(rs, 5)]; + TaZ = T51 + T52; + T53 = T51 - T52; + Tb0 = T54 + T55; + T56 = T54 - T55; + T5j = R1[WS(rs, 37)]; + T5l = R1[WS(rs, 53)]; + Tb1 = TaZ + Tb0; + TdB = Tb0 - TaZ; + T5c = T56 - T53; + T57 = T53 + T56; + Tb6 = T5i + T5j; + T5k = T5i - T5j; + T5m = R1[WS(rs, 21)]; + } + } + { + E TdL, TdC, T5o, T5t, TdM, TdF; + Tb2 = TaY - Tb1; + Tc6 = TaY + Tb1; + { + E T58, T5d, Tb7, T5n, TdE, Tb8; + T58 = FMA(KP707106781, T57, T50); + T8e = FNMS(KP707106781, T57, T50); + T8f = FNMS(KP707106781, T5c, T5b); + T5d = FMA(KP707106781, T5c, T5b); + Tb7 = T5l + T5m; + T5n = T5l - T5m; + TdL = FNMS(KP414213562, TdA, TdB); + TdC = FMA(KP414213562, TdB, TdA); + T5e = FMA(KP198912367, T5d, T58); + T5H = 
FNMS(KP198912367, T58, T5d); + TdE = Tb7 - Tb6; + Tb8 = Tb6 + Tb7; + T5o = T5k + T5n; + T5t = T5n - T5k; + Tb9 = Tb5 - Tb8; + Tc7 = Tb5 + Tb8; + TdM = FMA(KP414213562, TdD, TdE); + TdF = FNMS(KP414213562, TdE, TdD); + } + { + E T8i, T8h, T5u, T5p; + T8i = FNMS(KP707106781, T5t, T5s); + T5u = FMA(KP707106781, T5t, T5s); + T5p = FMA(KP707106781, T5o, T5h); + T8h = FNMS(KP707106781, T5o, T5h); + TeM = TdM - TdL; + TdN = TdL + TdM; + T5I = FMA(KP198912367, T5p, T5u); + T5v = FNMS(KP198912367, T5u, T5p); + T8q = FNMS(KP668178637, T8h, T8i); + T8j = FMA(KP668178637, T8i, T8h); + TeP = TdF - TdC; + TdG = TdC + TdF; + } + } + } + } + } + } + } + { + E T8p, T8g, TcH, TdW, TdT, TcO, Tfp, Tfk, Tfj, Tfq; + { + E Tbj, Tba, Tcy, Tco, TcB, Tcl, Tcx, Tcv, Tcz, Tcr; + { + E Tch, Tct, Tcp, Tcq, Tci, T1w, TbV, T11, Tcf, Tc9, T21, Tcj, Tcm, TbY, Tc1; + E Tcn, Tcu, Tck; + { + E Tv, T10, Tc5, Tc8; + Tch = Tf - Tu; + Tv = Tf + Tu; + T8p = FMA(KP668178637, T8e, T8f); + T8g = FNMS(KP668178637, T8f, T8e); + Tbj = Tb9 - Tb2; + Tba = Tb2 + Tb9; + T10 = TK + TZ; + Tct = TZ - TK; + Tcp = Tc3 - Tc4; + Tc5 = Tc3 + Tc4; + Tc8 = Tc6 + Tc7; + Tcq = Tc7 - Tc6; + Tci = T1g - T1v; + T1w = T1g + T1v; + TbV = Tv - T10; + T11 = Tv + T10; + Tcf = Tc5 + Tc8; + Tc9 = Tc5 - Tc8; + T21 = T1L + T20; + Tcj = T1L - T20; + Tcm = TbW - TbX; + TbY = TbW + TbX; + Tc1 = TbZ + Tc0; + Tcn = Tc0 - TbZ; + } + { + E Tcb, T22, Tce, Tc2; + Tcb = T21 - T1w; + T22 = T1w + T21; + Tce = TbY + Tc1; + Tc2 = TbY - Tc1; + { + E Tcd, Tcg, Tca, Tcc; + Tcd = T11 + T22; + Cr[WS(csr, 32)] = T11 - T22; + Tcg = Tce + Tcf; + Ci[WS(csi, 32)] = Tcf - Tce; + Tca = Tc2 + Tc9; + Tcc = Tc9 - Tc2; + Cr[0] = Tcd + Tcg; + Cr[WS(csr, 64)] = Tcd - Tcg; + Ci[WS(csi, 48)] = FMS(KP707106781, Tcc, Tcb); + Ci[WS(csi, 16)] = FMA(KP707106781, Tcc, Tcb); + Cr[WS(csr, 16)] = FMA(KP707106781, Tca, TbV); + Cr[WS(csr, 48)] = FNMS(KP707106781, Tca, TbV); + Tcu = Tcj - Tci; + Tck = Tci + Tcj; + Tcy = FNMS(KP414213562, Tcm, Tcn); + Tco = FMA(KP414213562, Tcn, Tcm); 
+ } + } + TcB = FNMS(KP707106781, Tck, Tch); + Tcl = FMA(KP707106781, Tck, Tch); + Tcx = FMA(KP707106781, Tcu, Tct); + Tcv = FNMS(KP707106781, Tcu, Tct); + Tcz = FMA(KP414213562, Tcp, Tcq); + Tcr = FNMS(KP414213562, Tcq, Tcp); + } + { + E TbT, TbO, TbN, TbU; + { + E Ta9, TbB, Tbb, TbL, Tbp, TbM, Tag, Tbk, TbR, TbJ, Tbw, TaO, TbC, Tbs, TbQ; + E TbG; + { + E Tbq, Tbr, TbH, TbI; + { + E Tbo, Ta8, Tac, Taf; + Tbo = Ta7 - Ta6; + Ta8 = Ta6 + Ta7; + { + E TcC, TcA, Tcw, Tcs; + TcC = Tcz - Tcy; + TcA = Tcy + Tcz; + Tcw = Tcr - Tco; + Tcs = Tco + Tcr; + Cr[WS(csr, 24)] = FMA(KP923879532, TcC, TcB); + Cr[WS(csr, 40)] = FNMS(KP923879532, TcC, TcB); + Ci[WS(csi, 56)] = FMS(KP923879532, TcA, Tcx); + Ci[WS(csi, 8)] = FMA(KP923879532, TcA, Tcx); + Ci[WS(csi, 40)] = FMA(KP923879532, Tcw, Tcv); + Ci[WS(csi, 24)] = FMS(KP923879532, Tcw, Tcv); + Cr[WS(csr, 8)] = FMA(KP923879532, Tcs, Tcl); + Cr[WS(csr, 56)] = FNMS(KP923879532, Tcs, Tcl); + Ta9 = FMA(KP707106781, Ta8, Ta5); + TbB = FNMS(KP707106781, Ta8, Ta5); + } + Tbq = FNMS(KP414213562, Taa, Tab); + Tac = FMA(KP414213562, Tab, Taa); + Taf = FNMS(KP414213562, Tae, Tad); + Tbr = FMA(KP414213562, Tad, Tae); + Tbb = FMA(KP707106781, Tba, TaV); + TbH = FNMS(KP707106781, Tba, TaV); + TbL = FNMS(KP707106781, Tbo, Tbn); + Tbp = FMA(KP707106781, Tbo, Tbn); + TbM = Taf - Tac; + Tag = Tac + Taf; + TbI = FNMS(KP707106781, Tbj, Tbi); + Tbk = FMA(KP707106781, Tbj, Tbi); + } + { + E TbE, TbF, TaE, TaN; + TbE = FNMS(KP707106781, TaD, Tao); + TaE = FMA(KP707106781, TaD, Tao); + TaN = FMA(KP707106781, TaM, TaL); + TbF = FNMS(KP707106781, TaM, TaL); + TbR = FNMS(KP668178637, TbH, TbI); + TbJ = FMA(KP668178637, TbI, TbH); + Tbw = FNMS(KP198912367, TaE, TaN); + TaO = FMA(KP198912367, TaN, TaE); + TbC = Tbr - Tbq; + Tbs = Tbq + Tbr; + TbQ = FMA(KP668178637, TbE, TbF); + TbG = FNMS(KP668178637, TbF, TbE); + } + } + { + E Tbz, Tah, Tbv, Tbt, Tbx, Tbl; + Tbz = FNMS(KP923879532, Tag, Ta9); + Tah = FMA(KP923879532, Tag, Ta9); + Tbv = FMA(KP923879532, Tbs, 
Tbp); + Tbt = FNMS(KP923879532, Tbs, Tbp); + Tbx = FMA(KP198912367, Tbb, Tbk); + Tbl = FNMS(KP198912367, Tbk, Tbb); + { + E TbD, TbK, TbP, TbS; + TbT = FNMS(KP923879532, TbC, TbB); + TbD = FMA(KP923879532, TbC, TbB); + { + E TbA, Tby, Tbu, Tbm; + TbA = Tbx - Tbw; + Tby = Tbw + Tbx; + Tbu = Tbl - TaO; + Tbm = TaO + Tbl; + Cr[WS(csr, 28)] = FMA(KP980785280, TbA, Tbz); + Cr[WS(csr, 36)] = FNMS(KP980785280, TbA, Tbz); + Ci[WS(csi, 60)] = FMS(KP980785280, Tby, Tbv); + Ci[WS(csi, 4)] = FMA(KP980785280, Tby, Tbv); + Ci[WS(csi, 36)] = FMA(KP980785280, Tbu, Tbt); + Ci[WS(csi, 28)] = FMS(KP980785280, Tbu, Tbt); + Cr[WS(csr, 4)] = FMA(KP980785280, Tbm, Tah); + Cr[WS(csr, 60)] = FNMS(KP980785280, Tbm, Tah); + TbK = TbG + TbJ; + TbO = TbJ - TbG; + } + TbN = FMA(KP923879532, TbM, TbL); + TbP = FNMS(KP923879532, TbM, TbL); + TbS = TbQ + TbR; + TbU = TbQ - TbR; + Cr[WS(csr, 12)] = FMA(KP831469612, TbK, TbD); + Cr[WS(csr, 52)] = FNMS(KP831469612, TbK, TbD); + Ci[WS(csi, 52)] = FNMS(KP831469612, TbS, TbP); + Ci[WS(csi, 12)] = -(FMA(KP831469612, TbS, TbP)); + } + } + } + { + E TeN, Tf7, Tev, Tfm, Tfc, TeQ, TeX, TeW, Tfn, Tff, Tfi, TeC, Tf2, TeK, Tfh; + E TeV, Tf8; + { + E TeG, TeJ, Tfd, Tfe, Tey, TeB, TeT, TeU; + { + E Tet, Teu, Tfa, Tfb; + TcH = FMA(KP707106781, TcG, TcD); + Tet = FNMS(KP707106781, TcG, TcD); + Ci[WS(csi, 44)] = FMS(KP831469612, TbO, TbN); + Ci[WS(csi, 20)] = FMA(KP831469612, TbO, TbN); + Cr[WS(csr, 20)] = FMA(KP831469612, TbU, TbT); + Cr[WS(csr, 44)] = FNMS(KP831469612, TbU, TbT); + Teu = TdV - TdU; + TdW = TdU + TdV; + TeG = FNMS(KP923879532, TeF, TeE); + Tfa = FMA(KP923879532, TeF, TeE); + Tfb = FMA(KP923879532, TeI, TeH); + TeJ = FNMS(KP923879532, TeI, TeH); + TeN = FNMS(KP923879532, TeM, TeL); + Tfd = FMA(KP923879532, TeM, TeL); + Tf7 = FMA(KP923879532, Teu, Tet); + Tev = FNMS(KP923879532, Teu, Tet); + Tfm = FMA(KP303346683, Tfa, Tfb); + Tfc = FNMS(KP303346683, Tfb, Tfa); + Tfe = FNMS(KP923879532, TeP, TeO); + TeQ = FMA(KP923879532, TeP, TeO); + TeX = 
FNMS(KP668178637, Tew, Tex); + Tey = FMA(KP668178637, Tex, Tew); + TeB = FNMS(KP668178637, TeA, Tez); + TeW = FMA(KP668178637, Tez, TeA); + } + Tfn = FNMS(KP303346683, Tfd, Tfe); + Tff = FMA(KP303346683, Tfe, Tfd); + Tfi = Tey + TeB; + TeC = Tey - TeB; + TdT = FMA(KP707106781, TdS, TdR); + TeT = FNMS(KP707106781, TdS, TdR); + TeU = TcN - TcK; + TcO = TcK + TcN; + Tf2 = FNMS(KP534511135, TeG, TeJ); + TeK = FMA(KP534511135, TeJ, TeG); + Tfh = FNMS(KP923879532, TeU, TeT); + TeV = FMA(KP923879532, TeU, TeT); + } + { + E Tf5, TeD, TeY, Tf3, TeR; + Tf5 = FNMS(KP831469612, TeC, Tev); + TeD = FMA(KP831469612, TeC, Tev); + Tf8 = TeX + TeW; + TeY = TeW - TeX; + Tf3 = FMA(KP534511135, TeN, TeQ); + TeR = FNMS(KP534511135, TeQ, TeN); + { + E Tf1, TeZ, Tf6, Tf4, Tf0, TeS; + Tf1 = FMA(KP831469612, TeY, TeV); + TeZ = FNMS(KP831469612, TeY, TeV); + Tf6 = Tf3 - Tf2; + Tf4 = Tf2 + Tf3; + Tf0 = TeR - TeK; + TeS = TeK + TeR; + Ci[WS(csi, 54)] = FMS(KP881921264, Tf4, Tf1); + Ci[WS(csi, 10)] = FMA(KP881921264, Tf4, Tf1); + Ci[WS(csi, 42)] = FMA(KP881921264, Tf0, TeZ); + Ci[WS(csi, 22)] = FMS(KP881921264, Tf0, TeZ); + Cr[WS(csr, 10)] = FMA(KP881921264, TeS, TeD); + Cr[WS(csr, 54)] = FNMS(KP881921264, TeS, TeD); + Cr[WS(csr, 42)] = FNMS(KP881921264, Tf6, Tf5); + Cr[WS(csr, 22)] = FMA(KP881921264, Tf6, Tf5); + } + } + { + E Tf9, Tfg, Tfl, Tfo; + Tfp = FNMS(KP831469612, Tf8, Tf7); + Tf9 = FMA(KP831469612, Tf8, Tf7); + Tfg = Tfc + Tff; + Tfk = Tff - Tfc; + Tfj = FNMS(KP831469612, Tfi, Tfh); + Tfl = FMA(KP831469612, Tfi, Tfh); + Tfo = Tfm + Tfn; + Tfq = Tfm - Tfn; + Cr[WS(csr, 6)] = FMA(KP956940335, Tfg, Tf9); + Cr[WS(csr, 58)] = FNMS(KP956940335, Tfg, Tf9); + Ci[WS(csi, 58)] = FNMS(KP956940335, Tfo, Tfl); + Ci[WS(csi, 6)] = -(FMA(KP956940335, Tfo, Tfl)); + } + } + } + } + { + E T2f, T5W, T5T, T2y, T5J, T5w, T4u, T4h, T7p, T7q; + { + E Ter, Tem, Tel, Tes; + { + E TdH, Te9, TcP, Teo, Tee, TdO, TdY, TdZ, Tep, Teh, Tek, Td8, Te4, Tdu, Tej; + E TdX, Tea; + { + E Tdm, Tdt, Tef, Teg, TcY, Td7, Tec, 
Ted; + Ci[WS(csi, 38)] = FMS(KP956940335, Tfk, Tfj); + Ci[WS(csi, 26)] = FMA(KP956940335, Tfk, Tfj); + Cr[WS(csr, 26)] = FMA(KP956940335, Tfq, Tfp); + Cr[WS(csr, 38)] = FNMS(KP956940335, Tfq, Tfp); + Tdm = FMA(KP923879532, Tdl, Tde); + Tec = FNMS(KP923879532, Tdl, Tde); + Ted = FNMS(KP923879532, Tds, Tdp); + Tdt = FMA(KP923879532, Tds, Tdp); + TdH = FMA(KP923879532, TdG, Tdz); + Tef = FNMS(KP923879532, TdG, Tdz); + Te9 = FNMS(KP923879532, TcO, TcH); + TcP = FMA(KP923879532, TcO, TcH); + Teo = FMA(KP820678790, Tec, Ted); + Tee = FNMS(KP820678790, Ted, Tec); + Teg = FNMS(KP923879532, TdN, TdK); + TdO = FMA(KP923879532, TdN, TdK); + TdY = FNMS(KP198912367, TcU, TcX); + TcY = FMA(KP198912367, TcX, TcU); + Td7 = FNMS(KP198912367, Td6, Td3); + TdZ = FMA(KP198912367, Td3, Td6); + Tep = FNMS(KP820678790, Tef, Teg); + Teh = FMA(KP820678790, Teg, Tef); + Tek = Td7 - TcY; + Td8 = TcY + Td7; + Te4 = FNMS(KP098491403, Tdm, Tdt); + Tdu = FMA(KP098491403, Tdt, Tdm); + Tej = FNMS(KP923879532, TdW, TdT); + TdX = FMA(KP923879532, TdW, TdT); + } + { + E Te7, Td9, Te0, Te5, TdP; + Te7 = FNMS(KP980785280, Td8, TcP); + Td9 = FMA(KP980785280, Td8, TcP); + Tea = TdZ - TdY; + Te0 = TdY + TdZ; + Te5 = FMA(KP098491403, TdH, TdO); + TdP = FNMS(KP098491403, TdO, TdH); + { + E Te3, Te1, Te8, Te6, Te2, TdQ; + Te3 = FMA(KP980785280, Te0, TdX); + Te1 = FNMS(KP980785280, Te0, TdX); + Te8 = Te5 - Te4; + Te6 = Te4 + Te5; + Te2 = TdP - Tdu; + TdQ = Tdu + TdP; + Ci[WS(csi, 62)] = FMS(KP995184726, Te6, Te3); + Ci[WS(csi, 2)] = FMA(KP995184726, Te6, Te3); + Ci[WS(csi, 34)] = FMA(KP995184726, Te2, Te1); + Ci[WS(csi, 30)] = FMS(KP995184726, Te2, Te1); + Cr[WS(csr, 2)] = FMA(KP995184726, TdQ, Td9); + Cr[WS(csr, 62)] = FNMS(KP995184726, TdQ, Td9); + Cr[WS(csr, 34)] = FNMS(KP995184726, Te8, Te7); + Cr[WS(csr, 30)] = FMA(KP995184726, Te8, Te7); + } + } + { + E Teb, Tei, Ten, Teq; + Ter = FNMS(KP980785280, Tea, Te9); + Teb = FMA(KP980785280, Tea, Te9); + Tei = Tee + Teh; + Tem = Teh - Tee; + Tel = 
FMA(KP980785280, Tek, Tej); + Ten = FNMS(KP980785280, Tek, Tej); + Teq = Teo + Tep; + Tes = Teo - Tep; + Cr[WS(csr, 14)] = FMA(KP773010453, Tei, Teb); + Cr[WS(csr, 50)] = FNMS(KP773010453, Tei, Teb); + Ci[WS(csi, 50)] = FNMS(KP773010453, Teq, Ten); + Ci[WS(csi, 14)] = -(FMA(KP773010453, Teq, Ten)); + } + } + { + E T77, T6v, T7i, T6C, T78, T6Y, T7h, T6V, T6N, T7d, T6P, T6F, T6I; + { + E T6W, T6X, T6T, T6U, T6M; + { + E T6t, T6u, T6y, T6B; + T2f = FMA(KP923879532, T2e, T27); + T6t = FNMS(KP923879532, T2e, T27); + Ci[WS(csi, 46)] = FMS(KP773010453, Tem, Tel); + Ci[WS(csi, 18)] = FMA(KP773010453, Tem, Tel); + Cr[WS(csr, 18)] = FMA(KP773010453, Tes, Ter); + Cr[WS(csr, 46)] = FNMS(KP773010453, Tes, Ter); + T6u = T5U - T5V; + T5W = T5U + T5V; + T6W = FNMS(KP820678790, T6w, T6x); + T6y = FMA(KP820678790, T6x, T6w); + T6B = FNMS(KP820678790, T6A, T6z); + T6X = FMA(KP820678790, T6z, T6A); + T77 = FMA(KP980785280, T6u, T6t); + T6v = FNMS(KP980785280, T6u, T6t); + T7i = T6B + T6y; + T6C = T6y - T6B; + } + T5T = FMA(KP923879532, T5S, T5P); + T6T = FNMS(KP923879532, T5S, T5P); + T6U = T2x - T2o; + T2y = T2o + T2x; + T5J = T5H + T5I; + T6M = T5I - T5H; + T78 = T6X + T6W; + T6Y = T6W - T6X; + T7h = FMA(KP980785280, T6U, T6T); + T6V = FNMS(KP980785280, T6U, T6T); + T6N = FNMS(KP980785280, T6M, T6L); + T7d = FMA(KP980785280, T6M, T6L); + T6P = T5v - T5e; + T5w = T5e + T5v; + T4u = T4s + T4t; + T6F = T4s - T4t; + T6I = T4g - T3Z; + T4h = T3Z + T4g; + } + { + E T75, T7f, T7n, T7c, T7m, T76; + { + E T6D, T72, T6R, T73, T6K, T71, T6Z, T7e, T6Q, T74, T70, T6S; + T75 = FNMS(KP773010453, T6C, T6v); + T6D = FMA(KP773010453, T6C, T6v); + T7e = FNMS(KP980785280, T6P, T6O); + T6Q = FMA(KP980785280, T6P, T6O); + { + E T7a, T6G, T7b, T6J; + T7a = FMA(KP980785280, T6F, T6E); + T6G = FNMS(KP980785280, T6F, T6E); + T7b = FMA(KP980785280, T6I, T6H); + T6J = FNMS(KP980785280, T6I, T6H); + T7f = FMA(KP357805721, T7e, T7d); + T7n = FNMS(KP357805721, T7d, T7e); + T72 = FMA(KP472964775, T6N, T6Q); + T6R 
= FNMS(KP472964775, T6Q, T6N); + T7c = FMA(KP357805721, T7b, T7a); + T7m = FNMS(KP357805721, T7a, T7b); + T73 = FMA(KP472964775, T6G, T6J); + T6K = FNMS(KP472964775, T6J, T6G); + } + T71 = FNMS(KP773010453, T6Y, T6V); + T6Z = FMA(KP773010453, T6Y, T6V); + T74 = T72 - T73; + T76 = T73 + T72; + T70 = T6R - T6K; + T6S = T6K + T6R; + Ci[WS(csi, 55)] = FMA(KP903989293, T74, T71); + Ci[WS(csi, 9)] = FMS(KP903989293, T74, T71); + Cr[WS(csr, 9)] = FMA(KP903989293, T6S, T6D); + Cr[WS(csr, 55)] = FNMS(KP903989293, T6S, T6D); + Ci[WS(csi, 41)] = FMS(KP903989293, T70, T6Z); + Ci[WS(csi, 23)] = FMA(KP903989293, T70, T6Z); + } + { + E T7k, T7j, T7l, T7o, T79, T7g; + T7p = FNMS(KP773010453, T78, T77); + T79 = FMA(KP773010453, T78, T77); + T7g = T7c + T7f; + T7k = T7f - T7c; + T7j = FNMS(KP773010453, T7i, T7h); + T7l = FMA(KP773010453, T7i, T7h); + Cr[WS(csr, 23)] = FMA(KP903989293, T76, T75); + Cr[WS(csr, 41)] = FNMS(KP903989293, T76, T75); + Cr[WS(csr, 7)] = FMA(KP941544065, T7g, T79); + Cr[WS(csr, 57)] = FNMS(KP941544065, T7g, T79); + T7o = T7m - T7n; + T7q = T7m + T7n; + Ci[WS(csi, 57)] = FMS(KP941544065, T7o, T7l); + Ci[WS(csi, 7)] = FMA(KP941544065, T7o, T7l); + Ci[WS(csi, 39)] = FMA(KP941544065, T7k, T7j); + Ci[WS(csi, 25)] = FMS(KP941544065, T7k, T7j); + } + } + } + } + { + E T7t, T8A, T8x, T7A, T8r, T8k, T88, T81, Ta3, Ta4, T6r, T6s; + { + E T9L, T99, T9W, T9g, T9M, T9C, T9V, T9z, T9k, T9O, T9T, Ta0, T9H, T9v, T9m; + { + E T9B, T9c, T9f, T9A, T97, T98; + T7t = FMA(KP923879532, T7s, T7r); + T97 = FNMS(KP923879532, T7s, T7r); + T98 = T8z - T8y; + T8A = T8y + T8z; + T9B = FNMS(KP534511135, T9a, T9b); + T9c = FMA(KP534511135, T9b, T9a); + Cr[WS(csr, 25)] = FNMS(KP941544065, T7q, T7p); + Cr[WS(csr, 39)] = FMA(KP941544065, T7q, T7p); + T9L = FMA(KP831469612, T98, T97); + T99 = FNMS(KP831469612, T98, T97); + T9f = FNMS(KP534511135, T9e, T9d); + T9A = FMA(KP534511135, T9d, T9e); + { + E T9x, T9y, T9q, T9t; + T8x = FMA(KP923879532, T8w, T8v); + T9x = FNMS(KP923879532, T8w, T8v); + 
T9W = T9c + T9f; + T9g = T9c - T9f; + T9M = T9B + T9A; + T9C = T9A - T9B; + T9y = T7z - T7w; + T7A = T7w + T7z; + T8r = T8p + T8q; + T9q = T8p - T8q; + T9t = T8j - T8g; + T8k = T8g + T8j; + { + E T9R, T9r, T9S, T9u, T9j; + T88 = T86 + T87; + T9j = T87 - T86; + T9V = FNMS(KP831469612, T9y, T9x); + T9z = FMA(KP831469612, T9y, T9x); + T9R = FMA(KP831469612, T9q, T9p); + T9r = FNMS(KP831469612, T9q, T9p); + T9S = FMA(KP831469612, T9t, T9s); + T9u = FNMS(KP831469612, T9t, T9s); + T9k = FNMS(KP831469612, T9j, T9i); + T9O = FMA(KP831469612, T9j, T9i); + T9T = FNMS(KP250486960, T9S, T9R); + Ta0 = FMA(KP250486960, T9R, T9S); + T9H = FNMS(KP599376933, T9r, T9u); + T9v = FMA(KP599376933, T9u, T9r); + T9m = T7X - T80; + T81 = T7X + T80; + } + } + } + { + E T9J, T9h, T9F, T9D, T9P, T9n; + T9J = FNMS(KP881921264, T9g, T99); + T9h = FMA(KP881921264, T9g, T99); + T9F = FMA(KP881921264, T9C, T9z); + T9D = FNMS(KP881921264, T9C, T9z); + T9P = FMA(KP831469612, T9m, T9l); + T9n = FNMS(KP831469612, T9m, T9l); + { + E T9Y, T9X, T9Z, Ta2; + { + E T9N, Ta1, T9G, T9o, T9U, T9Q; + Ta3 = FNMS(KP881921264, T9M, T9L); + T9N = FMA(KP881921264, T9M, T9L); + T9Q = FNMS(KP250486960, T9P, T9O); + Ta1 = FMA(KP250486960, T9O, T9P); + T9G = FNMS(KP599376933, T9k, T9n); + T9o = FMA(KP599376933, T9n, T9k); + T9U = T9Q + T9T; + T9Y = T9T - T9Q; + T9X = FNMS(KP881921264, T9W, T9V); + T9Z = FMA(KP881921264, T9W, T9V); + { + E T9K, T9I, T9E, T9w; + T9K = T9G + T9H; + T9I = T9G - T9H; + T9E = T9v - T9o; + T9w = T9o + T9v; + Cr[WS(csr, 5)] = FMA(KP970031253, T9U, T9N); + Cr[WS(csr, 59)] = FNMS(KP970031253, T9U, T9N); + Cr[WS(csr, 21)] = FNMS(KP857728610, T9K, T9J); + Cr[WS(csr, 43)] = FMA(KP857728610, T9K, T9J); + Ci[WS(csi, 53)] = FMS(KP857728610, T9I, T9F); + Ci[WS(csi, 11)] = FMA(KP857728610, T9I, T9F); + Ci[WS(csi, 43)] = FMA(KP857728610, T9E, T9D); + Ci[WS(csi, 21)] = FMS(KP857728610, T9E, T9D); + Cr[WS(csr, 11)] = FMA(KP857728610, T9w, T9h); + Cr[WS(csr, 53)] = FNMS(KP857728610, T9w, T9h); + Ta2 = Ta0 - 
Ta1; + Ta4 = Ta1 + Ta0; + } + } + Ci[WS(csi, 59)] = FMA(KP970031253, Ta2, T9Z); + Ci[WS(csi, 5)] = FMS(KP970031253, Ta2, T9Z); + Ci[WS(csi, 37)] = FMS(KP970031253, T9Y, T9X); + Ci[WS(csi, 27)] = FMA(KP970031253, T9Y, T9X); + } + } + } + { + E T69, T2z, T6k, T3g, T6a, T60, T6j, T5X, T4i, T6c, T6h, T6p, T64, T5L; + { + E T5Y, T2U, T3f, T5Z; + T5Y = FMA(KP098491403, T2M, T2T); + T2U = FNMS(KP098491403, T2T, T2M); + Cr[WS(csr, 27)] = FMA(KP970031253, Ta4, Ta3); + Cr[WS(csr, 37)] = FNMS(KP970031253, Ta4, Ta3); + T69 = FNMS(KP980785280, T2y, T2f); + T2z = FMA(KP980785280, T2y, T2f); + T3f = FMA(KP098491403, T3e, T37); + T5Z = FNMS(KP098491403, T37, T3e); + T6k = T3f - T2U; + T3g = T2U + T3f; + T6a = T5Y - T5Z; + T60 = T5Y + T5Z; + { + E T6f, T5x, T6g, T5K; + T6j = FNMS(KP980785280, T5W, T5T); + T5X = FMA(KP980785280, T5W, T5T); + T6f = FNMS(KP980785280, T5w, T4X); + T5x = FMA(KP980785280, T5w, T4X); + T6g = FNMS(KP980785280, T5J, T5G); + T5K = FMA(KP980785280, T5J, T5G); + T4i = FMA(KP980785280, T4h, T3I); + T6c = FNMS(KP980785280, T4h, T3I); + T6h = FMA(KP906347169, T6g, T6f); + T6p = FNMS(KP906347169, T6f, T6g); + T64 = FMA(KP049126849, T5x, T5K); + T5L = FNMS(KP049126849, T5K, T5x); + } + } + { + E T67, T3h, T63, T61, T6d, T4v; + T67 = FNMS(KP995184726, T3g, T2z); + T3h = FMA(KP995184726, T3g, T2z); + T63 = FMA(KP995184726, T60, T5X); + T61 = FNMS(KP995184726, T60, T5X); + T6d = FNMS(KP980785280, T4u, T4r); + T4v = FMA(KP980785280, T4u, T4r); + { + E T6m, T6l, T6n, T6q; + { + E T6b, T6o, T65, T4w, T6i, T6e; + T6r = FNMS(KP995184726, T6a, T69); + T6b = FMA(KP995184726, T6a, T69); + T6e = FMA(KP906347169, T6d, T6c); + T6o = FNMS(KP906347169, T6c, T6d); + T65 = FMA(KP049126849, T4i, T4v); + T4w = FNMS(KP049126849, T4v, T4i); + T6i = T6e + T6h; + T6m = T6h - T6e; + T6l = FNMS(KP995184726, T6k, T6j); + T6n = FMA(KP995184726, T6k, T6j); + { + E T68, T66, T62, T5M; + T68 = T65 + T64; + T66 = T64 - T65; + T62 = T5L - T4w; + T5M = T4w + T5L; + Cr[WS(csr, 15)] = 
FMA(KP740951125, T6i, T6b); + Cr[WS(csr, 49)] = FNMS(KP740951125, T6i, T6b); + Cr[WS(csr, 31)] = FMA(KP998795456, T68, T67); + Cr[WS(csr, 33)] = FNMS(KP998795456, T68, T67); + Ci[WS(csi, 63)] = FMA(KP998795456, T66, T63); + Ci[WS(csi, 1)] = FMS(KP998795456, T66, T63); + Ci[WS(csi, 33)] = FMS(KP998795456, T62, T61); + Ci[WS(csi, 31)] = FMA(KP998795456, T62, T61); + Cr[WS(csr, 1)] = FMA(KP998795456, T5M, T3h); + Cr[WS(csr, 63)] = FNMS(KP998795456, T5M, T3h); + T6q = T6o - T6p; + T6s = T6o + T6p; + } + } + Ci[WS(csi, 49)] = FMS(KP740951125, T6q, T6n); + Ci[WS(csi, 15)] = FMA(KP740951125, T6q, T6n); + Ci[WS(csi, 47)] = FMA(KP740951125, T6m, T6l); + Ci[WS(csi, 17)] = FMS(KP740951125, T6m, T6l); + } + } + } + { + E T8N, T7B, T8Y, T7Q, T8O, T8E, T8X, T8B, T82, T8Q, T8V, T92, T8J, T8t; + { + E T8C, T7I, T7P, T8D; + T8C = FNMS(KP303346683, T7E, T7H); + T7I = FMA(KP303346683, T7H, T7E); + Cr[WS(csr, 17)] = FNMS(KP740951125, T6s, T6r); + Cr[WS(csr, 47)] = FMA(KP740951125, T6s, T6r); + T8N = FNMS(KP831469612, T7A, T7t); + T7B = FMA(KP831469612, T7A, T7t); + T7P = FNMS(KP303346683, T7O, T7L); + T8D = FMA(KP303346683, T7L, T7O); + T8Y = T7P - T7I; + T7Q = T7I + T7P; + T8O = T8D - T8C; + T8E = T8C + T8D; + { + E T8T, T8l, T8U, T8s; + T8X = FNMS(KP831469612, T8A, T8x); + T8B = FMA(KP831469612, T8A, T8x); + T8T = FNMS(KP831469612, T8k, T8d); + T8l = FMA(KP831469612, T8k, T8d); + T8U = FNMS(KP831469612, T8r, T8o); + T8s = FMA(KP831469612, T8r, T8o); + T82 = FMA(KP831469612, T81, T7U); + T8Q = FNMS(KP831469612, T81, T7U); + T8V = FNMS(KP741650546, T8U, T8T); + T92 = FMA(KP741650546, T8T, T8U); + T8J = FNMS(KP148335987, T8l, T8s); + T8t = FMA(KP148335987, T8s, T8l); + } + } + { + E T8L, T7R, T8H, T8F, T8R, T89; + T8L = FNMS(KP956940335, T7Q, T7B); + T7R = FMA(KP956940335, T7Q, T7B); + T8H = FMA(KP956940335, T8E, T8B); + T8F = FNMS(KP956940335, T8E, T8B); + T8R = FNMS(KP831469612, T88, T85); + T89 = FMA(KP831469612, T88, T85); + { + E T90, T8Z, T91, T94; + { + E T8P, T93, T8I, T8a, 
T8W, T8S; + T95 = FNMS(KP956940335, T8O, T8N); + T8P = FMA(KP956940335, T8O, T8N); + T8S = FNMS(KP741650546, T8R, T8Q); + T93 = FMA(KP741650546, T8Q, T8R); + T8I = FNMS(KP148335987, T82, T89); + T8a = FMA(KP148335987, T89, T82); + T8W = T8S + T8V; + T90 = T8V - T8S; + T8Z = FMA(KP956940335, T8Y, T8X); + T91 = FNMS(KP956940335, T8Y, T8X); + { + E T8M, T8K, T8G, T8u; + T8M = T8I + T8J; + T8K = T8I - T8J; + T8G = T8t - T8a; + T8u = T8a + T8t; + Cr[WS(csr, 13)] = FMA(KP803207531, T8W, T8P); + Cr[WS(csr, 51)] = FNMS(KP803207531, T8W, T8P); + Cr[WS(csr, 29)] = FNMS(KP989176509, T8M, T8L); + Cr[WS(csr, 35)] = FMA(KP989176509, T8M, T8L); + Ci[WS(csi, 61)] = FMS(KP989176509, T8K, T8H); + Ci[WS(csi, 3)] = FMA(KP989176509, T8K, T8H); + Ci[WS(csi, 35)] = FMA(KP989176509, T8G, T8F); + Ci[WS(csi, 29)] = FMS(KP989176509, T8G, T8F); + Cr[WS(csr, 3)] = FMA(KP989176509, T8u, T7R); + Cr[WS(csr, 61)] = FNMS(KP989176509, T8u, T7R); + T94 = T92 - T93; + T96 = T93 + T92; + } + } + Ci[WS(csi, 51)] = FMA(KP803207531, T94, T91); + Ci[WS(csi, 13)] = FMS(KP803207531, T94, T91); + Ci[WS(csi, 45)] = FMS(KP803207531, T90, T8Z); + Ci[WS(csi, 19)] = FMA(KP803207531, T90, T8Z); + } + } + } + } + } + } + } + Cr[WS(csr, 19)] = FMA(KP803207531, T96, T95); + Cr[WS(csr, 45)] = FNMS(KP803207531, T96, T95); + } + } +} + +static const kr2c_desc desc = { 128, "r2cf_128", {440, 0, 516, 0}, &GENUS }; /* Codelet descriptor handed to kr2c_register below: 128, the name "r2cf_128", a four-number tuple and &GENUS. NOTE(review): 128 is presumably the transform size and {440, 0, 516, 0} the operation counts of the variant that precedes the HAVE_FMA #else; confirm against the kr2c_desc declaration. */ + +void X(codelet_r2cf_128) (planner *p) { /* Registers the r2cf_128 kernel, together with desc, with planner p through kr2c_register. */ + X(kr2c_register) (p, r2cf_128, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 128 -name r2cf_128 -include r2cf.h */ + +/* + * This function contains 956 FP additions, 330 FP multiplications, + * (or, 812 additions, 186 multiplications, 144 fused multiply/add), + * 186 stack variables, 31 constants, and 256 memory accesses + */ +#include "r2cf.h" + +static void r2cf_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP803207531, 
+0.803207531480644909806676512963141923879569427); + DK(KP595699304, +0.595699304492433343467036528829969889511926338); + DK(KP146730474, +0.146730474455361751658850129646717819706215317); + DK(KP989176509, +0.989176509964780973451673738016243063983689533); + DK(KP740951125, +0.740951125354959091175616897495162729728955309); + DK(KP671558954, +0.671558954847018400625376850427421803228750632); + DK(KP049067674, +0.049067674327418014254954976942682658314745363); + DK(KP998795456, +0.998795456205172392714771604759100694443203615); + DK(KP242980179, +0.242980179903263889948274162077471118320990783); + DK(KP970031253, +0.970031253194543992603984207286100251456865962); + DK(KP514102744, +0.514102744193221726593693838968815772608049120); + DK(KP857728610, +0.857728610000272069902269984284770137042490799); + DK(KP336889853, +0.336889853392220050689253212619147570477766780); + DK(KP941544065, +0.941544065183020778412509402599502357185589796); + DK(KP427555093, +0.427555093430282094320966856888798534304578629); + DK(KP903989293, +0.903989293123443331586200297230537048710132025); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + 
DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(512, rs), MAKE_VOLATILE_STRIDE(512, csr), MAKE_VOLATILE_STRIDE(512, csi)) { + E TcD, TdU, T27, T7r, T5S, T8y, Tf, Ta5, Tu, Tbq, TcG, TdV, T2e, T8z, T5V; + E T7s, TK, Ta6, TcK, TdX, T2o, T5X, T7w, T8B, TZ, Ta7, TcN, TdY, T2x, T5Y; + E T7z, T8C, T1g, Taa, TcU, TeA, TcX, Tez, T1v, Tab, T2M, T6z, T7E, T9e, T7H; + E T9d, T2T, T6A, T4X, T6L, Tdz, TeL, TdK, TeP, T5G, T6P, T8d, T9p, TaV, Tc3; + E Tbi, Tc4, T8o, T9t, T3I, T6H, Tde, TeH, Tdp, TeF, T4r, T6F, T7U, T9l, Tao; + E TbW, TaL, TbX, T85, T9j, T1L, Tad, Td3, Tew, Td6, Tex, T20, Tae, T37, T6x; + E T7L, T9a, T7O, T9b, T3e, T6w, TbZ, Tc0, T3Z, T4s, Tds, TeI, T4g, T4t, T80; + E T87, Tdl, TeE, T7X, T86, TaD, TaM, Tc6, Tc7, T5e, T5H, TdN, TeM, T5v, T5I; + E T8j, T8q, TdG, TeO, T8g, T8p, Tba, Tbj; + { + E T3, T23, Td, T25, T6, T5R, Ta, T24; + { + E T1, T2, Tb, Tc; + T1 = R0[0]; + T2 = R0[WS(rs, 32)]; + T3 = T1 + T2; + T23 = T1 - T2; + Tb = R0[WS(rs, 56)]; + Tc = R0[WS(rs, 24)]; + Td = Tb + Tc; + T25 = Tb - Tc; + } + { + E T4, T5, T8, T9; + T4 = R0[WS(rs, 16)]; + T5 = R0[WS(rs, 48)]; + T6 = T4 + T5; + T5R = T4 - T5; + T8 = R0[WS(rs, 8)]; + T9 = R0[WS(rs, 40)]; + Ta = T8 + T9; + T24 = T8 - T9; + } + TcD = T3 - T6; + TdU = Td - Ta; + { + E T26, T5Q, T7, Te; + T26 = KP707106781 * (T24 + T25); + T27 = T23 + T26; + T7r = T23 - T26; + T5Q = KP707106781 * (T25 - T24); + T5S = T5Q - T5R; + T8y = T5R + T5Q; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + Ta5 = T7 - Te; + } + } + { + E Ti, T28, Ts, T2c, Tl, T29, Tp, T2b; + { + E Tg, Th, Tq, Tr; + Tg = R0[WS(rs, 4)]; + Th = R0[WS(rs, 36)]; + Ti = Tg + Th; + T28 = Tg - Th; + Tq = R0[WS(rs, 12)]; + Tr = R0[WS(rs, 44)]; + Ts = Tq + Tr; + T2c = Tq - Tr; + } + { + E Tj, Tk, Tn, To; + Tj = R0[WS(rs, 20)]; + Tk = R0[WS(rs, 52)]; + Tl 
= Tj + Tk; + T29 = Tj - Tk; + Tn = R0[WS(rs, 60)]; + To = R0[WS(rs, 28)]; + Tp = Tn + To; + T2b = Tn - To; + } + { + E Tm, Tt, TcE, TcF; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + Tbq = Tt - Tm; + TcE = Ti - Tl; + TcF = Tp - Ts; + TcG = KP707106781 * (TcE + TcF); + TdV = KP707106781 * (TcF - TcE); + } + { + E T2a, T2d, T5T, T5U; + T2a = FNMS(KP382683432, T29, KP923879532 * T28); + T2d = FMA(KP923879532, T2b, KP382683432 * T2c); + T2e = T2a + T2d; + T8z = T2d - T2a; + T5T = FNMS(KP923879532, T2c, KP382683432 * T2b); + T5U = FMA(KP382683432, T28, KP923879532 * T29); + T5V = T5T - T5U; + T7s = T5U + T5T; + } + } + { + E Ty, T2g, TB, T2m, TF, T2l, TI, T2j; + { + E Tw, Tx, Tz, TA; + Tw = R0[WS(rs, 2)]; + Tx = R0[WS(rs, 34)]; + Ty = Tw + Tx; + T2g = Tw - Tx; + Tz = R0[WS(rs, 18)]; + TA = R0[WS(rs, 50)]; + TB = Tz + TA; + T2m = Tz - TA; + { + E TD, TE, T2h, TG, TH, T2i; + TD = R0[WS(rs, 10)]; + TE = R0[WS(rs, 42)]; + T2h = TD - TE; + TG = R0[WS(rs, 58)]; + TH = R0[WS(rs, 26)]; + T2i = TG - TH; + TF = TD + TE; + T2l = KP707106781 * (T2i - T2h); + TI = TG + TH; + T2j = KP707106781 * (T2h + T2i); + } + } + { + E TC, TJ, TcI, TcJ; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + Ta6 = TC - TJ; + TcI = Ty - TB; + TcJ = TI - TF; + TcK = FMA(KP923879532, TcI, KP382683432 * TcJ); + TdX = FNMS(KP382683432, TcI, KP923879532 * TcJ); + } + { + E T2k, T2n, T7u, T7v; + T2k = T2g + T2j; + T2n = T2l - T2m; + T2o = FMA(KP980785280, T2k, KP195090322 * T2n); + T5X = FNMS(KP195090322, T2k, KP980785280 * T2n); + T7u = T2g - T2j; + T7v = T2m + T2l; + T7w = FMA(KP831469612, T7u, KP555570233 * T7v); + T8B = FNMS(KP555570233, T7u, KP831469612 * T7v); + } + } + { + E TN, T2p, TQ, T2v, TU, T2u, TX, T2s; + { + E TL, TM, TO, TP; + TL = R0[WS(rs, 62)]; + TM = R0[WS(rs, 30)]; + TN = TL + TM; + T2p = TL - TM; + TO = R0[WS(rs, 14)]; + TP = R0[WS(rs, 46)]; + TQ = TO + TP; + T2v = TO - TP; + { + E TS, TT, T2q, TV, TW, T2r; + TS = R0[WS(rs, 6)]; + TT = R0[WS(rs, 38)]; + T2q = TS - TT; + TV = 
R0[WS(rs, 54)]; + TW = R0[WS(rs, 22)]; + T2r = TV - TW; + TU = TS + TT; + T2u = KP707106781 * (T2r - T2q); + TX = TV + TW; + T2s = KP707106781 * (T2q + T2r); + } + } + { + E TR, TY, TcL, TcM; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + Ta7 = TR - TY; + TcL = TN - TQ; + TcM = TX - TU; + TcN = FNMS(KP382683432, TcM, KP923879532 * TcL); + TdY = FMA(KP382683432, TcL, KP923879532 * TcM); + } + { + E T2t, T2w, T7x, T7y; + T2t = T2p + T2s; + T2w = T2u - T2v; + T2x = FNMS(KP195090322, T2w, KP980785280 * T2t); + T5Y = FMA(KP195090322, T2t, KP980785280 * T2w); + T7x = T2p - T2s; + T7y = T2v + T2u; + T7z = FNMS(KP555570233, T7y, KP831469612 * T7x); + T8C = FMA(KP555570233, T7x, KP831469612 * T7y); + } + } + { + E T14, T2N, T17, T2D, T1b, T2O, T1e, T2C, T1j, T1m, T2K, TcR, T2Q, T1q, T1t; + E T2H, TcS, T2R; + { + E T12, T13, T15, T16; + T12 = R0[WS(rs, 1)]; + T13 = R0[WS(rs, 33)]; + T14 = T12 + T13; + T2N = T12 - T13; + T15 = R0[WS(rs, 17)]; + T16 = R0[WS(rs, 49)]; + T17 = T15 + T16; + T2D = T15 - T16; + } + { + E T19, T1a, T2B, T1c, T1d, T2A; + T19 = R0[WS(rs, 9)]; + T1a = R0[WS(rs, 41)]; + T2B = T19 - T1a; + T1c = R0[WS(rs, 57)]; + T1d = R0[WS(rs, 25)]; + T2A = T1c - T1d; + T1b = T19 + T1a; + T2O = KP707106781 * (T2B + T2A); + T1e = T1c + T1d; + T2C = KP707106781 * (T2A - T2B); + } + { + E T2I, T2J, T2F, T2G; + { + E T1h, T1i, T1k, T1l; + T1h = R0[WS(rs, 5)]; + T1i = R0[WS(rs, 37)]; + T1j = T1h + T1i; + T2I = T1h - T1i; + T1k = R0[WS(rs, 21)]; + T1l = R0[WS(rs, 53)]; + T1m = T1k + T1l; + T2J = T1k - T1l; + } + T2K = FMA(KP382683432, T2I, KP923879532 * T2J); + TcR = T1j - T1m; + T2Q = FNMS(KP382683432, T2J, KP923879532 * T2I); + { + E T1o, T1p, T1r, T1s; + T1o = R0[WS(rs, 61)]; + T1p = R0[WS(rs, 29)]; + T1q = T1o + T1p; + T2F = T1o - T1p; + T1r = R0[WS(rs, 13)]; + T1s = R0[WS(rs, 45)]; + T1t = T1r + T1s; + T2G = T1r - T1s; + } + T2H = FNMS(KP923879532, T2G, KP382683432 * T2F); + TcS = T1q - T1t; + T2R = FMA(KP923879532, T2F, KP382683432 * T2G); + } + { + E T18, T1f, TcQ, 
TcT; + T18 = T14 + T17; + T1f = T1b + T1e; + T1g = T18 + T1f; + Taa = T18 - T1f; + TcQ = T14 - T17; + TcT = KP707106781 * (TcR + TcS); + TcU = TcQ + TcT; + TeA = TcQ - TcT; + } + { + E TcV, TcW, T1n, T1u; + TcV = T1e - T1b; + TcW = KP707106781 * (TcS - TcR); + TcX = TcV + TcW; + Tez = TcW - TcV; + T1n = T1j + T1m; + T1u = T1q + T1t; + T1v = T1n + T1u; + Tab = T1u - T1n; + } + { + E T2E, T2L, T7C, T7D; + T2E = T2C - T2D; + T2L = T2H - T2K; + T2M = T2E + T2L; + T6z = T2L - T2E; + T7C = T2N - T2O; + T7D = T2K + T2H; + T7E = T7C + T7D; + T9e = T7C - T7D; + } + { + E T7F, T7G, T2P, T2S; + T7F = T2D + T2C; + T7G = T2R - T2Q; + T7H = T7F + T7G; + T9d = T7G - T7F; + T2P = T2N + T2O; + T2S = T2Q + T2R; + T2T = T2P + T2S; + T6A = T2P - T2S; + } + } + { + E T4z, TaP, T5B, TaQ, T4G, TaT, T5y, TaS, Tbf, Tbg, T4O, Tdw, T5E, Tbc, Tbd; + E T4V, Tdx, T5D; + { + E T4x, T4y, T5z, T5A; + T4x = R1[WS(rs, 63)]; + T4y = R1[WS(rs, 31)]; + T4z = T4x - T4y; + TaP = T4x + T4y; + T5z = R1[WS(rs, 15)]; + T5A = R1[WS(rs, 47)]; + T5B = T5z - T5A; + TaQ = T5z + T5A; + } + { + E T4A, T4B, T4C, T4D, T4E, T4F; + T4A = R1[WS(rs, 7)]; + T4B = R1[WS(rs, 39)]; + T4C = T4A - T4B; + T4D = R1[WS(rs, 55)]; + T4E = R1[WS(rs, 23)]; + T4F = T4D - T4E; + T4G = KP707106781 * (T4C + T4F); + TaT = T4D + T4E; + T5y = KP707106781 * (T4F - T4C); + TaS = T4A + T4B; + } + { + E T4K, T4N, T4R, T4U; + { + E T4I, T4J, T4L, T4M; + T4I = R1[WS(rs, 3)]; + T4J = R1[WS(rs, 35)]; + T4K = T4I - T4J; + Tbf = T4I + T4J; + T4L = R1[WS(rs, 19)]; + T4M = R1[WS(rs, 51)]; + T4N = T4L - T4M; + Tbg = T4L + T4M; + } + T4O = FNMS(KP382683432, T4N, KP923879532 * T4K); + Tdw = Tbf - Tbg; + T5E = FMA(KP382683432, T4K, KP923879532 * T4N); + { + E T4P, T4Q, T4S, T4T; + T4P = R1[WS(rs, 59)]; + T4Q = R1[WS(rs, 27)]; + T4R = T4P - T4Q; + Tbc = T4P + T4Q; + T4S = R1[WS(rs, 11)]; + T4T = R1[WS(rs, 43)]; + T4U = T4S - T4T; + Tbd = T4S + T4T; + } + T4V = FMA(KP923879532, T4R, KP382683432 * T4U); + Tdx = Tbc - Tbd; + T5D = FNMS(KP923879532, T4U, 
KP382683432 * T4R); + } + { + E T4H, T4W, Tdv, Tdy; + T4H = T4z + T4G; + T4W = T4O + T4V; + T4X = T4H + T4W; + T6L = T4H - T4W; + Tdv = TaP - TaQ; + Tdy = KP707106781 * (Tdw + Tdx); + Tdz = Tdv + Tdy; + TeL = Tdv - Tdy; + } + { + E TdI, TdJ, T5C, T5F; + TdI = TaT - TaS; + TdJ = KP707106781 * (Tdx - Tdw); + TdK = TdI + TdJ; + TeP = TdJ - TdI; + T5C = T5y - T5B; + T5F = T5D - T5E; + T5G = T5C + T5F; + T6P = T5F - T5C; + } + { + E T8b, T8c, TaR, TaU; + T8b = T4z - T4G; + T8c = T5E + T5D; + T8d = T8b + T8c; + T9p = T8b - T8c; + TaR = TaP + TaQ; + TaU = TaS + TaT; + TaV = TaR - TaU; + Tc3 = TaR + TaU; + } + { + E Tbe, Tbh, T8m, T8n; + Tbe = Tbc + Tbd; + Tbh = Tbf + Tbg; + Tbi = Tbe - Tbh; + Tc4 = Tbh + Tbe; + T8m = T5B + T5y; + T8n = T4V - T4O; + T8o = T8m + T8n; + T9t = T8n - T8m; + } + } + { + E T3k, Tai, T4m, Taj, T3r, Tam, T4j, Tal, TaI, TaJ, T3z, Tdb, T4p, TaF, TaG; + E T3G, Tdc, T4o; + { + E T3i, T3j, T4k, T4l; + T3i = R1[0]; + T3j = R1[WS(rs, 32)]; + T3k = T3i - T3j; + Tai = T3i + T3j; + T4k = R1[WS(rs, 16)]; + T4l = R1[WS(rs, 48)]; + T4m = T4k - T4l; + Taj = T4k + T4l; + } + { + E T3l, T3m, T3n, T3o, T3p, T3q; + T3l = R1[WS(rs, 8)]; + T3m = R1[WS(rs, 40)]; + T3n = T3l - T3m; + T3o = R1[WS(rs, 56)]; + T3p = R1[WS(rs, 24)]; + T3q = T3o - T3p; + T3r = KP707106781 * (T3n + T3q); + Tam = T3o + T3p; + T4j = KP707106781 * (T3q - T3n); + Tal = T3l + T3m; + } + { + E T3v, T3y, T3C, T3F; + { + E T3t, T3u, T3w, T3x; + T3t = R1[WS(rs, 4)]; + T3u = R1[WS(rs, 36)]; + T3v = T3t - T3u; + TaI = T3t + T3u; + T3w = R1[WS(rs, 20)]; + T3x = R1[WS(rs, 52)]; + T3y = T3w - T3x; + TaJ = T3w + T3x; + } + T3z = FNMS(KP382683432, T3y, KP923879532 * T3v); + Tdb = TaI - TaJ; + T4p = FMA(KP382683432, T3v, KP923879532 * T3y); + { + E T3A, T3B, T3D, T3E; + T3A = R1[WS(rs, 60)]; + T3B = R1[WS(rs, 28)]; + T3C = T3A - T3B; + TaF = T3A + T3B; + T3D = R1[WS(rs, 12)]; + T3E = R1[WS(rs, 44)]; + T3F = T3D - T3E; + TaG = T3D + T3E; + } + T3G = FMA(KP923879532, T3C, KP382683432 * T3F); + Tdc = TaF - TaG; 
+ T4o = FNMS(KP923879532, T3F, KP382683432 * T3C); + } + { + E T3s, T3H, Tda, Tdd; + T3s = T3k + T3r; + T3H = T3z + T3G; + T3I = T3s + T3H; + T6H = T3s - T3H; + Tda = Tai - Taj; + Tdd = KP707106781 * (Tdb + Tdc); + Tde = Tda + Tdd; + TeH = Tda - Tdd; + } + { + E Tdn, Tdo, T4n, T4q; + Tdn = Tam - Tal; + Tdo = KP707106781 * (Tdc - Tdb); + Tdp = Tdn + Tdo; + TeF = Tdo - Tdn; + T4n = T4j - T4m; + T4q = T4o - T4p; + T4r = T4n + T4q; + T6F = T4q - T4n; + } + { + E T7S, T7T, Tak, Tan; + T7S = T3k - T3r; + T7T = T4p + T4o; + T7U = T7S + T7T; + T9l = T7S - T7T; + Tak = Tai + Taj; + Tan = Tal + Tam; + Tao = Tak - Tan; + TbW = Tak + Tan; + } + { + E TaH, TaK, T83, T84; + TaH = TaF + TaG; + TaK = TaI + TaJ; + TaL = TaH - TaK; + TbX = TaK + TaH; + T83 = T4m + T4j; + T84 = T3G - T3z; + T85 = T83 + T84; + T9j = T84 - T83; + } + } + { + E T1z, T2V, T1C, T39, T1G, T38, T1J, T2Y, T1O, T1R, T32, Td0, T3c, T1V, T1Y; + E T35, Td1, T3b; + { + E T1x, T1y, T1A, T1B; + T1x = R0[WS(rs, 63)]; + T1y = R0[WS(rs, 31)]; + T1z = T1x + T1y; + T2V = T1x - T1y; + T1A = R0[WS(rs, 15)]; + T1B = R0[WS(rs, 47)]; + T1C = T1A + T1B; + T39 = T1A - T1B; + } + { + E T1E, T1F, T2W, T1H, T1I, T2X; + T1E = R0[WS(rs, 7)]; + T1F = R0[WS(rs, 39)]; + T2W = T1E - T1F; + T1H = R0[WS(rs, 55)]; + T1I = R0[WS(rs, 23)]; + T2X = T1H - T1I; + T1G = T1E + T1F; + T38 = KP707106781 * (T2X - T2W); + T1J = T1H + T1I; + T2Y = KP707106781 * (T2W + T2X); + } + { + E T30, T31, T33, T34; + { + E T1M, T1N, T1P, T1Q; + T1M = R0[WS(rs, 3)]; + T1N = R0[WS(rs, 35)]; + T1O = T1M + T1N; + T30 = T1M - T1N; + T1P = R0[WS(rs, 19)]; + T1Q = R0[WS(rs, 51)]; + T1R = T1P + T1Q; + T31 = T1P - T1Q; + } + T32 = FNMS(KP382683432, T31, KP923879532 * T30); + Td0 = T1O - T1R; + T3c = FMA(KP382683432, T30, KP923879532 * T31); + { + E T1T, T1U, T1W, T1X; + T1T = R0[WS(rs, 59)]; + T1U = R0[WS(rs, 27)]; + T1V = T1T + T1U; + T33 = T1T - T1U; + T1W = R0[WS(rs, 11)]; + T1X = R0[WS(rs, 43)]; + T1Y = T1W + T1X; + T34 = T1W - T1X; + } + T35 = FMA(KP923879532, 
T33, KP382683432 * T34); + Td1 = T1V - T1Y; + T3b = FNMS(KP923879532, T34, KP382683432 * T33); + } + { + E T1D, T1K, TcZ, Td2; + T1D = T1z + T1C; + T1K = T1G + T1J; + T1L = T1D + T1K; + Tad = T1D - T1K; + TcZ = T1z - T1C; + Td2 = KP707106781 * (Td0 + Td1); + Td3 = TcZ + Td2; + Tew = TcZ - Td2; + } + { + E Td4, Td5, T1S, T1Z; + Td4 = T1J - T1G; + Td5 = KP707106781 * (Td1 - Td0); + Td6 = Td4 + Td5; + Tex = Td5 - Td4; + T1S = T1O + T1R; + T1Z = T1V + T1Y; + T20 = T1S + T1Z; + Tae = T1Z - T1S; + } + { + E T2Z, T36, T7J, T7K; + T2Z = T2V + T2Y; + T36 = T32 + T35; + T37 = T2Z + T36; + T6x = T2Z - T36; + T7J = T2V - T2Y; + T7K = T3c + T3b; + T7L = T7J + T7K; + T9a = T7J - T7K; + } + { + E T7M, T7N, T3a, T3d; + T7M = T39 + T38; + T7N = T35 - T32; + T7O = T7M + T7N; + T9b = T7N - T7M; + T3a = T38 - T39; + T3d = T3b - T3c; + T3e = T3a + T3d; + T6w = T3d - T3a; + } + } + { + E T3L, Tdf, T3X, Tar, T42, Tdi, T4e, Tay, T3S, Tdg, T3U, Tau, T49, Tdj, T4b; + E TaB, Tdh, Tdk; + { + E T3J, T3K, Tap, T3V, T3W, Taq; + T3J = R1[WS(rs, 2)]; + T3K = R1[WS(rs, 34)]; + Tap = T3J + T3K; + T3V = R1[WS(rs, 18)]; + T3W = R1[WS(rs, 50)]; + Taq = T3V + T3W; + T3L = T3J - T3K; + Tdf = Tap - Taq; + T3X = T3V - T3W; + Tar = Tap + Taq; + } + { + E T40, T41, Taw, T4c, T4d, Tax; + T40 = R1[WS(rs, 62)]; + T41 = R1[WS(rs, 30)]; + Taw = T40 + T41; + T4c = R1[WS(rs, 14)]; + T4d = R1[WS(rs, 46)]; + Tax = T4c + T4d; + T42 = T40 - T41; + Tdi = Taw - Tax; + T4e = T4c - T4d; + Tay = Taw + Tax; + } + { + E T3O, Tas, T3R, Tat; + { + E T3M, T3N, T3P, T3Q; + T3M = R1[WS(rs, 10)]; + T3N = R1[WS(rs, 42)]; + T3O = T3M - T3N; + Tas = T3M + T3N; + T3P = R1[WS(rs, 58)]; + T3Q = R1[WS(rs, 26)]; + T3R = T3P - T3Q; + Tat = T3P + T3Q; + } + T3S = KP707106781 * (T3O + T3R); + Tdg = Tat - Tas; + T3U = KP707106781 * (T3R - T3O); + Tau = Tas + Tat; + } + { + E T45, Taz, T48, TaA; + { + E T43, T44, T46, T47; + T43 = R1[WS(rs, 6)]; + T44 = R1[WS(rs, 38)]; + T45 = T43 - T44; + Taz = T43 + T44; + T46 = R1[WS(rs, 54)]; + T47 = 
R1[WS(rs, 22)]; + T48 = T46 - T47; + TaA = T46 + T47; + } + T49 = KP707106781 * (T45 + T48); + Tdj = TaA - Taz; + T4b = KP707106781 * (T48 - T45); + TaB = Taz + TaA; + } + TbZ = Tar + Tau; + Tc0 = Tay + TaB; + { + E T3T, T3Y, Tdq, Tdr; + T3T = T3L + T3S; + T3Y = T3U - T3X; + T3Z = FMA(KP980785280, T3T, KP195090322 * T3Y); + T4s = FNMS(KP195090322, T3T, KP980785280 * T3Y); + Tdq = FNMS(KP382683432, Tdf, KP923879532 * Tdg); + Tdr = FMA(KP382683432, Tdi, KP923879532 * Tdj); + Tds = Tdq + Tdr; + TeI = Tdr - Tdq; + } + { + E T4a, T4f, T7Y, T7Z; + T4a = T42 + T49; + T4f = T4b - T4e; + T4g = FNMS(KP195090322, T4f, KP980785280 * T4a); + T4t = FMA(KP195090322, T4a, KP980785280 * T4f); + T7Y = T42 - T49; + T7Z = T4e + T4b; + T80 = FNMS(KP555570233, T7Z, KP831469612 * T7Y); + T87 = FMA(KP555570233, T7Y, KP831469612 * T7Z); + } + Tdh = FMA(KP923879532, Tdf, KP382683432 * Tdg); + Tdk = FNMS(KP382683432, Tdj, KP923879532 * Tdi); + Tdl = Tdh + Tdk; + TeE = Tdk - Tdh; + { + E T7V, T7W, Tav, TaC; + T7V = T3L - T3S; + T7W = T3X + T3U; + T7X = FMA(KP831469612, T7V, KP555570233 * T7W); + T86 = FNMS(KP555570233, T7V, KP831469612 * T7W); + Tav = Tar - Tau; + TaC = Tay - TaB; + TaD = KP707106781 * (Tav + TaC); + TaM = KP707106781 * (TaC - Tav); + } + } + { + E T50, TdA, T5c, TaY, T5h, TdD, T5t, Tb5, T57, TdB, T59, Tb1, T5o, TdE, T5q; + E Tb8, TdC, TdF; + { + E T4Y, T4Z, TaW, T5a, T5b, TaX; + T4Y = R1[WS(rs, 1)]; + T4Z = R1[WS(rs, 33)]; + TaW = T4Y + T4Z; + T5a = R1[WS(rs, 17)]; + T5b = R1[WS(rs, 49)]; + TaX = T5a + T5b; + T50 = T4Y - T4Z; + TdA = TaW - TaX; + T5c = T5a - T5b; + TaY = TaW + TaX; + } + { + E T5f, T5g, Tb3, T5r, T5s, Tb4; + T5f = R1[WS(rs, 61)]; + T5g = R1[WS(rs, 29)]; + Tb3 = T5f + T5g; + T5r = R1[WS(rs, 13)]; + T5s = R1[WS(rs, 45)]; + Tb4 = T5r + T5s; + T5h = T5f - T5g; + TdD = Tb3 - Tb4; + T5t = T5r - T5s; + Tb5 = Tb3 + Tb4; + } + { + E T53, TaZ, T56, Tb0; + { + E T51, T52, T54, T55; + T51 = R1[WS(rs, 9)]; + T52 = R1[WS(rs, 41)]; + T53 = T51 - T52; + TaZ = T51 + T52; + 
T54 = R1[WS(rs, 57)]; + T55 = R1[WS(rs, 25)]; + T56 = T54 - T55; + Tb0 = T54 + T55; + } + T57 = KP707106781 * (T53 + T56); + TdB = Tb0 - TaZ; + T59 = KP707106781 * (T56 - T53); + Tb1 = TaZ + Tb0; + } + { + E T5k, Tb6, T5n, Tb7; + { + E T5i, T5j, T5l, T5m; + T5i = R1[WS(rs, 5)]; + T5j = R1[WS(rs, 37)]; + T5k = T5i - T5j; + Tb6 = T5i + T5j; + T5l = R1[WS(rs, 53)]; + T5m = R1[WS(rs, 21)]; + T5n = T5l - T5m; + Tb7 = T5l + T5m; + } + T5o = KP707106781 * (T5k + T5n); + TdE = Tb7 - Tb6; + T5q = KP707106781 * (T5n - T5k); + Tb8 = Tb6 + Tb7; + } + Tc6 = TaY + Tb1; + Tc7 = Tb5 + Tb8; + { + E T58, T5d, TdL, TdM; + T58 = T50 + T57; + T5d = T59 - T5c; + T5e = FMA(KP980785280, T58, KP195090322 * T5d); + T5H = FNMS(KP195090322, T58, KP980785280 * T5d); + TdL = FNMS(KP382683432, TdA, KP923879532 * TdB); + TdM = FMA(KP382683432, TdD, KP923879532 * TdE); + TdN = TdL + TdM; + TeM = TdM - TdL; + } + { + E T5p, T5u, T8h, T8i; + T5p = T5h + T5o; + T5u = T5q - T5t; + T5v = FNMS(KP195090322, T5u, KP980785280 * T5p); + T5I = FMA(KP195090322, T5p, KP980785280 * T5u); + T8h = T5h - T5o; + T8i = T5t + T5q; + T8j = FNMS(KP555570233, T8i, KP831469612 * T8h); + T8q = FMA(KP555570233, T8h, KP831469612 * T8i); + } + TdC = FMA(KP923879532, TdA, KP382683432 * TdB); + TdF = FNMS(KP382683432, TdE, KP923879532 * TdD); + TdG = TdC + TdF; + TeO = TdF - TdC; + { + E T8e, T8f, Tb2, Tb9; + T8e = T50 - T57; + T8f = T5c + T59; + T8g = FMA(KP831469612, T8e, KP555570233 * T8f); + T8p = FNMS(KP555570233, T8e, KP831469612 * T8f); + Tb2 = TaY - Tb1; + Tb9 = Tb5 - Tb8; + Tba = KP707106781 * (Tb2 + Tb9); + Tbj = KP707106781 * (Tb9 - Tb2); + } + } + { + E T11, TbV, Tc9, Tcf, T22, Tcb, Tc2, Tce; + { + E Tv, T10, Tc5, Tc8; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + TbV = Tv - T10; + Tc5 = Tc3 + Tc4; + Tc8 = Tc6 + Tc7; + Tc9 = Tc5 - Tc8; + Tcf = Tc5 + Tc8; + } + { + E T1w, T21, TbY, Tc1; + T1w = T1g + T1v; + T21 = T1L + T20; + T22 = T1w + T21; + Tcb = T21 - T1w; + TbY = TbW + TbX; + Tc1 = TbZ + Tc0; + Tc2 = 
TbY - Tc1; + Tce = TbY + Tc1; + } + Cr[WS(csr, 32)] = T11 - T22; + Ci[WS(csi, 32)] = Tcf - Tce; + { + E Tca, Tcc, Tcd, Tcg; + Tca = KP707106781 * (Tc2 + Tc9); + Cr[WS(csr, 48)] = TbV - Tca; + Cr[WS(csr, 16)] = TbV + Tca; + Tcc = KP707106781 * (Tc9 - Tc2); + Ci[WS(csi, 16)] = Tcb + Tcc; + Ci[WS(csi, 48)] = Tcc - Tcb; + Tcd = T11 + T22; + Tcg = Tce + Tcf; + Cr[WS(csr, 64)] = Tcd - Tcg; + Cr[0] = Tcd + Tcg; + } + } + { + E Tch, Tcu, Tck, Tct, Tco, Tcy, Tcr, Tcz, Tci, Tcj; + Tch = Tf - Tu; + Tcu = TZ - TK; + Tci = T1g - T1v; + Tcj = T1L - T20; + Tck = KP707106781 * (Tci + Tcj); + Tct = KP707106781 * (Tcj - Tci); + { + E Tcm, Tcn, Tcp, Tcq; + Tcm = TbW - TbX; + Tcn = Tc0 - TbZ; + Tco = FMA(KP923879532, Tcm, KP382683432 * Tcn); + Tcy = FNMS(KP382683432, Tcm, KP923879532 * Tcn); + Tcp = Tc3 - Tc4; + Tcq = Tc7 - Tc6; + Tcr = FNMS(KP382683432, Tcq, KP923879532 * Tcp); + Tcz = FMA(KP382683432, Tcp, KP923879532 * Tcq); + } + { + E Tcl, Tcs, Tcx, TcA; + Tcl = Tch + Tck; + Tcs = Tco + Tcr; + Cr[WS(csr, 56)] = Tcl - Tcs; + Cr[WS(csr, 8)] = Tcl + Tcs; + Tcx = Tcu + Tct; + TcA = Tcy + Tcz; + Ci[WS(csi, 8)] = Tcx + TcA; + Ci[WS(csi, 56)] = TcA - Tcx; + } + { + E Tcv, Tcw, TcB, TcC; + Tcv = Tct - Tcu; + Tcw = Tcr - Tco; + Ci[WS(csi, 24)] = Tcv + Tcw; + Ci[WS(csi, 40)] = Tcw - Tcv; + TcB = Tch - Tck; + TcC = Tcz - Tcy; + Cr[WS(csr, 40)] = TcB - TcC; + Cr[WS(csr, 24)] = TcB + TcC; + } + } + { + E Ta9, TbB, Tbs, TbM, Tag, TbL, TbJ, TbR, TaO, Tbw, Tbp, TbC, TbG, TbQ, Tbl; + E Tbx, Ta8, Tbr; + Ta8 = KP707106781 * (Ta6 + Ta7); + Ta9 = Ta5 + Ta8; + TbB = Ta5 - Ta8; + Tbr = KP707106781 * (Ta7 - Ta6); + Tbs = Tbq + Tbr; + TbM = Tbr - Tbq; + { + E Tac, Taf, TbH, TbI; + Tac = FMA(KP923879532, Taa, KP382683432 * Tab); + Taf = FNMS(KP382683432, Tae, KP923879532 * Tad); + Tag = Tac + Taf; + TbL = Taf - Tac; + TbH = TaV - Tba; + TbI = Tbj - Tbi; + TbJ = FNMS(KP555570233, TbI, KP831469612 * TbH); + TbR = FMA(KP555570233, TbH, KP831469612 * TbI); + } + { + E TaE, TaN, Tbn, Tbo; + TaE = Tao + TaD; + 
TaN = TaL + TaM; + TaO = FMA(KP980785280, TaE, KP195090322 * TaN); + Tbw = FNMS(KP195090322, TaE, KP980785280 * TaN); + Tbn = FNMS(KP382683432, Taa, KP923879532 * Tab); + Tbo = FMA(KP382683432, Tad, KP923879532 * Tae); + Tbp = Tbn + Tbo; + TbC = Tbo - Tbn; + } + { + E TbE, TbF, Tbb, Tbk; + TbE = Tao - TaD; + TbF = TaM - TaL; + TbG = FMA(KP831469612, TbE, KP555570233 * TbF); + TbQ = FNMS(KP555570233, TbE, KP831469612 * TbF); + Tbb = TaV + Tba; + Tbk = Tbi + Tbj; + Tbl = FNMS(KP195090322, Tbk, KP980785280 * Tbb); + Tbx = FMA(KP195090322, Tbb, KP980785280 * Tbk); + } + { + E Tah, Tbm, Tbv, Tby; + Tah = Ta9 + Tag; + Tbm = TaO + Tbl; + Cr[WS(csr, 60)] = Tah - Tbm; + Cr[WS(csr, 4)] = Tah + Tbm; + Tbv = Tbs + Tbp; + Tby = Tbw + Tbx; + Ci[WS(csi, 4)] = Tbv + Tby; + Ci[WS(csi, 60)] = Tby - Tbv; + } + { + E Tbt, Tbu, Tbz, TbA; + Tbt = Tbp - Tbs; + Tbu = Tbl - TaO; + Ci[WS(csi, 28)] = Tbt + Tbu; + Ci[WS(csi, 36)] = Tbu - Tbt; + Tbz = Ta9 - Tag; + TbA = Tbx - Tbw; + Cr[WS(csr, 36)] = Tbz - TbA; + Cr[WS(csr, 28)] = Tbz + TbA; + } + { + E TbD, TbK, TbP, TbS; + TbD = TbB + TbC; + TbK = TbG + TbJ; + Cr[WS(csr, 52)] = TbD - TbK; + Cr[WS(csr, 12)] = TbD + TbK; + TbP = TbM + TbL; + TbS = TbQ + TbR; + Ci[WS(csi, 12)] = TbP + TbS; + Ci[WS(csi, 52)] = TbS - TbP; + } + { + E TbN, TbO, TbT, TbU; + TbN = TbL - TbM; + TbO = TbJ - TbG; + Ci[WS(csi, 20)] = TbN + TbO; + Ci[WS(csi, 44)] = TbO - TbN; + TbT = TbB - TbC; + TbU = TbR - TbQ; + Cr[WS(csr, 44)] = TbT - TbU; + Cr[WS(csr, 20)] = TbT + TbU; + } + } + { + E Tev, Tf7, Tfc, Tfm, Tff, Tfn, TeC, Tfh, TeK, Tf2, TeV, Tf8, TeY, Tfi, TeR; + E Tf3; + { + E Tet, Teu, Tfa, Tfb; + Tet = TcD - TcG; + Teu = TdY - TdX; + Tev = Tet - Teu; + Tf7 = Tet + Teu; + Tfa = TeF + TeE; + Tfb = TeH + TeI; + Tfc = FMA(KP290284677, Tfa, KP956940335 * Tfb); + Tfm = FNMS(KP290284677, Tfb, KP956940335 * Tfa); + } + { + E Tfd, Tfe, Tey, TeB; + Tfd = TeL + TeM; + Tfe = TeP + TeO; + Tff = FNMS(KP290284677, Tfe, KP956940335 * Tfd); + Tfn = FMA(KP956940335, Tfe, KP290284677 
* Tfd); + Tey = FMA(KP555570233, Tew, KP831469612 * Tex); + TeB = FNMS(KP555570233, TeA, KP831469612 * Tez); + TeC = Tey - TeB; + Tfh = TeB + Tey; + } + { + E TeG, TeJ, TeT, TeU; + TeG = TeE - TeF; + TeJ = TeH - TeI; + TeK = FMA(KP471396736, TeG, KP881921264 * TeJ); + Tf2 = FNMS(KP471396736, TeJ, KP881921264 * TeG); + TeT = FNMS(KP555570233, Tex, KP831469612 * Tew); + TeU = FMA(KP831469612, TeA, KP555570233 * Tez); + TeV = TeT - TeU; + Tf8 = TeU + TeT; + } + { + E TeW, TeX, TeN, TeQ; + TeW = TcN - TcK; + TeX = TdV - TdU; + TeY = TeW - TeX; + Tfi = TeX + TeW; + TeN = TeL - TeM; + TeQ = TeO - TeP; + TeR = FNMS(KP471396736, TeQ, KP881921264 * TeN); + Tf3 = FMA(KP881921264, TeQ, KP471396736 * TeN); + } + { + E TeD, TeS, Tf1, Tf4; + TeD = Tev + TeC; + TeS = TeK + TeR; + Cr[WS(csr, 54)] = TeD - TeS; + Cr[WS(csr, 10)] = TeD + TeS; + Tf1 = TeY + TeV; + Tf4 = Tf2 + Tf3; + Ci[WS(csi, 10)] = Tf1 + Tf4; + Ci[WS(csi, 54)] = Tf4 - Tf1; + } + { + E TeZ, Tf0, Tf5, Tf6; + TeZ = TeV - TeY; + Tf0 = TeR - TeK; + Ci[WS(csi, 22)] = TeZ + Tf0; + Ci[WS(csi, 42)] = Tf0 - TeZ; + Tf5 = Tev - TeC; + Tf6 = Tf3 - Tf2; + Cr[WS(csr, 42)] = Tf5 - Tf6; + Cr[WS(csr, 22)] = Tf5 + Tf6; + } + { + E Tf9, Tfg, Tfl, Tfo; + Tf9 = Tf7 + Tf8; + Tfg = Tfc + Tff; + Cr[WS(csr, 58)] = Tf9 - Tfg; + Cr[WS(csr, 6)] = Tf9 + Tfg; + Tfl = Tfi + Tfh; + Tfo = Tfm + Tfn; + Ci[WS(csi, 6)] = Tfl + Tfo; + Ci[WS(csi, 58)] = Tfo - Tfl; + } + { + E Tfj, Tfk, Tfp, Tfq; + Tfj = Tfh - Tfi; + Tfk = Tff - Tfc; + Ci[WS(csi, 26)] = Tfj + Tfk; + Ci[WS(csi, 38)] = Tfk - Tfj; + Tfp = Tf7 - Tf8; + Tfq = Tfn - Tfm; + Cr[WS(csr, 38)] = Tfp - Tfq; + Cr[WS(csr, 26)] = Tfp + Tfq; + } + } + { + E TcP, Te9, Tee, Teo, Teh, Tep, Td8, Tej, Tdu, Te4, TdT, Tea, Te0, Tek, TdP; + E Te5; + { + E TcH, TcO, Tec, Ted; + TcH = TcD + TcG; + TcO = TcK + TcN; + TcP = TcH + TcO; + Te9 = TcH - TcO; + Tec = Tde - Tdl; + Ted = Tds - Tdp; + Tee = FMA(KP773010453, Tec, KP634393284 * Ted); + Teo = FNMS(KP634393284, Tec, KP773010453 * Ted); + } + { + E Tef, Teg, TcY, 
Td7; + Tef = Tdz - TdG; + Teg = TdN - TdK; + Teh = FNMS(KP634393284, Teg, KP773010453 * Tef); + Tep = FMA(KP634393284, Tef, KP773010453 * Teg); + TcY = FMA(KP980785280, TcU, KP195090322 * TcX); + Td7 = FNMS(KP195090322, Td6, KP980785280 * Td3); + Td8 = TcY + Td7; + Tej = Td7 - TcY; + } + { + E Tdm, Tdt, TdR, TdS; + Tdm = Tde + Tdl; + Tdt = Tdp + Tds; + Tdu = FMA(KP995184726, Tdm, KP098017140 * Tdt); + Te4 = FNMS(KP098017140, Tdm, KP995184726 * Tdt); + TdR = FNMS(KP195090322, TcU, KP980785280 * TcX); + TdS = FMA(KP195090322, Td3, KP980785280 * Td6); + TdT = TdR + TdS; + Tea = TdS - TdR; + } + { + E TdW, TdZ, TdH, TdO; + TdW = TdU + TdV; + TdZ = TdX + TdY; + Te0 = TdW + TdZ; + Tek = TdZ - TdW; + TdH = Tdz + TdG; + TdO = TdK + TdN; + TdP = FNMS(KP098017140, TdO, KP995184726 * TdH); + Te5 = FMA(KP098017140, TdH, KP995184726 * TdO); + } + { + E Td9, TdQ, Te3, Te6; + Td9 = TcP + Td8; + TdQ = Tdu + TdP; + Cr[WS(csr, 62)] = Td9 - TdQ; + Cr[WS(csr, 2)] = Td9 + TdQ; + Te3 = Te0 + TdT; + Te6 = Te4 + Te5; + Ci[WS(csi, 2)] = Te3 + Te6; + Ci[WS(csi, 62)] = Te6 - Te3; + } + { + E Te1, Te2, Te7, Te8; + Te1 = TdT - Te0; + Te2 = TdP - Tdu; + Ci[WS(csi, 30)] = Te1 + Te2; + Ci[WS(csi, 34)] = Te2 - Te1; + Te7 = TcP - Td8; + Te8 = Te5 - Te4; + Cr[WS(csr, 34)] = Te7 - Te8; + Cr[WS(csr, 30)] = Te7 + Te8; + } + { + E Teb, Tei, Ten, Teq; + Teb = Te9 + Tea; + Tei = Tee + Teh; + Cr[WS(csr, 50)] = Teb - Tei; + Cr[WS(csr, 14)] = Teb + Tei; + Ten = Tek + Tej; + Teq = Teo + Tep; + Ci[WS(csi, 14)] = Ten + Teq; + Ci[WS(csi, 50)] = Teq - Ten; + } + { + E Tel, Tem, Ter, Tes; + Tel = Tej - Tek; + Tem = Teh - Tee; + Ci[WS(csi, 18)] = Tel + Tem; + Ci[WS(csi, 46)] = Tem - Tel; + Ter = Te9 - Tea; + Tes = Tep - Teo; + Cr[WS(csr, 46)] = Ter - Tes; + Cr[WS(csr, 18)] = Ter + Tes; + } + } + { + E T6v, T77, T6C, T7h, T6Y, T7i, T6V, T78, T6R, T7n, T73, T7f, T6K, T7m, T72; + E T7c; + { + E T6t, T6u, T6T, T6U; + T6t = T27 - T2e; + T6u = T5Y - T5X; + T6v = T6t - T6u; + T77 = T6t + T6u; + { + E T6y, T6B, T6W, T6X; + 
T6y = FMA(KP773010453, T6w, KP634393284 * T6x); + T6B = FNMS(KP634393284, T6A, KP773010453 * T6z); + T6C = T6y - T6B; + T7h = T6B + T6y; + T6W = T2x - T2o; + T6X = T5V - T5S; + T6Y = T6W - T6X; + T7i = T6X + T6W; + } + T6T = FNMS(KP634393284, T6w, KP773010453 * T6x); + T6U = FMA(KP634393284, T6z, KP773010453 * T6A); + T6V = T6T - T6U; + T78 = T6U + T6T; + { + E T6N, T7d, T6Q, T7e, T6M, T6O; + T6M = T5I - T5H; + T6N = T6L - T6M; + T7d = T6L + T6M; + T6O = T5v - T5e; + T6Q = T6O - T6P; + T7e = T6P + T6O; + T6R = FNMS(KP427555093, T6Q, KP903989293 * T6N); + T7n = FMA(KP941544065, T7e, KP336889853 * T7d); + T73 = FMA(KP903989293, T6Q, KP427555093 * T6N); + T7f = FNMS(KP336889853, T7e, KP941544065 * T7d); + } + { + E T6G, T7a, T6J, T7b, T6E, T6I; + T6E = T4g - T3Z; + T6G = T6E - T6F; + T7a = T6F + T6E; + T6I = T4t - T4s; + T6J = T6H - T6I; + T7b = T6H + T6I; + T6K = FMA(KP427555093, T6G, KP903989293 * T6J); + T7m = FNMS(KP336889853, T7b, KP941544065 * T7a); + T72 = FNMS(KP427555093, T6J, KP903989293 * T6G); + T7c = FMA(KP336889853, T7a, KP941544065 * T7b); + } + } + { + E T6D, T6S, T71, T74; + T6D = T6v + T6C; + T6S = T6K + T6R; + Cr[WS(csr, 55)] = T6D - T6S; + Cr[WS(csr, 9)] = T6D + T6S; + T71 = T6Y + T6V; + T74 = T72 + T73; + Ci[WS(csi, 9)] = T71 + T74; + Ci[WS(csi, 55)] = T74 - T71; + } + { + E T6Z, T70, T75, T76; + T6Z = T6V - T6Y; + T70 = T6R - T6K; + Ci[WS(csi, 23)] = T6Z + T70; + Ci[WS(csi, 41)] = T70 - T6Z; + T75 = T6v - T6C; + T76 = T73 - T72; + Cr[WS(csr, 41)] = T75 - T76; + Cr[WS(csr, 23)] = T75 + T76; + } + { + E T79, T7g, T7l, T7o; + T79 = T77 + T78; + T7g = T7c + T7f; + Cr[WS(csr, 57)] = T79 - T7g; + Cr[WS(csr, 7)] = T79 + T7g; + T7l = T7i + T7h; + T7o = T7m + T7n; + Ci[WS(csi, 7)] = T7l + T7o; + Ci[WS(csi, 57)] = T7o - T7l; + } + { + E T7j, T7k, T7p, T7q; + T7j = T7h - T7i; + T7k = T7f - T7c; + Ci[WS(csi, 25)] = T7j + T7k; + Ci[WS(csi, 39)] = T7k - T7j; + T7p = T77 - T78; + T7q = T7n - T7m; + Cr[WS(csr, 39)] = T7p - T7q; + Cr[WS(csr, 25)] = T7p + T7q; + } 
+ } + { + E T99, T9L, T9g, T9V, T9C, T9W, T9z, T9M, T9v, Ta1, T9H, T9T, T9o, Ta0, T9G; + E T9Q; + { + E T97, T98, T9x, T9y; + T97 = T7r - T7s; + T98 = T8C - T8B; + T99 = T97 - T98; + T9L = T97 + T98; + { + E T9c, T9f, T9A, T9B; + T9c = FMA(KP471396736, T9a, KP881921264 * T9b); + T9f = FNMS(KP471396736, T9e, KP881921264 * T9d); + T9g = T9c - T9f; + T9V = T9f + T9c; + T9A = T7z - T7w; + T9B = T8z - T8y; + T9C = T9A - T9B; + T9W = T9B + T9A; + } + T9x = FNMS(KP471396736, T9b, KP881921264 * T9a); + T9y = FMA(KP881921264, T9e, KP471396736 * T9d); + T9z = T9x - T9y; + T9M = T9y + T9x; + { + E T9r, T9R, T9u, T9S, T9q, T9s; + T9q = T8q - T8p; + T9r = T9p - T9q; + T9R = T9p + T9q; + T9s = T8j - T8g; + T9u = T9s - T9t; + T9S = T9t + T9s; + T9v = FNMS(KP514102744, T9u, KP857728610 * T9r); + Ta1 = FMA(KP970031253, T9S, KP242980179 * T9R); + T9H = FMA(KP857728610, T9u, KP514102744 * T9r); + T9T = FNMS(KP242980179, T9S, KP970031253 * T9R); + } + { + E T9k, T9O, T9n, T9P, T9i, T9m; + T9i = T80 - T7X; + T9k = T9i - T9j; + T9O = T9j + T9i; + T9m = T87 - T86; + T9n = T9l - T9m; + T9P = T9l + T9m; + T9o = FMA(KP514102744, T9k, KP857728610 * T9n); + Ta0 = FNMS(KP242980179, T9P, KP970031253 * T9O); + T9G = FNMS(KP514102744, T9n, KP857728610 * T9k); + T9Q = FMA(KP242980179, T9O, KP970031253 * T9P); + } + } + { + E T9h, T9w, T9F, T9I; + T9h = T99 + T9g; + T9w = T9o + T9v; + Cr[WS(csr, 53)] = T9h - T9w; + Cr[WS(csr, 11)] = T9h + T9w; + T9F = T9C + T9z; + T9I = T9G + T9H; + Ci[WS(csi, 11)] = T9F + T9I; + Ci[WS(csi, 53)] = T9I - T9F; + } + { + E T9D, T9E, T9J, T9K; + T9D = T9z - T9C; + T9E = T9v - T9o; + Ci[WS(csi, 21)] = T9D + T9E; + Ci[WS(csi, 43)] = T9E - T9D; + T9J = T99 - T9g; + T9K = T9H - T9G; + Cr[WS(csr, 43)] = T9J - T9K; + Cr[WS(csr, 21)] = T9J + T9K; + } + { + E T9N, T9U, T9Z, Ta2; + T9N = T9L + T9M; + T9U = T9Q + T9T; + Cr[WS(csr, 59)] = T9N - T9U; + Cr[WS(csr, 5)] = T9N + T9U; + T9Z = T9W + T9V; + Ta2 = Ta0 + Ta1; + Ci[WS(csi, 5)] = T9Z + Ta2; + Ci[WS(csi, 59)] = Ta2 - T9Z; + } 
+ { + E T9X, T9Y, Ta3, Ta4; + T9X = T9V - T9W; + T9Y = T9T - T9Q; + Ci[WS(csi, 27)] = T9X + T9Y; + Ci[WS(csi, 37)] = T9Y - T9X; + Ta3 = T9L - T9M; + Ta4 = Ta1 - Ta0; + Cr[WS(csr, 37)] = Ta3 - Ta4; + Cr[WS(csr, 27)] = Ta3 + Ta4; + } + } + { + E T2z, T69, T3g, T6j, T60, T6k, T5P, T6a, T5L, T6p, T65, T6h, T4w, T6o, T64; + E T6e; + { + E T2f, T2y, T5N, T5O; + T2f = T27 + T2e; + T2y = T2o + T2x; + T2z = T2f + T2y; + T69 = T2f - T2y; + { + E T2U, T3f, T5W, T5Z; + T2U = FMA(KP098017140, T2M, KP995184726 * T2T); + T3f = FNMS(KP098017140, T3e, KP995184726 * T37); + T3g = T2U + T3f; + T6j = T3f - T2U; + T5W = T5S + T5V; + T5Z = T5X + T5Y; + T60 = T5W + T5Z; + T6k = T5Z - T5W; + } + T5N = FNMS(KP098017140, T2T, KP995184726 * T2M); + T5O = FMA(KP995184726, T3e, KP098017140 * T37); + T5P = T5N + T5O; + T6a = T5O - T5N; + { + E T5x, T6f, T5K, T6g, T5w, T5J; + T5w = T5e + T5v; + T5x = T4X + T5w; + T6f = T4X - T5w; + T5J = T5H + T5I; + T5K = T5G + T5J; + T6g = T5J - T5G; + T5L = FNMS(KP049067674, T5K, KP998795456 * T5x); + T6p = FMA(KP671558954, T6f, KP740951125 * T6g); + T65 = FMA(KP049067674, T5x, KP998795456 * T5K); + T6h = FNMS(KP671558954, T6g, KP740951125 * T6f); + } + { + E T4i, T6c, T4v, T6d, T4h, T4u; + T4h = T3Z + T4g; + T4i = T3I + T4h; + T6c = T3I - T4h; + T4u = T4s + T4t; + T4v = T4r + T4u; + T6d = T4u - T4r; + T4w = FMA(KP998795456, T4i, KP049067674 * T4v); + T6o = FNMS(KP671558954, T6c, KP740951125 * T6d); + T64 = FNMS(KP049067674, T4i, KP998795456 * T4v); + T6e = FMA(KP740951125, T6c, KP671558954 * T6d); + } + } + { + E T3h, T5M, T63, T66; + T3h = T2z + T3g; + T5M = T4w + T5L; + Cr[WS(csr, 63)] = T3h - T5M; + Cr[WS(csr, 1)] = T3h + T5M; + T63 = T60 + T5P; + T66 = T64 + T65; + Ci[WS(csi, 1)] = T63 + T66; + Ci[WS(csi, 63)] = T66 - T63; + } + { + E T61, T62, T67, T68; + T61 = T5P - T60; + T62 = T5L - T4w; + Ci[WS(csi, 31)] = T61 + T62; + Ci[WS(csi, 33)] = T62 - T61; + T67 = T2z - T3g; + T68 = T65 - T64; + Cr[WS(csr, 33)] = T67 - T68; + Cr[WS(csr, 31)] = T67 + T68; + } 
+ { + E T6b, T6i, T6n, T6q; + T6b = T69 + T6a; + T6i = T6e + T6h; + Cr[WS(csr, 49)] = T6b - T6i; + Cr[WS(csr, 15)] = T6b + T6i; + T6n = T6k + T6j; + T6q = T6o + T6p; + Ci[WS(csi, 15)] = T6n + T6q; + Ci[WS(csi, 49)] = T6q - T6n; + } + { + E T6l, T6m, T6r, T6s; + T6l = T6j - T6k; + T6m = T6h - T6e; + Ci[WS(csi, 17)] = T6l + T6m; + Ci[WS(csi, 47)] = T6m - T6l; + T6r = T69 - T6a; + T6s = T6p - T6o; + Cr[WS(csr, 47)] = T6r - T6s; + Cr[WS(csr, 17)] = T6r + T6s; + } + } + { + E T7B, T8N, T7Q, T8X, T8E, T8Y, T8x, T8O, T8t, T93, T8J, T8V, T8a, T92, T8I; + E T8S; + { + E T7t, T7A, T8v, T8w; + T7t = T7r + T7s; + T7A = T7w + T7z; + T7B = T7t + T7A; + T8N = T7t - T7A; + { + E T7I, T7P, T8A, T8D; + T7I = FMA(KP956940335, T7E, KP290284677 * T7H); + T7P = FNMS(KP290284677, T7O, KP956940335 * T7L); + T7Q = T7I + T7P; + T8X = T7P - T7I; + T8A = T8y + T8z; + T8D = T8B + T8C; + T8E = T8A + T8D; + T8Y = T8D - T8A; + } + T8v = FNMS(KP290284677, T7E, KP956940335 * T7H); + T8w = FMA(KP290284677, T7L, KP956940335 * T7O); + T8x = T8v + T8w; + T8O = T8w - T8v; + { + E T8l, T8T, T8s, T8U, T8k, T8r; + T8k = T8g + T8j; + T8l = T8d + T8k; + T8T = T8d - T8k; + T8r = T8p + T8q; + T8s = T8o + T8r; + T8U = T8r - T8o; + T8t = FNMS(KP146730474, T8s, KP989176509 * T8l); + T93 = FMA(KP595699304, T8T, KP803207531 * T8U); + T8J = FMA(KP146730474, T8l, KP989176509 * T8s); + T8V = FNMS(KP595699304, T8U, KP803207531 * T8T); + } + { + E T82, T8Q, T89, T8R, T81, T88; + T81 = T7X + T80; + T82 = T7U + T81; + T8Q = T7U - T81; + T88 = T86 + T87; + T89 = T85 + T88; + T8R = T88 - T85; + T8a = FMA(KP989176509, T82, KP146730474 * T89); + T92 = FNMS(KP595699304, T8Q, KP803207531 * T8R); + T8I = FNMS(KP146730474, T82, KP989176509 * T89); + T8S = FMA(KP803207531, T8Q, KP595699304 * T8R); + } + } + { + E T7R, T8u, T8H, T8K; + T7R = T7B + T7Q; + T8u = T8a + T8t; + Cr[WS(csr, 61)] = T7R - T8u; + Cr[WS(csr, 3)] = T7R + T8u; + T8H = T8E + T8x; + T8K = T8I + T8J; + Ci[WS(csi, 3)] = T8H + T8K; + Ci[WS(csi, 61)] = T8K - T8H; + } 
+ { + E T8F, T8G, T8L, T8M; + T8F = T8x - T8E; + T8G = T8t - T8a; + Ci[WS(csi, 29)] = T8F + T8G; + Ci[WS(csi, 35)] = T8G - T8F; + T8L = T7B - T7Q; + T8M = T8J - T8I; + Cr[WS(csr, 35)] = T8L - T8M; + Cr[WS(csr, 29)] = T8L + T8M; + } + { + E T8P, T8W, T91, T94; + T8P = T8N + T8O; + T8W = T8S + T8V; + Cr[WS(csr, 51)] = T8P - T8W; + Cr[WS(csr, 13)] = T8P + T8W; + T91 = T8Y + T8X; + T94 = T92 + T93; + Ci[WS(csi, 13)] = T91 + T94; + Ci[WS(csi, 51)] = T94 - T91; + } + { + E T8Z, T90, T95, T96; + T8Z = T8X - T8Y; + T90 = T8V - T8S; + Ci[WS(csi, 19)] = T8Z + T90; + Ci[WS(csi, 45)] = T90 - T8Z; + T95 = T8N - T8O; + T96 = T93 - T92; + Cr[WS(csr, 45)] = T95 - T96; + Cr[WS(csr, 19)] = T95 + T96; + } + } + } + } +} + +/* Planner descriptor for the r2cf_128 codelet that ends just above: size 128, the codelet name, the tuple {812, 186, 144, 0} and the GENUS object defined outside this chunk. NOTE(review): by analogy with the smaller codelets in this patch the first three numbers look like addition / multiplication / fused multiply-add counts -- confirm against the kr2c_desc definition. */ static const kr2c_desc desc = { 128, "r2cf_128", {812, 186, 144, 0}, &GENUS }; + +/* Registration entry point: passes r2cf_128 and desc to X(kr2c_register) on planner p. */ void X(codelet_r2cf_128) (planner *p) { + X(kr2c_register) (p, r2cf_128, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_13.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_13.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 13 -name r2cf_13 -include r2cf.h */ + +/* + * This function contains 76 FP additions, 51 FP multiplications, + * (or, 31 additions, 6 multiplications, 45 fused multiply/add), + * 68 stack variables, 23 constants, and 26 memory accesses + */ +#include "r2cf.h" + +/* r2cf_13, HAVE_FMA branch. genfft output for -n 13 (see the generator line above); presumably the length-13 real-to-complex forward DFT kernel -- TODO confirm against the FFTW documentation. The loop runs v times. Each pass reads 13 reals -- R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..5)] -- and stores Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..6)]; Ci[0] is never written. Between passes R0 and R1 advance by ivs, Cr and Ci by ovs. The DK(...) lines introduce the named KP constants used by the FMA / FNMS / FMS expressions below. */ static void r2cf_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP516520780, +0.516520780623489722840901288569017135705033622); + DK(KP300462606, +0.300462606288665774426601772289207995520941381); + DK(KP581704778, +0.581704778510515730456870384989698884939833902); + DK(KP859542535, +0.859542535098774820163672132761689612766401925); + DK(KP769338817, +0.769338817572980603471413688209101117038278899); + DK(KP686558370, +0.686558370781754340655719594850823015421401653); + DK(KP514918778, +0.514918778086315755491789696138117261566051239); + DK(KP251768516, +0.251768516431883313623436926934233488546674281); + DK(KP503537032, +0.503537032863766627246873853868466977093348562); + DK(KP904176221, +0.904176221990848204433795481776887926501523162); + DK(KP575140729, +0.575140729474003121368385547455453388461001608); + DK(KP957805992, +0.957805992594665126462521754605754580515587217); + DK(KP600477271, +0.600477271932665282925769253334763009352012849); + DK(KP522026385, +0.522026385161275033714027226654165028300441940); + DK(KP301479260, +0.301479260047709873958013540496673347309208464); + DK(KP226109445, 
+0.226109445035782405468510155372505010481906348); + DK(KP853480001, +0.853480001859823990758994934970528322872359049); + DK(KP083333333, +0.083333333333333333333333333333333333333333333); + DK(KP612264650, +0.612264650376756543746494474777125408779395514); + DK(KP038632954, +0.038632954644348171955506895830342264440241080); + DK(KP302775637, +0.302775637731994646559610633735247973125648287); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) { + E T15, T1a, T11, T17, T14, T1b; + { + E TN, TD, TV, TA, Tb, TZ, T12, TS, Tx, Tu, Ti, TU; + TN = R0[0]; + { + E T3, TP, Th, TB, Tp, Te, Tm, TC, Tr, T6, T9, Ts; + { + E Tn, Tf, Tg, T1, T2; + T1 = R0[WS(rs, 4)]; + T2 = R1[WS(rs, 2)]; + Tn = R0[WS(rs, 6)]; + Tf = R0[WS(rs, 5)]; + Tg = R0[WS(rs, 2)]; + T3 = T1 - T2; + TP = T1 + T2; + { + E Tk, To, Tc, Td; + Tk = R1[0]; + Th = Tf - Tg; + To = Tf + Tg; + Tc = R1[WS(rs, 4)]; + Td = R1[WS(rs, 1)]; + { + E T4, Tl, T5, T7, T8; + T4 = R1[WS(rs, 5)]; + TB = Tn + To; + Tp = FMS(KP500000000, To, Tn); + Tl = Td + Tc; + Te = Tc - Td; + T5 = R0[WS(rs, 3)]; + T7 = R1[WS(rs, 3)]; + T8 = R0[WS(rs, 1)]; + Tm = FNMS(KP500000000, Tl, Tk); + TC = Tk + Tl; + Tr = T4 + T5; + T6 = T4 - T5; + T9 = T7 - T8; + Ts = T7 + T8; + } + } + } + { + E TO, Ta, Tt, TQ; + TD = TB - TC; + TO = TC + TB; + Ta = T6 + T9; + TV = T6 - T9; + Tt = Tr - Ts; + TQ = Tr + Ts; + { + E TX, Tq, TR, TY; + TX = Tm - Tp; + Tq = Tm + Tp; + TA = T3 + Ta; + Tb = FNMS(KP500000000, Ta, T3); + TR = TP + TQ; + TY = FNMS(KP500000000, TQ, TP); + TZ = TX + TY; + T12 = TX - TY; + T15 = TO - TR; + TS = TO + TR; + Tx = FNMS(KP866025403, Tt, Tq); + Tu = FMA(KP866025403, Tt, Tq); + Ti = Te + Th; + TU = Th - Te; + } + } + } + /* TN is R0[0] and TS is the sum of the other twelve inputs, so Cr[0] is the plain sum of all 13 samples. */ Cr[0] = TN + TS; + { + E Tw, Tj, T13, TW; + 
Tw = FNMS(KP866025403, Ti, Tb); + Tj = FMA(KP866025403, Ti, Tb); + T13 = TU - TV; + TW = TU + TV; + { + E TE, TI, Tv, TF, TG, Ty; + TE = FMA(KP302775637, TD, TA); + TI = FNMS(KP302775637, TA, TD); + Tv = FMA(KP038632954, Tu, Tj); + TF = FNMS(KP038632954, Tj, Tu); + TG = FNMS(KP612264650, Tw, Tx); + Ty = FMA(KP612264650, Tx, Tw); + { + E TT, Tz, TK, TH, TM, T10, TL, TJ; + TT = FNMS(KP083333333, TS, TN); + Tz = FNMS(KP853480001, Ty, Tv); + TK = FMA(KP853480001, Ty, Tv); + TH = FNMS(KP853480001, TG, TF); + TM = FMA(KP853480001, TG, TF); + T1a = FNMS(KP226109445, TW, TZ); + T10 = FMA(KP301479260, TZ, TW); + TL = FNMS(KP522026385, Tz, TE); + Ci[WS(csi, 1)] = KP600477271 * (FMA(KP957805992, TE, Tz)); + TJ = FMA(KP522026385, TH, TI); + Ci[WS(csi, 5)] = -(KP600477271 * (FNMS(KP957805992, TI, TH))); + Ci[WS(csi, 4)] = -(KP575140729 * (FMA(KP904176221, TM, TL))); + Ci[WS(csi, 3)] = KP575140729 * (FNMS(KP904176221, TM, TL)); + Ci[WS(csi, 6)] = KP575140729 * (FMA(KP904176221, TK, TJ)); + Ci[WS(csi, 2)] = KP575140729 * (FNMS(KP904176221, TK, TJ)); + T11 = FMA(KP503537032, T10, TT); + T17 = FNMS(KP251768516, T10, TT); + } + T14 = FNMS(KP514918778, T13, T12); + T1b = FMA(KP686558370, T12, T13); + } + } + } + { + E T1e, T1c, T18, T16, T1d, T19; + T1e = FMA(KP769338817, T1b, T1a); + T1c = FNMS(KP769338817, T1b, T1a); + T18 = FNMS(KP859542535, T14, T15); + T16 = FMA(KP581704778, T15, T14); + T1d = FNMS(KP300462606, T18, T17); + T19 = FMA(KP300462606, T18, T17); + Cr[WS(csr, 1)] = FMA(KP516520780, T16, T11); + Cr[WS(csr, 5)] = FNMS(KP516520780, T16, T11); + Cr[WS(csr, 2)] = FMA(KP503537032, T1e, T1d); + Cr[WS(csr, 6)] = FNMS(KP503537032, T1e, T1d); + Cr[WS(csr, 3)] = FMA(KP503537032, T1c, T19); + Cr[WS(csr, 4)] = FNMS(KP503537032, T1c, T19); + } + } + } +} + +/* Planner descriptor for the FMA variant: size 13, the codelet name, and the tuple {31, 6, 45, 0} whose first three entries match the additions / multiplications / fused multiply-add counts quoted in the header comment of this variant. GENUS and the meaning of the trailing 0 are defined outside this chunk. */ static const kr2c_desc desc = { 13, "r2cf_13", {31, 6, 45, 0}, &GENUS }; + +/* Registration entry point: passes r2cf_13 and desc to X(kr2c_register) on planner p. */ void X(codelet_r2cf_13) (planner *p) { + X(kr2c_register) (p, r2cf_13, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: 
../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 13 -name r2cf_13 -include r2cf.h */ + +/* + * This function contains 76 FP additions, 34 FP multiplications, + * (or, 57 additions, 15 multiplications, 19 fused multiply/add), + * 55 stack variables, 20 constants, and 26 memory accesses + */ +#include "r2cf.h" + +/* r2cf_13, non-FMA branch (the #else side of HAVE_FMA), generated without the -fma flag. Same signature and the same set of loads and stores as the FMA variant: each of the v passes reads R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..5)] and stores Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..6)]; Ci[0] is never written. Between passes R0 and R1 advance by ivs, Cr and Ci by ovs. Presumably the length-13 real-to-complex forward DFT kernel -- TODO confirm against the FFTW documentation. */ static void r2cf_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP083333333, +0.083333333333333333333333333333333333333333333); + DK(KP075902986, +0.075902986037193865983102897245103540356428373); + DK(KP251768516, +0.251768516431883313623436926934233488546674281); + DK(KP503537032, +0.503537032863766627246873853868466977093348562); + DK(KP113854479, +0.113854479055790798974654345867655310534642560); + DK(KP265966249, +0.265966249214837287587521063842185948798330267); + DK(KP387390585, +0.387390585467617292130675966426762851778775217); + DK(KP300462606, +0.300462606288665774426601772289207995520941381); + DK(KP132983124, +0.132983124607418643793760531921092974399165133); + DK(KP258260390, +0.258260390311744861420450644284508567852516811); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); + DK(KP300238635, +0.300238635966332641462884626667381504676006424); + DK(KP011599105, +0.011599105605768290721655456654083252189827041); + DK(KP156891391, +0.156891391051584611046832726756003269660212636); + DK(KP256247671, +0.256247671582936600958684654061725059144125175); + DK(KP174138601, +0.174138601152135905005660794929264742616964676); + DK(KP575140729, +0.575140729474003121368385547455453388461001608); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, 
csr), MAKE_VOLATILE_STRIDE(52, csi)) { + E T13, Tb, Tm, TW, TX, T14, TU, T10, Tz, TB, Tu, TC, TR, T11; + T13 = R0[0]; + { + E Te, TO, Ta, Tv, To, T5, Tw, Tp, Th, Tr, Tk, Ts, Tl, TP, Tc; + E Td; + Tc = R0[WS(rs, 4)]; + Td = R1[WS(rs, 2)]; + Te = Tc - Td; + TO = Tc + Td; + { + E T6, T7, T8, T9; + T6 = R1[0]; + T7 = R1[WS(rs, 1)]; + T8 = R1[WS(rs, 4)]; + T9 = T7 + T8; + Ta = T6 + T9; + Tv = T7 - T8; + To = FNMS(KP500000000, T9, T6); + } + { + E T1, T2, T3, T4; + T1 = R0[WS(rs, 6)]; + T2 = R0[WS(rs, 5)]; + T3 = R0[WS(rs, 2)]; + T4 = T2 + T3; + T5 = T1 + T4; + Tw = T2 - T3; + Tp = FNMS(KP500000000, T4, T1); + } + { + E Tf, Tg, Ti, Tj; + Tf = R1[WS(rs, 5)]; + Tg = R0[WS(rs, 3)]; + Th = Tf - Tg; + Tr = Tf + Tg; + Ti = R1[WS(rs, 3)]; + Tj = R0[WS(rs, 1)]; + Tk = Ti - Tj; + Ts = Ti + Tj; + } + Tl = Th + Tk; + TP = Tr + Ts; + Tb = T5 - Ta; + Tm = Te + Tl; + TW = Ta + T5; + TX = TO + TP; + T14 = TW + TX; + { + E TS, TT, Tx, Ty; + TS = Tv + Tw; + TT = Th - Tk; + TU = TS - TT; + T10 = TS + TT; + Tx = KP866025403 * (Tv - Tw); + Ty = FNMS(KP500000000, Tl, Te); + Tz = Tx + Ty; + TB = Ty - Tx; + } + { + E Tq, Tt, TN, TQ; + Tq = To - Tp; + Tt = KP866025403 * (Tr - Ts); + Tu = Tq - Tt; + TC = Tq + Tt; + TN = To + Tp; + TQ = FNMS(KP500000000, TP, TO); + TR = TN - TQ; + T11 = TN + TQ; + } + } + /* T13 is R0[0] and T14 is the sum of the other twelve inputs, so Cr[0] is the plain sum of all 13 samples. */ Cr[0] = T13 + T14; + { + E Tn, TG, TE, TF, TJ, TM, TK, TL; + Tn = FNMS(KP174138601, Tm, KP575140729 * Tb); + TG = FMA(KP174138601, Tb, KP575140729 * Tm); + { + E TA, TD, TH, TI; + TA = FNMS(KP156891391, Tz, KP256247671 * Tu); + TD = FNMS(KP300238635, TC, KP011599105 * TB); + TE = TA + TD; + TF = KP1_732050807 * (TD - TA); + TH = FMA(KP300238635, TB, KP011599105 * TC); + TI = FMA(KP256247671, Tz, KP156891391 * Tu); + TJ = TH - TI; + TM = KP1_732050807 * (TI + TH); + } + Ci[WS(csi, 5)] = FMA(KP2_000000000, TE, Tn); + Ci[WS(csi, 1)] = FMA(KP2_000000000, TJ, TG); + TK = TG - TJ; + Ci[WS(csi, 4)] = TF - TK; + Ci[WS(csi, 3)] = TF + TK; + TL = Tn - TE; + Ci[WS(csi, 2)] = TL - TM; + Ci[WS(csi, 6)] = TL 
+ TM; + } + { + E TZ, T1b, T19, T1e, T16, T1a, TV, TY, T1c, T1d; + TV = FNMS(KP132983124, TU, KP258260390 * TR); + TY = KP300462606 * (TW - TX); + TZ = FMA(KP2_000000000, TV, TY); + T1b = TY - TV; + { + E T17, T18, T12, T15; + T17 = FMA(KP387390585, TU, KP265966249 * TR); + T18 = FNMS(KP503537032, T11, KP113854479 * T10); + T19 = T17 - T18; + T1e = T17 + T18; + T12 = FMA(KP251768516, T10, KP075902986 * T11); + T15 = FNMS(KP083333333, T14, T13); + T16 = FMA(KP2_000000000, T12, T15); + T1a = T15 - T12; + } + Cr[WS(csr, 1)] = TZ + T16; + Cr[WS(csr, 5)] = T16 - TZ; + T1c = T1a - T1b; + Cr[WS(csr, 2)] = T19 + T1c; + Cr[WS(csr, 6)] = T1c - T19; + T1d = T1b + T1a; + Cr[WS(csr, 3)] = T1d - T1e; + Cr[WS(csr, 4)] = T1e + T1d; + } + } + } +} + +/* Planner descriptor for the non-FMA variant: size 13, the codelet name, and the tuple {57, 15, 19, 0} whose first three entries match the additions / multiplications / fused multiply-add counts quoted in the header comment of this variant. GENUS and the meaning of the trailing 0 are defined outside this chunk. */ static const kr2c_desc desc = { 13, "r2cf_13", {57, 15, 19, 0}, &GENUS }; + +/* Registration entry point: passes r2cf_13 and desc to X(kr2c_register) on planner p. */ void X(codelet_r2cf_13) (planner *p) { + X(kr2c_register) (p, r2cf_13, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_14.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_14.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 14 -name r2cf_14 -include r2cf.h */ + +/* + * This function contains 62 FP additions, 36 FP multiplications, + * (or, 32 additions, 6 multiplications, 30 fused multiply/add), + * 45 stack variables, 6 constants, and 28 memory accesses + */ +#include "r2cf.h" + +/* r2cf_14, HAVE_FMA branch. genfft output for -n 14 (see the generator line above); presumably the length-14 real-to-complex forward DFT kernel -- TODO confirm against the FFTW documentation. The loop runs v times. Each pass reads 14 reals -- R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..6)] -- and stores Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..6)]; Ci[0] and Ci[WS(csi, 7)] are never written. Between passes R0 and R1 advance by ivs, Cr and Ci by ovs. */ static void r2cf_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { + E TN, T3, TG, TQ, Tx, To, TH, Td, TD, TO, Tw, Ta, TL, Ty, TT; + E TI, Tg, Tr, Te, Tf, TP, TJ; + { + E Tl, TE, Tk, Tm; + { + E T1, T2, Ti, Tj; + T1 = R0[0]; + T2 = R1[WS(rs, 3)]; + Ti = R0[WS(rs, 3)]; + Tj = R1[WS(rs, 6)]; + Tl = R0[WS(rs, 4)]; + TN = T1 + T2; + T3 = T1 - T2; + TE = Ti + Tj; + Tk = Ti - Tj; + Tm = R1[0]; + } + { + E T7, TC, T6, T8; + { + E T4, T5, TF, Tn; + T4 = R0[WS(rs, 1)]; + T5 = R1[WS(rs, 4)]; + T7 = R0[WS(rs, 6)]; + TF = Tl + Tm; + Tn = Tl - Tm; + TC = T4 + 
T5; + T6 = T4 - T5; + TG = TE - TF; + TQ = TE + TF; + Tx = Tn - Tk; + To = Tk + Tn; + T8 = R1[WS(rs, 2)]; + } + { + E Tb, Tc, TB, T9; + Tb = R0[WS(rs, 2)]; + Tc = R1[WS(rs, 5)]; + Te = R0[WS(rs, 5)]; + TB = T7 + T8; + T9 = T7 - T8; + TH = Tb + Tc; + Td = Tb - Tc; + TD = TB - TC; + TO = TC + TB; + Tw = T6 - T9; + Ta = T6 + T9; + Tf = R1[WS(rs, 1)]; + } + } + } + TL = FNMS(KP554958132, TG, TD); + Ty = FNMS(KP554958132, Tx, Tw); + TT = FNMS(KP356895867, TO, TQ); + TI = Te + Tf; + Tg = Te - Tf; + Tr = FNMS(KP356895867, Ta, To); + TP = TH + TI; + TJ = TH - TI; + { + E Th, Tv, TK, TM; + Th = Td + Tg; + Tv = Tg - Td; + TK = FMA(KP554958132, TJ, TG); + TM = FMA(KP554958132, TD, TJ); + Ci[WS(csi, 6)] = KP974927912 * (FNMS(KP801937735, TL, TJ)); + { + E TR, TV, TU, Tz; + TR = FNMS(KP356895867, TQ, TP); + TV = FNMS(KP356895867, TP, TO); + TU = FNMS(KP692021471, TT, TP); + /* TN, TO, TP and TQ are pairwise sums that together cover every input once, so Cr[0] is the plain sum of all 14 samples. */ Cr[0] = TN + TO + TP + TQ; + Tz = FMA(KP554958132, Tv, Tx); + Ci[WS(csi, 1)] = KP974927912 * (FNMS(KP801937735, Ty, Tv)); + { + E TA, Ts, Tt, Tp; + TA = FMA(KP554958132, Tw, Tv); + Ts = FNMS(KP692021471, Tr, Th); + Tt = FNMS(KP356895867, Th, Ta); + Tp = FNMS(KP356895867, To, Th); + /* T3, Ta, Th and To are built from R0 - R1 differences covering every input once, so Cr[7] is (sum of the R0 samples) minus (sum of the R1 samples). */ Cr[WS(csr, 7)] = T3 + Ta + Th + To; + Ci[WS(csi, 2)] = KP974927912 * (FMA(KP801937735, TK, TD)); + Ci[WS(csi, 4)] = KP974927912 * (FNMS(KP801937735, TM, TG)); + { + E TS, TW, Tu, Tq; + TS = FNMS(KP692021471, TR, TO); + TW = FNMS(KP692021471, TV, TQ); + Cr[WS(csr, 2)] = FNMS(KP900968867, TU, TN); + Ci[WS(csi, 5)] = KP974927912 * (FMA(KP801937735, Tz, Tw)); + Ci[WS(csi, 3)] = KP974927912 * (FNMS(KP801937735, TA, Tx)); + Cr[WS(csr, 5)] = FNMS(KP900968867, Ts, T3); + Tu = FNMS(KP692021471, Tt, To); + Tq = FNMS(KP692021471, Tp, Ta); + Cr[WS(csr, 4)] = FNMS(KP900968867, TS, TN); + Cr[WS(csr, 6)] = FNMS(KP900968867, TW, TN); + Cr[WS(csr, 1)] = FNMS(KP900968867, Tu, T3); + Cr[WS(csr, 3)] = FNMS(KP900968867, Tq, T3); + } + } + } + } + } + } +} + +/* Planner descriptor for the FMA variant: size 14, the codelet name, and the tuple {32, 6, 30, 0} whose first three entries match the additions / multiplications / fused multiply-add counts quoted in the header comment of this variant. GENUS and the meaning of the trailing 0 are defined outside this chunk. */ static const kr2c_desc desc = { 14, "r2cf_14", {32, 6, 30, 0}, &GENUS }; + +/* Registration entry point for the FMA variant; its declarator and body continue on the next physical line of this collapsed patch. */ void 
X(codelet_r2cf_14) (planner *p) { + X(kr2c_register) (p, r2cf_14, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 14 -name r2cf_14 -include r2cf.h */ + +/* + * This function contains 62 FP additions, 36 FP multiplications, + * (or, 38 additions, 12 multiplications, 24 fused multiply/add), + * 29 stack variables, 6 constants, and 28 memory accesses + */ +#include "r2cf.h" + +/* r2cf_14, non-FMA branch (the #else side of HAVE_FMA), generated without the -fma flag. Each of the v passes reads 14 reals -- R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..6)] -- and stores Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..6)]; Ci[0] and Ci[WS(csi, 7)] are never written. Between passes R0 and R1 advance by ivs, Cr and Ci by ovs. Presumably the length-14 real-to-complex forward DFT kernel -- TODO confirm against the FFTW documentation. */ static void r2cf_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { + E T3, TB, T6, Tv, Tn, Ts, Tk, Tt, Td, Ty, T9, Tw, Tg, Tz, T1; + E T2; + T1 = R0[0]; + T2 = R1[WS(rs, 3)]; + T3 = T1 - T2; + TB = T1 + T2; + { + E T4, T5, Tl, Tm; + T4 = R0[WS(rs, 2)]; + T5 = R1[WS(rs, 5)]; + T6 = T4 - T5; + Tv = T4 + T5; + Tl = R0[WS(rs, 6)]; + Tm = R1[WS(rs, 2)]; + Tn = Tl - Tm; + Ts = Tl + Tm; + } + { + E Ti, Tj, Tb, Tc; + Ti = R0[WS(rs, 1)]; + Tj = R1[WS(rs, 4)]; + Tk = Ti - Tj; + Tt = Ti + Tj; + Tb = R0[WS(rs, 3)]; + Tc = R1[WS(rs, 6)]; + Td = Tb - Tc; + Ty = Tb + Tc; + } + { + E T7, T8, Te, Tf; + T7 = R0[WS(rs, 5)]; + T8 = R1[WS(rs, 1)]; + T9 = T7 - T8; + Tw = T7 + T8; + Te = R0[WS(rs, 4)]; + Tf = R1[0]; + Tg = Te - Tf; + Tz = Te + Tf; + } + { + E Tp, Tr, Tq, Ta, To, Th; + Tp = Tn - Tk; + Tr = Tg - Td; + Tq = T9 - T6; + Ci[WS(csi, 1)] = FMA(KP781831482, Tp, KP974927912 * Tq) + 
(KP433883739 * Tr); + Ci[WS(csi, 5)] = FMA(KP433883739, Tq, KP781831482 * Tr) - (KP974927912 * Tp); + Ci[WS(csi, 3)] = FMA(KP433883739, Tp, KP974927912 * Tr) - (KP781831482 * Tq); + Ta = T6 + T9; + To = Tk + Tn; + Th = Td + Tg; + Cr[WS(csr, 3)] = FMA(KP623489801, Ta, T3) + FNMA(KP222520933, Th, KP900968867 * To); + /* T3, To, Ta and Th are built from R0 - R1 differences covering every input once, so Cr[7] is (sum of the R0 samples) minus (sum of the R1 samples). */ Cr[WS(csr, 7)] = T3 + To + Ta + Th; + Cr[WS(csr, 1)] = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta); + Cr[WS(csr, 5)] = FMA(KP623489801, Th, T3) + FNMA(KP900968867, Ta, KP222520933 * To); + } + { + E Tu, TA, Tx, TC, TE, TD; + Tu = Ts - Tt; + TA = Ty - Tz; + Tx = Tv - Tw; + Ci[WS(csi, 2)] = FMA(KP974927912, Tu, KP433883739 * Tx) + (KP781831482 * TA); + Ci[WS(csi, 6)] = FMA(KP974927912, Tx, KP433883739 * TA) - (KP781831482 * Tu); + Ci[WS(csi, 4)] = FNMS(KP781831482, Tx, KP974927912 * TA) - (KP433883739 * Tu); + TC = Tt + Ts; + TE = Tv + Tw; + TD = Ty + Tz; + Cr[WS(csr, 6)] = FMA(KP623489801, TC, TB) + FNMA(KP900968867, TD, KP222520933 * TE); + Cr[WS(csr, 2)] = FMA(KP623489801, TD, TB) + FNMA(KP900968867, TE, KP222520933 * TC); + Cr[WS(csr, 4)] = FMA(KP623489801, TE, TB) + FNMA(KP222520933, TD, KP900968867 * TC); + /* TB, TC, TE and TD are pairwise sums that together cover every input once, so Cr[0] is the plain sum of all 14 samples. */ Cr[0] = TB + TC + TE + TD; + } + } + } +} + +/* Planner descriptor for the non-FMA variant: size 14, the codelet name, and the tuple {38, 12, 24, 0} whose first three entries match the additions / multiplications / fused multiply-add counts quoted in the header comment of this variant. GENUS and the meaning of the trailing 0 are defined outside this chunk. */ static const kr2c_desc desc = { 14, "r2cf_14", {38, 12, 24, 0}, &GENUS }; + +/* Registration entry point: passes r2cf_14 and desc to X(kr2c_register) on planner p. */ void X(codelet_r2cf_14) (planner *p) { + X(kr2c_register) (p, r2cf_14, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_15.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_15.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cf_15 -include r2cf.h */ + +/* + * This function contains 64 FP additions, 35 FP multiplications, + * (or, 36 additions, 7 multiplications, 28 fused multiply/add), + * 50 stack variables, 8 constants, and 30 memory accesses + */ +#include "r2cf.h" + +/* r2cf_15, HAVE_FMA branch. genfft output for -n 15 (see the generator line above); presumably the length-15 real-to-complex forward DFT kernel -- TODO confirm against the FFTW documentation. The loop runs v times. Each pass reads 15 reals -- R0[0], R0[WS(rs, 1..7)], R1[0], R1[WS(rs, 1..6)] -- and stores Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..7)]; Ci[0] is never written. Between passes R0 and R1 advance by ivs, Cr and Ci by ovs. */ static void r2cf_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP910592997, +0.910592997310029334643087372129977886038870291); + DK(KP823639103, +0.823639103546331925877420039278190003029660514); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E Tw, Tz, Tp, Ty; + { + E Ti, TF, TR, TN, TX, T11, TM, TS, Tl, TH, Tf, To, TT, TD, 
Tg; + E Th; + TD = R0[0]; + Tg = R0[WS(rs, 5)]; + Th = R1[WS(rs, 2)]; + { + E Tj, Tq, Tt, Tm, T3, Tk, T4, Ta, Tr, Td, Tu, T5, TE; + Tj = R1[WS(rs, 1)]; + Tq = R0[WS(rs, 3)]; + Tt = R1[WS(rs, 4)]; + TE = Th + Tg; + Ti = Tg - Th; + Tm = R0[WS(rs, 6)]; + { + E T8, T9, T1, T2, Tb, Tc; + T1 = R0[WS(rs, 4)]; + T2 = R1[WS(rs, 6)]; + TF = FNMS(KP500000000, TE, TD); + TR = TD + TE; + T8 = R1[WS(rs, 5)]; + T3 = T1 - T2; + Tk = T1 + T2; + T9 = R1[0]; + Tb = R0[WS(rs, 7)]; + Tc = R0[WS(rs, 2)]; + T4 = R0[WS(rs, 1)]; + Ta = T8 - T9; + Tr = T8 + T9; + Td = Tb - Tc; + Tu = Tb + Tc; + T5 = R1[WS(rs, 3)]; + } + { + E Ts, Tv, Te, Tn, T7, T6, TV, TW; + TV = Tq + Tr; + Ts = FNMS(KP500000000, Tr, Tq); + Tv = FNMS(KP500000000, Tu, Tt); + TW = Tt + Tu; + Te = Ta + Td; + TN = Td - Ta; + Tn = T4 + T5; + T6 = T4 - T5; + TX = TV + TW; + T11 = TW - TV; + TM = T6 - T3; + T7 = T3 + T6; + TS = Tj + Tk; + Tl = FNMS(KP500000000, Tk, Tj); + TH = Ts + Tv; + Tw = Ts - Tv; + Tz = Te - T7; + Tf = T7 + Te; + To = FNMS(KP500000000, Tn, Tm); + TT = Tm + Tn; + } + } + { + E TO, TQ, TU, T12, TK, TI, TG; + Ci[WS(csi, 5)] = KP866025403 * (Tf - Ti); + TG = Tl + To; + Tp = Tl - To; + TO = FMA(KP618033988, TN, TM); + TQ = FNMS(KP618033988, TM, TN); + TU = TS + TT; + T12 = TS - TT; + TK = TG - TH; + TI = TG + TH; + { + E T10, TY, TL, TP, TJ, TZ; + T10 = TU - TX; + TY = TU + TX; + Cr[WS(csr, 5)] = TF + TI; + TJ = FNMS(KP250000000, TI, TF); + Ci[WS(csi, 6)] = -(KP951056516 * (FNMS(KP618033988, T11, T12))); + Ci[WS(csi, 3)] = KP951056516 * (FMA(KP618033988, T12, T11)); + TL = FMA(KP559016994, TK, TJ); + TP = FNMS(KP559016994, TK, TJ); + /* TR sums three inputs and TY the remaining twelve, so Cr[0] is the plain sum of all 15 samples. */ Cr[0] = TR + TY; + TZ = FNMS(KP250000000, TY, TR); + Cr[WS(csr, 4)] = FNMS(KP823639103, TO, TL); + Cr[WS(csr, 1)] = FMA(KP823639103, TO, TL); + Cr[WS(csr, 7)] = FNMS(KP823639103, TQ, TP); + Cr[WS(csr, 2)] = FMA(KP823639103, TQ, TP); + Cr[WS(csr, 6)] = FMA(KP559016994, T10, TZ); + Cr[WS(csr, 3)] = FNMS(KP559016994, T10, TZ); + Ty = FMA(KP250000000, Tf, Ti); + } + } + } + { + E TB, Tx, 
TC, TA; + TB = FNMS(KP618033988, Tp, Tw); + Tx = FMA(KP618033988, Tw, Tp); + TC = FNMS(KP559016994, Tz, Ty); + TA = FMA(KP559016994, Tz, Ty); + Ci[WS(csi, 2)] = KP951056516 * (FNMS(KP910592997, TC, TB)); + Ci[WS(csi, 7)] = KP951056516 * (FMA(KP910592997, TC, TB)); + Ci[WS(csi, 4)] = KP951056516 * (FMA(KP910592997, TA, Tx)); + Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP910592997, TA, Tx))); + } + } + } +} + +/* Planner descriptor for the FMA variant: size 15, the codelet name, and the tuple {36, 7, 28, 0} whose first three entries match the additions / multiplications / fused multiply-add counts quoted in the header comment of this variant. GENUS and the meaning of the trailing 0 are defined outside this chunk. */ static const kr2c_desc desc = { 15, "r2cf_15", {36, 7, 28, 0}, &GENUS }; + +/* Registration entry point: passes r2cf_15 and desc to X(kr2c_register) on planner p. */ void X(codelet_r2cf_15) (planner *p) { + X(kr2c_register) (p, r2cf_15, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cf_15 -include r2cf.h */ + +/* + * This function contains 64 FP additions, 25 FP multiplications, + * (or, 50 additions, 11 multiplications, 14 fused multiply/add), + * 47 stack variables, 10 constants, and 30 memory accesses + */ +#include "r2cf.h" + +static void r2cf_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP484122918, +0.484122918275927110647408174972799951354115213); + DK(KP216506350, +0.216506350946109661690930792688234045867850657); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP509036960, +0.509036960455127183450980863393907648510733164); + DK(KP823639103, +0.823639103546331925877420039278190003029660514); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { + E Ti, TR, TL, TD, TE, T7, Te, Tf, TV, TW, TX, 
Tv, Ty, TH, To; + E Tr, TG, TS, TT, TU; + { + E TJ, Tg, Th, TK; + TJ = R0[0]; + Tg = R0[WS(rs, 5)]; + Th = R1[WS(rs, 2)]; + TK = Th + Tg; + Ti = Tg - Th; + TR = TJ + TK; + TL = FNMS(KP500000000, TK, TJ); + } + { + E Tm, Tt, Tw, Tp, T3, Tx, Ta, Tn, Td, Tq, T6, Tu; + Tm = R1[WS(rs, 1)]; + Tt = R0[WS(rs, 3)]; + Tw = R1[WS(rs, 4)]; + Tp = R0[WS(rs, 6)]; + { + E T1, T2, T8, T9; + T1 = R0[WS(rs, 7)]; + T2 = R0[WS(rs, 2)]; + T3 = T1 - T2; + Tx = T1 + T2; + T8 = R1[WS(rs, 6)]; + T9 = R0[WS(rs, 4)]; + Ta = T8 - T9; + Tn = T9 + T8; + } + { + E Tb, Tc, T4, T5; + Tb = R1[WS(rs, 3)]; + Tc = R0[WS(rs, 1)]; + Td = Tb - Tc; + Tq = Tc + Tb; + T4 = R1[0]; + T5 = R1[WS(rs, 5)]; + T6 = T4 - T5; + Tu = T5 + T4; + } + TD = Ta - Td; + TE = T6 + T3; + T7 = T3 - T6; + Te = Ta + Td; + Tf = T7 - Te; + TV = Tt + Tu; + TW = Tw + Tx; + TX = TV + TW; + Tv = FNMS(KP500000000, Tu, Tt); + Ty = FNMS(KP500000000, Tx, Tw); + TH = Tv + Ty; + To = FNMS(KP500000000, Tn, Tm); + Tr = FNMS(KP500000000, Tq, Tp); + TG = To + Tr; + TS = Tm + Tn; + TT = Tp + Tq; + TU = TS + TT; + } + Ci[WS(csi, 5)] = KP866025403 * (Tf - Ti); + { + E TF, TP, TI, TM, TN, TQ, TO; + TF = FMA(KP823639103, TD, KP509036960 * TE); + TP = FNMS(KP509036960, TD, KP823639103 * TE); + TI = KP559016994 * (TG - TH); + TM = TG + TH; + TN = FNMS(KP250000000, TM, TL); + Cr[WS(csr, 5)] = TL + TM; + TQ = TN - TI; + Cr[WS(csr, 2)] = TP + TQ; + Cr[WS(csr, 7)] = TQ - TP; + TO = TI + TN; + Cr[WS(csr, 1)] = TF + TO; + Cr[WS(csr, 4)] = TO - TF; + } + { + E T11, T12, T10, TY, TZ; + T11 = TS - TT; + T12 = TW - TV; + Ci[WS(csi, 3)] = FMA(KP587785252, T11, KP951056516 * T12); + Ci[WS(csi, 6)] = FNMS(KP951056516, T11, KP587785252 * T12); + T10 = KP559016994 * (TU - TX); + TY = TU + TX; + TZ = FNMS(KP250000000, TY, TR); + Cr[WS(csr, 3)] = TZ - T10; + Cr[0] = TR + TY; + Cr[WS(csr, 6)] = T10 + TZ; + { + E Tl, TB, TA, TC; + { + E Tj, Tk, Ts, Tz; + Tj = FMA(KP866025403, Ti, KP216506350 * Tf); + Tk = KP484122918 * (Te + T7); + Tl = Tj + Tk; + TB = Tk - Tj; + Ts = 
To - Tr; + Tz = Tv - Ty; + TA = FMA(KP951056516, Ts, KP587785252 * Tz); + TC = FNMS(KP587785252, Ts, KP951056516 * Tz); + } + Ci[WS(csi, 1)] = Tl - TA; + Ci[WS(csi, 7)] = TC - TB; + Ci[WS(csi, 4)] = Tl + TA; + Ci[WS(csi, 2)] = TB + TC; + } + } + } + } +} + +static const kr2c_desc desc = { 15, "r2cf_15", {50, 11, 14, 0}, &GENUS }; + +void X(codelet_r2cf_15) (planner *p) { + X(kr2c_register) (p, r2cf_15, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -name r2cf_16 -include r2cf.h */ + +/* + * This function contains 58 FP additions, 20 FP multiplications, + * (or, 38 additions, 0 multiplications, 20 fused multiply/add), + * 38 stack variables, 3 constants, and 32 memory accesses + */ +#include "r2cf.h" + +/* r2cf_16, HAVE_FMA variant. Each of the v loop iterations reads R0 and R1 at offsets 0 and WS(rs, 1) .. WS(rs, 7) (16 inputs) and stores Cr[0], Cr[WS(csr, 1)] .. Cr[WS(csr, 8)] and Ci[WS(csi, 1)] .. Ci[WS(csi, 7)]; no other Ci entry is written. After each iteration R0 and R1 advance by ivs and Cr and Ci by ovs. Statement order comes from the generator (see the -reorder-insns and -schedule-for-pipeline options quoted above), so it should not be rearranged by hand. */ +static void r2cf_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E TQ, TP; + { + E TB, TN, Tf, T7, Te, Tv, TO, TE, Tq, TJ, Tp, TI, TT, Ty, Tm; + E Tr, TK, Ts; + { + E TC, Ta, Td, TD; + { + E T1, T2, T4, T5; + T1 = R0[0]; + T2 = R0[WS(rs, 4)]; + T4 = R0[WS(rs, 2)]; + T5 = R0[WS(rs, 6)]; + { + E T8, T3, T6, T9, Tb, Tc; + T8 = R0[WS(rs, 1)]; + TB = T1 - T2; + T3 = T1 + T2; + TN = T4 - T5; + T6 = T4 + T5; + T9 = R0[WS(rs, 5)]; + Tb = R0[WS(rs, 7)]; + Tc = R0[WS(rs, 3)]; + Tf = T3 - T6; + T7 = T3 + T6; + TC = T8 - T9; + Ta = T8 + T9; + Td = Tb + Tc; + TD = Tb - Tc; + } + } + { + E TG, Ti, Tj, Tk, Tg, Th; + Tg = R1[0]; + Th = R1[WS(rs, 4)]; + Te = Ta + Td; + Tv = Td - Ta; + TO = TD - TC; + TE = TC + TD; + TG = Tg - Th; + Ti = Tg + Th; + Tj = R1[WS(rs, 
2)]; + Tk = R1[WS(rs, 6)]; + { + E Tn, To, TH, Tl; + Tn = R1[WS(rs, 7)]; + To = R1[WS(rs, 3)]; + Tq = R1[WS(rs, 1)]; + TH = Tj - Tk; + Tl = Tj + Tk; + TJ = Tn - To; + Tp = Tn + To; + TI = FNMS(KP414213562, TH, TG); + TT = FMA(KP414213562, TG, TH); + Ty = Ti + Tl; + Tm = Ti - Tl; + Tr = R1[WS(rs, 5)]; + } + } + } + Cr[WS(csr, 4)] = T7 - Te; + TK = Tr - Tq; + Ts = Tq + Tr; + { + E Tx, TV, TF, TS, Tz, Tt, TM, TL; + Tx = T7 + Te; + TV = FNMS(KP707106781, TE, TB); + TF = FMA(KP707106781, TE, TB); + TL = FNMS(KP414213562, TK, TJ); + TS = FMA(KP414213562, TJ, TK); + Tz = Tp + Ts; + Tt = Tp - Ts; + TM = TI + TL; + TQ = TL - TI; + { + E TR, TU, TW, TA, Tw, Tu; + TP = FMA(KP707106781, TO, TN); + TR = FNMS(KP707106781, TO, TN); + TA = Ty + Tz; + Ci[WS(csi, 4)] = Tz - Ty; + Tw = Tt - Tm; + Tu = Tm + Tt; + Cr[WS(csr, 1)] = FMA(KP923879532, TM, TF); + Cr[WS(csr, 7)] = FNMS(KP923879532, TM, TF); + Cr[0] = Tx + TA; + Cr[WS(csr, 8)] = Tx - TA; + Ci[WS(csi, 6)] = FMS(KP707106781, Tw, Tv); + Ci[WS(csi, 2)] = FMA(KP707106781, Tw, Tv); + Cr[WS(csr, 2)] = FMA(KP707106781, Tu, Tf); + Cr[WS(csr, 6)] = FNMS(KP707106781, Tu, Tf); + TU = TS - TT; + TW = TT + TS; + Ci[WS(csi, 7)] = FMA(KP923879532, TU, TR); + Ci[WS(csi, 1)] = FMS(KP923879532, TU, TR); + Cr[WS(csr, 3)] = FMA(KP923879532, TW, TV); + Cr[WS(csr, 5)] = FNMS(KP923879532, TW, TV); + } + } + } + Ci[WS(csi, 5)] = FMS(KP923879532, TQ, TP); + Ci[WS(csi, 3)] = FMA(KP923879532, TQ, TP); + } + } +} + +/* desc: the leading 16 and the name match the -n 16 -name r2cf_16 generator options, and {38, 0, 20, 0} begins with the add / multiply / fused multiply-add counts from the header comment (meaning of the last entry is not visible here). X(codelet_r2cf_16) passes r2cf_16 and desc to X(kr2c_register) for planner p. */ +static const kr2c_desc desc = { 16, "r2cf_16", {38, 0, 20, 0}, &GENUS }; + +void X(codelet_r2cf_16) (planner *p) { + X(kr2c_register) (p, r2cf_16, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 16 -name r2cf_16 -include r2cf.h */ + +/* + * This function contains 58 FP additions, 12 FP multiplications, + * (or, 54 additions, 8 multiplications, 4 fused multiply/add), + * 34 stack variables, 3 constants, and 32 memory accesses + */ +#include "r2cf.h" + +/* r2cf_16, non-FMA variant (compiled when HAVE_FMA is not defined). Reads and writes the same R0, R1, Cr and Ci entries as the HAVE_FMA variant, but declares KP382683432 in place of KP414213562 and passes explicit products such as KP382683432 * Tm as the last argument of FMA / FNMS. */ +static 
void r2cf_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) { + E T3, T6, T7, Tz, Ti, Ta, Td, Te, TA, Th, Tq, TV, TF, TP, Tx; + E TU, TE, TM, Tg, Tf, TJ, TQ; + { + E T1, T2, T4, T5; + T1 = R0[0]; + T2 = R0[WS(rs, 4)]; + T3 = T1 + T2; + T4 = R0[WS(rs, 2)]; + T5 = R0[WS(rs, 6)]; + T6 = T4 + T5; + T7 = T3 + T6; + Tz = T1 - T2; + Ti = T4 - T5; + } + { + E T8, T9, Tb, Tc; + T8 = R0[WS(rs, 1)]; + T9 = R0[WS(rs, 5)]; + Ta = T8 + T9; + Tg = T8 - T9; + Tb = R0[WS(rs, 7)]; + Tc = R0[WS(rs, 3)]; + Td = Tb + Tc; + Tf = Tb - Tc; + } + Te = Ta + Td; + TA = KP707106781 * (Tg + Tf); + Th = KP707106781 * (Tf - Tg); + { + E Tm, TN, Tp, TO; + { + E Tk, Tl, Tn, To; + Tk = R1[WS(rs, 7)]; + Tl = R1[WS(rs, 3)]; + Tm = Tk - Tl; + TN = Tk + Tl; + Tn = R1[WS(rs, 1)]; + To = R1[WS(rs, 5)]; + Tp = Tn - To; + TO = Tn + To; + } + Tq = FNMS(KP923879532, Tp, KP382683432 * Tm); + TV = TN + TO; + TF = FMA(KP923879532, Tm, KP382683432 * Tp); + TP = TN - TO; + } + { + E Tt, TK, Tw, TL; + { + E Tr, Ts, Tu, Tv; + Tr = R1[0]; + Ts = R1[WS(rs, 4)]; + Tt = Tr - Ts; + TK = Tr + Ts; + Tu = R1[WS(rs, 2)]; + Tv = R1[WS(rs, 6)]; + Tw = Tu - Tv; + TL = Tu + Tv; + } + Tx = FMA(KP382683432, Tt, KP923879532 * Tw); + TU = TK + TL; + TE = FNMS(KP382683432, Tw, KP923879532 * Tt); + TM = TK - TL; + } + Cr[WS(csr, 4)] = T7 - Te; + Ci[WS(csi, 4)] = TV - TU; + { + E Tj, Ty, TD, TG; + Tj = Th - Ti; + Ty = Tq - Tx; + Ci[WS(csi, 1)] = Tj + Ty; + Ci[WS(csi, 7)] = Ty - Tj; + TD = Tz + TA; + TG = TE + TF; + Cr[WS(csr, 7)] = TD - TG; + Cr[WS(csr, 1)] = TD + TG; + } + { + E TB, TC, TH, TI; 
+ TB = Tz - TA; + TC = Tx + Tq; + Cr[WS(csr, 5)] = TB - TC; + Cr[WS(csr, 3)] = TB + TC; + TH = Ti + Th; + TI = TF - TE; + Ci[WS(csi, 3)] = TH + TI; + Ci[WS(csi, 5)] = TI - TH; + } + TJ = T3 - T6; + TQ = KP707106781 * (TM + TP); + Cr[WS(csr, 6)] = TJ - TQ; + Cr[WS(csr, 2)] = TJ + TQ; + { + E TR, TS, TT, TW; + TR = Td - Ta; + TS = KP707106781 * (TP - TM); + Ci[WS(csi, 2)] = TR + TS; + Ci[WS(csi, 6)] = TS - TR; + TT = T7 + Te; + TW = TU + TV; + Cr[WS(csr, 8)] = TT - TW; + Cr[0] = TT + TW; + } + } + } +} + +/* desc for the non-FMA build: same leading 16 and name as the HAVE_FMA descriptor; {54, 8, 4, 0} begins with the add / multiply / fused multiply-add counts from the header comment of this variant. X(codelet_r2cf_16) passes r2cf_16 and desc to X(kr2c_register) for planner p. */ +static const kr2c_desc desc = { 16, "r2cf_16", {54, 8, 4, 0}, &GENUS }; + +void X(codelet_r2cf_16) (planner *p) { + X(kr2c_register) (p, r2cf_16, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 2 -name r2cf_2 -include r2cf.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 3 stack variables, 0 constants, and 4 memory accesses + */ +#include "r2cf.h" + +/* r2cf_2, HAVE_FMA variant. Each of the v loop iterations reads R0[0] and R1[0] and stores their sum in Cr[0] and their difference (R0[0] - R1[0]) in Cr[WS(csr, 1)]. Ci is advanced but never written; rs and csi are only handed to MAKE_VOLATILE_STRIDE. After each iteration R0 and R1 advance by ivs and Cr and Ci by ovs. */ +static void r2cf_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = R0[0]; + T2 = R1[0]; + Cr[0] = T1 + T2; + Cr[WS(csr, 1)] = T1 - T2; + } + } +} + +/* desc: the leading 2 and the name match the -n 2 -name r2cf_2 generator options, and {2, 0, 0, 0} begins with the add / multiply / fused multiply-add counts from the header comment (meaning of the last entry is not visible here). X(codelet_r2cf_2) passes r2cf_2 and desc to X(kr2c_register) for planner p. */ +static const kr2c_desc desc = { 2, "r2cf_2", {2, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cf_2) (planner *p) { + X(kr2c_register) (p, r2cf_2, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 2 -name r2cf_2 -include r2cf.h */ + +/* + * This function contains 2 FP additions, 0 FP multiplications, + * (or, 2 additions, 0 multiplications, 0 fused multiply/add), + * 3 stack variables, 0 constants, and 4 memory accesses + */ +#include "r2cf.h" + +/* r2cf_2, non-FMA variant (compiled when HAVE_FMA is not defined). Same reads and results as the HAVE_FMA variant; the only difference is that Cr[WS(csr, 1)] is stored before Cr[0]. */ +static void r2cf_2(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, csr), 
MAKE_VOLATILE_STRIDE(8, csi)) { + E T1, T2; + T1 = R0[0]; + T2 = R1[0]; + Cr[WS(csr, 1)] = T1 - T2; + Cr[0] = T1 + T2; + } + } +} + +/* desc for the non-FMA build: identical fields to the HAVE_FMA descriptor. X(codelet_r2cf_2) passes r2cf_2 and desc to X(kr2c_register) for planner p. */ +static const kr2c_desc desc = { 2, "r2cf_2", {2, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cf_2) (planner *p) { + X(kr2c_register) (p, r2cf_2, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:08 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 20 -name r2cf_20 -include r2cf.h */ + +/* + * This function contains 86 FP additions, 32 FP multiplications, + * (or, 58 additions, 4 multiplications, 28 fused multiply/add), + * 70 stack variables, 4 constants, and 40 memory accesses + */ +#include "r2cf.h" + +/* r2cf_20, HAVE_FMA variant. Each of the v loop iterations reads R0 and R1 at offsets 0 and WS(rs, 1) .. WS(rs, 9) (20 inputs) and stores Cr[0], Cr[WS(csr, 1)] .. Cr[WS(csr, 10)] and Ci[WS(csi, 1)] .. Ci[WS(csi, 9)]; no other Ci entry is written. After each iteration R0 and R1 advance by ivs and Cr and Ci by ovs. Statement order comes from the generator (see the -reorder-insns and -schedule-for-pipeline options quoted above), so it should not be rearranged by hand. */ +static void r2cf_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E T1i, T1c, T1a, T1o, T1m, T1h, T1b, T13, T1j, T1n; + { + E T3, T1d, TJ, TV, T1k, T16, T19, T1l, Ty, Ti, T12, TD, T1g, TR, TX; + E TK, Tt, TU, TW, TL, TE; + { + E T1, T2, TG, TH; + T1 = R0[0]; + T2 = R0[WS(rs, 5)]; + TG = R1[WS(rs, 2)]; + TH = R1[WS(rs, 7)]; + { + E T6, To, T17, Tx, T18, TC, Tj, T9, Tp, Tu, Td, T15, Tm, Tq, Te; + E Tf; + { + E TA, TB, T7, T8; + { + E T4, TF, TI, T5, Tv, Tw; + T4 = R0[WS(rs, 2)]; + T3 = T1 - T2; + TF = T1 + T2; + T1d = TG - TH; + TI = TG + TH; + T5 = R0[WS(rs, 7)]; + Tv = R1[WS(rs, 6)]; + Tw = R1[WS(rs, 1)]; + TJ = TF - TI; + TV = TF + TI; + T6 = T4 - T5; + To = T4 + T5; + T17 = Tw 
- Tv; + Tx = Tv + Tw; + } + TA = R1[WS(rs, 8)]; + TB = R1[WS(rs, 3)]; + T7 = R0[WS(rs, 8)]; + T8 = R0[WS(rs, 3)]; + { + E Tb, Tc, Tk, Tl; + Tb = R0[WS(rs, 4)]; + T18 = TB - TA; + TC = TA + TB; + Tj = T7 + T8; + T9 = T7 - T8; + Tc = R0[WS(rs, 9)]; + Tk = R1[0]; + Tl = R1[WS(rs, 5)]; + Tp = R1[WS(rs, 4)]; + Tu = Tb + Tc; + Td = Tb - Tc; + T15 = Tl - Tk; + Tm = Tk + Tl; + Tq = R1[WS(rs, 9)]; + Te = R0[WS(rs, 6)]; + Tf = R0[WS(rs, 1)]; + } + } + { + E Ta, Tr, Tz, T1e, T1f, Th, T14, Tg, TP, TQ; + Ta = T6 + T9; + T1k = T6 - T9; + T14 = Tq - Tp; + Tr = Tp + Tq; + Tz = Te + Tf; + Tg = Te - Tf; + T16 = T14 - T15; + T1e = T14 + T15; + T1f = T17 + T18; + T19 = T17 - T18; + Th = Td + Tg; + T1l = Td - Tg; + Ty = Tu - Tx; + TP = Tu + Tx; + Ti = Ta + Th; + T12 = Ta - Th; + TD = Tz - TC; + TQ = Tz + TC; + T1g = T1e + T1f; + T1i = T1e - T1f; + { + E TT, Tn, Ts, TS; + TT = Tj + Tm; + Tn = Tj - Tm; + Ts = To - Tr; + TS = To + Tr; + TR = TP - TQ; + TX = TP + TQ; + TK = Ts + Tn; + Tt = Tn - Ts; + TU = TS - TT; + TW = TS + TT; + } + } + } + } + Cr[WS(csr, 5)] = T3 + Ti; + Ci[WS(csi, 5)] = T1g - T1d; + TL = Ty + TD; + TE = Ty - TD; + { + E TY, T10, TM, TO, T11, TZ, TN; + TY = TW + TX; + T10 = TW - TX; + Ci[WS(csi, 2)] = KP951056516 * (FMA(KP618033988, Tt, TE)); + Ci[WS(csi, 6)] = KP951056516 * (FNMS(KP618033988, TE, Tt)); + Ci[WS(csi, 4)] = KP951056516 * (FMA(KP618033988, TR, TU)); + Ci[WS(csi, 8)] = -(KP951056516 * (FNMS(KP618033988, TU, TR))); + TM = TK + TL; + TO = TK - TL; + T1c = FNMS(KP618033988, T16, T19); + T1a = FMA(KP618033988, T19, T16); + Cr[0] = TV + TY; + TZ = FNMS(KP250000000, TY, TV); + Cr[WS(csr, 10)] = TJ + TM; + TN = FNMS(KP250000000, TM, TJ); + Cr[WS(csr, 8)] = FNMS(KP559016994, T10, TZ); + Cr[WS(csr, 4)] = FMA(KP559016994, T10, TZ); + Cr[WS(csr, 6)] = FMA(KP559016994, TO, TN); + Cr[WS(csr, 2)] = FNMS(KP559016994, TO, TN); + T11 = FNMS(KP250000000, Ti, T3); + T1o = FNMS(KP618033988, T1k, T1l); + T1m = FMA(KP618033988, T1l, T1k); + T1h = FMA(KP250000000, T1g, T1d); + 
T1b = FNMS(KP559016994, T12, T11); + T13 = FMA(KP559016994, T12, T11); + } + } + Cr[WS(csr, 3)] = FNMS(KP951056516, T1c, T1b); + Cr[WS(csr, 7)] = FMA(KP951056516, T1c, T1b); + Cr[WS(csr, 1)] = FMA(KP951056516, T1a, T13); + Cr[WS(csr, 9)] = FNMS(KP951056516, T1a, T13); + T1j = FNMS(KP559016994, T1i, T1h); + T1n = FMA(KP559016994, T1i, T1h); + Ci[WS(csi, 3)] = FNMS(KP951056516, T1o, T1n); + Ci[WS(csi, 7)] = FMA(KP951056516, T1o, T1n); + Ci[WS(csi, 9)] = FMS(KP951056516, T1m, T1j); + Ci[WS(csi, 1)] = -(FMA(KP951056516, T1m, T1j)); + } + } +} + +/* desc: the leading 20 and the name match the -n 20 -name r2cf_20 generator options, and {58, 4, 28, 0} begins with the add / multiply / fused multiply-add counts from the header comment (meaning of the last entry is not visible here). X(codelet_r2cf_20) passes r2cf_20 and desc to X(kr2c_register) for planner p. */ +static const kr2c_desc desc = { 20, "r2cf_20", {58, 4, 28, 0}, &GENUS }; + +void X(codelet_r2cf_20) (planner *p) { + X(kr2c_register) (p, r2cf_20, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 20 -name r2cf_20 -include r2cf.h */ + +/* + * This function contains 86 FP additions, 24 FP multiplications, + * (or, 74 additions, 12 multiplications, 12 fused multiply/add), + * 51 stack variables, 4 constants, and 40 memory accesses + */ +#include "r2cf.h" + +/* r2cf_20, non-FMA variant (compiled when HAVE_FMA is not defined). Reads and writes the same R0, R1, Cr and Ci entries as the HAVE_FMA variant, but declares KP587785252 in place of KP618033988 and passes explicit products such as KP587785252 * Tz as the last argument of FMA / FNMS. */ +static void r2cf_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) { + E T3, T1m, TF, T17, Ts, TM, TN, Tz, Ta, Th, Ti, T1g, T1h, T1k, T10; + E T13, T19, TG, TH, TI, T1d, T1e, T1j, TT, TW, T18; + { + E T1, T2, T15, TD, TE, T16; + T1 = R0[0]; + T2 = R0[WS(rs, 5)]; + T15 = T1 + T2; + TD = R1[WS(rs, 7)]; + TE = R1[WS(rs, 2)]; + T16 = TE + TD; + T3 = T1 - T2; + T1m = T15 + T16; + TF = TD - TE; 
+ T17 = T15 - T16; + } + { + E T6, TU, Tv, T12, Ty, TZ, T9, TR, Td, TY, To, TS, Tr, TV, Tg; + E T11; + { + E T4, T5, Tt, Tu; + T4 = R0[WS(rs, 2)]; + T5 = R0[WS(rs, 7)]; + T6 = T4 - T5; + TU = T4 + T5; + Tt = R1[WS(rs, 8)]; + Tu = R1[WS(rs, 3)]; + Tv = Tt - Tu; + T12 = Tt + Tu; + } + { + E Tw, Tx, T7, T8; + Tw = R1[WS(rs, 6)]; + Tx = R1[WS(rs, 1)]; + Ty = Tw - Tx; + TZ = Tw + Tx; + T7 = R0[WS(rs, 8)]; + T8 = R0[WS(rs, 3)]; + T9 = T7 - T8; + TR = T7 + T8; + } + { + E Tb, Tc, Tm, Tn; + Tb = R0[WS(rs, 4)]; + Tc = R0[WS(rs, 9)]; + Td = Tb - Tc; + TY = Tb + Tc; + Tm = R1[0]; + Tn = R1[WS(rs, 5)]; + To = Tm - Tn; + TS = Tm + Tn; + } + { + E Tp, Tq, Te, Tf; + Tp = R1[WS(rs, 4)]; + Tq = R1[WS(rs, 9)]; + Tr = Tp - Tq; + TV = Tp + Tq; + Te = R0[WS(rs, 6)]; + Tf = R0[WS(rs, 1)]; + Tg = Te - Tf; + T11 = Te + Tf; + } + Ts = To - Tr; + TM = T6 - T9; + TN = Td - Tg; + Tz = Tv - Ty; + Ta = T6 + T9; + Th = Td + Tg; + Ti = Ta + Th; + T1g = TY + TZ; + T1h = T11 + T12; + T1k = T1g + T1h; + T10 = TY - TZ; + T13 = T11 - T12; + T19 = T10 + T13; + TG = Tr + To; + TH = Ty + Tv; + TI = TG + TH; + T1d = TU + TV; + T1e = TR + TS; + T1j = T1d + T1e; + TT = TR - TS; + TW = TU - TV; + T18 = TW + TT; + } + Cr[WS(csr, 5)] = T3 + Ti; + Ci[WS(csi, 5)] = TF - TI; + { + E TX, T14, T1f, T1i; + TX = TT - TW; + T14 = T10 - T13; + Ci[WS(csi, 6)] = FNMS(KP587785252, T14, KP951056516 * TX); + Ci[WS(csi, 2)] = FMA(KP587785252, TX, KP951056516 * T14); + T1f = T1d - T1e; + T1i = T1g - T1h; + Ci[WS(csi, 8)] = FNMS(KP951056516, T1i, KP587785252 * T1f); + Ci[WS(csi, 4)] = FMA(KP951056516, T1f, KP587785252 * T1i); + } + { + E T1l, T1n, T1o, T1c, T1a, T1b; + T1l = KP559016994 * (T1j - T1k); + T1n = T1j + T1k; + T1o = FNMS(KP250000000, T1n, T1m); + Cr[WS(csr, 4)] = T1l + T1o; + Cr[0] = T1m + T1n; + Cr[WS(csr, 8)] = T1o - T1l; + T1c = KP559016994 * (T18 - T19); + T1a = T18 + T19; + T1b = FNMS(KP250000000, T1a, T17); + Cr[WS(csr, 2)] = T1b - T1c; + Cr[WS(csr, 10)] = T17 + T1a; + Cr[WS(csr, 6)] = T1c + T1b; + } + { + E 
TA, TC, Tl, TB, Tj, Tk; + TA = FMA(KP951056516, Ts, KP587785252 * Tz); + TC = FNMS(KP587785252, Ts, KP951056516 * Tz); + Tj = KP559016994 * (Ta - Th); + Tk = FNMS(KP250000000, Ti, T3); + Tl = Tj + Tk; + TB = Tk - Tj; + Cr[WS(csr, 9)] = Tl - TA; + Cr[WS(csr, 7)] = TB + TC; + Cr[WS(csr, 1)] = Tl + TA; + Cr[WS(csr, 3)] = TB - TC; + } + { + E TO, TQ, TL, TP, TJ, TK; + TO = FMA(KP951056516, TM, KP587785252 * TN); + TQ = FNMS(KP587785252, TM, KP951056516 * TN); + TJ = FMA(KP250000000, TI, TF); + TK = KP559016994 * (TH - TG); + TL = TJ + TK; + TP = TK - TJ; + Ci[WS(csi, 1)] = TL - TO; + Ci[WS(csi, 7)] = TQ + TP; + Ci[WS(csi, 9)] = TO + TL; + Ci[WS(csi, 3)] = TP - TQ; + } + } + } +} + +/* desc for the non-FMA build: same leading 20 and name as the HAVE_FMA descriptor; {74, 12, 12, 0} begins with the add / multiply / fused multiply-add counts from the header comment of this variant. X(codelet_r2cf_20) passes r2cf_20 and desc to X(kr2c_register) for planner p. */ +static const kr2c_desc desc = { 20, "r2cf_20", {74, 12, 12, 0}, &GENUS }; + +void X(codelet_r2cf_20) (planner *p) { + X(kr2c_register) (p, r2cf_20, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_25.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_25.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:08 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -name r2cf_25 -include r2cf.h */ + +/* + * This function contains 200 FP additions, 168 FP multiplications, + * (or, 44 additions, 12 multiplications, 156 fused multiply/add), + * 157 stack variables, 66 constants, and 50 memory accesses + */ +#include "r2cf.h" + +static void r2cf_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP792626838, +0.792626838241819413632131824093538848057784557); + DK(KP876091699, +0.876091699473550838204498029706869638173524346); + DK(KP809385824, +0.809385824416008241660603814668679683846476688); + DK(KP860541664, +0.860541664367944677098261680920518816412804187); + DK(KP681693190, +0.681693190061530575150324149145440022633095390); + DK(KP560319534, +0.560319534973832390111614715371676131169633784); + DK(KP997675361, +0.997675361079556513670859573984492383596555031); + DK(KP237294955, +0.237294955877110315393888866460840817927895961); + DK(KP897376177, +0.897376177523557693138608077137219684419427330); + DK(KP923225144, +0.923225144846402650453449441572664695995209956); + DK(KP956723877, +0.956723877038460305821989399535483155872969262); + DK(KP949179823, +0.949179823508441261575555465843363271711583843); + DK(KP669429328, +0.669429328479476605641803240971985825917022098); + DK(KP570584518, +0.570584518783621657366766175430996792655723863); + DK(KP262346850, +0.262346850930607871785420028382979691334784273); + DK(KP876306680, 
+0.876306680043863587308115903922062583399064238); + DK(KP906616052, +0.906616052148196230441134447086066874408359177); + DK(KP683113946, +0.683113946453479238701949862233725244439656928); + DK(KP559154169, +0.559154169276087864842202529084232643714075927); + DK(KP921078979, +0.921078979742360627699756128143719920817673854); + DK(KP904508497, +0.904508497187473712051146708591409529430077295); + DK(KP999754674, +0.999754674276473633366203429228112409535557487); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP242145790, +0.242145790282157779872542093866183953459003101); + DK(KP904730450, +0.904730450839922351881287709692877908104763647); + DK(KP845997307, +0.845997307939530944175097360758058292389769300); + DK(KP855719849, +0.855719849902058969314654733608091555096772472); + DK(KP982009705, +0.982009705009746369461829878184175962711969869); + DK(KP916574801, +0.916574801383451584742370439148878693530976769); + DK(KP690983005, +0.690983005625052575897706582817180941139845410); + DK(KP952936919, +0.952936919628306576880750665357914584765951388); + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP831864738, +0.831864738706457140726048799369896829771167132); + DK(KP803003575, +0.803003575438660414833440593570376004635464850); + DK(KP522616830, +0.522616830205754336872861364785224694908468440); + DK(KP829049696, +0.829049696159252993975487806364305442437946767); + DK(KP999544308, +0.999544308746292983948881682379742149196758193); + DK(KP772036680, +0.772036680810363904029489473607579825330539880); + DK(KP763932022, +0.763932022500210303590826331268723764559381640); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP447417479, +0.447417479732227551498980015410057305749330693); + DK(KP734762448, +0.734762448793050413546343770063151342619912334); + DK(KP894834959, +0.894834959464455102997960030820114611498661386); + DK(KP867381224, +0.867381224396525206773171885031575671309956167); + 
DK(KP958953096, +0.958953096729998668045963838399037225970891871); + DK(KP912575812, +0.912575812670962425556968549836277086778922727); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + DK(KP244189809, +0.244189809627953270309879511234821255780225091); + DK(KP269969613, +0.269969613759572083574752974412347470060951301); + DK(KP522847744, +0.522847744331509716623755382187077770911012542); + DK(KP578046249, +0.578046249379945007321754579646815604023525655); + DK(KP603558818, +0.603558818296015001454675132653458027918768137); + DK(KP667278218, +0.667278218140296670899089292254759909713898805); + DK(KP447533225, +0.447533225982656890041886979663652563063114397); + DK(KP494780565, +0.494780565770515410344588413655324772219443730); + DK(KP987388751, +0.987388751065621252324603216482382109400433949); + DK(KP893101515, +0.893101515366181661711202267938416198338079437); + DK(KP132830569, +0.132830569247582714407653942074819768844536507); + DK(KP120146378, +0.120146378570687701782758537356596213647956445); + DK(KP059835404, +0.059835404262124915169548397419498386427871950); + DK(KP066152395, +0.066152395967733048213034281011006031460903353); + DK(KP786782374, +0.786782374965295178365099601674911834788448471); + DK(KP869845200, +0.869845200362138853122720822420327157933056305); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E T2H, T2w, T2x, T2A, T2C, T2v, T2M, T2y, T2B, T2N; + { + E T2u, TJ, T1O, T39, T2t, TB, T21, T1M, T2e, T26, T1B, T1r, T1k, T1c, T9; + E T1X, T1R, T2k, T29, T1z, T1v, T1h, TX, Ti, T13, T2a, T2j, T1U, T1Y, TQ; + E T1g, T1u, T1y, T12, Ts, T11, T1I; + { + E Tt, Tw, T16, Tx, Ty; + { 
+ E T2p, TG, TH, TD, TE, TI, T2r; + T2p = R0[0]; + TG = R0[WS(rs, 5)]; + TH = R1[WS(rs, 7)]; + TD = R1[WS(rs, 2)]; + TE = R0[WS(rs, 10)]; + Tt = R1[WS(rs, 1)]; + TI = TG - TH; + T2r = TG + TH; + { + E TF, T2q, Tu, Tv, T2s; + TF = TD - TE; + T2q = TD + TE; + Tu = R0[WS(rs, 4)]; + Tv = R1[WS(rs, 11)]; + T2u = T2q - T2r; + T2s = T2q + T2r; + TJ = FMA(KP618033988, TI, TF); + T1O = FNMS(KP618033988, TF, TI); + T39 = T2p + T2s; + T2t = FNMS(KP250000000, T2s, T2p); + Tw = Tu + Tv; + T16 = Tv - Tu; + Tx = R1[WS(rs, 6)]; + Ty = R0[WS(rs, 9)]; + } + } + { + E T1P, TW, TS, TR; + { + E T1, T5, T1L, T18, T1a, TA, T4, TU, T6, T19; + T1 = R0[WS(rs, 2)]; + { + E T2, T17, Tz, T3; + T2 = R1[WS(rs, 4)]; + T17 = Tx - Ty; + Tz = Tx + Ty; + T3 = R0[WS(rs, 12)]; + T5 = R0[WS(rs, 7)]; + T1L = FMA(KP618033988, T16, T17); + T18 = FNMS(KP618033988, T17, T16); + T1a = Tz - Tw; + TA = Tw + Tz; + T4 = T2 + T3; + TU = T3 - T2; + T6 = R1[WS(rs, 9)]; + } + TB = Tt + TA; + T19 = FNMS(KP250000000, TA, Tt); + { + E T7, TV, T1b, T1K, T8; + T7 = T5 + T6; + TV = T5 - T6; + T1b = FNMS(KP559016994, T1a, T19); + T1K = FMA(KP559016994, T1a, T19); + T1P = FMA(KP618033988, TU, TV); + TW = FNMS(KP618033988, TV, TU); + TS = T4 - T7; + T8 = T4 + T7; + T21 = FMA(KP869845200, T1K, T1L); + T1M = FNMS(KP786782374, T1L, T1K); + T2e = FMA(KP066152395, T1K, T1L); + T26 = FNMS(KP059835404, T1L, T1K); + T1B = FMA(KP120146378, T18, T1b); + T1r = FNMS(KP132830569, T1b, T18); + T1k = FMA(KP893101515, T18, T1b); + T1c = FNMS(KP987388751, T1b, T18); + T9 = T1 + T8; + TR = FMS(KP250000000, T8, T1); + } + } + { + E Ta, Te, TK, Td, Tf; + Ta = R1[0]; + { + E Tb, Tc, T1Q, TT; + Tb = R0[WS(rs, 3)]; + Tc = R1[WS(rs, 10)]; + T1Q = FMA(KP559016994, TS, TR); + TT = FNMS(KP559016994, TS, TR); + Te = R1[WS(rs, 5)]; + TK = Tb - Tc; + Td = Tb + Tc; + T1X = FNMS(KP120146378, T1P, T1Q); + T1R = FMA(KP132830569, T1Q, T1P); + T2k = FMA(KP494780565, T1Q, T1P); + T29 = FNMS(KP447533225, T1P, T1Q); + T1z = FMA(KP869845200, TT, TW); + T1v = 
FNMS(KP786782374, TW, TT); + T1h = FNMS(KP667278218, TT, TW); + TX = FMA(KP603558818, TW, TT); + Tf = R0[WS(rs, 8)]; + } + { + E Tk, T1S, TM, TO, Tn, TZ, TN, T10, Tq, To, Th, Tp, TP, T1T, Tr; + Tk = R0[WS(rs, 1)]; + { + E Tl, TL, Tg, Tm; + Tl = R1[WS(rs, 3)]; + TL = Tf - Te; + Tg = Te + Tf; + Tm = R0[WS(rs, 11)]; + To = R0[WS(rs, 6)]; + T1S = FMA(KP618033988, TK, TL); + TM = FNMS(KP618033988, TL, TK); + TO = Td - Tg; + Th = Td + Tg; + Tn = Tl + Tm; + TZ = Tm - Tl; + Tp = R1[WS(rs, 8)]; + } + Ti = Ta + Th; + TN = FNMS(KP250000000, Th, Ta); + T10 = Tp - To; + Tq = To + Tp; + TP = FMA(KP559016994, TO, TN); + T1T = FNMS(KP559016994, TO, TN); + Tr = Tn + Tq; + T13 = Tn - Tq; + T2a = FMA(KP578046249, T1T, T1S); + T2j = FNMS(KP522847744, T1S, T1T); + T1U = FNMS(KP987388751, T1T, T1S); + T1Y = FMA(KP893101515, T1S, T1T); + TQ = FMA(KP269969613, TP, TM); + T1g = FNMS(KP244189809, TM, TP); + T1u = FNMS(KP603558818, TM, TP); + T1y = FMA(KP667278218, TP, TM); + T12 = FMS(KP250000000, Tr, Tk); + Ts = Tk + Tr; + T11 = FMA(KP618033988, T10, TZ); + T1I = FNMS(KP618033988, TZ, T10); + } + } + } + } + { + E T2f, T27, T1j, T15, T2K, T2J, T2I, T2T, T1Z, T2X, T1N, T1V, T2W, T2U, T22; + E T1G; + { + E T3a, T3b, T20, T1J, T1C, T1s; + { + E Tj, TC, T1H, T14; + T3a = T9 + Ti; + Tj = T9 - Ti; + TC = Ts - TB; + T3b = Ts + TB; + T1H = FMA(KP559016994, T13, T12); + T14 = FNMS(KP559016994, T13, T12); + Ci[WS(csi, 10)] = KP951056516 * (FMA(KP618033988, Tj, TC)); + Ci[WS(csi, 5)] = KP951056516 * (FNMS(KP618033988, TC, Tj)); + T20 = FNMS(KP066152395, T1H, T1I); + T1J = FMA(KP059835404, T1I, T1H); + T2f = FMA(KP667278218, T1H, T1I); + T27 = FNMS(KP603558818, T1I, T1H); + T1C = FNMS(KP494780565, T14, T11); + T1s = FMA(KP447533225, T11, T14); + T1j = FNMS(KP522847744, T11, T14); + T15 = FMA(KP578046249, T14, T11); + } + { + E T1A, T1t, T1w, T3c, T3e, T1D, T1x, T3d, T1E, T1F; + T1A = FNMS(KP912575812, T1z, T1y); + T2K = FMA(KP912575812, T1z, T1y); + T2J = FNMS(KP958953096, T1s, T1r); + T1t = 
FMA(KP958953096, T1s, T1r); + T1w = FMA(KP912575812, T1v, T1u); + T2H = FNMS(KP912575812, T1v, T1u); + T3c = T3a + T3b; + T3e = T3a - T3b; + T2I = FMA(KP867381224, T1C, T1B); + T1D = FNMS(KP867381224, T1C, T1B); + T1x = FNMS(KP894834959, T1w, T1t); + T2T = FMA(KP734762448, T1Y, T1X); + T1Z = FNMS(KP734762448, T1Y, T1X); + T3d = FNMS(KP250000000, T3c, T39); + Cr[0] = T3c + T39; + T1E = FMA(KP447417479, T1w, T1D); + Ci[WS(csi, 4)] = KP951056516 * (FMA(KP992114701, T1x, TJ)); + Cr[WS(csr, 10)] = FNMS(KP559016994, T3e, T3d); + Cr[WS(csr, 5)] = FMA(KP559016994, T3e, T3d); + T1F = FMA(KP763932022, T1E, T1t); + T2X = FMA(KP772036680, T1M, T1J); + T1N = FNMS(KP772036680, T1M, T1J); + T1V = FMA(KP734762448, T1U, T1R); + T2W = FNMS(KP734762448, T1U, T1R); + T2U = FNMS(KP772036680, T21, T20); + T22 = FMA(KP772036680, T21, T20); + T1G = FMA(KP999544308, T1F, T1A); + } + } + { + E T1i, T1l, T2l, T2R, T2g, T2Q, T28, T32, T1f, T1n, T1p, T33, T2b; + { + E T24, TY, T1d, T1W, T23, T25, T1m, T1e; + T2w = FMA(KP829049696, T1h, T1g); + T1i = FNMS(KP829049696, T1h, T1g); + T1W = FNMS(KP992114701, T1V, T1O); + T23 = FNMS(KP522616830, T1V, T22); + Ci[WS(csi, 9)] = KP951056516 * (FNMS(KP803003575, T1G, TJ)); + T2x = FNMS(KP831864738, T1k, T1j); + T1l = FMA(KP831864738, T1k, T1j); + Ci[WS(csi, 3)] = KP998026728 * (FNMS(KP952936919, T1W, T1N)); + T24 = FMA(KP690983005, T23, T1N); + TY = FNMS(KP916574801, TX, TQ); + T2A = FMA(KP916574801, TX, TQ); + T2C = FNMS(KP831864738, T1c, T15); + T1d = FMA(KP831864738, T1c, T15); + T2l = FNMS(KP982009705, T2k, T2j); + T2R = FMA(KP982009705, T2k, T2j); + T25 = FNMS(KP855719849, T24, T1Z); + T2g = FMA(KP845997307, T2f, T2e); + T2Q = FNMS(KP845997307, T2f, T2e); + T1m = FMA(KP904730450, T1d, TY); + T1e = FNMS(KP904730450, T1d, TY); + Ci[WS(csi, 8)] = -(KP951056516 * (FNMS(KP992114701, T25, T1O))); + T28 = FNMS(KP845997307, T27, T26); + T32 = FMA(KP845997307, T27, T26); + T1f = FNMS(KP242145790, T1e, TJ); + Ci[WS(csi, 1)] = -(KP951056516 * (FMA(KP968583161, 
T1e, TJ))); + T1n = FNMS(KP999754674, T1m, T1l); + T1p = FNMS(KP904508497, T1m, T1i); + T33 = FMA(KP921078979, T2a, T29); + T2b = FNMS(KP921078979, T2a, T29); + } + { + E T2P, T2Z, T2V, T2O; + { + E T2d, T2n, T2i, T2Y, T2m, T2o; + T2P = FNMS(KP559016994, T2u, T2t); + T2v = FMA(KP559016994, T2u, T2t); + { + E T1o, T1q, T2h, T2c; + T1o = FNMS(KP559154169, T1n, T1i); + T1q = FMA(KP683113946, T1p, T1l); + T2h = FMA(KP906616052, T2b, T28); + T2c = FNMS(KP906616052, T2b, T28); + Ci[WS(csi, 6)] = -(KP951056516 * (FMA(KP968583161, T1o, T1f))); + Ci[WS(csi, 11)] = -(KP951056516 * (FMA(KP876306680, T1q, T1f))); + T2d = FMA(KP262346850, T2c, T1O); + Ci[WS(csi, 2)] = -(KP998026728 * (FNMS(KP952936919, T1O, T2c))); + T2n = T2g + T2h; + T2i = FMA(KP618033988, T2h, T2g); + } + T2m = FMA(KP570584518, T2l, T2i); + T2o = FNMS(KP669429328, T2n, T2l); + Ci[WS(csi, 12)] = KP951056516 * (FNMS(KP949179823, T2m, T2d)); + Ci[WS(csi, 7)] = KP951056516 * (FNMS(KP876306680, T2o, T2d)); + T2V = FMA(KP956723877, T2U, T2T); + T2Y = FMA(KP522616830, T2T, T2X); + T2Z = FNMS(KP763932022, T2Y, T2U); + } + Cr[WS(csr, 3)] = FMA(KP992114701, T2V, T2P); + { + E T30, T34, T2S, T31, T35; + T30 = FMA(KP855719849, T2Z, T2W); + T34 = FNMS(KP923225144, T2R, T2Q); + T2S = FMA(KP923225144, T2R, T2Q); + Cr[WS(csr, 8)] = FNMS(KP897376177, T30, T2P); + T31 = FNMS(KP237294955, T2S, T2P); + Cr[WS(csr, 2)] = FMA(KP949179823, T2S, T2P); + T35 = FNMS(KP997675361, T34, T33); + { + E T37, T36, T38, T2L; + T37 = FNMS(KP904508497, T34, T32); + T36 = FMA(KP560319534, T35, T32); + T38 = FNMS(KP681693190, T37, T33); + Cr[WS(csr, 12)] = FNMS(KP949179823, T36, T31); + Cr[WS(csr, 7)] = FNMS(KP860541664, T38, T31); + T2O = FNMS(KP809385824, T2K, T2I); + T2L = FNMS(KP447417479, T2K, T2J); + T2M = FNMS(KP690983005, T2L, T2I); + } + } + Cr[WS(csr, 4)] = FNMS(KP992114701, T2O, T2v); + } + } + } + } + T2y = FNMS(KP904730450, T2x, T2w); + T2B = FMA(KP904730450, T2x, T2w); + T2N = FNMS(KP999544308, T2M, T2H); + { + E T2z, T2D, T2F, T2E, 
T2G; + T2z = FNMS(KP242145790, T2y, T2v); + Cr[WS(csr, 1)] = FMA(KP968583161, T2y, T2v); + T2D = FMA(KP904730450, T2C, T2B); + T2F = T2A + T2B; + Cr[WS(csr, 9)] = FNMS(KP803003575, T2N, T2v); + T2E = FNMS(KP618033988, T2D, T2A); + T2G = FMA(KP683113946, T2F, T2C); + Cr[WS(csr, 6)] = FNMS(KP876091699, T2E, T2z); + Cr[WS(csr, 11)] = FNMS(KP792626838, T2G, T2z); + } + } + } +} + +static const kr2c_desc desc = { 25, "r2cf_25", {44, 12, 156, 0}, &GENUS }; + +void X(codelet_r2cf_25) (planner *p) { + X(kr2c_register) (p, r2cf_25, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 25 -name r2cf_25 -include r2cf.h */ + +/* + * This function contains 200 FP additions, 140 FP multiplications, + * (or, 117 additions, 57 multiplications, 83 fused multiply/add), + * 101 stack variables, 40 constants, and 50 memory accesses + */ +#include "r2cf.h" + +static void r2cf_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP998026728, +0.998026728428271561952336806863450553336905220); + DK(KP125581039, +0.125581039058626752152356449131262266244969664); + DK(KP1_996053456, +1.996053456856543123904673613726901106673810439); + DK(KP062790519, +0.062790519529313376076178224565631133122484832); + DK(KP809016994, +0.809016994374947424102293417182819058860154590); + DK(KP309016994, +0.309016994374947424102293417182819058860154590); + DK(KP1_369094211, +1.369094211857377347464566715242418539779038465); + DK(KP728968627, +0.728968627421411523146730319055259111372571664); + DK(KP963507348, +0.963507348203430549974383005744259307057084020); + DK(KP876306680, +0.876306680043863587308115903922062583399064238); + DK(KP497379774, +0.497379774329709576484567492012895936835134813); + DK(KP968583161, +0.968583161128631119490168375464735813836012403); + DK(KP684547105, +0.684547105928688673732283357621209269889519233); + DK(KP1_457937254, 
+1.457937254842823046293460638110518222745143328); + DK(KP481753674, +0.481753674101715274987191502872129653528542010); + DK(KP1_752613360, +1.752613360087727174616231807844125166798128477); + DK(KP248689887, +0.248689887164854788242283746006447968417567406); + DK(KP1_937166322, +1.937166322257262238980336750929471627672024806); + DK(KP992114701, +0.992114701314477831049793042785778521453036709); + DK(KP250666467, +0.250666467128608490746237519633017587885836494); + DK(KP425779291, +0.425779291565072648862502445744251703979973042); + DK(KP1_809654104, +1.809654104932039055427337295865395187940827822); + DK(KP1_274847979, +1.274847979497379420353425623352032390869834596); + DK(KP770513242, +0.770513242775789230803009636396177847271667672); + DK(KP844327925, +0.844327925502015078548558063966681505381659241); + DK(KP1_071653589, +1.071653589957993236542617535735279956127150691); + DK(KP125333233, +0.125333233564304245373118759816508793942918247); + DK(KP1_984229402, +1.984229402628955662099586085571557042906073418); + DK(KP904827052, +0.904827052466019527713668647932697593970413911); + DK(KP851558583, +0.851558583130145297725004891488503407959946084); + DK(KP637423989, +0.637423989748689710176712811676016195434917298); + DK(KP1_541026485, +1.541026485551578461606019272792355694543335344); + DK(KP535826794, +0.535826794978996618271308767867639978063575346); + DK(KP1_688655851, +1.688655851004030157097116127933363010763318483); + DK(KP293892626, +0.293892626146236564584352977319536384298826219); + DK(KP475528258, +0.475528258147576786058219666689691071702849317); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(100, rs), 
MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { + E T8, T1j, T1V, T1l, T7, T9, Ta, T12, T2u, T1O, T19, T1P, Ti, T2r, T1K; + E Tp, T1L, Tx, T2q, T1H, TE, T1I, TN, T2t, T1R, TU, T1S, T6, T1k, T3; + E T2s, T2v; + T8 = R0[0]; + { + E T4, T5, T1, T2; + T4 = R0[WS(rs, 5)]; + T5 = R1[WS(rs, 7)]; + T6 = T4 + T5; + T1k = T4 - T5; + T1 = R1[WS(rs, 2)]; + T2 = R0[WS(rs, 10)]; + T3 = T1 + T2; + T1j = T1 - T2; + } + T1V = KP951056516 * T1k; + T1l = FMA(KP951056516, T1j, KP587785252 * T1k); + T7 = KP559016994 * (T3 - T6); + T9 = T3 + T6; + Ta = FNMS(KP250000000, T9, T8); + { + E T16, T13, T14, TY, T17, T11, T15, T18; + T16 = R1[WS(rs, 1)]; + { + E TW, TX, TZ, T10; + TW = R0[WS(rs, 4)]; + TX = R1[WS(rs, 11)]; + T13 = TW + TX; + TZ = R1[WS(rs, 6)]; + T10 = R0[WS(rs, 9)]; + T14 = TZ + T10; + TY = TW - TX; + T17 = T13 + T14; + T11 = TZ - T10; + } + T12 = FMA(KP475528258, TY, KP293892626 * T11); + T2u = T16 + T17; + T1O = FNMS(KP293892626, TY, KP475528258 * T11); + T15 = KP559016994 * (T13 - T14); + T18 = FNMS(KP250000000, T17, T16); + T19 = T15 + T18; + T1P = T18 - T15; + } + { + E Tm, Tj, Tk, Te, Tn, Th, Tl, To; + Tm = R1[0]; + { + E Tc, Td, Tf, Tg; + Tc = R0[WS(rs, 3)]; + Td = R1[WS(rs, 10)]; + Tj = Tc + Td; + Tf = R1[WS(rs, 5)]; + Tg = R0[WS(rs, 8)]; + Tk = Tf + Tg; + Te = Tc - Td; + Tn = Tj + Tk; + Th = Tf - Tg; + } + Ti = FMA(KP475528258, Te, KP293892626 * Th); + T2r = Tm + Tn; + T1K = FNMS(KP293892626, Te, KP475528258 * Th); + Tl = KP559016994 * (Tj - Tk); + To = FNMS(KP250000000, Tn, Tm); + Tp = Tl + To; + T1L = To - Tl; + } + { + E TB, Ty, Tz, Tt, TC, Tw, TA, TD; + TB = R0[WS(rs, 2)]; + { + E Tr, Ts, Tu, Tv; + Tr = R1[WS(rs, 4)]; + Ts = R0[WS(rs, 12)]; + Ty = Tr + Ts; + Tu = R0[WS(rs, 7)]; + Tv = R1[WS(rs, 9)]; + Tz = Tu + Tv; + Tt = Tr - Ts; + TC = Ty + Tz; + Tw = Tu - Tv; + } + Tx = FMA(KP475528258, Tt, KP293892626 * Tw); + T2q = TB + TC; + T1H = FNMS(KP293892626, Tt, KP475528258 * Tw); + TA = KP559016994 * (Ty - Tz); + TD = FNMS(KP250000000, TC, TB); + 
TE = TA + TD; + T1I = TD - TA; + } + { + E TR, TO, TP, TJ, TS, TM, TQ, TT; + TR = R0[WS(rs, 1)]; + { + E TH, TI, TK, TL; + TH = R1[WS(rs, 3)]; + TI = R0[WS(rs, 11)]; + TO = TH + TI; + TK = R0[WS(rs, 6)]; + TL = R1[WS(rs, 8)]; + TP = TK + TL; + TJ = TH - TI; + TS = TO + TP; + TM = TK - TL; + } + TN = FMA(KP475528258, TJ, KP293892626 * TM); + T2t = TR + TS; + T1R = FNMS(KP293892626, TJ, KP475528258 * TM); + TQ = KP559016994 * (TO - TP); + TT = FNMS(KP250000000, TS, TR); + TU = TQ + TT; + T1S = TT - TQ; + } + T2s = T2q - T2r; + T2v = T2t - T2u; + Ci[WS(csi, 5)] = FNMS(KP587785252, T2v, KP951056516 * T2s); + Ci[WS(csi, 10)] = FMA(KP587785252, T2s, KP951056516 * T2v); + { + E T2z, T2y, T2A, T2w, T2x, T2B; + T2z = T8 + T9; + T2w = T2r + T2q; + T2x = T2t + T2u; + T2y = KP559016994 * (T2w - T2x); + T2A = T2w + T2x; + Cr[0] = T2z + T2A; + T2B = FNMS(KP250000000, T2A, T2z); + Cr[WS(csr, 5)] = T2y + T2B; + Cr[WS(csr, 10)] = T2B - T2y; + } + { + E Tb, Tq, TF, TG, T1E, T1F, T1G, T1B, T1C, T1D, TV, T1a, T1b, T1o, T1r; + E T1s, T1z, T1x, T1e, T1h, T1i, T1u, T1t; + Tb = T7 + Ta; + Tq = FMA(KP1_688655851, Ti, KP535826794 * Tp); + TF = FMA(KP1_541026485, Tx, KP637423989 * TE); + TG = Tq - TF; + T1E = FMA(KP851558583, TN, KP904827052 * TU); + T1F = FMA(KP1_984229402, T12, KP125333233 * T19); + T1G = T1E + T1F; + T1B = FNMS(KP844327925, Tp, KP1_071653589 * Ti); + T1C = FNMS(KP1_274847979, Tx, KP770513242 * TE); + T1D = T1B + T1C; + TV = FNMS(KP425779291, TU, KP1_809654104 * TN); + T1a = FNMS(KP992114701, T19, KP250666467 * T12); + T1b = TV + T1a; + { + E T1m, T1n, T1p, T1q; + T1m = FMA(KP1_937166322, Ti, KP248689887 * Tp); + T1n = FMA(KP1_071653589, Tx, KP844327925 * TE); + T1o = T1m + T1n; + T1p = FMA(KP1_752613360, TN, KP481753674 * TU); + T1q = FMA(KP1_457937254, T12, KP684547105 * T19); + T1r = T1p + T1q; + T1s = T1o + T1r; + T1z = T1q - T1p; + T1x = T1n - T1m; + } + { + E T1c, T1d, T1f, T1g; + T1c = FNMS(KP497379774, Ti, KP968583161 * Tp); + T1d = FNMS(KP1_688655851, Tx, 
KP535826794 * TE); + T1e = T1c + T1d; + T1f = FNMS(KP963507348, TN, KP876306680 * TU); + T1g = FNMS(KP1_369094211, T12, KP728968627 * T19); + T1h = T1f + T1g; + T1i = T1e + T1h; + T1u = T1f - T1g; + T1t = T1d - T1c; + } + Cr[WS(csr, 1)] = Tb + T1i; + Ci[WS(csi, 1)] = -(T1l + T1s); + Cr[WS(csr, 4)] = Tb + TG + T1b; + Ci[WS(csi, 4)] = T1l + T1D - T1G; + Ci[WS(csi, 9)] = FMA(KP309016994, T1D, T1l) + FMA(KP587785252, T1a - TV, KP809016994 * T1G) - (KP951056516 * (Tq + TF)); + Cr[WS(csr, 9)] = FMA(KP309016994, TG, Tb) + FMA(KP951056516, T1B - T1C, KP587785252 * (T1F - T1E)) - (KP809016994 * T1b); + { + E T1v, T1w, T1y, T1A; + T1v = FMS(KP250000000, T1s, T1l); + T1w = KP559016994 * (T1r - T1o); + Ci[WS(csi, 11)] = FMA(KP587785252, T1t, KP951056516 * T1u) + T1v - T1w; + Ci[WS(csi, 6)] = FMA(KP951056516, T1t, T1v) + FNMS(KP587785252, T1u, T1w); + T1y = FNMS(KP250000000, T1i, Tb); + T1A = KP559016994 * (T1e - T1h); + Cr[WS(csr, 11)] = FMA(KP587785252, T1x, T1y) + FNMA(KP951056516, T1z, T1A); + Cr[WS(csr, 6)] = FMA(KP951056516, T1x, T1A) + FMA(KP587785252, T1z, T1y); + } + } + { + E T1W, T1X, T1J, T1M, T1N, T21, T22, T23, T1Q, T1T, T1U, T1Y, T1Z, T20, T26; + E T29, T2a, T2k, T2j, T2l, T2m, T2d, T2o, T2i; + T1W = FNMS(KP587785252, T1j, T1V); + T1X = Ta - T7; + T1J = FNMS(KP125333233, T1I, KP1_984229402 * T1H); + T1M = FMA(KP1_457937254, T1K, KP684547105 * T1L); + T1N = T1J - T1M; + T21 = FNMS(KP1_996053456, T1R, KP062790519 * T1S); + T22 = FMA(KP1_541026485, T1O, KP637423989 * T1P); + T23 = T21 - T22; + T1Q = FNMS(KP770513242, T1P, KP1_274847979 * T1O); + T1T = FMA(KP125581039, T1R, KP998026728 * T1S); + T1U = T1Q - T1T; + T1Y = FNMS(KP1_369094211, T1K, KP728968627 * T1L); + T1Z = FMA(KP250666467, T1H, KP992114701 * T1I); + T20 = T1Y - T1Z; + { + E T24, T25, T27, T28; + T24 = FNMS(KP481753674, T1L, KP1_752613360 * T1K); + T25 = FMA(KP851558583, T1H, KP904827052 * T1I); + T26 = T24 - T25; + T27 = FNMS(KP844327925, T1S, KP1_071653589 * T1R); + T28 = FNMS(KP998026728, T1P, 
KP125581039 * T1O); + T29 = T27 + T28; + T2a = T26 + T29; + T2k = T27 - T28; + T2j = T24 + T25; + } + { + E T2b, T2c, T2g, T2h; + T2b = FNMS(KP425779291, T1I, KP1_809654104 * T1H); + T2c = FMA(KP963507348, T1K, KP876306680 * T1L); + T2l = T2c + T2b; + T2g = FMA(KP1_688655851, T1R, KP535826794 * T1S); + T2h = FMA(KP1_996053456, T1O, KP062790519 * T1P); + T2m = T2g + T2h; + T2d = T2b - T2c; + T2o = T2l + T2m; + T2i = T2g - T2h; + } + Ci[WS(csi, 2)] = T1W + T2a; + Cr[WS(csr, 2)] = T1X + T2o; + Ci[WS(csi, 3)] = T1N + T1U - T1W; + Cr[WS(csr, 3)] = T1X + T20 + T23; + Cr[WS(csr, 8)] = FMA(KP309016994, T20, T1X) + FNMA(KP809016994, T23, KP587785252 * (T1T + T1Q)) - (KP951056516 * (T1M + T1J)); + Ci[WS(csi, 8)] = FNMS(KP587785252, T21 + T22, KP309016994 * T1N) + FNMA(KP809016994, T1U, KP951056516 * (T1Y + T1Z)) - T1W; + { + E T2e, T2f, T2n, T2p; + T2e = KP559016994 * (T26 - T29); + T2f = FNMS(KP250000000, T2a, T1W); + Ci[WS(csi, 7)] = FMA(KP951056516, T2d, T2e) + FNMS(KP587785252, T2i, T2f); + Ci[WS(csi, 12)] = FMA(KP587785252, T2d, T2f) + FMS(KP951056516, T2i, T2e); + T2n = KP559016994 * (T2l - T2m); + T2p = FNMS(KP250000000, T2o, T1X); + Cr[WS(csr, 7)] = FMA(KP951056516, T2j, KP587785252 * T2k) + T2n + T2p; + Cr[WS(csr, 12)] = FMA(KP587785252, T2j, T2p) + FNMA(KP951056516, T2k, T2n); + } + } + } + } +} + +static const kr2c_desc desc = { 25, "r2cf_25", {117, 57, 83, 0}, &GENUS }; + +void X(codelet_r2cf_25) (planner *p) { + X(kr2c_register) (p, r2cf_25, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_3.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_3.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published 
by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 3 -name r2cf_3 -include r2cf.h */ + +/* + * This function contains 4 FP additions, 2 FP multiplications, + * (or, 3 additions, 1 multiplications, 1 fused multiply/add), + * 7 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cf.h" + /* r2cf_3 (HAVE_FMA build): runs v iterations; each one reads three reals x0 = R0[0], x1 = R1[0], x2 = R0[WS(rs, 1)] and stores Cr[0], Cr[WS(csr, 1)] and Ci[WS(csi, 1)], then steps R0/R1 by ivs and Cr/Ci by ovs. NOTE(review): presumably the 3-point real-to-complex forward DFT, going by the generator's -n 3 and the r2cf name -- confirm. */ +static void r2cf_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { /* counts i down from v; the pointer bumps live in the loop header */ + E T1, T2, T3, T4; + T1 = R0[0]; /* x0 */ + T2 = R1[0]; /* x1 */ + T3 = R0[WS(rs, 1)]; /* x2 */ + Ci[WS(csi, 1)] = KP866025403 * (T3 - T2); /* sqrt(3)/2 * (x2 - x1) */ + T4 = T2 + T3; /* x1 and x2 summed once, shared by both Cr outputs */ + Cr[0] = T1 + T4; /* sum of x0, x1, x2 */ + Cr[WS(csr, 1)] = FNMS(KP500000000, T4, T1); /* NOTE(review): presumably T1 - T4/2, assuming FNMS(a, b, c) is c - a*b; FNMS is defined outside this file -- confirm */ + } + } +} + +static const kr2c_desc desc = { 3, "r2cf_3", {3, 1, 1, 0}, &GENUS }; /* 3, 1, 1 repeat the add / mul / fused multiply-add counts from the header comment above; the meaning of the trailing 0 is not shown here */ + +void X(codelet_r2cf_3) (planner *p) { + X(kr2c_register) (p, r2cf_3, &desc); /* passes r2cf_3 and its descriptor to the planner p */ +} + +#else /* HAVE_FMA */ + +/* Generated by:
../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 3 -name r2cf_3 -include r2cf.h */ + +/* + * This function contains 4 FP additions, 2 FP multiplications, + * (or, 3 additions, 1 multiplications, 1 fused multiply/add), + * 7 stack variables, 2 constants, and 6 memory accesses + */ +#include "r2cf.h" + /* r2cf_3 (build without HAVE_FMA): same loads and stores as the HAVE_FMA variant, in a different statement order. Runs v iterations; each one reads x0 = R0[0], x1 = R1[0], x2 = R0[WS(rs, 1)] and stores Cr[0], Cr[WS(csr, 1)] and Ci[WS(csi, 1)], then steps R0/R1 by ivs and Cr/Ci by ovs. NOTE(review): presumably the 3-point real-to-complex forward DFT -- confirm. */ +static void r2cf_3(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(12, csr), MAKE_VOLATILE_STRIDE(12, csi)) { /* counts i down from v; the pointer bumps live in the loop header */ + E T1, T2, T3, T4; + T1 = R0[0]; /* x0 */ + T2 = R1[0]; /* x1 */ + T3 = R0[WS(rs, 1)]; /* x2 */ + T4 = T2 + T3; /* x1 and x2 summed once, shared by both Cr outputs */ + Cr[WS(csr, 1)] = FNMS(KP500000000, T4, T1); /* NOTE(review): presumably T1 - T4/2, assuming FNMS(a, b, c) is c - a*b; FNMS is defined outside this file -- confirm */ + Ci[WS(csi, 1)] = KP866025403 * (T3 - T2); /* sqrt(3)/2 * (x2 - x1) */ + Cr[0] = T1 + T4; /* sum of x0, x1, x2 */ + } + } +} + +static const kr2c_desc desc = { 3, "r2cf_3", {3, 1, 1, 0}, &GENUS }; /* 3, 1, 1 repeat the add / mul / fused multiply-add counts from the header comment above; the meaning of the trailing 0 is not shown here */ + +void X(codelet_r2cf_3) (planner *p) { + X(kr2c_register) (p, r2cf_3, &desc); /* passes r2cf_3 and its descriptor to the planner p */ +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cf_32 -include r2cf.h */ + +/* + * This function contains 156 FP additions, 68 FP multiplications, + * (or, 88 additions, 0 multiplications, 68 fused multiply/add), + * 89 stack variables, 7 constants, and 64 memory accesses + */ +#include "r2cf.h" + /* r2cf_32 (HAVE_FMA build): runs v iterations; each one reads R0 and R1 at input indices 0..15 (32 reals) and stores Cr at output indices 0..16 and Ci at output indices 1..15 (Ci index 0 and 16 are never written), then steps R0/R1 by ivs and Cr/Ci by ovs. Statement order is the generator's schedule, so loads, arithmetic and stores are interleaved. NOTE(review): presumably the 32-point real-to-complex forward DFT, going by the generator's -n 32 and the r2cf name -- confirm. */ +static void r2cf_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */ + DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* tan(3*pi/16) */ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */ + DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* tan(pi/16) */ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */ + DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* tan(pi/8), i.e. sqrt(2) - 1 */ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { /* counts i down from v; the pointer bumps live in the loop header */ + E T1x, T1M, T1I, T1E, T1J, T1H; + { + E Tv, T1h, T7, T2b, Te, T2n, Ty, T1i, T1l, TF, T2d, Tt, T1k, TC, T2c; + E Tm, T2j, T1Z, T2k, T22, TK, T1B, T19, T1C, T1e, TO, TV, T1T, TN, TP; + E T2g, T1S; + { + E TD, Tp, Tq, Tr; + { + E T1, T2, T4, T5; + T1 = R0[0]; + T2 = R0[WS(rs, 8)]; + T4 = R0[WS(rs, 4)]; + T5 = R0[WS(rs, 12)]; + { + E Ta, Tw, Tx, Td, Tn,
To; + { + E T8, T3, T6, T9, Tb, Tc; + T8 = R0[WS(rs, 2)]; + Tv = T1 - T2; + T3 = T1 + T2; + T1h = T4 - T5; + T6 = T4 + T5; + T9 = R0[WS(rs, 10)]; + Tb = R0[WS(rs, 14)]; + Tc = R0[WS(rs, 6)]; + T7 = T3 + T6; + T2b = T3 - T6; + Ta = T8 + T9; + Tw = T8 - T9; + Tx = Tb - Tc; + Td = Tb + Tc; + } + Tn = R0[WS(rs, 15)]; + To = R0[WS(rs, 7)]; + Te = Ta + Td; + T2n = Td - Ta; + Ty = Tw + Tx; + T1i = Tx - Tw; + TD = Tn - To; + Tp = Tn + To; + Tq = R0[WS(rs, 3)]; + Tr = R0[WS(rs, 11)]; + } + } + { + E Tj, TA, Ti, Tk; + { + E Tg, Th, TE, Ts; + Tg = R0[WS(rs, 1)]; + Th = R0[WS(rs, 9)]; + Tj = R0[WS(rs, 5)]; + TE = Tq - Tr; + Ts = Tq + Tr; + TA = Tg - Th; + Ti = Tg + Th; + T1l = FNMS(KP414213562, TD, TE); + TF = FMA(KP414213562, TE, TD); + T2d = Tp - Ts; + Tt = Tp + Ts; + Tk = R0[WS(rs, 13)]; + } + { + E T11, T15, T1c, T20, T14, T16, T1X, T1Y, T1Q, T1R; + { + E T1a, T1b, T12, T13; + { + E TZ, T10, TB, Tl; + TZ = R1[WS(rs, 15)]; + T10 = R1[WS(rs, 7)]; + T1a = R1[WS(rs, 11)]; + TB = Tj - Tk; + Tl = Tj + Tk; + T1X = TZ + T10; + T11 = TZ - T10; + T1k = FMA(KP414213562, TA, TB); + TC = FNMS(KP414213562, TB, TA); + T2c = Ti - Tl; + Tm = Ti + Tl; + T1b = R1[WS(rs, 3)]; + } + T12 = R1[WS(rs, 1)]; + T13 = R1[WS(rs, 9)]; + T15 = R1[WS(rs, 13)]; + T1Y = T1b + T1a; + T1c = T1a - T1b; + T20 = T12 + T13; + T14 = T12 - T13; + T16 = R1[WS(rs, 5)]; + } + T2j = T1X - T1Y; + T1Z = T1X + T1Y; + { + E TT, TU, TL, TM; + { + E TI, T21, T17, TJ, T18, T1d; + TI = R1[0]; + T21 = T15 + T16; + T17 = T15 - T16; + TJ = R1[WS(rs, 8)]; + TT = R1[WS(rs, 4)]; + T2k = T21 - T20; + T22 = T20 + T21; + T18 = T14 + T17; + T1d = T17 - T14; + T1Q = TI + TJ; + TK = TI - TJ; + T1B = FNMS(KP707106781, T18, T11); + T19 = FMA(KP707106781, T18, T11); + T1C = FNMS(KP707106781, T1d, T1c); + T1e = FMA(KP707106781, T1d, T1c); + TU = R1[WS(rs, 12)]; + } + TL = R1[WS(rs, 2)]; + TM = R1[WS(rs, 10)]; + TO = R1[WS(rs, 14)]; + T1R = TT + TU; + TV = TT - TU; + T1T = TL + TM; + TN = TL - TM; + TP = R1[WS(rs, 6)]; + } + T2g = T1Q - T1R;
+ T1S = T1Q + T1R; + } + } + } + { + E T1P, T25, T23, T2h, T1W, T1y, TS, T1z, TX, T27, T2a; + { + E Tf, Tu, T29, T28; + { + E T1U, TQ, T1V, TR, TW; + T1P = T7 - Te; + Tf = T7 + Te; + T1U = TO + TP; + TQ = TO - TP; + Tu = Tm + Tt; + T25 = Tt - Tm; + T23 = T1Z - T22; + T29 = T1Z + T22; + T2h = T1U - T1T; + T1V = T1T + T1U; + TR = TN + TQ; + TW = TN - TQ; + T27 = Tf + Tu; + T1W = T1S - T1V; + T28 = T1S + T1V; + T1y = FNMS(KP707106781, TR, TK); + TS = FMA(KP707106781, TR, TK); + T1z = FNMS(KP707106781, TW, TV); + TX = FMA(KP707106781, TW, TV); + T2a = T28 + T29; + } + Cr[WS(csr, 8)] = Tf - Tu; + Ci[WS(csi, 8)] = T29 - T28; + } + Cr[0] = T27 + T2a; + Cr[WS(csr, 16)] = T27 - T2a; + { + E T2s, T2i, T2v, T2f, T2r, T2p, T2l, T2t; + { + E T2o, T2e, T26, T24; + T2o = T2d - T2c; + T2e = T2c + T2d; + T2s = FNMS(KP414213562, T2g, T2h); + T2i = FMA(KP414213562, T2h, T2g); + T26 = T23 - T1W; + T24 = T1W + T23; + T2v = FNMS(KP707106781, T2e, T2b); + T2f = FMA(KP707106781, T2e, T2b); + T2r = FMA(KP707106781, T2o, T2n); + T2p = FNMS(KP707106781, T2o, T2n); + Ci[WS(csi, 4)] = FMA(KP707106781, T26, T25); + Ci[WS(csi, 12)] = FMS(KP707106781, T26, T25); + Cr[WS(csr, 4)] = FMA(KP707106781, T24, T1P); + Cr[WS(csr, 12)] = FNMS(KP707106781, T24, T1P); + T2l = FNMS(KP414213562, T2k, T2j); + T2t = FMA(KP414213562, T2j, T2k); + } + { + E T1v, T1G, TH, T1s, T1F, T1w, T1o, T1g, T1p, T1n; + { + E T1f, TY, T1t, T1u, T1j, T1m; + { + E Tz, TG, T1q, T1r; + T1v = FNMS(KP707106781, Ty, Tv); + Tz = FMA(KP707106781, Ty, Tv); + { + E T2q, T2m, T2w, T2u; + T2q = T2l - T2i; + T2m = T2i + T2l; + T2w = T2t - T2s; + T2u = T2s + T2t; + Ci[WS(csi, 10)] = FMA(KP923879532, T2q, T2p); + Ci[WS(csi, 6)] = FMS(KP923879532, T2q, T2p); + Cr[WS(csr, 2)] = FMA(KP923879532, T2m, T2f); + Cr[WS(csr, 14)] = FNMS(KP923879532, T2m, T2f); + Cr[WS(csr, 10)] = FNMS(KP923879532, T2w, T2v); + Cr[WS(csr, 6)] = FMA(KP923879532, T2w, T2v); + Ci[WS(csi, 2)] = FMA(KP923879532, T2u, T2r); + Ci[WS(csi, 14)] = FMS(KP923879532, T2u, T2r); +
TG = TC + TF; + T1G = TF - TC; + } + T1f = FNMS(KP198912367, T1e, T19); + T1q = FMA(KP198912367, T19, T1e); + T1r = FMA(KP198912367, TS, TX); + TY = FNMS(KP198912367, TX, TS); + T1t = FNMS(KP923879532, TG, Tz); + TH = FMA(KP923879532, TG, Tz); + T1u = T1r + T1q; + T1s = T1q - T1r; + T1F = FMA(KP707106781, T1i, T1h); + T1j = FNMS(KP707106781, T1i, T1h); + T1m = T1k + T1l; + T1w = T1k - T1l; + } + Cr[WS(csr, 7)] = FMA(KP980785280, T1u, T1t); + T1o = T1f - TY; + T1g = TY + T1f; + T1p = FMA(KP923879532, T1m, T1j); + T1n = FNMS(KP923879532, T1m, T1j); + Cr[WS(csr, 9)] = FNMS(KP980785280, T1u, T1t); + } + Cr[WS(csr, 1)] = FMA(KP980785280, T1g, TH); + Cr[WS(csr, 15)] = FNMS(KP980785280, T1g, TH); + Ci[WS(csi, 1)] = FMS(KP980785280, T1s, T1p); + Ci[WS(csi, 15)] = FMA(KP980785280, T1s, T1p); + Ci[WS(csi, 9)] = FMS(KP980785280, T1o, T1n); + Ci[WS(csi, 7)] = FMA(KP980785280, T1o, T1n); + { + E T1A, T1D, T1N, T1O, T1K, T1L; + T1A = FMA(KP668178637, T1z, T1y); + T1K = FNMS(KP668178637, T1y, T1z); + T1L = FNMS(KP668178637, T1B, T1C); + T1D = FMA(KP668178637, T1C, T1B); + T1N = FNMS(KP923879532, T1w, T1v); + T1x = FMA(KP923879532, T1w, T1v); + T1O = T1K + T1L; + T1M = T1K - T1L; + Cr[WS(csr, 5)] = FNMS(KP831469612, T1O, T1N); + T1I = T1D - T1A; + T1E = T1A + T1D; + T1J = FMA(KP923879532, T1G, T1F); + T1H = FNMS(KP923879532, T1G, T1F); + Cr[WS(csr, 11)] = FMA(KP831469612, T1O, T1N); + } + } + } + } + } + Ci[WS(csi, 3)] = FMA(KP831469612, T1M, T1J); + Cr[WS(csr, 3)] = FMA(KP831469612, T1E, T1x); + Ci[WS(csi, 13)] = FMS(KP831469612, T1M, T1J); + Cr[WS(csr, 13)] = FNMS(KP831469612, T1E, T1x); + Ci[WS(csi, 11)] = FMA(KP831469612, T1I, T1H); + Ci[WS(csi, 5)] = FMS(KP831469612, T1I, T1H); + } + } +} + +static const kr2c_desc desc = { 32, "r2cf_32", {88, 0, 68, 0}, &GENUS }; /* 88, 0, 68 repeat the add / mul / fused multiply-add counts from this variant's header comment; the meaning of the trailing 0 is not shown here */ + +void X(codelet_r2cf_32) (planner *p) { + X(kr2c_register) (p, r2cf_32, &desc); /* passes r2cf_32 and its descriptor to the planner p */ +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name
r2cf_32 -include r2cf.h */ + +/* + * This function contains 156 FP additions, 42 FP multiplications, + * (or, 140 additions, 26 multiplications, 16 fused multiply/add), + * 54 stack variables, 7 constants, and 64 memory accesses + */ +#include "r2cf.h" + /* r2cf_32 (build without HAVE_FMA): runs v iterations; each one reads R0 and R1 at input indices 0..15 (32 reals) and stores Cr at output indices 0..16 and Ci at output indices 1..15 (Ci index 0 and 16 are never written), then steps R0/R1 by ivs and Cr/Ci by ovs. The body is grouped into braced sub-blocks: the first six load inputs and form partial results, the remaining five combine them and store the outputs. NOTE(review): presumably the 32-point real-to-complex forward DFT, going by the generator's -n 32 and the r2cf name -- confirm. */ +static void r2cf_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */ + DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */ + DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */ + DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */ + DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */ + DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { /* counts i down from v; the pointer bumps live in the loop header */ + E T7, T2b, Tv, T1l, Te, T2o, Ty, T1k, Tt, T2d, TF, T1h, Tm, T2c, TC; + E T1i, T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z; + E TS, T1y; + { + E T1, T2, T3, T4, T5, T6; + T1 = R0[0]; + T2 = R0[WS(rs, 8)]; + T3 = T1 + T2; + T4 = R0[WS(rs, 4)]; + T5 = R0[WS(rs, 12)]; + T6 = T4 + T5; + T7 = T3 + T6; + T2b = T3 - T6; + Tv = T1 - T2; + T1l = T4 - T5; + } + { + E Ta, Tw, Td, Tx; + { + E T8, T9, Tb, Tc; + T8 = R0[WS(rs, 2)]; + T9 = R0[WS(rs, 10)]; + Ta = T8 + T9; + Tw = T8 - T9; + Tb = R0[WS(rs, 14)]; + Tc = R0[WS(rs, 6)]; + Td = Tb + Tc; + Tx = Tb - Tc; + } + Te = Ta + Td; + T2o = Td - Ta; + Ty = KP707106781 * (Tw + Tx); + T1k = KP707106781 * (Tx - Tw); + } + { + E Tp, TD, Ts, TE; + { + E Tn, To, Tq, Tr; + Tn = R0[WS(rs, 15)]; + To = R0[WS(rs, 7)]; + Tp = Tn + To; + TD = Tn - To; + Tq = R0[WS(rs, 3)]; + Tr = R0[WS(rs, 11)]; + Ts = Tq + Tr; + TE = Tq - Tr; + } + Tt = Tp + Ts; + T2d = Tp - Ts; + TF = FMA(KP923879532,
TD, KP382683432 * TE); + T1h = FNMS(KP923879532, TE, KP382683432 * TD); + } + { + E Ti, TA, Tl, TB; + { + E Tg, Th, Tj, Tk; + Tg = R0[WS(rs, 1)]; + Th = R0[WS(rs, 9)]; + Ti = Tg + Th; + TA = Tg - Th; + Tj = R0[WS(rs, 5)]; + Tk = R0[WS(rs, 13)]; + Tl = Tj + Tk; + TB = Tj - Tk; + } + Tm = Ti + Tl; + T2c = Ti - Tl; + TC = FNMS(KP382683432, TB, KP923879532 * TA); + T1i = FMA(KP382683432, TA, KP923879532 * TB); + } + { + E T11, T1X, T1d, T1Y, T14, T20, T17, T21, T1a, T18; + { + E TZ, T10, T1b, T1c; + TZ = R1[WS(rs, 15)]; + T10 = R1[WS(rs, 7)]; + T11 = TZ - T10; + T1X = TZ + T10; + T1b = R1[WS(rs, 3)]; + T1c = R1[WS(rs, 11)]; + T1d = T1b - T1c; + T1Y = T1b + T1c; + } + { + E T12, T13, T15, T16; + T12 = R1[WS(rs, 1)]; + T13 = R1[WS(rs, 9)]; + T14 = T12 - T13; + T20 = T12 + T13; + T15 = R1[WS(rs, 13)]; + T16 = R1[WS(rs, 5)]; + T17 = T15 - T16; + T21 = T15 + T16; + } + T1Z = T1X + T1Y; + T22 = T20 + T21; + T2k = T21 - T20; + T2j = T1X - T1Y; + T1a = KP707106781 * (T17 - T14); + T1e = T1a - T1d; + T1C = T1d + T1a; + T18 = KP707106781 * (T14 + T17); + T19 = T11 + T18; + T1B = T11 - T18; + } + { + E TK, T1Q, TW, T1R, TN, T1T, TQ, T1U, TT, TR; + { + E TI, TJ, TU, TV; + TI = R1[0]; + TJ = R1[WS(rs, 8)]; + TK = TI - TJ; + T1Q = TI + TJ; + TU = R1[WS(rs, 4)]; + TV = R1[WS(rs, 12)]; + TW = TU - TV; + T1R = TU + TV; + } + { + E TL, TM, TO, TP; + TL = R1[WS(rs, 2)]; + TM = R1[WS(rs, 10)]; + TN = TL - TM; + T1T = TL + TM; + TO = R1[WS(rs, 14)]; + TP = R1[WS(rs, 6)]; + TQ = TO - TP; + T1U = TO + TP; + } + T1S = T1Q + T1R; + T1V = T1T + T1U; + T2h = T1U - T1T; + T2g = T1Q - T1R; + TT = KP707106781 * (TQ - TN); + TX = TT - TW; + T1z = TW + TT; + TR = KP707106781 * (TN + TQ); + TS = TK + TR; + T1y = TK - TR; + } + { + E Tf, Tu, T27, T28, T29, T2a; + Tf = T7 + Te; + Tu = Tm + Tt; + T27 = Tf + Tu; + T28 = T1S + T1V; + T29 = T1Z + T22; + T2a = T28 + T29; + Cr[WS(csr, 8)] = Tf - Tu; + Ci[WS(csi, 8)] = T29 - T28; + Cr[WS(csr, 16)] = T27 - T2a; + Cr[0] = T27 + T2a; + } + { + E T1P, T25, T24,
T26, T1W, T23; + T1P = T7 - Te; + T25 = Tt - Tm; + T1W = T1S - T1V; + T23 = T1Z - T22; + T24 = KP707106781 * (T1W + T23); + T26 = KP707106781 * (T23 - T1W); + Cr[WS(csr, 12)] = T1P - T24; + Ci[WS(csi, 12)] = T26 - T25; + Cr[WS(csr, 4)] = T1P + T24; + Ci[WS(csi, 4)] = T25 + T26; + } + { + E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2n; + T2e = KP707106781 * (T2c + T2d); + T2f = T2b + T2e; + T2v = T2b - T2e; + T2n = KP707106781 * (T2d - T2c); + T2p = T2n - T2o; + T2r = T2o + T2n; + { + E T2i, T2l, T2s, T2t; + T2i = FMA(KP923879532, T2g, KP382683432 * T2h); + T2l = FNMS(KP382683432, T2k, KP923879532 * T2j); + T2m = T2i + T2l; + T2q = T2l - T2i; + T2s = FNMS(KP382683432, T2g, KP923879532 * T2h); + T2t = FMA(KP382683432, T2j, KP923879532 * T2k); + T2u = T2s + T2t; + T2w = T2t - T2s; + } + Cr[WS(csr, 14)] = T2f - T2m; + Ci[WS(csi, 14)] = T2u - T2r; + Cr[WS(csr, 2)] = T2f + T2m; + Ci[WS(csi, 2)] = T2r + T2u; + Ci[WS(csi, 6)] = T2p + T2q; + Cr[WS(csr, 6)] = T2v + T2w; + Ci[WS(csi, 10)] = T2q - T2p; + Cr[WS(csr, 10)] = T2v - T2w; + } + { + E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p; + { + E Tz, TG, T1q, T1r; + Tz = Tv + Ty; + TG = TC + TF; + TH = Tz + TG; + T1t = Tz - TG; + T1q = FNMS(KP195090322, TS, KP980785280 * TX); + T1r = FMA(KP195090322, T19, KP980785280 * T1e); + T1s = T1q + T1r; + T1u = T1r - T1q; + } + { + E TY, T1f, T1j, T1m; + TY = FMA(KP980785280, TS, KP195090322 * TX); + T1f = FNMS(KP195090322, T1e, KP980785280 * T19); + T1g = TY + T1f; + T1o = T1f - TY; + T1j = T1h - T1i; + T1m = T1k - T1l; + T1n = T1j - T1m; + T1p = T1m + T1j; + } + Cr[WS(csr, 15)] = TH - T1g; + Ci[WS(csi, 15)] = T1s - T1p; + Cr[WS(csr, 1)] = TH + T1g; + Ci[WS(csi, 1)] = T1p + T1s; + Ci[WS(csi, 7)] = T1n + T1o; + Cr[WS(csr, 7)] = T1t + T1u; + Ci[WS(csi, 9)] = T1o - T1n; + Cr[WS(csr, 9)] = T1t - T1u; + } + { + E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J; + { + E T1v, T1w, T1K, T1L; + T1v = Tv - Ty; + T1w = T1i + T1h; + T1x = T1v + T1w; + T1N = T1v - T1w; + T1K = FNMS(KP555570233, T1y,
KP831469612 * T1z); + T1L = FMA(KP555570233, T1B, KP831469612 * T1C); + T1M = T1K + T1L; + T1O = T1L - T1K; + } + { + E T1A, T1D, T1F, T1G; + T1A = FMA(KP831469612, T1y, KP555570233 * T1z); + T1D = FNMS(KP555570233, T1C, KP831469612 * T1B); + T1E = T1A + T1D; + T1I = T1D - T1A; + T1F = TF - TC; + T1G = T1l + T1k; + T1H = T1F - T1G; + T1J = T1G + T1F; + } + Cr[WS(csr, 13)] = T1x - T1E; + Ci[WS(csi, 13)] = T1M - T1J; + Cr[WS(csr, 3)] = T1x + T1E; + Ci[WS(csi, 3)] = T1J + T1M; + Ci[WS(csi, 5)] = T1H + T1I; + Cr[WS(csr, 5)] = T1N + T1O; + Ci[WS(csi, 11)] = T1I - T1H; + Cr[WS(csr, 11)] = T1N - T1O; + } + } + } +} + +static const kr2c_desc desc = { 32, "r2cf_32", {140, 26, 16, 0}, &GENUS }; /* 140, 26, 16 repeat the add / mul / fused multiply-add counts from this variant's header comment; the meaning of the trailing 0 is not shown here */ + +void X(codelet_r2cf_32) (planner *p) { + X(kr2c_register) (p, r2cf_32, &desc); /* passes r2cf_32 and its descriptor to the planner p */ +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 4 -name r2cf_4 -include r2cf.h */ + +/* + * This function contains 6 FP additions, 0 FP multiplications, + * (or, 6 additions, 0 multiplications, 0 fused multiply/add), + * 7 stack variables, 0 constants, and 8 memory accesses + */ +#include "r2cf.h" + +static void r2cf_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) { + E T1, T2, T4, T5, T3, T6; + T1 = R0[0]; + T2 = R0[WS(rs, 1)]; + T4 = R1[0]; + T5 = R1[WS(rs, 1)]; + Cr[WS(csr, 1)] = T1 - T2; + T3 = T1 + T2; + Ci[WS(csi, 1)] = T5 - T4; + T6 = T4 + T5; + Cr[0] = T3 + T6; + Cr[WS(csr, 2)] = T3 - T6; + } + } +} + +static const kr2c_desc desc = { 4, "r2cf_4", {6, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cf_4) (planner *p) { + X(kr2c_register) (p, r2cf_4, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 4 -name r2cf_4 -include r2cf.h */ + +/* + * This function contains 6 FP additions, 0 FP multiplications, + * (or, 6 additions, 0 multiplications, 0 fused multiply/add), + * 7 stack variables, 0 constants, and 8 memory accesses + */ +#include "r2cf.h" + +static void r2cf_4(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + { + INT i; + for (i = 
v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(16, csr), MAKE_VOLATILE_STRIDE(16, csi)) { + E T1, T2, T3, T4, T5, T6; + T1 = R0[0]; + T2 = R0[WS(rs, 1)]; + T3 = T1 + T2; + T4 = R1[0]; + T5 = R1[WS(rs, 1)]; + T6 = T4 + T5; + Cr[WS(csr, 1)] = T1 - T2; + Ci[WS(csi, 1)] = T5 - T4; + Cr[WS(csr, 2)] = T3 - T6; + Cr[0] = T3 + T6; + } + } +} + +static const kr2c_desc desc = { 4, "r2cf_4", {6, 0, 0, 0}, &GENUS }; + +void X(codelet_r2cf_4) (planner *p) { + X(kr2c_register) (p, r2cf_4, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_5.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 5 -name r2cf_5 -include r2cf.h */ + +/* + * This function contains 12 FP additions, 7 FP multiplications, + * (or, 7 additions, 2 multiplications, 5 fused multiply/add), + * 17 stack variables, 4 constants, and 10 memory accesses + */ +#include "r2cf.h" + +static void r2cf_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP618033988, +0.618033988749894848204586834365638117720309180); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) { + E T7, T1, T2, T4, T5; + T7 = R0[0]; + T1 = R0[WS(rs, 2)]; + T2 = R1[0]; + T4 = R0[WS(rs, 1)]; + T5 = R1[WS(rs, 1)]; + { + E T3, T8, T6, T9, Tc, Ta, Tb; + T3 = T1 - T2; + T8 = T2 + T1; + T6 = T4 - T5; + T9 = T4 + T5; + Ci[WS(csi, 2)] = KP951056516 * (FMA(KP618033988, T3, T6)); + Ci[WS(csi, 1)] = KP951056516 * (FNMS(KP618033988, T6, T3)); + Tc = T8 - T9; + Ta = T8 + T9; + Tb = FNMS(KP250000000, Ta, T7); + Cr[0] = T7 + Ta; + Cr[WS(csr, 2)] = FNMS(KP559016994, Tc, Tb); + Cr[WS(csr, 1)] = FMA(KP559016994, Tc, Tb); + } + } + } +} + +static const kr2c_desc desc = { 5, "r2cf_5", {7, 2, 5, 0}, &GENUS }; + +void X(codelet_r2cf_5) 
(planner *p) { + X(kr2c_register) (p, r2cf_5, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 5 -name r2cf_5 -include r2cf.h */ + +/* + * This function contains 12 FP additions, 6 FP multiplications, + * (or, 9 additions, 3 multiplications, 3 fused multiply/add), + * 17 stack variables, 4 constants, and 10 memory accesses + */ +#include "r2cf.h" + +static void r2cf_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP250000000, +0.250000000000000000000000000000000000000000000); + DK(KP559016994, +0.559016994374947424102293417182819058860154590); + DK(KP587785252, +0.587785252292473129168705954639072768597652438); + DK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(20, rs), MAKE_VOLATILE_STRIDE(20, csr), MAKE_VOLATILE_STRIDE(20, csi)) { + E Ta, T7, T8, T3, Tb, T6, T9, Tc; + Ta = R0[0]; + { + E T1, T2, T4, T5; + T1 = R0[WS(rs, 2)]; + T2 = R1[0]; + T7 = T2 + T1; + T4 = R0[WS(rs, 1)]; + T5 = R1[WS(rs, 1)]; + T8 = T4 + T5; + T3 = T1 - T2; + Tb = T7 + T8; + T6 = T4 - T5; + } + Ci[WS(csi, 1)] = FNMS(KP587785252, T6, KP951056516 * T3); + Cr[0] = Ta + Tb; + Ci[WS(csi, 2)] = FMA(KP587785252, T3, KP951056516 * T6); + T9 = KP559016994 * (T7 - T8); + Tc = FNMS(KP250000000, Tb, Ta); + Cr[WS(csr, 1)] = T9 + Tc; + Cr[WS(csr, 2)] = Tc - T9; + } + } +} + +static const kr2c_desc desc = { 5, "r2cf_5", {9, 3, 3, 0}, &GENUS }; + +void X(codelet_r2cf_5) (planner *p) { + X(kr2c_register) (p, r2cf_5, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * 
Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -name r2cf_6 -include r2cf.h */ + +/* + * This function contains 14 FP additions, 4 FP multiplications, + * (or, 12 additions, 2 multiplications, 2 fused multiply/add), + * 13 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cf.h" + +static void r2cf_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) { + E T4, Td, T3, Tc, T9, T5; + { + E T1, T2, T7, T8; + T1 = R0[0]; + T2 = R1[WS(rs, 1)]; + T7 = R0[WS(rs, 2)]; + T8 = R1[0]; + T4 = R0[WS(rs, 1)]; + Td = T1 + T2; + T3 = T1 - T2; + Tc = T7 
+ T8; + T9 = T7 - T8; + T5 = R1[WS(rs, 2)]; + } + { + E T6, Tb, Te, Ta; + T6 = T4 - T5; + Tb = T4 + T5; + Te = Tb + Tc; + Ci[WS(csi, 2)] = KP866025403 * (Tb - Tc); + Ta = T6 + T9; + Ci[WS(csi, 1)] = KP866025403 * (T9 - T6); + Cr[0] = Td + Te; + Cr[WS(csr, 2)] = FNMS(KP500000000, Te, Td); + Cr[WS(csr, 3)] = T3 + Ta; + Cr[WS(csr, 1)] = FNMS(KP500000000, Ta, T3); + } + } + } +} + +static const kr2c_desc desc = { 6, "r2cf_6", {12, 2, 2, 0}, &GENUS }; + +void X(codelet_r2cf_6) (planner *p) { + X(kr2c_register) (p, r2cf_6, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 6 -name r2cf_6 -include r2cf.h */ + +/* + * This function contains 14 FP additions, 4 FP multiplications, + * (or, 12 additions, 2 multiplications, 2 fused multiply/add), + * 17 stack variables, 2 constants, and 12 memory accesses + */ +#include "r2cf.h" + +static void r2cf_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) { + E T3, Td, T9, Tc, T6, Tb, T1, T2, Ta, Te; + T1 = R0[0]; + T2 = R1[WS(rs, 1)]; + T3 = T1 - T2; + Td = T1 + T2; + { + E T7, T8, T4, T5; + T7 = R0[WS(rs, 2)]; + T8 = R1[0]; + T9 = T7 - T8; + Tc = T7 + T8; + T4 = R0[WS(rs, 1)]; + T5 = R1[WS(rs, 2)]; + T6 = T4 - T5; + Tb = T4 + T5; + } + Ci[WS(csi, 1)] = KP866025403 * (T9 - T6); + Ta = T6 + T9; + Cr[WS(csr, 1)] = FNMS(KP500000000, Ta, T3); + Cr[WS(csr, 3)] = T3 + Ta; + Ci[WS(csi, 2)] = KP866025403 * (Tb - Tc); + Te = Tb + Tc; + Cr[WS(csr, 2)] = FNMS(KP500000000, Te, Td); + Cr[0] = Td + Te; + } + } +} + +static const kr2c_desc desc = { 6, "r2cf_6", {12, 2, 2, 0}, &GENUS }; + +void 
X(codelet_r2cf_6) (planner *p) { + X(kr2c_register) (p, r2cf_6, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_64.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_64.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,1375 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:08 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include r2cf.h */ + +/* + * This function contains 394 FP additions, 196 FP multiplications, + * (or, 198 additions, 0 multiplications, 196 fused multiply/add), + * 133 stack variables, 15 constants, and 128 memory accesses + */ +#include "r2cf.h" + +static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP995184726, 
+0.995184726672196886244836953109479921575474869); + DK(KP098491403, +0.098491403357164253077197521291327432293052451); + DK(KP820678790, +0.820678790828660330972281985331011598767386482); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP534511135, +0.534511135950791641089685961295362908582039528); + DK(KP303346683, +0.303346683607342391675883946941299872384187453); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E T5n, T5o; + { + E T11, T2j, T4P, T5P, T3D, T5p, T3d, Tf, T1k, T1H, T5D, T4l, T5A, T4a, T3i; + E T2U, T1R, T2e, T5K, T4G, T5H, T4v, T3l, T31, T5s, T42, T5t, T3Z, T2n, T1b; + E T3f, TZ, T5v, T3T, T5w, T3Q, T2m, T18, T3e, TK, T3K, T5Q, T4S, T5q, T14; + E T2k, T3p, Tu, T4w, T1U, T5E, T4h, T5B, T4o, T3j, T2X, T1I, T1z, T1Z, T4A; + E T24, T4x, T1X, T20; + { + E TN, T3V, TS, TX, T3X, TQ, T40, TT; + { + E T1g, T46, T1B, T1G, T47, T1j, T4j, T1C; + { + E T4, T3z, T3, T3B, Td, T5, T8, T9; + { + E T1, T2, Tb, Tc; + T1 = R0[0]; + T2 = R0[WS(rs, 16)]; + Tb = R0[WS(rs, 28)]; + Tc = R0[WS(rs, 12)]; + T4 = R0[WS(rs, 8)]; + T3z = T1 - T2; + T3 = T1 + T2; + T3B = Tb - Tc; + Td = Tb + Tc; + T5 = R0[WS(rs, 24)]; + T8 = R0[WS(rs, 4)]; + T9 = R0[WS(rs, 20)]; + } + { + E T1E, T1F, T1h, T1i; + { + E T1e, T4N, T6, T3A, Ta, T1f; + T1e = R1[0]; + T4N = T4 - T5; 
+ T6 = T4 + T5; + T3A = T8 - T9; + Ta = T8 + T9; + T1f = R1[WS(rs, 16)]; + { + E T7, T3C, T4O, Te; + T11 = T3 - T6; + T7 = T3 + T6; + T3C = T3A + T3B; + T4O = T3B - T3A; + T2j = Td - Ta; + Te = Ta + Td; + T4P = FNMS(KP707106781, T4O, T4N); + T5P = FMA(KP707106781, T4O, T4N); + T3D = FMA(KP707106781, T3C, T3z); + T5p = FNMS(KP707106781, T3C, T3z); + T3d = T7 - Te; + Tf = T7 + Te; + T1g = T1e + T1f; + T46 = T1e - T1f; + } + } + T1E = R1[WS(rs, 4)]; + T1F = R1[WS(rs, 20)]; + T1h = R1[WS(rs, 8)]; + T1i = R1[WS(rs, 24)]; + T1B = R1[WS(rs, 28)]; + T1G = T1E + T1F; + T47 = T1E - T1F; + T1j = T1h + T1i; + T4j = T1h - T1i; + T1C = R1[WS(rs, 12)]; + } + } + { + E T1N, T4r, T28, T2d, T4s, T1Q, T4E, T29; + { + E T2b, T2c, T1O, T1P; + { + E T2S, T48, T1D, T1L, T1M, T4k, T49, T2T; + T1L = R1[WS(rs, 31)]; + T1M = R1[WS(rs, 15)]; + T2S = T1g + T1j; + T1k = T1g - T1j; + T48 = T1B - T1C; + T1D = T1B + T1C; + T1N = T1L + T1M; + T4r = T1L - T1M; + T4k = T47 - T48; + T49 = T47 + T48; + T2T = T1G + T1D; + T1H = T1D - T1G; + T5D = FNMS(KP707106781, T4k, T4j); + T4l = FMA(KP707106781, T4k, T4j); + T5A = FNMS(KP707106781, T49, T46); + T4a = FMA(KP707106781, T49, T46); + T3i = T2S - T2T; + T2U = T2S + T2T; + T2b = R1[WS(rs, 3)]; + T2c = R1[WS(rs, 19)]; + } + T1O = R1[WS(rs, 7)]; + T1P = R1[WS(rs, 23)]; + T28 = R1[WS(rs, 27)]; + T2d = T2b + T2c; + T4s = T2b - T2c; + T1Q = T1O + T1P; + T4E = T1P - T1O; + T29 = R1[WS(rs, 11)]; + } + { + E TV, TW, TO, TP; + { + E T2Z, T4t, T2a, TL, TM, T4F, T4u, T30; + TL = R0[WS(rs, 31)]; + TM = R0[WS(rs, 15)]; + T2Z = T1N + T1Q; + T1R = T1N - T1Q; + T4t = T28 - T29; + T2a = T28 + T29; + TN = TL + TM; + T3V = TL - TM; + T4F = T4t - T4s; + T4u = T4s + T4t; + T30 = T2d + T2a; + T2e = T2a - T2d; + T5K = FNMS(KP707106781, T4F, T4E); + T4G = FMA(KP707106781, T4F, T4E); + T5H = FNMS(KP707106781, T4u, T4r); + T4v = FMA(KP707106781, T4u, T4r); + T3l = T2Z - T30; + T31 = T2Z + T30; + TV = R0[WS(rs, 27)]; + TW = R0[WS(rs, 11)]; + } + TO = R0[WS(rs, 7)]; + TP = R0[WS(rs, 
23)]; + TS = R0[WS(rs, 3)]; + TX = TV + TW; + T3X = TV - TW; + TQ = TO + TP; + T40 = TO - TP; + TT = R0[WS(rs, 19)]; + } + } + } + { + E Ti, T3E, Tn, Ts, T3I, Tl, T3F, To; + { + E Ty, T3M, TD, TI, T3O, TB, T3R, TE; + { + E TG, TH, Tz, TA; + { + E T19, TR, T3W, TU, Tw, Tx; + Tw = R0[WS(rs, 1)]; + Tx = R0[WS(rs, 17)]; + T19 = TN - TQ; + TR = TN + TQ; + T3W = TS - TT; + TU = TS + TT; + Ty = Tw + Tx; + T3M = Tw - Tx; + { + E T41, T3Y, T1a, TY; + T41 = T3W - T3X; + T3Y = T3W + T3X; + T1a = TX - TU; + TY = TU + TX; + T5s = FNMS(KP707106781, T41, T40); + T42 = FMA(KP707106781, T41, T40); + T5t = FNMS(KP707106781, T3Y, T3V); + T3Z = FMA(KP707106781, T3Y, T3V); + T2n = FMA(KP414213562, T19, T1a); + T1b = FNMS(KP414213562, T1a, T19); + T3f = TR - TY; + TZ = TR + TY; + TG = R0[WS(rs, 29)]; + TH = R0[WS(rs, 13)]; + } + } + Tz = R0[WS(rs, 9)]; + TA = R0[WS(rs, 25)]; + TD = R0[WS(rs, 5)]; + TI = TG + TH; + T3O = TG - TH; + TB = Tz + TA; + T3R = Tz - TA; + TE = R0[WS(rs, 21)]; + } + { + E Tq, Tr, Tj, Tk; + { + E T16, TC, T3N, TF, Tg, Th; + Tg = R0[WS(rs, 2)]; + Th = R0[WS(rs, 18)]; + T16 = Ty - TB; + TC = Ty + TB; + T3N = TD - TE; + TF = TD + TE; + Ti = Tg + Th; + T3E = Tg - Th; + { + E T3S, T3P, T17, TJ; + T3S = T3N - T3O; + T3P = T3N + T3O; + T17 = TI - TF; + TJ = TF + TI; + T5v = FNMS(KP707106781, T3S, T3R); + T3T = FMA(KP707106781, T3S, T3R); + T5w = FNMS(KP707106781, T3P, T3M); + T3Q = FMA(KP707106781, T3P, T3M); + T2m = FNMS(KP414213562, T16, T17); + T18 = FMA(KP414213562, T17, T16); + T3e = TC - TJ; + TK = TC + TJ; + Tq = R0[WS(rs, 6)]; + Tr = R0[WS(rs, 22)]; + } + } + Tj = R0[WS(rs, 10)]; + Tk = R0[WS(rs, 26)]; + Tn = R0[WS(rs, 30)]; + Ts = Tq + Tr; + T3I = Tq - Tr; + Tl = Tj + Tk; + T3F = Tj - Tk; + To = R0[WS(rs, 14)]; + } + } + { + E T1n, T4b, T1s, T4f, T1x, T4c, T1q, T1t; + { + E T1v, T1w, T1o, T1p; + { + E T1l, T4Q, T3G, Tm, T12, Tp, T3H, T1m; + T1l = R1[WS(rs, 2)]; + T4Q = FMA(KP414213562, T3E, T3F); + T3G = FNMS(KP414213562, T3F, T3E); + Tm = Ti + Tl; + T12 = Ti - 
Tl; + Tp = Tn + To; + T3H = Tn - To; + T1m = R1[WS(rs, 18)]; + T1v = R1[WS(rs, 6)]; + { + E T4R, T3J, Tt, T13; + T4R = FNMS(KP414213562, T3H, T3I); + T3J = FMA(KP414213562, T3I, T3H); + Tt = Tp + Ts; + T13 = Tp - Ts; + T1n = T1l + T1m; + T4b = T1l - T1m; + T3K = T3G + T3J; + T5Q = T3J - T3G; + T4S = T4Q + T4R; + T5q = T4Q - T4R; + T14 = T12 + T13; + T2k = T13 - T12; + T3p = Tt - Tm; + Tu = Tm + Tt; + T1w = R1[WS(rs, 22)]; + } + } + T1o = R1[WS(rs, 10)]; + T1p = R1[WS(rs, 26)]; + T1s = R1[WS(rs, 30)]; + T4f = T1v - T1w; + T1x = T1v + T1w; + T4c = T1o - T1p; + T1q = T1o + T1p; + T1t = R1[WS(rs, 14)]; + } + { + E T22, T23, T1V, T1W; + { + E T1S, T4d, T4m, T2V, T1r, T4e, T1u, T1T; + T1S = R1[WS(rs, 1)]; + T4d = FNMS(KP414213562, T4c, T4b); + T4m = FMA(KP414213562, T4b, T4c); + T2V = T1n + T1q; + T1r = T1n - T1q; + T4e = T1s - T1t; + T1u = T1s + T1t; + T1T = R1[WS(rs, 17)]; + T22 = R1[WS(rs, 5)]; + { + E T4g, T4n, T2W, T1y; + T4g = FMA(KP414213562, T4f, T4e); + T4n = FNMS(KP414213562, T4e, T4f); + T2W = T1u + T1x; + T1y = T1u - T1x; + T4w = T1S - T1T; + T1U = T1S + T1T; + T5E = T4g - T4d; + T4h = T4d + T4g; + T5B = T4m - T4n; + T4o = T4m + T4n; + T3j = T2W - T2V; + T2X = T2V + T2W; + T1I = T1y - T1r; + T1z = T1r + T1y; + T23 = R1[WS(rs, 21)]; + } + } + T1V = R1[WS(rs, 9)]; + T1W = R1[WS(rs, 25)]; + T1Z = R1[WS(rs, 29)]; + T4A = T23 - T22; + T24 = T22 + T23; + T4x = T1W - T1V; + T1X = T1V + T1W; + T20 = R1[WS(rs, 13)]; + } + } + } + } + { + E T4C, T5L, T4J, T5I, T26, T2f, T3q, T3h, T3w, T3s, T3o, T3r, T3t; + { + E T2R, T37, T2Y, T3a, T39, T3m, T3b, T35, Tv, T10, T34, T3c, T3x, T3y; + { + E T4y, T4H, T32, T1Y, T4z, T21; + T2R = Tf - Tu; + Tv = Tf + Tu; + T4y = FMA(KP414213562, T4x, T4w); + T4H = FNMS(KP414213562, T4w, T4x); + T32 = T1U + T1X; + T1Y = T1U - T1X; + T4z = T1Z - T20; + T21 = T1Z + T20; + T10 = TK + TZ; + T37 = TZ - TK; + T2Y = T2U - T2X; + T3a = T2U + T2X; + { + E T4B, T4I, T33, T25; + T4B = FNMS(KP414213562, T4A, T4z); + T4I = FMA(KP414213562, T4z, T4A); + 
T33 = T21 + T24; + T25 = T21 - T24; + T39 = Tv + T10; + T4C = T4y + T4B; + T5L = T4B - T4y; + T4J = T4H + T4I; + T5I = T4I - T4H; + T34 = T32 + T33; + T3m = T33 - T32; + T26 = T1Y + T25; + T2f = T25 - T1Y; + } + } + Cr[WS(csr, 16)] = Tv - T10; + T3b = T31 + T34; + T35 = T31 - T34; + Ci[WS(csi, 16)] = T3b - T3a; + T3c = T3a + T3b; + { + E T3k, T3u, T3v, T3n, T36, T38, T3g; + T3g = T3e + T3f; + T3q = T3f - T3e; + Cr[0] = T39 + T3c; + Cr[WS(csr, 32)] = T39 - T3c; + T36 = T2Y + T35; + T38 = T35 - T2Y; + T3x = FNMS(KP707106781, T3g, T3d); + T3h = FMA(KP707106781, T3g, T3d); + Ci[WS(csi, 8)] = FMA(KP707106781, T38, T37); + Ci[WS(csi, 24)] = FMS(KP707106781, T38, T37); + Cr[WS(csr, 8)] = FMA(KP707106781, T36, T2R); + Cr[WS(csr, 24)] = FNMS(KP707106781, T36, T2R); + T3k = FMA(KP414213562, T3j, T3i); + T3u = FNMS(KP414213562, T3i, T3j); + T3v = FMA(KP414213562, T3l, T3m); + T3n = FNMS(KP414213562, T3m, T3l); + T3y = T3v - T3u; + T3w = T3u + T3v; + T3s = T3n - T3k; + T3o = T3k + T3n; + } + Cr[WS(csr, 12)] = FMA(KP923879532, T3y, T3x); + Cr[WS(csr, 20)] = FNMS(KP923879532, T3y, T3x); + } + Cr[WS(csr, 4)] = FMA(KP923879532, T3o, T3h); + Cr[WS(csr, 28)] = FNMS(KP923879532, T3o, T3h); + T3r = FNMS(KP707106781, T3q, T3p); + T3t = FMA(KP707106781, T3q, T3p); + { + E T27, T2g, T2v, T1d, T2r, T2p, T2s, T1K, T6l, T6m; + { + E T15, T2o, T2P, T2z, T2l, T1c, T1A, T1J, T2D, T2L, T2J, T2M, T2C, T2E, T2N; + E T2F; + { + E T2H, T2I, T2x, T2y, T2A, T2B; + T15 = FMA(KP707106781, T14, T11); + T2x = FNMS(KP707106781, T14, T11); + T2y = T2n - T2m; + T2o = T2m + T2n; + Ci[WS(csi, 4)] = FMA(KP923879532, T3w, T3t); + Ci[WS(csi, 28)] = FMS(KP923879532, T3w, T3t); + Ci[WS(csi, 20)] = FMA(KP923879532, T3s, T3r); + Ci[WS(csi, 12)] = FMS(KP923879532, T3s, T3r); + T2P = FNMS(KP923879532, T2y, T2x); + T2z = FMA(KP923879532, T2y, T2x); + T2l = FMA(KP707106781, T2k, T2j); + T2H = FNMS(KP707106781, T2k, T2j); + T2I = T1b - T18; + T1c = T18 + T1b; + T1A = FMA(KP707106781, T1z, T1k); + T2A = FNMS(KP707106781, 
T1z, T1k); + T2B = FNMS(KP707106781, T1I, T1H); + T1J = FMA(KP707106781, T1I, T1H); + T27 = FMA(KP707106781, T26, T1R); + T2D = FNMS(KP707106781, T26, T1R); + T2L = FNMS(KP923879532, T2I, T2H); + T2J = FMA(KP923879532, T2I, T2H); + T2M = FMA(KP668178637, T2A, T2B); + T2C = FNMS(KP668178637, T2B, T2A); + T2E = FNMS(KP707106781, T2f, T2e); + T2g = FMA(KP707106781, T2f, T2e); + } + T2N = FNMS(KP668178637, T2D, T2E); + T2F = FMA(KP668178637, T2E, T2D); + T2v = FNMS(KP923879532, T1c, T15); + T1d = FMA(KP923879532, T1c, T15); + { + E T2Q, T2O, T2K, T2G; + T2Q = T2M - T2N; + T2O = T2M + T2N; + T2K = T2F - T2C; + T2G = T2C + T2F; + Cr[WS(csr, 10)] = FMA(KP831469612, T2Q, T2P); + Cr[WS(csr, 22)] = FNMS(KP831469612, T2Q, T2P); + Ci[WS(csi, 26)] = FNMS(KP831469612, T2O, T2L); + Ci[WS(csi, 6)] = -(FMA(KP831469612, T2O, T2L)); + Ci[WS(csi, 22)] = FMS(KP831469612, T2K, T2J); + Ci[WS(csi, 10)] = FMA(KP831469612, T2K, T2J); + Cr[WS(csr, 6)] = FMA(KP831469612, T2G, T2z); + Cr[WS(csr, 26)] = FNMS(KP831469612, T2G, T2z); + } + T2r = FMA(KP923879532, T2o, T2l); + T2p = FNMS(KP923879532, T2o, T2l); + T2s = FNMS(KP198912367, T1A, T1J); + T1K = FMA(KP198912367, T1J, T1A); + } + { + E T63, T5r, T5R, T6d, T5J, T5M, T6e, T5y, T6j, T6b, T66, T67, T64, T5U, T5Z; + E T5G; + { + E T5S, T5u, T5x, T5T, T2t, T2h; + T63 = FMA(KP923879532, T5q, T5p); + T5r = FNMS(KP923879532, T5q, T5p); + T5R = FNMS(KP923879532, T5Q, T5P); + T6d = FMA(KP923879532, T5Q, T5P); + T2t = FMA(KP198912367, T27, T2g); + T2h = FNMS(KP198912367, T2g, T27); + T5S = FNMS(KP668178637, T5s, T5t); + T5u = FMA(KP668178637, T5t, T5s); + { + E T2w, T2u, T2q, T2i; + T2w = T2t - T2s; + T2u = T2s + T2t; + T2q = T2h - T1K; + T2i = T1K + T2h; + Cr[WS(csr, 14)] = FMA(KP980785280, T2w, T2v); + Cr[WS(csr, 18)] = FNMS(KP980785280, T2w, T2v); + Ci[WS(csi, 30)] = FMS(KP980785280, T2u, T2r); + Ci[WS(csi, 2)] = FMA(KP980785280, T2u, T2r); + Ci[WS(csi, 18)] = FMA(KP980785280, T2q, T2p); + Ci[WS(csi, 14)] = FMS(KP980785280, T2q, T2p); + Cr[WS(csr, 
2)] = FMA(KP980785280, T2i, T1d); + Cr[WS(csr, 30)] = FNMS(KP980785280, T2i, T1d); + T5x = FNMS(KP668178637, T5w, T5v); + T5T = FMA(KP668178637, T5v, T5w); + } + { + E T69, T6a, T5C, T5F; + T5J = FNMS(KP923879532, T5I, T5H); + T69 = FMA(KP923879532, T5I, T5H); + T6a = FNMS(KP923879532, T5L, T5K); + T5M = FMA(KP923879532, T5L, T5K); + T6e = T5x + T5u; + T5y = T5u - T5x; + T6j = FNMS(KP303346683, T69, T6a); + T6b = FMA(KP303346683, T6a, T69); + T66 = FMA(KP923879532, T5B, T5A); + T5C = FNMS(KP923879532, T5B, T5A); + T5F = FNMS(KP923879532, T5E, T5D); + T67 = FMA(KP923879532, T5E, T5D); + T64 = T5T + T5S; + T5U = T5S - T5T; + T5Z = FMA(KP534511135, T5C, T5F); + T5G = FNMS(KP534511135, T5F, T5C); + } + } + { + E T61, T6i, T68, T62; + { + E T5z, T5Y, T5N, T5X, T5V, T60, T5W, T5O; + T61 = FNMS(KP831469612, T5y, T5r); + T5z = FMA(KP831469612, T5y, T5r); + T6i = FNMS(KP303346683, T66, T67); + T68 = FMA(KP303346683, T67, T66); + T5Y = FMA(KP534511135, T5J, T5M); + T5N = FNMS(KP534511135, T5M, T5J); + T5X = FNMS(KP831469612, T5U, T5R); + T5V = FMA(KP831469612, T5U, T5R); + T60 = T5Y - T5Z; + T62 = T5Z + T5Y; + T5W = T5N - T5G; + T5O = T5G + T5N; + Ci[WS(csi, 27)] = FMA(KP881921264, T60, T5X); + Ci[WS(csi, 5)] = FMS(KP881921264, T60, T5X); + Cr[WS(csr, 5)] = FMA(KP881921264, T5O, T5z); + Cr[WS(csr, 27)] = FNMS(KP881921264, T5O, T5z); + Ci[WS(csi, 21)] = FMS(KP881921264, T5W, T5V); + Ci[WS(csi, 11)] = FMA(KP881921264, T5W, T5V); + } + { + E T6g, T6f, T6h, T6k, T65, T6c; + T6l = FNMS(KP831469612, T64, T63); + T65 = FMA(KP831469612, T64, T63); + T6c = T68 + T6b; + T6g = T6b - T68; + T6f = FNMS(KP831469612, T6e, T6d); + T6h = FMA(KP831469612, T6e, T6d); + Cr[WS(csr, 11)] = FMA(KP881921264, T62, T61); + Cr[WS(csr, 21)] = FNMS(KP881921264, T62, T61); + Cr[WS(csr, 3)] = FMA(KP956940335, T6c, T65); + Cr[WS(csr, 29)] = FNMS(KP956940335, T6c, T65); + T6k = T6i - T6j; + T6m = T6i + T6j; + Ci[WS(csi, 29)] = FMS(KP956940335, T6k, T6h); + Ci[WS(csi, 3)] = FMA(KP956940335, T6k, T6h); + 
Ci[WS(csi, 19)] = FMA(KP956940335, T6g, T6f); + Ci[WS(csi, 13)] = FMS(KP956940335, T6g, T6f); + } + } + } + { + E T55, T3L, T4T, T5f, T4D, T4K, T5g, T44, T5l, T5d, T58, T59, T56, T4W, T51; + E T4q; + { + E T4U, T3U, T43, T4V; + T55 = FNMS(KP923879532, T3K, T3D); + T3L = FMA(KP923879532, T3K, T3D); + T4T = FMA(KP923879532, T4S, T4P); + T5f = FNMS(KP923879532, T4S, T4P); + Cr[WS(csr, 13)] = FNMS(KP956940335, T6m, T6l); + Cr[WS(csr, 19)] = FMA(KP956940335, T6m, T6l); + T4U = FMA(KP198912367, T3Q, T3T); + T3U = FNMS(KP198912367, T3T, T3Q); + T43 = FMA(KP198912367, T42, T3Z); + T4V = FNMS(KP198912367, T3Z, T42); + { + E T5b, T5c, T4i, T4p; + T4D = FMA(KP923879532, T4C, T4v); + T5b = FNMS(KP923879532, T4C, T4v); + T5c = FNMS(KP923879532, T4J, T4G); + T4K = FMA(KP923879532, T4J, T4G); + T5g = T43 - T3U; + T44 = T3U + T43; + T5l = FNMS(KP820678790, T5b, T5c); + T5d = FMA(KP820678790, T5c, T5b); + T58 = FNMS(KP923879532, T4h, T4a); + T4i = FMA(KP923879532, T4h, T4a); + T4p = FMA(KP923879532, T4o, T4l); + T59 = FNMS(KP923879532, T4o, T4l); + T56 = T4U - T4V; + T4W = T4U + T4V; + T51 = FMA(KP098491403, T4i, T4p); + T4q = FNMS(KP098491403, T4p, T4i); + } + } + { + E T53, T5k, T5a, T54; + { + E T45, T50, T4L, T4Z, T4X, T52, T4Y, T4M; + T53 = FNMS(KP980785280, T44, T3L); + T45 = FMA(KP980785280, T44, T3L); + T5k = FNMS(KP820678790, T58, T59); + T5a = FMA(KP820678790, T59, T58); + T50 = FMA(KP098491403, T4D, T4K); + T4L = FNMS(KP098491403, T4K, T4D); + T4Z = FMA(KP980785280, T4W, T4T); + T4X = FNMS(KP980785280, T4W, T4T); + T52 = T50 - T51; + T54 = T51 + T50; + T4Y = T4L - T4q; + T4M = T4q + T4L; + Ci[WS(csi, 31)] = FMA(KP995184726, T52, T4Z); + Ci[WS(csi, 1)] = FMS(KP995184726, T52, T4Z); + Cr[WS(csr, 1)] = FMA(KP995184726, T4M, T45); + Cr[WS(csr, 31)] = FNMS(KP995184726, T4M, T45); + Ci[WS(csi, 17)] = FMS(KP995184726, T4Y, T4X); + Ci[WS(csi, 15)] = FMA(KP995184726, T4Y, T4X); + } + { + E T5i, T5h, T5j, T5m, T57, T5e; + T5n = FNMS(KP980785280, T56, T55); + T57 = FMA(KP980785280, 
T56, T55); + T5e = T5a + T5d; + T5i = T5d - T5a; + T5h = FNMS(KP980785280, T5g, T5f); + T5j = FMA(KP980785280, T5g, T5f); + Cr[WS(csr, 15)] = FMA(KP995184726, T54, T53); + Cr[WS(csr, 17)] = FNMS(KP995184726, T54, T53); + Cr[WS(csr, 7)] = FMA(KP773010453, T5e, T57); + Cr[WS(csr, 25)] = FNMS(KP773010453, T5e, T57); + T5m = T5k - T5l; + T5o = T5k + T5l; + Ci[WS(csi, 25)] = FMS(KP773010453, T5m, T5j); + Ci[WS(csi, 7)] = FMA(KP773010453, T5m, T5j); + Ci[WS(csi, 23)] = FMA(KP773010453, T5i, T5h); + Ci[WS(csi, 9)] = FMS(KP773010453, T5i, T5h); + } + } + } + } + } + } + Cr[WS(csr, 9)] = FNMS(KP773010453, T5o, T5n); + Cr[WS(csr, 23)] = FMA(KP773010453, T5o, T5n); + } + } +} + +static const kr2c_desc desc = { 64, "r2cf_64", {198, 0, 196, 0}, &GENUS }; + +void X(codelet_r2cf_64) (planner *p) { + X(kr2c_register) (p, r2cf_64, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include r2cf.h */ + +/* + * This function contains 394 FP additions, 124 FP multiplications, + * (or, 342 additions, 72 multiplications, 52 fused multiply/add), + * 106 stack variables, 15 constants, and 128 memory accesses + */ +#include "r2cf.h" + +static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP098017140, +0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, 
+0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) { + E T4l, T5a, T15, T3n, T2T, T3Q, T7, Te, Tf, T4A, T4L, T1X, T3B, T23, T3y; + E T5I, T66, T4R, T52, T2j, T3F, T2H, T3I, T5P, T69, T1i, T3t, T1l, T3u, TZ; + E T63, T4v, T58, T1r, T3r, T1u, T3q, TK, T62, T4s, T57, Tm, Tt, Tu, T4o; + E T5b, T1c, T3R, T2Q, T3o, T1M, T3z, T5L, T67, T26, T3C, T4H, T4M, T2y, T3J; + E T5S, T6a, T2C, T3G, T4Y, T53; + { + E T3, T11, Td, T13, T6, T2S, Ta, T12, T14, T2R; + { + E T1, T2, Tb, Tc; + T1 = R0[0]; + T2 = R0[WS(rs, 16)]; + T3 = T1 + T2; + T11 = T1 - T2; + Tb = R0[WS(rs, 28)]; + Tc = R0[WS(rs, 12)]; + Td = Tb + Tc; + T13 = Tb - Tc; + } + { + E T4, T5, T8, T9; + T4 = R0[WS(rs, 8)]; + T5 = R0[WS(rs, 24)]; + T6 = T4 + T5; + T2S = T4 - T5; + T8 = R0[WS(rs, 4)]; + T9 = R0[WS(rs, 20)]; + Ta = T8 + T9; + T12 = T8 - T9; + } + T4l = T3 - T6; + T5a = Td - Ta; + T14 = KP707106781 * (T12 + T13); + T15 = T11 + T14; + T3n = T11 - T14; + T2R = KP707106781 * (T13 - T12); + T2T = T2R - T2S; + T3Q = T2S + T2R; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + } + { + E T1P, T4J, T21, T4y, T1S, T4K, T1W, T4z; + { + E T1N, T1O, T1Z, T20; + T1N = R1[WS(rs, 28)]; + T1O = R1[WS(rs, 12)]; + T1P = T1N - T1O; + T4J = T1N + T1O; + T1Z = R1[0]; + T20 = R1[WS(rs, 16)]; + T21 = T1Z - T20; + T4y = T1Z + T20; + } + { + E T1Q, T1R, T1U, T1V; + T1Q = R1[WS(rs, 4)]; + T1R = R1[WS(rs, 20)]; + T1S = T1Q - T1R; + T4K = T1Q + T1R; + T1U = R1[WS(rs, 8)]; + T1V = R1[WS(rs, 24)]; + T1W = 
T1U - T1V; + T4z = T1U + T1V; + } + T4A = T4y - T4z; + T4L = T4J - T4K; + { + E T1T, T22, T5G, T5H; + T1T = KP707106781 * (T1P - T1S); + T1X = T1T - T1W; + T3B = T1W + T1T; + T22 = KP707106781 * (T1S + T1P); + T23 = T21 + T22; + T3y = T21 - T22; + T5G = T4y + T4z; + T5H = T4K + T4J; + T5I = T5G + T5H; + T66 = T5G - T5H; + } + } + { + E T2b, T4P, T2G, T4Q, T2e, T51, T2h, T50; + { + E T29, T2a, T2E, T2F; + T29 = R1[WS(rs, 31)]; + T2a = R1[WS(rs, 15)]; + T2b = T29 - T2a; + T4P = T29 + T2a; + T2E = R1[WS(rs, 7)]; + T2F = R1[WS(rs, 23)]; + T2G = T2E - T2F; + T4Q = T2E + T2F; + } + { + E T2c, T2d, T2f, T2g; + T2c = R1[WS(rs, 3)]; + T2d = R1[WS(rs, 19)]; + T2e = T2c - T2d; + T51 = T2c + T2d; + T2f = R1[WS(rs, 27)]; + T2g = R1[WS(rs, 11)]; + T2h = T2f - T2g; + T50 = T2f + T2g; + } + T4R = T4P - T4Q; + T52 = T50 - T51; + { + E T2i, T2D, T5N, T5O; + T2i = KP707106781 * (T2e + T2h); + T2j = T2b + T2i; + T3F = T2b - T2i; + T2D = KP707106781 * (T2h - T2e); + T2H = T2D - T2G; + T3I = T2G + T2D; + T5N = T4P + T4Q; + T5O = T51 + T50; + T5P = T5N + T5O; + T69 = T5N - T5O; + } + } + { + E TN, T1e, TX, T1g, TQ, T1k, TU, T1f, T1h, T1j; + { + E TL, TM, TV, TW; + TL = R0[WS(rs, 31)]; + TM = R0[WS(rs, 15)]; + TN = TL + TM; + T1e = TL - TM; + TV = R0[WS(rs, 27)]; + TW = R0[WS(rs, 11)]; + TX = TV + TW; + T1g = TV - TW; + } + { + E TO, TP, TS, TT; + TO = R0[WS(rs, 7)]; + TP = R0[WS(rs, 23)]; + TQ = TO + TP; + T1k = TO - TP; + TS = R0[WS(rs, 3)]; + TT = R0[WS(rs, 19)]; + TU = TS + TT; + T1f = TS - TT; + } + T1h = KP707106781 * (T1f + T1g); + T1i = T1e + T1h; + T3t = T1e - T1h; + T1j = KP707106781 * (T1g - T1f); + T1l = T1j - T1k; + T3u = T1k + T1j; + { + E TR, TY, T4t, T4u; + TR = TN + TQ; + TY = TU + TX; + TZ = TR + TY; + T63 = TR - TY; + T4t = TN - TQ; + T4u = TX - TU; + T4v = FNMS(KP382683432, T4u, KP923879532 * T4t); + T58 = FMA(KP382683432, T4t, KP923879532 * T4u); + } + } + { + E Ty, T1s, TI, T1n, TB, T1q, TF, T1o, T1p, T1t; + { + E Tw, Tx, TG, TH; + Tw = R0[WS(rs, 1)]; + Tx = 
R0[WS(rs, 17)]; + Ty = Tw + Tx; + T1s = Tw - Tx; + TG = R0[WS(rs, 29)]; + TH = R0[WS(rs, 13)]; + TI = TG + TH; + T1n = TG - TH; + } + { + E Tz, TA, TD, TE; + Tz = R0[WS(rs, 9)]; + TA = R0[WS(rs, 25)]; + TB = Tz + TA; + T1q = Tz - TA; + TD = R0[WS(rs, 5)]; + TE = R0[WS(rs, 21)]; + TF = TD + TE; + T1o = TD - TE; + } + T1p = KP707106781 * (T1n - T1o); + T1r = T1p - T1q; + T3r = T1q + T1p; + T1t = KP707106781 * (T1o + T1n); + T1u = T1s + T1t; + T3q = T1s - T1t; + { + E TC, TJ, T4q, T4r; + TC = Ty + TB; + TJ = TF + TI; + TK = TC + TJ; + T62 = TC - TJ; + T4q = Ty - TB; + T4r = TI - TF; + T4s = FMA(KP923879532, T4q, KP382683432 * T4r); + T57 = FNMS(KP382683432, T4q, KP923879532 * T4r); + } + } + { + E Ti, T16, Ts, T1a, Tl, T17, Tp, T19, T4m, T4n; + { + E Tg, Th, Tq, Tr; + Tg = R0[WS(rs, 2)]; + Th = R0[WS(rs, 18)]; + Ti = Tg + Th; + T16 = Tg - Th; + Tq = R0[WS(rs, 6)]; + Tr = R0[WS(rs, 22)]; + Ts = Tq + Tr; + T1a = Tq - Tr; + } + { + E Tj, Tk, Tn, To; + Tj = R0[WS(rs, 10)]; + Tk = R0[WS(rs, 26)]; + Tl = Tj + Tk; + T17 = Tj - Tk; + Tn = R0[WS(rs, 30)]; + To = R0[WS(rs, 14)]; + Tp = Tn + To; + T19 = Tn - To; + } + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T4m = Ti - Tl; + T4n = Tp - Ts; + T4o = KP707106781 * (T4m + T4n); + T5b = KP707106781 * (T4n - T4m); + { + E T18, T1b, T2O, T2P; + T18 = FNMS(KP382683432, T17, KP923879532 * T16); + T1b = FMA(KP923879532, T19, KP382683432 * T1a); + T1c = T18 + T1b; + T3R = T1b - T18; + T2O = FNMS(KP923879532, T1a, KP382683432 * T19); + T2P = FMA(KP382683432, T16, KP923879532 * T17); + T2Q = T2O - T2P; + T3o = T2P + T2O; + } + } + { + E T1A, T4E, T1K, T4C, T1D, T4F, T1H, T4B; + { + E T1y, T1z, T1I, T1J; + T1y = R1[WS(rs, 30)]; + T1z = R1[WS(rs, 14)]; + T1A = T1y - T1z; + T4E = T1y + T1z; + T1I = R1[WS(rs, 10)]; + T1J = R1[WS(rs, 26)]; + T1K = T1I - T1J; + T4C = T1I + T1J; + } + { + E T1B, T1C, T1F, T1G; + T1B = R1[WS(rs, 6)]; + T1C = R1[WS(rs, 22)]; + T1D = T1B - T1C; + T4F = T1B + T1C; + T1F = R1[WS(rs, 2)]; + T1G = R1[WS(rs, 18)]; + 
T1H = T1F - T1G; + T4B = T1F + T1G; + } + { + E T1E, T1L, T5J, T5K; + T1E = FNMS(KP923879532, T1D, KP382683432 * T1A); + T1L = FMA(KP382683432, T1H, KP923879532 * T1K); + T1M = T1E - T1L; + T3z = T1L + T1E; + T5J = T4B + T4C; + T5K = T4E + T4F; + T5L = T5J + T5K; + T67 = T5K - T5J; + } + { + E T24, T25, T4D, T4G; + T24 = FNMS(KP382683432, T1K, KP923879532 * T1H); + T25 = FMA(KP923879532, T1A, KP382683432 * T1D); + T26 = T24 + T25; + T3C = T25 - T24; + T4D = T4B - T4C; + T4G = T4E - T4F; + T4H = KP707106781 * (T4D + T4G); + T4M = KP707106781 * (T4G - T4D); + } + } + { + E T2m, T4S, T2w, T4W, T2p, T4T, T2t, T4V; + { + E T2k, T2l, T2u, T2v; + T2k = R1[WS(rs, 1)]; + T2l = R1[WS(rs, 17)]; + T2m = T2k - T2l; + T4S = T2k + T2l; + T2u = R1[WS(rs, 5)]; + T2v = R1[WS(rs, 21)]; + T2w = T2u - T2v; + T4W = T2u + T2v; + } + { + E T2n, T2o, T2r, T2s; + T2n = R1[WS(rs, 9)]; + T2o = R1[WS(rs, 25)]; + T2p = T2n - T2o; + T4T = T2n + T2o; + T2r = R1[WS(rs, 29)]; + T2s = R1[WS(rs, 13)]; + T2t = T2r - T2s; + T4V = T2r + T2s; + } + { + E T2q, T2x, T5Q, T5R; + T2q = FNMS(KP382683432, T2p, KP923879532 * T2m); + T2x = FMA(KP923879532, T2t, KP382683432 * T2w); + T2y = T2q + T2x; + T3J = T2x - T2q; + T5Q = T4S + T4T; + T5R = T4V + T4W; + T5S = T5Q + T5R; + T6a = T5R - T5Q; + } + { + E T2A, T2B, T4U, T4X; + T2A = FNMS(KP923879532, T2w, KP382683432 * T2t); + T2B = FMA(KP382683432, T2m, KP923879532 * T2p); + T2C = T2A - T2B; + T3G = T2B + T2A; + T4U = T4S - T4T; + T4X = T4V - T4W; + T4Y = KP707106781 * (T4U + T4X); + T53 = KP707106781 * (T4X - T4U); + } + } + { + E Tv, T10, T5X, T5Y, T5Z, T60; + Tv = Tf + Tu; + T10 = TK + TZ; + T5X = Tv + T10; + T5Y = T5I + T5L; + T5Z = T5P + T5S; + T60 = T5Y + T5Z; + Cr[WS(csr, 16)] = Tv - T10; + Ci[WS(csi, 16)] = T5Z - T5Y; + Cr[WS(csr, 32)] = T5X - T60; + Cr[0] = T5X + T60; + } + { + E T5F, T5V, T5U, T5W, T5M, T5T; + T5F = Tf - Tu; + T5V = TZ - TK; + T5M = T5I - T5L; + T5T = T5P - T5S; + T5U = KP707106781 * (T5M + T5T); + T5W = KP707106781 * (T5T - T5M); + 
Cr[WS(csr, 24)] = T5F - T5U; + Ci[WS(csi, 24)] = T5W - T5V; + Cr[WS(csr, 8)] = T5F + T5U; + Ci[WS(csi, 8)] = T5V + T5W; + } + { + E T65, T6l, T6k, T6m, T6c, T6g, T6f, T6h; + { + E T61, T64, T6i, T6j; + T61 = T7 - Te; + T64 = KP707106781 * (T62 + T63); + T65 = T61 + T64; + T6l = T61 - T64; + T6i = FNMS(KP382683432, T66, KP923879532 * T67); + T6j = FMA(KP382683432, T69, KP923879532 * T6a); + T6k = T6i + T6j; + T6m = T6j - T6i; + } + { + E T68, T6b, T6d, T6e; + T68 = FMA(KP923879532, T66, KP382683432 * T67); + T6b = FNMS(KP382683432, T6a, KP923879532 * T69); + T6c = T68 + T6b; + T6g = T6b - T68; + T6d = KP707106781 * (T63 - T62); + T6e = Tt - Tm; + T6f = T6d - T6e; + T6h = T6e + T6d; + } + Cr[WS(csr, 28)] = T65 - T6c; + Ci[WS(csi, 28)] = T6k - T6h; + Cr[WS(csr, 4)] = T65 + T6c; + Ci[WS(csi, 4)] = T6h + T6k; + Ci[WS(csi, 12)] = T6f + T6g; + Cr[WS(csr, 12)] = T6l + T6m; + Ci[WS(csi, 20)] = T6g - T6f; + Cr[WS(csr, 20)] = T6l - T6m; + } + { + E T5n, T5D, T5x, T5z, T5q, T5A, T5t, T5B; + { + E T5l, T5m, T5v, T5w; + T5l = T4l - T4o; + T5m = T58 - T57; + T5n = T5l + T5m; + T5D = T5l - T5m; + T5v = T4v - T4s; + T5w = T5b - T5a; + T5x = T5v - T5w; + T5z = T5w + T5v; + } + { + E T5o, T5p, T5r, T5s; + T5o = T4A - T4H; + T5p = T4M - T4L; + T5q = FMA(KP831469612, T5o, KP555570233 * T5p); + T5A = FNMS(KP555570233, T5o, KP831469612 * T5p); + T5r = T4R - T4Y; + T5s = T53 - T52; + T5t = FNMS(KP555570233, T5s, KP831469612 * T5r); + T5B = FMA(KP555570233, T5r, KP831469612 * T5s); + } + { + E T5u, T5C, T5y, T5E; + T5u = T5q + T5t; + Cr[WS(csr, 26)] = T5n - T5u; + Cr[WS(csr, 6)] = T5n + T5u; + T5C = T5A + T5B; + Ci[WS(csi, 6)] = T5z + T5C; + Ci[WS(csi, 26)] = T5C - T5z; + T5y = T5t - T5q; + Ci[WS(csi, 10)] = T5x + T5y; + Ci[WS(csi, 22)] = T5y - T5x; + T5E = T5B - T5A; + Cr[WS(csr, 22)] = T5D - T5E; + Cr[WS(csr, 10)] = T5D + T5E; + } + } + { + E T4x, T5j, T5d, T5f, T4O, T5g, T55, T5h; + { + E T4p, T4w, T59, T5c; + T4p = T4l + T4o; + T4w = T4s + T4v; + T4x = T4p + T4w; + T5j = T4p - T4w; + 
T59 = T57 + T58; + T5c = T5a + T5b; + T5d = T59 - T5c; + T5f = T5c + T59; + } + { + E T4I, T4N, T4Z, T54; + T4I = T4A + T4H; + T4N = T4L + T4M; + T4O = FMA(KP980785280, T4I, KP195090322 * T4N); + T5g = FNMS(KP195090322, T4I, KP980785280 * T4N); + T4Z = T4R + T4Y; + T54 = T52 + T53; + T55 = FNMS(KP195090322, T54, KP980785280 * T4Z); + T5h = FMA(KP195090322, T4Z, KP980785280 * T54); + } + { + E T56, T5i, T5e, T5k; + T56 = T4O + T55; + Cr[WS(csr, 30)] = T4x - T56; + Cr[WS(csr, 2)] = T4x + T56; + T5i = T5g + T5h; + Ci[WS(csi, 2)] = T5f + T5i; + Ci[WS(csi, 30)] = T5i - T5f; + T5e = T55 - T4O; + Ci[WS(csi, 14)] = T5d + T5e; + Ci[WS(csi, 18)] = T5e - T5d; + T5k = T5h - T5g; + Cr[WS(csr, 18)] = T5j - T5k; + Cr[WS(csr, 14)] = T5j + T5k; + } + } + { + E T3p, T41, T4c, T3S, T3w, T4b, T49, T4h, T3P, T42, T3E, T3W, T46, T4g, T3L; + E T3X; + { + E T3s, T3v, T3A, T3D; + T3p = T3n + T3o; + T41 = T3n - T3o; + T4c = T3R - T3Q; + T3S = T3Q + T3R; + T3s = FMA(KP831469612, T3q, KP555570233 * T3r); + T3v = FNMS(KP555570233, T3u, KP831469612 * T3t); + T3w = T3s + T3v; + T4b = T3v - T3s; + { + E T47, T48, T3N, T3O; + T47 = T3F - T3G; + T48 = T3J - T3I; + T49 = FNMS(KP471396736, T48, KP881921264 * T47); + T4h = FMA(KP471396736, T47, KP881921264 * T48); + T3N = FNMS(KP555570233, T3q, KP831469612 * T3r); + T3O = FMA(KP555570233, T3t, KP831469612 * T3u); + T3P = T3N + T3O; + T42 = T3O - T3N; + } + T3A = T3y + T3z; + T3D = T3B + T3C; + T3E = FMA(KP956940335, T3A, KP290284677 * T3D); + T3W = FNMS(KP290284677, T3A, KP956940335 * T3D); + { + E T44, T45, T3H, T3K; + T44 = T3y - T3z; + T45 = T3C - T3B; + T46 = FMA(KP881921264, T44, KP471396736 * T45); + T4g = FNMS(KP471396736, T44, KP881921264 * T45); + T3H = T3F + T3G; + T3K = T3I + T3J; + T3L = FNMS(KP290284677, T3K, KP956940335 * T3H); + T3X = FMA(KP290284677, T3H, KP956940335 * T3K); + } + } + { + E T3x, T3M, T3V, T3Y; + T3x = T3p + T3w; + T3M = T3E + T3L; + Cr[WS(csr, 29)] = T3x - T3M; + Cr[WS(csr, 3)] = T3x + T3M; + T3V = T3S + T3P; + T3Y = 
T3W + T3X; + Ci[WS(csi, 3)] = T3V + T3Y; + Ci[WS(csi, 29)] = T3Y - T3V; + } + { + E T3T, T3U, T3Z, T40; + T3T = T3P - T3S; + T3U = T3L - T3E; + Ci[WS(csi, 13)] = T3T + T3U; + Ci[WS(csi, 19)] = T3U - T3T; + T3Z = T3p - T3w; + T40 = T3X - T3W; + Cr[WS(csr, 19)] = T3Z - T40; + Cr[WS(csr, 13)] = T3Z + T40; + } + { + E T43, T4a, T4f, T4i; + T43 = T41 + T42; + T4a = T46 + T49; + Cr[WS(csr, 27)] = T43 - T4a; + Cr[WS(csr, 5)] = T43 + T4a; + T4f = T4c + T4b; + T4i = T4g + T4h; + Ci[WS(csi, 5)] = T4f + T4i; + Ci[WS(csi, 27)] = T4i - T4f; + } + { + E T4d, T4e, T4j, T4k; + T4d = T4b - T4c; + T4e = T49 - T46; + Ci[WS(csi, 11)] = T4d + T4e; + Ci[WS(csi, 21)] = T4e - T4d; + T4j = T41 - T42; + T4k = T4h - T4g; + Cr[WS(csr, 21)] = T4j - T4k; + Cr[WS(csr, 11)] = T4j + T4k; + } + } + { + E T1d, T33, T3e, T2U, T1w, T3d, T3b, T3j, T2N, T34, T28, T2Y, T38, T3i, T2J; + E T2Z; + { + E T1m, T1v, T1Y, T27; + T1d = T15 - T1c; + T33 = T15 + T1c; + T3e = T2T + T2Q; + T2U = T2Q - T2T; + T1m = FMA(KP195090322, T1i, KP980785280 * T1l); + T1v = FNMS(KP195090322, T1u, KP980785280 * T1r); + T1w = T1m - T1v; + T3d = T1v + T1m; + { + E T39, T3a, T2L, T2M; + T39 = T2j + T2y; + T3a = T2H + T2C; + T3b = FNMS(KP098017140, T3a, KP995184726 * T39); + T3j = FMA(KP995184726, T3a, KP098017140 * T39); + T2L = FNMS(KP195090322, T1l, KP980785280 * T1i); + T2M = FMA(KP980785280, T1u, KP195090322 * T1r); + T2N = T2L - T2M; + T34 = T2M + T2L; + } + T1Y = T1M - T1X; + T27 = T23 - T26; + T28 = FMA(KP634393284, T1Y, KP773010453 * T27); + T2Y = FNMS(KP634393284, T27, KP773010453 * T1Y); + { + E T36, T37, T2z, T2I; + T36 = T1X + T1M; + T37 = T23 + T26; + T38 = FMA(KP098017140, T36, KP995184726 * T37); + T3i = FNMS(KP098017140, T37, KP995184726 * T36); + T2z = T2j - T2y; + T2I = T2C - T2H; + T2J = FNMS(KP634393284, T2I, KP773010453 * T2z); + T2Z = FMA(KP773010453, T2I, KP634393284 * T2z); + } + } + { + E T1x, T2K, T2X, T30; + T1x = T1d + T1w; + T2K = T28 + T2J; + Cr[WS(csr, 25)] = T1x - T2K; + Cr[WS(csr, 7)] = T1x + T2K; 
+ T2X = T2U + T2N; + T30 = T2Y + T2Z; + Ci[WS(csi, 7)] = T2X + T30; + Ci[WS(csi, 25)] = T30 - T2X; + } + { + E T2V, T2W, T31, T32; + T2V = T2N - T2U; + T2W = T2J - T28; + Ci[WS(csi, 9)] = T2V + T2W; + Ci[WS(csi, 23)] = T2W - T2V; + T31 = T1d - T1w; + T32 = T2Z - T2Y; + Cr[WS(csr, 23)] = T31 - T32; + Cr[WS(csr, 9)] = T31 + T32; + } + { + E T35, T3c, T3h, T3k; + T35 = T33 + T34; + T3c = T38 + T3b; + Cr[WS(csr, 31)] = T35 - T3c; + Cr[WS(csr, 1)] = T35 + T3c; + T3h = T3e + T3d; + T3k = T3i + T3j; + Ci[WS(csi, 1)] = T3h + T3k; + Ci[WS(csi, 31)] = T3k - T3h; + } + { + E T3f, T3g, T3l, T3m; + T3f = T3d - T3e; + T3g = T3b - T38; + Ci[WS(csi, 15)] = T3f + T3g; + Ci[WS(csi, 17)] = T3g - T3f; + T3l = T33 - T34; + T3m = T3j - T3i; + Cr[WS(csr, 17)] = T3l - T3m; + Cr[WS(csr, 15)] = T3l + T3m; + } + } + } + } +} + +static const kr2c_desc desc = { 64, "r2cf_64", {342, 72, 52, 0}, &GENUS }; + +void X(codelet_r2cf_64) (planner *p) { + X(kr2c_register) (p, r2cf_64, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_7.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_7.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 7 -name r2cf_7 -include r2cf.h */ + +/* + * This function contains 24 FP additions, 18 FP multiplications, + * (or, 9 additions, 3 multiplications, 15 fused multiply/add), + * 25 stack variables, 6 constants, and 14 memory accesses + */ +#include "r2cf.h" + +static void r2cf_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP801937735, +0.801937735804838252472204639014890102331838324); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + DK(KP692021471, +0.692021471630095869627814897002069140197260599); + DK(KP554958132, +0.554958132087371191422194871006410481067288862); + DK(KP356895867, +0.356895867892209443894399510021300583399127187); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { + E T1, Tg, Tc; + { + E Th, T4, Ti, Ta, Tj, T7, Td, T5, T6, Tl, Tk; + T1 = R0[0]; + { + E T2, T3, T8, T9; + T2 = R1[0]; + T3 = R0[WS(rs, 3)]; + T8 = R1[WS(rs, 1)]; + T9 = R0[WS(rs, 2)]; + T5 = R0[WS(rs, 1)]; + Th = T3 - T2; + T4 = T2 + T3; + T6 = R1[WS(rs, 2)]; + Ti = T9 - T8; + Ta = T8 + T9; + } + Tj = T6 - T5; + T7 = T5 + T6; + Td = FNMS(KP356895867, T4, Ta); + Tl = FMA(KP554958132, Ti, Th); + Tk = FMA(KP554958132, Tj, Ti); + { + E Tm, Tf, Tb, Te; + Tm = FNMS(KP554958132, Th, Tj); + 
Cr[0] = T1 + T4 + T7 + Ta; + Tf = FNMS(KP356895867, T7, T4); + Tb = FNMS(KP356895867, Ta, T7); + Te = FNMS(KP692021471, Td, T7); + Ci[WS(csi, 2)] = KP974927912 * (FNMS(KP801937735, Tk, Th)); + Ci[WS(csi, 3)] = KP974927912 * (FNMS(KP801937735, Tm, Ti)); + Tg = FNMS(KP692021471, Tf, Ta); + Tc = FNMS(KP692021471, Tb, T4); + Cr[WS(csr, 2)] = FNMS(KP900968867, Te, T1); + Ci[WS(csi, 1)] = KP974927912 * (FMA(KP801937735, Tl, Tj)); + } + } + Cr[WS(csr, 1)] = FNMS(KP900968867, Tg, T1); + Cr[WS(csr, 3)] = FNMS(KP900968867, Tc, T1); + } + } +} + +static const kr2c_desc desc = { 7, "r2cf_7", {9, 3, 15, 0}, &GENUS }; + +void X(codelet_r2cf_7) (planner *p) { + X(kr2c_register) (p, r2cf_7, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 7 -name r2cf_7 -include r2cf.h */ + +/* + * This function contains 24 FP additions, 18 FP multiplications, + * (or, 12 additions, 6 multiplications, 12 fused multiply/add), + * 20 stack variables, 6 constants, and 14 memory accesses + */ +#include "r2cf.h" + +static void r2cf_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP222520933, +0.222520933956314404288902564496794759466355569); + DK(KP900968867, +0.900968867902419126236102319507445051165919162); + DK(KP623489801, +0.623489801858733530525004884004239810632274731); + DK(KP433883739, +0.433883739117558120475768332848358754609990728); + DK(KP781831482, +0.781831482468029808708444526674057750232334519); + DK(KP974927912, +0.974927912181823607018131682993931217232785801); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(28, rs), MAKE_VOLATILE_STRIDE(28, csr), MAKE_VOLATILE_STRIDE(28, csi)) { + E T1, Ta, Tb, T4, Td, T7, Tc, T8, T9; + T1 = R0[0]; + T8 = R1[0]; + T9 = R0[WS(rs, 3)]; + Ta = T8 + T9; + Tb = T9 - T8; + { + E T2, T3, T5, T6; + T2 = R0[WS(rs, 1)]; + T3 = R1[WS(rs, 2)]; + T4 = T2 
+ T3; + Td = T3 - T2; + T5 = R1[WS(rs, 1)]; + T6 = R0[WS(rs, 2)]; + T7 = T5 + T6; + Tc = T6 - T5; + } + Ci[WS(csi, 2)] = FNMS(KP781831482, Tc, KP974927912 * Tb) - (KP433883739 * Td); + Ci[WS(csi, 1)] = FMA(KP781831482, Tb, KP974927912 * Td) + (KP433883739 * Tc); + Cr[WS(csr, 2)] = FMA(KP623489801, T7, T1) + FNMA(KP900968867, T4, KP222520933 * Ta); + Ci[WS(csi, 3)] = FMA(KP433883739, Tb, KP974927912 * Tc) - (KP781831482 * Td); + Cr[WS(csr, 3)] = FMA(KP623489801, T4, T1) + FNMA(KP222520933, T7, KP900968867 * Ta); + Cr[WS(csr, 1)] = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4); + Cr[0] = T1 + Ta + T4 + T7; + } + } +} + +static const kr2c_desc desc = { 7, "r2cf_7", {12, 6, 12, 0}, &GENUS }; + +void X(codelet_r2cf_7) (planner *p) { + X(kr2c_register) (p, r2cf_7, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cf_8 -include r2cf.h */ + +/* + * This function contains 20 FP additions, 4 FP multiplications, + * (or, 16 additions, 0 multiplications, 4 fused multiply/add), + * 18 stack variables, 1 constants, and 16 memory accesses + */ +#include "r2cf.h" + +static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { + E T4, T7, T3, Tj, Td, T5, T8, T9; + { + E T1, T2, Tb, Tc; + T1 = R0[0]; + T2 = R0[WS(rs, 2)]; + Tb = R1[WS(rs, 3)]; + Tc = R1[WS(rs, 1)]; + T4 = R0[WS(rs, 1)]; + T7 = T1 - T2; + T3 = T1 + T2; + Tj = Tb + Tc; + Td = Tb - Tc; + T5 = R0[WS(rs, 3)]; + T8 = R1[0]; + T9 = R1[WS(rs, 2)]; + } + { + E T6, Tf, Ta, Ti; + T6 = T4 + T5; + Tf = T4 - T5; + Ta = T8 - T9; + Ti = T8 + T9; + { + E Th, Tk, Te, Tg; + Th = T3 + T6; + Cr[WS(csr, 2)] = T3 - T6; + Tk = Ti + Tj; + Ci[WS(csi, 2)] = Tj - Ti; + Te = Ta + Td; + Tg = Td - Ta; + Cr[0] = Th + Tk; + Cr[WS(csr, 4)] = Th - Tk; + Ci[WS(csi, 3)] = FMA(KP707106781, Tg, Tf); + Ci[WS(csi, 1)] = FMS(KP707106781, Tg, Tf); + Cr[WS(csr, 1)] = FMA(KP707106781, Te, T7); + Cr[WS(csr, 3)] = FNMS(KP707106781, Te, T7); + } + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cf_8", {16, 0, 4, 0}, &GENUS 
}; + +void X(codelet_r2cf_8) (planner *p) { + X(kr2c_register) (p, r2cf_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cf_8 -include r2cf.h */ + +/* + * This function contains 20 FP additions, 2 FP multiplications, + * (or, 20 additions, 2 multiplications, 0 fused multiply/add), + * 14 stack variables, 1 constants, and 16 memory accesses + */ +#include "r2cf.h" + +static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) { + E T3, T7, Td, Tj, T6, Tg, Ta, Ti; + { + E T1, T2, Tb, Tc; + T1 = R0[0]; + T2 = R0[WS(rs, 2)]; + T3 = T1 + T2; + T7 = T1 - T2; + Tb = R1[WS(rs, 3)]; + Tc = R1[WS(rs, 1)]; + Td = Tb - Tc; + Tj = Tb + Tc; + } + { + E T4, T5, T8, T9; + T4 = R0[WS(rs, 1)]; + T5 = R0[WS(rs, 3)]; + T6 = T4 + T5; + Tg = T4 - T5; + T8 = R1[0]; + T9 = R1[WS(rs, 2)]; + Ta = T8 - T9; + Ti = T8 + T9; + } + Cr[WS(csr, 2)] = T3 - T6; + Ci[WS(csi, 2)] = Tj - Ti; + { + E Te, Tf, Th, Tk; + Te = KP707106781 * (Ta + Td); + Cr[WS(csr, 3)] = T7 - Te; + Cr[WS(csr, 1)] = T7 + Te; + Tf = KP707106781 * (Td - Ta); + Ci[WS(csi, 1)] = Tf - Tg; + Ci[WS(csi, 3)] = Tg + Tf; + Th = T3 + T6; + Tk = Ti + Tj; + Cr[WS(csr, 4)] = Th - Tk; + Cr[0] = Th + Tk; + } + } + } +} + +static const kr2c_desc desc = { 8, "r2cf_8", {20, 2, 0, 0}, &GENUS }; + +void X(codelet_r2cf_8) (planner *p) { + X(kr2c_register) (p, r2cf_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_9.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cf/r2cf_9.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,224 @@ +/* + * 
Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:49:07 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cf_9 -include r2cf.h */ + +/* + * This function contains 38 FP additions, 30 FP multiplications, + * (or, 12 additions, 4 multiplications, 26 fused multiply/add), + * 57 stack variables, 18 constants, and 18 memory accesses + */ +#include "r2cf.h" + +static void r2cf_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP907603734, +0.907603734547952313649323976213898122064543220); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP347296355, +0.347296355333860697703433253538629592000751354); + DK(KP666666666, +0.666666666666666666666666666666666666666666667); + DK(KP879385241, +0.879385241571816768108218554649462939872416269); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP673648177, 
+0.673648177666930348851716626769314796000375677); + DK(KP898197570, +0.898197570222573798468955502359086394667167570); + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP203604859, +0.203604859554852403062088995281827210665664861); + DK(KP152703644, +0.152703644666139302296566746461370407999248646); + DK(KP394930843, +0.394930843634698457567117349190734585290304520); + DK(KP968908795, +0.968908795874236621082202410917456709164223497); + DK(KP726681596, +0.726681596905677465811651808188092531873167623); + DK(KP586256827, +0.586256827714544512072145703099641959914944179); + DK(KP184792530, +0.184792530904095372701352047572203755870913560); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { + E Tp, Tz, Tw, Ts, TA; + { + E T1, T6, Tb, T7, T4, To, T8, Tc, Td, T2, T3; + T1 = R0[0]; + T2 = R1[WS(rs, 1)]; + T3 = R0[WS(rs, 3)]; + T6 = R1[0]; + Tb = R0[WS(rs, 1)]; + T7 = R0[WS(rs, 2)]; + T4 = T2 + T3; + To = T3 - T2; + T8 = R1[WS(rs, 3)]; + Tc = R1[WS(rs, 2)]; + Td = R0[WS(rs, 4)]; + { + E T5, T9, Tk, Te, Ti; + T5 = T1 + T4; + Tp = FNMS(KP500000000, T4, T1); + T9 = T7 + T8; + Tk = T7 - T8; + Te = Tc + Td; + Ti = Td - Tc; + { + E Tl, Ta, Tu, Tf, Th; + Tl = FMS(KP500000000, T9, T6); + Ta = T6 + T9; + Tu = FMA(KP184792530, Tk, Ti); + Tf = Tb + Te; + Th = FNMS(KP500000000, Te, Tb); + { + E Tq, Ty, Tm, Tt; + Tq = FMA(KP586256827, Tl, Ti); + Ty = FMA(KP726681596, Tk, Tl); + Tm = FNMS(KP968908795, Tl, Tk); + Tt = FMA(KP394930843, Th, To); + { + E Tj, Tx, Tg, Tv; + Tj = FNMS(KP152703644, Ti, Th); + Tx = FMA(KP203604859, Th, Ti); + Tg = Ta + Tf; + Ci[WS(csi, 3)] = KP866025403 * (Tf - Ta); + Tv = FNMS(KP939692620, Tu, Tt); + { + E TB, Tn, TC, Tr; + TB = FMA(KP898197570, Ty, Tx); 
+ Tz = FNMS(KP898197570, Ty, Tx); + Tw = FNMS(KP673648177, Tm, Tj); + Tn = FMA(KP673648177, Tm, Tj); + Cr[0] = T5 + Tg; + Cr[WS(csr, 3)] = FNMS(KP500000000, Tg, T5); + Ci[WS(csi, 2)] = KP984807753 * (FNMS(KP879385241, Tv, Tl)); + Ci[WS(csi, 1)] = -(KP984807753 * (FNMS(KP879385241, To, Tn))); + TC = FMA(KP666666666, Tn, TB); + Tr = FNMS(KP347296355, Tq, Tk); + Ci[WS(csi, 4)] = KP866025403 * (FMA(KP852868531, TC, To)); + Ts = FNMS(KP907603734, Tr, Th); + } + } + } + } + } + } + Cr[WS(csr, 1)] = FMA(KP852868531, Tz, Tp); + TA = FNMS(KP500000000, Tz, Tw); + Cr[WS(csr, 2)] = FNMS(KP939692620, Ts, Tp); + Cr[WS(csr, 4)] = FMA(KP852868531, TA, Tp); + } + } +} + +static const kr2c_desc desc = { 9, "r2cf_9", {12, 4, 26, 0}, &GENUS }; + +void X(codelet_r2cf_9) (planner *p) { + X(kr2c_register) (p, r2cf_9, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cf_9 -include r2cf.h */ + +/* + * This function contains 38 FP additions, 26 FP multiplications, + * (or, 21 additions, 9 multiplications, 17 fused multiply/add), + * 36 stack variables, 14 constants, and 18 memory accesses + */ +#include "r2cf.h" + +static void r2cf_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) +{ + DK(KP939692620, +0.939692620785908384054109277324731469936208134); + DK(KP296198132, +0.296198132726023843175338011893050938967728390); + DK(KP342020143, +0.342020143325668733044099614682259580763083368); + DK(KP813797681, +0.813797681349373692844693217248393223289101568); + DK(KP984807753, +0.984807753012208059366743024589523013670643252); + DK(KP150383733, +0.150383733180435296639271897612501926072238258); + DK(KP642787609, +0.642787609686539326322643409907263432907559884); + DK(KP663413948, +0.663413948168938396205421319635891297216863310); + DK(KP852868531, +0.852868531952443209628250963940074071936020296); + DK(KP173648177, +0.173648177666930348851716626769314796000375677); 
+ DK(KP556670399, +0.556670399226419366452912952047023132968291906); + DK(KP766044443, +0.766044443118978035202392650555416673935832457); + DK(KP866025403, +0.866025403784438646763723170752936183471402627); + DK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT i; + for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) { + E T1, T4, Tr, Ta, Tl, Ti, Tf, Tk, Tj, T2, T3, T5, Tg; + T1 = R0[0]; + T2 = R1[WS(rs, 1)]; + T3 = R0[WS(rs, 3)]; + T4 = T2 + T3; + Tr = T3 - T2; + { + E T6, T7, T8, T9; + T6 = R1[0]; + T7 = R0[WS(rs, 2)]; + T8 = R1[WS(rs, 3)]; + T9 = T7 + T8; + Ta = T6 + T9; + Tl = T8 - T7; + Ti = FNMS(KP500000000, T9, T6); + } + { + E Tb, Tc, Td, Te; + Tb = R0[WS(rs, 1)]; + Tc = R1[WS(rs, 2)]; + Td = R0[WS(rs, 4)]; + Te = Tc + Td; + Tf = Tb + Te; + Tk = FNMS(KP500000000, Te, Tb); + Tj = Td - Tc; + } + Ci[WS(csi, 3)] = KP866025403 * (Tf - Ta); + T5 = T1 + T4; + Tg = Ta + Tf; + Cr[WS(csr, 3)] = FNMS(KP500000000, Tg, T5); + Cr[0] = T5 + Tg; + { + E Tt, Th, Tm, Tn, To, Tp, Tq, Ts; + Tt = KP866025403 * Tr; + Th = FNMS(KP500000000, T4, T1); + Tm = FMA(KP766044443, Ti, KP556670399 * Tl); + Tn = FMA(KP173648177, Tk, KP852868531 * Tj); + To = Tm + Tn; + Tp = FNMS(KP642787609, Ti, KP663413948 * Tl); + Tq = FNMS(KP984807753, Tk, KP150383733 * Tj); + Ts = Tp + Tq; + Cr[WS(csr, 1)] = Th + To; + Ci[WS(csi, 1)] = Tt + Ts; + Cr[WS(csr, 4)] = FMA(KP866025403, Tp - Tq, Th) - (KP500000000 * To); + Ci[WS(csi, 4)] = FNMS(KP500000000, Ts, KP866025403 * (Tr + (Tn - Tm))); + Ci[WS(csi, 2)] = FNMS(KP342020143, Tk, KP813797681 * Tj) + FNMA(KP150383733, Tl, KP984807753 * Ti) - Tt; + Cr[WS(csr, 2)] = FMA(KP173648177, Ti, Th) + FNMA(KP296198132, Tj, KP939692620 * Tk) - (KP852868531 * Tl); + } + } + } +} + +static const kr2c_desc desc = { 9, "r2cf_9", {21, 9, 17, 0}, &GENUS }; + +void X(codelet_r2cf_9) (planner *p) { + X(kr2c_register) (p, 
r2cf_9, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2cfII.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2cfII.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_r2cfII_genus) +extern const kr2c_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-rdft.h" +#include "r2r.h" + +const kr2r_genus GENUS = { 1 }; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#define GENUS X(rdft_r2r_genus) +extern const kr2r_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,90 @@ +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. 
+ +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/scalar +noinst_LTLIBRARIES = librdft_scalar_r2r.la + +########################################################################### +# The following lines specify the REDFT/RODFT/DHT sizes for which to generate +# specialized codelets. Currently, only REDFT01/10 of size 8 (used in JPEG). + +# e_ is a hard-coded REDFT FFT (DCT) of size +E00 = # e00_2.c e00_3.c e00_4.c e00_5.c e00_6.c e00_7.c e00_8.c +E01 = e01_8.c # e01_2.c e01_3.c e01_4.c e01_5.c e01_6.c e01_7.c +E10 = e10_8.c # e10_2.c e10_3.c e10_4.c e10_5.c e10_6.c e10_7.c +E11 = # e11_2.c e11_3.c e11_4.c e11_5.c e11_6.c e11_7.c e11_8.c + +# o_ is a hard-coded RODFT FFT (DST) of size +O00 = # o00_2.c o00_3.c o00_4.c o00_5.c o00_6.c o00_7.c o00_8.c +O01 = # o01_2.c o01_3.c o01_4.c o01_5.c o01_6.c o01_7.c o01_8.c +O10 = # o10_2.c o10_3.c o10_4.c o10_5.c o10_6.c o10_7.c o10_8.c +O11 = # o11_2.c o11_3.c o11_4.c o11_5.c o11_6.c o11_7.c o11_8.c + +# dht_ is a hard-coded DHT of size +DHT = # dht_2.c dht_3.c dht_4.c dht_5.c dht_6.c dht_7.c dht_8.c + +########################################################################### +ALL_CODELETS = $(E00) $(E01) $(E10) $(E11) $(O00) $(O01) $(O10) $(O11) $(DHT) + +BUILT_SOURCES= $(ALL_CODELETS) $(CODLIST) + +librdft_scalar_r2r_la_SOURCES = $(BUILT_SOURCES) + +SOLVTAB_NAME = X(solvtab_rdft_r2r) +XRENAME=X + +# special rules for regenerating codelets. 
+include $(top_srcdir)/support/Makefile.codelets + +if MAINTAINER_MODE +FLAGS_E00=$(RDFT_FLAGS_COMMON) +FLAGS_E01=$(RDFT_FLAGS_COMMON) +FLAGS_E10=$(RDFT_FLAGS_COMMON) +FLAGS_E11=$(RDFT_FLAGS_COMMON) +FLAGS_O00=$(RDFT_FLAGS_COMMON) +FLAGS_O01=$(RDFT_FLAGS_COMMON) +FLAGS_O10=$(RDFT_FLAGS_COMMON) +FLAGS_O11=$(RDFT_FLAGS_COMMON) +FLAGS_DHT=$(RDFT_FLAGS_COMMON) + +e00_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E00) -redft00 -n $* -name e00_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +e01_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E01) -redft01 -n $* -name e01_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +e10_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E10) -redft10 -n $* -name e10_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +e11_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E11) -redft11 -n $* -name e11_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + + +o00_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O00) -rodft00 -n $* -name o00_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +o01_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O01) -rodft01 -n $* -name o01_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +o10_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O10) -rodft10 -n $* -name o10_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +o11_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O11) -rodft11 -n $* -name o11_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + + +dht_%.c: $(CODELET_DEPS) $(GEN_R2R) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_DHT) -dht -sign 1 -n $* -name dht_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +endif # 
MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,773 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This Makefile.am specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/support/Makefile.codelets \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +subdir = rdft/scalar/r2r +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps 
= $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_scalar_r2r_la_LIBADD = +am__objects_1 = +am__objects_2 = e01_8.lo +am__objects_3 = e10_8.lo +am__objects_4 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_1) $(am__objects_1) $(am__objects_1) \ + $(am__objects_1) $(am__objects_1) $(am__objects_1) +am__objects_5 = codlist.lo +am__objects_6 = $(am__objects_4) $(am__objects_5) +am_librdft_scalar_r2r_la_OBJECTS = $(am__objects_6) +librdft_scalar_r2r_la_OBJECTS = $(am_librdft_scalar_r2r_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_scalar_r2r_la_SOURCES) +DIST_SOURCES = $(librdft_scalar_r2r_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +########################################################################### +AM_CPPFLAGS = -I$(top_srcdir)/kernel 
-I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/scalar + +noinst_LTLIBRARIES = librdft_scalar_r2r.la + +########################################################################### +# The following lines specify the REDFT/RODFT/DHT sizes for which to generate +# specialized codelets. Currently, only REDFT01/10 of size 8 (used in JPEG). + +# e_ is a hard-coded REDFT FFT (DCT) of size +E00 = # e00_2.c e00_3.c e00_4.c e00_5.c e00_6.c e00_7.c e00_8.c +E01 = e01_8.c # e01_2.c e01_3.c e01_4.c e01_5.c e01_6.c e01_7.c +E10 = e10_8.c # e10_2.c e10_3.c e10_4.c e10_5.c e10_6.c e10_7.c +E11 = # e11_2.c e11_3.c e11_4.c e11_5.c e11_6.c e11_7.c e11_8.c + +# o_ is a hard-coded RODFT FFT (DST) of size +O00 = # o00_2.c o00_3.c o00_4.c o00_5.c o00_6.c o00_7.c o00_8.c +O01 = # o01_2.c o01_3.c o01_4.c o01_5.c o01_6.c o01_7.c o01_8.c +O10 = # o10_2.c o10_3.c o10_4.c o10_5.c o10_6.c o10_7.c o10_8.c +O11 = # o11_2.c o11_3.c o11_4.c o11_5.c o11_6.c o11_7.c o11_8.c + +# dht_ is a hard-coded DHT of size +DHT = # dht_2.c dht_3.c dht_4.c dht_5.c dht_6.c dht_7.c dht_8.c + +########################################################################### +ALL_CODELETS = $(E00) $(E01) $(E10) $(E11) $(O00) $(O01) $(O10) $(O11) $(DHT) +BUILT_SOURCES = $(ALL_CODELETS) $(CODLIST) +librdft_scalar_r2r_la_SOURCES = $(BUILT_SOURCES) +SOLVTAB_NAME = X(solvtab_rdft_r2r) +XRENAME = X +CODLIST = codlist.c +CODELET_NAME = codelet_ +@MAINTAINER_MODE_TRUE@INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +@MAINTAINER_MODE_TRUE@TWOVERS = sh ${top_srcdir}/support/twovers.sh +@MAINTAINER_MODE_TRUE@GENFFTDIR = ${top_builddir}/genfft +@MAINTAINER_MODE_TRUE@GEN_NOTW = ${GENFFTDIR}/gen_notw.native +@MAINTAINER_MODE_TRUE@GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE_C = ${GENFFTDIR}/gen_twiddle_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native 
+@MAINTAINER_MODE_TRUE@GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +@MAINTAINER_MODE_TRUE@GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +@MAINTAINER_MODE_TRUE@GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +@MAINTAINER_MODE_TRUE@GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +@MAINTAINER_MODE_TRUE@GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +@MAINTAINER_MODE_TRUE@GEN_R2R = ${GENFFTDIR}/gen_r2r.native +@MAINTAINER_MODE_TRUE@PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +@MAINTAINER_MODE_TRUE@PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +@MAINTAINER_MODE_TRUE@ADD_DATE = sed -e s/@DATE@/"`date`"/ +@MAINTAINER_MODE_TRUE@COPYRIGHT = ${top_srcdir}/COPYRIGHT +@MAINTAINER_MODE_TRUE@CODELET_DEPS = $(COPYRIGHT) $(PRELUDE) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_DFT = cat $(COPYRIGHT) $(PRELUDE_DFT) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_RDFT = cat $(COPYRIGHT) $(PRELUDE_RDFT) +@MAINTAINER_MODE_TRUE@FLAGS_COMMON = -compact -variables 4 +@MAINTAINER_MODE_TRUE@DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +@MAINTAINER_MODE_TRUE@RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# special rules for regenerating codelets. 
+@MAINTAINER_MODE_TRUE@FLAGS_E00 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_E01 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_E10 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_E11 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_O00 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_O01 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_O10 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_O11 = $(RDFT_FLAGS_COMMON) +@MAINTAINER_MODE_TRUE@FLAGS_DHT = $(RDFT_FLAGS_COMMON) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/support/Makefile.codelets $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/scalar/r2r/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/scalar/r2r/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/support/Makefile.codelets: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_scalar_r2r.la: $(librdft_scalar_r2r_la_OBJECTS) $(librdft_scalar_r2r_la_DEPENDENCIES) $(EXTRA_librdft_scalar_r2r_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(librdft_scalar_r2r_la_OBJECTS) $(librdft_scalar_r2r_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/e01_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/e10_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: 
$(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic maintainer-clean-local mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am + + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; \ + 
echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? +maintainer-clean-local: + rm -f $(CODLIST) + +# cancel the hideous builtin rules that cause an infinite loop +@MAINTAINER_MODE_TRUE@%: %.o +@MAINTAINER_MODE_TRUE@%: %.s +@MAINTAINER_MODE_TRUE@%: %.c +@MAINTAINER_MODE_TRUE@%: %.S + +@MAINTAINER_MODE_TRUE@e00_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E00) -redft00 -n $* -name e00_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@e01_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E01) -redft01 -n $* -name e01_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@e10_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E10) -redft10 -n $* -name e10_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@e11_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_E11) -redft11 -n $* -name e11_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@o00_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) 
$(GEN_R2R) $(FLAGS_O00) -rodft00 -n $* -name o00_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@o01_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O01) -rodft01 -n $* -name o01_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@o10_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O10) -rodft10 -n $* -name o10_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@o11_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_O11) -rodft11 -n $* -name o11_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@dht_%.c: $(CODELET_DEPS) $(GEN_R2R) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_R2R) $(FLAGS_DHT) -dht -sign 1 -n $* -name dht_$* -include "r2r.h") | $(ADD_DATE) | $(INDENT) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +#include "ifftw.h" + + +extern void X(codelet_e01_8)(planner *); +extern void X(codelet_e10_8)(planner *); + + +extern const solvtab X(solvtab_rdft_r2r); +const solvtab X(solvtab_rdft_r2r) = { + SOLVTAB(X(codelet_e01_8)), + SOLVTAB(X(codelet_e10_8)), + SOLVTAB_END +}; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r/e01_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r/e01_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:48 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2r.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -redft01 -n 8 -name e01_8 -include r2r.h */ + +/* + * This function contains 26 FP additions, 24 FP multiplications, + * (or, 2 additions, 0 multiplications, 24 fused multiply/add), + * 27 stack variables, 8 constants, and 16 memory accesses + */ +#include "r2r.h" + +static void e01_8(const R *I, R *O, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + { + INT i; + for (i = v; i > 0; i = i - 1, I = I + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + E T8, Td, Th, T7, Tp, Tl, Te, Tb; + { + E Tj, T3, Tk, T6, T9, Ta; + { + E T1, T2, T4, T5; + T1 = I[0]; + T2 = I[WS(is, 4)]; + T4 = I[WS(is, 2)]; + T5 = I[WS(is, 6)]; + T8 = I[WS(is, 1)]; + Tj = FNMS(KP1_414213562, T2, T1); + T3 = FMA(KP1_414213562, T2, T1); + Tk = FMS(KP414213562, T4, T5); + T6 = FMA(KP414213562, T5, T4); + Td = I[WS(is, 7)]; + T9 = I[WS(is, 5)]; + Ta = I[WS(is, 3)]; + } + Th = FNMS(KP1_847759065, T6, T3); 
+ T7 = FMA(KP1_847759065, T6, T3); + Tp = FNMS(KP1_847759065, Tk, Tj); + Tl = FMA(KP1_847759065, Tk, Tj); + Te = Ta - T9; + Tb = T9 + Ta; + } + { + E Tn, Tf, Tc, Tm; + Tn = FNMS(KP707106781, Te, Td); + Tf = FMA(KP707106781, Te, Td); + Tc = FMA(KP707106781, Tb, T8); + Tm = FNMS(KP707106781, Tb, T8); + { + E Tq, To, Tg, Ti; + Tq = FMA(KP668178637, Tm, Tn); + To = FNMS(KP668178637, Tn, Tm); + Tg = FMA(KP198912367, Tf, Tc); + Ti = FNMS(KP198912367, Tc, Tf); + O[WS(os, 1)] = FMA(KP1_662939224, To, Tl); + O[WS(os, 6)] = FNMS(KP1_662939224, To, Tl); + O[WS(os, 2)] = FMA(KP1_662939224, Tq, Tp); + O[WS(os, 5)] = FNMS(KP1_662939224, Tq, Tp); + O[WS(os, 4)] = FMA(KP1_961570560, Ti, Th); + O[WS(os, 3)] = FNMS(KP1_961570560, Ti, Th); + O[0] = FMA(KP1_961570560, Tg, T7); + O[WS(os, 7)] = FNMS(KP1_961570560, Tg, T7); + } + } + } + } +} + +static const kr2r_desc desc = { 8, "e01_8", {2, 0, 24, 0}, &GENUS, REDFT01 }; + +void X(codelet_e01_8) (planner *p) { + X(kr2r_register) (p, e01_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft01 -n 8 -name e01_8 -include r2r.h */ + +/* + * This function contains 26 FP additions, 15 FP multiplications, + * (or, 20 additions, 9 multiplications, 6 fused multiply/add), + * 28 stack variables, 8 constants, and 16 memory accesses + */ +#include "r2r.h" + +static void e01_8(const R *I, R *O, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, 
+1.847759065022573512256366378793576573644833252); + { + INT i; + for (i = v; i > 0; i = i - 1, I = I + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + E T7, Tl, T4, Tk, Td, To, Tg, Tn; + { + E T5, T6, T1, T3, T2; + T5 = I[WS(is, 2)]; + T6 = I[WS(is, 6)]; + T7 = FMA(KP1_847759065, T5, KP765366864 * T6); + Tl = FNMS(KP1_847759065, T6, KP765366864 * T5); + T1 = I[0]; + T2 = I[WS(is, 4)]; + T3 = KP1_414213562 * T2; + T4 = T1 + T3; + Tk = T1 - T3; + { + E T9, Tf, Tc, Te, Ta, Tb; + T9 = I[WS(is, 1)]; + Tf = I[WS(is, 7)]; + Ta = I[WS(is, 5)]; + Tb = I[WS(is, 3)]; + Tc = KP707106781 * (Ta + Tb); + Te = KP707106781 * (Ta - Tb); + Td = T9 + Tc; + To = Te + Tf; + Tg = Te - Tf; + Tn = T9 - Tc; + } + } + { + E T8, Th, Tq, Tr; + T8 = T4 + T7; + Th = FNMS(KP390180644, Tg, KP1_961570560 * Td); + O[WS(os, 7)] = T8 - Th; + O[0] = T8 + Th; + Tq = Tk - Tl; + Tr = FMA(KP1_111140466, Tn, KP1_662939224 * To); + O[WS(os, 5)] = Tq - Tr; + O[WS(os, 2)] = Tq + Tr; + } + { + E Ti, Tj, Tm, Tp; + Ti = T4 - T7; + Tj = FMA(KP390180644, Td, KP1_961570560 * Tg); + O[WS(os, 4)] = Ti - Tj; + O[WS(os, 3)] = Ti + Tj; + Tm = Tk + Tl; + Tp = FNMS(KP1_111140466, To, KP1_662939224 * Tn); + O[WS(os, 6)] = Tm - Tp; + O[WS(os, 1)] = Tm + Tp; + } + } + } +} + +static const kr2r_desc desc = { 8, "e01_8", {20, 9, 6, 0}, &GENUS, REDFT01 }; + +void X(codelet_e01_8) (planner *p) { + X(kr2r_register) (p, e01_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/scalar/r2r/e10_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/scalar/r2r/e10_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 
of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:48 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_r2r.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -redft10 -n 8 -name e10_8 -include r2r.h */ + +/* + * This function contains 26 FP additions, 18 FP multiplications, + * (or, 16 additions, 8 multiplications, 10 fused multiply/add), + * 28 stack variables, 9 constants, and 16 memory accesses + */ +#include "r2r.h" + +static void e10_8(const R *I, R *O, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP668178637, +0.668178637919298919997757686523080761552472251); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP198912367, +0.198912367379658006911597622644676228597850501); + DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP414213562, +0.414213562373095048801688724209698078569671875); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + { + INT i; + for (i = v; i > 0; i = i - 1, I = I + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + E T3, Te, Tl, Tp, Tm, T6, Tn, T9; + { + E 
T4, Tj, Tk, T5, T7, T8; + { + E T1, T2, Tc, Td; + T1 = I[0]; + T2 = I[WS(is, 7)]; + Tc = I[WS(is, 4)]; + Td = I[WS(is, 3)]; + T4 = I[WS(is, 2)]; + Tj = T1 + T2; + T3 = T1 - T2; + Tk = Tc + Td; + Te = Tc - Td; + T5 = I[WS(is, 5)]; + T7 = I[WS(is, 1)]; + T8 = I[WS(is, 6)]; + } + Tl = Tj - Tk; + Tp = Tj + Tk; + Tm = T4 + T5; + T6 = T4 - T5; + Tn = T7 + T8; + T9 = T7 - T8; + } + { + E Tg, Ti, Tb, Th; + { + E Tq, To, Ta, Tf; + Tq = Tm + Tn; + To = Tm - Tn; + Ta = T6 + T9; + Tf = T6 - T9; + O[WS(os, 6)] = KP1_847759065 * (FMA(KP414213562, Tl, To)); + O[WS(os, 2)] = KP1_847759065 * (FNMS(KP414213562, To, Tl)); + O[0] = KP2_000000000 * (Tp + Tq); + O[WS(os, 4)] = KP1_414213562 * (Tp - Tq); + Tg = FNMS(KP707106781, Tf, Te); + Ti = FMA(KP707106781, Tf, Te); + Tb = FNMS(KP707106781, Ta, T3); + Th = FMA(KP707106781, Ta, T3); + } + O[WS(os, 7)] = KP1_961570560 * (FMA(KP198912367, Th, Ti)); + O[WS(os, 1)] = KP1_961570560 * (FNMS(KP198912367, Ti, Th)); + O[WS(os, 5)] = -(KP1_662939224 * (FNMS(KP668178637, Tb, Tg))); + O[WS(os, 3)] = KP1_662939224 * (FMA(KP668178637, Tg, Tb)); + } + } + } +} + +static const kr2r_desc desc = { 8, "e10_8", {16, 8, 10, 0}, &GENUS, REDFT10 }; + +void X(codelet_e10_8) (planner *p) { + X(kr2r_register) (p, e10_8, &desc); +} + +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft10 -n 8 -name e10_8 -include r2r.h */ + +/* + * This function contains 26 FP additions, 16 FP multiplications, + * (or, 20 additions, 10 multiplications, 6 fused multiply/add), + * 28 stack variables, 9 constants, and 16 memory accesses + */ +#include "r2r.h" + +static void e10_8(const R *I, R *O, stride is, stride os, INT v, INT ivs, INT ovs) +{ + DK(KP765366864, +0.765366864730179543456919968060797733522689125); + DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); + DK(KP390180644, +0.390180644032256535696569736954044481855383236); + DK(KP1_961570560, 
+1.961570560806460898252364472268478073947867462); + DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); + DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); + DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); + DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT i; + for (i = v; i > 0; i = i - 1, I = I + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) { + E T3, Tj, Tf, Tk, Ta, Tn, Tc, Tm; + { + E T1, T2, Td, Te; + T1 = I[0]; + T2 = I[WS(is, 7)]; + T3 = T1 - T2; + Tj = T1 + T2; + Td = I[WS(is, 4)]; + Te = I[WS(is, 3)]; + Tf = Td - Te; + Tk = Td + Te; + { + E T4, T5, T6, T7, T8, T9; + T4 = I[WS(is, 2)]; + T5 = I[WS(is, 5)]; + T6 = T4 - T5; + T7 = I[WS(is, 1)]; + T8 = I[WS(is, 6)]; + T9 = T7 - T8; + Ta = KP707106781 * (T6 + T9); + Tn = T7 + T8; + Tc = KP707106781 * (T6 - T9); + Tm = T4 + T5; + } + } + { + E Tb, Tg, Tp, Tq; + Tb = T3 - Ta; + Tg = Tc - Tf; + O[WS(os, 3)] = FNMS(KP1_111140466, Tg, KP1_662939224 * Tb); + O[WS(os, 5)] = FMA(KP1_662939224, Tg, KP1_111140466 * Tb); + Tp = Tj + Tk; + Tq = Tm + Tn; + O[WS(os, 4)] = KP1_414213562 * (Tp - Tq); + O[0] = KP2_000000000 * (Tp + Tq); + } + { + E Th, Ti, Tl, To; + Th = T3 + Ta; + Ti = Tf + Tc; + O[WS(os, 1)] = FNMS(KP390180644, Ti, KP1_961570560 * Th); + O[WS(os, 7)] = FMA(KP1_961570560, Ti, KP390180644 * Th); + Tl = Tj - Tk; + To = Tm - Tn; + O[WS(os, 2)] = FNMS(KP765366864, To, KP1_847759065 * Tl); + O[WS(os, 6)] = FMA(KP765366864, Tl, KP1_847759065 * To); + } + } + } +} + +static const kr2r_desc desc = { 8, "e10_8", {20, 10, 6, 0}, &GENUS, REDFT10 }; + +void X(codelet_e10_8) (planner *p) { + X(kr2r_register) (p, e10_8, &desc); +} + +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/Makefile.am Tue Oct 06 
13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +SUBDIRS = common sse2 avx altivec neon +EXTRA_DIST = hc2cbv.h hc2cfv.h codlist.mk simd.mk diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,640 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = rdft/simd +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + 
$(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings 
from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = 
@C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = 
@am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = common sse2 avx altivec neon +EXTRA_DIST = hc2cbv.h hc2cfv.h codlist.mk simd.mk +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/simd/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/simd/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck 
installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CFLAGS = $(ALTIVEC_CFLAGS) +SIMD_HEADER=simd-altivec.h + +include $(top_srcdir)/rdft/simd/codlist.mk +include $(top_srcdir)/rdft/simd/simd.mk + +if HAVE_ALTIVEC + +noinst_LTLIBRARIES = librdft_altivec_codelets.la +BUILT_SOURCES = $(EXTRA_DIST) +librdft_altivec_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,687 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of RDFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. 
It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/rdft/simd/codlist.mk \ + $(top_srcdir)/rdft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = rdft/simd/altivec +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_altivec_codelets_la_LIBADD = +am__librdft_altivec_codelets_la_SOURCES_DIST = hc2cfdftv_2.c \ + hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c hc2cfdftv_10.c \ + hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c hc2cfdftv_20.c \ + hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ + hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ + hc2cbdftv_20.c genus.c codlist.c +am__objects_1 = hc2cfdftv_2.lo hc2cfdftv_4.lo hc2cfdftv_6.lo \ + hc2cfdftv_8.lo hc2cfdftv_10.lo hc2cfdftv_12.lo hc2cfdftv_16.lo \ + hc2cfdftv_32.lo hc2cfdftv_20.lo +am__objects_2 = hc2cbdftv_2.lo hc2cbdftv_4.lo 
hc2cbdftv_6.lo \ + hc2cbdftv_8.lo hc2cbdftv_10.lo hc2cbdftv_12.lo hc2cbdftv_16.lo \ + hc2cbdftv_32.lo hc2cbdftv_20.lo +am__objects_3 = $(am__objects_1) $(am__objects_2) +am__objects_4 = $(am__objects_3) genus.lo codlist.lo +@HAVE_ALTIVEC_TRUE@am__objects_5 = $(am__objects_4) +@HAVE_ALTIVEC_TRUE@am_librdft_altivec_codelets_la_OBJECTS = \ +@HAVE_ALTIVEC_TRUE@ $(am__objects_5) +librdft_altivec_codelets_la_OBJECTS = \ + $(am_librdft_altivec_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_ALTIVEC_TRUE@am_librdft_altivec_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_altivec_codelets_la_SOURCES) +DIST_SOURCES = $(am__librdft_altivec_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + 
n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ 
+LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = 
@libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(ALTIVEC_CFLAGS) +SIMD_HEADER = simd-altivec.h +HC2CFDFTV = hc2cfdftv_2.c hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c \ +hc2cfdftv_10.c hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c \ +hc2cfdftv_20.c + +HC2CBDFTV = hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ +hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ +hc2cbdftv_20.c + + +########################################################################### +SIMD_CODELETS = $(HC2CFDFTV) $(HC2CBDFTV) +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_ALTIVEC_TRUE@noinst_LTLIBRARIES = librdft_altivec_codelets.la +@HAVE_ALTIVEC_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_ALTIVEC_TRUE@librdft_altivec_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/simd/altivec/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/simd/altivec/Makefile 
+.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_altivec_codelets.la: $(librdft_altivec_codelets_la_OBJECTS) $(librdft_altivec_codelets_la_DEPENDENCIES) $(EXTRA_librdft_altivec_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_librdft_altivec_codelets_la_rpath) $(librdft_altivec_codelets_la_OBJECTS) $(librdft_altivec_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_16.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z 
"$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs 
maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! */"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cbdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cbdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/altivec/hc2cfdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-altivec.h" +#include "../common/hc2cfdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,15 @@ +AM_CFLAGS = $(AVX_CFLAGS) +SIMD_HEADER=simd-avx.h + +include $(top_srcdir)/rdft/simd/codlist.mk +include $(top_srcdir)/rdft/simd/simd.mk + +if HAVE_AVX + +noinst_LTLIBRARIES = librdft_avx_codelets.la +BUILT_SOURCES = $(EXTRA_DIST) +librdft_avx_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,686 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of RDFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. 
It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/rdft/simd/codlist.mk \ + $(top_srcdir)/rdft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = rdft/simd/avx +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_avx_codelets_la_LIBADD = +am__librdft_avx_codelets_la_SOURCES_DIST = hc2cfdftv_2.c hc2cfdftv_4.c \ + hc2cfdftv_6.c hc2cfdftv_8.c hc2cfdftv_10.c hc2cfdftv_12.c \ + hc2cfdftv_16.c hc2cfdftv_32.c hc2cfdftv_20.c hc2cbdftv_2.c \ + hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c hc2cbdftv_10.c \ + hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c hc2cbdftv_20.c \ + genus.c codlist.c +am__objects_1 = hc2cfdftv_2.lo hc2cfdftv_4.lo hc2cfdftv_6.lo \ + hc2cfdftv_8.lo hc2cfdftv_10.lo hc2cfdftv_12.lo hc2cfdftv_16.lo \ + hc2cfdftv_32.lo hc2cfdftv_20.lo +am__objects_2 = hc2cbdftv_2.lo hc2cbdftv_4.lo 
hc2cbdftv_6.lo \ + hc2cbdftv_8.lo hc2cbdftv_10.lo hc2cbdftv_12.lo hc2cbdftv_16.lo \ + hc2cbdftv_32.lo hc2cbdftv_20.lo +am__objects_3 = $(am__objects_1) $(am__objects_2) +am__objects_4 = $(am__objects_3) genus.lo codlist.lo +@HAVE_AVX_TRUE@am__objects_5 = $(am__objects_4) +@HAVE_AVX_TRUE@am_librdft_avx_codelets_la_OBJECTS = $(am__objects_5) +librdft_avx_codelets_la_OBJECTS = \ + $(am_librdft_avx_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_AVX_TRUE@am_librdft_avx_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_avx_codelets_la_SOURCES) +DIST_SOURCES = $(am__librdft_avx_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 
2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ 
+MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = 
@localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(AVX_CFLAGS) +SIMD_HEADER = simd-avx.h +HC2CFDFTV = hc2cfdftv_2.c hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c \ +hc2cfdftv_10.c hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c \ +hc2cfdftv_20.c + +HC2CBDFTV = hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ +hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ +hc2cbdftv_20.c + + +########################################################################### +SIMD_CODELETS = $(HC2CFDFTV) $(HC2CBDFTV) +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_AVX_TRUE@noinst_LTLIBRARIES = librdft_avx_codelets.la +@HAVE_AVX_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_AVX_TRUE@librdft_avx_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/simd/avx/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/simd/avx/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case 
'$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_avx_codelets.la: $(librdft_avx_codelets_la_OBJECTS) $(librdft_avx_codelets_la_DEPENDENCIES) $(EXTRA_librdft_avx_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_librdft_avx_codelets_la_rpath) $(librdft_avx_codelets_la_OBJECTS) $(librdft_avx_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hc2cbdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + 
dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile 
\ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! */"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cbdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cbdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/avx/hc2cfdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-avx.h" +#include "../common/hc2cfdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/codlist.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/codlist.mk Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,22 @@ +# This file contains a standard list of RDFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). 
+ +HC2CFDFTV = hc2cfdftv_2.c hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c \ +hc2cfdftv_10.c hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c \ +hc2cfdftv_20.c + +HC2CBDFTV = hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ +hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ +hc2cbdftv_20.c + +########################################################################### +SIMD_CODELETS = $(HC2CFDFTV) $(HC2CBDFTV) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,24 @@ +# include the list of codelets + +include $(top_srcdir)/rdft/simd/codlist.mk + +ALL_CODELETS = $(SIMD_CODELETS) +BUILT_SOURCES= $(SIMD_CODELETS) $(CODLIST) +EXTRA_DIST = $(BUILT_SOURCES) genus.c +INCLUDE_SIMD_HEADER="\#include SIMD_HEADER" +XRENAME=XSIMD +SOLVTAB_NAME = XSIMD(solvtab_rdft) + +# include special rules for regenerating codelets. +include $(top_srcdir)/support/Makefile.codelets + +if MAINTAINER_MODE +FLAGS_HC2C=-simd $(FLAGS_COMMON) -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw + +hc2cfdftv_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT_C) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT_C) $(FLAGS_HC2C) -n $* -dit -name hc2cfdftv_$* -include "hc2cfv.h") | $(ADD_DATE) | $(INDENT) >$@ + +hc2cbdftv_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT_C) + ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT_C) $(FLAGS_HC2C) -n $* -dif -sign 1 -name hc2cbdftv_$* -include "hc2cbv.h") | $(ADD_DATE) | $(INDENT) >$@ + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,576 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# include the list of codelets + +# This file contains a standard list of RDFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/rdft/simd/codlist.mk \ + $(top_srcdir)/support/Makefile.codelets $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am +subdir = rdft/simd/common +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ 
+EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = 
@build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +HC2CFDFTV = hc2cfdftv_2.c hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c \ +hc2cfdftv_10.c hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c \ +hc2cfdftv_20.c + +HC2CBDFTV = hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ +hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ +hc2cbdftv_20.c + + +########################################################################### +SIMD_CODELETS = $(HC2CFDFTV) $(HC2CBDFTV) +ALL_CODELETS = $(SIMD_CODELETS) +BUILT_SOURCES = $(SIMD_CODELETS) $(CODLIST) +EXTRA_DIST = $(BUILT_SOURCES) genus.c +INCLUDE_SIMD_HEADER = "\#include SIMD_HEADER" +XRENAME = XSIMD +SOLVTAB_NAME = XSIMD(solvtab_rdft) +CODLIST = codlist.c +CODELET_NAME = codelet_ +@MAINTAINER_MODE_TRUE@INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +@MAINTAINER_MODE_TRUE@TWOVERS = sh ${top_srcdir}/support/twovers.sh +@MAINTAINER_MODE_TRUE@GENFFTDIR = ${top_builddir}/genfft +@MAINTAINER_MODE_TRUE@GEN_NOTW = ${GENFFTDIR}/gen_notw.native +@MAINTAINER_MODE_TRUE@GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native 
+@MAINTAINER_MODE_TRUE@GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +@MAINTAINER_MODE_TRUE@GEN_TWIDDLE_C = ${GENFFTDIR}/gen_twiddle_c.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native +@MAINTAINER_MODE_TRUE@GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +@MAINTAINER_MODE_TRUE@GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +@MAINTAINER_MODE_TRUE@GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +@MAINTAINER_MODE_TRUE@GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +@MAINTAINER_MODE_TRUE@GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +@MAINTAINER_MODE_TRUE@GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +@MAINTAINER_MODE_TRUE@GEN_R2R = ${GENFFTDIR}/gen_r2r.native +@MAINTAINER_MODE_TRUE@PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +@MAINTAINER_MODE_TRUE@PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +@MAINTAINER_MODE_TRUE@ADD_DATE = sed -e s/@DATE@/"`date`"/ +@MAINTAINER_MODE_TRUE@COPYRIGHT = ${top_srcdir}/COPYRIGHT +@MAINTAINER_MODE_TRUE@CODELET_DEPS = $(COPYRIGHT) $(PRELUDE) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_DFT = cat $(COPYRIGHT) $(PRELUDE_DFT) +@MAINTAINER_MODE_TRUE@PRELUDE_COMMANDS_RDFT = cat $(COPYRIGHT) $(PRELUDE_RDFT) +@MAINTAINER_MODE_TRUE@FLAGS_COMMON = -compact -variables 4 +@MAINTAINER_MODE_TRUE@DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +@MAINTAINER_MODE_TRUE@RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# include special rules for regenerating codelets. 
+@MAINTAINER_MODE_TRUE@FLAGS_HC2C = -simd $(FLAGS_COMMON) -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/support/Makefile.codelets $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/simd/common/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/simd/common/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/support/Makefile.codelets: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo 
$$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic \ + maintainer-clean-local + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic \ + maintainer-clean-local mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am + + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; 
\ + echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? +maintainer-clean-local: + rm -f $(CODLIST) + +# cancel the hideous builtin rules that cause an infinite loop +@MAINTAINER_MODE_TRUE@%: %.o +@MAINTAINER_MODE_TRUE@%: %.s +@MAINTAINER_MODE_TRUE@%: %.c +@MAINTAINER_MODE_TRUE@%: %.S + +@MAINTAINER_MODE_TRUE@hc2cfdftv_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT_C) $(FLAGS_HC2C) -n $* -dit -name hc2cfdftv_$* -include "hc2cfv.h") | $(ADD_DATE) | $(INDENT) >$@ + +@MAINTAINER_MODE_TRUE@hc2cbdftv_%.c: $(CODELET_DEPS) $(GEN_HC2CDFT_C) +@MAINTAINER_MODE_TRUE@ ($(PRELUDE_COMMANDS_RDFT); $(TWOVERS) $(GEN_HC2CDFT_C) $(FLAGS_HC2C) -n $* -dif -sign 1 -name hc2cbdftv_$* -include "hc2cbv.h") | $(ADD_DATE) | $(INDENT) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,45 @@ +#include "ifftw.h" +#include SIMD_HEADER + +extern void XSIMD(codelet_hc2cfdftv_2)(planner *); +extern void XSIMD(codelet_hc2cfdftv_4)(planner *); +extern void XSIMD(codelet_hc2cfdftv_6)(planner *); +extern void XSIMD(codelet_hc2cfdftv_8)(planner *); +extern void XSIMD(codelet_hc2cfdftv_10)(planner *); +extern void XSIMD(codelet_hc2cfdftv_12)(planner *); +extern void XSIMD(codelet_hc2cfdftv_16)(planner *); +extern void XSIMD(codelet_hc2cfdftv_32)(planner *); +extern void XSIMD(codelet_hc2cfdftv_20)(planner *); +extern void XSIMD(codelet_hc2cbdftv_2)(planner *); +extern void XSIMD(codelet_hc2cbdftv_4)(planner *); +extern void XSIMD(codelet_hc2cbdftv_6)(planner *); +extern void XSIMD(codelet_hc2cbdftv_8)(planner *); +extern void XSIMD(codelet_hc2cbdftv_10)(planner *); +extern void XSIMD(codelet_hc2cbdftv_12)(planner *); +extern void XSIMD(codelet_hc2cbdftv_16)(planner *); +extern void XSIMD(codelet_hc2cbdftv_32)(planner *); +extern void XSIMD(codelet_hc2cbdftv_20)(planner *); + + +extern const solvtab XSIMD(solvtab_rdft); +const solvtab XSIMD(solvtab_rdft) = { + SOLVTAB(XSIMD(codelet_hc2cfdftv_2)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_4)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_6)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_8)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_10)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_12)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_16)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_32)), + SOLVTAB(XSIMD(codelet_hc2cfdftv_20)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_2)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_4)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_6)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_8)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_10)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_12)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_16)), + SOLVTAB(XSIMD(codelet_hc2cbdftv_32)), + 
SOLVTAB(XSIMD(codelet_hc2cbdftv_20)), + SOLVTAB_END +}; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "codelet-rdft.h" +#include SIMD_HEADER + +#define EXTERN_CONST(t, x) extern const t x; const t x + +static int hc2cbv_okp(const R *Rp, const R *Ip, const R *Rm, const R *Im, + INT rs, INT mb, INT me, INT ms, + const planner *plnr) +{ + return (1 + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(rs) + && SIMD_VSTRIDE_OK(ms) + && ((me - mb) % VL) == 0 + && ((mb - 1) % VL) == 0 /* twiddle factors alignment */ + && ALIGNED(Rp) + && ALIGNED(Rm) + && Ip == Rp + 1 + && Im == Rm + 1); +} + +EXTERN_CONST(hc2c_genus, XSIMD(rdft_hc2cbv_genus)) = { hc2cbv_okp, HC2R, VL }; + +static int hc2cfv_okp(const R *Rp, const R *Ip, const R *Rm, const R *Im, + INT rs, INT mb, INT me, INT ms, + const planner *plnr) +{ + return (1 + && !NO_SIMDP(plnr) + && SIMD_STRIDE_OK(rs) + && SIMD_VSTRIDE_OK(ms) + && ((me - mb) % VL) == 0 + && ((mb - 1) % VL) == 0 /* twiddle 
factors alignment */ + && ALIGNED(Rp) + && ALIGNED(Rm) + && Ip == Rp + 1 + && Im == Rm + 1); +} + +EXTERN_CONST(hc2c_genus, XSIMD(rdft_hc2cfv_genus)) = { hc2cfv_okp, R2HC, VL }; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 10 -dif -sign 1 -name hc2cbdftv_10 -include hc2cbv.h */ + +/* + * This function contains 61 FP additions, 50 FP multiplications, + * (or, 33 additions, 22 multiplications, 28 fused multiply/add), + * 76 stack variables, 4 constants, and 20 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 18)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(40, rs)) { + V Ts, T4, TR, T1, TZ, TD, Ty, Tn, Ti, TT, T11, TJ, T15, Tr, TN; + V TE, Tv, To, Tb, T8, Tw, Te, Tx, Th, Tt, T7, T9, T2, T3, Tc; + V Td, Tf, Tg, T5, T6, Tu, Ta; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Tc = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + Td = LD(&(Rm[0]), -ms, &(Rm[0])); + Tf = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tg = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = 
LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Ts = VFMACONJ(T3, T2); + T4 = VFNMSCONJ(T3, T2); + Tw = VFMACONJ(Td, Tc); + Te = VFNMSCONJ(Td, Tc); + Tx = VFMACONJ(Tg, Tf); + Th = VFMSCONJ(Tg, Tf); + Tt = VFMACONJ(T6, T5); + T7 = VFNMSCONJ(T6, T5); + T9 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + TR = LDW(&(W[TWVL * 8])); + T1 = LDW(&(W[TWVL * 4])); + TZ = LDW(&(W[TWVL * 12])); + TD = VSUB(Tw, Tx); + Ty = VADD(Tw, Tx); + Tn = VSUB(Te, Th); + Ti = VADD(Te, Th); + Tu = VFMACONJ(T9, T8); + Ta = VFMSCONJ(T9, T8); + TT = LDW(&(W[TWVL * 6])); + T11 = LDW(&(W[TWVL * 10])); + TJ = LDW(&(W[TWVL * 16])); + T15 = LDW(&(W[0])); + Tr = LDW(&(W[TWVL * 2])); + TN = LDW(&(W[TWVL * 14])); + TE = VSUB(Tt, Tu); + Tv = VADD(Tt, Tu); + To = VSUB(T7, Ta); + Tb = VADD(T7, Ta); + { + V TV, TF, Tz, TB, TL, Tp, Tj, Tl, T17, TA, TS, Tk, TC, TU, TK; + V Tm, TO, TG, T12, TW, T16, TM, T10, Tq, TX, TY, T18, T19, TQ, TP; + V T13, T14, TI, TH; + TV = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TD, TE)); + TF = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TE, TD)); + Tz = VADD(Tv, Ty); + TB = VSUB(Tv, Ty); + TL = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, To)); + Tp = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), To, Tn)); + Tj = VADD(Tb, Ti); + Tl = VSUB(Tb, Ti); + T17 = VADD(Ts, Tz); + TA = VFNMS(LDK(KP250000000), Tz, Ts); + TS = VZMULI(TR, VADD(T4, Tj)); + Tk = VFNMS(LDK(KP250000000), Tj, T4); + TC = VFNMS(LDK(KP559016994), TB, TA); + TU = VFMA(LDK(KP559016994), TB, TA); + TK = VFMA(LDK(KP559016994), Tl, Tk); + Tm = VFNMS(LDK(KP559016994), Tl, Tk); + TO = VZMUL(TN, VFMAI(TF, TC)); + TG = VZMUL(Tr, VFNMSI(TF, TC)); + T12 = VZMUL(T11, VFMAI(TV, TU)); + TW = VZMUL(TT, VFNMSI(TV, TU)); + T16 = VZMULI(T15, VFMAI(TL, TK)); + TM = VZMULI(TJ, VFNMSI(TL, TK)); + T10 = VZMULI(TZ, VFNMSI(Tp, Tm)); + Tq = VZMULI(T1, VFMAI(Tp, Tm)); + TX = VADD(TS, TW); + TY = VCONJ(VSUB(TW, TS)); + T18 = VADD(T16, T17); + T19 = VCONJ(VSUB(T17, T16)); + TQ = VCONJ(VSUB(TO, TM)); + TP = VADD(TM, TO); + T13 = 
VADD(T10, T12); + T14 = VCONJ(VSUB(T12, T10)); + TI = VCONJ(VSUB(TG, Tq)); + TH = VADD(Tq, TG); + ST(&(Rp[WS(rs, 2)]), TX, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 2)]), TY, -ms, &(Rm[0])); + ST(&(Rp[0]), T18, ms, &(Rp[0])); + ST(&(Rm[0]), T19, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 4)]), TQ, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 4)]), TP, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 3)]), T13, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 3)]), T14, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 1)]), TI, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 1)]), TH, ms, &(Rp[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 10, XSIMD_STRING("hc2cbdftv_10"), twinstr, &GENUS, {33, 22, 28, 0} }; + +void XSIMD(codelet_hc2cbdftv_10) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_10, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 10 -dif -sign 1 -name hc2cbdftv_10 -include hc2cbv.h */ + +/* + * This function contains 61 FP additions, 30 FP multiplications, + * (or, 55 additions, 24 multiplications, 6 fused multiply/add), + * 81 stack variables, 4 constants, and 20 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 18)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 
18), MAKE_VOLATILE_STRIDE(40, rs)) { + V T5, TE, Ts, Tt, TC, Tz, TH, TJ, To, Tq, T2, T4, T3, T9, Tx; + V Tm, TB, Td, Ty, Ti, TA, T6, T8, T7, Tl, Tk, Tj, Tc, Tb, Ta; + V Tf, Th, Tg, TF, TG, Te, Tn; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T4 = VCONJ(T3); + T5 = VSUB(T2, T4); + TE = VADD(T2, T4); + T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + T9 = VSUB(T6, T8); + Tx = VADD(T6, T8); + Tl = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tj = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tk = VCONJ(Tj); + Tm = VSUB(Tk, Tl); + TB = VADD(Tk, Tl); + Tc = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VCONJ(Ta); + Td = VSUB(Tb, Tc); + Ty = VADD(Tb, Tc); + Tf = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + Tg = LD(&(Rm[0]), -ms, &(Rm[0])); + Th = VCONJ(Tg); + Ti = VSUB(Tf, Th); + TA = VADD(Tf, Th); + Ts = VSUB(T9, Td); + Tt = VSUB(Ti, Tm); + TC = VSUB(TA, TB); + Tz = VSUB(Tx, Ty); + TF = VADD(Tx, Ty); + TG = VADD(TA, TB); + TH = VADD(TF, TG); + TJ = VMUL(LDK(KP559016994), VSUB(TF, TG)); + Te = VADD(T9, Td); + Tn = VADD(Ti, Tm); + To = VADD(Te, Tn); + Tq = VMUL(LDK(KP559016994), VSUB(Te, Tn)); + { + V T1c, TX, Tv, T1b, TR, T15, TL, T17, TT, T11, TW, Tu, TQ, Tr, TP; + V Tp, T1, T1a, TO, T14, TD, T10, TK, TZ, TI, Tw, T16, TS, TY, TM; + V TU, T1e, TN, T1d, T19, T13, TV, T18, T12; + T1c = VADD(TE, TH); + TW = LDW(&(W[TWVL * 8])); + TX = VZMULI(TW, VADD(T5, To)); + Tu = VBYI(VFNMS(LDK(KP951056516), Tt, VMUL(LDK(KP587785252), Ts))); + TQ = VBYI(VFMA(LDK(KP951056516), Ts, VMUL(LDK(KP587785252), Tt))); + Tp = VFNMS(LDK(KP250000000), To, T5); + Tr = VSUB(Tp, Tq); + TP = VADD(Tq, Tp); + T1 = LDW(&(W[TWVL * 4])); + Tv = VZMULI(T1, VSUB(Tr, Tu)); + T1a = LDW(&(W[0])); + T1b = VZMULI(T1a, VADD(TQ, TP)); + TO = LDW(&(W[TWVL * 16])); + TR = VZMULI(TO, VSUB(TP, TQ)); + T14 = LDW(&(W[TWVL * 12])); + T15 = VZMULI(T14, VADD(Tu, Tr)); + TD = 
VBYI(VFNMS(LDK(KP951056516), TC, VMUL(LDK(KP587785252), Tz))); + T10 = VBYI(VFMA(LDK(KP951056516), Tz, VMUL(LDK(KP587785252), TC))); + TI = VFNMS(LDK(KP250000000), TH, TE); + TK = VSUB(TI, TJ); + TZ = VADD(TJ, TI); + Tw = LDW(&(W[TWVL * 2])); + TL = VZMUL(Tw, VADD(TD, TK)); + T16 = LDW(&(W[TWVL * 10])); + T17 = VZMUL(T16, VADD(T10, TZ)); + TS = LDW(&(W[TWVL * 14])); + TT = VZMUL(TS, VSUB(TK, TD)); + TY = LDW(&(W[TWVL * 6])); + T11 = VZMUL(TY, VSUB(TZ, T10)); + TM = VADD(Tv, TL); + ST(&(Rp[WS(rs, 1)]), TM, ms, &(Rp[WS(rs, 1)])); + TU = VADD(TR, TT); + ST(&(Rp[WS(rs, 4)]), TU, ms, &(Rp[0])); + T1e = VCONJ(VSUB(T1c, T1b)); + ST(&(Rm[0]), T1e, -ms, &(Rm[0])); + TN = VCONJ(VSUB(TL, Tv)); + ST(&(Rm[WS(rs, 1)]), TN, -ms, &(Rm[WS(rs, 1)])); + T1d = VADD(T1b, T1c); + ST(&(Rp[0]), T1d, ms, &(Rp[0])); + T19 = VCONJ(VSUB(T17, T15)); + ST(&(Rm[WS(rs, 3)]), T19, -ms, &(Rm[WS(rs, 1)])); + T13 = VCONJ(VSUB(T11, TX)); + ST(&(Rm[WS(rs, 2)]), T13, -ms, &(Rm[0])); + TV = VCONJ(VSUB(TT, TR)); + ST(&(Rm[WS(rs, 4)]), TV, -ms, &(Rm[0])); + T18 = VADD(T15, T17); + ST(&(Rp[WS(rs, 3)]), T18, ms, &(Rp[WS(rs, 1)])); + T12 = VADD(TX, T11); + ST(&(Rp[WS(rs, 2)]), T12, ms, &(Rp[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 10, XSIMD_STRING("hc2cbdftv_10"), twinstr, &GENUS, {55, 24, 6, 0} }; + +void XSIMD(codelet_hc2cbdftv_10) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_10, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program 
is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 12 -dif -sign 1 -name hc2cbdftv_12 -include hc2cbv.h */ + +/* + * This function contains 71 FP additions, 51 FP multiplications, + * (or, 45 additions, 25 multiplications, 26 fused multiply/add), + * 88 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(48, rs)) { + V Tz, TT, T1, T1j, TN, TF, TP, TL, Tx, T15, TJ, T1b, T1g, T1l, T18; + V T12, TO, TC, TK, Tl, T16, TQ, TU, TG, T1c, TM, T1k, Ty, T19, T1a; + V T13, T14, T1h, T1i, TS, 
TR, T1m, T1n, TI, TH; + { + V T2, Tm, T7, Tp, T8, Tq, T9, Tu, T5, Tr, Tg, Tn, Tj, Ta, T3; + V T4, Te, Tf, Th, Ti, TV, T6, TW, Tk, TD, Tt, TB, T11, T1f, Tw; + V TE, TX, Tc, Ts, T10, TZ, To, Tb, Tv, T17, T1d, T1e, TY, TA, Td; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + Tm = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + T7 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tp = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T3 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T4 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Te = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tf = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Th = LD(&(Rm[0]), -ms, &(Rm[0])); + Ti = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + Tq = VCONJ(Tp); + T9 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Tu = VFNMSCONJ(T4, T3); + T5 = VFMACONJ(T4, T3); + Tr = VADD(Te, Tf); + Tg = VSUB(Te, Tf); + Tn = VADD(Ti, Th); + Tj = VSUB(Th, Ti); + Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + TV = LDW(&(W[TWVL * 4])); + Tz = LDW(&(W[TWVL * 18])); + T6 = VFNMS(LDK(KP500000000), T5, T2); + TW = VADD(T2, T5); + Ts = VFNMS(LDK(KP500000000), Tr, Tq); + T10 = VFMACONJ(Tp, Tr); + TZ = VFMACONJ(Tn, Tm); + To = VFNMS(LDK(KP500000000), VCONJ(Tn), Tm); + Tk = VFMACONJ(Tj, Tg); + TD = VFNMSCONJ(Tj, Tg); + Tb = VFMACONJ(Ta, T9); + Tv = VFMSCONJ(Ta, T9); + TT = LDW(&(W[TWVL * 2])); + T1 = LDW(&(W[TWVL * 20])); + Tt = VSUB(To, Ts); + TB = VADD(To, Ts); + T11 = VSUB(TZ, T10); + T1f = VADD(TZ, T10); + Tw = VSUB(Tu, Tv); + TE = VADD(Tu, Tv); + TX = VFMACONJ(T7, Tb); + Tc = VFNMS(LDK(KP500000000), Tb, T8); + T1j = LDW(&(W[0])); + T17 = LDW(&(W[TWVL * 16])); + T1d = LDW(&(W[TWVL * 10])); + TN = LDW(&(W[TWVL * 6])); + TF = VMUL(LDK(KP866025403), VSUB(TD, TE)); + TP = VMUL(LDK(KP866025403), VADD(TE, TD)); + TL = VFNMS(LDK(KP866025403), Tw, Tt); + Tx = VFMA(LDK(KP866025403), Tw, Tt); + T1e = VADD(TW, TX); + TY = VSUB(TW, TX); + TA = VADD(T6, Tc); + Td = VSUB(T6, Tc); + T15 = LDW(&(W[TWVL * 14])); + TJ = LDW(&(W[TWVL * 8])); + T1b = 
LDW(&(W[TWVL * 12])); + T1g = VZMUL(T1d, VSUB(T1e, T1f)); + T1l = VADD(T1e, T1f); + T18 = VZMULI(T17, VFMAI(T11, TY)); + T12 = VZMULI(TV, VFNMSI(T11, TY)); + TO = VADD(TA, TB); + TC = VSUB(TA, TB); + TK = VFNMS(LDK(KP866025403), Tk, Td); + Tl = VFMA(LDK(KP866025403), Tk, Td); + } + T16 = VZMUL(T15, VFNMSI(TP, TO)); + TQ = VZMUL(TN, VFMAI(TP, TO)); + TU = VZMUL(TT, VFMAI(TF, TC)); + TG = VZMUL(Tz, VFNMSI(TF, TC)); + T1c = VZMULI(T1b, VFNMSI(TL, TK)); + TM = VZMULI(TJ, VFMAI(TL, TK)); + T1k = VZMULI(T1j, VFMAI(Tx, Tl)); + Ty = VZMULI(T1, VFNMSI(Tx, Tl)); + T19 = VCONJ(VSUB(T16, T18)); + T1a = VADD(T16, T18); + T13 = VCONJ(VSUB(TU, T12)); + T14 = VADD(TU, T12); + T1h = VADD(T1c, T1g); + T1i = VCONJ(VSUB(T1g, T1c)); + TS = VCONJ(VSUB(TQ, TM)); + TR = VADD(TM, TQ); + T1m = VADD(T1k, T1l); + T1n = VCONJ(VSUB(T1l, T1k)); + TI = VCONJ(VSUB(TG, Ty)); + TH = VADD(Ty, TG); + ST(&(Rm[WS(rs, 4)]), T19, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 4)]), T1a, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), T13, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 1)]), T14, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 3)]), T1h, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 3)]), T1i, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), TS, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 2)]), TR, ms, &(Rp[0])); + ST(&(Rp[0]), T1m, ms, &(Rp[0])); + ST(&(Rm[0]), T1n, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 5)]), TI, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 5)]), TH, ms, &(Rp[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 12, XSIMD_STRING("hc2cbdftv_12"), twinstr, &GENUS, {45, 25, 26, 0} }; + +void XSIMD(codelet_hc2cbdftv_12) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_12, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 
8 -trivial-stores -variables 32 -no-generate-bytw -n 12 -dif -sign 1 -name hc2cbdftv_12 -include hc2cbv.h */ + +/* + * This function contains 71 FP additions, 30 FP multiplications, + * (or, 67 additions, 26 multiplications, 4 fused multiply/add), + * 90 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(48, rs)) { + V TY, TZ, Tf, TC, Tq, TG, Tm, TF, Ty, TD, T13, T1h, T2, T9, T3; + V T5, T6, Tc, Tb, Td, T8, T4, Ta, T7, Te, To, Tp, Tr, Tv, Ti; + V Ts, Tl, Tw, Tu, Tg, Th, Tj, Tk, Tt, Tx, T11, T12; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T8 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + T9 = VCONJ(T8); + T3 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T4 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + T5 = VCONJ(T4); + T6 = VADD(T3, T5); + Tc = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VCONJ(Ta); + Td = VADD(Tb, Tc); + TY = VADD(T2, T6); + TZ = VADD(T9, Td); + T7 = VFNMS(LDK(KP500000000), T6, T2); + Te = VFNMS(LDK(KP500000000), Td, T9); + Tf = VSUB(T7, Te); + TC = VADD(T7, Te); + To = VSUB(T3, T5); + Tp = VSUB(Tb, Tc); + Tq = VMUL(LDK(KP866025403), VSUB(To, Tp)); + TG = VADD(To, Tp); + Tr = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tu = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Tv = VCONJ(Tu); + Tg = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Th = LD(&(Rm[0]), -ms, &(Rm[0])); + Ti = VCONJ(VSUB(Tg, Th)); + Ts = VCONJ(VADD(Tg, Th)); + Tj = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tk = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Tl = 
VSUB(Tj, Tk); + Tw = VADD(Tj, Tk); + Tm = VMUL(LDK(KP866025403), VSUB(Ti, Tl)); + TF = VADD(Ti, Tl); + Tt = VFNMS(LDK(KP500000000), Ts, Tr); + Tx = VFNMS(LDK(KP500000000), Tw, Tv); + Ty = VSUB(Tt, Tx); + TD = VADD(Tt, Tx); + T11 = VADD(Tr, Ts); + T12 = VADD(Tv, Tw); + T13 = VBYI(VSUB(T11, T12)); + T1h = VADD(T11, T12); + { + V T1n, T1i, T14, T1a, TA, T1m, TS, T18, TO, T1e, TI, TW, T1g, T1f, T10; + V TX, T19, Tn, Tz, T1, T1l, TQ, TR, TP, T17, TM, TN, TL, T1d, TE; + V TH, TB, TV, TJ, T1p, T1k, TT, T1o, TK, TU, T1j, T1b, T16, T1c, T15; + T1g = VADD(TY, TZ); + T1n = VADD(T1g, T1h); + T1f = LDW(&(W[TWVL * 10])); + T1i = VZMUL(T1f, VSUB(T1g, T1h)); + T10 = VSUB(TY, TZ); + TX = LDW(&(W[TWVL * 4])); + T14 = VZMULI(TX, VSUB(T10, T13)); + T19 = LDW(&(W[TWVL * 16])); + T1a = VZMULI(T19, VADD(T10, T13)); + Tn = VSUB(Tf, Tm); + Tz = VBYI(VADD(Tq, Ty)); + T1 = LDW(&(W[TWVL * 20])); + TA = VZMULI(T1, VSUB(Tn, Tz)); + T1l = LDW(&(W[0])); + T1m = VZMULI(T1l, VADD(Tn, Tz)); + TQ = VBYI(VMUL(LDK(KP866025403), VADD(TG, TF))); + TR = VADD(TC, TD); + TP = LDW(&(W[TWVL * 6])); + TS = VZMUL(TP, VADD(TQ, TR)); + T17 = LDW(&(W[TWVL * 14])); + T18 = VZMUL(T17, VSUB(TR, TQ)); + TM = VADD(Tf, Tm); + TN = VBYI(VSUB(Ty, Tq)); + TL = LDW(&(W[TWVL * 8])); + TO = VZMULI(TL, VADD(TM, TN)); + T1d = LDW(&(W[TWVL * 12])); + T1e = VZMULI(T1d, VSUB(TM, TN)); + TE = VSUB(TC, TD); + TH = VBYI(VMUL(LDK(KP866025403), VSUB(TF, TG))); + TB = LDW(&(W[TWVL * 18])); + TI = VZMUL(TB, VSUB(TE, TH)); + TV = LDW(&(W[TWVL * 2])); + TW = VZMUL(TV, VADD(TH, TE)); + TJ = VADD(TA, TI); + ST(&(Rp[WS(rs, 5)]), TJ, ms, &(Rp[WS(rs, 1)])); + T1p = VCONJ(VSUB(T1n, T1m)); + ST(&(Rm[0]), T1p, -ms, &(Rm[0])); + T1k = VCONJ(VSUB(T1i, T1e)); + ST(&(Rm[WS(rs, 3)]), T1k, -ms, &(Rm[WS(rs, 1)])); + TT = VADD(TO, TS); + ST(&(Rp[WS(rs, 2)]), TT, ms, &(Rp[0])); + T1o = VADD(T1m, T1n); + ST(&(Rp[0]), T1o, ms, &(Rp[0])); + TK = VCONJ(VSUB(TI, TA)); + ST(&(Rm[WS(rs, 5)]), TK, -ms, &(Rm[WS(rs, 1)])); + TU = VCONJ(VSUB(TS, TO)); + 
ST(&(Rm[WS(rs, 2)]), TU, -ms, &(Rm[0])); + T1j = VADD(T1e, T1i); + ST(&(Rp[WS(rs, 3)]), T1j, ms, &(Rp[WS(rs, 1)])); + T1b = VCONJ(VSUB(T18, T1a)); + ST(&(Rm[WS(rs, 4)]), T1b, -ms, &(Rm[0])); + T16 = VADD(TW, T14); + ST(&(Rp[WS(rs, 1)]), T16, ms, &(Rp[WS(rs, 1)])); + T1c = VADD(T18, T1a); + ST(&(Rp[WS(rs, 4)]), T1c, ms, &(Rp[0])); + T15 = VCONJ(VSUB(TW, T14)); + ST(&(Rm[WS(rs, 1)]), T15, -ms, &(Rm[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 12, XSIMD_STRING("hc2cbdftv_12"), twinstr, &GENUS, {67, 26, 4, 0} }; + +void XSIMD(codelet_hc2cbdftv_12) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_12, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 16 -dif -sign 1 -name hc2cbdftv_16 -include hc2cbv.h */ + +/* + * This function contains 103 FP additions, 80 FP multiplications, + * (or, 53 additions, 30 multiplications, 50 fused multiply/add), + * 123 stack variables, 3 constants, and 32 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 30)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(64, rs)) { + V T1D, T1F, TV, TW, T17, T18, T1B, T1A, T1H, T1G; + { + V T8, Tv, Tb, TF, Tl, TJ, TP, T1w, TE, T1t, T10, T1p, TG, Te, Tg; + V Th, T2, T3, Ts, Tt, T5, T6, Tp, Tq, T9, TA, T4, TC, Tu, TN; + V T7, TB, Tr, Ta, Tj, Tk, Tc, Td, TY, TD, TO, TZ, T1Q, T19, T1I; + V T1d, Tf, T11, TH, TQ, Ti, TI, T1k, T1K, T1S, T1r, T14, T16, TU, Ty; + V T1z, TX, T1o, T1, TK, TR, Tm, T12, T1C, Tz, T15; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + Ts = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + Tt = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T5 = LD(&(Rp[WS(rs, 
4)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tp = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Tq = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + T9 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + TA = VFNMSCONJ(T3, T2); + T4 = VFMACONJ(T3, T2); + TC = VFMSCONJ(Tt, Ts); + Tu = VFMACONJ(Tt, Ts); + TN = VFNMSCONJ(T6, T5); + T7 = VFMACONJ(T6, T5); + TB = VFNMSCONJ(Tq, Tp); + Tr = VFMACONJ(Tq, Tp); + Ta = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + Tj = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tk = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Tc = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Td = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = VSUB(T4, T7); + TY = VADD(T4, T7); + TD = VADD(TB, TC); + TO = VSUB(TB, TC); + Tv = VSUB(Tr, Tu); + TZ = VADD(Tr, Tu); + Tb = VFMACONJ(Ta, T9); + TF = VFNMSCONJ(Ta, T9); + Tl = VFMACONJ(Tk, Tj); + TJ = VFNMSCONJ(Tk, Tj); + TP = VFMA(LDK(KP707106781), TO, TN); + T1w = VFNMS(LDK(KP707106781), TO, TN); + TE = VFMA(LDK(KP707106781), TD, TA); + T1t = VFNMS(LDK(KP707106781), TD, TA); + T10 = VADD(TY, TZ); + T1p = VSUB(TY, TZ); + TG = VFNMSCONJ(Td, Tc); + Te = VFMACONJ(Td, Tc); + Tg = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + Th = LD(&(Rm[0]), -ms, &(Rm[0])); + T1Q = LDW(&(W[TWVL * 22])); + T19 = LDW(&(W[TWVL * 26])); + T1I = LDW(&(W[TWVL * 2])); + T1d = LDW(&(W[TWVL * 28])); + Tf = VSUB(Tb, Te); + T11 = VADD(Tb, Te); + TH = VFNMS(LDK(KP414213562), TG, TF); + TQ = VFMA(LDK(KP414213562), TF, TG); + Ti = VFMACONJ(Th, Tg); + TI = VFMSCONJ(Th, Tg); + T1k = LDW(&(W[0])); + T1K = LDW(&(W[TWVL * 4])); + T1S = LDW(&(W[TWVL * 24])); + TX = LDW(&(W[TWVL * 14])); + T1o = LDW(&(W[TWVL * 6])); + T1 = LDW(&(W[TWVL * 10])); + TK = VFMA(LDK(KP414213562), TJ, TI); + TR = VFNMS(LDK(KP414213562), TI, TJ); + Tm = VSUB(Ti, Tl); + T12 = VADD(Ti, Tl); + T1C = LDW(&(W[TWVL * 18])); + Tz = LDW(&(W[TWVL * 12])); + T15 = LDW(&(W[TWVL * 16])); + { + V T1v, T1y, T1N, T1g, T1J, T1c, T1U, T1V, T1m, T1n, T1s, TS, T1u, TL, T1x; + V T13, T1q, Tn, Tw, 
T1L, T1f, TT, T1M, T1e, TM, T1R, T1j, T1b, Tx, T1a; + V To, T1T, T1l, T1E, T1O, T1P, T1h, T1i; + T1s = LDW(&(W[TWVL * 8])); + TS = VADD(TQ, TR); + T1u = VSUB(TQ, TR); + TL = VADD(TH, TK); + T1x = VSUB(TH, TK); + T13 = VADD(T11, T12); + T1q = VSUB(T11, T12); + Tn = VADD(Tf, Tm); + Tw = VSUB(Tf, Tm); + T1L = VFMA(LDK(KP923879532), T1u, T1t); + T1v = VFNMS(LDK(KP923879532), T1u, T1t); + T1f = VFMA(LDK(KP923879532), TS, TP); + TT = VFNMS(LDK(KP923879532), TS, TP); + T1M = VFNMS(LDK(KP923879532), T1x, T1w); + T1y = VFMA(LDK(KP923879532), T1x, T1w); + T1e = VFMA(LDK(KP923879532), TL, TE); + TM = VFNMS(LDK(KP923879532), TL, TE); + T1r = VZMUL(T1o, VFMAI(T1q, T1p)); + T1R = VZMUL(T1Q, VFNMSI(T1q, T1p)); + T14 = VZMUL(TX, VSUB(T10, T13)); + T1j = VADD(T10, T13); + T1b = VFMA(LDK(KP707106781), Tw, Tv); + Tx = VFNMS(LDK(KP707106781), Tw, Tv); + T1a = VFMA(LDK(KP707106781), Tn, T8); + To = VFNMS(LDK(KP707106781), Tn, T8); + T1T = VZMULI(T1S, VFMAI(T1M, T1L)); + T1N = VZMULI(T1K, VFNMSI(T1M, T1L)); + T16 = VZMULI(T15, VFMAI(TT, TM)); + TU = VZMULI(Tz, VFNMSI(TT, TM)); + T1l = VZMULI(T1k, VFMAI(T1f, T1e)); + T1g = VZMULI(T1d, VFNMSI(T1f, T1e)); + T1D = VZMUL(T1C, VFMAI(Tx, To)); + Ty = VZMUL(T1, VFNMSI(Tx, To)); + T1J = VZMUL(T1I, VFMAI(T1b, T1a)); + T1c = VZMUL(T19, VFNMSI(T1b, T1a)); + T1U = VCONJ(VSUB(T1R, T1T)); + T1V = VADD(T1R, T1T); + T1m = VCONJ(VSUB(T1j, T1l)); + T1n = VADD(T1j, T1l); + T1z = VZMULI(T1s, VFMAI(T1y, T1v)); + T1E = LDW(&(W[TWVL * 20])); + T1O = VCONJ(VSUB(T1J, T1N)); + T1P = VADD(T1J, T1N); + T1h = VCONJ(VSUB(T1c, T1g)); + T1i = VADD(T1c, T1g); + ST(&(Rp[WS(rs, 6)]), T1V, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 6)]), T1U, -ms, &(Rm[0])); + ST(&(Rp[0]), T1n, ms, &(Rp[0])); + ST(&(Rm[0]), T1m, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), T1P, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 1)]), T1O, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 7)]), T1i, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 7)]), T1h, -ms, &(Rm[WS(rs, 1)])); + T1F = VZMULI(T1E, VFNMSI(T1y, T1v)); + } + TV = 
VCONJ(VSUB(Ty, TU)); + TW = VADD(Ty, TU); + T17 = VCONJ(VSUB(T14, T16)); + T18 = VADD(T14, T16); + T1B = VADD(T1r, T1z); + T1A = VCONJ(VSUB(T1r, T1z)); + } + T1H = VADD(T1D, T1F); + T1G = VCONJ(VSUB(T1D, T1F)); + ST(&(Rm[WS(rs, 3)]), TV, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 3)]), TW, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 4)]), T17, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 2)]), T1A, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 2)]), T1B, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 4)]), T18, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 5)]), T1H, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 5)]), T1G, -ms, &(Rm[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 16, XSIMD_STRING("hc2cbdftv_16"), twinstr, &GENUS, {53, 30, 50, 0} }; + +void XSIMD(codelet_hc2cbdftv_16) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_16, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 16 -dif -sign 1 -name hc2cbdftv_16 -include hc2cbv.h */ + +/* + * This function contains 103 FP additions, 42 FP multiplications, + * (or, 99 additions, 38 multiplications, 4 fused multiply/add), + * 83 stack variables, 3 constants, and 32 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 30)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * 
ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(64, rs)) { + V Tf, T16, TZ, T1C, TI, T1a, TV, T1D, T1F, T1G, Ty, T19, TC, T17, TS; + V T10; + { + V T2, TD, T4, TF, Tc, Tb, Td, T6, T8, T9, T3, TE, Ta, T7, T5; + V Te, TX, TY, TG, TH, TT, TU, Tj, TM, Tw, TQ, Tn, TN, Ts, TP; + V Tg, Ti, Th, Tt, Tv, Tu, Tk, Tm, Tl, Tr, Tq, Tp, To, Tx, TA; + V TB, TO, TR; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + TD = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + T4 = VCONJ(T3); + TE = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + TF = VCONJ(TE); + Tc = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VCONJ(Ta); + Td = VSUB(Tb, Tc); + T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + T8 = VCONJ(T7); + T9 = VSUB(T6, T8); + T5 = VSUB(T2, T4); + Te = VMUL(LDK(KP707106781), VADD(T9, Td)); + Tf = VADD(T5, Te); + T16 = VSUB(T5, Te); + TX = VADD(T2, T4); + TY = VADD(TD, TF); + TZ = VSUB(TX, TY); + T1C = VADD(TX, TY); + TG = VSUB(TD, TF); + TH = VMUL(LDK(KP707106781), VSUB(T9, Td)); + TI = VADD(TG, TH); + T1a = VSUB(TH, TG); + TT = VADD(T6, T8); + TU = VADD(Tb, Tc); + TV = VSUB(TT, TU); + T1D = VADD(TT, TU); + Tg = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Th = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + Ti = VCONJ(Th); + Tj = VSUB(Tg, Ti); + TM = VADD(Tg, Ti); + Tt = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tu = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Tv = VCONJ(Tu); + Tw = VSUB(Tt, Tv); + TQ = VADD(Tt, Tv); + Tk = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Tl = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Tm = VCONJ(Tl); + Tn = VSUB(Tk, Tm); + TN = VADD(Tk, Tm); + Tr = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + Tp = LD(&(Rm[0]), -ms, &(Rm[0])); + Tq = VCONJ(Tp); + Ts = VSUB(Tq, Tr); + TP = VADD(Tq, Tr); + T1F = VADD(TM, TN); + T1G = VADD(TP, TQ); + To = VFNMS(LDK(KP382683432), Tn, VMUL(LDK(KP923879532), Tj)); + Tx = 
VFMA(LDK(KP923879532), Ts, VMUL(LDK(KP382683432), Tw)); + Ty = VADD(To, Tx); + T19 = VSUB(To, Tx); + TA = VFMA(LDK(KP382683432), Tj, VMUL(LDK(KP923879532), Tn)); + TB = VFNMS(LDK(KP382683432), Ts, VMUL(LDK(KP923879532), Tw)); + TC = VADD(TA, TB); + T17 = VSUB(TA, TB); + TO = VSUB(TM, TN); + TR = VSUB(TP, TQ); + TS = VMUL(LDK(KP707106781), VSUB(TO, TR)); + T10 = VMUL(LDK(KP707106781), VADD(TO, TR)); + } + { + V T21, T1W, T1u, T20, T1I, T1O, TK, T1S, T12, T1e, T1k, T1A, T1o, T1w, T1c; + V T1M, T1U, T1V, T1T, T1s, T1t, T1r, T1Z, T1E, T1H, T1B, T1N, Tz, TJ, T1; + V T1R, TW, T11, TL, T1d, T1i, T1j, T1h, T1z, T1m, T1n, T1l, T1v, T18, T1b; + V T15, T1L, T13, T1g, T1X, T23, T14, T1f, T1Y, T22, T1p, T1y, T1J, T1Q, T1q; + V T1x, T1K, T1P; + T1U = VADD(T1C, T1D); + T1V = VADD(T1F, T1G); + T21 = VADD(T1U, T1V); + T1T = LDW(&(W[TWVL * 14])); + T1W = VZMUL(T1T, VSUB(T1U, T1V)); + T1s = VADD(Tf, Ty); + T1t = VBYI(VADD(TI, TC)); + T1r = LDW(&(W[TWVL * 28])); + T1u = VZMULI(T1r, VSUB(T1s, T1t)); + T1Z = LDW(&(W[0])); + T20 = VZMULI(T1Z, VADD(T1s, T1t)); + T1E = VSUB(T1C, T1D); + T1H = VBYI(VSUB(T1F, T1G)); + T1B = LDW(&(W[TWVL * 22])); + T1I = VZMUL(T1B, VSUB(T1E, T1H)); + T1N = LDW(&(W[TWVL * 6])); + T1O = VZMUL(T1N, VADD(T1E, T1H)); + Tz = VSUB(Tf, Ty); + TJ = VBYI(VSUB(TC, TI)); + T1 = LDW(&(W[TWVL * 12])); + TK = VZMULI(T1, VADD(Tz, TJ)); + T1R = LDW(&(W[TWVL * 16])); + T1S = VZMULI(T1R, VSUB(Tz, TJ)); + TW = VBYI(VSUB(TS, TV)); + T11 = VSUB(TZ, T10); + TL = LDW(&(W[TWVL * 10])); + T12 = VZMUL(TL, VADD(TW, T11)); + T1d = LDW(&(W[TWVL * 18])); + T1e = VZMUL(T1d, VSUB(T11, TW)); + T1i = VBYI(VADD(T1a, T19)); + T1j = VADD(T16, T17); + T1h = LDW(&(W[TWVL * 4])); + T1k = VZMULI(T1h, VADD(T1i, T1j)); + T1z = LDW(&(W[TWVL * 24])); + T1A = VZMULI(T1z, VSUB(T1j, T1i)); + T1m = VBYI(VADD(TV, TS)); + T1n = VADD(TZ, T10); + T1l = LDW(&(W[TWVL * 2])); + T1o = VZMUL(T1l, VADD(T1m, T1n)); + T1v = LDW(&(W[TWVL * 26])); + T1w = VZMUL(T1v, VSUB(T1n, T1m)); + T18 = VSUB(T16, T17); + T1b = 
VBYI(VSUB(T19, T1a)); + T15 = LDW(&(W[TWVL * 20])); + T1c = VZMULI(T15, VSUB(T18, T1b)); + T1L = LDW(&(W[TWVL * 8])); + T1M = VZMULI(T1L, VADD(T1b, T18)); + T13 = VADD(TK, T12); + ST(&(Rp[WS(rs, 3)]), T13, ms, &(Rp[WS(rs, 1)])); + T1g = VCONJ(VSUB(T1e, T1c)); + ST(&(Rm[WS(rs, 5)]), T1g, -ms, &(Rm[WS(rs, 1)])); + T1X = VADD(T1S, T1W); + ST(&(Rp[WS(rs, 4)]), T1X, ms, &(Rp[0])); + T23 = VCONJ(VSUB(T21, T20)); + ST(&(Rm[0]), T23, -ms, &(Rm[0])); + T14 = VCONJ(VSUB(T12, TK)); + ST(&(Rm[WS(rs, 3)]), T14, -ms, &(Rm[WS(rs, 1)])); + T1f = VADD(T1c, T1e); + ST(&(Rp[WS(rs, 5)]), T1f, ms, &(Rp[WS(rs, 1)])); + T1Y = VCONJ(VSUB(T1W, T1S)); + ST(&(Rm[WS(rs, 4)]), T1Y, -ms, &(Rm[0])); + T22 = VADD(T20, T21); + ST(&(Rp[0]), T22, ms, &(Rp[0])); + T1p = VADD(T1k, T1o); + ST(&(Rp[WS(rs, 1)]), T1p, ms, &(Rp[WS(rs, 1)])); + T1y = VCONJ(VSUB(T1w, T1u)); + ST(&(Rm[WS(rs, 7)]), T1y, -ms, &(Rm[WS(rs, 1)])); + T1J = VADD(T1A, T1I); + ST(&(Rp[WS(rs, 6)]), T1J, ms, &(Rp[0])); + T1Q = VCONJ(VSUB(T1O, T1M)); + ST(&(Rm[WS(rs, 2)]), T1Q, -ms, &(Rm[0])); + T1q = VCONJ(VSUB(T1o, T1k)); + ST(&(Rm[WS(rs, 1)]), T1q, -ms, &(Rm[WS(rs, 1)])); + T1x = VADD(T1u, T1w); + ST(&(Rp[WS(rs, 7)]), T1x, ms, &(Rp[WS(rs, 1)])); + T1K = VCONJ(VSUB(T1I, T1A)); + ST(&(Rm[WS(rs, 6)]), T1K, -ms, &(Rm[0])); + T1P = VADD(T1M, T1O); + ST(&(Rp[WS(rs, 2)]), T1P, ms, &(Rp[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 16, XSIMD_STRING("hc2cbdftv_16"), twinstr, &GENUS, {99, 38, 4, 0} }; + +void XSIMD(codelet_hc2cbdftv_16) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_16, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dif -sign 1 -name hc2cbdftv_2 -include hc2cbv.h */ + +/* + * This function contains 5 FP additions, 4 FP multiplications, + * (or, 3 additions, 2 multiplications, 2 fused multiply/add), + * 8 stack variables, 0 constants, and 4 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 2)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(8, rs)) { + V T2, T3, T1, T5, T4, T7, T6; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + 
T3 = LD(&(Rm[0]), -ms, &(Rm[0])); + T1 = LDW(&(W[0])); + T5 = VFMACONJ(T3, T2); + T4 = VZMULI(T1, VFNMSCONJ(T3, T2)); + T7 = VCONJ(VSUB(T5, T4)); + T6 = VADD(T4, T5); + ST(&(Rm[0]), T7, -ms, &(Rm[0])); + ST(&(Rp[0]), T6, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cbdftv_2"), twinstr, &GENUS, {3, 2, 2, 0} }; + +void XSIMD(codelet_hc2cbdftv_2) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_2, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dif -sign 1 -name hc2cbdftv_2 -include hc2cbv.h */ + +/* + * This function contains 5 FP additions, 2 FP multiplications, + * (or, 5 additions, 2 multiplications, 0 fused multiply/add), + * 9 stack variables, 0 constants, and 4 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 2)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(8, rs)) { + V T6, T5, T2, T4, T3, T1, T7, T8; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[0]), -ms, &(Rm[0])); + T4 = VCONJ(T3); + T6 = VADD(T2, T4); + T1 = LDW(&(W[0])); + T5 = VZMULI(T1, VSUB(T2, T4)); + T7 = VADD(T5, T6); + ST(&(Rp[0]), T7, ms, &(Rp[0])); + T8 = VCONJ(VSUB(T6, T5)); + ST(&(Rm[0]), T8, -ms, &(Rm[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cbdftv_2"), twinstr, &GENUS, {5, 2, 0, 0} }; + +void XSIMD(codelet_hc2cbdftv_2) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_2, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 
26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 20 -dif -sign 1 -name hc2cbdftv_20 -include hc2cbv.h */ + +/* + * This function contains 143 FP additions, 108 FP multiplications, + * (or, 77 additions, 42 multiplications, 66 fused multiply/add), + * 134 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, 
+0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 38)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(80, rs)) { + V T1M, T1T, T4, TF, T12, Te, T16, Ts, Tb, TN, TA, TG, TU, T1Y, T11; + V T1e, T29, T21, T15, Th, T13, Tp; + { + V TS, TT, Tf, T10, T20, T1Z, TX, Tg, Tn, To, T2, T3, TD, TE, T8; + V TV, T7, TZ, Tz, T9, Tu, Tv, T5, T6, Tx, Ty, Tc, Td, Tq, Tr; + V TY, Ta, TW, Tw; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + TD = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + TE = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T5 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tx = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Ty = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + T8 = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + TS = VFMACONJ(T3, T2); + T4 = VFNMSCONJ(T3, T2); + TT = VFMACONJ(TE, TD); + TF = VFNMSCONJ(TE, TD); + TV = VFMACONJ(T6, T5); + T7 = VFNMSCONJ(T6, T5); + TZ = VFMACONJ(Ty, Tx); + Tz = VFNMSCONJ(Ty, Tx); + T9 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tu = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + Tv = LD(&(Rm[0]), -ms, &(Rm[0])); + Tc = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tq = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + Tr = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Tf = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + TY = VFMACONJ(T9, T8); + Ta = VFMSCONJ(T9, T8); + TW = VFMACONJ(Tv, Tu); + Tw = VFNMSCONJ(Tv, Tu); + T12 = VFMACONJ(Td, Tc); + Te = VFNMSCONJ(Td, Tc); + T16 = VFMACONJ(Tr, Tq); + Ts = VFMSCONJ(Tr, Tq); + T10 = VSUB(TY, TZ); + T20 = VADD(TY, TZ); + Tb = VADD(T7, Ta); + TN = VSUB(T7, Ta); + T1Z = VADD(TV, TW); + TX = VSUB(TV, TW); + TA = VSUB(Tw, Tz); + TG = VADD(Tw, Tz); + Tg = LD(&(Rm[WS(rs, 
7)]), -ms, &(Rm[WS(rs, 1)])); + Tn = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + To = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TU = VSUB(TS, TT); + T1Y = VADD(TS, TT); + T11 = VADD(TX, T10); + T1e = VSUB(TX, T10); + T29 = VSUB(T1Z, T20); + T21 = VADD(T1Z, T20); + T15 = VFMACONJ(Tg, Tf); + Th = VFMSCONJ(Tg, Tf); + T13 = VFMACONJ(To, Tn); + Tp = VFMSCONJ(To, Tn); + } + { + V T1S, T2B, T1W, T1I, T2q, T2w, T2i, T2c, T1C, T1K, T1s, T1g, T1, T2t, T1v; + V T1Q, T2A, T1q, T2m, TC, T1w, TP, T1x, T2f, T2r, T2g, T1E, T1D, T2y, T2x; + V T1i, T1h, T2D, T2C, T2s, T1t, T1u, T1y, T2u, TQ, T2d, T2e, T1U, T1L, T2j; + V T2k; + { + V T1R, T1F, T1V, T1o, TO, Tl, T1d, T2a, T1l, TB, TK, T1G, Tk, T1b, T19; + V T27, T25, T1H, TJ, T17, T23, TM, Ti, T14, T22, Tt, TH, Tj, T18, T24; + V TI, T2b, T2p, T1X, T2v, T2h, T2n, T1B, T1f, T28, T2o, T1a, TR, T1J, T1r; + V T1z, T26, Tm, TL, T1O, T1m, T1j, T2z, T1N, T1p, T1P, T2l, T1c, T1A, T1n; + V T1k; + T1R = LDW(&(W[TWVL * 18])); + T17 = VSUB(T15, T16); + T23 = VADD(T15, T16); + TM = VSUB(Te, Th); + Ti = VADD(Te, Th); + T14 = VSUB(T12, T13); + T22 = VADD(T12, T13); + Tt = VSUB(Tp, Ts); + TH = VADD(Tp, Ts); + T1F = LDW(&(W[TWVL * 28])); + T1V = LDW(&(W[TWVL * 8])); + T1o = VFMA(LDK(KP618033988), TM, TN); + TO = VFNMS(LDK(KP618033988), TN, TM); + Tj = VADD(Tb, Ti); + Tl = VSUB(Tb, Ti); + T18 = VADD(T14, T17); + T1d = VSUB(T14, T17); + T24 = VADD(T22, T23); + T2a = VSUB(T22, T23); + T1l = VFMA(LDK(KP618033988), Tt, TA); + TB = VFNMS(LDK(KP618033988), TA, Tt); + TI = VADD(TG, TH); + TK = VSUB(TG, TH); + T1G = VADD(T4, Tj); + Tk = VFNMS(LDK(KP250000000), Tj, T4); + T1b = VSUB(T11, T18); + T19 = VADD(T11, T18); + T27 = VSUB(T21, T24); + T25 = VADD(T21, T24); + T1H = VADD(TF, TI); + TJ = VFNMS(LDK(KP250000000), TI, TF); + T2b = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T2a, T29)); + T2p = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T29, T2a)); + T1X = LDW(&(W[TWVL * 6])); + T1S = VZMUL(T1R, VADD(TU, T19)); + T2v = LDW(&(W[TWVL * 22])); + T2B = 
VADD(T1Y, T25); + T26 = VFNMS(LDK(KP250000000), T25, T1Y); + T1W = VZMULI(T1V, VFMAI(T1H, T1G)); + T1I = VZMULI(T1F, VFNMSI(T1H, T1G)); + T2h = LDW(&(W[TWVL * 30])); + T2n = LDW(&(W[TWVL * 14])); + T1B = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1d, T1e)); + T1f = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1e, T1d)); + T28 = VFMA(LDK(KP559016994), T27, T26); + T2o = VFNMS(LDK(KP559016994), T27, T26); + T1a = VFNMS(LDK(KP250000000), T19, TU); + TR = LDW(&(W[TWVL * 2])); + T1J = LDW(&(W[TWVL * 26])); + T1r = LDW(&(W[TWVL * 34])); + T1z = LDW(&(W[TWVL * 10])); + T1k = VFMA(LDK(KP559016994), Tl, Tk); + Tm = VFNMS(LDK(KP559016994), Tl, Tk); + T2q = VZMUL(T2n, VFMAI(T2p, T2o)); + T2w = VZMUL(T2v, VFNMSI(T2p, T2o)); + T2i = VZMUL(T2h, VFMAI(T2b, T28)); + T2c = VZMUL(T1X, VFNMSI(T2b, T28)); + T1c = VFNMS(LDK(KP559016994), T1b, T1a); + T1A = VFMA(LDK(KP559016994), T1b, T1a); + TL = VFNMS(LDK(KP559016994), TK, TJ); + T1n = VFMA(LDK(KP559016994), TK, TJ); + T1O = VFMA(LDK(KP951056516), T1l, T1k); + T1m = VFNMS(LDK(KP951056516), T1l, T1k); + T1j = LDW(&(W[TWVL * 36])); + T2z = LDW(&(W[0])); + T1N = LDW(&(W[TWVL * 20])); + T1C = VZMUL(T1z, VFMAI(T1B, T1A)); + T1K = VZMUL(T1J, VFNMSI(T1B, T1A)); + T1s = VZMUL(T1r, VFMAI(T1f, T1c)); + T1g = VZMUL(TR, VFNMSI(T1f, T1c)); + T1p = VFMA(LDK(KP951056516), T1o, T1n); + T1P = VFNMS(LDK(KP951056516), T1o, T1n); + T2l = LDW(&(W[TWVL * 16])); + T1 = LDW(&(W[TWVL * 4])); + T2t = LDW(&(W[TWVL * 24])); + T1v = LDW(&(W[TWVL * 12])); + T1Q = VZMULI(T1N, VFNMSI(T1P, T1O)); + T2A = VZMULI(T2z, VFMAI(T1p, T1m)); + T1q = VZMULI(T1j, VFNMSI(T1p, T1m)); + T2m = VZMULI(T2l, VFMAI(T1P, T1O)); + TC = VFMA(LDK(KP951056516), TB, Tm); + T1w = VFNMS(LDK(KP951056516), TB, Tm); + TP = VFNMS(LDK(KP951056516), TO, TL); + T1x = VFMA(LDK(KP951056516), TO, TL); + T2f = LDW(&(W[TWVL * 32])); + } + T2D = VCONJ(VSUB(T2B, T2A)); + T2C = VADD(T2A, T2B); + T2s = VCONJ(VSUB(T2q, T2m)); + T2r = VADD(T2m, T2q); + T1t = VADD(T1q, T1s); + T1u = VCONJ(VSUB(T1s, 
T1q)); + T1y = VZMULI(T1v, VFNMSI(T1x, T1w)); + T2u = VZMULI(T2t, VFMAI(T1x, T1w)); + TQ = VZMULI(T1, VFNMSI(TP, TC)); + T2g = VZMULI(T2f, VFMAI(TP, TC)); + ST(&(Rm[0]), T2D, -ms, &(Rm[0])); + ST(&(Rp[0]), T2C, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 4)]), T2s, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 9)]), T1u, -ms, &(Rm[WS(rs, 1)])); + T1E = VCONJ(VSUB(T1C, T1y)); + T1D = VADD(T1y, T1C); + T2y = VCONJ(VSUB(T2w, T2u)); + T2x = VADD(T2u, T2w); + T1i = VCONJ(VSUB(T1g, TQ)); + T1h = VADD(TQ, T1g); + ST(&(Rp[WS(rs, 9)]), T1t, ms, &(Rp[WS(rs, 1)])); + T1L = VADD(T1I, T1K); + T1M = VCONJ(VSUB(T1K, T1I)); + ST(&(Rp[WS(rs, 3)]), T1D, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 6)]), T2y, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 6)]), T2x, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), T1i, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 1)]), T1h, ms, &(Rp[WS(rs, 1)])); + T2d = VADD(T1W, T2c); + T2e = VCONJ(VSUB(T2c, T1W)); + ST(&(Rm[WS(rs, 3)]), T1E, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 7)]), T1L, ms, &(Rp[WS(rs, 1)])); + T1U = VCONJ(VSUB(T1S, T1Q)); + T1T = VADD(T1Q, T1S); + T2j = VADD(T2g, T2i); + T2k = VCONJ(VSUB(T2i, T2g)); + ST(&(Rp[WS(rs, 2)]), T2d, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 4)]), T2r, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 5)]), T1U, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), T2e, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 8)]), T2j, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 8)]), T2k, -ms, &(Rm[0])); + } + ST(&(Rp[WS(rs, 5)]), T1T, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 7)]), T1M, -ms, &(Rm[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 20, XSIMD_STRING("hc2cbdftv_20"), twinstr, &GENUS, {77, 42, 66, 0} }; + +void XSIMD(codelet_hc2cbdftv_20) (planner *p) { + X(khc2c_register) (p, 
hc2cbdftv_20, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 20 -dif -sign 1 -name hc2cbdftv_20 -include hc2cbv.h */ + +/* + * This function contains 143 FP additions, 62 FP multiplications, + * (or, 131 additions, 50 multiplications, 12 fused multiply/add), + * 114 stack variables, 4 constants, and 40 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 38)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(80, rs)) { + V TK, T1v, TY, T1x, T1j, T2f, TS, TT, TO, TU, T5, To, Tp, Tq, T2a; + V T2d, T2g, T2k, T2j, T1k, T1l, T18, T1m, T1f; + { + V T2, TP, T4, TR, TI, T1d, T9, T12, Td, T15, TE, T1a, Tv, T13, Tm; + V T1c, Tz, T16, Ti, T19, T3, TQ, TH, TG, TF, T6, T8, T7, Tc, Tb; + V Ta, TD, TC, TB, Ts, Tu, Tt, Tl, Tk, Tj, Tw, Ty, Tx, Tf, Th; + V Tg, TA, TJ, TW, TX, T1h, T1i, TM, TN, Te, Tn, T28, T29, T2b, T2c; + V T14, T17, T1b, T1e; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + TP = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + T3 = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + T4 = VCONJ(T3); + TQ = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + TR = VCONJ(TQ); + TH = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + TF = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + TG = VCONJ(TF); + TI = VSUB(TG, TH); + T1d = VADD(TG, TH); + T6 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); 
+ T8 = VCONJ(T7); + T9 = VSUB(T6, T8); + T12 = VADD(T6, T8); + Tc = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + Ta = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VCONJ(Ta); + Td = VSUB(Tb, Tc); + T15 = VADD(Tb, Tc); + TD = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + TB = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TC = VCONJ(TB); + TE = VSUB(TC, TD); + T1a = VADD(TC, TD); + Ts = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + Tt = LD(&(Rm[0]), -ms, &(Rm[0])); + Tu = VCONJ(Tt); + Tv = VSUB(Ts, Tu); + T13 = VADD(Ts, Tu); + Tl = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Tj = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + Tk = VCONJ(Tj); + Tm = VSUB(Tk, Tl); + T1c = VADD(Tk, Tl); + Tw = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tx = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + Ty = VCONJ(Tx); + Tz = VSUB(Tw, Ty); + T16 = VADD(Tw, Ty); + Tf = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + Tg = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Th = VCONJ(Tg); + Ti = VSUB(Tf, Th); + T19 = VADD(Tf, Th); + TA = VSUB(Tv, Tz); + TJ = VSUB(TE, TI); + TK = VFNMS(LDK(KP951056516), TJ, VMUL(LDK(KP587785252), TA)); + T1v = VFMA(LDK(KP951056516), TA, VMUL(LDK(KP587785252), TJ)); + TW = VSUB(T9, Td); + TX = VSUB(Ti, Tm); + TY = VFNMS(LDK(KP951056516), TX, VMUL(LDK(KP587785252), TW)); + T1x = VFMA(LDK(KP951056516), TW, VMUL(LDK(KP587785252), TX)); + T1h = VADD(T2, T4); + T1i = VADD(TP, TR); + T1j = VSUB(T1h, T1i); + T2f = VADD(T1h, T1i); + TS = VSUB(TP, TR); + TM = VADD(Tv, Tz); + TN = VADD(TE, TI); + TT = VADD(TM, TN); + TO = VMUL(LDK(KP559016994), VSUB(TM, TN)); + TU = VFNMS(LDK(KP250000000), TT, TS); + T5 = VSUB(T2, T4); + Te = VADD(T9, Td); + Tn = VADD(Ti, Tm); + To = VADD(Te, Tn); + Tp = VFNMS(LDK(KP250000000), To, T5); + Tq = VMUL(LDK(KP559016994), VSUB(Te, Tn)); + T28 = VADD(T12, T13); + T29 = VADD(T15, T16); + T2a = VADD(T28, T29); + T2b = VADD(T19, T1a); + T2c = VADD(T1c, T1d); + T2d = VADD(T2b, T2c); + T2g = VADD(T2a, T2d); + T2k = VSUB(T2b, T2c); + T2j = VSUB(T28, T29); + T14 = VSUB(T12, T13); 
+ T17 = VSUB(T15, T16); + T1k = VADD(T14, T17); + T1b = VSUB(T19, T1a); + T1e = VSUB(T1c, T1d); + T1l = VADD(T1b, T1e); + T18 = VSUB(T14, T17); + T1m = VADD(T1k, T1l); + T1f = VSUB(T1b, T1e); + } + { + V T2L, T22, T1S, T26, T2m, T2G, T2s, T2A, T1q, T1U, T1C, T1M, T10, T2E, T1I; + V T2q, T1A, T2K, T20, T2w, T21, T1Q, T1R, T1P, T25, T1r, T1s, T2C, T2N, T1N; + V T2H, T2I, T2M, T1E, T1D, T1O, T1V, T2n, T2B, T24, T2o, T2t, T2u, T23, T1W; + T2L = VADD(T2f, T2g); + T21 = LDW(&(W[TWVL * 18])); + T22 = VZMUL(T21, VADD(T1j, T1m)); + T1Q = VADD(T5, To); + T1R = VBYI(VADD(TS, TT)); + T1P = LDW(&(W[TWVL * 28])); + T1S = VZMULI(T1P, VSUB(T1Q, T1R)); + T25 = LDW(&(W[TWVL * 8])); + T26 = VZMULI(T25, VADD(T1Q, T1R)); + { + V T2l, T2z, T2i, T2y, T2e, T2h, T27, T2F, T2r, T2x, T1g, T1K, T1p, T1L, T1n; + V T1o, T11, T1T, T1B, T1J, TL, T1G, TZ, T1H, Tr, TV, T1, T2D, T1F, T2p; + V T1w, T1Y, T1z, T1Z, T1u, T1y, T1t, T2J, T1X, T2v; + T2l = VBYI(VFMA(LDK(KP951056516), T2j, VMUL(LDK(KP587785252), T2k))); + T2z = VBYI(VFNMS(LDK(KP951056516), T2k, VMUL(LDK(KP587785252), T2j))); + T2e = VMUL(LDK(KP559016994), VSUB(T2a, T2d)); + T2h = VFNMS(LDK(KP250000000), T2g, T2f); + T2i = VADD(T2e, T2h); + T2y = VSUB(T2h, T2e); + T27 = LDW(&(W[TWVL * 6])); + T2m = VZMUL(T27, VSUB(T2i, T2l)); + T2F = LDW(&(W[TWVL * 22])); + T2G = VZMUL(T2F, VADD(T2z, T2y)); + T2r = LDW(&(W[TWVL * 30])); + T2s = VZMUL(T2r, VADD(T2l, T2i)); + T2x = LDW(&(W[TWVL * 14])); + T2A = VZMUL(T2x, VSUB(T2y, T2z)); + T1g = VBYI(VFNMS(LDK(KP951056516), T1f, VMUL(LDK(KP587785252), T18))); + T1K = VBYI(VFMA(LDK(KP951056516), T18, VMUL(LDK(KP587785252), T1f))); + T1n = VFNMS(LDK(KP250000000), T1m, T1j); + T1o = VMUL(LDK(KP559016994), VSUB(T1k, T1l)); + T1p = VSUB(T1n, T1o); + T1L = VADD(T1o, T1n); + T11 = LDW(&(W[TWVL * 2])); + T1q = VZMUL(T11, VADD(T1g, T1p)); + T1T = LDW(&(W[TWVL * 26])); + T1U = VZMUL(T1T, VSUB(T1L, T1K)); + T1B = LDW(&(W[TWVL * 34])); + T1C = VZMUL(T1B, VSUB(T1p, T1g)); + T1J = LDW(&(W[TWVL * 10])); + T1M = VZMUL(T1J, 
VADD(T1K, T1L)); + Tr = VSUB(Tp, Tq); + TL = VSUB(Tr, TK); + T1G = VADD(Tr, TK); + TV = VSUB(TO, TU); + TZ = VBYI(VSUB(TV, TY)); + T1H = VBYI(VADD(TY, TV)); + T1 = LDW(&(W[TWVL * 4])); + T10 = VZMULI(T1, VADD(TL, TZ)); + T2D = LDW(&(W[TWVL * 24])); + T2E = VZMULI(T2D, VSUB(T1G, T1H)); + T1F = LDW(&(W[TWVL * 12])); + T1I = VZMULI(T1F, VADD(T1G, T1H)); + T2p = LDW(&(W[TWVL * 32])); + T2q = VZMULI(T2p, VSUB(TL, TZ)); + T1u = VADD(Tq, Tp); + T1w = VSUB(T1u, T1v); + T1Y = VADD(T1u, T1v); + T1y = VADD(TO, TU); + T1z = VBYI(VADD(T1x, T1y)); + T1Z = VBYI(VSUB(T1y, T1x)); + T1t = LDW(&(W[TWVL * 36])); + T1A = VZMULI(T1t, VSUB(T1w, T1z)); + T2J = LDW(&(W[0])); + T2K = VZMULI(T2J, VADD(T1w, T1z)); + T1X = LDW(&(W[TWVL * 20])); + T20 = VZMULI(T1X, VSUB(T1Y, T1Z)); + T2v = LDW(&(W[TWVL * 16])); + T2w = VZMULI(T2v, VADD(T1Y, T1Z)); + } + T1r = VADD(T10, T1q); + ST(&(Rp[WS(rs, 1)]), T1r, ms, &(Rp[WS(rs, 1)])); + T1s = VCONJ(VSUB(T1q, T10)); + ST(&(Rm[WS(rs, 1)]), T1s, -ms, &(Rm[WS(rs, 1)])); + T2C = VCONJ(VSUB(T2A, T2w)); + ST(&(Rm[WS(rs, 4)]), T2C, -ms, &(Rm[0])); + T2N = VCONJ(VSUB(T2L, T2K)); + ST(&(Rm[0]), T2N, -ms, &(Rm[0])); + T1N = VADD(T1I, T1M); + ST(&(Rp[WS(rs, 3)]), T1N, ms, &(Rp[WS(rs, 1)])); + T2H = VADD(T2E, T2G); + ST(&(Rp[WS(rs, 6)]), T2H, ms, &(Rp[0])); + T2I = VCONJ(VSUB(T2G, T2E)); + ST(&(Rm[WS(rs, 6)]), T2I, -ms, &(Rm[0])); + T2M = VADD(T2K, T2L); + ST(&(Rp[0]), T2M, ms, &(Rp[0])); + T1E = VCONJ(VSUB(T1C, T1A)); + ST(&(Rm[WS(rs, 9)]), T1E, -ms, &(Rm[WS(rs, 1)])); + T1D = VADD(T1A, T1C); + ST(&(Rp[WS(rs, 9)]), T1D, ms, &(Rp[WS(rs, 1)])); + T1O = VCONJ(VSUB(T1M, T1I)); + ST(&(Rm[WS(rs, 3)]), T1O, -ms, &(Rm[WS(rs, 1)])); + T1V = VADD(T1S, T1U); + ST(&(Rp[WS(rs, 7)]), T1V, ms, &(Rp[WS(rs, 1)])); + T2n = VADD(T26, T2m); + ST(&(Rp[WS(rs, 2)]), T2n, ms, &(Rp[0])); + T2B = VADD(T2w, T2A); + ST(&(Rp[WS(rs, 4)]), T2B, ms, &(Rp[0])); + T24 = VCONJ(VSUB(T22, T20)); + ST(&(Rm[WS(rs, 5)]), T24, -ms, &(Rm[WS(rs, 1)])); + T2o = VCONJ(VSUB(T2m, T26)); + ST(&(Rm[WS(rs, 2)]), 
T2o, -ms, &(Rm[0])); + T2t = VADD(T2q, T2s); + ST(&(Rp[WS(rs, 8)]), T2t, ms, &(Rp[0])); + T2u = VCONJ(VSUB(T2s, T2q)); + ST(&(Rm[WS(rs, 8)]), T2u, -ms, &(Rm[0])); + T23 = VADD(T20, T22); + ST(&(Rp[WS(rs, 5)]), T23, ms, &(Rp[WS(rs, 1)])); + T1W = VCONJ(VSUB(T1U, T1S)); + ST(&(Rm[WS(rs, 7)]), T1W, -ms, &(Rm[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 20, XSIMD_STRING("hc2cbdftv_20"), twinstr, &GENUS, {131, 50, 12, 0} }; + +void XSIMD(codelet_hc2cbdftv_20) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_20, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,878 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 32 -dif -sign 1 -name hc2cbdftv_32 -include hc2cbv.h */ + +/* + * This function contains 249 FP additions, 192 FP multiplications, + * (or, 119 additions, 62 multiplications, 130 fused multiply/add), + * 166 stack variables, 7 constants, and 64 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 62)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(128, rs)) { + V T3a, T3N; + { + V T2G, T1o, T2o, T2Y, T1b, T1V, Ts, T1S, T3A, T48, T3p, T45, T31, T2z, T2H; + V T1L, Tv, TG, TM, T3q, T1r, TX, TN, T1s, Ty, T1t, TB, TO, TQ, T1y; + V T3t, TR, T1H, T1K, TV, T1p, T1q, T1w, TW, Tt, Tu, TE, TF, TK, TL; + V Tw, Tx, Tz, TA, T1x; + { + V T1i, T4, T1j, T15, T1l, T1m, Tb, 
T16, Tf, T1G, Ti, T1F, Tm, T1J, T1I; + V Tp, T2, T3, T13, T14, T5, T6, T8, T9, Td, T7, Ta, Te, Tg, Th; + V Tk, Tl, Tn, To, T2m, Tc, T3l, T1k, T3m, T18, Tj, T3y, T1n, Tq, T19; + V T3n, T17, T2x, T1a, T2n, T2y, Tr, T3z, T3o; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 15)]), -ms, &(Rm[WS(rs, 1)])); + T13 = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + T14 = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + T5 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 11)]), -ms, &(Rm[WS(rs, 1)])); + T8 = LD(&(Rp[WS(rs, 12)]), ms, &(Rp[0])); + T9 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Td = LD(&(Rp[WS(rs, 10)]), ms, &(Rp[0])); + T1i = VFMACONJ(T3, T2); + T4 = VFNMSCONJ(T3, T2); + T1j = VFMACONJ(T14, T13); + T15 = VFNMSCONJ(T14, T13); + T1l = VFMACONJ(T6, T5); + T7 = VFNMSCONJ(T6, T5); + T1m = VFMACONJ(T9, T8); + Ta = VFMSCONJ(T9, T8); + Te = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Th = LD(&(Rm[WS(rs, 13)]), -ms, &(Rm[WS(rs, 1)])); + Tk = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + Tl = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + Tn = LD(&(Rp[WS(rs, 14)]), ms, &(Rp[0])); + To = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VADD(T7, Ta); + T16 = VSUB(T7, Ta); + Tf = VFNMSCONJ(Te, Td); + T1G = VFMACONJ(Te, Td); + Ti = VFNMSCONJ(Th, Tg); + T1F = VFMACONJ(Th, Tg); + Tm = VFNMSCONJ(Tl, Tk); + T1J = VFMACONJ(Tl, Tk); + T1I = VFMACONJ(To, Tn); + Tp = VFMSCONJ(To, Tn); + T2m = VFMA(LDK(KP707106781), Tb, T4); + Tc = VFNMS(LDK(KP707106781), Tb, T4); + T3l = VSUB(T1i, T1j); + T1k = VADD(T1i, T1j); + T1H = VADD(T1F, T1G); + T3m = VSUB(T1F, T1G); + T18 = VFNMS(LDK(KP414213562), Tf, Ti); + Tj = VFMA(LDK(KP414213562), Ti, Tf); + T3y = VSUB(T1l, T1m); + T1n = VADD(T1l, T1m); + Tq = VFNMS(LDK(KP414213562), Tp, Tm); + T19 = VFMA(LDK(KP414213562), Tm, Tp); + T1K = VADD(T1I, T1J); + T3n = VSUB(T1I, T1J); + T17 = VFNMS(LDK(KP707106781), T16, T15); + T2x = VFMA(LDK(KP707106781), T16, T15); + T1a = VSUB(T18, T19); + T2n = 
VADD(T18, T19); + T2y = VADD(Tj, Tq); + Tr = VSUB(Tj, Tq); + T3z = VSUB(T3m, T3n); + T3o = VADD(T3m, T3n); + T2G = VADD(T1k, T1n); + T1o = VSUB(T1k, T1n); + T2o = VFNMS(LDK(KP923879532), T2n, T2m); + T2Y = VFMA(LDK(KP923879532), T2n, T2m); + T1b = VFNMS(LDK(KP923879532), T1a, T17); + T1V = VFMA(LDK(KP923879532), T1a, T17); + Ts = VFMA(LDK(KP923879532), Tr, Tc); + T1S = VFNMS(LDK(KP923879532), Tr, Tc); + T3A = VFMA(LDK(KP707106781), T3z, T3y); + T48 = VFNMS(LDK(KP707106781), T3z, T3y); + T3p = VFMA(LDK(KP707106781), T3o, T3l); + T45 = VFNMS(LDK(KP707106781), T3o, T3l); + T31 = VFMA(LDK(KP923879532), T2y, T2x); + T2z = VFNMS(LDK(KP923879532), T2y, T2x); + } + Tt = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tu = LD(&(Rm[WS(rs, 14)]), -ms, &(Rm[0])); + TE = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + TF = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TK = LD(&(Rp[WS(rs, 15)]), ms, &(Rp[WS(rs, 1)])); + TL = LD(&(Rm[0]), -ms, &(Rm[0])); + TV = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + T2H = VADD(T1H, T1K); + T1L = VSUB(T1H, T1K); + Tv = VFNMSCONJ(Tu, Tt); + T1p = VFMACONJ(Tu, Tt); + TG = VFNMSCONJ(TF, TE); + T1q = VFMACONJ(TF, TE); + T1w = VFMACONJ(TL, TK); + TM = VFMSCONJ(TL, TK); + TW = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + Tw = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Tx = LD(&(Rm[WS(rs, 10)]), -ms, &(Rm[0])); + Tz = LD(&(Rp[WS(rs, 13)]), ms, &(Rp[WS(rs, 1)])); + TA = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T3q = VSUB(T1p, T1q); + T1r = VADD(T1p, T1q); + T1x = VFMACONJ(TW, TV); + TX = VFNMSCONJ(TW, TV); + TN = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + T1s = VFMACONJ(Tx, Tw); + Ty = VFNMSCONJ(Tx, Tw); + T1t = VFMACONJ(TA, Tz); + TB = VFMSCONJ(TA, Tz); + TO = LD(&(Rm[WS(rs, 12)]), -ms, &(Rm[0])); + TQ = LD(&(Rp[WS(rs, 11)]), ms, &(Rp[WS(rs, 1)])); + T1y = VADD(T1w, T1x); + T3t = VSUB(T1w, T1x); + TR = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + { + V T38, T3f, T4p, T4v, T3T, T3Z, T2a, T2i, T4b, T4h, T1O, T20, T2M, T2U, T3F; + V T3L, T2g, T3X, T3J, T1g, T4f, T2S, 
T4l, T2E, T2X, T3O, T3b, T3i, T26, T4t; + V T43, T1Y, T3c, T30, T3d, T33; + { + V T2I, T2A, T2r, T1c, TJ, T2L, T2u, T2B, T10, T1d, T3x, T3E, T1E, T1N, T1h; + V T1Z, T4m, T1M, T1D, T4a, T4o, T4n, T47, T4u, T3R, T3S, T3Q, T3Y, T28, T29; + V T27, T2h, T44, T4g; + { + V T36, T1v, T2J, T3s, T3B, T2p, TI, T2q, TD, T1B, T3u, TY, TT, T35, T1u; + V T3r, TH, TC, T1z, TP, T1A, TS, T3w, T3D, T1C, T2K, T3v, T3C, T2s, TZ; + V T2t, TU, T37, T49, T46; + T2I = VSUB(T2G, T2H); + T36 = VADD(T2G, T2H); + T1u = VADD(T1s, T1t); + T3r = VSUB(T1s, T1t); + TH = VSUB(Ty, TB); + TC = VADD(Ty, TB); + T1z = VFMACONJ(TO, TN); + TP = VFNMSCONJ(TO, TN); + T1A = VFMACONJ(TR, TQ); + TS = VFMSCONJ(TR, TQ); + T1v = VSUB(T1r, T1u); + T2J = VADD(T1r, T1u); + T3s = VFNMS(LDK(KP414213562), T3r, T3q); + T3B = VFMA(LDK(KP414213562), T3q, T3r); + T2p = VFMA(LDK(KP707106781), TH, TG); + TI = VFNMS(LDK(KP707106781), TH, TG); + T2q = VFMA(LDK(KP707106781), TC, Tv); + TD = VFNMS(LDK(KP707106781), TC, Tv); + T1B = VADD(T1z, T1A); + T3u = VSUB(T1A, T1z); + TY = VSUB(TS, TP); + TT = VADD(TP, TS); + T35 = LDW(&(W[TWVL * 30])); + T4m = LDW(&(W[TWVL * 10])); + T2A = VFNMS(LDK(KP198912367), T2p, T2q); + T2r = VFMA(LDK(KP198912367), T2q, T2p); + T1c = VFNMS(LDK(KP668178637), TD, TI); + TJ = VFMA(LDK(KP668178637), TI, TD); + T1C = VSUB(T1y, T1B); + T2K = VADD(T1y, T1B); + T3v = VFNMS(LDK(KP414213562), T3u, T3t); + T3C = VFMA(LDK(KP414213562), T3t, T3u); + T2s = VFNMS(LDK(KP707106781), TY, TX); + TZ = VFMA(LDK(KP707106781), TY, TX); + T2t = VFMA(LDK(KP707106781), TT, TM); + TU = VFNMS(LDK(KP707106781), TT, TM); + T1M = VSUB(T1v, T1C); + T1D = VADD(T1v, T1C); + T37 = VADD(T2J, T2K); + T2L = VSUB(T2J, T2K); + T3w = VADD(T3s, T3v); + T49 = VSUB(T3s, T3v); + T3D = VSUB(T3B, T3C); + T46 = VADD(T3B, T3C); + T2u = VFNMS(LDK(KP198912367), T2t, T2s); + T2B = VFMA(LDK(KP198912367), T2s, T2t); + T10 = VFNMS(LDK(KP668178637), TZ, TU); + T1d = VFMA(LDK(KP668178637), TU, TZ); + T38 = VZMUL(T35, VSUB(T36, T37)); + T3f = VADD(T36, T37); 
+ T4a = VFMA(LDK(KP923879532), T49, T48); + T4o = VFNMS(LDK(KP923879532), T49, T48); + T4n = VFMA(LDK(KP923879532), T46, T45); + T47 = VFNMS(LDK(KP923879532), T46, T45); + T4u = LDW(&(W[TWVL * 50])); + T3R = VFMA(LDK(KP923879532), T3w, T3p); + T3x = VFNMS(LDK(KP923879532), T3w, T3p); + T3E = VFNMS(LDK(KP923879532), T3D, T3A); + T3S = VFMA(LDK(KP923879532), T3D, T3A); + T3Q = LDW(&(W[TWVL * 58])); + T3Y = LDW(&(W[TWVL * 2])); + } + T28 = VFMA(LDK(KP707106781), T1D, T1o); + T1E = VFNMS(LDK(KP707106781), T1D, T1o); + T1N = VFNMS(LDK(KP707106781), T1M, T1L); + T29 = VFMA(LDK(KP707106781), T1M, T1L); + T4p = VZMUL(T4m, VFNMSI(T4o, T4n)); + T4v = VZMUL(T4u, VFMAI(T4o, T4n)); + T27 = LDW(&(W[TWVL * 6])); + T2h = LDW(&(W[TWVL * 54])); + T3T = VZMUL(T3Q, VFNMSI(T3S, T3R)); + T3Z = VZMUL(T3Y, VFMAI(T3S, T3R)); + T44 = LDW(&(W[TWVL * 18])); + T4g = LDW(&(W[TWVL * 42])); + T2a = VZMUL(T27, VFMAI(T29, T28)); + T2i = VZMUL(T2h, VFNMSI(T29, T28)); + T1h = LDW(&(W[TWVL * 22])); + T1Z = LDW(&(W[TWVL * 38])); + T4b = VZMUL(T44, VFMAI(T4a, T47)); + T4h = VZMUL(T4g, VFNMSI(T4a, T47)); + { + V T1W, T1T, T1, T3W, T2d, T3I, T2e, T12, T2f, T1f, T2F, T2T, T3k, T3K, T11; + V T1e, T32, T2Z, T2l, T4k, T2P, T4e, T2Q, T2w, T2R, T2D, T2v, T2C, T1R, T4s; + V T23, T42, T24, T1U, T25, T1X; + T2F = LDW(&(W[TWVL * 46])); + T2T = LDW(&(W[TWVL * 14])); + T1O = VZMUL(T1h, VFNMSI(T1N, T1E)); + T20 = VZMUL(T1Z, VFMAI(T1N, T1E)); + T3k = LDW(&(W[TWVL * 26])); + T3K = LDW(&(W[TWVL * 34])); + T2M = VZMUL(T2F, VFNMSI(T2L, T2I)); + T2U = VZMUL(T2T, VFMAI(T2L, T2I)); + T11 = VADD(TJ, T10); + T1W = VSUB(TJ, T10); + T1T = VSUB(T1d, T1c); + T1e = VADD(T1c, T1d); + T1 = LDW(&(W[TWVL * 24])); + T3W = LDW(&(W[TWVL * 4])); + T3F = VZMUL(T3k, VFNMSI(T3E, T3x)); + T3L = VZMUL(T3K, VFMAI(T3E, T3x)); + T2d = LDW(&(W[TWVL * 56])); + T3I = LDW(&(W[TWVL * 36])); + T2e = VFMA(LDK(KP831469612), T11, Ts); + T12 = VFNMS(LDK(KP831469612), T11, Ts); + T2f = VFMA(LDK(KP831469612), T1e, T1b); + T1f = VFNMS(LDK(KP831469612), T1e, 
T1b); + T2v = VSUB(T2r, T2u); + T32 = VADD(T2r, T2u); + T2Z = VADD(T2A, T2B); + T2C = VSUB(T2A, T2B); + T2l = LDW(&(W[TWVL * 48])); + T4k = LDW(&(W[TWVL * 12])); + T2P = LDW(&(W[TWVL * 16])); + T4e = LDW(&(W[TWVL * 44])); + T2g = VZMULI(T2d, VFMAI(T2f, T2e)); + T3X = VZMULI(T3W, VFNMSI(T2f, T2e)); + T3J = VZMULI(T3I, VFNMSI(T1f, T12)); + T1g = VZMULI(T1, VFMAI(T1f, T12)); + T2Q = VFNMS(LDK(KP980785280), T2v, T2o); + T2w = VFMA(LDK(KP980785280), T2v, T2o); + T2R = VFMA(LDK(KP980785280), T2C, T2z); + T2D = VFNMS(LDK(KP980785280), T2C, T2z); + T1R = LDW(&(W[TWVL * 40])); + T4s = LDW(&(W[TWVL * 52])); + T23 = LDW(&(W[TWVL * 8])); + T42 = LDW(&(W[TWVL * 20])); + T4f = VZMULI(T4e, VFNMSI(T2R, T2Q)); + T2S = VZMULI(T2P, VFMAI(T2R, T2Q)); + T4l = VZMULI(T4k, VFNMSI(T2D, T2w)); + T2E = VZMULI(T2l, VFMAI(T2D, T2w)); + T24 = VFMA(LDK(KP831469612), T1T, T1S); + T1U = VFNMS(LDK(KP831469612), T1T, T1S); + T25 = VFMA(LDK(KP831469612), T1W, T1V); + T1X = VFNMS(LDK(KP831469612), T1W, T1V); + T2X = LDW(&(W[TWVL * 32])); + T3O = LDW(&(W[TWVL * 60])); + T3b = LDW(&(W[0])); + T3i = LDW(&(W[TWVL * 28])); + T26 = VZMULI(T23, VFMAI(T25, T24)); + T4t = VZMULI(T4s, VFNMSI(T25, T24)); + T43 = VZMULI(T42, VFNMSI(T1X, T1U)); + T1Y = VZMULI(T1R, VFMAI(T1X, T1U)); + T3c = VFMA(LDK(KP980785280), T2Z, T2Y); + T30 = VFNMS(LDK(KP980785280), T2Z, T2Y); + T3d = VFMA(LDK(KP980785280), T32, T31); + T33 = VFNMS(LDK(KP980785280), T32, T31); + } + } + { + V T3e, T3P, T3j, T34, T2c, T4j, T2k, T4d, T1P, T1Q, T4x, T4w, T2j, T4c, T21; + V T22, T4r, T4q, T2b, T4i, T3h, T3H, T2N, T2O, T41, T40, T3g, T3G, T2V, T2W; + V T3V, T3U, T39, T3M; + T1P = VADD(T1g, T1O); + T1Q = VCONJ(VSUB(T1O, T1g)); + T4x = VCONJ(VSUB(T4v, T4t)); + T4w = VADD(T4t, T4v); + T2j = VADD(T2g, T2i); + T2k = VCONJ(VSUB(T2i, T2g)); + T4d = VCONJ(VSUB(T4b, T43)); + T4c = VADD(T43, T4b); + T3e = VZMULI(T3b, VFMAI(T3d, T3c)); + T3P = VZMULI(T3O, VFNMSI(T3d, T3c)); + T3j = VZMULI(T3i, VFNMSI(T33, T30)); + T34 = VZMULI(T2X, VFMAI(T33, T30)); + 
ST(&(Rp[WS(rs, 6)]), T1P, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 13)]), T4w, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 14)]), T2j, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 5)]), T4c, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 13)]), T4x, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 6)]), T1Q, -ms, &(Rm[0])); + T21 = VADD(T1Y, T20); + T22 = VCONJ(VSUB(T20, T1Y)); + T4r = VCONJ(VSUB(T4p, T4l)); + T4q = VADD(T4l, T4p); + T2b = VADD(T26, T2a); + T2c = VCONJ(VSUB(T2a, T26)); + T4j = VCONJ(VSUB(T4h, T4f)); + T4i = VADD(T4f, T4h); + ST(&(Rm[WS(rs, 5)]), T4d, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 14)]), T2k, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 10)]), T21, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 3)]), T4q, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), T2b, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 11)]), T4i, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 3)]), T4r, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 10)]), T22, -ms, &(Rm[0])); + T2N = VADD(T2E, T2M); + T2O = VCONJ(VSUB(T2M, T2E)); + T41 = VCONJ(VSUB(T3Z, T3X)); + T40 = VADD(T3X, T3Z); + T3g = VADD(T3e, T3f); + T3h = VCONJ(VSUB(T3f, T3e)); + T3H = VCONJ(VSUB(T3F, T3j)); + T3G = VADD(T3j, T3F); + ST(&(Rm[WS(rs, 11)]), T4j, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), T2c, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 12)]), T2N, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 1)]), T40, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[0]), T3g, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 7)]), T3G, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 1)]), T41, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 12)]), T2O, -ms, &(Rm[0])); + T2V = VADD(T2S, T2U); + T2W = VCONJ(VSUB(T2U, T2S)); + T3V = VCONJ(VSUB(T3T, T3P)); + T3U = VADD(T3P, T3T); + T39 = VADD(T34, T38); + T3a = VCONJ(VSUB(T38, T34)); + T3N = VCONJ(VSUB(T3L, T3J)); + T3M = VADD(T3J, T3L); + ST(&(Rm[WS(rs, 7)]), T3H, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[0]), T3h, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 4)]), T2V, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 15)]), T3U, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 8)]), T39, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 9)]), T3M, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 
15)]), T3V, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 4)]), T2W, -ms, &(Rm[0])); + } + } + } + ST(&(Rm[WS(rs, 9)]), T3N, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 8)]), T3a, -ms, &(Rm[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + VTW(1, 20), + VTW(1, 21), + VTW(1, 22), + VTW(1, 23), + VTW(1, 24), + VTW(1, 25), + VTW(1, 26), + VTW(1, 27), + VTW(1, 28), + VTW(1, 29), + VTW(1, 30), + VTW(1, 31), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 32, XSIMD_STRING("hc2cbdftv_32"), twinstr, &GENUS, {119, 62, 130, 0} }; + +void XSIMD(codelet_hc2cbdftv_32) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_32, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 32 -dif -sign 1 -name hc2cbdftv_32 -include hc2cbv.h */ + +/* + * This function contains 249 FP additions, 104 FP multiplications, + * (or, 233 additions, 88 multiplications, 16 fused multiply/add), + * 161 stack variables, 7 constants, and 64 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for 
(m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 62)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(128, rs)) { + V T1W, T21, Tf, T2c, T1t, T2r, T3T, T4m, Ty, T2q, T3P, T4n, T1n, T2d, T1T; + V T22, T1E, T24, T3I, T4p, TU, T2n, T1i, T2h, T1L, T25, T3L, T4q, T1f, T2o; + V T1j, T2k; + { + V T2, T4, T1Z, T1p, T1r, T20, T9, T1U, Td, T1V, T3, T1q, T6, T8, T7; + V Tc, Tb, Ta, T5, Te, T1o, T1s, T3R, T3S, Tj, T1N, Tw, T1Q, Tn, T1O; + V Ts, T1R, Tg, Ti, Th, Tv, Tu, Tt, Tk, Tm, Tl, Tp, Tr, Tq, To; + V Tx, T3N, T3O, T1l, T1m, T1P, T1S; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 15)]), -ms, &(Rm[WS(rs, 1)])); + T4 = VCONJ(T3); + T1Z = VADD(T2, T4); + T1p = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + T1q = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + T1r = VCONJ(T1q); + T20 = VADD(T1p, T1r); + T6 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 11)]), -ms, &(Rm[WS(rs, 1)])); + T8 = VCONJ(T7); + T9 = VSUB(T6, T8); + T1U = VADD(T6, T8); + Tc = LD(&(Rp[WS(rs, 12)]), ms, &(Rp[0])); + Ta = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VCONJ(Ta); + Td = VSUB(Tb, Tc); + T1V = VADD(Tb, Tc); + T1W = VSUB(T1U, T1V); + T21 = VSUB(T1Z, T20); + T5 = VSUB(T2, T4); + Te = VMUL(LDK(KP707106781), VADD(T9, Td)); + Tf = VSUB(T5, Te); + T2c = VADD(T5, Te); + T1o = VMUL(LDK(KP707106781), VSUB(T9, Td)); + T1s = VSUB(T1p, T1r); + T1t = VSUB(T1o, T1s); + T2r = VADD(T1s, T1o); + T3R = VADD(T1Z, T20); + T3S = VADD(T1U, T1V); + T3T = VSUB(T3R, T3S); + T4m = VADD(T3R, T3S); + Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Th = LD(&(Rm[WS(rs, 13)]), -ms, &(Rm[WS(rs, 1)])); + Ti = VCONJ(Th); + Tj = VSUB(Tg, Ti); + T1N = VADD(Tg, Ti); + Tv = LD(&(Rp[WS(rs, 14)]), ms, &(Rp[0])); + Tt = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tu = VCONJ(Tt); + Tw = VSUB(Tu, Tv); + T1Q = VADD(Tu, Tv); + Tk = LD(&(Rp[WS(rs, 10)]), ms, &(Rp[0])); + Tl = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tm = 
VCONJ(Tl); + Tn = VSUB(Tk, Tm); + T1O = VADD(Tk, Tm); + Tp = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + Tq = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + Tr = VCONJ(Tq); + Ts = VSUB(Tp, Tr); + T1R = VADD(Tp, Tr); + To = VFMA(LDK(KP382683432), Tj, VMUL(LDK(KP923879532), Tn)); + Tx = VFNMS(LDK(KP382683432), Tw, VMUL(LDK(KP923879532), Ts)); + Ty = VSUB(To, Tx); + T2q = VADD(To, Tx); + T3N = VADD(T1N, T1O); + T3O = VADD(T1Q, T1R); + T3P = VSUB(T3N, T3O); + T4n = VADD(T3N, T3O); + T1l = VFNMS(LDK(KP382683432), Tn, VMUL(LDK(KP923879532), Tj)); + T1m = VFMA(LDK(KP923879532), Tw, VMUL(LDK(KP382683432), Ts)); + T1n = VSUB(T1l, T1m); + T2d = VADD(T1l, T1m); + T1P = VSUB(T1N, T1O); + T1S = VSUB(T1Q, T1R); + T1T = VMUL(LDK(KP707106781), VSUB(T1P, T1S)); + T22 = VMUL(LDK(KP707106781), VADD(T1P, T1S)); + } + { + V TD, T1B, TR, T1y, TH, T1C, TM, T1z, TA, TC, TB, TO, TQ, TP, TG; + V TF, TE, TJ, TL, TK, T1A, T1D, T3G, T3H, TN, T2f, TT, T2g, TI, TS; + V TY, T1I, T1c, T1F, T12, T1J, T17, T1G, TV, TX, TW, T1b, T1a, T19, T11; + V T10, TZ, T14, T16, T15, T1H, T1K, T3J, T3K, T18, T2i, T1e, T2j, T13, T1d; + TA = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + TB = LD(&(Rm[WS(rs, 10)]), -ms, &(Rm[0])); + TC = VCONJ(TB); + TD = VSUB(TA, TC); + T1B = VADD(TA, TC); + TO = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + TP = LD(&(Rm[WS(rs, 14)]), -ms, &(Rm[0])); + TQ = VCONJ(TP); + TR = VSUB(TO, TQ); + T1y = VADD(TO, TQ); + TG = LD(&(Rp[WS(rs, 13)]), ms, &(Rp[WS(rs, 1)])); + TE = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + TF = VCONJ(TE); + TH = VSUB(TF, TG); + T1C = VADD(TF, TG); + TJ = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + TK = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TL = VCONJ(TK); + TM = VSUB(TJ, TL); + T1z = VADD(TJ, TL); + T1A = VSUB(T1y, T1z); + T1D = VSUB(T1B, T1C); + T1E = VFNMS(LDK(KP382683432), T1D, VMUL(LDK(KP923879532), T1A)); + T24 = VFMA(LDK(KP382683432), T1A, VMUL(LDK(KP923879532), T1D)); + T3G = VADD(T1y, T1z); + T3H = VADD(T1B, T1C); + T3I = VSUB(T3G, T3H); + T4p = VADD(T3G, T3H); 
+ TI = VMUL(LDK(KP707106781), VSUB(TD, TH)); + TN = VSUB(TI, TM); + T2f = VADD(TM, TI); + TS = VMUL(LDK(KP707106781), VADD(TD, TH)); + TT = VSUB(TR, TS); + T2g = VADD(TR, TS); + TU = VFMA(LDK(KP831469612), TN, VMUL(LDK(KP555570233), TT)); + T2n = VFNMS(LDK(KP195090322), T2f, VMUL(LDK(KP980785280), T2g)); + T1i = VFNMS(LDK(KP555570233), TN, VMUL(LDK(KP831469612), TT)); + T2h = VFMA(LDK(KP980785280), T2f, VMUL(LDK(KP195090322), T2g)); + TV = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + TW = LD(&(Rm[WS(rs, 12)]), -ms, &(Rm[0])); + TX = VCONJ(TW); + TY = VSUB(TV, TX); + T1I = VADD(TV, TX); + T1b = LD(&(Rp[WS(rs, 15)]), ms, &(Rp[WS(rs, 1)])); + T19 = LD(&(Rm[0]), -ms, &(Rm[0])); + T1a = VCONJ(T19); + T1c = VSUB(T1a, T1b); + T1F = VADD(T1a, T1b); + T11 = LD(&(Rp[WS(rs, 11)]), ms, &(Rp[WS(rs, 1)])); + TZ = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T10 = VCONJ(TZ); + T12 = VSUB(T10, T11); + T1J = VADD(T10, T11); + T14 = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + T15 = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + T16 = VCONJ(T15); + T17 = VSUB(T14, T16); + T1G = VADD(T14, T16); + T1H = VSUB(T1F, T1G); + T1K = VSUB(T1I, T1J); + T1L = VFMA(LDK(KP923879532), T1H, VMUL(LDK(KP382683432), T1K)); + T25 = VFNMS(LDK(KP382683432), T1H, VMUL(LDK(KP923879532), T1K)); + T3J = VADD(T1F, T1G); + T3K = VADD(T1I, T1J); + T3L = VSUB(T3J, T3K); + T4q = VADD(T3J, T3K); + T13 = VMUL(LDK(KP707106781), VSUB(TY, T12)); + T18 = VSUB(T13, T17); + T2i = VADD(T17, T13); + T1d = VMUL(LDK(KP707106781), VADD(TY, T12)); + T1e = VSUB(T1c, T1d); + T2j = VADD(T1c, T1d); + T1f = VFNMS(LDK(KP555570233), T1e, VMUL(LDK(KP831469612), T18)); + T2o = VFMA(LDK(KP195090322), T2i, VMUL(LDK(KP980785280), T2j)); + T1j = VFMA(LDK(KP555570233), T18, VMUL(LDK(KP831469612), T1e)); + T2k = VFNMS(LDK(KP195090322), T2j, VMUL(LDK(KP980785280), T2i)); + } + { + V T4L, T4G, T4s, T4y, T3W, T4g, T42, T4a, T3g, T4e, T3o, T3E, T1w, T46, T2M; + V T40, T2u, T4w, T2C, T4k, T36, T3A, T3i, T3s, T28, T2O, T2w, T2G, T2Y, T4K; + V T3y, T4C; + { + 
V T4E, T4F, T4D, T4o, T4r, T4l, T4x, T3Q, T48, T3V, T49, T3M, T3U, T3F, T4f; + V T41, T47, T3c, T3n, T3f, T3m, T3a, T3b, T3d, T3e, T39, T4d, T3l, T3D, T1h; + V T2K, T1v, T2L, Tz, T1g, T1k, T1u, T1, T45, T2J, T3Z, T2m, T2A, T2t, T2B; + V T2e, T2l, T2p, T2s, T2b, T4v, T2z, T4j; + T4E = VADD(T4m, T4n); + T4F = VADD(T4p, T4q); + T4L = VADD(T4E, T4F); + T4D = LDW(&(W[TWVL * 30])); + T4G = VZMUL(T4D, VSUB(T4E, T4F)); + T4o = VSUB(T4m, T4n); + T4r = VBYI(VSUB(T4p, T4q)); + T4l = LDW(&(W[TWVL * 46])); + T4s = VZMUL(T4l, VSUB(T4o, T4r)); + T4x = LDW(&(W[TWVL * 14])); + T4y = VZMUL(T4x, VADD(T4o, T4r)); + T3M = VMUL(LDK(KP707106781), VSUB(T3I, T3L)); + T3Q = VBYI(VSUB(T3M, T3P)); + T48 = VBYI(VADD(T3P, T3M)); + T3U = VMUL(LDK(KP707106781), VADD(T3I, T3L)); + T3V = VSUB(T3T, T3U); + T49 = VADD(T3T, T3U); + T3F = LDW(&(W[TWVL * 22])); + T3W = VZMUL(T3F, VADD(T3Q, T3V)); + T4f = LDW(&(W[TWVL * 54])); + T4g = VZMUL(T4f, VSUB(T49, T48)); + T41 = LDW(&(W[TWVL * 38])); + T42 = VZMUL(T41, VSUB(T3V, T3Q)); + T47 = LDW(&(W[TWVL * 6])); + T4a = VZMUL(T47, VADD(T48, T49)); + T3a = VADD(T1t, T1n); + T3b = VADD(TU, T1f); + T3c = VBYI(VADD(T3a, T3b)); + T3n = VBYI(VSUB(T3b, T3a)); + T3d = VADD(Tf, Ty); + T3e = VADD(T1i, T1j); + T3f = VADD(T3d, T3e); + T3m = VSUB(T3d, T3e); + T39 = LDW(&(W[TWVL * 4])); + T3g = VZMULI(T39, VADD(T3c, T3f)); + T4d = LDW(&(W[TWVL * 56])); + T4e = VZMULI(T4d, VSUB(T3f, T3c)); + T3l = LDW(&(W[TWVL * 36])); + T3o = VZMULI(T3l, VSUB(T3m, T3n)); + T3D = LDW(&(W[TWVL * 24])); + T3E = VZMULI(T3D, VADD(T3n, T3m)); + Tz = VSUB(Tf, Ty); + T1g = VSUB(TU, T1f); + T1h = VSUB(Tz, T1g); + T2K = VADD(Tz, T1g); + T1k = VSUB(T1i, T1j); + T1u = VSUB(T1n, T1t); + T1v = VBYI(VSUB(T1k, T1u)); + T2L = VBYI(VADD(T1u, T1k)); + T1 = LDW(&(W[TWVL * 20])); + T1w = VZMULI(T1, VADD(T1h, T1v)); + T45 = LDW(&(W[TWVL * 8])); + T46 = VZMULI(T45, VADD(T2K, T2L)); + T2J = LDW(&(W[TWVL * 52])); + T2M = VZMULI(T2J, VSUB(T2K, T2L)); + T3Z = LDW(&(W[TWVL * 40])); + T40 = VZMULI(T3Z, VSUB(T1h, T1v)); 
+ T2e = VSUB(T2c, T2d); + T2l = VSUB(T2h, T2k); + T2m = VSUB(T2e, T2l); + T2A = VADD(T2e, T2l); + T2p = VSUB(T2n, T2o); + T2s = VSUB(T2q, T2r); + T2t = VBYI(VSUB(T2p, T2s)); + T2B = VBYI(VADD(T2s, T2p)); + T2b = LDW(&(W[TWVL * 44])); + T2u = VZMULI(T2b, VSUB(T2m, T2t)); + T4v = LDW(&(W[TWVL * 16])); + T4w = VZMULI(T4v, VADD(T2m, T2t)); + T2z = LDW(&(W[TWVL * 12])); + T2C = VZMULI(T2z, VADD(T2A, T2B)); + T4j = LDW(&(W[TWVL * 48])); + T4k = VZMULI(T4j, VSUB(T2A, T2B)); + { + V T32, T3q, T35, T3r, T30, T31, T33, T34, T2Z, T3z, T3h, T3p, T1Y, T2E, T27; + V T2F, T1M, T1X, T23, T26, T1x, T2N, T2v, T2D, T2U, T3x, T2X, T3w, T2S, T2T; + V T2V, T2W, T2R, T4J, T3v, T4B; + T30 = VADD(T21, T22); + T31 = VADD(T1E, T1L); + T32 = VADD(T30, T31); + T3q = VSUB(T30, T31); + T33 = VADD(T1W, T1T); + T34 = VADD(T24, T25); + T35 = VBYI(VADD(T33, T34)); + T3r = VBYI(VSUB(T34, T33)); + T2Z = LDW(&(W[TWVL * 58])); + T36 = VZMUL(T2Z, VSUB(T32, T35)); + T3z = LDW(&(W[TWVL * 26])); + T3A = VZMUL(T3z, VADD(T3q, T3r)); + T3h = LDW(&(W[TWVL * 2])); + T3i = VZMUL(T3h, VADD(T32, T35)); + T3p = LDW(&(W[TWVL * 34])); + T3s = VZMUL(T3p, VSUB(T3q, T3r)); + T1M = VSUB(T1E, T1L); + T1X = VSUB(T1T, T1W); + T1Y = VBYI(VSUB(T1M, T1X)); + T2E = VBYI(VADD(T1X, T1M)); + T23 = VSUB(T21, T22); + T26 = VSUB(T24, T25); + T27 = VSUB(T23, T26); + T2F = VADD(T23, T26); + T1x = LDW(&(W[TWVL * 18])); + T28 = VZMUL(T1x, VADD(T1Y, T27)); + T2N = LDW(&(W[TWVL * 50])); + T2O = VZMUL(T2N, VSUB(T2F, T2E)); + T2v = LDW(&(W[TWVL * 42])); + T2w = VZMUL(T2v, VSUB(T27, T1Y)); + T2D = LDW(&(W[TWVL * 10])); + T2G = VZMUL(T2D, VADD(T2E, T2F)); + T2S = VADD(T2c, T2d); + T2T = VADD(T2n, T2o); + T2U = VADD(T2S, T2T); + T3x = VSUB(T2S, T2T); + T2V = VADD(T2r, T2q); + T2W = VADD(T2h, T2k); + T2X = VBYI(VADD(T2V, T2W)); + T3w = VBYI(VSUB(T2W, T2V)); + T2R = LDW(&(W[TWVL * 60])); + T2Y = VZMULI(T2R, VSUB(T2U, T2X)); + T4J = LDW(&(W[0])); + T4K = VZMULI(T4J, VADD(T2X, T2U)); + T3v = LDW(&(W[TWVL * 28])); + T3y = VZMULI(T3v, VADD(T3w, T3x)); 
+ T4B = LDW(&(W[TWVL * 32])); + T4C = VZMULI(T4B, VSUB(T3x, T3w)); + } + } + { + V T29, T4M, T2P, T4t, T4N, T2a, T4u, T2Q, T2x, T4H, T2H, T4z, T4I, T2y, T4A; + V T2I, T37, T4h, T3B, T3X, T4i, T38, T3Y, T3C, T3j, T4b, T3t, T43, T4c, T3k; + V T44, T3u; + T29 = VADD(T1w, T28); + ST(&(Rp[WS(rs, 5)]), T29, ms, &(Rp[WS(rs, 1)])); + T4M = VADD(T4K, T4L); + ST(&(Rp[0]), T4M, ms, &(Rp[0])); + T2P = VADD(T2M, T2O); + ST(&(Rp[WS(rs, 13)]), T2P, ms, &(Rp[WS(rs, 1)])); + T4t = VADD(T4k, T4s); + ST(&(Rp[WS(rs, 12)]), T4t, ms, &(Rp[0])); + T4N = VCONJ(VSUB(T4L, T4K)); + ST(&(Rm[0]), T4N, -ms, &(Rm[0])); + T2a = VCONJ(VSUB(T28, T1w)); + ST(&(Rm[WS(rs, 5)]), T2a, -ms, &(Rm[WS(rs, 1)])); + T4u = VCONJ(VSUB(T4s, T4k)); + ST(&(Rm[WS(rs, 12)]), T4u, -ms, &(Rm[0])); + T2Q = VCONJ(VSUB(T2O, T2M)); + ST(&(Rm[WS(rs, 13)]), T2Q, -ms, &(Rm[WS(rs, 1)])); + T2x = VADD(T2u, T2w); + ST(&(Rp[WS(rs, 11)]), T2x, ms, &(Rp[WS(rs, 1)])); + T4H = VADD(T4C, T4G); + ST(&(Rp[WS(rs, 8)]), T4H, ms, &(Rp[0])); + T2H = VADD(T2C, T2G); + ST(&(Rp[WS(rs, 3)]), T2H, ms, &(Rp[WS(rs, 1)])); + T4z = VADD(T4w, T4y); + ST(&(Rp[WS(rs, 4)]), T4z, ms, &(Rp[0])); + T4I = VCONJ(VSUB(T4G, T4C)); + ST(&(Rm[WS(rs, 8)]), T4I, -ms, &(Rm[0])); + T2y = VCONJ(VSUB(T2w, T2u)); + ST(&(Rm[WS(rs, 11)]), T2y, -ms, &(Rm[WS(rs, 1)])); + T4A = VCONJ(VSUB(T4y, T4w)); + ST(&(Rm[WS(rs, 4)]), T4A, -ms, &(Rm[0])); + T2I = VCONJ(VSUB(T2G, T2C)); + ST(&(Rm[WS(rs, 3)]), T2I, -ms, &(Rm[WS(rs, 1)])); + T37 = VADD(T2Y, T36); + ST(&(Rp[WS(rs, 15)]), T37, ms, &(Rp[WS(rs, 1)])); + T4h = VADD(T4e, T4g); + ST(&(Rp[WS(rs, 14)]), T4h, ms, &(Rp[0])); + T3B = VADD(T3y, T3A); + ST(&(Rp[WS(rs, 7)]), T3B, ms, &(Rp[WS(rs, 1)])); + T3X = VADD(T3E, T3W); + ST(&(Rp[WS(rs, 6)]), T3X, ms, &(Rp[0])); + T4i = VCONJ(VSUB(T4g, T4e)); + ST(&(Rm[WS(rs, 14)]), T4i, -ms, &(Rm[0])); + T38 = VCONJ(VSUB(T36, T2Y)); + ST(&(Rm[WS(rs, 15)]), T38, -ms, &(Rm[WS(rs, 1)])); + T3Y = VCONJ(VSUB(T3W, T3E)); + ST(&(Rm[WS(rs, 6)]), T3Y, -ms, &(Rm[0])); + T3C = VCONJ(VSUB(T3A, T3y)); + 
ST(&(Rm[WS(rs, 7)]), T3C, -ms, &(Rm[WS(rs, 1)])); + T3j = VADD(T3g, T3i); + ST(&(Rp[WS(rs, 1)]), T3j, ms, &(Rp[WS(rs, 1)])); + T4b = VADD(T46, T4a); + ST(&(Rp[WS(rs, 2)]), T4b, ms, &(Rp[0])); + T3t = VADD(T3o, T3s); + ST(&(Rp[WS(rs, 9)]), T3t, ms, &(Rp[WS(rs, 1)])); + T43 = VADD(T40, T42); + ST(&(Rp[WS(rs, 10)]), T43, ms, &(Rp[0])); + T4c = VCONJ(VSUB(T4a, T46)); + ST(&(Rm[WS(rs, 2)]), T4c, -ms, &(Rm[0])); + T3k = VCONJ(VSUB(T3i, T3g)); + ST(&(Rm[WS(rs, 1)]), T3k, -ms, &(Rm[WS(rs, 1)])); + T44 = VCONJ(VSUB(T42, T40)); + ST(&(Rm[WS(rs, 10)]), T44, -ms, &(Rm[0])); + T3u = VCONJ(VSUB(T3s, T3o)); + ST(&(Rm[WS(rs, 9)]), T3u, -ms, &(Rm[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + VTW(1, 20), + VTW(1, 21), + VTW(1, 22), + VTW(1, 23), + VTW(1, 24), + VTW(1, 25), + VTW(1, 26), + VTW(1, 27), + VTW(1, 28), + VTW(1, 29), + VTW(1, 30), + VTW(1, 31), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 32, XSIMD_STRING("hc2cbdftv_32"), twinstr, &GENUS, {233, 88, 16, 0} }; + +void XSIMD(codelet_hc2cbdftv_32) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_32, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at 
your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dif -sign 1 -name hc2cbdftv_4 -include hc2cbv.h */ + +/* + * This function contains 15 FP additions, 12 FP multiplications, + * (or, 9 additions, 6 multiplications, 6 fused multiply/add), + * 20 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 6)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(16, rs)) { + V T2, T3, T5, T6, Tf, T1, T9, Ta, T4, Tb, T7, Tc, Th, T8, Tg; + V Te, Td, Ti, Tj; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T5 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T6 = LD(&(Rm[0]), -ms, &(Rm[0])); + Tf = LDW(&(W[0])); + T1 = LDW(&(W[TWVL * 4])); + T9 = LDW(&(W[TWVL * 2])); + Ta = VFMACONJ(T3, T2); + T4 = VFNMSCONJ(T3, T2); + Tb = VFMACONJ(T6, T5); + T7 = VFNMSCONJ(T6, T5); + Tc = VZMUL(T9, VSUB(Ta, Tb)); + Th = VADD(Ta, Tb); + T8 = VZMULI(T1, VFNMSI(T7, 
T4)); + Tg = VZMULI(Tf, VFMAI(T7, T4)); + Te = VCONJ(VSUB(Tc, T8)); + Td = VADD(T8, Tc); + Ti = VADD(Tg, Th); + Tj = VCONJ(VSUB(Th, Tg)); + ST(&(Rm[WS(rs, 1)]), Te, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 1)]), Td, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[0]), Ti, ms, &(Rp[0])); + ST(&(Rm[0]), Tj, -ms, &(Rm[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 4, XSIMD_STRING("hc2cbdftv_4"), twinstr, &GENUS, {9, 6, 6, 0} }; + +void XSIMD(codelet_hc2cbdftv_4) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_4, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dif -sign 1 -name hc2cbdftv_4 -include hc2cbv.h */ + +/* + * This function contains 15 FP additions, 6 FP multiplications, + * (or, 15 additions, 6 multiplications, 0 fused multiply/add), + * 22 stack variables, 0 constants, and 8 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 6)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(16, rs)) { + V T5, Tc, T9, Td, T2, T4, T3, T6, T8, T7, Tj, Ti, Th, Tk, Tl; + V Ta, Te, T1, Tb, Tf, Tg; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T4 = VCONJ(T3); + T5 = VSUB(T2, T4); + Tc = VADD(T2, T4); + T6 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T7 = LD(&(Rm[0]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + T9 = VBYI(VSUB(T6, T8)); + Td = VADD(T6, T8); + Tj = VADD(Tc, Td); + Th = LDW(&(W[0])); + Ti = VZMULI(Th, VADD(T5, T9)); + Tk = VADD(Ti, Tj); + ST(&(Rp[0]), Tk, ms, &(Rp[0])); + Tl = VCONJ(VSUB(Tj, Ti)); + ST(&(Rm[0]), Tl, 
-ms, &(Rm[0])); + T1 = LDW(&(W[TWVL * 4])); + Ta = VZMULI(T1, VSUB(T5, T9)); + Tb = LDW(&(W[TWVL * 2])); + Te = VZMUL(Tb, VSUB(Tc, Td)); + Tf = VADD(Ta, Te); + ST(&(Rp[WS(rs, 1)]), Tf, ms, &(Rp[WS(rs, 1)])); + Tg = VCONJ(VSUB(Te, Ta)); + ST(&(Rm[WS(rs, 1)]), Tg, -ms, &(Rm[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 4, XSIMD_STRING("hc2cbdftv_4"), twinstr, &GENUS, {15, 6, 0, 0} }; + +void XSIMD(codelet_hc2cbdftv_4) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_4, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 6 -dif -sign 1 -name hc2cbdftv_6 -include hc2cbv.h */ + +/* + * This function contains 29 FP additions, 24 FP multiplications, + * (or, 17 additions, 12 multiplications, 12 fused multiply/add), + * 38 stack variables, 2 constants, and 12 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) { + V Tv, Tn, Tr, Te, T4, Tg, Ta, Tf, T7, T1, Td, T2, T3, T8, T9; + V T5, T6, Th, Tj, Tb, Tp, Tx, Ti, Tc, To, Tk, Ts, Tq, Tw, Tm; + V Tl, Tu, Tt, Tz, Ty; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T9 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T6 = LD(&(Rm[0]), -ms, &(Rm[0])); + Tv = LDW(&(W[0])); + Tn = LDW(&(W[TWVL * 8])); + Tr = LDW(&(W[TWVL * 6])); + Te = VFMACONJ(T3, T2); + T4 = VFNMSCONJ(T3, T2); + Tg = VFMACONJ(T9, T8); + Ta = VFMSCONJ(T9, T8); + Tf = VFMACONJ(T6, T5); + T7 = VFNMSCONJ(T6, T5); + T1 = LDW(&(W[TWVL 
* 4])); + Td = LDW(&(W[TWVL * 2])); + Th = VADD(Tf, Tg); + Tj = VMUL(LDK(KP866025403), VSUB(Tf, Tg)); + Tb = VADD(T7, Ta); + Tp = VMUL(LDK(KP866025403), VSUB(T7, Ta)); + Tx = VADD(Te, Th); + Ti = VFNMS(LDK(KP500000000), Th, Te); + Tc = VZMULI(T1, VADD(T4, Tb)); + To = VFNMS(LDK(KP500000000), Tb, T4); + Tk = VZMUL(Td, VFNMSI(Tj, Ti)); + Ts = VZMUL(Tr, VFMAI(Tj, Ti)); + Tq = VZMULI(Tn, VFNMSI(Tp, To)); + Tw = VZMULI(Tv, VFMAI(Tp, To)); + Tm = VCONJ(VSUB(Tk, Tc)); + Tl = VADD(Tc, Tk); + Tu = VCONJ(VSUB(Ts, Tq)); + Tt = VADD(Tq, Ts); + Tz = VCONJ(VSUB(Tx, Tw)); + Ty = VADD(Tw, Tx); + ST(&(Rm[WS(rs, 1)]), Tm, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 1)]), Tl, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), Tu, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 2)]), Tt, ms, &(Rp[0])); + ST(&(Rm[0]), Tz, -ms, &(Rm[0])); + ST(&(Rp[0]), Ty, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 6, XSIMD_STRING("hc2cbdftv_6"), twinstr, &GENUS, {17, 12, 12, 0} }; + +void XSIMD(codelet_hc2cbdftv_6) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_6, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 6 -dif -sign 1 -name hc2cbdftv_6 -include hc2cbv.h */ + +/* + * This function contains 29 FP additions, 14 FP multiplications, + * (or, 27 additions, 12 multiplications, 2 fused multiply/add), + * 41 stack variables, 2 constants, and 12 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp 
= Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) { + V T5, Th, Te, Ts, Tk, Tm, T2, T4, T3, T6, Tc, T8, Tb, T7, Ta; + V T9, Td, Ti, Tj, TA, Tf, Tn, Tv, Tt, Tz, T1, Tl, Tg, Tu, Tr; + V Tq, Ty, To, Tp, TC, TB, Tx, Tw; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T4 = VCONJ(T3); + T5 = VSUB(T2, T4); + Th = VADD(T2, T4); + T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T7 = LD(&(Rm[0]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tb = VCONJ(Ta); + T9 = VSUB(T6, T8); + Td = VSUB(Tb, Tc); + Te = VADD(T9, Td); + Ts = VBYI(VMUL(LDK(KP866025403), VSUB(T9, Td))); + Ti = VADD(T6, T8); + Tj = VADD(Tb, Tc); + Tk = VADD(Ti, Tj); + Tm = VBYI(VMUL(LDK(KP866025403), VSUB(Ti, Tj))); + TA = VADD(Th, Tk); + T1 = LDW(&(W[TWVL * 4])); + Tf = VZMULI(T1, VADD(T5, Te)); + Tl = VFNMS(LDK(KP500000000), Tk, Th); + Tg = LDW(&(W[TWVL * 2])); + Tn = VZMUL(Tg, VSUB(Tl, Tm)); + Tu = LDW(&(W[TWVL * 6])); + Tv = VZMUL(Tu, VADD(Tm, Tl)); + Tr = VFNMS(LDK(KP500000000), Te, T5); + Tq = LDW(&(W[TWVL * 8])); + Tt = VZMULI(Tq, VSUB(Tr, Ts)); + Ty = LDW(&(W[0])); + Tz = VZMULI(Ty, VADD(Ts, Tr)); + To = VADD(Tf, Tn); + ST(&(Rp[WS(rs, 1)]), To, ms, &(Rp[WS(rs, 1)])); + Tp = VCONJ(VSUB(Tn, Tf)); + ST(&(Rm[WS(rs, 1)]), Tp, -ms, &(Rm[WS(rs, 1)])); + TC = VCONJ(VSUB(TA, Tz)); + ST(&(Rm[0]), TC, -ms, &(Rm[0])); + TB = VADD(Tz, TA); + ST(&(Rp[0]), TB, ms, &(Rp[0])); + Tx = VCONJ(VSUB(Tv, Tt)); + ST(&(Rm[WS(rs, 2)]), Tx, -ms, &(Rm[0])); + Tw = VADD(Tt, Tv); + ST(&(Rp[WS(rs, 2)]), Tw, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 6, XSIMD_STRING("hc2cbdftv_6"), twinstr, &GENUS, {27, 12, 2, 0} }; + +void XSIMD(codelet_hc2cbdftv_6) (planner *p) { + 
X(khc2c_register) (p, hc2cbdftv_6, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cbdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 8 -dif -sign 1 -name hc2cbdftv_8 -include hc2cbv.h */ + +/* + * This function contains 41 FP additions, 32 FP multiplications, + * (or, 23 additions, 14 multiplications, 18 fused multiply/add), + * 51 stack variables, 1 constants, and 16 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { 
+ INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) { + V TJ, T4, Tf, TB, TD, TE, Tm, T1, Tj, TF, Tp, Tb, Tg, Tt, Tx; + V T2, T3, Td, Te, T5, T6, T8, T9, Tn, T7, To, Ta, Tk, Tl, TG; + V TL, Tq, Tc, Tu, Th, Tv, Ty, Tw, TC, Ti, TK, TA, Tz, TI, TH; + V Ts, Tr, TN, TM; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Td = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Te = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T5 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + T9 = LD(&(Rm[0]), -ms, &(Rm[0])); + TJ = LDW(&(W[0])); + Tk = VFMACONJ(T3, T2); + T4 = VFNMSCONJ(T3, T2); + Tl = VFMACONJ(Te, Td); + Tf = VFNMSCONJ(Te, Td); + Tn = VFMACONJ(T6, T5); + T7 = VFNMSCONJ(T6, T5); + To = VFMACONJ(T9, T8); + Ta = VFMSCONJ(T9, T8); + TB = LDW(&(W[TWVL * 8])); + TD = LDW(&(W[TWVL * 6])); + TE = VADD(Tk, Tl); + Tm = VSUB(Tk, Tl); + T1 = LDW(&(W[TWVL * 12])); + Tj = LDW(&(W[TWVL * 10])); + TF = VADD(Tn, To); + Tp = VSUB(Tn, To); + Tb = VADD(T7, Ta); + Tg = VSUB(T7, Ta); + Tt = LDW(&(W[TWVL * 4])); + Tx = LDW(&(W[TWVL * 2])); + TG = VZMUL(TD, VSUB(TE, TF)); + TL = VADD(TE, TF); + Tq = VZMUL(Tj, VFNMSI(Tp, Tm)); + Tc = VFMA(LDK(KP707106781), Tb, T4); + Tu = VFNMS(LDK(KP707106781), Tb, T4); + Th = VFMA(LDK(KP707106781), Tg, Tf); + Tv = VFNMS(LDK(KP707106781), Tg, Tf); + Ty = VZMUL(Tx, VFMAI(Tp, Tm)); + Tw = VZMULI(Tt, VFNMSI(Tv, Tu)); + TC = VZMULI(TB, VFMAI(Tv, Tu)); + Ti = VZMULI(T1, VFNMSI(Th, Tc)); + TK = VZMULI(TJ, VFMAI(Th, Tc)); + TA = VCONJ(VSUB(Ty, Tw)); + Tz = VADD(Tw, Ty); + TI = VCONJ(VSUB(TG, TC)); + TH = VADD(TC, TG); + Ts = VCONJ(VSUB(Tq, Ti)); + Tr = VADD(Ti, Tq); + TN = VCONJ(VSUB(TL, TK)); + TM = VADD(TK, TL); + ST(&(Rm[WS(rs, 1)]), TA, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 
1)]), Tz, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), TI, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 2)]), TH, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 3)]), Ts, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 3)]), Tr, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[0]), TN, -ms, &(Rm[0])); + ST(&(Rp[0]), TM, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 8, XSIMD_STRING("hc2cbdftv_8"), twinstr, &GENUS, {23, 14, 18, 0} }; + +void XSIMD(codelet_hc2cbdftv_8) (planner *p) { + X(khc2c_register) (p, hc2cbdftv_8, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 8 -dif -sign 1 -name hc2cbdftv_8 -include hc2cbv.h */ + +/* + * This function contains 41 FP additions, 16 FP multiplications, + * (or, 41 additions, 16 multiplications, 0 fused multiply/add), + * 55 stack variables, 1 constants, and 16 memory accesses + */ +#include "hc2cbv.h" + +static void hc2cbdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) { + V T5, Tj, Tq, TI, Te, Tk, Tt, TJ, T2, Tg, T4, Ti, T3, Th, To; + V Tp, T6, Tc, T8, Tb, T7, Ta, T9, Td, Tr, Ts, TP, Tu, Tm, TO; + V Tn, Tf, Tl, T1, TN, Tv, TR, Tw, TQ, TC, TK, TA, TG, TB, TH; + V Ty, Tz, Tx, TF, TD, TM, TE, TL; + T2 = LD(&(Rp[0]), ms, &(Rp[0])); + Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T3 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + T4 = VCONJ(T3); + Th = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Ti = VCONJ(Th); + T5 = VSUB(T2, 
T4); + Tj = VSUB(Tg, Ti); + To = VADD(T2, T4); + Tp = VADD(Tg, Ti); + Tq = VSUB(To, Tp); + TI = VADD(To, Tp); + T6 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tc = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + Ta = LD(&(Rm[0]), -ms, &(Rm[0])); + Tb = VCONJ(Ta); + T9 = VSUB(T6, T8); + Td = VSUB(Tb, Tc); + Te = VMUL(LDK(KP707106781), VADD(T9, Td)); + Tk = VMUL(LDK(KP707106781), VSUB(T9, Td)); + Tr = VADD(T6, T8); + Ts = VADD(Tb, Tc); + Tt = VBYI(VSUB(Tr, Ts)); + TJ = VADD(Tr, Ts); + TP = VADD(TI, TJ); + Tn = LDW(&(W[TWVL * 10])); + Tu = VZMUL(Tn, VSUB(Tq, Tt)); + Tf = VADD(T5, Te); + Tl = VBYI(VADD(Tj, Tk)); + T1 = LDW(&(W[TWVL * 12])); + Tm = VZMULI(T1, VSUB(Tf, Tl)); + TN = LDW(&(W[0])); + TO = VZMULI(TN, VADD(Tl, Tf)); + Tv = VADD(Tm, Tu); + ST(&(Rp[WS(rs, 3)]), Tv, ms, &(Rp[WS(rs, 1)])); + TR = VCONJ(VSUB(TP, TO)); + ST(&(Rm[0]), TR, -ms, &(Rm[0])); + Tw = VCONJ(VSUB(Tu, Tm)); + ST(&(Rm[WS(rs, 3)]), Tw, -ms, &(Rm[WS(rs, 1)])); + TQ = VADD(TO, TP); + ST(&(Rp[0]), TQ, ms, &(Rp[0])); + TB = LDW(&(W[TWVL * 2])); + TC = VZMUL(TB, VADD(Tq, Tt)); + TH = LDW(&(W[TWVL * 6])); + TK = VZMUL(TH, VSUB(TI, TJ)); + Ty = VBYI(VSUB(Tk, Tj)); + Tz = VSUB(T5, Te); + Tx = LDW(&(W[TWVL * 4])); + TA = VZMULI(Tx, VADD(Ty, Tz)); + TF = LDW(&(W[TWVL * 8])); + TG = VZMULI(TF, VSUB(Tz, Ty)); + TD = VADD(TA, TC); + ST(&(Rp[WS(rs, 1)]), TD, ms, &(Rp[WS(rs, 1)])); + TM = VCONJ(VSUB(TK, TG)); + ST(&(Rm[WS(rs, 2)]), TM, -ms, &(Rm[0])); + TE = VCONJ(VSUB(TC, TA)); + ST(&(Rm[WS(rs, 1)]), TE, -ms, &(Rm[WS(rs, 1)])); + TL = VADD(TG, TK); + ST(&(Rp[WS(rs, 2)]), TL, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 8, XSIMD_STRING("hc2cbdftv_8"), twinstr, &GENUS, {41, 16, 0, 0} }; + +void XSIMD(codelet_hc2cbdftv_8) (planner *p) { + X(khc2c_register) (p, 
hc2cbdftv_8, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 10 -dit -name hc2cfdftv_10 -include hc2cfv.h */ + +/* + * This function contains 61 FP additions, 60 FP multiplications, + * (or, 33 additions, 32 multiplications, 28 fused multiply/add), + * 77 stack variables, 5 constants, and 20 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP500000000, 
+0.500000000000000000000000000000000000000000000); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 18)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(40, rs)) { + V T5, T6, Tw, Tr, Tc, Tj, Tl, Tm, Tk, Ts, Tg, Ty, T3, T4, T1; + V T2, Tv, Tq, Ta, Tb, T9, Ti, Te, Tf, Td, Tx, Tn, Tt, Th, TQ; + V TT, Tz, T7, TR, To, Tu, TU; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + Tv = LDW(&(W[0])); + T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Tq = LDW(&(W[TWVL * 6])); + Ta = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tb = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T9 = LDW(&(W[TWVL * 2])); + Ti = LDW(&(W[TWVL * 4])); + Tw = VZMULIJ(Tv, VFNMSCONJ(T2, T1)); + Te = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tf = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tr = VZMULJ(Tq, VFMACONJ(T6, T5)); + Td = LDW(&(W[TWVL * 12])); + Tx = LDW(&(W[TWVL * 10])); + Tc = VZMULJ(T9, VFMACONJ(Tb, Ta)); + Tj = VZMULIJ(Ti, VFNMSCONJ(Tb, Ta)); + Tl = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + Tm = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Tk = LDW(&(W[TWVL * 14])); + Ts = LDW(&(W[TWVL * 16])); + Tg = VZMULIJ(Td, VFNMSCONJ(Tf, Te)); + Ty = VZMULJ(Tx, VFMACONJ(Tf, Te)); + T3 = VFMACONJ(T2, T1); + T4 = LDW(&(W[TWVL * 8])); + Tn = VZMULJ(Tk, VFMACONJ(Tm, Tl)); + Tt = VZMULIJ(Ts, VFNMSCONJ(Tm, Tl)); + Th = VSUB(Tc, Tg); + TQ = VADD(Tc, Tg); + TT = VADD(Tw, Ty); + Tz = VSUB(Tw, Ty); + T7 = VZMULIJ(T4, VFNMSCONJ(T6, T5)); + TR = VADD(Tj, Tn); + To = VSUB(Tj, Tn); + Tu = VSUB(Tr, Tt); + TU = VADD(Tr, Tt); + { + V TP, T8, TS, T11, Tp, TH, TA, TG, TV, T12, TE, TB, TM, TI, TZ; + V TW, T17, T13, TD, TC, TY, TX, TL, TF, T10, 
T16, TN, TO, TK, TJ; + V T18, T19, T15, T14; + TP = VADD(T3, T7); + T8 = VSUB(T3, T7); + TS = VADD(TQ, TR); + T11 = VSUB(TQ, TR); + Tp = VSUB(Th, To); + TH = VADD(Th, To); + TA = VSUB(Tu, Tz); + TG = VADD(Tz, Tu); + TV = VADD(TT, TU); + T12 = VSUB(TU, TT); + TE = VSUB(Tp, TA); + TB = VADD(Tp, TA); + TM = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TG, TH)); + TI = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TH, TG)); + TZ = VSUB(TS, TV); + TW = VADD(TS, TV); + T17 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T11, T12)); + T13 = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T12, T11)); + TD = VFNMS(LDK(KP250000000), TB, T8); + TC = VMUL(LDK(KP500000000), VADD(T8, TB)); + TY = VFNMS(LDK(KP250000000), TW, TP); + TX = VCONJ(VMUL(LDK(KP500000000), VADD(TP, TW))); + TL = VFMA(LDK(KP559016994), TE, TD); + TF = VFNMS(LDK(KP559016994), TE, TD); + ST(&(Rp[0]), TC, ms, &(Rp[0])); + T10 = VFMA(LDK(KP559016994), TZ, TY); + T16 = VFNMS(LDK(KP559016994), TZ, TY); + ST(&(Rm[WS(rs, 4)]), TX, -ms, &(Rm[0])); + TN = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TM, TL))); + TO = VMUL(LDK(KP500000000), VFMAI(TM, TL)); + TK = VMUL(LDK(KP500000000), VFMAI(TI, TF)); + TJ = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TI, TF))); + T18 = VMUL(LDK(KP500000000), VFNMSI(T17, T16)); + T19 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T17, T16))); + T15 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T13, T10))); + T14 = VMUL(LDK(KP500000000), VFNMSI(T13, T10)); + ST(&(Rm[WS(rs, 3)]), TN, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 4)]), TO, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 2)]), TK, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), TJ, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 3)]), T18, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), T19, -ms, &(Rm[0])); + ST(&(Rm[0]), T15, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), T14, ms, &(Rp[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + {TW_NEXT, VL, 0} 
+}; + +static const hc2c_desc desc = { 10, XSIMD_STRING("hc2cfdftv_10"), twinstr, &GENUS, {33, 32, 28, 0} }; + +void XSIMD(codelet_hc2cfdftv_10) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_10, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 10 -dit -name hc2cfdftv_10 -include hc2cfv.h */ + +/* + * This function contains 61 FP additions, 38 FP multiplications, + * (or, 55 additions, 32 multiplications, 6 fused multiply/add), + * 82 stack variables, 5 constants, and 20 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP125000000, +0.125000000000000000000000000000000000000000000); + DVK(KP279508497, +0.279508497187473712051146708591409529430077295); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 18)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(40, rs)) { + V Tl, Tt, Tu, TY, TZ, T10, Tz, TE, TF, TV, TW, TX, Ta, TU, TN; + V TR, TH, TQ, TK, TL, TM, TI, TG, TJ, TT, TO, TP, TS, T18, T1c; + V T12, T1b, T15, T16, T17, T14, T11, T13, T1e, T19, T1a, T1d; + { + V T1, T3, Ty, T8, T7, TB, Tf, Ts, Tk, Tw, Tq, TD, T2, Tx, T6; + V TA, Tc, Te, Td, Tb, Tr, Tj, Ti, Th, Tg, Tv, Tn, Tp, To, Tm; + V TC, T4, T9, T5; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + Tx = LDW(&(W[0])); + Ty = VZMULIJ(Tx, VSUB(T3, T1)); + T8 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T7 = VCONJ(T6); + TA = LDW(&(W[TWVL * 6])); + TB = VZMULJ(TA, 
VADD(T7, T8)); + Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Te = VCONJ(Td); + Tb = LDW(&(W[TWVL * 2])); + Tf = VZMULJ(Tb, VADD(Tc, Te)); + Tr = LDW(&(W[TWVL * 4])); + Ts = VZMULIJ(Tr, VSUB(Te, Tc)); + Tj = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Th = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Ti = VCONJ(Th); + Tg = LDW(&(W[TWVL * 12])); + Tk = VZMULIJ(Tg, VSUB(Ti, Tj)); + Tv = LDW(&(W[TWVL * 10])); + Tw = VZMULJ(Tv, VADD(Ti, Tj)); + Tn = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + To = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Tp = VCONJ(To); + Tm = LDW(&(W[TWVL * 14])); + Tq = VZMULJ(Tm, VADD(Tn, Tp)); + TC = LDW(&(W[TWVL * 16])); + TD = VZMULIJ(TC, VSUB(Tp, Tn)); + Tl = VSUB(Tf, Tk); + Tt = VSUB(Tq, Ts); + Tu = VADD(Tl, Tt); + TY = VADD(Ty, Tw); + TZ = VADD(TB, TD); + T10 = VADD(TY, TZ); + Tz = VSUB(Tw, Ty); + TE = VSUB(TB, TD); + TF = VADD(Tz, TE); + TV = VADD(Tf, Tk); + TW = VADD(Ts, Tq); + TX = VADD(TV, TW); + T4 = VADD(T1, T3); + T5 = LDW(&(W[TWVL * 8])); + T9 = VZMULIJ(T5, VSUB(T7, T8)); + Ta = VSUB(T4, T9); + TU = VADD(T4, T9); + } + TL = VSUB(Tl, Tt); + TM = VSUB(TE, Tz); + TN = VMUL(LDK(KP500000000), VBYI(VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TM)))); + TR = VMUL(LDK(KP500000000), VBYI(VFNMS(LDK(KP587785252), TL, VMUL(LDK(KP951056516), TM)))); + TI = VMUL(LDK(KP279508497), VSUB(Tu, TF)); + TG = VADD(Tu, TF); + TJ = VFNMS(LDK(KP125000000), TG, VMUL(LDK(KP500000000), Ta)); + TH = VCONJ(VMUL(LDK(KP500000000), VADD(Ta, TG))); + TQ = VSUB(TJ, TI); + TK = VADD(TI, TJ); + ST(&(Rm[WS(rs, 4)]), TH, -ms, &(Rm[0])); + TT = VCONJ(VADD(TQ, TR)); + ST(&(Rm[WS(rs, 2)]), TT, -ms, &(Rm[0])); + TO = VSUB(TK, TN); + ST(&(Rp[WS(rs, 1)]), TO, ms, &(Rp[WS(rs, 1)])); + TP = VCONJ(VADD(TK, TN)); + ST(&(Rm[0]), TP, -ms, &(Rm[0])); + TS = VSUB(TQ, TR); + ST(&(Rp[WS(rs, 3)]), TS, ms, &(Rp[WS(rs, 1)])); + T16 = VSUB(TZ, TY); + T17 = VSUB(TV, TW); + T18 = VMUL(LDK(KP500000000), VBYI(VFNMS(LDK(KP587785252), T17, 
VMUL(LDK(KP951056516), T16)))); + T1c = VMUL(LDK(KP500000000), VBYI(VFMA(LDK(KP951056516), T17, VMUL(LDK(KP587785252), T16)))); + T14 = VMUL(LDK(KP279508497), VSUB(TX, T10)); + T11 = VADD(TX, T10); + T13 = VFNMS(LDK(KP125000000), T11, VMUL(LDK(KP500000000), TU)); + T12 = VMUL(LDK(KP500000000), VADD(TU, T11)); + T1b = VADD(T14, T13); + T15 = VSUB(T13, T14); + ST(&(Rp[0]), T12, ms, &(Rp[0])); + T1e = VADD(T1b, T1c); + ST(&(Rp[WS(rs, 4)]), T1e, ms, &(Rp[0])); + T19 = VCONJ(VSUB(T15, T18)); + ST(&(Rm[WS(rs, 1)]), T19, -ms, &(Rm[WS(rs, 1)])); + T1a = VADD(T15, T18); + ST(&(Rp[WS(rs, 2)]), T1a, ms, &(Rp[0])); + T1d = VCONJ(VSUB(T1b, T1c)); + ST(&(Rm[WS(rs, 3)]), T1d, -ms, &(Rm[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 10, XSIMD_STRING("hc2cfdftv_10"), twinstr, &GENUS, {55, 32, 6, 0} }; + +void XSIMD(codelet_hc2cfdftv_10) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_10, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 12 -dit -name hc2cfdftv_12 -include hc2cfv.h */ + +/* + * This function contains 71 FP additions, 66 FP multiplications, + * (or, 41 additions, 36 multiplications, 30 fused multiply/add), + * 86 stack variables, 2 constants, and 24 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(48, rs)) { + V T3, T7, TH, TE, Th, TC, Tq, T11, TU, Tx, Tb, Tz, Tu, Tw, Tp; + V Tl, T9, Ta, T8, Ty, Tn, To, Tm, TG, T1, T2, Tt, T5, T6, T4; + V Tv, Tj, Tk, Ti, TD, Tf, Tg, Te, TB, TT, TF, TR, Tr; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + Tt = LDW(&(W[0])); + T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T4 = LDW(&(W[TWVL * 6])); + Tv = LDW(&(W[TWVL * 8])); + Tn = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + To = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T3 = VFMACONJ(T2, T1); + Tu = VZMULIJ(Tt, VFNMSCONJ(T2, T1)); + Tm = LDW(&(W[TWVL * 
2])); + TG = LDW(&(W[TWVL * 4])); + T7 = VZMULJ(T4, VFMACONJ(T6, T5)); + Tw = VZMULIJ(Tv, VFNMSCONJ(T6, T5)); + Tj = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Tk = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Ti = LDW(&(W[TWVL * 18])); + TD = LDW(&(W[TWVL * 20])); + Tp = VZMULJ(Tm, VFMACONJ(To, Tn)); + TH = VZMULIJ(TG, VFNMSCONJ(To, Tn)); + Tf = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tg = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Te = LDW(&(W[TWVL * 10])); + TB = LDW(&(W[TWVL * 12])); + Tl = VZMULJ(Ti, VFMACONJ(Tk, Tj)); + TE = VZMULIJ(TD, VFNMSCONJ(Tk, Tj)); + T9 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + Ta = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T8 = LDW(&(W[TWVL * 14])); + Ty = LDW(&(W[TWVL * 16])); + Th = VZMULJ(Te, VFMACONJ(Tg, Tf)); + TC = VZMULIJ(TB, VFNMSCONJ(Tg, Tf)); + Tq = VADD(Tl, Tp); + T11 = VSUB(Tp, Tl); + TU = VSUB(Tu, Tw); + Tx = VADD(Tu, Tw); + Tb = VZMULJ(T8, VFMACONJ(Ta, T9)); + Tz = VZMULIJ(Ty, VFNMSCONJ(Ta, T9)); + TT = VSUB(TC, TE); + TF = VADD(TC, TE); + TR = VFNMS(LDK(KP500000000), Tq, Th); + Tr = VADD(Th, Tq); + { + V TX, TA, T1d, TV, TY, TI, T1e, T12, TQ, Td, T10, Tc, T1a, TN, TJ; + V T1j, T1f, T1b, TS, TM, Ts, T17, T13, TZ, T1i, T1c, T16, TW, TP, TO; + V TL, TK, T1k, T1l, T1h, T1g, T18, T19, T15, T14; + T10 = VSUB(Tb, T7); + Tc = VADD(T7, Tb); + TX = VFNMS(LDK(KP500000000), Tx, Tz); + TA = VADD(Tx, Tz); + T1d = VADD(TU, TT); + TV = VSUB(TT, TU); + TY = VFNMS(LDK(KP500000000), TF, TH); + TI = VADD(TF, TH); + T1e = VADD(T10, T11); + T12 = VSUB(T10, T11); + TQ = VFNMS(LDK(KP500000000), Tc, T3); + Td = VADD(T3, Tc); + T1a = VADD(TX, TY); + TZ = VSUB(TX, TY); + TN = VADD(TA, TI); + TJ = VSUB(TA, TI); + T1j = VMUL(LDK(KP866025403), VADD(T1d, T1e)); + T1f = VMUL(LDK(KP866025403), VSUB(T1d, T1e)); + T1b = VADD(TQ, TR); + TS = VSUB(TQ, TR); + TM = VADD(Td, Tr); + Ts = VSUB(Td, Tr); + T17 = VFMA(LDK(KP866025403), T12, TZ); + T13 = VFNMS(LDK(KP866025403), T12, TZ); + T1i = VSUB(T1b, T1a); + T1c = VADD(T1a, T1b); + T16 = 
VFNMS(LDK(KP866025403), TV, TS); + TW = VFMA(LDK(KP866025403), TV, TS); + TP = VCONJ(VMUL(LDK(KP500000000), VADD(TN, TM))); + TO = VMUL(LDK(KP500000000), VSUB(TM, TN)); + TL = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TJ, Ts))); + TK = VMUL(LDK(KP500000000), VFMAI(TJ, Ts)); + T1k = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1j, T1i))); + T1l = VMUL(LDK(KP500000000), VFMAI(T1j, T1i)); + T1h = VMUL(LDK(KP500000000), VFMAI(T1f, T1c)); + T1g = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1f, T1c))); + T18 = VMUL(LDK(KP500000000), VFNMSI(T17, T16)); + T19 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T17, T16))); + T15 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T13, TW))); + T14 = VMUL(LDK(KP500000000), VFNMSI(T13, TW)); + ST(&(Rm[WS(rs, 5)]), TP, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[0]), TO, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 2)]), TL, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 3)]), TK, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 3)]), T1k, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 4)]), T1l, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 2)]), T1h, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), T1g, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 5)]), T18, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 4)]), T19, -ms, &(Rm[0])); + ST(&(Rm[0]), T15, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), T14, ms, &(Rp[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 12, XSIMD_STRING("hc2cfdftv_12"), twinstr, &GENUS, {41, 36, 30, 0} }; + +void XSIMD(codelet_hc2cfdftv_12) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_12, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 12 -dit -name hc2cfdftv_12 -include hc2cfv.h */ + +/* + * This function contains 71 FP additions, 41 FP multiplications, + * 
(or, 67 additions, 37 multiplications, 4 fused multiply/add), + * 58 stack variables, 4 constants, and 24 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP433012701, +0.433012701892219323381861585376468091735701313); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(48, rs)) { + V TX, T13, T4, Tf, TZ, TD, TF, T17, TW, T14, Tw, Tl, T10, TL, TN; + V T16; + { + V T1, T3, TA, Tb, Td, Te, T9, TC, T2, Tz, Tc, Ta, T6, T8, T7; + V T5, TB, TE, Ti, Tk, TI, Ts, Tu, Tv, Tq, TK, Tj, TH, Tt, Tr; + V Tn, Tp, To, Tm, TJ, Th, TM; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + Tz = LDW(&(W[0])); + TA = VZMULIJ(Tz, VSUB(T3, T1)); + Tb = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + Tc = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Td = VCONJ(Tc); + Ta = LDW(&(W[TWVL * 14])); + Te = VZMULJ(Ta, VADD(Tb, Td)); + T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + T5 = LDW(&(W[TWVL * 6])); + T9 = VZMULJ(T5, VADD(T6, T8)); + TB = LDW(&(W[TWVL * 8])); + TC = VZMULIJ(TB, VSUB(T8, T6)); + TX = VSUB(TC, TA); + T13 = VSUB(Te, T9); + T4 = VADD(T1, T3); + Tf = VADD(T9, Te); + TZ = VFNMS(LDK(KP250000000), Tf, VMUL(LDK(KP500000000), T4)); + TD = VADD(TA, TC); + TE = LDW(&(W[TWVL * 16])); + TF = VZMULIJ(TE, VSUB(Td, Tb)); + T17 = VFNMS(LDK(KP500000000), TD, TF); + Ti = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tj = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tk = VCONJ(Tj); + TH = LDW(&(W[TWVL * 12])); + TI = VZMULIJ(TH, VSUB(Tk, Ti)); + 
Ts = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tt = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Tu = VCONJ(Tt); + Tr = LDW(&(W[TWVL * 2])); + Tv = VZMULJ(Tr, VADD(Ts, Tu)); + Tn = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + To = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tp = VCONJ(To); + Tm = LDW(&(W[TWVL * 18])); + Tq = VZMULJ(Tm, VADD(Tn, Tp)); + TJ = LDW(&(W[TWVL * 20])); + TK = VZMULIJ(TJ, VSUB(Tp, Tn)); + TW = VSUB(TK, TI); + T14 = VSUB(Tv, Tq); + Tw = VADD(Tq, Tv); + Th = LDW(&(W[TWVL * 10])); + Tl = VZMULJ(Th, VADD(Ti, Tk)); + T10 = VFNMS(LDK(KP250000000), Tw, VMUL(LDK(KP500000000), Tl)); + TL = VADD(TI, TK); + TM = LDW(&(W[TWVL * 4])); + TN = VZMULIJ(TM, VSUB(Tu, Ts)); + T16 = VFNMS(LDK(KP500000000), TL, TN); + } + { + V Ty, TS, TP, TT, Tg, Tx, TG, TO, TQ, TV, TR, TU, T1i, T1o, T1l; + V T1p, T1g, T1h, T1j, T1k, T1m, T1r, T1n, T1q, T12, T1c, T19, T1d, TY, T11; + V T15, T18, T1a, T1f, T1b, T1e; + Tg = VADD(T4, Tf); + Tx = VADD(Tl, Tw); + Ty = VADD(Tg, Tx); + TS = VSUB(Tg, Tx); + TG = VADD(TD, TF); + TO = VADD(TL, TN); + TP = VADD(TG, TO); + TT = VBYI(VSUB(TO, TG)); + TQ = VCONJ(VMUL(LDK(KP500000000), VSUB(Ty, TP))); + ST(&(Rm[WS(rs, 5)]), TQ, -ms, &(Rm[WS(rs, 1)])); + TV = VMUL(LDK(KP500000000), VADD(TS, TT)); + ST(&(Rp[WS(rs, 3)]), TV, ms, &(Rp[WS(rs, 1)])); + TR = VMUL(LDK(KP500000000), VADD(Ty, TP)); + ST(&(Rp[0]), TR, ms, &(Rp[0])); + TU = VCONJ(VMUL(LDK(KP500000000), VSUB(TS, TT))); + ST(&(Rm[WS(rs, 2)]), TU, -ms, &(Rm[0])); + T1g = VADD(TX, TW); + T1h = VADD(T13, T14); + T1i = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(T1g, T1h)))); + T1o = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VADD(T1g, T1h)))); + T1j = VADD(TZ, T10); + T1k = VMUL(LDK(KP500000000), VADD(T17, T16)); + T1l = VSUB(T1j, T1k); + T1p = VADD(T1j, T1k); + T1m = VADD(T1i, T1l); + ST(&(Rp[WS(rs, 2)]), T1m, ms, &(Rp[0])); + T1r = VCONJ(VSUB(T1p, T1o)); + ST(&(Rm[WS(rs, 3)]), T1r, -ms, &(Rm[WS(rs, 1)])); + T1n = VCONJ(VSUB(T1l, T1i)); + ST(&(Rm[WS(rs, 1)]), 
T1n, -ms, &(Rm[WS(rs, 1)])); + T1q = VADD(T1o, T1p); + ST(&(Rp[WS(rs, 4)]), T1q, ms, &(Rp[0])); + TY = VMUL(LDK(KP433012701), VSUB(TW, TX)); + T11 = VSUB(TZ, T10); + T12 = VADD(TY, T11); + T1c = VSUB(T11, TY); + T15 = VMUL(LDK(KP866025403), VSUB(T13, T14)); + T18 = VSUB(T16, T17); + T19 = VMUL(LDK(KP500000000), VBYI(VSUB(T15, T18))); + T1d = VMUL(LDK(KP500000000), VBYI(VADD(T15, T18))); + T1a = VCONJ(VSUB(T12, T19)); + ST(&(Rm[0]), T1a, -ms, &(Rm[0])); + T1f = VCONJ(VADD(T1c, T1d)); + ST(&(Rm[WS(rs, 4)]), T1f, -ms, &(Rm[0])); + T1b = VADD(T12, T19); + ST(&(Rp[WS(rs, 1)]), T1b, ms, &(Rp[WS(rs, 1)])); + T1e = VSUB(T1c, T1d); + ST(&(Rp[WS(rs, 5)]), T1e, ms, &(Rp[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 12, XSIMD_STRING("hc2cfdftv_12"), twinstr, &GENUS, {67, 37, 4, 0} }; + +void XSIMD(codelet_hc2cfdftv_12) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_12, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 16 -dit -name hc2cfdftv_16 -include hc2cfv.h */ + +/* + * This function contains 103 FP additions, 96 FP multiplications, + * (or, 53 additions, 46 multiplications, 50 fused multiply/add), + * 92 stack variables, 4 constants, and 32 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 30)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(64, rs)) { + V T8, Tc, TQ, TZ, T1J, T1x, T12, TH, T1I, T1q, Tp, TJ, Te, Tf, Td; + V TN, Tj, Tk, Ti, TK, Tg, TO, Tl, TL, T1r, Th, TR, T1y, T1s, Tq; + V TM, T1z, T1N, T1t, T10, Tr, T13, TS, T1K, T1A, T1E, T1u, T1f, T11, T1c; + V Ts, T1d, T14, T1g, TT; + { + V T3, Tw, TF, TW, Tz, TA, Ty, TX, T7, Tu, T1, T2, Tv, TD, TE; + V TC, TV, T5, T6, T4, Tt, TB, TY, T1o, T1v, Tx, Ta, Tb, T9, TP; + V T1w, TG, T1p, Tn, To, Tm, TI; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, 
&(Rm[0])); + Tv = LDW(&(W[0])); + TD = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + TE = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + TC = LDW(&(W[TWVL * 8])); + TV = LDW(&(W[TWVL * 6])); + T5 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T3 = VFMACONJ(T2, T1); + Tw = VZMULIJ(Tv, VFNMSCONJ(T2, T1)); + T4 = LDW(&(W[TWVL * 14])); + Tt = LDW(&(W[TWVL * 16])); + TF = VZMULIJ(TC, VFNMSCONJ(TE, TD)); + TW = VZMULJ(TV, VFMACONJ(TE, TD)); + Tz = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + TA = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + Ty = LDW(&(W[TWVL * 24])); + TX = LDW(&(W[TWVL * 22])); + T7 = VZMULJ(T4, VFMACONJ(T6, T5)); + Tu = VZMULIJ(Tt, VFNMSCONJ(T6, T5)); + Ta = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tb = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T9 = LDW(&(W[TWVL * 2])); + TP = LDW(&(W[TWVL * 4])); + TB = VZMULIJ(Ty, VFNMSCONJ(TA, Tz)); + TY = VZMULJ(TX, VFMACONJ(TA, Tz)); + T1o = VADD(T3, T7); + T8 = VSUB(T3, T7); + T1v = VADD(Tw, Tu); + Tx = VSUB(Tu, Tw); + Tc = VZMULJ(T9, VFMACONJ(Tb, Ta)); + TQ = VZMULIJ(TP, VFNMSCONJ(Tb, Ta)); + T1w = VADD(TF, TB); + TG = VSUB(TB, TF); + T1p = VADD(TW, TY); + TZ = VSUB(TW, TY); + Tn = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + To = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tm = LDW(&(W[TWVL * 10])); + TI = LDW(&(W[TWVL * 12])); + T1J = VSUB(T1w, T1v); + T1x = VADD(T1v, T1w); + T12 = VFMA(LDK(KP414213562), Tx, TG); + TH = VFNMS(LDK(KP414213562), TG, Tx); + T1I = VSUB(T1o, T1p); + T1q = VADD(T1o, T1p); + Tp = VZMULJ(Tm, VFMACONJ(To, Tn)); + TJ = VZMULIJ(TI, VFNMSCONJ(To, Tn)); + Te = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Tf = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Td = LDW(&(W[TWVL * 18])); + TN = LDW(&(W[TWVL * 20])); + Tj = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + Tk = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + Ti = LDW(&(W[TWVL * 26])); + TK = LDW(&(W[TWVL * 28])); + } + Tg = VZMULJ(Td, VFMACONJ(Tf, Te)); + TO = VZMULIJ(TN, VFNMSCONJ(Tf, Te)); + Tl = VZMULJ(Ti, 
VFMACONJ(Tk, Tj)); + TL = VZMULIJ(TK, VFNMSCONJ(Tk, Tj)); + T1r = VADD(Tc, Tg); + Th = VSUB(Tc, Tg); + TR = VSUB(TO, TQ); + T1y = VADD(TQ, TO); + T1s = VADD(Tl, Tp); + Tq = VSUB(Tl, Tp); + TM = VSUB(TJ, TL); + T1z = VADD(TL, TJ); + T1N = VSUB(T1s, T1r); + T1t = VADD(T1r, T1s); + T10 = VSUB(Tq, Th); + Tr = VADD(Th, Tq); + T13 = VFNMS(LDK(KP414213562), TM, TR); + TS = VFMA(LDK(KP414213562), TR, TM); + T1K = VSUB(T1y, T1z); + T1A = VADD(T1y, T1z); + T1E = VADD(T1q, T1t); + T1u = VSUB(T1q, T1t); + T1f = VFMA(LDK(KP707106781), T10, TZ); + T11 = VFNMS(LDK(KP707106781), T10, TZ); + T1c = VFNMS(LDK(KP707106781), Tr, T8); + Ts = VFMA(LDK(KP707106781), Tr, T8); + T1d = VSUB(T12, T13); + T14 = VADD(T12, T13); + T1g = VSUB(TS, TH); + TT = VADD(TH, TS); + { + V T1O, T1L, T1F, T1B, T1k, T1e, T19, T15, T1l, T1h, T18, TU, T1T, T1P, T1S; + V T1M, T1H, T1G, T1D, T1C, T1m, T1n, T1j, T1i, T1a, T1b, T17, T16, T1U, T1V; + V T1R, T1Q; + T1O = VSUB(T1K, T1J); + T1L = VADD(T1J, T1K); + T1F = VADD(T1x, T1A); + T1B = VSUB(T1x, T1A); + T1k = VFNMS(LDK(KP923879532), T1d, T1c); + T1e = VFMA(LDK(KP923879532), T1d, T1c); + T19 = VFNMS(LDK(KP923879532), T14, T11); + T15 = VFMA(LDK(KP923879532), T14, T11); + T1l = VFNMS(LDK(KP923879532), T1g, T1f); + T1h = VFMA(LDK(KP923879532), T1g, T1f); + T18 = VFNMS(LDK(KP923879532), TT, Ts); + TU = VFMA(LDK(KP923879532), TT, Ts); + T1T = VFNMS(LDK(KP707106781), T1O, T1N); + T1P = VFMA(LDK(KP707106781), T1O, T1N); + T1S = VFNMS(LDK(KP707106781), T1L, T1I); + T1M = VFMA(LDK(KP707106781), T1L, T1I); + T1H = VCONJ(VMUL(LDK(KP500000000), VADD(T1F, T1E))); + T1G = VMUL(LDK(KP500000000), VSUB(T1E, T1F)); + T1D = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1B, T1u))); + T1C = VMUL(LDK(KP500000000), VFMAI(T1B, T1u)); + T1m = VMUL(LDK(KP500000000), VFNMSI(T1l, T1k)); + T1n = VCONJ(VMUL(LDK(KP500000000), VFMAI(T1l, T1k))); + T1j = VMUL(LDK(KP500000000), VFMAI(T1h, T1e)); + T1i = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1h, T1e))); + T1a = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T19, 
T18))); + T1b = VMUL(LDK(KP500000000), VFMAI(T19, T18)); + T17 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T15, TU))); + T16 = VMUL(LDK(KP500000000), VFNMSI(T15, TU)); + T1U = VMUL(LDK(KP500000000), VFNMSI(T1T, T1S)); + T1V = VCONJ(VMUL(LDK(KP500000000), VFMAI(T1T, T1S))); + T1R = VMUL(LDK(KP500000000), VFMAI(T1P, T1M)); + T1Q = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1P, T1M))); + ST(&(Rm[WS(rs, 7)]), T1H, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[0]), T1G, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 3)]), T1D, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 4)]), T1C, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 5)]), T1m, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 4)]), T1n, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 3)]), T1j, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), T1i, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 6)]), T1a, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 7)]), T1b, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[0]), T17, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), T16, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 6)]), T1U, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 5)]), T1V, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), T1R, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), T1Q, -ms, &(Rm[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 16, XSIMD_STRING("hc2cfdftv_16"), twinstr, &GENUS, {53, 46, 50, 0} }; + +void XSIMD(codelet_hc2cfdftv_16) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_16, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 16 -dit -name hc2cfdftv_16 -include hc2cfv.h */ + +/* + * This function contains 103 FP additions, 56 FP multiplications, + * (or, 99 additions, 52 multiplications, 4 fused multiply/add), + * 101 
stack variables, 5 constants, and 32 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP353553390, +0.353553390593273762200422181052424519642417969); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 30)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 30), MAKE_VOLATILE_STRIDE(64, rs)) { + V T1D, T1E, T1R, TP, T1b, Ta, T1w, T18, T1x, T1z, T1A, T1G, T1H, T1S, Tx; + V T13, T10, T1a, T1, T3, TA, TM, TL, TN, T6, T8, TC, TH, TG, TI; + V T2, Tz, TK, TJ, T7, TB, TF, TE, TD, TO, T4, T9, T5, T15, T17; + V T14, T16; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + Tz = LDW(&(W[0])); + TA = VZMULIJ(Tz, VSUB(T3, T1)); + TM = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + TK = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TL = VCONJ(TK); + TJ = LDW(&(W[TWVL * 24])); + TN = VZMULIJ(TJ, VSUB(TL, TM)); + T6 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + TB = LDW(&(W[TWVL * 16])); + TC = VZMULIJ(TB, VSUB(T8, T6)); + TH = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + TF = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + TG = VCONJ(TF); + TE = LDW(&(W[TWVL * 8])); + TI = VZMULIJ(TE, VSUB(TG, TH)); + T1D = VADD(TA, TC); + T1E = VADD(TI, TN); + T1R = VSUB(T1D, T1E); + TD = VSUB(TA, TC); + TO = VSUB(TI, TN); + TP = VFNMS(LDK(KP382683432), TO, VMUL(LDK(KP923879532), TD)); + T1b = VFMA(LDK(KP382683432), TD, VMUL(LDK(KP923879532), TO)); + T4 = VADD(T1, T3); + T5 = LDW(&(W[TWVL * 14])); + T9 = VZMULJ(T5, VADD(T6, T8)); + Ta = VMUL(LDK(KP500000000), VSUB(T4, T9)); + 
T1w = VADD(T4, T9); + T14 = LDW(&(W[TWVL * 6])); + T15 = VZMULJ(T14, VADD(TH, TG)); + T16 = LDW(&(W[TWVL * 22])); + T17 = VZMULJ(T16, VADD(TM, TL)); + T18 = VSUB(T15, T17); + T1x = VADD(T15, T17); + { + V Tf, TR, Tv, TY, Tk, TT, Tq, TW, Tc, Te, Td, Tb, TQ, Ts, Tu; + V Tt, Tr, TX, Th, Tj, Ti, Tg, TS, Tn, Tp, To, Tm, TV, Tl, Tw; + V TU, TZ; + Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Te = VCONJ(Td); + Tb = LDW(&(W[TWVL * 2])); + Tf = VZMULJ(Tb, VADD(Tc, Te)); + TQ = LDW(&(W[TWVL * 4])); + TR = VZMULIJ(TQ, VSUB(Te, Tc)); + Ts = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tt = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tu = VCONJ(Tt); + Tr = LDW(&(W[TWVL * 10])); + Tv = VZMULJ(Tr, VADD(Ts, Tu)); + TX = LDW(&(W[TWVL * 12])); + TY = VZMULIJ(TX, VSUB(Tu, Ts)); + Th = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + Ti = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tj = VCONJ(Ti); + Tg = LDW(&(W[TWVL * 18])); + Tk = VZMULJ(Tg, VADD(Th, Tj)); + TS = LDW(&(W[TWVL * 20])); + TT = VZMULIJ(TS, VSUB(Tj, Th)); + Tn = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + To = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + Tp = VCONJ(To); + Tm = LDW(&(W[TWVL * 26])); + Tq = VZMULJ(Tm, VADD(Tn, Tp)); + TV = LDW(&(W[TWVL * 28])); + TW = VZMULIJ(TV, VSUB(Tp, Tn)); + T1z = VADD(Tf, Tk); + T1A = VADD(Tq, Tv); + T1G = VADD(TR, TT); + T1H = VADD(TW, TY); + T1S = VSUB(T1H, T1G); + Tl = VSUB(Tf, Tk); + Tw = VSUB(Tq, Tv); + Tx = VMUL(LDK(KP353553390), VADD(Tl, Tw)); + T13 = VMUL(LDK(KP707106781), VSUB(Tw, Tl)); + TU = VSUB(TR, TT); + TZ = VSUB(TW, TY); + T10 = VFMA(LDK(KP382683432), TU, VMUL(LDK(KP923879532), TZ)); + T1a = VFNMS(LDK(KP923879532), TU, VMUL(LDK(KP382683432), TZ)); + } + { + V T1U, T20, T1X, T21, T1Q, T1T, T1V, T1W, T1Y, T23, T1Z, T22, T1C, T1M, T1J; + V T1N, T1y, T1B, T1F, T1I, T1K, T1P, T1L, T1O, T12, T1g, T1d, T1h, Ty, T11; + V T19, T1c, T1e, T1j, T1f, T1i, T1m, T1s, T1p, T1t, T1k, T1l, T1n, T1o, T1q; + V T1v, T1r, 
T1u; + T1Q = VMUL(LDK(KP500000000), VSUB(T1w, T1x)); + T1T = VMUL(LDK(KP353553390), VADD(T1R, T1S)); + T1U = VADD(T1Q, T1T); + T20 = VSUB(T1Q, T1T); + T1V = VSUB(T1A, T1z); + T1W = VMUL(LDK(KP707106781), VSUB(T1S, T1R)); + T1X = VMUL(LDK(KP500000000), VBYI(VADD(T1V, T1W))); + T21 = VMUL(LDK(KP500000000), VBYI(VSUB(T1W, T1V))); + T1Y = VCONJ(VSUB(T1U, T1X)); + ST(&(Rm[WS(rs, 1)]), T1Y, -ms, &(Rm[WS(rs, 1)])); + T23 = VADD(T20, T21); + ST(&(Rp[WS(rs, 6)]), T23, ms, &(Rp[0])); + T1Z = VADD(T1U, T1X); + ST(&(Rp[WS(rs, 2)]), T1Z, ms, &(Rp[0])); + T22 = VCONJ(VSUB(T20, T21)); + ST(&(Rm[WS(rs, 5)]), T22, -ms, &(Rm[WS(rs, 1)])); + T1y = VADD(T1w, T1x); + T1B = VADD(T1z, T1A); + T1C = VADD(T1y, T1B); + T1M = VSUB(T1y, T1B); + T1F = VADD(T1D, T1E); + T1I = VADD(T1G, T1H); + T1J = VADD(T1F, T1I); + T1N = VBYI(VSUB(T1I, T1F)); + T1K = VCONJ(VMUL(LDK(KP500000000), VSUB(T1C, T1J))); + ST(&(Rm[WS(rs, 7)]), T1K, -ms, &(Rm[WS(rs, 1)])); + T1P = VMUL(LDK(KP500000000), VADD(T1M, T1N)); + ST(&(Rp[WS(rs, 4)]), T1P, ms, &(Rp[0])); + T1L = VMUL(LDK(KP500000000), VADD(T1C, T1J)); + ST(&(Rp[0]), T1L, ms, &(Rp[0])); + T1O = VCONJ(VMUL(LDK(KP500000000), VSUB(T1M, T1N))); + ST(&(Rm[WS(rs, 3)]), T1O, -ms, &(Rm[WS(rs, 1)])); + Ty = VADD(Ta, Tx); + T11 = VMUL(LDK(KP500000000), VADD(TP, T10)); + T12 = VADD(Ty, T11); + T1g = VSUB(Ty, T11); + T19 = VSUB(T13, T18); + T1c = VSUB(T1a, T1b); + T1d = VMUL(LDK(KP500000000), VBYI(VADD(T19, T1c))); + T1h = VMUL(LDK(KP500000000), VBYI(VSUB(T1c, T19))); + T1e = VCONJ(VSUB(T12, T1d)); + ST(&(Rm[0]), T1e, -ms, &(Rm[0])); + T1j = VADD(T1g, T1h); + ST(&(Rp[WS(rs, 7)]), T1j, ms, &(Rp[WS(rs, 1)])); + T1f = VADD(T12, T1d); + ST(&(Rp[WS(rs, 1)]), T1f, ms, &(Rp[WS(rs, 1)])); + T1i = VCONJ(VSUB(T1g, T1h)); + ST(&(Rm[WS(rs, 6)]), T1i, -ms, &(Rm[0])); + T1k = VSUB(T10, TP); + T1l = VADD(T18, T13); + T1m = VMUL(LDK(KP500000000), VBYI(VSUB(T1k, T1l))); + T1s = VMUL(LDK(KP500000000), VBYI(VADD(T1l, T1k))); + T1n = VSUB(Ta, Tx); + T1o = VMUL(LDK(KP500000000), VADD(T1b, 
T1a)); + T1p = VSUB(T1n, T1o); + T1t = VADD(T1n, T1o); + T1q = VADD(T1m, T1p); + ST(&(Rp[WS(rs, 5)]), T1q, ms, &(Rp[WS(rs, 1)])); + T1v = VCONJ(VSUB(T1t, T1s)); + ST(&(Rm[WS(rs, 2)]), T1v, -ms, &(Rm[0])); + T1r = VCONJ(VSUB(T1p, T1m)); + ST(&(Rm[WS(rs, 4)]), T1r, -ms, &(Rm[0])); + T1u = VADD(T1s, T1t); + ST(&(Rp[WS(rs, 3)]), T1u, ms, &(Rp[WS(rs, 1)])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 16, XSIMD_STRING("hc2cfdftv_16"), twinstr, &GENUS, {99, 52, 4, 0} }; + +void XSIMD(codelet_hc2cfdftv_16) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_16, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dit -name hc2cfdftv_2 -include hc2cfv.h */ + +/* + * This function contains 5 FP additions, 6 FP multiplications, + * (or, 3 additions, 4 multiplications, 2 fused multiply/add), + * 9 stack variables, 1 constants, and 4 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 2)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(8, rs)) { + V T1, T2, T4, T3, T5, T7, T6; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T4 = LDW(&(W[0])); + T3 = VFMACONJ(T2, T1); + T5 = VZMULIJ(T4, VFNMSCONJ(T2, T1)); + T7 = VCONJ(VMUL(LDK(KP500000000), VADD(T3, T5))); + T6 = VMUL(LDK(KP500000000), VSUB(T3, T5)); + ST(&(Rm[0]), T7, -ms, &(Rm[0])); + ST(&(Rp[0]), T6, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cfdftv_2"), twinstr, &GENUS, {3, 4, 2, 0} }; + +void XSIMD(codelet_hc2cfdftv_2) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_2, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd 
-compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dit -name hc2cfdftv_2 -include hc2cfv.h */ + +/* + * This function contains 5 FP additions, 4 FP multiplications, + * (or, 5 additions, 4 multiplications, 0 fused multiply/add), + * 10 stack variables, 1 constants, and 4 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 2)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(8, rs)) { + V T4, T6, T1, T3, T2, T5, T7, T8; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + T4 = VADD(T1, T3); + T5 = LDW(&(W[0])); + T6 = VZMULIJ(T5, VSUB(T3, T1)); + T7 = VCONJ(VMUL(LDK(KP500000000), VSUB(T4, T6))); + ST(&(Rm[0]), T7, -ms, &(Rm[0])); + T8 = VMUL(LDK(KP500000000), VADD(T4, T6)); + ST(&(Rp[0]), T8, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cfdftv_2"), twinstr, &GENUS, {5, 4, 0, 0} }; + +void XSIMD(codelet_hc2cfdftv_2) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_2, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 20 -dit -name hc2cfdftv_20 -include hc2cfv.h */ + +/* + * This function contains 143 FP additions, 128 FP multiplications, + * (or, 77 additions, 62 multiplications, 66 fused multiply/add), + * 130 stack variables, 5 constants, and 40 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP618033988, +0.618033988749894848204586834365638117720309180); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 38)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(80, rs)) { + V T2g, T2f, T2w, T2k, T2A, T2u, T2e, T2o, T1O, T2b, T2i, T1R, T1X, T1k, TN; + V T1w, T1G, T1t, Ti, T2c, T12, 
T1x, T2j, T1U, T1y, T1d, T24, T2v, T2h, T2x; + V T2B, T2p, T2l, T2z, T2y, T2D, T2C, T2r, T2q, T2n, T2m; + { + V T3, T7, TC, T1Y, Tc, Tg, Tn, T1P, T1Z, Tw, T1S, TS, TY, TZ, T1Q; + V TL, T17, T21, TW, T19, TX, T1a, T8, T20, Th, Tx, T1u, T1v, TM, T10; + V T1b, T22, T11, T1T, T1c, T23; + { + V Ta, Tb, Tz, Te, TB, Tf, Tl, T9, Td, Tk, T1, T2, Ty, T5, T6; + V TA, T4, Tj, Tt, Tu, Ts, TQ, Tr, TP, Tp, Tq, Tm, To, TO, TG; + V T14, TK, T16, TE, TF, Tv, TD, T13, TR, TI, TJ, TH, T15, TU, TV; + V TT, T18; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + Ty = LDW(&(W[0])); + T5 = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + T6 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + TA = LDW(&(W[TWVL * 20])); + T4 = LDW(&(W[TWVL * 18])); + Ta = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Tb = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T3 = VFMACONJ(T2, T1); + Tz = VZMULIJ(Ty, VFNMSCONJ(T2, T1)); + Tj = LDW(&(W[TWVL * 6])); + Te = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + TB = VZMULIJ(TA, VFNMSCONJ(T6, T5)); + T7 = VZMULJ(T4, VFMACONJ(T6, T5)); + Tf = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + Tl = LDW(&(W[TWVL * 26])); + T9 = LDW(&(W[TWVL * 8])); + Td = LDW(&(W[TWVL * 28])); + Tk = VZMULJ(Tj, VFMACONJ(Tb, Ta)); + Tp = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + TC = VADD(Tz, TB); + T1Y = VSUB(TB, Tz); + Tq = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Tm = VZMULJ(Tl, VFMACONJ(Tf, Te)); + Tc = VZMULIJ(T9, VFNMSCONJ(Tb, Ta)); + Tg = VZMULIJ(Td, VFNMSCONJ(Tf, Te)); + To = LDW(&(W[TWVL * 16])); + TO = LDW(&(W[TWVL * 14])); + Tt = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + Tu = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + Ts = LDW(&(W[TWVL * 36])); + Tn = VADD(Tk, Tm); + T1P = VSUB(Tk, Tm); + TQ = LDW(&(W[TWVL * 34])); + Tr = VZMULIJ(To, VFNMSCONJ(Tq, Tp)); + TP = VZMULJ(TO, VFMACONJ(Tq, Tp)); + TE = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + TF = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + Tv = VZMULIJ(Ts, VFNMSCONJ(Tu, Tt)); + TD = LDW(&(W[TWVL * 30])); + T13 = LDW(&(W[TWVL * 32])); + 
TR = VZMULJ(TQ, VFMACONJ(Tu, Tt)); + TI = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + TJ = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + TH = LDW(&(W[TWVL * 10])); + T15 = LDW(&(W[TWVL * 12])); + T1Z = VSUB(Tv, Tr); + Tw = VADD(Tr, Tv); + TG = VZMULJ(TD, VFMACONJ(TF, TE)); + T14 = VZMULIJ(T13, VFNMSCONJ(TF, TE)); + T1S = VSUB(TP, TR); + TS = VADD(TP, TR); + TK = VZMULJ(TH, VFMACONJ(TJ, TI)); + T16 = VZMULIJ(T15, VFNMSCONJ(TJ, TI)); + TU = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + TV = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TT = LDW(&(W[TWVL * 24])); + T18 = LDW(&(W[TWVL * 22])); + TY = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + TZ = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T1Q = VSUB(TK, TG); + TL = VADD(TG, TK); + T17 = VADD(T14, T16); + T21 = VSUB(T16, T14); + TW = VZMULIJ(TT, VFNMSCONJ(TV, TU)); + T19 = VZMULJ(T18, VFMACONJ(TV, TU)); + TX = LDW(&(W[TWVL * 4])); + T1a = LDW(&(W[TWVL * 2])); + } + T1O = VSUB(T3, T7); + T8 = VADD(T3, T7); + T20 = VADD(T1Y, T1Z); + T2b = VSUB(T1Y, T1Z); + T2i = VADD(T1P, T1Q); + T1R = VSUB(T1P, T1Q); + Th = VADD(Tc, Tg); + T1X = VSUB(Tg, Tc); + Tx = VSUB(Tn, Tw); + T1u = VADD(Tn, Tw); + T1v = VADD(TC, TL); + TM = VSUB(TC, TL); + T10 = VZMULIJ(TX, VFNMSCONJ(TZ, TY)); + T1b = VZMULJ(T1a, VFMACONJ(TZ, TY)); + T1k = VADD(Tx, TM); + TN = VSUB(Tx, TM); + T22 = VSUB(T10, TW); + T11 = VADD(TW, T10); + T1T = VSUB(T1b, T19); + T1c = VADD(T19, T1b); + T1w = VADD(T1u, T1v); + T1G = VSUB(T1u, T1v); + T1t = VADD(T8, Th); + Ti = VSUB(T8, Th); + T23 = VADD(T21, T22); + T2c = VSUB(T21, T22); + T12 = VSUB(TS, T11); + T1x = VADD(TS, T11); + T2j = VADD(T1S, T1T); + T1U = VSUB(T1S, T1T); + T1y = VADD(T17, T1c); + T1d = VSUB(T17, T1c); + T2g = VSUB(T23, T20); + T24 = VADD(T20, T23); + } + { + V T2d, T2t, T29, T25, T1m, T1q, T1i, T1H, T1L, T1D, T1A, T28, T1W, T1h, T1g; + V T1e, T1l, T1z, T1F, T1V, T1f, T1C, T1B, T26, T27, T2a, T2s, T1j, T1p, T1K; + V T1E, T1n, T1o, T1s, T1r, T1I, T1J, T1N, T1M; + T2d = VFMA(LDK(KP618033988), T2c, T2b); + T2t = 
VFNMS(LDK(KP618033988), T2b, T2c); + T1e = VSUB(T12, T1d); + T1l = VADD(T12, T1d); + T1z = VADD(T1x, T1y); + T1F = VSUB(T1x, T1y); + T1V = VADD(T1R, T1U); + T29 = VSUB(T1R, T1U); + T2f = VFNMS(LDK(KP250000000), T24, T1X); + T25 = VADD(T1X, T24); + T1m = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1l, T1k)); + T1q = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1k, T1l)); + T1i = VSUB(TN, T1e); + T1f = VADD(TN, T1e); + T1H = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1G, T1F)); + T1L = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1F, T1G)); + T1D = VSUB(T1w, T1z); + T1A = VADD(T1w, T1z); + T28 = VFNMS(LDK(KP250000000), T1V, T1O); + T1W = VADD(T1O, T1V); + T1h = VFNMS(LDK(KP250000000), T1f, Ti); + T1g = VMUL(LDK(KP500000000), VADD(Ti, T1f)); + T2w = VFNMS(LDK(KP618033988), T2i, T2j); + T2k = VFMA(LDK(KP618033988), T2j, T2i); + T1C = VFNMS(LDK(KP250000000), T1A, T1t); + T1B = VCONJ(VMUL(LDK(KP500000000), VADD(T1t, T1A))); + T26 = VMUL(LDK(KP500000000), VFNMSI(T25, T1W)); + T27 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T25, T1W))); + T2a = VFMA(LDK(KP559016994), T29, T28); + T2s = VFNMS(LDK(KP559016994), T29, T28); + ST(&(Rp[0]), T1g, ms, &(Rp[0])); + T1j = VFMA(LDK(KP559016994), T1i, T1h); + T1p = VFNMS(LDK(KP559016994), T1i, T1h); + ST(&(Rm[WS(rs, 9)]), T1B, -ms, &(Rm[WS(rs, 1)])); + T1K = VFMA(LDK(KP559016994), T1D, T1C); + T1E = VFNMS(LDK(KP559016994), T1D, T1C); + ST(&(Rm[WS(rs, 4)]), T27, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 5)]), T26, ms, &(Rp[WS(rs, 1)])); + T2A = VFMA(LDK(KP951056516), T2t, T2s); + T2u = VFNMS(LDK(KP951056516), T2t, T2s); + T2e = VFNMS(LDK(KP951056516), T2d, T2a); + T2o = VFMA(LDK(KP951056516), T2d, T2a); + T1n = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1m, T1j))); + T1o = VMUL(LDK(KP500000000), VFMAI(T1m, T1j)); + T1s = VCONJ(VMUL(LDK(KP500000000), VFMAI(T1q, T1p))); + T1r = VMUL(LDK(KP500000000), VFNMSI(T1q, T1p)); + T1I = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1H, T1E))); + T1J = VMUL(LDK(KP500000000), VFMAI(T1H, T1E)); + T1N = 
VCONJ(VMUL(LDK(KP500000000), VFMAI(T1L, T1K))); + T1M = VMUL(LDK(KP500000000), VFNMSI(T1L, T1K)); + ST(&(Rp[WS(rs, 4)]), T1o, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 3)]), T1n, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 8)]), T1r, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 7)]), T1s, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), T1J, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), T1I, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 6)]), T1M, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 5)]), T1N, -ms, &(Rm[WS(rs, 1)])); + } + T2v = VFMA(LDK(KP559016994), T2g, T2f); + T2h = VFNMS(LDK(KP559016994), T2g, T2f); + T2x = VFNMS(LDK(KP951056516), T2w, T2v); + T2B = VFMA(LDK(KP951056516), T2w, T2v); + T2p = VFMA(LDK(KP951056516), T2k, T2h); + T2l = VFNMS(LDK(KP951056516), T2k, T2h); + T2z = VMUL(LDK(KP500000000), VFMAI(T2x, T2u)); + T2y = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2x, T2u))); + T2D = VMUL(LDK(KP500000000), VFMAI(T2B, T2A)); + T2C = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2B, T2A))); + T2r = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2p, T2o))); + T2q = VMUL(LDK(KP500000000), VFNMSI(T2p, T2o)); + T2n = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2l, T2e))); + T2m = VMUL(LDK(KP500000000), VFNMSI(T2l, T2e)); + ST(&(Rp[WS(rs, 3)]), T2z, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), T2y, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 7)]), T2D, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 6)]), T2C, -ms, &(Rm[0])); + ST(&(Rm[0]), T2r, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), T2q, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 8)]), T2n, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 9)]), T2m, ms, &(Rp[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 20, XSIMD_STRING("hc2cfdftv_20"), twinstr, &GENUS, {77, 62, 66, 0} }; + +void 
XSIMD(codelet_hc2cfdftv_20) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_20, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 20 -dit -name hc2cfdftv_20 -include hc2cfv.h */ + +/* + * This function contains 143 FP additions, 77 FP multiplications, + * (or, 131 additions, 65 multiplications, 12 fused multiply/add), + * 141 stack variables, 9 constants, and 40 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP293892626, +0.293892626146236564584352977319536384298826219); + DVK(KP475528258, +0.475528258147576786058219666689691071702849317); + DVK(KP559016994, +0.559016994374947424102293417182819058860154590); + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP125000000, +0.125000000000000000000000000000000000000000000); + DVK(KP279508497, +0.279508497187473712051146708591409529430077295); + DVK(KP587785252, +0.587785252292473129168705954639072768597652438); + DVK(KP951056516, +0.951056516295153572116439333379382143405698634); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 38)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 38), MAKE_VOLATILE_STRIDE(80, rs)) { + V TW, T1x, T2i, T2A, T1r, T1s, T1a, T1y, T1l, Tn, TK, TL, T1p, T1o, T27; + V T2t, T2a, T2u, T2e, T2C, T20, T2w, T23, T2x, T2d, T2B, T1W, T1X, T1U, T1V; + V T2z, T2K, T2G, T2N, T2J, T2v, T2y, T2F, T2D, T2E, T2M, T2H, T2I, T2L; + { + V T1u, T5, Tg, T1c, TV, T13, Ta, T1w, TQ, T11, TI, T1j, Tx, T18, Tl; + V T1e, TD, T1h, Ts, T16, T2g, T2h, T14, T19, T1f, T1k, Tb, Tm, Ty, TJ; + V T25, T26, T28, T29, T1Y, T1Z, T21, T22; + { + V T4, T3, T2, T1, Tf, Te, Td, Tc, T1b, TU, TT, TS, 
TR, T12, T9; + V T8, T7, T6, T1v, TP, TO, TN, TM, T10, TH, TG, TF, TE, T1i, Tw; + V Tv, Tu, Tt, T17, Tk, Tj, Ti, Th, T1d, TC, TB, TA, Tz, T1g, Tr; + V Tq, Tp, To, T15; + T4 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + T1u = VADD(T4, T3); + T1 = LDW(&(W[0])); + T5 = VZMULIJ(T1, VSUB(T3, T4)); + Tf = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + Td = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + Te = VCONJ(Td); + Tc = LDW(&(W[TWVL * 16])); + Tg = VZMULIJ(Tc, VSUB(Te, Tf)); + T1b = LDW(&(W[TWVL * 14])); + T1c = VZMULJ(T1b, VADD(Te, Tf)); + TU = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + TS = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + TT = VCONJ(TS); + TR = LDW(&(W[TWVL * 28])); + TV = VZMULIJ(TR, VSUB(TT, TU)); + T12 = LDW(&(W[TWVL * 26])); + T13 = VZMULJ(T12, VADD(TT, TU)); + T9 = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + T7 = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + T8 = VCONJ(T7); + T6 = LDW(&(W[TWVL * 20])); + Ta = VZMULIJ(T6, VSUB(T8, T9)); + T1v = LDW(&(W[TWVL * 18])); + T1w = VZMULJ(T1v, VADD(T9, T8)); + TP = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + TN = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + TO = VCONJ(TN); + TM = LDW(&(W[TWVL * 8])); + TQ = VZMULIJ(TM, VSUB(TO, TP)); + T10 = LDW(&(W[TWVL * 6])); + T11 = VZMULJ(T10, VADD(TO, TP)); + TH = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + TF = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + TG = VCONJ(TF); + TE = LDW(&(W[TWVL * 4])); + TI = VZMULIJ(TE, VSUB(TG, TH)); + T1i = LDW(&(W[TWVL * 2])); + T1j = VZMULJ(T1i, VADD(TG, TH)); + Tw = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tu = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tv = VCONJ(Tu); + Tt = LDW(&(W[TWVL * 12])); + Tx = VZMULIJ(Tt, VSUB(Tv, Tw)); + T17 = LDW(&(W[TWVL * 10])); + T18 = VZMULJ(T17, VADD(Tw, Tv)); + Tk = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + Ti = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + Tj = VCONJ(Ti); + Th = LDW(&(W[TWVL * 36])); + Tl = VZMULIJ(Th, VSUB(Tj, Tk)); + T1d = LDW(&(W[TWVL * 
34])); + T1e = VZMULJ(T1d, VADD(Tj, Tk)); + TC = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + TA = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + TB = VCONJ(TA); + Tz = LDW(&(W[TWVL * 24])); + TD = VZMULIJ(Tz, VSUB(TB, TC)); + T1g = LDW(&(W[TWVL * 22])); + T1h = VZMULJ(T1g, VADD(TB, TC)); + Tr = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + Tp = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + Tq = VCONJ(Tp); + To = LDW(&(W[TWVL * 32])); + Ts = VZMULIJ(To, VSUB(Tq, Tr)); + T15 = LDW(&(W[TWVL * 30])); + T16 = VZMULJ(T15, VADD(Tr, Tq)); + } + TW = VSUB(TQ, TV); + T1x = VSUB(T1u, T1w); + T2g = VADD(T1u, T1w); + T2h = VADD(TQ, TV); + T2i = VADD(T2g, T2h); + T2A = VSUB(T2g, T2h); + T14 = VSUB(T11, T13); + T19 = VSUB(T16, T18); + T1r = VADD(T14, T19); + T1f = VSUB(T1c, T1e); + T1k = VSUB(T1h, T1j); + T1s = VADD(T1f, T1k); + T1a = VSUB(T14, T19); + T1y = VADD(T1r, T1s); + T1l = VSUB(T1f, T1k); + Tb = VSUB(T5, Ta); + Tm = VSUB(Tg, Tl); + Tn = VADD(Tb, Tm); + Ty = VSUB(Ts, Tx); + TJ = VSUB(TD, TI); + TK = VADD(Ty, TJ); + TL = VADD(Tn, TK); + T1p = VSUB(Ty, TJ); + T1o = VSUB(Tb, Tm); + T25 = VADD(T1c, T1e); + T26 = VADD(TD, TI); + T27 = VADD(T25, T26); + T2t = VSUB(T25, T26); + T28 = VADD(Ts, Tx); + T29 = VADD(T1h, T1j); + T2a = VADD(T28, T29); + T2u = VSUB(T29, T28); + T2e = VADD(T27, T2a); + T2C = VADD(T2t, T2u); + T1Y = VADD(T11, T13); + T1Z = VADD(Tg, Tl); + T20 = VADD(T1Y, T1Z); + T2w = VSUB(T1Y, T1Z); + T21 = VADD(T5, Ta); + T22 = VADD(T16, T18); + T23 = VADD(T21, T22); + T2x = VSUB(T22, T21); + T2d = VADD(T20, T23); + T2B = VADD(T2w, T2x); + } + T1U = VADD(T1x, T1y); + T1V = VBYI(VADD(TW, TL)); + T1W = VMUL(LDK(KP500000000), VSUB(T1U, T1V)); + T1X = VCONJ(VMUL(LDK(KP500000000), VADD(T1V, T1U))); + ST(&(Rp[WS(rs, 5)]), T1W, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 4)]), T1X, -ms, &(Rm[0])); + T2v = VSUB(T2t, T2u); + T2y = VSUB(T2w, T2x); + T2z = VMUL(LDK(KP500000000), VBYI(VFNMS(LDK(KP587785252), T2y, VMUL(LDK(KP951056516), T2v)))); + T2K = VMUL(LDK(KP500000000), VBYI(VFMA(LDK(KP951056516), T2y, 
VMUL(LDK(KP587785252), T2v)))); + T2F = VMUL(LDK(KP279508497), VSUB(T2B, T2C)); + T2D = VADD(T2B, T2C); + T2E = VFNMS(LDK(KP125000000), T2D, VMUL(LDK(KP500000000), T2A)); + T2G = VSUB(T2E, T2F); + T2N = VCONJ(VMUL(LDK(KP500000000), VADD(T2A, T2D))); + T2J = VADD(T2F, T2E); + ST(&(Rm[WS(rs, 9)]), T2N, -ms, &(Rm[WS(rs, 1)])); + T2M = VCONJ(VADD(T2K, T2J)); + ST(&(Rm[WS(rs, 5)]), T2M, -ms, &(Rm[WS(rs, 1)])); + T2H = VADD(T2z, T2G); + ST(&(Rp[WS(rs, 2)]), T2H, ms, &(Rp[0])); + T2I = VCONJ(VSUB(T2G, T2z)); + ST(&(Rm[WS(rs, 1)]), T2I, -ms, &(Rm[WS(rs, 1)])); + T2L = VSUB(T2J, T2K); + ST(&(Rp[WS(rs, 6)]), T2L, ms, &(Rp[0])); + { + V T2c, T2p, T2l, T2s, T2o, T24, T2b, T2f, T2j, T2k, T2r, T2m, T2n, T2q, T1n; + V T1Q, T1E, T1K, T1B, T1R, T1F, T1N, T1m, T1J, TZ, T1I, TX, TY, T1q, T1M; + V T1A, T1L, T1t, T1z, T1C, T1S, T1T, T1D, T1G, T1O, T1P, T1H; + T24 = VSUB(T20, T23); + T2b = VSUB(T27, T2a); + T2c = VMUL(LDK(KP500000000), VBYI(VFMA(LDK(KP951056516), T24, VMUL(LDK(KP587785252), T2b)))); + T2p = VMUL(LDK(KP500000000), VBYI(VFNMS(LDK(KP587785252), T24, VMUL(LDK(KP951056516), T2b)))); + T2f = VMUL(LDK(KP279508497), VSUB(T2d, T2e)); + T2j = VADD(T2d, T2e); + T2k = VFNMS(LDK(KP125000000), T2j, VMUL(LDK(KP500000000), T2i)); + T2l = VADD(T2f, T2k); + T2s = VMUL(LDK(KP500000000), VADD(T2i, T2j)); + T2o = VSUB(T2k, T2f); + ST(&(Rp[0]), T2s, ms, &(Rp[0])); + T2r = VCONJ(VADD(T2p, T2o)); + ST(&(Rm[WS(rs, 7)]), T2r, -ms, &(Rm[WS(rs, 1)])); + T2m = VADD(T2c, T2l); + ST(&(Rp[WS(rs, 4)]), T2m, ms, &(Rp[0])); + T2n = VCONJ(VSUB(T2l, T2c)); + ST(&(Rm[WS(rs, 3)]), T2n, -ms, &(Rm[WS(rs, 1)])); + T2q = VSUB(T2o, T2p); + ST(&(Rp[WS(rs, 8)]), T2q, ms, &(Rp[0])); + T1m = VFMA(LDK(KP951056516), T1a, VMUL(LDK(KP587785252), T1l)); + T1J = VFNMS(LDK(KP587785252), T1a, VMUL(LDK(KP951056516), T1l)); + TX = VFMS(LDK(KP250000000), TL, TW); + TY = VMUL(LDK(KP559016994), VSUB(TK, Tn)); + TZ = VADD(TX, TY); + T1I = VSUB(TY, TX); + T1n = VMUL(LDK(KP500000000), VBYI(VSUB(TZ, T1m))); + T1Q = 
VMUL(LDK(KP500000000), VBYI(VADD(T1I, T1J))); + T1E = VMUL(LDK(KP500000000), VBYI(VADD(TZ, T1m))); + T1K = VMUL(LDK(KP500000000), VBYI(VSUB(T1I, T1J))); + T1q = VFMA(LDK(KP475528258), T1o, VMUL(LDK(KP293892626), T1p)); + T1M = VFNMS(LDK(KP293892626), T1o, VMUL(LDK(KP475528258), T1p)); + T1t = VMUL(LDK(KP279508497), VSUB(T1r, T1s)); + T1z = VFNMS(LDK(KP125000000), T1y, VMUL(LDK(KP500000000), T1x)); + T1A = VADD(T1t, T1z); + T1L = VSUB(T1z, T1t); + T1B = VADD(T1q, T1A); + T1R = VADD(T1M, T1L); + T1F = VSUB(T1A, T1q); + T1N = VSUB(T1L, T1M); + T1C = VADD(T1n, T1B); + ST(&(Rp[WS(rs, 1)]), T1C, ms, &(Rp[WS(rs, 1)])); + T1S = VADD(T1Q, T1R); + ST(&(Rp[WS(rs, 7)]), T1S, ms, &(Rp[WS(rs, 1)])); + T1T = VCONJ(VSUB(T1R, T1Q)); + ST(&(Rm[WS(rs, 6)]), T1T, -ms, &(Rm[0])); + T1D = VCONJ(VSUB(T1B, T1n)); + ST(&(Rm[0]), T1D, -ms, &(Rm[0])); + T1G = VADD(T1E, T1F); + ST(&(Rp[WS(rs, 9)]), T1G, ms, &(Rp[WS(rs, 1)])); + T1O = VADD(T1K, T1N); + ST(&(Rp[WS(rs, 3)]), T1O, ms, &(Rp[WS(rs, 1)])); + T1P = VCONJ(VSUB(T1N, T1K)); + ST(&(Rm[WS(rs, 2)]), T1P, -ms, &(Rm[0])); + T1H = VCONJ(VSUB(T1F, T1E)); + ST(&(Rm[WS(rs, 8)]), T1H, -ms, &(Rm[0])); + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 20, XSIMD_STRING("hc2cfdftv_20"), twinstr, &GENUS, {131, 65, 12, 0} }; + +void XSIMD(codelet_hc2cfdftv_20) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_20, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2003, 
2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 32 -dit -name hc2cfdftv_32 -include hc2cfv.h */ + +/* + * This function contains 249 FP additions, 224 FP multiplications, + * (or, 119 additions, 94 multiplications, 130 fused multiply/add), + * 167 stack variables, 8 constants, and 64 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP668178637, +0.668178637919298919997757686523080761552472251); + DVK(KP198912367, +0.198912367379658006911597622644676228597850501); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + 
DVK(KP414213562, +0.414213562373095048801688724209698078569671875); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 62)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(128, rs)) { + V T2m, T2b, T2c, T2d, T2v, T2r, T20, T2i, T2n, T2e, T2o, T2u, T2j, T2f, T2t; + V T2s, T2x, T2w, T2l, T2k, T2h, T2g; + { + V T41, T3B, T40, T3a, T2J, T27, T2y, Ts, T2C, T1X, T2B, T1Q, T3F, T3w, T4l; + V T49, T1b, T1s, T3c, TB, T1f, T3g, T44, T1l, T3k, T3o, T4b, T28, T14, T1d; + V T3b, TK; + { + V T1V, T1E, T3A, Th, T3v, T47, T1J, T3q, T8, T38, T25, T39, T3z, Tq, T1O; + V T3r, T3, T7, T3u, T24, T22, T3t, T1I, Tn, T1G, To, Tm, T1K, Tl, T1N; + V Tp, T1L, TU, T3f, T3m, T13, T3e, T3n, T1i, TH, TI, T1k, TG, TF, T1c; + V TJ; + { + V T1x, T1y, T1U, T1B, T1S, T1C, T1A, T23, T21, T1z, T1, T2, T1T, T5, T6; + V T1R, T4, T1w, Ta, Tb, T1H, Te, Tf, Td, Tc, T1F, T9, T1D, Tj, Tk; + V Ti, Tg, T1M; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T1T = LDW(&(W[0])); + T5 = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + T1R = LDW(&(W[TWVL * 32])); + T4 = LDW(&(W[TWVL * 30])); + T1x = LD(&(Rp[WS(rs, 12)]), ms, &(Rp[0])); + T1y = LD(&(Rm[WS(rs, 12)]), -ms, &(Rm[0])); + T3 = VFMACONJ(T2, T1); + T1U = VZMULIJ(T1T, VFNMSCONJ(T2, T1)); + T1w = LDW(&(W[TWVL * 48])); + T1B = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T1S = VZMULIJ(T1R, VFNMSCONJ(T6, T5)); + T7 = VZMULJ(T4, VFMACONJ(T6, T5)); + T1C = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T1A = LDW(&(W[TWVL * 16])); + T23 = LDW(&(W[TWVL * 46])); + T21 = LDW(&(W[TWVL * 14])); + T1z = VZMULIJ(T1w, VFNMSCONJ(T1y, T1x)); + Ta = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T3u = VADD(T1U, T1S); + T1V = VSUB(T1S, T1U); + Tb = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T9 = LDW(&(W[TWVL * 6])); + T1D = VZMULIJ(T1A, VFNMSCONJ(T1C, T1B)); + T24 = 
VZMULJ(T23, VFMACONJ(T1y, T1x)); + T22 = VZMULJ(T21, VFMACONJ(T1C, T1B)); + T1H = LDW(&(W[TWVL * 8])); + Te = LD(&(Rp[WS(rs, 10)]), ms, &(Rp[0])); + Tf = LD(&(Rm[WS(rs, 10)]), -ms, &(Rm[0])); + Td = LDW(&(W[TWVL * 38])); + Tc = VZMULJ(T9, VFMACONJ(Tb, Ta)); + T1E = VSUB(T1z, T1D); + T3t = VADD(T1D, T1z); + T1F = LDW(&(W[TWVL * 40])); + Tj = LD(&(Rp[WS(rs, 14)]), ms, &(Rp[0])); + T1I = VZMULIJ(T1H, VFNMSCONJ(Tb, Ta)); + Tk = LD(&(Rm[WS(rs, 14)]), -ms, &(Rm[0])); + Ti = LDW(&(W[TWVL * 54])); + Tg = VZMULJ(Td, VFMACONJ(Tf, Te)); + T1M = LDW(&(W[TWVL * 56])); + Tn = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + T1G = VZMULIJ(T1F, VFNMSCONJ(Tf, Te)); + To = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + Tm = LDW(&(W[TWVL * 22])); + T1K = LDW(&(W[TWVL * 24])); + Tl = VZMULJ(Ti, VFMACONJ(Tk, Tj)); + T3A = VADD(Tc, Tg); + Th = VSUB(Tc, Tg); + T1N = VZMULIJ(T1M, VFNMSCONJ(Tk, Tj)); + } + T3v = VSUB(T3t, T3u); + T47 = VADD(T3u, T3t); + T1J = VSUB(T1G, T1I); + T3q = VADD(T1I, T1G); + Tp = VZMULJ(Tm, VFMACONJ(To, Tn)); + T1L = VZMULIJ(T1K, VFNMSCONJ(To, Tn)); + T8 = VSUB(T3, T7); + T38 = VADD(T3, T7); + T25 = VSUB(T22, T24); + T39 = VADD(T22, T24); + T3z = VADD(Tl, Tp); + Tq = VSUB(Tl, Tp); + T1O = VSUB(T1L, T1N); + T3r = VADD(T1N, T1L); + { + V T10, T11, TZ, T1o, TY, T1r, TN, TO, TM, T19, TR, TS, TQ, T17, T26; + V Tr, T1W, T1P, T3s, T48, TW, TX, TP, T1a, TV, T1q, TT, T18, Ty, Tz; + V Tx, Tw, T1j, Tu, T12, T1p, Tv, Tt, T1h, TD, TA, TE, TC, T1e; + TN = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + TO = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + T41 = VADD(T3A, T3z); + T3B = VSUB(T3z, T3A); + T26 = VSUB(Tq, Th); + Tr = VADD(Th, Tq); + T1W = VADD(T1J, T1O); + T1P = VSUB(T1J, T1O); + T3s = VSUB(T3q, T3r); + T48 = VADD(T3q, T3r); + T40 = VADD(T38, T39); + T3a = VSUB(T38, T39); + T2J = VFNMS(LDK(KP707106781), T26, T25); + T27 = VFMA(LDK(KP707106781), T26, T25); + T2y = VFMA(LDK(KP707106781), Tr, T8); + Ts = VFNMS(LDK(KP707106781), Tr, T8); + T2C = VFMA(LDK(KP707106781), T1W, T1V); + T1X = 
VFNMS(LDK(KP707106781), T1W, T1V); + T2B = VFMA(LDK(KP707106781), T1P, T1E); + T1Q = VFNMS(LDK(KP707106781), T1P, T1E); + T3F = VFMA(LDK(KP414213562), T3s, T3v); + T3w = VFNMS(LDK(KP414213562), T3v, T3s); + T4l = VSUB(T48, T47); + T49 = VADD(T47, T48); + TM = LDW(&(W[TWVL * 10])); + T19 = LDW(&(W[TWVL * 12])); + TR = LD(&(Rp[WS(rs, 11)]), ms, &(Rp[WS(rs, 1)])); + TS = LD(&(Rm[WS(rs, 11)]), -ms, &(Rm[WS(rs, 1)])); + TQ = LDW(&(W[TWVL * 42])); + T17 = LDW(&(W[TWVL * 44])); + TW = LD(&(Rp[WS(rs, 15)]), ms, &(Rp[WS(rs, 1)])); + TX = LD(&(Rm[WS(rs, 15)]), -ms, &(Rm[WS(rs, 1)])); + TP = VZMULJ(TM, VFMACONJ(TO, TN)); + T1a = VZMULIJ(T19, VFNMSCONJ(TO, TN)); + TV = LDW(&(W[TWVL * 58])); + T1q = LDW(&(W[TWVL * 60])); + TT = VZMULJ(TQ, VFMACONJ(TS, TR)); + T18 = VZMULIJ(T17, VFNMSCONJ(TS, TR)); + T10 = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + T11 = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + TZ = LDW(&(W[TWVL * 26])); + T1o = LDW(&(W[TWVL * 28])); + TY = VZMULJ(TV, VFMACONJ(TX, TW)); + T1r = VZMULIJ(T1q, VFNMSCONJ(TX, TW)); + TU = VSUB(TP, TT); + T3f = VADD(TP, TT); + T1b = VSUB(T18, T1a); + T3m = VADD(T1a, T18); + Tu = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + T12 = VZMULJ(TZ, VFMACONJ(T11, T10)); + T1p = VZMULIJ(T1o, VFNMSCONJ(T11, T10)); + Tv = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + Tt = LDW(&(W[TWVL * 18])); + T1h = LDW(&(W[TWVL * 20])); + Ty = LD(&(Rp[WS(rs, 13)]), ms, &(Rp[WS(rs, 1)])); + Tz = LD(&(Rm[WS(rs, 13)]), -ms, &(Rm[WS(rs, 1)])); + Tx = LDW(&(W[TWVL * 50])); + T13 = VSUB(TY, T12); + T3e = VADD(TY, T12); + T1s = VSUB(T1p, T1r); + T3n = VADD(T1r, T1p); + Tw = VZMULJ(Tt, VFMACONJ(Tv, Tu)); + T1i = VZMULIJ(T1h, VFNMSCONJ(Tv, Tu)); + T1j = LDW(&(W[TWVL * 52])); + TD = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + TA = VZMULJ(Tx, VFMACONJ(Tz, Ty)); + TE = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + TC = LDW(&(W[TWVL * 2])); + T1e = LDW(&(W[TWVL * 4])); + TH = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + TI = LD(&(Rm[WS(rs, 9)]), -ms, 
&(Rm[WS(rs, 1)])); + T1k = VZMULIJ(T1j, VFNMSCONJ(Tz, Ty)); + TG = LDW(&(W[TWVL * 34])); + T3c = VADD(Tw, TA); + TB = VSUB(Tw, TA); + TF = VZMULJ(TC, VFMACONJ(TE, TD)); + T1f = VZMULIJ(T1e, VFNMSCONJ(TE, TD)); + T1c = LDW(&(W[TWVL * 36])); + } + T3g = VSUB(T3e, T3f); + T44 = VADD(T3e, T3f); + T1l = VSUB(T1i, T1k); + T3k = VADD(T1i, T1k); + TJ = VZMULJ(TG, VFMACONJ(TI, TH)); + T3o = VSUB(T3m, T3n); + T4b = VADD(T3n, T3m); + T28 = VFMA(LDK(KP414213562), TU, T13); + T14 = VFNMS(LDK(KP414213562), T13, TU); + T1d = VZMULIJ(T1c, VFNMSCONJ(TI, TH)); + T3b = VADD(TF, TJ); + TK = VSUB(TF, TJ); + } + { + V T4k, T4p, T2z, T2a, T2K, T15, T2E, T1n, T2F, T1u, T4c, T3R, T3D, T3i, T3O; + V T46, T4g, T3G, T3P, T3S, T3x, T4q, T4n, T42, T1g, T3j, T3E, T3p, T4m, T3d; + V T43, T29, TL, T1m, T1t, T3l, T4a, T3C, T3h, T45, T3Q, T3W, T4d, T4h, T3H; + V T3L, T3y, T3K, T4r, T4v, T4o, T4u, T4j, T4i, T4e, T4f, T3N, T3M, T3I, T3J; + V T4x, T4w, T4s, T4t; + T42 = VADD(T40, T41); + T4k = VSUB(T40, T41); + T1g = VSUB(T1d, T1f); + T3j = VADD(T1f, T1d); + T3d = VSUB(T3b, T3c); + T43 = VADD(T3b, T3c); + T29 = VFNMS(LDK(KP414213562), TB, TK); + TL = VFMA(LDK(KP414213562), TK, TB); + T1m = VSUB(T1g, T1l); + T1t = VADD(T1g, T1l); + T3l = VSUB(T3j, T3k); + T4a = VADD(T3j, T3k); + T3C = VSUB(T3g, T3d); + T3h = VADD(T3d, T3g); + T45 = VADD(T43, T44); + T4p = VSUB(T44, T43); + T2z = VADD(T29, T28); + T2a = VSUB(T28, T29); + T2K = VADD(TL, T14); + T15 = VSUB(TL, T14); + T2E = VFMA(LDK(KP707106781), T1m, T1b); + T1n = VFNMS(LDK(KP707106781), T1m, T1b); + T2F = VFMA(LDK(KP707106781), T1t, T1s); + T1u = VFNMS(LDK(KP707106781), T1t, T1s); + T3E = VFNMS(LDK(KP414213562), T3l, T3o); + T3p = VFMA(LDK(KP414213562), T3o, T3l); + T4m = VSUB(T4a, T4b); + T4c = VADD(T4a, T4b); + T3R = VFMA(LDK(KP707106781), T3C, T3B); + T3D = VFNMS(LDK(KP707106781), T3C, T3B); + T3i = VFNMS(LDK(KP707106781), T3h, T3a); + T3O = VFMA(LDK(KP707106781), T3h, T3a); + T46 = VSUB(T42, T45); + T4g = VADD(T42, T45); + T3G = VSUB(T3E, T3F); + T3P 
= VADD(T3F, T3E); + T3S = VADD(T3w, T3p); + T3x = VSUB(T3p, T3w); + T4q = VSUB(T4m, T4l); + T4n = VADD(T4l, T4m); + T4d = VSUB(T49, T4c); + T4h = VADD(T49, T4c); + T3H = VFNMS(LDK(KP923879532), T3G, T3D); + T3L = VFMA(LDK(KP923879532), T3G, T3D); + T3y = VFMA(LDK(KP923879532), T3x, T3i); + T3K = VFNMS(LDK(KP923879532), T3x, T3i); + T4r = VFMA(LDK(KP707106781), T4q, T4p); + T4v = VFNMS(LDK(KP707106781), T4q, T4p); + T4o = VFMA(LDK(KP707106781), T4n, T4k); + T4u = VFNMS(LDK(KP707106781), T4n, T4k); + T3Q = VFMA(LDK(KP923879532), T3P, T3O); + T3W = VFNMS(LDK(KP923879532), T3P, T3O); + T4j = VCONJ(VMUL(LDK(KP500000000), VADD(T4h, T4g))); + T4i = VMUL(LDK(KP500000000), VSUB(T4g, T4h)); + T4e = VMUL(LDK(KP500000000), VFMAI(T4d, T46)); + T4f = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T4d, T46))); + T3N = VMUL(LDK(KP500000000), VFMAI(T3L, T3K)); + T3M = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T3L, T3K))); + T3I = VMUL(LDK(KP500000000), VFNMSI(T3H, T3y)); + T3J = VCONJ(VMUL(LDK(KP500000000), VFMAI(T3H, T3y))); + T4x = VCONJ(VMUL(LDK(KP500000000), VFMAI(T4v, T4u))); + T4w = VMUL(LDK(KP500000000), VFNMSI(T4v, T4u)); + T4s = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T4r, T4o))); + T4t = VMUL(LDK(KP500000000), VFMAI(T4r, T4o)); + ST(&(Rp[0]), T4i, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 15)]), T4j, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rm[WS(rs, 7)]), T4f, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 8)]), T4e, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 9)]), T3M, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 10)]), T3N, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 5)]), T3J, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 6)]), T3I, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 12)]), T4w, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 11)]), T4x, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 4)]), T4t, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 3)]), T4s, -ms, &(Rm[WS(rs, 1)])); + { + V T2A, T2W, T2L, T2Z, T2D, T2N, T2M, T2G, T3T, T3X, T16, T2p, T1v, T35, T31; + V T2I, T2S, T34, T2Y, T2P, T2T, T1Y, T2H, T30, T3Z, T3Y, T3U, T3V, T2O, T2X; + V T32, T33, T36, T37, T2U, T2V, T2Q, T2R, T1Z, 
T2q; + T2A = VFNMS(LDK(KP923879532), T2z, T2y); + T2W = VFMA(LDK(KP923879532), T2z, T2y); + T2L = VFNMS(LDK(KP923879532), T2K, T2J); + T2Z = VFMA(LDK(KP923879532), T2K, T2J); + T2D = VFMA(LDK(KP198912367), T2C, T2B); + T2N = VFNMS(LDK(KP198912367), T2B, T2C); + T2M = VFMA(LDK(KP198912367), T2E, T2F); + T2G = VFNMS(LDK(KP198912367), T2F, T2E); + T3T = VFMA(LDK(KP923879532), T3S, T3R); + T3X = VFNMS(LDK(KP923879532), T3S, T3R); + T16 = VFNMS(LDK(KP923879532), T15, Ts); + T2m = VFMA(LDK(KP923879532), T15, Ts); + T2H = VSUB(T2D, T2G); + T30 = VADD(T2D, T2G); + T2b = VFNMS(LDK(KP923879532), T2a, T27); + T2p = VFMA(LDK(KP923879532), T2a, T27); + T1v = VFMA(LDK(KP668178637), T1u, T1n); + T2c = VFNMS(LDK(KP668178637), T1n, T1u); + T3Z = VCONJ(VMUL(LDK(KP500000000), VFMAI(T3X, T3W))); + T3Y = VMUL(LDK(KP500000000), VFNMSI(T3X, T3W)); + T3U = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T3T, T3Q))); + T3V = VMUL(LDK(KP500000000), VFMAI(T3T, T3Q)); + T2O = VSUB(T2M, T2N); + T2X = VADD(T2N, T2M); + T35 = VFNMS(LDK(KP980785280), T30, T2Z); + T31 = VFMA(LDK(KP980785280), T30, T2Z); + T2I = VFMA(LDK(KP980785280), T2H, T2A); + T2S = VFNMS(LDK(KP980785280), T2H, T2A); + ST(&(Rp[WS(rs, 14)]), T3Y, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 13)]), T3Z, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), T3V, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), T3U, -ms, &(Rm[WS(rs, 1)])); + T34 = VFNMS(LDK(KP980785280), T2X, T2W); + T2Y = VFMA(LDK(KP980785280), T2X, T2W); + T2P = VFMA(LDK(KP980785280), T2O, T2L); + T2T = VFNMS(LDK(KP980785280), T2O, T2L); + T2d = VFMA(LDK(KP668178637), T1Q, T1X); + T1Y = VFNMS(LDK(KP668178637), T1X, T1Q); + T32 = VMUL(LDK(KP500000000), VFNMSI(T31, T2Y)); + T33 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T31, T2Y))); + T36 = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T35, T34))); + T37 = VMUL(LDK(KP500000000), VFMAI(T35, T34)); + T2U = VMUL(LDK(KP500000000), VFNMSI(T2T, T2S)); + T2V = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2T, T2S))); + T2Q = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2P, T2I))); + T2R = 
VMUL(LDK(KP500000000), VFMAI(T2P, T2I)); + T1Z = VSUB(T1v, T1Y); + T2q = VADD(T1Y, T1v); + ST(&(Rm[0]), T33, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), T32, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 15)]), T37, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 14)]), T36, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 8)]), T2V, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 9)]), T2U, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[WS(rs, 7)]), T2R, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 6)]), T2Q, -ms, &(Rm[0])); + T2v = VFNMS(LDK(KP831469612), T2q, T2p); + T2r = VFMA(LDK(KP831469612), T2q, T2p); + T20 = VFMA(LDK(KP831469612), T1Z, T16); + T2i = VFNMS(LDK(KP831469612), T1Z, T16); + } + } + } + T2n = VADD(T2d, T2c); + T2e = VSUB(T2c, T2d); + T2o = VFMA(LDK(KP831469612), T2n, T2m); + T2u = VFNMS(LDK(KP831469612), T2n, T2m); + T2j = VFMA(LDK(KP831469612), T2e, T2b); + T2f = VFNMS(LDK(KP831469612), T2e, T2b); + T2t = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2r, T2o))); + T2s = VMUL(LDK(KP500000000), VFMAI(T2r, T2o)); + T2x = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2v, T2u))); + T2w = VMUL(LDK(KP500000000), VFNMSI(T2v, T2u)); + T2l = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2j, T2i))); + T2k = VMUL(LDK(KP500000000), VFMAI(T2j, T2i)); + T2h = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2f, T20))); + T2g = VMUL(LDK(KP500000000), VFNMSI(T2f, T20)); + ST(&(Rm[WS(rs, 2)]), T2t, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 3)]), T2s, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 12)]), T2x, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 13)]), T2w, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 10)]), T2l, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 11)]), T2k, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 4)]), T2h, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 5)]), T2g, ms, &(Rp[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + VTW(1, 
20), + VTW(1, 21), + VTW(1, 22), + VTW(1, 23), + VTW(1, 24), + VTW(1, 25), + VTW(1, 26), + VTW(1, 27), + VTW(1, 28), + VTW(1, 29), + VTW(1, 30), + VTW(1, 31), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 32, XSIMD_STRING("hc2cfdftv_32"), twinstr, &GENUS, {119, 94, 130, 0} }; + +void XSIMD(codelet_hc2cfdftv_32) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_32, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 32 -dit -name hc2cfdftv_32 -include hc2cfv.h */ + +/* + * This function contains 249 FP additions, 133 FP multiplications, + * (or, 233 additions, 117 multiplications, 16 fused multiply/add), + * 130 stack variables, 9 constants, and 64 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP555570233, +0.555570233019602224742830813948532874374937191); + DVK(KP831469612, +0.831469612302545237078788377617905756738560812); + DVK(KP195090322, +0.195090322016128267848284868477022240927691618); + DVK(KP980785280, +0.980785280403230449126182236134239036973933731); + DVK(KP382683432, +0.382683432365089771728459984030398866761344562); + DVK(KP923879532, +0.923879532511286756128183189396788286822416626); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP353553390, +0.353553390593273762200422181052424519642417969); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 62)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(128, rs)) { + V Ta, T2m, Tx, T2h, T3R, T4h, T3q, T4g, T3B, T4n, T3E, T4o, T1B, T2S, T1O; + V T2R, TV, T2p, T1i, T2o, T3L, T4q, T3I, T4r, T3w, T4k, T3t, T4j, T26, T2V; + V T2d, T2U; + { + V 
T4, T1m, T1H, T2j, T1M, T2l, T9, T1o, Tf, T1r, Tq, T1w, Tv, T1y, Tk; + V T1t, Tl, Tw, T3P, T3Q, T3o, T3p, T3z, T3A, T3C, T3D, T1p, T1N, T1A, T1C; + V T1u, T1z; + { + V T1, T3, T2, T1l, T1G, T1F, T1E, T1D, T2i, T1L, T1K, T1J, T1I, T2k, T6; + V T8, T7, T5, T1n, Tc, Te, Td, Tb, T1q, Tn, Tp, To, Tm, T1v, Ts; + V Tu, Tt, Tr, T1x, Th, Tj, Ti, Tg, T1s; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + T4 = VADD(T1, T3); + T1l = LDW(&(W[0])); + T1m = VZMULIJ(T1l, VSUB(T3, T1)); + T1G = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); + T1E = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); + T1F = VCONJ(T1E); + T1D = LDW(&(W[TWVL * 16])); + T1H = VZMULIJ(T1D, VSUB(T1F, T1G)); + T2i = LDW(&(W[TWVL * 14])); + T2j = VZMULJ(T2i, VADD(T1G, T1F)); + T1L = LD(&(Rp[WS(rs, 12)]), ms, &(Rp[0])); + T1J = LD(&(Rm[WS(rs, 12)]), -ms, &(Rm[0])); + T1K = VCONJ(T1J); + T1I = LDW(&(W[TWVL * 48])); + T1M = VZMULIJ(T1I, VSUB(T1K, T1L)); + T2k = LDW(&(W[TWVL * 46])); + T2l = VZMULJ(T2k, VADD(T1L, T1K)); + T6 = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0])); + T7 = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + T5 = LDW(&(W[TWVL * 30])); + T9 = VZMULJ(T5, VADD(T6, T8)); + T1n = LDW(&(W[TWVL * 32])); + T1o = VZMULIJ(T1n, VSUB(T8, T6)); + Tc = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Td = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Te = VCONJ(Td); + Tb = LDW(&(W[TWVL * 6])); + Tf = VZMULJ(Tb, VADD(Tc, Te)); + T1q = LDW(&(W[TWVL * 8])); + T1r = VZMULIJ(T1q, VSUB(Te, Tc)); + Tn = LD(&(Rp[WS(rs, 14)]), ms, &(Rp[0])); + To = LD(&(Rm[WS(rs, 14)]), -ms, &(Rm[0])); + Tp = VCONJ(To); + Tm = LDW(&(W[TWVL * 54])); + Tq = VZMULJ(Tm, VADD(Tn, Tp)); + T1v = LDW(&(W[TWVL * 56])); + T1w = VZMULIJ(T1v, VSUB(Tp, Tn)); + Ts = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0])); + Tt = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0])); + Tu = VCONJ(Tt); + Tr = LDW(&(W[TWVL * 22])); + Tv = VZMULJ(Tr, VADD(Ts, Tu)); + T1x = LDW(&(W[TWVL * 24])); + T1y = VZMULIJ(T1x, VSUB(Tu, Ts)); + Th = LD(&(Rp[WS(rs, 10)]), ms, &(Rp[0])); + Ti = 
LD(&(Rm[WS(rs, 10)]), -ms, &(Rm[0])); + Tj = VCONJ(Ti); + Tg = LDW(&(W[TWVL * 38])); + Tk = VZMULJ(Tg, VADD(Th, Tj)); + T1s = LDW(&(W[TWVL * 40])); + T1t = VZMULIJ(T1s, VSUB(Tj, Th)); + } + Ta = VMUL(LDK(KP500000000), VSUB(T4, T9)); + T2m = VSUB(T2j, T2l); + Tl = VSUB(Tf, Tk); + Tw = VSUB(Tq, Tv); + Tx = VMUL(LDK(KP353553390), VADD(Tl, Tw)); + T2h = VMUL(LDK(KP707106781), VSUB(Tw, Tl)); + T3P = VADD(Tq, Tv); + T3Q = VADD(Tf, Tk); + T3R = VSUB(T3P, T3Q); + T4h = VADD(T3Q, T3P); + T3o = VADD(T4, T9); + T3p = VADD(T2j, T2l); + T3q = VMUL(LDK(KP500000000), VSUB(T3o, T3p)); + T4g = VADD(T3o, T3p); + T3z = VADD(T1m, T1o); + T3A = VADD(T1H, T1M); + T3B = VSUB(T3z, T3A); + T4n = VADD(T3z, T3A); + T3C = VADD(T1w, T1y); + T3D = VADD(T1r, T1t); + T3E = VSUB(T3C, T3D); + T4o = VADD(T3D, T3C); + T1p = VSUB(T1m, T1o); + T1N = VSUB(T1H, T1M); + T1u = VSUB(T1r, T1t); + T1z = VSUB(T1w, T1y); + T1A = VMUL(LDK(KP707106781), VADD(T1u, T1z)); + T1C = VMUL(LDK(KP707106781), VSUB(T1z, T1u)); + T1B = VADD(T1p, T1A); + T2S = VADD(T1N, T1C); + T1O = VSUB(T1C, T1N); + T2R = VSUB(T1p, T1A); + } + { + V TD, T1R, T1b, T29, T1g, T2b, TI, T1T, TO, T1Y, T10, T22, T15, T24, TT; + V T1W, TJ, TU, T16, T1h, T3J, T3K, T3G, T3H, T3u, T3v, T3r, T3s, T25, T2c; + V T20, T27, T1U, T1Z; + { + V TA, TC, TB, Tz, T1Q, T18, T1a, T19, T17, T28, T1d, T1f, T1e, T1c, T2a; + V TF, TH, TG, TE, T1S, TL, TN, TM, TK, T1X, TX, TZ, TY, TW, T21; + V T12, T14, T13, T11, T23, TQ, TS, TR, TP, T1V; + TA = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + TB = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + TC = VCONJ(TB); + Tz = LDW(&(W[TWVL * 2])); + TD = VZMULJ(Tz, VADD(TA, TC)); + T1Q = LDW(&(W[TWVL * 4])); + T1R = VZMULIJ(T1Q, VSUB(TC, TA)); + T18 = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + T19 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + T1a = VCONJ(T19); + T17 = LDW(&(W[TWVL * 10])); + T1b = VZMULJ(T17, VADD(T18, T1a)); + T28 = LDW(&(W[TWVL * 12])); + T29 = VZMULIJ(T28, VSUB(T1a, T18)); + T1d = LD(&(Rp[WS(rs, 11)]), 
ms, &(Rp[WS(rs, 1)])); + T1e = LD(&(Rm[WS(rs, 11)]), -ms, &(Rm[WS(rs, 1)])); + T1f = VCONJ(T1e); + T1c = LDW(&(W[TWVL * 42])); + T1g = VZMULJ(T1c, VADD(T1d, T1f)); + T2a = LDW(&(W[TWVL * 44])); + T2b = VZMULIJ(T2a, VSUB(T1f, T1d)); + TF = LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)])); + TG = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)])); + TH = VCONJ(TG); + TE = LDW(&(W[TWVL * 34])); + TI = VZMULJ(TE, VADD(TF, TH)); + T1S = LDW(&(W[TWVL * 36])); + T1T = VZMULIJ(T1S, VSUB(TH, TF)); + TL = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); + TM = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); + TN = VCONJ(TM); + TK = LDW(&(W[TWVL * 18])); + TO = VZMULJ(TK, VADD(TL, TN)); + T1X = LDW(&(W[TWVL * 20])); + T1Y = VZMULIJ(T1X, VSUB(TN, TL)); + TX = LD(&(Rp[WS(rs, 15)]), ms, &(Rp[WS(rs, 1)])); + TY = LD(&(Rm[WS(rs, 15)]), -ms, &(Rm[WS(rs, 1)])); + TZ = VCONJ(TY); + TW = LDW(&(W[TWVL * 58])); + T10 = VZMULJ(TW, VADD(TX, TZ)); + T21 = LDW(&(W[TWVL * 60])); + T22 = VZMULIJ(T21, VSUB(TZ, TX)); + T12 = LD(&(Rp[WS(rs, 7)]), ms, &(Rp[WS(rs, 1)])); + T13 = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)])); + T14 = VCONJ(T13); + T11 = LDW(&(W[TWVL * 26])); + T15 = VZMULJ(T11, VADD(T12, T14)); + T23 = LDW(&(W[TWVL * 28])); + T24 = VZMULIJ(T23, VSUB(T14, T12)); + TQ = LD(&(Rp[WS(rs, 13)]), ms, &(Rp[WS(rs, 1)])); + TR = LD(&(Rm[WS(rs, 13)]), -ms, &(Rm[WS(rs, 1)])); + TS = VCONJ(TR); + TP = LDW(&(W[TWVL * 50])); + TT = VZMULJ(TP, VADD(TQ, TS)); + T1V = LDW(&(W[TWVL * 52])); + T1W = VZMULIJ(T1V, VSUB(TS, TQ)); + } + TJ = VSUB(TD, TI); + TU = VSUB(TO, TT); + TV = VFNMS(LDK(KP382683432), TU, VMUL(LDK(KP923879532), TJ)); + T2p = VFMA(LDK(KP382683432), TJ, VMUL(LDK(KP923879532), TU)); + T16 = VSUB(T10, T15); + T1h = VSUB(T1b, T1g); + T1i = VFMA(LDK(KP923879532), T16, VMUL(LDK(KP382683432), T1h)); + T2o = VFNMS(LDK(KP923879532), T1h, VMUL(LDK(KP382683432), T16)); + T3J = VADD(T1Y, T1W); + T3K = VADD(T1R, T1T); + T3L = VSUB(T3J, T3K); + T4q = VADD(T3K, T3J); + T3G = VADD(T22, T24); + T3H = VADD(T29, T2b); + T3I = 
VSUB(T3G, T3H); + T4r = VADD(T3G, T3H); + T3u = VADD(T10, T15); + T3v = VADD(T1b, T1g); + T3w = VSUB(T3u, T3v); + T4k = VADD(T3u, T3v); + T3r = VADD(TD, TI); + T3s = VADD(TO, TT); + T3t = VSUB(T3r, T3s); + T4j = VADD(T3r, T3s); + T25 = VSUB(T22, T24); + T2c = VSUB(T29, T2b); + T1U = VSUB(T1R, T1T); + T1Z = VSUB(T1W, T1Y); + T20 = VMUL(LDK(KP707106781), VADD(T1U, T1Z)); + T27 = VMUL(LDK(KP707106781), VSUB(T1Z, T1U)); + T26 = VADD(T20, T25); + T2V = VADD(T27, T2c); + T2d = VSUB(T27, T2c); + T2U = VSUB(T25, T20); + } + { + V T4m, T4w, T4t, T4x, T4i, T4l, T4p, T4s, T4u, T4z, T4v, T4y, T4E, T4L, T4H; + V T4K, T4A, T4F, T4D, T4G, T4B, T4C, T4I, T4N, T4J, T4M, T3O, T4c, T4d, T3X; + V T40, T46, T49, T41, T3y, T47, T3T, T45, T3N, T44, T3W, T48, T3x, T3S, T3F; + V T3M, T3U, T3V, T3Y, T4e, T4f, T3Z, T42, T4a, T4b, T43; + T4i = VADD(T4g, T4h); + T4l = VADD(T4j, T4k); + T4m = VADD(T4i, T4l); + T4w = VSUB(T4i, T4l); + T4p = VADD(T4n, T4o); + T4s = VADD(T4q, T4r); + T4t = VADD(T4p, T4s); + T4x = VBYI(VSUB(T4s, T4p)); + T4u = VCONJ(VMUL(LDK(KP500000000), VSUB(T4m, T4t))); + ST(&(Rm[WS(rs, 15)]), T4u, -ms, &(Rm[WS(rs, 1)])); + T4z = VMUL(LDK(KP500000000), VADD(T4w, T4x)); + ST(&(Rp[WS(rs, 8)]), T4z, ms, &(Rp[0])); + T4v = VMUL(LDK(KP500000000), VADD(T4m, T4t)); + ST(&(Rp[0]), T4v, ms, &(Rp[0])); + T4y = VCONJ(VMUL(LDK(KP500000000), VSUB(T4w, T4x))); + ST(&(Rm[WS(rs, 7)]), T4y, -ms, &(Rm[WS(rs, 1)])); + T4A = VMUL(LDK(KP500000000), VSUB(T4g, T4h)); + T4F = VSUB(T4k, T4j); + T4B = VSUB(T4n, T4o); + T4C = VSUB(T4r, T4q); + T4D = VMUL(LDK(KP353553390), VADD(T4B, T4C)); + T4G = VMUL(LDK(KP707106781), VSUB(T4C, T4B)); + T4E = VADD(T4A, T4D); + T4L = VMUL(LDK(KP500000000), VBYI(VSUB(T4G, T4F))); + T4H = VMUL(LDK(KP500000000), VBYI(VADD(T4F, T4G))); + T4K = VSUB(T4A, T4D); + T4I = VCONJ(VSUB(T4E, T4H)); + ST(&(Rm[WS(rs, 3)]), T4I, -ms, &(Rm[WS(rs, 1)])); + T4N = VADD(T4K, T4L); + ST(&(Rp[WS(rs, 12)]), T4N, ms, &(Rp[0])); + T4J = VADD(T4E, T4H); + ST(&(Rp[WS(rs, 4)]), T4J, ms, &(Rp[0])); + 
T4M = VCONJ(VSUB(T4K, T4L)); + ST(&(Rm[WS(rs, 11)]), T4M, -ms, &(Rm[WS(rs, 1)])); + T3x = VMUL(LDK(KP353553390), VADD(T3t, T3w)); + T3y = VADD(T3q, T3x); + T47 = VSUB(T3q, T3x); + T3S = VMUL(LDK(KP707106781), VSUB(T3w, T3t)); + T3T = VADD(T3R, T3S); + T45 = VSUB(T3S, T3R); + T3F = VFMA(LDK(KP923879532), T3B, VMUL(LDK(KP382683432), T3E)); + T3M = VFNMS(LDK(KP382683432), T3L, VMUL(LDK(KP923879532), T3I)); + T3N = VMUL(LDK(KP500000000), VADD(T3F, T3M)); + T44 = VSUB(T3M, T3F); + T3U = VFNMS(LDK(KP382683432), T3B, VMUL(LDK(KP923879532), T3E)); + T3V = VFMA(LDK(KP923879532), T3L, VMUL(LDK(KP382683432), T3I)); + T3W = VADD(T3U, T3V); + T48 = VMUL(LDK(KP500000000), VSUB(T3V, T3U)); + T3O = VADD(T3y, T3N); + T4c = VMUL(LDK(KP500000000), VBYI(VADD(T45, T44))); + T4d = VADD(T47, T48); + T3X = VMUL(LDK(KP500000000), VBYI(VADD(T3T, T3W))); + T40 = VSUB(T3y, T3N); + T46 = VMUL(LDK(KP500000000), VBYI(VSUB(T44, T45))); + T49 = VSUB(T47, T48); + T41 = VMUL(LDK(KP500000000), VBYI(VSUB(T3W, T3T))); + T3Y = VCONJ(VSUB(T3O, T3X)); + ST(&(Rm[WS(rs, 1)]), T3Y, -ms, &(Rm[WS(rs, 1)])); + T4e = VADD(T4c, T4d); + ST(&(Rp[WS(rs, 6)]), T4e, ms, &(Rp[0])); + T4f = VCONJ(VSUB(T4d, T4c)); + ST(&(Rm[WS(rs, 5)]), T4f, -ms, &(Rm[WS(rs, 1)])); + T3Z = VADD(T3O, T3X); + ST(&(Rp[WS(rs, 2)]), T3Z, ms, &(Rp[0])); + T42 = VCONJ(VSUB(T40, T41)); + ST(&(Rm[WS(rs, 13)]), T42, -ms, &(Rm[WS(rs, 1)])); + T4a = VADD(T46, T49); + ST(&(Rp[WS(rs, 10)]), T4a, ms, &(Rp[0])); + T4b = VCONJ(VSUB(T49, T46)); + ST(&(Rm[WS(rs, 9)]), T4b, -ms, &(Rm[WS(rs, 1)])); + T43 = VADD(T40, T41); + ST(&(Rp[WS(rs, 14)]), T43, ms, &(Rp[0])); + { + V T2g, T2K, T2L, T2v, T2y, T2E, T2H, T2z, T1k, T2F, T2u, T2G, T2f, T2C, T2r; + V T2D, Ty, T1j, T2s, T2t, T1P, T2e, T2n, T2q, T2w, T2M, T2N, T2x, T2A, T2I; + V T2J, T2B; + Ty = VADD(Ta, Tx); + T1j = VMUL(LDK(KP500000000), VADD(TV, T1i)); + T1k = VADD(Ty, T1j); + T2F = VSUB(Ty, T1j); + T2s = VFNMS(LDK(KP195090322), T1B, VMUL(LDK(KP980785280), T1O)); + T2t = VFMA(LDK(KP195090322), T26, 
VMUL(LDK(KP980785280), T2d)); + T2u = VADD(T2s, T2t); + T2G = VMUL(LDK(KP500000000), VSUB(T2t, T2s)); + T1P = VFMA(LDK(KP980785280), T1B, VMUL(LDK(KP195090322), T1O)); + T2e = VFNMS(LDK(KP195090322), T2d, VMUL(LDK(KP980785280), T26)); + T2f = VMUL(LDK(KP500000000), VADD(T1P, T2e)); + T2C = VSUB(T2e, T1P); + T2n = VSUB(T2h, T2m); + T2q = VSUB(T2o, T2p); + T2r = VADD(T2n, T2q); + T2D = VSUB(T2q, T2n); + T2g = VADD(T1k, T2f); + T2K = VMUL(LDK(KP500000000), VBYI(VADD(T2D, T2C))); + T2L = VADD(T2F, T2G); + T2v = VMUL(LDK(KP500000000), VBYI(VADD(T2r, T2u))); + T2y = VSUB(T1k, T2f); + T2E = VMUL(LDK(KP500000000), VBYI(VSUB(T2C, T2D))); + T2H = VSUB(T2F, T2G); + T2z = VMUL(LDK(KP500000000), VBYI(VSUB(T2u, T2r))); + T2w = VCONJ(VSUB(T2g, T2v)); + ST(&(Rm[0]), T2w, -ms, &(Rm[0])); + T2M = VADD(T2K, T2L); + ST(&(Rp[WS(rs, 7)]), T2M, ms, &(Rp[WS(rs, 1)])); + T2N = VCONJ(VSUB(T2L, T2K)); + ST(&(Rm[WS(rs, 6)]), T2N, -ms, &(Rm[0])); + T2x = VADD(T2g, T2v); + ST(&(Rp[WS(rs, 1)]), T2x, ms, &(Rp[WS(rs, 1)])); + T2A = VCONJ(VSUB(T2y, T2z)); + ST(&(Rm[WS(rs, 14)]), T2A, -ms, &(Rm[0])); + T2I = VADD(T2E, T2H); + ST(&(Rp[WS(rs, 9)]), T2I, ms, &(Rp[WS(rs, 1)])); + T2J = VCONJ(VSUB(T2H, T2E)); + ST(&(Rm[WS(rs, 8)]), T2J, -ms, &(Rm[0])); + T2B = VADD(T2y, T2z); + ST(&(Rp[WS(rs, 15)]), T2B, ms, &(Rp[WS(rs, 1)])); + } + { + V T2Y, T3k, T3l, T35, T38, T3e, T3h, T39, T2Q, T3f, T34, T3g, T2X, T3c, T31; + V T3d, T2O, T2P, T32, T33, T2T, T2W, T2Z, T30, T36, T3m, T3n, T37, T3a, T3i; + V T3j, T3b; + T2O = VSUB(Ta, Tx); + T2P = VMUL(LDK(KP500000000), VADD(T2p, T2o)); + T2Q = VADD(T2O, T2P); + T3f = VSUB(T2O, T2P); + T32 = VFNMS(LDK(KP555570233), T2R, VMUL(LDK(KP831469612), T2S)); + T33 = VFMA(LDK(KP555570233), T2U, VMUL(LDK(KP831469612), T2V)); + T34 = VADD(T32, T33); + T3g = VMUL(LDK(KP500000000), VSUB(T33, T32)); + T2T = VFMA(LDK(KP831469612), T2R, VMUL(LDK(KP555570233), T2S)); + T2W = VFNMS(LDK(KP555570233), T2V, VMUL(LDK(KP831469612), T2U)); + T2X = VMUL(LDK(KP500000000), VADD(T2T, T2W)); + T3c 
= VSUB(T2W, T2T); + T2Z = VADD(T2m, T2h); + T30 = VSUB(T1i, TV); + T31 = VADD(T2Z, T30); + T3d = VSUB(T30, T2Z); + T2Y = VADD(T2Q, T2X); + T3k = VMUL(LDK(KP500000000), VBYI(VADD(T3d, T3c))); + T3l = VADD(T3f, T3g); + T35 = VMUL(LDK(KP500000000), VBYI(VADD(T31, T34))); + T38 = VSUB(T2Q, T2X); + T3e = VMUL(LDK(KP500000000), VBYI(VSUB(T3c, T3d))); + T3h = VSUB(T3f, T3g); + T39 = VMUL(LDK(KP500000000), VBYI(VSUB(T34, T31))); + T36 = VCONJ(VSUB(T2Y, T35)); + ST(&(Rm[WS(rs, 2)]), T36, -ms, &(Rm[0])); + T3m = VADD(T3k, T3l); + ST(&(Rp[WS(rs, 5)]), T3m, ms, &(Rp[WS(rs, 1)])); + T3n = VCONJ(VSUB(T3l, T3k)); + ST(&(Rm[WS(rs, 4)]), T3n, -ms, &(Rm[0])); + T37 = VADD(T2Y, T35); + ST(&(Rp[WS(rs, 3)]), T37, ms, &(Rp[WS(rs, 1)])); + T3a = VCONJ(VSUB(T38, T39)); + ST(&(Rm[WS(rs, 12)]), T3a, -ms, &(Rm[0])); + T3i = VADD(T3e, T3h); + ST(&(Rp[WS(rs, 11)]), T3i, ms, &(Rp[WS(rs, 1)])); + T3j = VCONJ(VSUB(T3h, T3e)); + ST(&(Rm[WS(rs, 10)]), T3j, -ms, &(Rm[0])); + T3b = VADD(T38, T39); + ST(&(Rp[WS(rs, 13)]), T3b, ms, &(Rp[WS(rs, 1)])); + } + } + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + VTW(1, 8), + VTW(1, 9), + VTW(1, 10), + VTW(1, 11), + VTW(1, 12), + VTW(1, 13), + VTW(1, 14), + VTW(1, 15), + VTW(1, 16), + VTW(1, 17), + VTW(1, 18), + VTW(1, 19), + VTW(1, 20), + VTW(1, 21), + VTW(1, 22), + VTW(1, 23), + VTW(1, 24), + VTW(1, 25), + VTW(1, 26), + VTW(1, 27), + VTW(1, 28), + VTW(1, 29), + VTW(1, 30), + VTW(1, 31), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 32, XSIMD_STRING("hc2cfdftv_32"), twinstr, &GENUS, {233, 117, 16, 0} }; + +void XSIMD(codelet_hc2cfdftv_32) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_32, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_4.c Tue Oct 06 
13:08:39 2015 +0100 @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dit -name hc2cfdftv_4 -include hc2cfv.h */ + +/* + * This function contains 15 FP additions, 16 FP multiplications, + * (or, 9 additions, 10 multiplications, 6 fused multiply/add), + * 21 stack variables, 1 constants, and 8 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 6)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(16, rs)) { + V T1, T2, Tb, T5, T6, T4, T9, T3, Tc, T7, Ta, Tg, T8, Td, Th; + V Tf, Te, 
Ti, Tj; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + Tb = LDW(&(W[0])); + T5 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T4 = LDW(&(W[TWVL * 2])); + T9 = LDW(&(W[TWVL * 4])); + T3 = VFMACONJ(T2, T1); + Tc = VZMULIJ(Tb, VFNMSCONJ(T2, T1)); + T7 = VZMULJ(T4, VFMACONJ(T6, T5)); + Ta = VZMULIJ(T9, VFNMSCONJ(T6, T5)); + Tg = VADD(T3, T7); + T8 = VSUB(T3, T7); + Td = VSUB(Ta, Tc); + Th = VADD(Tc, Ta); + Tf = VCONJ(VMUL(LDK(KP500000000), VFMAI(Td, T8))); + Te = VMUL(LDK(KP500000000), VFNMSI(Td, T8)); + Ti = VMUL(LDK(KP500000000), VSUB(Tg, Th)); + Tj = VCONJ(VMUL(LDK(KP500000000), VADD(Th, Tg))); + ST(&(Rm[0]), Tf, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), Te, ms, &(Rp[WS(rs, 1)])); + ST(&(Rp[0]), Ti, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 1)]), Tj, -ms, &(Rm[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 4, XSIMD_STRING("hc2cfdftv_4"), twinstr, &GENUS, {9, 10, 6, 0} }; + +void XSIMD(codelet_hc2cfdftv_4) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_4, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dit -name hc2cfdftv_4 -include hc2cfv.h */ + +/* + * This function contains 15 FP additions, 10 FP multiplications, + * (or, 15 additions, 10 multiplications, 0 fused multiply/add), + * 23 stack variables, 1 constants, and 8 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 6)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 
6), MAKE_VOLATILE_STRIDE(16, rs)) { + V T4, Tc, T9, Te, T1, T3, T2, Tb, T6, T8, T7, T5, Td, Tg, Th; + V Ta, Tf, Tk, Tl, Ti, Tj; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + T4 = VADD(T1, T3); + Tb = LDW(&(W[0])); + Tc = VZMULIJ(Tb, VSUB(T3, T1)); + T6 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + T7 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T8 = VCONJ(T7); + T5 = LDW(&(W[TWVL * 2])); + T9 = VZMULJ(T5, VADD(T6, T8)); + Td = LDW(&(W[TWVL * 4])); + Te = VZMULIJ(Td, VSUB(T8, T6)); + Ta = VSUB(T4, T9); + Tf = VBYI(VSUB(Tc, Te)); + Tg = VMUL(LDK(KP500000000), VSUB(Ta, Tf)); + Th = VCONJ(VMUL(LDK(KP500000000), VADD(Ta, Tf))); + ST(&(Rp[WS(rs, 1)]), Tg, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[0]), Th, -ms, &(Rm[0])); + Ti = VADD(T4, T9); + Tj = VADD(Tc, Te); + Tk = VCONJ(VMUL(LDK(KP500000000), VSUB(Ti, Tj))); + Tl = VMUL(LDK(KP500000000), VADD(Ti, Tj)); + ST(&(Rm[WS(rs, 1)]), Tk, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[0]), Tl, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 4, XSIMD_STRING("hc2cfdftv_4"), twinstr, &GENUS, {15, 10, 0, 0} }; + +void XSIMD(codelet_hc2cfdftv_4) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_4, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 6 -dit -name hc2cfdftv_6 -include hc2cfv.h */ + +/* + * This function contains 29 FP additions, 30 FP multiplications, + * (or, 17 additions, 18 multiplications, 12 fused multiply/add), + * 38 stack variables, 2 constants, and 12 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) { + V T5, T6, T3, Tj, T4, T9, Te, Th, T1, T2, Ti, Tc, Td, Tb, Tg; + V T7, Ta, Tt, Tk, Tr, T8, Ts, Tf, Tx, Tu, To, Tl, Tw, Tv, Tn; + V Tm, Tz, Ty, Tp, Tq; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + Ti = LDW(&(W[0])); + Tc = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Td = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Tb = LDW(&(W[TWVL * 8])); + Tg = LDW(&(W[TWVL * 6])); + T5 = LD(&(Rp[WS(rs, 1)]), 
ms, &(Rp[WS(rs, 1)])); + T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T3 = VFMACONJ(T2, T1); + Tj = VZMULIJ(Ti, VFNMSCONJ(T2, T1)); + T4 = LDW(&(W[TWVL * 4])); + T9 = LDW(&(W[TWVL * 2])); + Te = VZMULIJ(Tb, VFNMSCONJ(Td, Tc)); + Th = VZMULJ(Tg, VFMACONJ(Td, Tc)); + T7 = VZMULIJ(T4, VFNMSCONJ(T6, T5)); + Ta = VZMULJ(T9, VFMACONJ(T6, T5)); + Tt = VADD(Tj, Th); + Tk = VSUB(Th, Tj); + Tr = VADD(T3, T7); + T8 = VSUB(T3, T7); + Ts = VADD(Ta, Te); + Tf = VSUB(Ta, Te); + Tx = VMUL(LDK(KP866025403), VSUB(Tt, Ts)); + Tu = VADD(Ts, Tt); + To = VMUL(LDK(KP866025403), VSUB(Tk, Tf)); + Tl = VADD(Tf, Tk); + Tw = VFNMS(LDK(KP500000000), Tu, Tr); + Tv = VCONJ(VMUL(LDK(KP500000000), VADD(Tr, Tu))); + Tn = VFNMS(LDK(KP500000000), Tl, T8); + Tm = VMUL(LDK(KP500000000), VADD(T8, Tl)); + Tz = VMUL(LDK(KP500000000), VFMAI(Tx, Tw)); + Ty = VCONJ(VMUL(LDK(KP500000000), VFNMSI(Tx, Tw))); + ST(&(Rm[WS(rs, 2)]), Tv, -ms, &(Rm[0])); + Tp = VMUL(LDK(KP500000000), VFNMSI(To, Tn)); + Tq = VCONJ(VMUL(LDK(KP500000000), VFMAI(To, Tn))); + ST(&(Rp[0]), Tm, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 1)]), Tz, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[0]), Ty, -ms, &(Rm[0])); + ST(&(Rm[WS(rs, 1)]), Tq, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), Tp, ms, &(Rp[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 6, XSIMD_STRING("hc2cfdftv_6"), twinstr, &GENUS, {17, 18, 12, 0} }; + +void XSIMD(codelet_hc2cfdftv_6) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_6, &desc, HC2C_VIA_DFT); +} +#else /* HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 6 -dit -name hc2cfdftv_6 -include hc2cfv.h */ + +/* + * This function contains 29 FP additions, 20 FP multiplications, + * (or, 27 additions, 18 multiplications, 2 fused multiply/add), + * 42 stack variables, 3 
constants, and 12 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP250000000, +0.250000000000000000000000000000000000000000000); + DVK(KP866025403, +0.866025403784438646763723170752936183471402627); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) { + V Ta, Tu, Tn, Tw, Ti, Tv, T1, T8, Tg, Tf, T7, T3, Te, T6, T2; + V T4, T9, T5, Tk, Tm, Tj, Tl, Tc, Th, Tb, Td, Tr, Tp, Tq, To; + V Tt, Ts, TA, Ty, Tz, Tx, TC, TB; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T8 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + Te = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + Tf = VCONJ(Te); + T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + T7 = VCONJ(T6); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + T4 = VADD(T1, T3); + T5 = LDW(&(W[TWVL * 4])); + T9 = VZMULIJ(T5, VSUB(T7, T8)); + Ta = VADD(T4, T9); + Tu = VSUB(T4, T9); + Tj = LDW(&(W[0])); + Tk = VZMULIJ(Tj, VSUB(T3, T1)); + Tl = LDW(&(W[TWVL * 6])); + Tm = VZMULJ(Tl, VADD(Tf, Tg)); + Tn = VADD(Tk, Tm); + Tw = VSUB(Tm, Tk); + Tb = LDW(&(W[TWVL * 2])); + Tc = VZMULJ(Tb, VADD(T7, T8)); + Td = LDW(&(W[TWVL * 8])); + Th = VZMULIJ(Td, VSUB(Tf, Tg)); + Ti = VADD(Tc, Th); + Tv = VSUB(Tc, Th); + Tr = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tn, Ti)))); + To = VADD(Ti, Tn); + Tp = VMUL(LDK(KP500000000), VADD(Ta, To)); + Tq = VFNMS(LDK(KP250000000), To, VMUL(LDK(KP500000000), Ta)); + ST(&(Rp[0]), Tp, ms, &(Rp[0])); + Tt = VCONJ(VADD(Tq, Tr)); + ST(&(Rm[WS(rs, 1)]), Tt, -ms, &(Rm[WS(rs, 1)])); + Ts = VSUB(Tq, Tr); + ST(&(Rp[WS(rs, 2)]), Ts, ms, &(Rp[0])); + TA = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tw, Tv)))); + Tx = VADD(Tv, 
Tw); + Ty = VCONJ(VMUL(LDK(KP500000000), VADD(Tu, Tx))); + Tz = VFNMS(LDK(KP250000000), Tx, VMUL(LDK(KP500000000), Tu)); + ST(&(Rm[WS(rs, 2)]), Ty, -ms, &(Rm[0])); + TC = VADD(Tz, TA); + ST(&(Rp[WS(rs, 1)]), TC, ms, &(Rp[WS(rs, 1)])); + TB = VCONJ(VSUB(Tz, TA)); + ST(&(Rm[0]), TB, -ms, &(Rm[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 6, XSIMD_STRING("hc2cfdftv_6"), twinstr, &GENUS, {27, 18, 2, 0} }; + +void XSIMD(codelet_hc2cfdftv_6) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_6, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/common/hc2cfdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Tue Mar 4 13:51:49 EST 2014 */ + +#include "codelet-rdft.h" + +#ifdef HAVE_FMA + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 8 -dit -name hc2cfdftv_8 -include hc2cfv.h */ + +/* + * This function contains 41 FP additions, 40 FP multiplications, + * (or, 23 additions, 22 multiplications, 18 fused multiply/add), + * 52 stack variables, 2 constants, and 16 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) { + V T3, Tc, Tl, Ts, Tf, Tg, Te, Tp, T7, Ta, T1, T2, Tb, Tj, Tk; + V Ti, Tr, T5, T6, T4, T9, Th, Tq, TC, T8, Td, TF, Tm, TG, TD; + V Tt, Tu, Tn, TH, TL, TE, TK, Tz, Tv, Ty, To, TJ, TI, TN, TM; + V TB, TA, Tx, Tw; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + Tb = LDW(&(W[0])); + Tj = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Tk = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Ti = LDW(&(W[TWVL * 12])); + Tr = LDW(&(W[TWVL * 10])); + T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T3 = VFMACONJ(T2, T1); + Tc = VZMULIJ(Tb, VFNMSCONJ(T2, T1)); + T4 = LDW(&(W[TWVL * 6])); + T9 = LDW(&(W[TWVL * 
8])); + Tl = VZMULIJ(Ti, VFNMSCONJ(Tk, Tj)); + Ts = VZMULJ(Tr, VFMACONJ(Tk, Tj)); + Tf = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Tg = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Te = LDW(&(W[TWVL * 4])); + Tp = LDW(&(W[TWVL * 2])); + T7 = VZMULJ(T4, VFMACONJ(T6, T5)); + Ta = VZMULIJ(T9, VFNMSCONJ(T6, T5)); + Th = VZMULIJ(Te, VFNMSCONJ(Tg, Tf)); + Tq = VZMULJ(Tp, VFMACONJ(Tg, Tf)); + TC = VADD(T3, T7); + T8 = VSUB(T3, T7); + Td = VSUB(Ta, Tc); + TF = VADD(Tc, Ta); + Tm = VSUB(Th, Tl); + TG = VADD(Th, Tl); + TD = VADD(Tq, Ts); + Tt = VSUB(Tq, Ts); + Tu = VSUB(Tm, Td); + Tn = VADD(Td, Tm); + TH = VSUB(TF, TG); + TL = VADD(TF, TG); + TE = VSUB(TC, TD); + TK = VADD(TC, TD); + Tz = VFMA(LDK(KP707106781), Tu, Tt); + Tv = VFNMS(LDK(KP707106781), Tu, Tt); + Ty = VFNMS(LDK(KP707106781), Tn, T8); + To = VFMA(LDK(KP707106781), Tn, T8); + TJ = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TH, TE))); + TI = VMUL(LDK(KP500000000), VFMAI(TH, TE)); + TN = VCONJ(VMUL(LDK(KP500000000), VADD(TL, TK))); + TM = VMUL(LDK(KP500000000), VSUB(TK, TL)); + TB = VMUL(LDK(KP500000000), VFMAI(Tz, Ty)); + TA = VCONJ(VMUL(LDK(KP500000000), VFNMSI(Tz, Ty))); + Tx = VCONJ(VMUL(LDK(KP500000000), VFMAI(Tv, To))); + Tw = VMUL(LDK(KP500000000), VFNMSI(Tv, To)); + ST(&(Rm[WS(rs, 1)]), TJ, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), TI, ms, &(Rp[0])); + ST(&(Rm[WS(rs, 3)]), TN, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[0]), TM, ms, &(Rp[0])); + ST(&(Rp[WS(rs, 3)]), TB, ms, &(Rp[WS(rs, 1)])); + ST(&(Rm[WS(rs, 2)]), TA, -ms, &(Rm[0])); + ST(&(Rm[0]), Tx, -ms, &(Rm[0])); + ST(&(Rp[WS(rs, 1)]), Tw, ms, &(Rp[WS(rs, 1)])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 8, XSIMD_STRING("hc2cfdftv_8"), twinstr, &GENUS, {23, 22, 18, 0} }; + +void XSIMD(codelet_hc2cfdftv_8) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_8, &desc, HC2C_VIA_DFT); +} +#else /* 
HAVE_FMA */ + +/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 8 -dit -name hc2cfdftv_8 -include hc2cfv.h */ + +/* + * This function contains 41 FP additions, 23 FP multiplications, + * (or, 41 additions, 23 multiplications, 0 fused multiply/add), + * 57 stack variables, 3 constants, and 16 memory accesses + */ +#include "hc2cfv.h" + +static void hc2cfdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) +{ + DVK(KP707106781, +0.707106781186547524400844362104849039284835938); + DVK(KP353553390, +0.353553390593273762200422181052424519642417969); + DVK(KP500000000, +0.500000000000000000000000000000000000000000000); + { + INT m; + for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) { + V Ta, TE, Tr, TF, Tl, TK, Tw, TG, T1, T6, T3, T8, T2, T7, T4; + V T9, T5, To, Tq, Tn, Tp, Tc, Th, Te, Tj, Td, Ti, Tf, Tk, Tb; + V Tg, Tt, Tv, Ts, Tu, Ty, Tz, Tm, Tx, TC, TD, TA, TB, TI, TO; + V TL, TP, TH, TJ, TM, TR, TN, TQ; + T1 = LD(&(Rp[0]), ms, &(Rp[0])); + T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); + T2 = LD(&(Rm[0]), -ms, &(Rm[0])); + T3 = VCONJ(T2); + T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); + T8 = VCONJ(T7); + T4 = VADD(T1, T3); + T5 = LDW(&(W[TWVL * 6])); + T9 = VZMULJ(T5, VADD(T6, T8)); + Ta = VADD(T4, T9); + TE = VMUL(LDK(KP500000000), VSUB(T4, T9)); + Tn = LDW(&(W[0])); + To = VZMULIJ(Tn, VSUB(T3, T1)); + Tp = LDW(&(W[TWVL * 8])); + Tq = VZMULIJ(Tp, VSUB(T8, T6)); + Tr = VADD(To, Tq); + TF = VSUB(To, Tq); + Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); + Th = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); + Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); + Te = VCONJ(Td); + Ti = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); + Tj = VCONJ(Ti); + Tb = LDW(&(W[TWVL * 2])); + Tf = VZMULJ(Tb, 
VADD(Tc, Te)); + Tg = LDW(&(W[TWVL * 10])); + Tk = VZMULJ(Tg, VADD(Th, Tj)); + Tl = VADD(Tf, Tk); + TK = VSUB(Tf, Tk); + Ts = LDW(&(W[TWVL * 4])); + Tt = VZMULIJ(Ts, VSUB(Te, Tc)); + Tu = LDW(&(W[TWVL * 12])); + Tv = VZMULIJ(Tu, VSUB(Tj, Th)); + Tw = VADD(Tt, Tv); + TG = VSUB(Tv, Tt); + Tm = VADD(Ta, Tl); + Tx = VADD(Tr, Tw); + Ty = VCONJ(VMUL(LDK(KP500000000), VSUB(Tm, Tx))); + Tz = VMUL(LDK(KP500000000), VADD(Tm, Tx)); + ST(&(Rm[WS(rs, 3)]), Ty, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[0]), Tz, ms, &(Rp[0])); + TA = VSUB(Ta, Tl); + TB = VBYI(VSUB(Tw, Tr)); + TC = VCONJ(VMUL(LDK(KP500000000), VSUB(TA, TB))); + TD = VMUL(LDK(KP500000000), VADD(TA, TB)); + ST(&(Rm[WS(rs, 1)]), TC, -ms, &(Rm[WS(rs, 1)])); + ST(&(Rp[WS(rs, 2)]), TD, ms, &(Rp[0])); + TH = VMUL(LDK(KP353553390), VADD(TF, TG)); + TI = VADD(TE, TH); + TO = VSUB(TE, TH); + TJ = VMUL(LDK(KP707106781), VSUB(TG, TF)); + TL = VMUL(LDK(KP500000000), VBYI(VSUB(TJ, TK))); + TP = VMUL(LDK(KP500000000), VBYI(VADD(TK, TJ))); + TM = VCONJ(VSUB(TI, TL)); + ST(&(Rm[0]), TM, -ms, &(Rm[0])); + TR = VADD(TO, TP); + ST(&(Rp[WS(rs, 3)]), TR, ms, &(Rp[WS(rs, 1)])); + TN = VADD(TI, TL); + ST(&(Rp[WS(rs, 1)]), TN, ms, &(Rp[WS(rs, 1)])); + TQ = VCONJ(VSUB(TO, TP)); + ST(&(Rm[WS(rs, 2)]), TQ, -ms, &(Rm[0])); + } + } + VLEAVE(); +} + +static const tw_instr twinstr[] = { + VTW(1, 1), + VTW(1, 2), + VTW(1, 3), + VTW(1, 4), + VTW(1, 5), + VTW(1, 6), + VTW(1, 7), + {TW_NEXT, VL, 0} +}; + +static const hc2c_desc desc = { 8, XSIMD_STRING("hc2cfdftv_8"), twinstr, &GENUS, {41, 23, 0, 0} }; + +void XSIMD(codelet_hc2cfdftv_8) (planner *p) { + X(khc2c_register) (p, hc2cfdftv_8, &desc, HC2C_VIA_DFT); +} +#endif /* HAVE_FMA */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/hc2cbv.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/hc2cbv.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of 
Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define VTW VTW3 +#define TWVL TWVL3 +#define LDW(x) LDA(x, 0, 0) + +#define GENUS XSIMD(rdft_hc2cbv_genus) +extern const hc2c_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/hc2cfv.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/hc2cfv.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include SIMD_HEADER + +#define VTW VTW3 +#define TWVL TWVL3 +#define LDW(x) LDA(x, 0, 0) + +#define GENUS XSIMD(rdft_hc2cfv_genus) +extern const hc2c_genus GENUS; diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,13 @@ +AM_CFLAGS = $(NEON_CFLAGS) +SIMD_HEADER=simd-neon.h + +include $(top_srcdir)/rdft/simd/codlist.mk +include $(top_srcdir)/rdft/simd/simd.mk + +if HAVE_NEON + +noinst_LTLIBRARIES = librdft_neon_codelets.la +BUILT_SOURCES = $(EXTRA_DIST) +librdft_neon_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,687 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of RDFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. 
It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/rdft/simd/codlist.mk \ + $(top_srcdir)/rdft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = rdft/simd/neon +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_neon_codelets_la_LIBADD = +am__librdft_neon_codelets_la_SOURCES_DIST = hc2cfdftv_2.c \ + hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c hc2cfdftv_10.c \ + hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c hc2cfdftv_20.c \ + hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ + hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ + hc2cbdftv_20.c genus.c codlist.c +am__objects_1 = hc2cfdftv_2.lo hc2cfdftv_4.lo hc2cfdftv_6.lo \ + hc2cfdftv_8.lo hc2cfdftv_10.lo hc2cfdftv_12.lo hc2cfdftv_16.lo \ + hc2cfdftv_32.lo hc2cfdftv_20.lo +am__objects_2 = hc2cbdftv_2.lo hc2cbdftv_4.lo 
hc2cbdftv_6.lo \ + hc2cbdftv_8.lo hc2cbdftv_10.lo hc2cbdftv_12.lo hc2cbdftv_16.lo \ + hc2cbdftv_32.lo hc2cbdftv_20.lo +am__objects_3 = $(am__objects_1) $(am__objects_2) +am__objects_4 = $(am__objects_3) genus.lo codlist.lo +@HAVE_NEON_TRUE@am__objects_5 = $(am__objects_4) +@HAVE_NEON_TRUE@am_librdft_neon_codelets_la_OBJECTS = \ +@HAVE_NEON_TRUE@ $(am__objects_5) +librdft_neon_codelets_la_OBJECTS = \ + $(am_librdft_neon_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_NEON_TRUE@am_librdft_neon_codelets_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_neon_codelets_la_SOURCES) +DIST_SOURCES = $(am__librdft_neon_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) 
(install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = 
@LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = 
@libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(NEON_CFLAGS) +SIMD_HEADER = simd-neon.h +HC2CFDFTV = hc2cfdftv_2.c hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c \ +hc2cfdftv_10.c hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c \ +hc2cfdftv_20.c + +HC2CBDFTV = hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ +hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ +hc2cbdftv_20.c + + +########################################################################### +SIMD_CODELETS = $(HC2CFDFTV) $(HC2CBDFTV) +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_NEON_TRUE@noinst_LTLIBRARIES = librdft_neon_codelets.la +@HAVE_NEON_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_NEON_TRUE@librdft_neon_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/simd/neon/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/simd/neon/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in 
$(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_neon_codelets.la: $(librdft_neon_codelets_la_OBJECTS) $(librdft_neon_codelets_la_DEPENDENCIES) $(EXTRA_librdft_neon_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_librdft_neon_codelets_la_rpath) $(librdft_neon_codelets_la_OBJECTS) $(librdft_neon_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_2.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + 
list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile 
\ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! */"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cbdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cbdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/neon/hc2cfdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-neon.h" +#include "../common/hc2cfdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/simd.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/simd.mk Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,12 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! */"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,15 @@ +AM_CFLAGS = $(SSE2_CFLAGS) +SIMD_HEADER=simd-sse2.h + +include $(top_srcdir)/rdft/simd/codlist.mk +include $(top_srcdir)/rdft/simd/simd.mk + +if HAVE_SSE2 + +BUILT_SOURCES = $(EXTRA_DIST) +noinst_LTLIBRARIES = librdft_sse2_codelets.la +librdft_sse2_codelets_la_SOURCES = $(BUILT_SOURCES) + +endif + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,687 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# This file contains a standard list of RDFT SIMD codelets. It is +# included by common/Makefile to generate the C files with the actual +# codelets in them. It is included by {sse,sse2,...}/Makefile to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +DIST_COMMON = $(top_srcdir)/rdft/simd/codlist.mk \ + $(top_srcdir)/rdft/simd/simd.mk $(srcdir)/Makefile.in \ + $(srcdir)/Makefile.am $(top_srcdir)/depcomp +subdir = rdft/simd/sse2 +ACLOCAL_M4 = 
$(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdft_sse2_codelets_la_LIBADD = +am__librdft_sse2_codelets_la_SOURCES_DIST = hc2cfdftv_2.c \ + hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c hc2cfdftv_10.c \ + hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c hc2cfdftv_20.c \ + hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ + hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ + hc2cbdftv_20.c genus.c codlist.c +am__objects_1 = hc2cfdftv_2.lo hc2cfdftv_4.lo hc2cfdftv_6.lo \ + hc2cfdftv_8.lo hc2cfdftv_10.lo hc2cfdftv_12.lo hc2cfdftv_16.lo \ + hc2cfdftv_32.lo hc2cfdftv_20.lo +am__objects_2 = hc2cbdftv_2.lo hc2cbdftv_4.lo hc2cbdftv_6.lo \ + hc2cbdftv_8.lo hc2cbdftv_10.lo hc2cbdftv_12.lo hc2cbdftv_16.lo \ + hc2cbdftv_32.lo hc2cbdftv_20.lo +am__objects_3 = $(am__objects_1) $(am__objects_2) +am__objects_4 = $(am__objects_3) genus.lo codlist.lo +@HAVE_SSE2_TRUE@am__objects_5 = $(am__objects_4) +@HAVE_SSE2_TRUE@am_librdft_sse2_codelets_la_OBJECTS = \ +@HAVE_SSE2_TRUE@ $(am__objects_5) +librdft_sse2_codelets_la_OBJECTS = \ + $(am_librdft_sse2_codelets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@HAVE_SSE2_TRUE@am_librdft_sse2_codelets_la_rpath = +AM_V_P 
= $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdft_sse2_codelets_la_SOURCES) +DIST_SOURCES = $(am__librdft_sse2_codelets_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CFLAGS = $(SSE2_CFLAGS) +SIMD_HEADER = simd-sse2.h +HC2CFDFTV = hc2cfdftv_2.c hc2cfdftv_4.c hc2cfdftv_6.c hc2cfdftv_8.c \ 
+hc2cfdftv_10.c hc2cfdftv_12.c hc2cfdftv_16.c hc2cfdftv_32.c \ +hc2cfdftv_20.c + +HC2CBDFTV = hc2cbdftv_2.c hc2cbdftv_4.c hc2cbdftv_6.c hc2cbdftv_8.c \ +hc2cbdftv_10.c hc2cbdftv_12.c hc2cbdftv_16.c hc2cbdftv_32.c \ +hc2cbdftv_20.c + + +########################################################################### +SIMD_CODELETS = $(HC2CFDFTV) $(HC2CBDFTV) +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft \ +-I$(top_srcdir)/rdft/simd -I$(top_srcdir)/simd-support + +EXTRA_DIST = $(SIMD_CODELETS) genus.c codlist.c +@HAVE_SSE2_TRUE@BUILT_SOURCES = $(EXTRA_DIST) +@HAVE_SSE2_TRUE@noinst_LTLIBRARIES = librdft_sse2_codelets.la +@HAVE_SSE2_TRUE@librdft_sse2_codelets_la_SOURCES = $(BUILT_SOURCES) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu rdft/simd/sse2/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu rdft/simd/sse2/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/rdft/simd/codlist.mk $(top_srcdir)/rdft/simd/simd.mk: + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdft_sse2_codelets.la: $(librdft_sse2_codelets_la_OBJECTS) $(librdft_sse2_codelets_la_DEPENDENCIES) $(EXTRA_librdft_sse2_codelets_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(am_librdft_sse2_codelets_la_rpath) $(librdft_sse2_codelets_la_OBJECTS) $(librdft_sse2_codelets_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codlist.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genus.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hc2cbdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cbdftv_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_20.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_32.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hc2cfdftv_8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + 
dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile 
\ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +$(EXTRA_DIST): Makefile + ( \ + echo "/* Generated automatically. DO NOT EDIT! */"; \ + echo "#define SIMD_HEADER \"$(SIMD_HEADER)\""; \ + echo "#include \"../common/"$*".c\""; \ + ) >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/codlist.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/codlist.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/codlist.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/genus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/genus.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/genus.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cbdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cbdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_10.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_10.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_10.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_12.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_12.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_12.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_16.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_16.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_16.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_2.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_20.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_20.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_20.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_32.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_32.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_4.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_4.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_6.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_6.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! 
*/ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_6.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/simd/sse2/hc2cfdftv_8.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,3 @@ +/* Generated automatically. DO NOT EDIT! */ +#define SIMD_HEADER "simd-sse2.h" +#include "../common/hc2cfdftv_8.c" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/solve.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/solve.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" + +/* use the apply() operation for RDFT problems */ +void X(rdft_solve)(const plan *ego_, const problem *p_) +{ + const plan_rdft *ego = (const plan_rdft *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + ego->apply(ego_, UNTAINT(p->I), UNTAINT(p->O)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/solve2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/solve2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "rdft.h" + +/* use the apply() operation for RDFT2 problems */ +void X(rdft2_solve)(const plan *ego_, const problem *p_) +{ + const plan_rdft2 *ego = (const plan_rdft2 *) ego_; + const problem_rdft2 *p = (const problem_rdft2 *) p_; + ego->apply(ego_, + UNTAINT(p->r0), UNTAINT(p->r1), + UNTAINT(p->cr), UNTAINT(p->ci)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/vrank-geq1-rdft2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/vrank-geq1-rdft2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + + +/* Plans for handling vector transform loops. These are *just* the + loops, and rely on child plans for the actual RDFT2s. + + They form a wrapper around solvers that don't have apply functions + for non-null vectors. 
+ + vrank-geq1-rdft2 plans also recursively handle the case of + multi-dimensional vectors, obviating the need for most solvers to + deal with this. We can also play games here, such as reordering + the vector loops. + + Each vrank-geq1-rdft2 plan reduces the vector rank by 1, picking out a + dimension determined by the vecloop_dim field of the solver. */ + +#include "rdft.h" + +typedef struct { + solver super; + int vecloop_dim; + const int *buddies; + int nbuddies; +} S; + +typedef struct { + plan_rdft2 super; + + plan *cld; + INT vl; + INT rvs, cvs; + const S *solver; +} P; + +static void apply(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + INT i, vl = ego->vl; + INT rvs = ego->rvs, cvs = ego->cvs; + rdft2apply cldapply = ((plan_rdft2 *) ego->cld)->apply; + + for (i = 0; i < vl; ++i) { + cldapply(ego->cld, r0 + i * rvs, r1 + i * rvs, + cr + i * cvs, ci + i * cvs); + } +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + p->print(p, "(rdft2-vrank>=1-x%D/%d%(%p%))", + ego->vl, s->vecloop_dim, ego->cld); +} + +static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) +{ + return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, + vecsz, oop, dp); +} + +static int applicable0(const solver *ego_, const problem *p_, int *dp) +{ + const S *ego = (const S *) ego_; + const problem_rdft2 *p = (const problem_rdft2 *) p_; + if (FINITE_RNK(p->vecsz->rnk) + && p->vecsz->rnk > 0 + && pickdim(ego, p->vecsz, p->r0 != p->cr, dp)) { + if (p->r0 != p->cr) + return 1; /* can always operate out-of-place */ + + return(X(rdft2_inplace_strides)(p, *dp)); + } + + return 0; +} + + +static int applicable(const solver *ego_, const 
problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *)ego_; + if (!applicable0(ego_, p_, dp)) return 0; + + /* fftw2 behavior */ + if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) + return 0; + + if (NO_UGLYP(plnr)) { + const problem_rdft2 *p = (const problem_rdft2 *) p_; + iodim *d = p->vecsz->dims + *dp; + + /* Heuristic: if the transform is multi-dimensional, and the + vector stride is less than the transform size, then we + probably want to use a rank>=2 plan first in order to combine + this vector with the transform-dimension vectors. */ + if (p->sz->rnk > 1 + && X(imin)(X(iabs)(d->is), X(iabs)(d->os)) + < X(rdft2_tensor_max_index)(p->sz, p->kind) + ) + return 0; + + /* Heuristic: don't use a vrank-geq1 for rank-0 vrank-1 + transforms, since this case is better handled by rank-0 + solvers. */ + if (p->sz->rnk == 0 && p->vecsz->rnk == 1) return 0; + + if (NO_NONTHREADEDP(plnr)) + return 0; /* prefer threaded version */ + } + + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft2 *p; + P *pln; + plan *cld; + int vdim; + iodim *d; + INT rvs, cvs; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &vdim)) + return (plan *) 0; + p = (const problem_rdft2 *) p_; + + d = p->vecsz->dims + vdim; + + A(d->n > 1); /* or else, p->ri + d->is etc. 
are invalid */ + + X(rdft2_strides)(p->kind, d, &rvs, &cvs); + + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft2_d)( + X(tensor_copy)(p->sz), + X(tensor_copy_except)(p->vecsz, vdim), + TAINT(p->r0, rvs), TAINT(p->r1, rvs), + TAINT(p->cr, cvs), TAINT(p->ci, cvs), + p->kind)); + if (!cld) return (plan *) 0; + + pln = MKPLAN_RDFT2(P, &padt, apply); + + pln->cld = cld; + pln->vl = d->n; + pln->rvs = rvs; + pln->cvs = cvs; + + pln->solver = ego; + X(ops_zero)(&pln->super.super.ops); + pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */ + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + + if (p->sz->rnk != 1 || (p->sz->dims[0].n > 128)) + pln->super.super.pcost = pln->vl * cld->pcost; + + return &(pln->super.super); +} + +static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->vecloop_dim = vecloop_dim; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} + +void X(rdft2_vrank_geq1_register)(planner *p) +{ + int i; + + /* FIXME: Should we try other vecloop_dim values? */ + static const int buddies[] = { 1, -1 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/vrank-geq1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/vrank-geq1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + + +/* Plans for handling vector transform loops. These are *just* the + loops, and rely on child plans for the actual RDFTs. + + They form a wrapper around solvers that don't have apply functions + for non-null vectors. + + vrank-geq1 plans also recursively handle the case of multi-dimensional + vectors, obviating the need for most solvers to deal with this. We + can also play games here, such as reordering the vector loops. + + Each vrank-geq1 plan reduces the vector rank by 1, picking out a + dimension determined by the vecloop_dim field of the solver. 
*/ + +#include "rdft.h" +/* Solver: the vector dimension this solver loops over (vecloop_dim) and the buddies list handed to X(pickdim). */ +typedef struct { + solver super; + int vecloop_dim; + const int *buddies; + int nbuddies; +} S; +/* Plan: a child plan (cld) applied vl times, stepping the input by ivs and the output by ovs. */ +typedef struct { + plan_rdft super; + + plan *cld; + INT vl; + INT ivs, ovs; + const S *solver; +} P; +/* Run the child plan once per vector element i, on I + i*ivs and O + i*ovs. */ +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT i, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + rdftapply cldapply = ((plan_rdft *) ego->cld)->apply; + + for (i = 0; i < vl; ++i) { + cldapply(ego->cld, I + i * ivs, O + i * ovs); + } +} +/* Forward the wakefulness change to the child plan. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); +} +/* Destroy the child plan owned by this plan. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} +/* Print the loop length, the solver's vecloop_dim and the child plan. */ +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + p->print(p, "(rdft-vrank>=1-x%D/%d%(%p%))", + ego->vl, s->vecloop_dim, ego->cld); +} +/* Delegate the choice of loop dimension to X(pickdim); mkplan uses the value left in *dp as an index into vecsz->dims. */ +static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) +{ + return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, + vecsz, oop, dp); +} +/* Structural test: finite vector rank > 0, transform rank >= 0, and a loop dimension can be picked. */ +static int applicable0(const solver *ego_, const problem *p_, int *dp) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + + return (1 + && FINITE_RNK(p->vecsz->rnk) + && p->vecsz->rnk > 0 + + && p->sz->rnk >= 0 + + && pickdim(ego, p->vecsz, p->I != p->O, dp) + ); +} +/* Full test: applicable0 plus the planner-flag heuristics below. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *)ego_; + const problem_rdft *p; + + if (!applicable0(ego_, p_, dp)) return 0; + + /* fftw2 behavior */ + if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) + return 0; + + p = (const problem_rdft *) p_; + + if (NO_UGLYP(plnr)) { + /* the rank-0 solver deals with the general case most of the + time (an exception is loops of non-square transposes) */ + if (NO_SLOWP(plnr) && p->sz->rnk == 0) + return 0; + + /* 
Heuristic: if the transform is multi-dimensional, and the + vector stride is less than the transform size, then we + probably want to use a rank>=2 plan first in order to combine + this vector with the transform-dimension vectors. */ + { + iodim *d = p->vecsz->dims + *dp; + if (1 + && p->sz->rnk > 1 + && X(imin)(X(iabs)(d->is), X(iabs)(d->os)) + < X(tensor_max_index)(p->sz) + ) + return 0; + } + + /* prefer threaded version */ + if (NO_NONTHREADEDP(plnr)) return 0; + + /* exploit built-in vecloops of (ugly) r{e,o}dft solvers */ + if (p->vecsz->rnk == 1 && p->sz->rnk == 1 + && REODFT_KINDP(p->kind[0])) + return 0; + } + + return 1; +} +/* Build the loop plan: plan a child problem whose vecsz omits dimension vdim, then loop that child d->n times. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p; + P *pln; + plan *cld; + int vdim; + iodim *d; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &vdim)) + return (plan *) 0; + p = (const problem_rdft *) p_; + + d = p->vecsz->dims + vdim; + + A(d->n > 1); + + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)( + X(tensor_copy)(p->sz), + X(tensor_copy_except)(p->vecsz, vdim), + TAINT(p->I, d->is), TAINT(p->O, d->os), + p->kind)); + if (!cld) return (plan *) 0; + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->cld = cld; + pln->vl = d->n; + pln->ivs = d->is; + pln->ovs = d->os; + + pln->solver = ego; + X(ops_zero)(&pln->super.super.ops); + pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */ + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + + if (p->sz->rnk != 1 || (p->sz->dims[0].n > 128)) + pln->super.super.pcost = pln->vl * cld->pcost; + + return &(pln->super.super); +} +/* Allocate a solver and record its loop dimension and buddies list. */ +static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->vecloop_dim = vecloop_dim; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} + 
+void X(rdft_vrank_geq1_register)(planner *p) +{ + int i; + /* Register one solver per entry of buddies[]; each solver gets that entry as its vecloop_dim and shares the whole buddies list. */ + /* FIXME: Should we try other vecloop_dim values? */ + static const int buddies[] = { 1, -1 }; + /* element count of buddies[] */ + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/rdft/vrank3-transpose.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/rdft/vrank3-transpose.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,777 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* rank-0, vector-rank-3, non-square in-place transposition + (see rank0.c for square transposition) */ + +#include "rdft.h" + +#ifdef HAVE_STRING_H +#include <string.h> /* for memcpy() */ +#endif + +struct P_s; +/* Per-algorithm method table: apply routine, applicability test (which also sets *nbuf), child-plan constructor, and the name used when printing. */ +typedef struct { + rdftapply apply; + int (*applicable)(const problem_rdft *p, planner *plnr, + int dim0, int dim1, int dim2, INT *nbuf); + int (*mkcldrn)(const problem_rdft *p, planner *plnr, struct P_s *ego); + const char *nam; +} transpose_adt; +/* Solver: carries the method table of one transpose algorithm. */ +typedef struct { + solver super; + const transpose_adt *adt; +} S; + +typedef struct P_s { + plan_rdft super; + INT n, m, vl; /* transpose n x m matrix of vl-tuples */ + INT nbuf; /* buffer size */ + INT nd, md, d; /* transpose-gcd params */ + INT nc, mc; /* transpose-cut params */ + plan *cld1, *cld2, *cld3; /* children, null if unused */ + const S *slv; +} P; + + +/*************************************************************************/ +/* some utilities for the solvers */ +/* gcd of a and b by Euclid's algorithm; b must be nonzero because a % b is evaluated before the first loop test */ +static INT gcd(INT a, INT b) +{ + INT r; + do { + r = a % b; + a = b; + b = r; + } while (r != 0); + + return a; +} + +/* whether we can transpose with one of our routines expecting + contiguous Ntuples */ +static int Ntuple_transposable(const iodim *a, const iodim *b, INT vl, INT vs) +{ + return (vs == 1 && b->is == vl && a->os == vl && + ((a->n == b->n && a->is == b->os + && a->is >= b->n && a->is % vl == 0) + || (a->is == b->n * vl && b->os == a->n * vl))); +} + +/* check whether a and b correspond to the first and second dimensions + of a transpose of tuples with vector length = vl, stride = vs. 
*/ +static int transposable(const iodim *a, const iodim *b, INT vl, INT vs) +{ + return ((a->n == b->n && a->os == b->is && a->is == b->os) + || Ntuple_transposable(a, b, vl, vs)); +} + +static int pickdim(const tensor *s, int *pdim0, int *pdim1, int *pdim2) +{ + int dim0, dim1; + + for (dim0 = 0; dim0 < s->rnk; ++dim0) + for (dim1 = 0; dim1 < s->rnk; ++dim1) { + int dim2 = 3 - dim0 - dim1; + if (dim0 == dim1) continue; + if ((s->rnk == 2 || s->dims[dim2].is == s->dims[dim2].os) + && transposable(s->dims + dim0, s->dims + dim1, + s->rnk == 2 ? (INT)1 : s->dims[dim2].n, + s->rnk == 2 ? (INT)1 : s->dims[dim2].is)) { + *pdim0 = dim0; + *pdim1 = dim1; + *pdim2 = dim2; + return 1; + } + } + return 0; +} + +#define MINBUFDIV 9 /* min factor by which buffer is smaller than data */ +#define MAXBUF 65536 /* maximum non-ugly buffer */ + +/* generic applicability function */ +static int applicable(const solver *ego_, const problem *p_, planner *plnr, + int *dim0, int *dim1, int *dim2, INT *nbuf) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + + return (1 + && p->I == p->O + && p->sz->rnk == 0 + && (p->vecsz->rnk == 2 || p->vecsz->rnk == 3) + + && pickdim(p->vecsz, dim0, dim1, dim2) + + /* UGLY if vecloop in wrong order for locality */ + && (!NO_UGLYP(plnr) || + p->vecsz->rnk == 2 || + X(iabs)(p->vecsz->dims[*dim2].is) + < X(imax)(X(iabs)(p->vecsz->dims[*dim0].is), + X(iabs)(p->vecsz->dims[*dim0].os))) + + /* SLOW if non-square */ + && (!NO_SLOWP(plnr) + || p->vecsz->dims[*dim0].n == p->vecsz->dims[*dim1].n) + + && ego->adt->applicable(p, plnr, *dim0,*dim1,*dim2,nbuf) + + /* buffers too big are UGLY */ + && ((!NO_UGLYP(plnr) && !CONSERVE_MEMORYP(plnr)) + || *nbuf <= MAXBUF + || *nbuf * MINBUFDIV <= X(tensor_sz)(p->vecsz)) + ); +} + +static void get_transpose_vec(const problem_rdft *p, int dim2, INT *vl,INT *vs) +{ + if (p->vecsz->rnk == 2) { + *vl = 1; *vs = 1; + } + else { + *vl = p->vecsz->dims[dim2].n; + *vs = 
p->vecsz->dims[dim2].is; /* == os */ + } +} + +/*************************************************************************/ +/* Cache-oblivious in-place transpose of non-square matrices, based + on transposes of blocks given by the gcd of the dimensions. + + This algorithm is related to algorithm V5 from Murray Dow, + "Transposing a matrix on a vector computer," Parallel Computing 21 + (12), 1997-2005 (1995), with the modification that we use + cache-oblivious recursive transpose subroutines (and we derived + it independently). + + For a p x q matrix, this requires scratch space equal to the size + of the matrix divided by gcd(p,q). Alternatively, see also the + "cut" algorithm below, if |p-q| * gcd(p,q) < max(p,q). */ + +static void apply_gcd(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT n = ego->nd, m = ego->md, d = ego->d; + INT vl = ego->vl; + R *buf = (R *)MALLOC(sizeof(R) * ego->nbuf, BUFFERS); + INT i, num_el = n*m*d*vl; + + A(ego->n == n * d && ego->m == m * d); + UNUSED(O); + + /* Transpose the matrix I in-place, where I is an (n*d) x (m*d) matrix + of vl-tuples and buf contains n*m*d*vl elements. + + In general, to transpose a p x q matrix, you should call this + routine with d = gcd(p, q), n = p/d, and m = q/d. */ + + A(n > 0 && m > 0 && vl > 0); + A(d > 1); + + /* treat as (d x n) x (d' x m) matrix. (d' = d) */ + + /* First, transpose d x (n x d') x m to d x (d' x n) x m, + using the buf matrix. This consists of d transposes + of contiguous n x d' matrices of m-tuples. 
*/ + if (n > 1) { + rdftapply cldapply = ((plan_rdft *) ego->cld1)->apply; + for (i = 0; i < d; ++i) { + cldapply(ego->cld1, I + i*num_el, buf); + memcpy(I + i*num_el, buf, num_el*sizeof(R)); + } + } + + /* Now, transpose (d x d') x (n x m) to (d' x d) x (n x m), which + is a square in-place transpose of n*m-tuples: */ + { + rdftapply cldapply = ((plan_rdft *) ego->cld2)->apply; + cldapply(ego->cld2, I, I); + } + + /* Finally, transpose d' x ((d x n) x m) to d' x (m x (d x n)), + using the buf matrix. This consists of d' transposes + of contiguous d*n x m matrices. */ + if (m > 1) { + rdftapply cldapply = ((plan_rdft *) ego->cld3)->apply; + for (i = 0; i < d; ++i) { + cldapply(ego->cld3, I + i*num_el, buf); + memcpy(I + i*num_el, buf, num_el*sizeof(R)); + } + } + + X(ifree)(buf); +} + +static int applicable_gcd(const problem_rdft *p, planner *plnr, + int dim0, int dim1, int dim2, INT *nbuf) +{ + INT n = p->vecsz->dims[dim0].n; + INT m = p->vecsz->dims[dim1].n; + INT d, vl, vs; + get_transpose_vec(p, dim2, &vl, &vs); + d = gcd(n, m); + *nbuf = n * (m / d) * vl; + return (!NO_SLOWP(plnr) /* FIXME: not really SLOW for large 1d ffts */ + && n != m + && d > 1 + && Ntuple_transposable(p->vecsz->dims + dim0, + p->vecsz->dims + dim1, + vl, vs)); +} + +static int mkcldrn_gcd(const problem_rdft *p, planner *plnr, P *ego) +{ + INT n = ego->nd, m = ego->md, d = ego->d; + INT vl = ego->vl; + R *buf = (R *)MALLOC(sizeof(R) * ego->nbuf, BUFFERS); + INT num_el = n*m*d*vl; + + if (n > 1) { + ego->cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d)(n, d*m*vl, m*vl, + d, m*vl, n*m*vl, + m*vl, 1, 1), + TAINT(p->I, num_el), buf)); + if (!ego->cld1) + goto nada; + X(ops_madd)(d, &ego->cld1->ops, &ego->super.super.ops, + &ego->super.super.ops); + ego->super.super.ops.other += num_el * d * 2; + } + + ego->cld2 = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d)(d, d*n*m*vl, n*m*vl, + d, n*m*vl, d*n*m*vl, + n*m*vl, 1, 1), + p->I, p->I)); + if (!ego->cld2) + goto 
nada; + X(ops_add2)(&ego->cld2->ops, &ego->super.super.ops); + + if (m > 1) { + ego->cld3 = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d)(d*n, m*vl, vl, + m, vl, d*n*vl, + vl, 1, 1), + TAINT(p->I, num_el), buf)); + if (!ego->cld3) + goto nada; + X(ops_madd2)(d, &ego->cld3->ops, &ego->super.super.ops); + ego->super.super.ops.other += num_el * d * 2; + } + + X(ifree)(buf); + return 1; + + nada: + X(ifree)(buf); + return 0; +} + +static const transpose_adt adt_gcd = +{ + apply_gcd, applicable_gcd, mkcldrn_gcd, + "rdft-transpose-gcd" +}; + +/*************************************************************************/ +/* Cache-oblivious in-place transpose of non-square n x m matrices, + based on transposing a sub-matrix first and then transposing the + remainder(s) with the help of a buffer. See also transpose-gcd, + above, if gcd(n,m) is large. + + This algorithm is related to algorithm V3 from Murray Dow, + "Transposing a matrix on a vector computer," Parallel Computing 21 + (12), 1997-2005 (1995), with the modifications that we use + cache-oblivious recursive transpose subroutines and we have the + generalization for large |n-m| below. + + The best case, and the one described by Dow, is for |n-m| small, in + which case we transpose a square sub-matrix of size min(n,m), + handling the remainder via a buffer. This requires scratch space + equal to the size of the matrix times |n-m| / max(n,m). + + As a generalization when |n-m| is not small, we also support cutting + *both* dimensions to an nc x mc matrix which is *not* necessarily + square, but has a large gcd (and can therefore use transpose-gcd). 
+*/ + +static void apply_cut(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT n = ego->n, m = ego->m, nc = ego->nc, mc = ego->mc, vl = ego->vl; + INT i; + R *buf1 = (R *)MALLOC(sizeof(R) * ego->nbuf, BUFFERS); + UNUSED(O); + + if (m > mc) { + ((plan_rdft *) ego->cld1)->apply(ego->cld1, I + mc*vl, buf1); + for (i = 0; i < nc; ++i) + memmove(I + (mc*vl) * i, I + (m*vl) * i, sizeof(R) * (mc*vl)); + } + + ((plan_rdft *) ego->cld2)->apply(ego->cld2, I, I); /* nc x mc transpose */ + + if (n > nc) { + R *buf2 = buf1 + (m-mc)*(nc*vl); /* FIXME: force better alignment? */ + memcpy(buf2, I + nc*(m*vl), (n-nc)*(m*vl)*sizeof(R)); + for (i = mc-1; i >= 0; --i) + memmove(I + (n*vl) * i, I + (nc*vl) * i, sizeof(R) * (n*vl)); + ((plan_rdft *) ego->cld3)->apply(ego->cld3, buf2, I + nc*vl); + } + + if (m > mc) { + if (n > nc) + for (i = mc; i < m; ++i) + memcpy(I + i*(n*vl), buf1 + (i-mc)*(nc*vl), + (nc*vl)*sizeof(R)); + else + memcpy(I + mc*(n*vl), buf1, (m-mc)*(n*vl)*sizeof(R)); + } + + X(ifree)(buf1); +} + +/* only cut one dimension if the resulting buffer is small enough */ +static int cut1(INT n, INT m, INT vl) +{ + return (X(imax)(n,m) >= X(iabs)(n-m) * MINBUFDIV + || X(imin)(n,m) * X(iabs)(n-m) * vl <= MAXBUF); +} + +#define CUT_NSRCH 32 /* range of sizes to search for possible cuts */ + +static int applicable_cut(const problem_rdft *p, planner *plnr, + int dim0, int dim1, int dim2, INT *nbuf) +{ + INT n = p->vecsz->dims[dim0].n; + INT m = p->vecsz->dims[dim1].n; + INT vl, vs; + get_transpose_vec(p, dim2, &vl, &vs); + *nbuf = 0; /* always small enough to be non-UGLY (?) */ + A(MINBUFDIV <= CUT_NSRCH); /* assumed to avoid inf. loops below */ + return (!NO_SLOWP(plnr) /* FIXME: not really SLOW for large 1d ffts? */ + && n != m + + /* Don't call transpose-cut recursively (avoid inf. 
loops): + the non-square sub-transpose produced when !cut1 + should always have gcd(n,m) >= min(CUT_NSRCH,n,m), + for which transpose-gcd is applicable */ + && (cut1(n, m, vl) + || gcd(n, m) < X(imin)(MINBUFDIV, X(imin)(n,m))) + + && Ntuple_transposable(p->vecsz->dims + dim0, + p->vecsz->dims + dim1, + vl, vs)); +} + +static int mkcldrn_cut(const problem_rdft *p, planner *plnr, P *ego) +{ + INT n = ego->n, m = ego->m, nc, mc; + INT vl = ego->vl; + R *buf; + + /* pick the "best" cut */ + if (cut1(n, m, vl)) { + nc = mc = X(imin)(n,m); + } + else { + INT dc, ns, ms; + dc = gcd(m, n); nc = n; mc = m; + /* search for cut with largest gcd + (TODO: different optimality criteria? different search range?) */ + for (ms = m; ms > 0 && ms > m - CUT_NSRCH; --ms) { + for (ns = n; ns > 0 && ns > n - CUT_NSRCH; --ns) { + INT ds = gcd(ms, ns); + if (ds > dc) { + dc = ds; nc = ns; mc = ms; + if (dc == X(imin)(ns, ms)) + break; /* cannot get larger than this */ + } + } + if (dc == X(imin)(n, ms)) + break; /* cannot get larger than this */ + } + A(dc >= X(imin)(CUT_NSRCH, X(imin)(n, m))); + } + ego->nc = nc; + ego->mc = mc; + ego->nbuf = (m-mc)*(nc*vl) + (n-nc)*(m*vl); + + buf = (R *)MALLOC(sizeof(R) * ego->nbuf, BUFFERS); + + if (m > mc) { + ego->cld1 = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d)(nc, m*vl, vl, + m-mc, vl, nc*vl, + vl, 1, 1), + p->I + mc*vl, buf)); + if (!ego->cld1) + goto nada; + X(ops_add2)(&ego->cld1->ops, &ego->super.super.ops); + } + + ego->cld2 = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d)(nc, mc*vl, vl, + mc, vl, nc*vl, + vl, 1, 1), + p->I, p->I)); + if (!ego->cld2) + goto nada; + X(ops_add2)(&ego->cld2->ops, &ego->super.super.ops); + + if (n > nc) { + ego->cld3 = X(mkplan_d)(plnr, + X(mkproblem_rdft_0_d)( + X(mktensor_3d)(n-nc, m*vl, vl, + m, vl, n*vl, + vl, 1, 1), + buf + (m-mc)*(nc*vl), p->I + nc*vl)); + if (!ego->cld3) + goto nada; + X(ops_add2)(&ego->cld3->ops, &ego->super.super.ops); + } + + /* memcpy/memmove operations */ 
+ ego->super.super.ops.other += 2 * vl * (nc*mc * ((m > mc) + (n > nc)) + + (n-nc)*m + (m-mc)*nc); + + X(ifree)(buf); + return 1; + + nada: + X(ifree)(buf); + return 0; +} + +static const transpose_adt adt_cut = +{ + apply_cut, applicable_cut, mkcldrn_cut, + "rdft-transpose-cut" +}; + +/*************************************************************************/ +/* In-place transpose routine from TOMS, which follows the cycles of + the permutation so that it writes to each location only once. + Because of cache-line and other issues, however, this routine is + typically much slower than transpose-gcd or transpose-cut, even + though the latter do some extra writes. On the other hand, if the + vector length is large then the TOMS routine is best. + + The TOMS routine also has the advantage of requiring less buffer + space for the case of gcd(nx,ny) small. However, in this case it + has been superseded by the combination of the generalized + transpose-cut method with the transpose-gcd method, which can + always transpose with buffers a small fraction of the array size + regardless of gcd(nx,ny). */ + +/* + * TOMS Transpose. Algorithm 513 (Revised version of algorithm 380). + * + * These routines do in-place transposes of arrays. + * + * [ Cate, E.G. and Twigg, D.W., ACM Transactions on Mathematical Software, + * vol. 3, no. 1, 104-110 (1977) ] + * + * C version by Steven G. Johnson (February 1997). + */ + +/* + * "a" is a 1D array of length ny*nx*N which contains the nx x ny + * matrix of N-tuples to be transposed. "a" is stored in row-major + * order (last index varies fastest). move is a 1D array of length + * move_size used to store information to speed up the process. The + * value move_size=(ny+nx)/2 is recommended. buf should be an array + * of length 2*N. 
+ * + */ + +static void transpose_toms513(R *a, INT nx, INT ny, INT N, + char *move, INT move_size, R *buf) +{ + INT i, im, mn; + R *b, *c, *d; + INT ncount; + INT k; + + /* check arguments and initialize: */ + A(ny > 0 && nx > 0 && N > 0 && move_size > 0); + + b = buf; + + /* Cate & Twigg have a special case for nx == ny, but we don't + bother, since we already have special code for this case elsewhere. */ + + c = buf + N; + ncount = 2; /* always at least 2 fixed points */ + k = (mn = ny * nx) - 1; + + for (i = 0; i < move_size; ++i) + move[i] = 0; + + if (ny >= 3 && nx >= 3) + ncount += gcd(ny - 1, nx - 1) - 1; /* # fixed points */ + + i = 1; + im = ny; + + while (1) { + INT i1, i2, i1c, i2c; + INT kmi; + + /** Rearrange the elements of a loop + and its companion loop: **/ + + i1 = i; + kmi = k - i; + i1c = kmi; + switch (N) { + case 1: + b[0] = a[i1]; + c[0] = a[i1c]; + break; + case 2: + b[0] = a[2*i1]; + b[1] = a[2*i1+1]; + c[0] = a[2*i1c]; + c[1] = a[2*i1c+1]; + break; + default: + memcpy(b, &a[N * i1], N * sizeof(R)); + memcpy(c, &a[N * i1c], N * sizeof(R)); + } + while (1) { + i2 = ny * i1 - k * (i1 / nx); + i2c = k - i2; + if (i1 < move_size) + move[i1] = 1; + if (i1c < move_size) + move[i1c] = 1; + ncount += 2; + if (i2 == i) + break; + if (i2 == kmi) { + d = b; + b = c; + c = d; + break; + } + switch (N) { + case 1: + a[i1] = a[i2]; + a[i1c] = a[i2c]; + break; + case 2: + a[2*i1] = a[2*i2]; + a[2*i1+1] = a[2*i2+1]; + a[2*i1c] = a[2*i2c]; + a[2*i1c+1] = a[2*i2c+1]; + break; + default: + memcpy(&a[N * i1], &a[N * i2], + N * sizeof(R)); + memcpy(&a[N * i1c], &a[N * i2c], + N * sizeof(R)); + } + i1 = i2; + i1c = i2c; + } + switch (N) { + case 1: + a[i1] = b[0]; + a[i1c] = c[0]; + break; + case 2: + a[2*i1] = b[0]; + a[2*i1+1] = b[1]; + a[2*i1c] = c[0]; + a[2*i1c+1] = c[1]; + break; + default: + memcpy(&a[N * i1], b, N * sizeof(R)); + memcpy(&a[N * i1c], c, N * sizeof(R)); + } + if (ncount >= mn) + break; /* we've moved all elements */ + + /** Search for 
loops to rearrange: **/ + + while (1) { + INT max = k - i; + ++i; + A(i <= max); + im += ny; + if (im > k) + im -= k; + i2 = im; + if (i == i2) + continue; + if (i >= move_size) { + while (i2 > i && i2 < max) { + i1 = i2; + i2 = ny * i1 - k * (i1 / nx); + } + if (i2 == i) + break; + } else if (!move[i]) + break; + } + } +} + +static void apply_toms513(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT n = ego->n, m = ego->m; + INT vl = ego->vl; + R *buf = (R *)MALLOC(sizeof(R) * ego->nbuf, BUFFERS); + UNUSED(O); + transpose_toms513(I, n, m, vl, (char *) (buf + 2*vl), (n+m)/2, buf); + X(ifree)(buf); +} + +static int applicable_toms513(const problem_rdft *p, planner *plnr, + int dim0, int dim1, int dim2, INT *nbuf) +{ + INT n = p->vecsz->dims[dim0].n; + INT m = p->vecsz->dims[dim1].n; + INT vl, vs; + get_transpose_vec(p, dim2, &vl, &vs); + *nbuf = 2*vl + + ((n + m) / 2 * sizeof(char) + sizeof(R) - 1) / sizeof(R); + return (!NO_SLOWP(plnr) + && (vl > 8 || !NO_UGLYP(plnr)) /* UGLY for small vl */ + && n != m + && Ntuple_transposable(p->vecsz->dims + dim0, + p->vecsz->dims + dim1, + vl, vs)); +} + +static int mkcldrn_toms513(const problem_rdft *p, planner *plnr, P *ego) +{ + UNUSED(p); UNUSED(plnr); + /* heuristic so that TOMS algorithm is last resort for small vl */ + ego->super.super.ops.other += ego->n * ego->m * 2 * (ego->vl + 30); + return 1; +} + +static const transpose_adt adt_toms513 = +{ + apply_toms513, applicable_toms513, mkcldrn_toms513, + "rdft-transpose-toms513" +}; + +/*-----------------------------------------------------------------------*/ +/*-----------------------------------------------------------------------*/ +/* generic stuff: */ + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld1, wakefulness); + X(plan_awake)(ego->cld2, wakefulness); + X(plan_awake)(ego->cld3, wakefulness); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) 
ego_; + p->print(p, "(%s-%Dx%D%v", ego->slv->adt->nam, + ego->n, ego->m, ego->vl); + if (ego->cld1) p->print(p, "%(%p%)", ego->cld1); + if (ego->cld2) p->print(p, "%(%p%)", ego->cld2); + if (ego->cld3) p->print(p, "%(%p%)", ego->cld3); + p->print(p, ")"); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld3); + X(plan_destroy_internal)(ego->cld2); + X(plan_destroy_internal)(ego->cld1); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p; + int dim0, dim1, dim2; + INT nbuf, vs; + P *pln; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &dim0, &dim1, &dim2, &nbuf)) + return (plan *) 0; + + p = (const problem_rdft *) p_; + pln = MKPLAN_RDFT(P, &padt, ego->adt->apply); + + pln->n = p->vecsz->dims[dim0].n; + pln->m = p->vecsz->dims[dim1].n; + get_transpose_vec(p, dim2, &pln->vl, &vs); + pln->nbuf = nbuf; + pln->d = gcd(pln->n, pln->m); + pln->nd = pln->n / pln->d; + pln->md = pln->m / pln->d; + pln->slv = ego; + + X(ops_zero)(&pln->super.super.ops); /* mkcldrn is responsible for ops */ + + pln->cld1 = pln->cld2 = pln->cld3 = 0; + if (!ego->adt->mkcldrn(p, plnr, pln)) { + X(plan_destroy_internal)(&(pln->super.super)); + return 0; + } + + return &(pln->super.super); +} + +static solver *mksolver(const transpose_adt *adt) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->adt = adt; + return &(slv->super); +} + +void X(rdft_vrank3_transpose_register)(planner *p) +{ + unsigned i; + static const transpose_adt *const adts[] = { + &adt_gcd, &adt_cut, + &adt_toms513 + }; + for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i) + REGISTER_SOLVER(p, mksolver(adts[i])); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/fft/fftw/fftw-3.3.4/reodft/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,15 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft +SUBDIRS = + +noinst_LTLIBRARIES = libreodft.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = reodft.h + +# no longer used due to numerical problems +EXTRA_DIST = reodft11e-r2hc.c redft00e-r2hc.c rodft00e-r2hc.c + +libreodft_la_SOURCES = conf.c reodft.h reodft010e-r2hc.c \ +reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c \ +rodft00e-r2hc-pad.c reodft00e-splitradix.c +# redft00e-r2hc.c rodft00e-r2hc.c reodft11e-r2hc.c diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,743 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = reodft +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + 
$(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libreodft_la_LIBADD = +am_libreodft_la_OBJECTS = conf.lo reodft010e-r2hc.lo \ + reodft11e-radix2.lo reodft11e-r2hc-odd.lo redft00e-r2hc-pad.lo \ + rodft00e-r2hc-pad.lo reodft00e-splitradix.lo +libreodft_la_OBJECTS = $(am_libreodft_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libreodft_la_SOURCES) +DIST_SOURCES = $(libreodft_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ 
+host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/rdft +SUBDIRS = +noinst_LTLIBRARIES = libreodft.la + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = reodft.h + +# no longer used due to numerical problems +EXTRA_DIST = reodft11e-r2hc.c redft00e-r2hc.c rodft00e-r2hc.c +libreodft_la_SOURCES = conf.c reodft.h reodft010e-r2hc.c \ +reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c \ +rodft00e-r2hc-pad.c reodft00e-splitradix.c + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu reodft/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu reodft/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libreodft.la: $(libreodft_la_OBJECTS) $(libreodft_la_DEPENDENCIES) $(EXTRA_libreodft_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libreodft_la_OBJECTS) $(libreodft_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/redft00e-r2hc-pad.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft00e-splitradix.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft010e-r2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft11e-r2hc-odd.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft11e-radix2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rodft00e-r2hc-pad.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool \ + clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec 
install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + +# redft00e-r2hc.c rodft00e-r2hc.c reodft11e-r2hc.c + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/conf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/conf.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "reodft.h" + +static const solvtab s = +{ +#if 0 /* 1 to enable "standard" algorithms with substandard accuracy; + you must also add them to Makefile.am to compile these files*/ + SOLVTAB(X(redft00e_r2hc_register)), + SOLVTAB(X(rodft00e_r2hc_register)), + SOLVTAB(X(reodft11e_r2hc_register)), +#endif + SOLVTAB(X(redft00e_r2hc_pad_register)), + SOLVTAB(X(rodft00e_r2hc_pad_register)), + SOLVTAB(X(reodft00e_splitradix_register)), + SOLVTAB(X(reodft010e_r2hc_register)), + SOLVTAB(X(reodft11e_radix2_r2hc_register)), + SOLVTAB(X(reodft11e_r2hc_odd_register)), + + SOLVTAB_END +}; + +void X(reodft_conf_standard)(planner *p) +{ + X(solvtab_exec)(s, p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/redft00e-r2hc-pad.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/redft00e-r2hc-pad.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Do a REDFT00 problem via an R2HC problem, padded symmetrically to + twice the size. This is asymptotically a factor of ~2 worse than + redft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical + Recipes), but we abandoned the latter after we discovered that it + has intrinsic accuracy problems. */ + +#include "reodft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld, *cldcpy; + INT is; + INT n; + INT vl; + INT ivs, ovs; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is; + INT i, n = ego->n; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = I[0]; + for (i = 1; i < n; ++i) { + R a = I[i * is]; + buf[i] = a; + buf[2*n - i] = a; + } + buf[i] = I[i * is]; /* i == n, Nyquist */ + + /* r2hc transform of size 2*n */ + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + /* copy n+1 real numbers (real parts of hc array) from buf to O */ + { + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + cldcpy->apply((plan *) cldcpy, buf, O); + } + } + + X(ifree)(buf); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldcpy, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldcpy); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(redft00e-r2hc-pad-%D%v%(%p%)%(%p%))", + ego->n + 1, ego->vl, ego->cld, 
ego->cldcpy); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->kind[0] == REDFT00 + && p->sz->dims[0].n > 1 /* n == 1 is not well-defined */ + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *cld = (plan *) 0, *cldcpy; + R *buf = (R *) 0; + INT n; + INT vl, ivs, ovs; + opcnt ops; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + goto nada; + + p = (const problem_rdft *) p_; + + n = p->sz->dims[0].n - 1; + A(n > 0); + buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); + + cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1), + X(mktensor_0d)(), + buf, buf, R2HC)); + if (!cld) + goto nada; + + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + cldcpy = + X(mkplan_d)(plnr, + X(mkproblem_rdft_1_d)(X(mktensor_0d)(), + X(mktensor_1d)(n+1,1, + p->sz->dims[0].os), + buf, TAINT(p->O, ovs), R2HC)); + if (!cldcpy) + goto nada; + + X(ifree)(buf); + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->n = n; + pln->is = p->sz->dims[0].is; + pln->cld = cld; + pln->cldcpy = cldcpy; + pln->vl = vl; + pln->ivs = ivs; + pln->ovs = ovs; + + X(ops_zero)(&ops); + ops.other = n + 2*n; /* loads + stores (input -> buf) */ + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(ifree0)(buf); + if (cld) + X(plan_destroy_internal)(cld); + return (plan *)0; +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { 
PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(redft00e_r2hc_pad_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/redft00e-r2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/redft00e-r2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Do a REDFT00 problem via an R2HC problem, with some pre/post-processing. + + This code uses the trick from FFTPACK, also documented in a similar + form by Numerical Recipes. Unfortunately, this algorithm seems to + have intrinsic numerical problems (similar to those in + reodft11e-r2hc.c), possibly due to the fact that it multiplies its + input by a cosine, causing a loss of precision near the zero. For + transforms of 16k points, it has already lost three or four decimal + places of accuracy, which we deem unacceptable. + + So, we have abandoned this algorithm in favor of the one in + redft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed. 
+ The only other alternative in the literature that does not have + similar numerical difficulties seems to be the direct adaptation of + the Cooley-Tukey decomposition for symmetric data, but this would + require a whole new set of codelets and it's not clear that it's + worth it at this point. However, we did implement the latter + algorithm for the specific case of odd n (logically adapting the + split-radix algorithm); see reodft00e-splitradix.c. */ + +#include "reodft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld; + twid *td; + INT is, os; + INT n; + INT vl; + INT ivs, ovs; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, n = ego->n; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W = ego->td->W; + R *buf; + E csum; + + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = I[0] + I[is * n]; + csum = I[0] - I[is * n]; + for (i = 1; i < n - i; ++i) { + E a, b, apb, amb; + a = I[is * i]; + b = I[is * (n - i)]; + csum += W[2*i] * (amb = K(2.0)*(a - b)); + amb = W[2*i+1] * amb; + apb = (a + b); + buf[i] = apb - amb; + buf[n - i] = apb + amb; + } + if (i == n - i) { + buf[i] = K(2.0) * I[is * i]; + } + + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + /* FIXME: use recursive/cascade summation for better stability? 
*/ + O[0] = buf[0]; + O[os] = csum; + for (i = 1; i + i < n; ++i) { + INT k = i + i; + O[os * k] = buf[i]; + O[os * (k + 1)] = O[os * (k - 1)] - buf[n - i]; + } + if (i + i == n) { + O[os * n] = buf[i]; + } + } + + X(ifree)(buf); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + static const tw_instr redft00e_tw[] = { + { TW_COS, 0, 1 }, + { TW_SIN, 0, 1 }, + { TW_NEXT, 1, 0 } + }; + + X(plan_awake)(ego->cld, wakefulness); + X(twiddle_awake)(wakefulness, + &ego->td, redft00e_tw, 2*ego->n, 1, (ego->n+1)/2); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(redft00e-r2hc-%D%v%(%p%))", ego->n + 1, ego->vl, ego->cld); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->kind[0] == REDFT00 + && p->sz->dims[0].n > 1 /* n == 1 is not well-defined */ + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *cld; + R *buf; + INT n; + opcnt ops; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + n = p->sz->dims[0].n - 1; + A(n > 0); + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), + X(mktensor_0d)(), + buf, buf, R2HC)); + X(ifree)(buf); + if (!cld) + return (plan *)0; + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->n = n; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + pln->cld = cld; + pln->td = 0; + + 
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + + X(ops_zero)(&ops); + ops.other = 8 + (n-1)/2 * 11 + (1 - n % 2) * 5; + ops.add = 2 + (n-1)/2 * 5; + ops.mul = (n-1)/2 * 3 + (1 - n % 2) * 1; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(redft00e_r2hc_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/reodft.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/reodft.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __REODFT_H__ +#define __REODFT_H__ + +#include "ifftw.h" +#include "rdft.h" + +#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11) + +void X(redft00e_r2hc_register)(planner *p); +void X(redft00e_r2hc_pad_register)(planner *p); +void X(rodft00e_r2hc_register)(planner *p); +void X(rodft00e_r2hc_pad_register)(planner *p); +void X(reodft00e_splitradix_register)(planner *p); +void X(reodft010e_r2hc_register)(planner *p); +void X(reodft11e_r2hc_register)(planner *p); +void X(reodft11e_radix2_r2hc_register)(planner *p); +void X(reodft11e_r2hc_odd_register)(planner *p); + +/* configurations */ +void X(reodft_conf_standard)(planner *p); + +#endif /* __REODFT_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/reodft00e-splitradix.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/reodft00e-splitradix.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2005 Matteo Frigo + * Copyright (c) 2005 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Do an R{E,O}DFT00 problem (of an odd length n) recursively via an + R{E,O}DFT00 problem and an RDFT problem of half the length. + + This works by "logically" expanding the array to a real-even/odd DFT of + length 2n-/+2 and then applying the split-radix algorithm. + + In this way, we can avoid having to pad to twice the length + (ala redft00-r2hc-pad), saving a factor of ~2 for n=2^m+/-1, + but don't incur the accuracy loss that the "ordinary" algorithm + sacrifices (ala redft00-r2hc.c). +*/ + +#include "reodft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *clde, *cldo; + twid *td; + INT is, os; + INT n; + INT vl; + INT ivs, ovs; +} P; + +/* redft00 */ +static void apply_e(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, j, n = ego->n + 1, n2 = (n-1)/2; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W = ego->td->W - 2; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + /* do size (n-1)/2 r2hc transform of odd-indexed elements + with stride 4, "wrapping around" end of array with even + boundary conditions */ + for (j = 0, i = 1; i < n; i += 4) + buf[j++] = I[is * i]; + for (i = 2*n-2-i; i > 0; i -= 4) + buf[j++] = I[is * i]; + { + plan_rdft *cld = (plan_rdft *) ego->cldo; + cld->apply((plan *) cld, buf, buf); + } + + /* do size (n+1)/2 redft00 of the even-indexed elements, + writing to O: */ + { + plan_rdft *cld = (plan_rdft *) ego->clde; + cld->apply((plan *) cld, I, O); + } + + /* combine the results with the twiddle factors to get output */ + { /* DC element */ + E b20 = O[0], b0 = K(2.0) * buf[0]; + O[0] = b20 + b0; + O[2*(n2*os)] = b20 - b0; 
+ /* O[n2*os] = O[n2*os]; */ + } + for (i = 1; i < n2 - i; ++i) { + E ap, am, br, bi, wr, wi, wbr, wbi; + br = buf[i]; + bi = buf[n2 - i]; + wr = W[2*i]; + wi = W[2*i+1]; +#if FFT_SIGN == -1 + wbr = K(2.0) * (wr*br + wi*bi); + wbi = K(2.0) * (wr*bi - wi*br); +#else + wbr = K(2.0) * (wr*br - wi*bi); + wbi = K(2.0) * (wr*bi + wi*br); +#endif + ap = O[i*os]; + O[i*os] = ap + wbr; + O[(2*n2 - i)*os] = ap - wbr; + am = O[(n2 - i)*os]; +#if FFT_SIGN == -1 + O[(n2 - i)*os] = am - wbi; + O[(n2 + i)*os] = am + wbi; +#else + O[(n2 - i)*os] = am + wbi; + O[(n2 + i)*os] = am - wbi; +#endif + } + if (i == n2 - i) { /* Nyquist element */ + E ap, wbr; + wbr = K(2.0) * (W[2*i] * buf[i]); + ap = O[i*os]; + O[i*os] = ap + wbr; + O[(2*n2 - i)*os] = ap - wbr; + } + } + + X(ifree)(buf); +} + +/* rodft00 */ +static void apply_o(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, j, n = ego->n - 1, n2 = (n+1)/2; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W = ego->td->W - 2; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + /* do size (n+1)/2 r2hc transform of even-indexed elements + with stride 4, "wrapping around" end of array with odd + boundary conditions */ + for (j = 0, i = 0; i < n; i += 4) + buf[j++] = I[is * i]; + for (i = 2*n-i; i > 0; i -= 4) + buf[j++] = -I[is * i]; + { + plan_rdft *cld = (plan_rdft *) ego->cldo; + cld->apply((plan *) cld, buf, buf); + } + + /* do size (n-1)/2 rodft00 of the odd-indexed elements, + writing to O: */ + { + plan_rdft *cld = (plan_rdft *) ego->clde; + if (I == O) { + /* can't use I+is and I, subplan would lose in-placeness */ + cld->apply((plan *) cld, I + is, I + is); + /* we could maybe avoid this copy by modifying the + twiddle loop, but currently I can't be bothered. 
*/ + A(is >= os); + for (i = 0; i < n2-1; ++i) + O[os*i] = I[is*(i+1)]; + } + else + cld->apply((plan *) cld, I + is, O); + } + + /* combine the results with the twiddle factors to get output */ + O[(n2-1)*os] = K(2.0) * buf[0]; + for (i = 1; i < n2 - i; ++i) { + E ap, am, br, bi, wr, wi, wbr, wbi; + br = buf[i]; + bi = buf[n2 - i]; + wr = W[2*i]; + wi = W[2*i+1]; +#if FFT_SIGN == -1 + wbr = K(2.0) * (wr*br + wi*bi); + wbi = K(2.0) * (wi*br - wr*bi); +#else + wbr = K(2.0) * (wr*br - wi*bi); + wbi = K(2.0) * (wr*bi + wi*br); +#endif + ap = O[(i-1)*os]; + O[(i-1)*os] = wbi + ap; + O[(2*n2-1 - i)*os] = wbi - ap; + am = O[(n2-1 - i)*os]; +#if FFT_SIGN == -1 + O[(n2-1 - i)*os] = wbr + am; + O[(n2-1 + i)*os] = wbr - am; +#else + O[(n2-1 - i)*os] = wbr + am; + O[(n2-1 + i)*os] = wbr - am; +#endif + } + if (i == n2 - i) { /* Nyquist element */ + E ap, wbi; + wbi = K(2.0) * (W[2*i+1] * buf[i]); + ap = O[(i-1)*os]; + O[(i-1)*os] = wbi + ap; + O[(2*n2-1 - i)*os] = wbi - ap; + } + } + + X(ifree)(buf); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + static const tw_instr reodft00e_tw[] = { + { TW_COS, 1, 1 }, + { TW_SIN, 1, 1 }, + { TW_NEXT, 1, 0 } + }; + + X(plan_awake)(ego->clde, wakefulness); + X(plan_awake)(ego->cldo, wakefulness); + X(twiddle_awake)(wakefulness, &ego->td, reodft00e_tw, + 2*ego->n, 1, ego->n/4); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldo); + X(plan_destroy_internal)(ego->clde); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + if (ego->super.apply == apply_e) + p->print(p, "(redft00e-splitradix-%D%v%(%p%)%(%p%))", + ego->n + 1, ego->vl, ego->clde, ego->cldo); + else + p->print(p, "(rodft00e-splitradix-%D%v%(%p%)%(%p%))", + ego->n - 1, ego->vl, ego->clde, ego->cldo); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + + 
return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && (p->kind[0] == REDFT00 || p->kind[0] == RODFT00) + && p->sz->dims[0].n > 1 /* don't create size-0 sub-plans */ + && p->sz->dims[0].n % 2 /* odd: 4 divides "logical" DFT */ + && (p->I != p->O || p->vecsz->rnk == 0 + || p->vecsz->dims[0].is == p->vecsz->dims[0].os) + && (p->kind[0] != RODFT00 || p->I != p->O || + p->sz->dims[0].is >= p->sz->dims[0].os) /* laziness */ + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *clde, *cldo; + R *buf; + INT n, n0; + opcnt ops; + int inplace_odd; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1); + A(n > 0 && n % 2 == 0); + buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS); + + inplace_odd = p->kind[0]==RODFT00 && p->I == p->O; + clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)( + X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is, + inplace_odd ? p->sz->dims[0].is + : p->sz->dims[0].os), + X(mktensor_0d)(), + TAINT(p->I + + p->sz->dims[0].is * (p->kind[0]==RODFT00), + p->vecsz->rnk ? p->vecsz->dims[0].is : 0), + TAINT(p->O + + p->sz->dims[0].is * inplace_odd, + p->vecsz->rnk ? p->vecsz->dims[0].os : 0), + p->kind[0])); + if (!clde) { + X(ifree)(buf); + return (plan *)0; + } + + cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)( + X(mktensor_1d)(n/2, 1, 1), + X(mktensor_0d)(), + buf, buf, R2HC)); + X(ifree)(buf); + if (!cldo) + return (plan *)0; + + pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? 
apply_e : apply_o); + + pln->n = n; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + pln->clde = clde; + pln->cldo = cldo; + pln->td = 0; + + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + + X(ops_zero)(&ops); + ops.other = n/2; + ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) + + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2; + ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2; + + /* tweak ops.other so that r2hc-pad is used for small sizes, which + seems to be a lot faster on my machine: */ + ops.other += 256; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops); + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(reodft00e_splitradix_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/reodft010e-r2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/reodft010e-r2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+/* Do an R{E,O}DFT{01,10} problem via an R2HC problem, with some
+   pre/post-processing ala FFTPACK. */
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* base solver; no extra per-solver state */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan: in-place size-n R2HC on a scratch buffer (built in mkplan) */
+     twid *td; /* twiddle table, set up in awake (cleared to 0 in mkplan) */
+     INT is, os; /* input and output stride of the transform dimension */
+     INT n; /* transform size, copied from the problem */
+     INT vl; /* number of vectors processed per apply call */
+     INT ivs, ovs; /* input and output stride between vectors */
+     rdft_kind kind; /* problem kind this plan was built for; used when printing */
+} P;
+
+/* A real-even-01 DFT operates logically on a size-4N array:
+                   I 0 -r(I*) -I 0 r(I*),
+   where r denotes reversal and * denotes deletion of the 0th element.
+   To compute the transform of this, we imagine performing a radix-4
+   (real-input) DIF step, which turns the size-4N DFT into 4 size-N
+   (contiguous) DFTs, two of which are zero and two of which are
+   conjugates. The non-redundant size-N DFT has halfcomplex input, so
+   we can do it with a size-N hc2r transform. (In order to share
+   plans with the re10 (inverse) transform, however, we use the DHT
+   trick to re-express the hc2r problem as r2hc. This has little cost
+   since we are already pre- and post-processing the data in {i,n-i}
+   order.) Finally, we have to write out the data in the correct
+   order...the two size-N redundant (conjugate) hc2r DFTs correspond
+   to the even and odd outputs in O (i.e. the usual interleaved output
+   of DIF transforms); since this data has even symmetry, we only
+   write the first half of it.
+
+   The real-even-10 DFT is just the reverse of these steps, i.e. a
+   radix-4 DIT transform. There, however, we just use the r2hc
+   transform naturally without resorting to the DHT trick.
+
+   A real-odd-01 DFT is very similar, except that the input is
+   0 I (rI)* 0 -I -(rI)*. This format, however, can be transformed
+   into precisely the real-even-01 format above by sending I -> rI
+   and shifting the array by N.
The former swap is just another
+   transformation on the input during preprocessing; the latter
+   multiplies the even/odd outputs by i/-i, which combines with
+   the factor of -i (to take the imaginary part) to simply flip
+   the sign of the odd outputs. Vice-versa for real-odd-10.
+
+   The FFTPACK source code was very helpful in working this out.
+   (They do unnecessary passes over the array, though.) The same
+   algorithm is also described in:
+
+   John Makhoul, "A fast cosine transform in one and two dimensions,"
+   IEEE Trans. on Acoust. Speech and Sig. Proc., ASSP-28 (1), 27--34 (1980).
+
+   Note that Numerical Recipes suggests a different algorithm that
+   requires more operations and uses trig. functions for both the pre-
+   and post-processing passes.
+*/
+/* REDFT01 of size n for each of vl vectors: twiddle pre-pass into buf, in-place child R2HC, interleaved write to O */
+static void apply_re01(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* twiddle pairs, read as W[2*i] and W[2*i + 1] */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          buf[0] = I[0];
+          for (i = 1; i < n - i; ++i) { /* pair input i with input n - i */
+               E a, b, apb, amb, wa, wb;
+               a = I[is * i];
+               b = I[is * (n - i)];
+               apb = a + b;
+               amb = a - b;
+               wa = W[2*i];
+               wb = W[2*i + 1];
+               buf[i] = wa * amb + wb * apb;
+               buf[n - i] = wa * apb - wb * amb;
+          }
+          if (i == n - i) { /* unpaired middle element; only reached when n is even */
+               buf[i] = K(2.0) * I[is * i] * W[2*i];
+          }
+
+          { /* child transform, in place on buf */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          O[0] = buf[0];
+          for (i = 1; i < n - i; ++i) { /* difference goes to output 2i - 1, sum to output 2i */
+               E a, b;
+               INT k;
+               a = buf[i];
+               b = buf[n - i];
+               k = i + i;
+               O[os * (k - 1)] = a - b;
+               O[os * k] = a + b;
+          }
+          if (i == n - i) { /* even n: last output is the middle element itself */
+               O[os * (n - 1)] = buf[i];
+          }
+     }
+
+     X(ifree)(buf);
+}
+
+/* ro01 is same as re01, but with i <-> n - 1 - i in the input and
+   the sign of the odd output elements flipped.
*/
+static void apply_ro01(const plan *ego_, R *I, R *O)
+{ /* RODFT01 of size n for each of vl vectors; same structure as apply_re01 with reversed input indexing */
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* twiddle pairs, read as W[2*i] and W[2*i+1] */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          buf[0] = I[is * (n - 1)];
+          for (i = 1; i < n - i; ++i) { /* pair input n - 1 - i with input i - 1 */
+               E a, b, apb, amb, wa, wb;
+               a = I[is * (n - 1 - i)];
+               b = I[is * (i - 1)];
+               apb = a + b;
+               amb = a - b;
+               wa = W[2*i];
+               wb = W[2*i+1];
+               buf[i] = wa * amb + wb * apb;
+               buf[n - i] = wa * apb - wb * amb;
+          }
+          if (i == n - i) { /* unpaired middle element; only reached when n is even */
+               buf[i] = K(2.0) * I[is * (i - 1)] * W[2*i];
+          }
+
+          { /* child transform, in place on buf */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          O[0] = buf[0];
+          for (i = 1; i < n - i; ++i) { /* odd outputs carry the opposite sign to apply_re01 */
+               E a, b;
+               INT k;
+               a = buf[i];
+               b = buf[n - i];
+               k = i + i;
+               O[os * (k - 1)] = b - a;
+               O[os * k] = a + b;
+          }
+          if (i == n - i) { /* even n: last output is the negated middle element */
+               O[os * (n - 1)] = -buf[i];
+          }
+     }
+
+     X(ifree)(buf);
+}
+/* REDFT10 of size n for each of vl vectors: de-interleave input into buf, in-place child R2HC, twiddle post-pass */
+static void apply_re10(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* twiddle pairs, read as W[2*i] and W[2*i + 1] */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          buf[0] = I[0];
+          for (i = 1; i < n - i; ++i) { /* input 2i goes to buf[i], input 2i - 1 to buf[n - i] */
+               E u, v;
+               INT k = i + i;
+               u = I[is * (k - 1)];
+               v = I[is * k];
+               buf[n - i] = u;
+               buf[i] = v;
+          }
+          if (i == n - i) { /* even n: last input fills the middle slot */
+               buf[i] = I[is * (n - 1)];
+          }
+
+          { /* child transform, in place on buf */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          O[0] = K(2.0) * buf[0];
+          for (i = 1; i < n - i; ++i) { /* combine buf[i] and buf[n - i] with the twiddle pair, scaled by 2 */
+               E a, b, wa, wb;
+               a = K(2.0) * buf[i];
+               b = K(2.0) * buf[n - i];
+               wa = W[2*i];
+               wb = W[2*i + 1];
+               O[os * i] = wa * a + wb * b;
+               O[os * (n - i)] = wb * a - wa * b;
+          }
+          if (i == n - i) { /* unpaired middle element; only reached when n is even */
+               O[os * i] = K(2.0) * buf[i] * W[2*i];
+          }
+     }
+
+     X(ifree)(buf);
+}
+
+/* ro10
is same as re10, but with i <-> n - 1 - i in the output and
+   the sign of the odd input elements flipped. */
+static void apply_ro10(const plan *ego_, R *I, R *O)
+{ /* RODFT10 of size n for each of vl vectors */
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* twiddle pairs, read as W[2*i] and W[2*i + 1] */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          buf[0] = I[0];
+          for (i = 1; i < n - i; ++i) { /* input 2i goes to buf[i], negated input 2i - 1 to buf[n - i] */
+               E u, v;
+               INT k = i + i;
+               u = -I[is * (k - 1)];
+               v = I[is * k];
+               buf[n - i] = u;
+               buf[i] = v;
+          }
+          if (i == n - i) { /* even n: negated last input fills the middle slot */
+               buf[i] = -I[is * (n - 1)];
+          }
+
+          { /* child transform, in place on buf */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          O[os * (n - 1)] = K(2.0) * buf[0];
+          for (i = 1; i < n - i; ++i) { /* same combination as apply_re10, written to reversed output slots */
+               E a, b, wa, wb;
+               a = K(2.0) * buf[i];
+               b = K(2.0) * buf[n - i];
+               wa = W[2*i];
+               wb = W[2*i + 1];
+               O[os * (n - 1 - i)] = wa * a + wb * b;
+               O[os * (i - 1)] = wb * a - wa * b;
+          }
+          if (i == n - i) { /* unpaired middle element; only reached when n is even */
+               O[os * (i - 1)] = K(2.0) * buf[i] * W[2*i];
+          }
+     }
+
+     X(ifree)(buf);
+}
+/* forward the wakefulness state to the child plan and to the twiddle table td */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+     static const tw_instr reodft010e_tw[] = {
+          { TW_COS, 0, 1 },
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+
+     X(plan_awake)(ego->cld, wakefulness);
+
+     X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw,
+                      4*ego->n, 1, ego->n/2+1);
+}
+/* hand the child plan to plan_destroy_internal */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+/* print the plan: kind string, size n, vl and the child plan */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-r2hc-%D%v%(%p%))",
+              X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+/* structural test: rank-1 problem, vector rank <= 1, kind one of REDFT01, REDFT10, RODFT01, RODFT10 */
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     const problem_rdft *p = (const problem_rdft *) p_;
+     UNUSED(ego_);
+
+     return (1
+             && p->sz->rnk == 1
+             && p->vecsz->rnk <= 1
+             && (p->kind[0] == REDFT01 || p->kind[0] == REDFT10
+                 || p->kind[0] == RODFT01 || p->kind[0] == RODFT10)
+          );
+}
+/* applicable0, and additionally refused whenever NO_SLOWP(plnr) holds */
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     return (!NO_SLOWP(plnr) && applicable0(ego, p));
+}
+/* plan the size-n R2HC child, pick the apply function by kind, fill in P and its operation counts; 0 on failure */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     INT n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+          X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* only used as the planning target of the child */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     switch (p->kind[0]) {
+         case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
+         case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
+         case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
+         case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
+         default: A(0); return (plan*)0; /* excluded by applicable0; NOTE(review): cld is not destroyed on this path */
+     }
+
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     pln->td = 0;
+     pln->kind = p->kind[0];
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     X(ops_zero)(&ops);
+     ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5;
+     if (p->kind[0] == REDFT01 || p->kind[0] == RODFT01) {
+          ops.add = (n-1)/2 * 6;
+          ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
+     }
+     else { /* 10 transforms */
+          ops.add = (n-1)/2 * 2;
+          ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
+     }
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+/* register a fresh instance of this solver with planner p */
+void X(reodft010e_r2hc_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/reodft11e-r2hc-odd.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/reodft/reodft11e-r2hc-odd.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+/* Do an R{E,O}DFT11 problem via an R2HC problem of the same *odd* size,
+   with some permutations and post-processing, as described in:
+
+     S. C. Chan and K. L. Ho, "Fast algorithms for computing the
+     discrete cosine transform," IEEE Trans. Circuits Systems II:
+     Analog & Digital Sig. Proc. 39 (3), 185--190 (1992).
+
+   (For even sizes, see reodft11e-radix2.c.)
+
+   This algorithm is related to the 8 x n prime-factor-algorithm (PFA)
+   decomposition of the size 8n "logical" DFT corresponding to the
+   R{EO}DFT11.
+
+   Aside from very confusing notation (several symbols are redefined
+   from one line to the next), be aware that this paper has some
+   errors. In particular, the signs are wrong in Eqs. (34-35). Also,
+   Eqs. (36-37) should be simply C(k) = C(2k + 1 mod N), and similarly
+   for S (or, equivalently, the second cases should have 2*N - 2*k - 1
+   instead of N - k - 1).
Note also that in their definition of the
+   DFT, similarly to FFTW's, the exponent's sign is -1, but they
+   forgot to correspondingly multiply S (the sine terms) by -1.
+*/
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* base solver; no extra per-solver state */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan: in-place size-n R2HC on a scratch buffer (built in mkplan) */
+     INT is, os; /* input and output stride of the transform dimension */
+     INT n; /* transform size; applicable0 only accepts odd n */
+     INT vl; /* number of vectors processed per apply call */
+     INT ivs, ovs; /* input and output stride between vectors */
+     rdft_kind kind; /* problem kind this plan was built for; used when printing */
+} P;
+/* square root of two; every output below is scaled by it */
+static DK(SQRT2, +1.4142135623730950488016887242096980785696718753769);
+/* SGN_SET(x, i): x negated when i is odd, x unchanged when i is even */
+#define SGN_SET(x, i) ((i) % 2 ? -(x) : (x))
+/* REDFT11 of odd size n for each of vl vectors: permute input into buf, in-place child R2HC, signed combine into O */
+static void apply_re11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n, n2 = n/2;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          { /* input permutation: m advances by 4 starting at n2; each range of m selects an index map and a sign */
+               INT m;
+               for (i = 0, m = n2; m < n; ++i, m += 4)
+                    buf[i] = I[is * m];
+               for (; m < 2 * n; ++i, m += 4)
+                    buf[i] = -I[is * (2*n - m - 1)];
+               for (; m < 3 * n; ++i, m += 4)
+                    buf[i] = -I[is * (m - 2*n)];
+               for (; m < 4 * n; ++i, m += 4)
+                    buf[i] = I[is * (4*n - m - 1)];
+               m -= 4 * n; /* wrap m around and fill the remaining slots */
+               for (; i < n; ++i, m += 4)
+                    buf[i] = I[is * m];
+          }
+
+          { /* child plan: R2HC of size n */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          /* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
+          for (i = 0; i + i + 1 < n2; ++i) { /* each pass writes four outputs from buf[k], buf[k+1], buf[n-k-1], buf[n-k] */
+               INT k = i + i + 1;
+               E c1, s1;
+               E c2, s2;
+               c1 = buf[k];
+               c2 = buf[k + 1];
+               s2 = buf[n - (k + 1)];
+               s1 = buf[n - k];
+
+               O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2) +
+                                    SGN_SET(s1, i/2));
+               O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2) -
+                                              SGN_SET(s1, (n-(i+1))/2));
+
+               O[os * (n2 - (i+1))] = SQRT2 * (SGN_SET(c2, (n2-i)/2) -
+                                               SGN_SET(s2, (n2-(i+1))/2));
+               O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2) +
+                                               SGN_SET(s2, (n2+(i+1))/2));
+          }
+          if (i + i + 1 == n2) { /* leftover pair; reached only when n2 is odd */
+               E c, s;
+               c = buf[n2];
+               s = buf[n - n2];
+               O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2) +
+                                    SGN_SET(s, i/2));
+               O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2) +
+                                              SGN_SET(s, (i+1)/2));
+          }
+          O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2); /* middle output comes from buf[0] alone */
+     }
+
+     X(ifree)(buf);
+}
+
+/* like for rodft01, rodft11 is obtained from redft11 by
+   reversing the input and flipping the sign of every other output. */
+static void apply_ro11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n, n2 = n/2;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          { /* same permutation as apply_re11 with every input index j replaced by n - 1 - j */
+               INT m;
+               for (i = 0, m = n2; m < n; ++i, m += 4)
+                    buf[i] = I[is * (n - 1 - m)];
+               for (; m < 2 * n; ++i, m += 4)
+                    buf[i] = -I[is * (m - n)];
+               for (; m < 3 * n; ++i, m += 4)
+                    buf[i] = -I[is * (3*n - 1 - m)];
+               for (; m < 4 * n; ++i, m += 4)
+                    buf[i] = I[is * (m - 3*n)];
+               m -= 4 * n; /* wrap m around and fill the remaining slots */
+               for (; i < n; ++i, m += 4)
+                    buf[i] = I[is * (n - 1 - m)];
+          }
+
+          { /* child plan: R2HC of size n */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          /* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
+          for (i = 0; i + i + 1 < n2; ++i) { /* as in apply_re11, with an extra + i or + j added to each sign index */
+               INT k = i + i + 1;
+               INT j;
+               E c1, s1;
+               E c2, s2;
+               c1 = buf[k];
+               c2 = buf[k + 1];
+               s2 = buf[n - (k + 1)];
+               s1 = buf[n - k];
+
+               O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2 + i) +
+                                    SGN_SET(s1, i/2 + i));
+               O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2 + i) -
+                                              SGN_SET(s1, (n-(i+1))/2 + i));
+
+               j = n2 - (i+1);
+               O[os * j] = SQRT2 * (SGN_SET(c2, (n2-i)/2 + j) -
+                                    SGN_SET(s2, (n2-(i+1))/2 + j));
+               O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2 + j) +
+                                               SGN_SET(s2, (n2+(i+1))/2 + j));
+          }
+          if (i + i + 1 == n2) { /* leftover pair; reached only when n2 is odd */
+               E c, s;
+               c = buf[n2];
+               s = buf[n - n2];
+               O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2 + i) +
+                                    SGN_SET(s, i/2 + i));
+               O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2 + i) +
+                                              SGN_SET(s, (i+1)/2 + i));
+          }
+          O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2 + n2); /* middle output comes from buf[0] alone */
+     }
+
+     X(ifree)(buf);
+}
+/* forward the wakefulness state to the child plan; this plan has no twiddle table */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+     X(plan_awake)(ego->cld, wakefulness);
+}
+/* hand the child plan to plan_destroy_internal */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+/* print the plan: kind string, size n, vl and the child plan */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-r2hc-odd-%D%v%(%p%))",
+              X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+/* structural test: rank-1 problem of odd size, vector rank <= 1, kind REDFT11 or RODFT11 */
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     const problem_rdft *p = (const problem_rdft *) p_;
+     UNUSED(ego_);
+
+     return (1
+             && p->sz->rnk == 1
+             && p->vecsz->rnk <= 1
+             && p->sz->dims[0].n % 2 == 1
+             && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
+          );
+}
+/* applicable0, and additionally refused whenever NO_SLOWP(plnr) holds */
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     return (!NO_SLOWP(plnr) && applicable0(ego, p));
+}
+/* plan the size-n R2HC child, pick apply_re11 or apply_ro11, fill in P and its operation counts; 0 on failure */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     INT n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+          X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* only used as the planning target of the child */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     pln->kind = p->kind[0];
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     X(ops_zero)(&ops);
+     ops.add = n - 1;
+     ops.mul = n;
+     ops.other = 4*n;
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+/* register a fresh instance of this solver with planner p */
+void X(reodft11e_r2hc_odd_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/reodft11e-r2hc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/reodft/reodft11e-r2hc.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+/* Do an R{E,O}DFT11 problem via an R2HC problem, with some
+   pre/post-processing ala FFTPACK. Use a trick from:
+
+     S. C. Chan and K. L. Ho, "Direct methods for computing discrete
+     sinusoidal transforms," IEE Proceedings F 137 (6), 433--442 (1990).
+
+   to re-express as an REDFT01 (DCT-III) problem.
+
+   NOTE: We no longer use this algorithm, because it turns out to suffer
+   a catastrophic loss of accuracy for certain inputs, apparently because
+   its post-processing multiplies the output by a cosine. Near the zero
+   of the cosine, the REDFT01 must produce a near-singular output.
+*/
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* base solver; no extra per-solver state */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan: in-place size-n R2HC on a scratch buffer (built in mkplan) */
+     twid *td, *td2; /* td: twiddle pairs for the pre-pass; td2: per-output factors for the post-pass (see awake) */
+     INT is, os; /* input and output stride of the transform dimension */
+     INT n; /* transform size, copied from the problem */
+     INT vl; /* number of vectors processed per apply call */
+     INT ivs, ovs; /* input and output stride between vectors */
+     rdft_kind kind; /* problem kind this plan was built for; used when printing */
+} P;
+/* REDFT11 of size n for each of vl vectors: running-difference pass, twiddle pre-pass, child R2HC, scaled write to O */
+static void apply_re11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *W;
+     R *buf;
+     E cur;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          /* I wish that this didn't require an extra pass. */
+          /* FIXME: use recursive/cascade summation for better stability? */
+          buf[n - 1] = cur = K(2.0) * I[is * (n - 1)];
+          for (i = n - 1; i > 0; --i) { /* backward recurrence: buf[i-1] = 2 * I[i-1] - buf[i] */
+               E curnew;
+               buf[(i - 1)] = curnew = K(2.0) * I[is * (i - 1)] - cur;
+               cur = curnew;
+          }
+
+          W = ego->td->W;
+          for (i = 1; i < n - i; ++i) { /* pair buf[i] with buf[n - i], in place */
+               E a, b, apb, amb, wa, wb;
+               a = buf[i];
+               b = buf[n - i];
+               apb = a + b;
+               amb = a - b;
+               wa = W[2*i];
+               wb = W[2*i + 1];
+               buf[i] = wa * amb + wb * apb;
+               buf[n - i] = wa * apb - wb * amb;
+          }
+          if (i == n - i) { /* unpaired middle element; only reached when n is even */
+               buf[i] = K(2.0) * buf[i] * W[2*i];
+          }
+
+          { /* child transform, in place on buf */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          W = ego->td2->W;
+          O[0] = W[0] * buf[0];
+          for (i = 1; i < n - i; ++i) { /* outputs 2i - 1 and 2i, each scaled by its own entry of td2 */
+               E a, b;
+               INT k;
+               a = buf[i];
+               b = buf[n - i];
+               k = i + i;
+               O[os * (k - 1)] = W[k - 1] * (a - b);
+               O[os * k] = W[k] * (a + b);
+          }
+          if (i == n - i) { /* even n: last output from the middle element */
+               O[os * (n - 1)] = W[n - 1] * buf[i];
+          }
+     }
+
+     X(ifree)(buf);
+}
+
+/* like for rodft01, rodft11 is obtained from redft11 by
+   reversing the input and flipping the sign of every other output. */
+static void apply_ro11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     INT is = ego->is, os = ego->os;
+     INT i, n = ego->n;
+     INT iv, vl = ego->vl;
+     INT ivs = ego->ivs, ovs = ego->ovs;
+     R *W;
+     R *buf;
+     E cur;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, freed at the end of the call */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+          /* I wish that this didn't require an extra pass. */
+          /* FIXME: use recursive/cascade summation for better stability? */
+          buf[n - 1] = cur = K(2.0) * I[0];
+          for (i = n - 1; i > 0; --i) { /* same recurrence as apply_re11, reading the input in reverse order */
+               E curnew;
+               buf[(i - 1)] = curnew = K(2.0) * I[is * (n - i)] - cur;
+               cur = curnew;
+          }
+
+          W = ego->td->W;
+          for (i = 1; i < n - i; ++i) { /* pair buf[i] with buf[n - i], in place */
+               E a, b, apb, amb, wa, wb;
+               a = buf[i];
+               b = buf[n - i];
+               apb = a + b;
+               amb = a - b;
+               wa = W[2*i];
+               wb = W[2*i + 1];
+               buf[i] = wa * amb + wb * apb;
+               buf[n - i] = wa * apb - wb * amb;
+          }
+          if (i == n - i) { /* unpaired middle element; only reached when n is even */
+               buf[i] = K(2.0) * buf[i] * W[2*i];
+          }
+
+          { /* child transform, in place on buf */
+               plan_rdft *cld = (plan_rdft *) ego->cld;
+               cld->apply((plan *) cld, buf, buf);
+          }
+
+          W = ego->td2->W;
+          O[0] = W[0] * buf[0];
+          for (i = 1; i < n - i; ++i) { /* odd outputs carry the opposite sign to apply_re11 */
+               E a, b;
+               INT k;
+               a = buf[i];
+               b = buf[n - i];
+               k = i + i;
+               O[os * (k - 1)] = W[k - 1] * (b - a);
+               O[os * k] = W[k] * (a + b);
+          }
+          if (i == n - i) { /* even n: last output from the negated middle element */
+               O[os * (n - 1)] = -W[n - 1] * buf[i];
+          }
+     }
+
+     X(ifree)(buf);
+}
+/* forward the wakefulness state to the child plan and to both twiddle tables */
+static void awake(plan *ego_, enum wakefulness wakefulness)
+{
+     P *ego = (P *) ego_;
+     static const tw_instr reodft010e_tw[] = {
+          { TW_COS, 0, 1 },
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+     static const tw_instr reodft11e_tw[] = {
+          { TW_COS, 1, 1 },
+          { TW_NEXT, 2, 0 }
+     };
+
+     X(plan_awake)(ego->cld, wakefulness);
+
+     X(twiddle_awake)(wakefulness,
+                      &ego->td, reodft010e_tw, 4*ego->n, 1, ego->n/2+1);
+     X(twiddle_awake)(wakefulness,
+                      &ego->td2, reodft11e_tw, 8*ego->n, 1, ego->n * 2);
+}
+/* hand the child plan to plan_destroy_internal */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+/* print the plan: kind string, size n, vl and the child plan */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-r2hc-%D%v%(%p%))",
+              X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+/* structural test: rank-1 problem, vector rank <= 1, kind REDFT11 or RODFT11 (any size parity) */
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     const problem_rdft *p = (const problem_rdft *) p_;
+
+     UNUSED(ego_);
+
+     return (1
+             && p->sz->rnk == 1
+             && p->vecsz->rnk <= 1
+             && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
+          );
+}
+/* applicable0, and additionally refused whenever NO_SLOWP(plnr) holds */
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     return (!NO_SLOWP(plnr) && applicable0(ego, p));
+}
+/* plan the size-n R2HC child, pick apply_re11 or apply_ro11, fill in P and its operation counts; 0 on failure */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     INT n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+          X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* only used as the planning target of the child */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     pln->td = pln->td2 = 0;
+     pln->kind = p->kind[0];
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     X(ops_zero)(&ops);
+     ops.other = 5 + (n-1) * 2 + (n-1)/2 * 12 + (1 - n % 2) * 6;
+     ops.add = (n - 1) * 1 + (n-1)/2 * 6;
+     ops.mul = 2 + (n-1) * 1 + (n-1)/2 * 6 + (1 - n % 2) * 3;
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+/* register a fresh instance of this solver with planner p */
+void X(reodft11e_r2hc_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/reodft11e-radix2.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fft/fftw/fftw-3.3.4/reodft/reodft11e-radix2.c Tue Oct 06 13:08:39 2015 +0100
@@ -0,0 +1,513 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms
of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Do an R{E,O}DFT11 problem of *even* size by a pair of R2HC problems + of half the size, plus some pre/post-processing. Use a trick from: + + Zhongde Wang, "On computing the discrete Fourier and cosine transforms," + IEEE Trans. Acoust. Speech Sig. Proc. ASSP-33 (4), 1341--1344 (1985). + + to re-express as a pair of half-size REDFT01 (DCT-III) problems. Our + implementation looks quite a bit different from the algorithm described + in the paper because we combined the paper's pre/post-processing with + the pre/post-processing used to turn REDFT01 into R2HC. (Also, the + paper uses a DCT/DST pair, but we turn the DST into a DCT via the + usual reordering/sign-flip trick. We additionally combined a couple + of the matrices/transformations of the paper into a single pass.) + + NOTE: We originally used a simpler method by S. C. Chan and K. L. Ho + that turned out to have numerical problems; see reodft11e-r2hc.c. + + (For odd sizes, see reodft11e-r2hc-odd.c.) 
+*/ + +#include "reodft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld; + twid *td, *td2; + INT is, os; + INT n; + INT vl; + INT ivs, ovs; + rdft_kind kind; +} P; + +static void apply_re11(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, n = ego->n, n2 = n/2; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W = ego->td->W; + R *W2; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = K(2.0) * I[0]; + buf[n2] = K(2.0) * I[is * (n - 1)]; + for (i = 1; i + i < n2; ++i) { + INT k = i + i; + E a, b, a2, b2; + { + E u, v; + u = I[is * (k - 1)]; + v = I[is * k]; + a = u + v; + b2 = u - v; + } + { + E u, v; + u = I[is * (n - k - 1)]; + v = I[is * (n - k)]; + b = u + v; + a2 = u - v; + } + { + E wa, wb; + wa = W[2*i]; + wb = W[2*i + 1]; + { + E apb, amb; + apb = a + b; + amb = a - b; + buf[i] = wa * amb + wb * apb; + buf[n2 - i] = wa * apb - wb * amb; + } + { + E apb, amb; + apb = a2 + b2; + amb = a2 - b2; + buf[n2 + i] = wa * amb + wb * apb; + buf[n - i] = wa * apb - wb * amb; + } + } + } + if (i + i == n2) { + E u, v; + u = I[is * (n2 - 1)]; + v = I[is * n2]; + buf[i] = (u + v) * (W[2*i] * K(2.0)); + buf[n - i] = (u - v) * (W[2*i] * K(2.0)); + } + + + /* child plan: two r2hc's of size n/2 */ + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + W2 = ego->td2->W; + { /* i == 0 case */ + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = buf[0]; + b = buf[n2]; + O[0] = wa * a + wb * b; + O[os * (n - 1)] = wb * a - wa * b; + } + W2 += 2; + for (i = 1; i + i < n2; ++i, W2 += 2) { + INT k; + E u, v, u2, v2; + u = buf[i]; + v = buf[n2 - i]; + u2 = buf[n2 + i]; + v2 = buf[n - i]; + k = (i + i) - 1; + { + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = u - v; + b = v2 - u2; + O[os * k] = wa * a + 
wb * b; + O[os * (n - 1 - k)] = wb * a - wa * b; + } + ++k; + W2 += 2; + { + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = u + v; + b = u2 + v2; + O[os * k] = wa * a + wb * b; + O[os * (n - 1 - k)] = wb * a - wa * b; + } + } + if (i + i == n2) { + INT k = (i + i) - 1; + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = buf[i]; + b = buf[n2 + i]; + O[os * k] = wa * a - wb * b; + O[os * (n - 1 - k)] = wb * a + wa * b; + } + } + + X(ifree)(buf); +} + +#if 0 + +/* This version of apply_re11 uses REDFT01 child plans, more similar + to the original paper by Z. Wang. We keep it around for reference + (it is simpler) and because it may become more efficient if we + ever implement REDFT01 codelets. */ + +static void apply_re11(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, n = ego->n; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = K(2.0) * I[0]; + buf[n/2] = K(2.0) * I[is * (n - 1)]; + for (i = 1; i + i < n; ++i) { + INT k = i + i; + E a, b; + a = I[is * (k - 1)]; + b = I[is * k]; + buf[i] = a + b; + buf[n - i] = a - b; + } + + /* child plan: two redft01's (DCT-III) */ + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + W = ego->td2->W; + for (i = 0; i + 1 < n/2; ++i, W += 2) { + { + E wa, wb; + E a, b; + wa = W[0]; /* cos */ + wb = W[1]; /* sin */ + a = buf[i]; + b = buf[n/2 + i]; + O[os * i] = wa * a + wb * b; + O[os * (n - 1 - i)] = wb * a - wa * b; + } + ++i; + W += 2; + { + E wa, wb; + E a, b; + wa = W[0]; /* cos */ + wb = W[1]; /* sin */ + a = buf[i]; + b = buf[n/2 + i]; + O[os * i] = wa * a - wb * b; + O[os * (n - 1 - i)] = wb * a + wa * b; + } + } + if (i < n/2) { + E wa, wb; + E a, b; + wa = W[0]; /* cos */ + wb = W[1]; /* sin */ + a = buf[i]; + b = buf[n/2 + i]; + 
O[os * i] = wa * a + wb * b; + O[os * (n - 1 - i)] = wb * a - wa * b; + } + } + + X(ifree)(buf); +} + +#endif /* 0 */ + +/* like for rodft01, rodft11 is obtained from redft11 by + reversing the input and flipping the sign of every other output. */ +static void apply_ro11(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, n = ego->n, n2 = n/2; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W = ego->td->W; + R *W2; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = K(2.0) * I[is * (n - 1)]; + buf[n2] = K(2.0) * I[0]; + for (i = 1; i + i < n2; ++i) { + INT k = i + i; + E a, b, a2, b2; + { + E u, v; + u = I[is * (n - k)]; + v = I[is * (n - 1 - k)]; + a = u + v; + b2 = u - v; + } + { + E u, v; + u = I[is * (k)]; + v = I[is * (k - 1)]; + b = u + v; + a2 = u - v; + } + { + E wa, wb; + wa = W[2*i]; + wb = W[2*i + 1]; + { + E apb, amb; + apb = a + b; + amb = a - b; + buf[i] = wa * amb + wb * apb; + buf[n2 - i] = wa * apb - wb * amb; + } + { + E apb, amb; + apb = a2 + b2; + amb = a2 - b2; + buf[n2 + i] = wa * amb + wb * apb; + buf[n - i] = wa * apb - wb * amb; + } + } + } + if (i + i == n2) { + E u, v; + u = I[is * n2]; + v = I[is * (n2 - 1)]; + buf[i] = (u + v) * (W[2*i] * K(2.0)); + buf[n - i] = (u - v) * (W[2*i] * K(2.0)); + } + + + /* child plan: two r2hc's of size n/2 */ + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + W2 = ego->td2->W; + { /* i == 0 case */ + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = buf[0]; + b = buf[n2]; + O[0] = wa * a + wb * b; + O[os * (n - 1)] = wa * b - wb * a; + } + W2 += 2; + for (i = 1; i + i < n2; ++i, W2 += 2) { + INT k; + E u, v, u2, v2; + u = buf[i]; + v = buf[n2 - i]; + u2 = buf[n2 + i]; + v2 = buf[n - i]; + k = (i + i) - 1; + { + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = v - u; + b 
= u2 - v2; + O[os * k] = wa * a + wb * b; + O[os * (n - 1 - k)] = wa * b - wb * a; + } + ++k; + W2 += 2; + { + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = u + v; + b = u2 + v2; + O[os * k] = wa * a + wb * b; + O[os * (n - 1 - k)] = wa * b - wb * a; + } + } + if (i + i == n2) { + INT k = (i + i) - 1; + E wa, wb; + E a, b; + wa = W2[0]; /* cos */ + wb = W2[1]; /* sin */ + a = buf[i]; + b = buf[n2 + i]; + O[os * k] = wb * b - wa * a; + O[os * (n - 1 - k)] = wa * b + wb * a; + } + } + + X(ifree)(buf); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + static const tw_instr reodft010e_tw[] = { + { TW_COS, 0, 1 }, + { TW_SIN, 0, 1 }, + { TW_NEXT, 1, 0 } + }; + static const tw_instr reodft11e_tw[] = { + { TW_COS, 1, 1 }, + { TW_SIN, 1, 1 }, + { TW_NEXT, 2, 0 } + }; + + X(plan_awake)(ego->cld, wakefulness); + + X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw, + 2*ego->n, 1, ego->n/4+1); + X(twiddle_awake)(wakefulness, &ego->td2, reodft11e_tw, + 8*ego->n, 1, ego->n); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(%se-radix2-r2hc-%D%v%(%p%))", + X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->sz->dims[0].n % 2 == 0 + && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11) + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *cld; + R *buf; + INT n; + opcnt ops; + + static const plan_adt padt = { + X(rdft_solve), 
awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + n = p->sz->dims[0].n; + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n/2, 1, 1), + X(mktensor_1d)(2, n/2, n/2), + buf, buf, R2HC)); + X(ifree)(buf); + if (!cld) + return (plan *)0; + + pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11); + pln->n = n; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + pln->cld = cld; + pln->td = pln->td2 = 0; + pln->kind = p->kind[0]; + + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + + X(ops_zero)(&ops); + ops.add = 2 + (n/2 - 1)/2 * 20; + ops.mul = 6 + (n/2 - 1)/2 * 16; + ops.other = 4*n + 2 + (n/2 - 1)/2 * 6; + if ((n/2) % 2 == 0) { + ops.add += 4; + ops.mul += 8; + ops.other += 4; + } + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(reodft11e_radix2_r2hc_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/rodft00e-r2hc-pad.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/rodft00e-r2hc-pad.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Do a RODFT00 problem via an R2HC problem, padded antisymmetrically to + twice the size. This is asymptotically a factor of ~2 worse than + rodft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical + Recipes), but we abandoned the latter after we discovered that it + has intrinsic accuracy problems. */ + +#include "reodft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld, *cldcpy; + INT is; + INT n; + INT vl; + INT ivs, ovs; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is; + INT i, n = ego->n; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = K(0.0); + for (i = 1; i < n; ++i) { + R a = I[(i-1) * is]; + buf[i] = -a; + buf[2*n - i] = a; + } + buf[i] = K(0.0); /* i == n, Nyquist */ + + /* r2hc transform of size 2*n */ + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + /* copy n-1 real numbers (imag. 
parts of hc array) from buf to O */ + { + plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; + cldcpy->apply((plan *) cldcpy, buf+2*n-1, O); + } + } + + X(ifree)(buf); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + X(plan_awake)(ego->cld, wakefulness); + X(plan_awake)(ego->cldcpy, wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cldcpy); + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(rodft00e-r2hc-pad-%D%v%(%p%)%(%p%))", + ego->n - 1, ego->vl, ego->cld, ego->cldcpy); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->kind[0] == RODFT00 + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *cld = (plan *) 0, *cldcpy; + R *buf = (R *) 0; + INT n; + INT vl, ivs, ovs; + opcnt ops; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + goto nada; + + p = (const problem_rdft *) p_; + + n = p->sz->dims[0].n + 1; + A(n > 0); + buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); + + cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1), + X(mktensor_0d)(), + buf, buf, R2HC)); + if (!cld) + goto nada; + + X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); + cldcpy = + X(mkplan_d)(plnr, + X(mkproblem_rdft_1_d)(X(mktensor_0d)(), + X(mktensor_1d)(n-1,-1, + p->sz->dims[0].os), + buf+2*n-1,TAINT(p->O, ovs), R2HC)); + if (!cldcpy) + goto nada; + + X(ifree)(buf); + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->n = n; + pln->is = p->sz->dims[0].is; + 
pln->cld = cld; + pln->cldcpy = cldcpy; + pln->vl = vl; + pln->ivs = ivs; + pln->ovs = ovs; + + X(ops_zero)(&ops); + ops.other = n-1 + 2*n; /* loads + stores (input -> buf) */ + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops); + + return &(pln->super.super); + + nada: + X(ifree0)(buf); + if (cld) + X(plan_destroy_internal)(cld); + return (plan *)0; +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(rodft00e_r2hc_pad_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/reodft/rodft00e-r2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/reodft/rodft00e-r2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* Do a RODFT00 problem via an R2HC problem, with some pre/post-processing. 
+ + This code uses the trick from FFTPACK, also documented in a similar + form by Numerical Recipes. Unfortunately, this algorithm seems to + have intrinsic numerical problems (similar to those in + reodft11e-r2hc.c), possibly due to the fact that it multiplies its + input by a sine, causing a loss of precision near the zero. For + transforms of 16k points, it has already lost three or four decimal + places of accuracy, which we deem unacceptable. + + So, we have abandoned this algorithm in favor of the one in + rodft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed. + The only other alternative in the literature that does not have + similar numerical difficulties seems to be the direct adaptation of + the Cooley-Tukey decomposition for antisymmetric data, but this + would require a whole new set of codelets and it's not clear that + it's worth it at this point. However, we did implement the latter + algorithm for the specific case of odd n (logically adapting the + split-radix algorithm); see reodft00e-splitradix.c. 
*/ + +#include "reodft.h" + +typedef struct { + solver super; +} S; + +typedef struct { + plan_rdft super; + plan *cld; + twid *td; + INT is, os; + INT n; + INT vl; + INT ivs, ovs; +} P; + +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + INT is = ego->is, os = ego->os; + INT i, n = ego->n; + INT iv, vl = ego->vl; + INT ivs = ego->ivs, ovs = ego->ovs; + R *W = ego->td->W; + R *buf; + + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { + buf[0] = 0; + for (i = 1; i < n - i; ++i) { + E a, b, apb, amb; + a = I[is * (i - 1)]; + b = I[is * ((n - i) - 1)]; + apb = K(2.0) * W[i] * (a + b); + amb = (a - b); + buf[i] = apb + amb; + buf[n - i] = apb - amb; + } + if (i == n - i) { + buf[i] = K(4.0) * I[is * (i - 1)]; + } + + { + plan_rdft *cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, buf, buf); + } + + /* FIXME: use recursive/cascade summation for better stability? */ + O[0] = buf[0] * 0.5; + for (i = 1; i + i < n - 1; ++i) { + INT k = i + i; + O[os * (k - 1)] = -buf[n - i]; + O[os * k] = O[os * (k - 2)] + buf[i]; + } + if (i + i == n - 1) { + O[os * (n - 2)] = -buf[n - i]; + } + } + + X(ifree)(buf); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + static const tw_instr rodft00e_tw[] = { + { TW_SIN, 0, 1 }, + { TW_NEXT, 1, 0 } + }; + + X(plan_awake)(ego->cld, wakefulness); + + X(twiddle_awake)(wakefulness, + &ego->td, rodft00e_tw, 2*ego->n, 1, (ego->n+1)/2); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + X(plan_destroy_internal)(ego->cld); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + p->print(p, "(rodft00e-r2hc-%D%v%(%p%))", ego->n - 1, ego->vl, ego->cld); +} + +static int applicable0(const solver *ego_, const problem *p_) +{ + const problem_rdft *p = (const problem_rdft *) p_; + UNUSED(ego_); + + return (1 + && p->sz->rnk == 1 + && p->vecsz->rnk <= 1 + && p->kind[0] 
== RODFT00 + ); +} + +static int applicable(const solver *ego, const problem *p, const planner *plnr) +{ + return (!NO_SLOWP(plnr) && applicable0(ego, p)); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + P *pln; + const problem_rdft *p; + plan *cld; + R *buf; + INT n; + opcnt ops; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr)) + return (plan *)0; + + p = (const problem_rdft *) p_; + + n = p->sz->dims[0].n + 1; + buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); + + cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), + X(mktensor_0d)(), + buf, buf, R2HC)); + X(ifree)(buf); + if (!cld) + return (plan *)0; + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->n = n; + pln->is = p->sz->dims[0].is; + pln->os = p->sz->dims[0].os; + pln->cld = cld; + pln->td = 0; + + X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); + + X(ops_zero)(&ops); + ops.other = 4 + (n-1)/2 * 5 + (n-2)/2 * 5; + ops.add = (n-1)/2 * 4 + (n-2)/2 * 1; + ops.mul = 1 + (n-1)/2 * 2; + if (n % 2 == 0) + ops.mul += 1; + + X(ops_zero)(&pln->super.super.ops); + X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); + X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); + + return &(pln->super.super); +} + +/* constructor */ +static solver *mksolver(void) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + return &(slv->super); +} + +void X(rodft00e_r2hc_register)(planner *p) +{ + REGISTER_SOLVER(p, mksolver()); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,12 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel +noinst_LTLIBRARIES = libsimd_support.la libsimd_sse2_nonportable.la + +libsimd_support_la_SOURCES = taint.c simd-common.h simd-sse2.h sse2.c \ +x86-cpuid.h amd64-cpuid.h 
avx.c simd-avx.h altivec.c simd-altivec.h \ +neon.c simd-neon.h + +# sse2-nonportable.c needs SSE2_CFLAGS, but Automake does not support +# per-object CFLAGS. Thus we build a separate library. +libsimd_sse2_nonportable_la_CFLAGS = $(SSE2_CFLAGS) +libsimd_sse2_nonportable_la_SOURCES = sse2-nonportable.c + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,643 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = simd-support +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + 
$(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libsimd_sse2_nonportable_la_LIBADD = +am_libsimd_sse2_nonportable_la_OBJECTS = \ + libsimd_sse2_nonportable_la-sse2-nonportable.lo +libsimd_sse2_nonportable_la_OBJECTS = \ + $(am_libsimd_sse2_nonportable_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libsimd_sse2_nonportable_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libsimd_sse2_nonportable_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +libsimd_support_la_LIBADD = +am_libsimd_support_la_OBJECTS = taint.lo sse2.lo avx.lo altivec.lo \ + neon.lo +libsimd_support_la_OBJECTS = $(am_libsimd_support_la_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) 
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libsimd_sse2_nonportable_la_SOURCES) \ + $(libsimd_support_la_SOURCES) +DIST_SOURCES = $(libsimd_sse2_nonportable_la_SOURCES) \ + $(libsimd_support_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel +noinst_LTLIBRARIES = libsimd_support.la libsimd_sse2_nonportable.la +libsimd_support_la_SOURCES = 
taint.c simd-common.h simd-sse2.h sse2.c \ +x86-cpuid.h amd64-cpuid.h avx.c simd-avx.h altivec.c simd-altivec.h \ +neon.c simd-neon.h + + +# sse2-nonportable.c needs SSE2_CFLAGS, but Automake does not support +# per-object CFLAGS. Thus we build a separate library. +libsimd_sse2_nonportable_la_CFLAGS = $(SSE2_CFLAGS) +libsimd_sse2_nonportable_la_SOURCES = sse2-nonportable.c +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu simd-support/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu simd-support/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f 
$${locs}; \ + rm -f $${locs}; \ + } + +libsimd_sse2_nonportable.la: $(libsimd_sse2_nonportable_la_OBJECTS) $(libsimd_sse2_nonportable_la_DEPENDENCIES) $(EXTRA_libsimd_sse2_nonportable_la_DEPENDENCIES) + $(AM_V_CCLD)$(libsimd_sse2_nonportable_la_LINK) $(libsimd_sse2_nonportable_la_OBJECTS) $(libsimd_sse2_nonportable_la_LIBADD) $(LIBS) + +libsimd_support.la: $(libsimd_support_la_OBJECTS) $(libsimd_support_la_DEPENDENCIES) $(EXTRA_libsimd_support_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libsimd_support_la_OBJECTS) $(libsimd_support_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/altivec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/avx.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libsimd_sse2_nonportable_la-sse2-nonportable.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/neon.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sse2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/taint.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) 
'$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libsimd_sse2_nonportable_la-sse2-nonportable.lo: sse2-nonportable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsimd_sse2_nonportable_la_CFLAGS) $(CFLAGS) -MT libsimd_sse2_nonportable_la-sse2-nonportable.lo -MD -MP -MF $(DEPDIR)/libsimd_sse2_nonportable_la-sse2-nonportable.Tpo -c -o libsimd_sse2_nonportable_la-sse2-nonportable.lo `test -f 'sse2-nonportable.c' || echo '$(srcdir)/'`sse2-nonportable.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libsimd_sse2_nonportable_la-sse2-nonportable.Tpo $(DEPDIR)/libsimd_sse2_nonportable_la-sse2-nonportable.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sse2-nonportable.c' object='libsimd_sse2_nonportable_la-sse2-nonportable.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsimd_sse2_nonportable_la_CFLAGS) $(CFLAGS) -c -o libsimd_sse2_nonportable_la-sse2-nonportable.lo `test -f 'sse2-nonportable.c' || echo '$(srcdir)/'`sse2-nonportable.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) 
$(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/altivec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/altivec.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#if HAVE_ALTIVEC + +#if HAVE_SYS_SYSCTL_H +# include +#endif + +#if HAVE_SYS_SYSCTL_H && HAVE_SYSCTL && defined(CTL_HW) && defined(HW_VECTORUNIT) +/* code for darwin */ +static int really_have_altivec(void) +{ + int mib[2], altivecp; + size_t len; + mib[0] = CTL_HW; + mib[1] = HW_VECTORUNIT; + len = sizeof(altivecp); + sysctl(mib, 2, &altivecp, &len, NULL, 0); + return altivecp; +} +#else /* GNU/Linux and other non-Darwin systems (!HAVE_SYS_SYSCTL_H etc.) 
*/ + +#include +#include + +static jmp_buf jb; + +static void sighandler(int x) +{ + longjmp(jb, 1); +} + +static int really_have_altivec(void) +{ + void (*oldsig)(int); + oldsig = signal(SIGILL, sighandler); + if (setjmp(jb)) { + signal(SIGILL, oldsig); + return 0; + } else { + __asm__ __volatile__ (".long 0x10000484"); /* vor 0,0,0 */ + signal(SIGILL, oldsig); + return 1; + } + return 0; +} +#endif + +int X(have_simd_altivec)(void) +{ + static int init = 0, res; + if (!init) { + res = really_have_altivec(); + init = 1; + } + return res; +} + +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/amd64-cpuid.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/amd64-cpuid.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#ifdef _MSC_VER +#ifndef inline +#define inline __inline +#endif +#endif + +#ifdef _MSC_VER +#include +#if (_MSC_VER >= 1600) && !defined(__INTEL_COMPILER) +#include +#endif +#endif + +static inline int cpuid_ecx(int op) +{ +# ifdef _MSC_VER +# ifdef __INTEL_COMPILER + int result; + _asm { + push rbx + mov eax,op + cpuid + mov result,ecx + pop rbx + } + return result; +# else + int cpu_info[4]; + __cpuid(cpu_info,op); + return cpu_info[2]; +# endif +# else + int eax, ecx, edx; + + __asm__("pushq %%rbx\n\tcpuid\n\tpopq %%rbx" + : "=a" (eax), "=c" (ecx), "=d" (edx) + : "a" (op)); + return ecx; +# endif +} + +static inline int xgetbv_eax(int op) +{ +# ifdef _MSC_VER +# ifdef __INTEL_COMPILER + int veax, vedx; + _asm { + mov ecx,op + xgetbv + mov veax,eax + mov vedx,edx + } + return veax; +# else +# if defined(_MSC_VER) && (_MSC_VER >= 1600) + unsigned __int64 result; + result = _xgetbv(op); + return (int)result; +# else +# error "Need at least Visual Studio 10 SP1 for AVX support" +# endif +# endif +# else + int eax, edx; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (op)); + return eax; +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/avx.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/avx.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#if HAVE_AVX + +#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) + +#include "amd64-cpuid.h" + +int X(have_simd_avx)(void) +{ + static int init = 0, res; + + if (!init) { + res = 1 + && ((cpuid_ecx(1) & 0x18000000) == 0x18000000) + && ((xgetbv_eax(0) & 0x6) == 0x6); + init = 1; + } + return res; +} + +#else /* 32-bit code */ + +#include "x86-cpuid.h" + +int X(have_simd_avx)(void) +{ + static int init = 0, res; + + if (!init) { + res = !is_386() + && has_cpuid() + && ((cpuid_ecx(1) & 0x18000000) == 0x18000000) + && ((xgetbv_eax(0) & 0x6) == 0x6); + init = 1; + } + return res; +} +#endif + +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/neon.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/neon.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#if HAVE_NEON + +/* check for an environment where signals are known to work */ +#if defined(unix) || defined(linux) + # include + # include + + static jmp_buf jb; + + static void sighandler(int x) + { + UNUSED(x); + longjmp(jb, 1); + } + + static int really_have_neon(void) + { + void (*oldsig)(int); + oldsig = signal(SIGILL, sighandler); + if (setjmp(jb)) { + signal(SIGILL, oldsig); + return 0; + } else { + /* paranoia: encode the instruction in binary because the + assembler may not recognize it without -mfpu=neon */ + /*asm volatile ("vand q0, q0, q0");*/ + asm volatile (".long 0xf2000150"); + signal(SIGILL, oldsig); + return 1; + } + } + + extern void X(check_alignment_of_sse2_pm)(void); + + int X(have_simd_neon)(void) + { + static int init = 0, res; + + if (!init) { + res = really_have_neon(); + init = 1; + } + return res; + } + + +#else +/* don't know how to autodetect NEON; assume it is present */ + int X(have_simd_neon)(void) + { + return 1; + } +#endif + +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/simd-altivec.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/simd-altivec.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef FFTW_SINGLE +#error "ALTIVEC only works in single precision" +#endif + +/* define these unconditionally, because they are used by + taint.c which is compiled without altivec */ +#define SIMD_SUFFIX _altivec /* for renaming */ +#define VL 2 /* SIMD complex vector length */ +#define SIMD_VSTRIDE_OKA(x) ((x) == 2) +#define SIMD_STRIDE_OKPAIR SIMD_STRIDE_OKA + +#if !defined(__VEC__) && !defined(FAKE__VEC__) +# error "compiling simd-altivec.h requires -maltivec or equivalent" +#endif + +#ifdef HAVE_ALTIVEC_H +# include +#endif + +typedef vector float V; +#define VLIT(x0, x1, x2, x3) {x0, x1, x2, x3} +#define LDK(x) x +#define DVK(var, val) const V var = VLIT(val, val, val, val) + +static inline V VADD(V a, V b) { return vec_add(a, b); } +static inline V VSUB(V a, V b) { return vec_sub(a, b); } +static inline V VFMA(V a, V b, V c) { return vec_madd(a, b, c); } +static inline V VFNMS(V a, V b, V c) { return vec_nmsub(a, b, c); } + +static inline V VMUL(V a, V b) +{ + DVK(zero, -0.0); + return VFMA(a, b, zero); +} + +static inline V VFMS(V a, V b, V c) { return VSUB(VMUL(a, b), c); } + +static inline V LDA(const R *x, INT ivs, const R *aligned_like) +{ + UNUSED(ivs); + UNUSED(aligned_like); + return vec_ld(0, x); +} + +static inline V LD(const R *x, INT ivs, const R *aligned_like) +{ + /* common subexpressions */ + const INT fivs = sizeof(R) * ivs; + /* you are not expected to understand this: */ + const vector unsigned int perm = VLIT(0, 0, 0xFFFFFFFF, 0xFFFFFFFF); + vector unsigned char 
ml = vec_lvsr(fivs + 8, aligned_like); + vector unsigned char mh = vec_lvsl(0, aligned_like); + vector unsigned char msk = + (vector unsigned char)vec_sel((V)mh, (V)ml, perm); + /* end of common subexpressions */ + + return vec_perm(vec_ld(0, x), vec_ld(fivs, x), msk); +} + +/* store lower half */ +static inline void STH(R *x, V v, R *aligned_like) +{ + v = vec_perm(v, v, vec_lvsr(0, aligned_like)); + vec_ste(v, 0, x); + vec_ste(v, sizeof(R), x); +} + +static inline void STL(R *x, V v, INT ovs, R *aligned_like) +{ + const INT fovs = sizeof(R) * ovs; + v = vec_perm(v, v, vec_lvsr(fovs + 8, aligned_like)); + vec_ste(v, fovs, x); + vec_ste(v, sizeof(R) + fovs, x); +} + +static inline void STA(R *x, V v, INT ovs, R *aligned_like) +{ + UNUSED(ovs); + UNUSED(aligned_like); + vec_st(v, 0, x); +} + +static inline void ST(R *x, V v, INT ovs, R *aligned_like) +{ + /* WARNING: the extra_iter hack depends upon STH occurring after + STL */ + STL(x, v, ovs, aligned_like); + STH(x, v, aligned_like); +} + +#define STM2(x, v, ovs, aligned_like) /* no-op */ + +static inline void STN2(R *x, V v0, V v1, INT ovs) +{ + const INT fovs = sizeof(R) * ovs; + const vector unsigned int even = + VLIT(0x00010203, 0x04050607, 0x10111213, 0x14151617); + const vector unsigned int odd = + VLIT(0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f); + vec_st(vec_perm(v0, v1, (vector unsigned char)even), 0, x); + vec_st(vec_perm(v0, v1, (vector unsigned char)odd), fovs, x); +} + +#define STM4(x, v, ovs, aligned_like) /* no-op */ + +static inline void STN4(R *x, V v0, V v1, V v2, V v3, INT ovs) +{ + const INT fovs = sizeof(R) * ovs; + V x0 = vec_mergeh(v0, v2); + V x1 = vec_mergel(v0, v2); + V x2 = vec_mergeh(v1, v3); + V x3 = vec_mergel(v1, v3); + V y0 = vec_mergeh(x0, x2); + V y1 = vec_mergel(x0, x2); + V y2 = vec_mergeh(x1, x3); + V y3 = vec_mergel(x1, x3); + vec_st(y0, 0, x); + vec_st(y1, fovs, x); + vec_st(y2, 2 * fovs, x); + vec_st(y3, 3 * fovs, x); +} + +static inline V FLIP_RI(V x) +{ + const vector 
unsigned int perm = + VLIT(0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b); + return vec_perm(x, x, (vector unsigned char)perm); +} + +static inline V VCONJ(V x) +{ + const V pmpm = VLIT(0.0, -0.0, 0.0, -0.0); + return vec_xor(x, pmpm); +} + +static inline V VBYI(V x) +{ + return FLIP_RI(VCONJ(x)); +} + +static inline V VFMAI(V b, V c) +{ + const V mpmp = VLIT(-1.0, 1.0, -1.0, 1.0); + return VFMA(FLIP_RI(b), mpmp, c); +} + +static inline V VFNMSI(V b, V c) +{ + const V mpmp = VLIT(-1.0, 1.0, -1.0, 1.0); + return VFNMS(FLIP_RI(b), mpmp, c); +} + +static inline V VFMACONJ(V b, V c) +{ + const V pmpm = VLIT(1.0, -1.0, 1.0, -1.0); + return VFMA(b, pmpm, c); +} + +static inline V VFNMSCONJ(V b, V c) +{ + const V pmpm = VLIT(1.0, -1.0, 1.0, -1.0); + return VFNMS(b, pmpm, c); +} + +static inline V VFMSCONJ(V b, V c) +{ + return VSUB(VCONJ(b), c); +} + +static inline V VZMUL(V tx, V sr) +{ + const vector unsigned int real = + VLIT(0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b); + const vector unsigned int imag = + VLIT(0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f); + V si = VBYI(sr); + V tr = vec_perm(tx, tx, (vector unsigned char)real); + V ti = vec_perm(tx, tx, (vector unsigned char)imag); + return VFMA(ti, si, VMUL(tr, sr)); +} + +static inline V VZMULJ(V tx, V sr) +{ + const vector unsigned int real = + VLIT(0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b); + const vector unsigned int imag = + VLIT(0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f); + V si = VBYI(sr); + V tr = vec_perm(tx, tx, (vector unsigned char)real); + V ti = vec_perm(tx, tx, (vector unsigned char)imag); + return VFNMS(ti, si, VMUL(tr, sr)); +} + +static inline V VZMULI(V tx, V si) +{ + const vector unsigned int real = + VLIT(0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b); + const vector unsigned int imag = + VLIT(0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f); + V sr = VBYI(si); + V tr = vec_perm(tx, tx, (vector unsigned char)real); + V ti = vec_perm(tx, tx, (vector unsigned char)imag); + return 
VFNMS(ti, si, VMUL(tr, sr)); +} + +static inline V VZMULIJ(V tx, V si) +{ + const vector unsigned int real = + VLIT(0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b); + const vector unsigned int imag = + VLIT(0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f); + V sr = VBYI(si); + V tr = vec_perm(tx, tx, (vector unsigned char)real); + V ti = vec_perm(tx, tx, (vector unsigned char)imag); + return VFMA(ti, si, VMUL(tr, sr)); +} + +/* twiddle storage #1: compact, slower */ +#define VTW1(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_SIN, v, x}, {TW_SIN, v+1, x} +#define TWVL1 (VL) + +static inline V BYTW1(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = VBYI(sr); + V tx = twp[0]; + V tr = vec_mergeh(tx, tx); + V ti = vec_mergel(tx, tx); + return VFMA(ti, si, VMUL(tr, sr)); +} + +static inline V BYTWJ1(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = VBYI(sr); + V tx = twp[0]; + V tr = vec_mergeh(tx, tx); + V ti = vec_mergel(tx, tx); + return VFNMS(ti, si, VMUL(tr, sr)); +} + +/* twiddle storage #2: twice the space, faster (when in cache) */ +#define VTW2(v,x) \ + {TW_COS, v, x}, {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+1, x}, \ + {TW_SIN, v, -x}, {TW_SIN, v, x}, {TW_SIN, v+1, -x}, {TW_SIN, v+1, x} +#define TWVL2 (2 * VL) + +static inline V BYTW2(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = FLIP_RI(sr); + V tr = twp[0], ti = twp[1]; + return VFMA(ti, si, VMUL(tr, sr)); +} + +static inline V BYTWJ2(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = FLIP_RI(sr); + V tr = twp[0], ti = twp[1]; + return VFNMS(ti, si, VMUL(tr, sr)); +} + +/* twiddle storage #3 */ +#define VTW3(v,x) {TW_CEXP, v, x}, {TW_CEXP, v+1, x} +#define TWVL3 (VL) + +/* twiddle storage for split arrays */ +#define VTWS(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+2, x}, {TW_COS, v+3, x}, \ + {TW_SIN, v, x}, {TW_SIN, v+1, x}, {TW_SIN, v+2, x}, {TW_SIN, v+3, x} +#define TWVLS (2 * VL) + +#define VLEAVE() /* nothing */ + +#include "simd-common.h" 
diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/simd-avx.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/simd-avx.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#if defined(FFTW_LDOUBLE) || defined(FFTW_QUAD) +#error "AVX only works in single or double precision" +#endif + +#ifdef FFTW_SINGLE +# define DS(d,s) s /* single-precision option */ +# define SUFF(name) name ## s +#else +# define DS(d,s) d /* double-precision option */ +# define SUFF(name) name ## d +#endif + +#define SIMD_SUFFIX _avx /* for renaming */ +#define VL DS(2, 4) /* SIMD complex vector length */ +#define SIMD_VSTRIDE_OKA(x) ((x) == 2) +#define SIMD_STRIDE_OKPAIR SIMD_STRIDE_OK + +#if defined(__GNUC__) && !defined(__AVX__) /* sanity check */ +#error "compiling simd-avx.h without -mavx" +#endif + +#ifdef _MSC_VER +#ifndef inline +#define inline __inline +#endif +#endif + +#include <immintrin.h> + +typedef DS(__m256d, __m256) V; +#define VADD SUFF(_mm256_add_p) +#define VSUB SUFF(_mm256_sub_p) +#define VMUL SUFF(_mm256_mul_p) +#define VXOR SUFF(_mm256_xor_p) +#define VSHUF SUFF(_mm256_shuffle_p) +
+#define SHUFVALD(fp0,fp1) \ + (((fp1) << 3) | ((fp0) << 2) | ((fp1) << 1) | ((fp0))) +#define SHUFVALS(fp0,fp1,fp2,fp3) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) + +#define VDUPL(x) DS(_mm256_unpacklo_pd(x, x), VSHUF(x, x, SHUFVALS(0, 0, 2, 2))) +#define VDUPH(x) DS(_mm256_unpackhi_pd(x, x), VSHUF(x, x, SHUFVALS(1, 1, 3, 3))) + +#define VLIT(x0, x1) DS(_mm256_set_pd(x0, x1, x0, x1), _mm256_set_ps(x0, x1, x0, x1, x0, x1, x0, x1)) +#define DVK(var, val) V var = VLIT(val, val) +#define LDK(x) x + +static inline V LDA(const R *x, INT ivs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + (void)ivs; /* UNUSED */ + return SUFF(_mm256_loadu_p)(x); +} + +static inline void STA(R *x, V v, INT ovs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + (void)ovs; /* UNUSED */ + SUFF(_mm256_storeu_p)(x, v); +} + +#if FFTW_SINGLE + +#define LOADH(addr, val) _mm_loadh_pi(val, (const __m64 *)(addr)) +#define LOADL(addr, val) _mm_loadl_pi(val, (const __m64 *)(addr)) +#define STOREH(addr, val) _mm_storeh_pi((__m64 *)(addr), val) +#define STOREL(addr, val) _mm_storel_pi((__m64 *)(addr), val) + +/* it seems like the only AVX way to store 4 complex floats is to + extract two pairs of complex floats into two __m128 registers, and + then use SSE-like half-stores. Similarly, to load 4 complex + floats, we load two pairs of complex floats into two __m128 + registers, and then pack the two __m128 registers into one __m256 + value. 
*/ +static inline V LD(const R *x, INT ivs, const R *aligned_like) +{ + __m128 l, h; + V v; + (void)aligned_like; /* UNUSED */ + l = LOADL(x, l); + l = LOADH(x + ivs, l); + h = LOADL(x + 2*ivs, h); + h = LOADH(x + 3*ivs, h); + v = _mm256_castps128_ps256(l); + v = _mm256_insertf128_ps(v, h, 1); + return v; +} + +static inline void ST(R *x, V v, INT ovs, const R *aligned_like) +{ + __m128 h = _mm256_extractf128_ps(v, 1); + __m128 l = _mm256_castps256_ps128(v); + (void)aligned_like; /* UNUSED */ + /* WARNING: the extra_iter hack depends upon STOREL occurring + after STOREH */ + STOREH(x + 3*ovs, h); + STOREL(x + 2*ovs, h); + STOREH(x + ovs, l); + STOREL(x, l); +} + +#define STM2(x, v, ovs, aligned_like) /* no-op */ +static inline void STN2(R *x, V v0, V v1, INT ovs) +{ + V x0 = VSHUF(v0, v1, SHUFVALS(0, 1, 0, 1)); + V x1 = VSHUF(v0, v1, SHUFVALS(2, 3, 2, 3)); + __m128 h0 = _mm256_extractf128_ps(x0, 1); + __m128 l0 = _mm256_castps256_ps128(x0); + __m128 h1 = _mm256_extractf128_ps(x1, 1); + __m128 l1 = _mm256_castps256_ps128(x1); + *(__m128 *)(x + 3*ovs) = h1; + *(__m128 *)(x + 2*ovs) = h0; + *(__m128 *)(x + 1*ovs) = l1; + *(__m128 *)(x + 0*ovs) = l0; +} + +#define STM4(x, v, ovs, aligned_like) /* no-op */ +#define STN4(x, v0, v1, v2, v3, ovs) \ +{ \ + V xxx0, xxx1, xxx2, xxx3; \ + V yyy0, yyy1, yyy2, yyy3; \ + xxx0 = _mm256_unpacklo_ps(v0, v2); \ + xxx1 = _mm256_unpackhi_ps(v0, v2); \ + xxx2 = _mm256_unpacklo_ps(v1, v3); \ + xxx3 = _mm256_unpackhi_ps(v1, v3); \ + yyy0 = _mm256_unpacklo_ps(xxx0, xxx2); \ + yyy1 = _mm256_unpackhi_ps(xxx0, xxx2); \ + yyy2 = _mm256_unpacklo_ps(xxx1, xxx3); \ + yyy3 = _mm256_unpackhi_ps(xxx1, xxx3); \ + *(__m128 *)(x + 0 * ovs) = _mm256_castps256_ps128(yyy0); \ + *(__m128 *)(x + 4 * ovs) = _mm256_extractf128_ps(yyy0, 1); \ + *(__m128 *)(x + 1 * ovs) = _mm256_castps256_ps128(yyy1); \ + *(__m128 *)(x + 5 * ovs) = _mm256_extractf128_ps(yyy1, 1); \ + *(__m128 *)(x + 2 * ovs) = _mm256_castps256_ps128(yyy2); \ + *(__m128 *)(x + 6 * ovs) = 
_mm256_extractf128_ps(yyy2, 1); \ + *(__m128 *)(x + 3 * ovs) = _mm256_castps256_ps128(yyy3); \ + *(__m128 *)(x + 7 * ovs) = _mm256_extractf128_ps(yyy3, 1); \ +} + +#else +static inline __m128d VMOVAPD_LD(const R *x) +{ + /* gcc-4.6 miscompiles the combination _mm256_castpd128_pd256(VMOVAPD_LD(x)) + into a 256-bit vmovapd, which requires 32-byte alignment instead of + 16-byte alignment. + + Force the use of vmovapd via asm until compilers stabilize. + */ +#if defined(__GNUC__) + __m128d var; + __asm__("vmovapd %1, %0\n" : "=x"(var) : "m"(x[0])); + return var; +#else + return *(const __m128d *)x; +#endif +} + +static inline V LD(const R *x, INT ivs, const R *aligned_like) +{ + V var; + (void)aligned_like; /* UNUSED */ + var = _mm256_castpd128_pd256(VMOVAPD_LD(x)); + var = _mm256_insertf128_pd(var, *(const __m128d *)(x+ivs), 1); + return var; +} + +static inline void ST(R *x, V v, INT ovs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + /* WARNING: the extra_iter hack depends upon the store of the low + part occurring after the store of the high part */ + *(__m128d *)(x + ovs) = _mm256_extractf128_pd(v, 1); + *(__m128d *)x = _mm256_castpd256_pd128(v); +} + + +#define STM2 ST +#define STN2(x, v0, v1, ovs) /* nop */ +#define STM4(x, v, ovs, aligned_like) /* no-op */ + +/* STN4 is a macro, not a function, thanks to Visual C++ developers + deciding "it would be infrequent that people would want to pass more + than 3 [__m128 parameters] by value." Even though the comment + was made about __m128 parameters, it appears to apply to __m256 + parameters as well. 
*/ +#define STN4(x, v0, v1, v2, v3, ovs) \ +{ \ + V xxx0, xxx1, xxx2, xxx3; \ + xxx0 = _mm256_unpacklo_pd(v0, v1); \ + xxx1 = _mm256_unpackhi_pd(v0, v1); \ + xxx2 = _mm256_unpacklo_pd(v2, v3); \ + xxx3 = _mm256_unpackhi_pd(v2, v3); \ + STA(x, _mm256_permute2f128_pd(xxx0, xxx2, 0x20), 0, 0); \ + STA(x + ovs, _mm256_permute2f128_pd(xxx1, xxx3, 0x20), 0, 0); \ + STA(x + 2 * ovs, _mm256_permute2f128_pd(xxx0, xxx2, 0x31), 0, 0); \ + STA(x + 3 * ovs, _mm256_permute2f128_pd(xxx1, xxx3, 0x31), 0, 0); \ +} +#endif + +static inline V FLIP_RI(V x) +{ + return VSHUF(x, x, + DS(SHUFVALD(1, 0), + SHUFVALS(1, 0, 3, 2))); +} + +static inline V VCONJ(V x) +{ + V pmpm = VLIT(-0.0, 0.0); + return VXOR(pmpm, x); +} + +static inline V VBYI(V x) +{ + return FLIP_RI(VCONJ(x)); +} + +/* FMA support */ +#define VFMA(a, b, c) VADD(c, VMUL(a, b)) +#define VFNMS(a, b, c) VSUB(c, VMUL(a, b)) +#define VFMS(a, b, c) VSUB(VMUL(a, b), c) +#define VFMAI(b, c) VADD(c, VBYI(b)) +#define VFNMSI(b, c) VSUB(c, VBYI(b)) +#define VFMACONJ(b,c) VADD(VCONJ(b),c) +#define VFMSCONJ(b,c) VSUB(VCONJ(b),c) +#define VFNMSCONJ(b,c) VSUB(c, VCONJ(b)) + +static inline V VZMUL(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + tr = VMUL(sr, tr); + sr = VBYI(sr); + return VFMA(ti, sr, tr); +} + +static inline V VZMULJ(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + tr = VMUL(sr, tr); + sr = VBYI(sr); + return VFNMS(ti, sr, tr); +} + +static inline V VZMULI(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + ti = VMUL(ti, sr); + sr = VBYI(sr); + return VFMS(tr, sr, ti); +} + +static inline V VZMULIJ(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + ti = VMUL(ti, sr); + sr = VBYI(sr); + return VFMA(tr, sr, ti); +} + +/* twiddle storage #1: compact, slower */ +#ifdef FFTW_SINGLE +# define VTW1(v,x) {TW_CEXP, v, x}, {TW_CEXP, v+1, x}, {TW_CEXP, v+2, x}, {TW_CEXP, v+3, x} +#else +# define VTW1(v,x) {TW_CEXP, v, x}, {TW_CEXP, v+1, x} +#endif +#define TWVL1 (VL) + +static inline V BYTW1(const R *t, 
V sr) +{ + return VZMUL(LDA(t, 2, t), sr); +} + +static inline V BYTWJ1(const R *t, V sr) +{ + return VZMULJ(LDA(t, 2, t), sr); +} + +/* twiddle storage #2: twice the space, faster (when in cache) */ +#ifdef FFTW_SINGLE +# define VTW2(v,x) \ + {TW_COS, v, x}, {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+1, x}, \ + {TW_COS, v+2, x}, {TW_COS, v+2, x}, {TW_COS, v+3, x}, {TW_COS, v+3, x}, \ + {TW_SIN, v, -x}, {TW_SIN, v, x}, {TW_SIN, v+1, -x}, {TW_SIN, v+1, x}, \ + {TW_SIN, v+2, -x}, {TW_SIN, v+2, x}, {TW_SIN, v+3, -x}, {TW_SIN, v+3, x} +#else +# define VTW2(v,x) \ + {TW_COS, v, x}, {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+1, x}, \ + {TW_SIN, v, -x}, {TW_SIN, v, x}, {TW_SIN, v+1, -x}, {TW_SIN, v+1, x} +#endif +#define TWVL2 (2 * VL) + +static inline V BYTW2(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = FLIP_RI(sr); + V tr = twp[0], ti = twp[1]; + return VFMA(tr, sr, VMUL(ti, si)); +} + +static inline V BYTWJ2(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = FLIP_RI(sr); + V tr = twp[0], ti = twp[1]; + return VFNMS(ti, si, VMUL(tr, sr)); +} + +/* twiddle storage #3 */ +#define VTW3 VTW1 +#define TWVL3 TWVL1 + +/* twiddle storage for split arrays */ +#ifdef FFTW_SINGLE +# define VTWS(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+2, x}, {TW_COS, v+3, x}, \ + {TW_COS, v+4, x}, {TW_COS, v+5, x}, {TW_COS, v+6, x}, {TW_COS, v+7, x}, \ + {TW_SIN, v, x}, {TW_SIN, v+1, x}, {TW_SIN, v+2, x}, {TW_SIN, v+3, x}, \ + {TW_SIN, v+4, x}, {TW_SIN, v+5, x}, {TW_SIN, v+6, x}, {TW_SIN, v+7, x} +#else +# define VTWS(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+2, x}, {TW_COS, v+3, x}, \ + {TW_SIN, v, x}, {TW_SIN, v+1, x}, {TW_SIN, v+2, x}, {TW_SIN, v+3, x} +#endif +#define TWVLS (2 * VL) + + +/* Use VZEROUPPER to avoid the penalty of switching from AVX to SSE. 
+ See Intel Optimization Manual (April 2011, version 248966), Section + 11.3 */ +#define VLEAVE _mm256_zeroupper + +#include "simd-common.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/simd-common.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/simd-common.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* detection of alignment. This is complicated because a machine may + support multiple SIMD extensions (e.g. SSE2 and AVX) but only one + set of alignment constraints. So this alignment stuff cannot be + defined in the SIMD header files. Rather than defining a separate + set of "machine" header files, we just do this ugly ifdef here. 
*/ +#if defined(HAVE_SSE2) || defined(HAVE_AVX) +# if defined(FFTW_SINGLE) +# define ALIGNMENT 8 /* Alignment for the LD/ST macros */ +# define ALIGNMENTA 16 /* Alignment for the LDA/STA macros */ +# else +# define ALIGNMENT 16 /* Alignment for the LD/ST macros */ +# define ALIGNMENTA 16 /* Alignment for the LDA/STA macros */ +# endif +#elif defined(HAVE_ALTIVEC) +# define ALIGNMENT 8 /* Alignment for the LD/ST macros */ +# define ALIGNMENTA 16 /* Alignment for the LDA/STA macros */ +#elif defined(HAVE_NEON) +# define ALIGNMENT 8 /* Alignment for the LD/ST macros */ +# define ALIGNMENTA 8 /* Alignment for the LDA/STA macros */ +#endif + +#if HAVE_SIMD +# ifndef ALIGNMENT +# error "ALIGNMENT not defined" +# endif +# ifndef ALIGNMENTA +# error "ALIGNMENTA not defined" +# endif +#endif + +/* rename for precision and for SIMD extensions */ +#define XSIMD0(name, suffix) CONCAT(name, suffix) +#define XSIMD(name) XSIMD0(X(name), SIMD_SUFFIX) +#define XSIMD_STRING(x) x STRINGIZE(SIMD_SUFFIX) + +/* TAINT_BIT is set if pointers are not guaranteed to be multiples of + ALIGNMENT */ +#define TAINT_BIT 1 + +/* TAINT_BITA is set if pointers are not guaranteed to be multiples of + ALIGNMENTA */ +#define TAINT_BITA 2 + +#define PTRINT(p) ((uintptr_t)(p)) + +#define ALIGNED(p) \ + (((PTRINT(UNTAINT(p)) % ALIGNMENT) == 0) && !(PTRINT(p) & TAINT_BIT)) + +#define ALIGNEDA(p) \ + (((PTRINT(UNTAINT(p)) % ALIGNMENTA) == 0) && !(PTRINT(p) & TAINT_BITA)) + +#define SIMD_STRIDE_OK(x) (!(((x) * sizeof(R)) % ALIGNMENT)) +#define SIMD_STRIDE_OKA(x) (!(((x) * sizeof(R)) % ALIGNMENTA)) +#define SIMD_VSTRIDE_OK SIMD_STRIDE_OK + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/simd-neon.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/simd-neon.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef FFTW_SINGLE +#error "NEON only works in single precision" +#endif + +/* define these unconditionally, because they are used by + taint.c which is compiled without neon */ +#define SIMD_SUFFIX _neon /* for renaming */ +#define VL 2 /* SIMD complex vector length */ +#define SIMD_VSTRIDE_OKA(x) ((x) == 2) +#define SIMD_STRIDE_OKPAIR SIMD_STRIDE_OK + +#if defined(__GNUC__) && !defined(__ARM_NEON__) +#error "compiling simd-neon.h requires -mfpu=neon or equivalent" +#endif + +#include <arm_neon.h> + +/* FIXME: I am not sure whether this code assumes little-endian + ordering. VLIT may or may not be wrong for big-endian systems. */ +typedef float32x4_t V; + +#define VLIT(x0, x1, x2, x3) {x0, x1, x2, x3} +#define LDK(x) x +#define DVK(var, val) const V var = VLIT(val, val, val, val) + +/* NEON has FMA, but a three-operand FMA is not too useful + for FFT purposes. We normally compute + + t0=a+b*c + t1=a-b*c + + In a three-operand instruction set this translates into + + t0=a + t0+=b*c + t1=a + t1-=b*c + + At least one move must be implemented, negating the advantage of + the FMA in the first place. At least some versions of gcc generate + both moves. 
So we are better off generating t=b*c;t0=a+t;t1=a-t;*/ +#if HAVE_FMA +#warning "--enable-fma on NEON is probably a bad idea (see source code)" +#endif + +#define VADD(a, b) vaddq_f32(a, b) +#define VSUB(a, b) vsubq_f32(a, b) +#define VMUL(a, b) vmulq_f32(a, b) +#define VFMA(a, b, c) vmlaq_f32(c, a, b) /* a*b+c */ +#define VFNMS(a, b, c) vmlsq_f32(c, a, b) /* FNMS=-(a*b-c) in powerpc terminology; MLS=c-a*b + in ARM terminology */ +#define VFMS(a, b, c) VSUB(VMUL(a, b), c) /* FMS=a*b-c in powerpc terminology; no equivalent + arm instruction (?) */ + +static inline V LDA(const R *x, INT ivs, const R *aligned_like) +{ + (void) aligned_like; /* UNUSED */ + return vld1q_f32((const float32_t *)x); +} + +static inline V LD(const R *x, INT ivs, const R *aligned_like) +{ + (void) aligned_like; /* UNUSED */ + return vcombine_f32(vld1_f32((float32_t *)x), vld1_f32((float32_t *)(x + ivs))); +} + +static inline void STA(R *x, V v, INT ovs, const R *aligned_like) +{ + (void) aligned_like; /* UNUSED */ + vst1q_f32((float32_t *)x, v); +} + +static inline void ST(R *x, V v, INT ovs, const R *aligned_like) +{ + (void) aligned_like; /* UNUSED */ + /* WARNING: the extra_iter hack depends upon store-low occurring + after store-high */ + vst1_f32((float32_t *)(x + ovs), vget_high_f32(v)); + vst1_f32((float32_t *)x, vget_low_f32(v)); +} + +/* 2x2 complex transpose and store */ +#define STM2 ST +#define STN2(x, v0, v1, ovs) /* nop */ + +/* store and 4x4 real transpose */ +static inline void STM4(R *x, V v, INT ovs, const R *aligned_like) +{ + (void) aligned_like; /* UNUSED */ + vst1_lane_f32((float32_t *)(x) , vget_low_f32(v), 0); + vst1_lane_f32((float32_t *)(x + ovs), vget_low_f32(v), 1); + vst1_lane_f32((float32_t *)(x + 2 * ovs), vget_high_f32(v), 0); + vst1_lane_f32((float32_t *)(x + 3 * ovs), vget_high_f32(v), 1); +} +#define STN4(x, v0, v1, v2, v3, ovs) /* use STM4 */ + +#define FLIP_RI(x) vrev64q_f32(x) + +static inline V VCONJ(V x) +{ +#if 1 + static const uint32x4_t pm = {0, 
0x80000000u, 0, 0x80000000u}; + return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), pm)); +#else + const V pm = VLIT(1.0, -1.0, 1.0, -1.0); + return VMUL(x, pm); +#endif +} + +static inline V VBYI(V x) +{ + return FLIP_RI(VCONJ(x)); +} + +static inline V VFMAI(V b, V c) +{ + const V mp = VLIT(-1.0, 1.0, -1.0, 1.0); + return VFMA(FLIP_RI(b), mp, c); +} + +static inline V VFNMSI(V b, V c) +{ + const V mp = VLIT(-1.0, 1.0, -1.0, 1.0); + return VFNMS(FLIP_RI(b), mp, c); +} + +static inline V VFMACONJ(V b, V c) +{ + const V pm = VLIT(1.0, -1.0, 1.0, -1.0); + return VFMA(b, pm, c); +} + +static inline V VFNMSCONJ(V b, V c) +{ + const V pm = VLIT(1.0, -1.0, 1.0, -1.0); + return VFNMS(b, pm, c); +} + +static inline V VFMSCONJ(V b, V c) +{ + return VSUB(VCONJ(b), c); +} + +#if 1 +#define VEXTRACT_REIM(tr, ti, tx) \ +{ \ + tr = vcombine_f32(vdup_lane_f32(vget_low_f32(tx), 0), \ + vdup_lane_f32(vget_high_f32(tx), 0)); \ + ti = vcombine_f32(vdup_lane_f32(vget_low_f32(tx), 1), \ + vdup_lane_f32(vget_high_f32(tx), 1)); \ +} +#else +/* this alternative might be faster in an ideal world, but gcc likes + to spill VVV onto the stack */ +#define VEXTRACT_REIM(tr, ti, tx) \ +{ \ + float32x4x2_t vvv = vtrnq_f32(tx, tx); \ + tr = vvv.val[0]; \ + ti = vvv.val[1]; \ +} +#endif + +static inline V VZMUL(V tx, V sr) +{ + V tr, ti; + VEXTRACT_REIM(tr, ti, tx); + tr = VMUL(sr, tr); + sr = VBYI(sr); + return VFMA(ti, sr, tr); +} + +static inline V VZMULJ(V tx, V sr) +{ + V tr, ti; + VEXTRACT_REIM(tr, ti, tx); + tr = VMUL(sr, tr); + sr = VBYI(sr); + return VFNMS(ti, sr, tr); +} + +static inline V VZMULI(V tx, V sr) +{ + V tr, ti; + VEXTRACT_REIM(tr, ti, tx); + ti = VMUL(ti, sr); + sr = VBYI(sr); + return VFMS(tr, sr, ti); +} + +static inline V VZMULIJ(V tx, V sr) +{ + V tr, ti; + VEXTRACT_REIM(tr, ti, tx); + ti = VMUL(ti, sr); + sr = VBYI(sr); + return VFMA(tr, sr, ti); +} + +/* twiddle storage #1: compact, slower */ +#define VTW1(v,x) {TW_CEXP, v, x}, {TW_CEXP, v+1, x} +#define 
TWVL1 VL +static inline V BYTW1(const R *t, V sr) +{ + V tx = LDA(t, 2, 0); + return VZMUL(tx, sr); +} + +static inline V BYTWJ1(const R *t, V sr) +{ + V tx = LDA(t, 2, 0); + return VZMULJ(tx, sr); +} + +/* twiddle storage #2: twice the space, faster (when in cache) */ +# define VTW2(v,x) \ + {TW_COS, v, x}, {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+1, x}, \ + {TW_SIN, v, -x}, {TW_SIN, v, x}, {TW_SIN, v+1, -x}, {TW_SIN, v+1, x} +#define TWVL2 (2 * VL) + +static inline V BYTW2(const R *t, V sr) +{ + V si = FLIP_RI(sr); + V tr = LDA(t, 2, 0), ti = LDA(t+2*VL, 2, 0); + return VFMA(ti, si, VMUL(tr, sr)); +} + +static inline V BYTWJ2(const R *t, V sr) +{ + V si = FLIP_RI(sr); + V tr = LDA(t, 2, 0), ti = LDA(t+2*VL, 2, 0); + return VFNMS(ti, si, VMUL(tr, sr)); +} + +/* twiddle storage #3 */ +# define VTW3(v,x) {TW_CEXP, v, x}, {TW_CEXP, v+1, x} +# define TWVL3 (VL) + +/* twiddle storage for split arrays */ +# define VTWS(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+2, x}, {TW_COS, v+3, x}, \ + {TW_SIN, v, x}, {TW_SIN, v+1, x}, {TW_SIN, v+2, x}, {TW_SIN, v+3, x} +#define TWVLS (2 * VL) + +#define VLEAVE() /* nothing */ + +#include "simd-common.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/simd-sse2.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/simd-sse2.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#if defined(FFTW_LDOUBLE) || defined(FFTW_QUAD) +# error "SSE/SSE2 only works in single/double precision" +#endif + +#ifdef FFTW_SINGLE +# define DS(d,s) s /* single-precision option */ +# define SUFF(name) name ## s +#else +# define DS(d,s) d /* double-precision option */ +# define SUFF(name) name ## d +#endif + +#define SIMD_SUFFIX _sse2 /* for renaming */ +#define VL DS(1,2) /* SIMD vector length, in term of complex numbers */ +#define SIMD_VSTRIDE_OKA(x) DS(1,((x) == 2)) +#define SIMD_STRIDE_OKPAIR SIMD_STRIDE_OK + +#if defined(__GNUC__) && !defined(FFTW_SINGLE) && !defined(__SSE2__) +# error "compiling simd-sse2.h in double precision without -msse2" +#elif defined(__GNUC__) && defined(FFTW_SINGLE) && !defined(__SSE__) +# error "compiling simd-sse2.h in single precision without -msse" +#endif + +#ifdef _MSC_VER +#ifndef inline +#define inline __inline +#endif +#endif + +/* some versions of glibc's sys/cdefs.h define __inline to be empty, + which is wrong because emmintrin.h defines several inline + procedures */ +#ifndef _MSC_VER +#undef __inline +#endif + +#ifdef FFTW_SINGLE +# include <xmmintrin.h> +#else +# include <emmintrin.h> +#endif + +typedef DS(__m128d,__m128) V; +#define VADD SUFF(_mm_add_p) +#define VSUB SUFF(_mm_sub_p) +#define VMUL SUFF(_mm_mul_p) +#define VXOR SUFF(_mm_xor_p) +#define SHUF SUFF(_mm_shuffle_p) +#define UNPCKL SUFF(_mm_unpacklo_p) +#define UNPCKH SUFF(_mm_unpackhi_p) + +#define SHUFVALS(fp0,fp1,fp2,fp3) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) + +#define VDUPL(x) DS(UNPCKL(x, x), SHUF(x, x, SHUFVALS(0, 0, 2, 2))) +#define VDUPH(x) DS(UNPCKH(x, x), SHUF(x, x, SHUFVALS(1, 1, 3, 3))) +#define STOREH(a, v) DS(_mm_storeh_pd(a, v), _mm_storeh_pi((__m64 *)(a), v))
+#define STOREL(a, v) DS(_mm_storel_pd(a, v), _mm_storel_pi((__m64 *)(a), v)) + + +#ifdef __GNUC__ + /* + * gcc-3.3 generates slow code for mm_set_ps (write all elements to + * the stack and load __m128 from the stack). + * + * gcc-3.[34] generates slow code for mm_set_ps1 (load into low element + * and shuffle). + * + * This hack forces gcc to generate a constant __m128 at compile time. + */ + union rvec { + R r[DS(2,4)]; + V v; + }; + +# ifdef FFTW_SINGLE +# define DVK(var, val) V var = __extension__ ({ \ + static const union rvec _var = { {val,val,val,val} }; _var.v; }) +# else +# define DVK(var, val) V var = __extension__ ({ \ + static const union rvec _var = { {val,val} }; _var.v; }) +# endif +# define LDK(x) x +#else +# define DVK(var, val) const R var = K(val) +# define LDK(x) DS(_mm_set1_pd,_mm_set_ps1)(x) +#endif + +union uvec { + unsigned u[4]; + V v; +}; + +static inline V LDA(const R *x, INT ivs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + (void)ivs; /* UNUSED */ + return *(const V *)x; +} + +static inline void STA(R *x, V v, INT ovs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + (void)ovs; /* UNUSED */ + *(V *)x = v; +} + +#ifdef FFTW_SINGLE + +# ifdef _MSC_VER + /* Temporarily disable the warning "uninitialized local variable + 'name' used" and runtime checks for using a variable before it is + defined which is erroneously triggered by the LOADL0 / LOADH macros + as they only modify VAL partly each. */ +# pragma warning(disable : 4700) +# pragma runtime_checks("u", off) +# endif + +static inline V LD(const R *x, INT ivs, const R *aligned_like) +{ + V var; + (void)aligned_like; /* UNUSED */ +# ifdef __GNUC__ + /* We use inline asm because gcc-3.x generates slow code for + _mm_loadh_pi(). gcc-3.x insists upon having an existing variable for + VAL, which is however never used. Thus, it generates code to move + values in and out the variable. 
Worse still, gcc-4.0 stores VAL on + the stack, causing valgrind to complain about uninitialized reads. */ + __asm__("movlps %1, %0\n\tmovhps %2, %0" + : "=x"(var) : "m"(x[0]), "m"(x[ivs])); +# else +# define LOADH(addr, val) _mm_loadh_pi(val, (const __m64 *)(addr)) +# define LOADL0(addr, val) _mm_loadl_pi(val, (const __m64 *)(addr)) + var = LOADL0(x, var); + var = LOADH(x + ivs, var); +# endif + return var; +} + +# ifdef _MSC_VER +# pragma warning(default : 4700) +# pragma runtime_checks("u", restore) +# endif + +static inline void ST(R *x, V v, INT ovs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + /* WARNING: the extra_iter hack depends upon STOREL occurring + after STOREH */ + STOREH(x + ovs, v); + STOREL(x, v); +} + +#else /* ! FFTW_SINGLE */ +# define LD LDA +# define ST STA +#endif + +#define STM2 DS(STA,ST) +#define STN2(x, v0, v1, ovs) /* nop */ + +#ifdef FFTW_SINGLE +# define STM4(x, v, ovs, aligned_like) /* no-op */ +/* STN4 is a macro, not a function, thanks to Visual C++ developers + deciding "it would be infrequent that people would want to pass more + than 3 [__m128 parameters] by value." 3 parameters ought to be enough + for anybody. 
*/ +# define STN4(x, v0, v1, v2, v3, ovs) \ +{ \ + V xxx0, xxx1, xxx2, xxx3; \ + xxx0 = UNPCKL(v0, v2); \ + xxx1 = UNPCKH(v0, v2); \ + xxx2 = UNPCKL(v1, v3); \ + xxx3 = UNPCKH(v1, v3); \ + STA(x, UNPCKL(xxx0, xxx2), 0, 0); \ + STA(x + ovs, UNPCKH(xxx0, xxx2), 0, 0); \ + STA(x + 2 * ovs, UNPCKL(xxx1, xxx3), 0, 0); \ + STA(x + 3 * ovs, UNPCKH(xxx1, xxx3), 0, 0); \ +} +#else /* !FFTW_SINGLE */ +static inline void STM4(R *x, V v, INT ovs, const R *aligned_like) +{ + (void)aligned_like; /* UNUSED */ + STOREL(x, v); + STOREH(x + ovs, v); +} +# define STN4(x, v0, v1, v2, v3, ovs) /* nothing */ +#endif + +static inline V FLIP_RI(V x) +{ + return SHUF(x, x, DS(1, SHUFVALS(1, 0, 3, 2))); +} + +extern const union uvec X(sse2_pm); +static inline V VCONJ(V x) +{ + return VXOR(X(sse2_pm).v, x); +} + +static inline V VBYI(V x) +{ + x = VCONJ(x); + x = FLIP_RI(x); + return x; +} + +/* FMA support */ +#define VFMA(a, b, c) VADD(c, VMUL(a, b)) +#define VFNMS(a, b, c) VSUB(c, VMUL(a, b)) +#define VFMS(a, b, c) VSUB(VMUL(a, b), c) +#define VFMAI(b, c) VADD(c, VBYI(b)) +#define VFNMSI(b, c) VSUB(c, VBYI(b)) +#define VFMACONJ(b,c) VADD(VCONJ(b),c) +#define VFMSCONJ(b,c) VSUB(VCONJ(b),c) +#define VFNMSCONJ(b,c) VSUB(c, VCONJ(b)) + +static inline V VZMUL(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + tr = VMUL(sr, tr); + sr = VBYI(sr); + return VFMA(ti, sr, tr); +} + +static inline V VZMULJ(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + tr = VMUL(sr, tr); + sr = VBYI(sr); + return VFNMS(ti, sr, tr); +} + +static inline V VZMULI(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + ti = VMUL(ti, sr); + sr = VBYI(sr); + return VFMS(tr, sr, ti); +} + +static inline V VZMULIJ(V tx, V sr) +{ + V tr = VDUPL(tx); + V ti = VDUPH(tx); + ti = VMUL(ti, sr); + sr = VBYI(sr); + return VFMA(tr, sr, ti); +} + +/* twiddle storage #1: compact, slower */ +#ifdef FFTW_SINGLE +# define VTW1(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_SIN, v, x}, {TW_SIN, v+1, x} +static inline V 
BYTW1(const R *t, V sr) +{ + const V *twp = (const V *)t; + V tx = twp[0]; + V tr = UNPCKL(tx, tx); + V ti = UNPCKH(tx, tx); + tr = VMUL(tr, sr); + sr = VBYI(sr); + return VFMA(ti, sr, tr); +} +static inline V BYTWJ1(const R *t, V sr) +{ + const V *twp = (const V *)t; + V tx = twp[0]; + V tr = UNPCKL(tx, tx); + V ti = UNPCKH(tx, tx); + tr = VMUL(tr, sr); + sr = VBYI(sr); + return VFNMS(ti, sr, tr); +} +#else /* !FFTW_SINGLE */ +# define VTW1(v,x) {TW_CEXP, v, x} +static inline V BYTW1(const R *t, V sr) +{ + V tx = LD(t, 1, t); + return VZMUL(tx, sr); +} +static inline V BYTWJ1(const R *t, V sr) +{ + V tx = LD(t, 1, t); + return VZMULJ(tx, sr); +} +#endif +#define TWVL1 (VL) + +/* twiddle storage #2: twice the space, faster (when in cache) */ +#ifdef FFTW_SINGLE +# define VTW2(v,x) \ + {TW_COS, v, x}, {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+1, x}, \ + {TW_SIN, v, -x}, {TW_SIN, v, x}, {TW_SIN, v+1, -x}, {TW_SIN, v+1, x} +#else /* !FFTW_SINGLE */ +# define VTW2(v,x) \ + {TW_COS, v, x}, {TW_COS, v, x}, {TW_SIN, v, -x}, {TW_SIN, v, x} +#endif +#define TWVL2 (2 * VL) +static inline V BYTW2(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = FLIP_RI(sr); + V tr = twp[0], ti = twp[1]; + return VFMA(tr, sr, VMUL(ti, si)); +} +static inline V BYTWJ2(const R *t, V sr) +{ + const V *twp = (const V *)t; + V si = FLIP_RI(sr); + V tr = twp[0], ti = twp[1]; + return VFNMS(ti, si, VMUL(tr, sr)); +} + +/* twiddle storage #3 */ +#ifdef FFTW_SINGLE +# define VTW3(v,x) {TW_CEXP, v, x}, {TW_CEXP, v+1, x} +# define TWVL3 (VL) +#else +# define VTW3(v,x) VTW1(v,x) +# define TWVL3 TWVL1 +#endif + +/* twiddle storage for split arrays */ +#ifdef FFTW_SINGLE +# define VTWS(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_COS, v+2, x}, {TW_COS, v+3, x}, \ + {TW_SIN, v, x}, {TW_SIN, v+1, x}, {TW_SIN, v+2, x}, {TW_SIN, v+3, x} +#else +# define VTWS(v,x) \ + {TW_COS, v, x}, {TW_COS, v+1, x}, {TW_SIN, v, x}, {TW_SIN, v+1, x} +#endif +#define TWVLS (2 * VL) + +#define VLEAVE() /* nothing 
*/ + +#include "simd-common.h" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/sse2-nonportable.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/sse2-nonportable.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "ifftw.h" + +#if HAVE_SSE2 +/* this file must be compiled with -msse/-msse2 or equivalent, and it will + fail at runtime on a machine that does not support sse/sse2 */ +#include "simd-sse2.h" + +/* This will produce -0.0f (or -0.0d) even on broken + compilers that do not distinguish +0.0 from -0.0. + I bet some are still around. 
*/ +const union uvec X(sse2_pm) = { +#ifdef FFTW_SINGLE + { 0x00000000, 0x80000000, 0x00000000, 0x80000000 } +#else + { 0x00000000, 0x00000000, 0x00000000, 0x80000000 } +#endif +}; + +/* paranoia because of past compiler bugs */ +void X(check_alignment_of_sse2_pm)(void) +{ + CK(ALIGNED(&X(sse2_pm))); +} +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/sse2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/sse2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" + +#ifdef FFTW_SINGLE +# define DS(d,s) s /* single-precision option */ +#else +# define DS(d,s) d /* double-precision option */ +#endif + +#if HAVE_SSE2 + +# if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) + + int X(have_simd_sse2)(void) + { + return 1; + } + +# else /* !x86_64 */ + +# include +# include +# include "x86-cpuid.h" + + static jmp_buf jb; + + static void sighandler(int x) + { + UNUSED(x); + longjmp(jb, 1); + } + + static int sse2_works(void) + { + void (*oldsig)(int); + oldsig = signal(SIGILL, sighandler); + if (setjmp(jb)) { + signal(SIGILL, oldsig); + return 0; + } else { +# ifdef _MSC_VER + _asm { DS(xorpd,xorps) xmm0,xmm0 } +# else + /* asm volatile ("xorpd/s %xmm0, %xmm0"); */ + asm volatile(DS(".byte 0x66; .byte 0x0f; .byte 0x57; .byte 0xc0", + ".byte 0x0f; .byte 0x57; .byte 0xc0")); +# endif + signal(SIGILL, oldsig); + return 1; + } + } + + extern void X(check_alignment_of_sse2_pm)(void); + + int X(have_simd_sse2)(void) + { + static int init = 0, res; + + if (!init) { + res = !is_386() + && has_cpuid() + && (cpuid_edx(1) & (1 << DS(26,25))) + && sse2_works(); + init = 1; + X(check_alignment_of_sse2_pm)(); + } + return res; + } + +# endif + +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/taint.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/taint.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the 
License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "ifftw.h" +#include "simd-common.h" + +#if HAVE_SIMD + +R *X(taint)(R *p, INT s) +{ + if (((unsigned)s * sizeof(R)) % ALIGNMENT) + p = (R *) (PTRINT(p) | TAINT_BIT); + if (((unsigned)s * sizeof(R)) % ALIGNMENTA) + p = (R *) (PTRINT(p) | TAINT_BITA); + return p; +} + +/* join the taint of two pointers that are supposed to be + the same modulo the taint */ +R *X(join_taint)(R *p1, R *p2) +{ + A(UNTAINT(p1) == UNTAINT(p2)); + return (R *)(PTRINT(p1) | PTRINT(p2)); +} +#endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/simd-support/x86-cpuid.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/simd-support/x86-cpuid.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +/* this code was kindly donated by Eric J. Korpela */ + +#ifdef _MSC_VER +#ifndef inline +#define inline __inline +#endif +#endif + +static inline int is_386() +{ +#ifdef _MSC_VER + unsigned int result,tst; + _asm { + pushfd + pop eax + mov edx,eax + xor eax,40000h + push eax + popfd + pushfd + pop eax + push edx + popfd + mov tst,edx + mov result,eax + } +#else + register unsigned int result,tst; + __asm__ ( + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl $0x40000,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "pushl %1\n\t" + "popfl" + : "=r" (result), "=r" (tst) /* output */ + : /* no inputs */ + ); +#endif + return (result == tst); +} + +static inline int has_cpuid() +{ +#ifdef _MSC_VER + unsigned int result,tst; + _asm { + pushfd + pop eax + mov edx,eax + xor eax,200000h + push eax + popfd + pushfd + pop eax + push edx + popfd + mov tst,edx + mov result,eax + } +#else + register unsigned int result,tst; + __asm__ ( + "pushfl\n\t" + "pop %0\n\t" + "movl %0,%1\n\t" + "xorl $0x200000,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "pushl %1\n\t" + "popfl" + : "=r" (result), "=r" (tst) /* output */ + : /* no inputs */ + ); +#endif + return (result != tst); +} + +static inline int cpuid_edx(int op) +{ +# ifdef _MSC_VER + int result; + _asm { + push ebx + mov eax,op + cpuid + mov result,edx + pop ebx + } + return result; +# else + int eax, ecx, edx; + + __asm__("push %%ebx\n\tcpuid\n\tpop %%ebx" + : "=a" (eax), "=c" (ecx), "=d" (edx) + : "a" (op)); + return edx; +# endif +} + +static inline int cpuid_ecx(int op) +{ +# ifdef _MSC_VER + int result; + _asm { + push ebx + mov eax,op + cpuid + mov result,ecx + pop ebx + } + return result; +# else + int eax, ecx, edx; + + __asm__("push 
%%ebx\n\tcpuid\n\tpop %%ebx" + : "=a" (eax), "=c" (ecx), "=d" (edx) + : "a" (op)); + return ecx; +# endif +} + +static inline int xgetbv_eax(int op) +{ +# ifdef _MSC_VER + int veax, vedx; + _asm { + mov ecx,op +# if defined(__INTEL_COMPILER) || (_MSC_VER >= 1600) + xgetbv +# else + __emit 15 + __emit 1 + __emit 208 +# endif + mov veax,eax + mov vedx,edx + } + return veax; +# else + int eax, edx; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (op)); + return eax; +#endif +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,2 @@ +EXTRA_DIST = Makefile.codelets codelet_prelude.dft codelet_prelude.rdft \ +addchain.c twovers.sh diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/Makefile.codelets --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/Makefile.codelets Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,76 @@ +# -*- makefile -*- +# This file contains special make rules to generate codelets. +# Most of this file requires GNU make . 
+ +CODLIST = codlist.c +CODELET_NAME=codelet_ + +# rule to build codlist +$(CODLIST): Makefile + ( \ + echo "#include \"ifftw.h\""; \ + echo $(INCLUDE_SIMD_HEADER); \ + echo; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo "extern void $(XRENAME)($(CODELET_NAME)$$j)(planner *);"; \ + fi \ + done; \ + echo; \ + echo; \ + echo "extern const solvtab $(SOLVTAB_NAME);"; \ + echo "const solvtab $(SOLVTAB_NAME) = {"; \ + for i in $(ALL_CODELETS) NIL; do \ + if test "$$i" != NIL; then \ + j=`basename $$i | sed -e 's/[.][cS]$$//g'`; \ + echo " SOLVTAB($(XRENAME)($(CODELET_NAME)$$j)),"; \ + fi \ + done; \ + echo " SOLVTAB_END"; \ + echo "};"; \ + ) >$@ + +# only delete codlist.c in maintainer-mode, since it is included in the dist +# FIXME: is there a way to delete in 'make clean' only when builddir != srcdir? +maintainer-clean-local: + rm -f $(CODLIST) + +if MAINTAINER_MODE + +INDENT = indent -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV +TWOVERS = sh ${top_srcdir}/support/twovers.sh +GENFFTDIR = ${top_builddir}/genfft +GEN_NOTW = ${GENFFTDIR}/gen_notw.native +GEN_NOTW_C = ${GENFFTDIR}/gen_notw_c.native +GEN_TWIDDLE = ${GENFFTDIR}/gen_twiddle.native +GEN_TWIDDLE_C = ${GENFFTDIR}/gen_twiddle_c.native +GEN_TWIDSQ = ${GENFFTDIR}/gen_twidsq.native +GEN_TWIDSQ_C = ${GENFFTDIR}/gen_twidsq_c.native +GEN_R2CF = ${GENFFTDIR}/gen_r2cf.native +GEN_R2CB = ${GENFFTDIR}/gen_r2cb.native +GEN_HC2HC = ${GENFFTDIR}/gen_hc2hc.native +GEN_HC2C = ${GENFFTDIR}/gen_hc2c.native +GEN_HC2CDFT = ${GENFFTDIR}/gen_hc2cdft.native +GEN_HC2CDFT_C = ${GENFFTDIR}/gen_hc2cdft_c.native +GEN_R2R = ${GENFFTDIR}/gen_r2r.native +PRELUDE_DFT = ${top_srcdir}/support/codelet_prelude.dft +PRELUDE_RDFT = ${top_srcdir}/support/codelet_prelude.rdft +ADD_DATE = sed -e s/@DATE@/"`date`"/ + +COPYRIGHT=${top_srcdir}/COPYRIGHT +CODELET_DEPS=$(COPYRIGHT) $(PRELUDE) +PRELUDE_COMMANDS_DFT=cat $(COPYRIGHT) $(PRELUDE_DFT) 
+PRELUDE_COMMANDS_RDFT=cat $(COPYRIGHT) $(PRELUDE_RDFT) + +FLAGS_COMMON = -compact -variables 4 +DFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 +RDFT_FLAGS_COMMON = $(FLAGS_COMMON) -pipeline-latency 4 + +# cancel the hideous builtin rules that cause an infinite loop +%: %.o +%: %.s +%: %.c +%: %.S + +endif # MAINTAINER_MODE diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,461 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = support +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + 
$(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ 
+FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ 
+datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +EXTRA_DIST = Makefile.codelets codelet_prelude.dft codelet_prelude.rdft \ +addchain.c twovers.sh + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu support/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu support/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/addchain.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/addchain.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,171 @@ +/* addition-chain optimizer */ +#include +#include +#include + +static int verbose; +static int mulcost = 18; +static int ldcost = 2; +static int sqcost = 10; +static int reflcost = 8; +#define INFTY 100000 + +static int *answer; +static int best_so_far; + +static void print_answer(int n, int t) +{ + int i; + printf("| (%d, %d) -> [", n, t); + for (i = 0; i < t; ++i) + printf("%d;", answer[i]); + printf("] (* %d *)\n", best_so_far); +} + +#define DO(i, j, k, cst) \ +if (k < n) { \ + int c = A[i] + A[j] + cst; \ + if (c < A[k]) { \ + A[k] = c; \ + changed = 1; \ + } \ +} + +#define DO3(i, j, l, k, cst) \ +if (k < n) { \ + int c = A[i] + A[j] + A[l] + cst; \ + if (c < A[k]) { \ + A[k] = c; \ + changed = 1; \ + } \ +} + +static int optimize(int n, int *A) +{ + int i, j, k, changed, cst, cstmax; + + do { + changed = 0; + for (i = 0; i < n; ++i) { + k = i + i; + DO(i, i, k, sqcost); + } + + for (i = 0; i < n; ++i) { + for (j = 0; j <= i; ++j) { + k = i + j; + DO(i, j, k, mulcost); + k = i - j; + DO(i, j, k, mulcost); + + k = i + j; + DO3(i, j, i - j, k, reflcost); + } + } + + } while (changed); + + cst = cstmax = 0; + for (i = 0; i < n; ++i) { + cst += A[i]; + if (A[i] > cstmax) cstmax = A[i]; + } +/* return cstmax; */ + return cst; +} + +static void search(int n, int t, int *A, int *B, int depth) +{ + if (depth == 0) { + int i, tc; + for (i = 0; i < n; ++i) + A[i] = INFTY; + A[0] = 0; /* always free */ + for (i = 1; i <= t; ++i) + A[B[-i]] = ldcost; + + tc = optimize(n, A); + if (tc < best_so_far) { + best_so_far = tc; + for (i = 1; i <= t; ++i) + answer[t - i] = B[-i]; + if (verbose) + print_answer(n, t); + } + } else { + for (B[0] = B[-1] + 1; B[0] < n; ++B[0]) + search(n, t, A, B + 1, depth - 1); + } +} + +static void doit(int n, int t) +{ + 
int *A; + int *B; + + A = malloc(n * sizeof(int)); + B = malloc((t + 1) * sizeof(int)); + answer = malloc(t * sizeof(int)); + + B[0] = 0; + best_so_far = INFTY; + search(n, t, A, B + 1, t); + + print_answer(n, t); + + free(A); free(B); free(answer); +} + +int main(int argc, char *argv[]) +{ + int n = 32; + int t = 3; + int all; + int ch; + + verbose = 0; + all = 0; + while ((ch = getopt(argc, argv, "n:t:m:l:r:s:va")) != -1) { + switch (ch) { + case 'n': + n = atoi(optarg); + break; + case 't': + t = atoi(optarg); + break; + case 'm': + mulcost = atoi(optarg); + break; + case 'l': + ldcost = atoi(optarg); + break; + case 's': + sqcost = atoi(optarg); + break; + case 'r': + reflcost = atoi(optarg); + break; + case 'v': + ++verbose; + break; + case 'a': + ++all; + break; + case '?': + fprintf(stderr, "use the source\n"); + exit(1); + } + } + + if (all) { + for (n = 4; n <= 64; n *= 2) { + int n1 = n - 1; if (n1 > 7) n1 = 7; + for (t = 1; t <= n1; ++t) + doit(n, t); + } + } else { + doit(n, t); + } + + return 0; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/codelet_prelude.dft --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/codelet_prelude.dft Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,8 @@ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on @DATE@ */ + +#include "codelet-dft.h" + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/codelet_prelude.rdft --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/codelet_prelude.rdft Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,8 @@ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on @DATE@ */ + +#include "codelet-rdft.h" + + + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/support/twovers.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/support/twovers.sh Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,17 @@ +#! 
/bin/sh + +# wrapper to generate two codelet versions, with and without +# fma + +genfft=$1 +shift + +echo "#ifdef HAVE_FMA" +echo + $genfft -fma -reorder-insns -schedule-for-pipeline $* +echo +echo "#else /* HAVE_FMA */" +echo + $genfft $* +echo +echo "#endif /* HAVE_FMA */" diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,80 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/libbench2 \ +-I$(top_srcdir)/dft -I$(top_srcdir)/rdft -I$(top_srcdir)/reodft \ +-I$(top_srcdir)/threads -I$(top_srcdir)/api + +noinst_PROGRAMS = bench +EXTRA_DIST = check.pl README + +if THREADS +bench_CFLAGS = $(PTHREAD_CFLAGS) +if !COMBINED_THREADS +LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la +endif +else +if OPENMP +bench_CFLAGS = $(OPENMP_CFLAGS) +LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la +endif +endif + +bench_SOURCES = bench.c hook.c fftw-bench.c fftw-bench.h +bench_LDADD = $(LIBFFTWTHREADS) \ +$(top_builddir)/libfftw3@PREC_SUFFIX@.la \ +$(top_builddir)/libbench2/libbench2.a $(THREADLIBS) + +check-local: bench$(EXEEXT) + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -r -c=30 -v `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW transforms passed basic tests!" + @echo "--------------------------------------------------------------" +if SMP + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -r -c=30 -v --nthreads=2 `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW threaded transforms passed basic tests!" 
+ @echo "--------------------------------------------------------------" +endif + +bigcheck: bench$(EXEEXT) + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW transforms passed big tests!" + @echo "--------------------------------------------------------------" +if SMP + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v --nthreads=2 `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v --nthreads=3 `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v --nthreads=10 `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW threaded transforms passed big tests!" + @echo "--------------------------------------------------------------" +endif + +smallcheck: bench$(EXEEXT) + perl -w $(srcdir)/check.pl -r -c=1 -v `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -r --estimate -c=5 -v `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW transforms passed a few tests!" + @echo "--------------------------------------------------------------" +if SMP + perl -w $(srcdir)/check.pl -r --estimate -c=2 -v --nthreads=2 `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW threaded transforms passed a few tests!" 
+ @echo "--------------------------------------------------------------" +endif + +paranoid-check: bench$(EXEEXT) +if SMP + perl -w $(srcdir)/check.pl -a --patient --nthreads=10 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --patient --nthreads=7 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --patient --nthreads=3 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --patient --nthreads=2 --paranoid `pwd`/bench$(EXEEXT) +endif + perl -w $(srcdir)/check.pl -a --patient --paranoid `pwd`/bench$(EXEEXT) + +exhaustive-check: bench$(EXEEXT) +if SMP + perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=10 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=7 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=3 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=2 --paranoid `pwd`/bench$(EXEEXT) +endif + perl -w $(srcdir)/check.pl -a --exhaustive --paranoid `pwd`/bench$(EXEEXT) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,718 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +noinst_PROGRAMS = bench$(EXEEXT) +subdir = tests +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp README +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = 
$(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am_bench_OBJECTS = bench-bench.$(OBJEXT) bench-hook.$(OBJEXT) \ + bench-fftw-bench.$(OBJEXT) +bench_OBJECTS = $(am_bench_OBJECTS) +am__DEPENDENCIES_1 = +bench_DEPENDENCIES = $(LIBFFTWTHREADS) \ + $(top_builddir)/libfftw3@PREC_SUFFIX@.la \ + $(top_builddir)/libbench2/libbench2.a $(am__DEPENDENCIES_1) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(bench_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(bench_SOURCES) +DIST_SOURCES = $(bench_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = 
@PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/libbench2 \ +-I$(top_srcdir)/dft -I$(top_srcdir)/rdft -I$(top_srcdir)/reodft \ 
+-I$(top_srcdir)/threads -I$(top_srcdir)/api + +EXTRA_DIST = check.pl README +@OPENMP_TRUE@@THREADS_FALSE@bench_CFLAGS = $(OPENMP_CFLAGS) +@THREADS_TRUE@bench_CFLAGS = $(PTHREAD_CFLAGS) +@COMBINED_THREADS_FALSE@@THREADS_TRUE@LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la +@OPENMP_TRUE@@THREADS_FALSE@LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la +bench_SOURCES = bench.c hook.c fftw-bench.c fftw-bench.h +bench_LDADD = $(LIBFFTWTHREADS) \ +$(top_builddir)/libfftw3@PREC_SUFFIX@.la \ +$(top_builddir)/libbench2/libbench2.a $(THREADLIBS) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu tests/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +bench$(EXEEXT): $(bench_OBJECTS) $(bench_DEPENDENCIES) $(EXTRA_bench_DEPENDENCIES) + @rm -f bench$(EXEEXT) + $(AM_V_CCLD)$(bench_LINK) $(bench_OBJECTS) $(bench_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-bench.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-fftw-bench.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-hook.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD 
-MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +bench-bench.o: bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -MT bench-bench.o -MD -MP -MF $(DEPDIR)/bench-bench.Tpo -c -o bench-bench.o `test -f 'bench.c' || echo '$(srcdir)/'`bench.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bench-bench.Tpo $(DEPDIR)/bench-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bench.c' object='bench-bench.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -c -o bench-bench.o `test -f 'bench.c' || echo '$(srcdir)/'`bench.c + +bench-bench.obj: bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -MT bench-bench.obj -MD -MP -MF $(DEPDIR)/bench-bench.Tpo -c -o bench-bench.obj `if test -f 'bench.c'; then $(CYGPATH_W) 'bench.c'; else $(CYGPATH_W) '$(srcdir)/bench.c'; fi` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/bench-bench.Tpo $(DEPDIR)/bench-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bench.c' object='bench-bench.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -c -o bench-bench.obj `if test -f 'bench.c'; then $(CYGPATH_W) 'bench.c'; else $(CYGPATH_W) '$(srcdir)/bench.c'; fi` + +bench-hook.o: hook.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -MT bench-hook.o -MD -MP -MF $(DEPDIR)/bench-hook.Tpo -c -o bench-hook.o `test -f 'hook.c' || echo '$(srcdir)/'`hook.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bench-hook.Tpo $(DEPDIR)/bench-hook.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hook.c' object='bench-hook.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -c -o bench-hook.o `test -f 'hook.c' || echo '$(srcdir)/'`hook.c + +bench-hook.obj: hook.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -MT bench-hook.obj -MD -MP -MF $(DEPDIR)/bench-hook.Tpo -c -o bench-hook.obj `if test -f 'hook.c'; then $(CYGPATH_W) 'hook.c'; else $(CYGPATH_W) '$(srcdir)/hook.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bench-hook.Tpo $(DEPDIR)/bench-hook.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hook.c' object='bench-hook.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -c -o bench-hook.obj `if test -f 'hook.c'; then $(CYGPATH_W) 'hook.c'; else $(CYGPATH_W) '$(srcdir)/hook.c'; fi` + +bench-fftw-bench.o: fftw-bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -MT bench-fftw-bench.o -MD -MP -MF $(DEPDIR)/bench-fftw-bench.Tpo -c -o bench-fftw-bench.o `test -f 'fftw-bench.c' || echo '$(srcdir)/'`fftw-bench.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bench-fftw-bench.Tpo $(DEPDIR)/bench-fftw-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fftw-bench.c' object='bench-fftw-bench.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -c -o bench-fftw-bench.o `test -f 'fftw-bench.c' || echo '$(srcdir)/'`fftw-bench.c + +bench-fftw-bench.obj: fftw-bench.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -MT bench-fftw-bench.obj -MD -MP -MF $(DEPDIR)/bench-fftw-bench.Tpo -c -o bench-fftw-bench.obj `if test -f 'fftw-bench.c'; then $(CYGPATH_W) 'fftw-bench.c'; else $(CYGPATH_W) '$(srcdir)/fftw-bench.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bench-fftw-bench.Tpo $(DEPDIR)/bench-fftw-bench.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fftw-bench.c' object='bench-fftw-bench.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(bench_CFLAGS) $(CFLAGS) -c -o bench-fftw-bench.obj `if test -f 'fftw-bench.c'; then $(CYGPATH_W) 'fftw-bench.c'; else 
$(CYGPATH_W) '$(srcdir)/fftw-bench.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then 
d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-local +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am check-local clean \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +check-local: bench$(EXEEXT) + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -r -c=30 -v `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW 
transforms passed basic tests!" + @echo "--------------------------------------------------------------" +@SMP_TRUE@ perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -r -c=30 -v --nthreads=2 `pwd`/bench$(EXEEXT) +@SMP_TRUE@ @echo "--------------------------------------------------------------" +@SMP_TRUE@ @echo " FFTW threaded transforms passed basic tests!" +@SMP_TRUE@ @echo "--------------------------------------------------------------" + +bigcheck: bench$(EXEEXT) + perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW transforms passed big tests!" + @echo "--------------------------------------------------------------" +@SMP_TRUE@ perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v --nthreads=2 `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v --nthreads=3 `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl $(CHECK_PL_OPTS) -a -v --nthreads=10 `pwd`/bench$(EXEEXT) +@SMP_TRUE@ @echo "--------------------------------------------------------------" +@SMP_TRUE@ @echo " FFTW threaded transforms passed big tests!" +@SMP_TRUE@ @echo "--------------------------------------------------------------" + +smallcheck: bench$(EXEEXT) + perl -w $(srcdir)/check.pl -r -c=1 -v `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -r --estimate -c=5 -v `pwd`/bench$(EXEEXT) + @echo "--------------------------------------------------------------" + @echo " FFTW transforms passed a few tests!" + @echo "--------------------------------------------------------------" +@SMP_TRUE@ perl -w $(srcdir)/check.pl -r --estimate -c=2 -v --nthreads=2 `pwd`/bench$(EXEEXT) +@SMP_TRUE@ @echo "--------------------------------------------------------------" +@SMP_TRUE@ @echo " FFTW threaded transforms passed a few tests!" 
+@SMP_TRUE@ @echo "--------------------------------------------------------------" + +paranoid-check: bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --patient --nthreads=10 --paranoid `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --patient --nthreads=7 --paranoid `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --patient --nthreads=3 --paranoid `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --patient --nthreads=2 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --patient --paranoid `pwd`/bench$(EXEEXT) + +exhaustive-check: bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=10 --paranoid `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=7 --paranoid `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=3 --paranoid `pwd`/bench$(EXEEXT) +@SMP_TRUE@ perl -w $(srcdir)/check.pl -a --exhaustive --nthreads=2 --paranoid `pwd`/bench$(EXEEXT) + perl -w $(srcdir)/check.pl -a --exhaustive --paranoid `pwd`/bench$(EXEEXT) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/README Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,73 @@ +This directory contains a benchmarking and testing program +for fftw3. + +The `bench' program has a zillion options, because we use it for +benchmarking other FFT libraries as well. This file only documents +the basic usage of bench. + +Usage: bench + +where each command is as follows: + +-s +--speed + + Benchmarks the speed of . + + The syntax for problems is [i|o][r|c][f|b], where + + i/o means in-place or out-of-place. Out of place is the default. + r/c means real or complex transform. Complex is the default. 
+ f/b means forward or backward transform. Forward is the default. + is an arbitrary multidimensional sequence of integers + separated by the character 'x'. + + (The syntax for problems is actually richer, but we do not document + it here. See the man page for fftw-wisdom for more information.) + + Example: + + ib256 : in-place backward complex transform of size 256 + 32x64 : out-of-place forward complex 2D transform of 32 rows + and 64 columns. + +-y +--verify + + Verify that FFTW is computing the correct answer. + + The program does not output anything unless an error occurs or + verbosity is at least one. + +-v + + Set verbosity to , or 1 if is omitted. -v2 will output + the created plans with fftw_print_plan. + +-oestimate +-opatient +-oexhaustive + + Plan with FFTW_ESTIMATE, FFTW_PATIENT, or FFTW_EXHAUSTIVE, respectively. + The default is FFTW_MEASURE. + + If you benchmark FFTW, please use -opatient. + +-onthreads=N + + Use N threads, if FFTW was compiled with --enable-threads. N + must be a positive integer; the default is N=1. + +-onosimd + + Disable SIMD instructions (e.g. SSE or SSE2). + +-ounaligned + + Plan with the FFTW_UNALIGNED flag. + +-owisdom + + On startup, read wisdom from a file wis.dat in the current directory + (if it exists). On completion, write accumulated wisdom to wis.dat + (overwriting any existing file of that name). diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/bench.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/bench.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,552 @@ +/**************************************************************************/ +/* NOTE to users: this is the FFTW self-test and benchmark program. + It is probably NOT a good place to learn FFTW usage, since it has a + lot of added complexity in order to exercise and test the full API, + etcetera. We suggest reading the manual. + + (Some of the self-test code is split off into fftw-bench.c and + hook.c.) 
*/ +/**************************************************************************/ + +#include +#include +#include +#include "fftw-bench.h" + +static const char *mkversion(void) { return FFTW(version); } +static const char *mkcc(void) { return FFTW(cc); } +static const char *mkcodelet_optim(void) { return FFTW(codelet_optim); } + +BEGIN_BENCH_DOC +BENCH_DOC("name", "fftw3") +BENCH_DOCF("version", mkversion) +BENCH_DOCF("cc", mkcc) +BENCH_DOCF("codelet-optim", mkcodelet_optim) +END_BENCH_DOC + +static FFTW(iodim) *bench_tensor_to_fftw_iodim(bench_tensor *t) +{ + FFTW(iodim) *d; + int i; + + BENCH_ASSERT(t->rnk >= 0); + if (t->rnk == 0) return 0; + + d = (FFTW(iodim) *)bench_malloc(sizeof(FFTW(iodim)) * t->rnk); + for (i = 0; i < t->rnk; ++i) { + d[i].n = t->dims[i].n; + d[i].is = t->dims[i].is; + d[i].os = t->dims[i].os; + } + + return d; +} + +static void extract_reim_split(int sign, int size, bench_real *p, + bench_real **r, bench_real **i) +{ + if (sign == FFTW_FORWARD) { + *r = p + 0; + *i = p + size; + } else { + *r = p + size; + *i = p + 0; + } +} + +static int sizeof_problem(bench_problem *p) +{ + return tensor_sz(p->sz) * tensor_sz(p->vecsz); +} + +/* ouch */ +static int expressible_as_api_many(bench_tensor *t) +{ + int i; + + BENCH_ASSERT(FINITE_RNK(t->rnk)); + + i = t->rnk - 1; + while (--i >= 0) { + bench_iodim *d = t->dims + i; + if (d[0].is % d[1].is) return 0; + if (d[0].os % d[1].os) return 0; + } + return 1; +} + +static int *mkn(bench_tensor *t) +{ + int *n = (int *) bench_malloc(sizeof(int *) * t->rnk); + int i; + for (i = 0; i < t->rnk; ++i) + n[i] = t->dims[i].n; + return n; +} + +static void mknembed_many(bench_tensor *t, int **inembedp, int **onembedp) +{ + int i; + bench_iodim *d; + int *inembed = (int *) bench_malloc(sizeof(int *) * t->rnk); + int *onembed = (int *) bench_malloc(sizeof(int *) * t->rnk); + + BENCH_ASSERT(FINITE_RNK(t->rnk)); + *inembedp = inembed; *onembedp = onembed; + + i = t->rnk - 1; + while (--i >= 0) { + d = t->dims + i; + 
inembed[i+1] = d[0].is / d[1].is; + onembed[i+1] = d[0].os / d[1].os; + } +} + +/* try to use the most appropriate API function. Big mess. */ + +static int imax(int a, int b) { return (a > b ? a : b); } + +static int halfish_sizeof_problem(bench_problem *p) +{ + int n2 = sizeof_problem(p); + if (FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0) + n2 = (n2 / imax(p->sz->dims[p->sz->rnk - 1].n, 1)) * + (p->sz->dims[p->sz->rnk - 1].n / 2 + 1); + return n2; +} + +static FFTW(plan) mkplan_real_split(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln; + bench_tensor *sz = p->sz, *vecsz = p->vecsz; + FFTW(iodim) *dims, *howmany_dims; + bench_real *ri, *ii, *ro, *io; + int n2 = halfish_sizeof_problem(p); + + extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->in, &ri, &ii); + extract_reim_split(FFTW_FORWARD, n2, (bench_real *) p->out, &ro, &io); + + dims = bench_tensor_to_fftw_iodim(sz); + howmany_dims = bench_tensor_to_fftw_iodim(vecsz); + if (p->sign < 0) { + if (verbose > 2) printf("using plan_guru_split_dft_r2c\n"); + pln = FFTW(plan_guru_split_dft_r2c)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + ri, ro, io, flags); + } + else { + if (verbose > 2) printf("using plan_guru_split_dft_c2r\n"); + pln = FFTW(plan_guru_split_dft_c2r)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + ri, ii, ro, flags); + } + bench_free(dims); + bench_free(howmany_dims); + return pln; +} + +static FFTW(plan) mkplan_real_interleaved(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln; + bench_tensor *sz = p->sz, *vecsz = p->vecsz; + + if (vecsz->rnk == 0 && tensor_unitstridep(sz) + && tensor_real_rowmajorp(sz, p->sign, p->in_place)) + goto api_simple; + + if (vecsz->rnk == 1 && expressible_as_api_many(sz)) + goto api_many; + + goto api_guru; + + api_simple: + switch (sz->rnk) { + case 1: + if (p->sign < 0) { + if (verbose > 2) printf("using plan_dft_r2c_1d\n"); + return FFTW(plan_dft_r2c_1d)(sz->dims[0].n, + (bench_real *) p->in, + (bench_complex *) p->out, + flags); + } + else { + if (verbose > 2) 
printf("using plan_dft_c2r_1d\n"); + return FFTW(plan_dft_c2r_1d)(sz->dims[0].n, + (bench_complex *) p->in, + (bench_real *) p->out, + flags); + } + break; + case 2: + if (p->sign < 0) { + if (verbose > 2) printf("using plan_dft_r2c_2d\n"); + return FFTW(plan_dft_r2c_2d)(sz->dims[0].n, sz->dims[1].n, + (bench_real *) p->in, + (bench_complex *) p->out, + flags); + } + else { + if (verbose > 2) printf("using plan_dft_c2r_2d\n"); + return FFTW(plan_dft_c2r_2d)(sz->dims[0].n, sz->dims[1].n, + (bench_complex *) p->in, + (bench_real *) p->out, + flags); + } + break; + case 3: + if (p->sign < 0) { + if (verbose > 2) printf("using plan_dft_r2c_3d\n"); + return FFTW(plan_dft_r2c_3d)( + sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, + (bench_real *) p->in, (bench_complex *) p->out, + flags); + } + else { + if (verbose > 2) printf("using plan_dft_c2r_3d\n"); + return FFTW(plan_dft_c2r_3d)( + sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, + (bench_complex *) p->in, (bench_real *) p->out, + flags); + } + break; + default: { + int *n = mkn(sz); + if (p->sign < 0) { + if (verbose > 2) printf("using plan_dft_r2c\n"); + pln = FFTW(plan_dft_r2c)(sz->rnk, n, + (bench_real *) p->in, + (bench_complex *) p->out, + flags); + } + else { + if (verbose > 2) printf("using plan_dft_c2r\n"); + pln = FFTW(plan_dft_c2r)(sz->rnk, n, + (bench_complex *) p->in, + (bench_real *) p->out, + flags); + } + bench_free(n); + return pln; + } + } + + api_many: + { + int *n, *inembed, *onembed; + BENCH_ASSERT(vecsz->rnk == 1); + n = mkn(sz); + mknembed_many(sz, &inembed, &onembed); + if (p->sign < 0) { + if (verbose > 2) printf("using plan_many_dft_r2c\n"); + pln = FFTW(plan_many_dft_r2c)( + sz->rnk, n, vecsz->dims[0].n, + (bench_real *) p->in, inembed, + sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, + (bench_complex *) p->out, onembed, + sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, + flags); + } + else { + if (verbose > 2) printf("using plan_many_dft_c2r\n"); + pln = FFTW(plan_many_dft_c2r)( + sz->rnk, n, 
vecsz->dims[0].n, + (bench_complex *) p->in, inembed, + sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, + (bench_real *) p->out, onembed, + sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, + flags); + } + bench_free(n); bench_free(inembed); bench_free(onembed); + return pln; + } + + api_guru: + { + FFTW(iodim) *dims, *howmany_dims; + + if (p->sign < 0) { + dims = bench_tensor_to_fftw_iodim(sz); + howmany_dims = bench_tensor_to_fftw_iodim(vecsz); + if (verbose > 2) printf("using plan_guru_dft_r2c\n"); + pln = FFTW(plan_guru_dft_r2c)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + (bench_real *) p->in, + (bench_complex *) p->out, + flags); + } + else { + dims = bench_tensor_to_fftw_iodim(sz); + howmany_dims = bench_tensor_to_fftw_iodim(vecsz); + if (verbose > 2) printf("using plan_guru_dft_c2r\n"); + pln = FFTW(plan_guru_dft_c2r)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + (bench_complex *) p->in, + (bench_real *) p->out, + flags); + } + bench_free(dims); + bench_free(howmany_dims); + return pln; + } +} + +static FFTW(plan) mkplan_real(bench_problem *p, unsigned flags) +{ + if (p->split) + return mkplan_real_split(p, flags); + else + return mkplan_real_interleaved(p, flags); +} + +static FFTW(plan) mkplan_complex_split(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln; + bench_tensor *sz = p->sz, *vecsz = p->vecsz; + FFTW(iodim) *dims, *howmany_dims; + bench_real *ri, *ii, *ro, *io; + + extract_reim_split(p->sign, p->iphyssz, (bench_real *) p->in, &ri, &ii); + extract_reim_split(p->sign, p->ophyssz, (bench_real *) p->out, &ro, &io); + + dims = bench_tensor_to_fftw_iodim(sz); + howmany_dims = bench_tensor_to_fftw_iodim(vecsz); + if (verbose > 2) printf("using plan_guru_split_dft\n"); + pln = FFTW(plan_guru_split_dft)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + ri, ii, ro, io, flags); + bench_free(dims); + bench_free(howmany_dims); + return pln; +} + +static FFTW(plan) mkplan_complex_interleaved(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln; + bench_tensor *sz = 
p->sz, *vecsz = p->vecsz; + + if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz)) + goto api_simple; + + if (vecsz->rnk == 1 && expressible_as_api_many(sz)) + goto api_many; + + goto api_guru; + + api_simple: + switch (sz->rnk) { + case 1: + if (verbose > 2) printf("using plan_dft_1d\n"); + return FFTW(plan_dft_1d)(sz->dims[0].n, + (bench_complex *) p->in, + (bench_complex *) p->out, + p->sign, flags); + break; + case 2: + if (verbose > 2) printf("using plan_dft_2d\n"); + return FFTW(plan_dft_2d)(sz->dims[0].n, sz->dims[1].n, + (bench_complex *) p->in, + (bench_complex *) p->out, + p->sign, flags); + break; + case 3: + if (verbose > 2) printf("using plan_dft_3d\n"); + return FFTW(plan_dft_3d)( + sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, + (bench_complex *) p->in, (bench_complex *) p->out, + p->sign, flags); + break; + default: { + int *n = mkn(sz); + if (verbose > 2) printf("using plan_dft\n"); + pln = FFTW(plan_dft)(sz->rnk, n, + (bench_complex *) p->in, + (bench_complex *) p->out, p->sign, flags); + bench_free(n); + return pln; + } + } + + api_many: + { + int *n, *inembed, *onembed; + BENCH_ASSERT(vecsz->rnk == 1); + n = mkn(sz); + mknembed_many(sz, &inembed, &onembed); + if (verbose > 2) printf("using plan_many_dft\n"); + pln = FFTW(plan_many_dft)( + sz->rnk, n, vecsz->dims[0].n, + (bench_complex *) p->in, + inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, + (bench_complex *) p->out, + onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, + p->sign, flags); + bench_free(n); bench_free(inembed); bench_free(onembed); + return pln; + } + + api_guru: + { + FFTW(iodim) *dims, *howmany_dims; + + dims = bench_tensor_to_fftw_iodim(sz); + howmany_dims = bench_tensor_to_fftw_iodim(vecsz); + if (verbose > 2) printf("using plan_guru_dft\n"); + pln = FFTW(plan_guru_dft)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + (bench_complex *) p->in, + (bench_complex *) p->out, + p->sign, flags); + bench_free(dims); + bench_free(howmany_dims); + return pln; + 
} +} + +static FFTW(plan) mkplan_complex(bench_problem *p, unsigned flags) +{ + if (p->split) + return mkplan_complex_split(p, flags); + else + return mkplan_complex_interleaved(p, flags); +} + +static FFTW(plan) mkplan_r2r(bench_problem *p, unsigned flags) +{ + FFTW(plan) pln; + bench_tensor *sz = p->sz, *vecsz = p->vecsz; + FFTW(r2r_kind) *k; + + k = (FFTW(r2r_kind) *) bench_malloc(sizeof(FFTW(r2r_kind)) * sz->rnk); + { + int i; + for (i = 0; i < sz->rnk; ++i) + switch (p->k[i]) { + case R2R_R2HC: k[i] = FFTW_R2HC; break; + case R2R_HC2R: k[i] = FFTW_HC2R; break; + case R2R_DHT: k[i] = FFTW_DHT; break; + case R2R_REDFT00: k[i] = FFTW_REDFT00; break; + case R2R_REDFT01: k[i] = FFTW_REDFT01; break; + case R2R_REDFT10: k[i] = FFTW_REDFT10; break; + case R2R_REDFT11: k[i] = FFTW_REDFT11; break; + case R2R_RODFT00: k[i] = FFTW_RODFT00; break; + case R2R_RODFT01: k[i] = FFTW_RODFT01; break; + case R2R_RODFT10: k[i] = FFTW_RODFT10; break; + case R2R_RODFT11: k[i] = FFTW_RODFT11; break; + default: BENCH_ASSERT(0); + } + } + + if (vecsz->rnk == 0 && tensor_unitstridep(sz) && tensor_rowmajorp(sz)) + goto api_simple; + + if (vecsz->rnk == 1 && expressible_as_api_many(sz)) + goto api_many; + + goto api_guru; + + api_simple: + switch (sz->rnk) { + case 1: + if (verbose > 2) printf("using plan_r2r_1d\n"); + pln = FFTW(plan_r2r_1d)(sz->dims[0].n, + (bench_real *) p->in, + (bench_real *) p->out, + k[0], flags); + goto done; + case 2: + if (verbose > 2) printf("using plan_r2r_2d\n"); + pln = FFTW(plan_r2r_2d)(sz->dims[0].n, sz->dims[1].n, + (bench_real *) p->in, + (bench_real *) p->out, + k[0], k[1], flags); + goto done; + case 3: + if (verbose > 2) printf("using plan_r2r_3d\n"); + pln = FFTW(plan_r2r_3d)( + sz->dims[0].n, sz->dims[1].n, sz->dims[2].n, + (bench_real *) p->in, (bench_real *) p->out, + k[0], k[1], k[2], flags); + goto done; + default: { + int *n = mkn(sz); + if (verbose > 2) printf("using plan_r2r\n"); + pln = FFTW(plan_r2r)(sz->rnk, n, + (bench_real *) p->in, 
(bench_real *) p->out, + k, flags); + bench_free(n); + goto done; + } + } + + api_many: + { + int *n, *inembed, *onembed; + BENCH_ASSERT(vecsz->rnk == 1); + n = mkn(sz); + mknembed_many(sz, &inembed, &onembed); + if (verbose > 2) printf("using plan_many_r2r\n"); + pln = FFTW(plan_many_r2r)( + sz->rnk, n, vecsz->dims[0].n, + (bench_real *) p->in, + inembed, sz->dims[sz->rnk - 1].is, vecsz->dims[0].is, + (bench_real *) p->out, + onembed, sz->dims[sz->rnk - 1].os, vecsz->dims[0].os, + k, flags); + bench_free(n); bench_free(inembed); bench_free(onembed); + goto done; + } + + api_guru: + { + FFTW(iodim) *dims, *howmany_dims; + + dims = bench_tensor_to_fftw_iodim(sz); + howmany_dims = bench_tensor_to_fftw_iodim(vecsz); + if (verbose > 2) printf("using plan_guru_r2r\n"); + pln = FFTW(plan_guru_r2r)(sz->rnk, dims, + vecsz->rnk, howmany_dims, + (bench_real *) p->in, + (bench_real *) p->out, k, flags); + bench_free(dims); + bench_free(howmany_dims); + goto done; + } + + done: + bench_free(k); + return pln; +} + +FFTW(plan) mkplan(bench_problem *p, unsigned flags) +{ + switch (p->kind) { + case PROBLEM_COMPLEX: return mkplan_complex(p, flags); + case PROBLEM_REAL: return mkplan_real(p, flags); + case PROBLEM_R2R: return mkplan_r2r(p, flags); + default: BENCH_ASSERT(0); return 0; + } +} + +void main_init(int *argc, char ***argv) +{ + UNUSED(argc); + UNUSED(argv); +} + +void initial_cleanup(void) +{ +} + +void final_cleanup(void) +{ +} + +int import_wisdom(FILE *f) +{ + return FFTW(import_wisdom_from_file)(f); +} + +void export_wisdom(FILE *f) +{ + FFTW(export_wisdom_to_file)(f); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/check.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/check.pl Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,308 @@ +#! 
/usr/bin/perl -w + +$program = "./bench"; +$default_options = ""; +$verbose = 0; +$paranoid = 0; +$exhaustive = 0; +$patient = 0; +$estimate = 0; +$wisdom = 0; +$nthreads = 1; +$rounds = 0; +$maxsize = 60000; +$maxcount = 100; +$do_0d = 0; +$do_1d = 0; +$do_2d = 0; +$do_random = 0; +$keepgoing = 0; +$flushcount = 42; + +$mpi = 0; +$mpi_transposed_in = 0; +$mpi_transposed_out = 0; + +sub make_options { + my $options = $default_options; + $options = "--verify-rounds=$rounds $options" if $rounds; + $options = "--verbose=$verbose $options" if $verbose; + $options = "-o paranoid $options" if $paranoid; + $options = "-o exhaustive $options" if $exhaustive; + $options = "-o patient $options" if $patient; + $options = "-o estimate $options" if $estimate; + $options = "-o wisdom $options" if $wisdom; + $options = "-o nthreads=$nthreads $options" if ($nthreads > 1); + $options = "-obflag=30 $options" if $mpi_transposed_in; + $options = "-obflag=31 $options" if $mpi_transposed_out; + return $options; +} + +@list_of_problems = (); + +sub flush_problems { + my $options = shift; + my $problist = ""; + + if ($#list_of_problems >= 0) { + for (@list_of_problems) { + $problist = "$problist --verify '$_'"; + } + print "Executing \"$program $options $problist\"\n" + if $verbose; + + system("$program $options $problist"); + $exit_value = $? >> 8; + $signal_num = $? & 127; + $dumped_core = $? 
& 128; + + if ($signal_num == 1) { + print "hangup\n"; + exit 0; + } + if ($signal_num == 2) { + print "interrupted\n"; + exit 0; + } + if ($signal_num == 9) { + print "killed\n"; + exit 0; + } + + if ($exit_value != 0 || $dumped_core || $signal_num) { + print "FAILED $program: $problist\n"; + if ($signal_num) { print "received signal $signal_num\n"; } + exit 1 unless $keepgoing; + } + @list_of_problems = (); + } +} + +sub do_problem { + my $problem = shift; + my $doablep = shift; + my $options = &make_options; + + if ($problem =~ /\// && $problem =~ /r/ + && ($problem =~ /i.*x/ + || $problem =~ /v/ || $problem =~ /\*/)) { + return; # cannot do real split inplace-multidimensional or vector + } + + # in --mpi mode, restrict to problems supported by MPI code + if ($mpi) { + if ($problem =~ /\//) { return; } # no split + if ($problem =~ /\*/) { return; } # no non-contiguous vectors + if ($problem =~ /r/ && $problem !~ /x/) { return; } # no 1d r2c + if ($problem =~ /k/ && $problem !~ /x/) { return; } # no 1d r2r + if ($mpi_transposed_in || $problem =~ /\[/) { + if ($problem !~ /x/) { return; } # no 1d transposed_in + if ($problem =~ /r/ && $problem !~ /b/) { return; } # only c2r + } + if ($mpi_transposed_out || $problem =~ /\]/) { + if ($problem !~ /x/) { return; } # no 1d transposed_out + if ($problem =~ /r/ && $problem =~ /b/) { return; } # only r2c + } + } + + # size-1 redft00 is not defined/doable + return if ($problem =~ /[^0-9]1e00/); + + if ($doablep) { + @list_of_problems = ($problem, @list_of_problems); + &flush_problems($options) if ($#list_of_problems > $flushcount); + } else { + print "Executing \"$program $options --can-do $problem\"\n" + if $verbose; + $result=`$program $options --can-do $problem`; + if ($result ne "#f\n" && $result ne "#f\r\n") { + print "FAILED $program: $problem is not undoable\n"; + exit 1 unless $keepgoing; + } + } +} + +# given geometry, try both directions and in place/out of place +sub do_geometry { + my $geom = shift; + my 
$doablep = shift; + do_problem("if$geom", $doablep); + do_problem("of$geom", $doablep); + do_problem("ib$geom", $doablep); + do_problem("ob$geom", $doablep); + do_problem("//if$geom", $doablep); + do_problem("//of$geom", $doablep); + do_problem("//ib$geom", $doablep); + do_problem("//ob$geom", $doablep); +} + +# given size, try all transform kinds (complex, real, etc.) +sub do_size { + my $size = shift; + my $doablep = shift; + do_geometry("c$size", $doablep); + do_geometry("r$size", $doablep); +} + +sub small_0d { + for ($i = 0; $i <= 16; ++$i) { + for ($j = 0; $j <= 16; ++$j) { + for ($vl = 1; $vl <= 5; ++$vl) { + my $ivl = $i * $vl; + my $jvl = $j * $vl; + do_problem("o1v${i}:${vl}:${jvl}x${j}:${ivl}:${vl}x${vl}:1:1", 1); + do_problem("i1v${i}:${vl}:${jvl}x${j}:${ivl}:${vl}x${vl}:1:1", 1); + do_problem("ok1v${i}:${vl}:${jvl}x${j}:${ivl}:${vl}x${vl}:1:1", 1); + do_problem("ik1v${i}:${vl}:${jvl}x${j}:${ivl}:${vl}x${vl}:1:1", 1); + } + } + } +} + +sub small_1d { + do_size (0, 0); + for ($i = 1; $i <= 100; ++$i) { + do_size ($i, 1); + } + do_size (128, 1); + do_size (256, 1); + do_size (512, 1); + do_size (1024, 1); + do_size (2048, 1); + do_size (4096, 1); +} + +sub small_2d { + do_size ("0x0", 0); + for ($i = 1; $i <= 100; ++$i) { + my $ub = 900/$i; + $ub = 100 if $ub > 100; + for ($j = 1; $j <= $ub; ++$j) { + do_size ("${i}x${j}", 1); + } + } +} + +sub rand_small_factors { + my $l = shift; + my $n = 1; + my $maxfactor = 13; + my $f = int(rand($maxfactor) + 1); + while ($n * $f < $l) { + $n *= $f; + $f = int(rand($maxfactor) + 1); + }; + return $n; +} + +# way too complicated... 
+sub one_random_test { + my $q = int(2 + rand($maxsize)); + my $rnk = int(1 + rand(4)); + my $vtype = int(rand(3)); + my $g = int(2 + exp(log($q) / ($rnk + ($vtype > 0)))); + my $first = 1; + my $sz = ""; + my $is_r2r = shift; + my @r2r_kinds = ("f", "b", "h", + "e00", "e01", "e10", "e11", "o00", "o01", "o10", "o11"); + + while ($q > 1 && $rnk > 0) { + my $r = rand_small_factors(int(rand($g) + 10)); + if ($r > 1) { + $sz = "${sz}x" if (!$first); + $first = 0; + $sz = "${sz}${r}"; + if ($is_r2r) { + my $k = $r2r_kinds[int(1 + rand($#r2r_kinds))]; + $sz = "${sz}${k}"; + } + $q = int($q / $r); + if ($g > $q) { $g = $q; } + --$rnk; + } + } + if ($vtype > 0 && $g > 1) { + my $v = int(1 + rand($g)); + $sz = "${sz}*${v}" if ($vtype == 1); + $sz = "${sz}v${v}" if ($vtype == 2); + } + if ($mpi) { + my $stype = int(rand(3)); + $sz = "]${sz}" if ($stype == 1); + $sz = "[${sz}" if ($stype == 2); + } + $sz = "d$sz" if (int(rand(3)) == 0); + if ($is_r2r) { + do_problem("ik$sz", 1); + do_problem("ok$sz", 1); + } + else { + do_size($sz, 1); + } +} + +sub random_tests { + my $i; + for ($i = 0; $i < $maxcount; ++$i) { + &one_random_test(0); + &one_random_test(1); + } +} + +sub parse_arguments (@) +{ + local (@arglist) = @_; + + while (@arglist) + { + if ($arglist[0] eq '-v') { ++$verbose; } + elsif ($arglist[0] eq '--verbose') { ++$verbose; } + elsif ($arglist[0] eq '-p') { ++$paranoid; } + elsif ($arglist[0] eq '--paranoid') { ++$paranoid; } + elsif ($arglist[0] eq '--exhaustive') { ++$exhaustive; } + elsif ($arglist[0] eq '--patient') { ++$patient; } + elsif ($arglist[0] eq '--estimate') { ++$estimate; } + elsif ($arglist[0] eq '--wisdom') { ++$wisdom; } + elsif ($arglist[0] =~ /^--nthreads=(.+)$/) { $nthreads = $1; } + elsif ($arglist[0] eq '-k') { ++$keepgoing; } + elsif ($arglist[0] eq '--keep-going') { ++$keepgoing; } + elsif ($arglist[0] =~ /^--verify-rounds=(.+)$/) { $rounds = $1; } + elsif ($arglist[0] =~ /^--count=(.+)$/) { $maxcount = $1; } + elsif ($arglist[0] =~ 
/^-c=(.+)$/) { $maxcount = $1; } + elsif ($arglist[0] =~ /^--flushcount=(.+)$/) { $flushcount = $1; } + elsif ($arglist[0] =~ /^--maxsize=(.+)$/) { $maxsize = $1; } + + elsif ($arglist[0] eq '--mpi') { ++$mpi; } + elsif ($arglist[0] eq '--mpi-transposed-in') { + ++$mpi; ++$mpi_transposed_in; } + elsif ($arglist[0] eq '--mpi-transposed-out') { + ++$mpi; ++$mpi_transposed_out; } + + elsif ($arglist[0] eq '-0d') { ++$do_0d; } + elsif ($arglist[0] eq '-1d') { ++$do_1d; } + elsif ($arglist[0] eq '-2d') { ++$do_2d; } + elsif ($arglist[0] eq '-r') { ++$do_random; } + elsif ($arglist[0] eq '--random') { ++$do_random; } + elsif ($arglist[0] eq '-a') { + ++$do_0d; ++$do_1d; ++$do_2d; ++$do_random; + } + + else { $program=$arglist[0]; } + shift (@arglist); + } +} + +# MAIN PROGRAM: + +&parse_arguments (@ARGV); + +&random_tests if $do_random; +&small_0d if $do_0d; +&small_1d if $do_1d; +&small_2d if $do_2d; + +{ + my $options = &make_options; + &flush_problems($options); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/fftw-bench.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/fftw-bench.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,242 @@ +/* See bench.c. We keep a few common subroutines in this file so + that they can be re-used in the MPI test program. 
*/ + +#include +#include +#include +#include "fftw-bench.h" + +#ifdef _OPENMP +# include +#endif + +#ifdef HAVE_SMP +int threads_ok = 1; +#endif + +FFTW(plan) the_plan = 0; + +static const char *wisdat = "wis.dat"; +unsigned the_flags = 0; +int paranoid = 0; +int usewisdom = 0; +int havewisdom = 0; +int nthreads = 1; +int amnesia = 0; + +extern void install_hook(void); /* in hook.c */ +extern void uninstall_hook(void); /* in hook.c */ + +#ifdef FFTW_RANDOM_ESTIMATOR +extern unsigned FFTW(random_estimate_seed); +#endif + +void useropt(const char *arg) +{ + int x; + double y; + + if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT; + else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE; + else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT; + else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE; + else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED; + else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD; + else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP; + else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY; + else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x; + else if (sscanf(arg, "bflag=%d", &x) == 1) the_flags |= 1U << x; + else if (!strcmp(arg, "paranoid")) paranoid = 1; + else if (!strcmp(arg, "wisdom")) usewisdom = 1; + else if (!strcmp(arg, "amnesia")) amnesia = 1; + else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x; +#ifdef FFTW_RANDOM_ESTIMATOR + else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x; +#endif + else if (sscanf(arg, "timelimit=%lg", &y) == 1) { + FFTW(set_timelimit)(y); + } + + else fprintf(stderr, "unknown user option: %s. 
Ignoring.\n", arg); +} + +void rdwisdom(void) +{ + FILE *f; + double tim; + int success = 0; + + if (havewisdom) return; + +#ifdef HAVE_SMP + if (threads_ok) { + BENCH_ASSERT(FFTW(init_threads)()); + FFTW(plan_with_nthreads)(nthreads); +#ifdef _OPENMP + omp_set_num_threads(nthreads); +#endif + } + else if (nthreads > 1 && verbose > 1) { + fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads); + nthreads = 1; + } +#endif + + if (!usewisdom) return; + + timer_start(USER_TIMER); + if ((f = fopen(wisdat, "r"))) { + if (!import_wisdom(f)) + fprintf(stderr, "bench: ERROR reading wisdom\n"); + else + success = 1; + fclose(f); + } + tim = timer_stop(USER_TIMER); + + if (success) { + if (verbose > 1) printf("READ WISDOM (%g seconds): ", tim); + + if (verbose > 3) + export_wisdom(stdout); + if (verbose > 1) + printf("\n"); + } + havewisdom = 1; +} + +void wrwisdom(void) +{ + FILE *f; + double tim; + if (!havewisdom) return; + + timer_start(USER_TIMER); + if ((f = fopen(wisdat, "w"))) { + export_wisdom(f); + fclose(f); + } + tim = timer_stop(USER_TIMER); + if (verbose > 1) printf("write wisdom took %g seconds\n", tim); +} + +static unsigned preserve_input_flags(bench_problem *p) +{ + /* + * fftw3 cannot preserve input for multidimensional c2r transforms. 
+ * Enforce FFTW_DESTROY_INPUT + */ + if (p->kind == PROBLEM_REAL && + p->sign > 0 && + !p->in_place && + p->sz->rnk > 1) + p->destroy_input = 1; + + if (p->destroy_input) + return FFTW_DESTROY_INPUT; + else + return FFTW_PRESERVE_INPUT; +} + +int can_do(bench_problem *p) +{ + double tim; + + if (verbose > 2 && p->pstring) + printf("Planning %s...\n", p->pstring); + rdwisdom(); + + timer_start(USER_TIMER); + the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE); + tim = timer_stop(USER_TIMER); + if (verbose > 2) printf("estimate-planner time: %g s\n", tim); + + if (the_plan) { + FFTW(destroy_plan)(the_plan); + return 1; + } + return 0; +} + +void setup(bench_problem *p) +{ + double tim; + + if (amnesia) { + FFTW(forget_wisdom)(); + havewisdom = 0; + } + + /* Regression test: check that fftw_malloc exists and links + * properly */ + FFTW(free(FFTW(malloc(42)))); + + rdwisdom(); + install_hook(); + +#ifdef HAVE_SMP + if (verbose > 1 && nthreads > 1) printf("NTHREADS = %d\n", nthreads); +#endif + + timer_start(USER_TIMER); + the_plan = mkplan(p, preserve_input_flags(p) | the_flags); + tim = timer_stop(USER_TIMER); + if (verbose > 1) printf("planner time: %g s\n", tim); + + BENCH_ASSERT(the_plan); + + { + double add, mul, nfma, cost, pcost; + FFTW(flops)(the_plan, &add, &mul, &nfma); + cost = FFTW(estimate_cost)(the_plan); + pcost = FFTW(cost)(the_plan); + if (verbose > 1) { + FFTW(print_plan)(the_plan); + printf("\n"); + printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n", + add, mul, nfma); + printf("estimated cost: %f, pcost = %f\n", cost, pcost); + } + } +} + + +void doit(int iter, bench_problem *p) +{ + int i; + FFTW(plan) q = the_plan; + + UNUSED(p); + for (i = 0; i < iter; ++i) + FFTW(execute)(q); +} + +void done(bench_problem *p) +{ + UNUSED(p); + + FFTW(destroy_plan)(the_plan); + uninstall_hook(); +} + +void cleanup(void) +{ + initial_cleanup(); + + wrwisdom(); +#ifdef HAVE_SMP + FFTW(cleanup_threads)(); +#else + FFTW(cleanup)(); +#endif + +# 
ifdef FFTW_DEBUG_MALLOC + { + /* undocumented memory checker */ + FFTW_EXTERN void FFTW(malloc_print_minfo)(int v); + FFTW(malloc_print_minfo)(verbose); + } +# endif + + final_cleanup(); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/fftw-bench.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/fftw-bench.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,37 @@ +/* declarations of common subroutines, etc. for use with FFTW + self-test/benchmark program (see bench.c). */ + +#include "bench-user.h" +#include "fftw3.h" + +#define CONCAT(prefix, name) prefix ## name +#if defined(BENCHFFT_SINGLE) +#define FFTW(x) CONCAT(fftwf_, x) +#elif defined(BENCHFFT_LDOUBLE) +#define FFTW(x) CONCAT(fftwl_, x) +#elif defined(BENCHFFT_QUAD) +#define FFTW(x) CONCAT(fftwq_, x) +#else +#define FFTW(x) CONCAT(fftw_, x) +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +extern FFTW(plan) mkplan(bench_problem *p, unsigned flags); +extern void initial_cleanup(void); +extern void final_cleanup(void); +extern int import_wisdom(FILE *f); +extern void export_wisdom(FILE *f); + +#if defined(HAVE_THREADS) || defined(HAVE_OPENMP) +# define HAVE_SMP + extern int threads_ok; +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tests/hook.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tests/hook.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,259 @@ +/* fftw hook to be used in the benchmark program. + + We keep it in a separate file because + + 1) bench.c is supposed to test the API---we do not want to #include + "ifftw.h" and accidentally use internal symbols/macros. + 2) this code is a royal mess. The messiness is due to + A) confusion between internal fftw tensors and bench_tensor's + (which we want to keep separate because the benchmark + program tests other routines too) + B) despite A), our desire to recycle the libbench verifier. 
+*/ + +#include +#include "bench-user.h" + +#define CALLING_FFTW /* hack for Windows DLL nonsense */ +#include "api.h" +#include "dft.h" +#include "rdft.h" + +extern int paranoid; /* in bench.c */ +extern X(plan) the_plan; /* in bench.c */ + +/* + transform an fftw tensor into a bench_tensor. +*/ +static bench_tensor *fftw_tensor_to_bench_tensor(tensor *t) +{ + bench_tensor *bt = mktensor(t->rnk); + + if (FINITE_RNK(t->rnk)) { + int i; + for (i = 0; i < t->rnk; ++i) { + /* FIXME: 64-bit unclean because of INT -> int conversion */ + bt->dims[i].n = t->dims[i].n; + bt->dims[i].is = t->dims[i].is; + bt->dims[i].os = t->dims[i].os; + BENCH_ASSERT(bt->dims[i].n == t->dims[i].n); + BENCH_ASSERT(bt->dims[i].is == t->dims[i].is); + BENCH_ASSERT(bt->dims[i].os == t->dims[i].os); + } + } + return bt; +} + +/* + transform an fftw problem into a bench_problem. +*/ +static bench_problem *fftw_problem_to_bench_problem(planner *plnr, + const problem *p_) +{ + bench_problem *bp = 0; + switch (p_->adt->problem_kind) { + case PROBLEM_DFT: + { + const problem_dft *p = (const problem_dft *) p_; + + if (!p->ri || !p->ii) + abort(); + + bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); + + bp->kind = PROBLEM_COMPLEX; + bp->sign = FFT_SIGN; + bp->split = 1; /* tensor strides are in R's, not C's */ + bp->in = UNTAINT(p->ri); + bp->out = UNTAINT(p->ro); + bp->ini = UNTAINT(p->ii); + bp->outi = UNTAINT(p->io); + bp->inphys = bp->outphys = 0; + bp->iphyssz = bp->ophyssz = 0; + bp->in_place = p->ri == p->ro; + bp->sz = fftw_tensor_to_bench_tensor(p->sz); + bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); + bp->k = 0; + break; + } + case PROBLEM_RDFT: + { + const problem_rdft *p = (const problem_rdft *) p_; + int i; + + if (!p->I || !p->O) + abort(); + + for (i = 0; i < p->sz->rnk; ++i) + switch (p->kind[i]) { + case R2HC01: + case R2HC10: + case R2HC11: + case HC2R01: + case HC2R10: + case HC2R11: + return bp; + default: + ; + } + + bp = (bench_problem *) 
bench_malloc(sizeof(bench_problem)); + + bp->kind = PROBLEM_R2R; + bp->sign = FFT_SIGN; + bp->split = 0; + bp->in = UNTAINT(p->I); + bp->out = UNTAINT(p->O); + bp->ini = bp->outi = 0; + bp->inphys = bp->outphys = 0; + bp->iphyssz = bp->ophyssz = 0; + bp->in_place = p->I == p->O; + bp->sz = fftw_tensor_to_bench_tensor(p->sz); + bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); + bp->k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * p->sz->rnk); + for (i = 0; i < p->sz->rnk; ++i) + switch (p->kind[i]) { + case R2HC: bp->k[i] = R2R_R2HC; break; + case HC2R: bp->k[i] = R2R_HC2R; break; + case DHT: bp->k[i] = R2R_DHT; break; + case REDFT00: bp->k[i] = R2R_REDFT00; break; + case REDFT01: bp->k[i] = R2R_REDFT01; break; + case REDFT10: bp->k[i] = R2R_REDFT10; break; + case REDFT11: bp->k[i] = R2R_REDFT11; break; + case RODFT00: bp->k[i] = R2R_RODFT00; break; + case RODFT01: bp->k[i] = R2R_RODFT01; break; + case RODFT10: bp->k[i] = R2R_RODFT10; break; + case RODFT11: bp->k[i] = R2R_RODFT11; break; + default: CK(0); + } + break; + } + case PROBLEM_RDFT2: + { + const problem_rdft2 *p = (const problem_rdft2 *) p_; + int rnk = p->sz->rnk; + + if (!p->r0 || !p->r1 || !p->cr || !p->ci) + abort(); + + /* give up verifying rdft2 R2HCII */ + if (p->kind != R2HC && p->kind != HC2R) + return bp; + + if (rnk > 0) { + /* can't verify separate even/odd arrays for now */ + if (2 * (p->r1 - p->r0) != + ((p->kind == R2HC) ? + p->sz->dims[rnk-1].is : p->sz->dims[rnk-1].os)) + return bp; + } + + bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); + + bp->kind = PROBLEM_REAL; + bp->sign = p->kind == R2HC ? 
FFT_SIGN : -FFT_SIGN; + bp->split = 1; /* tensor strides are in R's, not C's */ + if (p->kind == R2HC) { + bp->sign = FFT_SIGN; + bp->in = UNTAINT(p->r0); + bp->out = UNTAINT(p->cr); + bp->ini = 0; + bp->outi = UNTAINT(p->ci); + } + else { + bp->sign = -FFT_SIGN; + bp->out = UNTAINT(p->r0); + bp->in = UNTAINT(p->cr); + bp->outi = 0; + bp->ini = UNTAINT(p->ci); + } + bp->inphys = bp->outphys = 0; + bp->iphyssz = bp->ophyssz = 0; + bp->in_place = p->r0 == p->cr; + bp->sz = fftw_tensor_to_bench_tensor(p->sz); + if (rnk > 0) { + if (p->kind == R2HC) + bp->sz->dims[rnk-1].is /= 2; + else + bp->sz->dims[rnk-1].os /= 2; + } + bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); + bp->k = 0; + break; + } + default: + abort(); + } + + bp->userinfo = 0; + bp->pstring = 0; + bp->destroy_input = !NO_DESTROY_INPUTP(plnr); + + return bp; +} + +static void hook(planner *plnr, plan *pln, const problem *p_, int optimalp) +{ + int rounds = 5; + double tol = SINGLE_PRECISION ? 1.0e-3 : 1.0e-10; + UNUSED(optimalp); + + if (verbose > 5) { + printer *pr = X(mkprinter_file)(stdout); + pr->print(pr, "%P:%(%p%)\n", p_, pln); + X(printer_destroy)(pr); + printf("cost %g \n\n", pln->pcost); + } + + if (paranoid) { + bench_problem *bp; + + bp = fftw_problem_to_bench_problem(plnr, p_); + if (bp) { + X(plan) the_plan_save = the_plan; + + the_plan = (apiplan *) MALLOC(sizeof(apiplan), PLANS); + the_plan->pln = pln; + the_plan->prb = (problem *) p_; + + X(plan_awake)(pln, AWAKE_SQRTN_TABLE); + verify_problem(bp, rounds, tol); + X(plan_awake)(pln, SLEEPY); + + X(ifree)(the_plan); + the_plan = the_plan_save; + + problem_destroy(bp); + } + + } +} + +static void paranoid_checks(void) +{ + /* FIXME: assumes char = 8 bits, which is false on at least one + DSP I know of. */ +#if 0 + /* if flags_t is not 64 bits i want to know it. 
*/ + CK(sizeof(flags_t) == 8); + + CK(sizeof(md5uint) >= 4); +#endif + + CK(sizeof(uintptr_t) >= sizeof(R *)); + + CK(sizeof(INT) >= sizeof(R *)); +} + +void install_hook(void) +{ + planner *plnr = X(the_planner)(); + plnr->hook = hook; + paranoid_checks(); +} + +void uninstall_hook(void) +{ + planner *plnr = X(the_planner)(); + plnr->hook = 0; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,40 @@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/rdft -I$(top_srcdir)/api +AM_CFLAGS = $(STACK_ALIGN_CFLAGS) + +if OPENMP +FFTWOMPLIB = libfftw3@PREC_SUFFIX@_omp.la +else +FFTWOMPLIB = +endif + +if THREADS +if COMBINED_THREADS +noinst_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la +else +lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la $(FFTWOMPLIB) +endif +else +lib_LTLIBRARIES = $(FFTWOMPLIB) +endif + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = threads.h + +libfftw3@PREC_SUFFIX@_threads_la_SOURCES = api.c conf.c threads.c \ +threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \ +vrank-geq1-rdft2.c f77api.c f77funcs.h +libfftw3@PREC_SUFFIX@_threads_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) +libfftw3@PREC_SUFFIX@_threads_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +if !COMBINED_THREADS +libfftw3@PREC_SUFFIX@_threads_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la +endif + +libfftw3@PREC_SUFFIX@_omp_la_SOURCES = api.c conf.c openmp.c \ +threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \ +vrank-geq1-rdft2.c f77api.c f77funcs.h +libfftw3@PREC_SUFFIX@_omp_la_CFLAGS = $(AM_CFLAGS) $(OPENMP_CFLAGS) +libfftw3@PREC_SUFFIX@_omp_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +if !COMBINED_THREADS +libfftw3@PREC_SUFFIX@_omp_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la +endif diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/Makefile.in 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,821 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; 
$(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = threads +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" 
+am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) +@COMBINED_THREADS_FALSE@libfftw3@PREC_SUFFIX@_omp_la_DEPENDENCIES = \ +@COMBINED_THREADS_FALSE@ ../libfftw3@PREC_SUFFIX@.la +am_libfftw3@PREC_SUFFIX@_omp_la_OBJECTS = \ + libfftw3@PREC_SUFFIX@_omp_la-api.lo \ + libfftw3@PREC_SUFFIX@_omp_la-conf.lo \ + libfftw3@PREC_SUFFIX@_omp_la-openmp.lo \ + libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.lo \ + libfftw3@PREC_SUFFIX@_omp_la-ct.lo \ + libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.lo \ + libfftw3@PREC_SUFFIX@_omp_la-hc2hc.lo \ + libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.lo \ + libfftw3@PREC_SUFFIX@_omp_la-f77api.lo +libfftw3@PREC_SUFFIX@_omp_la_OBJECTS = \ + $(am_libfftw3@PREC_SUFFIX@_omp_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libfftw3@PREC_SUFFIX@_omp_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) \ + $(libfftw3@PREC_SUFFIX@_omp_la_LDFLAGS) $(LDFLAGS) -o $@ +@COMBINED_THREADS_FALSE@@OPENMP_TRUE@@THREADS_TRUE@am_libfftw3@PREC_SUFFIX@_omp_la_rpath = \ +@COMBINED_THREADS_FALSE@@OPENMP_TRUE@@THREADS_TRUE@ -rpath \ 
+@COMBINED_THREADS_FALSE@@OPENMP_TRUE@@THREADS_TRUE@ $(libdir) +@OPENMP_TRUE@@THREADS_FALSE@am_libfftw3@PREC_SUFFIX@_omp_la_rpath = \ +@OPENMP_TRUE@@THREADS_FALSE@ -rpath $(libdir) +@COMBINED_THREADS_FALSE@libfftw3@PREC_SUFFIX@_threads_la_DEPENDENCIES = \ +@COMBINED_THREADS_FALSE@ ../libfftw3@PREC_SUFFIX@.la +am_libfftw3@PREC_SUFFIX@_threads_la_OBJECTS = \ + libfftw3@PREC_SUFFIX@_threads_la-api.lo \ + libfftw3@PREC_SUFFIX@_threads_la-conf.lo \ + libfftw3@PREC_SUFFIX@_threads_la-threads.lo \ + libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.lo \ + libfftw3@PREC_SUFFIX@_threads_la-ct.lo \ + libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.lo \ + libfftw3@PREC_SUFFIX@_threads_la-hc2hc.lo \ + libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.lo \ + libfftw3@PREC_SUFFIX@_threads_la-f77api.lo +libfftw3@PREC_SUFFIX@_threads_la_OBJECTS = \ + $(am_libfftw3@PREC_SUFFIX@_threads_la_OBJECTS) +libfftw3@PREC_SUFFIX@_threads_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) \ + $(libfftw3@PREC_SUFFIX@_threads_la_LDFLAGS) $(LDFLAGS) -o $@ +@COMBINED_THREADS_FALSE@@THREADS_TRUE@am_libfftw3@PREC_SUFFIX@_threads_la_rpath = \ +@COMBINED_THREADS_FALSE@@THREADS_TRUE@ -rpath $(libdir) +@COMBINED_THREADS_TRUE@@THREADS_TRUE@am_libfftw3@PREC_SUFFIX@_threads_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libfftw3@PREC_SUFFIX@_omp_la_SOURCES) \ + $(libfftw3@PREC_SUFFIX@_threads_la_SOURCES) +DIST_SOURCES = $(libfftw3@PREC_SUFFIX@_omp_la_SOURCES) \ + $(libfftw3@PREC_SUFFIX@_threads_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ 
+LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = 
@host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft \ +-I$(top_srcdir)/rdft -I$(top_srcdir)/api + +AM_CFLAGS = $(STACK_ALIGN_CFLAGS) +@OPENMP_FALSE@FFTWOMPLIB = +@OPENMP_TRUE@FFTWOMPLIB = libfftw3@PREC_SUFFIX@_omp.la +@COMBINED_THREADS_TRUE@@THREADS_TRUE@noinst_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la +@COMBINED_THREADS_FALSE@@THREADS_TRUE@lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la $(FFTWOMPLIB) +@THREADS_FALSE@lib_LTLIBRARIES = $(FFTWOMPLIB) + +# pkgincludedir = $(includedir)/fftw3@PREC_SUFFIX@ +# pkginclude_HEADERS = threads.h +libfftw3@PREC_SUFFIX@_threads_la_SOURCES = api.c conf.c threads.c \ +threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \ +vrank-geq1-rdft2.c f77api.c f77funcs.h + +libfftw3@PREC_SUFFIX@_threads_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) +libfftw3@PREC_SUFFIX@_threads_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +@COMBINED_THREADS_FALSE@libfftw3@PREC_SUFFIX@_threads_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la +libfftw3@PREC_SUFFIX@_omp_la_SOURCES = api.c conf.c openmp.c \ +threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \ +vrank-geq1-rdft2.c f77api.c f77funcs.h + +libfftw3@PREC_SUFFIX@_omp_la_CFLAGS = $(AM_CFLAGS) $(OPENMP_CFLAGS) +libfftw3@PREC_SUFFIX@_omp_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +@COMBINED_THREADS_FALSE@libfftw3@PREC_SUFFIX@_omp_la_LIBADD = 
../libfftw3@PREC_SUFFIX@.la +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu threads/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu threads/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + 
+uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libfftw3@PREC_SUFFIX@_omp.la: $(libfftw3@PREC_SUFFIX@_omp_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_omp_la_DEPENDENCIES) $(EXTRA_libfftw3@PREC_SUFFIX@_omp_la_DEPENDENCIES) + $(AM_V_CCLD)$(libfftw3@PREC_SUFFIX@_omp_la_LINK) $(am_libfftw3@PREC_SUFFIX@_omp_la_rpath) $(libfftw3@PREC_SUFFIX@_omp_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_omp_la_LIBADD) $(LIBS) + +libfftw3@PREC_SUFFIX@_threads.la: $(libfftw3@PREC_SUFFIX@_threads_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_threads_la_DEPENDENCIES) $(EXTRA_libfftw3@PREC_SUFFIX@_threads_la_DEPENDENCIES) + $(AM_V_CCLD)$(libfftw3@PREC_SUFFIX@_threads_la_LINK) $(am_libfftw3@PREC_SUFFIX@_threads_la_rpath) $(libfftw3@PREC_SUFFIX@_threads_la_OBJECTS) $(libfftw3@PREC_SUFFIX@_threads_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-conf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-ct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-f77api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-hc2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-openmp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-conf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-ct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-f77api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-hc2hc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-threads.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' 
object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libfftw3@PREC_SUFFIX@_omp_la-api.lo: api.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-api.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-api.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-api.lo `test -f 'api.c' || echo '$(srcdir)/'`api.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-api.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-api.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='api.c' object='libfftw3@PREC_SUFFIX@_omp_la-api.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-api.lo `test -f 'api.c' || echo '$(srcdir)/'`api.c + +libfftw3@PREC_SUFFIX@_omp_la-conf.lo: conf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-conf.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-conf.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-conf.lo `test -f 'conf.c' || echo '$(srcdir)/'`conf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-conf.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-conf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='conf.c' object='libfftw3@PREC_SUFFIX@_omp_la-conf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-conf.lo `test -f 'conf.c' || echo '$(srcdir)/'`conf.c + +libfftw3@PREC_SUFFIX@_omp_la-openmp.lo: openmp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-openmp.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-openmp.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-openmp.lo `test -f 'openmp.c' || echo '$(srcdir)/'`openmp.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-openmp.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-openmp.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp.c' object='libfftw3@PREC_SUFFIX@_omp_la-openmp.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-openmp.lo `test -f 'openmp.c' || echo '$(srcdir)/'`openmp.c + +libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.lo: dft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.lo `test -f 'dft-vrank-geq1.c' || echo '$(srcdir)/'`dft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dft-vrank-geq1.c' object='libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-dft-vrank-geq1.lo `test -f 'dft-vrank-geq1.c' || echo '$(srcdir)/'`dft-vrank-geq1.c + 
+libfftw3@PREC_SUFFIX@_omp_la-ct.lo: ct.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-ct.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-ct.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-ct.lo `test -f 'ct.c' || echo '$(srcdir)/'`ct.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-ct.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-ct.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ct.c' object='libfftw3@PREC_SUFFIX@_omp_la-ct.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-ct.lo `test -f 'ct.c' || echo '$(srcdir)/'`ct.c + +libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.lo: rdft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.lo `test -f 'rdft-vrank-geq1.c' || echo '$(srcdir)/'`rdft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdft-vrank-geq1.c' object='libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-rdft-vrank-geq1.lo `test -f 'rdft-vrank-geq1.c' || echo '$(srcdir)/'`rdft-vrank-geq1.c + +libfftw3@PREC_SUFFIX@_omp_la-hc2hc.lo: hc2hc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-hc2hc.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-hc2hc.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-hc2hc.lo `test -f 'hc2hc.c' || echo '$(srcdir)/'`hc2hc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-hc2hc.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-hc2hc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hc2hc.c' object='libfftw3@PREC_SUFFIX@_omp_la-hc2hc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-hc2hc.lo `test -f 'hc2hc.c' || echo '$(srcdir)/'`hc2hc.c + +libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.lo: vrank-geq1-rdft2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT 
libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.lo `test -f 'vrank-geq1-rdft2.c' || echo '$(srcdir)/'`vrank-geq1-rdft2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='vrank-geq1-rdft2.c' object='libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-vrank-geq1-rdft2.lo `test -f 'vrank-geq1-rdft2.c' || echo '$(srcdir)/'`vrank-geq1-rdft2.c + +libfftw3@PREC_SUFFIX@_omp_la-f77api.lo: f77api.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_omp_la-f77api.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-f77api.Tpo -c -o libfftw3@PREC_SUFFIX@_omp_la-f77api.lo `test -f 'f77api.c' || echo '$(srcdir)/'`f77api.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-f77api.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_omp_la-f77api.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='f77api.c' object='libfftw3@PREC_SUFFIX@_omp_la-f77api.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_omp_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_omp_la-f77api.lo `test -f 'f77api.c' || echo '$(srcdir)/'`f77api.c + +libfftw3@PREC_SUFFIX@_threads_la-api.lo: api.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-api.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-api.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-api.lo `test -f 'api.c' || echo '$(srcdir)/'`api.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-api.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-api.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='api.c' object='libfftw3@PREC_SUFFIX@_threads_la-api.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-api.lo `test -f 'api.c' || echo '$(srcdir)/'`api.c + +libfftw3@PREC_SUFFIX@_threads_la-conf.lo: conf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-conf.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-conf.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-conf.lo `test -f 'conf.c' || echo '$(srcdir)/'`conf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-conf.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-conf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='conf.c' object='libfftw3@PREC_SUFFIX@_threads_la-conf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-conf.lo `test -f 'conf.c' || echo '$(srcdir)/'`conf.c + +libfftw3@PREC_SUFFIX@_threads_la-threads.lo: threads.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-threads.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-threads.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-threads.lo `test -f 'threads.c' || echo '$(srcdir)/'`threads.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-threads.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-threads.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='threads.c' object='libfftw3@PREC_SUFFIX@_threads_la-threads.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-threads.lo `test -f 'threads.c' || echo '$(srcdir)/'`threads.c + +libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.lo: 
dft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.lo `test -f 'dft-vrank-geq1.c' || echo '$(srcdir)/'`dft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dft-vrank-geq1.c' object='libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-dft-vrank-geq1.lo `test -f 'dft-vrank-geq1.c' || echo '$(srcdir)/'`dft-vrank-geq1.c + +libfftw3@PREC_SUFFIX@_threads_la-ct.lo: ct.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-ct.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-ct.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-ct.lo `test -f 'ct.c' || echo '$(srcdir)/'`ct.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-ct.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-ct.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ct.c' 
object='libfftw3@PREC_SUFFIX@_threads_la-ct.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-ct.lo `test -f 'ct.c' || echo '$(srcdir)/'`ct.c + +libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.lo: rdft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.lo `test -f 'rdft-vrank-geq1.c' || echo '$(srcdir)/'`rdft-vrank-geq1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdft-vrank-geq1.c' object='libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-rdft-vrank-geq1.lo `test -f 'rdft-vrank-geq1.c' || echo '$(srcdir)/'`rdft-vrank-geq1.c + +libfftw3@PREC_SUFFIX@_threads_la-hc2hc.lo: hc2hc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-hc2hc.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-hc2hc.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-hc2hc.lo `test -f 'hc2hc.c' || echo '$(srcdir)/'`hc2hc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-hc2hc.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-hc2hc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hc2hc.c' object='libfftw3@PREC_SUFFIX@_threads_la-hc2hc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-hc2hc.lo `test -f 'hc2hc.c' || echo '$(srcdir)/'`hc2hc.c + +libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.lo: vrank-geq1-rdft2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.lo `test -f 'vrank-geq1-rdft2.c' || echo '$(srcdir)/'`vrank-geq1-rdft2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='vrank-geq1-rdft2.c' object='libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.lo' 
libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-vrank-geq1-rdft2.lo `test -f 'vrank-geq1-rdft2.c' || echo '$(srcdir)/'`vrank-geq1-rdft2.c + +libfftw3@PREC_SUFFIX@_threads_la-f77api.lo: f77api.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -MT libfftw3@PREC_SUFFIX@_threads_la-f77api.lo -MD -MP -MF $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-f77api.Tpo -c -o libfftw3@PREC_SUFFIX@_threads_la-f77api.lo `test -f 'f77api.c' || echo '$(srcdir)/'`f77api.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-f77api.Tpo $(DEPDIR)/libfftw3@PREC_SUFFIX@_threads_la-f77api.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='f77api.c' object='libfftw3@PREC_SUFFIX@_threads_la-f77api.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libfftw3@PREC_SUFFIX@_threads_la_CFLAGS) $(CFLAGS) -c -o libfftw3@PREC_SUFFIX@_threads_la-f77api.lo `test -f 'f77api.c' || echo '$(srcdir)/'`f77api.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo 
"$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + clean-noinstLTLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + 
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am uninstall-libLTLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/api.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/api.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" +#include "threads.h" + +static int threads_inited = 0; + +static void threads_register_hooks(void) +{ + X(mksolver_ct_hook) = X(mksolver_ct_threads); + X(mksolver_hc2hc_hook) = X(mksolver_hc2hc_threads); +} + +static void threads_unregister_hooks(void) +{ + X(mksolver_ct_hook) = 0; + X(mksolver_hc2hc_hook) = 0; +} + +/* should be called before all other FFTW functions! 
*/ +int X(init_threads)(void) +{ + if (!threads_inited) { + planner *plnr; + + if (X(ithreads_init)()) + return 0; + + threads_register_hooks(); + + /* this should be the first time the_planner is called, + and hence the time it is configured */ + plnr = X(the_planner)(); + X(threads_conf_standard)(plnr); + + threads_inited = 1; + } + return 1; +} + + +void X(cleanup_threads)(void) +{ + X(cleanup)(); + if (threads_inited) { + X(threads_cleanup)(); + threads_unregister_hooks(); + threads_inited = 0; + } +} + +void X(plan_with_nthreads)(int nthreads) +{ + planner *plnr; + + if (!threads_inited) { + X(cleanup)(); + X(init_threads)(); + } + A(threads_inited); + plnr = X(the_planner)(); + plnr->nthr = X(imax)(1, nthreads); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/conf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/conf.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "threads.h" + +static const solvtab s = +{ + SOLVTAB(X(dft_thr_vrank_geq1_register)), + SOLVTAB(X(rdft_thr_vrank_geq1_register)), + SOLVTAB(X(rdft2_thr_vrank_geq1_register)), + + SOLVTAB_END +}; + +void X(threads_conf_standard)(planner *p) +{ + X(solvtab_exec)(s, p); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/ct.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/ct.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "threads.h" + +typedef struct { + plan_dft super; + plan *cld; + plan **cldws; + int nthr; + INT r; +} P; + +typedef struct { + plan **cldws; + R *r, *i; +} PD; + +static void *spawn_apply(spawn_data *d) +{ + PD *ego = (PD *) d->data; + INT thr_num = d->thr_num; + + plan_dftw *cldw = (plan_dftw *) (ego->cldws[thr_num]); + cldw->apply((plan *) cldw, ego->r, ego->i); + return 0; +} + +static void apply_dit(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ri, ii, ro, io); + + { + PD d; + + d.r = ro; d.i = io; + d.cldws = ego->cldws; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d); + } +} + +static void apply_dif(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + plan_dft *cld; + + { + PD d; + + d.r = ri; d.i = ii; + d.cldws = ego->cldws; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d); + } + + cld = (plan_dft *) ego->cld; + cld->apply(ego->cld, ri, ii, ro, io); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + int i; + X(plan_awake)(ego->cld, wakefulness); + for (i = 0; i < ego->nthr; ++i) + X(plan_awake)(ego->cldws[i], wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + int i; + X(plan_destroy_internal)(ego->cld); + for (i = 0; i < ego->nthr; ++i) + X(plan_destroy_internal)(ego->cldws[i]); + X(ifree)(ego->cldws); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + int i; + p->print(p, "(dft-thr-ct-%s-x%d/%D", + ego->super.apply == apply_dit ? 
"dit" : "dif", + ego->nthr, ego->r); + for (i = 0; i < ego->nthr; ++i) + if (i == 0 || (ego->cldws[i] != ego->cldws[i-1] && + (i <= 1 || ego->cldws[i] != ego->cldws[i-2]))) + p->print(p, "%(%p%)", ego->cldws[i]); + p->print(p, "%(%p%))", ego->cld); +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const ct_solver *ego = (const ct_solver *) ego_; + const problem_dft *p; + P *pln = 0; + plan *cld = 0, **cldws = 0; + INT n, r, m, v, ivs, ovs; + INT block_size; + int i, nthr, plnr_nthr_save; + iodim *d; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (plnr->nthr <= 1 || !X(ct_applicable)(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_dft *) p_; + d = p->sz->dims; + n = d[0].n; + r = X(choose_radix)(ego->r, n); + m = n / r; + + X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs); + + block_size = (m + plnr->nthr - 1) / plnr->nthr; + nthr = (int)((m + block_size - 1) / block_size); + plnr_nthr_save = plnr->nthr; + plnr->nthr = (plnr->nthr + nthr - 1) / nthr; + + cldws = (plan **) MALLOC(sizeof(plan *) * nthr, PLANS); + for (i = 0; i < nthr; ++i) cldws[i] = (plan *) 0; + + switch (ego->dec) { + case DECDIT: + { + for (i = 0; i < nthr; ++i) { + cldws[i] = ego->mkcldw(ego, + r, m * d[0].os, m * d[0].os, + m, d[0].os, + v, ovs, ovs, + i*block_size, + (i == nthr - 1) ? 
+ (m - i*block_size) : block_size, + p->ro, p->io, plnr); + if (!cldws[i]) goto nada; + } + + plnr->nthr = plnr_nthr_save; + + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(m, r * d[0].is, d[0].os), + X(mktensor_2d)(r, d[0].is, m * d[0].os, + v, ivs, ovs), + p->ri, p->ii, p->ro, p->io) + ); + if (!cld) goto nada; + + pln = MKPLAN_DFT(P, &padt, apply_dit); + break; + } + case DECDIF: + case DECDIF+TRANSPOSE: + { + INT cors, covs; /* cldw ors, ovs */ + if (ego->dec == DECDIF+TRANSPOSE) { + cors = ivs; + covs = m * d[0].is; + /* ensure that we generate well-formed dftw subproblems */ + /* FIXME: too conservative */ + if (!(1 + && r == v + && d[0].is == r * cors)) + goto nada; + + /* FIXME: allow in-place only for now, like in + fftw-3.[01] */ + if (!(1 + && p->ri == p->ro + && d[0].is == r * d[0].os + && cors == d[0].os + && covs == ovs + )) + goto nada; + } else { + cors = m * d[0].is; + covs = ivs; + } + + for (i = 0; i < nthr; ++i) { + cldws[i] = ego->mkcldw(ego, + r, m * d[0].is, cors, + m, d[0].is, + v, ivs, covs, + i*block_size, + (i == nthr - 1) ? 
+ (m - i*block_size) : block_size, + p->ri, p->ii, plnr); + if (!cldws[i]) goto nada; + } + + plnr->nthr = plnr_nthr_save; + + cld = X(mkplan_d)(plnr, + X(mkproblem_dft_d)( + X(mktensor_1d)(m, d[0].is, r * d[0].os), + X(mktensor_2d)(r, cors, d[0].os, + v, covs, ovs), + p->ri, p->ii, p->ro, p->io) + ); + if (!cld) goto nada; + + pln = MKPLAN_DFT(P, &padt, apply_dif); + break; + } + + default: A(0); + + } + + pln->cld = cld; + pln->cldws = cldws; + pln->nthr = nthr; + pln->r = r; + X(ops_zero)(&pln->super.super.ops); + for (i = 0; i < nthr; ++i) { + X(ops_add2)(&cldws[i]->ops, &pln->super.super.ops); + pln->super.super.could_prune_now_p |= cldws[i]->could_prune_now_p; + } + X(ops_add2)(&cld->ops, &pln->super.super.ops); + return &(pln->super.super); + + nada: + if (cldws) { + for (i = 0; i < nthr; ++i) + X(plan_destroy_internal)(cldws[i]); + X(ifree)(cldws); + } + X(plan_destroy_internal)(cld); + return (plan *) 0; +} + +ct_solver *X(mksolver_ct_threads)(size_t size, INT r, int dec, + ct_mkinferior mkcldw, + ct_force_vrecursion force_vrecursionp) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + ct_solver *slv = (ct_solver *) X(mksolver)(size, &sadt); + slv->r = r; + slv->dec = dec; + slv->mkcldw = mkcldw; + slv->force_vrecursionp = force_vrecursionp; + return slv; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/dft-vrank-geq1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/dft-vrank-geq1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "threads.h" +/* Solver record: the vecloop_dim / buddies / nbuddies triple that pickdim() forwards to X(pickdim). */ +typedef struct { + solver super; + int vecloop_dim; + const int *buddies; + int nbuddies; +} S; +/* Plan record: cldrn[0..nthr-1] are the per-thread child plans; its/ots are the input/output pointer offsets between consecutive threads' blocks. */ +typedef struct { + plan_dft super; + plan **cldrn; + INT its, ots; + int nthr; + const S *solver; +} P; +/* Arguments of one apply() call, handed to the workers through spawn_data.data. */ +typedef struct { + INT its, ots; + R *ri, *ii, *ro, *io; + plan **cldrn; +} PD; +/* Worker body: runs child plan number d->thr_num on the four arrays, offset by thr_num * its (inputs) and thr_num * ots (outputs). Always returns 0. */ +static void *spawn_apply(spawn_data *d) +{ + PD *ego = (PD *) d->data; + INT its = ego->its; + INT ots = ego->ots; + int thr_num = d->thr_num; + plan_dft *cld = (plan_dft *) ego->cldrn[thr_num]; + + cld->apply((plan *) cld, + ego->ri + thr_num * its, ego->ii + thr_num * its, + ego->ro + thr_num * ots, ego->io + thr_num * ots); + return 0; +} +/* Executes the plan: packs the strides, child plans and array pointers into a PD and lets X(spawn_loop) run spawn_apply once per child. */ +static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) +{ + const P *ego = (const P *) ego_; + PD d; + + d.its = ego->its; + d.ots = ego->ots; + d.cldrn = ego->cldrn; + d.ri = ri; d.ii = ii; d.ro = ro; d.io = io; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*) &d); +} +/* Passes the wakefulness state on to every child plan. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + int i; + for (i = 0; i < ego->nthr; ++i) + X(plan_awake)(ego->cldrn[i], wakefulness); +} +/* Destroys every child plan, then frees the cldrn array itself. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + int i; + for (i = 0; i < ego->nthr; ++i) + X(plan_destroy_internal)(ego->cldrn[i]); + X(ifree)(ego->cldrn); +} +/* Prints the plan header followed by the children; a child is printed only when it differs from the one before it and from the one two places before it. */ +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + int i; + p->print(p, "(dft-thr-vrank>=1-x%d/%d", ego->nthr, s->vecloop_dim); + for (i = 
0; i < ego->nthr; ++i) + if (i == 0 || (ego->cldrn[i] != ego->cldrn[i-1] && + (i <= 1 || ego->cldrn[i] != ego->cldrn[i-2]))) + p->print(p, "%(%p%)", ego->cldrn[i]); + p->putchr(p, ')'); +} +/* Forwards this solver's dimension settings to X(pickdim); dp is passed through unchanged and mkplan() later uses the value it receives as the index of the vector dimension to split. */ +static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) +{ + return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, + vecsz, oop, dp); +} +/* Basic applicability: more than one planner thread, a finite positive vector rank, and a successful pickdim(). The oop argument is nonzero when ri and ro differ. */ +static int applicable0(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *) ego_; + const problem_dft *p = (const problem_dft *) p_; + + return (1 + && plnr->nthr > 1 + && FINITE_RNK(p->vecsz->rnk) + && p->vecsz->rnk > 0 + && pickdim(ego, p->vecsz, p->ri != p->ro, dp) + ); +} +/* Full applicability: applicable0() plus, when NO_VRANK_SPLITSP(plnr) holds, the requirement that vecloop_dim equals the first buddy. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *)ego_; + + if (!applicable0(ego_, p_, plnr, dp)) return 0; + + /* fftw2 behavior */ + if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) + return 0; + + return 1; +} +/* Builds the threaded plan: one child problem per block of the chosen vector dimension. Returns a null plan when the solver is not applicable or any child cannot be planned. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_dft *p; + P *pln; + problem *cldp; + int vdim; + iodim *d; + plan **cldrn = (plan **) 0; + int i, nthr; + INT its, ots, block_size; + tensor *vecsz = 0; + + static const plan_adt padt = { + X(dft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &vdim)) + return (plan *) 0; + p = (const problem_dft *) p_; + d = p->vecsz->dims + vdim; +/* Split d->n into nthr blocks of block_size elements (ceiling division) and leave the planner ceil(plnr->nthr / nthr) threads for each child. NOTE(review): plnr->nthr is not restored in this function; presumably the caller does so - confirm. */ + block_size = (d->n + plnr->nthr - 1) / plnr->nthr; + nthr = (int)((d->n + block_size - 1) / block_size); + plnr->nthr = (plnr->nthr + nthr - 1) / nthr; + its = d->is * block_size; + ots = d->os * block_size; +/* Start with all child slots null so the nada path can walk the whole array. */ + cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS); + for (i = 0; i < nthr; ++i) cldrn[i] = (plan *) 0; +/* Work on a copy of the vector tensor; only the length of dimension vdim changes per child, and the last child takes the remainder. */ + vecsz = X(tensor_copy)(p->vecsz); + for (i = 0; i < nthr; ++i) { + vecsz->dims[vdim].n = + (i == nthr - 1) ? 
(d->n - i*block_size) : block_size; + cldp = X(mkproblem_dft)(p->sz, vecsz, + p->ri + i*its, p->ii + i*its, + p->ro + i*ots, p->io + i*ots); + cldrn[i] = X(mkplan_d)(plnr, cldp); + if (!cldrn[i]) goto nada; + } + X(tensor_destroy)(vecsz); + + pln = MKPLAN_DFT(P, &padt, apply); + + pln->cldrn = cldrn; + pln->its = its; + pln->ots = ots; + pln->nthr = nthr; +/* The plan's operation count and pcost are the sums over all children. */ + pln->solver = ego; + X(ops_zero)(&pln->super.super.ops); + pln->super.super.pcost = 0; + for (i = 0; i < nthr; ++i) { + X(ops_add2)(&cldrn[i]->ops, &pln->super.super.ops); + pln->super.super.pcost += cldrn[i]->pcost; + } + + return &(pln->super.super); +/* Failure path: destroy the child slots, free the array and the tensor copy, and report failure with a null plan. */ + nada: + if (cldrn) { + for (i = 0; i < nthr; ++i) + X(plan_destroy_internal)(cldrn[i]); + X(ifree)(cldrn); + } + X(tensor_destroy)(vecsz); + return (plan *) 0; +} +/* Creates a solver of type S and records the three dimension settings in it. */ +static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->vecloop_dim = vecloop_dim; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} +/* Registers one solver per entry of buddies[], each using that entry as its vecloop_dim and sharing the same buddies array. */ +void X(dft_thr_vrank_geq1_register)(planner *p) +{ + int i; + + /* FIXME: Should we try other vecloop_dim values? */ + static const int buddies[] = { 1, -1 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/f77api.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/f77api.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "api.h" + +/* if F77_FUNC is not defined, then we don't know how to mangle identifiers + for the Fortran linker, and we must omit the f77 API. */ +#if defined(F77_FUNC) || defined(WINDOWS_F77_MANGLING) + +#include "x77.h" + +#define F77(a, A) F77x(x77(a), X77(A)) + +#ifndef WINDOWS_F77_MANGLING + +#if defined(F77_FUNC) +# define F77x(a, A) F77_FUNC(a, A) +# include "f77funcs.h" +#endif + +#if defined(F77_FUNC_) && !defined(F77_FUNC_EQUIV) +# undef F77x +# define F77x(a, A) F77_FUNC_(a, A) +# include "f77funcs.h" +#endif + +#else /* WINDOWS_F77_MANGLING */ + +/* Various mangling conventions common (?) under Windows. */ + +/* g77 */ +# define WINDOWS_F77_FUNC(a, A) a ## __ +# define F77x(a, A) WINDOWS_F77_FUNC(a, A) +# include "f77funcs.h" + +/* Intel, etc. */ +# undef WINDOWS_F77_FUNC +# define WINDOWS_F77_FUNC(a, A) a ## _ +# include "f77funcs.h" + +/* Digital/Compaq/HP Visual Fortran, Intel Fortran. stdcall attribute + is apparently required to adjust for calling conventions (callee + pops stack in stdcall). 
See also: + http://msdn.microsoft.com/library/en-us/vccore98/html/_core_mixed.2d.language_programming.3a_.overview.asp +*/ +# undef WINDOWS_F77_FUNC +# if defined(__GNUC__) +# define WINDOWS_F77_FUNC(a, A) __attribute__((stdcall)) A +# elif defined(_MSC_VER) || defined(_ICC) || defined(_STDCALL_SUPPORTED) +# define WINDOWS_F77_FUNC(a, A) __stdcall A +# else +# define WINDOWS_F77_FUNC(a, A) A /* oh well */ +# endif +# include "f77funcs.h" + +#endif /* WINDOWS_F77_MANGLING */ + +#endif /* F77_FUNC */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/f77funcs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/f77funcs.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* Functions in the FFTW Fortran API, mangled according to the + F77(...) macro. This file is designed to be #included by + f77api.c, possibly multiple times in order to support multiple + compiler manglings (via redefinition of F77). 
*/ +/* Fortran wrapper: dereferences the by-reference argument and forwards it to X(plan_with_nthreads). */ +FFTW_VOIDFUNC F77(plan_with_nthreads, PLAN_WITH_NTHREADS)(int *nthreads) +{ + X(plan_with_nthreads)(*nthreads); +} +/* Fortran wrapper: stores the return value of X(init_threads) in *okay. */ +FFTW_VOIDFUNC F77(init_threads, INIT_THREADS)(int *okay) +{ + *okay = X(init_threads)(); +} +/* Fortran wrapper: calls X(cleanup_threads); takes no arguments. */ +FFTW_VOIDFUNC F77(cleanup_threads, CLEANUP_THREADS)(void) +{ + X(cleanup_threads)(); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/hc2hc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/hc2hc.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "threads.h" +/* Plan record: cld is the single child RDFT plan, cldws[0..nthr-1] are the per-thread plans made by the solver's mkcldw, and r is the radix chosen in mkplan(). */ +typedef struct { + plan_rdft super; + plan *cld; + plan **cldws; + int nthr; + INT r; +} P; +/* Arguments of one spawn: the per-thread plans and the single array they are applied to. */ +typedef struct { + plan **cldws; + R *IO; +} PD; +/* Worker body: applies per-thread plan number d->thr_num to ego->IO. Always returns 0. */ +static void *spawn_apply(spawn_data *d) +{ + PD *ego = (PD *) d->data; + + plan_hc2hc *cldw = (plan_hc2hc *) (ego->cldws[d->thr_num]); + cldw->apply((plan *) cldw, ego->IO); + return 0; +} +/* "dit" order: run the child plan from I to O first, then the per-thread plans on O. */ +static void apply_dit(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld; + + cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, I, O); + + { + PD d; + + d.IO = O; + d.cldws = ego->cldws; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d); + } +} +/* "dif" order: run the per-thread plans on I first, then the child plan from I to O. */ +static void apply_dif(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + plan_rdft *cld; + + { + PD d; + + d.IO = I; + d.cldws = ego->cldws; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*)&d); + } + + cld = (plan_rdft *) ego->cld; + cld->apply((plan *) cld, I, O); +} +/* Passes the wakefulness state on to the child plan and to every per-thread plan. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + int i; + X(plan_awake)(ego->cld, wakefulness); + for (i = 0; i < ego->nthr; ++i) + X(plan_awake)(ego->cldws[i], wakefulness); +} +/* Destroys the child plan and every per-thread plan, then frees the cldws array. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + int i; + X(plan_destroy_internal)(ego->cld); + for (i = 0; i < ego->nthr; ++i) + X(plan_destroy_internal)(ego->cldws[i]); + X(ifree)(ego->cldws); +} +/* Prints "dit" or "dif" according to which apply function the plan carries, then the per-thread plans (skipping one equal to either of its two predecessors), then the child plan. */ +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + int i; + p->print(p, "(rdft-thr-ct-%s-x%d/%D", + ego->super.apply == apply_dit ? 
"dit" : "dif", + ego->nthr, ego->r); + for (i = 0; i < ego->nthr; ++i) + if (i == 0 || (ego->cldws[i] != ego->cldws[i-1] && + (i <= 1 || ego->cldws[i] != ego->cldws[i-2]))) + p->print(p, "%(%p%)", ego->cldws[i]); + p->print(p, "%(%p%))", ego->cld); +} +/* Builds the threaded plan: nthr per-thread plans from ego->mkcldw, each covering one block of the mcount = (m + 2) / 2 range, plus one child RDFT plan. R2HC problems get apply_dit, HC2R problems get apply_dif. Returns a null plan on failure. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const hc2hc_solver *ego = (const hc2hc_solver *) ego_; + const problem_rdft *p; + P *pln = 0; + plan *cld = 0, **cldws = 0; + INT n, r, m, v, ivs, ovs, mcount; + int i, nthr, plnr_nthr_save; + INT block_size; + iodim *d; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (plnr->nthr <= 1 || !X(hc2hc_applicable)(ego, p_, plnr)) + return (plan *) 0; + + p = (const problem_rdft *) p_; + d = p->sz->dims; + n = d[0].n; + r = X(choose_radix)(ego->r, n); + m = n / r; + mcount = (m + 2) / 2; + + X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs); +/* plnr->nthr is lowered to ceil(plnr->nthr / nthr) while the per-thread plans are created and is put back from plnr_nthr_save before cld is planned. NOTE(review): the goto nada exits inside the mkcldw loops leave it lowered; confirm the caller restores it. */ + block_size = (mcount + plnr->nthr - 1) / plnr->nthr; + nthr = (int)((mcount + block_size - 1) / block_size); + plnr_nthr_save = plnr->nthr; + plnr->nthr = (plnr->nthr + nthr - 1) / nthr; +/* Start with all per-thread slots null so the nada path can walk the whole array. */ + cldws = (plan **) MALLOC(sizeof(plan *) * nthr, PLANS); + for (i = 0; i < nthr; ++i) cldws[i] = (plan *) 0; + + switch (p->kind[0]) { + case R2HC: + for (i = 0; i < nthr; ++i) { + cldws[i] = ego->mkcldw(ego, + R2HC, r, m, d[0].os, v, ovs, + i*block_size, + (i == nthr - 1) ? + (mcount - i*block_size) : block_size, + p->O, plnr); + if (!cldws[i]) goto nada; + } + + plnr->nthr = plnr_nthr_save; + + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)( + X(mktensor_1d)(m, r * d[0].is, d[0].os), + X(mktensor_2d)(r, d[0].is, m * d[0].os, + v, ivs, ovs), + p->I, p->O, p->kind) + ); + if (!cld) goto nada; + + pln = MKPLAN_RDFT(P, &padt, apply_dit); + break; + + case HC2R: + for (i = 0; i < nthr; ++i) { + cldws[i] = ego->mkcldw(ego, + HC2R, r, m, d[0].is, v, ivs, + i*block_size, + (i == nthr - 1) ? 
+ (mcount - i*block_size) : block_size, + p->I, plnr); + if (!cldws[i]) goto nada; + } + + plnr->nthr = plnr_nthr_save; + + cld = X(mkplan_d)(plnr, + X(mkproblem_rdft_d)( + X(mktensor_1d)(m, d[0].is, r * d[0].os), + X(mktensor_2d)(r, m * d[0].is, d[0].os, + v, ivs, ovs), + p->I, p->O, p->kind) + ); + if (!cld) goto nada; + + pln = MKPLAN_RDFT(P, &padt, apply_dif); + break; + + default: + A(0); + } +/* NOTE(review): on the default path pln is still 0 here; this relies on A(0) not returning - confirm for builds where A() is compiled out. */ + pln->cld = cld; + pln->cldws = cldws; + pln->nthr = nthr; + pln->r = r; + X(ops_zero)(&pln->super.super.ops); + for (i = 0; i < nthr; ++i) { + X(ops_add2)(&cldws[i]->ops, &pln->super.super.ops); + pln->super.super.could_prune_now_p |= cldws[i]->could_prune_now_p; + } + X(ops_add2)(&cld->ops, &pln->super.super.ops); + return &(pln->super.super); +/* Failure path: destroy the per-thread slots, free the array, destroy cld and report failure with a null plan. */ + nada: + if (cldws) { + for (i = 0; i < nthr; ++i) + X(plan_destroy_internal)(cldws[i]); + X(ifree)(cldws); + } + X(plan_destroy_internal)(cld); + return (plan *) 0; +} +/* Creates an hc2hc solver of the requested size whose planning function is the threaded mkplan() above, and records the radix r and the mkcldw factory in it. */ +hc2hc_solver *X(mksolver_hc2hc_threads)(size_t size, INT r, + hc2hc_mkinferior mkcldw) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + hc2hc_solver *slv = (hc2hc_solver *)X(mksolver)(size, &sadt); + slv->r = r; + slv->mkcldw = mkcldw; + return slv; +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/openmp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/openmp.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* openmp.c: thread spawning via OpenMP */ + +#include "threads.h" + +#if !defined(_OPENMP) +#error OpenMP enabled but not using an OpenMP compiler +#endif +/* This file has nothing to initialise; always returns 0. */ +int X(ithreads_init)(void) +{ + return 0; /* no error */ +} + +/* Distribute a loop from 0 to loopmax-1 over nthreads threads. + proc(d) is called to execute a block of iterations from d->min + to d->max-1. d->thr_num indicates the number of the thread + that is executing proc (from 0 to nthreads-1), and d->data is + the same as the data parameter passed to X(spawn_loop). + + This function returns only after all the threads have completed. */ +void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) +{ + int block_size; + spawn_data d; + int i; +/* Preconditions checked with A(): loopmax is non-negative, nthr is positive and proc is non-null. */ + A(loopmax >= 0); + A(nthr > 0); + A(proc); + + if (!loopmax) return; + + /* Choose the block size and number of threads in order to (1) + minimize the critical path and (2) use the fewest threads that + achieve the same critical path (to minimize overhead). + e.g. if loopmax is 5 and nthr is 4, we should use only 3 + threads with block sizes of 2, 2, and 1. 
*/ + block_size = (loopmax + nthr - 1) / nthr; + nthr = (loopmax + block_size - 1) / block_size; +/* The private(d) clause gives every OpenMP thread its own copy of d, which each iteration fills in completely before calling proc. */ + THREAD_ON; /* prevent debugging mode from failing under threads */ +#pragma omp parallel for private(d) + for (i = 0; i < nthr; ++i) { + d.max = (d.min = i * block_size) + block_size; + if (d.max > loopmax) + d.max = loopmax; + d.thr_num = i; + d.data = data; + proc(&d); + } + THREAD_OFF; /* prevent debugging mode from failing under threads */ +} +/* Empty: X(ithreads_init) above creates nothing that would need releasing. */ +void X(threads_cleanup)(void) +{ +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/rdft-vrank-geq1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/rdft-vrank-geq1.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "threads.h" +/* Solver record: the vecloop_dim / buddies / nbuddies triple that pickdim() forwards to X(pickdim). */ +typedef struct { + solver super; + int vecloop_dim; + const int *buddies; + int nbuddies; +} S; +/* Plan record: cldrn[0..nthr-1] are the per-thread child plans; its/ots are the input/output pointer offsets between consecutive threads' blocks. */ +typedef struct { + plan_rdft super; + plan **cldrn; + INT its, ots; + int nthr; + const S *solver; +} P; +/* Arguments of one apply() call, handed to the workers through spawn_data.data. */ +typedef struct { + INT its, ots; + R *I, *O; + plan **cldrn; +} PD; +/* Worker body: runs child plan number d->thr_num on I offset by thr_num * its and O offset by thr_num * ots. Always returns 0. */ +static void *spawn_apply(spawn_data *d) +{ + PD *ego = (PD *) d->data; + int thr_num = d->thr_num; + plan_rdft *cld = (plan_rdft *) ego->cldrn[d->thr_num]; + + cld->apply((plan *) cld, + ego->I + thr_num * ego->its, ego->O + thr_num * ego->ots); + return 0; +} +/* Executes the plan: packs the strides, child plans and array pointers into a PD and lets X(spawn_loop) run spawn_apply once per child. */ +static void apply(const plan *ego_, R *I, R *O) +{ + const P *ego = (const P *) ego_; + PD d; + + d.its = ego->its; + d.ots = ego->ots; + d.cldrn = ego->cldrn; + d.I = I; d.O = O; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*) &d); +} +/* Passes the wakefulness state on to every child plan. */ +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + int i; + for (i = 0; i < ego->nthr; ++i) + X(plan_awake)(ego->cldrn[i], wakefulness); +} +/* Destroys every child plan, then frees the cldrn array itself. */ +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + int i; + for (i = 0; i < ego->nthr; ++i) + X(plan_destroy_internal)(ego->cldrn[i]); + X(ifree)(ego->cldrn); +} +/* Prints the plan header followed by the children; a child is printed only when it differs from the one before it and from the one two places before it. */ +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + int i; + p->print(p, "(rdft-thr-vrank>=1-x%d/%d", ego->nthr, s->vecloop_dim); + for (i = 0; i < ego->nthr; ++i) + if (i == 0 || (ego->cldrn[i] != ego->cldrn[i-1] && + (i <= 1 || ego->cldrn[i] != ego->cldrn[i-2]))) + p->print(p, "%(%p%)", ego->cldrn[i]); + p->putchr(p, ')'); +} +/* Forwards this solver's dimension settings to X(pickdim); dp is passed through unchanged and mkplan() later uses the value it receives as the index of the vector dimension to split. */ +static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) +{ + return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, + vecsz, oop, dp); +} +/* Basic applicability: more than one planner thread, a finite positive vector rank, and a successful pickdim(). The oop argument is nonzero when I and O differ. */ +static int applicable0(const 
solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p = (const problem_rdft *) p_; + + return (1 + && plnr->nthr > 1 + && FINITE_RNK(p->vecsz->rnk) + && p->vecsz->rnk > 0 + && pickdim(ego, p->vecsz, p->I != p->O, dp) + ); +} +/* Full applicability: applicable0() plus, when NO_VRANK_SPLITSP(plnr) holds, the requirement that vecloop_dim equals the first buddy. */ +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *)ego_; + + if (!applicable0(ego_, p_, plnr, dp)) return 0; + + /* fftw2 behavior */ + if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) + return 0; + + return 1; +} +/* Builds the threaded plan: one child problem per block of the chosen vector dimension. Returns a null plan when the solver is not applicable or any child cannot be planned. */ +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft *p; + P *pln; + problem *cldp; + int vdim; + iodim *d; + plan **cldrn = (plan **) 0; + int i, nthr; + INT its, ots, block_size; + tensor *vecsz; + + static const plan_adt padt = { + X(rdft_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &vdim)) + return (plan *) 0; + p = (const problem_rdft *) p_; + + d = p->vecsz->dims + vdim; +/* Split d->n into nthr blocks of block_size elements (ceiling division) and leave the planner ceil(plnr->nthr / nthr) threads for each child. NOTE(review): plnr->nthr is not restored in this function; presumably the caller does so - confirm. */ + block_size = (d->n + plnr->nthr - 1) / plnr->nthr; + nthr = (int)((d->n + block_size - 1) / block_size); + plnr->nthr = (plnr->nthr + nthr - 1) / nthr; + its = d->is * block_size; + ots = d->os * block_size; +/* Start with all child slots null so the nada path can walk the whole array. */ + cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS); + for (i = 0; i < nthr; ++i) cldrn[i] = (plan *) 0; +/* vecsz is declared without an initialiser but is assigned here, before the only goto nada, so the cleanup path never reads it unset. */ + vecsz = X(tensor_copy)(p->vecsz); + for (i = 0; i < nthr; ++i) { + vecsz->dims[vdim].n = + (i == nthr - 1) ? 
(d->n - i*block_size) : block_size; + cldp = X(mkproblem_rdft)(p->sz, vecsz, + p->I + i*its, p->O + i*ots, p->kind); + cldrn[i] = X(mkplan_d)(plnr, cldp); + if (!cldrn[i]) goto nada; + } + X(tensor_destroy)(vecsz); + + pln = MKPLAN_RDFT(P, &padt, apply); + + pln->cldrn = cldrn; + pln->its = its; + pln->ots = ots; + pln->nthr = nthr; +/* The plan's operation count and pcost are the sums over all children. */ + pln->solver = ego; + X(ops_zero)(&pln->super.super.ops); + pln->super.super.pcost = 0; + for (i = 0; i < nthr; ++i) { + X(ops_add2)(&cldrn[i]->ops, &pln->super.super.ops); + pln->super.super.pcost += cldrn[i]->pcost; + } + + return &(pln->super.super); +/* Failure path: destroy the child slots, free the array and the tensor copy, and report failure with a null plan. */ + nada: + if (cldrn) { + for (i = 0; i < nthr; ++i) + X(plan_destroy_internal)(cldrn[i]); + X(ifree)(cldrn); + } + X(tensor_destroy)(vecsz); + return (plan *) 0; +} +/* Creates a solver of type S and records the three dimension settings in it. */ +static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->vecloop_dim = vecloop_dim; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} +/* Registers one solver per entry of buddies[], each using that entry as its vecloop_dim and sharing the same buddies array. */ +void X(rdft_thr_vrank_geq1_register)(planner *p) +{ + int i; + + /* FIXME: Should we try other vecloop_dim values? */ + static const int buddies[] = { 1, -1 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/threads.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/threads.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* threads.c: Portable thread spawning for loops, via the X(spawn_loop) + function. The first portion of this file is a set of macros to + spawn and join threads on various systems. */ + +#include "threads.h" + +#if defined(USING_POSIX_THREADS) + +#include <pthread.h> + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +/* implementation of semaphores and mutexes: */ +#if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L)) + + /* If optional POSIX semaphores are supported, use them to + implement both semaphores and mutexes. */ +# include <semaphore.h> +# include <errno.h> + + typedef sem_t os_sem_t; +/* Semaphores are created with pshared = 0 and an initial count of 0; the sem_init return value is not checked. */ + static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); } + static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); } +/* Decrement: sem_wait is retried while it fails with errno == EINTR; any other failure trips CK. */ + static void os_sem_down(os_sem_t *s) + { + int err; + do { + err = sem_wait(s); + } while (err == -1 && errno == EINTR); + CK(err == 0); + } +/* Increment: posts the semaphore once. */ + static void os_sem_up(os_sem_t *s) { sem_post(s); } + + /* + The reason why we use sem_t to implement mutexes is that I have + seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using + pthread_mutex_t, but no hangs with sem_t or with linux >= + 2.6.24. For lack of better information, sem_t looks like the + safest choice. 
+ */ + typedef sem_t os_mutex_t; + static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); } + #define os_mutex_destroy os_sem_destroy + #define os_mutex_lock os_sem_down + #define os_mutex_unlock os_sem_up + +#else + + /* If optional POSIX semaphores are not defined, use pthread + mutexes for mutexes, and simulate semaphores with condition + variables */ + typedef pthread_mutex_t os_mutex_t; + + static void os_mutex_init(os_mutex_t *s) + { + pthread_mutex_init(s, (pthread_mutexattr_t *)0); + } + + static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); } + static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); } + static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); } +/* Counting semaphore built from a mutex and a condition variable: x is the count, accessed under m; c is signalled by os_sem_up. */ + typedef struct { + pthread_mutex_t m; + pthread_cond_t c; + volatile int x; + } os_sem_t; + + static void os_sem_init(os_sem_t *s) + { + pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0); + pthread_cond_init(&s->c, (pthread_condattr_t *)0); + + /* wrap initialization in lock to exploit the release + semantics of pthread_mutex_unlock() */ + pthread_mutex_lock(&s->m); + s->x = 0; + pthread_mutex_unlock(&s->m); + } + + static void os_sem_destroy(os_sem_t *s) + { + pthread_mutex_destroy(&s->m); + pthread_cond_destroy(&s->c); + } +/* Waits on the condition variable while the count is not positive, then decrements the count. */ + static void os_sem_down(os_sem_t *s) + { + pthread_mutex_lock(&s->m); + while (s->x <= 0) + pthread_cond_wait(&s->c, &s->m); + --s->x; + pthread_mutex_unlock(&s->m); + } +/* Increments the count and signals the condition variable. */ + static void os_sem_up(os_sem_t *s) + { + pthread_mutex_lock(&s->m); + ++s->x; + pthread_cond_signal(&s->c); + pthread_mutex_unlock(&s->m); + } + +#endif + +#define FFTW_WORKER void * +/* Starts worker(arg) on a new thread created with system scope and detached state. The thread id is discarded and the pthread_* return codes are not checked. */ +static void os_create_thread(FFTW_WORKER (*worker)(void *arg), + void *arg) +{ + pthread_attr_t attr; + pthread_t tid; + + pthread_attr_init(&attr); + pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&tid, &attr, worker, (void *)arg); + pthread_attr_destroy(&attr); +} +/* Terminates the calling thread. */ +static void 
os_destroy_thread(void) +{ + pthread_exit((void *)0); +} + +#elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS) +/* hack: windef.h defines INT for its own purposes and this causes + a conflict with our own INT in ifftw.h. Divert the windows + definition into another name unlikely to cause a conflict */ +#define INT magnus_ab_INTegro_seclorum_nascitur_ordo +#include <windows.h> +#include <process.h> +#undef INT + +typedef HANDLE os_mutex_t; + +static void os_mutex_init(os_mutex_t *s) +{ + *s = CreateMutex(NULL, FALSE, NULL); +} + +static void os_mutex_destroy(os_mutex_t *s) +{ + CloseHandle(*s); +} + +static void os_mutex_lock(os_mutex_t *s) +{ + WaitForSingleObject(*s, INFINITE); +} + +static void os_mutex_unlock(os_mutex_t *s) +{ + ReleaseMutex(*s); +} + +typedef HANDLE os_sem_t; +/* Creates the semaphore with an initial count of 0 and a maximum count of 0x7FFFFFFF. */ +static void os_sem_init(os_sem_t *s) +{ + *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL); +} + +static void os_sem_destroy(os_sem_t *s) +{ + CloseHandle(*s); +} + +static void os_sem_down(os_sem_t *s) +{ + WaitForSingleObject(*s, INFINITE); +} + +static void os_sem_up(os_sem_t *s) +{ + ReleaseSemaphore(*s, 1, NULL); +} + +#define FFTW_WORKER unsigned __stdcall +typedef unsigned (__stdcall *winthread_start) (void *); +/* Starts worker(arg) on a new thread; the value returned by _beginthreadex is discarded. */ +static void os_create_thread(winthread_start worker, + void *arg) +{ + _beginthreadex((void *)NULL, /* security attrib */ + 0, /* stack size */ + worker, /* start address */ + arg, /* parameters */ + 0, /* creation flags */ + (unsigned *)NULL); /* tid */ +} +/* Terminates the calling thread. */ +static void os_destroy_thread(void) +{ + _endthreadex(0); +} + + +#else +#error "No threading layer defined" +#endif + +/************************************************************************/ + +/* Main code: a worker waits on ready for the work item stored in w, posts done when the item is finished, and is linked through cdr while it sits in the idle list. */ +struct worker { + os_sem_t ready; + os_sem_t done; + struct work *w; + struct worker *cdr; +}; +/* Allocates a worker and initialises its two semaphores; w and cdr are left unset. */ +static struct worker *make_worker(void) +{ + struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER); + os_sem_init(&q->ready); + os_sem_init(&q->done); + return q; +} +/* Destroys both semaphores of q and frees q. */ +static void unmake_worker(struct 
worker *q) +{ + os_sem_destroy(&q->done); + os_sem_destroy(&q->ready); + X(ifree)(q); +} +/* One unit of work: the function to call, the spawn_data it is called with, and the worker it was given to. */ +struct work { + spawn_function proc; + spawn_data d; + struct worker *q; /* the worker responsible for performing this work */ +}; + +static os_mutex_t queue_lock; +static os_sem_t termination_semaphore; +/* Head of the list of idle workers (linked through cdr); it is read and written inside WITH_QUEUE_LOCK. */ +static struct worker *worker_queue; +#define WITH_QUEUE_LOCK(what) \ +{ \ + os_mutex_lock(&queue_lock); \ + what; \ + os_mutex_unlock(&queue_lock); \ +} +/* Thread main loop: repeatedly waits for a work item and runs it; an item whose proc is null makes the thread post termination_semaphore and exit. */ +static FFTW_WORKER worker(void *arg) +{ + struct worker *ego = (struct worker *)arg; + struct work *w; + + for (;;) { + /* wait until work becomes available */ + os_sem_down(&ego->ready); + + w = ego->w; + + /* !w->proc ==> terminate worker */ + if (!w->proc) break; + + /* do the work */ + w->proc(&w->d); + + /* signal that work is done */ + os_sem_up(&ego->done); + } + + /* termination protocol */ + os_sem_up(&termination_semaphore); + + os_destroy_thread(); + /* UNREACHABLE */ + return 0; +} +/* Pushes q onto the front of the idle list. */ +static void enqueue(struct worker *q) +{ + WITH_QUEUE_LOCK({ + q->cdr = worker_queue; + worker_queue = q; + }); +} +/* Pops the first idle worker; when the list is empty, makes a new worker and starts its thread instead. */ +static struct worker *dequeue(void) +{ + struct worker *q; + + WITH_QUEUE_LOCK({ + q = worker_queue; + if (q) + worker_queue = q->cdr; + }); + + if (!q) { + /* no worker is available. Create one */ + q = make_worker(); + os_create_thread(worker, q); + } + + return q; +} + +/* Stops every worker in the idle list: each one is handed a work item with a null proc, waited for on termination_semaphore, and then unmade. */ +static void kill_workforce(void) +{ + struct work w; + + w.proc = 0; + + THREAD_ON; /* needed for debugging mode: since make_worker + is called from dequeue which is only called in + thread_on mode, we need to unmake_worker in thread_on. */ + WITH_QUEUE_LOCK({ + /* tell all workers that they must terminate. + + Because workers enqueue themselves before signaling the + completion of the work, all workers belong to the worker queue + if we get here. 
Also, all workers are waiting at + os_sem_down(ready), so we can hold the queue lock without + deadlocking. NOTE(review): in this file enqueue() is called by X(spawn_loop) after it has waited on done, not by the worker itself; confirm the wording above. */ + while (worker_queue) { + struct worker *q = worker_queue; + worker_queue = q->cdr; + q->w = &w; + os_sem_up(&q->ready); + os_sem_down(&termination_semaphore); + unmake_worker(q); + } + }); + THREAD_OFF; +} +/* Initialises the queue lock and the termination semaphore and empties the idle list; always returns 0. */ +int X(ithreads_init)(void) +{ + os_mutex_init(&queue_lock); + os_sem_init(&termination_semaphore); + + WITH_QUEUE_LOCK({ + worker_queue = 0; + }) + + return 0; /* no error */ +} + +/* Distribute a loop from 0 to loopmax-1 over nthreads threads. + proc(d) is called to execute a block of iterations from d->min + to d->max-1. d->thr_num indicates the number of the thread + that is executing proc (from 0 to nthreads-1), and d->data is + the same as the data parameter passed to X(spawn_loop). + + This function returns only after all the threads have completed. */ +void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) +{ + int block_size; + struct work *r; + int i; +/* Preconditions checked with A(): loopmax is non-negative, nthr is positive and proc is non-null. */ + A(loopmax >= 0); + A(nthr > 0); + A(proc); + + if (!loopmax) return; + + /* Choose the block size and number of threads in order to (1) + minimize the critical path and (2) use the fewest threads that + achieve the same critical path (to minimize overhead). + e.g. if loopmax is 5 and nthr is 4, we should use only 3 + threads with block sizes of 2, 2, and 1. 
*/ + block_size = (loopmax + nthr - 1) / nthr; + nthr = (loopmax + block_size - 1) / block_size; +/* One struct work per block; the array is obtained with STACK_MALLOC and released with STACK_FREE before returning. */ + THREAD_ON; /* prevent debugging mode from failing under threads */ + STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr); + + /* distribute work: */ + for (i = 0; i < nthr; ++i) { + struct work *w = &r[i]; + spawn_data *d = &w->d; + + d->max = (d->min = i * block_size) + block_size; + if (d->max > loopmax) + d->max = loopmax; + d->thr_num = i; + d->data = data; + w->proc = proc; + + if (i == nthr - 1) { + /* do the work ourselves */ + proc(d); + } else { + /* assign a worker to W */ + w->q = dequeue(); + + /* tell worker w->q to do it */ + w->q->w = w; /* Dirac could have written this */ + os_sem_up(&w->q->ready); + } + } +/* Wait for each helper block to finish and put its worker back on the idle list. */ + for (i = 0; i < nthr - 1; ++i) { + struct work *w = &r[i]; + os_sem_down(&w->q->done); + enqueue(w->q); + } + + STACK_FREE(r); + THREAD_OFF; /* prevent debugging mode from failing under threads */ +} +/* Stops the idle workers, then destroys the queue lock and the termination semaphore. */ +void X(threads_cleanup)(void) +{ + kill_workforce(); + os_mutex_destroy(&queue_lock); + os_sem_destroy(&termination_semaphore); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/threads.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/threads.h Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __THREADS_H__ +#define __THREADS_H__ + +#include "ifftw.h" +#include "ct.h" +#include "hc2hc.h" + +typedef struct { + int min, max, thr_num; + void *data; +} spawn_data; + +typedef void *(*spawn_function) (spawn_data *); + +void X(spawn_loop)(int loopmax, int nthreads, + spawn_function proc, void *data); +int X(ithreads_init)(void); +void X(threads_cleanup)(void); + +/* configurations */ + +void X(dft_thr_vrank_geq1_register)(planner *p); +void X(rdft_thr_vrank_geq1_register)(planner *p); +void X(rdft2_thr_vrank_geq1_register)(planner *p); + +ct_solver *X(mksolver_ct_threads)(size_t size, INT r, int dec, + ct_mkinferior mkcldw, + ct_force_vrecursion force_vrecursionp); +hc2hc_solver *X(mksolver_hc2hc_threads)(size_t size, INT r, hc2hc_mkinferior mkcldw); + +void X(threads_conf_standard)(planner *p); +void X(threads_register_hooks)(void); +void X(threads_unregister_hooks)(void); +#endif /* __THREADS_H__ */ diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/threads/vrank-geq1-rdft2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/threads/vrank-geq1-rdft2.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + + +#include "threads.h" + +typedef struct { + solver super; + int vecloop_dim; + const int *buddies; + int nbuddies; +} S; + +typedef struct { + plan_rdft2 super; + + plan **cldrn; + INT its, ots; + int nthr; + const S *solver; +} P; + +typedef struct { + INT its, ots; + R *r0, *r1, *cr, *ci; + plan **cldrn; +} PD; + +static void *spawn_apply(spawn_data *d) +{ + PD *ego = (PD *) d->data; + INT its = ego->its; + INT ots = ego->ots; + int thr_num = d->thr_num; + plan_rdft2 *cld = (plan_rdft2 *) ego->cldrn[d->thr_num]; + + cld->apply((plan *) cld, + ego->r0 + thr_num * its, ego->r1 + thr_num * its, + ego->cr + thr_num * ots, ego->ci + thr_num * ots); + return 0; +} + +static void apply(const plan *ego_, R *r0, R *r1, R *cr, R *ci) +{ + const P *ego = (const P *) ego_; + PD d; + + d.its = ego->its; + d.ots = ego->ots; + d.cldrn = ego->cldrn; + d.r0 = r0; d.r1 = r1; d.cr = cr; d.ci = ci; + + X(spawn_loop)(ego->nthr, ego->nthr, spawn_apply, (void*) &d); +} + +static void awake(plan *ego_, enum wakefulness wakefulness) +{ + P *ego = (P *) ego_; + int i; + for (i = 0; i < ego->nthr; ++i) + X(plan_awake)(ego->cldrn[i], wakefulness); +} + +static void destroy(plan *ego_) +{ + P *ego = (P *) ego_; + int i; + for (i = 0; i < ego->nthr; ++i) + X(plan_destroy_internal)(ego->cldrn[i]); + X(ifree)(ego->cldrn); +} + +static void print(const plan *ego_, printer *p) +{ + const P *ego = (const P *) ego_; + const S *s = ego->solver; + int i; + p->print(p, "(rdft2-thr-vrank>=1-x%d/%d)", ego->nthr, s->vecloop_dim); + for (i = 0; i < ego->nthr; ++i) + if (i == 0 || (ego->cldrn[i] != ego->cldrn[i-1] && + (i <= 1 || ego->cldrn[i] != ego->cldrn[i-2]))) + p->print(p, "%(%p%)", ego->cldrn[i]); + p->putchr(p, 
')'); +} + +static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) +{ + return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, + vecsz, oop, dp); +} + +static int applicable0(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *) ego_; + const problem_rdft2 *p = (const problem_rdft2 *) p_; + + if (FINITE_RNK(p->vecsz->rnk) + && p->vecsz->rnk > 0 + && plnr->nthr > 1 + && pickdim(ego, p->vecsz, p->r0 != p->cr, dp)) { + if (p->r0 != p->cr) + return 1; /* can always operate out-of-place */ + + return(X(rdft2_inplace_strides)(p, *dp)); + } + + return 0; +} + +static int applicable(const solver *ego_, const problem *p_, + const planner *plnr, int *dp) +{ + const S *ego = (const S *)ego_; + + if (!applicable0(ego_, p_, plnr, dp)) return 0; + + /* fftw2 behavior */ + if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) + return 0; + + return 1; +} + +static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) +{ + const S *ego = (const S *) ego_; + const problem_rdft2 *p; + P *pln; + problem *cldp; + int vdim; + iodim *d; + plan **cldrn = (plan **) 0; + int i, nthr; + INT its, ots, block_size; + tensor *vecsz; + + static const plan_adt padt = { + X(rdft2_solve), awake, print, destroy + }; + + if (!applicable(ego_, p_, plnr, &vdim)) + return (plan *) 0; + p = (const problem_rdft2 *) p_; + + d = p->vecsz->dims + vdim; + + block_size = (d->n + plnr->nthr - 1) / plnr->nthr; + nthr = (int)((d->n + block_size - 1) / block_size); + plnr->nthr = (plnr->nthr + nthr - 1) / nthr; + X(rdft2_strides)(p->kind, d, &its, &ots); + its *= block_size; ots *= block_size; + + cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS); + for (i = 0; i < nthr; ++i) cldrn[i] = (plan *) 0; + + vecsz = X(tensor_copy)(p->vecsz); + for (i = 0; i < nthr; ++i) { + vecsz->dims[vdim].n = + (i == nthr - 1) ? 
(d->n - i*block_size) : block_size; + cldp = X(mkproblem_rdft2)(p->sz, vecsz, + p->r0 + i*its, p->r1 + i*its, + p->cr + i*ots, p->ci + i*ots, + p->kind); + cldrn[i] = X(mkplan_d)(plnr, cldp); + if (!cldrn[i]) goto nada; + } + X(tensor_destroy)(vecsz); + + pln = MKPLAN_RDFT2(P, &padt, apply); + + pln->cldrn = cldrn; + pln->its = its; + pln->ots = ots; + pln->nthr = nthr; + + pln->solver = ego; + X(ops_zero)(&pln->super.super.ops); + pln->super.super.pcost = 0; + for (i = 0; i < nthr; ++i) { + X(ops_add2)(&cldrn[i]->ops, &pln->super.super.ops); + pln->super.super.pcost += cldrn[i]->pcost; + } + + return &(pln->super.super); + + nada: + if (cldrn) { + for (i = 0; i < nthr; ++i) + X(plan_destroy_internal)(cldrn[i]); + X(ifree)(cldrn); + } + X(tensor_destroy)(vecsz); + return (plan *) 0; +} + +static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) +{ + static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; + S *slv = MKSOLVER(S, &sadt); + slv->vecloop_dim = vecloop_dim; + slv->buddies = buddies; + slv->nbuddies = nbuddies; + return &(slv->super); +} + +void X(rdft2_thr_vrank_geq1_register)(planner *p) +{ + int i; + + /* FIXME: Should we try other vecloop_dim values? 
*/ + static const int buddies[] = { 1, -1 }; + + const int nbuddies = (int)(sizeof(buddies) / sizeof(buddies[0])); + + for (i = 0; i < nbuddies; ++i) + REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); +} diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tools/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tools/Makefile.am Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,31 @@ +AM_CPPFLAGS = -I$(top_srcdir)/libbench2 -I$(top_srcdir)/api + +bin_SCRIPTS = fftw-wisdom-to-conf +bin_PROGRAMS = fftw@PREC_SUFFIX@-wisdom + +BUILT_SOURCES = fftw-wisdom-to-conf fftw@PREC_SUFFIX@-wisdom.1 +EXTRA_DIST = fftw-wisdom-to-conf.in + +dist_man_MANS = fftw-wisdom-to-conf.1 fftw@PREC_SUFFIX@-wisdom.1 +EXTRA_MANS = fftw_wisdom.1.in +fftw@PREC_SUFFIX@-wisdom.1: fftw_wisdom.1 + rm -f $@ + cp fftw_wisdom.1 $@ + +if THREADS +fftw@PREC_SUFFIX@_wisdom_CFLAGS = $(PTHREAD_CFLAGS) +if !COMBINED_THREADS +LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la +endif +else +if OPENMP +fftw@PREC_SUFFIX@_wisdom_CFLAGS = $(OPENMP_CFLAGS) +LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la +endif +endif + +fftw@PREC_SUFFIX@_wisdom_SOURCES = fftw-wisdom.c +fftw@PREC_SUFFIX@_wisdom_LDADD = $(top_builddir)/tests/bench-bench.o \ +$(top_builddir)/tests/bench-fftw-bench.o $(LIBFFTWTHREADS) \ +$(top_builddir)/libfftw3@PREC_SUFFIX@.la \ +$(top_builddir)/libbench2/libbench2.a $(THREADLIBS) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tools/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tools/Makefile.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,750 @@ +# Makefile.in generated by automake 1.14 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = 
CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = fftw@PREC_SUFFIX@-wisdom$(EXEEXT) +subdir = tools +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(srcdir)/fftw_wisdom.1.in $(srcdir)/fftw-wisdom-to-conf.in \ + $(top_srcdir)/depcomp $(dist_man_MANS) +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \ + $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/ax_cc_maxopt.m4 \ + $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ + $(top_srcdir)/m4/ax_compiler_vendor.m4 \ + $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ + $(top_srcdir)/m4/ax_gcc_version.m4 \ + $(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = fftw_wisdom.1 fftw-wisdom-to-conf +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" \ + "$(DESTDIR)$(man1dir)" +PROGRAMS = $(bin_PROGRAMS) +am_fftw@PREC_SUFFIX@_wisdom_OBJECTS = \ + fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.$(OBJEXT) +fftw@PREC_SUFFIX@_wisdom_OBJECTS = \ + $(am_fftw@PREC_SUFFIX@_wisdom_OBJECTS) +am__DEPENDENCIES_1 = +fftw@PREC_SUFFIX@_wisdom_DEPENDENCIES = \ + $(top_builddir)/tests/bench-bench.o \ + $(top_builddir)/tests/bench-fftw-bench.o $(LIBFFTWTHREADS) \ + $(top_builddir)/libfftw3@PREC_SUFFIX@.la \ + $(top_builddir)/libbench2/libbench2.a 
$(am__DEPENDENCIES_1) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +fftw@PREC_SUFFIX@_wisdom_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(fftw@PREC_SUFFIX@_wisdom_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +SCRIPTS = $(bin_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(fftw@PREC_SUFFIX@_wisdom_SOURCES) +DIST_SOURCES = $(fftw@PREC_SUFFIX@_wisdom_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = $(dist_man_MANS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = 
@AUTOMAKE@ +AVX_CFLAGS = @AVX_CFLAGS@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_PL_OPTS = @CHECK_PL_OPTS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@ +C_MPI_FINT = @C_MPI_FINT@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FGREP = @FGREP@ +FLIBS = @FLIBS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBQUADMATH = @LIBQUADMATH@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPILIBS = @MPILIBS@ +MPIRUN = @MPIRUN@ +NEON_CFLAGS = @NEON_CFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OCAMLBUILD = @OCAMLBUILD@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POW_LIB = @POW_LIB@ +PRECISION = @PRECISION@ +PREC_SUFFIX = @PREC_SUFFIX@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +SSE2_CFLAGS = @SSE2_CFLAGS@ +STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = 
@abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_ct_F77 = @ac_ct_F77@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/libbench2 -I$(top_srcdir)/api +bin_SCRIPTS = fftw-wisdom-to-conf +BUILT_SOURCES = fftw-wisdom-to-conf fftw@PREC_SUFFIX@-wisdom.1 +EXTRA_DIST = fftw-wisdom-to-conf.in +dist_man_MANS = fftw-wisdom-to-conf.1 fftw@PREC_SUFFIX@-wisdom.1 +EXTRA_MANS = fftw_wisdom.1.in +@OPENMP_TRUE@@THREADS_FALSE@fftw@PREC_SUFFIX@_wisdom_CFLAGS = $(OPENMP_CFLAGS) +@THREADS_TRUE@fftw@PREC_SUFFIX@_wisdom_CFLAGS = $(PTHREAD_CFLAGS) +@COMBINED_THREADS_FALSE@@THREADS_TRUE@LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la +@OPENMP_TRUE@@THREADS_FALSE@LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la 
+fftw@PREC_SUFFIX@_wisdom_SOURCES = fftw-wisdom.c +fftw@PREC_SUFFIX@_wisdom_LDADD = $(top_builddir)/tests/bench-bench.o \ +$(top_builddir)/tests/bench-fftw-bench.o $(LIBFFTWTHREADS) \ +$(top_builddir)/libfftw3@PREC_SUFFIX@.la \ +$(top_builddir)/libbench2/libbench2.a $(THREADLIBS) + +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tools/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu tools/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +fftw_wisdom.1: $(top_builddir)/config.status $(srcdir)/fftw_wisdom.1.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +fftw-wisdom-to-conf: $(top_builddir)/config.status $(srcdir)/fftw-wisdom-to-conf.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + 
@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +fftw@PREC_SUFFIX@-wisdom$(EXEEXT): $(fftw@PREC_SUFFIX@_wisdom_OBJECTS) $(fftw@PREC_SUFFIX@_wisdom_DEPENDENCIES) 
$(EXTRA_fftw@PREC_SUFFIX@_wisdom_DEPENDENCIES) + @rm -f fftw@PREC_SUFFIX@-wisdom$(EXEEXT) + $(AM_V_CCLD)$(fftw@PREC_SUFFIX@_wisdom_LINK) $(fftw@PREC_SUFFIX@_wisdom_OBJECTS) $(fftw@PREC_SUFFIX@_wisdom_LDADD) $(LIBS) +install-binSCRIPTS: $(bin_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.o: fftw-wisdom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(fftw@PREC_SUFFIX@_wisdom_CFLAGS) $(CFLAGS) -MT fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.o -MD -MP -MF $(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Tpo -c -o fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.o `test -f 'fftw-wisdom.c' || echo '$(srcdir)/'`fftw-wisdom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Tpo $(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fftw-wisdom.c' object='fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(fftw@PREC_SUFFIX@_wisdom_CFLAGS) $(CFLAGS) -c -o fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.o `test -f 'fftw-wisdom.c' || echo '$(srcdir)/'`fftw-wisdom.c + +fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.obj: fftw-wisdom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(fftw@PREC_SUFFIX@_wisdom_CFLAGS) $(CFLAGS) -MT fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.obj -MD -MP -MF $(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Tpo -c -o fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.obj `if test -f 'fftw-wisdom.c'; then $(CYGPATH_W) 'fftw-wisdom.c'; else $(CYGPATH_W) '$(srcdir)/fftw-wisdom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Tpo $(DEPDIR)/fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fftw-wisdom.c' object='fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(fftw@PREC_SUFFIX@_wisdom_CFLAGS) $(CFLAGS) -c -o fftw@PREC_SUFFIX@_wisdom-fftw-wisdom.obj `if test -f 'fftw-wisdom.c'; then $(CYGPATH_W) 'fftw-wisdom.c'; else $(CYGPATH_W) '$(srcdir)/fftw-wisdom.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-man1: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else 
d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(MANS) +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-man + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS install-binSCRIPTS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS uninstall-man + +uninstall-man: uninstall-man1 + +.MAKE: all check install install-am install-strip + +.PHONY: all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool cscopelist-am ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool distdir \ + dvi dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-binSCRIPTS install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-man1 install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am 
uninstall-binPROGRAMS \ + uninstall-binSCRIPTS uninstall-man uninstall-man1 + +fftw@PREC_SUFFIX@-wisdom.1: fftw_wisdom.1 + rm -f $@ + cp fftw_wisdom.1 $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tools/fftw-wisdom-to-conf.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tools/fftw-wisdom-to-conf.1 Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,91 @@ +.\" +.\" Copyright (c) 2003, 2007-14 Matteo Frigo +.\" Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology +.\" +.\" This program is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" +.\" This program is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with this program; if not, write to the Free Software +.\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +.\" +.TH FFTW-WISDOM-TO-CONF 1 "February, 2003" "fftw" "fftw" +.SH NAME +fftw\-wisdom\-to\-conf \- generate C configuration routines from FFTW wisdom +.SH SYNOPSIS +\fBfftw\-wisdom\-to\-conf\fR [< \fIINPUT\fR] [> \fIOUTPUT\fR] +.SH DESCRIPTION +.PP +.\" Add any additional description here +.I fftw\-wisdom\-to\-conf +is a utility to generate C +.B configuration +routines from FFTW +.B wisdom +files, where the latter contain saved information about how to +optimally compute (Fourier) transforms of various sizes.
A +configuration routine is a C subroutine that you link into your +program, replacing a routine of the same name in the FFTW library, +that determines which parts of FFTW are callable by your program. + +The reason to do this is that, if you only need transforms of a +limited set of sizes and types, and if you are statically linking your +program, then using a configuration file generated from wisdom for +those types can substantially reduce the size of your executable. +(Otherwise, because of FFTW's dynamic nature, all of FFTW's transform +code must be linked into any program using FFTW.) + +FFTW is a free library to compute discrete Fourier transforms in one +or more dimensions, for arbitrary sizes, and of both real and complex +data, among other related operations. More information on FFTW can be +found at the FFTW home page: +.I http://www.fftw.org + +.I fftw\-wisdom\-to\-conf +reads wisdom from standard input and writes the configuration to +standard output. It can easily be combined with the +.I fftw\-wisdom +tool, for example: + +fftw\-wisdom \-n \-o wisdom cof1024 cob1024 +.br +fftw\-wisdom\-to\-conf < wisdom > conf.c + +will create a configuration "conf.c" containing only those parts of +FFTW needed for the optimized complex forwards and backwards +out-of-place transforms of size 1024 (also saving the wisdom itself in +"wisdom"). + +Alternatively, you can run your actual program, export wisdom for all +plans that were created (ideally in FFTW_PATIENT or FFTW_EXHAUSTIVE +mode), use this as input for \fIfftw\-wisdom\-to\-conf\fR, +and then re-link your program with the resulting configuration routine. + +Note that the configuration routine does not contain the wisdom, only +the routines necessary to implement the wisdom, so your program should +also import the wisdom in order to benefit from the pre-optimized +plans. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +Display help on the command-line options and usage. 
+.TP +\fB\-V\fR, \fB\-\-version\fR +Print the version number and copyright information. +.SH BUGS +Send bug reports to fftw@fftw.org. +.SH AUTHORS +Written by Steven G. Johnson and Matteo Frigo. + +Copyright (c) 2003, 2007-14 Matteo Frigo +.br +Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology +.SH "SEE ALSO" +fftw-wisdom(1) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tools/fftw-wisdom-to-conf.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tools/fftw-wisdom-to-conf.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,82 @@ +#! /bin/sh + +if test "x$1" = "x--help" || test "x$1" = "x-h"; then + cat < OUTPUT] +Convert wisdom (stdin) to C configuration routine (stdout). + +Options: + -h, --help: print this help + -V, --version: print version/copyright info +EOF + exit 0 +fi + +if test "x$1" = "x--version" || test "x$1" = "x-V"; then + cat <&2 + exit 1 + ;; +esac + +cat <<\fIinplace\fR><\fIdirection\fR><\fIgeometry\fR> + +<\fItype\fR> is either \'c\' (complex), \'r\' (real, r2c/c2r), or +\'k\' (r2r, per-dimension kinds, specified in the geometry, below). + +<\fIinplace\fR> is either \'i\' (in place) or \'o\' (out of place). + +<\fIdirection\fR> is either \'f\' (forward) or \'b\' (backward). The +<\fIdirection\fR> should be omitted for \'k\' transforms, where it is +specified via the geometry instead. + +<\fIgeometry\fR> is the size and dimensionality of the transform, +where different dimensions are separated by \'x\' (e.g. \'16x32\' for +a two-dimensional 16 by 32 transform). In the case of \'k\' +transforms, the size of each dimension is followed by a "type" string, +which can be one of f/b/h/e00/e01/e10/e11/o00/o01/o10/o11 for +R2HC/HC2R/DHT/REDFT00/.../RODFT11, respectively, as defined in the +FFTW manual. + +For example, \'cif12x13x14\' is a three-dimensional 12 by 13 x 14 +complex DFT operating in-place. 
\'rob65536\' is a one-dimensional +size-65536 out-of-place complex-to-real (backwards) transform +operating on Hermitian-symmetry input. \'ki10hx20e01\' is a +two-dimensional 10 by 20 r2r transform where the first dimension is a +DHT and the second dimension is an REDFT01 (DCT-III). + +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +Display help on the command-line options and usage. +.TP +\fB\-V\fR, \fB\-\-version\fR +Print the version number and copyright information. +.TP +\fB\-v\fR, \fB\-\-verbose\fR +Verbose output. (You can specify this multiple times, or supply a numeric +argument greater than 1, to increase the verbosity level.) Note that the +verbose output will be mixed with the wisdom output (making it impossible +to import), unless you write the wisdom to a file via the +.B \-o +option. +.TP +\fB\-c\fR, \fB\-\-canonical\fR +Optimize/pre-plan a canonical set of sizes: all powers of two and ten +up to 2^20 (1048576), including both real and complex, forward and +backwards, in-place and out-of-place transforms. Also includes two- +and three-dimensional transforms of equal-size dimensions +(e.g. 16x16x16). +.TP +\fB\-t\fR \fIhours\fR, \fB\-\-time\-limit\fR=\fIhours\fR +Stop after a time of +.I hours +(hours) has elapsed, outputting accumulated wisdom. (The problems are planned +in increasing order of size.) Defaults to 0, indicating no time limit. +.TP +\fB\-o\fR \fIfile\fR, \fB\-\-output-file\fR=\fIfile\fR +Send wisdom output to +.I file +rather than to standard output (the default). +.TP +\fB\-m\fR, \fB\-\-measure\fR; \fB\-e\fR, \fB\-\-estimate\fR; \fB\-x\fR, \fB\-\-exhaustive\fR +Normally, +.I fftw\-wisdom +creates plans in FFTW_PATIENT mode, but with these options you can instead +use FFTW_MEASURE, FFTW_ESTIMATE, or FFTW_EXHAUSTIVE modes, respectively, +as described in more detail by the FFTW manual. + +Note that wisdom is tagged with the planning patience level, and a +single file can mix different levels of wisdom (e.g. 
you can mostly +use the patient default, but plan a few sizes that you especially care +about in +.B \-\-exhaustive +mode). +.TP +\fB\-n\fR, \fB\-\-no\-system\-wisdom\fR +Do not import the system wisdom from +.I /etc/fftw/wisdom +(which is normally read by default). +.TP +\fB\-w\fR \fIfile\fR, \fB\-\-wisdom\-file\fR=\fIfile\fR +Import wisdom from +.I file +(in addition to the system wisdom, unless +.B \-n +is specified). Multiple wisdom files can be read via multiple +.B \-w +options. If +.I file +is "\-", then read wisdom from standard input. +.TP +\fB\-T\fR \fIN\fR, \fB\--threads\fR=\fIN\fR +Plan with +.I N +threads. This option is only present if FFTW was configured with +thread support. +.SH BUGS +Send bug reports to fftw@fftw.org. +.SH AUTHORS +Written by Steven G. Johnson and Matteo Frigo. + +Copyright (c) 2003, 2007-14 Matteo Frigo +.br +Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology +.SH "SEE ALSO" +fftw-wisdom-to-conf(1) diff -r 8db794ca3e0b -r 26056e866c29 fft/fftw/fftw-3.3.4/tools/fftw-wisdom.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tools/fftw-wisdom.c Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,360 @@ +/* Re-use libbench2 and the test program, but override bench_main so that + we can have different command-line syntax. 
*/ +#include "my-getopt.h" +#include "bench.h" + +#include +#include +#include +#include +#include +#include + +#if defined(HAVE_THREADS) || defined(HAVE_OPENMP) +# define HAVE_SMP + extern int threads_ok; +#endif + +#define CONCAT(prefix, name) prefix ## name +#if defined(BENCHFFT_SINGLE) +#define FFTW(x) CONCAT(fftwf_, x) +#elif defined(BENCHFFT_LDOUBLE) +#define FFTW(x) CONCAT(fftwl_, x) +#elif defined(BENCHFFT_QUAD) +#define FFTW(x) CONCAT(fftwq_, x) +#else +#define FFTW(x) CONCAT(fftw_, x) +#endif + +/* from bench.c: */ +extern unsigned the_flags; +extern int usewisdom; +extern int nthreads; + +/* dummy routines to replace those in hook.c */ +void install_hook(void) {} +void uninstall_hook(void) {} + +int verbose; + +static void do_problem(bench_problem *p) +{ + if (verbose) + printf("PLANNING PROBLEM: %s\n", p->pstring); + /* BENCH_ASSERT(can_do(p)); */ + problem_alloc(p); + setup(p); + done(p); +} + +static void add_problem(const char *pstring, + bench_problem ***p, int *ip, int *np) +{ + if (*ip >= *np) { + *np = *np * 2 + 1; + *p = (bench_problem **) realloc(*p, sizeof(bench_problem *) * *np); + } + (*p)[(*ip)++] = problem_parse(pstring); +} + +static int sz(const bench_problem *p) +{ + return tensor_sz(p->sz) * tensor_sz(p->vecsz); +} + +static int prob_size_cmp(const void *p1_, const void *p2_) +{ + const bench_problem * const *p1 = (const bench_problem * const *) p1_; + const bench_problem * const *p2 = (const bench_problem * const *) p2_; + return (sz(*p1) - sz(*p2)); +} + +static struct my_option options[] = +{ + {"help", NOARG, 'h'}, + {"version", NOARG, 'V'}, + {"verbose", NOARG, 'v'}, + + {"canonical", NOARG, 'c'}, + {"time-limit", REQARG, 't'}, + + {"output-file", REQARG, 'o'}, + + {"impatient", NOARG, 'i'}, + {"measure", NOARG, 'm'}, + {"estimate", NOARG, 'e'}, + {"exhaustive", NOARG, 'x'}, + + {"no-system-wisdom", NOARG, 'n'}, + {"wisdom-file", REQARG, 'w'}, + +#ifdef HAVE_SMP + {"threads", REQARG, 'T'}, +#endif + + /* options to restrict 
configuration to rdft-only, etcetera? */ + + {0, NOARG, 0} +}; + +static void help(FILE *f, const char *program_name) +{ + fprintf( + f, + "Usage: %s [options] [sizes]\n" +" Create wisdom (pre-planned/optimized transforms) for specified sizes,\n" +" writing wisdom to stdout (or to a file, using -o).\n" + "\nOptions:\n" + " -h, --help: print this help\n" + " -V, --version: print version/copyright info\n" + " -v, --verbose: verbose output\n" + " -c, --canonical: plan/optimize canonical set of sizes\n" + " -t , --time-limit=: time limit in hours (default: 0, no limit)\n" + " -o FILE, --output-file=FILE: output to FILE instead of stdout\n" + " -m, --measure: plan in MEASURE mode (PATIENT is default)\n" + " -e, --estimate: plan in ESTIMATE mode (not recommended)\n" + " -x, --exhaustive: plan in EXHAUSTIVE mode (may be slow)\n" + " -n, --no-system-wisdom: don't read /etc/fftw/ system wisdom file\n" + " -w FILE, --wisdom-file=FILE: read wisdom from FILE (stdin if -)\n" +#ifdef HAVE_SMP + " -T N, --threads=N: plan with N threads\n" +#endif + "\nSize syntax: \n" + " = c/r/k for complex/real(r2c,c2r)/r2r\n" + " = i/o for in/out-of place\n" + " = f/b for forward/backward, omitted for k transforms\n" + " = [x[x...]], e.g. 
10x12x14\n" + " -- for k transforms, after each dimension is a :\n" + " = f/b/h/e00/e01/e10/e11/o00/o01/o10/o11\n" + " for R2HC/HC2R/DHT/REDFT00/.../RODFT11\n" + , program_name); +} + +/* powers of two and ten up to 2^20, for now */ +static char canonical_sizes[][32] = { + "1", "2", "4", "8", "16", "32", "64", "128", "256", "512", "1024", + "2048", "4096", "8192", "16384", "32768", "65536", "131072", + "262144", "524288", "1048576", + + "10", "100", "1000", "10000", "100000", "1000000", + + "2x2", "4x4", "8x8", "10x10", "16x16", "32x32", "64x64", "100x100", + "128x128", "256x256", "512x512", "1000x1000", "1024x1024", + + "2x2x2", "4x4x4", "8x8x8", "10x10x10", "16x16x16", "32x32x32", + "64x64x64", "100x100x100" +}; + +#define NELEM(array)(sizeof(array) / sizeof((array)[0])) + +int bench_main(int argc, char *argv[]) +{ + int c; + unsigned i; + int impatient = 0; + int system_wisdom = 1; + int canonical = 0; + double hours = 0; + FILE *output_file; + char *output_fname = 0; + bench_problem **problems = 0; + int nproblems = 0, iproblem = 0; + time_t begin; + + verbose = 0; + usewisdom = 0; + + bench_srand(1); +#ifdef HAVE_SMP + /* do not configure FFTW with threads, unless the + user requests -T */ + threads_ok = 0; +#endif + + while ((c = my_getopt(argc, argv, options)) != -1) { + switch (c) { + case 'h': + help(stdout, argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'V': + printf("fftw-wisdom tool for FFTW version " VERSION ".\n"); + printf( +"\n" +"Copyright (c) 2003, 2007-14 Matteo Frigo\n" +"Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology\n" +"\n" +"This program is free software; you can redistribute it and/or modify\n" +"it under the terms of the GNU General Public License as published by\n" +"the Free Software Foundation; either version 2 of the License, or\n" +"(at your option) any later version.\n" +"\n" +"This program is distributed in the hope that it will be useful,\n" +"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" 
+"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" +"GNU General Public License for more details.\n" +"\n" +"You should have received a copy of the GNU General Public License\n" +"along with this program; if not, write to the Free Software\n" +"Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" + ); + exit(EXIT_SUCCESS); + break; + + case 'v': + verbose = 1; + break; + + case 'c': + canonical = 1; + break; + + case 't': + hours = atof(my_optarg); + break; + + case 'o': + if (output_fname) + bench_free(output_fname); + + if (!strcmp(my_optarg, "-")) + output_fname = 0; + else { + output_fname = (char *) bench_malloc(sizeof(char) * + (strlen(my_optarg) + 1)); + strcpy(output_fname, my_optarg); + } + break; + + case 'm': + case 'i': + impatient = 1; + break; + + case 'e': + the_flags |= FFTW_ESTIMATE; + break; + + case 'x': + the_flags |= FFTW_EXHAUSTIVE; + break; + + case 'n': + system_wisdom = 0; + break; + + case 'w': { + FILE *w = stdin; + if (strcmp(my_optarg, "-") && !(w = fopen(my_optarg, "r"))) { + fprintf(stderr, + "fftw-wisdom: error opening \"%s\": ", my_optarg); + perror(""); + exit(EXIT_FAILURE); + } + if (!FFTW(import_wisdom_from_file)(w)) { + fprintf(stderr, "fftw_wisdom: error reading wisdom " + "from \"%s\"\n", my_optarg); + exit(EXIT_FAILURE); + } + if (w != stdin) + fclose(w); + break; + } + +#ifdef HAVE_SMP + case 'T': + nthreads = atoi(my_optarg); + if (nthreads < 1) nthreads = 1; + threads_ok = 1; + BENCH_ASSERT(FFTW(init_threads)()); + break; +#endif + + case '?': + /* `my_getopt' already printed an error message. 
*/ + cleanup(); + return EXIT_FAILURE; + + default: + abort (); + } + } + + if (!impatient) + the_flags |= FFTW_PATIENT; + + if (system_wisdom) + if (!FFTW(import_system_wisdom)() && verbose) + fprintf(stderr, "fftw-wisdom: system-wisdom import failed\n"); + + if (canonical) { + for (i = 0; i < NELEM(canonical_sizes); ++i) { + unsigned j; + char types[][8] = { + "cof", "cob", "cif", "cib", "rof", "rob", "rif", "rib" + }; + + for (j = 0; j < NELEM(types); ++j) { + char ps[64]; + if (!strchr(canonical_sizes[i],'x') + || !strchr(types[j],'o')) { +#ifdef HAVE_SNPRINTF + snprintf(ps, sizeof(ps), "%s%s", types[j], canonical_sizes[i]); +#else + sprintf(ps, "%s%s", types[j], canonical_sizes[i]); +#endif + add_problem(ps, &problems, &iproblem, &nproblems); + } + } + } + } + + while (my_optind < argc) { + if (!strcmp(argv[my_optind], "-")) { + char s[1025]; + while (1 == fscanf(stdin, "%1024s", s)) + add_problem(s, &problems, &iproblem, &nproblems); + } + else + add_problem(argv[my_optind], &problems, &iproblem, &nproblems); + ++my_optind; + } + + nproblems = iproblem; + qsort(problems, nproblems, sizeof(bench_problem *), prob_size_cmp); + + if (!output_fname) + output_file = stdout; + else + if (!(output_file = fopen(output_fname, "w"))) { + fprintf(stderr, + "fftw-wisdom: error creating \"%s\"", output_fname); + perror(""); + exit(EXIT_FAILURE); + } + + begin = time((time_t*)0); + for (iproblem = 0; iproblem < nproblems; ++iproblem) { + if (hours <= 0 + || hours > (time((time_t*)0) - begin) / 3600.0) + do_problem(problems[iproblem]); + problem_destroy(problems[iproblem]); + + } + free(problems); + + if (verbose && hours > 0 + && hours < (time((time_t*)0) - begin) / 3600.0) + fprintf(stderr, "EXCEEDED TIME LIMIT OF %g HOURS.\n", hours); + + FFTW(export_wisdom_to_file)(output_file); + if (output_file != stdout) + fclose(output_file); + if (output_fname) + bench_free(output_fname); + + cleanup(); + + return EXIT_SUCCESS; +} diff -r 8db794ca3e0b -r 26056e866c29 
fft/fftw/fftw-3.3.4/tools/fftw_wisdom.1.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fft/fftw/fftw-3.3.4/tools/fftw_wisdom.1.in Tue Oct 06 13:08:39 2015 +0100 @@ -0,0 +1,196 @@ +.\" +.\" Copyright (c) 2003, 2007-14 Matteo Frigo +.\" Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology +.\" +.\" This program is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" +.\" This program is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with this program; if not, write to the Free Software +.\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +.\" +.TH FFTW-WISDOM 1 "February, 2003" "fftw" "fftw" +.SH NAME +fftw@PREC_SUFFIX@\-wisdom \- create wisdom (pre-optimized FFTs) +.SH SYNOPSIS +.B fftw@PREC_SUFFIX@\-wisdom +[\fIOPTION\fR]... [\fISIZE\fR]... +.SH DESCRIPTION +.PP +.\" Add any additional description here +.I fftw@PREC_SUFFIX@\-wisdom +is a utility to generate FFTW +.B wisdom +files, which contain saved information about how to optimally compute +(Fourier) transforms of various sizes. FFTW is a free library to +compute discrete Fourier transforms in one or more dimensions, for +arbitrary sizes, and of both real and complex data, among other +related operations. More information on FFTW can be found at the FFTW +home page: +.I http://www.fftw.org + +Programs using FFTW can be written to load wisdom from an arbitrary file, +string, or other source. 
Moreover, it is likely that many FFTW-using +programs will load the \fBsystem wisdom\fR file, which is stored in +.I /etc/fftw/wisdom@PREC_SUFFIX@ +by default. +.I fftw@PREC_SUFFIX@\-wisdom +can be used to create or add to such wisdom files. In its most +typical usage, the wisdom file can be created to pre-plan a canonical +set of sizes (see below) via: + +.ce +fftw@PREC_SUFFIX@\-wisdom \-v \-c \-o wisdom@PREC_SUFFIX@ + +(this will take many hours, which can be limited by the +.B \-t +option) and the output +.I wisdom@PREC_SUFFIX@ +file can then be copied (as root) to +.I /etc/fftw/ +or whatever. + +The +.I fftw@PREC_SUFFIX@\-wisdom +program normally writes the wisdom directly to standard output, but this +can be changed via the +.B \-o +option, as in the example above. + +If the system wisdom file +.I /etc/fftw/wisdom@PREC_SUFFIX@ +already exists, then +.I fftw@PREC_SUFFIX@\-wisdom +reads this existing wisdom (unless the +.B \-n +option is specified) and outputs both the old wisdom and any +newly created wisdom. In this way, it can be used to add new transform +sizes to the existing system wisdom (or other wisdom file, with the +.B \-w +option). +.SH SPECIFYING SIZES +Although a canonical set of sizes to optimize is specified by the +.B \-c +option, the user can also specify zero or more non-canonical transform +sizes and types to optimize, via the +.I SIZE +arguments following the option flags. Alternatively, the sizes to +optimize can be read from standard input (whitespace-separated), if a +.I SIZE +argument of "\-" is supplied. + +Sizes are specified by the syntax: + +.ce +<\fItype\fR><\fIinplace\fR><\fIdirection\fR><\fIgeometry\fR> + +<\fItype\fR> is either \'c\' (complex), \'r\' (real, r2c/c2r), or +\'k\' (r2r, per-dimension kinds, specified in the geometry, below). + +<\fIinplace\fR> is either \'i\' (in place) or \'o\' (out of place). + +<\fIdirection\fR> is either \'f\' (forward) or \'b\' (backward). 
The +<\fIdirection\fR> should be omitted for \'k\' transforms, where it is +specified via the geometry instead. + +<\fIgeometry\fR> is the size and dimensionality of the transform, +where different dimensions are separated by \'x\' (e.g. \'16x32\' for +a two-dimensional 16 by 32 transform). In the case of \'k\' +transforms, the size of each dimension is followed by a "type" string, +which can be one of f/b/h/e00/e01/e10/e11/o00/o01/o10/o11 for +R2HC/HC2R/DHT/REDFT00/.../RODFT11, respectively, as defined in the +FFTW manual. + +For example, \'cif12x13x14\' is a three-dimensional 12 by 13 x 14 +complex DFT operating in-place. \'rob65536\' is a one-dimensional +size-65536 out-of-place complex-to-real (backwards) transform +operating on Hermitian-symmetry input. \'ki10hx20e01\' is a +two-dimensional 10 by 20 r2r transform where the first dimension is a +DHT and the second dimension is an REDFT01 (DCT-III). + +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +Display help on the command-line options and usage. +.TP +\fB\-V\fR, \fB\-\-version\fR +Print the version number and copyright information. +.TP +\fB\-v\fR, \fB\-\-verbose\fR +Verbose output. (You can specify this multiple times, or supply a numeric +argument greater than 1, to increase the verbosity level.) Note that the +verbose output will be mixed with the wisdom output (making it impossible +to import), unless you write the wisdom to a file via the +.B \-o +option. +.TP +\fB\-c\fR, \fB\-\-canonical\fR +Optimize/pre-plan a canonical set of sizes: all powers of two and ten +up to 2^20 (1048576), including both real and complex, forward and +backwards, in-place and out-of-place transforms. Also includes two- +and three-dimensional transforms of equal-size dimensions +(e.g. 16x16x16). +.TP +\fB\-t\fR \fIhours\fR, \fB\-\-time\-limit\fR=\fIhours\fR +Stop after a time of +.I hours +(hours) has elapsed, outputting accumulated wisdom. (The problems are planned +in increasing order of size.) 
Defaults to 0, indicating no time limit. +.TP +\fB\-o\fR \fIfile\fR, \fB\-\-output-file\fR=\fIfile\fR +Send wisdom output to +.I file +rather than to standard output (the default). +.TP +\fB\-m\fR, \fB\-\-measure\fR; \fB\-e\fR, \fB\-\-estimate\fR; \fB\-x\fR, \fB\-\-exhaustive\fR +Normally, +.I fftw@PREC_SUFFIX@\-wisdom +creates plans in FFTW_PATIENT mode, but with these options you can instead +use FFTW_MEASURE, FFTW_ESTIMATE, or FFTW_EXHAUSTIVE modes, respectively, +as described in more detail by the FFTW manual. + +Note that wisdom is tagged with the planning patience level, and a +single file can mix different levels of wisdom (e.g. you can mostly +use the patient default, but plan a few sizes that you especially care +about in +.B \-\-exhaustive +mode). +.TP +\fB\-n\fR, \fB\-\-no\-system\-wisdom\fR +Do not import the system wisdom from +.I /etc/fftw/wisdom@PREC_SUFFIX@ +(which is normally read by default). +.TP +\fB\-w\fR \fIfile\fR, \fB\-\-wisdom\-file\fR=\fIfile\fR +Import wisdom from +.I file +(in addition to the system wisdom, unless +.B \-n +is specified). Multiple wisdom files can be read via multiple +.B \-w +options. If +.I file +is "\-", then read wisdom from standard input. +.TP +\fB\-T\fR \fIN\fR, \fB\-\-threads\fR=\fIN\fR +Plan with +.I N +threads. This option is only present if FFTW was configured with +thread support. +.SH BUGS +Send bug reports to fftw@fftw.org. +.SH AUTHORS +Written by Steven G. Johnson and Matteo Frigo.
+ +Copyright (c) 2003, 2007-14 Matteo Frigo +.br +Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology +.SH "SEE ALSO" +fftw-wisdom-to-conf(1) diff -r 8db794ca3e0b -r 26056e866c29 fft/kissfft/FFT.js --- a/fft/kissfft/FFT.js Mon Oct 05 15:51:10 2015 +0100 +++ b/fft/kissfft/FFT.js Tue Oct 06 13:08:39 2015 +0100 @@ -19,30 +19,35 @@ ); function KissFFT(size) { + this.size = size; this.fcfg = kiss_fftr_alloc(size, false); this.icfg = kiss_fftr_alloc(size, true); - this.ptr = kissFFTModule._malloc(size*4 + (size+2)*4); - this.ri = new Uint8Array(kissFFTModule.HEAPU8.buffer, this.ptr, size*4); - this.ci = new Uint8Array(kissFFTModule.HEAPU8.buffer, this.ptr + size*4, (size+2)*4); + + this.rptr = kissFFTModule._malloc(size*4 + (size+2)*4); + this.cptr = this.rptr + size*4; + + this.ri = new Float32Array(kissFFTModule.HEAPU8.buffer, this.rptr, size); + this.ci = new Float32Array(kissFFTModule.HEAPU8.buffer, this.cptr, size+2); + this.forward = function(real) { - var ptr = this.ptr; - var size = this.size; - this.ri.set(new Uint8Array(real.buffer)); - kiss_fftr(this.fcfg, ptr, ptr + this.size*4); - var out = new Float32Array(kissFFTModule.HEAPU8.buffer, ptr + size*4, size+2); - return out; + this.ri.set(real); + kiss_fftr(this.fcfg, this.rptr, this.cptr); + return (new Float32Array + (kissFFTModule.HEAPU8.buffer, this.cptr, this.size + 2)) + .slice(0); } + this.inverse = function(cpx) { - var ptr = this.ptr; - var size = this.size; - this.ci.set(new Uint8Array(cpx.buffer)); - kiss_fftri(this.icfg, ptr + size*4, ptr); - var out = new Float32Array(kissFFTModule.HEAPU8.buffer, ptr, size); - return out; + this.ci.set(cpx); + kiss_fftri(this.icfg, this.cptr, this.rptr); + return (new Float32Array + (kissFFTModule.HEAPU8.buffer, this.rptr, this.size)) + .slice(0); } - this.discard = function() { - kissFFTModule._free(this.ptr); + + this.dispose = function() { + kissFFTModule._free(this.rptr); kiss_fftr_free(this.fcfg); kiss_fftr_free(this.icfg); } diff -r 8db794ca3e0b 
-r 26056e866c29 fft/kissfft/Makefile.emscripten --- a/fft/kissfft/Makefile.emscripten Mon Oct 05 15:51:10 2015 +0100 +++ b/fft/kissfft/Makefile.emscripten Tue Oct 06 13:08:39 2015 +0100 @@ -1,6 +1,15 @@ KissFFT.js: kiss_fft.c kiss_fft.h _kiss_fft_guts.h tools/kiss_fftr.c tools/kiss_fftr.h - emcc -O3 -I. --memory-init-file 0 -s NO_FILESYSTEM=1 -s NO_BROWSER=1 -s MODULARIZE=1 -s EXPORT_NAME="'KissFFTModule'" -s EXPORTED_FUNCTIONS="['_kiss_fftr_alloc','_kiss_fftr','_kiss_fftri','_kiss_fftr_free']" -o KissFFT.js kiss_fft.c tools/kiss_fftr.c + emcc -O3 -I. \ + --memory-init-file 0 \ + -s NO_FILESYSTEM=1 \ + -s NO_BROWSER=1 \ + -s MODULARIZE=1 \ + -s EXPORT_NAME="'KissFFTModule'" \ + -s EXPORTED_FUNCTIONS="['_kiss_fftr_alloc','_kiss_fftr','_kiss_fftri','_kiss_fftr_free']" \ + -o KissFFT.js \ + kiss_fft.c tools/kiss_fftr.c clean: rm -f KissFFT.js + diff -r 8db794ca3e0b -r 26056e866c29 fft/nayuki-obj/fft.js --- a/fft/nayuki-obj/fft.js Mon Oct 05 15:51:10 2015 +0100 +++ b/fft/nayuki-obj/fft.js Tue Oct 06 13:08:39 2015 +0100 @@ -25,6 +25,7 @@ "use strict"; function FFTNayuki(n) { + this.n = n; this.levels = -1; diff -r 8db794ca3e0b -r 26056e866c29 fft/test.html --- a/fft/test.html Mon Oct 05 15:51:10 2015 +0100 +++ b/fft/test.html Tue Oct 06 13:08:39 2015 +0100 @@ -18,6 +18,8 @@ + + @@ -43,44 +45,29 @@ Cross KissFFT + + FFTW

    Notes

      -
    • Nayuki: in-place single-precision complex-complex
    • -
    • Nayuki (obj): Nayuki with the sin/cos tables pre-calculated on object construction
    • -
    • Nockert: double-precision real-complex
    • +
    • Nayuki: in-place single-precision complex-complex. Around 7kb.
    • +
    • Nayuki (obj): Nayuki with the sin/cos tables pre-calculated on object construction. Around 4kb.
    • +
    • Nockert: double-precision real-complex. Around 25kb.
    • Dntj: double-precision complex-complex. Forward transform is scaled and I've scaled it back again here, which may - introduce rounding error.
    • + introduce rounding error. Around 10kb.
    • Cross: double-precision real-complex in C, compiled with Emscripten. This is considered a slow implementation amongst - native code ones.
    • + native code ones. Around 60kb.
    • KissFFT: single-precision real-complex in C, compiled - with Emscripten. This should be faster than Cross. Despite its - name, it is the most sophisticated implementation here.
    • + with Emscripten. A reasonably sophisticated implementation. Around + 70kb. +
    • FFTW: single-precision real-complex in C, compiled with + Emscripten. GPL licensed. Around 3Mb.
    -

    Rationale

    - -

    If 2150 iterations of real-to-complex FFT of size 2048 takes less - than 10 seconds, then we may be able to make a high quality - real-time phase vocoder (just).

    - -

    A phase-vocoder of course must use overlapped windowed FFT - (although you can choose the size, within limits), IFFT, and - cartesian-polar conversion to calculate the phase for the - instantaneous frequency.

    - -

    A reasonable estimate of CPU cost for the whole thing is - somewhere around 10x the cost of simple non-overlapping short-time - forward Fourier transforms across the signal.

    - -

    2150 iterations corresponds to 100 seconds of audio - non-overlapped at 44.1kHz, so if that takes less than 10 seconds, - then in theory we might be OK.

    - diff -r 8db794ca3e0b -r 26056e866c29 fft/test.js --- a/fft/test.js Mon Oct 05 15:51:10 2015 +0100 +++ b/fft/test.js Tue Oct 06 13:08:39 2015 +0100 @@ -39,20 +39,20 @@ return result; } -var iterations = 2150; -var size = 2048; +var iterations = 2000; function report(name, start, middle, end, total) { - document.getElementById(name + "-result").innerHTML = total; - document.getElementById(name + "-1").innerHTML = - Math.round(middle - start) + " ms"; - document.getElementById(name + "-2").innerHTML = - Math.round(end - middle) + " ms"; - document.getElementById(name + "-itr").innerHTML = - Math.round((1000.0 / ((end - middle) / iterations))) + " itr/sec"; + function addTo(tag, thing) { + document.getElementById(name + "-" + tag).innerHTML += thing + "
    "; + } + addTo("result", total); + addTo("1", Math.round(middle - start) + " ms"); + addTo("2", Math.round(end - middle) + " ms"); + addTo("itr", Math.round((1000.0 / + ((end - middle) / iterations))) + " itr/sec"); } -function testNayuki() { +function testNayuki(size) { var start = performance.now(); var middle = start; @@ -77,7 +77,7 @@ report("nayuki", start, middle, end, total); } -function testNayukiObj() { +function testNayukiObj(size) { var fft = new FFTNayuki(size); @@ -104,7 +104,7 @@ report("nayukiobj", start, middle, end, total); } -function testNockert() { +function testNockert(size) { var fft = new FFT.complex(size, false); @@ -131,7 +131,7 @@ report("nockert", start, middle, end, total); } -function testDntj() { +function testDntj(size) { var start = performance.now(); var middle = start; @@ -157,7 +157,7 @@ report("dntj", start, middle, end, total); } -function testCross() { +function testCross(size) { var fft = new FFTCross(size); @@ -183,10 +183,10 @@ report("cross", start, middle, end, total); - fft.discard(); + fft.dispose(); } -function testKissFFT() { +function testKissFFT(size) { var fft = new KissFFT(size); @@ -216,26 +216,67 @@ report("kissfft", start, middle, end, total); - fft.discard(); + fft.dispose(); } -var tests = [ testNayuki, testNayukiObj, testKissFFT, testCross, testNockert, testDntj ]; +function testFFTW(size) { + + var fft = new FFTW(size); + + var start = performance.now(); + var middle = start; + var end = start; + + total = 0.0; + + for (var i = 0; i < 2*iterations; ++i) { + if (i == iterations) { + middle = performance.now(); + } + var ri = inputReals(size); + var out = fft.forward(ri); + for (var j = 0; j <= size/2; ++j) { + total += Math.sqrt(out[j*2] * out[j*2] + out[j*2+1] * out[j*2+1]); + } + // FFTW returns only the first half of the output (plus + // DC/Nyquist) -- synthesise the conjugate half + for (var j = 1; j < size/2; ++j) { + total += Math.sqrt(out[j*2] * out[j*2] + out[j*2+1] * out[j*2+1]); + } + } + + var 
end = performance.now(); + + report("fftw", start, middle, end, total); + + fft.dispose(); +} + +var sizes = [ 256, 1024, 4096 ]; +var tests = [ testNayuki, testNayukiObj, testFFTW, testKissFFT, testCross, testNockert, testDntj ]; var nextTest = 0; +var nextSize = 0; var interval; function test() { clearInterval(interval); - if (nextTest < tests.length) { - f = tests[nextTest]; - nextTest++; - f(); - interval = setInterval(test, 100); + if (nextTest == tests.length) { + nextSize++; + nextTest = 0; + if (nextSize == sizes.length) { + return; + } } + f = tests[nextTest]; + size = sizes[nextSize]; + nextTest++; + f(size); + interval = setInterval(test, 100); } window.onload = function() { document.getElementById("test-description").innerHTML = - "Running " + 2*iterations + " iterations per implementation, of FFT size " + size + ".
    Timings are given separately for the first half of the run (" + iterations + " iterations) and the second half, in case the JS engine takes some warming up." - interval = setInterval(test(), 100); + "Running " + 2*iterations + " iterations per implementation.
    Timings are given separately for the first half of the run (" + iterations + " iterations) and the second half, in case the JS engine takes some warming up.
    Each cell contains results for the following sizes: " + sizes; + interval = setInterval(test, 100); }